Commit f9d7dd5f authored by Penom Nom's avatar Penom Nom

The orientation of count becomes the orientation of the sequençage.

parent a99b88b8
......@@ -14,15 +14,82 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# $Id$
=pod
=head1 NAME
cigarline_by_pos.pl
=head1 DESCRIPTION
Sum by position the status of reads. This provides for example the number of
reads that have a mismatch at first base.
=head1 SYNOPSIS
cigarline_by_pos.pl SAM_FILE OUTPUT_FILE
=head1 ARGUMENTS
1- The SAM file.
2- The CSV file with count.
eg :
;1;2;3;4;5
clipping;4;4;3;3;2
insertion;0;0;0;0;0
match;8;9;10;9;11
mismatch;1;0;0;1;0
=head1 OPTIONS
None.
=head1 VERSION
2.1
=head1 DEPENDENCIES
None.
=head1 AUTHOR
Escudie Frederic - Plateforme genomique Toulouse
=cut
#############################################################################################################################
#
# LIB
#
#############################################################################################################################
use strict ;
use warnings ;
#############################################################################################################################
#
# MAIN
#
#############################################################################################################################
MAIN:
{
my $sam = $ARGV[0] ;
my $csv = $ARGV[1] ;
my $help = 0 ;
if( $help || @ARGV != 2 )
{
pod2usage(
-sections => "SYNOPSIS|ARGUMENTS|DESCRIPTION|VERSION",
-verbose => 99
) ;
}
my @nb_by_pos = () ;
my %out_string = ( 'mismatch' => "", 'match' => "", 'clipping' => "", 'insertion' => "" ) ;
my $separator = ";" ;
......@@ -60,6 +127,11 @@ MAIN:
#############################################################################################################################
#
# SUB
#
#############################################################################################################################
=head2 procedure writeCsvFromHash
Title : writeCsvFromHash
......@@ -138,6 +210,44 @@ sub isAligned
}
=head2 function isForward
Title : isForward
Usage : $binary = isForward( $sam_line_obj )
Prerequisite : none
Function : Returns 1 if the strand of the query is forward.
Returns : binary
Args : $sam_line_obj Hash ref - The hash representing one read
on SAM file.
Globals : none
=cut
sub isForward
{
my $sam_line_obj = shift ;
my $isForward = 0 ;
#FLAG is in decimal format
if( $$sam_line_obj{'FLAG'} =~ /^\d+$/ )
{
if( ($$sam_line_obj{'FLAG'} & 16) == 0 )
{
$isForward = 1 ;
}
}
#FLAG is in human redable format
else
{
if( !($$sam_line_obj{'FLAG'} =~ /r/) )
{
$isForward = 1 ;
}
}
return $isForward ;
}
=head2 function nm2statesRead
Title : nm2statesRead
......@@ -204,6 +314,7 @@ sub parseSamLine
#Mandatory fields
%line_info = (
'QUERY' => $line_subdivisions[0],
'FLAG' => $line_subdivisions[1],
'REFNAME' => $line_subdivisions[2],
'CIGAR' => $line_subdivisions[5],
'SEQ' => $line_subdivisions[9] ) ;
......@@ -241,6 +352,7 @@ sub processPositionSatus
{
my($sam_line_obj, $nb_by_pos) = @_ ;
my @reads_status = () ;
my $pos = 0 ;
my $prec = "" ;
my $isDel = 0 ;
......@@ -251,7 +363,7 @@ sub processPositionSatus
#Process the start clipping
while( $cigar_states[$pos] =~ /[HS]/ )
{
$$nb_by_pos[$pos]{'clipping'} += 1 ;
$reads_status[$pos] = 'clipping' ;
$pos++ ;
}
......@@ -280,12 +392,12 @@ sub processPositionSatus
#While the position corresponds to an insertion
while( $cigar_states[$pos] eq 'I' )
{
$$nb_by_pos[$pos]{'insertion'} += 1 ;
$reads_status[$pos] = 'insertion' ;
$pos++ ;
}
#Count the match
$$nb_by_pos[$pos]{'match'} += 1 ;
$reads_status[$pos] = 'match' ;
$pos++ ;
}
$prec ="" ;
......@@ -294,12 +406,12 @@ sub processPositionSatus
#While the position corresponds to an insertion
while( $cigar_states[$pos] eq 'I' )
{
$$nb_by_pos[$pos]{'insertion'} += 1 ;
$reads_status[$pos] = 'insertion' ;
$pos++ ;
}
#Count the mismatch
$$nb_by_pos[$pos]{'mismatch'} += 1 ;
$reads_status[$pos] = 'mismatch' ;
$pos++ ;
}
#If the character is the start marker of deletion
......@@ -313,12 +425,12 @@ sub processPositionSatus
#While the position corresponds to an insertion
while( $cigar_states[$pos] eq 'I' )
{
$$nb_by_pos[$pos]{'insertion'} += 1 ;
$reads_status[$pos] = 'insertion' ;
$pos++ ;
}
#Count the match
$$nb_by_pos[$pos]{'match'} += 1 ;
$reads_status[$pos] = 'match' ;
$pos++ ;
}
$prec ="" ;
......@@ -337,12 +449,12 @@ sub processPositionSatus
#While the position corresponds to an insertion
while( $cigar_states[$pos] eq 'I' )
{
$$nb_by_pos[$pos]{'insertion'} += 1 ;
$reads_status[$pos] = 'insertion' ;
$pos++ ;
}
#Count the match
$$nb_by_pos[$pos]{'match'} += 1 ;
$reads_status[$pos] = 'match' ;
$pos++ ;
}
$prec ="" ;
......@@ -354,20 +466,33 @@ sub processPositionSatus
#The position corresponds to one clipping
if( $cigar_states[$pos] =~ /[HS]/ )
{
$$nb_by_pos[$pos]{'clipping'} += 1 ;
$reads_status[$pos] = 'clipping' ;
}
#The position corresponds to one insertion
elsif( $cigar_states[$pos] eq 'I' )
{
$$nb_by_pos[$pos]{'insertion'} += 1 ;
$reads_status[$pos] = 'insertion' ;
}
#The position exists in the CIGAR field but not in the MD when it should be
else
{
$$nb_by_pos[$pos]{'match'} += 0 ;
$reads_status[$pos] = 'undef' ;
warn "[WARNING] ".$$sam_line_obj{'QUERY'}." the position ".$pos." in the read isn't in MD field." ;
}
$pos++ ;
}
#If the query is forward
if( !isForward($sam_line_obj) )
{
#Reverse status
@reads_status = reverse @reads_status ;
}
#Add alignment information to global count
for(my $pos = 0 ; $pos < @reads_status ; $pos++)
{
$$nb_by_pos[$pos]{$reads_status[$pos]} += 1 ;
}
}
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment