Commit f9d7dd5f authored by Penom Nom's avatar Penom Nom

The orientation of count becomes the orientation of the sequençage.

parent a99b88b8
...@@ -14,15 +14,82 @@ ...@@ -14,15 +14,82 @@
# You should have received a copy of the GNU General Public License # You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>. # along with this program. If not, see <http://www.gnu.org/licenses/>.
# #
# $Id$
=pod
=head1 NAME
cigarline_by_pos.pl
=head1 DESCRIPTION
Sum by position the status of reads. This provides for example the number of
reads that have a mismatch at first base.
=head1 SYNOPSIS
cigarline_by_pos.pl SAM_FILE OUTPUT_FILE
=head1 ARGUMENTS
1- The SAM file.
2- The CSV file with count.
eg :
;1;2;3;4;5
clipping;4;4;3;3;2
insertion;0;0;0;0;0
match;8;9;10;9;11
mismatch;1;0;0;1;0
=head1 OPTIONS
None.
=head1 VERSION
2.1
=head1 DEPENDENCIES
None.
=head1 AUTHOR
Escudie Frederic - Plateforme genomique Toulouse
=cut
#############################################################################################################################
#
# LIB
#
#############################################################################################################################
use strict ; use strict ;
use warnings ; use warnings ;
#############################################################################################################################
#
# MAIN
#
#############################################################################################################################
MAIN: MAIN:
{ {
my $sam = $ARGV[0] ; my $sam = $ARGV[0] ;
my $csv = $ARGV[1] ; my $csv = $ARGV[1] ;
my $help = 0 ;
if( $help || @ARGV != 2 )
{
pod2usage(
-sections => "SYNOPSIS|ARGUMENTS|DESCRIPTION|VERSION",
-verbose => 99
) ;
}
my @nb_by_pos = () ; my @nb_by_pos = () ;
my %out_string = ( 'mismatch' => "", 'match' => "", 'clipping' => "", 'insertion' => "" ) ; my %out_string = ( 'mismatch' => "", 'match' => "", 'clipping' => "", 'insertion' => "" ) ;
my $separator = ";" ; my $separator = ";" ;
...@@ -60,6 +127,11 @@ MAIN: ...@@ -60,6 +127,11 @@ MAIN:
#############################################################################################################################
#
# SUB
#
#############################################################################################################################
=head2 procedure writeCsvFromHash =head2 procedure writeCsvFromHash
Title : writeCsvFromHash Title : writeCsvFromHash
...@@ -138,6 +210,44 @@ sub isAligned ...@@ -138,6 +210,44 @@ sub isAligned
} }
=head2 function isForward
Title : isForward
Usage : $binary = isForward( $sam_line_obj )
Prerequisite : none
Function : Returns 1 if the strand of the query is forward.
Returns : binary
Args : $sam_line_obj Hash ref - The hash representing one read
on SAM file.
Globals : none
=cut
sub isForward
{
my $sam_line_obj = shift ;
my $isForward = 0 ;
#FLAG is in decimal format
if( $$sam_line_obj{'FLAG'} =~ /^\d+$/ )
{
if( ($$sam_line_obj{'FLAG'} & 16) == 0 )
{
$isForward = 1 ;
}
}
#FLAG is in human redable format
else
{
if( !($$sam_line_obj{'FLAG'} =~ /r/) )
{
$isForward = 1 ;
}
}
return $isForward ;
}
=head2 function nm2statesRead =head2 function nm2statesRead
Title : nm2statesRead Title : nm2statesRead
...@@ -204,6 +314,7 @@ sub parseSamLine ...@@ -204,6 +314,7 @@ sub parseSamLine
#Mandatory fields #Mandatory fields
%line_info = ( %line_info = (
'QUERY' => $line_subdivisions[0], 'QUERY' => $line_subdivisions[0],
'FLAG' => $line_subdivisions[1],
'REFNAME' => $line_subdivisions[2], 'REFNAME' => $line_subdivisions[2],
'CIGAR' => $line_subdivisions[5], 'CIGAR' => $line_subdivisions[5],
'SEQ' => $line_subdivisions[9] ) ; 'SEQ' => $line_subdivisions[9] ) ;
...@@ -241,6 +352,7 @@ sub processPositionSatus ...@@ -241,6 +352,7 @@ sub processPositionSatus
{ {
my($sam_line_obj, $nb_by_pos) = @_ ; my($sam_line_obj, $nb_by_pos) = @_ ;
my @reads_status = () ;
my $pos = 0 ; my $pos = 0 ;
my $prec = "" ; my $prec = "" ;
my $isDel = 0 ; my $isDel = 0 ;
...@@ -251,7 +363,7 @@ sub processPositionSatus ...@@ -251,7 +363,7 @@ sub processPositionSatus
#Process the start clipping #Process the start clipping
while( $cigar_states[$pos] =~ /[HS]/ ) while( $cigar_states[$pos] =~ /[HS]/ )
{ {
$$nb_by_pos[$pos]{'clipping'} += 1 ; $reads_status[$pos] = 'clipping' ;
$pos++ ; $pos++ ;
} }
...@@ -280,12 +392,12 @@ sub processPositionSatus ...@@ -280,12 +392,12 @@ sub processPositionSatus
#While the position corresponds to an insertion #While the position corresponds to an insertion
while( $cigar_states[$pos] eq 'I' ) while( $cigar_states[$pos] eq 'I' )
{ {
$$nb_by_pos[$pos]{'insertion'} += 1 ; $reads_status[$pos] = 'insertion' ;
$pos++ ; $pos++ ;
} }
#Count the match #Count the match
$$nb_by_pos[$pos]{'match'} += 1 ; $reads_status[$pos] = 'match' ;
$pos++ ; $pos++ ;
} }
$prec ="" ; $prec ="" ;
...@@ -294,12 +406,12 @@ sub processPositionSatus ...@@ -294,12 +406,12 @@ sub processPositionSatus
#While the position corresponds to an insertion #While the position corresponds to an insertion
while( $cigar_states[$pos] eq 'I' ) while( $cigar_states[$pos] eq 'I' )
{ {
$$nb_by_pos[$pos]{'insertion'} += 1 ; $reads_status[$pos] = 'insertion' ;
$pos++ ; $pos++ ;
} }
#Count the mismatch #Count the mismatch
$$nb_by_pos[$pos]{'mismatch'} += 1 ; $reads_status[$pos] = 'mismatch' ;
$pos++ ; $pos++ ;
} }
#If the character is the start marker of deletion #If the character is the start marker of deletion
...@@ -313,12 +425,12 @@ sub processPositionSatus ...@@ -313,12 +425,12 @@ sub processPositionSatus
#While the position corresponds to an insertion #While the position corresponds to an insertion
while( $cigar_states[$pos] eq 'I' ) while( $cigar_states[$pos] eq 'I' )
{ {
$$nb_by_pos[$pos]{'insertion'} += 1 ; $reads_status[$pos] = 'insertion' ;
$pos++ ; $pos++ ;
} }
#Count the match #Count the match
$$nb_by_pos[$pos]{'match'} += 1 ; $reads_status[$pos] = 'match' ;
$pos++ ; $pos++ ;
} }
$prec ="" ; $prec ="" ;
...@@ -337,12 +449,12 @@ sub processPositionSatus ...@@ -337,12 +449,12 @@ sub processPositionSatus
#While the position corresponds to an insertion #While the position corresponds to an insertion
while( $cigar_states[$pos] eq 'I' ) while( $cigar_states[$pos] eq 'I' )
{ {
$$nb_by_pos[$pos]{'insertion'} += 1 ; $reads_status[$pos] = 'insertion' ;
$pos++ ; $pos++ ;
} }
#Count the match #Count the match
$$nb_by_pos[$pos]{'match'} += 1 ; $reads_status[$pos] = 'match' ;
$pos++ ; $pos++ ;
} }
$prec ="" ; $prec ="" ;
...@@ -354,20 +466,33 @@ sub processPositionSatus ...@@ -354,20 +466,33 @@ sub processPositionSatus
#The position corresponds to one clipping #The position corresponds to one clipping
if( $cigar_states[$pos] =~ /[HS]/ ) if( $cigar_states[$pos] =~ /[HS]/ )
{ {
$$nb_by_pos[$pos]{'clipping'} += 1 ; $reads_status[$pos] = 'clipping' ;
} }
#The position corresponds to one insertion #The position corresponds to one insertion
elsif( $cigar_states[$pos] eq 'I' ) elsif( $cigar_states[$pos] eq 'I' )
{ {
$$nb_by_pos[$pos]{'insertion'} += 1 ; $reads_status[$pos] = 'insertion' ;
} }
#The position exists in the CIGAR field but not in the MD when it should be #The position exists in the CIGAR field but not in the MD when it should be
else else
{ {
$$nb_by_pos[$pos]{'match'} += 0 ; $reads_status[$pos] = 'undef' ;
warn "[WARNING] ".$$sam_line_obj{'QUERY'}." the position ".$pos." in the read isn't in MD field." ; warn "[WARNING] ".$$sam_line_obj{'QUERY'}." the position ".$pos." in the read isn't in MD field." ;
} }
$pos++ ; $pos++ ;
} }
}
#If the query is forward
if( !isForward($sam_line_obj) )
{
#Reverse status
@reads_status = reverse @reads_status ;
}
#Add alignment information to global count
for(my $pos = 0 ; $pos < @reads_status ; $pos++)
{
$$nb_by_pos[$pos]{$reads_status[$pos]} += 1 ;
}
}
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment