Commit 4ce75d9a authored by Penom Nom's avatar Penom Nom
Browse files

Fix bug in OTU count when only a .taxonomy is provided.

parent 2af31e74
......@@ -72,7 +72,7 @@ sub getNames {
}
sub getGroups {
#create a hash of file.names : key = Num sample and value = array with all ID seq
#create a hash of file.groups : key = Num sample and value = array with all ID seq
my $filename = $_[0];
my $path = $_[1];
open INFILE, "${path}${filename}.groups" or die $!;
......@@ -106,8 +106,10 @@ sub getTaxonomy {
#create three hashes from file.taxonomy :
#taxonomyLineage : key = ID seq and value = array of taxa
#taxonomyScores : key = ID seq and value = array of percent scores
#taxonomyCount : key = ID seq and value = number of sequences
my %taxonomyLineage;
my %taxonomyScores;
my %taxonomyCount;
my $filename = $_[0];
my $path = $_[1];
say "${path}${filename}.taxonomy";
......@@ -120,20 +122,26 @@ sub getTaxonomy {
my @ranks;
my @scores;
my $allRanks;
my @fields = split /\t[0-9]*\t|[\t;]/, $line;
my $count = 1 ;
my @fields = split /[\t;]/, $line;
my $queryID = $fields[0];
for ( my $i = 1; $i < $#fields; $i++ )
{
my ($val, $int) = $fields[$i] =~ /\"?(\w+)\"?\(*(\d*)\)*/;
push @lineage, $val;
push @scores, $int;
if( ($i == 1) && $fields[$i] =~ /^(\d+)$/ ) {
$count = $1 ;
} else {
my ($val, $int) = $fields[$i] =~ /\"?(\w+)\"?\(*(\d*)\)*/;
push @lineage, $val;
push @scores, $int;
}
}
push(@{ $taxonomyLineage{$queryID} }, @lineage);
push(@{ $taxonomyScores{$queryID} }, @scores);
$taxonomyCount{$queryID} = $count;
}
return (\%taxonomyLineage, \%taxonomyScores);
return (\%taxonomyLineage, \%taxonomyScores, \%taxonomyCount);
}
sub getLineage{
......@@ -219,12 +227,12 @@ foreach my $input(@ARGV)
#~ say "my fullname ${path}${filewe}";
my $listFiles = getListFiles($filewe,$path);
my $size = $#$listFiles + 1;
my ($hashLineage, $hashScores) = getTaxonomy($filewe,$path);
my ($hashLineage, $hashScores, $hashCount) = getTaxonomy($filewe,$path);
#if i have multiple samples (FILE.names and FILE.groups)
if ($size eq 3)
{
my %samples = getGroups($filewe,$path);
my $hashNames = getNames($filewe,$path);
my %samples = getGroups($filewe,$path); # {'sample1':["seq_1", "seq_2"], 'sample2':["seq_8"]}
my $hashNames = getNames($filewe,$path); # {'seq_3':["seq_4", "seq_5"], 'seq_8':[]}
foreach my $sample (keys %samples)
{
say "My sample ",$sample;
......@@ -280,36 +288,36 @@ foreach my $input(@ARGV)
my $count = $counTable->{$seq}->{$sample};
if ($count != "0")
{
my $total = $count;
my $allRanks;
my @lineage = @{ $hashLineage->{$seq} };
my @scores = @{ $hashScores->{$seq} };
my $root = $lineage[0];
for ( my $i = 1; $i < @lineage; $i ++ )
{
if (! getOption('combine'))
my $total = $count;
my $allRanks;
my @lineage = @{ $hashLineage->{$seq} };
my @scores = @{ $hashScores->{$seq} };
my @count = @{ $hashCount->{$seq} };
my $root = $lineage[0];
for ( my $i = 1; $i < @lineage; $i ++ )
{
if ( $lineage[$i] eq $root)
if (! getOption('combine'))
{
setOption('name',$root );
$allRanks = 1;
next;
if ( $lineage[$i] eq $root)
{
setOption('name',$root );
$allRanks = 1;
next;
}
}
}
addByLineage
(
$tree,
$set,#rank for the different samples
\@lineage,
$seq,#name of id sequence
$total,
undef,
\@scores #pourcent
# $format ? ($allRanks ? undef : \@webRanks) : \@ranks
);
}
addByLineage
(
$tree,
$set,#rank for the different samples
\@lineage,
$seq,#name of id sequence
$total,
undef,
\@scores #pourcent
# $format ? ($allRanks ? undef : \@webRanks) : \@ranks
);
}
}
if ( ! getOption('combine') )
......@@ -344,7 +352,7 @@ foreach my $input(@ARGV)
$set,
\@lineage,
$seq,
undef,
$hashCount->{$seq},
undef,
\@scores
# $format ? ($allRanks ? undef : \@webRanks) : \@ranks
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment