Skip to content
Snippets Groups Projects
Commit 7e7ad652 authored by Cedric Cabau's avatar Cedric Cabau
Browse files

runMeta now accepts a list of assembly fasta files

parent 7d2d49c9
No related branches found
No related tags found
No related merge requests found
...@@ -8,6 +8,7 @@ use File::Basename; ...@@ -8,6 +8,7 @@ use File::Basename;
use File::Compare; use File::Compare;
use File::Path qw(make_path remove_tree); use File::Path qw(make_path remove_tree);
use POSIX qw(strftime); use POSIX qw(strftime);
use List::Util qw(uniq);
use lib ("$FindBin::RealBin/bin"); use lib ("$FindBin::RealBin/bin");
use AsmUtils; use AsmUtils;
use Report; use Report;
...@@ -21,7 +22,10 @@ my @backup_options = @ARGV; ...@@ -21,7 +22,10 @@ my @backup_options = @ARGV;
my @getOptions = ( my @getOptions = (
['help',undef,undef,0], ['help',undef,undef,0],
['o|outdir','s',undef,1], # DIRECTORY Ouput directory ['o|outdir','s',undef,1], # DIRECTORY Ouput directory
['drap-dirs','s{,}',undef,0], # DRAP DIRECTORIES ['drap-dirs','s{,}',undef,0], # DRAP DIRECTORIES
['R1','s{,}',undef,0], # FASTQ for RMBT step if drap-dirs is empty
['R2','s{,}',undef,0], # FASTQ Mates for RMBT step if drap-dirs is empty
['assemblies','s{,}',undef,0], # Assemblies to meta-assemble if drap-dirs is empty
['input-type','s','transcripts',0], # Primary assembly target type [transcripts] ['input-type','s','transcripts',0], # Primary assembly target type [transcripts]
['input-cov','i',undef,0], # Primary assembly target coverage [lowest coverage value] ['input-cov','i',undef,0], # Primary assembly target coverage [lowest coverage value]
['s|strand','s',undef,0], # For strand specific sequencing data: FR or RF for paired reads; F or R for single ['s|strand','s',undef,0], # For strand specific sequencing data: FR or RF for paired reads; F or R for single
...@@ -82,13 +86,26 @@ map { pod2usage("$_ is required") if ($required->{$_} && ! defined($opt->{$_})) ...@@ -82,13 +86,26 @@ map { pod2usage("$_ is required") if ($required->{$_} && ! defined($opt->{$_}))
map { my $key = $_; s/.+\|//; s/-/_/g; $opt->{$_} = delete($opt->{$key}) } keys %$opt; map { my $key = $_; s/.+\|//; s/-/_/g; $opt->{$_} = delete($opt->{$key}) } keys %$opt;
## allowing comma-separated lists of values as well as multiple occurrences of the options ## allowing comma-separated lists of values as well as multiple occurrences of the options
@{$opt->{drap_dirs}} = @ARGV unless (@{$opt->{drap_dirs}}); # compatibility with drap-v1.8 if (@{$opt->{assemblies}}) {
@{$opt->{drap_dirs}} = split(/,/, join(',', @{$opt->{drap_dirs}})); @{$opt->{assemblies}} = split(/,/, join(',', @{$opt->{assemblies}}));
} else {
@{$opt->{drap_dirs}} = @ARGV unless ($opt->{drap_dirs}); # compatibility with drap-v1.8
@{$opt->{drap_dirs}} = split(/,/, join(',', @{$opt->{drap_dirs}}));
for(@{$opt->{drap_dirs}}){ s|/$|| };
}
if (@{$opt->{R1}}) {
@{$opt->{R1}} = uniq(split(/,/, join(',', @{$opt->{R1}})));
}
if (@{$opt->{R2}}) {
@{$opt->{R2}} = uniq(split(/,/, join(',', @{$opt->{R2}})));
printOutMessage("NUMBER of UNIQ R1 and R2 FASTQ FILES DIFFERS") unless (scalar(@{$opt->{R1}}) == scalar(@{$opt->{R2}}));
}
# initialize drap path # initialize drap path
$opt->{binpath} = "$FindBin::RealBin/bin"; $opt->{binpath} = "$FindBin::RealBin/bin";
$opt->{cfgpath} = "$FindBin::RealBin/cfg"; $opt->{cfgpath} = "$FindBin::RealBin/cfg";
$opt->{reportpath} = "$FindBin::RealBin/report"; $opt->{pluginspath} = "$FindBin::RealBin/plugins";
$opt->{reportpath} = "$FindBin::RealBin/report";
# CFG_FILE param checking # CFG_FILE param checking
if (defined($opt->{cfg_file})) { if (defined($opt->{cfg_file})) {
...@@ -98,6 +115,7 @@ if (defined($opt->{cfg_file})) { ...@@ -98,6 +115,7 @@ if (defined($opt->{cfg_file})) {
} }
# expand PATH and ENV variables # expand PATH and ENV variables
$ENV{PATH} = join(':', glob("$opt->{pluginspath}/*")).":".$ENV{PATH};
$ENV{PATH} = $opt->{binpath}.":".$ENV{PATH}; $ENV{PATH} = $opt->{binpath}.":".$ENV{PATH};
$opt->{env}->{PATH} = $ENV{PATH}; $opt->{env}->{PATH} = $ENV{PATH};
set_extended_path( $opt->{cfg_file} ); set_extended_path( $opt->{cfg_file} );
...@@ -107,11 +125,23 @@ set_env_variables( $opt->{cfg_file}, $opt ); ...@@ -107,11 +125,23 @@ set_env_variables( $opt->{cfg_file}, $opt );
# PARAMS CHECKINGS # PARAMS CHECKINGS
#------------------ #------------------
my $paired_asm; my $paired_asm;
unless (scalar(@{$opt->{drap_dirs}}) > 1) { printOutMessage("AT LEAST TWO runDrap DIRECTORIES ARE MANDATORY\n") }; if (@{$opt->{drap_dirs}}) {
foreach (@{$opt->{drap_dirs}}) { unless (scalar(@{$opt->{drap_dirs}}) > 1) { printOutMessage("AT LEAST TWO runDrap DIRECTORIES ARE MANDATORY\n") };
unless (-d $_){ printOutMessage("NO SUCH DRAP DIRECTORY: $_"); } foreach (@{$opt->{drap_dirs}}) {
unless (-f "$_/.drap_conf.json"){ printOutMessage("CANNOT FIND DRAP CONFIG FILE .drap_conf.json INSIDE DRAP DIRECTORY: $_"); } unless (-d $_){ printOutMessage("NO SUCH DRAP DIRECTORY: $_"); }
$paired_asm += get_cfg_param($_, 'paired'); unless (-f "$_/.drap_conf.json"){ printOutMessage("CANNOT FIND DRAP CONFIG FILE .drap_conf.json INSIDE DRAP DIRECTORY: $_"); }
$paired_asm += get_cfg_param($_, 'paired');
}
if (@{$opt->{assemblies}}) { printOutMessage("OPTIONS drap-dirs AND assemblies ARE MUTUALLY EXCLUSIVE"); }
if (@{$opt->{R1}}) { printOutMessage("OPTIONS drap-dirs AND R1 ARE MUTUALLY EXCLUSIVE"); }
} else {
unless (@{$opt->{assemblies}}) { printOutMessage("PLEASE GIVE runDrap DIRECTORIES OR assembly FILES TO META-ASSEMBLE"); }
unless (scalar(@{$opt->{assemblies}}) > 1) { printOutMessage("AT LEAST TWO assembly FILES ARE MANDATORY\n") };
unless (@{$opt->{R1}}) { printOutMessage("OPTION R1 IS REQUIRED WITH assemblies OPTION"); }
map { printOutMessage("NO SUCH FASTQ FILE: $_") unless (-f $_) } @{$opt->{R1}};
if (@{$opt->{R2}}) {
map { printOutMessage("NO SUCH FASTQ FILE: $_") unless (-f $_) } @{$opt->{R2}};
}
} }
if (defined($opt->{ref})) { if (defined($opt->{ref})) {
...@@ -122,11 +152,13 @@ if (defined($opt->{strand})) { ...@@ -122,11 +152,13 @@ if (defined($opt->{strand})) {
if ($opt->{strand} ne 'F' && $opt->{strand} ne 'R' && $opt->{strand} ne 'RF' && $opt->{strand} ne 'FR') { if ($opt->{strand} ne 'F' && $opt->{strand} ne 'R' && $opt->{strand} ne 'RF' && $opt->{strand} ne 'FR') {
printOutMessage("UNKNOWN STRAND-SPECIFIC library TYPE. Please choose \'R\', \'F\' for SINGLE reads, \'RF\' or \'FR\' for PAIRED reads"); printOutMessage("UNKNOWN STRAND-SPECIFIC library TYPE. Please choose \'R\', \'F\' for SINGLE reads, \'RF\' or \'FR\' for PAIRED reads");
} }
foreach my $drap_dir (@{$opt->{drap_dirs}}) { if (@{$opt->{drap_dirs}}) {
my $used_strand = get_cfg_param($drap_dir, 'strand'); foreach my $drap_dir (@{$opt->{drap_dirs}}) {
printf("Warning: --strand parameter differs from STRAND-SPECIFIC library TYPE use for assembly %s (%s)\n", my $used_strand = get_cfg_param($drap_dir, 'strand');
$drap_dir, $used_strand ? "--strand $used_strand" : "no strand parameter") if ($opt->{strand} ne $used_strand printf("Warning: --strand parameter differs from STRAND-SPECIFIC library TYPE use for assembly %s (%s)\n",
); $drap_dir, $used_strand ? "--strand $used_strand" : "no strand parameter") if ($opt->{strand} ne $used_strand
);
}
} }
} }
...@@ -175,16 +207,21 @@ my $clean_msg; ...@@ -175,16 +207,21 @@ my $clean_msg;
if ($opt->{restart}) { if ($opt->{restart}) {
print "Check steps already executed:\n"; print "Check steps already executed:\n";
} else { } else {
print 'Run meta-assembly from '.scalar(@{$opt->{drap_dirs}})." primary assemblies\n"; printf("Run meta-assembly from %d primary assemblies\n", scalar(@{$opt->{drap_dirs}}) || scalar(@{$opt->{assemblies}}));
} }
# convert path to absolute path for outdir and files outside outdir # convert path to absolute path for outdir and files outside outdir
foreach my $key (qw(outdir ref cfg_file)) { foreach my $key (qw(outdir ref cfg_file drap_dirs assemblies R1 R2)) {
next unless (defined($opt->{$key})); next unless (defined($opt->{$key}));
$opt->{$key} = File::Spec->rel2abs($opt->{$key}); if (ref($opt->{$key}) eq 'ARRAY') {
for (@{$opt->{$key}}) { $_ = File::Spec->rel2abs($_) };
} else {
$opt->{$key} = File::Spec->rel2abs($opt->{$key});
}
} }
# create output directory # create output directory
$opt->{outdir} =~ s|/$||;
mkdir($opt->{outdir}) or die "Can't create directory $opt->{outdir}\n" unless (-d $opt->{outdir}); mkdir($opt->{outdir}) or die "Can't create directory $opt->{outdir}\n" unless (-d $opt->{outdir});
# write command in log file # write command in log file
...@@ -228,11 +265,18 @@ unless (-f "$opt->{dir_list}->[0]/.$opt->{step}.over" && step_complete($opt)) { ...@@ -228,11 +265,18 @@ unless (-f "$opt->{dir_list}->[0]/.$opt->{step}.over" && step_complete($opt)) {
$opt->{restart} = 0; $opt->{restart} = 0;
$resubmit = $opt->{step}; $resubmit = $opt->{step};
} }
foreach my $drap_dir (@{$opt->{drap_dirs}}) { if (@{$opt->{drap_dirs}}) {
my $condition = basename($drap_dir); foreach my $drap_dir (@{$opt->{drap_dirs}}) {
my $drap_lowest_fpkm = $opt->{input_cov} || shift(@{get_cfg_param($drap_dir, 'coverages')}); my $condition = basename($drap_dir);
die "Can't open file $drap_dir/$opt->{input_type}_fpkm_$drap_lowest_fpkm.fa\n" unless (-f "$drap_dir/$opt->{input_type}_fpkm_$drap_lowest_fpkm.fa"); my $drap_lowest_fpkm = $opt->{input_cov} || shift(@{get_cfg_param($drap_dir, 'coverages')});
$opt->{cmd} .= qq(cat $drap_dir/$opt->{input_type}_fpkm_$drap_lowest_fpkm.fa | sed -e "s/^>/>${condition}_/" >> $opt->{dir_list}->[0]/all_conditions_contigs.fa\n); die "Can't open file $drap_dir/$opt->{input_type}_fpkm_$drap_lowest_fpkm.fa\n" unless (-f "$drap_dir/$opt->{input_type}_fpkm_$drap_lowest_fpkm.fa");
$opt->{cmd} .= qq(cat $drap_dir/$opt->{input_type}_fpkm_$drap_lowest_fpkm.fa | sed -e "s/^>/>${condition}_/" >> $opt->{dir_list}->[0]/all_conditions_contigs.fa\n);
}
} else {
foreach my $assembly (@{$opt->{assemblies}}) {
(my $condition = basename($assembly)) =~ s/\.t?fn?a(sta)?$//;
$opt->{cmd} .= qq(cat $assembly | sed -e "s/^>/>${condition}_/" >> $opt->{dir_list}->[0]/all_conditions_contigs.fa\n);
}
} }
$opt->{cmd} .= "touch $opt->{dir_list}->[0]/.$opt->{step}.over\n"; $opt->{cmd} .= "touch $opt->{dir_list}->[0]/.$opt->{step}.over\n";
my $merge_metrics_file = $report_db_folder."/".$merge_analysis->get_or_create_step('Rename and merge')->get_or_create_metrics_filename('fastaAssembly'); my $merge_metrics_file = $report_db_folder."/".$merge_analysis->get_or_create_step('Rename and merge')->get_or_create_metrics_filename('fastaAssembly');
...@@ -363,8 +407,8 @@ unless (-f "$opt->{dir_list}->[2]/.$opt->{step}.over" && step_complete($opt)) { ...@@ -363,8 +407,8 @@ unless (-f "$opt->{dir_list}->[2]/.$opt->{step}.over" && step_complete($opt)) {
$resubmit = $opt->{step}; $resubmit = $opt->{step};
} }
$opt->{cmd} .= sprintf ("%s/runSamse.sh -r %s/meta_contigs.fa -f %s -o %s -m %s -t %d --index --sync%s\n", $opt->{cmd} .= sprintf ("%s/runSamse.sh -r %s/meta_contigs.fa -f %s -o %s -m %s -t %d --index --sync%s\n",
$opt->{binpath}, $opt->{dir_list}->[1], get_cfg_param($opt->{drap_dirs}->[0], 'alignR1')->[0], $opt->{dir_list}->[2], $opt->{binpath}, $opt->{dir_list}->[1], @{$opt->{drap_dirs}} ? get_cfg_param($opt->{drap_dirs}->[0], 'alignR1')->[0] : $opt->{R1}->[0],
$opt->{mapper}, $opt->{env}->{n_cpu}, $opt->{local} ? ' --local' : '' $opt->{dir_list}->[2], $opt->{mapper}, $opt->{env}->{n_cpu}, $opt->{local} ? ' --local' : ''
); );
$opt->{cmd} .= "touch $opt->{dir_list}->[2]/.$opt->{step}.over\n"; $opt->{cmd} .= "touch $opt->{dir_list}->[2]/.$opt->{step}.over\n";
$rmbt_analysis->get_or_create_step( 'Reference indexation', 'Index the reference contig set using bwa or STAR.' ); $rmbt_analysis->get_or_create_step( 'Reference indexation', 'Index the reference contig set using bwa or STAR.' );
...@@ -379,7 +423,7 @@ $previous_step = $opt->{steps}->[$nstep]; ...@@ -379,7 +423,7 @@ $previous_step = $opt->{steps}->[$nstep];
$opt->{step} = $opt->{steps}->[++$nstep]; $opt->{step} = $opt->{steps}->[++$nstep];
$opt->{complete} = 1; $opt->{complete} = 1;
$opt->{rmbt_ref}->{meta_rmbt} = $opt->{mapper} eq 'star' ? "$opt->{dir_list}->[2]/STAR_meta_contigs.fa" : "$opt->{dir_list}->[1]/meta_contigs.fa"; $opt->{rmbt_ref}->{meta_rmbt} = $opt->{mapper} eq 'star' ? "$opt->{dir_list}->[2]/STAR_meta_contigs.fa" : "$opt->{dir_list}->[1]/meta_contigs.fa";
unless (step_complete($opt) && exists($opt->{alignR1_list}) && scalar(@{[glob("$opt->{dir_list}->[2]/*.bam")]}) == scalar(@{$opt->{alignR1_list}})) { unless (step_complete($opt) && exists($opt->{alignR1}) && scalar(@{[glob("$opt->{dir_list}->[2]/*.bam")]}) == scalar(@{$opt->{alignR1}})) {
$opt->{complete} = 0; $opt->{complete} = 0;
if ($opt->{restart}) { if ($opt->{restart}) {
$clean_msg .= clean_rmbt_directory($opt, $opt->{dir_list}->[2]); $clean_msg .= clean_rmbt_directory($opt, $opt->{dir_list}->[2]);
...@@ -388,34 +432,63 @@ unless (step_complete($opt) && exists($opt->{alignR1_list}) && scalar(@{[glob("$ ...@@ -388,34 +432,63 @@ unless (step_complete($opt) && exists($opt->{alignR1_list}) && scalar(@{[glob("$
$resubmit = $opt->{step}; $resubmit = $opt->{step};
} }
$opt->{cmd} .= sprintf("set reference = %s\n", $opt->{mapper} eq 'star' ? "`find $opt->{dir_list}->[2] -name STAR_meta_contigs.fa_\\*`" : "$opt->{dir_list}->[1]/meta_contigs.fa"); $opt->{cmd} .= sprintf("set reference = %s\n", $opt->{mapper} eq 'star' ? "`find $opt->{dir_list}->[2] -name STAR_meta_contigs.fa_\\*`" : "$opt->{dir_list}->[1]/meta_contigs.fa");
foreach my $drap_dir (@{$opt->{drap_dirs}}) { if (@{$opt->{drap_dirs}}) {
my $paired = get_cfg_param($drap_dir, 'paired'); # Allowing to perform meta from meta
my $alignR1 = get_cfg_param($drap_dir, 'alignR1'); foreach my $dir (@{$opt->{drap_dirs}}) {
my $alignR2 = get_cfg_param($drap_dir, 'alignR2') if ($paired); if (get_cfg_param($dir, 'meta') == 1) {
for (my $i = 0 ; $i < scalar(@$alignR1) ; $i++) { if (ref(get_cfg_param($dir, 'ext_drap_dirs')) eq 'ARRAY') {
next if (exists $opt->{rmbt_status}->{$opt->{rmbt_ref}->{meta_rmbt}}->{$alignR1->[$i]} && $opt->{rmbt_status}->{$opt->{rmbt_ref}->{meta_rmbt}}->{$alignR1->[$i]}->{status} == 1); push(@{$opt->{ext_drap_dirs}}, @{get_cfg_param($dir, 'ext_drap_dirs')});
next if (exists($opt->{alignR1_list}) && find_in($alignR1->[$i], $opt->{alignR1_list})); } else {
my $fastq = $paired ? basename($alignR1->[$i]).'/'.basename($alignR2->[$i]) : basename($alignR1->[$i]); push(@{$opt->{ext_drap_dirs}}, @{get_cfg_param($dir, 'drap_dirs')});
$opt->{cmd} .= sprintf ("set bam = `%s/%s -r \$reference%s %s -o %s -m %s -t %d --sort_by_name --flagstat --bam%s%s`\n", }
$opt->{binpath}, $paired ? 'runSampe.sh' : 'runSamse.sh', $opt->{mapper} eq 'star' ? ' -g' : '', }
$paired ? "-1 $alignR1->[$i] -2 $alignR2->[$i]" : "-f $alignR1->[$i]", else {
$opt->{dir_list}->[2], $opt->{mapper}, $opt->{env}->{n_cpu}, $opt->{filter} ? ' -y' : '', $opt->{local} ? ' --local' : '' push(@{$opt->{ext_drap_dirs}}, $dir);
);
my $flagstat_metrics_file = $report_db_folder."/".$rmbt_analysis->get_or_create_step($opt->{mapper}, "Align reads using $opt->{mapper}.")->get_or_create_metrics_filename('flagstatLog', $fastq);
my $flagstat_metrics_basename = basename($flagstat_metrics_file, '.json');
if ($opt->{local}) {
$opt->{cmd} .= "$opt->{binpath}/alignmentMetrics2json.pl \$bam.flagstat > $flagstat_metrics_file\n" ;
} else {
$opt->{cmd} .= "set bam_jid = `echo \$bam | sed -e 's/.*\\.\\([0-9]*\\)\\.bam/\\1/'`\n"
."set hold_jid = `cat $opt->{dir_list}->[2]/err_log_\$bam_jid/runSam?e.\$bam_jid.jid | tr '\\n' ,`\n"
."set metric = \"$opt->{binpath}/submitJob --name meta_$flagstat_metrics_basename --stdout $opt->{outdir}/err_log --stderr $opt->{outdir}/err_log --options -hold_jid \$hold_jid --binary -- '$opt->{binpath}/alignmentMetrics2json.pl \$bam.flagstat > $flagstat_metrics_file'\"\n"
."echo \$metric\nset metric_out = `eval \$metric`\necho \$metric_out\n"
} }
if ($paired) { }
push(@{$opt->{alignR1_list}}, $alignR1->[$i]) unless (find_in($alignR1->[$i], $opt->{alignR1_list})); foreach my $drap_dir (@{$opt->{ext_drap_dirs}}) {
push(@{$opt->{alignR2_list}}, $alignR2->[$i]) unless (find_in($alignR2->[$i], $opt->{alignR2_list})); my $paired = get_cfg_param($drap_dir, 'paired');
my $alignR1 = get_cfg_param($drap_dir, 'alignR1');
my $alignR2 = get_cfg_param($drap_dir, 'alignR2') if ($paired);
for (my $i = 0 ; $i < scalar(@$alignR1) ; $i++) {
next if (exists($opt->{alignR1}) && find_in($alignR1->[$i], $opt->{alignR1}));
push(@{$opt->{alignR1}}, $alignR1->[$i]);
if ($paired) {
push(@{$opt->{alignR2}}, $alignR2->[$i]);
push(@{$opt->{transrateAlignR1}}, $alignR1->[$i]) unless (find_in($alignR1->[$i], $opt->{transrateAlignR1}));
push(@{$opt->{transrateAlignR2}}, $alignR2->[$i]) unless (find_in($alignR2->[$i], $opt->{transrateAlignR2}));
} else {
push(@{$opt->{alignR2}}, '');
}
} }
} }
} else {
@{$opt->{alignR1}} = @{$opt->{R1}};
if (@{$opt->{R2}}) {
@{$opt->{alignR2}} = @{$opt->{R2}};
@{$opt->{transrateAlignR1}} = @{$opt->{R1}};
@{$opt->{transrateAlignR2}} = @{$opt->{R2}};
}
}
for (my $i = 0 ; $i < scalar(@{$opt->{alignR1}}) ; $i++) {
next if (exists $opt->{rmbt_status}->{$opt->{rmbt_ref}->{meta_rmbt}}->{$opt->{alignR1}->[$i]} && $opt->{rmbt_status}->{$opt->{rmbt_ref}->{meta_rmbt}}->{$opt->{alignR1}->[$i]}->{status} == 1);
my $paired = $opt->{alignR2}->[$i] ? 1 : 0;
my $fastq = $paired ? basename($opt->{alignR1}->[$i]).'/'.basename($opt->{alignR2}->[$i]) : basename($opt->{alignR1}->[$i]);
$opt->{cmd} .= sprintf ("set bam = `%s/%s -r \$reference%s %s -o %s -m %s -t %d --sort_by_name --flagstat --bam%s%s`\n",
$opt->{binpath}, $paired ? 'runSampe.sh' : 'runSamse.sh', $opt->{mapper} eq 'star' ? ' -g' : '',
$paired ? "-1 $opt->{alignR1}->[$i] -2 $opt->{alignR2}->[$i]" : "-f $opt->{alignR1}->[$i]",
$opt->{dir_list}->[2], $opt->{mapper}, $opt->{env}->{n_cpu}, $opt->{filter} ? ' -y' : '', $opt->{local} ? ' --local' : ''
);
my $flagstat_metrics_file = $report_db_folder."/".$rmbt_analysis->get_or_create_step($opt->{mapper}, "Align reads using $opt->{mapper}.")->get_or_create_metrics_filename('flagstatLog', $fastq);
my $flagstat_metrics_basename = basename($flagstat_metrics_file, '.json');
if ($opt->{local}) {
$opt->{cmd} .= "$opt->{binpath}/alignmentMetrics2json.pl \$bam.flagstat > $flagstat_metrics_file\n" ;
} else {
$opt->{cmd} .= "set bam_jid = `echo \$bam | sed -e 's/.*\\.\\([0-9]*\\)\\.bam/\\1/'`\n"
."set hold_jid = `cat $opt->{dir_list}->[2]/err_log_\$bam_jid/runSam?e.\$bam_jid.jid | tr '\\n' ,`\n"
."set metric = \"$opt->{binpath}/submitJob --name meta_$flagstat_metrics_basename --stdout $opt->{outdir}/err_log --stderr $opt->{outdir}/err_log --options -hold_jid \$hold_jid --binary -- '$opt->{binpath}/alignmentMetrics2json.pl \$bam.flagstat > $flagstat_metrics_file'\"\n"
."echo \$metric\nset metric_out = `eval \$metric`\necho \$metric_out\n"
}
} }
unless ($opt->{local} || !$opt->{cmd}) { unless ($opt->{local} || !$opt->{cmd}) {
$opt->{cmd} .= "set all_jid = `cat $opt->{dir_list}->[2]/err_log_*/runSam?e.*.jid | tr '\\n' ,`\n" $opt->{cmd} .= "set all_jid = `cat $opt->{dir_list}->[2]/err_log_*/runSam?e.*.jid | tr '\\n' ,`\n"
...@@ -498,7 +571,7 @@ unless ((($opt->{no_rate} && -f "$opt->{outdir}/00-META-ASSEMBLY_COMPLETE") || - ...@@ -498,7 +571,7 @@ unless ((($opt->{no_rate} && -f "$opt->{outdir}/00-META-ASSEMBLY_COMPLETE") || -
unless ($opt->{no_rate}) { unless ($opt->{no_rate}) {
$opt->{cmd} .= "else\n" unless (-f "$opt->{outdir}/00-META-ASSEMBLY_COMPLETE"); $opt->{cmd} .= "else\n" unless (-f "$opt->{outdir}/00-META-ASSEMBLY_COMPLETE");
$opt->{cmd} .= sprintf("%stransrate --assembly=%s/transcripts_fpkm_%d.fa --left=%s --right=%s --threads=%d --output=%s/transrate %s> %s/00-META-ASSEMBLY_RATING\n", $opt->{cmd} .= sprintf("%stransrate --assembly=%s/transcripts_fpkm_%d.fa --left=%s --right=%s --threads=%d --output=%s/transrate %s> %s/00-META-ASSEMBLY_RATING\n",
$tab, $opt->{dir_list}->[3], $lowest_fpkm, join(',',@{$opt->{alignR1_list}}), join(',',@{$opt->{alignR2_list}}), $opt->{env}->{n_cpu}, $tab, $opt->{dir_list}->[3], $lowest_fpkm, join(',',@{$opt->{transrateAlignR1}}), join(',',@{$opt->{transrateAlignR2}}), $opt->{env}->{n_cpu},
$opt->{outdir}, $opt->{ref} ? "--reference=$opt->{ref} " : '', $opt->{outdir} $opt->{outdir}, $opt->{ref} ? "--reference=$opt->{ref} " : '', $opt->{outdir}
); );
$opt->{cmd} .= "${tab}$opt->{binpath}/check_assembly.pl $opt->{outdir} $opt->{step}\n"; $opt->{cmd} .= "${tab}$opt->{binpath}/check_assembly.pl $opt->{outdir} $opt->{step}\n";
...@@ -592,7 +665,10 @@ runMeta ...@@ -592,7 +665,10 @@ runMeta
runMeta \ runMeta \
--outdir OUTPUT_DIR \ --outdir OUTPUT_DIR \
--drap-dirs DRAP_DIR_1,DRAP_DIR_2[,...,DRAP_DIR_n] \ [--drap-dirs DRAP_DIR_1,DRAP_DIR_2[,...,DRAP_DIR_n]] \
[--assemblies FASTA_FILE_1,FASTA_FILE_2[,...,FASTA_FILE_n]] \
[--R1 R1_FILE[,...,R1_FILE_n]] \
[--R2 R2_FILE[,...,R2_FILE_n]] \
[--strand R|F|RF|FR] \ [--strand R|F|RF|FR] \
[--ref FASTA_REF] \ [--ref FASTA_REF] \
[--mapper bwa|STAR] \ [--mapper bwa|STAR] \
...@@ -604,7 +680,7 @@ runMeta ...@@ -604,7 +680,7 @@ runMeta
=head1 DESCRIPTION =head1 DESCRIPTION
runMeta is a workflow used to merge assemblies from several samples/tissues/development stages in one assembly without redundancy. runMeta is a workflow used to merge assemblies from several samples/tissues/development stages in one assembly without redundancy.
From two or more runDrap assemblies, it generates a meta-assembly and a report in report/DRAP_report.html. From two or more runDrap assemblies or input fasta files, it generates a meta-assembly and a report in report/DRAP_report.html.
=head1 OPTIONS =head1 OPTIONS
...@@ -622,17 +698,33 @@ the previous set of options will be used to rerun the workflow. ...@@ -622,17 +698,33 @@ the previous set of options will be used to rerun the workflow.
=item B<--drap-dirs> DRAP_DIR_1,DRAP_DIR_2[,...,DRAP_DIR_n] =item B<--drap-dirs> DRAP_DIR_1,DRAP_DIR_2[,...,DRAP_DIR_n]
The paths to the runDrap output directories of assemblies to meta-assemble, comma-separated. The paths to the runDrap output directories of assemblies to meta-assemble, comma-separated.
Mutually exclusive with the option --assemblies.
=item B<-a, --assemblies> FASTA_FILE_1,FASTA_FILE_2[,...,FASTA_FILE_n]
The assembly fasta files to meta-assemble, comma-separated.
Mutually exclusive with the option --drap-dirs.
=item B<-1, --R1> R1_FILE[,...,R1_FILE_n]
The R1 file(s) in FASTQ format to align at RMBT (Reads Mapped Back to Transcripts) step (gzipped or not), comma-separated.
Mandatory with option --assemblies. Ignored with option --drap-dirs.
=item B<-2, --R2> R2_FILE[,...,R2_FILE_n]
The R2 file(s) in FASTQ format to align at RMBT step (gzipped or not), comma-separated.
Ignored with option --drap-dirs.
=item B<--input-type> coding_transcripts|transcripts =item B<--input-type> coding_transcripts|transcripts
runDrap assembly target type: coding_transcripts or transcripts. Associated with the input-cov parameter, runDrap assembly target type: coding_transcripts or transcripts. Associated with the input-cov parameter,
indicates from which fasta files inside each runDrap output directories runMeta will be performed. indicates from which fasta files inside each runDrap output directories runMeta will be performed.
The default type is transcripts. The default type is transcripts. Ignored with option --assemblies.
=item B<--input-cov> INTEGER =item B<--input-cov> INTEGER
runDrap assembly target coverage. By default, runMeta is performed from transcripts_fpkm_$lowest.fa files runDrap assembly target coverage. By default, runMeta is performed from transcripts_fpkm_$lowest.fa files
found inside runDrap directories with $lowest equal to the lowest fpkm value. found inside runDrap directories with $lowest equal to the lowest fpkm value. Ignored with option --assemblies.
=item B<-s, --strand> FR|RF|F|R =item B<-s, --strand> FR|RF|F|R
...@@ -644,7 +736,7 @@ Fasta file with knowns proteins or transcripts to align with contigs using Exone ...@@ -644,7 +736,7 @@ Fasta file with knowns proteins or transcripts to align with contigs using Exone
=item B<-m, --mapper> bwa|star =item B<-m, --mapper> bwa|star
Mapper to perform the RMBT step (Reads Mapped Back to Transcripts). Mapper to perform the RMBT step.
Available mappers are bwa and star. Available mappers are bwa and star.
The default mapper is bwa. The default mapper is bwa.
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment