Commit 5fc4281b authored by Celine Noirot's avatar Celine Noirot
Browse files

DEbug :

- mv after bismark_extraction
- processing step
- option of rmdup (-s for single and not for paired)
parent 04473b50
......@@ -23,9 +23,11 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
<p>Following step were performed :</p>
<ul>
{foreach $params as $step}
{if $step == "samtools rmdup -s"}
{if $step == "samtools rmdup"}
<li class="parameter">{$step}: Clean paired duplicate.</li>
{$analyse.type="paired"}
{else if $step == "samtools rmdup -s"}
<li class="parameter">{$step}: Clean single duplicate.</li>
{else if $step == "samtools flagstat"}
<li class="parameter">{$step}: Count mapped reads.</li>
{else if $step == "samtools sort"}
......
......@@ -42,7 +42,7 @@ class Methylseq (CasavaNG6Workflow):
self.add_parameter("min_pct", "When generating the histogram, discard any data categories (out of FR, TANDEM, RF) that have"+
" fewer than this percentage of overall reads", type=float, default = 0.01, group="INSERTSIZE section")
# Bisulfite parameters
self.add_parameter("rrbs", "Workflow for RRBS data (digested with MspI) instead of WGBS", type="bool", default=False)
self.add_parameter("rrbs", "Workflow for RRBS data : clean data (digested with MspI) and do not perform RMdup", type="bool", default=False)
self.add_parameter("non_directional", "To set if the library is non directional (Default : False)", type="bool", default=False)
self.add_parameter("alignment_mismatch", "Sets the number of mismatches to allowed in a seed alignment during multiseed alignment ", type="int", default=1)
......@@ -108,21 +108,28 @@ class Methylseq (CasavaNG6Workflow):
if self.is_paired_end() :
# align the PE reads against the reference genome with bismark
bismarkReference = self.add_component("Bismark", [indexed_ref,trim_galore.output_files_R1, trim_galore.output_files_R2, self.samples_names,self.non_directional,self.bowtie1,self.alignment_mismatch, max_insert_size, "Library reads alignment"], component_prefix="paired",parent = trim_galore)
rmDuplicate = self.add_component("RemoveDuplicate", [bismarkReference.output_bam,self.is_paired_end()], component_prefix="paired",parent = bismarkReference)
bam_for_next_step = bismarkReference.output_bam
parent_for_next_step = bismarkReference
if not self.rrbs :
rmDuplicate = self.add_component("RemoveDuplicate", [bismarkReference.output_bam,self.is_paired_end()], component_prefix="paired",parent = bismarkReference)
bam_for_next_step = rmDuplicate.output
parent_for_next_step = rmDuplicate
# process insert sizes of the aligned reads
insertssizesReference = self.add_component("InsertsSizes", [rmDuplicate.output, insert_size_histogram_width, insert_size_min_percentage, "LENIENT", "inserts_sizes.tar.gz"], component_prefix="paired", parent = rmDuplicate)
insertssizesReference = self.add_component("InsertsSizes", [bam_for_next_step, insert_size_histogram_width, insert_size_min_percentage, "LENIENT", "inserts_sizes.tar.gz"], component_prefix="paired", parent = parent_for_next_step)
# compute the methylation extraction from the alignement
bismarkReference_extract = self.add_component("BismarkMethylationExtractor", [indexed_ref, bismarkReference.output_bam, "paired" , self.methylation_extractor_no_overlap, self.large_genome], component_prefix="paired",parent = rmDuplicate)
bismarkReference_extract = self.add_component("BismarkMethylationExtractor", [indexed_ref, bam_for_next_step, "paired" , self.methylation_extractor_no_overlap, self.large_genome], component_prefix="paired",parent = parent_for_next_step)
else:
# align the SE reads against the reference genome with bismark
bismarkReference = self.add_component("Bismark", [indexed_ref,trim_galore.output_files_R1, None, self.samples_names, self.non_directional,self.bowtie1,self.alignment_mismatch,max_insert_size,"Library reads Alignment"],parent = trim_galore)
rmDuplicate = self.add_component("RemoveDuplicate", [bismarkReference.output_bam,self.is_paired_end()], component_prefix="paired",parent = bismarkReference)
bam_for_next_step = bismarkReference.output_bam
parent_for_next_step = bismarkReference
if not self.rrbs :
rmDuplicate = self.add_component("RemoveDuplicate", [bismarkReference.output_bam,self.is_paired_end()], component_prefix="paired",parent = bismarkReference)
bam_for_next_step = rmDuplicate.output
parent_for_next_step = rmDuplicate
# compute the methylation extraction from the alignement
bismarkReference_extract = self.add_component("BismarkMethylationExtractor", [indexed_ref, rmDuplicate.output, "single" , self.methylation_extractor_no_overlap, self.large_genome], parent = rmDuplicate)
bismarkReference_extract = self.add_component("BismarkMethylationExtractor", [indexed_ref, bam_for_next_step, "single" , self.methylation_extractor_no_overlap, self.large_genome], parent = parent_for_next_step)
#Alignement against the control sequence if specified
if self.control_genome:
......@@ -134,18 +141,28 @@ class Methylseq (CasavaNG6Workflow):
if self.is_paired_end() :
# align the PE reads against the control sequence with bismark
bismarkControl = self.add_component("Bismark", [indexed_control,trim_galore.output_files_R1, trim_galore.output_files_R2, self.samples_names, self.non_directional,self.bowtie1,self.alignment_mismatch, max_insert_size, "Control reads Alignment"], component_prefix="control_paired", parent = trim_galore)
#remove duplicate
rmDuplicate = self.add_component("RemoveDuplicate", [bismarkControl.output_bam,self.is_paired_end()], component_prefix="control_paired",parent = bismarkControl)
bam_for_next_step = bismarkControl.output_bam
parent_for_next_step = bismarkControl
if not self.rrbs :
#remove duplicate
rmDuplicate = self.add_component("RemoveDuplicate", [bismarkControl.output_bam,self.is_paired_end()], component_prefix="control_paired",parent = bismarkControl)
bam_for_next_step = rmDuplicate.output
parent_for_next_step = rmDuplicate
# process insert sizes of the aligned reads
insertssizesControl = self.add_component("InsertsSizes", [rmDuplicate.output, insert_size_histogram_width, insert_size_min_percentage, "LENIENT", "inserts_sizes.tar.gz"], component_prefix="control_paired", parent = rmDuplicate)
insertssizesControl = self.add_component("InsertsSizes", [bam_for_next_step, insert_size_histogram_width, insert_size_min_percentage, "LENIENT", "inserts_sizes.tar.gz"], component_prefix="control_paired", parent = parent_for_next_step)
# compute the methylation extraction from the alignement
bismarkControl_extract = self.add_component("BismarkMethylationExtractor", [indexed_control, rmDuplicate.output, "paired" , self.methylation_extractor_no_overlap], component_prefix="control_paired", parent = rmDuplicate)
bismarkControl_extract = self.add_component("BismarkMethylationExtractor", [indexed_control, bam_for_next_step, "paired" , self.methylation_extractor_no_overlap], component_prefix="control_paired", parent = parent_for_next_step)
else:
# align the SE reads against the control sequence with bismark
bismarkControl = self.add_component("Bismark", [indexed_control,trim_galore.output_files_R1, None ,self.samples_names, self.non_directional,self.bowtie1,self.alignment_mismatch,max_insert_size,"Control reads Alignment"], component_prefix="control", parent = trim_galore)
#remove duplicate
rmDuplicate = self.add_component("RemoveDuplicate", [bismarkControl.output_bam,self.is_paired_end()], component_prefix="control",parent = bismarkControl)
bam_for_next_step = bismarkControl.output_bam
parent_for_next_step = bismarkControl
if not self.rrbs :
#remove duplicate
rmDuplicate = self.add_component("RemoveDuplicate", [bismarkControl.output_bam,self.is_paired_end()], component_prefix="control",parent = bismarkControl)
bam_for_next_step = rmDuplicate.output
parent_for_next_step = rmDuplicate
# compute the methylation extraction from the alignement
bismarkControl_extract = self.add_component("BismarkMethylationExtractor", [indexed_control, rmDuplicate.output, "single" , self.methylation_extractor_no_overlap], component_prefix="control", parent = bismarkControl)
bismarkControl_extract = self.add_component("BismarkMethylationExtractor", [indexed_control, bam_for_next_step, "single" , self.methylation_extractor_no_overlap], component_prefix="control", parent = parent_for_next_step)
......@@ -116,11 +116,11 @@ class Bismark (Analysis):
def process(self):
if self.is_paired:
bismark_fn = ShellFunction(self.get_exec_path("bismark") + " " + self.args + " -o " + self.output_directory + " --temp_dir " + self.output_directory + \
" " + self.reference_directory + " -1 $1 -2 $2 2> $4 ; ln -s $3 $6" , cmd_format='{EXE} {IN} {OUT}')
" " + self.reference_directory + " -1 $1 -2 $2 2> $4 ; "+ self.get_exec_path("samtools") + " sort $3 $6 2>> $4; mv $6.bam $6 2>> $4;" , cmd_format='{EXE} {IN} {OUT}')
bismark_map = MultiMap(bismark_fn, inputs=[self.input_files_R1, self.input_files_R2], outputs=[self.output_files,self.stderrs,self.output_report,self.output_bam], includes=self.reference_genome)
else:
bismark_fn = ShellFunction(self.get_exec_path("bismark") + " " + self.args + " -o " + self.output_directory + " --temp_dir " + self.output_directory + \
" " + self.reference_directory + " $1 2> $3 ; ln -s $2 $5" , cmd_format='{EXE} {IN} {OUT}')
" " + self.reference_directory + " $1 2> $3 ; "+ self.get_exec_path("samtools") + " sort $2 $5 2>> $3; mv $5.bam $5 2>> $3;" , cmd_format='{EXE} {IN} {OUT}')
bismark_map = MultiMap(bismark_fn, self.input_files_R1, outputs=[self.output_files,self.stderrs,self.output_report,self.output_bam], includes=self.reference_genome)
def post_process(self):
......
......@@ -57,7 +57,7 @@ class BismarkMethylationExtractor (Analysis):
self.options += " --scaffolds"
def process(self):
bismark_extract = ShellFunction("mkdir $2; cd $2; " + self.get_exec_path("bismark_methylation_extractor") + " $1 " + self.options + \
" --genome_folder " + os.path.dirname(self.reference_genome) + " -o $2 2>> $3; " + self.get_exec_path("samtools") + " sort $1 $1 2>> $3; rm $1; mv $1.bam $1 2>> $3;", cmd_format='{EXE} {IN} {OUT}')
" --genome_folder " + os.path.dirname(self.reference_genome) + " -o $2 2>> $3;", cmd_format='{EXE} {IN} {OUT}')
bismark_map = MultiMap(bismark_extract, inputs=self.bams, outputs=[self.output_directories,self.stderrs], includes=self.reference_genome)
......
......@@ -22,7 +22,7 @@ import os
class RemoveDuplicate (Analysis):
def define_parameters(self, bams, is_paired=True, mem="5G", cpu=2):
self.add_input_file_list( "bam", "bam files.", default=bams, required=True )
self.add_input_file_list( "bam", "SORTED bam files.", default=bams, required=True )
names=[]
self.temp_sorted1=[]
self.temp_sorted2=[]
......@@ -41,8 +41,7 @@ class RemoveDuplicate (Analysis):
self.add_output_file_list("flagstat_rmdup", "Flagstat result after rmdup", pattern='{basename_woext}.rmdup_flagstat', items=self.bam)
self.add_output_file_list("flagstat_finally", "Flagstat result after removing singleton", pattern='{basename_woext}.finally_flagstat', items=self.bam)
self.add_output_file_list("output", "The bam with removed duplicates and singleton (if paired)", pattern='{basename_woext}_clean.bam', items=self.bam)
self.add_output_file_list("sort1_stderr", "The error trace file", pattern='{basename_woext}_sort1.stderr', items=self.bam)
self.add_output_file_list("output", "The bam with removed duplicates (and singleton if paired)", pattern='{basename_woext}_clean.bam', items=self.bam)
self.add_output_file_list("rmdup_stderr", "The error trace file", pattern='{basename_woext}_rmdup.stderr', items=self.bam)
self.add_output_file_list("rmsinglet_stderr","The error trace file", pattern='{basename_woext}_rmsinglet.stderr', items=self.bam)
......@@ -55,9 +54,9 @@ class RemoveDuplicate (Analysis):
self.options = "samtools sort|samtools flagstat|samtools rmdup"
if self.is_paired:
self.description += " and remove singleton reads"
self.options += " -s|samtools flagstat|remove singleton|samtools flagstat"
self.options += "|samtools flagstat|remove singleton|samtools flagstat"
else :
self.options +="|samtools flagstat"
self.options +=" -s |samtools flagstat"
def get_version(self):
cmd = [self.get_exec_path("samtools")]
p = Popen(cmd, stdout=PIPE, stderr=PIPE)
......@@ -66,15 +65,13 @@ class RemoveDuplicate (Analysis):
def process(self):
self.add_shell_execution(self.get_exec_path("samtools") + " sort -m "+self.mem+" -@"+str(self.cpu)+" $1 $2 ; mv $2.bam $2 2>> $3 ", cmd_format='{EXE} {IN} {OUT}',
inputs=[self.bam], outputs=[self.temp_sorted1,self.sort1_stderr], map=True)
self.add_shell_execution(self.get_exec_path("samtools") + " flagstat $1 > $2", cmd_format='{EXE} {IN} {OUT}',
inputs=[self.temp_sorted1], outputs=[self.flagstat_init], map=True)
inputs=[self.bam], outputs=[self.flagstat_init], map=True)
if self.is_paired :
#samtools rmdup
self.add_shell_execution(self.get_exec_path("samtools") + " rmdup -s $1 $2 2>> $3", cmd_format='{EXE} {IN} {OUT}',
inputs=[self.temp_sorted1], outputs=[self.temp_rmdup,self.rmdup_stderr], map=True)
self.add_shell_execution(self.get_exec_path("samtools_old") + " rmdup $1 $2 2>> $3", cmd_format='{EXE} {IN} {OUT}',
inputs=[self.bam], outputs=[self.temp_rmdup,self.rmdup_stderr], map=True)
self.add_shell_execution(self.get_exec_path("samtools") + " flagstat $1 > $2", cmd_format='{EXE} {IN} {OUT}',
inputs=[self.temp_rmdup], outputs=[self.flagstat_rmdup], map=True)
#remove singleton after rmdup
......@@ -89,7 +86,7 @@ class RemoveDuplicate (Analysis):
else :
#samtools rmdup
self.add_shell_execution(self.get_exec_path("samtools") + " rmdup $1 $2 2>> $3", cmd_format='{EXE} {IN} {OUT}',
self.add_shell_execution(self.get_exec_path("samtools_old") + " rmdup -s $1 $2 2>> $3", cmd_format='{EXE} {IN} {OUT}',
inputs=[self.temp_sorted1], outputs=[self.output,self.rmdup_stderr], map=True)
self.add_shell_execution(self.get_exec_path("samtools") + " flagstat $1 > $2", cmd_format='{EXE} {IN} {OUT}',
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment