Commit dc2a5cd5 authored by Gerald Salin's avatar Gerald Salin
Browse files

add some comments

parent 2578e09c
......@@ -23,7 +23,10 @@ import logging
from ng6.ng6workflow import NG6Workflow
from ng6.utils import Utils
#TODO : this pipeline only supports bowtie2 for the moment
#TODO : tested in paired-end mode with fatsq.gz files, with casava_directory and --read-1 / --read-2
#TODO : tested in paired-end mode with fatsq files (not gzipped), with --read-1 / --read-2
#TODO : not tested in single-end mode
class Methylseq (NG6Workflow):
def process(self):
......@@ -35,8 +38,7 @@ class Methylseq (NG6Workflow):
# manage the sequences files
group_prefix = None
if self.args['casava_directory'] is None and self.args['read_1'] is None :
#TODO : ou sont listes les types d'erreur utlisables?
logging.getLogger("Methylseq").error("You must specify use either --casava_directory from which the pipeline can fetch fastq.gz files or --read-1 argument")
logging.getLogger("Methylseq").error("You must specify either --casava_directory from which the pipeline can fetch fastq.gz files or --read-1 argument")
raise ValueError, "You must specify use either --casava_directory from which the pipeline can fecth fastq.gz files or --read-1 argument "
elif self.args['casava_directory'] is not None :
logging.getLogger("Methylseq").debug("self.args['casava_directory'] = " + self.args['casava_directory'])
......@@ -97,15 +99,11 @@ class Methylseq (NG6Workflow):
reads_prefixes = (Utils.get_group_basenames(saved_files, "read")).keys()
concatenatefastq = self.add_component("ConcatenateFilesGroups", [saved_files, reads_prefixes])
saved_files = concatenatefastq.concat_files
logging.getLogger("Methylseq").debug("Before AddRawFiles")
addrawfiles = self.add_component("AddRawFiles", [self.runobj, saved_files, self.args["compression"]])
logging.getLogger("Methylseq").debug("Before FastQC")
# make some statistics on raw file
# make some statistics on raw files
fastqc = self.add_component("FastQC", [filtered_read1_files+filtered_read2_files, (group_prefix is not None), True, run_name+"_fastqc.tar.gz"], parent = fastqilluminafilter)
logging.getLogger("Methylseq").debug("Before Utils.split_pair")
# spliced alignment of reads against indexed genome
if is_paired_end and (group_prefix is not None):
# split read 1 and read 2 from filtered files list
[concat_read1_files, concat_read2_files] = Utils.split_pair(concatenatefastq.concat_files, (group_prefix is not None))
......@@ -117,54 +115,65 @@ class Methylseq (NG6Workflow):
concat_read2_files = filtered_read2_files
concat_read1_files = sorted(concat_read1_files)
concat_read2_files = sorted(concat_read2_files)
#Trimming
#cleaning raw files (quality and adapter trimming)
logging.getLogger("Methylseq").debug("Before TrimGalore")
trim_galore = self.add_component("TrimGalore", [ concat_read1_files, concat_read2_files, self.args["non_directional"]], parent = fastqilluminafilter)
# make some statistics on cleaned files
fastqcCleaned = self.add_component("FastQC", [trim_galore.read1_cleaned + trim_galore.read2_cleaned, (group_prefix is not None), True, run_name+"_trim_galore_fastqc.tar.gz"], component_prefix="trim_galore", parent = trim_galore)
#TODO : pourquoi ce n'est pas utilise dans les lignes de commande plus bas?
# type="single"
# if is_paired_end :
# type="paired"
# spliced alignment of reads against indexed genome
logging.getLogger("Methylseq").debug("Before alignment")
if self.args["reference_genome"]:
logging.getLogger("Methylseq").debug("Reference genome = " + self.args["reference_genome"])
indexed_ref = self.args["reference_genome"]
if not os.path.exists( os.path.join(os.path.dirname(indexed_ref),"Bisulfite_Genome" )):
bismark_genome_preparation = self.add_component("BismarkGenomePreparation", [ self.args["reference_genome"], self.args["bowtie2"] ])
indexed_ref = bismark_genome_preparation.databank_fasta
if is_paired_end :
bismarkReference = self.add_component("Bismark", [indexed_ref,trim_galore.read1_cleaned, trim_galore.read2_cleaned,self.args["non_directional"],self.args["bowtie2"],self.args["alignment_mismatch"]], component_prefix="paired",parent = trim_galore)
insertssizesReference = self.add_component("InsertsSizes", [bismarkReference.output_sorted_bam, self.args["histogram_width"], self.args["min_pct"], "LENIENT", "inserts_sizes.tar.gz"], component_prefix="paired", parent = bismarkReference)
bismarkReference_extract = self.add_component("BismarkMethylationExtractor", [indexed_ref, bismarkReference.output_sorted_bam, "paired" , self.args["methylation_extractor_no_overlap"]], component_prefix="paired",parent = bismarkReference)
# make some statistic on the alignement (disabled due to errors during parsing)
alignmentstats = self.add_component("AlignmentStats", [bismarkReference.output_sorted_bam, is_paired_end, False], parent = bismarkReference, component_prefix="PEreference")
else:
bismarkReference = self.add_component("Bismark", [indexed_ref,trim_galore.read1_cleaned, None ,self.args["non_directional"],self.args["bowtie2"],self.args["alignment_mismatch"]],parent = trim_galore)
bismarkReference_extract = self.add_component("BismarkMethylationExtractor", [indexed_ref, bismarkReference.output_sorted_bam, "single" , self.args["methylation_extractor_no_overlap"]], parent = bismarkReference)
# make some statistic on the alignement (disabled due to errors during parsing)
alignmentstats = self.add_component("AlignmentStats", [bismarkReference.output_sorted_bam, is_paired_end, False], parent = bismarkReference, component_prefix="SEreference")
logging.getLogger("Methylseq").debug("Reference genome = " + self.args["reference_genome"])
#Alignement against the reference genome
indexed_ref = self.args["reference_genome"]
# index the reference genome if not already indexed
if not os.path.exists( os.path.join(os.path.dirname(indexed_ref),"Bisulfite_Genome" )):
bismark_genome_preparation = self.add_component("BismarkGenomePreparation", [ self.args["reference_genome"], self.args["bowtie2"] ])
indexed_ref = bismark_genome_preparation.databank_fasta
if is_paired_end :
# align the PE reads against the reference genome with bismark
bismarkReference = self.add_component("Bismark", [indexed_ref,trim_galore.read1_cleaned, trim_galore.read2_cleaned,self.args["non_directional"],self.args["bowtie2"],self.args["alignment_mismatch"]], component_prefix="paired",parent = trim_galore)
# process insert sizes of the aligned reads
insertssizesReference = self.add_component("InsertsSizes", [bismarkReference.output_sorted_bam, self.args["histogram_width"], self.args["min_pct"], "LENIENT", "inserts_sizes.tar.gz"], component_prefix="paired", parent = bismarkReference)
# compute the methylation extraction from the alignement
bismarkReference_extract = self.add_component("BismarkMethylationExtractor", [indexed_ref, bismarkReference.output_sorted_bam, "paired" , self.args["methylation_extractor_no_overlap"]], component_prefix="paired",parent = bismarkReference)
# make some statistic on the alignement
alignmentstats = self.add_component("AlignmentStats", [bismarkReference.output_sorted_bam, is_paired_end, False], parent = bismarkReference, component_prefix="PEreference")
else:
# align the SE reads against the reference genome with bismark
bismarkReference = self.add_component("Bismark", [indexed_ref,trim_galore.read1_cleaned, None ,self.args["non_directional"],self.args["bowtie2"],self.args["alignment_mismatch"]],parent = trim_galore)
# compute the methylation extraction from the alignement
bismarkReference_extract = self.add_component("BismarkMethylationExtractor", [indexed_ref, bismarkReference.output_sorted_bam, "single" , self.args["methylation_extractor_no_overlap"]], parent = bismarkReference)
# make some statistic on the alignement
alignmentstats = self.add_component("AlignmentStats", [bismarkReference.output_sorted_bam, is_paired_end, False], parent = bismarkReference, component_prefix="SEreference")
#Alignement against the control sequence if specified
if self.args["control_reference"]:
logging.getLogger("Methylseq").debug("Controle sequence = " + self.args["control_reference"])
indexed_control = self.args["control_reference"]
# index the control sequence if not already indexed
if not os.path.exists( os.path.join(os.path.dirname(indexed_control),"Bisulfite_Genome" )):
bismark_genome_preparation_control = self.add_component("BismarkGenomePreparation", [ self.args["control_reference"], self.args["bowtie2"] ], component_prefix="control")
bismark_genome_preparation_control.description = "test modif description analyse"
indexed_control = bismark_genome_preparation_control.databank_fasta
if is_paired_end :
# align the PE reads against the control sequence with bismark
bismarkControl = self.add_component("Bismark", [indexed_control,trim_galore.read1_cleaned, trim_galore.read2_cleaned,self.args["non_directional"],self.args["bowtie2"],self.args["alignment_mismatch"]], component_prefix="control_paired", parent = trim_galore)
# process insert sizes of the aligned reads
insertssizesControl = self.add_component("InsertsSizes", [bismarkControl.output_sorted_bam, self.args["histogram_width"], self.args["min_pct"], "LENIENT", "inserts_sizes.tar.gz"], component_prefix="control_paired", parent = bismarkControl)
# compute the methylation extraction from the alignement
bismarkControl_extract = self.add_component("BismarkMethylationExtractor", [indexed_control, bismarkControl.output_sorted_bam, "paired" , self.args["methylation_extractor_no_overlap"]], component_prefix="control_paired", parent = bismarkControl)
# make some statistic on the alignement (disabled due to errors during parsing)
# make some statistic on the alignement
alignmentstats = self.add_component("AlignmentStats", [bismarkControl.output_sorted_bam, is_paired_end, False], parent = bismarkControl, component_prefix="PEcontrol")
else:
# align the SE reads against the control sequence with bismark
bismarkControl = self.add_component("Bismark", [indexed_control,trim_galore.read1_cleaned, None ,self.args["non_directional"],self.args["bowtie2"],self.args["alignment_mismatch"]], component_prefix="control", parent = trim_galore)
# compute the methylation extraction from the alignement
bismarkControl_extract = self.add_component("BismarkMethylationExtractor", [indexed_control, bismarkControl.output_sorted_bam, "single" , self.args["methylation_extractor_no_overlap"]], component_prefix="control", parent = bismarkControl)
# make some statistic on the alignement (disabled due to errors during parsing)
# make some statistic on the alignement
alignmentstats = self.add_component("AlignmentStats", [bismarkControl.output_sorted_bam, is_paired_end, False], parent = bismarkControl, component_prefix="SEcontrol")
logging.getLogger("Methylseq").debug("Leaving Methylseq pipeline")
\ No newline at end of file
......@@ -54,14 +54,11 @@ class BismarkGenomePreparation (Component):
self.databank_fasta = OutputFile(os.path.join(self.output_directory,os.path.basename(self.reference_genome)), Formats.FASTA)
self.databank_directory = self.output_directory
self.args = ""
#TODO NB Processor car 2 si directionnal 4 sinon donc il faut les reserver
if bowtie2:
#self.args = " --bowtie2 --path_to_bowtie "+os.path.dirname(self.get_exec_path("bwt2_index"))
self.args = " --bowtie2"
self.stderr = os.path.join(self.output_directory, 'bismark_genome_preparation.stderr')
self.stdout = os.path.join(self.output_directory, "bismark_genome_preparation.stdout")
def process(self):
bismark2index = PythonFunction(bismark_index, cmd_format="{EXE} {IN} {OUT} {ARG}")
bismark2index(inputs=[self.reference_genome,self.databank_directory], outputs=[self.databank_fasta, self.stdout, self.stderr], arguments=[self.get_exec_path("bismark_genome_preparation"), self.args ])
\ No newline at end of file
......@@ -60,7 +60,7 @@ class BismarkMethylationExtractor (Analysis):
def define_analysis(self):
self.name = "Methylation Extraction"
self.description = "methylation extraction from Bismark output"
self.description = "Methylation extraction from Bismark output"
self.software = "bismark_methylation_extractor"
self.options = self.args
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment