Commit 0fc91c0d authored by Claire Kuchly

After testing, commit the working version of the mate-pair workflow

parent 323216d4
......@@ -34,16 +34,11 @@ class IlluminaMatePair (NG6Workflow):
# manage the sequences files
group_prefix = None
print self.args['sample_description']['lane_number']
print self.project.get_name()
print self.args['sample_description']['casava_directory']
print "---------------------"
if self.args['sample_description']['casava_directory'] is not None :
if self.args['sample_description']['lane_number'] is None :
raise ValueError, "lane-number must be specified with casava-directory."
mids_desc_array, self.read1_files, self.read2_files, undetermined_read1_files, undetermined_read2_files = Utils.filesFromCasava( self.args['sample_description']['casava_directory'], self.project.get_name(), self.args['sample_description']['lane_number'] )
#print mids_desc_array, self.read1_files, self.read2_files, undetermined_read1_files, undetermined_read2_files
group_prefix = (Utils.get_group_basenames(self.read1_files+self.read2_files, "read")).keys()
self.runobj.add_mids_description(mids_desc_array)
......@@ -70,9 +65,10 @@ class IlluminaMatePair (NG6Workflow):
raise ValueError, "[casava-directory and lane-number] OR [read(s)] must be specified."
is_paired_end = len(self.read2_files) > 0
print is_paired_end
if self.args["keep_reads"] != "all" :
# fastq illumina filter
# fastq illumina filter
fastqilluminafilter = self.add_component("FastqIlluminaFilter", [self.read1_files+self.read2_files, self.args["keep_reads"], group_prefix, run_name+"_fastqilluminafilter.tar.gz"])
# list filtered files
......@@ -111,6 +107,8 @@ class IlluminaMatePair (NG6Workflow):
# mate_pair analyse
concatenate1 = self.add_component("ConcatenateFilesGroups", [filtered_read1_files, (Utils.get_group_basenames(self.read1_files, "read")).keys()],component_prefix="read1")
concatenate2 = self.add_component("ConcatenateFilesGroups", [filtered_read2_files, (Utils.get_group_basenames(self.read2_files, "read")).keys()],component_prefix="read2")
concatenate1.concat_files = sorted(concatenate1.concat_files)
concatenate2.concat_files = sorted(concatenate2.concat_files)
cutadapt = self.add_component("CutAdapt",[concatenate1.concat_files, concatenate2.concat_files,{"g":["CTGTCTCTT","ATACACATCT","AGATCTAT","AAGAGACAG"]},{"g":["CTGTCTCTT","ATACACATCT","AGATCTAT","AAGAGACAG"]},is_paired_end,0.1,4,20 ],parent= fastqilluminafilter)
......@@ -118,6 +116,8 @@ class IlluminaMatePair (NG6Workflow):
revcom1 = self.add_component("FastxReverseComplement",[cutadapt.output_files_R1],component_prefix="read1")
revcom2 = self.add_component("FastxReverseComplement",[cutadapt.output_files_R2],component_prefix="read2")
revcom1.output_files = sorted(revcom1.output_files)
revcom2.output_files = sorted(revcom2.output_files)
# make some statistics on filtered file
fastqc = self.add_component("FastQC", [revcom1.output_files+revcom2.output_files, (group_prefix is not None), True, run_name+"_fastqc.tar.gz"], parent = cutadapt, component_prefix="Trimmed_read")
......@@ -133,12 +133,12 @@ class IlluminaMatePair (NG6Workflow):
# align reads against indexed genome
sample_lane_prefixes = None
if group_prefix is not None :
sample_lane_prefixes = (Utils.get_group_basenames(revcom_read1_files+revcom_read2_files, "lane")).keys()
sample_lane_prefixes = (Utils.get_group_basenames(revcom1.output_files+revcom2.output_files, "lane")).keys()
#bwa = self.add_component("BWA", [indexed_ref, gunzip.fastq_R1 , gunzip.fastq_R2, sample_lane_prefixes], parent = cutadapt)
bwa = self.add_component("BWA", [indexed_ref, revcom1.output_files , revcom2.output_files, sample_lane_prefixes, 'aln', False, True], parent = cutadapt)
bwa = self.add_component("BWA", [indexed_ref, revcom1.output_files , revcom2.output_files, sample_lane_prefixes, 'aln', not self.args["delete_bam"]], parent = cutadapt)
# make some statistics on the alignment
alignmentstats = self.add_component("AlignmentStats", [bwa.bam_files, is_paired_end, False], parent = bwa)
if is_paired_end:
# process insert sizes
insertssizes = self.add_component("InsertsSizes", [bwa.bam_files, 10000, self.args["min_pct"], "LENIENT", "inserts_sizes.tar.gz"], parent = bwa)
\ No newline at end of file
insertssizes = self.add_component("InsertsSizes", [bwa.bam_files, 10000, self.args["min_pct"], "LENIENT", "inserts_sizes.tar.gz"], parent = bwa)
......@@ -100,3 +100,9 @@ min_pct.name = min_pct
min_pct.flag = --min_pct
min_pct.help = When generating the histogram, discard any data categories (out of FR, TANDEM, RF) that have fewer than this percentage of overall reads.
min_pct.default = 0.01
delete_bam.name = delete bam
delete_bam.flag = --delete-bam
delete_bam.help = The BAM files are not stored
delete_bam.type = bool
delete_bam.default = False
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment