Commit 639242c8 authored by Claire Kuchly
Browse files

add data and conf files

Parameter changes: use the "multiple" type and add a parameter group
parent a943b87b
......@@ -33,41 +33,53 @@ class IlluminaDiversityQC (NG6Workflow):
# manage the sequences files
group_prefix = None
if self.args['casava_directory'] is not None :
if self.args['lane_number'] is None :
if self.args['sample_description']['casava_directory'] is not None :
if self.args['sample_description']['lane_number'] is None :
raise ValueError, "lane-number must be specified with casava-directory."
mids_desc_array, self.read1_files, self.read2_files, undetermined_read1_files, undetermined_read2_files = Utils.filesFromCasava( self.args['casava_directory'], self.project.get_name(), self.args['lane_number'] )
mids_desc_array, self.read1_files, self.read2_files, undetermined_read1_files, undetermined_read2_files = Utils.filesFromCasava( self.args['sample_description']['casava_directory'], self.project.get_name(), self.args['sample_description']['lane_number'] )
print mids_desc_array, self.read1_files, self.read2_files, undetermined_read1_files, undetermined_read2_files
group_prefix = (Utils.get_group_basenames(self.read1_files+self.read2_files, "read")).keys()
self.runobj.add_mids_description(mids_desc_array)
# statistics about demultiplexing
if len(undetermined_read1_files) > 0 :
demultiplex_stats = self.add_component("DemultiplexStats", [self.read1_files, undetermined_read1_files])
elif (self.args['read_1'] is not None) and (len(self.args['read_1']) > 0) :
elif (self.args['files_read'] is not None) and (self.args['files_read'] > 0) :
self.read1_files = []
self.read2_files = []
for file in self.args["read_1"]:
if os.path.isfile(file):
self.read1_files.append(file)
else:
raise IOError, file + " file does not exists."
if self.args["read_2"]:
for file in self.args["read_2"]:
if os.path.isfile(file):
self.read2_files.append(file)
else:
raise IOError, file + " file does not exists."
else:
raise IOError, "read-2 must be specified with read-1."
for pair in self.args['files_read']:
R1_file = pair['read_1']
R2_file = pair['read_2']
if os.path.isfile(R1_file):
self.read1_files.append(R1_file)
else:
raise IOError, R1_file + " file does not exists."
if R2_file is not None:
if os.path.isfile(R2_file):
self.read2_files.append(R2_file)
else:
raise IOError, R2_file + " file does not exists."
else:
raise ValueError, "[casava-directory and lane-number] OR [read-1 and read-2] must be specified."
raise ValueError, "[casava-directory and lane-number] OR [read(s)] must be specified."
is_paired_end = len(self.read2_files) > 0
if self.args["keep_reads"] != "all" :
# fastq illumina filter
fastqilluminafilter = self.add_component("FastqIlluminaFilter", [self.read1_files+self.read2_files, self.args["keep_reads"], group_prefix, run_name+"_fastqilluminafilter.tar.gz"])
# list filtered files
[filtered_read1_files, filtered_read2_files] = Utils.split_pair(fastqilluminafilter.fastq_files_filtered, (group_prefix is not None))
if is_paired_end :
# split read 1 and read 2 from filtered files list
[filtered_read1_files, filtered_read2_files] = Utils.split_pair(fastqilluminafilter.fastq_files_filtered, (group_prefix is not None))
else:
filtered_read1_files = fastqilluminafilter.fastq_files_filtered
filtered_read2_files = []
filtered_read1_files = sorted(filtered_read1_files)
filtered_read2_files = sorted(filtered_read2_files)
else:
......@@ -99,4 +111,4 @@ class IlluminaDiversityQC (NG6Workflow):
if self.args["assignation_databank"] is not None :
# subset assignation
subset_assignation = self.add_component("SubsetAssignation", [join_pairs.extended_frags, self.args["assignation_databank"]], parent=join_pairs)
\ No newline at end of file
subset_assignation = self.add_component("SubsetAssignation", [join_pairs.extended_frags, self.args["assignation_databank"]], parent=join_pairs)
......@@ -31,27 +31,36 @@ description = Illumina diversity quality check pipeline. This pipeline needs to
# .action [store]: the basic type of action to be taken (store|append)
#
[parameters]
casava_directory.name = casava_directory
casava_directory.flag = --casava-directory
casava_directory.help = Where are stored casava results (see also lane-number)
casava_directory.required = True
casava_directory.exclude = read_1
sample_description.name = description
sample_description.flag = --casava-description
sample_description.help = Where the casava results are stored and which lane should be processed
sample_description.type = multiple
sample_description.required = True
sample_description.exclude = files_read
sample_description.casava_directory.name = casava directory
sample_description.casava_directory.flag = casava-directory
sample_description.casava_directory.help = Where the casava results are stored
sample_description.casava_directory.type = casava_dir
sample_description.lane_number.name = lane number
sample_description.lane_number.flag = lane-number
sample_description.lane_number.help = Which lane should be processed
lane_number.name = lane_number
lane_number.flag = --lane-number
lane_number.help = Which lane should be processed (mandatory with casava-directory)
files_read.name = reads
files_read.flag = --reads
files_read.help = Define which read1 files and/or read2 files should be used
files_read.type = multiple
files_read.action = append
files_read.exclude = sample_description
files_read.read_1.name = read 1
files_read.read_1.flag = R1
files_read.read_1.help = Which read1 files should be used
files_read.read_1.required = True
files_read.read_1.type = localfile
files_read.read_2.name = read 2
files_read.read_2.flag = R2
files_read.read_2.help = Which read2 files should be used (if single end, leave empty)
files_read.read_2.type = localfile
read_1.name = read_1
read_1.flag = --read-1
read_1.help = Which read1 files should be used
read_1.action = append
read_1.required = True
read_1.exclude = casava_directory
read_2.name = read_2
read_2.flag = --read-2
read_2.help = Which read2 files should be used (if single end, leave empty)
read_2.action = append
compression.name = compression
compression.flag = --compression
......@@ -62,15 +71,18 @@ compression.choices = none|gz|bz2
reference_genome.name = reference genome
reference_genome.flag = --reference-genome
reference_genome.help = Which genome the reads should be aligned on
reference_genome.type = localfile
databank.name = databank
databank.flag = --databank
databank.help = Which databank should be used to seek contamination (has to be a phiX databank indexed for bwa)
databank.action = append
databank.type = localfile
assignation_databank.name = assignation databank
assignation_databank.flag = --assignation-databank
assignation_databank.help = Blast databank to classify a subset of sequences
assignation_databank.type = localfile
keep_reads.name = keep_reads
keep_reads.flag = --keep
......@@ -78,31 +90,25 @@ keep_reads.help = Keep reads which pass the Illumina filters or keep reads which
keep_reads.default = pass_illumina_filters
keep_reads.choices = pass_illumina_filters|not_pass_illumina_filters|all
# Flash
# Join pairs - Flash
mismatch_ratio.group = JOIN section
min_overlap.group = JOIN section
max_overlap.group = JOIN section
# Parameter mismatch_ratio
mismatch_ratio.name = Mismatch ratio
mismatch_ratio.flag = --mismatch-ratio
mismatch_ratio.help = Maximum allowed ratio between the number of mismatched base pairs and the overlap length.
mismatch_ratio.type = float
mismatch_ratio.default = 0.1
# Parameter min_overlap
min_overlap.name = Minimum overlap
min_overlap.flag = --min-overlap
min_overlap.help = The minimum required overlap length between two reads to provide a confident overlap.
min_overlap.type = int
min_overlap.default = 20
# Parameter max_overlap
max_overlap.name = Maximum overlap
max_overlap.flag = --max-overlap
max_overlap.help = Maximum overlap length expected in approximately 90 percent of read pairs.
max_overlap.type = int
max_overlap.default = 55
avg_reads_length.name = avg_reads_length
avg_reads_length.flag = --avg-reads-length
avg_reads_length.help = Average read length
avg_reads_length.default = 250
avg_fragment_length.name = avg_fragment_length
avg_fragment_length.flag = --avg-fragment-length
avg_fragment_length.help = Average fragment length (after join)
avg_fragment_length.default = 460
\ No newline at end of file
max_overlap.default = 55
\ No newline at end of file
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment