Commit 55900cc1 authored by Jerome Mariette
Browse files

add some parameters to the miseq workflow

parent 9cb7ea96
......@@ -85,7 +85,8 @@ class MiSeqDiversity (NG6Workflow):
addrawfiles = self.add_component("AddRawFiles", [self.runobj, saved_files, self.args["compression"]])
# make some statistics on raw file
fastqc = self.add_component("FastQC", [filtered_read1_files+filtered_read2_files, (group_prefix is not None), True, run_name+"_fastqc.tar.gz"], parent = fastqilluminafilter)
fastqc = self.add_component("FastQC", [filtered_read1_files+filtered_read2_files, (group_prefix is not None),
True, run_name+"_fastqc.tar.gz"], parent = fastqilluminafilter)
# list concatenated files
if is_paired_end and (group_prefix is not None):
......@@ -105,34 +106,81 @@ class MiSeqDiversity (NG6Workflow):
fileExtension = os.path.splitext(concat_read1_files[0])[1]
if fileExtension == fileFormat:
gunzip = self.add_component("GunZipFiles",[concat_read1_files,concat_read2_files])
makecontigs = self.add_component("MothurMakeContigs", kwargs={'read1_files':gunzip.fastq_R1,'read2_files':gunzip.fastq_R2,\
'sample_name': self.args["sample_name"],'maxambig':'0','maxlength':self.args["max_contigs_length"]}, component_prefix="with_gz", parent=fastqilluminafilter)
makecontigs = self.add_component("MothurMakeContigs",
kwargs={'read1_files':gunzip.fastq_R1,
'read2_files':gunzip.fastq_R2,
'sample_name': self.args["sample_name"],
'maxambig':self.args["max_ambiguous"],
'maxlength':self.args["max_contigs_length"],
'processors':self.args["processors"]},
component_prefix="with_gz",
parent=fastqilluminafilter)
else:
makecontigs = self.add_component("MothurMakeContigs", kwargs={'read1_files':concat_read1_files,'read2_files':concat_read2_files,\
'sample_name': self.args["sample_name"],'maxambig':'0','maxlength':self.args["max_contigs_length"]}, component_prefix="without_gz", parent=fastqilluminafilter)
makecontigs = self.add_component("MothurMakeContigs",
kwargs={'read1_files':concat_read1_files,
'read2_files':concat_read2_files,
'sample_name': self.args["sample_name"],
'maxambig':self.args["max_ambiguous"],
'maxlength':self.args["max_contigs_length"],
'processors':self.args["processors"]},
component_prefix="without_gz",
parent=fastqilluminafilter)
uniqueseqs = self.add_component("MothurUniqueSeqs", [makecontigs.good_fasta_files])
countseqs = self.add_component("MothurCountSeqs", [uniqueseqs.unique_names_files,makecontigs.good_groups_files])
pcrseqs = self.add_component("MothurPcrSeqs", kwargs={'fasta_files':self.args["reference_alignment"], 'forward_primer':self.args["forward_primer"],\
'reverse_primer':self.args["reverse_primer"]})
alignseqs = self.add_component("MothurAlign", kwargs={'fasta_files':uniqueseqs.unique_fasta_files,'reference_alignment_files':pcrseqs.pcr_fasta_files,\
'count_table_files':countseqs.count_table_files, 'maxhomop':8}, parent=makecontigs)
filterseqs = self.add_component("MothurFilterSeqs",[alignseqs.good_fasta_files])
uniqueseqs_filter = self.add_component("MothurUniqueSeqs", kwargs={'fasta_files':filterseqs.filtered_fasta_files, \
'count_table_files':alignseqs.good_count_table_files},component_prefix="after_filter")
precluster = self.add_component("MothurPreCluster",kwargs={'fasta_files':uniqueseqs_filter.unique_fasta_files,\
'count_table_files':uniqueseqs_filter.unique_count_table_files})
chimerauchime = self.add_component("MothurChimeraUchime",kwargs={'fasta_files':precluster.precluster_fasta_files,\
'count_table_files':precluster.precluster_count_table_files},parent=alignseqs)
pcrseqs = self.add_component("MothurPcrSeqs",
kwargs={'fasta_files':self.args["reference_alignment"],
'forward_primer':self.args["forward_primer"],
'reverse_primer':self.args["reverse_primer"]})
alignseqs = self.add_component("MothurAlign",
kwargs={'fasta_files':uniqueseqs.unique_fasta_files,
'reference_alignment_files':pcrseqs.pcr_fasta_files,
'count_table_files':countseqs.count_table_files,
'maxhomop':self.args["max_homopolymers"],
'processors':self.args["processors"]},
parent=makecontigs)
filterseqs = self.add_component("MothurFilterSeqs",
kwargs={'align_files':alignseqs.good_fasta_files,
'processors':self.args["processors"]})
uniqueseqs_filter = self.add_component("MothurUniqueSeqs",
kwargs={'fasta_files':filterseqs.filtered_fasta_files,
'count_table_files':alignseqs.good_count_table_files},
component_prefix="after_filter")
precluster = self.add_component("MothurPreCluster",
kwargs={'fasta_files':uniqueseqs_filter.unique_fasta_files,
'count_table_files':uniqueseqs_filter.unique_count_table_files,
'processors':self.args["processors"]})
chimerauchime = self.add_component("MothurChimeraUchime",
kwargs={'fasta_files':precluster.precluster_fasta_files,
'count_table_files':precluster.precluster_count_table_files,
'processors':self.args["processors"]},
parent=alignseqs)
# classify reads with the provided taxonomy
classifyseqs = self.add_component("MothurClassifySeqs",kwargs={'fasta_files':chimerauchime.pick_fasta_files,'template_files':self.args["classify_template"],
'taxonomy_files':self.args["classify_taxonomy"],'count_table_files':chimerauchime.good_count_table_files,
'without_krona':self.args["without_krona"]}, parent=chimerauchime)
classifyseqs = self.add_component("MothurClassifySeqs",
kwargs={'fasta_files':chimerauchime.pick_fasta_files,
'template_files':self.args["classify_template"],
'taxonomy_files':self.args["classify_taxonomy"],
'count_table_files':chimerauchime.good_count_table_files,
'without_krona':self.args["without_krona"],
'processors':self.args["processors"]},
parent=chimerauchime)
# OTUs approach
distseqs = self.add_component("MothurDistSeqs", [chimerauchime.pick_fasta_files])
cluster = self.add_component("MothurCluster", kwargs={'dist_files':distseqs.dist_files,'count_table_files':chimerauchime.good_count_table_files})
distseqs = self.add_component("MothurDistSeqs",
kwargs={'fasta_files':chimerauchime.pick_fasta_files,
'processors':self.args["processors"]})
cluster = self.add_component("MothurCluster",
kwargs={'dist_files':distseqs.dist_files,
'count_table_files':chimerauchime.good_count_table_files})
otuanalysis = self.add_component("MothurOTUAnalysis", kwargs={'an_list_files':cluster.an_list_files,'count_table_files':chimerauchime.good_count_table_files,\
'taxonomy_files':classifyseqs.taxonomy_files,'label':self.args["labels"],'tree_label':self.args["labels"],
'without_krona':self.args["without_krona"]},parent=chimerauchime)
\ No newline at end of file
otuanalysis = self.add_component("MothurOTUAnalysis",
kwargs={'an_list_files':cluster.an_list_files,
'count_table_files':chimerauchime.good_count_table_files,
'taxonomy_files':classifyseqs.taxonomy_files,
'label':self.args["labels"],
'tree_label':self.args["labels"],
'without_krona':self.args["without_krona"]},
parent=chimerauchime)
......@@ -74,6 +74,21 @@ max_contigs_length.name = max_contigs_length
max_contigs_length.flag = --max-contigs-length
max_contigs_length.help = Maximum length sequences after make contigs
# Number of ambiguous bases tolerated per contig; the workflow forwards this
# value to MothurMakeContigs as 'maxambig' (previously hard-coded to 0).
max_ambiguous.name = max_ambiguous
max_ambiguous.flag = --max-ambiguous
max_ambiguous.help = How many N allowed in a sequence
max_ambiguous.default = 0
max_ambiguous.type = int
# Longest homopolymer run tolerated; the workflow forwards this value to
# MothurAlign as 'maxhomop' (previously hard-coded to 8).
max_homopolymers.name = max_homopolymers
max_homopolymers.flag = --max-homopolymers
max_homopolymers.help = Which is the maximum size of homopolymers to allow
max_homopolymers.default = 8
max_homopolymers.type = int
# CPU count forwarded as 'processors' to the Mothur components that accept it.
processors.name = processors
processors.flag = --processors
processors.help = How many cpus can be used during the process
processors.default = 1
processors.type = int
reference_alignment.name = reference_alignment
reference_alignment.flag = --reference-alignment
reference_alignment.help = Where is stored the reference alignment
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment