Commit f3d0b6c2 authored by Penom Nom
Browse files

- Change workflow parameters.

- Change output name for GeneOTUStat.
parent d55c5f77
......@@ -28,20 +28,20 @@ class GeneDiversity (NG6Workflow):
addrawfiles = self.add_component( "AddRawFiles", [self.runobj, self.args["read_1"]+self.args["read_2"], "none"] )
# Trim sequences
trim_R1 = self.add_component("Trimmer", [self.args['read_1'], 1, self.args["trim"]["read_1"]], component_prefix="R1")
trim_R2 = self.add_component("Trimmer", [self.args['read_2'], 1, self.args["trim"]["read_2"]], component_prefix="R2")
trim_R1 = self.add_component("Trimmer", [self.args['read_1'], 1, self.args["trim_read_1"]], component_prefix="R1")
trim_R2 = self.add_component("Trimmer", [self.args['read_2'], 1, self.args["trim_read_2"]], component_prefix="R2")
# Make some statistics on raw file
fastqc = self.add_component("FastQC", [trim_R1.output_files + trim_R2.output_files, False, True])
# Merge overlapping pair
join_pairs = self.add_component("Flash", [trim_R1.output_files, trim_R2.output_files, self.args["join_pair"]["mismatch_ratio"], self.args["join_pair"]["min_overlap"], self.args["join_pair"]["max_overlap"]])
join_pairs = self.add_component("Flash", [trim_R1.output_files, trim_R2.output_files, self.args["mismatch_ratio"], self.args["min_overlap"], self.args["max_overlap"]])
# Fastq to fasta
fastq2fasta = self.add_component("Fastq2fasta", [join_pairs.extended_frags])
# Dereplicates sequences
dereplicate = self.add_component("UsearchDereplication", [fastq2fasta.output_files, self.args["dereplication"]["method"], self.args["dereplication"]["strand"], self.args["dereplication"]["min_unique_size"]])
dereplicate = self.add_component("UsearchDereplication", [fastq2fasta.output_files])
# Remove chimeric sequences
chimera = self.add_component("UsearchChimera", [dereplicate.output_files], parent=join_pairs)
......@@ -57,7 +57,7 @@ class GeneDiversity (NG6Workflow):
merge = self.add_component("ConcatenateFiles", [rename_clusters.output_files, "all_trad_sequences.fasta"])
# Create OTU
cdhit = self.add_component("Cdhit", [merge.output_file, self.args["otu"]["identity_threshold"], self.args["otu"]["length_diff_cutoff"], self.args["otu"]["cluster_most_similar"]])
cdhit = self.add_component("Cdhit", [merge.output_file, self.args["otu_identity_threshold"], self.args["otu_length_diff_cutoff"], self.args["otu_cluster_most_similar"]])
# Index the reference proteins for blast
blast_index = self.add_component("BlastIndex", [self.args["database"], "prot"])
......
......@@ -147,12 +147,12 @@ class GeneOTUStat (Analysis):
self.blast_file = InputFileList( blast_file )
self.taxonomy_file = InputFileList( taxonomy_file )
self.fasta_file = InputFileList( fasta_file, Formats.FASTA )
self.stats = OutputFileList( self.get_outputs('{basename_woext}.stat', self.cdhit_file) )
self.stats = OutputFileList( self.get_outputs('{basename_woext}_OTU_count.tsv', self.cdhit_file) )
self.fasta_renamed = OutputFileList( self.get_outputs('{basename_woext}.fasta', self.fasta_file) )
def define_analysis(self):
self.name = "GeneOTUAnalysis"
self.description = "Organizational Taxon Unit analysis."
self.description = "Operational Taxon Unit analysis."
self.software = "-"
self.options = ""
......
......@@ -59,94 +59,63 @@ taxonomy.type = localfile
taxonomy.required = True
# Trim sequences
trim.name = Trim sequences
trim.flag = --trim
trim.help = Options for trim sequences by maximum length
trim.type = multiple
trim.group = TRIM section
trim_read_1.group = TRIM section
trim_read_2.group = TRIM section
# Parameter read_1
trim.read_1.name = Maximum read 1 length
trim.read_1.flag = read-1
trim.read_1.help = Maximum length for reads 1.
trim.read_1.type = int
trim.read_1.default = 500
trim_read_1.name = Maximum read 1 length
trim_read_1.flag = --trim-read-1
trim_read_1.help = Maximum length for reads 1.
trim_read_1.type = int
trim_read_1.default = 500
# Parameter read_2
trim.read_2.name = Maximum read 2 length
trim.read_2.flag = read-2
trim.read_2.help = Maximum length for reads 2.
trim.read_2.type = int
trim.read_2.default = 500
trim_read_2.name = Maximum read 2 length
trim_read_2.flag = --trim-read-2
trim_read_2.help = Maximum length for reads 2.
trim_read_2.type = int
trim_read_2.default = 500
# Join pairs
join_pair.name = Join pairs
join_pair.flag = --join
join_pair.help = Options for join the overlapping pairs
join_pair.type = multiple
join_pair.group = JOIN section
mismatch_ratio.group = JOIN section
min_overlap.group = JOIN section
max_overlap.group = JOIN section
# Parameter mismatch_ratio
join_pair.mismatch_ratio.name = Mismatch ratio
join_pair.mismatch_ratio.flag = mismatch-ratio
join_pair.mismatch_ratio.help = Maximum allowed ratio between the number of mismatched base pairs and the overlap length.
join_pair.mismatch_ratio.type = float
join_pair.mismatch_ratio.default = 0.1
mismatch_ratio.name = Mismatch ratio
mismatch_ratio.flag = --mismatch-ratio
mismatch_ratio.help = Maximum allowed ratio between the number of mismatched base pairs and the overlap length.
mismatch_ratio.type = float
mismatch_ratio.default = 0.1
# Parameter min_overlap
join_pair.min_overlap.name = Minimum overlap
join_pair.min_overlap.flag = min-overlap
join_pair.min_overlap.help = The minimum required overlap length between two reads to provide a confident overlap.
join_pair.min_overlap.type = int
join_pair.min_overlap.default = 20
min_overlap.name = Minimum overlap
min_overlap.flag = --min-overlap
min_overlap.help = The minimum required overlap length between two reads to provide a confident overlap.
min_overlap.type = int
min_overlap.default = 20
# Parameter max_overlap
join_pair.max_overlap.name = Maximum overlap
join_pair.max_overlap.flag = max-overlap
join_pair.max_overlap.help = Maximum overlap length expected in approximately 90 percent of read pairs.
join_pair.max_overlap.type = int
join_pair.max_overlap.default = 55
# Dereplication
dereplication.name = Dereplication
dereplication.flag = --dereplication
dereplication.help = Options for the dereplication
dereplication.type = multiple
dereplication.group = DEREPLICATION section
# Parameter method
dereplication.method.name = Method
dereplication.method.flag = method
dereplication.method.help = Algorithm to search replicates. 'fulllength' => If two or more sequences are identical, all except one are kept. 'prefixes' => A sequence A is discarded if it is a prefix of some other sequence B in the set.
dereplication.method.choices = prefixes|fulllength
dereplication.method.default = fulllength
# Parameter strand
dereplication.strand.name = Strand
dereplication.strand.flag = strand
dereplication.strand.help = Search for hits on the forward ('plus') or forward and reverse-complemented strands ('both'). ATTENTION : the method 'prefixes' does not support 'both'.
dereplication.strand.choices = plus|both
dereplication.strand.default = both
# Parameter min_unique_size
dereplication.min_unique_size.name = Minimum unique size
dereplication.min_unique_size.flag = min-unique-size
dereplication.min_unique_size.help = Minimum size for a cluster.
dereplication.min_unique_size.type = int
max_overlap.name = Maximum overlap
max_overlap.flag = --max-overlap
max_overlap.help = Maximum overlap length expected in approximately 90 percent of read pairs.
max_overlap.type = int
max_overlap.default = 55
# OTU
otu.name = Operational taxonomic unit
otu.flag = --OTU
otu.help = Options for the OTU
otu.type = multiple
otu.group = OTU section
otu_identity_threshold.group = OTU section
otu_length_diff_cutoff.group = OTU section
otu_cluster_most_similar.group = OTU section
# Parameter identity_threshold
otu.identity_threshold.name = Identity threshold
otu.identity_threshold.flag = identity-threshold
otu.identity_threshold.help = Sequence identity threshold. Calculated as : number of identical amino acids in alignment divided by the full length of the shorter sequence.
otu.identity_threshold.type = float
otu.identity_threshold.default = 0.95
otu_identity_threshold.name = Identity threshold
otu_identity_threshold.flag = --otu-identity-threshold
otu_identity_threshold.help = Sequence identity threshold. Calculated as : number of identical amino acids in alignment divided by the full length of the shorter sequence.
otu_identity_threshold.type = float
otu_identity_threshold.default = 0.95
# Parameter length_diff_cutoff
otu.length_diff_cutoff.name = Length difference cutoff
otu.length_diff_cutoff.flag = lg-diff-cutoff
otu.length_diff_cutoff.help = Maximum length difference between shorter end representative sequence of the cluster. If set to 0.9, the shorter sequences need to be at least 90% length of the representative of the cluster.
otu.length_diff_cutoff.type = float
otu.length_diff_cutoff.default = 0.8
otu_length_diff_cutoff.name = Length difference cutoff
otu_length_diff_cutoff.flag = --otu-lg-diff-cutoff
otu_length_diff_cutoff.help = Maximum length difference between shorter end representative sequence of the cluster. If set to 0.9, the shorter sequences need to be at least 90 percent length of the representative of the cluster.
otu_length_diff_cutoff.type = float
otu_length_diff_cutoff.default = 0.8
# Parameter cluster_most_similar
otu.cluster_most_similar.name = Use method based on most similar cluster
otu.cluster_most_similar.flag = most-similar
otu.cluster_most_similar.help = Change the clustering method. True => the program will cluster it into the most similar cluster that meet the threshold (accurate but slow mode). False => the sequence can be clustered to the first cluster that meet the threshold (fast cluster). Either won't change the representatives of final clusters.
otu.cluster_most_similar.type = bool
otu.cluster_most_similar.default = True
\ No newline at end of file
otu_cluster_most_similar.name = Use method based on most similar cluster
otu_cluster_most_similar.flag = --otu-most-similar
otu_cluster_most_similar.help = Change the clustering method. True => the program will cluster it into the most similar cluster that meet the threshold (accurate but slow mode). False => the sequence can be clustered to the first cluster that meet the threshold (fast cluster). Either won't change the representatives of final clusters.
otu_cluster_most_similar.type = bool
otu_cluster_most_similar.default = True
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment