Commit 9fb8c018 authored by Penom Nom's avatar Penom Nom
Browse files

Add trim.

parent fca6609a
......@@ -24,11 +24,15 @@ from ng6.utils import Utils
class GeneDiversity (NG6Workflow):
def process(self):
# Trim sequences
trim_R1 = self.add_component("Trimmer", [self.args['read_1'], 1, self.args["trim"]["read_1"]], component_prefix="R1")
trim_R2 = self.add_component("Trimmer", [self.args['read_2'], 1, self.args["trim"]["read_2"]], component_prefix="R2")
# Make some statistics on raw file
fastqc = self.add_component("FastQC", [self.args['read_1'] + self.args['read_2']])
fastqc = self.add_component("FastQC", [trim_R1.output_files + trim_R2.output_files])
# Merge overlapping pair
join_pairs = self.add_component("Flash", [self.args['read_1'], self.args['read_2'], self.args["join_pair"]["mismatch_ratio"], self.args["join_pair"]["min_overlap"], self.args["join_pair"]["max_overlap"]])
join_pairs = self.add_component("Flash", [trim_R1.output_files, trim_R2.output_files, self.args["join_pair"]["mismatch_ratio"], self.args["join_pair"]["min_overlap"], self.args["join_pair"]["max_overlap"]])
# Fastq to fasta
fastq2fasta = self.add_component("Fastq2fasta", [join_pairs.extended_frags])
......
......@@ -55,10 +55,10 @@ class Framebot (Component):
self.failed_nucleotids = OutputFileList( self.get_outputs('{fullpath}_failed_nucl.fasta', self.stem_name), Formats.FASTA )
self.corrected_proteins = OutputFileList( self.get_outputs('{fullpath}_corr_prot.fasta', self.stem_name), Formats.FASTA )
self.stderr = OutputFileList( self.get_outputs('{basename_woext}.stderr', self.input_fasta) )
def process(self):
for idx in range(len(self.input_fasta)):
framebot = ShellFunction( self.get_exec_path("FrameBot") + " framebot " + self.options + " -o $1 " + self.bank + " $2 2> $3", cmd_format='{EXE} {ARG} {IN} {OUT}' )
framebot = ShellFunction( "java -Xmx4g -jar " + self.get_exec_path("FrameBot") + " framebot " + self.options + " -o $1 " + self.bank + " $2 2> $3", cmd_format='{EXE} {ARG} {IN} {OUT}' )
framebot( arguments=self.stem_name[idx],
inputs=self.input_fasta[idx],
outputs=[self.stderr[idx], self.ok_aln[idx], self.discarded_aln[idx], self.corrected_nucleotids[idx], self.failed_nucleotids[idx], self.corrected_proteins[idx]],
......
......@@ -37,5 +37,5 @@ class FramebotIndex (Component):
self.stderr = OutputFile(os.path.join(self.output_directory, "framebotindex.stderr"))
def process(self):
# Build the shell command for FrameBot's "index" sub-command; the command's stderr is redirected to $3.
# NOTE(review): $1/$2/$3 presumably map to the bank, the index and the stderr file, in the order
# given by cmd_format '{EXE} {IN} {OUT}' and the inputs/outputs passed below -- confirm.
framebotIndex = ShellFunction( self.get_exec_path("FrameBot") + " index $1 $2 2> $3", cmd_format='{EXE} {IN} {OUT}' )
# Same command started through "java -Xmx4g -jar", i.e. the FrameBot path is run as a jar with a 4 GB
# maximum JVM heap. This assignment supersedes the one above.
framebotIndex = ShellFunction( "java -Xmx4g -jar " + self.get_exec_path("FrameBot") + " index $1 $2 2> $3", cmd_format='{EXE} {IN} {OUT}' )
# Run the command with the bank as input and the index plus the stderr file as outputs.
framebotIndex(inputs=self.bank, outputs=[self.index, self.stderr])
\ No newline at end of file
......@@ -26,10 +26,10 @@ from weaver.function import PythonFunction
def gene_OTU_write( cdhit_file, blast_file, taxonomy_file, stat_file ):
"""
@param cdhit_file : the list of '.clstr' files produced by cdhit.
@param blast_file : the blastp result in tabular format (outfmt 6 with NCBI Blast+).
@param taxonomy_file : the taxonomy file.
@param stat_file : the output file.
@param cdhit_file : [string] the list of '.clstr' files produced by cdhit.
@param blast_file : [string] the blastp result in tabular format (outfmt 6 with NCBI Blast+).
@param taxonomy_file : [string] the taxonomy file.
@param stat_file : [string] the output file.
"""
import os
......@@ -90,19 +90,28 @@ def gene_OTU_write( cdhit_file, blast_file, taxonomy_file, stat_file ):
cluster_count[csample] += ccount
class GeneOTUStat (Analysis):
    """
    Analysis that runs gene_OTU_write on cd-hit cluster files, blastp results
    and taxonomy files, and saves the resulting '.stat' files.
    """

    def define_parameters(self, cdhit_cluster_file, blast_file, taxonomy_file):
        """
        @param cdhit_cluster_file : [list] the list of '.clstr' files produced by cdhit.
        @param blast_file : [list] the blastp results in tabular format (outfmt 6 with NCBI Blast+).
        @param taxonomy_file : [list] the taxonomy files.
        """
        self.cdhit_file = InputFileList( cdhit_cluster_file )
        self.blast_file = InputFileList( blast_file )
        # The taxonomy input must wrap the taxonomy_file argument. Building it
        # from blast_file would hand the BLAST result to gene_OTU_write as its
        # taxonomy_file parameter.
        self.taxonomy_file = InputFileList( taxonomy_file )
        # Output names are derived from the cd-hit cluster files with a '.stat' extension.
        self.output_files = OutputFileList( self.get_outputs('{basename_woext}.stat', self.cdhit_file) )

    def define_analysis(self):
        """Set the descriptive fields of the analysis (name, description, software, options)."""
        self.name = "GeneOTUAnalysis"
        # NOTE(review): OTU usually expands to "Operational Taxonomic Unit" --
        # confirm before changing this stored description.
        self.description = "Organizational Taxon Unit analysis."
        self.software = "-"
        self.options = "-"

    def post_process(self):
        """Save the produced '.stat' files through the _save_files helper."""
        self._save_files(self.output_files)

    def process(self):
        """Apply gene_OTU_write to the cd-hit, blast and taxonomy inputs, writing output_files."""
        stat = PythonFunction( gene_OTU_write, cmd_format='{EXE} {IN} {OUT}' )
        # The input order must follow gene_OTU_write's signature:
        # (cdhit_file, blast_file, taxonomy_file, stat_file).
        MultiMap(stat, inputs=[self.cdhit_file, self.blast_file, self.taxonomy_file], outputs=self.output_files )
\ No newline at end of file
......@@ -41,7 +41,7 @@ class UsearchChimera (Component):
self.log = OutputFileList( self.get_outputs('{basename_woext}.log', self.input_fasta) )
self.chimeras = OutputFileList( self.get_outputs('{basename_woext}_chimeras.fasta', self.input_fasta), Formats.FASTA )
self.nonchimeras = OutputFileList( self.get_outputs('{basename_woext}_nonchimeras.fasta', self.input_fasta), Formats.FASTA )
self.stderr = OutputFileList( self.get_outputs('{basename_woext}.stderr', self.input_fasta) )
self.stderr = OutputFileList( self.get_outputs('{basename_woext}.stderr', self.input_fasta) )
def process(self):
chimera = ShellFunction( self.get_exec_path("usearch") + " -uchime_denovo $1 -uchimeout $2 -uchimealns $3 -chimeras $4 -nonchimeras $5 2> $6", cmd_format='{EXE} {IN} {OUT}' )
......
......@@ -58,6 +58,25 @@ taxonomy.help = The gene taxonomy. Format : 'GENE_ID<tab>TAX; TAX; TAX;'.
taxonomy.type = localfile
taxonomy.required = True
# Trim sequences
trim.name = Trim sequences
trim.flag = --trim
trim.help = Options for trimming sequences to a maximum length
trim.type = multiple
trim.group = TRIM section
# Parameter read_1
trim.read_1.name = Maximum read 1 length
trim.read_1.flag = read-1
trim.read_1.help = Maximum length for reads 1.
trim.read_1.type = int
trim.read_1.default = 500
# Parameter read_2
trim.read_2.name = Maximum read 2 length
trim.read_2.flag = read-2
trim.read_2.help = Maximum length for reads 2.
trim.read_2.type = int
trim.read_2.default = 500
# Join pairs
join_pair.name = Join pairs
join_pair.flag = --join
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment