Change the argument order of the renameRules component so that it runs in the cluster environment.

0998f945 · Celine Noirot · 707723dd · 0998f945 · 0998f945
Commit 0998f945 authored 8 years ago by Celine Noirot
--- a/workflows/rnaseqdenovo/__init__.py
+++ b/workflows/rnaseqdenovo/__init__.py
@@ -110,7 +110,7 @@ class RNAseqDenovo (ApplicationWithoutReferenceGenome):
        # Parameters for process workflow
        elif function == "process":
            # Parameter rename
-            self.add_parameter("split", "Nb contig to put per split files.", display_name="Nb contig to split", type="int", 
+            self.add_parameter("split", "Nb contig to put per split files.", display_name="Nb contig to split", type="int", add_to="assembly", 
                               default=4000, group="ASSEMBLY section")
            self.add_parameter("rename", "With this option the contigs are renamed with the name of their best hit.", display_name="Rename contigs", type="bool", 
                               default=False, group="ASSEMBLY section")
@@ -169,9 +169,8 @@ class RNAseqDenovo (ApplicationWithoutReferenceGenome):
                raise ValueError( "The sequencer '" + lib_arg["sequencer"] + "' is not valid. Sequencer must be in : MiSeq, HiSeqXX00, ILLUMINA, SLX, SOLEXA, SOLID, 454, COMPLETE, PACBIO, IONTORRENT, CAPILLARY, HELICOS, UNKNOWN." )

        # Contigs split
-        split = self.add_component( "SplitSeq", [self.assembly["file"], self.split] )
+        split = self.add_component( "SplitSeq", [self.assembly["file"], self.assembly["split"]] )
        contigs = split.output_files
-
        # Contigs annotation
        # ##############################
        # Blast annotation of contigs
@@ -234,7 +233,7 @@ class RNAseqDenovo (ApplicationWithoutReferenceGenome):
            all_annotations = rename_all_annotations.output_files
            # Rename best annotations
            rename_best_annotations = self.add_component( "RenameTab", [best_annotations, rename_rules.output_file], component_prefix="bestAnnot" )
-            best_annotations = rename_best_annotations.output_files
+            best_annotations = rename_best_annotations.output_files            
            # If user provide GO
            if self.ontology != None:
                rename_GO = self.add_component( "RenameTab", [self.ontology, rename_rules.output_file], component_prefix="userGO" )
@@ -243,7 +242,7 @@ class RNAseqDenovo (ApplicationWithoutReferenceGenome):
                rename_vcf = self.add_component( "RenameVCF", [self.variant["file"], rename_rules.output_file], component_prefix="userVCF" )

        # Loads contigs
-        merge_contigs = self.add_component( "ConcatenateFiles", [contigs, "contigs.fa", True], component_prefix="contigs" )
+	merge_contigs = self.add_component( "ConcatenateFiles", [contigs, "contigs.fa", True], component_prefix="contigs" )
        merge_best_annot = self.add_component( "ConcatenateFiles", [best_annotations, "best_annotations.gff", True], component_prefix="bestAnnot" )
        contig_files = [merge_contigs.output_file, rename_rules.output_file] if rename_rules is not None else [merge_contigs.output_file]
        contig = self.add_biomart_load( Contig,
@@ -308,8 +307,7 @@ class RNAseqDenovo (ApplicationWithoutReferenceGenome):
                                       trna.get_version(),
                                       "Predicts tRNA with tRNAscan-SE.",
                                       [merge_trna_predict.output_file] )
-
-        if not self.skip_rnammer:
+	if not self.skip_rnammer:
            rnammer = self.add_component( "RNAmmer", [contigs, "eukaryota"] )
            merge_rnammer_predict = self.add_component( "ConcatenateFiles", [rnammer.output_files, "rnammer_predictions.gff", True], component_prefix="rnammerPredict" )
            self.add_biomart_load( Prediction,
@@ -322,8 +320,7 @@ class RNAseqDenovo (ApplicationWithoutReferenceGenome):
                                       rnammer.get_version(),
                                       "Predicts rRNA with RNAmmer.",
                                       [merge_rnammer_predict.output_file] )
-
-        if not self.skip_interpro:
+	if not self.skip_interpro:
            get_orf = self.add_component( "GetORF", [contigs, self.max_orf_nb] )
            # Process
            interpro = self.add_component( "InterProScan", [get_orf.output_files, "p"] )
@@ -379,7 +376,7 @@ class RNAseqDenovo (ApplicationWithoutReferenceGenome):

        # Alignment
        # ##############################
-        all_bams = []
+	all_bams = []
        all_bais = []
        single_fastq, pair1_fastq, pair2_fastq = [], [], []
        single_librairies_names, pair_librairies_names = [], []
@@ -402,10 +399,10 @@ class RNAseqDenovo (ApplicationWithoutReferenceGenome):
                single_fastq.append(lib_arg["files"][0])

        # Index contigs
-        index_fai_dict = self.add_component( "IndexFaiDict", [merge_contigs.output_file, self.split] )
+        index_fai_dict = self.add_component( "IndexFaiDict", [merge_contigs.output_file, self.assembly["split"]] )
        index_bwa = self.add_component( "BWAIndex", [merge_contigs.output_file] )
        # Align paired-end reads
-        if len(pair2_fastq)>0 and len(pair1_fastq)>0 :
+	if len(pair2_fastq)>0 and len(pair1_fastq)>0 :
            align_paired     = self.add_component( "BWA", [index_bwa.databank, pair1_fastq, pair2_fastq, pair_librairies_names, "mem"], component_prefix="paired" )
            index_bam_paired = self.add_component( "SamtoolsIndex", [align_paired.bam_files, 2, 2], component_prefix="paired" )
            all_bams = all_bams + index_bam_paired.sorted_bams
@@ -425,7 +422,7 @@ class RNAseqDenovo (ApplicationWithoutReferenceGenome):
                                           files=index_bam_paired.sorted_bams+index_bam_paired.output_files )

        # Align single reads
-        if len(single_fastq)>0 :
+	if len(single_fastq)>0 :
            align_single     = self.add_component( "BWA", [index_bwa.databank, single_fastq, None, single_librairies_names, "mem"], component_prefix="single" )
            index_bam_single = self.add_component( "SamtoolsIndex", [align_single.bam_files, 2, 2], component_prefix="single" )
            all_bams = all_bams + index_bam_single.sorted_bams
@@ -445,12 +442,12 @@ class RNAseqDenovo (ApplicationWithoutReferenceGenome):
                                           files=index_bam_single.sorted_bams+index_bam_single.output_files )

        # Count and coverage on non filtered bam
-        count_compute = self.add_component( "CountReads", [all_bams] )
+	count_compute = self.add_component( "CountReads", [all_bams] )

        max_lib_size = self.project.get_max_library_size()
        step = max_lib_size/30
        rarefaction_curve = self.add_component( "RarefactionCurves", [all_bams,step] )
-        coverage      = self.add_component( "Coverage", [all_bams, all_bais, self._nb_seq(self.assembly["file"]), self.split] )
+        coverage      = self.add_component( "Coverage", [all_bams, all_bais, self._nb_seq(self.assembly["file"]), self.assembly["split"]] )
        expression    = self.add_biomart_load( Expression, [count_compute.matrix, self.project.libraries, coverage.output_files,rarefaction_curve.stdouts] )
        #Add venn application in web page
        self.add_venn()
@@ -461,7 +458,7 @@ class RNAseqDenovo (ApplicationWithoutReferenceGenome):

        # Variant calling and annotation
        ##############################
-        vcf_file = None
+	vcf_file = None
        gatk_indel_filter_options=""
        gatk_snp_filter_options=""
        if self.variant and self.variant["file"] != None: # If user provide variant file
@@ -595,7 +592,6 @@ class RNAseqDenovo (ApplicationWithoutReferenceGenome):
        else:
            sys.stderr.write("Please provide a best annotation file or at least the best annotation source!\n")
            sys.exit(1)
-
        contig = self.add_biomart_load(Contig,
                                       contig_parameters,
                                       "Assembly",
@@ -603,7 +599,7 @@ class RNAseqDenovo (ApplicationWithoutReferenceGenome):
                                       "0",
                                       self.assembly["software_name"],
                                       self.assembly["software_parameters"],
-                                       self.assembly["software_version"],
+				       self.assembly["software_version"],
                                       self.assembly["comments"], [self.assembly["file"]]+fastq_files)
        
        self.add_blast_search( Contig, self.assembly["file"] )
@@ -682,7 +678,6 @@ class RNAseqDenovo (ApplicationWithoutReferenceGenome):
                                              variant_annotation_arg["software_parameters"],
                                              variant_annotation_arg["software_version"],
                                              variant_annotation_arg["comments"], [variant_annotation_arg["file"]])
-
            variant_count = self.add_biomart_load(CountAlleleLib, [self.variant["file"], self.project.libraries])
            contig_variant = self.add_biomart_load(Variant, [self.variant["file"], variant.new_table])

@@ -948,6 +943,8 @@ class RNAseqDenovo (ApplicationWithoutReferenceGenome):
            arguments.append('--assembly')
            if config.has_option(section, 'file'):
                arguments.append(  'file=' + config.get(section, 'file') )
+            if config.has_option(section, 'split'):
+                arguments.append(  'split=' + config.get(section, 'split') )
            for option in add_analysis_options:
                if config.has_option(section, option):
                    arguments.append( option.replace('_', '-')  + '=' + config.get(section, option) )

--- a/workflows/rnaseqdenovo/components/renamerules.py
+++ b/workflows/rnaseqdenovo/components/renamerules.py
@@ -22,7 +22,7 @@ from jflow.component import Component
 from weaver.function import PythonFunction


-def renames_with_annot( prefix, inputs, rename_file):
+def renames_with_annot( inputs, rename_file, prefix):
    """
     @summary : Writes the new name of each annotation (the new name is built from the subject name).
                For each sequence with same subject name, an unique number is added to the new name : 
@@ -78,7 +78,7 @@ def renames_with_annot( prefix, inputs, rename_file):
        
    out_fh.close()

-def names_with_prefix( prefix, inputs, rename_file   ):
+def names_with_prefix( inputs, rename_file , prefix ):
    """
     @summary : Writes the new name of each sequence (the new name is built from the old name and a prefix).
      @param prefix : the prefix used.
@@ -126,12 +126,12 @@ class RenameRules (Component):
        print self.rename_with_annot
        if self.rename_with_annot == True:
            self.add_python_execution(renames_with_annot,
-                                      cmd_format='{EXE} "' + self.prefix + '" {IN} {OUT}',
+                                      cmd_format='{EXE} {IN} {OUT} "' + self.prefix + '"',
                                    inputs=self.inputs, outputs=self.output_file, includes=self.best_annotations)
        # Only add prefix
        elif self.prefix != None:
            # Process prefix
            self.add_python_execution(names_with_prefix,
-                                      cmd_format='{EXE} "' + self.prefix + '" {IN} {OUT}',
+                                      cmd_format='{EXE} {IN} {OUT} "' + self.prefix + '"',
                                    inputs=self.inputs, outputs=self.output_file, includes=self.best_annotations)
        
\ No newline at end of file