Maintenance - Mise à jour mensuelle Lundi 1 Juin 2020 entre 7h00 et 9h00

Commit 5c7cc980 authored by Penom Nom's avatar Penom Nom

jflow update (package version)

parent d71edb64
......@@ -15,7 +15,7 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
[global]
# uncomment and set if not in the PATH
# uncomment and set if not in the PATH, should be version >= 4.4.3
#makeflow = /usr/bin/makeflow
# batch system type: local, condor, sge, moab, cluster, wq, hadoop, mpi-queue
batch_system_type = local
......
This diff is collapsed.
......@@ -333,14 +333,14 @@ class Component(object):
get a path to an executable. Has to be implemented by subclasses
if the process has not been implemented
"""
raise NotImplementedError
raise NotImplementedError("Either the Component.get_command() function or the Component.process() function has to be implemented!")
def get_abstraction(self):
    """
    Get the abstraction. Has to be implemented by subclasses
    if process() has not been implemented.

    @raise NotImplementedError : always; this base implementation only
                                 reports which functions a subclass has
                                 to provide.
    """
    # A single raise carrying the explanatory message: a bare
    # `raise NotImplementedError` placed before it would make the
    # message unreachable.
    raise NotImplementedError("Either the Component.get_abstraction() function or the Component.process() function has to be implemented!")
def get_version(self):
"""
......
......@@ -29,7 +29,7 @@ class ExternalParser(object):
return components
def parse(self, component_file):
    """
    Parse a component file. Has to be implemented by subclasses.

    @param component_file : the file to parse.
    @raise NotImplementedError : always; this base implementation does
                                 no parsing.
    """
    # The message names parse(), the method that actually has to be
    # overridden (there is no parser() method in this hunk).
    raise NotImplementedError("The ExternalParser.parse() function has to be implemented!")
def build_component(self, component_name, fn_define_parameters, **kwargs):
"""
......
#
# Copyright (C) 2014 INRA
#
# Copyright (C) 2015 INRA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
\ No newline at end of file
#
# Copyright (C) 2015 INRA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
import os
import re
......
#
# Copyright (C) 2015 INRA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
import os
import re
......
#
# Copyright (C) 2015 INRA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
import sys, re
class GFF3Record:
......
#
# Copyright (C) 2015 INRA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
import os
import re
......
#
# Copyright (C) 2015 INRA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
import os
import re
......
#
# Copyright (C) 2014 INRA
#
#
# Copyright (C) 2015 INRA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
......
#
# Copyright (C) 2015 INRA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
import os
import re
......
#
# Copyright (C) 2015 INRA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
import os
import re
......
This diff is collapsed.
This diff is collapsed.
......@@ -91,18 +91,18 @@ class Makeflow(Engine):
# if a symbol is provided
if symbol:
self.dag_file.write('\t@SYMBOL="' + symbol+'"\n')
self.dag_file.write('@SYMBOL="' + symbol+'"\n')
# Write environmental variables
if options.local:
self.dag_file.write('\t@BATCH_LOCAL=1\n')
self.dag_file.write('@BATCH_LOCAL=1\n')
if options.batch:
self.dag_file.write('\t@BATCH_OPTIONS={0}\n'.format(options.batch))
self.dag_file.write('@BATCH_OPTIONS={0}\n'.format(options.batch))
if options.collect:
self.dag_file.write('\t@_MAKEFLOW_COLLECT_LIST+={0}\n'.format(
self.dag_file.write('@_MAKEFLOW_COLLECT_LIST+={0}\n'.format(
' '.join(map(str, options.collect))))
for k, v in options.environment.items():
self.dag_file.write('\t@{0}={1}\n'.format(k, v))
self.dag_file.write('@{0}={1}\n'.format(k, v))
# Write task command
self.dag_file.write('\t{0}\n'.format(command))
......
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -42,7 +42,7 @@ def inserts_metrics(bam_file, pairs_count_file, metrics_file, hist_file, log_fil
if properly_paired_nb > 0 :
# Process inserts sizes metrics
command = Popen( ["-c", "java -Xmx1g -jar " + collectinsertsizemetrics_path + options + " HISTOGRAM_FILE=" + hist_file + " INPUT=" + bam_file + " OUTPUT=" + metrics_file + " 2> " + log_file], shell=True, stdout=PIPE, stderr=PIPE )
command = Popen( ["-c", "java -Xmx1g -jar " + collectinsertsizemetrics_path + " " +options + " HISTOGRAM_FILE=" + hist_file + " INPUT=" + bam_file + " OUTPUT=" + metrics_file + " 2> " + log_file], shell=True, stdout=PIPE, stderr=PIPE )
stdout, stderr = command.communicate()
# Count nb pairs in bam file
command = Popen( ["-c", samtools_path + " view -F384 " + bam_file + " | wc -l"], shell=True, stdout=PIPE, stderr=PIPE) # First read in pair
......@@ -86,7 +86,7 @@ class InsertsSizes (Analysis):
self.name = "InsertsSizes"
self.description = "Insert size statistics"
self.software = "Picards tools - Insert size"
self.options = " HISTOGRAM_WIDTH="+str(self.histogram_width)+" VALIDATION_STRINGENCY="+self.validation_stringency+" MINIMUM_PCT="+str(self.minimum_pct)
self.options = "HISTOGRAM_WIDTH="+str(self.histogram_width)+" VALIDATION_STRINGENCY="+self.validation_stringency+" MINIMUM_PCT="+str(self.minimum_pct)
def post_process(self):
nb_omitted_samples = 0
......@@ -129,7 +129,7 @@ class InsertsSizes (Analysis):
def process(self):
    """
    Launch one inserts_metrics task per BAM file.

    For the i-th BAM file the task writes to the i-th entries of
    pairs_count_files, info_files, hist_files and log_files, and receives
    the samtools path, the CollectInsertSizeMetrics path and the option
    string as arguments.
    """
    # The option string contains spaces; it is wrapped in double quotes so
    # it is passed on the command line as one argument.
    quoted_options = '"' + self.options + '"'
    for i in range(len(self.bam_files)):
        insertsSizes = PythonFunction(inserts_metrics, cmd_format="{EXE} {IN} {OUT} {ARG}")
        # Exactly one invocation per BAM file: a second call with the same
        # inputs/outputs would schedule the same work twice.
        insertsSizes(
            inputs=self.bam_files[i],
            outputs=[self.pairs_count_files[i], self.info_files[i], self.hist_files[i], self.log_files[i]],
            arguments=[self.get_exec_path("samtools"), self.get_exec_path("CollectInsertSizeMetrics"), quoted_options],
        )
def parse_pairs_count_file(self, input_file):
"""
......
......@@ -85,5 +85,5 @@ class AddSamplesNames (Component):
def process(self):
    """
    Launch one add_sample_name task per input FASTA file.

    The i-th input file is written to the i-th output file, using the
    i-th sample name; stderr of every task is appended to self.stderr.
    """
    # Rename files
    for file_idx in range(len(self.input_fasta)):
        # Build the function once per file. The separators are wrapped in
        # double quotes on the command line.
        rename = PythonFunction(
            add_sample_name,
            cmd_format='{EXE} ' + self.samples_names[file_idx] + ' "' + str(self.sequence_name_sep) + '" "' + str(self.sequence_count_sep) + '" {IN} {OUT} 2>> ' + self.stderr,
        )
        rename(inputs=self.input_fasta[file_idx], outputs=self.output_files[file_idx])
\ No newline at end of file
#
# Copyright (C) 2014 INRA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
import os
from jflow.component import Component
from jflow.iotypes import OutputFile, OutputFileList, InputFile, InputFileList, Formats
from weaver.function import ShellFunction
from weaver.abstraction import Map
class Blast (Component):
    """
    @summary : Launch the NCBI Blast on database(s).
      Each sequences_files is aligned on each database. self.outputs format :
        {
          '/home/myhome/databank/pdbaa':['/home/myhome/data/seq_part_1_pdbaa.blast', '/home/myhome/data/seq_part_2_pdbaa.blast'],
          '/home/myhome/databank/swissprot':['/home/myhome/data/seq_part_1_swissprot.blast', '/home/myhome/data/seq_part_2_swissprot.blast']
        }
    @note : NOTE(review): the example above shows full databank paths as keys,
      but the code keys self.outputs, self.blast_used and self.options by the
      databank *basename*. Two databanks sharing a basename in different
      directories would therefore overwrite each other - confirm callers
      never do this.
    """

    def define_parameters(self, sequences_files, databanks, output_format=5, blast_used="blastp", evalue="1e-5", max_hit=20):
        """
        @param sequences_files : sequences files to process.
        @param databanks : databanks and parameters information. Example :
                             [
                               {'blast_used': 'blastp', 'evalue': '1e-5', 'max_hit': '5', 'file': '/home/myhome/databank/pdbaa'},
                               {'blast_used': 'blastp', 'evalue': '1e-6', 'max_hit': '10', 'file': '/home/myhome/databank/swissprot'},
                             ]
                           'file' is the only mandatory key.
        @param output_format : @see NCBI blast command.
        @param blast_used : default value for blast used. This value is used when 'blast_used' is not provided in the param 'databanks'.
        @param evalue : default value for evalue. This value is used when 'evalue' is not provided in the param 'databanks'.
        @param max_hit : default value for the max number of hits. This value is used when 'max_hit' is not provided in the param 'databanks'.
        """
        self.sequences_files = InputFileList(sequences_files, Formats.FASTA)
        self.outputs = dict()
        self.blast_used = dict()
        self.options = dict()
        databanks_path = list()
        for current_databank in databanks:
            databank_basename = os.path.basename(current_databank['file'])
            # Per-databank settings override the component-wide defaults.
            # dict.get() replaces dict.has_key(), which no longer exists in
            # Python 3.
            self.blast_used[databank_basename] = current_databank.get('blast_used', blast_used)
            # Every value goes through str() so numeric settings
            # (e.g. evalue=1e-5) are accepted as well as strings.
            self.options[databank_basename] = (
                " -evalue " + str(current_databank.get('evalue', evalue))
                + " -max_target_seqs " + str(current_databank.get('max_hit', max_hit))
                + " -outfmt " + str(output_format)
            )
            # Add to inputs list
            databanks_path.append(current_databank['file'])
            # Add to outputs list: one '<sequence basename>_<databank>.blast'
            # per sequences file. No format is attached to these outputs
            # (a Formats.BLAST argument was left commented out originally).
            self.outputs[databank_basename] = OutputFileList(
                self.get_outputs('{basename_woext}_' + databank_basename + '.blast', self.sequences_files)
            )
        self.databanks = InputFileList(databanks_path)
        self.stderr = OutputFile(os.path.join(self.output_directory, "blast.stderr"))

    def process(self):
        """
        Build one blast shell command per databank and map it over every
        sequences file; stderr of all commands is appended to self.stderr.
        """
        for current_databank in self.databanks:
            databank_basename = os.path.basename(current_databank)
            # $1 / $2 are the input and output file substituted through
            # cmd_format '{EXE} {IN} {OUT}'.
            blast = ShellFunction(
                self.get_exec_path(self.blast_used[databank_basename]) + self.options[databank_basename] + " -query $1 -out $2 -db " + current_databank + " 2>> " + self.stderr,
                cmd_format='{EXE} {IN} {OUT}',
            )
            Map(blast, inputs=self.sequences_files, outputs=self.outputs[databank_basename])
\ No newline at end of file
......@@ -249,7 +249,7 @@ class Cdhit (Analysis):
rename = MultiMap( rename, inputs=[tmp_fasta_files, self.cluster_files], outputs=self.output_files )
# Build biom
biom = PythonFunction( to_biom, cmd_format="{EXE} {OUT} {IN} '" + str(self.precluster_sample_sep) + "' '" + str(self.precluster_size_sep) + "' 2>> " + self.stderr )
biom = PythonFunction( to_biom, cmd_format='{EXE} {OUT} {IN} "' + str(self.precluster_sample_sep) + '" "' + str(self.precluster_size_sep) + '" 2>> ' + self.stderr )
biom = Map( biom, inputs=self.cluster_files, outputs=self.biom_files )
# Depths stats
......
......@@ -156,5 +156,5 @@ class UsearchChimera (Analysis):
# Statistics
if self.cluster_size_sep is not None:
stat = PythonFunction( filter_count, cmd_format="{EXE} '" + self.cluster_size_sep + "' {IN} {OUT}" )
stat = PythonFunction( filter_count, cmd_format='{EXE} "' + self.cluster_size_sep + '" {IN} {OUT}' )
stat = MultiMap(stat, inputs=[self.chimeras, self.nonchimeras], outputs=[self.stat])
\ No newline at end of file
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment