Commit fb81d857 authored by Jerome Mariette's avatar Jerome Mariette

add jflow and first test to get rid of ergatis

parent d342b146
#
# Copyright (C) 2012 INRA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
[general]
# The Typo3 pid where to link data
pid = 5
# The ng6 work directory
work_dir = /path/to/working/directory
# The ng6 save directory
save_dir = /path/to/saving/directory
# The ng6 tmp directory; it must be accessible
# from every node when running in a cluster
# environment
tmp_dir = /path/to/tmp/directory
[database]
host = localhost
user = typo3
passwd = typo3
dbname = typo3
[logging]
log_file = /home/jmariett/scratch/ng6/ng6.log
[storage]
save_directory = /home/jmariett/scratch/ngspipelines/
working_directory = /home/jmariett/scratch/weaver
[softwares]
blastall = /usr/bin/blastall
formatdb = /usr/bin/formatdb
[454_mids]
MID1 = ACGAGTGCGT
MID2 = ACGCTCGACA
MID3 = AGACGCACTC
MID4 = AGCACTGTAG
MID5 = ATCAGACACG
MID6 = ATATCGCGAG
MID7 = CGTGTCTCTA
MID8 = CTCGCGTGTC
MID9 = TAGTATCAGC
MID10 = TCTCTATGCG
MID11 = TGATACGTCT
MID12 = TACTGAGCTA
MID13 = CATAGTAGTG
MID14 = CGAGAGATAC
MID15 = ATACGACGTA
MID16 = TCACGTACTA
MID17 = CGTCTAGTAC
MID18 = TCTACGTAGC
MID19 = TGTACTACTC
MID20 = ACGACTACAG
MID21 = CGTAGACTAG
MID22 = TACGAGTATG
MID23 = TACTCTCGTG
MID24 = TAGAGACGAG
MID25 = TCGTCGCTCG
MID26 = ACATACGCGT
MID27 = ACGCGAGTAT
MID28 = ACTACTATGT
MID29 = ACTGTACAGT
MID30 = AGACTATACT
MID31 = AGCGTCGTCT
MID32 = AGTACGCTAT
MID33 = ATAGAGTACT
MID34 = CACGCTACGT
MID35 = CAGTAGACGT
MID36 = CGACGTGACT
MID37 = TACACACACT
MID38 = TACACGTGAT
MID39 = TACAGATCGT
MID40 = TACGCTGTCT
MID41 = TAGTGTAGAT
MID42 = TCGATCACGT
MID43 = TCGCACTAGT
MID44 = TCTAGCGACT
MID45 = TCTATACTAT
MID46 = TGACGTATGT
MID47 = TGTGAGTAGT
MID48 = ACAGTATATA
MID49 = ACGCGATCGA
MID50 = ACTAGCAGTA
MID51 = AGCTCACGTA
MID52 = AGTATACATA
MID53 = AGTCGAGAGA
MID54 = AGTGCTACGA
MID55 = CGATCGTATA
MID56 = CGCAGTACGA
MID57 = CGCGTATACA
MID58 = CGTACAGTCA
MID59 = CGTACTCAGA
MID60 = CTACGCTCTA
MID61 = CTATAGCGTA
MID62 = TACGTCATCA
MID63 = TAGTCGCATA
MID64 = TATATATACA
MID65 = TATGCTAGTA
MID66 = TCACGCGAGA
MID67 = TCGATAGTGA
MID68 = TCGCTGCGTA
MID69 = TCTGACGTCA
MID70 = TGAGTCAGTA
MID71 = TGTAGTGTGA
MID72 = TGTCACACGA
MID73 = TGTCGTCGCA
MID74 = ACACATACGC
MID75 = ACAGTCGTGC
MID76 = ACATGACGAC
MID77 = ACGACAGCTC
MID78 = ACGTCTCATC
MID79 = ACTCATCTAC
MID80 = ACTCGCGCAC
MID81 = AGAGCGTCAC
MID82 = AGCGACTAGC
MID83 = AGTAGTGATC
MID84 = AGTGACACAC
MID85 = AGTGTATGTC
MID86 = ATAGATAGAC
MID87 = ATATAGTCGC
MID88 = ATCTACTGAC
MID89 = CACGTAGATC
MID90 = CACGTGTCGC
MID91 = CATACTCTAC
MID92 = CGACACTATC
MID93 = CGAGACGCGC
MID94 = CGTATGCGAC
MID95 = CGTCGATCTC
MID96 = CTACGACTGC
MID97 = CTAGTCACTC
MID98 = CTCTACGCTC
MID99 = CTGTACATAC
MID100 = TAGACTGCAC
MID101 = TAGCGCGCGC
MID102 = TAGCTCTATC
MID103 = TATAGACATC
MID104 = TATGATACGC
MID105 = TCACTCATAC
MID106 = TCATCGAGTC
MID107 = TCGAGCTCTC
MID108 = TCGCAGACAC
MID109 = TCTGTCTCGC
MID110 = TGAGTGACGC
MID111 = TGATGTGTAC
MID112 = TGCTATAGAC
MID113 = TGCTCGCTAC
MID114 = ACGTGCAGCG
MID115 = ACTCACAGAG
MID116 = AGACTCAGCG
MID117 = AGAGAGTGTG
MID118 = AGCTATCGCG
MID119 = AGTCTGACTG
MID120 = AGTGAGCTCG
MID121 = ATAGCTCTCG
MID122 = ATCACGTGCG
MID123 = ATCGTAGCAG
MID124 = ATCGTCTGTG
MID125 = ATGTACGATG
MID126 = ATGTGTCTAG
MID127 = CACACGATAG
MID128 = CACTCGCACG
MID129 = CAGACGTCTG
MID130 = CAGTACTGCG
MID131 = CGACAGCGAG
MID132 = CGATCTGTCG
MID133 = CGCGTGCTAG
MID134 = CGCTCGAGTG
MID135 = CGTGATGACG
MID136 = CTATGTACAG
MID137 = CTCGATATAG
MID138 = CTCGCACGCG
MID139 = CTGCGTCACG
MID140 = CTGTGCGTCG
MID141 = TAGCATACTG
MID142 = TATACATGTG
MID143 = TATCACTCAG
MID144 = TATCTGATAG
MID145 = TCGTGACATG
MID146 = TCTGATCGAG
MID147 = TGACATCTCG
MID148 = TGAGCTAGAG
MID149 = TGATAGAGCG
MID150 = TGCGTGTGCG
MID151 = TGCTAGTCAG
MID152 = TGTATCACAG
MID153 = TGTGCGCGTG
RL1 = ACACGACGACT,AGTCGTGGTGT
RL2 = ACACGTAGTAT,ATACTAGGTGT
RL3 = ACACTACTCGT,ACGAGTGGTGT
RL4 = ACGACACGTAT,ATACGTGGCGT
RL5 = ACGAGTAGACT,AGTCTACGCGT
RL6 = ACGCGTCTAGT,ACTAGAGGCGT
RL7 = ACGTACACACT,AGTGTGTGCGT
RL8 = ACGTACTGTGT,ACACAGTGCGT
RL9 = ACGTAGATCGT,ACGATCTGCGT
RL10 = ACTACGTCTCT,AGAGACGGAGT
RL11 = ACTATACGAGT,ACTCGTAGAGT
RL12 = ACTCGCGTCGT,ACGACGGGAGT
RL13 = AGACTCGACGT,ACGTCGGGTCT
RL14 = AGTACGAGAGT,ACTCTCGGACT
RL15 = AGTACTACTAT,ATAGTAGGACT
RL16 = AGTAGACGTCT,AGACGTCGACT
RL17 = AGTCGTACACT,AGTGTAGGACT
RL18 = AGTGTAGTAGT,ACTACTAGACT
RL19 = ATAGTATACGT,ACGTATAGTAT
RL20 = CAGTACGTACT,AGTACGTGCTG
RL21 = CGACGACGCGT,ACGCGTGGTCG
RL22 = CGACGAGTACT,AGTACTGGTCG
RL23 = CGATACTACGT,ACGTAGTGTCG
RL24 = CGTACGTCGAT,ATCGACGGACG
RL25 = CTACTCGTAGT,ACTACGGGTAG
RL26 = GTACAGTACGT,ACGTACGGTAC
RL27 = GTCGTACGTAT,ATACGTAGGAC
RL28 = GTGTACGACGT,ACGTCGTGCAC
RL29 = ACACAGTGAGT,ACTCACGGTGT
RL30 = ACACTCATACT,AGTATGGGTGT
RL31 = ACAGACAGCGT,ACGCTGTGTGT
RL32 = ACAGACTATAT,ATATAGTGTGT
RL33 = ACAGAGACTCT,AGAGTCTGTGT
RL34 = ACAGCTCGTGT,ACACGAGGTGT
RL35 = ACAGTGTCGAT,ATCGACAGTGT
RL36 = ACGAGCGCGCT,AGCGCGCGCGT
RL37 = ACGATGAGTGT,ACACTCAGCGT
RL38 = ACGCGAGAGAT,ATCTCTGGCGT
RL39 = ACGCTCTCTCT,AGAGAGGGCGT
RL40 = ACGTCGCTGAT,ATCAGCGGCGT
RL41 = ACGTCTAGCAT,ATGCTAGGCGT
RL42 = ACTAGTGATAT,ATATCACGAGT
RL43 = ACTCACACTGT,ACAGTGGGAGT
RL44 = ACTCACTAGCT,AGCTAGGGAGT
RL45 = ACTCTATATAT,ATATATGGAGT
RL46 = ACTGATCTCGT,ACGAGATGAGT
RL47 = ACTGCTGTACT,AGTACAGGAGT
RL48 = ACTGTAGCGCT,AGCGCTAGAGT
RL49 = AGACACTCACT,AGTGAGGGTCT
RL50 = AGACATATAGT,ACTATAGGTCT
RL51 = AGACGTGATCT,AGATCAGGTCT
RL52 = AGAGTACAGAT,ATCTGTAGTCT
RL53 = AGAGTATCTCT,AGAGATAGTCT
RL54 = AGATACGCTGT,ACAGCGTGTCT
RL55 = AGATCTAGTCT,AGACTAGGTCT
RL56 = AGCAGCGTAGT,ACTACGCGGCT
RL57 = AGCGCACGAGT,ACTCGTGGGCT
RL58 = AGCGTGTGCGT,ACGCACAGGCT
RL59 = AGCTAGATACT,AGTATCTGGCT
RL60 = AGCTGTCGACT,AGTCGACGGCT
RL61 = AGTATGCACGT,ACGTGCAGACT
RL62 = AGTCGCGCTAT,ATAGCGGGACT
RL63 = AGTCTGTCTGT,ACAGACGGACT
RL64 = ATACACACGAT,ATCGTGGGTAT
RL65 = ATACGCGTGCT,AGCACGGGTAT
RL66 = ATACTAGCACT,AGTGCTGGTAT
RL67 = ATAGAGCTAGT,ACTAGCTGTAT
RL68 = ATATAGAGTAT,ATACTCTGTAT
RL69 = ATCGCTCACGT,ACGTGAGGGAT
RL70 = ATCGTCAGTCT,AGACTGAGGAT
RL71 = ATCTCTCGTAT,ATACGAGGGAT
RL72 = ATCTGAGACGT,ACGTCTCGGAT
RL73 = ATGCTACGTCT,AGACGTGGCAT
RL74 = ATGTGACTACT,AGTAGTCGCAT
RL75 = CACGAGACAGT,ACTGTCTGGTG
RL76 = CACGCGAGTCT,AGACTCGGGTG
RL77 = CACGCTACGAT,ATCGTAGGGTG
RL78 = CACGTGTATAT,ATATACAGGTG
RL79 = CACTACGATGT,ACATCGTGGTG
RL80 = CACTATACTCT,AGAGTATGGTG
RL81 = CAGCGTACTGT,ACAGTAGGCTG
RL82 = CAGTCTCTAGT,ACTAGAGGCTG
RL83 = CATAGTCGCGT,ACGCGACGATG
RL84 = CGAGACACTAT,ATAGTGTGTCG
RL85 = CGAGAGTGTGT,ACACACTGTCG
RL86 = CGAGTCATCGT,ACGATGAGTCG
RL87 = CGATCGTATAT,ATATACGGTCG
RL88 = CGCAGTACGCT,AGCGTACGGCG
RL89 = CGCGATCGTAT,ATACGATGGCG
RL90 = CGCGCTATACT,AGTATAGGGCG
RL91 = CGTACAGATAT,ATATCTGGACG
RL92 = CGTAGCTCTCT,AGAGAGCGACG
RL93 = CGTATAGTGCT,AGCACTAGACG
RL94 = CGTCAGCGACT,AGTCGCGGACG
RL95 = CGTCGCAGTGT,ACACTGGGACG
RL96 = CGTCTCACGAT,ATCGTGGGACG
RL97 = CGTGACTCAGT,ACTGAGTGACG
RL98 = CTACACGCTCT,AGAGCGGGTAG
RL99 = CTACGATATGT,ACATATGGTAG
RL100 = CTAGACAGACT,AGTCTGTGTAG
RL101 = CTAGTACTCAT,ATGAGTAGTAG
RL102 = CTATATGTCGT,ACGACATGTAG
RL103 = CTATCGACACT,AGTGTCGGTAG
RL104 = CTATGTAGAGT,ACTCTACGTAG
RL105 = CTCACGTACAT,ATGTACGGGAG
RL106 = CTCGAGTCTCT,AGAGACTGGAG
RL107 = CTCGTCGAGAT,ATCTCGAGGAG
RL108 = CTCTACAGCGT,ACGCTGTGGAG
RL109 = CTGTCGTGCGT,ACGCACGGCAG
RL110 = CTGTGACGTGT,ACACGTCGCAG
RL111 = GACGCTGTCGT,ACGACAGGGTC
RL112 = GACGTATGACT,AGTCATAGGTC
RL113 = GACTAGCTAGT,ACTAGCTGGTC
RL114 = GAGACGTCGCT,AGCGACGGCTC
RL115 = GAGAGAGACGT,ACGTCTCGCTC
RL116 = GCGTAGACTAT,ATAGTCTGCGC
RL117 = GCGTCGTGTCT,AGACACGGCGC
RL118 = GCTCTCTACGT,ACGTAGGGAGC
RL119 = GTACACTGTAT,ATACAGGGTAC
RL120 = GTACGCGACAT,ATGTCGGGTAC
RL121 = GTACTATAGAT,ATCTATGGTAC
RL122 = GTACTGAGTCT,AGACTCGGTAC
RL123 = GTAGCTAGCGT,ACGCTAGGTAC
RL124 = GTAGTCACTGT,ACAGTGAGTAC
RL125 = GTAGTGTCACT,AGTGACAGTAC
RL126 = GTATACATAGT,ACTATGTGTAC
RL127 = GTCATCGTCGT,ACGACGAGGAC
RL128 = GTCGACACGCT,AGCGTGTGGAC
RL129 = GTCGAGTGAGT,ACTCACTGGAC
RL130 = GTCTACTATCT,AGATAGTGGAC
RL131 = GTGTCTAGACT,AGTCTAGGCAC
RL132 = GTGTGTATCGT,ACGATACGCAC
\ No newline at end of file
#
# Copyright (C) 2012 INRA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
import sys, os
# Climb from the launched script towards the filesystem root and put the
# first directory that contains the jflow package at the front of sys.path.
path = os.path.abspath(sys.argv[0])
while True:
    parent = os.path.dirname(path)
    if parent == path:
        # Reached the root without finding jflow: sys.path is left untouched
        break
    if os.path.exists(os.path.join(path, 'jflow', '__init__.py')):
        sys.path.insert(0, path)
        break
    path = parent
\ No newline at end of file
#
# Copyright (C) 2012 INRA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
import sys
import argparse
try:
import _preamble
except ImportError:
sys.exc_clear()
from jflow.workflows_manager import WorkflowsManager
from jflow.workflow import Workflow
from ng6.project import Project
if __name__ == '__main__':
    # Top-level parser: the project creation and every pipeline are sub-commands
    parser = argparse.ArgumentParser(prog='ng6')
    subparsers = parser.add_subparsers(title='Available pipelines')

    # Built-in sub-command used to register a new project
    sub_parser = subparsers.add_parser("addproject", help="Add a brand new project")
    for flag, help_text, destination in (
            ("--name", "Give a name to your project (has to be unique)", "project_name"),
            ("--description", "Give a description to your project", "description"),
            ("--admin-login", "Who is the project administrator", "admin_login")):
        sub_parser.add_argument(flag, type=str, help=help_text, required=True,
                                dest=destination, metavar="STR")
    sub_parser.set_defaults(cmd_object="addproject")

    # One sub-command per workflow returned by the workflows manager, exposing
    # each declared workflow parameter as a command line option
    wf_instances = WorkflowsManager.get_workflows()
    for instance in wf_instances:
        sub_parser = subparsers.add_parser(instance.name, help=instance.description)
        for param in instance.parameters:
            sub_parser.add_argument(param.flag,
                                    type=param.type,
                                    help=param.help,
                                    required=param.required,
                                    dest=param.name,
                                    metavar=param.metavar.upper(),
                                    default=param.default)
        sub_parser.set_defaults(cmd_object=instance)

    args = vars(parser.parse_args())
    cmd_object = args["cmd_object"]

    # NOTE: no exception is trapped below, so any error raised by the workflow
    # or by the project creation ends the program with a traceback.
    if isinstance(cmd_object, Workflow):
        cmd_object.execute(args)
    elif cmd_object == "addproject":
        my_project = Project(args["project_name"], args["description"], args["admin_login"])
#
# Copyright (C) 2012 INRA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Version string exposed by this module
__version__ = '1.0'
\ No newline at end of file
#
# Copyright (C) 2012 INRA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
import os
import inspect
from jflow.workflows_manager import WorkflowsManager
from jflow.config_reader import JFlowConfigReader
from jflow.dataset import ArrayList
from weaver.util import parse_string_list
class Component(object):
    """
    Base class of the components.

    A component holds an output directory; `execute` creates that directory
    and then calls `run`. Subclasses have to implement `run` and
    `define_parameters`.
    """

    def __init__(self, is_dynamic=False):
        """
        @param is_dynamic: if true, this component has dynamic inputs/outputs
        """
        self.is_dynamic = is_dynamic
        self.prefix = "default"
        # Stays None until it is assigned from outside; `execute` and the
        # template form of `get_outputs` need a real path here.
        self.output_directory = None

    def get_outputs(self, output_list=None, input_list=None):
        """
        Build the list of output files of the component.

        If `output_list` is a string template, then it may have the following
        fields, filled in once per entry of `input_list`:

        - `{fullpath}`, `{FULL}`                 -- Full input file path.
        - `{basename}`, `{BASE}`                 -- Base input file name.
        - `{fullpath_woext}`, `{FULL_WOEXT}`     -- Full input file path without extension
        - `{basename_woext}`, `{BASE_WOEXT}`     -- Base input file name without extension
                                                    (for a `.gz` file the extension before
                                                    `.gz` is removed too)
        - `{i}`, `{NUMBER}`                      -- Position of the input in `input_list`,
                                                    as 5 upper case hexadecimal digits.

        Every formatted name is joined to `self.output_directory`.

        @param output_list: None, a plain file name or a string template
        @param input_list: the inputs used to fill in the template
        @return: [] if `output_list` is None, [output_list] if there is no input
                 or no `{` in `output_list`, otherwise one path per input.
                 None is returned when `output_list` is not a str.
        """
        if output_list is None:
            return []
        if not isinstance(output_list, str):
            # NOTE(review): only string values are handled; any other type
            # yields None exactly as before -- confirm this is what callers expect.
            return None

        if not input_list or '{' not in output_list:
            return [output_list]

        outputs = []
        for index, input_path in enumerate(parse_string_list(input_list)):
            number = '{0:05X}'.format(index)
            fullpath_woext = os.path.splitext(input_path)[0]
            basename = os.path.basename(input_path)
            basename_woext, extension = os.path.splitext(basename)
            if extension == ".gz":
                # sample.fastq.gz -> sample
                basename_woext = os.path.splitext(basename_woext)[0]
            file_name = output_list.format(
                fullpath=input_path,
                FULL=input_path,
                i=number,
                NUMBER=number,
                fullpath_woext=fullpath_woext,
                FULL_WOEXT=fullpath_woext,
                basename=basename,
                BASE=basename,
                basename_woext=basename_woext,
                BASE_WOEXT=basename_woext)
            outputs.append(os.path.join(self.output_directory, file_name))
        return outputs

    def execute(self):
        """
        Create the output directory, then run the component.

        The directory is requested with mode 0o751. os.makedirs raises an
        OSError if it already exists; this is deliberately left untouched.
        """
        # first create the output directory; 0o751 is the octal spelling
        # accepted by both Python 2.6+ and Python 3
        os.makedirs(self.output_directory, 0o751)
        # then run the component
        self.run()

    def run(self):
        """
        Run the component, has to be implemented by subclasses
        """
        raise NotImplementedError

    def define_parameters(self, *args):
        """
        Define the component parameters, has to be implemented by subclasses
        """
        raise NotImplementedError

    def get_exec_path(self, software):
        """
        Find the path of an executable.

        The value given by JFlowConfigReader.get_exec wins. When it is None,
        the first existing file among the following is used, relative to the
        directory of the file defining the component class:
        `<dir>/<software>`, `<dir>/../bin/<software>`, `<dir>/bin/<software>`.

        @param software: name of the executable
        @return: the executable path, or None if it was not found anywhere
        """
        config_reader = JFlowConfigReader()
        exec_path = config_reader.get_exec(software)
        if exec_path is None:
            component_dir = os.path.dirname(inspect.getfile(self.__class__))
            candidates = (os.path.join(component_dir, software),
                          os.path.join(component_dir, "../bin", software),
                          os.path.join(component_dir, "bin", software))
            for candidate in candidates:
                if os.path.isfile(candidate):
                    exec_path = candidate
                    break
        return exec_path

    def __eq__(self, other):
        """
        Two components are equal if they share the same class and prefix
        """
        return self.__class__ == other.__class__ and self.prefix == other.prefix

    def __ne__(self, other):
        """
        Opposite of __eq__; Python 2 does not derive `!=` from `==` by itself
        """
        return not self.__eq__(other)
#
# Copyright (C) 2012 INRA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
\ No newline at end of file
#
# Copyright (C) 2012 INRA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
import os
from jflow.component import Component
from jflow.utils import split_and_rename_seq, get_file_base
from weaver.function import ShellFunction
class FormatDB (Component):
    """
    Component running the formatdb executable on a fasta file.
    """

    def __init__(self):
        super(FormatDB, self).__init__(is_dynamic=True)

    def define_parameters(self, input_fasta, protein=False, nb_seq_per_file=None):
        """
        @param input_fasta: path of the fasta file to format
        @param protein: if true formatdb is called with `-p T`, otherwise `-p F`
        @param nb_seq_per_file: accepted but not used by this component
        """
        self.input_fasta = input_fasta
        self.protein = protein
        # The database is named after the fasta file, inside the output directory
        fasta_name = os.path.basename(input_fasta)
        self.database = os.path.join(self.output_directory, fasta_name)

    def run(self):
        """
        Link the fasta file into the output directory and format it
        """
        # first make the symbolic link
        os.symlink(self.input_fasta, self.database)
        protein_flag = "T" if self.protein else "F"
        command = self.get_exec_path("formatdb") + " -p " + protein_flag + " -i $1 -n $2"
        formatdb = ShellFunction(command, cmd_format='{EXE} {IN} {OUT}')
        formatdb(inputs=self.input_fasta, outputs=self.database)
\ No newline at end of file
#
# Copyright (C) 2012 INRA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
import os
from jflow.component import Component
from jflow.utils import split_and_rename_seq, get_file_base
from weaver.function import PythonFunction, ShellFunction
from weaver.abstraction import Merge, Map
class SplitAndRename (Component):
    """
    Component calling split_and_rename_seq on a sequence file.
    """

    def __init__(self):
        super(SplitAndRename, self).__init__(is_dynamic=True)

    def define_parameters(self, input_file, nb_seq_per_file=None):
        """
        @param input_file: the sequence file to process
        @param nb_seq_per_file: forwarded as is to split_and_rename_seq
        """
        self.input_file = input_file
        self.nb_seq_per_file = nb_seq_per_file
        # Declared output: <file base>.numeric_ids.fasta in the output directory
        renamed_name = get_file_base(input_file) + ".numeric_ids.fasta"
        self.renamed_seq_file = os.path.join(self.output_directory, renamed_name)

    def run(self):
        """
        Wrap split_and_rename_seq in a PythonFunction and call it
        """
        call_arguments = [self.input_file, self.output_directory, self.nb_seq_per_file]
        rename = PythonFunction(split_and_rename_seq)
        rename(outputs=self.renamed_seq_file, arguments=call_arguments)
class MergeFiles (Component):
def define_parameters(self, input_files, output_file_name="merged.txt", on_basename=False):
self.input_files = input_files
self.on_basename = on_basename
self.merged_files = []
if self.on_basename:
self.basenames = {}
self.merged_basenames = {}
for file in self.input_files:
basename = os.path.basename(file).split(".")[0]
if self.basenames.has_key(basename):
self.basenames[basename].append(file)
else:
self.basenames[basename] = [file]
out_name = os.path.join(self.output_directory, basename+"_"+output_file_name)
if not self.merged_basenames.has_key(basename):
self.merged_basenames[basename] = out_name