utils.py 3.18 KB
Newer Older
Jerome Mariette's avatar
Jerome Mariette committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
#
# Copyright (C) 2012 INRA
# 
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#

import os

from jflow.component import Component
from jflow.utils import split_and_rename_seq, get_file_base

from weaver.function import PythonFunction, ShellFunction
from weaver.abstraction import Merge, Map


class SplitAndRename (Component):
    """Workflow component wrapping ``jflow.utils.split_and_rename_seq``.

    The helper is run as a weaver ``PythonFunction``; its declared output is
    ``<file base>.numeric_ids.fasta`` inside the component output directory.
    """

    def __init__(self):
        # This component registers itself with the base class as dynamic.
        Component.__init__(self, is_dynamic=True)

    def define_parameters(self, input_file, nb_seq_per_file=None):
        """Record the parameters and derive the declared output path.

        @param input_file: path of the sequence file handed to the helper
        @param nb_seq_per_file: forwarded untouched to the helper
               (presumably the number of sequences per split file -- confirm
               against jflow.utils.split_and_rename_seq)
        """
        self.input_file = input_file
        self.nb_seq_per_file = nb_seq_per_file
        renamed_name = get_file_base(input_file) + ".numeric_ids.fasta"
        self.renamed_seq_file = os.path.join(self.output_directory, renamed_name)

    def run(self):
        """Schedule split_and_rename_seq on the input file."""
        helper_arguments = [self.input_file, self.output_directory, self.nb_seq_per_file]
        split_and_rename = PythonFunction(split_and_rename_seq)
        split_and_rename(outputs=self.renamed_seq_file, arguments=helper_arguments)
        
class MergeFiles (Component):
    """Workflow component merging input files with the weaver ``Merge`` abstraction.

    Either all inputs are merged into a single file, or (``on_basename=True``)
    inputs are grouped by the part of their file name before the first "."
    and one merged file is declared per group.
    """

    def define_parameters(self, input_files, output_file_name="merged.txt", on_basename=False):
        """Record the parameters and compute the merged output paths.

        @param input_files: paths of the files to merge
        @param output_file_name: name of the merged file; when on_basename is
               set it is used as a suffix: <basename>_<output_file_name>
        @param on_basename: group the inputs by basename before merging
        """
        self.input_files = input_files
        self.on_basename = on_basename
        self.merged_files = []
        if self.on_basename:
            # basename -> list of input files sharing that basename
            self.basenames = {}
            # basename -> path of the merged file for that group
            self.merged_basenames = {}
            for input_file in self.input_files:
                # The basename is everything before the first "." of the file name.
                basename = os.path.basename(input_file).split(".")[0]
                # setdefault/in replace dict.has_key, which no longer exists in Python 3.
                self.basenames.setdefault(basename, []).append(input_file)
                if basename not in self.merged_basenames:
                    out_name = os.path.join(self.output_directory, basename + "_" + output_file_name)
                    self.merged_basenames[basename] = out_name
                    # Keep merged_files in order of first appearance of each basename.
                    self.merged_files.append(out_name)
        else:
            self.merged_files.append(os.path.join(self.output_directory, output_file_name))

    def run(self):
        """Schedule one Merge per basename group, or a single global Merge."""
        if self.on_basename and len(self.basenames) < len(self.input_files):
            for basename in self.basenames:
                Merge(self.basenames[basename], self.merged_basenames[basename], local=False)
        else:
            # NOTE(review): this branch is also taken when on_basename is set but
            # every input has a distinct basename; all inputs are then merged into
            # merged_files[0] only, although merged_files lists one path per
            # basename. Behaviour kept as is -- confirm this is intended.
            Merge(self.input_files, self.merged_files[0], local=False)

class Sort (Component):
    """Workflow component sorting each input file numerically on one column.

    Each input file is run through the ``sort`` shell command via a weaver
    ``Map``, producing one ``.sorted`` output per input.
    """

    def define_parameters(self, input_files, column=1):
        """Record the parameters and declare the sorted output files.

        @param input_files: paths of the files to sort
        @param column: 1-based field number used as the sort key (sort -k)
        """
        self.input_files = input_files
        self.column = column
        # Output names come from Component.get_outputs with the
        # '{basename_woext}.sorted' pattern applied to the inputs.
        self.sorted_files = self.get_outputs('{basename_woext}.sorted', self.input_files)

    def run(self):
        """Schedule a numeric ``sort`` of every input file on the chosen column."""
        # The key used to be hard-coded to "1,1", silently ignoring `column`.
        # int() also guarantees that only a number is interpolated into the
        # shell command line.
        key = int(self.column)
        sort = ShellFunction("sort -k %d,%d -n $1 > $2" % (key, key), cmd_format='{EXE} {IN} {OUT}')
        Map(sort, self.input_files, self.sorted_files)