#
# Copyright (C) 2012 INRA
# 
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#

import errno
import inspect
import os
import sys
import tempfile

from jflow.workflows_manager import WorkflowsManager
from jflow.config_reader import JFlowConfigReader
from jflow.dataset import ArrayList
from jflow.utils import which
from jflow.iotypes import DynamicOutput, OutputFile, OutputFileList

from weaver.util import parse_string_list


class Component(object):
    """
    Base class of the workflow components.

    Subclasses have to override `define_parameters` and `process`; they may
    override `get_version`. An instance is identified by its class name and
    its `prefix` (see `get_nameid` and `__eq__`).
    """

    def __init__(self):
        # Second half of the identifier built by get_nameid().
        self.prefix = "default"
        # Directory created by execute() and used as root by get_outputs().
        self.output_directory = None
        self.config_reader = JFlowConfigReader()
        self.version = self.get_version()
        self.batch_options = self.config_reader.get_component_batch_options(self.__class__.__name__)

    def is_dynamic(self):
        """
         @return : True if the component holds at least one DynamicOutput attribute.
        """
        return bool(self.get_dynamic_outputs())

    def get_dynamic_outputs(self):
        """
         @return : the list of outputs updated at the end of component execution.
        """
        return [value for value in self.__dict__.values()
                if isinstance(value, DynamicOutput)]

    def get_component_outputs(self):
        """
         @return : the flat list of the outputs found among the instance
                   attributes: DynamicOutput and OutputFileList attributes
                   contribute each of their elements, OutputFile attributes
                   are added as they are.
        """
        outputs = list()
        for value in self.__dict__.values():
            if isinstance(value, (DynamicOutput, OutputFileList)):
                outputs.extend(value)
            elif isinstance(value, OutputFile):
                outputs.append(value)
        return outputs

    def _longestCommonSubstr(self, data, clean_end=True):
        """
         @param data : list of strings to compare.
         @param clean_end : if True, trailing '_', '-' and '.' characters are
                            removed from the result.
         @return : the longest substring of data[0] found in every element of
                   data (the leftmost one when several have the same length),
                   data[0] itself when data holds a single element, and ''
                   when data is empty.
        """
        if not data:
            # Nothing to compare; the original indexing would raise IndexError.
            return ''

        reference = data[0]
        substr = ''
        if len(data) > 1 and len(reference) > 0:
            for start in range(len(reference)):
                # Only candidates strictly longer than the current best can
                # win, and once a candidate is missing from one element every
                # longer candidate with the same start is missing as well.
                length = len(substr) + 1
                while start + length <= len(reference):
                    candidate = reference[start:start + length]
                    if not all(candidate in item for item in data):
                        break
                    substr = candidate
                    length += 1
        else:
            substr = reference

        if clean_end:
            substr = substr.rstrip("_-.")
        return substr

    @staticmethod
    def _ensure_directory(path):
        """
        Create `path` (and its missing parents) with mode 0o751 unless it is
        already a directory.
         @param path : the directory to create.
        """
        if os.path.isdir(path):
            return
        try:
            os.makedirs(path, 0o751)
        except OSError as error:
            # The directory may have been created between the check and the
            # call; any other failure is reported to the caller.
            if error.errno != errno.EEXIST or not os.path.isdir(path):
                raise

    @staticmethod
    def _output_fields(index, path):
        """
         @param index : rank of `path` in the list of inputs.
         @param path : the input name the fields are computed from.
         @return : the dictionary of the fields usable in an output template.
        """
        number = '{0:05X}'.format(index)
        path_woext = os.path.splitext(path)[0]
        base = os.path.basename(path)
        base_woext, extension = os.path.splitext(base)
        if extension == ".gz":
            # "name.ext.gz": drop the extension hidden behind the ".gz" too.
            base_woext = os.path.splitext(base_woext)[0]
        return {
            "fullpath": path,
            "FULL": path,
            "i": number,
            "NUMBER": number,
            "fullpath_woext": path_woext,
            "FULL_WOEXT": path_woext,
            "basename": base,
            "BASE": base,
            "basename_woext": base_woext,
            "BASE_WOEXT": base_woext,
        }

    def get_outputs(self, output_list=None, input_list=None):
        """
        If `output_list` is a string template, then it may have the following
        fields:

        - `{fullpath}`, `{FULL}`             -- Full input file path.
        - `{basename}`, `{BASE}`             -- Base input file name.
        - `{fullpath_woext}`, `{FULL_WOEXT}` -- Full input file path without extension
        - `{basename_woext}`, `{BASE_WOEXT}` -- Base input file name without extension
                                                (for "*.gz" names the extension
                                                preceding ".gz" is removed too)
        - `{i}`, `{NUMBER}`                  -- Rank of the input, as 5 uppercase
                                                hexadecimal digits.

        When `input_list` is a list of lists, one output is produced per rank
        and the "input" used to fill the template is the longest common
        substring of the base names found at this rank in every sub-list.

         @param output_list : the output template (string) or None.
         @param input_list : the inputs the outputs are named after.
         @return : [] if output_list is None or input_list is empty,
                   [output_list] if there is no input or no field to replace,
                   otherwise one path per input, rooted in self.output_directory.
        """
        if output_list is None:
            return []

        if not isinstance(output_list, str):
            # NOTE(review): only string templates are handled; any other value
            # yields None exactly as before -- confirm callers never rely on it.
            return None

        if not input_list or '{' not in output_list:
            if input_list is not None and len(input_list) == 0:
                return []
            return [output_list]

        if isinstance(input_list[0], list):
            # if multiple list of inputs is used
            names = []
            for rank in range(len(input_list[0])):
                base_names = [os.path.basename(group[rank]) for group in input_list]
                names.append(self._longestCommonSubstr(base_names))
        else:
            names = parse_string_list(input_list)

        return [os.path.join(self.output_directory,
                             output_list.format(**self._output_fields(rank, name)))
                for rank, name in enumerate(names)]

    def execute(self):
        """
        Create the output directory if needed, then run the component.
        """
        # first create the output directory
        self._ensure_directory(self.output_directory)
        # then run the component
        self.process()

    def process(self):
        """
        Run the component, has to be implemented by subclasses
        """
        raise NotImplementedError

    def get_version(self):
        """
        Return the tool version, has to be implemented by subclasses
        """
        return None

    def get_temporary_file(self, suffix=".txt"):
        """
        Build a temporary file name located in the configured tmp directory.
        The directory is created if needed; the file itself does not exist
        when the method returns.
         @param suffix : the suffix of the file name.
         @return : the path of the temporary file.
        """
        tmp_directory = self.config_reader.get_tmp_directory()
        # first check if tmp directory exists
        self._ensure_directory(tmp_directory)
        # Reserve the name in the directory it will live in, so that it is
        # unique there, and release the handle (which deletes the file)
        # explicitly instead of waiting for garbage collection.
        handle = tempfile.NamedTemporaryFile(suffix=suffix, dir=tmp_directory)
        try:
            file_name = os.path.basename(handle.name)
        finally:
            handle.close()
        return os.path.join(tmp_directory, file_name)

    def define_parameters(self, *args):
        """
        Define the component parameters, has to be implemented by subclasses
        """
        raise NotImplementedError

    def get_resource(self, resource):
        """
         @param resource : the name of the resource.
         @return : the value returned by the configuration reader for this resource.
        """
        return self.config_reader.get_resource(resource)

    def get_exec_path(self, software):
        """
        Find the executable to use for `software`. The lookup order is: the
        path returned by the configuration reader, the "../../bin", "../bin"
        and "bin" directories relative to the file defining the component
        class, then `which(software)`. The process exits with status 1 when
        the configured path is not a file or when nothing is found.
         @param software : the name of the executable.
         @return : the path of the executable, or `software` itself when it is
                   only found by `which`.
        """
        exec_path = self.config_reader.get_exec(software)
        if exec_path is not None:
            if not os.path.isfile(exec_path):
                sys.stderr.write("Error: '" + exec_path + "' set for '" + software + "' does not exists, please provide a valid path!\n")
                sys.exit(1)
            return exec_path

        component_directory = os.path.dirname(inspect.getfile(self.__class__))
        for bin_directory in ("../../bin", "../bin", "bin"):
            candidate = os.path.join(component_directory, bin_directory, software)
            if os.path.isfile(candidate):
                return candidate

        if which(software) is None:
            sys.stderr.write("Error: '" + software + "' path connot be retrieved either in the PATH and in the application.properties file!\n")
            sys.exit(1)
        return software

    def get_nameid(self):
        """
         @return : the component identifier: "<class name>.<prefix>".
        """
        return self.__class__.__name__ + "." + self.prefix

    def __eq__(self, other):
        """
         @return : True if both objects have the same class and the same prefix.
        """
        return self.__class__ == other.__class__ and self.prefix == other.prefix

    def __ne__(self, other):
        """
         @return : the negation of __eq__ (Python 2 does not derive it).
        """
        # NOTE(review): __hash__ is deliberately left untouched; under Python 3
        # a class defining __eq__ alone is unhashable -- decide whether
        # components need to be usable as dict keys / set members.
        return not self.__eq__(other)