#
# Copyright (C) 2012 INRA
# 
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#

import os
19
import sys
Jerome Mariette's avatar
Jerome Mariette committed
20
21
import inspect
import tempfile
Jerome Mariette's avatar
Jerome Mariette committed
22
import types
Jerome Mariette's avatar
Jerome Mariette committed
23
24
25
26

from jflow.workflows_manager import WorkflowsManager
from jflow.config_reader import JFlowConfigReader
from jflow.dataset import ArrayList
27
from jflow.utils import which
Jerome Mariette's avatar
Jerome Mariette committed
28
from jflow.parameter import *
Jerome Mariette's avatar
Jerome Mariette committed
29
30
31
32
33
34
from weaver.util import parse_string_list


class Component(object):
    """
    """
Frédéric Escudié's avatar
Frédéric Escudié committed
35

36
    def __init__(self):
        """
        Initialise the component state: naming prefix, parameter order,
        output directory, configuration reader, tool version and batch options.
        """
        component_class = self.__class__.__name__
        self.prefix, self.params_order, self.output_directory = "default", [], None
        # the configuration reader is set before get_version() is called
        self.config_reader = JFlowConfigReader()
        self.version = self.get_version()
        self.batch_options = self.config_reader.get_component_batch_options(component_class)
Frédéric Escudié's avatar
Frédéric Escudié committed
43
44

    def is_dynamic(self):
45
46
47
48
49
50
51
52
53
54
55
        return len(self.get_dynamic_outputs()) != 0

    def get_dynamic_outputs(self):
        """
         @return : the list of outputs updated at the end of component execution.
        """
        dynamic_outputs = list()
        for attribute_value in self.__dict__.values():
            if issubclass( attribute_value.__class__, DynamicOutput ):
                dynamic_outputs.append( attribute_value )
        return dynamic_outputs
Frédéric Escudié's avatar
Frédéric Escudié committed
56

Philippe Bardou's avatar
Philippe Bardou committed
57
    def get_component_outputs(self, web_path):
Philippe Bardou's avatar
Philippe Bardou committed
58
        outputs = {}
59
60
        for attribute_value in self.__dict__.values():
            if ( issubclass( attribute_value.__class__, DynamicOutput ) or
Philippe Bardou's avatar
Philippe Bardou committed
61
62
                 issubclass( attribute_value.__class__, OutputFileList) ):
                for f in attribute_value:
Philippe Bardou's avatar
Philippe Bardou committed
63
                    outputs[os.path.basename(f)] = self._webify_workflow_outputs(web_path, f)
64
            elif issubclass( attribute_value.__class__, OutputFile):
Philippe Bardou's avatar
Philippe Bardou committed
65
                outputs[os.path.basename(attribute_value)] = self._webify_workflow_outputs(web_path, attribute_value)
66
        return outputs
Frédéric Escudié's avatar
Frédéric Escudié committed
67

68
69
70
71
72
73
74
75
76
77
    def add_input_file(self, name, help, file_format="any", default=None, type="inputfile",
                       required=False, flag=None, group="default", display_name=None, add_to=None):
        """
        Create an InputFile parameter and attach it to the component or to one of its parameters.
          @param name : name of the parameter, also used as attribute name on the component
          @param help : help text, forwarded to InputFile
          @param file_format, type, required, flag, group, display_name : forwarded to InputFile
          @param default : default value, forwarded to InputFile; when it is an AbstractOutputFile
                           its component_nameid is stored as parent_component_nameid of the new parameter
          @param add_to : name of an attribute of the component; when set, the new parameter is given
                          to the add_sub_parameter() method of this attribute instead of being stored
                          on the component itself
        """
        new_param = InputFile(name, help, flag=flag, file_format=file_format, default=default,
                              type=type, required=required, group=group, display_name=display_name)
        # if this input should be added to a particular parameter
        if add_to:
            # best effort: a failure to attach the parameter is ignored, but
            # KeyboardInterrupt and SystemExit are not swallowed anymore
            try:
                getattr(self, add_to).add_sub_parameter(new_param)
            except Exception:
                pass
            return
        # otherwise, add it to the class itself
        new_param.component_nameid = self.get_nameid()
        if isinstance(default, AbstractOutputFile):
            new_param.parent_component_nameid = default.component_nameid
        self.params_order.append(name)
        setattr(self, name, new_param)

    def add_input_file_list(self, name, help, file_format="any", default=None, type="inputfile",
                            required=False, flag=None, group="default", display_name=None, add_to=None):
        """
        Create an InputFileList parameter and attach it to the component or to one of its parameters.
          @param name : name of the parameter, also used as attribute name on the component
          @param help : help text, forwarded to InputFileList
          @param file_format, type, required, flag, group, display_name : forwarded to InputFileList
          @param default : default value, an empty list is used when None; when it is an
                           AbstractOutputFile its component_nameid is stored as
                           parent_component_nameid of the new parameter
          @param add_to : name of an attribute of the component; when set, the new parameter is given
                          to the add_sub_parameter() method of this attribute instead of being stored
                          on the component itself
        """
        if default is None:
            default = []
        new_param = InputFileList(name, help, flag=flag, file_format=file_format, default=default,
                                  type=type, required=required, group=group, display_name=display_name)
        # if this input should be added to a particular parameter
        if add_to:
            # best effort: a failure to attach the parameter is ignored, but
            # KeyboardInterrupt and SystemExit are not swallowed anymore
            try:
                getattr(self, add_to).add_sub_parameter(new_param)
            except Exception:
                pass
            return
        # otherwise, add it to the class itself
        new_param.component_nameid = self.get_nameid()
        if isinstance(default, AbstractOutputFile):
            new_param.parent_component_nameid = default.component_nameid
        self.params_order.append(name)
        setattr(self, name, new_param)
Frédéric Escudié's avatar
Frédéric Escudié committed
102

103
104
    def add_parameter(self, name, help, default=None, type=str, choices=None,
                      required=False, flag=None, group="default", display_name=None, add_to=None):
        """
        Create a parameter through ParameterFactory.factory() and attach it to the component
        or to one of its parameters.
          @param name : name of the parameter, also used as attribute name on the component
          @param help : help text, forwarded to the factory
          @param default, choices, required, flag, group, display_name : forwarded to the factory
          @param type : type of the parameter, forwarded to the factory; the default str is the
                        same object as types.StringType on Python 2
          @param add_to : name of an attribute of the component; when set, the new parameter is given
                          to the add_sub_parameter() method of this attribute instead of being stored
                          on the component itself
        """
        new_param = ParameterFactory.factory(name, help, flag=flag, default=default, type=type, choices=choices,
                                             required=required, group=group, display_name=display_name)
        # if this input should be added to a particular parameter
        if add_to:
            # best effort: a failure to attach the parameter is ignored, but
            # KeyboardInterrupt and SystemExit are not swallowed anymore
            try:
                getattr(self, add_to).add_sub_parameter(new_param)
            except Exception:
                pass
            return
        # otherwise, add it to the class itself
        self.params_order.append(name)
        setattr(self, name, new_param)

    def add_parameter_list(self, name, help, default=None, type=str, choices=None,
                           required=False, flag=None, group="default", display_name=None, add_to=None):
        """
        Create a ParameterList parameter and attach it to the component or to one of its parameters.
          @param name : name of the parameter, also used as attribute name on the component
          @param help : help text, forwarded to ParameterList
          @param default : default value, an empty list is used when None
          @param type : type of the values, forwarded to ParameterList; the default str is the
                        same object as types.StringType on Python 2
          @param choices, required, flag, group, display_name : forwarded to ParameterList
          @param add_to : name of an attribute of the component; when set, the new parameter is given
                          to the add_sub_parameter() method of this attribute instead of being stored
                          on the component itself
        """
        if default is None:
            default = []
        new_param = ParameterList(name, help, flag=flag, default=default, type=type, choices=choices,
                                  required=required, group=group, display_name=display_name)
        # if this input should be added to a particular parameter
        if add_to:
            # best effort: a failure to attach the parameter is ignored, but
            # KeyboardInterrupt and SystemExit are not swallowed anymore
            try:
                getattr(self, add_to).add_sub_parameter(new_param)
            except Exception:
                pass
            return
        # otherwise, add it to the class itself
        self.params_order.append(name)
        setattr(self, name, new_param)
Frédéric Escudié's avatar
Frédéric Escudié committed
131

132
133
    def add_output_file(self, name, help, file_format="any", filename=None, group="default", display_name=None, add_to=None):
        """
        Create an OutputFile parameter located in the component output directory and attach it
        to the component or to one of its parameters.
          @param name : name of the parameter, also used as attribute name on the component
          @param help : help text, forwarded to OutputFile
          @param file_format, group, display_name : forwarded to OutputFile
          @param filename : name of the output file; only its basename is kept and joined to
                            self.output_directory.
                            NOTE(review): the None default is passed straight to os.path.basename(),
                            so callers apparently always have to provide a value - confirm
          @param add_to : name of an attribute of the component; when set, the new parameter is given
                          to the add_sub_parameter() method of this attribute instead of being stored
                          on the component itself
        """
        output_path = os.path.join(self.output_directory, os.path.basename(filename))
        new_param = OutputFile(name, help, default=output_path,
                               file_format=file_format, group=group, display_name=display_name)
        # if this input should be added to a particular parameter
        if add_to:
            # best effort: a failure to attach the parameter is ignored, but
            # KeyboardInterrupt and SystemExit are not swallowed anymore
            try:
                getattr(self, add_to).add_sub_parameter(new_param)
            except Exception:
                pass
            return
        # otherwise, add it to the class itself
        new_param.component_nameid = self.get_nameid()
        self.params_order.append(name)
        setattr(self, name, new_param)
Frédéric Escudié's avatar
Frédéric Escudié committed
146

147
148
149
    def add_output_file_list(self, name, help, file_format="any", pattern='{basename_woext}.out',
                             items=None, group="default", display_name=None, add_to=None):
        """
        Create an OutputFileList parameter whose files are computed by get_outputs() and attach it
        to the component or to one of its parameters.
          @param name : name of the parameter, also used as attribute name on the component
          @param help : help text, forwarded to OutputFileList
          @param file_format, group, display_name : forwarded to OutputFileList
          @param pattern : template given to get_outputs() as output_list
          @param items : inputs given to get_outputs() as input_list
          @param add_to : name of an attribute of the component; when set, the new parameter is given
                          to the add_sub_parameter() method of this attribute instead of being stored
                          on the component itself
        """
        new_param = OutputFileList(name, help, default=self.get_outputs(pattern, items),
                                   file_format=file_format, group=group, display_name=display_name)
        # if this input should be added to a particular parameter
        if add_to:
            # best effort: a failure to attach the parameter is ignored, but
            # KeyboardInterrupt and SystemExit are not swallowed anymore
            try:
                getattr(self, add_to).add_sub_parameter(new_param)
            except Exception:
                pass
            return
        # otherwise, add it to the class itself
        new_param.component_nameid = self.get_nameid()
        self.params_order.append(name)
        setattr(self, name, new_param)
Frédéric Escudié's avatar
Frédéric Escudié committed
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191

    def add_output_file_endswith(self, name, help, file_format="any", pattern='.stderr', behaviour="exclude",
                               group="default", display_name=None, add_to=None):
        """
        Create an OutputFilesEndsWith parameter bound to the component output directory and attach it
        to the component or to one of its parameters.
          @param name : name of the parameter, also used as attribute name on the component
          @param help : help text, forwarded to OutputFilesEndsWith
          @param file_format, group, display_name : forwarded to OutputFilesEndsWith (they used to be
                                                    ignored and replaced by hard-coded values)
          @param pattern : pattern forwarded to OutputFilesEndsWith
          @param behaviour : "exclude" builds the parameter with include=False, any other value
                             with include=True
          @param add_to : name of an attribute of the component; when set, the new parameter is given
                          to the add_sub_parameter() method of this attribute instead of being stored
                          on the component itself
        """
        new_param = OutputFilesEndsWith(name, help, self.output_directory, pattern, include=(behaviour != "exclude"),
                                        file_format=file_format, group=group, display_name=display_name)
        # if this input should be added to a particular parameter
        if add_to:
            # best effort: a failure to attach the parameter is ignored, but
            # KeyboardInterrupt and SystemExit are not swallowed anymore
            try:
                getattr(self, add_to).add_sub_parameter(new_param)
            except Exception:
                pass
            return
        # otherwise, add it to the class itself
        new_param.component_nameid = self.get_nameid()
        self.params_order.append(name)
        setattr(self, name, new_param)

    def add_output_file_pattern(self, name, help, file_format="any", pattern='.stderr$', behaviour="exclude",
                               group="default", display_name=None, add_to=None):
        """
        Create an OutputFilesPattern parameter bound to the component output directory and attach it
        to the component or to one of its parameters.
          @param name : name of the parameter, also used as attribute name on the component
          @param help : help text, forwarded to OutputFilesPattern
          @param file_format, group, display_name : forwarded to OutputFilesPattern (they used to be
                                                    ignored and replaced by hard-coded values)
          @param pattern : pattern forwarded to OutputFilesPattern
          @param behaviour : "exclude" builds the parameter with include=False, any other value
                             with include=True
          @param add_to : name of an attribute of the component; when set, the new parameter is given
                          to the add_sub_parameter() method of this attribute instead of being stored
                          on the component itself
        """
        new_param = OutputFilesPattern(name, help, self.output_directory, pattern, include=(behaviour != "exclude"),
                                       file_format=file_format, group=group, display_name=display_name)
        # if this input should be added to a particular parameter
        if add_to:
            # best effort: a failure to attach the parameter is ignored, but
            # KeyboardInterrupt and SystemExit are not swallowed anymore
            try:
                getattr(self, add_to).add_sub_parameter(new_param)
            except Exception:
                pass
            return
        # otherwise, add it to the class itself
        new_param.component_nameid = self.get_nameid()
        self.params_order.append(name)
        setattr(self, name, new_param)

Philippe Bardou's avatar
Philippe Bardou committed
192
193
194
195
    def _webify_workflow_outputs(self, web_path, path):
        work_dir  = self.config_reader.get_work_directory()
        socket_opt = self.config_reader.get_socket_options()
        return "http://" + socket_opt[0] + ":" + str(socket_opt[1]) + "/" + path.replace(work_dir, web_path)
Frédéric Escudié's avatar
Frédéric Escudié committed
196

Jerome Mariette's avatar
Jerome Mariette committed
197
    def _longestCommonSubstr(self, data, clean_end=True):
Jerome Mariette's avatar
Jerome Mariette committed
198
199
200
201
202
203
204
205
        substr = ''
        if len(data) > 1 and len(data[0]) > 0:
            for i in range(len(data[0])):
                for j in range(len(data[0])-i+1):
                    if j > len(substr) and all(data[0][i:i+j] in x for x in data):
                        substr = data[0][i:i+j]
        else:
            substr = data[0]
Jerome Mariette's avatar
Jerome Mariette committed
206
207
208
        if clean_end:
            while substr.endswith("_") or substr.endswith("-") or substr.endswith("."):
                substr = substr[:-1]
Jerome Mariette's avatar
Jerome Mariette committed
209
        return substr
Frédéric Escudié's avatar
Frédéric Escudié committed
210

Jerome Mariette's avatar
Jerome Mariette committed
211
212
213
214
    def get_outputs(self, output_list=None, input_list=None):
        """
        If `output_list` is a string template, then it may have the following
        fields:
Frédéric Escudié's avatar
Frédéric Escudié committed
215

Jerome Mariette's avatar
Jerome Mariette committed
216
217
218
219
        - `{fullpath}`, `{FULL}`         -- Full input file path.
        - `{basename}`, `{BASE}`         -- Base input file name.
        - `{fullpath_woext}`, `{FULLWE}` -- Full input file path without extension
        - `{basename_woext}`, `{BASEWE}` -- Base input file name without extension
Frédéric Escudié's avatar
Frédéric Escudié committed
220
        """
Jerome Mariette's avatar
Jerome Mariette committed
221
222
        if output_list is None:
            return []
Frédéric Escudié's avatar
Frédéric Escudié committed
223

Jerome Mariette's avatar
Jerome Mariette committed
224
        if isinstance(output_list, str):
Jerome Mariette's avatar
Jerome Mariette committed
225
            ilist = []
Jerome Mariette's avatar
Jerome Mariette committed
226
            if not input_list or not '{' in str(output_list):
227
228
229
230
                if input_list is not None and len(input_list) == 0:
                    return []
                else:
                    return [output_list]
Jerome Mariette's avatar
Jerome Mariette committed
231
232
233
234
235
236
237
238
239
240
            # if multiple list of inputs is used
            elif isinstance(input_list[0], list):
                for i, val in enumerate(input_list[0]):
                    iter_values = []
                    for j, ingroup in enumerate(input_list):
                        iter_values.append(os.path.basename(input_list[j][i]))
                    ilist.append(self._longestCommonSubstr(iter_values))
            else:
                ilist = parse_string_list(input_list)
                            
Jerome Mariette's avatar
Jerome Mariette committed
241
242
243
244
245
246
247
248
249
250
251
            return [os.path.join(self.output_directory, str(output_list).format(
                        fullpath       = input,
                        FULL           = input,
                        i              = '{0:05X}'.format(i),
                        NUMBER         = '{0:05X}'.format(i),
                        fullpath_woext = os.path.splitext(input)[0],
                        FULL_WOEXT     = os.path.splitext(input)[0],
                        basename       = os.path.basename(input),
                        BASE           = os.path.basename(input),
                        basename_woext = os.path.splitext(os.path.basename(input))[0] if os.path.splitext(os.path.basename(input))[1] != ".gz" else os.path.splitext(os.path.splitext(os.path.basename(input))[0])[0],
                        BASE_WOEXT     = os.path.splitext(os.path.basename(input))[0] if os.path.splitext(os.path.basename(input))[1] != ".gz" else os.path.splitext(os.path.splitext(os.path.basename(input))[0])[0]))
Jerome Mariette's avatar
Jerome Mariette committed
252
                    for i, input in enumerate(ilist)]
Jerome Mariette's avatar
Jerome Mariette committed
253
254
255
    
    def execute(self):
        # first create the output directory
Jerome Mariette's avatar
Jerome Mariette committed
256
257
        if not os.path.isdir(self.output_directory):
            os.makedirs(self.output_directory, 0751)
Jerome Mariette's avatar
Jerome Mariette committed
258
        # then run the component
Jerome Mariette's avatar
Jerome Mariette committed
259
        self.process()
Jerome Mariette's avatar
Jerome Mariette committed
260
    
Jerome Mariette's avatar
Jerome Mariette committed
261
    def process(self):
Jerome Mariette's avatar
Jerome Mariette committed
262
263
264
265
266
        """ 
        Run the component, has to be implemented by subclasses
        """
        raise NotImplementedError
    
267
268
269
270
    def get_version(self):
        """ 
        Return the tool version, has to be implemented by subclasses
        """
Jerome Mariette's avatar
Jerome Mariette committed
271
        return None
272
    
Jerome Mariette's avatar
Jerome Mariette committed
273
274
275
276
277
278
279
280
281
282
283
284
    def get_temporary_file(self, suffix=".txt"):
        # first check if tmp directory exists
        if not os.path.isdir(self.config_reader.get_tmp_directory()):
            os.makedirs(self.config_reader.get_tmp_directory(), 0751)
        tempfile_name = os.path.basename(tempfile.NamedTemporaryFile(suffix=suffix).name)
        return os.path.join(self.config_reader.get_tmp_directory(), tempfile_name)
    
    def define_parameters(self, *args):
        """ 
        Define the component parameters, has to be implemented by subclasses
        """
        raise NotImplementedError
285
286
287

    def get_resource(self, resource):
        return self.config_reader.get_resource(resource)
Jerome Mariette's avatar
Jerome Mariette committed
288
289
    
    def get_exec_path(self, software):
290
        exec_path = self.config_reader.get_exec(software)
291
292
293
        if exec_path is None and os.path.isfile(os.path.join(os.path.dirname(inspect.getfile(self.__class__)), "../../bin", software)):
            exec_path = os.path.join(os.path.dirname(inspect.getfile(self.__class__)), "../../bin", software)
        elif exec_path is None and os.path.isfile(os.path.join(os.path.dirname(inspect.getfile(self.__class__)), "../bin", software)):
294
295
296
            exec_path = os.path.join(os.path.dirname(inspect.getfile(self.__class__)), "../bin", software)
        elif exec_path is None and os.path.isfile(os.path.join(os.path.dirname(inspect.getfile(self.__class__)), "bin", software)):
            exec_path = os.path.join(os.path.dirname(inspect.getfile(self.__class__)), "bin", software)
297
        elif exec_path is None and which(software) == None:
298
299
            sys.stderr.write("Error: '" + software + "' path connot be retrieved either in the PATH and in the application.properties file!\n")
            sys.exit(1)
300
301
302
303
304
        elif exec_path is None and which(software) != None: 
            exec_path = software
        elif exec_path != None and not os.path.isfile(exec_path):
            sys.stderr.write("Error: '" + exec_path + "' set for '" + software + "' does not exists, please provide a valid path!\n")
            sys.exit(1)
305
        return exec_path
Jerome Mariette's avatar
Jerome Mariette committed
306
    
Jerome Mariette's avatar
Jerome Mariette committed
307
308
309
    def get_nameid(self):
        return self.__class__.__name__ + "." + self.prefix
    
Jerome Mariette's avatar
Jerome Mariette committed
310
311
    def __eq__(self, other):
        return self.__class__ == other.__class__ and self.prefix == other.prefix