#
# Copyright (C) 2015 INRA
# 
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#

import inspect
import os
20
import re
Jerome Mariette's avatar
Jerome Mariette committed
21
22
23
24
25
26
27
import sys
import uuid
import pkgutil
import tempfile
import pickle
import time
import threading
Jerome Mariette's avatar
Jerome Mariette committed
28
import types
Jerome Mariette's avatar
Jerome Mariette committed
29
import datetime
30
31
import logging
import traceback
Jerome Mariette's avatar
Jerome Mariette committed
32

33
from configparser import ConfigParser, NoOptionError
Jerome Mariette's avatar
Jerome Mariette committed
34
from inspect import getcallargs
35
from datetime import date as ddate
Jerome Mariette's avatar
Jerome Mariette committed
36

37
38
from collections import OrderedDict

Jerome Mariette's avatar
Jerome Mariette committed
39
import jflow
Jerome Mariette's avatar
Jerome Mariette committed
40
import jflow.utils as utils
41
from jflow.utils import validate_email
42
from pygraph.classes.digraph import digraph
Ibouniyamine Nabihoudine's avatar
Ibouniyamine Nabihoudine committed
43
44
from jflow.workflows_manager import WorkflowsManager 
from jflow.config_reader import JFlowConfigReader
45
from jflow.utils import get_octet_string_representation, get_nb_octet
46
from jflow.parameter import *
47
from cctools.util import time_format
Jerome Mariette's avatar
Jerome Mariette committed
48

49
import jflow.rules as j_rules
50

Jerome Mariette's avatar
Jerome Mariette committed
51
52
53
54
55
56
57
58
59
60
61
from weaver.script import ABSTRACTIONS
from weaver.script import DATASETS
from weaver.script import FUNCTIONS
from weaver.script import NESTS
from weaver.script import OPTIONS
from weaver.script import STACKS
from weaver.nest import Nest
from weaver.options import Options
from cctools.makeflow import MakeflowLog
from cctools.makeflow.log import Node

62
63
from workflows import rules as wf_rules

Jerome Mariette's avatar
Jerome Mariette committed
64

65
66
67
68
69
70
71
class MINIWorkflow(object):
    """Plain value holder describing a workflow and the state of its components.

    It only stores what it is given at construction time and hands it back
    through small accessor methods.
    """

    def __init__(self, id, name, description, status, start_time, end_time, metadata, 
                 component_nameids, compts_status, errors):
        """Store every constructor argument on the instance, unchanged."""
        # identity
        self.id = id
        self.name = name
        self.description = description
        self.metadata = metadata
        # timing
        self.start_time = start_time
        self.end_time = end_time
        # state
        self._status = status
        self.errors = errors
        # components
        self.component_nameids = component_nameids
        self.compts_status = compts_status

    def get_status(self):
        """Return the status given at construction time."""
        return self._status

    def get_errors(self):
        """Return the errors given at construction time."""
        return self.errors

    def get_components_nameid(self):
        """Return the component name ids given at construction time."""
        return self.component_nameids

    def get_components_status(self):
        """Return the whole per-component status mapping."""
        return self.compts_status

    def get_component_status(self, component_nameid):
        """Return the status entry stored under `component_nameid`."""
        all_status = self.compts_status
        return all_status[component_nameid]
94

95
96
class Workflow(threading.Thread):
    
Jerome Mariette's avatar
Jerome Mariette committed
97
    # File names and suffixes; their use sites are outside this part of the file.
    MAKEFLOW_LOG_FILE_NAME = "Makeflow.makeflowlog"
    DUMP_FILE_NAME = ".workflow.dump"
    STDERR_FILE_NAME = "wf_stderr.txt"
    WORKING = ".working"
    OLD_EXTENSION = ".old"
    # Group used by __init__ when the configuration gives none for the workflow class.
    DEFAULT_GROUP = "default"
    
    # Status values; __init__ starts with STATUS_STARTED for the workflow status
    # and STATUS_PENDING for the post-process status.
    STATUS_PENDING = "pending"
    STATUS_STARTED = "started"
    STATUS_COMPLETED = "completed"
    STATUS_FAILED = "failed"
    STATUS_ABORTED = "aborted"
    STATUS_RESETED = "reseted"
    
    # Node attributes attached by get_execution_graph() to tell node kinds apart.
    INPUTFILE_GRAPH_LABEL = "inputfile"
    INPUTFILES_GRAPH_LABEL = "inputfiles"
    INPUTDIRECTORY_GRAPH_LABEL = "inputdirectory"
    COMPONENT_GRAPH_LABEL = "component"
    
116
    
117
    def __init__(self, args=None, id=None, function="process"):
        """Initialise the workflow thread, its parameters and, if an id is given, its directory.

        Args:
            args: arguments of the workflow, stored as `self.args`. When omitted a
                new empty dict is used (one per instance, never shared).
            id: identifier of the workflow. When not None the workflow directory
                is created if missing, stderr is set up and the workflow is
                serialized.
            function: value handed to `define_parameters()`; nothing is defined
                when it is None.
        """
        # define as a thread
        threading.Thread.__init__(self)
        self.jflow_config_reader = JFlowConfigReader()
        self.manager = WorkflowsManager()
        self.components_to_exec = []
        self.components = []
        self.makes = {}
        self.globals = {}
        self.options = Options()
        self._status = self.STATUS_STARTED
        self._postprocess_status = self.STATUS_PENDING
        self.start_time = None
        self.end_time = None
        self.__step = None
        self.stderr = None
        # a fresh dict per instance: a `{}` default would be shared by every workflow
        self.args = {} if args is None else args
        self.dynamic_component_present = False
        self.__to_address = None
        self.__subject = None
        self.__message = None
        self.function = function
        # introduce --log-verbose to be able to monitor the new version of makeflow >=4.2.2
        self.engine_arguments = ' --log-verbose '
        self.component_nameids_is_init = False
        self.component_nameids = {}
        self.reseted_components = []
        # try to parse engine arguments; on any failure no engine argument is kept
        try:
            batch_type, batch_options, limit_submission = self.jflow_config_reader.get_batch()
            if limit_submission:
                self.engine_arguments += ' -J ' + str(limit_submission)
            if batch_type:
                self.engine_arguments += ' -T ' + batch_type
            if batch_options:
                self.engine_arguments += ' -B "' + batch_options + '"'
        except Exception:
            self.engine_arguments = None

        self.id = id
        self.name = self.get_name()
        self.description = self.get_description()
        self.__group = self.jflow_config_reader.get_workflow_group(self.__class__.__name__) or Workflow.DEFAULT_GROUP

        # define the parameters 
        self.params_order = []
        if self.function is not None:
            self.define_parameters(self.function)
        # add the metadata parameter
        self.metadata = []

        if self.id is not None:
            self.directory = self.manager.get_workflow_directory(self.name, self.id)
            if not os.path.isdir(self.directory):
                os.makedirs(self.directory, 0o751)
            if self.stderr is None:
                self.stderr = self._set_stderr()
            self._serialize()

        self.internal_components = self._import_internal_components()
        self.external_components = self._import_external_components()
174

175
176
    def get_workflow_group(self):
        return self.__group
177
            
Jerome Mariette's avatar
Jerome Mariette committed
178
    def add_input_directory(self, name, help, default=None, required=False, flag=None, 
                            group="default", display_name=None, get_files_fn=None, add_to=None, rules=None):
        """Declare an input directory parameter.

        An InputDirectory is built from the arguments and its `linkTrace_nameid`
        is set to `name`. It is then either registered on the workflow itself
        (attribute `name`, appended to `params_order`) or, when `add_to` is
        given, passed to `add_sub_parameter()` of the workflow attribute named
        `add_to`.

        Args:
            name: name of the parameter, also the attribute name on the workflow.
            help, default, required, flag, group, display_name, get_files_fn,
            rules: forwarded as-is to the InputDirectory constructor.
            add_to: name of the workflow attribute that should receive the new
                parameter as a sub parameter. Attaching stays best-effort: a
                failure is logged as a warning and the parameter is not added.
        """
        new_param = InputDirectory(name, help, flag=flag, default=default, required=required, 
                                   group=group, display_name=display_name, get_files_fn=get_files_fn, rules=rules)
        new_param.linkTrace_nameid = name
        # if this input should be added to a particular parameter
        if add_to:
            try:
                self.__getattribute__(add_to).add_sub_parameter(new_param)
            except Exception as error:
                # keep going, but do not hide the problem
                logging.getLogger(__name__).warning(
                    "Unable to add parameter '%s' to '%s': %s", name, add_to, error)
        # otherwise, add it to the class itself
        else:
            self.params_order.append(name)
            setattr(self, name, new_param)
    
193
    def add_input_file(self, name, help, file_format="any", default=None, type="inputfile", 
                       required=False, flag=None, group="default", display_name=None, size_limit="0", add_to=None,
                       rules=None):
        """Declare an input file parameter.

        An InputFile is built from the arguments and its `linkTrace_nameid` is
        set to `name`. It is then either registered on the workflow itself
        (attribute `name`, appended to `params_order`) or, when `add_to` is
        given, passed to `add_sub_parameter()` of the workflow attribute named
        `add_to`.

        Args:
            name: name of the parameter, also the attribute name on the workflow.
            help, file_format, default, type, required, flag, group,
            display_name, rules: forwarded as-is to the InputFile constructor.
            size_limit: forwarded to the InputFile constructor; replaced by "0"
                when `get_nb_octet()` cannot convert it to an integer.
            add_to: name of the workflow attribute that should receive the new
                parameter as a sub parameter. Attaching stays best-effort: a
                failure is logged as a warning and the parameter is not added.
        """
        # check if the size provided is correct
        try:
            int(get_nb_octet(size_limit))
        except Exception:
            size_limit = "0"
        new_param = InputFile(name, help, flag=flag, file_format=file_format, default=default, 
                              type=type, required=required, group=group, display_name=display_name, size_limit=size_limit,
                              rules=rules)
        new_param.linkTrace_nameid = name
        # if this input should be added to a particular parameter
        if add_to:
            try:
                self.__getattribute__(add_to).add_sub_parameter(new_param)
            except Exception as error:
                # keep going, but do not hide the problem
                logging.getLogger(__name__).warning(
                    "Unable to add parameter '%s' to '%s': %s", name, add_to, error)
        # otherwise, add it to the class itself
        else:
            self.params_order.append(name)
            setattr(self, name, new_param)
            
    def add_input_file_list(self, name, help, file_format="any", default=None, type="inputfile", 
                            required=False, flag=None, group="default", display_name=None, size_limit="0", add_to=None,
                            rules=None):
        """Declare a parameter holding a list of input files.

        The default value is turned into a list of IOFile objects, an
        InputFileList is built and its `linkTrace_nameid` is set to `name`. It is
        then either registered on the workflow itself (attribute `name`,
        appended to `params_order`) or, when `add_to` is given, passed to
        `add_sub_parameter()` of the workflow attribute named `add_to`.

        Args:
            name: name of the parameter, also the attribute name on the workflow.
            default: None (no file), a list of values or a single value; each
                value is wrapped in an IOFile.
            help, file_format, type, required, flag, group, display_name,
            rules: forwarded as-is to the InputFileList constructor.
            size_limit: forwarded to the InputFileList constructor; replaced by
                "0" when `get_nb_octet()` cannot convert it to an integer.
            add_to: name of the workflow attribute that should receive the new
                parameter as a sub parameter. Attaching stays best-effort: a
                failure is logged as a warning and the parameter is not added.
        """
        if default is None:
            default = []
        # check if the size provided is correct
        try:
            int(get_nb_octet(size_limit))
        except Exception:
            size_limit = "0"
        # default is never None here, so only the list / single value cases remain
        if isinstance(default, list):
            inputs = [IOFile(path, file_format, name, None) for path in default]
        else:
            inputs = [IOFile(default, file_format, name, None)]
        new_param = InputFileList(name, help, flag=flag, file_format=file_format, default=inputs, 
                                  type=type, required=required, group=group, display_name=display_name, size_limit=size_limit,
                                  rules=rules)
        new_param.linkTrace_nameid = name
        # if this input should be added to a particular parameter
        if add_to:
            try:
                self.__getattribute__(add_to).add_sub_parameter(new_param)
            except Exception as error:
                # keep going, but do not hide the problem
                logging.getLogger(__name__).warning(
                    "Unable to add parameter '%s' to '%s': %s", name, add_to, error)
        # otherwise, add it to the class itself
        else:
            self.params_order.append(name)
            setattr(self, name, new_param)
239
            
240
241
    def add_multiple_parameter(self, name, help, required=False, flag=None, group="default", display_name=None,
                               rules=None):
        """Declare a MultiParameter and register it on the workflow under `name`.

        The name is appended to `params_order` and the new MultiParameter, built
        from the given arguments, becomes the workflow attribute `name`.
        """
        # the name is recorded before the parameter is built, as it always was
        self.params_order.append(name)
        options = dict(flag=flag, required=required, group=group, display_name=display_name,
                       rules=rules)
        setattr(self, name, MultiParameter(name, help, **options))

247
    def add_multiple_parameter_list(self, name, help, required=False, flag=None, group="default", display_name=None, rules=None):
        """Declare a MultiParameterList and register it on the workflow under `name`.

        The name is appended to `params_order` and the new MultiParameterList,
        built from the given arguments, becomes the workflow attribute `name`.
        """
        # the name is recorded before the parameter is built, as it always was
        self.params_order.append(name)
        options = dict(flag=flag, required=required, group=group, display_name=display_name,
                       rules=rules)
        setattr(self, name, MultiParameterList(name, help, **options))
    
253
    def add_parameter(self, name, help, default=None, type=str, choices=None, 
                      required=False, flag=None, group="default", display_name=None, add_to=None, rules=None):
        """Declare a simple parameter.

        The parameter object is obtained from `ParameterFactory.factory()`. It is
        then either registered on the workflow itself (attribute `name`,
        appended to `params_order`) or, when `add_to` is given, passed to
        `add_sub_parameter()` of the workflow attribute named `add_to`.

        Args:
            name: name of the parameter, also the attribute name on the workflow.
            help, default, type, choices, required, flag, group, display_name,
            rules: forwarded as-is to `ParameterFactory.factory()`.
            add_to: name of the workflow attribute that should receive the new
                parameter as a sub parameter. Attaching stays best-effort: a
                failure is logged as a warning and the parameter is not added.
        """
        new_param = ParameterFactory.factory(name, help, flag=flag, default=default, type=type, choices=choices, 
                              required=required, group=group, display_name=display_name, rules=rules)
        # if this input should be added to a particular parameter
        if add_to:
            try:
                self.__getattribute__(add_to).add_sub_parameter(new_param)
            except Exception as error:
                # keep going, but do not hide the problem
                logging.getLogger(__name__).warning(
                    "Unable to add parameter '%s' to '%s': %s", name, add_to, error)
        # otherwise, add it to the class itself
        else:
            self.params_order.append(name)
            setattr(self, name, new_param)
    
267
    def add_parameter_list(self, name, help, default=None, type=str, choices=None, 
                           required=False, flag=None, group="default", display_name=None, add_to=None, rules=None):
        """Declare a parameter holding a list of values.

        A ParameterList is built from the arguments. It is then either
        registered on the workflow itself (attribute `name`, appended to
        `params_order`) or, when `add_to` is given, passed to
        `add_sub_parameter()` of the workflow attribute named `add_to`.

        Args:
            name: name of the parameter, also the attribute name on the workflow.
            default: default values; None is replaced by a new empty list.
            help, type, choices, required, flag, group, display_name, rules:
                forwarded as-is to the ParameterList constructor.
            add_to: name of the workflow attribute that should receive the new
                parameter as a sub parameter. Attaching stays best-effort: a
                failure is logged as a warning and the parameter is not added.
        """
        if default is None:
            default = []
        new_param = ParameterList(name, help, flag=flag, default=default, type=type, choices=choices, 
                                  required=required, group=group, display_name=display_name, rules=rules)
        # if this input should be added to a particular parameter
        if add_to:
            try:
                self.__getattribute__(add_to).add_sub_parameter(new_param)
            except Exception as error:
                # keep going, but do not hide the problem
                logging.getLogger(__name__).warning(
                    "Unable to add parameter '%s' to '%s': %s", name, add_to, error)
        # otherwise, add it to the class itself
        else:
            self.params_order.append(name)
            setattr(self, name, new_param)
Jerome Mariette's avatar
Jerome Mariette committed
281
#                         break
282

Frédéric Escudié's avatar
Frédéric Escudié committed
283
    def _prepare_parameter(self, args, parameter, key="name"):
        """Build a new parameter object of the same class as `parameter`, holding its value.

        The value is looked up in `args` under the attribute of `parameter`
        named by `key`; when absent, the default of the parameter is used (or
        None if it has none). An empty string is treated as "no value" for the
        classes listed below (it is what the GUI sends).

        Args:
            args: mapping of provided values.
            parameter: the declared parameter to rebuild.
            key: name of the attribute of `parameter` used as key in `args`.

        Returns:
            The newly built parameter.

        Raises:
            Exception: if the class of `parameter` is not one of the handled ones.
        """
        # NOTE(review): the rebuilt parameters are created without `rules` - confirm this is intended
        lookup = parameter.__getattribute__(key)
        if lookup in args:
            value = args[lookup]
        elif parameter.default != None:
            value = parameter.default
        else:
            value = None

        # the exact class decides how the parameter is rebuilt
        kind = parameter.__class__
        empty_means_none = (IntParameter, FloatParameter, BoolParameter, DateParameter)
        simple_kinds = (StrParameter, PasswordParameter) + empty_means_none

        if kind in simple_kinds:
            if value == "" and kind in empty_means_none:
                value = None # from GUI
            return ParameterFactory.factory( parameter.name, parameter.help, default=value, type=parameter.type,
                                             choices=parameter.choices, required=parameter.required,
                                             flag=parameter.flag, group=parameter.group,
                                             display_name=parameter.display_name )

        if kind == ParameterList:
            if value == "":
                value = [] # from GUI
            return ParameterList( parameter.name, parameter.help, default=value, type=parameter.type,
                                  choices=parameter.choices, required=parameter.required, flag=parameter.flag,
                                  sub_parameters=parameter.sub_parameters, group=parameter.group,
                                  display_name=parameter.display_name )

        if kind == InputFileList:
            if value == "":
                value = [] # from GUI
            iovalues = [IOFile(path, parameter.file_format, parameter.linkTrace_nameid, None)
                        for path in parameter.prepare(value)]
            file_list = InputFileList( parameter.name, parameter.help, file_format=parameter.file_format,
                                       default=iovalues, type=parameter.type, choices=parameter.choices,
                                       required=parameter.required, flag=parameter.flag, group=parameter.group,
                                       display_name=parameter.display_name, size_limit=parameter.size_limit )
            file_list.linkTrace_nameid = parameter.linkTrace_nameid
            return file_list

        if kind == InputFile:
            if value == "":
                value = None # from GUI
            single_file = InputFile( parameter.name, parameter.help, file_format=parameter.file_format,
                                     default=parameter.prepare(value), type=parameter.type,
                                     choices=parameter.choices, required=parameter.required, flag=parameter.flag,
                                     group=parameter.group, display_name=parameter.display_name )
            single_file.linkTrace_nameid = parameter.linkTrace_nameid
            return single_file

        if kind == InputDirectory:
            if value == "":
                value = None # from GUI
            directory = InputDirectory( parameter.name, parameter.help, default=parameter.prepare(value),
                                        choices=parameter.choices, required=parameter.required,
                                        flag=parameter.flag, group=parameter.group,
                                        display_name=parameter.display_name, get_files_fn=parameter.get_files_fn)
            directory.linkTrace_nameid = parameter.linkTrace_nameid
            return directory

        raise Exception( "Unknown class '" +  parameter.__class__.__name__ + "' for parameter.")

331
    def set_parameters(self, args):
        """Rebuild every declared parameter with the values found in `args`.

        Each parameter returned by `get_parameters()` is replaced, on the
        workflow, by a new object of the same kind:
          - MultiParameter: its sub parameters are prepared from the
            (flag, value) pairs stored in `args` under its name;
          - MultiParameterList: one MultiParameter is built per entry found in
            `args` under its name;
          - anything else is delegated to `_prepare_parameter()`.
        """
        def pairs_to_dict(pairs):
            # each pair is (flag, value); a later pair wins over an earlier one
            return {pair[0]: pair[1] for pair in pairs}

        for declared in self.get_parameters():
            kind = declared.__class__
            if kind == MultiParameter:
                rebuilt = MultiParameter(declared.name, declared.help, required=declared.required,
                                         flag=declared.flag, group=declared.group,
                                         display_name=declared.display_name)
                rebuilt.sub_parameters = declared.sub_parameters
                if declared.name in args:
                    by_flag = pairs_to_dict(args[declared.name])
                    for sub_param in declared.sub_parameters:
                        prepared = self._prepare_parameter(by_flag, sub_param, "flag")
                        rebuilt[prepared.name] = prepared
            elif kind == MultiParameterList:
                rebuilt = MultiParameterList(declared.name, declared.help, required=declared.required,
                                             flag=declared.flag, group=declared.group,
                                             display_name=declared.display_name)
                rebuilt.sub_parameters = declared.sub_parameters
                if declared.name in args:
                    for position, pairs in enumerate(args[declared.name]):
                        entry = MultiParameter(declared.name + '_' + str(position), '', required=False,
                                               flag=None, group="default", display_name=None)
                        by_flag = pairs_to_dict(pairs)
                        for sub_param in declared.sub_parameters:
                            prepared = self._prepare_parameter(by_flag, sub_param, "flag")
                            entry[prepared.name] = prepared
                        rebuilt.append(entry)
            else:
                rebuilt = self._prepare_parameter(args, declared)
            setattr(self, declared.name, rebuilt)
Frédéric Escudié's avatar
Frédéric Escudié committed
361

Jerome Mariette's avatar
Jerome Mariette committed
362
    def get_execution_graph(self):
        """Build the directed graph linking the workflow inputs and its components.

        One node is created per input parameter of the workflow (including the
        sub parameters of MultiParameter / MultiParameterList attributes) and
        per component. Each node gets a kind label (one of the *_GRAPH_LABEL
        constants, when one applies) followed by a display name. Edges are
        then added from the `parent_linkTrace_nameid` entries of the component
        inputs to their `linkTrace_nameid`. Finally every node that appears in
        no edge is removed.

        Returns:
            The digraph object.
        """
        gr = digraph()
        # nodes that have not been seen in any edge yet
        unlinked = {}

        # checked in this order: the first matching class gives the label
        input_labels = (
            (InputFile, self.INPUTFILE_GRAPH_LABEL),
            (InputFileList, self.INPUTFILES_GRAPH_LABEL),
            (InputDirectory, self.INPUTDIRECTORY_GRAPH_LABEL),
        )

        def input_label(candidate):
            for input_class, label in input_labels:
                if isinstance(candidate, input_class):
                    return label
            return None

        def register(node_name, attributes):
            gr.add_node(node_name)
            unlinked[node_name] = None
            for attribute in attributes:
                gr.add_node_attribute(node_name, attribute)

        # nodes for the inputs of the workflow
        for ioparameter in list(self.__dict__.values()):
            label = input_label(ioparameter)
            if label is not None:
                register(ioparameter.name, [label, ioparameter.display_name])
            elif isinstance(ioparameter, MultiParameter):
                for subparam in ioparameter.sub_parameters:
                    sub_label = input_label(subparam)
                    # sub parameters that are not inputs only get their display name
                    if sub_label is None:
                        register(subparam.name, [subparam.display_name])
                    else:
                        register(subparam.name, [sub_label, subparam.display_name])
            elif isinstance(ioparameter, MultiParameterList):
                for subparam in ioparameter.sub_parameters:
                    if isinstance(subparam, InputDirectory):
                        sub_label = self.INPUTDIRECTORY_GRAPH_LABEL
                    else:
                        sub_label = self.INPUTFILES_GRAPH_LABEL
                    register(subparam.name, [sub_label, subparam.display_name])

        # nodes for the components
        for cpt in self.components:
            nameid = cpt.get_nameid()
            register(nameid, [self.COMPONENT_GRAPH_LABEL, nameid])

        # edges, from the links recorded on the inputs of the components
        linked_classes = (InputFile, InputFileList, InputDirectory, InputObject, InputObjectList)
        for cpt in self.components:
            for ioparameter in list(cpt.__dict__.values()):
                if not isinstance(ioparameter, linked_classes):
                    continue
                for parent in ioparameter.parent_linkTrace_nameid:
                    try:
                        gr.add_edge((parent, ioparameter.linkTrace_nameid))
                    except Exception:
                        # deliberately ignored, as before; presumably a duplicated edge
                        # or an unknown node - TODO confirm
                        pass

        # check if all nodes are connected
        for edge in gr.edges():
            unlinked.pop(edge[0], None)
            unlinked.pop(edge[1], None)
        # then remove all unconnected nodes: to delete inputs not defined by the user
        for orphan_node in list(unlinked):
            gr.del_node(orphan_node)
        return gr
Frédéric Escudié's avatar
Frédéric Escudié committed
427

Jerome Mariette's avatar
Jerome Mariette committed
428
429
    def delete(self):
        if self.get_status() in [self.STATUS_COMPLETED, self.STATUS_FAILED, self.STATUS_ABORTED]:
Jerome Mariette's avatar
Jerome Mariette committed
430
            utils.robust_rmtree(self.directory)
Jerome Mariette's avatar
Jerome Mariette committed
431

432
    @staticmethod
433
    def config_parser(arg_lines):
434
435
        for arg in arg_lines:
            yield arg
436
437
438
439
            
    @staticmethod
    def get_status_under_text_format(workflow, detailed=False, display_errors=False, html=False):
        """Render the status of a workflow as text.

        Args:
            workflow: object exposing `id`, `name`, `start_time`, `end_time`,
                `get_status()`, `get_errors()`, `get_components_nameid()` and
                `get_components_status()`.
            detailed: when True, return a multi-line report (title, workflow
                error, status of every component); otherwise a single
                tab-separated line.
            display_errors: in detailed mode, also list the failed commands of
                each component.
            html: in detailed mode, colour with <span> tags and use <br /> as
                line break instead of ANSI escape codes and newlines.

        Returns:
            The formatted string.
        """
        def as_date(timestamp):
            # "-" stands for a time which is not set
            return time.asctime(time.localtime(timestamp)) if timestamp else "-"

        def counter(label, count, html_color, ansi_code, highlighted):
            text = label + ":" + str(count)
            if not highlighted:
                return text
            if html:
                return "<span style='color:" + html_color + "'>" + text + "</span>"
            return "\033[" + ansi_code + "m" + text + "\033[0m"

        start_time = as_date(workflow.start_time)
        end_time = as_date(workflow.end_time)
        if workflow.start_time:
            # a workflow which is not finished is measured up to now
            stop = workflow.end_time if workflow.end_time else time.time()
            # truncate the float directly: going through its string form breaks on
            # values printed in scientific notation (e.g. 9.5e-07)
            elapsed_time = str(datetime.timedelta(seconds=int(stop - workflow.start_time)))
        else:
            elapsed_time = "-"

        if not detailed:
            current_status = workflow.get_status()
            status_colors = {
                Workflow.STATUS_STARTED: "\033[94m",
                Workflow.STATUS_COMPLETED: "\033[92m",
                Workflow.STATUS_FAILED: "\033[91m",
                Workflow.STATUS_ABORTED: "\033[91m",
                Workflow.STATUS_RESETED: "\033[3m",
            }
            pretty_str = utils.get_nb_string(workflow.id) + "\t" + workflow.name + "\t"
            pretty_str += status_colors.get(current_status, "")
            pretty_str += current_status + "\033[0m"
            pretty_str += "\t" + elapsed_time + "\t" + start_time + "\t" + end_time
            return pretty_str

        # Global
        title = "Workflow #" + utils.get_nb_string(workflow.id) + " (" + workflow.name + ") is " + \
                workflow.get_status() + ", time elapsed: " + elapsed_time + " (from " + start_time + \
                " to " + end_time + ")"
        workflow_errors = ""
        error = workflow.get_errors()
        if error is not None:
            error_text = error["location"] + "\n    " + "\n    ".join(error["msg"])
            # NOTE(review): the error text is inserted into the HTML without escaping - confirm it is trusted
            if html:
                workflow_errors = "Workflow Error :\n  <span style='color:#ff0000'>" + error_text + "</span>"
            else:
                workflow_errors = "Workflow Error :\n  \033[91m" + error_text + "\033[0m"

        # By components
        component_nameids = workflow.get_components_nameid()
        components_status = workflow.get_components_status()
        status_lines = []
        failed_lines = []
        for component in component_nameids:
            status_info = components_status[component]
            running = counter("running", status_info["running"], "#3b3bff", "94", status_info["running"] > 0)
            waiting = counter("waiting", status_info["waiting"], "#ffea00", "93", status_info["waiting"] > 0)
            failed = counter("failed", status_info["failed"], "#ff0000", "91", status_info["failed"] > 0)
            aborted = counter("aborted", status_info["aborted"], "#ff01ba", "95", status_info["aborted"] > 0)
            # completed is only highlighted once every task of the component is done
            all_done = status_info["completed"] == status_info["tasks"] and status_info["completed"] > 0
            completed = counter("completed", status_info["completed"], "#14ac00", "92", all_done)

            if display_errors and len(status_info["failed_commands"]) > 0:
                failed_lines.append("  - " + component + " :\n    " + "\n    ".join(status_info["failed_commands"]))
            status_lines.append("  - " + component + ", time elapsed " + time_format(status_info["time"]) +
                                " (total:" + str(status_info["tasks"]) + ", " + waiting + ", " + running +
                                ", " + failed + ", " + aborted + ", " + completed + ")")

        # Format str
        pretty_str = title
        if workflow_errors != "":
            pretty_str += "\n" + workflow_errors
        if len(component_nameids) > 0:
            pretty_str += "\n" + "Components Status :\n" + "\n".join(status_lines)
            if failed_lines:
                pretty_str += "\n" + "Failed Commands :\n" + "\n".join(failed_lines)
        if html:
            return pretty_str.replace("\n", "<br />")
        return pretty_str
526
    
527
    def get_errors(self):
        """
        @summary: Returns the last error reported in the workflow stderr file.
                  An error section starts with a title line beginning with "##", followed by a
                  Python traceback ("Traceback ..." line, then indented stack frames) and by the
                  error message lines. Each new section replaces the previous one.
        @return: [dict] {"msg": [list] the message lines, "location": [str] the last stack frame}
                 or None if the stderr file does not exist or if no stack frame has been found
                 for the last error.
        """
        if not os.path.isfile(self.stderr):
            return None
        # Read everything up front so the file handle is released even if parsing fails
        with open(self.stderr) as FH_stderr:
            lines = FH_stderr.readlines()

        error = {"title": "", "msg": list(), "traceback": list()}
        nb_lines = len(lines)
        line_idx = 0
        while line_idx < nb_lines:
            if not lines[line_idx].strip().startswith("##"):
                line_idx += 1
                continue
            error = {"title": lines[line_idx].rstrip(), "msg": list(), "traceback": list()}
            # skip all lines before the traceback (bounded: the log can be truncated)
            while line_idx < nb_lines and not lines[line_idx].startswith("Traceback"):
                line_idx += 1
            # skip : "Traceback (most recent call last):"
            line_idx += 1
            # Stack frames are the indented lines: a 'File "...", line N, in f' line opens a
            # frame, the first other non-empty line is its source line. Extra indented lines
            # (e.g. "^^^^" markers) are ignored instead of shifting the parsing.
            while line_idx < nb_lines and lines[line_idx] != lines[line_idx].lstrip():
                frame_text = lines[line_idx].strip()
                if frame_text.startswith("File "):
                    error["traceback"].append({"location": frame_text, "line": ""})
                elif frame_text and error["traceback"] and error["traceback"][-1]["line"] == "":
                    error["traceback"][-1]["line"] = frame_text
                line_idx += 1
            # Error message: everything up to the next section title
            while line_idx < nb_lines and not lines[line_idx].strip().startswith("##"):
                message = lines[line_idx].strip()
                _, separator, detail = message.partition(":")
                # "ExceptionName: text" -> "text" ; lines without ":" are kept as they are
                error["msg"].append(detail[1:] if separator else message)
                line_idx += 1
            # line_idx now points to the next title (or to the end): do not advance

        if len(error["traceback"]) > 0:
            last_stack_location = error["traceback"][-1]["location"].strip()
            return { "msg" : error["msg"], "location" : last_stack_location }
        return None
571

572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
    def print_workflow_outputs(self, logs):
        """
        @summary: Prints, for each component of the workflow, a numbered title followed by the
                  list of its output files. A component without any file to list is not printed
                  but still consumes its number.
        @param logs: [bool] If True the ".stdout" and ".stderr" files are listed too.
        """
        files_by_component = self.get_outputs_per_components()
        labels = self.get_description_per_components()
        log_suffixes = (".stdout", ".stderr")
        for rank, nameid in enumerate(files_by_component, start=1):
            # The nameid is "<name>.<...>": the part before the first dot is the fallback title
            short_name = nameid[:nameid.index(".")]
            has_label = nameid in labels and labels[nameid] is not None
            title = labels[nameid] if has_label else short_name
            listing = [
                "  - " + files_by_component[nameid][file_key] + "\n"
                for file_key in files_by_component[nameid]
                if logs or not file_key.endswith(log_suffixes)
            ]
            if listing:
                print(str(rank) + ". " + title + ":\n" + "".join(listing))

593
594
595
596
597
598
    def get_description_per_components(self):
        """
        @summary: Returns the description of each component of the workflow.
        @return: [dict] The descriptions, keyed by component nameid.
        """
        # NOTE(review): the returned graph is not used here; the call is kept only in case
        # get_execution_graph() has side effects -- confirm before removing it.
        self.get_execution_graph()
        by_nameid = {}
        for cmpt in self.components:
            text = cmpt.get_description()
            by_nameid[cmpt.get_nameid()] = text
        return by_nameid
599
                
600
    def get_outputs_per_components(self):
        """
        @summary: Returns the output files of each component of the workflow.
        @return: [OrderedDict] The value returned by get_output_files() for each component,
                 keyed by component nameid, in the order of self.components.
        """
        collected = OrderedDict()
        for cmpt in self.components:
            files = cmpt.get_output_files()
            collected[cmpt.get_nameid()] = files
        return collected
    
Jerome Mariette's avatar
Jerome Mariette committed
608
609
    def __setstate__(self, state):
        """
        Rebuild the workflow from a serialized state: the attributes come from a copy of
        the state, the external components are imported again and the Thread part of the
        object is initialised again with the restored name.
        """
        restored_attributes = state.copy()
        self.__dict__ = restored_attributes
        # Not part of the state (removed by __getstate__): load them again
        self.external_components = self._import_external_components()
        # Must come last: it reads self.name from the restored attributes
        threading.Thread.__init__(self, name=self.name)
        
    def __getstate__(self):
        """
        Threading uses Lock Object, do not consider these objects when serializing a workflow.

        @return: [dict] A copy of the workflow attributes without the Thread internals and
                 without the external components. The workflow itself is not modified.
        """
        odict = self.__dict__.copy()
        # The names of the Thread internals depend on the Python release ('_block' in 3.2,
        # '_tstate_lock' from 3.4, ...). Every known name is discarded and the missing ones
        # are ignored, so an interpreter that defines none of them no longer raises KeyError.
        # NOTE(review): '_invoke_excepthook', '_handle' and '_os_thread_handle' are taken from
        # recent CPython threading sources -- confirm against the supported Python versions.
        # __setstate__ calls Thread.__init__ again, which recreates these attributes.
        discarded = (
            '_started', '_tstate_lock', '_block', '_stderr',
            '_invoke_excepthook', '_handle', '_os_thread_handle',
            'external_components',
        )
        for attribute in discarded:
            odict.pop(attribute, None)
        return odict
    
628
629
630
631
632
633
634
635
    def set_to_address(self, to_address):
        """
        Store the recipient address of the workflow email. When it is set, _send_email()
        uses it instead of the recipient returned by the configuration reader.
        """
        self.__to_address = to_address

    def set_subject(self, subject):
        """
        Store the subject of the workflow email. When it is set, _send_email() uses it
        instead of the subject returned by the configuration reader.
        """
        self.__subject = subject

    def set_message(self, message):
        """
        Store the body of the workflow email. When it is set, _send_email() uses it
        instead of the message returned by the configuration reader.
        """
        self.__message = message
636
637
638
639

    def _get_cleaned_email_placeholders(self, text):
        """
        @summary: Returns the text after replacement of placeholders by the corresponding workflow values (method or attribute).
                  Placeholders must be an attribute or a method of the workflow between three sharps: ###attribute### or ###method()###.
                  You can add "|date" after the attribute or the method to convert a timestamp into a human readable date.
                  Examples: ###id### is replaced by wf.id ; ###get_status()### is replaced by wf.get_status() ; ###start_time|date### is replaced by wf.start_time in date format.
                  A placeholder that cannot be resolved (unknown attribute, failing method, value that is not a timestamp) is left unchanged in the text.
        @param text: [str] The text containing placeholders.
        @return: [str] The text with placeholders replaced by their real values.
        """
        # NOTE(review): a placeholder can reach any attribute or zero-argument method of the
        # workflow; make sure the templates only come from a trusted source.
        new_text = text
        # Raw string: "\#" is an invalid escape sequence in a normal string literal
        for placeholder in re.findall(r"###([^#]+)###", text):
            is_date = placeholder.endswith("|date")
            placeholder_key = placeholder[:-5] if is_date else placeholder
            try:
                # Get value
                if placeholder_key.endswith("()"):
                    placeholder_value = str(getattr(self, placeholder_key[:-2])())
                else:
                    placeholder_value = str(getattr(self, placeholder_key))
                # Apply date format
                if is_date:
                    jflow_date_format = self.jflow_config_reader.get_date_format()
                    placeholder_value = time.strftime(jflow_date_format + " %H:%M:%S", time.gmtime(float(placeholder_value)))
            except Exception:
                # Best effort: keep the placeholder as it is, but let KeyboardInterrupt
                # and SystemExit go through
                continue
            new_text = new_text.replace("###" + placeholder + "###", placeholder_value)
        return new_text

670
671
    def _send_email(self):
        """
        @summary: Sends the workflow email (HTML body) through the configured SMTP server.
                  The SMTP options, the sender, the recipient, the subject and the message come
                  from the configuration reader; the recipient, the subject and the message are
                  replaced by the values stored with set_to_address(), set_subject() and
                  set_message() when they are set. Without recipient the email is sent to the
                  sender. Without message the workflow status is used and without subject a
                  default one is built. Placeholders of the subject and of the message are
                  replaced by the workflow values.
                  Nothing is sent if the server, the port, the sender or its password is
                  missing, or if one of the addresses is not valid.
                  Failures are logged as warnings and never raised to the caller.
        """
        import smtplib
        from email.mime.text import MIMEText
        smtps, smtpp, froma, fromp, toa, subject, message = self.jflow_config_reader.get_email_options()

        if self.__to_address: toa = self.__to_address
        if self.__subject: subject = self.__subject
        if self.__message: message = self.__message

        if not (smtps and smtpp and froma and fromp):
            return
        if not toa: toa = froma
        if not (validate_email(froma) and validate_email(toa)):
            return

        try:
            # Create the HTML message
            if not message:
                message = Workflow.get_status_under_text_format(self, True, True, True)
            message = self._get_cleaned_email_placeholders( message )
            msg = MIMEText(message, 'html')
            if not subject:
                subject = "JFlow - Workflow #" + str(self.id) + " is " + self.get_status()
            subject = self._get_cleaned_email_placeholders( subject )
            msg['Subject'] = subject
            msg['From'] = froma
            msg['To'] = toa
            # Send the message via our own SMTP server, but don't include the
            # envelope header.
            s = smtplib.SMTP(smtps, smtpp)
            try:
                s.ehlo()
                # TLS and identification are optional: if the SMTP server does not provide
                # them the problem is logged and the message is sent anyway
                try:
                    s.starttls()
                    s.login(froma, fromp)
                except smtplib.SMTPHeloError:
                    self._log("The server didn't reply properly to the HELO greeting.", level="warning", traceback=traceback.format_exc(chain=False))
                except smtplib.SMTPAuthenticationError:
                    self._log("The server didn't accept the username/password combination.", level="warning", traceback=traceback.format_exc(chain=False))
                except smtplib.SMTPException:
                    self._log("No suitable authentication method was found, or the server does not support the STARTTLS extension.", level="warning", traceback=traceback.format_exc(chain=False))
                except RuntimeError:
                    self._log("SSL/TLS support is not available to your Python interpreter.", level="warning", traceback=traceback.format_exc(chain=False))
                except Exception:
                    self._log("Unhandled error when sending mail.", level="warning", traceback=traceback.format_exc(chain=False))
                s.sendmail(froma, [toa], msg.as_string())
            finally:
                # Always release the connection, even when ehlo() or sendmail() fails
                s.close()
        except Exception:
            self._log("Impossible to connect to smtp server '" + smtps + "'", level="warning", traceback=traceback.format_exc(chain=False))
721
    
722
723
724
725
    def get_parameters_per_groups(self):
        """
        @summary: Returns the workflow parameters gathered by group.
        @return: [list] Two elements: the dict of the parameters lists keyed by group and the
                 list of the groups in order of first appearance.
        """
        # NOTE(review): both results are unused; the calls are kept because get_description()
        # can raise NotImplementedError -- confirm no caller relies on it before removing them.
        self.get_name()
        self.get_description()
        grouped, group_order = {}, []
        for prm in self.get_parameters():
            if prm.group not in group_order:
                group_order.append(prm.group)
            grouped.setdefault(prm.group, []).append(prm)
        return [grouped, group_order]
Jerome Mariette's avatar
Jerome Mariette committed
734
    
735
736
737
    def get_parameters(self):
        """
        @summary: Returns the parameters of the workflow, i.e. the attributes whose class
                  derives from AbstractParameter.
        @return: [list] The parameters, in the order of the names listed in self.params_order.
        """
        attributes = list(self.__dict__.values())
        return [
            candidate
            for expected_name in self.params_order
            for candidate in attributes
            if issubclass(candidate.__class__, AbstractParameter) and expected_name == candidate.name
        ]
    
743
    def get_exec_path(self, software):
        """
        @summary: Returns the path to use to execute a software.
                  The path set in the configuration is used when there is one. Otherwise the
                  software is searched in the "../bin" then "bin" folder next to the file
                  defining the class of the workflow, then with utils.which().
        @param software: [str] The name of the software.
        @return: [str] The path of the executable, or the software name itself when it has
                 only been found by utils.which().
        @raise Exception: if the configured path is not a file or if the software cannot be found.
        """
        configured_path = self.jflow_config_reader.get_exec(software)
        if configured_path is not None:
            if not os.path.isfile(configured_path):
                raise Exception("'" + configured_path + "' set for '" + software + "' does not exists, please provide a valid path!")
            return configured_path

        class_directory = os.path.dirname(inspect.getfile(self.__class__))
        for bin_folder in ("../bin", "bin"):
            candidate = os.path.join(class_directory, bin_folder, software)
            if os.path.isfile(candidate):
                return candidate

        if utils.which(software) is None:
            raise Exception("'" + software + "' path connot be retrieved either in the PATH and in the application.properties file!")
        return software
756
    
Jerome Mariette's avatar
Jerome Mariette committed
757
758
    def add_component(self, component_name, args=[], kwargs={}, component_prefix="default"):
        """
        @summary: Builds a component, sets its output directory, prefix and parameters, then
                  records it in the workflow.
                  While self.component_nameids_is_init is False, duplicated components are
                  rejected and, once a dynamic component has been met, only the nameid of the
                  component is recorded (a dynamic component also empties the components lists).
                  When it is True the component is appended to the components lists and, for a
                  dynamic component, the weaver is executed and its dynamic outputs are updated.
        @param component_name: [str] The name of the component class; it must be a key of
                               self.internal_components or self.external_components.
        @param args: [list] The positional arguments of define_parameters(); only used for
                     internal components and only when kwargs is empty.
        @param kwargs: [dict] The keyword arguments of define_parameters().
        @param component_prefix: [str] The prefix of the component, also used in the name of
                                 its output directory.
        @return: The component object.
        @raise ImportError: if the component is neither an internal nor an external component.
        @raise ValueError: if the component is reported as duplicated.
        """
        is_internal = component_name in self.internal_components
        if not is_internal and component_name not in self.external_components:
            raise ImportError(component_name + " component cannot be loaded, available components are: {0}".format(
                                           ", ".join(list(self.internal_components.keys()) + list(self.external_components.keys()))))

        # first build the object and define required fields
        if is_internal:
            component_module = __import__(self.internal_components[component_name], globals(), locals(), [component_name])
            cmpt_object = getattr(component_module, component_name)()
        else:
            cmpt_object = self.external_components[component_name]()
        cmpt_object.output_directory = self.get_component_output_directory(component_name, component_prefix)
        cmpt_object.prefix = component_prefix
        if is_internal and not kwargs:
            cmpt_object.define_parameters(*args)
        else:
            # can't use positional arguments with external components
            cmpt_object.define_parameters(**kwargs)

        def register():
            # add the component to both lists
            self.components_to_exec.append(cmpt_object)
            self.components.append(cmpt_object)

        def reject_duplicate():
            if self._component_is_duplicated(cmpt_object):
                raise ValueError("Component " + cmpt_object.__class__.__name__ + " with prefix " +
                                    cmpt_object.prefix + " already exist in this pipeline!")

        if cmpt_object.is_dynamic():
            # there is a dynamic component
            self.dynamic_component_present = True
            if self.component_nameids_is_init:
                # already init: add the component, execute the weaver and update the outputs
                register()
                self._execute_weaver()
                for dynamic_output in cmpt_object.get_dynamic_outputs():
                    dynamic_output.update()
            else:
                reject_duplicate()
                self.component_nameids[cmpt_object.get_nameid()] = None
                self.components_to_exec = []
                self.components = []
        elif self.component_nameids_is_init:
            register()
        else:
            dynamic_already_met = self.dynamic_component_present
            reject_duplicate()
            if dynamic_already_met:
                self.component_nameids[cmpt_object.get_nameid()] = None
            else:
                register()

        return cmpt_object
Jerome Mariette's avatar
Jerome Mariette committed
817
818
819
820
821
822
823
824
    
    def pre_process(self):
        """
        Does nothing in this class.
        """
        # NOTE(review): presumably a hook that subclasses override to run code before
        # process() -- confirm against the callers.
        pass
    
    def process(self):
        """
        Run the workflow, has to be implemented by subclasses.

        @raise NotImplementedError: always, the message names the class of the workflow.
        """
        workflow_class = self.__class__.__name__
        raise NotImplementedError( "Workflow.process() must be implemented in " + workflow_class )
826
827
828

    def get_name(self):
        """
        Return the workflow name: the name of the workflow class in lower case.
        """
        class_name = self.__class__.__name__
        return class_name.lower()
832
833
834
835
836
    
    def get_description(self):
        """
        Return the workflow description, has to be implemented by subclasses.

        @raise NotImplementedError: always, the message names the class of the workflow.
        """
        workflow_class = self.__class__.__name__
        raise NotImplementedError( "Workflow.get_description() must be implemented in " + workflow_class )
838
    
839
    def define_parameters(self, function="process"):
        """
        Define the workflow parameters, has to be implemented by subclasses.

        @param function: [str] Not used by this implementation.
        @raise NotImplementedError: always, the message names the class of the workflow.
        """
        workflow_class = self.__class__.__name__
        raise NotImplementedError( "Workflow.define_parameters() must be implemented in " + workflow_class )
Jerome Mariette's avatar
Jerome Mariette committed
844
845
846
847
848
849
850
851
852
853
854
855
    
    def post_process(self):
        """
        Does nothing in this class.
        """
        # NOTE(review): presumably a hook that subclasses override to run code once the
        # workflow is over -- confirm against the callers.
        pass
    
    def get_temporary_file(self, suffix=".txt"):
        """
        @summary: Returns a new file path located in the jflow temporary directory. The file is
                  not created: only its unique name is reserved.
        @param suffix: [str] The suffix of the file name.
        @return: [str] The path of the temporary file.
        """
        # NamedTemporaryFile is only used to get a unique name. The context manager closes
        # (and so removes) the placeholder file right away instead of relying on the
        # garbage collector.
        with tempfile.NamedTemporaryFile(suffix=suffix) as placeholder:
            tempfile_name = os.path.basename(placeholder.name)
        return os.path.join(self.jflow_config_reader.get_tmp_directory(), tempfile_name)

    def get_component_output_directory(self, component_name, component_prefix):
        """
        @summary: Returns the output directory of a component: a folder of the workflow
                  directory named "<component_name>_<component_prefix>".
        @param component_name: [str] The name of the component.
        @param component_prefix: [str] The prefix of the component.
        @return: [str] The path of the directory.
        """
        folder_name = component_name + "_" + component_prefix
        return os.path.join(self.directory, folder_name)
    
    def get_components_nameid(self):
        """
        Return the list of the nameids recorded in self.component_nameids.
        """
        return [nameid for nameid in self.component_nameids.keys()]
Jerome Mariette's avatar
Jerome Mariette committed
857
    
Jerome Mariette's avatar
Jerome Mariette committed
858
859
    def wf_execution_wrapper(self):
        """
        Call, without argument, the method of the workflow named by self.function.
        Its result is not returned.
        """
        entry_point = getattr(self, self.function)
        entry_point()
Jerome Mariette's avatar
Jerome Mariette committed
860
    
Jerome Mariette's avatar
Jerome Mariette committed
861
862
863
864
    def run(self):
        """
        Only require for Threading
        """
865
866
        try:
            # if this is the first time the workflow run
867
            if self.__step == None:
868
                self.start_time = time.time()
Jerome Mariette's avatar
Jerome Mariette committed
869
                self.__step = 0
870
871
                self._status = self.STATUS_STARTED
                self._postprocess_status = self.STATUS_PENDING
872
                self.end_time = None
Jerome Mariette's avatar
Jerome Mariette committed
873
                # if some args are provided, let's fill the parameters
874
                self.set_parameters(self.args)