Commit 65c03cf6 authored by Ibouniyamine Nabihoudine
Browse files

mobyle component parsing

parent 0e7e5a33
......@@ -289,7 +289,8 @@ class Component(object):
if p:
commandline += " %s " % p.cmd_format
else :
commandline += " %s %s " % (p.cmd_format, p.default)
if p.default :
commandline += " %s %s " % (p.cmd_format, p.default)
abstraction = self.get_abstraction()
......
#
# Copyright (C) 2012 INRA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
import os
class ExternalParser(object):
    """
    Base class for parsers that turn external component descriptions into components.

    Subclasses implement parse(); parse_directory() applies it to every entry
    of a directory.
    """

    def parse_directory(self, component_directory):
        """
        Parse every entry found in a directory, skipping the ones that fail.

        @param component_directory: path of the directory to scan (entries are
                                    taken from os.listdir, in that order)
        @return: list of the values returned by parse() for the entries that
                 could be parsed
        Raises whatever os.listdir raises when the directory cannot be listed.
        """
        # local import: the module only imports os at file level
        import logging
        components = []
        for component_file in os.listdir(component_directory):
            component_path = os.path.join(component_directory, component_file)
            try:
                components.append(self.parse(component_path))
            # best effort: an unparsable entry is skipped, but only real errors
            # are swallowed (KeyboardInterrupt / SystemExit still propagate)
            except Exception as e:
                logging.getLogger(__name__).debug(
                    "File <{0}> cannot be parsed: {1}".format(component_path, e))
        return components

    def parse(self, component_file):
        """
        Parse one component description file; must be overridden by subclasses.

        @param component_file: path of the file to parse
        """
        raise NotImplementedError
......@@ -84,8 +84,6 @@ class MINIWorkflow(object):
class Workflow(threading.Thread):
PROPERTIES_FILE_NAME = "workflow.properties"
MAKEFLOW_LOG_FILE_NAME = "Makeflow.makeflowlog"
DUMP_FILE_NAME = ".workflow.dump"
STDERR_FILE_NAME = "wf_stderr.txt"
......@@ -156,7 +154,9 @@ class Workflow(threading.Thread):
self.stderr = self._set_stderr()
self._serialize()
self.comp_pckg = self._import_components()
self.internal_components = self._import_internal_components()
self.external_components = self._import_external_components()
def add_input_directory(self, name, help, default=None, required=False, flag=None,
group="default", display_name=None, get_files_fn=None, add_to=None):
......@@ -569,6 +569,7 @@ class Workflow(threading.Thread):
# Restore the instance attributes from `state` (the dict handed back on unpickling).
def __setstate__(self, state):
# install a copy so the instance does not share the caller's dict
self.__dict__ = state.copy()
# external components are rebuilt here instead of being read from `state`
self.external_components = self._import_external_components()
# re-run the Thread initialiser on the restored object, keeping its name
threading.Thread.__init__(self, name=self.name)
def __getstate__(self):
......@@ -579,6 +580,8 @@ class Workflow(threading.Thread):
del odict['_Thread__started']
del odict['_Thread__block']
del odict['_Thread__stderr']
if odict.has_key('external_components') :
del odict['external_components']
return odict
def set_to_address(self, to_address):
......@@ -664,14 +667,23 @@ class Workflow(threading.Thread):
def add_component(self, component_name, args=[], kwargs={}, component_prefix="default"):
# first build and check if this component is OK
if self.comp_pckg.has_key(component_name):
my_pckge = __import__(self.comp_pckg[component_name], globals(), locals(), [component_name], -1)
# build the object and define required field
cmpt_object = getattr(my_pckge, component_name)()
cmpt_object.output_directory = self.get_component_output_directory(component_name, component_prefix)
cmpt_object.prefix = component_prefix
if kwargs: cmpt_object.define_parameters(**kwargs)
else: cmpt_object.define_parameters(*args)
if self.internal_components.has_key(component_name) or self.external_components.has_key(component_name):
if self.internal_components.has_key(component_name) :
my_pckge = __import__(self.internal_components[component_name], globals(), locals(), [component_name], -1)
# build the object and define required field
cmpt_object = getattr(my_pckge, component_name)()
cmpt_object.output_directory = self.get_component_output_directory(component_name, component_prefix)
cmpt_object.prefix = component_prefix
if kwargs: cmpt_object.define_parameters(**kwargs)
else: cmpt_object.define_parameters(*args)
# external components
else :
cmpt_object = self.external_components[component_name]()
cmpt_object.output_directory = self.get_component_output_directory(component_name, component_prefix)
cmpt_object.prefix = component_prefix
# can't use positional arguments with external components
cmpt_object.define_parameters(**kwargs)
# there is a dynamic component
if cmpt_object.is_dynamic():
......@@ -711,7 +723,8 @@ class Workflow(threading.Thread):
return cmpt_object
else:
raise ImportError(component_name + " component cannot be loaded, available components are: {0}".format(", ".join(self.comp_pckg.keys())))
raise ImportError(component_name + " component cannot be loaded, available components are: {0}".format(
", ".join(self.internal_components.keys() + self.external_components.keys())))
# Hook with an empty default body; does nothing in this class.
def pre_process(self):
pass
......@@ -1000,14 +1013,7 @@ class Workflow(threading.Thread):
return True
return False
def _get_property_path(self):
    """
    Look for the properties file in the directory of the module defining this class.

    Return(type:string): the path to the workflow properties file, None if does not exist
    """
    class_directory = os.path.dirname(inspect.getfile(self.__class__))
    candidate = os.path.join(class_directory, self.PROPERTIES_FILE_NAME)
    if not os.path.isfile(candidate):
        return None
    return candidate
def _import_components(self):
def _import_internal_components(self):
pckge = {}
# then import pipeline packages
pipeline_dir = os.path.dirname(inspect.getfile(self.__class__))
......@@ -1031,7 +1037,36 @@ class Workflow(threading.Thread):
except Exception as e:
logging.getLogger("wf." + str(self.id)).debug("Component <{0}> cannot be loaded: {1}".format(modname, e))
return pckge
def _import_external_components(self):
    """
    Load the external parsers and use them to build external component classes.

    Parsers are the ExternalParser subclasses found in the modules of the
    'extparsers' directory (sibling of the pipeline directory). Each parser is
    run on the pipeline 'components' directory, then on the shared 'components'
    directory, so a shared component replaces a pipeline one with the same name.
    Each directory is handled on its own: a missing or unreadable directory no
    longer discards what the parser produced from the other one.

    Return(type:dict): component class name -> component class
    """
    pckge = {}
    parsers = []
    pipeline_dir = os.path.dirname(inspect.getfile(self.__class__))
    workflows_dir = os.path.dirname(pipeline_dir)
    # get extparsers
    extparsers_dir = os.path.join(workflows_dir, 'extparsers')
    for importer, modname, ispkg in pkgutil.iter_modules([extparsers_dir], "workflows.extparsers."):
        try:
            # __import__ returns the top-level package for a dotted name,
            # so the module itself is fetched from sys.modules
            __import__(modname)
            for class_name, obj in inspect.getmembers(sys.modules[modname], inspect.isclass):
                if issubclass(obj, jflow.extparser.ExternalParser) and obj.__name__ != jflow.extparser.ExternalParser.__name__:
                    parsers.append(obj())
        except Exception as e:
            logging.getLogger("wf." + str(self.id)).debug("Parser <{0}> cannot be loaded: {1}".format(modname, e))
    # import from pipeline components package, then from shared components package
    components_dirs = [
        os.path.join(pipeline_dir, "components"),
        os.path.join(workflows_dir, "components"),
    ]
    for parser in parsers:
        for components_dir in components_dirs:
            if not os.path.isdir(components_dir):
                continue
            try:
                for c in parser.parse_directory(components_dir):
                    pckge[c.__name__] = c
            except Exception as e:
                logging.getLogger("wf." + str(self.id)).debug(
                    "Components of <{0}> cannot be loaded: {1}".format(components_dir, e))
    return pckge
def _import(self, module, symbols):
""" Import ``symbols`` from ``module`` into global namespace. """
# Import module
......
#
# Copyright (C) 2012 INRA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
#
# Copyright (C) 2012 INRA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
import os
import re
import copy
import fnmatch
import xml.etree.ElementTree as ET
from jflow.component import Component
from jflow.extparser import ExternalParser
import argparse
"""
For nested class serialization, I use a modified version of what is stated here :
http://stackoverflow.com/questions/1947904/how-can-i-pickle-a-nested-class-in-python
_MobyleComponentFactory will contain all created mobyle classes.
_NestedClassGetter is a callable that will be called by __reduce__ for serialization
and that will provide us with an instance of our nested class
_SerializableNestedComponent is the component class used to create our mobyle classes
"""
class _NestedClassGetter(object):
    """
    Callable used when serializing nested objects.

    Called with the containing class and the name of one of its attributes
    (the nested class), it hands back an object whose class is that nested class.
    """
    def __call__(self, containing_class, class_name):
        target_class = getattr(containing_class, class_name)
        # start from a plain helper object and swap its class afterwards:
        # the target __init__ is never run, pickle fills in the state later on
        placeholder = _NestedClassGetter()
        placeholder.__class__ = target_class
        return placeholder
class _SerializableNestedComponent(Component):
"""
Component base class whose instances are reduced (see __reduce__) to a
_NestedClassGetter call on _MobyleComponentFactory plus a copy of their __dict__.
"""
def __init__(self):
Component.__init__(self)
# NOTE(review): pickle's restore hook is spelled __setstate__; confirm whether
# this differently named method is ever called.
def __set_state__(self, state):
# replace the instance attributes with a copy of `state`
self.__dict__ = state.copy()
def __reduce__(self):
state = self.__dict__.copy()
# (callable, args, state): the callable receives the factory class and this
# instance's class name; `state` is the copied attribute dict
return (_NestedClassGetter(), (_MobyleComponentFactory, self.__class__.__name__, ), state, )
class _MobyleComponentFactory(object): pass
"""
callable object used for the serialization of custom types
"""
# Base class for callable custom types that must be reducible; __reduce__
# hands (_container, class name) to _NestedClassGetter.
class _serializable_nested_function(object):
def __init__(self):
# NOTE(review): object.__class__ evaluates to `type`; confirm this is the
# intended container, since _NestedClassGetter does getattr(container, class name).
self._container = object.__class__
# NOTE(review): pickle's restore hook is spelled __setstate__; confirm whether
# this differently named method is ever called.
def __set_state__(self, state):
self.__dict__ = state.copy()
def __reduce__(self):
state = self.__dict__.copy()
return (_NestedClassGetter(), (self._container, self.__class__.__name__, ), state, )
class MobyleParser(ExternalParser):
# Build a component class from one XML description file.
# Reads the command (.//head/command) and every parameter node, defines three
# functions closing over them, and returns a new _SerializableNestedComponent
# subclass named after .//head/name that uses those functions as methods.
# Raises Exception when two parameters share the same name.
def parse(self, component_file):
parameters = []
parameters_names = []
tree = ET.parse(component_file)
root = tree.getroot()
# get command string
# (None when the XML has no head/command element)
command = root.findtext(".//head/command", None)
# retrieve all parameters from xml file
for parameterNode in root.findall('.//parameters/parameter[name]'):
# NOTE(review): attrs is not read again inside this function
attrs = parameterNode.attrib
param = self._parseParameter(parameterNode)
if param['name'] in parameters_names :
raise Exception('Duplicated parameter (%s)'%param['name'])
parameters.append(param)
parameters_names.append(param['name'])
# get_command of the generated class: the command from the XML head when there
# is one, otherwise self._command (assigned in fn_define_parameters from the
# parameter flagged 'iscommand')
def fn_get_command(self):
if command is None:
return self._command
return command
# get_abstraction of the generated class: returns None
def fn_get_abstraction(self):
return
# define_parameters of the generated class: keyword arguments only, each one
# must be the name of a parameter parsed above
def fn_define_parameters(self, **kwargs):
# invalid user arguments
for userarg in kwargs.keys():
if userarg not in parameters_names :
raise Exception("Invalid argument '%s' for %s"%(userarg, self.get_nameid()))
# all parameters
# value = user supplied value when given, 'vdef' otherwise
for param in parameters:
pname = param['name']
param['value'] = param.get('vdef')
if kwargs.has_key(pname) :
param['value'] = kwargs[pname]
printval = param['value']
if param['type'] == 'file' or type(param['value']) == str :
printval = "'" + str(param['value']) + "'"
# binds a variable named after the parameter so that the code strings
# evaluated below can refer to it (Python 2 exec statement)
# NOTE(review): the executed text is built from the XML parameter name and the
# value; confirm both are trusted and that quotes in values are handled.
exec "%s = %s"%(pname, printval) in globals(), locals()
filenames_parameters = []
# resolve format
for param in parameters:
pname = param['name']
# value / vdef are bound as locals; presumably so the evaluated code strings
# can reference them - TODO confirm
value = param.get('value')
vdef = param.get('vdef')
if param['format'] is not None :
# NOTE(review): eval of a code string coming from the XML file
param['format'] = eval( param['format'] )
# NOTE(review): str(value) is used as a regular expression without escaping
param['format'] = re.sub(r""+str(value)+"", "", param['format'])
# TODO : do I need to remove the '=' ?????
param['format'] = re.sub(r"=$", "", param['format'])
if not param['format'].strip() :
param['value'] = None
elif param['flist'] :
vlist = param['flist'].keys()
# custom string type depending on vlist for keys and convert to flist
# NOTE(review): __call__ reads `param` and `vlist` from the enclosing function
# when it is called, and `param` is rebound by the loops further down; confirm
# the intended entry is the one used.
class flist_type(_serializable_nested_function):
def __call__(self, vvv):
if vvv in vlist :
return eval(param['flist'][vvv])
raise argparse.ArgumentTypeError("%s is an invalid argument, valid one are (%s)"%(vvv, ', '.join( map(str,vlist) )) )
param['type'] = flist_type
param['vlist'] = vlist
if param['value'] :
# NOTE(review): the class is called with one argument here, while
# _serializable_nested_function.__init__ takes none; confirm.
param['value'] = flist_type(param['value'])
if param['isfilename'] :
filenames_parameters.append(param)
# add parameters
for param in parameters:
pname = param['name']
# first thing, is our parameter a command ??
if param['iscommand'] :
self._command = param['value'] or param['format']
continue
# keyword arguments handed to the add_* method chosen below
arguments = {}
arguments['required'] = param.get('required', False)
if param['value'] :
arguments['default'] = param['value']
if param['vlist'] :
arguments['choices'] = param['vlist']
if param['format'] :
arguments['cmd_format'] = param['format']
if param['argpos'] :
arguments['argpos'] = param['argpos']
if param['type'] == 'file' :
# output file
if param['isoutput'] :
# default / choices / required are not passed on for output files
arguments.pop('default', None)
arguments.pop('choices', None)
arguments.pop('required', None)
# if precondition is not valid, do not add the outputfile parameter
if param.get('preconditions') :
value = param.get('value')
vdef = param.get('vdef')
evaluated = True
for precond in param['preconditions'] :
# NOTE(review): eval of a code string coming from the XML file
if not eval(precond) :
evaluated = False
break
# skip to the next parameter
if not evaluated : continue
# resolve the output filename
try :
nfilenames = []
for filename in param['filenames'] :
nfilenames.append(eval(filename))
param['filenames'] = nfilenames
except :
raise Exception("Syntax error with output file parameter '%s' - filenames tag is incorrect"%pname)
if len(param['filenames']) == 1 :
filename = param['filenames'][0]
# try to get the format from an existing filename parameter
for idx,filenameparam in enumerate(filenames_parameters) :
if str(filenameparam['value']) == str(filename) :
# take the format from the filenameparameter
arguments['cmd_format'] = filenameparam['format']
break
# re.match only looks at the start of the string, so only names beginning
# with '*' or '?' are handled as patterns
if re.match(r"[*?]+", filename) :
# to regexp
filename = fnmatch.translate(filename)
self.add_output_file_pattern( pname, param['help'], filename, **arguments )
else :
arguments['filename'] = filename
self.add_output_file( pname, param['help'], **arguments )
# multiple filenames
# one output parameter per file name, named <pname>_<index>
else :
for idx, filename in enumerate(param['filenames']) :
new_name = pname + "_" + str(idx)
if re.match(r"[*?]+", filename) :
filename = fnmatch.translate(filename)
self.add_output_file_pattern( new_name, param['help'], filename, **arguments )
else :
arguments['filename'] = filename
self.add_output_file( new_name, param['help'], **arguments )
else :
self.add_input_file(pname, param['help'], **arguments)
else :
# 'type' is only forwarded when the parameter is not a file name
if not param['isfilename'] :
arguments['type'] = param['type']
self.add_parameter(pname, param['help'], **arguments)
# controls
# each control is a (code, message) pair; code is evaluated only when the
# parameter has a truthy value, and a falsy result raises with the message
for param in parameters :
pname = param['name']
if param.get("controls") :
value = param.get('value')
vdef = param.get('vdef')
if value :
for ctrl in param.get("controls") :
if not eval(ctrl[0]) :
raise Exception('The parameter %s does not respect the following condition : %s'%(pname, ctrl[1]))
# class name: head/name with '-' and '_' turned into spaces, title-cased,
# then all whitespace removed
component_name = root.find(".//head/name").text.replace('-', ' ').replace('_', ' ')
component_name = "".join(component_name.title().split())
MobyleComponent = type(component_name, (_SerializableNestedComponent,),{
'get_command' : fn_get_command,
'get_abstraction' : fn_get_abstraction,
'define_parameters' : fn_define_parameters
})
# serialization hack ...
# the class is stored on the factory under its own name, which is where
# _SerializableNestedComponent.__reduce__ tells _NestedClassGetter to look
setattr(_MobyleComponentFactory, component_name, MobyleComponent)
return MobyleComponent
def _parseParameter(self, parameterNode):
attrs = parameterNode.attrib
# convert value to boolean, since values will be string
# they are first converted to int
def fn_bool(v):
return bool(int(v))
PYTHON_TYPES = {
'String' : { 'func' : str, 'str' : 'str' },
'Integer' : { 'func' : int, 'str' : 'int' },
'Float' : { 'func' : float, 'str' : 'float'},
'Boolean' : { 'func' : fn_bool, 'str': 'bool'}
}
SIMPLE_TYPES = [ 'Choice', 'MultipleChoice']
param = {
'name' : parameterNode.findtext('./name'),
'help' : parameterNode.findtext('./prompt'),
'iscommand' : False,
'format' : None,
'type' : None,
'isfilename' : False,
'required' : False,
'multiple' : False,
'isoutput' : False,
'isstdout' : False,
'vdef' : None,
'argpos' : -1,
'vlist' : None, # list of choices
'flist' : None, # list of format
'controls' : None,
'preconditions' : None,
'filenames' : None
}
if 'iscommand' in attrs and attrs['iscommand'] in ["1","true"] :
param['iscommand'] = True
if ('ismandatory' in attrs and attrs['ismandatory'] in ["1","true"]) or \
('ismaininput' in attrs and attrs['ismaininput'] in ["1","true"]) :
param['required'] = True
if 'isout' in attrs and attrs[ 'isout' ] in ["1","true"]:
param['isoutput'] = True
if 'isstdout' in attrs and attrs[ 'isstdout' ] in ["1","true"]:
param['isstdout'] = True
param['isoutput'] = True
klass = parameterNode.findtext("./type/datatype/class", '').strip()
superclass = parameterNode.findtext("./type/datatype/superclass", '').strip()
casting = None
if superclass :
klass = superclass
if klass in PYTHON_TYPES :
casting = PYTHON_TYPES[klass]['func']
param['type'] = PYTHON_TYPES[klass]['str']
elif klass == "Filename" :
casting = PYTHON_TYPES['String']['func']
param['type'] = PYTHON_TYPES['String']['str']
param['isfilename'] = True
elif klass not in SIMPLE_TYPES :
param['type'] = 'file'
# TODO : missing the case of choice in SIMPLE_TYPES ....
# format represent the flag of the command
formatCode = parameterNode.findtext('./format/code[@proglang="python"]', '').strip()
if formatCode :
param['format'] = formatCode
# position on command line
try :
param['argpos'] = int( parameterNode.findtext( './argpos' ) )
except :
param['argpos'] = -1
# default value
vdefs = []
for vdefNode in parameterNode.findall( './vdef/value' ):
vdefs.append( casting(vdefNode.text) if casting else vdefNode.text)
if vdefs:
if len(vdefs) == 1 :
param['vdef'] = casting(vdefs[0]) if casting else vdefs[0]
else :
param['vdef'] = map(casting, vdefs) if casting else vdefs
# vlist : choices
if parameterNode.find( './vlist' ) is not None :
vlist = []
for velem in parameterNode.findall( './vlist/velem' ) :
label = velem.findtext('./value' , '' ).strip()
val = velem.findtext('./value' , '' ).strip()
if velem.attrib.get('undef', '') in ['1', "true"] :
if param['vdef'] == label :
param['vdef'] = None
else :
vlist.append( casting(val) if casting else val)
if vlist :
param['vlist'] = vlist
# flist : list of format
if parameterNode.find( './flist' ) is not None :
flist = {}
for felem in parameterNode.findall( './flist/felem' ) :
label = felem.findtext('./value' , '' ).strip()
format = felem.findtext('./code[@proglang="python"]' , '' ).strip()
if felem.attrib.get('undef', '') in ['1', "true"] :
if param['vdef'] == label :
param['vdef'] = None
else :
flist[label] = format
if flist:
param['flist'] = flist
# controls
controls = []
for ctrl in parameterNode.findall( './ctrl' ):
message = ' '.join([ n.text for n in ctrl.findall( './message/text' )])
for codeNode in ctrl.findall( './code[@proglang="python"]'):
code = codeNode.text
controls.append( (code,message) )
if controls :
param['controls'] = controls
# preconditions
preconditions = []
for precondNode in parameterNode.findall( './precond/code[@proglang="python"]' ) :
precond = precondNode.text.strip()
if precond != '':
preconditions.append(precond)
if preconditions:
param['preconditions'] = preconditions