Commit 52bd4b92 authored by Jerome Mariette
Browse files

update iofeatures and add python object parameter

parent 883316b2
......@@ -33,6 +33,8 @@ from jflow.abstraction import MultiMap
from weaver.util import parse_string_list
from weaver.function import ShellFunction
from weaver.abstraction import Map
from weaver.function import PythonFunction
class Component(object):
......@@ -149,6 +151,63 @@ class Component(object):
self.params_order.append(name)
self.__setattr__(name, new_param)
def add_input_object(self, name, help, default=None, required=False):
    """Declare an input object parameter on this component.

    An ``InputObject`` is built from the arguments and tagged with this
    component's name id. When ``default`` (or the items of a ``default``
    list) already carries a ``linkTrace_nameid``, that id is recorded as the
    parent of the new parameter, and the wrapped default object(s) are
    re-tagged with this component's name id.

    Args:
        name: attribute name under which the parameter is stored.
        help: help text forwarded to ``InputObject``.
        default: a single object, a list of objects, or None.
        required: forwarded to ``InputObject``.
    """
    param = InputObject(name, help, default=default, required=required)
    # remember which component declared this parameter
    param.linkTrace_nameid = self.get_nameid()
    if isinstance(default, list):
        for position, item in enumerate(default):
            if not hasattr(item, "linkTrace_nameid"):
                continue
            # register each upstream producer only once
            if item.linkTrace_nameid not in param.parent_linkTrace_nameid:
                param.parent_linkTrace_nameid.append(item.linkTrace_nameid)
            param.default[position].parent_linkTrace_nameid = [item.linkTrace_nameid]
            param.default[position].linkTrace_nameid = self.get_nameid()
    elif hasattr(default, "linkTrace_nameid"):
        param.parent_linkTrace_nameid = [default.linkTrace_nameid]
        param.default.parent_linkTrace_nameid = [default.linkTrace_nameid]
        param.default.linkTrace_nameid = self.get_nameid()
    # expose the parameter on the component itself
    self.params_order.append(name)
    setattr(self, name, param)
def add_input_object_list(self, name, help, default=None, required=False):
    """Declare a parameter holding a list of input objects.

    An ``InputObjectList`` is built from the arguments and tagged with this
    component's name id. Every item of the parameter's default list that
    carries a ``linkTrace_nameid`` has that id recorded as a parent of the
    new parameter, and is then re-tagged with this component's name id.

    Args:
        name: attribute name under which the parameter is stored.
        help: help text forwarded to ``InputObjectList``.
        default: list of objects; None is treated as an empty list.
        required: forwarded to ``InputObjectList``.
    """
    # identity test: "== None" would call a custom __eq__ on the argument
    if default is None:
        default = []
    new_param = InputObjectList(name, help, default=default, required=required)
    # store where the parameter is coming from
    new_param.linkTrace_nameid = self.get_nameid()
    for idx, val in enumerate(new_param.default):
        if hasattr(val, "linkTrace_nameid"):
            # register each upstream producer only once
            if val.linkTrace_nameid not in new_param.parent_linkTrace_nameid:
                new_param.parent_linkTrace_nameid.append(val.linkTrace_nameid)
            new_param.default[idx].parent_linkTrace_nameid = [val.linkTrace_nameid]
            new_param.default[idx].linkTrace_nameid = self.get_nameid()
    # add it to the class itself
    self.params_order.append(name)
    self.__setattr__(name, new_param)
def add_output_object(self, name, help, required=False):
    """Declare an output object parameter on this component.

    An ``OutputObject`` is built, then both the parameter and the object it
    exposes through ``default`` are tagged with this component's name id.

    Args:
        name: attribute name under which the parameter is stored.
        help: help text forwarded to ``OutputObject``.
        required: forwarded to ``OutputObject``.
    """
    output_param = OutputObject(name, help, required=required)
    # remember which component produces this parameter ...
    output_param.linkTrace_nameid = self.get_nameid()
    # ... and tag the wrapped object with the same origin
    owner_id = self.get_nameid()
    output_param.default.linkTrace_nameid = owner_id
    # expose the parameter on the component itself
    self.params_order.append(name)
    setattr(self, name, output_param)
def add_output_object_list(self, name, help, nb_items=0, required=False):
    """Declare a parameter holding a list of output objects.

    An ``OutputObjectList`` is built, then the parameter and every item of
    its ``default`` list are tagged with this component's name id.

    Args:
        name: attribute name under which the parameter is stored.
        help: help text forwarded to ``OutputObjectList``.
        nb_items: forwarded to ``OutputObjectList``.
        required: forwarded to ``OutputObjectList``.
    """
    output_list = OutputObjectList(name, help, nb_items=nb_items, required=required)
    # remember which component produces this parameter
    output_list.linkTrace_nameid = self.get_nameid()
    position = 0
    while position < len(output_list.default):
        output_list.default[position].linkTrace_nameid = self.get_nameid()
        position += 1
    # expose the parameter on the component itself
    self.params_order.append(name)
    setattr(self, name, output_list)
def add_output_file(self, name, help, file_format="any", filename=None, group="default", display_name=None,
cmd_format="", argpos=-1):
filename = os.path.basename(filename)
......@@ -399,3 +458,105 @@ class Component(object):
def __eq__(self, other):
return self.__class__ == other.__class__ and self.prefix == other.prefix
def __getattribute__(self, attr):
# Intercepts every attribute read on the component. The raw attribute is always
# fetched with object.__getattribute__ so this hook does not call itself.
# When the raw attribute exposes a `default` holding an IO object (or a
# non-empty list whose first item is one), that `default` is returned instead
# of the attribute; every other attribute is returned unchanged.
# NOTE(review): indentation is lost in this view, so the exact nesting of the
# branches below cannot be confirmed from here -- check against the real file.
# an IOobject is a specific object defined by the presence of the dump_path attribute
# NOTE(review): the tests below actually look at OObject / `is_ioobject`;
# `dump_path` is only used to check whether a dump exists on disk.
if hasattr(object.__getattribute__(self, attr), "default"):
# an OObject whose dump file exists is replaced in place by what load() returns
if isinstance (object.__getattribute__(self, attr).default, OObject) and os.path.exists(object.__getattribute__(self, attr).default.dump_path):
object.__getattribute__(self, attr).default=object.__getattribute__(self, attr).default.load()
# single IO object: hand back the object rather than the attribute holding it
if hasattr(object.__getattribute__(self, attr).default, "is_ioobject"):
return object.__getattribute__(self, attr).default
# non-empty list: only the first item is inspected to decide how to treat the list
elif isinstance(object.__getattribute__(self, attr).default, list) and len(object.__getattribute__(self, attr).default)>0:
if isinstance(object.__getattribute__(self, attr).default[0], OObject):
# replace each item whose dump file exists by what its load() returns
for i, val in enumerate (object.__getattribute__(self, attr).default):
if os.path.exists(val.dump_path):
object.__getattribute__(self, attr).default[i]=val.load()
if hasattr(object.__getattribute__(self, attr).default[0], "is_ioobject"):
return object.__getattribute__(self, attr).default
# fall through: return the attribute itself
return object.__getattribute__(self, attr)
def __generate_iolist (self, ioparameter, map):
new_ios = []
includes = []
if map :
if len (ioparameter) >0 :
if isinstance(ioparameter[0], list):
for cin in ioparameter:
if hasattr(cin[0], "is_ioobject"):
new_ios.append([i.dump_path for i in cin])
else:
new_ios.append(cin)
else:
for cin in ioparameter:
if hasattr(cin, "is_ioobject"):
new_ios.append(cin.dump_path)
else:
new_ios.append(cin)
else :
new_ios = []
if hasattr(ioparameter, "is_ioobject"):
includes.extend(ioparameter.includes)
new_ios.append(ioparameter.dump_path)
elif isinstance(ioparameter, list):
for cin in ioparameter:
if hasattr(cin, "is_ioobject"):
includes.extend(cin.includes)
new_ios.append(cin.dump_path)
else:
new_ios.append(cin)
else:
new_ios = ioparameter
return new_ios,includes
def add_python_execution(self, function, inputs=[], outputs=[], arguments=[], includes=[],
add_path=None, collect=False, local=False, map=False, cmd_format=""):
if map:
if arguments != [] :
logging.getLogger("jflow").exception("add_python_execution: '" + function.__name__ + "' arguments parameter not allowed with map!")
raise Exception("add_python_execution: '" + function.__name__ + "' arguments parameter not allowed with map!" )
if not issubclass(inputs.__class__, list) or not issubclass(outputs.__class__, list):
logging.getLogger("jflow").exception("add_python_execution: '" + function.__name__ + "' map requires a list as inputs and output!")
raise Exception("add_python_execution: '" + function.__name__ + "' map requires a list as inputs and output!")
#Command format to build
if cmd_format == "" :
cmd_format = "{EXE} "
if len(arguments)>0:
cmd_format += " {ARG}"
if (isinstance(inputs, list) and len(inputs)>0) or (inputs is not None and inputs != []):
cmd_format += " {IN}"
if (isinstance(outputs, list) and len(outputs)>0) or (outputs is not None and outputs != []):
cmd_format += " {OUT}"
py_function = PythonFunction(function, add_path=add_path, cmd_format=cmd_format)
new_inputs,includes_in = self.__generate_iolist(inputs, map)
new_outputs,includes_out = self.__generate_iolist(outputs, map)
if not isinstance(includes, list):
includes=[includes]
if map:
MultiMap(py_function, inputs=new_inputs, outputs=new_outputs, includes=includes+includes_in, collect=collect, local=local)
else:
py_function(inputs=new_inputs, outputs=new_outputs, arguments=arguments, includes=includes+includes_in)
def add_shell_execution(self, source, inputs=[], outputs=[], arguments=[], includes=[],
cmd_format=None, map=False, shell=None, collect=False, local=False):
shell_function = ShellFunction( source, shell=shell, cmd_format=cmd_format )
# if abstraction is map or multimap
if map :
# if input and output are list or filelist
if issubclass(inputs.__class__, list) and issubclass(outputs.__class__, list) :
# arguments cannot be set with
if arguments != [] :
logging.getLogger("jflow").exception("add_shell_execution: '" + source + "' arguments parameter not allowed with map")
raise Exception("add_shell_execution: '" + source + "' arguments parameter not allowed with map" )
MultiMap(shell_function,inputs=inputs, outputs=outputs, includes=includes, collect=collect, local=local)
else :
logging.getLogger("jflow").exception("add_shell_execution: '" + source + "' map requires a list as inputs and output")
raise Exception("add_shell_execution: '" + source + "' map requires a list as inputs and output")
else :
shell_function( inputs=inputs, outputs=outputs, arguments=arguments, includes=includes )
......@@ -41,8 +41,7 @@ class BEDReader(_AbstractFeatureReader):
def _wholefile_iter(self):
wholefile = self.fp.read()
assert '\r' not in wholefile, "Sorry, currently don't know how to deal with files that contain \\r linebreaks"
assert len(wholefile) == 0 , "Empty BED file"
assert len(wholefile) != 0 , "Empty BED file"
for line in wholefile.split('\n') :
if line.startswith('#') :
......
......@@ -52,8 +52,7 @@ class MpileupReader(_AbstractFeatureReader):
def _wholefile_iter(self):
wholefile = self.fp.read()
assert '\r' not in wholefile, "Sorry, currently don't know how to deal with files that contain \\r linebreaks"
assert len(wholefile) == 0 , "Empty VCF file"
assert len(wholefile) != 0 , "Empty VCF file"
for line in wholefile.split('\n') :
row = line.rstrip().split('\t')
......
......@@ -69,7 +69,6 @@ class OboReader(_AbstractFeatureReader):
David Benjamin Gordon).
"""
wholefile = self.fp.read()
assert '\r' not in wholefile, "Sorry, currently don't know how to deal with files that contain \\r linebreaks"
parts = wholefile.split('\n[Term]')
id = None
......
......@@ -61,7 +61,7 @@ class VCFReader(_AbstractFeatureReader):
if line.startswith('#') :
if line.startswith('#CHROM') :
row = line.rstrip().split('\t')
if len(row) <= 9 :
if len(row) < 8 :
raise FormatError( 'Invalid number of columns in your vcf header file {0}'.format(len(row)) )
for i in range(9, len(row)) :
self.samples_name.append( ( row[i] , os.path.splitext(os.path.basename(row[i]))[0] ) )
......@@ -69,8 +69,8 @@ class VCFReader(_AbstractFeatureReader):
else :
raise FormatError( 'The vcf file {0}must start with header lines (#) !!!'.format(self.fp.name) )
self.fp.seek(0,0)
if len(self.samples_name) < 0 :
raise FormatError( "Invalid VCF file {0}. Could not retrieve the sample names headers".format(self.fp.name) )
#if len(self.samples_name) < 0 :
# raise FormatError( "Invalid VCF file {0}. Could not retrieve the sample names headers".format(self.fp.name) )
def _process_line(self,line):
row = line.rstrip().split('\t')
......@@ -83,7 +83,7 @@ class VCFReader(_AbstractFeatureReader):
'qual' : autocast(row[5]),
'filter' : row[6],
'info' : {},
'format' : row[8].split(';'),
'format' : [],
'samples' : [],
'is_indel': False
})
......@@ -93,16 +93,24 @@ class VCFReader(_AbstractFeatureReader):
regexp_none=re.compile("\.(\/\.)*")
#if row[7] != '.' :
# variation.addattr( 'info', { p.split('=')[0] : autocast(p.split('=')[1]) for p in row[7].split(';') })
format = row[8].split(':')
for lib_infos in range (9,len(row)) :
if not regexp_none.match(row[lib_infos]):
sformat = row[lib_infos].split(':')
variation.samples.append( Entry(**{ autocast(format[i]) : autocast(sformat[i]) if sformat[i] != '.' else None for i in range(0,len(format)) } ) )
else :
variation.samples.append( Entry(**{ autocast(format[i]) : None for i in range(0,len(format)) }) )
if len(row) == 8 and row[7] != '.' :
info={}
for p in row[7].split(';') :
tab= p.split('=')
if len(tab)>1 :
info[tab[0]] = autocast(tab[1])
else :
info[tab[0]] = True
variation.addattr( 'info', info)
if len(row) > 8 :
format = row[8].split(';')
variation.format = format
for lib_infos in range (9,len(row)) :
if not regexp_none.match(row[lib_infos]):
sformat = row[lib_infos].split(':')
variation.samples.append( Entry(**{ autocast(format[i]) : autocast(sformat[i]) if sformat[i] != '.' else None for i in range(0,len(format)) } ) )
else :
variation.samples.append( Entry(**{ autocast(format[i]) : None for i in range(0,len(format)) }) )
return variation
def _streaming_iter(self):
......@@ -113,8 +121,7 @@ class VCFReader(_AbstractFeatureReader):
def _wholefile_iter(self):
wholefile = self.fp.read()
assert '\r' not in wholefile, "Sorry, currently don't know how to deal with files that contain \\r linebreaks"
assert len(wholefile) == 0 , "Empty VCF file"
assert len(wholefile) != 0 , "Empty VCF file"
for line in wholefile.split('\n') :
if line.startswith('#') :
continue
......
......@@ -49,8 +49,7 @@ class WEGOReader(_AbstractFeatureReader):
def _wholefile_iter(self):
wholefile = self.fp.read()
assert '\r' not in wholefile, "Sorry, currently don't know how to deal with files that contain \\r linebreaks"
assert len(wholefile) == 0 , "Empty WEGO file"
assert len(wholefile) != 0 , "Empty WEGO file"
if not wholefile.startswith('!WGOP') :
raise FormatError('WEGO header not found (!WEGOP_), invalid WEGO file ')
......
......@@ -246,8 +246,7 @@ class FastaReader(object):
David Benjamin Gordon).
"""
wholefile = self.fp.read()
assert '\r' not in wholefile, "Sorry, currently don't know how to deal with files that contain \\r linebreaks"
assert len(wholefile) == 0 or wholefile[0] == '>', "FASTA file must start with '>'"
assert len(wholefile) != 0 and wholefile[0] == '>', "FASTA file must start with '>'"
parts = wholefile.split('\n>')
# first part has '>' in front
parts[0] = parts[0][1:]
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment