Commit aa9136de authored by Penom Nom's avatar Penom Nom

Update jflow graph to pygraph and update radseq pipeline.

parent 449bc8d8
......@@ -237,13 +237,17 @@ class JFlowServer (object):
parts = key.split(JFlowServer.MULTIPLE_TYPE_SPLITER)
# if this is a classic Parameter
if len(parts) == 1:
kwargs_modified[key] = kwargs[key]
values = kwargs[key].encode('utf8').split(",")
if len(values) > 1:
kwargs_modified[key] = values
else:
kwargs_modified[key] = values[0]
# if this is a MultiParameter
elif len(parts) == 2:
if kwargs_modified.has_key(parts[0]):
kwargs_modified[parts[0]].append((parts[1], kwargs[key]))
kwargs_modified[parts[0]].append((parts[1], kwargs[key].encode('utf8')))
else:
kwargs_modified[parts[0]] = [(parts[1], kwargs[key])]
kwargs_modified[parts[0]] = [(parts[1], kwargs[key].encode('utf8'))]
# if this is a MultiParameterList
# TODO: du cote interface faire qq chose du genre: key.sub_key.1 ... donc si len == 3
# l'objectif etant d'avoir une structure de type: [[(sub_key1: val), (sub_key2: val)], [(sub_key1: val2), (sub_key2: val2)]]
......@@ -335,6 +339,7 @@ class JFlowServer (object):
elif kwargs["display"] == "graph":
g = workflow.get_execution_graph()
status = self.jsonify_workflow_status(workflow)
status["nodes"] = g.nodes()
status["edges"] = g.edges()
return status
else:
......@@ -376,10 +381,15 @@ class JFlowServer (object):
try:
value_key = None
for key in kwargs.keys():
if key != "type" and key != "callback" and key != "_":
if key != "type" and key != "callback" and key != "_" and key != "action":
value_key = key
break
create_test_function(kwargs["type"])(kwargs[value_key])
# if it's an append parameter, let's check each value
if kwargs["action"] == "append":
for cval in kwargs[value_key].split("\n"):
create_test_function(kwargs["type"])(cval.encode('utf8'))
else:
create_test_function(kwargs["type"])(kwargs[value_key].encode('utf8'))
return True
except Exception, e:
return str(e)
......
......@@ -132,7 +132,9 @@ if __name__ == '__main__':
wfmanager.reset_workflow_component(args["workflow_id"], args["component_name"])
elif args["cmd_object"] == "execution_graph":
workflow = wfmanager.get_workflow(args["workflow_id"])
print workflow.get_execution_graph()
gr = workflow.get_execution_graph()
print "nodes: ", gr.nodes()
print "edges: ", gr.edges()
elif args["cmd_object"] == "status":
if args["workflow_id"]:
workflow = wfmanager.get_workflow(args["workflow_id"])
......
#
# Copyright (C) 2012 INRA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
class Graph(object):
def __init__(self, graph_dict={}):
""" initializes a graph object """
self.__graph_dict = graph_dict
def vertices(self):
""" returns the vertices of a graph """
return list(self.__graph_dict.keys())
def edges(self):
""" returns the edges of a graph """
edges = []
for edge in self.__generate_edges():
edges.append(list(edge))
return edges
def add_vertex(self, vertex):
""" If the vertex "vertex" is not in
self.__graph_dict, a key "vertex" with an empty
list as a value is added to the dictionary.
Otherwise nothing has to be done.
"""
if vertex not in self.__graph_dict:
self.__graph_dict[vertex] = []
def add_edge(self, edge):
""" assumes that edge is of type set, tuple or list;
between two vertices can be multiple edges!
"""
edge = set(edge)
(vertex1, vertex2) = tuple(edge)
if vertex1 in self.__graph_dict:
self.__graph_dict[vertex1].append(vertex2)
else:
self.__graph_dict[vertex1] = [vertex2]
def __generate_edges(self):
""" A static method generating the edges of the
graph "graph". Edges are represented as sets
with one (a loop back to the vertex) or two
vertices
"""
edges = []
for vertex in self.__graph_dict:
for neighbour in self.__graph_dict[vertex]:
if {neighbour, vertex} not in edges:
edges.append({vertex, neighbour})
return edges
def __str__(self):
res = "vertices: "
for k in self.__graph_dict:
res += str(k) + " "
res += "\nedges: "
for edge in self.__generate_edges():
res += str(edge) + " "
return res
#
# Copyright (C) 2012 INRA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
import os, re
import abc
from _pyio import __metaclass__
class Formats(object):
ANY = "any"
BAM = "bam"
FASTQ = "fastq"
FASTA = "fasta"
SFF = "sff"
QUAL = "qual"
FLOW = "flow"
HTML = "html"
#
# Inputs classes
#
class InputFile(str):
def __new__(self, input, format=Formats.ANY):
self.format = format
if input.__class__.__name__ == "str":
return str.__new__(self, input)
elif input.__class__.__name__ == "OutputFile" and (input.format == self.format or self.format == Formats.ANY):
return str.__new__(self, input)
elif input.__class__.__name__ == "OutputFile" and not (input.format == self.format or self.format == Formats.ANY):
raise IOError(input.format+" format is incompatible with allowed format "+self.format)
else:
raise IOError(input.__class__.__name__+" cannot be used as InputFile")
def check(self):
return "TODO: check input file"
class InputFileList(list):
def __init__(self, inputs, format=Formats.ANY):
self.format = format
if inputs.__class__.__name__ == "str":
return list.__init__(self, [inputs])
elif inputs.__class__.__name__ == "OutputFile" and (inputs.format == self.format or self.format == Formats.ANY):
return list.__init__(self, [inputs])
elif inputs.__class__.__name__ == "list":
return list.__init__(self, inputs)
elif inputs.__class__.__name__ == "OutputFileList" and (inputs.format == self.format or self.format == Formats.ANY):
return list.__init__(self, inputs)
elif inputs.__class__.__name__ == "OutputFileEndsWith" and (inputs.format == self.format or self.format == Formats.ANY):
return list.__init__(self, inputs)
elif inputs.__class__.__name__ == "OutputFilePattern" and (inputs.format == self.format or self.format == Formats.ANY):
return list.__init__(self, inputs)
elif (inputs.__class__.__name__ == "OutputFile" or inputs.__class__.__name__ == "OutputFileList") and \
not (inputs.format == self.format or self.format == Formats.ANY):
raise IOError(inputs.format+" format is incompatible with allowed format "+self.format)
else:
raise IOError(inputs.__class__.__name__+" cannot be used as InputFileList")
def check(self):
return "TODO: check input file list"
class InputDirectory(str):
def __new__(self, input):
if input.__class__.__name__ == "str":
return str.__new__(self, input)
elif input.__class__.__name__ == "OutputDirectory":
return str.__new__(self, input)
else:
raise IOError(input.__class__.__name__+" cannot be used as InputDirectory")
def check(self):
return "TODO: check input directory"
class InputDirectoryList(list):
def __init__(self, inputs):
if inputs.__class__.__name__ == "str":
return list.__init__(self, [inputs])
elif inputs.__class__.__name__ == "OutputDirectory":
return list.__init__(self, [inputs])
elif inputs.__class__.__name__ == "list":
return list.__init__(self, inputs)
elif inputs.__class__.__name__ == "OutputDirectoryList":
return list.__init__(self, inputs)
else:
raise IOError(inputs.__class__.__name__+" cannot be used as InputDirectoryList")
#
# Outputs classes
#
class DynamicOutput(list):
"""
@warning : with this class of output, the component become dynamic.
"""
__metaclass__ = abc.ABCMeta
@abc.abstractmethod
def update(self):
"""
This method is used at the end of component execution to update output list.
"""
raise NotImplementedError
class OutputFile(str):
def __new__(self, output, format=Formats.ANY):
self.format = format
if output.__class__.__name__ == "str":
return str.__new__(self, output)
else:
raise IOError(output.__class__.__name__+" cannot be used as OutputFile")
def check(self):
return "TODO: check output file"
class OutputFileList(list):
def __init__(self, outputs, format=Formats.ANY):
self.format = format
if outputs.__class__.__name__ == "str":
return list.__init__(self, [outputs])
elif outputs.__class__.__name__ == "list":
return list.__init__(self, outputs)
else:
raise IOError(outputs.__class__.__name__+" cannot be used as OutputFileList")
def check(self):
return "TODO: check output file list"
class OutputFilePattern(DynamicOutput):
def __init__(self, output_directory, pattern, format=Formats.ANY, include=True):
"""
@warning : with this class of output, the component become dynamic.
@param output_directory : path to the directory where outputs will be created.
@param pattern : the pattern of (a part) the file names.
@param format : the files format.
@param include : if true, the files with the pattern in file name are added into output files.
If false, the files with the pattern in file name are added into output files.
"""
self.output_directory = output_directory
self.pattern = pattern
self.format = format
self.include = include
return list.__init__(self, [])
def update(self):
output_files = list()
for file in os.listdir( self.output_directory ):
if self.include and re.search( self.pattern, file ) is not None:
output_files.append( os.path.join(self.output_directory, file) )
elif not self.include and re.search( self.pattern, file ) is None:
output_files.append( os.path.join(self.output_directory, file) )
return list.__init__(self, output_files)
def check(self):
return "TODO: check output file list"
class OutputFileEndsWith(DynamicOutput):
def __init__(self, output_directory, end_str, format=Formats.ANY, include=True):
"""
@warning : with this class of output, the component become dynamic.
@param output_directory : path to the directory where outputs will be created.
@param end_str : the end of the files names.
@param format : the files format.
@param include : if true, the files with name terminated by end_str are added into output files.
If false, the files with name not terminated by end_str are added into output files.
"""
self.output_directory = output_directory
self.end_str = end_str
self.format = format
self.include = include
return list.__init__(self, [])
def update(self):
output_files = list()
for file in os.listdir( self.output_directory ):
if file.endswith( self.end_str ) and self.include :
output_files.append( os.path.join(self.output_directory, file) )
elif not file.endswith( self.end_str ) and not self.include:
output_files.append( os.path.join(self.output_directory, file) )
return list.__init__(self, output_files)
def check(self):
return "TODO: check output file list"
class OutputFileList(list):
def __init__(self, outputs, format=Formats.ANY):
self.format = format
if outputs.__class__.__name__ == "str":
return list.__init__(self, [outputs])
elif outputs.__class__.__name__ == "list":
return list.__init__(self, outputs)
else:
raise IOError(outputs.__class__.__name__+" cannot be used as OutputFileList")
def check(self):
return "TODO: check output file list"
class OutputDirectory(str):
def __new__(self, output):
if output.__class__.__name__ == "str":
return str.__new__(self, output)
else:
raise IOError(output.__class__.__name__+" cannot be used as OutputDirectory")
def check(self):
return "TODO: check output directory"
class OutputDirectoryList(list):
def __init__(self, outputs):
if outputs.__class__.__name__ == "str":
return list.__init__(self, [outputs])
elif outputs.__class__.__name__ == "list":
return list.__init__(self, outputs)
else:
raise IOError(outputs.__class__.__name__+" cannot be used as OutputDirectoryList")
def check(self):
return "TODO: check output directory list"
\ No newline at end of file
......@@ -22,6 +22,7 @@ import datetime
import logging
import argparse
import os
import fnmatch
import tempfile
from argparse import _ensure_value
import urllib2
......@@ -75,6 +76,20 @@ def inputfile(file):
else:
return urlfile(file)
def regexpfiles(files_pattern):
try:
if ':' in files_pattern:
folder, pattern = files_pattern.rsplit(':')
else:
folder, pattern = os.path.split(files_pattern)
except:
raise argparse.ArgumentTypeError("Pattern '" + file + "' is invalid!")
if not os.path.exists(folder):
raise argparse.ArgumentTypeError("The folder '" + folder + "' doesn't exist!")
if not os.access(folder, os.R_OK):
raise argparse.ArgumentTypeError("You do not have permission to read '" + folder + "'!")
return files_pattern
def create_test_function(itype):
try: itype = itype.encode('ascii','ignore')
except: pass
......@@ -810,7 +825,7 @@ class InputFileList(ParameterList, AbstractInputFile):
type2test = type.__name__
else: type2test = type
if type2test not in INPUTFILE_TYPES:
if type2test not in INPUTFILE_TYPES + ["regexpfiles"]:
raise ValueError("InputFile.__new__: wrong type provided: '"+type2test+"', this should be choosen between '"
+ "', '".join(INPUTFILE_TYPES)+"'")
......@@ -843,6 +858,18 @@ class InputFileList(ParameterList, AbstractInputFile):
path2test = _copy.deepcopy(inputs)
if not path2test.__class__.__name__ == "list":
path2test = [path2test]
# handle regexp files
new_vals = list()
if self.type.__name__ == "regexpfiles":
for files_pattern in path2test:
if ':' in files_pattern:
folder, pattern = files_pattern.rsplit(':')
else:
folder, pattern = os.path.split(files_pattern)
for item in os.listdir(folder):
if os.path.isfile(os.path.join(folder, item)) and fnmatch.fnmatch(item, pattern):
new_vals.append( os.path.abspath(os.path.join(folder, item)) )
path2test = new_vals
new_vals, done = [], []
# handle url inputs
for path in path2test:
......
......@@ -77,10 +77,15 @@ def get_nb_octet(size):
Return the number of bytes: value has to be formated like this: 5Mb, 20Gb ...
"""
octets_link = ["bytes", "Kb", "Mb", "Gb", "Tb", "Pb", "Eb", "Zb"]
if size.endswith("bytes"):
unit = "bytes"
isize = size[:len(size)-5]
else:
unit = size[len(size)-2:len(size)]
isize = size[:len(size)-2]
pow_val = int(octets_link.index(unit)) * 10
val = pow(2, pow_val)
nb_octet = int(size[:len(size)-2]) * val
nb_octet = float(isize) * val
return nb_octet
def get_octet_string_representation(size):
......
......@@ -35,7 +35,7 @@ from inspect import getcallargs
import jflow
import jflow.utils as utils
from jflow.utils import validate_email
from jflow.graph import Graph
from pygraph.classes.digraph import digraph
from jflow.workflows_manager import WorkflowsManager, JFlowConfigReader
from jflow.utils import get_octet_string_representation, get_nb_octet
from jflow.parameter import *
......@@ -260,12 +260,11 @@ class Workflow(threading.Thread):
def _set_parameters(self, args):
parameters = self.get_parameters()
for param in parameters:
try: args[param.name] = args[param.name].encode('ascii','ignore')
except: pass
new_param = None
if param.__class__ == MultiParameter:
new_param = MultiParameter(param.name, param.help, required=param.required, flag=param.flag, group=param.group, display_name=param.display_name)
new_param.sub_parameters = param.sub_parameters
if args[param.name]:
sub_args = {}
for sarg in args[param.name]:
sub_args[sarg[0]] = sarg[1]
......@@ -289,15 +288,16 @@ class Workflow(threading.Thread):
self.__setattr__(param.name, new_param)
def get_execution_graph(self):
graph = Graph(graph_dict={})
gr = digraph()
for cpt in self.components:
gr.add_nodes([cpt.get_nameid()])
for cpt in self.components:
graph.add_vertex(cpt.get_nameid())
for ioparameter in cpt.__dict__.values():
if ( issubclass( ioparameter.__class__, InputFile ) or issubclass( ioparameter.__class__, OutputFile)
or issubclass( ioparameter.__class__, InputFileList) or issubclass( ioparameter.__class__, OutputFileList) ):
if ( issubclass( ioparameter.__class__, InputFile ) or issubclass( ioparameter.__class__, InputFileList)):
for pc_nameid in ioparameter.parent_component_nameid:
graph.add_edge({ioparameter.component_nameid, pc_nameid})
return graph
try: gr.add_edge((pc_nameid, ioparameter.component_nameid))
except: pass
return gr
def set_stderr(self):
if hasattr(self, "stderr") and self.stderr is not None:
......
......@@ -31,7 +31,7 @@ from ng6.run import Run
from ng6.sample import Sample
from ng6.utils import Utils
from workflows.types import date, adminlogin, casavadir, existingproject
from workflows.types import date, adminlogin, casavadir, existingproject, samplemetadata
class BasicNG6Workflow (Workflow):
......@@ -129,6 +129,7 @@ class NG6Workflow (BasicNG6Workflow):
self.add_parameter("type", "Read orientation and type", choices = Sample.AVAILABLE_TYPES, add_to = "input_sample")
self.add_parameter("insert_size", "Insert size for paired end reads", type = int, add_to = "input_sample")
self.add_parameter("species", "Species related to this sample", add_to = "input_sample")
self.add_parameter_list("metadata", "Add metadata to the sample", type=samplemetadata ,add_to = "input_sample")
self.add_input_file_list("read1", "Read 1 data file path", required = True, add_to = "input_sample")
self.add_input_file_list("read2", "Read 2 data file path", add_to = "input_sample")
......@@ -137,6 +138,9 @@ class NG6Workflow (BasicNG6Workflow):
sp_object = Sample( sd['sample_id'], sd['read1'], sd['read2'], name = sd['sample_name'], description = sd['sample_description'], type = sd['type'],
insert_size = sd['insert_size'], species = sd['species'] )
for metadata in sd['metadata'] :
k, v = metadata.split(':', 2)
sp_object.add_metadata(k, v)
self.reads1.extend(sp_object.reads1)
self.reads2.extend(sp_object.reads2)
self.samples.append(sp_object)
......
......@@ -95,13 +95,13 @@ class Sample(object):
def __str__(self, *args, **kwargs):
return "sid={sid}; name={name}; desc={desc}; r1={r1}; r2={r2}; insize={insize}; nbs={nbs}; fsize={fsize}; spec={spec}; t={t}".format(
sid = self.sample_id,
name = self.name,
desc = self.description,
r1 = self.reads1,
r2 = self.reads2,
insize = self.insert_size,
nbs = self.nb_sequences,
fsize = self.full_size,
spec = self.species,
t = self.type )
sid = self.sample_id or '',
name = self.name or '',
desc = self.description or '',
r1 = self.reads1 or [],
r2 = self.reads2 or [],
insize = self.insert_size or '',
nbs = self.nb_sequences or '',
fsize = self.full_size or '',
spec = self.species or '',
t = self.type or '')
# Copyright (c) 2007-2012 Pedro Matiello <pmatiello@gmail.com>
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation
# files (the "Software"), to deal in the Software without
# restriction, including without limitation the rights to use,
# copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following
# conditions:
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
"""
B{python-graph}
A library for working with graphs in Python.
@version: 1.8.2
L{Data structure<pygraph.classes>} classes are located at C{pygraph.classes}.
L{Exception<pygraph.classes.exceptions>} classes are located at C{pygraph.classes.exceptions}.
L{Search filters<pygraph.algorithms.filters>} are located at C{pygraph.algorithms.filters}.
L{Heuristics<pygraph.algorithms.heuristics>} for the A* algorithm are exposed in
C{pygraph.algorithms.heuristics}.
A quick introductory example:
>>> # Import the module and instantiate a graph object
>>> from pygraph.classes.graph import graph
>>> from pygraph.algorithms.searching import depth_first_search
>>> gr = graph()
>>> # Add nodes
>>> gr.add_nodes(['X','Y','Z'])
>>> gr.add_nodes(['A','B','C'])
>>> # Add edges
>>> gr.add_edge(('X','Y'))
>>> gr.add_edge(('X','Z'))
>>> gr.add_edge(('A','B'))
>>> gr.add_edge(('A','C'))
>>> gr.add_edge(('Y','B'))
>>> # Depth first search rooted on node X
>>> st, pre, post = depth_first_search(gr, root='X')
>>> # Print the spanning tree
>>> print st
{'A': 'B', 'C': 'A', 'B': 'Y', 'Y': 'X', 'X': None, 'Z': 'X'}
"""
__import__('pkg_resources').declare_namespace(__name__)
# Copyright (c) 2008-2009 Pedro Matiello <pmatiello@gmail.com>
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation
# files (the "Software"), to deal in the Software without
# restriction, including without limitation the rights to use,
# copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following
# conditions:
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
"""
Algorithms
This subpackage contains a set of modules, each one of them containing some algorithms.
"""
__import__('pkg_resources').declare_namespace(__name__)
This diff is collapsed.
# Copyright (c) 2009 Pedro Matiello <pmatiello@gmail.com>
# Tomaz Kovacic <tomaz.kovacic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation
# files (the "Software"), to deal in the Software without
# restriction, including without limitation the rights to use,
# copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following