Commit da901539 authored by Penom Nom's avatar Penom Nom

Update ("maj") of the jflow workflows

parent 0fc91c0d
This diff is collapsed.
......@@ -27,7 +27,6 @@ except ImportError:
from jflow.workflows_manager import WorkflowsManager
from jflow.workflow import Workflow
import jflow.utils as utils
from cctools.util import time_format
class JflowArgumentParser (argparse.ArgumentParser):
def _read_args_from_files(self, arg_strings):
......@@ -55,50 +54,6 @@ class JflowArgumentParser (argparse.ArgumentParser):
# return the modified argument list
return new_arg_strings
def display_workflow_status(workflow, detailed=False):
    """
    Build a text report describing the status of a workflow.

    @param workflow : the workflow to report on; must expose id, name, start_time,
                      end_time, get_status(), get_components_nameid() and
                      get_component_status().
    @param detailed : [bool] if True, return a multi-line, ANSI-colored report with
                      one line per component; otherwise a single tab-separated line.
    @return : [str] the formatted status.
    """
    # Human-readable start/end times; elapsed falls back to "now" while running.
    if workflow.start_time: start_time = time.asctime(time.localtime(workflow.start_time))
    else: start_time = "-"
    if workflow.start_time and workflow.end_time: elapsed_time = str(workflow.end_time-workflow.start_time)
    elif workflow.start_time: elapsed_time = str(time.time()-workflow.start_time)
    else: elapsed_time = "-"
    if workflow.end_time: end_time = time.asctime(time.localtime(workflow.end_time))
    else: end_time = "-"
    if not detailed:
        return utils.get_nb_string(workflow.id) + "\t" + workflow.name + "\t" + workflow.get_status() + \
               "\t" + elapsed_time + "\t" + start_time + "\t" + end_time
    status = "Workflow #" + utils.get_nb_string(workflow.id) + " (" + workflow.name + ") is " + \
             workflow.get_status() + ", time elapsed: " + elapsed_time + " (from " + start_time + \
             " to " + end_time + "):\n"
    components = workflow.get_components_nameid()  # hoisted: was called twice per loop
    # ANSI color per task state; a state is colored only when its count is non zero.
    # NOTE: the previous version also computed per-state percentages that were
    # never used afterwards; that dead code has been removed.
    state_colors = [("waiting", "\033[93m"), ("running", "\033[94m"),
                    ("failed", "\033[91m"), ("aborted", "\033[95m")]
    for i, component in enumerate(components):
        status_info = workflow.get_component_status(component)
        labels = {}
        for state, color in state_colors:
            label = state + ":" + str(status_info[state])
            labels[state] = color + label + "\033[0m" if status_info[state] > 0 else label
        completed = "completed:" + str(status_info["completed"])
        # Green only when every task of the component completed.
        if status_info["completed"] == status_info["tasks"] and status_info["completed"] > 0:
            completed = "\033[92m" + completed + "\033[0m"
        status += " - " + component + ", time elapsed " + time_format(status_info["time"]) + \
                  " (total:" + str(status_info["tasks"]) + ", " + labels["waiting"] + ", " + \
                  labels["running"] + ", " + labels["failed"] + ", " + labels["aborted"] + \
                  ", " + completed + ")"
        if i < len(components)-1: status += "\n"
    return status
if __name__ == '__main__':
......@@ -110,21 +65,37 @@ if __name__ == '__main__':
subparsers = parser.add_subparsers(title='Available sub commands')
# Add rerun workflow availability
sub_parser = subparsers.add_parser("rerun", help="rerun a specific workflow")
sub_parser = subparsers.add_parser("rerun", help="Rerun a specific workflow")
sub_parser.add_argument("--workflow-id", type=str, help="Which workflow should be rerun",
required=True, dest="workflow_id")
sub_parser.set_defaults(cmd_object="rerun")
# Add rerun workflow availability
sub_parser = subparsers.add_parser("reset", help="Reset a workflow component")
sub_parser.add_argument("--workflow-id", type=str, help="Which workflow should be used",
required=True, dest="workflow_id")
sub_parser.add_argument("--component-name", type=str, help="Which component should be reseted",
required=True, dest="component_name")
sub_parser.set_defaults(cmd_object="reset")
# Add rerun workflow availability
sub_parser = subparsers.add_parser("execution-graph", help="Display the workflow execution graph")
sub_parser.add_argument("--workflow-id", type=str, help="Which workflow should be considered",
required=True, dest="workflow_id")
sub_parser.set_defaults(cmd_object="execution_graph")
# Add status workflow availability
sub_parser = subparsers.add_parser("status", help="monitor a specific workflow")
sub_parser = subparsers.add_parser("status", help="Monitor a specific workflow")
sub_parser.add_argument("--workflow-id", type=str, help="Which workflow status should be displayed",
default=None, dest="workflow_id")
sub_parser.add_argument("--all", action="store_true", help="Display all workflows status",
default=False, dest="all")
sub_parser.add_argument("--errors", action="store_true", help="Display failed commands",
default=False, dest="display_errors")
sub_parser.set_defaults(cmd_object="status")
# Add available pipelines
wf_instances = wfmanager.get_available_workflows()
wf_instances, wf_methodes = wfmanager.get_available_workflows()
wf_classes = []
for instance in wf_instances:
wf_classes.append(instance.__class__.__name__)
......@@ -157,16 +128,21 @@ if __name__ == '__main__':
wfmanager.run_workflow(args["cmd_object"], args)
elif args["cmd_object"] == "rerun":
wfmanager.rerun_workflow(args["workflow_id"])
elif args["cmd_object"] == "reset":
wfmanager.reset_workflow_component(args["workflow_id"], args["component_name"])
elif args["cmd_object"] == "execution_graph":
workflow = wfmanager.get_workflow(args["workflow_id"])
print workflow.get_execution_graph()
elif args["cmd_object"] == "status":
if args["workflow_id"]:
workflow = wfmanager.get_workflow(args["workflow_id"])
print display_workflow_status(workflow, True)
print workflow.get_status_under_text_format(True, args["display_errors"])
else:
workflows = wfmanager.get_workflows()
if len(workflows) > 0:
status = "ID\tNAME\tSTATUS\tELAPSED_TIME\tSTART_TIME\tEND_TIME\n"
for i, workflow in enumerate(workflows):
status += display_workflow_status(workflow)
status += workflow.get_status_under_text_format()
if i<len(workflows)-1: status += "\n"
else: status = "no workflow available"
print status
......@@ -22,6 +22,9 @@ import os
from jflow.config_reader import JFlowConfigReader
# Define some Error classes
class InvalidFormatError(Exception): pass
jflowconf = JFlowConfigReader()
# if log file directory does not exist, create it
......
......@@ -19,13 +19,13 @@ import os
import sys
import inspect
import tempfile
import types
from jflow.workflows_manager import WorkflowsManager
from jflow.config_reader import JFlowConfigReader
from jflow.dataset import ArrayList
from jflow.utils import which
from jflow.iotypes import DynamicOutput
from jflow.parameter import *
from weaver.util import parse_string_list
......@@ -35,6 +35,7 @@ class Component(object):
def __init__(self):
self.prefix = "default"
self.params_order = []
self.output_directory = None
self.config_reader = JFlowConfigReader()
self.version = self.get_version()
......@@ -53,6 +54,159 @@ class Component(object):
dynamic_outputs.append( attribute_value )
return dynamic_outputs
def get_output_files(self):
    """
    Collect every output file declared on this component, indexed by basename.

    @return : [dict] basename -> full path for each output-file attribute
              (DynamicOutput / OutputFileList entries and single OutputFile).
    """
    collected = {}
    for value in self.__dict__.values():
        is_file_list = ( issubclass(value.__class__, DynamicOutput) or
                         issubclass(value.__class__, OutputFileList) )
        if is_file_list:
            for path in value:
                collected[os.path.basename(path)] = path
        elif issubclass(value.__class__, OutputFile):
            collected[os.path.basename(value)] = value
    return collected
def add_input_file(self, name, help, file_format="any", default=None, type="inputfile",
                   required=False, flag=None, group="default", display_name=None, add_to=None):
    """
    Declare a single input file parameter on this component.

    @param name : [str] parameter name (also the attribute name on the component).
    @param help : [str] help message.
    @param file_format : [str] expected file format.
    @param default : default value; an AbstractOutputFile links this input to the
                     component that produced it.
    @param type : [str] parameter type keyword.
    @param required : [bool] whether the parameter is mandatory.
    @param flag : [str] command line flag.
    @param group : [str] display group.
    @param display_name : [str] user-friendly name.
    @param add_to : [str] name of an existing parameter to attach this one to
                    instead of registering it on the component.
    """
    param = InputFile(name, help, flag=flag, file_format=file_format, default=default,
                      type=type, required=required, group=group, display_name=display_name)
    param.component_nameid = self.get_nameid()
    # Record the upstream component(s) that produced the default value.
    if issubclass(default.__class__, AbstractOutputFile):
        param.parent_component_nameid.append(default.component_nameid)
    elif issubclass(default.__class__, list):
        for item in default:
            if issubclass(item.__class__, AbstractOutputFile):
                param.parent_component_nameid.append(item.component_nameid)
    if add_to:
        # Best effort: silently skipped when the target parameter does not exist.
        try:
            self.__getattribute__(add_to).add_sub_parameter(param)
        except: pass
    else:
        # Register on the component itself, preserving declaration order.
        self.params_order.append(name)
        setattr(self, name, param)
def reset(self):
    """
    Empty this component's output directory by deleting every file it contains.

    NOTE(review): assumes output_directory holds only plain files — a
    sub-directory would make os.remove raise; confirm with callers.
    """
    directory = self.output_directory
    for entry in os.listdir(directory):
        os.remove(os.path.join(directory, entry))
def add_input_file_list(self, name, help, file_format="any", default=None, type="inputfile",
                        required=False, flag=None, group="default", display_name=None, add_to=None):
    """
    Declare an input file list parameter on this component.

    @param name : [str] parameter name (also the attribute name on the component).
    @param help : [str] help message.
    @param file_format : [str] expected file format.
    @param default : default value; an AbstractOutputFile (or a list of them)
                     links this input to the producing component(s).
    @param type : [str] parameter type keyword.
    @param required : [bool] whether the parameter is mandatory.
    @param flag : [str] command line flag.
    @param group : [str] display group.
    @param display_name : [str] user-friendly name.
    @param add_to : [str] name of an existing parameter to attach this one to
                    instead of registering it on the component.
    """
    # "is None" instead of "== None": identity test never invokes __eq__ on
    # project parameter objects and cannot mistake a falsy default for missing.
    if default is None: default = []
    new_param = InputFileList(name, help, flag=flag, file_format=file_format, default=default,
                              type=type, required=required, group=group, display_name=display_name)
    new_param.component_nameid = self.get_nameid()
    # store where the parameter is coming from
    if issubclass( default.__class__, AbstractOutputFile ):
        new_param.parent_component_nameid.append(default.component_nameid)
    elif issubclass( default.__class__, list ):
        for val in default:
            if issubclass( val.__class__, AbstractOutputFile ):
                new_param.parent_component_nameid.append(val.component_nameid)
    # if this input should be added to a particular parameter
    if add_to:
        try:
            self.__getattribute__(add_to).add_sub_parameter(new_param)
        except: pass
    # otherwise, add it to the class itself
    else:
        self.params_order.append(name)
        self.__setattr__(name, new_param)
def add_parameter(self, name, help, default=None, type=types.StringType, choices=None,
                  required=False, flag=None, group="default", display_name=None, add_to=None):
    """
    Declare a scalar parameter on this component.

    @param name : [str] parameter name (also the attribute name on the component).
    @param help : [str] help message.
    @param default : default value.
    @param type : expected value type (Python 2 string type by default).
    @param choices : [list] allowed values, if restricted.
    @param required : [bool] whether the parameter is mandatory.
    @param flag : [str] command line flag.
    @param group : [str] display group.
    @param display_name : [str] user-friendly name.
    @param add_to : [str] name of an existing parameter to attach this one to
                    instead of registering it on the component.
    """
    param = ParameterFactory.factory(name, help, flag=flag, default=default, type=type,
                                     choices=choices, required=required, group=group,
                                     display_name=display_name)
    if add_to:
        # Best effort: silently skipped when the target parameter does not exist.
        try:
            self.__getattribute__(add_to).add_sub_parameter(param)
        except: pass
    else:
        # Register on the component itself, preserving declaration order.
        self.params_order.append(name)
        setattr(self, name, param)
def add_parameter_list(self, name, help, default=None, type=types.StringType, choices=None,
                       required=False, flag=None, group="default", display_name=None, add_to=None):
    """
    Declare a list parameter on this component.

    @param name : [str] parameter name (also the attribute name on the component).
    @param help : [str] help message.
    @param default : [list] default values.
    @param type : expected element type (Python 2 string type by default).
    @param choices : [list] allowed values, if restricted.
    @param required : [bool] whether the parameter is mandatory.
    @param flag : [str] command line flag.
    @param group : [str] display group.
    @param display_name : [str] user-friendly name.
    @param add_to : [str] name of an existing parameter to attach this one to
                    instead of registering it on the component.
    """
    # "is None" instead of "== None": identity test, never calls __eq__.
    if default is None: default = []
    new_param = ParameterList(name, help, flag=flag, default=default, type=type, choices=choices,
                              required=required, group=group, display_name=display_name)
    # if this input should be added to a particular parameter
    if add_to:
        try:
            self.__getattribute__(add_to).add_sub_parameter(new_param)
        except: pass
    # otherwise, add it to the class itself
    else:
        self.params_order.append(name)
        self.__setattr__(name, new_param)
def add_output_file(self, name, help, file_format="any", filename=None, group="default", display_name=None, add_to=None):
    """
    Declare a single output file produced in this component's output directory.

    @param name : [str] parameter name (also the attribute name on the component).
    @param help : [str] help message.
    @param file_format : [str] produced file format.
    @param filename : [str] file name; only its basename is kept and joined to
                      the component's output directory.
    @param group : [str] display group.
    @param display_name : [str] user-friendly name.
    @param add_to : [str] name of an existing parameter to attach this one to
                    instead of registering it on the component.
    """
    basename = os.path.basename(filename)  # drop any directory part supplied by the caller
    param = OutputFile(name, help, default=os.path.join(self.output_directory, basename),
                       file_format=file_format, group=group, display_name=display_name)
    param.component_nameid = self.get_nameid()
    if add_to:
        # Best effort: silently skipped when the target parameter does not exist.
        try:
            self.__getattribute__(add_to).add_sub_parameter(param)
        except: pass
    else:
        # Register on the component itself, preserving declaration order.
        self.params_order.append(name)
        setattr(self, name, param)
def add_output_file_list(self, name, help, file_format="any", pattern='{basename_woext}.out',
                         items=None, group="default", display_name=None, add_to=None):
    """
    Declare a list of output files whose paths are derived from 'pattern' and 'items'.

    @param name : [str] parameter name (also the attribute name on the component).
    @param help : [str] help message.
    @param file_format : [str] produced file format.
    @param pattern : [str] naming pattern expanded by self.get_outputs().
    @param items : inputs from which the output names are derived.
    @param group : [str] display group.
    @param display_name : [str] user-friendly name.
    @param add_to : [str] name of an existing parameter to attach this one to
                    instead of registering it on the component.
    """
    param = OutputFileList(name, help, default=self.get_outputs(pattern, items),
                           file_format=file_format, group=group, display_name=display_name)
    param.component_nameid = self.get_nameid()
    if add_to:
        # Best effort: silently skipped when the target parameter does not exist.
        try:
            self.__getattribute__(add_to).add_sub_parameter(param)
        except: pass
    else:
        # Register on the component itself, preserving declaration order.
        self.params_order.append(name)
        setattr(self, name, param)
def add_output_file_endswith(self, name, help, pattern, file_format="any", behaviour="include",
                             group="default", display_name=None, add_to=None):
    """
    Declare outputs as the files of the output directory whose names end with 'pattern'.

    @param name : [str] parameter name (also the attribute name on the component).
    @param help : [str] help message.
    @param pattern : [str] file name suffix to match.
    @param file_format : [str] produced file format.
    @param behaviour : [str] "include" keeps matching files, anything else excludes them.
    @param group : [str] display group.
    @param display_name : [str] user-friendly name.
    @param add_to : [str] name of an existing parameter to attach this one to
                    instead of registering it on the component.
    """
    keep_matching = (behaviour == "include")
    param = OutputFilesEndsWith(name, help, self.output_directory, pattern, include=keep_matching,
                                file_format=file_format, group=group, display_name=display_name)
    param.component_nameid = self.get_nameid()
    if add_to:
        # Best effort: silently skipped when the target parameter does not exist.
        try:
            self.__getattribute__(add_to).add_sub_parameter(param)
        except: pass
    else:
        # Register on the component itself, preserving declaration order.
        self.params_order.append(name)
        setattr(self, name, param)
def add_output_file_pattern(self, name, help, pattern, file_format="any", behaviour="include",
                            group="default", display_name=None, add_to=None):
    """
    Declare outputs as the files of the output directory whose names match 'pattern'.

    @param name : [str] parameter name (also the attribute name on the component).
    @param help : [str] help message.
    @param pattern : [str] file name pattern to match.
    @param file_format : [str] produced file format.
    @param behaviour : [str] "include" keeps matching files, anything else excludes them.
    @param group : [str] display group.
    @param display_name : [str] user-friendly name.
    @param add_to : [str] name of an existing parameter to attach this one to
                    instead of registering it on the component.
    """
    # BUG FIX: 'include' must be True when behaviour == "include". The previous
    # test against "exclude" inverted the flag, contradicting both the default
    # value and the sibling add_output_file_endswith().
    new_param = OutputFilesPattern(name, help, self.output_directory, pattern, include=(behaviour == "include"),
                                   file_format=file_format, group=group, display_name=display_name)
    new_param.component_nameid = self.get_nameid()
    # if this input should be added to a particular parameter
    if add_to:
        try:
            self.__getattribute__(add_to).add_sub_parameter(new_param)
        except: pass
    # otherwise, add it to the class itself
    else:
        self.params_order.append(name)
        self.__setattr__(name, new_param)
def _longestCommonSubstr(self, data, clean_end=True):
substr = ''
if len(data) > 1 and len(data[0]) > 0:
......@@ -83,6 +237,9 @@ class Component(object):
if isinstance(output_list, str):
ilist = []
if not input_list or not '{' in str(output_list):
if input_list is not None and len(input_list) == 0:
return []
else:
return [output_list]
# if multiple list of inputs is used
elif isinstance(input_list[0], list):
......
......@@ -37,6 +37,8 @@ class JFlowConfigReader(object):
self.reader.read(os.path.join(os.path.dirname(inspect.getfile(self.__class__)), self.CONFIG_FILE_PATH))
def get_tmp_directory(self):
    """
    Return the tmp directory path from the [storage] section of the config file,
    creating the directory (mode 751) if it does not exist yet.

    @return : [str] the tmp directory path.
    """
    # Read the setting once instead of three separate reader.get() calls.
    tmp_directory = self.reader.get("storage", "tmp_directory")
    if not os.path.isdir(tmp_directory):
        # 0o751 octal literal is valid on both Python 2.6+ and Python 3.
        os.makedirs(tmp_directory, 0o751)
    return tmp_directory
def get_work_directory(self):
......@@ -72,6 +74,13 @@ class JFlowConfigReader(object):
sys.exit(1)
return exec_path
def get_date_format(self):
    """
    Return the date format string from the [global] section of the config file.

    @return : [str] the configured date format.
    @raise Error : if the date_format parameter is missing from the config file.
    """
    try:
        date_format = self.reader.get("global", "date_format")
    except Exception:
        # Narrowed from a bare "except:" so KeyboardInterrupt/SystemExit propagate.
        # NOTE(review): 'Error' is expected to be defined at module level — confirm.
        raise Error("Failed when parsing the config file, no parameter date_format!")
    return date_format
def get_batch(self):
try:
type = self.reader.get("global", "batch_system_type")
......@@ -87,6 +96,23 @@ class JFlowConfigReader(object):
except:
return ["127.0.0.1", 8080]
def get_email_options(self):
    """
    Return the email settings from the [email] section of the config file.

    @return : [list] [smtp_server, smtp_port, from_address, from_password,
              to_address, subject, message]; each entry is None when the option
              is missing from the configuration.
    """
    # One loop instead of seven copy-pasted try/except blocks; order matters
    # and matches the documented return layout.
    options = []
    for key in ["smtp_server", "smtp_port", "from_address", "from_password",
                "to_address", "subject", "message"]:
        try:
            options.append(self.reader.get("email", key))
        except Exception:
            options.append(None)  # missing option: best effort, report as None
    return options
def get_component_batch_options(self, component_class):
try:
return self.reader.get("components", component_class+".batch_options")
......
......@@ -270,6 +270,18 @@ class SparseData( dict ):
self[row_idx][col_idx] = 0
self[row_idx][col_idx] += value
def subtract( self, row_idx, col_idx, value ):
    """
    @summary : Subtract the 'value' from the count for the column col_idx in row row_idx.
    @param row_idx : [int] The index of the row.
    @param col_idx : [int] The index of the column.
    @param value : [int] The value to subtract.
    @raise Exception : if the cell does not exist or holds less than 'value'.
    """
    # "in" instead of the Python2-only dict.has_key(); same membership semantics.
    if row_idx in self and col_idx in self[row_idx] and self[row_idx][col_idx] >= value:
        self[row_idx][col_idx] -= value
    else:
        raise Exception( "'" + str(value) + "' cannot be subtract from row " + str(row_idx) + " column " + str(col_idx) + "." )
def change( self, row_idx, col_idx, value ):
"""
@summary : Change the 'value' to the count for the column col_idx in row row_idx.
......@@ -286,6 +298,22 @@ class SparseData( dict ):
if self.has_key( row_idx ) and self[row_idx].has_key( col_idx ) :
del self[row_idx][col_idx]
def random_by_col( self, col_idx ):
    """
    @summary : Return the index of a row drawn at random for the column col_idx,
               each row being weighted by its count in that column (one uniform
               draw over the column total, then a cumulative scan).
    @param col_idx : [int] The index of the column.
    @return : [int] The selected row index.
    """
    target = random.randint(1, self.get_col_sum(col_idx))
    cumulative = 0
    row_idx = 0
    while True:
        cumulative += self.nb_at(row_idx, col_idx)
        if target <= cumulative:
            return row_idx
        row_idx += 1
def add_row( self ):
    """
    @summary : Hook for appending a row — intentionally a no-op, since this
               sparse representation keeps no per-row bookkeeping.
    """
    pass
......@@ -461,24 +489,6 @@ class Biom:
raise ValueError( "'" + query_name + "' doesn't exist." )
return find_idx
def _count_by_OTU( self ):
"""
@summary : Count the number of elements by OTU.
@return : [list, int] the number of elements by OTU and the total number of elements.
Example for the number of elements by OTU :
[
{ 'id' : 'OTU_1', 'nb' : 10 },
{ 'id' : 'OTU_8', 'nb' : 0 }
]
"""
nb_total_elt = 0
OTU_count = list()
for row_idx in range( len(self.rows) ):
OTU_count.append( { "id" : self.rows[row_idx]["id"],
"nb" : self.get_row_sum( row_idx ) } )
nb_total_elt += nb_elt_for_OTU
return OTU_count, nb_total_elt
def add_metadata( self, subject_name, metadata_name, metadata_value, subject_type="sample"):
"""
@summary : Add a metadata on subject (a sample or an observation).
......@@ -538,6 +548,17 @@ class Biom:
# Remove sample from the self.data
self.data.remove_col( sample_idx )
def subtract_count( self, observation_name, sample_name, value ):
    """
    @summary : Subtract a value from the count of one observation in one sample.
    @param observation_name : [str] The observation name.
    @param sample_name : [str] The sample name.
    @param value : [int] The value to subtract.
    """
    # Resolve names to matrix coordinates, then delegate to the sparse matrix.
    observation_idx = self.find_idx( self.rows, observation_name )
    sample_idx = self.find_idx( self.columns, sample_name )
    self.data.subtract( observation_idx, sample_idx, value )
def add_count( self, observation_name, sample_name, value ):
"""
@summary : Add a value to the count for one observation of one sample.
......@@ -626,7 +647,7 @@ class Biom:
def bootstrap_by_sample( self, nb_selected_elts, nb_removed_elts, nb_selection_round=1000 ):
"""
@summary : Replaces data of the sample by random sampling with rrrrr in the sample.
@summary : Replaces data of the sample by random sampling with replacement in the sample.
@param nb_selected_elts : [int] Number of elements selected on sampling.
@param nb_removed_elts : [int] Number of elements removed of the initial set before random sampling.
@param nb_selection_round : [int] Number of sampling round.
......@@ -665,6 +686,13 @@ class Biom:
sample_idx = self.find_idx( self.columns, current_sample['id'] )
self.data.add( OTU_idx, sample_idx, selected[OTU_id] )
def random_obs_by_sample( self, sample_name ):
    """
    @summary : Return one observation (row entry) of the sample, chosen by
               delegating the random draw to self.data.random_by_col().
    @param sample_name : [str] The sample name.
    @return : the selected entry of self.rows.
    """
    col_idx = self.find_idx( self.columns, sample_name )
    row_idx = self.data.random_by_col( col_idx )
    return self.rows[row_idx]
def get_sample_count( self, sample_name ):
    """
    @summary : Return the total count of the sample (the sum of its column).
    @param sample_name : [str] The sample name.
    @return : [int] the sample's total count.
    """
    col_idx = self.find_idx( self.columns, sample_name )
    return self.data.get_col_sum( col_idx )
def to_count( self ):
"""
@summary : Returns the count of observations by sample.
......@@ -823,13 +851,17 @@ class BiomIO:
for row_idx in range(len(biom.rows)):
count = biom.data.nb_at( row_idx, col_idx )
if count > 0:
tax = biom.rows[row_idx]["metadata"]["taxonomy"]
if isinstance(tax, list) or isinstance(tax, tuple):
tax = "\t".join( map(str, tax) )
taxonomy = biom.rows[row_idx]["metadata"]["taxonomy"]
cleaned_taxonomy = None
if isinstance(taxonomy, list) or isinstance(taxonomy, tuple):
taxa_list = list()
for taxon in taxonomy:
if not str(taxon).lower().startswith("unknown "):
taxa_list.append( taxon )
cleaned_taxonomy = "\t".join(taxa_list)
else:
tax = str( tax )
tax = "\t".join( map(str.strip, tax.split(";")) ) # Replace space separator between ranks by tabulation
out_fh.write( str(count) + "\t" + tax + "\n" )
cleaned_taxonomy = "\t".join( map(str.strip, taxonomy.split(";")) )
out_fh.write( str(count) + "\t" + cleaned_taxonomy + "\n" )
out_fh.close()
@staticmethod
......
......@@ -120,6 +120,8 @@ class GFF3Record:
# Parse attributes
gff_record.attributes = dict()
attributes = "\t".join(line_fields[8:])
if attributes.strip().endswith(";"): # if attributes end with ';'
attributes = attributes.strip()[:-1]
attributes_array = attributes.split(";")
cleaned_attributes = list()
for attribute in attributes_array:
......@@ -163,8 +165,10 @@ class GFF3IO:
yield gff_record
def close( self ) :
if self._handle is not None:
if hasattr(self, '_handle') and self._handle is not None:
self._handle.close()
self._handle = None
self._line = None
def write( self, gff_record ):
"""
......
#
# Copyright (C) 2012 INRA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
class Graph(object):
    """A simple adjacency-list graph; edges are listed as (possibly one-element) sets."""

    def __init__(self, graph_dict=None):
        """
        Initialize the graph.
        @param graph_dict : [dict] optional mapping vertex -> list of neighbours.
        """
        # BUG FIX: the previous mutable default argument ({}) was shared by
        # every Graph created without an explicit dictionary, so vertices
        # added to one instance leaked into all the others.
        if graph_dict is None:
            graph_dict = {}
        self.__graph_dict = graph_dict

    def vertices(self):
        """ returns the vertices of a graph """
        return list(self.__graph_dict.keys())

    def edges(self):
        """ returns the edges of a graph, each edge as a list of its vertices """
        edges = []
        for edge in self.__generate_edges():
            edges.append(list(edge))
        return edges

    def add_vertex(self, vertex):
        """ If the vertex "vertex" is not in
            self.__graph_dict, a key "vertex" with an empty
            list as a value is added to the dictionary.
            Otherwise nothing has to be done.
        """
        if vertex not in self.__graph_dict:
            self.__graph_dict[vertex] = []

    def add_edge(self, edge):
        """ assumes that edge is of type set, tuple or list;
            between two vertices can be multiple edges!
        """
        edge = set(edge)
        if len(edge) == 1:
            # BUG FIX: a self-loop like ("a", "a") collapses to a one-element
            # set and previously crashed on the two-name tuple unpacking;
            # __generate_edges explicitly supports loops, so accept them here.
            (vertex1,) = tuple(edge)
            vertex2 = vertex1
        else:
            (vertex1, vertex2) = tuple(edge)
        if vertex1 in self.__graph_dict:
            self.__graph_dict[vertex1].append(vertex2)
        else:
            self.__graph_dict[vertex1] = [vertex2]

    def __generate_edges(self):
        """ A static method generating the edges of the
            graph "graph". Edges are represented as sets
            with one (a loop back to the vertex) or two
            vertices
        """
        edges = []
        for vertex in self.__graph_dict:
            for neighbour in self.__graph_dict[vertex]:
                if {neighbour, vertex} not in edges:
                    edges.append({vertex, neighbour})
        return edges

    def __str__(self):
        res = "vertices: "
        for k in self.__graph_dict:
            res += str(k) + " "
        res += "\nedges: "
        for edge in self.__generate_edges():
            res += str(edge) + " "
        return res
This diff is collapsed.
......@@ -15,6 +15,18 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
import re
import smtplib
import socket
import math
# Optional dependency: use PyDNS when it is installed; otherwise fall back to a
# stub so that later "except ServerError" clauses keep working without it.
try:
    import DNS
    ServerError = DNS.ServerError
except:
    # PyDNS is missing or failed to load: mark it absent and provide a
    # placeholder exception type with the same name.
    DNS = None
    class ServerError(Exception): pass
def which(program):
"""
Return if the asked program exist in the user path
......@@ -60,6 +72,34 @@ def get_nb_string(value, length=6):
s_value = zeros + s_value
return s_value
def get_nb_octet(size):
"""
Return the number of bytes: value has to be formated like this: 5Mb, 20Gb ...
"""
octets_link = ["bytes", "Kb", "Mb", "Gb", "Tb", "Pb", "Eb", "Zb"]
unit = size[len(size)-2:len(size)]
pow_val = int(octets_link.index(unit)) * 10
val = pow(2, pow_val)