# Copyright (c) 2010- The University of Notre Dame.
# This software is distributed under the GNU General Public License.
# See the file COPYING for details.

""" Weaver function module """

from weaver.compat  import callable, getfuncname
from weaver.data    import parse_input_list, parse_output_list
from weaver.logger  import D_FUNCTION, debug
from weaver.options import Options
from weaver.stack   import CurrentAbstraction, CurrentNest
from weaver.util    import find_executable, parse_string_list, type_str, WeaverError

import inspect
import itertools
import os
import sys
import collections


# Base Function class

class Function(object):
    """ This is the base Function class.

    A :class:`Function` provides the :meth:`command_format` method that
    specifies how to generate the command for the executable associated with
    the :class:`Function` instance.

    At a minimum, the user must specify the name of the `executable`.  For
    convenience, the function :func:`~weaver.util.find_executable` is used to
    locate the executable.

    **Positional Arguments:**

    - `executable`  -- Path or name of executable.

    **Keyword Arguments:**

    - `cmd_format`  -- String template used to generate command string.
    - `find_dirs`   -- Additional directories to search for executable.
    - `environment` -- Mapping stored on the instance; a copy of it seeds the
                       environment of the options built on every call.

    The `cmd_format` supports the following fields:

    - `{executable}`, `{EXE}` -- The executable file.
    - `{inputs}`, `{IN}`      -- The inputs files.
    - `{outputs}`, `{OUT}`    -- The output files.
    - `{arguments}`, `{ARG}`  -- The arguments.

    The default command string template is :data:`~weaver.Function.CMD_FORMAT`.
    """
    #: Default command string format template
    CMD_FORMAT = '{executable} {arguments} {inputs} > {outputs}'

    def __init__(self, executable, cmd_format=None, find_dirs=None,
        environment=None):
        self.cmd_format  = cmd_format or Function.CMD_FORMAT
        self.path        = find_executable(executable, find_dirs)
        self.environment = environment or dict()
        # The executable itself is always shipped along with the task.
        self.includes    = set([self.path])

        debug(D_FUNCTION, 'Created Function {0}({1}, {2})'.format(
            type_str(self), self.path, self.cmd_format))

    def __call__(self, inputs=None, outputs=None, arguments=None,
        includes=None, local=False, environment=None, collect=False):
        """ Schedule one invocation of this function on the current nest.

        - `inputs`      -- Inputs for the command; when empty, falls back to
                           ``self.inputs`` if the instance defines it.
        - `outputs`     -- Outputs for the command; when empty, falls back to
                           ``self.outputs`` if the instance defines it.
        - `arguments`   -- Arguments placed in the command string.
        - `includes`    -- Extra files scheduled together with the inputs.
        - `local`       -- If true, sets ``options.local`` to ``True``.
        - `environment` -- Per-call entries merged over the instance
                           environment.
        - `collect`     -- If true, the parsed inputs are passed as the
                           ``collect`` option.

        Returns the parsed outputs.
        """
        abstraction = CurrentAbstraction()
        nest        = CurrentNest()

        # Engine Functions define inputs and output member attributes.  Each
        # fallback is resolved on its own so that a missing ``inputs``
        # attribute does not prevent the ``outputs`` attribute from being used.
        if not inputs:
            inputs = getattr(self, 'inputs', inputs)
        if not outputs:
            outputs = getattr(self, 'outputs', outputs)

        inputs   = parse_input_list(inputs)
        outputs  = parse_output_list(outputs, inputs)
        includes = parse_input_list(includes) + parse_input_list(self.includes)
        command  = self.command_format(inputs, outputs, arguments)
        # Copy the instance environment so per-call updates never leak back.
        options  = Options(environment=dict(self.environment), collect=inputs if collect else None)

        if local:
            options.local = True

        if environment:
            options.environment.update(environment)

        if nest.batch:
            options.batch = nest.batch

        nest.schedule(abstraction, self, command,
            list(inputs) + list(includes), outputs, options, nest.symbol)

        return outputs

    def command_format(self, inputs=None, outputs=None, arguments=None):
        """
        Returns command string by formatting function template with `inputs`
        and `outputs` arguments.

        This method requires the user to **explicitly** specify the `inputs`
        and `outputs` to be used in the command string.
        """
        inputs    = ' '.join(parse_string_list(inputs))
        outputs   = ' '.join(parse_string_list(outputs))
        arguments = ' '.join(parse_string_list(arguments))
        return self.cmd_format.format(
            executable  = self.path,
            EXE         = self.path,
            inputs      = inputs,
            IN          = inputs,
            outputs     = outputs,
            OUT         = outputs,
            arguments   = arguments,
            ARG         = arguments)

    def __str__(self):
        """ Return the command template with only the executable filled in.

        Every other field is substituted by its own placeholder, so the
        result still shows where inputs, outputs and arguments will go.
        """
        return self.cmd_format.format(
            executable  = self.path,
            EXE         = self.path,
            inputs      = '{inputs}',
            IN          = '{IN}',
            outputs     = '{outputs}',
            OUT         = '{OUT}',
            arguments   = '{arguments}',
            ARG         = '{ARG}')


# Scripting Function classes

class ScriptFunction(Function):
    """ Base class for Functions whose executable is an embedded script.

    The script text is written to disk, marked executable, and the resulting
    file is then used as the executable of a regular :class:`Function`.

    **Positional Arguments:**

        - `source`      -- Source code for the script.

    **Keyword Arguments:**

        - `executable`  -- Path or name to use for the script.
        - `cmd_format`  -- String template used to generate command string.

    If `executable` is ``None``, the script name is taken from the next item
    of the current nest's ``stash``.
    """
    def __init__(self, source, executable=None, cmd_format=None):
        script = executable
        if script is None:
            script = next(CurrentNest().stash)

        # Materialize the script and make it runnable (mode 0755).
        with open(script, 'w') as handle:
            handle.write(source)
        os.chmod(script, 0o755)

        Function.__init__(self, script, cmd_format)


class ShellFunction(ScriptFunction):
    """ This allows the user to embed a shell script.

    **Positional Arguments:**

        - `source`      -- Source code for the script.

    **Keyword Arguments:**

        - `shell`       -- Shell to be used to execute script.
        - `executable`  -- Path or name to use for the script.
        - `cmd_format`  -- String template used to generate command string.
        - `modules`     -- Optional iterable of module names.  One
                           ``module load <name>`` line per module is emitted
                           before the script body and one
                           ``module unload <name>`` line after it.

    The supported values for `shell` are ``sh``, ``ksh``, ``bash``, ``csh``,
    and ``tcsh``.  The class assumes that the shells are located in ``/bin``.
    Any other non-absolute value (or ``None``) falls back to
    :data:`SHELL_DEFAULT`.  If you pass an absolute path instead, that path is
    used verbatim in the script's ``#!`` line.
    """
    SHELL_TABLE = {
        'sh'    :   '/bin/sh',
        'ksh'   :   '/bin/ksh',
        'bash'  :   '/bin/bash',
        'csh'   :   '/bin/csh',
        'tcsh'  :   '/bin/tcsh',
    }
    SHELL_DEFAULT = 'sh'

    def __init__(self, source, shell=None, executable=None, cmd_format=None, modules=None):
        if shell is not None and os.path.isabs(shell):
            shell_path = shell
        else:
            # Unknown or missing shell names silently use the default shell.
            shell_path = ShellFunction.SHELL_TABLE.get(
                shell, ShellFunction.SHELL_TABLE[ShellFunction.SHELL_DEFAULT])

        # ``None`` (the default) and any empty iterable both mean "no modules".
        modules = list(modules) if modules else []
        load_lines   = "\n".join("module load " + module for module in modules)
        unload_lines = "\n".join("module unload " + module for module in modules)

        # Layout: shebang, module loads, blank separator, body, module unloads.
        source = '#!%s\n' % shell_path + load_lines + "\n" + source + "\n" + unload_lines
        ScriptFunction.__init__(self, source, executable, cmd_format)


class PythonFunction(ScriptFunction):
    """ This allows the user to embed Python scripts as functions.

    **Positional Arguments:**

        - `function`    -- Python function object whose source code is
                           materialized as a script.

    **Keyword Arguments:**

        - `add_path`    -- Optional path (string) or iterable of paths that
                           the generated script inserts at the front of
                           ``sys.path``.  Two directories relative to this
                           module (``..`` and ``../..``) are always added.
                           The argument itself is never modified.
        - `executable`  -- Path or name to use for the script.
        - `cmd_format`  -- String template used to generate command string.
        - `modules`     -- Accepted for signature parity with
                           :class:`ShellFunction`; not used by this
                           constructor.

    If `function` has a ``func_imports`` attribute, the names it lists are
    imported by the generated script in addition to ``os`` and ``sys``.
    """
    # Interpreter placed on the generated script's ``#!`` line.
    PYTHON_VERSION  = sys.executable
    # Script skeleton.  The outer ``.format`` fills in the interpreter; the
    # doubled-brace fields are filled per instance with, in order: the import
    # list, the ``sys.path`` inserts, the function source and the function
    # name.  At run time the script unpickles every existing argument file
    # ending in ``IOObject.IOOBJECT_EXT``, treats non-existing ones as output
    # paths, calls the function and pickles its results to those paths.
    PYTHON_TEMPLATE =  '''#!{0}
import pickle
import {{0}}

{{1}}
from jflow.parameter import IOObject 
def prepare_arguments(*args):

    ioobj_ext = IOObject.IOOBJECT_EXT
    new_args = []
    output_path = []
    # load input object
    for i, arg in enumerate(args):
        if arg.endswith(ioobj_ext):
            if os.path.exists(arg):
                ioobjh = open(arg, 'rb')
                try:
                    ioobj = pickle.load(ioobjh)
                    if isinstance(ioobj, list):
                        for j, cioobj in enumerate(ioobj):
                            if (hasattr(cioobj, "is_ioobject")):
                                cioobjh = open(cioobj.dump_path, 'rb')
                                ioobj[j] = pickle.load(cioobjh)
                                cioobjh.close()
                finally:
                    ioobjh.close()
                new_args.append(ioobj)
            else :
                output_path.append(arg)
        else :
            new_args.append(arg)     
    return (new_args, output_path)
    
{{2}}
if __name__ == '__main__':
    (new_args, output_path) = prepare_arguments(*sys.argv[1:])
    outputs = {{3}}(*new_args)
    
    if not isinstance(outputs, list): outputs = [outputs]
    # in case the returned object is a table
    if len(output_path) == 1 and len(outputs) != len(output_path):
        outputs = [outputs]
    # otherwise the number does not match
    elif len(outputs) != len(output_path) and len (output_path) != 0:
        raise Exception( "The number of object returned by the function is different from the number of outputs specified!")
    if len (output_path) > 0 :
        for i, obj in enumerate(outputs):
            objh = open(output_path[i], "wb")
            pickle.dump(obj, objh)
            objh.close()
'''.format(PYTHON_VERSION)

    def __init__(self, function, add_path=None, executable=None, cmd_format=None, modules=None):
        # TODO: this doesn't work with Python3
        body = inspect.getsource(function)
        name = getfuncname(function)
        imports = ['os', 'sys']
        imports.extend(getattr(function, 'func_imports', []))

        # Work on a private copy so the caller's collection is never mutated,
        # and accept a single path string as well as any iterable of paths.
        if not add_path:
            search_paths = set()
        elif isinstance(add_path, str):
            search_paths = {add_path}
        else:
            search_paths = set(add_path)

        here = os.path.dirname(os.path.realpath(__file__))
        search_paths.update([os.path.join(here, '..'),
                             os.path.join(here, '..', '..')])

        # Sorted so the generated script is identical from run to run, and
        # repr() so quotes or backslashes in a path cannot break the script.
        path = ''.join('sys.path.insert(0, {0!r})\n'.format(apath)
                       for apath in sorted(search_paths))

        source = self.PYTHON_TEMPLATE.format(', '.join(imports), path, body, name)
        ScriptFunction.__init__(self, source, executable, cmd_format)


# Function argument parser

def parse_function(function, py_func_builder=PythonFunction, environment=None):
    """ Return a :class:`Function` object based on the input `function`.

    If `function` is already a :class:`Function`, then return it.  If it is a
    string, then parse it and automagically construct a :class:`Function`
    object.  Otherwise, raise a :class:`~weaver.util.WeaverError`.

    This means that a `function` must be one of the following:

    1. An existing :class:`Function`.
    2. A string template (ex. `{executable} {arguments} {inputs} {outputs}`)
    3. A real Python function that will be converted.

    **Keyword Arguments:**

    - `py_func_builder` -- Callable applied to a Python callable to build the
                           resulting :class:`Function`.
    - `environment`     -- Passed to :class:`Function` when `function` is a
                           string; not used for the other two cases.

    .. note::

        The parser expects that the **first word** in the `function` string to
        refer to the name of the executable to be used for the
        :class:`Function`.
    """
    if isinstance(function, Function):
        return function

    if isinstance(function, str):
        if ' ' in function:
            # Everything after the first space is kept verbatim as the
            # remainder of the command template.
            executable, remainder = function.split(' ', 1)
            return Function(executable,
                cmd_format  = '{executable} ' + remainder,
                environment = environment)

        return Function(function, environment=environment)

    # ``collections.Callable`` no longer exists on Python >= 3.10, so test
    # callability directly instead.
    if callable(function):
        return py_func_builder(function)

    raise WeaverError(D_FUNCTION,
        'could not parse function argument: {0}'.format(function))

# CamelCase alias bound to the same callable as parse_function.
ParseFunction = parse_function


# Pipeline function

class Pipeline(Function):
    """ Function that runs several functions as one combined command.

    **Positional Arguments:**

    - `functions`   -- Sequence of items accepted by :func:`parse_function`.

    **Keyword Arguments:**

    - `separator`   -- String placed between the individual commands;
                       defaults to :data:`DEFAULT_SEPARATOR`.

    The executable of the first function is used as the pipeline's own
    executable, and the paths of all functions make up its includes.
    """
    DEFAULT_SEPARATOR = '&&'

    def __init__(self, functions, separator=None):
        self.functions = list(map(parse_function, functions))

        # The template is only a human-readable summary; the real command is
        # assembled by command_format below.
        summary = 'Pipeline({0})'.format([str(stage) for stage in self.functions])
        Function.__init__(self, self.functions[0].path, cmd_format=summary)

        self.includes  = set(stage.path for stage in self.functions)
        self.separator = Pipeline.DEFAULT_SEPARATOR if separator is None else separator

    def command_format(self, inputs=None, outputs=None, arguments=None):
        """ Return every stage's command joined by the padded separator.

        Each stage is formatted with the same `inputs`, `outputs` and
        `arguments`.
        """
        glue = ''.join((' ', self.separator, ' '))
        commands = []
        for stage in self.functions:
            commands.append(stage.command_format(inputs, outputs, arguments))
        return glue.join(commands)

    def __str__(self):
        """ Return the pipeline summary stored as the command template. """
        return self.cmd_format

# vim: set sts=4 sw=4 ts=8 expandtab ft=python: