Commit 10ad6073 authored by Jerome Mariette's avatar Jerome Mariette

454 pipeline

parent 83d1ccd9
This diff is collapsed.
#
# Copyright (C) 2012 INRA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from jflow.component import Component
from weaver.function import ShellFunction
from weaver.abstraction import Map
class SFFextract (Component):
    """Component that converts 454 SFF files to FASTQ using sff_extract.py."""

    def define_parameters(self, input_files):
        # One .fastq output per input file, named after the input's basename.
        self.input_files = input_files
        self.output_files = self.get_outputs('{basename_woext}.fastq', self.input_files)

    def run(self):
        # Shell wrapper around sff_extract.py: $1 = input SFF, $2 = output FASTQ.
        extract_cmd = ShellFunction(self.get_exec_path("sff_extract.py") + " -c $1 -s $2",
                                    cmd_format='{EXE} {IN} {OUT}')
        # Schedule one extraction task per (input, output) pair.
        extract_cmd = Map(extract_cmd, self.input_files, self.output_files)
\ No newline at end of file
......@@ -49,4 +49,28 @@ class AddRawFiles (Component):
run_dump.close()
addraw = PythonFunction(add_raw_files)
addraw(outputs=self.stdout, arguments=[run_dump_path, self.compression, self.files_to_save])
def archive_files (run_dump_path, compression, *files_to_save):
    """Unpickle a run object from `run_dump_path` and archive files through it.

    Executed as a detached PythonFunction task, so the import is kept local.

    @param run_dump_path : path to the pickled run object
    @param compression   : compression mode forwarded to archive_files
    @param files_to_save : files to archive
    """
    import pickle
    # `with` guarantees the dump file is closed even if unpickling raises
    # (the original open/close pair leaked the handle on error).
    with open(run_dump_path, "rb") as run_dump:
        my_run = pickle.load(run_dump)
    my_run.archive_files(files_to_save, compression)
class ArchiveFiles (Component):
    """Component that pickles a run object and schedules file archiving.

    Serializes `runobj` to a temporary dump file, then runs `archive_files`
    as a PythonFunction task that unpickles it and archives the files.
    """

    def define_parameters(self, runobj, files_to_save, compression):
        # runobj: the run object whose archive_files method will be invoked
        self.runobj = runobj
        self.files_to_save = files_to_save
        # compression: forwarded verbatim to the archiving call
        self.compression = compression
        self.stdout = os.path.join(self.output_directory, "archiveFiles.stdout")

    def run(self):
        run_dump_path = self.get_temporary_file(".dump")
        # `with` guarantees the dump is flushed and closed even if
        # pickling raises (the original open/close pair leaked on error).
        with open(run_dump_path, "wb") as run_dump:
            pickle.dump(self.runobj, run_dump)
        archive = PythonFunction(archive_files)
        archive(outputs=self.stdout, arguments=[run_dump_path, self.compression, self.files_to_save])
\ No newline at end of file
#
# Copyright (C) 2012 INRA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
import datetime
import os
import sys

from jflow.workflow import Workflow

from ng6.project import Project
from ng6.run import Run
class R454 (Workflow):
    """Roche 454 quality-check pipeline: extracts SFF files and archives raw data."""

    def run(self):
        # First check that every provided data file exists.
        data_files = []
        for data_file in self.args["data_file"]:
            if os.path.isfile(data_file):
                data_files.append(data_file)
            else:
                # BUG FIX: sys.exit(2, msg) raised TypeError (sys.exit takes a
                # single argument). Write the message to stderr, then exit
                # with the intended status code 2.
                sys.stderr.write("error: %s file does not exists\n" % (data_file))
                sys.exit(2)
        # Build the project and the run; run_date is expected as DD/MM/YYYY.
        my_project = Project.get_from_id(self.args["project_id"])
        run_date = self.args["run_date"].split("/")
        my_run = Run(self.args["run_name"], datetime.date(int(run_date[2]), int(run_date[1]), int(run_date[0])),
                     self.args["species"], self.args["data_nature"], self.args["type"], self.args["run_description"],
                     self.args["sequencer"])
        # If it's a run with some index, record the MID descriptions.
        # Expected format: "mid1:desc1;mid2:desc2;..."
        if self.args["mids_description"]:
            mids_desc_array = {}
            for mids_desc in self.args["mids_description"].split(";"):
                parts = mids_desc.split(":")
                mids_desc_array[parts[0]] = parts[1]
            my_run.add_mids_description(mids_desc_array)
        # Then add the run to the project.
        my_project.add_run(my_run)
        # Extract the SFF input files to FASTQ.
        sff_extract = self.add_component("SFFextract", [data_files])
        # Archive the extracted files and the raw inputs.
        addrawfiles = self.add_component("AddRawFiles", [my_run, sff_extract.output_files, self.args["compression"]])
        archivefiles = self.add_component("ArchiveFiles", [my_run, data_files, self.args["compression"]])
#
# Copyright (C) 2012 INRA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
[global]
name = r454
description = roche 454 quality check pipeline
#
# Parameter section
# param.name: the parameter display name
# .name: the parameter argument
# .flag: the command line flag to use the argument
# .help: a brief description of what the parameter does
# .default [None]: the value produced if the parameter is not provided
# .type [str]: the parameter type that should be tested (str|int|file|long|bool|...)
# .choices [None]: a container of the allowable values for the parameter
# .required [False]: whether or not the command-line option may be omitted
# .action [store]: the basic type of action to be taken when this argument is encountered at the command line.
#
[parameters]
data_file.name = data_file
data_file.flag = --data-file
data_file.help = Which data files are linked to the run
data_file.required = True
data_file.action = append
compression.name = compression
compression.flag = --compression
compression.help = How should data be compressed once archived (none|gz|bz2)
compression.default = none
compression.choices = none|gz|bz2
databank.name = databank
databank.flag = --databank
databank.help = Which databank should be used to seek contamination
databank.action = append
project_id.name = project_id
project_id.flag = --project-id
project_id.type = int
project_id.help = The project id the run belongs to
project_id.required = True
run_name.name = run_name
run_name.flag = --name
run_name.help = Give a name to your run
run_name.required = True
run_description.name = run_description
run_description.flag = --description
run_description.help = Give a description to your run
run_description.required = True
run_date.name = run_date
run_date.flag = --date
run_date.help = When were the data produced
run_date.required = True
data_nature.name = data_nature
data_nature.flag = --data-nature
data_nature.help = Are Sequences cDNA, genomique, RNA, ...
data_nature.required = True
sequencer.name = sequencer
sequencer.flag = --sequencer
sequencer.help = Which sequencer produced the data
sequencer.required = True
species.name = species
species.flag = --species
species.help = Which species has been sequenced
species.required = True
type.name = type
type.flag = --type
type.help = What type of data is it (1 lane, 1 region)
type.required = True
mids_description.name = mids_description
mids_description.flag = --mids-description
mids_description.help = Which description is link to which mids
mids_description.required = False
#
# Below workflow specific sections
#
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment