Commit 49831438 authored by Jerome Mariette's avatar Jerome Mariette

add sfffilereader to seqio library

parent 10ad6073
......@@ -22,10 +22,6 @@ pid = 5
work_dir = /home/jmariett/scratch/ng6/
# The ng6 save directory
save_dir = /home/jmariett/scratch/ng6/
# The ng6 tmp directory, should be accessible
# from any nodes if running in a cluster
# environment
tmp_dir = /home/jmariett/scratch/ng6/tmp/
[database]
host = localhost
......@@ -47,9 +43,22 @@ tmp_directory = /home/jmariett/scratch/tmp
[softwares]
blastall = /usr/bin/blastall
formatdb = /usr/bin/formatdb
fastqc = /home/jmariett/softwares/FastQC/fastqc
sfffile = /home/jmariett/softwares/newbler/gsSeqTools/bin/sfffile
[454_mids]
TID1 = GTGAG
TID2 = TACGC
TID3 = GTCAC
TID4 = CTACT
TID5 = TCTGT
TID6 = GATGA
TID7 = TGACT
TID8 = ATGCT
TID9 = TCGCT
TID10 = TATAC
TID11 = ACGAC
TID12 = TCGCT
TID13 = TACAT
MID1 = ACGAGTGCGT
MID2 = ACGCTCGACA
MID3 = AGACGCACTC
......
......@@ -86,9 +86,9 @@ class NG6ConfigReader(object):
@return: tmp_dir
"""
try:
return self.reader.get('general', 'tmp_dir')
return self.reader.get('storage', 'tmp_directory')
except :
raise Error("Failed when parsing the config file !")
raise Error("Failed when parsing the config file !")
def get_save_directory(self):
......
......@@ -95,7 +95,7 @@ class Run(object):
@param mode: can be none, gz, bz2, tar.gz and tar.bz2
"""
nb_seq, full_size = 0, 0
for file in files:
for file in files:
# Get nb_seq and full_size values
reader = seqio.SequenceReader(file)
for id, desc, seq, qualities in reader:
......@@ -104,7 +104,39 @@ class Run(object):
self.set_nb_sequences(nb_seq)
self.set_full_size(full_size)
self.archive_files(files, mode)
def export_mids_to_newbler_cfg_file(self, error=2, midscheme="454MIDS"):
    """
    Export the mids description of the run as a config file in newbler format.

    The file starts with the midscheme name, followed by a "{ ... }" block
    holding one tab-indented 'mid = "<description>", "<sequence>", <error>;'
    line per known mid. When the configured sequence holds two comma
    separated values, the second one is appended after the error value.
    Mid names missing from the [454_mids] configuration are skipped.
    @param error: value written after the first sequence of every mid line
    @param midscheme: name of the mid scheme, written as first line of the file
    @return: [path to the generated config file, midscheme]
    """
    cfg_reader = NG6ConfigReader()
    mids = cfg_reader.get_454_mids()
    lines = [midscheme + "\n", "{\n"]
    # Keys of the description look like: MID1:desc_ex1;MID2:desc_ex2;MID3,MID4:
    # so a single key may hold several comma separated mid names
    for mid in self.__mids_description:
        for mid_name in mid.split(","):
            try:
                # config keys are looked up in lower case
                seq = mids[mid_name.strip().lower()].split(",")
            except KeyError:
                # unknown mid name: nothing to export for this one
                continue
            seq_val = '"' + seq[0] + '", ' + str(error)
            if len(seq) > 1:
                seq_val += ', "' + seq[1] + '"'
            seq_val += ';'
            lines.append('\tmid = "' + self.__mids_description[mid] + '", ' + seq_val + "\n")
    lines.append("}")
    # delete=False keeps the file on disk once closed, so the returned path
    # stays valid; the file is created once and never re-opened by name
    with tempfile.NamedTemporaryFile(mode="w", suffix=".cfg", delete=False) as mid_file:
        mid_file.writelines(lines)
    return [mid_file.name, midscheme]
def archive_files(self, files, mode, archive_name="ng6_archive.tar", delete=False):
"""
Copy, archive and compress the files list to the run. Files can then be downloaded and gave back by
......
This diff is collapsed.
......@@ -15,6 +15,8 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
import os
from jflow.component import Component
from weaver.function import ShellFunction
......@@ -30,4 +32,21 @@ class SFFextract (Component):
def run(self):
    """
    Build the sff_extract.py shell command and map it over the component
    input files, one output file per input file.
    """
    command = self.get_exec_path("sff_extract.py") + " -c $1 -s $2"
    extract = ShellFunction(command, cmd_format='{EXE} {IN} {OUT}')
    Map(extract, self.input_files, self.output_files)
class SFFfile (Component):
    """
    Component running the "sfffile" executable on one sff file with a mid
    scheme and a mids config file. The command standard output is
    redirected to "sfffile.stdout" in the component output directory.
    """

    def __init__(self):
        Component.__init__(self, is_dynamic=True)

    def define_parameters(self, sff_file, prefix, midscheme, mids_config_file):
        """
        Store the parameters used to build the command line.
        @param sff_file: the sff file given as input of the command
        @param prefix: joined to the output directory and given to the -o option
        @param midscheme: value given to the -s option
        @param mids_config_file: value given to the -mcf option
        """
        self.sff_file = sff_file
        self.midscheme = midscheme
        self.mids_config_file = mids_config_file
        self.prefix = os.path.join(self.output_directory, prefix)
        self.stdout = os.path.join(self.output_directory, "sfffile.stdout")

    def run(self):
        """
        Build and call the sfffile shell function.
        """
        # NOTE(review): presumably $1-$3 are filled from the arguments,
        # $4 from the input and $5 from the output -- confirm against weaver
        command = self.get_exec_path("sfffile") + " -s $1 -mcf $2 -o $3 $4 > $5"
        arguments = [self.midscheme, self.mids_config_file, self.prefix]
        split_sff = ShellFunction(command, cmd_format='{EXE} {ARG} {IN} {OUT}')
        split_sff(inputs=self.sff_file, outputs=self.stdout, arguments=arguments)
\ No newline at end of file
......@@ -17,6 +17,7 @@
import datetime
import os
import glob
from jflow.workflow import Workflow
from ng6.project import Project
......@@ -26,19 +27,16 @@ from ng6.run import Run
class R454 (Workflow):
def run(self):
# first check if files provided exists
data_files = []
for file in self.args["data_file"]:
if os.path.isfile(file):
data_files.append(file)
else:
sys.exit(2, "error: %s file does not exists\n" % (file))
# first check if file provided exists
if not os.path.isfile(self.args["data_file"]):
sys.exit(2, "error: %s file does not exists\n" % (file))
# build the project and the run
my_project = Project.get_from_id(self.args["project_id"])
run_date = self.args["run_date"].split("/")
my_run = Run(self.args["run_name"], datetime.date(int(run_date[2]), int(run_date[1]), int(run_date[0])),
self.args["species"], self.args["data_nature"], self.args["type"], self.args["run_description"],
self.args["sequencer"])
# if it's a run with some index, let's write down description
if self.args["mids_description"]:
mids_desc_array = {}
......@@ -46,13 +44,20 @@ class R454 (Workflow):
parts = mids_desc.split(":")
mids_desc_array[parts[0]] = parts[1]
my_run.add_mids_description(mids_desc_array)
[mids_config_file, midscheme] = my_run.export_mids_to_newbler_cfg_file(self.args["demux_error"])
# extract sff per mids
sfffile = self.add_component("SFFfile", [self.args["data_file"], my_run.name, midscheme, mids_config_file])
files_to_process = glob.glob(os.path.join(sfffile.output_directory, "*.sff"))
else:
files_to_process = [self.args["data_file"]]
# then add the run to the project
my_project.add_run(my_run)
# extract the sff input file
sff_extract = self.add_component("SFFextract", [data_files])
#sff_extract = self.add_component("SFFextract", [files_to_process])
# archive the files
addrawfiles = self.add_component("AddRawFiles", [my_run, sff_extract.output_files, self.args["compression"]])
archivefiles = self.add_component("ArchiveFiles", [my_run, data_files, self.args["compression"]])
addrawfiles = self.add_component("AddRawFiles", [my_run, self.args["data_file"], self.args["compression"]])
#archivefiles = self.add_component("ArchiveFiles", [my_run, files_to_process, self.args["compression"]])
......@@ -34,9 +34,8 @@ description = roche 454 quality check pipeline
[parameters]
data_file.name = data_file
data_file.flag = --data-file
data_file.help = Which data files are linked to the run
data_file.help = Which data file is linked to the run
data_file.required = True
data_file.action = append
compression.name = compression
compression.flag = --compression
......@@ -93,7 +92,12 @@ type.required = True
mids_description.name = mids_description
mids_description.flag = --mids-description
mids_description.help = Which description is linked to which mids
mids_description.required = False
demux_error.name = demux_error
demux_error.flag = --demux-error
demux_error.type = int
demux_error.default = 0
demux_error.help = How many errors are allowed when demultiplexing
#
# Below are the workflow-specific sections
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment