Commit 940ecf37 authored by Penom Nom's avatar Penom Nom
Browse files

generic splitbc component

parent 1de1764c
......@@ -19,54 +19,58 @@ import os
from jflow.iotypes import OutputFileList,OutputFile, InputFile, InputFileList, Formats
from jflow.abstraction import MultiMap
from jflow.component import Component
from weaver.function import ShellFunction
from ng6.analysis import Analysis
class SplitBC (Analysis):
ENZYMES = {
'sbfI' : {
'rad' : 'CCTGCAGG',
'radtag' : 'TGCAGG'
}
}
def get_enzyme (self, name):
    """
    Return the restriction-site sequences registered for an enzyme.

    @param name: enzyme name, used as a key of self.ENZYMES
    @return: tuple (rad, radtag) read from the matching ENZYMES entry
    @raise ValueError: if name is not a key of self.ENZYMES
    """
    # `in` replaces dict.has_key(), which was removed in Python 3.
    if name not in self.ENZYMES:
        # list(...) keeps the accepted names rendered as a plain list on every Python version.
        raise ValueError("The enzyme name " + str(name) + " does not exists. Accepted names are " + str(list(self.ENZYMES.keys())))
    enzyme = self.ENZYMES[name]
    return (enzyme['rad'], enzyme['radtag'])
class SplitBC (Component):
def define_parameters(self, fastq_file1, fastq_file2, matrix_indiv_name, barcode_file, enzyme,
mismatches, tag_mismatch, trim = True, forward = True ):
def define_parameters(self, fastq_file1, barcode_file, matrix_indiv_name, fastq_file2 = [],
rad = None, rad_tag = None, mismatches = None , tag_mismatch = None , trim_barcode = False,
trim_reads2 = False, bol = True, eol = False, partial = None, no_adapt = False):
"""
@param fastq_file1: list of fastq_files path
@param barcode_file: list of barcode file path
@param matrix_indiv_name: list of list of individual names (example : [[], [] , []]). The length of this
matrix_indiv_name must be the same as the length of fastq_file1 (and fastq_file2 if provided)
@param fastq_file2: list of fastq_files path
@param matrix_indiv_name: list of list of individual names
@param barcode_file: list of barcode file path
@param enzyme: enzyme name
@param rad:
@param rad_tag:
...
"""
check_len = len(fastq_file1) == len(matrix_indiv_name) == len(barcode_file)
self.fastq1 = OutputFileList(fastq_file1, Formats.FASTQ)
self.fastq2 = None
if fastq_file2 is not None:
if fastq_file2 :
check_len = len(fastq_file1) == len(matrix_indiv_name) == len(fastq_file1) == len(barcode_file)
self.fastq2 = OutputFileList(fastq_file2, Formats.FASTQ)
if not check_len :
raise Exception("length of fastq_file1, fastq_file2, matrix_indiv_name and barcode_file must be the same")
if trim_barcode and trim_reads2 :
raise Exception("you must specify either trim_barcode or trim_reads2, but not both")
if (bol == eol == True ) or ( bol == eol == False) :
raise Exception("one of bol, eol must be specified, but not both")
if (rad is not None and rad_tag is None) or (rad is None and rad_tag is not None) :
raise Exception("you must specify rad with rad_tag")
self.barcode_file = OutputFileList(barcode_file)
self.mismatches = mismatches
self.rad, self.rad_tag = self.get_enzyme(enzyme )
self.tag_mismatch = tag_mismatch
self.trim = trim
self.forward = forward
self.prefix_r1 = os.path.join(self.output_directory , "%_1.fq")
self.prefix_r2 = os.path.join(self.output_directory , "%_2.fq")
self.trim_barcode = trim_barcode
self.trim_reads2 = trim_reads2
self.bol = bol
self.eol = eol
self.partial = partial
self.no_adapt = no_adapt
self.rad = rad
self.rad_tag = rad_tag
self.matrix_read1 = []
self.matrix_read2 = []
self.output_read1 = []
......@@ -75,46 +79,54 @@ class SplitBC (Analysis):
for id, inames in enumerate(matrix_indiv_name) :
outr1 = OutputFileList(self.get_outputs('{basename_woext}_1.fq', inames), Formats.FASTQ)
outr2 = OutputFileList(self.get_outputs('{basename_woext}_2.fq', inames), Formats.FASTQ)
self.matrix_read1.append(outr1)
self.matrix_read2.append(outr2)
self.output_read1 += outr1
self.output_read2 += outr2
self.stdout.append(OutputFile(os.path.join(self.output_directory , "splitbc" + str(id) + ".stdout")))
def get_version(self):
    """
    Report the version string of this component.

    @return: the constant string "1.1"
    """
    version = "1.1"
    return version
def define_analysis(self):
    """
    Set the descriptive attributes of the analysis on the instance:
    name, description, software and options.
    """
    # Tuple assignment binds left to right, so the attributes are set in the
    # order name, description, software, options.
    self.name, self.description = "splitbc", "Demultiplex individual"
    self.software, self.options = "splitbc.pl", ' '
if self.fastq2 is not None :
outr2 = OutputFileList(self.get_outputs('{basename_woext}_2.fq', inames), Formats.FASTQ)
self.matrix_read2.append(outr2)
self.output_read2 += outr2
self.stdout.append(OutputFile(os.path.join(self.output_directory , "splitbc" + str(id) + ".stdout")))
def process(self):
# Builds the splitbc.pl command line and maps it over the inputs with MultiMap.
# NOTE(review): this span comes from a rendered diff with indentation and +/- markers stripped.
# Statements that read self.forward, self.trim, self.prefix_r1 or self.prefix_r2 look like the
# previous revision shown next to its replacement -- confirm against the repository.
strand = "--bol" if self.forward else "--eol"
prefix_r1 = os.path.join(self.output_directory , "%_1.fq")
# The command is collected as a list of tokens and joined with spaces further down;
# "$1" is the first MultiMap input (the barcode file in the list-based calls below).
command = [self.get_exec_path("splitbc.pl"), "--bcfile", "$1", "--prefix-r1", prefix_r1 ]
# Barcode position flag: --bol when self.bol is set, otherwise --eol when self.eol is set.
if self.bol :
command.append('--bol')
elif self.eol :
command.append('--eol')
# Optional flags are only emitted when a value was supplied.
# NOTE(review): ' '.join() below needs every token to be a string -- assumes mismatches,
# tag_mismatch and partial arrive as strings; TODO confirm.
if self.mismatches is not None :
command.extend(["--mismatches", self.mismatches])
if self.tag_mismatch is not None :
command.extend(["--TAG_mismatch", self.tag_mismatch])
if self.partial is not None :
command.extend(["--partial", self.partial])
# --trim and --trim2 are mutually exclusive here: at most one of them is appended.
if self.trim_barcode :
command.append( "--trim")
elif self.trim_reads2 :
command.append( "--trim2")
# NOTE(review): define_parameters defaults no_adapt to False, so `is not None` is true for the
# default and --no_adapt is appended; presumably a truthiness test was intended -- confirm.
if self.no_adapt is not None :
command.append("--no_adapt")
# --rad / --radTAG are passed only when both values are set.
if self.rad is not None and self.rad_tag is not None :
command.extend(["--rad", self.rad, "--radTAG", self.rad_tag])
# Paired-end case: a second read file is present.
if self.fastq2 is not None :
# NOTE(review): this string-built command looks like the previous implementation.
command = " ".join([
self.get_exec_path("splitbc.pl"), "$1", "$2", "--mismatches", self.mismatches,
"--bcfile", "$3", "--rad", self.rad, "--radTAG", self.rad_tag, "--TAG_mismatch", self.tag_mismatch,
"--trim" if self.trim else "", "--prefix-r1", self.prefix_r1, "--prefix-r2", self.prefix_r2, strand , ' 2>&1 >> $4 '
])
prefix_r2 = os.path.join(self.output_directory , "%_2.fq")
# "$2"/"$3" are the two read files and "$4" receives the command output (appended).
command.extend(["--prefix-r2", prefix_r2, "$2", "$3", '2>&1 >> $4' ])
command = ' '.join(command)
splitbc = ShellFunction(command, cmd_format='{EXE} {IN} {OUT}')
# Two MultiMap calls with different input orders appear here; the second one
# (barcode_file first) matches the "$1" = --bcfile layout of the list-based command.
MultiMap(splitbc, inputs=[self.fastq1, self.fastq2, self.barcode_file], outputs=[self.stdout, self.matrix_read1, self.matrix_read2])
MultiMap(splitbc, inputs=[self.barcode_file, self.fastq1, self.fastq2], outputs=[self.stdout, self.matrix_read1, self.matrix_read2])
# Single-end case: only read 1 is demultiplexed.
else :
# NOTE(review): this string-built command looks like the previous implementation.
command = " ".join([
self.get_exec_path("splitbc.pl"), "$1","--mismatches", self.mismatches,
"--bcfile", "$2", "--rad", self.rad, "--radTAG", self.rad_tag, "--TAG_mismatch", self.tag_mismatch,
"--trim" if self.trim else "", "--prefix-r1", self.prefix_r1, strand, ' 2>&1 > $3 '
])
# "$2" is the read file and "$3" receives the command output (appended).
command.extend([ "$2", '2>&1 >> $3' ])
command = ' '.join(command)
splitbc = ShellFunction(command, cmd_format='{EXE} {IN} {OUT}')
MultiMap(splitbc, inputs=[self.fastq1, self.barcode_file], outputs=[self.stdout, self.matrix_read1])
def post_process(self):
    """
    Post-processing hook; it only writes an empty line to standard output.
    """
    # The call form is valid in both Python 2 and Python 3 and prints the same
    # single newline as the former `print ""` statement.
    print("")
\ No newline at end of file
MultiMap(splitbc, inputs=[self.barcode_file, self.fastq1], outputs=[self.stdout, self.matrix_read1])
......@@ -24,8 +24,23 @@ from ng6.ng6workflow import NG6Workflow
class RADseq (NG6Workflow):
ENZYMES = {
'sbfI' : {
'rad' : 'CCTGCAGG',
'radtag' : 'TGCAGG'
}
}
def get_enzyme (self, name):
    """
    Return the restriction-site sequences registered for an enzyme.

    @param name: enzyme name, used as a key of self.ENZYMES
    @return: tuple (rad, radtag) read from the matching ENZYMES entry
    @raise ValueError: if name is not a key of self.ENZYMES
    """
    # `in` replaces dict.has_key(), which was removed in Python 3.
    if name not in self.ENZYMES:
        # list(...) keeps the accepted names rendered as a plain list on every Python version.
        raise ValueError("The enzyme name " + str(name) + " does not exists. Accepted names are " + str(list(self.ENZYMES.keys())))
    enzyme = self.ENZYMES[name]
    return (enzyme['rad'], enzyme['radtag'])
def process(self):
rad, rad_tag = self.get_enzyme(self.args['enzyme'])
# group all individual by pool
pools = {}
for p in self.args['pool'] :
......@@ -65,10 +80,11 @@ class RADseq (NG6Workflow):
ff.write(indiv['indiv_name'] + "\t" + indiv['barcode'] +"\n")
indiv_names.append(inames)
splitbc = self.add_component("SplitBC", [ fastq_files_1,fastq_files_2 if fastq_files_2 else None,
indiv_names, barcode_files, self.args['enzyme'], self.args['mismatches'], self.args['tag_mismatch'],
self.args['trim'], self.args['forward']])
splitbc = self.add_component("SplitBC", [ fastq_files_1,barcode_files, indiv_names, fastq_files_2,
rad, rad_tag, self.args['mismatches'], self.args['tag_mismatch'],
self.args['trim_barcode'], self.args['trim_reads2']])
ustacks = self.add_component("Ustacks" , [], {"indiv_dic": indivs_by_name, "read1_files" : splitbc.output_read1 , "max_locus" : 3 } )
#ustacks = self.add_component("Ustacks" , [], {"indiv_dic": indivs_by_name, "read1_files" : splitbc.output_read1 , "max_locus" : 3 } )
#cstacks = self.add_component("Cstacks", [ustacks.alleles, ustacks.snps, ustacks.tags, self.args["catalog_mismatches"]])
......@@ -94,17 +94,19 @@ mismatches.flag = --mismatches
mismatches.help = Max. number of mismatches allowed.
mismatches.default = 1
forward.name = forward
forward.flag = --forward
forward.help = Match barcode at the beginning of the sequence (5' 3')
forward.default = True
forward.type = bool
trim_barcode.name = trim_barcode
trim_barcode.flag = --trim-barcode
trim_barcode.help = Should the barcode be trimmed
trim_barcode.default = True
trim_barcode.exclude = trim_reads2
trim_barcode.type = bool
trim.name = trim
trim.flag = --trim
trim.help = Should the barcode be trimmed.
trim.default = True
trim.type = bool
trim_reads2.name = trim_reads2
trim_reads2.flag = --trim-reads2
trim_reads2.help = Should read 2 be trimmed to the same length as read 1
trim_reads2.default = False
trim_reads2.exclude = trim_barcode
trim_reads2.type = bool
catalog_mismatches.name = catalog_mismatches
catalog_mismatches.flag = --catalog-mismatches
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment