Commit 06420674 authored by Penom Nom
Browse files

Splitbc is now an analysis component.

parent 79dd1f80
......@@ -25,20 +25,20 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
{/foreach}
<ul>
{if in_array("--trim", $params)}
<li class="parameter">Trim sequences when the barecode is found.</li>
<li class="parameter">Trim sequences when the barcode is found.</li>
{/if}
{if in_array("--mismatches", $params)}
{assign var="mindex" value=$params|@array_keys:"--mismatches"}
<li class="parameter">Allow {$params[$mindex[0]+1]} mismatch with the barecode.</li>
<li class="parameter">Allow {$params[$mindex[0]+1]} mismatch with the barcode.</li>
{/if}
{if in_array("--partial", $params)}
{assign var="pindex" value=$params|@array_keys:"--partial"}
<li class="parameter">Allow a trucated barecode of {$params[$pindex[0]+1]} nucleotides.</li>
<li class="parameter">Allow a truncated barcode of {$params[$pindex[0]+1]} nucleotides.</li>
{/if}
{assign var="index" value=$params|@array_keys:"--bcfile"}
<li class="parameter">With barecode file containing:
<li class="parameter">With barcode file containing:
<br />
<div class="file-display">{$analyse_results["barecode_file"]["default"]["barecode_file"]}</div>
<div class="file-display">{$analyse_results["barcode_file"]["default"]["barcode_file"]}</div>
</li>
</ul>
{/block}
......@@ -59,7 +59,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
{assign var="total" value=0}
{assign var="analyse_results_sorted" value=$analyse_results|@ksort}
{foreach from=$analyse_results_sorted key=sample item=sample_results}
{if $sample != "barecode_file"}
{if $sample != "barcode_file"}
{foreach from=$sample_results["default"] key=type item=value}
{if $type == "R"}
{$total=$total+$value}
......@@ -91,4 +91,4 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
</tr>
</tfoot>
</table>
{/block}
\ No newline at end of file
{/block}
......@@ -18,48 +18,40 @@
import os
from jflow.iotypes import OutputFileList,OutputFile, InputFile, InputFileList, Formats
from jflow.abstraction import MultiMap
from jflow.component import Component
from ng6.analysis import Analysis
from weaver.function import ShellFunction
class SplitBC (Component):
class Splitbc (Analysis):
def define_parameters(self, fastq_file1, barcode_file, matrix_indiv_name, fastq_file2 = [],
def define_parameters(self, fastq_file1, barcode_file, indiv_names, fastq_file2 = [],
rad = None, rad_tag = None, mismatches = None , tag_mismatch = None , trim_barcode = False,
trim_reads2 = False, bol = True, eol = False, partial = None, no_adapt = False):
"""
@param fastq_file1: list of fastq_files path
@param barcode_file: list of barcode file path
@param matrix_indiv_name: list of list of individual names (example : [[], [] , []]). The length of this
matrix_indiv_name must be the same as the length of fastq_file1 (and fastq_file2 if provided)
@param fastq_file2: list of fastq_files path
@param barcode_file: barcode file
@param indiv_names: list of list of individual names (example : [[], [] , []]). The length of this
indiv_names must be the same as the length of fastq_file1 (and fastq_file2 if provided)
@param fastq_file2: list of fastq_files path
@param rad:
@param rad_tag:
...
"""
check_len = len(fastq_file1) == len(matrix_indiv_name) == len(barcode_file)
self.fastq1 = OutputFileList(fastq_file1, Formats.FASTQ)
self.fastq2 = None
if fastq_file2 :
check_len = len(fastq_file1) == len(matrix_indiv_name) == len(fastq_file1) == len(barcode_file)
self.fastq2 = OutputFileList(fastq_file2, Formats.FASTQ)
if not check_len :
raise Exception("length of fastq_file1, fastq_file2, matrix_indiv_name and barcode_file must be the same")
if not isinstance(fastq_file1, list):
raise Exception("fastq_file1 must be a list of file path")
if trim_barcode and trim_reads2 :
raise Exception("you must specify either trim_barcode or trim_reads2, but not both")
raise Exception("you must specify either trim_barcode or trim_reads2, but not both")
if (bol == eol == True ) or ( bol == eol == False) :
raise Exception("one of bol, eol must be specified, but not both")
raise Exception("One of bol, eol must be specified, but not both")
if (rad is not None and rad_tag is None) or (rad is None and rad_tag is not None) :
raise Exception("you must specify rad with rad_tag")
self.barcode_file = OutputFileList(barcode_file)
raise Exception("You must specify rad with rad_tag")
self.indiv_names = indiv_names
self.barcode_file = InputFile(barcode_file)
self.mismatches = mismatches
self.tag_mismatch = tag_mismatch
self.trim_barcode = trim_barcode
......@@ -70,63 +62,137 @@ class SplitBC (Component):
self.no_adapt = no_adapt
self.rad = rad
self.rad_tag = rad_tag
check_len = len(fastq_file1) == len(indiv_names)
self.fastq1 = InputFileList(fastq_file1, Formats.FASTQ)
self.fastq2 = None
if fastq_file2 :
check_len = len(fastq_file1) == len(indiv_names) == len(fastq_file1)
self.fastq2 = InputFileList(fastq_file2, Formats.FASTQ)
if not check_len :
raise Exception("length of fastq_file1, fastq_file2 and indiv_names must be the same")
self.matrix_read1 = []
self.matrix_read2 = []
self.output_read1 = []
self.output_read2 = []
self.stdout = []
self.stdouts = []
self.pools_output_dirs = []
for id, inames in enumerate(matrix_indiv_name) :
outr1 = OutputFileList(self.get_outputs('{basename_woext}_1.fq', inames), Formats.FASTQ)
for id, inames in enumerate(self.indiv_names) :
pool_outdir = "pool_" + str(id)
self.pools_output_dirs.append( os.path.join( self.output_directory , pool_outdir ) )
outr1 = OutputFileList(self.get_outputs(os.path.join( pool_outdir, '{basename_woext}_1.fq'), inames), Formats.FASTQ)
self.matrix_read1.append(outr1)
self.output_read1 += outr1
if self.fastq2 is not None :
outr2 = OutputFileList(self.get_outputs('{basename_woext}_2.fq', inames), Formats.FASTQ)
outr2 = OutputFileList(self.get_outputs(os.path.join(pool_outdir, '{basename_woext}_2.fq'), inames), Formats.FASTQ)
self.matrix_read2.append(outr2)
self.output_read2 += outr2
self.stdout.append(OutputFile(os.path.join(self.output_directory , "splitbc" + str(id) + ".stdout")))
self.stdouts = OutputFileList( self.get_outputs("splitBC_pool{FULL}.stdout", range(len(self.indiv_names))))
def get_version(self):
    """Return the hard-coded version string "1.0"."""
    version = "1.0"
    return version
def define_analysis(self):
    """
    Set the analysis metadata (name, description, software) and build the
    option string stored in self.options.

    self.options ends up as a single space-separated string that starts with
    "--bcfile <barcode_file>" followed by the flags selected by the attributes
    set in define_parameters. Option values that are not strings (for example
    a numeric mismatches or partial value) are converted to text, so joining
    them no longer raises a TypeError.
    """
    self.name = "Demultiplexing"
    self.description = "demultiplexing samples"
    self.software = "Splitbc"

    options = ["--bcfile", self.barcode_file]

    # bol is tested first, so it wins when both bol and eol are set
    if self.bol:
        options.append("--bol")
    elif self.eol:
        options.append("--eol")

    # options that carry a value are only emitted when the value was provided
    valued_options = (
        ("--mismatches", self.mismatches),
        ("--TAG_mismatch", self.tag_mismatch),
        ("--partial", self.partial),
    )
    for flag, value in valued_options:
        if value is not None:
            options.extend([flag, value])

    # trim_barcode is tested first, so it wins over trim_reads2
    if self.trim_barcode:
        options.append("--trim")
    elif self.trim_reads2:
        options.append("--trim2")

    # NOTE(review): no_adapt defaults to False in define_parameters, so this
    # test is true for both False and True and --no_adapt is always recorded.
    # The test is kept as-is because process() builds the executed command with
    # the same condition; confirm whether a plain truth test was intended in
    # both places.
    if self.no_adapt is not None:
        options.append("--no_adapt")

    if self.rad is not None and self.rad_tag is not None:
        options.extend(["--rad", self.rad, "--radTAG", self.rad_tag])

    # "%s" formatting turns every item into text before joining
    self.options = " ".join("%s" % (option,) for option in options)
def post_process(self):
    """
    Save the barcode file as a result element and record the values parsed
    from every file listed in self.stdouts.

    Each line is split on tabs: the first field is used as the result name and
    the second one as its value, with any "(*2)" marker removed and surrounding
    whitespace stripped. Lines named "Barcode", "ambiguous", "unmatched" or
    "total", and names ending with "_2rad", are ignored. Lines with fewer than
    two tab-separated fields (for example blank lines) are skipped instead of
    raising an IndexError.
    """
    self._add_result_element("barcode_file", "barcode_file", self._save_file(self.barcode_file, "barcode_file"))

    # results are stored under 'R1+R2' when read-2 outputs exist, 'R' otherwise
    rkey = 'R1+R2' if self.output_read2 else 'R'

    ignored_names = ("Barcode", "ambiguous", "unmatched", "total")
    for filepath in self.stdouts:
        with open(filepath) as fp:
            for line in fp:
                fields = line.rstrip("\r\n").split('\t')
                if len(fields) < 2:
                    # not a "name<TAB>value" line
                    continue
                name = fields[0]
                if name in ignored_names or name.endswith("_2rad"):
                    continue
                val = fields[1].replace("(*2)", "").strip()
                self._add_result_element(name, rkey, val)
def process(self):
# Builds the splitbc.pl command line and registers the shell function(s) that run it.
# NOTE(review): this span is a rendered diff with the +/- markers and the
# indentation stripped. Each line that builds `command` is followed by a line that
# builds `command_base`; the `command` / MultiMap lines look like the removed
# revision and the `command_base` lines plus the per-file loop at the end look
# like the added one — confirm against the repository before editing.
prefix_r1 = os.path.join(self.output_directory , "%_1.fq")
command = [self.get_exec_path("splitbc.pl"), "--bcfile", "$1", "--prefix-r1", prefix_r1 ]
command_base = [self.get_exec_path("splitbc.pl"), "--bcfile", self.barcode_file]
# bol is tested first, so --bol wins when both bol and eol are set
if self.bol :
command.append('--bol')
command_base.append('--bol')
elif self.eol :
command.append('--eol')
command_base.append('--eol')
# options that carry a value are only added when the value is not None
if self.mismatches is not None :
command.extend(["--mismatches", self.mismatches])
command_base.extend(["--mismatches", self.mismatches])
if self.tag_mismatch is not None :
command.extend(["--TAG_mismatch", self.tag_mismatch])
command_base.extend(["--TAG_mismatch", self.tag_mismatch])
if self.partial is not None :
command.extend(["--partial", self.partial])
command_base.extend(["--partial", self.partial])
# trim_barcode is tested first, so --trim wins over --trim2
if self.trim_barcode :
command.append( "--trim")
command_base.append( "--trim")
elif self.trim_reads2 :
command.append( "--trim2")
command_base.append( "--trim2")
# NOTE(review): no_adapt defaults to False in define_parameters, so this test is
# true for both False and True and --no_adapt is always appended — confirm whether
# a plain truth test was intended.
if self.no_adapt is not None :
command.append("--no_adapt")
command_base.append("--no_adapt")
if self.rad is not None and self.rad_tag is not None :
command.extend(["--rad", self.rad, "--radTAG", self.rad_tag])
if self.fastq2 is not None :
prefix_r2 = os.path.join(self.output_directory , "%_2.fq")
command.extend(["--prefix-r2", prefix_r2, "$2", "$3", '2>&1 >> $4' ])
command = ' '.join(command)
splitbc = ShellFunction(command, cmd_format='{EXE} {IN} {OUT}')
MultiMap(splitbc, inputs=[self.barcode_file, self.fastq1, self.fastq2], outputs=[self.stdout, self.matrix_read1, self.matrix_read2])
else :
command.extend([ "$2", '2>&1 >> $3' ])
command = ' '.join(command)
splitbc = ShellFunction(command, cmd_format='{EXE} {IN} {OUT}')
MultiMap(splitbc, inputs=[self.barcode_file, self.fastq1], outputs=[self.stdout, self.matrix_read1])
command_base.extend(["--rad", self.rad, "--radTAG", self.rad_tag])
# One command is built per entry of self.fastq1; the stdout file, read outputs and
# pool directory are looked up at the same index.
for id, fastq1 in enumerate(self.fastq1):
stdout = self.stdouts[id]
outputs_read1 = self.matrix_read1[id]
pool_outdir = self.pools_output_dirs[id]
# NOTE(review): plain mkdir presumably fails when pool_outdir already exists
# (e.g. on a rerun) — confirm whether "mkdir -p" is wanted.
command = ["mkdir", pool_outdir, ";"] + command_base
command.extend(["--prefix-r1", os.path.join(pool_outdir, "%_1.fq") ])
if self.fastq2 is not None :
fastq2 = self.fastq2[id]
outputs_read2 = self.matrix_read2[id]
# NOTE(review): a shell applies redirections left to right, so '2>&1 >> file'
# duplicates stderr onto the original stdout before stdout is appended to the
# file; stderr does not end up in the file — confirm this is intended.
command.extend(["--prefix-r2", os.path.join(pool_outdir, "%_2.fq") , "$1", "$2", '2>&1 >> $3'])
command = ' '.join(command)
splitbc = ShellFunction(command, cmd_format='{EXE} {IN} {OUT}')
splitbc(includes = [self.barcode_file] , inputs=[ fastq1, fastq2 ], outputs=[stdout, outputs_read1, outputs_read2])
else :
command.extend([ "$1", '2>&1 >> $2' ])
command = ' '.join(command)
splitbc = ShellFunction(command, cmd_format='{EXE} {IN} {OUT}')
splitbc(includes = [self.barcode_file] , inputs=[ fastq1], outputs=[stdout, outputs_read1])
......@@ -58,11 +58,11 @@ class RADseq (NG6Workflow):
raise ValueError, "Duplicated individual name " + indiv['indiv_name']
indivs_by_name[indiv['indiv_name'] ] = indiv
# prepare fastq files and create a barcode file per pool
barcode_files = []
# prepare fastq files
indiv_names = []
fastq_files_1 = []
fastq_files_2 = []
barcode_file = self.get_temporary_file()
for pool_id, data in pools.iteritems() :
pooldata = data[0]
indivs = data[1]
......@@ -71,17 +71,14 @@ class RADseq (NG6Workflow):
fastq_files_2.append(pooldata['read2'])
# write barcode file
barcode_file = self.get_temporary_file()
barcode_files.append(barcode_file)
inames = []
with open(barcode_file, "w") as ff:
with open(barcode_file, "a") as ff:
for indiv in indivs :
inames.append(indiv['indiv_name'])
ff.write(indiv['indiv_name'] + "\t" + indiv['barcode'] +"\n")
indiv_names.append(inames)
splitbc = self.add_component("SplitBC", [ fastq_files_1,barcode_files, indiv_names, fastq_files_2,
splitbc = self.add_component("Splitbc", [ fastq_files_1,barcode_file, indiv_names, fastq_files_2,
rad, rad_tag, self.args['mismatches'], self.args['tag_mismatch'],
self.args['trim_barcode'], self.args['trim_reads2']])
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment