Commit 9a7cb4ab authored by Penom Nom's avatar Penom Nom
Browse files

splitbc for radseq pipeline (with test data).

parent c8257962
......@@ -75,6 +75,7 @@ tmp_directory = <path>/tmp
#bismark_genome_preparation = /nosave/software/bismark_v0.8.3/bismark_genome_preparation
#bismark_methylation_extractor = /nosave/software/bismark_v0.8.3/bismark_methylation_extractor
#fastx_reverse_complement = /usr/bin/fastx_reverse_complement
#splitbc = /usr/bin/splitbc.pl
# Set cluster parameters to some components
[components]
......@@ -90,6 +91,10 @@ phix_bwa = /bank/bwadb/phi.fa
ecoli_bwa = /bank/bwadb/ecoli536
yeast_bwa = /bank/bwadb/yeast.nt
# rad and rad tag for radseq data
sbfI_rad = CCTGCAGG
sbfI_radtag = TGCAGG
[454_mids]
MID1 = ACGAGTGCGT
MID2 = ACGCTCGACA
......
......@@ -24,15 +24,11 @@ from ng6.ng6workflow import NG6Workflow
class RADseq (NG6Workflow):
def __init__(self, args={}, id=None, function= "process", parameters_section="parameters", add_run_params=True):
NG6Workflow.__init__(self, args, id, function, parameters_section, add_run_params = False)
def process(self):
# handle if run name have spaces
#run_name = "_".join(self.runobj.name.split())
# add raw files to the run
#addrawfiles = self.add_component("AddRawFiles", [self.runobj, self.args["read_1"], self.args["compression"]])
# make some statistics on raw file
#fastqc = self.add_component("FastQC", [self.args["read_1"], False, False, run_name+"_fastqc.tar.gz"])
# group all individual by pool
pools = {}
for p in self.args['pool'] :
......@@ -52,18 +48,20 @@ class RADseq (NG6Workflow):
raise ValueError, "Duplicated individual name " + indiv['indiv_name']
indivs_by_name[indiv['indiv_name'] ] = indiv
# write barcode file for splitbc
# write barcode file
barcode_file = self.get_temporary_file()
with open(barcode_file, "w") as ff:
for i in barcodes :
ff.write(i[0] + "\t" + i[1] + "\n")
# process each pairs of fastq
# ----- DEMULTIPLEX -----
# process each pairs of fastq for each pool
for pool_id, data in pools.iteritems() :
pooldata = data[0]
splitbc = self.add_component("SplitBC", [ pooldata['read1'], pooldata['read2'] if pooldata['read2'] else None,
indivs_by_name.keys(), barcode_file, self.args['enzyme'], self.args['mismatches'], self.args['tag_mismatch'],
self.args['trim'], self.args['forward']], component_prefix = pool_id)
print splitbc.output_read1
#ustacks = self.add_component("Ustacks", [self.args["read_1"]])
#cstacks = self.add_component("Cstacks", [ustacks.alleles, ustacks.snps, ustacks.tags, self.args["catalog_mismatches"]])
......@@ -18,44 +18,52 @@
import os
from jflow.component import Component
from jflow.iotypes import OutputFile, OutputFileList, InputFile, InputFileList, Formats
from jflow.iotypes import OutputFileList,OutputFile, InputFile, InputFileList, Formats
from weaver.function import PythonFunction, ShellFunction
from weaver.function import ShellFunction
class SplitBC (Component):
"""
/home/mbernard/save/script/libPerl/INRA/splitbc.pl $f1 $f2 --mismatches $mismatch
--bcfile $barcode --bol --rad $rad_seq --radTAG $tag_seq --TAG_mismatch $tagMM
--trim --prefix-r1 ${prefix}_%_1.fq --prefix-r2 ${prefix}_%_2.fq
dir=`dirname ${prefix}_unmatched_1.fq`
"""
def define_parameters(self, fastq_file1, fastq_file2 = None, barcode_file, rad,
mismatch, rad_tag, tag_mismatch, trim = True, forward = True ):
self.fastq_file1 = InputFileList(fastq_file1, Formats.FASTQ)
self.prefix_fq1 = "toto"
self.fastq_file2 = None
def define_parameters(self, fastq_file1, fastq_file2, indiv_names, barcode_file, enzyme,
mismatches, tag_mismatch, trim = True, forward = True ):
self.indiv_names = indiv_names
self.fastq1 = InputFile(fastq_file1, Formats.FASTQ)
self.fastq2 = None
if fastq_file2 is not None:
self.fastq_file2 = InputFileList(fastq_file2, Formats.FASTQ)
self.prefix_fq2 = "toto2"
self.fastq2 = InputFile(fastq_file2, Formats.FASTQ)
self.barcode_file = InputFile(barcode_file, Formats.FASTQ)
self.mismatch = mismatc
self.rad = rad
self.rad_tag = rad_tag
self.mismatches = mismatches
self.enzyme = enzyme
self.rad = self.get_resource(self.enzyme + '_rad')
self.rad_tag = self.get_resource(self.enzyme + '_radtag')
self.tag_mismatch = tag_mismatch
self.trim = trim
self.forward = forward
self.output_read1 = OutputFileList(self.get_outputs('{basename_woext}.fq', self.read1), Formats.BAM)
self.databank = OutputFile(os.path.join(self.output_directory, os.path.basename(input_fasta)))
self.stdout = OutputFile(os.path.join(self.output_directory, "bwaindex.stdout"))
self.stderr = OutputFile(os.path.join(self.output_directory, "bwaindex.stderr"))
self.prefix_r1 = os.path.join(self.output_directory , "%_1.fq")
self.prefix_r2 = os.path.join(self.output_directory , "%_2.fq")
self.output_read1 = OutputFileList(self.get_outputs('{basename_woext}_1.fq', self.indiv_names), Formats.FASTQ)
self.output_read2 = OutputFileList(self.get_outputs('{basename_woext}_2.fq', self.indiv_names), Formats.FASTQ)
self.stdout = OutputFile(os.path.join(self.output_directory, "splitbc.stdout"))
def process(self):
bwaindex = PythonFunction(bwa_index, cmd_format="{EXE} {ARG} {IN} {OUT}")
bwaindex(inputs=self.input_fasta, outputs=[self.databank, self.stdout, self.stderr], arguments=[self.get_exec_path("bwa"), self.algorithm])
\ No newline at end of file
strand = "--bol" if self.forward else "--eol"
if self.fastq2 is not None :
command = " ".join([
self.get_exec_path("splitbc"), self.fastq1, self.fastq2, "--mismatches", self.mismatches,
"--bcfile", self.barcode_file, "--rad", self.rad, "--radTAG", self.rad_tag, "--TAG_mismatch", self.tag_mismatch,
"--trim" if self.trim else "", "--prefix-r1", self.prefix_r1, "--prefix-r2", self.prefix_r2, strand, ' 2>&1 > ', self.stdout
])
splitbc = ShellFunction(command, cmd_format='{EXE} {IN} {OUT}')
splitbc(inputs =[self.fastq1, self.fastq2], outputs = [self.output_read1, self.output_read2, self.stdout])
else :
command = " ".join([
self.get_exec_path("splitbc"), self.fastq1,"--mismatches", self.mismatches,
"--bcfile", self.barcode_file, "--rad", self.rad, "--radTAG", self.rad_tag, "--TAG_mismatch", self.tag_mismatch,
"--trim" if self.trim else "", "--prefix-r1", self.prefix_r1, strand, ' 2>&1 > ', self.stdout
])
splitbc = ShellFunction(command, cmd_format='{EXE} {IN} {OUT}')
splitbc(inputs = self.fastq1, outputs = [self.output_read1, self.stdout])
......@@ -20,7 +20,7 @@
--enzyme
sbfI
# individual description
# individuals description
--individu
id=1
name=RAD1
......@@ -45,32 +45,15 @@ name=RAD4
pool_id=OMYMUNE
barcode=ACTGC
# pools description
--pool
id=OMYMUNE
read1=/work/sbsuser/jflow/Project_OMYMUNE.305/Run_pool-test.5000/RawData/pool-test_NoIndex_L001_R1.fastq.gz
read2=/work/sbsuser/jflow/Project_OMYMUNE.305/Run_pool-test.5000/RawData/pool-test_NoIndex_L001_R2.fastq.gz
#read1=/work/sbsuser/jflow/dev/ng6/nG6/workflows/radseq/data/omymune_r1.fq.gz
#read2=/work/sbsuser/jflow/dev/ng6/nG6/workflows/radseq/data/omymune_r2.fq.gz
read1=workflows/radseq/data/omymune_r1.fq.gz
read2=workflows/radseq/data/omymune_r2.fq.gz
# Run description
--admin-login
CTD
--project-id
7
--name
ART
--description
AZERTAZERAZE
--date
27/05/2014
--data-nature
"RAD-DNA"
--sequencer
HiSeq
--species
"Truffe-noire"
--type
"1/8 Flowcell A - Lane 1"
inabihoudine
......@@ -44,10 +44,12 @@ pool.id.help = Pool identifier
pool.id.required = True
pool.read1.name = read1
pool.read1.flag = read1
pool.read1.type = localfile
pool.read1.help = Read1 fastq file
pool.read1.required = True
pool.read2.name = read2
pool.read2.flag = read2
pool.read2.type = localfile
pool.read2.help = Read2 fastq file
individu.name = individu
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment