Commit c8257962 authored by Claire Kuchly's avatar Claire Kuchly
Browse files

add splitbc function

parent 6c2870fd
......@@ -33,6 +33,37 @@ class RADseq (NG6Workflow):
# make some statistics on raw file
#fastqc = self.add_component("FastQC", [self.args["read_1"], False, False, run_name+"_fastqc.tar.gz"])
ustacks = self.add_component("Ustacks", [self.args["read_1"]])
cstacks = self.add_component("Cstacks", [ustacks.alleles, ustacks.snps, ustacks.tags, self.args["catalog_mismatches"]])
\ No newline at end of file
# group all individuals by pool
pools = {}
for p in self.args['pool'] :
if pools.has_key(p["id"]) :
raise ValueError, "Duplicated pool id." + p['id']
pools[p["id"]] = (p, [])
barcodes = [] # array of tuples
indivs_by_name = {}
for indiv in self.args["individu"]:
pool_id = indiv['pool_id']
if not pools.has_key(pool_id):
raise ValueError, "The pool id " + pool_id + " does not exists in (individual " + indiv['indiv_name'] + ")"
pools[pool_id][1].append(indiv)
barcodes.append((indiv['indiv_name'],indiv['barcode']))
if indivs_by_name.has_key(indiv['indiv_name']) :
raise ValueError, "Duplicated individual name " + indiv['indiv_name']
indivs_by_name[indiv['indiv_name'] ] = indiv
# write barcode file for splitbc
barcode_file = self.get_temporary_file()
with open(barcode_file, "w") as ff:
for i in barcodes :
ff.write(i[0] + "\t" + i[1] + "\n")
# process each pair of fastq files (one pair per pool)
for pool_id, data in pools.iteritems() :
pooldata = data[0]
splitbc = self.add_component("SplitBC", [ pooldata['read1'], pooldata['read2'] if pooldata['read2'] else None,
indivs_by_name.keys(), barcode_file, self.args['enzyme'], self.args['mismatches'], self.args['tag_mismatch'],
self.args['trim'], self.args['forward']], component_prefix = pool_id)
print splitbc.output_read1
#ustacks = self.add_component("Ustacks", [self.args["read_1"]])
#cstacks = self.add_component("Cstacks", [ustacks.alleles, ustacks.snps, ustacks.tags, self.args["catalog_mismatches"]])
#
# Copyright (C) 2012 INRA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# globals
--enzyme
sbfI
# individual description
--individu
id=1
name=RAD1
pool_id=OMYMUNE
barcode=AAGGG
--individu
id=2
name=RAD2
pool_id=OMYMUNE
barcode=ACCAT
--individu
id=3
name=RAD3
pool_id=OMYMUNE
barcode=ACGTA
--individu
id=4
name=RAD4
pool_id=OMYMUNE
barcode=ACTGC
--pool
id=OMYMUNE
read1=/work/sbsuser/jflow/Project_OMYMUNE.305/Run_pool-test.5000/RawData/pool-test_NoIndex_L001_R1.fastq.gz
read2=/work/sbsuser/jflow/Project_OMYMUNE.305/Run_pool-test.5000/RawData/pool-test_NoIndex_L001_R2.fastq.gz
#read1=/work/sbsuser/jflow/dev/ng6/nG6/workflows/radseq/data/omymune_r1.fq.gz
#read2=/work/sbsuser/jflow/dev/ng6/nG6/workflows/radseq/data/omymune_r2.fq.gz
# Run description
--admin-login
CTD
--project-id
7
--name
ART
--description
AZERTAZERAZE
--date
27/05/2014
--data-nature
"RAD-DNA"
--sequencer
HiSeq
--species
"Truffe-noire"
--type
"1/8 Flowcell A - Lane 1"
......@@ -31,38 +31,78 @@ description = RADseq data analysis workflow
# .action [store]: the basic type of action to be taken (store|append)
#
[parameters]
read_1.name = read_1
read_1.flag = --read-1
read_1.help = Which read1 files should be used
read_1.required = True
read_1.action = append
individus.name = individus
individus.flag = --individu
individus.help = Define each individual
individus.type = multiple
individus.action = append
individus.id.name = id
individus.id.flag = id
individus.id.help = Id of individual
individus.id.required = True
individus.id.type = int
individus.indiv_name.name = name
individus.indiv_name.flag = name
individus.indiv_name.help = Individual name
individus.indiv_name.required = True
individus.indiv_name.type = str
individus.run.name = run name
individus.run.flag = run
individus.run.help = Individual run name
individus.run.required = True
individus.run.type = str
individus.barcode.name = barcode
individus.barcode.flag = barcode
individus.barcode.help = Individual barcode
individus.barcode.required = True
individus.barcode.type = str
pool.name = pool
pool.flag = --pool
pool.help = Sequence pool
pool.action = append
pool.type = multiple
pool.required = True
pool.id.name = pool name
pool.id.flag = id
pool.id.help = Pool identifier
pool.id.required = True
pool.read1.name = read1
pool.read1.flag = read1
pool.read1.help = Read1 fastq file
pool.read1.required = True
pool.read2.name = read2
pool.read2.flag = read2
pool.read2.help = Read2 fastq file
individu.name = individu
individu.flag = --individu
individu.help = Define each individual
individu.type = multiple
individu.required = True
individu.action = append
individu.id.name = id
individu.id.flag = id
individu.id.help = Id of individual
individu.id.required = True
individu.id.type = int
individu.indiv_name.name = name
individu.indiv_name.flag = name
individu.indiv_name.help = Individual name
individu.indiv_name.required = True
individu.indiv_name.type = str
individu.pool_id.name = pool id
individu.pool_id.flag = pool_id
individu.pool_id.help = Which pool this individual belongs to
individu.pool_id.required = True
individu.barcode.name = barcode
individu.barcode.flag = barcode
individu.barcode.help = Individual barcode
individu.barcode.required = True
individu.barcode.type = str
enzyme.name = enzyme
enzyme.flag = --enzyme
enzyme.help = Restriction enzyme name
enzyme.required = True
enzyme.choices = sbfI|toto|titi
tag_mismatch.name = tag mismatch
tag_mismatch.flag = --tag-mismatch
tag_mismatch.help = Max. number of mismatches allowed in the radTAG sequence. Default is 0.
tag_mismatch.default = 0
mismatches.name = mismatches
mismatches.flag = --mismatches
mismatches.help = Max. number of mismatches allowed.
mismatches.default = 1
forward.name = forward
forward.flag = --forward
forward.help = Match barcode at the beginning of the sequence (5' to 3')
forward.default = True
forward.type = bool
trim.name = trim
trim.flag = --trim
trim.help = Should the barcode be trimmed?
trim.default = True
trim.type = bool
catalog_mismatches.name = catalog_mismatches
catalog_mismatches.flag = --catalog-mismatches
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment