__init__.py 2.81 KB
Newer Older
Jerome Mariette's avatar
Jerome Mariette committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
#
# Copyright (C) 2012 INRA
# 
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#

import os
import glob
import sys

Penom Nom's avatar
Penom Nom committed
22
from ng6.ng6workflow import NG6Workflow, BasicNG6Workflow
Jerome Mariette's avatar
Jerome Mariette committed
23
24


Penom Nom's avatar
Penom Nom committed
25
class RADseq(BasicNG6Workflow):
    """Workflow that runs a SplitBC component per pool, then a Ustacks component.

    Values read from ``self.args``:
      * ``pool``: iterable of mappings with the keys ``id``, ``read1`` and
        ``read2``.
      * ``individu``: iterable of mappings with the keys ``pool_id``,
        ``indiv_name`` and ``barcode``.
      * ``enzyme``, ``mismatches``, ``tag_mismatch``, ``trim``, ``forward``:
        forwarded unchanged to every SplitBC component.
    """

    def process(self):
        """Validate pools/individuals, write the barcode file and add components.

        Raises:
            ValueError: if a pool id is declared twice, if an individual refers
                to an undeclared pool, if an individual name is declared twice,
                or if no pool was declared at all.
        """
        # ----- GROUP INDIVIDUALS BY POOL -----
        # pools maps a pool id to (pool description, individuals of that pool).
        pools = {}
        for pool in self.args['pool']:
            if pool["id"] in pools:
                raise ValueError("Duplicated pool id: " + pool['id'])
            pools[pool["id"]] = (pool, [])

        barcodes = []  # (individual name, barcode) tuples, in declaration order
        indivs_by_name = {}
        for indiv in self.args["individu"]:
            pool_id = indiv['pool_id']
            if pool_id not in pools:
                raise ValueError("The pool id " + pool_id + " does not exists in (individual " + indiv['indiv_name'] + ")")
            pools[pool_id][1].append(indiv)
            barcodes.append((indiv['indiv_name'], indiv['barcode']))
            if indiv['indiv_name'] in indivs_by_name:
                raise ValueError("Duplicated individual name " + indiv['indiv_name'])
            indivs_by_name[indiv['indiv_name']] = indiv

        # ----- BARCODE FILE -----
        # One "<individual name>\t<barcode>" line per individual.
        barcode_file = self.get_temporary_file()
        with open(barcode_file, "w") as handle:
            for indiv_name, barcode in barcodes:
                handle.write(indiv_name + "\t" + barcode + "\n")

        # ----- DEMULTIPLEX -----
        # One SplitBC component per pool, fed with that pool's fastq file(s).
        splitbc = None
        for pool_id, (pooldata, _pool_indivs) in pools.items():
            # An empty/false read2 is passed as None.
            read2 = pooldata['read2'] if pooldata['read2'] else None
            splitbc = self.add_component(
                "SplitBC",
                [pooldata['read1'], read2,
                 # A fresh list of names for each component.
                 list(indivs_by_name), barcode_file,
                 self.args['enzyme'], self.args['mismatches'], self.args['tag_mismatch'],
                 self.args['trim'], self.args['forward']],
                component_prefix=pool_id)

        if splitbc is None:
            # Without this guard the line below fails with an opaque
            # UnboundLocalError when no pool has been declared.
            raise ValueError("No pool declared: nothing to demultiplex.")

        # ----- USTACKS -----
        # NOTE(review): only the SplitBC component of the LAST pool iterated is
        # forwarded to Ustacks (and dict iteration order decides which one that
        # is). Confirm whether the outputs of every pool should be used instead.
        ustacks = self.add_component(
            "Ustacks", [],
            {"indiv_dic": self.args["individu"],
             "read1_files": splitbc.output_read1,
             "max_locus": 3},
            component_prefix="ustacks")

        # TODO: next step, not enabled yet (would consume `ustacks`):
        #cstacks = self.add_component("Cstacks", [ustacks.alleles, ustacks.snps, ustacks.tags, self.args["catalog_mismatches"]])