Newer
Older
def __init__(self, tools, samples, chr_batches):
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
def get_tool_prefix(self, tool, chrom, svtype):
    """Build the output file name for one tool, chromosome and SV type.

    The name is ``<tool>_<chrom>`` followed by the suffix returned by the
    suffix helper that matches ``tool``.

    Args:
        tool: Caller name; one of "pindel", "lumpy", "delly" or
            "genomestrip".
        chrom: Chromosome label, joined to ``tool`` with an underscore.
        svtype: Variant type, passed unchanged to the suffix helper.

    Returns:
        The file name as a string.

    Raises:
        ValueError: If ``tool`` is not one of the four supported callers.
    """
    common_prefix = "_".join([tool, chrom])
    if tool == "pindel":
        tool_suffix = self.get_pindel_suffix(svtype)
    elif tool == "lumpy":
        tool_suffix = self.get_lumpy_suffix(svtype)
    elif tool == "delly":
        tool_suffix = self.get_delly_suffix(svtype)
    elif tool == "genomestrip":
        tool_suffix = self.get_genomestrip_suffix(svtype)
    else:
        # Without this branch an unknown tool left tool_suffix unbound and
        # the return below failed with an unhelpful UnboundLocalError.
        raise ValueError("Unsupported tool: {!r}".format(tool))
    return common_prefix + tool_suffix
@staticmethod
def get_delly_suffix(svtype):
    """Return the delly file-name suffix, ``_<svtype>.bcf``."""
    # One file per variant type, so the type is part of the suffix.
    return "".join(("_", svtype, ".bcf"))
@staticmethod
def get_lumpy_suffix(svtype):
    """Return the lumpy file-name suffix.

    The suffix is always ``.vcf.gz``; ``svtype`` is accepted only so the
    signature matches the other suffix helpers and is otherwise ignored.
    """
    lumpy_suffix = ".vcf.gz"
    return lumpy_suffix
@staticmethod
def get_genomestrip_suffix(svtype):
    """Return the genomestrip file-name suffix, ``.vcf.gz``.

    Only the "DEL" variant type is accepted. For any other type a message
    is printed and the process is terminated with exit status 1.

    Args:
        svtype: Variant type; must equal "DEL".

    Returns:
        The string ``.vcf.gz``.

    Raises:
        SystemExit: With code 1 when ``svtype`` is not "DEL".
    """
    if svtype != "DEL":
        print("Genomestrip works only for deletions")
        # Raise SystemExit directly: the bare exit() helper is injected by
        # the site module for interactive sessions and is missing when the
        # interpreter runs without site (python -S, frozen builds).
        raise SystemExit(1)
    return ".vcf.gz"
@staticmethod
def get_pindel_suffix(svtype):
    """Return the pindel file-name suffix for ``svtype``.

    The variant type is translated to pindel's own code (DEL -> D,
    DUP -> TD, INV -> INV, RPL -> RPL) and wrapped as ``_<code>.gz``.

    Raises:
        KeyError: If ``svtype`` is not one of the four types listed above.
    """
    pindel_codes = dict(DEL="D", DUP="TD", INV="INV", RPL="RPL")
    code = pindel_codes.get(svtype)
    if code is None:
        raise KeyError('Unsupported variant type')
    return "".join(("_", code, ".gz"))
def getToolOuputFile(self, wildcards):
    """Return the path of a tool's output file for the given wildcards.

    Reads the ``tool``, ``chrom``, ``svtype`` and ``batch`` attributes of
    ``wildcards``. The file name comes from ``get_tool_prefix`` and is
    placed under ``<batch>/<tool>/``.

    Returns:
        A dict with the single key ``'tooloutput'`` mapped to that path.
    """
    file_name = self.get_tool_prefix(
        wildcards.tool, wildcards.chrom, wildcards.svtype)
    output_path = os.path.join(wildcards.batch, wildcards.tool, file_name)
    return {'tooloutput': output_path}
def set_tool_svtypes(self, tool, svtypes):
    """Record ``svtypes`` as the variant types handled by ``tool``.

    Stores the value in ``self.tools_sv_types`` under the key ``tool``,
    replacing any entry already present for that tool.
    """
    svtypes_by_tool = self.tools_sv_types
    svtypes_by_tool[tool] = svtypes
def get_tools_for_svtype(self, wildcards):
inputs = []
for tool in self.tools:
if wildcards.svtype in self.tools_sv_types[tool]:
inputs.append("{batch}/parse/{svtype}/{tool}/{tool}_{chrom}_{svtype}_parsed.vcf.gz".format(
tool=tool, chrom=wildcards.chrom, svtype=wildcards.svtype, batch=wildcards.batch
def get_pindel_chr_batches(self, wildcards):
inputs = {"D": [],
"INV": [],
"TD": []}
for batch in self.chr_batches[wildcards.chrom]:
for svtype in inputs:
inputs[svtype].append("{batch}/pindel/{chrom}/{chrbatch}/pindel_{chrom}_{svtype}.gz".format(
batch=wildcards.batch, chrom=wildcards.chrom, chrbatch="-".join(map(str, batch)),
svtype=svtype
))
for sample in self.samples[wildcards.batch]:
bam = "data/bams/{sample}.bam".format(sample=sample)
inputs.append(bam)
indexes.append(bam + ".bai")
return {"bams": inputs, "idxs": indexes}