# svsnakemake_utils.py
class SnakemakeUtils:
    """Build the file paths consumed and produced by the SV-calling rules.

    Most methods take a ``wildcards`` argument and only read attributes from
    it (``tool``, ``chrom``, ``svtype``, ``batch``); presumably this is the
    Snakemake wildcards object passed to input functions -- confirm against
    the calling rules.
    """

    # Translation from the generic SV type names to pindel's own type codes.
    _SV_TYPE_TO_PINDEL = {
        "DEL": "D",
        "DUP": "TD",
        "INV": "INV",
        "RPL": "RPL",
    }

    def __init__(self, tools, samples, chr_batches):
        """Store the workflow configuration.

        Args:
            tools: Iterable of tool names; iterated by
                ``get_tools_for_svtype``.
            samples: Mapping from batch name to an iterable of sample names;
                read by ``get_inputs_bams``.
            chr_batches: Mapping from chromosome name to an iterable of
                batches, each batch being an iterable whose items are joined
                with ``-`` to name a sub-directory; read by
                ``get_pindel_chr_batches``.
        """
        self.tools = tools
        self.samples = samples
        # Filled in later, one tool at a time, through set_tool_svtypes().
        self.tools_sv_types = {}
        self.chr_batches = chr_batches

    def get_tool_prefix(self, tool, chrom, svtype):
        """Return the output file name of ``tool`` for ``chrom``/``svtype``.

        The result is ``"<tool>_<chrom>"`` followed by the tool-specific
        suffix.

        Args:
            tool: One of ``"pindel"``, ``"lumpy"``, ``"delly"`` or
                ``"genomestrip"``.
            chrom: Chromosome name.
            svtype: SV type, forwarded to the tool-specific suffix method.

        Raises:
            ValueError: If ``tool`` is not one of the supported tools.
            KeyError: If ``tool`` is pindel and ``svtype`` is unsupported.
            SystemExit: If ``tool`` is genomestrip and ``svtype`` is not
                ``"DEL"``.
        """
        suffix_getters = {
            "pindel": self.get_pindel_suffix,
            "lumpy": self.get_lumpy_suffix,
            "delly": self.get_delly_suffix,
            "genomestrip": self.get_genomestrip_suffix,
        }
        try:
            get_suffix = suffix_getters[tool]
        except KeyError:
            # Fail with a clear message instead of an unbound local variable.
            raise ValueError("Unsupported tool: {}".format(tool)) from None
        return "_".join([tool, chrom]) + get_suffix(svtype)

    @staticmethod
    def get_delly_suffix(svtype):
        """Return the delly output suffix, ``"_<svtype>.bcf"``."""
        return "_" + svtype + ".bcf"

    @staticmethod
    def get_lumpy_suffix(svtype):
        """Return the lumpy output suffix; ``svtype`` is not used."""
        return ".vcf.gz"

    @staticmethod
    def get_genomestrip_suffix(svtype):
        """Return the genomestrip output suffix.

        Prints a message and terminates the process with exit status 1 when
        ``svtype`` is anything other than ``"DEL"``.

        Raises:
            SystemExit: If ``svtype`` is not ``"DEL"``.
        """
        if svtype != "DEL":
            print("Genomestrip works only for deletions")
            # Raise SystemExit directly: the ``exit`` helper is installed by
            # the ``site`` module and is not guaranteed to exist.
            raise SystemExit(1)
        return ".vcf.gz"

    @staticmethod
    def get_pindel_suffix(svtype):
        """Return the pindel output suffix, ``"_<pindel type>.gz"``.

        Args:
            svtype: One of ``"DEL"``, ``"DUP"``, ``"INV"`` or ``"RPL"``.

        Raises:
            KeyError: If ``svtype`` has no pindel equivalent.
        """
        sv_type_to_pindel = SnakemakeUtils._SV_TYPE_TO_PINDEL
        if svtype not in sv_type_to_pindel:
            raise KeyError('Unsupported variant type')
        return "_" + sv_type_to_pindel[svtype] + ".gz"

    def getToolOuputFile(self, wildcards):
        """Return the path of a tool's output file for the given wildcards.

        Args:
            wildcards: Object exposing ``tool``, ``chrom``, ``svtype`` and
                ``batch`` attributes.

        Returns:
            A dict with the single key ``"tooloutput"`` mapped to
            ``<batch>/<tool>/<file name from get_tool_prefix>``.
        """
        # Imported here because the module has no top-level import of ``os``.
        import os

        tool_prefix = self.get_tool_prefix(
            wildcards.tool, wildcards.chrom, wildcards.svtype)
        return {
            'tooloutput': os.path.join(
                wildcards.batch, wildcards.tool, tool_prefix),
        }

    def set_tool_svtypes(self, tool, svtypes):
        """Record the collection of SV types handled by ``tool``."""
        self.tools_sv_types[tool] = svtypes

    def get_tools_for_svtype(self, wildcards):
        """List the parsed VCF paths of every tool handling ``wildcards.svtype``.

        Args:
            wildcards: Object exposing ``svtype``, ``chrom`` and ``batch``
                attributes.

        Returns:
            One path per tool of ``self.tools`` whose registered SV types
            contain ``wildcards.svtype``, in the order of ``self.tools``.

        Raises:
            KeyError: If a tool of ``self.tools`` was never registered with
                ``set_tool_svtypes``.
        """
        # NOTE(review): the end of this method was missing from the reviewed
        # source; closing the call and returning the list is a
        # reconstruction -- confirm against the upstream file.
        template = ("{batch}/parse/{svtype}/{tool}/"
                    "{tool}_{chrom}_{svtype}_parsed.vcf.gz")
        return [
            template.format(tool=tool, chrom=wildcards.chrom,
                            svtype=wildcards.svtype, batch=wildcards.batch)
            for tool in self.tools
            if wildcards.svtype in self.tools_sv_types[tool]
        ]

    def get_pindel_chr_batches(self, wildcards):
        """List the per-chromosome-batch pindel outputs for each pindel type.

        Args:
            wildcards: Object exposing ``chrom`` and ``batch`` attributes.

        Returns:
            A dict keyed by the pindel types ``"D"``, ``"INV"`` and ``"TD"``;
            each value holds one path per entry of
            ``self.chr_batches[wildcards.chrom]``, in the same order.
        """
        # Each chromosome batch is named by joining its items with "-".
        batch_labels = ["-".join(map(str, chr_batch))
                        for chr_batch in self.chr_batches[wildcards.chrom]]
        template = ("{batch}/pindel/{chrom}/{chrbatch}/"
                    "pindel_{chrom}_{svtype}.gz")
        return {
            pindel_type: [
                template.format(batch=wildcards.batch, chrom=wildcards.chrom,
                                chrbatch=label, svtype=pindel_type)
                for label in batch_labels
            ]
            for pindel_type in ("D", "INV", "TD")
        }

    def get_inputs_bams(self, wildcards):
        """Return the BAM files, and their indexes, of a batch's samples.

        Args:
            wildcards: Object exposing a ``batch`` attribute.

        Returns:
            A dict with ``"bams"`` (``data/bams/<sample>.bam`` for every
            sample of the batch) and ``"idxs"`` (the same paths with
            ``.bai`` appended), both in sample order.
        """
        bams = ["data/bams/{sample}.bam".format(sample=sample)
                for sample in self.samples[wildcards.batch]]
        return {"bams": bams, "idxs": [bam + ".bai" for bam in bams]}