#!/usr/bin/env python3

"""Build index files

Short desc: Build the query and target index files (query.idx and target.idx)
Details: for each fasta file, read its index (.fai) and write an index file
         containing the sample name followed by the name and length of each contig

Usage:
    build_indexes.py -q FASTA1 -t FASTA2 -o OUT [-r NAME1] [-u NAME2]
    build_indexes.py -v | --version

Options:
    -q --query=FASTA1       Query fasta file compared with minimap
    -t --target=FASTA2      Target fasta file compared with minimap
    -r --query-name=NAME1   Query name
    -u --target-name=NAME2  Target name
    -o --output=OUT         Output directory
    -h --help               Show this screen
    -v --version            Show version
"""

# NOTE(review): the program name looks carried over from prepare_paf.py; it is
# printed by --version, so it is left unchanged here -- confirm before renaming.
__NAME__ = "PreparePAF"
__VERSION__ = 0.1

import os
from collections import OrderedDict
from shutil import copyfile

from docopt import docopt


class Fasta:
    """Contigs of an indexed fasta file, loaded from its ".fai" index.

    Attributes:
        fasta: path of the fasta file, as given to the constructor
        fai: path of the index file (fasta path + ".fai")
        name: sample name (given name, or fasta base name without extension)
        contigs: ordered mapping contig name -> {"length": int, "start": int},
            in the order of the index file; "start" is the sum of the lengths
            of all the contigs listed before this one
        total_length: sum of the lengths of all the contigs
    """

    def __init__(self, fasta, name=None):
        """Load the contigs described in the index of a fasta file.

        :param fasta: path of the fasta file; "<fasta>.fai" must exist
        :param name: sample name; when None, the base name of the fasta file
            without its last extension is used
        :raises IOError: if the ".fai" file cannot be opened
        :raises IndexError, ValueError: if a line of the ".fai" file does not
            have an integer in its second tab-separated column
        """
        self.fasta = fasta
        self.fai = fasta + ".fai"
        self.name = os.path.splitext(os.path.basename(fasta))[0] if name is None else name
        self.contigs = OrderedDict()
        self.total_length = 0
        self.__load()

    def __load(self):
        """Fill self.contigs and self.total_length from the ".fai" file.

        Only the first two tab-separated columns of each line are read:
        the contig name and the contig length.
        """
        start = 0
        with open(self.fai, "r") as fai_file:
            for line in fai_file:
                line = line.rstrip("\r\n")
                if not line:
                    # Tolerate blank lines (e.g. a trailing empty line)
                    # instead of failing with an IndexError
                    continue
                parts = line.split("\t")
                length = int(parts[1])
                self.contigs[parts[0]] = {
                    "length": length,
                    "start": start
                }
                start += length
        # "start" has accumulated the length of every contig
        self.total_length = start

    def get_contig(self, contig):
        """Return the {"length", "start"} properties of a contig.

        :param contig: contig name
        :raises KeyError: if the contig is not in the index
        """
        return self.contigs[contig]

    def build_index(self, filename):
        """Write the index file: sample name, then one line per contig.

        The first line is the sample name; each following line is
        "<contig name>\\t<contig length>", in the order of the ".fai" file.

        :param filename: path of the file to write (overwritten if it exists)
        """
        with open(filename, "w") as idx:
            idx.write(self.name + "\n")
            for contig, props in self.contigs.items():
                idx.write(contig + "\t" + str(props["length"]) + "\n")


def init(output_d, query, target, query_name=None, target_name=None):
    """Build the index files of the query and of the target.

    Both fasta indexes are loaded first, then "query.idx" and "target.idx"
    are written into the output directory.

    :param output_d: directory in which the two index files are written
    :param query: path of the query fasta file ("<query>.fai" is read)
    :param target: path of the target fasta file ("<target>.fai" is read)
    :param query_name: name written in the query index (None: derived from
        the fasta file name)
    :param target_name: name written in the target index (None: derived from
        the fasta file name)
    """
    query = Fasta(query, query_name)
    target = Fasta(target, target_name)
    # Pair each fasta with its output file name instead of tracking a counter
    for fasta, idx_name in ((query, "query.idx"), (target, "target.idx")):
        fasta.build_index(os.path.join(output_d, idx_name))


if __name__ == '__main__':
    # Command-line entry point: options are described in the module docstring
    args = docopt(__doc__)
    if args["--version"]:
        print(__NAME__, __VERSION__)
    else:
        # The contigs are read from "<fasta>.fai": fail early, with an explicit
        # message, when one of the fasta files has not been indexed
        for fasta_path in (args["--query"], args["--target"]):
            if not os.path.exists(fasta_path + ".fai"):
                raise Exception("Fasta file %s is not indexed!" % fasta_path)
        init(args["--output"], args["--query"], args["--target"], args["--query-name"], args["--target-name"])