cnv2bed.py 2.32 KB
Newer Older
1
2
3
4
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import re
import sys
5
import argparse
6
from bedify import filter_out
7

8
9
10
11
def main(input_f, out, tag, len_min, len_max):
    """
        Read a structural variant file generated from CNVnator and
        transform it into a bed-like format.

        Each input line is matched against the pattern
        ``<type> [chr]<chrom>:<start>-<end> <length> <support>``
        (fields separated by a single whitespace character). Lines that
        do not match are skipped silently, but they still consume a call
        number, so call ids follow the line numbers of the input file.

        Every parsed call is handed to ``bedify.filter_out`` together with
        ``len_min`` and ``len_max``; the call is written only when
        ``filter_out`` returns a falsy value.

        :param input_f: pathway to structural variant file
        :param out: pathway of the output file
        :param tag: tag for each variant, used as prefix of the call id
        :param len_min: minimum length of a SV (forwarded to filter_out)
        :param len_max: maximum length of a SV (forwarded to filter_out)

        :type input_f: string (pathway)
        :type out: string (pathway)
        :type tag: string
        :type len_min: integer
        :type len_max: integer

        :return: none

        :output: bed-like, tab-separated file. The columns correspond to
            the following:
            -CHR       (without any leading "chr")
            -START
            -END
            -CALL_ID   (<tag>-<1-based line number>)
            -SV_TYPE
            -LENGTH    (kept as the text found in the input)
            -"/"       (constant placeholder column)
            -SUPPORT   (numeric field following the length; presumably
                        CNVnator's normalized read depth -- TODO confirm)

        :Example:

        >>> main("/home/sangoku/kamehameha_call", "kame.bed", "DBZ", 50, 9000)
    """
    # Raw string so that \s and \. reach the regex engine untouched, and
    # compiled once instead of being looked up again for every line.
    call_pattern = re.compile(
        r"([a-z]+)\s(?:chr)?([0-9A-Z\.]+):([0-9]+)-([0-9]+)\s([0-9e\.\+]+)\s([0-9.]+)"
    )
    with open(input_f, 'r') as sv_list, open(out, 'w') as output_file:
        # Numbering starts at 1 and counts every line, matched or not.
        for line_number, line in enumerate(sv_list, 1):
            call_id = tag + "-" + str(line_number)
            results = call_pattern.match(line)
            if not results:
                continue
            sv_type, chromosome, start, end, length, support = results.groups()
            new_sv = [chromosome, start, end, call_id, sv_type, length, "/", support]
            # filter_out returns a truthy value for calls that must be dropped.
            if not filter_out(new_sv, len_min, len_max):
                output_file.write("\t".join(new_sv) + "\n")
52
            
53
54
55
56
57
58
59
60
61
if __name__ == "__main__":
    # Command-line entry point: collect the paths, the tag and the optional
    # length bounds, then hand them to main().
    parser = argparse.ArgumentParser()
    # The three options below are mandatory: main() opens both paths and
    # concatenates the tag into every call id, so a missing value would only
    # surface later as a TypeError instead of a clear usage message.
    parser.add_argument('--input_file', required=True,
                        help='CNVnator call file to translate to a bedlike format')
    parser.add_argument('--output_file', required=True,
                        help='bedlike translation')
    parser.add_argument('--tag', required=True,
                        help='tag of each structural variant')
    # Length bounds stay optional; they are forwarded as-is (None when
    # omitted) to the filtering step.
    parser.add_argument('--min_length', help='min length of a variant', type=int)
    parser.add_argument('--max_length', help='max length of a variant', type=int)
    args = parser.parse_args()
    main(args.input_file, args.output_file, args.tag, args.min_length, args.max_length)