Skip to content
Snippets Groups Projects
Commit 042230b9 authored by Thomas Faraut's avatar Thomas Faraut
Browse files

adding a Variant class

parent 58c1e9cd
No related branches found
No related tags found
No related merge requests found
build_pop.py 100644 → 100755
......@@ -296,7 +296,11 @@ def _build_vcf_header(vcf_file, prg_path, tmp_dir, nb_inds):
raise ExecException("ERROR: template file not found in program directory.")
def build_genotypes_vcf_list(deletions: dict, inversions: dict, duplications:dict, output_vcf, haploid, force_polymorphism, nb_inds,
def build_genotypes_vcf_list(deletions: dict,
inversions: dict,
duplications: dict,
output_vcf, haploid,
force_polymorphism, nb_inds,
tmp_dir, prg_path):
"""
Build VCF file describing genotypes for each individual (and the associated python dictionary)
......@@ -413,6 +417,7 @@ def load_genotypes_from_file(genotypes_file):
variants = VariantFile(genotypes_file)
genotypes_for_inds_DEL = {}
genotypes_for_inds_INV = {}
genotypes_for_inds_DUP = {}
nb_inds = 0
for variant in variants:
chrm = variant.chrom
......@@ -427,6 +432,8 @@ def load_genotypes_from_file(genotypes_file):
genotypes_for_inds = genotypes_for_inds_DEL
elif type_v == "INV":
genotypes_for_inds = genotypes_for_inds_INV
elif type_v == "DUP":
genotypes_for_inds = genotypes_for_inds_DUP
else:
print("ERROR: Variant type not supported: %s. Ignoring this line..." % type_v)
if genotypes_for_inds is not None:
......@@ -438,7 +445,7 @@ def load_genotypes_from_file(genotypes_file):
"genotypes": gt
}
nb_inds = len(gt)
return nb_inds, genotypes_for_inds_DEL, genotypes_for_inds_INV
return nb_inds, genotypes_for_inds_DEL, genotypes_for_inds_INV, genotypes_for_inds_DUP
def _compute_keeped_genomic_regions(svs, svs_infos, haploid):
......
#!/usr/bin/env python3
import sys
import argparse
from pysam import VariantFile
from collections import defaultdict
def reverse(seq):
"""
Returns a reversed string
Borrowed from https://gist.github.com/hurshd0
"""
return seq[::-1]
def complement(seq):
"""Returns a complement DNA sequence"""
complement_dict = {'A': 'T', 'C': 'G', 'T': 'A', 'G': 'C'}
seq_list = list(seq)
# I can think of 3 ways to do this but first is faster I think ???
# first list comprehension
seq_list = [complement_dict[base] for base in seq_list]
# second complicated lambda
# seq_list = list(map(lambda base: complement_dict[base], seq_list))
# third easy for loop
# for i in range(len(seq_list)):
# seq_list[i] = complement_dict[seq_list[i]]
return ''.join(seq_list)
class Fragment:
"""
A fragment is a chromosomal segment witha start and end are
1-based for compatibility wit vcf format
"""
def __init__(self, chrom, start, end):
self.chrom = chrom
self.start = start
self.end = end
self.type= "REF"
@property
def chrom(self):
return self.chrom
@property
def chrom(self):
return self.start
@property
def end(self):
return self.end
@property
def type(self):
return self.type
def size(self):
return self.end - self.start + 1
def get_seq(self, reference):
return reference[self.start-1:self.end]
class Variant(Fragment):
def __init__(self, chrom, start, end, ident, type):
Fragment.__init__(self, chrom, start, end)
self.id = ident
self.type = type
def get_seq(self, reference):
if type == "DEL":
return ""
elif type == "INV":
return reverse(complement(reference[self.start-1:self.end]))
elif type == "DUP":
template = reference[self.start-1:self.end]
return template + template
def read_vcf(vcf_file):
# opening the vcf file
vcf_in = VariantFile(vcf_file)
fragments = []
chromosomes = defaultdict(list)
offset = 1
for record in vcf_in.fetch():
fragment = Fragment(record.chrom, offset, record.start-1)
print("ref: %d\t%d" % (offset, record.start-1))
print("%s: %d\t%d" % (record.id,record.start, record.stop))
offset = record.stop + 1
def main():
parser = argparse.ArgumentParser(description="Reformat data for plink",
formatter_class=argparse.RawTextHelpFormatter)
parser.add_argument('-v', '--vcf', required=True,
help='the vcf file')
args = parser.parse_args()
read_vcf(args.vcf)
if __name__ == '__main__':
sys.exit(main())
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment