Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • svdetection/svlib
1 result
Show changes
Commits on Source (4)
from __future__ import print_function
import sys
import os
import collections
......
import sys
import gzip
import shutil
import tempfile
import os
import multiprocessing
from functools import partial
from subprocess import call
import random
import string
from pysam import AlignmentFile, tabix_compress, tabix_index
from svtyper import singlesample as single
def fetchId(bamfile):
"""
Fetch sample id in a bam file
:param bamfile: the bam file
:type bamfile: file
:return: sample name
:rtype: str
"""
bamfile_fin = AlignmentFile(bamfile, 'rb')
name = bamfile_fin.header['RG'][0]['SM']
bamfile_fin.close()
return name
def get_bamfiles(bamlist):
with open(bamlist, "r") as fin:
bamfiles = [os.path.abspath(f.strip()) for f in fin]
return bamfiles
def random_string(length=10):
ramdom_str = [random.choice(string.ascii_uppercase + string.digits)
for _ in range(length)]
return ''.join(ramdom_str)
def genotype_multiple_samples(bamlist, vcf_in, vcf_out, cores=1):
bamfiles = get_bamfiles(bamlist)
if vcf_out == sys.stdout:
tmp_dir = tempfile.mkdtemp()
else:
tmp_dir = os.path.join(os.path.dirname(vcf_out), "tmp" + random_string())
os.mkdir(tmp_dir)
if vcf_in == sys.stdin:
vcf_in = single.dump_piped_vcf_to_file(vcf_in, tmp_dir)
elif vcf_in.endswith(".gz"):
vcf_in_gz = vcf_in
vcf_in = os.path.join(tmp_dir, os.path.basename(vcf_in[:-3]))
with gzip.open(vcf_in_gz, 'rb') as f_in, open(vcf_in, 'wb') as f_out:
shutil.copyfileobj(f_in, f_out)
pool = multiprocessing.Pool(processes=cores)
launch_ind = partial(genotype_single_sample,
vcf_in=vcf_in, out_dir=tmp_dir)
vcf_files = pool.map(launch_ind, bamfiles)
merge_cmd = "bcftools merge -m id "
if vcf_out != sys.stdout:
merge_cmd += "-O z " + " -o " + vcf_out + " "
merge_cmd += " ".join(vcf_files)
exit_code = call(merge_cmd, shell=True)
if exit_code == 0:
if vcf_out != sys.stdout:
tabix_index(vcf_out, force=True, preset="vcf")
shutil.rmtree(tmp_dir)
else:
print("Failed: bcftools merge exits with status %d" % exit_code)
exit(1)
def genotype_single_sample(bam, vcf_in, out_dir):
lib_info_json = bam + ".json"
sample = fetchId(bam)
out_vcf = os.path.join(out_dir, sample + ".gt.vcf")
with open(vcf_in, "r") as inf, open(out_vcf, "w") as outf:
single.sso_genotype(bam_string=bam,
vcf_in=inf,
vcf_out=outf,
min_aligned=20,
split_weight=1,
disc_weight=1,
num_samp=1000000,
lib_info_path=lib_info_json,
debug=False,
ref_fasta=None,
sum_quals=False,
max_reads=1000,
max_ci_dist=1e10,
cores=None,
batch_size=None)
out_gz = out_vcf + ".gz"
tabix_compress(out_vcf, out_gz, force=True)
tabix_index(out_gz, force=True, preset="vcf")
return out_gz