Skip to content
Snippets Groups Projects
Commit 3437a5dd authored by Floreal Cabanettes's avatar Floreal Cabanettes
Browse files

Make build_results work with the new snk pipeline

parent 21e1c0c4
No related branches found
No related tags found
No related merge requests found
......@@ -47,11 +47,10 @@ def get_args():
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description="\
Build Results \n \
description: Build results of the simulated data detection")
parser.add_argument('-v', '--vcf', type=str, required=True, help='folder containing all vcf results files')
parser.add_argument('-v', '--vcfs', type=str, required=True, help='Vcf files for each detection tool', nargs='+')
parser.add_argument('-t', '--true-vcf', type=str, required=True, help='VCF file containing the simulated deletions')
parser.add_argument('-f', '--filtered-vcf', type=str, required=False,
help='VCF file containing the filtered results', nargs='+')
parser.add_argument('-g', '--genotypes', type=str, help="VCF file containing genotypes", nargs='+')
parser.add_argument('--overlap_cutoff', type=float, default=0.5, help='cutoff for reciprocal overlap')
parser.add_argument('--left_precision', type=int, default=-1, help='left breakpoint precision')
parser.add_argument('--right_precision', type=int, default=-1, help='right breakpoint precision')
......@@ -611,23 +610,6 @@ def create_tsv_file(filename: str, headers: list, cells: dict, nb_tools: int, nb
tsv_file.write(tsv)
def search_vcf_files(my_folder):
    """
    Search VCF files recursively below a folder.

    The tree is walked by hand rather than with a recursive glob: recursive
    glob requires Python >= 3.5, while the genotoul platform provides 3.4.
    Directory entries are visited in sorted order so that the returned list is
    reproducible (os.listdir() returns entries in arbitrary order).

    :param my_folder: folder in which to search for files
    :return: list of ".vcf" / ".vcf.gz" files, with absolute paths
    """
    vcf_files = []
    for item in sorted(os.listdir(my_folder)):
        item_file = os.path.join(my_folder, item)
        if os.path.isfile(item_file):
            # str.endswith accepts a tuple: one call covers both extensions.
            if item_file.endswith((".vcf", ".vcf.gz")):
                vcf_files.append(os.path.abspath(item_file))
        elif os.path.isdir(item_file):
            # Descend into sub-folders and collect their VCF files as well.
            vcf_files += search_vcf_files(item_file)
    return vcf_files
def print_results(nb_records, orphans, with_xlsx, output, do_genotype):
"""
Print list of outputs
......@@ -829,7 +811,7 @@ def build_xlsx_cols():
XLSX_COLS.append(alp + j)
def init(output, vcf_folder, true_vcf, filtered_vcfs=None, genotypes_files=None, overlap_cutoff=0.5,
def init(output, vcf_files, true_vcf, filtered_vcfs=None, overlap_cutoff=0.5,
left_precision=sys.maxsize, right_precision=sys.maxsize, no_xls=False, haploid=False):
build_xlsx_cols()
......@@ -838,34 +820,30 @@ def init(output, vcf_folder, true_vcf, filtered_vcfs=None, genotypes_files=None,
nb_inds = 0
if genotypes_files:
genotypes, gt_quality, nb_inds = get_genotypes(genotypes_files, true_vcf)
do_genotype = genotypes_files is not None
filtered_records = None
do_genotype = False
filenames = search_vcf_files(vcf_folder)
if filtered_vcfs:
filtered_records = []
for filtered_vcf in filtered_vcfs:
eprint(" Reading file %s" % filtered_vcf)
filtered_records += read_vcf_file(filtered_vcf)[1]
true_ones = true_vcf
genotypes, gt_quality, nb_inds = get_genotypes(filtered_vcfs, true_vcf)
do_genotype = True
# Reading all the vcf files
sv_set = []
for infile in filenames:
for infile in vcf_files:
eprint(" Reading file %s" % infile)
try:
sv_set += read_vcf_file(infile)[0]
except:
print("Ignoreing file %s" % infile)
eprint(" Reading file %s" % true_ones)
sv_set_to, true_ones_records = read_vcf_file(true_ones)
eprint(" Reading file %s" % true_vcf)
sv_set_to, true_ones_records = read_vcf_file(true_vcf)
sv_set += sv_set_to
filtered_records = None
if filtered_vcfs:
filtered_records = []
for filtered_vcf in filtered_vcfs:
eprint(" Reading file %s" % filtered_vcf)
filtered_records += read_vcf_file(filtered_vcf)[1]
# Compute connected components:
eprint("Computing Connected components")
......@@ -931,7 +909,7 @@ def main():
"""
# parse the command line args
args = get_args()
init(args.output, args.vcf, args.true_vcf, args.filtered_vcf, args.genotypes, args.overlap_cutoff,
init(args.output, args.vcfs, args.true_vcf, args.filtered_vcf, args.overlap_cutoff,
args.left_precision, args.right_precision, args.no_xls, args.haploid)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment