From 6f3d65b2efc4c2dff70467ef146952767608725f Mon Sep 17 00:00:00 2001 From: Floreal Cabanettes <floreal.cabanettes@inra.fr> Date: Thu, 7 Jun 2018 17:02:45 +0200 Subject: [PATCH] Add build of results --- cnvpipelines.py | 11 ++++++-- popsim | 2 +- snakecnv/popsim.snk | 67 +++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 74 insertions(+), 6 deletions(-) diff --git a/cnvpipelines.py b/cnvpipelines.py index 18bcedb..0e91f48 100755 --- a/cnvpipelines.py +++ b/cnvpipelines.py @@ -792,7 +792,8 @@ class CnvPipeline: def run_simulation(self, nb_inds, reference, nstretches, sv_list, coverage, force_polymorphism, haploid, proba_del, proba_inv, read_len, insert_len_mean, insert_len_sd, min_deletions, - min_inversions, max_try, genotypes, tools, species, max_n_stretches, force_wdir=False, **kwargs): + min_inversions, max_try, genotypes, tools, species, max_n_stretches, + overlap_cutoff, left_precision, right_precision, force_wdir=False, **kwargs): """ Run simulation :param nb_inds: @@ -886,7 +887,10 @@ class CnvPipeline: "no_detection_index": True, "build_refbundle": "genomestrip" in tools, "species": species, - "maxNstretches": max_n_stretches + "maxNstretches": max_n_stretches, + "overlap_cutoff": overlap_cutoff, + "left_precision": left_precision, + "right_precision": right_precision } if "genomestrip" in tools: @@ -1178,6 +1182,9 @@ if __name__ == "__main__": help="[refbundle] Species name, according to the NCBI Taxonomy database") simul_parser.add_argument('-mn', '--max-n-stretches', type=int, required=False, default=100, help="[refbundle] Max size of nstretches to consider them as it") + simul_parser.add_argument('--overlap_cutoff', type=float, default=0.5, help='[results] cutoff for reciprocal overlap') + simul_parser.add_argument('--left-precision', type=int, default=-1, help='[results] left breakpoint precision') + simul_parser.add_argument('--right-precision', type=int, default=-1, help='[results] right breakpoint precision') add_run_and_rerun_options(simul_parser) simul_parser.add_argument('-w', '--wdir', type=str, required=True, help="Output folder where data will be stored") diff --git a/popsim b/popsim index 3a817c8..1903b65 160000 --- a/popsim +++ b/popsim @@ -1 +1 @@ -Subproject commit 3a817c8b952b74d2056a9ee41be30554c746f7df +Subproject commit 1903b654d5395769a250cce7228c1e28aa6a86ec diff --git a/snakecnv/popsim.snk b/snakecnv/popsim.snk index d23df53..48858b7 100644 --- a/snakecnv/popsim.snk +++ b/snakecnv/popsim.snk @@ -26,6 +26,9 @@ MIN_DELETIONS = config["min_deletions"] MIN_INVERSIONS = config["min_inversions"] MAX_TRY = config["max_try"] GENOTYPES = config["genotypes"] if "genotypes" in config else None +OVERLAP_CUTOFF = config["overlap_cutoff"] +LEFT_PRECISION = config["left_precision"] +RIGHT_PRECISION = config["right_precision"] # Functions here @@ -70,7 +73,9 @@ localrules: all_popsim rule all_popsim: input: - set_all_inputs + expand("results/{svtype}/Summarized_results_{svtype}.ipynb", svtype=config["varianttypes"]), + expand("results/{svtype}/Summarized_results_{svtype}.html", svtype=config["varianttypes"]), + #set_all_inputs #expand("bams/{sample}.bam", sample=["indiv%d" % i for i in range(1, NB_INDS+1)]) #*bp_outputs @@ -78,7 +83,8 @@ rule buildpop: input: **bp_inputs output: - *bp_outputs + *bp_outputs, + os.path.join("pop", "genotypes.vcf.gz") params: nb_inds = NB_INDS, out_dir = "pop", @@ -134,4 +140,59 @@ rule linkdatabams: bai="data/bams/{sample}.bam.bai" run: os.symlink(os.path.abspath(input.bam), output.bam) - os.symlink(os.path.abspath(input.bai), output.bai) \ No newline at end of file + os.symlink(os.path.abspath(input.bai), output.bai) + +rule buildinfilesresults: + input: + genotypes = expand("{batch}/genotypes/{{svtype}}/svtyper_{chrom}_{{svtype}}_genotypes.vcf.gz", + chrom=chromosomes, + batch=samples.keys()), + filtered = expand("{batch}/filtered/{{svtype}}/svtyper_{chrom}_{{svtype}}_genotypes_filtered.vcf.gz", + chrom=chromosomes, + batch=samples.keys()) + output: + genotypes = "list_files/tools_results_files_{svtype}.list", + filtered = "list_files/filtered_results_files_{svtype}.list" + threads: + 1 + run: + with open(output.genotypes, "w") as o_gt: + for i_gt in input.genotypes: + o_gt.write("%s\n" % i_gt) + +rule buildresults: + input: + genotypes = "list_files/tools_results_files_{svtype}.list", + filtered = "list_files/filtered_results_files_{svtype}.list", + true_vcf = "pop/genotypes.vcf.gz" + output: + "results/{svtype}/Summarized_results_{svtype}.ipynb", + "results/{svtype}/Summarized_results_{svtype}.html" + params: + overlap_cutoff = OVERLAP_CUTOFF, + left_precision = LEFT_PRECISION, + right_precision = RIGHT_PRECISION, + svlist = SV_LIST, + haploid = HAPLOID + threads: + 1 + log: + stdout = "logs/results_{svtype}.log" + run: + command = ["build_results.py", + "-v", input.genotypes, + "-t", input.true_vcf, + "-f", input.filtered_vcf, + "-y", wildcards.svtype, + "--overlap_cutoff", str(params.overlap_cutoff), + "--left-precision", str(params.overlap_cutoff), + "--right-precision", str(params.right_precision), + "-o", os.path.join("results", wildcards.svtype), + "--no-xls"] + + if params.haploid: + command.append("--haploid") + if params.svlist is not None: + command += ["-r", params.svlist] + + shell(" ".join(command) + "1> %s 2> %s" % (log.stdout, log.stderr)) -- GitLab