From 6f3d65b2efc4c2dff70467ef146952767608725f Mon Sep 17 00:00:00 2001
From: Floreal Cabanettes <floreal.cabanettes@inra.fr>
Date: Thu, 7 Jun 2018 17:02:45 +0200
Subject: [PATCH] Add build of results

---
 cnvpipelines.py     | 11 ++++++--
 popsim              |  2 +-
 snakecnv/popsim.snk | 67 +++++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 74 insertions(+), 6 deletions(-)

diff --git a/cnvpipelines.py b/cnvpipelines.py
index 18bcedb..0e91f48 100755
--- a/cnvpipelines.py
+++ b/cnvpipelines.py
@@ -792,7 +792,8 @@ class CnvPipeline:
 
     def run_simulation(self, nb_inds, reference, nstretches, sv_list, coverage, force_polymorphism,
                        haploid, proba_del, proba_inv, read_len, insert_len_mean, insert_len_sd, min_deletions,
-                       min_inversions, max_try, genotypes, tools, species, max_n_stretches, force_wdir=False, **kwargs):
+                       min_inversions, max_try, genotypes, tools, species, max_n_stretches,
+                       overlap_cutoff, left_precision, right_precision, force_wdir=False, **kwargs):
         """
         Run simulation
         :param nb_inds:
@@ -886,7 +887,10 @@ class CnvPipeline:
                 "no_detection_index": True,
                 "build_refbundle": "genomestrip" in tools,
                 "species": species,
-                "maxNstretches": max_n_stretches
+                "maxNstretches": max_n_stretches,
+                "overlap_cutoff": overlap_cutoff,
+                "left_precision": left_precision,
+                "right_precision": right_precision
             }
 
             if "genomestrip" in tools:
@@ -1178,6 +1182,9 @@ if __name__ == "__main__":
                                   help="[refbundle] Species name, according to the NCBI Taxonomy database")
     simul_parser.add_argument('-mn', '--max-n-stretches', type=int, required=False, default=100,
                                   help="[refbundle] Max size of nstretches to consider them as it")
+    simul_parser.add_argument('--overlap_cutoff', type=float, default=0.5, help='[results] cutoff for reciprocal overlap')
+    simul_parser.add_argument('--left-precision', type=int, default=-1, help='[results] left breakpoint precision')
+    simul_parser.add_argument('--right-precision', type=int, default=-1, help='[results] right breakpoint precision')
     add_run_and_rerun_options(simul_parser)
     simul_parser.add_argument('-w', '--wdir', type=str, required=True,
                               help="Output folder where data will be stored")
diff --git a/popsim b/popsim
index 3a817c8..1903b65 160000
--- a/popsim
+++ b/popsim
@@ -1 +1 @@
-Subproject commit 3a817c8b952b74d2056a9ee41be30554c746f7df
+Subproject commit 1903b654d5395769a250cce7228c1e28aa6a86ec
diff --git a/snakecnv/popsim.snk b/snakecnv/popsim.snk
index d23df53..48858b7 100644
--- a/snakecnv/popsim.snk
+++ b/snakecnv/popsim.snk
@@ -26,6 +26,9 @@ MIN_DELETIONS = config["min_deletions"]
 MIN_INVERSIONS = config["min_inversions"]
 MAX_TRY = config["max_try"]
 GENOTYPES = config["genotypes"] if "genotypes" in config else None
+OVERLAP_CUTOFF = config["overlap_cutoff"]
+LEFT_PRECISION = config["left_precision"]
+RIGHT_PRECISION = config["right_precision"]
 
 
 # Functions here
@@ -70,7 +73,9 @@ localrules: all_popsim
 
 rule all_popsim:
     input:
-        set_all_inputs
+        expand("results/{svtype}/Summarized_results_{svtype}.ipynb", svtype=config["varianttypes"]),
+        expand("results/{svtype}/Summarized_results_{svtype}.html", svtype=config["varianttypes"]),
+        #set_all_inputs
         #expand("bams/{sample}.bam", sample=["indiv%d" % i for i in range(1, NB_INDS+1)])
         #*bp_outputs
 
@@ -78,7 +83,8 @@ rule buildpop:
     input:
         **bp_inputs
     output:
-        *bp_outputs
+        *bp_outputs,
+        os.path.join("pop", "genotypes.vcf.gz")
     params:
         nb_inds = NB_INDS,
         out_dir = "pop",
@@ -134,4 +140,59 @@ rule linkdatabams:
         bai="data/bams/{sample}.bam.bai"
     run:
         os.symlink(os.path.abspath(input.bam), output.bam)
-        os.symlink(os.path.abspath(input.bai), output.bai)
\ No newline at end of file
+        os.symlink(os.path.abspath(input.bai), output.bai)
+
+rule buildinfilesresults:
+    input:
+        genotypes = expand("{batch}/genotypes/{{svtype}}/svtyper_{chrom}_{{svtype}}_genotypes.vcf.gz",
+                           chrom=chromosomes,
+                           batch=samples.keys()),
+        filtered = expand("{batch}/filtered/{{svtype}}/svtyper_{chrom}_{{svtype}}_genotypes_filtered.vcf.gz",
+                          chrom=chromosomes,
+                          batch=samples.keys())
+    output:
+        genotypes = "list_files/tools_results_files_{svtype}.list",
+        filtered = "list_files/filtered_results_files_{svtype}.list"
+    threads:
+        1
+    run:
+        with open(output.genotypes, "w") as o_gt:
+            for i_gt in input.genotypes:
+                o_gt.write("%s\n" % i_gt)
+
+rule buildresults:
+    input:
+        genotypes = "list_files/tools_results_files_{svtype}.list",
+        filtered = "list_files/filtered_results_files_{svtype}.list",
+        true_vcf = "pop/genotypes.vcf.gz"
+    output:
+        "results/{svtype}/Summarized_results_{svtype}.ipynb",
+        "results/{svtype}/Summarized_results_{svtype}.html"
+    params:
+        overlap_cutoff = OVERLAP_CUTOFF,
+        left_precision = LEFT_PRECISION,
+        right_precision = RIGHT_PRECISION,
+        svlist = SV_LIST,
+        haploid = HAPLOID
+    threads:
+        1
+    log:
+        stdout = "logs/results_{svtype}.log"
+    run:
+        command = ["build_results.py",
+                   "-v", input.genotypes,
+                   "-t", input.true_vcf,
+                   "-f", input.filtered_vcf,
+                   "-y", wildcards.svtype,
+                   "--overlap_cutoff", str(params.overlap_cutoff),
+                   "--left-precision", str(params.overlap_cutoff),
+                   "--right-precision", str(params.right_precision),
+                   "-o", os.path.join("results", wildcards.svtype),
+                   "--no-xls"]
+
+        if params.haploid:
+            command.append("--haploid")
+        if params.svlist is not None:
+            command += ["-r", params.svlist]
+
+        shell(" ".join(command) + "1> %s 2> %s" % (log.stdout, log.stderr))
-- 
GitLab