diff --git a/cnvpipelines.py b/cnvpipelines.py index aca71c8bedc0c907a99af22bbca42ecb019133c3..ab7877543bd22a0dfac14b732c4c86367ebebfcf 100755 --- a/cnvpipelines.py +++ b/cnvpipelines.py @@ -942,7 +942,7 @@ class CnvPipeline: def run_simulation(self, nb_inds, reference, sv_list, coverage, force_polymorphism, haploid, proba_del, proba_inv, read_len, insert_len_mean, insert_len_sd, min_deletions, - min_inversions, max_try, genotypes, tools, species, max_n_stretches, + min_inversions, freq_del, freq_inv, max_try, genotypes, tools, species, max_n_stretches, overlap_cutoff, left_precision, right_precision, chromosomes, force_all_chr=False, force_wdir=False, **kwargs): """ @@ -1055,6 +1055,8 @@ class CnvPipeline: "insert_len_sd": insert_len_sd, "min_deletions": min_deletions, "min_inversions": min_inversions, + "freq_del": freq_del, + "freq_inv": freq_inv, "max_try": max_try, "sample_file_align": samples_file_fq, "tools": tools, @@ -1288,8 +1290,9 @@ if __name__ == "__main__": dest="force_all_chr", help="Do not remove small contigs (< 500bk) from the analysis") detection_parser.add_argument('-w', '--wdir', type=str, required=True, help="Output folder where data will be stored") - detection_parser.add_argument('-v', '--variant-types', type=str, help="type of variants to detect (default: all)", - choices=VTYPES, nargs="+", default=VTYPES) + detection_parser.add_argument('-v', '--variant-types', type=str, + help="type of variants to detect, space separated (default: DEL, INV and DUP)", + choices=VTYPES, nargs="+", default=["DEL", "INV", "DUP"]) add_run_options(detection_parser) detection_parser.set_defaults(func="run_detection") @@ -1355,9 +1358,13 @@ if __name__ == "__main__": type=check_min_size_0) simul_parser.add_argument("-mi", "--min-inversions", help="Minimum of inversions to generate (>=0)", default=1, type=check_min_size_0) + simul_parser.add_argument('-fd', '--freq-del', type=float, required=False, + help="Frequencies choices for deletions, space separated", nargs="+", default=[0.2, 0.5]) + simul_parser.add_argument('-fi', '--freq-inv', type=float, required=False, + help="Frequencies choices for inversions, space separated", nargs="+", default=[0.2, 0.5]) simul_parser.add_argument("--max-try", help="Maximum of tries", default=10, type=check_min_size_0) simul_parser.add_argument("-g", "--genotypes", help="Position of SV with genotypes of individuals") - simul_parser.add_argument('-t', '--tools', type=str, required=True, help="Tools to launch, coma separated", + simul_parser.add_argument('-t', '--tools', type=str, required=True, help="Tools to launch, space separated", nargs="+", choices=TOOLS) simul_parser.add_argument('-sp', '--species', type=str, required=False, help="[refbundle] Species name, according to the NCBI Taxonomy database") diff --git a/popsim b/popsim index 46eedc901eb19c978a7fe99f6b7f1a6d18d80c75..2a305da7796bfcd0e420bd0a646321d575ac8f56 160000 --- a/popsim +++ b/popsim @@ -1 +1 @@ -Subproject commit 46eedc901eb19c978a7fe99f6b7f1a6d18d80c75 +Subproject commit 2a305da7796bfcd0e420bd0a646321d575ac8f56 diff --git a/snakecnv/detection.snk b/snakecnv/detection.snk index 785c50af9a5196a34dd7ef3ccd4d4778692691bb..17028d1acb8325ca036e73c8ef9c3aa3a7df2026 100644 --- a/snakecnv/detection.snk +++ b/snakecnv/detection.snk @@ -54,13 +54,13 @@ def get_chr_batches(ref_file, chr): # Make ranges: groups = [] start = 1 - end = min(start + 19999999, len_chr) - while (len_chr - end + 1 >= 19900000) or (len_chr + 1 == end): + end = min(start + 9999999, len_chr) + while (len_chr - end + 1 >= 9950000) or (len_chr + 1 == end): groups.append((start, end)) if len_chr + 1 == end: break - start = end - 99999 - end = min(start + 19999999, len_chr + 1) + start = end - 49999 + end = min(start + 9999999, len_chr + 1) last_group = (start, len_chr + 1) if last_group not in groups: groups.append(last_group) diff --git a/snakecnv/popsim.snk b/snakecnv/popsim.snk index 8fc63c378259b363490aaf308f16dc9dd3f8b3e5..292403401504cc90927345272d08acb6e3115bc6 100644 --- a/snakecnv/popsim.snk +++ b/snakecnv/popsim.snk @@ -24,6 +24,8 @@ INSERT_LEN_MEAN = config["insert_len_mean"] INSERT_LEN_SD = config["insert_len_sd"] MIN_DELETIONS = config["min_deletions"] MIN_INVERSIONS = config["min_inversions"] +FREQ_DEL = config["freq_del"] +FREQ_INV = config["freq_inv"] MAX_TRY = config["max_try"] GENOTYPES = config["genotypes"] if "genotypes" in config else None OVERLAP_CUTOFF = config["overlap_cutoff"] @@ -125,6 +127,8 @@ rule buildpop: insert_len_sd = INSERT_LEN_SD, min_deletions = MIN_DELETIONS, min_inversions = MIN_INVERSIONS, + freq_del = FREQ_DEL, + freq_inv = FREQ_INV, max_try = MAX_TRY threads: 8 @@ -145,6 +149,8 @@ rule buildpop: "-q", "-md", str(params.min_deletions), "-mi", str(params.min_inversions), + "-fd", " ".join(map(str,params.freq_del)), + "-fi", " ".join(map(str,params.freq_inv)), "--max-try", str(params.max_try), "-t", str(threads), "-e"]