diff --git a/build_pop.py b/build_pop.py index 3dfbb664b54cd19aab890ed16ac77612bfba3c4a..d713393668a4075b2e1e04c58598e1d0927c9a4e 100755 --- a/build_pop.py +++ b/build_pop.py @@ -16,6 +16,14 @@ from exceptions import InputException import multiprocessing import subprocess + +def check_min_size(value): + ivalue = int(value) + if ivalue <= 1: + raise argparse.ArgumentTypeError("%s is an invalid size value (>= 1)" % value) + return ivalue + + def parse_args(): """ Parse script arguments @@ -43,6 +51,8 @@ def parse_args(): type=int, default=30) parser.add_argument("-q", "--quiet", help="Don't ask anything, choose default answer instead", action="store_const", const=True, default=False) + parser.add_argument("-md", "--min-deletions", help="Minimum of deletions to generate (>=1)", default=1, + type=check_min_size) parser.add_argument("-t", "--threads", help="Number of threads", default=multiprocessing.cpu_count(), type=int) args = parser.parse_args() @@ -319,7 +329,7 @@ def confirm(deletions: dict, variants: dict): def init(output_dir, force_outputdir, sv_list, nstretches, nb_inds, reference, proba_del, haploid, force_polymorphism, - coverage, read_len, insert_len_mean, insert_len_sd, threads, quiet=True): + coverage, read_len, insert_len_mean, insert_len_sd, threads, min_deletions=1, quiet=True): if os.path.isdir(output_dir): if force_outputdir: shutil.rmtree(output_dir) @@ -353,11 +363,21 @@ def init(output_dir, force_outputdir, sv_list, nstretches, nb_inds, reference, p print("GENERATE RANDOM DELETIONS VARIANTS...\n") try: + nb_deletions = 0 + nb_try = 0 sv_sim = VariantsSimulator(sv_list, nstretches, threads) - deletions = sv_sim.get_random_deletions(proba_del, reference) + print("Try to generate at least %s deletions..." % min_deletions) + max_try = 10 + while nb_deletions < min_deletions and nb_try < max_try: + print("\nTry {0} / {1}...".format(nb_try + 1, max_try)) + nb_deletions, deletions = sv_sim.get_random_deletions(proba_del, reference) + nb_try += 1 + if nb_deletions < min_deletions: + raise InputException("\nUnable to generate %s deletions. Try to reduce size of deletions or increase " + "general probability to have a deletion." % min_deletions) print("") except InputException as e: - print(e) + eprint(e) return False if quiet or confirm(deletions, sv_sim.variants): genotypes_for_inds = build_genotypes_vcf_list(deletions, output_vcf, haploid, force_polymorphism, nb_inds, @@ -387,9 +407,10 @@ def main(): insert_len_mean = args.insert_len_mean insert_len_sd = args.insert_len_sd quiet = args.quiet + min_deletions = args.min_deletions init(output_dir, force_outputdir, sv_list, nstretches, nb_inds, reference, proba_del, haploid, force_polymorphism, - coverage, read_len, insert_len_mean, insert_len_sd, threads, quiet) + coverage, read_len, insert_len_mean, insert_len_sd, threads, min_deletions, quiet) if __name__ == '__main__': sys.exit(main()) diff --git a/variants_simulator.py b/variants_simulator.py index 27cc7a29411f48b681ad6e57c0dd520403b4fe25..270b87dac4f8b723cec3f5370d9753026855fc63 100755 --- a/variants_simulator.py +++ b/variants_simulator.py @@ -267,7 +267,7 @@ class VariantsSimulator: else: self.print_err("N-stretch filter: removed", print_message_header) i += 1 - return deletions + return len(deletions[chrm]), deletions def get_random_deletions(self, proba_deletion, reference_genome): fasta_index = SeqIO.index(reference_genome, "fasta") @@ -278,10 +278,12 @@ class VariantsSimulator: results = Parallel(n_jobs=self.threads)(delayed(self._get_random_deletions_by_chr) (str(chrm), chr_length, deletions, proba_deletion) for chrm in fasta_index) + nb_dels = 0 for result in results: - deletions.update(result) + nb_dels += result[0] + deletions.update(result[1]) self.deletions = deletions - return deletions + return nb_dels, deletions def print_variants(self): print("SV\tCHR\tSTART\tEND\tLENGTH\tFREQ")