Skip to content
Snippets Groups Projects
Commit b45bc530 authored by Floreal Cabanettes's avatar Floreal Cabanettes
Browse files

Add minimum number of deletions to generate parameter

parent ee7d12b3
No related branches found
No related tags found
No related merge requests found
......@@ -16,6 +16,14 @@ from exceptions import InputException
import multiprocessing
import subprocess
def check_min_size(value):
ivalue = int(value)
if ivalue <= 1:
raise argparse.ArgumentTypeError("%s is an invalid size value (>= 1)" % value)
return ivalue
def parse_args():
"""
Parse script arguments
......@@ -43,6 +51,8 @@ def parse_args():
type=int, default=30)
parser.add_argument("-q", "--quiet", help="Don't ask anything, choose default answer instead", action="store_const",
const=True, default=False)
parser.add_argument("-md", "--min-deletions", help="Minimum of deletions to generate (>=1)", default=1,
type=check_min_size)
parser.add_argument("-t", "--threads", help="Number of threads", default=multiprocessing.cpu_count(), type=int)
args = parser.parse_args()
......@@ -319,7 +329,7 @@ def confirm(deletions: dict, variants: dict):
def init(output_dir, force_outputdir, sv_list, nstretches, nb_inds, reference, proba_del, haploid, force_polymorphism,
coverage, read_len, insert_len_mean, insert_len_sd, threads, quiet=True):
coverage, read_len, insert_len_mean, insert_len_sd, threads, min_deletions=1, quiet=True):
if os.path.isdir(output_dir):
if force_outputdir:
shutil.rmtree(output_dir)
......@@ -353,11 +363,21 @@ def init(output_dir, force_outputdir, sv_list, nstretches, nb_inds, reference, p
print("GENERATE RANDOM DELETIONS VARIANTS...\n")
try:
nb_deletions = 0
nb_try = 0
sv_sim = VariantsSimulator(sv_list, nstretches, threads)
deletions = sv_sim.get_random_deletions(proba_del, reference)
print("Try to generate at least %s deletions..." % min_deletions)
max_try = 10
while nb_deletions < min_deletions and nb_try < max_try:
print("\nTry {0} / {1}...".format(nb_try + 1, max_try))
nb_deletions, deletions = sv_sim.get_random_deletions(proba_del, reference)
nb_try += 1
if nb_deletions < min_deletions:
raise InputException("\nUnable to generate %s deletions. Try to reduce size of deletions or increase "
"general probability to have a deletion." % min_deletions)
print("")
except InputException as e:
print(e)
eprint(e)
return False
if quiet or confirm(deletions, sv_sim.variants):
genotypes_for_inds = build_genotypes_vcf_list(deletions, output_vcf, haploid, force_polymorphism, nb_inds,
......@@ -387,9 +407,10 @@ def main():
insert_len_mean = args.insert_len_mean
insert_len_sd = args.insert_len_sd
quiet = args.quiet
min_deletions = args.min_deletions
init(output_dir, force_outputdir, sv_list, nstretches, nb_inds, reference, proba_del, haploid, force_polymorphism,
coverage, read_len, insert_len_mean, insert_len_sd, threads, quiet)
coverage, read_len, insert_len_mean, insert_len_sd, threads, min_deletions, quiet)
if __name__ == '__main__':
sys.exit(main())
......@@ -267,7 +267,7 @@ class VariantsSimulator:
else:
self.print_err("N-stretch filter: removed", print_message_header)
i += 1
return deletions
return len(deletions[chrm]), deletions
def get_random_deletions(self, proba_deletion, reference_genome):
fasta_index = SeqIO.index(reference_genome, "fasta")
......@@ -278,10 +278,12 @@ class VariantsSimulator:
results = Parallel(n_jobs=self.threads)(delayed(self._get_random_deletions_by_chr)
(str(chrm), chr_length, deletions, proba_deletion)
for chrm in fasta_index)
nb_dels = 0
for result in results:
deletions.update(result)
nb_dels += result[0]
deletions.update(result[1])
self.deletions = deletions
return deletions
return nb_dels, deletions
def print_variants(self):
print("SV\tCHR\tSTART\tEND\tLENGTH\tFREQ")
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment