Skip to content
Snippets Groups Projects
Commit 6737fa62 authored by Floreal Cabanettes's avatar Floreal Cabanettes
Browse files

Add inversions simulations

parent 9203fae4
No related branches found
No related tags found
No related merge requests found
This diff is collapsed.
......@@ -2,4 +2,9 @@ DEL 100 200 0.7
DEL 200 500 0.2
DEL 500 1000 0.07
DEL 1000 2000 0.02
DEL 2000 10000 0.01
\ No newline at end of file
DEL 2000 10000 0.01
INV 100 200 0.7
INV 200 500 0.2
INV 500 1000 0.07
INV 1000 2000 0.02
INV 2000 10000 0.01
......@@ -33,6 +33,7 @@ class VariantsSimulator:
if nstretches is not None:
self.nstretches = self.__load_nstretches(nstretches)
self.deletions = {}
self.invertions = {}
self.threads = threads if threads > -1 else multiprocessing.cpu_count()
self.stdout = stdout
self.stderr = stderr
......@@ -80,7 +81,7 @@ class VariantsSimulator:
def __load_variants(self, sv_list):
svs = {}
allowed_sv = {"DEL", "INV", "DUP"}
allowed_sv = {"DEL", "INV"}
with open(sv_list, "r") as sv_list_f:
for line in sv_list_f:
parts = re.split("\s+", line.strip("\n"))
......@@ -110,10 +111,10 @@ class VariantsSimulator:
self.min_sizes[type_sv] = min(self.min_sizes[type_sv], min_s)
# Only deletions are supported for now:
if "DEL" not in svs:
raise Exception("Error: only deletions are supported for now!")
elif len(svs) > 1:
self.print_err("Warn: only deletions are supported for now. Other variant types will be ignored.")
if "DEL" not in svs and "INV" not in svs:
raise Exception("Error: only deletions and inversions are supported for now!")
elif (("DEL" not in svs or "INV" not in svs) and len(svs) > 1) or (len(svs) > 2):
self.print_err("Warn: only deletions and inversions are supported for now. Other variant types will be ignored.")
for type_sv in svs:
svs[type_sv].sort(key=lambda x: -x["proba"])
......@@ -143,6 +144,13 @@ class VariantsSimulator:
stretches[chrm].sort(key=lambda x: x[0])
return stretches
def _get_inversion_size(self):
r = random.uniform(0, 1)
for sizes in self.variants["INV"]:
if r < sizes["proba_cumul"]:
break
return random.randint(sizes["min"], sizes["max"] + 1)
def _get_deletion_size(self):
r = random.uniform(0, 1)
for sizes in self.variants["DEL"]:
......@@ -168,7 +176,6 @@ class VariantsSimulator:
intervals.sort(key=lambda x: x[0]-x[1])
return intervals[0]
def _get_far_from_nstretches(self, chrm, start, end, chr_len, pos=0):
"""
Get if a deletion is far from N stretches
......@@ -242,18 +249,21 @@ class VariantsSimulator:
break
return True, False, False, start, end, -1
def _get_random_deletions_by_chr(self, chrm, chr_length, deletions, proba_deletion):
def _get_random_variants_by_chr(self, chrm, chr_length, deletions, inversions, proba_deletion, proba_inversion):
self._print_out("Generating for chromosome %s..." % chrm)
nb_on_ch = 0
nb_dels = 0
nb_dels_on_ch = 0
nb_invs_on_ch = 0
nb_inv_on_ch = 0
nb_inv = 0
i = 0
len_chr = chr_length[chrm]
deletions[chrm] = []
inversions[chrm] = []
proba_inversion_cum = proba_deletion + proba_inversion
while i < len_chr:
r = random.uniform(0, 1)
if r <= proba_deletion:
nb_dels += 1
nb_on_ch += 1
nb_dels_on_ch += 1
del_size = self._get_deletion_size()
start = i + 1
end = i + 1 + del_size
......@@ -285,7 +295,7 @@ class VariantsSimulator:
i = start
freq = 0.2 if random.uniform(0, 1) < 0.5 else 0.5
deletions[chrm].append({
"name": "DEL{0}_{1}".format(chrm, nb_on_ch),
"name": "DEL{0}_{1}".format(chrm, nb_dels_on_ch),
"start": start,
"end": end,
"length": del_size,
......@@ -294,26 +304,72 @@ class VariantsSimulator:
i += del_size
else:
self.print_err("N-stretch filter: removed", print_message_header)
elif r <= proba_inversion_cum:
nb_inv += 1
nb_inv_on_ch += 1
inv_size = self._get_inversion_size()
start = i + 1
end = i + 1 + inv_size
print_message_header = "One inversion found ({0}:{1}-{2} ; size: {3})...".format(chrm, start, end,
inv_size)
pos = 0
keep_inversion = True
has_resize = False
has_move = False
while keep_inversion and pos >= 0:
keep_inversion, with_resize, with_move, start, end, pos = self._get_far_from_nstretches(chrm, start,
end, len_chr,
pos)
if with_resize:
inv_size = end - start
has_resize = True
elif with_move:
has_move = True
if keep_inversion:
if has_resize:
self.print_warn("N-stretch filter: resized ({0}:{1}-{2}, new size:{3})".
format(chrm, start, end, inv_size), print_message_header)
elif has_move:
self.print_warn("N-stretch filter: moved ({0}:{1}-{2})".format(chrm, start, end),
print_message_header)
else:
self.print_ok("N-stretch filter: OK", print_message_header)
i = start
freq = 0.2 if random.uniform(0, 1) < 0.5 else 0.5
inversions[chrm].append({
"name": "INV{0}_{1}".format(chrm, nb_inv_on_ch),
"start": start,
"end": end,
"length": inv_size,
"freq": freq
})
i += inv_size
i += 1
return len(deletions[chrm]), deletions, self.stdout_stash, self.stderr_stash
return len(deletions[chrm]), deletions, len(inversions[chrm]), inversions, self.stdout_stash, self.stderr_stash
def get_random_deletions(self, proba_deletion, reference_genome):
def get_random_variants(self, proba_deletion, proba_inversion, reference_genome):
fasta_index = SeqIO.index(reference_genome, "fasta")
deletions = {}
inversions = {}
chr_length = {}
for chrm in fasta_index:
chr_length[str(chrm)] = len(fasta_index[chrm].seq)
results = Parallel(n_jobs=self.threads)(delayed(self._get_random_deletions_by_chr)
(str(chrm), chr_length, deletions, proba_deletion)
results = Parallel(n_jobs=self.threads)(delayed(self._get_random_variants_by_chr)
(str(chrm), chr_length, deletions, inversions, proba_deletion,
proba_inversion)
for chrm in fasta_index)
nb_dels = 0
nb_invs = 0
for result in results:
nb_dels += result[0]
deletions.update(result[1])
self.stdout_stash += result[2]
self.stderr_stash += result[3]
nb_invs += result[2]
inversions.update(result[3])
self.stdout_stash += result[4]
self.stderr_stash += result[5]
self.deletions = deletions
return nb_dels, deletions
self.invertions = inversions
return nb_dels, deletions, nb_invs, inversions
def print_variants(self):
self._print_out("SV\tCHR\tSTART\tEND\tLENGTH\tFREQ")
......@@ -332,7 +388,7 @@ def main():
args = parser.parse_args()
try:
simultator = VariantsSimulator(args.sv_list)
simultator.get_random_deletions(args.proba_del, args.reference)
simultator.get_random_variants(args.proba_del, args.reference)
simultator.print_variants()
except InputException as e:
print(e)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment