Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
popsim
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Admin message
A compter du 1er avril, attention à vos pipelines :
Nouvelles limitations de Docker Hub
Show more breadcrumbs
SVdetection
popsim
Commits
6737fa62
Commit
6737fa62
authored
7 years ago
by
Floreal Cabanettes
Browse files
Options
Downloads
Patches
Plain Diff
Add inversions simulations
parent
9203fae4
No related branches found
Branches containing commit
No related tags found
No related merge requests found
Changes
3
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
build_pop.py
+173
-52
173 additions, 52 deletions
build_pop.py
defaults.rules
+6
-1
6 additions, 1 deletion
defaults.rules
variants_simulator.py
+76
-20
76 additions, 20 deletions
variants_simulator.py
with
255 additions
and
73 deletions
build_pop.py
+
173
−
52
View file @
6737fa62
This diff is collapsed.
Click to expand it.
defaults.rules
+
6
−
1
View file @
6737fa62
...
...
@@ -2,4 +2,9 @@ DEL 100 200 0.7
DEL 200 500 0.2
DEL 500 1000 0.07
DEL 1000 2000 0.02
DEL 2000 10000 0.01
\ No newline at end of file
DEL 2000 10000 0.01
INV 100 200 0.7
INV 200 500 0.2
INV 500 1000 0.07
INV 1000 2000 0.02
INV 2000 10000 0.01
This diff is collapsed.
Click to expand it.
variants_simulator.py
+
76
−
20
View file @
6737fa62
...
...
@@ -33,6 +33,7 @@ class VariantsSimulator:
if
nstretches
is
not
None
:
self
.
nstretches
=
self
.
__load_nstretches
(
nstretches
)
self
.
deletions
=
{}
self
.
invertions
=
{}
self
.
threads
=
threads
if
threads
>
-
1
else
multiprocessing
.
cpu_count
()
self
.
stdout
=
stdout
self
.
stderr
=
stderr
...
...
@@ -80,7 +81,7 @@ class VariantsSimulator:
def
__load_variants
(
self
,
sv_list
):
svs
=
{}
allowed_sv
=
{
"
DEL
"
,
"
INV
"
,
"
DUP
"
}
allowed_sv
=
{
"
DEL
"
,
"
INV
"
}
with
open
(
sv_list
,
"
r
"
)
as
sv_list_f
:
for
line
in
sv_list_f
:
parts
=
re
.
split
(
"
\s+
"
,
line
.
strip
(
"
\n
"
))
...
...
@@ -110,10 +111,10 @@ class VariantsSimulator:
self
.
min_sizes
[
type_sv
]
=
min
(
self
.
min_sizes
[
type_sv
],
min_s
)
# Only deletions are supported for now:
if
"
DEL
"
not
in
svs
:
raise
Exception
(
"
Error: only deletions are supported for now!
"
)
elif
len
(
svs
)
>
1
:
self
.
print_err
(
"
Warn: only deletions are supported for now. Other variant types will be ignored.
"
)
if
"
DEL
"
not
in
svs
and
"
INV
"
not
in
svs
:
raise
Exception
(
"
Error: only deletions
and inversions
are supported for now!
"
)
elif
((
"
DEL
"
not
in
svs
or
"
INV
"
not
in
svs
)
and
len
(
svs
)
>
1
)
or
(
len
(
svs
)
>
2
)
:
self
.
print_err
(
"
Warn: only deletions
and inversions
are supported for now. Other variant types will be ignored.
"
)
for
type_sv
in
svs
:
svs
[
type_sv
].
sort
(
key
=
lambda
x
:
-
x
[
"
proba
"
])
...
...
@@ -143,6 +144,13 @@ class VariantsSimulator:
stretches
[
chrm
].
sort
(
key
=
lambda
x
:
x
[
0
])
return
stretches
def
_get_inversion_size
(
self
):
r
=
random
.
uniform
(
0
,
1
)
for
sizes
in
self
.
variants
[
"
INV
"
]:
if
r
<
sizes
[
"
proba_cumul
"
]:
break
return
random
.
randint
(
sizes
[
"
min
"
],
sizes
[
"
max
"
]
+
1
)
def
_get_deletion_size
(
self
):
r
=
random
.
uniform
(
0
,
1
)
for
sizes
in
self
.
variants
[
"
DEL
"
]:
...
...
@@ -168,7 +176,6 @@ class VariantsSimulator:
intervals
.
sort
(
key
=
lambda
x
:
x
[
0
]
-
x
[
1
])
return
intervals
[
0
]
def
_get_far_from_nstretches
(
self
,
chrm
,
start
,
end
,
chr_len
,
pos
=
0
):
"""
Get if a deletion is far from N stretches
...
...
@@ -242,18 +249,21 @@ class VariantsSimulator:
break
return
True
,
False
,
False
,
start
,
end
,
-
1
def
_get_random_
deletion
s_by_chr
(
self
,
chrm
,
chr_length
,
deletions
,
proba_delet
ion
):
def
_get_random_
variant
s_by_chr
(
self
,
chrm
,
chr_length
,
deletions
,
inversions
,
proba_deletion
,
proba_invers
ion
):
self
.
_print_out
(
"
Generating for chromosome %s...
"
%
chrm
)
nb_on_ch
=
0
nb_dels
=
0
nb_dels_on_ch
=
0
nb_invs_on_ch
=
0
nb_inv_on_ch
=
0
nb_inv
=
0
i
=
0
len_chr
=
chr_length
[
chrm
]
deletions
[
chrm
]
=
[]
inversions
[
chrm
]
=
[]
proba_inversion_cum
=
proba_deletion
+
proba_inversion
while
i
<
len_chr
:
r
=
random
.
uniform
(
0
,
1
)
if
r
<=
proba_deletion
:
nb_dels
+=
1
nb_on_ch
+=
1
nb_dels_on_ch
+=
1
del_size
=
self
.
_get_deletion_size
()
start
=
i
+
1
end
=
i
+
1
+
del_size
...
...
@@ -285,7 +295,7 @@ class VariantsSimulator:
i
=
start
freq
=
0.2
if
random
.
uniform
(
0
,
1
)
<
0.5
else
0.5
deletions
[
chrm
].
append
({
"
name
"
:
"
DEL{0}_{1}
"
.
format
(
chrm
,
nb_on_ch
),
"
name
"
:
"
DEL{0}_{1}
"
.
format
(
chrm
,
nb_
dels_
on_ch
),
"
start
"
:
start
,
"
end
"
:
end
,
"
length
"
:
del_size
,
...
...
@@ -294,26 +304,72 @@ class VariantsSimulator:
i
+=
del_size
else
:
self
.
print_err
(
"
N-stretch filter: removed
"
,
print_message_header
)
elif
r
<=
proba_inversion_cum
:
nb_inv
+=
1
nb_inv_on_ch
+=
1
inv_size
=
self
.
_get_inversion_size
()
start
=
i
+
1
end
=
i
+
1
+
inv_size
print_message_header
=
"
One inversion found ({0}:{1}-{2} ; size: {3})...
"
.
format
(
chrm
,
start
,
end
,
inv_size
)
pos
=
0
keep_inversion
=
True
has_resize
=
False
has_move
=
False
while
keep_inversion
and
pos
>=
0
:
keep_inversion
,
with_resize
,
with_move
,
start
,
end
,
pos
=
self
.
_get_far_from_nstretches
(
chrm
,
start
,
end
,
len_chr
,
pos
)
if
with_resize
:
inv_size
=
end
-
start
has_resize
=
True
elif
with_move
:
has_move
=
True
if
keep_inversion
:
if
has_resize
:
self
.
print_warn
(
"
N-stretch filter: resized ({0}:{1}-{2}, new size:{3})
"
.
format
(
chrm
,
start
,
end
,
inv_size
),
print_message_header
)
elif
has_move
:
self
.
print_warn
(
"
N-stretch filter: moved ({0}:{1}-{2})
"
.
format
(
chrm
,
start
,
end
),
print_message_header
)
else
:
self
.
print_ok
(
"
N-stretch filter: OK
"
,
print_message_header
)
i
=
start
freq
=
0.2
if
random
.
uniform
(
0
,
1
)
<
0.5
else
0.5
inversions
[
chrm
].
append
({
"
name
"
:
"
INV{0}_{1}
"
.
format
(
chrm
,
nb_inv_on_ch
),
"
start
"
:
start
,
"
end
"
:
end
,
"
length
"
:
inv_size
,
"
freq
"
:
freq
})
i
+=
inv_size
i
+=
1
return
len
(
deletions
[
chrm
]),
deletions
,
self
.
stdout_stash
,
self
.
stderr_stash
return
len
(
deletions
[
chrm
]),
deletions
,
len
(
inversions
[
chrm
]),
inversions
,
self
.
stdout_stash
,
self
.
stderr_stash
def
get_random_
deletion
s
(
self
,
proba_deletion
,
reference_genome
):
def
get_random_
variant
s
(
self
,
proba_deletion
,
proba_inversion
,
reference_genome
):
fasta_index
=
SeqIO
.
index
(
reference_genome
,
"
fasta
"
)
deletions
=
{}
inversions
=
{}
chr_length
=
{}
for
chrm
in
fasta_index
:
chr_length
[
str
(
chrm
)]
=
len
(
fasta_index
[
chrm
].
seq
)
results
=
Parallel
(
n_jobs
=
self
.
threads
)(
delayed
(
self
.
_get_random_deletions_by_chr
)
(
str
(
chrm
),
chr_length
,
deletions
,
proba_deletion
)
results
=
Parallel
(
n_jobs
=
self
.
threads
)(
delayed
(
self
.
_get_random_variants_by_chr
)
(
str
(
chrm
),
chr_length
,
deletions
,
inversions
,
proba_deletion
,
proba_inversion
)
for
chrm
in
fasta_index
)
nb_dels
=
0
nb_invs
=
0
for
result
in
results
:
nb_dels
+=
result
[
0
]
deletions
.
update
(
result
[
1
])
self
.
stdout_stash
+=
result
[
2
]
self
.
stderr_stash
+=
result
[
3
]
nb_invs
+=
result
[
2
]
inversions
.
update
(
result
[
3
])
self
.
stdout_stash
+=
result
[
4
]
self
.
stderr_stash
+=
result
[
5
]
self
.
deletions
=
deletions
return
nb_dels
,
deletions
self
.
invertions
=
inversions
return
nb_dels
,
deletions
,
nb_invs
,
inversions
def
print_variants
(
self
):
self
.
_print_out
(
"
SV
\t
CHR
\t
START
\t
END
\t
LENGTH
\t
FREQ
"
)
...
...
@@ -332,7 +388,7 @@ def main():
args
=
parser
.
parse_args
()
try
:
simultator
=
VariantsSimulator
(
args
.
sv_list
)
simultator
.
get_random_
deletion
s
(
args
.
proba_del
,
args
.
reference
)
simultator
.
get_random_
variant
s
(
args
.
proba_del
,
args
.
reference
)
simultator
.
print_variants
()
except
InputException
as
e
:
print
(
e
)
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment