Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
S
svlib
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
SVdetection
svlib
Compare revisions
42ac8f47a1f4df5dfa8843e8e5d82d08a812f17e to 15f5c6165c030a7c007bd739076a28f5f12f8700
Compare revisions
Changes are shown as if the
source
revision was being merged into the
target
revision.
Learn more about comparing revisions.
Source
svdetection/svlib
Select target project
No results found
15f5c6165c030a7c007bd739076a28f5f12f8700
Select Git revision
Branches
master
newfilter
Swap
Target
svdetection/svlib
Select target project
svdetection/svlib
1 result
42ac8f47a1f4df5dfa8843e8e5d82d08a812f17e
Select Git revision
Branches
master
newfilter
Show changes
Only incoming changes from source
Include changes to target since source was created
Compare
Commits on Source (1)
adding svtyper_utils
· 15f5c616
Thomas Faraut
authored
5 years ago
15f5c616
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
svtyper_utils.py
+110
-0
110 additions, 0 deletions
svtyper_utils.py
with
110 additions
and
0 deletions
svtyper_utils.py
0 → 100644
View file @
15f5c616
import
sys
import
gzip
import
shutil
import
tempfile
import
os
import
multiprocessing
from
functools
import
partial
from
subprocess
import
call
import
random
import
string
from
pysam
import
AlignmentFile
,
tabix_compress
,
tabix_index
from
svtyper
import
singlesample
as
single
def
fetchId
(
bamfile
):
"""
Fetch sample id in a bam file
:param bamfile: the bam file
:type bamfile: file
:return: sample name
:rtype: str
"""
bamfile_fin
=
AlignmentFile
(
bamfile
,
'
rb
'
)
name
=
bamfile_fin
.
header
[
'
RG
'
][
0
][
'
SM
'
]
bamfile_fin
.
close
()
return
name
def
get_bamfiles
(
bamlist
):
with
open
(
bamlist
,
"
r
"
)
as
fin
:
bamfiles
=
[
os
.
path
.
abspath
(
f
.
strip
())
for
f
in
fin
]
return
bamfiles
def
random_string
(
length
=
10
):
ramdom_str
=
[
random
.
choice
(
string
.
ascii_uppercase
+
string
.
digits
)
for
_
in
range
(
length
)]
return
''
.
join
(
ramdom_str
)
def
genotype_multiple_samples
(
bamlist
,
vcf_in
,
vcf_out
,
cores
=
1
):
bamfiles
=
get_bamfiles
(
bamlist
)
if
vcf_out
==
sys
.
stdout
:
tmp_dir
=
tempfile
.
mkdtemp
()
else
:
tmp_dir
=
os
.
path
.
join
(
os
.
path
.
dirname
(
vcf_out
),
"
tmp
"
+
random_string
())
os
.
mkdir
(
tmp_dir
)
if
vcf_in
==
sys
.
stdin
:
vcf_in
=
single
.
dump_piped_vcf_to_file
(
vcf_in
,
tmp_dir
)
elif
vcf_in
.
endswith
(
"
.gz
"
):
vcf_in_gz
=
vcf_in
vcf_in
=
os
.
path
.
join
(
tmp_dir
,
os
.
path
.
basename
(
vcf_in
[:
-
3
]))
with
gzip
.
open
(
vcf_in_gz
,
'
rb
'
)
as
f_in
,
open
(
vcf_in
,
'
wb
'
)
as
f_out
:
shutil
.
copyfileobj
(
f_in
,
f_out
)
pool
=
multiprocessing
.
Pool
(
processes
=
cores
)
launch_ind
=
partial
(
genotype_single_sample
,
vcf_in
=
vcf_in
,
out_dir
=
tmp_dir
)
vcf_files
=
pool
.
map
(
launch_ind
,
bamfiles
)
merge_cmd
=
"
bcftools merge -m id
"
if
vcf_out
!=
sys
.
stdout
:
merge_cmd
+=
"
-O z
"
+
"
-o
"
+
vcf_out
+
"
"
merge_cmd
+=
"
"
.
join
(
vcf_files
)
exit_code
=
call
(
merge_cmd
,
shell
=
True
)
if
exit_code
==
0
:
if
vcf_out
!=
sys
.
stdout
:
tabix_index
(
vcf_out
,
force
=
True
,
preset
=
"
vcf
"
)
shutil
.
rmtree
(
tmp_dir
)
else
:
print
(
"
Failed: bcftools merge exits with status %d
"
%
exit_code
)
exit
(
1
)
def
genotype_single_sample
(
bam
,
vcf_in
,
out_dir
):
lib_info_json
=
bam
+
"
.json
"
sample
=
fetchId
(
bam
)
out_vcf
=
os
.
path
.
join
(
out_dir
,
sample
+
"
.gt.vcf
"
)
with
open
(
vcf_in
,
"
r
"
)
as
inf
,
open
(
out_vcf
,
"
w
"
)
as
outf
:
single
.
sso_genotype
(
bam_string
=
bam
,
vcf_in
=
inf
,
vcf_out
=
outf
,
min_aligned
=
20
,
split_weight
=
1
,
disc_weight
=
1
,
num_samp
=
1000000
,
lib_info_path
=
lib_info_json
,
debug
=
False
,
ref_fasta
=
None
,
sum_quals
=
False
,
max_reads
=
1000
,
max_ci_dist
=
1e10
,
cores
=
None
,
batch_size
=
None
)
out_gz
=
out_vcf
+
"
.gz
"
tabix_compress
(
out_vcf
,
out_gz
,
force
=
True
)
tabix_index
(
out_gz
,
force
=
True
,
preset
=
"
vcf
"
)
return
out_gz
This diff is collapsed.
Click to expand it.