Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
bios4biol
bioinfo-utils
Commits
d3fa7d2c
Commit
d3fa7d2c
authored
Feb 19, 2015
by
Claire Hoede
Browse files
first commit
parent
140885fd
Changes
1
Hide whitespace changes
Inline
Side-by-side
bin/getNbAllAllelesFromVcf.py
0 → 100644
View file @
d3fa7d2c
#!/usr/local/bioinfo/bin/python2.5
#
# Copyright (C) 2015 INRA
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
import
sys
import
argparse
import
vcf
__name__
=
"getNbAllAllelesFromVcf.py"
__author__
=
'Plateforme bioinformatique Midi Pyrenees'
__copyright__
=
'Copyright (C) 2015 INRA'
__license__
=
'GNU General Public License'
__version__
=
'1.0'
__email__
=
'support.genopole@toulouse.inra.fr'
__status__
=
'beta'
__synopsis__
=
"getNbAllAllelesFromVcf.py INPUT --output OUTPUT"
__date__
=
"02/2015"
__authors__
=
"Claire Hoede"
__keywords__
=
"genotype, vcf, reads for each alleles"
__description__
=
"Write a tabulated file with count
\
of each read for each alternative allele whatever the alleles number.
\
The objective is to import the file in R to compute real alleles frequencies.
\
Columns in the output file will be
\
#chrom #pos #nbAltAll #DPTotal #nbRef #nbAlt1 ... #nbAltn
\
WARNING : for this first version your vcf file has to have only one sample
\
In the output file column not used for a variant is filled by NA"
def
get_args
():
"""
parse argument and options and do help and pydoc
@return : args and options
"""
parser
=
argparse
.
ArgumentParser
(
description
=
__description__
)
parser
.
add_argument
(
'input'
,
metavar
=
'input'
,
type
=
argparse
.
FileType
(
'r'
),
nargs
=
'?'
,
default
=
None
,
\
help
=
'File to process (use - for STDIN)'
)
parser
.
add_argument
(
'--output'
,
action
=
'store'
,
default
=
None
,
\
help
=
'Filename to output'
)
parser
.
add_argument
(
'--version'
,
action
=
'version'
,
version
=
__version__
)
return
parser
.
parse_known_args
()
def
write_output
(
lines2write
,
maxNbAlleles
,
outputFileName
):
"""
Write output file
@param lines2write : line from vcf genotype parsing
@param maxNbAlleles : maximum number of alleles found
@param outputFileName : output file name
"""
output
=
open
(
outputFileName
,
'w'
)
for
line
in
lines2write
:
litems
=
line
.
split
(
"
\t
"
)
nbAll
=
int
(
litems
[
2
])
nbNA
=
maxNbAlleles
-
nbAll
output
.
write
(
line
+
"
\t
NA"
*
nbNA
+
"
\n
"
)
output
.
close
()
(
args
,
unknown_args
)
=
get_args
()
inp
=
vcf
.
Reader
(
args
.
input
)
lines2write
=
[]
maxNbAlleles
=
0
for
record
in
inp
:
#only one sample is considered for this version
for
call
in
record
.
samples
:
try
:
if
len
(
record
.
alleles
)
>
maxNbAlleles
:
maxNbAlleles
=
len
(
record
.
alleles
)
lines2write
.
append
(
record
.
CHROM
+
"
\t
"
+
str
(
record
.
POS
)
+
"
\t
"
+
str
(
len
(
record
.
alleles
))
\
+
"
\t
"
+
str
(
record
.
genotype
(
call
.
sample
)[
'DP'
])
+
"
\t
"
+
"
\t
"
.
join
(
str
(
nb
)
for
nb
in
record
.
genotype
(
call
.
sample
)[
'AD'
]))
except
:
#the genotype is 1/1 (homozygote for the alt allele) and we do not have got read depth in the observed cases
sys
.
stderr
.
write
(
"no AD "
+
record
.
CHROM
+
"
\t
"
+
str
(
record
.
POS
)
+
"
\n
"
)
try
:
lines2write
.
append
(
record
.
CHROM
+
"
\t
"
+
str
(
record
.
POS
)
+
"
\t
"
+
str
(
len
(
record
.
alleles
))
\
+
"
\t
"
+
str
(
record
.
genotype
(
call
.
sample
)[
'DP'
])
\
+
"
\t
"
+
str
(
record
.
genotype
(
call
.
sample
)[
'DP'
]))
except
:
#no read depth
sys
.
stderr
.
write
(
"no DP "
+
record
.
CHROM
+
"
\t
"
+
str
(
record
.
POS
)
+
"
\n
"
)
write_output
(
lines2write
,
maxNbAlleles
,
args
.
output
)
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment