Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
genotoul-bioinfo
miniannotator
Commits
0f83d035
Commit
0f83d035
authored
Sep 05, 2018
by
Christophe Klopp
Browse files
add new parameter to set source string value in gtf and gff second column to ease file merging
parent
4171d11b
Changes
1
Hide whitespace changes
Inline
Side-by-side
miniannotator.py
View file @
0f83d035
...
...
@@ -367,7 +367,7 @@ class Miniannotator:
return
gene_id
,
gene_o
def
write_gtf_file
(
self
,
gtf_file
,
genes
):
def
write_gtf_file
(
self
,
gtf_file
,
genes
,
source
):
"""
Write genes, transcripts, exons and indels to a GTF file
...
...
@@ -380,7 +380,7 @@ class Miniannotator:
with
open
(
gtf_file
,
"w"
)
as
gtf
:
for
gene_id
,
gene
in
genes
.
items
():
line
=
'{seqname}
\t
miniannotator
\t
{feature}
\t
{start}
\t
{end}
\t
.
\t
{strand}
\t
.
\t
gene_id "{gene_id}"'
\
line
=
'{seqname}
\t
'
+
source
+
'
\t
{feature}
\t
{start}
\t
{end}
\t
.
\t
{strand}
\t
.
\t
gene_id "{gene_id}"'
\
'{attrs}
\n
'
gtf
.
write
(
line
.
format
(
seqname
=
gene
[
"seqname"
],
...
...
@@ -430,7 +430,7 @@ class Miniannotator:
attrs
=
attrs_ex
))
def
write_gff_file
(
self
,
gff_file
,
genes
):
def
write_gff_file
(
self
,
gff_file
,
genes
,
source
):
"""
Write genes, transcripts, exons and indels to a GFF file
...
...
@@ -443,7 +443,7 @@ class Miniannotator:
with
open
(
gff_file
,
"w"
)
as
gff
:
for
gene_id
,
gene
in
genes
.
items
():
line
=
'{seqname}
\t
miniannotator
\t
{feature}
\t
{start}
\t
{end}
\t
.
\t
{strand}
\t
.
\t
{attrs}
\n
'
line
=
'{seqname}
\t
'
+
source
+
'
\t
{feature}
\t
{start}
\t
{end}
\t
.
\t
{strand}
\t
.
\t
{attrs}
\n
'
attrs
=
'ID={gene_id}'
.
format
(
gene_id
=
gene_id
)
gff
.
write
(
line
.
format
(
seqname
=
gene
[
"seqname"
],
...
...
@@ -481,7 +481,7 @@ class Miniannotator:
def
search_genes
(
self
,
gtf_file
):
def
search_genes
(
self
,
gtf_file
,
source
):
"""
Parse BAM file to search genes and exons positions
Query match position on the reference defines gene position
...
...
@@ -522,11 +522,11 @@ class Miniannotator:
full_genes_2
=
OrderedDict
(
pool
.
map
(
Miniannotator
.
_rename_exons
,
list
(
full_genes
.
items
())))
print
(
"Writing to GTF file..."
,
flush
=
True
)
self
.
write_gtf_file
(
gtf_file
=
gtf_file
,
genes
=
full_genes_2
)
self
.
write_gtf_file
(
gtf_file
=
gtf_file
,
genes
=
full_genes_2
,
source
=
source
)
print
(
"Writing to GFF file..."
,
flush
=
True
)
gff_file
=
os
.
path
.
splitext
(
gtf_file
)[
0
]
+
".gff"
self
.
write_gff_file
(
gff_file
=
gff_file
,
genes
=
full_genes_2
)
self
.
write_gff_file
(
gff_file
=
gff_file
,
genes
=
full_genes_2
,
source
=
source
)
if
__name__
==
"__main__"
:
import
argparse
...
...
@@ -539,6 +539,7 @@ if __name__ == "__main__":
parser
.
add_argument
(
"-q"
,
"--min-qoverlap"
,
help
=
"Minimal query overlap [0-100]"
,
type
=
int
,
default
=
90
,
required
=
False
)
parser
.
add_argument
(
"-o"
,
"--output-dir"
,
help
=
"Output folder path"
,
required
=
False
,
default
=
"."
)
parser
.
add_argument
(
"-s"
,
"--source"
,
help
=
"Source description string (second column in gtf or gff files)"
,
required
=
False
,
default
=
"miniannotator"
)
args
=
parser
.
parse_args
()
...
...
@@ -568,4 +569,4 @@ if __name__ == "__main__":
map
=
args
.
map
)
# Search genes
annotator
.
search_genes
(
os
.
path
.
join
(
args
.
output_dir
,
"annotations.gtf"
))
annotator
.
search_genes
(
os
.
path
.
join
(
args
.
output_dir
,
"annotations.gtf"
)
,
args
.
source
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment