Commit 0f83d035 authored by Christophe Klopp
Browse files

Add a new parameter to set the source string (the second column of the GTF and GFF files) to ease file merging

parent 4171d11b
......@@ -367,7 +367,7 @@ class Miniannotator:
return gene_id, gene_o
def write_gtf_file(self, gtf_file, genes):
def write_gtf_file(self, gtf_file, genes, source):
"""
Write genes, transcripts, exons and indels to a GTF file
......@@ -380,7 +380,7 @@ class Miniannotator:
with open(gtf_file, "w") as gtf:
for gene_id, gene in genes.items():
line = '{seqname}\tminiannotator\t{feature}\t{start}\t{end}\t.\t{strand}\t.\tgene_id "{gene_id}"' \
line = '{seqname}\t'+source+'\t{feature}\t{start}\t{end}\t.\t{strand}\t.\tgene_id "{gene_id}"' \
'{attrs}\n'
gtf.write(line.format(
seqname=gene["seqname"],
......@@ -430,7 +430,7 @@ class Miniannotator:
attrs=attrs_ex
))
def write_gff_file(self, gff_file, genes):
def write_gff_file(self, gff_file, genes, source):
"""
Write genes, transcripts, exons and indels to a GFF file
......@@ -443,7 +443,7 @@ class Miniannotator:
with open(gff_file, "w") as gff:
for gene_id, gene in genes.items():
line = '{seqname}\tminiannotator\t{feature}\t{start}\t{end}\t.\t{strand}\t.\t{attrs}\n'
line = '{seqname}\t'+source+'\t{feature}\t{start}\t{end}\t.\t{strand}\t.\t{attrs}\n'
attrs = 'ID={gene_id}'.format(gene_id=gene_id)
gff.write(line.format(
seqname=gene["seqname"],
......@@ -481,7 +481,7 @@ class Miniannotator:
def search_genes(self, gtf_file):
def search_genes(self, gtf_file, source):
"""
Parse BAM file to search genes and exons positions
Query match position on the reference defines gene position
......@@ -522,11 +522,11 @@ class Miniannotator:
full_genes_2 = OrderedDict(pool.map(Miniannotator._rename_exons, list(full_genes.items())))
print("Writing to GTF file...", flush=True)
self.write_gtf_file(gtf_file=gtf_file, genes=full_genes_2)
self.write_gtf_file(gtf_file=gtf_file, genes=full_genes_2, source=source)
print("Writing to GFF file...", flush=True)
gff_file = os.path.splitext(gtf_file)[0]+".gff"
self.write_gff_file(gff_file=gff_file, genes=full_genes_2)
self.write_gff_file(gff_file=gff_file, genes=full_genes_2, source=source)
if __name__ == "__main__":
import argparse
......@@ -539,6 +539,7 @@ if __name__ == "__main__":
parser.add_argument("-q", "--min-qoverlap", help="Minimal query overlap [0-100]", type=int, default=90,
required=False)
parser.add_argument("-o", "--output-dir", help="Output folder path", required=False, default=".")
parser.add_argument("-s", "--source", help="Source description string (second column in gtf or gff files)", required=False, default="miniannotator")
args = parser.parse_args()
......@@ -568,4 +569,4 @@ if __name__ == "__main__":
map=args.map)
# Search genes
annotator.search_genes(os.path.join(args.output_dir, "annotations.gtf"))
annotator.search_genes(os.path.join(args.output_dir, "annotations.gtf"), args.source)
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment