Skip to content
Snippets Groups Projects
Commit 62df6545 authored by Helene Rimbert's avatar Helene Rimbert
Browse files

correction of gff attribute modification in bin/reformatGmapWGGFF.py

parent fade4647
No related branches found
No related tags found
No related merge requests found
#!/usr/bin/env python3
#!/usr/bin/env python3.5
# coding: utf-8
import os.path
from os import path
......@@ -23,6 +23,7 @@ class reformatGmapGff (object):
def main(self):
self.getOptions()
self.checkInputs()
self.regexlcl=re.compile(r'lcl\|')
# load inputs
self.loadChromosomeMap()
......@@ -74,6 +75,9 @@ class reformatGmapGff (object):
print("\t".join(map(str, feature)), file=self.errorFH)
def reformatGff(self):
lastmrna=''
featureindex={'CDS':1, 'exon':1}
with open(self.options.input) as mygff:
for line in mygff.readlines():
if not line.startswith('#'):
......@@ -84,6 +88,7 @@ class reformatGmapGff (object):
if featuretype == 'gene':
# get only the gene name because here the ID corresponds to the mrna isoform which has been mapped
geneId=featureId.split('.')[0]
lastmrna=''
if geneId not in self.knownGenes:
self.knownGenes.append(geneId)
......@@ -100,11 +105,13 @@ class reformatGmapGff (object):
'attributes': gff_array[8]}
elif featuretype == 'mRNA':
mrna=self.getFeatureAttribute(gff=line, attribute='ID')
mrna=self.getFeatureAttribute(gff=line, attribute='Name')
lastmrna=mrna
featureindex={'CDS':1, 'exon':1}
self.knownMrna.append(mrna)
parent=mrna.split('.')[0]
self.geneIsoforms[parent].append(mrna)
gff_array[8] = self.setGffAttributes(attributes=gff_array[8], new={'Parent': parent})
gff_array[8] = self.setGffAttributes(attributes=gff_array[8], new={'Parent': parent, 'ID':mrna, 'Name':mrna})
self.mrnaFeatures[mrna]=[gff_array]
# check if this new mrna has start or stop different from the gene feature
......@@ -118,9 +125,13 @@ class reformatGmapGff (object):
sys.stderr.write(" New gene length: {}\n".format(int(gff_array[4])-int(gff_array[3])+1))
else:
featureId=self.getFeatureAttribute(gff=line, attribute='ID')
featureParent=self.getFeatureAttribute(gff=line, attribute='Parent')
self.mrnaFeatures[featureParent].append(line.rstrip('\n').split('\t'))
featureId=lastmrna+'.'+featuretype.lower()+str(featureindex[featuretype])
featureParent=lastmrna
gff_array[8] = self.setGffAttributes(attributes=gff_array[8], new={'ID':featureId, 'Parent': featureParent, 'Name':featureId})
self.mrnaFeatures[featureParent].append(gff_array)
featureindex[featuretype]+=1
numgenes=len(self.knownGenes)
nummrna=len(self.knownMrna)
......@@ -154,6 +165,8 @@ class reformatGmapGff (object):
(key, val) = attr.split('=')
attr_dict[key] = val
if attribute in attr_dict.keys():
if self.regexlcl.search(attr_dict[attribute]) and attribute in ['ID', 'Parent', 'Name']:
attr_dict[attribute] = self.regexlcl.sub('',attr_dict[attribute])
return attr_dict[attribute]
else:
sys.stderr.write(' ERROR: no attribute {} in gff record {}'.format(attribute, gff))
......@@ -196,4 +209,4 @@ class reformatGmapGff (object):
if __name__== "__main__":
run=reformatGmapGff()
run.main()
\ No newline at end of file
run.main()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment