Skip to content
Snippets Groups Projects
Commit 93a7ae2f authored by Celine Noirot
Browse files

Try to handle species in refseq database.

parent c44ea033
No related branches found
No related tags found
No related merge requests found
......@@ -62,7 +62,7 @@ def spFromHit( alignment ):
@param alignment : [Bio::blast::Record::Alignment] the alignment.
"""
species = None
if re.match(".*OS=.*", alignment.hit_def) is not None:
if re.match(".*OS=.*", alignment.hit_def) is not None: # refseq_prot, swissprot
try:
matches = re.search("OS=([^=]+)", alignment.hit_def)
if matches is not None:
......@@ -73,16 +73,30 @@ def spFromHit( alignment ):
species = clean_matches.group(1)
except:
pass
else :
try:
matches = re.search('\[([^\]]+)\]', alignment.hit_def)
species = matches.group(1)
except:
species = None
pass
else :
#refseq_genomic >gi|76496352|ref|NT_165333.1|NT_165333 Mus musculus genomic sequence, ENCODE region ENm001
#refseq_rna >gi|31341369|ref|NM_176670.2| Bos taurus ATP synthase, H+ transporting, mitochondrial F1 complex, delta subunit (ATP5D), mRNA
#refseq_prot >gi|66818355|ref|XP_642837.1| hypothetical protein DDB_G0276911 [Dictyostelium discoideum AX4]
if re.match("gi\|\d+\|ref\|\S+\|", alignment.hit_id) is not None: #if refseq
try:
matches = re.search('\[([^\]]+)\]', alignment.hit_def) #si refseq_prot espece entre []
species = matches.group(1)
except: # refseq_genomic ou refseq_rna
matches = re.search("gi\|\d+\|ref\|\S+\|\s(\S+\s\S+)", alignment.hit_def)
if matches is not None:
species = matches.group(1)
else :
try:
matches = re.search('\[([^\]]+)\]', alignment.hit_def)
species = matches.group(1)
except:
species = None
pass
# Other databases are not implemented
return species
def geneNameFromHit( alignment, gene_name_from_accession ):
"""
@summary : Returns the subject's name.
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment