Skip to content
Snippets Groups Projects
Commit 32c97e2e authored by Helene Rimbert's avatar Helene Rimbert
Browse files

new feature in micromapping: deal with target length between 2 mapped ISBP markers

parent 614c7080
No related branches found
No related tags found
No related merge requests found
......@@ -26,6 +26,8 @@ blatResults = defaultdict()
outputTable_file=outputDir+'/mappingSummary.csv'
outputBlatFile=outputDir+'/allBlat.csv'
mappingResults = defaultdict()
minTargetSize=500 # minimum size of the target genomique sequence: min 500bp
maxTargetSize=10000000 # maximum size of the target genomique sequence: max 1Mb
def main():
......@@ -79,6 +81,7 @@ def main():
markerCoords=defaultdict()
pairUsed = 'NA'
markerPairStatus = 0
targetLengthStatus = 'NA'
### check if markers are mapped on same target chromosome
for pair in markerPairs:
......@@ -167,7 +170,7 @@ def main():
This gene cannot be anchored
we move on to the next gene
"""
print(" Could not identify a pair of marker to use to anchor the gene {}.".format(gene))
sys.stderr.write(" Could not identify a pair of marker to use to anchor the gene {}".format(gene))
print(" going to next gene")
flankingMarkersPerGenes[gene]['markerPairStatus'] = 'noAnchorsOnTargetGenome'
flankingMarkersPerGenes[gene]['markerPairUsed'] = pairUsed
......@@ -223,25 +226,40 @@ def main():
"""
run Blat of the genomic sequence on the target region on new assembly
only if the target sequence matches the min and max size
============================================================================================================
"""
blatResultFile=runBlat(db=targetFasta2blast,
query=queryFasta2blast,
blatResult=workingDir+'/blat.txt',
outFormat='psl')
targetSeqLength = len(targetSeqRecord.seq)
if targetSeqLength < minTargetSize:
print(" The target genomic sequence is too small: {} inferior to {}".format(targetSeqLength, minTargetSize))
markerCoords['targetLengthStatus'] = 'tooSmall'
"""
check if the genomic align perfectly on the new reference
============================================================================================================
"""
blatresult=checkPerfectHit(blatresult=blatResultFile,
workingDir=workingDir,
maxhit=1)
elif targetSeqLength > maxTargetSize:
print(" The target genomic sequence is too long: {} supperior to {}".format(targetSeqLength, maxTargetSize))
markerCoords['targetLengthStatus'] = 'tooLong'
else:
markerCoords['targetLengthStatus'] = 'OK'
blatResultFile=runBlat(db=targetFasta2blast,
query=queryFasta2blast,
blatResult=workingDir+'/blat.txt',
outFormat='psl')
"""
check if the genomic align perfectly on the new reference
============================================================================================================
"""
blatresult=checkPerfectHit(blatresult=blatResultFile,
workingDir=workingDir,
maxhit=1)
if blatresult['blatStatus'] == 'unmapped':
print(" No BLAT results found for gene {}".format(gene))
outputSummaryLine = createOutputLine(blat=blatresult, gene=flankingMarkersPerGenes[gene], markers=markerCoords )
elif markerCoords['targetLengthStatus'] != 'OK':
print(" Target sequence for gene {} is too long/small: {}".format(gene,markerCoords['targetLengthStatus']))
outputSummaryLine = createOutputLine(blat=blatresult, gene=flankingMarkersPerGenes[gene], markers=markerCoords )
else:
print(" BLAT result found for gene {} with status {}".format(gene,blatresult['blatStatus']))
#print(blatresult)
......@@ -267,6 +285,7 @@ def createOutputLine(blat,gene,markers):
outputArray.append(str(value))
# loop over marker info
outputArray.append(str(markers['targetLengthStatus']))
upstreamMarkerInfo = [markers['upstream']['id'],
markers['upstream']['query']['chrom']+':'+str(markers['upstream']['query']['start'])+'-'+str(markers['upstream']['query']['stop']),
markers['upstream']['target']['chrom']+':'+str(markers['upstream']['target']['start'])+'-'+str(markers['upstream']['target']['stop'])]
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment