Skip to content
Snippets Groups Projects
Commit 13be85a0 authored by Helene Rimbert's avatar Helene Rimbert
Browse files

Final version of blatMapping pipeline

parent 73118bff
No related branches found
No related tags found
No related merge requests found
This diff is collapsed.
......@@ -5,4 +5,4 @@ targetFasta: 'data/target.fasta'
isbpBam: 'data/ISBP_refseqv2mapping.bam'
isbpBed: 'data/ISBP_refseqv1.bed'
results: 'results'
blastdb: 'data/query_mrna_blastdb'
......@@ -8,32 +8,45 @@ rule selectMappedISBP:
cut -f 4 {input.mapped}|fgrep -wf - {input.original} |sort -k1,1 -k2,2n 1> {output} 2> {log}
"""
rule keepMappedOnSameChrom:
message: " Select Mapped ISBPs on same chromosome (or on unknown chromosome)"
input: isbpOnQuery=config['results']+'/2.mappedISBPs_coordsOnQuery.bed', isbpOnTarget=config['results']+"/1.filteredISBPs.sorted.bed"
output: config['results']+'/2.isbpMappedOnSameChrom.bed'
log: config['results']+'/2.isbpMappedOnSameChrom.log'
shell:
"""
join -1 4 -2 4 <(sort -k4,4 {input.isbpOnQuery}) <(sort -k4,4 {input.isbpOnTarget})|sed "s/ /\t/g"|awk "{{if (tolower(\$2) == tolower(\$5)) {{print \$2,\$3,\$4,\$1}} }}" |sort -k1,1 -k2,2n |sed "s/ /\t/g"> {output}
join -1 4 -2 4 <(sort -k4,4 {input.isbpOnQuery}) <(sort -k4,4 {input.isbpOnTarget})|sed "s/ /\t/g"|fgrep -i chrun | awk "{{print \$2,\$3,\$4,\$1}}" |sed "s/ /\t/g">> {output}
sort -k1,1 -k2,2n {output} > toto && mv toto {output}
"""
rule upstreamClosest:
message: " Collect closest marker upstream of genes"
input: annot=config['results']+"/1.features.bed", markers=config['results']+'/2.mappedISBPs_coordsOnQuery.bed'
input: annot=config['results']+"/1.features.bed", markers=config['results']+'/2.isbpMappedOnSameChrom.bed'
output: temp(config['results']+'/2.closestbedUpstream.txt')
log: config['results']+"/2.closestbedUpstream.log"
shell:
"""
bedtools closest -id -D ref -io -a {input.annot} -b {input.markers} 1> {output} 2> {log}
bedtools closest -k 5 -id -D ref -io -a {input.annot} -b {input.markers} 1> {output} 2> {log}
"""
rule downstreamClosest:
message: " Collect Downstream marker upstream of genes"
input: annot=config['results']+"/1.features.bed", markers=config['results']+'/2.mappedISBPs_coordsOnQuery.bed'
message: " Collect Downstream marker downstream of genes"
input: annot=config['results']+"/1.features.bed", markers=config['results']+'/2.isbpMappedOnSameChrom.bed'
output: temp(config['results']+'/2.downstreamClosest.txt')
log: config['results']+"/2.downstreamClosest.log"
shell:
"""
bedtools closest -iu -D ref -io -a {input.annot} -b {input.markers} 1> {output} 2> {log}
bedtools closest -k 5 -iu -D ref -io -a {input.annot} -b {input.markers} 1> {output} 2> {log}
"""
rule mergeClosest:
message: " Merge both upstream and downstream results"
input: up=config['results']+'/2.closestbedUpstream.txt', down=config['results']+'/2.downstreamClosest.txt'
output: config['results']+'/2.mergedClosestMarkers.txt'
log: config['results']+'/2.mergedClosestMarkers.log'
shell:
"""
join -1 4 -2 4 <(sort -k4,4 {input.up}) <(sort -k4,4 {input.down})|sed "s/ /\t/g"|cut -f -11,17- |sort -k2,2 -k3,3n |awk "{{size=\$14-\$8;print \$0,size}}"|sed "s/ /\t/g" 1> {output} 2> {log}
"""
\ No newline at end of file
# rule mergeClosest:
# message: " Merge both upstream and downstream results"
# input: up=config['results']+'/2.closestbedUpstream.txt', down=config['results']+'/2.downstreamClosest.txt'
# output: config['results']+'/2.mergedClosestMarkers.txt'
# log: config['results']+'/2.mergedClosestMarkers.log'
# shell:
# """
# join -1 4 -2 4 <(sort -k4,4 {input.up}) <(sort -k4,4 {input.down})|sed "s/ /\t/g"|cut -f -11,17- |sort -k2,2 -k3,3n |awk "{{size=\$14-\$8;print \$0,size}}"|sed "s/ /\t/g" 1> {output} 2> {log}
# """
\ No newline at end of file
rule mapHomologousRegions:
message: " mapping homologous regions of both references using ISBPs markers as anchors"
input: closestMarkers=config['results']+'/2.mergedClosestMarkers.txt',
input:
#closestMarkers=config['results']+'/2.mergedClosestMarkers.txt',
marker5prime=config['results']+'/2.closestbedUpstream.txt',
marker3prime=config['results']+'/2.downstreamClosest.txt',
refQuery=config['queryFasta'],
faiQuery=config['queryFasta']+'.fai',
refTarget=config['targetFasta'],
......@@ -10,5 +13,16 @@ rule mapHomologousRegions:
log: config['results']+'/3.mapping.log'
shell:
"""
bin/microMappingPipeline.py {input.closestMarkers} {output} {input.refQuery} {input.refTarget} {input.markersOnTarget} {markersIds} &> {log}
"""
\ No newline at end of file
echo bin/microMappingPipeline.py {input.marker5prime} {input.marker3prime} {output} {input.refQuery} {input.refTarget} {input.markersOnTarget} &> {log}
"""
rule gmapRescue:
message: " Rescue anchoring of failed genes with gmap"
input: anchoringDir=directory(config['results']+'/3.mapping'),
blastdb=config['blastdb']
output: config['results']+'/4.gmapRescue.txt'
log: config['results']+'/4.gmapRescue.log'
shell:
"""
bin/gmapRescue.sh {input.anchoringDir} {input.blastdb} 1> {output} 2> {log}
"""
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment