Newer
Older
message: " mapping homologous regions of both references using ISBPs markers as anchors for chromosome {wildcards.chrom}"
input:
#closestMarkers=config['results']+'/2.mergedClosestMarkers.txt',

Helene Rimbert
committed
marker5prime=config['results']+'/2.closestbed_split/{chrom}.upstream.txt',
marker3prime=config['results']+'/2.closestbed_split/{chrom}.downstream.txt',
refQuery=config['queryFasta'],
faiQuery=config['queryFasta']+'.fai',
refTarget=config['targetFasta'],
faiTarget=config['targetFasta']+'.fai',

Helene Rimbert
committed
markersOnTarget=config['results']+"/1.filteredISBPs.sorted.{chrom}.bed"
output:
allblat=config['results']+'/3.mapping/{chrom}/allBlat.csv',
summary=config['results']+'/3.mapping/{chrom}/mappingSummary.csv'
params: dir=directory(config['results']+'/3.mapping/{chrom}')

Helene Rimbert
committed
log: config['results']+'/3.mapping/{chrom}/microMappingPipeline.log'
bin/microMappingPipeline.py {input.marker5prime} {input.marker3prime} {params.dir} {input.refQuery} {input.refTarget} {input.markersOnTarget} &> {log}
rule recalcBlatMapped:
message: " Recalc the coordinates of genes mapped with the Blat pipeline for chromosome {wildcards.chrom}"
input:
allBlat=config['results']+'/3.mapping/{chrom}/allBlat.csv',
summary=config['results']+'/3.mapping/{chrom}/mappingSummary.csv',
refQuery=config['queryFasta'],
faiQuery=config['queryFasta']+'.fai',
refTarget=config['targetFasta'],
faiTarget=config['targetFasta']+'.fai',
refGff=config['annotationQuery']
output: gff=temp(config['results']+'/4.recalcBlat/{chrom}/RecalcAnnotOnTarget.gff3')
log: config['results']+'/4.recalcBlat/{chrom}/RecalcAnnotOnTarget.log'
shell:
"""
bin/recalcCoordsFromBlat.py -b {input.allBlat} -s {input.summary} -o {output.gff} -q {input.refQuery} -t {input.refTarget} -g {input.refGff} &> {log}
"""
rule gtCleanBlatGff:
message: " Clean the gff file recalculated based on Blat fine mapping for chromosome {wildcards.chrom}"
input: config['results']+'/4.recalcBlat/{chrom}/RecalcAnnotOnTarget.gff3'
output: config['results']+'/4.recalcBlat/{chrom}/RecalcAnnotOnTarget-clean.gff3'
log: config['results']+'/4.recalcBlat/{chrom}/RecalcAnnotOnTarget-clean.log'
shell:

Helene Rimbert
committed
gt gff3 -fixregionboundaries -sort -tidy -retainids {input} 1> {output} 2> {log}
message: " Rescue anchoring of failed genes with gmap for chrom {wildcards.chrom}"
input: blat=config['results']+'/3.mapping/{chrom}/allBlat.csv', gmapindex=config['results']+"/target_gmapindex"
output: txt=config['results']+'/4.gmapRescue/{chrom}.txt',
targetgff=config['results']+'/4.gmapRescue/{chrom}.target.gff3',
wholegenomegff=config['results']+'/4.gmapRescue/{chrom}.wholeGenome.gff3'
params: blastdb=config['blastdb'], anchoringDir=config['results']+'/3.mapping/{chrom}/', gmapIndexDir=config['results']

Helene Rimbert
committed
log: config['results']+'/4.gmapRescue/{chrom}.log'
bin/gmapRescue.sh {params.anchoringDir} {params.blastdb} {input.gmapindex} {params.gmapIndexDir} {output.targetgff} {output.wholegenomegff} 1> {output.txt} 2> {log}

Helene Rimbert
committed
rule recalcGmapRescue:
message: "Recalc the coordinates of the GMAP on target GFF3 files for chromosome {wildcards.chrom}"
input: config['results']+'/4.gmapRescue/{chrom}.target.gff3',
output: config['results']+'/4.recalcGmap/{chrom}/recalc.gff3'
params: anchoringDir=config['results']+'/3.mapping/{chrom}',
bindir=os.getcwd()
log: config['results']+'/4.recalcGmap/{chrom}/recalc.log'
shell:
"""
bin/recalcGmapRescue.sh {params.anchoringDir} {input} {output} {params.bindir} &> {log}
"""
rule saveGmapWG:
message: "Save the GMAP results of genes mapped on Whole Genome"

Helene Rimbert
committed
input: gff=config['results']+'/4.gmapRescue/{chrom}.wholeGenome.gff3',
map=config['chromMapID']
output: gff=config['results']+'/4.gmapWholeGenome/{chrom}.wholeGenome.gff3',
gfferror=config['results']+'/4.gmapWholeGenome/{chrom}.wholeGenome_differentChrom.gff3'
log: config['results']+'/4.gmapWholeGenome/{chrom}.wholeGenome.log'
shell:
"""

Helene Rimbert
committed
#cp {input} {output} &> {log}
bin/reformatGmapWGGFF.py -i {input.gff} -m {input.map} -o {output.gff} -e {output.gfferror} &> {log}
"""
rule mergeFinalGff3:
message: " Merge Final GFF3 files: blat.gff3, rescue.gff3 and wholeGenome.gff3"
input: blat=config['results']+'/4.recalcBlat/{chrom}/RecalcAnnotOnTarget-clean.gff3',
rescue=config['results']+'/4.recalcGmap/{chrom}/recalc.gff3',
wg=config['results']+'/4.gmapWholeGenome/{chrom}.wholeGenome.gff3'

Helene Rimbert
committed
output: annot=config['results']+'/5.FINAL/{chrom}/annotation.gff3'
log: annot=config['results']+'/5.FINAL/{chrom}/annotation.log'

Helene Rimbert
committed
gt gff3 -sort -fixregionboundaries -tidy -retainids {input.blat} {input.rescue} {input.wg} 1> {output.annot} 2> {log.annot}
"""
rule checkMissing:
message: ""
input: gff=config['results']+'/5.FINAL/{chrom}/annotation.gff3',
ref=config['results']+"/1.features/{chrom}.bed"
output: config['results']+'/5.FINAL/{chrom}/missing.txt'
log: config['results']+'/5.FINAL/{chrom}/missing.log'
shell:
"""

Helene Rimbert
committed
bin/checkFinalAnnot.py -i {input.gff} -a {input.ref} -m {output} &> {log}
"""
rule concatAllChromResults:
message: " concat all per chromosome results"
input: annot=expand(config['results']+'/5.FINAL/{chrom}/annotation.gff3',chrom=config['chromosomes']),

Helene Rimbert
committed
differentChrom=expand(config['results']+'/4.gmapWholeGenome/{chrom}.wholeGenome_differentChrom.gff3',chrom=config['chromosomes']),
missing=expand(config['results']+'/5.FINAL/{chrom}/missing.txt', chrom=config['chromosomes'])
output: annot=temp(config['finalPrefix']+'tmp.gff3'),

Helene Rimbert
committed
diffChrom=config['finalPrefix']+'_gmap_differentChrom.gff3',
missing=config['finalPrefix']+'_missing.txt'
log: config['finalPrefix']+'.log'
shell:
"""

Helene Rimbert
committed
gt gff3 -fixregionboundaries -sort -tidy -retainids {input.annot} 1> {output.annot} 2> {log}
gt gff3 -fixregionboundaries -sort -tidy -retainids {input.differentChrom} 1> {output.diffChrom} 2>> {log}
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
cat {input.missing} 1> {output.missing} 2>> {log}
"""
rule renameGeneIds:
message: " set final gene IDs for the new annotation"
input: gff=config['finalPrefix']+'tmp.gff3',
map=config['chromMapID']
output: gffhc=config['finalPrefix']+'_HC.gff3',
gfflc=config['finalPrefix']+'_LC.gff3',
correspondance=config['finalPrefix']+'_IDMapping.csv',
overlap=config['finalPrefix']+'_overlap.bed'
params: prefix='TraesCS', version='03G', source='IWGSC_v2.1'
log: config['finalPrefix']+'_IDMapping.log'
shell:
"""
bin/renameGffID.py -i {input.gff} -m {input.map} -o {output.gffhc} -l {output.gfflc} -c {output.correspondance} -p {params.prefix} -v {params.version} -s {params.source} -x {output.overlap} &> {log}
"""
rule generateFastaSequencesHC:
message: "generate fasta sequences using gffread for HC genes"
input: gff=config['finalPrefix']+'_HC.gff3',
fastaref=config['targetFasta']
output: mrna=config['finalPrefix']+'_HC.transcripts.fasta',
cds=config['finalPrefix']+'_HC.cds.fasta',
pep=config['finalPrefix']+'_HC.proteins.fasta'
log: config['finalPrefix']+'_HC.gffread.log'
shell:
"""
gffread {input.gff} -w {output.mrna} -x {output.cds} -y {output.pep} -g {input.fastaref} -W -O &> {log}
"""
rule generateFastaSequencesLC:
message: "generate fasta sequences using gffread for LC genes"
input:
gff=config['finalPrefix']+'_LC.gff3',
fastaref=config['targetFasta']
output: mrna=config['finalPrefix']+'_LC.transcripts.fasta',
cds=config['finalPrefix']+'_LC.cds.fasta',
pep=config['finalPrefix']+'_LC.proteins.fasta'
log: config['finalPrefix']+'_LC.gffread.log'

Helene Rimbert
committed
shell:
"""
gffread {input.gff} -w {output.mrna} -x {output.cds} -y {output.pep} -g {input.fastaref} -W -O &> {log}
"""