Newer
Older
message: " mapping homologous regions of both references using ISBPs markers as anchors for chromosome {wildcards.chrom}"
input:
#closestMarkers=config['results']+'/2.mergedClosestMarkers.txt',
marker5prime=config['results']+'/2.closestbed/{chrom}/upstream.txt',
marker3prime=config['results']+'/2.closestbed/{chrom}/downstream.txt',
refQuery=config['queryFasta'],
faiQuery=config['queryFasta']+'.fai',
refTarget=config['targetFasta'],
faiTarget=config['targetFasta']+'.fai',
markersOnTarget=config['results']+"/1.filteredISBPs/{chrom}/sorted.bed"
output:
allblat=config['results']+'/3.mapping/{chrom}/allBlat.csv',

Helene Rimbert
committed
summary=config['results']+'/3.mapping/{chrom}/mappingSummary.csv',
mapping=temp(directory(config['results']+'/3.mapping/{chrom}/temp'))
params: dir=directory(config['results']+'/3.mapping/{chrom}')

Helene Rimbert
committed
log: config['results']+'/3.mapping/{chrom}/microMappingPipeline.log'
bin/microMappingPipeline.py {input.marker5prime} {input.marker3prime} {params.dir} {input.refQuery} {input.refTarget} {input.markersOnTarget} &> {log}
rule recalcBlatMapped:
message: " Recalc the coordinates of genes mapped with the Blat pipeline for chromosome {wildcards.chrom}"
input:
allBlat=config['results']+'/3.mapping/{chrom}/allBlat.csv',
summary=config['results']+'/3.mapping/{chrom}/mappingSummary.csv',

Helene Rimbert
committed
mapping=config['results']+'/3.mapping/{chrom}/temp',
refQuery=config['queryFasta'],
faiQuery=config['queryFasta']+'.fai',
refTarget=config['targetFasta'],
faiTarget=config['targetFasta']+'.fai',
refGff=config['annotationQuery']
output: gff=temp(config['results']+'/4.recalcBlat/{chrom}/RecalcAnnotOnTarget.gff3')
log: config['results']+'/4.recalcBlat/{chrom}/RecalcAnnotOnTarget.log'
shell:
"""
bin/recalcCoordsFromBlat.py -b {input.allBlat} -s {input.summary} -o {output.gff} -q {input.refQuery} -t {input.refTarget} -g {input.refGff} &> {log}
"""
rule gtCleanBlatGff:
message: " Clean the gff file recalculated based on Blat fine mapping for chromosome {wildcards.chrom}"

Helene Rimbert
committed
input: gff=config['results']+'/4.recalcBlat/{chrom}/RecalcAnnotOnTarget.gff3',
mapping=config['results']+'/3.mapping/{chrom}/temp'
output: temp(config['results']+'/4.recalcBlat/{chrom}/RecalcAnnotOnTarget-clean.gff3')
log: config['results']+'/4.recalcBlat/{chrom}/RecalcAnnotOnTarget-clean.log'
shell:

Helene Rimbert
committed
gt gff3 -fixregionboundaries -sort -tidy -retainids {input.gff} 1> {output} 2> {log}
message: " Rescue anchoring of failed genes with gmap for chrom {wildcards.chrom}"

Helene Rimbert
committed
input: blat=config['results']+'/3.mapping/{chrom}/allBlat.csv',
wholeGenomeFasta=config['targetFasta'],
mapping=config['results']+'/3.mapping/{chrom}/temp'
output: txt=config['results']+'/4.gmapRescue/{chrom}.txt',
targetgff=temp(config['results']+'/4.gmapRescue/{chrom}.target.gff3'),
wholegenomegff=temp(config['results']+'/4.gmapRescue/{chrom}.wholeGenome.gff3')
params: blastdb=config['blastdb'], anchoringDir=config['results']+'/3.mapping/{chrom}/', gmapIndexDir=config['results'], gmapindex="target_gmapindex"

Helene Rimbert
committed
log: config['results']+'/4.gmapRescue/{chrom}.log'
bin/gmapRescue.sh {params.anchoringDir} {params.blastdb} {params.gmapindex} {params.gmapIndexDir} {output.targetgff} {output.wholegenomegff} {input.wholeGenomeFasta} 1> {output.txt} 2> {log}

Helene Rimbert
committed
rule recalcGmapRescue:
message: "Recalc the coordinates of the GMAP on target GFF3 files for chromosome {wildcards.chrom}"
input: gff=config['results']+'/4.gmapRescue/{chrom}.target.gff3',

Helene Rimbert
committed
mapping=config['results']+'/3.mapping/{chrom}/temp'
output: temp(config['results']+'/4.recalcGmap/{chrom}/recalc.gff3')
params: anchoringDir=config['results']+'/3.mapping/{chrom}',
bindir=os.getcwd()
log: config['results']+'/4.recalcGmap/{chrom}/recalc.log'
shell:
"""

Helene Rimbert
committed
bin/convertGmapCoords.py -G {input.gff} -m 'GMAP_ON_TARGET' -o {output} &> {log}
"""
rule saveGmapWG:
message: "Save the GMAP results of genes mapped on Whole Genome"

Helene Rimbert
committed
input: gff=config['results']+'/4.gmapRescue/{chrom}.wholeGenome.gff3',

Helene Rimbert
committed
map=config['chromMapID'],
mapping=config['results']+'/3.mapping/{chrom}/temp'
output: gff=temp(config['results']+'/4.gmapWholeGenome/{chrom}.wholeGenome.gff3'),
gfferror=temp(config['results']+'/4.gmapWholeGenome/{chrom}.wholeGenome_differentChrom.gff3')
log: config['results']+'/4.gmapWholeGenome/{chrom}.wholeGenome.log'
shell:
"""

Helene Rimbert
committed
#cp {input} {output} &> {log}
bin/reformatGmapWGGFF.py -i {input.gff} -m {input.map} -o {output.gff} -e {output.gfferror} &> {log}
"""
rule mergeFinalGff3:
message: " Merge Final GFF3 files: blat.gff3, rescue.gff3 and wholeGenome.gff3"
input: blat=config['results']+'/4.recalcBlat/{chrom}/RecalcAnnotOnTarget-clean.gff3',
rescue=config['results']+'/4.recalcGmap/{chrom}/recalc.gff3',

Helene Rimbert
committed
wg=config['results']+'/4.gmapWholeGenome/{chrom}.wholeGenome.gff3',
mapping=config['results']+'/3.mapping/{chrom}/temp'
output: annot=temp(config['results']+'/5.FINAL/{chrom}/annotation.gff3')

Helene Rimbert
committed
log: annot=config['results']+'/5.FINAL/{chrom}/annotation.log'

Helene Rimbert
committed
gt gff3 -sort -fixregionboundaries -tidy -retainids {input.blat} {input.rescue} {input.wg} 1> {output.annot} 2> {log.annot}
"""
rule checkMissing:
message: ""
input: gff=config['results']+'/5.FINAL/{chrom}/annotation.gff3',
ref=config['results']+"/1.features/{chrom}.bed"
output: temp(config['results']+'/5.FINAL/{chrom}/missing.txt')
log: config['results']+'/5.FINAL/{chrom}/missing.log'
shell:
"""

Helene Rimbert
committed
bin/checkFinalAnnot.py -i {input.gff} -a {input.ref} -m {output} &> {log}
"""
rule concatAllChromResults:
message: " concat all per chromosome results"
input: annot=expand(config['results']+'/5.FINAL/{chrom}/annotation.gff3',chrom=config['chromosomes']),

Helene Rimbert
committed
differentChrom=expand(config['results']+'/4.gmapWholeGenome/{chrom}.wholeGenome_differentChrom.gff3',chrom=config['chromosomes']),
missing=expand(config['results']+'/5.FINAL/{chrom}/missing.txt', chrom=config['chromosomes'])
output: annot=temp(config['finalPrefix']+'tmp.gff3'),

Helene Rimbert
committed
diffChrom=config['finalPrefix']+'_gmap_differentChrom.gff3',
missing=config['finalPrefix']+'_missing.txt'
log: config['finalPrefix']+'.log'
shell:
"""

Helene Rimbert
committed
gt gff3 -fixregionboundaries -sort -tidy -retainids {input.annot} 1> {output.annot} 2> {log}
gt gff3 -fixregionboundaries -sort -tidy -retainids {input.differentChrom} 1> {output.diffChrom} 2>> {log}
cat {input.missing} 1> {output.missing} 2>> {log}
"""
rule renameGeneIds:
message: " set final gene IDs for the new annotation"
input: gff=config['finalPrefix']+'tmp.gff3',
map=config['chromMapID']
output: gffhc=config['finalPrefix']+'_HC.gff3',
gfflc=config['finalPrefix']+'_LC.gff3',
correspondance=config['finalPrefix']+'_IDMapping.csv',
overlap=config['finalPrefix']+'_overlap.bed'

Helene Rimbert
committed
params: prefix=config['gff_prefix'], version=config['gff_version'], source=config['gff_source']
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
log: config['finalPrefix']+'_IDMapping.log'
shell:
"""
bin/renameGffID.py -i {input.gff} -m {input.map} -o {output.gffhc} -l {output.gfflc} -c {output.correspondance} -p {params.prefix} -v {params.version} -s {params.source} -x {output.overlap} &> {log}
"""
rule generateFastaSequencesHC:
message: "generate fasta sequences using gffread for HC genes"
input: gff=config['finalPrefix']+'_HC.gff3',
fastaref=config['targetFasta']
output: mrna=config['finalPrefix']+'_HC.transcripts.fasta',
cds=config['finalPrefix']+'_HC.cds.fasta',
pep=config['finalPrefix']+'_HC.proteins.fasta'
log: config['finalPrefix']+'_HC.gffread.log'
shell:
"""
gffread {input.gff} -w {output.mrna} -x {output.cds} -y {output.pep} -g {input.fastaref} -W -O &> {log}
"""
rule generateFastaSequencesLC:
message: "generate fasta sequences using gffread for LC genes"
input:
gff=config['finalPrefix']+'_LC.gff3',
fastaref=config['targetFasta']
output: mrna=config['finalPrefix']+'_LC.transcripts.fasta',
cds=config['finalPrefix']+'_LC.cds.fasta',
pep=config['finalPrefix']+'_LC.proteins.fasta'
log: config['finalPrefix']+'_LC.gffread.log'

Helene Rimbert
committed
shell:
"""
gffread {input.gff} -w {output.mrna} -x {output.cds} -y {output.pep} -g {input.fastaref} -W -O &> {log}
"""

Helene Rimbert
committed
rule concatblatSummary:
message: " Concat all Blat summary"
input:blat=expand(config['results']+'/3.mapping/{chrom}/allBlat.csv', chrom=config['chromosomes'])
output: config['finalPrefix']+'_blatSummary.csv'
shell:
"""
cat {input.blat} |cut -f -3,5- 1> {output}
"""
rule concatAnchoringSummary:
message: " Concat all Anchoring summary"
input:anchoring=expand(config['results']+'/3.mapping/{chrom}/mappingSummary.csv', chrom=config['chromosomes'])
output: config['finalPrefix']+'_anchoringSummary.csv'
shell:
"""
cat {input.anchoring} |cut -f -4,5,8- 1> {output}
"""