Newer
Older
message: " mapping homologous regions of both references using ISBPs markers as anchors for chromosome {wildcards.chrom}"
input:
#closestMarkers=config['results']+'/2.mergedClosestMarkers.txt',

Helene Rimbert
committed
marker5prime=config['results']+'/2.closestbed_split/{chrom}.upstream.txt',
marker3prime=config['results']+'/2.closestbed_split/{chrom}.downstream.txt',
refQuery=config['queryFasta'],
faiQuery=config['queryFasta']+'.fai',
refTarget=config['targetFasta'],
faiTarget=config['targetFasta']+'.fai',

Helene Rimbert
committed
markersOnTarget=config['results']+"/1.filteredISBPs.sorted.{chrom}.bed"
output:
allblat=config['results']+'/3.mapping/{chrom}/allBlat.csv',
summary=config['results']+'/3.mapping/{chrom}/mappingSummary.csv'
params: dir=directory(config['results']+'/3.mapping/{chrom}')

Helene Rimbert
committed
log: config['results']+'/3.mapping/{chrom}/microMappingPipeline.log'
bin/microMappingPipeline.py {input.marker5prime} {input.marker3prime} {params.dir} {input.refQuery} {input.refTarget} {input.markersOnTarget} &> {log}
rule recalcBlatMapped:
message: " Recalc the coordinates of genes mapped with the Blat pipeline for chromosome {wildcards.chrom}"
input:
allBlat=config['results']+'/3.mapping/{chrom}/allBlat.csv',
summary=config['results']+'/3.mapping/{chrom}/mappingSummary.csv',
refQuery=config['queryFasta'],
faiQuery=config['queryFasta']+'.fai',
refTarget=config['targetFasta'],
faiTarget=config['targetFasta']+'.fai',
refGff=config['annotationQuery']
output: gff=temp(config['results']+'/4.recalcBlat/{chrom}/RecalcAnnotOnTarget.gff3')
log: config['results']+'/4.recalcBlat/{chrom}/RecalcAnnotOnTarget.log'
shell:
"""
bin/recalcCoordsFromBlat.py -b {input.allBlat} -s {input.summary} -o {output.gff} -q {input.refQuery} -t {input.refTarget} -g {input.refGff} &> {log}
"""
rule gtCleanBlatGff:
message: " Clean the gff file recalculated based on Blat fine mapping for chromosome {wildcards.chrom}"
input: config['results']+'/4.recalcBlat/{chrom}/RecalcAnnotOnTarget.gff3'
output: config['results']+'/4.recalcBlat/{chrom}/RecalcAnnotOnTarget-clean.gff3'
log: config['results']+'/4.recalcBlat/{chrom}/RecalcAnnotOnTarget-clean.log'
shell:
gt gff3 -sort -tidy -retainids {input} 1> {output} 2> {log}
message: " Rescue anchoring of failed genes with gmap for chrom {wildcards.chrom}"
input: blat=config['results']+'/3.mapping/{chrom}/allBlat.csv', gmapindex=config['results']+"/target_gmapindex"
output: txt=config['results']+'/4.gmapRescue/{chrom}.txt',
targetgff=config['results']+'/4.gmapRescue/{chrom}.target.gff3',
wholegenomegff=config['results']+'/4.gmapRescue/{chrom}.wholeGenome.gff3'
params: blastdb=config['blastdb'], anchoringDir=config['results']+'/3.mapping/{chrom}/', gmapIndexDir=config['results']

Helene Rimbert
committed
log: config['results']+'/4.gmapRescue/{chrom}.log'
bin/gmapRescue.sh {params.anchoringDir} {params.blastdb} {input.gmapindex} {params.gmapIndexDir} {output.targetgff} {output.wholegenomegff} 1> {output.txt} 2> {log}

Helene Rimbert
committed
rule recalcGmapRescue:
message: "Recalc the coordinates of the GMAP on target GFF3 files for chromosome {wildcards.chrom}"
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
input: config['results']+'/4.gmapRescue/{chrom}.target.gff3',
output: config['results']+'/4.recalcGmap/{chrom}/recalc.gff3'
params: anchoringDir=config['results']+'/3.mapping/{chrom}',
bindir=os.getcwd()
log: config['results']+'/4.recalcGmap/{chrom}/recalc.log'
shell:
"""
bin/recalcGmapRescue.sh {params.anchoringDir} {input} {output} {params.bindir} &> {log}
"""
rule saveGmapWG:
message: "Save the GMAP results of genes mapped on Whole Genome"
input: config['results']+'/4.gmapRescue/{chrom}.wholeGenome.gff3'
output: config['results']+'/4.gmapWholeGenome/{chrom}.wholeGenome.gff3'
log: config['results']+'/4.gmapWholeGenome/{chrom}.wholeGenome.log'
shell:
"""
cp {input} {output} &> {log}
"""
rule mergeFinalGff3:
message: " Merge Final GFF3 files: blat.gff3, rescue.gff3 and wholeGenome.gff3"
input: blat=config['results']+'/4.recalcBlat/{chrom}/RecalcAnnotOnTarget-clean.gff3',
rescue=config['results']+'/4.recalcGmap/{chrom}/recalc.gff3',
wg=config['results']+'/4.gmapWholeGenome/{chrom}.wholeGenome.gff3'
output: annot=config['results']+'/5.FINAL/{chrom}/annotation.gff3', gmapWGOnly=config['results']+'/5.FINAL/{chrom}/missing-gmap.gff3'
log: annot=config['results']+'/5.FINAL/{chrom}/annotation.log', gmap=config['results']+'/5.FINAL/{chrom}/missing-gmap.log'
shell:
"""
gt gff3 -sort -tidy -retainids {input.blat} {input.rescue} 1> {output.annot} 2> {log.annot}
gt gff3 -sort -tidy -retainids {input.wg} 1> {output.gmapWGOnly} 2> {log.gmap}
"""
rule checkMissing:
message: ""
input: gff=config['results']+'/5.FINAL/{chrom}/annotation.gff3',
gmap=config['results']+'/5.FINAL/{chrom}/missing-gmap.gff3',
ref=config['results']+"/1.features/{chrom}.bed"
output: config['results']+'/5.FINAL/{chrom}/missing.txt'
log: config['results']+'/5.FINAL/{chrom}/missing.log'
shell:
"""
bin/checkFinalAnnot.py -i {input.gff} -a {input.ref} -g {input.gmap} -m {output} &> {log}
"""
rule concatAllChromResults:
message: " concat all per chromosome results"
input: annot=expand(config['results']+'/5.FINAL/{chrom}/annotation.gff3',chrom=config['chromosomes']),
rescue=expand(config['results']+'/5.FINAL/{chrom}/missing-gmap.gff3',chrom=config['chromosomes']),
missing=expand(config['results']+'/5.FINAL/{chrom}/missing.txt', chrom=config['chromosomes'])
output: annot=temp(config['finalPrefix']+'tmp.gff3'),
rescue=config['finalPrefix']+'_rescue.gff3',
missing=config['finalPrefix']+'_missing.txt'
log: config['finalPrefix']+'.log'
shell:
"""
gt gff3 -sort -tidy -retainids {input.annot} 1> {output.annot} 2> {log}
gt gff3 -sort -tidy -retainids {input.rescue} 1> {output.rescue} 2> {log}
cat {input.missing} 1> {output.missing} 2>> {log}
"""
rule renameGeneIds:
message: " set final gene IDs for the new annotation"
input: gff=config['finalPrefix']+'tmp.gff3',
map=config['chromMapID']
output: gffhc=config['finalPrefix']+'_HC.gff3',
gfflc=config['finalPrefix']+'_LC.gff3',
correspondance=config['finalPrefix']+'_IDMapping.csv',
overlap=config['finalPrefix']+'_overlap.bed'
params: prefix='TraesCS', version='03G', source='IWGSC_v2.1'
log: config['finalPrefix']+'_IDMapping.log'
shell:
"""
bin/renameGffID.py -i {input.gff} -m {input.map} -o {output.gffhc} -l {output.gfflc} -c {output.correspondance} -p {params.prefix} -v {params.version} -s {params.source} -x {output.overlap} &> {log}
"""
rule generateFastaSequencesHC:
message: "generate fasta sequences using gffread for HC genes"
input: gff=config['finalPrefix']+'_HC.gff3',
fastaref=config['targetFasta']
output: mrna=config['finalPrefix']+'_HC.transcripts.fasta',
cds=config['finalPrefix']+'_HC.cds.fasta',
pep=config['finalPrefix']+'_HC.proteins.fasta'
log: config['finalPrefix']+'_HC.gffread.log'
shell:
"""
gffread {input.gff} -w {output.mrna} -x {output.cds} -y {output.pep} -g {input.fastaref} -W -O &> {log}
"""
rule generateFastaSequencesLC:
message: "generate fasta sequences using gffread for LC genes"
input:
gff=config['finalPrefix']+'_LC.gff3',
fastaref=config['targetFasta']
output: mrna=config['finalPrefix']+'_LC.transcripts.fasta',
cds=config['finalPrefix']+'_LC.cds.fasta',
pep=config['finalPrefix']+'_LC.proteins.fasta'
log: config['finalPrefix']+'_LC.gffread.log'

Helene Rimbert
committed
shell:
"""
gffread {input.gff} -w {output.mrna} -x {output.cds} -y {output.pep} -g {input.fastaref} -W -O &> {log}
"""
rule generateFastaSequencesRescue:
message: "generate fasta sequences using gffread for rescue genes"
input:
gff=config['finalPrefix']+'_rescue.gff3',
fastaref=config['targetFasta']
output: mrna=config['finalPrefix']+'_rescue.transcripts.fasta',
cds=config['finalPrefix']+'_rescue.cds.fasta',
pep=config['finalPrefix']+'_rescue.proteins.fasta'
log: config['finalPrefix']+'_rescue.gffread.log'
shell:

Helene Rimbert
committed
"""
gffread {input.gff} -w {output.mrna} -x {output.cds} -y {output.pep} -g {input.fastaref} -W -O &> {log}
"""