Skip to content
Snippets Groups Projects
Commit d2335b52 authored by Helene Rimbert
Browse files

IMPROVE: add conda pragma into all rules and docker path for use with --singularity option

parent 40c5249f
No related branches found
No related tags found
1 merge request: !7 Missing conda rules
configfile: "config.yaml"
container: "docker://continuumio/miniconda3:4.4.10"
rule all:
input:
config['finalPrefix']+'_HC.gff3',
......
File moved
rule selectMappedISBP:
message: " Select only mapped ISBPS on new refseq"
conda: "envs/environment.yml"
input: mapped=config['results']+'/1.filteredISBPs/{chrom}/sorted.bed',
original=config['isbpBed']
output: config['results']+'/2.mappedISBPs/{chrom}/coordsOnQuery.bed'
......@@ -12,6 +13,7 @@ rule selectMappedISBP:
rule keepMappedOnSameChrom:
message: " Select Mapped ISBPs on same chromosome (or on unknown chromosome)"
conda: "envs/environment.yml"
input: isbpOnQuery=config['results']+'/2.mappedISBPs/{chrom}/coordsOnQuery.bed',
isbpOnTarget=config['results']+'/1.filteredISBPs/{chrom}/sorted.bed',
output: config['results']+'/2.mappedISBPs/{chrom}/OnSameChrom.bed'
......@@ -23,6 +25,7 @@ rule keepMappedOnSameChrom:
rule upstreamClosest:
message: " Collect closest marker upstream of genes"
conda: "envs/environment.yml"
input: annot=config['results']+"/1.features/{chrom}.bed",
markers=config['results']+'/2.mappedISBPs/{chrom}/OnSameChrom.bed'
output: config['results']+'/2.closestbed/{chrom}/upstream.txt'
......@@ -34,6 +37,7 @@ rule upstreamClosest:
rule downstreamClosest:
message: " Collect Downstream marker downstream of genes"
conda: "envs/environment.yml"
input: annot=config['results']+"/1.features/{chrom}.bed",
markers=config['results']+'/2.mappedISBPs/{chrom}/OnSameChrom.bed'
output: config['results']+'/2.closestbed/{chrom}/downstream.txt'
......@@ -45,6 +49,7 @@ rule downstreamClosest:
# Disabled rule kept for reference. Every line, including the conda
# directive, stays commented so that Snakemake does not see a live directive.
# rule splitPerChrom:
#     message: "Split data per chromosome"
#     conda: "envs/environment.yml"
#     input:
#         upstream=config['results']+'/2.closestbed/{chrom}/upstream.txt',
#         downstream=config['results']+'/2.closestbed/{chrom}/downstream.txt'
......
rule validateCdsHC:
message: " check CDS integrity for HC genes"
conda: "envs/environment.yml"
input: config['finalPrefix']+'_HC.cds.fasta'
output: fasta=config['finalPrefix']+'_HC.cds.valid.fasta',
csv=config['finalPrefix']+'_HC.cds.valid.explained.txt'
......@@ -10,6 +11,7 @@ rule validateCdsHC:
rule validateCdsLC:
message: " check CDS integrity for LC genes"
conda: "envs/environment.yml"
input: config['finalPrefix']+'_LC.cds.fasta'
output: fasta=config['finalPrefix']+'_LC.cds.valid.fasta',
csv=config['finalPrefix']+'_LC.cds.valid.explained.txt'
......
rule mapHomologousRegions:
message: " mapping homologous regions of both references using ISBPs markers as anchors for chromosome {wildcards.chrom}"
conda: "envs/environment.yml"
input:
#closestMarkers=config['results']+'/2.mergedClosestMarkers.txt',
marker5prime=config['results']+'/2.closestbed/{chrom}/upstream.txt',
......@@ -22,6 +23,7 @@ rule mapHomologousRegions:
rule recalcBlatMapped:
message: " Recalc the coordinates of genes mapped with the Blat pipeline for chromosome {wildcards.chrom}"
conda: "envs/environment.yml"
input:
allBlat=config['results']+'/3.mapping/{chrom}/allBlat.csv',
summary=config['results']+'/3.mapping/{chrom}/mappingSummary.csv',
......@@ -41,6 +43,7 @@ rule recalcBlatMapped:
rule gtCleanBlatGff:
message: " Clean the gff file recalculated based on Blat fine mapping for chromosome {wildcards.chrom}"
conda: "envs/environment.yml"
input: gff=config['results']+'/4.recalcBlat/{chrom}/RecalcAnnotOnTarget.gff3',
mapping=config['results']+'/3.mapping/{chrom}/temp'
output: config['results']+'/4.recalcBlat/{chrom}/RecalcAnnotOnTarget-clean.gff3'
......@@ -52,6 +55,7 @@ rule gtCleanBlatGff:
rule gmapRescue:
message: " Rescue anchoring of failed genes with gmap for chrom {wildcards.chrom}"
conda: "envs/environment.yml"
input: blat=config['results']+'/3.mapping/{chrom}/allBlat.csv',
wholeGenomeFasta=config['targetFasta'],
mapping=config['results']+'/3.mapping/{chrom}/temp'
......@@ -71,6 +75,7 @@ rule gmapRescue:
rule recalcGmapRescue:
message: "Recalc the coordinates of the GMAP on target GFF3 files for chromosome {wildcards.chrom}"
conda: "envs/environment.yml"
input: gff=config['results']+'/4.gmapRescue/{chrom}.target.gff3',
mapping=config['results']+'/3.mapping/{chrom}/temp'
output: config['results']+'/4.recalcGmap/{chrom}/recalc.gff3'
......@@ -84,6 +89,7 @@ rule recalcGmapRescue:
rule saveGmapWG:
message: "Save the GMAP results of genes mapped on Whole Genome"
conda: "envs/environment.yml"
input: gff=config['results']+'/4.gmapRescue/{chrom}.wholeGenome.gff3',
map=config['chromMapID'],
mapping=config['results']+'/3.mapping/{chrom}/temp'
......@@ -99,6 +105,7 @@ rule saveGmapWG:
rule mergeFinalGff3:
message: " Merge Final GFF3 files: blat.gff3, rescue.gff3 and wholeGenome.gff3"
conda: "envs/environment.yml"
input: blat=config['results']+'/4.recalcBlat/{chrom}/RecalcAnnotOnTarget-clean.gff3',
rescue=config['results']+'/4.recalcGmap/{chrom}/recalc.gff3',
wg=config['results']+'/4.gmapWholeGenome/{chrom}.wholeGenome.gff3',
......@@ -110,7 +117,8 @@ rule mergeFinalGff3:
gt gff3 -sort -fixregionboundaries -tidy -retainids {input.blat} {input.rescue} {input.wg} 1> {output.annot} 2> {log.annot}
"""
# Inputs: the final per-chromosome GFF3 and the per-chromosome feature BED.
# Output: missing.txt (per the rule message, the genes that were not transferred).
rule checkMissing:
    # Single message directive; the empty `message: ""` placeholder is dropped.
    message: " Check missing transfered genes"
    conda: "envs/environment.yml"
    input: gff=config['results']+'/5.FINAL/{chrom}/annotation.gff3',
        ref=config['results']+"/1.features/{chrom}.bed"
    output: config['results']+'/5.FINAL/{chrom}/missing.txt'
......@@ -122,6 +130,7 @@ rule checkMissing:
rule concatAllChromResults:
message: " concat all per chromosome results"
conda: "envs/environment.yml"
input: annot=expand(config['results']+'/5.FINAL/{chrom}/annotation.gff3',chrom=config['chromosomes']),
differentChrom=expand(config['results']+'/4.gmapWholeGenome/{chrom}.wholeGenome_differentChrom.txt',chrom=config['chromosomes']),
missing=expand(config['results']+'/5.FINAL/{chrom}/missing.txt', chrom=config['chromosomes']),
......@@ -139,6 +148,7 @@ rule concatAllChromResults:
rule renameGeneIds:
message: " set final gene IDs for the new annotation"
conda: "envs/environment.yml"
input: gff=config['finalPrefix']+'tmp.gff3',
map=config['chromMapID']
output: gffhc=config['finalPrefix']+'_HC.gff3',
......@@ -154,6 +164,7 @@ rule renameGeneIds:
rule generateFastaSequencesHC:
message: "generate fasta sequences using gffread for HC genes"
conda: "envs/environment.yml"
input: gff=config['finalPrefix']+'_HC.gff3',
fastaref=config['targetFasta']
output: mrna=config['finalPrefix']+'_HC.transcripts.fasta',
......@@ -166,6 +177,7 @@ rule generateFastaSequencesHC:
"""
rule generateFastaSequencesLC:
message: "generate fasta sequences using gffread for LC genes"
conda: "envs/environment.yml"
input:
gff=config['finalPrefix']+'_LC.gff3',
fastaref=config['targetFasta']
......@@ -180,6 +192,7 @@ rule generateFastaSequencesLC:
rule concatblatSummary:
message: " Concat all Blat summary"
conda: "envs/environment.yml"
input:blat=expand(config['results']+'/3.mapping/{chrom}/allBlat.csv', chrom=config['chromosomes']),
mapping=expand(config['results']+'/3.mapping/{chrom}/temp', chrom=config['chromosomes'])
output: config['finalPrefix']+'_blatSummary.csv'
......@@ -190,6 +203,7 @@ rule concatblatSummary:
rule concatAnchoringSummary:
message: " Concat all Anchoring summary"
conda: "envs/environment.yml"
input:anchoring=expand(config['results']+'/3.mapping/{chrom}/mappingSummary.csv', chrom=config['chromosomes']),
mapping=expand(config['results']+'/3.mapping/{chrom}/temp', chrom=config['chromosomes'])
output: config['finalPrefix']+'_anchoringSummary.csv'
......
# Runs bin/gff2bed.sh with the configured feature type on the query annotation
# and stores its standard output as a temporary BED file.
rule grepGffFeature:
    message: " Collect selected features from GFF file"
    # Each directive is declared exactly once; the conda environment is part
    # of the single definition.
    conda: "envs/environment.yml"
    input: config['annotationQuery']
    params: config['featureType']
    # temp(): intermediate file that Snakemake may delete once its consumers ran.
    output: temp(config['results']+"/1.features.bed")
    log: config['results']+"/1.grepGffFeature.log"
    # stdout is the BED result, stderr goes to the log file.
    shell: "bin/gff2bed.sh {params} {input} 1> {output} 2> {log}"
# Writes, for one chromosome wildcard, the lines of the feature BED that
# contain the string "chr<chrom>" into a temporary per-chromosome BED file.
rule splitGffPerChrom:
    message: "Split Gff Features per chromosome: current is {wildcards.chrom}"
    # Each directive is declared exactly once; the conda environment is part
    # of the single definition.
    conda: "envs/environment.yml"
    input: config['results']+"/1.features.bed"
    output: temp(config['results']+"/1.features/{chrom}.bed")
    log: config['results']+"/1.features/{chrom}.fgrep.log"
    params: "chr{chrom}"
    # NOTE(review): fgrep -i is a case-insensitive fixed-string match anywhere
    # on the line, so a name that is a prefix of another (e.g. chr1 / chr10)
    # would select both -- confirm the chromosome naming rules this out.
    shell: "fgrep -i {params} {input} 1> {output} 2> {log}"
# Index the query FASTA with samtools; the declared output is the .fai file
# next to the FASTA.
rule indexQuery:
    input:
        config['queryFasta']
    output:
        config['queryFasta']+'.fai'
    conda:
        "envs/environment.yml"
    message:
        " Indexing Query fasta file using samtools faidx"
    shell:
        "samtools faidx {input}"
# Index the target FASTA with samtools; the declared output is the .fai file
# next to the FASTA.
rule indexTarget:
    input:
        config['targetFasta']
    output:
        config['targetFasta']+'.fai'
    conda:
        "envs/environment.yml"
    message:
        " Indexing Target fasta file using samtools faidx"
    shell:
        "samtools faidx {input}"
rule gmapIndexTarget:
message: " Create Gmap Index for rescue"
conda: "envs/environment.yml"
input: config['targetFasta']
output: directory(config['results']+"/target_gmapindex")
params: indexname="target_gmapindex", indexPath=config['results']
......
rule filterBam:
message: "Filtering BAM file of ISBPs"
conda: "envs/environment.yml"
input: config['isbpBam']
output: config['results']+'/1.filteredISBPs.bam'
params: mapq=config['mapq'], mismatches=config['mismatches']
......@@ -8,6 +9,7 @@ rule filterBam:
rule bam2bed:
message: "Convert Filtered BAM file into BED"
conda: "envs/environment.yml"
input: config['results']+"/1.filteredISBPs.bam"
output: config['results']+"/1.filteredISBPs/{chrom}/sorted.bed"
log: config['results']+"/1.filteredISBPs/{chrom}/sorted.log"
......@@ -17,12 +19,14 @@ rule bam2bed:
# Disabled rule kept for reference. Every line, including the conda
# directive, stays commented so that Snakemake does not see a live directive.
#rule dumpISBPsID:
#    message: "Dump ISBPs IDs"
#    conda: "envs/environment.yml"
#    input: config['results']+"/1.filteredISBPs.bed"
#    output: config['results']+"/1.filteredISBPs.ids"
#    shell: " cut -f 4 {input} > {output}"
# Disabled rule kept for reference. Every line, including the conda
# directive, stays commented so that Snakemake does not see a live directive.
#rule splitISBP:
#    message: "Split isbps per chromosome"
#    conda: "envs/environment.yml"
#    input: config['results']+"/1.filteredISBPs.bed"
#    output: config['results']+"/1.filteredISBPs/{chrom}/sorted.bed"
#    log: config['results']+"/1.filteredISBPs/{chrom}/sorted.log"
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment