Skip to content
Snippets Groups Projects
Commit 2e4a5a8c authored by Helene Rimbert's avatar Helene Rimbert
Browse files

Merge branch '6-gmaprescue_global' into 'missing_conda_rules'

Resolve "IMPROVE: gmap rescue in global scale"

See merge request !6
parents e077dff2 fde7ff34
No related branches found
No related tags found
2 merge requests!7Missing conda rules,!6Resolve "IMPROVE: gmap rescue in global scale"
......@@ -5,10 +5,14 @@ blastdb=$2
gmapdb=$3
gmapdbdir=$4
outputRescueGff=$5
outputRescueWholeGenomeGff=$6
wholegenomeFasta=$7
mode=$8
# files for unanchored genes: will be mapped with gmapl on whole genome
unmappedMrnaFasta=$inputDir'/missing.fasta'
outputRescueWholeGenomeGff=$6
querylist=($(find $inputDir -type f -name 'query.fasta'))
targetlist=($(find $inputDir -type f -name 'target.fasta'))
......@@ -47,11 +51,13 @@ do
if [ -f "$rootdir/UNMAPPED" ]
then
echo " Gene $geneid has status UNMAPPED: mapping on the whole genome"
targetindex=$gmapdb
targetindexdir=$gmapdbdir
targetgff=$rootdir'/gmap.wholeGenome.gff3'
gmapexe='gmapl'
eval $gmapexe --min-identity 0.80 --min-trimmed-coverage 0.50 --ordered -n 1 -d $targetindex -D $targetindexdir -f 2 $rootdir'/query.mrna.fasta' 1> $targetgff 2> $rootdir'/gmap.wg.log' && rm $rootdir'/query.mrna.fasta'
cat $rootdir'/query.mrna.fasta' >> $unmappedMrnaFasta && rm $rootdir'/query.mrna.fasta'
#targetindex=$gmapdb
#targetindexdir=$gmapdbdir
#targetgff=$rootdir'/gmap.wholeGenome.gff3'
#gmapexe='gmapl'
#eval $gmapexe --min-identity 0.80 --min-trimmed-coverage 0.50 --ordered -n 1 -d $targetindex -D $targetindexdir -f 2 $rootdir'/query.mrna.fasta' 1> $targetgff 2> $rootdir'/gmap.wg.log' && rm $rootdir'/query.mrna.fasta'
else
echo "running gmap on target"
eval gmap --ordered --min-identity 0.80 --min-trimmed-coverage 0.50 -n 1 -g $rootdir'/target.fasta' -f 2 $rootdir'/query.mrna.fasta' 1> $targetgff 2> $rootdir'/gmap.target.log' && rm $rootdir'/query.mrna.fasta'
......@@ -98,8 +104,12 @@ gffList=($(find $inputDir -type f -name "gmap.target.gff3" ))
gt gff3 -sort -tidy -retainids "${gffList[@]}" 1> $outputRescueGff 2> $outputRescueGff.log
# for genes mapped on whole genome
echo "Now concat all GFF of gmap on whole genome"
gffList=($(find $inputDir -type f -name "gmap.wholeGenome.gff3" ))
gt gff3 -sort -tidy -retainids "${gffList[@]}" 1> $outputRescueWholeGenomeGff 2> $outputRescueWholeGenomeGff.log
echo "Now map missing/unanchored genes with GMAPL on whole genome"
cmd="gmapl --min-identity 0.80 --min-trimmed-coverage 0.50 --ordered -n 1 -d $gmapdb -D $gmapdbdir -f 2 $unmappedMrnaFasta 1> $inputDir'/gmap.wholeGenome.gff3' 2> $inputDir'/gmap.wg.log' && rm $unmappedMrnaFasta"
eval $cmd
echo "Now sort and tidy all GFF of gmap on whole genome"
#gffList=($(find $inputDir -type f -name "gmap.wholeGenome.gff3" ))
#gt gff3 -sort -tidy -retainids "${gffList[@]}" 1> $outputRescueWholeGenomeGff 2> $outputRescueWholeGenomeGff.log
gt gff3 -sort -tidy -retainids $inputDir'/gmap.wholeGenome.gff3' 1> $outputRescueWholeGenomeGff 2> $outputRescueWholeGenomeGff.log
echo "ENDED AT `date`"
......@@ -16,7 +16,12 @@ refChrom: ['Chr1A', 'Chr1B', 'Chr1D', 'Chr2A', 'Chr2B', 'Chr2D', 'Chr3A', 'Chr3B
transferType: 'first'
##### TARGET related files/parameters
# FASTA of the target genome
targetFasta: 'data/Triticum_aestivum_arinalrfor.PGSBv2.1.dna.toplevel.fa'
#GMAP index of the genome for -d option
targetGmapIndex: 'ensembl_Triticum_aestivum_julius_2022-9-16'
#GMAP index: path to the gmapindex directory, for -D option
targetGmapIndexPath: '/home/herimbert/gdec/shared/triticum_aestivum/julius/current/gmapdb/all/'
##### ISBP/markers related config and parameters
# BAM file of markers/ISBPs mapped on the target genome
......
......@@ -64,8 +64,10 @@ rule gmapRescue:
wholegenomegff=config['results']+'/4.gmapRescue/{chrom}.wholeGenome.gff3'
params: blastdb=config['blastdb'],
anchoringDir=config['results']+'/3.mapping/{chrom}/',
gmapIndexDir=config['results'],
gmapindex="target_gmapindex",
#gmapIndexDir=config['results'],
#gmapindex="target_gmapindex",
gmapIndexDir=config['targetGmapIndexPath'],
gmapindex=config['targetGmapIndex'],
mode=config['transferType']
log: config['results']+'/4.gmapRescue/{chrom}.log'
shell:
......
......@@ -30,11 +30,11 @@ rule indexTarget:
output: config['targetFasta']+'.fai'
shell: "samtools faidx {input}"
rule gmapIndexTarget:
message: " Create Gmap Index for rescue"
conda: "envs/environment.yml"
input: config['targetFasta']
output: directory(config['results']+"/target_gmapindex")
params: indexname="target_gmapindex", indexPath=config['results']
log: config['results']+"/target_gmapindex.log"
shell: "gmap_build -D {params.indexPath} -d {params.indexname} {input} &> {log}"
#rule gmapIndexTarget:
# message: " Create Gmap Index for rescue"
# conda: "envs/environment.yml"
# input: config['targetFasta']
# output: directory(config['results']+"/target_gmapindex")
# params: indexname="target_gmapindex", indexPath=config['results']
# log: config['results']+"/target_gmapindex.log"
# shell: "gmap_build -D {params.indexPath} -d {params.indexname} {input} &> {log}"
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment