Skip to content
Snippets Groups Projects
Commit 50257cad authored by Helene Rimbert's avatar Helene Rimbert
Browse files

IMPROVE: gmapl rescue run at the end of each chomosome batch rather than for every gene

parent e077dff2
No related branches found
No related tags found
2 merge requests!7Missing conda rules,!6Resolve "IMPROVE: gmap rescue in global scale"
......@@ -5,10 +5,14 @@ blastdb=$2
gmapdb=$3
gmapdbdir=$4
outputRescueGff=$5
outputRescueWholeGenomeGff=$6
wholegenomeFasta=$7
mode=$8
# files for unanchored genes: will be mapped with gmapl on whole genome
unmappedMrnaFasta=$inputDir'/missing.fasta'
outputRescueWholeGenomeGff=$6
querylist=($(find $inputDir -type f -name 'query.fasta'))
targetlist=($(find $inputDir -type f -name 'target.fasta'))
......@@ -47,11 +51,13 @@ do
if [ -f "$rootdir/UNMAPPED" ]
then
echo " Gene $geneid has status UNMAPPED: mapping on the whole genome"
targetindex=$gmapdb
targetindexdir=$gmapdbdir
targetgff=$rootdir'/gmap.wholeGenome.gff3'
gmapexe='gmapl'
eval $gmapexe --min-identity 0.80 --min-trimmed-coverage 0.50 --ordered -n 1 -d $targetindex -D $targetindexdir -f 2 $rootdir'/query.mrna.fasta' 1> $targetgff 2> $rootdir'/gmap.wg.log' && rm $rootdir'/query.mrna.fasta'
cat $rootdir'/query.mrna.fasta' >> $unmappedMrnaFasta && rm $rootdir'/query.mrna.fasta'
#targetindex=$gmapdb
#targetindexdir=$gmapdbdir
#targetgff=$rootdir'/gmap.wholeGenome.gff3'
#gmapexe='gmapl'
#eval $gmapexe --min-identity 0.80 --min-trimmed-coverage 0.50 --ordered -n 1 -d $targetindex -D $targetindexdir -f 2 $rootdir'/query.mrna.fasta' 1> $targetgff 2> $rootdir'/gmap.wg.log' && rm $rootdir'/query.mrna.fasta'
else
echo "running gmap on target"
eval gmap --ordered --min-identity 0.80 --min-trimmed-coverage 0.50 -n 1 -g $rootdir'/target.fasta' -f 2 $rootdir'/query.mrna.fasta' 1> $targetgff 2> $rootdir'/gmap.target.log' && rm $rootdir'/query.mrna.fasta'
......@@ -98,8 +104,12 @@ gffList=($(find $inputDir -type f -name "gmap.target.gff3" ))
gt gff3 -sort -tidy -retainids "${gffList[@]}" 1> $outputRescueGff 2> $outputRescueGff.log
# for genes mapped on whole genome
echo "Now concat all GFF of gmap on whole genome"
gffList=($(find $inputDir -type f -name "gmap.wholeGenome.gff3" ))
gt gff3 -sort -tidy -retainids "${gffList[@]}" 1> $outputRescueWholeGenomeGff 2> $outputRescueWholeGenomeGff.log
echo "Now map missing/unanchored genes with GMAPL on whole genome"
cmd="gmapl --min-identity 0.80 --min-trimmed-coverage 0.50 --ordered -n 1 -d $gmapdb -D $gmapdbdir -f 2 $unmappedMrnaFasta 1> $inputDir'/gmap.wholeGenome.gff3' 2> $inputDir'/gmap.wg.log' && rm $unmappedMrnaFasta"
eval $cmd
echo "Now sort and tidy all GFF of gmap on whole genome"
#gffList=($(find $inputDir -type f -name "gmap.wholeGenome.gff3" ))
#gt gff3 -sort -tidy -retainids "${gffList[@]}" 1> $outputRescueWholeGenomeGff 2> $outputRescueWholeGenomeGff.log
gt gff3 -sort -tidy -retainids $inputDir'/gmap.wholeGenome.gff3' 1> $outputRescueWholeGenomeGff 2> $outputRescueWholeGenomeGff.log
echo "ENDED AT `date`"
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment