Skip to content
Snippets Groups Projects
Commit 2e92a9f9 authored by Helene Rimbert's avatar Helene Rimbert
Browse files

NEW script to recalc coords of rescue with gmap

parent e64475d7
No related branches found
No related tags found
No related merge requests found
#!/bin/bash
##SBATCH --array=0-7
##SBATCH -n1
##SBATCH --mail-user=helene.rimbert@inrae.fr
##SBATCH --mail-type=ALL
# for use with slurm
#chroms=($(echo TraesCS{1..7} TraesCSU))
#chrom=${chroms[$SLURM_ARRAY_TASK_ID]}
# get the list of query and target file for gmap
inputDir=$1
blastdb=$2
querylist=($(find $inputDir -type f -name 'gmap.target.gff3'))
numquery=${#querylist[@]}
echo "STARTING AT `date` FOR DATASET $inputDir"
echo "found $numquery query file"
for i in "${querylist[@]}";
do
rootdir=`dirname $i`
# extract the name of the gene
geneid=$(cat $rootdir'/query.fasta'|fgrep '>'|perl -ne 'print $1 if /^\>(TraesCS.+)\ coords/')
echo "\n\n================================================"
echo " gene is $geneid"
echo " datadir is $rootdir"
# extract the target chrom, start and stop
chrom=$(fgrep '>' $rootdir'/target.fasta'|sed "s/>//"|cut -d ' ' -f1|cut -d '_' -f2)
start=$(fgrep '>' $rootdir'/target.fasta'|sed "s/>//"|cut -d ' ' -f1|cut -d '_' -f3)
stop=$(fgrep '>' $rootdir'/target.fasta'|sed "s/>//"|cut -d ' ' -f1|cut -d '_' -f4)
echo " Target coords: $chrom from $start - $stop"
# check the status of the CDS: OK or CHANGED
mappingStatus='UNMAPPED'
if [ -f "$rootdir/CDS_OK" ]
then
echo " CDS has not changed between target and reference"
mappingStatus='CDS_OK'
elif [ -f "$rootdir/CDS_CHANGED" ]
then
echo " WARNING : CDS has changed between target and reference"
mappingStatus='CDS_CHANGED'
else
echo " Assume gene is unmapped"
echo " need to run gmap on entire genome"
mappingStatus='UNMAPPED'
#touch $rootdir/UNMAPPED
continue
fi
echo " 1. convert gmap coordinates on new reference"
eval $TRANSFERTANNOT_PATH'/bin/convertGmapCoords.py' -G $rootdir'/gmap.target.gff3' -s $start -e $stop -c $mappingStatus -o $rootdir'/recalc.gff3' -r $chrom -g $geneid -m GMAP_on_target &> $rootdir'/recalc.log'
echo " 2. sort gff"
eval gt gff3 -sort -retainids -tidy $rootdir'/recalc.gff3' 1> $rootdir'/recalc_clean.gff3'
done
echo "ENDED AT `date`"
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment