#!/bin/bash ##SBATCH --array=0-7 ##SBATCH -n1 ##SBATCH --mail-user=helene.rimbert@inrae.fr ##SBATCH --mail-type=ALL # for use with slurm #chroms=($(echo TraesCS{1..7} TraesCSU)) #chrom=${chroms[$SLURM_ARRAY_TASK_ID]} # get the list of query and target file for gmap inputDir=$1 blastdb=$2 querylist=($(find $inputDir -type f -name 'gmap.target.gff3')) numquery=${#querylist[@]} echo "STARTING AT `date` FOR DATASET $inputDir" echo "found $numquery query file" for i in "${querylist[@]}"; do rootdir=`dirname $i` # extract the name of the gene geneid=$(cat $rootdir'/query.fasta'|fgrep '>'|perl -ne 'print $1 if /^\>(TraesCS.+)\ coords/') echo "\n\n================================================" echo " gene is $geneid" echo " datadir is $rootdir" # extract the target chrom, start and stop chrom=$(fgrep '>' $rootdir'/target.fasta'|sed "s/>//"|cut -d ' ' -f1|cut -d '_' -f2) start=$(fgrep '>' $rootdir'/target.fasta'|sed "s/>//"|cut -d ' ' -f1|cut -d '_' -f3) stop=$(fgrep '>' $rootdir'/target.fasta'|sed "s/>//"|cut -d ' ' -f1|cut -d '_' -f4) echo " Target coords: $chrom from $start - $stop" # check the status of the CDS: OK or CHANGED mappingStatus='UNMAPPED' if [ -f "$rootdir/CDS_OK" ] then echo " CDS has not changed between target and reference" mappingStatus='CDS_OK' elif [ -f "$rootdir/CDS_CHANGED" ] then echo " WARNING : CDS has changed between target and reference" mappingStatus='CDS_CHANGED' else echo " Assume gene is unmapped" echo " need to run gmap on entire genome" mappingStatus='UNMAPPED' #touch $rootdir/UNMAPPED continue fi echo " 1. convert gmap coordinates on new reference" eval $TRANSFERTANNOT_PATH'/bin/convertGmapCoords.py' -G $rootdir'/gmap.target.gff3' -s $start -e $stop -c $mappingStatus -o $rootdir'/recalc.gff3' -r $chrom -g $geneid -m GMAP_on_target &> $rootdir'/recalc.log' echo " 2. sort gff" eval gt gff3 -sort -retainids -tidy $rootdir'/recalc.gff3' 1> $rootdir'/recalc_clean.gff3' done echo "ENDED AT `date`"