Skip to content
Snippets Groups Projects
extractpepandcheck.sh 1.98 KiB
Newer Older
#!/bin/bash
#SBATCH --array=0-7
#SBATCH -n1
#SBATCH --mail-user=helene.rimbert@inrae.fr
#SBATCH --mail-type=ALL
#SBATCH --export=ALL

chroms=($(echo TraesCS{1..7} TraesCSU))
chrom=${chroms[$SLURM_ARRAY_TASK_ID]}

# get the list of query and target file for gmap
querylist=($(find  3_mapping_$chrom* -type f -name 'query.fasta'))
targetlist=($(find 3_mapping_$chrom* -type f -name 'target.fasta'))

numquery=${#querylist[@]}
numtarget=${#targetlist[@]}

echo "STARTING AT `date` FOR CHROM $chrom"
echo "found $numquery query fasta file"
echo "found $numtarget target fasta file"


for i in "${querylist[@]}";
do
	rootdir=`dirname $i`
	# extract the name of the gene
        geneid=$(cat $rootdir'/query.fasta'|fgrep '>'|perl -ne 'print $1 if /^\>(TraesCS.+)\ coords/')
	echo "\n\n================================================"
        echo " gene is $geneid"
	echo " datadir is $rootdir"


	echo "1. indexing the query and target genomic"
	if [ ! -s " $rootdir/query.fasta.fai" ]
	then
		samtools faidx $rootdir'/query.fasta'
		samtools faidx $rootdir'/target.fasta'
	else
		echo " index already done"
	fi

	echo "2. running gffread to extract the pep sequence"

	if [ ! -s " $rootdir/target.pep.faa" ]
	then
		gffread -x $rootdir'/target.cds.fasta' -y $rootdir'/target.pep.faa' -g $rootdir'/target.fasta' $rootdir'/gmap.target.gff3'
		gffread -x $rootdir'/query.cds.fasta' -y $rootdir'/query.pep.faa' -g $rootdir'/query.fasta' $rootdir'/gmap.query.gff3'
	else
		echo "pep extraction already done"
	fi

	echo "3. running diff to check if the pep sequences are different or not"
	diffresult=$(diff $rootdir'/target.pep.faa' $rootdir'/query.pep.faa')

	if [ "$diffresult" != "" ]
	then
		echo "RESULTS pep of gene $geneid has changed"
		echo $diffresults > $rootdir/CDS_CHANGED

	elif [ ! -s "$rootdir/blat.txt" ]
	then
		echo "RESULTS no blat results here: no extraction to make"
		touch $rootdir/UNMAPPED
	else
		echo "RESULTS pep of gene $geneid stays the same"
		touch $rootdir/CDS_OK
	fi

done

echo "ENDED AT `date`"