Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
#!/bin/bash
#SBATCH --array=0-7
#SBATCH -n1
#SBATCH --mail-user=helene.rimbert@inrae.fr
#SBATCH --mail-type=ALL
#SBATCH --export=ALL
chroms=($(echo TraesCS{1..7} TraesCSU))
chrom=${chroms[$SLURM_ARRAY_TASK_ID]}
# get the list of query and target file for gmap
querylist=($(find 3_mapping_$chrom* -type f -name 'query.fasta'))
targetlist=($(find 3_mapping_$chrom* -type f -name 'target.fasta'))
numquery=${#querylist[@]}
numtarget=${#targetlist[@]}
echo "STARTING AT `date` FOR CHROM $chrom"
echo "found $numquery query fasta file"
echo "found $numtarget target fasta file"
for i in "${querylist[@]}";
do
rootdir=`dirname $i`
# extract the name of the gene
geneid=$(cat $rootdir'/query.fasta'|fgrep '>'|perl -ne 'print $1 if /^\>(TraesCS.+)\ coords/')
echo "\n\n================================================"
echo " gene is $geneid"
echo " datadir is $rootdir"
echo "1. indexing the query and target genomic"
if [ ! -s " $rootdir/query.fasta.fai" ]
then
samtools faidx $rootdir'/query.fasta'
samtools faidx $rootdir'/target.fasta'
else
echo " index already done"
fi
echo "2. running gffread to extract the pep sequence"
if [ ! -s " $rootdir/target.pep.faa" ]
then
gffread -x $rootdir'/target.cds.fasta' -y $rootdir'/target.pep.faa' -g $rootdir'/target.fasta' $rootdir'/gmap.target.gff3'
gffread -x $rootdir'/query.cds.fasta' -y $rootdir'/query.pep.faa' -g $rootdir'/query.fasta' $rootdir'/gmap.query.gff3'
else
echo "pep extraction already done"
fi
echo "3. running diff to check if the pep sequences are different or not"
diffresult=$(diff $rootdir'/target.pep.faa' $rootdir'/query.pep.faa')
if [ "$diffresult" != "" ]
then
echo "RESULTS pep of gene $geneid has changed"
echo $diffresults > $rootdir/CDS_CHANGED
elif [ ! -s "$rootdir/blat.txt" ]
then
echo "RESULTS no blat results here: no extraction to make"
touch $rootdir/UNMAPPED
else
echo "RESULTS pep of gene $geneid stays the same"
touch $rootdir/CDS_OK
fi
done
echo "ENDED AT `date`"