#!/bin/bash
#SBATCH -o output_all.out
#SBATCH -e error_all.out
#SBATCH -t 72:00:00
#SBATCH -p workq
#SBATCH --mem=256G
#SBATCH --mail-type=END,FAIL
#SBATCH -c 4
################################################################################
# CIGAR_FILTER_RNA.sh
#
# For every *.sam file in the current directory, write a filtered copy that
# keeps the header lines plus the alignment records satisfying ALL of:
#   - the CIGAR-derived identity  M / (total - N)  is strictly greater than $1
#   - the CIGAR-derived total length is strictly greater than $2
#   - the FLAG field is 99 or 147
#
# Arguments:
#   $1  minimum identity, as a fraction (e.g. 0.90)
#   $2  minimum length (e.g. 90)
#
# Outputs (per input <name>.sam, written to the current directory):
#   <name>_filtered_CIGAR_<$1>_id_<$2>_length_paired.sam   filtered SAM
#   <name>_freq.txt   one identity value per record that has a CIGAR string
#
# Run the script with this command:
#   sbatch --export=ALL CIGAR_FILTER_RNA.sh 0.90 90
################################################################################

# No 'set -e': failures are checked explicitly and the script's exit status is
# the return value of main (the last command of the file).
set -uo pipefail

#######################################
# Print a usage message to stderr.
#######################################
usage() {
  printf 'Usage: sbatch --export=ALL CIGAR_FILTER_RNA.sh <min_identity> <min_length>\n' >&2
  printf 'Example: sbatch --export=ALL CIGAR_FILTER_RNA.sh 0.90 90\n' >&2
}

#######################################
# Filter one SAM stream on CIGAR identity, CIGAR length and FLAG.
# Arguments:
#   $1 - minimum identity (record kept only if identity > $1)
#   $2 - minimum length   (record kept only if total   > $2)
#   $3 - file to which one identity value per record is appended
# Inputs:  SAM text on stdin
# Outputs: header lines and retained records on stdout, byte-for-byte
#          (fields are never rebuilt, so tabs and embedded spaces survive)
# Returns: awk's exit status
#######################################
filter_sam() {
  local min_identity=$1
  local min_length=$2
  local freq_file=$3

  awk -v id="$min_identity" -v len="$min_length" -v freq_file="$freq_file" '
    BEGIN { FS = "\t"; OFS = "\t"; id += 0; len += 0 }

    # Header lines are copied through untouched.
    /^@/ { print; next }

    # Records without a CIGAR string cannot be evaluated.
    NF < 6 || $6 == "*" { next }

    {
      rest = $6
      total = 0; matched = 0; skipped = 0

      # Consume the CIGAR one <length><op> pair at a time.
      while (match(rest, /^[0-9]+[MIDNSHP=X]/)) {
        n = substr(rest, 1, RLENGTH - 1) + 0
        op = substr(rest, RLENGTH, 1)
        rest = substr(rest, RLENGTH + 1)

        # NOTE(review): hard clips and padding are left out of every count;
        # confirm this is the intended treatment.
        if (op == "H" || op == "P") continue

        # total covers M, I, D, S and N (plus = and X, see below).
        total += n
        # "=" and "X" are the split form of "M" in the SAM specification.
        if (op == "M" || op == "=" || op == "X") matched += n
        else if (op == "N") skipped += n
      }

      if (rest != "") {
        print "warning: skipping record with malformed CIGAR: " $6 > "/dev/stderr"
        next
      }

      # N (skipped reference region) is excluded from the denominator.
      aligned = total - skipped
      if (aligned <= 0) next   # avoids a division by zero

      identity = matched / aligned
      # The identity is logged for every evaluated record, kept or not.
      print identity >> freq_file

      if (identity > id && total > len && ($2 == 99 || $2 == 147)) print
    }
  '
}

#######################################
# Validate the arguments and filter every *.sam file of the current directory.
# Arguments: $1 - minimum identity, $2 - minimum length
# Returns:   0 on success, 1 if any file failed, 2 on usage error
#######################################
main() {
  if (( $# != 2 )); then
    usage
    return 2
  fi

  local min_identity=$1
  local min_length=$2
  local number_re='^([0-9]+([.][0-9]*)?|[.][0-9]+)$'
  if [[ ! $min_identity =~ $number_re || ! $min_length =~ $number_re ]]; then
    printf 'error: both arguments must be non-negative numbers\n' >&2
    usage
    return 2
  fi

  # Purge any previously loaded modules.
  module purge || printf 'warning: "module purge" failed\n' >&2

  # nullglob: an unmatched pattern expands to nothing instead of itself.
  shopt -s nullglob
  local sam_files=(*.sam)
  shopt -u nullglob
  if (( ${#sam_files[@]} == 0 )); then
    printf 'error: no .sam file found in %s\n' "$PWD" >&2
    return 1
  fi

  local sam name out_sam freq_file
  local status=0
  for sam in "${sam_files[@]}"; do
    # Results of a previous run also end in .sam; do not filter them again.
    if [[ $sam == *_filtered_CIGAR_*_id_*_length_paired.sam ]]; then
      printf 'skipping previous output: %s\n' "$sam" >&2
      continue
    fi

    name=${sam%.sam}
    printf '%s\n' "$name"

    out_sam="${name}_filtered_CIGAR_${min_identity}_id_${min_length}_length_paired.sam"
    freq_file="${name}_freq.txt"

    # Start from an empty frequency file so that re-runs do not accumulate.
    if ! : > "$freq_file"; then
      printf 'error: cannot write %s\n' "$freq_file" >&2
      status=1
      continue
    fi

    if ! filter_sam "$min_identity" "$min_length" "$freq_file" \
      < "$sam" > "$out_sam"; then
      printf 'error: filtering failed for %s\n' "$sam" >&2
      status=1
    fi
  done

  return "$status"
}

main "$@"