#!/bin/bash
#
# Filter paired-end SAM alignments on their CIGAR string, then convert the
# retained records to BAM and to paired FASTQ files.
#
# Usage: Filtre_CIGAR <min_identity> <min_length>
#   $1  identity threshold. It is compared with the ratio M / (M+I+D+S)
#       computed from the CIGAR, so it must be given as a fraction (e.g. 0.9).
#   $2  length threshold. It is compared with the M+I+D+S total of the CIGAR.
#
# Inputs : every */*_mapped_fusariose.sam below the current directory.
# Outputs (written to the current directory, <name> = input basename):
#   <name>_filtered_CIGAR_<$1>_id_<$2>_length_paired.sam / .bam
#   <name>_filtered_CIGAR_<$1>_id_<$2>_length_paired_R1.fq / _R2.fq (gzipped)
#   <name>_freq.txt  one M/(M+I+D+S) ratio per aligned record (appended to,
#                    so ratios from earlier runs are kept)
#
# Requires the `module` command with bioinfo/samtools-1.9 and
# bioinfo/bedtools-2.27.1 available.

# Print a short usage message on stderr.
usage() {
  printf 'Usage: %s <min_identity> <min_length>\n' "${0##*/}" >&2
}

#######################################
# Write the filtered content of one SAM file to stdout.
# Header lines (starting with "@") are copied unchanged. An alignment record
# is kept when all of the following hold:
#   - its CIGAR (column 6) is not "*",
#   - M / (M+I+D+S) is strictly greater than the identity threshold,
#   - M+I+D+S is strictly greater than the length threshold,
#   - its FLAG (column 2) is 99 or 147.
# CIGAR operations other than M, I, D and S are ignored in both totals.
# Arguments:
#   $1 - input SAM file
#   $2 - identity threshold (fraction)
#   $3 - length threshold
#   $4 - file to which the ratio of every aligned record is appended
# Outputs:
#   Filtered SAM on stdout; ratios appended to $4.
#######################################
filter_sam_records() {
  local sam=$1 min_id=$2 min_len=$3 freq_file=$4

  # Tab is used as both input and output separator and records are printed
  # unmodified, so fields keep any embedded space and no space-to-tab
  # conversion is needed afterwards. The input is redirected rather than
  # passed as an operand so that a path containing "=" cannot be mistaken
  # for an awk variable assignment.
  awk -v min_id="$min_id" -v min_len="$min_len" -v freq_file="$freq_file" '
    BEGIN { FS = OFS = "\t" }

    /^@/      { print; next }   # header line
    $6 == "*" { next }          # no alignment, nothing to measure

    {
      cigar = $6
      matched = 0
      total = 0

      # Consume the CIGAR one <count><operation> pair at a time.
      while (match(cigar, /^[0-9]+[MIDNSHP=X]/)) {
        count = substr(cigar, 1, RLENGTH - 1) + 0
        op = substr(cigar, RLENGTH, 1)
        if (op == "M") {
          matched += count
        }
        if (op ~ /^[MIDS]$/) {
          total += count
        }
        cigar = substr(cigar, RLENGTH + 1)
      }

      # Avoid a division by zero on a CIGAR without any M/I/D/S operation.
      if (total == 0) {
        next
      }

      ratio = matched / total
      # The ratio is logged for every aligned record, kept or not.
      print ratio >> freq_file

      if (ratio > min_id && total > min_len && ($2 == 99 || $2 == 147)) {
        print
      }
    }
  ' < "$sam"
}

#######################################
# Validate the thresholds, then filter and convert every matching SAM file.
# Arguments:
#   $1 - identity threshold (fraction)
#   $2 - length threshold
# Returns:
#   0 on success, 2 on invalid arguments, 1 if any file failed.
#######################################
main() {
  if (( $# < 2 )); then
    usage
    return 2
  fi

  local min_id=$1 min_len=$2
  local number_re='^([0-9]+([.][0-9]*)?|[.][0-9]+)$'
  if ! [[ $min_id =~ $number_re && $min_len =~ $number_re ]]; then
    printf 'error: both thresholds must be non-negative numbers\n' >&2
    usage
    return 2
  fi

  module purge

  local sam name prefix rc status=0
  for sam in */*_mapped_fusariose.sam; do
    # Without a match the glob stays literal; skip it.
    [[ -e $sam ]] || continue

    name=$(basename -- "$sam" .sam)
    printf '%s\n' "$name"
    prefix="${name}_filtered_CIGAR_${min_id}_id_${min_len}_length_paired"

    if ! filter_sam_records "$sam" "$min_id" "$min_len" "${name}_freq.txt" \
      > "${prefix}.sam"; then
      printf 'error: filtering failed for %s\n' "$sam" >&2
      status=1
      continue
    fi

    module load bioinfo/samtools-1.9
    samtools view -S -b "${prefix}.sam" > "${prefix}.bam"
    rc=$?
    module unload bioinfo/samtools-1.9
    if (( rc != 0 )); then
      printf 'error: samtools view failed for %s\n' "${prefix}.sam" >&2
      status=1
      continue
    fi

    module load bioinfo/bedtools-2.27.1
    bamToFastq -i "${prefix}.bam" \
      -fq "${prefix}_R1.fq" \
      -fq2 "${prefix}_R2.fq"
    rc=$?
    module unload bioinfo/bedtools-2.27.1
    if (( rc != 0 )); then
      printf 'error: bamToFastq failed for %s\n' "${prefix}.bam" >&2
      status=1
    fi
  done

  # Compress every R1/R2 FASTQ present in the current directory, if any.
  local fq
  local -a fastqs=()
  for fq in *R1.fq *R2.fq; do
    [[ -e $fq ]] && fastqs+=("$fq")
  done
  if (( ${#fastqs[@]} > 0 )); then
    gzip -- "${fastqs[@]}" || status=1
  fi

  return "$status"
}

main "$@"