Skip to content
Snippets Groups Projects
Commit 193d465a authored by DURAND Karine's avatar DURAND Karine
Browse files

Upload New File

parent 27c18194
No related branches found
No related tags found
No related merge requests found
#!/bin/bash
#SBATCH --mem=40G
#SBATCH -c 8
#SBATCH --partition=dgimi-eha
#
# Joint genotyping and SNP hard-filtering of 95 per-sample chromosome-5 GVCFs
# (runs SRR24497453 .. SRR24497547) against NC_061456.1.chromosome.5.fna.
#
# Slurm request (directives above): 40 GB memory, 8 CPUs per task,
# partition dgimi-eha.
#
# Pipeline:
#   1. CombineGVCFs      merge the per-sample GVCFs into one GVCF
#   2. GenotypeGVCFs     joint genotyping, emitting all sites (-all-sites)
#   3. SelectVariants    keep SNP records only
#   4. VariantFiltration tag records that fail the hard filter
#   5. awk + vcftools    keep header lines and records whose FILTER is PASS
#   6. bgzip             compress the recoded VCF
#   7. tabix             index the compressed VCF

# Load necessary modules
module load JDK/jdk.8_x64

# Strict mode is enabled only after `module load`: some environment-module
# implementations reference unset variables and would trip `set -u`.
# From here on, any failing step aborts the job instead of letting later steps
# run on missing or stale inputs.
set -euo pipefail

die() {
  printf '%s\n' "$*" >&2
  exit 1
}

readonly REF_DIR=/lustre/durandk/HELICOVERPA/ref_HZEA
readonly REFERENCE="${REF_DIR}/NC_061456.1.chromosome.5.fna"
readonly WORK_DIR="${REF_DIR}/VARIANT_CALLING"

# NOTE(review): the original script invoked CombineGVCFs through the /nfs/work
# install and every later GATK step through /storage/simple/projects. Both
# paths are kept as they were; confirm whether they are the same install and
# can be unified.
readonly GATK_COMBINE=/nfs/work/faw_adaptation/programs/gatk-4.1.2.0/gatk
readonly GATK=/storage/simple/projects/faw_adaptation/programs/gatk-4.1.2.0/gatk
readonly VCFTOOLS=/storage/simple/projects/faw_adaptation/programs/vcftools_0.1.13/bin/vcftools
readonly BGZIP=/storage/simple/projects/faw_adaptation/programs/htslib-1.9/bgzip
readonly TABIX=/storage/simple/projects/faw_adaptation/programs/htslib-1.9/tabix

# Common stem of every file written by steps 1-7.
readonly PREFIX=merged.Surjeet_95_HA.chromosome.5

# First and last SRA run number of the cohort; the original command listed
# every run in this range exactly once (95 runs).
readonly FIRST_RUN=24497453
readonly LAST_RUN=24497547

# Navigate to the reference directory (the disabled reference-building
# commands below use file names relative to it).
cd "${REF_DIR}" || die "cannot cd to ${REF_DIR}"

# BUILD REF GENOME -- disabled; presumably already run once for this reference.
#/nfs/work/faw_adaptation/programs/gatk-4.1.2.0/gatk CreateSequenceDictionary -R=NC_061456.1.chromosome.5.fna -O=NC_061456.1.chromosome.5.dict
#/nfs/work/faw_adaptation/programs/bowtie2-2.3.4.1-linux-x86_64/bowtie2-build /lustre/durandk/HELICOVERPA/ref_HZEA/NC_061456.1.chromosome.5.fna /lustre/durandk/HELICOVERPA/ref_HZEA/NC_061456.1.chromosome.5
#/storage/simple/projects/faw_adaptation/programs/samtools-1.9/samtools faidx /lustre/durandk/HELICOVERPA/ref_HZEA/NC_061456.1.chromosome.5.fna

cd "${WORK_DIR}" || die "cannot cd to ${WORK_DIR}"

# Per-sample restriction of each GVCF to chromosome 5 -- disabled; the
# <run>.chromosome.5.g.vcf.gz inputs of step 1 are the files this loop writes.
#while read line
#do /nfs/work/faw_adaptation/programs/gatk-4.1.2.0/gatk SelectVariants -R /lustre/durandk/HELICOVERPA/ref_HZEA/NC_061456.1.chromosome.5.fna -V $line.g.vcf.gz -O $line.chromosome.5.g.vcf.gz --select "CHROM == 'NC_061456.1'"
#done <95liste.txt

# Step 1: combine the per-sample GVCF files into a single GVCF with GATK
# CombineGVCFs. The --variant arguments are collected in an array so the whole
# invocation is a single command; the original list was split over several
# physical lines without continuations, which truncated the command.
variant_args=()
for (( run = FIRST_RUN; run <= LAST_RUN; run++ )); do
  variant_args+=(--variant "SRR${run}.chromosome.5.g.vcf.gz")
done

"${GATK_COMBINE}" CombineGVCFs \
  -R "${REFERENCE}" \
  -O "${PREFIX}.g.vcf.gz" \
  "${variant_args[@]}"

# Step 2: joint genotyping of the merged GVCF with GATK GenotypeGVCFs.
"${GATK}" GenotypeGVCFs \
  -R "${REFERENCE}" \
  --variant "${PREFIX}.g.vcf.gz" \
  -O "${PREFIX}.vcf3.vcf.gz" \
  -all-sites

# Step 3: keep only SNP records from the genotyped VCF.
"${GATK}" SelectVariants \
  -select-type SNP \
  -R "${REFERENCE}" \
  -V "${PREFIX}.vcf3.vcf.gz" \
  -O "${PREFIX}.SNP.vcf.gz"

# Step 4: tag SNPs matching the hard-filter expression with the filter name
# "my_snp_filter" using GATK VariantFiltration.
"${GATK}" VariantFiltration \
  -R "${REFERENCE}" \
  -V "${PREFIX}.SNP.vcf.gz" \
  --filter-expression "QD < 2.0 || FS > 60.0 || MQ < 40.0 || MQRankSum < -12.5 || ReadPosRankSum < -8.0" \
  --filter-name "my_snp_filter" \
  -O "${PREFIX}.SNP.annotated.vcf.gz"

# Step 5: drop the SNPs that did not pass. Header lines (leading '#') are kept
# and data records are kept only when the FILTER column (7th tab-separated
# field) is exactly PASS, so a stray '#' or "PASS" elsewhere in a record can
# no longer let it through. vcftools then writes ${PREFIX}.SNP.recode.vcf.
zcat "${PREFIX}.SNP.annotated.vcf.gz" \
  | awk -F'\t' '/^#/ || $7 == "PASS"' \
  | "${VCFTOOLS}" --vcf - --out "${PREFIX}.SNP" --recode

# Step 6: compress the filtered VCF. -f overwrites a .gz left by a previous
# run, matching the overwrite behaviour of the earlier steps.
"${BGZIP}" -f "${PREFIX}.SNP.recode.vcf"

# Step 7: index the compressed VCF. -f replaces an index left by a previous run.
"${TABIX}" -f -p vcf "${PREFIX}.SNP.recode.vcf.gz"
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment