From 8f9e6b0d82c5cd6ef76b409966aa6ab388ef6e34 Mon Sep 17 00:00:00 2001
From: mariabernard <maria.bernard@jouy.inra.fr>
Date: Thu, 14 Mar 2019 14:07:58 +0100
Subject: [PATCH] add minimum base quality threshold parameter

---
 Snakemake/IMAGE_calling/README.md             | 23 +++++++------------
 Snakemake/IMAGE_calling/config.yaml           |  1 +
 Snakemake/IMAGE_calling/rules/freebayes.rules |  4 ++--
 .../IMAGE_calling/rules/haplotypecaller.rules |  3 ++-
 Snakemake/IMAGE_calling/rules/mpileup.rules   |  3 ++-
 5 files changed, 15 insertions(+), 19 deletions(-)

diff --git a/Snakemake/IMAGE_calling/README.md b/Snakemake/IMAGE_calling/README.md
index ea58c0d..9d1598f 100644
--- a/Snakemake/IMAGE_calling/README.md
+++ b/Snakemake/IMAGE_calling/README.md
@@ -2,12 +2,12 @@
 
 __author__  Kenza BAZI KABBAJ & Maria BERNARD, INRA - SIGENAE
 
-* [What is the IMAGE variant calling worklow?](#what-is-the-image-variant-calling-worklow-)
-  * [1. Inputs](#1-inputs)
-  * [2. Data preprocessing](#2-data-preprocessing)
-  * [3. Variant Calling](#3-variant-calling)
-* [Prerequisites](#prerequisites)
-* [Configure workflow and running workflow](#configure-workflow-and-running-workflow)
+- [What is the IMAGE variant calling worklow?](#what-is-the-image-variant-calling-worklow-)
+  - [1. Inputs](#1-inputs)
+  - [2. Data preprocessing](#2-data-preprocessing)
+  - [3. Variant Calling](#3-variant-calling)
+- [Prerequisites](#prerequisites)
+- [Configure workflow and running workflow](#configure-workflow-and-running-workflow)
 
 
 
@@ -29,7 +29,6 @@ The user must use the [config.yaml](config.yaml) file to provide all necessary i
   - R1 READS: absolut path of R1 reads
   - R2 READS: absolute path of R2 reads. If single reads, write "None"
   - PLATEFORM: Sequencing Plateform, choices : ILLUMINA, SOLID, LS454, HELICOS and PACBIO.
-
 - a set of known variant in VCF format.
 - path to binaries, and the resources.yaml (see [resources_SLURM.yaml](resources_SLURM.yaml)) file to set cluster resources for each programs
 
@@ -65,7 +64,7 @@ Merge bam files for multiple sample replicates (unit) into one using MergeSamFil
 
  
 
--  Marking duplicates
+- Marking duplicates
 
 Rule for marking PCR and optical duplicates using Picard MarkDuplicates. 
 
@@ -103,7 +102,7 @@ It performs mapping quality on realigned and recalibrated bams using qualimap ba
 
 ## 3. Variant Calling
 
-Note that for each of the 3 callers, by default, we keep only variant with a mapping quality > 30 ( see [config.yaml](config.yaml)). 
+Note that for each of the 3 callers, by default, we keep only variant with a minimum mapping quality of 30 and minimum base quality of 10( see [config.yaml](config.yaml)). 
 
 
 
@@ -180,17 +179,11 @@ Absolut paths need to be precise in the config.yaml
 
 # Configure workflow and running workflow
 
-
 - Copy the config.yaml file into your working directory and update it according to the instructions inside the file.
 - If necessary update resources_SLURM.yaml to define cpu and memory resources for each rule. The default section is the minimal resources per job. The other section take as name the rules name and define only resources that are different from the default section.
 - launch snakemake command as presented in SLURM.sh to launch the workflow.
 
-
 Resources_SLURM.yaml and SLURM.sh are specific to SLURM cluster. Therefore, if you use another cluster, you need to create other files. However, you can use these files to inspire you.
 ​    
 
 This is the official documentation of Snakemake if you need more information: <https://snakemake.readthedocs.io/en/stable/tutorial/tutorial.html>
-
-
-
-
diff --git a/Snakemake/IMAGE_calling/config.yaml b/Snakemake/IMAGE_calling/config.yaml
index d2e6509..a3894de 100755
--- a/Snakemake/IMAGE_calling/config.yaml
+++ b/Snakemake/IMAGE_calling/config.yaml
@@ -24,6 +24,7 @@ min_seed_length: ""             #Default = 4
 
 # Calling general parameter: 
 min_mapping_quality: ""         #Default=30: Minimum read mapping quality required to consider a read for calling
+min_base_quality: ""            #Default=10: Minimum base sequencing quality required to consider the base for calling
 
 # Gatk HaplotypeCaller parameters: 
 stand_call_conf: ""             #Default=30: The minimum phred-scaled confidence threshold at which variants should be called,(and filtered with LowQual if less than the calling threshold)
diff --git a/Snakemake/IMAGE_calling/rules/freebayes.rules b/Snakemake/IMAGE_calling/rules/freebayes.rules
index 777bf1e..b6b934b 100644
--- a/Snakemake/IMAGE_calling/rules/freebayes.rules
+++ b/Snakemake/IMAGE_calling/rules/freebayes.rules
@@ -20,12 +20,12 @@ rule freebayes:
         min_supporting_quality="0" if config["min_supporting_quality"]=="" else config["min_supporting_quality"],
         genotype_variant_threshold="0" if config["genotype_variant_threshold"]=="" else config["genotype_variant_threshold"],
         min_mapping_quality="30" if config["min_mapping_quality"]=="" else config["min_mapping_quality"],
-        
+        min_base_quality="10" if config["min_base_quality"]=="" else config["min_base_quality"]
     log:
         "results/freebayes/logs/{sample}_calling.stderr"
     
     shell:
-        "{config[bin][freebayes]} -f {input.ref} -P {params.pvar} -R {params.min_supporting_quality} -S {params.genotype_variant_threshold} -m {params.min_mapping_quality} {input.bam} > {output} 2> {log}"
+        "{config[bin][freebayes]} -f {input.ref} -P {params.pvar} -R {params.min_supporting_quality} -S {params.genotype_variant_threshold} --min-mapping-quality {params.min_mapping_quality} --min-base-quality {params.min_base_quality} {input.bam} > {output} 2> {log}"
 
 rule freebayes_gzip:
     input:
diff --git a/Snakemake/IMAGE_calling/rules/haplotypecaller.rules b/Snakemake/IMAGE_calling/rules/haplotypecaller.rules
index d8e2677..40fa56b 100755
--- a/Snakemake/IMAGE_calling/rules/haplotypecaller.rules
+++ b/Snakemake/IMAGE_calling/rules/haplotypecaller.rules
@@ -27,8 +27,9 @@ rule HaplotypeCaller:
         mem=config["HaplotypeCaller"]["mem"],
         stand_call_conf="30" if config["stand_call_conf"]=="" else config["stand_call_conf"],
         mmq="30" if config["min_mapping_quality"]=="" else config["min_mapping_quality"]
+        mbq="10" if config["min_base_quality"]=="" else config["min_base_quality"]
         
     shell:
         """
-        java -Xmx{params.mem} -jar {config[bin][gatk]} -T HaplotypeCaller -R {input.ref} -I {input.bam} -o {output.gvcf} -ERC GVCF -variant_index_type LINEAR -variant_index_parameter 128000 -stand_call_conf {params.stand_call_conf} -mmq {params.mmq} 2> {log.stderr} > {log.stdout}
+        java -Xmx{params.mem} -jar {config[bin][gatk]} -T HaplotypeCaller -R {input.ref} -I {input.bam} -o {output.gvcf} -ERC GVCF -variant_index_type LINEAR -variant_index_parameter 128000 -stand_call_conf {params.stand_call_conf} -mmq {params.mmq} -mbq {params.mbq} 2> {log.stderr} > {log.stdout}
         """
diff --git a/Snakemake/IMAGE_calling/rules/mpileup.rules b/Snakemake/IMAGE_calling/rules/mpileup.rules
index 169e7a1..2f17903 100644
--- a/Snakemake/IMAGE_calling/rules/mpileup.rules
+++ b/Snakemake/IMAGE_calling/rules/mpileup.rules
@@ -18,13 +18,14 @@ rule mpileup:
         
     params:
         min_mapping_quality="30" if config["min_mapping_quality"]=="" else config["min_mapping_quality"],
+        min_base_quality="10" if config["min_base_quality"]=="" else config["min_base_quality"],
         adjust_mapping_quality="50" if config["adjust_mapping_quality"]=="" else config["adjust_mapping_quality"]
     
     log:
         "results/mpileup/logs/{sample}_calling.stderr"
     
     shell:
-        "{config[bin][samtools]} mpileup -C {params.adjust_mapping_quality} -ugf {input.ref} -q {params.min_mapping_quality} {input.bam} 2>> {log} | {config[bin][bcftools]} call -m -v --output-type v -o {output} - 2>> {log}"
+        "{config[bin][samtools]} mpileup -C {params.adjust_mapping_quality} -ugf {input.ref} -q {params.min_mapping_quality} -Q {params.min_base_quality} {input.bam} 2>> {log} | {config[bin][bcftools]} call -m -v --output-type v -o {output} - 2>> {log}"
 
 rule mpileup_gzip:
     input:
-- 
GitLab