diff --git a/Snakemake/IMAGE_calling/README.md b/Snakemake/IMAGE_calling/README.md index ea58c0de07c66b8b82f165c4c851fbb8788b5a9e..9d1598fa2cbf88b051c70976cf1b00b7a5a4eea3 100644 --- a/Snakemake/IMAGE_calling/README.md +++ b/Snakemake/IMAGE_calling/README.md @@ -2,12 +2,12 @@ __author__ Kenza BAZI KABBAJ & Maria BERNARD, INRA - SIGENAE -* [What is the IMAGE variant calling worklow?](#what-is-the-image-variant-calling-worklow-) - * [1. Inputs](#1-inputs) - * [2. Data preprocessing](#2-data-preprocessing) - * [3. Variant Calling](#3-variant-calling) -* [Prerequisites](#prerequisites) -* [Configure workflow and running workflow](#configure-workflow-and-running-workflow) +- [What is the IMAGE variant calling worklow?](#what-is-the-image-variant-calling-worklow-) + - [1. Inputs](#1-inputs) + - [2. Data preprocessing](#2-data-preprocessing) + - [3. Variant Calling](#3-variant-calling) +- [Prerequisites](#prerequisites) +- [Configure workflow and running workflow](#configure-workflow-and-running-workflow) @@ -29,7 +29,6 @@ The user must use the [config.yaml](config.yaml) file to provide all necessary i - R1 READS: absolut path of R1 reads - R2 READS: absolute path of R2 reads. If single reads, write "None" - PLATEFORM: Sequencing Plateform, choices : ILLUMINA, SOLID, LS454, HELICOS and PACBIO. - - a set of known variant in VCF format. - path to binaries, and the resources.yaml (see [resources_SLURM.yaml](resources_SLURM.yaml)) file to set cluster resources for each programs @@ -65,7 +64,7 @@ Merge bam files for multiple sample replicates (unit) into one using MergeSamFil -- Marking duplicates +- Marking duplicates Rule for marking PCR and optical duplicates using Picard MarkDuplicates. @@ -103,7 +102,7 @@ It performs mapping quality on realigned and recalibrated bams using qualimap ba ## 3. Variant Calling -Note that for each of the 3 callers, by default, we keep only variant with a mapping quality > 30 ( see [config.yaml](config.yaml)). +Note that for each of the 3 callers, by default, we keep only variant with a minimum mapping quality of 30 and minimum base quality of 10( see [config.yaml](config.yaml)). @@ -180,17 +179,11 @@ Absolut paths need to be precise in the config.yaml # Configure workflow and running workflow - - Copy the config.yaml file into your working directory and update it according to the instructions inside the file. - If necessary update resources_SLURM.yaml to define cpu and memory resources for each rule. The default section is the minimal resources per job. The other section take as name the rules name and define only resources that are different from the default section. - launch snakemake command as presented in SLURM.sh to launch the workflow. - Resources_SLURM.yaml and SLURM.sh are specific to SLURM cluster. Therefore, if you use another cluster, you need to create other files. However, you can use these files to inspire you. ​ This is the official documentation of Snakemake if you need more information: <https://snakemake.readthedocs.io/en/stable/tutorial/tutorial.html> - - - - diff --git a/Snakemake/IMAGE_calling/config.yaml b/Snakemake/IMAGE_calling/config.yaml index d2e6509c4365e38ba08f670f69f7d8277e038d8a..a3894de11db393247d7f85a3e5eec75b6019013b 100755 --- a/Snakemake/IMAGE_calling/config.yaml +++ b/Snakemake/IMAGE_calling/config.yaml @@ -24,6 +24,7 @@ min_seed_length: "" #Default = 4 # Calling general parameter: min_mapping_quality: "" #Default=30: Minimum read mapping quality required to consider a read for calling +min_base_quality: "" #Default=10: Minimum base sequencing quality required to consider the base for calling # Gatk HaplotypeCaller parameters: stand_call_conf: "" #Default=30: The minimum phred-scaled confidence threshold at which variants should be called,(and filtered with LowQual if less than the calling threshold) diff --git a/Snakemake/IMAGE_calling/rules/freebayes.rules b/Snakemake/IMAGE_calling/rules/freebayes.rules index 777bf1e7061dee4f19149b31de8a1ca0c8aee62d..b6b934b9eefc26747a09c6d15638bc3bf0698c59 100644 --- a/Snakemake/IMAGE_calling/rules/freebayes.rules +++ b/Snakemake/IMAGE_calling/rules/freebayes.rules @@ -20,12 +20,12 @@ rule freebayes: min_supporting_quality="0" if config["min_supporting_quality"]=="" else config["min_supporting_quality"], genotype_variant_threshold="0" if config["genotype_variant_threshold"]=="" else config["genotype_variant_threshold"], min_mapping_quality="30" if config["min_mapping_quality"]=="" else config["min_mapping_quality"], - + min_base_quality="10" if config["min_base_quality"]=="" else config["min_base_quality"] log: "results/freebayes/logs/{sample}_calling.stderr" shell: - "{config[bin][freebayes]} -f {input.ref} -P {params.pvar} -R {params.min_supporting_quality} -S {params.genotype_variant_threshold} -m {params.min_mapping_quality} {input.bam} > {output} 2> {log}" + "{config[bin][freebayes]} -f {input.ref} -P {params.pvar} -R {params.min_supporting_quality} -S {params.genotype_variant_threshold} --min-mapping-quality {params.min_mapping_quality} --min-base-quality {params.min_base_quality} {input.bam} > {output} 2> {log}" rule freebayes_gzip: input: diff --git a/Snakemake/IMAGE_calling/rules/haplotypecaller.rules b/Snakemake/IMAGE_calling/rules/haplotypecaller.rules index d8e26778844ab0cabc3c519cdf87da1524641d57..40fa56beec4d0991c984b6415bad17142a54b595 100755 --- a/Snakemake/IMAGE_calling/rules/haplotypecaller.rules +++ b/Snakemake/IMAGE_calling/rules/haplotypecaller.rules @@ -27,8 +27,9 @@ rule HaplotypeCaller: mem=config["HaplotypeCaller"]["mem"], stand_call_conf="30" if config["stand_call_conf"]=="" else config["stand_call_conf"], mmq="30" if config["min_mapping_quality"]=="" else config["min_mapping_quality"] + mbq="10" if config["min_base_quality"]=="" else config["min_base_quality"] shell: """ - java -Xmx{params.mem} -jar {config[bin][gatk]} -T HaplotypeCaller -R {input.ref} -I {input.bam} -o {output.gvcf} -ERC GVCF -variant_index_type LINEAR -variant_index_parameter 128000 -stand_call_conf {params.stand_call_conf} -mmq {params.mmq} 2> {log.stderr} > {log.stdout} + java -Xmx{params.mem} -jar {config[bin][gatk]} -T HaplotypeCaller -R {input.ref} -I {input.bam} -o {output.gvcf} -ERC GVCF -variant_index_type LINEAR -variant_index_parameter 128000 -stand_call_conf {params.stand_call_conf} -mmq {params.mmq} -mbq {params.mbq} 2> {log.stderr} > {log.stdout} """ diff --git a/Snakemake/IMAGE_calling/rules/mpileup.rules b/Snakemake/IMAGE_calling/rules/mpileup.rules index 169e7a18129e91ca51ad47219270e20e78cee383..2f17903c233b1840e61dea32b9af2edf5c03e3d1 100644 --- a/Snakemake/IMAGE_calling/rules/mpileup.rules +++ b/Snakemake/IMAGE_calling/rules/mpileup.rules @@ -18,13 +18,14 @@ rule mpileup: params: min_mapping_quality="30" if config["min_mapping_quality"]=="" else config["min_mapping_quality"], + min_base_quality="10" if config["min_base_quality"]=="" else config["min_base_quality"], adjust_mapping_quality="50" if config["adjust_mapping_quality"]=="" else config["adjust_mapping_quality"] log: "results/mpileup/logs/{sample}_calling.stderr" shell: - "{config[bin][samtools]} mpileup -C {params.adjust_mapping_quality} -ugf {input.ref} -q {params.min_mapping_quality} {input.bam} 2>> {log} | {config[bin][bcftools]} call -m -v --output-type v -o {output} - 2>> {log}" + "{config[bin][samtools]} mpileup -C {params.adjust_mapping_quality} -ugf {input.ref} -q {params.min_mapping_quality} -Q {params.min_base_quality} {input.bam} 2>> {log} | {config[bin][bcftools]} call -m -v --output-type v -o {output} - 2>> {log}" rule mpileup_gzip: input: