Skip to content
Snippets Groups Projects
Commit 99b7067f authored by Maria Bernard's avatar Maria Bernard
Browse files

Snakemake 1000RNASeq: add fastq quality filtering threshold at 15 by default

parent 17359c00
No related branches found
No related tags found
No related merge requests found
......@@ -173,12 +173,13 @@ rule trim_se:
output:
out=temp("Results/TrimGalore/{prefix}_trim.fastq.gz")
params:
min_trim=lambda wildcards : str(int([ int(table[table["forward_read"] == f]["read_length"].tolist()[0]) for f in table["forward_read"] if f.startswith(wildcards.prefix)][0] / 3 ))
min_trim=lambda wildcards : str(int([ int(table[table["forward_read"] == f]["read_length"].tolist()[0]) for f in table["forward_read"] if f.startswith(wildcards.prefix)][0] / 3 )),
qual=config["trimming_quality"]
log:
"Results/TrimGalore/{prefix}_trimSe.log"
shell:
"""
trim_galore --no_report_file --length {params.min_trim} --quality 0 -o `dirname {output.out}` {input} 2> {log}
trim_galore --no_report_file --length {params.min_trim} --quality {params.qual} -o `dirname {output.out}` {input} 2> {log}
mv `dirname {output.out}`/{wildcards.prefix}*trimmed.fq.gz {output.out}
"""
......@@ -191,10 +192,11 @@ rule trim_pe:
log:
"Results/TrimGalore/{prefix}_trimPe.log"
params:
min_trim=lambda wildcards : str(int([ int(table[table["forward_read"] == f]["read_length"].tolist()[0]) for f in table["forward_read"] if f.startswith(wildcards.prefix)][0] / 3 ))
min_trim=lambda wildcards : str(int([ int(table[table["forward_read"] == f]["read_length"].tolist()[0]) for f in table["forward_read"] if f.startswith(wildcards.prefix)][0] / 3 )),
qual=config["trimming_quality"]
shell:
"""
trim_galore --paired --no_report_file --length {params.min_trim} --quality 0 -o `dirname {output.out1}` {input} 2> {log}
trim_galore --paired --no_report_file --length {params.min_trim} --quality {params.qual} -o `dirname {output.out1}` {input} 2> {log}
mv `dirname {output.out1}`/{wildcards.prefix}*_val_1.fq.gz {output.out1}
mv `dirname {output.out1}`/{wildcards.prefix}*_val_2.fq.gz {output.out2}
"""
......
......@@ -36,6 +36,9 @@ gtf_ref : data/reference.gtf
# known_vcf file is a set of known variants used to recalibrate base quality in the GATK preprocessing steps RealignerTargetCreator and BaseRecalibrator
known_vcf : data/reference_known_var.vcf.gz
# quality trimming threshold used by Trim Galore to remove low-quality bases.
trimming_quality : 15
# computing resources; also pass this file to the --cluster-config snakemake option if executed on a cluster
# this yaml file defines default resources (at least mem and cpu) in a __default__ section, and rule-specific resources (all of them or just one) for any rule that differs from the default
resources: resources_calling_SLURM.yaml
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment