Commit 7e4d83e6 authored by Slaheddine Kastalli's avatar Slaheddine Kastalli
Browse files

add paired ends and prefetch rule

parent 89a5a86f
{
"THREADS": 8,
"SAMPLES": ["4A", "5A", "6A", "7A", "8A", "9A", "10A", "11A", "12A"],
"FIVE_PRIMER": {"4A": "GTGYCAGCMGCCGCGGTA", "5A": "GTGYCAGCMGCCGCGGTA","6A": "GTGYCAGCMGCCGCGGTA","7A": "GTGYCAGCMGCCGCGGTA","8A": "GTGYCAGCMGCCGCGGTA","9A": "GTGYCAGCMGCCGCGGTA","10A": "GTGYCAGCMGCCGCGGTA","11A": "GTGYCAGCMGCCGCGGTA","12A": "GTGYCAGCMGCCGCGGTA"},
"THREE_PRIMER": {"4A": "ACTYAAAKGAATTGRCGGGG", "5A": "ACTYAAAKGAATTGRCGGGG","6A": "ACTYAAAKGAATTGRCGGGG","7A": "ACTYAAAKGAATTGRCGGGG","8A": "ACTYAAAKGAATTGRCGGGG","9A": "ACTYAAAKGAATTGRCGGGG","10A": "ACTYAAAKGAATTGRCGGGG","11A": "ACTYAAAKGAATTGRCGGGG","12A": "ACTYAAAKGAATTGRCGGGG"},
"THREADS": 1,
"srr": ["SRR1187735"],
"SAMPLES": ["SRR1187735"],
"FIVE_PRIMER": {"SRR1187735": "GTGYCAGCMGCCGCGGTA"},
"THREE_PRIMER": {"SRR1187735": "ACTYAAAKGAATTGRCGGGG"},
"DATABASE": "/db/frogs_databanks/assignation/16S/silva_132_16S_pintail100/silva_132_16S_pintail100.fasta"
}
# Denoise one paired-end sample with DADA2 and save the merged read pairs.
#
# Expects the `snakemake` object to be in scope:
#   snakemake@input[[1]]   forward reads
#   snakemake@input[[2]]   reverse reads
#   snakemake@output[[1]]  RDS file that receives the mergePairs() result
#   snakemake@threads      forwarded to the `multithread` arguments
library(dada2)

fwd_fastq <- snakemake@input[[1]]
rev_fastq <- snakemake@input[[2]]
n_threads <- snakemake@threads

# Dereplicate each read direction separately.
derepF <- derepFastq(fwd_fastq, verbose = TRUE)
derepR <- derepFastq(rev_fastq, verbose = TRUE)

# Forward and reverse reads each get their own error model.
errF <- learnErrors(fwd_fastq, multithread = n_threads, verbose = TRUE)
errR <- learnErrors(rev_fastq, multithread = n_threads, verbose = TRUE)

# Each direction is denoised from its OWN dereplicated reads; the previous
# code fed the forward dereplication to the reverse error model.
dadaFs <- dada(derepF, err = errF, multithread = n_threads, verbose = TRUE)
dadaRs <- dada(derepR, err = errR, multithread = n_threads, verbose = TRUE)

# mergePairs() takes (dadaF, derepF, dadaR, derepR): the derep objects are
# required in addition to the dada results.
mergers <- mergePairs(dadaFs, derepF, dadaRs, derepR, verbose = TRUE)

# Persist the merged pairs (not the `dada` function / single-end result).
saveRDS(mergers, snakemake@output[[1]])
rule dada2:
input:
"work/filter/{sample}.fastq.gz"
filt = "work/filter/{sample}_1.fastq.gz"
filtrev = "work/filter/{sample}_2.fastq.gz"
output:
"work/dada/{sample}.rds"
threads:
......
# Filter one paired-end sample with dada2::filterAndTrim().
#
# Expects the `snakemake` object to be in scope:
#   snakemake@input[[1]]   forward reads in
#   snakemake@input[[2]]   reverse reads in
#   snakemake@output[[1]]  filtered forward reads out
#   snakemake@output[[2]]  filtered reverse reads out
library(dada2)

# Arguments are named because filterAndTrim()'s positional order is
# (fwd, filt, rev, filt.rev); passing the reverse input second would use it
# as the forward OUTPUT path.
filterAndTrim(
  fwd = snakemake@input[[1]],
  filt = snakemake@output[[1]],
  rev = snakemake@input[[2]],
  filt.rev = snakemake@output[[2]],
  maxN = 0,
  rm.phix = TRUE,
  compress = TRUE,
  verbose = TRUE
)
......@@ -19,6 +19,7 @@ rule all:
"report/abundance.tsv",
# "report/tree.nwk"
include: "prefetech.smk"
include: "quality.smk"
include: "preprocess.smk"
include: "dada2.smk"
......
# Run sra-tools `prefetch` for the accession given by the {srr} wildcard.
# The rule's output path is created with `touch` once `prefetch` succeeds;
# stdout and stderr of `prefetch` are redirected to the log file.
rule prefetch:
    output:
        "DATA/.prefetch/{srr}.sra"
    params:
        "{srr} --max-size 50GB -O DATA"
    log:
        "DATA/.prefetch/sra/{srr}.log"
    shell:
        # Adjacent string literals are concatenated into one command line.
        "conda activate sra-tools-2.10.1 && "
        "prefetch {params} > {log} 2>&1 && touch {output} && "
        "conda deactivate "
# Run sra-tools `fasterq-dump` for the accession given by the {srr} wildcard,
# once the prefetch rule's output for that accession exists.
# The output is a Snakemake touch() flag file marking the dump as done;
# stdout and stderr of `fasterq-dump` are redirected to the log file.
rule fastqdump:
    input:
        "DATA/.prefetch/{srr}.sra"
    output:
        touch("DATA/done__{srr}_dump")
    params:
        args = "-S -O DATA/ -t DATA/",
        id_srr = "{srr}"
    log:
        "DATA/{srr}.log"
    shell:
        # Adjacent string literals are concatenated into one command line.
        "conda activate sra-tools-2.10.1 && "
        "fasterq-dump {params.args} {params.id_srr} > {log} 2>&1 && "
        "conda deactivate "
\ No newline at end of file
rule cutadapt:
input:
"DATA/{sample}.fastq.gz"
fwd = "DATA/{sample}_1.fastq.gz"
rev = "DATA/{sample}_2.fastq.gz"
output:
"work/cutadapt/{sample}.fastq.gz"
cut = "work/cutadapt/{sample}_1.fastq.gz"
cutrev = "work/cutadapt/{sample}_2.fastq.gz"
params:
five = lambda wildcards: config["FIVE_PRIMER"][wildcards.sample],
three = lambda wildcards: config["THREE_PRIMER"][wildcards.sample]
......@@ -22,8 +25,10 @@ rule cutadapt:
# Filter the paired cutadapt outputs of one sample with filterAndTrim.R.
# The script reads its files positionally, so the declaration order matters:
#   input[[1]] = cut,   input[[2]] = cutrev
#   output[[1]] = filt, output[[2]] = filtrev
rule filter:
    input:
        cut = "work/cutadapt/{sample}_1.fastq.gz",
        cutrev = "work/cutadapt/{sample}_2.fastq.gz"
    output:
        filt = "work/filter/{sample}_1.fastq.gz",
        filtrev = "work/filter/{sample}_2.fastq.gz"
    script:
        "filterAndTrim.R"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment