Commit a49ea56f authored by Slaheddine Kastalli's avatar Slaheddine Kastalli
Browse files

new modifications

parent 1579db1f
{
"THREADS": 5,
"SAMPLES": ["SRR5625858", "SRR5625859", "SRR5625860", "SRR5625864", "SRR5625861" ],
"FIVE_PRIMER": {"SRR5625858": "GTGYCAGCMGCCGCGGTA", "SRR5625859": "GTGYCAGCMGCCGCGGTA", "SRR5625860": "GTGYCAGCMGCCGCGGTA", "SRR5625864": "GTGYCAGCMGCCGCGGTA", "SRR5625861": "GTGYCAGCMGCCGCGGTA" },
"THREE_PRIMER": {"SRR5625858": "ACTYAAAKGAATTGRCGGGG", "SRR5625859": "ACTYAAAKGAATTGRCGGGG", "SRR5625860": "ACTYAAAKGAATTGRCGGGG", "SRR5625864": "ACTYAAAKGAATTGRCGGGG", "SRR5625861": "ACTYAAAKGAATTGRCGGGG"},
"THREADS": 20,
"unit": ["R1", "R2"],
"primer": ["fwd", "rev"],
"DATABASE": "/db/frogs_databanks/assignation/16S/silva_132_16S_pintail100/silva_132_16S_pintail100.fasta"
}
\ No newline at end of file
......@@ -2,9 +2,9 @@ library(dada2)
# DADA2 per-sample denoising (Snakemake script).
# input[[1]]/input[[2]]: filtered forward/reverse FASTQ files.
# output[[1]]: RDS file holding the merged, denoised read pairs.
derepF <- derepFastq(snakemake@input[[1]], verbose = TRUE)
derepR <- derepFastq(snakemake@input[[2]], verbose = TRUE)
# randomize = TRUE samples reads at random when learning the error model
# instead of taking them in file order (the stale non-randomized calls
# that were overwritten immediately have been removed).
errF <- learnErrors(snakemake@input[[1]], multithread = snakemake@threads, randomize = TRUE, verbose = TRUE)
errR <- learnErrors(snakemake@input[[2]], multithread = snakemake@threads, randomize = TRUE, verbose = TRUE)
dadaFs <- dada(derepF, err = errF, multithread = snakemake@threads, verbose = TRUE)
dadaRs <- dada(derepR, err = errR, multithread = snakemake@threads, verbose = TRUE)
mergers <- mergePairs(dadaFs, derepF, dadaRs, derepR, verbose = TRUE)
# single save — the duplicated saveRDS call was redundant
saveRDS(mergers, snakemake@output[[1]])
\ No newline at end of file
# Run the DADA2 denoising script on one sample's filtered read pair.
rule dada2:
    input:
        # filtered paired-end reads produced by the filter rule
        # (duplicate stale "_1/_2" keys removed — duplicate keyword
        # arguments are a syntax error in Snakemake)
        filt = "work/filter/{sample}_R1.fastq.gz",
        filtrev = "work/filter/{sample}_R2.fastq.gz"
    output:
        # per-sample merged/denoised result, serialized as RDS
        "work/dada/{sample}.rds"
    threads:
......
# Quality filtering and trimming of paired-end reads (Snakemake script).
# input[[1]]/input[[2]]: raw forward/reverse reads;
# output[[1]]/output[[2]]: filtered forward/reverse reads.
library(dada2)
library(ShortRead)
library(Matrix)
# The merged diff contained two filterAndTrim calls (old and new version),
# which filtered every sample twice; only the newer call is kept.
# truncQ/truncLen/maxEE are spelled out at their dada2 defaults for clarity;
# reads containing any N are discarded and phiX contamination is removed.
filterAndTrim(snakemake@input[[1]], snakemake@output[[1]], snakemake@input[[2]], snakemake@output[[2]], truncQ = 2, truncLen = 0, maxN = 0, maxEE = Inf, rm.phix = TRUE, compress = TRUE, verbose = TRUE)
\ No newline at end of file
......@@ -3,14 +3,26 @@ shell.prefix("source /usr/local/genome/Anaconda2-5.1.0/etc/profile.d/conda.sh;")
configfile: "./config.json"
SAMPLES=config["SAMPLES"]
#SAMPLES=config["SAMPLES"]
unit=config["unit"]
primer=config["primer"]
import os
import pandas as pd
sample_table_file=config.get('sampletable','samples.tsv')
SampleTable = pd.read_table(sample_table_file,index_col=0)
SAMPLES = list(SampleTable.index)
JAVA_MEM_FRACTION=0.85
CONDAENV ='envs'
PAIRED_END= ('R2' in SampleTable.columns)
FRACTIONS= ['R1']
if PAIRED_END: FRACTIONS+= ['R2']
# Final targets of the workflow. The stale uncommented entries that were
# duplicated by their commented-out replacements have been removed.
rule all:
    input:
        #expand("DATA/{sample}.fastq.gz", sample=SAMPLES),
        #"report/multiqc_report.html",
        #expand("work/filter/{sample}_R1.fastq.gz", sample=SAMPLES),
        #expand("work/filter/{sample}_R2.fastq.gz", sample=SAMPLES),
        expand("work/dada/{sample}.rds", sample=SAMPLES),
        "work/dada/seqtab.fasta",
        "work/dada/seqtab.tsv",
......@@ -23,7 +35,7 @@ rule all:
#"report/tree.nwk"
#include: "prefetech.smk"
include: "quality.smk"
include: "preprocess.smk"
include: "dada2.smk"
#include: "affiliation.smk"
#include: "quality.smk"
#include: "revcomp_primer.smk"
#include: "preprocess.smk"
include: "dada2.smk"
\ No newline at end of file
# Per-sample primer trimming with cutadapt 2.5 (activated via conda).
# -g/-a remove the 5'/3' primers from R1; -G/-A mirror this for R2.
# NOTE(review): the same primer is passed to -g and -G — confirm both
# mates really carry the same 5' primer.
rule cutadapt:
input:
fwd = "DATA/{sample}_1.fastq.gz",
rev = "DATA/{sample}_2.fastq.gz"
output:
cut = "work/cutadapt/{sample}_1.fastq.gz",
cutrev = "work/cutadapt/{sample}_2.fastq.gz"
# per-sample primer sequences are looked up in the JSON config
params:
five = lambda wildcards: config["FIVE_PRIMER"][wildcards.sample],
three = lambda wildcards: config["THREE_PRIMER"][wildcards.sample]
# adjacent string literals are concatenated; every fragment keeps a
# trailing space so the assembled shell command stays well-formed
shell:
"conda activate cutadapt-2.5 "
"&& "
"cutadapt "
"-g {params.five} "
"-a {params.three} "
"-G {params.five} "
"-A {params.three} "
"--error-rate 0.1 "
"--discard-untrimmed "
"--match-read-wildcards "
"-o {output.cut} "
"-p {output.cutrev} "
"{input} "
"&& "
"conda deactivate"
# Paired-end primer removal using primer FASTA files: forward/reverse
# primers are trimmed from the 5' ends (-g/-G) and the reverse
# complements of the opposite primer from the 3' ends (-a/-A).
# Pairs where no primer was found are discarded (--discard-untrimmed
# with --pair-filter any).
rule cutadapt_pe:
input:
fwd='DATA/{sample}_R1.fastq.gz',
rev='DATA/{sample}_R2.fastq.gz',
fwd_prime='Primers/fwd_primers.fasta',
rev_prime='Primers/rev_primers.fasta',
# reverse-complement primer files are generated by rule revcomp_primer
fwd_revcomp_prime='Primers/fwd_revcomp_primers.fasta',
rev_revcomp_prime='Primers/rev_revcomp_primers.fasta'
output:
fwd_trim='work/cutadapt/{sample}_R1.fastq.gz',
rev_trim='work/cutadapt/{sample}_R2.fastq.gz'
log:
'report/cutadapt/cutadapt_{sample}'
# each param string ends with a space: they are concatenated directly
# into the command below without separators
params:
discard = '--discard-untrimmed ',
indels = '--no-indels ',
trim_n='--trim-n '
shell:
'''
cutadapt -g file:{input.fwd_prime} -G file:{input.rev_prime} -a file:{input.rev_revcomp_prime} -A file:{input.fwd_revcomp_prime} -o {output.fwd_trim} -p {output.rev_trim} --max-n=0 -m 50 --error-rate 0.1 {params.discard}{params.indels}{params.trim_n}--pair-filter any {input.fwd} {input.rev} > {log}
'''
# Quality-filter primer-trimmed reads via the dada2 filterAndTrim R script.
rule filter:
    input:
        # primer-trimmed reads from the cutadapt rule (duplicate stale
        # "_1/_2" keys removed — duplicate keyword arguments are a
        # syntax error in Snakemake)
        cut = "work/cutadapt/{sample}_R1.fastq.gz",
        cutrev = "work/cutadapt/{sample}_R2.fastq.gz"
    output:
        filt = "work/filter/{sample}_R1.fastq.gz",
        filtrev = "work/filter/{sample}_R2.fastq.gz"
    script:
        "filterAndTrim.R"
# Per-read-file quality control with FastQC.
rule fastqc:
    input:
        # raw paired-end reads (duplicate stale "_1/_2" keys removed —
        # duplicate keyword arguments are a syntax error in Snakemake)
        fwd = "DATA/{sample}_R1.fastq.gz",
        rev = "DATA/{sample}_R2.fastq.gz"
    output:
        # {unit} is R1 or R2 (see config["unit"])
        zip = "work/fastqc/{sample}_{unit}_fastqc.zip",
        html = "work/fastqc/{sample}_{unit}_fastqc.html"
......@@ -32,4 +32,4 @@ rule multiqc:
"--outdir {params.output} "
"{input} "
"&& "
"conda deactivate "
"conda deactivate "
\ No newline at end of file
import gzip
import os
def yield_fasta(fasta, gz=False, rev_comp=False):
    '''Yield (seq_header, seq) tuples for each entry in a FASTA file.

    Assumes each record's sequence sits on a single line after its
    header (true for the primer files this pipeline feeds in).

    fasta: path to the FASTA file.
    gz: if True, open the file as gzip-compressed text.
        (Bug fix: the original called gzip.open without importing gzip,
        so gz=True raised NameError.)
    rev_comp: if True, yield the IUPAC-aware reverse complement of each
        sequence instead of the sequence itself.
    '''
    # IUPAC nucleotide complements (covers ambiguity codes used in primers)
    complement = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A', 'N': 'N',
                  'X': 'X', 'M': 'K', 'R': 'Y', 'W': 'W', 'S': 'S',
                  'Y': 'R', 'K': 'M', 'V': 'B', 'H': 'D', 'D': 'H',
                  'B': 'V'}
    opener = (lambda p: gzip.open(p, 'rt')) if gz else (lambda p: open(p, 'r'))
    # 'with' guarantees the handle is closed (the original leaked it)
    with opener(fasta) as fhs:
        line = fhs.readline().strip()
        while line != '':
            if line.startswith('>'):  # header line: next line is the sequence
                header = line
                raw = fhs.readline().strip().upper()
                if rev_comp:
                    seq = ''.join(complement[base] for base in reversed(raw))
                else:
                    seq = raw
                yield header, seq
            line = fhs.readline().strip()
# Write the reverse complement of each primer FASTA entry to a file
# (consumed by rule cutadapt_pe as the -a/-A adapter files).
# temporary(): the output is deleted once no downstream rule needs it.
rule revcomp_primer:
input:
primers = "Primers/{primer}_primers.fasta"
output:
primers = temporary("Primers/{primer}_revcomp_primers.fasta")
run:
# one "header\nsequence\n" pair per record, reverse-complemented
with open(output[0], 'w') as fho:
fho.writelines(['{}\n{}\n'.format(x[0],x[1]) for x in yield_fasta(input[0], rev_comp=True)])
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment