Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Cedric Midoux
deepomics16S
Commits
a49ea56f
Commit
a49ea56f
authored
May 24, 2021
by
Slaheddine Kastalli
Browse files
new modifications
parent
1579db1f
Changes
8
Hide whitespace changes
Inline
Side-by-side
config.json
View file @
a49ea56f
{
"THREADS"
:
5
,
"SAMPLES"
:
[
"SRR5625858"
,
"SRR5625859"
,
"SRR5625860"
,
"SRR5625864"
,
"SRR5625861"
],
"FIVE_PRIMER"
:
{
"SRR5625858"
:
"GTGYCAGCMGCCGCGGTA"
,
"SRR5625859"
:
"GTGYCAGCMGCCGCGGTA"
,
"SRR5625860"
:
"GTGYCAGCMGCCGCGGTA"
,
"SRR5625864"
:
"GTGYCAGCMGCCGCGGTA"
,
"SRR5625861"
:
"GTGYCAGCMGCCGCGGTA"
},
"THREE_PRIMER"
:
{
"SRR5625858"
:
"ACTYAAAKGAATTGRCGGGG"
,
"SRR5625859"
:
"ACTYAAAKGAATTGRCGGGG"
,
"SRR5625860"
:
"ACTYAAAKGAATTGRCGGGG"
,
"SRR5625864"
:
"ACTYAAAKGAATTGRCGGGG"
,
"SRR5625861"
:
"ACTYAAAKGAATTGRCGGGG"
},
"THREADS"
:
20
,
"unit"
:
[
"R1"
,
"R2"
],
"primer"
:
[
"fwd"
,
"rev"
],
"DATABASE"
:
"/db/frogs_databanks/assignation/16S/silva_132_16S_pintail100/silva_132_16S_pintail100.fasta"
}
\ No newline at end of file
dada2.R
View file @
a49ea56f
...
...
@@ -2,9 +2,9 @@ library(dada2)
derepF
<-
derepFastq
(
snakemake
@
input
[[
1
]],
verbose
=
TRUE
)
derepR
<-
derepFastq
(
snakemake
@
input
[[
2
]],
verbose
=
TRUE
)
errF
<-
learnErrors
(
snakemake
@
input
[[
1
]],
multithread
=
snakemake
@
threads
,
verbose
=
TRUE
)
errR
<-
learnErrors
(
snakemake
@
input
[[
2
]],
multithread
=
snakemake
@
threads
,
verbose
=
TRUE
)
errF
<-
learnErrors
(
snakemake
@
input
[[
1
]],
multithread
=
snakemake
@
threads
,
randomize
=
TRUE
,
verbose
=
TRUE
)
errR
<-
learnErrors
(
snakemake
@
input
[[
2
]],
multithread
=
snakemake
@
threads
,
randomize
=
TRUE
,
verbose
=
TRUE
)
dadaFs
<-
dada
(
derepF
,
err
=
errF
,
multithread
=
snakemake
@
threads
,
verbose
=
TRUE
)
dadaRs
<-
dada
(
derepR
,
err
=
errR
,
multithread
=
snakemake
@
threads
,
verbose
=
TRUE
)
mergers
<-
mergePairs
(
dadaFs
,
derepF
,
dadaRs
,
derepR
,
verbose
=
TRUE
)
saveRDS
(
mergers
,
snakemake
@
output
[[
1
]])
saveRDS
(
mergers
,
snakemake
@
output
[[
1
]])
\ No newline at end of file
dada2.smk
View file @
a49ea56f
rule dada2:
input:
filt = "work/filter/{sample}_1.fastq.gz",
filtrev = "work/filter/{sample}_2.fastq.gz"
filt = "work/filter/{sample}_
R
1.fastq.gz",
filtrev = "work/filter/{sample}_
R
2.fastq.gz"
output:
"work/dada/{sample}.rds"
threads:
...
...
filterAndTrim.R
View file @
a49ea56f
library
(
dada2
)
filterAndTrim
(
snakemake
@
input
[[
1
]],
snakemake
@
output
[[
1
]],
snakemake
@
input
[[
2
]],
snakemake
@
output
[[
2
]],
maxN
=
0
,
rm.phix
=
TRUE
,
compress
=
TRUE
,
verbose
=
TRUE
)
\ No newline at end of file
library
(
"ShortRead"
)
library
(
Matrix
)
filterAndTrim
(
snakemake
@
input
[[
1
]],
snakemake
@
output
[[
1
]],
snakemake
@
input
[[
2
]],
snakemake
@
output
[[
2
]],
truncQ
=
2
,
truncLen
=
0
,
maxN
=
0
,
maxEE
=
Inf
,
rm.phix
=
TRUE
,
compress
=
TRUE
,
verbose
=
TRUE
)
\ No newline at end of file
global.smk
View file @
a49ea56f
...
...
@@ -3,14 +3,26 @@ shell.prefix("source /usr/local/genome/Anaconda2-5.1.0/etc/profile.d/conda.sh;")
configfile: "./config.json"
SAMPLES=config["SAMPLES"]
#
SAMPLES=config["SAMPLES"]
unit=config["unit"]
primer=config["primer"]
import os
import pandas as pd
sample_table_file=config.get('sampletable','samples.tsv')
SampleTable = pd.read_table(sample_table_file,index_col=0)
SAMPLES = list(SampleTable.index)
JAVA_MEM_FRACTION=0.85
CONDAENV ='envs'
PAIRED_END= ('R2' in SampleTable.columns)
FRACTIONS= ['R1']
if PAIRED_END: FRACTIONS+= ['R2']
rule all:
input:#expand("DATA/{sample}.fastq.gz", sample=SAMPLES),
"report/multiqc_report.html",
expand("work/filter/{sample}_1.fastq.gz", sample=SAMPLES),
expand("work/filter/{sample}_2.fastq.gz", sample=SAMPLES),
#
"report/multiqc_report.html",
#
expand("work/filter/{sample}_
R
1.fastq.gz", sample=SAMPLES),
#
expand("work/filter/{sample}_
R
2.fastq.gz", sample=SAMPLES),
expand("work/dada/{sample}.rds", sample=SAMPLES),
"work/dada/seqtab.fasta",
"work/dada/seqtab.tsv",
...
...
@@ -23,7 +35,7 @@ rule all:
#"report/tree.nwk"
#include: "prefetech.smk"
include: "quality.smk"
include: "
p
re
process
.smk"
include: "
dada2
.smk"
#
include: "
affiliation
.smk"
#
include: "quality.smk"
#
include: "re
vcomp_primer
.smk"
#
include: "
preprocess
.smk"
include: "
dada2
.smk"
\ No newline at end of file
preprocess.smk
View file @
a49ea56f
rule cutadapt:
input:
fwd = "DATA/{sample}_1.fastq.gz",
rev = "DATA/{sample}_2.fastq.gz"
output:
cut = "work/cutadapt/{sample}_1.fastq.gz",
cutrev = "work/cutadapt/{sample}_2.fastq.gz"
params:
five = lambda wildcards: config["FIVE_PRIMER"][wildcards.sample],
three = lambda wildcards: config["THREE_PRIMER"][wildcards.sample]
shell:
"conda activate cutadapt-2.5 "
"&& "
"cutadapt "
"-g {params.five} "
"-a {params.three} "
"-G {params.five} "
"-A {params.three} "
"--error-rate 0.1 "
"--discard-untrimmed "
"--match-read-wildcards "
"-o {output.cut} "
"-p {output.cutrev} "
"{input} "
"&& "
"conda deactivate"
rule cutadapt_pe:
input:
fwd='DATA/{sample}_R1.fastq.gz',
rev='DATA/{sample}_R2.fastq.gz',
fwd_prime='Primers/fwd_primers.fasta',
rev_prime='Primers/rev_primers.fasta',
fwd_revcomp_prime='Primers/fwd_revcomp_primers.fasta',
rev_revcomp_prime='Primers/rev_revcomp_primers.fasta'
output:
fwd_trim='work/cutadapt/{sample}_R1.fastq.gz',
rev_trim='work/cutadapt/{sample}_R2.fastq.gz'
log:
'report/cutadapt/cutadapt_{sample}'
params:
discard = '--discard-untrimmed ',
indels = '--no-indels ',
trim_n='--trim-n '
shell:
'''
cutadapt -g file:{input.fwd_prime} -G file:{input.rev_prime} -a file:{input.rev_revcomp_prime} -A file:{input.fwd_revcomp_prime} -o {output.fwd_trim} -p {output.rev_trim} --max-n=0 -m 50 --error-rate 0.1 {params.discard}{params.indels}{params.trim_n}--pair-filter any {input.fwd} {input.rev} > {log}
'''
rule filter:
input:
cut = "work/cutadapt/{sample}_1.fastq.gz",
cutrev = "work/cutadapt/{sample}_2.fastq.gz"
cut = "work/cutadapt/{sample}_
R
1.fastq.gz",
cutrev = "work/cutadapt/{sample}_
R
2.fastq.gz"
output:
filt = "work/filter/{sample}_1.fastq.gz",
filtrev = "work/filter/{sample}_2.fastq.gz"
filt = "work/filter/{sample}_
R
1.fastq.gz",
filtrev = "work/filter/{sample}_
R
2.fastq.gz"
script:
"filterAndTrim.R"
quality.smk
View file @
a49ea56f
rule fastqc:
input:
fwd = "DATA/{sample}_1.fastq.gz",
rev = "DATA/{sample}_2.fastq.gz"
fwd = "DATA/{sample}_
R
1.fastq.gz",
rev = "DATA/{sample}_
R
2.fastq.gz"
output:
zip = "work/fastqc/{sample}_{unit}_fastqc.zip",
html = "work/fastqc/{sample}_{unit}_fastqc.html"
...
...
@@ -32,4 +32,4 @@ rule multiqc:
"--outdir {params.output} "
"{input} "
"&& "
"conda deactivate "
"conda deactivate "
\ No newline at end of file
revcomp_primer.smk
0 → 100644
View file @
a49ea56f
import os
def yield_fasta(fasta, gz=False, rev_comp=False):
'''Make a generator that yields (seq_header, seq) for each entry)'''
complement = {'A':'T', 'C':'G', 'G':'C', 'T':'A', 'N':'N', 'X':'X', 'M':'K', 'R':'Y', 'W':'W', 'S':'S', 'Y':'R', 'K':'M','V':'B', 'H':'D', 'D':'H', 'B':'V'}
if gz==False:
fhs=open(fasta, 'r')
else:
fhs=gzip.open(fasta, 'rt')
l=fhs.readline().strip()
while l != '':
if l.startswith('>'): #if header
header=l #save header
if rev_comp is False:
seq=fhs.readline().strip().upper()
else:
seq=''
for base in fhs.readline().strip().upper()[::-1]:
seq+=complement[base]
yield header, seq# yield data
l=fhs.readline().strip()
rule revcomp_primer:
input:
primers = "Primers/{primer}_primers.fasta"
output:
primers = temporary("Primers/{primer}_revcomp_primers.fasta")
run:
with open(output[0], 'w') as fho:
fho.writelines(['{}\n{}\n'.format(x[0],x[1]) for x in yield_fasta(input[0], rev_comp=True)])
\ No newline at end of file
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment