Commit 55a074fc authored by MARTIN Pierre's avatar MARTIN Pierre
Browse files

changed nextflow.config basic cpm filter + added publishdir for kaiju step

parent 8af503e5
......@@ -448,6 +448,7 @@ if (!params.skip_kaiju && ('01_clean_qc' in step || '02_assembly' in step || '03
process kaiju {
tag "${sampleId}"
publishDir "${params.outdir}/01_clean_qc/01_3_taxonomic_affiliation_reads", mode: 'copy', pattern: '*.krona.html'
publishDir "${params.outdir}/01_clean_qc/01_3_taxonomic_affiliation_reads", mode: 'copy', pattern: '*_kaiju_MEM_verbose.out'
input:
set sampleId, file(preprocessed_reads_R1), file(preprocessed_reads_R2) from clean_reads_for_kaiju_ch
......
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { COMMON } from './subworkflows/common'
// include { SHORT_READS } from './subworkflows/short_reads'
// include { LONG_READS } from './subworkflows/long_reads'
// Define list of available steps.
//def defineStepList() {
// return [
// '01_clean_qc',
// '02_assembly',
// '03_filtering',
// '04_structural_annot',
// '05_alignment',
// '06_func_annot',
// '07_taxo_affi',
// '08_binning'
// ]
//}
/*
 * Create the channel of input read files.
 *
 * Files matching `params.reads` are grouped by sample prefix: one file per
 * sample when `params.single_end` is set, two otherwise. With `flat: true`
 * each emitted item is a flat tuple (sampleId, file[, file]).
 * The run is aborted with exit status 1 when nothing matches the pattern.
 */
Channel
    .fromFilePairs( params.reads, size: params.single_end ? 1 : 2, flat: true )
    // The hint below must name the option exactly as it is read above (params.single_end).
    .ifEmpty { exit 1, "Cannot find any reads matching: ${params.reads}\nNB: Path needs to be enclosed in quotes!\nNB: Path requires at least one * wildcard!\nIf this is single-end data, please specify --single_end on the command line." }
    .set { ch_reads }
// Entry workflow: hand the input read channel to the COMMON sub-workflow.
workflow {
    COMMON(ch_reads)
}
\ No newline at end of file
/*
 * CUTADAPT: run cutadapt on one pair of read files.
 *
 * Inputs:
 *   - tuple (sampleId, read1, read2): sample identifier and its two read files
 *   - adapter1, adapter2: adapter sequences passed to cutadapt as -a / -A
 * Outputs:
 *   - reads: tuple (sampleId, R1 file, R2 file) produced by cutadapt
 *   - logs:  cutadapt standard output captured in "${sampleId}_cutadapt.log"
 *
 * Only files named "cleaned_*.fastq.gz" and the log are copied to the publish
 * directories; intermediate "*_cutadapt_R?.fastq.gz" files are not published.
 */
process CUTADAPT {
    tag "$sampleId"
    publishDir "${params.outdir}/01_clean_qc/01_1_cleaned_reads/", mode: 'copy', pattern: 'cleaned_*.fastq.gz'
    publishDir "${params.outdir}/01_clean_qc/01_1_cleaned_reads/logs", mode: 'copy', pattern: '*_cutadapt.log'

    input:
    tuple val(sampleId), path(read1), path(read2)
    val adapter1
    val adapter2

    output:
    tuple val(sampleId), path("*${sampleId}*_R1.fastq.gz"), path("*${sampleId}*_R2.fastq.gz"), emit: reads
    path "${sampleId}_cutadapt.log", emit: logs

    // when: ('01_clean_qc' in step || '02_assembly' in step || '03_filtering' in step || '04_structural_annot' in step || '05_alignment' in step || '06_func_annot' in step || '07_taxo_affi' in step || '08_binning' in step) && (!params.skip_01_clean_qc)

    script:
    // Logical && (not bitwise &) so that an unset/null parameter is simply
    // treated as false instead of raising an error. `def` keeps the variable
    // local to each task.
    // When both later cleaning steps are skipped, cutadapt writes the final
    // "cleaned_" files; otherwise it writes temporary files that are not
    // matched by the publishDir patterns.
    def output_paths = (params.skip_sickle && params.skip_removal_host) ?
        "-o cleaned_${sampleId}_R1.fastq.gz -p cleaned_${sampleId}_R2.fastq.gz" :
        "-o ${sampleId}_cutadapt_R1.fastq.gz -p ${sampleId}_cutadapt_R2.fastq.gz"
    """
    cutadapt -a $adapter1 -A $adapter2 $output_paths -m 36 --trim-n -q 20,20 --max-n 0 \
    --cores=${task.cpus} ${read1} ${read2} > ${sampleId}_cutadapt.log
    """
}
\ No newline at end of file
/*
 * SICKLE: run sickle on one pair of read files.
 *
 * Input:
 *   - tuple (sampleId, read1, read2): sample identifier and its two read files
 * Outputs:
 *   - reads:  tuple (sampleId, R1 file, R2 file) written by sickle
 *   - single: "${sampleId}_single_sickle.fastq.gz" (file given to sickle's -s option)
 *   - logs:   sickle standard output captured in "${sampleId}_sickle.log"
 *
 * Only files named "cleaned_*.fastq.gz" and the log are copied to the publish
 * directories; intermediate "*_sickle_R?.fastq.gz" files are not published.
 */
process SICKLE {
    tag "$sampleId"
    publishDir "${params.outdir}/01_clean_qc/01_1_cleaned_reads/", mode: 'copy', pattern: 'cleaned_*.fastq.gz'
    publishDir "${params.outdir}/01_clean_qc/01_1_cleaned_reads/logs", mode: 'copy', pattern: '*_sickle.log'

    // when: (!params.skip_sickle) && ('01_clean_qc' in step || '02_assembly' in step || '03_filtering' in step || '04_structural_annot' in step || '05_alignment' in step || '06_func_annot' in step || '07_taxo_affi' in step || '08_binning' in step) && (!params.skip_01_clean_qc)

    input:
    tuple val(sampleId), path(read1), path(read2)

    output:
    tuple val(sampleId), path("*${sampleId}*_R1.fastq.gz"), path("*${sampleId}*_R2.fastq.gz"), emit: reads
    path "${sampleId}_single_sickle.fastq.gz", emit: single
    path "${sampleId}_sickle.log", emit: logs

    script:
    // NOTE(review): 'se' is selected for single-end runs, but the command below
    // always passes paired-end options (-r/-p/-s) and the input tuple always
    // carries two files; confirm how single-end data is meant to be handled.
    def mode = params.single_end ? 'se' : 'pe'
    // When host removal is skipped, sickle writes the final "cleaned_" files;
    // otherwise it writes temporary files that are not matched by the
    // publishDir patterns.
    def options = params.skip_removal_host ?
        "-o cleaned_${sampleId}_R1.fastq.gz -p cleaned_${sampleId}_R2.fastq.gz" :
        "-o ${sampleId}_sickle_R1.fastq.gz -p ${sampleId}_sickle_R2.fastq.gz"
    options += " -t " + params.quality_type
    // The read files must be referenced by the names declared in the input
    // block (read1/read2).
    """
    sickle ${mode} -f ${read1} -r ${read2} $options \
    -s ${sampleId}_single_sickle.fastq.gz -g > ${sampleId}_sickle.log
    """
}
\ No newline at end of file
......@@ -17,7 +17,7 @@ params {
host_fasta = ""
metaspades_mem = 440
percentage_identity = 0.95
min_contigs_cpm = 10
min_contigs_cpm = 1
assembly = "metaspades"
min_contig_size = 1500
busco_reference = "https://busco-archive.ezlab.org/v3/datasets/bacteria_odb9.tar.gz"
......
include { CUTADAPT } from '../modules/cutadapt'
include { SICKLE } from '../modules/sickle'
// Adapter sequences taken from the pipeline parameters, wrapped as value channels.
ch_adapter1 = Channel.value(params.adapter1)
ch_adapter2 = Channel.value(params.adapter2)

/*
 * COMMON sub-workflow.
 *
 * take: ch_reads - channel of read tuples passed to CUTADAPT
 * emit: the `reads` output of SICKLE
 */
workflow COMMON {
    take:
    ch_reads

    main:
    // Step 1: CUTADAPT on the incoming reads with both adapters.
    CUTADAPT(ch_reads, ch_adapter1, ch_adapter2)
    // Step 2: SICKLE on the reads emitted by CUTADAPT.
    SICKLE(CUTADAPT.out.reads)

    emit:
    SICKLE.out.reads
}
#!/usr/bin/env nextflow
/*
========================================================================================
metagWGS
========================================================================================
metagWGS Analysis Pipeline.
#### Homepage / Documentation
https://forgemia.inra.fr/genotoul-bioinfo/metagwgs/
----------------------------------------------------------------------------------------
*/
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment