Commit d078d1b6 authored by Jules Sabban's avatar Jules Sabban
Browse files

#4 move processes into modules directory

parents cb01a41e 7083c7e2
1,MT,./data/MT_rep1_1_Ch6.fastq.gz,./data/MT_rep1_2_Ch6.fastq.gz
\ No newline at end of file
1,MT,./template/data/MT_rep1_1_Ch6.fastq.gz,./template/data/MT_rep1_2_Ch6.fastq.gz
\ No newline at end of file
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
/*
Copyright INRAE 2021
......@@ -75,6 +76,8 @@ if (params.help) {
}
// NOTE - THIS IS NOT USED IN THIS PIPELINE, EXAMPLE ONLY
/*
......@@ -91,17 +94,15 @@ ch_inputdir = params.inputdir ? Channel.fromPath(params.inputdir, checkIfExists:
// Create a channel for input read files
if(params.samplesheet){
if(params.single_end){
Channel
ch_read_files=Channel
.from(file("${params.samplesheet}"))
.splitCsv(header: false)
.map{ row -> [ row[0], [file(row[2])]] }
.into { ch_read_files_for_fastqc; ch_read_files_for_qc1; ch_read_files_for_assembly}
}else{
Channel
ch_read_files=Channel
.from(file("${params.samplesheet}"))
.splitCsv(header: false)
.map{ row -> [ row[0], [file(row[2]), file(row[3])]] }
.into { ch_read_files_for_fastqc; ch_read_files_for_qc1; ch_read_files_for_assembly}
.map{ row -> [ row[0], [file(row[2]), file(row[3])]] }
}
params.reads=false
} else {
......@@ -112,189 +113,70 @@ if(params.samplesheet){
*/
// Has the run name been specified by the user?
// this has the bonus effect of catching both -name and --name
custom_runName = params.name
params.custom_runName = params.name
if (!(workflow.runName ==~ /[a-z]+_[a-z]+/)) {
custom_runName = workflow.runName
params.custom_runName = workflow.runName
}
// Stage config files
ch_multiqc_config = file(params.multiqc_config, checkIfExists: true)
ch_output_docs = file("$projectDir/docs/output.md", checkIfExists: true)
ch_multiqc_config = Channel.fromPath("${params.multiqc_config}", checkIfExists: true)
ch_output_docs = Channel.fromPath("$projectDir/docs/output.md", checkIfExists: true)
def summary = [:]
if (workflow.revision) summary['Pipeline Release'] = workflow.revision
summary['Run Name'] = custom_runName ?: workflow.runName
params.summary = [:]
if (workflow.revision) params.summary['Pipeline Release'] = workflow.revision
params.summary['Run Name'] = params.custom_runName ?: workflow.runName
// TODO nf-core: Report custom parameters here
summary['Input dir'] = params.inputdir
summary['Sample sheet'] = params.samplesheet
summary['Data Type'] = params.single_end ? 'Single-End' : 'Paired-End'
summary['Max Resources'] = "$params.max_memory memory, $params.max_cpus cpus, $params.max_time time per job"
if (workflow.containerEngine) summary['Container'] = "$workflow.containerEngine - $workflow.container"
summary['Output dir'] = params.outdir
summary['Launch dir'] = workflow.launchDir
summary['Working dir'] = workflow.workDir
summary['Script dir'] = workflow.projectDir
summary['User'] = workflow.userName
params.summary['Input dir'] = params.inputdir
params.summary['Sample sheet'] = params.samplesheet
params.summary['Data Type'] = params.single_end ? 'Single-End' : 'Paired-End'
params.summary['Max Resources'] = "$params.max_memory memory, $params.max_cpus cpus, $params.max_time time per job"
if (workflow.containerEngine) params.summary['Container'] = "$workflow.containerEngine - $workflow.container"
params.summary['Output dir'] = params.outdir
params.summary['Launch dir'] = workflow.launchDir
params.summary['Working dir'] = workflow.workDir
params.summary['Script dir'] = workflow.projectDir
params.summary['User'] = workflow.userName
if (workflow.profile == 'awsbatch') {
summary['AWS Region'] = params.awsregion
summary['AWS Queue'] = params.awsqueue
params.summary['AWS Region'] = params.awsregion
params.summary['AWS Queue'] = params.awsqueue
}
summary['Config Profile'] = workflow.profile
params.summary['Config Profile'] = workflow.profile
if (params.email || params.email_on_fail) {
summary['E-mail Address'] = params.email
summary['E-mail on failure'] = params.email_on_fail
params.summary['E-mail Address'] = params.email
params.summary['E-mail on failure'] = params.email_on_fail
}
log.info "-\033[2m--------------------------------------------------\033[0m-"
log.info "-\033[2m----------------"+ workflow.manifest.name +" --\033[0m-"
log.info "-\033[2m--------------------------------------------------\033[0m-"
log.info summary.collect { k,v -> "${k.padRight(18)}: $v" }.join("\n")
log.info params.summary.collect { k,v -> "${k.padRight(18)}: $v" }.join("\n")
log.info "-\033[2m--------------------------------------------------\033[0m-"
/*
 * Parse software version numbers
 * Collects pipeline/tool versions into files consumed by MultiQC.
 * NOTE(review): DSL1 form (`into` output channel) — this is the pre-merge
 * version being replaced by the DSL2 module variant.
 */
process get_software_versions {
    // Publish only the CSV; the YAML is routed to MultiQC via its channel.
    publishDir "${params.outdir}/pipeline_info", mode: 'copy',
        saveAs: { filename ->
            if (filename.indexOf(".csv") > 0) filename
            else null
        }

    output:
    file 'software_versions_mqc.yaml' into software_versions_yaml
    file "software_versions.csv"

    script:
    // TODO nf-core: Get all tools to print their version number here
    """
    echo $workflow.manifest.version > v_pipeline.txt
    echo $workflow.nextflow.version > v_nextflow.txt
    fastqc --version > v_fastqc.txt
    multiqc --version > v_multiqc.txt
    scrape_software_versions.py &> software_versions_mqc.yaml
    """
}
/*
 * STEP 1 - FastQC
 * Runs FastQC per sample; .zip archives are published under zips/.
 * NOTE(review): DSL1 form (`set ... from`) — pre-merge version.
 */
process fastqc {
    tag "$name"
    label 'process_medium'
    publishDir "${params.outdir}/fastqc", mode: 'copy',
        saveAs: { filename -> filename.indexOf(".zip") > 0 ? "zips/$filename" : "$filename" }

    input:
    set val(name), file(reads) from ch_read_files_for_fastqc

    output:
    file "*_fastqc.{zip,html}" into ch_fastqc_results_for_multiqc

    script:
    """
    fastqc --quiet --threads $task.cpus $reads
    """
}
/*
 * STEP 2 - Fake QC
 * Does not run fastqc: it only echoes the command that would be run into
 * <replicate_id>.qc1. NOTE(review): DSL1 form — pre-merge version; assumes
 * paired-end input (reads[0]/reads[1]).
 */
process qc1 {
    input:
    set replicate_id, file(reads) from ch_read_files_for_qc1

    output:
    file("${replicate_id}.qc1") into ch_fastqc_raw_for_assembly

    script:
    """
    echo "mkdir ${replicate_id} ; fastqc --nogroup --quiet -o ${replicate_id} --threads ${task.cpus} ${reads[0]} ${reads[1]}" > ${replicate_id}.qc1
    """
}
// -------------------------------------------------
// INCLUDES
// -------------------------------------------------
/*
 * STEP 3 - Fake assembly
 * Placeholder assembly step: writes a marker line into
 * <replicate_id>.assembly. NOTE(review): DSL1 form — pre-merge version.
 */
process assembly {
    input:
    // qc is staged only to create a dependency on the qc1 step; its content
    // is not read here.
    file (qc) from ch_fastqc_raw_for_assembly
    set replicate_id, file(reads) from ch_read_files_for_assembly

    output:
    file("${replicate_id}.assembly") into ch_assembly_for_multiqc

    script:
    """
    echo "ASSEMBLY ${replicate_id} ; " > ${replicate_id}.assembly
    """
}
include { fastqc;
qc1;
assembly
} from './modules/local/utils.nf'
// Renders the run-parameter summary as a MultiQC custom-content YAML file.
// Uses `exec:` (native Groovy) rather than `script:` — no shell is spawned.
// NOTE(review): pre-merge version reading the local `summary` map.
process workflow_summary {
    output:
    file 'workflow_summary_mqc.yaml' into ch_workflow_summary_yaml

    exec:
    // Write directly into the task work dir so Nextflow picks it up as output.
    def yaml_file = task.workDir.resolve('workflow_summary_mqc.yaml')
    yaml_file.text = """
id: 'summary'
description: " - this information is collected when the pipeline is started."
section_name: 'Workflow Summary'
section_href: "${workflow.manifest.homePage}"
plot_type: 'html'
data: |
<dl class=\"dl-horizontal\">
${summary.collect { k,v -> " <dt>$k</dt><dd><samp>${v ?: '<span style=\"color:#999999;\">N/A</a>'}</samp></dd>" }.join("\n")}
</dl>
""".stripIndent()
}
include { workflow_summary;
multiqc;
output_documentation;
} from './modules/local/report.nf'
/*
* STEP - MultiQC
*/
process multiqc {
publishDir "${params.outdir}/MultiQC", mode: 'copy'
when:
!params.skip_multiQC
input:
file (multiqc_config) from ch_multiqc_config
file ('fastqc/*') from ch_fastqc_results_for_multiqc.collect().ifEmpty([])
// TODO get-nf: Add in log files from your new processes for MultiQC to find!
file ('software_versions/*') from software_versions_yaml.collect()
file ('workflowSummary/*') from ch_workflow_summary_yaml.collect()
output:
file "*report.html" into ch_multiqc_report
file "*_data"
file "multiqc_plots"
script:
rtitle = custom_runName ? "--title \"$custom_runName\"" : ''
rfilename = custom_runName ? "--filename " + custom_runName.replaceAll('\\W','_').replaceAll('_+','_') + "_multiqc_report" : ''
"""
multiqc -f $rtitle $rfilename --config $multiqc_config .
"""
// Main entry workflow (DSL2): wires the sample-read channel through the QC
// and assembly processes included from ./modules/local/*.nf.
workflow {
    fastqc(ch_read_files)
    qc1(ch_read_files)
    // assembly depends on qc1's output as well as the raw reads.
    assembly(qc1.out, ch_read_files)
    workflow_summary()
    // NOTE(review): MultiQC invocation left disabled by the author.
    //multiqc(ch_multiqc_config, fastqc.out.collect().ifEmpty([]), workflow_summary.out.collect())
    output_documentation(ch_output_docs)
}
/*
 * STEP - Output Description HTML
 * Converts docs/output.md to HTML with pandoc and publishes it.
 * NOTE(review): DSL1 form (`from` channel) — pre-merge version.
 */
process output_documentation {
    publishDir "${params.outdir}/pipeline_info", mode: 'copy'

    input:
    file output_docs from ch_output_docs

    output:
    file "results_description.html"

    script:
    """
    pandoc $output_docs -t html -o results_description.html
    """
}
/*
* Completion e-mail notification
......@@ -309,7 +191,7 @@ workflow.onComplete {
}
def email_fields = [:]
email_fields['version'] = workflow.manifest.version
email_fields['runName'] = custom_runName ?: workflow.runName
email_fields['runName'] = params.custom_runName ?: workflow.runName
email_fields['success'] = workflow.success
email_fields['dateComplete'] = workflow.complete
email_fields['duration'] = workflow.duration
......@@ -318,7 +200,7 @@ workflow.onComplete {
email_fields['errorReport'] = (workflow.errorReport ?: 'None')
email_fields['commandLine'] = workflow.commandLine
email_fields['projectDir'] = workflow.projectDir
email_fields['summary'] = summary
email_fields['summary'] = params.summary
println(workflow)
email_fields['summary']['Date Started'] = 11 // workflow.start
......@@ -348,8 +230,12 @@ workflow.onComplete {
// Send the HTML e-mail
if (email_address) {
// Catch failures and try with plaintext
[ 'mail', '-s', subject, email_address ].execute() << email_txt
log.info "[$name_wf] Sent summary e-mail to $email_address (mail)"
try {
def sending = [ 'echo', '-e' , email_txt ].execute() | [ 'mail', '-s', subject, email_address ].execute()
log.debug "[$name_wf] Sent summary e-mail to $email_address (mail)"
} catch (all) {
log.error "[$name_wf] ERROR ON EMAIL SENDING TO $email_address !!"
}
log.info "$email_txt"
}
......
// Renders the run-parameter summary (params.summary, populated in main.nf)
// as a MultiQC custom-content YAML file. Uses `exec:` (native Groovy), so
// no shell task is spawned.
process workflow_summary {
    output:
    path('workflow_summary_mqc.yaml')

    exec:
    // Write directly into the task work dir so Nextflow collects it as output.
    def yaml_file = task.workDir.resolve('workflow_summary_mqc.yaml')
    yaml_file.text = """
id: 'summary'
description: " - this information is collected when the pipeline is started."
section_name: 'Workflow Summary'
section_href: "${workflow.manifest.homePage}"
plot_type: 'html'
data: |
<dl class=\"dl-horizontal\">
${params.summary.collect { k,v -> " <dt>$k</dt><dd><samp>${v ?: '<span style=\"color:#999999;\">N/A</a>'}</samp></dd>" }.join("\n")}
</dl>
""".stripIndent()
}
/*
 * STEP - MultiQC
 * Aggregates FastQC results and the workflow summary into one MultiQC HTML
 * report. Skipped entirely when --skip_multiQC is set.
 */
process multiqc {
    publishDir "${params.outdir}/MultiQC", mode: 'copy'
    // Environment-modules directive — assumes the execution cluster provides
    // this module; TODO confirm for other deployment targets.
    module 'bioinfo/MultiQC-v1.7'

    when:
    !params.skip_multiQC

    input:
    path(multiqc_config)
    path('fastqc/*') //.collect().ifEmpty([])
    // TODO get-nf: Add in log files from your new processes for MultiQC to find!
    //path('software_versions/*') //.collect()
    path('workflowSummary/*') //.collect()

    output:
    path("*report.html")
    path("*_data")
    path("multiqc_plots")

    script:
    // Derive report title/filename from the custom run name, if one was given;
    // non-word characters are collapsed to single underscores for the filename.
    rtitle = params.custom_runName ? "--title \"$params.custom_runName\"" : ''
    rfilename = params.custom_runName ? "--filename " + params.custom_runName.replaceAll('\\W','_').replaceAll('_+','_') + "_multiqc_report" : ''
    """
    multiqc -f $rtitle $rfilename --config $multiqc_config .
    """
}
/*
 * STEP - Output Description HTML
 * Converts docs/output.md to HTML with pandoc and publishes it under
 * pipeline_info/.
 */
process output_documentation {
    publishDir "${params.outdir}/pipeline_info", mode: 'copy'
    // Environment-modules directive — assumes pandoc is provided as a module.
    module 'system/pandoc-2.1.3'

    input:
    path(output_docs)

    output:
    path("results_description.html")

    script:
    """
    pandoc $output_docs -t html -o results_description.html
    """
}
\ No newline at end of file
/*
 * Parse software version numbers
 * Collects pipeline/tool versions; the YAML is formatted for MultiQC by the
 * project's scrape_software_versions.py helper script.
 */
process get_software_versions {
    // Publish only the CSV; the YAML output is meant for MultiQC staging.
    publishDir "${params.outdir}/pipeline_info", mode: 'copy',
        saveAs: { filename ->
            if (filename.indexOf(".csv") > 0) filename
            else null
        }

    output:
    path('software_versions_mqc.yaml')
    path("software_versions.csv")

    script:
    // TODO nf-core: Get all tools to print their version number here
    """
    echo $workflow.manifest.version > v_pipeline.txt
    echo $workflow.nextflow.version > v_nextflow.txt
    fastqc --version > v_fastqc.txt
    multiqc --version > v_multiqc.txt
    scrape_software_versions.py &> software_versions_mqc.yaml
    """
}
/*
 * STEP 1 - FastQC
 * Runs FastQC on each sample's reads; .zip archives are published under
 * zips/, HTML reports at the top level of the fastqc output dir.
 */
process fastqc {
    tag "$name"
    label 'process_medium'
    publishDir "${params.outdir}/fastqc", mode: 'copy',
        saveAs: { filename -> filename.indexOf(".zip") > 0 ? "zips/$filename" : "$filename" }
    // Environment-modules directive — assumes the cluster provides FastQC.
    module 'bioinfo/FastQC_v0.11.7'

    input:
    tuple val(name), path(reads)

    output:
    path "*_fastqc.{zip,html}"

    script:
    """
    fastqc --quiet --threads $task.cpus $reads
    """
}
/*
 * STEP 2 - Fake QC
 * Does not execute fastqc: it only echoes the command that WOULD be run
 * into <replicate_id>.qc1, creating a lightweight dependency artefact for
 * the downstream assembly step.
 */
process qc1 {
    module 'bioinfo/FastQC_v0.11.7'

    input:
    tuple val(replicate_id), path(reads)

    output:
    path("${replicate_id}.qc1")

    script:
    // Interpolating the staged file collection directly (${reads}) renders it
    // space-separated, so this handles single-end (one file) as well as
    // paired-end input. The previous "${reads[0]} ${reads[1]}" form assumed
    // exactly two files; for paired-end data the emitted command line is
    // byte-identical to before.
    """
    echo "mkdir ${replicate_id} ; fastqc --nogroup --quiet -o ${replicate_id} --threads ${task.cpus} ${reads}" > ${replicate_id}.qc1
    """
}
/*
 * STEP 3 - Fake assembly
 * Placeholder assembly step: writes a marker line into
 * <replicate_id>.assembly.
 */
process assembly {
    input:
    // qc is staged only to order this step after qc1; its content is unused.
    path(qc)
    tuple val(replicate_id), path(reads)

    output:
    path("${replicate_id}.assembly")

    script:
    """
    echo "ASSEMBLY ${replicate_id} ; " > ${replicate_id}.assembly
    """
}
......@@ -10,7 +10,7 @@ params {
// Workflow flags
// TODO nf-core: Specify your pipeline's command line flags
inputdir = "./data"
inputdir = "./template/data"
samplesheet = "${params.inputdir}/samples.csv"
single_end = false
outdir = './results'
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment