Commit 81ae65ef authored by Jules Sabban's avatar Jules Sabban
Browse files

Merge branch 'dsl2' into 'master'

DSL2

See merge request !2
parents 187eb782 dada582b
Pipeline #42454 failed with stages
in 6 seconds
1,MT,./data/MT_rep1_1_Ch6.fastq.gz,./data/MT_rep1_2_Ch6.fastq.gz
\ No newline at end of file
1,MT,./template/data/MT_rep1_1_Ch6.fastq.gz,./template/data/MT_rep1_2_Ch6.fastq.gz
\ No newline at end of file
#!/usr/bin/env nextflow
/*
Copyright INRAE 2021
This software is a computer program whose purpose is to
analyze high-throughput sequencing data.
You can use, modify and/or redistribute the software under the terms
of license (see the LICENSE file for more details).
The software is distributed in the hope that it will be useful,
but "AS IS" WITHOUT ANY WARRANTY OF ANY KIND.
Users are therefore encouraged to test the software's suitability as regards
their requirements in conditions enabling the security of their systems and/or data.
The fact that you are presently reading this means that you have had knowledge
of the license and that you accept its terms.
This script is based on :
- the nf-core guidelines . See https://nf-co.re/ for more information
- the Curie institute template https://github.com/bioinfo-pf-curie/geniac-template/
*/
/*
========================================================================================
GeT/template
========================================================================================
GeT/template Analysis Pipeline.
#### Homepage / Documentation
https://github.com/get-nf/template
----------------------------------------------------------------------------------------
*/
/**
 * Print the pipeline usage text through the Nextflow logger.
 *
 * The text lists the typical command line, the mandatory arguments, the
 * optional arguments and the available profiles. Every line of the literal
 * carries the same leading indent, which stripIndent() removes again, so the
 * logged text starts at column 0.
 */
def helpMessage() {
    log.info"""
    Usage:
    The typical command for running the pipeline is as follows:
    nextflow run get-nf/template --inputdir '/path/to/data' --samplesheet 'samples.csv' -profile docker
    Mandatory arguments:
    --inputdir Path to input directory
    -profile Configuration profile to use. Can use multiple (comma separated)
    Available: conda, docker, singularity, path, genotoul, test and more.
    Options:
    --samplesheet Default inputdir/samples.csv eg: SAMPLE_ID,SAMPLE_NAME,path/to/R1/fastq/file,path/to/R2/fastq/file (for paired-end only)
    --contaminant Name of iGenomes // To be discussed ????
    --outdir The output directory where the results will be saved
    --email Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits
    --email_on_fail Same as --email, except only send mail if the workflow is not successful
    --maxMultiqcEmailFileSize Threshold size for MultiQC report to be attached in notification email. If file generated by pipeline exceeds the threshold, it will not be attached (Default: 25MB)
    -name [str] Name for the pipeline run. If not specified, Nextflow will automatically generate a random mnemonic.
    =======================================================
    Available profiles
    -profile test Run the test dataset
    -profile conda Build a new conda environment before running the pipeline. Use `--condaCacheDir` to define the conda cache path
    -profile path Use the installation path defined for all tools. Use `--globalPath` to define the installation path
    -profile docker Use the Docker images for each process
    -profile singularity Use the singularity images for each process
    -profile genologin Run the workflow on the cluster, instead of locally
    """.stripIndent()
}
// Show help message: when params.help is set, print the usage text via
// helpMessage() and terminate with exit status 0.
if (params.help) {
helpMessage()
exit 0
}
// NOTE - THIS IS NOT USED IN THIS PIPELINE, EXAMPLE ONLY
/*
 * Channel on the input directory; empty when params.inputdir is not set.
 * To consume it in a process, declare:
 *   input:
 *   file dir from ch_inputdir
 */
ch_inputdir = params.inputdir ? Channel.fromPath(params.inputdir, checkIfExists: true) : Channel.empty()

/*
 * Channels of input read files, built from the header-less sample sheet.
 * Each CSV row is mapped to [ column 0, [ file(column 2) ] ] in single-end mode
 * and to [ column 0, [ file(column 2), file(column 3) ] ] otherwise. The result
 * is duplicated into one channel per consuming process (fastqc, qc1, assembly).
 * Without a sample sheet the run is aborted with exit status 1.
 */
if (params.samplesheet) {
    Channel
        .from(file("${params.samplesheet}"))
        .splitCsv(header: false)
        .map { fields ->
            // Column 3 (second mate) is only read in paired-end mode
            def fastqs = params.single_end ? [file(fields[2])] : [file(fields[2]), file(fields[3])]
            [ fields[0], fastqs ]
        }
        .into { ch_read_files_for_fastqc; ch_read_files_for_qc1; ch_read_files_for_assembly }
    params.reads = false
} else {
    exit 1, "Expect a samplesheet and an input dir !"
}
/*
 * SET UP CONFIGURATION VARIABLES
 */
// Run name: params.name is kept only while the Nextflow run name matches the
// "<letters>_<letters>" pattern; any other run name replaces it.
// this has the bonus effect of catching both -name and --name
custom_runName = (workflow.runName ==~ /[a-z]+_[a-z]+/) ? params.name : workflow.runName

// Stage config files (both must exist, otherwise file() fails)
ch_multiqc_config = file(params.multiqc_config, checkIfExists: true)
ch_output_docs = file("$projectDir/docs/output.md", checkIfExists: true)

// Ordered key/value summary of the run; entries are logged in insertion order
def summary = [:]
if (workflow.revision) {
    summary['Pipeline Release'] = workflow.revision
}
// TODO nf-core: Report custom parameters here
summary << [
    'Run Name'     : (custom_runName ?: workflow.runName),
    'Input dir'    : params.inputdir,
    'Sample sheet' : params.samplesheet,
    'Data Type'    : (params.single_end ? 'Single-End' : 'Paired-End'),
    'Max Resources': "$params.max_memory memory, $params.max_cpus cpus, $params.max_time time per job"
]
if (workflow.containerEngine) {
    summary['Container'] = "$workflow.containerEngine - $workflow.container"
}
summary << [
    'Output dir' : params.outdir,
    'Launch dir' : workflow.launchDir,
    'Working dir': workflow.workDir,
    'Script dir' : workflow.projectDir,
    'User'       : workflow.userName
]
if (workflow.profile == 'awsbatch') {
    summary << [
        'AWS Region': params.awsregion,
        'AWS Queue' : params.awsqueue
    ]
}
summary['Config Profile'] = workflow.profile
if (params.email || params.email_on_fail) {
    summary << [
        'E-mail Address'   : params.email,
        'E-mail on failure': params.email_on_fail
    ]
}

// Banner: dimmed rule, pipeline name, rule, one "key: value" line per entry, rule
def dim_rule = "-\033[2m--------------------------------------------------\033[0m-"
log.info dim_rule
log.info "-\033[2m----------------"+ workflow.manifest.name +" --\033[0m-"
log.info dim_rule
log.info summary.collect { key, value -> "${key.padRight(18)}: $value" }.join("\n")
log.info dim_rule
/*
 * Parse software version numbers
 *
 * Writes the pipeline, Nextflow, FastQC and MultiQC versions to v_*.txt files,
 * then redirects stdout and stderr of scrape_software_versions.py into
 * software_versions_mqc.yaml. Only outputs whose name contains ".csv" past the
 * first character are published to <outdir>/pipeline_info.
 */
process get_software_versions {
    publishDir "${params.outdir}/pipeline_info", mode: 'copy',
        saveAs: { published -> published.indexOf(".csv") > 0 ? published : null }

    output:
    file 'software_versions_mqc.yaml' into software_versions_yaml
    // presumably written by scrape_software_versions.py - verify against the script
    file "software_versions.csv"

    script:
    // TODO nf-core: Get all tools to print their version number here
    """
    echo $workflow.manifest.version > v_pipeline.txt
    echo $workflow.nextflow.version > v_nextflow.txt
    fastqc --version > v_fastqc.txt
    multiqc --version > v_multiqc.txt
    scrape_software_versions.py &> software_versions_mqc.yaml
    """
}
/*
 * STEP 1 - FastQC
 *
 * Runs FastQC on the reads of one sample and emits the *_fastqc.zip / .html
 * reports. Published under <outdir>/fastqc, with files whose name contains
 * ".zip" past the first character placed in a zips/ sub-directory.
 */
process fastqc {
    tag "$name"
    label 'process_medium'
    publishDir "${params.outdir}/fastqc", mode: 'copy',
        saveAs: { published ->
            if (published.indexOf(".zip") > 0) {
                return "zips/$published"
            }
            return "$published"
        }

    input:
    set val(name), file(reads) from ch_read_files_for_fastqc

    output:
    file "*_fastqc.{zip,html}" into ch_fastqc_results_for_multiqc

    script:
    """
    fastqc --quiet --threads $task.cpus $reads
    """
}
/*
 * STEP 2 - Fake QC
 *
 * Placeholder step: it does not run FastQC, it only echoes the command line
 * that would be run into <replicate_id>.qc1.
 */
process qc1 {
    input:
    set val(replicate_id), file(reads) from ch_read_files_for_qc1

    output:
    file("${replicate_id}.qc1") into ch_fastqc_raw_for_assembly

    script:
    """
    echo "mkdir ${replicate_id} ; fastqc --nogroup --quiet -o ${replicate_id} --threads ${task.cpus} ${reads[0]} ${reads[1]}" > ${replicate_id}.qc1
    """
}
/*
 * STEP 3 - Fake assembly
 *
 * Placeholder step: takes one qc1 output plus one sample tuple and writes a
 * single "ASSEMBLY <replicate_id> ; " line to <replicate_id>.assembly.
 * Neither the qc file nor the reads are used by the script itself.
 */
process assembly {
    input:
    file(qc) from ch_fastqc_raw_for_assembly
    set val(replicate_id), file(reads) from ch_read_files_for_assembly

    output:
    file("${replicate_id}.assembly") into ch_assembly_for_multiqc

    script:
    """
    echo "ASSEMBLY ${replicate_id} ; " > ${replicate_id}.assembly
    """
}
/*
 * Write the run summary as a YAML file for the MultiQC report.
 *
 * Renders every entry of the `summary` map as a <dt>/<dd> pair inside a <dl>
 * list and stores the result as workflow_summary_mqc.yaml in the task work
 * directory. Entries with an empty value are shown as a grey "N/A".
 */
process workflow_summary {
    output:
    file 'workflow_summary_mqc.yaml' into ch_workflow_summary_yaml

    exec:
    def yaml_file = task.workDir.resolve('workflow_summary_mqc.yaml')
    // The N/A placeholder must close the <span> it opens (it used to close with </a>).
    // NOTE(review): the whitespace inside this literal is kept exactly as found; the
    // lines under `data: |` presumably need an indent to form a valid YAML block
    // scalar - confirm against the repository copy.
    yaml_file.text = """
id: 'summary'
description: " - this information is collected when the pipeline is started."
section_name: 'Workflow Summary'
section_href: "${workflow.manifest.homePage}"
plot_type: 'html'
data: |
<dl class=\"dl-horizontal\">
${summary.collect { k,v -> " <dt>$k</dt><dd><samp>${v ?: '<span style=\"color:#999999;\">N/A</span>'}</samp></dd>" }.join("\n")}
</dl>
""".stripIndent()
}
/*
 * STEP - MultiQC
 *
 * Aggregates the FastQC results, the software versions and the workflow
 * summary into one MultiQC report published under <outdir>/MultiQC.
 * Skipped when params.skip_multiQC is set.
 */
process multiqc {
    publishDir "${params.outdir}/MultiQC", mode: 'copy'

    when:
    !params.skip_multiQC

    input:
    file (multiqc_config) from ch_multiqc_config
    file ('fastqc/*') from ch_fastqc_results_for_multiqc.collect().ifEmpty([])
    // TODO get-nf: Add in log files from your new processes for MultiQC to find!
    file ('software_versions/*') from software_versions_yaml.collect()
    file ('workflowSummary/*') from ch_workflow_summary_yaml.collect()

    output:
    file "*report.html" into ch_multiqc_report
    file "*_data"
    file "multiqc_plots"

    script:
    // Title and file name options are only passed when a custom run name exists;
    // the file name has every run of non-word characters collapsed to one "_".
    def title_opt = ''
    def filename_opt = ''
    if (custom_runName) {
        title_opt = "--title \"$custom_runName\""
        filename_opt = "--filename " + custom_runName.replaceAll('\\W','_').replaceAll('_+','_') + "_multiqc_report"
    }
    """
    multiqc -f $title_opt $filename_opt --config $multiqc_config .
    """
}
/*
 * STEP - Output Description HTML
 *
 * Converts the staged output documentation file to HTML with pandoc and
 * publishes results_description.html under <outdir>/pipeline_info.
 */
process output_documentation {
    publishDir "${params.outdir}/pipeline_info", mode: 'copy'

    input:
    file(output_docs) from ch_output_docs

    output:
    file("results_description.html")

    script:
    """
    pandoc $output_docs -t html -o results_description.html
    """
}
/*
 * Completion e-mail notification
 *
 * Runs once when the workflow finishes. It:
 *   1. collects run metadata into `email_fields` (the shared `summary` map is
 *      extended in place with dates, script and Nextflow information);
 *   2. renders assets/email_template.txt with those fields;
 *   3. mails the rendered text to params.email, or to params.email_on_fail
 *      when only that one is set and the run failed;
 *   4. writes the same text to <outdir>/pipeline_info/pipeline_report.txt;
 *   5. logs a coloured completion status line.
 */
workflow.onComplete {
    // Set up the e-mail variables
    def name_wf = workflow.manifest.name
    def subject = "[$name_wf] Successful: $workflow.runName"
    if (!workflow.success) {
        subject = "[$name_wf] FAILED: $workflow.runName"
    }

    def email_fields = [:]
    email_fields['version'] = workflow.manifest.version
    email_fields['runName'] = custom_runName ?: workflow.runName
    email_fields['success'] = workflow.success
    email_fields['dateComplete'] = workflow.complete
    email_fields['duration'] = workflow.duration
    email_fields['exitStatus'] = workflow.exitStatus
    email_fields['errorMessage'] = (workflow.errorMessage ?: 'None')
    email_fields['errorReport'] = (workflow.errorReport ?: 'None')
    email_fields['commandLine'] = workflow.commandLine
    email_fields['projectDir'] = workflow.projectDir
    email_fields['summary'] = summary

    // Report the real run metadata (these entries used to hold debugging placeholders)
    email_fields['summary']['Date Started'] = workflow.start
    email_fields['summary']['Date Completed'] = workflow.complete
    email_fields['summary']['Pipeline script file path'] = workflow.scriptFile
    email_fields['summary']['Pipeline script hash ID'] = workflow.scriptId
    if (workflow.repository) email_fields['summary']['Pipeline repository Git URL'] = workflow.repository
    if (workflow.commitId) email_fields['summary']['Pipeline repository Git Commit'] = workflow.commitId
    if (workflow.revision) email_fields['summary']['Pipeline Git branch/tag'] = workflow.revision
    if (workflow.container) email_fields['summary']['Docker image'] = workflow.container
    email_fields['summary']['Nextflow Version'] = workflow.nextflow.version
    email_fields['summary']['Nextflow Build'] = workflow.nextflow.build
    email_fields['summary']['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp

    // Check if we are only sending emails on failure
    email_address = params.email
    if (!params.email && params.email_on_fail && !workflow.success) {
        email_address = params.email_on_fail
    }

    // Render the TXT template
    def engine = new groovy.text.GStringTemplateEngine()
    def tf = new File("$baseDir/assets/email_template.txt")
    def txt_template = engine.createTemplate(tf).make(email_fields)
    def email_txt = txt_template.toString()

    // Send the plain-text e-mail by piping `echo -e <text>` into `mail`
    if (email_address) {
        try {
            [ 'echo', '-e' , email_txt ].execute() | [ 'mail', '-s', subject, email_address ].execute()
            log.debug "[$name_wf] Sent summary e-mail to $email_address (mail)"
        } catch (all) {
            // Best effort: a mail failure must not fail the completion handler
            log.error "[$name_wf] ERROR ON EMAIL SENDING TO $email_address !!"
            log.debug "[$name_wf] e-mail failure cause: ${all.message}"
        }
        log.info "$email_txt"
    }

    // Write the rendered plain-text report to a file
    def output_d = new File( "${params.outdir}/pipeline_info/" )
    if (!output_d.exists()) {
        output_d.mkdirs()
    }
    def output_tf = new File( output_d, "pipeline_report.txt" )
    output_tf.withWriter { w -> w << email_txt }

    // ANSI colours, disabled when params.monochrome_logs is set
    c_green = params.monochrome_logs ? '' : "\033[0;32m";
    c_purple = params.monochrome_logs ? '' : "\033[0;35m";
    c_red = params.monochrome_logs ? '' : "\033[0;31m";
    c_reset = params.monochrome_logs ? '' : "\033[0m";

    if (workflow.stats.ignoredCount > 0 && workflow.success) {
        log.info "-${c_purple}Warning, pipeline completed, but with errored process(es) ${c_reset}"
        log.info "-${c_red}Number of ignored errored process(es) : ${workflow.stats.ignoredCount} ${c_reset}"
        log.info "-${c_green}Number of successfully ran process(es) : ${workflow.stats.succeedCount} ${c_reset}"
    }
    if (workflow.success) {
        log.info "-${c_purple}[${name_wf}]${c_green} Pipeline completed successfully${c_reset}"
    } else {
        log.info "-${c_purple}[${name_wf}]${c_red} Pipeline completed with errors${c_reset}"
    }
}
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
/*
Copyright INRAE 2021
This software is a computer program whose purpose is to
analyze high-throughput sequencing data.
You can use, modify and/or redistribute the software under the terms
of license (see the LICENSE file for more details).
The software is distributed in the hope that it will be useful,
but "AS IS" WITHOUT ANY WARRANTY OF ANY KIND.
Users are therefore encouraged to test the software's suitability as regards
their requirements in conditions enabling the security of their systems and/or data.
The fact that you are presently reading this means that you have had knowledge
of the license and that you accept its terms.
This script is based on :
- the nf-core guidelines . See https://nf-co.re/ for more information
- the Curie institute template https://github.com/bioinfo-pf-curie/geniac-template/
*/
/*
========================================================================================
NAMED WORKFLOW FOR PIPELINE
========================================================================================
*/
include { techno_qc } from './workflow/techno_qc.nf'
// Named workflow for the pipeline: invokes techno_qc (included from
// './workflow/techno_qc.nf') with no arguments.
workflow QC_ANALYSIS {
techno_qc()
}
/*
========================================================================================
RUN ALL WORKFLOWS
========================================================================================
*/
// Unnamed entry workflow: its only action is to invoke QC_ANALYSIS.
workflow {
QC_ANALYSIS()
}
#!/usr/bin/env nextflow
nextflow.enable.dsl=2
/*
 * Write the run summary as a YAML file for the MultiQC report.
 *
 * Renders every entry of params.summary as a <dt>/<dd> pair inside a <dl> list
 * and stores the result as workflow_summary_mqc.yaml in the task work
 * directory. Entries with an empty value are shown as a grey "N/A".
 */
process workflow_summary {
    output:
    path('workflow_summary_mqc.yaml')

    exec:
    def yaml_file = task.workDir.resolve('workflow_summary_mqc.yaml')
    // The N/A placeholder must close the <span> it opens (it used to close with </a>).
    // NOTE(review): the whitespace inside this literal is kept exactly as found; the
    // lines under `data: |` presumably need an indent to form a valid YAML block
    // scalar - confirm against the repository copy.
    yaml_file.text = """
id: 'summary'
description: " - this information is collected when the pipeline is started."
section_name: 'Workflow Summary'
section_href: "${workflow.manifest.homePage}"
plot_type: 'html'
data: |
<dl class=\"dl-horizontal\">
${params.summary.collect { k,v -> " <dt>$k</dt><dd><samp>${v ?: '<span style=\"color:#999999;\">N/A</span>'}</samp></dd>" }.join("\n")}
</dl>
""".stripIndent()
}
/*
 * STEP - MultiQC
 *
 * Builds one MultiQC report from the staged FastQC results and workflow
 * summary, using the given MultiQC config file. Published under
 * <outdir>/MultiQC; skipped when params.skip_multiQC is set.
 */
process multiqc {
    publishDir "${params.outdir}/MultiQC", mode: 'copy'
    module 'bioinfo/MultiQC-v1.7'

    when:
    !params.skip_multiQC

    input:
    path(multiqc_config)
    path('fastqc/*') //.collect().ifEmpty([])
    // TODO get-nf: Add in log files from your new processes for MultiQC to find!
    //path('software_versions/*') //.collect()
    path('workflowSummary/*') //.collect()

    output:
    path("*report.html")
    path("*_data")
    path("multiqc_plots")

    script:
    // Title and file name options are only passed when params.custom_runName is set;
    // the file name has every run of non-word characters collapsed to one "_".
    def run_label = params.custom_runName
    def title_opt = ''
    def filename_opt = ''
    if (run_label) {
        title_opt = "--title \"$run_label\""
        filename_opt = "--filename " + run_label.replaceAll('\\W','_').replaceAll('_+','_') + "_multiqc_report"
    }
    """
    multiqc -f $title_opt $filename_opt --config $multiqc_config .
    """
}
/*
 * STEP - Output Description HTML
 *
 * Converts the given output documentation file to HTML with pandoc and
 * publishes results_description.html under <outdir>/pipeline_info.
 */
process output_documentation {
    publishDir "${params.outdir}/pipeline_info", mode: 'copy'
    module 'system/pandoc-2.1.3'

    input:
    path output_docs

    output:
    path "results_description.html"

    script:
    """
    pandoc $output_docs -t html -o results_description.html
    """
}
\ No newline at end of file
/*
 * Parse software version numbers
 *
 * Writes the pipeline, Nextflow, FastQC and MultiQC versions to v_*.txt files,
 * then redirects stdout and stderr of scrape_software_versions.py into
 * software_versions_mqc.yaml. Only outputs whose name contains ".csv" past the
 * first character are published to <outdir>/pipeline_info.
 */
process get_software_versions {
    publishDir "${params.outdir}/pipeline_info", mode: 'copy',
        saveAs: { published -> published.indexOf(".csv") > 0 ? published : null }

    output:
    path 'software_versions_mqc.yaml'
    // presumably written by scrape_software_versions.py - verify against the script
    path "software_versions.csv"

    script:
    // TODO nf-core: Get all tools to print their version number here
    """
    echo $workflow.manifest.version > v_pipeline.txt
    echo $workflow.nextflow.version > v_nextflow.txt
    fastqc --version > v_fastqc.txt
    multiqc --version > v_multiqc.txt
    scrape_software_versions.py &> software_versions_mqc.yaml
    """
}
/*
 * STEP 1 - FastQC
 *
 * Runs FastQC on the reads of one sample and emits the *_fastqc.zip / .html
 * reports. Published under <outdir>/fastqc, with files whose name contains
 * ".zip" past the first character placed in a zips/ sub-directory.
 */
process fastqc {
    tag "$name"
    label 'process_medium'
    publishDir "${params.outdir}/fastqc", mode: 'copy',
        saveAs: { published ->
            if (published.indexOf(".zip") > 0) {
                return "zips/$published"
            }
            return "$published"
        }
    module 'bioinfo/FastQC_v0.11.7'

    input:
    tuple val(name), path(reads)

    output:
    path("*_fastqc.{zip,html}")

    script:
    """
    fastqc --quiet --threads $task.cpus $reads
    """
}
/*
 * STEP 2 - Fake QC
 *
 * Placeholder step: it does not run FastQC, it only echoes the command line
 * that would be run into <replicate_id>.qc1.
 */
process qc1 {
    module 'bioinfo/FastQC_v0.11.7'

    input:
    tuple val(replicate_id), path(reads)

    output:
    path "${replicate_id}.qc1"

    script:
    """
    echo "mkdir ${replicate_id} ; fastqc --nogroup --quiet -o ${replicate_id} --threads ${task.cpus} ${reads[0]} ${reads[1]}" > ${replicate_id}.qc1
    """
}
/*
 * STEP 3 - Fake assembly
 *
 * Placeholder step: takes one qc file plus one sample tuple and writes a
 * single "ASSEMBLY <replicate_id> ; " line to <replicate_id>.assembly.
 * Neither the qc file nor the reads are used by the script itself.
 */
process assembly {
    input:
    path qc
    tuple val(replicate_id), path(reads)

    output:
    path "${replicate_id}.assembly"

    script:
    """
    echo "ASSEMBLY ${replicate_id} ; " > ${replicate_id}.assembly
    """
}
......@@ -10,7 +10,7 @@ params {
// Workflow flags
// TODO nf-core: Specify your pipeline's command line flags
inputdir = "./data"
inputdir = "./template/data"
samplesheet = "${params.inputdir}/samples.csv"
single_end = false
outdir = './results'
......@@ -20,7 +20,7 @@ params {
name = false
multiqc_config = "$baseDir/assets/multiqc_config.yaml"
tracedir = "${params.outdir}/pipeline_info"
email = false
email = 'claire.kuchly@inrae.fr'
email_on_fail = false
monochrome_logs = false
help = false
......
#!/usr/bin/env nextflow
nextflow.enable.dsl=2
include { workflow_summary;
multiqc;
output_documentation; } from '../../modules/local/modules_report.nf'
// Reporting sub-workflow.
//   take: ch_output_docs - input handed unchanged to output_documentation
//   main: invokes workflow_summary and output_documentation
// The multiqc call is currently commented out, so no MultiQC report is built here.
workflow report {
take:
ch_output_docs
main:
workflow_summary()
//multiqc(ch_multiqc_config, fastqc.out.collect().ifEmpty([]), workflow_summary.out.collect())
output_documentation(ch_output_docs)
}
\ No newline at end of file
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
def helpMessage() {
log.info"""
Usage:
The typical command for running the pipeline is as follows:
nextflow run get-nf/template --inputdir '/path/to/data' --samplesheet 'samples.csv' -profile docker
Mandatory arguments:
--inputdir Path to input directory
-profile Configuration profile to use. Can use multiple (comma separated)
Available: conda, docker, singularity, path, genotoul, test and more.
Options:
--samplesheet Default inputdir/samples.csv eg: SAMPLE_ID,SAMPLE_NAME,path/to/R1/fastq/file,path/to/R2/fastq/file (for paired-end only)
--contaminant Name of iGenomes // To be discussed ????
--outdir The output directory where the results will be saved
--email Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits
--email_on_fail Same as --email, except only send mail if the workflow is not successful