#!/usr/bin/env nextflow


/*
Copyright INRAE 2021

This software is a computer program whose purpose is to
analyze high-throughput sequencing data.
You can use, modify and/ or redistribute the software under the terms
of license (see the LICENSE file for more details).
The software is distributed in the hope that it will be useful,
but "AS IS" WITHOUT ANY WARRANTY OF ANY KIND.
Users are therefore encouraged to test the software's suitability as regards
their requirements in conditions enabling the security of their systems and/or data.
The fact that you are presently reading this means that you have had knowledge
of the license and that you accept its terms.
This script is based on:
 - the nf-core guidelines. See https://nf-co.re/ for more information
 - the Institut Curie template https://github.com/bioinfo-pf-curie/geniac-template/

*/


/*
========================================================================================
                         GeT/template
========================================================================================
 GeT/template Analysis Pipeline.
 #### Homepage / Documentation
 https://github.com/nf-core/template
----------------------------------------------------------------------------------------
*/

/**
 * Print the pipeline usage message through the Nextflow logger.
 *
 * The text is a single multi-line string; every line shares the same four-space
 * indentation so that stripIndent() can remove it before the text is logged.
 */
def helpMessage() {
    log.info"""

    Usage:

    The typical command for running the pipeline is as follows:

    nextflow run nf-core/template --inputdir '/path/to/data' --samplesheet 'samples.csv' -profile docker

    Mandatory arguments:
      --inputdir                    Path to input directory
      -profile                      Configuration profile to use. Can use multiple (comma separated)
                                    Available: conda, docker, singularity, path, genotoul, test and more.

    Options:
      --samplesheet                 Default inputdir/samples.csv eg: SAMPLE_ID,SAMPLE_NAME,path/to/R1/fastq/file,path/to/R2/fastq/file (for paired-end only)
      --contaminant                 Name of iGenomes // To be discussed ????
      --outdir                      The output directory where the results will be saved
      --email                       Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits
      --email_on_fail               Same as --email, except only send mail if the workflow is not successful
      --maxMultiqcEmailFileSize     Threshold size for MultiQC report to be attached in notification email. If file generated by pipeline exceeds the threshold, it will not be attached (Default: 25MB)
      -name [str]                   Name for the pipeline run. If not specified, Nextflow will automatically generate a random mnemonic.


    =======================================================
    Available profiles
    -profile test                    Run the test dataset
    -profile conda                   Build a new conda environment before running the pipeline. Use `--condaCacheDir` to define the conda cache path
    -profile path                    Use the installation path defined for all tools. Use `--globalPath` to define the installation path
    -profile docker                  Use the Docker images for each process
    -profile singularity             Use the singularity images for each process
    -profile genologin               Run the workflow on the cluster, instead of locally

    """.stripIndent()
}

// When --help is requested, print the usage text and stop with a success status
// before any channel or process is set up.
if( params.help ) {
    helpMessage()
    exit(0)
}

// NOTE - THIS IS NOT USED IN THIS PIPELINE, EXAMPLE ONLY

/*
 * Create a channel for input read files
 */
// If you want to use the channel below in a process, define the following:
//   input:
//   file dir from inputDirCh
//

/*
 * Input resolution.
 *
 *  inputDirCh     emits the --inputdir path, or is empty when --inputdir is not set.
 *  samplesheetCh  emits the sample sheet path, or is empty when none can be resolved.
 *  read channels  one [ sampleId, [ fastq file(s) ] ] item per sample, taken from (in this
 *                 order) the sample sheet, params.readPaths, or the params.reads glob.
 *
 * Sample sheet rows are read without a header; column 0 is used as the sample id,
 * column 2 as the R1 file and column 3 as the R2 file (paired-end only).
 */
inputDirCh = params.inputdir ? Channel.fromPath(params.inputdir, checkIfExists: true) : Channel.empty()

// An explicit --samplesheet wins; otherwise fall back to <inputdir>/samples.csv, but only
// when an input directory was actually given, so the readPaths / reads branches stay reachable.
samplesheetPath = params.samplesheet ?: (params.inputdir ? params.inputdir + "/samples.csv" : null)

// Keep params.samplesheet in sync for any code that reads it.
// NOTE(review): the resolved value is also held in `samplesheetPath` because an assignment to
// `params` may not take effect if the parameter is already defined in a config file - confirm.
if (!params.samplesheet && samplesheetPath) {
    params.samplesheet = samplesheetPath
}

samplesheetCh = samplesheetPath ? Channel.fromPath(samplesheetPath, checkIfExists: true) : Channel.empty()


// Create a channel for input read files
if (samplesheetPath) {
  if (params.singleEnd) {
    Channel
      .from(file(samplesheetPath))
      .splitCsv(header: false)
      .map { row -> [ row[0], [file(row[2])] ] }
      .set { rawReadsFastqcCh }
  } else {
    Channel
      .from(file(samplesheetPath))
      .splitCsv(header: false)
      .map { row -> [ row[0], [file(row[2]), file(row[3])] ] }
      .set { rawReadsFastqcCh }
  }
  params.reads = false
}
else if (params.readPaths) {
  if (params.singleEnd) {
    Channel
      .from(params.readPaths)
      .map { row -> [ row[0], [file(row[1][0])] ] }
      .ifEmpty { exit 1, "params.readPaths was empty - no input files supplied." }
      .set { rawReadsFastqcCh }
  } else {
    Channel
      .from(params.readPaths)
      .map { row -> [ row[0], [file(row[1][0]), file(row[1][1])] ] }
      .ifEmpty { exit 1, "params.readPaths was empty - no input files supplied." }
      .set { rawReadsFastqcCh }
  }
} else {
  Channel
    .fromFilePairs( params.reads, size: params.singleEnd ? 1 : 2 )
    .ifEmpty { exit 1, "Cannot find any reads matching: ${params.reads}\nNB: Path needs to be enclosed in quotes!\nNB: Path requires at least one * wildcard!\nIf this is single-end data, please specify --singleEnd on the command line." }
    .set { rawReadsFastqcCh }
}

// The fastqc, qc1 and assembly processes each read from their own channel name, so the
// read channel is duplicated once per consumer.
// NOTE(review): this consumes rawReadsFastqcCh; confirm nothing else reads it directly.
rawReadsFastqcCh.into { read_files_fastqc; raw_reads_fastqc; raw_reads_assembly }

/* In case of modular pipeline */
// Comma-separated list of steps to run; processes test membership with `"<name>" in step`.
params.step = "qc"
availableStepList =
    [
        'qc',
        'assembly',
        'filtering',
        'binning'
    ]

/* Check that every requested step exists in availableStepList */
// Entries are trimmed so that "qc, assembly" is accepted, and collected into a List
// so the `in` membership checks in the processes operate on a collection.
step = params.step.toString().split(",").collect { it.trim() }
for (String a_step : step) {
    assert (a_step in availableStepList) : "Unknown step '${a_step}'. Available steps: ${availableStepList.join(', ')}"
}

/*
 * STEP 1 - FastQC
 */
process fastqc {
    // Runs FastQC on the reads of one sample. Published files whose name contains
    // ".zip" (not at position 0) are placed under zips/, all others keep their name.
    label 'process_medium'
    tag "${name}"
    publishDir "${params.outdir}/fastqc",
        mode: 'copy',
        saveAs: { published -> published.indexOf(".zip") > 0 ? "zips/${published}" : "${published}" }

    input:
    set val(name), file(reads) from read_files_fastqc

    output:
    file "*_fastqc.{zip,html}" into fastqc_results

    script:
    """
    fastqc --quiet --threads ${task.cpus} ${reads}
    """
}

/*
 * STEP 2 - Fake QC
 */
process qc1 {
    // Placeholder QC step: it does not run FastQC, it only writes the command line
    // that would be run into <replicateId>.qc1. Executed only when "qc" is in `step`.
    input:
    set val(replicateId), file(reads) from raw_reads_fastqc

    output:
    file("${replicateId}.qc1") into fastqc_raw_ch_for_multiqc

    when: "qc" in step

    script:
    // `${reads}` expands to every staged read file, so the same command line is valid
    // for single-end (one file) and paired-end (two files) samples.
    """
        echo "mkdir ${replicateId} ; fastqc --nogroup --quiet -o ${replicateId} --threads ${task.cpus} ${reads}" > ${replicateId}.qc1
    """
}

/*
 * STEP 3 - Fake assembly
 */
process assembly {
    // Placeholder assembly step: writes a marker line into <replicateId>.assembly.
    // Executed only when "assembly" is in `step`.
    input:
    // The QC output is staged but not referenced by the script; it is a single file,
    // so it is declared with `file` rather than as a one-element `set`.
    // NOTE(review): this input and the reads below are matched by emission order, not by
    // sample id - confirm this is acceptable or join the two channels on the id.
    file(qc) from fastqc_raw_ch_for_multiqc
    set val(replicateId), file(reads) from raw_reads_assembly

    output:
    file("${replicateId}.assembly") into assembly_ch

    when: "assembly" in step

    script:
    """
        echo "ASSEMBLY ${replicateId} ; " > ${replicateId}.assembly
    """
}