Commit 7648388e authored by ckuchly's avatar ckuchly
Browse files

1st commit : all the dev due to hackathon

parent 4544f7d6
Pipeline #42461 failed with stage
in 8 seconds
#!/usr/bin/env nextflow
nextflow.enable.dsl=2
/*
Copyright INRAE 2021
......@@ -36,7 +37,6 @@ params.chemistry = 'sparse'
ch_SPARSE_ss = Channel.fromPath('/work/sbsuser/test/Dev_SPARSE/A1P3-maize/SS_barcode.csv')
ch_read=Channel
.fromPath(params.data+'/A3P2-PlaGe-Shallow-seq/Pool-Shallow-sequencing_S1_L001_R{1,2}_001.fastq.gz')
//.fromPath(params.data+'/TregThymus/**_R{1,2}_*.fastq.gz')
.map{$it -> [$it.simpleName, $it]}
.groupTuple()
......@@ -63,6 +63,7 @@ def helpMessage() {
--inputdir Path to input directory
-profile Configuration profile to use. Can use multiple (comma separated)
Available: conda, docker, singularity, path, genotoul, test and more.
--project Project name
Options:
--samplesheet Default inputdir/samples.csv eg: SAMPLE_ID,SAMPLE_NAME,path/to/R1/fastq/file,path/to/R2/fastq/file (for paired-end only)
......@@ -104,6 +105,15 @@ if (params.help) {
// file dir from inputDirCh
//
// -----------------------------
// Includes AFTER params !!
// -----------------------------
include { bar as FOO } from './modules/module_test.nf'
include {
Core as CORE;
} from './workflows/core_pipeline.nf'
ch_inputdir = params.inputdir ? Channel.fromPath(params.inputdir, checkIfExists: true) : Channel.empty()
......@@ -131,6 +141,11 @@ if(params.samplesheet){
*/
// Has the run name been specified by the user?
// this has the bonus effect of catching both -name and --name
project_name = params.project
if (params.project == '') {
exit 1, "Expect a project name for the pipeline !"
}
custom_runName = params.name
if (!(workflow.runName ==~ /[a-z]+_[a-z]+/)) {
custom_runName = workflow.runName
......
#!/usr/bin/env nextflow
nextflow.enable.dsl=2
/*
* STEP - FGBIO DEMUXFASTQ
*/
/*
 * demuxSparse
 *
 * Runs the fgbio jar on a multi-sample sheet.
 *
 * Inputs:
 *   multiSS - sample sheet passed as first argument to the jar
 *   rawdata - staged into the work directory; not referenced by the command
 * Outputs:
 *   every file produced in the task work directory ('*')
 */
process demuxSparse {
    tag "$params.project"

    // Both environment modules are loaded before the task script runs.
    module 'java8'
    module 'bioinfo/fgbio-v1.3.0'

    // Not used anymore
    publishDir path: "${params.outdir}", mode: 'copy'

    input:
    path multiSS
    path rawdata

    output:
    path '*'

    // FGBIO is a shell environment variable, so the dollar sign must be escaped:
    // unescaped, Nextflow would try to resolve it as a pipeline variable and fail.
    // presumably exported by the 'bioinfo/fgbio-v1.3.0' module -- TODO confirm.
    // NOTE(review): no fgbio tool name is given on the command line (the arguments
    // mirror the extractReads.pl call in decoupageSS) -- confirm the intended tool.
    script:
    """
    java -jar \${FGBIO} $multiSS NovaSeq
    """
}
\ No newline at end of file
//params.sequencer = 'MiSeq'
//params.rawdata_location = '/home/sbsuser/work/Nextflow/wf-illumina-nf/wf-illumina-nf/data_test/MiSeq/210713_M07406_0007_000000000-JRCVF_bad'

// Module-level default for the publication root used by the publishDir directives below.
params.outdir = ''

// Starts empty; presumably the reference banks used for contamination checks -- TODO confirm.
banksForConta = []

//mismatchNumber= params.sequencer == 'MiSeq'? 0 : 1
/*
 * decoupageSS
 *
 * Calls extractReads.pl on the multi-sample sheet with the literal argument
 * 'NovaSeq' and publishes whatever the script writes to <outdir>/SampleSheets.
 */
process decoupageSS {
    // Not used anymore
    publishDir "${params.outdir}/SampleSheets", mode: 'copy'

    input:
    path multiSS

    output:
    path '*'

    shell:
    """
    extractReads.pl ${multiSS} NovaSeq
    """
}
/*
 * prepareReadSetCreation
 *
 * Runs extractInfoForReadSets.pl on a sample sheet and the run-creation file.
 *
 * Inputs:
 *   sampleSheet      - passed to --sampleSheet
 *   runNGLBiCreated  - passed to --runNGLBi
 * Output:
 *   readSetCreation.info (published to params.outdir)
 */
process prepareReadSetCreation {
    publishDir path: "${params.outdir}", mode: 'copy'

    input:
    path sampleSheet
    path runNGLBiCreated

    output:
    // 'path' replaces the legacy 'file' qualifier, matching every other process in this module.
    path 'readSetCreation.info'

    script:
    """
    extractInfoForReadSets.pl --sampleSheet $sampleSheet --runNGLBi $runNGLBiCreated
    """
}
/*
 * readsetNGLBiCreation
 *
 * Runs createNGLBiReadSets.pl with the info file. stdout is captured in
 * 'ReadsetsNGL-Bi.created' and stderr in 'ReadsetsNGL-BiCreation.log'.
 *
 * Input:
 *   infoFile - passed to --infoFile
 * Outputs:
 *   readSetFile - ReadsetsNGL-Bi.created (the only file published, see 'pattern')
 *   readSetLog  - ReadsetsNGL-BiCreation.log
 */
process readsetNGLBiCreation {
    publishDir path: "${params.outdir}/NGLBi", mode: 'copy', pattern: '*.created'

    // Directives take the form `name value`. Written as assignments
    // (`executor = 'local'`), they are not registered as process directives,
    // so the settings below were not being applied.
    executor 'local'
    // Exports ENV_NGL in the task shell so that \$ENV_NGL in the script resolves.
    beforeScript "export ENV_NGL='/save/sbsuser/scripts-ngs/NGL-Bi_client_Current/IG/SystemeInteractionNGL-Bi/'"
    // A failing task is ignored instead of stopping the pipeline.
    errorStrategy 'ignore'

    input:
    path infoFile

    output:
    path 'ReadsetsNGL-Bi.created', emit: readSetFile
    path 'ReadsetsNGL-BiCreation.log', emit: readSetLog

    script:
    """
    createNGLBiReadSets.pl --infoFile $infoFile --env_ngl_bi \$ENV_NGL 2> ReadsetsNGL-BiCreation.log 1> ReadsetsNGL-Bi.created
    """
}
/*
 * checkErrorFromNGLBi
 *
 * Hands a log file to checkErrorNGLScripts.pl and re-emits
 * 'ReadsetsNGL-BiCreation.log', which is copied to <outdir>/NGLBi.
 */
process checkErrorFromNGLBi {
    publishDir "${params.outdir}/NGLBi", mode: 'copy'

    input:
    path logFile

    output:
    path 'ReadsetsNGL-BiCreation.log'

    script:
    """
    checkErrorNGLScripts.pl --file ${logFile}
    """
}
/*
 * maskMaker
 *
 * Runs extractInfo.pl with the sample sheet (-s) and the RunInfo XML (-r).
 * The resulting 'Run.conf' is copied to <outdir>/Demux.
 */
process maskMaker {
    publishDir "${params.outdir}/Demux", mode: 'copy'

    input:
    path SampleSheet
    path RunInfoXML

    output:
    path 'Run.conf'

    script:
    """
    extractInfo.pl -s ${SampleSheet} -r ${RunInfoXML}
    """
}
/*
 * bcl2fastq
 *
 * Reads the 'MASQUE' entry from the run configuration file and prints the
 * bcl2fastq command line that would be used. The command is only echoed,
 * not executed.
 *
 * Inputs:
 *   SampleSheet       - passed to --sample-sheet
 *   Runconf           - file grepped for the 'MASQUE=' line
 *   mismatchNumber    - passed to --barcode-mismatches
 *   rawdata_location  - passed to -R
 */
process bcl2fastq {
    // With the output block commented out below, no output is declared for publishDir to copy.
    publishDir path: "${params.outdir}/Demux/Files", mode: 'copy'

    // Directive form (`echo true`); the assignment form `echo=true` is not
    // registered as a process directive, so task stdout was not being forwarded.
    echo true

    input:
    path SampleSheet
    path Runconf
    val mismatchNumber
    path rawdata_location

    //output:
    //path "*"

    // Single-quoted shell block: $... is left to Bash, !{...} is filled in by Nextflow.
    shell:
    '''
    mask=$(grep 'MASQUE' !{Runconf} | cut -d'=' -f2)
    echo "bcl2fastq -p 10 -r 4 -w 4 ${mask} --barcode-mismatches !{mismatchNumber} --output-dir ./ -R !{rawdata_location} --sample-sheet !{SampleSheet} -l DEBUG"
    '''
}
/*
 * extractInfoForDemuxStats
 *
 * Runs extractInfoForDemuxStats.pl on the sample sheet and collects every
 * '*.indexNumber' file it leaves in the work directory (copied to <outdir>/Demux).
 */
process extractInfoForDemuxStats {
    publishDir "${params.outdir}/Demux", mode: 'copy'

    input:
    path SampleSheet

    output:
    path "*.indexNumber"

    script:
    """
    extractInfoForDemuxStats.pl --sampleSheet ${SampleSheet}
    """
}
/*
 * demultiplexStats
 *
 * Runs demuxStatsFromXML.R with the demultiplexing XML, the index-number file
 * and the demux summary. stdout of the R script goes to 'demultiplexStats.log'.
 *
 * Outputs:
 *   log                  - demultiplexStats.log
 *   demultiplexStatsCSV  - files matching 'DemultiplexStats_*'
 */
process demultiplexStats {
    module 'system/R-4.0.4_gcc-9.3.0'
    publishDir "${params.outdir}/Demux", mode: 'copy'

    input:
    path DemuxStatXML
    path IndexNumberFile
    path DemuxSummary

    output:
    path 'demultiplexStats.log', emit: log
    path "DemultiplexStats_*", emit: demultiplexStatsCSV

    script:
    // NOTE(review): absolute, user-specific path to the R script -- confirm whether
    // it should be resolved relative to the pipeline instead.
    def statsScript = '/home/sbsuser/work/Nextflow/wf-illumina-nf/wf-illumina-nf/bin/demuxStatsFromXML.R'
    """
    Rscript ${statsScript} --xml ${DemuxStatXML} --indexNumber ${IndexNumberFile} --demuxSum ${DemuxSummary} > demultiplexStats.log
    """
}
/*
 * fastqc
 *
 * Runs FastQC on the read file(s) of one sample and emits the generated
 * '*_fastqc.zip' / '*_fastqc.html' reports (copied to <outdir>/FastQC).
 *
 * Input:
 *   tuple (name, read) - sample name and its read file(s)
 * Output:
 *   ch_fastqc_result   - FastQC zip and html reports
 */
process fastqc {
    publishDir "${params.outdir}/FastQC", mode: 'copy'

    // Software and scheduler settings.
    module 'bioinfo/FastQC_v0.11.7'
    executor 'slurm'
    queue 'wflowq'

    // Resources: the time limit grows with each attempt.
    cpus 1 //{ 1 * task.attempt }
    memory '1.GB'
    time { 45.m * task.attempt }

    // Retry (up to 3 times) on the listed exit codes, otherwise finish.
    errorStrategy { task.exitStatus in [143,137,104,134,139] ? 'retry' : 'finish' }
    maxRetries 3

    input:
    tuple val(name), path(read)

    output:
    path "*_fastqc.{zip,html}", emit: ch_fastqc_result
    // path log files

    script:
    """
    fastqc -t ${task.cpus} --nogroup --noextract --outdir ./ ${read}
    """
}
/*
 * illuminaFilter
 *
 * Decompresses the reads, pipes them through fastq_illumina_filter (--keep N)
 * and recompresses the result as 'good.fastq.gz'. The filter's stderr is
 * written to '<name>.out'.
 *
 * Input:
 *   tuple (name, read) - sample name and its gzipped FASTQ
 * Outputs:
 *   reads - tuple (name, filtered '*.fastq.gz')
 *   log   - the '*out' log file
 */
process illuminaFilter {
    // Only the FASTQ is renamed to '<name>.fastq.gz' on publication. The previous
    // closure renamed every published file, so the '*.out' log was published
    // under the same '<name>.fastq.gz' name as the reads.
    publishDir path: "${params.outdir}/IlluminaFilter", mode: 'copy',
        saveAs: { filename -> filename.endsWith('.fastq.gz') ? "${name}.fastq.gz" : filename }

    module 'bioinfo/fastq_illumina_filter-0.1'
    executor 'slurm'
    queue 'wflowq'
    cpus { 1 * task.attempt }
    time { 1.h * task.attempt }
    memory '1.GB'

    input:
    tuple val(name), path(read)

    output:
    tuple val(name), path("*.fastq.gz"), emit: reads
    path "*out", emit: log

    script: // gzip output is redirected, so the -c flag may be unnecessary...
    """
    zcat $read | fastq_illumina_filter --keep N -v 2> ${name}.out | gzip -c -f > good.fastq.gz
    """
}
/*
 * check_conta_bwa
 *
 * Aligns the reads of one sample against one reference with `bwa aln` piped
 * into `bwa samse`. The alignment goes to '<name>_<genomeName>.sam' and stderr
 * of both commands is appended to '<name>_<genomeName>.err'.
 *
 * Inputs:
 *   tuple (name, read) - sample name and its read file
 *   genomeRef          - 'each' input: the task is repeated for every reference
 * Output:
 *   tuple ('<name>_<genomeName>', all files produced by the task)
 */
process check_conta_bwa {
    module 'bioinfo/bwa-0.7.17'

    // aln command uses ~3.2GB memory and the sampe command uses ~5.4GB
    memory { 10.GB * task.attempt }
    time { 20.m * task.attempt }

    input:
    tuple val(name), path(read)
    each genomeRef

    output:
    tuple val("${name}_${genomeName}"), path("*")

    script:
    // genomeName is intentionally not declared with 'def': the output block reads it.
    genomeName = file(genomeRef).simpleName
    def prefix = "${name}_${genomeName}"
    """
    bwa aln ${genomeRef} ${read} 2>> ${prefix}.err | bwa samse ${genomeRef} - ${read} > ${prefix}.sam 2>> ${prefix}.err
    """
    //
}
/*
 * check_conta_samtools
 *
 * Filters '<name>.sam' with `samtools view -SF 260`, keeps the first column
 * and writes the sorted result to '<name>.txt'. stderr of each stage is
 * appended to '<name>.err'.
 *
 * Input:
 *   tuple (name, files) - file prefix and the staged files ('<name>.sam' is read)
 * Output:
 *   mapped_read_names   - '<name>.txt'
 */
process check_conta_samtools {
    publishDir path: "${params.outdir}/CheckContamination", mode: 'copy'
    module 'bioinfo/samtools-1.9'
    time { 10.m * task.attempt }

    input:
    tuple val(name), path("*")

    // publishDir only copies declared outputs; without this block the result
    // file was neither published nor available to downstream processes.
    output:
    path "${name}.txt", emit: mapped_read_names

    script:
    """
    samtools view -SF 260 ${name}.sam 2>> ${name}.err | cut -f1 - 2>> ${name}.err | sort - > ${name}.txt 2>> ${name}.err
    """
}
......@@ -15,6 +15,9 @@ params {
single_end = false
outdir = './results'
skip_multiQC = false
project = ''
chemistry = ''
// Boilerplate options
name = false
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment