Commit eb95e0a4 authored by Jean Mainguy's avatar Jean Mainguy
Browse files

add hifi reads subworkflow and add filtering step for hifi reads

parent fef2c774
......@@ -15,12 +15,17 @@ module_order:
- fastqc:
name: 'FastQC'
path_filters:
- '*hifi_*.zip'
- '*fastqc.zip'
- quast:
name: 'Quast primary assembly'
info: 'This section of the report shows quast results after assembly'
path_filters:
- '*quast_hifi/*/report.tsv'
- quast:
name: 'Quast filtered assembly'
info: 'This section of the report shows quast results after filtering of assembly'
path_filters:
- '*quast_filtered/*/report.tsv'
- prokka
- featureCounts
......
......@@ -13,9 +13,8 @@ nextflow.enable.dsl = 2
include { SHARED as SH } from './subworkflows/shared'
include { SHORT_READS as SR } from './subworkflows/short_reads'
include { HIFI_READS } from './subworkflows/hifi_reads'
include { DATABASES } from './subworkflows/00_databases'
include { FASTQC_HIFI as S04_HIFI_FASTQC } from './modules/fastqc'
include { HIFI_QUAST as S04_HIFI_QUAST } from './modules/metaquast'
include { GET_SOFTWARE_VERSIONS } from './modules/get_software_versions'
include { MULTIQC } from './modules/multiqc'
......@@ -273,11 +272,18 @@ workflow {
ch_inputs.map { item -> [ item.sample, item.fastq_1 ] } // [sample, reads]
.set { ch_reads }
S04_HIFI_FASTQC( ch_reads )
S04_HIFI_QUAST( ch_assembly )
ch_hifi_fastqc_report = S04_HIFI_FASTQC.out.zip
ch_hifi_quast_report = S04_HIFI_QUAST.out.report
HIFI_READS (
ch_reads,
ch_assembly,
ch_multiqc_config
)
ch_hifi_fastqc_report = HIFI_READS.out.fastqc_report
ch_hifi_quast_report = HIFI_READS.out.quast_report
ch_filtered_report = HIFI_READS.out.quast_report_filtered
ch_assembly = HIFI_READS.out.assembly
}
else {
......
......@@ -38,7 +38,7 @@ process FASTQC_CLEANED {
process FASTQC_HIFI {
tag "${sampleId}"
label 'FASTQC'
publishDir "${params.outdir}/04_structural_annot/fastqc_hifi", mode: 'copy'
publishDir "${params.outdir}/01_clean_qc/fastqc_hifi", mode: 'copy'
input:
tuple val(sampleId), path(read)
......
......@@ -48,4 +48,30 @@ process MINIMAP2 {
samtools idxstats ${sampleId}.sort.bam > ${sampleId}.sort.bam.idxstats
"""
}
\ No newline at end of file
}
/*
 * MINIMAP2_FILTERING
 *
 * Aligns the reads of one sample against that sample's assembly and derives
 * alignment statistics from the resulting sorted BAM.
 *
 * Input : tuple (sampleId, assembly, reads)
 * Output: - sam_idxstat: tuple (sampleId, "<sampleId>.idxstats")
 *         - every file in the task directory matching "<sampleId>*"
 *           (the script writes <sampleId>.flagstat, <sampleId>_coverage.tsv
 *           and <sampleId>.idxstats; the BAM and its index are deleted
 *           before the task ends)
 *
 * Declared outputs are copied to "${params.outdir}/02_assembly/logs/".
 */
process MINIMAP2_FILTERING {
    tag "${sampleId}"
    publishDir "${params.outdir}/02_assembly/logs/", mode: 'copy'

    input:
    tuple val(sampleId), path(assembly), path(reads)

    output:
    tuple val(sampleId), path("${sampleId}.idxstats"), emit: sam_idxstat
    path "${sampleId}*"

    script:
    """
    # Make the task fail if ANY stage of the pipe below fails. Without this,
    # only the exit status of the last command (samtools sort) is checked and
    # a crashed minimap2 would silently yield a truncated BAM and wrong stats.
    set -o pipefail

    # align reads to contigs, keep only primary aln and sort resulting bam
    # (-F 2304 drops secondary [256] and supplementary [2048] alignments)
    minimap2 -t ${task.cpus} -ax asm20 $assembly $reads |
        samtools view -@ ${task.cpus} -b -F 2304 |
        samtools sort -@ ${task.cpus} -o ${sampleId}.sort.bam

    # options are given before the input file so that parsing does not rely
    # on GNU-style argument permutation
    samtools index -@ ${task.cpus} ${sampleId}.sort.bam

    samtools flagstat -@ ${task.cpus} ${sampleId}.sort.bam > ${sampleId}.flagstat
    samtools coverage ${sampleId}.sort.bam > ${sampleId}_coverage.tsv
    samtools idxstats ${sampleId}.sort.bam > ${sampleId}.idxstats

    # the BAM and its index are only intermediates: remove them so they are
    # neither captured by the "${sampleId}*" output glob nor published
    rm ${sampleId}.sort.bam*
    """
}
include { ASSEMBLY_FILTER as S03_FILTERING } from './03_filtering'
include { FASTQC_HIFI } from '../modules/fastqc'
include { ASSEMBLY_QUAST } from '../modules/metaquast'
include { FILTERED_QUAST as S04_FILTERED_QUAST } from '../modules/metaquast'
include { MINIMAP2_FILTERING } from '../modules/read_alignment'
/*
 * HIFI_READS sub-workflow
 *
 * Runs read QC and assembly QC and, unless params.skip_filtering is set,
 * filters the assembly using read-alignment statistics.
 *
 * take:
 *   ch_reads          - joined with ch_assembly on its first element, so it
 *                       is expected to emit tuples keyed by sample id
 *   ch_assembly       - tuples keyed by sample id carrying the assembly
 *   ch_multiqc_config - accepted but not used inside this workflow
 *
 * emit:
 *   fastqc_report         - FASTQC_HIFI.out.zip
 *   quast_report          - ASSEMBLY_QUAST.out.report
 *   quast_report_filtered - S04_FILTERED_QUAST.out.report, or an empty channel
 *                           when filtering is skipped
 *   assembly              - S03_FILTERING.out.selected, or the input assembly
 *                           unchanged when filtering is skipped
 */
workflow HIFI_READS {
    take:
    ch_reads
    ch_assembly
    ch_multiqc_config

    main:
    // QC of the raw reads
    FASTQC_HIFI( ch_reads )
    ch_reads_qc = FASTQC_HIFI.out.zip

    // QC of the unfiltered assembly
    ASSEMBLY_QUAST( ch_assembly )
    ch_assembly_qc = ASSEMBLY_QUAST.out.report

    // Defaults used when the filtering step is skipped
    ch_filtered_qc = Channel.empty()
    ch_final_assembly = ch_assembly

    if ( !params.skip_filtering ) {
        // Align each sample's reads on its own assembly
        MINIMAP2_FILTERING( ch_assembly.join(ch_reads, remainder: true) )

        // Cut each assembly into files of 100000 sequences and attach the
        // matching sample's idxstats to every chunk
        ch_chunks_with_idxstats = ch_assembly
            .splitFasta(by: 100000, file: true)
            .combine(MINIMAP2_FILTERING.out.sam_idxstat, by:0)

        S03_FILTERING (
            ch_chunks_with_idxstats,
            Channel.value(params.min_contigs_cpm)
        )
        ch_final_assembly = S03_FILTERING.out.selected

        // QC of the filtered assembly
        S04_FILTERED_QUAST( ch_final_assembly )
        ch_filtered_qc = S04_FILTERED_QUAST.out.report
    }

    emit:
    fastqc_report = ch_reads_qc
    quast_report = ch_assembly_qc
    quast_report_filtered = ch_filtered_qc
    assembly = ch_final_assembly
}
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment