Commit eb95e0a4 authored by Jean Mainguy's avatar Jean Mainguy
Browse files

add hifi reads subworkflow and add filtering step for hifi reads

parent fef2c774
...@@ -15,12 +15,17 @@ module_order: ...@@ -15,12 +15,17 @@ module_order:
- fastqc: - fastqc:
name: 'FastQC' name: 'FastQC'
path_filters: path_filters:
- '*hifi_*.zip' - '*fastqc.zip'
- quast: - quast:
name: 'Quast primary assembly' name: 'Quast primary assembly'
info: 'This section of the report shows quast results after assembly' info: 'This section of the report shows quast results after assembly'
path_filters: path_filters:
- '*quast_hifi/*/report.tsv' - '*quast_hifi/*/report.tsv'
- quast:
name: 'Quast filtered assembly'
info: 'This section of the report shows quast results after filtering of assembly'
path_filters:
- '*quast_filtered/*/report.tsv'
- prokka - prokka
- featureCounts - featureCounts
......
...@@ -13,9 +13,8 @@ nextflow.enable.dsl = 2 ...@@ -13,9 +13,8 @@ nextflow.enable.dsl = 2
include { SHARED as SH } from './subworkflows/shared' include { SHARED as SH } from './subworkflows/shared'
include { SHORT_READS as SR } from './subworkflows/short_reads' include { SHORT_READS as SR } from './subworkflows/short_reads'
include { HIFI_READS } from './subworkflows/hifi_reads'
include { DATABASES } from './subworkflows/00_databases' include { DATABASES } from './subworkflows/00_databases'
include { FASTQC_HIFI as S04_HIFI_FASTQC } from './modules/fastqc'
include { HIFI_QUAST as S04_HIFI_QUAST } from './modules/metaquast'
include { GET_SOFTWARE_VERSIONS } from './modules/get_software_versions' include { GET_SOFTWARE_VERSIONS } from './modules/get_software_versions'
include { MULTIQC } from './modules/multiqc' include { MULTIQC } from './modules/multiqc'
...@@ -273,11 +272,18 @@ workflow { ...@@ -273,11 +272,18 @@ workflow {
ch_inputs.map { item -> [ item.sample, item.fastq_1 ] } // [sample, reads] ch_inputs.map { item -> [ item.sample, item.fastq_1 ] } // [sample, reads]
.set { ch_reads } .set { ch_reads }
S04_HIFI_FASTQC( ch_reads ) HIFI_READS (
S04_HIFI_QUAST( ch_assembly ) ch_reads,
ch_hifi_fastqc_report = S04_HIFI_FASTQC.out.zip ch_assembly,
ch_hifi_quast_report = S04_HIFI_QUAST.out.report ch_multiqc_config
)
ch_hifi_fastqc_report = HIFI_READS.out.fastqc_report
ch_hifi_quast_report = HIFI_READS.out.quast_report
ch_filtered_report = HIFI_READS.out.quast_report_filtered
ch_assembly = HIFI_READS.out.assembly
} }
else { else {
......
...@@ -38,7 +38,7 @@ process FASTQC_CLEANED { ...@@ -38,7 +38,7 @@ process FASTQC_CLEANED {
process FASTQC_HIFI { process FASTQC_HIFI {
tag "${sampleId}" tag "${sampleId}"
label 'FASTQC' label 'FASTQC'
publishDir "${params.outdir}/04_structural_annot/fastqc_hifi", mode: 'copy' publishDir "${params.outdir}/01_clean_qc/fastqc_hifi", mode: 'copy'
input: input:
tuple val(sampleId), path(read) tuple val(sampleId), path(read)
......
...@@ -48,4 +48,30 @@ process MINIMAP2 { ...@@ -48,4 +48,30 @@ process MINIMAP2 {
samtools idxstats ${sampleId}.sort.bam > ${sampleId}.sort.bam.idxstats samtools idxstats ${sampleId}.sort.bam > ${sampleId}.sort.bam.idxstats
""" """
} }
\ No newline at end of file
// Aligns a sample's reads against its assembly with minimap2 and derives
// alignment statistics from the resulting sorted BAM.
//
// Input : tuple (sampleId, assembly, reads)
// Output: - tuple (sampleId, "<sampleId>.idxstats"), emitted as `sam_idxstat`
//         - the files matching the glob "<sampleId>*"
// The task outputs are copied under "<outdir>/02_assembly/logs/".
process MINIMAP2_FILTERING {
    publishDir "${params.outdir}/02_assembly/logs/", mode: 'copy'
    tag "${sampleId}"

    input:
    tuple val(sampleId), path(assembly), path(reads)

    output:
    tuple val(sampleId), path("${sampleId}.idxstats"), emit: sam_idxstat
    path("${sampleId}*")

    script:
    // `samtools view -F 2304` discards records flagged secondary (256) or
    // supplementary (2048). The sorted BAM and its index are only needed to
    // compute the flagstat / coverage / idxstats tables, which is why they are
    // deleted by the last command of the script.
    """
    # align reads to contigs, keep only primary aln and sort resulting bam
    minimap2 -t ${task.cpus} -ax asm20 $assembly $reads | samtools view -@ ${task.cpus} -b -F 2304 | samtools sort -@ ${task.cpus} -o ${sampleId}.sort.bam
    samtools index ${sampleId}.sort.bam -@ ${task.cpus}
    samtools flagstat -@ ${task.cpus} ${sampleId}.sort.bam > ${sampleId}.flagstat
    samtools coverage ${sampleId}.sort.bam > ${sampleId}_coverage.tsv
    samtools idxstats ${sampleId}.sort.bam > ${sampleId}.idxstats
    rm ${sampleId}.sort.bam*
    """
}
include { ASSEMBLY_FILTER as S03_FILTERING } from './03_filtering'
include { FASTQC_HIFI } from '../modules/fastqc'
include { ASSEMBLY_QUAST } from '../modules/metaquast'
include { FILTERED_QUAST as S04_FILTERED_QUAST } from '../modules/metaquast'
include { MINIMAP2_FILTERING } from '../modules/read_alignment'
// Sub-workflow handling HiFi reads: read QC, assembly QC and, unless
// `params.skip_filtering` is set, filtering of the assembly driven by
// `params.min_contigs_cpm`.
//
// take:
//   ch_reads          - channel joined with ch_assembly on its first element
//                       (presumably [sample, reads]; confirm against caller)
//   ch_assembly       - channel keyed the same way (presumably [sample, assembly])
//   ch_multiqc_config - accepted for interface compatibility; not used in this body
//
// emit:
//   fastqc_report         - `zip` output of FASTQC_HIFI
//   quast_report          - `report` output of ASSEMBLY_QUAST (unfiltered assembly)
//   quast_report_filtered - `report` output of the QUAST run on the filtered
//                           assembly; an empty channel when filtering is skipped
//   assembly              - filtered assembly, or the input assembly unchanged
//                           when filtering is skipped
workflow HIFI_READS {
    take:
    ch_reads
    ch_assembly
    ch_multiqc_config

    main:
    // Default for the optional output; only replaced inside the filtering branch.
    ch_filtered_report = Channel.empty()

    // Statistics on the reads.
    FASTQC_HIFI( ch_reads )
    ch_reads_qc_zip = FASTQC_HIFI.out.zip

    // Statistics on the assemblies, before any filtering.
    ASSEMBLY_QUAST( ch_assembly )
    ch_raw_quast_report = ASSEMBLY_QUAST.out.report

    if ( !params.skip_filtering ) {
        // Pair each assembly with the reads sharing its key, then align them.
        // `remainder: true` also lets through keys found in only one channel.
        ch_alignment_input = ch_assembly.join(ch_reads, remainder: true)
        MINIMAP2_FILTERING( ch_alignment_input )
        ch_idxstats = MINIMAP2_FILTERING.out.sam_idxstat

        ch_min_contigs_cpm = Channel.value(params.min_contigs_cpm)

        // Cut every assembly into chunk files (by: 100000) and attach the
        // idxstats carrying the same key to each chunk.
        ch_chunks_with_idxstats = ch_assembly
            .splitFasta(by: 100000, file: true)
            .combine(ch_idxstats, by: 0)

        S03_FILTERING( ch_chunks_with_idxstats, ch_min_contigs_cpm )

        // From here on the "assembly" is the filtered one.
        ch_assembly = S03_FILTERING.out.selected

        S04_FILTERED_QUAST( ch_assembly )
        ch_filtered_report = S04_FILTERED_QUAST.out.report
    }

    emit:
    fastqc_report = ch_reads_qc_zip
    quast_report = ch_raw_quast_report
    quast_report_filtered = ch_filtered_report
    assembly = ch_assembly
}
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment