Commit f771571d authored by MARTIN Pierre's avatar MARTIN Pierre
Browse files

Change the assembly filter step to process the assembly in chunks; merge the per-sample PDF plots in the Kaiju step

parent 78b0af96
......@@ -466,7 +466,8 @@ if (!params.skip_kaiju && ('01_clean_qc' in step || '02_assembly' in step || '03
file("*.summary_class") into kaiju_summary_class_ch
file("*.summary_order") into kaiju_summary_order_ch
file("*.summary_phylum") into kaiju_summary_phylum_ch
file("*.pdf") into plot_ch
file("*_normalized.pdf") into normalized_pdf_ch
file("*_counts.pdf") into counts_pdf_ch
script:
"""
......@@ -492,9 +493,11 @@ if (!params.skip_kaiju && ('01_clean_qc' in step || '02_assembly' in step || '03
file(kaiju_class) from kaiju_summary_class_ch.collect()
file(kaiju_order) from kaiju_summary_order_ch.collect()
file(kaiju_phylum) from kaiju_summary_phylum_ch.collect()
file(kaiju_pdf) from normalized_pdf_ch.collect()
output:
file("taxo_affi_reads_*.tsv") into merge_files_kaiju_ch
file("taxo_barplots.pdf") into merged_pdf_ch
script:
"""
......@@ -508,6 +511,7 @@ if (!params.skip_kaiju && ('01_clean_qc' in step || '02_assembly' in step || '03
do
merge_kaiju_results.py -f \$i".txt" -o taxo_affi_reads_\$i".tsv"
done
pdfunite ${kaiju_pdf} taxo_barplots.pdf
"""
}
}
......@@ -627,27 +631,56 @@ process reads_deduplication {
"""
}
// Assembly filter
// NOTE(review): this page renders a diff without +/- markers. The next line is the
// PRE-change definition of assembly_and_logs_ch (whole-assembly join) and is
// superseded by the chunked version defined just below — confirm against the
// actual post-commit file.
assembly_and_logs_ch = assembly_for_filter_ch.join(idxstats_filter_logs_ch, remainder: true)
// Split each sample's assembly FASTA into chunk files of 100000 sequences so the
// filtering step can run per chunk instead of on the whole assembly at once.
assembly_for_filter_ch
.splitFasta(by: 100000, file: true)
.set{chunk_assembly_for_filter_ch}
// Pair every FASTA chunk with its sample's idxstats file, matching on the first
// tuple element (the sample id), to feed process assembly_filter.
chunk_assembly_for_filter_ch
.combine(idxstats_filter_logs_ch, by:0)
.set{assembly_and_logs_ch}
// Filter assembled contigs by CPM (counts per million) threshold, one FASTA
// chunk at a time, splitting each chunk into "select" (kept) and "discard" sets.
// NOTE(review): diff rendering — the two publishDir lines and the two
// select-output lines below are the pre- and post-change versions of the same
// line; only the second of each pair exists in the post-commit file. Confirm
// against the repository.
process assembly_filter {
publishDir "${params.outdir}/03_filtering", mode: 'copy'
publishDir "${params.outdir}/03_filtering/", mode: 'copy'
// Inputs: (sampleId, assembly FASTA chunk, idxstats mapping stats) plus the
// CPM cutoff value.
input:
set sampleId, file(assembly_file), file(idxstats) from assembly_and_logs_ch
val min_cpm from min_contigs_cpm_ch
// Outputs: kept contigs (also duplicated to a QUAST channel in the post-change
// version) and discarded contigs, both keyed by sampleId.
output:
set sampleId, file("${sampleId}_select_contigs_cpm${min_cpm}.fasta") into select_assembly_ch
set sampleId, file("${sampleId}_select_contigs_cpm${min_cpm}.fasta") into select_assembly_ch, select_assembly_for_quast_ch
set sampleId, file("${sampleId}_discard_contigs_cpm${min_cpm}.fasta") into discard_assembly_ch
// Only runs when the 03_filtering step was requested.
when: ('03_filtering' in step)
// Shell block: extracts the contig names present in this chunk, subsets the
// sample-wide idxstats to those contigs, runs the CPM filter script on the
// chunk, then concatenates per-chunk results into the final per-sample FASTAs.
// NOTE(review): the trailing cat commands glob !{sampleId}_*_... across chunk
// outputs inside this task's work dir — presumably each task sees only its own
// chunk, so the concatenation is effectively a rename; verify intent.
shell:
'''
chunk_name=`basename !{assembly_file} .fa`
grep "^>" !{assembly_file} | cut -f 1 -d " " | sed 's/^>//g' | awk '{print($1"\t")}' > $chunk_name'.names'
grep -f $chunk_name'.names' !{idxstats} > $chunk_name'_chunk.idxstats'
Filter_contig_per_cpm.py -i $chunk_name'_chunk.idxstats' -f !{assembly_file} -c !{min_cpm} -s "!{sampleId}_"$chunk_name"_select_contigs_cpm!{min_cpm}.fasta" -d "!{sampleId}_"$chunk_name"_discard_contigs_cpm!{min_cpm}.fasta"
cat !{sampleId}_*_select_contigs_cpm!{min_cpm}.fasta > !{sampleId}_select_contigs_cpm!{min_cpm}.fasta
cat !{sampleId}_*_discard_contigs_cpm!{min_cpm}.fasta > !{sampleId}_discard_contigs_cpm!{min_cpm}.fasta
'''
}
// Run metaQUAST quality assessment on the CPM-filtered ("select") contigs of
// each sample and publish the QC report.
// NOTE(review): diff rendering — the first report.tsv output line is the
// pre-change version of the third output line; only one of the pair exists in
// the post-commit file. Likewise, inside the script block the
// Filter_contig_per_cpm.py line and the first metaquast.py line are stale
// pre-change lines from the old combined process; the post-commit script is
// the quoted-args metaquast.py call only. Confirm against the repository.
process quast_filtered {
publishDir "${params.outdir}/03_filtering/", mode: 'copy'
// Input: (sampleId, filtered FASTA) from the assembly_filter select output.
input:
set sampleId, file(fasta) from select_assembly_for_quast_ch
// Outputs: full QUAST result directory plus report.tsv routed to MultiQC.
output:
set sampleId, file("${sampleId}_select_contigs_QC/report.tsv") into quast_select_contigs_for_multiqc_ch
file("${sampleId}_select_contigs_QC/*") into quast_select_contigs_ch
file("${sampleId}_select_contigs_QC/report.tsv") into quast_select_contigs_for_multiqc_ch
// Only runs when the 03_filtering step was requested.
when: ('03_filtering' in step)
script:
"""
Filter_contig_per_cpm.py -i ${idxstats} -f ${assembly_file} -c ${min_cpm} -s ${sampleId}_select_contigs_cpm${min_cpm}.fasta -d ${sampleId}_discard_contigs_cpm${min_cpm}.fasta
metaquast.py --threads "${task.cpus}" --rna-finding --max-ref-number 0 --min-contig 0 "${sampleId}_select_contigs_cpm${min_cpm}.fasta" -o "${sampleId}_select_contigs_QC"
metaquast.py --threads ${task.cpus} --rna-finding --max-ref-number 0 --min-contig 0 ${fasta} -o "${sampleId}_select_contigs_QC"
"""
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment