Commit ef122672 authored by Penom Nom
Browse files

Add "--casava" on FastQC.

parent 26e75587
......@@ -117,7 +117,7 @@ class Casava18 (NG6Workflow):
addrawfiles = self.add_component("AddRawFiles", [self.runobj, concatenatefastq.concat_files, self.args["compression"]])
# make some statistics on raw file
fastqc = self.add_component("FastQC", [fastqilluminafilter.fastq_files_filtered, True, True, self.runobj.name+"_fastqc.tar.gz"], parent = fastqilluminafilter)
fastqc = self.add_component("FastQC", [fastqilluminafilter.fastq_files_filtered, False, True, self.runobj.name+"_fastqc.tar.gz"], parent = fastqilluminafilter)
# contamination_search
if self.args["databank"]:
......@@ -131,4 +131,4 @@ class Casava18 (NG6Workflow):
# make some statistics on the alignment
alignmentstats = self.add_component("AlignmentStats", [bwa.bam_files], parent = bwa)
# process insert sizes
insertssizes = self.add_component("InsertsSizes",[bwa.bam_files], parent = bwa)
\ No newline at end of file
insertssizes = self.add_component("InsertsSizes",[bwa.bam_files, self.args["histogram_width"], self.args["min_pct"]], parent = bwa)
\ No newline at end of file
......@@ -102,3 +102,13 @@ keep_reads.flag = --keep
keep_reads.help = Keep reads which pass the Illumina filters or keep reads which do not pass the Illumina filters (pass_illumina_filters|not_pass_illumina_filters)
keep_reads.default = pass_illumina_filters
keep_reads.choices = pass_illumina_filters|not_pass_illumina_filters
histogram_width.name = histogram_width
histogram_width.flag = --histogram_width
histogram_width.help = Explicitly sets the histogram width, overriding automatic truncation of histogram tail.
histogram_width.default = 500
min_pct.name = min_pct
min_pct.flag = --min_pct
min_pct.help = When generating the histogram, discard any data categories (out of FR, TANDEM, RF) that have fewer than this percentage of overall reads.
min_pct.default = 0.01
\ No newline at end of file
......@@ -16,15 +16,18 @@
#
import os
import re
from subprocess import Popen, PIPE
from jflow.component import Component
from jflow.iotypes import OutputFileList, InputFileList, Formats
from jflow.abstraction import MultiMap
from ng6.analysis import Analysis
from weaver.function import ShellFunction
from ng6.analysis import Analysis
from ng6.utils import Utils
class FastQC (Analysis):
......@@ -33,18 +36,23 @@ class FastQC (Analysis):
self.is_casava = is_casava
self.no_group = no_group
self.archive_name = archive_name
self.stdouts = OutputFileList(self.get_outputs('{basename_woext}.stdout', self.input_files))
self.stderrs = OutputFileList(self.get_outputs('{basename_woext}.stderr', self.input_files))
if not is_casava:
self.stdouts = OutputFileList(self.get_outputs('{basename_woext}.stdout', self.input_files))
self.stderrs = OutputFileList(self.get_outputs('{basename_woext}.stderr', self.input_files))
else:
group_prefix = Utils.get_group_basenames(self.input_files, "read")
self.stdouts = OutputFileList(self.get_outputs('{basename_woext}.stdout', group_prefix.keys()))
self.stderrs = OutputFileList(self.get_outputs('{basename_woext}.stderr', group_prefix.keys()))
def define_analysis(self):
    """Set the analysis metadata and build the FastQC option string.

    Reads ``self.is_casava`` and ``self.no_group`` and writes
    ``self.name``, ``self.description``, ``self.software`` and
    ``self.options``.  Each enabled flag is appended to ``self.options``
    exactly once, preceded by a single space.
    """
    self.name = "ReadsStats"
    self.description = "Statistics on reads and their qualities."
    self.software = "fastqc"

    # Collect the enabled flags first so that none can be added twice.
    enabled_flags = []
    if self.is_casava:
        enabled_flags.append("--casava")
    if self.no_group:
        enabled_flags.append("--nogroup")

    # Keep the leading-space layout (" --flag") so the string can be
    # concatenated directly into a command line.
    self.options = "".join(" " + flag for flag in enabled_flags)
def __parse_summary_file (self, summary_file):
"""
......@@ -145,7 +153,22 @@ class FastQC (Analysis):
return stdout.split()[1]
def process(self):
    """Schedule one FastQC invocation per expected stdout file.

    For every entry of ``self.stdouts`` the prefix (basename without the
    ``.stdout`` suffix) is extracted, and every file of
    ``self.input_files`` whose basename starts with that prefix is passed
    to a single FastQC command.  The command's stdout and stderr are
    redirected to the entries of ``self.stdouts`` / ``self.stderrs`` at
    the same position.

    NOTE(review): the former per-file ``MultiMap`` invocation is dropped
    here because it ran FastQC a second time on the same stdout/stderr
    files; confirm no caller relies on it.
    """
    # Invariant part of the command line, built once for all groups.
    command_prefix = (self.get_exec_path("fastqc") + " --outdir " +
                      self.output_directory + " " + self.options + " ")

    for position, stdout_path in enumerate(self.stdouts):
        # Set prefix (the dot is escaped so only a literal ".stdout" matches)
        reg = re.search(r"(.+)\.stdout$", stdout_path)
        prefix = os.path.basename(reg.group(1))

        # Build fastq list for sample read
        file_group = [fastq for fastq in self.input_files
                      if os.path.basename(fastq).startswith(prefix)]

        # Create cmd
        # presumably returns the "$1 $2 ..." placeholders for the inputs and
        # the next free argument number -- confirm against ng6.utils.Utils
        [cmd_inputs_pattern, next_arg_number] = Utils.get_argument_pattern(file_group, 1)
        fastqc = ShellFunction(command_prefix + cmd_inputs_pattern +
                               " > $" + str(next_arg_number) +
                               " 2> $" + str(next_arg_number + 1),
                               cmd_format='{EXE} {IN} {OUT}')
        fastqc(inputs=file_group,
               outputs=[stdout_path, self.stderrs[position]])
\ No newline at end of file
......@@ -40,10 +40,10 @@ class FastqIlluminaFilter (Analysis):
self.group_prefix = group_prefix
# Output list when the input files are not gzip-compressed
if not self.fastq_files[0].endswith(".gz"):
self.fastq_files_filtered = OutputFileList(self.get_outputs('{basename_woext}_filtered.fastq', self.fastq_files), Formats.FASTQ)
self.fastq_files_filtered = OutputFileList(self.get_outputs('{basename_woext}.fastq', self.fastq_files), Formats.FASTQ)
# Output list when the input files are gzip-compressed
else:
self.fastq_files_filtered = OutputFileList(self.get_outputs('{basename_woext}_filtered.fastq.gz', self.fastq_files), Formats.FASTQ)
self.fastq_files_filtered = OutputFileList(self.get_outputs('{basename_woext}.fastq.gz', self.fastq_files), Formats.FASTQ)
def define_analysis(self):
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment