Commit 7eb2bbee authored by maxime mano
Browse files

Add the Runstats analysis for the ONT data

parent 5086a3c4
#
# Copyright (C) 2012 INRA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
import os
import json
import logging
import re
from ng6.analysis import Analysis
from subprocess import Popen, PIPE
from jflow.utils import get_argument_pattern
from ng6.utils import Utils
class Run_stats (Analysis):
    """
    Analysis component producing statistics and graphs for an ONT run.

    An R script is run on the Albacore sequencing summary file; the
    resulting ``plot_stats.txt`` file and ``plot_*.png`` images found in the
    output directory are then registered as analysis results and archived.
    """

    # Keys of plot_stats.txt registered in the 'basic' group (first batch).
    _BASIC_STATS = ("nb_reads", "total_bases", "median_read_length",
                    "mean_read_length", "N50_read_length")
    # Keys of plot_stats.txt registered in the 'quality' group.
    _QUALITY_STATS = ("median_read_quality", "mean_read_quality",
                      "nb_read_Q>5", "total_bases_Q>5",
                      "nb_read_Q>10", "total_bases_Q>10",
                      "nb_read_Q>15", "total_bases_Q>15")
    # Keys of plot_stats.txt registered in the 'basic' group (second batch).
    _YIELD_STATS = ("median_yield_per_sec", "mean_yield_per_sec",
                    "nb_actif_channel")
    # Plot identifiers; each maps to the file "plot_<identifier>.png".
    _PLOTS = ("cumulyieldperhour", "outrm_distriblength",
              "outrm_distribqscore", "outrm_lengthvsqscore_density",
              "poreactivity")

    def define_parameters(self, sequencing_summary_file, archive_name="RunStats_archive.tar.gz"):
        """
        Declare the inputs, parameters and outputs of the component.
          @param sequencing_summary_file : the sequencing summary file from Albacore
          @param archive_name : name of the archive built in post_process
        """
        logging.getLogger("jflow").debug("Begin Run_stats parameters")
        self.add_input_file("sequencing_summary_file", "Input ont sequencing summary file from Albacore",
                            default=sequencing_summary_file, file_format="txt", required=True)
        self.add_parameter("archive_name", "Archive name", default=archive_name)
        self.add_output_file_list("stderr", "stderr ouput file", pattern='{basename_woext}.stderr',
                                  items=self.sequencing_summary_file)

    def get_version(self):
        """
        Return the version label of the analysis.
          @return : the constant string "v1"
        """
        # NOTE(review): the version is hard-coded; it is not queried from the R script.
        return "v1"

    def define_analysis(self):
        """
        Set the descriptive attributes (name, description, software, options).
        """
        self.name = "RUNStats"
        self.description = "Statistics on reads and their qualities with R."
        self.software = "Rscript"
        self.options = "-"

    def __parse_stat_file (self, stat_file):
        """
        Parse the stat file
          @param stat_file : the runstatsR stats file (tab separated "key<TAB>value" lines)
          @return          : {"key" : "value", ...} restricted to the known
                             statistic keys; values are kept as strings
        """
        logging.getLogger("jflow").debug("Begin post_process _parse_stat_file!")
        wanted = set(self._BASIC_STATS + self._QUALITY_STATS + self._YIELD_STATS)
        stats = {}
        # The context manager guarantees the handle is closed, even on error.
        with open(stat_file, 'r') as handle:
            for line in handle:
                parts = line.strip().split("\t")
                # Ignore blank lines and keys without a value instead of
                # failing with an IndexError on parts[1].
                if len(parts) < 2:
                    continue
                if parts[0] in wanted:
                    stats[parts[0]] = parts[1]
        logging.getLogger("jflow").debug("Run_stats parsed stats: %s", stats)
        return stats

    def post_process(self):
        """
        Register the statistics and plots found in the output directory as
        analysis results, then archive the collected result files.

        Raises KeyError when plot_stats.txt exists but lacks one of the
        expected statistics.
        """
        logger = logging.getLogger("jflow")
        logger.debug("Begin Run_stats.post_process! "+self.output_directory)
        logger.debug("Run_stats.post_process working directory: %s", os.getcwd())
        results_files = []
        sample = "ONT_sample"

        # Every zip file of the output directory goes into the archive.
        for entry in os.listdir(self.output_directory):
            if entry.endswith(".zip"):
                results_files.append(os.path.join(self.output_directory, entry))

        # stat file
        statfile = os.path.join(self.output_directory, "plot_stats.txt")
        if os.path.isfile(statfile):
            stat_info = self.__parse_stat_file(statfile)

            group = 'basic'
            # NOTE(review): the headers list only a subset of the values
            # registered for the group - confirm this is intended.
            self._add_result_element("metrics", "headers", ','.join(["nb_reads", "total_bases", "median_read_length", "N50_read_length", "median_yield_per_sec", "nb_actif_channel"]), group)
            for key in self._BASIC_STATS:
                self._add_result_element(sample, key, str(stat_info[key]), group)

            group = 'quality'
            self._add_result_element("metrics", "headers", ','.join(["median_read_quality", "nb_read_Q>5", "nb_read_Q>10", "total_bases_Q>10", "nb_read_Q>15"]), group)
            for key in self._QUALITY_STATS:
                self._add_result_element(sample, key, str(stat_info[key]), group)

            # Yield and channel statistics belong to the 'basic' group too.
            group = 'basic'
            for key in self._YIELD_STATS:
                self._add_result_element(sample, key, str(stat_info[key]), group)

        group = 'plots'
        # NOTE(review): "poreactivity" is registered below but is not listed
        # in these headers - confirm this is intended.
        self._add_result_element("metrics", "headers", ','.join(["cumulyieldperhour", "outrm_distriblength", "outrm_distribqscore", "outrm_lengthvsqscore_density"]), group)
        for plot in self._PLOTS:
            plot_path = os.path.join(self.output_directory, "plot_" + plot + ".png")
            # Only plots actually present are saved and archived.
            if os.path.isfile(plot_path):
                self._add_result_element(sample, plot, self._save_file(plot_path, sample + "." + plot + ".png"), group)
                results_files.append(plot_path)

        # Finaly create and add the archive to the analysis
        self._create_and_archive(results_files, self.archive_name)

    def process(self):
        """
        Schedule the execution of the R script on the sequencing summary
        file; the script's stderr is redirected to the declared stderr output.
        """
        logger = logging.getLogger("jflow")
        logger.debug("Begin Run_stats.process! ont_qc")
        # NOTE(review): the R script location is a hard-coded absolute path.
        r_script = " /work/ng6-test/claire/test_ont_qc/graph_albacoresummary.R "
        # $1 is the input summary file, $2 the stderr output file.
        command = self.get_exec_path("Rscript") + r_script + ' -f ' + '$1' + ' --out ' + self.output_directory + " 2> " + ' $2'
        self.add_shell_execution(command,
                                 cmd_format='{EXE} {IN} {OUT}',
                                 map=False,
                                 inputs=self.sequencing_summary_file,
                                 outputs=self.stderr)
        logger.debug("End Run_stats.process! ")
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment