Commit 66ce4315 authored by Maxime Manno

Merge branch 'nG6_ont_demultiplex' into 'master'

Ng6 ont demultiplex

See merge request !37
parents 992e5805 1c35a86e
@@ -16,11 +16,18 @@
#
import logging
import os
from glob import glob
from subprocess import Popen, PIPE
from ng6.ng6workflow import NG6Workflow
from ng6.utils import Utils
from _codecs import encode
class OntQualityCheck (NG6Workflow):
def __init__(self, args={}, id=None, function= "process"):
NG6Workflow.__init__(self, args, id, function)
self.log_files = []
def get_name(self):
return 'ont_qc'
@@ -31,8 +38,8 @@ class OntQualityCheck (NG6Workflow):
logging.getLogger("jflow").debug("Begin OntQualityCheck.define_parameters! ont_qc")
self.add_parameter("compression", "How should the data be compressed once archived", choices= [ "none", "gz", "bz2"], default = "gz")
self.add_parameter("trimming", "use trimming with porechop or not",choices= [ "yes", "no"], default = "no")
self.add_input_file( "summary_file", "Input summary basecalling file", default=None)
self.add_parameter("barcoded", "Barcoded run or not", choices= [ "yes", "no"], default = "no")
self.add_input_file("summary_file", "Input summary basecalling file", default=None)
self.add_parameter("barcoded", "If barcoded run : correspondance file", default = None)
self.add_parameter("fast5dir", "path of the fast5 directory", default = None)
def process(self):
@@ -43,13 +50,31 @@ class OntQualityCheck (NG6Workflow):
sample_names.append( sample.name )
infiles.append(sample.reads1[0])
# add raw
print(self.get_all_reads())
print(sample_names)
print(self.summary_file)
logging.getLogger("jflow").debug("OntQualityCheck.process! get_all_reads : "+",".join(self.get_all_reads()))
logging.getLogger("jflow").debug("OntQualityCheck.process! sample_name : "+str(sample_names))
logging.getLogger("jflow").debug("OntQualityCheck.process! summary_file : "+str(self.summary_file))
### check for log file
# get current path
cmd = [self.get_exec_path("pwd")]
p = Popen(cmd, stdout=PIPE, stderr=PIPE)
stdout, stderr = p.communicate()
exec_path = stdout.decode("utf-8").rsplit()[0]
logging.getLogger("jflow").debug("OntQualityCheck._process.logfile pwd = " + str(exec_path))
# find .log files
for file in glob(exec_path+"/*.log"):
self.log_files.append(file)
logging.getLogger("jflow").debug("OntQualityCheck._process.logfile self.log_files = " + ",".join(self.log_files))
logging.getLogger("jflow").debug("OntQualityCheck._process.logfile exiting")
# add logs
if len(self.log_files) > 0 :
add_log = self.add_component("BasicAnalysis", [self.log_files,"Log Files","Log files generated during primary analysis","-","-","-","gz", "","log.gz"])
addrawfiles = self.add_component("AddRawFiles", [self.runobj, self.get_all_reads(), self.compression])
#nanoplot = self.add_component("Nanoplot", [sample.name,self.get_all_reads(), self.nb_threads, True, "png", self.nanoplot_color,"nanoplot.tar.gz"])
ontstat = self.add_component("Run_stats", [self.summary_file, self.barcoded, sample_names[0]])
ontstat = self.add_component("Run_stats", [self.summary_file, sample_names[0]])
if (self.barcoded != None) and (self.barcoded != "no") :
demultiplexont = self.add_component("Demultiplex_ONT", [self.get_all_reads() , self.barcoded])
if self.trimming == "yes":
trim_porechop = self.add_component("Trim_porechop", [self.get_all_reads() , "discard_middle"])
if self.fast5dir != None:
......
#
# Copyright (C) 2012 INRA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
import re, os
from subprocess import Popen, PIPE
import logging
import time
from ng6.analysis import Analysis
from ng6.utils import Utils
from jflow.utils import get_argument_pattern
class Demultiplex_ONT (Analysis):
"""
This module demultiplexes the total fastq of a barcoded ONT run and produces stats
"""
def __init__(self, args={}, id=None, function= "process"):
Analysis.__init__(self, args, id, function)
def define_parameters(self, fastq_files, barcode_file, archivename="DemultiplexONT_archive.tar"):
self.add_input_file_list( "fastq_files", "fastq_files", default=fastq_files, required=True, file_format = 'fastq')
self.add_parameter("barcode_file", "Name of the barcode file", default=barcode_file, required=True , file_format = 'str')
self.add_parameter("archive_name", "Name of the archive", default=archivename, type='str')
self.add_parameter( "run_name", "The name of the run (from total fastq file)", pattern='{basename_woext}', items=self.fastq_files, file_format = "fastq")
def define_analysis(self):
self.name = "DemultiplexONT"
self.description = "Demultiplexes the total fastq of a barcoded ONT run and produces stats"
self.software = "Qcat"
#if self.discard_middle == "discard_middle":
# self.options = "--discard_middle"
def __parse_stat_file (self, stat_file):
logging.getLogger("jflow").debug("Begin DemultiplexONT.__parse_stat_file! file =",stat_file)
"""
Parse the stat file
@param stat_file : the porechop stdout file
@return : {"read_trim_start" : read_trim_start, ...}
"""
def post_process(self):
logging.getLogger("jflow").debug("Begin DemultiplexONT.post_process! ont_qc")
# Collect the result file paths (fastq and stdout) for the archive
results_files = []
# add header of stats
group = "statsporechop"
self._add_result_element("metrics", "headers", ','.join(["read_trim_start", "read_total_start", "bp_removed_start", "read_trim_end", "read_total_end", "bp_removed_end"]), group)
print(os.listdir(self.output_directory))
for file in os.listdir(self.output_directory):
full_file_path = os.path.join(self.output_directory, file)
logging.getLogger("jflow").debug("Trimporechop.post_process : full_file_path "+full_file_path)
if file.endswith(".fastq"):
logging.getLogger("jflow").debug("Trimporechop.post_process match .fastq : full_file_path "+full_file_path)
results_files.append(full_file_path)
elif file.endswith(".stdout"):
logging.getLogger("jflow").debug("Trimporechop.post_process match .stdout: full_file_path "+full_file_path)
results_files.append(full_file_path)
filename = os.path.basename(file).split(".stdout")[0]
resultlist = self.__parse_stat_file(full_file_path)
read_trim_start = resultlist[0]
read_total_start = resultlist[1]
bp_removed_start = resultlist[2]
read_trim_end = resultlist[3]
read_total_end = resultlist[4]
bp_removed_end = resultlist[5]
#add stats for each fastq file
self._add_result_element("ont_sample", "read_trim_start", read_trim_start,filename)
self._add_result_element("ont_sample", "read_total_start", read_total_start,filename)
self._add_result_element("ont_sample", "bp_removed_start", bp_removed_start,filename)
self._add_result_element("ont_sample", "read_trim_end", read_trim_end,filename)
self._add_result_element("ont_sample", "read_total_end", read_total_end,filename)
self._add_result_element("ont_sample", "bp_removed_end", bp_removed_end,filename)
# Finally, create and add the archive to the analysis
#self._create_and_archive(results_files,self.archive_name)
self._archive_files(results_files, "gz")
logging.getLogger("jflow").debug("End DemultiplexONT.post_process! ")
def get_version(self):
shell_script = "module load system/Python-3.6.3;" + self.get_exec_path("qcat") + " --version"
logging.getLogger("jflow").debug("DemultiplexONT.get_version ! shell_script " + str(shell_script))
cmd = ["sh","-c",shell_script]
p = Popen(cmd, stdout=PIPE, stderr=PIPE)
stdout, stderr = p.communicate()
logging.getLogger("jflow").debug("DemultiplexONT.get_version !" + str(stderr))
return stdout
def process(self):
logging.getLogger("jflow").debug("Begin DemultiplexONT.process! ont_qc")
# Create cmd
self.add_shell_execution(self.get_exec_path("qcat") +" " + self.options + "-f $1 -b " + str(self.outpath) + " -k " + str(self.kit) + " > ${" + str() + "}",
cmd_format='{EXE} {IN} {OUT}' ,
map=False,
inputs = self.fastq_files)
#archive = self.output_directory + '/' + self.archive_name + '.tar.gz'
#self.add_shell_execution('tar -czf $1 ' + self.output_directory + '/' + '*_trim.fastq ', cmd_format='{EXE} {OUT}', map=False, outputs = archive)
logging.getLogger("jflow").debug("End Trimporechop.process! ")
@@ -32,17 +32,13 @@ class Run_stats (Analysis):
This module computes statistics and graphs for an ONT run
"""
def define_parameters(self, sequencing_summary_file, barcoded=False, sample_name="plot", archive_name="RunStats_archive.tar.gz"):
def define_parameters(self, sequencing_summary_file, sample_name="plot", archive_name="RunStats_archive.tar.gz"):
logging.getLogger("jflow").debug("Begin Run_stats parameters")
self.add_input_file( "sequencing_summary_file", "Input sequencing summary file from Basecaller", default=sequencing_summary_file, file_format = "txt", required=True)
self.add_parameter("barcoded", "Indicate that barcodes are used for this run", default=barcoded, type='str')
self.add_parameter("sample_name", "Sample name for prefix", default=sample_name, type='str')
self.add_parameter("archive_name", "Archive name", default=archive_name)
self.add_output_file_list("stderr", "stderr ouput file",pattern='Run_stats.stderr', items = self.sequencing_summary_file)
if self.barcoded == "yes":
self.add_output_file_list("stderr_barcoded", "stderr ouput barcoded file",pattern='Run_stats_barcoded.stderr', items = self.sequencing_summary_file)
def get_version(self):
#cmd = [self.get_exec_path("Rscript")," /save/sbsuser/analyses_scripts/mmanno/graph_albacoresummary.R"]
@@ -113,23 +109,6 @@
#print(stats)
return stats
def __parse_barcode_file (self, barcode_file):
"""
Parse the barcode file
@param barcode_file : the runstatsR barcode file
@return : {"" : "", ...}
"""
stats = {}
barcode_names = []
logging.getLogger("jflow").debug("Begin post_process _parse_barcode_file!")
for line in open(barcode_file, 'r').readlines():
parts = line.strip().split(",")
stats[parts[0]] = parts
barcode_names.append(parts[0])
#print(barcode_names)
#print(stats)
return stats,barcode_names
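# Layout assumed by the parser above (inferred from this code, not from repository docs):
# <sample>_statsbarcodes.txt is a comma-separated file with one row per barcode, the
# barcode name in the first column and the 19 metrics listed in the "barcode" headers of
# post_process (barcode_score, nb_reads, ..., mean_yield_per_sec_Q9) in the following
# columns, e.g.
#   barcode01,<barcode_score>,<nb_reads>,<total_bases>,...,<mean_yield_per_sec_Q9>
# so stats["barcode01"][2] is nb_reads and barcode_names preserves the file order.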
def post_process(self):
logging.getLogger("jflow").debug("Begin Run_stats.post_process! "+self.output_directory)
results_files = []
@@ -244,77 +223,16 @@
self._add_result_element(sample, "channelreads", self._save_file(os.path.join(self.output_directory, sample+"_channelreads.png"), sample+"_channelreads.png"), group)
results_files.append(os.path.join(self.output_directory, sample+"_channelreads.png"))
if self.barcoded == "yes":
barcodefile = os.path.join(self.output_directory,sample+"_statsbarcodes.txt")
if os.path.isfile(barcodefile):
barcode_info, barcode_names = self.__parse_barcode_file(os.path.join(self.output_directory, sample+"_statsbarcodes.txt"))
group = 'barcode'
metrics.append(group)
self._add_result_element("metrics", "headers", ','.join(["barcode_score","nb_reads","total_bases","mean_read_length", "N50_read_length","mean_read_quality","mean_yield_per_sec",
"nb_reads_Q7","total_bases_Q7","mean_read_length_Q7", "N50_read_length_Q7","mean_read_quality_Q7","mean_yield_per_sec_Q7",
"nb_reads_Q9","total_bases_Q9","mean_read_length_Q9", "N50_read_length_Q9","mean_read_quality_Q9","mean_yield_per_sec_Q9"]), group)
self._add_result_element("metrics", "names", ','.join(barcode_names),group)
for barcode in barcode_names :
group = barcode
self._add_result_element(sample, "barcode_score", str(barcode_info[barcode][1]),group),
self._add_result_element(sample, "nb_reads", str(barcode_info[barcode][2]),group),
self._add_result_element(sample, "total_bases", str(barcode_info[barcode][3]),group),
self._add_result_element(sample, "mean_read_length", str(barcode_info[barcode][4]),group),
self._add_result_element(sample, "N50_read_length", str(barcode_info[barcode][5]),group),
self._add_result_element(sample, "mean_read_quality", str(barcode_info[barcode][6]),group),
self._add_result_element(sample, "mean_yield_per_sec", str(barcode_info[barcode][7]),group),
self._add_result_element(sample, "nb_reads_Q7", str(barcode_info[barcode][8]),group),
self._add_result_element(sample, "total_bases_Q7", str(barcode_info[barcode][9]),group),
self._add_result_element(sample, "mean_read_length_Q7", str(barcode_info[barcode][10]),group),
self._add_result_element(sample, "N50_read_length_Q7", str(barcode_info[barcode][11]),group),
self._add_result_element(sample, "mean_read_quality_Q7", str(barcode_info[barcode][12]),group),
self._add_result_element(sample, "mean_yield_per_sec_Q7", str(barcode_info[barcode][13]),group),
self._add_result_element(sample, "nb_reads_Q9", str(barcode_info[barcode][14]),group),
self._add_result_element(sample, "total_bases_Q9", str(barcode_info[barcode][15]),group),
self._add_result_element(sample, "mean_read_length_Q9", str(barcode_info[barcode][16]),group),
self._add_result_element(sample, "N50_read_length_Q9", str(barcode_info[barcode][17]),group),
self._add_result_element(sample, "mean_read_quality_Q9", str(barcode_info[barcode][18]),group),
self._add_result_element(sample, "mean_yield_per_sec_Q9", str(barcode_info[barcode][19]),group),
group = 'plots_barcode'
metrics.append(group)
self._add_result_element("metrics", "headers", ','.join(["qscore_boxplot", "qscore_per_time_intervals_boxplot"]), group)
if os.path.isfile(os.path.join(self.output_directory, sample+"_qscoreboxplot.png")):
self._add_result_element(sample, "qscore_boxplot", self._save_file(os.path.join(self.output_directory, sample+"_qscoreboxplot.png"),
sample + "_qscoreboxplot.png"), group)
results_files.append(os.path.join(self.output_directory, sample+"_qscoreboxplot.png"))
if os.path.isfile(os.path.join(self.output_directory, sample+"_qscorepertimeintervalsboxplot.png")):
self._add_result_element(sample, "qscore_per_time_intervals_boxplot", self._save_file(os.path.join(self.output_directory, sample+"_qscorepertimeintervalsboxplot.png"),
sample + "_qscorepertimeintervalsboxplot.png"), group)
results_files.append(os.path.join(self.output_directory, sample+"_qscorepertimeintervalsboxplot.png"))
# Finally, create and add the archive to the analysis
self._create_and_archive(results_files,self.archive_name)
def process(self):
logging.getLogger("jflow").debug("Begin Run_stats.process! ont_qc")
#print (self.sequencing_summary_file)
self.add_shell_execution(self.get_exec_path("Rscript") + " " + self.get_exec_path("graphe_summary") +' -f '+ '$1' +' --out ' + self.output_directory + " -p "+self.sample_name+" 2> " +' $2',
cmd_format='{EXE} {IN} {OUT}' ,
map=False,
inputs = self.sequencing_summary_file,
outputs = self.stderr)
if self.barcoded == "yes" :
self.add_shell_execution(self.get_exec_path("Rscript") + " " + self.get_exec_path("graphe_summary_barcode") +' -f '+ '$1' +' --out ' + self.output_directory + " -p "+self.sample_name+ " 2> " +' $2',
cmd_format='{EXE} {IN} {OUT}' ,
map=False,
inputs = self.sequencing_summary_file,
outputs = self.stderr_barcoded)
#self.add_shell_execution('tar -czf '+ self.output_directory +'/'+'Run_stats_archive.tar.gz -C '+ self.output_directory +' plot_stats.txt -C '+ self.output_directory +' *.png ', cmd_format='{EXE} {OUT}',
# map=False, outputs = self.archive_name)
logging.getLogger("jflow").debug("End Run_stats.process! ")