Commit 66ce4315 authored by Maxime Manno's avatar Maxime Manno 🍜
Browse files

Merge branch 'nG6_ont_demultiplex' into 'master'

Ng6 ont demultiplex

See merge request !37
parents 992e5805 1c35a86e
...@@ -16,11 +16,18 @@ ...@@ -16,11 +16,18 @@
# #
import logging import logging
import os import os
from glob import glob
from subprocess import Popen, PIPE
from ng6.ng6workflow import NG6Workflow from ng6.ng6workflow import NG6Workflow
from ng6.utils import Utils from ng6.utils import Utils
from _codecs import encode
class OntQualityCheck (NG6Workflow): class OntQualityCheck (NG6Workflow):
def __init__(self, args=None, id=None, function="process"):
    """
    Build the ONT quality-check workflow.

    @param args: workflow arguments forwarded to NG6Workflow (a dict);
        defaults to an empty dict when omitted.
    @param id: workflow id forwarded to NG6Workflow.
    @param function: name of the entry-point method, forwarded to NG6Workflow.
    """
    # Use a None sentinel instead of the original `args={}`: a mutable dict
    # used as a default argument is shared across every instantiation.
    NG6Workflow.__init__(self, {} if args is None else args, id, function)
    # Paths of *.log files found in the execution directory; filled by process().
    self.log_files = []
def get_name(self): def get_name(self):
return 'ont_qc' return 'ont_qc'
...@@ -31,8 +38,8 @@ class OntQualityCheck (NG6Workflow): ...@@ -31,8 +38,8 @@ class OntQualityCheck (NG6Workflow):
logging.getLogger("jflow").debug("Begin OntQualityCheck.define_parameters! ont_qc") logging.getLogger("jflow").debug("Begin OntQualityCheck.define_parameters! ont_qc")
self.add_parameter("compression", "How should the data be compressed once archived", choices= [ "none", "gz", "bz2"], default = "gz") self.add_parameter("compression", "How should the data be compressed once archived", choices= [ "none", "gz", "bz2"], default = "gz")
self.add_parameter("trimming", "use trimming with porechop or not",choices= [ "yes", "no"], default = "no") self.add_parameter("trimming", "use trimming with porechop or not",choices= [ "yes", "no"], default = "no")
self.add_input_file( "summary_file", "Input summary basecalling file", default=None) self.add_input_file("summary_file", "Input summary basecalling file", default=None)
self.add_parameter("barcoded", "Barcoded run or not", choices= [ "yes", "no"], default = "no") self.add_parameter("barcoded", "If barcoded run : correspondance file", default = None)
self.add_parameter("fast5dir", "path of the fast5 directory", default = None) self.add_parameter("fast5dir", "path of the fast5 directory", default = None)
def process(self): def process(self):
...@@ -43,13 +50,31 @@ class OntQualityCheck (NG6Workflow): ...@@ -43,13 +50,31 @@ class OntQualityCheck (NG6Workflow):
sample_names.append( sample.name ) sample_names.append( sample.name )
infiles.append(sample.reads1[0]) infiles.append(sample.reads1[0])
# add raw # add raw
print(self.get_all_reads()) logging.getLogger("jflow").debug("OntQualityCheck.process! get_all_reads : "+",".join(self.get_all_reads()))
print(sample_names) logging.getLogger("jflow").debug("OntQualityCheck.process! sample_name : "+str(sample_names))
print(self.summary_file) logging.getLogger("jflow").debug("OntQualityCheck.process! summary_file : "+str(self.summary_file))
### check for log file
# get current path
cmd = [self.get_exec_path("pwd")]
p = Popen(cmd, stdout=PIPE, stderr=PIPE)
stdout, stderr = p.communicate()
exec_path = stdout.decode("utf-8").rsplit()[0]
logging.getLogger("jflow").debug("OntQualityCheck._process.logfile pwd = " + str(exec_path))
# find .log files
for file in glob(exec_path+"/*.log"):
self.log_files.append(file)
logging.getLogger("jflow").debug("OntQualityCheck._process.logfile self.log_files = " + ",".join(self.log_files))
logging.getLogger("jflow").debug("OntQualityCheck._process.logfile exiting")
# add logs
if len(self.log_files) > 0 :
add_log = self.add_component("BasicAnalysis", [self.log_files,"Log Files","Log files generated during primary analysis","-","-","-","gz", "","log.gz"])
addrawfiles = self.add_component("AddRawFiles", [self.runobj, self.get_all_reads(), self.compression]) addrawfiles = self.add_component("AddRawFiles", [self.runobj, self.get_all_reads(), self.compression])
#nanoplot = self.add_component("Nanoplot", [sample.name,self.get_all_reads(), self.nb_threads, True, "png", self.nanoplot_color,"nanoplot.tar.gz"]) ontstat = self.add_component("Run_stats", [self.summary_file, sample_names[0]])
ontstat = self.add_component("Run_stats", [self.summary_file, self.barcoded, sample_names[0]]) if (self.barcoded != None) or (self.barcoded != "no") :
demultiplexont = self.add_component("Demultiplex_ONT", [self.get_all_reads() , self.barcoded])
if self.trimming == "yes": if self.trimming == "yes":
trim_porechop = self.add_component("Trim_porechop", [self.get_all_reads() , "discard_middle"]) trim_porechop = self.add_component("Trim_porechop", [self.get_all_reads() , "discard_middle"])
if self.fast5dir != None: if self.fast5dir != None:
......
#
# Copyright (C) 2012 INRA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
import re, os
from subprocess import Popen, PIPE
import logging
import time
from ng6.analysis import Analysis
from ng6.utils import Utils
from jflow.utils import get_argument_pattern
class Demultiplex_ONT (Analysis):
"""
This module demultiplexes the total fastq of a barcoded ONT run and produces stats
"""
# Constructor: simply forwards to the jflow Analysis base class.
# NOTE(review): `args={}` is a shared mutable default argument — safe only if
# Analysis.__init__ never mutates it; confirm against ng6/analysis.py.
def __init__(self, args={}, id=None, function= "process"):
Analysis.__init__(self, args, id, function)
# Declare the component's inputs:
#   fastq_files  - list of input fastq files (the total, multiplexed run reads)
#   barcode_file - barcode correspondence file name
#   archivename  - name of the result archive
# "run_name" is derived from each fastq basename via the jflow '{basename_woext}'
# pattern syntax.
def define_parameters(self, fastq_files, barcode_file, archivename="DemultiplexONT_archive.tar"):
self.add_input_file_list( "fastq_files", "fastq_files", default=fastq_files, required=True, file_format = 'fastq')
self.add_parameter("barcode_file", "Name of the barcode file", default=barcode_file, required=True , file_format = 'str')
self.add_parameter("archive_name", "Name of the archive", default=archivename, type='str')
self.add_parameter( "run_name", "The name of the run (from total fastq file)", pattern='{basename_woext}', items=self.fastq_files, file_format = "fastq")
# Metadata displayed for this analysis in the nG6 interface.
def define_analysis(self):
self.name = "DemultiplexONT"
self.description = "Demultiplexes the total fastq of a barcoded ONT run and produces stats"
self.software = "Qcat"
#if self.discard_middle == "discard_middle":
# self.options = "--discard_middle"
# Parse the demultiplexing stats file (qcat/porechop stdout capture).
# NOTE(review): parsing is not implemented — the method only logs and falls
# through returning None, yet post_process() indexes the result
# (resultlist[0]..[5]); that would raise a TypeError at runtime.
# NOTE(review): logging.debug("... file =", stat_file) uses lazy %-style
# argument passing with no '%s' placeholder, so stat_file is never
# interpolated and logging reports an internal formatting error.
def __parse_stat_file (self, stat_file):
logging.getLogger("jflow").debug("Begin DemultiplexONT.__parse_stat_file! file =",stat_file)
"""
Parse the stat file
@param stat_file : the stdout porechop
@return : {"read_trim_start" : read_trim_start, ...}
"""
# Collect demultiplexing outputs from the component's output directory:
# keep every produced .fastq, and for each .stdout stats file extract the
# per-fastq trimming metrics, attach them to the analysis results, then
# archive everything.
def post_process(self):
logging.getLogger("jflow").debug("Begin DemultiplexONT.post_process! ont_qc")
# Create dictionary : key = file name or prefix, value = files path
results_files = []
# add header of stats
group = "statsporechop"
self._add_result_element("metrics", "headers", ','.join(["read_trim_start", "read_total_start", "bp_removed_start", "read_trim_end", "read_total_end", "bp_removed_end"]), group)
# NOTE(review): leftover debugging print — consider removing or routing
# through the jflow logger like the surrounding lines.
print(os.listdir(self.output_directory))
for file in os.listdir(self.output_directory):
full_file_path = os.path.join(self.output_directory, file)
logging.getLogger("jflow").debug("Trimporechop.post_process : full_file_path "+full_file_path)
if file.endswith(".fastq"):
logging.getLogger("jflow").debug("Trimporechop.post_process match .fastq : full_file_path "+full_file_path)
results_files.append(full_file_path)
elif file.endswith(".stdout"):
logging.getLogger("jflow").debug("Trimporechop.post_process match .stdout: full_file_path "+full_file_path)
results_files.append(full_file_path)
# Strip the ".stdout" suffix to recover the fastq prefix used as the
# result group key.
filename = os.path.basename(file).split(".stdout")[0]
# NOTE(review): __parse_stat_file currently returns None (see above),
# so the six index accesses below cannot work until it is implemented.
resultlist = self.__parse_stat_file(full_file_path)
read_trim_start = resultlist[0]
read_total_start = resultlist[1]
bp_removed_start = resultlist[2]
read_trim_end = resultlist[3]
read_total_end = resultlist[4]
bp_removed_end = resultlist[5]
#add stats for each fastq file
self._add_result_element("ont_sample", "read_trim_start", read_trim_start,filename)
self._add_result_element("ont_sample", "read_total_start", read_total_start,filename)
self._add_result_element("ont_sample", "bp_removed_start", bp_removed_start,filename)
self._add_result_element("ont_sample", "read_trim_end", read_trim_end,filename)
self._add_result_element("ont_sample", "read_total_end", read_total_end,filename)
self._add_result_element("ont_sample", "bp_removed_end", bp_removed_end,filename)
# Finally create and add the archive to the analysis
#self._create_and_archive(results_files,self.archive_name)
self._archive_files(results_files, "gz")
logging.getLogger("jflow").debug("End DemultiplexONT.post_process! ")
# Return the qcat version string by running `qcat --version` through a shell
# (the `module load` environment setup requires going through `sh -c`).
# Returns the raw stdout bytes; stderr is only logged.
def get_version(self):
shell_script = "module load system/Python-3.6.3;" + self.get_exec_path("qcat") + " --version"
logging.getLogger("jflow").debug("DemultiplexONT.get_version ! shell_script " + str(shell_script))
cmd = ["sh","-c",shell_script]
p = Popen(cmd, stdout=PIPE, stderr=PIPE)
stdout, stderr = p.communicate()
logging.getLogger("jflow").debug("DemultiplexONT.get_version !" + str(stderr))
return stdout
# Build and register the qcat demultiplexing shell command for each input fastq.
# NOTE(review): '"${" + str() + "}"' evaluates to the literal string "${}" —
# this looks like an unfinished output-placeholder; verify the intended
# jflow output variable.
# NOTE(review): self.options, self.outpath and self.kit are never defined in
# this class (options is only set in commented-out code above) — this method
# would raise AttributeError as written; confirm intended parameters.
# NOTE(review): the closing debug message says "Trimporechop" — copy/paste
#残留 from the Trim_porechop component (string left untouched here).
def process(self):
logging.getLogger("jflow").debug("Begin DemultiplexONT.process! ont_qc")
# Create cmd
self.add_shell_execution(self.get_exec_path("qcat") +" " + self.options + "-f $1 -b " + str(self.outpath) + " -k " + str(self.kit) + " > ${" + str() + "}",
cmd_format='{EXE} {IN} {OUT}' ,
map=False,
inputs = self.fastq_files)
#archive = self.output_directory + '/' + self.archive_name + '.tar.gz'
#self.add_shell_execution('tar -czf $1 ' + self.output_directory + '/' + '*_trim.fastq ', cmd_format='{EXE} {OUT}', map=False, outputs = archive)
logging.getLogger("jflow").debug("End Trimporechop.process! ")
...@@ -32,17 +32,13 @@ class Run_stats (Analysis): ...@@ -32,17 +32,13 @@ class Run_stats (Analysis):
This module make some statistic from ONT run with graphs This module make some statistic from ONT run with graphs
""" """
def define_parameters(self, sequencing_summary_file, barcoded=False, sample_name="plot", archive_name="RunStats_archive.tar.gz"): def define_parameters(self, sequencing_summary_file, sample_name="plot", archive_name="RunStats_archive.tar.gz"):
logging.getLogger("jflow").debug("Begin Run_stats parameters") logging.getLogger("jflow").debug("Begin Run_stats parameters")
self.add_input_file( "sequencing_summary_file", "Input sequencing summary file from Basecaller", default=sequencing_summary_file, file_format = "txt", required=True) self.add_input_file( "sequencing_summary_file", "Input sequencing summary file from Basecaller", default=sequencing_summary_file, file_format = "txt", required=True)
self.add_parameter("barcoded", "Indicate that barcodes are used for this run", default=barcoded, type='str')
self.add_parameter("sample_name", "Sample name for prefix", default=sample_name, type='str') self.add_parameter("sample_name", "Sample name for prefix", default=sample_name, type='str')
self.add_parameter("archive_name", "Archive name", default=archive_name) self.add_parameter("archive_name", "Archive name", default=archive_name)
self.add_output_file_list("stderr", "stderr ouput file",pattern='Run_stats.stderr', items = self.sequencing_summary_file) self.add_output_file_list("stderr", "stderr ouput file",pattern='Run_stats.stderr', items = self.sequencing_summary_file)
if self.barcoded == "yes":
self.add_output_file_list("stderr_barcoded", "stderr ouput barcoded file",pattern='Run_stats_barcoded.stderr', items = self.sequencing_summary_file)
def get_version(self): def get_version(self):
#cmd = [self.get_exec_path("Rscript")," /save/sbsuser/analyses_scripts/mmanno/graph_albacoresummary.R"] #cmd = [self.get_exec_path("Rscript")," /save/sbsuser/analyses_scripts/mmanno/graph_albacoresummary.R"]
...@@ -113,23 +109,6 @@ class Run_stats (Analysis): ...@@ -113,23 +109,6 @@ class Run_stats (Analysis):
#print(stats) #print(stats)
return stats return stats
def __parse_barcode_file (self, barcode_file):
    """
    Parse the runstatsR barcode stats file.

    @param barcode_file : path to the <sample>_statsbarcodes.txt file; one
        comma-separated line per barcode, first field is the barcode name.
    @return : (stats, barcode_names) where stats maps barcode name -> the full
        list of fields for that line, and barcode_names preserves file order.
    """
    stats = {}
    barcode_names = []
    logging.getLogger("jflow").debug("Begin post_process _parse_barcode_file!")
    # Use a context manager so the file handle is always closed (the original
    # open(...).readlines() leaked it), and iterate the file lazily instead of
    # materializing all lines with readlines().
    with open(barcode_file, 'r') as fh:
        for line in fh:
            parts = line.strip().split(",")
            stats[parts[0]] = parts
            barcode_names.append(parts[0])
    return stats, barcode_names
def post_process(self): def post_process(self):
logging.getLogger("jflow").debug("Begin Run_stats.post_process! "+self.output_directory) logging.getLogger("jflow").debug("Begin Run_stats.post_process! "+self.output_directory)
results_files = [] results_files = []
...@@ -244,77 +223,16 @@ class Run_stats (Analysis): ...@@ -244,77 +223,16 @@ class Run_stats (Analysis):
self._add_result_element(sample, "channelreads", self._save_file(os.path.join(self.output_directory, sample+"_channelreads.png"), sample+"_channelreads.png"), group) self._add_result_element(sample, "channelreads", self._save_file(os.path.join(self.output_directory, sample+"_channelreads.png"), sample+"_channelreads.png"), group)
results_files.append(os.path.join(self.output_directory, sample+"_channelreads.png")) results_files.append(os.path.join(self.output_directory, sample+"_channelreads.png"))
if self.barcoded == "yes":
barcodefile = os.path.join(self.output_directory,sample+"_statsbarcodes.txt")
if os.path.isfile(barcodefile):
barcode_info, barcode_names = self.__parse_barcode_file(os.path.join(self.output_directory, sample+"_statsbarcodes.txt"))
group = 'barcode'
metrics.append(group)
self._add_result_element("metrics", "headers", ','.join(["barcode_score","nb_reads","total_bases","mean_read_length", "N50_read_length","mean_read_quality","mean_yield_per_sec",
"nb_reads_Q7","total_bases_Q7","mean_read_length_Q7", "N50_read_length_Q7","mean_read_quality_Q7","mean_yield_per_sec_Q7",
"nb_reads_Q9","total_bases_Q9","mean_read_length_Q9", "N50_read_length_Q9","mean_read_quality_Q9","mean_yield_per_sec_Q9"]), group)
self._add_result_element("metrics", "names", ','.join(barcode_names),group)
for barcode in barcode_names :
group = barcode
self._add_result_element(sample, "barcode_score", str(barcode_info[barcode][1]),group),
self._add_result_element(sample, "nb_reads", str(barcode_info[barcode][2]),group),
self._add_result_element(sample, "total_bases", str(barcode_info[barcode][3]),group),
self._add_result_element(sample, "mean_read_length", str(barcode_info[barcode][4]),group),
self._add_result_element(sample, "N50_read_length", str(barcode_info[barcode][5]),group),
self._add_result_element(sample, "mean_read_quality", str(barcode_info[barcode][6]),group),
self._add_result_element(sample, "mean_yield_per_sec", str(barcode_info[barcode][7]),group),
self._add_result_element(sample, "nb_reads_Q7", str(barcode_info[barcode][8]),group),
self._add_result_element(sample, "total_bases_Q7", str(barcode_info[barcode][9]),group),
self._add_result_element(sample, "mean_read_length_Q7", str(barcode_info[barcode][10]),group),
self._add_result_element(sample, "N50_read_length_Q7", str(barcode_info[barcode][11]),group),
self._add_result_element(sample, "mean_read_quality_Q7", str(barcode_info[barcode][12]),group),
self._add_result_element(sample, "mean_yield_per_sec_Q7", str(barcode_info[barcode][13]),group),
self._add_result_element(sample, "nb_reads_Q9", str(barcode_info[barcode][14]),group),
self._add_result_element(sample, "total_bases_Q9", str(barcode_info[barcode][15]),group),
self._add_result_element(sample, "mean_read_length_Q9", str(barcode_info[barcode][16]),group),
self._add_result_element(sample, "N50_read_length_Q9", str(barcode_info[barcode][17]),group),
self._add_result_element(sample, "mean_read_quality_Q9", str(barcode_info[barcode][18]),group),
self._add_result_element(sample, "mean_yield_per_sec_Q9", str(barcode_info[barcode][19]),group),
group = 'plots_barcode'
metrics.append(group)
self._add_result_element("metrics", "headers", ','.join(["qscore_boxplot", "qscore_per_time_intervals_boxplot"]), group)
if os.path.isfile(os.path.join(self.output_directory, sample+"_qscoreboxplot.png")):
self._add_result_element(sample, "qscore_boxplot", self._save_file(os.path.join(self.output_directory, sample+"_qscoreboxplot.png"),
sample + "_qscoreboxplot.png"), group)
results_files.append(os.path.join(self.output_directory, sample+"_qscoreboxplot.png"))
if os.path.isfile(os.path.join(self.output_directory, sample+"_qscorepertimeintervalsboxplot.png")):
self._add_result_element(sample, "qscore_per_time_intervals_boxplot", self._save_file(os.path.join(self.output_directory, sample+"_qscorepertimeintervalsboxplot.png"),
sample + "_qscorepertimeintervalsboxplot.png"), group)
results_files.append(os.path.join(self.output_directory, sample+"_qscorepertimeintervalsboxplot.png"))
# Finaly create and add the archive to the analysis # Finaly create and add the archive to the analysis
self._create_and_archive(results_files,self.archive_name) self._create_and_archive(results_files,self.archive_name)
def process(self): def process(self):
logging.getLogger("jflow").debug("Begin Run_stats.process! ont_qc") logging.getLogger("jflow").debug("Begin Run_stats.process! ont_qc")
#print (self.sequencing_summary_file)
self.add_shell_execution(self.get_exec_path("Rscript") + " " + self.get_exec_path("graphe_summary") +' -f '+ '$1' +' --out ' + self.output_directory + " -p "+self.sample_name+" 2> " +' $2', self.add_shell_execution(self.get_exec_path("Rscript") + " " + self.get_exec_path("graphe_summary") +' -f '+ '$1' +' --out ' + self.output_directory + " -p "+self.sample_name+" 2> " +' $2',
cmd_format='{EXE} {IN} {OUT}' , cmd_format='{EXE} {IN} {OUT}' ,
map=False, map=False,
inputs = self.sequencing_summary_file, inputs = self.sequencing_summary_file,
outputs = self.stderr) outputs = self.stderr)
if self.barcoded == "yes" :
self.add_shell_execution(self.get_exec_path("Rscript") + " " + self.get_exec_path("graphe_summary_barcode") +' -f '+ '$1' +' --out ' + self.output_directory + " -p "+self.sample_name+ " 2> " +' $2',
cmd_format='{EXE} {IN} {OUT}' ,
map=False,
inputs = self.sequencing_summary_file,
outputs = self.stderr_barcoded)
#self.add_shell_execution('tar -czf '+ self.output_directory +'/'+'Run_stats_archive.tar.gz -C '+ self.output_directory +' plot_stats.txt -C '+ self.output_directory +' *.png ', cmd_format='{EXE} {OUT}',
# map=False, outputs = self.archive_name)
logging.getLogger("jflow").debug("End Run_stats.process! ") logging.getLogger("jflow").debug("End Run_stats.process! ")
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment