Commit 5687a776 authored by Romain Therville 🐭

Merge branch 'nG6_ont_demultiplex' into 'master'

[demultiplexont.py]

See merge request !62
parents b1716325 d1b301f0
......@@ -111,10 +111,10 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
{assign var="mapped" value=" "|explode:$sample_results["default"].mapped}
{assign var="properlypaired" value=" "|explode:$sample_results["default"].properlypaired}
{assign var="singletons" value=" "|explode:$sample_results["default"].singletons}
<td>{$mapped[0]|number_format:0:' ':' '} {$mapped[1]}</td>
<td>{$properlypaired[0]|number_format:0:' ':' '} {$properlypaired[1]}</td>
<td>{$mapped[0]|number_format:0:' ':' '} {$mapped[1]}{$mapped[2]}</td>
<td>{$properlypaired[0]|number_format:0:' ':' '} {$properlypaired[1]}{$properlypaired[2]}</td>
<td>{$sample_results["default"].matemapped|number_format:0:' ':' '}</td>
<td>{$singletons[0]|number_format:0:' ':' '} {$singletons[1]}</td>
<td>{$singletons[0]|number_format:0:' ':' '} {$singletons[1]}{$singletons[2]}</td>
<td>{$sample_results["default"].mapch1|number_format:0:' ':' '}</td>
<td>{$sample_results["default"].supplementary|number_format:0:' ':' '}</td>
{if !isset($sample_results["default"]["pairOpticalDuplicates"]) }
......@@ -122,7 +122,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
<td>-</td>
<td>-</td>
{else}
<td>{($sample_results["default"].pairDuplicates*2 + $sample_results["default"].unpairDuplicates)|number_format:0:' ':' '} ({$sample_results["default"].percentDuplication}*100%)</td>
<td>{($sample_results["default"].pairDuplicates*2 + $sample_results["default"].unpairDuplicates)|number_format:0:' ':' '} ({$sample_results["default"].percentDuplication*100}%)</td>
<td>{$sample_results["default"].pairDuplicates|number_format:0:' ':' '}</td>
<td>{$sample_results["default"].pairOpticalDuplicates|number_format:0:' ':' '}</td>
{/if}
......
......@@ -54,20 +54,17 @@ class OntQualityCheck (NG6Workflow):
logging.getLogger("jflow").debug("OntQualityCheck.process! sample_name : "+str(sample_names))
logging.getLogger("jflow").debug("OntQualityCheck.process! summary_file : "+str(self.summary_file))
### check for log file
# get current path
cmd = [self.get_exec_path("pwd")]
p = Popen(cmd, stdout=PIPE, stderr=PIPE)
stdout, stderr = p.communicate()
exec_path = stdout.decode("utf-8").rsplit()[0]
logging.getLogger("jflow").debug("OntQualityCheck._process.logfile pwd = " + str(exec_path))
### check for log file self.fastq_files
# get the dirname of reads folder, logs are in this dirname/jflow/
logpath = os.path.dirname( self.get_all_reads()[0] ) + "/jflow/"
logging.getLogger("jflow").debug("OntQualityCheck._process.logfile logpath = " + logpath)
# find .log files
for file in glob(exec_path+"/*.log"):
for file in glob(logpath+"/*.log"):
self.log_files.append(file)
for file in glob(exec_path+"/*.pdf"):
for file in glob(logpath+"/*.pdf"):
self.log_files.append(file)
logging.getLogger("jflow").debug("OntQualityCheck._process.logfile self.log_files = " + ",".join(self.log_files))
logging.getLogger("jflow").debug("OntQualityCheck._process.logfile exiting")
# add logs
if len(self.log_files) > 0 :
......@@ -81,4 +78,7 @@ class OntQualityCheck (NG6Workflow):
trim_porechop = self.add_component("Trim_porechop", [self.get_all_reads() , "discard_middle"])
if self.fast5dir != None:
fast5archive = self.add_component("Fast5archive", [self.fast5dir, "fast5archive.tar"])
logging.getLogger("jflow").debug("OntQualityCheck._process.logfile exiting")
......@@ -36,6 +36,7 @@ class Demultiplex_ONT (Analysis):
#self.add_parameter("barcode_file", "Name of the barcode file", default=barcode_file, required=False , file_format = 'str')
self.add_parameter("archive_name", "Name of the archive", default=archivename, type='str')
#self.add_parameter( "run_name", "The name of the run (from total fastq file)", pattern='{basename_woext}', items=self.fastq_files, file_format = "fastq")
self.add_output_file("output_file", "output_file", filename=os.path.join(self.output_directory, "DemultiplexONT.output") )
def define_analysis(self):
self.name = "DemultiplexONT"
......@@ -43,7 +44,7 @@ class Demultiplex_ONT (Analysis):
self.software = "Seqkit"
def __parse_stat_file (self, stat_file):
logging.getLogger("jflow").debug("Begin DemultiplexONT.__parse_stat_file! file =",stat_file)
logging.getLogger("demultiplexont.process").debug("DemultiplexONT.__parse_stat_file begins! file = "+str(stat_file))
"""
Parse the stat file
@param stat_file : the stdout porechop
......@@ -61,18 +62,16 @@ class Demultiplex_ONT (Analysis):
# dico_stats[SampleName][parameterName] = Value
dico_stats = {}
for sample_number in range(len(list_stats)):
print("Le numero de sample est : "+str(sample_number))
logging.getLogger("demultiplexont.process").debug("DemultiplexONT.__parse_stat_file exe! Sample number : "+str(sample_number) )
dico_stats[list_stats[sample_number][0]] = {}
for parameter_idx in range(1, len(header)):
print(" Le numero de parametre est : " + str(parameter_idx))
print(" Dans : dico [list_stats[spl_nb][0]] [header[param_idx]]")
print(" on va mettre: list_stats[spl_nb][param_idx]")
dico_stats[list_stats[sample_number][0]][header[parameter_idx]] = list_stats[sample_number][parameter_idx]
logging.getLogger("demultiplexont.process").debug("DemultiplexONT.__parse_stat_file ends! file = "+stat_file)
return dico_stats
def post_process(self):
logging.getLogger("jflow").debug("Begin DemultiplexONT.post_process! ont_qc")
logging.getLogger("demultiplexont.process").debug("DemultiplexONT.post_process begins!")
# Create dictionary : key = file name or prefix, value = files path
stats_dico = self.__parse_stat_file(os.path.join(self.output_directory, "DemultiplexONT.output"))
......@@ -83,26 +82,28 @@ class Demultiplex_ONT (Analysis):
# Add stats metrics
for fastq in stats_dico:
if re.search(".fastq.gz",fastq):
fastq_name = os.path.basename(fastq).replace(".fastq.gz","").split('_')[-1]
else :
fastq_name = os.path.splitext(os.path.basename(fastq))[0].split('_')[-1]
if re.search("undetermined.", fastq):
fastq_name = os.path.basename(fastq).split('.')[0].split('_')[-1]
else:
fastq_name = "_".join(os.path.basename(fastq).split('.')[0].split('_')[-2:])
logging.getLogger("demultiplexont.process").debug("DemultiplexONT.fast_name = "+fastq_name)
for stat in stats_dico[fastq]:
self._add_result_element("stats_metrics", stat, stats_dico[fastq][stat],fastq_name)
logging.getLogger("jflow").debug("End DemultiplexONT.post_process! ")
logging.getLogger("demultiplexont.process").debug("DemultiplexONT.post_process ends!")
def get_version(self):
shell_script = self.get_exec_path("seqkit") + " version | head -n1"
logging.getLogger("jflow").debug("DemultiplexONT.get_version ! shell_script " + str(shell_script))
logging.getLogger("demultiplexont.process").debug("DemultiplexONT.get_version ! shell_script = " + str(shell_script))
cmd = ["sh","-c",shell_script]
p = Popen(cmd, stdout=PIPE, stderr=PIPE)
stdout, stderr = p.communicate()
logging.getLogger("jflow").debug("DemultiplexONT.get_version !" + str(stderr))
logging.getLogger("demultiplexont.process").debug("DemultiplexONT.get_version result: " + str(stderr))
return stdout
def process(self):
logging.getLogger("jflow").debug("Begin DemultiplexONT.process! ont_qc")
logging.getLogger("demultiplexont.process").debug("DemultiplexONT.process begins!")
iter = 1
str_input = ""
......@@ -122,5 +123,5 @@ class Demultiplex_ONT (Analysis):
#archive = self.output_directory + '/' + self.archive_name + '.tar.gz'
#self.add_shell_execution('tar -czf $1 ' + self.output_directory + '/' + '*_trim.fastq ', cmd_format='{EXE} {OUT}', map=False, outputs = archive)
logging.getLogger("jflow").debug("End Seqkit.process! ")
logging.getLogger("demultiplexont.process").debug("DemultiplexONT.process ends!")
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment