Commit 737d141d authored by Audrey Gibert

[demultiplexont.py]

The script behaved incorrectly when seqkit processing took a long time.
-> Now: post-processing waits for the end of the job in the process part.
parent 5ced54a6
......@@ -36,6 +36,7 @@ class Demultiplex_ONT (Analysis):
#self.add_parameter("barcode_file", "Name of the barcode file", default=barcode_file, required=False , file_format = 'str')
self.add_parameter("archive_name", "Name of the archive", default=archivename, type='str')
#self.add_parameter( "run_name", "The name of the run (from total fastq file)", pattern='{basename_woext}', items=self.fastq_files, file_format = "fastq")
self.add_output_file("output_file", "output_file", filename=os.path.join(self.output_directory, "DemultiplexONT.output") )
def define_analysis(self):
self.name = "DemultiplexONT"
......@@ -43,7 +44,7 @@ class Demultiplex_ONT (Analysis):
self.software = "Seqkit"
def __parse_stat_file (self, stat_file):
logging.getLogger("jflow").debug("Begin DemultiplexONT.__parse_stat_file! file =",stat_file)
logging.getLogger("demultiplexont.process").debug("DemultiplexONT.__parse_stat_file begins! file = "+str(stat_file))
"""
Parse the stat file
@param stat_file : the stdout porechop
......@@ -61,18 +62,16 @@ class Demultiplex_ONT (Analysis):
# dico_stats[SampleName][parameterName] = Value
dico_stats = {}
for sample_number in range(len(list_stats)):
print("Le numero de sample est : "+str(sample_number))
logging.getLogger("demultiplexont.process").debug("DemultiplexONT.__parse_stat_file exe! Sample number : "+str(sample_number) )
dico_stats[list_stats[sample_number][0]] = {}
for parameter_idx in range(1, len(header)):
print(" Le numero de parametre est : " + str(parameter_idx))
print(" Dans : dico [list_stats[spl_nb][0]] [header[param_idx]]")
print(" on va mettre: list_stats[spl_nb][param_idx]")
dico_stats[list_stats[sample_number][0]][header[parameter_idx]] = list_stats[sample_number][parameter_idx]
logging.getLogger("demultiplexont.process").debug("DemultiplexONT.__parse_stat_file ends! file = "+stat_file)
return dico_stats
def post_process(self):
logging.getLogger("jflow").debug("Begin DemultiplexONT.post_process! ont_qc")
logging.getLogger("demultiplexont.process").debug("DemultiplexONT.post_process begins!")
# Create dictionary : key = file name or prefix, value = files path
stats_dico = self.__parse_stat_file(os.path.join(self.output_directory, "DemultiplexONT.output"))
......@@ -83,26 +82,28 @@ class Demultiplex_ONT (Analysis):
# Add stats metrics
for fastq in stats_dico:
if re.search(".fastq.gz",fastq):
fastq_name = os.path.basename(fastq).replace(".fastq.gz","").split('_')[-1]
else :
fastq_name = os.path.splitext(os.path.basename(fastq))[0].split('_')[-1]
if re.search("undetermined.", fastq):
fastq_name = os.path.basename(fastq).split('.')[0].split('_')[-1]
else:
fastq_name = "_".join(os.path.basename(fastq).split('.')[0].split('_')[-2:])
logging.getLogger("demultiplexont.process").debug("DemultiplexONT.fast_name = "+fastq_name)
for stat in stats_dico[fastq]:
self._add_result_element("stats_metrics", stat, stats_dico[fastq][stat],fastq_name)
logging.getLogger("jflow").debug("End DemultiplexONT.post_process! ")
logging.getLogger("demultiplexont.process").debug("DemultiplexONT.post_process ends!")
def get_version(self):
shell_script = self.get_exec_path("seqkit") + " version | head -n1"
logging.getLogger("jflow").debug("DemultiplexONT.get_version ! shell_script " + str(shell_script))
logging.getLogger("demultiplexont.process").debug("DemultiplexONT.get_version ! shell_script = " + str(shell_script))
cmd = ["sh","-c",shell_script]
p = Popen(cmd, stdout=PIPE, stderr=PIPE)
stdout, stderr = p.communicate()
logging.getLogger("jflow").debug("DemultiplexONT.get_version !" + str(stderr))
logging.getLogger("demultiplexont.process").debug("DemultiplexONT.get_version result: " + str(stderr))
return stdout
def process(self):
logging.getLogger("jflow").debug("Begin DemultiplexONT.process! ont_qc")
logging.getLogger("demultiplexont.process").debug("DemultiplexONT.process begins!")
iter = 1
str_input = ""
......@@ -122,5 +123,5 @@ class Demultiplex_ONT (Analysis):
#archive = self.output_directory + '/' + self.archive_name + '.tar.gz'
#self.add_shell_execution('tar -czf $1 ' + self.output_directory + '/' + '*_trim.fastq ', cmd_format='{EXE} {OUT}', map=False, outputs = archive)
logging.getLogger("jflow").debug("End Seqkit.process! ")
logging.getLogger("demultiplexont.process").debug("DemultiplexONT.process ends!")
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment