RAPPEL : Opération de maintenance > ForgeMIA indisponible le 20 Janvier entre 7h et 12h

Commit 7324bc82 authored by Maxime Manno 🍜
Browse files

Update files for ONT demultiplex

parent 9efb4b49
......@@ -71,12 +71,12 @@ class OntQualityCheck (NG6Workflow):
# add logs
if len(self.log_files) > 0 :
add_log = self.add_component("BasicAnalysis", [self.log_files,"Log Files","Log files generated during primary analysis","-","-","-","gz", "","log.gz"])
add_log = self.add_component("BasicAnalysis", [self.log_files,"Log Files","Log files generated during primary analysis","-","-","-","none", "","log.gz"])
addrawfiles = self.add_component("AddRawFiles", [self.runobj, self.get_all_reads(), self.compression])
ontstat = self.add_component("Run_stats", [self.summary_file, sample_names[0]])
if (self.barcoded != None) or (self.barcoded != "no") :
demultiplexont = self.add_component("Demultiplex_ONT", [self.get_all_reads() , self.barcoded])
if (self.barcoded == "yes"):
demultiplexont = self.add_component("Demultiplex_ONT", [self.get_all_reads()])
if self.trimming == "yes":
trim_porechop = self.add_component("Trim_porechop", [self.get_all_reads() , "discard_middle"])
if self.fast5dir != None:
......
......@@ -31,11 +31,11 @@ class Demultiplex_ONT (Analysis):
def __init__(self, args={}, id=None, function= "process"):
Analysis.__init__(self, args, id, function)
def define_parameters(self, fastq_files, barcode_file, archivename="DemultiplexONT_archive.tar"):
def define_parameters(self, fastq_files, archivename="DemultiplexONT_archive.tar"):
self.add_input_file_list( "fastq_files", "fastq_files", default=fastq_files, required=True, file_format = 'fastq')
self.add_parameter("barcode_file", "Name of the barcode file", default=barcode_file, required=False , file_format = 'str')
#self.add_parameter("barcode_file", "Name of the barcode file", default=barcode_file, required=False , file_format = 'str')
self.add_parameter("archive_name", "Name of the archive", default=archivename, type='str')
self.add_parameter( "run_name", "The name of the run (from total fastq file)", pattern='{basename_woext}', items=self.fastq_files, file_format = "fastq")
#self.add_parameter( "run_name", "The name of the run (from total fastq file)", pattern='{basename_woext}', items=self.fastq_files, file_format = "fastq")
def define_analysis(self):
self.name = "DemultiplexONT"
......@@ -53,7 +53,7 @@ class Demultiplex_ONT (Analysis):
list_stats= []
with open(stat_file, "r") as f_stat :
for line in f_stat.readlines():
list_stat.append(line.split())
list_stats.append(line.split())
# Registering file's header into a list
header = list_stats.pop(0)
......@@ -74,47 +74,26 @@ class Demultiplex_ONT (Analysis):
def post_process(self):
logging.getLogger("jflow").debug("Begin DemultiplexONT.post_process! ont_qc")
# Create dictionary : key = file name or prefix, value = files path
results_files = []
stats_dico = self.__parse_stat_file(os.path.join(self.output_directory, "DemultiplexONT.output"))
# add header of stats
group = "statsporechop"
self._add_result_element("metrics", "headers", ','.join(["read_trim_start", "read_total_start", "bp_removed_start", "read_trim_end", "read_total_end", "bp_removed_end"]), group)
print(os.listdir(self.output_directory))
for file in os.listdir(self.output_directory):
full_file_path = os.path.join(self.output_directory, file)
logging.getLogger("jflow").debug("Trimporechop.post_process : full_file_path "+full_file_path)
if file.endswith(".fastq"):
logging.getLogger("jflow").debug("Trimporechop.post_process match .fastq : full_file_path "+full_file_path)
results_files.append(full_file_path)
elif file.endswith(".stdout"):
logging.getLogger("jflow").debug("Trimporechop.post_process match .stdout: full_file_path "+full_file_path)
results_files.append(full_file_path)
filename = os.path.basename(file).split(".stdout")[0]
resultlist = self.__parse_stat_file(full_file_path)
read_trim_start = resultlist[0]
read_total_start = resultlist[1]
bp_removed_start = resultlist[2]
read_trim_end = resultlist[3]
read_total_end = resultlist[4]
bp_removed_end = resultlist[5]
#add stats for each fastq file
self._add_result_element("ont_sample", "read_trim_start", read_trim_start,filename)
self._add_result_element("ont_sample", "read_total_start", read_total_start,filename)
self._add_result_element("ont_sample", "bp_removed_start", bp_removed_start,filename)
self._add_result_element("ont_sample", "read_trim_end", read_trim_end,filename)
self._add_result_element("ont_sample", "read_total_end", read_total_end,filename)
self._add_result_element("ont_sample", "bp_removed_end", bp_removed_end,filename)
#Finaly create and add the archive to the analysis
#self._create_and_archive(results_files,self.archive_name)
self._archive_files(results_files, "gz")
stats_names = ["format",'num_seqs','sum_len','avg_len','min_len','max_len',"N50"]
#'Q2', , , 'N50', , , 'format', 'Q1', 'Q3', 'type', 'sum_gap', 'Q30(%)', , 'Q20(%)'
self._add_result_element("metrics", "headers", ','.join(stats_names),"stats_names")
# Add stats metrics
for fastq in stats_dico:
if re.search(".fastq.gz",fastq):
fastq_name = os.path.basename(fastq).replace(".fastq.gz","").split('_')[-1]
else :
fastq_name = os.path.splitext(os.path.basename(fastq))[0].split('_')[-1]
for stat in stats_dico[fastq]:
self._add_result_element("stats_metrics", stat, stats_dico[fastq][stat],fastq_name)
logging.getLogger("jflow").debug("End DemultiplexONT.post_process! ")
def get_version(self):
shell_script = self.get_exec_path("seqkit") + " --version"
shell_script = self.get_exec_path("seqkit") + " version | head -n1"
logging.getLogger("jflow").debug("DemultiplexONT.get_version ! shell_script " + str(shell_script))
cmd = ["sh","-c",shell_script]
p = Popen(cmd, stdout=PIPE, stderr=PIPE)
......@@ -125,15 +104,23 @@ class Demultiplex_ONT (Analysis):
def process(self):
logging.getLogger("jflow").debug("Begin DemultiplexONT.process! ont_qc")
iter = 1
str_input = ""
str_output = ""
for fastq in self.fastq_files:
str_input = str_input + " $" + str(iter)
iter = iter + 1
str_output = " $"+ str(iter)
# Create cmd
self.add_shell_execution(self.get_exec_path("seqkit") +" " + self.options + " > ${" + str() + "}",
self.add_shell_execution(self.get_exec_path("seqkit") +" stats --all " + str_input + ">" + str_output,
cmd_format='{EXE} {IN} {OUT}' ,
map=False,
inputs = self.fastq_files)
inputs = [self.fastq_files],
outputs = os.path.join(self.output_directory, "DemultiplexONT.output"))
#archive = self.output_directory + '/' + self.archive_name + '.tar.gz'
#self.add_shell_execution('tar -czf $1 ' + self.output_directory + '/' + '*_trim.fastq ', cmd_format='{EXE} {OUT}', map=False, outputs = archive)
logging.getLogger("jflow").debug("End Trimporechop.process! ")
logging.getLogger("jflow").debug("End Seqkit.process! ")
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment