Commit e52960af authored by Gerald Salin

Merge branch 'count_files' into 'master'

Count reads and bases too early

See merge request !18
parents 82d05c7c b143425b
@@ -52,6 +52,7 @@ def add_analysis(parent_id, analysis_cfg, *input_files):
     import pickle
     import logging
     logging.getLogger("Analysis").debug("Start. Imports went good.")
+    logging.getLogger("Analysis").debug("Start. working for analysis " + analysis_cfg)
     # get inputs from parameters
     analysis_serialized_path = input_files[0]
@@ -366,6 +367,7 @@ class Analysis (Component):
     def _create_and_archive(self, files, archive_name=None, prefix="dir"):
+        logging.getLogger("Analysis").debug("_create_and_archive entering")
         """
         return the web path to the archive files
         If there are samples, datas will be organised by samples
@@ -391,13 +393,14 @@ class Analysis (Component):
         if len(file_basenames) != len(set(file_basenames)) :
             try : # If there are samples
                 if self.run :
                     samples = self.run.get_samples()
                 else :
                     run_id = self.__get_run_id()
                     my_run = Run.get_from_id(run_id)
                     samples = my_run.get_samples()
+                logging.getLogger("Analysis").debug("_create_and_archive with samples")
                 gfiles = {}
                 ungrouped_files = []
                 what_left = []
@@ -471,14 +474,20 @@ class Analysis (Component):
                 for file in what_left:
                     copyfile(file, os.path.join(tmp_dir, os.path.basename(file)))
+                logging.getLogger("Analysis").debug("_create_and_archive before tarf")
                 tarf = Utils.tar_dir(tmp_dir, os.path.join(self.__get_work_directory(), archive_name))
+                logging.getLogger("Analysis").debug("_create_and_archive before targzf")
                 targzf = Utils.gzip(tarf, self.__get_work_directory(), delete=False)
                 # Then delete temporary files
+                logging.getLogger("Analysis").debug("_create_and_archive before os.remove(tarf)")
                 os.remove(tarf)
+                logging.getLogger("Analysis").debug("_create_and_archive before rmtree(tmp_dir)")
                 rmtree(tmp_dir)
+                logging.getLogger("Analysis").debug("_create_and_archive before return " + os.path.join(self.directory, os.path.basename(targzf)))
                 return 'fileadmin' + os.path.join(self.directory, os.path.basename(targzf))
             except :
+                logging.getLogger("Analysis").debug("_create_and_archive in execpt, without samples?")
                 gfiles = {}
                 # Group files by folders
                 for file in files:
@@ -499,19 +508,29 @@ class Analysis (Component):
                         os.makedirs(dir)
                     for file in gfiles[dir_name]:
                         copyfile(file, os.path.join(dir, os.path.basename(file)))
+                logging.getLogger("Analysis").debug("_create_and_archive before tarf")
                 tarf = Utils.tar_dir(tmp_dir, os.path.join(self.__get_work_directory(), archive_name))
+                logging.getLogger("Analysis").debug("_create_and_archive before targzf")
                 targzf = Utils.gzip(tarf, self.__get_work_directory(), delete=False)
                 # Then delete temporary files
+                logging.getLogger("Analysis").debug("_create_and_archive before os.remove(tarf)")
                 os.remove(tarf)
+                logging.getLogger("Analysis").debug("_create_and_archive before rmtree(tmp_dir)")
                 rmtree(tmp_dir)
+                logging.getLogger("Analysis").debug("_create_and_archive before return " + os.path.join(self.directory, os.path.basename(targzf)))
                 return 'fileadmin' + os.path.join(self.directory, os.path.basename(targzf))
         else :
+            logging.getLogger("Analysis").debug("_create_and_archive, length differs")
+            logging.getLogger("Analysis").debug("_create_and_archive before tarf")
             tarf = Utils.tar_files(files, os.path.join(self.__get_work_directory(), archive_name))
+            logging.getLogger("Analysis").debug("_create_and_archive before targzf")
            targzf = Utils.gzip(tarf, self.__get_work_directory(), delete=False)
             # Then delete temporary files
+            logging.getLogger("Analysis").debug("_create_and_archive before os.remove(tarf)")
             os.remove(tarf)
+            logging.getLogger("Analysis").debug("_create_and_archive before return " + os.path.join(self.directory, os.path.basename(targzf)))
             return 'fileadmin' + os.path.join(self.directory, os.path.basename(targzf))
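All three branches above end with the same archive step; only the staging differs (a temporary directory versus a flat file list). For readers without the project's `Utils` module at hand, here is a minimal standard-library sketch of what `Utils.tar_dir` followed by `Utils.gzip(..., delete=False)` accomplishes; the function and variable names are illustrative, not the project's API:

```python
import gzip
import os
import shutil
import tarfile

def tar_and_gzip(tmp_dir, work_dir, archive_name):
    """Hedged stdlib equivalent of the Utils.tar_dir + Utils.gzip sequence."""
    # Create an uncompressed tar of the staging directory (like Utils.tar_dir)
    tar_path = os.path.join(work_dir, archive_name)
    with tarfile.open(tar_path, "w") as tar:
        tar.add(tmp_dir, arcname=os.path.basename(tmp_dir))
    # Compress it while keeping the source (like Utils.gzip(..., delete=False))
    gz_path = tar_path + ".gz"
    with open(tar_path, "rb") as src, gzip.open(gz_path, "wb") as dst:
        shutil.copyfileobj(src, dst)
    # The diff then removes the intermediate tar and the staging directory
    os.remove(tar_path)
    shutil.rmtree(tmp_dir)
    return gz_path
```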
@@ -63,17 +63,21 @@ class t3MySQLdb(object):
         # opened connection
         #
-        if dictc :
-            return pymysql.connect(host = self.host,
-                                   user = self.user,
-                                   password = self.passwd,
-                                   db = self.db,
-                                   cursorclass = pymysql.cursors.DictCursor)
-        return pymysql.connect(host = self.host,
-                               user = self.user,
-                               password = self.passwd,
-                               db = self.db)
+        try:
+            if dictc :
+                return pymysql.connect(host = self.host,
+                                       user = self.user,
+                                       password = self.passwd,
+                                       db = self.db,
+                                       cursorclass = pymysql.cursors.DictCursor)
+            return pymysql.connect(host = self.host,
+                                   user = self.user,
+                                   password = self.passwd,
+                                   db = self.db)
+        except Exception as e:
+            logging.getLogger("t3MySQLdb.get_connection").error("Exception while connecting to the database " + self.db + " on the server " + self.host + " with user "+self.user + ":" +str(e))
+            raise e

     def execute(self,sql, commit = False, dictc = False, exc_msg = None):
         """
@@ -83,7 +87,9 @@ class t3MySQLdb(object):
         @param dictc: use a dictionary cursor to get dictionary results
         @param exc_msg: exception message, to be passed to the raised exception
         """
+        logging.getLogger("t3MySQLdb.execute").debug("Entering execute")
         connection = self.get_connection(dictc)
+        logging.getLogger("t3MySQLdb.execute").debug("after connection " + str(connection))
         try:
             id , rows = None, None
             with connection.cursor() as cursor:
@@ -93,8 +99,10 @@ class t3MySQLdb(object):
                 rows = cursor.fetchall()
             if commit :
                 connection.commit()
+            logging.getLogger("t3MySQLdb.execute").debug("before returning")
             return _QResult( rowcount = rowcount, rows = rows, lastrowid = id)
         except Exception as e:
+            logging.getLogger("t3MySQLdb.execute").error("Exception" + str(e))
             if exc_msg : raise Exception(e, exc_msg)
             else : raise e
         finally:
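Both changes in this file follow the same pattern: log the failure with its context, then re-raise. A self-contained sketch of the connection half; the standalone function and its parameters are stand-ins for the class attributes used above:

```python
import logging
import pymysql

def get_connection(host, user, passwd, db, dictc=False):
    """Hedged sketch of the log-and-re-raise pattern introduced in the diff."""
    try:
        # DictCursor returns rows as dictionaries instead of tuples
        cursorclass = pymysql.cursors.DictCursor if dictc else pymysql.cursors.Cursor
        return pymysql.connect(host=host, user=user, password=passwd,
                               db=db, cursorclass=cursorclass)
    except Exception as e:
        # Record which database/server/user failed before propagating the error
        logging.getLogger("t3MySQLdb.get_connection").error(
            "Exception while connecting to %s on %s with user %s: %s",
            db, host, user, e)
        raise
```

A bare `raise` is the idiomatic way to re-raise inside an `except` block; the diff's `raise e` behaves essentially the same under Python 3.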
@@ -223,13 +231,17 @@ class t3MySQLdb(object):
         @param project_id : the project id to select
         @return: [name, description]
         """
-        logging.getLogger("t3MySQLdb.select_project").debug("Selecting project id=" + str(project_id))
-        sql = "SELECT name, description FROM tx_nG6_project WHERE tx_nG6_project.uid ='" + str(project_id) + "'"
-        logging.getLogger("t3MySQLdb.select_project").debug(sql)
-        qresult = self.execute(sql)
-        res = qresult.rows[0]
-        logging.getLogger("t3MySQLdb.select_project").debug("Returning [" + str([res[0], res[1]]) + "]")
-        return [res[0], res[1]]
+        try:
+            logging.getLogger("t3MySQLdb.select_project").debug("Selecting project id=" + str(project_id))
+            sql = "SELECT name, description FROM tx_nG6_project WHERE tx_nG6_project.uid ='" + str(project_id) + "'"
+            logging.getLogger("t3MySQLdb.select_project").debug(sql)
+            qresult = self.execute(sql)
+            res = qresult.rows[0]
+            logging.getLogger("t3MySQLdb.select_project").debug("Returning [" + str([res[0], res[1]]) + "]")
+            return [res[0], res[1]]
+        except Exception as e:
+            logging.getLogger("t3MySQLdb.select_project").error("Exception while getting the project : " +str(e))
+            raise e

     def select_project_from_name(self, name):
         """
@@ -239,10 +251,11 @@ class t3MySQLdb(object):
         """
         logging.getLogger("t3MySQLdb.select_project_from_name").debug("Selecting project name=" + str(name))
         sql = "SELECT uid, description FROM tx_nG6_project WHERE tx_nG6_project.name ='" + str(name) + "'"
-        logging.getLogger("t3MySQLdb.select_project").debug(sql)
+        logging.getLogger("t3MySQLdb.select_project_from_name").debug(sql)
         qresult = self.execute(sql)
+        logging.getLogger("t3MySQLdb.select_project_from_name").debug("before getting qresult")
         res = qresult.rows[0]
-        logging.getLogger("t3MySQLdb.select_project").debug("Returning [" + str([res[0], res[1]]) + "]")
+        logging.getLogger("t3MySQLdb.select_project_from_name").debug("Returning [" + str([res[0], res[1]]) + "]")
         return [res[0], res[1]]

     def get_project_analysis_ids(self, project_id):
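One thing this merge request does not change: these helpers still build SQL by concatenating `str(...)` values into the query string. Since the code already uses pymysql, a parameterized query would sidestep manual quoting; a hedged sketch, assuming a `connection` obtained from `get_connection`:

```python
def select_project_from_name(connection, name):
    """Hedged sketch: the same lookup as a parameterized query; pymysql
    escapes the value itself, so no manual quoting of `name` is needed."""
    with connection.cursor() as cursor:
        cursor.execute(
            "SELECT uid, description FROM tx_nG6_project WHERE tx_nG6_project.name = %s",
            (name,))
        res = cursor.fetchone()
        return [res[0], res[1]]
```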
@@ -56,7 +56,7 @@ def md5sum(md5_file_out, files_to_md5sum):
     stdout,stderr = p.communicate()
     logging.getLogger("AddRawFiles").debug("md5sum. finished")

-def add_stats_to_run (run_dump_path,file):
+def add_stats_to_run (run_dump_path, file):
     import pickle
     import logging
     total_nb_seq = 0
@@ -164,8 +164,8 @@ class AddRawFiles (Component):
         logging.getLogger("AddRawFiles").debug("process. Before self.add_stats_to_run(extract_stats_from_seq_file")
         logging.getLogger("AddRawFiles").debug("process. Dirname of files_to_save_stats[0] : "+ os.path.dirname(files_to_save_stats[0]))
         #Add number of reads and total length in base for each seq file and add these data to the run in the database
-        self.add_python_execution(add_stats_to_run, cmd_format='{EXE} {ARG} {IN} > {OUT}', map=False,
-                 outputs = self.stdout, inputs = files_to_save_stats[0], includes=self.files_to_save, arguments=[run_dump_path])
+        self.add_python_execution(add_stats_to_run, cmd_format='{EXE} {ARG} > {OUT}', map=False,
+                 outputs = self.stdout, includes = files_to_save_stats, arguments=[run_dump_path, files_to_save_stats[0]])
         #archive the files in the work folder of the run to be rsynced at the end
         logging.getLogger("AddRawFiles").debug("process. Before copying/archiving files with compression = " + self.compression )
@@ -17,7 +17,7 @@
 import os
 import re
+import logging
 from subprocess import Popen, PIPE
 from jflow.utils import get_argument_pattern
@@ -101,9 +101,11 @@ class FastQC (Analysis):
         return stats

     def post_process(self):
+        logging.getLogger("FastQC").debug("post_process, entering")
         results_files = []
         metrics = []
         for file in os.listdir(self.output_directory):
+            logging.getLogger("FastQC").debug("post_process, working on " + file)
             full_file_path = os.path.join(self.output_directory, file)
             if file.endswith(".zip"):
                 results_files.append(full_file_path)
@@ -176,10 +178,9 @@ class FastQC (Analysis):
         for metric in metrics :
             self._add_result_element("metrics", "metric", metric, metric)
         # Finaly create and add the archive to the analysis
-        print(results_files)
-        print("archive_name " , self.archive_name)
+        logging.getLogger("FastQC").debug("post_process, results_files = " + str(results_files))
+        logging.getLogger("FastQC").debug("post_process, archive_name = " + self.archive_name)
         self.archive_name = self.archive_name.replace("/","-")
-        print("archive_name " , self.archive_name)
         self._create_and_archive(results_files, self.archive_name)

     def get_version(self):
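Throughout the merge request, `print` statements give way to named loggers. Those `debug` records only appear if logging is configured at DEBUG level somewhere in the application; a minimal sketch of such a configuration (the ng6/jflow runtime presumably does its own setup, so this is only illustrative):

```python
import logging

# Route DEBUG records from the loggers used in this merge request
# ("Analysis", "FastQC", "t3MySQLdb.execute", ...) to the console.
logging.basicConfig(
    level=logging.DEBUG,
    format="%(asctime)s %(name)s %(levelname)s %(message)s")
```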
@@ -49,20 +49,26 @@ class FastqIlluminaFilter (Analysis):

     def post_process(self):
+        logging.getLogger("FastqIlluminaFilter").debug("post_process entering")
         # Create dictionary : key = file name or prefix, value = files path
         files = {}
         if self.group_prefix:
+            logging.getLogger("FastqIlluminaFilter").debug("post_process self.group_prefix is true")
             files = Utils.get_filepath_by_prefix( self.stdout, self.group_prefix )
         else:
+            logging.getLogger("FastqIlluminaFilter").debug("post_process self.group_prefix is false")
             for file in self.stdout:
+                logging.getLogger("FastqIlluminaFilter").debug("post_process self.group_prefix is false, work on " + file)
                 file_name = os.path.splitext(os.path.basename(file))[0]
                 files[file_name] = [file]

         # Merge analyses stat
         for sample_file in list(files.keys()):
+            logging.getLogger("FastqIlluminaFilter").debug("post_process, work on " + sample_file)
             tot_input = 0
             tot_output = 0
             for file in files[sample_file]:
+                logging.getLogger("FastqIlluminaFilter").debug("post_process, work on " + file)
                 [input, output] = self.__parse_stat_file(file)
                 tot_input += int(input)
                 tot_output += int(output)
@@ -78,6 +84,7 @@ class FastqIlluminaFilter (Analysis):
     def __parse_stat_file (self, stat_file):
+        logging.getLogger("FastqIlluminaFilter").debug("__parse_stat_file, entering")
         """
         Parse the stat file
         @param stat_file : the fastq_illumina_filter summary file
@@ -95,6 +102,7 @@ class FastqIlluminaFilter (Analysis):
         if output_reg:
             output = output_reg.group(1).replace(",", "")
+        logging.getLogger("FastqIlluminaFilter").debug("__parse_stat_file, returning")
         return [input, output]
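`__parse_stat_file` pulls the input and output read counts out of the fastq_illumina_filter summary with regular expressions, stripping thousands separators before the caller converts them with `int()`. A hedged sketch of that pattern; the exact wording of the summary lines is an assumption, not the tool's documented output:

```python
import re

def parse_stat_file(stat_file):
    """Hedged sketch of the regex extraction performed by __parse_stat_file."""
    input_count, output_count = None, None
    with open(stat_file) as handle:
        for line in handle:
            # Assumed summary format, e.g. "Input: 1,234,567 reads"
            input_reg = re.search(r"Input:\s*([\d,]+) reads", line)
            if input_reg:
                input_count = input_reg.group(1).replace(",", "")
            output_reg = re.search(r"Output:\s*([\d,]+) reads", line)
            if output_reg:
                output_count = output_reg.group(1).replace(",", "")
    return [input_count, output_count]
```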