Commit e52960af authored by Gerald Salin's avatar Gerald Salin

Merge branch 'count_files' into 'master'

Count reads and bases too early

See merge request !18
parents 82d05c7c b143425b
@@ -52,6 +52,7 @@ def add_analysis(parent_id, analysis_cfg, *input_files):
import pickle
import logging
logging.getLogger("Analysis").debug("Start. Imports went good.")
logging.getLogger("Analysis").debug("Start. working for analysis " + analysis_cfg)
# get inputs from parameters
analysis_serialized_path = input_files[0]
@@ -366,6 +367,7 @@ class Analysis (Component):
def _create_and_archive(self, files, archive_name=None, prefix="dir"):
logging.getLogger("Analysis").debug("_create_and_archive entering")
"""
Return the web path to the archive file.
If there are samples, data will be organised by sample.
@@ -398,6 +400,7 @@ class Analysis (Component):
run_id = self.__get_run_id()
my_run = Run.get_from_id(run_id)
samples = my_run.get_samples()
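# if the run has samples, files are grouped per sample below; a failure here falls back to the folder-based grouping in the except branch (as suggested by its debug message)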
logging.getLogger("Analysis").debug("_create_and_archive with samples")
gfiles = {}
ungrouped_files = []
what_left = []
@@ -471,14 +474,20 @@ class Analysis (Component):
for file in what_left:
copyfile(file, os.path.join(tmp_dir, os.path.basename(file)))
logging.getLogger("Analysis").debug("_create_and_archive before tarf")
tarf = Utils.tar_dir(tmp_dir, os.path.join(self.__get_work_directory(), archive_name))
logging.getLogger("Analysis").debug("_create_and_archive before targzf")
targzf = Utils.gzip(tarf, self.__get_work_directory(), delete=False)
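# delete=False presumably keeps the intermediate .tar next to the new .tar.gz, hence the explicit os.remove(tarf) below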
# Then delete temporary files
logging.getLogger("Analysis").debug("_create_and_archive before os.remove(tarf)")
os.remove(tarf)
logging.getLogger("Analysis").debug("_create_and_archive before rmtree(tmp_dir)")
rmtree(tmp_dir)
logging.getLogger("Analysis").debug("_create_and_archive before return " + os.path.join(self.directory, os.path.basename(targzf)))
return 'fileadmin' + os.path.join(self.directory, os.path.basename(targzf))
except Exception:
logging.getLogger("Analysis").debug("_create_and_archive in except handler, run without samples?")
gfiles = {}
# Group files by folders
for file in files:
@@ -499,19 +508,29 @@ class Analysis (Component):
os.makedirs(dir)
for file in gfiles[dir_name]:
copyfile(file, os.path.join(dir, os.path.basename(file)))
logging.getLogger("Analysis").debug("_create_and_archive before tarf")
tarf = Utils.tar_dir(tmp_dir, os.path.join(self.__get_work_directory(), archive_name))
logging.getLogger("Analysis").debug("_create_and_archive before targzf")
targzf = Utils.gzip(tarf, self.__get_work_directory(), delete=False)
# Then delete temporary files
logging.getLogger("Analysis").debug("_create_and_archive before os.remove(tarf)")
os.remove(tarf)
logging.getLogger("Analysis").debug("_create_and_archive before rmtree(tmp_dir)")
rmtree(tmp_dir)
logging.getLogger("Analysis").debug("_create_and_archive before return " + os.path.join(self.directory, os.path.basename(targzf)))
return 'fileadmin' + os.path.join(self.directory, os.path.basename(targzf))
else :
logging.getLogger("Analysis").debug("_create_and_archive, length differs")
logging.getLogger("Analysis").debug("_create_and_archive before tarf")
tarf = Utils.tar_files(files, os.path.join(self.__get_work_directory(), archive_name))
logging.getLogger("Analysis").debug("_create_and_archive before targzf")
targzf = Utils.gzip(tarf, self.__get_work_directory(), delete=False)
# Then delete temporary files
logging.getLogger("Analysis").debug("_create_and_archive before os.remove(tarf)")
os.remove(tarf)
logging.getLogger("Analysis").debug("_create_and_archive before return " + os.path.join(self.directory, os.path.basename(targzf)))
return 'fileadmin' + os.path.join(self.directory, os.path.basename(targzf))
......
@@ -63,6 +63,7 @@ class t3MySQLdb(object):
# opened connection
#
try:
if dictc :
return pymysql.connect(host = self.host,
user = self.user,
@@ -74,6 +75,9 @@ class t3MySQLdb(object):
user = self.user,
password = self.passwd,
db = self.db)
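# connection failures are logged with host/db/user context below before being re-raised to the caller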
except Exception as e:
logging.getLogger("t3MySQLdb.get_connection").error("Exception while connecting to the database " + self.db + " on the server " + self.host + " with user "+self.user + ":" +str(e))
raise e
def execute(self, sql, commit = False, dictc = False, exc_msg = None):
"""
@@ -83,7 +87,9 @@ class t3MySQLdb(object):
@param dictc: use a dictionary cursor to get dictionary results
@param exc_msg: exception message, to be passed to the raised exception
"""
logging.getLogger("t3MySQLdb.execute").debug("Entering execute")
connection = self.get_connection(dictc)
logging.getLogger("t3MySQLdb.execute").debug("after connection " + str(connection))
try:
id , rows = None, None
with connection.cursor() as cursor:
@@ -93,8 +99,10 @@ class t3MySQLdb(object):
rows = cursor.fetchall()
if commit :
connection.commit()
logging.getLogger("t3MySQLdb.execute").debug("before returning")
return _QResult( rowcount = rowcount, rows = rows, lastrowid = id)
except Exception as e:
logging.getLogger("t3MySQLdb.execute").error("Exception" + str(e))
if exc_msg : raise Exception(e, exc_msg)
else : raise e
finally:
@@ -223,6 +231,7 @@ class t3MySQLdb(object):
@param project_id : the project id to select
@return: [name, description]
"""
try:
logging.getLogger("t3MySQLdb.select_project").debug("Selecting project id=" + str(project_id))
sql = "SELECT name, description FROM tx_nG6_project WHERE tx_nG6_project.uid ='" + str(project_id) + "'"
logging.getLogger("t3MySQLdb.select_project").debug(sql)
@@ -230,6 +239,9 @@ class t3MySQLdb(object):
res = qresult.rows[0]
logging.getLogger("t3MySQLdb.select_project").debug("Returning [" + str([res[0], res[1]]) + "]")
return [res[0], res[1]]
except Exception as e:
logging.getLogger("t3MySQLdb.select_project").error("Exception while getting the project : " +str(e))
raise e
def select_project_from_name(self, name):
"""
@@ -239,10 +251,11 @@ class t3MySQLdb(object):
"""
logging.getLogger("t3MySQLdb.select_project_from_name").debug("Selecting project name=" + str(name))
sql = "SELECT uid, description FROM tx_nG6_project WHERE tx_nG6_project.name ='" + str(name) + "'"
logging.getLogger("t3MySQLdb.select_project").debug(sql)
logging.getLogger("t3MySQLdb.select_project_from_name").debug(sql)
qresult = self.execute(sql)
logging.getLogger("t3MySQLdb.select_project_from_name").debug("before getting qresult")
res = qresult.rows[0]
logging.getLogger("t3MySQLdb.select_project").debug("Returning [" + str([res[0], res[1]]) + "]")
logging.getLogger("t3MySQLdb.select_project_from_name").debug("Returning [" + str([res[0], res[1]]) + "]")
return [res[0], res[1]]
def get_project_analysis_ids(self, project_id):
......
@@ -56,7 +56,7 @@ def md5sum(md5_file_out, files_to_md5sum):
stdout,stderr = p.communicate()
logging.getLogger("AddRawFiles").debug("md5sum. finished")
def add_stats_to_run (run_dump_path,file):
def add_stats_to_run (run_dump_path, file):
import pickle
import logging
total_nb_seq = 0
@@ -164,8 +164,8 @@ class AddRawFiles (Component):
logging.getLogger("AddRawFiles").debug("process. Before self.add_stats_to_run(extract_stats_from_seq_file")
logging.getLogger("AddRawFiles").debug("process. Dirname of files_to_save_stats[0] : "+ os.path.dirname(files_to_save_stats[0]))
# Add the number of reads and the total length in bases of each seq file, and store these data with the run in the database
self.add_python_execution(add_stats_to_run, cmd_format='{EXE} {ARG} {IN} > {OUT}', map=False,
outputs = self.stdout, inputs = files_to_save_stats[0], includes=self.files_to_save, arguments=[run_dump_path])
self.add_python_execution(add_stats_to_run, cmd_format='{EXE} {ARG} > {OUT}', map=False,
outputs = self.stdout, includes = files_to_save_stats, arguments=[run_dump_path, files_to_save_stats[0]])
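# NOTE: the seq file now travels via arguments/includes instead of a jflow {IN} input; presumably this
# stops the framework from counting reads and bases while the workflow is being built, so the counting
# happens only when this component actually runs (the "too early" behaviour this merge fixes)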
# Archive the files in the work folder of the run, to be rsynced at the end
logging.getLogger("AddRawFiles").debug("process. Before copying/archiving files with compression = " + self.compression)
......
@@ -17,7 +17,7 @@
import os
import re
import logging
from subprocess import Popen, PIPE
from jflow.utils import get_argument_pattern
@@ -101,9 +101,11 @@ class FastQC (Analysis):
return stats
def post_process(self):
logging.getLogger("FastQC").debug("post_process, entering")
results_files = []
metrics = []
for file in os.listdir(self.output_directory):
logging.getLogger("FastQC").debug("post_process, working on " + file)
full_file_path = os.path.join(self.output_directory, file)
if file.endswith(".zip"):
results_files.append(full_file_path)
@@ -176,10 +178,9 @@ class FastQC (Analysis):
for metric in metrics :
self._add_result_element("metrics", "metric", metric, metric)
# Finaly create and add the archive to the analysis
print(results_files)
print("archive_name " , self.archive_name)
logging.getLogger("FastQC").debug("post_process, results_files = " + str(results_files))
logging.getLogger("FastQC").debug("post_process, archive_name = " + self.archive_name)
self.archive_name = self.archive_name.replace("/","-")
print("archive_name " , self.archive_name)
self._create_and_archive(results_files, self.archive_name)
def get_version(self):
......
@@ -49,20 +49,26 @@ class FastqIlluminaFilter (Analysis):
def post_process(self):
logging.getLogger("FastqIlluminaFilter").debug("post_process entering")
# Create dictionary: key = file name or prefix, value = file paths
files = {}
if self.group_prefix:
logging.getLogger("FastqIlluminaFilter").debug("post_process self.group_prefix is true")
files = Utils.get_filepath_by_prefix( self.stdout, self.group_prefix )
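# assumed behaviour: returns a dict mapping each prefix to the list of stdout files sharing it, so per-sample totals can be summed in the merge loop below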
else:
logging.getLogger("FastqIlluminaFilter").debug("post_process self.group_prefix is false")
for file in self.stdout:
logging.getLogger("FastqIlluminaFilter").debug("post_process self.group_prefix is false, work on " + file)
file_name = os.path.splitext(os.path.basename(file))[0]
files[file_name] = [file]
# Merge analysis stats
for sample_file in list(files.keys()):
logging.getLogger("FastqIlluminaFilter").debug("post_process, work on " + sample_file)
tot_input = 0
tot_output = 0
for file in files[sample_file]:
logging.getLogger("FastqIlluminaFilter").debug("post_process, work on " + file)
[input, output] = self.__parse_stat_file(file)
tot_input += int(input)
tot_output += int(output)
@@ -78,6 +84,7 @@ class FastqIlluminaFilter (Analysis):
def __parse_stat_file (self, stat_file):
logging.getLogger("FastqIlluminaFilter").debug("__parse_stat_file, entering")
"""
Parse the stat file
@param stat_file : the fastq_illumina_filter summary file
@@ -95,6 +102,7 @@ class FastqIlluminaFilter (Analysis):
if output_reg:
output = output_reg.group(1).replace(",", "")
logging.getLogger("FastqIlluminaFilter").debug("__parse_stat_file, returning")
return [input, output]
......