Commit 992a5b3b authored by Audrey Gibert
Browse files

This commit resolves:

issue #97
issue #96, whose merge request has not been accepted yet
parent b8ff28c8
......@@ -21,11 +21,11 @@ import logging
from jflow.component import Component
def extract_stats_from_seq_file(input_file,output_file_stat):
def extract_stats_from_seq_file(input_file, output_file_stat):
import jflow.seqio as seqio
import logging
import os
logging.getLogger("AddRawFiles").debug("extract_stats_from_seq_files. Entering, working on "+input_file)
logging.getLogger("AddRawFiles").debug("extract_stats_from_seq_files. Entering, working on " + input_file)
nb_seq, full_size = 0, 0
try:
# Get nb_seq and full_size values
......@@ -41,25 +41,33 @@ def extract_stats_from_seq_file(input_file,output_file_stat):
logging.getLogger("AddRawFiles").debug("extract_stats_from_seq_files. finished")
def md5sum(md5_file_out, *files_to_md5sum):
def md5sum(md5_file_out, files_to_md5sum):
import jflow.seqio as seqio
import logging
import os
from subprocess import Popen, PIPE
logging.getLogger("AddRawFiles").debug("md5sum. entering")
logging.getLogger("AddRawFiles").debug("md5sum. files_to_md5sum in "+os.path.dirname(md5_file_out)+" = " + ",".join(files_to_md5sum))
p = Popen("md5sum " + " ".join(files_to_md5sum) +"|awk -F/ {'print $1,$NF'} > "+ md5_file_out ,shell=True,stdout=PIPE,stderr=PIPE,universal_newlines=True)
logging.getLogger("AddRawFiles").debug("md5sum. md5sum in this directory : "+os.path.dirname(md5_file_out))
logging.getLogger("AddRawFiles").debug("md5sum. Generating the md5sum.txt with this command : " +
"find " + os.path.dirname(files_to_md5sum) + " -maxdepth 1 -type f -not -name " + os.path.basename(md5_file_out) +
" -exec md5sum {} \; | awk -F/ {'print $1$NF'} > "+ md5_file_out)
p = Popen("find " + os.path.dirname(files_to_md5sum) + " -maxdepth 1 -type f -not -name " + os.path.basename(md5_file_out) + " -exec md5sum {} \;" +
"| awk -F/ {'print $1$NF'} > "+ md5_file_out, shell = True, stdout = PIPE, stderr = PIPE, universal_newlines = True)
stdout,stderr = p.communicate()
logging.getLogger("AddRawFiles").debug("md5sum. finished")
def add_stats_to_run (run_dump_path,*files):
def add_stats_to_run (run_dump_path,file):
import pickle
import logging
total_nb_seq = 0
total_size = 0
for curr_stat_file in files:
logging.getLogger("AddRawFiles").debug("add_stats_to_run. Content of " + curr_stat_file)
with open(curr_stat_file, 'r') as myfile:
logging.getLogger("AddRawFiles").debug("add_stats_to_run. Working on files of the directory containing this one : " + file)
file_list = [ f for f in os.listdir(os.path.dirname(file)) if f.endswith('.count')]
logging.getLogger("AddRawFiles").debug("add_stats_to_run. Length on the file_list : " + str(len(file_list)))
logging.getLogger("AddRawFiles").debug("add_stats_to_run. Files on the file_list : " + str(file_list))
for curr_stat_file in file_list:
logging.getLogger("AddRawFiles").debug("add_stats_to_run. Content of " + os.path.dirname(file) + "/" + curr_stat_file)
with open(os.path.dirname(file) + "/" + curr_stat_file, 'r') as myfile:
nb_seq,size=myfile.read().split(':')
logging.getLogger("AddRawFiles").debug("add_stats_to_run. Content of " + curr_stat_file + " nb_seq = " + str(nb_seq) + ", size = " + str(size))
size= int(size)
......@@ -149,14 +157,15 @@ class AddRawFiles (Component):
files_to_save_stats = self.get_outputs( '{basename_woext}.count', self.files_to_save)
logging.getLogger("AddRawFiles").debug("process. Before self.add_python_execution(extract_stats_from_seq_file")
#count number of reads and total length in base for each seq file
for i,o in zip(self.files_to_save,files_to_save_stats ):
for i,o in zip(self.files_to_save, files_to_save_stats ):
self.add_python_execution(extract_stats_from_seq_file,cmd_format="{EXE} {IN} {OUT}",
inputs = i, outputs = o, map=False)
logging.getLogger("AddRawFiles").debug("process. Before self.add_stats_to_run(extract_stats_from_seq_file")
logging.getLogger("AddRawFiles").debug("process. Dirname of files_to_save_stats[0] : "+ os.path.dirname(files_to_save_stats[0]))
#Add number of reads and total length in base for each seq file and add these data to the run in the database
self.add_python_execution(add_stats_to_run, cmd_format='{EXE} {ARG} {IN} > {OUT}', map=False,
outputs=self.stdout, inputs=files_to_save_stats,includes=self.files_to_save, arguments=[run_dump_path])
outputs = self.stdout, inputs = files_to_save_stats[0], includes=self.files_to_save, arguments=[run_dump_path])
#archive the files in the work folder of the run to be rsynced at the end
logging.getLogger("AddRawFiles").debug("process. Before copying/archiving files with compression = " + self.compression )
......@@ -173,7 +182,7 @@ class AddRawFiles (Component):
for idx, file in enumerate(files_to_sync):
self.add_python_execution(copy_file,cmd_format="{EXE} {IN} {OUT}",
inputs = self.files_to_save[idx], outputs = file, map=False)
#TODO possible inconsistancy : if the filename ends with one extension in Utils.UNCOMPRESS_EXTENSION, output file name won't be suffixed with gz in Utils.gzip
#TODO: possible inconsistancy : if the filename ends with one extension in Utils.UNCOMPRESS_EXTENSION, output file name won't be suffixed with gz in Utils.gzip
elif self.compression=="gz":
for file in self.files_to_save:
files_to_sync_ori.append(os.path.join(self.runobj.get_work_directory(),os.path.basename(file)+".gz"))
......@@ -183,7 +192,7 @@ class AddRawFiles (Component):
for idx, file in enumerate(self.files_to_save):
self.add_python_execution(zip_file,cmd_format="{EXE} {IN} {OUT}",
inputs = file, outputs = files_to_sync[idx], map=False)
#TODO possible inconsistancy : if the filename ends with one extension in Utils.UNCOMPRESS_EXTENSION, output file name won't be suffixed with bz2 in Utils.bz2
#TODO: possible inconsistancy : if the filename ends with one extension in Utils.UNCOMPRESS_EXTENSION, output file name won't be suffixed with bz2 in Utils.bz2
elif self.compression=="bz2":
for file in self.files_to_save:
files_to_md5_ori.append(os.path.join(self.runobj.get_work_directory(),os.path.basename(file)))
......@@ -213,8 +222,7 @@ class AddRawFiles (Component):
if (len(files_to_md5_ori)>0):
md5_file = os.path.join(self.runobj.get_work_directory(), "md5sum.txt")
self.add_python_execution(md5sum,cmd_format="{EXE} {OUT} {IN}",
inputs = files_to_md5, outputs = md5_file, map=False)
self.add_python_execution(md5sum, cmd_format="{EXE} {OUT} {IN}",
inputs = files_to_md5[0], outputs = md5_file, map=False)
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment