Commit 16c08430 authored by Floreal Cabanettes
Browse files

Improve error detection on prepare

parent 3483037a
......@@ -3,6 +3,7 @@
import argparse
import time
import os
import sys
from split_fa import Splitter
from filter_contigs import Filter
from index import index_file
......@@ -20,7 +21,7 @@ def index_fasta(name, filepath, out_dir, type_f, dofilter = True):
if filepath.endswith(".gz") and dofilter:
uncompressed = filepath[:-3]
index = os.path.join(out_dir, type_f + ".idx")
success, nb_contigs = index_file(filepath, name, index, uncompressed)
success, nb_contigs, error = index_file(filepath, name, index, uncompressed)
if success:
is_filtered = False
if dofilter:
......@@ -45,6 +46,7 @@ def index_fasta(name, filepath, out_dir, type_f, dofilter = True):
os.remove(uncompressed)
else:
print("###ERR## Error while indexing %s file: %s" % (type_f, error), file=sys.stderr)
if uncompressed is not None:
try:
os.remove(uncompressed)
......@@ -82,7 +84,8 @@ with open(args.preptime_file, "w") as ptime:
index_split = os.path.join(out_dir, "query_split.idx")
splitter = Splitter(input_f=fasta_in, name_f=args.query_name, output_f=args.query_split,
query_index=index_split)
if splitter.split():
success, error = splitter.split()
if success:
filtered_fasta = os.path.join(os.path.dirname(args.query_split), "filtered_" +
os.path.basename(args.query_split))
filter_f = Filter(fasta=args.query_split,
......@@ -94,6 +97,7 @@ with open(args.preptime_file, "w") as ptime:
replace_fa=True)
filter_f.filter()
else:
print("###ERR## Error while loading query file: %s" % error, file=sys.stderr)
exit(1)
else:
print("Indexing query...")
......
......@@ -67,7 +67,9 @@ def index_file(fasta_path, fasta_name, out, write_fa=None):
with (io.TextIOWrapper(in_file) if compressed else in_file) as fasta:
contig = None
len_c = 0
nb_line = 0
for line in fasta:
nb_line += 1
if write_f is not None:
write_f.write(line)
line = line.strip("\n")
......@@ -79,12 +81,14 @@ def index_file(fasta_path, fasta_name, out, write_fa=None):
nb_contigs += 1
out_file.write("%s\t%d\n" % (contig, len_c))
else:
return False
return False, 0, "Error: contig is empty: %s" % contig
contig = re.split("\s", line[1:])[0]
len_c = 0
elif len(line) > 0:
if next_header or re.match(r"^[ATGCKMRYSWBVHDXN.\-]+$", line.upper()) is None:
return False
if next_header:
return False, 0, "Error: new header line expected at line %d" % nb_line
return False, 0, "Error: invalid sequence at line %d" % nb_line
len_c += len(line)
elif len(line) == 0:
next_header = True
......@@ -96,7 +100,7 @@ def index_file(fasta_path, fasta_name, out, write_fa=None):
if write_f is not None:
write_f.close()
return has_header, nb_contigs
return has_header, nb_contigs, ""
if __name__ == '__main__':
......@@ -108,7 +112,8 @@ if __name__ == '__main__':
parser.add_argument('-o', '--output', type=str, required=True, help="Output index file")
args = parser.parse_args()
if index_file(args.input, args.name, args.output):
success, message = index_file(args.input, args.name, args.output)
if success:
print("Success!")
else:
print("Error while building index")
print(message)
......@@ -37,7 +37,9 @@ class Splitter:
index_f.write(self.name_f + "\n")
chr_name = None
fasta_str = ""
nb_line = 0
for line in fasta:
nb_line += 1
line = line.strip("\n")
if re.match(r"^>.+", line) is not None:
has_header = True
......@@ -46,20 +48,22 @@ class Splitter:
self.nb_contigs += 1
self.flush_contig(fasta_str, chr_name, self.size_c, enc, index_f)
elif chr_name is not None:
return False
return False, "Error: contig is empty: %s" % chr_name
chr_name = re.split("\s", line[1:])[0]
fasta_str = ""
if self.debug:
print("Parsing contig \"%s\"... " % chr_name, end="")
elif len(line) > 0:
if next_header or re.match(r"^[ATGCKMRYSWBVHDXN.\-]+$", line.upper()) is None:
return False
if next_header:
return False, "Error: new header line expected at line %d" % nb_line
return False, "Error: invalid sequence at line %d" % nb_line
fasta_str += line
elif len(line) == 0:
next_header = True
self.nb_contigs += 1
self.flush_contig(fasta_str, chr_name, self.size_c, enc, index_f)
return has_header
return has_header, ""
@staticmethod
def write_contig(name, fasta, o_file):
......
......@@ -348,6 +348,17 @@ class JobManager:
else:
self.set_status_standalone(status)
@staticmethod
def find_error_in_log(log_file):
    """
    Extract the error message reported at the end of a job log file, if any.

    Only the last two lines of the file are inspected, the very last one
    first. A line is treated as an error report when it starts with one of
    the recognised markers; the marker is removed and trailing whitespace
    (including the newline) is stripped from the remainder.

    :param log_file: path of the log file to read
    :type log_file: str
    :return: the error message, or an empty string when the log is empty or
        neither of its last two lines carries an error marker
    :rtype: str
    """
    # "###ERR### " is the marker this method originally looked for. The
    # prepare script prints its errors with "###ERR## " (one '#' less), so
    # that spelling is recognised too; otherwise those messages are never
    # found. Neither marker is a prefix of the other, so order is irrelevant.
    markers = ("###ERR### ", "###ERR## ")
    with open(log_file, "r") as log:
        lines = log.readlines()
    # Slicing (rather than indexing) copes with logs of zero or one line.
    for line in reversed(lines[-2:]):
        for marker in markers:
            if line.startswith(marker):
                return line[len(marker):].rstrip()
    return ""
def launch_to_cluster(self, step, batch_system_type, command, args, log_out, log_err):
import drmaa
from dgenies.lib.drmaasession import DrmaaSession
......@@ -414,7 +425,9 @@ class JobManager:
self.update_job_status(status)
s.deleteJobTemplate(jt)
return status == "succeed" or status == "prepared"
self.update_job_status("fail")
error = self.find_error_in_log(log_err)
self.set_job_status("fail", error)
s.deleteJobTemplate(jt)
return False
......@@ -825,7 +838,7 @@ class JobManager:
split = True
splitter = Splitter(input_f=fasta_in, name_f=self.query.get_name(), output_f=self.get_query_split(),
query_index=self.query_index_split, debug=DEBUG)
success = splitter.split()
success, error = splitter.split()
nb_contigs = splitter.nb_contigs
in_fasta = self.get_query_split()
else:
......@@ -833,8 +846,8 @@ class JobManager:
uncompressed = None
if self.query.get_path().endswith(".gz"):
uncompressed = self.query.get_path()[:-3]
success, nb_contigs = index_file(self.query.get_path(), self.query.get_name(), self.idx_q,
uncompressed)
success, nb_contigs, error = index_file(self.query.get_path(), self.query.get_name(), self.idx_q,
uncompressed)
in_fasta = self.query.get_path()
if uncompressed is not None:
in_fasta = uncompressed
......@@ -850,14 +863,14 @@ class JobManager:
replace_fa=True)
filter_f.filter()
else:
self.set_job_status("fail", "<br/>".join(["Query fasta file is not valid!", error_tail]))
self.set_job_status("fail", "<br/>".join(["Query fasta file is not valid:", error, error_tail]))
if self.config.send_mail_status:
self.send_mail_post()
return False
uncompressed = None
if self.target.get_path().endswith(".gz"):
uncompressed = self.target.get_path()[:-3]
success, nb_contigs = index_file(self.target.get_path(), self.target.get_name(), self.idx_t, uncompressed)
success, nb_contigs, error = index_file(self.target.get_path(), self.target.get_name(), self.idx_t, uncompressed)
if success:
in_fasta = self.target.get_path()
if uncompressed is not None:
......@@ -885,7 +898,7 @@ class JobManager:
os.remove(uncompressed)
except FileNotFoundError:
pass
self.set_job_status("fail", "<br/>".join(["Target fasta file is not valid!", error_tail]))
self.set_job_status("fail", "<br/>".join(["Target fasta file is not valid:", error, error_tail]))
if self.config.send_mail_status:
self.send_mail_post()
return False
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment