Commit 4c292dd6 authored by Floreal Cabanettes
Browse files

Add cluster mode for plot alignment mode

parent d84ad2c3
......@@ -8,7 +8,7 @@ from filter_contigs import Filter
from index import index_file
def index_fasta(name, filepath, out_dir, type_f):
def index_fasta(name, filepath, out_dir, type_f, dofilter = True):
"""
Index and filter fasta
:param name: name of the specie
......@@ -17,23 +17,25 @@ def index_fasta(name, filepath, out_dir, type_f):
:param type_f: type of fasta (query or target)
"""
uncompressed = None
if filepath.endswith(".gz"):
if filepath.endswith(".gz") and dofilter:
uncompressed = filepath[:-3]
index = os.path.join(out_dir, type_f + ".idx")
index = os.path.join(out_dir, type_f + ".idx")
success, nb_contigs = index_file(filepath, name, index, uncompressed)
if success:
in_fasta = filepath
if uncompressed is not None:
in_fasta = uncompressed
filtered_fasta = os.path.join(os.path.dirname(in_fasta), "filtered_" + os.path.basename(in_fasta))
filter_f = Filter(fasta=in_fasta,
index_file=index,
type_f=type_f,
min_filtered=nb_contigs / 4,
split=False,
out_fasta=filtered_fasta,
replace_fa=True)
is_filtered = filter_f.filter()
is_filtered = False
if dofilter:
in_fasta = filepath
if uncompressed is not None:
in_fasta = uncompressed
filtered_fasta = os.path.join(os.path.dirname(in_fasta), "filtered_" + os.path.basename(in_fasta))
filter_f = Filter(fasta=in_fasta,
index_file=index,
type_f=type_f,
min_filtered=nb_contigs / 4,
split=False,
out_fasta=filtered_fasta,
replace_fa=True)
is_filtered = filter_f.filter()
if uncompressed is not None:
if is_filtered:
os.remove(filepath)
......@@ -41,6 +43,7 @@ def index_fasta(name, filepath, out_dir, type_f):
save_file.write(uncompressed)
else:
os.remove(uncompressed)
else:
if uncompressed is not None:
try:
......@@ -53,16 +56,21 @@ def index_fasta(name, filepath, out_dir, type_f):
# Command-line interface of the preparation script.
# Each option is registered exactly once: declaring the same option strings
# twice makes argparse raise a "conflicting option string" error.
parser = argparse.ArgumentParser(description="Split huge contigs")
parser.add_argument('-q', '--query', type=str, required=False, help="Query fasta file")
parser.add_argument('-u', '--query-split', type=str, required=False, help="Query fasta file split")
# Target (and its name) are optional: the target indexing step further down
# only runs when a target was actually given.
parser.add_argument('-t', '--target', type=str, required=False, help="Target fasta file")
parser.add_argument('-n', '--query-name', type=str, required=False, help="Query name")
parser.add_argument('-m', '--target-name', type=str, required=False, help="Target name")
parser.add_argument('-s', '--size', type=int, required=False, default=10,
                    help="Max size of contigs (Mb) - for query split")
parser.add_argument('-p', '--preptime-file', type=str, required=True, help="File into save prep times")
parser.add_argument('--split', type=bool, const=True, nargs="?", required=False, default=False,
                    help="Split query")
parser.add_argument('--index-only', type=bool, const=True, nargs="?", required=False, default=False,
                    help="Index files only. No split, no filter.")
args = parser.parse_args()

if args.index_only and args.split:
    raise Exception("--index-only and --split arguments are mutually exclusive")

# Directory receiving the index files. os.path.dirname(None) raises TypeError,
# so the target directory can only be used when a target was given.
if args.target is not None:
    out_dir = os.path.dirname(args.target)
elif args.query is not None:
    # NOTE(review): assumes query and target live in the same job directory,
    # so the query's directory is an equivalent output location - confirm.
    out_dir = os.path.dirname(args.query)
else:
    raise Exception("At least one of --query or --target must be given")
with open(args.preptime_file, "w") as ptime:
......@@ -88,9 +96,13 @@ with open(args.preptime_file, "w") as ptime:
else:
exit(1)
else:
index_fasta(name=args.query_name, filepath=args.query, out_dir=out_dir, type_f="query")
print("Indexing target...")
index_fasta(name=args.target_name, filepath=args.target, out_dir=out_dir, type_f="target")
print("Indexing query...")
index_fasta(name=args.query_name, filepath=args.query, out_dir=out_dir, type_f="query",
dofilter=not args.index_only)
if args.target is not None:
print("Indexing target...")
index_fasta(name=args.target_name, filepath=args.target, out_dir=out_dir, type_f="target",
dofilter=not args.index_only)
ptime.write(str(round(time.time())) + "\n")
......
......@@ -830,31 +830,7 @@ class JobManager:
self.set_job_status("prepared")
self.run_job("local")
def prepare_dotplot_local(self):
    """
    Build the target and query index files locally when the alignment is already available.

    A file given with the ``idx`` extension is moved to its index location and its marker file
    (``.target`` / ``.query`` in the output directory) is removed; any other file is indexed with
    ``index_file``. Without a query, the target index is copied as the query index.
    """
    self.set_job_status("preparing")

    # Target index
    _, target_ext = os.path.splitext(self.target.get_path())
    if target_ext != ".idx":
        index_file(self.target.get_path(), self.target.get_name(), self.idx_t)
    else:
        shutil.move(self.target.get_path(), self.idx_t)
        os.remove(os.path.join(self.output_dir, ".target"))

    # Query index
    if self.query is None:
        # No query: the query index is a copy of the target index
        shutil.copy(self.idx_t, self.idx_q)
        return

    _, query_ext = os.path.splitext(self.query.get_path())
    if query_ext != ".idx":
        index_file(self.query.get_path(), self.query.get_name(), self.idx_q)
    else:
        shutil.move(self.query.get_path(), self.idx_q)
        os.remove(os.path.join(self.output_dir, ".query"))
def _end_of_prepare_dotplot(self):
# Parse alignment file:
if hasattr(parsers, self.aln_format):
getattr(parsers, self.aln_format)(self.align.get_path(), self.paf_raw)
......@@ -881,6 +857,78 @@ class JobManager:
if MODE == "webserver" and self.config.send_mail_status:
self.send_mail_post()
def prepare_dotplot_cluster(self, batch_system_type):
    """
    Prepare data on the cluster when the alignment is already available.

    Files already given as an index (``idx`` extension) are moved into place locally. The others
    are passed to the cluster prepare script, run with ``--index-only``. When that step succeeds
    (or is not needed), the preparation is finished with ``_end_of_prepare_dotplot``.

    :param batch_system_type: type of cluster
    """
    script_args = [self.config.cluster_prepare_script,
                   "-p", self.preptime_file, "--index-only"]

    # Becomes True as soon as one fasta still has to be indexed on the cluster
    needs_indexing = False

    _, target_ext = os.path.splitext(self.target.get_path())
    if target_ext != ".idx":
        needs_indexing = True
        script_args.extend(["-t", self.target.get_path(),
                            "-m", self.target.get_name()])
    else:
        shutil.move(self.target.get_path(), self.idx_t)
        os.remove(os.path.join(self.output_dir, ".target"))

    if self.query is not None:
        _, query_ext = os.path.splitext(self.query.get_path())
        if query_ext != ".idx":
            needs_indexing = True
            script_args.extend(["-q", self.query.get_path(),
                                "-n", self.query.get_name()])
        else:
            shutil.move(self.query.get_path(), self.idx_q)
            os.remove(os.path.join(self.output_dir, ".query"))

    # Nothing to submit when every file was already an index
    if needs_indexing:
        success = self.launch_to_cluster(step="prepare",
                                         batch_system_type=batch_system_type,
                                         command=self.config.cluster_python_exec,
                                         args=script_args,
                                         log_out=self.logs,
                                         log_err=self.logs)
    else:
        success = True

    if not success:
        if MODE == "webserver" and self.config.send_mail_status:
            self.send_mail_post()
        return

    if self.query is None:
        # No query: the query index is a copy of the target index
        shutil.copy(self.idx_t, self.idx_q)
    self._end_of_prepare_dotplot()
def prepare_dotplot_local(self):
    """
    Prepare data locally when the alignment is already available.

    Each of target and query is either moved into place (when given as an ``idx`` file, in which
    case its ``.target`` / ``.query`` marker file is removed) or indexed with ``index_file``.
    Without a query, the target index is copied as the query index. The preparation is then
    finished with ``_end_of_prepare_dotplot``.
    """
    self.set_job_status("preparing")

    # Target index
    _, target_ext = os.path.splitext(self.target.get_path())
    if target_ext != ".idx":
        index_file(self.target.get_path(), self.target.get_name(), self.idx_t)
    else:
        shutil.move(self.target.get_path(), self.idx_t)
        os.remove(os.path.join(self.output_dir, ".target"))

    # Query index
    if self.query is None:
        # No query: the query index is a copy of the target index
        shutil.copy(self.idx_t, self.idx_q)
    else:
        _, query_ext = os.path.splitext(self.query.get_path())
        if query_ext != ".idx":
            index_file(self.query.get_path(), self.query.get_name(), self.idx_q)
        else:
            shutil.move(self.query.get_path(), self.idx_q)
            os.remove(os.path.join(self.output_dir, ".query"))

    self._end_of_prepare_dotplot()
def prepare_data(self):
if self.align is None:
if MODE == "webserver":
......@@ -899,8 +947,7 @@ class JobManager:
if job.batch_type == "local":
self.prepare_dotplot_local()
else:
print("NOT IMPLEMENTED!")
# self.prepare_data_cluster(job.batch_type)
self.prepare_dotplot_cluster(job.batch_type)
else:
self.prepare_dotplot_local()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment