Commit 7915668e authored by Floreal Cabanettes's avatar Floreal Cabanettes
Browse files

Change strategy to be sort-compatible

parent b537129e
......@@ -6,16 +6,15 @@ Short desc: Prepare the PAF file from minimap output to be loaded by the program
Details: change coordinates of matches to be bounded one to another
Usage:
prepare_paf.py -i IN -q FASTA1 -t FASTA2 -o OUT [-r NAME1] [-u NAME2]
prepare_paf.py -q FASTA1 -t FASTA2 -o OUT [-r NAME1] [-u NAME2]
prepare_paf.py -v | --version
Options:
-i --input=IN Input PAF file generated by minimap
-q --query=FASTA1 Query fasta file compared with minimap
-t --target=FASTA2 Target fasta file compared with minimap
-r --query-name=NAME1 Query name
-u --target-name=NAME2 Target name
-o --output=OUT Output PAF file
-o --output=OUT Output directory
-h --help Show this screen
-v --version Show version
"""
......@@ -24,6 +23,7 @@ __NAME__ = "PreparePAF"
__VERSION__ = 0.1
import os
from shutil import copyfile
from docopt import docopt
from collections import OrderedDict
......@@ -55,38 +55,17 @@ class Fasta:
def build_index(self, filename):
with open(filename, "w") as idx:
idx.write(self.name + "\n")
for contig, props in self.contigs.items():
idx.write(contig + "\t" + str(props["length"]) + "\n")
def build_new_paf_file(paf_in: str, paf_out: str, query: Fasta, target: Fasta):
with open(paf_in, "r") as paf:
with open(paf_out, "w") as paf_o:
for line in paf:
parts = line.strip("\n").split("\t")
q_name = parts[0]
q_start = query.get_contig(q_name)["start"]
parts[0] = query.name # Change query name
parts[1] = str(query.total_length) # Change length of query
parts[2] = str(int(parts[2]) + q_start) # Change start of query
parts[3] = str(int(parts[3]) + q_start) # Change end of query
t_name = parts[5]
parts[5] = target.name # Change target name
t_start = target.get_contig(t_name)["start"]
parts[6] = str(target.total_length) # Change length of query
parts[7] = str(int(parts[7]) + t_start) # Change start for target
parts[8] = str(int(parts[8]) + t_start) # Change end for target
paf_o.write("\t".join(parts) + "\n")
def init(input_f, output_f, query, target, query_name=None, target_name=None):
def init(output_d, query, target, query_name=None, target_name=None):
query = Fasta(query, query_name)
target = Fasta(target, target_name)
build_new_paf_file(input_f, output_f, query, target)
basedir = os.path.dirname(output_f)
i = 0
for fasta in [query, target]:
idx_file = os.path.join(basedir, "query.idx" if i == 0 else "target.idx")
idx_file = os.path.join(output_d, "query.idx" if i == 0 else "target.idx")
fasta.build_index(idx_file)
i += 1
......@@ -100,5 +79,4 @@ if __name__ == '__main__':
raise Exception("Fasta file %s is not indexed!" % args["--query"])
if not os.path.exists(args["--target"] + ".fai"):
raise Exception("Fasta file %s is not indexed!" % args["--target"])
init(args["--input"], args["--output"], args["--query"], args["--target"], args["--query-name"],
args["--target-name"])
init(args["--output"], args["--query"], args["--target"], args["--query-name"], args["--target-name"])
......@@ -9,18 +9,15 @@ fasta_t=$4
fasta_q=$5
query=$6
target=$7
paf_raw=$8
paf=$9
paf=$8
out_dir=$9
# Index fasta files:
${samtools_exec} faidx ${fasta_t}
${samtools_exec} faidx ${fasta_q}
# Run minimap:
${minimap_exec} -t ${nb_threads} ${fasta_t} ${fasta_q} > ${paf_raw}
${minimap_exec} -t ${nb_threads} ${fasta_t} ${fasta_q} > ${paf}
# Parse paf raw file:
prepare_paf.py -i ${paf_raw} -q ${fasta_q} -t ${fasta_t} -o ${paf} -r ${query} -u ${target}
# Remove raw file:
rm -f ${paf_raw}
\ No newline at end of file
build_indexes.py -q ${fasta_q} -t ${fasta_t} -o ${out_dir} -r ${query} -u ${target}
\ No newline at end of file
......@@ -85,7 +85,7 @@ input[type=range] {
#supdraw {
width:1020px;
height: 850px;
height: auto;
position: relative;
}
......
......@@ -56,7 +56,11 @@ d3.boxplot.init = function (id, from_file=false) {
$.post("/get_graph",
{"id": id},
function (data) {
d3.boxplot.launch(data);
if (data["success"])
d3.boxplot.launch(data);
else {
$("#supdraw").html($("<p>").html("This job does not exists!").css("margin-top", "15px"));
}
}
)
}
......
......@@ -25,7 +25,6 @@ class JobManager:
self.app_data = config_reader.get_app_data()
# Outputs:
self.output_dir = os.path.join(self.app_data, id_job)
self.paf_raw = os.path.join(self.output_dir, "map_raw.paf")
self.paf = os.path.join(self.output_dir, "map.paf")
self.idx_q = os.path.join(self.output_dir, "query.idx")
self.idx_t = os.path.join(self.output_dir, "target.idx")
......@@ -48,7 +47,7 @@ class JobManager:
@db_session
def __launch_local(self):
cmd = ["run_minimap2.sh", self.minimap2, self.samtools, self.threads, self.fasta_t, self.fasta_q, self.query,
self.target, self.paf_raw, self.paf]
self.target, self.paf, self.output_dir]
with open(self.logs, "w") as logs:
p = subprocess.Popen(cmd, stdout=logs, stderr=logs)
job = Job.get(id_job=self.id_job)
......
......@@ -38,45 +38,11 @@ class Paf:
"neg+": [],
"neg-": []
}
try:
with open(self.paf, "r") as paf_file:
for line in paf_file:
parts = line.strip("\n").split("\t")
v1 = parts[0]
v6 = parts[5]
ignore = False
strand = 1 if parts[4] == "+" else -1
idy = int(parts[9]) / int(parts[10]) * strand
min_idy = min(min_idy, idy)
max_idy = max(max_idy, idy)
if name_q is None:
name_q = v1
elif name_q != v1:
ignore = True
if not ignore:
name_t = v6
len_q = int(parts[1])
len_t = int(parts[6])
# x1, x2, y1, y2, idy
x1 = int(parts[2])
x2 = int(parts[3])
y1 = int(parts[7 if strand == 1 else 8])
y2 = int(parts[8 if strand == 1 else 7])
if idy < -self.limit_idy:
class_idy = "neg-"
elif idy < 0:
class_idy = "neg+"
elif idy < self.limit_idy:
class_idy = "pos-"
else:
class_idy = "pos+"
lines[class_idy].append([x1, x2, y1, y2, idy])
except IOError:
self.error = "PAF file does not exist!"
return False
q_abs_start = {}
q_abs_current_start = 0
try:
with open(self.idx_q, "r") as idx_q_f:
name_q = idx_q_f.readline().strip("\n")
q_order = []
q_contigs = {}
for line in idx_q_f:
......@@ -84,13 +50,18 @@ class Paf:
id_c = parts[0]
len_c = int(parts[1])
q_order.append(id_c)
q_abs_start[id_c] = q_abs_current_start
q_contigs[id_c] = len_c
q_abs_current_start += len_c
except IOError:
self.error = "Index file does not exist for query!"
return False
t_abs_start = {}
t_abs_current_start = 0
try:
with open(self.idx_t, "r") as idx_t_f:
name_t = idx_t_f.readline().strip("\n")
t_order = []
t_contigs = {}
for line in idx_t_f:
......@@ -98,11 +69,44 @@ class Paf:
id_c = parts[0]
len_c = int(parts[1])
t_order.append(id_c)
t_abs_start[id_c] = t_abs_current_start
t_contigs[id_c] = len_c
t_abs_current_start += len_c
except IOError:
self.error = "Index file does not exist for target!"
return False
len_q = q_abs_current_start
len_t = t_abs_current_start
try:
with open(self.paf, "r") as paf_file:
for line in paf_file:
parts = line.strip("\n").split("\t")
v1 = parts[0]
v6 = parts[5]
strand = 1 if parts[4] == "+" else -1
idy = int(parts[9]) / int(parts[10]) * strand
min_idy = min(min_idy, idy)
max_idy = max(max_idy, idy)
# x1, x2, y1, y2, idy
y1 = int(parts[2]) + q_abs_start[v1]
y2 = int(parts[3]) + q_abs_start[v1]
x1 = int(parts[7 if strand == 1 else 8]) + t_abs_start[v6]
x2 = int(parts[8 if strand == 1 else 7]) + t_abs_start[v6]
if idy < -self.limit_idy:
class_idy = "neg-"
elif idy < 0:
class_idy = "neg+"
elif idy < self.limit_idy:
class_idy = "pos-"
else:
class_idy = "pos+"
lines[class_idy].append([x1, x2, y1, y2, idy])
except IOError:
self.error = "PAF file does not exist!"
return False
self.parsed = True
self.len_q = len_q
self.len_t = len_t
......@@ -118,17 +122,17 @@ class Paf:
def get_d3js_data(self):
return {
'x_len': self.len_q,
'y_len': self.len_t,
'y_len': self.len_q,
'x_len': self.len_t,
'min_idy': self.min_idy,
'max_idy': self.max_idy,
'lines': self.lines,
'x_contigs': self.q_contigs,
'x_order': self.q_order,
'y_contigs': self.t_contigs,
'y_order': self.t_order,
'name_x': self.name_q,
'name_y': self.name_t,
'y_contigs': self.q_contigs,
'y_order': self.q_order,
'x_contigs': self.t_contigs,
'x_order': self.t_order,
'name_y': self.name_q,
'name_x': self.name_t,
'limit_idy': self.limit_idy
}
......
......@@ -56,7 +56,7 @@
</div><!--/.nav-collapse -->
</div>
</div>
<div class="container theme-showcase" role="main">
<div class="container theme-showcase" id="body-container" role="main">
{% block content %}
{% endblock %}
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment