Commit 80c9bd62 authored by Floreal Cabanettes's avatar Floreal Cabanettes
Browse files

Add download of fasta of query contigs assembled as chromosomes, Fixes #36

parent eea79ad9
......@@ -17,6 +17,8 @@ ALLOWED_EXTENSIONS = ['fa', 'fasta', 'fna', 'fa.gz', 'fasta.gz', 'fna.gz']
class Functions:
config = AppConfigReader()
@staticmethod
def allowed_file(filename):
return '.' in filename and \
......@@ -140,14 +142,13 @@ class Functions:
return j1.email
@staticmethod
def send_fasta_ready(mailer, job_name, sample_name, compressed=False):
config = AppConfigReader()
web_url = config.web_url
def send_fasta_ready(mailer, job_name, sample_name, compressed=False, path="fasta-query", status="success"):
web_url = Functions.config.web_url
with open(os.path.join(os.path.dirname(os.path.realpath(__file__)), "mail_templates", "dl_fasta.html")) \
as t_file:
template = Template(t_file.read())
message_html = template.render(job_name=job_name, status="success", url_base=web_url,
sample_name=sample_name, compressed=compressed)
message_html = template.render(job_name=job_name, status=status, url_base=web_url,
sample_name=sample_name, compressed=compressed, path=path)
message = "D-Genies\n\n" \
"Job %s - Download fasta\n\n" % job_name
message += "Query fasta file for job %s (query: %s) is ready to download.\n" % (job_name, sample_name)
......@@ -156,6 +157,8 @@ class Functions:
mailer.send_mail([Functions.get_mail_for_job(job_name)], "Job %s - Download fasta" % job_name, message,
message_html)
@staticmethod
def sort_fasta(job_name, fasta_file, index_file, lock_file, compress=False, mailer=None):
index, sample_name = Functions.read_index(index_file)
......@@ -229,4 +232,3 @@ class Functions:
"time_elapsed": Functions.get_readable_time(item.job.time_elapsed)
})
return items
......@@ -29,7 +29,7 @@
<p>Hi,</p>
{% if status == "success" %}
<p>Query fasta file for job {{ job_name }} (query: {{ sample_name }}) is ready to download.<br/>
You can <a href="{{ url_base }}/fasta-query/{{ job_name }}/{{ sample_name }}.fasta{% if compressed %}.gz{% endif %}">click here</a> to download it.</p>
You can <a href="{{ url_base }}/{{ path }}/{{ job_name }}/{{ sample_name }}.fasta{% if compressed %}.gz{% endif %}">click here</a> to download it.</p>
{% else %}
<p>Build of query fasta file for job {{ job_name }} has failed. You can try again. If the problem persists, please contact the support.</p>
{% endif %}
......
......@@ -5,24 +5,29 @@ import shutil
from math import sqrt
from numpy import mean
from pathlib import Path
import matplotlib as mpl
mpl.use('Agg')
from matplotlib import pyplot as plt
import json
from dgenies.bin.index import Index
from dgenies.lib.functions import Functions
from intervaltree import IntervalTree
import matplotlib as mpl
mpl.use('Agg')
from matplotlib import pyplot as plt
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
class Paf:
limit_idy = [0.25, 0.5, 0.75]
max_nb_lines = 100000
def __init__(self, paf: str, idx_q: str, idx_t: str, auto_parse: bool=True):
def __init__(self, paf: str, idx_q: str, idx_t: str, auto_parse: bool=True, mailer=None, id_job=None):
self.paf = paf
self.idx_q = idx_q
self.idx_t = idx_t
self.sorted = False
if os.path.exists(os.path.join(os.path.dirname(paf), ".sorted")):
self.data_dir = os.path.dirname(paf)
if os.path.exists(os.path.join(self.data_dir, ".sorted")):
self.paf += ".sorted"
self.idx_q += ".sorted"
self.sorted = True
......@@ -41,6 +46,8 @@ class Paf:
self.name_t = None
self.parsed = False
self.error = False
self.mailer = mailer
self.id_job = id_job
if auto_parse:
self.parse_paf()
......@@ -444,6 +451,27 @@ class Paf:
query_on_target[contig] = None
return query_on_target
def get_queries_on_target_association(self):
    """
    Group the query contigs by the target they match best.

    The first element returned by ``compute_gravity_contigs`` is read as a mapping
    ``{query contig: {target: size}}``. Each contig is assigned to the target having the
    largest strictly positive size (the first one met wins on ties); a contig without any
    such target is left out of the result.

    :return: dict mapping each target name to the list of query contigs assigned to it
    """
    blocks_by_contig = self.compute_gravity_contigs()[0]
    assoc = {}
    for query, target_sizes in blocks_by_contig.items():
        # max() keeps the first maximal entry, exactly like a strict ">" scan does
        best_target, best_size = max(target_sizes.items(), key=lambda kv: kv[1], default=(None, 0))
        if best_target is None or not best_size > 0:
            continue
        assoc.setdefault(best_target, []).append(query)
    return assoc
def build_query_on_target_association_file(self):
"""
For each query, get the best matching chromosome and save it to a CSV file.
......@@ -645,3 +673,54 @@ class Paf:
txt = summary_file.read()
return json.loads(txt)
return None
def build_query_chr_as_reference(self):
    """
    Build a fasta file in which the query contigs are assembled as the target chromosomes.

    For each target of ``self.t_order`` having associated query contigs, these contigs are
    concatenated following ``self.q_order``, reverse-complemented when flagged in
    ``self.q_reversed``, and separated by 100 "N". The query contigs associated with no
    target are then written with an ``_unaligned`` suffix added to their id.
    When a mailer is set, a mail reporting the success or the failure of the build is sent.

    :return: path of the built fasta file, or None if the build failed
    """
    import traceback  # local import: only used to report a failed build

    spacer = 100 * "N"
    sample_name = "as_reference"  # fallback name for the mail when the build fails early
    o_fasta = None
    tmp_fasta = None  # uncompressed copy of a gzipped query, always removed at the end
    query_f = None
    try:
        if not self.sorted:
            raise Exception("Contigs must be sorted to do that!")
        contigs_assoc = self.get_queries_on_target_association()
        with open(os.path.join(self.data_dir, ".query")) as query_file:
            query_fasta = query_file.read().strip("\n")
        if not os.path.isfile(query_fasta):
            raise Exception("Query fasta does not exists")
        if query_fasta.endswith(".gz"):
            query_fasta = tmp_fasta = Functions.uncompress(query_fasta)
        # The mail template builds the download link as "<sample_name>.fasta": name the
        # output file accordingly, stripping only the last extension of the query file name.
        sample_name = "as_reference_" + os.path.basename(query_fasta).rsplit(".", 1)[0]
        out_path = os.path.join(os.path.dirname(query_fasta), sample_name + ".fasta")
        query_f = SeqIO.index(query_fasta, "fasta")
        # Rank of each query (first occurrence), computed once instead of calling
        # list.index() for every sort key
        q_rank = {}
        for rank, name in enumerate(self.q_order):
            q_rank.setdefault(name, rank)
        mapped_queries = set()
        with open(out_path, "w") as out:
            o_fasta = out_path  # from now on a (possibly partial) output file exists
            for target in self.t_order:
                if target not in contigs_assoc:
                    continue
                parts = []
                for query in sorted(contigs_assoc[target], key=q_rank.__getitem__):
                    mapped_queries.add(query)
                    query_seq = query_f[query].seq
                    if self.q_reversed[query]:
                        query_seq = query_seq.reverse_complement()
                    parts.append(str(query_seq))
                record = SeqRecord(Seq(spacer.join(parts)), id=target, name=target, description=target)
                SeqIO.write(record, out, "fasta")
            for contig in self.q_order:
                if contig not in mapped_queries:
                    record = query_f[contig]
                    record.id += "_unaligned"
                    SeqIO.write(record, out, "fasta")
        status = "success"
    except Exception:
        # Background job boundary: report the error, then notify the failure by mail
        traceback.print_exc()
        if o_fasta is not None and os.path.isfile(o_fasta):
            os.remove(o_fasta)  # do not leave a truncated file available for download
        o_fasta = None
        status = "fail"
    finally:
        if query_f is not None:
            query_f.close()
        if tmp_fasta is not None and os.path.isfile(tmp_fasta):
            os.remove(tmp_fasta)

    if self.mailer is not None:
        Functions.send_fasta_ready(mailer=self.mailer,
                                   job_name=self.id_job,
                                   sample_name=sample_name,
                                   compressed=False,
                                   path="download",
                                   status=status)
    return o_fasta
......@@ -103,10 +103,12 @@ d3.boxplot.launch = function(res, update=false, noise_change=false) {
if (res["sorted"]) {
$("input#sort-contigs").val("Undo sort");
$("#export").find("select option[value=4]").show();
$("#export").find("select option[value=8]").show();
}
else {
$("input#sort-contigs").val("Sort contigs");
$("#export").find("select option[value=4]").hide();
$("#export").find("select option[value=8]").hide();
}
d3.boxplot.name_x = res["name_x"];
d3.boxplot.name_y = res["name_y"];
......
......@@ -141,6 +141,19 @@ dgenies.result.export.export_no_association_file = function (to) {
})
};
dgenies.result.export.export_query_as_reference_fasta = function () {
    // Ask the server to build the "query assembled as reference" Fasta file of the current
    // result. On success, the notification announces that the download link will come by mail.
    dgenies.post(`/build-query-as-reference/${dgenies.result.id_res}`,
        {},
        function (data) {
            if (data["success"]) {
                dgenies.notify("You will receive a mail soon with the link to download your Fasta file", "success");
            }
            else {
                // "danger" is the notification type this file uses for errors
                dgenies.notify("An error has occurred. Please contact the support", "danger");
            }
        });
};
dgenies.result.export.export = function () {
let select = $("form#export select");
let selection = parseInt(select.val());
......@@ -166,6 +179,9 @@ dgenies.result.export.export = function () {
else if (selection === 7) {
dgenies.result.export.export_no_association_file("target");
}
else if (selection === 8) {
dgenies.result.export.export_query_as_reference_fasta();
}
else
dgenies.notify("Not supported yet!", "danger", 2000);
if (!async)
......
......@@ -42,6 +42,7 @@
<option value="2">Png</option>
<option value="3">Paf file</option>
<option value="4">Query Fasta</option>
<option value="8">Query assembled as reference</option>
<option value="5">Association table</option>
<option value="6">No match queries</option>
<option value="7">No match targets</option>
......
......@@ -331,6 +331,26 @@ def build_fasta(id_res):
"message": "Unable to get fasta file for query. Please contact us to report the bug"})
@app.route('/build-query-as-reference/<id_res>', methods=['POST'])
def get_query_as_reference(id_res):
    """
    Start the build of the "query assembled as reference" fasta file of a job.

    The PAF file of the job is parsed within the request, then
    ``Paf.build_query_chr_as_reference`` is started in a timer thread, so the response is
    sent without waiting for the build to finish.

    :param id_res: job id, used as the name of the job folder inside APP_DATA
    :return: JSON response ``{"success": true}``
    """
    job_dir = os.path.join(APP_DATA, id_res)
    paf = Paf(os.path.join(job_dir, "map.paf"),
              os.path.join(job_dir, "query.idx"),
              os.path.join(job_dir, "target.idx"),
              False,
              mailer=mailer,
              id_job=id_res)
    paf.parse_paf(False, True)
    # Zero-delay timer: the build starts right away, in its own thread
    threading.Timer(0, paf.build_query_chr_as_reference).start()
    return jsonify({"success": True})
@app.route('/download/<id_res>/<filename>')
def download_file(id_res, filename):
    """
    Send a file stored in the data folder of a job.

    :param id_res: job id (name of the job folder inside APP_DATA)
    :param filename: name of the file to send, located directly inside the job folder
    :return: the file, or a 404 error if it does not exist or is not inside the job folder
    """
    data_root = os.path.abspath(APP_DATA)
    job_dir = os.path.abspath(os.path.join(data_root, id_res))
    file_dl = os.path.abspath(os.path.join(job_dir, filename))
    # Both parts come from the URL: once normalized, the job folder must be a direct child
    # of APP_DATA and the file a direct child of the job folder, which rejects "." and ".."
    inside_job = os.path.dirname(job_dir) == data_root and os.path.dirname(file_dl) == job_dir
    if inside_job and os.path.isfile(file_dl):
        return send_file(file_dl)
    return abort(404)
@app.route('/fasta-query/<id_res>', defaults={'filename': ""}, methods=['GET'])
@app.route('/fasta-query/<id_res>/<filename>', methods=['GET']) # Use fake URL in mail to set download file name
def dl_fasta(id_res, filename):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment