Commit 7584cd26 authored by Floreal Cabanettes

Add support to index gzipped fasta + remove samtools requirement

parent ef604d19
......@@ -3,41 +3,29 @@
set -e
minimap_exec=$1
samtools_exec=$2
nb_threads=$3
fasta_t=$4
fasta_q=$5
query=$6
target=$7
paf=$8
paf_raw=$9
out_dir=${10}
# Index fasta files:
${samtools_exec} faidx ${fasta_q}
nb_threads=$2
fasta_t=$3
fasta_q=$4
query=$5
target=$6
paf=$7
paf_raw=$8
out_dir=$9
# Run minimap:
if [ "$fasta_t" != "NONE" ]; then
${samtools_exec} faidx ${fasta_t}
echo "Running: ${minimap_exec} -t ${nb_threads} ${fasta_t} ${fasta_q} > ${paf_raw}"
${minimap_exec} -t ${nb_threads} ${fasta_t} ${fasta_q} > ${paf_raw}
# Parse paf raw file:
build_indexes.py -q ${fasta_q} -t ${fasta_t} -o ${out_dir} -r ${query} -u ${target}
else
echo "Running: ${minimap_exec} -t ${nb_threads} -X ${fasta_q} ${fasta_q} > ${paf_raw}"
${minimap_exec} -t ${nb_threads} -X ${fasta_q} ${fasta_q} > ${paf_raw}
# Parse paf raw file:
build_indexes.py -q ${fasta_q} -t ${fasta_q} -o ${out_dir} -r ${query} -u ${query}
fi
# Sort PAF file:
......
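With the samtools step gone, the wrapper now takes nine positional arguments instead of ten. A minimal sketch of how a caller would invoke the updated script (paths and names below are placeholders, not taken from the commit):

import subprocess

# Argument order matches the new run_minimap2.sh: $1 minimap2 executable, $2 threads,
# $3 target fasta (or "NONE" for query-vs-query mapping), $4 query fasta, $5 query name,
# $6 target name, $7 sorted PAF, $8 raw PAF, $9 output directory.
subprocess.run([
    "run_minimap2.sh",
    "minimap2",            # placeholder executable path
    "4",
    "target.fa.gz",
    "query.fa.gz",
    "my_query",
    "my_target",
    "map.paf",
    "map_raw.paf",
    "/tmp/dgenies_job",
], check=True)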
......@@ -4,7 +4,7 @@ import subprocess
import datetime
import threading
import gzip
import traceback
import io
from config_reader import AppConfigReader
from pony.orm import db_session, select
from database import db, Job
......@@ -36,25 +36,6 @@ class JobManager:
self.idx_t = os.path.join(self.output_dir, "target.idx")
self.logs = os.path.join(self.output_dir, "logs.txt")
@staticmethod
def _decompress(filename):
try:
uncompressed = filename.rsplit('.', 1)[0]
parts = uncompressed.rsplit("/", 1)
file_path = parts[0]
basename = parts[1]
n = 2
while os.path.exists(uncompressed):
uncompressed = "%s/%d_%s" % (file_path, n, basename)
n += 1
with open(filename, "rb") as infile, open(uncompressed, "wb") as outfile:
outfile.write(gzip.decompress(infile.read()))
os.remove(filename)
return uncompressed
except Exception as e:
print(traceback.format_exc())
return None
def __check_job_success_local(self):
if os.path.exists(self.paf):
if os.path.getsize(self.paf) > 0:
......@@ -71,7 +52,7 @@ class JobManager:
@db_session
def __launch_local(self):
cmd = ["run_minimap2.sh", self.minimap2, self.samtools, self.threads,
cmd = ["run_minimap2.sh", self.minimap2, self.threads,
self.target.get_path() if self.target is not None else "NONE", self.query.get_path(),
self.query.get_name(), self.target.get_name(), self.paf, self.paf_raw, self.output_dir]
with open(self.logs, "w") as logs:
......@@ -88,14 +69,10 @@ class JobManager:
def __getting_local_file(self, fasta: Fasta):
finale_path = os.path.join(self.output_dir, os.path.basename(fasta.get_path()))
shutil.move(fasta.get_path(), finale_path)
if finale_path.endswith(".gz"):
finale_path = self._decompress(finale_path)
return finale_path
def __getting_file_from_url(self, fasta: Fasta):
finale_path = wget.download(fasta.get_path(), self.output_dir, None)
if finale_path.endswith(".gz"):
finale_path = self._decompress(finale_path)
return finale_path
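Because _decompress has been removed, gzipped uploads and downloads keep their .gz suffix and are read in place. A minimal sketch of the text-mode gzip reading pattern the new index_file relies on (the file name is a placeholder):

import gzip
import io

path = "query.fa.gz"  # placeholder; any fasta kept compressed on disk
with gzip.open(path) as raw, io.TextIOWrapper(raw) as handle:
    for line in handle:
        if line.startswith(">"):
            print(line.strip())  # header lines, read without writing an uncompressed copy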
@db_session
......@@ -150,13 +127,44 @@ class JobManager:
self.target.set_name(filename)
else:
correct = False
if correct:
return correct
@db_session
def start_job(self):
success = self.getting_files()
if success:
job = Job.get(id_job=self.id_job)
job.status = "indexing"
db.commit()
self.index_file(self.query, os.path.join(self.output_dir, "query.idx"))
self.index_file(self.target, os.path.join(self.output_dir, "target.idx"))
job = Job.get(id_job=self.id_job)
job.status = "waiting"
db.commit()
if self.batch_system_type == "local":
self.__launch_local()
@staticmethod
def index_file(fasta: Fasta, out):
compressed = fasta.get_path().endswith(".gz")
with (gzip.open(fasta.get_path()) if compressed else open(fasta.get_path())) as in_file, \
open(out, "w") as out_file:
out_file.write(fasta.get_name() + "\n")
with (io.TextIOWrapper(in_file) if compressed else in_file) as fasta:
contig = None
len_c = 0
for line in fasta:
line = line.strip("\n")
if line.startswith(">"):
if contig is not None:
out_file.write("%s\t%d\n" % (contig, len_c))
contig = line[1:].split(" ")[0]
len_c = 0
elif len(line) > 0:
len_c += len(line)
if contig is not None and len_c > 0:
out_file.write("%s\t%d\n" % (contig, len_c))
@db_session
def launch(self):
j1 = select(j for j in Job if j.id_job == self.id_job)
......@@ -169,7 +177,7 @@ class JobManager:
db.commit()
if not os.path.exists(self.output_dir):
os.mkdir(self.output_dir)
thread = threading.Timer(1, self.getting_files)
thread = threading.Timer(1, self.start_job)
thread.start()
else:
job = Job(id_job=self.id_job, email=self.email, batch_type=self.batch_system_type,
......
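For reference, the .idx files written by index_file start with the sequence name and then list one contig and its length per line, tab-separated. A hedged usage sketch; the stand-in below only mimics the get_path()/get_name() interface that index_file uses, since the real Fasta constructor is not shown in this commit:

from lib.job_manager import JobManager

class FastaStub:
    """Stand-in for lib.Fasta.Fasta, exposing only what index_file calls."""
    def get_path(self):
        return "/tmp/dgenies_job/query.fa.gz"  # placeholder; plain or gzipped both work
    def get_name(self):
        return "my_query"

JobManager.index_file(FastaStub(), "/tmp/dgenies_job/query.idx")
# query.idx would then contain, e.g.:
# my_query
# contig_1	152845
# contig_2	98012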
import os
class uploadfile():
def __init__(self, name, type=None, size=None, not_allowed_msg=''):
class UploadFile:
def __init__(self, name, type_f=None, size=None, not_allowed_msg=''):
self.name = name
self.type = type
self.type = type_f
self.size = size
self.not_allowed_msg = not_allowed_msg
self.url = "data/%s" % name
def get_file(self):
if self.type != None:
if self.type is not None:
# POST an image
if self.type.startswith('image'):
return {"name": self.name,
......
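The renamed class follows PEP 8 and no longer shadows the type builtin; callers pass the MIME type through type_f. A small illustrative instantiation (values are placeholders):

from lib.upload_file import UploadFile

accepted = UploadFile(name="contigs.fa.gz", type_f="application/gzip", size=204800)
rejected = UploadFile(name="notes.txt", type_f="text/plain", size=0,
                      not_allowed_msg="File type not allowed")
print(accepted.get_file())
print(rejected.get_file())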
......@@ -45,12 +45,6 @@ class AppConfigReader(object):
except NoOptionError:
return "minimap2"
def get_samtools_exec(self):
try:
return self.reader.get("softwares", "samtools")
except NoOptionError:
return "samtools"
def get_database(self):
try:
return self.reader.get("database", "sqlite_file")
......
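With get_samtools_exec removed, the [softwares] section of the configuration only needs a minimap2 entry; the reader falls back to "minimap2" on the PATH when the option is absent. A hypothetical snippet of that section, parsed with the standard library for illustration (the minimap2 option name is inferred from the fallback value, and the binary path is a placeholder):

from configparser import RawConfigParser

reader = RawConfigParser()
reader.read_string("""
[softwares]
minimap2 = /usr/local/bin/minimap2
""")
print(reader.get("softwares", "minimap2"))  # samtools is no longer looked up here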
......@@ -2,13 +2,12 @@
import time
import datetime
from flask import Flask, render_template, request, redirect, flash, url_for, jsonify, session
from werkzeug.utils import secure_filename
from flask import Flask, render_template, request, url_for, jsonify, session
from lib.paf import Paf
from config_reader import AppConfigReader
from lib.job_manager import JobManager
from lib.functions import *
from lib.upload_file import uploadfile
from lib.upload_file import UploadFile
from lib.Fasta import Fasta
import sys
......@@ -184,7 +183,7 @@ def upload():
mime_type = files.content_type
if not allowed_file(files.filename):
result = uploadfile(name=filename, type=mime_type, size=0, not_allowed_msg="File type not allowed")
result = UploadFile(name=filename, type_f=mime_type, size=0, not_allowed_msg="File type not allowed")
else:
# save file to disk
......@@ -195,7 +194,7 @@ def upload():
size = os.path.getsize(uploaded_file_path)
# return json for js call back
result = uploadfile(name=filename, type=mime_type, size=size)
result = UploadFile(name=filename, type_f=mime_type, size=size)
return jsonify({"files": [result.get_file()], "success": "OK"})
......
......@@ -13,6 +13,10 @@
<p>Your job has been submitted. We are retrieving data...<br/>
You will receive an email when it is finished.<br/>
Alternatively, you can refresh this page to update status.</p>
{% elif status == "indexing" %}
<p>Your job has been submitted. We are indexing fasta files...<br/>
You will receive an email when it is finished.<br/>
Alternatively, you can refresh this page to update status.</p>
{% elif status == "waiting" %}
<p>Your job has been scheduled. It will start soon...<br/>
You will receive an email when it is finished.<br/>
......