RAPPEL : Opération de maintenance > ForgeMIA indisponible le 20 Janvier entre 7h et 12h

Commit cbf64fab authored by Floreal Cabanettes
Browse files

Add docstrings to all lib package modules + little refactoring

parent a5fb93db
......@@ -9,6 +9,10 @@ from dgenies.config_reader import AppConfigReader
class Crons:
"""
Manage crontab jobs (webserver mode)
"""
def __init__(self, base_dir, debug):
self.base_dir = base_dir
self.debug = debug
......@@ -17,6 +21,12 @@ class Crons:
self.local_scheduler_pid_file = os.path.join(self.config.config_dir, ".local_scheduler_pid")
def clear(self, kill_scheduler=True):
"""
Clear all crons
:param kill_scheduler: if True, kill local scheduler currently running
:type kill_scheduler: bool
"""
# Remove old crons:
self.my_cron.remove_all(comment="dgenies")
self.my_cron.write()
......@@ -30,12 +40,18 @@ class Crons:
p.terminate()
def start_all(self):
    """
    (Re)install every cron job handled by this object

    Previously installed crons are removed first, without killing the local scheduler; the clean cron and the
    local scheduler launch cron are then initialized, in that order.
    """
    # False => leave a running local scheduler alone while the crons are rebuilt
    self.clear(False)
    self.init_clean_cron()
    self.init_launch_local_cron()
@staticmethod
def _get_python_exec():
"""
Get python executable path
"""
pyexec = sys.executable
match = re.match(r"^(.+)/lib/(python[^/]+)/((site-packages/bin/python)|())$", pyexec)
if match:
......@@ -44,6 +60,7 @@ class Crons:
def init_clean_cron(self):
"""
Initialize clean cron: will clear old jobs.
Clean cron is launched at 1h00am each day
"""
clean_time = self.config.cron_clean_time
......@@ -63,7 +80,6 @@ class Crons:
def init_launch_local_cron(self):
"""
Try to launch local scheduler (if not already launched)
:return:
"""
if self.base_dir is not None:
pyexec = self._get_python_exec()
......
class Singleton:
"""
Define a singleton (design pattern)
"""
def __init__(self, klass):
self.klass = klass
self.instance = None
......
......@@ -6,6 +6,9 @@ from .decorators import Singleton
@Singleton
class DrmaaSession:
"""
Initialize and close a DRMAA session (for job submission to a cluster)
"""
def __init__(self):
self.session = drmaa.Session()
......
class Fasta:
"""
Defines a fasta file: name of the sample, path to the fasta file, type of file (URL or local file), ...
"""
def __init__(self, name, path, type_f, example=False):
self._name = name
self._path = path
......@@ -6,19 +10,55 @@ class Fasta:
self._example = example is not False
def set_path(self, path):
    """
    Replace the stored location of the fasta file

    :param path: new location of the fasta file
    :type path: str
    """
    self._path = path
def get_path(self):
    """
    Give the location currently stored for the fasta file

    :return: fasta path
    :rtype: str
    """
    return self._path
def set_name(self, name):
    """
    Replace the stored sample name

    :param name: new sample name
    :type name: str
    """
    self._name = name
def get_name(self):
    """
    Give the sample name currently stored

    :return: sample name
    :rtype: str
    """
    return self._name
def get_type(self):
    """
    Give the kind of source of the fasta file (URL or local file)

    :return: type
    :rtype: str
    """
    return self._type
def is_example(self):
    """
    Tell whether the current sample is an example dataset

    :return: True if the current sample is an example dataset, else False
    :rtype: bool
    """
    return self._example
......@@ -22,10 +22,21 @@ ALLOWED_EXTENSIONS = {"fasta": ['fa', 'fasta', 'fna', 'fa.gz', 'fasta.gz', 'fna.
class Functions:
"""
General functions
"""
config = AppConfigReader()
@staticmethod
def allowed_file(filename, file_formats=("fasta",)):
"""
Check whether a file has a valid format
:param filename: file path
:param file_formats: accepted file formats
:return: True if valid format, else False
"""
for file_format in file_formats:
if '.' in filename and \
(filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS[file_format]
......@@ -37,13 +48,26 @@ class Functions:
def random_string(s_len):
    """
    Build a random string made of ASCII letters and digits

    :param s_len: number of characters to generate
    :type s_len: int
    :return: the random string
    :rtype: str
    """
    alphabet = string.ascii_letters + string.digits
    # One random.choice() draw per character, in order
    return "".join(random.choice(alphabet) for _ in range(s_len))
@staticmethod
def get_valid_uploaded_filename(filename, folder):
"""
Check whether uploaded file already exists. If yes, rename it
:param filename: uploaded file
:type filename: str
:param folder: folder in which to save the file
:type folder: str
:return: unique filename
:rtype: str
"""
file_query_s = os.path.join(folder, filename)
i = 2
filename_orig = filename
......@@ -55,6 +79,16 @@ class Functions:
@staticmethod
def __get_do_sort(fasta, is_sorted):
"""
Check whether query must be sorted (False if already done)
:param fasta: fasta file
:type fasta: str
:param is_sorted: True if it's sorted
:type is_sorted: bool
:return: do sort
:rtype: bool
"""
do_sort = False
if is_sorted:
do_sort = True
......@@ -64,6 +98,18 @@ class Functions:
@staticmethod
def get_fasta_file(res_dir, type_f, is_sorted):
"""
Get fasta file path
:param res_dir: job results directory
:type res_dir: str
:param type_f: type of file (query or target)
:type type_f: str
:param is_sorted: is fasta sorted
:type is_sorted: bool
:return: fasta file path
:rtype: str
"""
fasta_file = None
try:
with open(os.path.join(res_dir, "." + type_f), "r") as save_name:
......@@ -88,6 +134,14 @@ class Functions:
@staticmethod
def uncompress(filename):
"""
Uncompress a gzipped file
:param filename: gzipped file
:type filename: str
:return: path of the uncompressed file
:rtype: str
"""
try:
uncompressed = filename.rsplit('.', 1)[0]
parts = uncompressed.rsplit("/", 1)
......@@ -106,6 +160,14 @@ class Functions:
@staticmethod
def compress(filename):
"""
Compress a file with gzip
:param filename: file to compress
:type filename: str
:return: path of the compressed file
:rtype: str
"""
try:
if not filename.endswith(".gz") and not filename.endswith(".gz.sorted"):
compressed = filename + ".gz" if not filename.endswith(".sorted") else filename[:-7] + ".gz.sorted"
......@@ -127,6 +189,16 @@ class Functions:
@staticmethod
def read_index(index_file):
"""
Load index of query or target
:param index_file: index file path
:type index_file: str
:return:
* [0] index (size of each chromosome) {dict}
* [1] sample name {str}
:rtype: (dict, str)
"""
index = OrderedDict()
with open(index_file, "r") as index_f:
# Sample name without special chars:
......@@ -145,6 +217,14 @@ class Functions:
@staticmethod
def get_mail_for_job(id_job):
"""
Retrieve associated mail for a job
:param id_job: job id
:type id_job: int
:return: associated mail address
:rtype: str
"""
from dgenies.database import Job
with Job.connect():
j1 = Job.get(Job.id_job == id_job)
......@@ -153,6 +233,24 @@ class Functions:
@staticmethod
def send_fasta_ready(mailer, job_name, sample_name, compressed=False, path="fasta-query", status="success",
ext="fasta"):
"""
Send link to fasta file when treatment ended
:param mailer: mailer object
:type mailer: Mailer
:param job_name: job id
:type job_name: str
:param sample_name: sample name
:type sample_name: str
:param compressed: is a compressed fasta file
:type compressed: bool
:param path: fasta path
:type path: str
:param status: treatment status
:type status: str
:param ext: file extension
:type ext: str
"""
web_url = Functions.config.web_url
with open(os.path.join(os.path.dirname(os.path.realpath(__file__)), "mail_templates", "dl_fasta.html")) \
as t_file:
......@@ -171,6 +269,24 @@ class Functions:
@staticmethod
def sort_fasta(job_name, fasta_file, index_file, lock_file, compress=False, mailer=None, mode="webserver"):
"""
Sort fasta file according to the sorted index file
:param job_name: job id
:type job_name: str
:param fasta_file: fasta file path
:type fasta_file: str
:param index_file: index file path
:type index_file: str
:param lock_file: lock file path
:type lock_file: str
:param compress: compress result fasta file
:type compress: bool
:param mailer: mailer object (to send mail)
:type mailer: Mailer
:param mode: webserver or standalone
:type mode: str
"""
index, sample_name = Functions.read_index(index_file)
is_compressed = fasta_file.endswith(".gz")
if is_compressed:
......@@ -199,14 +315,38 @@ class Functions:
Functions.send_fasta_ready(mailer, job_name, sample_name, compress)
@staticmethod
def compress_and_send_mail(job_name, fasta_file, index_file, lock_file, mailer):
    """
    Compress fasta file, release the lock file and then send a mail with the file link to the client

    :param job_name: job id
    :type job_name: str
    :param fasta_file: fasta file path
    :type fasta_file: str
    :param index_file: index file path
    :type index_file: str
    :param lock_file: lock file path
    :type lock_file: str
    :param mailer: mailer object (to send mail)
    :type mailer: Mailer
    """
    Functions.compress(fasta_file)
    # The lock file is removed once compression is done, before the mail is sent
    os.remove(lock_file)
    # Only the sample name is needed from the index here
    _, sample_name = Functions.read_index(index_file)
    # The file has just been gzipped, so the mail always announces a compressed fasta
    Functions.send_fasta_ready(mailer, job_name, sample_name, True)
@staticmethod
def get_readable_size(size, nb_after_coma=1):
"""
Get human readable size from a given size in bytes
:param size: size in bytes
:type size: int
:param nb_after_coma: number of digits after the decimal point
:type nb_after_coma: int
:return: size, human readable
:rtype: str
"""
print(size)
units = ["b", "Kb", "Mb", "Gb"]
i = 0
......@@ -217,6 +357,14 @@ class Functions:
@staticmethod
def get_readable_time(seconds):
"""
Get human readable time
:param seconds: time in seconds
:type seconds: int
:return: time, human readable
:rtype: str
"""
time_r = "%d s" % seconds
if seconds >= 60:
minutes = seconds // 60
......@@ -228,10 +376,22 @@ class Functions:
time_r = "%d h %d min %d s" % (hours, minutes, seconds)
return time_r
@staticmethod
def get_gallery_items():
"""
Get list of items from the gallery
:return: list of items of the gallery. Each item is a dict with 7 keys:
* `name` : name of the job
* `id_job` : id of the job
* `picture` : illustrating picture filename (located in gallery folder of the data folder)
* `query` : query species name
* `target` : target species name
* `mem_peak` : max memory used for the run (human readable)
* `time_elapsed` : time elapsed for the run (human readable)
:rtype: list of dict
"""
from dgenies.database import Gallery
items = []
for item in Gallery.select():
......@@ -248,6 +408,16 @@ class Functions:
@staticmethod
def is_in_gallery(id_job, mode="webserver"):
"""
Check whether a job is in the gallery
:param id_job: job id
:type id_job: str
:param mode: webserver or standalone
:type mode: str
:return: True if job is in the gallery, else False
:rtype: bool
"""
if mode == "webserver":
from dgenies.database import Gallery, Job
from peewee import DoesNotExist
......@@ -259,6 +429,12 @@ class Functions:
@staticmethod
def _get_jobs_list():
"""
Get list of jobs
:return: list of valid jobs
:rtype: list
"""
all_jobs = os.listdir(Functions.config.app_data)
valid_jobs = []
for job in all_jobs:
......@@ -272,6 +448,14 @@ class Functions:
@staticmethod
def get_list_all_jobs(mode="webserver"):
"""
Get list of all jobs
:param mode: webserver or standalone
:type mode: str
:return: list of all jobs in standalone mode. Empty list in webserver mode
:rtype: list
"""
if mode == "webserver":
return [] # Don't give the list in webserver as it's multi-user
all_jobs = Functions._get_jobs_list()
......@@ -281,5 +465,13 @@ class Functions:
@staticmethod
def query_fasta_file_exists(res_dir):
    """
    Check whether the ``.query`` file is present in a job result directory

    :param res_dir: job result directory
    :type res_dir: str
    :return: True if ``<res_dir>/.query`` exists and is a regular file, else False
    :rtype: bool
    """
    # isfile() is already False for a missing path, so it covers the existence check too
    return os.path.isfile(os.path.join(res_dir, ".query"))
......@@ -7,6 +7,10 @@ from dgenies.config_reader import AppConfigReader
class Latest:
"""
Search latest version
"""
def __init__(self):
self.latest = ""
self.win32 = ""
......@@ -15,6 +19,9 @@ class Latest:
self.load()
def load(self):
"""
Load latest version: use cached version (if any) and then sync with Github
"""
if os.path.exists(self._save_latest):
with open(self._save_latest, "r") as latest_f:
self.latest = latest_f.readline().rstrip()
......@@ -27,10 +34,16 @@ class Latest:
self.update()
def update_async(self):
    """
    Schedule the latest version lookup without blocking the caller

    ``self.update`` is run by a ``threading.Timer`` started here with a 1 second delay.
    """
    threading.Timer(1, self.update).start()
def update(self):
"""
Get latest version from Github
"""
try:
call = requests.get("https://api.github.com/repos/genotoul-bioinfo/dgenies/releases/latest")
if call.ok:
......@@ -47,6 +60,9 @@ class Latest:
self._write_update()
def _write_update(self):
"""
Save latest version to a file
"""
if self.latest != "" or self.win32 != "":
with open(self._save_latest, "w") as latest_f:
latest_f.write("\n".join([self.latest, self.win32]))
......@@ -4,6 +4,10 @@ from flask_mail import Mail, Message
class Mailer:
"""
Send mail through the Flask app
"""
def __init__(self, app):
self.app = app
self.mail = Mail(app)
......@@ -13,11 +17,29 @@ class Mailer:
# self.mail_org = config_reader.get_mail_org()
# self.disable = config_reader.get_disable_mail()
def __send_async_email(self, msg):
def _send_async_email(self, msg):
"""
Send mail asynchronously
:param msg: message to send
:type msg: Message
"""
with self.app.app_context():
self.mail.send(msg)
def send_mail(self, recipients: list, subject: str, message: str, message_html: str=None):
def send_mail(self, recipients, subject, message, message_html=None):
"""
Send mail
:param recipients: list of recipients
:type recipients: list
:param subject: mail subject
:type subject: str
:param message: message (text)
:type message: str
:param message_html: message (html)
:type message_html: str
"""
sender = (self.config.mail_org, self.config.mail_status_sender) if self.config.mail_org is not None else \
self.config.mail_status_sender
reply = self.config.mail_reply
......@@ -30,7 +52,7 @@ class Mailer:
sender=sender,
reply_to=reply
)
self.__send_async_email(msg)
self._send_async_email(msg)
else: # Print debug
print("################\n"
"# WARNING !!!! #\n"
......
......@@ -20,6 +20,9 @@ from Bio.SeqRecord import SeqRecord
class Paf:
"""
Functions applied to PAF files
"""
limit_idy = [0.25, 0.5, 0.75]
max_nb_lines = 100000
......@@ -57,7 +60,22 @@ class Paf:
self.parse_paf()
@staticmethod
def __flush_blocks(index_c, new_index_c, new_index_o, current_block):
def _flush_blocks(index_c, new_index_c, new_index_o, current_block):
"""
When parsing index, build a mix of too small sequential contigs (if there are at least 5 of them), else just add
each contig to the new index
:param index_c: current index contigs def
:type index_c: dict
:param new_index_o: new index contigs order
:type new_index_o: list
:param new_index_c: new index contigs def
:type new_index_c: dict
:param current_block: contigs in the current analyzed block
:type current_block: list
:return: (new index contigs defs, new index contigs order)
:rtype: (dict, list)
"""
if len(current_block) >= 5:
block_length = 0
for contig in current_block:
......@@ -73,28 +91,43 @@ class Paf:
def parse_index(self, index_o: list, index_c: dict, full_len: int):
    """
    Parse index and merge too small contigs together

    A contig whose length is at least 0.2% of ``full_len`` is copied as is to the new index. Consecutive
    smaller contigs are accumulated in a block, which is handed to ``_flush_blocks`` each time a large
    enough contig is met, and once more at the end of the index.

    :param index_o: index contigs order
    :type index_o: list
    :param index_c: index contigs def
    :type index_c: dict
    :param full_len: length of the sequence
    :type full_len: int
    :return: (new contigs def, new contigs order)
    :rtype: (dict, list)
    """
    # Contigs shorter than this are candidates for merging
    min_len = 0.002 * full_len
    new_index_o = []
    new_index_c = {}
    current_block = []
    for index in index_o:
        if index_c[index] >= min_len:
            # Flush the pending small contigs exactly once, so they stay before this contig in the new order
            new_index_c, new_index_o = self._flush_blocks(index_c, new_index_c, new_index_o, current_block)
            current_block = []
            new_index_c[index] = index_c[index]
            new_index_o.append(index)
        else:
            current_block.append(index)
    # Flush whatever small contigs remain after the last large one
    new_index_c, new_index_o = self._flush_blocks(index_c, new_index_c, new_index_o, current_block)
    return new_index_c, new_index_o