Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
genotoul-bioinfo
D-GENIES
Commits
cbf64fab
Commit
cbf64fab
authored
Jun 19, 2018
by
Floreal Cabanettes
Browse files
Add docstrings to all lib package modules + little refactoring
parent
a5fb93db
Changes
12
Hide whitespace changes
Inline
Side-by-side
src/dgenies/lib/crons.py
View file @
cbf64fab
...
...
@@ -9,6 +9,10 @@ from dgenies.config_reader import AppConfigReader
class
Crons
:
"""
Manage crontab jobs (webserver mode)
"""
def
__init__
(
self
,
base_dir
,
debug
):
self
.
base_dir
=
base_dir
self
.
debug
=
debug
...
...
@@ -17,6 +21,12 @@ class Crons:
self
.
local_scheduler_pid_file
=
os
.
path
.
join
(
self
.
config
.
config_dir
,
".local_scheduler_pid"
)
def
clear
(
self
,
kill_scheduler
=
True
):
"""
Clear all crons
:param kill_scheduler: if True, kill local scheduler currently running
:type kill_scheduler: bool
"""
# Remove old crons:
self
.
my_cron
.
remove_all
(
comment
=
"dgenies"
)
self
.
my_cron
.
write
()
...
...
@@ -30,12 +40,18 @@ class Crons:
p
.
terminate
()
def start_all(self):
    """
    Start all crons

    Existing crons are cleared first (the local scheduler is not killed), then the
    clean cron and the local scheduler launch cron are initialized, in that order.
    """
    self.clear(kill_scheduler=False)
    self.init_clean_cron()
    self.init_launch_local_cron()
@
staticmethod
def
_get_python_exec
():
"""
Get python executable path
"""
pyexec
=
sys
.
executable
match
=
re
.
match
(
r
"^(.+)/lib/(python[^/]+)/((site-packages/bin/python)|())$"
,
pyexec
)
if
match
:
...
...
@@ -44,6 +60,7 @@ class Crons:
def
init_clean_cron
(
self
):
"""
Initialize clean cron: will clear old jobs.
Clean cron is launched at 1h00am each day
"""
clean_time
=
self
.
config
.
cron_clean_time
...
...
@@ -63,7 +80,6 @@ class Crons:
def
init_launch_local_cron
(
self
):
"""
Try to launch local scheduler (if not already launched)
:return:
"""
if
self
.
base_dir
is
not
None
:
pyexec
=
self
.
_get_python_exec
()
...
...
src/dgenies/lib/decorators.py
View file @
cbf64fab
class
Singleton
:
"""
Define a singleton (design pattern)
"""
def
__init__
(
self
,
klass
):
self
.
klass
=
klass
self
.
instance
=
None
...
...
src/dgenies/lib/drmaasession.py
View file @
cbf64fab
...
...
@@ -6,6 +6,9 @@ from .decorators import Singleton
@
Singleton
class
DrmaaSession
:
"""
Initialize and close a DRMAA session (for job submission to a cluster)
"""
def
__init__
(
self
):
self
.
session
=
drmaa
.
Session
()
...
...
src/dgenies/lib/fasta.py
View file @
cbf64fab
class
Fasta
:
"""
Defines a fasta file: name of the sample, path to the fasta file, type of file (URL or local file), ...
"""
def
__init__
(
self
,
name
,
path
,
type_f
,
example
=
False
):
self
.
_name
=
name
self
.
_path
=
path
...
...
@@ -6,19 +10,55 @@ class Fasta:
self
.
_example
=
example
is
not
False
def set_path(self, path):
    """
    Replace the stored path of the fasta file

    :param path: new path of the fasta file
    :type path: str
    """
    self._path = path
def get_path(self):
    """
    Give the stored path of the fasta file

    :return: path of the fasta file
    :rtype: str
    """
    return self._path
def set_name(self, name):
    """
    Replace the stored sample name

    :param name: new name of the sample
    :type name: str
    """
    self._name = name
def get_name(self):
    """
    Give the stored sample name

    :return: name of the sample
    :rtype: str
    """
    return self._name
def get_type(self):
    """
    Give the stored type of the fasta: URL or local file

    :return: type of the fasta
    :rtype: str
    """
    return self._type
def is_example(self):
    """
    Tell whether the current sample is an example data

    :return: the stored example flag
    :rtype: bool
    """
    return self._example
src/dgenies/lib/functions.py
View file @
cbf64fab
...
...
@@ -22,10 +22,21 @@ ALLOWED_EXTENSIONS = {"fasta": ['fa', 'fasta', 'fna', 'fa.gz', 'fasta.gz', 'fna.
class
Functions
:
"""
General functions
"""
config
=
AppConfigReader
()
@
staticmethod
def
allowed_file
(
filename
,
file_formats
=
(
"fasta"
,)):
"""
Check whether a file has a valid format
:param filename: file path
:param file_formats: accepted file formats
:return: True if valid format, else False
"""
for
file_format
in
file_formats
:
if
'.'
in
filename
and
\
(
filename
.
rsplit
(
'.'
,
1
)[
1
].
lower
()
in
ALLOWED_EXTENSIONS
[
file_format
]
...
...
@@ -37,13 +48,26 @@ class Functions:
def random_string(s_len):
    """
    Build a random alphanumeric string

    Each character is drawn independently from ASCII letters (both cases) and digits.

    :param s_len: number of characters to generate
    :type s_len: int
    :return: the generated string
    :rtype: str
    """
    # NOTE(review): relies on the `random` module, not `secrets`
    alphabet = string.ascii_letters + string.digits
    return ''.join(random.choice(alphabet) for _ in range(s_len))
@
staticmethod
def
get_valid_uploaded_filename
(
filename
,
folder
):
"""
Check whether uploaded file already exists. If yes, rename it
:param filename: uploaded file
:type filename: str
:param folder: folder in which the file will be saved
:type folder: str
:return: unique filename
:rtype: str
"""
file_query_s
=
os
.
path
.
join
(
folder
,
filename
)
i
=
2
filename_orig
=
filename
...
...
@@ -55,6 +79,16 @@ class Functions:
@
staticmethod
def
__get_do_sort
(
fasta
,
is_sorted
):
"""
Check whether query must be sorted (False if already done)
:param fasta: fasta file
:type fasta: str
:param is_sorted: True if it's sorted
:type is_sorted: bool
:return: do sort
:rtype: bool
"""
do_sort
=
False
if
is_sorted
:
do_sort
=
True
...
...
@@ -64,6 +98,18 @@ class Functions:
@
staticmethod
def
get_fasta_file
(
res_dir
,
type_f
,
is_sorted
):
"""
Get fasta file path
:param res_dir: job results directory
:type res_dir: str
:param type_f: type of file (query or target)
:type type_f: str
:param is_sorted: is fasta sorted
:type is_sorted: bool
:return: fasta file path
:rtype: str
"""
fasta_file
=
None
try
:
with
open
(
os
.
path
.
join
(
res_dir
,
"."
+
type_f
),
"r"
)
as
save_name
:
...
...
@@ -88,6 +134,14 @@ class Functions:
@
staticmethod
def
uncompress
(
filename
):
"""
Uncompress a gzipped file
:param filename: gzipped file
:type filename: str
:return: path of the uncompressed file
:rtype: str
"""
try
:
uncompressed
=
filename
.
rsplit
(
'.'
,
1
)[
0
]
parts
=
uncompressed
.
rsplit
(
"/"
,
1
)
...
...
@@ -106,6 +160,14 @@ class Functions:
@
staticmethod
def
compress
(
filename
):
"""
Compress a file with gzip
:param filename: file to compress
:type filename: str
:return: path of the compressed file
:rtype: str
"""
try
:
if
not
filename
.
endswith
(
".gz"
)
and
not
filename
.
endswith
(
".gz.sorted"
):
compressed
=
filename
+
".gz"
if
not
filename
.
endswith
(
".sorted"
)
else
filename
[:
-
7
]
+
".gz.sorted"
...
...
@@ -127,6 +189,16 @@ class Functions:
@
staticmethod
def
read_index
(
index_file
):
"""
Load index of query or target
:param index_file: index file path
:type index_file: str
:return:
* [0] index (size of each chromosome) {dict}
* [1] sample name {str}
:rtype: (dict, str)
"""
index
=
OrderedDict
()
with
open
(
index_file
,
"r"
)
as
index_f
:
# Sample name without special chars:
...
...
@@ -145,6 +217,14 @@ class Functions:
@
staticmethod
def
get_mail_for_job
(
id_job
):
"""
Retrieve associated mail for a job
:param id_job: job id
:type id_job: int
:return: associated mail address
:rtype: str
"""
from
dgenies.database
import
Job
with
Job
.
connect
():
j1
=
Job
.
get
(
Job
.
id_job
==
id_job
)
...
...
@@ -153,6 +233,24 @@ class Functions:
@
staticmethod
def
send_fasta_ready
(
mailer
,
job_name
,
sample_name
,
compressed
=
False
,
path
=
"fasta-query"
,
status
=
"success"
,
ext
=
"fasta"
):
"""
Send link to fasta file when treatment ended
:param mailer: mailer object
:type mailer: Mailer
:param job_name: job id
:type job_name: str
:param sample_name: sample name
:type sample_name: str
:param compressed: is a compressed fasta file
:type compressed: bool
:param path: fasta path
:type path: str
:param status: treatment status
:type status: str
:param ext: file extension
:type ext: str
"""
web_url
=
Functions
.
config
.
web_url
with
open
(
os
.
path
.
join
(
os
.
path
.
dirname
(
os
.
path
.
realpath
(
__file__
)),
"mail_templates"
,
"dl_fasta.html"
))
\
as
t_file
:
...
...
@@ -171,6 +269,24 @@ class Functions:
@
staticmethod
def
sort_fasta
(
job_name
,
fasta_file
,
index_file
,
lock_file
,
compress
=
False
,
mailer
=
None
,
mode
=
"webserver"
):
"""
Sort fasta file according to the sorted index file
:param job_name: job id
:type job_name: str
:param fasta_file: fasta file path
:type fasta_file: str
:param index_file: index file path
:type index_file: str
:param lock_file: lock file path
:type lock_file: str
:param compress: compress result fasta file
:type compress: bool
:param mailer: mailer object (to send mail)
:type mailer: Mailer
:param mode: webserver or standalone
:type mode: str
"""
index
,
sample_name
=
Functions
.
read_index
(
index_file
)
is_compressed
=
fasta_file
.
endswith
(
".gz"
)
if
is_compressed
:
...
...
@@ -199,14 +315,38 @@ class Functions:
Functions
.
send_fasta_ready
(
mailer
,
job_name
,
sample_name
,
compress
)
@staticmethod
def compress_and_send_mail(job_name, fasta_file, index_file, lock_file, mailer):
    """
    Compress fasta file and then send a mail with its link to the client

    :param job_name: job id
    :type job_name: str
    :param fasta_file: fasta file path
    :type fasta_file: str
    :param index_file: index file path
    :type index_file: str
    :param lock_file: lock file path
    :type lock_file: str
    :param mailer: mailer object (to send mail)
    :type mailer: Mailer
    """
    Functions.compress(fasta_file)
    # The lock file is removed as soon as compression is done, before the mail is sent
    os.remove(lock_file)
    # Only the sample name is needed from the index
    _, sample_name = Functions.read_index(index_file)
    # The file has just been compressed, so it is always announced as compressed
    Functions.send_fasta_ready(mailer, job_name, sample_name, True)
@
staticmethod
def
get_readable_size
(
size
,
nb_after_coma
=
1
):
"""
Get human readable size from a given size in bytes
:param size: size in bytes
:type size: int
:param nb_after_coma: number of digits after the comma (decimal separator)
:type nb_after_coma: int
:return: size, human readable
:rtype: str
"""
print
(
size
)
units
=
[
"b"
,
"Kb"
,
"Mb"
,
"Gb"
]
i
=
0
...
...
@@ -217,6 +357,14 @@ class Functions:
@
staticmethod
def
get_readable_time
(
seconds
):
"""
Get human readable time
:param seconds: time in seconds
:type seconds: int
:return: time, human readable
:rtype: str
"""
time_r
=
"%d s"
%
seconds
if
seconds
>=
60
:
minutes
=
seconds
//
60
...
...
@@ -228,10 +376,22 @@ class Functions:
time_r
=
"%d h %d min %d s"
%
(
hours
,
minutes
,
seconds
)
return
time_r
@
staticmethod
def
get_gallery_items
():
"""
Get list of items from the gallery
:return: list of item of the gallery. Each item is a dict with 7 keys:
* `name` : name of the job
* `id_job` : id of the job
* `picture` : illustrating picture filename (located in gallery folder of the data folder)
* `query` : query species name
* `target` : target species name
* `mem_peak` : max memory used for the run (human readable)
* `time_elapsed` : time elapsed for the run (human readable)
:rtype: list of dict
"""
from
dgenies.database
import
Gallery
items
=
[]
for
item
in
Gallery
.
select
():
...
...
@@ -248,6 +408,16 @@ class Functions:
@
staticmethod
def
is_in_gallery
(
id_job
,
mode
=
"webserver"
):
"""
Check whether a job is in the gallery
:param id_job: job id
:type id_job: str
:param mode: webserver or standalone
:type mode: str
:return: True if job is in the gallery, else False
:rtype: bool
"""
if
mode
==
"webserver"
:
from
dgenies.database
import
Gallery
,
Job
from
peewee
import
DoesNotExist
...
...
@@ -259,6 +429,12 @@ class Functions:
@
staticmethod
def
_get_jobs_list
():
"""
Get list of jobs
:return: list of valid jobs
:rtype: list
"""
all_jobs
=
os
.
listdir
(
Functions
.
config
.
app_data
)
valid_jobs
=
[]
for
job
in
all_jobs
:
...
...
@@ -272,6 +448,14 @@ class Functions:
@
staticmethod
def
get_list_all_jobs
(
mode
=
"webserver"
):
"""
Get list of all jobs
:param mode: webserver or standalone
:type mode: str
:return: list of all jobs in standalone mode. Empty list in webserver mode
:rtype: list
"""
if
mode
==
"webserver"
:
return
[]
# Don't give the list in webserver as it's multi-user
all_jobs
=
Functions
.
_get_jobs_list
()
...
...
@@ -281,5 +465,13 @@ class Functions:
@staticmethod
def query_fasta_file_exists(res_dir):
    """
    Check whether the ".query" file is present in a job result directory

    :param res_dir: job result directory
    :type res_dir: str
    :return: True if ".query" exists in res_dir and is a regular file, else False
    :rtype: bool
    """
    query_marker = os.path.join(res_dir, ".query")
    # isfile() is already False for a missing path, so it covers the existence check too
    return os.path.isfile(query_marker)
src/dgenies/lib/latest.py
View file @
cbf64fab
...
...
@@ -7,6 +7,10 @@ from dgenies.config_reader import AppConfigReader
class
Latest
:
"""
Search latest version
"""
def
__init__
(
self
):
self
.
latest
=
""
self
.
win32
=
""
...
...
@@ -15,6 +19,9 @@ class Latest:
self
.
load
()
def
load
(
self
):
"""
Load latest version: use cached version (if any) and then sync with Github
"""
if
os
.
path
.
exists
(
self
.
_save_latest
):
with
open
(
self
.
_save_latest
,
"r"
)
as
latest_f
:
self
.
latest
=
latest_f
.
readline
().
rstrip
()
...
...
@@ -27,10 +34,16 @@ class Latest:
self
.
update
()
def update_async(self):
    """
    Update latest version without blocking the caller

    A timer thread is started; it calls `update` after a delay of one second.
    """
    threading.Timer(1, self.update).start()
def
update
(
self
):
"""
Get latest version from Github
"""
try
:
call
=
requests
.
get
(
"https://api.github.com/repos/genotoul-bioinfo/dgenies/releases/latest"
)
if
call
.
ok
:
...
...
@@ -47,6 +60,9 @@ class Latest:
self
.
_write_update
()
def _write_update(self):
    """
    Save latest version to the cache file

    Nothing is written when both `latest` and `win32` are empty strings. Otherwise the
    file is overwritten with `latest`, a newline, then `win32`.
    """
    if self.latest == "" and self.win32 == "":
        return
    with open(self._save_latest, "w") as cache_f:
        cache_f.write("\n".join((self.latest, self.win32)))
src/dgenies/lib/mailer.py
View file @
cbf64fab
...
...
@@ -4,6 +4,10 @@ from flask_mail import Mail, Message
class
Mailer
:
"""
Send mail through flask app
"""
def
__init__
(
self
,
app
):
self
.
app
=
app
self
.
mail
=
Mail
(
app
)
...
...
@@ -13,11 +17,29 @@ class Mailer:
# self.mail_org = config_reader.get_mail_org()
# self.disable = config_reader.get_disable_mail()
def _send_async_email(self, msg):
    """
    Send a message within the application context of the flask app

    :param msg: message to send
    :type msg: Message
    """
    # NOTE(review): despite the method name, nothing here is asynchronous: the message
    # is sent in the calling thread
    with self.app.app_context():
        self.mail.send(msg)
def
send_mail
(
self
,
recipients
:
list
,
subject
:
str
,
message
:
str
,
message_html
:
str
=
None
):
def
send_mail
(
self
,
recipients
,
subject
,
message
,
message_html
=
None
):
"""
Send mail
:param recipients: list of recipients
:type recipients: list
:param subject: mail subject
:type subject: str
:param message: message (text)
:type message: str
:param message_html: message (html)
:type message_html: str
"""
sender
=
(
self
.
config
.
mail_org
,
self
.
config
.
mail_status_sender
)
if
self
.
config
.
mail_org
is
not
None
else
\
self
.
config
.
mail_status_sender
reply
=
self
.
config
.
mail_reply
...
...
@@ -30,7 +52,7 @@ class Mailer:
sender
=
sender
,
reply_to
=
reply
)
self
.
_
_
send_async_email
(
msg
)
self
.
_send_async_email
(
msg
)
else
:
# Print debug
print
(
"################
\n
"
"# WARNING !!!! #
\n
"
...
...
src/dgenies/lib/paf.py
View file @
cbf64fab
...
...
@@ -20,6 +20,9 @@ from Bio.SeqRecord import SeqRecord
class
Paf
:
"""
Functions applied to PAF files
"""
limit_idy
=
[
0.25
,
0.5
,
0.75
]
max_nb_lines
=
100000
...
...
@@ -57,7 +60,22 @@ class Paf:
self
.
parse_paf
()
@
staticmethod
def
__flush_blocks
(
index_c
,
new_index_c
,
new_index_o
,
current_block
):
def
_flush_blocks
(
index_c
,
new_index_c
,
new_index_o
,
current_block
):
"""
When parsing index, build a mix of too small sequential contigs (if there are at least 5 of them), else just add
them to the new index
:param index_c: current index contigs def
:type index_c: dict
:param new_index_o: new index contigs order
:type new_index_o: list
:param new_index_c: new index contigs def
:type new_index_c: dict
:param current_block: contigs in the current analyzed block
:type current_block: list
:return: (new index contigs defs, new index contigs order)
:rtype: (dict, list)
"""
if
len
(
current_block
)
>=
5
:
block_length
=
0
for
contig
in
current_block
:
...
...
@@ -73,28 +91,43 @@ class Paf:
def
parse_index
(
self
,
index_o
:
list
,
index_c
:
dict
,
full_len
:
int
):
"""
Parse index and merge too small contigs
:param index_o: index order
Parse index and merge too small contigs together
:param index_o: index contigs order
:type index_o: list
:param index_c: index contigs def
:type index_c: dict
:param full_len: length of the sequence
:return: new index orders and contigs def
:type full_len: int
:return: (new contigs def, new contigs order)
:rtype: (dict, list)
"""
new_index_o
=
[]
new_index_c
=
{}
current_block
=
[]
for
index
in
index_o
:
if
index_c
[
index
]
>=
0.002
*
full_len
:
new_index_c
,
new_index_o
=
self
.
_
_
flush_blocks
(
index_c
,
new_index_c
,
new_index_o
,
current_block
)
new_index_c
,
new_index_o
=
self
.
_flush_blocks
(
index_c
,
new_index_c
,
new_index_o
,
current_block
)
current_block
=
[]
new_index_c
[
index
]
=
index_c
[
index
]
new_index_o
.
append
(
index
)
else
:
current_block
.
append
(
index
)
new_index_c
,
new_index_o
=
self
.
_
_
flush_blocks
(
index_c
,
new_index_c
,
new_index_o
,
current_block
)
new_index_c
,
new_index_o
=
self
.
_flush_blocks
(
index_c
,
new_index_c
,
new_index_o
,
current_block
)