functions.py 10.9 KB
Newer Older
1
import os
Floreal Cabanettes's avatar
Floreal Cabanettes committed
2
3
import random
import string
4
import gzip
5
import shutil
Floreal Cabanettes's avatar
Floreal Cabanettes committed
6
import sys
7
import re
8
import traceback
9
from inspect import getmembers, isfunction
10
11
12
from collections import OrderedDict
from Bio import SeqIO
from jinja2 import Template
13
from dgenies.config_reader import AppConfigReader
14
import dgenies.lib.validators as validators
15

Floreal Cabanettes's avatar
Floreal Cabanettes committed
16
# Accepted file extensions for each kind of uploaded file.
ALLOWED_EXTENSIONS = {
    "fasta": ['fa', 'fasta', 'fna', 'fa.gz', 'fasta.gz', 'fna.gz'],
    "idx": ['idx'],
    "map": [
        member_name
        for member_name, member in getmembers(validators)
        if isfunction(member) and not member_name.startswith("_")
    ],
    "backup": ['tar'],
}
20
# map: all functions of validators whose name does not start with an underscore.
21

Floreal Cabanettes's avatar
Floreal Cabanettes committed
22

Floreal Cabanettes's avatar
Floreal Cabanettes committed
23
class Functions:
Floreal Cabanettes's avatar
Floreal Cabanettes committed
24

25
26
    # Application configuration, instantiated once when the class body is executed.
    # The static helpers of this class read it through Functions.config.
    config = AppConfigReader()

Floreal Cabanettes's avatar
Floreal Cabanettes committed
27
    @staticmethod
    def allowed_file(filename, file_formats=("fasta",)):
        """
        Check whether a file name has an extension allowed for at least one of the given formats

        Both the last extension (e.g. "fa") and the last two extensions joined by a dot
        (e.g. "fa.gz") are compared, in lower case, against ALLOWED_EXTENSIONS.

        :param filename: name of the file to check
        :param file_formats: keys of ALLOWED_EXTENSIONS the file name is tested against
        :return: True if the extension is allowed for one of the formats, else False
        """
        if '.' not in filename:
            return False
        last_ext = filename.rsplit('.', 1)[1].lower()
        last_two_ext = ".".join(filename.rsplit('.', 2)[1:]).lower()
        return any(last_ext in ALLOWED_EXTENSIONS[fmt] or last_two_ext in ALLOWED_EXTENSIONS[fmt]
                   for fmt in file_formats)
Floreal Cabanettes's avatar
Floreal Cabanettes committed
35

Floreal Cabanettes's avatar
Floreal Cabanettes committed
36
37
38
39
40
41
42
43
    @staticmethod
    def random_string(s_len):
        """
        Generate a random string
        :param s_len: length of the string to generate
        :return: the random string
        """
        return ''.join([random.choice(string.ascii_letters + string.digits) for n in range(s_len)])
Floreal Cabanettes's avatar
Floreal Cabanettes committed
44

Floreal Cabanettes's avatar
Floreal Cabanettes committed
45
46
    @staticmethod
    def get_valid_uploaded_filename(filename, folder):
47
        file_query_s = os.path.join(folder, filename)
Floreal Cabanettes's avatar
Floreal Cabanettes committed
48
49
50
51
52
53
54
        i = 2
        filename_orig = filename
        while os.path.exists(file_query_s):
            filename = str(i) + "_" + filename_orig
            file_query_s = os.path.join(folder, filename)
            i += 1
        return filename
55

56
57
58
59
60
61
62
63
64
65
66
67
68
69
    @staticmethod
    def __get_do_sort(fasta, is_sorted):
        do_sort = False
        if is_sorted:
            do_sort = True
            if fasta.endswith(".sorted"):
                do_sort = False
        return do_sort

    @staticmethod
    def get_fasta_file(res_dir, type_f, is_sorted):
        fasta_file = None
        try:
            with open(os.path.join(res_dir, "." + type_f), "r") as save_name:
70
                fasta_file = save_name.readline().strip("\n")
71
        except IOError:
Floreal Cabanettes's avatar
Floreal Cabanettes committed
72
            print(res_dir + ": Unable to load saved name for " + type_f, file=sys.stderr)
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
            pass
        if fasta_file is not None and os.path.exists(fasta_file):
            fasta_file_uc = fasta_file
            if fasta_file.endswith(".gz"):
                fasta_file_uc = fasta_file[:-3]
            if is_sorted:
                sorted_fasta = fasta_file_uc + ".sorted"
                if os.path.exists(sorted_fasta):
                    fasta_file = sorted_fasta
                else:
                    sorted_fasta = fasta_file_uc + ".gz.sorted"
                    if os.path.exists(sorted_fasta):
                        fasta_file = sorted_fasta

        return fasta_file

    @staticmethod
    def uncompress(filename):
        try:
            uncompressed = filename.rsplit('.', 1)[0]
            parts = uncompressed.rsplit("/", 1)
            file_path = parts[0]
            basename = parts[1]
            n = 2
            while os.path.exists(uncompressed):
                uncompressed = "%s/%d_%s" % (file_path, n, basename)
                n += 1
            with open(filename, "rb") as infile, open(uncompressed, "wb") as outfile:
                outfile.write(gzip.decompress(infile.read()))
            return uncompressed
        except Exception as e:
            print(traceback.format_exc())
            return None

    @staticmethod
    def compress(filename):
        try:
            if not filename.endswith(".gz") and not filename.endswith(".gz.sorted"):
                compressed = filename + ".gz" if not filename.endswith(".sorted") else filename[:-7] + ".gz.sorted"
                parts = compressed.rsplit("/", 1)
                file_path = parts[0]
                basename = parts[1]
                n = 2
                while os.path.exists(compressed):
                    compressed = "%s/%d_%s" % (file_path, n, basename)
                    n += 1
                with open(filename, "rb") as infile, gzip.open(compressed, "wb") as outfile:
                    shutil.copyfileobj(infile, outfile)
                os.remove(filename)
                return compressed
            return filename
        except Exception as e:
            print(traceback.format_exc())
            return None

    @staticmethod
    def read_index(index_file):
        index = OrderedDict()
        with open(index_file, "r") as index_f:
132
133
134
            # Sample name without special chars:
            sample_name = re.sub('[^A-Za-z0-9_\-.]+', '', index_f.readline().strip("\n").replace(" ", "_"))
            for line in index_f:
135
136
137
138
139
140
141
142
143
                if line != "":
                    parts = line.strip("\n").split("\t")
                    name = parts[0]
                    lenght = int(parts[1])
                    to_reverse = parts[2] == "1" if len(parts) >= 3 else False
                    index[name] = {
                        "length": lenght,
                        "to_reverse": to_reverse
                    }
144
        return index, sample_name
145
146
147

    @staticmethod
    def get_mail_for_job(id_job):
        """
        Get the e-mail address stored in the database for a job

        :param id_job: id of the job
        :return: value of the email field of the Job row whose id_job matches
        """
        # Local import: the database module is only loaded when this helper is called
        from dgenies.database import Job

        with Job.connect():
            return Job.get(Job.id_job == id_job).email
152
153

    @staticmethod
    def send_fasta_ready(mailer, job_name, sample_name, compressed=False, path="fasta-query", status="success",
                         ext="fasta"):
        """
        Send a mail telling that the fasta file of a job is ready to be downloaded

        The HTML body is rendered from the "mail_templates/dl_fasta.html" template located
        next to this module; a plain-text body is built alongside it. The recipient is the
        e-mail address stored for the job.

        :param mailer: object whose send_mail(recipients, subject, message, message_html)
            method is used to send the mail
        :param job_name: job id
        :param sample_name: name of the sample, used to build the name of the file to download
        :param compressed: truthy if the file to download is gzipped
        :param path: value given to the HTML template as "path"
        :param status: value given to the HTML template as "status"
        :param ext: value given to the HTML template as "ext"
        """
        web_url = Functions.config.web_url
        templates_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "mail_templates")
        with open(os.path.join(templates_dir, "dl_fasta.html")) as t_file:
            template = Template(t_file.read())
            message_html = template.render(job_name=job_name, status=status, url_base=web_url,
                                           sample_name=sample_name, compressed=compressed, path=path, ext=ext)

        # NOTE(review): the plain-text link always uses "fasta-query" and ".fasta", whatever
        # the values of path and ext are -- confirm this is intended.
        message = "".join([
            "D-Genies\n\n"
            "Job %s - Download fasta\n\n" % job_name,
            "Query fasta file for job %s (query: %s) is ready to download.\n" % (job_name, sample_name),
            "You can click on the link below to download it:\n\n",
            "%s/fasta-query/%s/%s" % (web_url, job_name, sample_name + ".fasta" + (".gz" if compressed else "")),
        ])

        recipient = Functions.get_mail_for_job(job_name)
        mailer.send_mail([recipient], "Job %s - Download fasta" % job_name, message, message_html)

170
171


172
    @staticmethod
    def sort_fasta(job_name, fasta_file, index_file, lock_file, compress=False, mailer=None, mode="webserver"):
        """
        Write a sorted copy of a fasta file, following the order of an index file

        Contigs are written to "<fasta file>.sorted" in the order of the index; contigs
        flagged "to_reverse" in the index are reverse-complemented (their id, name and
        description are kept). A gzipped input is first uncompressed to a temporary file
        which is removed once the sorted file is written.

        :param job_name: job id, used for the notification mail
        :param fasta_file: path of the fasta file to sort (may be gzipped)
        :param index_file: path of the index file giving the contigs order
        :param lock_file: file removed once the sorted file is ready
        :param compress: truthy to gzip the sorted file
        :param mailer: object used to send the notification mail; no mail is sent if None
        :param mode: a mail is only sent in "webserver" mode, and only if the file
            lock_file + ".pending" does not exist
        """
        index, sample_name = Functions.read_index(index_file)
        is_compressed = fasta_file.endswith(".gz")
        if is_compressed:
            # NOTE(review): uncompress() returns None on failure, in which case the next
            # statement raises a TypeError.
            fasta_file = Functions.uncompress(fasta_file)
        fasta_file_o = fasta_file + ".sorted"
        seq = SeqIO.index(fasta_file, "fasta")
        try:
            with open(fasta_file_o, "w") as fasta_out:
                for name, props in index.items():
                    sequence = seq[name]
                    if props["to_reverse"]:
                        # reverse_complement() returns a new record: restore its identifiers
                        s_id = sequence.id
                        s_name = sequence.name
                        s_description = sequence.description
                        sequence = sequence.reverse_complement()
                        sequence.id = s_id
                        sequence.name = s_name
                        sequence.description = s_description
                    SeqIO.write(sequence, fasta_out, "fasta")
        finally:
            # Close the index even if a contig is missing from the fasta or a write fails
            seq.close()
        if is_compressed:
            os.remove(fasta_file)
        if compress:
            Functions.compress(fasta_file_o)
        os.remove(lock_file)
        if mode == "webserver" and mailer is not None and not os.path.exists(lock_file + ".pending"):
            Functions.send_fasta_ready(mailer, job_name, sample_name, compress)

    @staticmethod
    def compress_and_send_mail(job_name, fasta_file, index_file, lock_file, compressed, mailer):
        """
        Compress a fasta file, remove its lock file, then send the "fasta ready" mail

        :param job_name: job id
        :param fasta_file: path of the fasta file to compress
        :param index_file: path of the index file from which the sample name is read
        :param lock_file: file removed once the compression is done
        :param compressed: value forwarded to send_fasta_ready as "compressed"
        :param mailer: object used to send the mail
        """
        Functions.compress(fasta_file)
        os.remove(lock_file)
        _, sample_name = Functions.read_index(index_file)
        Functions.send_fasta_ready(mailer=mailer, job_name=job_name, sample_name=sample_name,
                                   compressed=compressed)
207

208
    @staticmethod
Floreal Cabanettes's avatar
Floreal Cabanettes committed
209
210
211
    def get_readable_size(size, nb_after_coma=1):
        print(size)
        units = ["b", "Kb", "Mb", "Gb"]
212
213
214
215
        i = 0
        while size >= 1024 and i < 3:
            size /= 1024.0
            i += 1
Floreal Cabanettes's avatar
Floreal Cabanettes committed
216
        return str("%." + str(nb_after_coma) + "f %s") % (size, units[i])
217

218
219
220
221
222
223
224
225
226
227
228
229
230
231
    @staticmethod
    def get_readable_time(seconds):
        time_r = "%d s" % seconds
        if seconds >= 60:
            minutes = seconds // 60
            seconds = seconds - (minutes * 60)
            time_r = "%d min %d s" % (minutes, seconds)
            if minutes >= 60:
                hours = minutes // 60
                minutes = minutes - (hours * 60)
                time_r = "%d h %d min %d s" % (hours, minutes, seconds)
        return time_r


232

233
234
    @staticmethod
    def get_gallery_items():
        """
        List the items of the gallery

        :return: list of dicts, one per Gallery row, with keys "name", "id_job", "picture",
            "query", "target", "mem_peak" (formatted by get_readable_size) and
            "time_elapsed" (formatted by get_readable_time)
        """
        # Local import: the database module is only loaded when this helper is called
        from dgenies.database import Gallery

        return [
            {
                "name": entry.name,
                "id_job": entry.job.id_job,
                "picture": entry.picture,
                "query": entry.query,
                "target": entry.target,
                "mem_peak": Functions.get_readable_size(entry.job.mem_peak),
                "time_elapsed": Functions.get_readable_time(entry.job.time_elapsed)
            }
            for entry in Gallery.select()
        ]

    @staticmethod
250
    def is_in_gallery(id_job, mode="webserver"):
251
252
253
254
255
256
257
258
        if mode == "webserver":
            from dgenies.database import Gallery, Job
            from peewee import DoesNotExist
            try:
                return len(Gallery.select().where(Gallery.job == Job.get(id_job=id_job))) > 0
            except DoesNotExist:
                return False
        return False
259

260
261
262
263
264
265
266
267
268
269
270
271
272
    @staticmethod
    def _get_jobs_list():
        """
        List the valid jobs found in the application data folder

        A job is valid when its folder contains the files "map.paf", "target.idx",
        "query.idx" and ".valid".

        :return: names of the valid job folders, in os.listdir order
        """
        required_files = ("map.paf", "target.idx", "query.idx", ".valid")
        data_dir = Functions.config.app_data
        return [
            job for job in os.listdir(data_dir)
            if all(os.path.isfile(os.path.join(data_dir, job, required)) for required in required_files)
        ]

273
274
275
276
    @staticmethod
    def get_list_all_jobs(mode="webserver"):
        if mode == "webserver":
            return []  # Don't give the list in webserver as it's multi-user
277
        all_jobs = Functions._get_jobs_list()
278
279
280
        if "gallery" in all_jobs:
            all_jobs.remove("gallery")
        return sorted(all_jobs, key=lambda x: x.lower())
281
282
283
284
285

    @staticmethod
    def query_fasta_file_exists(res_dir):
        fasta_file = os.path.join(res_dir, ".query")
        return os.path.exists(fasta_file) and os.path.isfile(fasta_file)