functions.py 9.66 KB
Newer Older
1
import os
Floreal Cabanettes's avatar
Floreal Cabanettes committed
2
3
import random
import string
4
import gzip
5
import shutil
Floreal Cabanettes's avatar
Floreal Cabanettes committed
6
import sys
7
import re
8
9
10
11
import traceback
from collections import OrderedDict
from Bio import SeqIO
from jinja2 import Template
12
from dgenies.config_reader import AppConfigReader
13

14
# Extensions accepted by Functions.allowed_file; they are compared, lower-cased, against the last one or
# last two dot-separated parts of the file name.
ALLOWED_EXTENSIONS = ['fa', 'fasta', 'fna', 'fa.gz', 'fasta.gz', 'fna.gz']
15

Floreal Cabanettes's avatar
Floreal Cabanettes committed
16

Floreal Cabanettes's avatar
Floreal Cabanettes committed
17
class Functions:
Floreal Cabanettes's avatar
Floreal Cabanettes committed
18

19
20
    # Configuration reader created once, when the class body is executed, and read by the static helpers
    # through Functions.config (e.g. web_url, app_data).
    config = AppConfigReader()

Floreal Cabanettes's avatar
Floreal Cabanettes committed
21
22
23
24
25
    @staticmethod
    def allowed_file(filename):
        """
        Check whether a file name carries one of the allowed extensions
        :param filename: name of the file to check
        :return: True if the last extension (e.g. "fa") or the last two extensions joined by a dot
            (e.g. "fa.gz"), lower-cased, belong to ALLOWED_EXTENSIONS; False otherwise
        """
        if '.' not in filename:
            return False
        simple_ext = filename.rsplit('.', 1)[1].lower()
        if simple_ext in ALLOWED_EXTENSIONS:
            return True
        double_ext = ".".join(filename.rsplit('.', 2)[1:]).lower()
        return double_ext in ALLOWED_EXTENSIONS
Floreal Cabanettes's avatar
Floreal Cabanettes committed
26

Floreal Cabanettes's avatar
Floreal Cabanettes committed
27
28
29
30
31
32
33
34
    @staticmethod
    def random_string(s_len):
        """
        Generate a random string
        :param s_len: length of the string to generate
        :return: the random string
        """
        return ''.join([random.choice(string.ascii_letters + string.digits) for n in range(s_len)])
Floreal Cabanettes's avatar
Floreal Cabanettes committed
35

Floreal Cabanettes's avatar
Floreal Cabanettes committed
36
37
    @staticmethod
    def get_valid_uploaded_filename(filename, folder):
38
        file_query_s = os.path.join(folder, filename)
Floreal Cabanettes's avatar
Floreal Cabanettes committed
39
40
41
42
43
44
45
        i = 2
        filename_orig = filename
        while os.path.exists(file_query_s):
            filename = str(i) + "_" + filename_orig
            file_query_s = os.path.join(folder, filename)
            i += 1
        return filename
46

47
48
49
50
51
52
53
54
55
56
57
58
59
60
    @staticmethod
    def __get_do_sort(fasta, is_sorted):
        do_sort = False
        if is_sorted:
            do_sort = True
            if fasta.endswith(".sorted"):
                do_sort = False
        return do_sort

    @staticmethod
    def get_fasta_file(res_dir, type_f, is_sorted):
        fasta_file = None
        try:
            with open(os.path.join(res_dir, "." + type_f), "r") as save_name:
61
                fasta_file = save_name.readline().strip("\n")
62
        except IOError:
Floreal Cabanettes's avatar
Floreal Cabanettes committed
63
            print(res_dir + ": Unable to load saved name for " + type_f, file=sys.stderr)
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
            pass
        if fasta_file is not None and os.path.exists(fasta_file):
            fasta_file_uc = fasta_file
            if fasta_file.endswith(".gz"):
                fasta_file_uc = fasta_file[:-3]
            if is_sorted:
                sorted_fasta = fasta_file_uc + ".sorted"
                if os.path.exists(sorted_fasta):
                    fasta_file = sorted_fasta
                else:
                    sorted_fasta = fasta_file_uc + ".gz.sorted"
                    if os.path.exists(sorted_fasta):
                        fasta_file = sorted_fasta

        return fasta_file

    @staticmethod
    def uncompress(filename):
        try:
            uncompressed = filename.rsplit('.', 1)[0]
            parts = uncompressed.rsplit("/", 1)
            file_path = parts[0]
            basename = parts[1]
            n = 2
            while os.path.exists(uncompressed):
                uncompressed = "%s/%d_%s" % (file_path, n, basename)
                n += 1
            with open(filename, "rb") as infile, open(uncompressed, "wb") as outfile:
                outfile.write(gzip.decompress(infile.read()))
            return uncompressed
        except Exception as e:
            print(traceback.format_exc())
            return None

    @staticmethod
    def compress(filename):
        try:
            if not filename.endswith(".gz") and not filename.endswith(".gz.sorted"):
                compressed = filename + ".gz" if not filename.endswith(".sorted") else filename[:-7] + ".gz.sorted"
                parts = compressed.rsplit("/", 1)
                file_path = parts[0]
                basename = parts[1]
                n = 2
                while os.path.exists(compressed):
                    compressed = "%s/%d_%s" % (file_path, n, basename)
                    n += 1
                with open(filename, "rb") as infile, gzip.open(compressed, "wb") as outfile:
                    shutil.copyfileobj(infile, outfile)
                os.remove(filename)
                return compressed
            return filename
        except Exception as e:
            print(traceback.format_exc())
            return None

    @staticmethod
    def read_index(index_file):
        index = OrderedDict()
        with open(index_file, "r") as index_f:
123
124
125
            # Sample name without special chars:
            sample_name = re.sub('[^A-Za-z0-9_\-.]+', '', index_f.readline().strip("\n").replace(" ", "_"))
            for line in index_f:
126
127
128
129
130
131
132
133
134
                if line != "":
                    parts = line.strip("\n").split("\t")
                    name = parts[0]
                    lenght = int(parts[1])
                    to_reverse = parts[2] == "1" if len(parts) >= 3 else False
                    index[name] = {
                        "length": lenght,
                        "to_reverse": to_reverse
                    }
135
        return index, sample_name
136
137
138

    @staticmethod
    def get_mail_for_job(id_job):
        """
        Get the e-mail address stored for a job
        :param id_job: job id, matched against Job.id_job
        :return: the email attribute of the matching Job record
        """
        # Imported here rather than at module level, as in the other database helpers of this class
        from dgenies.database import Job

        with Job.connect():
            return Job.get(Job.id_job == id_job).email
143
144

    @staticmethod
    def send_fasta_ready(mailer, job_name, sample_name, compressed=False, path="fasta-query", status="success",
                         ext="fasta"):
        """
        Send the "Download fasta" mail of a job
        :param mailer: object whose send_mail(recipients, subject, text_message, html_message) is called
        :param job_name: job id; also used to look up the recipient address
        :param sample_name: name of the sample, used as base name of the file to download
        :param compressed: if truthy, ".gz" is appended to the file name of the link
        :param path: URL path component placed between the web URL and the job name
        :param status: forwarded to the HTML template
        :param ext: extension of the file to download (without leading dot)
        """
        web_url = Functions.config.web_url
        template_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "mail_templates",
                                     "dl_fasta.html")
        with open(template_path) as t_file:
            template = Template(t_file.read())
        message_html = template.render(job_name=job_name, status=status, url_base=web_url,
                                       sample_name=sample_name, compressed=compressed, path=path, ext=ext)
        # Build the plain-text link from the same path/ext given to the HTML template instead of the
        # hard-coded defaults, so both bodies agree when a caller overrides them.
        # NOTE(review): assumes the template builds its link as <url_base>/<path>/<job>/<sample>.<ext> - confirm
        file_name = "%s.%s%s" % (sample_name, ext, ".gz" if compressed else "")
        message = "D-Genies\n\n" \
                  "Job %s - Download fasta\n\n" % job_name
        message += "Query fasta file for job %s (query: %s) is ready to download.\n" % (job_name, sample_name)
        message += "You can click on the link below to download it:\n\n"
        message += "%s/%s/%s/%s" % (web_url, path, job_name, file_name)
        mailer.send_mail([Functions.get_mail_for_job(job_name)], "Job %s - Download fasta" % job_name, message,
                         message_html)

161
162


163
    @staticmethod
    def sort_fasta(job_name, fasta_file, index_file, lock_file, compress=False, mailer=None, mode="webserver"):
        """
        Write a copy of a fasta file with its sequences in the order of an index file
        :param job_name: job id, used for the notification mail
        :param fasta_file: fasta file to sort; if it ends with ".gz" it is uncompressed first and the
            uncompressed copy is removed afterwards
        :param index_file: index file giving the sequence order and the sequences to reverse-complement
        :param lock_file: file removed once the sorted file is written
        :param compress: if truthy, the sorted file is compressed
        :param mailer: mailer used to notify the user; no mail is sent if None
        :param mode: a mail is only sent when mode is "webserver"
        """
        index, sample_name = Functions.read_index(index_file)
        is_compressed = fasta_file.endswith(".gz")
        if is_compressed:
            fasta_file = Functions.uncompress(fasta_file)
        seq = SeqIO.index(fasta_file, "fasta")
        fasta_file_o = fasta_file + ".sorted"
        try:
            with open(fasta_file_o, "w") as fasta_out:
                for name, props in index.items():
                    sequence = seq[name]
                    if props["to_reverse"]:
                        # Save the identifiers and restore them on the reverse-complemented record
                        s_id = sequence.id
                        s_name = sequence.name
                        s_description = sequence.description
                        sequence = sequence.reverse_complement()
                        sequence.id = s_id
                        sequence.name = s_name
                        sequence.description = s_description
                    SeqIO.write(sequence, fasta_out, "fasta")
        finally:
            # Release the handle kept open by the index before the indexed file is possibly removed
            seq.close()
        if is_compressed:
            os.remove(fasta_file)
        if compress:
            Functions.compress(fasta_file_o)
        os.remove(lock_file)
        # A "<lock_file>.pending" file disables the notification
        if mode == "webserver" and mailer is not None and not os.path.exists(lock_file + ".pending"):
            Functions.send_fasta_ready(mailer, job_name, sample_name, compress)

    @staticmethod
    def compress_and_send_mail(job_name, fasta_file, index_file, lock_file, compressed, mailer):
        """
        Compress a fasta file, remove the lock file, then send the "Download fasta" mail
        :param job_name: job id
        :param fasta_file: fasta file to compress
        :param index_file: index file from which the sample name is read
        :param lock_file: file removed once the compression is done
        :param compressed: forwarded to send_fasta_ready
        :param mailer: mailer used to send the mail
        """
        Functions.compress(fasta_file)
        os.remove(lock_file)
        # Only the sample name of the index is needed here
        sample_name = Functions.read_index(index_file)[1]
        Functions.send_fasta_ready(mailer, job_name, sample_name, compressed)
197

198
    @staticmethod
Floreal Cabanettes's avatar
Floreal Cabanettes committed
199
200
201
    def get_readable_size(size, nb_after_coma=1):
        print(size)
        units = ["b", "Kb", "Mb", "Gb"]
202
203
204
205
        i = 0
        while size >= 1024 and i < 3:
            size /= 1024.0
            i += 1
Floreal Cabanettes's avatar
Floreal Cabanettes committed
206
        return str("%." + str(nb_after_coma) + "f %s") % (size, units[i])
207

208
209
210
211
212
213
214
215
216
217
218
219
220
221
    @staticmethod
    def get_readable_time(seconds):
        time_r = "%d s" % seconds
        if seconds >= 60:
            minutes = seconds // 60
            seconds = seconds - (minutes * 60)
            time_r = "%d min %d s" % (minutes, seconds)
            if minutes >= 60:
                hours = minutes // 60
                minutes = minutes - (hours * 60)
                time_r = "%d h %d min %d s" % (hours, minutes, seconds)
        return time_r


222

223
224
    @staticmethod
    def get_gallery_items():
        """
        List the items of the gallery
        :return: list of dicts, one per Gallery record, with keys name, id_job, picture, query, target,
            mem_peak (formatted with get_readable_size) and time_elapsed (formatted with get_readable_time)
        """
        from dgenies.database import Gallery

        return [
            {
                "name": entry.name,
                "id_job": entry.job.id_job,
                "picture": entry.picture,
                "query": entry.query,
                "target": entry.target,
                "mem_peak": Functions.get_readable_size(entry.job.mem_peak),
                "time_elapsed": Functions.get_readable_time(entry.job.time_elapsed)
            }
            for entry in Gallery.select()
        ]

    @staticmethod
240
    def is_in_gallery(id_job, mode="webserver"):
241
242
243
244
245
246
247
248
        if mode == "webserver":
            from dgenies.database import Gallery, Job
            from peewee import DoesNotExist
            try:
                return len(Gallery.select().where(Gallery.job == Job.get(id_job=id_job))) > 0
            except DoesNotExist:
                return False
        return False
249
250
251
252
253
254
255
256
257

    @staticmethod
    def get_list_all_jobs(mode="webserver"):
        if mode == "webserver":
            return []  # Don't give the list in webserver as it's multi-user
        all_jobs = os.listdir(Functions.config.app_data)
        if "gallery" in all_jobs:
            all_jobs.remove("gallery")
        return sorted(all_jobs, key=lambda x: x.lower())