#
# Copyright (C) 2009 INRA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#

__author__ = 'Plateforme bioinformatique Midi Pyrenees'
__copyright__ = 'Copyright (C) 2009 INRA'
__license__ = 'GNU General Public License'
__version__ = '1.0'
__email__ = 'support.bioinfo.genotoul@inra.fr'
__status__ = 'beta'

import os
import re
import tempfile
import uuid
import sys
import logging
import pickle
import datetime
import time
import shutil
from subprocess import call
from shutil import copyfile, rmtree
from configparser import ConfigParser, RawConfigParser

from ng6.run import Run
from ng6.config_reader import NG6ConfigReader
from ng6.t3MySQLdb import t3MySQLdb
from ng6.utils import Utils

from jflow.component import Component
from weaver.function import PythonFunction


def add_analysis(parent_id, analysis_cfg, *input_files):
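    """
    Entry point executed by the weaver PythonFunction scheduled in Analysis.execute():
    reload a pickled Analysis, attach its parent, run its post-processing and register
    it in the corresponding run or project.
      @param parent_id    : the parent analysis database id, or 'none'
      @param analysis_cfg : path of the analysis config file to be written
      @param input_files  : the analysis dump file, optionally followed by the parent
                            analysis config file
    Parameter roles follow execute()'s cmd_format "{EXE} {ARG} {OUT} {IN}".
    """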
    from ng6.analysis import Analysis
    from ng6.project import Project
    from ng6.run import Run
    import os
    import pickle
    import logging
    logging.getLogger("Analysis").debug("Start. Imports succeeded.")
    logging.getLogger("Analysis").debug("Start. Working on analysis " + analysis_cfg)

    # get inputs from parameters
    analysis_serialized_path = input_files[0]
    try: parent_analysis_cfg = input_files[1]
    except IndexError: parent_analysis_cfg = None
    logging.getLogger("Analysis").debug("Start. parent_analysis_cfg is initialised.")

    # load the analysis object
    analysis_dump = open(analysis_serialized_path, "rb")
    analysis = pickle.load(analysis_dump)
    analysis_dump.close()
    logging.getLogger("Analysis").debug("Dump. analysis_dump is closed and analysis is loaded.")

    # add the parent information
    logging.getLogger("Analysis").debug("parent_analysis_cfg. Starting")
    if parent_analysis_cfg:
        if os.path.isfile(parent_analysis_cfg):
            logging.getLogger("Analysis").debug("parent_analysis_cfg. Analysis.get_from_file(parent_analysis_cfg) is about to start")
            parent = Analysis.get_from_file(parent_analysis_cfg)
            logging.getLogger("Analysis").debug("parent_analysis_cfg. Analysis.get_from_file(parent_analysis_cfg) is done")
            analysis.parent = parent
    elif parent_id != 'none' :
        logging.getLogger("Analysis").debug("parent_analysis_cfg. Analysis.get_from_id(parent_id) is about to start")
        parent = Analysis.get_from_id(int(parent_id))
        analysis.parent = parent
        logging.getLogger("Analysis").debug("parent_analysis_cfg. Analysis.get_from_id(parent_id) is done")

    # run the analysis post-processing
    logging.getLogger("Analysis").debug("analysis.post_process. Starting")
    analysis.post_process()
    logging.getLogger("Analysis").debug("analysis.post_process. Done")

    # add the analysis to the right run/project object
    logging.getLogger("Analysis").debug("analysis.project. Starting add_analysis(analysis)")
    if analysis.project:
        logging.getLogger("Analysis").debug("analysis.project. Starting analysis.project.add_analysis(analysis)")
        analysis.project.add_analysis(analysis)
        logging.getLogger("Analysis").debug("analysis.project. Finishing analysis.project.add_analysis(analysis)")
    elif analysis.run:
        logging.getLogger("Analysis").debug("analysis.run. Starting analysis.run.add_analysis(analysis)")
        analysis.run.add_analysis(analysis)
        logging.getLogger("Analysis").debug("analysis.run. Finishing analysis.run.add_analysis(analysis)")
    logging.getLogger("Analysis").debug("Config File. Starting the writing of config file")
    analysis.write_config_file()
    logging.getLogger("Analysis").debug("Config File. Is written")

class Analysis (Component):
    """
    Class Analysis: Defines an nG6 analysis
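
    Typical use, as a sketch (subclass name and values below are illustrative only):

        class MyAnalysis (Analysis):
            def define_analysis(self):
                self.name = "MyAnalysis"
                self.description = "Example analysis"
                self.software = "mytool"
                self.options = "-x 3"
            def post_process(self):
                self._add_result_element("sample1", "nb_seq", "42")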
    """

    # The directory structure within nG6
    DIRECTORIES_STRUCTURE = "analyze"
    ANALYSIS_CONFIG_FILE_NAME = "analysis.cfg"

    def __init__(self, name="", description="", software="",
                 options="", version="", id=None, date=None, parent_analysis=None, space_id="default"):
        """
        Build an Analysis object
          @param name            : the analysis name
          @param description     : the analysis description
          @param software        : the software used to run the analysis
          @param options         : the options used to run the software
          @param version         : the version of the software
          @param id              : the analysis id, if the analysis already exists
          @param date            : the analysis date
          @param parent_analysis : the parent analysis
          @param space_id        : the storage space the analysis belongs to
        """
        Component.__init__(self)
        self.name = name
        self.description = description
        self.admin_login = None
        self.software = software
        self.options = options
        self.id = id
        self.version = version
        self.parent = parent_analysis
        self.results = {}
        self.run = None
        self.project = None
        self.is_editable = False
        self.space_id = space_id
        self.retention_date = None
        self.date = date

        # Set the temp folder to the ng6 temp folder
        ng6conf = NG6ConfigReader()
        logging.getLogger("Analysis.__init__").debug("ng6conf is loaded")
        tempfile.tempdir = ng6conf.get_tmp_directory()
        logging.getLogger("Analysis.__init__").debug("tempfile.tempdir = " + tempfile.tempdir)
        if not os.path.isdir(tempfile.tempdir):
            os.makedirs(tempfile.tempdir, 0o751)
        logging.getLogger("Analysis.__init__").debug("self.space_id = " + self.space_id)

        if id != None : # If not a new analysis
            logging.getLogger("Analysis.__init__").debug("Connecting to the database")
            t3mysql = t3MySQLdb()
            logging.getLogger("Analysis.__init__").debug("Connection established")
            self.run = Run.get_from_id(self.__get_run_id())
            from ng6.project import Project
            if self.run != None :
                self.project = Project.get_from_run_id(self.__get_run_id())
            else :
                self.project = Project.get_from_id(self.__get_project_id())
            self.space_id = self.project.space_id
            self.directory = t3mysql.select_analysis_directory(id)
            logging.getLogger("Analysis.__init__").debug("Building analysis with id=" + str(id) + " [" + str(self) + "]")
        else :
            self.version = self.get_version()
            if isinstance(self.version, bytes):
                self.version = self.version.decode()
            logging.getLogger("Analysis.__init__").debug("Building brand new analysis [" + str(self) + "]")

    def define_analysis(self):
        """
        Define all analysis attributes; has to be implemented by subclasses
        """
        raise NotImplementedError

    def post_process(self):
        """
        Post-process the analysis; has to be implemented by subclasses
        """
        raise NotImplementedError

    def execute(self):
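        """
        Create the analysis save/work directories, serialize the analysis and
        schedule the add_analysis step that registers it once all outputs exist.
        """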
        ng6conf = NG6ConfigReader()
        directory_name = uuid.uuid4().hex[:9]
        # pick a directory name that is not already used in the save or work area
        while True:
            save_dir = os.path.join(ng6conf.get_save_directory(), ng6conf.get_space_directory(self.space_id), self.DIRECTORIES_STRUCTURE, directory_name)
            logging.getLogger("Analysis.execute").debug("Building analysis save_dir=" + save_dir)
            work_dir = os.path.join(ng6conf.get_work_directory(), ng6conf.get_space_directory(self.space_id), self.DIRECTORIES_STRUCTURE, directory_name)
            logging.getLogger("Analysis.execute").debug("Building analysis work_dir=" + work_dir)
            if not os.path.isdir(save_dir) and not os.path.isdir(work_dir):
                break
            directory_name = uuid.uuid4().hex[:9]
        self.directory = "/" + os.path.join(ng6conf.get_space_directory(self.space_id), self.DIRECTORIES_STRUCTURE, directory_name)
        self.retention_date = ng6conf.get_retention_date(self.space_id)

        # first create the output directory
        if not os.path.isdir(self.output_directory):
            os.makedirs(self.output_directory, 0o751)
        # and analysis output
        if not os.path.isdir(self.__get_work_directory()):
            os.makedirs(self.__get_work_directory(), 0o751)

        # then add analysis information
        self.define_analysis()

        inputs = []
        # serialize the object
        analysis_dump_path = self.get_temporary_file(".dump")
        analysis_dump = open(analysis_dump_path, "wb")
        pickle.dump(self, analysis_dump)
        analysis_dump.close()
        inputs.append(analysis_dump_path)

        parent_id = 'none'
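        # collect every OutputFile / OutputDirectory / OutputFileList attribute of the
        # component; they are passed as 'includes' so the add_analysis step below only
        # runs once these files have been produced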
        wait_for_files = []
        for attr in self.__dict__:
            # TODO if no Output object raise error
            if self.__getattribute__(attr).__class__.__name__ == "OutputFile" or \
                self.__getattribute__(attr).__class__.__name__ == "OutputDirectory":
                wait_for_files.append(self.__getattribute__(attr))
            elif self.__getattribute__(attr).__class__.__name__ == "OutputFileList":
                wait_for_files.extend(self.__getattribute__(attr))
        if self.parent:
            if self.parent.id:
                parent_id = self.parent.id
            else :
                inputs.append(self.parent.get_config_file_path())
        # then run the component
        self.process()
        # add the add_analysis command line to the workflow
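        # weaver formats the command as "{EXE} {ARG} {OUT} {IN}": parent_id is the
        # argument, the analysis config file is the output, and the dump (plus the
        # optional parent config file) are the inputs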
        add = PythonFunction(add_analysis, cmd_format="{EXE} {ARG} {OUT} {IN}")
        add(includes=wait_for_files, outputs=self.get_config_file_path(), inputs=inputs, arguments=parent_id)

    def __str__(self):
        """
        Return a full description of the analysis
        """
        return "id="+str(self.id)+";name="+str(self.name)+";description="+str(self.description)+";software="+\
            str(self.software)+";version="+str(self.version)+";options="+str(self.options)+\
            ";space_id="+str(self.space_id)+";project="+str(self.project)+";run="+str(self.run)

    def sync(self):
        """
        Synchronize data related to the Analysis between the temporary folder and the long-term storage folder.
        """
        if os.path.isdir(self.__get_work_directory()):
            if len(os.listdir(self.__get_work_directory())):
                logging.getLogger("Analysis.sync").debug("Synchronizing analysis id=" + str(self.id) + " from " + self.__get_work_directory() + " to " + self.__get_save_directory())
                try :
                    if not os.path.isdir(self.__get_save_directory()) :
                        # First make the save directory
                        os.makedirs(self.__get_save_directory(), 0o751)
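                    # push the work directory to the save directory; --remove-sent-files
                    # deletes transferred files from the work area, and the emptied
                    # work directory is removed afterwards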
                    if os.path.exists(self.__get_work_directory()) and self.__get_work_directory() != self.__get_save_directory():
                        cmd = "rsync -avh --remove-sent-files "+ self.__get_work_directory() + "/ " + self.__get_save_directory()
                        logging.getLogger("Analysis.sync").debug(cmd)
                        retcode = call(["rsync", "-avh", "--remove-sent-files", self.__get_work_directory() + "/", self.__get_save_directory()], shell=False)
                        if retcode != 0:
                            logging.getLogger("Analysis.sync").error("Error when synchronizing analysis id=" + str(self.id) + " (retcode=" + str(retcode) + ")")
                        else:
                            try: os.rmdir(self.__get_work_directory())
                            except: pass
                            logging.getLogger("Analysis.sync").debug("Synchronization done for analysis id=" + str(self.id) + " (retcode=" + str(retcode) + ")")
                    else:
                        logging.getLogger("Analysis.sync").info("The synchronisation has not been performed, either because " + self.__get_work_directory() + " does not exist or because the source and destination folders are the same.")
                    # update the storage size
                    t3mysql = t3MySQLdb()
                    t3mysql.update_analysis_field(self.id, "storage_size", Utils.get_storage_size(self.__get_save_directory()))
                except Exception as err:
                    raise Exception(str(err))

    def list_or_copy_analysis_files(self, outputdir, pattern):
        """
        Return the list of files matching the pattern; compressed matches are uncompressed into the output directory.
          @param outputdir : the output directory for uncompressed copies (a temporary directory is created if empty)
          @param pattern   : the file name suffix to look for (an empty pattern returns every file)
          @return          : the list of matching files
        """
        matched_files = []
        if outputdir == "":
            outputdir = tempfile.mkdtemp()

        for file in os.listdir(self.__get_save_directory()) :
            # If it fits criteria
            if pattern != "":
                if file.endswith(pattern):
                    matched_files.append(os.path.join(self.__get_save_directory(), file))
                elif file.endswith(pattern + ".gz") or file.endswith(pattern + ".zip"):
                    matched_files.append(Utils.gunzip(os.path.join(self.__get_save_directory(), file), outputdir))
                elif file.endswith(pattern + ".bz2") or file.endswith(pattern + ".bz"):
                    matched_files.append(Utils.uncompress_bz2(os.path.join(self.__get_save_directory(), file), outputdir))
                elif file.endswith("tar.gz") or file.endswith("tar.zip"):
                    matched_files.extend(Utils.untar_files(os.path.join(self.__get_save_directory(), file), pattern, outputdir))
                elif file.endswith("tar.bz2") or file.endswith("tar.bz"):
                    matched_files.extend(Utils.untar_files(os.path.join(self.__get_save_directory(), file), pattern, outputdir))
            else:
                matched_files.append(file)
        return matched_files


    def get_run_config_file(self):
        """
        Return the path to the run config file if the analysis is linked to a run
          @return      : the run config file path if it is a run analysis, None otherwise
        """
        try:
            t3mysql = t3MySQLdb()
            run_id = t3mysql.select_analysis_run_id(self.id)
            my_run = Run.get_from_id(run_id)
            return my_run.get_config_file()
        except :
            return None

    def get_template(self):
        """
        Return the name of the template used for the visualisation.
        """
        return self.__class__.__name__

    def save(self):
        """
        update the database info
        """
        # First add the analysis into the database
        logging.getLogger("Analysis.save").debug("Connection attempt")
        t3mysql = t3MySQLdb()
        logging.getLogger("Analysis.save").debug("Connection done")
        if self.parent: parent_uid = self.parent.id
        else: parent_uid = 0
        logging.getLogger("Analysis.save").debug("Doing the add analysis")
        self.id = t3mysql.add_analysis(self.get_template(), self.name, self.description, self.admin_login,
                                      datetime.date.today(), self.directory, self.software, self.version,
                                      self.options, self.retention_date, self.retention_date,
                                      self.is_editable, parent_uid=parent_uid)
        logging.getLogger("Analysis.save").debug("add_analysis done")
        # Then add all results linked to this analysis
        for file in self.results:
            for result in self.results[file]:
                t3mysql.add_result(self.id, file, result[0], result[1], result[2])

        # Finally return its id
        return self.id

    def get_config_file_path(self):
        return os.path.join(self.output_directory, self.ANALYSIS_CONFIG_FILE_NAME)

    def write_config_file(self):
        """
        Write an analysis config file
          @return : path to the config file
        """
        t3mysql = t3MySQLdb()

        # First select the analysis
        [name, date, description, software, options, version] = t3mysql.select_analysis(self.id)
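        # mirror the database record into an INI file so that other jobs
        # (see add_analysis and get_from_file) can reload this analysis later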
        config = RawConfigParser()
        config.add_section("Analysis")
        config.set("Analysis", "analysis_id", self.id)
        config.set("Analysis", "name", name)
        config.set("Analysis", "date", date)
        config.set("Analysis", "description", description)
        config.set("Analysis", "software", software)
        config.set("Analysis", "options", options)
        config.set("Analysis", "version", version)

        config_path = self.get_config_file_path()
        with open(config_path, 'w') as config_file:
            config.write(config_file)
        return config_path


    def _add_result_element(self, file, result_key, result_value, result_group="default"):
        """
        add the result row
          @param file         : the file name the result is linked to
          @param result_key   : the result key
          @param result_value : the result value associated to the key
          @param result_group : the result group it belongs to
        """
        if file in self.results:
            self.results[file].append([result_key, result_value, result_group])
        else :
            self.results[file] = [[result_key, result_value, result_group]]


    def _create_and_archive(self, files, archive_name=None, prefix="dir"):
        """
        Return the web path to the archive file.
        If there are samples, data will be organised by sample.
          @param files        : list of files to archive
          @param archive_name : the archive name to use; if not set, "archive" is used
          @param prefix       : in case of identical file names, put them
                                in directories prefixed by this value
          @return             : the web path to the archive
        """
        logging.getLogger("Analysis").debug("_create_and_archive entering")
        # If archive name not set use "archive" instead
        if archive_name == None:
            archive_name = "archive"
        if archive_name.endswith(".gz"):
            archive_name = os.path.splitext(archive_name)[0]

        file_basenames = []
        for file in files:
            (fhead, ftail)= os.path.split(file)
            file_basenames.append(ftail)

        #If some files have the same name
        if len(file_basenames) != len(set(file_basenames)) :
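            # duplicated basenames: group the files by sample (the sample name is
            # searched in each file path); files sharing a folder with a grouped file
            # join that group, anything left goes to an "others" folder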

            try : # If there are samples
                if self.run :
                    samples = self.run.get_samples()
                else :
                    run_id = self.__get_run_id()
                    my_run = Run.get_from_id(run_id)
                    samples = my_run.get_samples()
                logging.getLogger("Analysis").debug("_create_and_archive with samples")
                gfiles = {}
                ungrouped_files = []
                what_left = []
                for file in files :
                    ungrouped_files.append(file)
                    what_left.append(file)

                # First group files if they have the sample name in their path
                for file in files :

                    # In case of multiple descriptions take the longest
                    best_description = []
                    for sample in samples :
                        spname = sample.name
                        spname_regex = re.compile(".*" + spname + ".*")
                        spr = spname_regex.match(file)
                        if spr :
                            best_description.append(spname)

                    max_len = 0
                    final_description = None
                    if len(best_description) > 0:
                        for bdescription in best_description:
                            if len(bdescription) > max_len:
                                max_len = len(bdescription)
                                final_description = bdescription

                    if final_description != None :
                        if final_description in gfiles:
                            gfiles[final_description].append(file)
                        else :
                            gfiles[final_description] = [file]
                        if file in ungrouped_files:
                            ungrouped_files.remove(file)
                            what_left.remove(file)

                # For files which don't have the mid in their path, try to group them considering their path
                to_add = {}
                for file in ungrouped_files:
                    (fhead, ftail)= os.path.split(file)
                    for mid in gfiles.keys() : # For each group
                        for grouped_file in gfiles[mid]: # for each file of this group
                            (ghead, gtail)= os.path.split(grouped_file)
                            if fhead == ghead: # If this group file
                                if file in what_left :
                                    what_left.remove(file)
                                if mid in to_add :
                                    to_add[mid].append(file)
                                else :
                                    to_add[mid] = [file]

                for mid in to_add.keys() :
                    gfiles[mid].extend(to_add[mid])

                # Then create the archive
                tmp_dir = tempfile.NamedTemporaryFile().name
                os.makedirs(tmp_dir)
                for mid in gfiles.keys() :
                    mid_dir = os.path.join(tmp_dir, mid)
                    os.makedirs(mid_dir)
                    for file in gfiles[mid]:
                        copyfile(file, os.path.join(mid_dir, os.path.basename(file)))

                if len(what_left) > 0:
                    if len(gfiles.keys()) > 0 :
                        other_dir = os.path.join(tmp_dir, "others")
                        os.makedirs(other_dir)
                        for file in what_left:
                            copyfile(file, os.path.join(other_dir, os.path.basename(file)))
                    else :
                        for file in what_left:
                            copyfile(file, os.path.join(tmp_dir, os.path.basename(file)))

                logging.getLogger("Analysis").debug("_create_and_archive before tarf")
                tarf = Utils.tar_dir(tmp_dir, os.path.join(self.__get_work_directory(), archive_name))
                logging.getLogger("Analysis").debug("_create_and_archive before targzf")
                targzf = Utils.gzip(tarf, self.__get_work_directory(), delete=False)
                # Then delete temporary files
                logging.getLogger("Analysis").debug("_create_and_archive before os.remove(tarf)")
                os.remove(tarf)
                logging.getLogger("Analysis").debug("_create_and_archive before rmtree(tmp_dir)")
                rmtree(tmp_dir)
                logging.getLogger("Analysis").debug("_create_and_archive before return " + os.path.join(self.directory, os.path.basename(targzf)))
                return 'fileadmin' + os.path.join(self.directory, os.path.basename(targzf))

            except :
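                # no samples could be retrieved: fall back to grouping the files by
                # their parent folder, each group directory being named <prefix>NNN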
                logging.getLogger("Analysis").debug("_create_and_archive in except branch, no samples available")
                gfiles = {}
                # Group files by folders
                for file in files:
                    (fhead, ftail)= os.path.split(file)
                    if fhead in gfiles:
                        gfiles[fhead].append(file)
                    else :
                        gfiles[fhead] = [file]
                # Then create the archive
                tmp_dir = tempfile.NamedTemporaryFile().name
                os.makedirs(tmp_dir)
                for i, dir_name in enumerate(gfiles.keys()) :
                    dir_index = prefix
                    for j in range(3 - len(str(i))):
                        dir_index += "0"
                    dir_index += str(i+1)
                    dir = os.path.join(tmp_dir, dir_index)
                    os.makedirs(dir)
                    for file in gfiles[dir_name]:
                        copyfile(file, os.path.join(dir, os.path.basename(file)))
                logging.getLogger("Analysis").debug("_create_and_archive before tarf")
                tarf = Utils.tar_dir(tmp_dir, os.path.join(self.__get_work_directory(), archive_name))
                logging.getLogger("Analysis").debug("_create_and_archive before targzf")
                targzf = Utils.gzip(tarf, self.__get_work_directory(), delete=False)
                # Then delete temporary files
                logging.getLogger("Analysis").debug("_create_and_archive before os.remove(tarf)")
                os.remove(tarf)
                logging.getLogger("Analysis").debug("_create_and_archive before rmtree(tmp_dir)")
                rmtree(tmp_dir)
                logging.getLogger("Analysis").debug("_create_and_archive before return " + os.path.join(self.directory, os.path.basename(targzf)))
                return 'fileadmin' + os.path.join(self.directory, os.path.basename(targzf))

        else :
            logging.getLogger("Analysis").debug("_create_and_archive, no duplicated file names")
            logging.getLogger("Analysis").debug("_create_and_archive before tarf")
            tarf = Utils.tar_files(files, os.path.join(self.__get_work_directory(), archive_name))
            logging.getLogger("Analysis").debug("_create_and_archive before targzf")
            targzf = Utils.gzip(tarf, self.__get_work_directory(), delete=False)
            # Then delete temporary files
            logging.getLogger("Analysis").debug("_create_and_archive before os.remove(tarf)")
            os.remove(tarf)
            logging.getLogger("Analysis").debug("_create_and_archive before return " + os.path.join(self.directory, os.path.basename(targzf)))
            return 'fileadmin' + os.path.join(self.directory, os.path.basename(targzf))


    def _archive_files(self, files, mode, archive_name="ng6_archive.tar", delete=False):
        """
        Copy, archive or compress the files list to the current analysis. Files can then be downloaded and given back by
        the script ng6run2ergatis.
          @param files: the files to archive
          @param mode: can be none, gz, bz2, tar.gz and tar.bz2
          @param archive_name: the archive name if tar is requested
          @param delete: if True, delete the original files once archived
        """

        # First handle if only one file
        if len(files) == 1 and mode == "tar.gz":
            mode = "gz"
        elif len(files) == 1 and mode == "tar.bz2":
            mode = "bz2"

        if mode == "none":
            for file in files:
                if os.path.isfile(file):
                    copyfile(file, os.path.join(self.__get_work_directory(), os.path.basename(file)))
                # Finaly try to delete the original file if asked to do so
                try:
                    if delete:
                        os.remove(file)
                except:
                    pass

        elif mode == "gz":
            for file in files:
                Utils.gzip(file, self.__get_work_directory(), delete)

        elif mode == "bz2":
            for file in files:
                Utils.bz2(file, self.__get_work_directory(), delete)

        elif mode == "tar.gz":
            Utils.tar_files(files, os.path.join(self.__get_work_directory(), archive_name), delete)
            Utils.gzip(os.path.join(self.__get_work_directory(), archive_name), self.__get_work_directory(), True)

        elif mode == "tar.bz2":
            Utils.tar_files(files, os.path.join(self.__get_work_directory(), archive_name), delete)
            Utils.bz2(os.path.join(self.__get_work_directory(), archive_name), self.__get_work_directory(), True)

    def _save_directory(self, directory, directory_name=None):
        """
        add a directory in the analysis directory
          @param directory      : the path to the directory to link
          @param directory_name : the directory name to use
        """
        # First copy the file into the analysis directory
        if os.path.isdir(directory) :
            if directory_name == None:
                directory_name = os.path.basename(directory)
            shutil.copytree(directory, os.path.join(self.__get_work_directory(), directory_name))

    def _save_file(self, file, file_name=None, gzip=False):
        """
        add a file in the analysis directory, and return its web path
          @param file         : the path to the file to link
          @param file_name    : the file name to use to store the image
          @param gzip         : if True the file will be gziped
          @return             : the web path to the file
        """

        if not os.path.isdir(self.__get_work_directory()):
            os.makedirs(self.__get_work_directory(), 0o751)
        # First copy the file into the analysis directory
        if os.path.isfile(file) :
            if file_name == None:
                file_name = os.path.basename(file)
            if gzip is True :
                file = Utils.gzip(file, self.__get_work_directory(), delete=False)
                file_name = os.path.basename(file)
            else :
                copyfile(file, os.path.join(self.__get_work_directory(), file_name))
            return self.get_full_filepath(file_name, 'fileadmin')

    def _save_files(self, files, gzip=False):
        """
        add files in the analysis directory, and return their web paths
          @param files        : the paths of the files to link
          @param gzip         : if True the files will be gziped
          @return             : the web paths to the files
        """
        file_path = []
        for file in files:
            file_path.append(self._save_file(file, gzip=gzip))
        return file_path

    def __get_run_id(self):
        """
        Return the run id the analysis belongs to
          @return run_id  : the run id the analysis belongs to
        """
        t3mysql = t3MySQLdb()
        return t3mysql.select_analysis_run_id(self.id)

    def __get_project_id(self):
        """
        Return the project id the analysis belongs to
          @return project_id  : the project id the analysis belongs to
        """
        t3mysql = t3MySQLdb()
        return t3mysql.select_analysis_project_id(self.id)

    def __get_save_directory(self):
        """
        Return the full path of the analysis directory into the save dir
        """
        ng6conf = NG6ConfigReader()
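        # self.directory already starts with "/", so plain concatenation gives the full path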
        return ng6conf.get_save_directory() + self.directory


    def __get_work_directory(self):
        """
        Return the full path of the analysis directory into the work dir
        """
        ng6conf = NG6ConfigReader()
        return ng6conf.get_work_directory() + self.directory


    @staticmethod
    def get_from_id (id):
        """
        Return an Analysis object specified by its id
          @param id : the analysis id
        """
        logging.getLogger("Analysis.get_from_id").debug("Loading the analysis from id " + str(id))
        #try:
        t3mysql = t3MySQLdb()
        [name, date, description, software, options, version] = t3mysql.select_analysis(id)
        my_analysis = Analysis(name, description, software, options, version, id, date)
        if my_analysis == None:
            logging.getLogger("Analysis.get_from_id").error("The analysis id=" + str(id) + " does not exist in the database.")
            raise Exception("The analysis id=" + str(id) + " does not exist in the database.\n")
        return my_analysis
        #except Exception as err:
        #    logging.getLogger("Analysis.qget_from_id").error("Error while loading the analysis from the database [" + str(err) + "]")
        #    raise Exception("Error while loading the analysis from the database\n" + str(err))


    @staticmethod
    def get_from_file (config_file):
        """
        Return an analysis object specified by its config file
          @param config_file : the analysis config file
        """
        try:
            reader = ConfigParser()
            reader.read(config_file)
            analysis_id = reader.get('Analysis', 'analysis_id')
            my_analysis = Analysis.get_from_id(analysis_id)
            return my_analysis
        except :
            pass


    def get_creation_date(self):
        """
        Returns an analysis's crdate value
        """
        if self.id != None :
            t3mysql = t3MySQLdb()
            result = t3mysql.get_analysis_creation_date(str(self.id))
            return next(iter(result))
    
    def get_directory(self):
        """
        Returns an analysis's directory
        """
        if self.id != None :
            t3mysql = t3MySQLdb()
            return t3mysql.select_analysis_directory(str(self.id))
    
    def change_space (self, space_id ):
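        """
        Move the analysis data to another storage space: rsync the saved directory
        to the new space, then update the directory, retention date and result file
        paths in the database.
          @param space_id : the destination space id
          @return         : [retcode, str_cmd] of the rsync command
        """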
        import os
        from ng6.project import Project
        ng6conf = NG6ConfigReader()
        
        old_path = ng6conf.get_save_directory() + self.directory
        directory_name = os.path.split(old_path)[-1]
        new_relative_path = os.path.join(ng6conf.get_space_directory(space_id), self.DIRECTORIES_STRUCTURE, directory_name)
        
        new_absolute_path =  os.path.join(ng6conf.get_save_directory(), new_relative_path )
        
        #We create the /analyze directory if it's missing
        path_to_analyze_dir = os.path.join(ng6conf.get_save_directory(),ng6conf.get_space_directory(space_id),self.DIRECTORIES_STRUCTURE)
        if not os.path.isdir(path_to_analyze_dir):
            os.mkdir(path_to_analyze_dir,0o755)
        
        str_cmd = ""
        retcode = -1
        if str(old_path) != str(new_absolute_path):
            [retcode, str_cmd] = Utils.rsync_getcmd(old_path,new_absolute_path)
            if retcode != 0 :               
                raise Exception("Error while trying to rsync " + old_path + " to " + new_absolute_path + "\n" +
                                "Command : " + str_cmd + "\n" + "Error code : " + str(retcode) + "\n")
        else:
            str_cmd = "Source and destination directories are the same : " + old_path + " , ignored.\n"
        
        new_retention = ng6conf.get_retention_date(space_id, self.date)
        
        t3mysql = t3MySQLdb()
        t3mysql.update_fields('tx_nG6_analyze', str(self.id), ['directory','retention_date'], [str("/"+new_relative_path),new_retention] )
        
        old_dir_name = self.__get_save_directory()
        new_dir_name = ng6conf.get_space_directory(space_id)
        
        result_files = t3mysql.get_analysis_result_files(str(self.id), 'fileadmin')
        for result_file in result_files :
            filename = os.path.basename(result_file['rvalue'])
            new_filepath = self.get_full_filepath(filename)
            t3mysql.update_fields('tx_nG6_result', str(result_file['uid']), ['rvalue'], [str(new_filepath)] )
            
        return [retcode, str_cmd]
    
    def get_full_filepath(self, file_name, base_filepath='fileadmin' ):
        return base_filepath + os.path.join(self.directory, file_name)