blast_contamination_search2ng6.py 7.59 KB
Newer Older
Jerome Mariette's avatar
Jerome Mariette committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
#
# Copyright (C) 2009 INRA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#

__author__ = 'Plateforme bioinformatique Midi Pyrenees'
__copyright__ = 'Copyright (C) 2009 INRA'
__license__ = 'GNU General Public License'
__version__ = '1.0'
__email__ = 'support.genopole@toulouse.inra.fr'
__status__ = 'beta'

Jerome Mariette's avatar
Jerome Mariette committed
25
from optparse import *
Jerome Mariette's avatar
Jerome Mariette committed
26
27
import os, sys, re
from Bio import SeqIO
Jerome Mariette's avatar
Jerome Mariette committed
28

Jerome Mariette's avatar
Jerome Mariette committed
29
30
31
from ng6.Analyse import Analyse
from ng6.Project import Project
from ng6.Run import Run
Jerome Mariette's avatar
Jerome Mariette committed
32
33


34
class BlastContaminationSearchAnalyse (Analyse):
    """
    Class BlastContaminationSearchAnalyse: Define a nG6 Analyse storing the
    results of a blast contamination search.

    For each contamination directory, the m8 result file is parsed to count
    the contaminations and to find the database used. The counts are then
    added as result elements, per database and in total for each sample, and
    the result files are archived.
    """

    def __init__(self, name, description, software, options, version):
        """
        Build the analyse; all parameters are forwarded to Analyse.__init__
          @param name        : the analyse name
          @param description : the analyse description
          @param software    : the software used
          @param options     : the software parameters used, read back by
                               process() as self.options (presumably stored
                               by Analyse.__init__ -- to be confirmed)
          @param version     : the software version
        """
        Analyse.__init__(self, name, description, software, options, version)

    def parse_m8_file (self, m8_file):
        """
        Parse the contamination m8 file
          @param m8_file    : the m8 file path
          @return           : a table [nb_of_contamination, contamination_base],
                              contamination_base being "???" when the file
                              holds no "# Database: " line
        """
        # The with statement closes the file even if reading it fails
        with open(m8_file, 'r') as fd:
            content = fd.read()
        # Number of contamination: occurrences of the "# BLASTN " marker
        n = content.count('# BLASTN ')
        # Find out which database has been used
        m = re.search(r"# Database: (.*)", content)
        if m is None:
            db = "???"
        else:
            db = m.group(1).strip()
        return [n, db]

    def parse_params (self, options):
        """
        Parse the software parameters string
          @param options  : the params used, as "blastall=val; blast_filter=val"
          @return         : {"blastall": val, "filter": val}, a key being
                            left out when its part is missing or malformed
        """
        params = {}
        parts = (options or "").split(";")
        # Expected prefix of each part (by position) and the key it is stored
        # under. The second part is matched with its leading space, as it
        # follows the "; " separator.
        expected = (("blastall", "blastall"), (" blast_filter", "filter"))
        # zip stops at the shortest sequence, so an empty or partial
        # parameters string no longer raises an IndexError
        for part, (prefix, key) in zip(parts, expected):
            if part.startswith(prefix) and "=" in part:
                # Split on the first "=" only so that a value which itself
                # contains "=" is kept whole
                params[key] = part.split("=", 1)[1]
        return params

    def process(self, contamination_dirs, archive_name):
        """
        Process the analyse
          @param contamination_dirs  : list of contamination results directories
          @param archive_name        : the results archive name
        """
        # Organise datas
        contamination_info = {}
        samples = []
        databases = []
        result_files = []
        for contamination_dir in contamination_dirs:
            # Reset for each directory so that a directory without any m8
            # file does not reuse the file found in the previous one
            m8f = None
            for filename in os.listdir(contamination_dir):
                if filename.endswith(".m8"):
                    m8f = os.path.join(contamination_dir, filename)
                    result_files.append(m8f)
                elif filename.endswith(".names"):
                    result_files.append(os.path.join(contamination_dir, filename))
            if m8f is None:
                # Nothing to parse in this directory
                continue
            # If a directory holds several m8 files, only the last one listed
            # is parsed (all of them are archived)
            n, db = self.parse_m8_file(m8f)
            # The sample name is the m8 file name without its last two extensions
            sample = os.path.splitext(os.path.splitext(os.path.basename(m8f))[0])[0]
            if sample not in samples:
                samples.append(sample)
            if db not in databases:
                databases.append(db)
            if db not in contamination_info:
                contamination_info[db] = {"params": self.parse_params(self.options)}
            contamination_info[db][sample] = {"file": m8f, "nb": n}

        # Then add the analyse results
        for sample in samples:
            total = 0
            for db in databases:
                sample_info = contamination_info[db].get(sample)
                if sample_info is None:
                    # This sample has no result for this database
                    continue
                total += sample_info["nb"]
                self._add_result_element(sample, "nb_conta", sample_info["nb"], db)
            self._add_result_element(sample, "nb_conta", str(total), "total")

        # Finally create and add the archive to the analyse
        self._create_and_archive(result_files, archive_name)
Jerome Mariette's avatar
Jerome Mariette committed
125
126
127
128
       
       
if __name__ == '__main__':
    # Command line entry point: build a blast contamination search analyse
    # from the directories listed in the input file, process it, and add it
    # to either a ng6 run (--cfg) or a ng6 project (--project).

    parser = OptionParser(usage="Usage: %prog", description = "Add a blast contamination search analyse to ng6.")

    # Define analyse specific options
    igroup = OptionGroup(parser, "Input options","")
    igroup.add_option("-i", "--input-dirs",   dest="input_dirs",   help="The contamination output directories file.",      metavar="FILE")
    igroup.add_option("-c", "--cfg",          dest="cfg",          help="The ng6 run config file the analyse belongs to.", metavar="FILE")
    igroup.add_option("-p", "--project",      dest="project_id",   help="The project id the analyse belongs to.",          type="string")
    igroup.add_option("-a", "--archive-name", dest="archive_name", help="The results archive name (optional).",            metavar="FILE", default="contamination.tar.gz")
    parser.add_option_group(igroup)

    # Define nG6 specific options
    agroup = OptionGroup(parser, "nG6 analysis options","")
    agroup.add_option("-n", "--analyse-name",                dest="analyse_name",                help="The analyse name to display.",        type="string", default="ContaminationSearch")
    agroup.add_option("-d", "--analyse-description",         dest="analyse_description",         help="The analyse description to display.", type="string", default="Recherche de contaminants.")
    agroup.add_option("-s", "--analyse-software",            dest="analyse_software",            help="The analyse software to display.",    type="string", default="Blast + blast_parser.py")
    agroup.add_option("-v", "--analyse-software-version",    dest="analyse_software_version",    help="The software version used.",          type="string", default="1.0")
    agroup.add_option("-m", "--analyse-software-parameters", dest="analyse_software_parameters", help="The software parameters used.",       type="string", default="")
    parser.add_option_group(agroup)

    (options, args) = parser.parse_args()

    # The input file and either a run config or a project id are required.
    # They are tested for truthiness, like the branches below, so that an
    # empty value is rejected here rather than leaving the analyse processed
    # but never added to a run or a project.
    if not options.input_dirs or not (options.cfg or options.project_id):
        parser.print_help()
        sys.exit(1)

    # Build a BlastContaminationSearchAnalyse
    my_analyse = BlastContaminationSearchAnalyse(options.analyse_name, options.analyse_description, options.analyse_software,
                                                 options.analyse_software_parameters, options.analyse_software_version)

    # Build either a project or a run considering parameters
    if options.cfg :
        my_run = Run.get_from_config(options.cfg)
        my_analyse.set_run(my_run)
    elif options.project_id :
        my_project = Project.get_from_id(options.project_id)

    # First get the contamination directories list: one directory per line.
    # Blank lines are skipped, and the file is closed once read.
    with open(options.input_dirs, 'r') as input_dirs_file:
        contamination_dirs = [line.strip() for line in input_dirs_file if line.strip()]

    # Then process the analyse
    my_analyse.process(contamination_dirs, options.archive_name)

    # Add the analyse either to a project or a run considering parameters
    if options.cfg :
        my_run.add_analyse(my_analyse)
    elif options.project_id :
        my_project.add_analyse(my_analyse)

    sys.exit(0)