Maintenance - Mise à jour mensuelle Lundi 6 Avril 2020 entre 7h00 et 9h00

Commit 323216d4 authored by Penom Nom's avatar Penom Nom

upgrade sample handling

parent 31b218d8
......@@ -1011,7 +1011,8 @@ CREATE TABLE IF NOT EXISTS `tx_nG6_sample` (
`type` varchar(255) NOT NULL DEFAULT '',
`insert_size` int(11) NOT NULL DEFAULT '0',
`species` varchar(255) NOT NULL DEFAULT '',
`nb_sequence` int(11) NOT NULL DEFAULT '0',
`nb_sequences` int(11) NOT NULL DEFAULT '0',
`full_seq_size` int(11) NOT NULL DEFAULT '0',
PRIMARY KEY (`uid`),
KEY `parent` (`pid`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 AUTO_INCREMENT=1 ;
......
......@@ -150,14 +150,13 @@ class NG6Workflow (BasicNG6Workflow):
subparams.append(Parameter("type", "Read orientation and type", "type", choices = '|'.join(Sample.AVAILABLE_TYPES) ))
subparams.append(Parameter("insert_size", "Insert size for paired end reads", "insert_size", type ='int' ))
subparams.append(Parameter("species", "Species related to this sample", "species"))
subparams.append(Parameter("nb_sequence", "Number of sequences in the sample file", "nb_sequence", type = 'int'))
osamples.append(Parameter("sample", "Definition of a sample", "--sample", action = "append", type="multiple", required = True, sub_parameters = subparams, group = "exclude-sample" ))
return osamples
def __create_samples__(self):
for sd in self.args['sample'] :
sp_object = Sample( sd['sample_id'], sd['read1'], sd['read2'], name = sd['sample_name'], description = sd['sample_description'], type = sd['type'],
insert_size = sd['insert_size'], species = sd['species'], nb_sequence = sd['nb_sequence'] )
insert_size = sd['insert_size'], species = sd['species'] )
self.reads1.extend(sp_object.reads1)
self.reads2.extend(sp_object.reads2)
self.samples.append(sp_object)
......
......@@ -63,7 +63,6 @@ class Run(object):
self.type = type
self.description = description
self.sequencer = sequencer
self.__mids_description = None
self.__samples = None
self.raw_files = []
self.admin_login = None
......@@ -258,7 +257,22 @@ class Run(object):
results = t3mysql.select_run_samples(self.id)
samples = []
for res in results :
samples.append(Sample( res['sample_id'], res['reads1'], reads2 = res['reads2'], name = res['name'], description = res['description'],
t = dict(reads1 = [], reads2 = [])
for k in t.keys() :
for f in res[k].split(',') :
fpath = f
if os.path.isdir(self.__get_work_directory()) :
fpath = self.__get_work_directory() + "/" + fpath
elif os.path.isdir(self.__get_save_directory()) :
fpath = self.__get_save_directory() + "/" + fpath
if not os.path.isfile(fpath):
raise Exception("The sample read file %s cannot be retrieved either in the work or save directory for the run %s " % fpath, self.id)
if k == 'reads1' :
reads1.append(fpath)
else :
reads2.append(fapth)
samples.append(Sample( res['sample_id'], t['reads1'], reads2 = t['reads2'], name = res['name'], description = res['description'],
type = res['type'], insert_size = res['insert_size'], species = res['species'], nb_sequence = res['nb_sequence'] ))
return samples
......@@ -280,10 +294,11 @@ class Run(object):
if spo.reads2 :
reads2 = [ os.path.basename(ff) for ff in spo.reads2 ]
t3mysql.add_sample_to_run(self.id, sample_id = spo.sample_id, reads1 = reads1, reads2 = reads2, name=spo.name,
uid = t3mysql.add_sample_to_run(self.id, sample_id = spo.sample_id, reads1 = reads1, reads2 = reads2, name=spo.name,
description = spo.description, type = spo.type, insert_size = spo.insert_size,
species = spo.species, nb_sequence = spo.nb_sequence)
species = spo.species, nb_sequences = spo.nb_sequences)
spo.id = uid
def get_config_file(self):
"""
Return a Run config file
......@@ -349,14 +364,6 @@ class Run(object):
t3mysql = t3MySQLdb()
self.id = t3mysql.add_run(self.name, self.date, self.directory, self.species, self.data_nature,
self.type, 0, 0, self.description, self.sequencer, self.admin_login)
# if there are samples
if self.__samples :
for spo in self.__samples:
t3mysql.add_sample_to_run(self.id, name=spo.name, description=spo.description, reads1 = spo.reads1,
reads2 = spo.reads2, type = spo.type, insert_size = spo.insert_size,
species = spo.species, nb_sequence = spo.nb_sequence)
# Finally return its id
return self.id
def __get_work_directory(self):
......@@ -383,16 +390,12 @@ class Run(object):
t3mysql = t3MySQLdb()
[name, date, species, data_nature, type, description, sequencer] = t3mysql.select_run(id)
my_run = Run(name, date, species, data_nature, type, description, sequencer, id)
# Add the mids_description to the object
# reload samples
try :
mids_desc = {}
samples = t3mysql.select_run_samples(id)
# Create the mids_desc table
for sample in samples.keys():
mids_desc[sample] = samples[sample]
my_run.__mids_description = mids_desc
my_run.__samples = None
my_run.__samples = my_run.get_samples()
except :
pass
logging.getLogger("Run.get_from_id").debug("Impossible to get samples for run id=" + str(id))
return my_run
except :
logging.getLogger("Run.get_from_id").debug("Impossible to build run id=" + str(id))
......
......@@ -15,21 +15,26 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from ng6.exceptions import UnsavedRunError
from ng6.t3MySQLdb import t3MySQLdb
class Sample(object):
AVAILABLE_TYPES = ["pe", "se", "ose", "ope", "mp"]
def __init__(self, sample_id, reads1, reads2 = None, name = None, description = None, type = None, insert_size = None, species = None, nb_sequence = None ):
def __init__(self, sample_id, reads1, reads2 = None, name = None, description = None, type = None,
insert_size = None, species = None, nb_sequences = None, full_size = None, id = None ):
self.sample_id = sample_id
self.name = name
self.description = description
self.reads1 = reads1
self.reads2 = reads2
self.insert_size = insert_size
self.nb_sequence = nb_sequence
self.nb_sequences = nb_sequences
self.full_size = full_size
self.species = species
self.type = type
self.id = id
if isinstance(reads1, str) :
self.reads1 = [reads1]
......@@ -43,8 +48,8 @@ class Sample(object):
else :
self.type = self.AVAILABLE_TYPES[1]
if self.nb_sequence and isinstance(self.nb_sequence, str) :
self.nb_sequence = int(self.nb_sequence)
if self.nb_sequences and isinstance(self.nb_sequences, str) :
self.nb_sequences = int(self.nb_sequences)
self.metadata = {}
......@@ -58,5 +63,32 @@ class Sample(object):
def has_metadata(self, key):
    """
    Tell whether a metadata entry is stored under the given key
      @param key : the metadata key to look for
      @return    : True if key is present in self.metadata, False otherwise
    """
    # dict.has_key() is deprecated and no longer exists in Python 3;
    # the "in" operator gives the same answer on every Python version.
    return key in self.metadata
def get_all_reads(self):
    """
    Return every read file of the sample, reads1 first then reads2
      @return : a new list holding the content of self.reads1 followed by
                the content of self.reads2 (when reads2 is set)
    """
    # Work on a copy: "allreads = self.reads1" followed by "+=" would extend
    # self.reads1 itself, so each call would append reads2 to it again.
    allreads = list(self.reads1)
    if self.reads2 :
        allreads.extend(self.reads2)
    return allreads
def set_nb_sequences(self, nb_seqs):
    """
    Sets the number of sequences to nb_seqs, in the database and on this object
      @param nb_seqs : the sample number of sequences
      @raise UnsavedRunError : when the sample has no id (self.id is None)
    """
    # Without an id there is no database row to update
    if self.id is None :
        raise UnsavedRunError()
    t3mysql = t3MySQLdb()
    t3mysql.update_sample_info(self.id, nb_sequences=nb_seqs)
    # The attribute is only changed once the database call has returned
    self.nb_sequences = nb_seqs
def set_full_size(self, full_size):
    """
    Sets the number of nucleotides in all sequences, in the database and on this object
      @param full_size : the number of nucleotides
      @raise UnsavedRunError : when the sample has no id (self.id is None)
    """
    # Without an id there is no database row to update
    if self.id is None :
        raise UnsavedRunError()
    t3mysql = t3MySQLdb()
    t3mysql.update_sample_info(self.id, full_seq_size=full_size)
    # The attribute is only changed once the database call has returned
    self.full_size = full_size
\ No newline at end of file
......@@ -438,8 +438,43 @@ class t3MySQLdb(object):
conn.close()
return analysis_ids
def update_sample_info(self, id , sample_id = None, name = None, reads1 = None, reads2 = None,
                       description = None, type = None, insert_size = None, nb_sequences = None, species = None,
                       full_seq_size = None):
    """
    Update columns of the tx_nG6_sample row whose uid is id
      @param id            : the uid of the sample row to update
      @param sample_id     : new sample_id value, written only if truthy
      @param name          : new name value, written only if truthy
      @param reads1        : new reads1 value, written only if truthy
      @param reads2        : new reads2 value, written only if truthy
      @param description   : new description value, written only if truthy
      @param type          : new type value, written only if truthy
      @param insert_size   : new insert_size value, written only if truthy
      @param nb_sequences  : new nb_sequences value, written when not None
      @param species       : new species value, written only if truthy
      @param full_seq_size : new full_seq_size value, written when not None
    When no column qualifies for an update the database is not contacted.
    """
    # (column, value, selected) triples, in the order the columns are written.
    # nb_sequences and full_seq_size use "is not None" so that 0 can be stored.
    candidates = [
        ("sample_id", sample_id, bool(sample_id)),
        ("name", name, bool(name)),
        ("reads1", reads1, bool(reads1)),
        ("reads2", reads2, bool(reads2)),
        ("description", description, bool(description)),
        ("type", type, bool(type)),
        ("insert_size", insert_size, bool(insert_size)),
        ("nb_sequences", nb_sequences, nb_sequences is not None),
        ("full_seq_size", full_seq_size, full_seq_size is not None),
        ("species", species, bool(species)),
    ]
    selected = [(column, value) for column, value, keep in candidates if keep]
    if not selected:
        # An UPDATE with an empty SET clause is not valid SQL
        return

    # Column names are fixed literals; every value travels as a bound
    # parameter so quotes in names/descriptions cannot break the statement.
    # NOTE(review): assumes connect() comes from a DB-API driver using the
    # "%s" paramstyle (e.g. MySQLdb) - confirm against the file's imports.
    req = "UPDATE tx_nG6_sample SET "
    req += ", ".join(column + "=%s" for column, _ in selected)
    req += " WHERE uid = %s"
    # Values are sent as strings, as they were when inlined in the statement
    params = [str(value) for _, value in selected]
    params.append(str(id))

    conn = connect(self.host, self.user, self.passwd, self.db)
    try:
        curs = conn.cursor()
        curs.execute(req, params)
        conn.commit()
    finally:
        # Release the connection even when the statement fails
        conn.close()
def add_sample_to_run(self, run_id, sample_id, reads1, reads2 = None, name = None , description = None,
type = None, insert_size = None, species = None, nb_sequence = None):
type = None, insert_size = None, species = None, nb_sequences = None):
"""
add a sample to the run
@param run_id : the run id
......@@ -479,9 +514,9 @@ class t3MySQLdb(object):
req_part1 += ', species'
req_part2 += ", '" + species.replace("'", "\'") + "'"
if nb_sequence :
req_part1 += ', nb_sequence'
req_part2 += ", '" + str(nb_sequence).replace("'", "\'") + "'"
if nb_sequences :
req_part1 += ', nb_sequences'
req_part2 += ", '" + str(nb_sequences).replace("'", "\'") + "'"
req = req_part1 + ' ) ' + req_part2 + ' ) '
curs.execute(req)
......@@ -507,7 +542,7 @@ class t3MySQLdb(object):
"""
conn = connect(self.host, self.user, self.passwd, self.db)
curs = conn.cursor()
req = "SELECT run_id, uid, sample_id, name, description, reads1, reads2, type, insert_size, species, nb_sequence FROM tx_nG6_sample WHERE run_id='" + str(run_id) + "'"
req = "SELECT run_id, uid, sample_id, name, description, reads1, reads2, type, insert_size, species, nb_sequences, full_seq_size FROM tx_nG6_sample WHERE run_id='" + str(run_id) + "'"
curs.execute(req)
samples = []
result_set = curs.fetchall()
......@@ -523,7 +558,8 @@ class t3MySQLdb(object):
'type' : result[7],
'insert_size' : result[8],
'species' : result[9],
'nb_sequence' : result[10]
'nb_sequences' : result[10],
'full_seq_size' : result[11],
})
return samples
......
......@@ -287,7 +287,7 @@ class tx_nG6_upgrade {
CHANGE description name VARCHAR(255) NOT NULL DEFAULT ''
";
// Add column description, reads1, reads2, type, insert_size, species, nb_sequence
// Add column description, reads1, reads2, type, insert_size, species, nb_sequences
$queryAlterTxNG6SampleAddColumns = "
ALTER TABLE tx_nG6_sample
ADD `description` varchar(255) NOT NULL DEFAULT '' AFTER name ,
......@@ -296,7 +296,8 @@ class tx_nG6_upgrade {
ADD `type` varchar(255) NOT NULL DEFAULT '' AFTER reads2,
ADD `insert_size` int(11) NOT NULL DEFAULT '0' AFTER type,
ADD `species` varchar(255) NOT NULL DEFAULT '' AFTER insert_size,
ADD `nb_sequence` int(11) NOT NULL DEFAULT '0' AFTER species
ADD `nb_sequences` int(11) NOT NULL DEFAULT '0' AFTER species
ADD `full_seq_size` int(11) NOT NULL DEFAULT '0' AFTER nb_sequences
";
/*
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment