Commit 31b218d8 authored by Penom Nom

upgrade sample handling

parent b31ba169
@@ -1003,8 +1003,15 @@ CREATE TABLE IF NOT EXISTS `tx_nG6_sample` (
`deleted` tinyint(4) NOT NULL DEFAULT '0',
`hidden` tinyint(4) NOT NULL DEFAULT '0',
`run_id` int(11) NOT NULL DEFAULT '0',
`mid` varchar(255) NOT NULL DEFAULT '',
`sample_id` varchar(255) NOT NULL DEFAULT '',
`name` varchar(255) NOT NULL DEFAULT '',
`description` varchar(255) NOT NULL DEFAULT '',
`reads1` text NOT NULL DEFAULT '',
`reads2` text NOT NULL DEFAULT '',
`type` varchar(255) NOT NULL DEFAULT '',
`insert_size` int(11) NOT NULL DEFAULT '0',
`species` varchar(255) NOT NULL DEFAULT '',
`nb_sequence` int(11) NOT NULL DEFAULT '0',
PRIMARY KEY (`uid`),
KEY `parent` (`pid`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 AUTO_INCREMENT=1 ;
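For orientation, the upgraded table now stores one row per sample, carrying its read files and metadata. A minimal sketch of the record shape, with hypothetical values (field names taken from the schema above):

# Hypothetical row for the upgraded tx_nG6_sample table.
sample_row = {
    'run_id': 42,
    'sample_id': 'S1',                    # external sample identifier
    'name': 'wt_rep1',
    'description': 'wild type, replicate 1',
    'reads1': 'wt_rep1_R1.fastq.gz',      # stored as a comma-joined list
    'reads2': 'wt_rep1_R2.fastq.gz',
    'type': 'pe',                         # see Sample.AVAILABLE_TYPES below
    'insert_size': 300,
    'species': 'Mus musculus',
    'nb_sequence': 1000000,
}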
@@ -330,7 +330,7 @@ class Analysis (Component):
def _create_and_archive(self, files, archive_name=None, prefix="dir"):
"""
return the web path to the archive files
If there are mids, data will be organised by mids
If there are samples, data will be organised by samples
@param files : table of file
@param archive_name : the archive name to use, if not set will be
the same as link_value
@@ -352,14 +352,14 @@ class Analysis (Component):
#If some files have the same name
if len(file_basenames) != len(set(file_basenames)) :
try : # If there are mids
try : # If there are samples
if self.run :
mids_desc = self.run.get_mids_description()
samples = self.run.get_samples()
else :
run_id = self.__get_run_id()
my_run = Run.get_from_id(run_id)
mids_desc = my_run.get_mids_description()
samples = my_run.get_samples()
gfiles = {}
ungrouped_files = []
what_left = []
@@ -367,16 +367,17 @@ class Analysis (Component):
ungrouped_files.append(file)
what_left.append(file)
# First group files if they have the mid name in their path
# First group files if they have the sample name in their path
for file in files :
# In case of multiple descriptions take the longest
best_description = []
for mid_desc in mids_desc.keys():
mid_desc_regex = re.compile(".*" + mid_desc + ".*")
mdr = mid_desc_regex.match(file)
if mdr != None:
best_description.append(mid_desc)
for sample in samples :
spname = sample.name
spname_regex = re.compile(".*" + spname + ".*")
spr = spname_regex.match(file)
if spr :
best_description.append(spname)
max_len = 0
final_description = None
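The loop above collects every sample name that occurs in a file path, then keeps the longest one. A self-contained sketch of that rule (sample names and paths hypothetical; re.escape is added here so names are matched literally, which the original regexes do not do):

import re

def group_by_sample(files, sample_names):
    # Assign each file to the sample whose name is the longest
    # substring of its path; unmatched files are kept apart.
    groups, ungrouped = {}, []
    for path in files:
        matches = [n for n in sample_names if re.search(re.escape(n), path)]
        if matches:
            best = max(matches, key=len)
            groups.setdefault(best, []).append(path)
        else:
            ungrouped.append(path)
    return groups, ungrouped

# 'sampleA_2' wins over 'sampleA' for the second file.
groups, rest = group_by_sample(
    ["run/sampleA.fq", "run/sampleA_2.fq"], ["sampleA", "sampleA_2"])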
@@ -64,6 +64,7 @@ class Run(object):
self.description = description
self.sequencer = sequencer
self.__mids_description = None
self.__samples = None
self.raw_files = []
self.admin_login = None
# Set the temp folder to the ng6 temp folder
@@ -107,38 +108,6 @@ class Run(object):
self.set_full_size(full_size)
self.archive_files(self.raw_files, mode)
def export_mids_to_newbler_cfg_file(self, error=2, midscheme="454MIDS"):
"""
Export the config file in newbler format
the script add_raw_files.
@param files: the files to archive
@param mode: can be none, gz, bz2, tar.gz and tar.bz2
"""
mids_cfg_path = tempfile.NamedTemporaryFile(suffix=".cfg").name
mid_file = open(mids_cfg_path, "w")
mid_file.write(midscheme+"\n")
mid_file.write("{\n")
# MID1:desc_ex1;MID2:desc_ex2;MID3,MID4:
cfg_reader = NG6ConfigReader()
mids = cfg_reader.get_454_mids()
# First write the run mids config file
for mid in self.__mids_description:
for mid_name in mid.split(","):
try:
mid_name = mid_name.strip()
seq = mids[mid_name.lower()].split(",")
seq_val = ''
if len(seq) > 1:
seq_val += '"' + seq[0] + '", ' + str(error) + ', "' + seq[1] + '";'
else:
seq_val += '"' + seq[0] + '", ' + str(error) + ';'
mid_file.write('\tmid = "' + self.__mids_description[mid] + '", ' + seq_val +"\n")
except:
pass
mid_file.write("}")
mid_file.close()
return [mids_cfg_path, midscheme]
def archive_files(self, files, mode, archive_name="ng6_archive.tar", delete=False):
"""
Copy, archive and compress the files list to the run. Files can then be downloaded and given back by
@@ -236,7 +205,7 @@ class Run(object):
logging.getLogger("Run.sync").error("Error when synchronizing analysis that belongs to run id=" + str(self.id))
raise Exception(str(err))
# If a sync is required
if len(os.listdir(self.__get_work_directory())):
if os.path.exists(self.__get_work_directory()) and len(os.listdir(self.__get_work_directory())):
# Finally the Run itself
try :
# not created for updates
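The added os.path.exists test avoids an OSError from os.listdir when the work directory was never created. The guard in isolation (path hypothetical):

import os

def needs_sync(work_dir):
    # True only if the directory exists and is non-empty.
    return os.path.exists(work_dir) and len(os.listdir(work_dir)) > 0

print(needs_sync("/tmp/ng6_work"))   # False when missing or empty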
@@ -278,32 +247,43 @@ class Run(object):
"""
return self.__get_save_directory()
def get_mids_description(self):
"""
Return a dictionary with mids description
@return : {"MID1":"mid1_desc", "MID2":"mid2_desc", ...}
"""
if self.__mids_description :
return self.__mids_description
def get_samples(self):
'''
Return the list of Sample objects attached to this run
'''
if self.__samples :
return self.__samples
else :
t3mysql = t3MySQLdb()
return t3mysql.select_run_samples(self.id)
def add_mids_description(self, mids_desc):
"""
Add a mids description to the run
@param mids_desc : the mids description dictionary
{"MID1":"mid1_desc", "MID2":"mid2_desc", ...}
"""
# First add the description to the python object
self.__mids_description = mids_desc
results = t3mysql.select_run_samples(self.id)
samples = []
for res in results :
samples.append(Sample( res['sample_id'], res['reads1'], reads2 = res['reads2'], name = res['name'], description = res['description'],
type = res['type'], insert_size = res['insert_size'], species = res['species'], nb_sequence = res['nb_sequence'] ))
return samples
if self.id != None:
def add_samples(self, samples):
'''
Add a list of samples to the run
@param samples: the list of Sample objects to add
'''
self.__samples = samples
if self.id != None :
# Update database
t3mysql = t3MySQLdb()
for mid_name in self.__mids_description.keys():
t3mysql.add_sample_to_run(self.id, mid=mid_name, description=self.__mids_description[mid_name])
for spo in self.__samples:
reads1 = []
reads2 = []
if spo.reads1:
reads1 = [ os.path.basename(ff) for ff in spo.reads1 ]
if spo.reads2 :
reads2 = [ os.path.basename(ff) for ff in spo.reads2 ]
t3mysql.add_sample_to_run(self.id, sample_id = spo.sample_id, reads1 = reads1, reads2 = reads2, name=spo.name,
description = spo.description, type = spo.type, insert_size = spo.insert_size,
species = spo.species, nb_sequence = spo.nb_sequence)
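Taken together, add_samples caches the Sample objects and mirrors them into tx_nG6_sample once the run has an id, while get_samples serves the cache or rebuilds objects from the database. A hedged usage sketch (paths and values hypothetical; my_run stands for a Run created elsewhere, its constructor is not part of this diff):

samples = [Sample("S1", ["/data/S1_R1.fq.gz"], reads2=["/data/S1_R2.fq.gz"]),
           Sample("S2", ["/data/S2_R1.fq.gz"], name="mutant")]
my_run.add_samples(samples)       # caches the list; INSERTs rows when my_run.id is set
print([s.sample_id for s in my_run.get_samples()])   # ['S1', 'S2'] from the cache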
def get_config_file(self):
"""
Return a Run config file
@@ -370,11 +350,12 @@ class Run(object):
self.id = t3mysql.add_run(self.name, self.date, self.directory, self.species, self.data_nature,
self.type, 0, 0, self.description, self.sequencer, self.admin_login)
# If there is some mids desc provided
if self.__mids_description != None:
for mid_name in self.__mids_description.keys():
t3mysql.add_sample_to_run(self.id, mid=mid_name, description=self.__mids_description[mid_name])
# if there are samples
if self.__samples :
for spo in self.__samples:
t3mysql.add_sample_to_run(self.id, name=spo.name, description=spo.description, reads1 = spo.reads1,
reads2 = spo.reads2, type = spo.type, insert_size = spo.insert_size,
species = spo.species, nb_sequence = spo.nb_sequence)
# Finally return its id
return self.id
@@ -18,15 +18,34 @@
class Sample(object):
def __init__(self, name, description, reads1, reads2 = None):
AVAILABLE_TYPES = ["pe", "se", "ose", "ope", "mp"]
def __init__(self, sample_id, reads1, reads2 = None, name = None, description = None, type = None, insert_size = None, species = None, nb_sequence = None ):
self.sample_id = sample_id
self.name = name
self.description = description
self.reads1 = reads1
self.reads2 = reads2
self.insert_size = insert_size
self.nb_sequence = nb_sequence
self.species = species
self.type = type
if isinstance(reads1, str) :
self.reads1 = [reads1]
self.reads2 = reads2
if isinstance(reads2, str) :
self.reads2 = [reads2]
if self.type is None:
if self.reads2 :
self.type = self.AVAILABLE_TYPES[0]
else :
self.type = self.AVAILABLE_TYPES[1]
if self.nb_sequence and isinstance(self.nb_sequence, str) :
self.nb_sequence = int(self.nb_sequence)
self.metadata = {}
def add_metadata(self, key, value):
@@ -39,3 +58,5 @@ class Sample(object):
def has_metadata(self, key):
return self.metadata.has_key(key)
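The constructor wraps single read paths into lists and, when no type is given, infers it from the presence of reads2 (first AVAILABLE_TYPES entry for paired data, second for single-end). For instance:

pe = Sample("S1", "reads_R1.fastq", reads2="reads_R2.fastq")
assert pe.reads1 == ["reads_R1.fastq"]   # string wrapped into a list
assert pe.type == "pe"                   # reads2 present -> paired-end

se = Sample("S2", ["reads.fastq"])
assert se.type == "se"                   # no reads2 -> single-end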
@@ -438,21 +438,52 @@ class t3MySQLdb(object):
conn.close()
return analysis_ids
def add_sample_to_run(self, run_id, mid, description):
def add_sample_to_run(self, run_id, sample_id, reads1, reads2 = None, name = None , description = None,
type = None, insert_size = None, species = None, nb_sequence = None):
"""
add a sample to the run
@param run_id : the run id
@param mid : the mid name
@param description : the mid description
@param sample_id : the sample's unique identifier
@param name : the sample name
@return sample id
"""
conn = connect(self.host, self.user, self.passwd, self.db)
curs = conn.cursor()
crdate = str(time.mktime(datetime.date.today().timetuple())).split(".")[0]
req = "INSERT INTO tx_nG6_sample (pid,tstamp,run_id,mid,description) "
req += "VALUES ('" + str(self.cfg_reader.get_pid())+ "','" + str(crdate) + "','"
req += str(run_id) + "','" + mid.replace("'", "\'") + "','"
req += description.replace("'", "\'") + "')"
req_part1 = "INSERT INTO tx_nG6_sample (pid, tstamp, run_id, sample_id, reads1"
req_part2 = "VALUES ('" + str(self.cfg_reader.get_pid())+ "','" + str(crdate) + "','"
req_part2 += str(run_id) + "','" + str(sample_id).replace("'", "\'") + "','" + ','.join(reads1).replace("'", "\'") + "'"
if name :
req_part1 += ', name'
req_part2 += ", '" + str(name).replace("'", "\'") + "'"
if description :
req_part1 += ', description'
req_part2 += ", '" + str(description).replace("'", "\'") + "'"
if reads2 :
req_part1 += ', reads2'
req_part2 += ", '" + ','.join(reads2).replace("'", "\'") + "'"
if type :
req_part1 += ', type'
req_part2 += ", '" + type.replace("'", "\'") + "'"
if insert_size :
req_part1 += ', insert_size'
req_part2 += ", '" + str(insert_size).replace("'", "\'") + "'"
if species :
req_part1 += ', species'
req_part2 += ", '" + species.replace("'", "\'") + "'"
if nb_sequence :
req_part1 += ', nb_sequence'
req_part2 += ", '" + str(nb_sequence).replace("'", "\'") + "'"
req = req_part1 + ' ) ' + req_part2 + ' ) '
curs.execute(req)
sample_id = conn.insert_id()
@@ -464,7 +495,7 @@ class t3MySQLdb(object):
curs.close()
conn.close()
if( str(sample_pid[0]) != str(self.cfg_reader.get_pid()) ):
raise Exception('t3MySQLdb', 'The id of the sample ' + mid + ' could not be retrieved.')
raise Exception('t3MySQLdb', 'The id of the sample ' + name + ' could not be retrieved.')
return sample_id
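One caveat in the statement built above: in Python the literal "\'" is identical to "'", so the replace("'", "\'") calls do not actually escape anything, and string-concatenated SQL stays fragile against stray quotes. A hedged sketch of the same dynamic insert using MySQLdb parameter binding instead (illustrative names; conn is an open MySQLdb connection):

def add_sample_to_run(conn, pid, tstamp, run_id, sample_id, reads1, **optional):
    # Build the column list dynamically, but let the driver quote values.
    cols = ['pid', 'tstamp', 'run_id', 'sample_id', 'reads1']
    vals = [pid, tstamp, run_id, sample_id, ','.join(reads1)]
    for col in ('name', 'description', 'reads2', 'type',
                'insert_size', 'species', 'nb_sequence'):
        if optional.get(col):
            cols.append(col)
            vals.append(','.join(optional[col]) if col == 'reads2' else optional[col])
    req = "INSERT INTO tx_nG6_sample (%s) VALUES (%s)" % (
        ', '.join(cols), ', '.join(['%s'] * len(vals)))
    curs = conn.cursor()
    curs.execute(req, vals)      # values are bound, not concatenated
    return conn.insert_id()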
@@ -472,17 +503,29 @@ class t3MySQLdb(object):
"""
select all samples linked to the run
@param run_id : the run id
@return a mid description table {"MID1":"desc1", ...}
@return a list of dictionaries, one per sample
"""
conn = connect(self.host, self.user, self.passwd, self.db)
curs = conn.cursor()
req = "SELECT mid, description FROM tx_nG6_sample WHERE run_id='" + str(run_id) + "'"
req = "SELECT run_id, uid, sample_id, name, description, reads1, reads2, type, insert_size, species, nb_sequence FROM tx_nG6_sample WHERE run_id='" + str(run_id) + "'"
curs.execute(req)
mids = {}
samples = []
result_set = curs.fetchall()
for result in result_set:
mids[result[0]] = result[1]
return mids
samples.append({
'run_id' : result[0],
'id' : result[1],
'sample_id' : result[2],
'name' : result[3],
'description' : result[4],
'reads1' : result[5],
'reads2' : result[6],
'type' : result[7],
'insert_size' : result[8],
'species' : result[9],
'nb_sequence' : result[10]
})
return samples
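Mapping tuple indices to keys by hand can drift out of sync with the SELECT list; MySQLdb's DictCursor returns such dictionaries directly. A sketch under that assumption (note the method above also exposes uid under the key 'id'):

import MySQLdb
import MySQLdb.cursors

def select_run_samples(host, user, passwd, db, run_id):
    conn = MySQLdb.connect(host, user, passwd, db,
                           cursorclass=MySQLdb.cursors.DictCursor)
    curs = conn.cursor()
    curs.execute("SELECT run_id, uid, sample_id, name, description, reads1, "
                 "reads2, type, insert_size, species, nb_sequence "
                 "FROM tx_nG6_sample WHERE run_id = %s", (run_id,))
    rows = curs.fetchall()
    conn.close()
    samples = []
    for row in rows:
        row['id'] = row.pop('uid')    # keep the original's key naming
        samples.append(row)
    return samples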
#--------------------------------------------------------------------
# Analyze functions
@@ -320,78 +320,4 @@ class Utils(object):
return path_groups
@staticmethod
def filesFromCasava( casava_directory, project_name, lane_number ):
"""
Returns index description, list of R1 and list of R2 files from a CASAVA directory.
@param casava_directory : path to CASAVA output directory
@param project_name : files in returned lists are part of this project
@param lane_number : files in returned lists are sequenced on this lane
"""
read1_files = []
read2_files = []
undetermined_read1_files = []
undetermined_read2_files = []
mids_desc_array = {}
# Parse the sample sheet
if not os.path.isfile(os.path.join(casava_directory, "SampleSheet.mk")):
raise ValueError, "The folder " + casava_directory + " doesn't contain the file SampleSheet.mk."
barcodes = ""
sample_ids = ""
subdirs = ""
samples = []
# Retrieve all information for samples in lane
for line in open(os.path.join(casava_directory, "SampleSheet.mk")).readlines():
# Retrieve barcodes
if line.startswith("l" + lane_number + "_BARCODES"):
parts = line.strip().split(":=")
barcodes_list = parts[1].split(" ")
for i in range(len(barcodes_list)):
samples.append({'barcode':barcodes_list[i]})
# Retrieve samples ids
elif line.startswith("l" + lane_number + "_SAMPLEIDS" ):
parts = line.strip().split(":=")
sample_ids_list = parts[1].split(" ")
for i in range(len(sample_ids_list)):
samples[i]['sample_id'] = sample_ids_list[i]
# Retrieve folder
elif line.startswith("l" + lane_number + "_SUBDIRS"):
parts = line.strip().split(":=")
subdirs_list = parts[1].split(" ")
for i in range(len(subdirs_list)):
samples[i]['subdir'] = subdirs_list[i]
# Filter on project name
aux_samples = []
for current_sample in samples:
if (re.match("Project_" + project_name + "/Sample_.+", current_sample['subdir']) is not None) or (current_sample['subdir'].startswith("Undetermined_indices")):
aux_samples.append(current_sample)
samples = aux_samples
if len(samples) == 0:
raise ValueError, "The project " + project_name + " in lane " + lane_number + " doesn't exist in CASAVA directory " + casava_directory + "."
# Create files lists
for current_sample in samples:
# Write line in the index description
if (current_sample['barcode'] != "NoIndex") and (not current_sample['subdir'].startswith("Undetermined_indices")) :
mids_desc_array[current_sample['barcode']] = current_sample['sample_id']
# Write files lists
for file in os.listdir(casava_directory + "/" + current_sample['subdir']):
if file.endswith(".fastq.gz") and re.search(".*_L00" + lane_number + "_.*", file):
if re.search(".*_R1_.*", file):
if not current_sample['subdir'].startswith("Undetermined_indices"):
read1_files.append(casava_directory + "/" + current_sample['subdir'] + "/" + file)
else:
undetermined_read1_files.append(casava_directory + "/" + current_sample['subdir'] + "/" + file)
if re.search(".*_R2_.*", file):
if not current_sample['subdir'].startswith("Undetermined_indices"):
read2_files.append(casava_directory + "/" + current_sample['subdir'] + "/" + file)
else:
undetermined_read2_files.append(casava_directory + "/" + current_sample['subdir'] + "/" + file)
return mids_desc_array, sorted(read1_files), sorted(read2_files), sorted(undetermined_read1_files), sorted(undetermined_read2_files)
\ No newline at end of file
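The removed filesFromCasava parser reads make-style assignments out of SampleSheet.mk; its per-line extraction boils down to the following (sketch, with a hypothetical input line):

def lane_values(line, lane_number, key):
    # Parse lines such as "l1_BARCODES:=ACGT TGCA NoIndex".
    prefix = "l" + lane_number + "_" + key
    if line.startswith(prefix):
        return line.strip().split(":=")[1].split(" ")
    return None

print(lane_values("l1_BARCODES:=ACGT TGCA", "1", "BARCODES"))  # ['ACGT', 'TGCA']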
@@ -1150,7 +1150,7 @@ class tx_nG6_db {
// First select all analysis from the database
$queryParts = array(
'SELECT' => 'mid, description ',
'SELECT' => 'sample_id, name ',
'FROM' => 'tx_nG6_sample ',
'WHERE' => 'run_id='.$run_id,
'GROUPBY' => '',
@@ -1161,7 +1161,7 @@ class tx_nG6_db {
$result = array();
$res = $GLOBALS['TYPO3_DB']->exec_SELECT_queryArray($queryParts);
while($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) {
$result[$row["mid"]] = $row["description"];
$result[$row["sample_id"]] = $row["name"];
}
return $result;
@@ -143,6 +143,9 @@ class tx_nG6_upgrade {
// add comment support
tx_nG6_upgrade::add_comment_support();
// add sample support
tx_nG6_upgrade::upgrade_samples();
}
function set_storage_size($data_storage) {
@@ -268,6 +271,42 @@ class tx_nG6_upgrade {
$GLOBALS['TYPO3_DB']->sql_query($queryCreateAnalyzecomment);
}
function upgrade_samples(){
// Alter tx_nG6_sample change mid to sample_id
$queryAlterTxNG6SampleMID = "
ALTER TABLE tx_nG6_sample
CHANGE mid sample_id VARCHAR(255) NOT NULL DEFAULT ''
";
// Alter tx_nG6_sample description to name
$queryAlterTxNG6SampleDescription = "
ALTER TABLE tx_nG6_sample
CHANGE description name VARCHAR(255) NOT NULL DEFAULT ''
";
// Add column description, reads1, reads2, type, insert_size, species, nb_sequence
$queryAlterTxNG6SampleAddColumns = "
ALTER TABLE tx_nG6_sample
ADD `description` varchar(255) NOT NULL DEFAULT '' AFTER name ,
ADD `reads1` text NOT NULL DEFAULT '' AFTER description,
ADD `reads2` text NOT NULL DEFAULT '' AFTER reads1,
ADD `type` varchar(255) NOT NULL DEFAULT '' AFTER reads2,
ADD `insert_size` int(11) NOT NULL DEFAULT '0' AFTER type,
ADD `species` varchar(255) NOT NULL DEFAULT '' AFTER insert_size,
ADD `nb_sequence` int(11) NOT NULL DEFAULT '0' AFTER species
";
/*
* Execute all queries
*/
$GLOBALS['TYPO3_DB']->sql_query($queryAlterTxNG6SampleMID);
$GLOBALS['TYPO3_DB']->sql_query($queryAlterTxNG6SampleDescription);
$GLOBALS['TYPO3_DB']->sql_query($queryAlterTxNG6SampleAddColumns);
}
/*
* version 1.2
*------------------------------------------------------------*/
@@ -27,7 +27,5 @@ class AddRun (NG6Workflow):
"""
Run the workflow
"""
# First check that the provided files exist
files_to_save = self.get_all_reads1() + self.get_all_reads2()
# archive the files
addrawfiles = self.add_component("AddRawFiles", [self.runobj, files_to_save, self.args["compression"]])
\ No newline at end of file
addrawfiles = self.add_component("AddRawFiles", [self.runobj, self.get_all_reads(), self.args["compression"]])
\ No newline at end of file
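Several workflow diffs below replace get_all_reads1()/get_all_reads2() with a single get_all_reads() accessor taking an optional read index. The accessor itself is not shown in this commit; a plausible sketch over the run samples (an assumption, not the project's implementation):

def get_all_reads(self, which=None):
    # which=1 -> all reads1, which=2 -> all reads2, None -> both.
    reads1, reads2 = [], []
    for sample in self.runobj.get_samples():
        reads1.extend(sample.reads1 or [])
        reads2.extend(sample.reads2 or [])
    if which == 1:
        return reads1
    if which == 2:
        return reads2
    return reads1 + reads2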
@@ -97,14 +97,14 @@ class GeneDiversity (NG6Workflow):
def process(self):
# Manage samples
merge_groups = self._load_merge_arg( self.get_samples_name() ) if self.get_samples_name() else None
merge_groups = self._load_merge_arg( self.samples_names ) if self.samples_names else None
# Add raw files
addrawfiles = self.add_component( "AddRawFiles", [self.runobj, self.get_all_reads1() + self.get_all_reads2(), "none"] )
addrawfiles = self.add_component( "AddRawFiles", [self.runobj, self.get_all_reads(), "none"] )
# Trim sequences
trim_R1 = self.add_component("Trimmer", [self.get_all_reads1(), 1, self.args['trim_read_1']], component_prefix="R1")
trim_R2 = self.add_component("Trimmer", [self.get_all_reads2(), 1, self.args['trim_read_2']], component_prefix="R2")
trim_R1 = self.add_component("Trimmer", [self.get_all_reads(1), 1, self.args['trim_read_1']], component_prefix="R1")
trim_R2 = self.add_component("Trimmer", [self.get_all_reads(2), 1, self.args['trim_read_2']], component_prefix="R2")
# Make some statistics on raw file
fastqc = self.add_component("FastQC", [trim_R1.output_files + trim_R2.output_files, False, True])
@@ -127,7 +127,7 @@ class GeneDiversity (NG6Workflow):
# Sequence traduction
split = self.add_component("SplitSeq", [chimera.nonchimeras, 6000])
split_outputs, new_samples_names = self._get_clean_list( chimera.nonchimeras, split.output_files, self.get_samples_name() )
split_outputs, new_samples_names = self._get_clean_list( chimera.nonchimeras, split.output_files, self.samples_names )
framebot = self.add_component("Framebot", [split_outputs, self.args["database"], self.args["protein_min_length"], False])
# Rename the pre-clusters to provide traceback after merge and cd-hit
@@ -26,17 +26,17 @@ from ng6.utils import Utils
class IlluminaDiversityQC (CasavaNG6Workflow):
def process(self):
if len(self.get_all_undetermined_read1()) > 0 :
demultiplex_stats = self.add_component("DemultiplexStats", [self.get_all_reads1(), self.get_all_undetermined_read1()])
if len(self.undetermined_reads1) > 0 :
demultiplex_stats = self.add_component("DemultiplexStats", [self.get_all_reads(1), self.undetermined_reads1])
if self.args["keep_reads"] != "all" :
# fastq illumina filter
fastqilluminafilter = self.add_component("FastqIlluminaFilter", [self.get_all_reads1()+self.get_all_reads2(), self.args["keep_reads"], self.get_group_prefix(), self.get_nospace_runname()+"_fastqilluminafilter.tar.gz"])
fastqilluminafilter = self.add_component("FastqIlluminaFilter", [self.get_all_reads(), self.args["keep_reads"], self.group_prefix])
# list filtered files
if self.is_paired_end() :
# split read 1 and read 2 from filtered files list
[filtered_read1_files, filtered_read2_files] = Utils.split_pair(fastqilluminafilter.fastq_files_filtered, (self.get_group_prefix() is not None))
[filtered_read1_files, filtered_read2_files] = Utils.split_pair(fastqilluminafilter.fastq_files_filtered, (self.group_prefix is not None))
else:
filtered_read1_files = fastqilluminafilter.fastq_files_filtered
filtered_read2_files = []
@@ -44,26 +44,26 @@ class IlluminaDiversityQC (CasavaNG6Workflow):
filtered_read2_files = sorted(filtered_read2_files)
else:
fastqilluminafilter = None
filtered_read1_files = self.get_all_reads1()
filtered_read2_files = self.get_all_reads2()
filtered_read1_files = self.get_all_reads(1)
filtered_read2_files = self.get_all_reads(2)
# archive the files
saved_files = filtered_read1_files + filtered_read2_files
reads_prefixes = None
if self.get_group_prefix() is not None :
if self.group_prefix is not None :
# concatenate fastq
reads_prefixes = (Utils.get_group_basenames(saved_files, "read")).keys()
concatenatefastq = self.add_component("ConcatenateFilesGroups", [saved_files, reads_prefixes])
saved_files = concatenatefastq.concat_files
addrawfiles = self.add_component("AddRawFiles", [self.runobj, saved_files, self.args["compression"]])
concatenate1 = self.add_component("ConcatenateFilesGroups", [filtered_read1_files, (Utils.get_group_basenames(self.get_all_reads1(), "read")).keys()],component_prefix="read1")
concatenate2 = self.add_component("ConcatenateFilesGroups", [filtered_read2_files, (Utils.get_group_basenames(self.get_all_reads2(), "read")).keys()],component_prefix="read2")
concatenate1 = self.add_component("ConcatenateFilesGroups", [filtered_read1_files, (Utils.get_group_basenames(self.get_all_reads(1), "read")).keys()],component_prefix="read1")
concatenate2 = self.add_component("ConcatenateFilesGroups", [filtered_read2_files, (Utils.get_group_basenames(self.get_all_reads(2), "read")).keys()],component_prefix="read2")
concatenate1.concat_files = sorted(concatenate1.concat_files)
concatenate2.concat_files = sorted(concatenate2.concat_files)
# make some statistics on raw file
fastqc = self.add_component("FastQC", [filtered_read1_files+filtered_read2_files, (self.get_group_prefix() is not None), True, self.get_nospace_runname()+"_fastqc.tar.gz"], parent = fastqilluminafilter)
fastqc = self.add_component("FastQC", [filtered_read1_files+filtered_read2_files, (self.group_prefix is not None), True, "fastqc.tar.gz"], parent = fastqilluminafilter)
# contamination_search
try: self.args["databank"].extend([self.get_resource("phix_bwa"), self.get_resource("ecoli_bwa"), self.get_resource("yeast_bwa")])
@@ -26,17 +26,17 @@ from ng6.utils import Utils
class IlluminaQualityCheck (CasavaNG6Workflow):
def process(self):
if len(self.get_all_undetermined_read1()) > 0 :
demultiplex_stats = self.add_component("DemultiplexStats", [self.get_all_reads1(), self.get_all_undetermined_read1()])
if len(self.undetermined_reads1) > 0 :
demultiplex_stats = self.add_component("DemultiplexStats", [self.get_all_reads(1), self.undetermined_reads1])
if self.args["keep_reads"] != "all" :
# fastq illumina filter
fastqilluminafilter = self.add_component("FastqIlluminaFilter", [self.get_all_reads1()+self.get_all_reads2(), self.args["keep_reads"], self.get_group_prefix(), self.get_nospace_runname()+"_fastqilluminafilter.tar.gz"])
fastqilluminafilter = self.add_component("FastqIlluminaFilter", [self.get_all_reads(), self.args["keep_reads"], self.group_prefix])
# list filtered files
if self.is_paired_end() :
# split read 1 and read 2 from filtered files list
[filtered_read1_files, filtered_read2_files] = Utils.split_pair(fastqilluminafilter.fastq_files_filtered, (self.get_group_prefix() is not None))
[filtered_read1_files, filtered_read2_files] = Utils.split_pair(fastqilluminafilter.fastq_files_filtered, (self.group_prefix is not None))
else:
filtered_read1_files = fastqilluminafilter.fastq_files_filtered
filtered_read2_files = []
@@ -44,13 +44,13 @@ class IlluminaQualityCheck (CasavaNG6Workflow):
filtered_read2_files = sorted(filtered_read2_files)
else:
fastqilluminafilter = None
filtered_read1_files = self.get_all_reads1()
filtered_read2_files = self.get_all_reads2()
filtered_read1_files = self.get_all_reads(1)
filtered_read2_files = self.get_all_reads(2)
# archive the files
saved_files = filtered_read1_files + filtered_read2_files
reads_prefixes = None
if self.get_group_prefix() is not None :
if self.group_prefix is not None :
# concatenate fastq
reads_prefixes = (Utils.get_group_basenames(saved_files, "read")).keys()
concatenatefastq = self.add_component("ConcatenateFilesGroups", [saved_files, reads_prefixes])
@@ -58,7 +58,7 @@ class IlluminaQualityCheck (CasavaNG6Workflow):
addrawfiles = self.add_component("AddRawFiles", [self.runobj, saved_files, self.args["compression"]])
# make some statistics on raw file
fastqc = self.add_component("FastQC", [filtered_read1_files+filtered_read2_files, (self.get_group_prefix() is not None), True, self.get_nospace_runname()+"_fastqc.tar.gz"], parent = fastqilluminafilter)
fastqc = self.add_component("FastQC", [filtered_read1_files+filtered_read2_files, (self.group_prefix is not None), True, "fastqc.tar.gz"], parent = fastqilluminafilter)
# contamination_search
try: self.args["databank"].extend([self.get_resource("phix_bwa"), self.get_resource("ecoli_bwa"), self.get_resource("yeast_bwa")])
@@ -75,7 +75,7 @@ class IlluminaQualityCheck (CasavaNG6Workflow):
# align reads against indexed genome
sample_lane_prefixes = None
if self.get_group_prefix() is not None :
if self.group_prefix is not None :
sample_lane_prefixes = (Utils.get_group_basenames(filtered_read1_files+filtered_read2_files, "lane")).keys()
bwa = self.add_component("BWA", [indexed_ref, filtered_read1_files, filtered_read2_files, sample_lane_prefixes, "aln", not self.args["delete_bam"]], parent = fastqilluminafilter)
@@ -87,8 +87,8 @@ class IlluminaQualityCheck (CasavaNG6Workflow):
insertssizes = self.add_component("InsertsSizes", [bwa.bam_files, self.args["histogram_width"], self.args["min_pct"], "LENIENT", "inserts_sizes.tar.gz"], parent = bwa)
if self.args["assignation_databank"]:
R1_prefix = self.get_group_prefix()
if self.get_group_prefix() is not None :
R1_prefix = self.group_prefix
if self.group_prefix is not None :
R1_prefix = (Utils.get_group_basenames(filtered_read1_files, "read")).keys()
# subset assignation
subset_assignation = self.add_component("SubsetAssignation", [filtered_read1_files, self.args["assignation_databank"], R1_prefix], parent = fastqilluminafilter)
@@ -26,17 +26,17 @@ from ng6.utils import Utils
class RnaSeqQualityCheck (CasavaNG6Workflow):
def process(self):
if len(self.get_all_undetermined_read1()) > 0 :
demultiplex_stats = self.add_component("DemultiplexStats", [self.get_all_reads1(), self.get_all_undetermined_read1()])
if len(self.undetermined_reads1) > 0 :
demultiplex_stats = self.add_component("DemultiplexStats", [self.get_all_reads(1), self.undetermined_reads1])
if self.args["keep_reads"] != "all" :
# fastq illumina filter
fastqilluminafilter = self.add_component("FastqIlluminaFilter", [self.get_all_reads1()+self.get_all_reads2(), self.args["keep_reads"], self.get_group_prefix(), self.get_nospace_runname()+"_fastqilluminafilter.tar.gz"])
fastqilluminafilter = self.add_component("FastqIlluminaFilter", [self.get_all_reads(), self.args["keep_reads"], self.group_prefix])
# list filtered files
if self.is_paired_end() :
# split read 1 and read 2 from filtered files list
[filtered_read1_files, filtered_read2_files] = Utils.split_pair(fastqilluminafilter.fastq_files_filtered, (self.get_group_prefix() is not None))
[filtered_read1_files, filtered_read2_files] = Utils.split_pair(fastqilluminafilter.fastq_files_filtered, (self.group_prefix is not None))
else: