Commit 9a66f129 authored by Romain Therville's avatar Romain Therville 🐭
Browse files

Merge branch 'ng6-Slurm-10X' into 'master'

Ng6 slurm 10 x

See merge request !50
parents dd0f10c2 a3e88383
...@@ -188,6 +188,20 @@ class NG6ConfigReader(object): ...@@ -188,6 +188,20 @@ class NG6ConfigReader(object):
except : except :
raise Exception("Failed when parsing the config file get_454_mids !") raise Exception("Failed when parsing the config file get_454_mids !")
def get_10X_indexs(self):
"""
return the 10X indexs list
@return : hash table with 10X barcode list of index
"""
try:
barcode_array = {}
barcodes = self.reader.items("10X_barcodes")
for barcode in barcodes:
barcode_array[barcode[0].upper()] = barcode[1].upper()
return barcode_array
except :
raise Exception("Failed when parsin the config file for 10X barcodes !")
def get_workflow_filters(self): def get_workflow_filters(self):
""" """
Return a list of workflow class names Return a list of workflow class names
......
...@@ -33,7 +33,7 @@ from ng6.project import Project ...@@ -33,7 +33,7 @@ from ng6.project import Project
from ng6.run import Run from ng6.run import Run
from ng6.sample import Sample from ng6.sample import Sample
from ng6.utils import Utils from ng6.utils import Utils
from ng6.config_reader import NG6ConfigReader
class BasicNG6Workflow (Workflow): class BasicNG6Workflow (Workflow):
...@@ -112,6 +112,7 @@ class BasicNG6Workflow (Workflow): ...@@ -112,6 +112,7 @@ class BasicNG6Workflow (Workflow):
raise ValueError("Component " + cmpt_object.__class__.__name__ + " with prefix " + raise ValueError("Component " + cmpt_object.__class__.__name__ + " with prefix " +
cmpt_object.prefix + " already exist in this pipeline!") cmpt_object.prefix + " already exist in this pipeline!")
self.component_nameids[cmpt_object.get_nameid()] = None self.component_nameids[cmpt_object.get_nameid()] = None
return cmpt_object return cmpt_object
else: else:
raise ImportError(component_name + " component cannot be loaded, available components are: {0}".format( raise ImportError(component_name + " component cannot be loaded, available components are: {0}".format(
...@@ -145,6 +146,7 @@ class NG6Workflow (BasicNG6Workflow): ...@@ -145,6 +146,7 @@ class NG6Workflow (BasicNG6Workflow):
self.samples = [] self.samples = []
self.reads1 = [] self.reads1 = []
self.reads2 = [] self.reads2 = []
self.index = []
self.samples_names = [] self.samples_names = []
self.reads1_indexes = [] self.reads1_indexes = []
self.reads2_indexes = [] self.reads2_indexes = []
...@@ -171,10 +173,11 @@ class NG6Workflow (BasicNG6Workflow): ...@@ -171,10 +173,11 @@ class NG6Workflow (BasicNG6Workflow):
self.add_parameter_list("metadata", "Add metadata to the sample", type='samplemetadata' ,add_to = "input_sample") self.add_parameter_list("metadata", "Add metadata to the sample", type='samplemetadata' ,add_to = "input_sample")
self.add_input_file_list("read1", "Read 1 data file path", required = True, add_to = "input_sample") self.add_input_file_list("read1", "Read 1 data file path", required = True, add_to = "input_sample")
self.add_input_file_list("read2", "Read 2 data file path", add_to = "input_sample") self.add_input_file_list("read2", "Read 2 data file path", add_to = "input_sample")
self.add_input_file_list("index", "Index data file path", add_to = "input_sample")
def __create_samples__(self): def __create_samples__(self):
for sd in self.input_sample : for sd in self.input_sample :
sp_object = Sample( sd['sample_id'], sd['read1'], sd['read2'], name = sd['sample_name'], description = sd['sample_description'], type = sd['type'], sp_object = Sample( sd['sample_id'], sd['read1'], sd['read2'], sd['index'],name = sd['sample_name'], description = sd['sample_description'], type = sd['type'],
insert_size = sd['insert_size'], species = sd['species'] ) insert_size = sd['insert_size'], species = sd['species'] )
for metadata in sd['metadata'] : for metadata in sd['metadata'] :
...@@ -209,6 +212,9 @@ class NG6Workflow (BasicNG6Workflow): ...@@ -209,6 +212,9 @@ class NG6Workflow (BasicNG6Workflow):
for rfile in sample.reads2 : for rfile in sample.reads2 :
self.reads2_indexes.append(sample.sample_id) self.reads2_indexes.append(sample.sample_id)
self.reads2.append(rfile) self.reads2.append(rfile)
for rfile in sample.index :
self.index.append(rfile)
if len(self.samples_names) != 0 : if len(self.samples_names) != 0 :
if len(self.samples_names) != len (self.samples) : if len(self.samples_names) != len (self.samples) :
...@@ -219,8 +225,9 @@ class NG6Workflow (BasicNG6Workflow): ...@@ -219,8 +225,9 @@ class NG6Workflow (BasicNG6Workflow):
return self.reads1 return self.reads1
elif type == 'read2' : elif type == 'read2' :
return self.reads2 return self.reads2
return self.reads1 + self.reads2 elif type == 'index' :
return self.index
return self.reads1 + self.reads2 + self.index
def get_files_index(self, type = None): def get_files_index(self, type = None):
if type == 'read1' : if type == 'read1' :
...@@ -229,7 +236,6 @@ class NG6Workflow (BasicNG6Workflow): ...@@ -229,7 +236,6 @@ class NG6Workflow (BasicNG6Workflow):
return self.reads2_indexes return self.reads2_indexes
return self.reads1_indexes + self.reads2_indexes return self.reads1_indexes + self.reads2_indexes
def is_paired_end(self): def is_paired_end(self):
return len(self.reads2) > 0 return len(self.reads2) > 0
...@@ -292,6 +298,25 @@ def get_files_from_casava(casava_directory, project_name, lane_number): ...@@ -292,6 +298,25 @@ def get_files_from_casava(casava_directory, project_name, lane_number):
if file.endswith(".fastq.gz") and re.search(".*_L00" + str(lane) + "_.*", file): if file.endswith(".fastq.gz") and re.search(".*_L00" + str(lane) + "_.*", file):
files.append(filepath); files.append(filepath);
return files return files
def bcl2fastq_10X(directory, pname, lane):
"""longranger"""
files = []
with open(os.path.join(directory, "SampleSheet_10X.mk")) as fh :
subdirs_list = []
for line in fh :
if line.startswith("l" + str(lane) + "_SUBDIRS"):
parts = line.strip().split(":=")
subdirs_list = parts[1].split(" ")
# parse samples
for subdir in subdirs_list:
# filter on project name
if re.match("Project_" + pname + "/Sample_.+", subdir) or subdir.startswith("Undetermined_indices"):
for file in os.listdir(directory + "/" + subdir):
filepath = directory + "/" + subdir + "/" + file
if file.endswith(".fastq.gz") and re.search(".*_L00" + str(lane) + "_.*", file):
files.append(filepath);
return files
def bcl2fastq_216(directory, pname, lane): def bcl2fastq_216(directory, pname, lane):
"""bcl2fastq >= 1.9""" """bcl2fastq >= 1.9"""
...@@ -319,6 +344,8 @@ def get_files_from_casava(casava_directory, project_name, lane_number): ...@@ -319,6 +344,8 @@ def get_files_from_casava(casava_directory, project_name, lane_number):
return bcl2fastq_18(casava_directory, project_name, lane_number) return bcl2fastq_18(casava_directory, project_name, lane_number)
elif os.path.exists(os.path.join( casava_directory, 'Stats', 'DemultiplexingStats.xml')) : elif os.path.exists(os.path.join( casava_directory, 'Stats', 'DemultiplexingStats.xml')) :
return bcl2fastq_216(casava_directory, project_name, lane_number) return bcl2fastq_216(casava_directory, project_name, lane_number)
elif os.path.exists(os.path.join(casava_directory, "SampleSheet_10X.mk")) :
return bcl2fastq_10X(casava_directory, project_name, lane_number)
...@@ -330,8 +357,10 @@ class CasavaNG6Workflow(NG6Workflow): ...@@ -330,8 +357,10 @@ class CasavaNG6Workflow(NG6Workflow):
self.group_prefix = None self.group_prefix = None
self.undetermined_reads1 = [] self.undetermined_reads1 = []
self.undetermined_reads2 = [] self.undetermined_reads2 = []
self.undetermined_index = []
self.log_files = [] self.log_files = []
self.is_casava = False self.is_casava = False
self.is_10Xcasava = False
def __add_sample_parameters__(self): def __add_sample_parameters__(self):
self.add_multiple_parameter('casava', 'Provide the options to retrieve samples from a CASAVA directory', group="Sample description") self.add_multiple_parameter('casava', 'Provide the options to retrieve samples from a CASAVA directory', group="Sample description")
...@@ -387,11 +416,18 @@ class CasavaNG6Workflow(NG6Workflow): ...@@ -387,11 +416,18 @@ class CasavaNG6Workflow(NG6Workflow):
logging.getLogger("ng6").debug("CasavaNG6Workflow.__create_samples__ before self._process_casava_18") logging.getLogger("ng6").debug("CasavaNG6Workflow.__create_samples__ before self._process_casava_18")
all_samples, all_samples_id = self._process_casava_18(casava_directory, project_name, lane_number, input_files) all_samples, all_samples_id = self._process_casava_18(casava_directory, project_name, lane_number, input_files)
logging.getLogger("ng6").debug("CasavaNG6Workflow.__create_samples__ before self._process_casava_18") logging.getLogger("ng6").debug("CasavaNG6Workflow.__create_samples__ before self._process_casava_18")
elif os.path.exists(os.path.join( casava_directory, 'Stats', 'DemultiplexingStats.xml')) : elif os.path.exists(os.path.join( casava_directory, 'Stats', 'DemultiplexingStats.xml')) :
all_samples, all_samples_id = self._process_casava_216(casava_directory, project_name, lane_number, input_files) all_samples, all_samples_id = self._process_casava_216(casava_directory, project_name, lane_number, input_files)
elif os.path.exists(os.path.join( casava_directory, "SampleSheet_10X.mk")):
logging.getLogger("ng6").debug("CasavaNG6Workflow.__create_samples__ before self._process_casava_10X")
all_samples, all_samples_id = self._process_casava_10X(casava_directory, project_name, lane_number, input_files)
self.is_10Xcasava = True
logging.getLogger("ng6").debug("CasavaNG6Workflow.__create_samples__ after self._process_casava_10X")
selected_samples = self.casava['select_sample_id'] selected_samples = self.casava['select_sample_id']
logging.getLogger("CasavaNG6Workflow").debug("__create_samples__. all_samples_id = a"+", ".join(all_samples_id)+"a") logging.getLogger("CasavaNG6Workflow").debug("__create_samples__. all_samples_id = "+", ".join(all_samples_id))
if selected_samples : if selected_samples :
for sid in selected_samples : for sid in selected_samples :
assert sid in all_samples_id , "The sample id %s is not in the SampleSheet.mk" % sid assert sid in all_samples_id , "The sample id %s is not in the SampleSheet.mk" % sid
...@@ -407,6 +443,7 @@ class CasavaNG6Workflow(NG6Workflow): ...@@ -407,6 +443,7 @@ class CasavaNG6Workflow(NG6Workflow):
def __preprocess_samples__(self): def __preprocess_samples__(self):
NG6Workflow.__preprocess_samples__(self) NG6Workflow.__preprocess_samples__(self)
if self.is_casava: if self.is_casava:
self.group_prefix = list((Utils.get_group_basenames(self.get_all_reads(), "read")).keys()) self.group_prefix = list((Utils.get_group_basenames(self.get_all_reads(), "read")).keys())
...@@ -454,7 +491,6 @@ class CasavaNG6Workflow(NG6Workflow): ...@@ -454,7 +491,6 @@ class CasavaNG6Workflow(NG6Workflow):
# filter on project name # filter on project name
if re.match("Project_" + project_name + "/Sample_.+", sample['subdir']) or sample['subdir'].startswith("Undetermined_indices"): if re.match("Project_" + project_name + "/Sample_.+", sample['subdir']) or sample['subdir'].startswith("Undetermined_indices"):
for file in os.listdir(casava_directory + "/" + sample['subdir']): for file in os.listdir(casava_directory + "/" + sample['subdir']):
filepath = casava_directory + "/" + sample['subdir'] + "/" + file filepath = casava_directory + "/" + sample['subdir'] + "/" + file
if file.endswith(".fastq.gz") and re.search(".*_L00" + str(lane_number) + "_.*", file): if file.endswith(".fastq.gz") and re.search(".*_L00" + str(lane_number) + "_.*", file):
...@@ -481,7 +517,6 @@ class CasavaNG6Workflow(NG6Workflow): ...@@ -481,7 +517,6 @@ class CasavaNG6Workflow(NG6Workflow):
all_samples.append(sp_object) all_samples.append(sp_object)
all_samples_id.append(sample['sample_id']) all_samples_id.append(sample['sample_id'])
for file in os.listdir(casava_directory): for file in os.listdir(casava_directory):
filepath = casava_directory + "/" + file filepath = casava_directory + "/" + file
if file.endswith(".log"): if file.endswith(".log"):
...@@ -489,9 +524,111 @@ class CasavaNG6Workflow(NG6Workflow): ...@@ -489,9 +524,111 @@ class CasavaNG6Workflow(NG6Workflow):
logging.getLogger("ng6").debug("CasavaNG6Workflow._process_casava_18 self.log_files = " + ",".join(self.log_files)) logging.getLogger("ng6").debug("CasavaNG6Workflow._process_casava_18 self.log_files = " + ",".join(self.log_files))
logging.getLogger("ng6").debug("CasavaNG6Workflow._process_casava_18 all_samples_id = " + ",".join(all_samples_id)) logging.getLogger("ng6").debug("CasavaNG6Workflow._process_casava_18 all_samples_id = " + ",".join(all_samples_id))
logging.getLogger("ng6").debug("CasavaNG6Workflow._process_casava_18 exiting") logging.getLogger("ng6").debug("CasavaNG6Workflow._process_casava_18 exiting")
return all_samples, all_samples_id
def _process_casava_10X(self,casava_directory, project_name, lane_number, input_files):
logging.getLogger("ng6").debug("CasavaNG6Workflow._process_casava_10X enter")
logging.getLogger("ng6").debug("CasavaNG6Workflow._process_casava_10X casava_directory = " + casava_directory + ", project_name = " + str(project_name))
"""
Creates samples from casavadir from longranger demultiplexing
@param casava_directory:
@param project_name:
@param lane_number:
@param input_files:
"""
all_samples = []
all_samples_id = []
# open casava samplesheet again to associate our files with a sample
with open(os.path.join(casava_directory, "SampleSheet_10X.mk")) as fh :
barcodes_list = []
sample_ids_list = []
subdirs_list = []
for line in fh :
if line.startswith("l" + str(lane_number) + "_BARCODES"):
parts = line.strip().split(":=")
barcodes_list = [ re.sub( r"[_\s]+", "", x) for x in parts[1].split() ]
elif line.startswith("l" + str(lane_number) + "_SAMPLEIDS" ):
parts = line.strip().split(":=")
sample_ids_list = parts[1].split(" ")
elif line.startswith("l" + str(lane_number) + "_SUBDIRS"):
parts = line.strip().split(":=")
subdirs_list = parts[1].split(" ")
assert len(barcodes_list) == len(sample_ids_list) == len(subdirs_list), "Invalid lane {0} in SampleSheet_10X.mk".format(lane_number)
cfg_reader = NG6ConfigReader()
indexs = cfg_reader.get_10X_indexs()
# parse samples
for i in range(len(barcodes_list)):
if barcodes_list[i] == 'Undetermined' :
barcode = 'Undetermined'
else :
barcode = indexs[barcodes_list[i]]
sample = {
'barcode' : barcode,
'sample_id' : sample_ids_list[i],
'subdir' : subdirs_list[i],
'reads1' : [],
'reads2' : [],
'index' : []
}
# filter on project name
if re.match("Project_" + project_name + "/Sample_.+", sample['subdir']) or sample['subdir'].startswith("Undetermined_indices"):
for file in os.listdir(casava_directory + "/" + sample['subdir']):
filepath = casava_directory + "/" + sample['subdir'] + "/" + file
if file.endswith(".fastq.gz") and re.search(".*_L00" + str(lane_number) + "_.*", file):
for idx, iofile in enumerate(input_files) :
if iofile == filepath :
if re.search(".*_R1_.*", file):
if not sample['subdir'].startswith("Undetermined_indices"):
sample['reads1'].append(iofile)
else:
self.undetermined_reads1.append(iofile)
if re.search(".*_R2_.*", file):
if not sample['subdir'].startswith("Undetermined_indices"):
sample['reads2'].append(iofile)
else:
self.undetermined_reads2.append(iofile)
if re.search(".*_I1_.*", file):
if not sample['subdir'].startswith("Undetermined_indices"):
sample['index'].append(iofile)
else:
self.undetermined_index.append(iofile)
input_files.pop(idx)
break
if not sample['subdir'].startswith("Undetermined_indices") :
sp_object = Sample(sample['barcode'], sample['reads1'], reads2 = sample['reads2'], index = sample['index'], name=sample['sample_id'])
sp_object.add_metadata('barcode', sample['barcode'])
sp_object.add_metadata('is_casava', True)
all_samples.append(sp_object)
all_samples_id.append(sample['sample_id'])
for file in os.listdir(casava_directory):
filepath = casava_directory + "/" + file
if file.endswith(".log"):
self.log_files.append(filepath)
logging.getLogger("ng6").debug("CasavaNG6Workflow._process_casava_10X all_samples_id = " + ",".join(all_samples_id))
logging.getLogger("ng6").debug("CasavaNG6Workflow._process_casava_10X exiting")
return all_samples, all_samples_id return all_samples, all_samples_id
def _process_casava_216(self,casava_directory, project_name, lane_number, input_files): def _process_casava_216(self,casava_directory, project_name, lane_number, input_files):
""" """
Creates samples from casavadir (>=1.9) using input files Creates samples from casavadir (>=1.9) using input files
...@@ -513,20 +650,30 @@ class CasavaNG6Workflow(NG6Workflow): ...@@ -513,20 +650,30 @@ class CasavaNG6Workflow(NG6Workflow):
logging.getLogger("ng6").debug("illumina_process self.is_casava") logging.getLogger("ng6").debug("illumina_process self.is_casava")
if len(self.log_files) > 0 : if len(self.log_files) > 0 :
add_log = self.add_component("BasicAnalysis", [self.log_files,"Log Files","Log files generated during primary analysis","-","-","-","gz", "","log.gz"]) add_log = self.add_component("BasicAnalysis", [self.log_files,"Log Files","Log files generated during primary analysis","-","-","-","gz", "","log.gz"])
if len(self.undetermined_reads1) > 0 : if len(self.undetermined_reads1) > 0 :
if self.casava['mismatch_index'] : if self.casava['mismatch_index'] :
demultiplex_stats = self.add_component("DemultiplexStats", [self.get_all_reads("read1"), self.undetermined_reads1, self.get_files_index('read1')]) demultiplex_stats = self.add_component("DemultiplexStats", [self.get_all_reads("read1"), self.undetermined_reads1, self.get_files_index('read1')])
elif self.is_10Xcasava :
logging.getLogger("ng6").debug("illumina_process self.is_10Xcasava = ")
logging.getLogger("ng6").debug(self.get_all_reads("read1"))
logging.getLogger("ng6").debug("illumina_process undetermined reads = " )
logging.getLogger("ng6").debug(self.undetermined_reads1)
logging.getLogger("ng6").debug("illumina_process file index =")
logging.getLogger("ng6").debug(self.get_files_index("read1"))
demultiplex_stats = self.add_component("Demultiplex10XStats", [self.get_all_reads("read1"), self.undetermined_reads1, self.get_files_index("read1")])
else : else :
demultiplex_stats = self.add_component("DemultiplexStats", [self.get_all_reads("read1"), self.undetermined_reads1]) demultiplex_stats = self.add_component("DemultiplexStats", [self.get_all_reads("read1"), self.undetermined_reads1])
if self.keep_reads != "all" : if self.keep_reads != "all" :
logging.getLogger("ng6").debug("illumina_process self.keep_reads != all") logging.getLogger("ng6").debug("illumina_process self.keep_reads != all")
logging.getLogger("ng6").debug("illumina_process self.get_all_reads() = " + ",".join(self.get_all_reads())) logging.getLogger("ng6").debug("illumina_process BEFORE FASTQILLUMINAFILTER self.get_all_reads() = " + ",".join(self.get_all_reads()))
logging.getLogger("ng6").debug("illumina_process self.group_prefix = " + ",".join(self.group_prefix)) logging.getLogger("ng6").debug("illumina_process self.group_prefix = " + ",".join(self.group_prefix))
# fastq illumina filter # fastq illumina filter
fastqilluminafilter = self.add_component("FastqIlluminaFilter", [self.runobj,self.get_all_reads(), self.keep_reads, self.group_prefix]) fastqilluminafilter = self.add_component("FastqIlluminaFilter", [self.runobj,self.get_all_reads(), self.keep_reads, self.group_prefix])
# list filtered files # list filtered files
if self.is_paired_end() : if self.is_paired_end() :
# split read 1 and read 2 from filtered files list # split read 1 and read 2 from filtered files list
...@@ -543,7 +690,7 @@ class CasavaNG6Workflow(NG6Workflow): ...@@ -543,7 +690,7 @@ class CasavaNG6Workflow(NG6Workflow):
# archive the files # archive the files
#TODO : if self.group_prefix == None, the create the output of fastqilluminafilter in the run.get_work_directory() #TODO : if self.group_prefix == None, the create the output of fastqilluminafilter in the run.get_work_directory()
saved_files = filtered_read1_files + filtered_read2_files saved_files = filtered_read1_files + filtered_read2_files + self.get_all_reads("index")
logging.getLogger("CasavaNG6Workflow").debug("illumina_process saved_files = " + ",".join(saved_files)) logging.getLogger("CasavaNG6Workflow").debug("illumina_process saved_files = " + ",".join(saved_files))
reads_prefixes = None reads_prefixes = None
if self.group_prefix != None : if self.group_prefix != None :
...@@ -574,7 +721,7 @@ class CasavaNG6Workflow(NG6Workflow): ...@@ -574,7 +721,7 @@ class CasavaNG6Workflow(NG6Workflow):
# contamination_search # contamination_search
if contam : if contam :
if self.contamination_databank: contam.extend(self.contamination_databank) if self.contamination_databank: contam.extend(self.contamination_databank)
contamination_search = self.add_component("ContaminationSearch", [filtered_read1_files, contam, list((Utils.get_group_basenames(filtered_read1_files, "read")).keys())], parent = fastqilluminafilter) contamination_search = self.add_component("ContaminationSearch", [filtered_read1_files+filtered_read2_files, contam, reads_prefixes], parent = fastqilluminafilter)
# make some statistics on raw file # make some statistics on raw file
fastqc = self.add_component("FastQC", [filtered_read1_files+filtered_read2_files, (self.group_prefix is not None), self.no_group, "fastqc.tar.gz"], parent = fastqilluminafilter) fastqc = self.add_component("FastQC", [filtered_read1_files+filtered_read2_files, (self.group_prefix is not None), self.no_group, "fastqc.tar.gz"], parent = fastqilluminafilter)
......
...@@ -88,8 +88,7 @@ class Run(object): ...@@ -88,8 +88,7 @@ class Run(object):
ng6conf.get_space_directory(self.space_id), self.DIRECTORIES_STRUCTURE, directory_name) ng6conf.get_space_directory(self.space_id), self.DIRECTORIES_STRUCTURE, directory_name)
work_dir = os.path.join(ng6conf.get_work_directory(), ng6conf.get_space_directory(self.space_id), \ work_dir = os.path.join(ng6conf.get_work_directory(), ng6conf.get_space_directory(self.space_id), \
self.DIRECTORIES_STRUCTURE, directory_name) self.DIRECTORIES_STRUCTURE, directory_name)
print (work_dir)
print (os.path.isdir(work_dir))
if not os.path.isdir(save_dir) and not os.path.isdir(work_dir): if not os.path.isdir(save_dir) and not os.path.isdir(work_dir):
break break
directory_name = uuid.uuid4().hex[:9] directory_name = uuid.uuid4().hex[:9]
......
...@@ -22,13 +22,14 @@ class Sample(object): ...@@ -22,13 +22,14 @@ class Sample(object):
AVAILABLE_TYPES = ["pe", "se", "ose", "ope", "mp"] AVAILABLE_TYPES = ["pe", "se", "ose", "ope", "mp"]
def __init__(self, sample_id, reads1, reads2 = None, name = None, description = None, type = None, def __init__(self, sample_id, reads1, reads2 = None,index = None, name = None, description = None, type = None,
insert_size = None, species = None, nb_sequences = None, full_size = None, id = None ): insert_size = None, species = None, nb_sequences = None, full_size = None, id = None ):
self.sample_id = sample_id self.sample_id = sample_id
self.name = name self.name = name
self.description = description self.description = description
self.reads1 = reads1 self.reads1 = reads1
self.reads2 = reads2 self.reads2 = reads2
self.index = index
self.insert_size = insert_size self.insert_size = insert_size
self.nb_sequences = nb_sequences self.nb_sequences = nb_sequences
self.full_size = full_size self.full_size = full_size
...@@ -41,6 +42,9 @@ class Sample(object): ...@@ -41,6 +42,9 @@ class Sample(object):
if isinstance(reads2, str) : if isinstance(reads2, str) :
self.reads2 = [reads2] self.reads2 = [reads2]
if isinstance(index, str) :
self.index = [index]
if self.type is None: if self.type is None:
if self.reads2 : if self.reads2 :
...@@ -94,12 +98,13 @@ class Sample(object): ...@@ -94,12 +98,13 @@ class Sample(object):
raise UnsavedRunError() raise UnsavedRunError()
def __str__(self, *args, **kwargs): def __str__(self, *args, **kwargs):
return "sid={sid}; name={name}; desc={desc}; r1={r1}; r2={r2}; insize={insize}; nbs={nbs}; fsize={fsize}; spec={spec}; t={t}".format( return "sid={sid}; name={name}; desc={desc}; r1={r1}; r2={r2}; i={i}; insize={insize}; nbs={nbs}; fsize={fsize}; spec={spec}; t={t}".format(
sid = self.sample_id or '', sid = self.sample_id or '',
name = self.name or '', name = self.name or '',
desc = self.description or '', desc = self.description or '',
r1 = self.reads1 or [], r1 = self.reads1 or [],
r2 = self.reads2 or [], r2 = self.reads2 or [],
i = self.index or [],
insize = self.insert_size or '', insize = self.insert_size or '',
nbs = self.nb_sequences or '', nbs = self.nb_sequences or '',
fsize = self.full_size or '', fsize = self.full_size or '',
......
...@@ -299,11 +299,14 @@ class Utils(object): ...@@ -299,11 +299,14 @@ class Utils(object):
@param group_by : CASAVA_FILENAME key (ex : read) @param group_by : CASAVA_FILENAME key (ex : read)
""" """
group_basenames = {} group_basenames = {}
logging.getLogger("Utils").debug("get_group_basenames. file_list = " + ",".join(file_list))
for file in file_list: for file in file_list:
file_name_fields = os.path.basename(file).split(Utils.CASAVA_FILENAME_SEPARATOR) file_name_fields = os.path.basename(file).split(Utils.CASAVA_FILENAME_SEPARATOR)
group_tag = Utils.CASAVA_FILENAME_SEPARATOR.join( file_name_fields[:Utils.CASAVA_FILENAME[group_by]] ) group_tag = Utils.CASAVA_FILENAME_SEPARATOR.join( file_name_fields[:Utils.CASAVA_FILENAME[group_by]] )
if group_tag in group_basenames : if group_tag in group_basenames :
group_basenames[group_tag].append(file) group_basenames[group_tag].append(file)
......
{*
Copyright (C) 2009 INRA
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. #
*}
{extends file='AnalysisTemplate.tpl'}
{block name=params_title} Parameters {/block}
{block name=params_content}
{assign var="params" value=" "|explode:$analyse.params}
<ul>
<li class="parameter">Unknown indices with a number of fragments < {$params[0]*100}% of the number of fragments in the sample with the littlest population are merged in "All others". In other words, each unknown indice with a number of fragments > {$params[0]*100}% (included) of the number of fragments in the sample with the smallest population is displayed </li>
</ul>
{/block}
{block name=results_title} Illumina metrics {/block}
{block name=results}
<table class="table table-striped table-bordered dataTable analysis-result-table">
<thead>
<tr>
<th class="string-sort">Sample {if $analyse_results|@count > 1 }({$analyse_results|@count}){/if}</th>
<th class="string-sort">Type</th>
<th class="string-sort">Index</th>
<th class="numeric-sort">% of total fragments</th>
<th class="numeric-sort">Number of fragments</th>
<th class="numeric-sort">% of passing filter</th>
</tr>