Commit 87e213ba authored by ckuchly

#134: modified sample.py and ng6workflow.py to clarify the management of

index files in a casava directory that isn't 10X data.
parent a3e88383
......@@ -142,7 +142,7 @@ class NG6Workflow (BasicNG6Workflow):
self.samples = []
self.reads1 = []
self.reads2 = []
self.index = []
self.readsi = []
self.samples_names = []
self.reads1_indexes = []
self.reads2_indexes = []
......@@ -173,7 +173,7 @@ class NG6Workflow (BasicNG6Workflow):
def __create_samples__(self):
for sd in self.input_sample :
sp_object = Sample( sd['sample_id'], sd['read1'], sd['read2'], sd['index'],name = sd['sample_name'], description = sd['sample_description'], type = sd['type'],
sp_object = Sample( sd['sample_id'], sd['read1'], sd['read2'], sd['index'], name = sd['sample_name'], description = sd['sample_description'], type = sd['type'],
insert_size = sd['insert_size'], species = sd['species'] )
for metadata in sd['metadata'] :
......@@ -185,7 +185,10 @@ class NG6Workflow (BasicNG6Workflow):
samples_ids = []
pidx = 1
nidx = 1
for index, sample in enumerate(self.samples) :
for rang, sample in enumerate(self.samples) :
print(sample)
print("sample readsi")
print(sample.readsi)
if sample.name :
self.samples_names.append(sample.name)
else :
......@@ -208,9 +211,9 @@ class NG6Workflow (BasicNG6Workflow):
for rfile in sample.reads2 :
self.reads2_indexes.append(sample.sample_id)
self.reads2.append(rfile)
for rfile in sample.index :
self.index.append(rfile)
for rfile in sample.readsi :
self.readsi.append(rfile)
if len(self.samples_names) != 0 :
if len(self.samples_names) != len (self.samples) :
......@@ -222,8 +225,8 @@ class NG6Workflow (BasicNG6Workflow):
elif type == 'read2' :
return self.reads2
elif type == 'index' :
return self.index
return self.reads1 + self.reads2 + self.index
return self.readsi
return self.reads1 + self.reads2 + self.readsi
def get_files_index(self, type = None):
if type == 'read1' :
......@@ -482,7 +485,8 @@ class CasavaNG6Workflow(NG6Workflow):
'sample_id' : sample_ids_list[i],
'subdir' : subdirs_list[i],
'reads1' : [],
'reads2' : []
'reads2' : [],
'readsi' : []
}
# filter on project name
......@@ -507,7 +511,7 @@ class CasavaNG6Workflow(NG6Workflow):
if not sample['subdir'].startswith("Undetermined_indices") :
logging.getLogger("ng6").debug("CasavaNG6Workflow._process_casava_18 create sample " + sample['sample_id'])
sp_object = Sample(sample['barcode'], sample['reads1'], reads2 = sample['reads2'], name=sample['sample_id'])
sp_object = Sample(sample['barcode'], sample['reads1'], reads2 = sample['reads2'], readsi = [], name=sample['sample_id'])
sp_object.add_metadata('barcode', sample['barcode'])
sp_object.add_metadata('is_casava', True)
......@@ -571,7 +575,7 @@ class CasavaNG6Workflow(NG6Workflow):
'subdir' : subdirs_list[i],
'reads1' : [],
'reads2' : [],
'index' : []
'readsi' : []
}
# filter on project name
......@@ -595,20 +599,18 @@ class CasavaNG6Workflow(NG6Workflow):
if re.search(".*_R2_.*", file):
if not sample['subdir'].startswith("Undetermined_indices"):
sample['reads2'].append(iofile)
else:
self.undetermined_reads2.append(iofile)
if re.search(".*_I1_.*", file):
if not sample['subdir'].startswith("Undetermined_indices"):
sample['index'].append(iofile)
sample['readsi'].append(iofile)
else:
self.undetermined_index.append(iofile)
input_files.pop(idx)
break
if not sample['subdir'].startswith("Undetermined_indices") :
sp_object = Sample(sample['barcode'], sample['reads1'], reads2 = sample['reads2'], index = sample['index'], name=sample['sample_id'])
sp_object = Sample(sample['barcode'], sample['reads1'], reads2 = sample['reads2'], readsi = sample['readsi'], name=sample['sample_id'])
sp_object.add_metadata('barcode', sample['barcode'])
sp_object.add_metadata('is_casava', True)
......@@ -651,12 +653,6 @@ class CasavaNG6Workflow(NG6Workflow):
if self.casava['mismatch_index'] :
demultiplex_stats = self.add_component("DemultiplexStats", [self.get_all_reads("read1"), self.undetermined_reads1, self.get_files_index('read1')])
elif self.is_10Xcasava :
logging.getLogger("ng6").debug("illumina_process self.is_10Xcasava = ")
logging.getLogger("ng6").debug(self.get_all_reads("read1"))
logging.getLogger("ng6").debug("illumina_process undetermined reads = " )
logging.getLogger("ng6").debug(self.undetermined_reads1)
logging.getLogger("ng6").debug("illumina_process file index =")
logging.getLogger("ng6").debug(self.get_files_index("read1"))
demultiplex_stats = self.add_component("Demultiplex10XStats", [self.get_all_reads("read1"), self.undetermined_reads1, self.get_files_index("read1")])
else :
demultiplex_stats = self.add_component("DemultiplexStats", [self.get_all_reads("read1"), self.undetermined_reads1])
......@@ -686,7 +682,7 @@ class CasavaNG6Workflow(NG6Workflow):
# archive the files
#TODO : if self.group_prefix == None, then create the output of fastqilluminafilter in the run.get_work_directory()
saved_files = filtered_read1_files + filtered_read2_files + self.get_all_reads("index")
saved_files = filtered_read1_files + filtered_read2_files + self.get_all_reads("readsi")
logging.getLogger("CasavaNG6Workflow").debug("illumina_process saved_files = " + ",".join(saved_files))
reads_prefixes = None
if self.group_prefix != None :
......
......@@ -20,16 +20,16 @@ from ng6.t3MySQLdb import t3MySQLdb
class Sample(object):
AVAILABLE_TYPES = ["pe", "se", "ose", "ope", "mp"]
AVAILABLE_TYPES = ["pe", "se", "ose", "ope", "mp","10X"]
def __init__(self, sample_id, reads1, reads2 = None,index = None, name = None, description = None, type = None,
def __init__(self, sample_id, reads1, reads2 = None, readsi = None, name = None, description = None, type = None,
insert_size = None, species = None, nb_sequences = None, full_size = None, id = None ):
self.sample_id = sample_id
self.name = name
self.description = description
self.reads1 = reads1
self.reads2 = reads2
self.index = index
self.readsi = readsi
self.insert_size = insert_size
self.nb_sequences = nb_sequences
self.full_size = full_size
......@@ -43,8 +43,8 @@ class Sample(object):
if isinstance(reads2, str) :
self.reads2 = [reads2]
if isinstance(index, str) :
self.index = [index]
if isinstance(readsi, str) :
self.readsi = [readsi]
if self.type is None:
if self.reads2 :
......@@ -104,7 +104,7 @@ class Sample(object):
desc = self.description or '',
r1 = self.reads1 or [],
r2 = self.reads2 or [],
i = self.index or [],
i = self.readsi or [],
insize = self.insert_size or '',
nbs = self.nb_sequences or '',
fsize = self.full_size or '',
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment