Commit cba79771 authored by ckuchly
Browse files

last modification for 10X pipeline

parent 82ab8d0a
......@@ -176,9 +176,6 @@ class NG6ConfigReader(object):
barcode_array = {}
barcodes = self.reader.items("10X_barcodes")
for barcode in barcodes:
#print("config reader")
#print(barcode[0])
#logging.getLogger("ng6").debug("barcode[0] = " + barcode[0].upper())
barcode_array[barcode[0].upper()] = barcode[1].upper()
return barcode_array
except :
......
......@@ -423,7 +423,7 @@ class CasavaNG6Workflow(NG6Workflow):
logging.getLogger("ng6").debug("CasavaNG6Workflow.__create_samples__ after self._process_casava_10X")
selected_samples = self.casava['select_sample_id']
logging.getLogger("CasavaNG6Workflow").debug("__create_samples__. all_samples_id = a"+", ".join(all_samples_id)+"a")
logging.getLogger("CasavaNG6Workflow").debug("__create_samples__. all_samples_id = "+", ".join(all_samples_id))
if selected_samples :
for sid in selected_samples :
assert sid in all_samples_id , "The sample id %s is not in the SampleSheet.mk" % sid
......@@ -439,6 +439,7 @@ class CasavaNG6Workflow(NG6Workflow):
def __preprocess_samples__(self):
NG6Workflow.__preprocess_samples__(self)
if self.is_casava:
self.group_prefix = list((Utils.get_group_basenames(self.get_all_reads(), "read")).keys())
......@@ -524,7 +525,6 @@ class CasavaNG6Workflow(NG6Workflow):
def _process_casava_10X(self,casava_directory, project_name, lane_number, input_files):
logging.getLogger("ng6").debug("CasavaNG6Workflow._process_casava_10X enter")
print("Process Casava 10X ")
logging.getLogger("ng6").debug("CasavaNG6Workflow._process_casava_10X casava_directory = " + casava_directory + ", project_name = " + str(project_name))
"""
Creates samples from casavadir from longranger demultiplexing
......@@ -552,19 +552,19 @@ class CasavaNG6Workflow(NG6Workflow):
parts = line.strip().split(":=")
subdirs_list = parts[1].split(" ")
assert len(barcodes_list) == len(sample_ids_list) == len(subdirs_list), "Invalid lane {0} in SampleSheet.mk".format(lane_number)
assert len(barcodes_list) == len(sample_ids_list) == len(subdirs_list), "Invalid lane {0} in SampleSheet_10X.mk".format(lane_number)
cfg_reader = NG6ConfigReader()
indexs = cfg_reader.get_10X_indexs()
# parse samples
for i in range(len(barcodes_list)):
if barcodes_list[i] == 'Undetermined' :
barcode = 'Undetermined'
else :
barcode = indexs[barcodes_list[i]]
#print("ng6worflow retrouver les barcodes")
#print(barcode)
sample = {
'barcode' : barcode,
'sample_id' : sample_ids_list[i],
......@@ -573,15 +573,21 @@ class CasavaNG6Workflow(NG6Workflow):
'reads2' : [],
'index' : []
}
# filter on project name
if re.match("Project_" + project_name + "/Sample_.+", sample['subdir']) or sample['subdir'].startswith("Undetermined_indices"):
for file in os.listdir(casava_directory + "/" + sample['subdir']):
filepath = casava_directory + "/" + sample['subdir'] + "/" + file
if file.endswith(".fastq.gz") and re.search(".*_L00" + str(lane_number) + "_.*", file):
for idx, iofile in enumerate(input_files) :
if iofile == filepath :
if re.search(".*_R1_.*", file):
if not sample['subdir'].startswith("Undetermined_indices"):
sample['reads1'].append(iofile)
else:
......@@ -589,11 +595,12 @@ class CasavaNG6Workflow(NG6Workflow):
if re.search(".*_R2_.*", file):
if not sample['subdir'].startswith("Undetermined_indices"):
sample['reads2'].append(iofile)
else:
self.undetermined_reads2.append(iofile)
if re.search(".*_I1_.*", file):
if not sample['subdir'].startswith("Undetermined_indices"):
logging.getLogger("ng6").debug("CasavaNG6Workflow.__process_casava_10X__index_files = " + "".join(iofile))
sample['index'].append(iofile)
else:
self.undetermined_index.append(iofile)
......@@ -609,6 +616,7 @@ class CasavaNG6Workflow(NG6Workflow):
all_samples_id.append(sample['sample_id'])
for file in os.listdir(casava_directory):
filepath = casava_directory + "/" + file
if file.endswith(".log"):
self.log_files.append(filepath)
logging.getLogger("ng6").debug("CasavaNG6Workflow._process_casava_10X all_samples_id = " + ",".join(all_samples_id))
......@@ -649,18 +657,19 @@ class CasavaNG6Workflow(NG6Workflow):
logging.getLogger("ng6").debug(self.undetermined_reads1)
logging.getLogger("ng6").debug("illumina_process file index =")
logging.getLogger("ng6").debug(self.get_files_index("read1"))
#demultiplex_stats = self.add_component("Demultiplex10XStats", [self.get_all_reads("read1"), self.undetermined_reads1, self.get_files_index("read1")])
demultiplex_stats = self.add_component("Demultiplex10XStats", [self.get_all_reads("read1"), self.undetermined_reads1, self.get_files_index("read1")])
else :
demultiplex_stats = self.add_component("DemultiplexStats", [self.get_all_reads("read1"), self.undetermined_reads1])
if self.keep_reads != "all" :
logging.getLogger("ng6").debug("illumina_process self.keep_reads != all")
logging.getLogger("ng6").debug("illumina_process self.get_all_reads() = " + ",".join(self.get_all_reads()))
logging.getLogger("ng6").debug("illumina_process BEFORE FASTQILLUMINAFILTER self.get_all_reads() = " + ",".join(self.get_all_reads()))
logging.getLogger("ng6").debug("illumina_process self.group_prefix = " + ",".join(self.group_prefix))
# fastq illumina filter
fastqilluminafilter = self.add_component("FastqIlluminaFilter", [self.runobj,self.get_all_reads(), self.keep_reads, self.group_prefix])
# list filtered files
if self.is_paired_end() :
# split read 1 and read 2 from filtered files list
......@@ -706,9 +715,9 @@ class CasavaNG6Workflow(NG6Workflow):
except : pass
# contamination_search
#if contam :
# if self.contamination_databank: contam.extend(self.contamination_databank)
# contamination_search = self.add_component("ContaminationSearch", [filtered_read1_files+filtered_read2_files, contam, reads_prefixes], parent = fastqilluminafilter)
if contam :
if self.contamination_databank: contam.extend(self.contamination_databank)
contamination_search = self.add_component("ContaminationSearch", [filtered_read1_files+filtered_read2_files, contam, reads_prefixes], parent = fastqilluminafilter)
# make some statistics on raw file
fastqc = self.add_component("FastQC", [filtered_read1_files+filtered_read2_files, (self.group_prefix is not None), self.no_group, "fastqc.tar.gz"], parent = fastqilluminafilter)
......
......@@ -88,8 +88,7 @@ class Run(object):
ng6conf.get_space_directory(self.space_id), self.DIRECTORIES_STRUCTURE, directory_name)
work_dir = os.path.join(ng6conf.get_work_directory(), ng6conf.get_space_directory(self.space_id), \
self.DIRECTORIES_STRUCTURE, directory_name)
print (work_dir)
print (os.path.isdir(work_dir))
if not os.path.isdir(save_dir) and not os.path.isdir(work_dir):
break
directory_name = uuid.uuid4().hex[:9]
......
......@@ -299,11 +299,14 @@ class Utils(object):
@param group_by : CASAVA_FILENAME key (ex : read)
"""
group_basenames = {}
logging.getLogger("Utils").debug("get_group_basenames. file_list = " + ",".join(file_list))
for file in file_list:
file_name_fields = os.path.basename(file).split(Utils.CASAVA_FILENAME_SEPARATOR)
group_tag = Utils.CASAVA_FILENAME_SEPARATOR.join( file_name_fields[:Utils.CASAVA_FILENAME[group_by]] )
if group_tag in group_basenames :
group_basenames[group_tag].append(file)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment