Commit 3f882674 authored by Celine Noirot's avatar Celine Noirot
Browse files

Merge branch 'Issues#143_debug_init_illumina10X' into 'master'

Merge branch Issues#143 with master

See merge request !79
parents 47a18ab6 9b9e0e65
...@@ -446,6 +446,7 @@ class CasavaNG6Workflow(NG6Workflow): ...@@ -446,6 +446,7 @@ class CasavaNG6Workflow(NG6Workflow):
if self.is_casava: if self.is_casava:
self.group_prefix = list((Utils.get_group_basenames(self.get_all_reads(), "read")).keys()) self.group_prefix = list((Utils.get_group_basenames(self.get_all_reads(), "read")).keys())
logging.getLogger("ng6").debug("CasavaNG6Workflow._preprocess enter" + str(self.group_prefix))
def _process_casava_18(self, casava_directory, project_name, lane_number, input_files): def _process_casava_18(self, casava_directory, project_name, lane_number, input_files):
logging.getLogger("ng6").debug("CasavaNG6Workflow._process_casava_18 enter") logging.getLogger("ng6").debug("CasavaNG6Workflow._process_casava_18 enter")
...@@ -643,10 +644,14 @@ class CasavaNG6Workflow(NG6Workflow): ...@@ -643,10 +644,14 @@ class CasavaNG6Workflow(NG6Workflow):
concatenatefastq = None concatenatefastq = None
filtered_read1_files = [] filtered_read1_files = []
filtered_read2_files = [] filtered_read2_files = []
filtered_index_files = []
analysis_files = []
saved_files = [] saved_files = []
logging.getLogger("ng6").debug("illumina_process entering") logging.getLogger("ng6").debug("illumina_process entering")
if self.is_casava : if self.is_casava :
logging.getLogger("ng6").debug("illumina_process self.is_casava") logging.getLogger("ng6").debug("illumina_process self.is_casava")
analysis_files = self.get_all_reads("read1") + self.get_all_reads("read2")
if len(self.log_files) > 0 : if len(self.log_files) > 0 :
add_log = self.add_component("BasicAnalysis", [self.log_files,"Log Files","Log files generated during primary analysis","-","-","-","gz", "","log.gz"]) add_log = self.add_component("BasicAnalysis", [self.log_files,"Log Files","Log files generated during primary analysis","-","-","-","gz", "","log.gz"])
...@@ -657,39 +662,48 @@ class CasavaNG6Workflow(NG6Workflow): ...@@ -657,39 +662,48 @@ class CasavaNG6Workflow(NG6Workflow):
demultiplex_stats = self.add_component("Demultiplex10XStats", [self.get_all_reads("read1"), self.undetermined_reads1, self.get_files_index("read1")]) demultiplex_stats = self.add_component("Demultiplex10XStats", [self.get_all_reads("read1"), self.undetermined_reads1, self.get_files_index("read1")])
else : else :
demultiplex_stats = self.add_component("DemultiplexStats", [self.get_all_reads("read1"), self.undetermined_reads1]) demultiplex_stats = self.add_component("DemultiplexStats", [self.get_all_reads("read1"), self.undetermined_reads1])
#analysis files for fastq illumina and fastqc analysis
if self.keep_reads != "all" : if self.keep_reads != "all" :
logging.getLogger("ng6").debug("illumina_process self.keep_reads != all") logging.getLogger("ng6").debug("illumina_process self.keep_reads != all")
logging.getLogger("ng6").debug("illumina_process BEFORE FASTQILLUMINAFILTER self.get_all_reads() = " + ",".join(self.get_all_reads())) logging.getLogger("ng6").debug("illumina_process BEFORE FASTQILLUMINAFILTER self.get_all_reads() = " + ",".join(self.get_all_reads()))
logging.getLogger("ng6").debug("illumina_process self.group_prefix = " + ",".join(self.group_prefix)) logging.getLogger("ng6").debug("illumina_process self.group_prefix = " + ",".join(self.group_prefix))
# fastq illumina filter # fastq illumina filter
fastqilluminafilter = self.add_component("FastqIlluminaFilter", [self.runobj,self.get_all_reads(), self.keep_reads, self.group_prefix])
fastqilluminafilter = self.add_component("FastqIlluminaFilter", [self.runobj, self.get_all_reads(), self.keep_reads, self.group_prefix])
logging.getLogger("ng6").debug("illumina_process fastqilluminafilter = " + ",".join(filtered_read1_files))
# list filtered files # list filtered files
if self.is_paired_end() : if self.is_paired_end() :
# split read 1 and read 2 from filtered files list # split read 1 and read 2 from filtered files list
[filtered_read1_files, filtered_read2_files] = Utils.split_pair(fastqilluminafilter.fastq_files_filtered, (self.group_prefix is not None)) if self.is_10Xcasava :
[filtered_read1_files, filtered_read2_files, filtered_index_files] = Utils.split_pair_and_index(fastqilluminafilter.fastq_files_filtered, (self.group_prefix is not None))
else:
[filtered_read1_files, filtered_read2_files] = Utils.split_pair(fastqilluminafilter.fastq_files_filtered, (self.group_prefix is not None))
else: else:
filtered_read1_files = fastqilluminafilter.fastq_files_filtered filtered_read1_files = fastqilluminafilter.fastq_files_filtered
filtered_read2_files = [] filtered_read2_files = []
filtered_index_files = []
filtered_read1_files = sorted(filtered_read1_files) filtered_read1_files = sorted(filtered_read1_files)
filtered_read2_files = sorted(filtered_read2_files) filtered_read2_files = sorted(filtered_read2_files)
filtered_index_files = sorted(filtered_index_files)
else: else:
fastqilluminafilter = None fastqilluminafilter = None
filtered_read1_files = self.get_all_reads("read1") filtered_read1_files = self.get_all_reads("read1")
filtered_read2_files = self.get_all_reads("read2") filtered_read2_files = self.get_all_reads("read2")
filtered_index_files = self.get_all_reads("index")
# archive the files # archive the files
#TODO : if self.group_prefix == None, the create the output of fastqilluminafilter in the run.get_work_directory() #TODO : if self.group_prefix == None, the create the output of fastqilluminafilter in the run.get_work_directory()
saved_files = filtered_read1_files + filtered_read2_files + self.get_all_reads("index") saved_files = filtered_read1_files + filtered_read2_files + filtered_index_files
logging.getLogger("CasavaNG6Workflow").debug("illumina_process saved_files = " + ",".join(saved_files)) logging.getLogger("CasavaNG6Workflow").debug("illumina_process saved_files = " + ",".join(saved_files))
reads_prefixes = None reads_prefixes = None
if self.group_prefix != None : if self.group_prefix != None :
# concatenate fastq # concatenate fastq
reads_prefixes = list((Utils.get_group_basenames(saved_files, "read")).keys()) reads_prefixes = list((Utils.get_group_basenames(saved_files, "read")).keys())
logging.getLogger("CasavaNG6Workflow").debug("illumina_process read_predixes = " + ",".join(reads_prefixes))
logging.getLogger("CasavaNG6Workflow").debug("illumina_process saved_files = " + ",".join(saved_files)) logging.getLogger("CasavaNG6Workflow").debug("illumina_process saved_files = " + ",".join(saved_files))
concatenatefastq = self.add_component("ConcatenateFilesGroups", [self.runobj,saved_files,reads_prefixes]) concatenatefastq = self.add_component("ConcatenateFilesGroups", [self.runobj,saved_files,reads_prefixes])
saved_files = concatenatefastq.concat_files saved_files = concatenatefastq.concat_files
...@@ -700,8 +714,11 @@ class CasavaNG6Workflow(NG6Workflow): ...@@ -700,8 +714,11 @@ class CasavaNG6Workflow(NG6Workflow):
fastqilluminafilter = None fastqilluminafilter = None
filtered_read1_files = self.get_all_reads("read1") filtered_read1_files = self.get_all_reads("read1")
filtered_read2_files = self.get_all_reads("read2") filtered_read2_files = self.get_all_reads("read2")
filtered_index_files = self.get_all_reads("index")
saved_files = self.get_all_reads() saved_files = self.get_all_reads()
# reads prefixes
reads_prefixes =list((Utils.get_group_basenames(analysis_files, "read")).keys())
# add raw # add raw
addrawfiles = self.add_component("AddRawFiles", [self.runobj, saved_files, self.compression]) addrawfiles = self.add_component("AddRawFiles", [self.runobj, saved_files, self.compression])
contam = [] contam = []
...@@ -711,6 +728,9 @@ class CasavaNG6Workflow(NG6Workflow): ...@@ -711,6 +728,9 @@ class CasavaNG6Workflow(NG6Workflow):
contam.append(self.get_resource("yeast_bwa")) contam.append(self.get_resource("yeast_bwa"))
except : pass except : pass
logging.getLogger("CasavaNG6Workflow").debug("illumina_process files_analysis = " + ",".join(filtered_read1_files))
logging.getLogger("CasavaNG6Workflow").debug("illumina_process files_analysis = " + ",".join(filtered_read2_files))
logging.getLogger("CasavaNG6Workflow").debug("illumina_process files_analysis = " + ",".join(filtered_index_files))
# contamination_search # contamination_search
if contam : if contam :
if self.contamination_databank: contam.extend(self.contamination_databank) if self.contamination_databank: contam.extend(self.contamination_databank)
......
...@@ -290,7 +290,44 @@ class Utils(object): ...@@ -290,7 +290,44 @@ class Utils(object):
return [read_1_list, read_2_list] return [read_1_list, read_2_list]
@staticmethod
def split_pair_and_index ( file_list, is_casava=False ):
    """
    Return the list of read 1, the list of read 2 and the list of index read from a list
      @param file_list : the list of file paths to split
      @param is_casava : files names in file_list are in CASAVA format
      @return          : [read_1_list, read_2_list, read_index_list]

    When is_casava is true, each file is classified by the 'read' field of its
    basename (text before the first "."): "R1" goes to read 1, "R2" to read 2
    and every other tag to the index list.
    Otherwise the list is sorted and consumed three files at a time, in the
    order read 1, read 2, index; an IndexError is raised if the number of
    files is not a multiple of three.
    """
    logger = logging.getLogger("Utils")
    read_1_list = []
    read_2_list = []
    read_index_list = []
    logger.debug("split_pair_and_index. Entering")
    if is_casava:
        logger.debug("split_pair_and_index. is_casava")
        for file_path in file_list:
            logger.debug("split_pair_and_index. file = " + file_path)
            # Only the part of the basename before the first "." carries the CASAVA fields
            basename_without_ext = os.path.basename(file_path).split(".")[0]
            file_name_fields = basename_without_ext.split(Utils.CASAVA_FILENAME_SEPARATOR)
            # CASAVA_FILENAME positions are used 1-based here, hence the -1
            read_tag = file_name_fields[Utils.CASAVA_FILENAME['read']-1]
            if read_tag == "R1":
                read_1_list.append(file_path)
            elif read_tag == "R2":
                read_2_list.append(file_path)
            else:
                # Any tag other than R1/R2 is treated as an index read
                read_index_list.append(file_path)
    else:
        sorted_list = sorted( file_list )
        logger.debug("split_pair_and_index. file_list = " + ", ".join(file_list))
        logger.debug("split_pair_and_index. sorted_list = " + ", ".join(sorted_list))
        # After sorting, files are taken as consecutive (read 1, read 2, index) triplets
        for i in range(0, len(sorted_list), 3):
            logger.debug("split_pair_and_index. sorted_list[i] = " + sorted_list[i])
            logger.debug("split_pair_and_index. sorted_list[i+1] = " + sorted_list[i+1])
            # Label fixed: this message reports element i+2, not i+1
            logger.debug("split_pair_and_index. sorted_list[i+2] = " + sorted_list[i+2])
            read_1_list.append(sorted_list[i])
            read_2_list.append(sorted_list[i+1])
            read_index_list.append(sorted_list[i+2])
    return [read_1_list, read_2_list, read_index_list]
@staticmethod @staticmethod
def get_group_basenames( file_list, group_by ): def get_group_basenames( file_list, group_by ):
""" """
......
...@@ -109,6 +109,8 @@ function get_casava_1_8_desc($string, $desc) { ...@@ -109,6 +109,8 @@ function get_casava_1_8_desc($string, $desc) {
$best_description = $description." (R1)"; $best_description = $description." (R1)";
} elseif (preg_match("/_R2/i", $string)) { } elseif (preg_match("/_R2/i", $string)) {
$best_description = $description." (R2)"; $best_description = $description." (R2)";
} elseif (preg_match("/_I1/i", $string)) {
$best_description = $description." (I1)";
} }
} }
} }
......
...@@ -37,4 +37,4 @@ class Illumina10XQualityCheck (CasavaNG6Workflow): ...@@ -37,4 +37,4 @@ class Illumina10XQualityCheck (CasavaNG6Workflow):
def process(self): def process(self):
fastqilluminafilter, filtered_read1_files, filtered_read2_files, concat_files = self.illumina_process() fastqilluminafilter, filtered_read1_files, filtered_read2_files, concat_files, concatenatefastq = self.illumina_process()
\ No newline at end of file \ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment