Commit 9b9e0e65 authored by ckuchly's avatar ckuchly

#143 : modified the files that are analysed by fastqc, contaminationSearch and

FastqIlluminaFilter.
parent 12ce6464
......@@ -446,6 +446,7 @@ class CasavaNG6Workflow(NG6Workflow):
if self.is_casava:
self.group_prefix = list((Utils.get_group_basenames(self.get_all_reads(), "read")).keys())
logging.getLogger("ng6").debug("CasavaNG6Workflow._preprocess enter" + str(self.group_prefix))
def _process_casava_18(self, casava_directory, project_name, lane_number, input_files):
logging.getLogger("ng6").debug("CasavaNG6Workflow._process_casava_18 enter")
......@@ -643,10 +644,14 @@ class CasavaNG6Workflow(NG6Workflow):
concatenatefastq = None
filtered_read1_files = []
filtered_read2_files = []
filtered_index_files = []
analysis_files = []
saved_files = []
logging.getLogger("ng6").debug("illumina_process entering")
if self.is_casava :
logging.getLogger("ng6").debug("illumina_process self.is_casava")
analysis_files = self.get_all_reads("read1") + self.get_all_reads("read2")
if len(self.log_files) > 0 :
add_log = self.add_component("BasicAnalysis", [self.log_files,"Log Files","Log files generated during primary analysis","-","-","-","gz", "","log.gz"])
......@@ -657,39 +662,48 @@ class CasavaNG6Workflow(NG6Workflow):
demultiplex_stats = self.add_component("Demultiplex10XStats", [self.get_all_reads("read1"), self.undetermined_reads1, self.get_files_index("read1")])
else :
demultiplex_stats = self.add_component("DemultiplexStats", [self.get_all_reads("read1"), self.undetermined_reads1])
#analysis files for fastq illumina and fastqc analysis
if self.keep_reads != "all" :
logging.getLogger("ng6").debug("illumina_process self.keep_reads != all")
logging.getLogger("ng6").debug("illumina_process BEFORE FASTQILLUMINAFILTER self.get_all_reads() = " + ",".join(self.get_all_reads()))
logging.getLogger("ng6").debug("illumina_process self.group_prefix = " + ",".join(self.group_prefix))
# fastq illumina filter
fastqilluminafilter = self.add_component("FastqIlluminaFilter", [self.runobj,self.get_all_reads(), self.keep_reads, self.group_prefix])
fastqilluminafilter = self.add_component("FastqIlluminaFilter", [self.runobj, self.get_all_reads(), self.keep_reads, self.group_prefix])
logging.getLogger("ng6").debug("illumina_process fastqilluminafilter = " + ",".join(filtered_read1_files))
# list filtered files
if self.is_paired_end() :
# split read 1 and read 2 from filtered files list
[filtered_read1_files, filtered_read2_files] = Utils.split_pair(fastqilluminafilter.fastq_files_filtered, (self.group_prefix is not None))
if self.is_10Xcasava :
[filtered_read1_files, filtered_read2_files, filtered_index_files] = Utils.split_pair_and_index(fastqilluminafilter.fastq_files_filtered, (self.group_prefix is not None))
else:
[filtered_read1_files, filtered_read2_files] = Utils.split_pair(fastqilluminafilter.fastq_files_filtered, (self.group_prefix is not None))
else:
filtered_read1_files = fastqilluminafilter.fastq_files_filtered
filtered_read2_files = []
filtered_index_files = []
filtered_read1_files = sorted(filtered_read1_files)
filtered_read2_files = sorted(filtered_read2_files)
filtered_index_files = sorted(filtered_index_files)
else:
fastqilluminafilter = None
filtered_read1_files = self.get_all_reads("read1")
filtered_read2_files = self.get_all_reads("read2")
filtered_index_files = self.get_all_reads("index")
# archive the files
#TODO : if self.group_prefix == None, then create the output of fastqilluminafilter in the run.get_work_directory()
saved_files = filtered_read1_files + filtered_read2_files + self.get_all_reads("index")
saved_files = filtered_read1_files + filtered_read2_files + filtered_index_files
logging.getLogger("CasavaNG6Workflow").debug("illumina_process saved_files = " + ",".join(saved_files))
reads_prefixes = None
if self.group_prefix != None :
# concatenate fastq
reads_prefixes = list((Utils.get_group_basenames(saved_files, "read")).keys())
logging.getLogger("CasavaNG6Workflow").debug("illumina_process read_predixes = " + ",".join(reads_prefixes))
logging.getLogger("CasavaNG6Workflow").debug("illumina_process saved_files = " + ",".join(saved_files))
concatenatefastq = self.add_component("ConcatenateFilesGroups", [self.runobj,saved_files,reads_prefixes])
saved_files = concatenatefastq.concat_files
......@@ -700,8 +714,11 @@ class CasavaNG6Workflow(NG6Workflow):
fastqilluminafilter = None
filtered_read1_files = self.get_all_reads("read1")
filtered_read2_files = self.get_all_reads("read2")
filtered_index_files = self.get_all_reads("index")
saved_files = self.get_all_reads()
# reads prefixes
reads_prefixes =list((Utils.get_group_basenames(analysis_files, "read")).keys())
# add raw
addrawfiles = self.add_component("AddRawFiles", [self.runobj, saved_files, self.compression])
contam = []
......@@ -711,6 +728,9 @@ class CasavaNG6Workflow(NG6Workflow):
contam.append(self.get_resource("yeast_bwa"))
except : pass
logging.getLogger("CasavaNG6Workflow").debug("illumina_process files_analysis = " + ",".join(filtered_read1_files))
logging.getLogger("CasavaNG6Workflow").debug("illumina_process files_analysis = " + ",".join(filtered_read2_files))
logging.getLogger("CasavaNG6Workflow").debug("illumina_process files_analysis = " + ",".join(filtered_index_files))
# contamination_search
if contam :
if self.contamination_databank: contam.extend(self.contamination_databank)
......
......@@ -290,7 +290,44 @@ class Utils(object):
return [read_1_list, read_2_list]
@staticmethod
def split_pair_and_index(file_list, is_casava=False):
    """
    Return the list of read 1, the list of read 2 and the list of index reads from a list of files.

    In CASAVA mode each file is classified on its own from the read tag found in its
    file name: "R1" goes to read 1, "R2" goes to read 2 and any other tag goes to the
    index list. Otherwise the list is sorted and consumed three files at a time.

    @param file_list : the list of files to split
    @param is_casava : files names in file_list are in CASAVA format
    @return : [read_1_list, read_2_list, read_index_list]
    @raise IndexError : in non-CASAVA mode, when the number of files is not a multiple of 3
    """
    logger = logging.getLogger("Utils")
    read_1_list = []
    read_2_list = []
    read_index_list = []
    logger.debug("split_pair_and_index. Entering")

    if is_casava:
        logger.debug("split_pair_and_index. is_casava")
        for file_path in file_list:
            logger.debug("split_pair_and_index. file = %s", file_path)
            # Only the part of the base name before the first "." carries the CASAVA fields.
            basename_without_ext = os.path.basename(file_path).split(".")[0]
            file_name_fields = basename_without_ext.split(Utils.CASAVA_FILENAME_SEPARATOR)
            # CASAVA_FILENAME['read'] - 1 is the index of the read tag among the fields.
            read_tag = file_name_fields[Utils.CASAVA_FILENAME['read'] - 1]
            if read_tag == "R1":
                read_1_list.append(file_path)
            elif read_tag == "R2":
                read_2_list.append(file_path)
            else:
                # Every tag other than R1/R2 is treated as an index read.
                read_index_list.append(file_path)
    else:
        sorted_list = sorted(file_list)
        logger.debug("split_pair_and_index. file_list = %s", ", ".join(file_list))
        logger.debug("split_pair_and_index. sorted_list = %s", ", ".join(sorted_list))
        # NOTE(review): this assumes that, once sorted, each consecutive triple is
        # ordered read 1, read 2, index - confirm against the non-CASAVA file naming.
        for start in range(0, len(sorted_list), 3):
            # Plain indexing is kept on purpose: an incomplete trailing triple
            # raises IndexError before anything is appended for that triple.
            read_1 = sorted_list[start]
            read_2 = sorted_list[start + 1]
            read_index = sorted_list[start + 2]
            logger.debug("split_pair_and_index. sorted_list[i] = %s", read_1)
            logger.debug("split_pair_and_index. sorted_list[i+1] = %s", read_2)
            # The label now matches the element actually logged (it used to say i+1).
            logger.debug("split_pair_and_index. sorted_list[i+2] = %s", read_index)
            read_1_list.append(read_1)
            read_2_list.append(read_2)
            read_index_list.append(read_index)

    return [read_1_list, read_2_list, read_index_list]
@staticmethod
def get_group_basenames( file_list, group_by ):
"""
......
......@@ -109,6 +109,8 @@ function get_casava_1_8_desc($string, $desc) {
$best_description = $description." (R1)";
} elseif (preg_match("/_R2/i", $string)) {
$best_description = $description." (R2)";
} elseif (preg_match("/_I1/i", $string)) {
$best_description = $description." (I1)";
}
}
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment