Commit 66dcf0dc authored by Penom Nom
Browse files

- Fix bug in insert sizes parameters.

- Add error if the sample sheet is missing.
parent 8f3220b2
......@@ -39,67 +39,69 @@ class Casava18 (NG6Workflow):
self.runobj = Run(self.args["run_name"], self.args["run_date"], self.args["species"], self.args["data_nature"],
self.args["type"], self.args["run_description"], self.args["sequencer"])
# Is the run a multiplexed run
if os.path.isfile(os.path.join(self.args["casava_directory"], "SampleSheet.mk")):
barcodes = ""
sample_ids = ""
subdirs = ""
samples = []
# Retrieve all information for samples in lane
for line in open(os.path.join(self.args["casava_directory"], "SampleSheet.mk")).readlines():
# Retrieve barcodes
if line.startswith("l"+self.args["lane_number"]+"_BARCODES"):
parts = line.strip().split(":=")
barcodes_list = parts[1].split(" ")
for i in range(len(barcodes_list)):
samples.append({'barcode':barcodes_list[i]})
# Retrieve samples ids
elif line.startswith("l"+self.args["lane_number"]+"_SAMPLEIDS"):
parts = line.strip().split(":=")
sample_ids_list = parts[1].split(" ")
for i in range(len(sample_ids_list)):
samples[i]['sample_id'] = sample_ids_list[i]
# Retrieve folder
elif line.startswith("l"+self.args["lane_number"]+"_SUBDIRS"):
parts = line.strip().split(":=")
subdirs_list = parts[1].split(" ")
for i in range(len(subdirs_list)):
samples[i]['subdir'] = subdirs_list[i]
# Filter on project name
aux_samples = []
for current_sample in samples:
if re.match("Project_" + self.project.get_name() + "/Sample_.+", current_sample['subdir']) is not None:
aux_samples.append(current_sample)
samples = aux_samples
if len(samples) == 0:
raise ValueError, "The project " + self.project.get_name() + " doesn't exist in CASVA directory."
# Create files lists
self.read1_files = []
self.read2_files = []
mids_desc_array = {}
for current_sample in samples:
if not current_sample['subdir'].startswith("Undetermined_indices"): # Skip the folder with the incorrect indexes
# Write line in the index description
if current_sample['barcode'] != "NoIndex":
mids_desc_array[current_sample['barcode']] = current_sample['sample_id']
# Write files lists
for file in os.listdir(self.args["casava_directory"] + "/" + current_sample['subdir']):
if file.endswith(".fastq.gz") and re.search(".*_L00" + self.args["lane_number"] + "_.*", file):
if re.search(".*_R1_.*", file):
self.read1_files.append(self.args["casava_directory"] + "/" + current_sample['subdir'] + "/" + file)
if re.search(".*_R2_.*", file):
self.read2_files.append(self.args["casava_directory"] + "/" + current_sample['subdir'] + "/" + file)
self.runobj.add_mids_description(mids_desc_array)
# then add the run to the project
self.project.add_run(self.runobj)
# Parse the sample sheet
if not os.path.isfile(os.path.join(self.args["casava_directory"], "SampleSheet.mk")):
raise ValueError, "The folder " + self.args["casava_directory"] + " doesn't contain the file SampleSheet.mk."
barcodes = ""
sample_ids = ""
subdirs = ""
samples = []
# Retrieve all information for samples in lane
for line in open(os.path.join(self.args["casava_directory"], "SampleSheet.mk")).readlines():
# Retrieve barcodes
if line.startswith("l"+self.args["lane_number"]+"_BARCODES"):
parts = line.strip().split(":=")
barcodes_list = parts[1].split(" ")
for i in range(len(barcodes_list)):
samples.append({'barcode':barcodes_list[i]})
# Retrieve samples ids
elif line.startswith("l"+self.args["lane_number"]+"_SAMPLEIDS"):
parts = line.strip().split(":=")
sample_ids_list = parts[1].split(" ")
for i in range(len(sample_ids_list)):
samples[i]['sample_id'] = sample_ids_list[i]
# Retrieve folder
elif line.startswith("l"+self.args["lane_number"]+"_SUBDIRS"):
parts = line.strip().split(":=")
subdirs_list = parts[1].split(" ")
for i in range(len(subdirs_list)):
samples[i]['subdir'] = subdirs_list[i]
# Filter on project name
aux_samples = []
for current_sample in samples:
if re.match("Project_" + self.project.get_name() + "/Sample_.+", current_sample['subdir']) is not None:
aux_samples.append(current_sample)
samples = aux_samples
if len(samples) == 0:
raise ValueError, "The project " + self.project.get_name() + " doesn't exist in CASVA directory."
# Create files lists
self.read1_files = []
self.read2_files = []
mids_desc_array = {}
for current_sample in samples:
if not current_sample['subdir'].startswith("Undetermined_indices"): # Skip the folder with the incorrect indexes
# Write line in the index description
if current_sample['barcode'] != "NoIndex":
mids_desc_array[current_sample['barcode']] = current_sample['sample_id']
# Write files lists
for file in os.listdir(self.args["casava_directory"] + "/" + current_sample['subdir']):
if file.endswith(".fastq.gz") and re.search(".*_L00" + self.args["lane_number"] + "_.*", file):
if re.search(".*_R1_.*", file):
self.read1_files.append(self.args["casava_directory"] + "/" + current_sample['subdir'] + "/" + file)
if re.search(".*_R2_.*", file):
self.read2_files.append(self.args["casava_directory"] + "/" + current_sample['subdir'] + "/" + file)
self.runobj.add_mids_description(mids_desc_array)
# then add the run to the project
self.project.add_run(self.runobj)
def process(self):
......@@ -137,4 +139,4 @@ class Casava18 (NG6Workflow):
if len(self.read2_files) > 0:
# process insert sizes
insertssizes = self.add_component("InsertsSizes", [bwa.bam_files, self.args["histogram_width"], self.args["min_pct"], "inserts_sizes.tar.gz"], parent = bwa)
\ No newline at end of file
insertssizes = self.add_component("InsertsSizes", [bwa.bam_files, self.args["histogram_width"], self.args["min_pct"], "LENIENT", "inserts_sizes.tar.gz"], parent = bwa)
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment