Commit dc70596f authored by Penom Nom's avatar Penom Nom
Browse files

correction import highchart. code cleaning

parent 9b7ddd8c
......@@ -22,6 +22,7 @@ import sys
import pickle
import datetime
import argparse
import xml.etree.ElementTree as ET
from jflow.workflow import Workflow
from jflow.utils import display_error_message
......@@ -240,31 +241,62 @@ class NG6Workflow (BasicNG6Workflow):
elif self.project:
self.project.sync()
def get_files_from_casava(casava_directory, project_name, lane_number):
    """
    Retrieve all fastq files of a specific project and lane number from a given casava directory

    The directory layout is detected from the files it contains: SampleSheet.mk
    (bcl2fastq <= 1.8) is looked for first, then Stats/DemultiplexingStats.xml
    (bcl2fastq >= 1.9).

    @param casava_directory : path to CASAVA output directory
    @param project_name : project name
    @param lane_number : lane number (int, or a string holding an int)
    @return : list of fastq file paths; empty when nothing matches or when the
              directory has neither of the two known layouts
    """

    def bcl2fastq_18(directory, pname, lane):
        """bcl2fastq <= 1.8 : sample sub directories are listed in SampleSheet.mk"""
        subdirs_list = []
        with open(os.path.join(directory, "SampleSheet.mk")) as fh:
            for line in fh:
                if line.startswith("l" + str(lane) + "_SUBDIRS"):
                    # split() rather than split(" ") so that padding around
                    # ':=' does not produce empty entries
                    subdirs_list = line.strip().partition(":=")[2].split()

        # the project name is data, not a pattern: escape it before matching
        project_subdir = re.compile("Project_" + re.escape(pname) + "/Sample_.+")
        lane_tag = "_L00" + str(lane) + "_"
        files = []
        for subdir in subdirs_list:
            if not (project_subdir.match(subdir) or subdir.startswith("Undetermined_indices")):
                continue
            # Paths are built with plain "/" concatenation on purpose:
            # _process_casava_18 rebuilds the same string and compares it
            # with == against the paths returned here.
            for filename in sorted(os.listdir(directory + "/" + subdir)):
                if filename.endswith(".fastq.gz") and lane_tag in filename:
                    files.append(directory + "/" + subdir + "/" + filename)
        return files

    def bcl2fastq_216(directory, pname, lane):
        """bcl2fastq >= 1.9 : samples are listed in Stats/DemultiplexingStats.xml"""
        lane = int(lane)  # the XML lane numbers are parsed as int, compare like with like
        root = ET.parse(os.path.join(directory, 'Stats', 'DemultiplexingStats.xml')).getroot()

        # compare the attribute in Python instead of formatting the name into
        # an XPath predicate, which breaks on names containing a quote
        project = None
        for candidate in root.findall(".//Project"):
            if candidate.get('name') == pname:
                project = candidate
                break
        if project is None:
            return []

        project_files = sorted(os.listdir(os.path.join(directory, pname)))
        files = []
        for sample in project.findall("./Sample"):
            sample_name = sample.get('name')
            # 'all' entries are skipped, as are samples without a name
            if not sample_name or sample_name == 'all':
                continue
            # collect every lane of every real barcode, not only the first one
            sample_lanes = set()
            for barcode in sample.findall('./Barcode'):
                if barcode.get('name') != 'all':
                    for lane_node in barcode.findall('Lane'):
                        sample_lanes.add(int(lane_node.get('number')))
            if lane not in sample_lanes:
                continue
            # \d+ : the sample number in the file name may have several digits
            fileregexp = re.compile(re.escape(sample_name) + r'_S\d+_L%03d_' % lane)
            for pfile in project_files:
                if fileregexp.match(pfile):
                    files.append(os.path.join(directory, pname, pfile))
        return files

    if os.path.exists(os.path.join(casava_directory, "SampleSheet.mk")):
        return bcl2fastq_18(casava_directory, project_name, lane_number)
    if os.path.exists(os.path.join(casava_directory, 'Stats', 'DemultiplexingStats.xml')):
        return bcl2fastq_216(casava_directory, project_name, lane_number)
    # unknown layout: return an empty list (not None) so callers can test its length
    return []
class CasavaNG6Workflow(NG6Workflow):
......@@ -299,7 +331,7 @@ class CasavaNG6Workflow(NG6Workflow):
def __create_samples__(self):
"""
Parse SampleSheep.mk file from a casava directory and return a list of Sample object
Create samples object from a casava directory if provided
@param casava_directory : path to CASAVA output directory
@param lane_number : files in each sample are sequenced on this lane
"""
......@@ -310,72 +342,23 @@ class CasavaNG6Workflow(NG6Workflow):
lane_number = self.casava["lane"]
all_samples, all_samples_id = [], []
# open casava samplesheet again to associate our files with a sample
with open(os.path.join(casava_directory, "SampleSheet.mk")) as fh :
barcodes_list = []
sample_ids_list = []
subdirs_list = []
for line in fh :
if line.startswith("l" + str(lane_number) + "_BARCODES"):
parts = line.strip().split(":=")
barcodes_list = [ re.sub( r"[-_\s]+", "", x) for x in parts[1].split() ]
elif line.startswith("l" + str(lane_number) + "_SAMPLEIDS" ):
parts = line.strip().split(":=")
sample_ids_list = parts[1].split(" ")
elif line.startswith("l" + str(lane_number) + "_SUBDIRS"):
parts = line.strip().split(":=")
subdirs_list = parts[1].split(" ")
assert len(barcodes_list) == len(sample_ids_list) == len(subdirs_list), "Invalid lane {0} in SampleSheet.mk".format(lane_number)
# get the casava project_name
if self.casava["project"] :
project_name = self.casava["project"]
else :
project_name = self.project_name
project_name = project_name.replace(" ", "_")
input_files = casava_directory.get_files( lane_number, project_name)
if len(input_files) == 0 :
raise Exception("Error while parsing casava directory %s, invalid project name '%s' for lane %s"% (casava_directory, project_name, lane_number))
# parse samples
for i in range(len(barcodes_list)):
sample = {
'barcode' : barcodes_list[i],
'sample_id' : sample_ids_list[i],
'subdir' : subdirs_list[i],
'reads1' : [],
'reads2' : []
}
# filter on project name
if re.match("Project_" + project_name + "/Sample_.+", sample['subdir']) or sample['subdir'].startswith("Undetermined_indices"):
for file in os.listdir(casava_directory + "/" + sample['subdir']):
filepath = casava_directory + "/" + sample['subdir'] + "/" + file
if file.endswith(".fastq.gz") and re.search(".*_L00" + str(lane_number) + "_.*", file):
for idx, iofile in enumerate(input_files) :
if iofile == filepath :
if re.search(".*_R1_.*", file):
if not sample['subdir'].startswith("Undetermined_indices"):
sample['reads1'].append(iofile)
else:
self.undetermined_reads1.append(iofile)
if re.search(".*_R2_.*", file):
if not sample['subdir'].startswith("Undetermined_indices"):
sample['reads2'].append(iofile)
else:
self.undetermined_reads2.append(iofile)
input_files.pop(idx)
break
if not sample['subdir'].startswith("Undetermined_indices") :
sp_object = Sample(sample['barcode'], sample['reads1'], reads2 = sample['reads2'], name=sample['sample_id'])
sp_object.add_metadata('barcode', sample['barcode'])
sp_object.add_metadata('is_casava', True)
all_samples.append(sp_object)
all_samples_id.append(sample['sample_id'])
# get the casava project_name
if self.casava["project"] :
project_name = self.casava["project"]
else :
project_name = self.project_name
project_name = project_name.replace(" ", "_")
input_files = casava_directory.get_files( project_name, lane_number)
if len(input_files) == 0 :
raise Exception("Error while parsing casava directory %s, invalid project name '%s' for lane %s"% (casava_directory, project_name, lane_number))
all_samples, all_samples_id = [], []
if os.path.exists(os.path.join(casava_directory, "SampleSheet.mk")) :
all_samples, all_samples_id = self._process_casava_18(casava_directory, project_name, lane_number, input_files)
elif os.path.exists(os.path.join( casava_directory, 'Stats', 'DemultiplexingStats.xml')) :
all_samples, all_samples_id = self._process_casava_216(casava_directory, project_name, lane_number, input_files)
selected_samples = self.casava['select_sample_id']
if selected_samples :
......@@ -395,6 +378,84 @@ class CasavaNG6Workflow(NG6Workflow):
NG6Workflow.__preprocess_samples__(self)
if self.is_casava:
self.group_prefix = (Utils.get_group_basenames(self.get_all_reads(), "read")).keys()
def _process_casava_18(self, casava_directory, project_name, lane_number, input_files):
    """
    Creates samples from casavadir (<=1.8) using input files

    Reads the l<lane>_BARCODES, l<lane>_SAMPLEIDS and l<lane>_SUBDIRS entries of
    SampleSheet.mk, then dispatches the fastq files found in each selected sub
    directory: files of "Undetermined_indices" sub directories are appended to
    self.undetermined_reads1 / self.undetermined_reads2, the others are
    attached to a new Sample object.

    @param casava_directory: path to the CASAVA output directory (must contain SampleSheet.mk)
    @param project_name: casava project name, selects the Project_<name>/Sample_* sub directories
    @param lane_number: lane number
    @param input_files: list of fastq paths to dispatch; every path that gets
                        dispatched is removed from this list
    @return: tuple (all_samples, all_samples_id)
    @raise AssertionError: if the three lane entries do not have the same length
    """
    all_samples = []
    all_samples_id = []
    barcodes_list = []
    sample_ids_list = []
    subdirs_list = []
    lane_prefix = "l" + str(lane_number)

    # open casava samplesheet again to associate our files with a sample
    with open(os.path.join(casava_directory, "SampleSheet.mk")) as fh:
        for line in fh:
            if line.startswith(lane_prefix + "_BARCODES"):
                parts = line.strip().split(":=")
                barcodes_list = [re.sub(r"[-_\s]+", "", x) for x in parts[1].split()]
            elif line.startswith(lane_prefix + "_SAMPLEIDS"):
                parts = line.strip().split(":=")
                # split(" ") is kept as is: it is positional, and the three
                # lists are aligned by index below
                sample_ids_list = parts[1].split(" ")
            elif line.startswith(lane_prefix + "_SUBDIRS"):
                parts = line.strip().split(":=")
                subdirs_list = parts[1].split(" ")

    # Explicit raise instead of an assert statement: the check must survive
    # python -O. The exception type and message are unchanged.
    if not (len(barcodes_list) == len(sample_ids_list) == len(subdirs_list)):
        raise AssertionError("Invalid lane {0} in SampleSheet.mk".format(lane_number))

    # the project name is data, not a pattern: escape it before matching
    project_subdir = re.compile("Project_" + re.escape(project_name) + "/Sample_.+")
    lane_tag = "_L00" + str(lane_number) + "_"

    # parse samples
    for barcode, sample_id, subdir in zip(barcodes_list, sample_ids_list, subdirs_list):
        is_undetermined = subdir.startswith("Undetermined_indices")
        # filter on project name
        # NOTE(review): the sample creation at the end of this loop is assumed
        # to sit inside this project filter (sub directories of other projects
        # create no Sample) -- confirm against the original indentation.
        if not (is_undetermined or project_subdir.match(subdir)):
            continue

        reads1 = []
        reads2 = []
        for filename in os.listdir(casava_directory + "/" + subdir):
            if not (filename.endswith(".fastq.gz") and lane_tag in filename):
                continue
            filepath = casava_directory + "/" + subdir + "/" + filename
            try:
                idx = input_files.index(filepath)
            except ValueError:
                # not one of the requested input files
                continue
            # pop the caller's own object (first occurrence) and dispatch it
            iofile = input_files.pop(idx)
            if "_R1_" in filename:
                (self.undetermined_reads1 if is_undetermined else reads1).append(iofile)
            if "_R2_" in filename:
                (self.undetermined_reads2 if is_undetermined else reads2).append(iofile)

        if not is_undetermined:
            sp_object = Sample(barcode, reads1, reads2=reads2, name=sample_id)
            sp_object.add_metadata('barcode', barcode)
            sp_object.add_metadata('is_casava', True)
            all_samples.append(sp_object)
            all_samples_id.append(sample_id)

    return all_samples, all_samples_id
def _process_casava_216(self, casava_directory, project_name, lane_number, input_files):
    """
    Creates samples from casavadir (>=1.9) using input files

    Not implemented yet: this method always raises.

    @param casava_directory: path to the casava output directory
    @param project_name: casava project name
    @param lane_number: lane number
    @param input_files: fastq files retrieved from the casava directory
    @raise NotImplementedError: always
    """
    # Carry a message so that a run on such a directory fails with an
    # explanation instead of a bare exception name.
    raise NotImplementedError(
        "Sample creation from a casava directory >= 1.9 is not implemented "
        "(directory: {0})".format(casava_directory))
def illumina_process(self):
if self.is_casava :
......
......@@ -60,9 +60,9 @@ class tx_nG6_pi6 extends tslib_pibase {
// Add the jquery libs + the tree plugins and its css
$GLOBALS['TSFE']->additionalHeaderData[$this->prefixId] = '
<script type="text/javascript" src="'.t3lib_extMgm::siteRelPath($this->extKey).'res/js/jquery.min.js"></script>
<script type="text/javascript" src="'.t3lib_extMgm::siteRelPath($this->extKey).'res/js/highstock.js"></script>
<script type="text/javascript" src="'.t3lib_extMgm::siteRelPath($this->extKey).'res/js/jquery.highcharts.exporting.js"></script>
<script type="text/javascript" src="'.t3lib_extMgm::siteRelPath($this->extKey).'res/js/jquery.dataTables.min.js"></script>
<script type="text/javascript" src="'.t3lib_extMgm::siteRelPath($this->extKey).'res/js/highstock.js"></script>
<script type="text/javascript" src="'.t3lib_extMgm::siteRelPath($this->extKey).'res/js/bootstrap.min.js"></script>
<script type="text/javascript" src="'.t3lib_extMgm::siteRelPath($this->extKey).'res/js/dataTables.bootstrap.js"></script>
<script type="text/javascript" src="'.t3lib_extMgm::siteRelPath($this->extKey).'res/js/typeahead.bundle.js"></script>
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment