Commit c7997a10 authored by Gerald Salin
Browse files

add barcode option and visualisation

parent 09e84fbd
......@@ -14,7 +14,7 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
import logging
from ng6.ng6workflow import NG6Workflow
from ng6.utils import Utils
......@@ -27,6 +27,7 @@ class PacBioQualityCheck (NG6Workflow):
return "PacBio data loading and quality check"
def define_parameters(self, function="process"):
logging.getLogger("jflow").debug("Begin PacBioQualityCheck.define_parameters! pacbio_qc")
self.add_parameter("nb_threads", "Number of threads to use for fastqc. Each thread will be allocated 250MB of memory.", default=3)
self.add_parameter("min_subreads_length", "Subreads shorter than this value (in base pairs) are filtered out and excluded from analysis", default=0, type='int')
self.add_parameter("polymerase_read_qual", "Polymerase reads with lower quality than this value are filtered out and excluded from analysis", default=0, type='float')
......@@ -35,12 +36,12 @@ class PacBioQualityCheck (NG6Workflow):
self.add_input_file( "barcode_file", "Input barcode file", default=None)
self.add_parameter("barcode_score", "Min identical base for barcode", default=22, type='int')
def process(self):
logging.getLogger("jflow").debug("Begin PacBioQualityCheck.process! test pacbio_qc")
sample_names = []
infiles = []
for sample in self.samples :
sample_names.append( sample.name )
infiles.append(sample.reads1[0])
add_pacbio_raw_file = self.add_component("AddPacBioRawFiles", [self.runobj, self.get_all_reads()])
h5tofastq = self.add_component("H5toFastq", [sample_names, infiles])
fastqc = self.add_component("FastQC", [h5tofastq.output_fastqs, False, False, "fastqc.tar.gz", self.nb_threads], parent = h5tofastq)
......
......@@ -105,8 +105,12 @@ def rs_subreads(inputfile, stdout, componentdir, smrtpipe, fofnToSmrtpipeInput,
subprocess.check_call("%s %s > %s"%(fofnToSmrtpipeInput, inputs_fofn, inputs_xml), shell = True)
# run smrtpipe
logging.getLogger("jflow").debug("Begin rs_subreads! smrtpipe ")
logging.getLogger("jflow").debug("-------> %s --output=%s --params=%s xml:%s >> %s"%(smrtpipe, outputdir, settings_xml, inputs_xml,stdout))
subprocess.check_call("%s --output=%s --params=%s xml:%s >> %s"%(smrtpipe, outputdir, settings_xml, inputs_xml,stdout), shell = True)
logging.getLogger("jflow").debug("End rs_subreads! smrtpipe")
def extract_metrics(inputfile):
import h5py
......@@ -141,7 +145,7 @@ class RS_Subreads (Analysis):
self.add_output_file_list( 'stdouts', "logs", pattern="{basename}.stdout", items=items)
def process(self):
logging.getLogger("jflow").debug("Begin RS_Subreads.process!")
logging.getLogger("jflow").debug("Begin RS_Subreads.process! pacbio_qc")
subreads = PythonFunction(rs_subreads, cmd_format="{EXE} {IN} {OUT} {ARG}")
for i,e in enumerate(self.input_files) :
logging.getLogger("jflow").debug("Begin RS_Subreads.process! " + e)
......@@ -159,7 +163,7 @@ class RS_Subreads (Analysis):
arguments = [self.output_directory, self.get_exec_path("smrtpipe"), self.get_exec_path("fofnToSmrtpipeInput.py"),
self.min_subreads_length, self.polymerase_read_qual, self.polymerase_read_length, self.barcode_file, self.barcode_score ])
logging.getLogger("jflow").debug("End RS_Subreads.process! ")
print('END PROCESS')
print('END PROCESS TEST')
def get_version(self):
return "1.0"
......@@ -171,17 +175,16 @@ class RS_Subreads (Analysis):
self.options = "minSubReadLength %s readScore %s minLength %s"%(self.min_subreads_length, self.polymerase_read_qual, self.polymerase_read_length)
def post_process(self):
logging.getLogger("jflow").debug("Begin RS_Subreads.post_process!")
metrics = []
metrics2 = []
results_files = []
for i,samplefile in enumerate(self.input_files) :
sample = self.sample_names[i]
sdir = os.path.basename(os.path.splitext(os.path.splitext(samplefile)[0])[0])
sample_outdir = os.path.join(self.output_directory, sdir)
'''print ('sample -------> ',sample,'\n')
print ('self.sample_names -------> ',self.sample_names,'\n')'''
# loading
jsonfile = os.path.join(sample_outdir, 'results', 'filter_reports_loading.json')
if os.path.isfile(jsonfile):
......@@ -258,8 +261,20 @@ class RS_Subreads (Analysis):
# Barcode report
jsonfile = os.path.join(sample_outdir, 'results', 'barcode_report.json')
if os.path.isfile(jsonfile):
logging.getLogger("jflow").debug("Begin RS_Subreads.post_process Barcode report")
group = 'barcode_results'
# Finally, create and add the archive to the analysis
logging.getLogger("jflow").debug("Begin RS_Subreads.post_process Barcode report START archive")
results_files.append(self.barcode_file)
logging.getLogger("jflow").debug("barcode_file : " + self.barcode_file)
demultiplex_fasta_tar = os.path.join(sample_outdir,'data','barcoded-fastqs.tgz')
logging.getLogger("jflow").debug("demultiplex_fasta_tar : " + demultiplex_fasta_tar)
results_files.append(demultiplex_fasta_tar)
self._create_and_archive(results_files,"Archive_Barcode")
if group not in metrics2 :
metrics2.append(group)
self._add_result_element("metrics2", "headers", ','.join(['Reads', 'Bases']), group)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment