# # Copyright (C) 2012 INRA # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . # from .lightpbcoreio import BasH5Reader from jflow.seqio import Sequence import numpy as np import os class PacbioH5Reader(object): """ Reader for pacbio movie file """ def __init__(self, file): """ @param file: either the .bas.h5 or one of the three .bax.h5 files """ parts = [] analysisresults_dir = os.path.dirname(file) celldir = os.path.dirname(analysisresults_dir) if file.endswith(".bax.h5"): for each in os.listdir(analysisresults_dir) : fullpath = os.path.join(analysisresults_dir, each) if each.endswith('.bax.h5') : parts.append(fullpath) elif each.endswith('.bas.h5') : parts = [fullpath] break elif file.endswith(".bas.h5"): parts = [file] self.bash5 = BasH5Reader(*parts) self.metadata = None for meta in os.listdir(celldir) : metafile = os.path.join(celldir, meta) if os.path.isfile(metafile) and meta.endswith(".metadata.xml"): self.metadata = metafile break def qvsFromAscii(self,s): return (np.fromstring(s, dtype=np.uint8) - 33) def asciiFromQvs(self,a): return (np.clip(a, 0, 93).astype(np.uint8) + 33).tostring() def __iter__(self): for zmwRead in self.bash5.subreads(): yield Sequence(zmwRead.readName, zmwRead.readName, zmwRead.basecalls(), self.asciiFromQvs(zmwRead.QualityValue())) def h5file(ifile): try : reader = PacbioH5Reader(ifile) nb_seq = 0 for id, desc, seq, qualities in reader: nb_seq += 1 if nb_seq == 10: break except : raise jflow.InvalidFormatError("The provided file '" + ifile + "' is not a valid pacbio h5 file!")