Commit ac44d6e2 authored by Jerome Mariette
Browse files

No commit message

No commit message
parent e6fc97a7
......@@ -209,7 +209,24 @@ class VCFReader(_AbstractFeatureReader):
self.fp = file
self.wholefile = wholefile
self.samples_name=[]
self._init_sample_names()
def _init_sample_names(self):
    """Fill ``self.samples_name`` from the ``#CHROM`` header line of ``self.fp``.

    Reads ``self.fp`` line by line until the ``#CHROM`` line is found. Every
    column from index 9 onwards is taken as a sample and stored as a
    ``(raw_column_value, basename_without_extension)`` tuple. The file is
    rewound to its start afterwards so that later iteration sees every line.

    Raises:
        FormatError: if a line not starting with ``#`` is met before the
            ``#CHROM`` line, if the ``#CHROM`` line has 9 columns or fewer
            (i.e. no sample column), or if no sample name could be collected
            (no ``#CHROM`` line at all, including an empty file).
    """
    # In-memory streams have no ``name``; fall back to a placeholder so that
    # building the error message cannot itself fail.
    source_name = getattr(self.fp, 'name', '<stream>')
    for line in self.fp:
        if not line.startswith('#'):
            # A record showed up before the column header line.
            raise FormatError('The vcf file {0} must start with header lines (#) !!!'.format(source_name))
        if line.startswith('#CHROM'):
            row = line.rstrip().split('\t')
            if len(row) <= 9:
                raise FormatError('Invalid number of columns in your vcf header file {0}'.format(len(row)))
            for sample in row[9:]:
                short_name = os.path.splitext(os.path.basename(sample))[0]
                self.samples_name.append((sample, short_name))
            # Nothing after the column header is needed here.
            break
    self.fp.seek(0, 0)
    # The original test was ``len(...) < 0``, which can never be true.
    if not self.samples_name:
        raise FormatError("Invalid VCF file {0}. Could not retrieve the sample names headers".format(source_name))
def _process_line(self,line):
row = line.rstrip().split('\t')
variation = Entry(**{
......@@ -234,7 +251,7 @@ class VCFReader(_AbstractFeatureReader):
format = row[8].split(':')
for lib_infos in range (9,len(row)) :
if row[lib_infos] != '.' and row[lib_infos] != './.' :
if row[lib_infos] not in [ '.', './.'] :
sformat = row[lib_infos].split(':')
variation.samples.append( Entry(**{ autocast(format[i]) : autocast(sformat[i]) for i in range(0,len(format)) }) )
else :
......@@ -244,30 +261,16 @@ class VCFReader(_AbstractFeatureReader):
def _streaming_iter(self):
    """Lazily yield one parsed record per non-header line of ``self.fp``.

    Lines starting with ``#`` are skipped; every other line is handed to
    ``self._process_line`` and its result is yielded.

    Sample names are not collected here: the constructor already does so
    through ``_init_sample_names``, and appending them again on each
    iteration duplicated every entry of ``self.samples_name``.
    """
    for line in self.fp:
        if line.startswith('#'):
            continue
        yield self._process_line(line)
def _wholefile_iter(self):
    """Read ``self.fp`` in one go and yield one parsed record per data line.

    Header lines (starting with ``#``) and empty strings produced by
    splitting on ``'\\n'`` (e.g. after the final newline) are skipped; every
    other line is handed to ``self._process_line``.

    Sample names are not collected here: the constructor already does so
    through ``_init_sample_names``, and appending them again duplicated the
    entries of ``self.samples_name``.

    Raises:
        AssertionError: if the content contains ``'\\r'`` or is empty. The
            checks stay as ``assert`` so the exception type seen by callers
            is unchanged; note that they are stripped under ``python -O``.
    """
    wholefile = self.fp.read()
    assert '\r' not in wholefile, "Sorry, currently don't know how to deal with files that contain \\r linebreaks"
    # The original condition was ``== 0``, which rejected every non-empty file.
    assert len(wholefile) != 0, "Empty VCF file"
    for line in wholefile.split('\n'):
        if not line or line.startswith('#'):
            continue
        yield self._process_line(line)
class MpileupReader(_AbstractFeatureReader):
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment