Skip to content
Snippets Groups Projects

Fix bug while reading fastq.gz files generated by recent samtools

Merged Florian Plaza-Onate requested to merge fix_samtools_gzip into master
1 file
+ 74
97
Compare changes
  • Side-by-side
  • Inline
+ 74
97
@@ -767,40 +767,6 @@ class MeteorSession
#------------------------------------------------------------------------------
# count fastq reads
def CountReadFastqFile(aInputFile)
aLineCount = aReadCount = aBaseCount = 0
fd = nil
in_fq = nil
# open input fastq in read mode
begin
fd = File.open(aInputFile)
# fastq may be gziped
in_fq = aInputFile =~ /gz$/ ? Zlib::GzipReader.new(fd) : fd
rescue
STDERR.puts "Error: cannot open #{aInputFile} !"
exit 1
end
# read input fastq line by line
in_fq.each do |line|
aLineCount += 1
aModulo = aLineCount % 4
#if line =~ /^@/
if aModulo == 1 # read id line
aReadCount += 1
else
aBaseCount += line.size - 1 if aModulo == 0 # size - 1 because of line ending character
end
end
in_fq.close if not in_fq.closed?
return [aReadCount, aBaseCount]
end
#------------------------------------------------------------------------------
def CountReadAndReIndexCsfastaQualFiles(aInputCsFastaFile, aInputQualFile, aOutputCsFastaFile, aOutputCsQualFile)
aReadCount = aBaseCount = 0
@@ -857,70 +823,81 @@ class MeteorSession
#------------------------------------------------------------------------------
def CountReadAndReIndexFastqFile(aInputFile, aOutputFile)
aInputLineCount = aReadCount = aBaseCount = aCpt = 0
adn = qual = prev = seqId = nil
fd = nil
in_fq = nil
# open input fastq in read mode
begin
fd = File.open(aInputFile)
# fastq may be gzipped, xz or bz2
in_fq = aInputFile =~ /gz$/ ? Zlib::GzipReader.new(fd) : fd
rescue
STDERR.puts "Error: cannot open #{aInputFile} !"
exit 1
end
# open output fastq in write mode
File.open(aOutputFile, "w") do |out_fq|
# read input fastq line by line
in_fq.each do |line|
line.chomp!
# line begins with @seqid
if (line =~ /^@(.+)$/)
## check if the previous read is valid (quality exists and same size as adn)
aQualSize = (qual.nil?) ? 0 : qual.size
if (aQualSize > 0)
if ( (not adn.nil?) and aQualSize == adn.size)
aReadCount += 1
aBaseCount += aQualSize
# then write this indexed read
out_fq.print "@#{aReadCount}\n#{adn}\n+\n#{qual}\n"
adn = nil
end
end
seqId = $1
## NB: quality line might start with @
end
aCpt += 1
if (line == "+" or line == "+#{seqId}")
qual = nil
aCpt = 3
# previous line was adn
adn = prev
end
if aCpt == 4
qual = line
end
prev = line
end
fd.close if not fd.closed?
#in_fq.close if not in_fq.closed?
# evaluate last read
aQualSize = (qual.nil?) ? 0 : qual.size
if (aQualSize > 0)
if ( (not adn.nil?) and aQualSize == adn.size)
aReadCount += 1
aBaseCount += aQualSize
out_fq.print "@#{aReadCount}\n#{adn}\n+\n#{qual}\n"
end
end
end
return [aReadCount, aBaseCount]
aInputLineCount = aReadCount = aBaseCount = aCpt = 0
adn = qual = prev = seqId = nil
fd = nil
in_fq = nil
# open input fastq in read mode
begin
fd = File.open(aInputFile)
rescue
STDERR.puts "Error: cannot open #{aInputFile} !"
exit 1
end
# fastq may be gzipped
is_gzip = aInputFile =~ /gz$/
# open output fastq in write mode
File.open(aOutputFile, "w") do |out_fq|
while not fd.eof
in_fq = is_gzip ? Zlib::GzipReader.new(fd) : fd
# read input fastq line by line
in_fq.each do |line|
line.chomp!
# line begins with @seqid
if (line =~ /^@(.+)$/)
## check if the previous read is valid (quality exists and same size as adn)
aQualSize = (qual.nil?) ? 0 : qual.size
if (aQualSize > 0)
if ( (not adn.nil?) and aQualSize == adn.size)
aReadCount += 1
aBaseCount += aQualSize
# then write this indexed read
out_fq.print "@#{aReadCount}\n#{adn}\n+\n#{qual}\n"
adn = nil
end
end
seqId = $1
## NB: quality line might start with @
end
aCpt += 1
if (line == "+" or line == "+#{seqId}")
qual = nil
aCpt = 3
# previous line was adn
adn = prev
end
if aCpt == 4
qual = line
end
prev = line
end
# fix bug of gzip file concatenating multiple streams
if not fd.eof
unused = in_fq.unused
in_fq.finish
fd.pos -= unused ? unused.length : 0
end
end
fd.close if not fd.closed?
# evaluate last read
aQualSize = (qual.nil?) ? 0 : qual.size
if (aQualSize > 0)
if ( (not adn.nil?) and aQualSize == adn.size)
aReadCount += 1
aBaseCount += aQualSize
out_fq.print "@#{aReadCount}\n#{adn}\n+\n#{qual}\n"
end
end
end
return [aReadCount, aBaseCount]
end
#------------------------------------------------------------------------------
Loading