Skip to content
Snippets Groups Projects
Commit bf0578d9 authored by Florian Plaza Oñate's avatar Florian Plaza Oñate
Browse files

Fix bug while reading fastq.gz files generated by recent samtools

parent b5c43c03
No related branches found
No related tags found
1 merge request!1Fix bug while reading fastq.gz files generated by recent samtools
......@@ -767,40 +767,6 @@ class MeteorSession
#------------------------------------------------------------------------------
# count fastq reads
def CountReadFastqFile(aInputFile)
aLineCount = aReadCount = aBaseCount = 0
fd = nil
in_fq = nil
# open input fastq in read mode
begin
fd = File.open(aInputFile)
# fastq may be gziped
in_fq = aInputFile =~ /gz$/ ? Zlib::GzipReader.new(fd) : fd
rescue
STDERR.puts "Error: cannot open #{aInputFile} !"
exit 1
end
# read input fastq line by line
in_fq.each do |line|
aLineCount += 1
aModulo = aLineCount % 4
#if line =~ /^@/
if aModulo == 1 # read id line
aReadCount += 1
else
aBaseCount += line.size - 1 if aModulo == 0 # size - 1 because of line ending character
end
end
in_fq.close if not in_fq.closed?
return [aReadCount, aBaseCount]
end
#------------------------------------------------------------------------------
def CountReadAndReIndexCsfastaQualFiles(aInputCsFastaFile, aInputQualFile, aOutputCsFastaFile, aOutputCsQualFile)
aReadCount = aBaseCount = 0
......@@ -857,70 +823,81 @@ class MeteorSession
#------------------------------------------------------------------------------
def CountReadAndReIndexFastqFile(aInputFile, aOutputFile)
aInputLineCount = aReadCount = aBaseCount = aCpt = 0
adn = qual = prev = seqId = nil
fd = nil
in_fq = nil
# open input fastq in read mode
begin
fd = File.open(aInputFile)
# fastq may be gzipped, xz or bz2
in_fq = aInputFile =~ /gz$/ ? Zlib::GzipReader.new(fd) : fd
rescue
STDERR.puts "Error: cannot open #{aInputFile} !"
exit 1
end
# open output fastq in write mode
File.open(aOutputFile, "w") do |out_fq|
# read input fastq line by line
in_fq.each do |line|
line.chomp!
# line begins with @seqid
if (line =~ /^@(.+)$/)
## check if the previous read is valid (quality exists and same size as adn)
aQualSize = (qual.nil?) ? 0 : qual.size
if (aQualSize > 0)
if ( (not adn.nil?) and aQualSize == adn.size)
aReadCount += 1
aBaseCount += aQualSize
# then write this indexed read
out_fq.print "@#{aReadCount}\n#{adn}\n+\n#{qual}\n"
adn = nil
end
end
seqId = $1
## NB: quality line might start with @
end
aCpt += 1
if (line == "+" or line == "+#{seqId}")
qual = nil
aCpt = 3
# previous line was adn
adn = prev
end
if aCpt == 4
qual = line
end
prev = line
end
fd.close if not fd.closed?
#in_fq.close if not in_fq.closed?
# evaluate last read
aQualSize = (qual.nil?) ? 0 : qual.size
if (aQualSize > 0)
if ( (not adn.nil?) and aQualSize == adn.size)
aReadCount += 1
aBaseCount += aQualSize
out_fq.print "@#{aReadCount}\n#{adn}\n+\n#{qual}\n"
end
end
end
return [aReadCount, aBaseCount]
aInputLineCount = aReadCount = aBaseCount = aCpt = 0
adn = qual = prev = seqId = nil
fd = nil
in_fq = nil
# open input fastq in read mode
begin
fd = File.open(aInputFile)
rescue
STDERR.puts "Error: cannot open #{aInputFile} !"
exit 1
end
# fastq may be gzipped
is_gzip = aInputFile =~ /gz$/
# open output fastq in write mode
File.open(aOutputFile, "w") do |out_fq|
while not fd.eof
in_fq = is_gzip ? Zlib::GzipReader.new(fd) : fd
# read input fastq line by line
in_fq.each do |line|
line.chomp!
# line begins with @seqid
if (line =~ /^@(.+)$/)
## check if the previous read is valid (quality exists and same size as adn)
aQualSize = (qual.nil?) ? 0 : qual.size
if (aQualSize > 0)
if ( (not adn.nil?) and aQualSize == adn.size)
aReadCount += 1
aBaseCount += aQualSize
# then write this indexed read
out_fq.print "@#{aReadCount}\n#{adn}\n+\n#{qual}\n"
adn = nil
end
end
seqId = $1
## NB: quality line might start with @
end
aCpt += 1
if (line == "+" or line == "+#{seqId}")
qual = nil
aCpt = 3
# previous line was adn
adn = prev
end
if aCpt == 4
qual = line
end
prev = line
end
# fix bug of gzip file concatenating multiple streams
if not fd.eof
unused = in_fq.unused
in_fq.finish
fd.pos -= unused ? unused.length : 0
end
end
fd.close if not fd.closed?
# evaluate last read
aQualSize = (qual.nil?) ? 0 : qual.size
if (aQualSize > 0)
if ( (not adn.nil?) and aQualSize == adn.size)
aReadCount += 1
aBaseCount += aQualSize
out_fq.print "@#{aReadCount}\n#{adn}\n+\n#{qual}\n"
end
end
end
return [aReadCount, aBaseCount]
end
#------------------------------------------------------------------------------
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment