# File lib/tmail/vendor/rchardet-1.3/lib/rchardet/universaldetector.rb, line 134
#
# Finishes detection and settles on a final result from the data fed so far.
#
# Returns the result hash ({'encoding' => ..., 'confidence' => ...}) when an
# encoding is decided here (pure ASCII input, or a high-byte prober whose
# confidence exceeds MINIMUM_THRESHOLD). Returns nil when detection was already
# done, when no data was ever received, or when no prober passes the threshold.
# When the global $debug is set, diagnostics are written to $stderr.
def close
  return if @done

  unless @_mGotData
    $stderr << "no data received!\n" if $debug
    return
  end
  @done = true

  if @_mInputState == EPureAscii
    @result = {'encoding' => 'ascii', 'confidence' => 1.0}
    return @result
  end

  if @_mInputState == EHighbyte
    # Query every prober exactly once so the value used for ranking, for the
    # threshold test and for the reported confidence is the same number.
    scored = (@_mCharSetProbers || []).map { |prober| [prober, prober.get_confidence] }
    best_prober, best_confidence = scored.max { |a, b| a[1] <=> b[1] }
    if best_prober && best_confidence > MINIMUM_THRESHOLD
      @result = {'encoding' => best_prober.get_charset_name, 'confidence' => best_confidence}
      return @result
    end
  end

  return unless $debug

  $stderr << "no probers hit minimum threshhold\n"
  # Only the first prober is dumped; it is presumably a group prober exposing
  # its children through _mProbers (TODO confirm). The list may be empty or
  # unset (for example when the input never left the escape-ASCII state), so
  # guard before dereferencing it.
  group = (@_mCharSetProbers || []).first
  return unless group

  (group._mProbers || []).each do |prober|
    next unless prober
    $stderr << "#{prober.get_charset_name} confidence = #{prober.get_confidence}\n"
  end
  nil
end
# File lib/tmail/vendor/rchardet-1.3/lib/rchardet/universaldetector.rb, line 61
#
# Feeds one chunk of data to the detector.
#
# aBuf:: a String holding the next chunk of the document being examined.
#
# On the first non-empty chunk the leading bytes are checked for a byte order
# mark; a match fixes the result with confidence 1.0. Otherwise the chunk is
# classified (pure ASCII, escape-sequence ASCII or high-byte) and handed to the
# matching prober(s). Once a prober reports EFoundIt, @result is filled in and
# @done is set, after which further calls are ignored.
#
# The return value is not meaningful; callers read the outcome from @result.
def feed(aBuf)
  return if @done

  # 0 is truthy in Ruby, so emptiness has to be tested explicitly. An empty
  # chunk must neither count as "data received" (that would disable the BOM
  # check for the real first chunk) nor overwrite @_mLastChar with nil.
  return if aBuf.empty?

  unless @_mGotData
    # If the data starts with a BOM, we know it is UTF. The comparison is done
    # on raw byte values so it does not depend on the string's encoding tag.
    # Order matters: the 4-byte marks must be tried before their 2-byte prefixes.
    head = aBuf.unpack('C4')
    bom = [
      [[0xEF, 0xBB, 0xBF],       "UTF-8"],                  # UTF-8 with BOM
      [[0xFF, 0xFE, 0x00, 0x00], "UTF-32LE"],               # UTF-32, little-endian BOM
      [[0x00, 0x00, 0xFE, 0xFF], "UTF-32BE"],               # UTF-32, big-endian BOM
      [[0xFE, 0xFF, 0x00, 0x00], "X-ISO-10646-UCS-4-3412"], # UCS-4, unusual octet order (3412)
      [[0x00, 0x00, 0xFF, 0xFE], "X-ISO-10646-UCS-4-2143"], # UCS-4, unusual octet order (2143)
      [[0xFF, 0xFE],             "UTF-16LE"],               # UTF-16, little-endian BOM
      [[0xFE, 0xFF],             "UTF-16BE"]                # UTF-16, big-endian BOM
    ].find { |signature, _name| head[0, signature.length] == signature }
    @result = {'encoding' => bom[1], 'confidence' => 1.0} if bom
  end

  @_mGotData = true
  if @result['encoding'] && (@result['confidence'] > 0.0)
    @done = true
    return
  end

  if @_mInputState == EPureAscii
    if @_highBitDetector =~ aBuf
      @_mInputState = EHighbyte
    elsif @_escDetector =~ (@_mLastChar + aBuf)
      # The previous chunk's last character is prepended so an escape
      # sequence split across two chunks is still seen.
      @_mInputState = EEscAscii
    end
  end

  @_mLastChar = aBuf[-1..-1]

  if @_mInputState == EEscAscii
    @_mEscCharSetProber ||= EscCharSetProber.new
    if @_mEscCharSetProber.feed(aBuf) == EFoundIt
      @result = {'encoding' => @_mEscCharSetProber.get_charset_name,
                 'confidence' => @_mEscCharSetProber.get_confidence}
      @done = true
    end
  elsif @_mInputState == EHighbyte
    if !@_mCharSetProbers || @_mCharSetProbers.empty?
      @_mCharSetProbers = [MBCSGroupProber.new, SBCSGroupProber.new, Latin1Prober.new]
    end
    @_mCharSetProbers.each do |prober|
      next unless prober.feed(aBuf) == EFoundIt

      @result = {'encoding' => prober.get_charset_name,
                 'confidence' => prober.get_confidence}
      @done = true
      break
    end
  end
end
# File lib/tmail/vendor/rchardet-1.3/lib/rchardet/universaldetector.rb, line 46
#
# Returns the detector to its initial state so it can examine a new document:
# clears the result, the done/got-data flags, the input state and the
# remembered last character, then resets every prober that already exists.
def reset
  @result = {'encoding' => nil, 'confidence' => 0.0}
  @done = false
  @_mStart = true
  @_mGotData = false
  @_mInputState = EPureAscii
  @_mLastChar = ''

  @_mEscCharSetProber.reset if @_mEscCharSetProber

  # The prober list may not have been created yet; treat a missing list like an
  # empty one, the same way the escape prober above is allowed to be unset.
  (@_mCharSetProbers || []).each { |prober| prober.reset }
end
Generated with the Darkfish Rdoc Generator 2.