Class/Module Index [+]

Quicksearch

CharDet::SingleByteCharSetProber

NEGATIVE_CAT = 0

Public Class Methods

new(model, reversed=false, nameProber=nil) click to toggle source
# File lib/tmail/vendor/rchardet-1.3/lib/rchardet/sbcharsetprober.rb, line 41
# Sets up a prober around the given language model and clears its counters.
#
# model      - the language model; other methods of this class read it with
#              string keys such as 'charsetName'.
# reversed   - true when every letter pair must be swapped before it is
#              looked up in the model.
# nameProber - optional auxiliary prober that, when present, decides the
#              reported charset name.
def initialize(model, reversed=false, nameProber=nil)
  super()
  @_mNameProber = nameProber
  @_mReversed = reversed
  @_mModel = model
  reset()
end

Public Instance Methods

feed(aBuf) click to toggle source
# File lib/tmail/vendor/rchardet-1.3/lib/rchardet/sbcharsetprober.rb, line 66
# Feeds a chunk of bytes to the prober, updating the character and
# letter-pair statistics, and re-evaluates the detection state once enough
# pairs have been seen.
#
# aBuf - String holding the raw bytes to analyse. Unless the model sets
#        'keepEnglishLetter', it is first passed through
#        filter_without_english_letters.
#
# Returns the value of get_state() after the chunk has been processed.
def feed(aBuf)
  aBuf = filter_without_english_letters(aBuf) unless @_mModel['keepEnglishLetter']

  # 0 is truthy in Ruby, so a test on the length alone can never detect an
  # empty buffer; ask the string directly.
  return get_state() if aBuf.empty?

  char_to_order = @_mModel['charToOrderMap']
  precedence = @_mModel['precedenceMatrix']

  aBuf.each_byte do |b|
    # Look the order up by the byte value itself. Going through b.chr[0]
    # yields a one-character String on Ruby 1.9+, which is not a usable
    # index into the order map.
    order = char_to_order[b]
    @_mTotalChar += 1 if order < SYMBOL_CAT_ORDER

    if order < SAMPLE_SIZE
      @_mFreqChar += 1
      if @_mLastOrder < SAMPLE_SIZE
        @_mTotalSeqs += 1
        # The matrix is addressed as (first * SAMPLE_SIZE) + second; a
        # reversed model swaps the two letters of the pair.
        pair_index = if @_mReversed
                       (order * SAMPLE_SIZE) + @_mLastOrder
                     else
                       (@_mLastOrder * SAMPLE_SIZE) + order
                     end
        @_mSeqCounters[precedence[pair_index]] += 1
      end
    end
    @_mLastOrder = order
  end

  # Only take a shortcut decision while still detecting and after more than
  # SB_ENOUGH_REL_THRESHOLD pairs have been counted.
  if get_state() == EDetecting && @_mTotalSeqs > SB_ENOUGH_REL_THRESHOLD
    cf = get_confidence()
    if cf > POSITIVE_SHORTCUT_THRESHOLD
      $stderr << "#{@_mModel['charsetName']} confidence = #{cf}, we have a winner\n" if $debug
      @_mState = EFoundIt
    elsif cf < NEGATIVE_SHORTCUT_THRESHOLD
      $stderr << "#{@_mModel['charsetName']} confidence = #{cf}, below negative shortcut threshold #{NEGATIVE_SHORTCUT_THRESHOLD}\n" if $debug
      @_mState = ENotMe
    end
  end

  return get_state()
end
get_charset_name() click to toggle source
# File lib/tmail/vendor/rchardet-1.3/lib/rchardet/sbcharsetprober.rb, line 58
# Reports the charset name for this prober: the auxiliary name prober's
# answer when one was supplied, otherwise the model's own 'charsetName'.
def get_charset_name
  @_mNameProber ? @_mNameProber.get_charset_name() : @_mModel['charsetName']
end
get_confidence() click to toggle source
# File lib/tmail/vendor/rchardet-1.3/lib/rchardet/sbcharsetprober.rb, line 110
# Computes the confidence that the fed data matches this model.
#
# Returns 0.01 when no letter pairs have been counted. Otherwise the share
# of POSITIVE_CAT pairs is divided by the model's 'mTypicalPositiveRatio'
# and scaled by the ratio of sampled characters to total characters; any
# result of 1.0 or more is reported as 0.99.
def get_confidence
  return 0.01 unless @_mTotalSeqs > 0

  positive_share = (1.0 * @_mSeqCounters[POSITIVE_CAT]) / @_mTotalSeqs
  confidence = positive_share / @_mModel['mTypicalPositiveRatio']
  confidence = confidence * @_mFreqChar / @_mTotalChar
  confidence >= 1.0 ? 0.99 : confidence
end
reset() click to toggle source
# File lib/tmail/vendor/rchardet-1.3/lib/rchardet/sbcharsetprober.rb, line 49
# Returns the prober to its initial state: runs the parent class's reset,
# then zeroes every counter kept by this class.
def reset
  super()
  # Total pairs, total characters, and characters inside the sampling range.
  @_mTotalSeqs = @_mTotalChar = @_mFreqChar = 0
  # One counter per sequence category.
  @_mSeqCounters = Array.new(NUMBER_OF_SEQ_CAT, 0)
  # Order of the previously seen character; starts at 255.
  @_mLastOrder = 255
end

[Validate]

Generated with the Darkfish Rdoc Generator 2.