home *** CD-ROM | disk | FTP | other *** search
/ Maximum CD 2010 November / maximum-cd-2010-11.iso / DiscContents / calibre-0.7.13.msi / file_858 (.txt) < prev    next >
Encoding:
Python Compiled Bytecode  |  2010-08-06  |  2.2 KB  |  64 lines

  1. # Source Generated with Decompyle++
  2. # File: in.pyc (Python 2.6)
  3.  
  4. import constants
  5. from constants import eStart, eError, eItsMe
  6. from charsetprober import CharSetProber
  7. from codingstatemachine import CodingStateMachine
  8. from mbcssm import UTF8SMModel
# Factor applied to the "unlike" estimate in UTF8Prober.get_confidence(),
# once for every multi-byte character counted so far.
ONE_CHAR_PROB = 0.5
  10.  
  11. class UTF8Prober(CharSetProber):
  12.     
  13.     def __init__(self):
  14.         CharSetProber.__init__(self)
  15.         self._mCodingSM = CodingStateMachine(UTF8SMModel)
  16.         self.reset()
  17.  
  18.     
  19.     def reset(self):
  20.         CharSetProber.reset(self)
  21.         self._mCodingSM.reset()
  22.         self._mNumOfMBChar = 0
  23.  
  24.     
  25.     def get_charset_name(self):
  26.         return 'utf-8'
  27.  
  28.     
  29.     def feed(self, aBuf):
  30.         for c in aBuf:
  31.             codingState = self._mCodingSM.next_state(c)
  32.             if codingState == eError:
  33.                 self._mState = constants.eNotMe
  34.                 break
  35.                 continue
  36.             if codingState == eItsMe:
  37.                 self._mState = constants.eFoundIt
  38.                 break
  39.                 continue
  40.             if codingState == eStart:
  41.                 if self._mCodingSM.get_current_charlen() >= 2:
  42.                     self._mNumOfMBChar += 1
  43.                 
  44.             self._mCodingSM.get_current_charlen() >= 2
  45.         
  46.         if self.get_state() == constants.eDetecting:
  47.             if self.get_confidence() > constants.SHORTCUT_THRESHOLD:
  48.                 self._mState = constants.eFoundIt
  49.             
  50.         
  51.         return self.get_state()
  52.  
  53.     
  54.     def get_confidence(self):
  55.         unlike = 0.99
  56.         if self._mNumOfMBChar < 6:
  57.             for i in range(0, self._mNumOfMBChar):
  58.                 unlike = unlike * ONE_CHAR_PROB
  59.             
  60.             return 1 - unlike
  61.         return unlike
  62.  
  63.  
  64.