home *** CD-ROM | disk | FTP | other *** search
/ Maximum CD 2010 November / maximum-cd-2010-11.iso / DiscContents / calibre-0.7.13.msi / file_834 (.txt) < prev    next >
Encoding:
Python Compiled Bytecode  |  2010-08-06  |  2.7 KB  |  73 lines

  1. # Source Generated with Decompyle++
  2. # File: in.pyc (Python 2.6)
  3.  
  4. import constants
  5. import sys
  6. from constants import eStart, eError, eItsMe
  7. from mbcharsetprober import MultiByteCharSetProber
  8. from codingstatemachine import CodingStateMachine
  9. from chardistribution import EUCJPDistributionAnalysis
  10. from jpcntx import EUCJPContextAnalysis
  11. from mbcssm import EUCJPSMModel
  12.  
  13. class EUCJPProber(MultiByteCharSetProber):
  14.     
  15.     def __init__(self):
  16.         MultiByteCharSetProber.__init__(self)
  17.         self._mCodingSM = CodingStateMachine(EUCJPSMModel)
  18.         self._mDistributionAnalyzer = EUCJPDistributionAnalysis()
  19.         self._mContextAnalyzer = EUCJPContextAnalysis()
  20.         self.reset()
  21.  
  22.     
  23.     def reset(self):
  24.         MultiByteCharSetProber.reset(self)
  25.         self._mContextAnalyzer.reset()
  26.  
  27.     
  28.     def get_charset_name(self):
  29.         return 'EUC-JP'
  30.  
  31.     
  32.     def feed(self, aBuf):
  33.         aLen = len(aBuf)
  34.         for i in range(0, aLen):
  35.             codingState = self._mCodingSM.next_state(aBuf[i])
  36.             if codingState == eError:
  37.                 if constants._debug:
  38.                     sys.stderr.write(self.get_charset_name() + ' prober hit error at byte ' + str(i) + '\n')
  39.                 
  40.                 self._mState = constants.eNotMe
  41.                 break
  42.                 continue
  43.             if codingState == eItsMe:
  44.                 self._mState = constants.eFoundIt
  45.                 break
  46.                 continue
  47.             if codingState == eStart:
  48.                 charLen = self._mCodingSM.get_current_charlen()
  49.                 if i == 0:
  50.                     self._mLastChar[1] = aBuf[0]
  51.                     self._mContextAnalyzer.feed(self._mLastChar, charLen)
  52.                     self._mDistributionAnalyzer.feed(self._mLastChar, charLen)
  53.                 else:
  54.                     self._mContextAnalyzer.feed(aBuf[i - 1:i + 1], charLen)
  55.                     self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1], charLen)
  56.             i == 0
  57.         
  58.         self._mLastChar[0] = aBuf[aLen - 1]
  59.         if self.get_state() == constants.eDetecting:
  60.             if self._mContextAnalyzer.got_enough_data() and self.get_confidence() > constants.SHORTCUT_THRESHOLD:
  61.                 self._mState = constants.eFoundIt
  62.             
  63.         
  64.         return self.get_state()
  65.  
  66.     
  67.     def get_confidence(self):
  68.         contxtCf = self._mContextAnalyzer.get_confidence()
  69.         distribCf = self._mDistributionAnalyzer.get_confidence()
  70.         return max(contxtCf, distribCf)
  71.  
  72.  
  73.