home *** CD-ROM | disk | FTP | other *** search
/ Maximum CD 2010 November / maximum-cd-2010-11.iso / DiscContents / calibre-0.7.13.msi / file_856 (.txt) < prev    next >
Encoding:
Python Compiled Bytecode  |  2010-08-06  |  2.7 KB  |  73 lines

  1. # Source Generated with Decompyle++
  2. # File: in.pyc (Python 2.6)
  3.  
  4. from mbcharsetprober import MultiByteCharSetProber
  5. from codingstatemachine import CodingStateMachine
  6. from chardistribution import SJISDistributionAnalysis
  7. from jpcntx import SJISContextAnalysis
  8. from mbcssm import SJISSMModel
  9. import constants
  10. import sys
  11. from constants import eStart, eError, eItsMe
  12.  
  13. class SJISProber(MultiByteCharSetProber):
  14.     
  15.     def __init__(self):
  16.         MultiByteCharSetProber.__init__(self)
  17.         self._mCodingSM = CodingStateMachine(SJISSMModel)
  18.         self._mDistributionAnalyzer = SJISDistributionAnalysis()
  19.         self._mContextAnalyzer = SJISContextAnalysis()
  20.         self.reset()
  21.  
  22.     
  23.     def reset(self):
  24.         MultiByteCharSetProber.reset(self)
  25.         self._mContextAnalyzer.reset()
  26.  
  27.     
  28.     def get_charset_name(self):
  29.         return 'SHIFT_JIS'
  30.  
  31.     
  32.     def feed(self, aBuf):
  33.         aLen = len(aBuf)
  34.         for i in range(0, aLen):
  35.             codingState = self._mCodingSM.next_state(aBuf[i])
  36.             if codingState == eError:
  37.                 if constants._debug:
  38.                     sys.stderr.write(self.get_charset_name() + ' prober hit error at byte ' + str(i) + '\n')
  39.                 
  40.                 self._mState = constants.eNotMe
  41.                 break
  42.                 continue
  43.             if codingState == eItsMe:
  44.                 self._mState = constants.eFoundIt
  45.                 break
  46.                 continue
  47.             if codingState == eStart:
  48.                 charLen = self._mCodingSM.get_current_charlen()
  49.                 if i == 0:
  50.                     self._mLastChar[1] = aBuf[0]
  51.                     self._mContextAnalyzer.feed(self._mLastChar[2 - charLen:], charLen)
  52.                     self._mDistributionAnalyzer.feed(self._mLastChar, charLen)
  53.                 else:
  54.                     self._mContextAnalyzer.feed(aBuf[i + 1 - charLen:i + 3 - charLen], charLen)
  55.                     self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1], charLen)
  56.             i == 0
  57.         
  58.         self._mLastChar[0] = aBuf[aLen - 1]
  59.         if self.get_state() == constants.eDetecting:
  60.             if self._mContextAnalyzer.got_enough_data() and self.get_confidence() > constants.SHORTCUT_THRESHOLD:
  61.                 self._mState = constants.eFoundIt
  62.             
  63.         
  64.         return self.get_state()
  65.  
  66.     
  67.     def get_confidence(self):
  68.         contxtCf = self._mContextAnalyzer.get_confidence()
  69.         distribCf = self._mDistributionAnalyzer.get_confidence()
  70.         return max(contxtCf, distribCf)
  71.  
  72.  
  73.