home *** CD-ROM | disk | FTP | other *** search
/ Maximum CD 2010 November / maximum-cd-2010-11.iso / DiscContents / calibre-0.7.13.msi / file_857 (.txt) < prev    next >
Encoding:
Python Compiled Bytecode  |  2010-08-06  |  4.0 KB  |  126 lines

  1. # Source Generated with Decompyle++
  2. # File: in.pyc (Python 2.6)
  3.  
  4. import constants
  5. import sys
  6. from latin1prober import Latin1Prober
  7. from mbcsgroupprober import MBCSGroupProber
  8. from sbcsgroupprober import SBCSGroupProber
  9. from escprober import EscCharSetProber
  10. import re
  11. MINIMUM_THRESHOLD = 0.2
  12. ePureAscii = 0
  13. eEscAscii = 1
  14. eHighbyte = 2
  15.  
  16. class UniversalDetector:
  17.     
  18.     def __init__(self):
  19.         self._highBitDetector = re.compile('[\\x80-\\xFF]')
  20.         self._escDetector = re.compile('(\\033|~{)')
  21.         self._mEscCharSetProber = None
  22.         self._mCharSetProbers = []
  23.         self.reset()
  24.  
  25.     
  26.     def reset(self):
  27.         self.result = {
  28.             'encoding': None,
  29.             'confidence': 0 }
  30.         self.done = constants.False
  31.         self._mStart = constants.True
  32.         self._mGotData = constants.False
  33.         self._mInputState = ePureAscii
  34.         self._mLastChar = ''
  35.         if self._mEscCharSetProber:
  36.             self._mEscCharSetProber.reset()
  37.         
  38.         for prober in self._mCharSetProbers:
  39.             prober.reset()
  40.         
  41.  
  42.     
  43.     def feed(self, aBuf):
  44.         if self.done:
  45.             return None
  46.         aLen = len(aBuf)
  47.         if not aLen:
  48.             return None
  49.         self._mGotData = constants.True
  50.         if self.result['encoding'] and self.result['confidence'] > 0:
  51.             self.done = constants.True
  52.             return None
  53.         self._mLastChar = aBuf[-1]
  54.         if self._mInputState == eEscAscii:
  55.             if not self._mEscCharSetProber:
  56.                 self._mEscCharSetProber = EscCharSetProber()
  57.             
  58.             if self._mEscCharSetProber.feed(aBuf) == constants.eFoundIt:
  59.                 self.result = {
  60.                     'encoding': self._mEscCharSetProber.get_charset_name(),
  61.                     'confidence': self._mEscCharSetProber.get_confidence() }
  62.                 self.done = constants.True
  63.             
  64.         elif self._mInputState == eHighbyte:
  65.             if not self._mCharSetProbers:
  66.                 self._mCharSetProbers = [
  67.                     MBCSGroupProber(),
  68.                     SBCSGroupProber(),
  69.                     Latin1Prober()]
  70.             
  71.             for prober in self._mCharSetProbers:
  72.                 if prober.feed(aBuf) == constants.eFoundIt:
  73.                     self.result = {
  74.                         'encoding': prober.get_charset_name(),
  75.                         'confidence': prober.get_confidence() }
  76.                     self.done = constants.True
  77.                     break
  78.                     continue
  79.             
  80.         
  81.  
  82.     
  83.     def close(self):
  84.         if self.done:
  85.             return None
  86.         if not self._mGotData:
  87.             if constants._debug:
  88.                 sys.stderr.write('no data received!\n')
  89.             
  90.             return None
  91.         self.done = constants.True
  92.         if self._mInputState == ePureAscii:
  93.             self.result = {
  94.                 'encoding': 'ascii',
  95.                 'confidence': 1 }
  96.             return self.result
  97.         if self._mInputState == eHighbyte:
  98.             proberConfidence = None
  99.             maxProberConfidence = 0
  100.             maxProber = None
  101.             for prober in self._mCharSetProbers:
  102.                 proberConfidence = prober.get_confidence()
  103.                 if proberConfidence > maxProberConfidence:
  104.                     maxProberConfidence = proberConfidence
  105.                     maxProber = prober
  106.                     continue
  107.                 self.done if not prober else self._mGotData
  108.             
  109.             if maxProber and maxProberConfidence > MINIMUM_THRESHOLD:
  110.                 self.result = {
  111.                     'encoding': maxProber.get_charset_name(),
  112.                     'confidence': maxProber.get_confidence() }
  113.                 return self.result
  114.         
  115.         if constants._debug:
  116.             sys.stderr.write('no probers hit minimum threshhold\n')
  117.             for prober in self._mCharSetProbers[0].mProbers:
  118.                 if not prober:
  119.                     continue
  120.                 
  121.                 sys.stderr.write('%s confidence = %s\n' % (prober.get_charset_name(), prober.get_confidence()))
  122.             
  123.         
  124.  
  125.  
  126.