home *** CD-ROM | disk | FTP | other *** search
/ Maximum CD 2010 November / maximum-cd-2010-11.iso / DiscContents / calibre-0.7.13.msi / file_841 (.txt) < prev    next >
Encoding:
Python Compiled Bytecode  |  2010-08-06  |  3.2 KB  |  103 lines

  1. # Source Generated with Decompyle++
  2. # File: in.pyc (Python 2.6)
  3.  
  4. from charsetprober import CharSetProber
  5. import constants
  6. FINAL_KAF = '\xea'
  7. NORMAL_KAF = '\xeb'
  8. FINAL_MEM = '\xed'
  9. NORMAL_MEM = '\xee'
  10. FINAL_NUN = '\xef'
  11. NORMAL_NUN = '\xf0'
  12. FINAL_PE = '\xf3'
  13. NORMAL_PE = '\xf4'
  14. FINAL_TSADI = '\xf5'
  15. NORMAL_TSADI = '\xf6'
  16. MIN_FINAL_CHAR_DISTANCE = 5
  17. MIN_MODEL_DISTANCE = 0.01
  18. VISUAL_HEBREW_NAME = 'ISO-8859-8'
  19. LOGICAL_HEBREW_NAME = 'windows-1255'
  20.  
  21. class HebrewProber(CharSetProber):
  22.     
  23.     def __init__(self):
  24.         CharSetProber.__init__(self)
  25.         self._mLogicalProber = None
  26.         self._mVisualProber = None
  27.         self.reset()
  28.  
  29.     
  30.     def reset(self):
  31.         self._mFinalCharLogicalScore = 0
  32.         self._mFinalCharVisualScore = 0
  33.         self._mPrev = ' '
  34.         self._mBeforePrev = ' '
  35.  
  36.     
  37.     def set_model_probers(self, logicalProber, visualProber):
  38.         self._mLogicalProber = logicalProber
  39.         self._mVisualProber = visualProber
  40.  
  41.     
  42.     def is_final(self, c):
  43.         return c in [
  44.             FINAL_KAF,
  45.             FINAL_MEM,
  46.             FINAL_NUN,
  47.             FINAL_PE,
  48.             FINAL_TSADI]
  49.  
  50.     
  51.     def is_non_final(self, c):
  52.         return c in [
  53.             NORMAL_KAF,
  54.             NORMAL_MEM,
  55.             NORMAL_NUN,
  56.             NORMAL_PE]
  57.  
  58.     
  59.     def feed(self, aBuf):
  60.         if self.get_state() == constants.eNotMe:
  61.             return constants.eNotMe
  62.         aBuf = self.filter_high_bit_only(aBuf)
  63.         for cur in aBuf:
  64.             if cur == ' ':
  65.                 if self._mBeforePrev != ' ':
  66.                     if self.is_final(self._mPrev):
  67.                         self._mFinalCharLogicalScore += 1
  68.                     elif self.is_non_final(self._mPrev):
  69.                         self._mFinalCharVisualScore += 1
  70.                     
  71.                 
  72.             elif self._mBeforePrev == ' ' and self.is_final(self._mPrev) and cur != ' ':
  73.                 self._mFinalCharVisualScore += 1
  74.             
  75.             self._mBeforePrev = self._mPrev
  76.             self._mPrev = cur
  77.         
  78.         return constants.eDetecting
  79.  
  80.     
  81.     def get_charset_name(self):
  82.         finalsub = self._mFinalCharLogicalScore - self._mFinalCharVisualScore
  83.         if finalsub >= MIN_FINAL_CHAR_DISTANCE:
  84.             return LOGICAL_HEBREW_NAME
  85.         if finalsub <= -MIN_FINAL_CHAR_DISTANCE:
  86.             return VISUAL_HEBREW_NAME
  87.         modelsub = self._mLogicalProber.get_confidence() - self._mVisualProber.get_confidence()
  88.         if modelsub > MIN_MODEL_DISTANCE:
  89.             return LOGICAL_HEBREW_NAME
  90.         if modelsub < -MIN_MODEL_DISTANCE:
  91.             return VISUAL_HEBREW_NAME
  92.         if finalsub < 0:
  93.             return VISUAL_HEBREW_NAME
  94.         return LOGICAL_HEBREW_NAME
  95.  
  96.     
  97.     def get_state(self):
  98.         if self._mLogicalProber.get_state() == constants.eNotMe and self._mVisualProber.get_state() == constants.eNotMe:
  99.             return constants.eNotMe
  100.         return constants.eDetecting
  101.  
  102.  
  103.