
# Source Generated with Decompyle++
# File: in.pyc (Python 2.6)

__all__ = [
    'Tokenizer',
    'CSSProductions']
__docformat__ = 'restructuredtext'
__version__ = '$Id: tokenize2.py 1865 2009-10-11 15:23:11Z cthedot $'
from cssproductions import *
from helper import normalize
import itertools
import re

class Tokenizer(object):
    _atkeywords = {
        u'@font-face': CSSProductions.FONT_FACE_SYM,
        u'@import': CSSProductions.IMPORT_SYM,
        u'@media': CSSProductions.MEDIA_SYM,
        u'@namespace': CSSProductions.NAMESPACE_SYM,
        u'@page': CSSProductions.PAGE_SYM,
        u'@variables': CSSProductions.VARIABLES_SYM }
    _linesep = u'\n'
    unicodesub = re.compile(r'\\[0-9a-fA-F]{1,6}(?:\r\n|[\t|\r|\n|\f|\x20])?').sub
    cleanstring = re.compile(r'\\((\r\n)|[\n|\r|\f])').sub

    def __init__(self, macros = None, productions = None):
        if not macros:
            macros = MACROS
        if not productions:
            productions = PRODUCTIONS
        self.tokenmatches = self._compile_productions(self._expand_macros(macros, productions))
        # keep direct references to the COMMENT and URI match functions; they
        # are reused when tokenizing an incomplete (fullsheet) stylesheet
        self.commentmatcher = [x[1] for x in self.tokenmatches if x[0] == 'COMMENT'][0]
        self.urimatcher = [x[1] for x in self.tokenmatches if x[0] == 'URI'][0]
        self._pushed = []

    def _expand_macros(self, macros, productions):
        """Returns the productions with every {macro} reference expanded."""

        def macro_value(m):
            return '(?:%s)' % macros[m.groupdict()['macro']]

        expanded = []
        for key, value in productions:
            # keep substituting until no {macro} references remain
            while re.search('{[a-zA-Z][a-zA-Z0-9-]*}', value):
                value = re.sub('{(?P<macro>[a-zA-Z][a-zA-Z0-9-]*)}', macro_value, value)
            expanded.append((key, value))
        return expanded

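    # Illustration only (these sample macros are hypothetical, not from the
    # original module): _expand_macros keeps substituting {name} placeholders
    # with '(?:<macro body>)' until none remain, so with
    #     macros = {'h': '[0-9a-f]'}
    #     productions = [('HEX', '#{h}{1,6}')]
    # the expanded list becomes [('HEX', '#(?:[0-9a-f]){1,6}')].
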
    def _compile_productions(self, expanded_productions):
        compiled = []
        for key, value in expanded_productions:
            # anchor each pattern so it only matches at the start of the text
            compiled.append((key, re.compile('^(?:%s)' % value, re.U).match))
        return compiled

    def push(self, *tokens):
        # pushed tokens are yielded again before any newly tokenized ones
        self._pushed = itertools.chain(tokens, self._pushed)

    def clear(self):
        self._pushed = []

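    # Hypothetical usage note (not part of the decompiled file): a caller that
    # consumed one token too many can hand it back with push(); tokenize()
    # yields it again at the start of its next loop iteration, e.g.
    #     t = Tokenizer()
    #     tokens = t.tokenize(u'a {}')
    #     first = tokens.next()
    #     t.push(first)    # 'first' is re-yielded before any new token
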
    def tokenize(self, text, fullsheet = False):
        """Generator: tokenizes text and yields (name, value, line, col)
        tuples; fullsheet=True completes unclosed constructs and appends
        an EOF token."""

        def _repl(m):
            # resolve a unicode escape like \26 to the character it names
            num = int(m.group(0)[1:], 16)
            if num < 65536:
                return unichr(num)
            return m.group(0)

        def _normalize(value):
            return normalize(self.unicodesub(_repl, value))

        line = col = 1
        # a BOM may only appear once, at the very start of the text
        (BOM, matcher) = self.tokenmatches[0]
        productions = self.tokenmatches[1:]
        match = matcher(text)
        if match:
            found = match.group(0)
            yield (BOM, found, line, col)
            text = text[len(found):]

        if text.startswith('@charset '):
            found = '@charset '
            yield (CSSProductions.CHARSET_SYM, found, line, col)
            text = text[len(found):]
            col += len(found)

        while text:
            # yield pushed-back tokens before tokenizing new ones
            for pushed in self._pushed:
                yield pushed

            # simple delimiter chars are yielded directly as CHAR tokens
            c = text[0]
            if c in '{}:;,':
                yield ('CHAR', c, line, col)
                col += 1
                text = text[1:]
                continue
            for name, matcher in productions:
                if fullsheet and name == 'CHAR' and text.startswith(u'/*'):
                    # before the CHAR production, check for an unclosed comment
                    possiblecomment = u'%s*/' % text
                    match = self.commentmatcher(possiblecomment)
                    if match:
                        yield ('COMMENT', possiblecomment, line, col)
                        text = None
                        break

                match = matcher(text)
                if match:
                    found = match.group(0)
                    if fullsheet:
                        if 'INVALID' == name and text == found:
                            # a complete INVALID token is an unclosed STRING
                            name = 'STRING'
                            found = '%s%s' % (found, found[0])
                        elif 'FUNCTION' == name and u'url(' == _normalize(found):
                            # url( followed by the remaining text may be an unclosed URI
                            for end in (u"')", u'")', u')'):
                                possibleuri = '%s%s' % (text, end)
                                match = self.urimatcher(possibleuri)
                                if match:
                                    name = 'URI'
                                    found = match.group(0)
                                    break

                    if name in ('DIMENSION', 'IDENT', 'STRING', 'URI', 'HASH', 'COMMENT', 'FUNCTION', 'INVALID', 'UNICODE-RANGE'):
                        # these productions may contain unicode escapes
                        value = self.unicodesub(_repl, found)
                        if name in ('STRING', 'INVALID'):
                            # drop escaped line breaks from string values
                            value = self.cleanstring('', value)
                    else:
                        if 'ATKEYWORD' == name:
                            if '@charset' == found and ' ' == text[len(found):len(found) + 1]:
                                # a misplaced @charset needs its trailing space
                                name = CSSProductions.CHARSET_SYM
                                found += ' '
                            else:
                                # map known at-keywords to their specific symbol
                                name = self._atkeywords.get(_normalize(found), 'ATKEYWORD')
                        value = found
                    yield (name, value, line, col)
                    text = text[len(found):]
                    # track line and column position for the next token
                    nls = found.count(self._linesep)
                    line += nls
                    if nls:
                        col = len(found[found.rfind(self._linesep):])
                    else:
                        col += len(found)
                    break

        if fullsheet:
            yield ('EOF', u'', line, col)
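
# A minimal usage sketch (not part of the decompiled file), assuming the
# default MACROS and PRODUCTIONS from cssproductions are available:
#
#     tokenizer = Tokenizer()
#     for name, value, line, col in tokenizer.tokenize(u'a { color: red }'):
#         print name, repr(value), line, col
#
# With fullsheet=True the generator also closes unterminated strings, comments
# and url(...) values and finishes with an ('EOF', u'', line, col) token.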