# Source Generated with Decompyle++
# File: in.pyc (Python 2.6)

__all__ = [
    'Tokenizer',
    'CSSProductions']
__docformat__ = 'restructuredtext'
__version__ = '$Id: tokenize2.py 1865 2009-10-11 15:23:11Z cthedot $'
from cssproductions import *
from helper import normalize
import itertools
import re

class Tokenizer(object):
    _atkeywords = {
        u'@font-face': CSSProductions.FONT_FACE_SYM,
        u'@import': CSSProductions.IMPORT_SYM,
        u'@media': CSSProductions.MEDIA_SYM,
        u'@namespace': CSSProductions.NAMESPACE_SYM,
        u'@page': CSSProductions.PAGE_SYM,
        u'@variables': CSSProductions.VARIABLES_SYM}
    _linesep = u'\n'
    unicodesub = re.compile(r'\\[0-9a-fA-F]{1,6}(?:\r\n|[\t|\r|\n|\f|\x20])?').sub
    cleanstring = re.compile(r'\\((\r\n)|[\n|\r|\f])').sub
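    # For illustration (an added note, not in the original listing): unicodesub,
    # together with the _repl callback defined in tokenize(), resolves CSS
    # unicode escapes, e.g. it turns u'\\26 B' into u'&B' since \26 is U+0026 ('&').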

    def __init__(self, macros=None, productions=None):
        if not macros:
            macros = MACROS
        if not productions:
            productions = PRODUCTIONS
        self.tokenmatches = self._compile_productions(
            self._expand_macros(macros, productions))
        # matchers for the COMMENT and URI productions, used by tokenize()
        # when completing unclosed comments and url() values in fullsheet mode
        self.commentmatcher = [x[1] for x in self.tokenmatches
                               if x[0] == 'COMMENT'][0]
        self.urimatcher = [x[1] for x in self.tokenmatches
                           if x[0] == 'URI'][0]
        self._pushed = []

    def _expand_macros(self, macros, productions):
        """Return macro expanded productions; order of productions is kept."""

        def macro_value(m):
            return '(?:%s)' % macros[m.groupdict()['macro']]

        expanded = []
        for key, value in productions:
            # expand until no macro reference like {name} is left
            while re.search('{[a-zA-Z][a-zA-Z0-9-]*}', value):
                value = re.sub('{(?P<macro>[a-zA-Z][a-zA-Z0-9-]*)}',
                               macro_value, value)
            expanded.append((key, value))
        return expanded
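
    # Illustration (added note, assuming a hypothetical macro table): with
    # macros = {'nmchar': '[a-z0-9-]'} a production value u'{nmchar}+' would
    # be expanded to u'(?:[a-z0-9-])+' before being compiled below.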

    def _compile_productions(self, expanded_productions):
        compiled = []
        for key, value in expanded_productions:
            compiled.append((key, re.compile('^(?:%s)' % value, re.U).match))
        return compiled

    def push(self, *tokens):
        """Push back tokens which have not been processed yet."""
        self._pushed = itertools.chain(tokens, self._pushed)

    def clear(self):
        self._pushed = []

    def tokenize(self, text, fullsheet=False):
        """Generator: tokenize text and yield (name, value, line, col) tuples."""

        def _repl(m):
            # used by unicodesub: replace a unicode escape with its character
            num = int(m.group(0)[1:], 16)
            if num < 65536:
                return unichr(num)
            # keep escapes outside the BMP unchanged
            return m.group(0)

        def _normalize(value):
            # normalize and resolve unicode escapes
            return normalize(self.unicodesub(_repl, value))

        line = col = 1

        # a BOM is only expected once, at the very start
        (BOM, matcher) = self.tokenmatches[0]
        productions = self.tokenmatches[1:]
        match = matcher(text)
        if match:
            found = match.group(0)
            yield (BOM, found, line, col)
            text = text[len(found):]

        # @charset is only valid at the start of a sheet
        if text.startswith('@charset '):
            found = '@charset '
            yield (CSSProductions.CHARSET_SYM, found, line, col)
            text = text[len(found):]
            col += len(found)

        while text:
            # emit tokens pushed back via push() before matching new ones
            for pushed in self._pushed:
                yield pushed

            # fast path for the most common single chars
            c = text[0]
            if c in '{}:;,':
                yield ('CHAR', c, line, col)
                col += 1
                text = text[1:]
                continue

            # check all other productions; at least CHAR must match
            for name, matcher in productions:
                if fullsheet and name == 'CHAR' and text.startswith(u'/*'):
                    # before the CHAR production, test for an unclosed comment
                    possiblecomment = u'%s*/' % text
                    match = self.commentmatcher(possiblecomment)
                    if match:
                        yield ('COMMENT', possiblecomment, line, col)
                        text = None  # ate all remaining text
                        break

                match = matcher(text)  # if no match try next production
                if match:
                    found = match.group(0)  # needed later for line/col
                    if fullsheet:
                        # check if found may be completed into a full token
                        if 'INVALID' == name and text == found:
                            # complete INVALID to STRING with its start char " or '
                            name = 'STRING'
                            found = '%s%s' % (found, found[0])
                        elif 'FUNCTION' == name and u'url(' == _normalize(found):
                            # url( is only a FUNCTION if the sheet is incomplete
                            for end in (u"')", u'")', u')'):
                                possibleuri = '%s%s' % (text, end)
                                match = self.urimatcher(possibleuri)
                                if match:
                                    name = 'URI'
                                    found = match.group(0)
                                    break

                    if name in ('DIMENSION', 'IDENT', 'STRING', 'URI', 'HASH',
                                'COMMENT', 'FUNCTION', 'INVALID', 'UNICODE-RANGE'):
                        # may contain a unicode escape: resolve it
                        value = self.unicodesub(_repl, found)
                        if name in ('STRING', 'INVALID'):
                            # remove escaped line breaks inside strings
                            value = self.cleanstring('', found)
                    else:
                        if 'ATKEYWORD' == name:
                            if '@charset' == found and ' ' == text[len(found):len(found) + 1]:
                                # a misplaced @charset rule; needs a trailing space
                                name = CSSProductions.CHARSET_SYM
                                found += ' '
                            else:
                                name = self._atkeywords.get(_normalize(found), 'ATKEYWORD')
                        value = found

                    yield (name, value, line, col)

                    text = text[len(found):]
                    nls = found.count(self._linesep)
                    line += nls
                    if nls:
                        col = len(found[found.rfind(self._linesep):])
                    else:
                        col += len(found)
                    break

        if fullsheet:
            yield ('EOF', u'', line, col)
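
# A minimal usage sketch (not part of the original module). It assumes that
# cssproductions defines the default MACROS and PRODUCTIONS used above; the
# CSS snippet is illustrative only.
if __name__ == '__main__':
    tokenizer = Tokenizer()
    css = u'a { color: red }'
    for name, value, line, col in tokenizer.tokenize(css, fullsheet=True):
        print name, repr(value), line, col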