home *** CD-ROM | disk | FTP | other *** search
- # Source Generated with Decompyle++
- # File: in.pyc (Python 2.6)
-
- '''Pattern compiler.
-
- The grammar is taken from PatternGrammar.txt.
-
- The compiler compiles a pattern to a pytree.*Pattern instance.
- '''
__author__ = 'Guido van Rossum <guido@python.org>'

# Python imports
import os

# pgen2 helpers (left exactly as they appeared in the decompiled listing).
from pgen2 import driver
from pgen2 import literals
from pgen2 import token
from pgen2 import tokenize

# The decompiled listing had "from import pytree" / "from import pygram",
# which is not valid Python: the leading dots of a relative import were lost.
# NOTE(review): a single dot (same package) is assumed here -- confirm the
# package layout.
from . import pytree
from . import pygram

# Default pattern grammar: a file named PatternGrammar.txt located in the
# same directory as this module.
_PATTERN_GRAMMAR_FILE = os.path.join(os.path.dirname(__file__),
                                     'PatternGrammar.txt')
-
def tokenize_wrapper(input):
    """Yield the tokens of *input*, leaving out layout-only tokens.

    NEWLINE, INDENT and DEDENT tokens are dropped; every other 5-tuple
    produced by the tokenizer is passed through unchanged.
    """
    ignored = (token.NEWLINE, token.INDENT, token.DEDENT)
    # The tokenizer wants a readline-style callable; feed it the bound
    # ``next`` of the line generator.
    readline = driver.generate_lines(input).next
    for quintuple in tokenize.generate_tokens(readline):
        tok_type, _value, _start, _end, _line_text = quintuple
        if tok_type in ignored:
            continue
        yield quintuple
-
-
-
class PatternCompiler(object):
    """Compiles pattern strings into pytree.*Pattern objects."""

    def __init__(self, grammar_file=_PATTERN_GRAMMAR_FILE):
        """Initializer.

        Takes an optional alternative filename for the pattern grammar.
        """
        self.grammar = driver.load_grammar(grammar_file)
        self.syms = pygram.Symbols(self.grammar)
        self.pygrammar = pygram.python_grammar
        self.pysyms = pygram.python_symbols
        self.driver = driver.Driver(self.grammar, convert=pattern_convert)

    def compile_pattern(self, input, debug=False):
        """Compiles a pattern string to a nested pytree.*Pattern object."""
        tokens = tokenize_wrapper(input)
        root = self.driver.parse_tokens(tokens, debug=debug)
        return self.compile_node(root)

    def compile_node(self, node):
        """Compiles a node, recursively.

        This is one big switch on the node type.  Handles Matcher,
        Alternatives, Alternative, NegatedUnit and Unit nodes; any other
        node type raises AssertionError.
        """
        if node.type == self.syms.Matcher:
            # Unwrap in place instead of recursing.
            node = node.children[0]

        if node.type == self.syms.Alternatives:
            # Only every other child is compiled; presumably the children in
            # between are the separator tokens (verify against the grammar).
            alts = [self.compile_node(ch) for ch in node.children[::2]]
            if len(alts) == 1:
                return alts[0]
            # The decompiled listing called an empty list here; a
            # WildcardPattern mirrors the Alternative branch below.
            p = pytree.WildcardPattern([[a] for a in alts], min=1, max=1)
            return p.optimize()

        if node.type == self.syms.Alternative:
            units = [self.compile_node(ch) for ch in node.children]
            if len(units) == 1:
                return units[0]
            p = pytree.WildcardPattern([units], min=1, max=1)
            return p.optimize()

        if node.type == self.syms.NegatedUnit:
            # The first child is skipped; the rest is the negated pattern.
            pattern = self.compile_basic(node.children[1:])
            p = pytree.NegatedPattern(pattern)
            return p.optimize()

        if node.type != self.syms.Unit:
            raise AssertionError(node)
        return self._compile_unit(node)

    def _compile_unit(self, node):
        """Compiles a Unit node: optional name, basic pattern, optional repeater."""
        name = None
        nodes = node.children
        repeat = None

        # NOTE(review): the two extractions below were missing from the
        # decompiled listing (``name`` and ``repeat`` stayed None, leaving all
        # of the code that uses them dead).  They are reconstructed on the
        # assumption that a Unit looks like  [NAME '='] <basic> [Repeater];
        # confirm against PatternGrammar.txt.
        if len(nodes) >= 3 and nodes[1].type == token.EQUAL:
            name = nodes[0].value
            nodes = nodes[2:]
        if len(nodes) >= 2 and nodes[-1].type == self.syms.Repeater:
            repeat = nodes[-1]
            nodes = nodes[:-1]

        pattern = self.compile_basic(nodes, repeat)

        if repeat is not None:
            lo, hi = self._repeat_bounds(repeat)
            # A {1,1} repetition is the pattern itself; no wrapper needed.
            if lo != 1 or hi != 1:
                pattern = pattern.optimize()
                pattern = pytree.WildcardPattern([[pattern]], min=lo, max=hi)

        if name is not None:
            pattern.name = name

        return pattern.optimize()

    def _repeat_bounds(self, repeat):
        """Returns the (min, max) repetition counts encoded by a Repeater node.

        Raises AssertionError if the node is not a well-formed Repeater.
        """
        if repeat.type != self.syms.Repeater:
            raise AssertionError(repeat)
        children = repeat.children
        first = children[0]
        if first.type == token.STAR:
            return 0, pytree.HUGE
        if first.type == token.PLUS:
            return 1, pytree.HUGE
        if first.type == token.LBRACE:
            if children[-1].type != token.RBRACE:
                raise AssertionError(repeat)
            if len(children) not in (3, 5):
                raise AssertionError(repeat)
            # NOTE(review): the decompiled listing never bound the lower
            # bound in this branch; children[1] is taken as the count for the
            # three-child form and as the minimum for the five-child form.
            lo = hi = self.get_int(children[1])
            if len(children) == 5:
                hi = self.get_int(children[3])
            return lo, hi
        raise AssertionError(repeat)

    def compile_basic(self, nodes, repeat=None):
        """Compiles the basic part of a unit from its child nodes.

        The first node selects the form: a STRING literal, a NAME (with an
        optional details node after it), or a '(' / '[' group whose body is
        nodes[1].  ``repeat`` must be None for the '[' form.

        Raises SyntaxError for an unknown token or symbol name, and
        AssertionError for an empty node list or an unrecognised first node.
        """
        if len(nodes) < 1:
            raise AssertionError(nodes)
        node = nodes[0]

        if node.type == token.STRING:
            value = literals.evalString(node.value)
            return pytree.LeafPattern(content=value)

        if node.type == token.NAME:
            value = node.value
            if value.isupper():
                # All-uppercase names refer to entries of TOKEN_MAP.
                if value not in TOKEN_MAP:
                    raise SyntaxError('Invalid token: %r' % value)
                return pytree.LeafPattern(TOKEN_MAP[value])
            if value == 'any':
                node_type = None
            elif not value.startswith('_'):
                node_type = getattr(self.pysyms, value, None)
                if node_type is None:
                    raise SyntaxError('Invalid symbol: %r' % value)
            else:
                # Names starting with '_' are never looked up on pysyms.
                # Previously this path left the type unbound and crashed with
                # UnboundLocalError; report it as an invalid symbol instead.
                raise SyntaxError('Invalid symbol: %r' % value)
            if nodes[1:]:
                # A second node carries the nested pattern as its child 1.
                content = [self.compile_node(nodes[1].children[1])]
            else:
                content = None
            return pytree.NodePattern(node_type, content)

        if node.value == '(':
            return self.compile_node(nodes[1])

        if node.value == '[':
            if repeat is not None:
                raise AssertionError(repeat)
            subpattern = self.compile_node(nodes[1])
            # A bracketed group matches zero or one time.
            return pytree.WildcardPattern([[subpattern]], min=0, max=1)

        raise AssertionError(node)

    def get_int(self, node):
        """Returns the integer value of a NUMBER leaf.

        Raises AssertionError if the node is not a NUMBER token.
        """
        if node.type != token.NUMBER:
            raise AssertionError(node)
        return int(node.value)
-
-
# Maps the upper-case token names usable in patterns to the token type that
# is handed to pytree.LeafPattern; 'TOKEN' maps to None.
TOKEN_MAP = {
    'NAME': token.NAME,
    'STRING': token.STRING,
    'NUMBER': token.NUMBER,
    'TOKEN': None,
}
-
def pattern_convert(grammar, raw_node_info):
    """Build a pytree Node or Leaf from a raw (type, value, context, children) tuple.

    A Node is produced when the tuple has children or when its type is one
    of the grammar's symbol numbers; otherwise a Leaf is produced.
    """
    node_type, value, context, children = raw_node_info
    is_interior = children or node_type in grammar.number2symbol
    if not is_interior:
        return pytree.Leaf(node_type, value, context=context)
    return pytree.Node(node_type, children, context=context)
-
-
def compile_pattern(pattern):
    """Compile *pattern* using a freshly constructed PatternCompiler."""
    compiler = PatternCompiler()
    return compiler.compile_pattern(pattern)
-
-