home *** CD-ROM | disk | FTP | other *** search
- # Source Generated with Decompyle++
- # File: in.pyc (Python 2.6)
-
- import re
- from lxml import etree
# Public names exported by a star-import of this module.
__all__ = ['SelectorSyntaxError', 'ExpressionError', 'CSSSelector']
-
# ``basestring`` is only defined on Python 2; use ``str`` when it is missing.
try:
    _basestring = basestring
except NameError:
    _basestring = str
-
-
class SelectorSyntaxError(SyntaxError):
    """Raised by the tokenizer and parser for a malformed CSS selector."""
-
-
class ExpressionError(RuntimeError):
    """Raised when a parsed selector cannot be turned into XPath.

    Used for unsupported or unknown pseudo-classes and unknown combinators.
    """
-
-
class CSSSelector(etree.XPath):
    """An ``etree.XPath`` built from a CSS selector.

    The ``css`` argument is converted with ``css_to_xpath`` and the result
    is passed to ``etree.XPath``; the original value stays available as
    ``self.css``.
    """

    def __init__(self, css):
        etree.XPath.__init__(self, css_to_xpath(css))
        self.css = css

    def __repr__(self):
        class_name = self.__class__.__name__
        identity = hex(abs(id(self)))[2:]
        return '<%s %s for %r>' % (class_name, identity, self.css)
-
-
-
# Text type and code-point-to-character function: the Python 2 builtins
# when they exist, otherwise ``str`` and ``chr``.
try:
    _unicode = unicode
    _unichr = unichr
except NameError:
    _unicode = str
    _unichr = chr
-
-
class _UniToken(_unicode):
    """Text value that also records the ``pos`` it was created with."""

    def __new__(cls, contents, pos):
        token = _unicode.__new__(cls, contents)
        token.pos = pos
        return token

    def __repr__(self):
        text_repr = _unicode.__repr__(self)
        return '%s(%s, %r)' % (self.__class__.__name__, text_repr, self.pos)
-
-
-
class Symbol(_UniToken):
    """Token holding identifier-like text, as yielded by ``tokenize``."""
-
-
class String(_UniToken):
    """Token type the parser accepts as a string argument or attribute value.

    NOTE(review): presumably produced for quoted strings -- confirm against
    the tokenizer.
    """
-
-
class Token(_UniToken):
    """Plain positioned token.

    NOTE(review): presumably used for punctuation and operators -- confirm
    against the tokenizer.
    """
-
-
class Class(object):
    """Syntax node for ``selector.class_name``."""

    def __init__(self, selector, class_name):
        self.selector = selector
        self.class_name = class_name

    def __repr__(self):
        return '%s[%r.%s]' % (
            self.__class__.__name__, self.selector, self.class_name)

    def xpath(self):
        """Return the selector's XPath with a ``@class`` word test added."""
        result = self.selector.xpath()
        # Pad both sides with spaces so only whole class names match.
        needle = xpath_literal(' ' + self.class_name + ' ')
        result.add_condition(
            "contains(concat(' ', normalize-space(@class), ' '), %s)" % needle)
        return result
-
-
-
class Function(object):
    """Syntax node for a functional pseudo-class, ``selector:name(expr)``.

    ``xpath()`` dispatches to the method named ``_xpath_<name>``, with the
    dashes in ``name`` replaced by underscores.
    """

    # Names that ``xpath()`` rejects outright.
    unsupported = ['target', 'lang', 'enabled', 'disabled']

    def __init__(self, selector, type, name, expr):
        self.selector = selector
        self.type = type
        self.name = name
        self.expr = expr

    def __repr__(self):
        return '%s[%r%s%s(%r)]' % (
            self.__class__.__name__, self.selector, self.type,
            self.name, self.expr)

    def xpath(self):
        """Translate this node to XPath.

        Raises ``ExpressionError`` for unsupported or unknown names.
        """
        sel_path = self.selector.xpath()
        if self.name in self.unsupported:
            raise ExpressionError(
                'The psuedo-class %r is not supported' % self.name)
        handler = getattr(self, '_xpath_' + self.name.replace('-', '_'), None)
        if handler is None:
            raise ExpressionError(
                'The psuedo-class %r is unknown' % self.name)
        return handler(sel_path, self.expr)

    def _xpath_nth_child(self, xpath, expr, last=False, add_name_test=True):
        """Add the positional condition for an ``an+b`` style argument.

        ``last`` counts from the end; ``add_name_test`` moves the element
        name into a ``name()`` condition before the star prefix is added.
        """
        step, offset = parse_series(expr)
        if not (step or offset or last):
            # Series (0, 0) counting from the start: add a false condition.
            xpath.add_condition('false() and position() = 0')
            return xpath
        if add_name_test:
            xpath.add_name_test()
        xpath.add_star_prefix()
        if step == 0:
            position = ('last() - %s' % offset) if last else offset
            xpath.add_condition('position() = %s' % position)
            return xpath
        if last:
            step, offset = -step, -offset
        # Signed text subtracted from position() inside the mod test.
        shifted = str(-offset) if offset > 0 else '+%s' % (-offset)
        tests = []
        if step != 1:
            tests.append('(position() %s) mod %s = 0' % (shifted, step))
        if offset >= 0:
            tests.append('position() >= %s' % offset)
        elif last:
            tests.append('position() < (last() %s)' % offset)
        if tests:
            xpath.add_condition(' and '.join(tests))
        return xpath

    def _xpath_nth_last_child(self, xpath, expr):
        return self._xpath_nth_child(xpath, expr, last=True)

    def _xpath_nth_of_type(self, xpath, expr):
        if xpath.element == '*':
            raise NotImplementedError('*:nth-of-type() is not implemented')
        return self._xpath_nth_child(xpath, expr, add_name_test=False)

    def _xpath_nth_last_of_type(self, xpath, expr):
        return self._xpath_nth_child(
            xpath, expr, last=True, add_name_test=False)

    def _xpath_contains(self, xpath, expr):
        """Add a case-insensitive substring test on the node's string value."""
        if isinstance(expr, Element):
            expr = expr._format_element()
        needle = xpath_literal(expr.lower())
        xpath.add_condition(
            'contains(css:lower-case(string(.)), %s)' % needle)
        return xpath

    def _xpath_not(self, xpath, expr):
        """Negate the condition of the argument selector."""
        negated = expr.xpath().condition
        xpath.add_condition('not(%s)' % negated)
        return xpath
-
-
-
def _make_lower_case(context, s):
    """Return ``s`` lower-cased; ``context`` is accepted but not used."""
    lowered = s.lower()
    return lowered
-
# Register ``_make_lower_case`` under the ``css`` prefix so generated XPath
# can call it as ``css:lower-case(...)``.
ns = etree.FunctionNamespace('http://codespeak.net/lxml/css/')
ns.prefix = 'css'
ns['lower-case'] = _make_lower_case
-
class Pseudo(object):
    """Syntax node for an argument-less pseudo-class, ``element:ident``.

    ``xpath()`` dispatches to the method named ``_xpath_<ident>``, with the
    dashes in ``ident`` replaced by underscores.
    """

    # Identifiers that ``xpath()`` rejects outright.
    unsupported = [
        'indeterminate', 'first-line', 'first-letter', 'selection',
        'before', 'after', 'link', 'visited', 'active', 'focus', 'hover']

    def __init__(self, element, type, ident):
        self.element = element
        self.type = type
        self.ident = ident

    def __repr__(self):
        return '%s[%r%s%s]' % (
            self.__class__.__name__, self.element, self.type, self.ident)

    def xpath(self):
        """Translate this node to XPath.

        Raises ``ExpressionError`` for unsupported or unknown identifiers.
        """
        el_xpath = self.element.xpath()
        if self.ident in self.unsupported:
            raise ExpressionError(
                'The psuedo-class %r is unsupported' % self.ident)
        handler = getattr(self, '_xpath_' + self.ident.replace('-', '_'), None)
        if handler is None:
            raise ExpressionError(
                'The psuedo-class %r is unknown' % self.ident)
        return handler(el_xpath)

    def _xpath_checked(self, xpath):
        xpath.add_condition(
            "(@selected or @checked) and "
            "(name(.) = 'input' or name(.) = 'option')")
        return xpath

    def _xpath_root(self, xpath):
        raise NotImplementedError

    def _xpath_first_child(self, xpath):
        xpath.add_star_prefix()
        xpath.add_name_test()
        xpath.add_condition('position() = 1')
        return xpath

    def _xpath_last_child(self, xpath):
        xpath.add_star_prefix()
        xpath.add_name_test()
        xpath.add_condition('position() = last()')
        return xpath

    def _xpath_first_of_type(self, xpath):
        if xpath.element == '*':
            raise NotImplementedError('*:first-of-type is not implemented')
        xpath.add_star_prefix()
        xpath.add_condition('position() = 1')
        return xpath

    def _xpath_last_of_type(self, xpath):
        if xpath.element == '*':
            raise NotImplementedError('*:last-of-type is not implemented')
        xpath.add_star_prefix()
        xpath.add_condition('position() = last()')
        return xpath

    def _xpath_only_child(self, xpath):
        xpath.add_name_test()
        xpath.add_star_prefix()
        xpath.add_condition('last() = 1')
        return xpath

    def _xpath_only_of_type(self, xpath):
        if xpath.element == '*':
            raise NotImplementedError('*:only-of-type is not implemented')
        xpath.add_condition('last() = 1')
        return xpath

    def _xpath_empty(self, xpath):
        xpath.add_condition('not(*) and not(normalize-space())')
        return xpath
-
-
-
class Attrib(object):
    """Syntax node for an attribute test, ``selector[namespace|attrib op value]``."""

    def __init__(self, selector, namespace, attrib, operator, value):
        self.selector = selector
        self.namespace = namespace
        self.attrib = attrib
        self.operator = operator
        self.value = value

    def __repr__(self):
        class_name = self.__class__.__name__
        if self.operator == 'exists':
            return '%s[%r[%s]]' % (
                class_name, self.selector, self._format_attrib())
        return '%s[%r[%s %s %r]]' % (
            class_name, self.selector, self._format_attrib(),
            self.operator, self.value)

    def _format_attrib(self):
        """Return the attribute in CSS notation (``ns|name`` or ``name``)."""
        if self.namespace == '*':
            return self.attrib
        return '%s|%s' % (self.namespace, self.attrib)

    def _xpath_attrib(self):
        """Return the attribute in XPath notation (``@ns:name`` or ``@name``)."""
        if self.namespace == '*':
            return '@' + self.attrib
        return '@%s:%s' % (self.namespace, self.attrib)

    def xpath(self):
        """Return the selector's XPath with this attribute test added.

        An operator outside the handled set adds no condition.
        """
        path = self.selector.xpath()
        attrib = self._xpath_attrib()
        value = self.value
        op = self.operator
        condition = None
        if op == 'exists':
            condition = attrib
        elif op == '=':
            condition = '%s = %s' % (attrib, xpath_literal(value))
        elif op == '!=':
            if value:
                condition = 'not(%s) or %s != %s' % (
                    attrib, attrib, xpath_literal(value))
            else:
                condition = '%s != %s' % (attrib, xpath_literal(value))
        elif op == '~=':
            condition = (
                "contains(concat(' ', normalize-space(%s), ' '), %s)"
                % (attrib, xpath_literal(' ' + value + ' ')))
        elif op == '|=':
            condition = '%s = %s or starts-with(%s, %s)' % (
                attrib, xpath_literal(value),
                attrib, xpath_literal(value + '-'))
        elif op == '^=':
            condition = 'starts-with(%s, %s)' % (attrib, xpath_literal(value))
        elif op == '$=':
            # Compare the trailing len(value) characters of the attribute.
            condition = 'substring(%s, string-length(%s)-%s) = %s' % (
                attrib, attrib, len(value) - 1, xpath_literal(value))
        elif op == '*=':
            condition = 'contains(%s, %s)' % (attrib, xpath_literal(value))
        if condition is not None:
            path.add_condition(condition)
        return path
-
-
-
class Element(object):
    """Syntax node for an element name, ``namespace|element``."""

    def __init__(self, namespace, element):
        self.namespace = namespace
        self.element = element

    def __repr__(self):
        return '%s[%s]' % (self.__class__.__name__, self._format_element())

    def _format_element(self):
        """Return the name in CSS notation (``ns|name`` or ``name``)."""
        if self.namespace != '*':
            return '%s|%s' % (self.namespace, self.element)
        return self.element

    def xpath(self):
        """Return an ``XPathExpr`` for this element.

        Names without a namespace are lower-cased; namespaced names become
        ``namespace:element`` unchanged.
        """
        if self.namespace != '*':
            name = '%s:%s' % (self.namespace, self.element)
        else:
            name = self.element.lower()
        return XPathExpr(element=name)
-
-
-
class Hash(object):
    """Syntax node for an id test, ``selector#id``."""

    def __init__(self, selector, id):
        self.selector = selector
        self.id = id

    def __repr__(self):
        return '%s[%r#%s]' % (self.__class__.__name__, self.selector, self.id)

    def xpath(self):
        """Return the selector's XPath with an ``@id`` equality test added."""
        result = self.selector.xpath()
        id_literal = xpath_literal(self.id)
        result.add_condition('@id = %s' % id_literal)
        return result
-
-
-
class Or(object):
    """Syntax node grouping several alternative selectors."""

    def __init__(self, items):
        self.items = items

    def __repr__(self):
        return '%s(%r)' % (self.__class__.__name__, self.items)

    def xpath(self):
        """Return an ``XPathExprOr`` over the XPath of every item."""
        translated = []
        for item in self.items:
            translated.append(item.xpath())
        return XPathExprOr(translated)
-
-
-
class CombinedSelector(object):
    """Syntax node for two selectors joined by a combinator."""

    # Combinator token -> suffix of the ``_xpath_*`` method that handles it.
    _method_mapping = {
        ' ': 'descendant',
        '>': 'child',
        '+': 'direct_adjacent',
        '~': 'indirect_adjacent',
    }

    def __init__(self, selector, combinator, subselector):
        self.selector = selector
        self.combinator = combinator
        self.subselector = subselector

    def __repr__(self):
        shown = '<followed>' if self.combinator == ' ' else self.combinator
        return '%s[%r %s %r]' % (
            self.__class__.__name__, self.selector, shown, self.subselector)

    def xpath(self):
        """Translate to XPath; raises ``ExpressionError`` for an unknown combinator."""
        suffix = self._method_mapping.get(self.combinator)
        if suffix is None:
            raise ExpressionError('Unknown combinator: %r' % self.combinator)
        handler = getattr(self, '_xpath_' + suffix)
        return handler(self.selector.xpath(), self.subselector)

    def _xpath_descendant(self, xpath, sub):
        xpath.join('/descendant::', sub.xpath())
        return xpath

    def _xpath_child(self, xpath, sub):
        xpath.join('/', sub.xpath())
        return xpath

    def _xpath_direct_adjacent(self, xpath, sub):
        xpath.join('/following-sibling::', sub.xpath())
        # Only the immediately following sibling may match.
        xpath.add_name_test()
        xpath.add_condition('position() = 1')
        return xpath

    def _xpath_indirect_adjacent(self, xpath, sub):
        xpath.join('/following-sibling::', sub.xpath())
        return xpath
-
-
# Fast-path patterns for the three simplest selector shapes:
# ``tag``, ``tag#id`` / ``#id`` and ``tag.class`` / ``.class``.
_el_re = re.compile(r'^\w+\s*$', re.UNICODE)
_id_re = re.compile(r'^(\w*)#(\w+)\s*$', re.UNICODE)
_class_re = re.compile(r'^(\w*)\.(\w+)\s*$', re.UNICODE)


def css_to_xpath(css_expr, prefix='descendant-or-self::'):
    """Convert a CSS selector to an XPath expression string.

    ``css_expr`` is either a selector string or an already parsed selector
    object exposing ``xpath()``.  Strings matching one of the simple
    patterns above are translated directly; any other string goes through
    ``parse`` first.  ``prefix`` is prepended to the result; pass a false
    value to omit it.
    """
    if isinstance(css_expr, _basestring):
        # A false prefix must not be rendered as the text "None".
        lead = prefix or ''
        match = _el_re.search(css_expr)
        if match is not None:
            return '%s%s' % (lead, match.group(0).strip())
        match = _id_re.search(css_expr)
        if match is not None:
            return "%s%s[@id = '%s']" % (
                lead, match.group(1) or '*', match.group(2))
        match = _class_re.search(css_expr)
        if match is not None:
            return (
                "%s%s[contains(concat(' ', normalize-space(@class), ' '), "
                "' %s ')]" % (lead, match.group(1) or '*', match.group(2)))
        css_expr = parse(css_expr)
    expr = css_expr.xpath()
    # Internal invariant: every syntax node returns an expression object.
    assert expr is not None, (
        'Got None for xpath expression from %s' % repr(css_expr))
    if prefix:
        expr.add_prefix(prefix)
    return _unicode(expr)
-
-
class XPathExpr(object):
    """Mutable XPath step rendered as ``prefix + path + element[condition]``.

    ``star_prefix`` records that ``add_star_prefix`` has put ``*/`` in
    ``path``.
    """

    def __init__(self, prefix=None, path=None, element='*', condition=None,
                 star_prefix=False):
        self.prefix = prefix
        self.path = path
        self.element = element
        self.condition = condition
        self.star_prefix = star_prefix

    def __str__(self):
        path = ''
        if self.prefix is not None:
            path += _unicode(self.prefix)
        if self.path is not None:
            path += _unicode(self.path)
        path += _unicode(self.element)
        if self.condition:
            path += '[%s]' % self.condition
        return path

    def __repr__(self):
        return '%s[%s]' % (self.__class__.__name__, self)

    def add_condition(self, condition):
        """AND ``condition`` onto the existing predicate, if any."""
        if self.condition:
            self.condition = '%s and (%s)' % (self.condition, condition)
        else:
            self.condition = condition

    def add_path(self, part):
        """Replace the element name with ``part``."""
        self.element = part

    def add_prefix(self, prefix):
        """Put ``prefix`` in front of any prefix already present."""
        if self.prefix:
            self.prefix = prefix + self.prefix
        else:
            self.prefix = prefix

    def add_name_test(self):
        """Move a concrete element name into a ``name()`` condition.

        Does nothing when the element is already ``*``.
        """
        if self.element == '*':
            return None
        self.add_condition('name() = %s' % xpath_literal(self.element))
        self.element = '*'

    def add_star_prefix(self):
        """Append ``*/`` to the path and remember that it was added."""
        if self.path:
            self.path += '*/'
        else:
            self.path = '*/'
        self.star_prefix = True

    def join(self, combiner, other):
        """Append ``other`` to this expression, separated by ``combiner``.

        Everything rendered so far, plus ``combiner``, becomes the new
        prefix; the path, element and condition are taken from ``other``.
        """
        prefix = _unicode(self)
        prefix += combiner
        # Keep the other expression's own prefix and path; the decompiled
        # code collapsed this to an empty string.
        path = (other.prefix or '') + (other.path or '')
        # A lone star prefix is redundant once it follows this expression.
        if other.star_prefix and path == '*/':
            path = ''
        self.prefix = prefix
        self.path = path
        self.element = other.element
        self.condition = other.condition
-
-
-
class XPathExprOr(XPathExpr):
    """Several XPath expressions rendered as alternatives joined by ``|``."""

    def __init__(self, items, prefix=None):
        # Internal invariant: every alternative must be a real expression.
        for item in items:
            assert item is not None, 'XPathExprOr items must not be None'
        self.items = items
        self.prefix = prefix

    def __str__(self):
        # The shared prefix is applied to every alternative separately.
        prefix = self.prefix or ''
        return ' | '.join(['%s%s' % (prefix, item) for item in self.items])
-
-
# Splits a string into runs of single quotes and the text between them.
split_at_single_quotes = re.compile("('+)").split


def xpath_literal(s):
    """Return ``s`` as an XPath string literal.

    An ``Element`` is rendered with its ``_format_element()`` text; anything
    else is converted to text.  XPath has no escape syntax inside literals,
    so a value containing both quote characters is assembled with
    ``concat()`` from pieces that each use the other quote character.
    """
    if isinstance(s, Element):
        s = s._format_element()
    else:
        s = _unicode(s)
    if "'" not in s:
        return "'%s'" % s
    if '"' not in s:
        return '"%s"' % s
    pieces = []
    for part in split_at_single_quotes(s):
        if not part:
            continue
        # Runs of single quotes go in double quotes, the rest in single.
        template = '"%s"' if "'" in part else "'%s'"
        pieces.append(template % part)
    return 'concat(%s)' % ','.join(pieces)
-
-
def parse(string):
    """Tokenize and parse the selector text ``string``.

    A ``SelectorSyntaxError`` raised while parsing is re-raised after its
    message has been extended with the tokens consumed so far and the next
    pending token.
    """
    stream = TokenStream(tokenize(string), source=string)
    try:
        return parse_selector_group(stream)
    except SelectorSyntaxError:
        import sys
        error = sys.exc_info()[1]
        message = '%s at %s -> %r' % (error, stream.used, stream.peek())
        error.msg = message
        if sys.version_info < (2, 6):
            error.message = message
        error.args = (message,)
        raise
-
-
-
def parse_selector_group(stream):
    """Parse one or more comma-separated selectors from ``stream``.

    Returns the parsed selector itself when there is exactly one, otherwise
    an ``Or`` node wrapping all of them.
    """
    result = []
    while True:
        result.append(parse_selector(stream))
        if stream.peek() != ',':
            break
        # Consume the comma and parse the next alternative.
        stream.next()
    if len(result) == 1:
        return result[0]
    return Or(result)
-
-
def parse_selector(stream):
    """Parse a chain of simple selectors joined by combinators.

    Parsing stops at a comma or at the end of the stream.  Two simple
    selectors with no explicit ``+``, ``>`` or ``~`` between them are
    joined with the descendant combinator ``' '``.

    Raises ``SelectorSyntaxError`` when a simple selector is expected but
    no token gets consumed.
    """
    result = parse_simple_selector(stream)
    while True:
        peek = stream.peek()
        if peek == ',' or peek is None:
            return result
        if peek in ('+', '>', '~'):
            combinator = stream.next()
        else:
            combinator = ' '
        consumed = len(stream.used)
        next_selector = parse_simple_selector(stream)
        # No progress means the pending token cannot start a selector.
        if consumed == len(stream.used):
            raise SelectorSyntaxError(
                "Expected selector, got '%s'" % stream.peek())
        result = CombinedSelector(result, combinator, next_selector)
-
-
def parse_simple_selector(stream):
    """Parse an element name followed by any number of qualifiers.

    Qualifiers are ``#id``, ``.class``, ``[attrib]``, ``:pseudo`` and
    ``:function(arg)``.  Returns the resulting syntax node; a selector
    without an element name starts from ``Element('*', '*')``.

    Raises ``SelectorSyntaxError`` on unexpected tokens.
    """
    peek = stream.peek()
    if peek != '*' and not isinstance(peek, Symbol):
        element = namespace = '*'
    else:
        token = stream.next()
        if token != '*' and not isinstance(token, Symbol):
            raise SelectorSyntaxError("Expected symbol, got '%s'" % token)
        if stream.peek() == '|':
            namespace = token
            stream.next()
            element = stream.next()
            # Validate the element itself; checking the namespace token
            # here wrongly rejected selectors such as ``*|a``.
            if element != '*' and not isinstance(element, Symbol):
                raise SelectorSyntaxError(
                    "Expected symbol, got '%s'" % element)
        else:
            namespace = '*'
            element = token
    result = Element(namespace, element)
    has_hash = False
    while True:
        peek = stream.peek()
        if peek == '#':
            if has_hash:
                # A second hash is not consumed by this simple selector.
                break
            stream.next()
            result = Hash(result, stream.next())
            has_hash = True
        elif peek == '.':
            stream.next()
            result = Class(result, stream.next())
        elif peek == '[':
            stream.next()
            result = parse_attrib(result, stream)
            token = stream.next()
            if not token == ']':
                raise SelectorSyntaxError("] expected, got '%s'" % token)
        elif peek == ':' or peek == '::':
            pseudo_type = stream.next()
            ident = stream.next()
            if not isinstance(ident, Symbol):
                raise SelectorSyntaxError(
                    "Expected symbol, got '%s'" % ident)
            if stream.peek() == '(':
                stream.next()
                peek = stream.peek()
                if isinstance(peek, String):
                    selector = stream.next()
                elif isinstance(peek, Symbol) and is_int(peek):
                    selector = int(stream.next())
                else:
                    selector = parse_simple_selector(stream)
                token = stream.next()
                if not token == ')':
                    raise SelectorSyntaxError(
                        "Expected ')', got '%s' and '%s'" % (token, selector))
                result = Function(result, pseudo_type, ident, selector)
            else:
                result = Pseudo(result, pseudo_type, ident)
        else:
            # A whitespace token ends this simple selector; consume it.
            if peek == ' ':
                stream.next()
            break
    return result
-
-
def is_int(v):
    """Return True when ``int(v)`` succeeds, False when it raises ValueError."""
    try:
        int(v)
    except ValueError:
        return False
    else:
        return True
-
-
def parse_attrib(selector, stream):
    """Parse the inside of ``[...]`` and return an ``Attrib`` node.

    The closing ``]`` is left in the stream.  Raises
    ``SelectorSyntaxError`` for an unknown operator or for a value that is
    neither a ``Symbol`` nor a ``String``.
    """
    namespace = '*'
    name = stream.next()
    if stream.peek() == '|':
        # ``ns|name``: the first token was the namespace.
        namespace = name
        stream.next()
        name = stream.next()
    if stream.peek() == ']':
        return Attrib(selector, namespace, name, 'exists', None)
    op = stream.next()
    if op not in ('^=', '$=', '*=', '=', '~=', '|=', '!='):
        raise SelectorSyntaxError("Operator expected, got '%s'" % op)
    value = stream.next()
    if not isinstance(value, (Symbol, String)):
        raise SelectorSyntaxError(
            "Expected string or symbol, got '%s'" % value)
    return Attrib(selector, namespace, name, op, value)
-
-
def parse_series(s):
    """Parse an ``an+b`` style argument and return the pair ``(a, b)``.

    Accepts an ``Element`` (its formatted name is used), an int, the
    keywords ``odd``, ``even`` and ``n``, a plain number, or text of the
    form ``<a>n<b>``.  An empty value or ``*`` gives ``(0, 0)``.
    """
    def coefficient(text, default):
        # Empty text means the default; a bare sign means +/-1.
        if not text:
            return default
        if text == '-' or text == '+':
            return int(text + '1')
        return int(text)

    if isinstance(s, Element):
        s = s._format_element()
    if not s or s == '*':
        return (0, 0)
    if isinstance(s, int):
        return (0, s)
    for keyword, series in (('odd', (2, 1)), ('even', (2, 0)), ('n', (1, 0))):
        if s == keyword:
            return series
    if 'n' not in s:
        return (0, int(s))
    a_text, b_text = s.split('n', 1)
    a = coefficient(a_text, 1)
    b = coefficient(b_text, 0)
    return (a, b)
-
_whitespace_re = re.compile(r'\s+', re.UNICODE)
_comment_re = re.compile(r'/\*.*?\*/', re.DOTALL)
# ``an+b`` style counts such as ``2n+1``, ``-n+6`` or ``3n``.
_count_re = re.compile(r'[+-]?\d*n(?:[+-]\d+)?')


def tokenize(s):
    """Yield the tokens of the selector text ``s``.

    Comments are removed first.  Counts and identifiers are yielded as
    ``Symbol``, quoted strings as ``String``, operators and punctuation as
    ``Token``.  Whitespace is skipped, except that whitespace directly
    before ``.``, ``#`` or ``[`` produces a ``' '`` token so the parser can
    tell ``a .b`` from ``a.b``.
    """
    pos = 0
    s = _comment_re.sub('', s)
    while True:
        match = _whitespace_re.match(s, pos=pos)
        if match:
            preceding_whitespace_pos = pos
            pos = match.end()
        else:
            preceding_whitespace_pos = 0
        if pos >= len(s):
            return
        match = _count_re.match(s, pos=pos)
        # A bare ``n`` is left to the symbol branch so that names starting
        # with ``n`` are not cut short.
        if match and match.group() != 'n':
            yield Symbol(s[pos:match.end()], pos)
            pos = match.end()
            continue
        c = s[pos]
        c2 = s[pos:pos + 2]
        if c2 in ('~=', '|=', '^=', '$=', '*=', '::', '!='):
            yield Token(c2, pos)
            pos += 2
            continue
        if c in '>+~,.*=[]()|:#':
            if c in '.#[' and preceding_whitespace_pos > 0:
                yield Token(' ', preceding_whitespace_pos)
            yield Token(c, pos)
            pos += 1
            continue
        if c == '"' or c == "'":
            old_pos = pos
            sym, pos = tokenize_escaped_string(s, pos)
            yield String(sym, old_pos)
            continue
        old_pos = pos
        sym, pos = tokenize_symbol(s, pos)
        yield Symbol(sym, old_pos)
-
# Splits a string literal into escape sequences (a backslash followed by up
# to six hex digits and optional whitespace, or by one non-hex character)
# and the plain text between them.
_HEX_ESCAPE = '[A-Fa-f0-9]{1,6}(?:\r\n|\\s)?'
_CHAR_ESCAPE = '[^A-Fa-f0-9]'
split_at_string_escapes = re.compile(
    '(\\\\(?:%s))' % '|'.join([_HEX_ESCAPE, _CHAR_ESCAPE])).split


def unescape_string_literal(literal):
    """Return ``literal`` with its backslash escapes resolved.

    A hex escape becomes the character with that code point; any other
    escaped character stands for itself.  Raises ``SelectorSyntaxError``
    when a backslash is left over that is not part of an escape sequence.
    """
    pieces = []
    for chunk in split_at_string_escapes(literal):
        if not chunk:
            continue
        if '\\' in chunk:
            if not (chunk[0] == '\\' and len(chunk) > 1):
                raise SelectorSyntaxError(
                    'Invalid escape sequence %r in string %r'
                    % (chunk.split('\\')[1], literal))
            chunk = chunk[1:]
            if chunk[0] in '0123456789ABCDEFabcdef':
                # int() tolerates the whitespace that may end the escape.
                chunk = _unichr(int(chunk, 16))
        pieces.append(chunk)
    return ''.join(pieces)
-
-
def tokenize_escaped_string(s, pos):
    """Read the quoted string that starts at ``s[pos]``.

    ``s[pos]`` is the opening quote character.  Returns ``(text, end)``
    where ``text`` is the unescaped content and ``end`` is the index just
    past the closing quote.

    Raises ``SelectorSyntaxError`` when there is no closing quote.
    """
    quote = s[pos]
    pos = pos + 1
    start = pos
    while True:
        end = s.find(quote, pos)
        if end == -1:
            raise SelectorSyntaxError(
                'Expected closing %s for string in: %r' % (quote, s[start:]))
        result = s[start:end]
        # The quote is escaped only when preceded by an odd number of
        # backslashes; an even run is made of escaped backslashes.
        trailing_backslashes = len(result) - len(result.rstrip('\\'))
        if trailing_backslashes % 2:
            pos = end + 1
            continue
        if '\\' in result:
            result = unescape_string_literal(result)
        return (result, end + 1)
-
# Any character that cannot be part of a symbol.
_illegal_symbol = re.compile(r'[^\w\\-]', re.UNICODE)


def tokenize_symbol(s, pos):
    """Read the symbol that starts at ``s[pos]``.

    Returns ``(symbol, end)`` where ``end`` is the index of the first
    character after the symbol.  A symbol that stops before the end of
    ``s`` has its backslash escapes decoded.

    Raises ``SelectorSyntaxError`` when ``s[pos]`` cannot start a symbol or
    when the escapes cannot be decoded.
    """
    start = pos
    match = _illegal_symbol.search(s, pos=pos)
    if not match:
        # The symbol runs to the end of the input.
        return (s[start:], len(s))
    if match.start() == pos:
        # Returning an empty symbol here would leave the position
        # unchanged, so the caller could never advance.
        raise SelectorSyntaxError(
            'Unexpected symbol: %r at %s' % (s[pos], pos))
    result = s[start:match.start()]
    pos = match.start()
    try:
        result = result.encode('ASCII', 'backslashreplace').decode(
            'unicode_escape')
    except UnicodeDecodeError:
        import sys
        e = sys.exc_info()[1]
        raise SelectorSyntaxError('Bad symbol %r: %s' % (result, e))
    return (result, pos)
-
-
class TokenStream(object):
    """Token iterator with one token of look-ahead.

    Every token handed out by ``next()`` is also appended to ``used``.
    """

    def __init__(self, tokens, source=None):
        self.used = []
        self.tokens = iter(tokens)
        self.source = source
        self.peeked = None
        self._peeking = False
        # Iterators expose ``next`` on Python 2 and ``__next__`` otherwise.
        try:
            self.next_token = self.tokens.next
        except AttributeError:
            self.next_token = self.tokens.__next__

    def next(self):
        """Return the next token, or None once the tokens are exhausted."""
        if self._peeking:
            # Hand out the token that ``peek()`` already fetched.
            self._peeking = False
            token = self.peeked
        else:
            try:
                token = self.next_token()
            except StopIteration:
                return None
        self.used.append(token)
        return token

    def __iter__(self):
        return iter(self.next, None)

    def peek(self):
        """Return the next token without consuming it, or None at the end."""
        if self._peeking:
            return self.peeked
        try:
            self.peeked = self.next_token()
        except StopIteration:
            return None
        self._peeking = True
        return self.peeked
-
-
-