Maximum CD 2010 November

home *** CD-ROM | disk | FTP | other *** search

/ Maximum CD 2010 November / maximum-cd-2010-11.iso / DiscContents / calibre-0.7.13.msi / file_2001 (.txt) < prev next >

Wrap

Python Compiled Bytecode | 2010-08-06 | 4.2 KB | 122 lines

# Source Generated with Decompyle++
# File: in.pyc (Python 2.6)
"""HTML parsing front end that drives html5lib and builds lxml trees.

The parser classes below subclass html5lib's ``HTMLParser`` and
``XHTMLParser`` and hand them lxml's html5 ``TreeBuilder``; the module-level
functions wrap the shared ``html_parser`` instance defined at the bottom of
the file.
"""
import urllib

from html5lib import HTMLParser as _HTMLParser, XHTMLParser as _XHTMLParser
from lxml import etree
from lxml.html import _contains_block_level_tag, XHTML_NAMESPACE
from lxml.html._html5builder import TreeBuilder

# String types accepted as markup input: ``basestring`` where it exists,
# otherwise ``bytes``/``str``.
try:
    _strings = basestring
except NameError:
    _strings = (bytes, str)

# ``urllib.urlopen`` only exists on Python 2; fall back to the Python 3
# location so ``parse()`` works on both, mirroring the ``_strings`` shim.
try:
    _urlopen = urllib.urlopen
except AttributeError:
    from urllib.request import urlopen as _urlopen


class HTMLParser(_HTMLParser):
    """html5lib ``HTMLParser`` preconfigured with lxml's ``TreeBuilder``."""

    def __init__(self, strict=False):
        # ``strict`` is forwarded unchanged to the html5lib base class.
        _HTMLParser.__init__(self, strict=strict, tree=TreeBuilder)


class XHTMLParser(_XHTMLParser):
    """html5lib ``XHTMLParser`` preconfigured with lxml's ``TreeBuilder``."""

    def __init__(self, strict=False):
        # ``strict`` is forwarded unchanged to the html5lib base class.
        _XHTMLParser.__init__(self, strict=strict, tree=TreeBuilder)


def _find_tag(tree, tag):
    """Return the first ``tag`` child of ``tree``, plain or XHTML-namespaced.

    The un-namespaced name is tried first; if it is not found the lookup is
    repeated with ``XHTML_NAMESPACE``.  Returns ``None`` when neither exists.
    """
    elem = tree.find(tag)
    if elem is not None:
        return elem
    return tree.find('{%s}%s' % (XHTML_NAMESPACE, tag))


def document_fromstring(html, guess_charset=True, parser=None):
    """Parse ``html`` as a whole document and return its root element.

    ``guess_charset`` is passed to the parser as ``useChardet``.  ``parser``
    defaults to the module-level ``html_parser``.

    Raises ``TypeError`` if ``html`` is not a string.
    """
    if not isinstance(html, _strings):
        raise TypeError('string required')
    if parser is None:
        parser = html_parser
    return parser.parse(html, useChardet=guess_charset).getroot()


def fragments_fromstring(html, no_leading_text=False, guess_charset=False,
                         parser=None):
    """Parse ``html`` as a fragment inside a ``div`` context.

    Returns whatever ``parser.parseFragment`` produces.  When the first item
    is a string (leading text) and ``no_leading_text`` is true, that string
    is dropped if it is whitespace-only; otherwise ``etree.ParserError`` is
    raised.  With ``no_leading_text`` false the leading string is kept.

    Raises ``TypeError`` if ``html`` is not a string.
    """
    if not isinstance(html, _strings):
        raise TypeError('string required')
    if parser is None:
        parser = html_parser

    children = parser.parseFragment(html, 'div', useChardet=guess_charset)
    if children and isinstance(children[0], _strings):
        if no_leading_text:
            if children[0].strip():
                raise etree.ParserError(
                    'There is leading text: %r' % children[0])
            del children[0]
    return children


def fragment_fromstring(html, create_parent=False, guess_charset=False,
                        parser=None):
    """Parse ``html`` as a fragment that must yield exactly one element.

    If ``create_parent`` is true the markup is first wrapped in a container
    element: the given tag name when ``create_parent`` is a string, ``'div'``
    for any other true value.

    Raises ``TypeError`` if ``html`` is not a string, and
    ``etree.ParserError`` if no element is found, more than one element is
    found, or the element is followed by non-whitespace text.
    """
    if not isinstance(html, _strings):
        raise TypeError('string required')

    if create_parent:
        # A non-string truthy value (e.g. True) must not be interpolated
        # into the markup as a tag name, so it selects the default 'div'.
        if isinstance(create_parent, _strings):
            container = create_parent
        else:
            container = 'div'
        html = '<%s>%s</%s>' % (container, html, container)

    children = fragments_fromstring(html, True, guess_charset, parser)
    if not children:
        raise etree.ParserError('No elements found')
    if len(children) > 1:
        raise etree.ParserError('Multiple elements found')

    result = children[0]
    tail = result.tail
    # ``tail`` is commonly None; only inspect it when there is text.
    if tail and tail.strip():
        raise etree.ParserError('Element followed by text: %r' % tail)
    result.tail = None
    return result


def fromstring(html, guess_charset=True, parser=None):
    """Parse ``html`` and return a document, a single element, or the body.

    The whole document root is returned when the input begins with ``<html``
    or ``<!doctype`` (case-insensitive, leading whitespace within the first
    50 characters ignored) or when the parsed ``head`` has children.  If the
    ``body`` holds exactly one child with no surrounding non-whitespace text,
    that child is returned.  Otherwise the ``body`` element is returned.

    Raises ``TypeError`` if ``html`` is not a string.
    """
    if not isinstance(html, _strings):
        raise TypeError('string required')

    doc = document_fromstring(html, parser=parser,
                              guess_charset=guess_charset)

    start = html[:50].lstrip().lower()
    if start.startswith('<html') or start.startswith('<!doctype'):
        return doc

    # NOTE(review): assumes the parser always yields head and body elements;
    # a missing one would make len() fail here - confirm against html5lib.
    head = _find_tag(doc, 'head')
    if len(head):
        return doc

    body = _find_tag(doc, 'body')
    lone_child = (
        len(body) == 1
        and (not body.text or not body.text.strip())
        and (not body[-1].tail or not body[-1].tail.strip())
    )
    if lone_child:
        return body[0]
    return body


def parse(filename_url_or_file, guess_charset=True, parser=None):
    """Parse a document from a filename/URL string or a file-like object.

    A string argument is opened with ``urlopen`` and the handle is closed
    once parsing has finished; any other object is handed to the parser
    as-is and left open for the caller.  ``guess_charset`` is passed to the
    parser as ``useChardet``.  Returns the result of ``parser.parse``.

    NOTE(review): on Python 3 ``urllib.request.urlopen`` rejects strings
    without a URL scheme, so bare local paths may need a ``file://`` URL or
    an already opened file object - confirm against callers.
    """
    if parser is None:
        parser = html_parser

    if not isinstance(filename_url_or_file, _strings):
        return parser.parse(filename_url_or_file, useChardet=guess_charset)

    fp = _urlopen(filename_url_or_file)
    try:
        return parser.parse(fp, useChardet=guess_charset)
    finally:
        # We opened this handle ourselves, so we are responsible for it.
        fp.close()


# Shared default parser instances used when callers pass ``parser=None``.
html_parser = HTMLParser()
xhtml_parser = XHTMLParser()