Maximum CD 2010 November

home *** CD-ROM | disk | FTP | other *** search

/ Maximum CD 2010 November / maximum-cd-2010-11.iso / DiscContents / calibre-0.7.13.msi / file_2063 (.txt) < prev next >

Wrap

Python Compiled Bytecode | 2010-08-06 | 20.8 KB | 580 lines

# Source Generated with Decompyle++
# File: in.pyc (Python 2.6)
#
# Core helpers for HTML handling: encoding / content-type detection, the
# argument-bundling helper used by the forms factories, and the Link record.

import re
import copy
import htmlentitydefs
import sgmllib
import ClientForm
import _request
from _headersutil import split_header_words, is_html as _is_html
import _rfc3986

# Encoding assumed when a response does not declare a charset.
DEFAULT_ENCODING = 'latin-1'

# Matches any run of whitespace; used to collapse text to single spaces.
COMPRESS_RE = re.compile(r'\s+')


class ParseError(ClientForm.ParseError):
    """Parse failure raised by this module; subclasses ClientForm.ParseError."""
    pass


class CachingGeneratorFunction(object):
    """Callable that replays an iterable, caching the items it has produced.

    Every call returns a fresh generator that first yields the items already
    cached and then continues to draw from the single shared underlying
    iterator, appending each newly produced item to the cache.
    """

    def __init__(self, iterable):
        self._cache = []
        # iter() is taken once so that all generators share one position in
        # the underlying iterable.
        self._iterator = iter(iterable)

    def __call__(self):
        cache = self._cache
        for item in cache:
            yield item
        for item in self._iterator:
            cache.append(item)
            yield item


class EncodingFinder:
    """Determine a response's character encoding from its headers."""

    def __init__(self, default_encoding):
        self._default_encoding = default_encoding

    def encoding(self, response):
        """Return the first ``charset`` found in the content-type headers.

        Falls back to the default encoding given to the constructor when no
        content-type header carries a ``charset`` parameter.
        """
        for ct in response.info().getheaders('content-type'):
            for k, v in split_header_words([ct])[0]:
                if k == 'charset':
                    return v
        return self._default_encoding


class ResponseTypeFinder:
    """Decide whether a response should be treated as HTML."""

    def __init__(self, allow_xhtml):
        self._allow_xhtml = allow_xhtml

    def is_html(self, response, encoding):
        """Return the result of ``_is_html`` for this response.

        Only the content-type headers, the response URL and the
        ``allow_xhtml`` flag are consulted; ``encoding`` is accepted but not
        used.
        """
        ct_hdrs = response.info().getheaders('content-type')
        url = response.geturl()
        return _is_html(ct_hdrs, url, self._allow_xhtml)


class Args:
    """Attribute-style read access to a copy of a mapping of arguments.

    The copy is kept in ``self.dictionary``.  Note that there is no
    ``__setattr__``: assigning ``args.name = value`` sets an ordinary
    instance attribute and does NOT update ``self.dictionary``.
    """

    def __init__(self, args_map):
        self.dictionary = dict(args_map)

    def __getattr__(self, key):
        # Only reached when normal attribute lookup fails.
        try:
            return self.dictionary[key]
        except KeyError:
            # Defer to the class so a missing name raises AttributeError.
            return getattr(self.__class__, key)


def form_parser_args(select_default = False, form_parser_class = None,
                     request_class = None, backwards_compat = False):
    """Bundle the forms-factory keyword arguments into an Args object."""
    # locals() here is exactly the four parameters; do not introduce any
    # local variable before this line.
    return Args(locals())


class Link:
    """A link found in a document.

    Attributes: ``base_url``, ``url`` (as passed in), ``absolute_url``
    (``url`` joined onto ``base_url``), ``text``, ``tag`` and ``attrs``.
    """

    def __init__(self, base_url, url, text, tag, attrs):
        self.base_url = base_url
        self.absolute_url = _rfc3986.urljoin(base_url, url)
        self.url = url
        self.text = text
        self.tag = tag
        self.attrs = attrs

    def __cmp__(self, other):
        """Return 0 when url, text, tag and attrs all match, otherwise -1.

        ``base_url`` does not take part in the comparison.  An ``other``
        lacking any of the compared attributes compares as unequal.
        """
        try:
            for name in ('url', 'text', 'tag', 'attrs'):
                if getattr(self, name) != getattr(other, name):
                    return -1
        except AttributeError:
            return -1
        return 0

    def __repr__(self):
        return 'Link(base_url=%r, url=%r, text=%r, tag=%r, attrs=%r)' % (
            self.base_url, self.url, self.text, self.tag, self.attrs)
class LinksFactory:
    """Produce Link objects from a response using a pull parser."""

    def __init__(self, link_parser_class = None, link_class = Link,
                 urltags = None):
        """Store the parser class and the tag -> URL-attribute mapping.

        ``link_parser_class`` defaults to ``_pullparser.TolerantPullParser``.
        ``urltags`` maps a tag name to the attribute holding its URL.
        ``link_class`` is stored, but links() always builds ``Link``.
        """
        import _pullparser
        if link_parser_class is None:
            link_parser_class = _pullparser.TolerantPullParser
        self.link_parser_class = link_parser_class
        self.link_class = link_class
        if urltags is None:
            urltags = {
                'a': 'href',
                'area': 'href',
                'frame': 'src',
                'iframe': 'src',
            }
        self.urltags = urltags
        self._response = None
        self._encoding = None
        # Initialised here (as RobustLinksFactory does) so the attribute
        # exists before set_response() is called.
        self._base_url = None

    def set_response(self, response, base_url, encoding):
        self._response = response
        self._encoding = encoding
        self._base_url = base_url

    def links(self):
        """Generate a Link for every URL-bearing tag in the response.

        A ``<base href=...>`` tag changes the base URL used for the links
        that follow it.  Tags without a (non-empty) URL attribute are
        skipped.  Only ``<a>`` start tags get text; all others get ``None``.

        Raises ParseError if the parser raises sgmllib.SGMLParseError.
        """
        response = self._response
        encoding = self._encoding
        base_url = self._base_url
        p = self.link_parser_class(response, encoding = encoding)
        wanted_tags = list(self.urltags.keys()) + ['base']
        try:
            for token in p.tags(*wanted_tags):
                if token.type == 'endtag':
                    continue
                if token.data == 'base':
                    base_href = dict(token.attrs).get('href')
                    if base_href is not None:
                        base_url = base_href
                    continue
                attrs = dict(token.attrs)
                tag = token.data
                text = None
                url = attrs.get(self.urltags[tag])
                if not url:
                    # No URL (or an empty one): not a link for our purposes.
                    continue
                url = _rfc3986.clean_url(url, encoding)
                if tag == 'a' and token.type != 'startendtag':
                    text = p.get_compressed_text(('endtag', tag))
                yield Link(base_url, url, text, tag, token.attrs)
        except sgmllib.SGMLParseError as exc:
            # Pass the real exception on instead of discarding it.
            raise ParseError(exc)


class FormsFactory:
    """Produce forms from a response via ClientForm.ParseResponseEx."""

    def __init__(self, select_default = False, form_parser_class = None,
                 request_class = None, backwards_compat = False):
        """Store parsing options.

        ``form_parser_class`` defaults to ``ClientForm.FormParser`` and
        ``request_class`` to ``_request.Request``.
        """
        import ClientForm
        self.select_default = select_default
        if form_parser_class is None:
            form_parser_class = ClientForm.FormParser
        self.form_parser_class = form_parser_class
        if request_class is None:
            request_class = _request.Request
        self.request_class = request_class
        self.backwards_compat = backwards_compat
        self._response = None
        self.encoding = None
        self.global_form = None

    def set_response(self, response, encoding):
        self._response = response
        self.encoding = encoding
        self.global_form = None

    def forms(self):
        """Parse the response and return every form except the first.

        The first item returned by ParseResponseEx is stored as
        ``self.global_form``; the remaining items are returned.

        Raises ParseError if ClientForm raises ClientForm.ParseError.
        """
        import ClientForm
        encoding = self.encoding
        try:
            forms = ClientForm.ParseResponseEx(
                self._response,
                select_default = self.select_default,
                form_parser_class = self.form_parser_class,
                request_class = self.request_class,
                encoding = encoding,
                _urljoin = _rfc3986.urljoin,
                _urlparse = _rfc3986.urlsplit,
                _urlunparse = _rfc3986.urlunsplit)
        except ClientForm.ParseError as exc:
            raise ParseError(exc)
        self.global_form = forms[0]
        return forms[1:]


class TitleFactory:
    """Extract the document title from a response using a pull parser."""

    def __init__(self):
        self._response = None
        self._encoding = None

    def set_response(self, response, encoding):
        self._response = response
        self._encoding = encoding

    def _get_title_text(self, parser):
        """Collect text from ``parser`` up to the closing ``</title>`` tag.

        Entity and character references are unescaped; any other tag met on
        the way is kept as its string form.  Whitespace runs are collapsed
        to single spaces and the result is stripped.
        """
        import _pullparser
        text = []
        while True:
            try:
                tok = parser.get_token()
            except _pullparser.NoMoreTokensError:
                break
            if tok.type == 'data':
                text.append(str(tok))
            elif tok.type == 'entityref':
                text.append(unescape('&%s;' % tok.data,
                                     parser._entitydefs, parser.encoding))
            elif tok.type == 'charref':
                text.append(unescape_charref(tok.data, parser.encoding))
            elif tok.type in ('starttag', 'endtag', 'startendtag'):
                if tok.type == 'endtag' and tok.data == 'title':
                    break
                text.append(str(tok))
        return COMPRESS_RE.sub(' ', ''.join(text).strip())

    def title(self):
        """Return the title text, or None when there is no ``<title>`` tag.

        Raises ParseError if the parser raises sgmllib.SGMLParseError.
        """
        import _pullparser
        p = _pullparser.TolerantPullParser(self._response,
                                           encoding = self._encoding)
        try:
            try:
                p.get_tag('title')
            except _pullparser.NoMoreTokensError:
                return None
            return self._get_title_text(p)
        except sgmllib.SGMLParseError as exc:
            raise ParseError(exc)


def unescape(data, entities, encoding):
    """Replace entity and character references in ``data``.

    ``entities`` maps an entity name to a codepoint.  A replacement is
    encoded with ``encoding``, or left as unicode when ``encoding`` is None
    (matching unescape_charref).  Unknown entities, and characters that
    cannot be encoded, are left as they appear in ``data``.  ``data`` is
    returned untouched when it is None or contains no ``&``.
    """
    if data is None or '&' not in data:
        return data

    def replace_entities(match):
        ent = match.group()
        if ent[1] == '#':
            return unescape_charref(ent[2:-1], encoding)
        codepoint = entities.get(ent[1:-1])
        if codepoint is None:
            # Unknown entity: keep the source text.  re.sub requires a
            # string here, so None must never be returned.
            return ent
        repl = unichr(codepoint)
        if encoding is None:
            return repl
        try:
            return repl.encode(encoding)
        except UnicodeError:
            return ent

    return re.sub(r'&#?[A-Za-z0-9]+?;', replace_entities, data)


def unescape_charref(data, encoding):
    """Convert the body of a numeric character reference to a character.

    ``data`` is the text between ``&#`` and ``;``: decimal digits, or ``x``
    followed by hex digits.  Returns unicode when ``encoding`` is None,
    otherwise the character encoded with ``encoding``.  If the reference is
    malformed, out of range, or not encodable, the reference text
    ``'&#<data>;'`` is returned instead.
    """
    name, base = data, 10
    if name.startswith('x'):
        name, base = name[1:], 16
    try:
        uc = unichr(int(name, base))
    except (ValueError, OverflowError):
        # e.g. "&#abc;" or a codepoint beyond what unichr accepts.
        return '&#%s;' % data
    if encoding is None:
        return uc
    try:
        return uc.encode(encoding)
    except UnicodeError:
        return '&#%s;' % data


# These imports sit mid-module, after the definitions above, as in the
# original.
import _beautifulsoup
import ClientForm

(RobustFormParser, NestingRobustFormParser) = ClientForm._create_bs_classes(
    _beautifulsoup.BeautifulSoup,
    _beautifulsoup.ICantBelieveItsBeautifulSoup)

# Module-level side effect: replaces sgmllib's character-reference pattern
# for every user of sgmllib in the process.
sgmllib.charref = re.compile('&#(x?[0-9a-fA-F]+)[^0-9a-fA-F]')


class MechanizeBs(_beautifulsoup.BeautifulSoup):
    """BeautifulSoup subclass that unescapes references using an encoding."""

    _entitydefs = htmlentitydefs.name2codepoint

    # (pattern, replacement) pairs: put a space before "/>" and drop the
    # whitespace after "<!".
    PARSER_MASSAGE = [
        (re.compile('(<[^<>]*)/>'), (lambda x: x.group(1) + ' />')),
        (re.compile(r'<!\s+([^<>]*)>'), (lambda x: '<!' + x.group(1) + '>')),
    ]

    def __init__(self, encoding, text = None, avoidParserProblems = True,
                 initialTextIsEverything = True):
        # The encoding must be set before the base constructor runs.
        # NOTE(review): presumably the base constructor parses ``text`` and
        # so calls the handlers below - confirm against _beautifulsoup.
        self._encoding = encoding
        _beautifulsoup.BeautifulSoup.__init__(
            self, text, avoidParserProblems, initialTextIsEverything)

    def handle_charref(self, ref):
        t = unescape('&#%s;' % ref, self._entitydefs, self._encoding)
        self.handle_data(t)

    def handle_entityref(self, ref):
        t = unescape('&%s;' % ref, self._entitydefs, self._encoding)
        self.handle_data(t)

    def unescape_attrs(self, attrs):
        """Return ``attrs`` as a list of (key, unescaped value) pairs."""
        return [(key, unescape(val, self._entitydefs, self._encoding))
                for key, val in attrs]


class RobustLinksFactory:
    """Produce Link objects by walking a parsed soup tree."""

    compress_re = COMPRESS_RE

    def __init__(self, link_parser_class = None, link_class = Link,
                 urltags = None):
        """Store options; ``link_parser_class`` defaults to MechanizeBs.

        ``urltags`` maps a tag name to the attribute holding its URL.
        ``link_class`` is stored, but links() always builds ``Link``.
        """
        if link_parser_class is None:
            link_parser_class = MechanizeBs
        self.link_parser_class = link_parser_class
        self.link_class = link_class
        if urltags is None:
            urltags = {
                'a': 'href',
                'area': 'href',
                'frame': 'src',
                'iframe': 'src',
            }
        self.urltags = urltags
        self._bs = None
        self._encoding = None
        self._base_url = None

    def set_soup(self, soup, base_url, encoding):
        self._bs = soup
        self._base_url = base_url
        self._encoding = encoding

    def links(self):
        """Generate a Link for every URL-bearing tag in the soup.

        A ``<base href=...>`` tag changes the base URL used for the links
        that follow it.  Tags without a (non-empty) URL attribute are
        skipped.  When a tag contains no text, ``<a>`` gets ``''`` and every
        other tag gets ``None``.
        """
        import _beautifulsoup
        bs = self._bs
        base_url = self._base_url
        encoding = self._encoding
        # Built once rather than on every node visited.
        wanted_tags = list(self.urltags.keys()) + ['base']
        for ch in bs.recursiveChildGenerator():
            if not (isinstance(ch, _beautifulsoup.Tag)
                    and ch.name in wanted_tags):
                continue
            link = ch
            attrs = bs.unescape_attrs(link.attrs)
            attrs_dict = dict(attrs)
            if link.name == 'base':
                base_href = attrs_dict.get('href')
                if base_href is not None:
                    base_url = base_href
                continue
            url = attrs_dict.get(self.urltags[link.name])
            if not url:
                continue
            url = _rfc3986.clean_url(url, encoding)
            text = link.fetchText((lambda t: True))
            if not text:
                if link.name == 'a':
                    text = ''
                else:
                    text = None
            else:
                text = self.compress_re.sub(' ', ' '.join(text).strip())
            yield Link(base_url, url, text, link.name, attrs)


class RobustFormsFactory(FormsFactory):
    """FormsFactory whose default form parser is RobustFormParser."""

    def __init__(self, *args, **kwds):
        args = form_parser_args(*args, **kwds)
        if args.form_parser_class is None:
            # Write into the dictionary that is actually forwarded below.
            # Assigning ``args.form_parser_class`` would only set an
            # instance attribute on the Args object and the default would
            # never reach FormsFactory.__init__.
            args.dictionary['form_parser_class'] = RobustFormParser
        FormsFactory.__init__(self, **args.dictionary)

    def set_response(self, response, encoding):
        # Unlike FormsFactory.set_response, global_form is not reset here.
        self._response = response
        self.encoding = encoding


class RobustTitleFactory:
    """Extract the document title from a parsed soup tree."""

    def __init__(self):
        self._bs = None
        self._encoding = None

    def set_soup(self, soup, encoding):
        self._bs = soup
        self._encoding = encoding

    def title(self):
        """Return the whitespace-compressed title, or None if there is none."""
        import _beautifulsoup
        title = self._bs.first('title')
        if title == _beautifulsoup.Null:
            return None
        inner_html = ''.join([str(node) for node in title.contents])
        return COMPRESS_RE.sub(' ', inner_html.strip())


class Factory:
    """Combine forms, links and title factories for one response.

    The attributes named in LAZY_ATTRS are computed on first access by
    __getattr__, cached as instance attributes, and discarded by
    set_response().
    """

    LAZY_ATTRS = ['encoding', 'is_html', 'title', 'global_form']

    def __init__(self, forms_factory, links_factory, title_factory,
                 encoding_finder = EncodingFinder(DEFAULT_ENCODING),
                 response_type_finder = ResponseTypeFinder(allow_xhtml = False)):
        self._forms_factory = forms_factory
        self._links_factory = links_factory
        self._title_factory = title_factory
        self._encoding_finder = encoding_finder
        self._response_type_finder = response_type_finder
        self.set_response(None)

    def set_request_class(self, request_class):
        self._forms_factory.request_class = request_class

    def set_response(self, response):
        """Install ``response`` and drop everything cached for the old one."""
        self._response = response
        self._forms_genf = None
        self._links_genf = None
        self._get_title = None
        for name in self.LAZY_ATTRS:
            try:
                delattr(self, name)
            except AttributeError:
                # Never computed for the previous response.
                pass

    def __getattr__(self, name):
        # Only reached when normal lookup fails, i.e. for a lazy attribute
        # not yet cached or for a name that does not exist.
        if name not in self.LAZY_ATTRS:
            return getattr(self.__class__, name)
        if name == 'encoding':
            self.encoding = self._encoding_finder.encoding(
                copy.copy(self._response))
            return self.encoding
        if name == 'is_html':
            self.is_html = self._response_type_finder.is_html(
                copy.copy(self._response), self.encoding)
            return self.is_html
        if name == 'title':
            # Assign before returning: a bare ``return self.title`` would
            # re-enter __getattr__ forever.
            # NOTE(review): the is_html guard follows upstream mechanize;
            # the decompiled source lost this branch - confirm.
            if self.is_html:
                self.title = self._title_factory.title()
            else:
                self.title = None
            return self.title
        if name == 'global_form':
            self.forms()
            return self.global_form

    def forms(self):
        """Return a generator over the forms; parsing happens once."""
        if self._forms_genf is None:
            try:
                self._forms_genf = CachingGeneratorFunction(
                    self._forms_factory.forms())
            except:
                # Deliberately broad: reset cached state, then re-raise
                # whatever happened.
                self.set_response(self._response)
                raise
            self.global_form = getattr(self._forms_factory,
                                       'global_form', None)
        return self._forms_genf()

    def links(self):
        """Return a generator over the links; items are cached as produced."""
        if self._links_genf is None:
            try:
                self._links_genf = CachingGeneratorFunction(
                    self._links_factory.links())
            except:
                # Same reset-and-re-raise handling as forms().
                self.set_response(self._response)
                raise
        return self._links_genf()


class DefaultFactory(Factory):
    """Factory built from FormsFactory, LinksFactory and TitleFactory."""

    def __init__(self, i_want_broken_xhtml_support = False):
        Factory.__init__(
            self,
            forms_factory = FormsFactory(),
            links_factory = LinksFactory(),
            title_factory = TitleFactory(),
            response_type_finder = ResponseTypeFinder(
                allow_xhtml = i_want_broken_xhtml_support))

    def set_response(self, response):
        Factory.set_response(self, response)
        if response is not None:
            # Each sub-factory receives its own copy of the response.
            self._forms_factory.set_response(
                copy.copy(response), self.encoding)
            self._links_factory.set_response(
                copy.copy(response), response.geturl(), self.encoding)
            self._title_factory.set_response(
                copy.copy(response), self.encoding)


class RobustFactory(Factory):
    """Factory built from the Robust* factories and a soup class."""

    def __init__(self, i_want_broken_xhtml_support = False, soup_class = None):
        Factory.__init__(
            self,
            forms_factory = RobustFormsFactory(),
            links_factory = RobustLinksFactory(),
            title_factory = RobustTitleFactory(),
            response_type_finder = ResponseTypeFinder(
                allow_xhtml = i_want_broken_xhtml_support))
        if soup_class is None:
            soup_class = MechanizeBs
        self._soup_class = soup_class

    def set_response(self, response):
        Factory.set_response(self, response)
        if response is not None:
            # The links and title factories share one soup built from the
            # response body.
            data = response.read()
            soup = self._soup_class(self.encoding, data)
            self._forms_factory.set_response(
                copy.copy(response), self.encoding)
            self._links_factory.set_soup(
                soup, response.geturl(), self.encoding)
            self._title_factory.set_soup(soup, self.encoding)