Maximum CD 2010 November

home *** CD-ROM | disk | FTP | other *** search

/ Maximum CD 2010 November / maximum-cd-2010-11.iso / DiscContents / calibre-0.7.13.msi / file_2038 (.txt) < prev next >

Wrap

Python Compiled Bytecode | 2010-08-06 | 11.3 KB | 303 lines

# Source Generated with Decompyle++
# File: in.pyc (Python 2.6)
#
# NOTE(review): this module was recovered from decompiler output in which
# every infinite loop appeared as ``while None:`` and some expressions were
# lost.  Those artifacts have been repaired below; each repaired spot that
# required judgement carries its own NOTE(review) comment.

"""Pull-style tokenizer for HTML.

A parser object wraps a file-like object and hands out ``Token`` objects one
at a time on request, reading the underlying file in chunks only when its
queue of already-parsed tokens is empty.
"""

import re
import htmlentitydefs
import sgmllib
import HTMLParser
from xml.sax import saxutils
from _html import unescape, unescape_charref


class NoMoreTokensError(Exception):
    """Raised by ``get_token`` when the input file is exhausted."""
    pass


class Token:
    """A single parse event: a (type, data, attrs) triple.

    ``type`` is one of the strings 'starttag', 'startendtag', 'endtag',
    'charref', 'entityref', 'data', 'comment', 'decl' or 'pi'; ``data`` is
    the tag name or text payload; ``attrs`` is a sequence of (name, value)
    pairs or None.

    A token iterates as its three fields, so it can be unpacked like a
    tuple and compared against plain 3-tuples.
    """

    def __init__(self, type, data, attrs=None):
        self.type = type
        self.data = data
        self.attrs = attrs

    def __iter__(self):
        return iter((self.type, self.data, self.attrs))

    def __eq__(self, other):
        # ``other`` may be a Token or any iterable of exactly three items.
        # Anything that cannot be unpacked that way is simply "not equal"
        # rather than an error.
        try:
            other_type, other_data, other_attrs = other
        except (TypeError, ValueError):
            return False
        return (self.type == other_type and
                self.data == other_data and
                self.attrs == other_attrs)

    def __ne__(self, other):
        return not self.__eq__(other)

    def __repr__(self):
        args = ', '.join(map(repr, [self.type, self.data, self.attrs]))
        return self.__class__.__name__ + '(%s)' % args

    def __str__(self):
        """Return the token rendered back as markup text.

        Falls through (returning None) for a token type that is not one of
        the nine known types.
        """
        # NOTE(review): the decompiled source used ``attrs`` here without
        # ever binding it (a NameError).  The rendering below is a
        # reconstruction: saxutils is imported by this module and not used
        # anywhere else, so quoteattr is presumed to be what was lost --
        # confirm against the upstream module.
        if self.attrs is not None:
            attrs = ''.join([' %s=%s' % (k, saxutils.quoteattr(v))
                             for k, v in self.attrs])
        else:
            attrs = ''
        if self.type == 'starttag':
            return '<%s%s>' % (self.data, attrs)
        if self.type == 'startendtag':
            return '<%s%s />' % (self.data, attrs)
        if self.type == 'endtag':
            return '</%s>' % self.data
        if self.type == 'charref':
            return '&#%s;' % self.data
        if self.type == 'entityref':
            return '&%s;' % self.data
        if self.type == 'data':
            return self.data
        if self.type == 'comment':
            # NOTE(review): the format string was an empty string in the
            # recovered text (the comment delimiters were evidently eaten
            # when the page was rendered as HTML), which made this branch
            # raise TypeError.
            return '<!--%s-->' % self.data
        if self.type == 'decl':
            return '<!%s>' % self.data
        if self.type == 'pi':
            return '<?%s>' % self.data


def iter_until_exception(fn, exception, *args, **kwds):
    """Yield ``fn(*args, **kwds)`` repeatedly until it raises ``exception``.

    The given exception type ends the iteration cleanly; any other
    exception propagates to the caller.
    """
    while True:
        try:
            yield fn(*args, **kwds)
        except exception:
            # A plain return ends the generator; raising StopIteration
            # inside a generator body is an error on newer interpreters.
            return


class _AbstractParser:
    """Token-queue logic shared by the concrete pull parsers.

    This class is a mixin: it relies on the other base class of the
    concrete parser to supply ``feed`` and to call the ``handle_*``
    callbacks defined here, each of which appends a Token to the queue.
    """

    # Number of bytes/characters requested from the file per read.
    chunk = 1024
    # Runs of whitespace, collapsed by get_compressed_text.
    compress_re = re.compile(r'\s+')

    def __init__(self, fh, textify=None, encoding='ascii', entitydefs=None):
        """
        fh: file-like object (only ``read(size)`` is used) to parse.
        textify: mapping from tag name to either an attribute name or a
            callable taking the Token; used by get_text to represent
            start tags as text.  Defaults to
            {'img': 'alt', 'applet': 'alt'}.
        encoding: passed through to the unescape helpers.
        entitydefs: mapping of entity names passed through to unescape;
            defaults to htmlentitydefs.name2codepoint.
        """
        self._fh = fh
        self._tokenstack = []  # FIFO queue of Tokens not yet handed out
        if textify is None:
            # Build a fresh dict per instance so that mutating one
            # parser's mapping cannot leak into every other parser.
            textify = {'img': 'alt', 'applet': 'alt'}
        self.textify = textify
        self.encoding = encoding
        if entitydefs is None:
            entitydefs = htmlentitydefs.name2codepoint
        self._entitydefs = entitydefs

    def __iter__(self):
        return self

    def tags(self, *names):
        """Return an iterator over get_tag(*names) until input runs out."""
        return iter_until_exception(self.get_tag, NoMoreTokensError, *names)

    def tokens(self, *tokentypes):
        """Return an iterator over get_token(*tokentypes) until input runs
        out."""
        return iter_until_exception(self.get_token, NoMoreTokensError,
                                    *tokentypes)

    def next(self):
        """Iterator protocol: return the next token of any type."""
        try:
            return self.get_token()
        except NoMoreTokensError:
            raise StopIteration()

    def get_token(self, *tokentypes):
        """Pop and return the next token.

        If ``tokentypes`` are given, tokens of other types are discarded
        until one of a listed type is found.

        Raises NoMoreTokensError when the file yields no more data.
        """
        while True:
            while self._tokenstack:
                token = self._tokenstack.pop(0)
                if not tokentypes or token.type in tokentypes:
                    return token
            # Queue drained: parse another chunk to refill it.
            data = self._fh.read(self.chunk)
            if not data:
                raise NoMoreTokensError()
            self.feed(data)

    def unget_token(self, token):
        """Push ``token`` back so that it is the next one returned."""
        self._tokenstack.insert(0, token)

    def get_tag(self, *names):
        """Return the next start, end or startend tag token.

        If ``names`` are given, tags with other names are skipped.  All
        non-tag tokens read along the way are discarded.

        Raises NoMoreTokensError when the file yields no more data.
        """
        while True:
            tok = self.get_token()
            if tok.type not in ('starttag', 'endtag', 'startendtag'):
                continue
            if not names or tok.data in names:
                return tok

    def get_text(self, endat=None):
        """Collect and return text up to a tag.

        Data tokens are taken verbatim; entity and character references
        are converted with the unescape helpers.  Comment, decl and pi
        tokens are silently skipped.

        endat: None to stop at the first tag of any kind, or a
            (token type, tag name) tuple to stop only at that tag.  The
            tag that stops collection is pushed back onto the queue.

        Start tags whose name is a key of ``self.textify`` contribute
        text (see __init__); this happens before the ``endat`` check, so
        it applies to the stopping tag as well.
        """
        text = []
        tok = None
        while True:
            try:
                tok = self.get_token()
            except NoMoreTokensError:
                # Out of input: push back the last token that *was*
                # read (``tok`` still holds it), then stop.
                if tok:
                    self.unget_token(tok)
                break
            if tok.type == 'data':
                text.append(tok.data)
            elif tok.type == 'entityref':
                t = unescape('&%s;' % tok.data, self._entitydefs,
                             self.encoding)
                text.append(t)
            elif tok.type == 'charref':
                t = unescape_charref(tok.data, self.encoding)
                text.append(t)
            elif tok.type in ('starttag', 'endtag', 'startendtag'):
                tag_name = tok.data
                if tok.type in ('starttag', 'startendtag'):
                    alt = self.textify.get(tag_name)
                    if alt is not None:
                        if callable(alt):
                            text.append(alt(tok))
                        elif tok.attrs is not None:
                            for k, v in tok.attrs:
                                if k == alt:
                                    text.append(v)
                            # NOTE(review): the flattened decompiler
                            # output does not show whether this append
                            # sits inside or after the loop; it is
                            # placed after it (one "[TAG]" marker per
                            # tag) -- confirm against upstream.
                            text.append('[%s]' % tag_name.upper())
                if endat is None or endat == (tok.type, tag_name):
                    self.unget_token(tok)
                    break
        return ''.join(text)

    def get_compressed_text(self, *args, **kwds):
        """Like get_text, but stripped and with each run of whitespace
        replaced by a single space."""
        text = self.get_text(*args, **kwds)
        text = text.strip()
        return self.compress_re.sub(' ', text)

    # -- Parser callbacks: each one queues a Token for the event. --------

    def handle_startendtag(self, tag, attrs):
        self._tokenstack.append(Token('startendtag', tag, attrs))

    def handle_starttag(self, tag, attrs):
        self._tokenstack.append(Token('starttag', tag, attrs))

    def handle_endtag(self, tag):
        self._tokenstack.append(Token('endtag', tag))

    def handle_charref(self, name):
        self._tokenstack.append(Token('charref', name))

    def handle_entityref(self, name):
        self._tokenstack.append(Token('entityref', name))

    def handle_data(self, data):
        self._tokenstack.append(Token('data', data))

    def handle_comment(self, data):
        self._tokenstack.append(Token('comment', data))

    def handle_decl(self, decl):
        self._tokenstack.append(Token('decl', decl))

    def unknown_decl(self, data):
        self._tokenstack.append(Token('decl', data))

    def handle_pi(self, data):
        self._tokenstack.append(Token('pi', data))

    def unescape_attr(self, name):
        """Return ``name`` with entity/character references converted,
        using this parser's entity table and encoding."""
        return unescape(name, self._entitydefs, self.encoding)

    def unescape_attrs(self, attrs):
        """Return a new list of (key, value) pairs with every value passed
        through unescape_attr; keys are left untouched."""
        escaped_attrs = []
        for key, val in attrs:
            escaped_attrs.append((key, self.unescape_attr(val)))
        return escaped_attrs


class PullParser(_AbstractParser, HTMLParser.HTMLParser):
    """Pull parser built on HTMLParser.HTMLParser."""

    def __init__(self, *args, **kwds):
        HTMLParser.HTMLParser.__init__(self)
        _AbstractParser.__init__(self, *args, **kwds)

    def unescape(self, name):
        # Route unescaping through this parser's own entity table and
        # encoding.  Presumably this overrides the HTMLParser method of
        # the same name used for attribute values -- confirm.
        return self.unescape_attr(name)


class TolerantPullParser(_AbstractParser, sgmllib.SGMLParser):
    """Pull parser built on sgmllib.SGMLParser."""

    def __init__(self, *args, **kwds):
        sgmllib.SGMLParser.__init__(self)
        _AbstractParser.__init__(self, *args, **kwds)

    def unknown_starttag(self, tag, attrs):
        # Attribute values are unescaped here before the token is queued.
        attrs = self.unescape_attrs(attrs)
        self._tokenstack.append(Token('starttag', tag, attrs))

    def unknown_endtag(self, tag):
        self._tokenstack.append(Token('endtag', tag))


def _test():
    """Run this module's doctests and return doctest.testmod's result."""
    import doctest
    import _pullparser
    return doctest.testmod(_pullparser)


if __name__ == '__main__':
    _test()