Maximum CD 2010 November

home *** CD-ROM | disk | FTP | other *** search

/ Maximum CD 2010 November / maximum-cd-2010-11.iso / DiscContents / calibre-0.7.13.msi / file_859 (.txt) < prev next >

Wrap

Python Compiled Bytecode | 2010-08-06 | 3.0 KB | 113 lines

# Source Generated with Decompyle++
# File: in.pyc (Python 2.6)
#
# NOTE: this listing was reconstructed from decompiler output.  Statements the
# decompiler emitted as invalid syntax (function-local ``from ... import``
# lines, the ``except LookupError`` handler) have been restored to valid
# Python.  The code is written to run unchanged on Python 2.6+ and Python 3.
"""Detect the character encoding of XML/HTML byte strings and decode them."""

__version__ = '1.0'
import re

try:
    _text_type = unicode  # Python 2 text type
except NameError:
    _text_type = str  # Python 3: ``str`` is already the text type


def detect(aBuf):
    """Run calibre's ``UniversalDetector`` over *aBuf* and return its result.

    The detector is reset, fed the whole buffer once, closed, and its
    ``result`` attribute is returned.  ``force_encoding`` reads the
    ``'encoding'`` and ``'confidence'`` keys of that result.
    """
    # Imported lazily so that importing this module does not require the
    # detector package.
    import calibre.ebooks.chardet.universaldetector as universaldetector
    detector = universaldetector.UniversalDetector()
    detector.reset()
    detector.feed(aBuf)
    detector.close()
    return detector.result


# Patterns whose group(1) is the encoding named by a document:
#   1. a ``<?... encoding="...">`` declaration
#   2. a ``<meta ... content="...charset=...">`` tag
ENCODING_PATS = [
    re.compile(r'''<\?[^<>]+encoding=[\'"](.*?)[\'"][^<>]*>''',
               re.IGNORECASE),
    re.compile(r'''<meta\s+?[^<>]+?content=['"][^'"]*?charset=([-a-z0-9]+)[^'"]*?['"][^<>]*>''',
               re.IGNORECASE),
]

# The same patterns compiled as byte patterns.  On Python 3 a text pattern
# cannot be applied to ``bytes``; on Python 2 these are equivalent to
# ENCODING_PATS.
_ENCODING_PATS_BYTES = tuple(
    re.compile(pat.pattern.encode('ascii'), re.IGNORECASE)
    for pat in ENCODING_PATS
)

# Matches ``&...;`` with a non-greedy run of non-whitespace in between.
ENTITY_PATTERN = re.compile(r'&(\S+?);')


def strip_encoding_declarations(raw):
    """Return *raw* with every match of the encoding patterns removed.

    *raw* may be text or a byte string; the result has the same type.
    """
    if isinstance(raw, _text_type):
        patterns, empty = ENCODING_PATS, u''
    else:
        patterns, empty = _ENCODING_PATS_BYTES, b''
    for pat in patterns:
        raw = pat.sub(empty, raw)
    return raw


def substitute_entites(raw):
    """Replace each ``ENTITY_PATTERN`` match in *raw*.

    calibre's ``xml_entity_to_unicode`` is used as the substitution callback.
    (The misspelt function name is kept because it is the public interface.)
    """
    from calibre import xml_entity_to_unicode
    return ENTITY_PATTERN.sub(xml_entity_to_unicode, raw)


# Charset names mapped to the codec names used for decoding.
_CHARSET_ALIASES = {
    'macintosh': 'mac-roman',
    'x-sjis': 'shift-jis',
}


def force_encoding(raw, verbose, assume_utf8=False):
    """Guess an encoding for the byte string *raw* using ``detect``.

    :param raw: byte string to analyse.
    :param verbose: when true, print a warning if the detection confidence
        is below 1.
    :param assume_utf8: when true, use ``'utf-8'`` whenever the detection
        confidence is below 1.
    :return: lower-cased encoding name, with ``_CHARSET_ALIASES`` applied and
        ``'ascii'`` widened to ``'utf-8'``.
    """
    from calibre.constants import preferred_encoding
    try:
        chardet = detect(raw)
    except Exception:
        # Detection is best-effort: on any failure fall back to the
        # preferred encoding with zero confidence.
        chardet = {'encoding': preferred_encoding, 'confidence': 0}
    encoding = chardet['encoding']
    uncertain = chardet['confidence'] < 1
    if uncertain and assume_utf8:
        encoding = 'utf-8'
    if uncertain and verbose:
        # Parenthesised so the confidence is scaled to a percentage *before*
        # formatting (``%`` and ``*`` have equal precedence).
        print('WARNING: Encoding detection confidence %d%%'
              % (chardet['confidence'] * 100))
    if not encoding:
        encoding = preferred_encoding
    encoding = encoding.lower()
    encoding = _CHARSET_ALIASES.get(encoding, encoding)
    if encoding == 'ascii':
        encoding = 'utf-8'
    return encoding


def _declared_encoding(raw):
    """Return the encoding named by the first matching declaration, or None.

    *raw* must be a byte string.  The name is returned as a native ``str``.
    """
    for pat in _ENCODING_PATS_BYTES:
        match = pat.search(raw)
        if match:
            name = match.group(1)
            if not isinstance(name, str):
                # Python 3: the match is ``bytes``; codec names must be str.
                name = name.decode('ascii', 'replace')
            return name
    return None


def xml_to_unicode(raw, verbose=False, strip_encoding_pats=False,
                   resolve_entities=False, assume_utf8=False):
    """Convert *raw* to text and report the encoding that was used.

    For byte-string input the encoding is chosen in this order: a UTF-16
    byte order mark, an encoding declaration matched by ``ENCODING_PATS``,
    then ``force_encoding``.  Undecodable bytes are replaced, and an unknown
    codec name falls back to ``'utf-8'``.  Text input is not decoded.

    :param raw: byte string or text; a false value yields ``(u'', None)``.
    :param verbose: passed to ``force_encoding``.
    :param strip_encoding_pats: remove encoding declarations from the result.
    :param resolve_entities: run ``substitute_entites`` on the result.
    :param assume_utf8: passed to ``force_encoding``.
    :return: ``(text, encoding)``; *encoding* is ``None`` when nothing was
        decoded.
    """
    encoding = None
    if not raw:
        return (u'', encoding)

    if not isinstance(raw, _text_type):
        # A UTF-16 BOM is authoritative; drop the BOM character itself.
        if raw.startswith(b'\xff\xfe'):
            raw = raw.decode('utf-16-le')[1:]
            encoding = 'utf-16-le'
        elif raw.startswith(b'\xfe\xff'):
            raw = raw.decode('utf-16-be')[1:]
            encoding = 'utf-16-be'

    if not isinstance(raw, _text_type):
        encoding = _declared_encoding(raw)
        if encoding is None:
            encoding = force_encoding(raw, verbose, assume_utf8=assume_utf8)
        # Use the same alias table as force_encoding so declared names such
        # as 'macintosh' or 'x-sjis' resolve to real codecs.
        encoding = _CHARSET_ALIASES.get(encoding.lower().strip(), encoding)
        try:
            raw = raw.decode(encoding, 'replace')
        except LookupError:
            # Unknown codec name in the document: fall back to UTF-8.
            encoding = 'utf-8'
            raw = raw.decode(encoding, 'replace')

    if strip_encoding_pats:
        raw = strip_encoding_declarations(raw)
    if resolve_entities:
        raw = substitute_entites(raw)
    return (raw, encoding)