Maximum CD 2010 November

home *** CD-ROM | disk | FTP | other *** search

/ Maximum CD 2010 November / maximum-cd-2010-11.iso / DiscContents / calibre-0.7.13.msi / file_862 (.txt) < prev next >

Wrap

Python Compiled Bytecode | 2010-08-06 | 5.5 KB | 195 lines

# Source Generated with Decompyle++
# File: in.pyc (Python 2.6)
#
# NOTE(review): the decompiler output for this module was not valid Python
# (lost ``with`` blocks, mangled ``or`` fallbacks, garbled exception handlers
# and mis-encoded non-ASCII literals).  The control flow below is a manual
# reconstruction; places where the original intent had to be inferred are
# marked with NOTE(review).
from __future__ import with_statement

__license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import re

from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.chardet import xml_to_unicode
from calibre.ebooks.metadata import string_to_authors, MetaInformation
from calibre.utils.logging import default_log
from calibre.ptempfile import TemporaryFile


def _clean(s):
    """Return *s* with no-break spaces (U+00A0) replaced by plain spaces."""
    # The decompiled literal was the mojibake form of U+00A0.
    return s.replace(u'\xa0', u' ')


def _detag(tag):
    """Return the concatenated text found inside *tag*, dropping markup.

    Items of *tag* that have a ``contents`` attribute are descended into
    recursively; every other item is passed through ``_clean``.  ``None``
    yields an empty string.  A plain string is also accepted, in which case
    it is processed character by character.
    """
    if tag is None:
        return u''
    pieces = []
    for elem in tag:
        if hasattr(elem, 'contents'):
            pieces.append(_detag(elem))
        else:
            pieces.append(_clean(elem))
    return u''.join(pieces)


def _metadata_from_table(soup, searchfor):
    """Look up a metadata value stored in a table cell of *soup*.

    *searchfor* is a regular expression (matched case-insensitively) for the
    label text.  Returns the stripped value, or ``None`` when no ``<td>``
    text matches.
    """
    label = soup.find('td', text=re.compile(searchfor, flags=re.I))
    if label is None:
        return None
    cell = label.parent
    if re.match(r'^\s*' + searchfor + r'\s*$', cell.renderContents(None),
                flags=re.I):
        # The cell holds only the label: the value is in the next cell.
        meta = _detag(cell.findNextSibling('td'))
        return re.sub('^:', '', meta).strip()
    # Label and value share one cell: drop everything up to the first colon.
    meta = _detag(cell)
    return re.sub(r'^[^:]+:', '', meta).strip()


def _metadata_from_span(soup, searchfor):
    """Look up a metadata value stored in a ``<span>`` of *soup*.

    The span is selected by matching its ``class`` attribute against the
    regular expression *searchfor* (case-insensitively).  Returns ``None``
    when no such span exists.
    """
    span = soup.find('span', {'class': re.compile(searchfor, flags=re.I)})
    if span is None:
        return None
    # _detag receives the rendered string here, so this only normalises
    # no-break spaces; any inner markup is left in place.
    return _detag(span.renderContents(None).strip())


def _get_authors(soup):
    """Return the list of authors found in *soup*, or ``[_('Unknown')]``."""
    # The decompiler rendered this ``a or b`` fallback as ``if not a: pass``.
    aut = (_metadata_from_span(soup, 'author')
           or _metadata_from_table(soup, r'^\s*by\s*:?\s+'))
    ans = [_('Unknown')]
    if aut is not None:
        ans = string_to_authors(aut)
    return ans


def _get_publisher(soup):
    """Return the publisher from an 'imprint' span or a 'publisher' cell."""
    return (_metadata_from_span(soup, 'imprint')
            or _metadata_from_table(soup, 'publisher'))


def _get_isbn(soup):
    """Return the ISBN from an 'isbn' span or an 'isbn' table cell."""
    return (_metadata_from_span(soup, 'isbn')
            or _metadata_from_table(soup, 'isbn'))


def _get_comments(soup):
    """Build a 'Published <date>, <n> pages.' comment from *soup*.

    Returns ``None`` when either the publication date or the page count
    cannot be found or parsed.
    """
    date = (_metadata_from_span(soup, 'cwdate')
            or _metadata_from_table(soup, 'pub date'))
    pages = (_metadata_from_span(soup, 'pages')
             or _metadata_from_table(soup, 'pages'))
    try:
        # The decompiled literal was the mojibake form of U+00A9 (copyright
        # sign), which is stripped from the date text.
        date = date.replace(u'\xa9', '').strip()
        # Keep only the first run of digits from the pages text.
        pages = re.search(r'\d+', pages).group(0)
        return u'Published %s, %s pages.' % (date, pages)
    except Exception:
        # Best effort: missing or unparsable values simply mean no comment.
        return None


def _guess_cover_src(soup):
    """Guess which ``<img>`` in *soup* is the cover and return its ``src``.

    Each image is scored by how far its height/width ratio is from 1.25;
    the image with the smallest score wins.  An image lacking a height or
    width attribute gets score 0.  Returns ``None`` if no image qualifies.
    """
    candidates = {}
    for img in soup('img'):
        try:
            src = img['src']
        except KeyError:
            # An image without a source cannot be fetched; skip it.
            continue
        try:
            height = float(re.search('[0-9.]+', img['height']).group())
            width = float(re.search('[0-9.]+', img['width']).group())
            candidates[abs(height / width - 1.25)] = src
        except KeyError:
            candidates[0] = src
        except Exception:
            # Unparsable or zero dimensions: ignore this image.
            continue
    if not candidates:
        return None
    return candidates[min(candidates)]


def _get_cover(soup, rdr):
    """Return the cover image as JPEG data, or ``None`` if unavailable.

    The image is located through an ``alt`` attribute matching 'cover', with
    an aspect-ratio guess as fallback.  It is then read through
    ``rdr.GetFile`` (retrying relative to ``rdr.root``) and re-encoded as
    JPEG with PIL.
    """
    ans = None
    try:
        ans = soup.find('img', alt=re.compile('cover', flags=re.I))['src']
    except TypeError:
        # find() returned None: no image is labelled as the cover.
        ans = _guess_cover_src(soup)
    except KeyError:
        # NOTE(review): the decompiler output showed a garbled KeyError
        # match at this level; treated as "labelled image has no src".
        ans = None

    if ans is None:
        return None

    try:
        data = rdr.GetFile(ans)
    except Exception:
        # Retry with the path made relative to the reader's root.
        try:
            data = rdr.GetFile(rdr.root + '/' + ans)
        except Exception:
            return None
    if data is None:
        return None

    # Imported lazily, only once there is image data to convert.
    from PIL import Image
    from cStringIO import StringIO
    buf = StringIO()
    try:
        Image.open(StringIO(data)).convert('RGB').save(buf, 'JPEG')
    except Exception:
        return None
    return buf.getvalue()


def get_metadata_from_reader(rdr):
    """Build a ``MetaInformation`` object from the home page of *rdr*.

    The title comes from ``rdr.title``; authors, publisher, ISBN, comments
    and cover are extracted from the document at ``rdr.home``.
    """
    raw = rdr.GetFile(rdr.home)
    home = BeautifulSoup(xml_to_unicode(raw, strip_encoding_pats=True,
                                        resolve_entities=True)[0])

    title = rdr.title
    authors = _get_authors(home)
    mi = MetaInformation(title, authors)

    publisher = _get_publisher(home)
    if publisher:
        mi.publisher = publisher

    isbn = _get_isbn(home)
    if isbn:
        mi.isbn = isbn

    comments = _get_comments(home)
    if comments:
        mi.comments = comments

    cdata = _get_cover(home, rdr)
    if cdata is not None:
        mi.cover_data = ('jpg', cdata)

    return mi


def get_metadata(stream):
    """Read metadata from a CHM file supplied as the file-like *stream*.

    The stream is copied to a temporary file, which is then opened with
    ``CHMReader`` and handed to ``get_metadata_from_reader``.
    """
    # NOTE(review): the decompiler dropped both context-manager expressions
    # (leaving ``fname = _[1]`` / ``f = _[2]``).  The TemporaryFile suffix
    # and the 'wb' mode are reconstructed -- confirm against upstream.
    with TemporaryFile('_chm_metadata.chm') as fname:
        with open(fname, 'wb') as f:
            f.write(stream.read())
        from calibre.ebooks.chm.reader import CHMReader
        rdr = CHMReader(fname, default_log)
        return get_metadata_from_reader(rdr)