home *** CD-ROM | disk | FTP | other *** search
- # Source Generated with Decompyle++
- # File: in.pyc (Python 2.6)
-
- from __future__ import with_statement
- __license__ = 'GPL v3'
- __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
- __docformat__ = 'restructuredtext en'
- import re
- from calibre.ebooks.BeautifulSoup import BeautifulSoup
- from calibre.ebooks.chardet import xml_to_unicode
- from calibre.ebooks.metadata import string_to_authors, MetaInformation
- from calibre.utils.logging import default_log
- from calibre.ptempfile import TemporaryFile
-
- def _clean(s):
- return s.replace(u' ', u' ')
-
-
- def _detag(tag):
- str = u''
- if tag is None:
- return str
- for elem in tag:
- if hasattr(elem, 'contents'):
- str += _detag(elem)
- continue
- tag is None
- str += _clean(elem)
-
- return str
-
-
def _metadata_from_table(soup, searchfor):
    """Extract a metadata value that is laid out in table cells.

    ``soup.find('td', text=...)`` is asked for the first hit matching
    *searchfor* (case-insensitive) and the parent of that hit is inspected.

    * If the parent's rendered contents consist of nothing but the label,
      the value is the text of the next sibling ``td`` with one leading
      colon removed.
    * Otherwise label and value share the element, and the value is
      whatever follows the first colon.

    :return: the stripped value, or ``None`` when nothing matches.
    """
    label_pattern = re.compile(searchfor, flags = re.I)
    hit = soup.find('td', text = label_pattern)
    if hit is None:
        return None
    cell = hit.parent
    label_only = re.match('^\\s*' + searchfor + '\\s*$', cell.renderContents(None), flags = re.I)
    if not label_only:
        # Label and value live together: drop everything up to the colon.
        return re.sub('^[^:]+:', '', _detag(cell)).strip()
    value = _detag(cell.findNextSibling('td'))
    return re.sub('^:', '', value).strip()
-
-
def _metadata_from_span(soup, searchfor):
    """Extract a metadata value stored in a ``span`` selected by its class.

    The first ``span`` whose ``class`` attribute matches *searchfor*
    (case-insensitive) is rendered, stripped of surrounding whitespace and
    passed through ``_detag``.

    :return: the resulting text, or ``None`` when no such span exists.
    """
    class_pattern = re.compile(searchfor, flags = re.I)
    found = soup.find('span', {'class': class_pattern})
    if found is not None:
        rendered = found.renderContents(None)
        return _detag(rendered.strip())
    return None
-
-
def _get_authors(soup):
    """Return the list of authors found in *soup*.

    A span whose class matches ``author`` is preferred; when it yields
    nothing the table layout is searched for a ``by`` label instead. The
    decompiled code evaluated the span lookup and then threw its result
    away, so the table lookup always won; the ``or`` restores the fallback.

    :return: the result of ``string_to_authors`` on the text found, or a
        one-element list holding the translated ``'Unknown'`` when neither
        lookup finds anything.
    """
    aut = (_metadata_from_span(soup, 'author')
           or _metadata_from_table(soup, r'^\s*by\s*:?\s+'))
    if aut is None:
        # NOTE(review): ``_`` is not defined in this module; presumably it is
        # the translation function installed as a builtin -- confirm.
        return [_('Unknown')]
    return string_to_authors(aut)
-
-
def _get_publisher(soup):
    """Return the publisher found in *soup*, or a falsy value if none.

    A span whose class matches ``imprint`` is preferred; the table layout
    with a ``publisher`` label is only consulted when the span yields
    nothing. The decompiled code discarded the span result entirely.
    """
    return (_metadata_from_span(soup, 'imprint')
            or _metadata_from_table(soup, 'publisher'))
-
-
def _get_isbn(soup):
    """Return the ISBN found in *soup*, or a falsy value if none.

    A span whose class matches ``isbn`` is preferred; the table layout with
    an ``isbn`` label is only consulted when the span yields nothing. The
    decompiled code discarded the span result entirely.
    """
    return (_metadata_from_span(soup, 'isbn')
            or _metadata_from_table(soup, 'isbn'))
-
-
def _get_comments(soup):
    """Build a short comment from the publication date and page count.

    Each value is taken from a span (classes ``cwdate`` and ``pages``) and,
    when the span yields nothing, from the table layout (labels ``pub date``
    and ``pages``). The decompiled code discarded both span results.

    :return: ``u'Published <date>, <n> pages.'``, or ``None`` when either
        value is missing or the page text contains no digits.
    """
    date = (_metadata_from_span(soup, 'cwdate')
            or _metadata_from_table(soup, 'pub date'))
    pages = (_metadata_from_span(soup, 'pages')
             or _metadata_from_table(soup, 'pages'))
    try:
        # Drop any copyright sign; escaped so the source stays pure ASCII.
        date = date.replace(u'\u00a9', u'').strip()
        # Keep only the first run of digits from the page text.
        pages = re.search(r'\d+', pages).group(0)
    except (AttributeError, TypeError):
        # AttributeError: date is None, or the page text has no digits.
        # TypeError: pages is None.
        return None
    return u'Published %s, %s pages.' % (date, pages)
-
-
-
- def _get_cover(soup, rdr):
- ans = None
-
- try:
- ans = soup.find('img', alt = re.compile('cover', flags = re.I))['src']
- except TypeError:
- r = { }
- for img in soup('img'):
-
- try:
- r[abs(float(re.search('[0-9.]+', img['height']).group()) / float(re.search('[0-9.]+', img['width']).group()) - 1.25)] = img['src']
- continue
- except KeyError:
- r[0] = img['src']
- continue
- continue
- continue
-
-
-
- l = r.keys()
- l.sort()
- if l:
- ans = r[l[0]]
-
- except:
- None<EXCEPTION MATCH>KeyError
-
- if ans is not None:
-
- try:
- ans = rdr.GetFile(ans)
- except:
- ans = rdr.root + '/' + ans
-
- try:
- ans = rdr.GetFile(ans)
- ans = None
-
-
- if ans is not None:
- Image = Image
- import PIL
- StringIO = StringIO
- import cStringIO
- buf = StringIO()
-
- try:
- Image.open(StringIO(ans)).convert('RGB').save(buf, 'JPEG')
- ans = buf.getvalue()
- ans = None
-
-
-
- return ans
-
-
def get_metadata_from_reader(rdr):
    """Build a ``MetaInformation`` object from an already opened reader.

    The file named by ``rdr.home`` is fetched, converted with
    ``xml_to_unicode`` and parsed with ``BeautifulSoup``. The title comes
    from ``rdr.title``; authors, publisher, ISBN, comments and cover are
    extracted from the parsed page. Optional fields are only set when the
    corresponding extractor returns something.

    :param rdr: reader providing ``GetFile``, ``home`` and ``title``.
    :return: the populated ``MetaInformation`` instance.
    """
    decoded = xml_to_unicode(rdr.GetFile(rdr.home), strip_encoding_pats = True, resolve_entities = True)
    home = BeautifulSoup(decoded[0])
    mi = MetaInformation(rdr.title, _get_authors(home))

    # Same order as before: publisher, ISBN, comments.
    optional_fields = (
        ('publisher', _get_publisher),
        ('isbn', _get_isbn),
        ('comments', _get_comments),
    )
    for field, extract in optional_fields:
        value = extract(home)
        if value:
            setattr(mi, field, value)

    cover = _get_cover(home, rdr)
    if cover is not None:
        mi.cover_data = ('jpg', cover)
    return mi
-
-
def get_metadata(stream):
    """Read metadata from a CHM file supplied as a readable *stream*.

    The stream is copied to a temporary file, which is handed to
    ``CHMReader`` together with ``default_log``.

    :param stream: object with a ``read()`` method returning the file data.
    :return: whatever ``get_metadata_from_reader`` returns for the reader.
    """
    # NOTE(review): the decompiler reduced both ``with`` statements to
    # ``_[1]`` / ``_[2]`` and empty ``finally`` blocks. They are restored
    # from the module's ``with_statement`` and ``TemporaryFile`` imports.
    # The suffix passed to TemporaryFile is not recoverable from the
    # decompiled text -- confirm against the upstream source.
    with TemporaryFile('_chm_metadata.chm') as fname:
        with open(fname, 'wb') as f:
            f.write(stream.read())
        # Imported lazily, as in the decompiled code.
        from calibre.ebooks.chm.reader import CHMReader
        rdr = CHMReader(fname, default_log)
        return get_metadata_from_reader(rdr)
-
-
-