home *** CD-ROM | disk | FTP | other *** search
- # Source Generated with Decompyle++
- # File: in.pyc (Python 2.6)
-
- __license__ = 'GPL v3'
- __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
- __docformat__ = 'restructuredtext en'
- import sys
- import re
- from lxml import html
- from lxml.html import soupparser
- from calibre import browser
- from calibre.ebooks.metadata import check_isbn
- from calibre.ebooks.metadata.book.base import Metadata
- from calibre.ebooks.chardet import xml_to_unicode
- from calibre.library.comments import sanitize_comments_html
-
def find_asin(br, isbn):
    """Search Amazon for *isbn* and return the ASIN of the first hit.

    :param br: browser-like object; only ``open_novisit(url).read()`` is used.
    :param isbn: ISBN string appended verbatim to the search URL.
    :return: the ``name`` attribute of the first element whose class is
        ``asinReviewsSummary`` and which has a ``name`` attribute, or
        ``None`` when the result page contains no such element.
    """
    search_url = 'http://www.amazon.com/s?field-keywords=' + isbn
    page = br.open_novisit(search_url).read()
    # xml_to_unicode returns a sequence; its first item is the text to parse.
    page = xml_to_unicode(page, strip_encoding_pats=True,
                          resolve_entities=True)[0]
    document = html.fromstring(page)
    for node in document.xpath('//*[@class="asinReviewsSummary" and @name]'):
        # Only the first matching element matters.
        return node.get('name')
    return None
-
-
def to_asin(br, isbn):
    """Translate an ISBN into the identifier used for Amazon lookups.

    A 13-character ISBN is resolved through :func:`find_asin`; any other
    value is treated as already being a usable ASIN and returned unchanged.

    :param br: browser-like object forwarded to :func:`find_asin`.
    :param isbn: ISBN string.
    :return: the ASIN, or ``None`` when the lookup of a 13-character ISBN
        failed or found nothing.
    """
    if len(isbn) != 13:
        return isbn
    try:
        return find_asin(br, isbn)
    except Exception:
        # Best effort: report the failure but let the caller carry on
        # without an ASIN instead of aborting the whole metadata fetch.
        import traceback
        traceback.print_exc()
        return None
-
-
def get_social_metadata(title, authors, publisher, isbn):
    """Build a ``Metadata`` object for a book, filled in from Amazon.

    The ISBN itself is tried first; if that yields nothing, every ISBN
    reported by ``xisbn.get_associated_isbns`` is tried in turn until one
    of them produces a product page.

    :param title: title passed to ``Metadata``.
    :param authors: authors passed to ``Metadata``.
    :param publisher: accepted for interface compatibility; not used.
    :param isbn: ISBN of the book; validated with ``check_isbn``.
    :return: the ``Metadata`` object. It is returned as constructed when
        the ISBN is missing, invalid, or no Amazon page could be found.
    """
    mi = Metadata(title, authors)
    if not isbn:
        return mi
    isbn = check_isbn(isbn)
    if not isbn:
        return mi

    br = browser()
    asin = to_asin(br, isbn)
    # to_asin() returns None when no ASIN could be found; skip the page
    # fetch in that case instead of building a URL from None.
    if asin and get_metadata(br, asin, mi):
        return mi

    # Imported here rather than at module level, as in the original code.
    from calibre.ebooks.metadata.xisbn import xisbn
    for i in xisbn.get_associated_isbns(isbn):
        asin = to_asin(br, i)
        if asin and get_metadata(br, asin, mi):
            return mi

    return mi
-
-
def get_metadata(br, asin, mi):
    """Fetch the Amazon page for *asin* and copy rating and description
    into *mi*.

    :param br: browser-like object; only ``open_novisit(url).read()`` is used.
    :param asin: identifier appended to ``http://amzn.com/``.
    :param mi: metadata object; ``rating`` and ``comments`` are assigned
        when the page provides them.
    :return: ``False`` when *asin* is empty or the page does not exist
        (HTTP 404, or a page whose title starts with ``404 - ``);
        ``True`` otherwise.
    :raises Exception: any fetch error other than a 404 is re-raised.
    """
    if not asin:
        # to_asin() may hand back None; there is nothing to fetch then.
        return False

    q = 'http://amzn.com/' + asin

    try:
        raw = br.open_novisit(q).read()
    except Exception as e:
        # Only a 404 means "no such product"; everything else is a real
        # failure the caller needs to see.
        if callable(getattr(e, 'getcode', None)) and e.getcode() == 404:
            return False
        raise

    if '<title>404 - ' in raw:
        return False

    raw = xml_to_unicode(raw, strip_encoding_pats=True,
                         resolve_entities=True)[0]
    root = soupparser.fromstring(raw)

    ratings = root.xpath(
        '//form[@id="handleBuy"]/descendant::*[@class="asinReviewsSummary"]')
    if ratings:
        pat = re.compile(r'([0-9.]+) out of (\d+) stars')
        for elem in ratings[0].xpath('descendant::*[@title]'):
            m = pat.match(elem.get('title'))
            if m is None:
                continue
            try:
                # Normalise "X out of Y stars" to a 0-5 scale.
                mi.rating = (float(m.group(1)) / float(m.group(2))) * 5
            except (ValueError, ZeroDivisionError):
                # e.g. a lone "." matched by [0-9.]+, or "out of 0 stars";
                # try the next titled element instead.
                continue
            break

    desc = root.xpath('//div[@id="productDescription"]/*[@class="content"]')
    if desc:
        desc = desc[0]
        for c in desc.xpath('descendant::*[@class="seeAll" or '
                            '@class="emptyClear" or @href]'):
            c.getparent().remove(c)

        desc = html.tostring(desc, method='html', encoding=unicode).strip()
        # Strip all attributes from opening tags.
        desc = re.sub(r'<([a-zA-Z0-9]+)\s[^>]+>', r'<\1>', desc)
        # Drop the "--This text ref..." notice.
        desc = re.sub(r'(?s)<em>--This text ref.*?</em>', '', desc)
        # Drop HTML comments.
        desc = re.sub(r'(?s)<!--.*?-->', '', desc)
        mi.comments = sanitize_comments_html(desc)

    return True
-
-
def main(args=sys.argv):
    """Run the sample Amazon lookups and print each result.

    :param args: accepted for command-line style invocation; not used.
    :return: ``0``, suitable as a process exit status.
    """
    samples = (
        ('Angels & Demons', '9781416580829'),
        ('Star Trek: Destiny: Mere Mortals', '9781416551720'),
        ('The Great Gatsby', '0743273567'),
    )
    for index, (title, isbn) in enumerate(samples):
        if index:
            # Blank line between consecutive results.
            print('')
        # Single-argument print(...) behaves the same as the print statement.
        print(get_social_metadata(title, None, None, isbn))
    return 0
-
# Script entry point: use main()'s return value as the exit status.
if __name__ == '__main__':
    raise SystemExit(main())
-
-