home *** CD-ROM | disk | FTP | other *** search
- # Source Generated with Decompyle++
- # File: in.pyc (Python 2.6)
-
- __license__ = 'GPL v3'
- __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
- import re
- from calibre.ebooks.metadata import MetaInformation
- from calibre.ebooks.chardet import xml_to_unicode
- from calibre import entity_to_unicode
- from calibre.utils.date import parse_date
-
def get_metadata(stream):
    """Read the whole of ``stream`` and return ``get_metadata_`` of its contents."""
    return get_metadata_(stream.read())
-
-
def get_meta_regexp_(name):
    """Compile a regex matching ``<meta name="NAME" content="...">``.

    ``name`` is taken literally: it is passed through ``re.escape`` before
    being embedded in the pattern, so regex metacharacters in it (for
    example the dots in ``DC.title``) only match themselves.

    The pattern is case-insensitive, accepts single or double quotes around
    both attributes and an optional ``/`` before the closing ``>``.  Group 1
    of a match is the (non-greedy) value of the ``content`` attribute.
    """
    return re.compile(
        '<meta name=[\'"]' + re.escape(name)
        + '[\'"] content=[\'"](.+?)[\'"]\\s*/?>',
        re.IGNORECASE)
-
-
def _comment_value_(src, key, same_quote=False):
    """Return the value of ``KEY="..."`` found inside an HTML comment, or None.

    With ``same_quote`` true the value must be closed by the same quote
    character that opened it; otherwise the value is a run of characters
    that are neither ``'`` nor ``"``.
    """
    if same_quote:
        pat = re.compile(
            r'<!--.*?' + key + r'=(?P<q>[\'"])(.+?)(?P=q).*?-->', re.DOTALL)
        group = 2
    else:
        pat = re.compile(
            r'<!--.*?' + key + r'=[\'"]([^"\']+)[\'"].*?-->', re.DOTALL)
        group = 1
    match = pat.search(src)
    return match.group(group) if match else None


def _first_meta_content_(src, names):
    """Return the content of the first ``<meta>`` tag named in ``names``, or None."""
    for name in names:
        match = get_meta_regexp_(name).search(src)
        if match:
            return match.group(1)
    return None


def _lookup_(src, key, names, same_quote=False):
    """Return the comment value for ``key``, falling back to ``<meta>`` tags."""
    value = _comment_value_(src, key, same_quote)
    if value is None:
        value = _first_meta_content_(src, names)
    return value


def _decode_entities_(text):
    """Replace ``&...;`` entities in ``text`` using ``entity_to_unicode``."""
    return re.sub(r'&(\S+)?;', entity_to_unicode, text)


def get_metadata_(src, encoding = None):
    """Extract book metadata from HTML source and return a MetaInformation.

    ``src`` is the HTML text.  If it is not already ``unicode`` it is
    decoded with ``encoding`` (errors replaced) when one is given, otherwise
    via ``xml_to_unicode``.

    Each field is looked up first in an HTML comment of the form
    ``<!-- ... KEY="value" ... -->`` and then in ``<meta name=... content=...>``
    tags.  The title additionally falls back to the ``<title>`` element before
    the ``<meta>`` tags are consulted.  Fields read: title, author, publisher,
    ISBN, language, publication date, timestamp, series, rating, comments
    and tags.  Dates and ratings that cannot be parsed are ignored.
    """
    if not isinstance(src, unicode):
        if encoding:
            src = src.decode(encoding, 'replace')
        else:
            src = xml_to_unicode(src)[0]

    # Title: comment, then <title>, then <meta> tags.
    title = _comment_value_(src, 'TITLE', same_quote=True)
    if title is None:
        match = re.compile('<title>([^<>]+?)</title>', re.IGNORECASE).search(src)
        if match:
            title = match.group(1)
    if not title:
        title = _first_meta_content_(
            src, ('Title', 'DC.title', 'DCTERMS.title'))

    # Author: commas in the comment form are turned into semicolons.
    author = _comment_value_(src, 'AUTHOR', same_quote=True)
    if author is not None:
        author = author.replace(',', ';')
    else:
        author = _first_meta_content_(
            src, ('Author', 'DC.creator.aut', 'DCTERMS.creator.aut'))

    if title:
        title = _decode_entities_(title)
    if author:
        author = _decode_entities_(author)

    # NOTE(review): the decompiler emitted the uncallable
    # ``None(MetaInformation, title if author else None)`` here.  Rebuilt as
    # the constructor call with an optional one-element author list --
    # confirm against the MetaInformation signature.
    mi = MetaInformation(title, [author] if author else None)

    publisher = _lookup_(
        src, 'PUBLISHER',
        ('Publisher', 'DC.publisher', 'DCTERMS.publisher'),
        same_quote=True)
    if publisher:
        mi.publisher = _decode_entities_(publisher)

    isbn = _lookup_(
        src, 'ISBN',
        ('ISBN', 'DC.identifier.ISBN', 'DCTERMS.identifier.ISBN'))
    if isbn:
        # Keep only digits and the X check character.
        mi.isbn = re.sub('[^0-9xX]', '', isbn)

    language = _lookup_(src, 'LANGUAGE', ('DC.language', 'DCTERMS.language'))
    if language:
        mi.language = language

    pubdate = _lookup_(
        src, 'PUBDATE',
        ('Pubdate', 'Date of publication', 'DC.date.published',
         'DC.date.publication', 'DC.date.issued', 'DCTERMS.issued'))
    if pubdate:
        try:
            mi.pubdate = parse_date(pubdate)
        except Exception:
            # Best effort: parse_date's failure modes are not visible here,
            # and an unparseable date must not abort metadata extraction.
            pass

    timestamp = _lookup_(
        src, 'TIMESTAMP',
        ('Timestamp', 'Date of creation', 'DC.date.created',
         'DC.date.creation', 'DCTERMS.created'))
    if timestamp:
        try:
            mi.timestamp = parse_date(timestamp)
        except Exception:
            # Best effort, same as for the publication date.
            pass

    series = _lookup_(src, 'SERIES', ('Series',))
    if series:
        mi.series = _decode_entities_(series)

    rating = _lookup_(src, 'RATING', ('Rating',))
    if rating:
        try:
            value = float(rating)
        except (TypeError, ValueError):
            value = None
        if value is not None:
            if value < 0:
                value = 0
            if value > 5:
                # Values above 5 are halved once before being range-checked.
                value /= 2
            if value > 5:
                value = 0
            mi.rating = value

    comments = _lookup_(src, 'COMMENTS', ('Comments',))
    if comments:
        mi.comments = _decode_entities_(comments)

    tags = _lookup_(src, 'TAGS', ('Tags',))
    if tags:
        # NOTE(review): the decompiled source parsed the tags but never
        # stored them.  Assumes MetaInformation exposes a ``tags`` list of
        # strings -- confirm.
        tag_list = [t.strip() for t in _decode_entities_(tags).split(',')]
        tag_list = [t for t in tag_list if t]
        if tag_list:
            mi.tags = tag_list

    return mi
-
-