home *** CD-ROM | disk | FTP | other *** search
- # Source Generated with Decompyle++
- # File: in.pyc (Python 2.6)
-
- __license__ = 'GPL v3'
- __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
- __docformat__ = 'restructuredtext en'
- import re
- import sys
- import unittest
- import functools
- import os
- import mimetypes
- import uuid
- import glob
- import cStringIO
- from urllib import unquote
- from urlparse import urlparse
- from lxml import etree
- from calibre.ebooks.chardet import xml_to_unicode
- from calibre.constants import __appname__, __version__, filesystem_encoding
- from calibre.ebooks.metadata.toc import TOC
- from calibre.ebooks.metadata import MetaInformation, string_to_authors
- from calibre.utils.date import parse_date, isoformat
- from calibre.utils.localization import get_lang
-
class Resource(object):
    """A file on disk or a URL that an OPF document refers to.

    Attributes set by the constructor:

    ``orig``
        The ``href_or_path`` argument exactly as it was passed in.
    ``path``
        Absolute filesystem path, or None when the resource is a URL whose
        scheme is neither empty nor ``file``.
    ``fragment``
        The URL fragment without the leading ``#`` ('' when there is none).
    ``mime_type``
        Type guessed by ``mimetypes``; 'application/octet-stream' when the
        guess fails.
    """

    def __init__(self, href_or_path, basedir=os.getcwd(), is_path=True):
        """Create a resource from a filesystem path or from an href.

        :param href_or_path: a path (``is_path`` true) or a URL/href.
        :param basedir: directory that relative paths are resolved against.
            NOTE: the default is evaluated once, when the class is defined,
            not on every call.
        :param is_path: treat ``href_or_path`` as a filesystem path instead
            of parsing it as a URL.
        """
        self.orig = href_or_path
        self._href = None
        self._basedir = basedir
        self.path = None
        self.fragment = ''

        # guess_type can fail on non-string input; fall back to a generic type.
        try:
            self.mime_type = mimetypes.guess_type(href_or_path)[0]
        except Exception:
            self.mime_type = None
        if self.mime_type is None:
            self.mime_type = 'application/octet-stream'

        if is_path:
            path = href_or_path
            if not os.path.isabs(path):
                path = os.path.abspath(os.path.join(basedir, path))
            if isinstance(path, str):
                # Python 2 byte string: store paths as unicode.
                path = path.decode(sys.getfilesystemencoding())
            self.path = path
            return

        url = urlparse(href_or_path)
        if url[0] not in ('', 'file'):
            # Remote resource: keep the href verbatim, there is no local path.
            self._href = href_or_path
            return

        pc = url[2]
        if isinstance(pc, unicode):
            pc = pc.encode('utf-8')
        pc = pc.decode('utf-8')
        self.path = os.path.abspath(
            os.path.join(basedir, pc.replace('/', os.sep)))
        self.fragment = url[-1]

    def href(self, basedir=None):
        """Return a URL for this resource.

        For a local file the result is the '/'-separated path relative to
        ``basedir``, followed by ``#fragment`` when a fragment is present.
        For a remote resource the original href is returned unchanged.

        :param basedir: base directory; when None the resource's own base
            directory is used, or the current directory if that is empty.
        """
        if basedir is None:
            basedir = self._basedir or os.getcwd()
        if self.path is None:
            return self._href

        frag = self.fragment
        if isinstance(frag, unicode):
            frag = frag.encode('utf-8')
        # urlparse strips the '#', so put it back when there is a fragment.
        frag = '#' + frag if frag else ''

        if self.path == basedir:
            return '' + frag
        try:
            rpath = os.path.relpath(self.path, basedir)
        except ValueError:
            # relpath could not relate the two paths; use the absolute path.
            rpath = self.path
        if isinstance(rpath, unicode):
            rpath = rpath.encode('utf-8')
        return rpath.replace(os.sep, '/') + frag

    def set_basedir(self, path):
        """Set the directory that :meth:`href` is relative to by default."""
        self._basedir = path

    def basedir(self):
        """Return the directory that :meth:`href` is relative to by default."""
        return self._basedir

    def __repr__(self):
        return 'Resource(%s, %s)' % (repr(self.path), repr(self.href()))
-
-
class ResourceCollection(object):
    """An ordered, list-backed container of :class:`Resource` objects."""

    def __init__(self):
        self._resources = []

    def __iter__(self):
        for r in self._resources:
            yield r

    def __len__(self):
        return len(self._resources)

    def __getitem__(self, index):
        return self._resources[index]

    def __bool__(self):
        return len(self._resources) > 0

    # Python 2 spells the truth-value hook __nonzero__.
    __nonzero__ = __bool__

    def __str__(self):
        return '[%s]' % ', '.join(repr(r) for r in self)

    def __repr__(self):
        return str(self)

    def append(self, resource):
        """Add ``resource`` at the end.

        :raises ValueError: if ``resource`` is not a :class:`Resource`.
        """
        if not isinstance(resource, Resource):
            raise ValueError('Can only append objects of type Resource')
        self._resources.append(resource)

    def remove(self, resource):
        """Remove the first occurrence of ``resource``."""
        self._resources.remove(resource)

    def replace(self, start, end, items):
        """Replace the slice ``[start:end]`` with ``items`` (any length)."""
        self._resources[start:end] = items

    @staticmethod
    def from_directory_contents(top, topdown=True):
        """Build a collection with one resource per file found under ``top``.

        The previous body joined a directory path with the *list* of
        sub-directory names returned by ``os.walk`` and called a
        ``Resource.from_path`` constructor that does not exist, so it could
        never succeed. This version adds every regular file entry reported by
        ``os.walk``, with ``top`` as each resource's base directory.

        :param top: directory to walk.
        :param topdown: passed through to ``os.walk``.
        """
        collection = ResourceCollection()
        for dirpath, dirnames, filenames in os.walk(top, topdown=topdown):
            for name in filenames:
                path = os.path.abspath(os.path.join(dirpath, name))
                collection.append(Resource(path, basedir=top))
        return collection

    def set_basedir(self, path):
        """Set the base directory of every contained resource."""
        for res in self:
            res.set_basedir(path)
-
-
-
class ManifestItem(Resource):
    """A :class:`Resource` that is an ``<item>`` of an OPF manifest.

    The ``id`` attribute is not set here; :class:`Manifest` assigns it after
    construction.
    """

    @staticmethod
    def from_opf_manifest_item(item, basedir):
        """Build a ManifestItem from a manifest ``<item>`` element.

        :param item: element offering ``get(name, default)`` for attributes.
        :param basedir: directory the item's href is relative to.
        :returns: the new item, or None when the element has no ``href``.
        """
        href = item.get('href', None)
        if not href:
            return None
        res = ManifestItem(href, basedir=basedir, is_path=True)
        mt = item.get('media-type', '').strip()
        if mt:
            # An explicit media-type overrides the guessed MIME type.
            res.mime_type = mt
        return res

    # ``dynamic_property`` is not defined in this file.
    # NOTE(review): presumably installed as a builtin by the application - confirm.
    @dynamic_property
    def media_type(self):
        """Alias for ``mime_type``."""

        def fget(self):
            return self.mime_type

        def fset(self, val):
            self.mime_type = val

        return property(fget=fget, fset=fset)

    def __unicode__(self):
        return u'<item id="%s" href="%s" media-type="%s" />' % (
            self.id, self.href(), self.media_type)

    def __str__(self):
        return unicode(self).encode('utf-8')

    def __repr__(self):
        # A Python 2 __repr__ must return a byte string; returning unicode
        # fails for non-ASCII hrefs, so reuse the UTF-8 encoded form.
        return str(self)

    def __getitem__(self, index):
        """Tuple-style access: index 0 is the href, index 1 the media type."""
        if index == 0:
            return self.href()
        if index == 1:
            return self.media_type
        raise IndexError('%d out of bounds.' % index)
-
-
class Manifest(ResourceCollection):
    """The collection of :class:`ManifestItem` objects of an OPF package.

    ``next_id`` is the number used for the next automatically generated
    ``id%d`` identifier.
    """

    def __init__(self):
        ResourceCollection.__init__(self)
        self.next_id = 1

    @staticmethod
    def from_opf_manifest_element(items, dir):
        """Build a Manifest from manifest ``<item>`` elements.

        Elements without an href are skipped. Elements without an ``id``
        get a generated ``id%d`` identifier.

        :param items: iterable of ``<item>`` elements.
        :param dir: directory the hrefs are relative to.
        """
        m = Manifest()
        for item in items:
            try:
                # from_opf_manifest_item returns None for href-less elements,
                # which append() rejects with ValueError.
                m.append(ManifestItem.from_opf_manifest_item(item, dir))
            except ValueError:
                continue
            m[-1].id = item.get('id', '') or 'id%d' % m.next_id
            m.next_id += 1
        return m

    @staticmethod
    def from_paths(entries):
        """Build a Manifest from ``(path, mime_type)`` pairs.

        A false ``mime_type`` keeps the type guessed from the path.
        """
        m = Manifest()
        for path, mt in entries:
            m.add_item(path, mt)
        return m

    def add_item(self, path, mime_type=None):
        """Append an item for ``path`` and return its generated id."""
        mi = ManifestItem(path, is_path=True)
        if mime_type:
            mi.mime_type = mime_type
        mi.id = 'id%d' % self.next_id
        self.next_id += 1
        self.append(mi)
        return mi.id

    def item(self, id):
        """Return the first item whose id equals ``id``, or None."""
        for i in self:
            if i.id == id:
                return i
        return None

    def id_for_path(self, path):
        """Return the id of the item stored at ``path``, or None."""
        path = os.path.normpath(os.path.abspath(path))
        for i in self:
            if i.path and os.path.normpath(i.path) == path:
                return i.id
        return None

    def path_for_id(self, id):
        """Return the path of the item with this id, or None."""
        found = self.item(id)
        return None if found is None else found.path

    def type_for_id(self, id):
        """Return the MIME type of the item with this id, or None."""
        found = self.item(id)
        return None if found is None else found.mime_type
-
-
-
class Spine(ResourceCollection):
    """The reading order of an OPF package: a collection of Spine.Item."""

    class Item(Resource):
        """A spine entry.

        ``id`` is computed by calling ``idfunc`` with the resolved path.
        ``is_linear`` starts as True and ``idref`` as None.
        """

        def __init__(self, idfunc, *args, **kwargs):
            Resource.__init__(self, *args, **kwargs)
            self.is_linear = True
            self.id = idfunc(self.path)
            self.idref = None

        def __repr__(self):
            return 'Spine.Item(path=%r, id=%s, is_linear=%s)' % (
                self.path, self.id, self.is_linear)

    def __init__(self, manifest):
        ResourceCollection.__init__(self)
        self.manifest = manifest

    @staticmethod
    def from_opf_spine_element(itemrefs, manifest):
        """Build a Spine from ``<itemref>`` elements.

        Item references whose ``idref`` is missing, or does not resolve to a
        path in ``manifest``, are ignored.
        """
        s = Spine(manifest)
        for itemref in itemrefs:
            idref = itemref.get('idref', None)
            if idref is None:
                continue
            path = s.manifest.path_for_id(idref)
            if not path:
                continue
            # Bind idref as a default so the id function does not depend on
            # the loop variable.
            r = Spine.Item(lambda x, idref=idref: idref, path, is_path=True)
            r.is_linear = itemref.get('linear', 'yes') == 'yes'
            r.idref = idref
            s.append(r)
        return s

    @staticmethod
    def from_paths(paths, manifest):
        """Build a Spine from filesystem paths, taking ids from ``manifest``.

        NOTE(review): the decompiled source had a ``try`` with no handler;
        skipping paths that fail is the reconstructed behaviour - confirm.
        """
        s = Spine(manifest)
        for path in paths:
            try:
                s.append(Spine.Item(s.manifest.id_for_path, path, is_path=True))
            except Exception:
                continue
        return s

    def replace(self, start, end, ids):
        """Replace items ``[start:end]`` with the items named by ``ids``.

        :param ids: manifest ids, any number of them.
        :raises ValueError: if an id is not present in the manifest.
        """
        items = []
        for item_id in ids:
            path = self.manifest.path_for_id(item_id)
            if path is None:
                raise ValueError('id %s not in manifest' % item_id)
            items.append(Spine.Item(lambda x, item_id=item_id: item_id,
                                    path, is_path=True))
        ResourceCollection.replace(self, start, end, items)

    def linear_items(self):
        """Yield the paths of the linear items."""
        for r in self:
            if r.is_linear:
                yield r.path

    def nonlinear_items(self):
        """Yield the paths of the non-linear items."""
        for r in self:
            if not r.is_linear:
                yield r.path

    def items(self):
        """Yield the path of every item, in order."""
        for i in self:
            yield i.path
-
-
-
class Guide(ResourceCollection):
    """The ``<guide>`` of an OPF package: a collection of Guide.Reference."""

    COVER_TYPES = ('cover', 'other.ms-coverimage-standard',
                   'other.ms-coverimage')

    class Reference(Resource):
        """A guide ``<reference>``: a Resource with ``type`` and ``title``."""

        @staticmethod
        def from_opf_resource_item(ref, basedir):
            """Build a Reference from a ``<reference>`` element."""
            title = ref.get('title', '')
            href = ref.get('href')
            type = ref.get('type')
            res = Guide.Reference(href, basedir, is_path=True)
            res.title = title
            res.type = type
            return res

        def __repr__(self):
            ans = '<reference type="%s" href="%s" ' % (self.type, self.href())
            if self.title:
                ans += 'title="%s" ' % self.title
            return ans + '/>'

    @staticmethod
    def from_opf_guide(references, base_dir=os.getcwdu()):
        """Build a Guide from ``<reference>`` elements.

        NOTE(review): the decompiled source had a ``try`` with no handler;
        skipping references that cannot be converted is the reconstructed
        behaviour - confirm.
        """
        coll = Guide()
        for ref in references:
            try:
                coll.append(
                    Guide.Reference.from_opf_resource_item(ref, base_dir))
            except Exception:
                continue
        return coll

    def set_cover(self, path):
        """Make ``path`` the cover: one reference per known cover type.

        NOTE(review): the first statement of the decompiled body was
        unrecoverable (``[]([], _[1])``). Removing the existing cover
        references first is an assumption - confirm.
        """
        stale = [ref for ref in self
                 if 'cover' in (getattr(ref, 'type', None) or '').lower()]
        for ref in stale:
            self.remove(ref)
        for type in self.COVER_TYPES:
            self.append(Guide.Reference(path, is_path=True))
            self[-1].type = type
            self[-1].title = ''
-
-
-
class MetadataField(object):
    """Descriptor exposing one metadata element of its owner as an attribute.

    The owning object must provide ``get_metadata_element(name)``,
    ``get_text(elem)``, ``set_text(elem, text)`` and
    ``create_metadata_element(name, is_dc=...)``.

    :param name: name of the metadata element.
    :param is_dc: passed to ``create_metadata_element`` when the element has
        to be created on assignment.
    :param formatter: optional callable applied to the element text on read.
    :param none_is: value returned on read when there is no usable value.
    """

    def __init__(self, name, is_dc=True, formatter=None, none_is=None):
        self.name = name
        self.is_dc = is_dc
        self.formatter = formatter
        self.none_is = none_is

    def __real_get__(self, obj, type=None):
        """Return the formatted, stripped value, or None if unavailable."""
        ans = obj.get_metadata_element(self.name)
        if ans is None:
            return None
        ans = obj.get_text(ans)
        if ans is None:
            return ans
        # The formatter was stored but never applied; apply it here. A value
        # that cannot be converted is treated as missing rather than raising
        # from an attribute read.
        if self.formatter is not None:
            try:
                ans = self.formatter(ans)
            except Exception:
                return None
        if hasattr(ans, 'strip'):
            ans = ans.strip()
        return ans

    def __get__(self, obj, type=None):
        ans = self.__real_get__(obj, type)
        if ans is None:
            ans = self.none_is
        return ans

    def __set__(self, obj, val):
        """Store ``val`` as text; assigning None removes the element."""
        elem = obj.get_metadata_element(self.name)
        if val is None:
            if elem is not None:
                elem.getparent().remove(elem)
            return
        if elem is None:
            elem = obj.create_metadata_element(self.name, is_dc=self.is_dc)
        obj.set_text(elem, unicode(val))
-
-
class OPF(object):
    """Parse an OPF package document with lxml and expose its contents.

    Simple metadata values are exposed through :class:`MetadataField`
    descriptors; manifest, spine and guide are exposed as the collection
    classes defined in this module.
    """

    MIMETYPE = 'application/oebps-package+xml'
    PARSER = etree.XMLParser(recover=True)
    NAMESPACES = {
        None: 'http://www.idpf.org/2007/opf',
        'dc': 'http://purl.org/dc/elements/1.1/',
        'opf': 'http://www.idpf.org/2007/opf',
    }
    META = '{%s}meta' % NAMESPACES['opf']
    # Namespace map for XPath: the None (default) prefix is dropped and the
    # EXSLT regular-expression namespace added.
    xpn = NAMESPACES.copy()
    xpn.pop(None)
    xpn['re'] = 'http://exslt.org/regular-expressions'
    XPath = functools.partial(etree.XPath, namespaces=xpn)
    CONTENT = XPath('self::*[re:match(name(), "meta$", "i")]/@content')
    TEXT = XPath('string()')
    metadata_path = XPath('descendant::*[re:match(name(), "metadata", "i")]')
    metadata_elem_path = XPath('descendant::*[re:match(name(), concat($name, "$"), "i") or (re:match(name(), "meta$", "i") and re:match(@name, concat("^calibre:", $name, "$"), "i"))]')
    title_path = XPath('descendant::*[re:match(name(), "title", "i")]')
    authors_path = XPath('descendant::*[re:match(name(), "creator", "i") and (@role="aut" or @opf:role="aut" or (not(@role) and not(@opf:role)))]')
    bkp_path = XPath('descendant::*[re:match(name(), "contributor", "i") and (@role="bkp" or @opf:role="bkp")]')
    tags_path = XPath('descendant::*[re:match(name(), "subject", "i")]')
    isbn_path = XPath('descendant::*[re:match(name(), "identifier", "i") and '
                      '(re:match(@scheme, "isbn", "i") or re:match(@opf:scheme, "isbn", "i"))]')
    raster_cover_path = XPath('descendant::*[re:match(name(), "meta", "i") and '
                              're:match(@name, "cover", "i") and @content]')
    identifier_path = XPath('descendant::*[re:match(name(), "identifier", "i")]')
    application_id_path = XPath('descendant::*[re:match(name(), "identifier", "i") and '
                                '(re:match(@opf:scheme, "calibre|libprs500", "i") or re:match(@scheme, "calibre|libprs500", "i"))]')
    uuid_id_path = XPath('descendant::*[re:match(name(), "identifier", "i") and '
                         '(re:match(@opf:scheme, "uuid", "i") or re:match(@scheme, "uuid", "i"))]')
    manifest_path = XPath('descendant::*[re:match(name(), "manifest", "i")]/*[re:match(name(), "item", "i")]')
    manifest_ppath = XPath('descendant::*[re:match(name(), "manifest", "i")]')
    spine_path = XPath('descendant::*[re:match(name(), "spine", "i")]/*[re:match(name(), "itemref", "i")]')
    guide_path = XPath('descendant::*[re:match(name(), "guide", "i")]/*[re:match(name(), "reference", "i")]')

    title = MetadataField('title', formatter=lambda x: re.sub('\\s+', ' ', x))
    publisher = MetadataField('publisher')
    language = MetadataField('language')
    comments = MetadataField('description')
    category = MetadataField('type')
    rights = MetadataField('rights')
    series = MetadataField('series', is_dc=False)
    series_index = MetadataField('series_index', is_dc=False, formatter=float, none_is=1)
    rating = MetadataField('rating', is_dc=False, formatter=int)
    pubdate = MetadataField('date', formatter=parse_date)
    publication_type = MetadataField('publication_type', is_dc=False)
    timestamp = MetadataField('timestamp', is_dc=False, formatter=parse_date)

    COVER_TYPES = ('cover', 'other.ms-coverimage-standard',
                   'other.ms-coverimage')

    def __init__(self, stream, basedir=os.getcwdu(), unquote_urls=True,
                 populate_spine=True):
        """Parse an OPF document.

        :param stream: object with ``read()``, or a path to open.
        :param basedir: directory that hrefs in the document are relative to.
        :param unquote_urls: URL-unquote manifest and guide hrefs in the tree.
        :param populate_spine: build ``self.spine`` from the spine element.
        :raises ValueError: for empty input or when no metadata element
            exists.
        """
        if hasattr(stream, 'read'):
            name = getattr(stream, 'name', 'stream')
            raw = stream.read()
        else:
            # We opened the file ourselves, so make sure it is closed again.
            name = stream
            with open(stream, 'rb') as f:
                raw = f.read()
        if not raw:
            raise ValueError('Empty file: ' + name)

        self.basedir = self.base_dir = basedir
        self.path_to_html_toc = None
        self.html_toc_fragment = None
        raw, self.encoding = xml_to_unicode(raw, strip_encoding_pats=True,
                                            resolve_entities=True,
                                            assume_utf8=True)
        # Drop anything that precedes the first tag.
        raw = raw[raw.find('<'):]
        self.root = etree.fromstring(raw, self.PARSER)

        metadata = self.metadata_path(self.root)
        if not metadata:
            raise ValueError('Malformed OPF file: No <metadata> element')
        self.metadata = metadata[0]
        if unquote_urls:
            self.unquote_urls()

        self.manifest = Manifest()
        m = self.manifest_path(self.root)
        if m:
            self.manifest = Manifest.from_opf_manifest_element(m, basedir)

        self.spine = None
        s = self.spine_path(self.root)
        if populate_spine and s:
            self.spine = Spine.from_opf_spine_element(s, self.manifest)

        # The decompiled source always assigned None here, so guide
        # references were never parsed.
        guide = self.guide_path(self.root)
        self.guide = Guide.from_opf_guide(guide, basedir) if guide else None
        self.cover_data = (None, None)
        self.find_toc()

    def find_toc(self):
        """Locate and read the table of contents into ``self.toc``.

        Candidates are tried in this order: the spine's ``toc`` attribute, a
        guide reference of type ``toc``, then a manifest item whose href
        contains ``toc``. Any failure is swallowed (best effort), leaving
        ``self.toc`` as whatever had been set up to that point.
        """
        self.toc = None
        try:
            spine = self.XPath('descendant::*[re:match(name(), "spine", "i")]')(self.root)
            toc = None
            if spine:
                toc = spine[0].get('toc', None)
            if toc is None and self.guide:
                for item in self.guide:
                    if item.type and item.type.lower() == 'toc':
                        toc = item.path
            if toc is None:
                for item in self.manifest:
                    if 'toc' in item.href().lower():
                        toc = item.path
            if toc is None:
                return

            self.toc = TOC(base_path=self.base_dir)
            mtype = None
            if getattr(self, 'manifest', None) is not None:
                mtype = self.manifest.type_for_id(toc)
            is_ncx = mtype is not None and 'dtbncx' in mtype
            if is_ncx or toc.lower() in ('ncx', 'ncxtoc'):
                path = self.manifest.path_for_id(toc)
                if path:
                    self.toc.read_ncx_toc(path)
                else:
                    # Fall back to any .ncx file next to the OPF.
                    f = glob.glob(os.path.join(self.base_dir, '*.ncx'))
                    if f:
                        self.toc.read_ncx_toc(f[0])
            else:
                self.path_to_html_toc = toc.partition('#')[0]
                self.html_toc_fragment = toc.partition('#')[-1]
                if (not os.access(self.path_to_html_toc, os.R_OK) or
                        not os.path.isfile(self.path_to_html_toc)):
                    self.path_to_html_toc = None
                self.toc.read_html_toc(toc)
        except Exception:
            # Deliberately best effort: a broken TOC must not prevent the
            # OPF itself from loading.
            pass

    def get_text(self, elem):
        """Return the value of ``elem``.

        The ``content`` attribute is used when the CONTENT XPath (meta
        elements) yields one, otherwise the element's string value.
        """
        return u''.join(self.CONTENT(elem) or self.TEXT(elem))

    def set_text(self, elem, content):
        """Store ``content`` in ``elem`` (``content`` attribute for meta)."""
        if elem.tag == self.META:
            elem.attrib['content'] = content
        else:
            elem.text = content

    def itermanifest(self):
        """Return the manifest ``<item>`` elements."""
        return self.manifest_path(self.root)

    def create_manifest_item(self, href, media_type):
        """Create (but do not insert) an ``<item>`` with an unused id.

        A false ``media_type`` defaults to 'application/xhtml+xml'.
        """
        ids = set(i.get('id', None) for i in self.itermanifest())
        id = None
        for c in xrange(1, sys.maxint):
            id = 'id%d' % c
            if id not in ids:
                break
        if not media_type:
            media_type = 'application/xhtml+xml'
        ans = etree.Element('{%s}item' % self.NAMESPACES['opf'],
                            attrib={'id': id, 'href': href,
                                    'media-type': media_type})
        ans.tail = '\n\t\t'
        return ans

    def replace_manifest_item(self, item, items):
        """Replace ``item`` with new items built from ``items``.

        :param items: iterable of ``(href, media_type)`` tuples.
        :returns: ids of the new items (``<old id>.<n>``, n from 1).
        """
        items = [self.create_manifest_item(*i) for i in items]
        for i, item2 in enumerate(items):
            item2.set('id', item.get('id') + '.%d' % (i + 1))
        manifest = item.getparent()
        index = manifest.index(item)
        manifest[index:index + 1] = items
        return [i.get('id') for i in items]

    def add_path_to_manifest(self, path, media_type):
        """Add an ``<item>`` for ``path`` unless one already points at it."""
        path = os.path.abspath(path)
        for i in self.itermanifest():
            xpath = os.path.join(self.base_dir, *i.get('href', '').split('/'))
            if os.path.abspath(xpath) == path:
                return
        href = os.path.relpath(path, self.base_dir).replace(os.sep, '/')
        item = self.create_manifest_item(href, media_type)
        self.manifest_ppath(self.root)[0].append(item)

    def iterspine(self):
        """Return the spine ``<itemref>`` elements."""
        return self.spine_path(self.root)

    def spine_items(self):
        """Yield the href of every manifest item referenced by the spine."""
        for item in self.iterspine():
            idref = item.get('idref', '')
            for x in self.itermanifest():
                if x.get('id', None) == idref:
                    yield x.get('href', '')

    def first_spine_item(self):
        """Return the href of the first spine entry, or None."""
        items = self.iterspine()
        if not items:
            return None
        idref = items[0].get('idref', '')
        for x in self.itermanifest():
            if x.get('id', None) == idref:
                return x.get('href', None)
        return None

    def create_spine_item(self, idref):
        """Create (but do not insert) an ``<itemref>`` element."""
        ans = etree.Element('{%s}itemref' % self.NAMESPACES['opf'], idref=idref)
        ans.tail = '\n\t\t'
        return ans

    def replace_spine_items_by_idref(self, idref, new_idrefs):
        """Replace spine entries referring to ``idref`` with new entries.

        NOTE(review): the selection of the entries to replace was lost in
        decompilation (``old = _[1]``). Matching on the ``idref`` attribute
        is the reconstructed behaviour - confirm.
        """
        items = list(map(self.create_spine_item, new_idrefs))
        spine = self.XPath('/opf:package/*[re:match(name(), "spine", "i")]')(self.root)[0]
        old = [i for i in self.iterspine() if i.get('idref', None) == idref]
        for x in old:
            i = spine.index(x)
            spine[i:i + 1] = items

    def create_guide_element(self):
        """Append an empty ``<guide>`` to the package root and return it."""
        e = etree.SubElement(self.root, '{%s}guide' % self.NAMESPACES['opf'])
        e.text = '\n '
        e.tail = '\n'
        return e

    def remove_guide(self):
        """Drop ``self.guide`` and every guide element under the root."""
        self.guide = None
        for g in self.root.xpath('./*[re:match(name(), "guide", "i")]',
                                 namespaces={'re': 'http://exslt.org/regular-expressions'}):
            self.root.remove(g)

    def create_guide_item(self, type, title, href):
        """Create (but do not insert) a guide ``<reference>`` element."""
        e = etree.Element('{%s}reference' % self.NAMESPACES['opf'],
                          type=type, title=title, href=href)
        e.tail = '\n'
        return e

    def add_guide_item(self, type, title, href):
        """Append a ``<reference>`` to the first guide element.

        Raises IndexError when the document has no guide element.
        """
        g = self.root.xpath('./*[re:match(name(), "guide", "i")]',
                            namespaces={'re': 'http://exslt.org/regular-expressions'})[0]
        g.append(self.create_guide_item(type, title, href))

    def iterguide(self):
        """Return the guide ``<reference>`` elements."""
        return self.guide_path(self.root)

    def unquote_urls(self):
        """URL-unquote the href of every manifest item and guide reference."""

        def get_href(item):
            raw = unquote(item.get('href', ''))
            if not isinstance(raw, unicode):
                raw = raw.decode('utf-8')
            return raw

        for item in self.itermanifest():
            item.set('href', get_href(item))
        for item in self.iterguide():
            item.set('href', get_href(item))

    # -- helpers shared by the properties below ------------------------------

    def _first_text(self, path):
        """Return the text of the first match of ``path``, or None."""
        for match in path(self.metadata):
            return self.get_text(match) or None
        return None

    def _set_first_text(self, path, tag, attr, attr_value, val):
        """Set the first match of ``path``, creating ``dc:<tag>`` if absent.

        A newly created element carries ``opf:<attr>="<attr_value>"``.
        """
        matches = path(self.metadata)
        if not matches:
            attrib = {'{%s}%s' % (self.NAMESPACES['opf'], attr): attr_value}
            matches = [self.create_metadata_element(tag, attrib=attrib)]
        self.set_text(matches[0], unicode(val))

    def _get_file_as(self, matches):
        """Return the first non-empty ``file-as`` among ``matches``."""
        for match in matches:
            ans = match.get('{%s}file-as' % self.NAMESPACES['opf'], None)
            if not ans:
                ans = match.get('file-as', None)
            if ans:
                return ans
        return None

    def _set_file_as(self, matches, val):
        """Set ``opf:file-as`` on the first match, replacing older variants."""
        if not matches:
            return
        first = matches[0]
        # Snapshot the keys: attributes are removed while looping.
        for key in list(first.attrib.keys()):
            if key.endswith('file-as'):
                first.attrib.pop(key)
        first.set('{%s}file-as' % self.NAMESPACES['opf'], unicode(val))

    # -- properties ----------------------------------------------------------
    # ``dynamic_property`` is not defined in this file.
    # NOTE(review): presumably installed as a builtin by the application - confirm.

    @dynamic_property
    def authors(self):

        def fget(self):
            ans = []
            for elem in self.authors_path(self.metadata):
                ans.extend(string_to_authors(self.get_text(elem)))
            return ans

        def fset(self, val):
            for elem in list(self.authors_path(self.metadata)):
                elem.getparent().remove(elem)
            elems = []
            for author in val:
                attrib = {'{%s}role' % self.NAMESPACES['opf']: 'aut'}
                elem = self.create_metadata_element('creator', attrib=attrib)
                self.set_text(elem, author.strip())
                elems.append(elem)
            # Move the new creators to the front, preserving their order.
            for elem in reversed(elems):
                parent = elem.getparent()
                parent.remove(elem)
                parent.insert(0, elem)

        return property(fget=fget, fset=fset)

    @dynamic_property
    def author_sort(self):

        def fget(self):
            return self._get_file_as(self.authors_path(self.metadata))

        def fset(self, val):
            self._set_file_as(self.authors_path(self.metadata), val)

        return property(fget=fget, fset=fset)

    @dynamic_property
    def title_sort(self):

        def fget(self):
            return self._get_file_as(self.title_path(self.metadata))

        def fset(self, val):
            self._set_file_as(self.title_path(self.metadata), val)

        return property(fget=fget, fset=fset)

    @dynamic_property
    def tags(self):

        def fget(self):
            ans = []
            for tag in self.tags_path(self.metadata):
                text = self.get_text(tag)
                if text and text.strip():
                    # One subject element may hold several comma-separated tags.
                    ans.extend([x.strip() for x in text.split(',')])
            return ans

        def fset(self, val):
            for tag in list(self.tags_path(self.metadata)):
                tag.getparent().remove(tag)
            for tag in val:
                elem = self.create_metadata_element('subject')
                self.set_text(elem, unicode(tag))

        return property(fget=fget, fset=fset)

    @dynamic_property
    def isbn(self):

        def fget(self):
            return self._first_text(self.isbn_path)

        def fset(self, val):
            if val is None:
                # Assigning None removes every ISBN identifier.
                for x in self.isbn_path(self.metadata):
                    x.getparent().remove(x)
                return
            self._set_first_text(self.isbn_path, 'identifier',
                                 'scheme', 'ISBN', val)

        return property(fget=fget, fset=fset)

    @dynamic_property
    def application_id(self):

        def fget(self):
            return self._first_text(self.application_id_path)

        def fset(self, val):
            self._set_first_text(self.application_id_path, 'identifier',
                                 'scheme', 'calibre', val)

        return property(fget=fget, fset=fset)

    @dynamic_property
    def uuid(self):

        def fget(self):
            return self._first_text(self.uuid_id_path)

        def fset(self, val):
            self._set_first_text(self.uuid_id_path, 'identifier',
                                 'scheme', 'uuid', val)

        return property(fget=fget, fset=fset)

    @dynamic_property
    def book_producer(self):

        def fget(self):
            return self._first_text(self.bkp_path)

        def fset(self, val):
            self._set_first_text(self.bkp_path, 'contributor',
                                 'role', 'bkp', val)

        return property(fget=fget, fset=fset)

    def identifier_iter(self):
        """Yield every identifier element of the metadata."""
        for item in self.identifier_path(self.metadata):
            yield item

    def guess_cover(self):
        """Guess a cover image from the identifiers.

        For every identifier that has a ``scheme`` attribute, look in
        ``base_dir`` for a readable image named after the identifier text
        with dashes removed. Returns the path of the first one found, else
        None. (The decompiled code returned the boolean from ``os.access``
        instead of the path.)
        """
        if not (self.base_dir and os.path.exists(self.base_dir)):
            return None
        for item in self.identifier_path(self.metadata):
            scheme = None
            for key in item.attrib.keys():
                if key.endswith('scheme'):
                    scheme = item.get(key)
                    break
            if scheme is None or not item.text:
                continue
            prefix = item.text.replace('-', '')
            for suffix in ('.jpg', '.jpeg', '.gif', '.png', '.bmp'):
                cpath = os.path.join(self.base_dir, prefix + suffix)
                if os.access(cpath, os.R_OK):
                    return cpath
        return None

    @property
    def raster_cover(self):
        """href of the manifest item named by the cover ``<meta>``, or None."""
        covers = self.raster_cover_path(self.metadata)
        if covers:
            cover_id = covers[0].get('content')
            for item in self.itermanifest():
                if item.get('id', None) == cover_id:
                    return item.get('href', None)
        return None

    @dynamic_property
    def cover(self):

        def fget(self):
            if self.guide is not None:
                for t in self.COVER_TYPES:
                    for item in self.guide:
                        if item.type.lower() == t:
                            return item.path
            try:
                return self.guess_cover()
            except Exception:
                # Guessing is best effort only.
                return None

        def fset(self, path):
            if self.guide is not None:
                self.guide.set_cover(path)
                for item in list(self.iterguide()):
                    if 'cover' in item.get('type', ''):
                        item.getparent().remove(item)
            else:
                g = self.create_guide_element()
                self.guide = Guide()
                self.guide.set_cover(path)
                etree.SubElement(g, 'opf:reference', nsmap=self.NAMESPACES,
                                 attrib={'type': 'cover',
                                         'href': self.guide[-1].href()})
            id = self.manifest.id_for_path(self.cover)
            if id is None:
                for t in self.COVER_TYPES:
                    for item in self.guide:
                        if item.type.lower() == t:
                            # NOTE(review): the created element is discarded,
                            # as in the original; it is never added to the
                            # manifest - confirm whether that is intended.
                            self.create_manifest_item(
                                item.href(), mimetypes.guess_type(path)[0])

        return property(fget=fget, fset=fset)

    def get_metadata_element(self, name):
        """Return the last metadata element matching ``name``, or None."""
        matches = self.metadata_elem_path(self.metadata, name=name)
        if matches:
            return matches[-1]
        return None

    def create_metadata_element(self, name, attrib=None, is_dc=True):
        """Append a new metadata element and return it.

        With ``is_dc`` the element is ``dc:<name>``; otherwise it is an
        ``opf:meta`` element whose ``name`` attribute is ``calibre:<name>``.
        The caller's ``attrib`` dict is not modified.
        """
        if is_dc:
            name = '{%s}%s' % (self.NAMESPACES['dc'], name)
        else:
            attrib = dict(attrib or {})
            attrib['name'] = 'calibre:' + name
            name = '{%s}%s' % (self.NAMESPACES['opf'], 'meta')
        elem = etree.SubElement(self.metadata, name, attrib=attrib,
                                nsmap=self.NAMESPACES)
        elem.tail = '\n'
        return elem

    def render(self, encoding='utf-8'):
        """Serialise the document, adding an XML declaration if missing."""
        raw = etree.tostring(self.root, encoding=encoding, pretty_print=True)
        if not raw.lstrip().startswith('<?xml '):
            raw = '<?xml version="1.0" encoding="%s"?>\n' % encoding.upper() + raw
        return raw

    def smart_update(self, mi, replace_metadata=False):
        """Copy every set attribute of ``mi`` onto this object.

        Values that are None, ``[]`` or ``(None, None)`` are skipped.
        ``replace_metadata`` is accepted but not used.
        """
        for attr in ('title', 'authors', 'author_sort', 'title_sort',
                     'publisher', 'series', 'series_index', 'rating', 'isbn',
                     'language', 'tags', 'category', 'comments', 'pubdate'):
            val = getattr(mi, attr, None)
            if val is not None and val != [] and val != (None, None):
                setattr(self, attr, val)
-
-
-
class OPFCreator(MetaInformation):
    """Build an OPF document (and optionally an NCX) from metadata."""

    def __init__(self, base_path, *args, **kwargs):
        """
        :param base_path: directory the OPF is written to; relative manifest
            and spine paths are resolved against it. Remaining arguments are
            passed to ``MetaInformation.__init__``.
        """
        MetaInformation.__init__(self, *args, **kwargs)
        self.base_path = os.path.abspath(base_path)
        if self.application_id is None:
            self.application_id = str(uuid.uuid4())
        if not isinstance(self.toc, TOC):
            self.toc = None
        if not self.authors:
            self.authors = [_('Unknown')]
        if self.guide is None:
            self.guide = Guide()
        if self.cover:
            self.guide.set_cover(self.cover)

    def _absolute(self, path):
        """Return ``path`` unchanged if absolute, else resolved on base_path."""
        if os.path.isabs(path):
            return path
        return os.path.abspath(os.path.join(self.base_path, path))

    def create_manifest(self, entries):
        """Create ``self.manifest`` from ``(path, mime_type)`` pairs.

        Relative paths are resolved against ``base_path``. A false mime type
        keeps the type guessed from the path.
        """
        resolved = []
        for entry in entries:
            if os.path.isabs(entry[0]):
                resolved.append(entry)
            else:
                resolved.append((self._absolute(entry[0]), entry[1]))
        self.manifest = Manifest.from_paths(resolved)
        self.manifest.set_basedir(self.base_path)

    def create_manifest_from_files_in(self, files_and_dirs):
        """Create the manifest from files and, recursively, from directories."""
        entries = []
        for i in files_and_dirs:
            if os.path.isdir(i):
                for root, dirnames, files in os.walk(i):
                    for name in files:
                        path = os.path.join(root, name)
                        if os.path.isfile(path):
                            entries.append((path, None))
            else:
                entries.append((i, None))
        self.create_manifest(entries)

    def create_spine(self, entries):
        """Create ``self.spine`` from paths; the manifest must exist first."""
        entries = [self._absolute(x) for x in entries]
        self.spine = Spine.from_paths(entries, self.manifest)

    def set_toc(self, toc):
        """Set the table of contents written by :meth:`render`."""
        self.toc = toc

    def create_guide(self, guide_element):
        """Create ``self.guide`` from guide ``<reference>`` elements."""
        self.guide = Guide.from_opf_guide(guide_element, self.base_path)
        self.guide.set_basedir(self.base_path)

    def render(self, opf_stream=sys.stdout, ncx_stream=None,
               ncx_manifest_entry=None, encoding=None):
        """Write the OPF to ``opf_stream`` and the NCX to ``ncx_stream``.

        :param ncx_manifest_entry: path of the NCX file; when given together
            with a toc, an item with id ``ncx`` is (re)added to the manifest.
        :param encoding: output encoding, 'utf-8' when None.
        """
        if encoding is None:
            encoding = 'utf-8'
        toc = getattr(self, 'toc', None)

        if self.manifest:
            self.manifest.set_basedir(self.base_path)
            if ncx_manifest_entry is not None and toc is not None:
                if not os.path.isabs(ncx_manifest_entry):
                    ncx_manifest_entry = os.path.join(self.base_path,
                                                      ncx_manifest_entry)
                # NOTE(review): the selection was lost in decompilation
                # (``remove = _[1]``). Dropping existing 'ncx' items is the
                # reconstructed behaviour - confirm.
                remove = [i for i in self.manifest if i.id == 'ncx']
                for item in remove:
                    self.manifest.remove(item)
                self.manifest.append(
                    ManifestItem(ncx_manifest_entry, self.base_path))
                self.manifest[-1].id = 'ncx'
                self.manifest[-1].mime_type = 'application/x-dtbncx+xml'

        if self.guide is None:
            self.guide = Guide()
        if self.cover:
            self.guide.set_cover(self._absolute(self.cover))
        self.guide.set_basedir(self.base_path)

        # Imported locally, as in the original (decompiled to self-assignments).
        from lxml.builder import ElementMaker
        from calibre.ebooks.oeb.base import OPF2_NS, DC11_NS, CALIBRE_NS

        # Elements are built in a placeholder namespace that is textually
        # replaced by the OPF namespace after serialisation (see below).
        DNS = OPF2_NS + '___xx___'
        E = ElementMaker(namespace=DNS, nsmap={None: DNS})
        M = ElementMaker(namespace=DNS, nsmap={'dc': DC11_NS,
                                               'calibre': CALIBRE_NS,
                                               'opf': OPF2_NS})
        DC = ElementMaker(namespace=DC11_NS)

        def DC_ELEM(tag, text, dc_attrs=None, opf_attrs=None):
            """Build dc:<tag>; ``opf_attrs`` keys go in the OPF namespace."""
            dc_attrs = dc_attrs or {}
            opf_attrs = opf_attrs or {}
            maker = getattr(DC, tag)
            elem = maker(text, **dc_attrs) if text else maker(**dc_attrs)
            for k, v in opf_attrs.items():
                elem.set('{%s}%s' % (OPF2_NS, k), v)
            return elem

        def CAL_ELEM(name, content):
            return M.meta(name=name, content=content)

        metadata = M.metadata()
        a = metadata.append

        role = {}
        if self.title_sort:
            role = {'file-as': self.title_sort}
        a(DC_ELEM('title', self.title if self.title else _('Unknown'),
                  opf_attrs=role))
        for i, author in enumerate(self.authors):
            fa = {'role': 'aut'}
            # Only the first creator carries the sort string.
            if i == 0 and self.author_sort:
                fa['file-as'] = self.author_sort
            a(DC_ELEM('creator', author, opf_attrs=fa))
        a(DC_ELEM('contributor',
                  '%s (%s) [%s]' % (__appname__, __version__,
                                    'http://calibre-ebook.com'),
                  opf_attrs={'role': 'bkp', 'file-as': __appname__}))
        a(DC_ELEM('identifier', str(self.application_id),
                  opf_attrs={'scheme': __appname__},
                  dc_attrs={'id': __appname__ + '_id'}))
        if getattr(self, 'pubdate', None) is not None:
            a(DC_ELEM('date', self.pubdate.isoformat()))

        lang = self.language
        if not lang or lang.lower() == 'und':
            lang = get_lang().replace('_', '-')
        a(DC_ELEM('language', lang))

        if self.comments:
            a(DC_ELEM('description', self.comments))
        if self.publisher:
            a(DC_ELEM('publisher', self.publisher))
        if self.isbn:
            a(DC_ELEM('identifier', self.isbn, opf_attrs={'scheme': 'ISBN'}))
        if self.rights:
            a(DC_ELEM('rights', self.rights))
        if self.tags:
            for tag in self.tags:
                a(DC_ELEM('subject', tag))
        if self.series:
            a(CAL_ELEM('calibre:series', self.series))
            if self.series_index is not None:
                a(CAL_ELEM('calibre:series_index', self.format_series_index()))
        if self.rating is not None:
            a(CAL_ELEM('calibre:rating', str(self.rating)))
        if self.timestamp is not None:
            a(CAL_ELEM('calibre:timestamp', self.timestamp.isoformat()))
        if self.publication_type is not None:
            a(CAL_ELEM('calibre:publication_type', self.publication_type))

        manifest = E.manifest()
        if self.manifest is not None:
            for ref in self.manifest:
                item = E.item(id=str(ref.id), href=ref.href())
                item.set('media-type', ref.mime_type)
                manifest.append(item)

        spine = E.spine()
        if self.toc is not None:
            spine.set('toc', 'ncx')
        if self.spine is not None:
            for ref in self.spine:
                spine.append(E.itemref(idref=ref.id))

        guide = E.guide()
        if self.guide is not None:
            for ref in self.guide:
                item = E.reference(type=ref.type, href=ref.href())
                if ref.title:
                    item.set('title', ref.title)
                guide.append(item)

        root = E.package(metadata, manifest, spine, guide)
        root.set('unique-identifier', __appname__ + '_id')
        raw = etree.tostring(root, pretty_print=True, xml_declaration=True,
                             encoding=encoding)
        raw = raw.replace(DNS, OPF2_NS)
        opf_stream.write(raw)
        opf_stream.flush()
        if toc is not None and ncx_stream is not None:
            toc.render(ncx_stream, self.application_id)
            ncx_stream.flush()
-
-
-
def metadata_to_opf(mi, as_string=True):
    """Build a minimal OPF package document from the metadata object *mi*.

    The document consists of a ``<metadata>`` element (two identifiers plus
    one child per populated field of *mi*) and a ``<guide>`` element that
    receives a cover reference when ``mi.cover`` is set.

    Note that *mi* is modified in place: missing ``application_id`` and
    ``uuid`` values are filled with fresh UUID4 strings, a missing
    ``book_producer`` is set to the application name/version string, a
    missing ``language`` is set to ``'UND'``, and a non-unicode ``cover``
    is decoded with ``filesystem_encoding``.

    :param mi: metadata object providing the attributes read below
        (``title``, ``authors``, ``author_sort``, ``pubdate``, ...).
    :param as_string: when true (the default) return the serialized,
        UTF-8 encoded document including an XML declaration; otherwise
        return the root element itself.
    """
    # Function-scope imports. Inside this function ``OPF`` and ``DC`` are
    # the helpers from calibre.ebooks.oeb.base (called with a local tag or
    # attribute name), and they shadow any module-level name ``OPF``.
    from lxml import etree
    import textwrap
    from calibre.ebooks.oeb.base import OPF, DC

    if not mi.application_id:
        mi.application_id = str(uuid.uuid4())

    if not mi.uuid:
        mi.uuid = str(uuid.uuid4())

    if not mi.book_producer:
        mi.book_producer = (__appname__ + ' (%s) ' % __version__ +
                            '[http://calibre-ebook.com]')

    if not mi.language:
        mi.language = 'UND'

    # Skeleton document; the identifiers are interpolated before dedent().
    # NOTE(review): the values are inserted without XML escaping, so an
    # application_id/uuid containing markup characters would break parsing.
    template = (
        '\n <package xmlns="http://www.idpf.org/2007/opf" unique-identifier="uuid_id">'
        '\n <metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">'
        '\n <dc:identifier opf:scheme="%(a)s" id="%(a)s_id">%(id)s</dc:identifier>'
        '\n <dc:identifier opf:scheme="uuid" id="uuid_id">%(uuid)s</dc:identifier>'
        '\n </metadata>'
        '\n <guide/>'
        '\n </package>'
        '\n '
    )
    root = etree.fromstring(textwrap.dedent(
        template % dict(a=__appname__, id=mi.application_id, uuid=mi.uuid)))
    metadata = root[0]
    guide = root[1]
    metadata[0].tail = '\n' + ' '

    def factory(tag, text=None, sort=None, role=None, scheme=None,
                name=None, content=None):
        """Append a child element named *tag* to ``metadata``.

        Only truthy keyword values become attributes; *text*, when given,
        is stripped and used as the element text.
        """
        attrib = {}
        if sort:
            attrib[OPF('file-as')] = sort
        if role:
            attrib[OPF('role')] = role
        if scheme:
            attrib[OPF('scheme')] = scheme
        if name:
            attrib['name'] = name
        if content:
            attrib['content'] = content

        elem = metadata.makeelement(tag, attrib=attrib)
        elem.tail = '\n' + ' '
        if text:
            elem.text = text.strip()
        metadata.append(elem)

    factory(DC('title'), mi.title)
    for au in mi.authors:
        factory(DC('creator'), au, mi.author_sort, 'aut')

    factory(DC('contributor'), mi.book_producer, __appname__, 'bkp')
    if hasattr(mi.pubdate, 'isoformat'):
        factory(DC('date'), isoformat(mi.pubdate))

    if mi.category:
        factory(DC('type'), mi.category)

    if mi.comments:
        factory(DC('description'), mi.comments)

    if mi.publisher:
        factory(DC('publisher'), mi.publisher)

    if mi.isbn:
        factory(DC('identifier'), mi.isbn, scheme='ISBN')

    if mi.rights:
        factory(DC('rights'), mi.rights)

    # An unset or 'und' (undetermined) language falls back to the
    # interface language, with '_' replaced by '-'.
    if mi.language and mi.language.lower() != 'und':
        lang = mi.language
    else:
        lang = get_lang().replace('_', '-')
    factory(DC('language'), lang)

    if mi.tags:
        for tag in mi.tags:
            factory(DC('subject'), tag)

    # calibre-specific fields are written as <meta name="calibre:..."/>.
    meta = lambda n, c: factory('meta', name='calibre:' + n, content=c)
    if mi.series:
        meta('series', mi.series)

    if mi.series_index is not None:
        meta('series_index', mi.format_series_index())

    if mi.rating is not None:
        meta('rating', str(mi.rating))

    if hasattr(mi.timestamp, 'isoformat'):
        meta('timestamp', isoformat(mi.timestamp))

    if mi.publication_type:
        meta('publication_type', mi.publication_type)

    if mi.title_sort:
        meta('title_sort', mi.title_sort)

    metadata[-1].tail = '\n' + ' '
    if mi.cover:
        if not isinstance(mi.cover, unicode):
            mi.cover = mi.cover.decode(filesystem_encoding)

        guide.text = '\n' + ' '
        r = guide.makeelement(OPF('reference'), attrib={
            'type': 'cover',
            'title': _('Cover'),
            'href': mi.cover})
        r.tail = '\n' + ' '
        guide.append(r)

    if as_string:
        return etree.tostring(root, pretty_print=True, encoding='utf-8',
                              xml_declaration=True)
    return root
-
-
- def test_m2o():
- nowf = now
- import calibre.utils.date
- StringIO = StringIO
- import cStringIO
- mi = MetaInformation('test & title', [
- 'a"1',
- "a'2"])
- mi.title_sort = 'a\'"b'
- mi.author_sort = 'author sort'
- mi.pubdate = nowf()
- mi.language = 'en'
- mi.category = 'test'
- mi.comments = 'what a fun book\n\n'
- mi.publisher = 'publisher'
- mi.isbn = 'boooo'
- mi.tags = [
- 'a',
- 'b']
- mi.series = 's"c\'l&<>'
- mi.series_index = 3.34
- mi.rating = 3
- mi.timestamp = nowf()
- mi.publication_type = 'ooooo'
- mi.rights = 'yes'
- mi.cover = 'asd.jpg'
- opf = metadata_to_opf(mi)
- print opf
- newmi = MetaInformation(OPF(StringIO(opf)))
- for attr in ('author_sort', 'title_sort', 'comments', 'category', 'publisher', 'series', 'series_index', 'rating', 'isbn', 'tags', 'cover_data', 'application_id', 'language', 'cover', 'book_producer', 'timestamp', 'lccn', 'lcc', 'ddc', 'pubdate', 'rights', 'publication_type'):
- o = getattr(mi, attr)
- n = getattr(newmi, attr)
- if o != n and o.strip() != n.strip():
- print 'FAILED:', attr, getattr(mi, attr), '!=', getattr(newmi, attr)
- continue
-
-
-
class OPFTest(unittest.TestCase):
    """Unit tests that read, modify and re-create a small OPF document."""

    def setUp(self):
        # The title uses XML character/entity references so that the
        # fixture is well-formed XML (no bare '&') and this source file
        # stays pure ASCII: &#169; is the copyright sign, &#223; is the
        # sharp s.
        self.stream = cStringIO.StringIO(
            '<?xml version="1.0" encoding="UTF-8"?>\n'
            '<package version="2.0" xmlns="http://www.idpf.org/2007/opf" >\n'
            '<metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">\n'
            ' <dc:title opf:file-as="Wow">A Cool &amp; &#169; &#223; Title</dc:title>\n'
            ' <creator opf:role="aut" file-as="Monkey">Monkey Kitchen</creator>\n'
            ' <creator opf:role="aut">Next</creator>\n'
            ' <dc:subject>One</dc:subject><dc:subject>Two</dc:subject>\n'
            ' <dc:identifier scheme="ISBN">123456789</dc:identifier>\n'
            ' <meta name="calibre:series" content="A one book series" />\n'
            ' <meta name="calibre:rating" content="4"/>\n'
            ' <meta name="calibre:publication_type" content="test"/>\n'
            ' <meta name="calibre:series_index" content="2.5" />\n'
            '</metadata>\n'
            '<manifest>\n'
            ' <item id="1" href="a%20%7E%20b" media-type="text/txt" />\n'
            '</manifest>\n'
            '</package>\n')
        self.opf = OPF(self.stream, os.getcwd())

    def testReading(self, opf=None):
        """Check every field of the fixture; *opf* defaults to self.opf."""
        if opf is None:
            opf = self.opf

        self.assertEqual(opf.title, u'A Cool & \xa9 \xdf Title')
        self.assertEqual(opf.authors, u'Monkey Kitchen,Next'.split(','))
        self.assertEqual(opf.author_sort, 'Monkey')
        self.assertEqual(opf.title_sort, 'Wow')
        self.assertEqual(opf.tags, ['One', 'Two'])
        self.assertEqual(opf.isbn, '123456789')
        self.assertEqual(opf.series, 'A one book series')
        self.assertEqual(opf.series_index, 2.5)
        self.assertEqual(opf.rating, 4)
        self.assertEqual(opf.publication_type, 'test')
        # The manifest href is expected back percent-decoded.
        self.assertEqual(list(opf.itermanifest())[0].get('href'), 'a ~ b')

    def testWriting(self):
        """Set each attribute, read it back, then render the document."""
        for attr, val in [
                ('title', 'New & Title'),
                ('authors', ['One', 'Two']),
                ('author_sort', 'Kitchen'),
                ('tags', ['Three']),
                ('isbn', 'a'),
                ('rating', 3),
                ('series_index', 1),
                ('title_sort', 'ts')]:
            setattr(self.opf, attr, val)
            self.assertEqual(getattr(self.opf, attr), val)

        self.opf.render()

    def testCreator(self):
        """Render via OPFCreator and verify the output reads back intact."""
        opf = OPFCreator(os.getcwd(), self.opf)
        buf = cStringIO.StringIO()
        opf.render(buf)
        raw = buf.getvalue()
        self.testReading(opf=OPF(cStringIO.StringIO(raw), os.getcwd()))

    def testSmartUpdate(self):
        """smart_update() with the OPF's own metadata must change nothing."""
        self.opf.smart_update(MetaInformation(self.opf))
        self.testReading()
-
-
def suite():
    """Return a test suite holding every test method of OPFTest."""
    loader = unittest.TestLoader()
    return loader.loadTestsFromTestCase(OPFTest)
-
-
def test():
    """Run the suite returned by suite() with a verbose text runner."""
    runner = unittest.TextTestRunner(verbosity=2)
    runner.run(suite())
-
# Run the OPF unit tests when this module is executed as a script.
if __name__ == '__main__':
    test()
-
-