home *** CD-ROM | disk | FTP | other *** search
Wrap
# Source Generated with Decompyle++
# File: in.pyc (Python 2.6)
#
# NOTE(review): this module was recovered from decompiler output.  Constructs
# the decompiler could not express (``with`` blocks, list comprehensions,
# function-scope imports) have been reconstructed by hand; every place where
# the reconstruction is an inference is marked with NOTE(review).
from __future__ import with_statement

__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import os
import shutil
import re

from calibre.customize.conversion import OutputFormatPlugin
from calibre.ptempfile import TemporaryDirectory
from calibre import CurrentDir
from calibre.customize.conversion import OptionRecommendation
from calibre.constants import filesystem_encoding
from lxml import etree

# Tag names treated as block level when deciding how tall the replacement for
# a top-level <br> should be (see EPUBOutput.workaround_ade_quirks).
block_level_tags = (
    'address', 'body', 'blockquote', 'center', 'dir', 'div', 'dl',
    'fieldset', 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'isindex',
    'menu', 'noframes', 'noscript', 'ol', 'p', 'pre', 'table', 'ul',
)


class EPUBOutput(OutputFormatPlugin):
    """Output plugin that writes a converted book as an EPUB file.

    ``convert`` is the entry point.  It applies a series of markup
    workarounds and transforms to the OEB book, serializes it through the
    'oeb' output plugin into a temporary directory and packs that directory
    into the EPUB container at ``output_path``.
    """

    name = 'EPUB Output'
    author = 'Kovid Goyal'
    file_type = 'epub'

    options = set([
        OptionRecommendation(
            name='extract_to',
            help=_('Extract the contents of the generated EPUB file to the '
                   'specified directory. The contents of the directory are '
                   'first deleted, so be careful.')),
        OptionRecommendation(
            name='dont_split_on_page_breaks',
            recommended_value=False,
            level=OptionRecommendation.LOW,
            help=_('Turn off splitting at page breaks. Normally, input '
                   'files are automatically split at every page break into '
                   'two files. This gives an output ebook that can be '
                   'parsed faster and with less resources. However, '
                   'splitting is slow and if your source file contains a '
                   'very large number of page breaks, you should turn off '
                   'splitting on page breaks.')),
        OptionRecommendation(
            name='flow_size',
            recommended_value=260,
            help=_('Split all HTML files larger than this size (in KB). '
                   'This is necessary as most EPUB readers cannot handle '
                   'large file sizes. The default of %defaultKB is the size '
                   'required for Adobe Digital Editions.')),
        OptionRecommendation(
            name='no_default_epub_cover',
            recommended_value=False,
            help=_("Normally, if the input file has no cover and you don't "
                   "specify one, a default cover is generated with the "
                   "title, authors, etc. This option disables the "
                   "generation of this cover.")),
        OptionRecommendation(
            name='no_svg_cover',
            recommended_value=False,
            help=_('Do not use SVG for the book cover. Use this option if '
                   'your EPUB is going to be used on a device that does not '
                   'support SVG, like the iPhone or the JetBook Lite. '
                   'Without this option, such devices will display the '
                   'cover as a blank page.')),
        OptionRecommendation(
            name='preserve_cover_aspect_ratio',
            recommended_value=False,
            help=_('When using an SVG cover, this option will cause the '
                   'cover to scale to cover the available screen area, but '
                   'still preserve its aspect ratio (ratio of width to '
                   'height). That means there may be white borders at the '
                   'sides or top and bottom of the image, but the image '
                   'will never be distorted. Without this option the image '
                   'may be slightly distorted, but there will be no '
                   'borders.')),
    ])

    recommendations = set([('pretty_print', True, OptionRecommendation.HIGH)])

    def workaround_webkit_quirks(self):
        """Turn empty ``<pre>`` elements into ``<div>`` in every spine item.

        A ``<pre>`` counts as empty when it has no text and no children.
        Spine items whose data has no usable ``<body>`` element are skipped.
        """
        from calibre.ebooks.oeb.base import XPath

        for item in self.oeb.spine:
            root = item.data
            body = XPath('//h:body')(root)
            if body:
                body = body[0]
            # Non-XML spine data (or a document without <body>) has nothing
            # to fix.
            if not hasattr(body, 'xpath'):
                continue
            for pre in XPath('//h:pre')(body):
                if not pre.text and len(pre) == 0:
                    pre.tag = 'div'

    def upshift_markup(self):
        """Replace ``<u>`` with ``<span style="text-decoration:underline">``.

        Applied to every spine item that has a usable ``<body>`` element.
        """
        from calibre.ebooks.oeb.base import XPath

        for item in self.oeb.spine:
            root = item.data
            body = XPath('//h:body')(root)
            if body:
                body = body[0]
            if not hasattr(body, 'xpath'):
                continue
            for u in XPath('//h:u')(root):
                u.tag = 'span'
                u.set('style', 'text-decoration:underline')

    def convert(self, oeb, output_path, input_plugin, opts, log):
        """Write ``oeb`` to ``output_path`` as an EPUB file.

        :param oeb: the OEB book to serialize; it is modified in place by
            the workarounds and transforms applied here.
        :param output_path: path of the EPUB file to create.
        :param input_plugin: the input plugin used for this conversion; its
            optional ``encrypted_fonts`` attribute is consulted.
        :param opts: conversion options (see ``options`` on this class).
        :param log: logger used for warnings and progress messages.
        """
        self.log = log
        self.opts = opts
        self.oeb = oeb

        self.workaround_ade_quirks()
        self.workaround_webkit_quirks()
        self.upshift_markup()

        from calibre.ebooks.oeb.transforms.rescale import RescaleImages
        RescaleImages()(oeb, opts)

        from calibre.ebooks.oeb.transforms.split import Split
        split = Split(not self.opts.dont_split_on_page_breaks,
                      max_flow_size=self.opts.flow_size * 1024)
        split(self.oeb, self.opts)

        from calibre.ebooks.oeb.transforms.cover import CoverManager
        cm = CoverManager(
            no_default_cover=self.opts.no_default_epub_cover,
            no_svg_cover=self.opts.no_svg_cover,
            preserve_aspect_ratio=self.opts.preserve_cover_aspect_ratio)
        cm(self.oeb, self.opts, self.log)

        self.workaround_sony_quirks()

        if self.oeb.toc.count() == 0:
            self.log.warn('This EPUB file has no Table of Contents. '
                          'Creating a default TOC')
            first = next(iter(self.oeb.spine))
            self.oeb.toc.add(_('Start'), first.href)

        from calibre.ebooks.oeb.base import OPF
        identifiers = oeb.metadata['identifier']
        uuid = None
        for ident in identifiers:
            # An identifier may carry no scheme attribute at all; treat that
            # as an empty scheme instead of calling .lower() on None.
            scheme = ident.get(OPF('scheme'), None) or ''
            if (scheme.lower() == 'uuid' or
                    unicode(ident).startswith('urn:uuid:')):
                uuid = unicode(ident).split(':')[-1]
                break

        if uuid is None:
            self.log.warn('No UUID identifier found')
            from uuid import uuid4
            uuid = str(uuid4())
            oeb.metadata.add('identifier', uuid, scheme='uuid', id=uuid)

        # NOTE(review): the decompiler reduced this ``with`` statement to a
        # placeholder; the '_epub_output' suffix is an assumption - confirm.
        with TemporaryDirectory('_epub_output') as tdir:
            from calibre.customize.ui import plugin_for_output_format
            oeb_output = plugin_for_output_format('oeb')
            oeb_output.convert(oeb, tdir, input_plugin, opts, log)

            # NOTE(review): both list comprehensions below were lost in
            # decompilation (only ``[...][0]`` survived).  They are assumed
            # to pick the single .opf / .ncx file written by the 'oeb'
            # output plugin into tdir - confirm.
            entries = os.listdir(tdir)
            opf = [x for x in entries if x.endswith('.opf')][0]
            self.condense_ncx(
                [os.path.join(tdir, x) for x in entries
                 if x.endswith('.ncx')][0])

            encrypted_fonts = getattr(input_plugin, 'encrypted_fonts', [])
            encryption = None
            # NOTE(review): the decompiled code assigned these two names and
            # never used them; the call to encrypt_fonts is presumed to have
            # been dropped by the decompiler - confirm.
            if encrypted_fonts:
                encryption = self.encrypt_fonts(encrypted_fonts, tdir, uuid)

            from calibre.ebooks.epub import initialize_container
            epub = initialize_container(output_path, os.path.basename(opf))
            try:
                epub.add_dir(tdir)
                if encryption is not None:
                    # NOTE(review): assumes the container object exposes a
                    # zipfile-style writestr(); the entry name follows the
                    # EPUB OCF convention - confirm.
                    epub.writestr('META-INF/encryption.xml', encryption)
                if opts.extract_to is not None:
                    # The option's help text promises the directory is
                    # emptied first; os.mkdir alone would fail if it exists.
                    if os.path.exists(opts.extract_to):
                        shutil.rmtree(opts.extract_to)
                    os.mkdir(opts.extract_to)
                    epub.extractall(path=opts.extract_to)
                    self.log.info('EPUB extracted to', opts.extract_to)
            finally:
                epub.close()

    def encrypt_fonts(self, uris, tdir, uuid):
        """Obfuscate font files in place and describe them as XML.

        The first 1024 bytes (or fewer, for shorter files) of every font are
        XORed with a 16 byte key derived from the hex digits of ``uuid``.

        :param uris: '/'-separated font paths relative to ``tdir``.
        :param tdir: directory containing the serialized book.
        :param uuid: identifier the key is derived from; must contain at
            least 16 hexadecimal digits.
        :return: the encryption document as a UTF-8 byte string, or ``None``
            when ``uris`` is empty.
        :raises ValueError: if ``uuid`` has fewer than 16 hex digits.
        """
        from binascii import unhexlify

        key = re.sub(r'[^a-fA-F0-9]', '', uuid)
        if len(key) < 16:
            raise ValueError('UUID identifier %r is invalid' % uuid)
        key = unhexlify(key + key[:32])
        key = tuple(map(ord, key))

        # NOTE(review): reconstructed from a bare
        # ``CurrentDir(tdir).__enter__()`` - the relative font paths are
        # resolved against tdir and the previous cwd is restored afterwards.
        with CurrentDir(tdir):
            paths = [os.path.join(*x.split('/')) for x in uris]
            uris = dict(zip(uris, paths))
            fonts = []
            for uri, path in uris.items():
                self.log.debug('Encrypting font:', uri)
                with open(path, 'r+b') as f:
                    data = f.read(1024)
                    f.seek(0)
                    # Iterate over what was actually read so that files
                    # shorter than 1024 bytes do not raise IndexError, and
                    # write the obfuscated prefix in a single call.
                    f.write(''.join(
                        chr(ord(c) ^ key[i % 16])
                        for i, c in enumerate(data)))
                fonts.append(
                    u'\n <enc:EncryptedData>\n'
                    u' <enc:EncryptionMethod '
                    u'Algorithm="http://ns.adobe.com/pdf/enc#RC"/>\n'
                    u' <enc:CipherData>\n'
                    u' <enc:CipherReference URI="%s"/>\n'
                    u' </enc:CipherData>\n'
                    u' </enc:EncryptedData>\n '
                    % uri.replace('"', '\\"'))
            if fonts:
                ans = ('<encryption\n'
                       ' xmlns="urn:oasis:names:tc:opendocument:xmlns:container"\n'
                       ' xmlns:enc="http://www.w3.org/2001/04/xmlenc#"\n'
                       ' xmlns:deenc="http://ns.adobe.com/digitaleditions/enc">\n ')
                ans += u'\n'.join(fonts).encode('utf-8')
                ans += '\n</encryption>'
                return ans
        return None

    def condense_ncx(self, ncx_path):
        """Strip surrounding whitespace from all text in the NCX file.

        Does nothing when the ``pretty_print`` option is set.  Otherwise the
        file at ``ncx_path`` is rewritten in place as UTF-8.
        """
        if self.opts.pretty_print:
            return
        tree = etree.parse(ncx_path)
        for tag in tree.getroot().iter(tag=etree.Element):
            if tag.text:
                tag.text = tag.text.strip()
            if tag.tail:
                tag.tail = tag.tail.strip()
        compressed = etree.tostring(tree.getroot(), encoding='utf-8')
        with open(ncx_path, 'wb') as f:
            f.write(compressed)

    def workaround_ade_quirks(self):
        """Rewrite markup that is presumably mishandled by Adobe Digital
        Editions (going by the method name).

        For the TOC: fragment identifiers that do not match
        ``[-A-Za-z0-9_:.]+`` are dropped from node hrefs.

        For every spine item: images with an empty, '#' or ``http:`` source
        are removed; ``<a name>`` gets a matching ``id``; ``<br>`` children
        of ``<body>`` become styled ``<p>``; ``<embed>``, non-SVG
        ``<object>``, empty ``<title>``/``<style>``/``<script>``, scripts
        inside the body and ``<form>`` are removed; ``<center>`` becomes a
        centered ``<div>``; '&' is stripped from image sources; zero-width
        spaces and soft hyphens are deleted and non-breaking hyphens become
        '-'.  If the manifest has a stylesheet, ``margin-left`` and
        ``padding-left`` are removed from rules whose selector is the class
        of a ``<ul>``/``<ol>``.
        """
        from calibre.ebooks.oeb.base import (
            XPath, XHTML, OEB_STYLES, barename, urlunquote)

        # Use the first stylesheet found in the manifest, if any.
        stylesheet = None
        for item in self.oeb.manifest:
            if item.media_type.lower() in OEB_STYLES:
                stylesheet = item
                break

        frag_pat = re.compile(r'[-A-Za-z0-9_:.]+$')
        for node in self.oeb.toc.iter():
            href = getattr(node, 'href', None)
            if hasattr(href, 'partition'):
                base, _sep, frag = href.partition('#')
                frag = urlunquote(frag)
                if frag and frag_pat.match(frag) is None:
                    self.log.warn(
                        'Removing invalid fragment identifier %r from TOC'
                        % frag)
                    node.href = base

        # U+200B ZERO WIDTH SPACE and U+00AD SOFT HYPHEN; compiled once
        # instead of once per spine item.
        special_chars = re.compile(u'[\u200b\u00ad]')

        for item in self.oeb.spine:
            root = item.data
            body = XPath('//h:body')(root)
            if body:
                body = body[0]

            if hasattr(body, 'xpath'):
                # Collect first, then remove, so the tree is not mutated
                # while the XPath result is being walked.
                bad = []
                for img in XPath('//h:img')(body):
                    src = img.get('src', '').strip()
                    if src in ('', '#') or src.startswith('http:'):
                        bad.append(img)
                for img in bad:
                    img.getparent().remove(img)

                for anchor in XPath('//h:a[@name]')(body):
                    if not anchor.get('id', False):
                        anchor.set('id', anchor.get('name'))

                for br in XPath('./h:br')(body):
                    if br.getparent() is None:
                        continue
                    try:
                        prior = next(br.itersiblings(preceding=True))
                        priortag = barename(prior.tag)
                        priortext = prior.tail
                    except Exception:
                        # No preceding sibling (or one without a usable tag
                        # name): fall back to the body itself.
                        priortag = 'body'
                        priortext = body.text
                    if priortext:
                        priortext = priortext.strip()
                    br.tag = XHTML('p')
                    # NOTE(review): the decompiled text shows a single blank
                    # here; assumed to be U+00A0 NO-BREAK SPACE - confirm.
                    br.text = u'\u00a0'
                    style = [s.strip()
                             for s in br.get('style', '').split(';')]
                    style = [s for s in style if s]
                    style.append('margin:0pt; border:0pt')
                    # One line of height only when the <br> directly follows
                    # a block-level element with no text in between.
                    if not priortext and priortag in block_level_tags:
                        style.append('height:1em')
                    else:
                        style.append('height:0pt')
                    br.set('style', '; '.join(style))

            for tag in XPath('//h:embed')(root):
                tag.getparent().remove(tag)

            for tag in XPath('//h:object')(root):
                if tag.get('type', '').lower().strip() in ('image/svg+xml',):
                    continue
                tag.getparent().remove(tag)

            for tag in XPath('//h:title|//h:style')(root):
                if not tag.text:
                    tag.getparent().remove(tag)

            for tag in XPath('//h:script')(root):
                if not tag.text and not tag.get('src', False):
                    tag.getparent().remove(tag)

            for tag in XPath('//h:body/descendant::h:script')(root):
                tag.getparent().remove(tag)

            for tag in XPath('//h:form')(root):
                tag.getparent().remove(tag)

            for tag in XPath('//h:center')(root):
                tag.tag = XHTML('div')
                tag.set('style', 'text-align:center')

            for tag in XPath('//h:img[@src]')(root):
                tag.set('src', tag.get('src', '').replace('&', ''))

            for elem in root.iterdescendants():
                if getattr(elem, 'text', False):
                    elem.text = special_chars.sub('', elem.text)
                    # U+2011 NON-BREAKING HYPHEN -> plain hyphen.
                    elem.text = elem.text.replace(u'\u2011', '-')
                if getattr(elem, 'tail', False):
                    elem.tail = special_chars.sub('', elem.tail)
                    elem.tail = elem.tail.replace(u'\u2011', '-')

            if stylesheet is not None:
                from cssutils.css import CSSRule
                for lb in XPath('//h:ul[@class]|//h:ol[@class]')(root):
                    sel = '.' + lb.get('class')
                    rules = stylesheet.data.cssRules.rulesOfType(
                        CSSRule.STYLE_RULE)
                    for rule in rules:
                        if sel == rule.selectorList.selectorText:
                            rule.style.removeProperty('margin-left')
                            rule.style.removeProperty('padding-left')

    def workaround_sony_quirks(self):
        """Drop TOC fragment identifiers that point at the top of a file.

        For every TOC entry whose href has a fragment, the fragment is
        removed when no non-whitespace text precedes the target element in
        the body of the spine item the href refers to.
        """
        from calibre.ebooks.oeb.base import urldefrag, XPath

        def frag_is_at_top(root, frag):
            """Return True if no visible text precedes the element with
            id/name ``frag`` inside the body of ``root``."""
            body = XPath('//h:body')(root)
            if not body:
                return False
            body = body[0]
            tree = body.getroottree()
            elem = XPath('//*[@id="%s" or @name="%s"]' % (frag, frag))(root)
            if not elem:
                return False
            elem = elem[0]
            # getpath() is provided by the ElementTree, not by elements.
            path = tree.getpath(elem)
            for el in body.iterdescendants():
                epath = tree.getpath(el)
                if epath == path:
                    break
                if el.text and el.text.strip():
                    return False
                # The tail of an ancestor comes after the target in document
                # order, so only tails of non-ancestors are checked.
                if not path.startswith(epath):
                    if el.tail and el.tail.strip():
                        return False
            return True

        def simplify_toc_entry(toc):
            """Apply the simplification to ``toc`` and, recursively, to all
            of its children."""
            if toc.href:
                href, frag = urldefrag(toc.href)
                if frag:
                    for item in self.oeb.spine:
                        if item.href == href:
                            if frag_is_at_top(item.data, frag):
                                self.log.debug(
                                    'Removing anchor from TOC href:',
                                    href + '#' + frag)
                                toc.href = href
                            break
            for child in toc:
                simplify_toc_entry(child)

        if self.oeb.toc:
            simplify_toc_entry(self.oeb.toc)