home *** CD-ROM | disk | FTP | other *** search
Wrap
# Source Generated with Decompyle++
# File: in.pyc (Python 2.6)
"""Calibre input plugin that converts FictionBook 2 (FB2) files to XHTML."""

from __future__ import with_statement

__license__ = 'GPL v3'
__copyright__ = '2008, Anatoly Shipitsin <norguhtar at gmail.com>'

import os
import re
from base64 import b64decode

from lxml import etree

from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
from calibre import guess_type

# XML namespace of FictionBook 2.0 documents.
FB2NS = 'http://www.gribuser.ru/xml/fictionbook/2.0'


class FB2Input(InputFormatPlugin):
    """Input plugin that turns an FB2 document into an OPF package.

    ``convert`` writes ``index.xhtml``, ``inline-styles.css``, every embedded
    ``<binary>`` resource and ``metadata.opf`` into the current working
    directory.

    NOTE(review): ``_`` and ``P`` are not defined in this module; they are
    presumably injected as builtins by the host application -- confirm.
    """

    name = 'FB2 Input'
    author = 'Anatoly Shipitsin'
    description = 'Convert FB2 files to HTML'
    file_types = set(['fb2'])

    recommendations = set([
        ('level1_toc', '//h:h1', OptionRecommendation.MED),
        ('level2_toc', '//h:h2', OptionRecommendation.MED),
        ('level3_toc', '//h:h3', OptionRecommendation.MED),
    ])

    options = set([
        OptionRecommendation(
            name='no_inline_fb2_toc',
            recommended_value=False,
            level=OptionRecommendation.LOW,
            help=_('Do not insert a Table of Contents at the beginning of the book.'),
        ),
    ])

    def convert(self, stream, options, file_ext, log, accelerators):
        """Convert the FB2 document in *stream* and return the OPF path.

        :param stream: seekable file-like object holding the raw FB2 data.
        :param options: conversion options; only ``no_inline_fb2_toc`` is
            read here.
        :param file_ext: unused by this method.
        :param log: logger; called directly and through ``log.debug``.
        :param accelerators: unused by this method.
        :return: absolute path of the ``metadata.opf`` written to the
            current working directory.
        """
        # The decompiler mangled these into "X = X import module"; they are
        # function-scope imports and are kept local to the method.
        from calibre.ebooks.metadata.opf2 import OPFCreator
        from calibre.ebooks.metadata.meta import get_metadata
        from calibre.ebooks.oeb.base import XLINK_NS, XHTML_NS, RECOVER_PARSER

        NAMESPACES = {'f': FB2NS, 'l': XLINK_NS}

        log.debug('Parsing XML...')
        # NUL bytes are stripped before the data reaches the XML parser.
        raw = stream.read().replace('\x00', '')
        try:
            doc = etree.fromstring(raw)
        except etree.XMLSyntaxError:
            try:
                doc = etree.fromstring(raw, parser=RECOVER_PARSER)
            except Exception:
                # Last resort: escape bare ampersands. The scraped source
                # showed the replacement as a literal '&', which would leave
                # the markup just as malformed; '&amp;' is the valid escape.
                doc = etree.fromstring(raw.replace('& ', '&amp; '),
                                       parser=RECOVER_PARSER)

        # Gather the text of every embedded text/css <stylesheet> element.
        stylesheets = doc.xpath(
            '//*[local-name() = "stylesheet" and @type="text/css"]')
        css = ''.join(
            etree.tostring(sheet, encoding=unicode, method='text',
                           with_tail=False) + '\n\n'
            for sheet in stylesheets)
        if css:
            import cssutils
            import logging
            parser = cssutils.CSSParser(
                fetcher=None, log=logging.getLogger('calibre.css'))
            XHTML_CSS_NAMESPACE = '@namespace "%s";\n' % XHTML_NS
            text = XHTML_CSS_NAMESPACE + css
            log.debug('Parsing stylesheet...')
            stylesheet = parser.parseString(text)
            stylesheet.namespaces['h'] = XHTML_NS
            # Rewrite selectors: "style" elements become "span" and
            # name= attribute tests become class= tests.
            css = unicode(stylesheet.cssText).replace('h|style', 'h|span')
            css = re.sub(r'name\s*=\s*', 'class=', css)

        self.extract_embedded_content(doc)

        log.debug('Converting XML to HTML...')
        with open(P('templates/fb2.xsl'), 'rb') as xsl_file:
            ss = xsl_file.read()
        if options.no_inline_fb2_toc:
            log('Disabling generation of inline FB2 TOC')
            # Cut the TOC-building section out of the XSL template.
            ss = re.compile(r'<!-- BUILD TOC -->.*<!-- END BUILD TOC -->',
                            re.DOTALL).sub('', ss)
        styledoc = etree.fromstring(ss)
        transform = etree.XSLT(styledoc)
        result = transform(doc)

        # Point each <img> at the file name the binary was saved under.
        for img in result.xpath('//img[@src]'):
            src = img.get('src')
            img.set('src', self.binary_map.get(src, src))

        index = transform.tostring(result)
        with open('index.xhtml', 'wb') as index_file:
            index_file.write(index)
        # Writing a unicode object to a binary file encodes it as ASCII on
        # Python 2 and fails on non-ASCII CSS; encode explicitly instead.
        if isinstance(css, unicode):
            css = css.encode('utf-8')
        with open('inline-styles.css', 'wb') as css_file:
            css_file.write(css)

        stream.seek(0)
        mi = get_metadata(stream, 'fb2')
        if not mi.title:
            mi.title = _('Unknown')
        if not mi.authors:
            mi.authors = [_('Unknown')]

        opf = OPFCreator(os.getcwdu(), mi)
        entries = [(f, guess_type(f)[0]) for f in os.listdir('.')]
        opf.create_manifest(entries)
        opf.create_spine(['index.xhtml'])

        for img in doc.xpath('//f:coverpage/f:image', namespaces=NAMESPACES):
            href = img.get('{%s}href' % XLINK_NS, img.get('href', None))
            if href is None:
                continue
            # NOTE(review): the decompiler lost the body of the
            # startswith('#') branch; stripping the fragment marker is the
            # reconstruction -- confirm against the upstream source.
            if href.startswith('#'):
                href = href[1:]
            # Embedded images are saved as "<id>.<ext>", so resolve the id
            # to the name that actually exists on disk.
            href = self.binary_map.get(href, href)
            opf.guide.set_cover(os.path.abspath(href))

        with open('metadata.opf', 'wb') as opf_file:
            opf.render(opf_file)
        return os.path.join(os.getcwd(), 'metadata.opf')

    def extract_embedded_content(self, doc):
        """Write every ``<binary>`` child of *doc* to the working directory.

        Each direct child whose tag contains ``binary`` and that carries an
        ``id`` attribute is base64-decoded and saved under its id. Ids whose
        ``content-type`` ends in png/jpeg/jpg get that extension appended.
        ``self.binary_map`` is reset and then maps those image ids to the
        file names used.

        :param doc: parsed FB2 root element.
        """
        self.binary_map = {}
        for elem in doc.xpath('./*'):
            if 'binary' not in elem.tag or 'id' not in elem.attrib:
                continue
            ct = elem.get('content-type', '')
            fname = elem.attrib['id']
            ext = ct.rpartition('/')[-1].lower()
            if ext in ('png', 'jpeg', 'jpg'):
                fname += '.' + ext
                # Other ids are saved unchanged, so lookups that fall back
                # to the id itself already resolve correctly.
                self.binary_map[elem.get('id')] = fname
            # elem.text is None for an empty element; treat it as no data
            # rather than raising AttributeError.
            data = b64decode((elem.text or '').strip())
            with open(fname, 'wb') as out:
                out.write(data)