home *** CD-ROM | disk | FTP | other *** search
/ Maximum CD 2010 November / maximum-cd-2010-11.iso / DiscContents / calibre-0.7.13.msi / file_890 (.txt) < prev    next >
Encoding:
Python Compiled Bytecode  |  2010-08-06  |  9.9 KB  |  292 lines

  1. # Source Generated with Decompyle++
  2. # File: in.pyc (Python 2.6)
  3.  
  4. __license__ = 'GPL 3'
  5. __copyright__ = '2009, John Schember <john@nachtimwald.com>'
  6. __docformat__ = 'restructuredtext en'
  7. import cStringIO
  8. from base64 import b64encode
  9. import re
  10.  
  11. try:
  12.     from PIL import Image
  13.     Image
  14. except ImportError:
  15.     import Image
  16.  
  17. from lxml import etree
  18. from calibre import prepare_string_for_xml
  19. from calibre.constants import __appname__, __version__
  20. from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
  21. from calibre.ebooks.oeb.stylizer import Stylizer
  22. from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES
  23. TAG_MAP = {
  24.     'b': 'strong',
  25.     'i': 'emphasis',
  26.     'p': 'p',
  27.     'li': 'p',
  28.     'div': 'p',
  29.     'br': 'p' }
  30. TAG_SPACE = []
  31. TAG_IMAGES = [
  32.     'img']
  33. TAG_LINKS = [
  34.     'a']
  35. BLOCK = [
  36.     'p']
  37. STYLES = [
  38.     ('font-weight', {
  39.         'bold': 'strong',
  40.         'bolder': 'strong' }),
  41.     ('font-style', {
  42.         'italic': 'emphasis' })]
  43.  
  44. class FB2MLizer(object):
  45.     
  46.     def __init__(self, log):
  47.         self.log = log
  48.         self.image_hrefs = { }
  49.         self.link_hrefs = { }
  50.  
  51.     
  52.     def extract_content(self, oeb_book, opts):
  53.         self.log.info('Converting XHTML to FB2 markup...')
  54.         self.oeb_book = oeb_book
  55.         self.opts = opts
  56.         return self.fb2mlize_spine()
  57.  
  58.     
  59.     def fb2mlize_spine(self):
  60.         self.image_hrefs = { }
  61.         self.link_hrefs = { }
  62.         output = [
  63.             self.fb2_header()]
  64.         output.append(self.get_cover_page())
  65.         output.append(u'ghji87yhjko0Caliblre-toc-placeholder-for-insertion-later8ujko0987yjk')
  66.         output.append(self.get_text())
  67.         output.append(self.fb2_body_footer())
  68.         output.append(self.fb2mlize_images())
  69.         output.append(self.fb2_footer())
  70.         output = ''.join(output).replace(u'ghji87yhjko0Caliblre-toc-placeholder-for-insertion-later8ujko0987yjk', self.get_toc())
  71.         output = self.clean_text(output)
  72.         return u'<?xml version="1.0" encoding="UTF-8"?>\n%s' % etree.tostring(etree.fromstring(output), encoding = unicode, pretty_print = True)
  73.  
  74.     
  75.     def clean_text(self, text):
  76.         text = re.sub('<p>[ ]*</p>', '', text)
  77.         return text
  78.  
  79.     
  80.     def fb2_header(self):
  81.         author_first = u''
  82.         author_middle = u''
  83.         author_last = u''
  84.         author_parts = self.oeb_book.metadata.creator[0].value.split(' ')
  85.         if len(author_parts) == 1:
  86.             author_last = author_parts[0]
  87.         elif len(author_parts) == 2:
  88.             author_first = author_parts[0]
  89.             author_last = author_parts[1]
  90.         else:
  91.             author_first = author_parts[0]
  92.             author_middle = ' '.join(author_parts[1:-2])
  93.             author_last = author_parts[-1]
  94.         return u'<FictionBook xmlns:xlink="http://www.w3.org/1999/xlink" xmlns="http://www.gribuser.ru/xml/fictionbook/2.0">\n<description>\n<title-info>\n <author>\n<first-name>%s</first-name>\n<middle-name>%s</middle-name>\n<last-name>%s</last-name>\n</author>\n<book-title>%s</book-title> </title-info><document-info> <program-used>%s - %s</program-used></document-info>\n</description>\n<body>\n<section>' % tuple(map(prepare_string_for_xml, (author_first, author_middle, author_last, self.oeb_book.metadata.title[0].value, __appname__, __version__)))
  95.  
  96.     
  97.     def get_cover_page(self):
  98.         output = u''
  99.         if 'cover' in self.oeb_book.guide:
  100.             output += '<image xlink:href="#cover.jpg" />'
  101.             self.image_hrefs[self.oeb_book.guide['cover'].href] = 'cover.jpg'
  102.         
  103.         if 'titlepage' in self.oeb_book.guide:
  104.             self.log.debug('Generating cover page...')
  105.             href = self.oeb_book.guide['titlepage'].href
  106.             item = self.oeb_book.manifest.hrefs[href]
  107.             if item.spine_position is None:
  108.                 stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile)
  109.                 output += ''.join(self.dump_text(item.data.find(XHTML('body')), stylizer, item))
  110.             
  111.         
  112.         return output
  113.  
  114.     
  115.     def get_toc(self):
  116.         toc = []
  117.         if self.opts.inline_toc:
  118.             self.log.debug('Generating table of contents...')
  119.             toc.append(u'<p>%s</p>' % _('Table of Contents:'))
  120.             for item in self.oeb_book.toc:
  121.                 if item.href in self.link_hrefs.keys():
  122.                     toc.append('<p><a xlink:href="#%s">%s</a></p>\n' % (self.link_hrefs[item.href], item.title))
  123.                     continue
  124.                 self.oeb.warn('Ignoring toc item: %s not found in document.' % item)
  125.             
  126.         
  127.         return ''.join(toc)
  128.  
  129.     
  130.     def get_text(self):
  131.         text = []
  132.         for item in self.oeb_book.spine:
  133.             self.log.debug('Converting %s to FictionBook2 XML' % item.href)
  134.             stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile)
  135.             text.append(self.add_page_anchor(item))
  136.             text += self.dump_text(item.data.find(XHTML('body')), stylizer, item)
  137.         
  138.         return ''.join(text)
  139.  
  140.     
  141.     def fb2_body_footer(self):
  142.         return u'\n</section>\n</body>'
  143.  
  144.     
  145.     def fb2_footer(self):
  146.         return u'</FictionBook>'
  147.  
  148.     
  149.     def add_page_anchor(self, page):
  150.         return self.get_anchor(page, '')
  151.  
  152.     
  153.     def get_anchor(self, page, aid):
  154.         aid = prepare_string_for_xml(aid)
  155.         aid = '%s#%s' % (page.href, aid)
  156.         if aid not in self.link_hrefs.keys():
  157.             self.link_hrefs[aid] = 'calibre_link-%s' % len(self.link_hrefs.keys())
  158.         
  159.         aid = self.link_hrefs[aid]
  160.         return '<a id="%s" />' % aid
  161.  
  162.     
  163.     def fb2mlize_images(self):
  164.         images = []
  165.         for item in self.oeb_book.manifest:
  166.             if item.media_type in OEB_RASTER_IMAGES:
  167.                 
  168.                 try:
  169.                     im = Image.open(cStringIO.StringIO(item.data)).convert('RGB')
  170.                     data = cStringIO.StringIO()
  171.                     im.save(data, 'JPEG')
  172.                     data = data.getvalue()
  173.                     raw_data = b64encode(data)
  174.                     data = ''
  175.                     col = 1
  176.                     for char in raw_data:
  177.                         if col == 72:
  178.                             data += '\n'
  179.                             col = 1
  180.                         
  181.                         col += 1
  182.                         data += char
  183.                     
  184.                     images.append('<binary id="%s" content-type="%s">%s\n</binary>' % (self.image_hrefs.get(item.href, '0000.JPEG'), item.media_type, data))
  185.                 except Exception:
  186.                     e = None
  187.                     self.log.error('Error: Could not include file %s becuase %s.' % (item.href, e))
  188.                 except:
  189.                     None<EXCEPTION MATCH>Exception
  190.                 
  191.  
  192.             None<EXCEPTION MATCH>Exception
  193.         
  194.         return ''.join(images)
  195.  
  196.     
  197.     def dump_text(self, elem, stylizer, page, tag_stack = []):
  198.         if not isinstance(elem.tag, basestring) or namespace(elem.tag) != XHTML_NS:
  199.             return []
  200.         style = stylizer.style(elem)
  201.         if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') or style['visibility'] == 'hidden':
  202.             return []
  203.         fb2_text = []
  204.         tags = []
  205.         tag = barename(elem.tag)
  206.         if tag in TAG_LINKS:
  207.             href = elem.get('href')
  208.             if href:
  209.                 href = prepare_string_for_xml(page.abshref(href))
  210.                 href = href.replace('"', '"')
  211.                 if '://' in href:
  212.                     fb2_text.append('<a xlink:href="%s">' % href)
  213.                 elif href.startswith('#'):
  214.                     href = href[1:]
  215.                 
  216.                 if href not in self.link_hrefs.keys():
  217.                     self.link_hrefs[href] = 'calibre_link-%s' % len(self.link_hrefs.keys())
  218.                 
  219.                 href = self.link_hrefs[href]
  220.                 fb2_text.append('<a xlink:href="#%s">' % href)
  221.                 tags.append('a')
  222.             
  223.         
  224.         id_name = elem.get('id')
  225.         if id_name:
  226.             fb2_text.append(self.get_anchor(page, id_name))
  227.         
  228.         fb2_tag = TAG_MAP.get(tag, None)
  229.         if fb2_tag == 'p':
  230.             if 'p' in tag_stack + tags:
  231.                 all_tags = tag_stack + tags
  232.                 closed_tags = []
  233.                 all_tags.reverse()
  234.                 for t in all_tags:
  235.                     fb2_text.append('</%s>' % t)
  236.                     closed_tags.append(t)
  237.                     if t == 'p':
  238.                         break
  239.                         continue
  240.                 
  241.                 closed_tags.reverse()
  242.                 for t in closed_tags:
  243.                     fb2_text.append('<%s>' % t)
  244.                 
  245.             else:
  246.                 fb2_text.append('<p>')
  247.                 tags.append('p')
  248.         elif fb2_tag and fb2_tag not in tag_stack + tags:
  249.             fb2_text.append('<%s>' % fb2_tag)
  250.             tags.append(fb2_tag)
  251.         
  252.         for s in STYLES:
  253.             style_tag = s[1].get(style[s[0]], None)
  254.             if style_tag and style_tag not in tag_stack + tags:
  255.                 fb2_text.append('<%s>' % style_tag)
  256.                 tags.append(style_tag)
  257.                 continue
  258.         
  259.         if tag in TAG_SPACE:
  260.             if not fb2_text and fb2_text[-1] != ' ' or not fb2_text[-1].endswith(' '):
  261.                 fb2_text.append(' ')
  262.             
  263.         
  264.         if hasattr(elem, 'text') and elem.text:
  265.             if 'p' not in tag_stack + tags:
  266.                 fb2_text.append('<p>%s</p>' % prepare_string_for_xml(elem.text))
  267.             else:
  268.                 fb2_text.append(prepare_string_for_xml(elem.text))
  269.         
  270.         for item in elem:
  271.             fb2_text += self.dump_text(item, stylizer, page, tag_stack + tags)
  272.         
  273.         tags.reverse()
  274.         fb2_text += self.close_tags(tags)
  275.         if hasattr(elem, 'tail') and elem.tail:
  276.             if 'p' not in tag_stack:
  277.                 fb2_text.append('<p>%s</p>' % prepare_string_for_xml(elem.tail))
  278.             else:
  279.                 fb2_text.append(prepare_string_for_xml(elem.tail))
  280.         
  281.         return fb2_text
  282.  
  283.     
  284.     def close_tags(self, tags):
  285.         text = []
  286.         for tag in tags:
  287.             text.append('</%s>' % tag)
  288.         
  289.         return text
  290.  
  291.  
  292.