home *** CD-ROM | disk | FTP | other *** search
/ Maximum CD 2010 November / maximum-cd-2010-11.iso / DiscContents / calibre-0.7.13.msi / file_861 (.txt) < prev    next >
Encoding:
Python Compiled Bytecode  |  2010-08-06  |  7.5 KB  |  222 lines

# Source Generated with Decompyle++
# File: in.pyc (Python 2.6)

__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>, and Alex Bramley <a.bramley at gmail.com>.'

import os
import uuid

from lxml import html

from calibre.customize.conversion import InputFormatPlugin
from calibre.ptempfile import TemporaryDirectory
from calibre.utils.localization import get_lang
from calibre.utils.filenames import ascii_filename
  13.  
  14. class CHMInput(InputFormatPlugin):
  15.     name = 'CHM Input'
  16.     author = 'Kovid Goyal and Alex Bramley'
  17.     description = 'Convert CHM files to OEB'
  18.     file_types = set([
  19.         'chm'])
  20.     
  21.     def _chmtohtml(self, output_dir, chm_path, no_images, log):
  22.         CHMReader = CHMReader
  23.         import calibre.ebooks.chm.reader
  24.         log.debug('Opening CHM file')
  25.         rdr = CHMReader(chm_path, log)
  26.         log.debug('Extracting CHM to %s' % output_dir)
  27.         rdr.extract_content(output_dir)
  28.         self._chm_reader = rdr
  29.         return rdr.hhc_path
  30.  
  31.     
  32.     def convert(self, stream, options, file_ext, log, accelerators):
  33.         get_metadata_from_reader = get_metadata_from_reader
  34.         import calibre.ebooks.chm.metadata
  35.         plugin_for_input_format = plugin_for_input_format
  36.         import calibre.customize.ui
  37.         log.debug('Processing CHM...')
  38.         
  39.         try:
  40.             tdir = _[1]
  41.             html_input = plugin_for_input_format('html')
  42.             for opt in html_input.options:
  43.                 setattr(options, opt.option.name, opt.recommended_value)
  44.             
  45.             options.input_encoding = 'utf-8'
  46.             no_images = False
  47.             chm_name = stream.name
  48.             stream.close()
  49.             log.debug('tdir=%s' % tdir)
  50.             log.debug('stream.name=%s' % stream.name)
  51.             mainname = self._chmtohtml(tdir, chm_name, no_images, log)
  52.             mainpath = os.path.join(tdir, mainname)
  53.             metadata = get_metadata_from_reader(self._chm_reader)
  54.             odi = options.debug_pipeline
  55.             options.debug_pipeline = None
  56.             htmlpath = self._create_html_root(mainpath, log)
  57.             oeb = self._create_oebbook_html(htmlpath, tdir, options, log, metadata)
  58.             options.debug_pipeline = odi
  59.         finally:
  60.             pass
  61.  
  62.         return oeb
  63.  
  64.     
  65.     def _create_oebbook_html(self, htmlpath, basedir, opts, log, mi):
  66.         HTMLInput = HTMLInput
  67.         import calibre.ebooks.html.input
  68.         opts.breadth_first = True
  69.         htmlinput = HTMLInput(None)
  70.         oeb = htmlinput.create_oebbook(htmlpath, basedir, opts, log, mi)
  71.         return oeb
  72.  
  73.     
  74.     def _create_oebbook(self, hhcpath, basedir, opts, log, mi):
  75.         create_oebbook = create_oebbook
  76.         import calibre.ebooks.conversion.plumber
  77.         DirContainer = DirContainer
  78.         import calibre.ebooks.oeb.base
  79.         oeb = create_oebbook(log, None, opts, self, encoding = opts.input_encoding, populate = False)
  80.         self.oeb = oeb
  81.         metadata = oeb.metadata
  82.         if mi.title:
  83.             metadata.add('title', mi.title)
  84.         
  85.         if mi.authors:
  86.             for a in mi.authors:
  87.                 metadata.add('creator', a, attrib = {
  88.                     'role': 'aut' })
  89.             
  90.         
  91.         if mi.publisher:
  92.             metadata.add('publisher', mi.publisher)
  93.         
  94.         if mi.isbn:
  95.             metadata.add('identifier', mi.isbn, attrib = {
  96.                 'scheme': 'ISBN' })
  97.         
  98.         if not metadata.language:
  99.             oeb.logger.warn(u'Language not specified')
  100.             metadata.add('language', get_lang().replace('_', '-'))
  101.         
  102.         if not metadata.creator:
  103.             oeb.logger.warn('Creator not specified')
  104.             metadata.add('creator', _('Unknown'))
  105.         
  106.         if not metadata.title:
  107.             oeb.logger.warn('Title not specified')
  108.             metadata.add('title', _('Unknown'))
  109.         
  110.         bookid = str(uuid.uuid4())
  111.         metadata.add('identifier', bookid, id = 'uuid_id', scheme = 'uuid')
  112.         for ident in metadata.identifier:
  113.             if 'id' in ident.attrib:
  114.                 self.oeb.uid = metadata.identifier[0]
  115.                 break
  116.                 continue
  117.         
  118.         hhcdata = self._read_file(hhcpath)
  119.         hhcroot = html.fromstring(hhcdata)
  120.         chapters = self._process_nodes(hhcroot)
  121.         log.debug('Found %d section nodes' % len(chapters))
  122.         if len(chapters) > 0:
  123.             path0 = chapters[0][1]
  124.             subpath = os.path.dirname(path0)
  125.             htmlpath = os.path.join(basedir, subpath)
  126.             oeb.container = DirContainer(htmlpath, log)
  127.             for chapter in chapters:
  128.                 title = chapter[0]
  129.                 basename = os.path.basename(chapter[1])
  130.                 self._add_item(oeb, title, basename)
  131.             
  132.             oeb.container = DirContainer(htmlpath, oeb.log)
  133.         
  134.         return oeb
  135.  
  136.     
  137.     def _create_html_root(self, hhcpath, log):
  138.         hhcdata = self._read_file(hhcpath)
  139.         hhcroot = html.fromstring(hhcdata)
  140.         chapters = self._process_nodes(hhcroot)
  141.         log.debug('Found %d section nodes' % len(chapters))
  142.         htmlpath = os.path.splitext(hhcpath)[0] + '.html'
  143.         f = open(htmlpath, 'wb')
  144.         if chapters:
  145.             f.write('<html><head><meta http-equiv="Content-type" content="text/html;charset=UTF-8" /></head><body>\n')
  146.             path0 = chapters[0][1]
  147.             subpath = os.path.dirname(path0)
  148.             for chapter in chapters:
  149.                 title = chapter[0]
  150.                 rsrcname = os.path.basename(chapter[1])
  151.                 rsrcpath = os.path.join(subpath, rsrcname)
  152.                 url = '<br /><a href=' + rsrcpath + '>' + title + ' </a>\n'
  153.                 if isinstance(url, unicode):
  154.                     url = url.encode('utf-8')
  155.                 
  156.                 f.write(url)
  157.             
  158.             f.write('</body></html>')
  159.         else:
  160.             f.write(hhcdata)
  161.         f.close()
  162.         return htmlpath
  163.  
  164.     
  165.     def _read_file(self, name):
  166.         f = open(name, 'rb')
  167.         data = f.read()
  168.         f.close()
  169.         return data
  170.  
  171.     
  172.     def _visit_node(self, node, chapters, depth):
  173.         if isinstance(node.tag, basestring):
  174.             match_string = match_string
  175.             import calibre.ebooks.chm.reader
  176.             chapter_path = None
  177.             if match_string(node.tag, 'object') and match_string(node.attrib['type'], 'text/sitemap'):
  178.                 chapter_title = None
  179.                 for child in node:
  180.                     if match_string(child.tag, 'param') and match_string(child.attrib['name'], 'name'):
  181.                         chapter_title = child.attrib['value']
  182.                     
  183.                     if match_string(child.tag, 'param') and match_string(child.attrib['name'], 'local'):
  184.                         chapter_path = child.attrib['value']
  185.                         continue
  186.                 
  187.                 if chapter_title is not None and chapter_path is not None:
  188.                     chapter = [
  189.                         chapter_title,
  190.                         chapter_path,
  191.                         depth]
  192.                     chapters.append(chapter)
  193.                 
  194.             
  195.             if node.tag == 'UL':
  196.                 depth = depth + 1
  197.             
  198.             if node.tag == '/UL':
  199.                 depth = depth - 1
  200.             
  201.         
  202.  
  203.     
  204.     def _process_nodes(self, root):
  205.         chapters = []
  206.         depth = 0
  207.         for node in root.iter():
  208.             self._visit_node(node, chapters, depth)
  209.         
  210.         return chapters
  211.  
  212.     
  213.     def _add_item(self, oeb, title, path):
  214.         bname = os.path.basename(path)
  215.         (id, href) = oeb.manifest.generate(id = 'html', href = ascii_filename(bname))
  216.         item = oeb.manifest.add(id, href, 'text/html')
  217.         item.html_input_href = bname
  218.         oeb.spine.add(item, True)
  219.         oeb.toc.add(title, item.href)
  220.  
  221.  
  222.