home *** CD-ROM | disk | FTP | other *** search
/ Maximum CD 2010 November / maximum-cd-2010-11.iso / DiscContents / calibre-0.7.13.msi / file_1012 (.txt) < prev    next >
Encoding:
Python Compiled Bytecode  |  2010-08-06  |  6.8 KB  |  224 lines

  1. # Source Generated with Decompyle++
  2. # File: in.pyc (Python 2.6)
  3.  
  4. from __future__ import with_statement
  5. __license__ = 'GPL v3'
  6. __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
  7. __docformat__ = 'restructuredtext en'
  8. import re
  9. from lxml import etree
  10. from urlparse import urlparse
  11. from calibre.ebooks.oeb.base import XPNSMAP, TOC, XHTML, xml2text
  12. from calibre.ebooks import ConversionError
  13.  
  14. def XPath(x):
  15.     
  16.     try:
  17.         return etree.XPath(x, namespaces = XPNSMAP)
  18.     except etree.XPathSyntaxError:
  19.         raise ConversionError('The syntax of the XPath expression %s is invalid.' % repr(x))
  20.  
  21.  
  22.  
  23. class DetectStructure(object):
  24.     
  25.     def __call__(self, oeb, opts):
  26.         self.log = oeb.log
  27.         self.oeb = oeb
  28.         self.opts = opts
  29.         self.log('Detecting structure...')
  30.         self.detect_chapters()
  31.         if self.oeb.auto_generated_toc or opts.use_auto_toc:
  32.             orig_toc = self.oeb.toc
  33.             self.oeb.toc = TOC()
  34.             self.create_level_based_toc()
  35.             if self.oeb.toc.count() < 1:
  36.                 if not (opts.no_chapters_in_toc) and self.detected_chapters:
  37.                     self.create_toc_from_chapters()
  38.                 
  39.                 if self.oeb.toc.count() < opts.toc_threshold:
  40.                     self.create_toc_from_links()
  41.                 
  42.             
  43.             if self.oeb.toc.count() < 2 and orig_toc.count() > 2:
  44.                 self.oeb.toc = orig_toc
  45.             else:
  46.                 self.oeb.auto_generated_toc = True
  47.                 self.log('Auto generated TOC with %d entries.' % self.oeb.toc.count())
  48.         
  49.         if opts.toc_filter is not None:
  50.             regexp = re.compile(opts.toc_filter)
  51.             for node in list(self.oeb.toc.iter()):
  52.                 if not (node.title) or regexp.search(node.title) is not None:
  53.                     None(self.log, 'Filtering' if node.title else 'empty node', 'from TOC')
  54.                     self.oeb.toc.remove(node)
  55.                     continue
  56.             
  57.         
  58.         if opts.page_breaks_before is not None:
  59.             pb_xpath = XPath(opts.page_breaks_before)
  60.             for item in oeb.spine:
  61.                 for elem in pb_xpath(item.data):
  62.                     style = elem.get('style', '')
  63.                     if style:
  64.                         style += '; '
  65.                     
  66.                     elem.set('style', style + 'page-break-before:always')
  67.                 
  68.             
  69.         
  70.         for node in self.oeb.toc.iter():
  71.             if not (node.title) or not node.title.strip():
  72.                 node.title = _('Unnamed')
  73.                 continue
  74.         
  75.  
  76.     
  77.     def detect_chapters(self):
  78.         self.detected_chapters = []
  79.         if self.opts.chapter:
  80.             chapter_xpath = XPath(self.opts.chapter)
  81.             for item in self.oeb.spine:
  82.                 for x in chapter_xpath(item.data):
  83.                     self.detected_chapters.append((item, x))
  84.                 
  85.             
  86.             chapter_mark = self.opts.chapter_mark
  87.             page_break_before = 'display: block; page-break-before: always'
  88.             page_break_after = 'display: block; page-break-after: always'
  89.             for item, elem in self.detected_chapters:
  90.                 text = xml2text(elem).strip()
  91.                 self.log('\tDetected chapter:', text[:50])
  92.                 if chapter_mark == 'none':
  93.                     continue
  94.                 elif chapter_mark == 'rule':
  95.                     mark = etree.Element(XHTML('hr'))
  96.                 elif chapter_mark == 'pagebreak':
  97.                     mark = etree.Element(XHTML('div'), style = page_break_after)
  98.                 else:
  99.                     mark = etree.Element(XHTML('hr'), style = page_break_before)
  100.                 
  101.                 try:
  102.                     elem.addprevious(mark)
  103.                 continue
  104.                 except TypeError:
  105.                     self.log.exception('Failed to mark chapter')
  106.                     continue
  107.                 
  108.  
  109.             
  110.         
  111.  
  112.     
  113.     def create_level_based_toc(self):
  114.         if self.opts.level1_toc is None:
  115.             return None
  116.         for item in self.oeb.spine:
  117.             self.add_leveled_toc_items(item)
  118.         
  119.  
  120.     
  121.     def create_toc_from_chapters(self):
  122.         counter = self.oeb.toc.next_play_order()
  123.         for item, elem in self.detected_chapters:
  124.             (text, href) = self.elem_to_link(item, elem, counter)
  125.             self.oeb.toc.add(text, href, play_order = counter)
  126.             counter += 1
  127.         
  128.  
  129.     
  130.     def create_toc_from_links(self):
  131.         num = 0
  132.         for item in self.oeb.spine:
  133.             for a in XPath('//h:a[@href]')(item.data):
  134.                 href = a.get('href')
  135.                 purl = urlparse(href)
  136.                 if not purl[0] or purl[0] == 'file':
  137.                     href = purl.path
  138.                     frag = purl.fragment
  139.                     href = item.abshref(href)
  140.                     if frag:
  141.                         href = '#'.join((href, frag))
  142.                     
  143.                     if not self.oeb.toc.has_href(href):
  144.                         text = xml2text(a)
  145.                         text = text[:100].strip()
  146.                         if not self.oeb.toc.has_text(text):
  147.                             num += 1
  148.                             self.oeb.toc.add(text, href, play_order = self.oeb.toc.next_play_order())
  149.                             if self.opts.max_toc_links > 0 and num >= self.opts.max_toc_links:
  150.                                 self.log('Maximum TOC links reached, stopping.')
  151.                                 return None
  152.                         
  153.                     
  154.                 self.oeb.toc.has_href(href)
  155.             
  156.         
  157.  
  158.     
  159.     def elem_to_link(self, item, elem, counter):
  160.         text = xml2text(elem)
  161.         text = text[:100].strip()
  162.         id = elem.get('id', 'calibre_toc_%d' % counter)
  163.         elem.set('id', id)
  164.         href = '#'.join((item.href, id))
  165.         return (text, href)
  166.  
  167.     
  168.     def add_leveled_toc_items(self, item):
  169.         level1 = XPath(self.opts.level1_toc)(item.data)
  170.         level1_order = []
  171.         document = item
  172.         counter = 1
  173.         if level1:
  174.             added = { }
  175.             for elem in level1:
  176.                 (text, _href) = self.elem_to_link(document, elem, counter)
  177.                 counter += 1
  178.                 if text:
  179.                     node = self.oeb.toc.add(text, _href, play_order = self.oeb.toc.next_play_order())
  180.                     level1_order.append(node)
  181.                     added[elem] = node
  182.                     continue
  183.             
  184.             if self.opts.level2_toc is not None:
  185.                 added2 = { }
  186.                 level2 = list(XPath(self.opts.level2_toc)(document.data))
  187.                 for elem in level2:
  188.                     level1 = None
  189.                     for item in document.data.iterdescendants():
  190.                         if item in added.keys():
  191.                             level1 = added[item]
  192.                             continue
  193.                         if item == elem and level1 is not None:
  194.                             (text, _href) = self.elem_to_link(document, elem, counter)
  195.                             counter += 1
  196.                             if text:
  197.                                 added2[elem] = level1.add(text, _href, play_order = self.oeb.toc.next_play_order())
  198.                             
  199.                         text
  200.                     
  201.                 
  202.                 if self.opts.level3_toc is not None:
  203.                     level3 = list(XPath(self.opts.level3_toc)(document.data))
  204.                     for elem in level3:
  205.                         level2 = None
  206.                         for item in document.data.iterdescendants():
  207.                             if item in added2.keys():
  208.                                 level2 = added2[item]
  209.                                 continue
  210.                             if item == elem and level2 is not None:
  211.                                 (text, _href) = self.elem_to_link(document, elem, counter)
  212.                                 counter += 1
  213.                                 if text:
  214.                                     level2.add(text, _href, play_order = self.oeb.toc.next_play_order())
  215.                                 
  216.                             text
  217.                         
  218.                     
  219.                 
  220.             
  221.         
  222.  
  223.  
  224.