home *** CD-ROM | disk | FTP | other *** search
/ Maximum CD 2010 November / maximum-cd-2010-11.iso / DiscContents / calibre-0.7.13.msi / file_995 (.txt) < prev    next >
Encoding:
Python Compiled Bytecode  |  2010-08-06  |  13.3 KB  |  346 lines

  1. # Source Generated with Decompyle++
  2. # File: in.pyc (Python 2.6)
  3.  
  4. from __future__ import with_statement
  5. __license__ = 'GPL v3'
  6. __copyright__ = '2008 Kovid Goyal <kovid at kovidgoyal.net>'
  7. import re
  8. import os
  9. import math
  10. from cStringIO import StringIO
  11. from PyQt4.Qt import QFontDatabase
  12. from calibre.customize.ui import available_input_formats
  13. from calibre.ebooks.metadata.opf2 import OPF
  14. from calibre.ptempfile import TemporaryDirectory
  15. from calibre.ebooks.chardet import xml_to_unicode
  16. from calibre.utils.zipfile import safe_replace, ZipFile
  17. from calibre.utils.config import DynamicConfig
  18. from calibre.utils.logging import Log
  19. from calibre import guess_type, prints
  20. from calibre.ebooks.oeb.transforms.cover import CoverManager
  21. TITLEPAGE = CoverManager.SVG_TEMPLATE.decode('utf-8').replace('__ar__', 'none').replace('__viewbox__', '0 0 600 800').replace('__width__', '600').replace('__height__', '800')
  22.  
  23. def character_count(html):
  24.     count = 0
  25.     strip_space = re.compile('\\s+')
  26.     for match in re.finditer('>[^<]+<', html):
  27.         count += len(strip_space.sub(' ', match.group())) - 2
  28.     
  29.     return count
  30.  
  31.  
  32. class UnsupportedFormatError(Exception):
  33.     
  34.     def __init__(self, fmt):
  35.         Exception.__init__(self, _('%s format books are not supported') % fmt.upper())
  36.  
  37.  
  38.  
  39. class SpineItem(unicode):
  40.     
  41.     def __new__(cls, path, mime_type = None):
  42.         ppath = path.partition('#')[0]
  43.         if not os.path.exists(path) and os.path.exists(ppath):
  44.             path = ppath
  45.         
  46.         obj = super(SpineItem, cls).__new__(cls, path)
  47.         raw = open(path, 'rb').read()
  48.         (raw, obj.encoding) = xml_to_unicode(raw)
  49.         obj.character_count = character_count(raw)
  50.         obj.start_page = -1
  51.         obj.pages = -1
  52.         obj.max_page = -1
  53.         if mime_type is None:
  54.             mime_type = guess_type(obj)[0]
  55.         
  56.         obj.mime_type = mime_type
  57.         return obj
  58.  
  59.  
  60.  
  61. class FakeOpts(object):
  62.     verbose = 0
  63.     breadth_first = False
  64.     max_levels = 5
  65.     input_encoding = None
  66.  
  67.  
  68. def is_supported(path):
  69.     ext = os.path.splitext(path)[1].replace('.', '').lower()
  70.     ext = re.sub('(x{0,1})htm(l{0,1})', 'html', ext)
  71.     return ext in available_input_formats()
  72.  
  73.  
  74. def write_oebbook(oeb, path):
  75.     OEBWriter = OEBWriter
  76.     import calibre.ebooks.oeb.writer
  77.     walk = walk
  78.     import calibre
  79.     w = OEBWriter()
  80.     w(oeb, path)
  81.     for f in walk(path):
  82.         if f.endswith('.opf'):
  83.             return f
  84.     
  85.  
  86.  
  87. class EbookIterator(object):
  88.     CHARACTERS_PER_PAGE = 1000
  89.     
  90.     def __init__(self, pathtoebook, log = None):
  91.         self.log = log
  92.         if log is None:
  93.             self.log = Log()
  94.         
  95.         pathtoebook = pathtoebook.strip()
  96.         self.pathtoebook = os.path.abspath(pathtoebook)
  97.         self.config = DynamicConfig(name = 'iterator')
  98.         ext = os.path.splitext(pathtoebook)[1].replace('.', '').lower()
  99.         ext = re.sub('(x{0,1})htm(l{0,1})', 'html', ext)
  100.         self.ebook_ext = ext
  101.  
  102.     
  103.     def search(self, text, index, backwards = False):
  104.         text = text.lower()
  105.         for i, path in enumerate(self.spine):
  106.             if (backwards or i < index or not backwards) and i > index:
  107.                 if text in open(path, 'rb').read().decode(path.encoding).lower():
  108.                     return i
  109.                 continue
  110.             text in open(path, 'rb').read().decode(path.encoding).lower()
  111.         
  112.  
  113.     
  114.     def find_missing_css_files(self):
  115.         for x in os.walk(os.path.dirname(self.pathtoopf)):
  116.             for f in x[-1]:
  117.                 if f.endswith('.css'):
  118.                     yield os.path.join(x[0], f)
  119.                     continue
  120.             
  121.         
  122.  
  123.     
  124.     def find_declared_css_files(self):
  125.         for item in self.opf.manifest:
  126.             if item.mime_type and 'css' in item.mime_type.lower():
  127.                 yield item.path
  128.                 continue
  129.         
  130.  
  131.     
  132.     def find_embedded_fonts(self):
  133.         css_files = set(self.find_declared_css_files())
  134.         if not css_files:
  135.             css_files = set(self.find_missing_css_files())
  136.         
  137.         bad_map = { }
  138.         font_family_pat = re.compile('font-family\\s*:\\s*([^;]+)')
  139.         for csspath in css_files:
  140.             
  141.             try:
  142.                 css = open(csspath, 'rb').read().decode('utf-8', 'replace')
  143.             except:
  144.                 continue
  145.  
  146.             for match in re.compile('@font-face\\s*{([^}]+)}').finditer(css):
  147.                 block = match.group(1)
  148.                 family = font_family_pat.search(block)
  149.                 url = re.compile('url\\s*\\([\\\'"]*(.+?)[\\\'"]*\\)', re.DOTALL).search(block)
  150.                 if url:
  151.                     path = url.group(1).split('/')
  152.                     path = os.path.join(os.path.dirname(csspath), *path)
  153.                     if not os.access(path, os.R_OK):
  154.                         continue
  155.                     
  156.                     id = QFontDatabase.addApplicationFont(path)
  157.                     if id != -1:
  158.                         families = [ unicode(f) for f in QFontDatabase.applicationFontFamilies(id) ]
  159.                         if family:
  160.                             family = family.group(1)
  161.                             specified_families = [ x.strip().replace('"', '').replace("'", '') for x in family.split(',') ]
  162.                             aliasing_ok = False
  163.                             for f in specified_families:
  164.                                 bad_map[f] = families[0]
  165.                                 if not aliasing_ok and f in families:
  166.                                     aliasing_ok = True
  167.                                     continue
  168.                                 []
  169.                             
  170.                             if not aliasing_ok:
  171.                                 prints('WARNING: Family aliasing not fully supported.')
  172.                                 prints('\tDeclared family: %r not in actual families: %r' % (family, families))
  173.                             else:
  174.                                 prints('Loaded embedded font:', repr(family))
  175.                         
  176.                     
  177.                 id != -1
  178.             
  179.         
  180.  
  181.     
  182.     def __enter__(self, processed = False, only_input_plugin = False):
  183.         self.delete_on_exit = []
  184.         self._tdir = TemporaryDirectory('_ebook_iter')
  185.         self.base = self._tdir.__enter__()
  186.         Plumber = Plumber
  187.         create_oebbook = create_oebbook
  188.         import calibre.ebooks.conversion.plumber
  189.         plumber = Plumber(self.pathtoebook, self.base, self.log)
  190.         plumber.setup_options()
  191.         if self.pathtoebook.lower().endswith('.opf'):
  192.             plumber.opts.dont_package = True
  193.         
  194.         if hasattr(plumber.opts, 'no_process'):
  195.             plumber.opts.no_process = True
  196.         
  197.         plumber.input_plugin.for_viewer = True
  198.         plumber.input_plugin.__enter__()
  199.         
  200.         try:
  201.             self.pathtoopf = plumber.input_plugin(open(plumber.input, 'rb'), plumber.opts, plumber.input_fmt, self.log, { }, self.base)
  202.         finally:
  203.             pass
  204.  
  205.         if hasattr(self.pathtoopf, 'manifest'):
  206.             self.pathtoopf = write_oebbook(self.pathtoopf, self.base)
  207.         
  208.         self.opf = getattr(plumber.input_plugin, 'optimize_opf_parsing', None)
  209.         if self.opf is None:
  210.             self.opf = OPF(self.pathtoopf, os.path.dirname(self.pathtoopf))
  211.         
  212.         self.language = self.opf.language
  213.         if self.language:
  214.             self.language = self.language.lower()
  215.         
  216.         ordered = [] + _[2]
  217.         self.spine = []
  218.         for i in ordered:
  219.             spath = i.path
  220.             mt = None
  221.             
  222.             try:
  223.                 self.spine.append(SpineItem(spath, mime_type = mt))
  224.             continue
  225.             self.log.warn('Missing spine item:', repr(spath))
  226.             continue
  227.  
  228.         
  229.         cover = self.opf.cover
  230.         if self.ebook_ext in ('lit', 'mobi', 'prc', 'opf') and cover:
  231.             cfile = os.path.join(self.base, 'calibre_iterator_cover.html')
  232.             chtml = (TITLEPAGE % os.path.relpath(cover, self.base).replace(os.sep, '/')).encode('utf-8')
  233.             open(cfile, 'wb').write(chtml)
  234.             self.spine[0:0] = [
  235.                 SpineItem(cfile, mime_type = 'application/xhtml+xml')]
  236.             self.delete_on_exit.append(cfile)
  237.         
  238.         if self.opf.path_to_html_toc is not None and self.opf.path_to_html_toc not in self.spine:
  239.             
  240.             try:
  241.                 self.spine.append(SpineItem(self.opf.path_to_html_toc))
  242.             import traceback
  243.             traceback.print_exc()
  244.  
  245.         
  246.         sizes = [ i.character_count for i in self.spine ]
  247.         self.pages = [ math.ceil(i / float(self.CHARACTERS_PER_PAGE)) for i in sizes ]
  248.         for p, s in zip(self.pages, self.spine):
  249.             s.pages = p
  250.         
  251.         start = 1
  252.         for s in self.spine:
  253.             s.start_page = start
  254.             start += s.pages
  255.             s.max_page = s.start_page + s.pages - 1
  256.         
  257.         self.toc = self.opf.toc
  258.         self.find_embedded_fonts()
  259.         self.read_bookmarks()
  260.         return self
  261.  
  262.     
  263.     def parse_bookmarks(self, raw):
  264.         for line in raw.splitlines():
  265.             if line.count('^') > 0:
  266.                 tokens = line.rpartition('^')
  267.                 title = tokens[0]
  268.                 ref = tokens[2]
  269.                 self.bookmarks.append((title, ref))
  270.                 continue
  271.         
  272.  
  273.     
  274.     def serialize_bookmarks(self, bookmarks):
  275.         dat = []
  276.         for title, bm in bookmarks:
  277.             dat.append(u'%s^%s' % (title, bm))
  278.         
  279.         return (u'\n'.join(dat) + '\n').encode('utf-8')
  280.  
  281.     
  282.     def read_bookmarks(self):
  283.         self.bookmarks = []
  284.         bmfile = os.path.join(self.base, 'META-INF', 'calibre_bookmarks.txt')
  285.         raw = ''
  286.         if os.path.exists(bmfile):
  287.             raw = open(bmfile, 'rb').read().decode('utf-8')
  288.         else:
  289.             saved = self.config['bookmarks_' + self.pathtoebook]
  290.             if saved:
  291.                 raw = saved
  292.             
  293.         self.parse_bookmarks(raw)
  294.  
  295.     
  296.     def save_bookmarks(self, bookmarks = None):
  297.         if bookmarks is None:
  298.             bookmarks = self.bookmarks
  299.         
  300.         dat = self.serialize_bookmarks(bookmarks)
  301.         if os.path.splitext(self.pathtoebook)[1].lower() == '.epub' and os.access(self.pathtoebook, os.R_OK):
  302.             
  303.             try:
  304.                 zf = open(self.pathtoebook, 'r+b')
  305.             except IOError:
  306.                 return None
  307.  
  308.             zipf = ZipFile(zf, mode = 'a')
  309.             for name in zipf.namelist():
  310.                 if name == 'META-INF/calibre_bookmarks.txt':
  311.                     safe_replace(zf, 'META-INF/calibre_bookmarks.txt', StringIO(dat))
  312.                     return None
  313.             
  314.             zipf.writestr('META-INF/calibre_bookmarks.txt', dat)
  315.         else:
  316.             self.config['bookmarks_' + self.pathtoebook] = dat
  317.  
  318.     
  319.     def add_bookmark(self, bm):
  320.         dups = []
  321.         for x in self.bookmarks:
  322.             if x[0] == bm[0]:
  323.                 dups.append(x)
  324.                 continue
  325.         
  326.         for x in dups:
  327.             self.bookmarks.remove(x)
  328.         
  329.         self.bookmarks.append(bm)
  330.         self.save_bookmarks()
  331.  
  332.     
  333.     def set_bookmarks(self, bookmarks):
  334.         self.bookmarks = bookmarks
  335.  
  336.     
  337.     def __exit__(self, *args):
  338.         self._tdir.__exit__(*args)
  339.         for x in self.delete_on_exit:
  340.             if os.path.exists(x):
  341.                 os.remove(x)
  342.                 continue
  343.         
  344.  
  345.  
  346.