home *** CD-ROM | disk | FTP | other *** search
- # Source Generated with Decompyle++
- # File: in.pyc (Python 2.6)
-
- __license__ = 'GPL 3'
- __copyright__ = '2009, John Schember <john@nachtimwald.com>'
- __docformat__ = 'restructuredtext en'
- import os
- import struct
- import zlib
- from urllib import unquote as urlunquote
- from calibre import CurrentDir
- from calibre.ebooks.rb import HEADER
- from calibre.ebooks.rb import RocketBookError
- from calibre.ebooks.metadata.rb import get_metadata
- from calibre.ebooks.metadata.opf2 import OPFCreator
-
class RBToc(list):
    """Table of contents of a RocketBook file: a plain list of ``Item``s."""

    class Item(object):
        """One table-of-contents entry.

        Attributes are stored exactly as given:

        * ``name``   -- entry name
        * ``size``   -- size value recorded for the entry
        * ``offset`` -- offset value recorded for the entry
        * ``flags``  -- flags value recorded for the entry
        """

        def __init__(self, name='', size=0, offset=0, flags=0):
            self.name = name
            self.size = size
            self.offset = offset
            self.flags = flags

        def __repr__(self):
            # Debug aid only; nothing in the file format depends on it.
            return '%s(name=%r, size=%r, offset=%r, flags=%r)' % (
                type(self).__name__, self.name, self.size, self.offset,
                self.flags)
-
-
-
-
class Reader(object):
    """Extract the pages and images stored in a RocketBook container.

    The constructor validates the stream, reads the book metadata and
    loads the table of contents; ``extract_content`` then writes every
    HTML page and PNG image into a directory and produces an OPF file
    describing them.
    """

    # NOTE(review): the decompiler dropped the literal that is used when no
    # encoding is supplied (only ``... if self.encoding is None else
    # self.encoding`` survived).  cp1252 is assumed here -- confirm against
    # the pristine source before relying on it.
    DEFAULT_ENCODING = 'cp1252'

    def __init__(self, stream, log, encoding=None):
        """Validate *stream* and load its metadata and table of contents.

        :param stream: seekable binary file object holding the book.
        :param log: object exposing a ``debug(message)`` method.
        :param encoding: text encoding of the pages; ``None`` selects
            ``DEFAULT_ENCODING``.
        :raises RocketBookError: if ``verify_file`` rejects the stream.
        """
        self.stream = stream
        self.log = log
        self.encoding = encoding
        self.verify_file()
        self.mi = get_metadata(self.stream)
        self.toc = self.get_toc()

    def read_i32(self):
        """Read four bytes from the stream as a little-endian unsigned int."""
        return struct.unpack('<I', self.stream.read(4))[0]

    def verify_file(self):
        """Check the header magic and the file size recorded at offset 28.

        :raises RocketBookError: if the first 14 bytes differ from
            ``HEADER`` or the recorded size differs from the real size.
        """
        self.stream.seek(0)
        if self.stream.read(14) != HEADER:
            raise RocketBookError('Could not read file: %s. Does not contain a valid RocketBook Header.' % self.stream.name)

        self.stream.seek(28)
        size = self.read_i32()
        # Seeking to the end and asking for the position yields the real size.
        self.stream.seek(0, os.SEEK_END)
        real_size = self.stream.tell()
        if size != real_size:
            raise RocketBookError('File is corrupt. The file size recorded in the header does not match the actual file size.')

    def get_toc(self):
        """Read the table of contents and return it as an ``RBToc``.

        The 32-bit value at offset 24 locates the table.  The table starts
        with an entry count; each entry is a 32-byte NUL-padded, URL-quoted
        name followed by three 32-bit values: size, offset and flags.
        """
        self.stream.seek(24)
        toc_offset = self.read_i32()
        self.stream.seek(toc_offset)
        pages = self.read_i32()

        toc = RBToc()
        for _ in range(pages):
            # A bytes literal keeps the strip valid for binary streams on
            # both Python 2.6+ and Python 3.
            name = urlunquote(self.stream.read(32).strip(b'\x00'))
            size = self.read_i32()
            offset = self.read_i32()
            flags = self.read_i32()
            toc.append(RBToc.Item(name=name, size=size, offset=offset, flags=flags))
        return toc

    def get_text(self, toc_item, output_dir):
        """Write the page described by *toc_item* to *output_dir* as UTF-8.

        Entries whose flags are 1 or 2 are skipped.  Flags equal to 8 mark a
        page stored as a sequence of zlib-compressed chunks; any other value
        is read verbatim (``toc_item.size`` bytes).  Always returns ``None``.
        """
        if toc_item.flags in (1, 2):
            return None

        self.stream.seek(toc_item.offset)
        if toc_item.flags == 8:
            count = self.read_i32()
            self.read_i32()  # value is unused; presumably the uncompressed size
            chunk_sizes = [self.read_i32() for _ in range(count)]
            # Join the decompressed bytes before decoding so that a
            # multi-byte character split across two chunks survives.
            raw = b''.join(zlib.decompress(self.stream.read(size))
                           for size in chunk_sizes)
        else:
            raw = self.stream.read(toc_item.size)

        output = self._decode(raw)
        self._write_file(output_dir, toc_item.name, output.encode('utf-8'))
        return None

    def get_image(self, toc_item, output_dir):
        """Copy the image described by *toc_item* into *output_dir*.

        Only entries whose flags are 0 are written; others are skipped.
        Always returns ``None``.
        """
        if toc_item.flags != 0:
            return None
        self.stream.seek(toc_item.offset)
        data = self.stream.read(toc_item.size)
        self._write_file(output_dir, toc_item.name, data)
        return None

    def extract_content(self, output_dir):
        """Extract every HTML page and PNG image, then build the OPF.

        :param output_dir: directory receiving the extracted files.
        :returns: whatever ``create_opf`` returns (the OPF path).
        """
        self.log.debug('Extracting content from file...')
        html = []
        images = []
        for item in self.toc:
            lowered = item.name.lower()
            if lowered.endswith('html'):
                self.log.debug('HTML item %s found...' % item.name)
                html.append(item.name)
                self.get_text(item, output_dir)
            if lowered.endswith('png'):
                self.log.debug('PNG item %s found...' % item.name)
                images.append(item.name)
                self.get_image(item, output_dir)

        opf_path = self.create_opf(output_dir, html, images)
        return opf_path

    def create_opf(self, output_dir, pages, images):
        """Write ``metadata.opf`` for the extracted files and return its path.

        :param pages: names of the HTML pages; they form the spine.
        :param images: names of the images; listed in the manifest only.
        """
        # Using the context manager (rather than a bare ``__enter__`` call)
        # guarantees the working directory is restored afterwards.
        with CurrentDir(output_dir):
            opf = OPFCreator(output_dir, self.mi)
            manifest = [(entry, None) for entry in pages + images]
            opf.create_manifest(manifest)
            opf.create_spine(pages)
            # NOTE(review): the decompiler lost the ``open`` call; the file
            # name is taken from the path this method returns.
            with open('metadata.opf', 'wb') as opffile:
                opf.render(opffile)

        return os.path.join(output_dir, 'metadata.opf')

    def _decode(self, raw):
        """Decode page bytes, replacing undecodable sequences."""
        if self.encoding is None:
            encoding = self.DEFAULT_ENCODING
        else:
            encoding = self.encoding
        return raw.decode(encoding, 'replace')

    def _write_file(self, output_dir, name, payload):
        """Write *payload* (bytes) to ``output_dir/name``."""
        # NOTE(review): the decompiler lost the ``open`` call; the target
        # path is reconstructed from the only inputs available.  ``name``
        # comes from the book file itself and is not sanitised here.
        with open(os.path.join(output_dir, name), 'wb') as dest:
            dest.write(payload)
-
-
-