home *** CD-ROM | disk | FTP | other *** search
Wrap
# Source Generated with Decompyle++ # File: in.pyc (Python 2.6) __license__ = 'GPL v3' __copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.cam> and Kovid Goyal <kovid@kovidgoyal.net>' from collections import defaultdict from itertools import count from itertools import izip import random import re from struct import pack import time from urlparse import urldefrag from PIL import Image from cStringIO import StringIO from calibre.ebooks.mobi.langcodes import iana2mobi from calibre.ebooks.mobi.mobiml import MBP_NS from calibre.ebooks.oeb.base import OEB_DOCS from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES from calibre.ebooks.oeb.base import XHTML from calibre.ebooks.oeb.base import XHTML_NS from calibre.ebooks.oeb.base import XML_NS from calibre.ebooks.oeb.base import namespace from calibre.ebooks.oeb.base import prefixname from calibre.ebooks.oeb.base import urlnormalize from calibre.ebooks.compression.palmdoc import compress_doc INDEXING = True FCIS_FLIS = True WRITE_PBREAKS = True EXTH_CODES = { 'creator': 100, 'publisher': 101, 'description': 103, 'identifier': 104, 'subject': 105, 'pubdate': 106, 'date': 106, 'review': 107, 'contributor': 108, 'rights': 109, 'type': 111, 'source': 112, 'title': 503 } RECORD_SIZE = 4096 UNCOMPRESSED = 1 PALMDOC = 2 HUFFDIC = 17480 PALM_MAX_IMAGE_SIZE = 63 * 1024 OTHER_MAX_IMAGE_SIZE = 10 * 1024 * 1024 MAX_THUMB_SIZE = 16 * 1024 MAX_THUMB_DIMEN = (180, 240) TAGX = { 'chapter': '\x00\x00\x00\x01\x01\x01\x01\x00\x02\x01\x02\x00\x03\x01\x04\x00\x04\x01\x08\x00\x00\x00\x00\x01', 'subchapter': '\x00\x00\x00\x01\x01\x01\x01\x00\x02\x01\x02\x00\x03\x01\x04\x00\x04\x01\x08\x00\x05\x01\x10\x00\x15\x01\x10\x00\x16\x01 \x00\x17\x01@\x00\x00\x00\x00\x01', 'periodical': '\x00\x00\x00\x02\x01\x01\x01\x00\x02\x01\x02\x00\x03\x01\x04\x00\x04\x01\x08\x00\x05\x01\x10\x00\x15\x01 \x00\x16\x01@\x00\x17\x01\x80\x00\x00\x00\x00\x01E\x01\x01\x00F\x01\x02\x00G\x01\x04\x00\x00\x00\x00\x01', 'secondary_book': '\x00\x00\x00\x01\x01\x01\x01\x00\x00\x00\x00\x01', 'secondary_periodical': '\x00\x00\x00\x01\x01\x01\x01\x00\x0b\x03\x02\x00\x00\x00\x00\x01' } INDXT = { 'chapter': '\x0f', 'subchapter': '\x1f', 'article': '?', 'chapter with subchapters': 'o', 'periodical': '\xdf', 'section': '\xff' } def encode(data): return data.encode('utf-8') DECINT_FORWARD = 0 DECINT_BACKWARD = 1 def decint(value, direction): bytes = [] while True: b = value & 127 value >>= 7 bytes.append(b) if value == 0: break continue if direction == DECINT_FORWARD: bytes[0] |= 128 elif direction == DECINT_BACKWARD: bytes[-1] |= 128 return ''.join((lambda .0: for b in .0: chr(b))(reversed(bytes))) def align_block(raw, multiple = 4, pad = '\x00'): extra = len(raw) % multiple if extra == 0: return raw return raw + pad * (multiple - extra) def rescale_image(data, maxsizeb, dimen = None): image = Image.open(StringIO(data)) format = image.format changed = False if image.format not in ('JPEG', 'GIF'): (width, height) = image.size area = width * height if area <= 40000: format = 'GIF' else: image = image.convert('RGBA') format = 'JPEG' changed = True if dimen is not None: image.thumbnail(dimen, Image.ANTIALIAS) changed = True if changed: data = StringIO() image.save(data, format) data = data.getvalue() if len(data) <= maxsizeb: return data image = image.convert('RGBA') for quality in xrange(95, -1, -1): data = StringIO() image.save(data, 'JPEG', quality = quality) data = data.getvalue() if len(data) <= maxsizeb: return data (width, height) = image.size for scale in xrange(99, 0, -1): scale = scale / 100 data = StringIO() scaled = image.copy() size = (int(width * scale), height * scale) scaled.thumbnail(size, Image.ANTIALIAS) scaled.save(data, 'JPEG', quality = 0) data = data.getvalue() if len(data) <= maxsizeb: return data return data class Serializer(object): NSRMAP = { '': None, XML_NS: 'xml', XHTML_NS: '', MBP_NS: 'mbp' } def __init__(self, oeb, images, write_page_breaks_after_item = True): self.oeb = oeb self.images = images self.logger = oeb.logger self.write_page_breaks_after_item = write_page_breaks_after_item self.id_offsets = { } self.href_offsets = defaultdict(list) self.breaks = [] buffer = self.buffer = StringIO() buffer.write('<html>') self.serialize_head() self.serialize_body() buffer.write('</html>') self.fixup_links() self.text = buffer.getvalue() def serialize_head(self): buffer = self.buffer buffer.write('<head>') if len(self.oeb.guide) > 0: self.serialize_guide() buffer.write('</head>') def serialize_guide(self): buffer = self.buffer hrefs = self.oeb.manifest.hrefs buffer.write('<guide>') for ref in self.oeb.guide.values(): path = urldefrag(ref.href)[0] if hrefs[path].media_type not in OEB_DOCS: continue buffer.write('<reference type="') if ref.type.startswith('other.'): self.serialize_text(ref.type.replace('other.', ''), quot = True) else: self.serialize_text(ref.type, quot = True) buffer.write('" ') if ref.title is not None: buffer.write('title="') self.serialize_text(ref.title, quot = True) buffer.write('" ') self.serialize_href(ref.href) buffer.write(' />') buffer.write('</guide>') def serialize_href(self, href, base = None): hrefs = self.oeb.manifest.hrefs (path, frag) = urldefrag(urlnormalize(href)) if path and base: path = base.abshref(path) if path and path not in hrefs: return False buffer = self.buffer item = path not in hrefs if path else None if item and item.spine_position is None: return False path = item.spine_position is None if item else base.href href = None if frag else path buffer.write('filepos=') self.href_offsets[href].append(buffer.tell()) buffer.write('0000000000') return True def serialize_body(self): buffer = self.buffer self.anchor_offset = buffer.tell() buffer.write('<body>') self.anchor_offset_kindle = buffer.tell() if 'text' in self.oeb.guide: href = self.oeb.guide['text'].href buffer.write('<a ') self.serialize_href(href) buffer.write(' />') spine = _[1] [](_[2]) for item in spine: self.serialize_item(item) buffer.write('</body>') def serialize_item(self, item): buffer = self.buffer if not item.linear: self.breaks.append(buffer.tell() - 1) self.id_offsets[urlnormalize(item.href)] = buffer.tell() buffer.write('<div>') for elem in item.data.find(XHTML('body')): self.serialize_elem(elem, item) buffer.write('<div></div>') if self.write_page_breaks_after_item: buffer.write('<mbp:pagebreak/>') buffer.write('</div>') def serialize_elem(self, elem, item, nsrmap = NSRMAP): buffer = self.buffer if not isinstance(elem.tag, basestring) or namespace(elem.tag) not in nsrmap: return None tag = prefixname(elem.tag, nsrmap) id = elem.attrib.pop('id', None) if id is not None: href = '#'.join((item.href, id)) if not self.anchor_offset: pass offset = buffer.tell() self.id_offsets[urlnormalize(href)] = offset if self.anchor_offset is not None and tag == 'a' and not (elem.attrib) and not len(elem) and not (elem.text): return None self.anchor_offset = buffer.tell() buffer.write('<') buffer.write(tag) if elem.attrib: for attr, val in elem.attrib.items(): if namespace(attr) not in nsrmap: continue attr = prefixname(attr, nsrmap) buffer.write(' ') if attr == 'href': if self.serialize_href(val, item): continue elif attr == 'src': href = urlnormalize(item.abshref(val)) if href in self.images: index = self.images[href] buffer.write('recindex="%05d"' % index) continue buffer.write(attr) buffer.write('="') self.serialize_text(val, quot = True) buffer.write('"') if elem.text or len(elem) > 0: buffer.write('>') if elem.text: self.anchor_offset = None self.serialize_text(elem.text) for child in elem: self.serialize_elem(child, item) if child.tail: self.anchor_offset = None self.serialize_text(child.tail) continue buffer.write('</%s>' % tag) else: buffer.write('/>') def serialize_text(self, text, quot = False): text = text.replace('&', '&') text = text.replace('<', '<') text = text.replace('>', '>') text = text.replace(u'­', '') if quot: text = text.replace('"', '"') self.buffer.write(encode(text)) def fixup_links(self): buffer = self.buffer id_offsets = self.id_offsets for href, hoffs in self.href_offsets.items(): if href not in id_offsets: self.logger.warn('Hyperlink target %r not found' % href) (href, _) = urldefrag(href) ioff = self.id_offsets[href] for hoff in hoffs: buffer.seek(hoff) buffer.write('%010d' % ioff) class MobiWriter(object): COLLAPSE_RE = re.compile('[ \\t\\r\\n\\v]+') def __init__(self, opts, compression = PALMDOC, imagemax = None, prefer_author_sort = False, write_page_breaks_after_item = True): self.opts = opts self.write_page_breaks_after_item = write_page_breaks_after_item if not compression: pass self._compression = UNCOMPRESSED if not imagemax: pass self._imagemax = OTHER_MAX_IMAGE_SIZE self._prefer_author_sort = prefer_author_sort self._primary_index_record = None self._conforming_periodical_toc = False self._indexable = False self._ctoc = '' self._ctoc_records = [] self._ctoc_offset = 0 self._ctoc_largest = 0 self._HTMLRecords = [] self._tbSequence = '' self._MobiDoc = None self._anchor_offset_kindle = 0 self._initialIndexRecordFound = False self._firstSectionConcluded = False self._currentSectionIndex = 0 def generate(cls, opts): imagemax = None if opts.rescale_images else None prefer_author_sort = opts.prefer_author_sort return cls(compression = PALMDOC, imagemax = imagemax, prefer_author_sort = prefer_author_sort) generate = classmethod(generate) def __call__(self, oeb, path): if hasattr(path, 'write'): return self._dump_stream(oeb, path) try: stream = _[1] return self._dump_stream(oeb, stream) finally: pass def _write(self, *data): for datum in data: self._stream.write(datum) def _tell(self): return self._stream.tell() def _dump_stream(self, oeb, stream): self._oeb = oeb self._stream = stream self._records = [ None] self._generate_content() self._generate_record0() self._write_header() self._write_content() def _generate_content(self): self._map_image_names() self._generate_text() if INDEXING and self._indexable: try: self._generate_index() self._oeb.log.exception('Failed to generate index') self._generate_images() def _map_image_names(self): index = 1 self._images = images = { } mh_href = None if 'masthead' in self._oeb.guide: mh_href = self._oeb.guide['masthead'].href images[mh_href] = 1 index += 1 for item in self._oeb.manifest.values(): if item.media_type in OEB_RASTER_IMAGES: if item.href == mh_href: continue images[item.href] = index index += 1 continue def _read_text_record(self, text): pos = text.tell() text.seek(0, 2) npos = min((pos + RECORD_SIZE, text.tell())) last = '' while not last.decode('utf-8', 'ignore'): size = len(last) + 1 text.seek(npos - size) last = text.read(size) extra = 0 try: last.decode('utf-8') except UnicodeDecodeError: prev = len(last) while True: text.seek(npos - prev) last = text.read(len(last) + 1) try: last.decode('utf-8') except UnicodeDecodeError: continue break extra = len(last) - prev text.seek(pos) data = text.read(RECORD_SIZE) overlap = text.read(extra) text.seek(npos) return (data, overlap) def _generate_flat_indexed_navpoints(self): self._oeb.logger.info('Indexing flat navPoints ...') numberOfHTMLRecords = self._content_length // RECORD_SIZE + 1 x = numberOfHTMLRecords while x: self._HTMLRecords.append(HTMLRecordData()) x -= 1 toc = self._oeb.toc myIndex = 0 myEndingRecord = 0 previousOffset = 0 previousLength = 0 offset = 0 length = 0 entries = list(toc.iter())[1:] for i, child in enumerate(entries): if not (child.title) or not child.title.strip(): child.title = '(none)' if not (child.title) or not child.title.strip(): child.title = '(none)' h = child.href if h not in self._id_offsets: self._oeb.log.warning(' Could not find TOC entry "%s", aborting indexing ...' % child.title) return False offset = self._id_offsets[h] length = None for sibling in entries[i + 1:]: h2 = sibling.href if h2 in self._id_offsets: offset2 = self._id_offsets[h2] if offset2 > offset: length = offset2 - offset break offset2 > offset if length is None: length = self._content_length - offset if self.opts.verbose > 3: self._oeb.logger.info('child %03d: %s' % (i, child)) self._oeb.logger.info(' title: %s' % child.title) self._oeb.logger.info(' depth: %d' % child.depth()) self._oeb.logger.info(' offset: 0x%06X \tlength: 0x%06X \tnext: 0x%06X' % (offset, length, offset + length)) if i and child.depth() == 1 and entries[i - 1].depth() == 1: if offset != previousOffset + previousLength: self._oeb.log.warning('*** TOC discontinuity ***') self._oeb.log.warning(" node %03d: '%s' offset: 0x%X length: 0x%X" % (i - 1, entries[i - 1].title, previousOffset, previousLength)) self._oeb.log.warning(" node %03d: '%s' offset: 0x%X != 0x%06X" % (i, child.title, offset, previousOffset + previousLength)) self._oeb.log.warning('_generate_flat_indexed_navpoints: Failed to generate index') self._HTMLRecords = [] return False previousOffset = offset previousLength = length myStartingRecord = offset // RECORD_SIZE if self._HTMLRecords[myStartingRecord].openingNode == -1: self._HTMLRecords[myStartingRecord].openingNode = myIndex myEndingRecord = (offset + length) // RECORD_SIZE if myEndingRecord > myStartingRecord: interimSpanRecord = myStartingRecord + 1 while interimSpanRecord <= myEndingRecord: self._HTMLRecords[interimSpanRecord].continuingNode = myIndex self._HTMLRecords[interimSpanRecord].currentSectionNodeCount = 1 interimSpanRecord += 1 continue None if self._HTMLRecords[myStartingRecord].currentSectionNodeCount == -1 else self._HTMLRecords[myStartingRecord] if self.opts.verbose > 3: None(self._oeb.logger.info % (' node %03d: %-15.15s... spans HTML records %03d - %03d \t offset: 0x%06X length: 0x%06X', myIndex if child.title.strip() > '' else '(missing)', myStartingRecord, interimSpanRecord, offset, length)) elif self.opts.verbose > 3: None(self._oeb.logger.info % (' node %03d: %-15.15s... spans HTML records %03d - %03d \t offset: 0x%06X length: 0x%06X', myIndex if child.title.strip() > '' else '(missing)', myStartingRecord, myStartingRecord, offset, length)) myIndex += 1 return True def _generate_indexed_navpoints(self): self._oeb.logger.info('Indexing navPoints ...') numberOfHTMLRecords = self._content_length // RECORD_SIZE + 1 x = numberOfHTMLRecords while x: self._HTMLRecords.append(HTMLRecordData()) x -= 1 toc = self._oeb.toc myIndex = 0 myEndingRecord = 0 previousOffset = 0 previousLength = 0 offset = 0 length = 0 sectionChangedInRecordNumber = -1 sectionChangesInThisRecord = False entries = list(toc.iter())[1:] for firstSequentialNode, node in enumerate(list(self._ctoc_map)): if node['klass'] != 'article' and node['klass'] != 'chapter': continue continue if self.opts.verbose > 3: self._oeb.logger.info('\tFirst sequential node: %03d' % firstSequentialNode) for i, child in enumerate(entries): h = child.href if h not in self._id_offsets: self._oeb.log.warning(' Could not find TOC entry "%s", aborting indexing ...' % child.title) return False offset = self._id_offsets[h] length = None for sibling in entries[i + 1:]: h2 = sibling.href if h2 in self._id_offsets: offset2 = self._id_offsets[h2] if offset2 > offset: length = offset2 - offset break offset2 > offset if length is None: length = self._content_length - offset if self.opts.verbose > 3: self._oeb.logger.info('child %03d: %s' % (i, child)) self._oeb.logger.info(' title: %s' % child.title) self._oeb.logger.info(' depth: %d' % child.depth()) self._oeb.logger.info(' offset: 0x%06X \tlength: 0x%06X \tnext: 0x%06X' % (offset, length, offset + length)) if i > firstSequentialNode and self._ctoc_map[i - 1]['klass'] != 'section': if offset != previousOffset + previousLength: self._oeb.log.warning('*** TOC discontinuity: nodes are not sequential ***') self._oeb.log.info(" node %03d: '%s' offset: 0x%X length: 0x%X" % (i - 1, entries[i - 1].title, previousOffset, previousLength)) self._oeb.log.warning(" node %03d: '%s' offset: 0x%X != 0x%06X" % (i, child.title, offset, previousOffset + previousLength)) self._oeb.log.info('...') while i - 6 > 0: pass i - 6 self._oeb.log.info('...') self._oeb.log.warning('_generate_indexed_navpoints: Failed to generate index') self._HTMLRecords = [] return False self._ctoc_map[i - 1]['klass'] != 'section' previousOffset = offset previousLength = length thisRecord = offset // RECORD_SIZE if self._ctoc_map[i]['klass'] == 'article': if thisRecord > 0: if sectionChangesInThisRecord: self._HTMLRecords[thisRecord].continuingNodeParent = self._currentSectionIndex - 1 else: self._HTMLRecords[thisRecord].continuingNodeParent = self._currentSectionIndex if self._ctoc_map[i]['klass'] == 'periodical': continue if self._ctoc_map[i]['klass'] == 'section': if thisRecord > 0: sectionChangesInThisRecord = True self._currentSectionIndex += 1 self._HTMLRecords[thisRecord].nextSectionNumber = self._currentSectionIndex self._HTMLRecords[thisRecord].nextSectionOpeningNode = myIndex continue if self._HTMLRecords[thisRecord].openingNode == -1: self._HTMLRecords[thisRecord].openingNode = myIndex self._HTMLRecords[thisRecord].openingNodeParent = self._currentSectionIndex myEndingRecord = (offset + length) // RECORD_SIZE if myEndingRecord > thisRecord: sectionChangesInThisRecord = False interimSpanRecord = thisRecord + 1 while interimSpanRecord <= myEndingRecord: self._HTMLRecords[interimSpanRecord].continuingNode = myIndex self._HTMLRecords[interimSpanRecord].continuingNodeParent = self._currentSectionIndex self._HTMLRecords[interimSpanRecord].currentSectionNodeCount = 1 interimSpanRecord += 1 continue None if sectionChangedInRecordNumber == thisRecord else None if self._HTMLRecords[thisRecord].currentSectionNodeCount == -1 else self._HTMLRecords[thisRecord] if self.opts.verbose > 3: None(self._oeb.logger.info % (' node: %03d %-10.10s %-15.15s... spans HTML records %03d-%03d \t offset: 0x%06X length: 0x%06X', myIndex, self._ctoc_map[i]['klass'] if child.title.strip() > '' else '(missing)', thisRecord, interimSpanRecord, offset, length)) elif thisRecord == numberOfHTMLRecords - 1: if self._HTMLRecords[thisRecord].continuingNode == -1: self._HTMLRecords[thisRecord].continuingNode = self._HTMLRecords[thisRecord].openingNode - 1 elif self.opts.verbose > 3: None(self._oeb.logger.info % (' node: %03d %-10.10s %-15.15s... spans HTML records %03d-%03d \t offset: 0x%06X length: 0x%06X', myIndex, self._ctoc_map[i]['klass'] if child.title.strip() > '' else '(missing)', thisRecord, thisRecord, offset, length)) myIndex += 1 return True def _generate_tbs_book(self, nrecords, lastrecord): if self.opts.verbose > 3: self._oeb.logger.info('Assembling TBS for Book: HTML record %03d of %03d' % (nrecords, lastrecord)) tbsType = 0 tbSequence = '' if self._initialIndexRecordFound == False: if self._HTMLRecords[nrecords].currentSectionNodeCount == -1: tbSequence = decint(len(tbSequence) + 1, DECINT_FORWARD) else: self._initialIndexRecordFound = True if self._HTMLRecords[nrecords].currentSectionNodeCount == 1: tbsType = 2 else: tbsType = 6 tbSequence = decint(tbsType, DECINT_FORWARD) tbSequence += decint(0, DECINT_FORWARD) if tbsType != 2: tbSequence += chr(self._HTMLRecords[nrecords].currentSectionNodeCount) tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD) elif nrecords == lastrecord and self._HTMLRecords[nrecords].currentSectionNodeCount == 1: tbsType = 2 elif self._HTMLRecords[nrecords].continuingNode > 0 and self._HTMLRecords[nrecords].openingNode == -1: tbsType = 3 self._HTMLRecords[nrecords].currentSectionNodeCount = 128 else: tbsType = 6 shiftedNCXEntry = self._HTMLRecords[nrecords].continuingNode << 3 shiftedNCXEntry |= tbsType tbSequence = decint(shiftedNCXEntry, DECINT_FORWARD) tbSequence += decint(0, DECINT_FORWARD) if tbsType != 2: tbSequence += chr(self._HTMLRecords[nrecords].currentSectionNodeCount) tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD) self._tbSequence = tbSequence def _generate_tbs_flat_periodical(self, nrecords, lastrecord): tbsType = 0 tbSequence = '' if self._initialIndexRecordFound == False: if self._HTMLRecords[nrecords].currentSectionNodeCount == -1: tbSequence = decint(len(tbSequence) + 1, DECINT_FORWARD) else: self._initialIndexRecordFound = True tbsType = 6 tbSequence = decint(tbsType, DECINT_FORWARD) tbSequence += decint(0, DECINT_FORWARD) tbSequence += chr(self._HTMLRecords[nrecords].currentSectionNodeCount + 2) tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD) if self.opts.verbose > 2: self._oeb.logger.info('\nAssembling TBS for Flat Periodical: HTML record %03d of %03d, section %d' % (nrecords, lastrecord, self._HTMLRecords[nrecords].continuingNodeParent)) self._HTMLRecords[nrecords].dumpData(nrecords, self._oeb) elif self.opts.verbose > 2: self._oeb.logger.info('\nAssembling TBS for Flat Periodical: HTML record %03d of %03d, section %d' % (nrecords, lastrecord, self._HTMLRecords[nrecords].continuingNodeParent)) self._HTMLRecords[nrecords].dumpData(nrecords, self._oeb) if nrecords == lastrecord and self._HTMLRecords[nrecords].currentSectionNodeCount == 1: tbsType = 6 tbSequence = decint(tbsType, DECINT_FORWARD) tbSequence += decint(0, DECINT_FORWARD) tbSequence += chr(2) arg3 = self._HTMLRecords[nrecords].continuingNode arg3 += 1 arg3 <<= 4 arg3 |= 0 tbSequence += decint(arg3, DECINT_FORWARD) tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD) elif self._HTMLRecords[nrecords].continuingNode > 0 and self._HTMLRecords[nrecords].openingNode == -1: tbsType = 6 self._HTMLRecords[nrecords].currentSectionNodeCount = 128 tbSequence = decint(tbsType, DECINT_FORWARD) tbSequence += decint(0, DECINT_FORWARD) tbSequence += chr(2) arg3 = self._HTMLRecords[nrecords].continuingNode arg3 += self._HTMLRecords[nrecords].continuingNodeParent + 1 arg3 <<= 4 arg3 |= 1 tbSequence += decint(arg3, DECINT_FORWARD) tbSequence += chr(self._HTMLRecords[nrecords].currentSectionNodeCount) tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD) else: tbsType = 7 tbSequence = decint(tbsType, DECINT_FORWARD) tbSequence += decint(0, DECINT_FORWARD) tbSequence += chr(2) tbSequence += decint(0, DECINT_FORWARD) arg4 = self._HTMLRecords[nrecords].continuingNode arg4 += self._HTMLRecords[nrecords].continuingNodeParent + 1 arg4 <<= 4 arg4 |= 4 tbSequence += decint(arg4, DECINT_FORWARD) tbSequence += chr(self._HTMLRecords[nrecords].currentSectionNodeCount) tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD) self._tbSequence = tbSequence def _generate_tbs_structured_periodical(self, nrecords, lastrecord): tbsType = 0 tbSequence = '' if self._initialIndexRecordFound == False: if self._HTMLRecords[nrecords].currentSectionNodeCount == -1: tbSequence = decint(len(tbSequence) + 1, DECINT_FORWARD) else: self._initialIndexRecordFound = True if self.opts.verbose > 2: self._oeb.logger.info('\nAssembling TBS for Structured Periodical: HTML record %03d of %03d, section %d' % (nrecords, lastrecord, self._HTMLRecords[nrecords].continuingNodeParent)) self._HTMLRecords[nrecords].dumpData(nrecords, self._oeb) tbsType = 6 tbSequence = decint(tbsType, DECINT_FORWARD) tbSequence += decint(0, DECINT_FORWARD) tbSequence += chr(2) arg3 = self._sectionCount arg3 += 0 arg3 <<= 4 arg3 |= 4 tbSequence += decint(arg3, DECINT_FORWARD) tbSequence += chr(self._HTMLRecords[nrecords].currentSectionNodeCount) tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD) elif self._firstSectionConcluded == False: if self._HTMLRecords[nrecords].nextSectionNumber == -1: if self.opts.verbose > 2: self._oeb.logger.info('\nAssembling TBS for Structured Periodical: HTML record %03d of %03d, section %d' % (nrecords, lastrecord, self._HTMLRecords[nrecords].continuingNodeParent)) self._HTMLRecords[nrecords].dumpData(nrecords, self._oeb) if nrecords == lastrecord and self._HTMLRecords[nrecords].currentSectionNodeCount == 1: tbsType = 6 tbSequence = decint(tbsType, DECINT_FORWARD) tbSequence += decint(0, DECINT_FORWARD) tbSequence += chr(2) arg3 = self._sectionCount arg3 += self._HTMLRecords[nrecords].continuingNode arg3 <<= 4 arg3 |= 4 tbSequence += decint(arg3, DECINT_FORWARD) tbSequence += chr(self._HTMLRecords[nrecords].currentSectionNodeCount) tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD) elif self._HTMLRecords[nrecords].continuingNode > 0 and self._HTMLRecords[nrecords].openingNode == -1: tbsType = 6 self._HTMLRecords[nrecords].currentSectionNodeCount = 128 tbSequence = decint(tbsType, DECINT_FORWARD) tbSequence += decint(0, DECINT_FORWARD) tbSequence += chr(2) arg3 = self._sectionCount arg3 += self._HTMLRecords[nrecords].continuingNode arg3 <<= 4 arg3 |= 1 tbSequence += decint(arg3, DECINT_FORWARD) tbSequence += chr(self._HTMLRecords[nrecords].currentSectionNodeCount) tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD) else: tbsType = 7 tbSequence = decint(tbsType, DECINT_FORWARD) tbSequence += decint(0, DECINT_FORWARD) tbSequence += chr(2) tbSequence += decint(0, DECINT_FORWARD) arg4 = self._sectionCount arg4 += self._HTMLRecords[nrecords].continuingNode arg4 <<= 4 arg4 |= 4 tbSequence += decint(arg4, DECINT_FORWARD) tbSequence += chr(self._HTMLRecords[nrecords].currentSectionNodeCount) tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD) elif self._HTMLRecords[nrecords].nextSectionNumber > 0: tbsType = 3 if self.opts.verbose > 2: self._oeb.logger.info('\nAssembling TBS for Structured Periodical: HTML record %03d of %03d, switching sections %d-%d' % (nrecords, lastrecord, self._HTMLRecords[nrecords].continuingNodeParent, self._HTMLRecords[nrecords].nextSectionNumber)) self._HTMLRecords[nrecords].dumpData(nrecords, self._oeb) tbSequence = decint(tbsType, DECINT_FORWARD) tbSequence += decint(0, DECINT_FORWARD) tbSequence += decint(0, DECINT_FORWARD) arg3 = self._HTMLRecords[nrecords].continuingNodeParent + 1 << 4 arg3Flags = 0 arg3 |= arg3Flags tbSequence += decint(arg3, DECINT_FORWARD) sectionBase = self._HTMLRecords[nrecords].continuingNodeParent sectionDelta = self._sectionCount - sectionBase - 1 articleOffset = self._HTMLRecords[nrecords].continuingNode + 1 arg4 = sectionDelta + articleOffset << 4 arg4Flags = 0 if self._HTMLRecords[nrecords].currentSectionNodeCount > 1: arg4Flags = 4 else: arg4Flags = 0 arg4 |= arg4Flags tbSequence += decint(arg4, DECINT_FORWARD) if arg4Flags == 4: nodeCountValue = self._HTMLRecords[nrecords].currentSectionNodeCount nodeCountValue = None if nodeCountValue == 0 else nodeCountValue tbSequence += chr(nodeCountValue) arg5 = sectionDelta + articleOffset if self._HTMLRecords[nrecords].currentSectionNodeCount < 2: arg5 -= 1 arg5 <<= 4 arg5Flags = 8 arg5 |= arg5Flags tbSequence += decint(arg5, DECINT_FORWARD) arg6 = sectionDelta + self._HTMLRecords[nrecords].nextSectionOpeningNode arg6 <<= 4 if self._HTMLRecords[nrecords].nextSectionNodeCount > 1: arg6Flags = 4 else: arg6Flags = 0 arg6 |= arg6Flags tbSequence += decint(arg6, DECINT_FORWARD) if arg6Flags == 4: nodeCountValue = self._HTMLRecords[nrecords].nextSectionNodeCount nodeCountValue = None if nodeCountValue == 0 else nodeCountValue tbSequence += chr(nodeCountValue) tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD) self._firstSectionConcluded = True elif self._HTMLRecords[nrecords].nextSectionNumber == -1: if self.opts.verbose > 2: self._oeb.logger.info('\nAssembling TBS for Structured Periodical: HTML record %03d of %03d, section %d' % (nrecords, lastrecord, self._HTMLRecords[nrecords].continuingNodeParent)) self._HTMLRecords[nrecords].dumpData(nrecords, self._oeb) tbsType = 2 tbSequence = decint(tbsType, DECINT_FORWARD) tbSequence += decint(0, DECINT_FORWARD) arg2 = self._HTMLRecords[nrecords].continuingNodeParent + 1 arg2 <<= 4 arg2Flags = 0 if self._HTMLRecords[nrecords].currentSectionNodeCount > 0: arg2Flags = 1 arg2 |= arg2Flags tbSequence += decint(arg2, DECINT_FORWARD) if arg2Flags: tbSequence += decint(0, DECINT_FORWARD) arg3 = self._sectionCount - self._HTMLRecords[nrecords].continuingNodeParent arg3 += self._HTMLRecords[nrecords].continuingNode arg3 <<= 4 arg3Flags = 1 if self._HTMLRecords[nrecords].currentSectionNodeCount > 0: arg3Flags = 4 arg3 |= arg3Flags tbSequence += decint(arg3, DECINT_FORWARD) if arg3Flags == 4: nodeCountValue = self._HTMLRecords[nrecords].currentSectionNodeCount nodeCountValue = None if nodeCountValue == 0 else nodeCountValue tbSequence += chr(nodeCountValue) else: tbSequence += decint(0, DECINT_FORWARD) tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD) else: tbsType = 3 if self.opts.verbose > 2: self._oeb.logger.info('\nAssembling TBS for Structured Periodical: HTML record %03d of %03d, switching sections %d-%d' % (nrecords, lastrecord, self._HTMLRecords[nrecords].continuingNodeParent, self._HTMLRecords[nrecords].nextSectionNumber)) self._HTMLRecords[nrecords].dumpData(nrecords, self._oeb) tbSequence = decint(tbsType, DECINT_FORWARD) tbSequence += decint(0, DECINT_FORWARD) tbSequence += decint(0, DECINT_FORWARD) arg3 = self._HTMLRecords[nrecords].continuingNodeParent + 1 << 4 arg3Flags = 0 arg3 |= arg3Flags tbSequence += decint(arg3, DECINT_FORWARD) sectionBase = self._HTMLRecords[nrecords].continuingNodeParent sectionDelta = self._sectionCount - sectionBase - 1 articleOffset = self._HTMLRecords[nrecords].continuingNode + 1 arg4 = sectionDelta + articleOffset << 4 arg4Flags = 0 if self._HTMLRecords[nrecords].currentSectionNodeCount > 1: arg4Flags = 4 else: arg4Flags = 0 arg4 |= arg4Flags tbSequence += decint(arg4, DECINT_FORWARD) if arg4Flags == 4: nodeCountValue = self._HTMLRecords[nrecords].currentSectionNodeCount nodeCountValue = None if nodeCountValue == 0 else nodeCountValue tbSequence += chr(nodeCountValue) arg5 = sectionDelta + articleOffset if self._HTMLRecords[nrecords].currentSectionNodeCount < 2: arg5 -= 1 arg5 <<= 4 arg5Flags = 8 arg5 |= arg5Flags tbSequence += decint(arg5, DECINT_FORWARD) arg6 = sectionDelta + self._HTMLRecords[nrecords].nextSectionOpeningNode arg6 <<= 4 if self._HTMLRecords[nrecords].nextSectionNodeCount > 1: arg6Flags = 4 else: arg6Flags = 0 arg6 |= arg6Flags tbSequence += decint(arg6, DECINT_FORWARD) if arg6Flags == 4: nodeCountValue = self._HTMLRecords[nrecords].nextSectionNodeCount nodeCountValue = None if nodeCountValue == 0 else nodeCountValue tbSequence += chr(nodeCountValue) tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD) self._tbSequence = tbSequence def _evaluate_periodical_toc(self): toc = self._oeb.toc nodes = list(toc.iter())[1:] toc_conforms = True for i, child in enumerate(nodes): if not child.klass == 'periodical' or child.depth() != 3: if (child.klass == 'section' or child.depth() != 2 or child.klass == 'article') and child.depth() != 1: self._oeb.logger.warn('Nonconforming TOC entry: "%s" found at depth %d' % (child.klass, child.depth())) self._oeb.logger.warn(" <title>: '%-25.25s...' \t\tklass=%-15.15s \tdepth:%d \tplayOrder=%03d" % (child.title, child.klass, child.depth(), child.play_order)) toc_conforms = False continue if self._oeb.metadata['date'] == [] and self._oeb.metadata['timestamp'] == []: self._oeb.logger.info('metadata missing date/timestamp') toc_conforms = False if 'masthead' not in self._oeb.guide: self._oeb.logger.info('mastheadImage missing from manifest') toc_conforms = False None(self._oeb.logger.info if toc_conforms else ' TOC structure non-conforming') return toc_conforms def _generate_text(self): self._oeb.logger.info('Serializing markup content...') serializer = Serializer(self._oeb, self._images, write_page_breaks_after_item = self.write_page_breaks_after_item) breaks = serializer.breaks text = serializer.text self._anchor_offset_kindle = serializer.anchor_offset_kindle self._id_offsets = serializer.id_offsets self._content_length = len(text) self._text_length = len(text) text = StringIO(text) buf = [] nrecords = 0 lastrecord = self._content_length // RECORD_SIZE offset = 0 if self._compression != UNCOMPRESSED: self._oeb.logger.info(' Compressing markup content...') (data, overlap) = self._read_text_record(text) if self.opts.mobi_periodical: self._oeb.logger.info(' MOBI periodical specified, evaluating TOC for periodical conformance ...') self._conforming_periodical_toc = self._evaluate_periodical_toc() self._ctoc_records.append(self._generate_ctoc()) toc = self._oeb.toc entries = list(toc.iter())[1:] if len(entries): self._indexable = self._generate_indexed_navpoints() else: self._oeb.logger.info(' No entries found in TOC ...') self._indexable = False if not self._indexable: self._oeb.logger.info(' Writing unindexed mobi ...') while len(data) > 0: if self._compression == PALMDOC: data = compress_doc(data) record = StringIO() record.write(data) if WRITE_PBREAKS: record.write(overlap) record.write(pack('>B', len(overlap))) nextra = 0 pbreak = 0 running = offset while breaks and breaks[0] - offset < RECORD_SIZE: pbreak = breaks.pop(0) - running >> 3 if self.opts.verbose > 2: self._oeb.logger.info('pbreak = 0x%X at 0x%X' % (pbreak, record.tell())) encoded = decint(pbreak, DECINT_FORWARD) record.write(encoded) running += pbreak << 3 nextra += len(encoded) lsize = 1 while True: size = decint(nextra + lsize, DECINT_BACKWARD) if len(size) == lsize: break lsize += 1 record.write(size) if INDEXING and self._indexable: booktype = self._MobiDoc.mobiType if booktype == 2: self._generate_tbs_book(nrecords, lastrecord) elif booktype == 258: self._generate_tbs_flat_periodical(nrecords, lastrecord) elif booktype == 257 or booktype == 259: self._generate_tbs_structured_periodical(nrecords, lastrecord) else: raise NotImplementedError('Indexing for mobitype 0x%X not implemented' % booktype) (booktype == 259).write(self._tbSequence) self._records.append(record.getvalue()) buf.append(self._records[-1]) nrecords += 1 offset += RECORD_SIZE (data, overlap) = self._read_text_record(text) if INDEXING: extra = sum(map(len, buf)) % 4 if extra == 0: extra = 4 self._records.append('\x00' * (4 - extra)) nrecords += 1 self._text_nrecords = nrecords def _generate_images(self): self._oeb.logger.info('Serializing images...') images = [ (index, href) for href, index in self._images.items() ] images.sort() self._first_image_record = None for _, href in images: item = self._oeb.manifest.hrefs[href] try: data = rescale_image(item.data, self._imagemax) except: [] [] self._oeb.logger.warn('Bad image file %r' % item.href) continue self._records.append(data) if self._first_image_record is None: self._first_image_record = len(self._records) - 1 continue [] def _generate_end_records(self): if FCIS_FLIS: self._flis_number = len(self._records) self._records.append('FLIS\x00\x00\x00\x08\x00A\x00\x00\x00\x00\x00\x00\xff\xff\xff\xff\x00\x01\x00\x03\x00\x00\x00\x03\x00\x00\x00\x01' + '\xff\xff\xff\xff') fcis = 'FCIS\x00\x00\x00\x14\x00\x00\x00\x10\x00\x00\x00\x01\x00\x00\x00\x00' fcis += pack('>I', self._text_length) fcis += '\x00\x00\x00\x00\x00\x00\x00 \x00\x00\x00\x08\x00\x01\x00\x01\x00\x00\x00\x00' self._fcis_number = len(self._records) self._records.append(fcis) self._records.append('\xe9\x8e\r\n') else: self._flis_number = len(self._records) self._records.append('\xe9\x8e\r\n') def _generate_record0(self): metadata = self._oeb.metadata exth = self._build_exth() last_content_record = len(self._records) - 1 self._generate_end_records() record0 = StringIO() record0.write(pack('>HHIHHHH', self._compression, 0, self._text_length, self._text_nrecords - 1, RECORD_SIZE, 0, 0)) uid = random.randint(0, 0xFFFFFFFFL) title = unicode(metadata.title[0]).encode('utf-8') record0.write('MOBI') btype = self._MobiDoc.mobiType record0.write(pack('>IIIII', 232, btype, 65001, uid, 6)) record0.write('\xff\xff\xff\xff\xff\xff\xff\xff') if btype < 256: record0.write(pack('>I', 0xFFFFFFFFL)) elif btype > 256 and self._indexable: if self._primary_index_record is None: record0.write(pack('>I', 0xFFFFFFFFL)) else: record0.write(pack('>I', self._primary_index_record + 2 + len(self._ctoc_records))) else: record0.write(pack('>I', 0xFFFFFFFFL)) record0.write('\xff' * 28) record0.write(pack('>I', self._text_nrecords + 1)) record0.write(pack('>II', 248 + len(exth), len(title))) record0.write(iana2mobi(str(metadata.language[0]))) record0.write('\x00\x00\x00\x00\x00\x00\x00\x00') None(record0.write(pack, '>II', 6 if self._first_image_record else 0)) record0.write('\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') record0.write(pack('>I', 80)) record0.write('\x00' * 32) record0.write(pack('>IIII', 0xFFFFFFFFL, 0xFFFFFFFFL, 0, 0)) record0.write('\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') record0.write(pack('>HH', 1, last_content_record)) record0.write('\x00\x00\x00\x01') if FCIS_FLIS: record0.write(pack('>I', self._fcis_number)) record0.write(pack('>I', 1)) record0.write(pack('>I', self._flis_number)) record0.write(pack('>I', 1)) else: record0.write(pack('>I', 0xFFFFFFFFL)) record0.write(pack('>I', 0xFFFFFFFFL)) record0.write(pack('>I', 0xFFFFFFFFL)) record0.write(pack('>I', 1)) record0.write('\x00\x00\x00\x00\x00\x00\x00\x00') record0.write(pack('>IIII', 0xFFFFFFFFL, 0, 0xFFFFFFFFL, 0xFFFFFFFFL)) trailingDataFlags = 1 if self._indexable: trailingDataFlags |= 2 if WRITE_PBREAKS: trailingDataFlags |= 4 record0.write(pack('>I', trailingDataFlags)) None(record0.write(pack, '>I' if self._primary_index_record is None else self._primary_index_record)) record0.write(exth) record0.write(title) record0 = record0.getvalue() self._records[0] = record0 + '\x00' * (2452 - len(record0)) def _build_exth(self): oeb = self._oeb exth = StringIO() nrecs = 0 for term in oeb.metadata: if term not in EXTH_CODES: continue code = EXTH_CODES[term] items = oeb.metadata[term] for item in items: data = self.COLLAPSE_RE.sub(' ', unicode(item)) data = data.encode('utf-8') exth.write(pack('>II', code, len(data) + 8)) exth.write(data) nrecs += 1 if term == 'rights': rights = unicode(oeb.metadata.rights[0]).encode('utf-8') exth.write(pack('>II', EXTH_CODES['rights'], len(rights) + 8)) exth.write(rights) continue None if term == 'identifier' else None if term == 'creator' else [] if oeb.metadata['date'] != []: datestr = str(oeb.metadata['date'][0]) elif oeb.metadata['timestamp'] != []: datestr = str(oeb.metadata['timestamp'][0]) if datestr is not None: exth.write(pack('>II', EXTH_CODES['pubdate'], len(datestr) + 8)) exth.write(datestr) nrecs += 1 else: raise NotImplementedError('missing date or timestamp needed for mobi_periodical') if (datestr is not None).metadata.cover and unicode(oeb.metadata.cover[0]) in oeb.manifest.ids: id = unicode(oeb.metadata.cover[0]) item = oeb.manifest.ids[id] href = item.href index = self._images[href] - 1 exth.write(pack('>III', 201, 12, index)) exth.write(pack('>III', 203, 12, 0)) nrecs += 2 index = self._add_thumbnail(item) if index is not None: exth.write(pack('>III', 202, 12, index - 1)) nrecs += 1 exth = exth.getvalue() trail = len(exth) % 4 pad = '\x00' * (4 - trail) exth = [ 'EXTH', pack('>II', len(exth) + 12, nrecs), exth, pad] return ''.join(exth) def _add_thumbnail(self, item): try: data = rescale_image(item.data, MAX_THUMB_SIZE, MAX_THUMB_DIMEN) except IOError: self._oeb.logger.warn('Bad image file %r' % item.href) return None manifest = self._oeb.manifest (id, href) = manifest.generate('thumbnail', 'thumbnail.jpeg') manifest.add(id, href, 'image/jpeg', data = data) index = len(self._images) + 1 self._images[href] = index self._records.append(data) return index def _write_header(self): title = str(self._oeb.metadata.title[0]) title = re.sub('[^-A-Za-z0-9]+', '_', title)[:31] title = title + '\x00' * (32 - len(title)) now = int(time.time()) nrecords = len(self._records) self._write(title, pack('>HHIIIIII', 0, 0, now, now, 0, 0, 0, 0), 'BOOK', 'MOBI', pack('>IIH', nrecords, 0, nrecords)) offset = self._tell() + 8 * nrecords + 2 for id, record in izip(count(), self._records): self._write(pack('>I', offset), '\x00', pack('>I', id)[1:]) offset += len(record) self._write('\x00\x00') def _write_content(self): for record in self._records: self._write(record) def _generate_index(self): self._oeb.log('Generating INDX ...') self._primary_index_record = None (indxt, indxt_count, indices, last_name) = self._generate_indxt() if last_name is None: self._oeb.log.warn('Input document has no TOC. No index generated.') return None indx1 = StringIO() indx1.write('INDX' + pack('>I', 192)) indx1.write('\x00\x00\x00\x00') indx1.write(pack('>I', 1)) indx1.write('\x00\x00\x00\x00') indx1.write(pack('>I', 192 + len(indxt))) indx1.write(pack('>I', indxt_count + 1)) indx1.write('\xff\xff\xff\xff\xff\xff\xff\xff') indx1.write('\x00' * 156) indx1.write(indxt) indx1.write(indices) indx1 = indx1.getvalue() idxt0 = chr(len(last_name)) + last_name + pack('>H', indxt_count + 1) idxt0 = align_block(idxt0) indx0 = StringIO() if self._MobiDoc.mobiType == 2: tagx = TAGX['chapter'] else: tagx = TAGX['periodical'] tagx = align_block('TAGX' + pack('>I', 8 + len(tagx)) + tagx) indx0_indices_pos = 192 + len(tagx) + len(idxt0) indx0_indices = align_block('IDXT' + pack('>H', 192 + len(tagx))) header = StringIO() header.write('INDX') header.write(pack('>I', 192)) header.write('\x00\x00\x00\x00') header.write(pack('>I', 0)) header.write(pack('>I', 6)) header.write(pack('>I', indx0_indices_pos)) header.write(pack('>I', 1)) header.write(pack('>I', 65001)) header.write(iana2mobi(str(self._oeb.metadata.language[0]))) header.write(pack('>I', indxt_count + 1)) header.write('\x00\x00\x00\x00') header.write('\x00\x00\x00\x00') header.write('\x00\x00\x00\x00') header.write(pack('>I', len(self._ctoc_records))) header.write('\x00' * 124) header.write(pack('>I', 192)) header.write('\x00\x00\x00\x00\x00\x00\x00\x00') header = header.getvalue() indx0.write(header) indx0.write(tagx) indx0.write(idxt0) indx0.write(indx0_indices) indx0 = indx0.getvalue() self._primary_index_record = len(self._records) self._records.extend([ indx0, indx1]) for i, ctoc_record in enumerate(self._ctoc_records): self._records.append(ctoc_record) if self._MobiDoc.mobiType > 256: tagx = TAGX['secondary_periodical'] tagx_len = 8 + len(tagx) indx0 = StringIO() indx0.write('INDX' + pack('>I', 192) + '\x00\x00\x00\x00\x00\x00\x00\x00') indx0.write(pack('>I', 6)) indx0.write(pack('>I', 232)) indx0.write(pack('>I', 1)) indx0.write(pack('>I', 65001)) indx0.write('\xff\xff\xff\xff') indx0.write(pack('>I', 4)) indx0.write('\x00\x00\x00\x00') indx0.write('\x00' * 136) indx0.write(pack('>I', 192)) indx0.write('\x00\x00\x00\x00\x00\x00\x00\x00') indx0.write('TAGX' + pack('>I', tagx_len) + tagx) indx0.write('\rmastheadImage\x00\x04') indx0.write('IDXT\x00\xd8\x00\x00') indx1 = StringIO() indx1.write('INDX' + pack('>I', 192) + '\x00\x00\x00\x00') indx1.write(pack('>I', 1)) indx1.write(pack('>I', 0)) indx1.write('\x00\x00\x00\xf0') indx1.write(pack('>I', 4)) indx1.write('\xff\xff\xff\xff\xff\xff\xff\xff') indx1.write('\x00' * (192 - indx1.tell())) indx1.write('\x00\x01\x80') indx1.write('\x06author\x02\x80\x80\xc7') indx1.write('\x0bdescription\x02\x80\x80\xc6') indx1.write('\rmastheadImage\x02\x85\x80\xc5') indx1.write('IDXT\x00\xc0\x00\xc3\x00\xce\x00\xde') indx0 = indx0.getvalue() indx1 = indx1.getvalue() self._records.extend((indx0, indx1)) if self.opts.verbose > 3: mkdtemp = mkdtemp import tempfile import os t = mkdtemp() for i, n in enumerate([ 'sindx1', 'sindx0', 'ctoc', 'indx0', 'indx1']): open(os.path.join(t, n + '.bin'), 'wb').write(self._records[-(i + 1)]) self._oeb.log.debug('Index records dumped to', t) def _clean_text_value(self, text): if text is not None and text.strip(): text = text.strip() if not isinstance(text, unicode): text = text.decode('utf-8', 'replace') text = text.encode('utf-8') else: text = '(none)'.encode('utf-8') return text def _add_to_ctoc(self, ctoc_str, record_offset): if 64504 - self._ctoc.tell() < 2 + len(ctoc_str): pad = 64504 - self._ctoc.tell() self._ctoc.write('\x00' * pad) self._ctoc_records.append(self._ctoc.getvalue()) self._ctoc.truncate(0) self._ctoc_offset += 65536 record_offset = self._ctoc_offset offset = self._ctoc.tell() + record_offset self._ctoc.write(decint(len(ctoc_str), DECINT_FORWARD) + ctoc_str) return offset def _add_flat_ctoc_node(self, node, ctoc, title = None): t = None if title is None else title t = self._clean_text_value(t) self._last_toc_entry = t ctoc_name_map = { } if node.klass == 'article': ctoc_name_map['klass'] = 'chapter' else: ctoc_name_map['klass'] = node.klass ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset) self._chapterCount += 1 self._ctoc_map.append(ctoc_name_map) def _add_structured_ctoc_node(self, node, ctoc, title = None): if node.klass is None: return None t = node.klass is None if title is None else title t = self._clean_text_value(t) self._last_toc_entry = t ctoc_name_map = { } ctoc_name_map['klass'] = node.klass if node.klass == 'chapter': ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset) self._chapterCount += 1 elif node.klass == 'periodical': ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset) for entry in self._ctoc_map: if entry['klass'] == 'periodical': ctoc_name_map['classOffset'] = entry['classOffset'] break continue else: ctoc_name_map['classOffset'] = self._add_to_ctoc(node.klass, 0) self._periodicalCount += 1 elif node.klass == 'section': ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset) for entry in self._ctoc_map: if entry['klass'] == 'section': ctoc_name_map['classOffset'] = entry['classOffset'] break continue else: ctoc_name_map['classOffset'] = self._add_to_ctoc(node.klass, 0) self._sectionCount += 1 elif node.klass == 'article': ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset) for entry in self._ctoc_map: if entry['klass'] == 'article': ctoc_name_map['classOffset'] = entry['classOffset'] break continue else: ctoc_name_map['classOffset'] = self._add_to_ctoc(node.klass, 0) if node.description: d = self._clean_text_value(node.description) ctoc_name_map['descriptionOffset'] = self._add_to_ctoc(d, self._ctoc_offset) else: ctoc_name_map['descriptionOffset'] = None if node.author: a = self._clean_text_value(node.author) ctoc_name_map['authorOffset'] = self._add_to_ctoc(a, self._ctoc_offset) else: ctoc_name_map['authorOffset'] = None self._articleCount += 1 else: raise NotImplementedError('writer._generate_ctoc.add_node: title: %s has unrecognized klass: %s, playOrder: %d' % (node.title, node.klass, node.play_order)) self._ctoc_map.append(ctoc_name_map) def _generate_ctoc(self): toc = self._oeb.toc reduced_toc = [] self._ctoc_map = [] self._last_toc_entry = None self._ctoc = StringIO() self._periodicalCount = 0 self._sectionCount = 0 self._articleCount = 0 self._chapterCount = 0 if self._conforming_periodical_toc: self._oeb.logger.info('Generating structured CTOC ...') for child in toc.iter(): if self.opts.verbose > 2: self._oeb.logger.info(' %s' % child) self._add_structured_ctoc_node(child, self._ctoc) else: self._oeb.logger.info('Generating flat CTOC ...') previousOffset = -1 currentOffset = 0 for i, child in enumerate(toc.iterdescendants()): if child.klass is None: child.klass = 'chapter' if (child.klass == 'article' or child.klass == 'chapter') and child.depth() == 1: if self.opts.verbose > 2: self._oeb.logger.info('adding (klass:%s depth:%d) %s to flat ctoc' % (child.klass, child.depth(), child)) h = child.href if h is None: self._oeb.logger.warn(' Ignoring TOC entry with no href:', child.title) continue if h not in self._id_offsets: self._oeb.logger.warn(' Ignoring missing TOC entry:', unicode(child)) continue currentOffset = self._id_offsets[h] if currentOffset != previousOffset: self._add_flat_ctoc_node(child, self._ctoc) reduced_toc.append(child) previousOffset = currentOffset else: self._oeb.logger.warn(" Ignoring redundant href: %s in '%s'" % (h, child.title)) currentOffset != previousOffset if self.opts.verbose > 2: self._oeb.logger.info('skipping class: %s depth %d at position %d' % (child.klass, child.depth(), i)) continue self._oeb.toc.nodes = reduced_toc if not (self._periodicalCount) and not (self._sectionCount) or not (self._articleCount) or not (self.opts.mobi_periodical): mobiType = 2 elif self._periodicalCount: pt = None if self._oeb.metadata.publication_type: x = unicode(self._oeb.metadata.publication_type[0]).split(':') if len(x) > 1: pt = x[1] mobiType = { 'newspaper': 257 }.get(pt, 259) else: raise NotImplementedError('_generate_ctoc: Unrecognized document structured') self._MobiDoc = not (self.opts.mobi_periodical)(mobiType) if self.opts.verbose > 2: structType = 'book' if mobiType > 256: structType = None if mobiType == 258 else 'structured periodical' self._oeb.logger.info('Instantiating a %s MobiDocument of type 0x%X' % (structType, mobiType)) if mobiType > 256: self._oeb.logger.info('periodicalCount: %d sectionCount: %d articleCount: %d' % (self._periodicalCount, self._sectionCount, self._articleCount)) else: self._oeb.logger.info('chapterCount: %d' % self._chapterCount) if True: rec_count = len(self._ctoc_records) None(self._oeb.logger.info % (' CNCX utilization: %d %s %.0f%% full', rec_count + 1 if rec_count else 'record,', len(self._ctoc.getvalue()) / 655)) return align_block(self._ctoc.getvalue()) def _write_periodical_node(self, indxt, indices, index, offset, length, count, firstSection, lastSection): pos = 192 + indxt.tell() indices.write(pack('>H', pos)) name = '%04X' % count indxt.write(chr(len(name)) + name) indxt.write(INDXT['periodical']) indxt.write(chr(1)) indxt.write(decint(offset, DECINT_FORWARD)) indxt.write(decint(length, DECINT_FORWARD)) indxt.write(decint(self._ctoc_map[index]['titleOffset'], DECINT_FORWARD)) indxt.write(decint(0, DECINT_FORWARD)) indxt.write(decint(self._ctoc_map[index]['classOffset'], DECINT_FORWARD)) indxt.write(decint(firstSection, DECINT_FORWARD)) indxt.write(decint(lastSection, DECINT_FORWARD)) indxt.write(decint(0, DECINT_FORWARD)) def _write_section_node(self, indxt, indices, myCtocMapIndex, index, offset, length, count, firstArticle, lastArticle, parentIndex): pos = 192 + indxt.tell() indices.write(pack('>H', pos)) name = '%04X' % count indxt.write(chr(len(name)) + name) indxt.write(INDXT['section']) indxt.write(chr(0)) indxt.write(decint(offset, DECINT_FORWARD)) indxt.write(decint(length, DECINT_FORWARD)) indxt.write(decint(self._ctoc_map[myCtocMapIndex]['titleOffset'], DECINT_FORWARD)) indxt.write(decint(1, DECINT_FORWARD)) indxt.write(decint(self._ctoc_map[myCtocMapIndex]['classOffset'], DECINT_FORWARD)) indxt.write(decint(parentIndex, DECINT_FORWARD)) indxt.write(decint(firstArticle, DECINT_FORWARD)) indxt.write(decint(lastArticle, DECINT_FORWARD)) def _write_article_node(self, indxt, indices, index, offset, length, count, parentIndex): pos = 192 + indxt.tell() indices.write(pack('>H', pos)) name = '%04X' % count indxt.write(chr(len(name)) + name) indxt.write(INDXT['article']) hasAuthor = None if self._ctoc_map[index]['authorOffset'] else False hasDescription = None if self._ctoc_map[index]['descriptionOffset'] else False flagBits = 0 if hasAuthor: flagBits |= 4 if hasDescription: flagBits |= 2 indxt.write(pack('>B', flagBits)) indxt.write(decint(offset, DECINT_FORWARD)) indxt.write(decint(length, DECINT_FORWARD)) indxt.write(decint(self._ctoc_map[index]['titleOffset'], DECINT_FORWARD)) indxt.write(decint(2, DECINT_FORWARD)) indxt.write(decint(self._ctoc_map[index]['classOffset'], DECINT_FORWARD)) indxt.write(decint(parentIndex, DECINT_FORWARD)) descriptionOffset = self._ctoc_map[index]['descriptionOffset'] if descriptionOffset: indxt.write(decint(descriptionOffset, DECINT_FORWARD)) authorOffset = self._ctoc_map[index]['authorOffset'] if authorOffset: indxt.write(decint(authorOffset, DECINT_FORWARD)) def _write_chapter_node(self, indxt, indices, index, offset, length, count): if self.opts.verbose > 2: pass pos = 192 + indxt.tell() indices.write(pack('>H', pos)) name = '%04X' % count indxt.write(chr(len(name)) + name) indxt.write(INDXT['chapter']) indxt.write(decint(offset, DECINT_FORWARD)) indxt.write(decint(length, DECINT_FORWARD)) indxt.write(decint(self._ctoc_map[index]['titleOffset'], DECINT_FORWARD)) indxt.write(decint(0, DECINT_FORWARD)) def _compute_offset_length(self, i, node, entries): h = node.href if h not in self._id_offsets: self._oeb.log.warning('Could not find TOC entry:', node.title) return (-1, -1) offset = self._id_offsets[h] length = None for sibling in entries[i + 1:]: h2 = sibling.href if h2 in self._id_offsets: offset2 = self._id_offsets[h2] if offset2 > offset: length = offset2 - offset break offset2 > offset if length is None: length = self._content_length - offset return (offset, length) def _establish_document_structure(self): documentType = None try: klass = self._ctoc_map[0]['klass'] except: klass = None if klass == 'chapter' or klass == None: documentType = 'book' if self.opts.verbose > 2: self._oeb.logger.info('Adding a MobiBook to self._MobiDoc') self._MobiDoc.documentStructure = MobiBook() elif klass == 'periodical': documentType = klass if self.opts.verbose > 2: self._oeb.logger.info('Adding a MobiPeriodical to self._MobiDoc') self._MobiDoc.documentStructure = MobiPeriodical(self._MobiDoc.getNextNode()) self._MobiDoc.documentStructure.startAddress = self._anchor_offset_kindle else: raise NotImplementedError('_establish_document_structure: unrecognized klass: %s' % klass) return klass == None def _generate_section_indices(self, child, currentSection, myPeriodical, myDoc): sectionTitles = list(child.iter())[1:] sectionIndices = [] sectionParents = [] for j, section in enumerate(sectionTitles): if section.klass == 'periodical': sectionIndices.append(currentSection) if self.opts.verbose > 3: self._oeb.logger.info('Periodical: %15.15s \tkls:%s \tdpt:%d ply:%03d' % (section.title, section.klass, section.depth(), section.play_order)) self.opts.verbose > 3 if section.klass == 'section': myNewSection = myPeriodical.addSectionParent(myDoc, j) sectionParents.append(myNewSection) currentSection += 1 sectionIndices.append(currentSection) if self.opts.verbose > 3: self._oeb.logger.info(' Section: %15.15s \tkls:%s \tdpt:%d ply:%03d \tindex:%d' % (section.title, section.klass, section.depth(), section.play_order, j)) self.opts.verbose > 3 if section.klass == 'article': sectionIndices.append(currentSection) continue if self.opts.verbose > 3: self._oeb.logger.info(' Unrecognized class %s in structured document' % section.klass) continue return (sectionIndices, sectionParents) def _generate_section_article_indices(self, i, section, entries, sectionIndices, sectionParents): sectionArticles = list(section.iter())[1:] for j, article in enumerate(sectionArticles): (offset, length) = self._compute_offset_length(i, article, entries) if self.opts.verbose > 2: self._oeb.logger.info('article %02d: offset = 0x%06X length = 0x%06X' % (j, offset, length)) ctoc_map_index = i + j + 1 mySectionParent = sectionParents[sectionIndices[i - 1]] myNewArticle = MobiArticle(mySectionParent, offset, length, ctoc_map_index) mySectionParent.addArticle(myNewArticle) def _add_book_chapters(self, myDoc, indxt, indices): chapterCount = myDoc.documentStructure.chapterCount() if self.opts.verbose > 3: self._oeb.logger.info('Writing %d chapters for mobitype 0x%03X' % (chapterCount, myDoc.mobiType)) for c, chapter in enumerate(list(myDoc.documentStructure.chapters)): index = chapter.myCtocMapIndex self._write_chapter_node(indxt, indices, index, chapter.startAddress, chapter.length, c) last_name = '%04X' % c return (last_name, c) def _add_periodical_flat_articles(self, myDoc, indxt, indices): sectionParent = myDoc.documentStructure.sectionParents[0] articleCount = len(sectionParent.articles) if self.opts.verbose > 3: self._oeb.logger.info('Writing %d articles for mobitype 0x%03X' % (articleCount, myDoc.mobiType)) index = 0 offset = myDoc.documentStructure.startAddress length = myDoc.documentStructure.length c = 0 firstSection = myDoc.documentStructure.firstSectionIndex lastSection = myDoc.documentStructure.lastSectionIndex self._write_periodical_node(indxt, indices, index, offset, length, c, firstSection, lastSection) index += 1 offset = sectionParent.startAddress length = sectionParent.sectionLength c += 1 firstArticle = sectionParent.firstArticleIndex lastArticle = sectionParent.lastArticleIndex parentIndex = sectionParent.parentIndex self._write_section_node(indxt, indices, sectionParent.myCtocMapIndex, index, offset, length, c, firstArticle, lastArticle, parentIndex) last_name = '%04X' % c for i, article in enumerate(list(sectionParent.articles)): index = article.myCtocMapIndex offset = article.startAddress length = article.articleLength c += 1 parentIndex = article.sectionParentIndex self._write_article_node(indxt, indices, index, offset, length, c, parentIndex) last_name = '%04X' % c return (last_name, c) def _add_periodical_structured_articles(self, myDoc, indxt, indices): if self.opts.verbose > 2: self._oeb.logger.info('Writing NCXEntries for mobiType 0x%03X' % myDoc.mobiType) sectionParent = myDoc.documentStructure.sectionParents[0] index = 0 offset = myDoc.documentStructure.startAddress length = myDoc.documentStructure.length c = 0 firstSection = myDoc.documentStructure.firstSectionIndex lastSection = myDoc.documentStructure.lastSectionIndex self._write_periodical_node(indxt, indices, index, offset, length, c, firstSection, lastSection) sectionCount = firstSection while sectionCount <= lastSection: sectionParent = myDoc.documentStructure.sectionParents[sectionCount - 1] offset = sectionParent.startAddress length = sectionParent.sectionLength c += 1 firstArticle = sectionParent.firstArticleIndex lastArticle = sectionParent.lastArticleIndex parentIndex = sectionParent.parentIndex self._write_section_node(indxt, indices, sectionParent.myCtocMapIndex, sectionCount, offset, length, c, firstArticle, lastArticle, parentIndex) sectionCount += 1 sectionCount = firstSection while sectionCount <= lastSection: sectionParent = myDoc.documentStructure.sectionParents[sectionCount - 1] last_name = '%04X' % c for i, article in enumerate(list(sectionParent.articles)): if self.opts.verbose > 3: self._oeb.logger.info('Adding section:article %d:%02d' % (sectionParent.myIndex, i)) index = article.myCtocMapIndex offset = article.startAddress length = article.articleLength c += 1 parentIndex = article.sectionParentIndex self._write_article_node(indxt, indices, index, offset, length, c, parentIndex) last_name = '%04X' % c sectionCount += 1 return (last_name, c) def _generate_indxt(self): documentType = 'unknown' sectionIndices = [] sectionParents = [] currentSection = 0 toc = self._oeb.toc indxt = StringIO() indices = StringIO() c = 0 indices.write('IDXT') c = 0 last_name = None documentType = self._establish_document_structure() myDoc = self._MobiDoc nodes = list(toc.iter())[0:1] for i, child in enumerate(nodes): if documentType == 'periodical': myPeriodical = myDoc.documentStructure if self.opts.verbose > 3: self._oeb.logger.info('\nDocument: %s \tkls:%s \tdpt:%d ply:%03d' % (child.title, child.klass, child.depth(), child.play_order)) (sectionIndices, sectionParents) = self._generate_section_indices(child, currentSection, myPeriodical, myDoc) continue if documentType == 'book': myBook = myDoc.documentStructure if self.opts.verbose > 3: self._oeb.logger.info('\nBook: %-19.19s \tkls:%s \tdpt:%d ply:%03d' % (child.title, child.klass, child.depth(), child.play_order)) self.opts.verbose > 3 if self.opts.verbose > 3: self._oeb.logger.info('unknown document type %12.12s \tdepth:%d' % (child.title, child.depth())) continue entries = list(toc.iter())[1:] for i, child in enumerate(entries): if not (child.title) or not child.title.strip(): continue (offset, length) = self._compute_offset_length(i, child, entries) if (child.klass == 'chapter' or not (self.opts.mobi_periodical)) and child.klass == 'article': myNewChapter = MobiChapter(myDoc.getNextNode(), offset, length, i) myBook.addChapter(myNewChapter) try: if self.opts.verbose > 3: self._oeb.logger.info(' Chapter: %-14.14s \tcls:%s \tdpt:%d ply:%03d \toff:0x%X \t:len0x%X' % (child.title, child.klass, child.depth(), child.play_order, offset, length)) if self.opts.verbose > 3: self._oeb.logger.info(' Chapter: %-14.14s \tclass:%s \tdepth:%d playOrder:%03d \toff:0x%X \t:len0x%X' % ('(bad string)', child.klass, child.depth(), child.play_order, offset, length)) continue self.opts.verbose > 3 if child.klass == 'section' and self.opts.mobi_periodical: if self.opts.verbose > 3: self._oeb.logger.info('\n Section: %-15.15s \tkls:%s \tdpt:%d ply:%03d' % (child.title, child.klass, child.depth(), child.play_order)) self._generate_section_article_indices(i, child, entries, sectionIndices, sectionParents) continue if self.opts.verbose > 3: self._oeb.logger.info('') mobiType = myDoc.mobiType if self.opts.verbose > 3: self._MobiDoc.dumpInfo() if mobiType == 2: (last_name, c) = self._add_book_chapters(myDoc, indxt, indices) elif mobiType == 258 and myDoc.documentStructure.sectionCount() == 1: (last_name, c) = self._add_periodical_flat_articles(myDoc, indxt, indices) else: (last_name, c) = self._add_periodical_structured_articles(myDoc, indxt, indices) return (align_block(indxt.getvalue()), c, align_block(indices.getvalue()), last_name) class HTMLRecordData(object): def __init__(self): self._continuingNode = -1 self._continuingNodeParent = -1 self._openingNode = -1 self._openingNodeParent = -1 self._currentSectionNodeCount = -1 self._nextSectionNumber = -1 self._nextSectionOpeningNode = -1 self._nextSectionNodeCount = -1 def getContinuingNode(self): return self._continuingNode def setContinuingNode(self, value): self._continuingNode = value continuingNode = property(getContinuingNode, setContinuingNode, None, None) def getContinuingNodeParent(self): return self._continuingNodeParent def setContinuingNodeParent(self, value): self._continuingNodeParent = value continuingNodeParent = property(getContinuingNodeParent, setContinuingNodeParent, None, None) def getOpeningNode(self): return self._openingNode def setOpeningNode(self, value): self._openingNode = value openingNode = property(getOpeningNode, setOpeningNode, None, None) def getOpeningNodeParent(self): return self._openingNodeParent def setOpeningNodeParent(self, value): self._openingNodeParent = value openingNodeParent = property(getOpeningNodeParent, setOpeningNodeParent, None, None) def getCurrentSectionNodeCount(self): return self._currentSectionNodeCount def setCurrentSectionNodeCount(self, value): self._currentSectionNodeCount = value currentSectionNodeCount = property(getCurrentSectionNodeCount, setCurrentSectionNodeCount, None, None) def getNextSectionNumber(self): return self._nextSectionNumber def setNextSectionNumber(self, value): self._nextSectionNumber = value nextSectionNumber = property(getNextSectionNumber, setNextSectionNumber, None, None) def getNextSectionOpeningNode(self): return self._nextSectionOpeningNode def setNextSectionOpeningNode(self, value): self._nextSectionOpeningNode = value nextSectionOpeningNode = property(getNextSectionOpeningNode, setNextSectionOpeningNode, None, None) def getNextSectionNodeCount(self): return self._nextSectionNodeCount def setNextSectionNodeCount(self, value): self._nextSectionNodeCount = value nextSectionNodeCount = property(getNextSectionNodeCount, setNextSectionNodeCount, None, None) def dumpData(self, recordNumber, oeb): oeb.logger.info('--- Summary of HTML Record 0x%x [%d] indexing ---' % (recordNumber, recordNumber)) oeb.logger.info(' continuingNode: %03d' % self.continuingNode) oeb.logger.info(' continuingNodeParent: %03d' % self.continuingNodeParent) oeb.logger.info(' openingNode: %03d' % self.openingNode) oeb.logger.info(' openingNodeParent: %03d' % self.openingNodeParent) oeb.logger.info(' currentSectionNodeCount: %03d' % self.currentSectionNodeCount) oeb.logger.info(' nextSectionNumber: %03d' % self.nextSectionNumber) oeb.logger.info(' nextSectionOpeningNode: %03d' % self.nextSectionOpeningNode) oeb.logger.info(' nextSectionNodeCount: %03d' % self.nextSectionNodeCount) class MobiDocument(object): _nextNode = -1 def __init__(self, mobitype): self._mobitype = mobitype self._documentStructure = None def getMobiType(self): return self._mobitype def setMobiType(self, value): self._mobitype = value mobiType = property(getMobiType, setMobiType, None, None) def getDocumentStructure(self): return self._documentStructure def setDocumentStructure(self, value): self._documentStructure = value documentStructure = property(getDocumentStructure, setDocumentStructure, None, None) def getNextNode(self): self._nextNode += 1 return self._nextNode def dumpInfo(self): self._documentStructure.dumpInfo() class MobiBook(object): def __init__(self): self._chapters = [] def chapterCount(self): return len(self._chapters) def getChapters(self): return self._chapters def setChapters(self, value): self._chapters = value chapters = property(getChapters, setChapters, None, None) def addChapter(self, value): self._chapters.append(value) def dumpInfo(self): print '%20s:' % 'Book' print '%20s: %d' % ('Number of chapters', len(self._chapters)) for count, chapter in enumerate(self._chapters): print '%20s: %d' % ('myCtocMapIndex', chapter.myCtocMapIndex) print '%20s: %d' % ('Chapter', count) print '%20s: 0x%X' % ('startAddress', chapter.startAddress) print '%20s: 0x%X' % ('length', chapter.length) print class MobiChapter(object): def __init__(self, myIndex, startAddress, length, ctoc_map_index): self._myIndex = myIndex self._startAddress = startAddress self._length = length self._myCtocMapIndex = ctoc_map_index def getMyCtocMapIndex(self): return self._myCtocMapIndex def setMyCtocMapIndex(self, value): self._myCtocMapIndex = value myCtocMapIndex = property(getMyCtocMapIndex, setMyCtocMapIndex, None, None) def getMyIndex(self): return self._myIndex myIndex = property(getMyIndex, None, None, None) def getStartAddress(self): return self._startAddress def setStartAddress(self, value): self._startAddress = value startAddress = property(getStartAddress, setStartAddress, None, None) def getLength(self): return self._length def setLength(self, value): self._length = value length = property(getLength, setLength, None, None) class MobiPeriodical(object): def __init__(self, myIndex): self._myIndex = myIndex self._sectionParents = [] self._startAddress = 0xFFFFFFFFL self._length = 0xFFFFFFFFL self._firstSectionIndex = 0xFFFFFFFFL self._lastSectionIndex = 0xFFFFFFFFL self._myCtocMapIndex = 0 def getMyIndex(self): return self._myIndex def setMyIndex(self, value): self._myIndex = value myIndex = property(getMyIndex, setMyIndex, None, None) def getSectionParents(self): return self._sectionParents def setSectionParents(self, value): self._sectionParents = value sectionParents = property(getSectionParents, setSectionParents, None, None) def sectionCount(self): return len(self._sectionParents) def getStartAddress(self): return self._startAddress def setStartAddress(self, value): self._startAddress = value startAddress = property(getStartAddress, setStartAddress, None, None) def getLength(self): return self._length def setLength(self, value): self._length = value length = property(getLength, setLength, None, None) def getFirstSectionIndex(self): return self._firstSectionIndex def setFirstSectionIndex(self, value): self._firstSectionIndex = value firstSectionIndex = property(getFirstSectionIndex, setFirstSectionIndex, None, None) def getLastSectionIndex(self): return self._lastSectionIndex def setLastSectionIndex(self, value): self._lastSectionIndex = value lastSectionIndex = property(getLastSectionIndex, setLastSectionIndex, None, None) def getMyCtocMapIndex(self): return self._myCtocMapIndex def setMyCtocMapIndex(self, value): self._myCtocMapIndex = value myCtocMapIndex = property(getMyCtocMapIndex, setMyCtocMapIndex, None, None) def addSectionParent(self, myIndex, ctoc_map_index): newSection = MobiSection(myIndex) newSection.parentIndex = self._myIndex newSection.sectionIndex = len(self._sectionParents) newSection.myCtocMapIndex = ctoc_map_index self._sectionParents.append(newSection) return newSection def dumpInfo(self): print '%20s:' % 'Periodical' print '%20s: 0x%X' % ('myIndex', self.myIndex) print '%20s: 0x%X' % ('startAddress', self.startAddress) print '%20s: 0x%X' % ('length', self.length) print '%20s: 0x%X' % ('myCtocMapIndex', self.myCtocMapIndex) print '%20s: 0x%X' % ('firstSectionIndex', self.firstSectionIndex) print '%20s: 0x%X' % ('lastSectionIndex', self.lastSectionIndex) print '%20s: %d' % ('Number of Sections', len(self._sectionParents)) for count, section in enumerate(self._sectionParents): print '\t%20s: %d' % ('Section', count) print '\t%20s: 0x%X' % ('startAddress', section.startAddress) print '\t%20s: 0x%X' % ('length', section.sectionLength) print '\t%20s: 0x%X' % ('parentIndex', section.parentIndex) print '\t%20s: 0x%X' % ('myIndex', section.myIndex) print '\t%20s: 0x%X' % ('firstArticleIndex', section.firstArticleIndex) print '\t%20s: 0x%X' % ('lastArticleIndex', section.lastArticleIndex) print '\t%20s: 0x%X' % ('articles', len(section.articles)) print '\t%20s: 0x%X' % ('myCtocMapIndex', section.myCtocMapIndex) print for artCount, article in enumerate(section.articles): print '\t\t%20s: %d' % ('Article', artCount) print '\t\t%20s: 0x%X' % ('startAddress', article.startAddress) print '\t\t%20s: 0x%X' % ('length', article.articleLength) print '\t\t%20s: 0x%X' % ('sectionIndex', article.sectionParentIndex) print '\t\t%20s: 0x%X' % ('myIndex', article.myIndex) print '\t\t%20s: 0x%X' % ('myCtocMapIndex', article.myCtocMapIndex) print class MobiSection(object): def __init__(self, myMobiDoc): self._myMobiDoc = myMobiDoc self._myIndex = myMobiDoc.getNextNode() self._parentIndex = 0xFFFFFFFFL self._firstArticleIndex = 0 self._lastArticleIndex = 0 self._startAddress = 0xFFFFFFFFL self._sectionLength = 0xFFFFFFFFL self._articles = [] self._myCtocMapIndex = -1 def getMyMobiDoc(self): return self._myMobiDoc def setMyMobiDoc(self, value): self._myMobiDoc = value myMobiDoc = property(getMyMobiDoc, setMyMobiDoc, None, None) def getMyIndex(self): return self._myIndex def setMyIndex(self, value): self._myIndex = value myIndex = property(getMyIndex, setMyIndex, None, None) def getParentIndex(self): return self._parentIndex def setParentIndex(self, value): self._parentIndex = value parenIndex = property(getParentIndex, setParentIndex, None, None) def getFirstArticleIndex(self): return self._firstArticleIndex def setFirstArticleIndex(self, value): self._firstArticleIndex = value firstArticleIndex = property(getFirstArticleIndex, setFirstArticleIndex, None, None) def getLastArticleIndex(self): return self._lastArticleIndex def setLastArticleIndex(self, value): self._lastArticleIndex = value lastArticleIndex = property(getLastArticleIndex, setLastArticleIndex, None, None) def getStartAddress(self): return self._startAddress def setStartAddress(self, value): self._startAddress = value startAddress = property(getStartAddress, setStartAddress, None, None) def getSectionLength(self): return self._sectionLength def setSectionLength(self, value): self._sectionLength = value sectionLength = property(getSectionLength, setSectionLength, None, None) def getArticles(self): return self._articles def setArticles(self, value): self._articles = value articles = property(getArticles, setArticles, None, None) def getMyCtocMapIndex(self): return self._myCtocMapIndex def setMyCtocMapIndex(self, value): self._myCtocMapIndex = value myCtocMapIndex = property(getMyCtocMapIndex, setMyCtocMapIndex, None, None) def addArticle(self, article): self._articles.append(article) self.myMobiDoc.documentStructure.lastSectionIndex = self.myIndex class MobiArticle(object): def __init__(self, sectionParent, startAddress, length, ctocMapIndex): self._mySectionParent = sectionParent self._myMobiDoc = sectionParent.myMobiDoc self._myIndex = sectionParent.myMobiDoc.getNextNode() self._myCtocMapIndex = ctocMapIndex self._sectionParentIndex = sectionParent.myIndex self._startAddress = startAddress self._articleLength = length def getMySectionParent(self): return self._mySectionParent def setMySectionParent(self, value): self._mySectionParent = value mySectionParent = property(getMySectionParent, setMySectionParent, None, None) def getMyMobiDoc(self): return self._myMobiDoc def setMyMobiDoc(self, value): self._myMobiDoc = value myMobiDoc = property(getMyMobiDoc, setMyMobiDoc, None, None) def getMyIndex(self): return self._myIndex def setMyIndex(self, value): self._sectionIndex = value myIndex = property(getMyIndex, setMyIndex, None, None) def getSectionParentIndex(self): return self._sectionParentIndex def setSectionParentIndex(self, value): self._sectionParentIndex = value sectionParentIndex = property(getSectionParentIndex, setSectionParentIndex, None, None) def getStartAddress(self): return self._startAddress def setStartAddress(self, value): self._startAddress = value startAddress = property(getStartAddress, setStartAddress, None, None) def getArticleLength(self): return self._articleLength def setArticleLength(self, value): self._articleLength = value articleLength = property(getArticleLength, setArticleLength, None, None) def getMyCtocMapIndex(self): return self._myCtocMapIndex def setMyCtocMapIndex(self, value): self._myCtocMapIndex = value myCtocMapIndex = property(getMyCtocMapIndex, setMyCtocMapIndex, None, None)