home *** CD-ROM | disk | FTP | other *** search
/ Maximum CD 2010 November / maximum-cd-2010-11.iso / DiscContents / calibre-0.7.13.msi / file_987 (.txt) < prev    next >
Encoding:
Python Compiled Bytecode  |  2010-08-06  |  75.8 KB  |  2,451 lines

  1. # Source Generated with Decompyle++
  2. # File: in.pyc (Python 2.6)
  3.  
  4. __license__ = 'GPL v3'
  5. __copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.cam> and         Kovid Goyal <kovid@kovidgoyal.net>'
  6. from collections import defaultdict
  7. from itertools import count
  8. from itertools import izip
  9. import random
  10. import re
  11. from struct import pack
  12. import time
  13. from urlparse import urldefrag
  14. from PIL import Image
  15. from cStringIO import StringIO
  16. from calibre.ebooks.mobi.langcodes import iana2mobi
  17. from calibre.ebooks.mobi.mobiml import MBP_NS
  18. from calibre.ebooks.oeb.base import OEB_DOCS
  19. from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES
  20. from calibre.ebooks.oeb.base import XHTML
  21. from calibre.ebooks.oeb.base import XHTML_NS
  22. from calibre.ebooks.oeb.base import XML_NS
  23. from calibre.ebooks.oeb.base import namespace
  24. from calibre.ebooks.oeb.base import prefixname
  25. from calibre.ebooks.oeb.base import urlnormalize
  26. from calibre.ebooks.compression.palmdoc import compress_doc
  # Module-level switches and format constants used by the MOBI writer below.
  # INDEXING gates the index-generation attempt in MobiWriter._generate_content.
  27. INDEXING = True
  # NOTE(review): FCIS_FLIS and WRITE_PBREAKS are not referenced in the visible
  # part of this file; presumably feature switches -- confirm at their use sites.
  28. FCIS_FLIS = True
  29. WRITE_PBREAKS = True
  # Maps metadata field names to integer codes ('pubdate' and 'date' share 106).
  # NOTE(review): presumably MOBI EXTH header record types -- confirm.
  30. EXTH_CODES = {
  31.     'creator': 100,
  32.     'publisher': 101,
  33.     'description': 103,
  34.     'identifier': 104,
  35.     'subject': 105,
  36.     'pubdate': 106,
  37.     'date': 106,
  38.     'review': 107,
  39.     'contributor': 108,
  40.     'rights': 109,
  41.     'type': 111,
  42.     'source': 112,
  43.     'title': 503 }
  # Size in bytes of one text record: _read_text_record reads RECORD_SIZE bytes
  # at a time and the navpoint code maps offsets to records with // RECORD_SIZE.
  44. RECORD_SIZE = 4096
  # Compression identifiers; PALMDOC is the default for MobiWriter.__init__.
  # NOTE(review): HUFFDIC is not used in the visible part of this file.
  45. UNCOMPRESSED = 1
  46. PALMDOC = 2
  47. HUFFDIC = 17480
  # Image size limits in bytes (63 KiB, 10 MiB, 16 KiB) and thumbnail bounds.
  # NOTE(review): MAX_THUMB_DIMEN is presumably (width, height) -- confirm.
  48. PALM_MAX_IMAGE_SIZE = 63 * 1024
  49. OTHER_MAX_IMAGE_SIZE = 10 * 1024 * 1024
  50. MAX_THUMB_SIZE = 16 * 1024
  51. MAX_THUMB_DIMEN = (180, 240)
  # Raw byte strings keyed by index flavour.
  # NOTE(review): presumably pre-built TAGX index header payloads -- confirm.
  52. TAGX = {
  53.     'chapter': '\x00\x00\x00\x01\x01\x01\x01\x00\x02\x01\x02\x00\x03\x01\x04\x00\x04\x01\x08\x00\x00\x00\x00\x01',
  54.     'subchapter': '\x00\x00\x00\x01\x01\x01\x01\x00\x02\x01\x02\x00\x03\x01\x04\x00\x04\x01\x08\x00\x05\x01\x10\x00\x15\x01\x10\x00\x16\x01 \x00\x17\x01@\x00\x00\x00\x00\x01',
  55.     'periodical': '\x00\x00\x00\x02\x01\x01\x01\x00\x02\x01\x02\x00\x03\x01\x04\x00\x04\x01\x08\x00\x05\x01\x10\x00\x15\x01 \x00\x16\x01@\x00\x17\x01\x80\x00\x00\x00\x00\x01E\x01\x01\x00F\x01\x02\x00G\x01\x04\x00\x00\x00\x00\x01',
  56.     'secondary_book': '\x00\x00\x00\x01\x01\x01\x01\x00\x00\x00\x00\x01',
  57.     'secondary_periodical': '\x00\x00\x00\x01\x01\x01\x01\x00\x0b\x03\x02\x00\x00\x00\x00\x01' }
  # One-byte values keyed by index entry kind.
  # NOTE(review): presumably INDXT entry control bytes -- confirm.
  58. INDXT = {
  59.     'chapter': '\x0f',
  60.     'subchapter': '\x1f',
  61.     'article': '?',
  62.     'chapter with subchapters': 'o',
  63.     'periodical': '\xdf',
  64.     'section': '\xff' }
  65.  
  66. def encode(data):
  67.     return data.encode('utf-8')
  68.  
  69. DECINT_FORWARD = 0
  70. DECINT_BACKWARD = 1
  71.  
  72. def decint(value, direction):
  73.     bytes = []
  74.     while True:
  75.         b = value & 127
  76.         value >>= 7
  77.         bytes.append(b)
  78.         if value == 0:
  79.             break
  80.             continue
  81.     if direction == DECINT_FORWARD:
  82.         bytes[0] |= 128
  83.     elif direction == DECINT_BACKWARD:
  84.         bytes[-1] |= 128
  85.     
  86.     return ''.join((lambda .0: for b in .0:
  87. chr(b))(reversed(bytes)))
  88.  
  89.  
  90. def align_block(raw, multiple = 4, pad = '\x00'):
  91.     extra = len(raw) % multiple
  92.     if extra == 0:
  93.         return raw
  94.     return raw + pad * (multiple - extra)
  95.  
  96.  
  97. def rescale_image(data, maxsizeb, dimen = None):
  98.     image = Image.open(StringIO(data))
  99.     format = image.format
  100.     changed = False
  101.     if image.format not in ('JPEG', 'GIF'):
  102.         (width, height) = image.size
  103.         area = width * height
  104.         if area <= 40000:
  105.             format = 'GIF'
  106.         else:
  107.             image = image.convert('RGBA')
  108.             format = 'JPEG'
  109.         changed = True
  110.     
  111.     if dimen is not None:
  112.         image.thumbnail(dimen, Image.ANTIALIAS)
  113.         changed = True
  114.     
  115.     if changed:
  116.         data = StringIO()
  117.         image.save(data, format)
  118.         data = data.getvalue()
  119.     
  120.     if len(data) <= maxsizeb:
  121.         return data
  122.     image = image.convert('RGBA')
  123.     for quality in xrange(95, -1, -1):
  124.         data = StringIO()
  125.         image.save(data, 'JPEG', quality = quality)
  126.         data = data.getvalue()
  127.         if len(data) <= maxsizeb:
  128.             return data
  129.     
  130.     (width, height) = image.size
  131.     for scale in xrange(99, 0, -1):
  132.         scale = scale / 100
  133.         data = StringIO()
  134.         scaled = image.copy()
  135.         size = (int(width * scale), height * scale)
  136.         scaled.thumbnail(size, Image.ANTIALIAS)
  137.         scaled.save(data, 'JPEG', quality = 0)
  138.         data = data.getvalue()
  139.         if len(data) <= maxsizeb:
  140.             return data
  141.     
  142.     return data
  143.  
  144.  
  145. class Serializer(object):
  146.     NSRMAP = {
  147.         '': None,
  148.         XML_NS: 'xml',
  149.         XHTML_NS: '',
  150.         MBP_NS: 'mbp' }
  151.     
  152.     def __init__(self, oeb, images, write_page_breaks_after_item = True):
  153.         self.oeb = oeb
  154.         self.images = images
  155.         self.logger = oeb.logger
  156.         self.write_page_breaks_after_item = write_page_breaks_after_item
  157.         self.id_offsets = { }
  158.         self.href_offsets = defaultdict(list)
  159.         self.breaks = []
  160.         buffer = self.buffer = StringIO()
  161.         buffer.write('<html>')
  162.         self.serialize_head()
  163.         self.serialize_body()
  164.         buffer.write('</html>')
  165.         self.fixup_links()
  166.         self.text = buffer.getvalue()
  167.  
  168.     
  169.     def serialize_head(self):
  170.         buffer = self.buffer
  171.         buffer.write('<head>')
  172.         if len(self.oeb.guide) > 0:
  173.             self.serialize_guide()
  174.         
  175.         buffer.write('</head>')
  176.  
  177.     
  178.     def serialize_guide(self):
  179.         buffer = self.buffer
  180.         hrefs = self.oeb.manifest.hrefs
  181.         buffer.write('<guide>')
  182.         for ref in self.oeb.guide.values():
  183.             path = urldefrag(ref.href)[0]
  184.             if hrefs[path].media_type not in OEB_DOCS:
  185.                 continue
  186.             
  187.             buffer.write('<reference type="')
  188.             if ref.type.startswith('other.'):
  189.                 self.serialize_text(ref.type.replace('other.', ''), quot = True)
  190.             else:
  191.                 self.serialize_text(ref.type, quot = True)
  192.             buffer.write('" ')
  193.             if ref.title is not None:
  194.                 buffer.write('title="')
  195.                 self.serialize_text(ref.title, quot = True)
  196.                 buffer.write('" ')
  197.             
  198.             self.serialize_href(ref.href)
  199.             buffer.write(' />')
  200.         
  201.         buffer.write('</guide>')
  202.  
  203.     
  204.     def serialize_href(self, href, base = None):
  205.         hrefs = self.oeb.manifest.hrefs
  206.         (path, frag) = urldefrag(urlnormalize(href))
  207.         if path and base:
  208.             path = base.abshref(path)
  209.         
  210.         if path and path not in hrefs:
  211.             return False
  212.         buffer = self.buffer
  213.         item = path not in hrefs if path else None
  214.         if item and item.spine_position is None:
  215.             return False
  216.         path = item.spine_position is None if item else base.href
  217.         href = None if frag else path
  218.         buffer.write('filepos=')
  219.         self.href_offsets[href].append(buffer.tell())
  220.         buffer.write('0000000000')
  221.         return True
  222.  
  223.     
  224.     def serialize_body(self):
  225.         buffer = self.buffer
  226.         self.anchor_offset = buffer.tell()
  227.         buffer.write('<body>')
  228.         self.anchor_offset_kindle = buffer.tell()
  229.         if 'text' in self.oeb.guide:
  230.             href = self.oeb.guide['text'].href
  231.             buffer.write('<a ')
  232.             self.serialize_href(href)
  233.             buffer.write(' />')
  234.         
  235.         spine = _[1]
  236.         [](_[2])
  237.         for item in spine:
  238.             self.serialize_item(item)
  239.         
  240.         buffer.write('</body>')
  241.  
  242.     
  243.     def serialize_item(self, item):
  244.         buffer = self.buffer
  245.         if not item.linear:
  246.             self.breaks.append(buffer.tell() - 1)
  247.         
  248.         self.id_offsets[urlnormalize(item.href)] = buffer.tell()
  249.         buffer.write('<div>')
  250.         for elem in item.data.find(XHTML('body')):
  251.             self.serialize_elem(elem, item)
  252.         
  253.         buffer.write('<div></div>')
  254.         if self.write_page_breaks_after_item:
  255.             buffer.write('<mbp:pagebreak/>')
  256.         
  257.         buffer.write('</div>')
  258.  
  259.     
  260.     def serialize_elem(self, elem, item, nsrmap = NSRMAP):
  261.         buffer = self.buffer
  262.         if not isinstance(elem.tag, basestring) or namespace(elem.tag) not in nsrmap:
  263.             return None
  264.         tag = prefixname(elem.tag, nsrmap)
  265.         id = elem.attrib.pop('id', None)
  266.         if id is not None:
  267.             href = '#'.join((item.href, id))
  268.             if not self.anchor_offset:
  269.                 pass
  270.             offset = buffer.tell()
  271.             self.id_offsets[urlnormalize(href)] = offset
  272.         
  273.         if self.anchor_offset is not None and tag == 'a' and not (elem.attrib) and not len(elem) and not (elem.text):
  274.             return None
  275.         self.anchor_offset = buffer.tell()
  276.         buffer.write('<')
  277.         buffer.write(tag)
  278.         if elem.attrib:
  279.             for attr, val in elem.attrib.items():
  280.                 if namespace(attr) not in nsrmap:
  281.                     continue
  282.                 
  283.                 attr = prefixname(attr, nsrmap)
  284.                 buffer.write(' ')
  285.                 if attr == 'href':
  286.                     if self.serialize_href(val, item):
  287.                         continue
  288.                     
  289.                 elif attr == 'src':
  290.                     href = urlnormalize(item.abshref(val))
  291.                     if href in self.images:
  292.                         index = self.images[href]
  293.                         buffer.write('recindex="%05d"' % index)
  294.                         continue
  295.                     
  296.                 
  297.                 buffer.write(attr)
  298.                 buffer.write('="')
  299.                 self.serialize_text(val, quot = True)
  300.                 buffer.write('"')
  301.             
  302.         
  303.         if elem.text or len(elem) > 0:
  304.             buffer.write('>')
  305.             if elem.text:
  306.                 self.anchor_offset = None
  307.                 self.serialize_text(elem.text)
  308.             
  309.             for child in elem:
  310.                 self.serialize_elem(child, item)
  311.                 if child.tail:
  312.                     self.anchor_offset = None
  313.                     self.serialize_text(child.tail)
  314.                     continue
  315.             
  316.             buffer.write('</%s>' % tag)
  317.         else:
  318.             buffer.write('/>')
  319.  
  320.     
  321.     def serialize_text(self, text, quot = False):
  322.         text = text.replace('&', '&')
  323.         text = text.replace('<', '<')
  324.         text = text.replace('>', '>')
  325.         text = text.replace(u'┬¡', '')
  326.         if quot:
  327.             text = text.replace('"', '"')
  328.         
  329.         self.buffer.write(encode(text))
  330.  
  331.     
  332.     def fixup_links(self):
  333.         buffer = self.buffer
  334.         id_offsets = self.id_offsets
  335.         for href, hoffs in self.href_offsets.items():
  336.             if href not in id_offsets:
  337.                 self.logger.warn('Hyperlink target %r not found' % href)
  338.                 (href, _) = urldefrag(href)
  339.             
  340.             ioff = self.id_offsets[href]
  341.             for hoff in hoffs:
  342.                 buffer.seek(hoff)
  343.                 buffer.write('%010d' % ioff)
  344.             
  345.         
  346.  
  347.  
  348.  
  349. class MobiWriter(object):
  350.     COLLAPSE_RE = re.compile('[ \\t\\r\\n\\v]+')
  351.     
  352.     def __init__(self, opts, compression = PALMDOC, imagemax = None, prefer_author_sort = False, write_page_breaks_after_item = True):
  353.         self.opts = opts
  354.         self.write_page_breaks_after_item = write_page_breaks_after_item
  355.         if not compression:
  356.             pass
  357.         self._compression = UNCOMPRESSED
  358.         if not imagemax:
  359.             pass
  360.         self._imagemax = OTHER_MAX_IMAGE_SIZE
  361.         self._prefer_author_sort = prefer_author_sort
  362.         self._primary_index_record = None
  363.         self._conforming_periodical_toc = False
  364.         self._indexable = False
  365.         self._ctoc = ''
  366.         self._ctoc_records = []
  367.         self._ctoc_offset = 0
  368.         self._ctoc_largest = 0
  369.         self._HTMLRecords = []
  370.         self._tbSequence = ''
  371.         self._MobiDoc = None
  372.         self._anchor_offset_kindle = 0
  373.         self._initialIndexRecordFound = False
  374.         self._firstSectionConcluded = False
  375.         self._currentSectionIndex = 0
  376.  
  377.     
  378.     def generate(cls, opts):
  379.         imagemax = None if opts.rescale_images else None
  380.         prefer_author_sort = opts.prefer_author_sort
  381.         return cls(compression = PALMDOC, imagemax = imagemax, prefer_author_sort = prefer_author_sort)
  382.  
  383.     generate = classmethod(generate)
  384.     
  385.     def __call__(self, oeb, path):
  386.         if hasattr(path, 'write'):
  387.             return self._dump_stream(oeb, path)
  388.         
  389.         try:
  390.             stream = _[1]
  391.             return self._dump_stream(oeb, stream)
  392.         finally:
  393.             pass
  394.  
  395.  
  396.     
  397.     def _write(self, *data):
  398.         for datum in data:
  399.             self._stream.write(datum)
  400.         
  401.  
  402.     
  403.     def _tell(self):
  404.         return self._stream.tell()
  405.  
  406.     
  407.     def _dump_stream(self, oeb, stream):
  408.         self._oeb = oeb
  409.         self._stream = stream
  410.         self._records = [
  411.             None]
  412.         self._generate_content()
  413.         self._generate_record0()
  414.         self._write_header()
  415.         self._write_content()
  416.  
  417.     
  418.     def _generate_content(self):
  419.         self._map_image_names()
  420.         self._generate_text()
  421.         if INDEXING and self._indexable:
  422.             
  423.             try:
  424.                 self._generate_index()
  425.             self._oeb.log.exception('Failed to generate index')
  426.  
  427.         
  428.         self._generate_images()
  429.  
  430.     
  431.     def _map_image_names(self):
  432.         index = 1
  433.         self._images = images = { }
  434.         mh_href = None
  435.         if 'masthead' in self._oeb.guide:
  436.             mh_href = self._oeb.guide['masthead'].href
  437.             images[mh_href] = 1
  438.             index += 1
  439.         
  440.         for item in self._oeb.manifest.values():
  441.             if item.media_type in OEB_RASTER_IMAGES:
  442.                 if item.href == mh_href:
  443.                     continue
  444.                 
  445.                 images[item.href] = index
  446.                 index += 1
  447.                 continue
  448.         
  449.  
  450.     
  451.     def _read_text_record(self, text):
  452.         pos = text.tell()
  453.         text.seek(0, 2)
  454.         npos = min((pos + RECORD_SIZE, text.tell()))
  455.         last = ''
  456.         while not last.decode('utf-8', 'ignore'):
  457.             size = len(last) + 1
  458.             text.seek(npos - size)
  459.             last = text.read(size)
  460.         extra = 0
  461.         
  462.         try:
  463.             last.decode('utf-8')
  464.         except UnicodeDecodeError:
  465.             prev = len(last)
  466.             while True:
  467.                 text.seek(npos - prev)
  468.                 last = text.read(len(last) + 1)
  469.                 
  470.                 try:
  471.                     last.decode('utf-8')
  472.                 except UnicodeDecodeError:
  473.                     continue
  474.  
  475.                 break
  476.             extra = len(last) - prev
  477.  
  478.         text.seek(pos)
  479.         data = text.read(RECORD_SIZE)
  480.         overlap = text.read(extra)
  481.         text.seek(npos)
  482.         return (data, overlap)
  483.  
  484.     
  485.     def _generate_flat_indexed_navpoints(self):
  486.         self._oeb.logger.info('Indexing flat navPoints ...')
  487.         numberOfHTMLRecords = self._content_length // RECORD_SIZE + 1
  488.         x = numberOfHTMLRecords
  489.         while x:
  490.             self._HTMLRecords.append(HTMLRecordData())
  491.             x -= 1
  492.         toc = self._oeb.toc
  493.         myIndex = 0
  494.         myEndingRecord = 0
  495.         previousOffset = 0
  496.         previousLength = 0
  497.         offset = 0
  498.         length = 0
  499.         entries = list(toc.iter())[1:]
  500.         for i, child in enumerate(entries):
  501.             if not (child.title) or not child.title.strip():
  502.                 child.title = '(none)'
  503.             
  504.             if not (child.title) or not child.title.strip():
  505.                 child.title = '(none)'
  506.             
  507.             h = child.href
  508.             if h not in self._id_offsets:
  509.                 self._oeb.log.warning('  Could not find TOC entry "%s", aborting indexing ...' % child.title)
  510.                 return False
  511.             offset = self._id_offsets[h]
  512.             length = None
  513.             for sibling in entries[i + 1:]:
  514.                 h2 = sibling.href
  515.                 if h2 in self._id_offsets:
  516.                     offset2 = self._id_offsets[h2]
  517.                     if offset2 > offset:
  518.                         length = offset2 - offset
  519.                         break
  520.                     
  521.                 offset2 > offset
  522.             
  523.             if length is None:
  524.                 length = self._content_length - offset
  525.             
  526.             if self.opts.verbose > 3:
  527.                 self._oeb.logger.info('child %03d: %s' % (i, child))
  528.                 self._oeb.logger.info('    title: %s' % child.title)
  529.                 self._oeb.logger.info('    depth: %d' % child.depth())
  530.                 self._oeb.logger.info('   offset: 0x%06X \tlength: 0x%06X \tnext: 0x%06X' % (offset, length, offset + length))
  531.             
  532.             if i and child.depth() == 1 and entries[i - 1].depth() == 1:
  533.                 if offset != previousOffset + previousLength:
  534.                     self._oeb.log.warning('*** TOC discontinuity ***')
  535.                     self._oeb.log.warning(" node %03d: '%s' offset: 0x%X length: 0x%X" % (i - 1, entries[i - 1].title, previousOffset, previousLength))
  536.                     self._oeb.log.warning(" node %03d: '%s' offset: 0x%X != 0x%06X" % (i, child.title, offset, previousOffset + previousLength))
  537.                     self._oeb.log.warning('_generate_flat_indexed_navpoints: Failed to generate index')
  538.                     self._HTMLRecords = []
  539.                     return False
  540.             
  541.             previousOffset = offset
  542.             previousLength = length
  543.             myStartingRecord = offset // RECORD_SIZE
  544.             if self._HTMLRecords[myStartingRecord].openingNode == -1:
  545.                 self._HTMLRecords[myStartingRecord].openingNode = myIndex
  546.             
  547.             myEndingRecord = (offset + length) // RECORD_SIZE
  548.             if myEndingRecord > myStartingRecord:
  549.                 interimSpanRecord = myStartingRecord + 1
  550.                 while interimSpanRecord <= myEndingRecord:
  551.                     self._HTMLRecords[interimSpanRecord].continuingNode = myIndex
  552.                     self._HTMLRecords[interimSpanRecord].currentSectionNodeCount = 1
  553.                     interimSpanRecord += 1
  554.                     continue
  555.                     None if self._HTMLRecords[myStartingRecord].currentSectionNodeCount == -1 else self._HTMLRecords[myStartingRecord]
  556.                 if self.opts.verbose > 3:
  557.                     None(self._oeb.logger.info % (' node %03d: %-15.15s... spans HTML records %03d - %03d \t offset: 0x%06X length: 0x%06X', myIndex if child.title.strip() > '' else '(missing)', myStartingRecord, interimSpanRecord, offset, length))
  558.                 
  559.             elif self.opts.verbose > 3:
  560.                 None(self._oeb.logger.info % (' node %03d: %-15.15s... spans HTML records %03d - %03d \t offset: 0x%06X length: 0x%06X', myIndex if child.title.strip() > '' else '(missing)', myStartingRecord, myStartingRecord, offset, length))
  561.             
  562.             myIndex += 1
  563.         
  564.         return True
  565.  
  566.     
  567.     def _generate_indexed_navpoints(self):
          # Fills self._HTMLRecords with per-record index data for a structured
          # TOC, using self._ctoc_map[i]['klass'] to tell periodical, section,
          # article and chapter nodes apart.
          # Returns True on success; returns False when a TOC entry's href is
          # missing from self._id_offsets or the contiguity check below fails
          # (self._HTMLRecords is emptied in the latter case).
          # NOTE(review): this listing is decompiler output; the statements
          # flagged below are decompilation artifacts and must be repaired
          # against the upstream source before this method can run.
  568.         self._oeb.logger.info('Indexing navPoints ...')
          # One HTMLRecordData per RECORD_SIZE-sized slice of the content.
  569.         numberOfHTMLRecords = self._content_length // RECORD_SIZE + 1
  570.         x = numberOfHTMLRecords
  571.         while x:
  572.             self._HTMLRecords.append(HTMLRecordData())
  573.             x -= 1
  574.         toc = self._oeb.toc
  575.         myIndex = 0
  576.         myEndingRecord = 0
  577.         previousOffset = 0
  578.         previousLength = 0
  579.         offset = 0
  580.         length = 0
  581.         sectionChangedInRecordNumber = -1
  582.         sectionChangesInThisRecord = False
          # Every TOC node except the root (first element of toc.iter()).
  583.         entries = list(toc.iter())[1:]
          # NOTE(review): as decompiled this loop has no break, so it always
          # runs to the end and firstSequentialNode ends up as the last index
          # of self._ctoc_map; presumably it should stop at the first article
          # or chapter node -- confirm.
  584.         for firstSequentialNode, node in enumerate(list(self._ctoc_map)):
  585.             if node['klass'] != 'article' and node['klass'] != 'chapter':
  586.                 continue
  587.                 continue
  588.             if self.opts.verbose > 3:
  589.                 self._oeb.logger.info('\tFirst sequential node: %03d' % firstSequentialNode)
  590.             
  591.         
  592.         for i, child in enumerate(entries):
  593.             h = child.href
  594.             if h not in self._id_offsets:
  595.                 self._oeb.log.warning('  Could not find TOC entry "%s", aborting indexing ...' % child.title)
  596.                 return False
  597.             offset = self._id_offsets[h]
              # Length is the distance to the next entry with a larger offset,
              # or to the end of the content when there is none.
  598.             length = None
  599.             for sibling in entries[i + 1:]:
  600.                 h2 = sibling.href
  601.                 if h2 in self._id_offsets:
  602.                     offset2 = self._id_offsets[h2]
  603.                     if offset2 > offset:
  604.                         length = offset2 - offset
  605.                         break
  606.                     
                  # NOTE(review): the next line is a no-op expression statement
                  # left by the decompiler; it raises NameError if offset2 has
                  # not been assigned yet.
  607.                 offset2 > offset
  608.             
  609.             if length is None:
  610.                 length = self._content_length - offset
  611.             
  612.             if self.opts.verbose > 3:
  613.                 self._oeb.logger.info('child %03d: %s' % (i, child))
  614.                 self._oeb.logger.info('    title: %s' % child.title)
  615.                 self._oeb.logger.info('    depth: %d' % child.depth())
  616.                 self._oeb.logger.info('   offset: 0x%06X \tlength: 0x%06X \tnext: 0x%06X' % (offset, length, offset + length))
  617.             
              # Contiguity check between this node and the previous one,
              # skipped when the previous node is a section.
  618.             if i > firstSequentialNode and self._ctoc_map[i - 1]['klass'] != 'section':
  619.                 if offset != previousOffset + previousLength:
  620.                     self._oeb.log.warning('*** TOC discontinuity: nodes are not sequential ***')
  621.                     self._oeb.log.info(" node %03d: '%s' offset: 0x%X length: 0x%X" % (i - 1, entries[i - 1].title, previousOffset, previousLength))
  622.                     self._oeb.log.warning(" node %03d: '%s' offset: 0x%X != 0x%06X" % (i, child.title, offset, previousOffset + previousLength))
  623.                     self._oeb.log.info('...')
                      # NOTE(review): as decompiled this loop never terminates
                      # when i > 6; presumably the remains of a loop that
                      # dumped neighbouring entries -- confirm.
  624.                     while i - 6 > 0:
  625.                         pass
  626.                     i - 6
                  # NOTE(review): as decompiled the failure path below sits
                  # outside the discontinuity test, so it runs whenever the
                  # outer condition holds; presumably it belongs inside it.
  627.                 self._oeb.log.info('...')
  628.                 self._oeb.log.warning('_generate_indexed_navpoints: Failed to generate index')
  629.                 self._HTMLRecords = []
  630.                 return False
  631.                 self._ctoc_map[i - 1]['klass'] != 'section'
  632.             
  633.             previousOffset = offset
  634.             previousLength = length
              # Index of the text record in which this node starts.
  635.             thisRecord = offset // RECORD_SIZE
              # An article starting after record 0 stores its parent section on
              # the record: the previous section if a section change was
              # already seen, otherwise the current one.
  636.             if self._ctoc_map[i]['klass'] == 'article':
  637.                 if thisRecord > 0:
  638.                     if sectionChangesInThisRecord:
  639.                         self._HTMLRecords[thisRecord].continuingNodeParent = self._currentSectionIndex - 1
  640.                     else:
  641.                         self._HTMLRecords[thisRecord].continuingNodeParent = self._currentSectionIndex
  642.                 
  643.             
              # Periodical nodes are skipped without advancing myIndex.
  644.             if self._ctoc_map[i]['klass'] == 'periodical':
  645.                 continue
  646.             
              # A section starting after record 0 advances the current section
              # index and records it on the record, then skips to the next
              # entry without advancing myIndex.
  647.             if self._ctoc_map[i]['klass'] == 'section':
  648.                 if thisRecord > 0:
  649.                     sectionChangesInThisRecord = True
  650.                     self._currentSectionIndex += 1
  651.                     self._HTMLRecords[thisRecord].nextSectionNumber = self._currentSectionIndex
  652.                     self._HTMLRecords[thisRecord].nextSectionOpeningNode = myIndex
  653.                     continue
  654.                 
  655.             
              # The first node starting in a record becomes its openingNode.
  656.             if self._HTMLRecords[thisRecord].openingNode == -1:
  657.                 self._HTMLRecords[thisRecord].openingNode = myIndex
  658.                 self._HTMLRecords[thisRecord].openingNodeParent = self._currentSectionIndex
  659.             
              # Mark every later record this node extends into.
  660.             myEndingRecord = (offset + length) // RECORD_SIZE
  661.             if myEndingRecord > thisRecord:
  662.                 sectionChangesInThisRecord = False
  663.                 interimSpanRecord = thisRecord + 1
  664.                 while interimSpanRecord <= myEndingRecord:
  665.                     self._HTMLRecords[interimSpanRecord].continuingNode = myIndex
  666.                     self._HTMLRecords[interimSpanRecord].continuingNodeParent = self._currentSectionIndex
  667.                     self._HTMLRecords[interimSpanRecord].currentSectionNodeCount = 1
  668.                     interimSpanRecord += 1
  669.                     continue
                      # NOTE(review): unreachable residue; presumably what is
                      # left of a per-record node-count update keyed on
                      # sectionChangedInRecordNumber -- confirm upstream.
  670.                     None if sectionChangedInRecordNumber == thisRecord else None if self._HTMLRecords[thisRecord].currentSectionNodeCount == -1 else self._HTMLRecords[thisRecord]
  671.                 if self.opts.verbose > 3:
                      # NOTE(review): as decompiled this calls None and raises
                      # TypeError; presumably a logger.info(format % args)
                      # call whose arguments were merged -- confirm.
  672.                     None(self._oeb.logger.info % ('     node: %03d %-10.10s %-15.15s... spans HTML records %03d-%03d \t offset: 0x%06X length: 0x%06X', myIndex, self._ctoc_map[i]['klass'] if child.title.strip() > '' else '(missing)', thisRecord, interimSpanRecord, offset, length))
  673.                 
              # Node confined to the last record: if nothing continues into that
              # record, continuingNode is set to openingNode - 1.
  674.             elif thisRecord == numberOfHTMLRecords - 1:
  675.                 if self._HTMLRecords[thisRecord].continuingNode == -1:
  676.                     self._HTMLRecords[thisRecord].continuingNode = self._HTMLRecords[thisRecord].openingNode - 1
  677.                 
  678.             elif self.opts.verbose > 3:
                  # NOTE(review): same None(...) artifact as in the branch above.
  679.                 None(self._oeb.logger.info % ('     node: %03d %-10.10s %-15.15s... spans HTML records %03d-%03d \t offset: 0x%06X length: 0x%06X', myIndex, self._ctoc_map[i]['klass'] if child.title.strip() > '' else '(missing)', thisRecord, thisRecord, offset, length))
  680.             
  681.             myIndex += 1
  682.         
  683.         return True
  684.  
  685.     
  686.     def _generate_tbs_book(self, nrecords, lastrecord):
  687.         if self.opts.verbose > 3:
  688.             self._oeb.logger.info('Assembling TBS for Book: HTML record %03d of %03d' % (nrecords, lastrecord))
  689.         
  690.         tbsType = 0
  691.         tbSequence = ''
  692.         if self._initialIndexRecordFound == False:
  693.             if self._HTMLRecords[nrecords].currentSectionNodeCount == -1:
  694.                 tbSequence = decint(len(tbSequence) + 1, DECINT_FORWARD)
  695.             else:
  696.                 self._initialIndexRecordFound = True
  697.                 if self._HTMLRecords[nrecords].currentSectionNodeCount == 1:
  698.                     tbsType = 2
  699.                 else:
  700.                     tbsType = 6
  701.                 tbSequence = decint(tbsType, DECINT_FORWARD)
  702.                 tbSequence += decint(0, DECINT_FORWARD)
  703.                 if tbsType != 2:
  704.                     tbSequence += chr(self._HTMLRecords[nrecords].currentSectionNodeCount)
  705.                 
  706.                 tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD)
  707.         elif nrecords == lastrecord and self._HTMLRecords[nrecords].currentSectionNodeCount == 1:
  708.             tbsType = 2
  709.         elif self._HTMLRecords[nrecords].continuingNode > 0 and self._HTMLRecords[nrecords].openingNode == -1:
  710.             tbsType = 3
  711.             self._HTMLRecords[nrecords].currentSectionNodeCount = 128
  712.         else:
  713.             tbsType = 6
  714.         shiftedNCXEntry = self._HTMLRecords[nrecords].continuingNode << 3
  715.         shiftedNCXEntry |= tbsType
  716.         tbSequence = decint(shiftedNCXEntry, DECINT_FORWARD)
  717.         tbSequence += decint(0, DECINT_FORWARD)
  718.         if tbsType != 2:
  719.             tbSequence += chr(self._HTMLRecords[nrecords].currentSectionNodeCount)
  720.         
  721.         tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD)
  722.         self._tbSequence = tbSequence
  723.  
  724.     
  725.     def _generate_tbs_flat_periodical(self, nrecords, lastrecord):
  726.         tbsType = 0
  727.         tbSequence = ''
  728.         if self._initialIndexRecordFound == False:
  729.             if self._HTMLRecords[nrecords].currentSectionNodeCount == -1:
  730.                 tbSequence = decint(len(tbSequence) + 1, DECINT_FORWARD)
  731.             else:
  732.                 self._initialIndexRecordFound = True
  733.                 tbsType = 6
  734.                 tbSequence = decint(tbsType, DECINT_FORWARD)
  735.                 tbSequence += decint(0, DECINT_FORWARD)
  736.                 tbSequence += chr(self._HTMLRecords[nrecords].currentSectionNodeCount + 2)
  737.                 tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD)
  738.                 if self.opts.verbose > 2:
  739.                     self._oeb.logger.info('\nAssembling TBS for Flat Periodical: HTML record %03d of %03d, section %d' % (nrecords, lastrecord, self._HTMLRecords[nrecords].continuingNodeParent))
  740.                     self._HTMLRecords[nrecords].dumpData(nrecords, self._oeb)
  741.                 
  742.         elif self.opts.verbose > 2:
  743.             self._oeb.logger.info('\nAssembling TBS for Flat Periodical: HTML record %03d of %03d, section %d' % (nrecords, lastrecord, self._HTMLRecords[nrecords].continuingNodeParent))
  744.             self._HTMLRecords[nrecords].dumpData(nrecords, self._oeb)
  745.         
  746.         if nrecords == lastrecord and self._HTMLRecords[nrecords].currentSectionNodeCount == 1:
  747.             tbsType = 6
  748.             tbSequence = decint(tbsType, DECINT_FORWARD)
  749.             tbSequence += decint(0, DECINT_FORWARD)
  750.             tbSequence += chr(2)
  751.             arg3 = self._HTMLRecords[nrecords].continuingNode
  752.             arg3 += 1
  753.             arg3 <<= 4
  754.             arg3 |= 0
  755.             tbSequence += decint(arg3, DECINT_FORWARD)
  756.             tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD)
  757.         elif self._HTMLRecords[nrecords].continuingNode > 0 and self._HTMLRecords[nrecords].openingNode == -1:
  758.             tbsType = 6
  759.             self._HTMLRecords[nrecords].currentSectionNodeCount = 128
  760.             tbSequence = decint(tbsType, DECINT_FORWARD)
  761.             tbSequence += decint(0, DECINT_FORWARD)
  762.             tbSequence += chr(2)
  763.             arg3 = self._HTMLRecords[nrecords].continuingNode
  764.             arg3 += self._HTMLRecords[nrecords].continuingNodeParent + 1
  765.             arg3 <<= 4
  766.             arg3 |= 1
  767.             tbSequence += decint(arg3, DECINT_FORWARD)
  768.             tbSequence += chr(self._HTMLRecords[nrecords].currentSectionNodeCount)
  769.             tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD)
  770.         else:
  771.             tbsType = 7
  772.             tbSequence = decint(tbsType, DECINT_FORWARD)
  773.             tbSequence += decint(0, DECINT_FORWARD)
  774.             tbSequence += chr(2)
  775.             tbSequence += decint(0, DECINT_FORWARD)
  776.             arg4 = self._HTMLRecords[nrecords].continuingNode
  777.             arg4 += self._HTMLRecords[nrecords].continuingNodeParent + 1
  778.             arg4 <<= 4
  779.             arg4 |= 4
  780.             tbSequence += decint(arg4, DECINT_FORWARD)
  781.             tbSequence += chr(self._HTMLRecords[nrecords].currentSectionNodeCount)
  782.             tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD)
  783.         self._tbSequence = tbSequence
  784.  
  785.     
  786.     def _generate_tbs_structured_periodical(self, nrecords, lastrecord):
  787.         tbsType = 0
  788.         tbSequence = ''
  789.         if self._initialIndexRecordFound == False:
  790.             if self._HTMLRecords[nrecords].currentSectionNodeCount == -1:
  791.                 tbSequence = decint(len(tbSequence) + 1, DECINT_FORWARD)
  792.             else:
  793.                 self._initialIndexRecordFound = True
  794.                 if self.opts.verbose > 2:
  795.                     self._oeb.logger.info('\nAssembling TBS for Structured Periodical: HTML record %03d of %03d, section %d' % (nrecords, lastrecord, self._HTMLRecords[nrecords].continuingNodeParent))
  796.                     self._HTMLRecords[nrecords].dumpData(nrecords, self._oeb)
  797.                 
  798.                 tbsType = 6
  799.                 tbSequence = decint(tbsType, DECINT_FORWARD)
  800.                 tbSequence += decint(0, DECINT_FORWARD)
  801.                 tbSequence += chr(2)
  802.                 arg3 = self._sectionCount
  803.                 arg3 += 0
  804.                 arg3 <<= 4
  805.                 arg3 |= 4
  806.                 tbSequence += decint(arg3, DECINT_FORWARD)
  807.                 tbSequence += chr(self._HTMLRecords[nrecords].currentSectionNodeCount)
  808.                 tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD)
  809.         elif self._firstSectionConcluded == False:
  810.             if self._HTMLRecords[nrecords].nextSectionNumber == -1:
  811.                 if self.opts.verbose > 2:
  812.                     self._oeb.logger.info('\nAssembling TBS for Structured Periodical: HTML record %03d of %03d, section %d' % (nrecords, lastrecord, self._HTMLRecords[nrecords].continuingNodeParent))
  813.                     self._HTMLRecords[nrecords].dumpData(nrecords, self._oeb)
  814.                 
  815.                 if nrecords == lastrecord and self._HTMLRecords[nrecords].currentSectionNodeCount == 1:
  816.                     tbsType = 6
  817.                     tbSequence = decint(tbsType, DECINT_FORWARD)
  818.                     tbSequence += decint(0, DECINT_FORWARD)
  819.                     tbSequence += chr(2)
  820.                     arg3 = self._sectionCount
  821.                     arg3 += self._HTMLRecords[nrecords].continuingNode
  822.                     arg3 <<= 4
  823.                     arg3 |= 4
  824.                     tbSequence += decint(arg3, DECINT_FORWARD)
  825.                     tbSequence += chr(self._HTMLRecords[nrecords].currentSectionNodeCount)
  826.                     tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD)
  827.                 elif self._HTMLRecords[nrecords].continuingNode > 0 and self._HTMLRecords[nrecords].openingNode == -1:
  828.                     tbsType = 6
  829.                     self._HTMLRecords[nrecords].currentSectionNodeCount = 128
  830.                     tbSequence = decint(tbsType, DECINT_FORWARD)
  831.                     tbSequence += decint(0, DECINT_FORWARD)
  832.                     tbSequence += chr(2)
  833.                     arg3 = self._sectionCount
  834.                     arg3 += self._HTMLRecords[nrecords].continuingNode
  835.                     arg3 <<= 4
  836.                     arg3 |= 1
  837.                     tbSequence += decint(arg3, DECINT_FORWARD)
  838.                     tbSequence += chr(self._HTMLRecords[nrecords].currentSectionNodeCount)
  839.                     tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD)
  840.                 else:
  841.                     tbsType = 7
  842.                     tbSequence = decint(tbsType, DECINT_FORWARD)
  843.                     tbSequence += decint(0, DECINT_FORWARD)
  844.                     tbSequence += chr(2)
  845.                     tbSequence += decint(0, DECINT_FORWARD)
  846.                     arg4 = self._sectionCount
  847.                     arg4 += self._HTMLRecords[nrecords].continuingNode
  848.                     arg4 <<= 4
  849.                     arg4 |= 4
  850.                     tbSequence += decint(arg4, DECINT_FORWARD)
  851.                     tbSequence += chr(self._HTMLRecords[nrecords].currentSectionNodeCount)
  852.                     tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD)
  853.             elif self._HTMLRecords[nrecords].nextSectionNumber > 0:
  854.                 tbsType = 3
  855.                 if self.opts.verbose > 2:
  856.                     self._oeb.logger.info('\nAssembling TBS for Structured Periodical: HTML record %03d of %03d, switching sections %d-%d' % (nrecords, lastrecord, self._HTMLRecords[nrecords].continuingNodeParent, self._HTMLRecords[nrecords].nextSectionNumber))
  857.                     self._HTMLRecords[nrecords].dumpData(nrecords, self._oeb)
  858.                 
  859.                 tbSequence = decint(tbsType, DECINT_FORWARD)
  860.                 tbSequence += decint(0, DECINT_FORWARD)
  861.                 tbSequence += decint(0, DECINT_FORWARD)
  862.                 arg3 = self._HTMLRecords[nrecords].continuingNodeParent + 1 << 4
  863.                 arg3Flags = 0
  864.                 arg3 |= arg3Flags
  865.                 tbSequence += decint(arg3, DECINT_FORWARD)
  866.                 sectionBase = self._HTMLRecords[nrecords].continuingNodeParent
  867.                 sectionDelta = self._sectionCount - sectionBase - 1
  868.                 articleOffset = self._HTMLRecords[nrecords].continuingNode + 1
  869.                 arg4 = sectionDelta + articleOffset << 4
  870.                 arg4Flags = 0
  871.                 if self._HTMLRecords[nrecords].currentSectionNodeCount > 1:
  872.                     arg4Flags = 4
  873.                 else:
  874.                     arg4Flags = 0
  875.                 arg4 |= arg4Flags
  876.                 tbSequence += decint(arg4, DECINT_FORWARD)
  877.                 if arg4Flags == 4:
  878.                     nodeCountValue = self._HTMLRecords[nrecords].currentSectionNodeCount
  879.                     nodeCountValue = None if nodeCountValue == 0 else nodeCountValue
  880.                     tbSequence += chr(nodeCountValue)
  881.                 
  882.                 arg5 = sectionDelta + articleOffset
  883.                 if self._HTMLRecords[nrecords].currentSectionNodeCount < 2:
  884.                     arg5 -= 1
  885.                 
  886.                 arg5 <<= 4
  887.                 arg5Flags = 8
  888.                 arg5 |= arg5Flags
  889.                 tbSequence += decint(arg5, DECINT_FORWARD)
  890.                 arg6 = sectionDelta + self._HTMLRecords[nrecords].nextSectionOpeningNode
  891.                 arg6 <<= 4
  892.                 if self._HTMLRecords[nrecords].nextSectionNodeCount > 1:
  893.                     arg6Flags = 4
  894.                 else:
  895.                     arg6Flags = 0
  896.                 arg6 |= arg6Flags
  897.                 tbSequence += decint(arg6, DECINT_FORWARD)
  898.                 if arg6Flags == 4:
  899.                     nodeCountValue = self._HTMLRecords[nrecords].nextSectionNodeCount
  900.                     nodeCountValue = None if nodeCountValue == 0 else nodeCountValue
  901.                     tbSequence += chr(nodeCountValue)
  902.                 
  903.                 tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD)
  904.                 self._firstSectionConcluded = True
  905.             
  906.         elif self._HTMLRecords[nrecords].nextSectionNumber == -1:
  907.             if self.opts.verbose > 2:
  908.                 self._oeb.logger.info('\nAssembling TBS for Structured Periodical: HTML record %03d of %03d, section %d' % (nrecords, lastrecord, self._HTMLRecords[nrecords].continuingNodeParent))
  909.                 self._HTMLRecords[nrecords].dumpData(nrecords, self._oeb)
  910.             
  911.             tbsType = 2
  912.             tbSequence = decint(tbsType, DECINT_FORWARD)
  913.             tbSequence += decint(0, DECINT_FORWARD)
  914.             arg2 = self._HTMLRecords[nrecords].continuingNodeParent + 1
  915.             arg2 <<= 4
  916.             arg2Flags = 0
  917.             if self._HTMLRecords[nrecords].currentSectionNodeCount > 0:
  918.                 arg2Flags = 1
  919.                 arg2 |= arg2Flags
  920.             
  921.             tbSequence += decint(arg2, DECINT_FORWARD)
  922.             if arg2Flags:
  923.                 tbSequence += decint(0, DECINT_FORWARD)
  924.             
  925.             arg3 = self._sectionCount - self._HTMLRecords[nrecords].continuingNodeParent
  926.             arg3 += self._HTMLRecords[nrecords].continuingNode
  927.             arg3 <<= 4
  928.             arg3Flags = 1
  929.             if self._HTMLRecords[nrecords].currentSectionNodeCount > 0:
  930.                 arg3Flags = 4
  931.             
  932.             arg3 |= arg3Flags
  933.             tbSequence += decint(arg3, DECINT_FORWARD)
  934.             if arg3Flags == 4:
  935.                 nodeCountValue = self._HTMLRecords[nrecords].currentSectionNodeCount
  936.                 nodeCountValue = None if nodeCountValue == 0 else nodeCountValue
  937.                 tbSequence += chr(nodeCountValue)
  938.             else:
  939.                 tbSequence += decint(0, DECINT_FORWARD)
  940.             tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD)
  941.         else:
  942.             tbsType = 3
  943.             if self.opts.verbose > 2:
  944.                 self._oeb.logger.info('\nAssembling TBS for Structured Periodical: HTML record %03d of %03d, switching sections %d-%d' % (nrecords, lastrecord, self._HTMLRecords[nrecords].continuingNodeParent, self._HTMLRecords[nrecords].nextSectionNumber))
  945.                 self._HTMLRecords[nrecords].dumpData(nrecords, self._oeb)
  946.             
  947.             tbSequence = decint(tbsType, DECINT_FORWARD)
  948.             tbSequence += decint(0, DECINT_FORWARD)
  949.             tbSequence += decint(0, DECINT_FORWARD)
  950.             arg3 = self._HTMLRecords[nrecords].continuingNodeParent + 1 << 4
  951.             arg3Flags = 0
  952.             arg3 |= arg3Flags
  953.             tbSequence += decint(arg3, DECINT_FORWARD)
  954.             sectionBase = self._HTMLRecords[nrecords].continuingNodeParent
  955.             sectionDelta = self._sectionCount - sectionBase - 1
  956.             articleOffset = self._HTMLRecords[nrecords].continuingNode + 1
  957.             arg4 = sectionDelta + articleOffset << 4
  958.             arg4Flags = 0
  959.             if self._HTMLRecords[nrecords].currentSectionNodeCount > 1:
  960.                 arg4Flags = 4
  961.             else:
  962.                 arg4Flags = 0
  963.             arg4 |= arg4Flags
  964.             tbSequence += decint(arg4, DECINT_FORWARD)
  965.             if arg4Flags == 4:
  966.                 nodeCountValue = self._HTMLRecords[nrecords].currentSectionNodeCount
  967.                 nodeCountValue = None if nodeCountValue == 0 else nodeCountValue
  968.                 tbSequence += chr(nodeCountValue)
  969.             
  970.             arg5 = sectionDelta + articleOffset
  971.             if self._HTMLRecords[nrecords].currentSectionNodeCount < 2:
  972.                 arg5 -= 1
  973.             
  974.             arg5 <<= 4
  975.             arg5Flags = 8
  976.             arg5 |= arg5Flags
  977.             tbSequence += decint(arg5, DECINT_FORWARD)
  978.             arg6 = sectionDelta + self._HTMLRecords[nrecords].nextSectionOpeningNode
  979.             arg6 <<= 4
  980.             if self._HTMLRecords[nrecords].nextSectionNodeCount > 1:
  981.                 arg6Flags = 4
  982.             else:
  983.                 arg6Flags = 0
  984.             arg6 |= arg6Flags
  985.             tbSequence += decint(arg6, DECINT_FORWARD)
  986.             if arg6Flags == 4:
  987.                 nodeCountValue = self._HTMLRecords[nrecords].nextSectionNodeCount
  988.                 nodeCountValue = None if nodeCountValue == 0 else nodeCountValue
  989.                 tbSequence += chr(nodeCountValue)
  990.             
  991.             tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD)
  992.         self._tbSequence = tbSequence
  993.  
  994.     
  995.     def _evaluate_periodical_toc(self):
  996.         toc = self._oeb.toc
  997.         nodes = list(toc.iter())[1:]
  998.         toc_conforms = True
  999.         for i, child in enumerate(nodes):
  1000.             if not child.klass == 'periodical' or child.depth() != 3:
  1001.                 if (child.klass == 'section' or child.depth() != 2 or child.klass == 'article') and child.depth() != 1:
  1002.                     self._oeb.logger.warn('Nonconforming TOC entry: "%s" found at depth %d' % (child.klass, child.depth()))
  1003.                     self._oeb.logger.warn("  <title>: '%-25.25s...' \t\tklass=%-15.15s \tdepth:%d  \tplayOrder=%03d" % (child.title, child.klass, child.depth(), child.play_order))
  1004.                     toc_conforms = False
  1005.                     continue
  1006.         
  1007.         if self._oeb.metadata['date'] == [] and self._oeb.metadata['timestamp'] == []:
  1008.             self._oeb.logger.info('metadata missing date/timestamp')
  1009.             toc_conforms = False
  1010.         
  1011.         if 'masthead' not in self._oeb.guide:
  1012.             self._oeb.logger.info('mastheadImage missing from manifest')
  1013.             toc_conforms = False
  1014.         
  1015.         None(self._oeb.logger.info if toc_conforms else '  TOC structure non-conforming')
  1016.         return toc_conforms
  1017.  
  1018.     
  1019.     def _generate_text(self):
  1020.         self._oeb.logger.info('Serializing markup content...')
  1021.         serializer = Serializer(self._oeb, self._images, write_page_breaks_after_item = self.write_page_breaks_after_item)
  1022.         breaks = serializer.breaks
  1023.         text = serializer.text
  1024.         self._anchor_offset_kindle = serializer.anchor_offset_kindle
  1025.         self._id_offsets = serializer.id_offsets
  1026.         self._content_length = len(text)
  1027.         self._text_length = len(text)
  1028.         text = StringIO(text)
  1029.         buf = []
  1030.         nrecords = 0
  1031.         lastrecord = self._content_length // RECORD_SIZE
  1032.         offset = 0
  1033.         if self._compression != UNCOMPRESSED:
  1034.             self._oeb.logger.info('  Compressing markup content...')
  1035.         
  1036.         (data, overlap) = self._read_text_record(text)
  1037.         if self.opts.mobi_periodical:
  1038.             self._oeb.logger.info('  MOBI periodical specified, evaluating TOC for periodical conformance ...')
  1039.             self._conforming_periodical_toc = self._evaluate_periodical_toc()
  1040.         
  1041.         self._ctoc_records.append(self._generate_ctoc())
  1042.         toc = self._oeb.toc
  1043.         entries = list(toc.iter())[1:]
  1044.         if len(entries):
  1045.             self._indexable = self._generate_indexed_navpoints()
  1046.         else:
  1047.             self._oeb.logger.info('  No entries found in TOC ...')
  1048.             self._indexable = False
  1049.         if not self._indexable:
  1050.             self._oeb.logger.info('  Writing unindexed mobi ...')
  1051.         
  1052.         while len(data) > 0:
  1053.             if self._compression == PALMDOC:
  1054.                 data = compress_doc(data)
  1055.             
  1056.             record = StringIO()
  1057.             record.write(data)
  1058.             if WRITE_PBREAKS:
  1059.                 record.write(overlap)
  1060.                 record.write(pack('>B', len(overlap)))
  1061.                 nextra = 0
  1062.                 pbreak = 0
  1063.                 running = offset
  1064.                 while breaks and breaks[0] - offset < RECORD_SIZE:
  1065.                     pbreak = breaks.pop(0) - running >> 3
  1066.                     if self.opts.verbose > 2:
  1067.                         self._oeb.logger.info('pbreak = 0x%X at 0x%X' % (pbreak, record.tell()))
  1068.                     
  1069.                     encoded = decint(pbreak, DECINT_FORWARD)
  1070.                     record.write(encoded)
  1071.                     running += pbreak << 3
  1072.                     nextra += len(encoded)
  1073.                 lsize = 1
  1074.                 while True:
  1075.                     size = decint(nextra + lsize, DECINT_BACKWARD)
  1076.                     if len(size) == lsize:
  1077.                         break
  1078.                     
  1079.                     lsize += 1
  1080.                 record.write(size)
  1081.             
  1082.             if INDEXING and self._indexable:
  1083.                 booktype = self._MobiDoc.mobiType
  1084.                 if booktype == 2:
  1085.                     self._generate_tbs_book(nrecords, lastrecord)
  1086.                 elif booktype == 258:
  1087.                     self._generate_tbs_flat_periodical(nrecords, lastrecord)
  1088.                 elif booktype == 257 or booktype == 259:
  1089.                     self._generate_tbs_structured_periodical(nrecords, lastrecord)
  1090.                 else:
  1091.                     raise NotImplementedError('Indexing for mobitype 0x%X not implemented' % booktype)
  1092.                 (booktype == 259).write(self._tbSequence)
  1093.             
  1094.             self._records.append(record.getvalue())
  1095.             buf.append(self._records[-1])
  1096.             nrecords += 1
  1097.             offset += RECORD_SIZE
  1098.             (data, overlap) = self._read_text_record(text)
  1099.         if INDEXING:
  1100.             extra = sum(map(len, buf)) % 4
  1101.             if extra == 0:
  1102.                 extra = 4
  1103.             
  1104.             self._records.append('\x00' * (4 - extra))
  1105.             nrecords += 1
  1106.         
  1107.         self._text_nrecords = nrecords
  1108.  
  1109.     
  1110.     def _generate_images(self):
  1111.         self._oeb.logger.info('Serializing images...')
  1112.         images = [ (index, href) for href, index in self._images.items() ]
  1113.         images.sort()
  1114.         self._first_image_record = None
  1115.         for _, href in images:
  1116.             item = self._oeb.manifest.hrefs[href]
  1117.             
  1118.             try:
  1119.                 data = rescale_image(item.data, self._imagemax)
  1120.             except:
  1121.                 []
  1122.                 []
  1123.                 self._oeb.logger.warn('Bad image file %r' % item.href)
  1124.                 continue
  1125.  
  1126.             self._records.append(data)
  1127.             if self._first_image_record is None:
  1128.                 self._first_image_record = len(self._records) - 1
  1129.                 continue
  1130.             []
  1131.         
  1132.  
  1133.     
  1134.     def _generate_end_records(self):
  1135.         if FCIS_FLIS:
  1136.             self._flis_number = len(self._records)
  1137.             self._records.append('FLIS\x00\x00\x00\x08\x00A\x00\x00\x00\x00\x00\x00\xff\xff\xff\xff\x00\x01\x00\x03\x00\x00\x00\x03\x00\x00\x00\x01' + '\xff\xff\xff\xff')
  1138.             fcis = 'FCIS\x00\x00\x00\x14\x00\x00\x00\x10\x00\x00\x00\x01\x00\x00\x00\x00'
  1139.             fcis += pack('>I', self._text_length)
  1140.             fcis += '\x00\x00\x00\x00\x00\x00\x00 \x00\x00\x00\x08\x00\x01\x00\x01\x00\x00\x00\x00'
  1141.             self._fcis_number = len(self._records)
  1142.             self._records.append(fcis)
  1143.             self._records.append('\xe9\x8e\r\n')
  1144.         else:
  1145.             self._flis_number = len(self._records)
  1146.             self._records.append('\xe9\x8e\r\n')
  1147.  
  1148.     
  1149.     def _generate_record0(self):
  1150.         metadata = self._oeb.metadata
  1151.         exth = self._build_exth()
  1152.         last_content_record = len(self._records) - 1
  1153.         self._generate_end_records()
  1154.         record0 = StringIO()
  1155.         record0.write(pack('>HHIHHHH', self._compression, 0, self._text_length, self._text_nrecords - 1, RECORD_SIZE, 0, 0))
  1156.         uid = random.randint(0, 0xFFFFFFFFL)
  1157.         title = unicode(metadata.title[0]).encode('utf-8')
  1158.         record0.write('MOBI')
  1159.         btype = self._MobiDoc.mobiType
  1160.         record0.write(pack('>IIIII', 232, btype, 65001, uid, 6))
  1161.         record0.write('\xff\xff\xff\xff\xff\xff\xff\xff')
  1162.         if btype < 256:
  1163.             record0.write(pack('>I', 0xFFFFFFFFL))
  1164.         elif btype > 256 and self._indexable:
  1165.             if self._primary_index_record is None:
  1166.                 record0.write(pack('>I', 0xFFFFFFFFL))
  1167.             else:
  1168.                 record0.write(pack('>I', self._primary_index_record + 2 + len(self._ctoc_records)))
  1169.         else:
  1170.             record0.write(pack('>I', 0xFFFFFFFFL))
  1171.         record0.write('\xff' * 28)
  1172.         record0.write(pack('>I', self._text_nrecords + 1))
  1173.         record0.write(pack('>II', 248 + len(exth), len(title)))
  1174.         record0.write(iana2mobi(str(metadata.language[0])))
  1175.         record0.write('\x00\x00\x00\x00\x00\x00\x00\x00')
  1176.         None(record0.write(pack, '>II', 6 if self._first_image_record else 0))
  1177.         record0.write('\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
  1178.         record0.write(pack('>I', 80))
  1179.         record0.write('\x00' * 32)
  1180.         record0.write(pack('>IIII', 0xFFFFFFFFL, 0xFFFFFFFFL, 0, 0))
  1181.         record0.write('\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
  1182.         record0.write(pack('>HH', 1, last_content_record))
  1183.         record0.write('\x00\x00\x00\x01')
  1184.         if FCIS_FLIS:
  1185.             record0.write(pack('>I', self._fcis_number))
  1186.             record0.write(pack('>I', 1))
  1187.             record0.write(pack('>I', self._flis_number))
  1188.             record0.write(pack('>I', 1))
  1189.         else:
  1190.             record0.write(pack('>I', 0xFFFFFFFFL))
  1191.             record0.write(pack('>I', 0xFFFFFFFFL))
  1192.             record0.write(pack('>I', 0xFFFFFFFFL))
  1193.             record0.write(pack('>I', 1))
  1194.         record0.write('\x00\x00\x00\x00\x00\x00\x00\x00')
  1195.         record0.write(pack('>IIII', 0xFFFFFFFFL, 0, 0xFFFFFFFFL, 0xFFFFFFFFL))
  1196.         trailingDataFlags = 1
  1197.         if self._indexable:
  1198.             trailingDataFlags |= 2
  1199.         
  1200.         if WRITE_PBREAKS:
  1201.             trailingDataFlags |= 4
  1202.         
  1203.         record0.write(pack('>I', trailingDataFlags))
  1204.         None(record0.write(pack, '>I' if self._primary_index_record is None else self._primary_index_record))
  1205.         record0.write(exth)
  1206.         record0.write(title)
  1207.         record0 = record0.getvalue()
  1208.         self._records[0] = record0 + '\x00' * (2452 - len(record0))
  1209.  
  1210.     
  1211.     def _build_exth(self):
  1212.         oeb = self._oeb
  1213.         exth = StringIO()
  1214.         nrecs = 0
  1215.         for term in oeb.metadata:
  1216.             if term not in EXTH_CODES:
  1217.                 continue
  1218.             
  1219.             code = EXTH_CODES[term]
  1220.             items = oeb.metadata[term]
  1221.             for item in items:
  1222.                 data = self.COLLAPSE_RE.sub(' ', unicode(item))
  1223.                 data = data.encode('utf-8')
  1224.                 exth.write(pack('>II', code, len(data) + 8))
  1225.                 exth.write(data)
  1226.                 nrecs += 1
  1227.             
  1228.             if term == 'rights':
  1229.                 rights = unicode(oeb.metadata.rights[0]).encode('utf-8')
  1230.                 exth.write(pack('>II', EXTH_CODES['rights'], len(rights) + 8))
  1231.                 exth.write(rights)
  1232.                 continue
  1233.             None if term == 'identifier' else None if term == 'creator' else []
  1234.         
  1235.         if oeb.metadata['date'] != []:
  1236.             datestr = str(oeb.metadata['date'][0])
  1237.         elif oeb.metadata['timestamp'] != []:
  1238.             datestr = str(oeb.metadata['timestamp'][0])
  1239.         
  1240.         if datestr is not None:
  1241.             exth.write(pack('>II', EXTH_CODES['pubdate'], len(datestr) + 8))
  1242.             exth.write(datestr)
  1243.             nrecs += 1
  1244.         else:
  1245.             raise NotImplementedError('missing date or timestamp needed for mobi_periodical')
  1246.         if (datestr is not None).metadata.cover and unicode(oeb.metadata.cover[0]) in oeb.manifest.ids:
  1247.             id = unicode(oeb.metadata.cover[0])
  1248.             item = oeb.manifest.ids[id]
  1249.             href = item.href
  1250.             index = self._images[href] - 1
  1251.             exth.write(pack('>III', 201, 12, index))
  1252.             exth.write(pack('>III', 203, 12, 0))
  1253.             nrecs += 2
  1254.             index = self._add_thumbnail(item)
  1255.             if index is not None:
  1256.                 exth.write(pack('>III', 202, 12, index - 1))
  1257.                 nrecs += 1
  1258.             
  1259.         
  1260.         exth = exth.getvalue()
  1261.         trail = len(exth) % 4
  1262.         pad = '\x00' * (4 - trail)
  1263.         exth = [
  1264.             'EXTH',
  1265.             pack('>II', len(exth) + 12, nrecs),
  1266.             exth,
  1267.             pad]
  1268.         return ''.join(exth)
  1269.  
  1270.     
  1271.     def _add_thumbnail(self, item):
  1272.         
  1273.         try:
  1274.             data = rescale_image(item.data, MAX_THUMB_SIZE, MAX_THUMB_DIMEN)
  1275.         except IOError:
  1276.             self._oeb.logger.warn('Bad image file %r' % item.href)
  1277.             return None
  1278.  
  1279.         manifest = self._oeb.manifest
  1280.         (id, href) = manifest.generate('thumbnail', 'thumbnail.jpeg')
  1281.         manifest.add(id, href, 'image/jpeg', data = data)
  1282.         index = len(self._images) + 1
  1283.         self._images[href] = index
  1284.         self._records.append(data)
  1285.         return index
  1286.  
  1287.     
  1288.     def _write_header(self):
  1289.         title = str(self._oeb.metadata.title[0])
  1290.         title = re.sub('[^-A-Za-z0-9]+', '_', title)[:31]
  1291.         title = title + '\x00' * (32 - len(title))
  1292.         now = int(time.time())
  1293.         nrecords = len(self._records)
  1294.         self._write(title, pack('>HHIIIIII', 0, 0, now, now, 0, 0, 0, 0), 'BOOK', 'MOBI', pack('>IIH', nrecords, 0, nrecords))
  1295.         offset = self._tell() + 8 * nrecords + 2
  1296.         for id, record in izip(count(), self._records):
  1297.             self._write(pack('>I', offset), '\x00', pack('>I', id)[1:])
  1298.             offset += len(record)
  1299.         
  1300.         self._write('\x00\x00')
  1301.  
  1302.     
  1303.     def _write_content(self):
  1304.         for record in self._records:
  1305.             self._write(record)
  1306.         
  1307.  
  1308.     
  1309.     def _generate_index(self):
  1310.         self._oeb.log('Generating INDX ...')
  1311.         self._primary_index_record = None
  1312.         (indxt, indxt_count, indices, last_name) = self._generate_indxt()
  1313.         if last_name is None:
  1314.             self._oeb.log.warn('Input document has no TOC. No index generated.')
  1315.             return None
  1316.         indx1 = StringIO()
  1317.         indx1.write('INDX' + pack('>I', 192))
  1318.         indx1.write('\x00\x00\x00\x00')
  1319.         indx1.write(pack('>I', 1))
  1320.         indx1.write('\x00\x00\x00\x00')
  1321.         indx1.write(pack('>I', 192 + len(indxt)))
  1322.         indx1.write(pack('>I', indxt_count + 1))
  1323.         indx1.write('\xff\xff\xff\xff\xff\xff\xff\xff')
  1324.         indx1.write('\x00' * 156)
  1325.         indx1.write(indxt)
  1326.         indx1.write(indices)
  1327.         indx1 = indx1.getvalue()
  1328.         idxt0 = chr(len(last_name)) + last_name + pack('>H', indxt_count + 1)
  1329.         idxt0 = align_block(idxt0)
  1330.         indx0 = StringIO()
  1331.         if self._MobiDoc.mobiType == 2:
  1332.             tagx = TAGX['chapter']
  1333.         else:
  1334.             tagx = TAGX['periodical']
  1335.         tagx = align_block('TAGX' + pack('>I', 8 + len(tagx)) + tagx)
  1336.         indx0_indices_pos = 192 + len(tagx) + len(idxt0)
  1337.         indx0_indices = align_block('IDXT' + pack('>H', 192 + len(tagx)))
  1338.         header = StringIO()
  1339.         header.write('INDX')
  1340.         header.write(pack('>I', 192))
  1341.         header.write('\x00\x00\x00\x00')
  1342.         header.write(pack('>I', 0))
  1343.         header.write(pack('>I', 6))
  1344.         header.write(pack('>I', indx0_indices_pos))
  1345.         header.write(pack('>I', 1))
  1346.         header.write(pack('>I', 65001))
  1347.         header.write(iana2mobi(str(self._oeb.metadata.language[0])))
  1348.         header.write(pack('>I', indxt_count + 1))
  1349.         header.write('\x00\x00\x00\x00')
  1350.         header.write('\x00\x00\x00\x00')
  1351.         header.write('\x00\x00\x00\x00')
  1352.         header.write(pack('>I', len(self._ctoc_records)))
  1353.         header.write('\x00' * 124)
  1354.         header.write(pack('>I', 192))
  1355.         header.write('\x00\x00\x00\x00\x00\x00\x00\x00')
  1356.         header = header.getvalue()
  1357.         indx0.write(header)
  1358.         indx0.write(tagx)
  1359.         indx0.write(idxt0)
  1360.         indx0.write(indx0_indices)
  1361.         indx0 = indx0.getvalue()
  1362.         self._primary_index_record = len(self._records)
  1363.         self._records.extend([
  1364.             indx0,
  1365.             indx1])
  1366.         for i, ctoc_record in enumerate(self._ctoc_records):
  1367.             self._records.append(ctoc_record)
  1368.         
  1369.         if self._MobiDoc.mobiType > 256:
  1370.             tagx = TAGX['secondary_periodical']
  1371.             tagx_len = 8 + len(tagx)
  1372.             indx0 = StringIO()
  1373.             indx0.write('INDX' + pack('>I', 192) + '\x00\x00\x00\x00\x00\x00\x00\x00')
  1374.             indx0.write(pack('>I', 6))
  1375.             indx0.write(pack('>I', 232))
  1376.             indx0.write(pack('>I', 1))
  1377.             indx0.write(pack('>I', 65001))
  1378.             indx0.write('\xff\xff\xff\xff')
  1379.             indx0.write(pack('>I', 4))
  1380.             indx0.write('\x00\x00\x00\x00')
  1381.             indx0.write('\x00' * 136)
  1382.             indx0.write(pack('>I', 192))
  1383.             indx0.write('\x00\x00\x00\x00\x00\x00\x00\x00')
  1384.             indx0.write('TAGX' + pack('>I', tagx_len) + tagx)
  1385.             indx0.write('\rmastheadImage\x00\x04')
  1386.             indx0.write('IDXT\x00\xd8\x00\x00')
  1387.             indx1 = StringIO()
  1388.             indx1.write('INDX' + pack('>I', 192) + '\x00\x00\x00\x00')
  1389.             indx1.write(pack('>I', 1))
  1390.             indx1.write(pack('>I', 0))
  1391.             indx1.write('\x00\x00\x00\xf0')
  1392.             indx1.write(pack('>I', 4))
  1393.             indx1.write('\xff\xff\xff\xff\xff\xff\xff\xff')
  1394.             indx1.write('\x00' * (192 - indx1.tell()))
  1395.             indx1.write('\x00\x01\x80')
  1396.             indx1.write('\x06author\x02\x80\x80\xc7')
  1397.             indx1.write('\x0bdescription\x02\x80\x80\xc6')
  1398.             indx1.write('\rmastheadImage\x02\x85\x80\xc5')
  1399.             indx1.write('IDXT\x00\xc0\x00\xc3\x00\xce\x00\xde')
  1400.             indx0 = indx0.getvalue()
  1401.             indx1 = indx1.getvalue()
  1402.             self._records.extend((indx0, indx1))
  1403.             if self.opts.verbose > 3:
  1404.                 mkdtemp = mkdtemp
  1405.                 import tempfile
  1406.                 import os
  1407.                 t = mkdtemp()
  1408.                 for i, n in enumerate([
  1409.                     'sindx1',
  1410.                     'sindx0',
  1411.                     'ctoc',
  1412.                     'indx0',
  1413.                     'indx1']):
  1414.                     open(os.path.join(t, n + '.bin'), 'wb').write(self._records[-(i + 1)])
  1415.                 
  1416.                 self._oeb.log.debug('Index records dumped to', t)
  1417.             
  1418.         
  1419.  
  1420.     
  1421.     def _clean_text_value(self, text):
  1422.         if text is not None and text.strip():
  1423.             text = text.strip()
  1424.             if not isinstance(text, unicode):
  1425.                 text = text.decode('utf-8', 'replace')
  1426.             
  1427.             text = text.encode('utf-8')
  1428.         else:
  1429.             text = '(none)'.encode('utf-8')
  1430.         return text
  1431.  
  1432.     
  1433.     def _add_to_ctoc(self, ctoc_str, record_offset):
  1434.         if 64504 - self._ctoc.tell() < 2 + len(ctoc_str):
  1435.             pad = 64504 - self._ctoc.tell()
  1436.             self._ctoc.write('\x00' * pad)
  1437.             self._ctoc_records.append(self._ctoc.getvalue())
  1438.             self._ctoc.truncate(0)
  1439.             self._ctoc_offset += 65536
  1440.             record_offset = self._ctoc_offset
  1441.         
  1442.         offset = self._ctoc.tell() + record_offset
  1443.         self._ctoc.write(decint(len(ctoc_str), DECINT_FORWARD) + ctoc_str)
  1444.         return offset
  1445.  
  1446.     
  1447.     def _add_flat_ctoc_node(self, node, ctoc, title = None):
  1448.         t = None if title is None else title
  1449.         t = self._clean_text_value(t)
  1450.         self._last_toc_entry = t
  1451.         ctoc_name_map = { }
  1452.         if node.klass == 'article':
  1453.             ctoc_name_map['klass'] = 'chapter'
  1454.         else:
  1455.             ctoc_name_map['klass'] = node.klass
  1456.         ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset)
  1457.         self._chapterCount += 1
  1458.         self._ctoc_map.append(ctoc_name_map)
  1459.  
  1460.     
  1461.     def _add_structured_ctoc_node(self, node, ctoc, title = None):
  1462.         if node.klass is None:
  1463.             return None
  1464.         t = node.klass is None if title is None else title
  1465.         t = self._clean_text_value(t)
  1466.         self._last_toc_entry = t
  1467.         ctoc_name_map = { }
  1468.         ctoc_name_map['klass'] = node.klass
  1469.         if node.klass == 'chapter':
  1470.             ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset)
  1471.             self._chapterCount += 1
  1472.         elif node.klass == 'periodical':
  1473.             ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset)
  1474.             for entry in self._ctoc_map:
  1475.                 if entry['klass'] == 'periodical':
  1476.                     ctoc_name_map['classOffset'] = entry['classOffset']
  1477.                     break
  1478.                     continue
  1479.             else:
  1480.                 ctoc_name_map['classOffset'] = self._add_to_ctoc(node.klass, 0)
  1481.             self._periodicalCount += 1
  1482.         elif node.klass == 'section':
  1483.             ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset)
  1484.             for entry in self._ctoc_map:
  1485.                 if entry['klass'] == 'section':
  1486.                     ctoc_name_map['classOffset'] = entry['classOffset']
  1487.                     break
  1488.                     continue
  1489.             else:
  1490.                 ctoc_name_map['classOffset'] = self._add_to_ctoc(node.klass, 0)
  1491.             self._sectionCount += 1
  1492.         elif node.klass == 'article':
  1493.             ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset)
  1494.             for entry in self._ctoc_map:
  1495.                 if entry['klass'] == 'article':
  1496.                     ctoc_name_map['classOffset'] = entry['classOffset']
  1497.                     break
  1498.                     continue
  1499.             else:
  1500.                 ctoc_name_map['classOffset'] = self._add_to_ctoc(node.klass, 0)
  1501.             if node.description:
  1502.                 d = self._clean_text_value(node.description)
  1503.                 ctoc_name_map['descriptionOffset'] = self._add_to_ctoc(d, self._ctoc_offset)
  1504.             else:
  1505.                 ctoc_name_map['descriptionOffset'] = None
  1506.             if node.author:
  1507.                 a = self._clean_text_value(node.author)
  1508.                 ctoc_name_map['authorOffset'] = self._add_to_ctoc(a, self._ctoc_offset)
  1509.             else:
  1510.                 ctoc_name_map['authorOffset'] = None
  1511.             self._articleCount += 1
  1512.         else:
  1513.             raise NotImplementedError('writer._generate_ctoc.add_node: title: %s has unrecognized klass: %s, playOrder: %d' % (node.title, node.klass, node.play_order))
  1514.         self._ctoc_map.append(ctoc_name_map)
  1515.  
  1516.     
  1517.     def _generate_ctoc(self):
  1518.         toc = self._oeb.toc
  1519.         reduced_toc = []
  1520.         self._ctoc_map = []
  1521.         self._last_toc_entry = None
  1522.         self._ctoc = StringIO()
  1523.         self._periodicalCount = 0
  1524.         self._sectionCount = 0
  1525.         self._articleCount = 0
  1526.         self._chapterCount = 0
  1527.         if self._conforming_periodical_toc:
  1528.             self._oeb.logger.info('Generating structured CTOC ...')
  1529.             for child in toc.iter():
  1530.                 if self.opts.verbose > 2:
  1531.                     self._oeb.logger.info('  %s' % child)
  1532.                 
  1533.                 self._add_structured_ctoc_node(child, self._ctoc)
  1534.             
  1535.         else:
  1536.             self._oeb.logger.info('Generating flat CTOC ...')
  1537.             previousOffset = -1
  1538.             currentOffset = 0
  1539.             for i, child in enumerate(toc.iterdescendants()):
  1540.                 if child.klass is None:
  1541.                     child.klass = 'chapter'
  1542.                 
  1543.                 if (child.klass == 'article' or child.klass == 'chapter') and child.depth() == 1:
  1544.                     if self.opts.verbose > 2:
  1545.                         self._oeb.logger.info('adding (klass:%s depth:%d) %s to flat ctoc' % (child.klass, child.depth(), child))
  1546.                     
  1547.                     h = child.href
  1548.                     if h is None:
  1549.                         self._oeb.logger.warn('  Ignoring TOC entry with no href:', child.title)
  1550.                         continue
  1551.                     
  1552.                     if h not in self._id_offsets:
  1553.                         self._oeb.logger.warn('  Ignoring missing TOC entry:', unicode(child))
  1554.                         continue
  1555.                     
  1556.                     currentOffset = self._id_offsets[h]
  1557.                     if currentOffset != previousOffset:
  1558.                         self._add_flat_ctoc_node(child, self._ctoc)
  1559.                         reduced_toc.append(child)
  1560.                         previousOffset = currentOffset
  1561.                     else:
  1562.                         self._oeb.logger.warn("  Ignoring redundant href: %s in '%s'" % (h, child.title))
  1563.                 currentOffset != previousOffset
  1564.                 if self.opts.verbose > 2:
  1565.                     self._oeb.logger.info('skipping class: %s depth %d at position %d' % (child.klass, child.depth(), i))
  1566.                     continue
  1567.             
  1568.             self._oeb.toc.nodes = reduced_toc
  1569.         if not (self._periodicalCount) and not (self._sectionCount) or not (self._articleCount) or not (self.opts.mobi_periodical):
  1570.             mobiType = 2
  1571.         elif self._periodicalCount:
  1572.             pt = None
  1573.             if self._oeb.metadata.publication_type:
  1574.                 x = unicode(self._oeb.metadata.publication_type[0]).split(':')
  1575.                 if len(x) > 1:
  1576.                     pt = x[1]
  1577.                 
  1578.             
  1579.             mobiType = {
  1580.                 'newspaper': 257 }.get(pt, 259)
  1581.         else:
  1582.             raise NotImplementedError('_generate_ctoc: Unrecognized document structured')
  1583.         self._MobiDoc = not (self.opts.mobi_periodical)(mobiType)
  1584.         if self.opts.verbose > 2:
  1585.             structType = 'book'
  1586.             if mobiType > 256:
  1587.                 structType = None if mobiType == 258 else 'structured periodical'
  1588.             
  1589.             self._oeb.logger.info('Instantiating a %s MobiDocument of type 0x%X' % (structType, mobiType))
  1590.             if mobiType > 256:
  1591.                 self._oeb.logger.info('periodicalCount: %d  sectionCount: %d  articleCount: %d' % (self._periodicalCount, self._sectionCount, self._articleCount))
  1592.             else:
  1593.                 self._oeb.logger.info('chapterCount: %d' % self._chapterCount)
  1594.         
  1595.         if True:
  1596.             rec_count = len(self._ctoc_records)
  1597.             None(self._oeb.logger.info % ('  CNCX utilization: %d %s %.0f%% full', rec_count + 1 if rec_count else 'record,', len(self._ctoc.getvalue()) / 655))
  1598.         
  1599.         return align_block(self._ctoc.getvalue())
  1600.  
  1601.     
  1602.     def _write_periodical_node(self, indxt, indices, index, offset, length, count, firstSection, lastSection):
  1603.         pos = 192 + indxt.tell()
  1604.         indices.write(pack('>H', pos))
  1605.         name = '%04X' % count
  1606.         indxt.write(chr(len(name)) + name)
  1607.         indxt.write(INDXT['periodical'])
  1608.         indxt.write(chr(1))
  1609.         indxt.write(decint(offset, DECINT_FORWARD))
  1610.         indxt.write(decint(length, DECINT_FORWARD))
  1611.         indxt.write(decint(self._ctoc_map[index]['titleOffset'], DECINT_FORWARD))
  1612.         indxt.write(decint(0, DECINT_FORWARD))
  1613.         indxt.write(decint(self._ctoc_map[index]['classOffset'], DECINT_FORWARD))
  1614.         indxt.write(decint(firstSection, DECINT_FORWARD))
  1615.         indxt.write(decint(lastSection, DECINT_FORWARD))
  1616.         indxt.write(decint(0, DECINT_FORWARD))
  1617.  
  1618.     
  1619.     def _write_section_node(self, indxt, indices, myCtocMapIndex, index, offset, length, count, firstArticle, lastArticle, parentIndex):
  1620.         pos = 192 + indxt.tell()
  1621.         indices.write(pack('>H', pos))
  1622.         name = '%04X' % count
  1623.         indxt.write(chr(len(name)) + name)
  1624.         indxt.write(INDXT['section'])
  1625.         indxt.write(chr(0))
  1626.         indxt.write(decint(offset, DECINT_FORWARD))
  1627.         indxt.write(decint(length, DECINT_FORWARD))
  1628.         indxt.write(decint(self._ctoc_map[myCtocMapIndex]['titleOffset'], DECINT_FORWARD))
  1629.         indxt.write(decint(1, DECINT_FORWARD))
  1630.         indxt.write(decint(self._ctoc_map[myCtocMapIndex]['classOffset'], DECINT_FORWARD))
  1631.         indxt.write(decint(parentIndex, DECINT_FORWARD))
  1632.         indxt.write(decint(firstArticle, DECINT_FORWARD))
  1633.         indxt.write(decint(lastArticle, DECINT_FORWARD))
  1634.  
  1635.     
  1636.     def _write_article_node(self, indxt, indices, index, offset, length, count, parentIndex):
  1637.         pos = 192 + indxt.tell()
  1638.         indices.write(pack('>H', pos))
  1639.         name = '%04X' % count
  1640.         indxt.write(chr(len(name)) + name)
  1641.         indxt.write(INDXT['article'])
  1642.         hasAuthor = None if self._ctoc_map[index]['authorOffset'] else False
  1643.         hasDescription = None if self._ctoc_map[index]['descriptionOffset'] else False
  1644.         flagBits = 0
  1645.         if hasAuthor:
  1646.             flagBits |= 4
  1647.         
  1648.         if hasDescription:
  1649.             flagBits |= 2
  1650.         
  1651.         indxt.write(pack('>B', flagBits))
  1652.         indxt.write(decint(offset, DECINT_FORWARD))
  1653.         indxt.write(decint(length, DECINT_FORWARD))
  1654.         indxt.write(decint(self._ctoc_map[index]['titleOffset'], DECINT_FORWARD))
  1655.         indxt.write(decint(2, DECINT_FORWARD))
  1656.         indxt.write(decint(self._ctoc_map[index]['classOffset'], DECINT_FORWARD))
  1657.         indxt.write(decint(parentIndex, DECINT_FORWARD))
  1658.         descriptionOffset = self._ctoc_map[index]['descriptionOffset']
  1659.         if descriptionOffset:
  1660.             indxt.write(decint(descriptionOffset, DECINT_FORWARD))
  1661.         
  1662.         authorOffset = self._ctoc_map[index]['authorOffset']
  1663.         if authorOffset:
  1664.             indxt.write(decint(authorOffset, DECINT_FORWARD))
  1665.         
  1666.  
  1667.     
  1668.     def _write_chapter_node(self, indxt, indices, index, offset, length, count):
  1669.         if self.opts.verbose > 2:
  1670.             pass
  1671.         
  1672.         pos = 192 + indxt.tell()
  1673.         indices.write(pack('>H', pos))
  1674.         name = '%04X' % count
  1675.         indxt.write(chr(len(name)) + name)
  1676.         indxt.write(INDXT['chapter'])
  1677.         indxt.write(decint(offset, DECINT_FORWARD))
  1678.         indxt.write(decint(length, DECINT_FORWARD))
  1679.         indxt.write(decint(self._ctoc_map[index]['titleOffset'], DECINT_FORWARD))
  1680.         indxt.write(decint(0, DECINT_FORWARD))
  1681.  
  1682.     
  1683.     def _compute_offset_length(self, i, node, entries):
  1684.         h = node.href
  1685.         if h not in self._id_offsets:
  1686.             self._oeb.log.warning('Could not find TOC entry:', node.title)
  1687.             return (-1, -1)
  1688.         offset = self._id_offsets[h]
  1689.         length = None
  1690.         for sibling in entries[i + 1:]:
  1691.             h2 = sibling.href
  1692.             if h2 in self._id_offsets:
  1693.                 offset2 = self._id_offsets[h2]
  1694.                 if offset2 > offset:
  1695.                     length = offset2 - offset
  1696.                     break
  1697.                 
  1698.             offset2 > offset
  1699.         
  1700.         if length is None:
  1701.             length = self._content_length - offset
  1702.         
  1703.         return (offset, length)
  1704.  
  1705.     
  1706.     def _establish_document_structure(self):
  1707.         documentType = None
  1708.         
  1709.         try:
  1710.             klass = self._ctoc_map[0]['klass']
  1711.         except:
  1712.             klass = None
  1713.  
  1714.         if klass == 'chapter' or klass == None:
  1715.             documentType = 'book'
  1716.             if self.opts.verbose > 2:
  1717.                 self._oeb.logger.info('Adding a MobiBook to self._MobiDoc')
  1718.             
  1719.             self._MobiDoc.documentStructure = MobiBook()
  1720.         elif klass == 'periodical':
  1721.             documentType = klass
  1722.             if self.opts.verbose > 2:
  1723.                 self._oeb.logger.info('Adding a MobiPeriodical to self._MobiDoc')
  1724.             
  1725.             self._MobiDoc.documentStructure = MobiPeriodical(self._MobiDoc.getNextNode())
  1726.             self._MobiDoc.documentStructure.startAddress = self._anchor_offset_kindle
  1727.         else:
  1728.             raise NotImplementedError('_establish_document_structure: unrecognized klass: %s' % klass)
  1729.         return klass == None
  1730.  
  1731.     
  1732.     def _generate_section_indices(self, child, currentSection, myPeriodical, myDoc):
  1733.         sectionTitles = list(child.iter())[1:]
  1734.         sectionIndices = []
  1735.         sectionParents = []
  1736.         for j, section in enumerate(sectionTitles):
  1737.             if section.klass == 'periodical':
  1738.                 sectionIndices.append(currentSection)
  1739.                 if self.opts.verbose > 3:
  1740.                     self._oeb.logger.info('Periodical: %15.15s \tkls:%s \tdpt:%d  ply:%03d' % (section.title, section.klass, section.depth(), section.play_order))
  1741.                 
  1742.             self.opts.verbose > 3
  1743.             if section.klass == 'section':
  1744.                 myNewSection = myPeriodical.addSectionParent(myDoc, j)
  1745.                 sectionParents.append(myNewSection)
  1746.                 currentSection += 1
  1747.                 sectionIndices.append(currentSection)
  1748.                 if self.opts.verbose > 3:
  1749.                     self._oeb.logger.info('   Section: %15.15s \tkls:%s \tdpt:%d  ply:%03d \tindex:%d' % (section.title, section.klass, section.depth(), section.play_order, j))
  1750.                 
  1751.             self.opts.verbose > 3
  1752.             if section.klass == 'article':
  1753.                 sectionIndices.append(currentSection)
  1754.                 continue
  1755.             if self.opts.verbose > 3:
  1756.                 self._oeb.logger.info(' Unrecognized class %s in structured document' % section.klass)
  1757.                 continue
  1758.         
  1759.         return (sectionIndices, sectionParents)
  1760.  
  1761.     
  1762.     def _generate_section_article_indices(self, i, section, entries, sectionIndices, sectionParents):
  1763.         sectionArticles = list(section.iter())[1:]
  1764.         for j, article in enumerate(sectionArticles):
  1765.             (offset, length) = self._compute_offset_length(i, article, entries)
  1766.             if self.opts.verbose > 2:
  1767.                 self._oeb.logger.info('article %02d: offset = 0x%06X length = 0x%06X' % (j, offset, length))
  1768.             
  1769.             ctoc_map_index = i + j + 1
  1770.             mySectionParent = sectionParents[sectionIndices[i - 1]]
  1771.             myNewArticle = MobiArticle(mySectionParent, offset, length, ctoc_map_index)
  1772.             mySectionParent.addArticle(myNewArticle)
  1773.         
  1774.  
  1775.     
  1776.     def _add_book_chapters(self, myDoc, indxt, indices):
  1777.         chapterCount = myDoc.documentStructure.chapterCount()
  1778.         if self.opts.verbose > 3:
  1779.             self._oeb.logger.info('Writing %d chapters for mobitype 0x%03X' % (chapterCount, myDoc.mobiType))
  1780.         
  1781.         for c, chapter in enumerate(list(myDoc.documentStructure.chapters)):
  1782.             index = chapter.myCtocMapIndex
  1783.             self._write_chapter_node(indxt, indices, index, chapter.startAddress, chapter.length, c)
  1784.             last_name = '%04X' % c
  1785.         
  1786.         return (last_name, c)
  1787.  
  1788.     
  1789.     def _add_periodical_flat_articles(self, myDoc, indxt, indices):
  1790.         sectionParent = myDoc.documentStructure.sectionParents[0]
  1791.         articleCount = len(sectionParent.articles)
  1792.         if self.opts.verbose > 3:
  1793.             self._oeb.logger.info('Writing %d articles for mobitype 0x%03X' % (articleCount, myDoc.mobiType))
  1794.         
  1795.         index = 0
  1796.         offset = myDoc.documentStructure.startAddress
  1797.         length = myDoc.documentStructure.length
  1798.         c = 0
  1799.         firstSection = myDoc.documentStructure.firstSectionIndex
  1800.         lastSection = myDoc.documentStructure.lastSectionIndex
  1801.         self._write_periodical_node(indxt, indices, index, offset, length, c, firstSection, lastSection)
  1802.         index += 1
  1803.         offset = sectionParent.startAddress
  1804.         length = sectionParent.sectionLength
  1805.         c += 1
  1806.         firstArticle = sectionParent.firstArticleIndex
  1807.         lastArticle = sectionParent.lastArticleIndex
  1808.         parentIndex = sectionParent.parentIndex
  1809.         self._write_section_node(indxt, indices, sectionParent.myCtocMapIndex, index, offset, length, c, firstArticle, lastArticle, parentIndex)
  1810.         last_name = '%04X' % c
  1811.         for i, article in enumerate(list(sectionParent.articles)):
  1812.             index = article.myCtocMapIndex
  1813.             offset = article.startAddress
  1814.             length = article.articleLength
  1815.             c += 1
  1816.             parentIndex = article.sectionParentIndex
  1817.             self._write_article_node(indxt, indices, index, offset, length, c, parentIndex)
  1818.         
  1819.         last_name = '%04X' % c
  1820.         return (last_name, c)
  1821.  
  1822.     
  1823.     def _add_periodical_structured_articles(self, myDoc, indxt, indices):
  1824.         if self.opts.verbose > 2:
  1825.             self._oeb.logger.info('Writing NCXEntries for mobiType 0x%03X' % myDoc.mobiType)
  1826.         
  1827.         sectionParent = myDoc.documentStructure.sectionParents[0]
  1828.         index = 0
  1829.         offset = myDoc.documentStructure.startAddress
  1830.         length = myDoc.documentStructure.length
  1831.         c = 0
  1832.         firstSection = myDoc.documentStructure.firstSectionIndex
  1833.         lastSection = myDoc.documentStructure.lastSectionIndex
  1834.         self._write_periodical_node(indxt, indices, index, offset, length, c, firstSection, lastSection)
  1835.         sectionCount = firstSection
  1836.         while sectionCount <= lastSection:
  1837.             sectionParent = myDoc.documentStructure.sectionParents[sectionCount - 1]
  1838.             offset = sectionParent.startAddress
  1839.             length = sectionParent.sectionLength
  1840.             c += 1
  1841.             firstArticle = sectionParent.firstArticleIndex
  1842.             lastArticle = sectionParent.lastArticleIndex
  1843.             parentIndex = sectionParent.parentIndex
  1844.             self._write_section_node(indxt, indices, sectionParent.myCtocMapIndex, sectionCount, offset, length, c, firstArticle, lastArticle, parentIndex)
  1845.             sectionCount += 1
  1846.         sectionCount = firstSection
  1847.         while sectionCount <= lastSection:
  1848.             sectionParent = myDoc.documentStructure.sectionParents[sectionCount - 1]
  1849.             last_name = '%04X' % c
  1850.             for i, article in enumerate(list(sectionParent.articles)):
  1851.                 if self.opts.verbose > 3:
  1852.                     self._oeb.logger.info('Adding section:article %d:%02d' % (sectionParent.myIndex, i))
  1853.                 
  1854.                 index = article.myCtocMapIndex
  1855.                 offset = article.startAddress
  1856.                 length = article.articleLength
  1857.                 c += 1
  1858.                 parentIndex = article.sectionParentIndex
  1859.                 self._write_article_node(indxt, indices, index, offset, length, c, parentIndex)
  1860.                 last_name = '%04X' % c
  1861.             
  1862.             sectionCount += 1
  1863.         return (last_name, c)
  1864.  
  1865.     
  1866.     def _generate_indxt(self):
  1867.         documentType = 'unknown'
  1868.         sectionIndices = []
  1869.         sectionParents = []
  1870.         currentSection = 0
  1871.         toc = self._oeb.toc
  1872.         indxt = StringIO()
  1873.         indices = StringIO()
  1874.         c = 0
  1875.         indices.write('IDXT')
  1876.         c = 0
  1877.         last_name = None
  1878.         documentType = self._establish_document_structure()
  1879.         myDoc = self._MobiDoc
  1880.         nodes = list(toc.iter())[0:1]
  1881.         for i, child in enumerate(nodes):
  1882.             if documentType == 'periodical':
  1883.                 myPeriodical = myDoc.documentStructure
  1884.                 if self.opts.verbose > 3:
  1885.                     self._oeb.logger.info('\nDocument: %s \tkls:%s \tdpt:%d  ply:%03d' % (child.title, child.klass, child.depth(), child.play_order))
  1886.                 
  1887.                 (sectionIndices, sectionParents) = self._generate_section_indices(child, currentSection, myPeriodical, myDoc)
  1888.                 continue
  1889.             if documentType == 'book':
  1890.                 myBook = myDoc.documentStructure
  1891.                 if self.opts.verbose > 3:
  1892.                     self._oeb.logger.info('\nBook: %-19.19s \tkls:%s \tdpt:%d  ply:%03d' % (child.title, child.klass, child.depth(), child.play_order))
  1893.                 
  1894.             self.opts.verbose > 3
  1895.             if self.opts.verbose > 3:
  1896.                 self._oeb.logger.info('unknown document type %12.12s \tdepth:%d' % (child.title, child.depth()))
  1897.                 continue
  1898.         
  1899.         entries = list(toc.iter())[1:]
  1900.         for i, child in enumerate(entries):
  1901.             if not (child.title) or not child.title.strip():
  1902.                 continue
  1903.             
  1904.             (offset, length) = self._compute_offset_length(i, child, entries)
  1905.             if (child.klass == 'chapter' or not (self.opts.mobi_periodical)) and child.klass == 'article':
  1906.                 myNewChapter = MobiChapter(myDoc.getNextNode(), offset, length, i)
  1907.                 myBook.addChapter(myNewChapter)
  1908.                 
  1909.                 try:
  1910.                     if self.opts.verbose > 3:
  1911.                         self._oeb.logger.info('  Chapter: %-14.14s \tcls:%s \tdpt:%d  ply:%03d \toff:0x%X \t:len0x%X' % (child.title, child.klass, child.depth(), child.play_order, offset, length))
  1912.                 if self.opts.verbose > 3:
  1913.                     self._oeb.logger.info('  Chapter: %-14.14s \tclass:%s \tdepth:%d  playOrder:%03d \toff:0x%X \t:len0x%X' % ('(bad string)', child.klass, child.depth(), child.play_order, offset, length))
  1914.                 
  1915.  
  1916.                 continue
  1917.             self.opts.verbose > 3
  1918.             if child.klass == 'section' and self.opts.mobi_periodical:
  1919.                 if self.opts.verbose > 3:
  1920.                     self._oeb.logger.info('\n  Section: %-15.15s \tkls:%s \tdpt:%d  ply:%03d' % (child.title, child.klass, child.depth(), child.play_order))
  1921.                 
  1922.                 self._generate_section_article_indices(i, child, entries, sectionIndices, sectionParents)
  1923.                 continue
  1924.         
  1925.         if self.opts.verbose > 3:
  1926.             self._oeb.logger.info('')
  1927.         
  1928.         mobiType = myDoc.mobiType
  1929.         if self.opts.verbose > 3:
  1930.             self._MobiDoc.dumpInfo()
  1931.         
  1932.         if mobiType == 2:
  1933.             (last_name, c) = self._add_book_chapters(myDoc, indxt, indices)
  1934.         elif mobiType == 258 and myDoc.documentStructure.sectionCount() == 1:
  1935.             (last_name, c) = self._add_periodical_flat_articles(myDoc, indxt, indices)
  1936.         else:
  1937.             (last_name, c) = self._add_periodical_structured_articles(myDoc, indxt, indices)
  1938.         return (align_block(indxt.getvalue()), c, align_block(indices.getvalue()), last_name)
  1939.  
  1940.  
  1941.  
  1942. class HTMLRecordData(object):
  1943.     
  1944.     def __init__(self):
  1945.         self._continuingNode = -1
  1946.         self._continuingNodeParent = -1
  1947.         self._openingNode = -1
  1948.         self._openingNodeParent = -1
  1949.         self._currentSectionNodeCount = -1
  1950.         self._nextSectionNumber = -1
  1951.         self._nextSectionOpeningNode = -1
  1952.         self._nextSectionNodeCount = -1
  1953.  
  1954.     
  1955.     def getContinuingNode(self):
  1956.         return self._continuingNode
  1957.  
  1958.     
  1959.     def setContinuingNode(self, value):
  1960.         self._continuingNode = value
  1961.  
  1962.     continuingNode = property(getContinuingNode, setContinuingNode, None, None)
  1963.     
  1964.     def getContinuingNodeParent(self):
  1965.         return self._continuingNodeParent
  1966.  
  1967.     
  1968.     def setContinuingNodeParent(self, value):
  1969.         self._continuingNodeParent = value
  1970.  
  1971.     continuingNodeParent = property(getContinuingNodeParent, setContinuingNodeParent, None, None)
  1972.     
  1973.     def getOpeningNode(self):
  1974.         return self._openingNode
  1975.  
  1976.     
  1977.     def setOpeningNode(self, value):
  1978.         self._openingNode = value
  1979.  
  1980.     openingNode = property(getOpeningNode, setOpeningNode, None, None)
  1981.     
  1982.     def getOpeningNodeParent(self):
  1983.         return self._openingNodeParent
  1984.  
  1985.     
  1986.     def setOpeningNodeParent(self, value):
  1987.         self._openingNodeParent = value
  1988.  
  1989.     openingNodeParent = property(getOpeningNodeParent, setOpeningNodeParent, None, None)
  1990.     
  1991.     def getCurrentSectionNodeCount(self):
  1992.         return self._currentSectionNodeCount
  1993.  
  1994.     
  1995.     def setCurrentSectionNodeCount(self, value):
  1996.         self._currentSectionNodeCount = value
  1997.  
  1998.     currentSectionNodeCount = property(getCurrentSectionNodeCount, setCurrentSectionNodeCount, None, None)
  1999.     
  2000.     def getNextSectionNumber(self):
  2001.         return self._nextSectionNumber
  2002.  
  2003.     
  2004.     def setNextSectionNumber(self, value):
  2005.         self._nextSectionNumber = value
  2006.  
  2007.     nextSectionNumber = property(getNextSectionNumber, setNextSectionNumber, None, None)
  2008.     
  2009.     def getNextSectionOpeningNode(self):
  2010.         return self._nextSectionOpeningNode
  2011.  
  2012.     
  2013.     def setNextSectionOpeningNode(self, value):
  2014.         self._nextSectionOpeningNode = value
  2015.  
  2016.     nextSectionOpeningNode = property(getNextSectionOpeningNode, setNextSectionOpeningNode, None, None)
  2017.     
  2018.     def getNextSectionNodeCount(self):
  2019.         return self._nextSectionNodeCount
  2020.  
  2021.     
  2022.     def setNextSectionNodeCount(self, value):
  2023.         self._nextSectionNodeCount = value
  2024.  
  2025.     nextSectionNodeCount = property(getNextSectionNodeCount, setNextSectionNodeCount, None, None)
  2026.     
  2027.     def dumpData(self, recordNumber, oeb):
  2028.         oeb.logger.info('---  Summary of HTML Record 0x%x [%d] indexing  ---' % (recordNumber, recordNumber))
  2029.         oeb.logger.info('            continuingNode: %03d' % self.continuingNode)
  2030.         oeb.logger.info('      continuingNodeParent: %03d' % self.continuingNodeParent)
  2031.         oeb.logger.info('               openingNode: %03d' % self.openingNode)
  2032.         oeb.logger.info('         openingNodeParent: %03d' % self.openingNodeParent)
  2033.         oeb.logger.info('   currentSectionNodeCount: %03d' % self.currentSectionNodeCount)
  2034.         oeb.logger.info('         nextSectionNumber: %03d' % self.nextSectionNumber)
  2035.         oeb.logger.info('    nextSectionOpeningNode: %03d' % self.nextSectionOpeningNode)
  2036.         oeb.logger.info('      nextSectionNodeCount: %03d' % self.nextSectionNodeCount)
  2037.  
  2038.  
  2039.  
  2040. class MobiDocument(object):
  2041.     _nextNode = -1
  2042.     
  2043.     def __init__(self, mobitype):
  2044.         self._mobitype = mobitype
  2045.         self._documentStructure = None
  2046.  
  2047.     
  2048.     def getMobiType(self):
  2049.         return self._mobitype
  2050.  
  2051.     
  2052.     def setMobiType(self, value):
  2053.         self._mobitype = value
  2054.  
  2055.     mobiType = property(getMobiType, setMobiType, None, None)
  2056.     
  2057.     def getDocumentStructure(self):
  2058.         return self._documentStructure
  2059.  
  2060.     
  2061.     def setDocumentStructure(self, value):
  2062.         self._documentStructure = value
  2063.  
  2064.     documentStructure = property(getDocumentStructure, setDocumentStructure, None, None)
  2065.     
  2066.     def getNextNode(self):
  2067.         self._nextNode += 1
  2068.         return self._nextNode
  2069.  
  2070.     
  2071.     def dumpInfo(self):
  2072.         self._documentStructure.dumpInfo()
  2073.  
  2074.  
  2075.  
  2076. class MobiBook(object):
  2077.     
  2078.     def __init__(self):
  2079.         self._chapters = []
  2080.  
  2081.     
  2082.     def chapterCount(self):
  2083.         return len(self._chapters)
  2084.  
  2085.     
  2086.     def getChapters(self):
  2087.         return self._chapters
  2088.  
  2089.     
  2090.     def setChapters(self, value):
  2091.         self._chapters = value
  2092.  
  2093.     chapters = property(getChapters, setChapters, None, None)
  2094.     
  2095.     def addChapter(self, value):
  2096.         self._chapters.append(value)
  2097.  
  2098.     
  2099.     def dumpInfo(self):
  2100.         print '%20s:' % 'Book'
  2101.         print '%20s: %d' % ('Number of chapters', len(self._chapters))
  2102.         for count, chapter in enumerate(self._chapters):
  2103.             print '%20s: %d' % ('myCtocMapIndex', chapter.myCtocMapIndex)
  2104.             print '%20s: %d' % ('Chapter', count)
  2105.             print '%20s: 0x%X' % ('startAddress', chapter.startAddress)
  2106.             print '%20s: 0x%X' % ('length', chapter.length)
  2107.             print 
  2108.         
  2109.  
  2110.  
  2111.  
  2112. class MobiChapter(object):
  2113.     
  2114.     def __init__(self, myIndex, startAddress, length, ctoc_map_index):
  2115.         self._myIndex = myIndex
  2116.         self._startAddress = startAddress
  2117.         self._length = length
  2118.         self._myCtocMapIndex = ctoc_map_index
  2119.  
  2120.     
  2121.     def getMyCtocMapIndex(self):
  2122.         return self._myCtocMapIndex
  2123.  
  2124.     
  2125.     def setMyCtocMapIndex(self, value):
  2126.         self._myCtocMapIndex = value
  2127.  
  2128.     myCtocMapIndex = property(getMyCtocMapIndex, setMyCtocMapIndex, None, None)
  2129.     
  2130.     def getMyIndex(self):
  2131.         return self._myIndex
  2132.  
  2133.     myIndex = property(getMyIndex, None, None, None)
  2134.     
  2135.     def getStartAddress(self):
  2136.         return self._startAddress
  2137.  
  2138.     
  2139.     def setStartAddress(self, value):
  2140.         self._startAddress = value
  2141.  
  2142.     startAddress = property(getStartAddress, setStartAddress, None, None)
  2143.     
  2144.     def getLength(self):
  2145.         return self._length
  2146.  
  2147.     
  2148.     def setLength(self, value):
  2149.         self._length = value
  2150.  
  2151.     length = property(getLength, setLength, None, None)
  2152.  
  2153.  
  2154. class MobiPeriodical(object):
  2155.     
  2156.     def __init__(self, myIndex):
  2157.         self._myIndex = myIndex
  2158.         self._sectionParents = []
  2159.         self._startAddress = 0xFFFFFFFFL
  2160.         self._length = 0xFFFFFFFFL
  2161.         self._firstSectionIndex = 0xFFFFFFFFL
  2162.         self._lastSectionIndex = 0xFFFFFFFFL
  2163.         self._myCtocMapIndex = 0
  2164.  
  2165.     
  2166.     def getMyIndex(self):
  2167.         return self._myIndex
  2168.  
  2169.     
  2170.     def setMyIndex(self, value):
  2171.         self._myIndex = value
  2172.  
  2173.     myIndex = property(getMyIndex, setMyIndex, None, None)
  2174.     
  2175.     def getSectionParents(self):
  2176.         return self._sectionParents
  2177.  
  2178.     
  2179.     def setSectionParents(self, value):
  2180.         self._sectionParents = value
  2181.  
  2182.     sectionParents = property(getSectionParents, setSectionParents, None, None)
  2183.     
  2184.     def sectionCount(self):
  2185.         return len(self._sectionParents)
  2186.  
  2187.     
  2188.     def getStartAddress(self):
  2189.         return self._startAddress
  2190.  
  2191.     
  2192.     def setStartAddress(self, value):
  2193.         self._startAddress = value
  2194.  
  2195.     startAddress = property(getStartAddress, setStartAddress, None, None)
  2196.     
  2197.     def getLength(self):
  2198.         return self._length
  2199.  
  2200.     
  2201.     def setLength(self, value):
  2202.         self._length = value
  2203.  
  2204.     length = property(getLength, setLength, None, None)
  2205.     
  2206.     def getFirstSectionIndex(self):
  2207.         return self._firstSectionIndex
  2208.  
  2209.     
  2210.     def setFirstSectionIndex(self, value):
  2211.         self._firstSectionIndex = value
  2212.  
  2213.     firstSectionIndex = property(getFirstSectionIndex, setFirstSectionIndex, None, None)
  2214.     
  2215.     def getLastSectionIndex(self):
  2216.         return self._lastSectionIndex
  2217.  
  2218.     
  2219.     def setLastSectionIndex(self, value):
  2220.         self._lastSectionIndex = value
  2221.  
  2222.     lastSectionIndex = property(getLastSectionIndex, setLastSectionIndex, None, None)
  2223.     
  2224.     def getMyCtocMapIndex(self):
  2225.         return self._myCtocMapIndex
  2226.  
  2227.     
  2228.     def setMyCtocMapIndex(self, value):
  2229.         self._myCtocMapIndex = value
  2230.  
  2231.     myCtocMapIndex = property(getMyCtocMapIndex, setMyCtocMapIndex, None, None)
  2232.     
  2233.     def addSectionParent(self, myIndex, ctoc_map_index):
  2234.         newSection = MobiSection(myIndex)
  2235.         newSection.parentIndex = self._myIndex
  2236.         newSection.sectionIndex = len(self._sectionParents)
  2237.         newSection.myCtocMapIndex = ctoc_map_index
  2238.         self._sectionParents.append(newSection)
  2239.         return newSection
  2240.  
  2241.     
  2242.     def dumpInfo(self):
  2243.         print '%20s:' % 'Periodical'
  2244.         print '%20s: 0x%X' % ('myIndex', self.myIndex)
  2245.         print '%20s: 0x%X' % ('startAddress', self.startAddress)
  2246.         print '%20s: 0x%X' % ('length', self.length)
  2247.         print '%20s: 0x%X' % ('myCtocMapIndex', self.myCtocMapIndex)
  2248.         print '%20s: 0x%X' % ('firstSectionIndex', self.firstSectionIndex)
  2249.         print '%20s: 0x%X' % ('lastSectionIndex', self.lastSectionIndex)
  2250.         print '%20s: %d' % ('Number of Sections', len(self._sectionParents))
  2251.         for count, section in enumerate(self._sectionParents):
  2252.             print '\t%20s: %d' % ('Section', count)
  2253.             print '\t%20s: 0x%X' % ('startAddress', section.startAddress)
  2254.             print '\t%20s: 0x%X' % ('length', section.sectionLength)
  2255.             print '\t%20s: 0x%X' % ('parentIndex', section.parentIndex)
  2256.             print '\t%20s: 0x%X' % ('myIndex', section.myIndex)
  2257.             print '\t%20s: 0x%X' % ('firstArticleIndex', section.firstArticleIndex)
  2258.             print '\t%20s: 0x%X' % ('lastArticleIndex', section.lastArticleIndex)
  2259.             print '\t%20s: 0x%X' % ('articles', len(section.articles))
  2260.             print '\t%20s: 0x%X' % ('myCtocMapIndex', section.myCtocMapIndex)
  2261.             print 
  2262.             for artCount, article in enumerate(section.articles):
  2263.                 print '\t\t%20s: %d' % ('Article', artCount)
  2264.                 print '\t\t%20s: 0x%X' % ('startAddress', article.startAddress)
  2265.                 print '\t\t%20s: 0x%X' % ('length', article.articleLength)
  2266.                 print '\t\t%20s: 0x%X' % ('sectionIndex', article.sectionParentIndex)
  2267.                 print '\t\t%20s: 0x%X' % ('myIndex', article.myIndex)
  2268.                 print '\t\t%20s: 0x%X' % ('myCtocMapIndex', article.myCtocMapIndex)
  2269.                 print 
  2270.             
  2271.         
  2272.  
  2273.  
  2274.  
  2275. class MobiSection(object):
  2276.     
  2277.     def __init__(self, myMobiDoc):
  2278.         self._myMobiDoc = myMobiDoc
  2279.         self._myIndex = myMobiDoc.getNextNode()
  2280.         self._parentIndex = 0xFFFFFFFFL
  2281.         self._firstArticleIndex = 0
  2282.         self._lastArticleIndex = 0
  2283.         self._startAddress = 0xFFFFFFFFL
  2284.         self._sectionLength = 0xFFFFFFFFL
  2285.         self._articles = []
  2286.         self._myCtocMapIndex = -1
  2287.  
  2288.     
  2289.     def getMyMobiDoc(self):
  2290.         return self._myMobiDoc
  2291.  
  2292.     
  2293.     def setMyMobiDoc(self, value):
  2294.         self._myMobiDoc = value
  2295.  
  2296.     myMobiDoc = property(getMyMobiDoc, setMyMobiDoc, None, None)
  2297.     
  2298.     def getMyIndex(self):
  2299.         return self._myIndex
  2300.  
  2301.     
  2302.     def setMyIndex(self, value):
  2303.         self._myIndex = value
  2304.  
  2305.     myIndex = property(getMyIndex, setMyIndex, None, None)
  2306.     
  2307.     def getParentIndex(self):
  2308.         return self._parentIndex
  2309.  
  2310.     
  2311.     def setParentIndex(self, value):
  2312.         self._parentIndex = value
  2313.  
  2314.     parenIndex = property(getParentIndex, setParentIndex, None, None)
  2315.     
  2316.     def getFirstArticleIndex(self):
  2317.         return self._firstArticleIndex
  2318.  
  2319.     
  2320.     def setFirstArticleIndex(self, value):
  2321.         self._firstArticleIndex = value
  2322.  
  2323.     firstArticleIndex = property(getFirstArticleIndex, setFirstArticleIndex, None, None)
  2324.     
  2325.     def getLastArticleIndex(self):
  2326.         return self._lastArticleIndex
  2327.  
  2328.     
  2329.     def setLastArticleIndex(self, value):
  2330.         self._lastArticleIndex = value
  2331.  
  2332.     lastArticleIndex = property(getLastArticleIndex, setLastArticleIndex, None, None)
  2333.     
  2334.     def getStartAddress(self):
  2335.         return self._startAddress
  2336.  
  2337.     
  2338.     def setStartAddress(self, value):
  2339.         self._startAddress = value
  2340.  
  2341.     startAddress = property(getStartAddress, setStartAddress, None, None)
  2342.     
  2343.     def getSectionLength(self):
  2344.         return self._sectionLength
  2345.  
  2346.     
  2347.     def setSectionLength(self, value):
  2348.         self._sectionLength = value
  2349.  
  2350.     sectionLength = property(getSectionLength, setSectionLength, None, None)
  2351.     
  2352.     def getArticles(self):
  2353.         return self._articles
  2354.  
  2355.     
  2356.     def setArticles(self, value):
  2357.         self._articles = value
  2358.  
  2359.     articles = property(getArticles, setArticles, None, None)
  2360.     
  2361.     def getMyCtocMapIndex(self):
  2362.         return self._myCtocMapIndex
  2363.  
  2364.     
  2365.     def setMyCtocMapIndex(self, value):
  2366.         self._myCtocMapIndex = value
  2367.  
  2368.     myCtocMapIndex = property(getMyCtocMapIndex, setMyCtocMapIndex, None, None)
  2369.     
  2370.     def addArticle(self, article):
  2371.         self._articles.append(article)
  2372.         self.myMobiDoc.documentStructure.lastSectionIndex = self.myIndex
  2373.  
  2374.  
  2375.  
  2376. class MobiArticle(object):
  2377.     
  2378.     def __init__(self, sectionParent, startAddress, length, ctocMapIndex):
  2379.         self._mySectionParent = sectionParent
  2380.         self._myMobiDoc = sectionParent.myMobiDoc
  2381.         self._myIndex = sectionParent.myMobiDoc.getNextNode()
  2382.         self._myCtocMapIndex = ctocMapIndex
  2383.         self._sectionParentIndex = sectionParent.myIndex
  2384.         self._startAddress = startAddress
  2385.         self._articleLength = length
  2386.  
  2387.     
  2388.     def getMySectionParent(self):
  2389.         return self._mySectionParent
  2390.  
  2391.     
  2392.     def setMySectionParent(self, value):
  2393.         self._mySectionParent = value
  2394.  
  2395.     mySectionParent = property(getMySectionParent, setMySectionParent, None, None)
  2396.     
  2397.     def getMyMobiDoc(self):
  2398.         return self._myMobiDoc
  2399.  
  2400.     
  2401.     def setMyMobiDoc(self, value):
  2402.         self._myMobiDoc = value
  2403.  
  2404.     myMobiDoc = property(getMyMobiDoc, setMyMobiDoc, None, None)
  2405.     
  2406.     def getMyIndex(self):
  2407.         return self._myIndex
  2408.  
  2409.     
  2410.     def setMyIndex(self, value):
  2411.         self._sectionIndex = value
  2412.  
  2413.     myIndex = property(getMyIndex, setMyIndex, None, None)
  2414.     
  2415.     def getSectionParentIndex(self):
  2416.         return self._sectionParentIndex
  2417.  
  2418.     
  2419.     def setSectionParentIndex(self, value):
  2420.         self._sectionParentIndex = value
  2421.  
  2422.     sectionParentIndex = property(getSectionParentIndex, setSectionParentIndex, None, None)
  2423.     
  2424.     def getStartAddress(self):
  2425.         return self._startAddress
  2426.  
  2427.     
  2428.     def setStartAddress(self, value):
  2429.         self._startAddress = value
  2430.  
  2431.     startAddress = property(getStartAddress, setStartAddress, None, None)
  2432.     
  2433.     def getArticleLength(self):
  2434.         return self._articleLength
  2435.  
  2436.     
  2437.     def setArticleLength(self, value):
  2438.         self._articleLength = value
  2439.  
  2440.     articleLength = property(getArticleLength, setArticleLength, None, None)
  2441.     
  2442.     def getMyCtocMapIndex(self):
  2443.         return self._myCtocMapIndex
  2444.  
  2445.     
  2446.     def setMyCtocMapIndex(self, value):
  2447.         self._myCtocMapIndex = value
  2448.  
  2449.     myCtocMapIndex = property(getMyCtocMapIndex, setMyCtocMapIndex, None, None)
  2450.  
  2451.