home *** CD-ROM | disk | FTP | other *** search
/ Maximum CD 2010 November / maximum-cd-2010-11.iso / DiscContents / calibre-0.7.13.msi / file_908 (.txt) < prev    next >
Encoding:
Python Compiled Bytecode  |  2010-08-06  |  31.0 KB  |  1,099 lines

  1. # Source Generated with Decompyle++
  2. # File: in.pyc (Python 2.6)
  3.  
  4. from __future__ import with_statement
  5. __license__ = 'GPL v3'
  6. __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net> and Marshall T. Vandegrift <llasram@gmail.com>'
  7. import struct
  8. import os
  9. import functools
  10. import re
  11. from urlparse import urldefrag
  12. from cStringIO import StringIO
  13. from urllib import unquote as urlunquote
  14. from calibre.ebooks.lit import LitError
  15. from calibre.ebooks.lit.maps import OPF_MAP, HTML_MAP
  16. import calibre.ebooks.lit.mssha1 as mssha1
  17. from calibre.ebooks.oeb.base import urlnormalize
  18. from calibre.ebooks.oeb.reader import OEBReader
  19. from calibre.ebooks import DRMError
  20. from calibre import plugins
# Each entry of ``plugins`` is unpacked into two values here; the second name
# of each pair is not used in the visible part of this module.
# NOTE(review): presumably (module, load-error message) -- confirm against calibre.plugins.
(lzx, lxzerror) = plugins['lzx']
(msdes, msdeserror) = plugins['msdes']
__all__ = [
    'LitReader']
# Document prologues (XML declaration plus, for OPF/HTML, an OEB 1.0.1 DOCTYPE).
XML_DECL = '<?xml version="1.0" encoding="UTF-8" ?>\n'
OPF_DECL = '<?xml version="1.0" encoding="UTF-8" ?>\n<!DOCTYPE package\n  PUBLIC "+//ISBN 0-9673008-1-9//DTD OEB 1.0.1 Package//EN"\n  "http://openebook.org/dtds/oeb-1.0.1/oebpkg101.dtd">\n'
HTML_DECL = '<?xml version="1.0" encoding="UTF-8" ?>\n<!DOCTYPE html PUBLIC\n "+//ISBN 0-9673008-1-9//DTD OEB 1.0.1 Document//EN"\n "http://openebook.org/dtds/oeb-1.0.1/oebdoc101.dtd">\n'
# GUID strings in the same textual layout that msguid() produces.
# NOTE(review): presumably identify the DES-encryption and LZX-compression
# transforms of a section -- their use is outside the visible code.
DESENCRYPT_GUID = '{67F6E4A2-60BF-11D3-8540-00C04F58C3CF}'
LZXCOMPRESS_GUID = '{0A9007C6-4076-11D3-8789-0000F8105754}'
# NOTE(review): the CONTROL_* and RESET_* values look like byte offsets into
# LZX control-data / reset-table records; they are not referenced in the
# visible code, so confirm before relying on that reading.
CONTROL_TAG = 4
CONTROL_WINDOW_SIZE = 12
RESET_NENTRIES = 4
RESET_HDRLEN = 12
RESET_UCLENGTH = 16
RESET_INTERVAL = 32
# Bit flags of the "flags" character that precedes a tag in the binary markup
# stream (see UnBinary.binary_to_text, which tests OPENING, CLOSING and ATOM).
FLAG_OPENING = 1 << 0
FLAG_CLOSING = 1 << 1
FLAG_BLOCK = 1 << 2
FLAG_HEAD = 1 << 3
FLAG_ATOM = 1 << 4
  41.  
  42. def u32(bytes):
  43.     return struct.unpack('<L', bytes[:4])[0]
  44.  
  45.  
  46. def u16(bytes):
  47.     return struct.unpack('<H', bytes[:2])[0]
  48.  
  49.  
  50. def int32(bytes):
  51.     return struct.unpack('<l', bytes[:4])[0]
  52.  
  53.  
  54. def encint(bytes, remaining):
  55.     (pos, val) = (0, 0)
  56.     while remaining > 0:
  57.         b = ord(bytes[pos])
  58.         pos += 1
  59.         remaining -= 1
  60.         val <<= 7
  61.         val |= b & 127
  62.         if b & 128 == 0:
  63.             break
  64.             continue
  65.     return (val, bytes[pos:], remaining)
  66.  
  67.  
  68. def msguid(bytes):
  69.     values = struct.unpack('<LHHBBBBBBBB', bytes[:16])
  70.     return '{%08lX-%04X-%04X-%02X%02X-%02X%02X%02X%02X%02X%02X}' % values
  71.  
  72.  
  73. def read_utf8_char(bytes, pos):
  74.     c = ord(bytes[pos])
  75.     mask = 128
  76.     if c & mask:
  77.         elsize = 0
  78.         while c & mask:
  79.             mask >>= 1
  80.             elsize += 1
  81.         if mask <= 1 or mask == 64:
  82.             raise LitError('Invalid UTF8 character: %s' % repr(bytes[pos]))
  83.         mask == 64
  84.     else:
  85.         elsize = 1
  86.     if elsize > 1:
  87.         if elsize + pos > len(bytes):
  88.             raise LitError('Invalid UTF8 character: %s' % repr(bytes[pos]))
  89.         elsize + pos > len(bytes)
  90.         c &= mask - 1
  91.         for i in xrange(1, elsize):
  92.             b = ord(bytes[pos + i])
  93.             if b & 192 != 128:
  94.                 raise LitError('Invalid UTF8 character: %s' % repr(bytes[pos:pos + i]))
  95.             b & 192 != 128
  96.             c = c << 6 | b & 63
  97.         
  98.     
  99.     return (unichr(c), pos + elsize)
  100.  
  101.  
  102. def consume_sized_utf8_string(bytes, zpad = False):
  103.     result = []
  104.     (slen, pos) = read_utf8_char(bytes, 0)
  105.     for i in xrange(ord(slen)):
  106.         (char, pos) = read_utf8_char(bytes, pos)
  107.         result.append(char)
  108.     
  109.     if zpad and bytes[pos] == '\x00':
  110.         pos += 1
  111.     
  112.     return (u''.join(result), bytes[pos:])
  113.  
  114.  
  115. def encode(string):
  116.     return unicode(string).encode('ascii', 'xmlcharrefreplace')
  117.  
  118.  
  119. class UnBinary(object):
  120.     AMPERSAND_RE = re.compile('&(?!(?:#[0-9]+|#x[0-9a-fA-F]+|[a-zA-Z_:][a-zA-Z0-9.-_:]+);)')
  121.     OPEN_ANGLE_RE = re.compile('<<(?![!]--)')
  122.     CLOSE_ANGLE_RE = re.compile('(?<!--)>>(?=>>|[^>])')
  123.     DOUBLE_ANGLE_RE = re.compile('([<>])\\1')
  124.     EMPTY_ATOMS = ({ }, { })
  125.     
  126.     def __init__(self, bin, path, manifest = { }, map = HTML_MAP, atoms = EMPTY_ATOMS):
  127.         self.manifest = manifest
  128.         (self.tag_map, self.attr_map, self.tag_to_attr_map) = map
  129.         self.is_html = map is HTML_MAP
  130.         (self.tag_atoms, self.attr_atoms) = atoms
  131.         self.dir = os.path.dirname(path)
  132.         buf = StringIO()
  133.         self.binary_to_text(bin, buf)
  134.         self.raw = buf.getvalue().lstrip()
  135.         self.escape_reserved()
  136.         self._tree = None
  137.  
  138.     
  139.     def escape_reserved(self):
  140.         raw = self.raw
  141.         raw = self.AMPERSAND_RE.sub('&', raw)
  142.         raw = self.OPEN_ANGLE_RE.sub('<', raw)
  143.         raw = self.CLOSE_ANGLE_RE.sub('>', raw)
  144.         raw = self.DOUBLE_ANGLE_RE.sub('\\1', raw)
  145.         self.raw = raw
  146.  
  147.     
  148.     def item_path(self, internal_id):
  149.         
  150.         try:
  151.             target = self.manifest[internal_id].path
  152.         except KeyError:
  153.             return internal_id
  154.  
  155.         if not self.dir:
  156.             return target
  157.         target = target.split('/')
  158.         base = self.dir.split('/')
  159.         for index in xrange(min(len(base), len(target))):
  160.             if base[index] != target[index]:
  161.                 break
  162.                 continue
  163.             self.dir
  164.         else:
  165.             index += 1
  166.         relpath = [
  167.             '..'] * (len(base) - index) + target[index:]
  168.         return '/'.join(relpath)
  169.  
  170.     
  171.     def __unicode__(self):
  172.         return self.raw.decode('utf-8')
  173.  
  174.     
  175.     def __str__(self):
  176.         return self.raw
  177.  
  178.     
  179.     def binary_to_text(self, bin, buf, index = 0, depth = 0):
  180.         tag_name = None
  181.         current_map = None
  182.         dynamic_tag = errors = 0
  183.         in_censorship = is_goingdown = False
  184.         state = 'text'
  185.         flags = 0
  186.         while index < len(bin):
  187.             (c, index) = read_utf8_char(bin, index)
  188.             oc = ord(c)
  189.             if state == 'text':
  190.                 if oc == 0:
  191.                     state = 'get flags'
  192.                     continue
  193.                 elif c == '\x0b':
  194.                     c = '\n'
  195.                 elif c == '>':
  196.                     c = '>>'
  197.                 elif c == '<':
  198.                     c = '<<'
  199.                 
  200.                 buf.write(encode(c))
  201.                 continue
  202.             if state == 'get flags':
  203.                 if oc == 0:
  204.                     state = 'text'
  205.                     continue
  206.                 
  207.                 flags = oc
  208.                 state = 'get tag'
  209.                 continue
  210.             if state == 'get tag':
  211.                 state = None if oc == 0 else 'get attr'
  212.                 if flags & FLAG_OPENING:
  213.                     tag = oc
  214.                     buf.write('<')
  215.                     if not flags & FLAG_CLOSING:
  216.                         is_goingdown = True
  217.                     
  218.                     if tag == 32768:
  219.                         state = 'get custom length'
  220.                         continue
  221.                     
  222.                     if flags & FLAG_ATOM:
  223.                         if not (self.tag_atoms) or tag not in self.tag_atoms:
  224.                             raise LitError('atom tag %d not in atom tag list' % tag)
  225.                         tag not in self.tag_atoms
  226.                         tag_name = self.tag_atoms[tag]
  227.                         current_map = self.attr_atoms
  228.                     elif tag < len(self.tag_map):
  229.                         tag_name = self.tag_map[tag]
  230.                         current_map = self.tag_to_attr_map[tag]
  231.                     else:
  232.                         dynamic_tag += 1
  233.                         errors += 1
  234.                         tag_name = '?' + unichr(tag) + '?'
  235.                         current_map = self.tag_to_attr_map[tag]
  236.                         print 'WARNING: tag %s unknown' % unichr(tag)
  237.                     buf.write(encode(tag_name))
  238.                 elif flags & FLAG_CLOSING:
  239.                     if depth == 0:
  240.                         raise LitError('Extra closing tag')
  241.                     depth == 0
  242.                     return index
  243.                 continue
  244.             if state == 'get attr':
  245.                 in_censorship = False
  246.                 if oc == 0:
  247.                     if not is_goingdown:
  248.                         tag_name = None
  249.                         dynamic_tag = 0
  250.                         buf.write(' />')
  251.                     else:
  252.                         buf.write('>')
  253.                         index = self.binary_to_text(bin, buf, index, depth + 1)
  254.                         is_goingdown = False
  255.                         if not tag_name:
  256.                             raise LitError('Tag ends before it begins.')
  257.                         tag_name
  258.                         buf.write(encode(u''.join(('</', tag_name, '>'))))
  259.                         dynamic_tag = 0
  260.                         tag_name = None
  261.                     state = 'text'
  262.                 elif oc == 32768:
  263.                     state = 'get attr length'
  264.                     continue
  265.                 
  266.                 attr = None
  267.                 if current_map and oc in current_map and current_map[oc]:
  268.                     attr = current_map[oc]
  269.                 elif oc in self.attr_map:
  270.                     attr = self.attr_map[oc]
  271.                 
  272.                 if not attr or not isinstance(attr, basestring):
  273.                     raise LitError('Unknown attribute %d in tag %s' % (oc, tag_name))
  274.                 not isinstance(attr, basestring)
  275.                 if attr.startswith('%'):
  276.                     in_censorship = True
  277.                     state = 'get value length'
  278.                     continue
  279.                 
  280.                 buf.write(' ' + encode(attr) + '=')
  281.                 if attr in ('href', 'src'):
  282.                     state = 'get href length'
  283.                 else:
  284.                     state = 'get value length'
  285.             attr in ('href', 'src')
  286.             if state == 'get value length':
  287.                 if not in_censorship:
  288.                     buf.write('"')
  289.                 
  290.                 count = oc - 1
  291.                 if count == 0:
  292.                     if not in_censorship:
  293.                         buf.write('"')
  294.                     
  295.                     in_censorship = False
  296.                     state = 'get attr'
  297.                     continue
  298.                 
  299.                 state = 'get value'
  300.                 if oc == 65535:
  301.                     continue
  302.                 
  303.                 if count < 0 or count > len(bin) - index:
  304.                     raise LitError('Invalid character count %d' % count)
  305.                 count > len(bin) - index
  306.                 continue
  307.             if state == 'get value':
  308.                 if count == 65534:
  309.                     if not in_censorship:
  310.                         buf.write('%s"' % (oc - 1))
  311.                     
  312.                     in_censorship = False
  313.                     state = 'get attr'
  314.                 elif count > 0:
  315.                     if not in_censorship:
  316.                         if c == '"':
  317.                             c = '"'
  318.                         elif c == '<':
  319.                             c = '<'
  320.                         
  321.                         buf.write(c.encode('ascii', 'xmlcharrefreplace'))
  322.                     
  323.                     count -= 1
  324.                 
  325.                 if count == 0:
  326.                     if not in_censorship:
  327.                         buf.write('"')
  328.                     
  329.                     in_censorship = False
  330.                     state = 'get attr'
  331.                 
  332.             count == 0
  333.             if state == 'get custom length':
  334.                 count = oc - 1
  335.                 if count <= 0 or count > len(bin) - index:
  336.                     raise LitError('Invalid character count %d' % count)
  337.                 count > len(bin) - index
  338.                 dynamic_tag += 1
  339.                 state = 'get custom'
  340.                 tag_name = ''
  341.                 continue
  342.             if state == 'get custom':
  343.                 tag_name += c
  344.                 count -= 1
  345.                 if count == 0:
  346.                     buf.write(encode(tag_name))
  347.                     state = 'get attr'
  348.                 
  349.             count == 0
  350.             if state == 'get attr length':
  351.                 count = oc - 1
  352.                 if count <= 0 or count > len(bin) - index:
  353.                     raise LitError('Invalid character count %d' % count)
  354.                 count > len(bin) - index
  355.                 buf.write(' ')
  356.                 state = 'get custom attr'
  357.                 continue
  358.             if state == 'get custom attr':
  359.                 buf.write(encode(c))
  360.                 count -= 1
  361.                 if count == 0:
  362.                     buf.write('=')
  363.                     state = 'get value length'
  364.                 
  365.             count == 0
  366.             if state == 'get href length':
  367.                 count = oc - 1
  368.                 if count <= 0 or count > len(bin) - index:
  369.                     raise LitError('Invalid character count %d' % count)
  370.                 count > len(bin) - index
  371.                 href = ''
  372.                 state = 'get href'
  373.                 continue
  374.             if state == 'get href':
  375.                 href += c
  376.                 count -= 1
  377.                 if count == 0:
  378.                     (doc, frag) = urldefrag(href[1:])
  379.                     path = self.item_path(doc)
  380.                     if frag:
  381.                         path = '#'.join((path, frag))
  382.                     
  383.                     path = urlnormalize(path)
  384.                     buf.write(encode(u'"%s"' % path))
  385.                     state = 'get attr'
  386.                 
  387.             count == 0
  388.         return index
  389.  
  390.  
  391.  
  392. class DirectoryEntry(object):
  393.     
  394.     def __init__(self, name, section, offset, size):
  395.         self.name = name
  396.         self.section = section
  397.         self.offset = offset
  398.         self.size = size
  399.  
  400.     
  401.     def __repr__(self):
  402.         return 'DirectoryEntry(name=%s, section=%d, offset=%d, size=%d)' % (repr(self.name), self.section, self.offset, self.size)
  403.  
  404.     
  405.     def __str__(self):
  406.         return repr(self)
  407.  
  408.  
  409.  
  410. class ManifestItem(object):
  411.     
  412.     def __init__(self, original, internal, mime_type, offset, root, state):
  413.         self.original = original
  414.         self.internal = internal
  415.         self.mime_type = None if hasattr(mime_type, 'lower') else mime_type
  416.         self.offset = offset
  417.         self.root = root
  418.         self.state = state
  419.         path = original.replace('\\', '/')
  420.         if path[1:3] == ':/':
  421.             path = path[2:]
  422.         
  423.         path = os.path.normpath(path).replace('\\', '/')
  424.         while path.startswith('../'):
  425.             path = path[3:]
  426.         self.path = path
  427.  
  428.     
  429.     def __eq__(self, other):
  430.         if hasattr(other, 'internal'):
  431.             return self.internal == other.internal
  432.         return self.internal == other
  433.  
  434.     
  435.     def __repr__(self):
  436.         return 'ManifestItem(internal=%r, path=%r, mime_type=%r, offset=%d, root=%r, state=%r)' % (self.internal, self.path, self.mime_type, self.offset, self.root, self.state)
  437.  
  438.  
  439.  
  440. def preserve(function):
  441.     
  442.     def wrapper(self, *args, **kwargs):
  443.         opos = self.stream.tell()
  444.         
  445.         try:
  446.             return function(self, *args, **kwargs)
  447.         finally:
  448.             self.stream.seek(opos)
  449.  
  450.  
  451.     functools.update_wrapper(wrapper, function)
  452.     return wrapper
  453.  
  454.  
  455. class LitFile(object):
  456.     PIECE_SIZE = 16
  457.     
  458.     def __init__(self, filename_or_stream, log):
  459.         self._warn = log.warn
  460.         if hasattr(filename_or_stream, 'read'):
  461.             self.stream = filename_or_stream
  462.         else:
  463.             self.stream = open(filename_or_stream, 'rb')
  464.         
  465.         try:
  466.             self.opf_path = os.path.splitext(os.path.basename(self.stream.name))[0] + '.opf'
  467.         except AttributeError:
  468.             self.opf_path = 'content.opf'
  469.  
  470.         if self.magic != 'ITOLITLS':
  471.             raise LitError('Not a valid LIT file')
  472.         self.magic != 'ITOLITLS'
  473.         if self.version != 1:
  474.             raise LitError('Unknown LIT version %d' % (self.version,))
  475.         self.version != 1
  476.         self.read_secondary_header()
  477.         self.read_header_pieces()
  478.         self.read_section_names()
  479.         self.read_manifest()
  480.         self.read_drm()
  481.  
  482.     
  483.     def warn(self, msg):
  484.         self._warn(msg)
  485.  
  486.     
  487.     def magic():
  488.         
  489.         def fget(self):
  490.             self.stream.seek(0)
  491.             return self.stream.read(8)
  492.  
  493.         fget = preserve(fget)
  494.         return property(fget = fget)
  495.  
  496.     magic = magic()
  497.     
  498.     def version():
  499.         
  500.         def fget(self):
  501.             self.stream.seek(8)
  502.             return u32(self.stream.read(4))
  503.  
  504.         return property(fget = fget)
  505.  
  506.     version = version()
  507.     
  508.     def hdr_len():
  509.         
  510.         def fget(self):
  511.             self.stream.seek(12)
  512.             return int32(self.stream.read(4))
  513.  
  514.         fget = preserve(fget)
  515.         return property(fget = fget)
  516.  
  517.     hdr_len = hdr_len()
  518.     
  519.     def num_pieces():
  520.         
  521.         def fget(self):
  522.             self.stream.seek(16)
  523.             return int32(self.stream.read(4))
  524.  
  525.         fget = preserve(fget)
  526.         return property(fget = fget)
  527.  
  528.     num_pieces = num_pieces()
  529.     
  530.     def sec_hdr_len():
  531.         
  532.         def fget(self):
  533.             self.stream.seek(20)
  534.             return int32(self.stream.read(4))
  535.  
  536.         fget = preserve(fget)
  537.         return property(fget = fget)
  538.  
  539.     sec_hdr_len = sec_hdr_len()
  540.     
  541.     def guid():
  542.         
  543.         def fget(self):
  544.             self.stream.seek(24)
  545.             return self.stream.read(16)
  546.  
  547.         fget = preserve(fget)
  548.         return property(fget = fget)
  549.  
  550.     guid = guid()
  551.     
  552.     def header():
  553.         
  554.         def fget(self):
  555.             size = self.hdr_len + self.num_pieces * self.PIECE_SIZE + self.sec_hdr_len
  556.             self.stream.seek(0)
  557.             return self.stream.read(size)
  558.  
  559.         fget = preserve(fget)
  560.         return property(fget = fget)
  561.  
  562.     header = header()
  563.     
  564.     def __len__(self):
  565.         self.stream.seek(0, 2)
  566.         return self.stream.tell()
  567.  
  568.     __len__ = preserve(__len__)
  569.     
  570.     def read_raw(self, offset, size):
  571.         self.stream.seek(offset)
  572.         return self.stream.read(size)
  573.  
  574.     read_raw = preserve(read_raw)
  575.     
  576.     def read_content(self, offset, size):
  577.         return self.read_raw(self.content_offset + offset, size)
  578.  
  579.     
  580.     def read_secondary_header(self):
  581.         offset = self.hdr_len + self.num_pieces * self.PIECE_SIZE
  582.         bytes = self.read_raw(offset, self.sec_hdr_len)
  583.         offset = int32(bytes[4:])
  584.         while offset < len(bytes):
  585.             blocktype = bytes[offset:offset + 4]
  586.             blockver = u32(bytes[offset + 4:])
  587.             if blocktype == 'CAOL':
  588.                 if blockver != 2:
  589.                     raise LitError('Unknown CAOL block format %d' % blockver)
  590.                 blockver != 2
  591.                 self.creator_id = u32(bytes[offset + 12:])
  592.                 self.entry_chunklen = u32(bytes[offset + 20:])
  593.                 self.count_chunklen = u32(bytes[offset + 24:])
  594.                 self.entry_unknown = u32(bytes[offset + 28:])
  595.                 self.count_unknown = u32(bytes[offset + 32:])
  596.                 offset += 48
  597.                 continue
  598.             if blocktype == 'ITSF':
  599.                 if blockver != 4:
  600.                     raise LitError('Unknown ITSF block format %d' % blockver)
  601.                 blockver != 4
  602.                 if u32(bytes[offset + 4 + 16:]):
  603.                     raise LitError('This file has a 64bit content offset')
  604.                 u32(bytes[offset + 4 + 16:])
  605.                 self.content_offset = u32(bytes[offset + 16:])
  606.                 self.timestamp = u32(bytes[offset + 24:])
  607.                 self.language_id = u32(bytes[offset + 28:])
  608.                 offset += 48
  609.                 continue
  610.         if not hasattr(self, 'content_offset'):
  611.             raise LitError('Could not figure out the content offset')
  612.         hasattr(self, 'content_offset')
  613.  
  614.     
  615.     def read_header_pieces(self):
  616.         src = self.header[self.hdr_len:]
  617.         for i in xrange(self.num_pieces):
  618.             piece = src[i * self.PIECE_SIZE:(i + 1) * self.PIECE_SIZE]
  619.             if u32(piece[4:]) != 0 or u32(piece[12:]) != 0:
  620.                 raise LitError('Piece %s has 64bit value' % repr(piece))
  621.             u32(piece[12:]) != 0
  622.             offset = u32(piece)
  623.             size = int32(piece[8:])
  624.             piece = self.read_raw(offset, size)
  625.             if i == 0:
  626.                 continue
  627.                 continue
  628.             if i == 1:
  629.                 if u32(piece[8:]) != self.entry_chunklen or u32(piece[12:]) != self.entry_unknown:
  630.                     raise LitError('Secondary header does not match piece')
  631.                 u32(piece[12:]) != self.entry_unknown
  632.                 self.read_directory(piece)
  633.                 continue
  634.             if i == 2:
  635.                 if u32(piece[8:]) != self.count_chunklen or u32(piece[12:]) != self.count_unknown:
  636.                     raise LitError('Secondary header does not match piece')
  637.                 u32(piece[12:]) != self.count_unknown
  638.                 continue
  639.                 continue
  640.             if i == 3:
  641.                 self.piece3_guid = piece
  642.                 continue
  643.             if i == 4:
  644.                 self.piece4_guid = piece
  645.                 continue
  646.         
  647.  
  648.     
  649.     def read_directory(self, piece):
  650.         if not piece.startswith('IFCM'):
  651.             raise LitError('Header piece #1 is not main directory.')
  652.         piece.startswith('IFCM')
  653.         chunk_size = int32(piece[8:12])
  654.         num_chunks = int32(piece[24:28])
  655.         if 32 + num_chunks * chunk_size != len(piece):
  656.             raise LitError('IFCM header has incorrect length')
  657.         32 + num_chunks * chunk_size != len(piece)
  658.         self.entries = { }
  659.         for i in xrange(num_chunks):
  660.             offset = 32 + i * chunk_size
  661.             chunk = piece[offset:offset + chunk_size]
  662.             tag = chunk[:4]
  663.             chunk = chunk[4:]
  664.             if tag != 'AOLL':
  665.                 continue
  666.             
  667.             remaining = int32(chunk[:4])
  668.             chunk = chunk[4:]
  669.             if remaining >= chunk_size:
  670.                 raise LitError('AOLL remaining count is negative')
  671.             remaining >= chunk_size
  672.             remaining = chunk_size - (remaining + 48)
  673.             entries = u16(chunk[-2:])
  674.             if entries == 0:
  675.                 entries = 65535
  676.             
  677.             chunk = chunk[40:]
  678.             for j in xrange(entries):
  679.                 if remaining <= 0:
  680.                     break
  681.                 
  682.                 (namelen, chunk, remaining) = encint(chunk, remaining)
  683.                 if namelen != namelen & 2147483647:
  684.                     raise LitError('Directory entry had 64bit name length.')
  685.                 namelen != namelen & 2147483647
  686.                 if namelen > remaining - 3:
  687.                     raise LitError('Read past end of directory chunk')
  688.                 namelen > remaining - 3
  689.                 
  690.                 try:
  691.                     name = chunk[:namelen].decode('utf-8')
  692.                     chunk = chunk[namelen:]
  693.                     remaining -= namelen
  694.                 except UnicodeDecodeError:
  695.                     break
  696.  
  697.                 (section, chunk, remaining) = encint(chunk, remaining)
  698.                 (offset, chunk, remaining) = encint(chunk, remaining)
  699.                 (size, chunk, remaining) = encint(chunk, remaining)
  700.                 entry = DirectoryEntry(name, section, offset, size)
  701.                 self.entries[name] = entry
  702.             
  703.         
  704.  
  705.     
  706.     def read_section_names(self):
  707.         if '::DataSpace/NameList' not in self.entries:
  708.             raise LitError('Lit file does not have a valid NameList')
  709.         '::DataSpace/NameList' not in self.entries
  710.         raw = self.get_file('::DataSpace/NameList')
  711.         if len(raw) < 4:
  712.             raise LitError('Invalid Namelist section')
  713.         len(raw) < 4
  714.         pos = 4
  715.         num_sections = u16(raw[2:pos])
  716.         self.section_names = [
  717.             ''] * num_sections
  718.         self.section_data = [
  719.             None] * num_sections
  720.         for section in xrange(num_sections):
  721.             size = u16(raw[pos:pos + 2])
  722.             pos += 2
  723.             size = size * 2 + 2
  724.             if pos + size > len(raw):
  725.                 raise LitError('Invalid Namelist section')
  726.             pos + size > len(raw)
  727.             self.section_names[section] = raw[pos:pos + size].decode('utf-16-le').rstrip('\x00')
  728.             pos += size
  729.         
  730.  
  731.     
  732.     def read_manifest(self):
  733.         if '/manifest' not in self.entries:
  734.             raise LitError('Lit file does not have a valid manifest')
  735.         '/manifest' not in self.entries
  736.         raw = self.get_file('/manifest')
  737.         self.manifest = { }
  738.         self.paths = {
  739.             self.opf_path: None }
  740.         while raw:
  741.             slen = ord(raw[0])
  742.             raw = raw[1:]
  743.             if slen == 0:
  744.                 break
  745.             
  746.             root = raw[:slen].decode('utf8')
  747.             raw = raw[slen:]
  748.             if not raw:
  749.                 raise LitError('Truncated manifest')
  750.             raw
  751.             for state in [
  752.                 'spine',
  753.                 'not spine',
  754.                 'css',
  755.                 'images']:
  756.                 num_files = int32(raw)
  757.                 raw = raw[4:]
  758.                 if num_files == 0:
  759.                     continue
  760.                 
  761.                 for i in xrange(num_files):
  762.                     if len(raw) < 5:
  763.                         raise LitError('Truncated manifest')
  764.                     len(raw) < 5
  765.                     offset = u32(raw)
  766.                     raw = raw[4:]
  767.                     (internal, raw) = consume_sized_utf8_string(raw)
  768.                     (original, raw) = consume_sized_utf8_string(raw)
  769.                     original = urlunquote(original)
  770.                     (mime_type, raw) = consume_sized_utf8_string(raw, zpad = True)
  771.                     self.manifest[internal] = ManifestItem(original, internal, mime_type, offset, root, state)
  772.                 
  773.             
  774.         mlist = self.manifest.values()
  775.         if len(mlist) > 1:
  776.             shared = mlist[0].path
  777.             for item in mlist[1:]:
  778.                 path = item.path
  779.                 while shared and not path.startswith(shared):
  780.                     
  781.                     try:
  782.                         shared = shared[:shared.rindex('/', 0, -2) + 1]
  783.                     continue
  784.                     except ValueError:
  785.                         shared = None
  786.                         continue
  787.                     
  788.  
  789.                     None<EXCEPTION MATCH>ValueError
  790.                 if not shared:
  791.                     break
  792.                     continue
  793.             
  794.             if shared:
  795.                 slen = len(shared)
  796.                 for item in mlist:
  797.                     item.path = item.path[slen:]
  798.                 
  799.             
  800.         
  801.         for item in mlist:
  802.             if item.path[0] == '/':
  803.                 item.path = os.path.basename(item.path)
  804.             
  805.             self.paths[item.path] = item
  806.         
  807.  
  808.     
  809.     def read_drm(self):
  810.         self.drmlevel = 0
  811.         if '/DRMStorage/Licenses/EUL' in self.entries:
  812.             self.drmlevel = 5
  813.         elif '/DRMStorage/DRMBookplate' in self.entries:
  814.             self.drmlevel = 3
  815.         elif '/DRMStorage/DRMSealed' in self.entries:
  816.             self.drmlevel = 1
  817.         else:
  818.             return None
  819.         if ('/DRMStorage/Licenses/EUL' in self.entries).drmlevel < 5:
  820.             msdes.deskey(self.calculate_deskey(), msdes.DE1)
  821.             bookkey = msdes.des(self.get_file('/DRMStorage/DRMSealed'))
  822.             if bookkey[0] != '\x00':
  823.                 raise LitError('Unable to decrypt title key!')
  824.             bookkey[0] != '\x00'
  825.             self.bookkey = bookkey[1:9]
  826.         else:
  827.             raise DRMError('Cannot access DRM-protected book')
  828.         return ('/DRMStorage/Licenses/EUL' in self.entries).drmlevel < 5
  829.  
  830.     
  831.     def calculate_deskey(self):
  832.         hashfiles = [
  833.             '/meta',
  834.             '/DRMStorage/DRMSource']
  835.         if self.drmlevel == 3:
  836.             hashfiles.append('/DRMStorage/DRMBookplate')
  837.         
  838.         prepad = 2
  839.         hash = mssha1.new()
  840.         for name in hashfiles:
  841.             data = self.get_file(name)
  842.             if prepad > 0:
  843.                 data = '\x00' * prepad + data
  844.                 prepad = 0
  845.             
  846.             postpad = 64 - len(data) % 64
  847.             if postpad < 64:
  848.                 data = data + '\x00' * postpad
  849.             
  850.             hash.update(data)
  851.         
  852.         digest = hash.digest()
  853.         key = [
  854.             0] * 8
  855.         for i in xrange(0, len(digest)):
  856.             key[i % 8] ^= ord(digest[i])
  857.         
  858.         return ''.join((lambda .0: for x in .0:
  859. chr(x))(key))
  860.  
  861.     
  862.     def get_file(self, name):
  863.         entry = self.entries[name]
  864.         if entry.section == 0:
  865.             return self.read_content(entry.offset, entry.size)
  866.         section = self.get_section(entry.section)
  867.         return section[entry.offset:entry.offset + entry.size]
  868.  
  869.     
  870.     def get_section(self, section):
  871.         data = self.section_data[section]
  872.         if not data:
  873.             data = self.get_section_uncached(section)
  874.             self.section_data[section] = data
  875.         
  876.         return data
  877.  
  878.     
  879.     def get_section_uncached(self, section):
  880.         name = self.section_names[section]
  881.         path = '::DataSpace/Storage/' + name
  882.         transform = self.get_file(path + '/Transform/List')
  883.         content = self.get_file(path + '/Content')
  884.         control = self.get_file(path + '/ControlData')
  885.         while len(transform) >= 16:
  886.             csize = (int32(control) + 1) * 4
  887.             if csize > len(control) or csize <= 0:
  888.                 raise LitError('ControlData is too short')
  889.             csize <= 0
  890.             guid = msguid(transform)
  891.             if guid == DESENCRYPT_GUID:
  892.                 content = self.decrypt(content)
  893.                 control = control[csize:]
  894.             elif guid == LZXCOMPRESS_GUID:
  895.                 reset_table = self.get_file('/'.join(('::DataSpace/Storage', name, 'Transform', LZXCOMPRESS_GUID, 'InstanceData/ResetTable')))
  896.                 content = self.decompress(content, control, reset_table)
  897.                 control = control[csize:]
  898.             else:
  899.                 raise LitError('Unrecognized transform: %s.' % repr(guid))
  900.             transform = guid == DESENCRYPT_GUID[16:]
  901.         return content
  902.  
  903.     
  904.     def decrypt(self, content):
  905.         length = len(content)
  906.         extra = length & 7
  907.         if extra > 0:
  908.             self.warn('content length not a multiple of block size')
  909.             content += '\x00' * (8 - extra)
  910.         
  911.         msdes.deskey(self.bookkey, msdes.DE1)
  912.         return msdes.des(content)
  913.  
  914.     
  915.     def decompress(self, content, control, reset_table):
  916.         if len(control) < 32 or control[CONTROL_TAG:CONTROL_TAG + 4] != 'LZXC':
  917.             raise LitError('Invalid ControlData tag value')
  918.         control[CONTROL_TAG:CONTROL_TAG + 4] != 'LZXC'
  919.         if len(reset_table) < RESET_INTERVAL + 8:
  920.             raise LitError('Reset table is too short')
  921.         len(reset_table) < RESET_INTERVAL + 8
  922.         if u32(reset_table[RESET_UCLENGTH + 4:]) != 0:
  923.             raise LitError('Reset table has 64bit value for UCLENGTH')
  924.         u32(reset_table[RESET_UCLENGTH + 4:]) != 0
  925.         result = []
  926.         window_size = 14
  927.         u = u32(control[CONTROL_WINDOW_SIZE:])
  928.         while u > 0:
  929.             u >>= 1
  930.             window_size += 1
  931.         if window_size < 15 or window_size > 21:
  932.             raise LitError('Invalid window in ControlData')
  933.         window_size > 21
  934.         lzx.init(window_size)
  935.         ofs_entry = int32(reset_table[RESET_HDRLEN:]) + 8
  936.         uclength = int32(reset_table[RESET_UCLENGTH:])
  937.         accum = int32(reset_table[RESET_INTERVAL:])
  938.         bytes_remaining = uclength
  939.         window_bytes = 1 << window_size
  940.         base = 0
  941.         while ofs_entry < len(reset_table):
  942.             if accum >= window_bytes:
  943.                 accum = 0
  944.                 size = int32(reset_table[ofs_entry:])
  945.                 u = int32(reset_table[ofs_entry + 4:])
  946.                 if u != 0:
  947.                     raise LitError('Reset table entry greater than 32 bits')
  948.                 u != 0
  949.                 if size >= len(content):
  950.                     self._warn('LZX reset table entry out of bounds')
  951.                 
  952.                 if bytes_remaining >= window_bytes:
  953.                     lzx.reset()
  954.                     
  955.                     try:
  956.                         result.append(lzx.decompress(content[base:size], window_bytes))
  957.                     except lzx.LZXError:
  958.                         self.warn('LZX decompression error; skipping chunk')
  959.  
  960.                     bytes_remaining -= window_bytes
  961.                     base = size
  962.                 
  963.             
  964.             accum += int32(reset_table[RESET_INTERVAL:])
  965.             ofs_entry += 8
  966.         if bytes_remaining < window_bytes and bytes_remaining > 0:
  967.             lzx.reset()
  968.             
  969.             try:
  970.                 result.append(lzx.decompress(content[base:], bytes_remaining))
  971.             except lzx.LZXError:
  972.                 self.warn('LZX decompression error; skipping chunk')
  973.  
  974.             bytes_remaining = 0
  975.         
  976.         if bytes_remaining > 0:
  977.             raise LitError('Failed to completely decompress section')
  978.         bytes_remaining > 0
  979.         return ''.join(result)
  980.  
  981.     
  982.     def get_atoms(self, entry):
  983.         name = '/'.join(('/data', entry.internal, 'atom'))
  984.         if name not in self.entries:
  985.             return ({ }, { })
  986.         data = self.get_file(name)
  987.         nentries = u32(data)
  988.         data = data[4:]
  989.         tags = { }
  990.         for i in xrange(1, nentries + 1):
  991.             if len(data) <= 1:
  992.                 break
  993.             
  994.             size = ord(data[0])
  995.             data = data[1:]
  996.             if size == 0 or len(data) < size:
  997.                 break
  998.             
  999.             tags[i] = data[:size]
  1000.             data = data[size:]
  1001.         
  1002.         if len(tags) != nentries:
  1003.             self._warn('damaged or invalid atoms tag table')
  1004.         
  1005.         if len(data) < 4:
  1006.             return (tags, { })
  1007.         attrs = { }
  1008.         nentries = u32(data)
  1009.         data = data[4:]
  1010.         for i in xrange(1, nentries + 1):
  1011.             if len(data) <= 4:
  1012.                 break
  1013.             
  1014.             size = u32(data)
  1015.             data = data[4:]
  1016.             if size == 0 or len(data) < size:
  1017.                 break
  1018.             
  1019.             attrs[i] = data[:size]
  1020.             data = data[size:]
  1021.         
  1022.         if len(attrs) != nentries:
  1023.             self._warn('damaged or invalid atoms attributes table')
  1024.         
  1025.         return (tags, attrs)
  1026.  
  1027.  
  1028.  
  1029. class LitContainer(object):
  1030.     
  1031.     def __init__(self, filename_or_stream, log):
  1032.         self._litfile = LitFile(filename_or_stream, log)
  1033.         self.log = log
  1034.  
  1035.     
  1036.     def namelist(self):
  1037.         return self._litfile.paths.keys()
  1038.  
  1039.     
  1040.     def exists(self, name):
  1041.         return urlunquote(name) in self._litfile.paths
  1042.  
  1043.     
  1044.     def read(self, name):
  1045.         entry = None if name else None
  1046.         if entry is None:
  1047.             content = OPF_DECL + self._read_meta()
  1048.         elif 'spine' in entry.state:
  1049.             internal = '/'.join(('/data', entry.internal, 'content'))
  1050.             raw = self._litfile.get_file(internal)
  1051.             manifest = self._litfile.manifest
  1052.             atoms = self._litfile.get_atoms(entry)
  1053.             unbin = UnBinary(raw, name, manifest, HTML_MAP, atoms)
  1054.             content = HTML_DECL + str(unbin)
  1055.             tags = ('personname', 'place', 'city', 'country-region')
  1056.             pat = '(?i)</{0,1}st1:(%s)>' % '|'.join(tags)
  1057.             content = re.sub(pat, '', content)
  1058.             content = re.sub('<(/{0,1})form>', '<\\1div>', content)
  1059.         else:
  1060.             internal = '/'.join(('/data', entry.internal))
  1061.             content = self._litfile.get_file(internal)
  1062.         return content
  1063.  
  1064.     
  1065.     def _read_meta(self):
  1066.         path = 'content.opf'
  1067.         raw = self._litfile.get_file('/meta')
  1068.         
  1069.         try:
  1070.             unbin = UnBinary(raw, path, self._litfile.manifest, OPF_MAP)
  1071.         except LitError:
  1072.             if 'PENGUIN group' not in raw:
  1073.                 raise 
  1074.             'PENGUIN group' not in raw
  1075.             print 'WARNING: attempting PENGUIN malformed OPF fix'
  1076.             raw = raw.replace('PENGUIN group', '\x00\x01\x18\x00PENGUIN group', 1)
  1077.             unbin = UnBinary(raw, path, self._litfile.manifest, OPF_MAP)
  1078.  
  1079.         return str(unbin)
  1080.  
  1081.     
  1082.     def get_metadata(self):
  1083.         return self._read_meta()
  1084.  
  1085.  
  1086.  
  1087. class LitReader(OEBReader):
  1088.     Container = LitContainer
  1089.     DEFAULT_PROFILE = 'MSReader'
  1090.  
  1091.  
  1092. try:
  1093.     import psyco
  1094.     psyco.bind(read_utf8_char)
  1095.     psyco.bind(UnBinary.binary_to_text)
  1096. except ImportError:
  1097.     pass
  1098.  
  1099.