
# Source Generated with Decompyle++
# File: in.pyc (Python 2.6)

from __future__ import with_statement
__license__ = 'GPL 3'
__copyright__ = '2010, Greg Riker <griker@hotmail.com>'
__docformat__ = 'restructuredtext en'
import StringIO
import sys
from struct import pack
from calibre.ebooks.metadata import MetaInformation
class StreamSlicer(object):
    '''Wrap a seekable stream so byte ranges can be read and written
    with index/slice syntax.'''

    def __init__(self, stream, start=0, stop=None):
        self._stream = stream
        self.start = start
        if stop is None:
            # Default the window to the end of the stream
            stream.seek(0, 2)
            stop = stream.tell()
        self.stop = stop
        self._len = stop - start

    def __len__(self):
        return self._len

    def __getitem__(self, key):
        stream = self._stream
        base = self.start
        if isinstance(key, (int, long)):
            stream.seek(base + key)
            return stream.read(1)
        if isinstance(key, slice):
            start, stop, stride = key.indices(self._len)
            if stride < 0:
                start, stop = stop, start
            size = stop - start
            if size <= 0:
                return ''
            stream.seek(base + start)
            data = stream.read(size)
            if stride != 1:
                data = data[::stride]
            return data
        raise TypeError('stream indices must be integers')

    def __setitem__(self, key, value):
        stream = self._stream
        base = self.start
        if isinstance(key, (int, long)):
            if len(value) != 1:
                raise ValueError('key and value lengths must match')
            stream.seek(base + key)
            return stream.write(value)
        if isinstance(key, slice):
            start, stop, stride = key.indices(self._len)
            if stride < 0:
                start, stop = stop, start
            size = stop - start
            if stride != 1:
                value = value[::stride]
            if len(value) != size:
                raise ValueError('key and value lengths must match')
            stream.seek(base + start)
            return stream.write(value)
        raise TypeError('stream indices must be integers')

    def update(self, data_blocks):
        # Rewrite the stream from self.start onwards with the given blocks
        stream = self._stream
        base = self.start
        stream.seek(base)
        self._stream.truncate(base)
        for block in data_blocks:
            stream.write(block)

    def truncate(self, value):
        self._stream.truncate(value)

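# Usage sketch (illustrative only; not part of the original module, and
# 'sample.tpz' is a hypothetical file name):
#
#   with open('sample.tpz', 'r+b') as f:
#       data = StreamSlicer(f)
#       sig = data[:4]      # read bytes 0-3 of the stream
#       data[0:4] = sig     # patch the same four bytes in place
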
class MetadataUpdater(object):

    def __init__(self, stream):
        self.stream = stream
        self.data = StreamSlicer(stream)
        sig = self.data[:4]
        if not sig.startswith('TPZ'):
            raise ValueError("'%s': Not a Topaz file" % getattr(stream, 'name', 'Unnamed stream'))
        offset = 4
        # Number of header records, stored as a variable-width integer
        self.header_records, consumed = self.decode_vwi(self.data[offset:offset + 4])
        offset += consumed
        self.topaz_headers, self.th_seq = self.get_headers(offset)
        if 'metadata' not in self.topaz_headers:
            raise ValueError("'%s': Invalid Topaz format - no metadata record" % getattr(stream, 'name', 'Unnamed stream'))
        md_offset = self.topaz_headers['metadata']['blocks'][0]['offset']
        md_offset += self.base
        # The byte after the tag-length vwi must spell out 'metadata'
        if self.data[md_offset + 1:md_offset + 9] != 'metadata':
            raise ValueError("'%s': Damaged metadata record" % getattr(stream, 'name', 'Unnamed stream'))

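    # File layout implied by the parsing above (a reading of this code, not
    # a format specification): a 4-byte signature beginning 'TPZ', a vwi
    # count of header records, the header records themselves (see
    # get_headers), a one-byte end-of-headers marker, then the payload
    # blocks; self.base is the offset where the payload area begins.
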
    def book_length(self):
        self.get_original_metadata()
        if 'bookLength' in self.metadata:
            return int(self.metadata['bookLength'])
        return 0

    def decode_vwi(self, bytes):
        # Decode a variable-width integer: 7 data bits per byte, big-endian,
        # with the high bit set on every byte except the last
        pos, val = 0, 0
        done = False
        while pos < len(bytes) and not done:
            b = ord(bytes[pos])
            pos += 1
            if b & 0x80 == 0:
                done = True
            b &= 0x7F
            val <<= 7
            val |= b
        return val, pos

    def dump_headers(self):
        # Diagnostic: print each header tag with its record count and offset
        print '\ndump_headers():'
        for tag in self.topaz_headers:
            print '%s: ' % tag
            num_recs = len(self.topaz_headers[tag]['blocks'])
            print ' num_recs: %d' % num_recs
            if num_recs:
                print ' starting offset: 0x%x' % self.topaz_headers[tag]['blocks'][0]['offset']

    def dump_hex(self, src, length=16):
        # Diagnostic: print src as a hex/ASCII dump, 'length' bytes per row
        FILTER = ''.join([(len(repr(chr(x))) == 3) and chr(x) or '.' for x in range(256)])
        N = 0
        result = ''
        while src:
            s, src = src[:length], src[length:]
            hexa = ' '.join(['%02X' % ord(x) for x in s])
            s = s.translate(FILTER)
            result += '%04X   %-*s   %s\n' % (N, length * 3, hexa, s)
            N += length
        print result

    def dump_metadata(self):
        for tag in self.metadata:
            print '%s: %s' % (tag, repr(self.metadata[tag]))

    def encode_vwi(self, value):
        # Encode a variable-width integer: 7 data bits per byte, big-endian,
        # setting the high bit on every byte except the last
        bytes = []
        multi_byte = value > 0x7F
        while value:
            b = value & 0x7F
            value >>= 7
            if value == 0:
                if multi_byte:
                    bytes.append(b | 0x80)
                    if bytes[-1] == 0xFF:
                        bytes.append(0x80)
                    if len(bytes) == 4:
                        return pack('>BBBB', bytes[3], bytes[2], bytes[1], bytes[0]).decode('iso-8859-1')
                    if len(bytes) == 3:
                        return pack('>BBB', bytes[2], bytes[1], bytes[0]).decode('iso-8859-1')
                    if len(bytes) == 2:
                        return pack('>BB', bytes[1], bytes[0]).decode('iso-8859-1')
                else:
                    return pack('>B', b).decode('iso-8859-1')
            if len(bytes):
                bytes.append(b | 0x80)
            else:
                bytes.append(b)
        # value == 0 encodes as a single zero byte
        return pack('>B', 0).decode('iso-8859-1')

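    # Round-trip sketch for the two vwi helpers above (illustrative only;
    # assumes 'mu' is an existing MetadataUpdater instance):
    #
    #   packed = mu.encode_vwi(300)                 # u'\x82\x2c'
    #   val, consumed = mu.decode_vwi(packed.encode('iso-8859-1'))
    #   assert (val, consumed) == (300, 2)
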
    def generate_dkey(self):
        # Rebuild the 'dkey' record from the original stream
        for x in self.topaz_headers:
            if self.topaz_headers[x]['tag'] == 'dkey':
                if self.topaz_headers[x]['blocks']:
                    offset = self.base + self.topaz_headers[x]['blocks'][0]['offset']
                    len_uncomp = self.topaz_headers[x]['blocks'][0]['len_uncomp']
                    break
                else:
                    return None
        dkey = self.topaz_headers[x]
        dks = StringIO.StringIO()
        dks.write(self.encode_vwi(len(dkey['tag'])))
        offset += 1
        dks.write(dkey['tag'])
        offset += len('dkey')
        dks.write(chr(0))
        offset += 1
        dks.write(self.data[offset:offset + len_uncomp].decode('iso-8859-1'))
        return dks.getvalue().encode('iso-8859-1')

    def get_headers(self, offset):
        # Build a dict of header records plus a list preserving their order
        topaz_headers = {}
        th_seq = []
        for x in range(self.header_records):
            offset += 1
            taglen, consumed = self.decode_vwi(self.data[offset:offset + 4])
            offset += consumed
            tag = self.data[offset:offset + taglen]
            offset += taglen
            num_vals, consumed = self.decode_vwi(self.data[offset:offset + 4])
            offset += consumed
            blocks = {}
            for val in range(num_vals):
                hdr_offset, consumed = self.decode_vwi(self.data[offset:offset + 4])
                offset += consumed
                len_uncomp, consumed = self.decode_vwi(self.data[offset:offset + 4])
                offset += consumed
                len_comp, consumed = self.decode_vwi(self.data[offset:offset + 4])
                offset += consumed
                blocks[val] = dict(offset=hdr_offset, len_uncomp=len_uncomp, len_comp=len_comp)
            topaz_headers[tag] = dict(blocks=blocks)
            th_seq.append(tag)
        self.eoth = self.data[offset]
        offset += 1
        self.base = offset
        return topaz_headers, th_seq

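    # Per-record layout as parsed above (a reading of this code, not a
    # format specification): a one-byte record marker, a vwi tag length,
    # the tag name (e.g. 'metadata'), a vwi block count, then for each
    # block three vwi values: offset, uncompressed length, compressed
    # length.
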
    def generate_metadata_stream(self):
        # Serialize self.metadata back into Topaz metadata-record form
        ms = StringIO.StringIO()
        ms.write(self.encode_vwi(len(self.md_header['tag'])).encode('iso-8859-1'))
        ms.write(self.md_header['tag'])
        ms.write(chr(self.md_header['flags']))
        ms.write(chr(len(self.metadata)))
        for tag in self.md_seq:
            ms.write(self.encode_vwi(len(tag)).encode('iso-8859-1'))
            ms.write(tag)
            ms.write(self.encode_vwi(len(self.metadata[tag])).encode('iso-8859-1'))
            ms.write(self.metadata[tag])
        return ms.getvalue()

    def get_metadata(self):
        self.get_original_metadata()
        return MetaInformation(self.metadata['Title'], [self.metadata['Authors']])

    def get_original_metadata(self):
        # Parse the metadata record into self.metadata (tag -> value) and
        # self.md_seq (tag order)
        offset = self.base + self.topaz_headers['metadata']['blocks'][0]['offset']
        self.md_header = {}
        taglen, consumed = self.decode_vwi(self.data[offset:offset + 4])
        offset += consumed
        self.md_header['tag'] = self.data[offset:offset + taglen]
        offset += taglen
        self.md_header['flags'] = ord(self.data[offset])
        offset += 1
        self.md_header['num_recs'] = ord(self.data[offset])
        offset += 1
        self.metadata = {}
        self.md_seq = []
        for x in range(self.md_header['num_recs']):
            taglen, consumed = self.decode_vwi(self.data[offset:offset + 4])
            offset += consumed
            tag = self.data[offset:offset + taglen]
            offset += taglen
            md_len, consumed = self.decode_vwi(self.data[offset:offset + 4])
            offset += consumed
            metadata = self.data[offset:offset + md_len]
            offset += md_len
            self.metadata[tag] = metadata
            self.md_seq.append(tag)

    def regenerate_headers(self, updated_md_len):
        # Rewrite the header table, shifting block offsets that follow the
        # metadata record by the change in metadata length
        original_md_len = self.topaz_headers['metadata']['blocks'][0]['len_uncomp']
        original_md_offset = self.topaz_headers['metadata']['blocks'][0]['offset']
        delta = updated_md_len - original_md_len
        ths = StringIO.StringIO()
        # Copy the signature and the header-record count unchanged
        ths.write(self.data[:5])
        for tag in self.th_seq:
            ths.write('c')
            ths.write(self.encode_vwi(len(tag)))
            ths.write(tag)
            if self.topaz_headers[tag]['blocks']:
                ths.write(self.encode_vwi(len(self.topaz_headers[tag]['blocks'])))
                for block in self.topaz_headers[tag]['blocks']:
                    b = self.topaz_headers[tag]['blocks'][block]
                    if b['offset'] <= original_md_offset:
                        ths.write(self.encode_vwi(b['offset']))
                    else:
                        ths.write(self.encode_vwi(b['offset'] + delta))
                    if tag == 'metadata':
                        ths.write(self.encode_vwi(updated_md_len))
                    else:
                        ths.write(self.encode_vwi(b['len_uncomp']))
                    ths.write(self.encode_vwi(b['len_comp']))
            else:
                ths.write(self.encode_vwi(0))
        self.original_md_start = original_md_offset + self.base
        self.original_md_len = original_md_len
        return ths.getvalue().encode('iso-8859-1')

    def update(self, mi):
        # Apply title/author from a MetaInformation object, then rewrite
        # the stream in place
        self.get_original_metadata()

        try:
            from calibre.ebooks.conversion.config import load_defaults
            prefs = load_defaults('mobi_output')
            pas = prefs.get('prefer_author_sort', False)
        except:
            pas = False

        if mi.author_sort and pas:
            authors = mi.author_sort
            self.metadata['Authors'] = authors.encode('utf-8')
        elif mi.authors:
            authors = '; '.join(mi.authors)
            self.metadata['Authors'] = authors.encode('utf-8')
        self.metadata['Title'] = mi.title.encode('utf-8')
        updated_metadata = self.generate_metadata_stream()
        # The taglen byte, the 'metadata' tag and the flags byte precede
        # the record body
        prefix = len('metadata') + 2
        um_buf_len = len(updated_metadata) - prefix
        head = self.regenerate_headers(um_buf_len)
        chunk1 = self.data[self.base:self.original_md_start]
        chunk2 = self.data[prefix + self.original_md_start + self.original_md_len:]
        self.stream.seek(0)
        self.stream.truncate(0)
        self.stream.write(head)
        self.stream.write('d')
        self.stream.write(chunk1)
        self.stream.write(updated_metadata)
        self.stream.write(chunk2)


def get_metadata(stream):
    mu = MetadataUpdater(stream)
    return mu.get_metadata()


def set_metadata(stream, mi):
    mu = MetadataUpdater(stream)
    mu.update(mi)

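# Typical use of the module-level helpers (sketch only; 'book.tpz' is a
# hypothetical file name):
#
#   with open('book.tpz', 'r+b') as f:
#       mi = get_metadata(f)        # returns a MetaInformation object
#       mi.title = 'New Title'
#       set_metadata(f, mi)         # rewrites the stream in place
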
if __name__ == '__main__':
    if False:
        # Test get_metadata()
        print get_metadata(open(sys.argv[1], 'rb'))
    else:
        # Test set_metadata(): update a copy of the file in memory, then
        # write it out alongside the original as '<name>-updated.<ext>'
        import cStringIO
        data = open(sys.argv[1], 'rb')
        stream = cStringIO.StringIO()
        stream.write(data.read())
        mi = MetaInformation(title='Updated Title', authors=['Author, Random'])
        set_metadata(stream, mi)
        tokens = sys.argv[1].rpartition('.')
        updated_data = open(tokens[0] + '-updated' + '.' + tokens[2], 'wb')
        updated_data.write(stream.getvalue())
        updated_data.close()