# home *** CD-ROM | disk | FTP | other *** search
# Wrap
# Source Generated with Decompyle++
# File: in.pyc (Python 2.6)
"""Read and edit the metadata stored in LRF files.

:class:`LRFMetaFile` wraps a seekable file object and exposes the fixed
binary header fields and the compressed XML info block as attributes.
:func:`get_metadata` / :func:`set_metadata` translate between that view and
``MetaInformation`` objects, and :func:`main` is a small command line front
end.

This is Python 2 code (``cStringIO``, ``unicode``).  The name ``_`` used for
translatable strings is not defined in this file; presumably it is installed
as a builtin by the application's gettext setup -- TODO confirm.
"""

__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'

import struct
import zlib
import sys
import os
from shutil import copyfileobj
from cStringIO import StringIO
import xml.dom.minidom as dom
from functools import wraps

from calibre.devices.prs500.prstypes import field
from calibre.ebooks.metadata import MetaInformation, string_to_authors

# ``struct`` format strings for little-endian unsigned integers.
BYTE = '<B'    # 1 byte
WORD = '<H'    # 2 bytes
DWORD = '<I'   # 4 bytes
QWORD = '<Q'   # 8 bytes


class versioned_field(field):
    """A ``field`` that is only usable while ``vfield > version``.

    Reading a disabled field yields ``None``; writing one raises
    :class:`LRFException`.

    NOTE(review): ``LRFMetaFile`` passes the *descriptor objects* ``version``
    and ``compressed_info_size`` as ``vfield`` at class-definition time, so
    ``enabled()`` compares a descriptor, not the value stored in a particular
    file.  Under Python 2 ordering rules that comparison is likely always
    true -- confirm whether this is intended before relying on it.
    """

    def __init__(self, vfield, version, start=0, fmt=WORD):
        field.__init__(self, start=start, fmt=fmt)
        self.vfield = vfield
        self.version = version

    def enabled(self):
        """Return whether the guarding value exceeds the threshold."""
        return self.vfield > self.version

    def __get__(self, obj, typ=None):
        if self.enabled():
            return field.__get__(self, obj, typ=typ)
        return None

    def __set__(self, obj, val):
        if not self.enabled():
            raise LRFException('Trying to set disabled field')
        field.__set__(self, obj, val)


class LRFException(Exception):
    """Raised for malformed LRF data and invalid metadata operations."""
    pass


class fixed_stringfield(object):
    """Descriptor for a fixed-length byte string at a fixed file offset.

    The owning object must provide ``unpack(start=..., fmt=...)`` and
    ``pack(value, start=..., fmt=...)``.
    """

    def __init__(self, length=8, start=0):
        """
        :param length: Exact size of the string in bytes.
        :param start: Byte offset at which the string is stored.
        """
        self._length = length
        self._start = start

    def __get__(self, obj, typ=None):
        return obj.unpack(start=self._start, fmt='<%ds' % self._length)[0]

    def __set__(self, obj, val):
        """Store ``val``; raise LRFException if its length is wrong."""
        if val.__class__.__name__ != 'str':
            val = str(val)
        if len(val) != self._length:
            raise LRFException(
                'Trying to set fixed_stringfield with a string of incorrect length')
        obj.pack(val, start=self._start, fmt='<%ds' % len(val))

    def __repr__(self):
        return ('A string of length ' + str(self._length) +
                ' starting at byte ' + str(self._start))


def _find_elem(document, tag_name, parent_name):
    """Return the last ``tag_name`` element whose parent node is named
    ``parent_name``, or ``None`` if there is no such element."""
    found = None
    for candidate in document.getElementsByTagName(tag_name):
        if candidate.parentNode.nodeName == parent_name:
            found = candidate
    return found


class xml_attr_field(object):
    """Descriptor exposing one attribute of an element in ``obj.info``."""

    def __init__(self, tag_name, attr, parent='BookInfo'):
        """
        :param tag_name: Name of the element carrying the attribute.
        :param attr: Name of the attribute.
        :param parent: Required node name of the element's parent.
        """
        self.tag_name = tag_name
        self.parent = parent
        self.attr = attr

    def __get__(self, obj, typ=None):
        """Return the attribute value, or '' if element/attribute is absent."""
        elem = _find_elem(obj.info, self.tag_name, self.parent)
        if elem is not None and elem.hasAttribute(self.attr):
            return elem.getAttribute(self.attr)
        return ''

    def __set__(self, obj, val):
        """Set the attribute (``None`` is stored as '') and write the
        document back through ``obj.info``.  Nothing is set when the element
        does not exist."""
        if val is None:
            val = ''
        document = obj.info
        elem = _find_elem(document, self.tag_name, self.parent)
        if elem is not None:
            elem.setAttribute(self.attr, val)
        obj.info = document

    def __repr__(self):
        # main() relies on the 'XML' prefix to pick out metadata fields.
        return 'XML Attr Field: ' + self.tag_name + ' in ' + self.parent

    def __str__(self):
        return self.tag_name + '.' + self.attr


class xml_field(object):
    """Descriptor exposing the text content of an element in ``obj.info``."""

    def __init__(self, tag_name, parent='BookInfo'):
        """
        :param tag_name: Name of the element holding the text.
        :param parent: Required node name of the element's parent.
        """
        self.tag_name = tag_name
        self.parent = parent

    def __get__(self, obj, typ=None):
        """Return the stripped text of the element, or '' if it is missing
        or empty."""
        elem = _find_elem(obj.info, self.tag_name, self.parent)
        if elem is not None:
            elem.normalize()
            if elem.hasChildNodes():
                return elem.firstChild.data.strip()
        return ''

    def __set__(self, obj, val):
        """Replace the element's content with ``val``.

        Falsy values are stored as an empty string; byte strings are decoded
        as UTF-8.  The element is created under the first ``parent`` element
        when it does not exist yet.
        """
        if not val:
            val = u''
        if not isinstance(val, unicode):
            val = unicode(val, 'utf-8')

        document = obj.info
        elem = _find_elem(document, self.tag_name, self.parent)
        if elem is None:
            elem = document.createElement(self.tag_name)
            document.getElementsByTagName(self.parent)[0].appendChild(elem)
        else:
            elem.normalize()
            while elem.hasChildNodes():
                elem.removeChild(elem.lastChild)
        elem.appendChild(document.createTextNode(val))
        obj.info = document

    def __str__(self):
        return self.tag_name

    def __repr__(self):
        # main() relies on the 'XML' prefix to pick out metadata fields.
        return 'XML Field: ' + self.tag_name + ' in ' + self.parent


def insert_into_file(fileobj, data, start, end):
    """Replace the bytes ``[start, end)`` of ``fileobj`` with ``data``.

    Everything from ``end`` to the end of the file is buffered in memory,
    ``data`` is written at ``start``, the file is truncated there and the
    buffered tail is appended again.

    :param fileobj: Seekable file object open for reading and writing.
    :param data: The replacement bytes.
    :param start: Offset at which ``data`` is written.
    :param end: Offset of the first byte that must be preserved.
    :return: How far the preserved tail moved (negative if it moved back).
    """
    tail = StringIO()
    fileobj.seek(end)
    copyfileobj(fileobj, tail, -1)
    tail.flush()
    tail.seek(0)

    fileobj.seek(start)
    fileobj.write(data)
    fileobj.flush()
    fileobj.truncate()
    delta = fileobj.tell() - end

    copyfileobj(tail, fileobj, -1)
    fileobj.flush()
    tail.close()
    return delta


def get_metadata(stream):
    """Return the metadata of an LRF file as a ``MetaInformation`` object.

    :param stream: A file-like object or an existing :class:`LRFMetaFile`.
    """
    # Reuse a caller-supplied LRFMetaFile instead of discarding it.
    lrf = stream if isinstance(stream, LRFMetaFile) else LRFMetaFile(stream)
    authors = string_to_authors(lrf.author)
    mi = MetaInformation(lrf.title.strip(), authors)
    mi.author = lrf.author.strip()
    mi.comments = lrf.free_text.strip()
    mi.category = lrf.category.strip() + ', ' + lrf.classification.strip()
    # NOTE(review): the decompiler reduced this to ``tags = _[1]`` (the
    # residue of a list comprehension); reconstructed here -- confirm
    # against the original source.
    tags = [x.strip() for x in mi.category.split(',') if x.strip()]
    if tags:
        mi.tags = tags
    if mi.category.strip() == ',':
        mi.category = None
    mi.publisher = lrf.publisher.strip()
    mi.cover_data = lrf.get_cover()

    # The sort keys are optional; failures reading them are ignored.
    try:
        mi.title_sort = lrf.title_reading.strip()
        if not mi.title_sort:
            mi.title_sort = None
    except Exception:
        pass
    try:
        mi.author_sort = lrf.author_reading.strip()
        if not mi.author_sort:
            mi.author_sort = None
    except Exception:
        pass

    # Treat empty and placeholder values as "not set".
    if not mi.title or 'unknown' in mi.title.lower():
        mi.title = None
    if not mi.authors:
        mi.authors = None
    if not mi.author or 'unknown' in mi.author.lower():
        mi.author = None
    if not mi.category or 'unknown' in mi.category.lower():
        mi.category = None
    if (not mi.publisher or 'unknown' in mi.publisher.lower()
            or 'some publisher' in mi.publisher.lower()):
        mi.publisher = None
    return mi


class LRFMetaFile(object):
    """Attribute-style access to the header and metadata of an LRF file.

    Header values are descriptors that read/write the underlying file at
    fixed offsets; the ``xml_field`` / ``xml_attr_field`` attributes read and
    rewrite the zlib-compressed XML document exposed as :attr:`info`.
    """

    # 'LRF' encoded as UTF-16LE: the six bytes expected at offset 0.
    LRF_HEADER = 'LRF'.encode('utf-16le')

    # --- fixed binary header -------------------------------------------
    lrf_header = fixed_stringfield(length=6, start=0)
    version = field(fmt=WORD, start=8)
    xor_key = field(fmt=WORD, start=10)
    root_object_id = field(fmt=DWORD, start=12)
    number_of_objects = field(fmt=QWORD, start=16)
    object_index_offset = field(fmt=QWORD, start=24)
    binding = field(fmt=BYTE, start=36)
    dpi = field(fmt=WORD, start=38)
    width = field(fmt=WORD, start=42)
    height = field(fmt=WORD, start=44)
    color_depth = field(fmt=BYTE, start=46)
    toc_object_id = field(fmt=DWORD, start=68)
    toc_object_offset = field(fmt=DWORD, start=72)
    compressed_info_size = field(fmt=WORD, start=76)
    thumbnail_type = versioned_field(version, 800, fmt=WORD, start=78)
    thumbnail_size = versioned_field(version, 800, fmt=DWORD, start=80)
    uncompressed_info_size = versioned_field(compressed_info_size, 0,
                                             fmt=DWORD, start=84)

    # --- values stored in the XML info document ------------------------
    title = xml_field('Title', parent='BookInfo')
    title_reading = xml_attr_field('Title', 'reading', parent='BookInfo')
    author = xml_field('Author', parent='BookInfo')
    author_reading = xml_attr_field('Author', 'reading', parent='BookInfo')
    book_id = xml_field('BookID', parent='BookInfo')
    publisher = xml_field('Publisher', parent='BookInfo')
    label = xml_field('Label', parent='BookInfo')
    category = xml_field('Category', parent='BookInfo')
    classification = xml_field('Classification', parent='BookInfo')
    free_text = xml_field('FreeText', parent='BookInfo')
    language = xml_field('Language', parent='DocInfo')
    creator = xml_field('Creator', parent='DocInfo')
    creation_date = xml_field('CreationDate', parent='DocInfo')
    producer = xml_field('Producer', parent='DocInfo')
    page = xml_field('SumPage', parent='DocInfo')

    def safe(func):
        """Decorator: put the file position back after ``func`` returns.

        The position is only restored when the file is still at least that
        long; otherwise it is left at the end of the file.  Nothing is
        restored if ``func`` raises.
        """
        @wraps(func)
        def restore_pos(*args, **kwargs):
            obj = args[0]
            pos = obj._file.tell()
            res = func(*args, **kwargs)
            obj._file.seek(0, 2)
            if obj._file.tell() >= pos:
                obj._file.seek(pos)
            return res
        return restore_pos

    def safe_property(func):
        """Build a property from the dict returned by ``func()``.

        ``func`` returns the keyword arguments for ``property`` (``fget``,
        ``fset``, ``doc``); the accessors are wrapped so that they restore
        the file position in the same way as :func:`safe`.
        """
        def decorator(f):
            def restore_pos(*args, **kwargs):
                obj = args[0]
                pos = obj._file.tell()
                res = f(*args, **kwargs)
                obj._file.seek(0, 2)
                if obj._file.tell() >= pos:
                    obj._file.seek(pos)
                return res
            return restore_pos

        locals_ = func()
        if 'fget' in locals_:
            locals_['fget'] = decorator(locals_['fget'])
        if 'fset' in locals_:
            locals_['fset'] = decorator(locals_['fset'])
        return property(**locals_)

    @safe_property
    def info():
        doc = """
        Document meta information as a minidom Document object.
        To set use a minidom document object.
        """

        def fget(self):
            if self.compressed_info_size == 0:
                raise LRFException('This document has no meta info')
            # compressed_info_size counts 4 bytes beyond the zlib stream
            # (see fset, which stores len(stream) + 4).
            size = self.compressed_info_size - 4
            self._file.seek(self.info_start)
            try:
                src = zlib.decompress(self._file.read(size))
            except zlib.error:
                raise LRFException(
                    'Unable to decompress document meta information')
            if len(src) != self.uncompressed_info_size:
                raise LRFException('Decompression of document meta info '
                                   'yielded unexpected results')
            # Progressively more forgiving parse attempts: as is, with NUL
            # bytes removed, and finally re-encoded from latin1 to UTF-8.
            try:
                return dom.parseString(src)
            except Exception:
                cleaned = src.replace('\x00', '').strip()
                try:
                    return dom.parseString(cleaned)
                except Exception:
                    return dom.parseString(
                        cleaned.decode('latin1').encode('utf-8'))

        def fset(self, document):
            info = document.toxml('utf-8')
            self.uncompressed_info_size = len(info)
            stream = zlib.compress(info)
            orig_size = self.compressed_info_size
            self.compressed_info_size = len(stream) + 4
            delta = insert_into_file(self._file, stream, self.info_start,
                                     self.info_start + orig_size - 4)
            # Everything stored after the info block has moved by delta.
            if self.toc_object_offset > 0:
                self.toc_object_offset += delta
            self.object_index_offset += delta
            self.update_object_offsets(delta)

        return {'fget': fget, 'fset': fset, 'doc': doc}

    @safe_property
    def thumbnail_pos():
        doc = ' The position of the thumbnail in the LRF file '

        def fget(self):
            # The thumbnail starts right after the compressed info stream.
            return self.info_start + self.compressed_info_size - 4

        return {'fget': fget, 'doc': doc}

    @classmethod
    def _detect_thumbnail_type(cls, slice):
        """Guess the thumbnail type code from the first bytes of the image.

        Returns 17 for JFIF, 19 for BM, 18 for PNG and 20 otherwise; when
        several markers are present the later check wins.
        """
        ttype = 20
        if 'PNG' in slice:
            ttype = 18
        if 'BM' in slice:
            ttype = 19
        if 'JFIF' in slice:
            ttype = 17
        return ttype

    @safe_property
    def thumbnail():
        doc = """
        The thumbnail.
        Represented as a string.
        The string you would get from the file read function.
        """

        def fget(self):
            size = self.thumbnail_size
            if size:
                self._file.seek(self.thumbnail_pos)
                return self._file.read(size)

        def fset(self, data):
            if self.version <= 800:
                raise LRFException(
                    'Cannot store thumbnails in LRF files of version <= 800')
            slice = data[0:16]
            orig_size = self.thumbnail_size
            self.thumbnail_size = len(data)
            delta = insert_into_file(self._file, data, self.thumbnail_pos,
                                     self.thumbnail_pos + orig_size)
            # Everything stored after the thumbnail has moved by delta.
            self.toc_object_offset += delta
            self.object_index_offset += delta
            self.thumbnail_type = self._detect_thumbnail_type(slice)
            self.update_object_offsets(delta)

        return {'fget': fget, 'fset': fset, 'doc': doc}

    def __init__(self, file):
        """
        :param file: Seekable file object; it must be open for writing as
            well if any metadata is going to be changed.
        :raises LRFException: If the file does not start with LRF_HEADER.
        """
        file.seek(0, 2)
        self.size = file.tell()
        self._file = file
        if self.lrf_header != LRFMetaFile.LRF_HEADER:
            raise LRFException(
                file.name +
                ' has an invalid LRF header. Are you sure it is an LRF file?')
        # Offset of the compressed info stream.  For version > 800 it
        # directly follows the 4-byte uncompressed_info_size field at 84.
        # NOTE(review): the decompiler lost the first constant (it emitted
        # ``None``); 88 is derived from the header layout above -- confirm.
        self.info_start = 88 if self.version > 800 else 83

    @safe
    def update_object_offsets(self, delta):
        """Add ``delta`` to the offset stored in every object index entry.

        :raises LRFException: If a resulting offset is below 76 or does not
            fit in 32 bits.
        """
        self._file.seek(self.object_index_offset)
        count = self.number_of_objects
        while count > 0:
            # Each entry is 16 bytes; the offset is the DWORD at bytes 4-8.
            raw = self._file.read(8)
            new_offset = struct.unpack(DWORD, raw[4:8])[0] + delta
            if new_offset >= 2 ** 32 or new_offset < 76:
                raise LRFException(
                    _('Invalid LRF file. Could not set metadata.'))
            self._file.seek(-4, os.SEEK_CUR)
            self._file.write(struct.pack(DWORD, new_offset))
            self._file.seek(8, os.SEEK_CUR)  # skip the rest of the entry
            count -= 1
        self._file.flush()

    @safe
    def unpack(self, fmt=DWORD, start=0):
        """Return the tuple ``struct.unpack(fmt, ...)`` of the bytes stored
        at offset ``start``."""
        self._file.seek(start)
        return struct.unpack(fmt, self._file.read(struct.calcsize(fmt)))

    @safe
    def pack(self, *args, **kwargs):
        """Write ``args`` packed with ``kwargs['fmt']`` at offset
        ``kwargs['start']``.  Both keywords are required."""
        encoded = struct.pack(kwargs['fmt'], *args)
        self._file.seek(kwargs['start'])
        self._file.write(encoded)
        self._file.flush()

    def thumbail_extension(self):
        """Return the file extension matching ``thumbnail_type``
        ('jpeg', 'png', 'bmp', or 'gif' for anything else)."""
        ttype = self.thumbnail_type
        if ttype == 17:
            return 'jpeg'
        if ttype == 18:
            return 'png'
        if ttype == 19:
            return 'bmp'
        return 'gif'

    def fix_thumbnail_type(self):
        """Re-detect ``thumbnail_type`` from the stored thumbnail data."""
        slice = self.thumbnail[0:16]
        self.thumbnail_type = self._detect_thumbnail_type(slice)

    def seek(self, *args):
        """Delegate to the underlying file's ``seek``."""
        return self._file.seek(*args)

    def tell(self):
        """Delegate to the underlying file's ``tell``."""
        return self._file.tell()

    def read(self):
        """Read from the current position to the end of the file."""
        return self._file.read()

    def write(self, val):
        """Write ``val`` at the current position of the underlying file."""
        self._file.write(val)

    def _objects(self):
        """Yield the first three DWORDs of every 16-byte index entry
        (consumed as ``(id, offset, size)`` by the callers)."""
        self._file.seek(self.object_index_offset)
        remaining = self.number_of_objects
        while remaining > 0:
            remaining -= 1
            raw = self._file.read(16)
            pos = self._file.tell()
            yield struct.unpack('<IIII', raw)[:3]
            # The consumer may have moved the file position; go back.
            self._file.seek(pos)

    def get_objects_by_type(self, type):
        """Return ``(obj_id, offset, size)`` for every object of ``type``."""
        from calibre.ebooks.lrf.tags import Tag
        objects = []
        for id, offset, size in self._objects():
            self._file.seek(offset)
            tag = Tag(self._file)
            if tag.id == 62720:
                obj_id, obj_type = struct.unpack('<IH', tag.contents)
                if obj_type == type:
                    objects.append((obj_id, offset, size))
        return objects

    def get_object_by_id(self, tid):
        """Return ``(obj_id, offset, size, obj_type)`` for the object with id
        ``tid``, or ``(False, False, False, False)`` if there is none."""
        from calibre.ebooks.lrf.tags import Tag
        for id, offset, size in self._objects():
            self._file.seek(offset)
            tag = Tag(self._file)
            if tag.id == 62720:
                obj_id, obj_type = struct.unpack('<IH', tag.contents)
                if obj_id == tid:
                    return (obj_id, offset, size, obj_type)
        return (False, False, False, False)

    @safe
    def get_cover(self):
        """Return ``(extension, data)`` taken from the stream referenced by
        the first object of type 12, or ``None`` if there is no such object.
        """
        from calibre.ebooks.lrf.objects import get_object
        for id, offset, size in self.get_objects_by_type(12):
            image = get_object(None, self._file, id, offset, size,
                               self.xor_key)
            id, offset, size = self.get_object_by_id(image.refstream)[:3]
            image_stream = get_object(None, self._file, id, offset, size,
                                      self.xor_key)
            return (image_stream.file.rpartition('.')[-1],
                    image_stream.stream)
        return None


def option_parser():
    """Build the command line option parser used by :func:`main`."""
    from calibre.utils.config import OptionParser
    from calibre.constants import __appname__, __version__
    parser = OptionParser(
        usage=_('%prog [options] mybook.lrf\n\n\nShow/edit the metadata in an LRF file.\n\n'),
        version=__appname__ + ' ' + __version__,
        epilog='Created by Kovid Goyal')
    parser.add_option('-t', '--title', action='store', type='string',
                      dest='title', help=_('Set the book title'))
    parser.add_option('--title-sort', action='store', type='string',
                      default=None, dest='title_reading',
                      help=_('Set sort key for the title'))
    parser.add_option('-a', '--author', action='store', type='string',
                      dest='author', help=_('Set the author'))
    parser.add_option('--author-sort', action='store', type='string',
                      default=None, dest='author_reading',
                      help=_('Set sort key for the author'))
    parser.add_option('-c', '--category', action='store', type='string',
                      dest='category',
                      help=_('The category this book belongs to. E.g.: History'))
    parser.add_option('--thumbnail', action='store', type='string',
                      dest='thumbnail',
                      help=_("Path to a graphic that will be set as this files' thumbnail"))
    parser.add_option('--comment', action='store', type='string',
                      dest='comment',
                      help=_('Path to a txt file containing the comment to be stored in the lrf file.'))
    parser.add_option('--get-thumbnail', action='store_true',
                      dest='get_thumbnail', default=False,
                      help=_('Extract thumbnail from LRF file'))
    parser.add_option('--publisher', default=None,
                      help=_('Set the publisher'))
    parser.add_option('--classification', default=None,
                      help=_('Set the book classification'))
    parser.add_option('--creator', default=None,
                      help=_('Set the book creator'))
    parser.add_option('--producer', default=None,
                      help=_('Set the book producer'))
    parser.add_option('--get-cover', action='store_true', default=False,
                      help=_('Extract cover from LRF file. Note that the LRF format has no defined cover, so we use some heuristics to guess the cover.'))
    parser.add_option('--bookid', action='store', type='string',
                      default=None, dest='book_id', help=_('Set book ID'))
    return parser


def set_metadata(stream, mi):
    """Copy the values that are set on ``mi`` into the LRF file ``stream``.

    :param stream: Seekable file object open for reading and writing.
    :param mi: Object with ``title``, ``authors``, ``tags``, ``comments``,
        ``author_sort``, ``publisher`` and optionally ``category``.
    """
    lrf = LRFMetaFile(stream)
    if mi.title:
        lrf.title = mi.title
    if mi.authors:
        lrf.author = ', '.join(mi.authors)
    if mi.tags:
        lrf.category = mi.tags[0]
    # An explicit category takes precedence over the first tag.
    if getattr(mi, 'category', False):
        lrf.category = mi.category
    if mi.comments:
        lrf.free_text = mi.comments
    if mi.author_sort:
        lrf.author_reading = mi.author_sort
    if mi.publisher:
        lrf.publisher = mi.publisher


def _apply_options(lrf, options):
    """Write the metadata requested on the command line into ``lrf``."""
    if options.title:
        lrf.title = options.title
    if options.title_reading is not None:
        lrf.title_reading = options.title_reading
    if options.author_reading is not None:
        lrf.author_reading = options.author_reading
    if options.author:
        lrf.author = options.author
    if options.publisher:
        lrf.publisher = options.publisher
    if options.classification:
        lrf.classification = options.classification
    if options.category:
        lrf.category = options.category
    if options.creator:
        lrf.creator = options.creator
    if options.producer:
        lrf.producer = options.producer
    if options.thumbnail:
        path = os.path.expanduser(os.path.expandvars(options.thumbnail))
        with open(path, 'rb') as src:
            lrf.thumbnail = src.read()
    if options.book_id is not None:
        lrf.book_id = options.book_id
    if options.comment:
        path = os.path.expanduser(os.path.expandvars(options.comment))
        with open(path) as src:
            lrf.free_text = src.read()


def _report(lrf, options, lrf_path):
    """Print the XML metadata fields of ``lrf`` and extract the thumbnail
    and/or cover into the current directory when requested."""
    td = 'None'
    if options.get_thumbnail:
        t = lrf.thumbnail
        if t and len(t) > 0:
            td = (os.path.basename(lrf_path) + '_thumbnail.' +
                  lrf.thumbail_extension())
            # Image data: must be written in binary mode.
            with open(td, 'wb') as out:
                out.write(t)

    # The XML descriptors are recognised by the 'XML' prefix of their repr.
    for name, descriptor in sorted(LRFMetaFile.__dict__.items()):
        if 'XML' in str((name, descriptor)):
            print('%s: %s' % (str(descriptor),
                              getattr(lrf, name).encode('utf-8')))

    if options.get_thumbnail:
        print('Thumbnail: %s' % td)

    if options.get_cover:
        # get_cover() returns None (unpacking fails) or may raise on
        # unexpected data; either way there is no usable cover.
        try:
            ext, data = lrf.get_cover()
        except Exception:
            ext, data = None, None
        if data:
            cover = (os.path.splitext(os.path.basename(lrf_path))[0] +
                     '_cover.' + ext)
            with open(cover, 'wb') as out:
                out.write(data)
            print('Cover: %s' % cover)
        else:
            print('Could not find cover in the LRF file')


def main(args=sys.argv):
    """Command line entry point: show and/or edit the metadata of one file.

    :param args: Argument list including the program name.
    :return: 1 if no single LRF file was given, otherwise ``None``.
    """
    parser = option_parser()
    options, args = parser.parse_args(args)
    if len(args) != 2:
        parser.print_help()
        print('')
        print('No lrf file specified')
        return 1
    with open(args[1], 'r+b') as stream:
        lrf = LRFMetaFile(stream)
        _apply_options(lrf, options)
        _report(lrf, options, args[1])


if __name__ == '__main__':
    sys.exit(main())