Maximum CD 2010 November

home *** CD-ROM | disk | FTP | other *** search

/ Maximum CD 2010 November / maximum-cd-2010-11.iso / DiscContents / calibre-0.7.13.msi / file_928 (.txt) < prev next >

Wrap

Python Compiled Bytecode | 2010-08-06 | 54.8 KB | 1,804 lines

# Source Generated with Decompyle++
# File: in.pyc (Python 2.6)
#
# NOTE(review): this module was recovered from decompiled bytecode.  Places
# where the decompiler output was not valid Python have been reconstructed by
# hand and are marked with "reconstructed" comments -- confirm them against
# the upstream calibre sources before relying on them.
"""Parsers for the object records of a Sony LRF (BBeB) e-book file.

Every record is read as a sequence of ``Tag`` objects.  Each class carries a
``tag_map`` that maps a tag id to ``[attribute_name, kind]`` or
``[attribute_name, kind, converter]`` where ``kind`` is one of:

* ``'D'`` / ``'W'`` / ``'B'`` -- ``tag.dword`` / ``tag.word`` / ``tag.byte``
* ``'w'`` -- ``tag.word`` reinterpreted as a signed 16 bit value
* ``'P'`` -- ``tag.contents``
* ``''``  -- the tag is accepted and ignored
* anything else -- the name of a method called as ``method(tag, stream)``

and ``converter`` is either a callable or a lookup dict applied to the value.
"""

__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'

import struct
import array
import zlib
import cStringIO
import collections
import re

from calibre.ebooks.lrf import LRFParseError, PRS500_PROFILE
from calibre import entity_to_unicode, prepare_string_for_xml
from calibre.ebooks.lrf.tags import Tag

ruby_tags = {
    62837: ['rubyAlignAndAdjust', 'W'],
    62838: ['rubyoverhang', 'W', {0: 'none', 1: 'auto'}],
    62839: ['empdotsposition', 'W', {1: 'before', 2: 'after'}],
    62840: ['', 'parse_empdots'],
    62841: ['emplineposition', 'W', {1: 'before', 2: 'after'}],
    62842: ['emplinetype', 'W', {0: 'none', 16: 'solid', 32: 'dashed',
                                 48: 'double', 64: 'dotted'}],
}


def _css_item(line, inline):
    """Format one CSS declaration: tab/newline delimited, or inline."""
    # reconstructed: the '' and ' ' constants were lost by the decompiler.
    return ('' if inline else '\t') + line + (' ' if inline else '\n')


class LRFObject(object):
    """Base class: reads tags up to ``boundary`` and stores their values."""

    tag_map = {
        62720: ['', ''],
        62722: ['infoLink', 'D'],
        62721: ['', ''],
    }

    @classmethod
    def descramble_buffer(cls, buf, l, xorKey):
        """Return ``buf`` with its first ``l`` bytes XOR-ed with ``xorKey``."""
        a = array.array('B', buf)
        for i in range(l):
            a[i] ^= xorKey
        return a.tostring()

    @classmethod
    def parse_empdots(self, tag, f):
        # NOTE(review): this is a classmethod, so the three values are stored
        # on the class, not on the object being parsed -- confirm intended.
        (self.refEmpDotsFont, self.empDotsFontName,
         self.empDotsCode) = tag.contents

    @staticmethod
    def tag_to_val(h, obj, tag, stream):
        """Decode ``tag`` according to the tag_map entry ``h``.

        ``obj`` is only used when ``h[1]`` names a parser method.
        """
        kind = h[1]
        val = None
        if kind == 'D':
            val = tag.dword
        elif kind == 'W':
            val = tag.word
        elif kind == 'w':
            val = tag.word
            if val > 32768:
                val -= 65536
        elif kind == 'B':
            val = tag.byte
        elif kind == 'P':
            val = tag.contents
        elif kind != '':
            val = getattr(obj, kind)(tag, stream)
        if len(h) > 2:
            # reconstructed: the callable branch was decompiled as ``None``.
            val = h[2](val) if callable(h[2]) else h[2][val]
        return val

    def __init__(self, document, stream, id, scramble_key, boundary):
        self._scramble_key = scramble_key
        self._document = document
        self.id = id
        while stream.tell() < boundary:
            self.handle_tag(Tag(stream), stream)

    def parse_bg_image(self, tag, f):
        self.bg_image_mode, self.bg_image_id = struct.unpack(
            '<HI', tag.contents)

    def handle_tag(self, tag, stream, tag_map=None):
        """Apply ``tag`` to this object; raise LRFParseError if unknown."""
        if tag_map is None:
            tag_map = self.__class__.tag_map
        if tag.id not in tag_map:
            raise LRFParseError('Unknown tag in %s: %s' % (
                self.__class__.__name__, str(tag)))
        h = tag_map[tag.id]
        val = LRFObject.tag_to_val(h, self, tag, stream)
        if h[1] != '' and h[0] != '':
            setattr(self, h[0], val)

    def __iter__(self):
        # Plain objects have no children.
        return iter(())

    def __unicode__(self):
        return unicode(self.__class__.__name__)

    def __str__(self):
        return unicode(self).encode('utf-8')


class LRFContentObject(LRFObject):
    """Parses a content stream whose tags dispatch to methods by name.

    Here ``tag_map`` values are either a method name or a
    ``[method_name, extra_argument]`` pair.
    """

    tag_map = {}

    def __init__(self, bytes, objects):
        # Accept either a file-like object or a raw byte string.
        if hasattr(bytes, 'read'):
            self.stream = bytes
        else:
            self.stream = cStringIO.StringIO(bytes)
        length = self.stream_size()
        self.objects = objects
        self._contents = []
        self.current = 0
        self.in_container = True
        self.parse_stream(length)

    def parse_stream(self, length):
        while self.in_container and self.stream.tell() < length:
            self.handle_tag(Tag(self.stream))

    def stream_size(self):
        """Return the total stream length without moving the position."""
        pos = self.stream.tell()
        self.stream.seek(0, 2)
        size = self.stream.tell()
        self.stream.seek(pos)
        return size

    def handle_tag(self, tag):
        if tag.id not in self.tag_map:
            raise LRFParseError('Unknown tag in %s: %s' % (
                self.__class__.__name__, str(tag)))
        action = self.tag_map[tag.id]
        if isinstance(action, basestring):
            func, args = action, ()
        else:
            func, args = action[0], (action[1],)
        getattr(self, func)(tag, *args)

    def __iter__(self):
        return iter(self._contents)


class LRFStream(LRFObject):
    """An object that carries one (possibly scrambled/compressed) stream."""

    tag_map = {
        62724: ['', 'read_stream_size'],
        62804: ['stream_flags', 'W'],
        62725: ['', 'read_stream'],
        62726: ['', 'end_stream'],
    }
    tag_map.update(LRFObject.tag_map)

    def __init__(self, document, stream, id, scramble_key, boundary):
        self.stream = ''
        self.stream_size = 0
        self.stream_read = False
        LRFObject.__init__(self, document, stream, id, scramble_key, boundary)

    def read_stream_size(self, tag, stream):
        self.stream_size = tag.dword

    def end_stream(self, tag, stream):
        self.stream_read = True

    def read_stream(self, tag, stream):
        """Read ``stream_size`` bytes, then descramble/decompress per flags."""
        if self.stream_read:
            raise LRFParseError('There can be only one stream per object')
        if not hasattr(self, 'stream_flags'):
            raise LRFParseError('Stream flags not initialized')
        self.stream = stream.read(self.stream_size)
        if self.stream_flags & 512 != 0:
            l = len(self.stream)
            key = self._scramble_key & 255
            if key != 0 and key <= 240:
                key = l % key + 15
            else:
                key = 0
            # reconstructed: the decompiler emitted "A and B or C" here; a
            # single object cannot be both an ImageStream and a Font.
            # Only the first 1024 bytes of these stream types are scrambled.
            if l > 1024 and isinstance(self, (ImageStream, Font, SoundStream)):
                l = 1024
            self.stream = self.descramble_buffer(self.stream, l, key)
        if self.stream_flags & 256 != 0:
            decomp_size = struct.unpack('<I', self.stream[:4])[0]
            self.stream = zlib.decompress(self.stream[4:])
            if len(self.stream) != decomp_size:
                raise LRFParseError('Stream decompressed size is wrong!')
        if stream.read(2) != '\x06\xf5':
            print('Warning: corrupted end-of-stream tag at %08X; skipping it'
                  % (stream.tell() - 2))
        self.end_stream(None, None)


class PageTree(LRFObject):
    tag_map = {
        62812: ['_contents', 'P'],
    }
    tag_map.update(LRFObject.tag_map)

    def __iter__(self):
        for id in getattr(self, '_contents', []):
            yield self._document.objects[id]


class StyleObject(object):
    """Mixin that serialises whichever tag_map attributes are set."""

    def _tags_to_xml(self):
        s = u''
        for h in self.tag_map.values():
            attr = h[0]
            if hasattr(self, attr):
                s += u'%s="%s" ' % (attr, getattr(self, attr))
        return s

    def __unicode__(self):
        s = u'<%s objid="%s" stylelabel="%s" ' % (
            self.__class__.__name__.replace('Attr', 'Style'),
            self.id, self.id)
        s += self._tags_to_xml()
        s += u'/>\n'
        return s

    def as_dict(self):
        d = {}
        for h in self.tag_map.values():
            attr = h[0]
            if hasattr(self, attr):
                d[attr] = getattr(self, attr)
        return d


class PageAttr(StyleObject, LRFObject):
    tag_map = {
        62727: ['oddheaderid', 'D'],
        62728: ['evenheaderid', 'D'],
        62729: ['oddfooterid', 'D'],
        62730: ['evenfooterid', 'D'],
        62753: ['topmargin', 'W'],
        62754: ['headheight', 'W'],
        62755: ['headsep', 'W'],
        62756: ['oddsidemargin', 'W'],
        62764: ['evensidemargin', 'W'],
        62757: ['textheight', 'W'],
        62758: ['textwidth', 'W'],
        62759: ['footspace', 'W'],
        62760: ['footheight', 'W'],
        62773: ['layout', 'W', {65: 'TbRl', 52: 'LrTb'}],
        62763: ['pageposition', 'W', {0: 'any', 1: 'upper', 2: 'lower'}],
        62762: ['setemptyview', 'W', {1: 'show', 0: 'empty'}],
        62938: ['setwaitprop', 'W', {1: 'replay', 2: 'noreplay'}],
        62761: ['', 'parse_bg_image'],
    }
    tag_map.update(LRFObject.tag_map)

    @classmethod
    def to_css(cls, obj, inline=False):
        return ''


class Color(object):
    """A 32 bit colour; the low byte is ``a``, then ``r``, ``g``, ``b``."""

    def __init__(self, val):
        self.a = val & 255
        self.r = val >> 8 & 255
        self.g = val >> 16 & 255
        self.b = val >> 24 & 255

    def __unicode__(self):
        return u'0x%02x%02x%02x%02x' % (self.a, self.r, self.g, self.b)

    def __str__(self):
        return unicode(self)

    def __len__(self):
        return 4

    def __getitem__(self, i):
        # Sequence view is (r, g, b, 255 - a).
        return (self.r, self.g, self.b, 255 - self.a)[i]

    def to_html(self):
        return 'rgb(%d, %d, %d)' % (self.r, self.g, self.b)


class EmptyPageElement(object):
    """Base for page elements that have no children."""

    def __iter__(self):
        return iter(())

    def __str__(self):
        return unicode(self)


class PageDiv(EmptyPageElement):

    def __init__(self, pain, spacesize, linewidth, linecolor):
        self.pain = pain
        self.spacesize = spacesize
        self.linewidth = linewidth
        self.linecolor = Color(linecolor)

    def __unicode__(self):
        # Fixed: used to read the nonexistent attribute ``self.color``.
        return (u'\n<PageDiv pain="%s" spacesize="%s" linewidth="%s" '
                u'linecolor="%s" />\n' % (self.pain, self.spacesize,
                                           self.linewidth, self.linecolor))


class RuledLine(EmptyPageElement):
    linetype_map = {0: 'none', 16: 'solid', 32: 'dashed', 48: 'double',
                    64: 'dotted', 19: 'unknown13'}

    def __init__(self, linelength, linetype, linewidth, linecolor):
        self.linelength = linelength
        self.linewidth = linewidth
        self.linetype = self.linetype_map[linetype]
        self.linecolor = Color(linecolor)
        self.id = -1

    def __unicode__(self):
        return (u'\n<RuledLine linelength="%s" linetype="%s" linewidth="%s" '
                u'linecolor="%s" />\n' % (self.linelength, self.linetype,
                                           self.linewidth, self.linecolor))


class Wait(EmptyPageElement):

    def __init__(self, time):
        self.time = time

    def __unicode__(self):
        return u'\n<Wait time="%d" />\n' % self.time


class Locate(EmptyPageElement):
    pos_map = {1: 'bottomleft', 2: 'bottomright', 3: 'topright',
               4: 'topleft', 5: 'base'}

    def __init__(self, pos):
        # ``pos`` is the raw numeric code; it is stored as its name.
        self.pos = self.pos_map[pos]

    def __unicode__(self):
        return u'\n<Locate pos="%s" />\n' % self.pos


class BlockSpace(EmptyPageElement):

    def __init__(self, xspace, yspace):
        self.xspace = xspace
        self.yspace = yspace

    def __unicode__(self):
        return u'\n<BlockSpace xspace="%d" yspace="%d" />\n' % (
            self.xspace, self.yspace)


class Page(LRFStream):
    tag_map = {
        62723: ['style_id', 'D'],
        62731: ['obj_list', 'P'],
        62833: ['', ''],
        62844: ['parent_page_tree', 'D'],
    }
    tag_map.update(PageAttr.tag_map)
    tag_map.update(LRFStream.tag_map)

    style = property(
        fget=lambda self: self._document.objects[self.style_id])
    evenheader = property(
        fget=lambda self: self._document.objects[self.style.evenheaderid])
    evenfooter = property(
        fget=lambda self: self._document.objects[self.style.evenfooterid])
    oddheader = property(
        fget=lambda self: self._document.objects[self.style.oddheaderid])
    oddfooter = property(
        fget=lambda self: self._document.objects[self.style.oddfooterid])

    class Content(LRFContentObject):
        """The element list of a page, parsed from the page's stream."""

        tag_map = {
            62723: 'link',
            62798: 'page_div',
            62791: 'x_space',
            62790: 'y_space',
            62792: 'do_pos',
            62835: 'ruled_line',
            62932: 'wait',
            62934: 'sound_stop',
        }

        def __init__(self, bytes, objects):
            self.in_blockspace = False
            LRFContentObject.__init__(self, bytes, objects)

        def link(self, tag):
            self.close_blockspace()
            self._contents.append(self.objects[tag.dword])

        def page_div(self, tag):
            self.close_blockspace()
            pars = struct.unpack('<HIHI', tag.contents)
            self._contents.append(PageDiv(*pars))

        def x_space(self, tag):
            self.xspace = tag.word
            self.in_blockspace = True

        def y_space(self, tag):
            self.yspace = tag.word
            self.in_blockspace = True

        def do_pos(self, tag):
            # Fixed: was ``tag.wordself.pos_map[tag.word]`` (AttributeError).
            # Keep the raw code; Locate() maps it to a name when the pending
            # block space is closed.
            self.pos = tag.word
            self.in_blockspace = True

        def ruled_line(self, tag):
            self.close_blockspace()
            pars = struct.unpack('<HHHI', tag.contents)
            self._contents.append(RuledLine(*pars))

        def wait(self, tag):
            self.close_blockspace()
            self._contents.append(Wait(tag.word))

        def sound_stop(self, tag):
            self.close_blockspace()

        def close_blockspace(self):
            """Emit the pending Locate/BlockSpace element, if any."""
            # NOTE(review): ``in_blockspace`` is never reset to False here,
            # so once set every later call emits a BlockSpace -- confirm
            # whether that is intended before changing it.
            if not self.in_blockspace:
                return
            if hasattr(self, 'pos'):
                self._contents.append(Locate(self.pos))
                delattr(self, 'pos')
            else:
                # reconstructed: the decompiler lost these conditionals.
                xspace = getattr(self, 'xspace', 0)
                yspace = getattr(self, 'yspace', 0)
                self._contents.append(BlockSpace(xspace, yspace))
                if hasattr(self, 'xspace'):
                    delattr(self, 'xspace')
                if hasattr(self, 'yspace'):
                    delattr(self, 'yspace')

    def header(self, odd):
        # reconstructed: the odd branch was decompiled as ``None``.
        style = self._document.objects[self.style_id]
        id = style.oddheaderid if odd else style.evenheaderid
        return self._document.objects[id]

    def footer(self, odd):
        # reconstructed: the odd branch was decompiled as ``None``.
        style = self._document.objects[self.style_id]
        id = style.oddfooterid if odd else style.evenfooterid
        return self._document.objects[id]

    def initialize(self):
        self.content = Page.Content(self.stream, self._document.objects)

    def __iter__(self):
        return iter(self.content)

    def __unicode__(self):
        s = u'\n<Page pagestyle="%d" objid="%d">\n' % (self.style_id, self.id)
        s += u''.join([unicode(i) for i in self])
        s += '\n</Page>\n'
        return s

    def __str__(self):
        return unicode(self)

    def to_html(self):
        return u''.join([i.to_html() for i in self])


class BlockAttr(StyleObject, LRFObject):
    tag_map = {
        62769: ['blockwidth', 'W'],
        62770: ['blockheight', 'W'],
        62771: ['blockrule', 'W', {20: 'horz-fixed', 18: 'horz-adjustable',
                                   65: 'vert-fixed', 33: 'vert-adjustable',
                                   68: 'block-fixed',
                                   34: 'block-adjustable'}],
        62772: ['bgcolor', 'D', Color],
        62773: ['layout', 'W', {65: 'TbRl', 52: 'LrTb'}],
        62774: ['framewidth', 'W'],
        62775: ['framecolor', 'D', Color],
        62766: ['framemode', 'W', {0: 'none', 2: 'curve', 1: 'square'}],
        62776: ['topskip', 'W'],
        62777: ['sidemargin', 'W'],
        62778: ['footskip', 'W'],
        62761: ['', 'parse_bg_image'],
    }
    tag_map.update(LRFObject.tag_map)

    @classmethod
    def to_css(cls, obj, inline=False):
        """Return CSS declarations for the block attributes set on ``obj``."""
        # Declarations are collected in a list: a nested function cannot
        # rebind a variable of the enclosing function in Python 2.
        rules = []

        def item(line):
            rules.append(_css_item(line, inline))

        if hasattr(obj, 'sidemargin'):
            margin = str(obj.sidemargin) + 'px'
            item('margin-left: %(m)s; margin-right: %(m)s;' % dict(m=margin))
        if hasattr(obj, 'topskip'):
            item('margin-top: %dpx;' % obj.topskip)
        if hasattr(obj, 'footskip'):
            item('margin-bottom: %dpx;' % obj.footskip)
        if hasattr(obj, 'framewidth'):
            item('border: solid %dpx' % obj.framewidth)
        if hasattr(obj, 'framecolor') and obj.framecolor.a < 255:
            item('border-color: %s;' % obj.framecolor.to_html())
        if hasattr(obj, 'bgcolor') and obj.bgcolor.a < 255:
            item('background-color: %s;' % obj.bgcolor.to_html())
        return ''.join(rules)


class TextCSS(object):
    """Mixin providing CSS output for text attributes."""

    @classmethod
    def to_css(cls, obj, inline=False):
        """Return CSS declarations for the text attributes set on ``obj``.

        Uses ``cls.FONT_MAP``, which the class it is called on must provide.
        """
        rules = []

        def item(line):
            rules.append(_css_item(line, inline))

        fs = getattr(obj, 'fontsize', None)
        if fs is not None:
            # reconstructed: operator grouping was lost by the decompiler;
            # the value is scaled by ten before formatting.
            item('font-size: %fpt;' % (int(fs) / 10.0))
        fw = getattr(obj, 'fontweight', None)
        if fw is not None:
            # reconstructed: the 'bold' constant was lost by the decompiler.
            item('font-weight: %s;' % ('bold' if int(fw) >= 700 else 'normal'))
        fn = getattr(obj, 'fontfacename', None)
        if fn is not None:
            item('font-family: %s;' % cls.FONT_MAP[fn])
        fg = getattr(obj, 'textcolor', None)
        if fg is not None:
            item('color: %s;' % fg.to_html())
        bg = getattr(obj, 'textbgcolor', None)
        if bg is not None:
            item('background-color: %s;' % bg.to_html())
        al = getattr(obj, 'align', None)
        if al is not None:
            # Fixed: the lookup dict itself used to be formatted into the CSS.
            css_align = dict(head='left', center='center', foot='right')
            item('text-align: %s;' % css_align.get(al, al))
        lh = getattr(obj, 'linespace', None)
        if lh is not None:
            # Fixed: line spacing used to be emitted as a second 'text-align'.
            item('line-height: %fpt;' % (int(lh) / 10.0))
        pi = getattr(obj, 'parindent', None)
        if pi is not None:
            item('text-indent: %fpt;' % (int(pi) / 10.0))
        return ''.join(rules)


class TextAttr(StyleObject, LRFObject, TextCSS):
    # Maps a font face name to its key in the profile's default fonts;
    # unknown faces fall back to 'serif'.
    FONT_MAP = collections.defaultdict(lambda: 'serif')
    for key, value in PRS500_PROFILE.default_fonts.items():
        FONT_MAP[value] = key

    tag_map = {
        62737: ['fontsize', 'w'],
        62738: ['fontwidth', 'w'],
        62739: ['fontescapement', 'w'],
        62740: ['fontorientation', 'w'],
        62741: ['fontweight', 'W'],
        62742: ['fontfacename', 'P'],
        62743: ['textcolor', 'D', Color],
        62744: ['textbgcolor', 'D', Color],
        62745: ['wordspace', 'w'],
        62746: ['letterspace', 'w'],
        62747: ['baselineskip', 'w'],
        62748: ['linespace', 'w'],
        62749: ['parindent', 'w'],
        62750: ['parskip', 'w'],
        62780: ['align', 'W', {1: 'head', 4: 'center', 8: 'foot'}],
        62781: ['column', 'W'],
        62782: ['columnsep', 'W'],
        62941: ['charspace', 'w'],
        62961: ['textlinewidth', 'W'],
        62962: ['linecolor', 'D', Color],
    }
    tag_map.update(ruby_tags)
    tag_map.update(LRFObject.tag_map)


class Block(LRFStream, TextCSS):
    tag_map = {
        62723: ['style_id', 'D'],
    }
    tag_map.update(BlockAttr.tag_map)
    tag_map.update(TextAttr.tag_map)
    tag_map.update(LRFStream.tag_map)

    # reconstructed: the ``extend`` call was mangled by the decompiler.
    extra_attrs = [i[0] for i in BlockAttr.tag_map.values()]
    extra_attrs.extend([i[0] for i in TextAttr.tag_map.values()])

    style = property(
        fget=lambda self: self._document.objects[self.style_id])
    textstyle = property(
        fget=lambda self: self._document.objects[self.textstyle_id])

    def initialize(self):
        """Resolve the wrapped object and collect the block's attributes."""
        self.attrs = {}
        stream = cStringIO.StringIO(self.stream)
        tag = Tag(stream)
        if tag.id != 62723:
            raise LRFParseError('Bad block content')
        obj = self._document.objects[tag.dword]
        # SimpleText is a subclass of Text, so it must be tested first.
        if isinstance(obj, SimpleText):
            self.name = 'SimpleTextBlock'
            self.textstyle_id = obj.style_id
        elif isinstance(obj, Text):
            self.name = 'TextBlock'
            self.textstyle_id = obj.style_id
        elif isinstance(obj, Image):
            self.name = 'ImageBlock'
            for attr in ('x0', 'x1', 'y0', 'y1', 'xsize', 'ysize',
                         'refstream'):
                self.attrs[attr] = getattr(obj, attr)
            self.refstream = self._document.objects[self.attrs['refstream']]
        elif isinstance(obj, Button):
            self.name = 'ButtonBlock'
        else:
            raise LRFParseError(
                'Unexpected block type: ' + obj.__class__.__name__)
        # reconstructed: decompiled as ``isinstance(obj, SimpleText)``, but
        # __unicode__ and to_html below use ``content`` as the object itself.
        self.content = obj
        for attr in self.extra_attrs:
            if hasattr(self, attr):
                self.attrs[attr] = getattr(self, attr)

    def __unicode__(self):
        s = u'\n<%s objid="%d" blockstyle="%d" ' % (
            self.name, self.id, self.style_id)
        if hasattr(self, 'textstyle_id'):
            s += 'textstyle="%d" ' % (self.textstyle_id,)
        for attr in self.attrs:
            s += '%s="%s" ' % (attr, self.attrs[attr])
        if self.name != 'ImageBlock':
            s = s.rstrip() + '>\n'
            s += unicode(self.content)
            s += '</%s>\n' % (self.name,)
            return s
        return s.rstrip() + ' />\n'

    def to_html(self):
        if self.name == 'TextBlock':
            return u'<div class="block%s text%s">%s</div>' % (
                self.style_id, self.textstyle_id, self.content.to_html())
        return u''


class MiniPage(LRFStream):
    tag_map = {
        62785: ['minipagewidth', 'W'],
        62786: ['minipageheight', 'W'],
    }
    tag_map.update(LRFStream.tag_map)
    tag_map.update(BlockAttr.tag_map)


class Text(LRFStream):
    """A text stream.

    After ``initialize()`` the ``content`` deque holds unicode strings,
    ``TextTag`` objects (container openers or self-closing elements) and
    ``None`` entries, each of which closes the innermost open container.
    """

    tag_map = {
        62723: ['style_id', 'D'],
    }
    tag_map.update(TextAttr.tag_map)
    tag_map.update(LRFStream.tag_map)

    style = property(
        fget=lambda self: self._document.objects[self.style_id])

    # NOTE(review): as recovered, these map characters to themselves and the
    # pattern matches a bare '&'; the literals may have been entity-decoded
    # when this listing was extracted -- verify against upstream.
    text_map = {34: u'"', 38: u'&', 39: u"'", 60: u'<', 62: u'>'}
    entity_pattern = re.compile('&(\\S+?);')

    text_tags = {
        62849: ['simple_container', 'Italic'],
        62850: 'end_container',
        62897: ['simple_container', 'Yoko'],
        62898: 'end_container',
        62899: ['simple_container', 'Tate'],
        62900: 'end_container',
        62901: ['simple_container', 'Nekase'],
        62902: 'end_container',
        62881: 'start_para',
        62882: 'end_para',
        62887: 'char_button',
        62888: 'end_container',
        62889: ['simple_container', 'Rubi'],
        62890: 'end_container',
        62891: ['simple_container', 'Oyamoji'],
        62892: 'end_container',
        62893: ['simple_container', 'Rubimoji'],
        62894: 'end_container',
        62903: ['simple_container', 'Sup'],
        62904: 'end_container',
        62905: ['simple_container', 'Sub'],
        62906: 'end_container',
        62907: ['simple_container', 'NoBR'],
        62908: 'end_container',
        62909: ['simple_container', 'EmpDots'],
        62910: 'end_container',
        62913: 'empline',
        62914: 'end_container',
        62915: 'draw_char',
        62916: 'end_container',
        62918: 'box',
        62919: 'end_container',
        62922: 'space',
        62929: 'plot',
        62930: 'cr',
    }

    class TextTag(object):

        def __init__(self, name, attrs=None, self_closing=False):
            self.name = name
            # Fixed: a ``{}`` default was shared by every tag created
            # without explicit attributes.
            self.attrs = {} if attrs is None else attrs
            self.self_closing = self_closing

        def __unicode__(self):
            s = u'<%s ' % (self.name,)
            for name, val in self.attrs.items():
                s += '%s="%s" ' % (name, val)
            # reconstructed: the u' />' constant was lost by the decompiler.
            return s.rstrip() + (u' />' if self.self_closing else u'>')

        def to_html(self):
            return u''

        def close_html(self):
            return u''

    class Span(TextTag):
        pass

    linetype_map = {0: 'none', 16: 'solid', 32: 'dashed', 48: 'double',
                    64: 'dotted'}
    adjustment_map = {1: 'top', 2: 'center', 3: 'baseline', 4: 'bottom'}
    lineposition_map = {1: 'before', 2: 'after'}

    def add_text(self, text):
        """Decode UTF-16-LE ``text`` and append it to the content."""
        s = unicode(text, 'utf-16-le')
        if s:
            s = s.translate(self.text_map)
            self.content.append(self.entity_pattern.sub(entity_to_unicode, s))

    def end_container(self, tag, stream):
        self.content.append(None)

    def start_para(self, tag, stream):
        self.content.append(self.__class__.TextTag('P'))

    def close_containers(self, start=0):
        """Append a closer for every container still open from ``start``."""
        if len(self.content) == 0:
            return None
        # A trailing Span has no content after it; drop it.
        if isinstance(self.content[-1], self.__class__.Span):
            self.content.pop()
        open_containers = 0
        while start < len(self.content):
            c = self.content[start]
            if c is None:
                open_containers -= 1
            elif (isinstance(c, self.__class__.TextTag)
                  and not c.self_closing):
                open_containers += 1
            start += 1
        # A non-positive count appends nothing.
        self.content.extend([None] * open_containers)

    def end_para(self, tag, stream):
        # Close everything opened since the most recent <P>.
        i = len(self.content) - 1
        while i > -1:
            c = self.content[i]
            if isinstance(c, Text.TextTag) and c.name == 'P':
                break
            i -= 1
        self.close_containers(start=i)

    def cr(self, tag, stream):
        self.content.append(self.__class__.TextTag('CR', self_closing=True))

    def char_button(self, tag, stream):
        self.content.append(self.__class__.TextTag(
            'CharButton', attrs={'refobj': tag.dword}))

    def simple_container(self, tag, name):
        self.content.append(self.__class__.TextTag(name))

    def empline(self, tag, stream):
        """Read up to two emphasis-line attribute tags and open <EmpLine>."""
        oldpos = stream.tell()
        try:
            t = Tag(stream)
            if t.id not in (62841, 62842):
                raise LRFParseError
        except LRFParseError:
            # Not an EmpLine attribute: rewind and emit nothing.
            stream.seek(oldpos)
            return None
        h = TextAttr.tag_map[t.id]
        attrs = {}
        attrs[h[0]] = TextAttr.tag_to_val(h, None, t, None)
        oldpos = stream.tell()
        try:
            t = Tag(stream)
            if t.id not in (62841, 62842):
                raise LRFParseError
            h = TextAttr.tag_map[t.id]
            attrs[h[0]] = TextAttr.tag_to_val(h, None, t, None)
        except LRFParseError:
            # The second attribute is optional.
            stream.seek(oldpos)
        if attrs:
            self.content.append(self.__class__.TextTag(
                'EmpLine', attrs=attrs))

    def space(self, tag, stream):
        self.content.append(self.__class__.TextTag(
            'Space', attrs={'xsize': tag.sword}, self_closing=True))

    def plot(self, tag, stream):
        xsize, ysize, refobj, adjustment = struct.unpack(
            '<HHII', tag.contents)
        plot = self.__class__.TextTag(
            'Plot',
            {'xsize': xsize, 'ysize': ysize, 'refobj': refobj,
             'adjustment': self.adjustment_map[adjustment]},
            self_closing=True)
        plot.refobj = self._document.objects[refobj]
        self.content.append(plot)

    def draw_char(self, tag, stream):
        self.content.append(self.__class__.TextTag(
            'DrawChar', {'line': tag.word}))

    def box(self, tag, stream):
        self.content.append(self.__class__.TextTag(
            'Box', {'linetype': self.linetype_map[tag.word]}))

    def initialize(self):
        """Parse ``self.stream`` into ``self.content``; clears the stream."""
        self.content = collections.deque()
        raw = self.stream
        stream = cStringIO.StringIO(raw)
        length = len(raw)
        style = self.style.as_dict()
        current_style = style.copy()
        # reconstructed: the ``set(...)`` call was lost by the decompiler.
        text_tags = set(list(TextAttr.tag_map.keys())
                        + list(Text.text_tags.keys())
                        + list(ruby_tags.keys()))
        text_tags -= set([62720 + i for i in range(10)])
        text_tags.add(62924)

        def find_first_tag(start):
            # Offset of the first recognised tag found by scanning for the
            # byte 0xf5 from ``start``, or -1.  The tag begins one byte
            # before that marker.  Iterative so that a stream with many
            # false candidates cannot exhaust the recursion limit.
            pos = start
            while True:
                pos = raw.find('\xf5', pos)
                if pos == -1:
                    return -1
                try:
                    stream.seek(pos - 1)
                    if Tag(stream).id in text_tags:
                        return pos - 1
                except Exception:
                    # Not a parsable tag at this offset; keep scanning.
                    pass
                pos += 1

        while stream.tell() < length:
            start_pos = stream.tell()
            tag_pos = find_first_tag(start_pos)
            # reconstructed: this branch was unrecoverable from the
            # decompiler output and is restored from the residue it left
            # ("if tag_pos >= start_pos") -- verify against upstream.
            if tag_pos >= start_pos:
                if tag_pos > start_pos:
                    # Untagged text precedes the next tag.
                    self.add_text(raw[start_pos:tag_pos])
                stream.seek(tag_pos)
            else:
                # No tags in this stream.
                self.add_text(raw)
                stream.seek(0, 2)
                break

            tag = Tag(stream)
            if tag.id == 62924:
                self.add_text(stream.read(tag.word))
            elif tag.id in self.__class__.text_tags:
                action = self.__class__.text_tags[tag.id]
                if isinstance(action, basestring):
                    getattr(self, action)(tag, stream)
                else:
                    getattr(self, action[0])(tag, action[1])
            elif tag.id in TextAttr.tag_map:
                # A style attribute: open a Span, or extend the one that
                # was just opened.
                action = TextAttr.tag_map[tag.id]
                if len(self.content) == 0:
                    current_style = style.copy()
                name = action[0]
                val = LRFObject.tag_to_val(action, self, tag, None)
                # Fixed: an attribute missing from the style raised KeyError.
                if name and (name not in current_style
                             or current_style[name] != val):
                    if (len(self.content) > 0
                            and isinstance(self.content[-1],
                                           self.__class__.Span)):
                        self.content[-1].attrs[name] = val
                    else:
                        self.content.append(
                            self.__class__.Span('Span', {name: val}))
                    current_style[name] = val
        if len(self.content) > 0:
            self.close_containers()
        self.stream = None

    def _malformed(self, open_containers):
        # reconstructed: the raise expression was lost by the decompiler.
        names = [i.name for i in open_containers
                 if isinstance(i, Text.TextTag)]
        return LRFParseError('Malformed text stream %s' % (names,))

    def __unicode__(self):
        s = u''
        open_containers = collections.deque()
        for c in self.content:
            if isinstance(c, basestring):
                s += prepare_string_for_xml(c).replace('\x00', '')
            elif c is None:
                if open_containers:
                    p = open_containers.pop()
                    s += u'</%s>' % (p.name,)
            else:
                s += unicode(c)
                if not c.self_closing:
                    open_containers.append(c)
        if len(open_containers) > 0:
            # A single unclosed container is tolerated and closed here.
            if len(open_containers) == 1:
                s += u'</%s>' % (open_containers[0].name,)
            else:
                raise self._malformed(open_containers)
        return s

    def to_html(self):
        s = u''
        open_containers = collections.deque()
        in_p = False
        for c in self.content:
            if isinstance(c, basestring):
                s += c
            elif c is None:
                # Fixed: used to read ``c.name`` although ``c`` is None.
                if open_containers:
                    p = open_containers.pop()
                    if p.name == 'P':
                        in_p = False
                    s += p.close_html()
            elif c.name == 'CR':
                # reconstructed: the '<br />' constant was lost.
                s += '<br />' if in_p else '<p>'
            else:
                if c.name == 'P':
                    in_p = True
                # Fixed: <P> is now pushed too, so that its closing entry
                # pops the paragraph rather than an unrelated container.
                s += c.to_html()
                if not c.self_closing:
                    open_containers.append(c)
        if len(open_containers) > 0:
            raise self._malformed(open_containers)
        return s


class Image(LRFObject):
    tag_map = {
        62794: ['', 'parse_image_rect'],
        62795: ['', 'parse_image_size'],
        62796: ['refstream', 'D'],
        62805: ['comment', 'P'],
    }

    def parse_image_rect(self, tag, f):
        self.x0, self.y0, self.x1, self.y1 = struct.unpack(
            '<HHHH', tag.contents)

    def parse_image_size(self, tag, f):
        self.xsize, self.ysize = struct.unpack('<HH', tag.contents)

    encoding = property(
        fget=lambda self: self._document.objects[self.refstream].encoding)
    data = property(
        fget=lambda self: self._document.objects[self.refstream].stream)

    def __unicode__(self):
        return (u'<Image objid="%s" x0="%d" y0="%d" x1="%d" y1="%d" '
                u'xsize="%d" ysize="%d" refstream="%d" />\n' % (
                    self.id, self.x0, self.y0, self.x1, self.y1,
                    self.xsize, self.ysize, self.refstream))


class PutObj(EmptyPageElement):

    def __init__(self, objects, x1, y1, refobj):
        self.x1 = x1
        self.y1 = y1
        self.refobj = refobj
        self.object = objects[refobj]

    def __unicode__(self):
        return u'<PutObj x1="%d" y1="%d" refobj="%d" />' % (
            self.x1, self.y1, self.refobj)


class Canvas(LRFStream):
    tag_map = {
        62801: ['canvaswidth', 'W'],
        62802: ['canvasheight', 'W'],
        62938: ['', 'parse_waits'],
        62771: ['blockrule', 'W', {68: 'block-fixed',
                                   34: 'block-adjustable'}],
        62772: ['bgcolor', 'D', Color],
        62773: ['layout', 'W', {65: 'TbRl', 52: 'LrTb'}],
        62774: ['framewidth', 'W'],
        62775: ['framecolor', 'D', Color],
        62766: ['framemode', 'W', {0: 'none', 2: 'curve', 1: 'square'}],
    }
    tag_map.update(LRFStream.tag_map)
    extra_attrs = ['canvaswidth', 'canvasheight', 'blockrule', 'layout',
                   'framewidth', 'framecolor', 'framemode']

    def parse_waits(self, tag, f):
        val = tag.word
        self.setwaitprop = val & 15
        self.setwaitsync = val & 240

    def initialize(self):
        """Collect set attributes and parse the stream into PutObj items."""
        self.attrs = {}
        for attr in self.extra_attrs:
            if hasattr(self, attr):
                self.attrs[attr] = getattr(self, attr)
        self._contents = []
        stream = cStringIO.StringIO(self.stream)
        while stream.tell() < len(self.stream):
            tag = Tag(stream)
            try:
                self._contents.append(PutObj(
                    self._document.objects,
                    *struct.unpack('<HHI', tag.contents)))
            except struct.error:
                # Best effort: skip entries whose payload has the wrong size.
                print('Canvas object has errors, skipping.')

    def __unicode__(self):
        s = '\n<%s objid="%s" ' % (self.__class__.__name__, self.id)
        for attr in self.attrs:
            s += '%s="%s" ' % (attr, self.attrs[attr])
        s = s.rstrip() + '>\n'
        for po in self:
            s += unicode(po) + '\n'
        s += '</%s>\n' % (self.__class__.__name__,)
        return s

    def __iter__(self):
        return iter(self._contents)


class Header(Canvas):
    pass


class Footer(Canvas):
    pass


class ESound(LRFObject):
    pass


class ImageStream(LRFStream):
    tag_map = {
        62805: ['comment', 'P'],
    }
    imgext = {17: 'jpeg', 18: 'png', 19: 'bmp', 20: 'gif'}
    tag_map.update(LRFStream.tag_map)

    # The low byte of the stream flags selects the image format.
    encoding = property(
        fget=lambda self: self.imgext[self.stream_flags & 255].upper())

    def end_stream(self, *args):
        LRFStream.end_stream(self, *args)
        self.file = str(self.id) + '.' + self.encoding.lower()
        if self._document is not None:
            self._document.image_map[self.id] = self

    def __unicode__(self):
        return u'<ImageStream objid="%s" encoding="%s" file="%s" />\n' % (
            self.id, self.encoding, self.file)


class Import(LRFStream):
    pass


class Button(LRFObject):
    """A button; ``actions`` maps a button type to ``(code, params)`` tuples.

    Types set by the parser: 0 base, 1 focus-in, 2 push, 3 up.
    Action codes: 1 jump-to, 2 send-message, 3 close-window, 4 sound-stop,
    5 run.
    """

    tag_map = {
        62723: ['', 'do_ref_image'],
        62817: ['button_flags', 'W'],
        62818: ['', 'do_base_button'],
        62819: ['', ''],
        62820: ['', 'do_focus_in_button'],
        62821: ['', ''],
        62822: ['', 'do_push_button'],
        62823: ['', ''],
        62824: ['', 'do_up_button'],
        62825: ['', ''],
        62826: ['', 'do_start_actions'],
        62827: ['', ''],
        62828: ['', 'parse_jump_to'],
        62829: ['', 'parse_send_message'],
        62830: ['', 'parse_close_window'],
        62934: ['', 'parse_sound_stop'],
        62969: ['', 'parse_run'],
    }
    tag_map.update(LRFObject.tag_map)

    def __init__(self, document, stream, id, scramble_key, boundary):
        self.xml = u''
        self.refimage = {}
        self.actions = {}
        self.to_dump = True
        LRFObject.__init__(self, document, stream, id, scramble_key, boundary)

    def do_ref_image(self, tag, f):
        self.refimage[self.button_type] = tag.dword

    def do_base_button(self, tag, f):
        self.button_type = 0
        self.actions[self.button_type] = []

    def do_focus_in_button(self, tag, f):
        self.button_type = 1

    def do_push_button(self, tag, f):
        self.button_type = 2

    def do_up_button(self, tag, f):
        self.button_type = 3

    def do_start_actions(self, tag, f):
        self.actions[self.button_type] = []

    def parse_jump_to(self, tag, f):
        self.actions[self.button_type].append(
            (1, struct.unpack('<II', tag.contents)))

    def parse_send_message(self, tag, f):
        params = (tag.word, Tag.string_parser(f), Tag.string_parser(f))
        self.actions[self.button_type].append((2, params))

    def parse_close_window(self, tag, f):
        self.actions[self.button_type].append((3,))

    def parse_sound_stop(self, tag, f):
        self.actions[self.button_type].append((4,))

    def parse_run(self, tag, f):
        self.actions[self.button_type].append(
            (5, struct.unpack('<HI', tag.contents)))

    def jump_action(self, button_type):
        """Return the params of the first jump-to action, or (None, None)."""
        for action in self.actions[button_type]:
            if action[0] == 1:
                return action[1]
        return (None, None)

    def __unicode__(self):
        s = u'<Button objid="%s">\n' % (self.id,)
        if self.button_flags & 16 == 0:
            raise LRFParseError('Unsupported button type')
        s += '<PushButton '
        if 2 in self.refimage:
            s += 'refimage="%s" ' % (self.refimage[2],)
        s = s.rstrip() + '>\n'
        s += '<JumpTo refpage="%s" refobj="%s" />\n' % self.jump_action(2)
        s += '</PushButton>\n'
        s += '</Button>\n'
        return s

    refpage = property(fget=lambda self: self.jump_action(2)[0])
    refobj = property(fget=lambda self: self.jump_action(2)[1])


class Window(LRFObject):
    pass


class PopUpWin(LRFObject):
    pass


class Sound(LRFObject):
    pass


class SoundStream(LRFObject):
    pass


class Font(LRFStream):
    tag_map = {
        62809: ['fontfilename', 'P'],
        62813: ['fontfacename', 'P'],
    }
    tag_map.update(LRFStream.tag_map)

    data = property(fget=lambda self: self.stream)

    def end_stream(self, *args):
        LRFStream.end_stream(self, *args)
        self._document.font_map[self.fontfacename] = self
        self.file = self.fontfacename + '.ttf'

    def __unicode__(self):
        return ('<RegistFont objid="%s" fontfilename="%s" fontname="%s" '
                'encoding="TTF" file="%s" />\n' % (
                    self.id, self.fontfilename, self.fontfacename,
                    self.file))


class ObjectInfo(LRFStream):
    pass


class BookAttr(StyleObject, LRFObject):
    tag_map = {
        62843: ['page_tree_id', 'D'],
        62936: ['', 'add_font'],
        62938: ['setwaitprop', 'W', {1: 'replay', 2: 'noreplay'}],
    }
    tag_map.update(ruby_tags)
    tag_map.update(LRFObject.tag_map)
    binding_map = {1: 'Lr', 16: 'Rl'}

    def __init__(self, document, stream, id, scramble_key, boundary):
        self.font_link_list = []
        LRFObject.__init__(self, document, stream, id, scramble_key, boundary)

    def add_font(self, tag, f):
        self.font_link_list.append(tag.dword)

    def __unicode__(self):
        s = u'<BookStyle objid="%s" stylelabel="%s">\n' % (self.id, self.id)
        s += u'<SetDefault %s />\n' % (self._tags_to_xml(),)
        doc = self._document
        s += (u'<BookSetting bindingdirection="%s" dpi="%s" '
              u'screenwidth="%s" screenheight="%s" colordepth="%s" />\n' % (
                  self.binding_map[doc.binding], doc.dpi, doc.width,
                  doc.height, doc.color_depth))
        for font in self._document.font_map.values():
            s += unicode(font)
        s += '</BookStyle>\n'
        return s


class SimpleText(Text):
    pass


class TocLabel(object):

    def __init__(self, refpage, refobject, label):
        self.refpage = refpage
        self.refobject = refobject
        self.label = label

    def __unicode__(self):
        return u'<TocLabel refpage="%s" refobj="%s">%s</TocLabel>\n' % (
            self.refpage, self.refobject, self.label)


class TOCObject(LRFStream):

    def initialize(self):
        """Parse the stream into a list of TocLabel entries."""
        stream = cStringIO.StringIO(self.stream)
        c = struct.unpack('<H', stream.read(2))[0]
        # The entries start at byte 4 * (count + 1).
        stream.seek(4 * (c + 1))
        self._contents = []
        while c > 0:
            refpage = struct.unpack('<I', stream.read(4))[0]
            refobj = struct.unpack('<I', stream.read(4))[0]
            cnt = struct.unpack('<H', stream.read(2))[0]
            label = stream.read(cnt).decode('utf_16_le')
            self._contents.append(TocLabel(refpage, refobj, label))
            c -= 1

    def __iter__(self):
        return iter(self._contents)

    def __unicode__(self):
        s = u'<TOC>\n'
        for i in self:
            s += unicode(i)
        return s + '</TOC>\n'


# Indexed by the object type code stored in each object's start tag.
object_map = [
    None, PageTree, Page, Header, Footer, PageAttr, Block, BlockAttr,
    MiniPage, None, Text, TextAttr, Image, Canvas, ESound, None, None,
    ImageStream, Import, Button, Window, PopUpWin, Sound, SoundStream,
    None, Font, ObjectInfo, None, BookAttr, SimpleText, TOCObject,
]


def get_object(document, stream, id, offset, size, scramble_key):
    """Parse and return the object stored at ``offset`` in ``stream``.

    The object's id and type are read from its start tag; the ``id``
    argument is not used.  Raises LRFParseError if the start tag is missing
    or the type code has no entry in ``object_map``.
    """
    stream.seek(offset)
    start_tag = Tag(stream)
    if start_tag.id != 62720:
        raise LRFParseError('Bad object start')
    obj_id, obj_type = struct.unpack('<IH', start_tag.contents)
    if obj_type < len(object_map) and object_map[obj_type] is not None:
        return object_map[obj_type](
            document, stream, obj_id, scramble_key,
            offset + size - Tag.tags[0][0])
    raise LRFParseError('Unknown object type: %02X!' % obj_type)