home *** CD-ROM | disk | FTP | other *** search
Wrap
# Source Generated with Decompyle++
# File: in.pyo (Python 2.5)
#
# Minimal RTF tokenizer / parser / serializer.
#
# The decompiler output was not valid Python (stray expression statements,
# "<EXCEPTION MATCH>" residue, ".0" lambdas); the control flow below is the
# if/elif structure reconstructed from it, with the defects noted inline
# fixed.  The code is written to import on Python 2.5+ and on Python 3.
import string
import itertools

try:
    _unicode = unicode
    _unichr = unichr
except NameError:
    # Python 3: str is already a unicode type.
    _unicode = str
    _unichr = chr

_ALNUM = string.ascii_letters + string.digits
# Characters that would be absorbed by (or consumed as the delimiter of) a
# control word if they followed it directly.
_WORD_CHARS = _ALNUM + '- '

# Maps the part after the backslash to the text it stands for.
# NOTE(review): RTF defines "\~" as a non-breaking space.  The scraped source
# showed a plain space here, which made rtf_escape() turn every ordinary
# space into "\~"; "\xa0" is used instead - confirm against the original.
symbols = {
    '\\': '\\',
    '~': '\xa0',
    'tab': '\t',
    "'7b": '{',
    "'7d": '}',
}

# Reverse lookup used when escaping text back to RTF.
rev_symbols = {}
for k, v in symbols.items():
    rev_symbols[v] = k

rtf_fcharsets = {
    0: 'ANSI',
    1: 'Default',
    2: 'Symbol',
    3: 'Invalid',
    77: 'Mac',
    128: 'shiftjis',
    130: 'johab',
    134: 'GB2312',
    136: 'Big5',
    161: 'Greek',
    162: 'iso-8859-9',
    163: 'cp1258',
    177: 'Hebrew',
    178: 'Arabic',
    179: 'Arabic Traditional',
    180: 'Arabic user',
    181: 'Hebrew user',
    186: 'Baltic',
    204: 'Russian',
    222: 'Thai',
    238: 'Eastern European',
    254: 'PC 437',
    255: 'OEM',
}

_FONT_FAMILIES = frozenset(
    ('nil', 'roman', 'swiss', 'modern', 'script', 'decor', 'tech'))


def tokenize(string):
    """Split RTF source into tokens.

    Backslash, braces, space, CR and LF are each emitted as their own
    one-character token; every run of other characters becomes one token.
    A CR immediately followed by LF collapses into a single '\\n' token.
    """
    tokens = []
    word = []
    for c in string:
        if c in '\\{} \r\n':
            if word:
                tokens.append(''.join(word))
                word = []
            # "tokens and" guards input that starts with a newline, which
            # used to raise IndexError on the empty token list.
            if c == '\n' and tokens and tokens[-1] == '\r':
                tokens[-1] = c
            else:
                tokens.append(c)
        else:
            word.append(c)
    if word:
        tokens.append(''.join(word))
    return tokens


class TypedString(str):
    """A str whose repr names its concrete subclass."""

    def __repr__(self):
        return '<%s %s>' % (type(self).__name__, str.__repr__(self))


class TypedList(list):
    """A list whose repr names its concrete subclass."""

    def __repr__(self):
        return '<%s %s>' % (type(self).__name__, list.__repr__(self))


class ControlNode(TypedString):
    """A control word, stored with its leading backslash."""


class TextNode(TypedString):
    """Literal document text."""


class WhitespaceNode(TypedString):
    """A run of whitespace tokens."""


class Group(TypedList):
    """The nodes found between one pair of braces."""


def compress_text(doc):
    """Return a copy of *doc* with adjacent text/whitespace runs merged.

    Whitespace that does not follow text within the current run is dropped
    (as in the original code).  Nested groups are compressed recursively.
    Control nodes and any other node types are kept and end the current run.
    *doc* may be None, which yields an empty Group; *doc* is not modified.

    Fixes over the decompiled version: control nodes were silently
    discarded, the text buffer was never cleared (so text was repeated in
    later flushes) and trailing text was never emitted.
    """
    new = Group()
    cur_text = []

    def flush():
        if cur_text:
            new.append(TextNode(''.join(cur_text)))
            del cur_text[:]

    for node in (doc or ()):
        kind = type(node)
        if kind is WhitespaceNode:
            if cur_text:
                cur_text.append(node)
        elif kind is TextNode:
            cur_text.append(node)
        elif kind is Group:
            flush()
            new.append(compress_text(node))
        else:
            flush()
            new.append(node)
    flush()
    return new


def _parse_escape(doc, tokens):
    """Handle the token(s) following a backslash and append to *doc*."""
    if not tokens:
        # A lone trailing backslash: keep it as text instead of raising.
        doc.append(TextNode('\\'))
        return
    word = tokens.pop(0)
    head = word[:1]
    if head == "'":
        # \'hh - one character given as two hex digits.
        try:
            char = chr(int(word[1:3], 16))
        except ValueError:
            doc.append(ControlNode('\\' + word))
            return
        doc.append(TextNode(char))
        rest = word[3:]
    elif head and head not in _ALNUM:
        # Control symbol: exactly one non-alphanumeric character.  Anything
        # glued after it (the tokenizer only splits on a few characters) is
        # ordinary text, not part of the symbol.
        doc.append(TextNode(symbols.get(head, head)))
        rest = word[1:]
    else:
        doc.append(ControlNode('\\' + word))
        return
    if rest:
        # Re-queue only non-empty remainders; an empty token used to be
        # re-queued and then turned into an empty WhitespaceNode.
        tokens.insert(0, rest)


def parse(tokens):
    """Parse a token list (as produced by tokenize) into a Group tree.

    *tokens* is consumed.  Parsing stops at the brace closing the outermost
    group and returns that group.  If the tokens run out first, the nodes
    collected so far are returned after compress_text().  Input that does not
    start with '{' is collected into an implicit group.  A stray '}' before
    any other token returns None, as before.
    """
    doc = None
    while tokens:
        token = tokens.pop(0)
        if token == '{':
            if doc is None:
                doc = Group()
            else:
                tokens.insert(0, '{')
                doc.append(parse(tokens))
            continue
        if token == '}':
            return doc
        if doc is None:
            # Text outside any braces used to fail with AttributeError.
            doc = Group()
        if token == '\\':
            _parse_escape(doc, tokens)
        elif token in string.whitespace:
            # "doc and" guards groups that start with whitespace/text, which
            # used to raise IndexError on doc[-1].
            if doc and type(doc[-1]) is WhitespaceNode:
                doc[-1] = WhitespaceNode(doc[-1] + token)
            else:
                doc.append(WhitespaceNode(token))
        else:
            if doc and type(doc[-1]) is TextNode:
                doc[-1] = TextNode(doc[-1] + token)
            else:
                doc.append(TextNode(token))
    return compress_text(doc)


def _decode_text(text, encoding):
    """Decode *text* with *encoding*; return it unchanged if that fails."""
    if encoding is None:
        return text
    raw = text
    try:
        if isinstance(raw, _unicode):
            # Python 3: \'hh bytes were stored as code points 0-255, so
            # Latin-1 maps them back to the byte values.
            raw = raw.encode('latin-1')
        return raw.decode(encoding)
    except (UnicodeError, LookupError):
        return text


def _take_unicode_fallback(tree, count):
    """Remove and return up to *count* fallback characters after a \\uN word.

    A single-character WhitespaceNode directly after the word is its
    delimiter; it is left in place (the caller strips it) and the fallback
    is taken from the TextNode behind it.
    """
    index = 0
    if tree and type(tree[0]) is WhitespaceNode and len(tree[0]) == 1:
        index = 1
    if count <= 0 or index >= len(tree) or type(tree[index]) is not TextNode:
        return ''
    text = str(tree.pop(index))
    if text[count:]:
        tree.insert(index, TextNode(text[count:]))
    return text[:count]


def _decode_unicode_word(word, tree, fallback_len):
    """Return the character for a "\\uN" control word.

    The fallback characters following the word are removed from *tree*;
    they are returned instead when N cannot be turned into a character.
    """
    body = word[2:]
    end = 0
    if body[:1] == '-':
        end = 1
    while end < len(body) and body[end] in string.digits:
        end += 1
    number, glued = body[:end], body[end:]
    if glued:
        # Text glued to the word by the tokenizer (e.g. "\u8364?").
        tree.insert(0, TextNode(glued))
    fallback = _take_unicode_fallback(tree, fallback_len) or ' '
    try:
        val = int(number)
    except ValueError:
        return fallback
    if val < 0:
        # \uN carries a signed 16-bit value.
        val += 65536
    try:
        return _unichr(val)
    except (ValueError, OverflowError):
        return fallback


def _append_unicode(pieces, char):
    """Append *char*, merging it with a preceding high surrogate if any."""
    if pieces and len(pieces[-1]) == 1 and len(char) == 1:
        high, low = ord(pieces[-1]), ord(char)
        if 0xD800 <= high <= 0xDBFF and 0xDC00 <= low <= 0xDFFF:
            code = 0x10000 + ((high - 0xD800) << 10) + (low - 0xDC00)
            try:
                pieces[-1] = _unichr(code)
                return
            except ValueError:
                # Narrow Python 2 build: keep the pair as two code units.
                pass
    pieces.append(char)


def tree_to_plain(tree):
    r"""Return the plain text contained in a parsed tree.

    Groups that start with \colortbl or \fonttbl contribute nothing.
    \par becomes a newline and \tab a tab; \ansicpgN selects code page
    "cpN" for the text nodes that follow it in the same group; \uN yields
    the character N and skips the fallback characters after it (\ucN sets
    how many, default 1).  The first character of whitespace following a
    control word or a group is treated as a delimiter and dropped.  All
    other control words are ignored.  *tree* is not modified; None or an
    empty tree gives ''.

    Fixes over the decompiled version: it read ``replacement_char`` before
    assignment (NameError on every \uN), used an unbound ``codepage`` after a
    failed \ansicpg parse, raised IndexError on a bare "\u", never read \ucN
    and decoded negative \uN values with a non-standard formula.
    """
    tree = list(tree or ())
    if not tree:
        return ''
    if (type(tree[0]) is ControlNode
            and str(tree[0]) in ('\\colortbl', '\\fonttbl')):
        return ''
    res = []
    encoding = None
    last = None
    uni_replace_len = 1
    while tree:
        node = tree.pop(0)
        kind = type(node)
        if kind is Group:
            res.append(tree_to_plain(node))
        elif kind is TextNode:
            text = _decode_text(str(node), encoding)
            if text:
                res.append(text)
        elif kind is WhitespaceNode:
            blank = str(node)
            if type(last) in (ControlNode, Group):
                blank = blank[1:]
            if blank:
                res.append(blank)
        elif kind is ControlNode:
            word = str(node)
            if word == '\\par':
                res.append('\n')
            elif word == '\\tab':
                res.append('\t')
            elif word.startswith('\\ansicpg'):
                try:
                    codepage = int(word[len('\\ansicpg'):].strip())
                except ValueError:
                    # Malformed number: keep the current encoding.
                    pass
                else:
                    encoding = 'cp%d' % codepage
            elif word.startswith('\\uc') and word[3:].isdigit():
                uni_replace_len = int(word[3:])
            elif (word.startswith('\\u') and word[2:3]
                    and word[2:3] in '-' + string.digits):
                _append_unicode(
                    res, _decode_unicode_word(word, tree, uni_replace_len))
        last = node
    return ''.join(res)


def rtf_to_plain(s):
    """Return the plain text of the RTF document *s*."""
    return tree_to_plain(parse(tokenize(s)))


def make_color_table(colors):
    r"""Build a \colortbl group from an iterable of colours.

    Each colour is a sequence whose first three items are red, green and
    blue; further items (e.g. alpha) are ignored.  The table starts with an
    empty entry, so the given colours are numbered from 1.
    """
    table = Group()
    table.append(ControlNode('\\colortbl'))
    table.append(TextNode(';'))
    for color in colors:
        (r, g, b) = tuple(color)[:3]
        table.extend((ControlNode('\\red%d' % r),
                      ControlNode('\\green%d' % g),
                      ControlNode('\\blue%d' % b),
                      TextNode(';')))
    return table


def normalize_font_family(family):
    """Return *family* lower-cased, or 'nil' if it is not a known family."""
    family = family.lower()
    if family not in _FONT_FAMILIES:
        return 'nil'
    return family


def make_font_table(fonts):
    r"""Build a \fonttbl group from an iterable of (family, name) pairs.

    Fonts are numbered from 0 in the order given.  The family is written
    as the RTF family control word (\fnil, \fswiss, ...); the decompiled
    code emitted it without the "f" prefix and unpacked the enumerate()
    pair as (family, font), leaving the font index undefined.
    """
    table = Group()
    table.append(ControlNode('\\fonttbl'))
    for i, (family, font) in enumerate(fonts):
        table.extend((ControlNode('\\f%d' % i),
                      ControlNode('\\f' + normalize_font_family(family)),
                      # tree_to_rtf supplies the delimiter before the name.
                      TextNode(font + ';')))
    return table


def storage_to_tree(s):
    """Build an RTF document tree from a mapping of formatting options.

    Keys read with ``s.get``: backgroundcolor, foregroundcolor, family,
    font, bold, italic, underline, size.

    Returns ``(document, add)`` where ``add`` appends a node or an already
    escaped string to the group that carries the formatting.
    """
    # The decompiled code tested the misspelled key 'backgrouncolor'.
    background = s.get('backgroundcolor')
    foreground = s.get('foregroundcolor')
    if background and foreground:
        color_table = make_color_table([background, foreground])
    else:
        color_table = TextNode('')
    family = s.get('family')
    font = s.get('font')
    if family and font:
        font_table = make_font_table([(family, font)])
    else:
        font_table = TextNode('')
    top_level = Group([ControlNode('\\rtf1'),
                       ControlNode('\\ansi'),
                       ControlNode('\\uc1'),
                       color_table,
                       font_table])
    format_group = Group([])
    if font_table:
        # make_font_table numbers its single font 0.
        format_group.append(ControlNode('\\f0'))
    if color_table:
        format_group.append(ControlNode('\\cb1'))
        format_group.append(ControlNode('\\cf2'))
    if s.get('bold'):
        format_group.append(ControlNode('\\b'))
    if s.get('italic'):
        format_group.append(ControlNode('\\i'))
    if s.get('underline'):
        format_group.append(ControlNode('\\ul'))
    size = s.get('size')
    if size:
        # \fs takes half-points; the parentheses fix a precedence bug that
        # repeated the formatted string instead of doubling the size.
        format_group.append(ControlNode('\\fs%d' % (size * 2)))
    top_level.append(format_group)
    return (top_level, format_group.append)


def storage_to_rtf(s, text):
    """Return *text* as an RTF document formatted per the options in *s*."""
    escaped = rtf_escape(text)
    (doc, add_text) = storage_to_tree(s)
    add_text(escaped)
    return tree_to_rtf(doc)


def rtf_escape(node):
    """Return the text of *node* with every character escaped for RTF."""
    if isinstance(node, _unicode):
        s = _unicode(node)
    else:
        s = str(node)
    return ''.join([rtf_escape_chr(c) for c in s])


def _unicode_word(unit):
    r"""Return the \uN escape (with '?' fallback) for one 16-bit unit."""
    if unit > 32767:
        # \uN carries a signed 16-bit value.
        unit -= 65536
    return '\\u%d ?' % unit


def rtf_escape_chr(c):
    r"""Return the RTF spelling of the single character *c*.

    Characters listed in ``symbols`` use their symbol, other ASCII passes
    through, unicode characters become \uN with a '?' fallback (as a
    surrogate pair above U+FFFF) and non-ASCII bytes become \'hh.
    """
    if c in rev_symbols:
        name = rev_symbols[c]
        if name.isalpha():
            # A control word needs a delimiter or it absorbs what follows.
            return '\\' + name + ' '
        return '\\' + name
    code = ord(c)
    if code < 128:
        return str(c)
    if isinstance(c, _unicode):
        if code > 0xFFFF:
            code -= 0x10000
            return (_unicode_word(0xD800 + (code >> 10))
                    + _unicode_word(0xDC00 + (code & 0x3FF)))
        return _unicode_word(code)
    return "\\'%02x" % code


def tree_to_rtf(tree):
    """Serialize a node tree back to RTF source.

    Groups are wrapped in braces, TextNodes are escaped, and every other
    node is written as-is.  When text directly follows a control word and
    would be absorbed by it, a delimiter space is inserted; trees produced
    by parse() already carry their delimiters and are unaffected.
    """
    res = ['{']
    open_word = False
    for node in tree:
        kind = type(node)
        if kind is Group:
            piece = tree_to_rtf(node)
        elif kind is TextNode:
            piece = rtf_escape(node)
        else:
            piece = str(node)
        if not piece:
            continue
        if (open_word and kind is not WhitespaceNode
                and piece[0] in _WORD_CHARS):
            res.append(' ')
        res.append(piece)
        open_word = kind is ControlNode and piece[-1] in _ALNUM
    res.append('}')
    return ''.join(res)


def main():
    """Run the built-in samples, printing any mismatch that is found."""
    samples = (
        ('{\\rtf1\\ansi\\ansicpg1252\\deff0\\deflang1033{\\fonttbl{\\f0\\fmodern\\fprq1\\fcharset0 Courier New;}}{\\colortbl ;\\red0\\green255\\blue64;}\\viewkind4\\uc1\\pard\\cf1\\b\\f0\\fs32 this is the body\\par}',
         'this is the body\n'),
        # The expected text used to start with a space, contradicting the
        # delimiter rule the first sample relies on.
        ('{\\rtf1\\ansi\\ansicpg1252\\deff0\\deflang1033{\\fonttbl{\\f0\\fswiss\\fcharset0 Arial;}{\\f1\\froman\\fprq2\\fcharset0 Bodoni;}}\\viewkind4\\uc1\\pard\\i\\f0\\fs20 first line\\par\\b second line\\par\\ul\\i0 third line\\par\\b0 fourth line\\par\\ulnone\\b bold\\par\\f1 newfont\\ul\\b0\\f0\\par}',
         'first line\nsecond line\nthird line\nfourth line\nbold\nnewfont\n'),
        ('{\\rtf1\\ansi\\ansicpg1252\\deff0\\deflang1033{\\fonttbl{\\f0\\fmodern\\fprq1\\fcharset0 Courier New;}}\n{\\colortbl ;\\red0\\green255\\blue64;}\n\\viewkind4\\uc1\\pard\\cf1\\b\\f0\\fs32 newline\\par\nbackslash\\\\ rawr end\\par\n}',
         'newline\nbackslash\\ rawr end\n'),
    )
    for test_string, test_plain in samples:
        parsed = parse(tokenize(test_string))
        plain = tree_to_plain(parsed)
        print(plain)
        if not test_plain == plain:
            print(repr(test_plain))
            print(repr(plain))
            print('')
        if not test_string == tree_to_rtf(parsed):
            print(repr(test_string))
            print(repr(tree_to_rtf(parsed)))
            print('')


if __name__ == '__main__':
    main()