Maximum CD 2010 November

home *** CD-ROM | disk | FTP | other *** search

/ Maximum CD 2010 November / maximum-cd-2010-11.iso / DiscContents / calibre-0.7.13.msi / file_1124 (.txt) < prev next >

Wrap

Python Compiled Bytecode | 2010-08-06 | 29.9 KB | 813 lines

# Source Generated with Decompyle++
# File: in.pyc (Python 2.6)
import os
import re
import tempfile

from calibre.ebooks.rtf2xml import copy, check_brackets


class ProcessTokens:
    """Translate a file of RTF tokens (one per line) into tagged lines.

    Each input line is either a control word (it starts with a backslash)
    or text.  Control words are looked up in ``self.dict_token`` and
    rewritten by the handler stored there, mostly as
    ``cw<category<name<nu<value`` lines; text is written as ``tx<...``
    lines.  The input file is overwritten with the result.
    """

    def __init__(self, in_file, exception_handler, bug_handler,
                 copy=None, run_level=1):
        """Store the configuration and build the lookup tables.

        in_file           -- path of the token file; it is read and later
                             overwritten by process_tokens()
        exception_handler -- exception class raised for invalid RTF input
        bug_handler       -- exception class raised for internal problems
        copy              -- if true, process_tokens() also asks the copy
                             helper to keep the output as
                             'processed_tokens.data'
        run_level         -- values above 3 turn recoverable problems into
                             bug_handler exceptions
        """
        self.__file = in_file
        self.__bug_handler = bug_handler
        self.__copy = copy
        self.__run_level = run_level
        # mkstemp creates the file atomically; tempfile.mktemp only returns
        # a name and is open to a race before the file is first opened.
        fd, self.__write_to = tempfile.mkstemp()
        os.close(fd)
        self.initiate_token_dict()
        self.compile_expressions()
        self.__bracket_count = 0
        self.__exception_handler = exception_handler

    def compile_expressions(self):
        """Compile the regular expressions used while processing tokens."""
        # letters followed by everything else: control word + parameter
        self.__num_exp = re.compile('([a-zA-Z]+)(.*)')
        # an '&...;' entity embedded in text
        self.__utf_exp = re.compile('(&.*?;)')

    def initiate_token_dict(self):
        """Build the control-word table and the two value lookup tables.

        ``self.dict_token`` maps a control word to a tuple
        ``(category, output name, handler)``; process_cw() calls the
        handler as ``handler(category, output name, parameter)``.
        Also resets ``self.__return_code`` to 0.
        """
        self.__return_code = 0
        default = self.default_func
        bool_st = self.bool_st_func
        div_20 = self.divide_by_20
        div_2 = self.divide_by_2
        two_part = self.two_part_func
        ms_sub = self.ms_sub_func
        text = self.text_func
        color = self.color_func
        list_type = self.__list_type_func
        self.dict_token = {
            'mshex': ('nu', '__________', self.__ms_hex_func),
            '{': ('nu', '{', self.ob_func),
            '}': ('nu', '}', self.cb_func),
            'ldblquote': ('mc', 'ldblquote', ms_sub),
            'rdblquote': ('mc', 'rdblquote', ms_sub),
            'rquote': ('mc', 'rquote', ms_sub),
            'lquote': ('mc', 'lquote', ms_sub),
            'emdash': ('mc', 'emdash', ms_sub),
            'endash': ('mc', 'endash', ms_sub),
            'bullet': ('mc', 'bullet', ms_sub),
            '~': ('mc', '~', ms_sub),
            'tab': ('mc', 'tab', ms_sub),
            '_': ('mc', '_', ms_sub),
            ';': ('mc', ';', ms_sub),
            '-': ('mc', '-', ms_sub),
            '*': ('ml', 'asterisk__', default),
            ':': ('ml', 'colon_____', default),
            'backslash': ('nu', '\\', text),
            'ob': ('nu', '{', text),
            'cb': ('nu', '}', text),
            'line': ('nu', 'hard-lineb', default),
            'page': ('pf', 'page-break', default),
            'par': ('pf', 'par-end___', default),
            'pard': ('pf', 'par-def___', default),
            'keepn': ('pf', 'keep-w-nex', bool_st),
            'widctlpar': ('pf', 'widow-cntl', bool_st),
            'adjustright': ('pf', 'adjust-rgt', bool_st),
            'lang': ('pf', 'language__', self.__language_func),
            'ri': ('pf', 'right-inde', div_20),
            'fi': ('pf', 'fir-ln-ind', div_20),
            'li': ('pf', 'left-inden', div_20),
            'sb': ('pf', 'space-befo', div_20),
            'sa': ('pf', 'space-afte', div_20),
            'sl': ('pf', 'line-space', div_20),
            'deftab': ('pf', 'default-ta', div_20),
            'ql': ('pf', 'align_____<left', two_part),
            'qc': ('pf', 'align_____<cent', two_part),
            'qj': ('pf', 'align_____<just', two_part),
            'qr': ('pf', 'align_____<right', two_part),
            'nowidctlpar': ('pf', 'widow-cntr<false', two_part),
            'tx': ('pf', 'tab-stop__', div_20),
            'tb': ('pf', 'tab-bar-st', div_20),
            'tqr': ('pf', 'tab-right_', default),
            'tqdec': ('pf', 'tab-dec___', default),
            'tqc': ('pf', 'tab-center', default),
            'tlul': ('pf', 'leader-und', default),
            'tlhyph': ('pf', 'leader-hyp', default),
            'tldot': ('pf', 'leader-dot', default),
            'stylesheet': ('ss', 'style-shet', default),
            'sbasedon': ('ss', 'based-on__', default),
            'snext': ('ss', 'next-style', default),
            'cs': ('ss', 'char-style', default),
            's': ('ss', 'para-style', default),
            'pict': ('gr', 'picture___', default),
            'objclass': ('gr', 'obj-class_', default),
            'macpict': ('gr', 'mac-pic___', default),
            'sect': ('sc', 'section___', default),
            'sectd': ('sc', 'sect-defin', default),
            'endhere': ('sc', 'sect-note_', default),
            'pntext': ('ls', 'list-text_', default),
            'listtext': ('ls', 'list-text_', default),
            'pn': ('ls', 'list______', default),
            'pnseclvl': ('ls', 'list-level', default),
            'pncard': ('ls', 'list-cardi', bool_st),
            'pndec': ('ls', 'list-decim', bool_st),
            'pnucltr': ('ls', 'list-up-al', bool_st),
            'pnucrm': ('ls', 'list-up-ro', bool_st),
            'pnord': ('ls', 'list-ord__', bool_st),
            'pnordt': ('ls', 'list-ordte', bool_st),
            'pnlvlblt': ('ls', 'list-bulli', bool_st),
            'pnlvlbody': ('ls', 'list-simpi', bool_st),
            'pnlvlcont': ('ls', 'list-conti', bool_st),
            'pnhang': ('ls', 'list-hang_', bool_st),
            'pntxtb': ('ls', 'list-tebef', bool_st),
            'ilvl': ('ls', 'list-level', default),
            'ls': ('ls', 'list-id___', default),
            'pnstart': ('ls', 'list-start', default),
            'itap': ('ls', 'nest-level', default),
            'leveltext': ('ls', 'level-text', default),
            'levelnumbers': ('ls', 'level-numb', default),
            'list': ('ls', 'list-in-tb', default),
            'listlevel': ('ls', 'list-tb-le', default),
            'listname': ('ls', 'list-name_', default),
            'listtemplateid': ('ls', 'ls-tem-id_', default),
            'leveltemplateid': ('ls', 'lv-tem-id_', default),
            'listhybrid': ('ls', 'list-hybri', default),
            'levelstartat': ('ls', 'level-star', default),
            'levelspace': ('ls', 'level-spac', div_20),
            'levelindent': ('ls', 'level-inde', default),
            'levelnfc': ('ls', 'level-type', list_type),
            'levelnfcn': ('ls', 'level-type', list_type),
            'listid': ('ls', 'lis-tbl-id', default),
            'listoverride': ('ls', 'lis-overid', default),
            'pnlvl': ('ls', 'list-level', default),
            'rtf': ('ri', 'rtf_______', default),
            'deff': ('ri', 'deflt-font', default),
            'mac': ('ri', 'macintosh_', default),
            'ansi': ('ri', 'ansi______', default),
            'ansicpg': ('ri', 'ansi-codpg', default),
            'footnote': ('nt', 'footnote__', default),
            'ftnalt': ('nt', 'type______<endnote', two_part),
            'tc': ('an', 'toc_______', default),
            'bkmkstt': ('an', 'book-mk-st', default),
            'bkmkstart': ('an', 'book-mk-st', default),
            'bkmkend': ('an', 'book-mk-en', default),
            'xe': ('an', 'index-mark', default),
            'rxe': ('an', 'place_____', default),
            'bxe': ('in', 'index-bold', default),
            'ixe': ('in', 'index-ital', default),
            'txe': ('in', 'index-see_', default),
            'tcl': ('tc', 'toc-level_', default),
            'tcn': ('tc', 'toc-sup-nu', default),
            'field': ('fd', 'field_____', default),
            'fldinst': ('fd', 'field-inst', default),
            'fldrslt': ('fd', 'field-rslt', default),
            'datafield': ('fd', 'datafield_', default),
            'fonttbl': ('it', 'font-table', default),
            'colortbl': ('it', 'colr-table', default),
            'listoverridetable': ('it', 'lovr-table', default),
            'listtable': ('it', 'listtable_', default),
            'revtbl': ('it', 'revi-table', default),
            'b': ('ci', 'bold______', bool_st),
            'blue': ('ci', 'blue______', color),
            'caps': ('ci', 'caps______', bool_st),
            'cf': ('ci', 'font-color', default),
            'chftn': ('ci', 'footnot-mk', bool_st),
            'dn': ('ci', 'font-down_', div_2),
            'embo': ('ci', 'emboss____', bool_st),
            'f': ('ci', 'font-style', default),
            'fs': ('ci', 'font-size_', div_2),
            'green': ('ci', 'green_____', color),
            'i': ('ci', 'italics___', bool_st),
            'impr': ('ci', 'engrave___', bool_st),
            'outl': ('ci', 'outline___', bool_st),
            'plain': ('ci', 'plain_____', bool_st),
            'red': ('ci', 'red_______', color),
            'scaps': ('ci', 'small-caps', bool_st),
            'shad': ('ci', 'shadow____', bool_st),
            'strike': ('ci', 'strike-thr', bool_st),
            'striked': ('ci', 'dbl-strike', bool_st),
            'sub': ('ci', 'subscript_', bool_st),
            'super': ('ci', 'superscrip', bool_st),
            'nosupersub': ('ci', 'no-su-supe', self.__no_sup_sub_func),
            'up': ('ci', 'font-up___', div_2),
            'v': ('ci', 'hidden____', default),
            'trowd': ('tb', 'row-def___', default),
            'cell': ('tb', 'cell______', default),
            'row': ('tb', 'row_______', default),
            'intbl': ('tb', 'in-table__', default),
            'cols': ('tb', 'columns___', default),
            'trleft': ('tb', 'row-pos-le', div_20),
            'cellx': ('tb', 'cell-posit', div_20),
            'trhdr': ('tb', 'row-header', default),
            'info': ('di', 'doc-info__', default),
            'author': ('di', 'author____', default),
            'operator': ('di', 'operator__', default),
            'title': ('di', 'title_____', default),
            'keywords': ('di', 'keywords__', default),
            'doccomm': ('di', 'doc-notes_', default),
            'comment': ('di', 'doc-notes_', default),
            'subject': ('di', 'subject___', default),
            'creatim': ('di', 'create-tim', default),
            'yr': ('di', 'year______', default),
            'mo': ('di', 'month_____', default),
            'dy': ('di', 'day_______', default),
            'min': ('di', 'minute____', default),
            'revtim': ('di', 'revis-time', default),
            'nofwords': ('di', 'num-of-wor', default),
            'nofchars': ('di', 'num-of-chr', default),
            'nofpages': ('di', 'num-of-pag', default),
            'edmins': ('di', 'edit-time_', default),
            'headerf': ('hf', 'head-first', default),
            'headerl': ('hf', 'head-left_', default),
            'headerr': ('hf', 'head-right', default),
            'footerf': ('hf', 'foot-first', default),
            'footerl': ('hf', 'foot-left_', default),
            'footerr': ('hf', 'foot-right', default),
            'header': ('hf', 'header____', default),
            'footer': ('hf', 'footer____', default),
            'margl': ('pa', 'margin-lef', div_20),
            'margr': ('pa', 'margin-rig', div_20),
            'margb': ('pa', 'margin-bot', div_20),
            'margt': ('pa', 'margin-top', div_20),
            'gutter': ('pa', 'gutter____', div_20),
            'paperw': ('pa', 'paper-widt', div_20),
            'paperh': ('pa', 'paper-hght', div_20),
            'annotation': ('an', 'annotation', default),
            'ul': ('ul', 'underlined<continous', two_part),
            'uld': ('ul', 'underlined<dotted', two_part),
            'uldash': ('ul', 'underlined<dash', two_part),
            'uldashd': ('ul', 'underlined<dash-dot', two_part),
            'uldashdd': ('ul', 'underlined<dash-dot-dot', two_part),
            'uldb': ('ul', 'underlined<double', two_part),
            'ulhwave': ('ul', 'underlined<heavy-wave', two_part),
            'ulldash': ('ul', 'underlined<long-dash', two_part),
            'ulth': ('ul', 'underlined<thich', two_part),
            'ulthd': ('ul', 'underlined<thick-dotted', two_part),
            'ulthdash': ('ul', 'underlined<thick-dash', two_part),
            'ulthdashd': ('ul', 'underlined<thick-dash-dot', two_part),
            'ulthdashdd': ('ul', 'underlined<thick-dash-dot-dot', two_part),
            'ulthldash': ('ul', 'underlined<thick-long-dash', two_part),
            'ululdbwave': ('ul', 'underlined<double-wave', two_part),
            'ulw': ('ul', 'underlined<word', two_part),
            'ulwave': ('ul', 'underlined<wave', two_part),
            'ulnone': ('ul', 'underlined<false', two_part),
            'trbrdrh': ('bd', 'bor-t-r-hi', default),
            'trbrdrv': ('bd', 'bor-t-r-vi', default),
            'trbrdrt': ('bd', 'bor-t-r-to', default),
            'trbrdrl': ('bd', 'bor-t-r-le', default),
            'trbrdrb': ('bd', 'bor-t-r-bo', default),
            'trbrdrr': ('bd', 'bor-t-r-ri', default),
            'clbrdrb': ('bd', 'bor-cel-bo', default),
            'clbrdrt': ('bd', 'bor-cel-to', default),
            'clbrdrl': ('bd', 'bor-cel-le', default),
            'clbrdrr': ('bd', 'bor-cel-ri', default),
            'brdrb': ('bd', 'bor-par-bo', default),
            'brdrt': ('bd', 'bor-par-to', default),
            'brdrl': ('bd', 'bor-par-le', default),
            'brdrr': ('bd', 'bor-par-ri', default),
            'box': ('bd', 'bor-par-bx', default),
            'chbrdr': ('bd', 'bor-par-bo', default),
            'brdrbtw': ('bd', 'bor-for-ev', default),
            'brdrbar': ('bd', 'bor-outsid', default),
            'brdrnone': ('bd', 'bor-none__<false', two_part),
            'brdrs': ('bt', 'bdr-single', default),
            'brdrth': ('bt', 'bdr-doubtb', default),
            'brdrsh': ('bt', 'bdr-shadow', default),
            'brdrdb': ('bt', 'bdr-double', default),
            'brdrdot': ('bt', 'bdr-dotted', default),
            'brdrdash': ('bt', 'bdr-dashed', default),
            'brdrhair': ('bt', 'bdr-hair__', default),
            'brdrinset': ('bt', 'bdr-inset_', default),
            'brdrdashsm': ('bt', 'bdr-das-sm', default),
            'brdrdashd': ('bt', 'bdr-dot-sm', default),
            'brdrdashdd': ('bt', 'bdr-dot-do', default),
            'brdroutset': ('bt', 'bdr-outset', default),
            'brdrtriple': ('bt', 'bdr-trippl', default),
            'brdrtnthsg': ('bt', 'bdr-thsm__', default),
            'brdrthtnsg': ('bt', 'bdr-htsm__', default),
            'brdrtnthtnsg': ('bt', 'bdr-hthsm_', default),
            'brdrtnthmg': ('bt', 'bdr-thm___', default),
            'brdrthtnmg': ('bt', 'bdr-htm___', default),
            'brdrtnthtnmg': ('bt', 'bdr-hthm__', default),
            'brdrtnthlg': ('bt', 'bdr-thl___', default),
            'brdrtnthtnlg': ('bt', 'bdr-hthl__', default),
            'brdrwavy': ('bt', 'bdr-wavy__', default),
            'brdrwavydb': ('bt', 'bdr-d-wav_', default),
            'brdrdashdotstr': ('bt', 'bdr-strip_', default),
            'brdremboss': ('bt', 'bdr-embos_', default),
            'brdrengrave': ('bt', 'bdr-engra_', default),
            'brdrframe': ('bt', 'bdr-frame_', default),
            'brdrw': ('bt', 'bdr-li-wid', div_20),
            'brsp': ('bt', 'bdr-sp-wid', div_20),
            'brdrcf': ('bt', 'bdr-color_', default),
        }
        # NOTE(review): the keys 1246, 1346, 2046 and 2146 look out of place
        # among values that otherwise run 0-48; confirm against the RTF
        # specification before relying on them.
        self.__number_type_dict = {
            0: 'Arabic',
            1: 'uppercase Roman numeral',
            2: 'lowercase Roman numeral',
            3: 'uppercase letter',
            4: 'lowercase letter',
            5: 'ordinal number',
            6: 'cardianl text number',
            7: 'ordinal text number',
            10: 'Kanji numbering without the digit character',
            11: 'Kanji numbering with the digit character',
            1246: 'phonetic Katakana characters in aiueo order',
            1346: 'phonetic katakana characters in iroha order',
            14: 'double byte character',
            15: 'single byte character',
            16: 'Kanji numbering 3',
            17: 'Kanji numbering 4',
            18: 'Circle numbering',
            19: 'double-byte Arabic numbering',
            2046: 'phonetic double-byte Katakana characters',
            2146: 'phonetic double-byte katakana characters',
            22: 'Arabic with leading zero',
            23: 'bullet',
            24: 'Korean numbering 2',
            25: 'Korean numbering 1',
            26: 'Chinese numbering 1',
            27: 'Chinese numbering 2',
            28: 'Chinese numbering 3',
            29: 'Chinese numbering 4',
            30: 'Chinese Zodiac numbering 1',
            31: 'Chinese Zodiac numbering 2',
            32: 'Chinese Zodiac numbering 3',
            33: 'Taiwanese double-byte numbering 1',
            34: 'Taiwanese double-byte numbering 2',
            35: 'Taiwanese double-byte numbering 3',
            36: 'Taiwanese double-byte numbering 4',
            37: 'Chinese double-byte numbering 1',
            38: 'Chinese double-byte numbering 2',
            39: 'Chinese double-byte numbering 3',
            40: 'Chinese double-byte numbering 4',
            41: 'Korean double-byte numbering 1',
            42: 'Korean double-byte numbering 2',
            43: 'Korean double-byte numbering 3',
            44: 'Korean double-byte numbering 4',
            45: 'Hebrew non-standard decimal',
            46: 'Arabic Alif Ba Tah',
            47: 'Hebrew Biblical standard',
            48: 'Arabic Abjad style',
            255: 'No number',
        }
        self.__language_dict = {
            1078: 'Afrikaans',
            1052: 'Albanian',
            1025: 'Arabic',
            5121: 'Arabic Algeria',
            15361: 'Arabic Bahrain',
            3073: 'Arabic Egypt',
            1: 'Arabic General',
            2049: 'Arabic Iraq',
            11265: 'Arabic Jordan',
            13313: 'Arabic Kuwait',
            12289: 'Arabic Lebanon',
            4097: 'Arabic Libya',
            6145: 'Arabic Morocco',
            8193: 'Arabic Oman',
            16385: 'Arabic Qatar',
            10241: 'Arabic Syria',
            7169: 'Arabic Tunisia',
            14337: 'Arabic U.A.E.',
            9217: 'Arabic Yemen',
            1067: 'Armenian',
            1101: 'Assamese',
            2092: 'Azeri Cyrillic',
            1068: 'Azeri Latin',
            1069: 'Basque',
            1093: 'Bengali',
            4122: 'Bosnia Herzegovina',
            1026: 'Bulgarian',
            1109: 'Burmese',
            1059: 'Byelorussian',
            1027: 'Catalan',
            2052: 'Chinese China',
            4: 'Chinese General',
            3076: 'Chinese Hong Kong',
            4100: 'Chinese Singapore',
            1028: 'Chinese Taiwan',
            1050: 'Croatian',
            1029: 'Czech',
            1030: 'Danish',
            2067: 'Dutch Belgium',
            1043: 'Dutch Standard',
            3081: 'English Australia',
            10249: 'English Belize',
            2057: 'English British',
            4105: 'English Canada',
            9225: 'English Caribbean',
            9: 'English General',
            6153: 'English Ireland',
            8201: 'English Jamaica',
            5129: 'English New Zealand',
            13321: 'English Philippines',
            7177: 'English South Africa',
            11273: 'English Trinidad',
            1033: 'English United States',
            1061: 'Estonian',
            1080: 'Faerose',
            1065: 'Farsi',
            1035: 'Finnish',
            1036: 'French',
            2060: 'French Belgium',
            11276: 'French Cameroon',
            3084: 'French Canada',
            12300: "French Cote d'Ivoire",
            5132: 'French Luxembourg',
            13324: 'French Mali',
            6156: 'French Monaco',
            8204: 'French Reunion',
            10252: 'French Senegal',
            4108: 'French Swiss',
            7180: 'French West Indies',
            9228: 'French Democratic Republic of the Congo',
            1122: 'Frisian',
            1084: 'Gaelic',
            2108: 'Gaelic Ireland',
            1110: 'Galician',
            1079: 'Georgian',
            1031: 'German',
            3079: 'German Austrian',
            5127: 'German Liechtenstein',
            4103: 'German Luxembourg',
            2055: 'German Switzerland',
            1032: 'Greek',
            1095: 'Gujarati',
            1037: 'Hebrew',
            1081: 'Hindi',
            1038: 'Hungarian',
            1039: 'Icelandic',
            1057: 'Indonesian',
            1040: 'Italian',
            2064: 'Italian Switzerland',
            1041: 'Japanese',
            1099: 'Kannada',
            1120: 'Kashmiri',
            2144: 'Kashmiri India',
            1087: 'Kazakh',
            1107: 'Khmer',
            1088: 'Kirghiz',
            1111: 'Konkani',
            1042: 'Korean',
            2066: 'Korean Johab',
            1108: 'Lao',
            1062: 'Latvian',
            1063: 'Lithuanian',
            2087: 'Lithuanian Classic',
            1086: 'Malay',
            2110: 'Malay Brunei Darussalam',
            1100: 'Malayalam',
            1082: 'Maltese',
            1112: 'Manipuri',
            1102: 'Marathi',
            1104: 'Mongolian',
            1121: 'Nepali',
            2145: 'Nepali India',
            1044: 'Norwegian Bokmal',
            2068: 'Norwegian Nynorsk',
            1096: 'Oriya',
            1045: 'Polish',
            1046: 'Portuguese (Brazil)',
            2070: 'Portuguese (Portugal)',
            1094: 'Punjabi',
            1047: 'Rhaeto-Romanic',
            1048: 'Romanian',
            2072: 'Romanian Moldova',
            1049: 'Russian',
            2073: 'Russian Moldova',
            1083: 'Sami Lappish',
            1103: 'Sanskrit',
            3098: 'Serbian Cyrillic',
            2074: 'Serbian Latin',
            1113: 'Sindhi',
            1051: 'Slovak',
            1060: 'Slovenian',
            1070: 'Sorbian',
            11274: 'Spanish Argentina',
            16394: 'Spanish Bolivia',
            13322: 'Spanish Chile',
            9226: 'Spanish Colombia',
            5130: 'Spanish Costa Rica',
            7178: 'Spanish Dominican Republic',
            12298: 'Spanish Ecuador',
            17418: 'Spanish El Salvador',
            4106: 'Spanish Guatemala',
            18442: 'Spanish Honduras',
            2058: 'Spanish Mexico',
            3082: 'Spanish Modern',
            19466: 'Spanish Nicaragua',
            6154: 'Spanish Panama',
            15370: 'Spanish Paraguay',
            10250: 'Spanish Peru',
            20490: 'Spanish Puerto Rico',
            1034: 'Spanish Traditional',
            14346: 'Spanish Uruguay',
            8202: 'Spanish Venezuela',
            1072: 'Sutu',
            1089: 'Swahili',
            1053: 'Swedish',
            2077: 'Swedish Finland',
            1064: 'Tajik',
            1097: 'Tamil',
            1092: 'Tatar',
            1098: 'Telugu',
            1054: 'Thai',
            1105: 'Tibetan',
            1073: 'Tsonga',
            1074: 'Tswana',
            1055: 'Turkish',
            1090: 'Turkmen',
            1058: 'Ukranian',
            1056: 'Urdu',
            2080: 'Urdu India',
            2115: 'Uzbek Cyrillic',
            1091: 'Uzbek Latin',
            1075: 'Venda',
            1066: 'Vietnamese',
            1106: 'Welsh',
            1076: 'Xhosa',
            1085: 'Yiddish',
            1077: 'Zulu',
            1024: 'Unkown',
            255: 'Unkown',
        }

    def __ms_hex_func(self, pre, token, num):
        """Return a hex text line: drop num's first character, uppercase the rest."""
        num = num[1:].upper()
        return "tx<hx<__________<'%s\n" % num

    def ms_sub_func(self, pre, token, num):
        """Return a 'tx<mc' line carrying the token name; pre and num are unused."""
        return 'tx<mc<__________<%s\n' % token

    def default_func(self, pre, token, num):
        """Return a 'cw' line; a missing parameter is written as 'true'."""
        if num is None:
            num = 'true'
        return 'cw<%s<%s<nu<%s\n' % (pre, token, num)

    def __list_type_func(self, pre, token, num):
        """Return a 'cw' line whose value is the list-number type name.

        num is looked up as an integer in ``self.__number_type_dict``.
        A missing, non-integer or unknown num yields 'Arabic', unless the
        run level is above 3, in which case the bug handler is raised.
        """
        if num is None:
            type_name = 'Arabic'
        else:
            try:
                num = int(num)
            except ValueError:
                if self.__run_level > 3:
                    msg = 'number "%s" cannot be converted to integer\n' % num
                    raise self.__bug_handler(msg)
            type_name = self.__number_type_dict.get(num)
            if type_name is None:
                if self.__run_level > 3:
                    msg = ('No type for "%s" in self.__number_type_dict\n'
                           % num)
                    raise self.__bug_handler(msg)
                type_name = 'Arabic'
        return 'cw<%s<%s<nu<%s\n' % (pre, token, type_name)

    def __language_func(self, pre, token, num):
        """Return a 'cw' line whose value is the language name for num.

        The first run of digits in num is looked up in
        ``self.__language_dict``.  If there is no number or no entry, the
        value is 'not defined', or the bug handler is raised when the run
        level is above 3.
        """
        try:
            code = int(re.search('[0-9]+', num).group())
        except (TypeError, AttributeError):
            # num is None (bare control word) or contains no digits
            code = None
        lang_name = self.__language_dict.get(code)
        if not lang_name:
            lang_name = 'not defined'
            if self.__run_level > 3:
                msg = 'No entry for number "%s"' % num
                raise self.__bug_handler(msg)
        return 'cw<%s<%s<nu<%s\n' % (pre, token, lang_name)

    def two_part_func(self, pre, token, num):
        """Split a 'name<value' token and return it as a 'cw' line.

        The incoming num is ignored; the value comes from the token.
        """
        parts = token.split('<')
        return 'cw<%s<%s<nu<%s\n' % (pre, parts[0], parts[1])

    def divide_by_2(self, pre, token, num):
        """Return a 'cw' line with num divided by 2 (see divide_num)."""
        num = self.divide_num(num, 2)
        return 'cw<%s<%s<nu<%s\n' % (pre, token, num)

    def divide_by_20(self, pre, token, num):
        """Return a 'cw' line with num divided by 20 (see divide_num)."""
        num = self.divide_num(num, 20)
        return 'cw<%s<%s<nu<%s\n' % (pre, token, num)

    def text_func(self, pre, token, num=None):
        """Return a plain text line carrying the token."""
        return 'tx<nu<__________<%s\n' % token

    def ob_func(self, pre, token, num=None):
        """Increment the bracket counter and return an open-bracket line."""
        self.__bracket_count += 1
        return 'ob<nu<open-brack<%04d\n' % self.__bracket_count

    def cb_func(self, pre, token, num=None):
        """Return a close-bracket line, then decrement the bracket counter."""
        line = 'cb<nu<clos-brack<%04d\n' % self.__bracket_count
        self.__bracket_count -= 1
        return line

    def color_func(self, pre, token, num):
        """Return a 'cw' line with num written as uppercase hexadecimal.

        A trailing ';' on num is stripped and changes the third field from
        'nu' to 'en'.  A hex string whose length is not 2 gets a '0'
        prepended.
        """
        third_field = 'nu'
        if num[-1] == ';':
            num = num[:-1]
            third_field = 'en'
        num = '%X' % int(num)
        if len(num) != 2:
            num = '0' + num
        return 'cw<%s<%s<%s<%s\n' % (pre, token, third_field, num)

    def bool_st_func(self, pre, token, num):
        """Return a 'cw' line with the value 'true' or 'false'.

        No parameter, an empty parameter or '1' means true; '0' means
        false.  Any other value raises the bug handler.
        """
        # The decompiled condition used 'and' after 'num is None', which can
        # never hold, so a bare control word wrongly reached the error branch.
        if num is None or num == '' or num == '1':
            return 'cw<%s<%s<nu<true\n' % (pre, token)
        if num == '0':
            return 'cw<%s<%s<nu<false\n' % (pre, token)
        msg = 'boolean should have some value module process tokens\n'
        msg += 'token is ' + token + '\n'
        # %-format instead of '+' so a non-string num cannot raise TypeError
        msg += "'%s'\n" % num
        raise self.__bug_handler(msg)

    def __no_sup_sub_func(self, pre, token, num):
        """Return two lines that switch subscript and superscript off."""
        the_string = 'cw<ci<subscript_<nu<false\n'
        the_string += 'cw<ci<superscrip<nu<false\n'
        return the_string

    def divide_num(self, numerator, denominator):
        """Divide the number found in numerator and format it as '%0.2f'.

        The first run of digits and dots in numerator is used; any sign is
        ignored.  If no usable number is found, the return code is raised
        to at least 5 and 0 is returned, or the bug handler is raised when
        the run level is above 3.
        """
        try:
            numerator = float(re.search('[0-9.]+', numerator).group())
        except (TypeError, AttributeError, ValueError):
            # TypeError: numerator is not a string (None or 0);
            # AttributeError: no digits matched; ValueError: e.g. '.' alone
            if self.__run_level > 3:
                msg = 'no number to process?\n'
                msg += 'this indicates that the token '
                msg += ' \\(\\li\\) should have a number and does not\n'
                msg += 'numerator is "%s"\n' % numerator
                msg += 'denominator is "%s"\n' % denominator
                raise self.__bug_handler(msg)
            if 5 > self.__return_code:
                self.__return_code = 5
            return 0
        return '%0.2f' % round(numerator / denominator, 2)

    def split_let_num(self, token):
        """Split a control word into (letters, rest).

        Returns (letters, 0) when nothing follows the letters and
        (token, 0) when the token contains no letters; both cases raise
        the bug handler instead when the run level is above 3.
        """
        match_obj = re.search(self.__num_exp, token)
        if match_obj is None:
            if self.__run_level > 3:
                msg = "token is '%s' \n" % token
                raise self.__bug_handler(msg)
            return (token, 0)
        first = match_obj.group(1)
        second = match_obj.group(2)
        if not second:
            if self.__run_level > 3:
                msg = "token is '%s' \n" % token
                raise self.__bug_handler(msg)
            return (first, 0)
        return (first, second)

    def convert_to_hex(self, number):
        """Return int(number) formatted as uppercase hexadecimal."""
        num = int(number)
        try:
            return '%X' % num
        except TypeError:
            raise self.__bug_handler

    def process_cw(self, token):
        """Translate one control-word token through ``self.dict_token``.

        The leading character and all spaces are removed.  Tokens that are
        neither purely alphabetic nor one of the special symbols are split
        into name and parameter.  Returns the handler's output, or None
        when the control word is not in the table.
        """
        special = ['*', ':', '}', '{', '~', '_', '-', ';']
        token = token[1:]
        token = token.replace(' ', '')
        num = None
        if not token.isalpha() and token not in special:
            (token, num) = self.split_let_num(token)
        (pre, token, action) = self.dict_token.get(token, (None, None, None))
        if action:
            return action(pre, token, num)

    def initiate_token_actions(self):
        """Create ``self.action_for_token`` for '{', '}' and backslash."""
        self.action_for_token = {
            '{': self.ob_func,
            '}': self.cb_func,
            '\\': self.process_cw,
        }

    def evaluate_token(self, token):
        """Look up the first character of token and run its action.

        NOTE(review): ``self.dict_token`` holds 3-tuples and ``get`` may
        return None, so the two-name unpacking below cannot succeed as
        written.  Nothing in this class calls this method; confirm whether
        it can be removed.
        """
        (token, action) = self.dict_token.get(token[0:1])
        if action:
            line = action(token)
            return line
        return 'tx<nu<nu<nu<nu<%s\n' % token

    def __check_brackets(self, in_file):
        """Return 1 if the bracket checker reports a problem, else None."""
        self.__check_brack_obj = check_brackets.CheckBrackets(file=in_file)
        good_br = self.__check_brack_obj.check_brackets()[0]
        if not good_br:
            return 1

    def __write_tokens(self):
        """Translate every token of the input file into the temporary file.

        Returns the number of non-empty lines read.  Raises the exception
        handler when the first two tokens are not '\\{' and '\\rtf', when
        a token is not ASCII, or when a token contains a backslash followed
        by a space.
        """
        first_token = False
        second_token = False
        line_count = 0
        # 'with' closes both files even when the input is rejected
        with open(self.__file, 'r') as read_obj:
            with open(self.__write_to, 'w') as write_obj:
                for raw_line in read_obj:
                    token = raw_line.replace('\n', '')
                    if not token:
                        continue
                    line_count += 1
                    try:
                        # Python 2 byte string: decode only to validate
                        token.decode('us-ascii')
                    except UnicodeError as err:
                        msg = str(err)
                        msg += 'Invalid RTF: File not ascii encoded.\n'
                        raise self.__exception_handler(msg)
                    if not first_token:
                        if token != '\\{':
                            msg = ("Invalid RTF: document doesn't start "
                                   "with {\n")
                            raise self.__exception_handler(msg)
                        first_token = True
                    elif not second_token:
                        if token[0:4] != '\\rtf':
                            msg = ("Invalid RTF: document doesn't start "
                                   "with \\rtf \n")
                            raise self.__exception_handler(msg)
                        second_token = True
                    if token.find('\\ ') > -1:
                        msg = 'Invalid RTF: token "\\ " not valid. \n'
                        raise self.__exception_handler(msg)
                    if token[0:1] == '\\':
                        line = self.process_cw(token)
                        if line is not None:
                            write_obj.write(line)
                    else:
                        # plain text: '&...;' entities get their own tag
                        for field in re.split(self.__utf_exp, token):
                            if not field:
                                continue
                            if field[0:1] == '&':
                                write_obj.write(
                                    'tx<ut<__________<%s\n' % field)
                            else:
                                write_obj.write(
                                    'tx<nu<__________<%s\n' % field)
        return line_count

    def process_tokens(self):
        """Process the token file in place and return the return code.

        The translated tokens are written to a temporary file, handed to
        the copy helper to replace the input file, and the result is then
        checked for matching brackets.  Raises the exception handler for
        an empty file, invalid tokens or unmatched brackets.
        """
        try:
            line_count = self.__write_tokens()
            if not line_count:
                msg = 'Invalid RTF: file appears to be empty. \n'
                raise self.__exception_handler(msg)
            copy_obj = copy.Copy(bug_handler=self.__bug_handler)
            if self.__copy:
                copy_obj.copy_file(self.__write_to, 'processed_tokens.data')
            copy_obj.rename(self.__write_to, self.__file)
            os.remove(self.__write_to)
        finally:
            # do not leave the temporary file behind when processing fails
            if os.path.exists(self.__write_to):
                os.remove(self.__write_to)
        bad_brackets = self.__check_brackets(self.__file)
        if bad_brackets:
            msg = 'Invalid RTF: document does not have matching brackets.\n'
            raise self.__exception_handler(msg)
        return self.__return_code