# home *** CD-ROM | disk | FTP | other *** search
# Source Generated with Decompyle++
# File: in.pyc (Python 2.6)

import os
import re
import tempfile

from calibre.ebooks.rtf2xml import copy, check_brackets

class ProcessTokens:
    """Translate a file of RTF tokens (one per line) into tagged lines.

    Each input line is a single token.  Tokens that start with a backslash
    are looked up in ``dict_token`` and rendered by the handler stored
    there; every other token is written out as text.  ``process_tokens``
    is the entry point and overwrites ``in_file`` with the result.
    """

    def __init__(self, in_file, exception_handler, bug_handler, copy=None, run_level=1):
        """Store the configuration and build the lookup tables.

        in_file -- path of the token file; it is read and then overwritten
            by ``process_tokens``.
        exception_handler -- exception class raised for invalid input.
        bug_handler -- exception class raised for internal inconsistencies
            (only when ``run_level`` is greater than 3).
        copy -- when true, ``process_tokens`` also copies its output to
            'processed_tokens.data' through ``copy.Copy.copy_file``.
        run_level -- strictness; values above 3 turn recoverable problems
            into ``bug_handler`` errors.
        """
        self.__file = in_file
        self.__bug_handler = bug_handler
        self.__exception_handler = exception_handler
        self.__copy = copy
        self.__run_level = run_level
        # NOTE(review): tempfile.mktemp() only reserves a name and is
        # race-prone; kept because switching to mkstemp() would create a
        # file at construction time, which callers may not expect.
        self.__write_to = tempfile.mktemp()
        self.initiate_token_dict()
        self.compile_expressions()
        self.__bracket_count = 0

    def compile_expressions(self):
        """Compile the regular expressions used while processing tokens."""
        # Splits a control word into its leading letters and the remainder.
        self.__num_exp = re.compile('([a-zA-Z]+)(.*)')
        # Isolates '&...;' entities embedded in text tokens.
        self.__utf_exp = re.compile('(&.*?;)')

    def initiate_token_dict(self):
        """Build the control-word, number-type and language lookup tables.

        ``dict_token`` maps a control word to a tuple of
        (two-letter category, output name, handler method).  Also resets
        the return code to 0.
        """
        self.__return_code = 0
        self.dict_token = {
            'mshex': ('nu', '__________', self.__ms_hex_func),
            '{': ('nu', '{', self.ob_func),
            '}': ('nu', '}', self.cb_func),
            'ldblquote': ('mc', 'ldblquote', self.ms_sub_func),
            'rdblquote': ('mc', 'rdblquote', self.ms_sub_func),
            'rquote': ('mc', 'rquote', self.ms_sub_func),
            'lquote': ('mc', 'lquote', self.ms_sub_func),
            'emdash': ('mc', 'emdash', self.ms_sub_func),
            'endash': ('mc', 'endash', self.ms_sub_func),
            'bullet': ('mc', 'bullet', self.ms_sub_func),
            '~': ('mc', '~', self.ms_sub_func),
            'tab': ('mc', 'tab', self.ms_sub_func),
            '_': ('mc', '_', self.ms_sub_func),
            ';': ('mc', ';', self.ms_sub_func),
            '-': ('mc', '-', self.ms_sub_func),
            '*': ('ml', 'asterisk__', self.default_func),
            ':': ('ml', 'colon_____', self.default_func),
            'backslash': ('nu', '\\', self.text_func),
            'ob': ('nu', '{', self.text_func),
            'cb': ('nu', '}', self.text_func),
            'line': ('nu', 'hard-lineb', self.default_func),
            'page': ('pf', 'page-break', self.default_func),
            'par': ('pf', 'par-end___', self.default_func),
            'pard': ('pf', 'par-def___', self.default_func),
            'keepn': ('pf', 'keep-w-nex', self.bool_st_func),
            'widctlpar': ('pf', 'widow-cntl', self.bool_st_func),
            'adjustright': ('pf', 'adjust-rgt', self.bool_st_func),
            'lang': ('pf', 'language__', self.__language_func),
            'ri': ('pf', 'right-inde', self.divide_by_20),
            'fi': ('pf', 'fir-ln-ind', self.divide_by_20),
            'li': ('pf', 'left-inden', self.divide_by_20),
            'sb': ('pf', 'space-befo', self.divide_by_20),
            'sa': ('pf', 'space-afte', self.divide_by_20),
            'sl': ('pf', 'line-space', self.divide_by_20),
            'deftab': ('pf', 'default-ta', self.divide_by_20),
            'ql': ('pf', 'align_____<left', self.two_part_func),
            'qc': ('pf', 'align_____<cent', self.two_part_func),
            'qj': ('pf', 'align_____<just', self.two_part_func),
            'qr': ('pf', 'align_____<right', self.two_part_func),
            'nowidctlpar': ('pf', 'widow-cntr<false', self.two_part_func),
            'tx': ('pf', 'tab-stop__', self.divide_by_20),
            'tb': ('pf', 'tab-bar-st', self.divide_by_20),
            'tqr': ('pf', 'tab-right_', self.default_func),
            'tqdec': ('pf', 'tab-dec___', self.default_func),
            'tqc': ('pf', 'tab-center', self.default_func),
            'tlul': ('pf', 'leader-und', self.default_func),
            'tlhyph': ('pf', 'leader-hyp', self.default_func),
            'tldot': ('pf', 'leader-dot', self.default_func),
            'stylesheet': ('ss', 'style-shet', self.default_func),
            'sbasedon': ('ss', 'based-on__', self.default_func),
            'snext': ('ss', 'next-style', self.default_func),
            'cs': ('ss', 'char-style', self.default_func),
            's': ('ss', 'para-style', self.default_func),
            'pict': ('gr', 'picture___', self.default_func),
            'objclass': ('gr', 'obj-class_', self.default_func),
            'macpict': ('gr', 'mac-pic___', self.default_func),
            'sect': ('sc', 'section___', self.default_func),
            'sectd': ('sc', 'sect-defin', self.default_func),
            'endhere': ('sc', 'sect-note_', self.default_func),
            'pntext': ('ls', 'list-text_', self.default_func),
            'listtext': ('ls', 'list-text_', self.default_func),
            'pn': ('ls', 'list______', self.default_func),
            'pnseclvl': ('ls', 'list-level', self.default_func),
            'pncard': ('ls', 'list-cardi', self.bool_st_func),
            'pndec': ('ls', 'list-decim', self.bool_st_func),
            'pnucltr': ('ls', 'list-up-al', self.bool_st_func),
            'pnucrm': ('ls', 'list-up-ro', self.bool_st_func),
            'pnord': ('ls', 'list-ord__', self.bool_st_func),
            'pnordt': ('ls', 'list-ordte', self.bool_st_func),
            'pnlvlblt': ('ls', 'list-bulli', self.bool_st_func),
            'pnlvlbody': ('ls', 'list-simpi', self.bool_st_func),
            'pnlvlcont': ('ls', 'list-conti', self.bool_st_func),
            'pnhang': ('ls', 'list-hang_', self.bool_st_func),
            'pntxtb': ('ls', 'list-tebef', self.bool_st_func),
            'ilvl': ('ls', 'list-level', self.default_func),
            'ls': ('ls', 'list-id___', self.default_func),
            'pnstart': ('ls', 'list-start', self.default_func),
            'itap': ('ls', 'nest-level', self.default_func),
            'leveltext': ('ls', 'level-text', self.default_func),
            'levelnumbers': ('ls', 'level-numb', self.default_func),
            'list': ('ls', 'list-in-tb', self.default_func),
            'listlevel': ('ls', 'list-tb-le', self.default_func),
            'listname': ('ls', 'list-name_', self.default_func),
            'listtemplateid': ('ls', 'ls-tem-id_', self.default_func),
            'leveltemplateid': ('ls', 'lv-tem-id_', self.default_func),
            'listhybrid': ('ls', 'list-hybri', self.default_func),
            'levelstartat': ('ls', 'level-star', self.default_func),
            'levelspace': ('ls', 'level-spac', self.divide_by_20),
            'levelindent': ('ls', 'level-inde', self.default_func),
            'levelnfc': ('ls', 'level-type', self.__list_type_func),
            'levelnfcn': ('ls', 'level-type', self.__list_type_func),
            'listid': ('ls', 'lis-tbl-id', self.default_func),
            'listoverride': ('ls', 'lis-overid', self.default_func),
            'pnlvl': ('ls', 'list-level', self.default_func),
            'rtf': ('ri', 'rtf_______', self.default_func),
            'deff': ('ri', 'deflt-font', self.default_func),
            'mac': ('ri', 'macintosh_', self.default_func),
            'ansi': ('ri', 'ansi______', self.default_func),
            'ansicpg': ('ri', 'ansi-codpg', self.default_func),
            'footnote': ('nt', 'footnote__', self.default_func),
            'ftnalt': ('nt', 'type______<endnote', self.two_part_func),
            'tc': ('an', 'toc_______', self.default_func),
            'bkmkstt': ('an', 'book-mk-st', self.default_func),
            'bkmkstart': ('an', 'book-mk-st', self.default_func),
            'bkmkend': ('an', 'book-mk-en', self.default_func),
            'xe': ('an', 'index-mark', self.default_func),
            'rxe': ('an', 'place_____', self.default_func),
            'bxe': ('in', 'index-bold', self.default_func),
            'ixe': ('in', 'index-ital', self.default_func),
            'txe': ('in', 'index-see_', self.default_func),
            'tcl': ('tc', 'toc-level_', self.default_func),
            'tcn': ('tc', 'toc-sup-nu', self.default_func),
            'field': ('fd', 'field_____', self.default_func),
            'fldinst': ('fd', 'field-inst', self.default_func),
            'fldrslt': ('fd', 'field-rslt', self.default_func),
            'datafield': ('fd', 'datafield_', self.default_func),
            'fonttbl': ('it', 'font-table', self.default_func),
            'colortbl': ('it', 'colr-table', self.default_func),
            'listoverridetable': ('it', 'lovr-table', self.default_func),
            'listtable': ('it', 'listtable_', self.default_func),
            'revtbl': ('it', 'revi-table', self.default_func),
            'b': ('ci', 'bold______', self.bool_st_func),
            'blue': ('ci', 'blue______', self.color_func),
            'caps': ('ci', 'caps______', self.bool_st_func),
            'cf': ('ci', 'font-color', self.default_func),
            'chftn': ('ci', 'footnot-mk', self.bool_st_func),
            'dn': ('ci', 'font-down_', self.divide_by_2),
            'embo': ('ci', 'emboss____', self.bool_st_func),
            'f': ('ci', 'font-style', self.default_func),
            'fs': ('ci', 'font-size_', self.divide_by_2),
            'green': ('ci', 'green_____', self.color_func),
            'i': ('ci', 'italics___', self.bool_st_func),
            'impr': ('ci', 'engrave___', self.bool_st_func),
            'outl': ('ci', 'outline___', self.bool_st_func),
            'plain': ('ci', 'plain_____', self.bool_st_func),
            'red': ('ci', 'red_______', self.color_func),
            'scaps': ('ci', 'small-caps', self.bool_st_func),
            'shad': ('ci', 'shadow____', self.bool_st_func),
            'strike': ('ci', 'strike-thr', self.bool_st_func),
            'striked': ('ci', 'dbl-strike', self.bool_st_func),
            'sub': ('ci', 'subscript_', self.bool_st_func),
            'super': ('ci', 'superscrip', self.bool_st_func),
            'nosupersub': ('ci', 'no-su-supe', self.__no_sup_sub_func),
            'up': ('ci', 'font-up___', self.divide_by_2),
            'v': ('ci', 'hidden____', self.default_func),
            'trowd': ('tb', 'row-def___', self.default_func),
            'cell': ('tb', 'cell______', self.default_func),
            'row': ('tb', 'row_______', self.default_func),
            'intbl': ('tb', 'in-table__', self.default_func),
            'cols': ('tb', 'columns___', self.default_func),
            'trleft': ('tb', 'row-pos-le', self.divide_by_20),
            'cellx': ('tb', 'cell-posit', self.divide_by_20),
            'trhdr': ('tb', 'row-header', self.default_func),
            'info': ('di', 'doc-info__', self.default_func),
            'author': ('di', 'author____', self.default_func),
            'operator': ('di', 'operator__', self.default_func),
            'title': ('di', 'title_____', self.default_func),
            'keywords': ('di', 'keywords__', self.default_func),
            'doccomm': ('di', 'doc-notes_', self.default_func),
            'comment': ('di', 'doc-notes_', self.default_func),
            'subject': ('di', 'subject___', self.default_func),
            'creatim': ('di', 'create-tim', self.default_func),
            'yr': ('di', 'year______', self.default_func),
            'mo': ('di', 'month_____', self.default_func),
            'dy': ('di', 'day_______', self.default_func),
            'min': ('di', 'minute____', self.default_func),
            'revtim': ('di', 'revis-time', self.default_func),
            'nofwords': ('di', 'num-of-wor', self.default_func),
            'nofchars': ('di', 'num-of-chr', self.default_func),
            'nofpages': ('di', 'num-of-pag', self.default_func),
            'edmins': ('di', 'edit-time_', self.default_func),
            'headerf': ('hf', 'head-first', self.default_func),
            'headerl': ('hf', 'head-left_', self.default_func),
            'headerr': ('hf', 'head-right', self.default_func),
            'footerf': ('hf', 'foot-first', self.default_func),
            'footerl': ('hf', 'foot-left_', self.default_func),
            'footerr': ('hf', 'foot-right', self.default_func),
            'header': ('hf', 'header____', self.default_func),
            'footer': ('hf', 'footer____', self.default_func),
            'margl': ('pa', 'margin-lef', self.divide_by_20),
            'margr': ('pa', 'margin-rig', self.divide_by_20),
            'margb': ('pa', 'margin-bot', self.divide_by_20),
            'margt': ('pa', 'margin-top', self.divide_by_20),
            'gutter': ('pa', 'gutter____', self.divide_by_20),
            'paperw': ('pa', 'paper-widt', self.divide_by_20),
            'paperh': ('pa', 'paper-hght', self.divide_by_20),
            'annotation': ('an', 'annotation', self.default_func),
            'ul': ('ul', 'underlined<continous', self.two_part_func),
            'uld': ('ul', 'underlined<dotted', self.two_part_func),
            'uldash': ('ul', 'underlined<dash', self.two_part_func),
            'uldashd': ('ul', 'underlined<dash-dot', self.two_part_func),
            'uldashdd': ('ul', 'underlined<dash-dot-dot', self.two_part_func),
            'uldb': ('ul', 'underlined<double', self.two_part_func),
            'ulhwave': ('ul', 'underlined<heavy-wave', self.two_part_func),
            'ulldash': ('ul', 'underlined<long-dash', self.two_part_func),
            'ulth': ('ul', 'underlined<thich', self.two_part_func),
            'ulthd': ('ul', 'underlined<thick-dotted', self.two_part_func),
            'ulthdash': ('ul', 'underlined<thick-dash', self.two_part_func),
            'ulthdashd': ('ul', 'underlined<thick-dash-dot', self.two_part_func),
            'ulthdashdd': ('ul', 'underlined<thick-dash-dot-dot', self.two_part_func),
            'ulthldash': ('ul', 'underlined<thick-long-dash', self.two_part_func),
            'ululdbwave': ('ul', 'underlined<double-wave', self.two_part_func),
            'ulw': ('ul', 'underlined<word', self.two_part_func),
            'ulwave': ('ul', 'underlined<wave', self.two_part_func),
            'ulnone': ('ul', 'underlined<false', self.two_part_func),
            'trbrdrh': ('bd', 'bor-t-r-hi', self.default_func),
            'trbrdrv': ('bd', 'bor-t-r-vi', self.default_func),
            'trbrdrt': ('bd', 'bor-t-r-to', self.default_func),
            'trbrdrl': ('bd', 'bor-t-r-le', self.default_func),
            'trbrdrb': ('bd', 'bor-t-r-bo', self.default_func),
            'trbrdrr': ('bd', 'bor-t-r-ri', self.default_func),
            'clbrdrb': ('bd', 'bor-cel-bo', self.default_func),
            'clbrdrt': ('bd', 'bor-cel-to', self.default_func),
            'clbrdrl': ('bd', 'bor-cel-le', self.default_func),
            'clbrdrr': ('bd', 'bor-cel-ri', self.default_func),
            'brdrb': ('bd', 'bor-par-bo', self.default_func),
            'brdrt': ('bd', 'bor-par-to', self.default_func),
            'brdrl': ('bd', 'bor-par-le', self.default_func),
            'brdrr': ('bd', 'bor-par-ri', self.default_func),
            'box': ('bd', 'bor-par-bx', self.default_func),
            'chbrdr': ('bd', 'bor-par-bo', self.default_func),
            'brdrbtw': ('bd', 'bor-for-ev', self.default_func),
            'brdrbar': ('bd', 'bor-outsid', self.default_func),
            'brdrnone': ('bd', 'bor-none__<false', self.two_part_func),
            'brdrs': ('bt', 'bdr-single', self.default_func),
            'brdrth': ('bt', 'bdr-doubtb', self.default_func),
            'brdrsh': ('bt', 'bdr-shadow', self.default_func),
            'brdrdb': ('bt', 'bdr-double', self.default_func),
            'brdrdot': ('bt', 'bdr-dotted', self.default_func),
            'brdrdash': ('bt', 'bdr-dashed', self.default_func),
            'brdrhair': ('bt', 'bdr-hair__', self.default_func),
            'brdrinset': ('bt', 'bdr-inset_', self.default_func),
            'brdrdashsm': ('bt', 'bdr-das-sm', self.default_func),
            'brdrdashd': ('bt', 'bdr-dot-sm', self.default_func),
            'brdrdashdd': ('bt', 'bdr-dot-do', self.default_func),
            'brdroutset': ('bt', 'bdr-outset', self.default_func),
            'brdrtriple': ('bt', 'bdr-trippl', self.default_func),
            'brdrtnthsg': ('bt', 'bdr-thsm__', self.default_func),
            'brdrthtnsg': ('bt', 'bdr-htsm__', self.default_func),
            'brdrtnthtnsg': ('bt', 'bdr-hthsm_', self.default_func),
            'brdrtnthmg': ('bt', 'bdr-thm___', self.default_func),
            'brdrthtnmg': ('bt', 'bdr-htm___', self.default_func),
            'brdrtnthtnmg': ('bt', 'bdr-hthm__', self.default_func),
            'brdrtnthlg': ('bt', 'bdr-thl___', self.default_func),
            'brdrtnthtnlg': ('bt', 'bdr-hthl__', self.default_func),
            'brdrwavy': ('bt', 'bdr-wavy__', self.default_func),
            'brdrwavydb': ('bt', 'bdr-d-wav_', self.default_func),
            'brdrdashdotstr': ('bt', 'bdr-strip_', self.default_func),
            'brdremboss': ('bt', 'bdr-embos_', self.default_func),
            'brdrengrave': ('bt', 'bdr-engra_', self.default_func),
            'brdrframe': ('bt', 'bdr-frame_', self.default_func),
            'brdrw': ('bt', 'bdr-li-wid', self.divide_by_20),
            'brsp': ('bt', 'bdr-sp-wid', self.divide_by_20),
            'brdrcf': ('bt', 'bdr-color_', self.default_func),
        }
        self.__number_type_dict = {
            0: 'Arabic',
            1: 'uppercase Roman numeral',
            2: 'lowercase Roman numeral',
            3: 'uppercase letter',
            4: 'lowercase letter',
            5: 'ordinal number',
            6: 'cardianl text number',
            7: 'ordinal text number',
            10: 'Kanji numbering without the digit character',
            11: 'Kanji numbering with the digit character',
            # The RTF specification lists codes 12, 13, 20 and 21 as
            # "46 phonetic katakana characters ..."; the 46 is part of the
            # description, not of the code.
            12: 'phonetic Katakana characters in aiueo order',
            13: 'phonetic katakana characters in iroha order',
            # Legacy keys (code and "46" run together), kept for
            # backward compatibility.
            1246: 'phonetic Katakana characters in aiueo order',
            1346: 'phonetic katakana characters in iroha order',
            14: 'double byte character',
            15: 'single byte character',
            16: 'Kanji numbering 3',
            17: 'Kanji numbering 4',
            18: 'Circle numbering',
            19: 'double-byte Arabic numbering',
            20: 'phonetic double-byte Katakana characters',
            21: 'phonetic double-byte katakana characters',
            2046: 'phonetic double-byte Katakana characters',
            2146: 'phonetic double-byte katakana characters',
            22: 'Arabic with leading zero',
            23: 'bullet',
            24: 'Korean numbering 2',
            25: 'Korean numbering 1',
            26: 'Chinese numbering 1',
            27: 'Chinese numbering 2',
            28: 'Chinese numbering 3',
            29: 'Chinese numbering 4',
            30: 'Chinese Zodiac numbering 1',
            31: 'Chinese Zodiac numbering 2',
            32: 'Chinese Zodiac numbering 3',
            33: 'Taiwanese double-byte numbering 1',
            34: 'Taiwanese double-byte numbering 2',
            35: 'Taiwanese double-byte numbering 3',
            36: 'Taiwanese double-byte numbering 4',
            37: 'Chinese double-byte numbering 1',
            38: 'Chinese double-byte numbering 2',
            39: 'Chinese double-byte numbering 3',
            40: 'Chinese double-byte numbering 4',
            41: 'Korean double-byte numbering 1',
            42: 'Korean double-byte numbering 2',
            43: 'Korean double-byte numbering 3',
            44: 'Korean double-byte numbering 4',
            45: 'Hebrew non-standard decimal',
            46: 'Arabic Alif Ba Tah',
            47: 'Hebrew Biblical standard',
            48: 'Arabic Abjad style',
            255: 'No number',
        }
        self.__language_dict = {
            1078: 'Afrikaans',
            1052: 'Albanian',
            1025: 'Arabic',
            5121: 'Arabic Algeria',
            15361: 'Arabic Bahrain',
            3073: 'Arabic Egypt',
            1: 'Arabic General',
            2049: 'Arabic Iraq',
            11265: 'Arabic Jordan',
            13313: 'Arabic Kuwait',
            12289: 'Arabic Lebanon',
            4097: 'Arabic Libya',
            6145: 'Arabic Morocco',
            8193: 'Arabic Oman',
            16385: 'Arabic Qatar',
            10241: 'Arabic Syria',
            7169: 'Arabic Tunisia',
            14337: 'Arabic U.A.E.',
            9217: 'Arabic Yemen',
            1067: 'Armenian',
            1101: 'Assamese',
            2092: 'Azeri Cyrillic',
            1068: 'Azeri Latin',
            1069: 'Basque',
            1093: 'Bengali',
            4122: 'Bosnia Herzegovina',
            1026: 'Bulgarian',
            1109: 'Burmese',
            1059: 'Byelorussian',
            1027: 'Catalan',
            2052: 'Chinese China',
            4: 'Chinese General',
            3076: 'Chinese Hong Kong',
            4100: 'Chinese Singapore',
            1028: 'Chinese Taiwan',
            1050: 'Croatian',
            1029: 'Czech',
            1030: 'Danish',
            2067: 'Dutch Belgium',
            1043: 'Dutch Standard',
            3081: 'English Australia',
            10249: 'English Belize',
            2057: 'English British',
            4105: 'English Canada',
            9225: 'English Caribbean',
            9: 'English General',
            6153: 'English Ireland',
            8201: 'English Jamaica',
            5129: 'English New Zealand',
            13321: 'English Philippines',
            7177: 'English South Africa',
            11273: 'English Trinidad',
            1033: 'English United States',
            1061: 'Estonian',
            1080: 'Faerose',
            1065: 'Farsi',
            1035: 'Finnish',
            1036: 'French',
            2060: 'French Belgium',
            11276: 'French Cameroon',
            3084: 'French Canada',
            12300: "French Cote d'Ivoire",
            5132: 'French Luxembourg',
            13324: 'French Mali',
            6156: 'French Monaco',
            8204: 'French Reunion',
            10252: 'French Senegal',
            4108: 'French Swiss',
            7180: 'French West Indies',
            9228: 'French Democratic Republic of the Congo',
            1122: 'Frisian',
            1084: 'Gaelic',
            2108: 'Gaelic Ireland',
            1110: 'Galician',
            1079: 'Georgian',
            1031: 'German',
            3079: 'German Austrian',
            5127: 'German Liechtenstein',
            4103: 'German Luxembourg',
            2055: 'German Switzerland',
            1032: 'Greek',
            1095: 'Gujarati',
            1037: 'Hebrew',
            1081: 'Hindi',
            1038: 'Hungarian',
            1039: 'Icelandic',
            1057: 'Indonesian',
            1040: 'Italian',
            2064: 'Italian Switzerland',
            1041: 'Japanese',
            1099: 'Kannada',
            1120: 'Kashmiri',
            2144: 'Kashmiri India',
            1087: 'Kazakh',
            1107: 'Khmer',
            1088: 'Kirghiz',
            1111: 'Konkani',
            1042: 'Korean',
            2066: 'Korean Johab',
            1108: 'Lao',
            1062: 'Latvian',
            1063: 'Lithuanian',
            2087: 'Lithuanian Classic',
            1086: 'Malay',
            2110: 'Malay Brunei Darussalam',
            1100: 'Malayalam',
            1082: 'Maltese',
            1112: 'Manipuri',
            1102: 'Marathi',
            1104: 'Mongolian',
            1121: 'Nepali',
            2145: 'Nepali India',
            1044: 'Norwegian Bokmal',
            2068: 'Norwegian Nynorsk',
            1096: 'Oriya',
            1045: 'Polish',
            1046: 'Portuguese (Brazil)',
            2070: 'Portuguese (Portugal)',
            1094: 'Punjabi',
            1047: 'Rhaeto-Romanic',
            1048: 'Romanian',
            2072: 'Romanian Moldova',
            1049: 'Russian',
            2073: 'Russian Moldova',
            1083: 'Sami Lappish',
            1103: 'Sanskrit',
            3098: 'Serbian Cyrillic',
            2074: 'Serbian Latin',
            1113: 'Sindhi',
            1051: 'Slovak',
            1060: 'Slovenian',
            1070: 'Sorbian',
            11274: 'Spanish Argentina',
            16394: 'Spanish Bolivia',
            13322: 'Spanish Chile',
            9226: 'Spanish Colombia',
            5130: 'Spanish Costa Rica',
            7178: 'Spanish Dominican Republic',
            12298: 'Spanish Ecuador',
            17418: 'Spanish El Salvador',
            4106: 'Spanish Guatemala',
            18442: 'Spanish Honduras',
            2058: 'Spanish Mexico',
            3082: 'Spanish Modern',
            19466: 'Spanish Nicaragua',
            6154: 'Spanish Panama',
            15370: 'Spanish Paraguay',
            10250: 'Spanish Peru',
            20490: 'Spanish Puerto Rico',
            1034: 'Spanish Traditional',
            14346: 'Spanish Uruguay',
            8202: 'Spanish Venezuela',
            1072: 'Sutu',
            1089: 'Swahili',
            1053: 'Swedish',
            2077: 'Swedish Finland',
            1064: 'Tajik',
            1097: 'Tamil',
            1092: 'Tatar',
            1098: 'Telugu',
            1054: 'Thai',
            1105: 'Tibetan',
            1073: 'Tsonga',
            1074: 'Tswana',
            1055: 'Turkish',
            1090: 'Turkmen',
            1058: 'Ukranian',
            1056: 'Urdu',
            2080: 'Urdu India',
            2115: 'Uzbek Cyrillic',
            1091: 'Uzbek Latin',
            1075: 'Venda',
            1066: 'Vietnamese',
            1106: 'Welsh',
            1076: 'Xhosa',
            1085: 'Yiddish',
            1077: 'Zulu',
            1024: 'Unkown',
            255: 'Unkown',
        }

    def __ms_hex_func(self, pre, token, num):
        """Return a hex text line; the first character of num is dropped."""
        return "tx<hx<__________<'%s\n" % num[1:].upper()

    def ms_sub_func(self, pre, token, num):
        """Return a 'tx<mc' line carrying the output name of the token."""
        return 'tx<mc<__________<%s\n' % token

    def default_func(self, pre, token, num):
        """Return a control-word line; a missing number becomes 'true'."""
        if num is None:
            num = 'true'
        return 'cw<%s<%s<nu<%s\n' % (pre, token, num)

    def __list_type_func(self, pre, token, num):
        """Return a control-word line naming the list numbering type.

        num is looked up in the number-type table.  A missing, unparsable
        or unknown number yields 'Arabic', unless the run level is above 3,
        in which case unparsable and unknown numbers raise the bug handler.
        """
        type_name = 'Arabic'
        if num is not None:
            try:
                key = int(num)
            except ValueError:
                if self.__run_level > 3:
                    msg = 'number "%s" cannot be converted to integer\n' % num
                    raise self.__bug_handler(msg)
                key = None
            type_name = self.__number_type_dict.get(key)
            if type_name is None:
                if self.__run_level > 3:
                    msg = 'No type for "%s" in self.__number_type_dict\n' % num
                    raise self.__bug_handler(msg)
                type_name = 'Arabic'
        return 'cw<%s<%s<nu<%s\n' % (pre, token, type_name)

    def __language_func(self, pre, token, num):
        """Return a control-word line naming the language for code num.

        The first run of digits in num is used as the key.  A missing,
        non-numeric or unknown code yields 'not defined', or raises the bug
        handler when the run level is above 3.
        """
        lang_name = None
        # num is None for a bare control word and may be the integer 0
        # when split_let_num found no argument; neither can be searched.
        match_obj = None
        if num:
            match_obj = re.search('[0-9]+', num)
        if match_obj is not None:
            lang_name = self.__language_dict.get(int(match_obj.group()))
        if not lang_name:
            lang_name = 'not defined'
            if self.__run_level > 3:
                msg = 'No entry for number "%s"' % num
                raise self.__bug_handler(msg)
        return 'cw<%s<%s<nu<%s\n' % (pre, token, lang_name)

    def two_part_func(self, pre, token, num):
        """Split an output name of the form 'name<value' into both fields."""
        parts = token.split('<')
        return 'cw<%s<%s<nu<%s\n' % (pre, parts[0], parts[1])

    def divide_by_2(self, pre, token, num):
        """Return a control-word line with num divided by 2."""
        num = self.divide_num(num, 2)
        return 'cw<%s<%s<nu<%s\n' % (pre, token, num)

    def divide_by_20(self, pre, token, num):
        """Return a control-word line with num divided by 20."""
        num = self.divide_num(num, 20)
        return 'cw<%s<%s<nu<%s\n' % (pre, token, num)

    def text_func(self, pre, token, num=None):
        """Return a plain text line carrying the output name of the token."""
        return 'tx<nu<__________<%s\n' % token

    def ob_func(self, pre, token, num=None):
        """Increment the bracket counter and return an open-bracket line."""
        self.__bracket_count += 1
        return 'ob<nu<open-brack<%04d\n' % self.__bracket_count

    def cb_func(self, pre, token, num=None):
        """Return a close-bracket line, then decrement the bracket counter."""
        line = 'cb<nu<clos-brack<%04d\n' % self.__bracket_count
        self.__bracket_count -= 1
        return line

    def color_func(self, pre, token, num):
        """Return a control-word line with num rendered as upper-case hex.

        A trailing ';' on num is stripped and changes the third field from
        'nu' to 'en'.
        """
        third_field = 'nu'
        if num[-1] == ';':
            num = num[:-1]
            third_field = 'en'
        num = '%X' % int(num)
        if len(num) != 2:
            num = '0' + num
        return 'cw<%s<%s<%s<%s\n' % (pre, token, third_field, num)

    def bool_st_func(self, pre, token, num):
        """Return a true/false control-word line.

        A missing, empty or '1' argument means true and '0' means false.
        Any other argument raises the bug handler.
        """
        if num is None or num == '' or num == '1':
            return 'cw<%s<%s<nu<true\n' % (pre, token)
        if num == '0':
            return 'cw<%s<%s<nu<false\n' % (pre, token)
        msg = 'boolean should have some value module process tokens\n'
        msg += 'token is ' + token + '\n'
        msg += "'%s'\n" % (num,)
        raise self.__bug_handler(msg)

    def __no_sup_sub_func(self, pre, token, num):
        """Return two lines switching off both subscript and superscript."""
        the_string = 'cw<ci<subscript_<nu<false\n'
        the_string += 'cw<ci<superscrip<nu<false\n'
        return the_string

    def divide_num(self, numerator, denominator):
        """Divide the number found in numerator and format it as '%0.2f'.

        numerator -- string holding an optionally negative number.
        denominator -- number to divide by.

        When no number can be read, the bug handler is raised if the run
        level is above 3; otherwise the return code is raised to at least
        5 and the integer 0 is returned.
        """
        try:
            # Keep the sign: negative values are legitimate measurements.
            numerator = float(re.search('-?[0-9.]+', numerator).group())
        except (TypeError, AttributeError, ValueError):
            if self.__run_level > 3:
                msg = 'no number to process?\n'
                msg += 'this indicates that the token '
                msg += ' \\(\\li\\) should have a number and does not\n'
                msg += 'numerator is "%s"\n' % numerator
                msg += 'denominator is "%s"\n' % denominator
                raise self.__bug_handler(msg)
            if 5 > self.__return_code:
                self.__return_code = 5
            return 0
        return '%0.2f' % round(numerator / denominator, 2)

    def split_let_num(self, token):
        """Split a control word into (letters, remainder).

        Returns (letters, 0) when nothing follows the letters and
        (token, 0) when the token contains no letters; both cases raise the
        bug handler instead when the run level is above 3.
        """
        match_obj = self.__num_exp.search(token)
        if match_obj is None:
            if self.__run_level > 3:
                msg = "token is '%s' \n" % token
                raise self.__bug_handler(msg)
            return (token, 0)
        first = match_obj.group(1)
        second = match_obj.group(2)
        if not second:
            if self.__run_level > 3:
                msg = "token is '%s' \n" % token
                raise self.__bug_handler(msg)
            return (first, 0)
        return (first, second)

    def convert_to_hex(self, number):
        """Return int(number) formatted as upper-case hexadecimal.

        Errors from int() propagate unchanged; a formatting failure raises
        the bug handler.
        """
        num = int(number)
        try:
            return '%X' % num
        except (TypeError, ValueError):
            raise self.__bug_handler

    def process_cw(self, token):
        """Render one backslash token through its ``dict_token`` handler.

        The leading character and all spaces are removed first.  Returns
        the handler's output, or None when the control word is unknown.
        """
        special = ['*', ':', '}', '{', '~', '_', '-', ';']
        token = token[1:].replace(' ', '')
        num = None
        if not token.isalpha() and token not in special:
            (token, num) = self.split_let_num(token)
        (pre, token, action) = self.dict_token.get(token, (None, None, None))
        if action:
            return action(pre, token, num)
        return None

    def initiate_token_actions(self):
        """Build ``action_for_token``, keyed by the first character of a token."""
        self.action_for_token = {
            '{': self.ob_func,
            '}': self.cb_func,
            '\\': self.process_cw,
        }

    def evaluate_token(self, token):
        """Dispatch a token on its first character.

        NOTE(review): the lookup result is unpacked into two names, but
        ``dict_token`` as built by ``initiate_token_dict`` holds 3-tuples
        and ``get`` returns None for unknown keys, so this raises for any
        token.  It may have been meant to use ``action_for_token``; the
        logic is left untouched until the intent is confirmed.
        """
        (token, action) = self.dict_token.get(token[0:1])
        if action:
            line = action(token)
            return line
        return 'tx<nu<nu<nu<nu<%s\n' % token

    def __check_brackets(self, in_file):
        """Return 1 when CheckBrackets reports bad brackets, else None."""
        self.__check_brack_obj = check_brackets.CheckBrackets(file=in_file)
        good_br = self.__check_brack_obj.check_brackets()[0]
        if not good_br:
            return 1
        return None

    def process_tokens(self):
        """Convert the token file in place and return the return code.

        Reads the input one token per line, writes the converted lines to
        a temporary file, copies that file over the input, removes it and
        finally checks the brackets of the result.

        Raises the exception handler when the input is not ASCII, does not
        start with '\\{' followed by '\\rtf', contains the token '\\ ', is
        empty, or ends up with unmatched brackets.
        """
        first_token = 0
        second_token = 0
        line_count = 0
        # Context managers close both files even when validation raises.
        with open(self.__file, 'r') as read_obj:
            with open(self.__write_to, 'w') as write_obj:
                for line_to_read in read_obj:
                    token = line_to_read.replace('\n', '')
                    if not token:
                        continue
                    line_count += 1
                    try:
                        # Byte strings (Python 2 str) are decoded; text
                        # strings are encoded.  Either way non-ASCII data
                        # raises a UnicodeError subclass.
                        if isinstance(token, bytes):
                            token.decode('us-ascii')
                        else:
                            token.encode('us-ascii')
                    except UnicodeError as err:
                        msg = str(err)
                        msg += 'Invalid RTF: File not ascii encoded.\n'
                        raise self.__exception_handler(msg)
                    if not first_token:
                        if token != '\\{':
                            msg = "Invalid RTF: document doesn't start with {\n"
                            raise self.__exception_handler(msg)
                        first_token = 1
                    elif not second_token:
                        if token[0:4] != '\\rtf':
                            msg = "Invalid RTF: document doesn't start with \\rtf \n"
                            raise self.__exception_handler(msg)
                        second_token = 1
                    if token.find('\\ ') > -1:
                        msg = 'Invalid RTF: token "\\ " not valid. \n'
                        raise self.__exception_handler(msg)
                    if token[0:1] == '\\':
                        line = self.process_cw(token)
                        if line is not None:
                            write_obj.write(line)
                    else:
                        # Plain text: '&...;' entities get their own line type.
                        for field in self.__utf_exp.split(token):
                            if not field:
                                continue
                            if field[0:1] == '&':
                                write_obj.write('tx<ut<__________<%s\n' % field)
                            else:
                                write_obj.write('tx<nu<__________<%s\n' % field)
        if not line_count:
            msg = 'Invalid RTF: file appears to be empty. \n'
            raise self.__exception_handler(msg)
        copy_obj = copy.Copy(bug_handler=self.__bug_handler)
        if self.__copy:
            copy_obj.copy_file(self.__write_to, 'processed_tokens.data')
        copy_obj.rename(self.__write_to, self.__file)
        os.remove(self.__write_to)
        bad_brackets = self.__check_brackets(self.__file)
        if bad_brackets:
            msg = 'Invalid RTF: document does not have matching brackets.\n'
            raise self.__exception_handler(msg)
        return self.__return_code