home *** CD-ROM | disk | FTP | other *** search
Wrap
# Source Generated with Decompyle++
# File: in.pyc (Python 2.6)

import sys
import os
import tempfile
import cStringIO
from calibre.ebooks.rtf2xml import get_char_map, copy
from calibre.ebooks.rtf2xml.char_set import char_set

# Font faces that have their own character maps and are never upper-cased.
_SPECIAL_FONTS = ('Symbol', 'Wingdings', 'Zapf Dingbats')


class Hex2Utf8:
    """Rewrite hex character tokens of a tokenized file using character maps.

    The input file is read line by line.  The first 16 characters of each
    line are treated as a token identifier, character 17 onwards (minus the
    trailing newline) as the token payload.  Depending on
    ``area_to_convert`` either the preamble or the body of the file is
    processed, and the result is written back over the input file.
    """

    def __init__(self, in_file, area_to_convert, char_file, default_char_map,
                 bug_handler, invalid_rtf_handler, copy=None, temp_dir=None,
                 symbol=None, wingdings=None, caps=None, convert_caps=None,
                 dingbats=None, run_level=1):
        """Store the conversion settings.

        Parameters:
            in_file -- path of the file to convert (rewritten in place).
            area_to_convert -- ``'preamble'`` or ``'body'``.
            char_file -- stored, but replaced by the bundled ``char_set``
                when the conversion starts.
            default_char_map -- name of the map used for the default font.
            bug_handler -- exception class raised on developer errors.
            invalid_rtf_handler -- stored for callers; not used here.
            copy -- if true, keep a debug copy of the converted output.
            temp_dir -- accepted for compatibility; not used.
            symbol, wingdings, dingbats -- if true, load the matching maps.
            caps -- stored; not consulted by the conversion itself.
            convert_caps -- accepted for compatibility; the caps and font
                conversion switches start at 0 and are set through
                ``update_values``.
            run_level -- above 4, unknown characters raise ``bug_handler``.

        Raises:
            bug_handler -- if ``area_to_convert`` is not a valid value.
        """
        # The handlers are stored first so that the validation below can
        # actually raise bug_handler (it used to be referenced before
        # assignment).
        self.__bug_handler = bug_handler
        self.__invalid_rtf_handler = invalid_rtf_handler
        self.__file = in_file
        self.__copy = copy
        if area_to_convert not in ('preamble', 'body'):
            msg = 'Developer error! Wrong flag.\nin module "hex_2_utf8.py\n"area_to_convert" must be "body" or "preamble"\n'
            raise self.__bug_handler(msg)
        self.__char_file = char_file
        self.__area_to_convert = area_to_convert
        self.__default_char_map = default_char_map
        self.__symbol = symbol
        self.__wingdings = wingdings
        self.__dingbats = dingbats
        self.__caps = caps
        self.__convert_caps = 0
        self.__convert_symbol = 0
        self.__convert_wingdings = 0
        self.__convert_zapf = 0
        self.__run_level = run_level
        # The temporary output file is created securely when a conversion
        # starts (see __run_conversion) instead of reserving a name here.
        self.__write_to = None

    def update_values(self, file, area_to_convert, char_file, convert_caps,
                      convert_symbol, convert_wingdings, convert_zapf,
                      copy=None, temp_dir=None, symbol=None, wingdings=None,
                      caps=None, dingbats=None):
        """Re-target this converter at another file / area and set switches.

        ``char_file`` and ``temp_dir`` are accepted for compatibility but
        are not stored.

        Raises:
            bug_handler -- if ``area_to_convert`` is not a valid value.
        """
        self.__file = file
        self.__copy = copy
        if area_to_convert not in ('preamble', 'body'):
            msg = 'in module "hex_2_utf8.py\n"area_to_convert" must be "body" or "preamble"\n'
            raise self.__bug_handler(msg)
        self.__area_to_convert = area_to_convert
        self.__symbol = symbol
        self.__wingdings = wingdings
        self.__dingbats = dingbats
        self.__caps = caps
        self.__convert_caps = convert_caps
        self.__convert_symbol = convert_symbol
        self.__convert_wingdings = convert_wingdings
        self.__convert_zapf = convert_zapf

    def __initiate_values(self):
        """Load the character maps and build the token dispatch tables."""
        self.__char_file = cStringIO.StringIO(char_set)
        char_map_obj = get_char_map.GetCharMap(
            char_file=self.__char_file,
            bug_handler=self.__bug_handler)

        def merged(*map_names):
            # Later maps override earlier ones, as with dict.update().
            combined = {}
            for map_name in map_names:
                combined.update(char_map_obj.get_char_map(map=map_name))
            return combined

        self.__def_dict = merged(
            self.__default_char_map, 'bottom_128', 'ms_standard')
        self.__current_dict = self.__def_dict
        self.__current_dict_name = 'default'
        self.__in_caps = 0
        self.__special_fonts_found = 0
        if self.__symbol:
            self.__symbol_dict = merged('SYMBOL', 'ms_symbol')
        if self.__wingdings:
            self.__wingdings_dict = merged('wingdings', 'ms_wingdings')
        if self.__dingbats:
            self.__dingbats_dict = merged('dingbats', 'ms_dingbats')
        self.__caps_uni_dict = char_map_obj.get_char_map(map='caps_uni')

        # State -> handler while converting the preamble.
        self.__preamble_state_dict = {
            'preamble': self.__preamble_func,
            'body': self.__body_func,
            'mi<mk<body-open_': self.__found_body_func,
            'tx<hx<__________': self.__hex_text_func,
        }
        # State -> handler while converting the body.
        self.__body_state_dict = {
            'preamble': self.__preamble_for_body_func,
            'body': self.__body_for_body_func,
        }
        # Token -> handler once inside the body.
        self.__in_body_dict = {
            'mi<mk<body-open_': self.__found_body_func,
            'tx<ut<__________': self.__utf_to_caps_func,
            'tx<hx<__________': self.__hex_text_func,
            'tx<mc<__________': self.__hex_text_func,
            'tx<nu<__________': self.__text_func,
            'mi<mk<font______': self.__start_font_func,
            'mi<mk<caps______': self.__start_caps_func,
            'mi<mk<font-end__': self.__end_font_func,
            'mi<mk<caps-end__': self.__end_caps_func,
        }
        # Stacks of the currently open caps values and font faces; the
        # first entry is a sentinel that is never popped.
        self.__caps_list = ['false']
        self.__font_list = ['not-defined']

    def __caps_apply(self):
        """Return true if text in the current font should be upper-cased."""
        return (self.__convert_caps
                and self.__caps_list[-1] == 'true'
                and self.__current_dict_name not in _SPECIAL_FONTS)

    def __hex_text_func(self, line):
        """Convert one hex token through the current character map."""
        hex_num = line[17:-1]
        converted = self.__current_dict.get(hex_num)
        if converted is not None:
            if converted[0:1] == '&':
                # Map entries starting with '&' are written as 'ut' tokens.
                if self.__caps_apply():
                    converted = self.__utf_token_to_caps_func(converted)
                self.__write_obj.write('tx<ut<__________<%s\n' % converted)
            else:
                if self.__caps_apply():
                    converted = converted.upper()
                self.__write_obj.write('tx<nu<__________<%s\n' % converted)
            return

        # Not in the table: values of 10 or less are dropped silently.
        token = hex_num.replace("'", '')
        the_num = 0
        if token:
            the_num = int(token, 16)
        if the_num > 10:
            self.__write_obj.write('mi<tg<empty-att_<udef_symbol<num>%s<description>not-in-table\n' % hex_num)
            if self.__run_level > 4:
                # NOTE(review): the text between the quotes may have lost an
                # entity prefix when this source was rendered as HTML --
                # confirm against the upstream module.
                msg = 'Character "%s;" does not appear to be valid (or is a control character)\n' % token
                raise self.__bug_handler(msg)

    def __found_body_func(self, line):
        """Switch to the body state and copy the marker line through."""
        self.__state = 'body'
        self.__write_obj.write(line)

    def __body_func(self, line):
        """Copy a body line unchanged (used while converting the preamble)."""
        self.__write_obj.write(line)

    def __preamble_func(self, line):
        """Dispatch a preamble line on its token, or copy it unchanged."""
        action = self.__preamble_state_dict.get(self.__token_info)
        if action is not None:
            action(line)
        else:
            self.__write_obj.write(line)

    def __run_conversion(self, initial_state, state_dict, debug_copy_name):
        """Run every line of the input through the handler for its state.

        The converted output goes to a freshly created temporary file that
        is then copied over the input file and removed, even on failure.
        """
        self.__state = initial_state
        # mkstemp() creates the file atomically; mktemp() only returned a
        # name that another process could claim first.
        handle, self.__write_to = tempfile.mkstemp()
        os.close(handle)
        try:
            with open(self.__file, 'r') as read_obj:
                with open(self.__write_to, 'w') as write_obj:
                    self.__write_obj = write_obj
                    for line in read_obj:
                        self.__token_info = line[:16]
                        # The state can change from line to line, so the
                        # handler is looked up every time.
                        action = state_dict.get(self.__state)
                        if action is None:
                            sys.stderr.write(
                                'error no state found in hex_2_utf8 %s\n'
                                % self.__state)
                            # Keep the line rather than lose content.
                            write_obj.write(line)
                            continue
                        action(line)
            copy_obj = copy.Copy(bug_handler=self.__bug_handler)
            if self.__copy:
                copy_obj.copy_file(self.__write_to, debug_copy_name)
            # Presumably copies the content back over the input file; the
            # temporary file is still removed afterwards -- TODO confirm.
            copy_obj.rename(self.__write_to, self.__file)
        finally:
            if os.path.exists(self.__write_to):
                os.remove(self.__write_to)

    def __convert_preamble(self):
        """Convert the hex tokens found in the preamble."""
        self.__run_conversion(
            'preamble', self.__preamble_state_dict,
            'preamble_utf_convert.data')

    def __preamble_for_body_func(self, line):
        """Copy preamble lines through until the body marker is seen."""
        if self.__token_info == 'mi<mk<body-open_':
            # NOTE(review): __found_body_func already writes the line, so
            # the marker line is written twice here; this matches the
            # decompiled statement order and is left unchanged -- confirm
            # against the upstream module.
            self.__found_body_func(line)
        self.__write_obj.write(line)

    def __body_for_body_func(self, line):
        """Dispatch a body line on its token, or copy it unchanged."""
        action = self.__in_body_dict.get(self.__token_info)
        if action is not None:
            action(line)
        else:
            self.__write_obj.write(line)

    def __select_dict_for_face(self, face):
        """Make the character map for font ``face`` the current one."""
        if face == 'Symbol' and self.__convert_symbol:
            self.__current_dict_name = 'Symbol'
            self.__current_dict = self.__symbol_dict
        elif face == 'Wingdings' and self.__convert_wingdings:
            self.__current_dict_name = 'Wingdings'
            self.__current_dict = self.__wingdings_dict
        elif face == 'Zapf Dingbats' and self.__convert_zapf:
            self.__current_dict_name = 'Zapf Dingbats'
            self.__current_dict = self.__dingbats_dict
        else:
            self.__current_dict_name = 'default'
            self.__current_dict = self.__def_dict

    def __start_font_func(self, line):
        """Push the font face named on ``line`` and switch to its map."""
        face = line[17:-1]
        self.__font_list.append(face)
        self.__select_dict_for_face(face)

    def __end_font_func(self, line):
        """Pop the current font face and switch back to the previous map."""
        if len(self.__font_list) > 1:
            self.__font_list.pop()
        else:
            sys.stderr.write('module is hex_2_utf8\n')
            sys.stderr.write('method is end_font_func\n')
            sys.stderr.write('self.__font_list should be greater than one?\n')
        self.__select_dict_for_face(self.__font_list[-1])

    def __start_special_font_func_old(self, line):
        """Legacy handler; not referenced by any dispatch table.

        It calls ``append`` on the current map, which is a dict everywhere
        else in this class, so it would fail if it were ever invoked.
        """
        if self.__token_info == 'mi<mk<font-symbo':
            self.__current_dict.append(self.__symbol_dict)
            self.__special_fonts_found += 1
            self.__current_dict_name = 'Symbol'
        elif self.__token_info == 'mi<mk<font-wingd':
            self.__special_fonts_found += 1
            self.__current_dict.append(self.__wingdings_dict)
            self.__current_dict_name = 'Wingdings'
        elif self.__token_info == 'mi<mk<font-dingb':
            self.__current_dict.append(self.__dingbats_dict)
            self.__special_fonts_found += 1
            self.__current_dict_name = 'Zapf Dingbats'

    def __end_special_font_func(self, line):
        """Legacy no-op; not referenced by any dispatch table."""
        pass

    def __start_caps_func_old(self, line):
        """Legacy handler; not referenced by any dispatch table."""
        self.__in_caps = 1

    def __start_caps_func(self, line):
        """Push the caps value carried by ``line`` onto the caps stack."""
        self.__in_caps = 1
        value = line[17:-1]
        self.__caps_list.append(value)

    def __end_caps_func(self, line):
        """Pop the caps stack, complaining if only the sentinel is left."""
        if len(self.__caps_list) > 1:
            self.__caps_list.pop()
        else:
            sys.stderr.write('Module is hex_2_utf8\n')
            sys.stderr.write('method is __end_caps_func\n')
            sys.stderr.write('caps list should be more than one?\n')

    def __text_func(self, line):
        """Convert a plain-text token.

        In a special font every character is looked up in that font's map;
        otherwise the text is upper-cased when caps conversion applies.
        """
        text = line[17:-1]
        # The decompiled test joined 'Symbol' and 'Wingdings' with "and",
        # which can never be true; all three special fonts are meant.
        if self.__current_dict_name in _SPECIAL_FONTS:
            pieces = []
            for letter in text:
                # Map keys have the form "'XX" with upper-case hex digits.
                hex_num = "'%s" % hex(ord(letter)).upper()[2:]
                converted = self.__current_dict.get(hex_num)
                if converted is None:
                    sys.stderr.write('module is hex_2_ut8\n')
                    sys.stderr.write('method is __text_func\n')
                    sys.stderr.write('no hex value for "%s"\n' % hex_num)
                    continue
                pieces.append(converted)
            self.__write_obj.write('tx<nu<__________<%s\n' % ''.join(pieces))
        else:
            if self.__caps_apply():
                text = text.upper()
            self.__write_obj.write('tx<nu<__________<%s\n' % text)

    def __utf_to_caps_func(self, line):
        """Upper-case an already converted 'ut' token when caps are active."""
        utf_text = line[17:-1]
        if self.__caps_list[-1] == 'true' and self.__convert_caps:
            utf_text = self.__utf_token_to_caps_func(utf_text)
        self.__write_obj.write('tx<ut<__________<%s\n' % utf_text)

    def __utf_token_to_caps_func(self, char_entity):
        """Return the 'caps_uni' map entry for ``char_entity``.

        Returns ``char_entity`` unchanged when the map has no entry for it.
        """
        hex_num = char_entity[3:]
        length = len(hex_num)
        # Left-pad with zeros to five characters.
        if length == 3:
            hex_num = '00%s' % hex_num
        elif length == 4:
            hex_num = '0%s' % hex_num
        # NOTE(review): this format string adds nothing; a literal prefix
        # (the three characters stripped above?) may have been lost when
        # this source was rendered as HTML -- confirm against the upstream
        # module.
        new_char_entity = '%s' % hex_num
        converted = self.__caps_uni_dict.get(new_char_entity)
        if not converted:
            return char_entity
        return converted

    def __convert_body(self):
        """Convert the hex and text tokens found in the body."""
        self.__run_conversion(
            'body', self.__body_state_dict, 'body_utf_convert.data')

    def convert_hex_2_utf8(self):
        """Load the maps, then convert the configured area of the file."""
        self.__initiate_values()
        if self.__area_to_convert == 'preamble':
            self.__convert_preamble()
        else:
            self.__convert_body()