home *** CD-ROM | disk | FTP | other *** search
/ Maximum CD 2010 November / maximum-cd-2010-11.iso / DiscContents / calibre-0.7.13.msi / file_1107 (.txt) < prev    next >
Encoding:
Python Compiled Bytecode  |  2010-08-06  |  13.6 KB  |  360 lines

  1. # Source Generated with Decompyle++
  2. # File: in.pyc (Python 2.6)
  3.  
  4. import sys
  5. import os
  6. import tempfile
  7. import cStringIO
  8. from calibre.ebooks.rtf2xml import get_char_map, copy
  9. from calibre.ebooks.rtf2xml.char_set import char_set
  10.  
  11. class Hex2Utf8:
  12.     
  13.     def __init__(self, in_file, area_to_convert, char_file, default_char_map, bug_handler, invalid_rtf_handler, copy = None, temp_dir = None, symbol = None, wingdings = None, caps = None, convert_caps = None, dingbats = None, run_level = 1):
  14.         self._Hex2Utf8__file = in_file
  15.         self._Hex2Utf8__copy = copy
  16.         if area_to_convert != 'preamble' and area_to_convert != 'body':
  17.             msg = 'Developer error! Wrong flag.\nin module "hex_2_utf8.py\n"area_to_convert" must be "body" or "preamble"\n'
  18.             raise self._Hex2Utf8__bug_handler, msg
  19.         area_to_convert != 'body'
  20.         self._Hex2Utf8__char_file = char_file
  21.         self._Hex2Utf8__area_to_convert = area_to_convert
  22.         self._Hex2Utf8__default_char_map = default_char_map
  23.         self._Hex2Utf8__symbol = symbol
  24.         self._Hex2Utf8__wingdings = wingdings
  25.         self._Hex2Utf8__dingbats = dingbats
  26.         self._Hex2Utf8__caps = caps
  27.         self._Hex2Utf8__convert_caps = 0
  28.         self._Hex2Utf8__convert_symbol = 0
  29.         self._Hex2Utf8__convert_wingdings = 0
  30.         self._Hex2Utf8__convert_zapf = 0
  31.         self._Hex2Utf8__run_level = run_level
  32.         self._Hex2Utf8__write_to = tempfile.mktemp()
  33.         self._Hex2Utf8__bug_handler = bug_handler
  34.         self._Hex2Utf8__invalid_rtf_handler = invalid_rtf_handler
  35.  
  36.     
  37.     def update_values(self, file, area_to_convert, char_file, convert_caps, convert_symbol, convert_wingdings, convert_zapf, copy = None, temp_dir = None, symbol = None, wingdings = None, caps = None, dingbats = None):
  38.         self._Hex2Utf8__file = file
  39.         self._Hex2Utf8__copy = copy
  40.         if area_to_convert != 'preamble' and area_to_convert != 'body':
  41.             msg = 'in module "hex_2_utf8.py\n"area_to_convert" must be "body" or "preamble"\n'
  42.             raise self._Hex2Utf8__bug_handler, msg
  43.         area_to_convert != 'body'
  44.         self._Hex2Utf8__area_to_convert = area_to_convert
  45.         self._Hex2Utf8__symbol = symbol
  46.         self._Hex2Utf8__wingdings = wingdings
  47.         self._Hex2Utf8__dingbats = dingbats
  48.         self._Hex2Utf8__caps = caps
  49.         self._Hex2Utf8__convert_caps = convert_caps
  50.         self._Hex2Utf8__convert_symbol = convert_symbol
  51.         self._Hex2Utf8__convert_wingdings = convert_wingdings
  52.         self._Hex2Utf8__convert_zapf = convert_zapf
  53.  
  54.     
  55.     def __initiate_values(self):
  56.         self._Hex2Utf8__char_file = cStringIO.StringIO(char_set)
  57.         char_map_obj = get_char_map.GetCharMap(char_file = self._Hex2Utf8__char_file, bug_handler = self._Hex2Utf8__bug_handler)
  58.         up_128_dict = char_map_obj.get_char_map(map = self._Hex2Utf8__default_char_map)
  59.         bt_128_dict = char_map_obj.get_char_map(map = 'bottom_128')
  60.         ms_standard_dict = char_map_obj.get_char_map(map = 'ms_standard')
  61.         self._Hex2Utf8__def_dict = { }
  62.         self._Hex2Utf8__def_dict.update(up_128_dict)
  63.         self._Hex2Utf8__def_dict.update(bt_128_dict)
  64.         self._Hex2Utf8__def_dict.update(ms_standard_dict)
  65.         self._Hex2Utf8__current_dict = self._Hex2Utf8__def_dict
  66.         self._Hex2Utf8__current_dict_name = 'default'
  67.         self._Hex2Utf8__in_caps = 0
  68.         self._Hex2Utf8__special_fonts_found = 0
  69.         if self._Hex2Utf8__symbol:
  70.             symbol_base_dict = char_map_obj.get_char_map(map = 'SYMBOL')
  71.             ms_symbol_dict = char_map_obj.get_char_map(map = 'ms_symbol')
  72.             self._Hex2Utf8__symbol_dict = { }
  73.             self._Hex2Utf8__symbol_dict.update(symbol_base_dict)
  74.             self._Hex2Utf8__symbol_dict.update(ms_symbol_dict)
  75.         
  76.         if self._Hex2Utf8__wingdings:
  77.             wingdings_base_dict = char_map_obj.get_char_map(map = 'wingdings')
  78.             ms_wingdings_dict = char_map_obj.get_char_map(map = 'ms_wingdings')
  79.             self._Hex2Utf8__wingdings_dict = { }
  80.             self._Hex2Utf8__wingdings_dict.update(wingdings_base_dict)
  81.             self._Hex2Utf8__wingdings_dict.update(ms_wingdings_dict)
  82.         
  83.         if self._Hex2Utf8__dingbats:
  84.             dingbats_base_dict = char_map_obj.get_char_map(map = 'dingbats')
  85.             ms_dingbats_dict = char_map_obj.get_char_map(map = 'ms_dingbats')
  86.             self._Hex2Utf8__dingbats_dict = { }
  87.             self._Hex2Utf8__dingbats_dict.update(dingbats_base_dict)
  88.             self._Hex2Utf8__dingbats_dict.update(ms_dingbats_dict)
  89.         
  90.         self._Hex2Utf8__caps_uni_dict = char_map_obj.get_char_map(map = 'caps_uni')
  91.         self._Hex2Utf8__preamble_state_dict = {
  92.             'preamble': self._Hex2Utf8__preamble_func,
  93.             'body': self._Hex2Utf8__body_func,
  94.             'mi<mk<body-open_': self._Hex2Utf8__found_body_func,
  95.             'tx<hx<__________': self._Hex2Utf8__hex_text_func }
  96.         self._Hex2Utf8__body_state_dict = {
  97.             'preamble': self._Hex2Utf8__preamble_for_body_func,
  98.             'body': self._Hex2Utf8__body_for_body_func }
  99.         self._Hex2Utf8__in_body_dict = {
  100.             'mi<mk<body-open_': self._Hex2Utf8__found_body_func,
  101.             'tx<ut<__________': self._Hex2Utf8__utf_to_caps_func,
  102.             'tx<hx<__________': self._Hex2Utf8__hex_text_func,
  103.             'tx<mc<__________': self._Hex2Utf8__hex_text_func,
  104.             'tx<nu<__________': self._Hex2Utf8__text_func,
  105.             'mi<mk<font______': self._Hex2Utf8__start_font_func,
  106.             'mi<mk<caps______': self._Hex2Utf8__start_caps_func,
  107.             'mi<mk<font-end__': self._Hex2Utf8__end_font_func,
  108.             'mi<mk<caps-end__': self._Hex2Utf8__end_caps_func }
  109.         self._Hex2Utf8__caps_list = [
  110.             'false']
  111.         self._Hex2Utf8__font_list = [
  112.             'not-defined']
  113.  
  114.     
  115.     def __hex_text_func(self, line):
  116.         hex_num = line[17:-1]
  117.         converted = self._Hex2Utf8__current_dict.get(hex_num)
  118.         if converted != None:
  119.             if converted[0:1] == '&':
  120.                 font = self._Hex2Utf8__current_dict_name
  121.                 if self._Hex2Utf8__convert_caps and self._Hex2Utf8__caps_list[-1] == 'true' and font != 'Symbol' and font != 'Wingdings' and font != 'Zapf Dingbats':
  122.                     converted = self._Hex2Utf8__utf_token_to_caps_func(converted)
  123.                 
  124.                 self._Hex2Utf8__write_obj.write('tx<ut<__________<%s\n' % converted)
  125.             else:
  126.                 font = self._Hex2Utf8__current_dict_name
  127.                 if self._Hex2Utf8__convert_caps and self._Hex2Utf8__caps_list[-1] == 'true' and font != 'Symbol' and font != 'Wingdings' and font != 'Zapf Dingbats':
  128.                     converted = converted.upper()
  129.                 
  130.                 self._Hex2Utf8__write_obj.write('tx<nu<__________<%s\n' % converted)
  131.         else:
  132.             token = hex_num.replace("'", '')
  133.             the_num = 0
  134.             if token:
  135.                 the_num = int(token, 16)
  136.             
  137.             if the_num > 10:
  138.                 self._Hex2Utf8__write_obj.write('mi<tg<empty-att_<udef_symbol<num>%s<description>not-in-table\n' % hex_num)
  139.                 if self._Hex2Utf8__run_level > 4:
  140.                     msg = 'Character "&#x%s;" does not appear to be valid (or is a control character)\n' % token
  141.                     raise self._Hex2Utf8__bug_handler, msg
  142.                 self._Hex2Utf8__run_level > 4
  143.             
  144.  
  145.     
  146.     def __found_body_func(self, line):
  147.         self._Hex2Utf8__state = 'body'
  148.         self._Hex2Utf8__write_obj.write(line)
  149.  
  150.     
  151.     def __body_func(self, line):
  152.         self._Hex2Utf8__write_obj.write(line)
  153.  
  154.     
  155.     def __preamble_func(self, line):
  156.         action = self._Hex2Utf8__preamble_state_dict.get(self._Hex2Utf8__token_info)
  157.         if action != None:
  158.             action(line)
  159.         else:
  160.             self._Hex2Utf8__write_obj.write(line)
  161.  
  162.     
  163.     def __convert_preamble(self):
  164.         self._Hex2Utf8__state = 'preamble'
  165.         read_obj = open(self._Hex2Utf8__file, 'r')
  166.         self._Hex2Utf8__write_obj = open(self._Hex2Utf8__write_to, 'w')
  167.         line_to_read = 1
  168.         while line_to_read:
  169.             line_to_read = read_obj.readline()
  170.             line = line_to_read
  171.             self._Hex2Utf8__token_info = line[:16]
  172.             action = self._Hex2Utf8__preamble_state_dict.get(self._Hex2Utf8__state)
  173.             if action == None:
  174.                 sys.stderr.write('error no state found in hex_2_utf8', self._Hex2Utf8__state)
  175.             
  176.             action(line)
  177.         read_obj.close()
  178.         self._Hex2Utf8__write_obj.close()
  179.         copy_obj = copy.Copy(bug_handler = self._Hex2Utf8__bug_handler)
  180.         if self._Hex2Utf8__copy:
  181.             copy_obj.copy_file(self._Hex2Utf8__write_to, 'preamble_utf_convert.data')
  182.         
  183.         copy_obj.rename(self._Hex2Utf8__write_to, self._Hex2Utf8__file)
  184.         os.remove(self._Hex2Utf8__write_to)
  185.  
  186.     
  187.     def __preamble_for_body_func(self, line):
  188.         if self._Hex2Utf8__token_info == 'mi<mk<body-open_':
  189.             self._Hex2Utf8__found_body_func(line)
  190.         
  191.         self._Hex2Utf8__write_obj.write(line)
  192.  
  193.     
  194.     def __body_for_body_func(self, line):
  195.         action = self._Hex2Utf8__in_body_dict.get(self._Hex2Utf8__token_info)
  196.         if action != None:
  197.             action(line)
  198.         else:
  199.             self._Hex2Utf8__write_obj.write(line)
  200.  
  201.     
  202.     def __start_font_func(self, line):
  203.         face = line[17:-1]
  204.         self._Hex2Utf8__font_list.append(face)
  205.         if face == 'Symbol' and self._Hex2Utf8__convert_symbol:
  206.             self._Hex2Utf8__current_dict_name = 'Symbol'
  207.             self._Hex2Utf8__current_dict = self._Hex2Utf8__symbol_dict
  208.         elif face == 'Wingdings' and self._Hex2Utf8__convert_wingdings:
  209.             self._Hex2Utf8__current_dict_name = 'Wingdings'
  210.             self._Hex2Utf8__current_dict = self._Hex2Utf8__wingdings_dict
  211.         elif face == 'Zapf Dingbats' and self._Hex2Utf8__convert_zapf:
  212.             self._Hex2Utf8__current_dict_name = 'Zapf Dingbats'
  213.             self._Hex2Utf8__current_dict = self._Hex2Utf8__dingbats_dict
  214.         else:
  215.             self._Hex2Utf8__current_dict_name = 'default'
  216.             self._Hex2Utf8__current_dict = self._Hex2Utf8__def_dict
  217.  
  218.     
  219.     def __end_font_func(self, line):
  220.         if len(self._Hex2Utf8__font_list) > 1:
  221.             self._Hex2Utf8__font_list.pop()
  222.         else:
  223.             sys.stderr.write('module is hex_2_utf8\n')
  224.             sys.stderr.write('method is end_font_func\n')
  225.             sys.stderr.write('self.__font_list should be greater than one?\n')
  226.         face = self._Hex2Utf8__font_list[-1]
  227.         if face == 'Symbol' and self._Hex2Utf8__convert_symbol:
  228.             self._Hex2Utf8__current_dict_name = 'Symbol'
  229.             self._Hex2Utf8__current_dict = self._Hex2Utf8__symbol_dict
  230.         elif face == 'Wingdings' and self._Hex2Utf8__convert_wingdings:
  231.             self._Hex2Utf8__current_dict_name = 'Wingdings'
  232.             self._Hex2Utf8__current_dict = self._Hex2Utf8__wingdings_dict
  233.         elif face == 'Zapf Dingbats' and self._Hex2Utf8__convert_zapf:
  234.             self._Hex2Utf8__current_dict_name = 'Zapf Dingbats'
  235.             self._Hex2Utf8__current_dict = self._Hex2Utf8__dingbats_dict
  236.         else:
  237.             self._Hex2Utf8__current_dict_name = 'default'
  238.             self._Hex2Utf8__current_dict = self._Hex2Utf8__def_dict
  239.  
  240.     
  241.     def __start_special_font_func_old(self, line):
  242.         if self._Hex2Utf8__token_info == 'mi<mk<font-symbo':
  243.             self._Hex2Utf8__current_dict.append(self._Hex2Utf8__symbol_dict)
  244.             self._Hex2Utf8__special_fonts_found += 1
  245.             self._Hex2Utf8__current_dict_name = 'Symbol'
  246.         elif self._Hex2Utf8__token_info == 'mi<mk<font-wingd':
  247.             self._Hex2Utf8__special_fonts_found += 1
  248.             self._Hex2Utf8__current_dict.append(self._Hex2Utf8__wingdings_dict)
  249.             self._Hex2Utf8__current_dict_name = 'Wingdings'
  250.         elif self._Hex2Utf8__token_info == 'mi<mk<font-dingb':
  251.             self._Hex2Utf8__current_dict.append(self._Hex2Utf8__dingbats_dict)
  252.             self._Hex2Utf8__special_fonts_found += 1
  253.             self._Hex2Utf8__current_dict_name = 'Zapf Dingbats'
  254.         
  255.  
  256.     
  257.     def __end_special_font_func(self, line):
  258.         pass
  259.  
  260.     
  261.     def __start_caps_func_old(self, line):
  262.         self._Hex2Utf8__in_caps = 1
  263.  
  264.     
  265.     def __start_caps_func(self, line):
  266.         self._Hex2Utf8__in_caps = 1
  267.         value = line[17:-1]
  268.         self._Hex2Utf8__caps_list.append(value)
  269.  
  270.     
  271.     def __end_caps_func(self, line):
  272.         if len(self._Hex2Utf8__caps_list) > 1:
  273.             self._Hex2Utf8__caps_list.pop()
  274.         else:
  275.             sys.stderr.write('Module is hex_2_utf8\n')
  276.             sys.stderr.write('method is __end_caps_func\n')
  277.             sys.stderr.write('caps list should be more than one?\n')
  278.  
  279.     
  280.     def __text_func(self, line):
  281.         text = line[17:-1]
  282.         if self._Hex2Utf8__current_dict_name == 'Symbol' and self._Hex2Utf8__current_dict_name == 'Wingdings' or self._Hex2Utf8__current_dict_name == 'Zapf Dingbats':
  283.             the_string = ''
  284.             for letter in text:
  285.                 hex_num = hex(ord(letter))
  286.                 hex_num = str(hex_num)
  287.                 hex_num = hex_num.upper()
  288.                 hex_num = hex_num[2:]
  289.                 hex_num = "'%s" % hex_num
  290.                 converted = self._Hex2Utf8__current_dict.get(hex_num)
  291.                 if converted == None:
  292.                     sys.stderr.write('module is hex_2_ut8\n')
  293.                     sys.stderr.write('method is __text_func\n')
  294.                     sys.stderr.write('no hex value for "%s"\n' % hex_num)
  295.                     continue
  296.                 the_string += converted
  297.             
  298.             self._Hex2Utf8__write_obj.write('tx<nu<__________<%s\n' % the_string)
  299.         elif self._Hex2Utf8__caps_list[-1] == 'true' and self._Hex2Utf8__convert_caps and self._Hex2Utf8__current_dict_name != 'Symbol' and self._Hex2Utf8__current_dict_name != 'Wingdings' and self._Hex2Utf8__current_dict_name != 'Zapf Dingbats':
  300.             text = text.upper()
  301.         
  302.         self._Hex2Utf8__write_obj.write('tx<nu<__________<%s\n' % text)
  303.  
  304.     
  305.     def __utf_to_caps_func(self, line):
  306.         utf_text = line[17:-1]
  307.         if self._Hex2Utf8__caps_list[-1] == 'true' and self._Hex2Utf8__convert_caps:
  308.             utf_text = self._Hex2Utf8__utf_token_to_caps_func(utf_text)
  309.         
  310.         self._Hex2Utf8__write_obj.write('tx<ut<__________<%s\n' % utf_text)
  311.  
  312.     
  313.     def __utf_token_to_caps_func(self, char_entity):
  314.         hex_num = char_entity[3:]
  315.         length = len(hex_num)
  316.         if length == 3:
  317.             hex_num = '00%s' % hex_num
  318.         elif length == 4:
  319.             hex_num = '0%s' % hex_num
  320.         
  321.         new_char_entity = '&#x%s' % hex_num
  322.         converted = self._Hex2Utf8__caps_uni_dict.get(new_char_entity)
  323.         if not converted:
  324.             return char_entity
  325.         return converted
  326.  
  327.     
  328.     def __convert_body(self):
  329.         self._Hex2Utf8__state = 'body'
  330.         read_obj = open(self._Hex2Utf8__file, 'r')
  331.         self._Hex2Utf8__write_obj = open(self._Hex2Utf8__write_to, 'w')
  332.         line_to_read = 1
  333.         while line_to_read:
  334.             line_to_read = read_obj.readline()
  335.             line = line_to_read
  336.             self._Hex2Utf8__token_info = line[:16]
  337.             action = self._Hex2Utf8__body_state_dict.get(self._Hex2Utf8__state)
  338.             if action == None:
  339.                 sys.stderr.write('error no state found in hex_2_utf8', self._Hex2Utf8__state)
  340.             
  341.             action(line)
  342.         read_obj.close()
  343.         self._Hex2Utf8__write_obj.close()
  344.         copy_obj = copy.Copy(bug_handler = self._Hex2Utf8__bug_handler)
  345.         if self._Hex2Utf8__copy:
  346.             copy_obj.copy_file(self._Hex2Utf8__write_to, 'body_utf_convert.data')
  347.         
  348.         copy_obj.rename(self._Hex2Utf8__write_to, self._Hex2Utf8__file)
  349.         os.remove(self._Hex2Utf8__write_to)
  350.  
  351.     
  352.     def convert_hex_2_utf8(self):
  353.         self._Hex2Utf8__initiate_values()
  354.         if self._Hex2Utf8__area_to_convert == 'preamble':
  355.             self._Hex2Utf8__convert_preamble()
  356.         else:
  357.             self._Hex2Utf8__convert_body()
  358.  
  359.  
  360.