home *** CD-ROM | disk | FTP | other *** search
- # Source Generated with Decompyle++
- # File: in.pyc (Python 2.6)
-
- __license__ = 'GPL v3'
- __copyright__ = '2009, John Schember <john@nachtimwald.com>'
- __docformat__ = 'restructuredtext en'
- import os
- import re
- import StringIO
- from calibre import my_unichr, prepare_string_for_xml
- from calibre.ebooks.metadata.toc import TOC
-
class PML_HTMLizer(object):
    """Translate PML markup into HTML and collect a table of contents.

    Parsing is line oriented.  ``self.state`` maps every name in ``STATES``
    to a two item list ``[is_open, value]``; PML codes toggle ``is_open``
    and the HTML emitted for each code depends on which states are open at
    that moment.  Every output line is wrapped in ``<p>...</p>`` and the
    tags of the states that are still open are re-emitted at the start of
    the following line.
    """

    # Every formatting state tracked while parsing.
    STATES = [
        'i',
        'u',
        'd',
        'b',
        'sp',
        'sb',
        'h1',
        'h2',
        'h3',
        'h4',
        'h5',
        'h6',
        'a',
        'ra',
        'c',
        'r',
        't',
        's',
        'l',
        'k',
        'T',
        'FN',
        'SB',
    ]

    # States whose opening tag takes one value (a single %s).
    STATES_VALUE_REQ = [
        'a',
        'T',
        'FN',
        'SB',
    ]

    # States whose opening tag takes the same value twice (two %s).
    STATES_VALUE_REQ_2 = [
        'ra',
    ]

    # States whose closing tag also takes the stored value.
    STATES_CLOSE_VALUE_REQ = [
        'FN',
        'SB',
    ]

    # state name -> (opening tag, closing tag)
    STATES_TAGS = {
        'h1': ('<h1 style="page-break-before: always;">', '</h1>'),
        'h2': ('<h2>', '</h2>'),
        'h3': ('<h3>', '</h3>'),
        'h4': ('<h4>', '</h4>'),
        'h5': ('<h5>', '</h5>'),
        'h6': ('<h6>', '</h6>'),
        'sp': ('<sup>', '</sup>'),
        'sb': ('<sub>', '</sub>'),
        'a': ('<a href="#%s">', '</a>'),
        'ra': ('<span id="r%s"></span><a href="#%s">', '</a>'),
        'c': ('<div style="text-align: center; margin: auto;">', '</div>'),
        'r': ('<div style="text-align: right;">', '</div>'),
        't': ('<div style="text-indent: 5%;">', '</div>'),
        'T': ('<div style="text-indent: %s;">', '</div>'),
        'i': ('<span style="font-style: italic;">', '</span>'),
        'u': ('<span style="text-decoration: underline;">', '</span>'),
        'd': ('<span style="text-decoration: line-through;">', '</span>'),
        'b': ('<span style="font-weight: bold;">', '</span>'),
        'l': ('<span style="font-size: 150%;">', '</span>'),
        'k': ('<span style="font-size: 75%; font-variant: small-caps;">', '</span>'),
        # The closing tag previously read '</p><<small>...'; the stray '<'
        # is removed so that it has the same shape as the 'SB' closing tag.
        'FN': ('<br /><br style="page-break-after: always;" /><div id="fn-%s"><p>', '</p><small><a href="#rfn-%s">return</a></small></div>'),
        'SB': ('<br /><br style="page-break-after: always;" /><div id="sb-%s"><p>', '</p><small><a href="#rsb-%s">return</a></small></div>'),
    }

    # PML code (the text following the backslash) -> state name.
    CODE_STATES = {
        'q': 'a',
        'x': 'h1',
        'X0': 'h2',
        'X1': 'h3',
        'X2': 'h4',
        'X3': 'h5',
        'X4': 'h6',
        'Sp': 'sp',
        'Sb': 'sb',
        'c': 'c',
        'r': 'r',
        't': 't',
        'T': 'T',
        'i': 'i',
        'I': 'i',
        'u': 'u',
        'o': 'd',
        'b': 'b',
        'B': 'b',
        'l': 'l',
        'k': 'k',
        'Fn': 'ra',
        'Sd': 'ra',
        'FN': 'FN',
        'SB': 'SB',
    }

    # States whose value is a link target (a leading '#' is stripped).
    LINK_STATES = [
        'a',
        'ra',
    ]

    # States handled by process_code_block.
    BLOCK_STATES = [
        'a',
        'ra',
        'h1',
        'h2',
        'h3',
        'h4',
        'h5',
        'h6',
        'sb',
        'sp',
    ]

    # States handled by process_code_div.
    DIV_STATES = [
        'c',
        'r',
        't',
        'T',
        'FN',
        'SB',
    ]

    # States handled by process_code_span.
    SPAN_STATES = [
        'l',
        'k',
        'i',
        'u',
        'd',
        'b',
    ]

    def __init__(self):
        self.state = {}
        self.toc = TOC()
        self.file_name = ''

    # ------------------------------------------------------------------
    # Pre-processing
    # ------------------------------------------------------------------

    def prepare_pml(self, pml):
        """Normalise raw PML text before it is parsed line by line.

        Steps, in order: rewrite chapter markers so their title is also
        available as a ``="..."`` value, drop ``\\v...\\v`` spans, collapse
        and trim runs of spaces, turn ``<footnote>``/``<sidebar>`` elements
        into ``\\FN``/``\\SB`` codes, convert ``&``, ``\\aNNN`` and
        ``\\Uxxxx``, and finally pass the text through
        ``prepare_string_for_xml``.

        :param pml: PML text.
        :return: the prepared text.
        """
        def add_title(match):
            # \xTitle\x  ->  \x="Title"Title\x ; parse_pml later reads the
            # quoted value to build the TOC entry.
            text = match.group('text')
            return '="%s"%s' % (self.strip_pml(text), text)

        def as_code(code):
            # Build the replacement for <footnote>/<sidebar> elements.
            # Elements without any text are removed entirely.
            def replace(match):
                text = match.group('text')
                if not text:
                    return ''
                return '\\%s="%s"%s\\%s' % (code, match.group('target'), text, code)
            return replace

        # The pattern must be a plain string; the received source passed a
        # one element tuple here, which re.sub cannot compile.
        pml = re.sub(r'(?<=\\x)(?P<text>.*?)(?=\\x)', add_title, pml)
        pml = re.sub(r'(?<=\\X[0-4])(?P<text>.*?)(?=\\X[0-4])', add_title, pml)

        # Remove \v ... \v spans.
        pml = re.sub(r'(?mus)\\v(?P<text>.*?)\\v', '', pml)

        # Collapse runs of spaces and trim them at line starts and ends.
        pml = re.sub(r'(?mus)[ ]{2,}', ' ', pml)
        pml = re.sub(r'(?mus)^[ ]*(?=.)', '', pml)
        pml = re.sub(r'(?mus)(?<=.)[ ]*$', '', pml)
        pml = re.sub(r'(?mus)^[ ]*$', '', pml)

        pml = re.sub(r'(?mus)<footnote\s+id="(?P<target>.+?)">\s*(?P<text>.*?)\s*</footnote>', as_code('FN'), pml)
        pml = re.sub(r'(?mus)<sidebar\s+id="(?P<target>.+?)">\s*(?P<text>.*?)\s*</sidebar>', as_code('SB'), pml)

        # Escape existing ampersands first so that only the references
        # generated below are real entities.  (The received source replaced
        # '&' with '&', a no-op.)
        # NOTE(review): assumes prepare_string_for_xml leaves well formed
        # entities readable - confirm against calibre.
        pml = pml.replace('&', '&amp;')

        # \aNNN is a decimal character code: emit a numeric reference.
        pml = re.sub(r'\\a(?P<num>\d{3})', lambda match: '&#%s;' % match.group('num'), pml)
        # \Uxxxx is a hexadecimal code; int(..., 16) accepts either case.
        pml = re.sub(r'\\U(?P<num>[0-9a-fA-F]{4})', lambda match: '%s' % my_unichr(int(match.group('num'), 16)), pml)

        return prepare_string_for_xml(pml)

    def strip_pml(self, pml):
        """Return *pml* with its PML codes removed and newlines as spaces.

        Used to obtain a plain text chapter title from marked up text.
        """
        patterns = (
            # Codes that carry a quoted value.  [^"]* keeps each match
            # inside one pair of quotes so text between two values on the
            # same line is not swallowed.
            r'\\C\d="[^"]*"',
            r'\\Fn="[^"]*"',
            r'\\Sd="[^"]*"',
            r'\\.="[^"]*"',
            # Multi character codes.
            r'\\X\d',
            r'\\S[pbd]',
            r'\\Fn',
            r'\\a\d\d\d',
            # Hexadecimal, matching what prepare_pml converts.
            r'\\U[0-9a-fA-F]{4}',
            # Any remaining single character code.
            r'\\.',
        )
        for pattern in patterns:
            pml = re.sub(pattern, '', pml)

        # str.replace returns a new string; the result has to be kept.
        pml = pml.replace('\r\n', ' ')
        pml = pml.replace('\n', ' ')
        pml = pml.replace('\r', ' ')
        return pml

    # ------------------------------------------------------------------
    # Post-processing
    # ------------------------------------------------------------------

    def cleanup_html(self, html):
        """Apply cleanup_html_remove_redundant until the text is stable."""
        while True:
            cleaned = self.cleanup_html_remove_redundant(html)
            if cleaned == html:
                return cleaned
            html = cleaned

    def _tag_regex(self, tag):
        """Return a regex matching *tag* with any value in its %s slots.

        The literal parts are escaped; each ``%s`` becomes ``[^"]*?`` so a
        value can never run past the attribute quote that encloses it.
        """
        return r'[^"]*?'.join([re.escape(piece) for piece in tag.split('%s')])

    def cleanup_html_remove_redundant(self, html):
        """Remove one layer of empty tag pairs and empty paragraphs.

        For every entry of ``STATES_TAGS`` an opening tag immediately
        followed (ignoring whitespace) by its closing tag is deleted.
        Value slots are handled in both tags, so the two-value ``ra`` tag
        and the ``FN``/``SB`` closing tags are matched as well.
        """
        for key in self.STATES_TAGS:
            open_tag, close_tag = self.STATES_TAGS[key]
            pattern = r'(?u)%s\s*%s' % (self._tag_regex(open_tag), self._tag_regex(close_tag))
            html = re.sub(pattern, '', html)

        html = re.sub(r'<p>\s*</p>', '', html)
        return html

    # ------------------------------------------------------------------
    # Tag helpers
    # ------------------------------------------------------------------

    def _open_tag(self, name, value=''):
        """Return the opening tag of state *name* filled in with *value*."""
        tag = self.STATES_TAGS[name][0]
        if name in self.STATES_VALUE_REQ:
            return tag % value
        if name in self.STATES_VALUE_REQ_2:
            return tag % (value, value)
        return tag

    def _close_tag(self, name):
        """Return the closing tag of state *name* using its stored value."""
        tag = self.STATES_TAGS[name][1]
        if name in self.STATES_CLOSE_VALUE_REQ:
            return tag % self.state[name][1]
        return tag

    def _close_open_states(self, names):
        """Return the closing tags of every state in *names* that is open."""
        return u''.join([self._close_tag(n) for n in names if self.state[n][0]])

    def _reopen_states(self, names, skip=None):
        """Return the opening tags of every open state in *names*.

        The state called *skip*, if any, is left out.
        """
        return u''.join([self._open_tag(n, self.state[n][1])
                         for n in names
                         if n != skip and self.state[n][0]])

    def _open_code(self, code, stream, pre='', block=False):
        """Return the opening tag for *code*, reading its value if needed.

        The value is read from *stream* and remembered in ``self.state``.
        With *block* set, a link value loses its leading ``#`` and a
        non-empty *pre* is prepended as ``pre-value``.
        """
        if code not in self.STATES_VALUE_REQ and code not in self.STATES_VALUE_REQ_2:
            return self.STATES_TAGS[code][0]

        val = self.code_value(stream)
        if block:
            if code in self.LINK_STATES:
                val = val.lstrip('#')
            if pre:
                val = '%s-%s' % (pre, val)
        self.state[code][1] = val
        return self._open_tag(code, val)

    # ------------------------------------------------------------------
    # Line start / end
    # ------------------------------------------------------------------

    def start_line(self):
        """Return ``<p>`` followed by the opening tags of all open states.

        Tags are emitted in the order: states that are neither div nor
        span, then div states, then span states.
        """
        div = []
        span = []
        other = []
        for key, val in self.state.items():
            if not val[0]:
                continue
            if key in self.DIV_STATES:
                div.append((key, val[1]))
            elif key in self.SPAN_STATES:
                span.append((key, val[1]))
            else:
                other.append((key, val[1]))

        start = u''.join([self._open_tag(key, val) for key, val in other + div + span])
        return u'<p>%s' % start

    def end_line(self):
        """Return the closing tags of all open states followed by ``</p>``.

        Tags are emitted in the order span, div, other.  An open ``T``
        state is switched off here: it is closed by the end of the line
        rather than by a second code.
        """
        div = []
        span = []
        other = []
        for key, val in self.state.items():
            if not val[0]:
                continue
            if key == 'T':
                self.state['T'][0] = False
            if key in self.DIV_STATES:
                div.append(key)
            elif key in self.SPAN_STATES:
                span.append(key)
            else:
                other.append(key)

        end = u''.join([self._close_tag(key) for key in span + div + other])
        return u'%s</p>' % end

    # ------------------------------------------------------------------
    # Code processing
    # ------------------------------------------------------------------

    def process_code(self, code, stream, pre=''):
        """Return the HTML for PML *code* and toggle the state it maps to.

        :param code: key of ``CODE_STATES``; unknown codes yield ``u''``.
        :param stream: stream positioned just after the code, from which a
            ``="value"`` argument is read when the state needs one.
        :param pre: prefix for the value of block states.
        """
        text = u''
        code = self.CODE_STATES.get(code, None)
        if not code:
            return text

        if code in self.DIV_STATES:
            # 'T' has no closing code (end_line switches it off), so a
            # second 'T' on the same line only has its value consumed.
            if code == 'T' and self.state['T'][0]:
                self.code_value(stream)
                return text
            text = self.process_code_div(code, stream)
        elif code in self.SPAN_STATES:
            text = self.process_code_span(code, stream)
        elif code in self.BLOCK_STATES:
            text = self.process_code_block(code, stream, pre)
        else:
            text = self.process_code_simple(code, stream)

        self.state[code][0] = not self.state[code][0]
        return text

    def process_code_simple(self, code, stream):
        """Return the closing tag if *code* is open, else its opening tag."""
        if self.state[code][0]:
            return self._close_tag(code)
        return self._open_code(code, stream)

    def process_code_div(self, code, stream):
        """Return the HTML that toggles the div state *code*.

        Closing: every open span and div is closed, then all of them
        except *code* are reopened (divs first).  Opening: open spans are
        closed, the div is opened and the spans are reopened inside it.
        """
        if self.state[code][0]:
            text = self._close_open_states(self.SPAN_STATES + self.DIV_STATES)
            text += self._reopen_states(self.DIV_STATES + self.SPAN_STATES, skip=code)
        else:
            text = self._close_open_states(self.SPAN_STATES)
            text += self._open_code(code, stream)
            text += self._reopen_states(self.SPAN_STATES)
        return text

    def process_code_span(self, code, stream):
        """Return the HTML that toggles the span state *code*.

        Closing: every open span is closed and all but *code* are
        reopened.  Opening: only the opening tag is produced.
        """
        if self.state[code][0]:
            text = self._close_open_states(self.SPAN_STATES)
            text += self._reopen_states(self.SPAN_STATES, skip=code)
            return text
        return u'' + self._open_code(code, stream)

    def process_code_block(self, code, stream, pre=''):
        """Return the HTML that toggles the block state *code*.

        Open spans are closed around the block tag and reopened after it.
        When opening, link values lose a leading ``#`` and get *pre*
        prepended as ``pre-value``.
        """
        text = self._close_open_states(self.SPAN_STATES)
        if self.state[code][0]:
            text += self._close_tag(code)
        else:
            text += self._open_code(code, stream, pre, block=True)
        text += self._reopen_states(self.SPAN_STATES)
        return text

    def code_value(self, stream):
        """Read a ``="value"`` argument from *stream*.

        Spaces are allowed before ``=`` and before the opening quote.  If
        the argument is missing or incomplete the stream is rewound to
        where it started and ``u''`` is returned.

        :return: the text between the quotes with surrounding whitespace
            stripped.
        """
        start = stream.tell()
        chars = []
        # phase 0: waiting for '='; 1: waiting for the opening quote;
        # 2: inside the quotes; 3: closing quote seen.
        phase = 0
        while phase != 3:
            c = stream.read(1)
            if c == '':
                break
            if phase == 2:
                if c == '"':
                    phase = 3
                else:
                    chars.append(c)
            elif c == ' ':
                continue
            elif phase == 0 and c == '=':
                phase = 1
            elif phase == 1 and c == '"':
                phase = 2
            else:
                # Anything else means there is no well formed argument.
                break

        if phase != 3:
            stream.seek(start)
            return u''
        return u''.join(chars).strip()

    # ------------------------------------------------------------------
    # Parsing
    # ------------------------------------------------------------------

    def _parse_code(self, line):
        """Handle the code following a backslash in *line*.

        :return: ``(text, is_content)``; *is_content* tells whether the
            code marks the current line as non-empty.
        """
        c = line.read(1)
        if c == '':
            # Backslash at the very end of the line.
            return u'', False

        if c in 'qcrtTiIuobBlk':
            return self.process_code(c, line), False

        if c in 'FS':
            code = c + line.read(1)
            if code == 'Fn':
                return self.process_code('Fn', line, 'fn'), False
            if code == 'Sd':
                return self.process_code('Sd', line, 'sb'), False
            # 'Sp'/'Sb' are listed in CODE_STATES and are handled here too
            # rather than being dropped.
            if code in ('FN', 'SB', 'Sp', 'Sb'):
                return self.process_code(code, line), False
            return u'', False

        if c in 'xXC':
            level = ''
            if c in 'XC':
                level = line.read(1)
            toc_id = 'pml_toc-%s' % len(self.toc)
            # prepare_pml stored the chapter title as a ="..." value.
            value = self.code_value(line)
            tag = ''
            if c == 'x':
                tag = self.process_code(c, line)
            elif c == 'X':
                tag = self.process_code('%s%s' % (c, level), line)
            if not value:
                return tag, True
            self.toc.add_item(os.path.basename(self.file_name), toc_id, value)
            return '<span id="%s"></span>%s' % (toc_id, tag), True

        if c == 'm':
            return '<img src="images/%s" />' % self.code_value(line), True
        if c == 'Q':
            return '<span id="%s"></span>' % self.code_value(line), True
        if c == 'p':
            return '<br /><br style="page-break-after: always;" />', True
        if c == 'w':
            return '<hr width="%s" />' % self.code_value(line), True
        if c == '-':
            # NOTE(review): taken to be PML's soft hyphen code - confirm
            # against the PML reference.  Emitted as a numeric reference.
            return '&#173;', True
        if c == '\\':
            return '\\', True

        # 'n' and unrecognised codes produce no output.
        return u'', False

    def _parse_line(self, line):
        """Convert one line, given as a stream, into HTML fragments.

        :return: ``(fragments, empty)`` where *fragments* starts with the
            result of start_line() and *empty* is True when the line had
            no content.
        """
        parsed = [self.start_line()]
        empty = True

        c = line.read(1)
        while c != '':
            if c == '\\':
                text, is_content = self._parse_code(line)
                if is_content:
                    empty = False
            else:
                if c != ' ':
                    empty = False
                text = c
            parsed.append(text)
            c = line.read(1)
        return parsed, empty

    def parse_pml(self, pml, file_name=''):
        """Convert PML text to HTML.

        Resets the parser state and the TOC, converts every non-empty
        line and finally removes redundant empty tags.

        :param pml: PML text.
        :param file_name: name whose basename is recorded with each TOC
            entry.
        :return: the generated HTML, one ``<p>`` per line of content.
        """
        pml = self.prepare_pml(pml)
        output = []

        self.state = {}
        self.toc = TOC()
        self.file_name = file_name
        for s in self.STATES:
            self.state[s] = [False, '']

        for line in pml.splitlines():
            if not line:
                continue

            stream = StringIO.StringIO(line)
            try:
                parsed, empty = self._parse_line(stream)
            finally:
                stream.close()

            # Lines without content are dropped; end_line() is not called
            # for them, so an open 'T' state carries over to the next line.
            if not empty:
                parsed.append(self.end_line())
                output.append(u''.join(parsed))

        return self.cleanup_html(u'\n'.join(output))

    def get_toc(self):
        """Return the TOC built by the most recent parse_pml() call."""
        return self.toc
-
-
-
def pml_to_html(pml):
    """Return the HTML that a fresh PML_HTMLizer produces for *pml*."""
    return PML_HTMLizer().parse_pml(pml)
-
-
def footnote_sidebar_to_html(pre_id, id, pml):
    """Wrap converted *pml* in a page-broken div with a "return" link.

    :param pre_id: prefix used for both the div id and the return target.
    :param id: identifier; leading and trailing ``\\x01`` characters are
        stripped before use.
    :param pml: PML text converted with pml_to_html().
    :return: the HTML fragment.
    """
    anchor = id.strip('\x01')
    body = pml_to_html(pml)
    template = '<br /><br style="page-break-after: always;" /><div id="%s-%s"><p>%s</p><small><a href="#r%s-%s">return</a></small></div>'
    return template % (pre_id, anchor, body, pre_id, anchor)
-
-
def footnote_to_html(id, pml):
    """Return footnote_sidebar_to_html() output using the 'fn' prefix."""
    return footnote_sidebar_to_html(pre_id='fn', id=id, pml=pml)
-
-
def sidebar_to_html(id, pml):
    """Return footnote_sidebar_to_html() output using the 'sb' prefix."""
    return footnote_sidebar_to_html(pre_id='sb', id=id, pml=pml)
-
-