home *** CD-ROM | disk | FTP | other *** search
- # Source Generated with Decompyle++
- # File: in.pyc (Python 2.6)
-
- from __future__ import with_statement
- __license__ = 'GPL v3'
- __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
- import os
- import glob
- import re
- import textwrap
- from lxml import etree
- from calibre.customize.conversion import InputFormatPlugin
-
class InlineClass(etree.XSLTExtension):
    """XSLT extension element that turns inline formatting into CSS classes.

    Each call to ``execute`` inspects the attributes of the input node and
    writes a space separated list of class names into the output parent.
    Every distinct font size and font colour seen is remembered in
    ``font_sizes`` / ``colors`` so that a matching numbered class
    (``fsN`` / ``colN``) can be referred to later.
    """

    # Boolean attributes that map one-to-one onto a class of the same name.
    FMTS = ('italics', 'bold', 'underlined', 'strike-through', 'small-caps')

    def __init__(self, log):
        """Store ``log`` and start with no known font sizes or colours."""
        etree.XSLTExtension.__init__(self)
        self.log = log
        self.font_sizes = []
        self.colors = []

    def execute(self, context, self_node, input_node, output_parent):
        """Set ``output_parent.text`` to the class list for ``input_node``.

        The list always starts with ``none``, followed by every entry of
        ``FMTS`` whose attribute equals ``'true'``, then the numbered
        font-size and font-colour classes when those attributes are set.
        """
        names = ['none']
        names.extend(
            fmt for fmt in self.FMTS if input_node.get(fmt, None) == 'true')

        # (attribute, class name template, values seen so far); the number in
        # the class name is the position of the value in the "seen" list.
        indexed = (
            ('font-size', 'fs%d', self.font_sizes),
            ('font-color', 'col%d', self.colors),
        )
        for attr, template, seen in indexed:
            value = input_node.get(attr, False)
            if not value:
                continue
            if value not in seen:
                seen.append(value)
            names.append(template % seen.index(value))

        output_parent.text = ' '.join(names)
-
-
-
class RTFInput(InputFormatPlugin):
    """Input plugin that converts an RTF file to XHTML plus an OPF file.

    All scratch and output files (``out.xml``, ``preprocessed.rtf``,
    extracted images, ``index.xhtml``, ``metadata.opf``) are created
    relative to the current working directory.

    NOTE(review): this class was reconstructed from decompiled bytecode. The
    decompiler lost the ``from ... import ...`` statements, the ``with``
    blocks and several ``except`` clauses; they are restored here. The code
    targets Python 2 (``str.decode('hex')``, ``set([...])``).
    """

    name = 'RTF Input'
    author = 'Kovid Goyal'
    description = 'Convert RTF files to HTML'
    file_types = set(['rtf'])

    def generate_xml(self, stream):
        """Run the rtf2xml parser on ``stream`` and return its XML output.

        ``stream`` is passed to ``ParseRtf`` as ``in_file`` (``convert``
        passes a file name). The parser writes to the scratch file
        ``out.xml``, which is read back and then removed.
        """
        from calibre.ebooks.rtf2xml.ParseRtf import ParseRtf
        ofile = 'out.xml'
        parser = ParseRtf(
            in_file=stream,
            out_file=ofile,
            convert_symbol=1,
            convert_zapf=1,
            convert_wingdings=1,
            convert_caps=1,
            indent=1,
            form_lists=1,
            headings_to_sections=1,
            group_styles=1,
            group_borders=1,
            empty_paragraphs=0,
        )
        parser.parse_rtf()
        # Close the file before deleting it instead of relying on the
        # garbage collector to release the handle.
        with open(ofile) as f:
            ans = f.read()
        os.remove(ofile)
        return ans

    def extract_images(self, picts):
        """Extract the hex encoded pictures stored in the file ``picts``.

        Every ``{\\pict ...}`` group is decoded and written to a sequentially
        numbered ``NNNN.wmf`` file. The resulting ``{number: file name}``
        map is passed through ``convert_images`` and returned.
        """
        self.log('Extracting images...')
        with open(picts, 'rb') as f:
            raw = f.read()
        starts = [
            match.start(1)
            for match in re.finditer(r'\{\\pict([^}]+)\}', raw)
        ]

        count = 0
        imap = {}
        for start in starts:
            # Walk forward, tracking brace depth, until the group that
            # contains ``start`` is closed.
            pos = start
            bc = 1
            while bc > 0:
                if raw[pos] == '}':
                    bc -= 1
                elif raw[pos] == '{':
                    bc += 1
                pos += 1
            pict = raw[start:pos + 1]
            # Keep only alphanumeric characters and drop a trailing odd
            # digit so the remainder can be hex decoded.
            enc = re.sub(r'[^a-zA-Z0-9]', '', pict)
            if len(enc) % 2 == 1:
                enc = enc[:-1]
            data = enc.decode('hex')
            count += 1
            name = '%04d.wmf' % count
            with open(name, 'wb') as f:
                f.write(data)
            imap[count] = name

        return self.convert_images(imap)

    def convert_images(self, imap):
        """Convert every image in ``imap`` in place and return ``imap``.

        A failed conversion is logged and the original file name is kept
        for that entry.
        """
        for count, val in list(imap.items()):
            try:
                imap[count] = self.convert_image(val)
            except Exception:
                self.log.exception('Failed to convert', val)
        return imap

    def convert_image(self, name):
        """Re-save the image ``name`` as ``.jpg`` and return the new name."""
        from calibre.utils.magick import Image
        img = Image()
        img.open(name)
        name = name.replace('.wmf', '.jpg')
        img.save(name)
        return name

    def write_inline_css(self, ic):
        """Write the CSS rules for the classes generated by ``ic``.

        ``ic`` must expose ``font_sizes`` and ``colors`` sequences; one
        ``span.fsN`` / ``span.colN`` rule is emitted per entry, after the
        fixed rules for the basic formatting classes.
        """
        font_size_classes = [
            'span.fs%d { font-size: %spt }' % (i, x)
            for i, x in enumerate(ic.font_sizes)
        ]
        color_classes = [
            'span.col%d { color: %s }' % (i, x)
            for i, x in enumerate(ic.colors)
        ]
        css = textwrap.dedent(
            '\n'
            ' span.none {\n'
            ' text-decoration: none; font-weight: normal;\n'
            ' font-style: normal; font-variant: normal\n'
            ' }\n'
            '\n'
            ' span.italics { font-style: italic }\n'
            '\n'
            ' span.bold { font-weight: bold }\n'
            '\n'
            ' span.small-caps { font-variant: small-caps }\n'
            '\n'
            ' span.underlined { text-decoration: underline }\n'
            '\n'
            ' span.strike-through { text-decoration: line-through }\n'
            '\n'
            ' '
        )
        css += '\n' + '\n'.join(font_size_classes)
        css += '\n' + '\n'.join(color_classes)

        # NOTE(review): the decompiler dropped the ``open()`` call of this
        # ``with`` block. 'styles.css' opened in append mode is believed to
        # match upstream calibre -- confirm against the XSL template.
        with open('styles.css', 'ab') as f:
            f.write(css)

    def preprocess(self, fname):
        """Rewrite the RTF file ``fname`` with unicode sequences converted.

        Returns the name of the file to use for the rest of the conversion:
        ``preprocessed.rtf`` on success, or the unchanged ``fname`` if any
        step fails (the failure is logged and otherwise ignored).
        """
        self.log('\tPreprocessing to convert unicode characters')
        try:
            with open(fname, 'rb') as f:
                data = f.read()
            from calibre.ebooks.rtf.preprocess import (
                RtfTokenizer, RtfTokenParser)
            tokenizer = RtfTokenizer(data)
            tokens = RtfTokenParser(tokenizer.tokens)
            data = tokens.toRTF()
            out_name = 'preprocessed.rtf'
            # NOTE(review): mode 'wb' is inferred; the decompiler lost it.
            with open(out_name, 'wb') as f:
                f.write(data)
            # Only switch to the new file once it has been fully written, so
            # a failed write falls back to the original input.
            fname = out_name
        except Exception:
            self.log.exception(
                'Failed to preprocess RTF to convert unicode sequences, '
                'ignoring...')
        return fname

    def convert(self, stream, options, file_ext, log, accelerators):
        """Convert the RTF file behind ``stream`` and return the OPF path.

        Steps: preprocess ``stream.name``, convert it to XML, extract any
        pictures, transform the XML to ``index.xhtml`` with the
        ``templates/rtf.xsl`` stylesheet, write the inline CSS and finally
        write ``metadata.opf``.

        ``options``, ``file_ext`` and ``accelerators`` are not used here.
        ``_`` and ``P`` are not defined in this file; presumably they are
        builtins installed by calibre -- verify.

        Raises ``ValueError`` if the RTF parser reports an invalid code.
        """
        from calibre.ebooks.metadata.meta import get_metadata
        from calibre.ebooks.metadata.opf2 import OPFCreator
        from calibre.ebooks.rtf2xml.ParseRtf import RtfInvalidCodeException
        self.log = log
        self.log('Converting RTF to XML...')
        fname = self.preprocess(stream.name)

        try:
            xml = self.generate_xml(fname)
        except RtfInvalidCodeException as e:
            raise ValueError(_('This RTF file has a feature calibre does not support. Convert it to HTML first and then try it.\n%s') % e)

        # Must exist even when there is no picture directory, because the
        # rtf:pict loop below always consults it.
        imap = {}
        d = glob.glob(os.path.join('*_rtf_pict_dir', 'picts.rtf'))
        if d:
            try:
                imap = self.extract_images(d[0])
            except Exception:
                self.log.exception('Failed to extract images...')

        self.log('Parsing XML...')
        parser = etree.XMLParser(recover=True, no_network=True)
        doc = etree.fromstring(xml, parser=parser)
        # Replace picture numbers with the extracted image file names.
        for pict in doc.xpath(
                '//rtf:pict[@num]',
                namespaces={'rtf': 'http://rtf2xml.sourceforge.net/'}):
            num = int(pict.get('num'))
            name = imap.get(num, None)
            if name is not None:
                pict.set('num', name)

        self.log('Converting XML to HTML...')
        inline_class = InlineClass(self.log)
        styledoc = etree.fromstring(P('templates/rtf.xsl', data=True))
        extensions = {('calibre', 'inline-class'): inline_class}
        transform = etree.XSLT(styledoc, extensions=extensions)
        result = transform(doc)
        html = 'index.xhtml'

        # NOTE(review): mode 'wb' is inferred; the decompiler lost it.
        with open(html, 'wb') as f:
            res = transform.tostring(result)
            # Only the first 100 characters are searched when renaming the
            # prefixed namespace declaration to the default namespace.
            res = res[:100].replace('xmlns:html', 'xmlns') + res[100:]
            f.write(res)

        self.write_inline_css(inline_class)
        stream.seek(0)
        mi = get_metadata(stream, 'rtf')
        if not mi.authors:
            mi.authors = [_('Unknown')]

        opf = OPFCreator(os.getcwd(), mi)
        opf.create_manifest([('index.xhtml', None)])
        opf.create_spine(['index.xhtml'])
        with open('metadata.opf', 'wb') as f:
            opf.render(f)
        return os.path.abspath('metadata.opf')
-
-
-