Maximum CD 2011 January

home *** CD-ROM | disk | FTP | other *** search

/ Maximum CD 2011 January / maximum-cd-2011-01.iso / DiscContents / calibre-0.7.26.msi / file_1075 (.txt) < prev next >

Wrap

Python Compiled Bytecode | 2010-10-31 | 8.6 KB | 236 lines

# Source Generated with Decompyle++
# File: in.pyc (Python 2.6)
#
# NOTE(review): this module was reconstructed from decompiler output.  The
# decompiler dropped ``except`` clauses, ``with`` statements and the
# ``from ... import ...`` form of function-local imports; they are restored
# below.  Places where a value had to be inferred are marked individually.
"""RTF input plugin: converts an RTF file to XHTML plus an OPF file."""

from __future__ import with_statement

__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'

import os
import glob
import re
import textwrap

from lxml import etree

from calibre.customize.conversion import InputFormatPlugin
from calibre.ebooks.conversion.utils import PreProcessor

# NOTE(review): ``_`` (translation) and ``P`` (resource lookup) are used
# below but never defined or imported in this file -- presumably calibre
# installs them as builtins at startup; confirm before running standalone.


class InlineClass(etree.XSLTExtension):
    """XSLT extension element that emits a CSS class list for inline text.

    Distinct font sizes and colours seen during the transform are collected
    in ``font_sizes`` and ``colors``; the index of a value in those lists is
    used to build its class name (``fsN`` / ``colN``).
    """

    # Boolean formatting attributes that map one-to-one onto CSS class names.
    FMTS = ('italics', 'bold', 'underlined', 'strike-through', 'small-caps')

    def __init__(self, log):
        etree.XSLTExtension.__init__(self)
        self.log = log
        self.font_sizes = []
        self.colors = []

    def execute(self, context, self_node, input_node, output_parent):
        """Set ``output_parent.text`` to the space-separated class list."""
        # 'none' is always first; its rule resets every inline property.
        classes = ['none']
        for fmt in self.FMTS:
            if input_node.get(fmt, None) == 'true':
                classes.append(fmt)

        fs = input_node.get('font-size', False)
        if fs:
            if fs not in self.font_sizes:
                self.font_sizes.append(fs)
            classes.append('fs%d' % self.font_sizes.index(fs))

        fc = input_node.get('font-color', False)
        if fc:
            if fc not in self.colors:
                self.colors.append(fc)
            classes.append('col%d' % self.colors.index(fc))

        output_parent.text = ' '.join(classes)


class RTFInput(InputFormatPlugin):
    """Input plugin that converts RTF to XHTML via an intermediate XML form."""

    name = 'RTF Input'
    author = 'Kovid Goyal'
    description = 'Convert RTF files to HTML'
    file_types = set(['rtf'])

    def generate_xml(self, stream):
        """Run ParseRtf on *stream* and return the XML it produces.

        The parser writes to a temporary ``out.xml`` in the current
        directory, which is read back and then deleted.
        """
        from calibre.ebooks.rtf2xml.ParseRtf import ParseRtf
        ofile = 'out.xml'
        parser = ParseRtf(
            in_file=stream,
            out_file=ofile,
            convert_symbol=1,
            convert_zapf=1,
            convert_wingdings=1,
            convert_caps=1,
            indent=1,
            form_lists=1,
            headings_to_sections=1,
            group_styles=1,
            group_borders=1,
            empty_paragraphs=0,
        )
        parser.parse_rtf()
        with open(ofile) as f:
            ans = f.read()
        os.remove(ofile)
        return ans

    def extract_images(self, picts):
        """Extract every ``{\\pict ...}`` group in the file *picts*.

        Each picture's hex payload is decoded and written to ``NNNN.wmf`` in
        the current directory.  Returns the result of ``convert_images`` on
        the mapping ``{picture number: file name}``.
        """
        self.log('Extracting images...')

        with open(picts, 'rb') as f:
            raw = f.read()
        starts = [m.start(1)
                  for m in re.finditer(r'\{\\pict([^}]+)\}', raw)]

        imap = {}
        count = 0
        for start in starts:
            # Walk forward until the brace that opened this pict group is
            # closed, tracking nesting depth.
            pos = start
            bc = 1
            while bc > 0:
                if raw[pos] == '}':
                    bc -= 1
                elif raw[pos] == '{':
                    bc += 1
                pos += 1
            pict = raw[start:pos + 1]
            # Keep only alphanumerics, then drop a trailing odd nibble so the
            # hex decode below cannot fail on length.
            enc = re.sub(r'[^a-zA-Z0-9]', '', pict)
            if len(enc) % 2 == 1:
                enc = enc[:-1]
            data = enc.decode('hex')  # Python 2 str codec
            count += 1
            name = '%04d.wmf' % count
            with open(name, 'wb') as f:
                f.write(data)
            imap[count] = name
        return self.convert_images(imap)

    def convert_images(self, imap):
        """Convert each image in *imap* in place and return *imap*.

        A failed conversion is logged and leaves that entry unchanged.
        """
        for count, val in imap.items():
            try:
                imap[count] = self.convert_image(val)
            except Exception:
                self.log.exception('Failed to convert', val)
        return imap

    def convert_image(self, name):
        """Re-save the image *name* with a ``.jpg`` name; return that name."""
        from calibre.utils.magick import Image
        img = Image()
        img.open(name)
        name = name.replace('.wmf', '.jpg')
        img.save(name)
        return name

    def write_inline_css(self, ic):
        """Write the inline-formatting CSS rules gathered by *ic*.

        *ic* is the ``InlineClass`` used during the transform; one rule is
        emitted per collected font size and per collected colour, after the
        fixed rules for the ``FMTS`` classes.
        """
        font_size_classes = ['span.fs%d { font-size: %spt }' % (i, x)
                             for i, x in enumerate(ic.font_sizes)]
        color_classes = ['span.col%d { color: %s }' % (i, x)
                         for i, x in enumerate(ic.colors)]
        css = textwrap.dedent('''
        span.none {
            text-decoration: none; font-weight: normal;
            font-style: normal; font-variant: normal
        }

        span.italics { font-style: italic }

        span.bold { font-weight: bold }

        span.small-caps { font-variant: small-caps }

        span.underlined { text-decoration: underline }

        span.strike-through { text-decoration: line-through }

        ''')
        css += '\n' + '\n'.join(font_size_classes)
        css += '\n' + '\n'.join(color_classes)

        # NOTE(review): the decompiler lost the open() call here (it emitted
        # ``f = _[3]``).  'styles.css' opened in append mode is a
        # reconstruction -- confirm against the XSL template before relying
        # on it.
        with open('styles.css', 'ab') as f:
            f.write(css)

    def preprocess(self, fname):
        """Rewrite unicode sequences in the RTF file *fname*.

        Returns the name of the preprocessed copy, or *fname* unchanged if
        preprocessing fails for any reason (the failure is logged).
        """
        self.log('\tPreprocessing to convert unicode characters')
        try:
            from calibre.ebooks.rtf.preprocess import (
                RtfTokenizer, RtfTokenParser)
            with open(fname, 'rb') as f:
                data = f.read()
            tokenizer = RtfTokenizer(data)
            tokens = RtfTokenParser(tokenizer.tokens)
            data = tokens.toRTF()
            out_name = 'preprocessed.rtf'
            # NOTE(review): open mode reconstructed ('wb' mirrors the 'rb'
            # read above); the decompiler dropped the original call.
            with open(out_name, 'wb') as f:
                f.write(data)
            # Switch to the new file only once it has been fully written, so
            # a failure falls back to the untouched input.
            fname = out_name
        except Exception:
            self.log.exception('Failed to preprocess RTF to convert unicode '
                               'sequences, ignoring...')
        return fname

    def convert(self, stream, options, file_ext, log, accelerators):
        """Convert the RTF in *stream*; return the absolute OPF path.

        Writes ``index.xhtml``, the inline CSS and ``metadata.opf`` into the
        current directory.

        Raises:
            ValueError: if the RTF parser reports an unsupported feature.
        """
        from calibre.ebooks.metadata.meta import get_metadata
        from calibre.ebooks.metadata.opf2 import OPFCreator
        from calibre.ebooks.rtf2xml.ParseRtf import RtfInvalidCodeException
        self.options = options
        self.log = log

        self.log('Converting RTF to XML...')
        fname = self.preprocess(stream.name)
        try:
            xml = self.generate_xml(fname)
        except RtfInvalidCodeException as e:
            raise ValueError(_('This RTF file has a feature calibre does not '
                               'support. Convert it to HTML first and then '
                               'try it.\n%s') % e)

        # Bind imap unconditionally: the pict loop below reads it even when
        # no picture directory was produced.
        imap = {}
        d = glob.glob(os.path.join('*_rtf_pict_dir', 'picts.rtf'))
        if d:
            try:
                imap = self.extract_images(d[0])
            except Exception:
                # Images are best-effort; carry on with the text.
                self.log.exception('Failed to extract images...')

        self.log('Parsing XML...')
        parser = etree.XMLParser(recover=True, no_network=True)
        doc = etree.fromstring(xml, parser=parser)
        # Replace each picture number with the extracted file name, if any.
        for pict in doc.xpath(
                '//rtf:pict[@num]',
                namespaces={'rtf': 'http://rtf2xml.sourceforge.net/'}):
            num = int(pict.get('num'))
            name = imap.get(num, None)
            if name is not None:
                pict.set('num', name)

        self.log('Converting XML to HTML...')
        inline_class = InlineClass(self.log)
        styledoc = etree.fromstring(P('templates/rtf.xsl', data=True))
        extensions = {('calibre', 'inline-class'): inline_class}
        transform = etree.XSLT(styledoc, extensions=extensions)
        result = transform(doc)
        html = 'index.xhtml'
        # NOTE(review): open mode reconstructed; the decompiler dropped the
        # original ``with open(...)`` header.
        with open(html, 'wb') as f:
            res = transform.tostring(result)
            # Only the first 100 characters are searched for the prefixed
            # namespace declaration, which is rewritten to a default one.
            res = res[:100].replace('xmlns:html', 'xmlns') + res[100:]
            f.write(res)
        self.write_inline_css(inline_class)

        stream.seek(0)
        mi = get_metadata(stream, 'rtf')
        if not mi.authors:
            mi.authors = [_('Unknown')]
        opf = OPFCreator(os.getcwd(), mi)
        opf.create_manifest([('index.xhtml', None)])
        opf.create_spine(['index.xhtml'])
        with open('metadata.opf', 'wb') as f:
            opf.render(f)
        return os.path.abspath('metadata.opf')