Maximum CD 2011 January

home *** CD-ROM | disk | FTP | other *** search

/ Maximum CD 2011 January / maximum-cd-2011-01.iso / DiscContents / calibre-0.7.26.msi / file_1135 (.txt) < prev next >

Wrap

Python Compiled Bytecode | 2010-10-31 | 7.6 KB | 234 lines

# Source Generated with Decompyle++
# File: in.pyc (Python 2.6)
#
# NOTE: this module was recovered from bytecode.  Statements that the
# decompiler visibly mangled (lost ``continue``, flattened ``else`` branches,
# no-op assignments, mis-encoded string literals) have been repaired below and
# are marked with comments where the repair is not self-evident.

"""Convert the XHTML content of an OEB book item into SNBC element trees."""

__license__ = 'GPL 3'
__copyright__ = '2010, Li Fanxi <lifanxi@freemindworld.com>'
__docformat__ = 'restructuredtext en'

import os
import re

from lxml import etree

from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
from calibre.ebooks.oeb.stylizer import Stylizer


def ProcessFileName(fileName):
    """Flatten a path into a single lower-case file name.

    ``/``, the platform path separator and ``#`` are each replaced by ``_``.
    If the resulting extension is one of the known image extensions it is
    rewritten to ``.jpg``.

    :param fileName: path or href to flatten.
    :return: the flattened, lower-cased name.
    """
    fileName = fileName.replace('/', '_').replace(os.sep, '_')
    fileName = fileName.replace('#', '_')
    fileName = fileName.lower()
    root, ext = os.path.splitext(fileName)
    if ext in ('.jpeg', '.jpg', '.gif', '.svg', '.png'):
        fileName = root + '.jpg'
    return fileName


# Tags / CSS display values that start and end a paragraph in the output.
BLOCK_TAGS = ['div', 'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'li', 'tr']
BLOCK_STYLES = ['block']
# Tags whose content is separated from preceding text by a single space.
SPACE_TAGS = ['td']

# In-band markers written into the intermediate text by dump_text() and
# interpreted line by line in mlize().
CALIBRE_SNB_IMG_TAG = '<$$calibre_snb_temp_img$$>'
CALIBRE_SNB_BM_TAG = '<$$calibre_snb_bm_tag$$>'
CALIBRE_SNB_PRE_TAG = '<$$calibre_snb_pre_tag$$>'

# U+3000 IDEOGRAPHIC SPACE.  The decompiled listing showed this as the
# three-character mojibake of its UTF-8 bytes read as CP437; it is written as
# an escape here so the source stays pure ASCII.
_IDEOGRAPHIC_SPACE = u'\u3000'


def _wrap_lines(text, max_length):
    """Break every line of *text* that is longer than *max_length* at a space.

    The last space before *max_length* is preferred; failing that, the first
    space after it.  A line without any usable space is left unbroken.
    """
    short_lines = []
    for line in text.splitlines():
        while len(line) > max_length:
            space = line.rfind(' ', 0, max_length)
            if space == -1:
                space = line.find(' ', max_length)
            if space == -1:
                # No space anywhere: the remainder cannot be broken.  (The
                # decompiled code appended it and then appended an extra
                # empty line; only the remainder is kept here.)
                break
            short_lines.append(line[:space])
            line = line[space + 1:]
        short_lines.append(line)
    return '\n'.join(short_lines)


class SNBMLizer(object):
    """Turn one OEB XHTML item into a dict of ``snbc`` element trees."""

    # NOTE(review): never read or written anywhere in the visible code.
    curSubItem = ''

    def __init__(self, log):
        """:param log: logger exposing ``info`` and ``debug``."""
        self.log = log

    def extract_content(self, oeb_book, item, subitems, opts):
        """Store the conversion inputs on ``self`` and run :meth:`mlize`.

        :param oeb_book: book object handed to the stylizer.
        :param item: item whose ``data`` (parsed XHTML) and ``href`` are read.
        :param subitems: iterable of ``(name, title)`` pairs; one tree is
            built per pair.
        :param opts: conversion options.
        :return: dict mapping each sub-item name to its ``snbc`` tree.
        """
        self.log.info('Converting XHTML to SNBC...')
        self.oeb_book = oeb_book
        self.opts = opts
        self.item = item
        self.subitems = subitems
        return self.mlize()

    def merge_content(self, old_tree, oeb_book, item, subitems, opts):
        """Convert *item* and append the result to the ``body`` of *old_tree*.

        Does nothing beyond the conversion when *old_tree* has no ``body``.
        Returns ``None``.
        """
        newTrees = self.extract_content(oeb_book, item, subitems, opts)
        body = old_tree.find('.//body')
        if body is None:
            return
        for subName in newTrees:
            newbody = newTrees[subName].find('.//body')
            # append() moves each child out of newbody, so iterate a snapshot.
            for entity in list(newbody):
                body.append(entity)

    def mlize(self):
        """Build the ``snbc`` trees for the current item.

        The XHTML body is first flattened to marker-annotated text by
        :meth:`dump_text` / :meth:`cleanup_text`; each resulting line is then
        turned into a ``text`` or ``img`` child of the active tree's ``body``.

        :return: dict mapping each sub-item name to its ``snbc`` tree.
        """
        output = [u'']
        stylizer = Stylizer(self.item.data, self.item.href, self.oeb_book,
                            self.opts, self.opts.output_profile)
        content = unicode(etree.tostring(self.item.data.find(XHTML('body')),
                                         encoding=unicode))
        trees = {}
        for subitem, subtitle in self.subitems:
            snbcTree = etree.Element('snbc')
            snbcHead = etree.SubElement(snbcTree, 'head')
            etree.SubElement(snbcHead, 'title').text = subtitle
            if self.opts and self.opts.snb_hide_chapter_name:
                etree.SubElement(snbcHead, 'hidetitle').text = u'true'
            etree.SubElement(snbcTree, 'body')
            trees[subitem] = snbcTree

        # Start with the bookmark for the unnamed ('') sub-item.
        output.append(u'%s%s\n\n' % (CALIBRE_SNB_BM_TAG, ''))
        output += self.dump_text(self.subitems, etree.fromstring(content),
                                 stylizer)[0]
        output = self.cleanup_text(u''.join(output))

        # Requires a '' entry in self.subitems; raises KeyError otherwise.
        subitem = ''
        bodyTree = trees[subitem].find('.//body')
        for line in output.splitlines():
            if line.startswith(CALIBRE_SNB_PRE_TAG):
                # Preformatted line: emit verbatim (no stripping) and move on.
                # The ``continue`` was lost in decompilation; without it the
                # line would be emitted a second time with its marker.
                etree.SubElement(bodyTree, 'text').text = \
                    etree.CDATA(line[len(CALIBRE_SNB_PRE_TAG):])
                continue

            line = line.strip(u' \t\n\r' + _IDEOGRAPHIC_SPACE)
            if not line:
                continue

            if line.startswith(CALIBRE_SNB_IMG_TAG):
                # Image names are prefixed with the flattened directory of
                # the item so they stay unique in a flat namespace.
                prefix = ProcessFileName(os.path.dirname(self.item.href))
                image = line[len(CALIBRE_SNB_IMG_TAG):]
                if prefix != '':
                    image = prefix + '_' + image
                etree.SubElement(bodyTree, 'img').text = image
            elif line.startswith(CALIBRE_SNB_BM_TAG):
                # Bookmark: switch output to the named sub-item's body.
                subitem = line[len(CALIBRE_SNB_BM_TAG):]
                bodyTree = trees[subitem].find('.//body')
            else:
                if self.opts and self.opts.snb_indent_first_line:
                    prefix = _IDEOGRAPHIC_SPACE * 2
                else:
                    prefix = u''
                etree.SubElement(bodyTree, 'text').text = \
                    etree.CDATA(unicode(prefix + line))

            if self.opts and self.opts.snb_insert_empty_line:
                etree.SubElement(bodyTree, 'text').text = etree.CDATA(u'')

        return trees

    def remove_newlines(self, text):
        """Return *text* with every CRLF, LF and CR replaced by a space."""
        self.log.debug('\tRemove newlines for processing...')
        text = text.replace('\r\n', ' ')
        text = text.replace('\n', ' ')
        text = text.replace('\r', ' ')
        return text

    def cleanup_text(self, text):
        """Normalise whitespace and paragraph breaks in the flattened text.

        :param text: text produced by joining :meth:`dump_text` output.
        :return: cleaned text; lines are wrapped when
            ``opts.snb_max_line_length`` is set.
        """
        self.log.debug('\tClean up text...')
        # These three literals were mojibake in the decompiled listing
        # (UTF-8 bytes shown as CP437); restored as escapes.
        text = text.replace(u'\xc2', '')
        text = text.replace(u'\xa0', ' ')
        text = text.replace(u'\xa9', '(C)')

        # Collapse runs of tabs, vertical tabs and form feeds to one space.
        # The decompiled code used str.replace() with the regex-looking
        # literals '\t+', '\x0b+' and '\x0c+', which only ever matched the
        # control character followed by a literal plus sign.
        text = re.sub('\t+', ' ', text)
        text = re.sub('\x0b+', ' ', text)
        text = re.sub('\x0c+', ' ', text)

        # A single line break between two characters is a soft wrap.
        # NOTE(review): uses os.linesep, so this only matches '\n' on
        # platforms where that is the line separator -- confirm intent.
        text = re.sub('(?<=.)%s(?=.)' % os.linesep, ' ', text)
        # Lines containing only spaces count as empty.
        text = re.sub('\n[ ]+\n', '\n\n', text)
        if self.opts.remove_paragraph_spacing:
            text = re.sub('\n{2,}', '\n', text)
            text = re.sub('(?imu)^(?=.)', '\t', text)
        else:
            text = re.sub('\n{3,}', '\n\n', text)

        # Strip spaces at the beginning and end of every line.
        text = re.sub('(?imu)^[ ]+', '', text)
        text = re.sub('(?imu)[ ]+$', '', text)

        if self.opts.snb_max_line_length:
            # Never wrap narrower than 25 characters.  The decompiled code
            # tested ``opts.max_line_length`` here, a different option from
            # the one just read.
            max_length = max(self.opts.snb_max_line_length, 25)
            text = _wrap_lines(text, max_length)

        return text

    def dump_text(self, subitems, elem, stylizer, end='', pre=False, li=''):
        """Recursively flatten *elem* into a list of text fragments.

        :param subitems: passed through to recursive calls.
            NOTE(review): not otherwise used in the visible code, so only the
            initial bookmark written by :meth:`mlize` is ever emitted; any
            per-element bookmark logic may have been lost in decompilation
            -- confirm against the original source.
        :param elem: element to flatten.
        :param stylizer: object whose ``style(elem)`` gives the computed
            ``display`` / ``visibility`` of an element.
        :param end: tail of the text emitted just before this element, used
            to avoid doubling separators.
        :param pre: ``True`` when inside a ``pre`` element.
        :param li: list-item prefix still waiting to be written.
        :return: ``(fragments, li)`` normally; ``['']`` for skipped elements
            (callers index ``[0]``, which then yields ``''`` and adds
            nothing).
        """
        if not isinstance(elem.tag, basestring) \
                or namespace(elem.tag) != XHTML_NS:
            return ['']

        text = ['']
        style = stylizer.style(elem)

        if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
                or style['visibility'] == 'hidden':
            return ['']

        tag = barename(elem.tag)

        # The decompiled code set ``in_block = False`` and never changed it,
        # leaving BLOCK_TAGS / BLOCK_STYLES unused and the paragraph break
        # after the children unreachable.  The block test is reinstated; the
        # leading break mirrors the SPACE_TAGS guard below.
        in_block = tag in BLOCK_TAGS or style['display'] in BLOCK_STYLES
        if in_block:
            if not end.endswith(u'\n\n') and hasattr(elem, 'text') and elem.text:
                text.append(u'\n\n')

        if tag in SPACE_TAGS:
            # Was ``end.endswith('u ')`` -- a misplaced unicode prefix.
            if not end.endswith(u' ') and hasattr(elem, 'text') and elem.text:
                text.append(u' ')

        if tag == 'img':
            text.append(u'\n\n%s%s\n\n' % (CALIBRE_SNB_IMG_TAG,
                                           ProcessFileName(elem.attrib['src'])))

        if tag == 'br':
            text.append(u'\n\n')

        if tag == 'li':
            li = '- '

        # Decompiled as ``if not tag == 'pre': pass`` / ``pre = pre``.
        pre = tag == 'pre' or pre

        # Every line after the first in preformatted text gets its own
        # marker so mlize() emits it without stripping.
        pre_joiner = u'\n\n%s' % CALIBRE_SNB_PRE_TAG

        if hasattr(elem, 'text') and elem.text:
            if pre:
                text.append(pre_joiner.join((li + elem.text).splitlines()))
            else:
                text.append(li + elem.text)
            li = ''

        for item in elem:
            en = u''
            if len(text) >= 2:
                en = text[-1][-2:]
            t = self.dump_text(subitems, item, stylizer, en, pre, li)[0]
            text += t

        if in_block:
            text.append(u'\n\n')

        if hasattr(elem, 'tail') and elem.tail:
            if pre:
                text.append(pre_joiner.join(elem.tail.splitlines()))
            else:
                text.append(li + elem.tail)
            li = ''

        return (text, li)