home *** CD-ROM | disk | FTP | other *** search
- # Source Generated with Decompyle++
- # File: in.pyc (Python 2.6)
-
- __license__ = 'GPL 3'
- __copyright__ = '2010, Li Fanxi <lifanxi@freemindworld.com>'
- __docformat__ = 'restructuredtext en'
- import os
- import re
- from lxml import etree
- from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
- from calibre.ebooks.oeb.stylizer import Stylizer
-
def ProcessFileName(fileName):
    """Return a flattened, lower-cased form of *fileName*.

    Path separators ('/' and ``os.sep``) and '#' are each replaced by '_',
    the result is lower-cased, and any of the image extensions listed
    below is rewritten to '.jpg'.
    """
    flattened = fileName
    for separator in ('/', os.sep, '#'):
        flattened = flattened.replace(separator, '_')
    flattened = flattened.lower()

    stem, suffix = os.path.splitext(flattened)
    if suffix in ('.jpeg', '.jpg', '.gif', '.svg', '.png'):
        # Every recognised image type is referenced under a .jpg name.
        return stem + '.jpg'
    return flattened
-
# Names of HTML tags regarded as block-level elements.
BLOCK_TAGS = ['div', 'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'li', 'tr']

# CSS ``display`` values regarded as block-level.
BLOCK_STYLES = ['block']

# Tags whose text is preceded by a single space when dumped.
SPACE_TAGS = ['td']

# Sentinel prefixes written into the intermediate plain text: dump_text()
# emits the image and pre-formatted markers, mlize() emits the bookmark
# marker, and mlize() recognises all three when building the output trees.
CALIBRE_SNB_IMG_TAG = '<$$calibre_snb_temp_img$$>'
CALIBRE_SNB_BM_TAG = '<$$calibre_snb_bm_tag$$>'
CALIBRE_SNB_PRE_TAG = '<$$calibre_snb_pre_tag$$>'
-
class SNBMLizer(object):
    """Convert the XHTML body of one OEB item into ``snbc`` element trees.

    The item is first flattened into plain text in which images,
    sub-item boundaries and pre-formatted lines are marked with the
    module-level ``CALIBRE_SNB_*_TAG`` sentinels; that text is then
    split into lines and turned into one lxml ``snbc`` tree per sub-item.
    """

    curSubItem = ''

    def __init__(self, log):
        """Remember *log*, which must provide ``info()`` and ``debug()``."""
        self.log = log

    def extract_content(self, oeb_book, item, subitems, opts):
        """Convert *item* and return a dict mapping sub-item key -> snbc tree.

        *subitems* is an iterable of ``(key, title)`` pairs; *opts* carries
        the conversion options read by the other methods.
        """
        self.log.info('Converting XHTML to SNBC...')
        self.oeb_book = oeb_book
        self.opts = opts
        self.item = item
        self.subitems = subitems
        return self.mlize()

    def merge_content(self, old_tree, oeb_book, item, subitems, opts):
        """Convert *item* and append the resulting body children to *old_tree*.

        Nothing is merged when *old_tree* has no ``body`` element.
        """
        newTrees = self.extract_content(oeb_book, item, subitems, opts)
        body = old_tree.find('.//body')
        if body is None:
            return
        for subName in newTrees:
            newbody = newTrees[subName].find('.//body')
            if newbody is None:
                continue
            # Appending moves the child out of ``newbody``; iterate over a
            # snapshot so the move cannot disturb the iteration.
            for entity in list(newbody):
                body.append(entity)

    def mlize(self):
        """Build and return the dict of snbc trees for the current item.

        Relies on ``self.item``, ``self.subitems``, ``self.oeb_book`` and
        ``self.opts`` having been set by :meth:`extract_content`.  The
        sub-item key ``''`` must be present in ``self.subitems`` because
        output always starts in that tree.
        """
        output = [u'']
        stylizer = Stylizer(self.item.data, self.item.href, self.oeb_book,
                            self.opts, self.opts.output_profile)
        content = unicode(etree.tostring(self.item.data.find(XHTML('body')),
                                         encoding=unicode))

        # One empty <snbc><head/><body/></snbc> skeleton per sub-item.
        trees = {}
        for subitem, subtitle in self.subitems:
            snbcTree = etree.Element('snbc')
            snbcHead = etree.SubElement(snbcTree, 'head')
            etree.SubElement(snbcHead, 'title').text = subtitle
            if self.opts and self.opts.snb_hide_chapter_name:
                etree.SubElement(snbcHead, 'hidetitle').text = u'true'
            etree.SubElement(snbcTree, 'body')
            trees[subitem] = snbcTree

        output.append(u'%s%s\n\n' % (CALIBRE_SNB_BM_TAG, ''))
        output += self.dump_text(self.subitems, etree.fromstring(content),
                                 stylizer)[0]
        output = self.cleanup_text(u''.join(output))

        # Image names are prefixed with the flattened directory of the item.
        img_prefix = ProcessFileName(os.path.dirname(self.item.href))
        if img_prefix != '':
            img_prefix += '_'

        subitem = ''
        bodyTree = trees[subitem].find('.//body')
        for line in output.splitlines():
            # cleanup_text() may have put a tab in front of the marker, so
            # look for it anywhere instead of only at column 0.
            pre_pos = line.find(CALIBRE_SNB_PRE_TAG)
            if pre_pos != -1:
                # Pre-formatted line: keep its whitespace and emit it once.
                etree.SubElement(bodyTree, 'text').text = etree.CDATA(
                    line[pre_pos + len(CALIBRE_SNB_PRE_TAG):])
                continue

            # u'\u3000' is the ideographic (full-width) space.
            line = line.strip(u' \t\n\r\u3000')
            if len(line) == 0:
                continue

            if line.find(CALIBRE_SNB_IMG_TAG) == 0:
                etree.SubElement(bodyTree, 'img').text = \
                    img_prefix + line[len(CALIBRE_SNB_IMG_TAG):]
            elif line.find(CALIBRE_SNB_BM_TAG) == 0:
                # Bookmark marker: switch output to that sub-item's tree.
                subitem = line[len(CALIBRE_SNB_BM_TAG):]
                bodyTree = trees[subitem].find('.//body')
            else:
                if self.opts and self.opts.snb_indent_first_line:
                    prefix = u'\u3000\u3000'
                else:
                    prefix = u''
                etree.SubElement(bodyTree, 'text').text = \
                    etree.CDATA(unicode(prefix + line))

            if self.opts and self.opts.snb_insert_empty_line:
                etree.SubElement(bodyTree, 'text').text = etree.CDATA(u'')

        return trees

    def remove_newlines(self, text):
        """Return *text* with every CRLF, LF and CR replaced by one space."""
        self.log.debug('\tRemove newlines for processing...')
        text = text.replace('\r\n', ' ')
        text = text.replace('\n', ' ')
        text = text.replace('\r', ' ')
        return text

    def cleanup_text(self, text):
        """Normalise whitespace and line structure of the dumped *text*.

        Reads ``self.opts.remove_paragraph_spacing`` and
        ``self.opts.snb_max_line_length``.  When the latter is non-zero,
        lines are wrapped at spaces to that length (never less than 25).
        """
        self.log.debug('\tClean up text...')
        # Escapes are used so the characters are unambiguous in the source:
        # U+00C2 is dropped, U+00A0 (no-break space) becomes a plain space
        # and U+00A9 (copyright sign) becomes "(C)".
        text = text.replace(u'\xc2', '')
        text = text.replace(u'\xa0', ' ')
        text = text.replace(u'\xa9', '(C)')
        # Runs of tab / vertical tab / form feed collapse to one space.
        # (str.replace() would treat the '+' as a literal character.)
        text = re.sub(u'[\t\x0b\x0c]+', ' ', text)
        # A lone line break between two characters is a soft wrap.
        # NOTE(review): os.linesep is platform dependent, so this only
        # matches '\n' where os.linesep is '\n' -- confirm intent.
        text = re.sub('(?<=.)%s(?=.)' % os.linesep, ' ', text)
        text = re.sub('\n[ ]+\n', '\n\n', text)
        if self.opts.remove_paragraph_spacing:
            text = re.sub('\n{2,}', '\n', text)
            text = re.sub('(?imu)^(?=.)', '\t', text)
        else:
            text = re.sub('\n{3,}', '\n\n', text)
        text = re.sub('(?imu)^[ ]+', '', text)
        text = re.sub('(?imu)[ ]+$', '', text)

        max_length = self.opts.snb_max_line_length
        if max_length:
            # Enforce the minimum on the SNB option itself.
            max_length = max(max_length, 25)
            short_lines = []
            for line in text.splitlines():
                while len(line) > max_length:
                    # Prefer the last space inside the limit, otherwise the
                    # first one after it.
                    cut = line.rfind(' ', 0, max_length)
                    if cut == -1:
                        cut = line.find(' ', max_length)
                    if cut == -1:
                        # No space left: emit the remainder unbroken.
                        break
                    short_lines.append(line[:cut])
                    line = line[cut + 1:]
                short_lines.append(line)
            text = '\n'.join(short_lines)

        return text

    def dump_text(self, subitems, elem, stylizer, end='', pre=False, li=''):
        """Recursively flatten *elem* into a list of text fragments.

        :param subitems: passed through to recursive calls; not otherwise
            used here.
        :param elem: the element to dump.
        :param stylizer: provides ``style(elem)`` with the computed
            ``display`` and ``visibility`` values.
        :param end: up to the last two characters emitted just before this
            element, used to avoid duplicate separators.
        :param pre: True when inside a ``pre`` element.
        :param li: pending list-item marker to prepend to the next text.
        :return: ``(fragments, li)``; callers in this class use index 0.
        """
        # Comments, processing instructions and foreign-namespace elements
        # contribute nothing.
        if not isinstance(elem.tag, basestring) \
                or namespace(elem.tag) != XHTML_NS:
            return ([], li)

        style = stylizer.style(elem)
        if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
                or style['visibility'] == 'hidden':
            return ([], li)

        text = ['']
        tag = barename(elem.tag)
        has_text = bool(hasattr(elem, 'text') and elem.text)

        # Block-level elements are separated from their surroundings by a
        # blank line, unless the preceding text already ends with one.
        in_block = tag in BLOCK_TAGS or style['display'] in BLOCK_STYLES
        if in_block and has_text and not end.endswith(u'\n\n'):
            text.append(u'\n\n')

        if tag in SPACE_TAGS and has_text and not end.endswith(u' '):
            text.append(u' ')

        if tag == 'img':
            text.append(u'\n\n%s%s\n\n' % (
                CALIBRE_SNB_IMG_TAG, ProcessFileName(elem.attrib['src'])))

        if tag == 'br':
            text.append(u'\n\n')

        if tag == 'li':
            li = '- '

        # Pre-formatted mode is inherited by all descendants of <pre>.
        pre = (tag == 'pre' or pre)

        if has_text:
            if pre:
                # Every line after the first gets the pre marker so that
                # mlize() keeps its whitespace.
                text.append((u'\n\n%s' % CALIBRE_SNB_PRE_TAG).join(
                    (li + elem.text).splitlines()))
            else:
                text.append(li + elem.text)
            li = ''

        for item in elem:
            en = u''
            if len(text) >= 2:
                en = text[-1][-2:]
            text += self.dump_text(subitems, item, stylizer, en, pre, li)[0]

        if in_block:
            text.append(u'\n\n')

        if hasattr(elem, 'tail') and elem.tail:
            if pre:
                text.append((u'\n\n%s' % CALIBRE_SNB_PRE_TAG).join(
                    elem.tail.splitlines()))
            else:
                text.append(li + elem.tail)
            li = ''

        return (text, li)
-
-
-