home *** CD-ROM | disk | FTP | other *** search
- # Source Generated with Decompyle++
- # File: in.pyc (Python 2.6)
-
- __license__ = 'GPL v3'
- __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
- import sys
- import os
- import logging
- from calibre import setup_cli_handlers
- from calibre.utils.config import OptionParser
- from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, NavigableString, CData, Tag
- from calibre.ebooks.lrf.pylrs.pylrs import Book, PageStyle, TextStyle, BlockStyle, ImageStream, Font, StyleDefault, BookSetting, Header, Image, ImageBlock, Page, TextBlock, Canvas, Paragraph, CR, Span, Italic, Sup, Sub, Bold, EmpLine, JumpButton, CharButton, Plot, DropCaps, Footer, RuledLine
- from calibre.ebooks.chardet import xml_to_unicode
-
class LrsParser(object):
    """Build a pylrs ``Book`` from the XML text of an LRS document.

    The constructor reads the whole stream, parses it with
    ``BeautifulStoneSoup`` and then runs five passes over the parse tree.
    Objects created along the way are stored in ``self.parsed_objects``
    keyed by their ``objid`` so that later passes can resolve references:

    1. ``first_pass``  - book metadata, the ``Book`` itself, headers/footers.
    2. ``second_pass`` - page/text/block styles, image streams and fonts.
    3. ``third_pass``  - pages, text blocks, image blocks, images, canvases.
    4. ``fourth_pass`` - page contents, jump buttons, text and TOC entries.
    5. ``fifth_pass``  - objects placed on canvases, headers and footers.

    NOTE(review): this class was recovered from decompiled bytecode. Spots
    where the decompiler output was visibly damaged have been reconstructed
    and are marked with ``NOTE(review)`` comments - confirm them against the
    upstream source before relying on them.
    """

    # Tag names (lower-cased) that never have a closing tag in LRS markup.
    SELF_CLOSING_TAGS = [i.lower() for i in [
        'CR',
        'Plot',
        'NoBR',
        'Space',
        'PutObj',
        'RuledLine',
        'Plot',
        'SetDefault',
        'BookSetting',
        'RegistFont',
        'PageStyle',
        'TextStyle',
        'BlockStyle',
        'JumpTo',
        'ImageStream',
        'Image']]

    def __init__(self, stream, logger):
        """Parse ``stream`` and run every conversion pass.

        :param stream: object with a ``read()`` method returning LRS markup.
        :param logger: logger stored on the instance as ``self.logger``.
        """
        self.logger = logger
        src = stream.read()
        self.soup = BeautifulStoneSoup(
            xml_to_unicode(src)[0],
            convertEntities=BeautifulStoneSoup.XML_ENTITIES,
            selfClosingTags=self.SELF_CLOSING_TAGS)

        # Index every tag carrying an objid so passes can look tags up by id.
        self.objects = {}
        for obj in self.soup.findAll(objid=True):
            self.objects[obj['objid']] = obj

        self.parsed_objects = {}
        self.first_pass()
        self.second_pass()
        self.third_pass()
        self.fourth_pass()
        self.fifth_pass()

    def fifth_pass(self):
        """Place every ``putobj`` child on its canvas, header or footer."""
        for tag in self.soup.findAll(['canvas', 'header', 'footer']):
            canvas = self.parsed_objects[tag.get('objid')]
            for po in tag.findAll('putobj'):
                canvas.put_object(self.parsed_objects[po.get('refobj')],
                                  po.get('x1'), po.get('y1'))

    @classmethod
    def attrs_to_dict(cls, tag, exclude=('objid',)):
        """Return the attributes of ``tag`` as a ``{str: value}`` dict.

        :param tag: object whose ``attrs`` is an iterable of (key, value)
            pairs.
        :param exclude: attribute names to leave out of the result.
        """
        result = {}
        for key, val in tag.attrs:
            if key not in exclude:
                # str() so the keys can be used as **keyword arguments.
                result[str(key)] = val
        return result

    def text_tag_to_element(self, tag):
        """Create the pylrs inline element corresponding to ``tag``.

        ``charbutton`` and ``plot`` tags are resolved through their
        ``refobj`` reference; every other tag name is looked up in a fixed
        table and raises ``KeyError`` when it is not listed there.
        """
        element_types = {
            'span': Span,
            'italic': Italic,
            'bold': Bold,
            'empline': EmpLine,
            'sup': Sup,
            'sub': Sub,
            'cr': CR,
            'drawchar': DropCaps,
        }
        if tag.name == 'charbutton':
            return CharButton(self.parsed_objects[tag.get('refobj')], None)
        if tag.name == 'plot':
            return Plot(self.parsed_objects[tag.get('refobj')],
                        **self.attrs_to_dict(tag, ['refobj']))
        settings = self.attrs_to_dict(tag)
        settings.pop('spanstyle', '')
        return element_types[tag.name](**settings)

    def process_text_element(self, tag, elem):
        """Recursively append the children of ``tag`` to ``elem``."""
        for item in tag.contents:
            if isinstance(item, NavigableString):
                elem.append(item.string)
            else:
                subelem = self.text_tag_to_element(item)
                elem.append(subelem)
                self.process_text_element(item, subelem)

    def process_paragraph(self, tag):
        """Convert a ``p`` tag into a ``Paragraph`` and return it."""
        p = Paragraph()
        contents = list(tag.contents)
        if contents:
            # NOTE(review): the decompiler reduced this statement to a no-op
            # expression. Reconstructed as "strip leading whitespace from a
            # leading text node", which is the only way the plain-string
            # branch below can ever be reached - confirm against upstream.
            if isinstance(contents[0], NavigableString):
                contents[0] = contents[0].string.lstrip()
            for item in contents:
                if isinstance(item, basestring):
                    p.append(item)
                elif isinstance(item, NavigableString):
                    p.append(item.string)
                else:
                    elem = self.text_tag_to_element(item)
                    p.append(elem)
                    self.process_text_element(item, elem)
        return p

    def process_text_block(self, tag):
        """Fill the already-created text block for ``tag`` with its content.

        Only ``p``, ``cr`` and ``charbutton`` children are handled; any
        other child (including bare text) is ignored.
        """
        tb = self.parsed_objects[tag.get('objid')]
        for item in tag.contents:
            if not hasattr(item, 'name'):
                continue
            if item.name == 'p':
                tb.append(self.process_paragraph(item))
            elif item.name == 'cr':
                tb.append(CR())
            elif item.name == 'charbutton':
                # A button directly inside the block gets its own paragraph.
                p = Paragraph()
                tb.append(p)
                elem = self.text_tag_to_element(item)
                self.process_text_element(item, elem)
                p.append(elem)

    def fourth_pass(self):
        """Assemble pages, jump buttons, text block content and the TOC."""
        for tag in self.soup.findAll('page'):
            page = self.parsed_objects[tag.get('objid')]
            self.book.append(page)
            for block_tag in tag.findAll(['canvas', 'imageblock', 'textblock',
                                          'ruledline', 'simpletextblock']):
                if block_tag.name == 'ruledline':
                    # Ruled lines are built here rather than in an earlier
                    # pass, straight from the tag's attributes.
                    page.append(RuledLine(**self.attrs_to_dict(block_tag)))
                else:
                    page.append(self.parsed_objects[block_tag.get('objid')])

        # Buttons must exist before text is processed because charbutton
        # tags resolve them through parsed_objects.
        for tag in self.soup.find('objects').findAll('button'):
            jt = tag.find('jumpto')
            tb = self.parsed_objects[jt.get('refobj')]
            jb = JumpButton(tb)
            self.book.append(jb)
            self.parsed_objects[tag.get('objid')] = jb

        for tag in self.soup.findAll(['textblock', 'simpletextblock']):
            self.process_text_block(tag)

        toc = self.soup.find('toc')
        if toc:
            for tag in toc.findAll('toclabel'):
                label = self.tag_to_string(tag)
                self.book.addTocEntry(
                    label, self.parsed_objects[tag.get('refobj')])

    def third_pass(self):
        """Create pages, text/image blocks, images and canvases.

        Style attributes are resolved either by objid (``parsed_objects``)
        or by style label (``_style_labels`` filled in ``second_pass``).
        """
        # tag name -> (class to instantiate, attributes handled separately)
        factories = {
            'page': (Page, ['pagestyle', 'evenfooterid', 'oddfooterid',
                            'evenheaderid', 'oddheaderid']),
            'textblock': (TextBlock, ['textstyle', 'blockstyle']),
            'simpletextblock': (TextBlock, ['textstyle', 'blockstyle']),
            'imageblock': (ImageBlock, ['blockstyle', 'refstream']),
            'image': (Image, ['refstream']),
            'canvas': (Canvas, ['canvaswidth', 'canvasheight']),
        }
        # LRS attribute name -> keyword argument name.
        attrmap = {
            'pagestyle': 'pageStyle',
            'blockstyle': 'blockStyle',
            'textstyle': 'textStyle',
        }
        for objid, tag in self.objects.items():
            if tag.name not in factories:
                continue
            factory, special = factories[tag.name]
            settings = self.attrs_to_dict(tag, special + ['objid', 'objlabel'])

            for a in ('pagestyle', 'blockstyle', 'textstyle'):
                label = tag.get(a, False)
                if not label:
                    continue
                # NOTE(review): the decompiled text assigned None when the
                # label was an objid; reconstructed as the parsed_objects
                # lookup, preferring objids over style labels.
                if label in self.parsed_objects:
                    settings[attrmap[a]] = self.parsed_objects[label]
                elif label in self._style_labels:
                    settings[attrmap[a]] = self._style_labels[label]

            for a in ('evenfooterid', 'oddfooterid',
                      'evenheaderid', 'oddheaderid'):
                if tag.has_key(a):
                    settings[a.replace('id', '')] = \
                        self.parsed_objects[tag.get(a)]

            args = []
            if tag.has_key('refstream'):
                args.append(self.parsed_objects[tag.get('refstream')])
            if tag.has_key('canvaswidth'):
                args += [tag.get('canvaswidth'), tag.get('canvasheight')]

            self.parsed_objects[objid] = factory(*args, **settings)

    def second_pass(self):
        """Create styles, image streams and fonts.

        Also fills ``self._style_labels`` (style label -> style object) and
        appends every registered font to the book.
        """
        # tag name -> (class to instantiate, attributes not passed through)
        factories = {
            'pagestyle': (PageStyle, ['stylelabel', 'evenheaderid',
                                      'oddheaderid', 'evenfooterid',
                                      'oddfooterid']),
            'textstyle': (TextStyle, ['stylelabel', 'rubyalignandadjust']),
            'blockstyle': (BlockStyle, ['stylelabel']),
            'imagestream': (ImageStream, ['imagestreamlabel']),
            'registfont': (Font, []),
        }
        self._style_labels = {}
        for objid, tag in self.objects.items():
            if tag.name not in factories:
                continue
            factory, special = factories[tag.name]
            settings = self.attrs_to_dict(tag, special + ['objid'])
            if tag.name == 'pagestyle':
                # Headers/footers were created in first_pass.
                for a in ('evenheaderid', 'oddheaderid',
                          'evenfooterid', 'oddfooterid'):
                    if tag.has_key(a):
                        settings[a.replace('id', '')] = \
                            self.parsed_objects[tag.get(a)]

            settings.pop('autoindex', '')
            created = factory(**settings)
            self.parsed_objects[objid] = created

            label = tag.get('stylelabel', False)
            if label:
                self._style_labels[label] = created
            if tag.name == 'registfont':
                self.book.append(created)

    @classmethod
    def tag_to_string(cls, tag):
        """Return the text contained in ``tag``, gathered recursively.

        Returns ``''`` when ``tag`` is falsy. Children that are neither
        strings nor tags are skipped.
        """
        if not tag:
            return ''
        strings = []
        for item in tag.contents:
            if isinstance(item, (NavigableString, CData)):
                strings.append(item.string)
            elif isinstance(item, Tag):
                res = cls.tag_to_string(item)
                if res:
                    strings.append(res)
        return u''.join(strings)

    def first_pass(self):
        """Read book metadata, create ``self.book`` and headers/footers."""
        info = self.soup.find('bbebxylog').find('bookinformation').find('info')
        bookinfo = info.find('bookinfo')
        docinfo = info.find('docinfo')

        def me(base, tagname):
            """Return ``(text, reading)`` for the ``tagname`` child of
            ``base``, or a tuple of empty strings when it is absent."""
            tag = base.find(tagname.lower())
            if tag is None:
                return ('', '', '')
            # NOTE(review): the decompiled tuple was
            # ``(tag is None, <text> if has 'reading' else '')``, which makes
            # element 0 always False. Reconstructed as (text, reading
            # attribute or '') - confirm against upstream.
            reading = tag.get('reading') if tag.has_key('reading') else ''
            return (self.tag_to_string(tag), reading)

        title = me(bookinfo, 'Title')
        author = me(bookinfo, 'Author')
        publisher = me(bookinfo, 'Publisher')
        category = me(bookinfo, 'Category')[0]
        classification = me(bookinfo, 'Classification')[0]
        freetext = me(bookinfo, 'FreeText')[0]
        language = me(docinfo, 'Language')[0]
        creator = me(docinfo, 'Creator')[0]
        producer = me(docinfo, 'Producer')[0]
        bookid = me(bookinfo, 'BookID')[0]

        sd = self.soup.find('setdefault')
        sd = StyleDefault(**self.attrs_to_dict(
            sd, ['page_tree_id', 'rubyalignandadjust']))
        bs = self.soup.find('booksetting')
        bs = BookSetting(**self.attrs_to_dict(bs, []))

        settings = {}
        thumbnail = self.soup.find('cthumbnail')
        # NOTE(review): the decompiler dropped the body of the
        # ``thumbnail is not None`` branch (only the condition survived).
        # Reconstructed assuming the tag names the image in a ``file``
        # attribute and that Book accepts a ``thumbnail`` keyword - confirm.
        if thumbnail is not None:
            thumb_path = thumbnail.get('file')
            if thumb_path and os.access(thumb_path, os.R_OK):
                settings['thumbnail'] = thumb_path
            else:
                self.logger.warning(
                    'Could not read from thumbnail file: %s', thumb_path)

        self.book = Book(title=title, author=author, publisher=publisher,
                         category=category, classification=classification,
                         freetext=freetext, language=language,
                         creator=creator, producer=producer, bookid=bookid,
                         setdefault=sd, booksetting=bs, **settings)

        for hdr in self.soup.findAll(['header', 'footer']):
            # The decompiled expression could never yield Header; the tag
            # name decides which of the two classes to build.
            elem = Header if hdr.name == 'header' else Footer
            self.parsed_objects[hdr.get('objid')] = \
                elem(**self.attrs_to_dict(hdr))

    def render(self, file, to_lrs=False):
        """Write the book to ``file``.

        :param file: output target handed unchanged to the book renderer.
        :param to_lrs: when true call ``renderLrs(file, 'utf-8')``,
            otherwise ``renderLrf(file)``.
        """
        if to_lrs:
            self.book.renderLrs(file, 'utf-8')
        else:
            self.book.renderLrf(file)
-
-
def option_parser():
    """Return the command-line option parser for the LRS compiler.

    Defines ``-o/--output`` (default ``None``), ``--verbose`` and ``--lrs``
    (both boolean flags defaulting to ``False``).
    """
    parser = OptionParser(usage=_('%prog [options] file.lrs\nCompile an LRS file into an LRF file.'))
    parser.add_option('-o', '--output', default=None,
                      help=_('Path to output file'))
    parser.add_option('--verbose', default=False, action='store_true',
                      help=_('Verbose processing'))
    parser.add_option('--lrs', default=False, action='store_true',
                      help=_('Convert LRS to LRS, useful for debugging.'))
    return parser
-
-
def main(args=sys.argv, logger=None):
    """Command-line entry point: convert one LRS file.

    :param args: argument vector; after option parsing exactly two
        positional entries must remain (program name and input path).
    :param logger: logger to use; when ``None`` the ``'lrs2lrf'`` logger is
        fetched and configured through ``setup_cli_handlers``.
    :return: ``0`` on success, ``1`` when the arguments are invalid (the
        help text is printed in that case).
    """
    parser = option_parser()
    opts, args = parser.parse_args(args)
    if logger is None:
        # NOTE(review): the decompiled text had ``None`` for the verbose
        # level; DEBUG is assumed to be the intended value - confirm.
        level = logging.DEBUG if opts.verbose else logging.INFO
        logger = logging.getLogger('lrs2lrf')
        setup_cli_handlers(logger, level)

    if len(args) != 2:
        parser.print_help()
        return 1

    if not opts.output:
        # --output defaults to None and os.path.abspath(None) raises, so
        # derive a name from the input file, placed in the current
        # directory, with an extension matching the output format.
        ext = '.lrs' if opts.lrs else '.lrf'
        opts.output = os.path.splitext(os.path.basename(args[1]))[0] + ext
    opts.output = os.path.abspath(opts.output)

    if opts.verbose:
        import warnings
        warnings.defaultaction = 'error'

    logger.info('Parsing LRS file...')
    # LrsParser reads the whole stream in its constructor, so the file can
    # be closed as soon as the parser has been built.
    with open(args[1], 'rb') as stream:
        converter = LrsParser(stream, logger)
    logger.info('Writing to output file...')
    converter.render(opts.output, to_lrs=opts.lrs)
    logger.info('Output written to ' + opts.output)
    return 0
-
# Run the converter when the module is executed as a script and use the
# return value of main() as the process exit status.
if __name__ == '__main__':
    sys.exit(main())
-
-