home *** CD-ROM | disk | FTP | other *** search
/ Maximum CD 2010 November / maximum-cd-2010-11.iso / DiscContents / calibre-0.7.13.msi / file_925 (.txt) < prev    next >
Encoding:
Python Compiled Bytecode  |  2010-08-06  |  12.6 KB  |  381 lines

  1. # Source Generated with Decompyle++
  2. # File: in.pyc (Python 2.6)
  3.  
  4. __license__ = 'GPL v3'
  5. __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
  6. import sys
  7. import os
  8. import logging
  9. from calibre import setup_cli_handlers
  10. from calibre.utils.config import OptionParser
  11. from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, NavigableString, CData, Tag
  12. from calibre.ebooks.lrf.pylrs.pylrs import Book, PageStyle, TextStyle, BlockStyle, ImageStream, Font, StyleDefault, BookSetting, Header, Image, ImageBlock, Page, TextBlock, Canvas, Paragraph, CR, Span, Italic, Sup, Sub, Bold, EmpLine, JumpButton, CharButton, Plot, DropCaps, Footer, RuledLine
  13. from calibre.ebooks.chardet import xml_to_unicode
  14.  
  15. class LrsParser(object):
  16.     SELF_CLOSING_TAGS = [ i.lower() for i in [
  17.         'CR',
  18.         'Plot',
  19.         'NoBR',
  20.         'Space',
  21.         'PutObj',
  22.         'RuledLine',
  23.         'Plot',
  24.         'SetDefault',
  25.         'BookSetting',
  26.         'RegistFont',
  27.         'PageStyle',
  28.         'TextStyle',
  29.         'BlockStyle',
  30.         'JumpTo',
  31.         'ImageStream',
  32.         'Image'] ]
  33.     
  34.     def __init__(self, stream, logger):
  35.         self.logger = logger
  36.         src = stream.read()
  37.         self.soup = BeautifulStoneSoup(xml_to_unicode(src)[0], convertEntities = BeautifulStoneSoup.XML_ENTITIES, selfClosingTags = self.SELF_CLOSING_TAGS)
  38.         self.objects = { }
  39.         for obj in self.soup.findAll(objid = True):
  40.             self.objects[obj['objid']] = obj
  41.         
  42.         self.parsed_objects = { }
  43.         self.first_pass()
  44.         self.second_pass()
  45.         self.third_pass()
  46.         self.fourth_pass()
  47.         self.fifth_pass()
  48.  
  49.     
  50.     def fifth_pass(self):
  51.         for tag in self.soup.findAll([
  52.             'canvas',
  53.             'header',
  54.             'footer']):
  55.             canvas = self.parsed_objects[tag.get('objid')]
  56.             for po in tag.findAll('putobj'):
  57.                 canvas.put_object(self.parsed_objects[po.get('refobj')], po.get('x1'), po.get('y1'))
  58.             
  59.         
  60.  
  61.     
  62.     def attrs_to_dict(cls, tag, exclude = ('objid',)):
  63.         result = { }
  64.         for key, val in tag.attrs:
  65.             if key in exclude:
  66.                 continue
  67.             
  68.             result[str(key)] = val
  69.         
  70.         return result
  71.  
  72.     attrs_to_dict = classmethod(attrs_to_dict)
  73.     
  74.     def text_tag_to_element(self, tag):
  75.         map = {
  76.             'span': Span,
  77.             'italic': Italic,
  78.             'bold': Bold,
  79.             'empline': EmpLine,
  80.             'sup': Sup,
  81.             'sub': Sub,
  82.             'cr': CR,
  83.             'drawchar': DropCaps }
  84.         if tag.name == 'charbutton':
  85.             return CharButton(self.parsed_objects[tag.get('refobj')], None)
  86.         if tag.name == 'plot':
  87.             return Plot(self.parsed_objects[tag.get('refobj')], **self.attrs_to_dict(tag, [
  88.                 'refobj']))
  89.         settings = self.attrs_to_dict(tag)
  90.         settings.pop('spanstyle', '')
  91.         return map[tag.name](**settings)
  92.  
  93.     
  94.     def process_text_element(self, tag, elem):
  95.         for item in tag.contents:
  96.             if isinstance(item, NavigableString):
  97.                 elem.append(item.string)
  98.                 continue
  99.             subelem = self.text_tag_to_element(item)
  100.             elem.append(subelem)
  101.             self.process_text_element(item, subelem)
  102.         
  103.  
  104.     
  105.     def process_paragraph(self, tag):
  106.         p = Paragraph()
  107.         contents = [ i for i in tag.contents ]
  108.         if contents:
  109.             for item in contents:
  110.                 if isinstance(item, basestring):
  111.                     p.append(item)
  112.                     continue
  113.                 None if isinstance(contents[0], NavigableString) else []
  114.                 if isinstance(item, NavigableString):
  115.                     p.append(item.string)
  116.                     continue
  117.                 elem = self.text_tag_to_element(item)
  118.                 p.append(elem)
  119.                 self.process_text_element(item, elem)
  120.             
  121.         
  122.         return p
  123.  
  124.     
  125.     def process_text_block(self, tag):
  126.         tb = self.parsed_objects[tag.get('objid')]
  127.         for item in tag.contents:
  128.             if hasattr(item, 'name'):
  129.                 if item.name == 'p':
  130.                     tb.append(self.process_paragraph(item))
  131.                 elif item.name == 'cr':
  132.                     tb.append(CR())
  133.                 elif item.name == 'charbutton':
  134.                     p = Paragraph()
  135.                     tb.append(p)
  136.                     elem = self.text_tag_to_element(item)
  137.                     self.process_text_element(item, elem)
  138.                     p.append(elem)
  139.                 
  140.             item.name == 'p'
  141.         
  142.  
  143.     
  144.     def fourth_pass(self):
  145.         for tag in self.soup.findAll('page'):
  146.             page = self.parsed_objects[tag.get('objid')]
  147.             self.book.append(page)
  148.             for block_tag in tag.findAll([
  149.                 'canvas',
  150.                 'imageblock',
  151.                 'textblock',
  152.                 'ruledline',
  153.                 'simpletextblock']):
  154.                 if block_tag.name == 'ruledline':
  155.                     page.append(RuledLine(**self.attrs_to_dict(block_tag)))
  156.                     continue
  157.                 page.append(self.parsed_objects[block_tag.get('objid')])
  158.             
  159.         
  160.         for tag in self.soup.find('objects').findAll('button'):
  161.             jt = tag.find('jumpto')
  162.             tb = self.parsed_objects[jt.get('refobj')]
  163.             jb = JumpButton(tb)
  164.             self.book.append(jb)
  165.             self.parsed_objects[tag.get('objid')] = jb
  166.         
  167.         for tag in self.soup.findAll([
  168.             'textblock',
  169.             'simpletextblock']):
  170.             self.process_text_block(tag)
  171.         
  172.         toc = self.soup.find('toc')
  173.         if toc:
  174.             for tag in toc.findAll('toclabel'):
  175.                 label = self.tag_to_string(tag)
  176.                 self.book.addTocEntry(label, self.parsed_objects[tag.get('refobj')])
  177.             
  178.         
  179.  
  180.     
  181.     def third_pass(self):
  182.         map = {
  183.             'page': (Page, [
  184.                 'pagestyle',
  185.                 'evenfooterid',
  186.                 'oddfooterid',
  187.                 'evenheaderid',
  188.                 'oddheaderid']),
  189.             'textblock': (TextBlock, [
  190.                 'textstyle',
  191.                 'blockstyle']),
  192.             'simpletextblock': (TextBlock, [
  193.                 'textstyle',
  194.                 'blockstyle']),
  195.             'imageblock': (ImageBlock, [
  196.                 'blockstyle',
  197.                 'refstream']),
  198.             'image': (Image, [
  199.                 'refstream']),
  200.             'canvas': (Canvas, [
  201.                 'canvaswidth',
  202.                 'canvasheight']) }
  203.         attrmap = {
  204.             'pagestyle': 'pageStyle',
  205.             'blockstyle': 'blockStyle',
  206.             'textstyle': 'textStyle' }
  207.         for id, tag in self.objects.items():
  208.             if tag.name in map.keys():
  209.                 settings = self.attrs_to_dict(tag, map[tag.name][1] + [
  210.                     'objid',
  211.                     'objlabel'])
  212.                 for a in ('pagestyle', 'blockstyle', 'textstyle'):
  213.                     label = tag.get(a, False)
  214.                     if label:
  215.                         if label in self._style_labels or label in self.parsed_objects:
  216.                             _obj = None if self.parsed_objects.has_key(label) else self._style_labels[label]
  217.                             settings[attrmap[a]] = _obj
  218.                             continue
  219.                 
  220.                 for a in ('evenfooterid', 'oddfooterid', 'evenheaderid', 'oddheaderid'):
  221.                     if tag.has_key(a):
  222.                         settings[a.replace('id', '')] = self.parsed_objects[tag.get(a)]
  223.                         continue
  224.                 
  225.                 args = []
  226.                 if tag.has_key('refstream'):
  227.                     args.append(self.parsed_objects[tag.get('refstream')])
  228.                 
  229.                 if tag.has_key('canvaswidth'):
  230.                     args += [
  231.                         tag.get('canvaswidth'),
  232.                         tag.get('canvasheight')]
  233.                 
  234.                 self.parsed_objects[id] = map[tag.name][0](*args, **settings)
  235.                 continue
  236.         
  237.  
  238.     
  239.     def second_pass(self):
  240.         map = {
  241.             'pagestyle': (PageStyle, [
  242.                 'stylelabel',
  243.                 'evenheaderid',
  244.                 'oddheaderid',
  245.                 'evenfooterid',
  246.                 'oddfooterid']),
  247.             'textstyle': (TextStyle, [
  248.                 'stylelabel',
  249.                 'rubyalignandadjust']),
  250.             'blockstyle': (BlockStyle, [
  251.                 'stylelabel']),
  252.             'imagestream': (ImageStream, [
  253.                 'imagestreamlabel']),
  254.             'registfont': (Font, []) }
  255.         self._style_labels = { }
  256.         for id, tag in self.objects.items():
  257.             if tag.name in map.keys():
  258.                 settings = self.attrs_to_dict(tag, map[tag.name][1] + [
  259.                     'objid'])
  260.                 if tag.name == 'pagestyle':
  261.                     for a in ('evenheaderid', 'oddheaderid', 'evenfooterid', 'oddfooterid'):
  262.                         if tag.has_key(a):
  263.                             settings[a.replace('id', '')] = self.parsed_objects[tag.get(a)]
  264.                             continue
  265.                     
  266.                 
  267.                 settings.pop('autoindex', '')
  268.                 self.parsed_objects[id] = map[tag.name][0](**settings)
  269.                 x = tag.get('stylelabel', False)
  270.                 if x:
  271.                     self._style_labels[x] = self.parsed_objects[id]
  272.                 
  273.                 if tag.name == 'registfont':
  274.                     self.book.append(self.parsed_objects[id])
  275.                 
  276.             tag.name == 'registfont'
  277.         
  278.  
  279.     
  280.     def tag_to_string(cls, tag):
  281.         if not tag:
  282.             return ''
  283.         strings = []
  284.         for item in tag.contents:
  285.             if isinstance(item, (NavigableString, CData)):
  286.                 strings.append(item.string)
  287.                 continue
  288.             tag
  289.             if isinstance(item, Tag):
  290.                 res = cls.tag_to_string(item)
  291.                 if res:
  292.                     strings.append(res)
  293.                 
  294.             res
  295.         
  296.         return u''.join(strings)
  297.  
  298.     tag_to_string = classmethod(tag_to_string)
  299.     
  300.     def first_pass(self):
  301.         info = self.soup.find('bbebxylog').find('bookinformation').find('info')
  302.         bookinfo = info.find('bookinfo')
  303.         docinfo = info.find('docinfo')
  304.         
  305.         def me(base, tagname):
  306.             tag = base.find(tagname.lower())
  307.             if tag is None:
  308.                 return ('', '', '')
  309.             tag = (tag is None, self.tag_to_string(tag) if tag.has_key('reading') else '')
  310.             return tag
  311.  
  312.         title = me(bookinfo, 'Title')
  313.         author = me(bookinfo, 'Author')
  314.         publisher = me(bookinfo, 'Publisher')
  315.         category = me(bookinfo, 'Category')[0]
  316.         classification = me(bookinfo, 'Classification')[0]
  317.         freetext = me(bookinfo, 'FreeText')[0]
  318.         language = me(docinfo, 'Language')[0]
  319.         creator = me(docinfo, 'Creator')[0]
  320.         producer = me(docinfo, 'Producer')[0]
  321.         bookid = me(bookinfo, 'BookID')[0]
  322.         sd = self.soup.find('setdefault')
  323.         sd = StyleDefault(**self.attrs_to_dict(sd, [
  324.             'page_tree_id',
  325.             'rubyalignandadjust']))
  326.         bs = self.soup.find('booksetting')
  327.         bs = BookSetting(**self.attrs_to_dict(bs, []))
  328.         settings = { }
  329.         thumbnail = self.soup.find('cthumbnail')
  330.         self.book = Book(title = title, author = author, publisher = publisher, category = category, classification = classification, freetext = freetext, language = language, creator = creator, producer = producer, bookid = bookid, setdefault = sd, booksetting = bs, **settings)
  331.         for hdr in self.soup.findAll([
  332.             'header',
  333.             'footer']):
  334.             elem = None if thumbnail is not None else (None,) if hdr.name == 'header' else Footer
  335.             self.parsed_objects[hdr.get('objid')] = elem(**self.attrs_to_dict(hdr))
  336.         
  337.  
  338.     
  339.     def render(self, file, to_lrs = False):
  340.         if to_lrs:
  341.             self.book.renderLrs(file, 'utf-8')
  342.         else:
  343.             self.book.renderLrf(file)
  344.  
  345.  
  346.  
  347. def option_parser():
  348.     parser = OptionParser(usage = _('%prog [options] file.lrs\nCompile an LRS file into an LRF file.'))
  349.     parser.add_option('-o', '--output', default = None, help = _('Path to output file'))
  350.     parser.add_option('--verbose', default = False, action = 'store_true', help = _('Verbose processing'))
  351.     parser.add_option('--lrs', default = False, action = 'store_true', help = _('Convert LRS to LRS, useful for debugging.'))
  352.     return parser
  353.  
  354.  
  355. def main(args = sys.argv, logger = None):
  356.     parser = option_parser()
  357.     (opts, args) = parser.parse_args(args)
  358.     if logger is None:
  359.         level = None if opts.verbose else logging.INFO
  360.         logger = logging.getLogger('lrs2lrf')
  361.         setup_cli_handlers(logger, level)
  362.     
  363.     if len(args) != 2:
  364.         parser.print_help()
  365.         return 1
  366.     opts.output = os.path.abspath(opts.output)
  367.     if opts.verbose:
  368.         import warnings
  369.         warnings.defaultaction = 'error'
  370.     
  371.     logger.info('Parsing LRS file...')
  372.     converter = LrsParser(open(args[1], 'rb'), logger)
  373.     logger.info('Writing to output file...')
  374.     converter.render(opts.output, to_lrs = opts.lrs)
  375.     logger.info('Output written to ' + opts.output)
  376.     return 0
  377.  
  378. if __name__ == '__main__':
  379.     sys.exit(main())
  380.  
  381.