home *** CD-ROM | disk | FTP | other *** search
/ Maximum CD 2010 November / maximum-cd-2010-11.iso / DiscContents / calibre-0.7.13.msi / file_2102 (.txt) < prev    next >
Encoding:
Python Compiled Bytecode  |  2010-08-06  |  16.3 KB  |  515 lines

  1. # Source Generated with Decompyle++
  2. # File: in.pyc (Python 2.6)
  3.  
  4. import sys
  5. import zipfile
  6. import xml.dom.minidom as xml
  7. from namespaces import nsdict
  8. from elementtypes import *
  9. IGNORED_TAGS = [] + [ nsdict[item[0]] + ':' + item[1] for item in empty_elements ]
  10. INLINE_TAGS = [ nsdict[item[0]] + ':' + item[1] for item in inline_elements ]
  11.  
  12. class TextProps:
  13.     
  14.     def __init__(self):
  15.         self.italic = False
  16.         self.bold = False
  17.         self.fixed = False
  18.         self.underlined = False
  19.         self.strikethrough = False
  20.         self.superscript = False
  21.         self.subscript = False
  22.  
  23.     
  24.     def setItalic(self, value):
  25.         if value == 'italic':
  26.             self.italic = True
  27.         elif value == 'normal':
  28.             self.italic = False
  29.         
  30.  
  31.     
  32.     def setBold(self, value):
  33.         if value == 'bold':
  34.             self.bold = True
  35.         elif value == 'normal':
  36.             self.bold = False
  37.         
  38.  
  39.     
  40.     def setFixed(self, value):
  41.         self.fixed = value
  42.  
  43.     
  44.     def setUnderlined(self, value):
  45.         if value and value != 'none':
  46.             self.underlined = True
  47.         
  48.  
  49.     
  50.     def setStrikethrough(self, value):
  51.         if value and value != 'none':
  52.             self.strikethrough = True
  53.         
  54.  
  55.     
  56.     def setPosition(self, value):
  57.         if value is None or value == '':
  58.             return None
  59.         posisize = value.split(' ')
  60.         textpos = posisize[0]
  61.         if textpos.find('%') == -1:
  62.             if textpos == 'sub':
  63.                 self.superscript = False
  64.                 self.subscript = True
  65.             elif textpos == 'super':
  66.                 self.superscript = True
  67.                 self.subscript = False
  68.             
  69.         else:
  70.             itextpos = int(textpos[:textpos.find('%')])
  71.             if itextpos > 10:
  72.                 self.superscript = False
  73.                 self.subscript = True
  74.             elif itextpos < -10:
  75.                 self.superscript = True
  76.                 self.subscript = False
  77.             
  78.  
  79.     
  80.     def __str__(self):
  81.         return '[italic=%s, bold=i%s, fixed=%s]' % (str(self.italic), str(self.bold), str(self.fixed))
  82.  
  83.  
  84.  
  85. class ParagraphProps:
  86.     
  87.     def __init__(self):
  88.         self.blockquote = False
  89.         self.headingLevel = 0
  90.         self.code = False
  91.         self.title = False
  92.         self.indented = 0
  93.  
  94.     
  95.     def setIndented(self, value):
  96.         self.indented = value
  97.  
  98.     
  99.     def setHeading(self, level):
  100.         self.headingLevel = level
  101.  
  102.     
  103.     def setTitle(self, value):
  104.         self.title = value
  105.  
  106.     
  107.     def setCode(self, value):
  108.         self.code = value
  109.  
  110.     
  111.     def __str__(self):
  112.         return '[bq=%s, h=%d, code=%s]' % (str(self.blockquote), self.headingLevel, str(self.code))
  113.  
  114.  
  115.  
  116. class ListProperties:
  117.     
  118.     def __init__(self):
  119.         self.ordered = False
  120.  
  121.     
  122.     def setOrdered(self, value):
  123.         self.ordered = value
  124.  
  125.  
  126.  
  127. class ODF2MoinMoin(object):
  128.     
  129.     def __init__(self, filepath):
  130.         self.footnotes = []
  131.         self.footnoteCounter = 0
  132.         self.textStyles = {
  133.             'Standard': TextProps() }
  134.         self.paragraphStyles = {
  135.             'Standard': ParagraphProps() }
  136.         self.listStyles = { }
  137.         self.fixedFonts = []
  138.         self.hasTitle = 0
  139.         self.lastsegment = None
  140.         self.elements = {
  141.             'draw:page': self.textToString,
  142.             'draw:frame': self.textToString,
  143.             'draw:image': self.draw_image,
  144.             'draw:text-box': self.textToString,
  145.             'text:a': self.text_a,
  146.             'text:note': self.text_note }
  147.         for tag in IGNORED_TAGS:
  148.             self.elements[tag] = self.do_nothing
  149.         
  150.         for tag in INLINE_TAGS:
  151.             self.elements[tag] = self.inline_markup
  152.         
  153.         self.elements['text:line-break'] = self.text_line_break
  154.         self.elements['text:s'] = self.text_s
  155.         self.elements['text:tab'] = self.text_tab
  156.         self.load(filepath)
  157.  
  158.     
  159.     def processFontDeclarations(self, fontDecl):
  160.         for fontFace in fontDecl.getElementsByTagName('style:font-face'):
  161.             if fontFace.getAttribute('style:font-pitch') == 'fixed':
  162.                 self.fixedFonts.append(fontFace.getAttribute('style:name'))
  163.                 continue
  164.         
  165.  
  166.     
  167.     def extractTextProperties(self, style, parent = None):
  168.         textProps = TextProps()
  169.         if parent:
  170.             parentProp = self.textStyles.get(parent, None)
  171.             if parentProp:
  172.                 textProp = parentProp
  173.             
  174.         
  175.         textPropEl = style.getElementsByTagName('style:text-properties')
  176.         if not textPropEl:
  177.             return textProps
  178.         textPropEl = textPropEl[0]
  179.         textProps.setItalic(textPropEl.getAttribute('fo:font-style'))
  180.         textProps.setBold(textPropEl.getAttribute('fo:font-weight'))
  181.         textProps.setUnderlined(textPropEl.getAttribute('style:text-underline-style'))
  182.         textProps.setStrikethrough(textPropEl.getAttribute('style:text-line-through-style'))
  183.         textProps.setPosition(textPropEl.getAttribute('style:text-position'))
  184.         if textPropEl.getAttribute('style:font-name') in self.fixedFonts:
  185.             textProps.setFixed(True)
  186.         
  187.         return textProps
  188.  
  189.     
  190.     def extractParagraphProperties(self, style, parent = None):
  191.         paraProps = ParagraphProps()
  192.         name = style.getAttribute('style:name')
  193.         if name.startswith('Heading_20_'):
  194.             level = name[11:]
  195.             
  196.             try:
  197.                 level = int(level)
  198.                 paraProps.setHeading(level)
  199.             level = 0
  200.  
  201.         
  202.         if name == 'Title':
  203.             paraProps.setTitle(True)
  204.         
  205.         paraPropEl = style.getElementsByTagName('style:paragraph-properties')
  206.         if paraPropEl:
  207.             paraPropEl = paraPropEl[0]
  208.             leftMargin = paraPropEl.getAttribute('fo:margin-left')
  209.             if leftMargin:
  210.                 
  211.                 try:
  212.                     leftMargin = float(leftMargin[:-2])
  213.                     if leftMargin > 0.01:
  214.                         paraProps.setIndented(True)
  215.  
  216.             
  217.         
  218.         textProps = self.extractTextProperties(style)
  219.         if textProps.fixed:
  220.             paraProps.setCode(True)
  221.         
  222.         return paraProps
  223.  
  224.     
  225.     def processStyles(self, styleElements):
  226.         for style in styleElements:
  227.             name = style.getAttribute('style:name')
  228.             if name == 'Standard':
  229.                 continue
  230.             
  231.             family = style.getAttribute('style:family')
  232.             parent = style.getAttribute('style:parent-style-name')
  233.             if family == 'text':
  234.                 self.textStyles[name] = self.extractTextProperties(style, parent)
  235.                 continue
  236.             if family == 'paragraph':
  237.                 self.paragraphStyles[name] = self.extractParagraphProperties(style, parent)
  238.                 self.textStyles[name] = self.extractTextProperties(style, parent)
  239.                 continue
  240.         
  241.  
  242.     
  243.     def processListStyles(self, listStyleElements):
  244.         for style in listStyleElements:
  245.             name = style.getAttribute('style:name')
  246.             prop = ListProperties()
  247.             self.listStyles[name] = prop
  248.         
  249.  
  250.     
  251.     def load(self, filepath):
  252.         zip = zipfile.ZipFile(filepath)
  253.         styles_doc = xml.dom.minidom.parseString(zip.read('styles.xml'))
  254.         fontfacedecls = styles_doc.getElementsByTagName('office:font-face-decls')
  255.         if fontfacedecls:
  256.             self.processFontDeclarations(fontfacedecls[0])
  257.         
  258.         self.processStyles(styles_doc.getElementsByTagName('style:style'))
  259.         self.processListStyles(styles_doc.getElementsByTagName('text:list-style'))
  260.         self.content = xml.dom.minidom.parseString(zip.read('content.xml'))
  261.         fontfacedecls = self.content.getElementsByTagName('office:font-face-decls')
  262.         if fontfacedecls:
  263.             self.processFontDeclarations(fontfacedecls[0])
  264.         
  265.         self.processStyles(self.content.getElementsByTagName('style:style'))
  266.         self.processListStyles(self.content.getElementsByTagName('text:list-style'))
  267.  
  268.     
  269.     def compressCodeBlocks(self, text):
  270.         return text
  271.         lines = text.split('\n')
  272.         buffer = []
  273.         numLines = len(lines)
  274.         for i in range(numLines):
  275.             if not lines[i].strip() and i == numLines - 1 and i == 0:
  276.                 if lines[i - 1].startswith('    '):
  277.                     pass
  278.                 if not lines[i + 1].startswith('    '):
  279.                     buffer.append('\n' + lines[i])
  280.                     continue
  281.         
  282.         return ''.join(buffer)
  283.  
  284.     
  285.     def do_nothing(self, node):
  286.         return ''
  287.  
  288.     
  289.     def draw_image(self, node):
  290.         link = node.getAttribute('xlink:href')
  291.         if link and link[:2] == './':
  292.             return '%s\n' % link
  293.         if link and link[:9] == 'Pictures/':
  294.             link = link[9:]
  295.         
  296.         return '[[Image(%s)]]\n' % link
  297.  
  298.     
  299.     def text_a(self, node):
  300.         text = self.textToString(node)
  301.         link = node.getAttribute('xlink:href')
  302.         if link.strip() == text.strip():
  303.             return '[%s] ' % link.strip()
  304.         return '[%s %s] ' % (link.strip(), text.strip())
  305.  
  306.     
  307.     def text_line_break(self, node):
  308.         return '[[BR]]'
  309.  
  310.     
  311.     def text_note(self, node):
  312.         cite = node.getElementsByTagName('text:note-citation')[0].childNodes[0].nodeValue
  313.         body = node.getElementsByTagName('text:note-body')[0].childNodes[0]
  314.         self.footnotes.append((cite, self.textToString(body)))
  315.         return '^%s^' % cite
  316.  
  317.     
  318.     def text_s(self, node):
  319.         
  320.         try:
  321.             num = int(node.getAttribute('text:c'))
  322.             return ' ' * num
  323.         except:
  324.             return ' '
  325.  
  326.  
  327.     
  328.     def text_tab(self, node):
  329.         return '    '
  330.  
  331.     
  332.     def inline_markup(self, node):
  333.         text = self.textToString(node)
  334.         if not text.strip():
  335.             return ''
  336.         styleName = node.getAttribute('text:style-name')
  337.         style = self.textStyles.get(styleName, TextProps())
  338.         if style.fixed:
  339.             return '`' + text + '`'
  340.         mark = []
  341.         if style:
  342.             if style.bold:
  343.                 mark.append("'''")
  344.             
  345.             if style.underlined:
  346.                 mark.append('__')
  347.             
  348.             if style.strikethrough:
  349.                 mark.append('~~')
  350.             
  351.             if style.superscript:
  352.                 mark.append('^')
  353.             
  354.             if style.subscript:
  355.                 mark.append(',,')
  356.             
  357.         
  358.         revmark = mark[:]
  359.         revmark.reverse()
  360.         return '%s%s%s' % (''.join(mark), text, ''.join(revmark))
  361.  
  362.     
  363.     def listToString(self, listElement, indent = 0):
  364.         self.lastsegment = listElement.tagName
  365.         buffer = []
  366.         styleName = listElement.getAttribute('text:style-name')
  367.         props = self.listStyles.get(styleName, ListProperties())
  368.         i = 0
  369.         for item in listElement.childNodes:
  370.             buffer.append(' ' * indent)
  371.             i += 1
  372.             if props.ordered:
  373.                 number = str(i)
  374.                 number = ' ' + number + '. '
  375.                 buffer.append(' 1. ')
  376.             else:
  377.                 buffer.append(' * ')
  378.             subitems = _[1]
  379.             for subitem in subitems:
  380.                 self.lastsegment = subitem.tagName
  381.             
  382.             self.lastsegment = item.tagName
  383.             buffer.append('\n')
  384.         
  385.         return ''.join(buffer)
  386.  
  387.     
  388.     def tableToString(self, tableElement):
  389.         self.lastsegment = tableElement.tagName
  390.         buffer = []
  391.         for item in tableElement.childNodes:
  392.             self.lastsegment = item.tagName
  393.             if item.tagName == 'table:table-header-rows':
  394.                 buffer.append(self.tableToString(item))
  395.             
  396.             if item.tagName == 'table:table-row':
  397.                 buffer.append('\n||')
  398.                 for cell in item.childNodes:
  399.                     buffer.append(self.inline_markup(cell))
  400.                     buffer.append('||')
  401.                     self.lastsegment = cell.tagName
  402.                 
  403.         
  404.         return ''.join(buffer)
  405.  
  406.     
  407.     def toString(self):
  408.         body = self.content.getElementsByTagName('office:body')[0]
  409.         text = body.childNodes[0]
  410.         buffer = []
  411.         paragraphs = _[1]
  412.         for paragraph in paragraphs:
  413.             if paragraph.tagName == 'text:list':
  414.                 text = self.listToString(paragraph)
  415.             elif paragraph.tagName == 'text:section':
  416.                 text = self.textToString(paragraph)
  417.             elif paragraph.tagName == 'table:table':
  418.                 text = self.tableToString(paragraph)
  419.             else:
  420.                 text = self.paragraphToString(paragraph)
  421.             if text:
  422.                 buffer.append(text)
  423.                 continue
  424.         
  425.         if self.footnotes:
  426.             buffer.append('----')
  427.             for cite, body in self.footnotes:
  428.                 buffer.append('%s: %s' % (cite, body))
  429.             
  430.         
  431.         buffer.append('')
  432.         return self.compressCodeBlocks('\n'.join(buffer))
  433.  
  434.     
  435.     def textToString(self, element):
  436.         buffer = []
  437.         for node in element.childNodes:
  438.             if node.nodeType == xml.dom.Node.TEXT_NODE:
  439.                 buffer.append(node.nodeValue)
  440.                 continue
  441.             if node.nodeType == xml.dom.Node.ELEMENT_NODE:
  442.                 tag = node.tagName
  443.                 if tag in ('draw:text-box', 'draw:frame'):
  444.                     buffer.append(self.textToString(node))
  445.                 elif tag in ('text:p', 'text:h'):
  446.                     text = self.paragraphToString(node)
  447.                     if text:
  448.                         buffer.append(text)
  449.                     
  450.                 elif tag == 'text:list':
  451.                     buffer.append(self.listToString(node))
  452.                 else:
  453.                     method = self.elements.get(tag)
  454.                     if method:
  455.                         buffer.append(method(node))
  456.                     else:
  457.                         buffer.append(' {' + tag + '} ')
  458.             method
  459.         
  460.         return ''.join(buffer)
  461.  
  462.     
  463.     def paragraphToString(self, paragraph, indent = 0):
  464.         dummyParaProps = ParagraphProps()
  465.         style_name = paragraph.getAttribute('text:style-name')
  466.         paraProps = self.paragraphStyles.get(style_name, dummyParaProps)
  467.         text = self.inline_markup(paragraph)
  468.         if paraProps and not (paraProps.code):
  469.             text = text.strip()
  470.         
  471.         if paragraph.tagName == 'text:p' and self.lastsegment == 'text:p':
  472.             text = '\n' + text
  473.         
  474.         self.lastsegment = paragraph.tagName
  475.         if paraProps.title:
  476.             self.hasTitle = 1
  477.             return '= ' + text + ' =\n'
  478.         outlinelevel = paragraph.getAttribute('text:outline-level')
  479.         if outlinelevel:
  480.             level = int(outlinelevel)
  481.             if self.hasTitle:
  482.                 level += 1
  483.             
  484.             if level >= 1:
  485.                 return '=' * level + ' ' + text + ' ' + '=' * level + '\n'
  486.         elif paraProps.code:
  487.             return '{{{\n' + text + '\n}}}\n'
  488.         if paraProps.indented:
  489.             return self.wrapParagraph(text, indent = indent, blockquote = True)
  490.         return self.wrapParagraph(text, indent = indent)
  491.  
  492.     
  493.     def wrapParagraph(self, text, indent = 0, blockquote = False):
  494.         counter = 0
  495.         buffer = []
  496.         LIMIT = 50
  497.         if blockquote:
  498.             buffer.append('  ')
  499.         
  500.         return ''.join(buffer) + text
  501.         for token in text.split():
  502.             if counter > LIMIT - indent:
  503.                 buffer.append('\n' + ' ' * indent)
  504.                 if blockquote:
  505.                     buffer.append('  ')
  506.                 
  507.                 counter = 0
  508.             
  509.             buffer.append(token + ' ')
  510.             counter += len(token)
  511.         
  512.         return ''.join(buffer)
  513.  
  514.  
  515.