home *** CD-ROM | disk | FTP | other *** search
/ Maximum CD 2011 October / maximum-cd-2011-10.iso / DiscContents / digsby_setup.exe / lib / util / htmlutils.pyo (.txt) < prev    next >
Encoding:
Python Compiled Bytecode  |  2011-06-22  |  3.2 KB  |  107 lines

  1. # Source Generated with Decompyle++
  2. # File: in.pyo (Python 2.6)
  3.  
  4. import lxml.html as lxml
  5. import re
  6. import traceback
# Names exported by a star-import of this module.  The helpers
# make_xhtml_fragment and remove_style are defined below but not listed here.
__all__ = [
    'render_contents',
    'to_xhtml',
    'remove_tags',
    'remove_attrs',
    'remove_styles',
    'transform_text']
  14.  
  15. def render_contents(doc, encode_text_as_xml = True):
  16.     
  17.     tohtml = lambda d: lxml.html.tostring(d, encoding = unicode)
  18.     
  19.     try:
  20.         t = doc.text
  21.         if encode_text_as_xml and t is not None:
  22.             t = t.encode('xml')
  23.         
  24.         if not t:
  25.             pass
  26.         return '' + ''.join(map(tohtml, doc.getchildren()))
  27.     except ValueError:
  28.         return tohtml()
  29.  
  30.  
  31.  
  32. def make_xhtml_fragment(s):
  33.     if not s.strip():
  34.         return s
  35.     html = lxml.html.document_fromstring(s)
  36.     body = html.body
  37.     bgcolor = html.body.get('bgcolor')
  38.     s = render_contents(body)
  39.     if bgcolor is not None:
  40.         return '<span style="background-color: %s;">%s</span>' % (bgcolor, render_contents(body))
  41.     return s
  42.  
  43.  
  44. def to_xhtml(s):
  45.     s = make_xhtml_fragment(s)
  46.     if s.startswith('<p>') and s.endswith('</p>'):
  47.         return s[3:-4]
  48.     return s
  49.  
  50.  
  51. def remove_tags(tree, tagnames):
  52.     for tagname in tagnames:
  53.         xpath = './/' + tagname
  54.         tag = tree.find(xpath)
  55.         while tag is not None:
  56.             tag.drop_tag()
  57.             tag = tree.find(xpath)
  58.     
  59.     return tree
  60.  
  61.  
  62. def remove_attrs(tree, attrs):
  63.     find = tree.getroottree().iterfind
  64.     for attr in attrs:
  65.         for tag in find('//*[@%s]' % attr):
  66.             del tag.attrib[attr]
  67.         
  68.     
  69.  
  70.  
  71. def remove_style(s, style):
  72.     search = re.compile('\\s*' + style + ' *:([^;]*)').search
  73.     removed = []
  74.     match = search(s)
  75.     while match:
  76.         removed.append(match.groups(1)[0].strip())
  77.         (i, j) = match.span()
  78.         s = s[:i] + s[j + 1:]
  79.         match = search(s)
  80.     return (s.strip(), removed)
  81.  
  82.  
  83. def remove_styles(tree, styles):
  84.     for tag in tree.getroottree().iterfind('//*[@style]'):
  85.         for style in styles:
  86.             attrib = tag.attrib
  87.             (attrib['style'], removed) = remove_style(attrib['style'], style)
  88.         
  89.     
  90.  
  91.  
  92. def transform_text(tree, func, raise_exceptions = False):
  93.     for textelem in tree.getroottree().xpath('//*/text()'):
  94.         
  95.         try:
  96.             newtext = func(textelem)
  97.         except Exception:
  98.             if raise_exceptions:
  99.                 raise 
  100.             raise_exceptions
  101.             traceback.print_exc()
  102.             continue
  103.  
  104.         textelem.getparent().text = newtext
  105.     
  106.  
  107.