home *** CD-ROM | disk | FTP | other *** search
- # Source Generated with Decompyle++
- # File: in.pyo (Python 2.6)
-
- import lxml.html as lxml
- import re
- import traceback
# Names exported by a star-import of this module.
__all__ = [
    'render_contents',
    'to_xhtml',
    'remove_tags',
    'remove_attrs',
    'remove_styles',
    'transform_text',
]
-
def render_contents(doc, encode_text_as_xml=True):
    """Serialize the inner content of *doc*: its leading text plus children.

    The element's own tag is not part of the normal result; the output is
    ``doc.text`` (or '' when there is none) followed by the serialized form
    of every child returned by ``doc.getchildren()``.

    Args:
        doc: an element exposing ``.text`` and ``.getchildren()``.
        encode_text_as_xml: when true and ``doc.text`` is not None, the
            leading text is passed through ``.encode('xml')`` first.
            NOTE(review): 'xml' is not a standard-library codec; presumably
            it is registered elsewhere in the project -- confirm.

    Returns:
        The markup as a string. If a ValueError is raised while building
        it, *doc* itself is serialized as a whole instead.
    """
    # NOTE(review): this needs ``lxml.html.tostring`` to resolve; the
    # decompiled header ``import lxml.html as lxml`` looks like a mangled
    # ``import lxml.html`` -- confirm before relying on it.
    def tohtml(elem):
        return lxml.html.tostring(elem, encoding=unicode)

    try:
        text = doc.text
        if encode_text_as_xml and text is not None:
            text = text.encode('xml')
        # Keep the leading text: the decompiled code concatenated a literal
        # '' here, which silently discarded it.
        return (text or '') + ''.join(map(tohtml, doc.getchildren()))
    except ValueError:
        # Fall back to serializing the element itself; the decompiled code
        # called tohtml() without an argument, which could only fail.
        return tohtml(doc)
-
-
-
def make_xhtml_fragment(s):
    """Parse the HTML string *s* and return the rendered contents of its body.

    Blank input (empty or whitespace only) is returned unchanged without
    being parsed. If the parsed <body> carries a ``bgcolor`` attribute, the
    rendered contents are wrapped in a <span> whose inline style sets that
    value as ``background-color``.

    Args:
        s: HTML markup as a string.

    Returns:
        The body contents as produced by ``render_contents``, optionally
        wrapped in the background-color <span>.
    """
    if not s.strip():
        # Nothing to parse; hand blank input back untouched.
        return s

    body = lxml.html.document_fromstring(s).body
    # Render once and reuse the result for both return paths.
    fragment = render_contents(body)

    bgcolor = body.get('bgcolor')
    if bgcolor is None:
        return fragment

    # The value comes straight from the parsed document and is placed inside
    # a double-quoted attribute, so escape it (including the quote) to keep
    # it from terminating the attribute early.
    from xml.sax.saxutils import escape
    safe_color = escape(bgcolor, {'"': '&quot;'})
    return '<span style="background-color: %s;">%s</span>' % (safe_color, fragment)
-
-
def to_xhtml(s):
    """Convert the HTML string *s* to a fragment, unwrapping a lone paragraph.

    The input is first passed through ``make_xhtml_fragment``. If the result
    is exactly one ``<p>...</p>`` element, the surrounding paragraph tags are
    removed and only the inner markup is returned.

    Args:
        s: HTML markup as a string.

    Returns:
        The fragment string, without the outer <p> wrapper when the whole
        fragment is a single paragraph.
    """
    fragment = make_xhtml_fragment(s)
    if fragment.startswith('<p>') and fragment.endswith('</p>'):
        inner = fragment[3:-4]
        # Only unwrap when the outer tags belong to the same paragraph.
        # For '<p>a</p><p>b</p>' a blind strip would leave the unbalanced
        # 'a</p><p>b', so such fragments are returned as they are.
        if '</p>' not in inner:
            return inner
    return fragment
-
-
def remove_tags(tree, tagnames):
    """Call ``drop_tag()`` on every descendant of *tree* named in *tagnames*.

    For each name, the first match of ``.//<name>`` is looked up and dropped,
    and the lookup is repeated until nothing matches any more.

    Args:
        tree: element to search; it must provide ``find`` and its matches
            must provide ``drop_tag`` (as lxml.html elements do).
        tagnames: iterable of tag names.

    Returns:
        The same *tree* object, modified in place.
    """
    for name in tagnames:
        path = './/' + name
        while True:
            hit = tree.find(path)
            if hit is None:
                break
            hit.drop_tag()
    return tree
-
-
def remove_attrs(tree, attrs):
    """Delete each attribute in *attrs* from every element that carries it.

    Elements are located by running ``//*[@<attr>]`` through the
    ``iterfind`` of ``tree.getroottree()``; the attribute is then removed
    from each match's ``attrib`` mapping.

    Args:
        tree: element whose root tree is searched.
        attrs: iterable of attribute names.

    Returns:
        None; the elements are modified in place.
    """
    iterfind = tree.getroottree().iterfind
    for name in attrs:
        query = '//*[@%s]' % name
        for elem in iterfind(query):
            del elem.attrib[name]
-
-
-
-
def remove_style(s, style):
    """Remove every declaration of the property *style* from the CSS text *s*.

    Args:
        s: the contents of an inline style, e.g. ``'color: red; margin: 0'``.
        style: the property name to remove. It is inserted into a regular
            expression verbatim, so regex syntax in it is interpreted.

    Returns:
        A ``(remaining, removed)`` tuple: the style text with the matching
        declarations cut out (stripped of surrounding whitespace), and the
        list of stripped values that were removed, in order of appearance.
    """
    # Anchor the name to the start of a declaration (start of the text or
    # right after a ';'). Without this, 'color' also matched the tail of
    # 'background-color' and mangled that declaration.
    find_declaration = re.compile(
        r'(?:^|(?<=;))\s*' + style + r' *:([^;]*)').search

    removed = []
    match = find_declaration(s)
    while match:
        removed.append(match.group(1).strip())
        start, end = match.span()
        # end + 1 also swallows the ';' that terminated the declaration
        # (a no-op when the declaration ran to the end of the text).
        s = s[:start] + s[end + 1:]
        match = find_declaration(s)
    return (s.strip(), removed)
-
-
def remove_styles(tree, styles):
    """Strip the given CSS properties from every ``style`` attribute.

    Each element found by ``//*[@style]`` (via the ``iterfind`` of
    ``tree.getroottree()``) has its ``style`` attribute rewritten once per
    entry in *styles*, using the text returned by ``remove_style``. The
    attribute itself is kept even if it ends up empty.

    Args:
        tree: element whose root tree is searched.
        styles: iterable of CSS property names.

    Returns:
        None; the elements are modified in place.
    """
    for elem in tree.getroottree().iterfind('//*[@style]'):
        for name in styles:
            attributes = elem.attrib
            # Only the remaining style text is needed; the list of removed
            # values is discarded.
            stripped, _ = remove_style(attributes['style'], name)
            attributes['style'] = stripped
-
-
-
-
def transform_text(tree, func, raise_exceptions=False):
    """Replace every text node in *tree*'s document with ``func(node)``.

    Text nodes are collected with the XPath ``//*/text()`` on
    ``tree.getroottree()``. Each result is passed to *func* and the return
    value is written back to the place the text came from.

    Args:
        tree: element whose root tree is processed.
        func: callable taking one text node and returning its replacement.
        raise_exceptions: when true, an exception raised by *func* is
            propagated. Otherwise its traceback is printed and that text
            node is left unchanged.

    Returns:
        None; the document is modified in place.
    """
    for textelem in tree.getroottree().xpath('//*/text()'):
        try:
            newtext = func(textelem)
        except Exception:
            if raise_exceptions:
                raise
            # Best effort: report the failure and move on to the next node.
            traceback.print_exc()
            continue

        owner = textelem.getparent()
        # XPath text() results cover tail text as well as element text.
        # lxml marks tail results with ``is_tail``; those must go back to
        # ``.tail``, otherwise the element's own text is overwritten and
        # the tail stays untransformed.
        if getattr(textelem, 'is_tail', False):
            owner.tail = newtext
        else:
            owner.text = newtext
-
-
-