home *** CD-ROM | disk | FTP | other *** search
- # Source Generated with Decompyle++
- # File: in.pyc (Python 2.6)
-
- __license__ = 'GPL v3'
- __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
- __docformat__ = 'restructuredtext en'
- import re
- from calibre.constants import preferred_encoding
- from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, NavigableString, CData, Comment, Declaration, ProcessingInstruction
- from calibre import prepare_string_for_xml
- from calibre.utils.html2text import html2text
- from calibre.ebooks.markdown import markdown
# A lowercase letter, one of '.', '?' or '!', then an uppercase letter with
# nothing in between, e.g. "end.Next". The three parts are captured as
# groups 1, 2 and 3.
lost_cr_pat = re.compile(r'([a-z])([\.\?!])([A-Z])')

# Dotted abbreviations ("Ph.D", "D.Phil", and Dr/Mr/Mrs/Ms followed by a dot
# and a capital) that have the same shape as lost_cr_pat matches.
lost_cr_exception_pat = re.compile(
    r'(Ph\.D)|(D\.Phil)|((Dr|Mr|Mrs|Ms)\.[A-Z])')

# Case-insensitive match for the start of a script, table, tr, td, th, style
# or iframe tag.
sanitize_pat = re.compile(
    r'<script|<table|<tr|<td|<th|<style|<iframe', re.IGNORECASE)
-
def comments_to_html(comments):
    """Normalize comment text into HTML made of ``<p class="description">`` blocks.

    :param comments: The comment text. A non-unicode string is decoded with
        ``preferred_encoding`` using the ``'replace'`` error handler.
    :return: ``u'<p></p>'`` when ``comments`` is empty or None. For text that
        contains no ``'<'``: the XML-escaped text with every blank-line
        separated chunk wrapped in ``<p class="description">`` and single
        newlines turned into ``<br />``, chunks joined by newlines. Otherwise
        the text is parsed with BeautifulSoup, top-level text and inline tags
        are grouped into ``<p>`` elements, and the rendered markup is
        returned (``renderContents(encoding=None)``).
    """
    if not comments:
        return u'<p></p>'
    if not isinstance(comments, unicode):
        comments = comments.decode(preferred_encoding, 'replace')

    if '<' not in comments:
        # Plain text: escape it, then map blank lines to paragraphs and
        # single newlines to line breaks.
        comments = prepare_string_for_xml(comments)
        parts = [
            u'<p class="description">%s</p>' % chunk.replace(u'\n', u'<br />')
            for chunk in comments.split('\n\n')
        ]
        return '\n'.join(parts)

    # Shield abbreviations such as "Ph.D" or "Mr.X" from the lost-break
    # pattern by putting a '\r' after each dot; every '\r' is removed below.
    comments = lost_cr_exception_pat.sub(
        lambda m: m.group().replace('.', '.\r'), comments)
    # Re-insert a paragraph break wherever sentence punctuation is directly
    # followed by a capital ("end.Next" -> "end.\n\nNext"). Matches cannot
    # overlap, so one substitution pass covers every occurrence.
    comments = lost_cr_pat.sub(r'\1\2\n\n\3', comments)

    comments = comments.replace(u'\r', u'')
    comments = comments.replace(u'\n\n', u'<p>')
    comments = comments.replace(u'\n', '<br />')
    # Double hyphen -> em dash. Written as a unicode escape: a raw non-ASCII
    # byte-string literal here cannot be coerced to unicode by replace().
    comments = comments.replace('--', u'\u2014')

    soup = BeautifulSoup(comments)
    result = BeautifulSoup()
    rtc = 0  # next insertion index in result
    ptc = 0  # next insertion index in the currently open <p>
    pTag = None
    open_pTag = False

    inline_names = ('br', 'b', 'i', 'em', 'strong', 'span', 'font', 'a', 'hr')
    ignored_types = (CData, Comment, Declaration, ProcessingInstruction)

    # Iterate over a snapshot of the top-level nodes: tokens are inserted
    # into other trees inside the loop (presumably detaching them from soup;
    # the original code takes the same copy).
    for token in list(soup.contents):
        # Exact type test on purpose: the ignored node classes are presumably
        # NavigableString subclasses, which isinstance() would also accept.
        if type(token) is NavigableString:
            inline_item = prepare_string_for_xml(token)
        elif type(token) in ignored_types:
            continue
        elif token.name in inline_names:
            inline_item = token
        else:
            # Any other tag stands on its own at the top level; flush the
            # paragraph being built first so document order is kept.
            if open_pTag:
                result.insert(rtc, pTag)
                rtc += 1
                open_pTag = False
            result.insert(rtc, token)
            rtc += 1
            continue

        # Text and inline tags accumulate in the current paragraph.
        if not open_pTag:
            pTag = Tag(result, 'p')
            open_pTag = True
            ptc = 0
        pTag.insert(ptc, inline_item)
        ptc += 1

    if open_pTag:
        result.insert(rtc, pTag)

    for p in result.findAll('p'):
        p['class'] = 'description'

    for t in result.findAll(text=True):
        t.replaceWith(prepare_string_for_xml(unicode(t)))

    return result.renderContents(encoding=None)
-
-
def sanitize_comments_html(html):
    """Return ``html`` converted to text by html2text and rendered back to
    HTML by a Markdown converter created with ``safe_mode=True``.
    """
    as_text = html2text(html)
    return markdown.Markdown(safe_mode=True).convert(as_text)
-
-
def test():
    """Run comments_to_html over a few fixed samples and print each result.

    For every (input, expected) pair the input and the produced value are
    printed. At the first pair whose produced value differs from the expected
    string, 'FAILED' is printed and the run stops. Returns None.
    """
    # NOTE(review): the second and third expected strings look as if HTML
    # entities / repeated spaces were altered when this listing was
    # extracted (e.g. 'b&c;') -- confirm against the upstream source.
    cases = [
        ('lineone\n\nlinetwo',
         '<p class="description">lineone</p>\n<p class="description">linetwo</p>'),
        ('a <b>b&c</b>\nf',
         '<p class="description">a <b>b&c;</b><br />f</p>'),
        ('a <?xml asd> b\n\ncd',
         '<p class="description">a b</p><p class="description">cd</p>'),
    ]
    for pat, val in cases:
        # Single-argument parenthesised prints give the same output under the
        # Python 2 print statement and also parse on Python 3.
        print('')
        print('Testing: %r' % pat)
        cval = comments_to_html(pat)
        print('Value: %r' % cval)
        # Compare the value that was just printed instead of converting the
        # sample a second time.
        if cval != val:
            print('FAILED')
            break
-
-
# Run the sample checks only when this file is executed as a script.
if __name__ == '__main__':
    test()
-
-