Maximum CD 2011 January

home *** CD-ROM | disk | FTP | other *** search

/ Maximum CD 2011 January / maximum-cd-2011-01.iso / DiscContents / calibre-0.7.26.msi / file_1408 (.txt) < prev next >

Wrap

Python Compiled Bytecode | 2010-10-31 | 4.3 KB | 107 lines

# Source Generated with Decompyle++
# File: in.pyc (Python 2.6)
#
# Helpers for turning free-form book comments into normalised markup.
# This is Python 2 code: it relies on the ``unicode`` builtin.

__license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import re

from calibre.constants import preferred_encoding
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, NavigableString, CData, Comment, Declaration, ProcessingInstruction
from calibre import prepare_string_for_xml
from calibre.utils.html2text import html2text
from calibre.ebooks.markdown import markdown

# lowercase letter + sentence punctuation + uppercase letter with nothing in
# between, e.g. "end.Next" -- treated as a paragraph break that was lost.
lost_cr_pat = re.compile('([a-z])([\\.\\?!])([A-Z])')
# Abbreviations / titles that look like a lost break but are not one.
lost_cr_exception_pat = re.compile('(Ph\\.D)|(D\\.Phil)|((Dr|Mr|Mrs|Ms)\\.[A-Z])')
# Openers of tags matched case-insensitively; not referenced by any function
# in this module.
sanitize_pat = re.compile('<script|<table|<tr|<td|<th|<style|<iframe', re.IGNORECASE)


def comments_to_html(comments):
    """Convert comment text into a normalised unicode string of markup.

    :param comments: The comment text, as ``unicode`` or as a byte string
        (byte strings are decoded with ``preferred_encoding``, replacing
        undecodable bytes).
    :return: ``u''`` for empty input. For text containing no ``<`` the
        XML-escaped text with one output line per blank-line-separated
        paragraph. Otherwise the rendered contents of a rebuilt soup in
        which loose text and inline tags are grouped into ``<p>`` tags
        carrying ``class="description"``.
    """
    if not comments:
        return u''
    if not isinstance(comments, unicode):
        comments = comments.decode(preferred_encoding, 'replace')

    if '<' not in comments:
        # No markup at all: escape and emit one part per paragraph.
        comments = prepare_string_for_xml(comments)
        # NOTE(review): the u'%s' format is a no-op as written; it looks like
        # wrapping markup was stripped from these literals when the file was
        # extracted -- confirm against the upstream source.
        parts = [u'%s' % x.replace(u'\n', u' ') for x in comments.split('\n\n')]
        return '\n'.join(parts)

    # Shield known abbreviations by slipping a '\r' after each '.', so that
    # lost_cr_pat cannot match inside them; the '\r' is removed again below.
    comments = lost_cr_exception_pat.sub(
        lambda m: m.group().replace('.', '.\r'), comments)
    # Re-insert a blank line wherever a paragraph break appears to be lost.
    for lost_cr in lost_cr_pat.finditer(comments):
        comments = comments.replace(
            lost_cr.group(),
            '%s%s\n\n%s' % (lost_cr.group(1), lost_cr.group(2), lost_cr.group(3)))

    comments = comments.replace(u'\r', u'')
    # NOTE(review): the replacement strings on the next two lines may have
    # lost markup during extraction -- confirm against the upstream source.
    comments = comments.replace(u'\n\n', u'')
    comments = comments.replace(u'\n', ' ')
    # Double hyphen -> em dash. Written as a unicode escape: a raw non-ASCII
    # byte-string literal is a SyntaxError in Python 2 without an encoding
    # declaration, and could not be coerced against the unicode ``comments``.
    comments = comments.replace('--', u'\u2014')

    soup = BeautifulSoup(comments)
    result = BeautifulSoup()
    rtc = 0               # insertion index into ``result``
    open_pTag = False     # True while ``pTag`` is collecting inline content
    all_tokens = list(soup.contents)  # snapshot of the parsed top-level nodes

    for token in all_tokens:
        # Exact type check on purpose: it excludes the node types tested in
        # the next branch.
        if type(token) is NavigableString:
            # Loose text goes into the current paragraph (opened on demand).
            if not open_pTag:
                pTag = Tag(result, 'p')
                open_pTag = True
                ptc = 0
            pTag.insert(ptc, prepare_string_for_xml(token))
            ptc += 1
        elif type(token) in (CData, Comment, Declaration, ProcessingInstruction):
            # Non-content nodes are dropped.
            continue
        elif token.name in ('br', 'b', 'i', 'em', 'strong', 'span', 'font', 'a', 'hr'):
            # Inline-level tags also join the current paragraph.
            if not open_pTag:
                pTag = Tag(result, 'p')
                open_pTag = True
                ptc = 0
            pTag.insert(ptc, token)
            ptc += 1
        else:
            # Any other tag closes the pending paragraph, then is inserted
            # at the top level of the result.
            if open_pTag:
                result.insert(rtc, pTag)
                rtc += 1
                open_pTag = False
                ptc = 0
            result.insert(rtc, token)
            rtc += 1

    # Flush a paragraph that was still open when the tokens ran out.
    if open_pTag:
        result.insert(rtc, pTag)

    for p in result.findAll('p'):
        p['class'] = 'description'

    for t in result.findAll(text=True):
        t.replaceWith(prepare_string_for_xml(unicode(t)))

    return result.renderContents(encoding=None)


def sanitize_comments_html(html):
    """Round-trip *html* through html2text and safe-mode Markdown.

    :param html: The markup to sanitise.
    :return: The result of converting the html2text output with a
        ``Markdown`` instance created with ``safe_mode=True``.
    """
    text = html2text(html)
    md = markdown.Markdown(safe_mode=True)
    return md.convert(text)


def test():
    """Run ``comments_to_html`` over sample inputs, stopping at the first mismatch.

    Prints each input and the value produced, and prints ``FAILED`` before
    stopping if a value differs from the expected one.
    """
    # NOTE(review): the expected values may have lost markup during
    # extraction -- confirm against the upstream source.
    cases = [
        ('lineone\n\nlinetwo', 'lineone\nlinetwo'),
        ('a b&c\nf', 'a b&c; f'),
        ('a <?xml asd> b\n\ncd', 'a bcd'),
    ]
    # Parenthesised single-argument prints produce the same output as the
    # original Python 2 print statements.
    for pat, val in cases:
        print('')
        print('Testing: %r' % pat)
        cval = comments_to_html(pat)
        print('Value: %r' % cval)
        # Reuse the value already computed instead of converting twice.
        if cval != val:
            print('FAILED')
            break


if __name__ == '__main__':
    test()