Maximum CD 2010 November

home *** CD-ROM | disk | FTP | other *** search

/ Maximum CD 2010 November / maximum-cd-2010-11.iso / DiscContents / calibre-0.7.13.msi / file_939 (.txt) < prev next >

Wrap

Python Compiled Bytecode | 2010-08-06 | 46.8 KB | 1,408 lines

# Source Generated with Decompyle++
# File: in.pyc (Python 2.6)
"""Markdown-to-XHTML converter (Python-Markdown 1.7, as bundled with calibre).

This file is a cleaned-up reconstruction of decompiler output.  The listing
it was taken from had lost its indentation, its HTML entities and runs of
whitespace inside string literals, and the decompiler had mangled several
boolean expressions and ``for``/``else`` loops.  Those spots are restored
here.

NOTE(review): whitespace widths inside literals (the four-space ``tabbed``
pattern, the two-space line-break patterns and the pretty-printing indents in
``TextNode.toxml``) follow upstream Python-Markdown 1.7 -- confirm against the
upstream source before relying on byte-exact output.

The code is written to run unchanged on Python 2.6+ and Python 3.
"""
version = '1.7'
version_info = (1, 7, 0, 'rc-2')
__revision__ = '$Rev: 72 $'

import re
import sys
import codecs
from logging import getLogger, StreamHandler, Formatter, DEBUG, INFO, WARN, ERROR, CRITICAL

try:
    unicode
except NameError:
    # Python 3: every ``str`` is already unicode text.
    unicode = str

MESSAGE_THRESHOLD = CRITICAL

# The logger itself lets everything through; the handler level is what
# actually filters messages (see markdownFromFile).
logger = getLogger('MARKDOWN')
logger.setLevel(DEBUG)
console_hndlr = StreamHandler()
formatter = Formatter('%(name)s-%(levelname)s: "%(message)s"')
console_hndlr.setFormatter(formatter)
console_hndlr.setLevel(MESSAGE_THRESHOLD)
logger.addHandler(console_hndlr)


def message(level, text):
    """Log *text* at logging level *level* on the module logger."""
    logger.log(level, text)


TAB_LENGTH = 4
ENABLE_ATTRIBUTES = True
SMART_EMPHASIS = 1
HTML_REMOVED_TEXT = '[HTML_REMOVED]'

# Inclusive code point ranges whose letters make a block right-to-left.
RTL_BIDI_RANGES = ((u'\u0590', u'\u07FF'), (u'\u2D30', u'\u2D7F'))

BOMS = {
    'utf-8': (codecs.BOM_UTF8,),
    'utf-16': (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE),
}


def removeBOM(text, encoding):
    """Return *text* without a leading byte-order mark for *encoding*.

    *encoding* must be a key of ``BOMS`` (KeyError otherwise).  Only one
    leading BOM is removed; the previous ``lstrip(bom)`` treated the BOM as a
    set of characters and could eat legitimate leading data.
    """
    convert = isinstance(text, unicode)
    for bom in BOMS[encoding]:
        if convert:
            bom = bom.decode(encoding)
        # Decoding a UTF-16 BOM with the 'utf-16' codec yields '', which
        # cannot be a meaningful prefix -- skip it.
        if bom and text.startswith(bom):
            return text[len(bom):]
    return text


EXECUTABLE_NAME_FOR_USAGE = 'python markdown.py'

# Raw HTML is swapped for these placeholders during parsing and swapped back
# by RawHtmlTextPostprocessor.
HTML_PLACEHOLDER_PREFIX = 'qaodmasdkwaspemas'
HTML_PLACEHOLDER = HTML_PLACEHOLDER_PREFIX + '%dajkqlsmdqpakldnzsdfls'

BLOCK_LEVEL_ELEMENTS = [
    'p', 'div', 'blockquote', 'pre', 'table', 'dl', 'ol', 'ul', 'script',
    'noscript', 'form', 'fieldset', 'iframe', 'math', 'ins', 'del', 'hr',
    'hr/', 'style']


def isBlockLevel(tag):
    """Return True if *tag* is a block-level element name or ``h<digit>``."""
    if tag in BLOCK_LEVEL_ELEMENTS:
        return True
    # Length check avoids IndexError on '' and 'h'.
    return len(tag) > 1 and tag[0] == 'h' and tag[1] in '0123456789'


# (pattern, replacement) pairs applied in order; '&' must come first so the
# entities produced by later rules are not escaped again.
ENTITY_NORMALIZATION_EXPRESSIONS = [
    (re.compile('&'), '&amp;'),
    (re.compile('<'), '&lt;'),
    (re.compile('>'), '&gt;')]

# Variant for attribute values: leaves numeric entities (``&#...``) alone
# and also escapes double quotes.
ENTITY_NORMALIZATION_EXPRESSIONS_SOFT = [
    (re.compile(r'&(?!\#)'), '&amp;'),
    (re.compile('<'), '&lt;'),
    (re.compile('>'), '&gt;'),
    (re.compile('"'), '&quot;')]


def getBidiType(text):
    """Return 'rtl', 'ltr' or None based on the first character of *text*.

    None is returned for empty text, non-unicode text and text whose first
    character is not alphabetic.
    """
    if not text:
        return None
    ch = text[0]
    if not isinstance(ch, unicode) or not ch.isalpha():
        return None
    for low, high in RTL_BIDI_RANGES:
        if low <= ch <= high:
            return 'rtl'
    return 'ltr'


class Document:
    """Root of the minimal DOM ("NanoDom") used to build the output."""

    def __init__(self):
        self.bidi = 'ltr'

    def appendChild(self, child):
        """Install *child* as the document element and reset the entity cache."""
        self.documentElement = child
        child.isDocumentElement = True
        child.parent = self
        self.entities = {}

    def setBidi(self, bidi):
        if bidi:
            self.bidi = bidi

    def createElement(self, tag, textNode=None):
        """Create an Element named *tag*, optionally holding text *textNode*."""
        el = Element(tag)
        el.doc = self
        if textNode:
            el.appendChild(self.createTextNode(textNode))
        return el

    def createTextNode(self, text):
        node = TextNode(text)
        node.doc = self
        return node

    def createEntityReference(self, entity):
        """Return the (cached) EntityReference for *entity*."""
        if entity not in self.entities:
            self.entities[entity] = EntityReference(entity)
        return self.entities[entity]

    def createCDATA(self, text):
        node = CDATA(text)
        node.doc = self
        return node

    def toxml(self):
        return self.documentElement.toxml()

    def normalizeEntities(self, text, avoidDoubleNormalizing=False):
        """Escape markup characters in *text*.

        With *avoidDoubleNormalizing* the soft rule set is used (numeric
        entities preserved, double quotes escaped).
        """
        if avoidDoubleNormalizing:
            regexps = ENTITY_NORMALIZATION_EXPRESSIONS_SOFT
        else:
            regexps = ENTITY_NORMALIZATION_EXPRESSIONS
        for regexp, substitution in regexps:
            text = regexp.sub(substitution, text)
        return text

    def find(self, test):
        return self.documentElement.find(test)

    def unlink(self):
        self.documentElement.unlink()
        self.documentElement = None


class CDATA:
    """CDATA section node."""

    type = 'cdata'

    def __init__(self, text):
        self.text = text

    def handleAttributes(self):
        pass

    def toxml(self):
        return '<![CDATA[' + self.text + ']]>'


class Element:
    """Element node with ordered attributes and child nodes."""

    type = 'element'

    def __init__(self, tag):
        self.nodeName = tag
        self.attributes = []        # attribute names, in insertion order
        self.attribute_values = {}  # attribute name -> value
        self.childNodes = []
        self.bidi = None
        self.isDocumentElement = False

    def setBidi(self, bidi):
        """Record text direction and propagate it to the parent.

        Once set, the direction is not changed, except on the document
        element, which always takes the latest value.
        """
        if bidi:
            if not self.bidi or self.isDocumentElement:
                self.bidi = bidi
                self.parent.setBidi(bidi)

    def unlink(self):
        """Recursively drop child references."""
        for child in self.childNodes:
            if child.type == 'element':
                child.unlink()
        self.childNodes = None

    def setAttribute(self, attr, value):
        if attr not in self.attributes:
            self.attributes.append(attr)
        self.attribute_values[attr] = value

    def insertChild(self, position, child):
        self.childNodes.insert(position, child)
        child.parent = self

    def removeChild(self, child):
        self.childNodes.remove(child)

    def replaceChild(self, oldChild, newChild):
        position = self.childNodes.index(oldChild)
        self.removeChild(oldChild)
        self.insertChild(position, newChild)

    def appendChild(self, child):
        self.childNodes.append(child)
        child.parent = self

    def handleAttributes(self):
        pass

    def find(self, test, depth=0):
        """Return all descendants for which ``test(node)`` is true."""
        matched_nodes = []
        for child in self.childNodes:
            if test(child):
                matched_nodes.append(child)
            if child.type == 'element':
                matched_nodes += child.find(test, depth + 1)
        return matched_nodes

    def toxml(self):
        """Serialize this element and its subtree."""
        if ENABLE_ATTRIBUTES:
            for child in self.childNodes:
                child.handleAttributes()

        buffer = ''
        if self.nodeName in ('h1', 'h2', 'h3', 'h4'):
            buffer += '\n'
        elif self.nodeName in ('li',):
            buffer += '\n '

        # Children are serialized first: text nodes may set the bidi
        # direction that the 'dir' attribute below depends on.
        childBuffer = ''
        if self.childNodes or self.nodeName in ('blockquote',):
            childBuffer += '>'
            for child in self.childNodes:
                childBuffer += child.toxml()
            if self.nodeName == 'p':
                childBuffer += '\n'
            elif self.nodeName == 'li':
                childBuffer += '\n '
            childBuffer += '</%s>' % self.nodeName
        else:
            childBuffer += '/>'

        buffer += '<' + self.nodeName

        if self.nodeName in ('p', 'li', 'ul', 'ol',
                             'h1', 'h2', 'h3', 'h4', 'h5', 'h6'):
            if 'dir' not in self.attribute_values:
                if self.bidi:
                    bidi = self.bidi
                else:
                    bidi = self.doc.bidi
                if bidi == 'rtl':
                    self.setAttribute('dir', 'rtl')

        for attr in self.attributes:
            value = self.attribute_values[attr]
            value = self.doc.normalizeEntities(value,
                                               avoidDoubleNormalizing=True)
            buffer += ' %s="%s"' % (attr, value)

        buffer += childBuffer

        # 'br ' (with the trailing space) is the node name used by the
        # line-break patterns so that the tag serializes as '<br />'.
        if self.nodeName in ('p', 'br ', 'li', 'ul', 'ol',
                             'h1', 'h2', 'h3', 'h4'):
            buffer += '\n'
        return buffer


class TextNode:
    """Text node; may carry ``{@name=value}`` attribute markers."""

    type = 'text'
    attrRegExp = re.compile(r'\{@([^\}]*)=([^\}]*)}')

    def __init__(self, text):
        self.value = text

    def attributeCallback(self, match):
        self.parent.setAttribute(match.group(1), match.group(2))

    def handleAttributes(self):
        """Move ``{@name=value}`` markers from the text onto the parent.

        NOTE(review): attributeCallback returns None; Python 2's re.sub
        treats that as an empty replacement -- confirm on the target
        interpreter if attribute markers are used.
        """
        self.value = self.attrRegExp.sub(self.attributeCallback, self.value)

    def toxml(self):
        text = self.value
        self.parent.setBidi(getBidiType(text))
        if not text.startswith(HTML_PLACEHOLDER_PREFIX):
            # Pretty-printing indents for continuation lines.
            if self.parent.nodeName == 'p':
                text = text.replace('\n', '\n   ')
            elif (self.parent.nodeName == 'li'
                  and self.parent.childNodes[0] == self):
                text = '\n     ' + text.replace('\n', '\n     ')
        text = self.doc.normalizeEntities(text)
        return text


class EntityReference:
    """Entity reference node, serialized as ``&name;``."""

    type = 'entity_ref'

    def __init__(self, entity):
        self.entity = entity

    def handleAttributes(self):
        pass

    def toxml(self):
        return '&' + self.entity + ';'


class TextPreprocessor:
    """Base class: transforms the whole source text before line splitting."""

    def run(self, text):
        pass


class Preprocessor:
    """Base class: transforms the list of source lines."""

    def run(self, lines):
        pass


class HtmlBlockPreprocessor(TextPreprocessor):
    """Replace raw block-level HTML with stash placeholders.

    ``self.stash`` (an HtmlStash) is assigned by ``Markdown.reset``.
    """

    def _get_left_tag(self, block):
        """Return the lower-cased tag name opening *block* ('' if none)."""
        words = block[1:].replace('>', ' ', 1).split()
        if not words:
            return ''
        return words[0].lower()

    def _get_right_tag(self, left_tag, block):
        return block.rstrip()[-len(left_tag) - 2:-1].lower()

    def _equal_tags(self, left_tag, right_tag):
        # 'div' and processing-instruction style tags (PHP etc.) are
        # accepted without inspecting the closing tag.
        if left_tag == 'div' or left_tag[:1] in ('?', '@', '%'):
            return True
        if '/' + left_tag == right_tag:
            return True
        if right_tag == '--' and left_tag == '--':
            return True
        if left_tag == right_tag[1:] and right_tag[:1] != '<':
            return True
        return False

    def _is_oneliner(self, tag):
        return tag in ('hr', 'hr/')

    def run(self, text):
        new_blocks = []
        blocks = text.split('\n\n')
        items = []       # blocks collected while inside an unclosed tag
        left_tag = ''
        right_tag = ''
        in_tag = False
        for block in blocks:
            if block.startswith('\n'):
                block = block[1:]

            if in_tag:
                items.append(block.strip())
                right_tag = self._get_right_tag(left_tag, block)
                if self._equal_tags(left_tag, right_tag):
                    # Closing tag found: stash everything collected.
                    in_tag = False
                    new_blocks.append(self.stash.store('\n\n'.join(items)))
                    items = []
                continue

            if not block.startswith('<'):
                new_blocks.append(block)
                continue

            left_tag = self._get_left_tag(block)
            right_tag = self._get_right_tag(left_tag, block)
            marker = block[1:2]  # slice: a lone '<' must not raise

            if not (isBlockLevel(left_tag)
                    or marker in ('!', '?', '@', '%')):
                new_blocks.append(block)
                continue

            if self._is_oneliner(left_tag):
                new_blocks.append(block.strip())
                continue

            if marker == '!':
                # Comment block: closed by '-->'.
                left_tag = '--'
                right_tag = self._get_right_tag(left_tag, block)

            if (block.rstrip().endswith('>')
                    and self._equal_tags(left_tag, right_tag)):
                new_blocks.append(self.stash.store(block.strip()))
            else:
                # Block-level tag that is not closed in this block.
                items.append(block.strip())
                in_tag = True

        if items:
            new_blocks.append(self.stash.store('\n\n'.join(items)))
            new_blocks.append('\n')
        return '\n\n'.join(new_blocks)


HTML_BLOCK_PREPROCESSOR = HtmlBlockPreprocessor()


class HeaderPreprocessor(Preprocessor):
    """Convert setext (underlined) headers to '#'/'##' headers, in place."""

    def run(self, lines):
        i = -1
        while i + 1 < len(lines):
            i = i + 1
            if not lines[i].strip():
                continue

            if lines[i].startswith('#'):
                # Make sure a header always ends its block.
                lines.insert(i + 1, '\n')

            # '<' rather than '<=': lines[i + 1] must exist.
            if (i + 1 < len(lines)
                    and lines[i + 1]
                    and lines[i + 1][0] in ('-', '=')):
                underline = lines[i + 1].strip()
                if underline == '=' * len(underline):
                    lines[i] = '# ' + lines[i].strip()
                    lines[i + 1] = ''
                elif underline == '-' * len(underline):
                    lines[i] = '## ' + lines[i].strip()
                    lines[i + 1] = ''
        return lines


HEADER_PREPROCESSOR = HeaderPreprocessor()


class LinePreprocessor(Preprocessor):
    """Replace horizontal-rule lines with a stashed ``<hr />``.

    ``self.stash`` is assigned by ``Markdown.reset``.
    """

    blockquote_re = re.compile('^(> )+')

    def run(self, lines):
        for i in range(len(lines)):
            prefix = ''
            m = self.blockquote_re.search(lines[i])
            if m:
                prefix = m.group(0)
            if self._isLine(lines[i][len(prefix):]):
                lines[i] = prefix + self.stash.store('<hr />', safe=True)
        return lines

    def _isLine(self, block):
        """Return 1 if *block* should become an <hr />, else 0."""
        if block.startswith('    '):
            return 0  # indented: a code block, not a rule
        text = ''.join([ch for ch in block if not ch.isspace()])
        if len(text) <= 2:
            return 0
        for pattern in ('isline1', 'isline2', 'isline3'):
            m = RE.regExp[pattern].match(text)
            if m and m.group(1):
                return 1
        return 0


LINE_PREPROCESSOR = LinePreprocessor()


class ReferencePreprocessor(Preprocessor):
    """Collect ``[id]: url "title"`` definitions and drop them from the text.

    ``self.references`` is assigned by ``Markdown.reset``.
    """

    def run(self, lines):
        new_text = []
        for line in lines:
            m = RE.regExp['reference-def'].match(line)
            if not m:
                new_text.append(line)
                continue
            ref_id = m.group(2).strip().lower()
            title = m.group(4).strip()
            if not title:
                self.references[ref_id] = (m.group(3), title)
            elif (len(title) >= 2
                  and (title[0] == title[-1] == '"'
                       or title[0] == title[-1] == "'"
                       or (title[0] == '(' and title[-1] == ')'))):
                self.references[ref_id] = (m.group(3), title[1:-1])
            else:
                # Trailing text is not a well-formed title: ordinary line.
                new_text.append(line)
        return new_text


REFERENCE_PREPROCESSOR = ReferencePreprocessor()

# Inline pattern sources.  Pattern wraps each one in '^(.*)' ... '(.*)$', so
# the first group of a pattern is match group 2.
NOBRACKET = r'[^\]\[]*'
# Bracketed text allowing up to six levels of nested brackets (groups 2-8).
BRK = (r'\[('
       + (NOBRACKET + r'(\[') * 6
       + (NOBRACKET + r'\])*') * 6
       + NOBRACKET + r')\]')
NOIMG = r'(?<!\!)'

BACKTICK_RE = r'\`([^\`]*)\`'
DOUBLE_BACKTICK_RE = r'\`\`(.*)\`\`'
ESCAPE_RE = r'\\(.)'
EMPHASIS_RE = r'\*([^\*]*)\*'
STRONG_RE = r'\*\*(.*)\*\*'
STRONG_EM_RE = r'\*\*\*([^_]*)\*\*\*'

if SMART_EMPHASIS:
    EMPHASIS_2_RE = r'(?<!\S)_(\S[^_]*)_'
else:
    EMPHASIS_2_RE = r'_([^_]*)_'

STRONG_2_RE = r'__([^_]*)__'
STRONG_EM_2_RE = r'___([^_]*)___'

LINK_RE = NOIMG + BRK + r'\s*\(([^\)]*)\)'
LINK_ANGLED_RE = NOIMG + BRK + r'\s*\(<([^\)]*)>\)'
IMAGE_LINK_RE = r'\!' + BRK + r'\s*\(([^\)]*)\)'
REFERENCE_RE = NOIMG + BRK + r'\s*\[([^\]]*)\]'
IMAGE_REFERENCE_RE = r'\!' + BRK + r'\s*\[([^\]]*)\]'
NOT_STRONG_RE = r'( \* )'
AUTOLINK_RE = r'<(http://[^>]*)>'
AUTOMAIL_RE = r'<([^> \!]*@[^> ]*)>'
HTML_RE = r'(\<[a-zA-Z/][^\>]*\>)'
ENTITY_RE = r'(&[\#a-zA-Z0-9]*;)'
LINE_BREAK_RE = r'  \n'
LINE_BREAK_2_RE = r'  $'


class Pattern:
    """Base inline pattern: compiles *pattern* between two catch-all groups."""

    def __init__(self, pattern):
        self.pattern = pattern
        self.compiled_re = re.compile('^(.*)%s(.*)$' % pattern, re.DOTALL)

    def getCompiledRegExp(self):
        return self.compiled_re


BasePattern = Pattern


class SimpleTextPattern(Pattern):
    """Return the matched text as a plain text node."""

    def handleMatch(self, m, doc):
        return doc.createTextNode(m.group(2))


class SimpleTagPattern(Pattern):
    """Wrap the matched text in an element named *tag*."""

    def __init__(self, pattern, tag):
        Pattern.__init__(self, pattern)
        self.tag = tag

    def handleMatch(self, m, doc):
        el = doc.createElement(self.tag)
        el.appendChild(doc.createTextNode(m.group(2)))
        return el


class SubstituteTagPattern(SimpleTagPattern):
    """Replace the match with an empty element named *tag*."""

    def handleMatch(self, m, doc):
        return doc.createElement(self.tag)


class BacktickPattern(Pattern):
    """Wrap the stripped matched text in ``<code>``."""

    def __init__(self, pattern):
        Pattern.__init__(self, pattern)
        self.tag = 'code'

    def handleMatch(self, m, doc):
        el = doc.createElement(self.tag)
        text = m.group(2).strip()
        el.appendChild(doc.createTextNode(text))
        return el


class DoubleTagPattern(SimpleTagPattern):
    """Wrap the match in two nested elements; *tag* is 'outer,inner'."""

    def handleMatch(self, m, doc):
        tag1, tag2 = self.tag.split(',')
        el1 = doc.createElement(tag1)
        el2 = doc.createElement(tag2)
        el1.appendChild(el2)
        el2.appendChild(doc.createTextNode(m.group(2)))
        return el1


class HtmlPattern(Pattern):
    """Stash inline raw HTML / entities; ``self.stash`` is set by reset()."""

    def handleMatch(self, m, doc):
        rawhtml = m.group(2)
        place_holder = self.stash.store(rawhtml)
        return doc.createTextNode(place_holder)


class LinkPattern(Pattern):
    """``[text](href "title")`` -> ``<a>``."""

    def handleMatch(self, m, doc):
        el = doc.createElement('a')
        el.appendChild(doc.createTextNode(m.group(2)))
        # Group 9 is the parenthesized part; everything from the first
        # double quote on is the title.
        parts = m.group(9).split('"')
        if parts:
            el.setAttribute('href', parts[0].strip())
        else:
            el.setAttribute('href', '')
        if len(parts) > 1:
            title = '"' + '"'.join(parts[1:]).strip()
            title = dequote(title)
            el.setAttribute('title', title)
        return el


class ImagePattern(Pattern):
    """``![alt](src title)`` -> ``<img>``."""

    def handleMatch(self, m, doc):
        el = doc.createElement('img')
        src_parts = m.group(9).split()
        if src_parts:
            el.setAttribute('src', src_parts[0])
        else:
            el.setAttribute('src', '')
        if len(src_parts) > 1:
            el.setAttribute('title', dequote(' '.join(src_parts[1:])))
        if ENABLE_ATTRIBUTES:
            # Temporarily attach the alt text as a child so that any
            # {@attr=value} markers in it are applied to the <img>.
            text = doc.createTextNode(m.group(2))
            el.appendChild(text)
            text.handleAttributes()
            truealt = text.value
            el.childNodes.remove(text)
        else:
            truealt = m.group(2)
        el.setAttribute('alt', truealt)
        return el


class ReferencePattern(Pattern):
    """``[text][id]`` -> ``<a>``; ``self.references`` is set by reset()."""

    def handleMatch(self, m, doc):
        if m.group(9):
            ref_id = m.group(9).lower()
        else:
            # Implicit reference: the link text doubles as the id.
            ref_id = m.group(2).lower()
        if ref_id not in self.references:
            return None
        href, title = self.references[ref_id]
        text = m.group(2)
        return self.makeTag(href, title, text, doc)

    def makeTag(self, href, title, text, doc):
        el = doc.createElement('a')
        el.setAttribute('href', href)
        if title:
            el.setAttribute('title', title)
        el.appendChild(doc.createTextNode(text))
        return el


class ImageReferencePattern(ReferencePattern):
    """``![alt][id]`` -> ``<img>``."""

    def makeTag(self, href, title, text, doc):
        el = doc.createElement('img')
        el.setAttribute('src', href)
        if title:
            el.setAttribute('title', title)
        el.setAttribute('alt', text)
        return el


class AutolinkPattern(Pattern):
    """``<http://...>`` -> ``<a>``."""

    def handleMatch(self, m, doc):
        el = doc.createElement('a')
        el.setAttribute('href', m.group(2))
        el.appendChild(doc.createTextNode(m.group(2)))
        return el


class AutomailPattern(Pattern):
    """``<user@host>`` -> ``<a href="mailto:...">`` written as numeric entities."""

    def handleMatch(self, m, doc):
        el = doc.createElement('a')
        email = m.group(2)
        if email.startswith('mailto:'):
            email = email[len('mailto:'):]
        for letter in email:
            entity = doc.createEntityReference('#%d' % ord(letter))
            el.appendChild(entity)
        mailto = 'mailto:' + email
        mailto = ''.join(['&#%d;' % ord(letter) for letter in mailto])
        el.setAttribute('href', mailto)
        return el


ESCAPE_PATTERN = SimpleTextPattern(ESCAPE_RE)
NOT_STRONG_PATTERN = SimpleTextPattern(NOT_STRONG_RE)
BACKTICK_PATTERN = BacktickPattern(BACKTICK_RE)
DOUBLE_BACKTICK_PATTERN = BacktickPattern(DOUBLE_BACKTICK_RE)
STRONG_PATTERN = SimpleTagPattern(STRONG_RE, 'strong')
STRONG_PATTERN_2 = SimpleTagPattern(STRONG_2_RE, 'strong')
EMPHASIS_PATTERN = SimpleTagPattern(EMPHASIS_RE, 'em')
EMPHASIS_PATTERN_2 = SimpleTagPattern(EMPHASIS_2_RE, 'em')
STRONG_EM_PATTERN = DoubleTagPattern(STRONG_EM_RE, 'strong,em')
STRONG_EM_PATTERN_2 = DoubleTagPattern(STRONG_EM_2_RE, 'strong,em')
LINE_BREAK_PATTERN = SubstituteTagPattern(LINE_BREAK_RE, 'br ')
LINE_BREAK_PATTERN_2 = SubstituteTagPattern(LINE_BREAK_2_RE, 'br ')
LINK_PATTERN = LinkPattern(LINK_RE)
LINK_ANGLED_PATTERN = LinkPattern(LINK_ANGLED_RE)
IMAGE_LINK_PATTERN = ImagePattern(IMAGE_LINK_RE)
IMAGE_REFERENCE_PATTERN = ImageReferencePattern(IMAGE_REFERENCE_RE)
REFERENCE_PATTERN = ReferencePattern(REFERENCE_RE)
HTML_PATTERN = HtmlPattern(HTML_RE)
ENTITY_PATTERN = HtmlPattern(ENTITY_RE)
AUTOLINK_PATTERN = AutolinkPattern(AUTOLINK_RE)
AUTOMAIL_PATTERN = AutomailPattern(AUTOMAIL_RE)


class Postprocessor:
    """Base class: post-processes the DOM."""

    def run(self, dom):
        pass


class TextPostprocessor:
    """Base class: post-processes the serialized output text."""

    def run(self, text):
        pass


class RawHtmlTextPostprocessor(TextPostprocessor):
    """Swap stash placeholders back for their HTML.

    ``self.stash`` and ``self.safeMode`` are assigned by ``Markdown.reset``.
    """

    def __init__(self):
        pass

    def run(self, text):
        for i in range(self.stash.html_counter):
            html, safe = self.stash.rawHtmlBlocks[i]
            if self.safeMode and not safe:
                mode = str(self.safeMode).lower()
                if mode == 'escape':
                    html = self.escape(html)
                elif mode == 'remove':
                    html = ''
                else:
                    html = HTML_REMOVED_TEXT
            placeholder = HTML_PLACEHOLDER % i
            # A placeholder alone in a paragraph loses the <p> wrapper.
            text = text.replace('<p>%s\n</p>' % placeholder, html + '\n')
            text = text.replace(placeholder, html)
        return text

    def escape(self, html):
        """Escape ``& < > "`` in *html*."""
        html = html.replace('&', '&amp;')
        html = html.replace('<', '&lt;')
        html = html.replace('>', '&gt;')
        return html.replace('"', '&quot;')


RAWHTMLTEXTPOSTPROCESSOR = RawHtmlTextPostprocessor()


class HtmlStash:
    """Stores raw HTML fragments and hands out placeholders for them."""

    def __init__(self):
        self.html_counter = 0
        self.rawHtmlBlocks = []  # list of (html, safe) tuples

    def store(self, html, safe=False):
        """Save *html* and return the placeholder string standing in for it."""
        self.rawHtmlBlocks.append((html, safe))
        placeholder = HTML_PLACEHOLDER % self.html_counter
        self.html_counter += 1
        return placeholder


class BlockGuru:
    """Helpers for splitting off the leading block of a list of lines."""

    def _findHead(self, lines, fn, allowBlank=0):
        """Collect leading lines for which ``fn(line)`` is truthy.

        Returns ``(items, rest)``: *items* holds the values returned by *fn*
        and *rest* the unconsumed lines.  With *allowBlank*, a blank line is
        kept (as '') when the next non-blank line still satisfies *fn*.
        """
        items = []
        i = 0  # number of lines consumed so far
        for line in lines:
            if not line.strip():
                if not allowBlank:
                    return (items, lines[i:])
                i += 1
                upcoming = None
                for candidate in lines[i:]:
                    if candidate.strip():
                        upcoming = candidate
                        break
                if upcoming is None:
                    break  # nothing but blank lines left
                if fn(upcoming):
                    items.append('')
                    continue
                break  # next real line is not part of the block
            part = fn(line)
            if part:
                items.append(part)
                i += 1
                continue
            return (items, lines[i:])
        else:
            i += 1
        return (items, lines[i:])

    def detabbed_fn(self, line):
        """Return *line* minus one level of indentation, or None."""
        m = RE.regExp['tabbed'].match(line)
        if m:
            return m.group(4)
        return None

    def detectTabbed(self, lines):
        return self._findHead(lines, self.detabbed_fn, allowBlank=1)


def print_error(string):
    """Write *string* plus a newline to stderr."""
    sys.stderr.write(string + '\n')


def dequote(string):
    """Strip one pair of matching single or double quotes around *string*."""
    if ((string.startswith('"') and string.endswith('"'))
            or (string.startswith("'") and string.endswith("'"))):
        return string[1:-1]
    return string


class CorePatterns:
    """Compiled block-level regular expressions, keyed by name in ``regExp``."""

    patterns = {
        'header': r'(#*)([^#]*)(#*)',
        'reference-def': r'(\ ?\ ?\ ?)\[([^\]]*)\]:\s*([^ ]*)(.*)',
        'containsline': r'([-]*)$|^([=]*)',
        'ol': r'[ ]{0,3}[\d]*\.\s+(.*)',
        'ul': r'[ ]{0,3}[*+-]\s+(.*)',
        'isline1': r'(\**)',
        'isline2': r'(\-*)',
        'isline3': r'(\_*)',
        'tabbed': r'((\t)|(    ))(.*)',
        'quoted': r'> ?(.*)',
    }

    def __init__(self):
        self.regExp = {}
        for key in self.patterns:
            self.regExp[key] = re.compile('^%s$' % self.patterns[key],
                                          re.DOTALL)
        self.regExp['containsline'] = re.compile(r'^([-]*)$|^([=]*)$', re.M)


RE = CorePatterns()


class Markdown:
    """Converts Markdown source text to XHTML.

    The pre/post-processors and inline patterns are module-level singletons
    whose per-run state (stash, references, safe mode) is rebound by
    ``reset``; instances therefore share that state.
    """

    def __init__(self, source=None, extensions=None, extension_configs=None,
                 safe_mode=False):
        """Set up the processing pipeline and load *extensions*.

        *safe_mode* may be False or one of 'replace', 'remove', 'escape'.
        """
        self.source = source
        if source is not None:
            message(WARN, 'The `source` arg of Markdown.__init__() is depreciated and will be removed in the future. Use `instance.convert(source)` instead.')
        self.safeMode = safe_mode
        self.blockGuru = BlockGuru()
        self.registeredExtensions = []
        self.stripTopLevelTags = 1
        self.docType = ''
        self.textPreprocessors = [HTML_BLOCK_PREPROCESSOR]
        self.preprocessors = [
            HEADER_PREPROCESSOR,
            LINE_PREPROCESSOR,
            REFERENCE_PREPROCESSOR]
        self.postprocessors = []
        self.textPostprocessors = [RAWHTMLTEXTPOSTPROCESSOR]
        self.prePatterns = []
        # Order matters: earlier patterns claim their text first.
        self.inlinePatterns = [
            DOUBLE_BACKTICK_PATTERN,
            BACKTICK_PATTERN,
            ESCAPE_PATTERN,
            REFERENCE_PATTERN,
            LINK_ANGLED_PATTERN,
            LINK_PATTERN,
            IMAGE_LINK_PATTERN,
            IMAGE_REFERENCE_PATTERN,
            AUTOLINK_PATTERN,
            AUTOMAIL_PATTERN,
            LINE_BREAK_PATTERN,
            HTML_PATTERN,
            ENTITY_PATTERN,
            NOT_STRONG_PATTERN,
            STRONG_EM_PATTERN,
            STRONG_EM_PATTERN_2,
            STRONG_PATTERN,
            STRONG_PATTERN_2,
            EMPHASIS_PATTERN,
            EMPHASIS_PATTERN_2]
        self.registerExtensions(extensions=extensions or [],
                                configs=extension_configs)
        self.reset()

    def registerExtensions(self, extensions, configs):
        """Instantiate each named extension and let it extend this instance.

        Extension modules are looked up in ``sys.modules`` (they are not
        imported here); a missing module is logged and skipped.
        """
        if not configs:
            configs = {}
        for ext in extensions:
            extension_module_name = 'calibre.ebooks.markdown.mdx_' + ext
            try:
                module = sys.modules[extension_module_name]
            except KeyError:
                message(CRITICAL, "couldn't load extension %s (looking for %s module)" % (ext, extension_module_name))
                continue
            configs_for_ext = configs.get(ext, [])
            extension = module.makeExtension(configs_for_ext)
            extension.extendMarkdown(self, globals())

    def registerExtension(self, extension):
        self.registeredExtensions.append(extension)

    def reset(self):
        """Start a fresh stash/reference table and rebind the shared processors."""
        self.references = {}
        self.htmlStash = HtmlStash()
        HTML_BLOCK_PREPROCESSOR.stash = self.htmlStash
        LINE_PREPROCESSOR.stash = self.htmlStash
        REFERENCE_PREPROCESSOR.references = self.references
        HTML_PATTERN.stash = self.htmlStash
        ENTITY_PATTERN.stash = self.htmlStash
        REFERENCE_PATTERN.references = self.references
        IMAGE_REFERENCE_PATTERN.references = self.references
        RAWHTMLTEXTPOSTPROCESSOR.stash = self.htmlStash
        RAWHTMLTEXTPOSTPROCESSOR.safeMode = self.safeMode
        for extension in self.registeredExtensions:
            extension.reset()

    def _transform(self):
        """Parse ``self.source`` into a Document and return it."""
        self.doc = Document()
        self.top_element = self.doc.createElement('span')
        self.top_element.appendChild(self.doc.createTextNode('\n'))
        self.top_element.setAttribute('class', 'markdown')
        self.doc.appendChild(self.top_element)

        text = self.source
        text = text.replace('\r\n', '\n').replace('\r', '\n')
        text += '\n\n'
        text = text.expandtabs(TAB_LENGTH)
        self.lines = text.split('\n')

        for prep in self.preprocessors:
            self.lines = prep.run(self.lines)

        # Each '#' header line starts a new section.
        buffer = []
        for line in self.lines:
            if line.startswith('#'):
                self._processSection(self.top_element, buffer)
                buffer = [line]
            else:
                buffer.append(line)
        self._processSection(self.top_element, buffer)

        self.top_element.appendChild(self.doc.createTextNode('\n'))

        for postprocessor in self.postprocessors:
            postprocessor.run(self.doc)
        return self.doc

    def _processSection(self, parent_elem, lines, inList=0, looseList=0):
        """Turn *lines* into child nodes of *parent_elem*.

        *inList* is the list nesting depth; *looseList* marks list items
        separated by blank lines (their content is wrapped in <p>).
        """
        processFn = {
            'ul': self._processUList,
            'ol': self._processOList,
            'quoted': self._processQuote,
            'tabbed': self._processCodeBlock}

        while lines:
            # Lists, quotes and code blocks consume the rest of the
            # section themselves (they recurse for whatever follows).
            for regexp in ('ul', 'ol', 'quoted', 'tabbed'):
                if RE.regExp[regexp].match(lines[0]):
                    processFn[regexp](parent_elem, lines, inList)
                    return None

            if inList:
                # Inside a list a sub-list may start without a blank line.
                start, lines = self._linesUntil(
                    lines,
                    lambda line: (RE.regExp['ul'].match(line)
                                  or RE.regExp['ol'].match(line)
                                  or not line.strip()))
                self._processSection(parent_elem, start, inList - 1,
                                     looseList=looseList)
                inList = inList - 1
            else:
                paragraph, lines = self._linesUntil(
                    lines, lambda line: not line.strip())
                if len(paragraph) and paragraph[0].startswith('#'):
                    self._processHeader(parent_elem, paragraph)
                elif paragraph:
                    self._processParagraph(parent_elem, paragraph,
                                           inList, looseList)

            if lines and not lines[0].strip():
                lines = lines[1:]  # skip the separating blank line

    def _processHeader(self, parent_elem, paragraph):
        m = RE.regExp['header'].match(paragraph[0])
        if m:
            level = len(m.group(1))
            h = self.doc.createElement('h%d' % level)
            parent_elem.appendChild(h)
            for item in self._handleInline(m.group(2).strip()):
                h.appendChild(item)
        else:
            message(CRITICAL, "We've got a problem header!")

    def _processParagraph(self, parent_elem, paragraph, inList, looseList):
        nodes = self._handleInline('\n'.join(paragraph))
        if (parent_elem.nodeName == 'li'
                and not (looseList or parent_elem.childNodes)):
            # First paragraph of a tight list item: no <p> wrapper.
            el = parent_elem
        else:
            el = self.doc.createElement('p')
            parent_elem.appendChild(el)
        for item in nodes:
            el.appendChild(item)

    def _processUList(self, parent_elem, lines, inList):
        self._processList(parent_elem, lines, inList,
                          listexpr='ul', tag='ul')

    def _processOList(self, parent_elem, lines, inList):
        self._processList(parent_elem, lines, inList,
                          listexpr='ol', tag='ol')

    def _processList(self, parent_elem, lines, inList, listexpr, tag):
        """Build a <ul>/<ol> from the leading list in *lines*.

        The caller guarantees that ``lines[0]`` is a list item, so *items*
        is never empty when a continuation line is appended.
        """
        ul = self.doc.createElement(tag)  # may actually be an <ol>
        parent_elem.appendChild(ul)
        looseList = 0
        items = []   # one list of lines per list item
        item = -1    # index of the current item
        i = 0        # number of lines consumed so far

        for line in lines:
            loose = 0
            if not line.strip():
                # A blank line might end the list.
                i += 1
                loose = 1
                upcoming = None
                for candidate in lines[i:]:
                    if candidate.strip():
                        upcoming = candidate
                        break
                if upcoming is None:
                    break  # no more text
                if (RE.regExp['ul'].match(upcoming)
                        or RE.regExp['ol'].match(upcoming)
                        or RE.regExp['tabbed'].match(upcoming)):
                    items[item].append(line.strip())
                    looseList = loose or looseList
                    continue
                break  # the next real line is not part of the list

            for expr in ('ul', 'ol', 'tabbed'):
                m = RE.regExp[expr].match(line)
                if m:
                    if expr in ('ul', 'ol'):
                        items.append([m.group(1)])
                        item += 1
                    else:
                        # Indented continuation: strip one indent level.
                        items[item].append(m.group(4))
                    i += 1
                    break
            else:
                # Plain continuation line of the current item.
                # NOTE(review): the decompiled listing consumed this line
                # without storing it (dropping the text); the append
                # follows upstream 1.7 -- confirm.
                items[item].append(line)
                i += 1
        else:
            i += 1

        for item_lines in items:
            li = self.doc.createElement('li')
            ul.appendChild(li)
            self._processSection(li, item_lines, inList + 1,
                                 looseList=looseList)

        self._processSection(parent_elem, lines[i:], inList)

    def _linesUntil(self, lines, condition):
        """Split *lines* before the first line satisfying *condition*."""
        i = -1
        for line in lines:
            i += 1
            if condition(line):
                break
        else:
            i += 1
        return (lines[:i], lines[i:])

    def _processQuote(self, parent_elem, lines, inList):
        """Build a <blockquote> from the leading quoted lines."""
        dequoted = []
        i = 0
        blank_line = False  # one blank line is allowed between paragraphs
        for line in lines:
            m = RE.regExp['quoted'].match(line)
            if m:
                dequoted.append(m.group(1))
                i += 1
                blank_line = False
            elif not blank_line and line.strip() != '':
                dequoted.append(line)
                i += 1
            elif not blank_line and line.strip() == '':
                dequoted.append(line)
                i += 1
                blank_line = True
            else:
                break

        blockquote = self.doc.createElement('blockquote')
        parent_elem.appendChild(blockquote)
        self._processSection(blockquote, dequoted, inList)
        self._processSection(parent_elem, lines[i:], inList)

    def _processCodeBlock(self, parent_elem, lines, inList):
        detabbed, theRest = self.blockGuru.detectTabbed(lines)
        pre = self.doc.createElement('pre')
        code = self.doc.createElement('code')
        parent_elem.appendChild(pre)
        pre.appendChild(code)
        text = '\n'.join(detabbed).rstrip() + '\n'
        code.appendChild(self.doc.createTextNode(text))
        self._processSection(parent_elem, theRest, inList)

    def _handleInline(self, line, patternIndex=0):
        """Apply the inline patterns to *line*; return a list of DOM nodes."""
        parts = [line]
        while patternIndex < len(self.inlinePatterns):
            pattern = self.inlinePatterns[patternIndex]
            i = 0
            while i < len(parts):
                x = parts[i]
                if isinstance(x, (str, unicode)):
                    result = self._applyPattern(x, pattern, patternIndex)
                    if result:
                        after, node, before = result
                        # Replace the string by (before, node, after) and
                        # stay on *before*: the patterns are greedy, so it
                        # may hold earlier matches of the same pattern.
                        parts[i:i + 1] = [before, node, after]
                        continue
                i += 1
            patternIndex += 1

        for i in range(len(parts)):
            x = parts[i]
            if isinstance(x, (str, unicode)):
                parts[i] = self.doc.createTextNode(x)
        return parts

    def _applyPattern(self, line, pattern, patternIndex):
        """Match *pattern* against *line*.

        Returns None, or ``(text_after, node, text_before)`` -- note the
        reversed order.
        """
        m = pattern.getCompiledRegExp().match(line)
        if not m:
            return None
        node = pattern.handleMatch(m, self.doc)
        if node:
            return (m.groups()[-1], node, m.group(1))
        return None

    def convert(self, source=None):
        """Convert *source* (or ``self.source``) and return the XHTML text."""
        if source is not None:
            self.source = source
        if not self.source:
            return u''
        try:
            self.source = unicode(self.source)
        except UnicodeDecodeError:
            message(CRITICAL, 'UnicodeDecodeError: Markdown only accepts unicode or ascii input.')
            return u''

        for pp in self.textPreprocessors:
            self.source = pp.run(self.source)

        doc = self._transform()
        xml = doc.toxml()

        if self.stripTopLevelTags:
            # Drop '<span class="markdown">' (23 chars) and '</span>' (7).
            xml = xml.strip()[23:-7] + '\n'

        for pp in self.textPostprocessors:
            xml = pp.run(xml)
        return (self.docType + xml).strip()

    def __str__(self):
        if self.source is None:
            status = 'in which no source text has been assinged.'
        else:
            status = 'which contains %d chars and %d line(s) of source.' % (len(self.source), self.source.count('\n') + 1)
        return 'An instance of "%s" %s' % (self.__class__, status)

    __unicode__ = convert


def markdownFromFile(input=None, output=None, extensions=None, encoding=None,
                     message_threshold=CRITICAL, safe=False):
    """Convert the file *input*; write to *output* or, if falsy, to stdout.

    *encoding* (default 'utf-8') is used for both reading and writing.
    """
    console_hndlr.setLevel(message_threshold)
    message(DEBUG, 'input file: %s' % input)
    if not encoding:
        encoding = 'utf-8'

    input_file = codecs.open(input, mode='r', encoding=encoding)
    try:
        text = input_file.read()
    finally:
        input_file.close()
    text = removeBOM(text, encoding)

    new_text = markdown(text, extensions or [], safe_mode=safe)

    if output:
        output_file = codecs.open(output, 'w', encoding=encoding)
        try:
            output_file.write(new_text)
        finally:
            output_file.close()
    else:
        # Encoded bytes go to the binary layer where one exists (Python 3).
        stream = getattr(sys.stdout, 'buffer', sys.stdout)
        stream.write(new_text.encode(encoding))


def markdown(text, extensions=None, safe_mode=False):
    """Convert *text* to XHTML.

    Each entry of *extensions* is ``'name'`` or ``'name(key=value,...)'``.
    """
    message(DEBUG, 'in markdown.markdown(), received text:\n%s' % text)
    extension_names = []
    extension_configs = {}
    for ext in extensions or []:
        pos = ext.find('(')
        if pos == -1:
            extension_names.append(ext)
            continue
        name = ext[:pos]
        extension_names.append(name)
        pairs = [x.split('=') for x in ext[pos + 1:-1].split(',')]
        configs = [(x.strip(), y.strip()) for x, y in pairs]
        extension_configs[name] = configs

    md = Markdown(extensions=extension_names,
                  extension_configs=extension_configs,
                  safe_mode=safe_mode)
    return md.convert(text)


class Extension:
    """Base class for extensions; ``config`` maps key -> [value, description]."""

    def __init__(self, configs=None):
        # A fresh dict per instance: a shared default dict would leak
        # settings between extensions through setConfig.
        if configs is None:
            configs = {}
        self.config = configs

    def getConfig(self, key):
        if key in self.config:
            return self.config[key][0]
        return ''

    def getConfigInfo(self):
        return [(key, self.config[key][1]) for key in self.config.keys()]

    def setConfig(self, key, value):
        self.config[key][0] = value


OPTPARSE_WARNING = '\nPython 2.3 or higher required for advanced command line options.\nFor lower versions of Python use:\n\n %s INPUT_FILE > OUTPUT_FILE\n \n' % EXECUTABLE_NAME_FOR_USAGE


def parse_options():
    """Parse ``sys.argv``; return keyword arguments for markdownFromFile, or None."""
    try:
        optparse = __import__('optparse')
    except ImportError:
        if len(sys.argv) == 2:
            return {
                'input': sys.argv[1],
                'output': None,
                'message_threshold': CRITICAL,
                'safe': False,
                'extensions': [],
                'encoding': None}
        print(OPTPARSE_WARNING)
        return None

    parser = optparse.OptionParser(usage='%prog INPUTFILE [options]')
    parser.add_option('-f', '--file', dest='filename',
                      help='write output to OUTPUT_FILE',
                      metavar='OUTPUT_FILE')
    parser.add_option('-e', '--encoding', dest='encoding',
                      help='encoding for input and output files')
    parser.add_option('-q', '--quiet', default=CRITICAL,
                      action='store_const', const=60, dest='verbose',
                      help='suppress all messages')
    parser.add_option('-v', '--verbose',
                      action='store_const', const=INFO, dest='verbose',
                      help='print info messages')
    parser.add_option('-s', '--safe', dest='safe', default=False,
                      metavar='SAFE_MODE',
                      help="same mode ('replace', 'remove' or 'escape' user's HTML tag)")
    parser.add_option('--noisy',
                      action='store_const', const=DEBUG, dest='verbose',
                      help='print debug messages')
    parser.add_option('-x', '--extension', action='append', dest='extensions',
                      help='load extension EXTENSION', metavar='EXTENSION')

    options, args = parser.parse_args()
    if not len(args) == 1:
        parser.print_help()
        return None
    input_file = args[0]
    if not options.extensions:
        options.extensions = []
    return {
        'input': input_file,
        'output': options.filename,
        'message_threshold': options.verbose,
        'safe': options.safe,
        'extensions': options.extensions,
        'encoding': options.encoding}


def main():
    """Command-line entry point."""
    options = parse_options()
    if not options:
        sys.exit(0)
    markdownFromFile(**options)


if __name__ == '__main__':
    sys.exit(main())