# (Archive page navigation residue: home | CD-ROM | disk | FTP | other | search)
# Source Generated with Decompyle++
# File: in.pyc (Python 2.6)
-
version = '1.7'
version_info = (1, 7, 0, 'rc-2')
__revision__ = '$Rev: 72 $'

import re
import sys
import codecs
from logging import getLogger, StreamHandler, Formatter, DEBUG, INFO, WARN, ERROR, CRITICAL

# Level applied to the console handler below; records under it are not shown.
MESSAGE_THRESHOLD = CRITICAL

# The logger itself accepts every record (DEBUG and up); the filtering is
# done by the handler's level, which is the only thing callers need to change
# to make the module more or less verbose.
logger = getLogger('MARKDOWN')
logger.setLevel(DEBUG)
console_hndlr = StreamHandler()
formatter = Formatter('%(name)s-%(levelname)s: "%(message)s"')
console_hndlr.setFormatter(formatter)
console_hndlr.setLevel(MESSAGE_THRESHOLD)
logger.addHandler(console_hndlr)
-
def message(level, text):
    """Log ``text`` on the module-level ``logger`` at logging level ``level``."""
    logger.log(level, text)
-
# Tab width handed to ``str.expandtabs`` when the source text is normalised.
TAB_LENGTH = 4
# When true, ``{@name=value}`` markers in text are turned into attributes.
ENABLE_ATTRIBUTES = True
# Selects the stricter underscore-emphasis regular expression.
SMART_EMPHASIS = 1
# Replacement text for raw HTML in safe mode (neither 'escape' nor 'remove').
HTML_REMOVED_TEXT = '[HTML_REMOVED]'
# Inclusive (first, last) code-point ranges classified as right-to-left.
# Written as escapes so the file stays pure ASCII: a Python 2 source file with
# raw non-ASCII literals and no coding declaration is a SyntaxError.
RTL_BIDI_RANGES = (
    (u'\u0590', u'\u07ff'),
    (u'\u2d30', u'\u2d7f'),
)
# Byte-order marks to look for, keyed by encoding name.
BOMS = {
    'utf-8': (codecs.BOM_UTF8,),
    'utf-16': (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE),
}


def removeBOM(text, encoding):
    """Return ``text`` without a leading byte-order mark for ``encoding``.

    ``text`` may be a byte string or decoded text; for decoded text the BOM
    is decoded with ``encoding`` before comparing.  Encodings that have no
    entry in ``BOMS`` are returned unchanged.  Exactly one BOM is removed,
    by prefix (``lstrip`` would treat the BOM as a *set* of characters and
    could eat legitimate leading data).
    """
    # ``bytes`` is an alias of ``str`` on Python 2.6+, so "not bytes" means
    # decoded text there as well.
    decode_bom = not isinstance(text, bytes)
    for bom in BOMS.get(encoding, ()):
        if decode_bom:
            bom = bom.decode(encoding)
        # A BOM can decode to '' (the codec consumes it); nothing to strip then.
        if bom and text.startswith(bom):
            return text[len(bom):]
    return text
-
# Command name interpolated into the usage/help text.
EXECUTABLE_NAME_FOR_USAGE = 'python markdown.py'
# Stashed raw HTML is represented in the text by a placeholder made of this
# prefix, the stash index (``%d``) and a fixed suffix.
HTML_PLACEHOLDER_PREFIX = 'qaodmasdkwaspemas'
HTML_PLACEHOLDER = HTML_PLACEHOLDER_PREFIX + '%dajkqlsmdqpakldnzsdfls'
# Tag names treated as block-level (headings h0-h9 are handled separately in
# ``isBlockLevel``).
BLOCK_LEVEL_ELEMENTS = [
    'p',
    'div',
    'blockquote',
    'pre',
    'table',
    'dl',
    'ol',
    'ul',
    'script',
    'noscript',
    'form',
    'fieldset',
    'iframe',
    'math',
    'ins',
    'del',
    'hr',
    'hr/',
    'style',
]


def isBlockLevel(tag):
    """Return True if ``tag`` is a block-level tag name.

    A tag qualifies when it is listed in ``BLOCK_LEVEL_ELEMENTS`` or when it
    starts with ``h`` followed by a digit.  Names shorter than two characters
    that are not in the list are simply not block-level (no IndexError).
    """
    if tag in BLOCK_LEVEL_ELEMENTS:
        return True
    return len(tag) > 1 and tag[0] == 'h' and tag[1] in '0123456789'
-
# (pattern, replacement) pairs applied in order to escape text for output.
# '&' must come first so the ampersands introduced by the later replacements
# are not escaped again.  The replacement strings are the HTML entities; a
# replacement equal to the matched character would make the table a no-op.
ENTITY_NORMALIZATION_EXPRESSIONS = [
    (re.compile('&'), '&amp;'),
    (re.compile('<'), '&lt;'),
    (re.compile('>'), '&gt;'),
]
# "Soft" variant: leaves '&#' sequences (numeric character references)
# untouched and additionally escapes double quotes.
ENTITY_NORMALIZATION_EXPRESSIONS_SOFT = [
    (re.compile(r'&(?!\#)'), '&amp;'),
    (re.compile('<'), '&lt;'),
    (re.compile('>'), '&gt;'),
    (re.compile('"'), '&quot;'),
]
-
def getBidiType(text):
    """Classify ``text`` as 'rtl' or 'ltr' from its first character.

    Returns None when ``text`` is empty, or when the first character is not a
    ``unicode`` letter (byte strings are therefore never classified).
    Returns 'rtl' if that letter falls inside one of ``RTL_BIDI_RANGES``,
    otherwise 'ltr'.
    """
    if not text:
        return None
    ch = text[0]
    if not isinstance(ch, unicode) or not ch.isalpha():
        return None
    for low, high in RTL_BIDI_RANGES:
        if low <= ch <= high:
            return 'rtl'
    return 'ltr'
-
-
class Document:
    """Root object of the small DOM used to build the output.

    It owns the single document element, acts as the factory for nodes
    (setting each node's ``doc`` back-reference) and holds the document-wide
    text direction in ``bidi``.
    """

    def __init__(self):
        self.bidi = 'ltr'
        # Initialised here so the factory methods work even before
        # ``appendChild`` has been called.
        self.documentElement = None
        self.entities = {}

    def appendChild(self, child):
        """Install ``child`` as the document element and reset the entity cache."""
        self.documentElement = child
        child.isDocumentElement = True
        child.parent = self
        self.entities = {}

    def setBidi(self, bidi):
        """Set the document direction; a false value leaves it unchanged."""
        if bidi:
            self.bidi = bidi

    def createElement(self, tag, textNode=None):
        """Create an ``Element`` named ``tag``, optionally with one text child."""
        el = Element(tag)
        el.doc = self
        if textNode:
            el.appendChild(self.createTextNode(textNode))
        return el

    def createTextNode(self, text):
        """Create a ``TextNode`` holding ``text``."""
        node = TextNode(text)
        node.doc = self
        return node

    def createEntityReference(self, entity):
        """Return the ``EntityReference`` for ``entity``, cached per name."""
        if entity not in self.entities:
            self.entities[entity] = EntityReference(entity)
        return self.entities[entity]

    def createCDATA(self, text):
        """Create a ``CDATA`` node holding ``text``."""
        node = CDATA(text)
        node.doc = self
        return node

    def toxml(self):
        """Serialise the document element."""
        return self.documentElement.toxml()

    def normalizeEntities(self, text, avoidDoubleNormalizing=False):
        """Apply the entity-escaping substitutions to ``text``.

        With ``avoidDoubleNormalizing`` the "soft" table is used instead of
        the default one.
        """
        if avoidDoubleNormalizing:
            regexps = ENTITY_NORMALIZATION_EXPRESSIONS_SOFT
        else:
            regexps = ENTITY_NORMALIZATION_EXPRESSIONS
        for regexp, substitution in regexps:
            text = regexp.sub(substitution, text)
        return text

    def find(self, test):
        """Return the nodes below the document element for which ``test`` is true."""
        return self.documentElement.find(test)

    def unlink(self):
        """Unlink the element tree and drop the reference to it."""
        self.documentElement.unlink()
        self.documentElement = None
-
-
-
class CDATA:
    """DOM node whose text is emitted inside a ``<![CDATA[ ... ]]>`` section."""

    type = 'cdata'

    def __init__(self, text):
        self.text = text

    def handleAttributes(self):
        """No-op; present so every node type offers the same interface."""
        pass

    def toxml(self):
        """Return the text wrapped in a CDATA section, without escaping."""
        return '<![CDATA[' + self.text + ']]>'
-
-
-
class Element:
    """DOM element: a tag name, ordered attributes and a list of child nodes.

    ``parent`` and ``doc`` are not set by the constructor; they are assigned
    when the element is attached (``appendChild``/``insertChild``) or created
    through ``Document.createElement``.
    """

    type = 'element'

    def __init__(self, tag):
        self.nodeName = tag
        # Attribute names in insertion order; values live in attribute_values.
        self.attributes = []
        self.attribute_values = {}
        self.childNodes = []
        self.bidi = None
        self.isDocumentElement = False

    def setBidi(self, bidi):
        """Record the text direction and propagate it to the parent.

        Once set, the direction is not changed again, except on the document
        element.  A false ``bidi`` is ignored.
        """
        if bidi:
            if not self.bidi or self.isDocumentElement:
                self.bidi = bidi
                self.parent.setBidi(bidi)

    def unlink(self):
        """Recursively unlink child elements, then drop the child list."""
        for child in self.childNodes:
            if child.type == 'element':
                child.unlink()
        self.childNodes = None

    def setAttribute(self, attr, value):
        """Set attribute ``attr``; the first assignment fixes its output position."""
        if attr not in self.attributes:
            self.attributes.append(attr)
        self.attribute_values[attr] = value

    def insertChild(self, position, child):
        """Insert ``child`` at index ``position`` and make this its parent."""
        self.childNodes.insert(position, child)
        child.parent = self

    def removeChild(self, child):
        """Remove ``child`` from the child list."""
        self.childNodes.remove(child)

    def replaceChild(self, oldChild, newChild):
        """Put ``newChild`` in the position currently held by ``oldChild``."""
        position = self.childNodes.index(oldChild)
        self.removeChild(oldChild)
        self.insertChild(position, newChild)

    def appendChild(self, child):
        """Append ``child`` and make this its parent."""
        self.childNodes.append(child)
        child.parent = self

    def handleAttributes(self):
        """No-op; only text nodes carry attribute markers."""
        pass

    def find(self, test, depth=0):
        """Return all descendants for which ``test(node)`` is true.

        Nodes are collected depth-first in document order; ``depth`` is the
        current recursion depth and is only passed along.
        """
        matched_nodes = []
        for child in self.childNodes:
            if test(child):
                matched_nodes.append(child)
            if child.type == 'element':
                matched_nodes += child.find(test, depth + 1)
        return matched_nodes

    def toxml(self):
        """Serialise this element and its subtree to markup."""
        if ENABLE_ATTRIBUTES:
            for child in self.childNodes:
                child.handleAttributes()

        buffer = ''
        if self.nodeName in ('h1', 'h2', 'h3', 'h4'):
            buffer += '\n'
        elif self.nodeName in ('li',):
            buffer += '\n '

        # Children are serialised before the start tag is written because
        # serialising them can still add attributes to this element (text
        # direction, attribute markers).
        childBuffer = ''
        if self.childNodes or self.nodeName in ('blockquote',):
            childBuffer += '>'
            for child in self.childNodes:
                childBuffer += child.toxml()
            if self.nodeName == 'p':
                childBuffer += '\n'
            elif self.nodeName == 'li':
                childBuffer += '\n '
            childBuffer += '</%s>' % self.nodeName
        else:
            childBuffer += '/>'

        buffer += '<' + self.nodeName
        if self.nodeName in ('p', 'li', 'ul', 'ol', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6'):
            if 'dir' not in self.attribute_values:
                if self.bidi:
                    bidi = self.bidi
                else:
                    bidi = self.doc.bidi
                if bidi == 'rtl':
                    self.setAttribute('dir', 'rtl')

        for attr in self.attributes:
            value = self.attribute_values[attr]
            value = self.doc.normalizeEntities(value, avoidDoubleNormalizing=True)
            buffer += ' %s="%s"' % (attr, value)

        buffer += childBuffer
        if self.nodeName in ('p', 'br ', 'li', 'ul', 'ol', 'h1', 'h2', 'h3', 'h4'):
            buffer += '\n'
        return buffer
-
-
-
class TextNode:
    """DOM node holding character data.

    ``parent`` and ``doc`` are assigned from outside (when the node is
    attached and by ``Document.createTextNode``).
    """

    type = 'text'
    # Matches an attribute marker of the form {@name=value}.
    attrRegExp = re.compile(r'\{@([^\}]*)=([^\}]*)}')

    def __init__(self, text):
        self.value = text

    def attributeCallback(self, match):
        """Set the matched name/value pair as an attribute on the parent.

        Returns None, which ``re.sub`` treats as an empty replacement, so the
        marker disappears from the text.
        """
        self.parent.setAttribute(match.group(1), match.group(2))

    def handleAttributes(self):
        """Move every ``{@name=value}`` marker from the text to the parent."""
        self.value = self.attrRegExp.sub(self.attributeCallback, self.value)

    def toxml(self):
        """Return the escaped text, re-indented inside ``p`` and ``li`` parents.

        Also reports the text direction of this node to the parent.  Text
        that starts with the HTML placeholder prefix is not re-indented.
        """
        text = self.value
        self.parent.setBidi(getBidiType(text))
        if not text.startswith(HTML_PLACEHOLDER_PREFIX):
            if self.parent.nodeName == 'p':
                text = text.replace('\n', '\n ')
            elif self.parent.nodeName == 'li' and self.parent.childNodes[0] == self:
                text = '\n ' + text.replace('\n', '\n ')
        text = self.doc.normalizeEntities(text)
        return text
-
-
-
class EntityReference:
    """DOM node that is emitted as a single ``&name;`` entity reference."""

    type = 'entity_ref'

    def __init__(self, entity):
        self.entity = entity

    def handleAttributes(self):
        """No-op; present so every node type offers the same interface."""
        pass

    def toxml(self):
        """Return the entity name wrapped in ``&`` and ``;``."""
        return '&' + self.entity + ';'
-
-
-
class TextPreprocessor:
    """Base class for preprocessors that work on the whole source string."""

    def run(self, text):
        """Process ``text``; this base implementation does nothing and returns None."""
        pass
-
-
-
class Preprocessor:
    """Base class for preprocessors that work on the list of source lines."""

    def run(self, lines):
        """Process ``lines``; this base implementation does nothing and returns None."""
        pass
-
-
-
class HtmlBlockPreprocessor(TextPreprocessor):
    """Replace raw block-level HTML in the source with stash placeholders.

    ``self.stash`` is not created here; an object with a ``store(html)``
    method must be assigned to it before ``run`` is called.
    """

    def _get_left_tag(self, block):
        """Return the lower-cased name of the tag opening ``block`` ('' if none)."""
        tokens = block[1:].replace('>', ' ', 1).split()
        if not tokens:
            return ''
        return tokens[0].lower()

    def _get_right_tag(self, left_tag, block):
        """Return the tail of ``block`` where the closing tag for ``left_tag`` would be.

        The slice is ``len(left_tag) + 1`` characters ending just before the
        last character of the right-stripped block, lower-cased.
        """
        return block.rstrip()[-len(left_tag) - 2:-1].lower()

    def _equal_tags(self, left_tag, right_tag):
        """Return True if ``right_tag`` is accepted as closing ``left_tag``."""
        # 'div' and processing-instruction style openers are always accepted.
        if left_tag == 'div' or left_tag[0] in ('?', '@', '%'):
            return True
        if '/' + left_tag == right_tag:
            return True
        if right_tag == '--' and left_tag == '--':
            return True
        if left_tag == right_tag[1:] and right_tag[0] != '<':
            return True
        return False

    def _is_oneliner(self, tag):
        """Return True for tags that never have a closing counterpart."""
        return tag in ('hr', 'hr/')

    def run(self, text):
        """Return ``text`` with block-level HTML replaced by placeholders.

        The text is split on blank lines.  A block that starts with a
        block-level tag (or ``<!``, ``<?``, ``<@``, ``<%``) is stashed; if the
        block does not contain the closing tag, following blocks are
        collected until one ends with it.
        """
        new_blocks = []
        items = []       # blocks of a multi-block HTML section being collected
        left_tag = ''
        right_tag = ''
        in_tag = False

        for block in text.split('\n\n'):
            if block.startswith('\n'):
                block = block[1:]

            if in_tag:
                items.append(block.strip())
                right_tag = self._get_right_tag(left_tag, block)
                if self._equal_tags(left_tag, right_tag):
                    in_tag = False
                    new_blocks.append(self.stash.store('\n\n'.join(items)))
                    items = []
                continue

            if not block.startswith('<'):
                new_blocks.append(block)
                continue

            left_tag = self._get_left_tag(block)
            if not left_tag:
                # A lone '<' with nothing after it is ordinary text.
                new_blocks.append(block)
                continue
            right_tag = self._get_right_tag(left_tag, block)

            if not (isBlockLevel(left_tag) or block[1] in ('!', '?', '@', '%')):
                new_blocks.append(block)
                continue

            if self._is_oneliner(left_tag):
                new_blocks.append(block.strip())
                continue

            if block[1] == '!':
                # Comment block: it is closed by '--' before the final '>'.
                left_tag = '--'
                right_tag = self._get_right_tag(left_tag, block)

            if block.rstrip().endswith('>') and self._equal_tags(left_tag, right_tag):
                new_blocks.append(self.stash.store(block.strip()))
            else:
                # Opening tag without its closing tag: keep collecting.
                items.append(block.strip())
                in_tag = True

        if items:
            # Unterminated HTML section at end of input: stash what we have.
            new_blocks.append(self.stash.store('\n\n'.join(items)))
            new_blocks.append('\n')

        return '\n\n'.join(new_blocks)


HTML_BLOCK_PREPROCESSOR = HtmlBlockPreprocessor()
-
class HeaderPreprocessor(Preprocessor):
    """Normalise headers so that every header is a single '#'-prefixed line."""

    def run(self, lines):
        """Rewrite underlined headers in ``lines`` (in place) and return it.

        * After every line starting with '#', a '\\n' entry is inserted.
        * A non-blank line followed by a line made only of '=' becomes
          '# <line>', one followed by a line made only of '-' becomes
          '## <line>'; the underline itself is replaced by ''.
        """
        i = -1
        while i + 1 < len(lines):
            i = i + 1
            if not lines[i].strip():
                continue

            if lines[i].startswith('#'):
                lines.insert(i + 1, '\n')

            # Strict '<' so that the last line never looks past the end of
            # the list.
            if i + 1 < len(lines) and lines[i + 1] and lines[i + 1][0] in ('-', '='):
                underline = lines[i + 1].strip()
                if underline == '=' * len(underline):
                    lines[i] = '# ' + lines[i].strip()
                    lines[i + 1] = ''
                elif underline == '-' * len(underline):
                    lines[i] = '## ' + lines[i].strip()
                    lines[i + 1] = ''
        return lines


HEADER_PREPROCESSOR = HeaderPreprocessor()
-
class LinePreprocessor(Preprocessor):
    """Replace horizontal-rule lines with a stashed ``<hr />``.

    ``self.stash`` must be assigned (an object with ``store(html, safe=...)``)
    before ``run`` is called.
    """

    # Leading run of blockquote markers, kept in front of the placeholder.
    blockquote_re = re.compile('^(> )+')

    def run(self, lines):
        """Replace rule lines in ``lines`` (in place) and return the list."""
        for i, line in enumerate(lines):
            prefix = ''
            m = self.blockquote_re.search(line)
            if m:
                prefix = m.group(0)
            if self._isLine(line[len(prefix):]):
                lines[i] = prefix + self.stash.store('<hr />', safe=True)
        return lines

    def _isLine(self, block):
        """Return 1 if ``block`` is a horizontal rule, else 0.

        With all whitespace removed, the text must be longer than two
        characters and consist solely of '*', '-' or '_'.
        """
        # NOTE(review): the listing showed a single space here, but runs of
        # spaces in it appear to have been collapsed; an indented code block
        # is four spaces -- confirm against upstream.
        if block.startswith('    '):
            return 0
        text = ''.join([ch for ch in block if not ch.isspace()])
        if len(text) <= 2:
            return 0
        for pattern in ('isline1', 'isline2', 'isline3'):
            m = RE.regExp[pattern].match(text)
            if m and m.group(1):
                return 1
        return 0


LINE_PREPROCESSOR = LinePreprocessor()
-
class ReferencePreprocessor(Preprocessor):
    """Collect link reference definitions and remove them from the text.

    ``self.references`` (a dict) must be assigned before ``run`` is called.
    """

    def run(self, lines):
        """Return ``lines`` without the reference-definition lines.

        For each definition the lower-cased id is mapped to ``(url, title)``
        in ``self.references``.  A title is accepted when it is empty or
        wrapped in matching double quotes, single quotes or parentheses; a
        definition with any other trailing text is left in the output.
        """
        new_text = []
        for line in lines:
            m = RE.regExp['reference-def'].match(line)
            if not m:
                new_text.append(line)
                continue

            ref_id = m.group(2).strip().lower()
            title = m.group(4).strip()
            if not title:
                self.references[ref_id] = (m.group(3), title)
            elif len(title) >= 2 and (
                    title[0] == title[-1] == '"'
                    or title[0] == title[-1] == "'"
                    or (title[0] == '(' and title[-1] == ')')):
                self.references[ref_id] = (m.group(3), title[1:-1])
            else:
                new_text.append(line)
        return new_text


REFERENCE_PREPROCESSOR = ReferencePreprocessor()
# Building blocks for the inline patterns.  Each *_RE string is later wrapped
# as '^(.*)<pattern>(.*)$', so the first group of a pattern is match group 2.

NOBRACKET = r'[^\]\[]*'
# Bracketed text allowing up to six levels of nested brackets.
BRK = (r'\[(' + (NOBRACKET + r'(\[') * 6
       + (NOBRACKET + r'\])*') * 6
       + NOBRACKET + r')\]')
# Not preceded by '!', i.e. a link rather than an image.
NOIMG = r'(?<!\!)'

BACKTICK_RE = r'\`([^\`]*)\`'
DOUBLE_BACKTICK_RE = r'\`\`(.*)\`\`'
ESCAPE_RE = r'\\(.)'
EMPHASIS_RE = r'\*([^\*]*)\*'
STRONG_RE = r'\*\*(.*)\*\*'
STRONG_EM_RE = r'\*\*\*([^_]*)\*\*\*'

if SMART_EMPHASIS:
    # Opening underscore must not follow a non-space character and must be
    # followed by one.
    EMPHASIS_2_RE = r'(?<!\S)_(\S[^_]*)_'
else:
    EMPHASIS_2_RE = r'_([^_]*)_'

STRONG_2_RE = r'__([^_]*)__'
STRONG_EM_2_RE = r'___([^_]*)___'

LINK_RE = NOIMG + BRK + r'\s*\(([^\)]*)\)'
LINK_ANGLED_RE = NOIMG + BRK + r'\s*\(<([^\)]*)>\)'
IMAGE_LINK_RE = r'\!' + BRK + r'\s*\(([^\)]*)\)'
REFERENCE_RE = NOIMG + BRK + r'\s*\[([^\]]*)\]'
IMAGE_REFERENCE_RE = r'\!' + BRK + r'\s*\[([^\]]*)\]'
NOT_STRONG_RE = r'( \* )'
AUTOLINK_RE = r'<(http://[^>]*)>'
AUTOMAIL_RE = r'<([^> \!]*@[^> ]*)>'
HTML_RE = r'(\<[a-zA-Z/][^\>]*\>)'
ENTITY_RE = r'(&[\#a-zA-Z0-9]*;)'

# NOTE(review): the listing showed a single space in the two patterns below,
# but runs of spaces in it appear to have been collapsed.  Markdown's hard
# line break is two trailing spaces -- confirm against upstream.
LINE_BREAK_RE = r'  \n'
LINE_BREAK_2_RE = r'  $'
-
class Pattern:
    """Base class for inline patterns.

    ``pattern`` is embedded between two catch-all groups, so for a match
    ``group(1)`` is the text before the pattern, the pattern's own groups
    start at ``group(2)`` and the last group is the text after it.  Because
    the leading group is greedy, the right-most occurrence is matched.
    """

    def __init__(self, pattern):
        self.pattern = pattern
        self.compiled_re = re.compile('^(.*)%s(.*)$' % pattern, re.DOTALL)

    def getCompiledRegExp(self):
        """Return the compiled, wrapped regular expression."""
        return self.compiled_re


# Alias of ``Pattern``.
BasePattern = Pattern
-
class SimpleTextPattern(Pattern):
    """Pattern whose match is replaced by a plain text node."""

    def handleMatch(self, m, doc):
        """Return a text node holding the pattern's first group."""
        return doc.createTextNode(m.group(2))
-
-
-
class SimpleTagPattern(Pattern):
    """Pattern whose match is wrapped in an element named ``tag``."""

    def __init__(self, pattern, tag):
        Pattern.__init__(self, pattern)
        self.tag = tag

    def handleMatch(self, m, doc):
        """Return a ``tag`` element containing the pattern's first group as text."""
        el = doc.createElement(self.tag)
        el.appendChild(doc.createTextNode(m.group(2)))
        return el
-
-
-
class SubstituteTagPattern(SimpleTagPattern):
    """Pattern whose match is replaced by an empty element named ``tag``."""

    def handleMatch(self, m, doc):
        """Return an empty ``tag`` element; the matched text is discarded."""
        return doc.createElement(self.tag)
-
-
-
class BacktickPattern(Pattern):
    """Pattern for inline code spans; always produces a ``code`` element."""

    def __init__(self, pattern):
        Pattern.__init__(self, pattern)
        self.tag = 'code'

    def handleMatch(self, m, doc):
        """Return a ``code`` element holding the matched text, stripped."""
        el = doc.createElement(self.tag)
        text = m.group(2).strip()
        el.appendChild(doc.createTextNode(text))
        return el
-
-
-
class DoubleTagPattern(SimpleTagPattern):
    """Pattern producing two nested elements; ``tag`` is 'outer,inner'."""

    def handleMatch(self, m, doc):
        """Return the outer element, with the text inside the inner one."""
        outer_tag, inner_tag = self.tag.split(',')
        outer = doc.createElement(outer_tag)
        inner = doc.createElement(inner_tag)
        outer.appendChild(inner)
        inner.appendChild(doc.createTextNode(m.group(2)))
        return outer
-
-
-
class HtmlPattern(Pattern):
    """Pattern that stashes the matched raw markup.

    ``self.stash`` must be assigned (an object with ``store(html)``) before
    the pattern is applied.
    """

    def handleMatch(self, m, doc):
        """Store the matched markup and return a text node with its placeholder."""
        rawhtml = m.group(2)
        place_holder = self.stash.store(rawhtml)
        return doc.createTextNode(place_holder)
-
-
-
class LinkPattern(Pattern):
    """Pattern for inline links: ``[text](href "title")``."""

    def handleMatch(self, m, doc):
        """Return an ``a`` element for the match.

        Group 2 is the link text and group 9 the parenthesised part, which is
        split at the first double quote into href and optional title.
        """
        el = doc.createElement('a')
        el.appendChild(doc.createTextNode(m.group(2)))
        parts = m.group(9).split('"')
        if parts:
            el.setAttribute('href', parts[0].strip())
        else:
            el.setAttribute('href', '')
        if len(parts) > 1:
            # Re-attach the quote consumed by split() so dequote() sees the
            # title exactly as written.
            title = '"' + '"'.join(parts[1:]).strip()
            title = dequote(title)
            el.setAttribute('title', title)
        return el
-
-
-
class ImagePattern(Pattern):
    """Pattern for inline images: ``![alt](src "title")``."""

    def handleMatch(self, m, doc):
        """Return an ``img`` element for the match.

        Group 9 is split on whitespace: the first token is ``src`` and the
        rest, dequoted, the title.  Group 2 becomes the ``alt`` text.
        """
        el = doc.createElement('img')
        src_parts = m.group(9).split()
        if src_parts:
            el.setAttribute('src', src_parts[0])
        else:
            el.setAttribute('src', '')
        if len(src_parts) > 1:
            el.setAttribute('title', dequote(' '.join(src_parts[1:])))

        if ENABLE_ATTRIBUTES:
            # Attach the alt text temporarily as a child so that attribute
            # markers in it are applied to the img element, then detach it.
            text = doc.createTextNode(m.group(2))
            el.appendChild(text)
            text.handleAttributes()
            truealt = text.value
            el.childNodes.remove(text)
        else:
            truealt = m.group(2)
        el.setAttribute('alt', truealt)
        return el
-
-
-
class ReferencePattern(Pattern):
    """Pattern for reference links: ``[text][id]`` or ``[text][]``.

    ``self.references`` (id -> (href, title)) must be assigned before the
    pattern is applied.
    """

    def handleMatch(self, m, doc):
        """Return the element for a known reference, or None if the id is unknown.

        The id is group 9 if non-empty, otherwise the link text; it is
        looked up lower-cased.
        """
        if m.group(9):
            ref_id = m.group(9).lower()
        else:
            # Implicit reference: the text doubles as the id.
            ref_id = m.group(2).lower()
        if ref_id not in self.references:
            return None
        href, title = self.references[ref_id]
        text = m.group(2)
        return self.makeTag(href, title, text, doc)

    def makeTag(self, href, title, text, doc):
        """Build the ``a`` element; ``title`` is only set when non-empty."""
        el = doc.createElement('a')
        el.setAttribute('href', href)
        if title:
            el.setAttribute('title', title)
        el.appendChild(doc.createTextNode(text))
        return el
-
-
-
class ImageReferencePattern(ReferencePattern):
    """Pattern for reference images: ``![alt][id]``."""

    def makeTag(self, href, title, text, doc):
        """Build the ``img`` element; ``title`` is only set when non-empty."""
        el = doc.createElement('img')
        el.setAttribute('src', href)
        if title:
            el.setAttribute('title', title)
        el.setAttribute('alt', text)
        return el
-
-
-
class AutolinkPattern(Pattern):
    """Pattern for ``<http://...>`` automatic links."""

    def handleMatch(self, m, doc):
        """Return an ``a`` element whose href and text are both the URL."""
        el = doc.createElement('a')
        el.setAttribute('href', m.group(2))
        el.appendChild(doc.createTextNode(m.group(2)))
        return el
-
-
-
class AutomailPattern(Pattern):
    """Pattern for ``<user@example.com>`` automatic e-mail links."""

    def handleMatch(self, m, doc):
        """Return an ``a`` element with the address written as numeric entities.

        A leading 'mailto:' in the matched address is dropped from the
        visible text.  Every character of the text becomes an entity
        reference node and every character of the href ('mailto:' plus the
        address) a ``&#NN;`` reference.
        """
        el = doc.createElement('a')
        email = m.group(2)
        if email.startswith('mailto:'):
            email = email[len('mailto:'):]

        for letter in email:
            entity = doc.createEntityReference('#%d' % ord(letter))
            el.appendChild(entity)

        mailto = 'mailto:' + email
        mailto = ''.join(['&#%d;' % ord(letter) for letter in mailto])
        el.setAttribute('href', mailto)
        return el
-
-
# Shared, module-level instances of the inline patterns.
ESCAPE_PATTERN = SimpleTextPattern(ESCAPE_RE)
NOT_STRONG_PATTERN = SimpleTextPattern(NOT_STRONG_RE)

BACKTICK_PATTERN = BacktickPattern(BACKTICK_RE)
DOUBLE_BACKTICK_PATTERN = BacktickPattern(DOUBLE_BACKTICK_RE)
STRONG_PATTERN = SimpleTagPattern(STRONG_RE, 'strong')
STRONG_PATTERN_2 = SimpleTagPattern(STRONG_2_RE, 'strong')
EMPHASIS_PATTERN = SimpleTagPattern(EMPHASIS_RE, 'em')
EMPHASIS_PATTERN_2 = SimpleTagPattern(EMPHASIS_2_RE, 'em')

STRONG_EM_PATTERN = DoubleTagPattern(STRONG_EM_RE, 'strong,em')
STRONG_EM_PATTERN_2 = DoubleTagPattern(STRONG_EM_2_RE, 'strong,em')

# The tag name carries a trailing space ('br '); Element.toxml checks for
# exactly this spelling when deciding whether to add a newline.
LINE_BREAK_PATTERN = SubstituteTagPattern(LINE_BREAK_RE, 'br ')
LINE_BREAK_PATTERN_2 = SubstituteTagPattern(LINE_BREAK_2_RE, 'br ')

LINK_PATTERN = LinkPattern(LINK_RE)
LINK_ANGLED_PATTERN = LinkPattern(LINK_ANGLED_RE)
IMAGE_LINK_PATTERN = ImagePattern(IMAGE_LINK_RE)
IMAGE_REFERENCE_PATTERN = ImageReferencePattern(IMAGE_REFERENCE_RE)
REFERENCE_PATTERN = ReferencePattern(REFERENCE_RE)

HTML_PATTERN = HtmlPattern(HTML_RE)
ENTITY_PATTERN = HtmlPattern(ENTITY_RE)

AUTOLINK_PATTERN = AutolinkPattern(AUTOLINK_RE)
AUTOMAIL_PATTERN = AutomailPattern(AUTOMAIL_RE)
-
class Postprocessor:
    """Base class for postprocessors that work on the finished DOM."""

    def run(self, dom):
        """Process ``dom``; this base implementation does nothing and returns None."""
        pass
-
-
-
class TextPostprocessor:
    """Base class for postprocessors that work on the serialised output."""

    def run(self, text):
        """Process ``text``; this base implementation does nothing and returns None."""
        pass
-
-
-
class RawHtmlTextPostprocessor(TextPostprocessor):
    """Put stashed raw HTML back in place of its placeholders.

    ``self.stash`` and ``self.safeMode`` must be assigned before ``run`` is
    called.
    """

    def __init__(self):
        pass

    def run(self, text):
        """Return ``text`` with every placeholder replaced by its stashed HTML.

        In safe mode, blocks not stored as safe are escaped (``'escape'``),
        dropped (``'remove'``) or replaced by ``HTML_REMOVED_TEXT`` (any
        other true value).
        """
        for i in range(self.stash.html_counter):
            html, safe = self.stash.rawHtmlBlocks[i]
            if self.safeMode and not safe:
                mode = str(self.safeMode).lower()
                if mode == 'escape':
                    html = self.escape(html)
                elif mode == 'remove':
                    html = ''
                else:
                    html = HTML_REMOVED_TEXT

            placeholder = HTML_PLACEHOLDER % i
            # A placeholder alone in a paragraph loses the <p> wrapper.
            text = text.replace('<p>%s\n</p>' % placeholder, html + '\n')
            text = text.replace(placeholder, html)
        return text

    def escape(self, html):
        """Return ``html`` with &, <, > and double quotes replaced by entities."""
        # '&' first, so the entities produced below are not escaped again.
        html = html.replace('&', '&amp;')
        html = html.replace('<', '&lt;')
        html = html.replace('>', '&gt;')
        return html.replace('"', '&quot;')


RAWHTMLTEXTPOSTPROCESSOR = RawHtmlTextPostprocessor()
-
class HtmlStash:
    """Store of raw HTML fragments that are swapped for placeholders."""

    def __init__(self):
        self.html_counter = 0     # number of fragments stored so far
        self.rawHtmlBlocks = []   # (html, safe) tuples, indexed by counter

    def store(self, html, safe=False):
        """Save ``html`` and return the placeholder string that stands for it.

        ``safe`` is stored alongside the fragment.
        """
        self.rawHtmlBlocks.append((html, safe))
        placeholder = HTML_PLACEHOLDER % self.html_counter
        self.html_counter += 1
        return placeholder
-
-
-
- class BlockGuru:
-
- def _findHead(self, lines, fn, allowBlank = 0):
- items = []
- item = -1
- i = 0
- for line in lines:
- if not line.strip() and not allowBlank:
- return (items, lines[i:])
- if not line.strip() and allowBlank:
- i += 1
- for j in range(i, len(lines)):
- if lines[j].strip():
- next = lines[j]
- break
- continue
- not allowBlank
- else:
- break
- part = fn(next)
- if part:
- items.append('')
- continue
- else:
- break
-
- part = fn(line)
- if part:
- items.append(part)
- i += 1
- continue
- continue
- return (items, lines[i:])
- else:
- i += 1
- return (items, lines[i:])
-
-
- def detabbed_fn(self, line):
- m = RE.regExp['tabbed'].match(line)
- if m:
- return m.group(4)
- return None
-
-
- def detectTabbed(self, lines):
- return self._findHead(lines, self.detabbed_fn, allowBlank = 1)
-
-
-
def print_error(string):
    """Write ``string`` followed by a newline to standard error."""
    sys.stderr.write(string + '\n')
-
-
def dequote(string):
    """Return ``string`` without its surrounding quotes.

    The quotes are removed only when the string both starts and ends with
    the same kind of quote (double or single); otherwise it is returned
    unchanged.
    """
    if ((string.startswith('"') and string.endswith('"'))
            or (string.startswith("'") and string.endswith("'"))):
        return string[1:-1]
    return string
-
-
class CorePatterns:
    """Compiled regular expressions for block-level constructs.

    After construction ``regExp[name]`` holds the compiled form of
    ``patterns[name]`` anchored with '^' and '$' and compiled with
    ``re.DOTALL``; 'containsline' is the exception and is compiled
    separately in multi-line mode.
    """

    patterns = {
        'header': r'(#*)([^#]*)(#*)',
        'reference-def': r'(\ ?\ ?\ ?)\[([^\]]*)\]:\s*([^ ]*)(.*)',
        'containsline': r'([-]*)$|^([=]*)',
        'ol': r'[ ]{0,3}[\d]*\.\s+(.*)',
        'ul': r'[ ]{0,3}[*+-]\s+(.*)',
        'isline1': r'(\**)',
        'isline2': r'(\-*)',
        'isline3': r'(\_*)',
        # NOTE(review): the listing showed a single space in the third group,
        # but runs of spaces in it appear to have been collapsed.  An
        # indented block is a tab or four spaces (TAB_LENGTH is 4) --
        # confirm against upstream.
        'tabbed': r'((\t)|(    ))(.*)',
        'quoted': r'> ?(.*)',
    }

    def __init__(self):
        self.regExp = {}
        for key, pattern in self.patterns.items():
            self.regExp[key] = re.compile('^%s$' % pattern, re.DOTALL)
        self.regExp['containsline'] = re.compile(r'^([-]*)$|^([=]*)$', re.M)


RE = CorePatterns()
-
class Markdown:
    """Converter from Markdown source text to markup.

    The pipeline run by ``convert`` is: text preprocessors on the whole
    string, line preprocessors on the list of lines, block parsing into a DOM
    (with inline patterns applied to text), DOM postprocessors, serialisation
    and finally text postprocessors.

    The processors and patterns are shared module-level objects; ``reset``
    attaches this instance's stash, references and safe-mode setting to them.
    """

    def __init__(self, source=None, extensions=None, extension_configs=None, safe_mode=False):
        """Set up the processing pipeline.

        ``source``            -- deprecated; pass the text to ``convert``.
        ``extensions``        -- names of extensions to load (default: none).
        ``extension_configs`` -- dict mapping extension name to its config.
        ``safe_mode``         -- False, or 'escape' / 'remove' / any other
                                 true value; governs raw HTML in the output.
        """
        self.source = source
        if source is not None:
            message(WARN, 'The `source` arg of Markdown.__init__() is depreciated and will be removed in the future. Use `instance.convert(source)` instead.')

        self.safeMode = safe_mode
        self.blockGuru = BlockGuru()
        self.registeredExtensions = []
        self.stripTopLevelTags = 1
        self.docType = ''
        self.textPreprocessors = [HTML_BLOCK_PREPROCESSOR]
        self.preprocessors = [
            HEADER_PREPROCESSOR,
            LINE_PREPROCESSOR,
            REFERENCE_PREPROCESSOR,
        ]
        self.postprocessors = []
        self.textPostprocessors = [RAWHTMLTEXTPOSTPROCESSOR]
        self.prePatterns = []
        # Applied in this order; earlier patterns take precedence.
        self.inlinePatterns = [
            DOUBLE_BACKTICK_PATTERN,
            BACKTICK_PATTERN,
            ESCAPE_PATTERN,
            REFERENCE_PATTERN,
            LINK_ANGLED_PATTERN,
            LINK_PATTERN,
            IMAGE_LINK_PATTERN,
            IMAGE_REFERENCE_PATTERN,
            AUTOLINK_PATTERN,
            AUTOMAIL_PATTERN,
            LINE_BREAK_PATTERN,
            HTML_PATTERN,
            ENTITY_PATTERN,
            NOT_STRONG_PATTERN,
            STRONG_EM_PATTERN,
            STRONG_EM_PATTERN_2,
            STRONG_PATTERN,
            STRONG_PATTERN_2,
            EMPHASIS_PATTERN,
            EMPHASIS_PATTERN_2,
        ]
        self.registerExtensions(extensions=extensions or [], configs=extension_configs)
        self.reset()

    def registerExtensions(self, extensions, configs):
        """Load each named extension and let it extend this instance.

        An extension module is looked up in ``sys.modules`` under
        'calibre.ebooks.markdown.mdx_<name>' (it is not imported here); a
        missing module is logged at CRITICAL level and skipped.
        """
        if not configs:
            configs = {}

        for ext in extensions:
            extension_module_name = 'calibre.ebooks.markdown.mdx_' + ext
            try:
                module = sys.modules[extension_module_name]
            except KeyError:
                message(CRITICAL, "couldn't load extension %s (looking for %s module)" % (ext, extension_module_name))
                continue

            configs_for_ext = configs.get(ext, [])
            extension = module.makeExtension(configs_for_ext)
            extension.extendMarkdown(self, globals())

    def registerExtension(self, extension):
        """Remember ``extension`` so that ``reset`` also resets it."""
        self.registeredExtensions.append(extension)

    def reset(self):
        """Start from a clean state: new references and a new HTML stash.

        The fresh objects are attached to the shared processors and patterns,
        and every registered extension is reset.
        """
        self.references = {}
        self.htmlStash = HtmlStash()
        HTML_BLOCK_PREPROCESSOR.stash = self.htmlStash
        LINE_PREPROCESSOR.stash = self.htmlStash
        REFERENCE_PREPROCESSOR.references = self.references
        HTML_PATTERN.stash = self.htmlStash
        ENTITY_PATTERN.stash = self.htmlStash
        REFERENCE_PATTERN.references = self.references
        IMAGE_REFERENCE_PATTERN.references = self.references
        RAWHTMLTEXTPOSTPROCESSOR.stash = self.htmlStash
        RAWHTMLTEXTPOSTPROCESSOR.safeMode = self.safeMode
        for extension in self.registeredExtensions:
            extension.reset()

    def _transform(self):
        """Parse ``self.source`` into a DOM and return the ``Document``."""
        self.doc = Document()
        self.top_element = self.doc.createElement('span')
        self.top_element.appendChild(self.doc.createTextNode('\n'))
        self.top_element.setAttribute('class', 'markdown')
        self.doc.appendChild(self.top_element)

        # Normalise line endings, make sure the text ends with blank lines
        # and turn tabs into spaces.
        text = self.source
        text = text.replace('\r\n', '\n').replace('\r', '\n')
        text += '\n\n'
        text = text.expandtabs(TAB_LENGTH)

        self.lines = text.split('\n')
        for prep in self.preprocessors:
            self.lines = prep.run(self.lines)

        # Cut the lines into sections, each starting at a '#' header line.
        buffer = []
        for line in self.lines:
            if line.startswith('#'):
                self._processSection(self.top_element, buffer)
                buffer = [line]
            else:
                buffer.append(line)
        self._processSection(self.top_element, buffer)

        self.top_element.appendChild(self.doc.createTextNode('\n'))
        for postprocessor in self.postprocessors:
            postprocessor.run(self.doc)
        return self.doc

    def _processSection(self, parent_elem, lines, inList=0, looseList=0):
        """Turn ``lines`` into child nodes of ``parent_elem``.

        ``inList`` is the list nesting depth and ``looseList`` whether the
        enclosing list is loose (its items get ``p`` wrappers).
        """
        processFn = {
            'ul': self._processUList,
            'ol': self._processOList,
            'quoted': self._processQuote,
            'tabbed': self._processCodeBlock,
        }

        while lines:
            # Lists, blockquotes and code blocks are delegated; the handler
            # also processes whatever follows the construct.
            for regexp in ('ul', 'ol', 'quoted', 'tabbed'):
                if RE.regExp[regexp].match(lines[0]):
                    processFn[regexp](parent_elem, lines, inList)
                    return None

            if inList:
                # Inside a list item a sub-list may start without a blank
                # line, so a list marker ends the paragraph as well.
                start, lines = self._linesUntil(
                    lines,
                    lambda line: (RE.regExp['ul'].match(line)
                                  or RE.regExp['ol'].match(line)
                                  or not line.strip()))
                self._processSection(parent_elem, start, inList - 1, looseList=looseList)
                inList = inList - 1
            else:
                paragraph, lines = self._linesUntil(lines, lambda line: not line.strip())
                if len(paragraph) and paragraph[0].startswith('#'):
                    self._processHeader(parent_elem, paragraph)
                elif paragraph:
                    self._processParagraph(parent_elem, paragraph, inList, looseList)

            if lines and not lines[0].strip():
                lines = lines[1:]  # skip the blank separator line

    def _processHeader(self, parent_elem, paragraph):
        """Append an ``h<n>`` element for the header in ``paragraph[0]``.

        ``n`` is the number of leading '#' characters.
        """
        m = RE.regExp['header'].match(paragraph[0])
        if m:
            level = len(m.group(1))
            h = self.doc.createElement('h%d' % level)
            parent_elem.appendChild(h)
            for item in self._handleInline(m.group(2).strip()):
                h.appendChild(item)
        else:
            message(CRITICAL, "We've got a problem header!")

    def _processParagraph(self, parent_elem, paragraph, inList, looseList):
        """Append the inline-processed ``paragraph`` to ``parent_elem``.

        The content is wrapped in a ``p`` element, except for the first
        paragraph of an ``li`` in a tight list, which is added to the ``li``
        directly.
        """
        nodes = self._handleInline('\n'.join(paragraph))

        if parent_elem.nodeName == 'li' and not (looseList or parent_elem.childNodes):
            el = parent_elem
        else:
            el = self.doc.createElement('p')
            parent_elem.appendChild(el)

        for item in nodes:
            el.appendChild(item)

    def _processUList(self, parent_elem, lines, inList):
        """Process an unordered list starting at ``lines[0]``."""
        self._processList(parent_elem, lines, inList, listexpr='ul', tag='ul')

    def _processOList(self, parent_elem, lines, inList):
        """Process an ordered list starting at ``lines[0]``."""
        self._processList(parent_elem, lines, inList, listexpr='ol', tag='ol')

    def _processList(self, parent_elem, lines, inList, listexpr, tag):
        """Build a ``tag`` list element from ``lines`` and process the rest.

        The caller guarantees that ``lines[0]`` is a list item.  ``listexpr``
        is accepted for interface compatibility and not used.
        """
        ul = self.doc.createElement(tag)  # may actually be an <ol>
        parent_elem.appendChild(ul)

        looseList = 0
        items = []   # one list of lines per list item
        item = -1    # index of the item currently being filled
        i = 0        # index of the first line not yet consumed

        for line in lines:
            loose = 0
            if not line.strip():
                # A blank line might end the list: look at what follows.
                i += 1
                loose = 1
                upcoming = None
                for j in range(i, len(lines)):
                    if lines[j].strip():
                        upcoming = lines[j]
                        break
                if upcoming is None:
                    break  # nothing but blank lines left

                if (RE.regExp['ul'].match(upcoming)
                        or RE.regExp['ol'].match(upcoming)
                        or RE.regExp['tabbed'].match(upcoming)):
                    # Still inside the list; the blank line makes it loose.
                    items[item].append(line.strip())
                    looseList = loose or looseList
                    continue
                break

            for expr in ('ul', 'ol', 'tabbed'):
                m = RE.regExp[expr].match(line)
                if m:
                    if expr in ('ul', 'ol'):
                        # A new list item.
                        items.append([m.group(1)])
                        item += 1
                    elif expr == 'tabbed':
                        # Indented continuation: store it de-indented.
                        items[item].append(m.group(4))
                    i += 1
                    break
            else:
                # Unindented continuation line of the current item.
                # NOTE(review): the decompiled listing only advanced the
                # counter here, which would silently drop the line; the
                # append is restored from the upstream implementation --
                # confirm.
                items[item].append(line)
                i += 1
        else:
            i += 1

        for item_lines in items:
            li = self.doc.createElement('li')
            ul.appendChild(li)
            self._processSection(li, item_lines, inList + 1, looseList=looseList)

        self._processSection(parent_elem, lines[i:], inList)

    def _linesUntil(self, lines, condition):
        """Split ``lines`` just before the first line satisfying ``condition``.

        Returns ``(before, rest)``; ``rest`` starts with the matching line
        and is empty when no line matches.
        """
        i = -1
        for line in lines:
            i += 1
            if condition(line):
                break
        else:
            i += 1
        return (lines[:i], lines[i:])

    def _processQuote(self, parent_elem, lines, inList):
        """Build a ``blockquote`` from the leading quoted lines, then process the rest.

        Unquoted non-blank lines continue the quote; one blank line is
        allowed and the quote ends at the first unquoted line after it.
        """
        dequoted = []
        i = 0
        blank_line = False  # a blank line has been seen since the last '>' line
        for line in lines:
            m = RE.regExp['quoted'].match(line)
            if m:
                dequoted.append(m.group(1))
                i += 1
                blank_line = False
            elif not blank_line and line.strip() != '':
                dequoted.append(line)
                i += 1
            elif not blank_line and line.strip() == '':
                dequoted.append(line)
                i += 1
                blank_line = True
            else:
                break

        blockquote = self.doc.createElement('blockquote')
        parent_elem.appendChild(blockquote)
        self._processSection(blockquote, dequoted, inList)
        self._processSection(parent_elem, lines[i:], inList)

    def _processCodeBlock(self, parent_elem, lines, inList):
        """Build ``pre > code`` from the leading indented lines, then process the rest."""
        detabbed, theRest = self.blockGuru.detectTabbed(lines)
        pre = self.doc.createElement('pre')
        code = self.doc.createElement('code')
        parent_elem.appendChild(pre)
        pre.appendChild(code)
        text = '\n'.join(detabbed).rstrip() + '\n'
        code.appendChild(self.doc.createTextNode(text))
        self._processSection(parent_elem, theRest, inList)

    def _handleInline(self, line, patternIndex=0):
        """Apply the inline patterns to ``line`` and return a list of nodes.

        Patterns are tried in order starting at ``patternIndex``.  Each match
        splits a string part into (left string, node, right string); the
        string parts are processed further, and whatever strings remain at
        the end become text nodes.
        """
        parts = [line]

        while patternIndex < len(self.inlinePatterns):
            i = 0
            while i < len(parts):
                x = parts[i]
                if isinstance(x, (str, unicode)):
                    result = self._applyPattern(x, self.inlinePatterns[patternIndex], patternIndex)
                    if result:
                        # ``result`` is in reverse order, so inserting every
                        # piece at the same index restores reading order.
                        # ``i`` is stepped back so the left-hand string is
                        # examined again by the same pattern.
                        i -= 1
                        parts.remove(x)
                        for y in result:
                            parts.insert(i + 1, y)
                i += 1
            patternIndex += 1

        for i in range(len(parts)):
            x = parts[i]
            if isinstance(x, (str, unicode)):
                parts[i] = self.doc.createTextNode(x)
        return parts

    def _applyPattern(self, line, pattern, patternIndex):
        """Try ``pattern`` on ``line``.

        Returns None if the pattern does not match or yields no node;
        otherwise ``(text after the match, node, text before the match)`` --
        note the reverse order.  ``patternIndex`` is not used here.
        """
        m = pattern.getCompiledRegExp().match(line)
        if not m:
            return None
        node = pattern.handleMatch(m, self.doc)
        if node:
            return (m.groups()[-1], node, m.group(1))
        return None

    def convert(self, source=None):
        """Convert Markdown text and return the resulting markup as unicode.

        ``source`` replaces ``self.source`` when given.  Empty input, or
        input that cannot be converted to ``unicode``, yields ``u''`` (the
        latter is also logged at CRITICAL level).
        """
        if source is not None:
            self.source = source

        if not self.source:
            return u''

        try:
            self.source = unicode(self.source)
        except UnicodeDecodeError:
            message(CRITICAL, 'UnicodeDecodeError: Markdown only accepts unicode or ascii input.')
            return u''

        for pp in self.textPreprocessors:
            self.source = pp.run(self.source)

        doc = self._transform()
        xml = doc.toxml()

        if self.stripTopLevelTags:
            # Cut off the wrapper built in _transform: the 23 characters of
            # '<span class="markdown">' and the 7 of '</span>'.
            xml = xml.strip()[23:-7] + '\n'

        for pp in self.textPostprocessors:
            xml = pp.run(xml)

        return (self.docType + xml).strip()

    def __str__(self):
        """Return a short description of the instance and its source text."""
        if self.source is None:
            status = 'in which no source text has been assinged.'
        else:
            status = 'which contains %d chars and %d line(s) of source.' % (len(self.source), self.source.count('\n') + 1)
        return 'An instance of "%s" %s' % (self.__class__, status)

    # unicode(instance) runs the conversion.
    __unicode__ = convert
-
-
def markdownFromFile(input=None, output=None, extensions=None, encoding=None, message_threshold=CRITICAL, safe=False):
    """Convert the Markdown file ``input`` and write the result.

    ``input``             -- path of the source file.
    ``output``            -- path of the file to write; when empty the
                             result is encoded and written to stdout.
    ``extensions``        -- extension names passed on to ``markdown``.
    ``encoding``          -- used for reading and writing (default 'utf-8').
    ``message_threshold`` -- logging level set on the console handler.
    ``safe``              -- passed on as ``safe_mode``.
    """
    console_hndlr.setLevel(message_threshold)
    message(DEBUG, 'input file: %s' % input)
    if not encoding:
        encoding = 'utf-8'

    input_file = codecs.open(input, mode='r', encoding=encoding)
    try:
        text = input_file.read()
    finally:
        input_file.close()
    text = removeBOM(text, encoding)

    new_text = markdown(text, extensions or [], safe_mode=safe)

    if output:
        output_file = codecs.open(output, 'w', encoding=encoding)
        try:
            output_file.write(new_text)
        finally:
            output_file.close()
    else:
        sys.stdout.write(new_text.encode(encoding))
-
-
def markdown(text, extensions=None, safe_mode=False):
    """Convert the Markdown string ``text`` and return the markup.

    Each entry of ``extensions`` is either a bare extension name or
    ``name(key=value,key2=value2)``; the parenthesised part is parsed into a
    list of ``(key, value)`` pairs and handed to the extension as its
    configuration.
    """
    message(DEBUG, 'in markdown.markdown(), received text:\n%s' % text)

    extension_names = []
    extension_configs = {}
    for ext in extensions or []:
        pos = ext.find('(')
        if pos == -1:
            extension_names.append(ext)
            continue

        name = ext[:pos]
        extension_names.append(name)
        configs = []
        for pair in ext[pos + 1:-1].split(','):
            if not pair.strip():
                continue  # 'name()' or a stray comma
            # Split at the first '=' only, so values may contain '='.
            key, value = pair.split('=', 1)
            configs.append((key.strip(), value.strip()))
        extension_configs[name] = configs

    md = Markdown(extensions=extension_names, extension_configs=extension_configs, safe_mode=safe_mode)
    return md.convert(text)
-
-
class Extension:
    """Base class for extensions, holding their configuration.

    ``config`` maps an option name to a two-item sequence
    ``[value, description]``.
    """

    def __init__(self, configs=None):
        # A fresh dict per instance: a shared default would leak settings
        # from one extension instance to the next.
        if configs is None:
            configs = {}
        self.config = configs

    def getConfig(self, key):
        """Return the value of option ``key``, or '' if it is not defined."""
        if key in self.config:
            return self.config[key][0]
        return ''

    def getConfigInfo(self):
        """Return a list of ``(name, description)`` tuples for all options."""
        return [(key, self.config[key][1]) for key in self.config]

    def setConfig(self, key, value):
        """Set the value of the existing option ``key``.

        Raises KeyError if the option is not defined.
        """
        self.config[key][0] = value
-
-
# Shown when the optparse module cannot be imported.
OPTPARSE_WARNING = '\nPython 2.3 or higher required for advanced command line options.\nFor lower versions of Python use:\n\n %s INPUT_FILE > OUTPUT_FILE\n \n' % EXECUTABLE_NAME_FOR_USAGE


def parse_options():
    """Parse the command line.

    Returns a dict of keyword arguments for ``markdownFromFile`` (keys
    'input', 'output', 'message_threshold', 'safe', 'extensions',
    'encoding'), or None after printing help or the optparse warning when
    the arguments are unusable.
    """
    try:
        optparse = __import__('optparse')
    except ImportError:
        # Without optparse only the plain "INPUT_FILE" form is supported.
        if len(sys.argv) == 2:
            return {
                'input': sys.argv[1],
                'output': None,
                'message_threshold': CRITICAL,
                'safe': False,
                'extensions': [],
                'encoding': None,
            }
        print(OPTPARSE_WARNING)
        return None

    parser = optparse.OptionParser(usage='%prog INPUTFILE [options]')
    parser.add_option('-f', '--file', dest='filename', help='write output to OUTPUT_FILE', metavar='OUTPUT_FILE')
    parser.add_option('-e', '--encoding', dest='encoding', help='encoding for input and output files')
    # -q, -v and --noisy all write to 'verbose'; the default lives on -q.
    parser.add_option('-q', '--quiet', default=CRITICAL, action='store_const', const=60, dest='verbose', help='suppress all messages')
    parser.add_option('-v', '--verbose', action='store_const', const=INFO, dest='verbose', help='print info messages')
    parser.add_option('-s', '--safe', dest='safe', default=False, metavar='SAFE_MODE', help="same mode ('replace', 'remove' or 'escape' user's HTML tag)")
    parser.add_option('--noisy', action='store_const', const=DEBUG, dest='verbose', help='print debug messages')
    parser.add_option('-x', '--extension', action='append', dest='extensions', help='load extension EXTENSION', metavar='EXTENSION')

    options, args = parser.parse_args()
    if len(args) != 1:
        parser.print_help()
        return None

    input_file = args[0]
    if not options.extensions:
        options.extensions = []

    return {
        'input': input_file,
        'output': options.filename,
        'message_threshold': options.verbose,
        'safe': options.safe,
        'extensions': options.extensions,
        'encoding': options.encoding,
    }
-
-
def main():
    """Command-line entry point: parse the options and convert the file.

    Exits with status 0 when the options could not be parsed (help or a
    warning has already been printed in that case).
    """
    options = parse_options()
    if not options:
        sys.exit(0)
    markdownFromFile(**options)


if __name__ == '__main__':
    sys.exit(main())
-
-