home *** CD-ROM | disk | FTP | other *** search
- """Module to analyze Python source code; for syntax coloring tools.
-
- Interface:
- tags = fontify(pytext, searchfrom, searchto)
-
- The 'pytext' argument is a string containing Python source code.
- The (optional) arguments 'searchfrom' and 'searchto' delimit the slice of pytext that is scanned; they default to the start and the end of pytext.
- The returned value is a list of tuples, formatted like this:
- [('keyword', 0, 6, None), ('keyword', 11, 17, None), ('comment', 23, 53, None), etc. ]
- The tuple contents are always like this:
- (tag, startindex, endindex, sublist)
- tag is one of 'keyword', 'string', 'comment' or 'identifier'
- sublist is not used, hence always None.
- """
-
- # Based on FontText.py by Mitchell S. Chapman,
- # which was modified by Zachary Roadhouse,
- # then un-Tk'd by Just van Rossum.
- # Many thanks for regular expression debugging & authoring go to:
- # Tim (the-incredib-ly y'rs) Peters and Christian Tismer
- # So, who owns the copyright? ;-) How about this:
- # Copyright 1996-1997:
- # Mitchell S. Chapman,
- # Zachary Roadhouse,
- # Tim Peters,
- # Just van Rossum
-
- __version__ = "0.3.1"
-
- import string, regex
-
- # First a little helper, since I don't like to repeat things. (Tismer speaking)
- import string
def replace(where, what, with_):
    """Return a copy of `where` with every occurrence of `what` replaced.

    where -- the string to work on
    what  -- the (non-empty) substring to look for
    with_ -- the replacement text

    The third parameter used to be called `with`; that name is a reserved
    word in Python 2.6 and later, so it carries a trailing underscore now.
    Positional callers are unaffected.

    Raises ValueError when `what` is the empty string (splitting on an
    empty separator is not allowed).
    """
    # Split/join rather than str.replace() so that an empty `what` keeps
    # raising instead of silently inserting `with_` between all characters.
    return with_.join(where.split(what))
-
# 're' replaces the obsolete 'regex' module this code was first written for.
import re

# This list of keywords is taken from ref/node13.html of the
# Python 1.3 HTML documentation. ("access" is intentionally omitted.)
# "exec" is a reserved word in the same Python versions as "print",
# so it belongs here too.
keywordsList = [
    "assert", "exec",
    "del", "from", "lambda", "return",
    "and", "elif", "global", "not", "try",
    "break", "else", "if", "or", "while",
    "class", "except", "import", "pass",
    "continue", "finally", "in", "print",
    "def", "for", "is", "raise"]

# Build up a regular expression which will match anything
# interesting, including multi-line triple-quoted strings.
commentPat = "#.*"

# In the two string templates below the letter 'q' stands for the quote
# character; each template is expanded once for ' and once for ".
# A backslash escapes whatever single character follows it ([\s\S] is
# "any character, newline included").
pat = r"q[^\\q\n]*(?:\\[\s\S][^\\q\n]*)*q"
quotePat = pat.replace("q", "'") + "|" + pat.replace("q", '"')

# Way to go, Tim!
# Inside a triple-quoted string one or two consecutive quote characters
# are ordinary text; only three in a row close the string.
pat = r"""
    qqq
    [^\\q]*
    (?:
        (?: \\[\s\S]
        |   q
            (?: \\[\s\S]
            |   [^\\q]
            |   q
                (?: \\[\s\S]
                |   [^\\q]
                )
            )
        )
        [^\\q]*
    )*
    qqq
"""
pat = "".join(pat.split())    # get rid of whitespace
tripleQuotePat = pat.replace("q", "'") + "|" + pat.replace("q", '"')

# Build up a regular expression which matches all and only
# Python keywords.  This will let us skip the uninteresting
# identifier references.
# A keyword must not be directly preceded or followed by any of the
# characters in keyChars.  The checks are zero-width look-arounds, so
#   - the match spans exactly the keyword (nothing to strip afterwards),
#   - a keyword at the very start or the very end of the text matches,
#   - a '#' right in front of a keyword is left for the comment pattern.
keyChars = "[a-zA-Z0-9_.\"']"
nonKeyPat = "(?<!" + keyChars + ")"         # nothing key-like before ...
nonKeyAfterPat = "(?!" + keyChars + ")"     # ... and nothing key-like after

keyPat = nonKeyPat + "(?:" + "|".join(keywordsList) + ")" + nonKeyAfterPat

matchPat = keyPat + "|" + commentPat + "|" + tripleQuotePat + "|" + quotePat
matchRE = re.compile(matchPat)

idKeyPat = "[ \t]*[A-Za-z_][A-Za-z_0-9.]*"    # Ident w. leading whitespace.
idRE = re.compile(idKeyPat)


def fontify(pytext, searchfrom=0, searchto=None):
    """Return a list of syntax-colouring tags for Python source text.

    pytext     -- string containing Python source code
    searchfrom -- index in pytext at which scanning starts (default 0)
    searchto   -- index at which scanning stops (default: len(pytext));
                  an item is reported when it *starts* before searchto,
                  even if it extends past it

    Each tag is a tuple (tag, startindex, endindex, None) where tag is
    'keyword', 'string', 'comment' or 'identifier', and
    pytext[startindex:endindex] is the tagged text.  An 'identifier' tag
    is produced for the name directly following "def" or "class"; its
    span includes the whitespace between the keyword and the name.
    """
    if searchto is None:
        searchto = len(pytext)
    # Cache a few attributes for quicker reference.
    search = matchRE.search
    idMatch = idRE.match
    tags = []
    tags_append = tags.append

    end = searchfrom
    while 1:
        m = search(pytext, end)
        if m is None:
            break    # nothing interesting left
        start = m.start()
        if start >= searchto:
            break    # next item lies outside the requested slice
        end = m.end()
        # The first character tells which alternative matched.
        c = pytext[start]
        if c == "#":
            tags_append(('comment', start, end, None))
        elif c in "'\"":
            tags_append(('string', start, end, None))
        else:
            # Must have matched a keyword.
            tags_append(('keyword', start, end, None))
            # If this was a defining keyword, look ahead to the
            # following identifier (anchored right behind the keyword).
            if m.group() in ("def", "class"):
                idm = idMatch(pytext, end)
                if idm is not None:
                    start = end
                    end = idm.end()
                    tags_append(('identifier', start, end, None))
    return tags
-
-
def test(path):
    """Fontify the file at `path` and print every tag that was found.

    One line is written to standard output per tag: the tag name, a
    space, and the repr() of the tagged piece of text.
    """
    f = open(path)
    try:
        text = f.read()
    finally:
        # Close the file even when reading fails.
        f.close()
    for tag, start, end, sublist in fontify(text):
        # A single pre-formatted string prints the same under the print
        # statement and the print() function.
        print("%s %s" % (tag, repr(text[start:end])))
-