home *** CD-ROM | disk | FTP | other *** search
- #!/usr/bin/env python
- # Massages text into lightweight html, needs python 2 (probably 2.2).
- # Usage:
- # Edit OUT_PAT according to where you want the html files created, create
- # any directories needed and:
- # convert.py <textfile> [... <textfile>]
- # Paul Sorenson
- # $Revision$
- # vi: et
-
- import sys
- import cgi # only used for escaping html reserved characters in input text
- import re
-
class Converter:
    """Massage plain-text files into lightweight HTML pages.

    Blank lines separate paragraphs, every other line break becomes a
    ``<br>``, and URLs and e-mail addresses are turned into links.  When
    ``OUT_PAT`` is set, each converted file is also recorded in a nested
    index (input names are split on '-') that ``writeIndex`` renders to
    ``index_auto.html``.
    """

    RE_URL = re.compile(r'(http|ftp|https)://\S+', re.IGNORECASE)
    # Applied to text that is already HTML-escaped, so optional angle
    # brackets around an address show up as the entities &lt; / &gt;.  The
    # local part excludes '&' and ';' so a leading entity is never swallowed
    # into the address.  Group 2 is the bare address.
    RE_ADDR = re.compile(r'(&lt;)?([^\s&;]+@[^&\s]+)(&gt;)?', re.IGNORECASE)
    # Crack filenames: group 1 is everything up to the last '.', (or the
    # whole name when there is no '.'), group 2 is the extension without the
    # '.'.  Both groups always participate in the match, so \g<1> and \g<2>
    # are safe to use in a replacement on every Python version.
    RE_FILE = re.compile(r'^(.*?)(?:\.(?=[^.]*$))?((?<=\.)[^.]*|)$')
    # Patterns used to select output filename (not thoroughly tested)
    #   %b gets base part of filename (everything up to last '.' if one
    #      exists, otherwise everything), same as \g<1>
    #   %e gets extension (not including '.' if it exists) same as \g<2>
    # If None then use stdout
    OUT_PAT = None  # everything goes to stdout
    #OUT_PAT = '''%b.%e.foo'''  # index.txt > index.txt.foo
    #OUT_PAT = '''otherdir/%b.html'''  # index.txt > otherdir/index.html
    #OUT_PAT = '''%b.html'''  # index.txt > index.html
    #OUT_PAT = '''tmp/%b.html'''  # index.txt > tmp/index.html

    def __init__(self):
        """Prepare the output-name template and the empty index tables."""
        # Convert the user pattern to a valid replacement string.  Nothing
        # stops the user from entering \g<n> syntax directly in OUT_PAT.
        self.OUT_SUB = None
        if self.OUT_PAT:
            self.OUT_SUB = self.OUT_PAT.replace('%b', r'\g<1>').replace('%e', r'\g<2>')
        self.index = {}      # nested dicts keyed by '-'-separated name parts
        self.fileIndex = {}  # joined name parts -> output filename

    def convert(self, filename):
        """Convert the text file `filename` and write the resulting HTML.

        Output goes to stdout, or to the file selected by OUT_PAT.  Both
        the input file and any output file are closed even if conversion
        fails part-way.
        """
        self.filename = filename
        self.IN_PARA = 0
        # Open the input first so that a missing input file does not leave
        # behind an empty output file and a stray index entry.
        f = open(filename)
        try:
            self.setOut(filename)
            try:
                self.writeHeader()
                for line in f:
                    self.lineproc(line)
                if self.IN_PARA:
                    # Close the paragraph still open at end of file.
                    self.write('</p>\n')
                    self.IN_PARA = 0
                self.writeFooter()
            finally:
                if self.OUT is not sys.stdout:
                    self.OUT.close()
        finally:
            f.close()

    def lineproc(self, line):
        """Emit the HTML for one input line, tracking paragraph state."""
        line = line.strip()
        if not line:
            # A blank line ends the current paragraph; further blank lines
            # are ignored instead of producing empty paragraphs.
            if self.IN_PARA:
                self.write('</p>\n')
                self.IN_PARA = 0
            return
        if self.IN_PARA:
            self.write('<br>\n')
        else:
            self.write('<p>')
            self.IN_PARA = 1
        # Escape reserved HTML characters before inserting any markup.
        line = self._escape(line)
        line = self.replaceEmailAddr(line)
        line = self.replaceUrl(line)
        self.write(line)

    def _escape(self, text):
        """Return `text` with &, <, > and double quotes replaced by entities.

        Produces the same output as the old cgi.escape(text, 1), which no
        longer exists in current Python releases.
        """
        text = '%s' % (text,)  # accept anything %-formatting would accept
        text = text.replace('&', '&amp;')  # must come first
        text = text.replace('<', '&lt;')
        text = text.replace('>', '&gt;')
        return text.replace('"', '&quot;')

    def replaceUrl(self, line):
        """Wrap every http/https/ftp URL in `line` in an anchor tag."""
        return self.RE_URL.sub(r'<a href="\g<0>">\g<0></a>', line)

    def replaceEmailAddr(self, line):
        """Wrap every e-mail address in `line` in a mailto: anchor.

        `line` is expected to be HTML-escaped already; escaped angle
        brackets surrounding an address are dropped.
        """
        # In real life you might want to obfuscate email addresses.
        return self.RE_ADDR.sub(r'<a href="mailto:\g<2>">\g<2></a>', line)

    def writeHeader(self):
        """Write the doctype and page header, titled with self.filename."""
        self.write('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">\n\n')
        self.write('<html><head><title>%s</title><link rel=stylesheet type="text/css" href="http://linuxmafia.com/redrick.css"></head><body><div id="content">\n' % self._escape(self.filename))

    def writeFooter(self):
        """Write the closing tags matching writeHeader."""
        self.write('</div></body></html>\n')

    def setOut(self, filename):
        """Select self.OUT for `filename`: stdout, or a file named via OUT_PAT.

        When a file is used, the input is also added to the index and a
        comment naming input and output is written to stdout.
        """
        if not self.OUT_PAT:
            self.OUT = sys.stdout
            return
        outFile = self.RE_FILE.sub(self.OUT_SUB, filename)
        self.addIndex(filename, outFile)
        sys.stdout.write('<!--  %s - %s -->\n' % (filename, outFile))
        self.OUT = open(outFile, 'w')

    def addIndex(self, inFilename, outFilename):
        """Record `outFilename` in the index under the parts of `inFilename`.

        The base name of `inFilename` is split on '-'; each part becomes one
        level of nesting in self.index, and the parts joined together form
        the key under which `outFilename` is stored in self.fileIndex.
        """
        base = self.RE_FILE.sub(r'\g<1>', inFilename)
        ind = base.split('-')
        node = self.index
        for heading in ind:
            if heading not in node:
                node[heading] = {}
            node = node[heading]
        self.fileIndex[''.join(ind)] = outFilename

    def writeIndex(self):
        """Write the collected index as nested lists to index_auto.html."""
        self.filename = 'index_auto.html'
        self.OUT = open(self.filename, 'w')
        try:
            self.writeHeader()
            self.printMap(self.index, 0, '')
            self.writeFooter()
        finally:
            self.OUT.close()

    def printMap(self, map, pad, lookup):
        """Write one level of the index as a <ul>, recursing into children.

        `map` is the dict for this level, `lookup` is the concatenation of
        the keys leading to it (used to find entries in self.fileIndex), and
        `pad` is only passed along to the recursive call.
        """
        keys = list(map.keys())
        keys.sort()
        self.write('<ul>\n')
        for key in keys:
            filemap = lookup + key
            if filemap in self.fileIndex:
                s = self.makeUrl(self.fileIndex[filemap], key)
            else:
                s = self._escape(key)
            self.write('<li>' + s + '\n')
            if map[key]:
                self.printMap(map[key], pad + 2, filemap)
        self.write('</ul>\n')

    def makeUrl(self, ref, text):
        """Return an anchor to `ref` labelled `text`.

        `ref` itself is used as the label when either value is empty.
        """
        label = ref
        if ref and text:
            label = text
        return '<a href="%s">%s</a>' % (self._escape(ref), self._escape(label))

    def write(self, text):
        """Write `text` to the current output stream."""
        self.OUT.write(text)
-
-
def main():
    """Convert every file named on the command line, then write the index."""
    converter = Converter()
    for path in sys.argv[1:]:
        converter.convert(path)
    converter.writeIndex()


# Only run the conversion when executed as a script, so that importing this
# module does not convert sys.argv or write index_auto.html as a side effect.
if __name__ == '__main__':
    main()
-