home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
linuxmafia.com 2016
/
linuxmafia.com.tar
/
linuxmafia.com
/
pub
/
linux
/
utilities-general
/
convert.py
< prev
next >
Wrap
Text File
|
2006-05-04
|
5KB
|
150 lines
#!/usr/bin/env python
# Massages text into lightweight html, needs python 2 (probably 2.2).
# Usage:
# Edit OUT_PAT according to where you want the html files created, create
# any directories needed and:
# convert.py <textfile> [... <textfile>]
# Paul Sorenson
# $Revision$
# vi: et
import sys
import cgi # only used for escaping html reserved characters in input text
import re
class Converter:
    """Turn plain-text files into lightweight HTML.

    Runs of non-blank lines become ``<p>`` paragraphs, line breaks inside a
    run become ``<br>``, HTML-reserved characters are escaped, and URLs and
    e-mail addresses are turned into links.  When OUT_PAT is set, every
    converted file gets its own output file and is recorded in an index
    that writeIndex() renders as nested ``<ul>`` lists.
    """

    # cgi.escape() no longer exists on current Python 3; html.escape() is
    # its replacement.  Fall back to cgi.escape() on Python 2.
    try:
        from html import escape as _escape
    except ImportError:  # Python 2
        from cgi import escape as _escape
    _escape = staticmethod(_escape)

    # The two link patterns are applied to text that has ALREADY been
    # HTML-escaped (see lineproc), so angle brackets appear as &lt; / &gt;
    # and a literal ampersand as &amp;.
    #
    # URL: runs up to whitespace or up to any entity other than &amp;.
    RE_URL = re.compile(r'(http|ftp|https)://(?:&amp;|[^\s&])+', re.IGNORECASE)
    # Address: optional surrounding (escaped) angle brackets are consumed
    # and dropped; group 2 is the bare address.
    RE_ADDR = re.compile(r'(&lt;)?([^\s&;]+@[^&\s]+)(&gt;)?', re.IGNORECASE)
    # Crack filenames: group 1 is everything before the last '.', group 2 is
    # what follows it.  Without a '.', group 1 is the whole name and group 2
    # is empty (the lookbehind only lets group 2 start right after a dot).
    RE_FILE = re.compile(r'^(.*?)\.?((?<=\.)[^.]*|)$')

    # Patterns used to select output filename (not thoroughly tested)
    #   %b  base part of the filename (everything up to the last '.' if one
    #       exists, otherwise everything), same as \g<1>
    #   %e  extension (not including the '.' if it exists), same as \g<2>
    # If None then use stdout
    OUT_PAT = None  # everything goes to stdout
    #OUT_PAT = '''%b.%e.foo'''         # index.txt > index.txt.foo
    #OUT_PAT = '''otherdir/%b.html'''  # index.txt > otherdir/index.html
    #OUT_PAT = '''%b.html'''           # index.txt > index.html
    #OUT_PAT = '''tmp/%b.html'''       # index.txt > tmp/index.html

    def __init__(self):
        """Prepare the output-name template and the empty index."""
        # Convert the user pattern to a valid replacement string.
        # There is nothing stopping the user entering \g<n> syntax directly.
        self.OUT_SUB = None
        if self.OUT_PAT:
            self.OUT_SUB = (self.OUT_PAT
                            .replace('%b', r'\g<1>')
                            .replace('%e', r'\g<2>'))
        self.index = {}      # nested dicts keyed by '-'-separated headings
        self.fileIndex = {}  # concatenated headings -> output filename

    def convert(self, filename):
        """Convert one text file to HTML on the configured output.

        The input is opened before the output is selected, so a missing
        input file does not leave an empty output file behind.  Both files
        are closed even if processing fails part-way.
        """
        self.filename = filename
        source = open(filename)
        try:
            self.setOut(filename)
            self.IN_PARA = 0
            try:
                self.writeHeader()
                for line in source:
                    self.lineproc(line)
                if self.IN_PARA:
                    # Close (not re-open) the paragraph still pending at EOF.
                    self.write('</p>\n')
                    self.IN_PARA = 0
                self.writeFooter()
            finally:
                if self.OUT is not sys.stdout:
                    self.OUT.close()
        finally:
            source.close()

    def lineproc(self, line):
        """Emit the HTML for a single input line.

        A blank line ends the current paragraph, if any, and is otherwise
        ignored.  A text line opens a paragraph or continues the current one
        with ``<br>``, then is escaped and has its links marked up.
        """
        line = line.strip()
        if not line:
            if self.IN_PARA:
                self.write('</p>\n')
                self.IN_PARA = 0
            return
        if self.IN_PARA:
            self.write('<br>\n')
        else:
            self.write('<p>')
            self.IN_PARA = 1
        # Escape reserved HTML characters before inserting any markup.
        line = self._escape(line, True)
        line = self.replaceEmailAddr(line)
        line = self.replaceUrl(line)
        self.write(line)

    def replaceUrl(self, line):
        """Return line with every RE_URL match wrapped in an anchor."""
        return self.RE_URL.sub(r'<a href="\g<0>">\g<0></a>', line)

    def replaceEmailAddr(self, line):
        """Return line with every RE_ADDR match turned into a mailto link."""
        # In real life you might want to obfuscate email addresses.
        return self.RE_ADDR.sub(r'<a href="mailto:\g<2>">\g<2></a>', line)

    def writeHeader(self):
        """Write the doctype and opening markup, titled with self.filename."""
        self.write('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">\n\n')
        # The filename is escaped so names containing & or < stay valid HTML.
        self.write('<html><head><title>%s</title><link rel=stylesheet type="text/css" href="http://linuxmafia.com/redrick.css"></head><body><div id="content">\n' % self._escape(self.filename, True))

    def writeFooter(self):
        """Write the markup that closes what writeHeader opened."""
        self.write('</div></body></html>\n')

    def setOut(self, filename):
        """Point self.OUT at the destination for filename.

        With no OUT_PAT the destination is stdout.  Otherwise the output
        name is derived from filename via RE_FILE/OUT_SUB, the pair is added
        to the index, a comment naming both files is printed to stdout, and
        the output file is opened for writing.
        """
        if not self.OUT_PAT:
            self.OUT = sys.stdout
            return
        outFile = self.RE_FILE.sub(self.OUT_SUB, filename)
        self.addIndex(filename, outFile)
        sys.stdout.write('<!--  %s - %s -->\n' % (filename, outFile))
        self.OUT = open(outFile, 'w')

    def addIndex(self, inFilename, outFilename):
        """Record outFilename under the '-'-separated headings of inFilename.

        The base name (extension removed) is split on '-'; each part becomes
        one nesting level in self.index.
        """
        base = self.RE_FILE.sub(r'\g<1>', inFilename)
        headings = base.split('-')
        node = self.index
        for heading in headings:
            node = node.setdefault(heading, {})
        # NOTE: headings are joined without a separator (printMap rebuilds
        # the key the same way), so 'a-bc' and 'ab-c' share one entry.
        self.fileIndex[''.join(headings)] = outFilename

    def writeIndex(self):
        """Write the collected index to 'index_auto.html'."""
        self.filename = 'index_auto.html'
        self.OUT = open(self.filename, 'w')
        try:
            self.writeHeader()
            self.printMap(self.index, 0, '')
            self.writeFooter()
        finally:
            self.OUT.close()

    def printMap(self, map, pad, lookup):
        """Write one level of the index as a ``<ul>``, recursing into children.

        map    -- dict of heading -> child dict for this level
        pad    -- passed down (increased by 2 per level) but otherwise unused
        lookup -- concatenation of the headings above this level; together
                  with the key it forms the self.fileIndex lookup key
        """
        self.write('<ul>\n')
        for key in sorted(map):
            filemap = lookup + key
            label = self._escape(key, True)
            if filemap in self.fileIndex:
                target = self._escape(self.fileIndex[filemap], True)
                s = self.makeUrl(target, label)
            else:
                s = label
            self.write('<li>' + s + '\n')
            if map[key]:
                self.printMap(map[key], pad + 2, filemap)
        self.write('</ul>\n')

    def makeUrl(self, ref, text):
        """Return an anchor to ref labelled text (or ref if either is empty).

        Both values are inserted verbatim; callers escape them beforehand.
        """
        if not (ref and text):
            text = ref
        return '<a href="%s">%s</a>' % (ref, text)

    def write(self, text):
        """Write text to the current output stream."""
        self.OUT.write(text)
def main():
    """Convert every file named on the command line, then write the index.

    Each argument is passed to Converter.convert() in order; afterwards
    Converter.writeIndex() is always called.
    """
    converter = Converter()
    for path in sys.argv[1:]:
        converter.convert(path)
    converter.writeIndex()


# Only run the conversion when executed as a script, not when imported.
if __name__ == '__main__':
    main()