############################################################################
#
# File: chkhtml.icn
#
# Subject: Program to check HTML files
#
# Author: Robert J. Alexander
#
# Date: November 15, 1994
#
############################################################################
#
# This file is in the public domain.
#
############################################################################
#
# Program to check an HTML file.
#
# Errors detected:
#
# - Reference to undefined anchor name.
# - Duplicated anchor name.
# - Warning for unreferenced anchor name.
# - Unknown tag.
# - Badly formed tag.
# - Improper tag nesting.
# - Unescaped <, >, ", or &.
# - Bad escape string.
# - Improper embedding of attributes.
# - Bad (non-ascii) characters
#
# Advises on:
# - Use of , tags.
#
procedure Usage(s)
write(&errout,\s)
stop(
"Usage: ChkHTML -options file..._
\n -u supress warnings for unreferenced anchor names_
\n -q supress errors for \"\\\"\" (quote) character in open text_
\n -g supress errors for \">\" character in open text_
\n -l n level of HTML (default 2)")
end
global SupressUnrefNames,SupressOpenQuot,SupressOpenGT,HTMLLevel
procedure Init(arg)
local opt,f
ListTypes := ["UL","OL","MENU","DIR"]
opt := options(arg,"uqgl+",Usage)
if *arg = 0 then Usage()
SupressUnrefNames := opt["u"]
SupressOpenQuot := opt["q"]
SupressOpenGT := opt["g"]
HTMLLevel := \opt["l"] | 2
return opt
end
link options
global FileName,LineNbr,TagStack,HRefList,NameSet,NameRefSet,ErrorCount,
SeenSet,PlainText,Tagless,Msg,ListTypes
procedure main(arg)
SetMsg()
Init(arg)
every CheckHTML(!arg)
end
procedure CheckHTML(fn)
local f,line,c
static badChars,scanChars
initial {
badChars := ~(&cset[33:128] ++ '\t')
scanChars := '<>"&' ++ badChars
}
#
# Open the input file.
#
f := open(fn) | {
write(&errout,"Can't open \"",fn,"\"")
fail
}
FileName := fn
write(&errout)
Msg("Checking HTML format...")
ErrorCount := 0
LineNbr := 0
TagStack := []
NameSet := set()
NameRefSet := set()
HRefList := []
SeenSet := set()
PlainText := &null
while line := read(f) do line ? {
LineNbr +:= 1
while tab(upto(scanChars)) do {
case c := move(1) of {
"<": ProcessTag(f) | break
">": if /Tagless & /SupressOpenGT then Error("\">\" in open text")
"\"": if /Tagless & /SupressOpenQuot then Error("\"\\\"\" (quote) in open text")
"&": if /Tagless then ProcessEscape() | Error("\"&\" in open text")
default: Error("Bad character: ",image(c))
}
}
}
close(f)
CheckStack()
CheckHRefs()
FileName := fn
LineNbr := &null
GiveAdvice()
Msg((if ErrorCount > 0 then string(ErrorCount) else "No")
," error",(if ErrorCount = 1 then "" else "s"),
" detected")
return
end
procedure CheckHRefs()
local x
every x := !HRefList do {
if not member(NameSet,x.value) then {
FileName := x.fileName
LineNbr := x.lineNbr
Error("Anchor name referenced but not defined: ",image(x.value))
}
}
if /SupressUnrefNames then {
LineNbr := &null
every x := !(NameSet -- NameRefSet) do {
Msg("Warning: Anchor name not referenced: ",image(x))
}
}
return
end
procedure CheckStack()
local tag
every tag := pop(TagStack) do
Error(pop(TagStack),"Unterminated tag: <",tag,">")
return
end
procedure ProcessTag(f)
local tag,subLine,upTag,endFlag,popCount,tagLines,listType
#
# Scan to the end of the tag (which might be multiple lines).
#
tag := ""
tagLines := 0
if ="!--" then {
#
# Comment tag.
#
until tab(find("-->") + 3) do {
&subject := read(f) | {
Error("Unclosed HTML comment (\"