home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
World of Shareware - Software Farm 2
/
wosw_2.zip
/
wosw_2
/
PASCAL
/
KRUSE_11.ZIP
/
INDEXTEX.PAS
< prev
next >
Wrap
Pascal/Delphi Source File
|
1990-12-01
|
37KB
|
987 lines
{outline of declaration of subprograms:
1. program IndexText(InText, InIndex, NewIndex, OutIndex, HashFile,
NewHashFile, input, output); (main program)
2. function Lt(u, v: word): Boolean;
3. procedure ReadWord(var f: text; var w: word);
4. procedure WriteWord(var f: text; w: word);
4a. built in CPU time function clock;
5. procedure SplitWords; (phase 1)
5a. function FindFile(ch: char): filecode;
6. function HashAddress(w: word): hashentry;
7. procedure Initialize;
8. procedure GetWord;
8a. procedure TellUserPage;
9. procedure GetChar(var ch: char);
10. procedure AddChar(ch: char);
11. procedure Conclude;
12. procedure ClassifyWords; (phase 2)
13. procedure BuildTree(var root: pointer; ch: char);
15. function Power2(c: integer): level;
(the next three procedures are written in line.)
14. procedure Insert(p: pointer);
16. procedure FindRoot;
17. procedure ConnectSubtrees;
18. procedure GetNode(var p: pointer; ch: char);
19. procedure Process(r: reference);
20. procedure UpdateNode(p: pointer; r: reference);
21. procedure NewWord(var p: pointer; r: reference);
22. procedure InsertTree(r, p: pointer);
23. procedure OutputTree(p: pointer);
24. procedure PutNode(p: pointer);
}
program IndexText(InText, InIndex, NewIndex, HashFile, NewHashFile,
input, output);
{Produces word counts and list of references for the document file
InText. Uses the master word list in file InIndex, if provided. Output word
list for new text goes to file NewIndex. HashFile contains the common words
to be ignored. If not specified, it is created on output, containing the
words so flagged by the user.}
{This implementation uses only phases 1 and 2. A smaller array of text files
is also used, as specified in the exercise section.}
const
maxwd = 20; {More letters in word will be ignored.}
minwd = 1; {Shorter words will be ignored.}
hashsize = 2003; {should be a prime}
linesperpage = 100; {set to a value not to interfere with TeX}
maxheight = 20; {for building binary tree in phase 2}
A = 'A';
Z = 'Z';
hyphen = '-';
blank = ' ';
apostrophe = ''''; {requires two `'s to represent one}
underscore = '_';
ordbackspace = 8; {ASCII control character for backspace}
ordformfeed = 12; {ASCII control character for new page}
changecase = 32; {ASCII difference between upper and lower case}
nfiles = 8; {number of temporary files for unprocessed words}
MaxRowLength = 130; {maximum length of output records}
type
word = packed array[1..maxwd] of char;
reference = record
wd: word;
pg: integer; {count or page number}
end;
fileref = file of reference; {used for local files}
letter = A..Z;
hashentry = 1..hashsize;
filecode = 1..nfiles;
var
InText, {document being processed}
InIndex, {master word list}
NewIndex, {word list of current document}
HashFile,
NewHashFile: text;
RefFile: array[filecode] of fileref; {local files used for auxilary
storage of words from phase 1 to phase 2:
Normally, a separate file exist for each initial letter,
this version uses nfiles files due operating system constraints.}
blankword: word; {will contain all blanks}
{The next two variables were originally declared in procedure SplitWords,
they have been moved to this level in order to access them globally.}
outcount: array[filecode] of integer; {counters for word files}
wordcount: integer; {count of all words in the text}
intextname,
inlistname,
newlistname,
newhashname: word; {used to get filename from user}
lastletter: array[filecode] of letter; {last letter in each file}
PresentTime,
StartTime: integer; {used to track CPU time}
RowLength: integer; {ensures records will not exceed MaxRowLength}
function Lt( u, v: word): Boolean;
{Determains if word u precedes word v lexicographically.}
begin
Lt := (u < v)
end;
procedure ReadWord( var F: text; var w: word);
{Reads word w from text file F. Assumes not at end of file.}
{Uses packed array, replace using a loop if your system does not
support packed arrays. }
begin {procedure ReadWord}
read(F, w)
end; {procedure ReadWord}
procedure WriteWord( var F: text; w: word);
{Writes word w to text file F}
{Uses packed array, replace using a loop if your system does not
support packed arrays. }
begin {procedure WriteWord}
write(F, w)
end; {procedure WriteWord}
procedure SetTimer; {Call once at beginning of program execution.}
{Finds the CPU time when called, and keeps in variables for reference.}
{System dependent procedure.}
begin
PresentTime := clock;
StartTime := PresentTime;
end;
function TotalTime: real;
{Returns the total CPU time, in seconds, since call to SetTimer.}
{System dependent procedure.}
begin
TotalTime := (clock - StartTime) / 1000.0;
end;
function ElapsedTime: real;
{Returns elapsed CPU time since last call to function ElapsedTime,
or call to SetTimer, whichever is more recent.}
{System dependent procedure.}
var r: integer;
begin
r := clock;
ElapsedTime := (r - PresentTime) / 1000.0;
PresentTime := r;
end;
procedure SplitWords;
{sets up hash table, reads text, and divides into nfiles word lists}
var
hash: array[hashentry] of reference; {hash table}
pagecount: integer; {keeps the current page number}
addpage: integer; {amount to increase pagecount after word}
linecount: integer; {lines on the current page}
w: word; {word currently being processed}
x: hashentry; {location of w, if in hash table}
endinput: Boolean; {true if and only if input has all been read}
code: filecode; {into which file does word go?}
{The following variables are kept for use in procedure GetWord, and for
efficiency are set up only once in procedure Initialize:}
backspace,
formfeed: char;
alphabet, {letters only - to start a word}
contchar: set of char; {other characters ok in middle of word}
function FindFile( ch: letter): filecode;
{Uses binary decision tree to select one of nfiles = 8 files depending
on the letter ch. These letters must be the same as those in the
global array lastletter .}
begin {function FindFile}
if ch < 'M' then
if ch < 'E' then
if ch < 'C' then FindFile := 1
else FindFile := 2
else if ch < 'H' then FindFile := 3
else FindFile := 4
else if ch < 'S' then
if ch < 'P' then FindFile := 5
else FindFile := 6
else if ch < 'T' then FindFile := 7
else FindFile := 8
end; {function FindFile}
function HashAddress(w: word): hashentry;
{calculates the location in hash table of word w, or, if not there,
returns pointing to the blank word where w should go}
var
x, {calculated location}
inc: integer; {increment for open addressing}
begin {function HashAddress}
x := abs(ord(w[1])*ord(w[2])+ord(w[4])+ord(w[6])) mod hashsize + 1;
{Hash function assumes long word length. For short word machines
we must ensure that the result is non-negative, and worry about overflow.}
if (hash[x].wd <> w) and (hash[x].wd <> blankword) then
begin
inc := (abs(ord(w[3])-95) mod 29);
{A key dependent increment is used to avoid clustering.}
repeat
inc := inc + 1;
if inc > hashsize then
writeln(w,' causes hash table to become full, infinite loop.');
x := x + inc;
if x > hashsize then x := x - hashsize;
until (w = hash[x].wd) or (blankword = hash[x].wd)
end;
HashAddress := x
end; {function HashAddress}
procedure Initialize;
{sets up constant-valued sets for use in GetWord. Opens the text file
and initializes various counters. Opens file holding hash table (if any),
and reads or otherwise initializes table}
var
i: integer; {general purpose loop control}
begin {procedure Initialize}
backspace:= chr(ordbackspace);
formfeed := chr(ordformfeed); {initialize ASCII control characters}
alphabet := ['A'..'Z', 'a'..'z']; {letters only, to start a word}
contchar := [hyphen, apostrophe]; {, backspace, underscore];}
{characters which will not terminate word}
for i := 1 to maxwd do
blankword[i] := blank;
write('Name of input text file?');
ReadWord(input, intextname); readln;
open(InText, intextname, readonly);
reset(InText);
endinput := eof(InText);
repeat
write( 'What is the page number on which the text begins?');
readln(pagecount);
if pagecount < 0 then
writeln('Must be a non-negative integer.')
until pagecount >= 0;
linecount := 0;
addpage := 0;
wordcount := 0;
for i := 1 to nfiles do
begin
rewrite( RefFile[i] );
outcount[i] := 0
end;
lastletter[1] := 'B';
lastletter[2] := 'D';
lastletter[3] := 'G';
lastletter[4] := 'L';
lastletter[5] := 'O';
lastletter[6] := 'R';
lastletter[7] := 'S';
lastletter[8] := 'Z';
reset(HashFile); {assumes HASHFILE.DAT is in current directory}
for i := 1 to hashsize do
with hash[i] do
begin
read(HashFile, pg);
get(HashFile); {skip the blank between number and word}
ReadWord(HashFile, wd);
readln(HashFile);
pg := 0; {initialize all the counts to 0}
end;
writeln('The hash table has been read.')
end; {procedure Initialize}
procedure GetWord( var w: word);
{Gets words from input file InText, and returns only words
at least minwd characters long. Parameter endinput becomes
true if and only if the end of InText is reached with no word to return.
the procedure also updates global variables wordcount and linecount,
updates the global variable pagecount after each linesperpage cr's,
or after each formfeed, whichever comes first, and
uses the sets alphabet and contchar and various character constants.}
label 1; {used by GetChar to exit procedure upon eof(InText)}
var c: 0..maxwd; {count of characters in word}
ch: char; {character currently processed}
endln: Boolean; {at the end of a line?}
procedure TellUserPage; {keep the user informed of progress}
var i: integer;
begin
i := pagecount + addpage;
writeln('At page', i:4, ' word count is', wordcount:7)
end;
procedure TeXCommand(var ch: char);
var
i: integer; {used to construct word 'page' in TeX command}
wd: packed array[1..4] of char; {holds word possibly = 'page'}
begin
ch := InText^; {This character will be deleted.}
get(InText); {Keep InText buffered one character ahead.}
if ch in alphabet then
begin {case: a word follows '\'}
i := 0;
while Intext^ in alphabet do {Delete all following letters}
begin
i := i + 1;
if i <= 4 then wd[i] := InText^;
get(InText)
end;
if (i = 4) and (wd = 'page') then
ch := formfeed
else begin {case: word after '\' not 'page'}
ch := InText^;
get(InText)
end {case: word after '\' not 'page'}
end {case: a word follows '\'}
else begin {case: character after '\' not a letter}
ch := Intext^;
get(InText) {Delete only one character more if it is not a letter.}
end {case: character after '\' not a letter}
end;
procedure GetChar(var ch: char);
{gets a character from input text into ch; checks for eof; updates
page count and line count; deletes all TeX commands}
begin {procedure GetChar}
if eof(InText) then
if c >= minwd then
ch := '.' {special character to end the current word}
else begin {no word to return; set endinput}
endinput := true;
goto 1 {exit from GetWord.}
end {no word to return}
else begin {not end of file: process next character}
while InText^ = backspace do {delete use of underscore; TeX subscript}
get( InText);
ch := InText^;
endln := eoln(InText);
get(InText);
while ch = '\' do
TeXCommand(ch);
if endln then
begin {case: end of line}
linecount := linecount + 1;
if linecount >= linesperpage then
begin
addpage := addpage + 1;
linecount := 0;
TellUserPage
end
end; {case: end of line}
if ch = formfeed then
begin {case: formfeed}
addpage := addpage + 1;
linecount := 0;
TellUserPage;
endln := true; {Treat formfeed like end of line.}
ch := blank
end {case: formfeed}
end {case: not at end of file}
end; {procedure GetChar}
procedure AddChar(ch: char);
{adds given character to word, if possible}
begin {procedure AddChar}
if c < maxwd then
begin
c := c + 1;
w[c] := ch
end
end; {procedure AddChar}
begin {procedure GetWord}
repeat {until current word is at least minwd chars long}
c := 0;
repeat
GetChar(ch) {Find a letter which will start the word.}
until ch in alphabet;
pagecount := pagecount + addpage;
addpage := 0;
if ch in ['a'..'z'] then {translate first letter to upper case.}
ch := chr(ord(ch) - changecase); {assumes ASCII ordering of letters}
AddChar(ch); {put first letter into the word}
GetChar(ch);
while (ch in alphabet) or (ch in contchar) do
if ch in alphabet then {add letters directly to word}
begin {processing letter}
AddChar(ch);
GetChar(ch)
end {processing letter}
else if ch = hyphen then
begin {processing hyphen}
GetChar(ch); {Find what comes after hyphen.}
if endln then
while ch = ' ' do
GetChar(ch) {Delete both the hyphen and the end of line}
else if ch = hyphen then {Two hyphens form a dash; ends word}
ch := blank {Use a blank to terminate the word.}
else if ch in alphabet then
AddChar(hyphen) {Include other hyphens in word}
else {nothing}
end {processing hyphen}
else if ch = apostrophe then
begin {processing apostrophe}
GetChar(ch);
if ch = 's' then {Delete `'s' at end of word only}
begin
GetChar(ch);
if ch in contchar then
begin
AddChar(apostrophe);
AddChar('s')
end
end
else if ch in alphabet then
AddChar(apostrophe) {Allow contractions.}
end {processing apostrophe}
else {Remaining possibilities are backspace and underscore.}
GetChar(ch); {Delete these characters.}
{While loop on continuing characters ends here.}
wordcount := wordcount + 1
until c >= minwd; {Skip over short words.}
while c < maxwd do {Fill with blanks.}
begin
c := c + 1;
w[c] := blank
end;
1: {When end of file occurs, program will exit to here from GetChar}
end; {procedure GetWord}
procedure Conclude;
{Writes out counts of various word lists. For some systems, it is
necessary to close files, which should be done here.}
var
i,j: integer; {loop index}
response: char; {user's answer to question}
begin {procedure Conclude}
writeln('The total number of words read in is ', wordcount:7);
writeln;
writeln('The number of words to process further in the next stage,');
writeln('in each temporary file, is below.');
writeln(' a-b c-d e-g h-l m-o p-r s t-z');
for i := 1 to nfiles do
write(outcount[i]:8);
writeln;
writeln;
repeat
write('Do you wish the counts from hash table to be kept in a file (y,n)?');
readln(response);
if response > 'Z' then response := chr(ord(response)-changecase)
until response in ['N', 'Y'];
if response = 'Y' then
begin
write('Name of file ?');
ReadWord(input, newhashname);
readln;
open(NewHashFile, newhashname, new);
rewrite(NewHashFile);
for i := 1 to hashsize do
with hash[i] do begin
write(NewHashFile, pg:4, ' ');
j := 1;
repeat
write(NewHashFile, wd[j]);
j := j + 1;
until (wd[j] = ' ') or (j >= maxwd);
writeln(NewHashFile)
end
end
end; {procedure Conclude}
begin {procedure SplitWords}
Initialize; {sets up files, hash table, constants}
GetWord(w); {obtain a single word from InText}
while not endinput do
begin
x := HashAddress(w);
if w = hash[x].wd then
hash[x].pg := hash[x].pg + 1
else begin {not in hash table; put into RefFile}
code := FindFile( w[1] );
outcount[code] := outcount[code] + 1;
with RefFile[code]^ do
begin
wd := w;
pg := pagecount
end;
Put(RefFile[code])
end;
GetWord(w);
end;
Conclude {writes word counts to output.}
end; {procedure SplitWords}
{start of phase 2}
procedure ClassifyWords;
{For each letter of the alphabet, the procedure reads in a list of
words from InIndex, builds them into a binary tree, supplements it
with entries from RefFile, and writes the result to files NewIndex
and NewHashFile.}
type
wordtype = (hash, count, page, question, index); {ways to process a word}
pointref = ^reflist;
reflist = record {list of references}
pg: integer;
next: pointref
end;
pointer = ^node;
node = record {vertex of the binary tree}
wd: word;
left,
right: pointer;
ct: integer;
case kind: wordtype of
hash, count:
();
page, question, index:
(ref: pointref)
end;
var
root: pointer; {root of binary tree}
code: filecode; {loop through temporary files}
endlist: Boolean; {at end of input word list?}
i: integer; {general purpose loop variable}
procedure BuildTree(var root: pointer; code: filecode);
{Reads a sequential file in alphabetical order, and converts it into
a binary search tree. Stops reading when the first letter of word
is after lastletter[code].
const maxheight = 20 (in main program) allows 512k entries.}
{This procedure was modified slightly to fit the needs of this application.
The parameters of GetNode now include a character ch, which has also
been introduced as a local variable.}
type
level = -1 .. maxheight; {number of steps above leaves}
var
lastnode: array[level] of pointer; {contains pointer to
last node processed on each level}
counter: integer; {number of nodes read in so far}
p: pointer; {p^ is present input node}
lev: level; {level of p^}
ch: char; {will be last letter to be processed.}
function Power2(c: integer): level;
{finds the highest power of 2 which divides c}
var
lev: level;
begin {function Power2}
lev := 0;
while not odd(c) do
begin
c := c div 2;
lev := lev + 1
end;
Power2 := lev
end; {function Power2}
procedure Insert(p: pointer);
{Inserts p^ as rightmost node of a partial binary search tree.}
var
lev: level; {level of p^}
begin {Procedure Insert}
lev := Power2(counter);
p^.right := nil;
p^.left := lastnode[lev - 1];
lastnode[lev] := p;
if lastnode[lev + 1] <> nil then
with lastnode[lev + 1]^ do
if right = nil then right := p
end; {Procedure Insert}
procedure FindRoot;
var
lev: level;
begin {Procedure FindRoot}
if counter = 0 then
root := nil {Tree is empty.}
else begin {Non-empty tree}
lev := maxheight; {Find the highest occupied level; it gives the root}
while lastnode[lev] = nil do lev := lev - 1;
root := lastnode[lev]
end
end; {Procedure FindRoot}
procedure ConnectSubtrees;
var
p: pointer;
lev: level;
s: level;
begin {Procedure ConnectSubtrees}
lev := maxheight;
while (lastnode[lev] = nil) and (lev > 1) do
lev := lev - 1; {Find the highest node: root}
while lev > 1 do {Nodes on levels 1 and 0 are already OK}
with lastnode[lev]^ do
if right <> nil then
lev := lev - 1 {Search down for the highest dangling node}
else begin {Case: right subtree is undefined.}
p := left; {Find the highest entry in lastnode that}
s := lev - 1; {is not in the left subtree.}
repeat
p := p^.right;
s := s - 1
until (p = nil) or (p <> lastnode[s]);
right := lastnode[s];
lev := s {Nodes on levels between lev and s are on the left.}
end {Connecting dangling subtrees}
end; {Procedure ConnectSubtrees}
procedure GetNode( var p: pointer; ch: char);
{reads a word from file InIndex and sets node correspondingly}
{returns p = nil at eof or when next word starts later than code.}
var
wordcode: char; {letter indicating type of word}
begin {procedure GetNode}
while InIndex^ = '&' do {ignore lines starting with '&'}
readln(InIndex);
while (not eof(InIndex)) and (InIndex^ = blank) do
get(InIndex); {Skip all leading blanks}
if endlist or eof(InIndex) then
p := nil
else if InIndex^ > ch then
p := nil
else begin
new(p);
with p^ do begin
ReadWord(InIndex, wd);
while (InIndex^ = ' ') and (not eoln(InIndex)) do
get(InIndex);
read(InIndex, wordcode);
ct := 0;
if wordcode in ['C', 'H','I','P','?'] then
case wordcode of
'C': kind := count;
'H': begin
writeln('Warning: The input word list contains ', wd);
writeln(' which belongs in the hash table.');
kind := hash
end;
'I': begin kind := index; ref := nil end;
'P': begin kind := page; ref := nil end;
'?': begin
writeln('Questionable word: ', wd, ' in word list.');
write('New category (P, I, C, H, ?');
repeat
readln(wordcode);
if wordcode > 'Z' then
wordcode := chr(ord(wordcode) - changecase)
until wordcode in ['H','C','P','?','I'];
case wordcode of
'H': kind := hash;
'C': kind := count;
'P', ' ': kind := page;
'?': kind := question;
'I': kind := index
end;
if kind in [page, question, index] then ref := nil
end
end
else
writeln('Erroneous word code ', wordcode, ' in file InIndex.')
end; {with statement setting up the node}
readln(InIndex); {Advance to the start of the next entry.}
endlist := eof(InIndex)
end
end; {procedure GetNode}
begin {procedure BuildTree}
for lev := -1 to maxheight do lastnode[lev] := nil;
counter := 0;
ch := lastletter[code];
GetNode(p, ch);
while p <> nil do
begin
counter := counter + 1;
Insert(p);
GetNode(p, ch)
end; {reading and processing input}
FindRoot;
ConnectSubtrees
end; {procedure BuildTree}
procedure Process( r: reference);
{Takes the word and page reference r, and updates the binary tree.}
var
p: pointer; {trace through the tree}
found: Boolean; {Is the word in the tree?}
procedure UpdateNode( p: pointer; r: reference);
{uses reference r to update information in node p^}
var
q: pointref; {used to add reference to list}
begin {procedure UpdateNode}
with p^ do
begin
ct := ct + 1;
if kind in [page, question, index] then
if ref = nil then
begin
new(ref);
ref^.pg := r.pg;
ref^.next := nil
end
else if ref^.pg <> r.pg then
begin {add the new reference to list.}
new(q);
q^.pg := r.pg;
q^.next := ref;
ref := q
end
end {with statement to update tree}
end; {procedure UpdateNode}
procedure NewWord(var p: pointer; r: reference);
{Creates a node for the first occurrence of a new reference r. A
pointer to the new node is returned in p.}
var
response: char; {answer received from user}
begin {procedure NewWord}
new(p);
with p^ do
begin
wd := r.wd;
left := nil;
right := nil;
ct := 1;
kind := question;
repeat {ask user what kind of word}
WriteWord(output, wd);
write(' is (H, C, P, ?, I)?');
readln(response);
if response > 'Z' then response := chr(ord(response) - changecase)
until response in ['H', 'C', 'P', ' ', '?', 'I'];
case response of
'H': kind := hash;
'C': kind := count;
'P', ' ': kind := page;
'?': begin
kind := question;
writeln('First occurence of word is on page', r.pg:5, '.')
end;
'I': kind := index
end; {case statement}
if kind in [page, question, index] then
begin
new(ref);
ref^.pg := r.pg;
ref^.next := nil;
end
end {with statement}
end; {procedure NewWord}
procedure InsertTree(r, p: pointer);
{adds a node p^ to the tree with root r^; requires that r <> nil
and p^ not be in the tree; proceeds by recursion}
begin {procedure InsertTree}
if Lt(p^.wd, r^.wd) then
if r^.left = nil then r^.left := p
else InsertTree(r^.left, p)
else
if r^.right = nil then r^.right := p
else InsertTree(r^.right, p)
end; {procedure InsertTree}
begin {procedure Process}
if root = nil then {The tree might be empty.}
NewWord(root, r)
else begin {case of non-empty tree}
p := root; {Begin a tree search.}
found := false;
repeat
if r.wd = p^.wd then
found := true
else if Lt(r.wd,p^.wd) then
p := p^.left
else
p := p^.right
until found or (p = nil);
if found then UpdateNode(p, r)
else begin {p^ was not found: add it to the tree.}
NewWord(p, r);
InsertTree(root, p)
end
end
end; {procedure Process}
procedure OutputTree( p: pointer);
{traverses the tree for which p^ is the root in inorder}
procedure PutNode( p: pointer);
{Puts the information in p^ into the file NewIndex.}
var
q: pointref; {used to traverse list of references}
response: char;
begin {procedure PutNode}
with p^ do if ct > 0 then
begin {Otherwise, word is not in document.}
if kind <> hash then
WriteWord(NewIndex, wd);
case kind of
hash: begin {new hash entries written to NewHashFile}
write(NewHashFile, ct, ' ');
WriteWord(NewHashFile, wd);
writeln(NewHashFile)
end;
count: write(NewIndex, 'C');
page: write(NewIndex, 'P');
index: write(NewIndex, 'I');
question:
begin
repeat {ask user what kind of word}
WriteWord(output, wd);
write(' is questionable. Change to (h, c, p, ?, i)?');
readln(response);
if response > 'Z' then response := chr(ord(response) - changecase)
until response in ['H', 'C', 'P',' ', '?', 'I'];
case response of
'H': begin kind := hash; write(NewIndex, 'H') end;
'C': begin kind := count; write(NewIndex, 'C') end;
'P', ' ': begin kind := page; write(NewIndex, 'P') end;
'I': begin kind := index; write(NewIndex, 'I') end;
'?': begin
kind := question;
write(NewIndex, '?');
write('The word appears on the following page(s)');
q := ref;
repeat
write(q^.pg:6);
q := q^.next
until q = nil;
writeln
end {case of questionable word}
end {case response statement}
end {treating new or question words}
end; {case kind statement}
if kind <> hash then
write(NewIndex, ct:6);
if kind in [page, question, index] then
begin
q := ref;
RowLength := 28; {ensures that record will not exceed desired length}
repeat
if RowLength > (MaxRowLength - 4) then
begin
writeln(NewIndex);
write(NewIndex,'& '); {& indicates continuation of index}
RowLength := 3
end;
write( NewIndex, q^.pg:4);
q := q^.next;
RowLength := RowLength + 4
until q = nil;
end;
if kind <> hash then
writeln( NewIndex )
end {with statement and if statement}
end; {procedure PutNode}
begin {procedure OutputTree}
if p <> nil then
with p^ do
begin
OutputTree(left); {Traverse the left subtree}
PutNode(p);
OutputTree(right); {Traverse the right subtree}
dispose(p)
end
end; {procedure OutputTree}
begin {procedure ClassifyWords}
write('Name of input word list ?');
ReadWord(input, inlistname);
readln;
open(InIndex, inlistname, readonly);
reset(InIndex);
endlist := eof(InIndex);
write('Name of output word list ?');
ReadWord(input, newlistname);
readln;
open(NewIndex, newlistname, new);
rewrite(NewIndex);
writeln('At the appearance of each word, indicate its disposition:');
writeln(' H - Place this word in hash table and count its frequency.');
writeln(' C - Count how many times this word appears.');
writeln(' P - List pages on which this word appears.');
writeln(' ? - Question this word: list pages on which it appears.');
writeln(' I - Index this word: list pages on which it appears.');
for code := 1 to nfiles do {start main loop through temporary files.}
begin
BuildTree(root, code); {Get the part of master wordlist starting with
code from the file InIndex, and build it into a binary tree.}
reset(RefFile[code]);
for i := 1 to outcount[code] do
begin
Process(RefFile[code]^);
{use new words from RefFile[code] to update the tree.}
get( RefFile[code] )
end;
OutputTree(root)
{write the contents of the tree into file NewIndex.}
end {main loop on temporary files}
end; {procedure ClassifyWords}
{end of all procedures}
begin {main program}
SetTimer;
SplitWords; {Phase 1}
writeln('Time in first phase is ', ElapsedTime:7:1, ' seconds.');
writeln;
ClassifyWords; {Phase 2}
writeln('Time in second phase is', ElapsedTime:7:1, ' seconds.');
writeln;
writeln('Processing of input document ', intextname, ' is complete.');
writeln('Total time in program was ', TotalTime:7:1, ' seconds.')
end.