home *** CD-ROM | disk | FTP | other *** search
- {$N-} {No numeric coprocessor}
- {$S-}
- {$V-}
-
- PROGRAM SPELLER2; { SPELL CHECKER -- with cmd line }
-
- { This spell checker is based on the ideas contained in PC-SPELL ver
- 1.15 in BASIC by Andy Wildenberg. In that program the text file is
- read into memory and put into a list of words in a string array. The
- string array is then sorted and the unique words removed into
- another array. Thus a unique word array is formed which is in
- alphabetical order. This word list is then compared to a dictionary
- file which is an ASCII list of legal words also in alphabetical
- order. If the word is not found then it is placed into a file of
- possible misspelled words on disk. The user is then responsible for
- printing the list of misspelled words and using a global change
- feature in a word processor to find and replace the words with the
- correct spelling.
-
- This spell checker works in much the same way except that a unique
- word file is formed in an array alphabetically as the text file is
- parsed into words. The rest of the process is about the same.
-
- To use, just type the name of the program followed by a parameter
- specifying the file. The parameter is optional and if ommitted then the
- program will request this name.
-
- Version SPELLER2 is compatible with WINDOWS facilities and adds a alternate
- dictionary file as an optional second parameter on the command line. This
- version has been converted to compile with Turbo Pascal version 4.x.
-
- J. Leeson, March 29, 1988
- }
- { *************************************************************************}
-
- CONST
- WORDSIZE : integer = 16;
-
- TYPE
- STRPARAM = string;
- WORDTYP = string [16];
- WORDPTR = ^WORDTYP;
- PTRARRAY = array [0..4000] of WORDPTR; {Limited to 8191 because the
- Move function requires an integer parameter
- for length in bytes of data to move. SPELLERW
- reduced to 4000 to reduce memory requirements.}
-
- VAR
- SRCNAME : string; { Name of source file to spell check }
- ALTDICNAME : string; { Name of the alternate dictionary file }
- OPPATH : string; { DOS path for speller files }
- OPNAME : string; { DOS name for speller files }
- OUTNAME : string; { Name of output file ( default srcfile.MIS) }
- DOCWORDCNT, UNIQUECNT, MISSPELLCNT : integer;
- I : integer;
- WORDINDX : PTRARRAY;
- A_WORD, ALTWORD, TEMP1 : WORDTYP;
- PREFIX : string [1];
- MATCH, ALTDIC : boolean;
- SRCFILE, DICFILE, ALTDICFILE, MISSFILE : text;
- ABuf, SBuf : array[0..$fff] of char; {buffers for source and altdic}
- DBuf : array[0..$1fff] of char; {buffer for dictionary file}
- x : byte;
- PATHSTRING : string; { working storage for path strings }
-
- FUNCTION LOWCASE (var A : char) : boolean;
- { *************************************************************************}
- { LOWCASE modifies the character parameter "A" to make it a lower case
- alpha character if it is an upper case alpha. If the character
- parameter is alpha ('a'..'z' or 'A'..'Z') then the function returns
- TRUE else it returns FALSE. }
- { *************************************************************************}
- var x : byte;
-
- begin
- x := ord (A);
- if (x>96) and (x<123) then LOWCASE := true
- else begin
- if (x>64) and (x<91) then
- begin
- A := chr (x+32);
- LOWCASE := true;
- end
- else LOWCASE := false;
- end;
- end; { of LOWCASE }
-
- PROCEDURE GETWORD (var FILNAME : text; var A_WORD : WORDTYP);
- { *************************************************************************}
- {GETWORD version 1.2. Defines the start of a word as the next alpha
- character in the file. A word is formed by adding characters until a
- non-alpha character is found. Contractions are accepted as identified by
- a single quote followed by an alpha character occuring after the SOW.
- Upper case letters are converted to lower case.}
- { *************************************************************************}
- VAR
- CH, CH2 : char;
- SOW : boolean;
- {Global WORDSIZE = maximum word length value.}
- begin
- SOW := false;
- A_WORD := '';
- repeat
- read (FILNAME, CH);
- if LOWCASE (CH) then SOW := true
- until SOW or eof (FILNAME);
- if SOW then
- begin
- A_WORD := CH;
- repeat
- read (FILNAME, CH);
- if LOWCASE (CH) then
- begin
- if Length (A_WORD) < WORDSIZE then A_WORD := A_WORD + CH
- else SOW := false;
- end
- else begin
- if CH <> '''' then SOW := false
- else begin
- if not Eof (FILNAME) then
- begin
- Read (FILNAME, CH2);
- if LOWCASE (CH2) then
- begin
- if Length (A_WORD) < WORDSIZE-1 then
- A_WORD := A_WORD + CH + CH2 else SOW := false;
- end
- else SOW := false;
- end;
- end;
- end;
- until (not SOW) or eof (FILNAME);
- end;
- end; { of GETWORD }
-
- procedure ADDUNIQUE (var LIST : PTRARRAY; A_WORD : WORDTYP; var TOP : integer);
- { ***************************************************************************}
- { This procedure does a binary search of the LIST looking for the location
- where A_WORD belongs. Once it finds the place, if A_WORD is there then it
- exits. If not, then it moves the list up by one pointer and puts the new
- word there.}
- { ***************************************************************************}
- var
- SEARCH : boolean;
- MID, LOW, HIGH, COUNT : integer;
-
- begin
- SEARCH := true;
- LOW := 0; MID := Trunc (TOP/2); HIGH := TOP;
- while SEARCH do {** Find the place where A_WORD belongs. **}
- begin
- if MID = LOW then SEARCH := false
- else begin
- if A_WORD < LIST [MID]^ then HIGH := MID
- else LOW := MID; {** A_WORD is >= word at LIST [MID]^ **}
- MID := LOW + Trunc ((HIGH-LOW)/2);
- end;
- end; {** of SEARCH. MID is at the location containing A_WORD or else
- A_WORD goes at the location after MID. **}
- if A_WORD <> LIST [MID]^ then begin
- COUNT := 4*(TOP-MID);
- MID := MID+1;
- Move (LIST [MID], LIST [MID+1], COUNT);
- TOP := TOP+1;
- new (LIST [MID]);
- LIST [MID]^ := A_WORD;
- end;
- end;
-
- Function DosPath : string;
- { **************************************************************************}
- { This function extracts the 'PATH =' string from the DOS environment passed
- by DOS to the applications program and returns the string, else returns nul.
- Restructured for 4.0 Turbo Pascal.}
- { **************************************************************************}
- type
- AThing = ^EnvThing;
- EnvThing = array[1..255] of char; {It's a buncha ASCIIZ strings}
- var
- I : word;
- X : integer;
- DosEnvSeg : word;
- DosEnvPtr : AThing;
- DosEnv : EnvThing;
- PathString, EnvString : string;
- begin
- DosEnvSeg := MemW[PrefixSeg:$002c]; {Segment passed by DOS is here}
- I := 0;
- PathString := '';
- repeat {DOS always passes a COMSPEC= environment string}
- DosEnvPtr := Ptr(DosEnvSeg,I);
- EnvString := DosEnvPtr^;
- length(EnvString) := pos(chr(0), EnvString)-1; {ASCIIZ strings}
- {If two consecutive zero bytes then length(EnvString) will be zero}
- I := I+length(EnvString)+1; {Moves the pointer to next string}
- X := Pos('PATH=',EnvString);
- if X <> 0 then PathString :=
- copy(EnvString, X+5, length(EnvString)-(X+4));
- until (PathString <> '') or (EnvString = ''); {Two zero bytes end it}
- DosPath := PathString;
- end;
-
- Function ParsePath (Var PATHSTRING : STRPARAM) : string;
- { ***************************************************************************}
- { This function returns the first substring of PATHSTRING which is terminated
- by the end of the string or by a semicolon. It then alters the input variable
- PATHSTRING to remove this part of the string. Thus subsequent calls to
- ParsePath will return one part of the parameter string until it is all gone
- and will then return a nul string. }
- { ***************************************************************************}
- var
- x : integer;
- begin
- if length (PATHSTRING) = 0 then ParsePath := '' else begin
- x := Pos (';',PATHSTRING);
- if x=0 then begin
- ParsePath := PATHSTRING;
- PATHSTRING := '';
- end
- else begin
- ParsePath := Copy (PATHSTRING, 1, x-1);
- PATHSTRING := Copy (PATHSTRING, x+1, Length (PATHSTRING));
- end;
- end;
- end;
-
- begin { ******************************************************************}
- { ******** MAIN PROGRAM ***********}
- { ******************************************************************}
-
- DOCWORDCNT := 0; MISSPELLCNT := 0; ALTDIC := true;
- if ParamCount = 0 then begin
- write ('Source file : ');
- readln (SRCNAME);
- ALTDICNAME := '';
- write ('Alternate dictionary : ');
- readln (ALTDICNAME);
- end
- else begin
- SRCNAME := ParamStr (1);
- if ParamCount = 1 then ALTDICNAME := '' else ALTDICNAME := ParamStr (2);
- end;
- if ALTDICNAME = '' then ALTDIC := false;
- assign (SRCFILE, SRCNAME);
- SetTextBuf (SRCFILE, SBuf); {Turbo 4.0 setup for I/O buffers}
- {$I-} reset (SRCFILE) {$I+};
- if IOResult <> 0 then begin
- writeln ('Unable to read the source file. Aborting SPELLER.');
- exit;
- end;
- if ALTDIC then begin
- assign (ALTDICFILE, ALTDICNAME);
- SetTextBuf (ALTDICFILE, ABuf); {Turbo 4.0 setup for I/O buffers}
- {$I-} reset (ALTDICFILE) {$I+};
- if IOResult > 0 then begin
- writeln ('Alternate dictionary not found.');
- ALTDIC := false;
- end;
- end;
- { Find the dictionary file in the current directory on the default
- drive or else go searching for it using the DOS PATH command to
- find drives and directories to search. }
- PATHSTRING := DosPath;
- MATCH := false;
- OPPATH := '';
- PREFIX := '';
- while MATCH = false do begin
- OPNAME := OPPATH + PREFIX + 'SPELLER.LIS';
- assign (DICFILE, OPNAME);
- SetTextBuf (DICFILE, DBuf); {Turbo 4.0 setup for I/O buffers}
- {$I-} reset (DICFILE) {$I+};
- x := IOResult;
- MATCH := (x=0);
- OPPATH := ParsePath (PATHSTRING);
- if OPPATH = '' then MATCH := true
- else begin
- if (Pos (':',OPPATH) = Length (OPPATH)) or
- (Pos ('\',OPPATH) = Length (OPPATH)) then PREFIX := ''
- else PREFIX := '\';
- end;
- end;
- if x<>0 then begin {I/O error... file not found usually}
- writeln;
- writeln ('Unable to locate the spelling list. Aborting SPELLER.');
- close (SRCFILE);
- if ALTDIC then close (ALTDICFILE); {Don't close it if it isn't open}
- exit;
- end;
- I := Pos ('.',SRCNAME);
- if I = 0 then OUTNAME := SRCNAME + '.MIS'
- else OUTNAME := Copy (SRCNAME, 1, I-1) + '.MIS';
- assign (MISSFILE, OUTNAME);
- {$I-} rewrite (MISSFILE) {$I+};
- if IOResult <> 0 then begin
- writeln;
- writeln ('Unable to open the output file. Error code is ',x);
- {DOS error code for 4.0 Turbo Pascal}
- writeln ('Program terminating.');
- close (SRCFILE);
- close (DICFILE);
- if ALTDIC then close (ALTDICFILE); {Don't close it if it isn't open}
- exit;
- end;
-
- { If no EXIT's were encountered in getting the files opened then we
- continue here with the files all open. }
-
- Writeln ('READING ',SRCNAME);
- UNIQUECNT := 1;
- New (WORDINDX [1]);
- WORDINDX [2] := nil;
- WORDINDX [1]^ := '~';
- while not eof (SRCFILE) do begin
- GETWORD (SRCFILE, A_WORD);
- if Length (A_WORD) > 1 then begin {Don't spell check one letter words}
- DOCWORDCNT := DOCWORDCNT + 1;
- ADDUNIQUE (WORDINDX, A_WORD, UNIQUECNT);
- end;
- end;
- Close (SRCFILE);
- {*** Check against dictionary ***}
- writeln ('CHECKING SPELLING');
- I := 1;
- A_WORD := ''; ALTWORD := '';
- while I <= UNIQUECNT-1 do begin {dump the ~ at the end of the list}
- while (A_WORD < WORDINDX [I]^) and not Eof (DICFILE) do
- Readln (DICFILE, A_WORD);
- if A_WORD <> WORDINDX [I]^ then begin
- if ALTDIC then
- while (ALTWORD < WORDINDX[I]^) and not Eof (ALTDICFILE) do
- ReadLn (ALTDICFILE, ALTWORD);
- if ALTWORD <> WORDINDX [I]^ then begin
- Writeln (MISSFILE, WORDINDX [I]^);
- MISSPELLCNT := MISSPELLCNT +1;
- end;
- end;
- I := I + 1;
- end { while I <= ... };
- Close (DICFILE);
- Write (MISSFILE, Chr (26));
- Close (MISSFILE);
- if ALTDIC then close (ALTDICFILE); {Don't close it if it aint open}
- writeln;
- writeln ('Speller done. Statistics:');
- writeln (' Source file: ', SRCNAME);
- writeln (' Total words: ', DOCWORDCNT);
- writeln (' Unique words: ', UNIQUECNT);
- writeln (' Spelling errors: ', MISSPELLCNT);
- End.