home *** CD-ROM | disk | FTP | other *** search
- { SOUNDEX.INC }
- { A Turbo Pascal implementation of the Soundex algorithm }
- { The basic Soundex algorithm may be found in Vol. 3 of }
- { Don Knuth's Algorithms. As described, an input string }
- { is converted to a string of the form XYYY, where: }
- { X = the first letter }
- { YYY = the Soundex values the next several non- }
- { vowels; doubled values of Y are ignored. }
- { This implementation will return the Soundex code as }
- { defined by Knuth. In addition, it will return two }
- { variations: first character replaced by its Soundex }
- { value, and retain all vowels. }
-
-
- type
- bigstr =string [255];
-
- const
- code_table : array [0..25] of char =
- ('0', '1', '2', '3', '0', '1', '2', '0',
- { A B C D E F G H }
- '0', '2', '2', '4', '5', '5', '0', '1',
- { I J K L M N O P }
- '2', '6', '2', '3', '0', '1', '0', '2',
- { Q R S T U V W X }
- '0', '2' );
- { Y Z }
-
- procedure soundex (var name , sx : bigstr; first, vowels : boolean;
- size : integer);
-
- { name = string to be converted
- sx = output Soundex string
- first = true to include first character, false to include
- first character's Soundex code
- vowels = true to retain all vowels
- size = Maximum length of the SX string; 0 implies no limt
-
- Using first = false, vowels = false, size = 4 will produce Knuth's
- result.
- }
-
- var
- ch : char; { Holds characters from the input string }
- tv : char; { Holds Soundex value of current character }
- pv : char; { Holds Soundex value of previous character }
- i : integer; { Used to index through the input string }
- j : integer; { Index into the output string, sx }
-
- begin
- if size = 0
- then size := length (name);
-
- ch := ' ';
- i := 0;
-
- while ((i <= length (name)) and not (ch in ['A'..'Z'])) do
- begin
- i := i + 1;
- ch := upcase (name [i]);
- end;
-
- pv := code_table [ord (ch) - ord ('A')];
-
- if first
- then sx [1] := ch
- else sx [1] := pv;
-
-
- if i <= length (name)
- then j := 1
- else j := 0;
-
- for i := i + 1 to length (name) do
- begin
- ch := upcase (name [i]);
-
- if ch in ['A'..'Z']
- then begin
- if vowels or not (ch in ['A', 'E', 'I', 'O', 'U', 'W', 'Y'])
- then begin
- tv := code_table [ord (ch) - ord ('A')];
-
- if tv <> pv
- then begin
- j := j + 1;
- sx [j] := tv;
- pv := tv;
- end;
- end;
- end;
- end;
-
- if j > size { Limit length of result }
- then j := size;
-
- sx [0] := chr (j); { Store length of output string }
- end; { soundex () }
-