home *** CD-ROM | disk | FTP | other *** search
- Path: sparky!uunet!usc!sdd.hp.com!nigel.msen.com!fmsrl7!lynx.unm.edu!mimbres.cs.unm.edu!bbx!tantalum!edsr!bjk
- From: bjk@edsr.eds.com (Brian Kodl)
- Newsgroups: comp.lang.perl
- Subject: Re: SOUNDEX pattern matching
- Summary: perl library module
- Keywords: soundex, perl
- Message-ID: <1452@ares.edsr.eds.com>
- Date: 20 Jan 93 18:58:39 GMT
- References: <1jfjejINNm1q@fernwood.mpk.ca.us>
- Organization: EDS Research, Dallas, TX 75230
- Lines: 61
-
-
- Here is a perl required file containing a soundex routine that
- was originally posted towards the beginning of last year. It has
- been altered slightly up by Don Krause, a colleague, and myself.
-
- Standard disclaimer: use at your own risk, we are not to be held
- responsible for the actions of the code, blah, blah, blah... :-)
-
- ----- cut here -------
-
- # soundex.pl
- # by George Armhold <armhold@dimacs.rutgers.edu>
- # 3/22/92
-
- # return the Soundex value of a string using the following rules:
- #
- # 1) remove W and H
- # 2) recode characters per table:
- # A E I O U Y 0
- # B F P V 1
- # C G J K Q S X Z 2
- # D T 3
- # L 4
- # M N 5
- # R 6
- #
- # 3) remove all vowels except in the first position (A E I O U Y)
- # 4) if two adjacent digits are now identical, remove one
- # 5) truncate to eight digits or pad out the result with zeroes to
- # make eight digits
- # 6) replace the first digit with the first character from the
- # original word
-
- sub soundex {
- # takes a string as an argument, and returns its soundex value
-
- local($pattern) = @_;
-
- # upper-case the pattern to normalize matches
- $pattern =~ tr/a-z/A-Z/;
-
- # save first char
- local($first) = substr($pattern, 0, 1);
-
- # remove all but alphanumerics, and H,W
- $pattern =~ tr/A-GI-VX-Z0-9//cd;
-
- # replaces letters with numbers and squish identical numbers
- $pattern =~ tr/BFPVCGJKQSXZDTLMNR0-9/1111222222223345560-9/ds;
-
- # remove all vowels after 1st letter
- substr($pattern, 1, length($pattern)) =~ tr/AEIOUY//d;
-
- # replace first letter
- substr($pattern, 0, 1) = $first;
-
- # pad on zeroes if necessary and truncate
- substr($pattern."00000000", 0, 8);
- }
-
- 1; # because this is a require'd file
-