home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Power CD-ROM!! 7
/
POWERCD7.ISO
/
prgmming
/
clipper
/
fonetix.prg
< prev
next >
Wrap
Text File
|
1993-10-14
|
13KB
|
453 lines
/*
* GT CLIPPER STANDARD HEADER
*
* File......: fonetix.prg
* Author....: Andy M Leighton
* BBS.......: The Dark Knight Returns
* Net/Node..: 050/069
* User Name.: Andy Leighton
* Date......: $Date$
* Revision..: $Revision$
*
* This is an original work by Andy Leighton and is placed in the
* public domain.
*
* Modification history:
* ---------------------
*
* $Log$
*
*/
/* $DOC$
* $FUNCNAME$
* GT_FONETIX()
* $CATEGORY$
* String
* $ONELINER$
* Make a phonetic match string
* $SYNTAX$
* GT_Fonetix(<cStr>) --> cFonStr
* $ARGUMENTS$
* <cStr> - The input string
* $RETURNS$
* cFonStr - A phonetic representation of the input string
* $DESCRIPTION$
* Make a phonetic match string for a passed string.
* Only works in english. Words with foreign roots
* may not match very well. Words with more than one
* syllable also translate poorly in some cases.
* However this approach seems to match most intelligent
* misspellings and most of those made by children.
* Note the order in which these rules are applied are
* important, nevertheless please feel free to experiment
* with reordering or even adding new rules, after all you
* know what kind of strings/names are going to be matched.
*
* Rules for producing the phonetic string
*
* 1. Uppercase <cStr>
*
* 2. Replace KN with N (Knight)
*
* 3. Replace GN with N (Gnome)
*
* 4. Replace WR with R (Wright)
*
* 5. Replace WH with H if WH is followed by O
*
* 6. Replace WH with W if WH isn't follwed by O
* (Whately)
*
* 7. Replace MC with MK
*
* 8. Replace MAC with MK
*
* 9. Replace EIGH with AY
* (Can't have it not matching *my* name :-)
*
* 10. Replace IGHT with ITE (Wright)
*
* 11. Replace C with S if followed by E, I or Y
*
* 12. Replace C with K if not followed by E, I, Y, H
*
* 13. Replace D[JG] with J
*
* 14. Replace G with J if followed by E, I, or Y
*
* 15. Replace GH with H
*
* 16. Replace PH with F
*
* 17. Replace Q with KW
*
* 18. Replace TI with SH if it is followed by a vowel and
* is not at start of a word
*
* 19. Replace X with KS
*
* 20 Replace Y with I if it is not the first or last
* character
*
* 21. Replace Z with S
*
* 22. Replace MB with M if MB is at end of string (thanks METAPH.PRG)
*
* 23. Replace double consonants with just one of 'em
*
* 24. Replace AIT with ATE
*
* 25. Replace IE at end of word with Y
*
* 26. Replace LE with L
*
* 27. TERN at end of word gets replaced with TN
*
* 28. EVE is replaced with EFE
*
* 29 Replace the schwa sound with "" if succeeded by
* R, L, M, N
*
* 30. Replace SCH with SH
*
* 31. ARY, IRY, ORY, ERY all get replaced with RY if
* at end of word
*
* 32. Replace OO with U
*
* 33. Replace OI with OY
*
* 34. Remove vowels at end of word
*
* 35. Replace all vowel groups with just the first
* vowel in the group (you can try last as well)
*
* $EXAMPLES$
* use PERSONS // assume a personnel table
*
* index on GT_fonetix(PERSONS->LAST_NAME) to LASTNAME
*
* seek GT_fonetix("Leighton")
* ? found(), PERSONS_LAST_NAME // .T., Leighton
*
* seek GT_fonetix("Layton")
* ? found(), PERSONS->LAST_NAME // .T., Leighton
*
* Also compile with -DTEST
* $REFERENCES$
* .EXE Magazine Vol 4, Issue 3
* Soundex()
* METAPH.PRG in nanfor.lib
* $END$
*/
#include "gt_LIB.ch"
// a translate to make the code read nicer
#translate REPLACERULE(<cStr>, <cPhoneme>, <cRepl>) => ;
iif(<cPhoneme> $ <cStr>, ;
strtran(<cStr>, <cPhoneme>, <cRepl>), ;
<cStr>)
/*
* TEST HARNESS
*
* create a tbrowse of names, and their GT_Fonetix() equivalents
*/
#ifdef TEST
#include "inkey.ch"
static aNames := {}
function main()
local tb, i, nKey
cls
aadd(aNames, "Adams" )
aadd(aNames, "Addams" )
aadd(aNames, "Smith" )
aadd(aNames, "Smythe" )
aadd(aNames, "Naylor" )
aadd(aNames, "Nailer" )
aadd(aNames, "Holberry" )
aadd(aNames, "Wholebary" )
aadd(aNames, "Jackson" )
aadd(aNames, "Jaksun" )
aadd(aNames, "Fischer" )
aadd(aNames, "Fisher" )
aadd(aNames, "Knight" )
aadd(aNames, "Nite" )
aadd(aNames, "Stephens" )
aadd(aNames, "Stevens" )
aadd(aNames, "Neilson" )
aadd(aNames, "Nelson" )
aadd(aNames, "Wright" )
aadd(aNames, "Write" )
aadd(aNames, "Right" )
aadd(aNames, "McLean" )
aadd(aNames, "McLane" )
aadd(aNames, "Maclean" )
aadd(aNames, "Leighton" )
aadd(aNames, "Layton" )
aadd(aNames, "Whately" )
aadd(aNames, "Waitly" )
aadd(aNames, "Swaine" )
aadd(aNames, "Swane" )
aadd(aNames, "Codie" )
aadd(aNames, "Cody" )
aadd(aNames, "Griffon" )
aadd(aNames, "Griphon" )
aadd(aNames, "Gryphon" )
aadd(aNames, "Pearson" )
aadd(aNames, "Peerson" )
aadd(aNames, "Peersun" )
aadd(aNames, "Chilton" )
aadd(aNames, "Chiltern" )
aadd(aNames, "Chiltun" )
i := 1
tb := tbrowseNew(1, 1, 23, 78)
tb:addColumn(tbColumnNew(padc("Name", 25), ;
{|| padc(aNames[i], 25)}))
tb:addColumn(tbColumnNew(padc("Phoneme", 25), ;
{|| padc(GT_FONETIX(aNames[i]), 25)}))
tb:skipBlock := {|SkipCnt| SkipIt(@i, SkipCnt, len(aNames)) }
tb:goTopBlock := {|| i := 1}
tb:goBottomBlock := {|| i := len(aNames)}
do while lastkey() != K_ESC
do while nextkey() = 0 .and. !tb:stabilize()
enddo
nKey = inkey(0)
do case
case nKey = K_DOWN
tb:down()
case nKey = K_UP
tb:up()
case nKey = K_PGDN
tb:pagedown()
case nKey = K_PGUP
tb:pageup()
case nKey = K_CTRL_PGUP
tb:gotop()
case nKey = K_CTRL_PGDN
tb:gobottom()
endcase
enddo
cls
return NIL
static function SkipIt(ele, skip_cnt, maxval)
local movement := 0 // this will be returned to TBROWSE
if skip_cnt > 0
do while ele + movement < maxval .and. movement < skip_cnt
movement++
enddo
elseif skip_cnt < 0
do while ele + movement > 1 .and. movement > skip_cnt
movement--
enddo
endif
ele += movement
return movement
#endif
/**/
function GT_fonetix(cStr)
local cPhone := upper(cStr)
local nPtr := 1
cPhone := REPLACERULE(cPhone, "KN", "N")
cPhone := REPLACERULE(cPhone, "GN", "N")
cPhone := REPLACERULE(cPhone, "WR", "R")
cPhone := REPLACERULE(cPhone, "WHO", "HO")
cPhone := REPLACERULE(cPhone, "WH", "W")
cPhone := REPLACERULE(cPhone, "MAC", "MK")
cPhone := REPLACERULE(cPhone, "MC", "MK")
cPhone := REPLACERULE(cPhone, "EIGH", "AY")
cPhone := REPLACERULE(cPhone, "IGHT", "ITE")
cPhone := REPLACERULE(cPhone, "CE", "S")
cPhone := REPLACERULE(cPhone, "CI", "S")
cPhone := REPLACERULE(cPhone, "CY", "S")
/*
* do not split the following 3 rules on pain of death
*/
cPhone := REPLACERULE(cPhone, "CH", "||")
cPhone := REPLACERULE(cPhone, "C", "K")
cPhone := REPLACERULE(cPhone, "||", "CH")
cPhone := REPLACERULE(cPhone, "DG", "J")
cPhone := REPLACERULE(cPhone, "DJ", "J")
cPhone := REPLACERULE(cPhone, "GE", "JE")
cPhone := REPLACERULE(cPhone, "GI", "JY")
cPhone := REPLACERULE(cPhone, "GY", "JY")
cPhone := REPLACERULE(cPhone, "GH", "H")
cPhone := REPLACERULE(cPhone, "PH", "F")
cPhone := REPLACERULE(cPhone, "Q", "KW")
/*
* do not split the TI rules on pain of death
*/
if substr(cPhone, 1, 2) == "TI"
cPhone := "||" + substr(cPhone, 3)
endif
cPhone := REPLACERULE(cPhone, "TION", "SHUN")
cPhone := REPLACERULE(cPhone, "TIA", "SHA")
cPhone := REPLACERULE(cPhone, "TIE", "SHE")
cPhone := REPLACERULE(cPhone, "TII", "SHI") // ???
cPhone := REPLACERULE(cPhone, "TIO", "SHO")
cPhone := REPLACERULE(cPhone, "TIU", "SHU")
cPhone := REPLACERULE(cPhone, "||", "TI")
cPhone := REPLACERULE(cPhone, "X", "KS")
/*
* do not split the Y rules on pain of death
*/
if substr(cPhone, 1, 1) == "Y"
cPhone := "|" + substr(cPhone, 2)
endif
if substr(cPhone, len(cPhone), 1) == "Y"
cPhone := substr(cPhone, 1, len(cPhone) - 1) + '|'
endif
cPhone := REPLACERULE(cPhone, "Y", "I")
cPhone := REPLACERULE(cPhone, "|", "Y")
cPhone := REPLACERULE(cPhone, "Z", "S")
if substr(cPhone, len(cPhone) - 1, 2) == 'MB'
cPhone := substr(cPhone, 1, len(cPhone) - 1)
endif
/*
* double consonants NOTE no C, Q, X, or Z
* they have been replaced away already
*/
cPhone := REPLACERULE(cPhone, "BB", "B")
cPhone := REPLACERULE(cPhone, "DD", "D")
cPhone := REPLACERULE(cPhone, "FF", "F")
cPhone := REPLACERULE(cPhone, "GG", "G")
cPhone := REPLACERULE(cPhone, "HH", "H")
cPhone := REPLACERULE(cPhone, "JJ", "J")
cPhone := REPLACERULE(cPhone, "KK", "K")
cPhone := REPLACERULE(cPhone, "LL", "L")
cPhone := REPLACERULE(cPhone, "MM", "M")
cPhone := REPLACERULE(cPhone, "NN", "N")
cPhone := REPLACERULE(cPhone, "PP", "P")
cPhone := REPLACERULE(cPhone, "RR", "R")
cPhone := REPLACERULE(cPhone, "SS", "S")
cPhone := REPLACERULE(cPhone, "TT", "T")
cPhone := REPLACERULE(cPhone, "VV", "V")
cPhone := REPLACERULE(cPhone, "WW", "W")
cPhone := REPLACERULE(cPhone, "LE", "L")
if substr(cPhone, len(cPhone) - 3, 4) == 'TERN'
cPhone := substr(cPhone, 1, len(cPhone) - 4) + "TN"
endif
cPhone := REPLACERULE(cPhone, "EVE", "EFE")
cPhone := gGT_FoneSchwa(cPhone, "E", "R")
cPhone := gGT_FoneSchwa(cPhone, "E", "L")
cPhone := gGT_FoneSchwa(cPhone, "E", "M")
cPhone := gGT_FoneSchwa(cPhone, "E", "N")
cPhone := gGT_FoneSchwa(cPhone, "OU", "R")
cPhone := gGT_FoneSchwa(cPhone, "OU", "L")
cPhone := gGT_FoneSchwa(cPhone, "OU", "M")
cPhone := gGT_FoneSchwa(cPhone, "OU", "N")
cPhone := gGT_FoneSchwa(cPhone, "O", "R")
cPhone := gGT_FoneSchwa(cPhone, "O", "L")
cPhone := gGT_FoneSchwa(cPhone, "O", "M")
cPhone := gGT_FoneSchwa(cPhone, "O", "N")
cPhone := gGT_FoneSchwa(cPhone, "I", "R")
cPhone := gGT_FoneSchwa(cPhone, "I", "L")
cPhone := gGT_FoneSchwa(cPhone, "I", "M")
cPhone := gGT_FoneSchwa(cPhone, "I", "N")
cPhone := gGT_FoneSchwa(cPhone, "U", "R")
cPhone := gGT_FoneSchwa(cPhone, "U", "L")
cPhone := gGT_FoneSchwa(cPhone, "U", "M")
cPhone := gGT_FoneSchwa(cPhone, "U", "N")
cPhone := REPLACERULE(cPhone, "SCH", "SH")
cPhone := REPLACERULE(cPhone, "OO", "U")
cPhone := REPLACERULE(cPhone, "OI", "OY")
if substr(cPhone, len(cPhone) - 1, 2) == 'IE'
cPhone := substr(cPhone, 1, len(cPhone) - 2) + "Y"
endif
if substr(cPhone, len(cPhone) - 1, 2) == 'RY'
if substr(cPhone, len(cPhone) - 2, 1) $ [AEIOUY]
cPhone := substr(cPhone, 1, len(cPhone) - 3) + "RY"
endif
endif
/*
* remove trailing vowels
*/
do while substr(cPhone, len(cPhone), 1) $ [AEIOU]
cPhone := substr(cPhone, 1, len(cPhone) - 1)
enddo
do while nPtr <= len(cPhone)
if substr(cPhone, nPtr, 1) $ [AEIOU]
do while substr(cPhone, nPtr + 1, 1) $ [AEIOU]
cPhone := substr(cPhone, 1, nPtr) + substr(cPhone, nPtr + 2)
enddo
endif
nPtr++
enddo
return cPhone
/*
* Internal Function: gGT_FoneSchwa()
*
* handle a schwa phoneme.
*
* A schwa is the er or uh sound for example the o in carbon.
*
*/
function gGT_FoneSchwa(cPhone, cSchwa, cFollow)
local nPos
do while (cSchwa + cFollow) $ cPhone
nPos := at(cSchwa + cFollow, cPhone)
if substr(cPhone, nPos - 1, 1) $ [BDGFJKLMPRSTVW]
cPhone := substr(cPhone, 1, nPos - 1) +;
substr(cPhone, nPos + len(cSchwa))
else
exit
endif
enddo
return cPhone