home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Power CD-ROM!! 7
/
POWERCD7.ISO
/
prgmming
/
clipper
/
levensht.prg
< prev
next >
Wrap
Text File
|
1993-10-14
|
5KB
|
197 lines
/*
* GT CLIPPER STANDARD HEADER
*
* File......: levensht.prg
* Author....: Andy M Leighton
* BBS.......: The Dark Knight Returns
* Net/Node..: 050/069
* User Name.: Andy Leighton
* Date......: $Date$
* Revision..: $Revision$
*
* This is an original work by Andy Leighton and is placed in the
* public domain.
*
* Modification history:
* ---------------------
*
* $Log$
*
*/
/* $DOC$
* $FUNCNAME$
* GT_LEVDIST()
* $CATEGORY$
* String
* $ONELINER$
* Return the Levenshtein distance between two strings
* $SYNTAX$
* GT_LevDist(<cStrA>, <cStrB>) --> nDist
* $ARGUMENTS$
* <cStrA> - First string
* <cStrB> - Second String
* $RETURNS$
* nDist - Levenshtein Distance.
* $DESCRIPTION$
* Return the Levenshtein distance between two strings
*
* The Levenshtein distance between two strings is the
* minimum number of operations (Insertions, Deletions or
* Substitutions) to transform one string to the other. The
* algoritm used to solve this problem is a prime example of
* dynamic programming. It is very similar approach to
* solving the shortest distance between two cities
* connected by a network of roads.
*
* The algorithm is of order O(nm) where n and m are the
* lengths of the two strings. Therefore when the two
* strings are equal the algoritm is of order O(n²) so you
* should check for equality first before calling
* GT_LevDist()
*
* REFERENCES
* Doctor Dobb's Journal #187, April 1992
*
* IMPROVEMENTS
* The main improvements in this routine will be made by
* introducing a more complex operation costing.
*
* ie. have three matrices that record the cost of adding or
* deleting a specific letter, and substituting a
* specified letter with another.
*
* More improvements can be achieved but at the loss of the
* general purpose of the routine.
*
* This is left as an exercise for the foolhardy or brave.
* $EXAMPLES$
* ? GT_LevDist("Axolotl", "Axl Rose") // prints 5
*
* Axolotl -> Axl Rose in 5 operations
*
* Axolotl
* Axolote SUB e for l
* Axolose SUB s for t
* AxoRose SUB R for l
* Ax Rose SUB space for o
* Axl Rose INS L
* $SEEALSO$
* GT_LEVCOST()
* $END$
*/
#include "gt_LIB.ch"
#include "levensht.ch"
static matrix := {}
function GT_LevDist(cA, cB)
local nDistance
gGT_initMatrix(cA, cB)
gGT_calcMatrix(cA, cB)
nDistance := DIST(len(cB) + 1, len(cA) + 1)
matrix := NIL
return nDistance
/*
* INTERNAL FUNCTION: gGT_initMatrix()
*
* initialise the matrix before calculating Levenshtein distance
*/
static function gGT_initMatrix(cA, cB)
local i, j
matrix := array(len(cA) + 1, len(cB) + 1, 2)
for i := 1 to len(cA) + 1
matrix[i][1][1] := i - 1
matrix[i][1][2] := LV_INS
next
for i := 1 to len(cB) + 1
matrix[1][i][1] := i - 1
matrix[1][i][2] := LV_DEL
for j := 2 to len(cA) + 1
matrix[j][i][1] := 0
matrix[j][i][2] := 0
next
next
return NIL
/*
* INTERNAL FUNCTION: gGT_calcMatrix()
*
* calculate the matrix of distances
*/
static function gGT_calcMatrix(cA, cB)
local nRow
local nCol
for nCol := 2 to len(cA) + 1
for nRow := 2 to len(cB) + 1
gGT_calcCell(cA, cB, nRow, nCol)
next
next
return NIL
/*
* INTERNAL FUNCTION: gGT_calcCell()
*
* calculate just one cell of the matrix.
* this is where all the work is done.
*
* To step through this whole algorithm in english text would
* be much longer than the code below and IMHO would be much
* harder to follow. It also involves a mathematical expression
* that is difficult to represent in straight ASCII.
*/
static function gGT_calcCell(cA, cB, row, col)
local nDistNW := DIST(row - 1, col - 1)
local nDistW := DIST(row, col - 1)
local nDistN := DIST(row - 1, col)
local nOp
if nDistW < nDistN
if nDistW < nDistNW
matrix[col][row][2] := LV_INS
matrix[col][row][1] := nDistW + gGT_Cost(LV_INS)
else
if substr(cA, col - 1, 1) == substr(cB, row - 1, 1)
nOp := LV_MATCH
else
nOp := LV_SUB
endif
matrix[col][row][2] := nOp
matrix[col][row][1] := nDistNW + gGT_Cost(nOp)
endif
else
if nDistN < nDistNW
matrix[col][row][2] := LV_DEL
matrix[col][row][1] := nDistN + gGT_Cost(LV_DEL)
else
if substr(cA, col - 1, 1) == substr(cB, row - 1, 1)
nOp := LV_MATCH
else
nOp := LV_SUB
endif
matrix[col][row][2] := nOp
matrix[col][row][1] := nDistNW + gGT_Cost(nOp)
endif
endif
return NIL