home *** CD-ROM | disk | FTP | other *** search
- /*
- l_distance() Determins to what extent two character strings
- are (un)equal using the 'Levenstein distance'
- algorithm.
-
- Limitation: For memory space conservation and execution speed
- this implementation compares the first 20
- characters of both string only if longer strings
- are supplied to l_distance().
- '#define COMP_LEN' may be changed to decrease or
- increase the number of characters compared.
-
- De-commenting the following '#define DEBUG' will
- create a stand-alone program, which enables you
- to experiment with different values for the
- constants 'addition', 'change' and 'deletion'.
- */
-
- /* #define DEBUG */
-
- #include <stdlib.h>
- #include <string.h>
-
- #ifdef DEBUG
- #include <stdio.h>
- #endif
-
- int addition = 1; /* change constants in this block */
- int change = 3; /* of four lines if needed. */
- int deletion = 5;
- #define COMP_LEN 20
-
- #define ARR_SIZE COMP_LEN + 1
- #define SMALLEST_OF(x,y,z) ( (x<y) ? min(x,z) : min(y,z) )
- #define ZERO_IF_EQUAL(x,y) (requested[x-1] == found[y-1] ? 0 : change)
-
- int distance[ARR_SIZE][ARR_SIZE];
-
-
- int l_distance(char *requested, char *found)
- {
- register int i,j;
- int r_len, f_len;
-
- r_len = (strlen(requested)>COMP_LEN ? COMP_LEN : strlen(requested));
- f_len = (strlen(found)>COMP_LEN ? COMP_LEN : strlen(found));
-
- distance[0][0] = 0;
- for (j = 1; j <= ARR_SIZE ; j++)
- distance[0][j] = distance[0][j-1] + addition;
- for (j = 1; j <= ARR_SIZE ; j++)
- distance[j][0] = distance[j-1][0] + deletion;
-
- for (i = 1; i <= r_len; i++)
- for (j = 1; j <= f_len; j++)
- distance[i][j] = SMALLEST_OF(
- (distance[i-1][j-1] + ZERO_IF_EQUAL(i,j)),
- (distance[i][j-1] + addition),
- (distance[i-1][j] + deletion) );
-
- #ifdef DEBUG
- printf("\n");
- for (i = 1; i <= r_len; i++) {
- for (j = 1; j <= f_len; j++)
- printf(" %3d",distance[i][j]);
- printf("\n");
- }
- #endif
-
- return( distance [r_len] [f_len] );
- }
-
- #ifdef DEBUG
- void usage()
- {
- printf("\nUsage: LD requested found\n");
- printf("\n requested & found are the two strings");
- printf("\n to be compared with the Levenstein distance");
- printf("\n algorithm.\n\n");
- }
-
- int main(int argc, char *argv[])
- {
- int result;
-
- if (argc != 3) {
- usage();
- exit(1);
- } else {
- printf("\nComparing '%s' and '%s':\n", argv[1], argv[2]);
- result = l_distance(argv[1], argv[2]);
- printf("\nResult = %d\n", result);
- }
- return(0);
- }
- #endif
-