AmigActive 2

home *** CD-ROM | disk | FTP | other *** search

/ AmigActive 2 / AACD 2.iso / AACD / Programming / jikes-1.02 / src / spell.h < prev next >

Wrap

C/C++ Source or Header | 1999-08-26 | 3.8 KB | 130 lines

// $Id: spell.h,v 1.3 1999/08/26 15:34:10 shields Exp $ // // This software is subject to the terms of the IBM Jikes Compiler // License Agreement available at the following URL: // http://www.ibm.com/research/jikes. // Copyright (C) 1996, 1999, International Business Machines Corporation // and others. All Rights Reserved. // You must accept the terms of that agreement to use this software. // #ifndef spell_INCLUDED #define spell_INCLUDED #include "config.h" #include "case.h" class Spell { static inline int Min(int x, int y) { return (x < y ? x : y); } public: static int Index(wchar_t *str1, wchar_t *str2) { int len1 = wcslen(str1), len2 = wcslen(str2); wchar_t *s1 = new wchar_t[len1 + 1], *s2 = new wchar_t[len2 + 1]; for (int i = 0; i < len1; i++) s1[i] = Case::ToAsciiLower(str1[i]); s1[len1] = U_NULL; for (int j = 0; j < len2; j++) s2[j] = Case::ToAsciiLower(str2[j]); s2[len2] = U_NULL; if (len1 == 1 && len2 == 1) { // // Singleton mispellings: // // ; <----> , // // ; <----> : // // . <----> , // // ' <----> " // if ((s1[0] == U_SEMICOLON && s2[0] == U_COMMA) || (s1[0] == U_COMMA && s2[0] == U_SEMICOLON) || (s1[0] == U_SEMICOLON && s2[0] == U_COLON) || (s1[0] == U_COLON && s2[0] == U_SEMICOLON) || (s1[0] == U_DOT && s2[0] == U_COMMA) || (s1[0] == U_COMMA && s2[0] == U_DOT) || (s1[0] == U_SINGLE_QUOTE && s2[0] == U_DOUBLE_QUOTE) || (s1[0] == U_DOUBLE_QUOTE && s2[0] == U_SINGLE_QUOTE)) return 3; } // // Scan the two strings. Increment "match" count for each match. // When a transposition is encountered, increase "match" count // by two but count it as an error. When a typo is found, skip // it and count it as an error. Otherwise we have a mismatch; if // one of the strings is longer, increment its index, otherwise, // increment both indices and continue. // // This algorithm is an adaptation of a boolean misspelling // algorithm proposed by Juergen Uhl. // int count = 0, prefix_length = 0, num_errors = 0, i1 = 0, i2 = 0; while ((i1 < len1) && (i2 < len2)) { if (s1[i1] == s2[i2]) { count++; i1++; i2++; if (num_errors == 0) prefix_length++; } else if (s1[i1 + 1] == s2[i2] && s1[i1] == s2[i2 + 1]) { count += 2; i1 += 2; i2 += 2; num_errors++; } else if (s1[i1 + 1] == s2[i2 + 1]) { i1++; i2++; num_errors++; } else { if ((len1 - i1) > (len2 - i2)) i1++; else if ((len2 - i2) > (len1 - i1)) i2++; else { i1++; i2++; } num_errors++; } } if (i1 < len1 || i2 < len2) num_errors++; if (num_errors > (Min(len1, len2) / 6 + 1)) count = prefix_length; delete [] s1; delete [] s2; return (count * 10 / (len1 + num_errors)); } }; #endif // #ifndef spell_INCLUDED