home *** CD-ROM | disk | FTP | other *** search
/ OS/2 Shareware BBS: 10 Tools / 10-Tools.zip / snip9707.zip / BMHASRCH.C < prev    next >
C/C++ Source or Header  |  1997-07-05  |  4KB  |  107 lines

  1. /* +++Date last modified: 05-Jul-1997 */
  2.  
  3. /*
  4. **  Boyer-Moore-Horspool pattern match
  5. **  Case-insensitive with accented character translation
  6. **
  7. **  public domain by Raymond Gardner 7/92
  8. **
  9. **  limitation: pattern length + subject length must be less than 32767
  10. **
  11. **  10/21/93 rdg  Fixed bug found by Jeff Dunlop
  12. */
  13. #include <limits.h>                                         /* rdg 10/93 */
  14. #include <stddef.h>
  15. #include <string.h>
  16.  
  17. typedef unsigned char uchar;
  18.  
  19. #define LOWER_ACCENTED_CHARS
  20.  
  21. unsigned char lowervec[UCHAR_MAX+1] = {                     /* rdg 10/93 */
  22.   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
  23.  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
  24.  32,'!','"','#','$','%','&','\'','(',')','*','+',',','-','.','/',
  25. '0','1','2','3','4','5','6','7','8','9',':',';','<','=','>','?',
  26. '@','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o',
  27. 'p','q','r','s','t','u','v','w','x','y','z','[','\\',']','^','_',
  28. '`','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o',
  29. 'p','q','r','s','t','u','v','w','x','y','z','{','|','}','~',127,
  30. #ifdef LOWER_ACCENTED_CHARS
  31. 'c','u','e','a','a','a','a','c','e','e','e','i','i','i','a','a',
  32. 'e',145,146,'o','o','o','u','u','y','o','u',155,156,157,158,159,
  33. 'a','i','o','u','n','n',166,167,168,169,170,171,172,173,174,175,
  34. #else
  35. 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
  36. 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
  37. 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
  38. #endif
  39. 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
  40. 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
  41. 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
  42. 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
  43. 240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,
  44. };
  45.  
  46. #define lowerc(c) lowervec[(uchar)(c)]
  47.  
  48. #define LARGE 32767
  49.  
  50. static int patlen;
  51. static int skip[UCHAR_MAX+1];                               /* rdg 10/93 */
  52. static int skip2;
  53. static uchar *pat;
  54.  
  55. void bmha_init(const char *pattern)
  56. {
  57.       int i, j;
  58.       pat = (uchar *)pattern;
  59.       patlen = strlen(pattern);
  60.       for (i = 0; i <= UCHAR_MAX; ++i)                      /* rdg 10/93 */
  61.       {
  62.             skip[i] = patlen;
  63.             for (j = patlen - 1; j >= 0; --j)
  64.             {
  65.                   if (lowerc(i) == lowerc(pat[j]))
  66.                         break;
  67.             }
  68.             if (j >= 0)
  69.                   skip[i] = patlen - j - 1;
  70.             if (j == patlen - 1)
  71.                   skip[i] = LARGE;
  72.       }
  73.       skip2 = patlen;
  74.       for (i = 0; i < patlen - 1; ++i)
  75.       {
  76.             if ( lowerc(pat[i]) == lowerc(pat[patlen - 1]) )
  77.                   skip2 = patlen - i - 1;
  78.       }
  79. }
  80.  
  81. char *bmha_search(const char *string, const int stringlen)
  82. {
  83.       int i, j;
  84.       char *s;
  85.  
  86.       i = patlen - 1 - stringlen;
  87.       if (i >= 0)
  88.             return NULL;
  89.       string += stringlen;
  90.       for ( ;; )
  91.       {
  92.             while ((i += skip[((uchar *)string)[i]]) < 0)
  93.                   ;                           /* mighty fast inner loop */
  94.             if (i < (LARGE - stringlen))
  95.                   return NULL;
  96.             i -= LARGE;
  97.             j = patlen - 1;
  98.             s = (char *)string + (i - j);
  99.             while (--j >= 0 && lowerc(s[j]) == lowerc(pat[j]))
  100.                   ;
  101.             if ( j < 0 )                                    /* rdg 10/93 */
  102.                   return s;                                 /* rdg 10/93 */
  103.             if ( (i += skip2) >= 0 )                        /* rdg 10/93 */
  104.                   return NULL;                              /* rdg 10/93 */
  105.       }
  106. }
  107.