home *** CD-ROM | disk | FTP | other *** search
/ Monster Media 1994 #1 / monster.zip / monster / PROG_C / SNIP9404.ZIP / BMHASRCH.C < prev    next >
C/C++ Source or Header  |  1994-04-03  |  4KB  |  105 lines

  1. /*
  2. **  Boyer-Moore-Horspool pattern match
  3. **  Case-insensitive with accented character translation
  4. **
  5. **  public domain by Raymond Gardner 7/92
  6. **
  7. **  limitation: pattern length + subject length must be less than 32767
  8. **
  9. **  10/21/93 rdg  Fixed bug found by Jeff Dunlop
  10. */
  11. #include <limits.h>                                         /* rdg 10/93 */
  12. #include <stddef.h>
  13. #include <string.h>
  14.  
  15. typedef unsigned char uchar;
  16.  
  17. #define LOWER_ACCENTED_CHARS
  18.  
  19. unsigned char lowervec[UCHAR_MAX+1] = {                     /* rdg 10/93 */
  20.   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
  21.  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
  22.  32,'!','"','#','$','%','&','\'','(',')','*','+',',','-','.','/',
  23. '0','1','2','3','4','5','6','7','8','9',':',';','<','=','>','?',
  24. '@','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o',
  25. 'p','q','r','s','t','u','v','w','x','y','z','[','\\',']','^','_',
  26. '`','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o',
  27. 'p','q','r','s','t','u','v','w','x','y','z','{','|','}','~',127,
  28. #ifdef LOWER_ACCENTED_CHARS
  29. 'c','u','e','a','a','a','a','c','e','e','e','i','i','i','a','a',
  30. 'e',145,146,'o','o','o','u','u','y','o','u',155,156,157,158,159,
  31. 'a','i','o','u','n','n',166,167,168,169,170,171,172,173,174,175,
  32. #else
  33. 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
  34. 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
  35. 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
  36. #endif
  37. 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
  38. 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
  39. 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
  40. 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
  41. 240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,
  42. };
  43.  
  44. #define lowerc(c) lowervec[(uchar)(c)]
  45.  
  46. #define LARGE 32767
  47.  
  48. static int patlen;
  49. static int skip[UCHAR_MAX+1];                               /* rdg 10/93 */
  50. static int skip2;
  51. static uchar *pat;
  52.  
  53. void bmh_init(const char *pattern)
  54. {
  55.       int i, j;
  56.       pat = (uchar *)pattern;
  57.       patlen = strlen(pattern);
  58.       for (i = 0; i <= UCHAR_MAX; ++i)                      /* rdg 10/93 */
  59.       {
  60.             skip[i] = patlen;
  61.             for (j = patlen - 1; j >= 0; --j)
  62.             {
  63.                   if (lowerc(i) == lowerc(pat[j]))
  64.                         break;
  65.             }
  66.             if (j >= 0)
  67.                   skip[i] = patlen - j - 1;
  68.             if (j == patlen - 1)
  69.                   skip[i] = LARGE;
  70.       }
  71.       skip2 = patlen;
  72.       for (i = 0; i < patlen - 1; ++i)
  73.       {
  74.             if ( lowerc(pat[i]) == lowerc(pat[patlen - 1]) )
  75.                   skip2 = patlen - i - 1;
  76.       }
  77. }
  78.  
  79. char *bmh_search(const char *string, const int stringlen)
  80. {
  81.       int i, j;
  82.       char *s;
  83.  
  84.       i = patlen - 1 - stringlen;
  85.       if (i >= 0)
  86.             return NULL;
  87.       string += stringlen;
  88.       for ( ;; )
  89.       {
  90.             while ((i += skip[((uchar *)string)[i]]) < 0)
  91.                   ;                           /* mighty fast inner loop */
  92.             if (i < (LARGE - stringlen))
  93.                   return NULL;
  94.             i -= LARGE;
  95.             j = patlen - 1;
  96.             s = (char *)string + (i - j);
  97.             while (--j >= 0 && lowerc(s[j]) == lowerc(pat[j]))
  98.                   ;
  99.             if ( j < 0 )                                    /* rdg 10/93 */
  100.                   return s;                                 /* rdg 10/93 */
  101.             if ( (i += skip2) >= 0 )                        /* rdg 10/93 */
  102.                   return NULL;                              /* rdg 10/93 */
  103.       }
  104. }
  105.