home *** CD-ROM | disk | FTP | other *** search
/ The Developer Connection…ice Driver Kit for OS/2 3 / DEV3-D1.ISO / source / util2src / regexp.h < prev    next >
Encoding:
C/C++ Source or Header  |  1991-04-04  |  15.1 KB  |  453 lines

  1. /* Module @(#)regexp.h  1.1 last modified 5/25/90 09:20:38 */
  2. /*
  3. * 5713-AEQ COPYRIGHT IBM CORP 1989
  4. * LICENSED MATERIAL - PROGRAM PROPERTY OF IBM
  5. *
  6. * 11/30/89 (BEY) Removed 'register' declarations.  'register c' is not 'char c'
  7. * 12/01/89 (BEY) Redefined compile() to return 'int' instead of 'char *'.
  8. *                Also, data declarations and functions are defined as static
  9. *                to hide them from external source modules.
  10. * 12/08/89 (BEY) Added prototypes to all functions' parameters, for strong
  11. *                type checking.  Also, the H_SCCSID line was enclosed in
  12. *                a comment for portability to other compiler environments.
  13. * 12/11/89 (BEY) Added typecasts to avoid compiler warnings.  The original
  14. *                code is commented out and the new lines are marked with '!!!'.
  15. * 04/04/91 (BEY) Converted to DOS text format and compiled using C/2.
  16. */
  17. /*H_SCCSID(@(#)regexp.h   1.11    AIX)    Modified 2/20/89 19:38:09 */
  18. /* @(#)regexp.h 7.1 - 87/06/16 - 00:12:28 */
  19. #define CBRA    2
  20. #define CCHR    4
  21. #define CDOT    8
  22. #define CCL     12
  23. #define CDOL    20
  24. #define CCEOF   22
  25. #define CKET    24
  26. #define CBACK   36
  27.  
  28. #define STAR    01
  29. #define RNGE    03
  30.  
  31. #define NBRA    9
  32.  
  33. #define PLACE(c)        ep[c >> 3] |= bittab[c & 07]
  34. #define ISTHERE(c)      (ep[c >> 3] & bittab[c & 07])
  35.  
  36. static char    *braslist[NBRA];
  37. static char    *braelist[NBRA];
  38. static char    *loc1, *loc2, *locs;
  39. static char    bittab[] = { 1, 2, 4, 8, 16, 32, 64, 128 };
  40.  
  41. static int     sed, nbra;
  42. static int     ebra;
  43. static int     nodelim;
  44. static int     circf;
  45. static int     low;
  46. static int     size;
  47.  
  48. /* Prototypes for functions in this .h file */
  49.  
  50. static int compile(char *, char *, char *, int);
  51. static int step(char *, char *);
  52. static int advance(char *, char *);
  53. static int getrnge(char *);
  54. static int ecmp(char *, char *, int);
  55.  
  56. /*char * */
  57. static int
  58. compile(instring, ep, endbuf, seof)
  59. char *ep;
  60. char *instring, *endbuf;
  61. int   seof;
  62. {
  63.         INIT    /* Dependent declarations and initializations */
  64.         char c;
  65.         int eof = seof;
  66.         char *lastep = instring;
  67.         int cclcnt;
  68.         char bracket[NBRA], *bracketp;
  69.         int closed;
  70.         char neg;
  71.         int lc;
  72.         int i, cflg;
  73.  
  74.         lastep = 0;
  75.         if((c = GETC()) == eof || c == '\n') {
  76.                 if(c == '\n') {
  77.                         UNGETC(c);
  78.                         nodelim = 1;
  79.                 }
  80.                 if(*ep == 0 && !sed)
  81.                         ERROR(41);
  82.                 RETURN(ep);
  83.         }
  84.         bracketp = bracket;
  85.         circf = closed = nbra = ebra = 0;
  86.         if(c == '^')
  87.                 circf++;
  88.         else
  89.                 UNGETC(c);
  90.         while(1) {
  91.                 if(ep >= endbuf)
  92.                         ERROR(50);
  93.                 c = GETC();
  94.                 if(c != '*' && ((c != '\\') || (PEEKC() != '{')))
  95.                         lastep = ep;
  96.                 if(c == eof) {
  97.                         *ep++ = CCEOF;
  98.                         RETURN(ep);
  99.                 }
  100.                 switch(c) {
  101.  
  102.                 case '.':
  103.                         *ep++ = CDOT;
  104.                         continue;
  105.  
  106.                 case '\n':
  107.                         if(!sed) {
  108.                                 UNGETC(c);
  109.                                 *ep++ = CCEOF;
  110.                                 nodelim = 1;
  111.                                 RETURN(ep);
  112.                         }
  113.                         else ERROR(36);
  114.                 case '*':
  115.                         if(lastep == 0 || *lastep == CBRA || *lastep == CKET)
  116.                                 goto defchar;
  117.                         *lastep |= STAR;
  118.                         continue;
  119.  
  120.                 case '$':
  121.                         if(PEEKC() != eof && PEEKC() != '\n')
  122.                                 goto defchar;
  123.                         *ep++ = CDOL;
  124.                         continue;
  125.  
  126.                 case '[':
  127.                         if(&ep[17] >= endbuf)
  128.                                 ERROR(50);
  129.  
  130.                         *ep++ = CCL;
  131.                         lc = 0;
  132.                         for(i = 0; i < 16; i++)
  133.                                 ep[i] = 0;
  134.  
  135.                         neg = 0;
  136.                         if((c = GETC()) == '^') {
  137.                                 neg = 1;
  138.                                 c = GETC();
  139.                         }
  140.  
  141.                         do {
  142.                                 if(c == '\0' || c == '\n')
  143.                                         ERROR(49);
  144.                                 if(c == '-' && lc != 0) {
  145.                                         if((c = GETC()) == ']') {
  146.                                                 PLACE('-');
  147.                                                 break;
  148.                                         }
  149.                                         while(lc < c) {
  150.                                                 PLACE(lc);
  151.                                                 lc++;
  152.                                         }
  153.                                 }
  154.                                 lc = c;
  155.                                 PLACE(c);
  156.                         } while((c = GETC()) != ']');
  157.                         if(neg) {
  158.                                 for(cclcnt = 0; cclcnt < 16; cclcnt++)
  159.                                         ep[cclcnt] ^= -1;
  160.                                 ep[0] &= 0376;
  161.                         }
  162.  
  163.                         ep += 16;
  164.  
  165.                         continue;
  166.  
  167.                 case '\\':
  168.                         switch(c = GETC()) {
  169.  
  170.                         case '(':
  171.                                 if(nbra >= NBRA)
  172.                                         ERROR(43);
  173.                                 *bracketp++ = nbra;
  174.                                 *ep++ = CBRA;
  175.                             /*  *ep++ = nbra++;  */
  176.                                 *ep++ = (char)nbra;  /*!!!*/
  177.                                 nbra++;              /*!!!*/
  178.                                 continue;
  179.  
  180.                         case ')':
  181.                                 if(bracketp <= bracket || ++ebra != nbra)
  182.                                         ERROR(42);
  183.                                 *ep++ = CKET;
  184.                                 *ep++ = *--bracketp;
  185.                                 closed++;
  186.                                 continue;
  187.  
  188.                         case '{':
  189.                                 if(lastep == (char *) 0)
  190.                                         goto defchar;
  191.                                 *lastep |= RNGE;
  192.                                 cflg = 0;
  193.                         nlim:
  194.                                 c = GETC();
  195.                                 i = 0;
  196.                                 do {
  197.                                         if('0' <= c && c <= '9')
  198.                                                 i = 10 * i + c - '0';
  199.                                         else
  200.                                                 ERROR(16);
  201.                                 } while(((c = GETC()) != '\\') && (c != ','));
  202.                                 if(i > 255)
  203.                                         ERROR(11);
  204.                             /*  *ep++ = i;    */
  205.                                 *ep++ = (char)i;   /*!!!*/
  206.                                 if(c == ',') {
  207.                                         if(cflg++)
  208.                                                 ERROR(44);
  209.                                         if((c = GETC()) == '\\')
  210.                                                 *ep++ = 255;
  211.                                         else {
  212.                                                 UNGETC(c);
  213.                                                 goto nlim;
  214.                                                 /* get 2'nd number */
  215.                                         }
  216.                                 }
  217.                                 if(GETC() != '}')
  218.                                         ERROR(45);
  219.                                 if(!cflg)       /* one number */
  220.                                         *ep++ = i;
  221.                                 else if((ep[-1] & 0377) < (ep[-2] & 0377))
  222.                                         ERROR(46);
  223.                                 continue;
  224.  
  225.                         case '\n':
  226.                                 ERROR(36);
  227.  
  228.                         case 'n':
  229.                                 c = '\n';
  230.                                 goto defchar;
  231.  
  232.                         default:
  233.                                 if(c >= '1' && c <= '9') {
  234.                                     /*  if((c -= '1') >= closed) */
  235.                                         if((c -= (int)'1') >= closed)   /*!!!*/
  236.                                                 ERROR(25);
  237.                                         *ep++ = CBACK;
  238.                                         *ep++ = c;
  239.                                         continue;
  240.                                 }
  241.                         }
  242.         /* Drop through to default to use \ to turn off special chars */
  243.  
  244.                 defchar:
  245.                 default:
  246.                         lastep = ep;
  247.                         *ep++ = CCHR;
  248.                         *ep++ = c;
  249.                 }
  250.         }
  251. }
  252.  
  253. static int
  254. step(p1, p2)
  255. char *p1, *p2;
  256. {
  257.         char c;
  258.  
  259.         if(circf) {
  260.                 loc1 = p1;
  261.                 return(advance(p1, p2));
  262.         }
  263.         /* fast check for first character */
  264.         if(*p2 == CCHR) {
  265.                 c = p2[1];
  266.                 do {
  267.                         if(*p1 != c)
  268.                                 continue;
  269.                         if(advance(p1, p2)) {
  270.                                 loc1 = p1;
  271.                                 return(1);
  272.                         }
  273.                 } while(*p1++);
  274.                 return(0);
  275.         }
  276.                 /* regular algorithm */
  277.         do {
  278.                 if(advance(p1, p2)) {
  279.                         loc1 = p1;
  280.                         return(1);
  281.                 }
  282.         } while(*p1++);
  283.         return(0);
  284. }
  285.  
  286. static int
  287. advance(lp, ep)
  288. char *lp, *ep;
  289. {
  290.         char *curlp;
  291.         char c;
  292.         char *bbeg;
  293.         int ct;
  294.  
  295.         while(1) {
  296.                 switch(*ep++) {
  297.  
  298.                 case CCHR:
  299.                         if(*ep++ == *lp++)
  300.                                 continue;
  301.                         return(0);
  302.  
  303.                 case CDOT:
  304.                         if(*lp++)
  305.                                 continue;
  306.                         return(0);
  307.  
  308.                 case CDOL:
  309.                         if(*lp == 0)
  310.                                 continue;
  311.                         return(0);
  312.  
  313.                 case CCEOF:
  314.                         loc2 = lp;
  315.                         return(1);
  316.  
  317.                 case CCL:
  318.                         c = *lp++ & 0177;
  319.                         if(ISTHERE(c)) {
  320.                                 ep += 16;
  321.                                 continue;
  322.                         }
  323.                         return(0);
  324.                 case CBRA:
  325.                         braslist[*ep++] = lp;
  326.                         continue;
  327.  
  328.                 case CKET:
  329.                         braelist[*ep++] = lp;
  330.                         continue;
  331.  
  332.                 case CCHR | RNGE:
  333.                         c = *ep++;
  334.                         getrnge(ep);
  335.                         while(low--)
  336.                                 if(*lp++ != c)
  337.                                         return(0);
  338.                         curlp = lp;
  339.                         while(size--)
  340.                                 if(*lp++ != c)
  341.                                         break;
  342.                         if(size < 0)
  343.                                 lp++;
  344.                         ep += 2;
  345.                         goto star;
  346.  
  347.                 case CDOT | RNGE:
  348.                         getrnge(ep);
  349.                         while(low--)
  350.                                 if(*lp++ == '\0')
  351.                                         return(0);
  352.                         curlp = lp;
  353.                         while(size--)
  354.                                 if(*lp++ == '\0')
  355.                                         break;
  356.                         if(size < 0)
  357.                                 lp++;
  358.                         ep += 2;
  359.                         goto star;
  360.  
  361.                 case CCL | RNGE:
  362.                         getrnge(ep + 16);
  363.                         while(low--) {
  364.                                 c = *lp++ & 0177;
  365.                                 if(!ISTHERE(c))
  366.                                         return(0);
  367.                         }
  368.                         curlp = lp;
  369.                         while(size--) {
  370.                                 c = *lp++ & 0177;
  371.                                 if(!ISTHERE(c))
  372.                                         break;
  373.                         }
  374.                         if(size < 0)
  375.                                 lp++;
  376.                         ep += 18;               /* 16 + 2 */
  377.                         goto star;
  378.  
  379.                 case CBACK:
  380.                         bbeg = braslist[*ep];
  381.                         ct = braelist[*ep++] - bbeg;
  382.  
  383.                         if(ecmp(bbeg, lp, ct)) {
  384.                                 lp += ct;
  385.                                 continue;
  386.                         }
  387.                         return(0);
  388.  
  389.                 case CBACK | STAR:
  390.                         bbeg = braslist[*ep];
  391.                         ct = braelist[*ep++] - bbeg;
  392.                         curlp = lp;
  393.                         while(ecmp(bbeg, lp, ct))
  394.                                 lp += ct;
  395.  
  396.                         while(lp >= curlp) {
  397.                                 if(advance(lp, ep))     return(1);
  398.                                 lp -= ct;
  399.                         }
  400.                         return(0);
  401.  
  402.  
  403.                 case CDOT | STAR:
  404.                         curlp = lp;
  405.                         while(*lp++);
  406.                         goto star;
  407.  
  408.                 case CCHR | STAR:
  409.                         curlp = lp;
  410.                         while(*lp++ == *ep);
  411.                         ep++;
  412.                         goto star;
  413.  
  414.                 case CCL | STAR:
  415.                         curlp = lp;
  416.                         do {
  417.                                 c = *lp++ & 0177;
  418.                         } while(ISTHERE(c));
  419.                         ep += 16;
  420.                         goto star;
  421.  
  422.                 star:
  423.                         do {
  424.                                 if(--lp == locs)
  425.                                         break;
  426.                                 if(advance(lp, ep))
  427.                                         return(1);
  428.                         } while(lp > curlp);
  429.                         return(0);
  430.  
  431.                 }
  432.         }
  433. }
  434.  
  435. static int
  436. getrnge(str)
  437. char *str;
  438. {
  439.         low = *str++ & 0377;
  440.         size = (*str == 255)? 20000: (*str &0377) - low;
  441. }
  442.  
  443. static int
  444. ecmp(a, b, count)
  445. char   *a, *b;
  446. int     count;
  447. {
  448.         while(count--)
  449.                 if(*a++ != *b++)
  450.                         return(0);
  451.         return(1);
  452. }
  453.