home *** CD-ROM | disk | FTP | other *** search
/ SGI Developer Toolbox 6.1 / SGI Developer Toolbox 6.1 - Disc 4.iso / public / ImageMagick / xtp / regular.c < prev    next >
Encoding:
C/C++ Source or Header  |  1994-08-02  |  35.8 KB  |  1,263 lines

  1. /*
  2. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  3. %                                                                             %
  4. %                                                                             %
  5. %                                                                             %
  6. %             RRRR   EEEEE   GGGG  U   U  L       AAA   RRRR                  %
  7. %             R   R  E      G      U   U  L      A   A  R   R                 %
  8. %             RRRR   EEE    G  GG  U   U  L      AAAAA  RRRR                  %
  9. %             R R    E      G   G  U   U  L      A   A  R R                   %
  10. %             R  R   EEEEE   GGGG   UUU   LLLLL  A   A  R  R                  %
  11. %                                                                             %
  12. %                                                                             %
  13. %                     Regular Expression Interpreter.                         %
  14. %                                                                             %
  15. %                                                                             %
  16. %                                                                             %
  17. %                           Software Design                                   %
  18. %                             John Cristy                                     %
  19. %                              July 1992                                      %
  20. %                                                                             %
  21. %                                                                             %
  22. %  Copyright 1993 E. I. Dupont de Nemours & Company                           %
  23. %                                                                             %
  24. %  Permission to use, copy, modify, distribute, and sell this software and    %
  25. %  its documentation for any purpose is hereby granted without fee,           %
  26. %  provided that the above Copyright notice appear in all copies and that     %
  27. %  both that Copyright notice and this permission notice appear in            %
  28. %  supporting documentation, and that the name of E. I. Dupont de Nemours     %
  29. %  & Company not be used in advertising or publicity pertaining to            %
  30. %  distribution of the software without specific, written prior               %
  31. %  permission.  E. I. Dupont de Nemours & Company makes no representations    %
  32. %  about the suitability of this software for any purpose.  It is provided    %
  33. %  "as is" without express or implied warranty.                               %
  34. %                                                                             %
  35. %  E. I. Dupont de Nemours & Company disclaims all warranties with regard     %
  36. %  to this software, including all implied warranties of merchantability      %
  37. %  and fitness, in no event shall E. I. Dupont de Nemours & Company be        %
  38. %  liable for any special, indirect or consequential damages or any           %
  39. %  damages whatsoever resulting from loss of use, data or profits, whether    %
  40. %  in an action of contract, negligence or other tortious action, arising     %
  41. %  out of or in connection with the use or performance of this software.      %
  42. %                                                                             %
  43. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  44. %
  45. %  CompileRegularExpression returns NULL for a failure, where failures are
  46. %  syntax errors, exceeding implementation limits, or applying `+' or `*'
  47. %  to a possibly-null operand.
  48. %
  49. %  This is essentially the same routine written and copywrited by Henry
  50. %  Spencer, University of Toronto.  I made minor programming changes but
  51. %  major variable name changes to improve readability.
  52. %
  53. %  A regular expression is zero or more branches, separated by `|'. It
  54. %  matches anything that matches one of the branches.
  55. %
  56. %  A branch is zero or more pieces, concatenated. It matches a match for
  57. %  the first, followed by a match for the second, etc.
  58. %
  59. %  A piece is an atom possibly followed by `*', `+', or `?'. An atom
  60. %  followed by `*' matches a sequence of 0 or more matches of the atom. An
  61. %  atom followed by `+' matches a sequence of 1 or more matches of the
  62. %  atom. An atom followed by `?' matches a match of the atom, or the null
  63. %  pattern.
  64. %
  65. %  An atom is a regular expression in parentheses (matching a match for
  66. %  the regular expression), a range (see below), `.' (matching any single
  67. %  character), `^' (matching the null pattern at the beginning of the input
  68. %  pattern), `$' (matching the null pattern at the end of the input pattern),
  69. %  a `\' followed by a single character (matching that character), or a
  70. %  single character with no other significance (matching that character).
  71. %
  72. %  A range is a sequence of characters enclosed in `[]'. It normally
  73. %  matches any single character from the sequence. If the sequence begins
  74. %  with `^', it matches any single character not from the rest of the
  75. %  sequence. If two characters in the sequence are separated by `-', this
  76. %  is shorthand for the full list of ASCII characters between them (e.g.
  77. %  `[0-9]' matches any decimal digit). To include a literal `]' in the
  78. %  sequence, make it the first character (following a possible `^').  To
  79. %  include a literal `-', make it the first or last character.
  80. %
  81. %  If a regular expression could match two different parts of the input
  82. %  pattern, it will match the one which begins earliest. If both begin in
  83. %  the same place but match different lengths, or match the same length
  84. %  in different ways, life gets messier, as follows.
  85. %
  86. %  In general, the possibilities in a list of branches are considered in
  87. %  left-to-right order, the possibilities for `*', `+', and `?' are
  88. %  considered longest-first, nested constructs are considered from the
  89. %  outermost in, and concatenated constructs are considered
  90. %  leftmost-first. The match that will be chosen is the one that uses the
  91. %  earliest possibility in the first choice that has to be made. If there
  92. %  is more than one choice, the next will be made in the same manner
  93. %  (earliest possibility) subject to the decision on the first choice.
  94. %  And so forth.
  95. %
  96. %  For example, `(ab|a)b*c' could match `abc' in one of two ways. The
  97. %  first choice is between `ab' and `a'; since `ab' is earlier, and does
  98. %  lead to a successful overall match, it is chosen. Since the `b' is
  99. %  already spoken for, the `b*' must match its last possibility-the empty
  100. %  pattern-since it must respect the earlier choice.
  101. %
  102. %  In the particular case where no `|'s are present and there is only one
  103. %  `*', `+', or `?', the net effect is that the longest possible match
  104. %  will be chosen. So `ab*', presented with `xabbbby', will match `abbbb'.
  105. %  Note that if `ab*' is tried against `xabyabbbz', it will match `ab'
  106. %  just after `x', due to the begins-earliest rule. (In effect, the deci-
  107. %  sion on where to start the match is the first choice to be made, hence
  108. %  subsequent choices must respect it even if this leads them to
  109. %  less-preferred alternatives.)
  110. %
  111. %
  112. */
  113.  
  114. #include "xtp.h"
  115. #include "regular.h"
  116.  
  117. /*
  118.   Variable declarations.
  119. */
  120. char
  121.   *code,
  122.   **subpattern_end,
  123.   *p,
  124.   start_code,
  125.   *start_pattern,
  126.   **subpattern;
  127.  
  128. static char
  129.   *token;
  130.  
  131. static int
  132.   number_parenthesis;
  133.  
  134. static long
  135.   code_size;
  136.  
  137. /*
  138.   Forward declarations.
  139. */
  140. static char
  141.   *NextToken _Declare((register char *)),
  142.   *Node _Declare((int)),
  143.   *Piece _Declare((int *)),
  144.   *Regular _Declare((int,int *));
  145.  
  146. static int
  147.   Repeat _Declare((char *));
  148.  
  149. static void
  150.   EmitCode _Declare((int)),
  151.   Tail _Declare((char *,char *));
  152.  
  153. /*
  154. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  155. %                                                                             %
  156. %                                                                             %
  157. %                                                                             %
  158. %   A t o m                                                                   %
  159. %                                                                             %
  160. %                                                                             %
  161. %                                                                             %
  162. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  163. %
  164. %
  165. */
  166. static char *Atom(flagp)
  167. int
  168.   *flagp;
  169. {
  170.   int
  171.     flags;
  172.  
  173.   register char
  174.     *status;
  175.  
  176.   *flagp=WorstCase;
  177.   switch(*token++)
  178.   {
  179.     case '^':
  180.     {
  181.       status=Node(MatchBeginningOfLine);
  182.       break;
  183.     }
  184.     case '$':
  185.     {
  186.       status=Node(MatchEndOfProgramOfLine);
  187.       break;
  188.     }
  189.     case '.':
  190.     {
  191.       status=Node(MatchAnyCharacter);
  192.       *flagp|=NonNull | Simple;
  193.       break;
  194.     }
  195.     case '[':
  196.     {
  197.       register int
  198.         class,
  199.         class_end;
  200.  
  201.       if (*token != '^')
  202.         status=Node(MatchAnyCharacterOf);
  203.       else
  204.         {
  205.           /*
  206.             Complement of range.
  207.           */
  208.           status=Node(MatchAnyCharacterBut);
  209.           token++;
  210.         }
  211.       if ((*token == ']') || (*token == '-'))
  212.         EmitCode(*token++);
  213.       while ((*token != '\0') && (*token != ']'))
  214.       {
  215.         if (*token != '-')
  216.           EmitCode(*token++);
  217.          else
  218.           {
  219.             token++;
  220.             if ((*token == ']') || (*token == '\0'))
  221.               EmitCode('-');
  222.             else
  223.               {
  224.                 class=((int)*(unsigned char *)(token-2))+1;
  225.                 class_end=((int)*(unsigned char *)(token));
  226.                 if (class > class_end+1)
  227.                   Fail("invalid [] range");
  228.                 for(; class <= class_end; class++)
  229.                   EmitCode((char) class);
  230.                 token++;
  231.               }
  232.           }
  233.       }
  234.       EmitCode('\0');
  235.       if (*token != ']')
  236.         Fail("unmatched []");
  237.       token++;
  238.       *flagp|=NonNull | Simple;
  239.       break;
  240.     }
  241.     case '(':
  242.     {
  243.       status=Regular(1,&flags);
  244.       if (status == NULL)
  245.         return(NULL);
  246.       *flagp|=flags & (NonNull | SpecialStart);
  247.       break;
  248.     }
  249.     case '\0':
  250.     case '|':
  251.     case ')':
  252.     {
  253.       Fail("internal urp");
  254.       break;
  255.     }
  256.     case '?':
  257.     case '+':
  258.     case '*':
  259.     {
  260.       Fail("?+* follows nothing");
  261.       break;
  262.     }
  263.     case '\\':
  264.     {
  265.       if (*token == '\0')
  266.         Fail("trailing \\");
  267.       status=Node(MatchExactly);
  268.       EmitCode(*token++);
  269.       EmitCode('\0');
  270.       *flagp|=NonNull | Simple;
  271.       break;
  272.     }
  273.     default:
  274.     {
  275.       register char
  276.         ender;
  277.  
  278.       register int
  279.         length;
  280.  
  281.       token--;
  282.       length=strcspn(token,Meta);
  283.       if (length <= 0)
  284.         Fail("internal disaster");
  285.       ender=(*(token+length));
  286.       if (length > 1 && MultipleMatches(ender))
  287.         length--;
  288.       *flagp|=NonNull;
  289.       if (length == 1)
  290.         *flagp|=Simple;
  291.       status=Node(MatchExactly);
  292.       while (length > 0)
  293.       {
  294.         EmitCode(*token++);
  295.         length--;
  296.       }
  297.       EmitCode('\0');
  298.       break;
  299.     }
  300.   }
  301.   return(status);
  302. }
  303.  
  304. /*
  305. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  306. %                                                                             %
  307. %                                                                             %
  308. %                                                                             %
  309. %   B r a n c h                                                               %
  310. %                                                                             %
  311. %                                                                             %
  312. %                                                                             %
  313. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  314. %
  315. %  Function Branch Implements the | operator.
  316. %
  317. %
  318. */
  319. static char *Branch(flagp)
  320. int
  321.   *flagp;
  322. {
  323.   int
  324.     flags;
  325.  
  326.   register char
  327.     *chain,
  328.     *latest,
  329.     *status;
  330.  
  331.   *flagp=WorstCase;
  332.   status=Node(MatchThisOrNext);
  333.   chain=NULL;
  334.   while ((*token != '\0') && (*token != '|') && (*token != ')'))
  335.   {
  336.     latest=Piece(&flags);
  337.     if (latest == NULL)
  338.       return(NULL);
  339.     *flagp|=flags & NonNull;
  340.     if (chain == NULL)
  341.       *flagp|=flags & SpecialStart;
  342.     else
  343.       Tail(chain,latest);
  344.     chain=latest;
  345.   }
  346.   if (chain == NULL)
  347.    (void) Node(MatchEmptyString);
  348.   return(status);
  349. }
  350.  
  351. /*
  352. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  353. %                                                                             %
  354. %                                                                             %
  355. %                                                                             %
  356. %   E m i t C o d e                                                           %
  357. %                                                                             %
  358. %                                                                             %
  359. %                                                                             %
  360. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  361. %
  362. %
  363. */
  364. static void EmitCode(opcode)
  365. int
  366.   opcode;
  367. {
  368.   if (code != &start_code)
  369.     *code++=(char) opcode;
  370.   else
  371.     code_size++;
  372. }
  373.  
  374. /*
  375. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  376. %                                                                             %
  377. %                                                                             %
  378. %                                                                             %
  379. %   I n s e r t                                                               %
  380. %                                                                             %
  381. %                                                                             %
  382. %                                                                             %
  383. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  384. %
  385. %  Function Insert inserts an operator in front of an already-emitted operand.
  386. %
  387. */
  388. static void Insert(opcode,operand)
  389. char
  390.   opcode,
  391.   *operand;
  392. {
  393.   register char
  394.     *p,
  395.     *place,
  396.     *q;
  397.  
  398.   if (code == &start_code)
  399.     {
  400.       code_size+=3;
  401.       return;
  402.     }
  403.   p=code;
  404.   code+=3;
  405.   q=code;
  406.   while (p > operand)
  407.     *--q=(*--p);
  408.   place=operand;
  409.   *place++=opcode;
  410.   *place++='\0';
  411.   *place++='\0';
  412. }
  413.  
  414. /*
  415. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  416. %                                                                             %
  417. %                                                                             %
  418. %                                                                             %
  419. %   M a t c h                                                                 %
  420. %                                                                             %
  421. %                                                                             %
  422. %                                                                             %
  423. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  424. %
  425. %
  426. */
  427. static int Match(regular_expression)
  428. char
  429.   *regular_expression;
  430. {
  431.   char
  432.     *next_token;
  433.  
  434.   register char
  435.     *scan;
  436.  
  437.   scan=regular_expression;
  438.   while (scan != NULL)
  439.   {
  440.     next_token=NextToken(scan);
  441.     switch(OpCode(scan))
  442.     {
  443.       case MatchBeginningOfLine:
  444.       {
  445.         if (p != start_pattern)
  446.           return(0);
  447.         break;
  448.       }
  449.       case MatchEndOfProgramOfLine:
  450.       {
  451.         if (*p != '\0')
  452.          return(0);
  453.         break;
  454.       }
  455.       case MatchAnyCharacter:
  456.       {
  457.         if (*p == '\0')
  458.           return(0);
  459.         p++;
  460.         break;
  461.       }
  462.       case MatchExactly:
  463.       {
  464.         register char
  465.           *operand;
  466.  
  467.         register int
  468.           length;
  469.  
  470.         operand=Operand(scan);
  471.         /*
  472.           Inline the first character for speed.
  473.         */
  474.         if (*operand != *p)
  475.           return(0);
  476.         length=strlen(operand);
  477.         if ((length > 1) && (strncmp(operand,p,length) != 0))
  478.           return(0);
  479.         p+=length;
  480.         break;
  481.       }
  482.       case MatchAnyCharacterOf:
  483.       {
  484.         if ((*p == '\0' || strchr(Operand(scan),*p) == NULL))
  485.           return(0);
  486.         p++;
  487.         break;
  488.       }
  489.       case MatchAnyCharacterBut:
  490.       {
  491.         if ((*p == '\0') || (strchr(Operand(scan),*p) != NULL))
  492.           return(0);
  493.         p++;
  494.         break;
  495.       }
  496.       case MatchEmptyString:
  497.         break;
  498.       case Back:
  499.         break;
  500.       case Open+1:
  501.       case Open+2:
  502.       case Open+3:
  503.       case Open+4:
  504.       case Open+5:
  505.       case Open+6:
  506.       case Open+7:
  507.       case Open+8:
  508.       case Open+9:
  509.       {
  510.         register char
  511.           *save;
  512.  
  513.         register int
  514.           no;
  515.  
  516.         no=OpCode(scan)-Open;
  517.         save=p;
  518.         if (!Match(next_token))
  519.           return(0);
  520.         else
  521.           {
  522.             /*
  523.               Don't set subpattern if some later invocation of the same
  524.               parentheses already has.
  525.             */
  526.             if (subpattern[no] == NULL)
  527.               subpattern[no]=save;
  528.             return(1);
  529.           }
  530.         break;
  531.       }
  532.       case Close+1:
  533.       case Close+2:
  534.       case Close+3:
  535.       case Close+4:
  536.       case Close+5:
  537.       case Close+6:
  538.       case Close+7:
  539.       case Close+8:
  540.       case Close+9:
  541.       {
  542.         register char
  543.           *save;
  544.  
  545.         register int
  546.           no;
  547.  
  548.         no=OpCode(scan)-Close;
  549.         save=p;
  550.         if (!Match(next_token))
  551.            return(0);
  552.         else
  553.           {
  554.             /*
  555.               Don't set subpattern_end if some later invocation of the same
  556.               parentheses already has.
  557.             */
  558.             if (subpattern_end[no] == NULL)
  559.               subpattern_end[no]=save;
  560.             return(1);
  561.           }
  562.         break;
  563.       }
  564.       case MatchThisOrNext:
  565.       {
  566.         register char
  567.           *save;
  568.  
  569.         if (OpCode(next_token) != MatchThisOrNext)
  570.           next_token=Operand(scan);
  571.         else
  572.           {
  573.             do
  574.             {
  575.               save=p;
  576.               if (Match(Operand(scan)))
  577.                 return(1);
  578.               p=save;
  579.               scan=NextToken(scan);
  580.             } while ((scan != NULL) && (OpCode(scan) == MatchThisOrNext));
  581.             return(0);
  582.           }
  583.         break;
  584.       }
  585.       case MatchZeroOrMore:
  586.       case MatchOneOrMore:
  587.       {
  588.         register char
  589.           next_tokench,
  590.           *save;
  591.  
  592.         register int
  593.           min,
  594.           no;
  595.  
  596.         /*
  597.           Lookahead to avoid useless match attempts when we know what
  598.           character comes next_token.
  599.         */
  600.         next_tokench='\0';
  601.         if (OpCode(next_token) == MatchExactly)
  602.           next_tokench=(*Operand(next_token));
  603.         min=(OpCode(scan) == MatchZeroOrMore) ? 0 : 1;
  604.         save=p;
  605.         no=Repeat(Operand(scan));
  606.         while (no >= min)
  607.         {
  608.           /*
  609.             If it could work, try it.
  610.           */
  611.           if ((next_tokench == '\0') || (*p == next_tokench))
  612.             if (Match(next_token))
  613.               return(1);
  614.           /*
  615.             Couldn't or didn't -- back up.
  616.           */
  617.           no--;
  618.           p=save+no;
  619.         }
  620.         return(0);
  621.         break;
  622.       }
  623.       case EndOfProgram:
  624.         return(1);
  625.         break;
  626.       default:
  627.         (void) fprintf(stderr,"Regular(3): %s","memory corruption");
  628.         return(0);
  629.         break;
  630.     }
  631.     scan=next_token;
  632.   }
  633.   (void) fprintf(stderr,"Regular(3): %s","corrupted pointers");
  634.   return(0);
  635. }
  636.  
  637. /*
  638. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  639. %                                                                             %
  640. %                                                                             %
  641. %                                                                             %
  642. %   N e x t T o k e n                                                         %
  643. %                                                                             %
  644. %                                                                             %
  645. %                                                                             %
  646. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  647. %
  648. %
  649. */
  650. static char *NextToken(p)
  651. register char
  652.   *p;
  653. {
  654.   register int
  655.     offset;
  656.  
  657.   if (p == &start_code)
  658.     return(NULL);
  659.   offset=Next(p);
  660.   if (offset == 0)
  661.     return(NULL);
  662.   if (OpCode(p) == Back)
  663.     return(p-offset);
  664.   else
  665.     return(p+offset);
  666. }
  667.  
  668. /*
  669. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  670. %                                                                             %
  671. %                                                                             %
  672. %                                                                             %
  673. %   N o d e                                                                   %
  674. %                                                                             %
  675. %                                                                             %
  676. %                                                                             %
  677. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  678. %
  679. %
  680. */
  681. static char *Node(opcode)
  682. int
  683.   opcode;
  684. {
  685.   register char
  686.     *ptr,
  687.     *status;
  688.  
  689.   status=code;
  690.   if (status == &start_code)
  691.     {
  692.       code_size+=3;
  693.       return(status);
  694.     }
  695.   ptr=status;
  696.   *ptr++=(char) opcode;
  697.   *ptr++='\0';
  698.   *ptr++='\0';
  699.   code=ptr;
  700.   return(status);
  701. }
  702.  
  703. /*
  704. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  705. %                                                                             %
  706. %                                                                             %
  707. %                                                                             %
  708. %   O p T a i l                                                               %
  709. %                                                                             %
  710. %                                                                             %
  711. %                                                                             %
  712. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  713. %
  714. %
  715. */
  716. static void OpTail(p,value)
  717. char
  718.   *p;
  719.  
  720. char
  721.   *value;
  722. {
  723.   /*
  724.     "Operandless" and "op != MatchThisOrNext" are synonymous in practice.
  725.   */
  726.   if ((p == NULL) || (p == &start_code) || (OpCode(p) != MatchThisOrNext))
  727.     return;
  728.   Tail(Operand(p),value);
  729. }
  730.  
  731. /*
  732. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  733. %                                                                             %
  734. %                                                                             %
  735. %                                                                             %
  736. %   P i e c e                                                                 %
  737. %                                                                             %
  738. %                                                                             %
  739. %                                                                             %
  740. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  741. %
  742. %
  743. */
  744. static char *Piece(flagp)
  745. int
  746.   *flagp;
  747. {
  748.   int
  749.     flags;
  750.  
  751.   register char
  752.     *next_token,
  753.     op,
  754.     *status;
  755.  
  756.   status=Atom(&flags);
  757.   if (status == NULL)
  758.     return(NULL);
  759.   op=(*token);
  760.   if (!MultipleMatches(op))
  761.     {
  762.       *flagp=flags;
  763.       return(status);
  764.     }
  765.   if (!(flags & NonNull) && op != '?')
  766.     Fail("*+ operand could be empty");
  767.   *flagp=(op != '+') ? (WorstCase | SpecialStart) : (WorstCase | NonNull);
  768.   if (op == '*' && (flags & Simple))
  769.     Insert(MatchZeroOrMore,status);
  770.   else
  771.     if (op == '*')
  772.       {
  773.         /*
  774.           Emit x* as (x&|), where & means "self".
  775.         */
  776.         Insert(MatchThisOrNext,status);
  777.         OpTail(status,Node(Back));
  778.         OpTail(status,status);
  779.         Tail(status,Node(MatchThisOrNext));
  780.         Tail(status,Node(MatchEmptyString));
  781.       }
  782.     else
  783.       if ((op == '+') && (flags & Simple))
  784.         Insert(MatchOneOrMore,status);
  785.       else
  786.         if (op == '+')
  787.           {
  788.             /*
  789.               Emit x+ as x (&|), where & means "self".
  790.             */
  791.             next_token=Node(MatchThisOrNext);
  792.             Tail(status,next_token);
  793.             Tail(Node(Back),status);
  794.             Tail(next_token,Node(MatchThisOrNext));
  795.             Tail(status,Node(MatchEmptyString));
  796.           }
  797.         else
  798.           if (op == '?')
  799.             {
  800.               /*
  801.                 Emit x? as (x|)
  802.               */
  803.               Insert(MatchThisOrNext,status);
  804.               Tail(status,Node(MatchThisOrNext));
  805.               next_token=Node(MatchEmptyString);
  806.               Tail(status,next_token);
  807.               OpTail(status,next_token);
  808.             }
  809.   token++;
  810.   if (MultipleMatches(*token))
  811.     Fail("nested *?+");
  812.   return(status);
  813. }
  814.  
  815. /*
  816. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  817. %                                                                             %
  818. %                                                                             %
  819. %                                                                             %
  820. %   R e g u l a r                                                             %
  821. %                                                                             %
  822. %                                                                             %
  823. %                                                                             %
  824. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  825. %
  826. %
  827. */
  828. static char *Regular(parenthesized,flagp)
  829. int
  830.   parenthesized;
  831.  
  832. int
  833.   *flagp;
  834. {
  835.   int
  836.     flags;
  837.  
  838.   register char
  839.     *br,
  840.     *ender,
  841.     *status;
  842.  
  843.   register int
  844.     count;
  845.  
  846.   count=0;
  847.   *flagp=NonNull;
  848.   if (!parenthesized)
  849.     status=NULL;
  850.   else
  851.     {
  852.       /*
  853.         Make an Open node.
  854.       */
  855.       if (number_parenthesis >= NumberSubExpressions)
  856.         Fail("too many ()");
  857.       count=number_parenthesis;
  858.       number_parenthesis++;
  859.       status=Node(Open+count);
  860.     }
  861.   /*
  862.     Pick up the branches, linking them together.
  863.   */
  864.   br=Branch(&flags);
  865.   if (br == NULL)
  866.     return(NULL);
  867.   if (status != NULL)
  868.     Tail(status,br);
  869.   else
  870.     status=br;
  871.   if (!(flags & NonNull))
  872.     *flagp&=(~NonNull);
  873.   *flagp|=flags & SpecialStart;
  874.   while (*token == '|')
  875.   {
  876.     token++;
  877.     br=Branch(&flags);
  878.     if (br == NULL)
  879.       return(NULL);
  880.     Tail(status,br);
  881.     if (!(flags & NonNull))
  882.       *flagp &= ~NonNull;
  883.     *flagp|=flags & SpecialStart;
  884.   }
  885.   /*
  886.     Make a closing node and hook it on the end.
  887.   */
  888.   ender=Node((parenthesized) ? Close+count : EndOfProgram);
  889.   Tail(status,ender);
  890.   /*
  891.     Hook the tails of the branches to the closing node.
  892.   */
  893.   for(br=status; br != NULL; br=NextToken(br))
  894.     OpTail(br,ender);
  895.   /*
  896.     Check for proper termination.
  897.   */
  898.   if (parenthesized && (*token++ != ')'))
  899.     Fail("unmatched()")
  900.   else
  901.     if (!parenthesized && (*token != '\0'))
  902.       {
  903.         if (*token == ')')
  904.           Fail("unmatched()")
  905.         else
  906.           Fail("junk on end")
  907.        }
  908.   return(status);
  909. }
  910.  
  911. /*
  912. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  913. %                                                                             %
  914. %                                                                             %
  915. %                                                                             %
  916. %   R e p e a t                                                               %
  917. %                                                                             %
  918. %                                                                             %
  919. %                                                                             %
  920. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  921. %
  922. %
  923. */
  924. static int Repeat(p)
  925. char
  926.   *p;
  927. {
  928.   register char
  929.     *operand,
  930.     *scan;
  931.  
  932.   register int
  933.     count=0;
  934.  
  935.   scan=p;
  936.   operand=Operand(p);
  937.   switch(OpCode(p))
  938.   {
  939.     case MatchAnyCharacter:
  940.     {
  941.       count=strlen(scan);
  942.       scan+=count;
  943.       break;
  944.     }
  945.     case MatchExactly:
  946.     {
  947.       while (*operand == *scan)
  948.       {
  949.         count++;
  950.         scan++;
  951.       }
  952.       break;
  953.     }
  954.     case MatchAnyCharacterOf:
  955.     {
  956.       while ((*scan != '\0') && (strchr(operand,*scan) != NULL))
  957.       {
  958.         count++;
  959.         scan++;
  960.       }
  961.       break;
  962.     }
  963.     case MatchAnyCharacterBut:
  964.     {
  965.       while ((*scan != '\0') && (strchr(operand,*scan) == NULL))
  966.       {
  967.         count++;
  968.         scan++;
  969.       }
  970.       break;
  971.     }
  972.     default:
  973.     {
  974.       (void) fprintf(stderr,"Regular(3): %s","internal foulup");
  975.       count=0;
  976.       break;
  977.     }
  978.   }
  979.   p=scan;
  980.   return(count);
  981. }
  982.  
  983. /*
  984. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  985. %                                                                             %
  986. %                                                                             %
  987. %                                                                             %
  988. %   T a i l                                                                   %
  989. %                                                                             %
  990. %                                                                             %
  991. %                                                                             %
  992. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  993. %
  994. %
  995. */
  996. static void Tail(p,val)
  997. char
  998.   *p;
  999.  
  1000. char
  1001.   *val;
  1002. {
  1003.   register char
  1004.     *scan,
  1005.     *temp;
  1006.  
  1007.   register int
  1008.     offset;
  1009.  
  1010.   if (p == &start_code)
  1011.     return;
  1012.   /*
  1013.     Find last node.
  1014.   */
  1015.   scan=p;
  1016.   for(;;)
  1017.   {
  1018.     temp=NextToken(scan);
  1019.     if (temp == NULL)
  1020.       break;
  1021.     scan=temp;
  1022.   }
  1023.   if (OpCode(scan) == Back)
  1024.     offset=scan-val;
  1025.   else
  1026.     offset=val-scan;
  1027.   *(scan+1)=(offset >> 8) & 0377;
  1028.   *(scan+2)=offset & 0377;
  1029. }
  1030.  
  1031. /*
  1032. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  1033. %                                                                             %
  1034. %                                                                             %
  1035. %                                                                             %
  1036. %   T r y                                                                     %
  1037. %                                                                             %
  1038. %                                                                             %
  1039. %                                                                             %
  1040. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  1041. %
  1042. %
  1043. */
  1044. static int Try(regular_expression,pattern)
  1045. RegularExpression
  1046.   *regular_expression;
  1047.  
  1048. char
  1049.   *pattern;
  1050. {
  1051.   register char
  1052.     **ep,
  1053.     **sp;
  1054.  
  1055.   register int
  1056.     i;
  1057.  
  1058.   p=pattern;
  1059.   subpattern=regular_expression->subpattern;
  1060.   subpattern_end=regular_expression->subpattern_end;
  1061.   sp=regular_expression->subpattern;
  1062.   ep=regular_expression->subpattern_end;
  1063.   for(i=NumberSubExpressions; i > 0; i--)
  1064.   {
  1065.     *sp++=NULL;
  1066.     *ep++=NULL;
  1067.   }
  1068.   if (!Match(regular_expression->program+1))
  1069.     return(0);
  1070.   else
  1071.     {
  1072.       regular_expression->subpattern[0]=pattern;
  1073.       regular_expression->subpattern_end[0]=p;
  1074.       return(1);
  1075.     }
  1076. }
  1077.  
  1078. /*
  1079. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  1080. %                                                                             %
  1081. %                                                                             %
  1082. %                                                                             %
  1083. %   C o m p i l e R e g u l a r E x p r e s s i o n                           %
  1084. %                                                                             %
  1085. %                                                                             %
  1086. %                                                                             %
  1087. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  1088. %
  1089. %  Function CompileRegularExpression compiles a regular expression into a
  1090. %  structure of type RegularExpression and returns a pointer to it.  The space
  1091. %  is allocated using function malloc and may be released by function free.
  1092. %
  1093. %
  1094. */
  1095. RegularExpression *CompileRegularExpression(regular_expression)
  1096. char
  1097.   *regular_expression;
  1098. {
  1099.   int
  1100.     flags;
  1101.  
  1102.   register char
  1103.     *longest,
  1104.     *scan;
  1105.  
  1106.   register RegularExpression
  1107.     *r;
  1108.  
  1109.   register int
  1110.     length;
  1111.  
  1112.   if (regular_expression == NULL)
  1113.     Fail("NULL argument");
  1114.   /*
  1115.     First pass: determine size.
  1116.   */
  1117.   token=regular_expression;
  1118.   number_parenthesis=1;
  1119.   code_size=0L;
  1120.   code=(&start_code);
  1121.   EmitCode(Magick);
  1122.   if (Regular(0,&flags) == NULL)
  1123.     return(NULL);
  1124.   /*
  1125.     Allocate space.
  1126.   */
  1127.   r=(RegularExpression *) malloc((code_size+sizeof(RegularExpression)));
  1128.   if (r == (RegularExpression *) NULL)
  1129.     Fail("out of space");
  1130.   /*
  1131.     Second pass: emit code.
  1132.   */
  1133.   token=regular_expression;
  1134.   number_parenthesis=1;
  1135.   code=r->program;
  1136.   EmitCode(Magick);
  1137.   if (Regular(0,&flags) == NULL)
  1138.     return(NULL);
  1139.   /*
  1140.     Dig out information for optimizations.
  1141.   */
  1142.   r->start_character='\0';
  1143.   r->anchor=0;
  1144.   r->priority_pattern=NULL;
  1145.   r->pattern_length=0;
  1146.   scan=r->program+1;
  1147.   if (OpCode(NextToken(scan)) == EndOfProgram)
  1148.     {
  1149.       scan=Operand(scan);
  1150.       if (OpCode(scan) == MatchExactly)
  1151.         r->start_character=(*Operand(scan));
  1152.       else
  1153.         if (OpCode(scan) == MatchBeginningOfLine)
  1154.           r->anchor++;
  1155.       /*
  1156.         If there's something expensive in the regular expression, find the
  1157.         longest literal pattern that must appear and make it the
  1158.         priority_pattern.
  1159.       */
  1160.       if (flags & SpecialStart)
  1161.         {
  1162.           longest=NULL;
  1163.           length=0;
  1164.           for(; scan != NULL; scan=NextToken(scan))
  1165.             if ((OpCode(scan) == MatchExactly) &&
  1166.                 ((int) strlen(Operand(scan)) >= length))
  1167.               {
  1168.                 longest=Operand(scan);
  1169.                 length=strlen(Operand(scan));
  1170.               }
  1171.           r->priority_pattern=longest;
  1172.           r->pattern_length=length;
  1173.         }
  1174.     }
  1175.   return(r);
  1176. }
  1177.  
  1178. /*
  1179. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  1180. %                                                                             %
  1181. %                                                                             %
  1182. %                                                                             %
  1183. %   E x e c u t e R e g u l a r E x p r e s s i o n                           %
  1184. %                                                                             %
  1185. %                                                                             %
  1186. %                                                                             %
  1187. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  1188. %
  1189. %  Function ExecuteRegularExpression matches a NULL-terminated pattern against
  1190. %  the compiled regular expression in regular-expression.  It returns 1 for
  1191. %  success and 0 for failure.
  1192. %
  1193. %
  1194. */
  1195. int ExecuteRegularExpression(regular_expression,pattern)
  1196. register RegularExpression
  1197.   *regular_expression;
  1198.  
  1199. register char
  1200.   *pattern;
  1201. {
  1202.   register char
  1203.     *s;
  1204.  
  1205.   if ((regular_expression == (RegularExpression *) NULL) ||
  1206.       (pattern == (char *) NULL))
  1207.     {
  1208.       (void) fprintf(stderr,"Regular(3): %s","NULL parameter\n");
  1209.       return(0);
  1210.     }
  1211.   /*
  1212.     Check validity of program.
  1213.   */
  1214.   if (((int)*(unsigned char *)(regular_expression->program)) != Magick)
  1215.     {
  1216.       (void) fprintf(stderr,"Regular(3): %s","corrupted program");
  1217.       return(0);
  1218.     }
  1219.   /*
  1220.     If there is a "must appear" pattern, look for it.
  1221.   */
  1222.   if (regular_expression->priority_pattern != NULL)
  1223.     {
  1224.       s=pattern;
  1225.       while ((s=strchr(s,regular_expression->priority_pattern[0])) != NULL)
  1226.       {
  1227.         if (strncmp(s,regular_expression->priority_pattern,
  1228.             regular_expression->pattern_length) == 0)
  1229.           break;
  1230.         s++;
  1231.        }
  1232.        if (s == NULL)
  1233.          return(0);
  1234.     }
  1235.   /*
  1236.     Mark beginning of line for ^.
  1237.   */
  1238.   start_pattern=pattern;
  1239.   /*
  1240.     Simplest case:  anchored match need be tried only once.
  1241.   */
  1242.   if (regular_expression->anchor)
  1243.     return(Try(regular_expression,pattern));
  1244.   /*
  1245.     Messy cases:  unanchored match.
  1246.   */
  1247.   s=pattern;
  1248.   if (regular_expression->start_character != '\0')
  1249.     while ((s=strchr(s,regular_expression->start_character)) != NULL)
  1250.     {
  1251.       if (Try(regular_expression,s))
  1252.         return(1);
  1253.       s++;
  1254.     }
  1255.   else
  1256.     do
  1257.     {
  1258.       if (Try(regular_expression,s))
  1259.         return(1);
  1260.     } while (*s++ != '\0');
  1261.   return(0);
  1262. }
  1263.