home *** CD-ROM | disk | FTP | other *** search
/ Dream 52 / Amiga_Dream_52.iso / Linux / Divers / ImageMagick-4.0.6.tar.gz / ImageMagick-4.0.6.tar / ImageMagick-4.0.6 / xtp / regular.c < prev    next >
C/C++ Source or Header  |  1998-01-30  |  37KB  |  1,228 lines

  1. /*
  2. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  3. %                                                                             %
  4. %                                                                             %
  5. %                                                                             %
  6. %             RRRR   EEEEE   GGGG  U   U  L       AAA   RRRR                  %
  7. %             R   R  E      G      U   U  L      A   A  R   R                 %
  8. %             RRRR   EEE    G  GG  U   U  L      AAAAA  RRRR                  %
  9. %             R R    E      G   G  U   U  L      A   A  R R                   %
  10. %             R  R   EEEEE   GGGG   UUU   LLLLL  A   A  R  R                  %
  11. %                                                                             %
  12. %                                                                             %
  13. %                     Regular Expression Interpreter.                         %
  14. %                                                                             %
  15. %                                                                             %
  16. %                                                                             %
  17. %                           Software Design                                   %
  18. %                             John Cristy                                     %
  19. %                              July 1992                                      %
  20. %                                                                             %
  21. %                                                                             %
  22. %  Copyright 1998 E. I. du Pont de Nemours and Company                        %
  23. %                                                                             %
  24. %  Permission is hereby granted, free of charge, to any person obtaining a    %
  25. %  copy of this software and associated documentation files ("ImageMagick"),  %
  26. %  to deal in ImageMagick without restriction, including without limitation   %
  27. %  the rights to use, copy, modify, merge, publish, distribute, sublicense,   %
  28. %  and/or sell copies of ImageMagick, and to permit persons to whom the       %
  29. %  ImageMagick is furnished to do so, subject to the following conditions:    %
  30. %                                                                             %
  31. %  The above copyright notice and this permission notice shall be included in %
  32. %  all copies or substantial portions of ImageMagick.                         %
  33. %                                                                             %
  34. %  The software is provided "as is", without warranty of any kind, express or %
  35. %  implied, including but not limited to the warranties of merchantability,   %
  36. %  fitness for a particular purpose and noninfringement.  In no event shall   %
  37. %  E. I. du Pont de Nemours and Company be liable for any claim, damages or   %
  38. %  other liability, whether in an action of contract, tort or otherwise,      %
  39. %  arising from, out of or in connection with ImageMagick or the use or other %
  40. %  dealings in ImageMagick.                                                   %
  41. %                                                                             %
  42. %  Except as contained in this notice, the name of the E. I. du Pont de       %
  43. %  Nemours and Company shall not be used in advertising or otherwise to       %
  44. %  promote the sale, use or other dealings in ImageMagick without prior       %
  45. %  written authorization from the E. I. du Pont de Nemours and Company.       %
  46. %                                                                             %
  47. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  48. %
  49. %  CompileRegularExpression returns NULL for a failure, where failures are
  50. %  syntax errors, exceeding implementation limits, or applying `+' or `*'
  51. %  to a possibly-null operand.
  52. %
  53. %  This is essentially the same routine written and copywrited by Henry
  54. %  Spencer, University of Toronto.  I made minor programming changes but
  55. %  major variable name changes to improve readability.
  56. %
  57. %  A regular expression is zero or more branches, separated by `|'. It
  58. %  matches anything that matches one of the branches.
  59. %
  60. %  A branch is zero or more pieces, concatenated. It matches a match for
  61. %  the first, followed by a match for the second, etc.
  62. %
  63. %  A piece is an atom possibly followed by `*', `+', or `?'.  An atom
  64. %  followed by `*' matches a sequence of 0 or more matches of the
  65. %  atom.  An atom followed by `+' matches a sequence of 1 or more
  66. %  matches of the atom.  An atom followed by `?' matches a match of
  67. %  the atom, or the null string.
  68. %
  69. %  An atom is a regular expression in parentheses (matching a match
  70. %  for the regular expression), a range (see below), `.' (matching any
  71. %  single character), `^' (matching the null pattern at the beginning of
  72. %  the input string), `$' (matching the null pattern at the end of the
  73. %  input string), a `\' followed by a single character (matching that
  74. %  character), or a single character with no other significance (match-
  75. %  ing that character).
  76. %
  77. %  A range is a sequence of characters enclosed in `[]'. It normally
  78. %  matches any single character from the sequence. If the sequence
  79. %  begins with `^', it matches any single character not from the rest of
  80. %  the sequence.  If two characters in the sequence are separated by `-',
  81. %  this is shorthand for the full list of ASCII characters between
  82. %  them (e.g. `[0-9]' matches any decimal digit). To include a literal
  83. %  `]' in the sequence, make it the first character (following a possible
  84. %  `^').  To include a literal `-', make it the first or last character.
  85. %
  86. %  If a regular expression could match two different parts of a string,
  87. %  it will match the one which begins earliest. If both begin in the
  88. %  same place but match different lengths, or match the same length
  89. %  in different ways, life gets messier, as follows.
  90. %
  91. %  In general, the possibilities in a list of branches are considered in
  92. %  left-to-right order, the possibilities for `*', `+', and `?' are consid-
  93. %  ered longest-first, nested constructs are considered from the outer-
  94. %  most in, and concatenated constructs are considered leftmost-first.
  95. %  The match that will be chosen is the one that uses the earliest possi-
  96. %  bility in the first choice that has to be made. If there is more than
  97. %  one choice, the next will be made in the same manner (earliest pos-
  98. %  sibility) subject to the decision on the first choice.  And so forth.
  99. %
  100. %  For example, `(ab|a)b*c' could match `abc' in one of two ways.
  101. %  The first choice is between `ab' and `a'; since `ab' is earlier, and
  102. %  does lead to a successful overall match, it is chosen. Since the `b' is
  103. %  already spoken for, the `b*' must match its last possibility the empty
  104. %  string since it must respect the earlier choice.
  105. %
  106. %  In the particular case where no `|'s are present and there is only
  107. %  one `*', `+', or `?', the net effect is that the longest possible match
  108. %  will be chosen. So `ab*', presented with `xabbbby', will match
  109. %  `abbbb'.  Note that if `ab*' is tried against `xabyabbbz', it will
  110. %  match `ab' just after `x', due to the begins-earliest rule. (In effect,
  111. %  the decision on where to start the match is the first choice to be
  112. %  made, hence subsequent choices must respect it even if this leads
  113. %  them to less-preferred alternatives.)
  114. %
  115. %
  116. */
  117.  
  118. #include "xtp.h"
  119. #include "regular.h"
  120.  
  121. /*
  122.   Variable declarations.
  123. */
  124. static char
  125.   *code,
  126.   **subpattern_end,
  127.   *p,
  128.   start_code,
  129.   *start_pattern,
  130.   **subpattern,
  131.   *token;
  132.  
  133. static int
  134.   number_parenthesis;
  135.  
  136. static long
  137.   code_size;
  138.  
  139. /*
  140.   Forward declarations.
  141. */
  142. static char
  143.   *Atom(int *),
  144.   *Branch(int *),
  145.   *NextToken(register char *),
  146.   *Node(int),
  147.   *Piece(int *),
  148.   *Regular(int,int *);
  149.  
  150. static int
  151.   Match(char *),
  152.   Repeat(char *),
  153.   Try(RegularExpression *,char *);
  154.  
  155. static void
  156.   EmitCode(int),
  157.   Insert(int,char *),
  158.   OpTail(char *,char *),
  159.   Tail(char *,char *);
  160.  
  161. /*
  162. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  163. %                                                                             %
  164. %                                                                             %
  165. %                                                                             %
  166. %   A t o m                                                                   %
  167. %                                                                             %
  168. %                                                                             %
  169. %                                                                             %
  170. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  171. %
  172. %
  173. */
  174. static char *Atom(int *flagp)
  175. {
  176.   int
  177.     flags;
  178.  
  179.   register char
  180.     *status;
  181.  
  182.   *flagp=WorstCase;
  183.   switch(*token++)
  184.   {
  185.     case '^':
  186.     {
  187.       status=Node(MatchBeginningOfLine);
  188.       break;
  189.     }
  190.     case '$':
  191.     {
  192.       status=Node(MatchEndOfProgramOfLine);
  193.       break;
  194.     }
  195.     case '.':
  196.     {
  197.       status=Node(MatchAnyCharacter);
  198.       *flagp|=NonNull | Simple;
  199.       break;
  200.     }
  201.     case '[':
  202.     {
  203.       register int
  204.         class,
  205.         class_end;
  206.  
  207.       if (*token != '^')
  208.         status=Node(MatchAnyCharacterOf);
  209.       else
  210.         {
  211.           /*
  212.             Complement of range.
  213.           */
  214.           status=Node(MatchAnyCharacterBut);
  215.           token++;
  216.         }
  217.       if ((*token == ']') || (*token == '-'))
  218.         EmitCode(*token++);
  219.       while ((*token != '\0') && (*token != ']'))
  220.       {
  221.         if (*token != '-')
  222.           EmitCode(*token++);
  223.          else
  224.           {
  225.             token++;
  226.             if ((*token == ']') || (*token == '\0'))
  227.               EmitCode('-');
  228.             else
  229.               {
  230.                 class=((int)*(unsigned char *)(token-2))+1;
  231.                 class_end=((int)*(unsigned char *)(token));
  232.                 if (class > class_end+1)
  233.                   Fail("invalid [] range");
  234.                 for(; class <= class_end; class++)
  235.                   EmitCode((char) class);
  236.                 token++;
  237.               }
  238.           }
  239.       }
  240.       EmitCode('\0');
  241.       if (*token != ']')
  242.         Fail("unmatched []");
  243.       token++;
  244.       *flagp|=NonNull | Simple;
  245.       break;
  246.     }
  247.     case '(':
  248.     {
  249.       status=Regular(1,&flags);
  250.       if (status == NULL)
  251.         return(NULL);
  252.       *flagp|=flags & (NonNull | SpecialStart);
  253.       break;
  254.     }
  255.     case '\0':
  256.     case '|':
  257.     case ')':
  258.     {
  259.       Fail("internal urp");
  260.       break;
  261.     }
  262.     case '?':
  263.     case '+':
  264.     case '*':
  265.     {
  266.       Fail("?+* follows nothing");
  267.       break;
  268.     }
  269.     case '\\':
  270.     {
  271.       if (*token == '\0')
  272.         Fail("trailing \\");
  273.       status=Node(MatchExactly);
  274.       EmitCode(*token++);
  275.       EmitCode('\0');
  276.       *flagp|=NonNull | Simple;
  277.       break;
  278.     }
  279.     default:
  280.     {
  281.       register char
  282.         ender;
  283.  
  284.       register int
  285.         length;
  286.  
  287.       token--;
  288.       length=strcspn(token,Meta);
  289.       if (length <= 0)
  290.         Fail("internal disaster");
  291.       ender=(*(token+length));
  292.       if (length > 1 && MultipleMatches(ender))
  293.         length--;
  294.       *flagp|=NonNull;
  295.       if (length == 1)
  296.         *flagp|=Simple;
  297.       status=Node(MatchExactly);
  298.       while (length > 0)
  299.       {
  300.         EmitCode(*token++);
  301.         length--;
  302.       }
  303.       EmitCode('\0');
  304.       break;
  305.     }
  306.   }
  307.   return(status);
  308. }
  309.  
  310. /*
  311. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  312. %                                                                             %
  313. %                                                                             %
  314. %                                                                             %
  315. %   B r a n c h                                                               %
  316. %                                                                             %
  317. %                                                                             %
  318. %                                                                             %
  319. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  320. %
  321. %  Function Branch Implements the | operator.
  322. %
  323. %
  324. */
  325. static char *Branch(int *flagp)
  326. {
  327.   int
  328.     flags;
  329.  
  330.   register char
  331.     *chain,
  332.     *latest,
  333.     *status;
  334.  
  335.   *flagp=WorstCase;
  336.   status=Node(MatchThisOrNext);
  337.   chain=NULL;
  338.   while ((*token != '\0') && (*token != '|') && (*token != ')'))
  339.   {
  340.     latest=Piece(&flags);
  341.     if (latest == NULL)
  342.       return(NULL);
  343.     *flagp|=flags & NonNull;
  344.     if (chain == NULL)
  345.       *flagp|=flags & SpecialStart;
  346.     else
  347.       Tail(chain,latest);
  348.     chain=latest;
  349.   }
  350.   if (chain == NULL)
  351.    (void) Node(MatchEmptyString);
  352.   return(status);
  353. }
  354.  
  355. /*
  356. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  357. %                                                                             %
  358. %                                                                             %
  359. %                                                                             %
  360. %   E m i t C o d e                                                           %
  361. %                                                                             %
  362. %                                                                             %
  363. %                                                                             %
  364. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  365. %
  366. %
  367. */
  368. static void EmitCode(int opcode)
  369. {
  370.   if (code != &start_code)
  371.     *code++=(char) opcode;
  372.   else
  373.     code_size++;
  374. }
  375.  
  376. /*
  377. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  378. %                                                                             %
  379. %                                                                             %
  380. %                                                                             %
  381. %   I n s e r t                                                               %
  382. %                                                                             %
  383. %                                                                             %
  384. %                                                                             %
  385. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  386. %
  387. %  Function Insert inserts an operator in front of an already-emitted operand.
  388. %
  389. */
  390. static void Insert(int opcode,char *operand)
  391. {
  392.   register char
  393.     *p,
  394.     *place,
  395.     *q;
  396.  
  397.   if (code == &start_code)
  398.     {
  399.       code_size+=3;
  400.       return;
  401.     }
  402.   p=code;
  403.   code+=3;
  404.   q=code;
  405.   while (p > operand)
  406.     *--q=(*--p);
  407.   place=operand;
  408.   *place++=opcode;
  409.   *place++='\0';
  410.   *place++='\0';
  411. }
  412.  
  413. /*
  414. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  415. %                                                                             %
  416. %                                                                             %
  417. %                                                                             %
  418. %   M a t c h                                                                 %
  419. %                                                                             %
  420. %                                                                             %
  421. %                                                                             %
  422. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  423. %
  424. %
  425. */
  426. static int Match(char *regular_expression)
  427. {
  428.   char
  429.     *next_token;
  430.  
  431.   register char
  432.     *scan;
  433.  
  434.   scan=regular_expression;
  435.   while (scan != NULL)
  436.   {
  437.     next_token=NextToken(scan);
  438.     switch(OpCode(scan))
  439.     {
  440.       case MatchBeginningOfLine:
  441.       {
  442.         if (p != start_pattern)
  443.           return(0);
  444.         break;
  445.       }
  446.       case MatchEndOfProgramOfLine:
  447.       {
  448.         if (*p != '\0')
  449.          return(0);
  450.         break;
  451.       }
  452.       case MatchAnyCharacter:
  453.       {
  454.         if (*p == '\0')
  455.           return(0);
  456.         p++;
  457.         break;
  458.       }
  459.       case MatchExactly:
  460.       {
  461.         register char
  462.           *operand;
  463.  
  464.         register int
  465.           length;
  466.  
  467.         operand=Operand(scan);
  468.         /*
  469.           Inline the first character for speed.
  470.         */
  471.         if (*operand != *p)
  472.           return(0);
  473.         length=strlen(operand);
  474.         if ((length > 1) && (strncmp(operand,p,length) != 0))
  475.           return(0);
  476.         p+=length;
  477.         break;
  478.       }
  479.       case MatchAnyCharacterOf:
  480.       {
  481.         if ((*p == '\0' || strchr(Operand(scan),*p) == NULL))
  482.           return(0);
  483.         p++;
  484.         break;
  485.       }
  486.       case MatchAnyCharacterBut:
  487.       {
  488.         if ((*p == '\0') || (strchr(Operand(scan),*p) != NULL))
  489.           return(0);
  490.         p++;
  491.         break;
  492.       }
  493.       case MatchEmptyString:
  494.         break;
  495.       case Back:
  496.         break;
  497.       case Open+1:
  498.       case Open+2:
  499.       case Open+3:
  500.       case Open+4:
  501.       case Open+5:
  502.       case Open+6:
  503.       case Open+7:
  504.       case Open+8:
  505.       case Open+9:
  506.       {
  507.         register char
  508.           *save;
  509.  
  510.         register int
  511.           no;
  512.  
  513.         no=OpCode(scan)-Open;
  514.         save=p;
  515.         if (!Match(next_token))
  516.           return(0);
  517.         else
  518.           {
  519.             /*
  520.               Don't set subpattern if some later invocation of the same
  521.               parentheses already has.
  522.             */
  523.             if (subpattern[no] == NULL)
  524.               subpattern[no]=save;
  525.             return(1);
  526.           }
  527.         break;
  528.       }
  529.       case Close+1:
  530.       case Close+2:
  531.       case Close+3:
  532.       case Close+4:
  533.       case Close+5:
  534.       case Close+6:
  535.       case Close+7:
  536.       case Close+8:
  537.       case Close+9:
  538.       {
  539.         register char
  540.           *save;
  541.  
  542.         register int
  543.           no;
  544.  
  545.         no=OpCode(scan)-Close;
  546.         save=p;
  547.         if (!Match(next_token))
  548.            return(0);
  549.         else
  550.           {
  551.             /*
  552.               Don't set subpattern_end if some later invocation of the same
  553.               parentheses already has.
  554.             */
  555.             if (subpattern_end[no] == NULL)
  556.               subpattern_end[no]=save;
  557.             return(1);
  558.           }
  559.         break;
  560.       }
  561.       case MatchThisOrNext:
  562.       {
  563.         register char
  564.           *save;
  565.  
  566.         if (OpCode(next_token) != MatchThisOrNext)
  567.           next_token=Operand(scan);
  568.         else
  569.           {
  570.             do
  571.             {
  572.               save=p;
  573.               if (Match(Operand(scan)))
  574.                 return(1);
  575.               p=save;
  576.               scan=NextToken(scan);
  577.             } while ((scan != NULL) && (OpCode(scan) == MatchThisOrNext));
  578.             return(0);
  579.           }
  580.         break;
  581.       }
  582.       case MatchZeroOrMore:
  583.       case MatchOneOrMore:
  584.       {
  585.         register char
  586.           next_tokench,
  587.           *save;
  588.  
  589.         register int
  590.           min,
  591.           no;
  592.  
  593.         /*
  594.           Lookahead to avoid useless match attempts when we know what
  595.           character comes next_token.
  596.         */
  597.         next_tokench='\0';
  598.         if (OpCode(next_token) == MatchExactly)
  599.           next_tokench=(*Operand(next_token));
  600.         min=(OpCode(scan) == MatchZeroOrMore) ? 0 : 1;
  601.         save=p;
  602.         no=Repeat(Operand(scan));
  603.         while (no >= min)
  604.         {
  605.           /*
  606.             If it could work, try it.
  607.           */
  608.           if ((next_tokench == '\0') || (*p == next_tokench))
  609.             if (Match(next_token))
  610.               return(1);
  611.           /*
  612.             Couldn't or didn't -- back up.
  613.           */
  614.           no--;
  615.           p=save+no;
  616.         }
  617.         return(0);
  618.         break;
  619.       }
  620.       case EndOfProgram:
  621.         return(1);
  622.         break;
  623.       default:
  624.         (void) fprintf(stderr,"Regular(3): %s","memory corruption");
  625.         return(0);
  626.         break;
  627.     }
  628.     scan=next_token;
  629.   }
  630.   (void) fprintf(stderr,"Regular(3): %s","corrupted pointers");
  631.   return(0);
  632. }
  633.  
  634. /*
  635. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  636. %                                                                             %
  637. %                                                                             %
  638. %                                                                             %
  639. %   N e x t T o k e n                                                         %
  640. %                                                                             %
  641. %                                                                             %
  642. %                                                                             %
  643. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  644. %
  645. %
  646. */
  647. static char *NextToken(register char *p)
  648. {
  649.   register int
  650.     offset;
  651.  
  652.   if (p == &start_code)
  653.     return(NULL);
  654.   offset=Next(p);
  655.   if (offset == 0)
  656.     return(NULL);
  657.   if (OpCode(p) == Back)
  658.     return(p-offset);
  659.   else
  660.     return(p+offset);
  661. }
  662.  
  663. /*
  664. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  665. %                                                                             %
  666. %                                                                             %
  667. %                                                                             %
  668. %   N o d e                                                                   %
  669. %                                                                             %
  670. %                                                                             %
  671. %                                                                             %
  672. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  673. %
  674. %
  675. */
  676. static char *Node(int opcode)
  677. {
  678.   register char
  679.     *ptr,
  680.     *status;
  681.  
  682.   status=code;
  683.   if (status == &start_code)
  684.     {
  685.       code_size+=3;
  686.       return(status);
  687.     }
  688.   ptr=status;
  689.   *ptr++=(char) opcode;
  690.   *ptr++='\0';
  691.   *ptr++='\0';
  692.   code=ptr;
  693.   return(status);
  694. }
  695.  
  696. /*
  697. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  698. %                                                                             %
  699. %                                                                             %
  700. %                                                                             %
  701. %   O p T a i l                                                               %
  702. %                                                                             %
  703. %                                                                             %
  704. %                                                                             %
  705. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  706. %
  707. %
  708. */
  709. static void OpTail(char *p,char *value)
  710. {
  711.   /*
  712.     "Operandless" and "op != MatchThisOrNext" are synonymous in practice.
  713.   */
  714.   if ((p == NULL) || (p == &start_code) || (OpCode(p) != MatchThisOrNext))
  715.     return;
  716.   Tail(Operand(p),value);
  717. }
  718.  
  719. /*
  720. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  721. %                                                                             %
  722. %                                                                             %
  723. %                                                                             %
  724. %   P i e c e                                                                 %
  725. %                                                                             %
  726. %                                                                             %
  727. %                                                                             %
  728. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  729. %
  730. %
  731. */
  732. static char *Piece(int *flagp)
  733. {
  734.   int
  735.     flags;
  736.  
  737.   register char
  738.     *next_token,
  739.     op,
  740.     *status;
  741.  
  742.   status=Atom(&flags);
  743.   if (status == NULL)
  744.     return(NULL);
  745.   op=(*token);
  746.   if (!MultipleMatches(op))
  747.     {
  748.       *flagp=flags;
  749.       return(status);
  750.     }
  751.   if (!(flags & NonNull) && op != '?')
  752.     Fail("*+ operand could be empty");
  753.   *flagp=(op != '+') ? (WorstCase | SpecialStart) : (WorstCase | NonNull);
  754.   if (op == '*' && (flags & Simple))
  755.     Insert(MatchZeroOrMore,status);
  756.   else
  757.     if (op == '*')
  758.       {
  759.         /*
  760.           Emit x* as (x&|), where & means "self".
  761.         */
  762.         Insert(MatchThisOrNext,status);
  763.         OpTail(status,Node(Back));
  764.         OpTail(status,status);
  765.         Tail(status,Node(MatchThisOrNext));
  766.         Tail(status,Node(MatchEmptyString));
  767.       }
  768.     else
  769.       if ((op == '+') && (flags & Simple))
  770.         Insert(MatchOneOrMore,status);
  771.       else
  772.         if (op == '+')
  773.           {
  774.             /*
  775.               Emit x+ as x (&|), where & means "self".
  776.             */
  777.             next_token=Node(MatchThisOrNext);
  778.             Tail(status,next_token);
  779.             Tail(Node(Back),status);
  780.             Tail(next_token,Node(MatchThisOrNext));
  781.             Tail(status,Node(MatchEmptyString));
  782.           }
  783.         else
  784.           if (op == '?')
  785.             {
  786.               /*
  787.                 Emit x? as (x|)
  788.               */
  789.               Insert(MatchThisOrNext,status);
  790.               Tail(status,Node(MatchThisOrNext));
  791.               next_token=Node(MatchEmptyString);
  792.               Tail(status,next_token);
  793.               OpTail(status,next_token);
  794.             }
  795.   token++;
  796.   if (MultipleMatches(*token))
  797.     Fail("nested *?+");
  798.   return(status);
  799. }
  800.  
  801. /*
  802. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  803. %                                                                             %
  804. %                                                                             %
  805. %                                                                             %
  806. %   R e g u l a r                                                             %
  807. %                                                                             %
  808. %                                                                             %
  809. %                                                                             %
  810. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  811. %
  812. %
  813. */
  814. static char *Regular(int parenthesized,int *flagp)
  815. {
  816.   int
  817.     flags;
  818.  
  819.   register char
  820.     *br,
  821.     *ender,
  822.     *status;
  823.  
  824.   register int
  825.     count;
  826.  
  827.   count=0;
  828.   *flagp=NonNull;
  829.   if (!parenthesized)
  830.     status=NULL;
  831.   else
  832.     {
  833.       /*
  834.         Make an Open node.
  835.       */
  836.       if (number_parenthesis >= NumberSubExpressions)
  837.         Fail("too many ()");
  838.       count=number_parenthesis;
  839.       number_parenthesis++;
  840.       status=Node(Open+count);
  841.     }
  842.   /*
  843.     Pick up the branches, linking them together.
  844.   */
  845.   br=Branch(&flags);
  846.   if (br == NULL)
  847.     return(NULL);
  848.   if (status != NULL)
  849.     Tail(status,br);
  850.   else
  851.     status=br;
  852.   if (!(flags & NonNull))
  853.     *flagp&=(~NonNull);
  854.   *flagp|=flags & SpecialStart;
  855.   while (*token == '|')
  856.   {
  857.     token++;
  858.     br=Branch(&flags);
  859.     if (br == NULL)
  860.       return(NULL);
  861.     Tail(status,br);
  862.     if (!(flags & NonNull))
  863.       *flagp &= ~NonNull;
  864.     *flagp|=flags & SpecialStart;
  865.   }
  866.   /*
  867.     Make a closing node and hook it on the end.
  868.   */
  869.   ender=Node((parenthesized) ? Close+count : EndOfProgram);
  870.   Tail(status,ender);
  871.   /*
  872.     Hook the tails of the branches to the closing node.
  873.   */
  874.   for(br=status; br != NULL; br=NextToken(br))
  875.     OpTail(br,ender);
  876.   /*
  877.     Check for proper termination.
  878.   */
  879.   if (parenthesized && (*token++ != ')'))
  880.     Fail("unmatched()")
  881.   else
  882.     if (!parenthesized && (*token != '\0'))
  883.       {
  884.         if (*token == ')')
  885.           Fail("unmatched()")
  886.         else
  887.           Fail("junk on end")
  888.        }
  889.   return(status);
  890. }
  891.  
  892. /*
  893. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  894. %                                                                             %
  895. %                                                                             %
  896. %                                                                             %
  897. %   R e p e a t                                                               %
  898. %                                                                             %
  899. %                                                                             %
  900. %                                                                             %
  901. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  902. %
  903. %
  904. */
  905. static int Repeat(char *p)
  906. {
  907.   register char
  908.     *operand,
  909.     *scan;
  910.  
  911.   register int
  912.     count=0;
  913.  
  914.   scan=p;
  915.   operand=Operand(p);
  916.   switch(OpCode(p))
  917.   {
  918.     case MatchAnyCharacter:
  919.     {
  920.       count=strlen(scan);
  921.       scan+=count;
  922.       break;
  923.     }
  924.     case MatchExactly:
  925.     {
  926.       while (*operand == *scan)
  927.       {
  928.         count++;
  929.         scan++;
  930.       }
  931.       break;
  932.     }
  933.     case MatchAnyCharacterOf:
  934.     {
  935.       while ((*scan != '\0') && (strchr(operand,*scan) != NULL))
  936.       {
  937.         count++;
  938.         scan++;
  939.       }
  940.       break;
  941.     }
  942.     case MatchAnyCharacterBut:
  943.     {
  944.       while ((*scan != '\0') && (strchr(operand,*scan) == NULL))
  945.       {
  946.         count++;
  947.         scan++;
  948.       }
  949.       break;
  950.     }
  951.     default:
  952.     {
  953.       (void) fprintf(stderr,"Regular(3): %s","internal foulup");
  954.       count=0;
  955.       break;
  956.     }
  957.   }
  958.   p=scan;
  959.   return(count);
  960. }
  961.  
  962. /*
  963. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  964. %                                                                             %
  965. %                                                                             %
  966. %                                                                             %
  967. %   T a i l                                                                   %
  968. %                                                                             %
  969. %                                                                             %
  970. %                                                                             %
  971. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  972. %
  973. %
  974. */
  975. static void Tail(char *p,char *val)
  976. {
  977.   register char
  978.     *scan,
  979.     *temp;
  980.  
  981.   register int
  982.     offset;
  983.  
  984.   if (p == &start_code)
  985.     return;
  986.   /*
  987.     Find last node.
  988.   */
  989.   scan=p;
  990.   for(;;)
  991.   {
  992.     temp=NextToken(scan);
  993.     if (temp == NULL)
  994.       break;
  995.     scan=temp;
  996.   }
  997.   if (OpCode(scan) == Back)
  998.     offset=scan-val;
  999.   else
  1000.     offset=val-scan;
  1001.   *(scan+1)=(offset >> 8) & 0377;
  1002.   *(scan+2)=offset & 0377;
  1003. }
  1004.  
  1005. /*
  1006. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  1007. %                                                                             %
  1008. %                                                                             %
  1009. %                                                                             %
  1010. %   T r y                                                                     %
  1011. %                                                                             %
  1012. %                                                                             %
  1013. %                                                                             %
  1014. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  1015. %
  1016. %
  1017. */
  1018. static int Try(RegularExpression *regular_expression,char *pattern)
  1019. {
  1020.   register char
  1021.     **ep,
  1022.     **sp;
  1023.  
  1024.   register int
  1025.     i;
  1026.  
  1027.   p=pattern;
  1028.   subpattern=regular_expression->subpattern;
  1029.   subpattern_end=regular_expression->subpattern_end;
  1030.   sp=regular_expression->subpattern;
  1031.   ep=regular_expression->subpattern_end;
  1032.   for(i=NumberSubExpressions; i > 0; i--)
  1033.   {
  1034.     *sp++=NULL;
  1035.     *ep++=NULL;
  1036.   }
  1037.   if (!Match(regular_expression->program+1))
  1038.     return(0);
  1039.   else
  1040.     {
  1041.       regular_expression->subpattern[0]=pattern;
  1042.       regular_expression->subpattern_end[0]=p;
  1043.       regular_expression->pattern_length=p-pattern;
  1044.       return(1);
  1045.     }
  1046. }
  1047.  
  1048. /*
  1049. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  1050. %                                                                             %
  1051. %                                                                             %
  1052. %                                                                             %
  1053. %   C o m p i l e R e g u l a r E x p r e s s i o n                           %
  1054. %                                                                             %
  1055. %                                                                             %
  1056. %                                                                             %
  1057. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  1058. %
  1059. %  Function CompileRegularExpression compiles a regular expression into a
  1060. %  structure of type RegularExpression and returns a pointer to it.  The space
  1061. %  is allocated using function malloc and may be released by function free.
  1062. %
  1063. %
  1064. */
  1065. RegularExpression *CompileRegularExpression(char *regular_expression)
  1066. {
  1067.   int
  1068.     flags;
  1069.  
  1070.   register char
  1071.     *longest,
  1072.     *scan;
  1073.  
  1074.   register RegularExpression
  1075.     *r;
  1076.  
  1077.   register int
  1078.     length;
  1079.  
  1080.   if (regular_expression == NULL)
  1081.     Fail("NULL argument");
  1082.   /*
  1083.     First pass: determine size.
  1084.   */
  1085.   token=regular_expression;
  1086.   number_parenthesis=1;
  1087.   code_size=0L;
  1088.   code=(&start_code);
  1089.   EmitCode(Magick);
  1090.   if (Regular(0,&flags) == NULL)
  1091.     return(NULL);
  1092.   /*
  1093.     Allocate space.
  1094.   */
  1095.   r=(RegularExpression *)
  1096.     malloc((unsigned int) (code_size+sizeof(RegularExpression)));
  1097.   if (r == (RegularExpression *) NULL)
  1098.     Fail("out of space");
  1099.   /*
  1100.     Second pass: emit code.
  1101.   */
  1102.   token=regular_expression;
  1103.   number_parenthesis=1;
  1104.   code=r->program;
  1105.   EmitCode(Magick);
  1106.   if (Regular(0,&flags) == NULL)
  1107.     return(NULL);
  1108.   /*
  1109.     Dig out information for optimizations.
  1110.   */
  1111.   r->start_character='\0';
  1112.   r->anchor=0;
  1113.   r->priority_pattern=NULL;
  1114.   r->pattern_length=0;
  1115.   scan=r->program+1;
  1116.   if (OpCode(NextToken(scan)) == EndOfProgram)
  1117.     {
  1118.       scan=Operand(scan);
  1119.       if (OpCode(scan) == MatchExactly)
  1120.         r->start_character=(*Operand(scan));
  1121.       else
  1122.         if (OpCode(scan) == MatchBeginningOfLine)
  1123.           r->anchor++;
  1124.       /*
  1125.         If there's something expensive in the regular expression, find the
  1126.         longest literal pattern that must appear and make it the
  1127.         priority_pattern.
  1128.       */
  1129.       if (flags & SpecialStart)
  1130.         {
  1131.           longest=NULL;
  1132.           length=0;
  1133.           for(; scan != NULL; scan=NextToken(scan))
  1134.             if ((OpCode(scan) == MatchExactly) &&
  1135.                 ((int) strlen(Operand(scan)) >= length))
  1136.               {
  1137.                 longest=Operand(scan);
  1138.                 length=strlen(Operand(scan));
  1139.               }
  1140.           r->priority_pattern=longest;
  1141.           r->pattern_length=length;
  1142.         }
  1143.     }
  1144.   return(r);
  1145. }
  1146.  
  1147. /*
  1148. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  1149. %                                                                             %
  1150. %                                                                             %
  1151. %                                                                             %
  1152. %   E x e c u t e R e g u l a r E x p r e s s i o n                           %
  1153. %                                                                             %
  1154. %                                                                             %
  1155. %                                                                             %
  1156. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  1157. %
  1158. %  Function ExecuteRegularExpression matches a NULL-terminated pattern against
  1159. %  the compiled regular expression in regular-expression.  It returns 1 for
  1160. %  success and 0 for failure.
  1161. %
  1162. %
  1163. */
  1164. int ExecuteRegularExpression(register RegularExpression *regular_expression,
  1165.   register char *pattern)
  1166. {
  1167.   register char
  1168.     *s;
  1169.  
  1170.   if ((regular_expression == (RegularExpression *) NULL) ||
  1171.       (pattern == (char *) NULL))
  1172.     {
  1173.       (void) fprintf(stderr,"Regular(3): %s","NULL parameter\n");
  1174.       return(0);
  1175.     }
  1176.   /*
  1177.     Check validity of program.
  1178.   */
  1179.   if (((int)*(unsigned char *)(regular_expression->program)) != Magick)
  1180.     {
  1181.       (void) fprintf(stderr,"Regular(3): %s","corrupted program");
  1182.       return(0);
  1183.     }
  1184.   /*
  1185.     If there is a "must appear" pattern, look for it.
  1186.   */
  1187.   if (regular_expression->priority_pattern != NULL)
  1188.     {
  1189.       s=pattern;
  1190.       while ((s=strchr(s,regular_expression->priority_pattern[0])) != NULL)
  1191.       {
  1192.         if (strncmp(s,regular_expression->priority_pattern,
  1193.             regular_expression->pattern_length) == 0)
  1194.           break;
  1195.         s++;
  1196.        }
  1197.        if (s == NULL)
  1198.          return(0);
  1199.     }
  1200.   /*
  1201.     Mark beginning of line for ^.
  1202.   */
  1203.   start_pattern=pattern;
  1204.   /*
  1205.     Simplest case:  anchored match need be tried only once.
  1206.   */
  1207.   if (regular_expression->anchor)
  1208.     return(Try(regular_expression,pattern));
  1209.   /*
  1210.     Messy cases:  unanchored match.
  1211.   */
  1212.   s=pattern;
  1213.   if (regular_expression->start_character != '\0')
  1214.     while ((s=strchr(s,regular_expression->start_character)) != NULL)
  1215.     {
  1216.       if (Try(regular_expression,s))
  1217.         return(1);
  1218.       s++;
  1219.     }
  1220.   else
  1221.     do
  1222.     {
  1223.       if (Try(regular_expression,s))
  1224.         return(1);
  1225.     } while (*s++ != '\0');
  1226.   return(0);
  1227. }
  1228.