home *** CD-ROM | disk | FTP | other *** search
- /******************************************************************************
- * Module : Regular Expression Matching.
- *
- * Author : John W. M. Stevens.
- ******************************************************************************/
-
- #include "compiler.h"
-
- #include "sets.h"
- #include "regexp.h"
- #include "utils.h"
-
- /* Define sub expression list. */
- typedef struct {
- char *Start; /* Start of sub-string in source. */
- char *End; /* End of sub-string in source. */
- char *SubStr; /* Pointer to sub-string. */
- } SUB_EXPR;
- static SUB_EXPR SubExprs[MAX_SUB_EXPRS + 1];
-
- static CASE_CMP CmpCase = CASE_SENSITIVE;
-
- /*-----------------------------------------------------------------------------
- | Routine : ExtSubStrs() --- Extract the sub-strings from the string to
- | match against.
- -----------------------------------------------------------------------------*/
-
- static
- void ExtSubStrs(void)
- {
- register int i;
- register int StrLen;
- auto char *Str;
- extern FILE *ErrFile;
-
- /* Extract all sub strings from the source string. */
- for (i = 0; i <= MAX_SUB_EXPRS; i++)
- {
- /* Report run time error. */
- if (SubExprs[i].Start == NULL || SubExprs[i].End == NULL)
- continue;
- else if (SubExprs[i].Start > SubExprs[i].End)
- {
- fprintf(ErrFile,
- "%s %d : Error - sub expression #%d extraction failed.\n",
- __FILE__,
- __LINE__,
- i);
- exit( 1 );
- }
-
- /* Determine length of string. */
- for (StrLen = 0, Str = SubExprs[i].Start;
- Str != SubExprs[i].End;
- StrLen++, Str++
- )
- ;
-
- /* Allocate space for the string. */
- if ((Str = (char *) calloc(1, StrLen + 1)) == NULL)
- {
- fprintf(ErrFile,
- "%s %d : Out of memory.\n",
- __FILE__,
- __LINE__);
- exit( 1 );
- }
-
- /* Copy the string to the new space. */
- MemCopy(Str, SubExprs[i].Start, StrLen);
- Str[StrLen] = '\0';
-
- /* Add string to sub expression string buffer. */
- SubExprs[i].SubStr = Str;
- }
- }
-
- /*-----------------------------------------------------------------------------
- | Routine : ReStrCmp() --- Regular expression string compare.
- |
- | Inputs : Str - Pointer to string to attempt to match.
- | ReStr - Pointer to regular expression string.
- |
- | Returns : Returns zero for no match, non-zero for a match.
- -----------------------------------------------------------------------------*/
-
- static
- int ReStrCmp(char *Str,
- char *ReStr)
- {
- /* While not end of regular expression string, compare characters. */
- while ( *ReStr )
- {
- /* If the characters do not match, return no match. */
- if (CmpCase == IGN_CASE)
- {
- /* Compare characters regardless of case. */
- if (tolower( *ReStr ) != tolower( *Str ))
- return( 0 );
-
- /* They match. Increment pointers to next characters. */
- ReStr++;
- Str++;
- }
- else if (*ReStr++ != *Str++)
- return( 0 );
- }
- return( 1 );
- }
-
- /*-----------------------------------------------------------------------------
- | Routine : ReData() --- Evaluate a regular expression data node.
- |
- | Inputs : SrcStart - Start of source string (for left anchor
- | comparisons).
- | Str - Pointer to string to attempt to match.
- | ReExpr - Pointer to regular expression node.
- | Outputs : Str - Pointer to string after matched portion, or
- | unchanged since input, if no match.
- | Srch - Pointer to search description structure.
- |
- | Returns : Returns 1 for a match, 0 for no match.
- -----------------------------------------------------------------------------*/
-
- static
- int ReData(char *SrcStart,
- char **Str,
- REG_EXP_NODE *ReExpr)
- {
- register int Match;
- extern FILE *ErrFile;
-
- /* Attempt match. */
- switch ( ReExpr->NodeType )
- {
- case DATA_STRING:
- /* Compare string. */
- if ((Match = ReStrCmp(*Str, ReExpr->data.MatchStr)) != 0)
- {
- /* Bump the source string pointer. */
- (*Str) += strlen( ReExpr->data.MatchStr );
- }
- break;
- case DATA_SET:
- /* Is the current character a member of the set? (Don't
- * forget that negated sets are already negated at this
- * point.)
- */
- if ((Match = InSet(ReExpr->data.CSet, **Str)) != 0)
- {
- /* Bump source string pointer. */
- (*Str)++;
- }
- break;
- case DATA_ANY:
- /* Match any character except end of line. */
- if ((Match = **Str && **Str != '\n') != 0)
- {
- /* Bump source string pointer. */
- (*Str)++;
- }
- break;
- case DATA_LEFT_ANCHOR:
- /* Is this the start of the line? */
- Match = *Str == SrcStart;
- break;
- case DATA_RIGHT_ANCHOR:
- /* Is the current character an end of line character? */
- Match = **Str == '\n' || **Str == '\0';
- break;
- default:
- fprintf(ErrFile,
- "%s %d : Error - illegal regular expression node type.\n",
- __FILE__,
- __LINE__);
- exit( 1 );
- }
-
- /* Return match state. */
- return( Match );
- }
-
- /*-----------------------------------------------------------------------------
- | Routine : ReOp() --- Execute a regular expression operator.
- |
- | Inputs : SrcStart - Start of source string (for left anchor
- | comparisons).
- | Str - Pointer to string to attempt to match.
- | ReExpr - Pointer to regular expression node.
- | Outputs : Str - Pointer to string after matched portion, or
- | unchanged since input, if no match.
- | Cont - Current regular expression node pointer.
- |
- | Returns : Returns 1 for a match, 0 for no match.
- -----------------------------------------------------------------------------*/
-
- static
- int ReOp(char *SrcStart,
- char **Str,
- REG_EXP_NODE *ReExpr,
- REG_EXP_NODE **Cont)
- {
- register int i;
- register int Match;
- auto char *ChkPt;
- auto REG_EXP_NODE *Tmp;
- auto REG_EXP_NODE *Node;
- extern FILE *ErrFile;
-
- /* Do operations. */
- do
- {
- /* Attempt match. */
- switch ( ReExpr->NodeType )
- {
- case OP_OR:
- /* Save check point for backtracking. */
- ChkPt = *Str;
-
- /* Attempt left branch first. */
- if ((Match = ReOp(SrcStart,
- Str,
- ReExpr->Left,
- &Node)) == 0)
- {
- /* Attempt to match the right hand branch. */
- *Str = ChkPt;
- Match = ReOp(SrcStart, Str, ReExpr->Right, &Node);
- }
-
- /* Get end of OR. */
- ReExpr = Node;
- break;
- case DATA_SPAN:
- /* Scan for at least the minimum number of characters. */
- for (Match = 1, i = 0; i < ReExpr->MinSpan; i++)
- if (**Str == '\0' || **Str == '\n')
- {
- Match = 0;
- break;
- }
- if (! Match)
- break;
-
- /* Skip over all control node types to see if there is
- * a terminating expression for this span.
- */
- for (Tmp = ReExpr->Right;
- Tmp &&
- (Tmp->NodeType == OP_L_PAREN ||
- Tmp->NodeType == OP_R_PAREN ||
- Tmp->NodeType == END_OR);
- Tmp = Tmp->Right)
- ;
-
- /* If there is no span terminating expression, then we
- * automatically match to end of line.
- */
- if (Tmp == NULL)
- {
- /* Scan to end of input string. */
- for ( ; **Str; ++*Str)
- ;
-
- /* This is a definite match. */
- Match = 1;
- break;
- }
-
- /* We matched at least the minimum number. Now attempt the
- * maximum number.
- */
- for (ChkPt = *Str;
- *ChkPt && i < ReExpr->MaxSpan;
- ChkPt++, i++)
- {
- /* Search forward until a match is found, or the
- * max number of any character has been spanned.
- */
- *Str = ChkPt;
- if ((Match = ReOp(SrcStart,
- Str,
- ReExpr->Right,
- &Node)) != 0)
- {
- break;
- }
- }
-
- /* Check for a match. */
- if (i >= ReExpr->MaxSpan)
- Match = 0;
- ReExpr = Node;
- break;
- case OP_ENUM:
- /* Match the correct number of sub-expressions. */
- for (i = 0; i < ReExpr->MaxSpan; i++)
- {
- /* Attempt data match. */
- if ((Match = ReData(SrcStart,
- Str,
- ReExpr->Left)) == 0)
- {
- /* Have we at least the minimum? */
- if (i >= ReExpr->MinSpan)
- Match = 1;
- break;
- }
- }
- break;
- case OP_L_PAREN:
- /* Save start of sub-expression. */
- SubExprs[ReExpr->SubExprNo].Start = *Str;
- Match = 1;
- break;
- case OP_R_PAREN:
- /* Save start of sub-expression. */
- SubExprs[ReExpr->SubExprNo].End = *Str;
- Match = 1;
- break;
- default:
- Match = ReData(SrcStart, Str, ReExpr);
- break;
- }
-
- /* Check for end. */
- if (ReExpr != NULL)
- ReExpr = ReExpr->Right;
-
- } while (Match && ReExpr && ReExpr->NodeType != END_OR);
-
- /* Return match. */
- *Cont = ReExpr;
- return( Match );
- }
-
- /*-----------------------------------------------------------------------------
- | Routine : ReMatch() --- Regular expression match.
- |
- | Inputs : Str - Pointer to string to attempt to match.
- | Case - Flag for case sensitivity.
- | ReExpr - Pointer to regular expression tree root.
- | Outputs : SubStrs - Pointer to array of returned strings.
- | NoSubStrs - Size of returned string array.
- |
- | Returns : Returns zero for no match, non-zero for a match.
- -----------------------------------------------------------------------------*/
-
- int ReMatch(char *Str,
- CASE_CMP Case,
- REG_EXP_NODE *ReExpr,
- char ***SubStrs)
- {
- register int i;
- register int Match;
- auto char *tp;
- auto char *Loop;
- auto REG_EXP_NODE *Node;
-
- static char *RetStrs[MAX_SUB_EXPRS];
- extern FILE *ErrFile;
-
- /* Run list and free strings. */
- for (i = 1; i < MAX_SUB_EXPRS; i++)
- {
- /* Free previous sub strings. */
- if (RetStrs[i] != NULL)
- free( RetStrs[i] );
-
- /* Initialize the sub expression list. */
- RetStrs[i] = NULL;
- SubExprs[i].Start = SubExprs[i].End = SubExprs[i].SubStr = NULL;
- }
-
- /* If the first node is a left anchor, no shifting is necesary. */
- tp = Str;
- CmpCase = Case;
- if (ReExpr->NodeType == DATA_LEFT_ANCHOR)
- Match = ReOp(Str, &tp, ReExpr->Right, &Node);
- else
- {
- /* Search forward. */
- for (Loop = Str; ; Loop++)
- {
- /* Search for a match. */
- tp = Loop;
- if ((Match = ReOp(Str, &tp, ReExpr, &Node)) != 0)
- break;
-
- /* Check for end of loop. */
- if (*Loop == '\0' || *Loop == '\n')
- break;
- }
- }
-
- /* Create sub string array. */
- if ( Match )
- {
- /* Extract sub expression strings. */
- ExtSubStrs();
- for (i = 1; i < MAX_SUB_EXPRS; i++)
- RetStrs[i] = SubExprs[i].SubStr;
-
- /* Return sub string array. */
- *SubStrs = RetStrs;
- }
-
- /* Return match true or false. */
- return( Match );
- }
-