home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
The World of Computer Software
/
World_Of_Computer_Software-02-387-Vol-3of3.iso
/
h
/
hpack78s.zip
/
wildcard.c
< prev
next >
Wrap
C/C++ Source or Header
|
1992-12-03
|
9KB
|
332 lines
/****************************************************************************
* *
* HPACK Multi-System Archiver *
* =========================== *
* *
* Wildcard String Matching Routines *
* WILDCARD.C Updated 20/06/90 *
* *
* This program is protected by copyright and as such any use or copying of *
* this code for your own purposes directly or indirectly is highly uncool *
* and if you do so there will be....trubble. *
* And remember: We know where your kids go to school. *
* *
* Copyright 1989, 1990 Peter C.Gutmann. All rights reserved *
* *
****************************************************************************/
#include <ctype.h>
#include <string.h>
#include "defs.h"
#include "error.h"
#include "system.h"
#include "wildcard.h"
/* The types we can match. These are:
END - End of string
MATCHEND - Match all following chars
MATCHTO <end> - Match all chars till <end>
MATCHONE - Match any one char
RANGE <from> <to> - Match all chars in range <from>...<to>
NOTRANGE <from> <to>- Match all chars but those in range <from>...<to>
<char> - Match this char */
enum { END, MATCHEND, MATCHTO, MATCHONE, RANGE, NOTRANGE, ENDRANGE };
/* The escape character which is used to override standard wildcard
characters. Usually this is '\' but MSDOS and OS/2 need to use '#' since
'\' is used in pathnames */
#if defined( __MSDOS__ ) || defined( __OS2__ )
#define ESC '#'
#else
#define ESC '\\'
#endif /* __MSDOS__ || __OS2__ */
/* A utility function to determine whether a string segment contains any
wildcard characters */
BOOLEAN hasWildcards( char *string, int count )
{
char ch;
while( count-- )
if( ( ch = *string++ ) == '*' || ch == '?' || ch == '[' || ch == ESC )
return( TRUE );
/* No wildcards found in string */
return( FALSE );
}
BOOLEAN strHasWildcards( char *string )
{
return( hasWildcards( string, strlen( string ) ) );
}
/****************************************************************************
* *
* String "Compiling" Function *
* *
****************************************************************************/
/* Compile a string into a form acceptable by the wildcard-matching FSM.
The form is pretty simple. The only part which may require a bit of
explanation is the mechanism for matching ranges. This is given by the
RE { "^" | "!" }? { letter { "-" letter }? }+ */
int compileString( const char *src, char *dest )
{
int srcIndex = 0, destIndex = 0;
BOOLEAN inRange;
char ch;
while( ( ch = src[ srcIndex++ ] ) && destIndex < MATCH_DEST_LEN )
switch( ch )
{
case '*':
#ifdef AIX
/* Fix a bug in the RS6000 cc optimizer where it gets a bit
enthusiastic with reusing a CR which was set for the test
of src[ srcIndex++ ] and doesn't notice that srcIndex has
changed. The following forces a reevaluation of
src[ srcIndex ] */
srcIndex--;
srcIndex++;
#endif /* AIX */
if( src[ srcIndex ] )
dest[ destIndex++ ] = MATCHTO;
else
dest[ destIndex++ ] = MATCHEND;
/* Stomp repeated '*'s */
while( src[ srcIndex ] == '*' )
srcIndex++;
break;
case '?':
dest[ destIndex++ ] = MATCHONE;
break;
case '[':
if( src[ srcIndex ] == '^' || src[ srcIndex ] == '!' )
{
dest[ destIndex++ ] = NOTRANGE;
srcIndex++;
}
else
dest[ destIndex++ ] = RANGE;
if( src[ srcIndex ] == ']' || src[ srcIndex ] == '-' )
#ifdef GUI
return( ERROR );
#else
error( BAD_WILDCARD_FORMAT, src );
#endif /* GUI */
inRange = FALSE;
while( ( ( ch = src[ srcIndex++ ] ) != ']' ) && \
( destIndex < MATCH_DEST_LEN - 1 ) )
/* destIndex < MATCH_DEST_LEN - 1 allows room for
the ENDRANGE and END tokens */
switch( ch )
{
case END:
#ifdef GUI
return( ERROR );
#else
error( BAD_WILDCARD_FORMAT, src );
break;
#endif /* GUI */
case '-':
if( inRange )
#ifdef GUI
return( ERROR );
#else
error( BAD_WILDCARD_FORMAT, src );
#endif /* GUI */
inRange = TRUE;
dest[ destIndex ] = dest[ destIndex - 1 ];
dest[ destIndex - 1 ] = RANGE;
destIndex++;
break;
case ESC:
if( !( ch = src[ srcIndex++ ] ) )
#ifdef GUI
return( ERROR );
#else
error( BAD_WILDCARD_FORMAT, src );
#endif /* GUI */
/* Drop through */
default:
dest[ destIndex++ ] = ch;
inRange = FALSE;
break;
}
if( inRange )
#ifdef GUI
return( ERROR );
#else
error( BAD_WILDCARD_FORMAT, src );
#endif /* GUI */
dest[ destIndex++ ] = ENDRANGE;
break;
case ESC:
if( !( ch = src[ srcIndex++ ] ) )
#ifdef GUI
return( ERROR );
#else
error( BAD_WILDCARD_FORMAT, src );
#endif /* GUI */
/* Drop through */
default:
dest[ destIndex++ ] = ch;
}
dest[ destIndex++ ] = END;
/* Very complex regular expressions can overflow the destination buffer */
if( destIndex >= MATCH_DEST_LEN )
#ifdef GUI
return( ERROR );
#else
error( WILDCARD_TOO_COMPLEX );
#endif /* GUI */
/* Return count of no.of chars in output + null delimiter */
return( destIndex );
}
/****************************************************************************
* *
* Wildcard String Matching Functions *
* *
****************************************************************************/
/* A simple queue, used to handle backtracking. Once the queue is full, we
stop adding items, even if space has been freed up at the front. This
conveniently stops deep recursion for very complex expressions. The queue
size is chosen to be in the vicinity of the maximum filename length we can
expect, to allow for a worst-case scenario of matching "*x" to "xxxxx..." */
#define QUEUE_SIZE 50
static int srcQueue[ QUEUE_SIZE ], destQueue[ QUEUE_SIZE ], queueFront, queueEnd;
/* Match a pattern string with wildcards against a literal string */
BOOLEAN matchString( const char *src, const char *dest, const BOOLEAN hasWildcards )
{
char ch, matchChar;
int srcIndex = 0, destIndex = 0;
BOOLEAN match = TRUE, notFlag = FALSE, passedChar = FALSE;
/* Simple case: If there are no wildcards, just do a straight compare */
if( !hasWildcards )
{
/* Check for special cases of empty string and differing string lengths */
if( strlen( src ) != strlen( dest ) )
return( FALSE );
return( !caseStrcmp( src, dest ) );
}
/* Set up queue for backtracking */
queueFront = queueEnd = 0;
retry:
while( ( ch = src[ srcIndex++ ] ) && match )
switch( ch )
{
case MATCHEND:
return( match ); /* Match all remaining chars */
case MATCHTO:
matchChar = src[ srcIndex ];
while( caseMatch( dest[ destIndex ] ) != matchChar && dest[ destIndex ] )
{
/* We can't do this as part of the main loop since that
wouldn't let us do a match of 0 chars */
destIndex++;
passedChar = TRUE; /* Remember we've matched at least one char */
}
/* See if we exited due to running out of chars to match rather
than an actual match */
if( !dest[ destIndex ] )
match = FALSE;
else
/* Push current state so we can backtrack */
if( queueEnd < QUEUE_SIZE )
{
srcQueue[ queueEnd ] = srcIndex - 1;
destQueue[ queueEnd++ ] = ( passedChar ) ? destIndex : destIndex + 1;
}
passedChar = FALSE;
break;
case MATCHONE:
if( !dest[ destIndex++ ] )
match = FALSE;
break;
case NOTRANGE:
notFlag = TRUE;
/* Drop through */
case RANGE:
/* Note that when checking for a match it is necessary to use
"|=" to indicate a match, since we are not using short
circuit evaluation and thus with "=" a later non-match may
cancel a previous match. Using short-circuit evaluation
give little advantage, since it involves adding gotos to
exit the loop, and yet we must still scan over src[]
anyway to get to the ENDRANGE. Note also that binary
&'s are used in the comparisons to avoid short-circuit
evaluation, since the expressions have side effects */
match = FALSE;
matchChar = caseMatch( dest[ destIndex ] );
destIndex++;
while( ( ch = src[ srcIndex++ ] ) != ENDRANGE )
if( ch == RANGE )
{
/* Joining the following two statements with an '&'
doesn't seem to work */
match |= matchChar >= caseMatch( src[ srcIndex ] );
srcIndex++;
match &= matchChar <= caseMatch( src[ srcIndex ] );
srcIndex++;
}
else
match |= matchChar == caseMatch( ch );
if( notFlag )
match = !match;
notFlag = FALSE;
break;
default:
match = caseMatch( ch ) == caseMatch( dest[ destIndex ] );
destIndex++;
}
/* No match if there are more matchable chars in one of the strings */
if( src[ srcIndex - 1 ] || dest[ destIndex ] )
match = FALSE;
/* See if we can backtrack and try again */
if( !match && ( queueFront != queueEnd ) )
{
srcIndex = srcQueue[ queueFront ];
destIndex = destQueue[ queueFront++ ];
match = TRUE;
goto retry;
}
return( match );
}