home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
OS/2 Shareware BBS: 5 Edit
/
05-Edit.zip
/
me34src.zip
/
me3
/
util
/
wildmat.c
< prev
Wrap
C/C++ Source or Header
|
1995-01-14
|
5KB
|
212 lines
/*
** Do shell-style pattern matching for ?, \, [], and * characters.
** Might not be robust in face of malformed patterns; e.g., "foo[a-"
** could cause a segmentation violation. It is 8bit clean.
**
** Written by Rich $alz, mirror!rs, Wed Nov 26 19:03:17 EST 1986.
** Rich $alz is now <rsalz@bbn.com>.
** Special thanks to Lars Mathiesen <thorinn@diku.dk> for the ABORT code.
** This can greatly speed up failing wildcard patterns. For example:
** pattern: -*-*-*-*-*-*-12-*-*-*-m-*-*-*
** text 1: -adobe-courier-bold-o-normal--12-120-75-75-m-70-iso8859-1
** text 2: -adobe-courier-bold-o-normal--12-120-75-75-X-70-iso8859-1
** Text 1 matches with 51 calls, while text 2 fails with 54 calls. Without
** the ABORT, then it takes 22310 calls to fail. Ugh.
**
** C Durland added:
** MS-DOS uses \ for path, so do nothing for \ in that case.
** Put man page at the end of this file.
*/
#include "os.h" /* only used for MSDOZ, ATARI --CD */
/* Result codes shared by DoMatch(), Star() and wildmat(). */
#define TRUE 1
#define FALSE 0
/*
** DoMatch() returns ABORT when the text ends while pattern characters
** other than '*' remain.  Star() only retries on FALSE, so an ABORT
** stops its scan immediately.
*/
#define ABORT -1
/* Character that inverts a character class when it directly follows '['. */
#define NEGATE_CLASS '^'
/* Forward declaration: Star() calls DoMatch() before DoMatch() is defined. */
static int DoMatch();
/*
** Handle the part of the pattern that follows a '*'.  Try to match p at
** text, then at text+1, and so on, and hand back the first DoMatch()
** result that is not FALSE (that is, TRUE or ABORT).
*/
static int
Star(text, p)
    register char *text;
    register char *p;
{
    register int result;

    for ( ; ; text++) {
        result = DoMatch(text, p);
        /* Only a plain mismatch justifies sliding one character along. */
        if (result != FALSE)
            return result;
    }
}
/*
** Match text against the pattern p.
**
** Returns:
**   TRUE   the whole text matches the whole pattern;
**   FALSE  mismatch at this position (a caller handling '*' may retry
**          further along the text);
**   ABORT  no match is possible however the text is aligned: either the
**          text ran out while pattern characters other than '*' remain,
**          or the pattern holds a malformed character class (no closing
**          ']', or a range cut off by the end of the pattern).
**
** Inside a class, the character after '-' is always taken as the upper
** end of a range, and a '-' with nothing before it can never match.
*/
static int
DoMatch(text, p)
    register char *text;
    register char *p;
{
    register int last;
    register int matched;
    register int reverse;

    for ( ; *p; text++, p++) {
        if (*text == '\0' && *p != '*')
            return ABORT;
        switch (*p) {
#if !(MSDOZ || ATARI) /* blame those bozos at Microsoft */
        case '\\':
            /* Literal match with following character. */
            p++;
            /* FALLTHROUGH */
#endif
        default:
            if (*text != *p)
                return FALSE;
            continue;
        case '?':
            /* Match anything. */
            continue;
        case '*':
            /* Trailing star matches everything. */
            return *++p ? Star(text, p) : TRUE;
        case '[':
            reverse = (p[1] == NEGATE_CLASS);
            if (reverse)
                /* Inverted character class. */
                p++;
            matched = FALSE;
            /*
            ** last starts above any char value so that a '-' with no
            ** preceding character cannot form a matching range.
            */
            for (last = 0400; *++p && *p != ']'; last = *p) {
                if (*p == '-') {
                    /*
                    ** A range needs an upper end; stepping on from the
                    ** terminating NUL would read past the pattern.
                    */
                    if (*++p == '\0')
                        return ABORT;
                    if (*text <= *p && *text >= last)
                        matched = TRUE;
                }
                else if (*text == *p)
                    matched = TRUE;
            }
            /*
            ** Unterminated class: stop here so the outer loop's p++
            ** does not move beyond the end of the pattern.
            */
            if (*p == '\0')
                return ABORT;
            if (matched == reverse)
                return FALSE;
            continue;
        }
    }
    return *text == '\0';
}
/*
** User-level routine. Returns TRUE or FALSE.
*/
int
wildmat(text, p)
char *text;
char *p;
{
return DoMatch(text, p) == TRUE;
}
#ifdef TEST
#include <stdio.h>
#include <string.h>
/*
** Interactive tester.  Reads a pattern, then matches each following line
** of text against it and prints YES or NO.  A blank text line asks for a
** new pattern; a blank pattern or end-of-file ends the program.
**
** Lines are read with fgets() so that long input cannot overrun the
** buffers; whatever does not fit is simply read as the next line.
*/
int
main()
{
    char pattern[80];
    char text[80];

    printf("Wildmat tester. Enter pattern, then strings to test.\n");
    printf("A blank line gets prompts for a new pattern; a blank pattern\n");
    printf("exits the program.\n\n");
    for ( ; ; ) {
        printf("Enter pattern: ");
        (void)fflush(stdout);
        if (fgets(pattern, sizeof pattern, stdin) == NULL)
            break;
        /* fgets keeps the newline; drop it before looking at the line. */
        pattern[strcspn(pattern, "\n")] = '\0';
        if (pattern[0] == '\0')
            break;
        for ( ; ; ) {
            printf("Enter text: ");
            (void)fflush(stdout);
            if (fgets(text, sizeof text, stdin) == NULL)
                return 0;
            text[strcspn(text, "\n")] = '\0';
            if (text[0] == '\0')
                /* Blank line; go back and get a new pattern. */
                break;
            printf(" %s\n", wildmat(text, pattern) ? "YES" : "NO");
        }
    }
    return 0;
}
#endif /* TEST */
#if 0 /* the man page */
WILDMAT(3) WILDMAT(3)
NAME
wildmat - perform shell-style wildcard matching
SYNOPSIS
int
wildmat(text, pattern)
char *text;
char *pattern;
DESCRIPTION
Wildmat compares the text against the pattern and returns non-zero if
the pattern matches the text. The pattern is interpreted similar to
shell filename wildcards, and not as a full regular expression such as
those handled by the grep(1) family of programs or the regex(3) or
regexp(3) set of routines.
The pattern is interpreted according to the following rules:
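\x Turns off the special meaning of x and matches it directly; this
is used mostly before a question mark or asterisk, and is not
valid inside square brackets. When the code is compiled with
MSDOZ or ATARI set, backslash handling is left out and a
backslash in the pattern is matched as an ordinary character.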
? Matches any single character.
* Matches any sequence of zero or more characters.
[x...y]
Matches any single character specified by the set x...y, where
any character other than minus sign or close bracket may appear
in the set. A minus sign may be used to indicate a range of
characters. That is, [0-5abc] is a shorthand for [012345abc].
More than one range may appear inside a character set;
[0-9a-zA-Z._] matches almost all of the legal characters for a host name.
[^x...y]
This matches any character not in the set x...y, which is
interpreted as described above.
BUGS
There is no way to specify a minus sign in a character range.
HISTORY
Written by Rich $alz <rsalz@bbn.com> in 1986, and posted to Usenet
several times since then, most notably in comp.sources.misc in March,
1991.
Lars Mathiesen <thorinn@diku.dk> enhanced the multi-asterisk failure
mode in early 1991.
SEE ALSO
grep(1), regex(3), regexp(3).
- 1 - Formatted: January 17, 1992
#endif