home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
OS/2 Shareware BBS: 22 gnu
/
22-gnu.zip
/
gnurx.zip
/
rx
/
rxposix.c
< prev
next >
Wrap
C/C++ Source or Header
|
1995-12-31
|
10KB
|
419 lines
/***********************************************************
Copyright 1995 by Tom Lord
All Rights Reserved
Permission to use, copy, modify, and distribute this software and its
documentation for any purpose and without fee is hereby granted,
provided that the above copyright notice appear in all copies and that
both that copyright notice and this permission notice appear in
supporting documentation, and that the name of the copyright holder not be
used in advertising or publicity pertaining to distribution of the
software without specific, written prior permission.
Tom Lord DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
EVENT SHALL TOM LORD BE LIABLE FOR ANY SPECIAL, INDIRECT OR
CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
******************************************************************/
#include "rxall.h"
#include "rxposix.h"
#include "rxgnucomp.h"
#include "rxbasic.h"
#include "rxsimp.h"
/* regcomp takes a regular expression as a string and compiles it.
*
* PATTERN is the address of the pattern string.
*
* CFLAGS is a series of bits which affect compilation.
*
* If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
* use POSIX basic syntax.
*
* If REG_NEWLINE is set, then . and [^...] don't match newline.
* Also, regexec will try a match beginning after every newline.
*
* If REG_ICASE is set, then we considers upper- and lowercase
* versions of letters to be equivalent when matching.
*
* If REG_NOSUB is set, then when PREG is passed to regexec, that
* routine will report only success or failure, and nothing about the
* registers.
*
* It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for
* the return codes and their meanings.)
*/
#ifdef __STDC__
int
regcomp (regex_t * preg, const char * pattern, int cflags)
#else
int
regcomp (preg, pattern, cflags)
regex_t * preg;
const char * pattern;
int cflags;
#endif
{
reg_errcode_t ret;
unsigned int syntax;
unsigned char xlate[256];
bzero (preg, sizeof (*preg));
syntax = ((cflags & REG_EXTENDED)
? RE_SYNTAX_POSIX_EXTENDED
: RE_SYNTAX_POSIX_BASIC);
if (!(cflags & REG_ICASE))
preg->translate = 0;
else
{
unsigned i;
preg->translate = (unsigned char *) malloc (256);
if (!preg->translate)
return (int) REG_ESPACE;
/* Map uppercase characters to corresponding lowercase ones. */
for (i = 0; i < CHAR_SET_SIZE; i++)
preg->translate[i] = isupper (i) ? tolower (i) : i;
}
/* If REG_NEWLINE is set, newlines are treated differently. */
if (!(cflags & REG_NEWLINE))
preg->newline_anchor = 0;
else
{
/* REG_NEWLINE implies neither . nor [^...] match newline. */
syntax &= ~RE_DOT_NEWLINE;
syntax |= RE_HAT_LISTS_NOT_NEWLINE;
/* It also changes the matching behavior. */
preg->newline_anchor = 1;
}
preg->no_sub = !!(cflags & REG_NOSUB);
/* POSIX says a null character in the pattern terminates it, so we
* can use strlen here in compiling the pattern.
*/
ret = rx_parse (&preg->pattern,
pattern, strlen (pattern),
syntax,
256,
preg->translate);
/* POSIX doesn't distinguish between an unmatched open-group and an
* unmatched close-group: both are REG_EPAREN.
*/
if (ret == REG_ERPAREN)
ret = REG_EPAREN;
if (!ret)
{
preg->n_subexps = 1;
preg->subexps = 0;
rx_posix_analyze_rexp (&preg->subexps,
&preg->n_subexps,
preg->pattern,
0);
rx_fill_in_fastmap (256,
preg->fastmap,
preg->pattern);
preg->is_anchored = rx_is_anchored_p (preg->pattern);
}
return (int) ret;
}
/* Returns a message corresponding to an error code, ERRCODE, returned
from either regcomp or regexec. */
#ifdef __STDC__
size_t
regerror (int errcode, const regex_t *preg,
char *errbuf, size_t errbuf_size)
#else
size_t
regerror (errcode, preg, errbuf, errbuf_size)
int errcode;
const regex_t *preg;
char *errbuf;
size_t errbuf_size;
#endif
{
const char *msg;
size_t msg_size;
msg = rx_error_msg[errcode] == 0 ? "Success" : rx_error_msg[errcode];
msg_size = strlen (msg) + 1; /* Includes the 0. */
if (errbuf_size != 0)
{
if (msg_size > errbuf_size)
{
strncpy (errbuf, msg, errbuf_size - 1);
errbuf[errbuf_size - 1] = 0;
}
else
strcpy (errbuf, msg);
}
return msg_size;
}
#ifdef __STDC__
int
rx_regmatch (regmatch_t pmatch[],
regex_t *preg,
struct rx_context_rules * rules,
int start, int end, char *string)
#else
int
rx_regmatch (pmatch, preg, rules, start, end, string)
regmatch_t pmatch[];
regex_t *preg;
struct rx_context_rules * rules;
int start;
int end;
char *string;
#endif
{
struct rx_solutions * solutions;
enum rx_answers answer;
struct rx_context_rules local_rules;
int orig_end;
local_rules = *rules;
orig_end = end;
while (end >= start)
{
local_rules.not_eol = (rules->not_eol
? ( (end == orig_end)
|| !local_rules.newline_anchor
|| (string[end] != '\n'))
: ( (end != orig_end)
&& (!local_rules.newline_anchor
|| (string[end] != '\n'))));
solutions = rx_basic_make_solutions (pmatch, preg->pattern, preg->subexps,
start, end, &local_rules, string);
if (!solutions)
return REG_ESPACE;
do
{
answer = rx_next_solution (solutions);
}
while (answer == rx_maybe);
rx_basic_free_solutions (solutions);
if (answer == rx_yes)
{
if (pmatch)
{
pmatch[0].rm_so = start;
pmatch[0].rm_eo = end;
}
return 0;
}
--end;
}
switch (answer)
{
default:
case rx_bogus:
return REG_ESPACE;
case rx_no:
case rx_maybe:
return REG_NOMATCH;
}
}
#ifdef __STDC__
int
rx_regexec (regmatch_t pmatch[],
regex_t *preg,
struct rx_context_rules * rules,
int start,
int end,
char *string)
#else
int
rx_regexec (pmatch, preg, rules, start, end, string)
regmatch_t pmatch[];
regex_t *preg;
struct rx_context_rules * rules;
int start;
int end;
char *string;
#endif
{
int x;
int stat;
int anchored;
anchored = preg->is_anchored;
for (x = start; x <= end; ++x)
{
if (preg->fastmap[((unsigned char *)string)[x]])
{
stat = rx_regmatch (pmatch, preg, rules, x, end, string);
if (!stat || (stat != REG_NOMATCH))
return stat;
}
if (anchored)
return REG_NOMATCH;
}
return REG_NOMATCH;
}
/* regexec searches for a given pattern, specified by PREG, in the
* string STRING.
*
* If NMATCH is zero or REG_NOSUB was set in the cflags argument to
* `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
* least NMATCH elements, and we set them to the offsets of the
* corresponding matched substrings.
*
* EFLAGS specifies `execution flags' which affect matching: if
* REG_NOTBOL is set, then ^ does not match at the beginning of the
* string; if REG_NOTEOL is set, then $ does not match at the end.
*
* We return 0 if we find a match and REG_NOMATCH if not.
*/
#ifdef __STDC__
int
regnexec (regex_t *preg, int len, char *string,
size_t nmatch, regmatch_t pmatch[],
int eflags)
#else
int
regnexec (preg, len, string, nmatch, pmatch, eflags)
regex_t *preg;
int len;
char *string;
size_t nmatch;
regmatch_t pmatch[];
int eflags;
#endif
{
int want_reg_info;
struct rx_context_rules rules;
regmatch_t * regs;
int nregs;
int stat;
want_reg_info = (!preg->no_sub && (nmatch > 0));
rules.newline_anchor = preg->newline_anchor;
rules.not_bol = !!(eflags & REG_NOTBOL);
rules.not_eol = !!(eflags & REG_NOTEOL);
if (nmatch >= preg->n_subexps)
{
regs = pmatch;
nregs = nmatch;
}
else
{
regs = (regmatch_t *)malloc (preg->n_subexps * sizeof (*regs));
nregs = preg->n_subexps;
}
{
int x;
for (x = 0; x < nregs; ++x)
regs[x].rm_so = regs[x].rm_eo = -1;
}
stat = rx_regexec (pmatch, preg, &rules, 0, len, string);
if (!stat && want_reg_info && (regs != pmatch))
{
int x;
for (x = 0; x < nmatch; ++x)
pmatch[x] = regs[x];
free (regs);
}
return ((stat == rx_yes)
? 0
: REG_NOMATCH);
}
#ifdef __STDC__
int
regexec (regex_t *preg, char *string,
size_t nmatch, regmatch_t pmatch[],
int eflags)
#else
int
regexec (preg, string, nmatch, pmatch, eflags)
regex_t *preg;
char *string;
size_t nmatch;
regmatch_t pmatch[];
int eflags;
#endif
{
return regnexec (preg,
strlen (string),
string,
nmatch,
pmatch,
eflags);
}
/* Free dynamically allocated space used by PREG. */
#ifdef __STDC__
void
regfree (regex_t *preg)
#else
void
regfree (preg)
regex_t *preg;
#endif
{
if (preg->pattern)
{
rx_free_rexp (preg->pattern);
preg->pattern = 0;
}
if (preg->subexps)
{
free (preg->subexps);
preg->subexps = 0;
}
if (preg->translate != 0)
{
free (preg->translate);
preg->translate = 0;
}
}