home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
OS/2 Shareware BBS: 22 gnu
/
22-gnu.zip
/
gnurx.zip
/
rx
/
rxanal.c
< prev
next >
Wrap
C/C++ Source or Header
|
1995-12-31
|
18KB
|
780 lines
/***********************************************************
Copyright 1995 by Tom Lord
All Rights Reserved
Permission to use, copy, modify, and distribute this software and its
documentation for any purpose and without fee is hereby granted,
provided that the above copyright notice appear in all copies and that
both that copyright notice and this permission notice appear in
supporting documentation, and that the name of the copyright holder not be
used in advertising or publicity pertaining to distribution of the
software without specific, written prior permission.
Tom Lord DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
EVENT SHALL TOM LORD BE LIABLE FOR ANY SPECIAL, INDIRECT OR
CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
******************************************************************/
#include "rxall.h"
#include "rxanal.h"
#include "rxbitset.h"
#ifdef __STDC__
int
rx_posix_analyze_rexp (struct rexp_node *** subexps,
int * n_subexps,
struct rexp_node * node,
int id)
#else
int
rx_posix_analyze_rexp (subexps, n_subexps, node, id)
struct rexp_node *** subexps;
int * n_subexps;
struct rexp_node * node;
int id;
#endif
{
if (node)
{
int this_subexp;
if (node->type == r_parens)
{
this_subexp = *n_subexps;
++*n_subexps;
if (!*subexps)
*subexps = (struct rexp_node **)malloc (sizeof (struct rexp_node *) * *n_subexps);
else
*subexps = (struct rexp_node **)realloc (*subexps,
sizeof (struct rexp_node *) * *n_subexps);
}
if (node->params.pair.left)
id = rx_posix_analyze_rexp (subexps, n_subexps, node->params.pair.left, id);
if (node->params.pair.right)
id = rx_posix_analyze_rexp (subexps, n_subexps, node->params.pair.right, id);
switch (node->type)
{
case r_cset:
node->len = 1;
node->observed = 0;
break;
case r_concat:
case r_alternate:
{
int lob, rob;
int llen, rlen;
lob = (!node->params.pair.left ? 0 : node->params.pair.left->observed);
rob = (!node->params.pair.right ? 0 : node->params.pair.right->observed);
llen = (!node->params.pair.left ? 0 : node->params.pair.left->len);
rlen = (!node->params.pair.right ? 0 : node->params.pair.right->len);
node->len = ((llen >= 0) && (rlen >= 0)
? ((node->type == r_concat)
? llen + rlen
: ((llen == rlen) ? llen : -1))
: -1);
node->observed = lob || rob;
break;
}
case r_opt:
case r_star:
case r_plus:
case r_interval:
node->len = -1;
node->observed = (node->params.pair.left
? node->params.pair.left->observed
: 0);
break;
case r_parens:
node->observed = 1;
node->len = (node->params.pair.left
? node->params.pair.left->len
: 0);
(*subexps)[this_subexp] = node;
break;
case r_context:
switch (node->params.intval)
{
default:
node->observed = 1;
node->len = -1;
break;
case '$':
case '=':
case '<':
case '>':
case 'b':
case 'B':
case '`':
case '\'':
node->observed = 1;
node->len = 0;
break;
}
break;
}
if (node->observed)
node->id = id++;
return id;
}
}
/* Returns 0 unless the pattern can match the empty string. */
#ifdef __STDC__
int
rx_fill_in_fastmap (int cset_size, unsigned char * map, struct rexp_node * exp)
#else
int
rx_fill_in_fastmap (cset_size, map, exp)
int cset_size;
unsigned char * map;
struct rexp_node * exp;
#endif
{
if (!exp)
{
can_match_empty:
{
int x;
for (x = 0; x < cset_size; ++x)
map[x] = 1;
}
return 1;
}
switch (exp->type)
{
case r_cset:
{
int x;
int most;
most = exp->params.cset_size;
for (x = 0; x < most; ++x)
if (RX_bitset_member (exp->params.cset, x))
map[x] = 1;
}
return 0;
case r_concat:
return rx_fill_in_fastmap (cset_size, map, exp->params.pair.left);
/* Why not the right branch? If the left branch
* can't be empty it doesn't matter. If it can, then
* the fastmap is already saturated, and again, the
* right branch doesn't matter.
*/
case r_alternate:
return ( rx_fill_in_fastmap (cset_size, map, exp->params.pair.left)
| rx_fill_in_fastmap (cset_size, map, exp->params.pair.right));
case r_parens:
case r_plus:
return rx_fill_in_fastmap (cset_size, map, exp->params.pair.left);
case r_opt:
case r_star:
goto can_match_empty;
/* Why not the subtree? These operators already saturate
* the fastmap.
*/
case r_interval:
if (exp->params.intval == 0)
goto can_match_empty;
else
return rx_fill_in_fastmap (cset_size, map, exp->params.pair.left);
case r_context:
goto can_match_empty;
}
/* this should never happen but gcc seems to like it */
return 0;
}
#ifdef __STDC__
int
rx_is_anchored_p (struct rexp_node * exp)
#else
int
rx_is_anchored_p (exp)
struct rexp_node * exp;
#endif
{
if (!exp)
return 0;
switch (exp->type)
{
case r_opt:
case r_star:
case r_cset:
return 0;
case r_parens:
case r_plus:
case r_concat:
return rx_is_anchored_p (exp->params.pair.left);
case r_alternate:
return ( rx_is_anchored_p (exp->params.pair.left)
&& rx_is_anchored_p (exp->params.pair.right));
case r_interval:
if (exp->params.intval == 0)
return 0;
else
return rx_is_anchored_p (exp->params.pair.left);
case r_context:
return (exp->params.intval == '^');
}
/* this should never happen but gcc seems to like it */
return 0;
}
#ifdef __STDC__
enum rx_answers
rx_start_superstate (struct rx_classical_system * frame)
#else
enum rx_answers
rx_start_superstate (frame)
struct rx_classical_system * frame;
#endif
{
struct rx_superset * start_contents;
struct rx_nfa_state_set * start_nfa_set;
if (frame->state)
{
rx_unlock_superstate (frame->rx, frame->state);
frame->state = 0;
}
if ( frame->rx->start_set
&& (frame->rx->start_set->starts_for == frame->rx)
&& (frame->rx->start_set->id == frame->rx->rx_id))
start_contents = frame->rx->start_set;
else
{
{
struct rx_possible_future * futures;
futures = rx_state_possible_futures (frame->rx, frame->rx->start_nfa_states);
if (!futures)
return rx_bogus;
if (futures->next)
return rx_start_state_with_too_many_futures;
start_nfa_set = futures->destset;
}
start_contents =
rx_superstate_eclosure_union (frame->rx,
rx_superset_cons (frame->rx, 0, 0),
start_nfa_set);
if (!start_contents)
return rx_bogus;
start_contents->starts_for = frame->rx;
frame->rx->start_set = start_contents;
}
if ( start_contents->superstate
&& (start_contents->superstate->rx_id == frame->rx->rx_id))
{
frame->state = start_contents->superstate;
rx_lock_superstate (frame->rx, frame->state);
return rx_yes;
}
else
{
rx_protect_superset (frame->rx, start_contents);
frame->state = rx_superstate (frame->rx, start_contents);
rx_release_superset (frame->rx, start_contents);
if (!frame->state)
return rx_bogus;
rx_lock_superstate (frame->rx, frame->state);
return rx_yes;
}
}
#ifdef __STDC__
enum rx_answers
rx_match_here_p (struct rx_classical_system * frame,
unsigned char * burst,
int len)
#else
enum rx_answers
rx_match_here_p (frame, burst, len)
struct rx_classical_system * frame;
unsigned char * burst;
int len;
#endif
{
struct rx_inx * inx_table;
if (!frame->state)
return rx_bogus;
if (!len)
{
return (frame->state->contents->is_final
? rx_yes
: rx_maybe);
}
inx_table = frame->state->transitions;
rx_unlock_superstate (frame->rx, frame->state);
while (len--)
{
struct rx_inx * inx;
struct rx_inx * next_table;
inx = inx_table + *burst;
next_table = (struct rx_inx *)inx->data;
while (!next_table)
{
struct rx_superstate * state;
state = ((struct rx_superstate *)
((char *)inx_table
- ((unsigned long)
((struct rx_superstate *)0)->transitions)));
switch ((int)inx->inx)
{
case rx_backtrack:
/* RX_BACKTRACK means that we've reached the empty
* superstate, indicating that match can't succeed
* from this point.
*/
frame->state = 0;
return rx_no;
case rx_cache_miss:
/* Because the superstate NFA is lazily constructed,
* and in fact may erode from underneath us, we sometimes
* have to construct the next instruction from the hard way.
* This invokes one step in the lazy-conversion.
*/
inx =
rx_handle_cache_miss
(frame->rx, state, *burst, inx->data_2);
if (!inx)
{
frame->state = 0;
return rx_bogus;
}
next_table = (struct rx_inx *)inx->data;
continue;
/* No other instructions are legal here.
* (In particular, this function does not handle backtracking
* or the related instructions.)
*/
default:
frame->state = 0;
return rx_bogus;
}
}
if (inx->data_2) /* indicates a final superstate */
{
frame->state = ((struct rx_superstate *)
((char *)inx_table
- ((unsigned long)
((struct rx_superstate *)0)->transitions)));
rx_lock_superstate (frame->rx, frame->state);
return rx_yes;
}
inx_table = next_table;
++burst;
}
frame->state = ((struct rx_superstate *)
((char *)inx_table
- ((unsigned long)
((struct rx_superstate *)0)->transitions)));
rx_lock_superstate (frame->rx, frame->state);
return rx_maybe;
}
#ifdef __STDC__
enum rx_answers
rx_fit_p (struct rx_classical_system * frame,
unsigned char * burst,
int len)
#else
enum rx_answers
rx_fit_p (frame, burst, len)
struct rx_classical_system * frame;
unsigned char * burst;
int len;
#endif
{
struct rx_inx * inx_table;
struct rx_inx * inx;
if (!frame->state)
return rx_bogus;
if (!len)
{
return (frame->state->contents->is_final
? rx_yes
: rx_maybe);
}
inx_table = frame->state->transitions;
rx_unlock_superstate (frame->rx, frame->state);
while (len--)
{
struct rx_inx * next_table;
inx = inx_table + *burst;
next_table = (struct rx_inx *)inx->data;
while (!next_table)
{
struct rx_superstate * state;
state = ((struct rx_superstate *)
((char *)inx_table
- ((unsigned long)
((struct rx_superstate *)0)->transitions)));
switch ((int)inx->inx)
{
case rx_backtrack:
/* RX_BACKTRACK means that we've reached the empty
* superstate, indicating that match can't succeed
* from this point.
*/
frame->state = 0;
return rx_no;
case rx_cache_miss:
/* Because the superstate NFA is lazily constructed,
* and in fact may erode from underneath us, we sometimes
* have to construct the next instruction from the hard way.
* This invokes one step in the lazy-conversion.
*/
inx =
rx_handle_cache_miss
(frame->rx, state, *burst, inx->data_2);
if (!inx)
{
frame->state = 0;
return rx_bogus;
}
next_table = (struct rx_inx *)inx->data;
continue;
/* No other instructions are legal here.
* (In particular, this function does not handle backtracking
* or the related instructions.)
*/
default:
frame->state = 0;
return rx_bogus;
}
}
inx_table = next_table;
++burst;
}
if (inx->data_2) /* indicates a final superstate */
{
frame->state = ((struct rx_superstate *)
((char *)inx_table
- ((unsigned long)
((struct rx_superstate *)0)->transitions)));
rx_lock_superstate (frame->rx, frame->state);
return rx_yes;
}
frame->state = ((struct rx_superstate *)
((char *)inx_table
- ((unsigned long)
((struct rx_superstate *)0)->transitions)));
rx_lock_superstate (frame->rx, frame->state);
return rx_maybe;
}
#ifdef __STDC__
enum rx_answers
rx_longest (int * last_matched,
int base_addr,
struct rx_classical_system * frame,
unsigned char * burst,
int len)
#else
enum rx_answers
rx_longest (last_matched, base_addr, frame, burst, len)
int * last_matched;
int base_addr;
struct rx_classical_system * frame;
unsigned char * burst;
int len;
#endif
{
struct rx_inx * inx_table;
int len_in;
if (!frame->state)
return rx_bogus;
if (!len)
{
if (frame->state->contents->is_final)
{
*last_matched = base_addr;
return rx_yes;
}
else
{
return rx_maybe;
}
}
len_in = len;
inx_table = frame->state->transitions;
rx_unlock_superstate (frame->rx, frame->state);
while (len--)
{
struct rx_inx * inx;
struct rx_inx * next_table;
inx = inx_table + *burst;
next_table = (struct rx_inx *)inx->data;
while (!next_table)
{
struct rx_superstate * state;
state = ((struct rx_superstate *)
((char *)inx_table
- ((unsigned long)
((struct rx_superstate *)0)->transitions)));
switch ((int)inx->inx)
{
case rx_backtrack:
/* RX_BACKTRACK means that we've reached the empty
* superstate, indicating that match can't succeed
* from this point.
*/
frame->state = 0;
return rx_no;
case rx_cache_miss:
/* Because the superstate NFA is lazily constructed,
* and in fact may erode from underneath us, we sometimes
* have to construct the next instruction from the hard way.
* This invokes one step in the lazy-conversion.
*/
inx =
rx_handle_cache_miss
(frame->rx, state, *burst, inx->data_2);
if (!inx)
{
frame->state = 0;
return rx_bogus;
}
next_table = (struct rx_inx *)inx->data;
continue;
/* No other instructions are legal here.
* (In particular, this function does not handle backtracking
* or the related instructions.)
*/
default:
frame->state = 0;
return rx_bogus;
}
}
if (inx->data_2) /* indicates a final superstate */
*last_matched = base_addr + len_in - len;
inx_table = next_table;
++burst;
}
frame->state = ((struct rx_superstate *)
((char *)inx_table
- ((unsigned long)
((struct rx_superstate *)0)->transitions)));
rx_lock_superstate (frame->rx, frame->state);
return rx_maybe;
}
#ifdef __STDC__
enum rx_answers
rx_advance (struct rx_classical_system * frame,
unsigned char * burst, int len)
#else
enum rx_answers
rx_advance (frame, burst, len)
struct rx_classical_system * frame;
unsigned char * burst;
int len;
#endif
{
struct rx_inx * inx_table;
int len_in;
if (!frame->state)
return rx_bogus;
if (!len)
return rx_yes;
inx_table = frame->state->transitions;
rx_unlock_superstate (frame->rx, frame->state);
while (len--)
{
struct rx_inx * inx;
struct rx_inx * next_table;
inx = inx_table + *burst;
next_table = (struct rx_inx *)inx->data;
while (!next_table)
{
struct rx_superstate * state;
state = ((struct rx_superstate *)
((char *)inx_table
- ((unsigned long)
((struct rx_superstate *)0)->transitions)));
switch ((int)inx->inx)
{
case rx_backtrack:
/* RX_BACKTRACK means that we've reached the empty
* superstate, indicating that match can't succeed
* from this point.
*/
frame->state = 0;
return rx_no;
case rx_cache_miss:
/* Because the superstate NFA is lazily constructed,
* and in fact may erode from underneath us, we sometimes
* have to construct the next instruction from the hard way.
* This invokes one step in the lazy-conversion.
*/
inx =
rx_handle_cache_miss
(frame->rx, state, *burst, inx->data_2);
if (!inx)
{
frame->state = 0;
return rx_bogus;
}
next_table = (struct rx_inx *)inx->data;
continue;
/* No other instructions are legal here.
* (In particular, this function does not handle backtracking
* or the related instructions.)
*/
default:
frame->state = 0;
return rx_bogus;
}
}
inx_table = next_table;
++burst;
}
frame->state = ((struct rx_superstate *)
((char *)inx_table
- ((unsigned long)
((struct rx_superstate *)0)->transitions)));
rx_lock_superstate (frame->rx, frame->state);
return rx_yes;
}
#ifdef __STDC__
void
rx_terminate_system (struct rx_classical_system * frame)
#else
void
rx_terminate_system (frame)
struct rx_classical_system * frame;
#endif
{
if (frame->state)
{
rx_unlock_superstate (frame->rx, frame->state);
frame->state = 0;
}
}
#ifdef __STDC__
void
rx_init_system (struct rx_classical_system * frame, struct rx * rx)
#else
void
rx_init_system (frame, rx)
struct rx_classical_system * frame;
struct rx * rx;
#endif
{
frame->rx = rx;
frame->state = 0;
}