home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
OS/2 Shareware BBS: 22 gnu
/
22-gnu.zip
/
gnurx.zip
/
rx
/
rxnfa.c
< prev
next >
Wrap
C/C++ Source or Header
|
1995-12-31
|
18KB
|
814 lines
/***********************************************************
Copyright 1995 by Tom Lord
All Rights Reserved
Permission to use, copy, modify, and distribute this software and its
documentation for any purpose and without fee is hereby granted,
provided that the above copyright notice appear in all copies and that
both that copyright notice and this permission notice appear in
supporting documentation, and that the name of the copyright holder not be
used in advertising or publicity pertaining to distribution of the
software without specific, written prior permission.
Tom Lord DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
EVENT SHALL TOM LORD BE LIABLE FOR ANY SPECIAL, INDIRECT OR
CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
******************************************************************/
/*
* Tom Lord (lord@cygnus.com, lord@gnu.ai.mit.edu)
*/
#include "rxall.h"
#include "rxnfa.h"
#ifndef realloc
extern char * realloc ();
#endif
#ifndef malloc
extern char * malloc ();
#endif
#define remalloc(M, S) (M ? realloc (M, S) : malloc (S))
/* {Low Level Data Structure Code}
*/
/* Constructs a new nfa node. */
#ifdef __STDC__
struct rx_nfa_state *
rx_nfa_state (struct rx *rx)
#else
struct rx_nfa_state *
rx_nfa_state (rx)
struct rx *rx;
#endif
{
struct rx_nfa_state * n = (struct rx_nfa_state *)malloc (sizeof (*n));
if (!n)
return 0;
bzero (n, sizeof (*n));
n->next = rx->nfa_states;
rx->nfa_states = n;
return n;
}
#ifdef __STDC__
static void
rx_free_nfa_state (struct rx_nfa_state * n)
#else
static void
rx_free_nfa_state (n)
struct rx_nfa_state * n;
#endif
{
free ((char *)n);
}
/* This adds an edge between two nodes, but doesn't initialize the
* edge label.
*/
#ifdef __STDC__
struct rx_nfa_edge *
rx_nfa_edge (struct rx *rx,
enum rx_nfa_etype type,
struct rx_nfa_state *start,
struct rx_nfa_state *dest)
#else
struct rx_nfa_edge *
rx_nfa_edge (rx, type, start, dest)
struct rx *rx;
enum rx_nfa_etype type;
struct rx_nfa_state *start;
struct rx_nfa_state *dest;
#endif
{
struct rx_nfa_edge *e;
e = (struct rx_nfa_edge *)malloc (sizeof (*e));
if (!e)
return 0;
e->next = start->edges;
start->edges = e;
e->type = type;
e->dest = dest;
return e;
}
#ifdef __STDC__
static void
rx_free_nfa_edge (struct rx_nfa_edge * e)
#else
static void
rx_free_nfa_edge (e)
struct rx_nfa_edge * e;
#endif
{
free ((char *)e);
}
/* This constructs a POSSIBLE_FUTURE, which is a kind epsilon-closure
* of an NFA. These are added to an nfa automaticly by eclose_nfa.
*/
#ifdef __STDC__
static struct rx_possible_future *
rx_possible_future (struct rx * rx,
struct rx_se_list * effects)
#else
static struct rx_possible_future *
rx_possible_future (rx, effects)
struct rx * rx;
struct rx_se_list * effects;
#endif
{
struct rx_possible_future *ec;
ec = (struct rx_possible_future *) malloc (sizeof (*ec));
if (!ec)
return 0;
ec->destset = 0;
ec->next = 0;
ec->effects = effects;
return ec;
}
#ifdef __STDC__
static void
rx_free_possible_future (struct rx_possible_future * pf)
#else
static void
rx_free_possible_future (pf)
struct rx_possible_future * pf;
#endif
{
free ((char *)pf);
}
#ifdef __STDC__
static void
rx_free_nfa_graph (struct rx *rx)
#else
static void
rx_free_nfa_graph (rx)
struct rx *rx;
#endif
{
while (rx->nfa_states)
{
while (rx->nfa_states->edges)
{
switch (rx->nfa_states->edges->type)
{
case ne_cset:
rx_free_cset (rx->nfa_states->edges->params.cset);
break;
default:
break;
}
{
struct rx_nfa_edge * e;
e = rx->nfa_states->edges;
rx->nfa_states->edges = rx->nfa_states->edges->next;
rx_free_nfa_edge (e);
}
} /* while (rx->nfa_states->edges) */
{
/* Iterate over the partial epsilon closures of rx->nfa_states */
struct rx_possible_future * pf = rx->nfa_states->futures;
while (pf)
{
struct rx_possible_future * pft = pf;
pf = pf->next;
rx_free_possible_future (pft);
}
}
{
struct rx_nfa_state *n;
n = rx->nfa_states;
rx->nfa_states = rx->nfa_states->next;
rx_free_nfa_state (n);
}
}
}
/* {Translating a Syntax Tree into an NFA}
*
*/
/* This is the Thompson regexp->nfa algorithm.
* It is modified to allow for `side-effect epsilons.' Those are
* edges that are taken whenever a similarly placed epsilon edge
* would be, but which also imply that some side effect occurs
* when the edge is taken.
*
* Side effects are used to model parts of the pattern langauge
* that are not regular.
*/
#ifdef __STDC__
int
rx_build_nfa (struct rx *rx,
struct rexp_node *rexp,
struct rx_nfa_state **start,
struct rx_nfa_state **end)
#else
int
rx_build_nfa (rx, rexp, start, end)
struct rx *rx;
struct rexp_node *rexp;
struct rx_nfa_state **start;
struct rx_nfa_state **end;
#endif
{
struct rx_nfa_edge *edge;
/* Start & end nodes may have been allocated by the caller. */
*start = *start ? *start : rx_nfa_state (rx);
if (!*start)
return 0;
if (!rexp)
{
*end = *start;
return 1;
}
*end = *end ? *end : rx_nfa_state (rx);
if (!*end)
{
rx_free_nfa_state (*start);
return 0;
}
switch (rexp->type)
{
case r_cset:
edge = rx_nfa_edge (rx, ne_cset, *start, *end);
if (!edge)
return 0;
edge->params.cset = rx_copy_cset (rx->local_cset_size,
rexp->params.cset);
if (!edge->params.cset)
{
rx_free_nfa_edge (edge);
return 0;
}
return 1;
case r_opt:
return (rx_build_nfa (rx, rexp->params.pair.left, start, end)
&& rx_nfa_edge (rx, ne_epsilon, *start, *end));
case r_plus:
{
struct rx_nfa_state * star_start = 0;
struct rx_nfa_state * star_end = 0;
struct rx_nfa_state * shared;
shared = 0;
if (!rx_build_nfa (rx, rexp->params.pair.left, start, &shared))
return 0;
return (rx_build_nfa (rx, rexp->params.pair.left,
&star_start, &star_end)
&& star_start
&& star_end
&& rx_nfa_edge (rx, ne_epsilon, star_start, star_end)
&& rx_nfa_edge (rx, ne_epsilon, shared, star_start)
&& rx_nfa_edge (rx, ne_epsilon, star_end, *end)
&& rx_nfa_edge (rx, ne_epsilon, star_end, star_start));
}
case r_interval:
case r_star:
{
struct rx_nfa_state * star_start = 0;
struct rx_nfa_state * star_end = 0;
return (rx_build_nfa (rx, rexp->params.pair.left,
&star_start, &star_end)
&& star_start
&& star_end
&& rx_nfa_edge (rx, ne_epsilon, star_start, star_end)
&& rx_nfa_edge (rx, ne_epsilon, *start, star_start)
&& rx_nfa_edge (rx, ne_epsilon, star_end, *end)
&& rx_nfa_edge (rx, ne_epsilon, star_end, star_start));
}
case r_parens:
return rx_build_nfa (rx, rexp->params.pair.left, start, end);
case r_concat:
{
struct rx_nfa_state *shared = 0;
return
(rx_build_nfa (rx, rexp->params.pair.left, start, &shared)
&& rx_build_nfa (rx, rexp->params.pair.right, &shared, end));
}
case r_alternate:
{
struct rx_nfa_state *ls = 0;
struct rx_nfa_state *le = 0;
struct rx_nfa_state *rs = 0;
struct rx_nfa_state *re = 0;
return (rx_build_nfa (rx, rexp->params.pair.left, &ls, &le)
&& rx_build_nfa (rx, rexp->params.pair.right, &rs, &re)
&& rx_nfa_edge (rx, ne_epsilon, *start, ls)
&& rx_nfa_edge (rx, ne_epsilon, *start, rs)
&& rx_nfa_edge (rx, ne_epsilon, le, *end)
&& rx_nfa_edge (rx, ne_epsilon, re, *end));
}
case r_context:
edge = rx_nfa_edge (rx, ne_side_effect, *start, *end);
if (!edge)
return 0;
edge->params.side_effect = (void *)rexp->params.intval;
return 1;
}
/* this should never happen */
return 0;
}
/* {Low Level Data Structures for the Static NFA->SuperNFA Analysis}
*
* There are side effect lists -- lists of side effects occuring
* along an uninterrupted, acyclic path of side-effect epsilon edges.
* Such paths are collapsed to single edges in the course of computing
* epsilon closures. The resulting single edges are labled with a list
* of all the side effects from the original multi-edge path. Equivalent
* lists of side effects are made == by the constructors below.
*
* There are also nfa state sets. These are used to hold a list of all
* states reachable from a starting state for a given type of transition
* and side effect list. These are also hash-consed.
*/
/* The next several functions compare, construct, etc. lists of side
* effects. See ECLOSE_NFA (below) for details.
*/
/* Ordering of rx_se_list
* (-1, 0, 1 return value convention).
*/
#ifdef __STDC__
static int
se_list_cmp (void * va, void * vb)
#else
static int
se_list_cmp (va, vb)
void * va;
void * vb;
#endif
{
struct rx_se_list * a = (struct rx_se_list *)va;
struct rx_se_list * b = (struct rx_se_list *)vb;
return ((va == vb)
? 0
: (!va
? -1
: (!vb
? 1
: ((long)a->car < (long)b->car
? 1
: ((long)a->car > (long)b->car
? -1
: se_list_cmp ((void *)a->cdr, (void *)b->cdr))))));
}
#ifdef __STDC__
static int
se_list_equal (void * va, void * vb)
#else
static int
se_list_equal (va, vb)
void * va;
void * vb;
#endif
{
return !(se_list_cmp (va, vb));
}
static struct rx_hash_rules se_list_hash_rules = { se_list_equal, 0, 0, 0, 0 };
#ifdef __STDC__
static struct rx_se_list *
side_effect_cons (struct rx * rx,
void * se, struct rx_se_list * list)
#else
static struct rx_se_list *
side_effect_cons (rx, se, list)
struct rx * rx;
void * se;
struct rx_se_list * list;
#endif
{
struct rx_se_list * l;
l = ((struct rx_se_list *) malloc (sizeof (*l)));
if (!l)
return 0;
l->car = se;
l->cdr = list;
return l;
}
#ifdef __STDC__
static struct rx_se_list *
hash_cons_se_prog (struct rx * rx,
struct rx_hash * memo,
void * car, struct rx_se_list * cdr)
#else
static struct rx_se_list *
hash_cons_se_prog (rx, memo, car, cdr)
struct rx * rx;
struct rx_hash * memo;
void * car;
struct rx_se_list * cdr;
#endif
{
long hash = (long)car ^ (long)cdr;
struct rx_se_list template;
template.car = car;
template.cdr = cdr;
{
struct rx_hash_item * it = rx_hash_store (memo, hash,
(void *)&template,
&se_list_hash_rules);
if (!it)
return 0;
if (it->data == (void *)&template)
{
struct rx_se_list * consed;
consed = (struct rx_se_list *) malloc (sizeof (*consed));
*consed = template;
it->data = (void *)consed;
}
return (struct rx_se_list *)it->data;
}
}
#ifdef __STDC__
static struct rx_se_list *
hash_se_prog (struct rx * rx, struct rx_hash * memo, struct rx_se_list * prog)
#else
static struct rx_se_list *
hash_se_prog (rx, memo, prog)
struct rx * rx;
struct rx_hash * memo;
struct rx_se_list * prog;
#endif
{
struct rx_se_list * answer = 0;
while (prog)
{
answer = hash_cons_se_prog (rx, memo, prog->car, answer);
if (!answer)
return 0;
prog = prog->cdr;
}
return answer;
}
/* {Constructors, etc. for NFA State Sets}
*/
#ifdef __STDC__
static int
nfa_set_cmp (void * va, void * vb)
#else
static int
nfa_set_cmp (va, vb)
void * va;
void * vb;
#endif
{
struct rx_nfa_state_set * a = (struct rx_nfa_state_set *)va;
struct rx_nfa_state_set * b = (struct rx_nfa_state_set *)vb;
return ((va == vb)
? 0
: (!va
? -1
: (!vb
? 1
: (a->car->id < b->car->id
? 1
: (a->car->id > b->car->id
? -1
: nfa_set_cmp ((void *)a->cdr, (void *)b->cdr))))));
}
#ifdef __STDC__
static int
nfa_set_equal (void * va, void * vb)
#else
static int
nfa_set_equal (va, vb)
void * va;
void * vb;
#endif
{
return !nfa_set_cmp (va, vb);
}
static struct rx_hash_rules nfa_set_hash_rules = { nfa_set_equal, 0, 0, 0, 0 };
#ifdef __STDC__
static struct rx_nfa_state_set *
nfa_set_cons (struct rx * rx,
struct rx_hash * memo, struct rx_nfa_state * state,
struct rx_nfa_state_set * set)
#else
static struct rx_nfa_state_set *
nfa_set_cons (rx, memo, state, set)
struct rx * rx;
struct rx_hash * memo;
struct rx_nfa_state * state;
struct rx_nfa_state_set * set;
#endif
{
struct rx_nfa_state_set template;
struct rx_hash_item * node;
template.car = state;
template.cdr = set;
node = rx_hash_store (memo,
(((long)state) >> 8) ^ (long)set,
&template, &nfa_set_hash_rules);
if (!node)
return 0;
if (node->data == &template)
{
struct rx_nfa_state_set * l;
l = (struct rx_nfa_state_set *) malloc (sizeof (*l));
node->data = (void *) l;
if (!l)
return 0;
*l = template;
}
return (struct rx_nfa_state_set *)node->data;
}
#ifdef __STDC__
static struct rx_nfa_state_set *
nfa_set_enjoin (struct rx * rx,
struct rx_hash * memo, struct rx_nfa_state * state,
struct rx_nfa_state_set * set)
#else
static struct rx_nfa_state_set *
nfa_set_enjoin (rx, memo, state, set)
struct rx * rx;
struct rx_hash * memo;
struct rx_nfa_state * state;
struct rx_nfa_state_set * set;
#endif
{
if (!set || state->id < set->car->id)
return nfa_set_cons (rx, memo, state, set);
if (state->id == set->car->id)
return set;
else
{
struct rx_nfa_state_set * newcdr
= nfa_set_enjoin (rx, memo, state, set->cdr);
if (newcdr != set->cdr)
set = nfa_set_cons (rx, memo, set->car, newcdr);
return set;
}
}
/* {Computing Epsilon/Side-effect Closures.}
*/
struct eclose_frame
{
struct rx_se_list *prog_backwards;
};
/* This is called while computing closures for "outnode".
* The current node in the traversal is "node".
* "frame" contains a list of a all side effects between
* "outnode" and "node" from most to least recent.
*
* Explores forward from "node", adding new possible
* futures to outnode.
*
* Returns 0 on allocation failure.
*/
#ifdef __STDC__
static int
eclose_node (struct rx *rx, struct rx_nfa_state *outnode,
struct rx_nfa_state *node, struct eclose_frame *frame)
#else
static int
eclose_node (rx, outnode, node, frame)
struct rx *rx;
struct rx_nfa_state *outnode;
struct rx_nfa_state *node;
struct eclose_frame *frame;
#endif
{
struct rx_nfa_edge *e = node->edges;
/* For each node, we follow all epsilon paths to build the closure.
* The closure omits nodes that have only epsilon edges.
* The closure is split into partial closures -- all the states in
* a partial closure are reached by crossing the same list of
* of side effects (though not necessarily the same path).
*/
if (node->mark)
return 1;
node->mark = 1;
/* If "node" has more than just epsilon and
* and side-effect transitions (id >= 0), or is final,
* then it has to be added to the possible futures
* of "outnode".
*/
if (node->id >= 0 || node->is_final)
{
struct rx_possible_future **ec;
struct rx_se_list * prog_in_order;
int cmp;
prog_in_order = ((struct rx_se_list *)hash_se_prog (rx,
&rx->se_list_memo,
frame->prog_backwards));
ec = &outnode->futures;
while (*ec)
{
cmp = se_list_cmp ((void *)(*ec)->effects, (void *)prog_in_order);
if (cmp <= 0)
break;
ec = &(*ec)->next;
}
if (!*ec || (cmp < 0))
{
struct rx_possible_future * pf;
pf = rx_possible_future (rx, prog_in_order);
if (!pf)
return 0;
pf->next = *ec;
*ec = pf;
}
if (node->id >= 0)
{
(*ec)->destset = nfa_set_enjoin (rx, &rx->set_list_memo,
node, (*ec)->destset);
if (!(*ec)->destset)
return 0;
}
}
/* Recurse on outgoing epsilon and side effect nodes.
*/
while (e)
{
switch (e->type)
{
case ne_epsilon:
if (!eclose_node (rx, outnode, e->dest, frame))
return 0;
break;
case ne_side_effect:
{
frame->prog_backwards = side_effect_cons (rx,
e->params.side_effect,
frame->prog_backwards);
if (!frame->prog_backwards)
return 0;
if (!eclose_node (rx, outnode, e->dest, frame))
return 0;
{
struct rx_se_list * dying = frame->prog_backwards;
frame->prog_backwards = frame->prog_backwards->cdr;
free ((char *)dying);
}
break;
}
default:
break;
}
e = e->next;
}
node->mark = 0;
return 1;
}
#ifdef __STDC__
struct rx_possible_future *
rx_state_possible_futures (struct rx * rx, struct rx_nfa_state * n)
#else
struct rx_possible_future *
rx_state_possible_futures (rx, n)
struct rx * rx;
struct rx_nfa_state * n;
#endif
{
if (n->futures_computed)
return n->futures;
else
{
struct eclose_frame frame;
frame.prog_backwards = 0;
if (!eclose_node (rx, n, n, &frame))
return 0;
else
{
n->futures_computed = 1;
return n->futures;
}
}
}
/* {Storing the NFA in a Contiguous Region of Memory}
*/
#ifdef __STDC__
static void
se_memo_freer (struct rx_hash_item * node)
#else
static void
se_memo_freer (node)
struct rx_hash_item * node;
#endif
{
free ((char *)node->data);
}
#ifdef __STDC__
static void
nfa_set_freer (struct rx_hash_item * node)
#else
static void
nfa_set_freer (node)
struct rx_hash_item * node;
#endif
{
free ((char *)node->data);
}
#ifdef __STDC__
void
rx_free_nfa (struct rx *rx)
#else
void
rx_free_nfa (rx)
struct rx *rx;
#endif
{
rx_free_hash_table (&rx->se_list_memo, se_memo_freer, &se_list_hash_rules);
bzero (&rx->se_list_memo, sizeof (rx->se_list_memo));
rx_free_hash_table (&rx->set_list_memo, nfa_set_freer, &nfa_set_hash_rules);
bzero (&rx->set_list_memo, sizeof (rx->set_list_memo));
rx_free_nfa_graph (rx);
}