home *** CD-ROM | disk | FTP | other *** search
-
- /********************************************
- rexp2.c
- copyright 1991, Michael D. Brennan
-
- This is a source file for mawk, an implementation of
- the AWK programming language.
-
- Mawk is distributed without warranty under the terms of
- the GNU General Public License, version 2, 1991.
- ********************************************/
-
- /*$Log: rexp2.c,v $
- * Revision 1.3 1993/07/24 17:55:12 mike
- * more cleanup
- *
- * Revision 1.2 1993/07/23 13:21:44 mike
- * cleanup rexp code
- *
- * Revision 1.1.1.1 1993/07/03 18:58:28 mike
- * move source to cvs
- *
- * Revision 3.8 1992/12/24 00:36:44 mike
- * fixed major bozo for LMDOS when growing stack
- * fixed potential LMDOS bozo with M_STR+U_ON+END_ON
- * fixed minor bug in M_CLASS+U_ON+END_ON
- *
- * Revision 3.7 92/01/21 17:33:15 brennan
- * added some casts so that character classes work with signed chars
- *
- * Revision 3.6 91/10/29 10:54:03 brennan
- * SIZE_T
- *
- * Revision 3.5 91/08/13 09:10:15 brennan
- * VERSION .9994
- *
- * Revision 3.4 91/08/08 07:53:34 brennan
- * work around for turboC realloc() bug
- *
- * Revision 3.4 91/08/07 07:10:47 brennan
- * work around for TurboC realloc() bug
- *
- * Revision 3.3 91/08/04 15:45:57 brennan
- * minor change for large model dos
- *
- * Revision 3.2 91/06/10 16:18:14 brennan
- * changes for V7
- *
- * Revision 3.1 91/06/07 10:33:25 brennan
- * VERSION 0.995
- *
- * Revision 1.8 91/06/05 09:01:33 brennan
- * changes to RE_new_run_stack
- *
- * Revision 1.7 91/05/31 10:56:02 brennan
- * stack_empty hack for DOS large model
- *
- */
-
-
-
- /* test a string against a machine */
-
- #include "rexp.h"
-
- #define STACKGROWTH 16
-
- #ifdef DEBUG
- static RT_STATE *PROTO(slow_push, (RT_STATE *, STATE *, char *, int)) ;
- #endif
-
-
- RT_STATE *RE_run_stack_base ;
- RT_STATE *RE_run_stack_limit ;
-
- /* Large model DOS segment arithemetic breaks the current stack.
- This hack fixes it without rewriting the whole thing, 5/31/91 */
- RT_STATE *RE_run_stack_empty ;
-
- void
- RE_run_stack_init()
- {
- if (!RE_run_stack_base)
- {
- RE_run_stack_base = (RT_STATE *)
- RE_malloc(sizeof(RT_STATE) * STACKGROWTH) ;
- RE_run_stack_limit = RE_run_stack_base + STACKGROWTH ;
- RE_run_stack_empty = RE_run_stack_base - 1 ;
- }
- }
-
- /* sometimes during REmatch(), this stack can grow pretty large.
- In real life cases, the back tracking usually fails. Some
- work is needed here to improve the algorithm.
- I.e., figure out how not to stack useless paths.
- */
-
- RT_STATE *
- RE_new_run_stack()
- {
- int oldsize = RE_run_stack_limit - RE_run_stack_base ;
- int newsize = oldsize + STACKGROWTH ;
-
- #ifdef LMDOS /* large model DOS */
- /* have to worry about overflow on multiplication (ugh) */
- if (newsize >= 4096) RE_run_stack_base = (RT_STATE *) 0 ;
- else
- #endif
-
- RE_run_stack_base = (RT_STATE *) realloc(RE_run_stack_base,
- newsize * sizeof(RT_STATE)) ;
-
- if (!RE_run_stack_base)
- {
- fprintf(stderr, "out of memory for RE run time stack\n") ;
- /* this is pretty unusual, I've only seen it happen on
- weird input to REmatch() under 16bit DOS , the same
- situation worked easily on 32bit machine. */
- exit(100) ;
- }
-
- RE_run_stack_limit = RE_run_stack_base + newsize ;
- RE_run_stack_empty = RE_run_stack_base - 1 ;
-
- /* return the new stackp */
- return RE_run_stack_base + oldsize ;
- }
-
#ifdef DEBUG
/* Function form of push() for DEBUG builds.
   sp is the slot to fill; when it equals RE_run_stack_limit the
   stack is grown first and the first new slot is used instead.
   Stores (m, s, u) in the slot and returns its address. */
static RT_STATE *
slow_push(sp, m, s, u)
    RT_STATE *sp ;
    STATE *m ;
    char *s ;
    int u ;
{
    RT_STATE *slot = sp ;

    if (slot == RE_run_stack_limit)
    {
	slot = RE_new_run_stack() ;
    }

    slot->m = m ;
    slot->s = s ;
    slot->u = u ;
    return slot ;
}
#endif
-
/* push(mx, sx, ux): advance the local `stackp' by one slot, growing
   the run stack when the limit is reached, and store the triple
   (machine position, string position, u flag) in the new top slot.

   Both variants expand to exactly one statement/expression, so
   push() is safe as the body of an unbraced if/else.  Callers
   supply the trailing semicolon. */
#ifdef DEBUG
#define push(mx,sx,ux) \
    (stackp = slow_push(stackp + 1, (mx), (sx), (ux)))
#else
#define push(mx,sx,ux) \
    do { \
	if (++stackp == RE_run_stack_limit) \
	    stackp = RE_new_run_stack() ; \
	stackp->m = (mx) ; stackp->s = (sx) ; stackp->u = (ux) ; \
    } while (0)
#endif


/* Two case labels, x with U_OFF and x with U_ON, for node types
   that are handled the same way in either mode.  The argument is
   parenthesized so that any constant expression may be passed. */
#define CASE_UANY(x) case (x) + U_OFF : case (x) + U_ON
-
- /* test if str ~ /machine/
- */
-
- int
- REtest(str, machine)
- char *str ;
- PTR machine ;
- {
- register STATE *m = (STATE *) machine ;
- register char *s = str ;
- register RT_STATE *stackp ;
- int u_flag ;
- char *str_end ;
- int t ; /*convenient temps */
- STATE *tm ;
-
- /* handle the easy case quickly */
- if ((m + 1)->type == M_ACCEPT && m->type == M_STR)
- return str_str(s, m->data.str, m->len) != (char *) 0 ;
- else
- {
- u_flag = U_ON ; str_end = (char *) 0 ;
- stackp = RE_run_stack_empty ;
- goto reswitch ;
- }
-
- refill :
- if (stackp == RE_run_stack_empty) return 0 ;
- m = stackp->m ;
- s = stackp->s ;
- u_flag = stackp--->u ;
-
-
- reswitch :
-
- switch (m->type + u_flag)
- {
- case M_STR + U_OFF + END_OFF:
- if (strncmp(s, m->data.str, m->len)) goto refill ;
- s += m->len ; m++ ;
- goto reswitch ;
-
- case M_STR + U_OFF + END_ON:
- if (strcmp(s, m->data.str)) goto refill ;
- s += m->len ; m++ ;
- goto reswitch ;
-
- case M_STR + U_ON + END_OFF:
- if (!(s = str_str(s, m->data.str, m->len))) goto refill ;
- push(m, s + 1, U_ON) ;
- s += m->len ; m++ ; u_flag = U_OFF ;
- goto reswitch ;
-
- case M_STR + U_ON + END_ON:
- if (!str_end) str_end = s + strlen(s) ;
- t = (str_end - s) - m->len ;
- if (t < 0 || memcmp(s + t, m->data.str, m->len))
- goto refill ;
- s = str_end ; m++ ; u_flag = U_OFF ;
- goto reswitch ;
-
- case M_CLASS + U_OFF + END_OFF:
- if (!ison(*m->data.bvp, s[0])) goto refill ;
- s++ ; m++ ;
- goto reswitch ;
-
- case M_CLASS + U_OFF + END_ON:
- if (s[1] || !ison(*m->data.bvp, s[0])) goto refill ;
- s++ ; m++ ;
- goto reswitch ;
-
- case M_CLASS + U_ON + END_OFF:
- while (!ison(*m->data.bvp, s[0]))
- {
- if (s[0] == 0) goto refill ;
- else s++ ;
- }
- s++ ;
- push(m, s, U_ON) ;
- m++ ; u_flag = U_OFF ;
- goto reswitch ;
-
- case M_CLASS + U_ON + END_ON:
- if (!str_end) str_end = s + strlen(s) ;
- if (s[0] == 0 || !ison(*m->data.bvp, str_end[-1]))
- goto refill ;
- s = str_end ; m++ ; u_flag = U_OFF ;
- goto reswitch ;
-
- case M_ANY + U_OFF + END_OFF:
- if (s[0] == 0) goto refill ;
- s++ ; m++ ;
- goto reswitch ;
-
- case M_ANY + U_OFF + END_ON:
- if (s[0] == 0 || s[1] != 0) goto refill ;
- s++ ; m++ ;
- goto reswitch ;
-
- case M_ANY + U_ON + END_OFF:
- if (s[0] == 0) goto refill ;
- s++ ;
- push(m, s, U_ON) ;
- m++ ; u_flag = U_OFF ;
- goto reswitch ;
-
- case M_ANY + U_ON + END_ON:
- if (s[0] == 0) goto refill ;
- if (!str_end) str_end = s + strlen(s) ;
- s = str_end ; m++ ; u_flag = U_OFF ;
- goto reswitch ;
-
- case M_START + U_OFF + END_OFF:
- case M_START + U_ON + END_OFF:
- if (s != str) goto refill ;
- m++ ; u_flag = U_OFF ;
- goto reswitch ;
-
- case M_START + U_OFF + END_ON:
- case M_START + U_ON + END_ON:
- if (s != str || s[0] != 0) goto refill ;
- m++ ; u_flag = U_OFF ;
- goto reswitch ;
-
- case M_END + U_OFF:
- if (s[0] != 0) goto refill ;
- m++ ; goto reswitch ;
-
- case M_END + U_ON:
- s += strlen(s) ;
- m++ ; u_flag = U_OFF ;
- goto reswitch ;
-
- CASE_UANY(M_U):
- u_flag = U_ON ; m++ ;
- goto reswitch ;
-
- CASE_UANY(M_1J):
- m += m->data.jump ;
- goto reswitch ;
-
- CASE_UANY(M_2JA): /* take the non jump branch */
- /* don't stack an ACCEPT */
- if ((tm = m + m->data.jump)->type == M_ACCEPT) return 1 ;
- push(tm, s, u_flag) ;
- m++ ;
- goto reswitch ;
-
- CASE_UANY(M_2JB): /* take the jump branch */
- /* don't stack an ACCEPT */
- if ((tm = m + 1)->type == M_ACCEPT) return 1 ;
- push(tm, s, u_flag) ;
- m += m->data.jump ;
- goto reswitch ;
-
- CASE_UANY(M_ACCEPT):
- return 1 ;
-
- default:
- RE_panic("unexpected case in REtest") ;
- }
- }
-
-
-
- #ifdef MAWK
-
- char *
- is_string_split(p, lenp)
- register STATE *p ;
- unsigned *lenp ;
- {
- if (p[0].type == M_STR && p[1].type == M_ACCEPT)
- {
- *lenp = p->len ;
- return p->data.str ;
- }
- else return (char *) 0 ;
- }
- #else /* mawk provides its own str_str */
-
/* Find the first occurrence of key[0..klen-1] in the NUL terminated
   string target.

   Returns a pointer to the start of the occurrence inside target,
   or a null pointer when there is none.  An empty key (klen == 0)
   is never found.

   NOTE(review): assumes the first klen bytes of key contain no NUL
   -- confirm against callers. */
char *
str_str(target, key, klen)
    register char *target ;
    register char *key ;
    unsigned klen ;
{
    int c = key[0] ;

    switch (klen)
    {
	case 0:
	    return (char *) 0 ;

	case 1:
	    return strchr(target, c) ;

	case 2:
	    {
		int c1 = key[1] ;

		while ((target = strchr(target, c)) != (char *) 0)
		{
		    if (target[1] == c1)  return target ;
		    else  target++ ;
		}
		break ;
	    }

	default:
	    /* locate each candidate by its first character, then
	       compare the remaining klen-1 characters */
	    klen-- ; key++ ;
	    while ((target = strchr(target, c)) != (char *) 0)
	    {
		/* strncmp stops at target's terminating NUL, so a
		   candidate too close to the end of target is rejected
		   without reading beyond the string (memcmp gives no
		   such guarantee) */
		if (strncmp(target + 1, key, klen) == 0)  return target ;
		else  target++ ;
	    }
	    break ;
    }
    return (char *) 0 ;
}
-
-
- #endif /* MAWK */
-