home *** CD-ROM | disk | FTP | other *** search
- #include "awk.def"
- #include "stdio.h"
- #include "awk.h"
-
- extern node *op2();
- extern struct fa *cgotofn();
- #define MAXLIN 256
- #define NCHARS 128
- #define NSTATES 256
-
- #define type(v) v->nobj
- #define left(v) v->narg[0]
- #define right(v) v->narg[1]
- #define parent(v) v->nnext
-
- #define LEAF case CCL: case NCCL: case CHAR: case DOT:
- #define UNARY case FINAL: case STAR: case PLUS: case QUEST:
-
- /* encoding in tree nodes:
- leaf (CCL, NCCL, CHAR, DOT): left is index, right contains value or pointer to value
- unary (FINAL, STAR, PLUS, QUEST): left is child, right is null
- binary (CAT, OR): left and right are children
- parent contains pointer to parent
- */
-
- struct fa {
- int cch;
- struct fa *st;
- };
-
- int *state[NSTATES];
- int *foll[MAXLIN];
- char chars[MAXLIN];
- int setvec[MAXLIN];
- node *point[MAXLIN];
-
- int setcnt;
- int line;
-
-
- struct fa *makedfa(p) /* returns dfa for tree pointed to by p */
- node *p;
- {
- node *p1;
- struct fa *fap;
- p1 = op2(CAT, op2(STAR, op2(DOT, (node *) 0, (node *) 0), (node *) 0), p);
- /* put DOT STAR in front of reg. exp. */
- p1 = op2(FINAL, p1, (node *) 0); /* install FINAL node */
-
- line = 0;
- penter(p1); /* enter parent pointers and leaf indices */
- point[line] = p1; /* FINAL node */
- setvec[0] = 1; /* for initial DOT STAR */
- cfoll(p1); /* set up follow sets */
- fap = cgotofn();
- freetr(p1); /* add this when alloc works */
- return(fap);
- }
-
- penter(p) /* set up parent pointers and leaf indices */
- node *p;
- {
- switch(type(p)) {
- LEAF
- left(p) = (node *) line;
- point[line++] = p;
- break;
- UNARY
- penter(left(p));
- parent(left(p)) = p;
- break;
- case CAT:
- case OR:
- penter(left(p));
- penter(right(p));
- parent(left(p)) = p;
- parent(right(p)) = p;
- break;
- default:
- error(FATAL, "unknown type %d in penter\n", type(p));
- break;
- }
- }
-
- freetr(p) /* free parse tree and follow sets */
- node *p;
- {
- switch(type(p)) {
- LEAF
- xfree(foll[(int) left(p)]);
- xfree(p);
- break;
- UNARY
- freetr(left(p));
- xfree(p);
- break;
- case CAT:
- case OR:
- freetr(left(p));
- freetr(right(p));
- xfree(p);
- break;
- default:
- error(FATAL, "unknown type %d in freetr", type(p));
- break;
- }
- }
- char *cclenter(p)
- register char *p;
- {
- register i, c;
- char *op;
-
- op = p;
- i = 0;
- while ((c = *p++) != 0) {
- if (c == '-' && i > 0 && chars[i-1] != 0) {
- if (*p != 0) {
- c = chars[i-1];
- while (c < *p) {
- if (i >= MAXLIN)
- overflo();
- chars[i++] = ++c;
- }
- p++;
- continue;
- }
- }
- if (i >= MAXLIN)
- overflo();
- chars[i++] = c;
- }
- chars[i++] = '\0';
- dprintf("cclenter: in = |%s|, out = |%s|\n", op, chars, NULL);
- xfree(op);
- return(tostring(chars));
- }
-
- overflo()
- {
- error(FATAL, "regular expression too long\n");
- }
-
- cfoll(v) /* enter follow set of each leaf of vertex v into foll[leaf] */
- register node *v;
- {
- register i;
- int prev;
- int *add();
-
- switch(type(v)) {
- LEAF
- setcnt = 0;
- for (i=1; i<=line; i++)
- setvec[i] = 0;
- follow(v);
- if (notin(foll, ( (int) left(v))-1, &prev)) {
- foll[(int) left(v)] = add(setcnt);
- }
- else
- foll[ (int) left(v)] = foll[prev];
- break;
- UNARY
- cfoll(left(v));
- break;
- case CAT:
- case OR:
- cfoll(left(v));
- cfoll(right(v));
- break;
- default:
- error(FATAL, "unknown type %d in cfoll", type(v));
- }
- }
-
- first(p) /* collects initially active leaves of p into setvec */
- register node *p; /* returns 0 or 1 depending on whether p matches empty string */
- {
- register b;
-
- switch(type(p)) {
- LEAF
- if (setvec[(int) left(p)] != 1) {
- setvec[(int) left(p)] = 1;
- setcnt++;
- }
- if (type(p) == CCL && (*(char *) right(p)) == '\0')
- return(0); /* empty CCL */
- else return(1);
- case FINAL:
- case PLUS:
- if (first(left(p)) == 0) return(0);
- return(1);
- case STAR:
- case QUEST:
- first(left(p));
- return(0);
- case CAT:
- if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
- return(1);
- case OR:
- b = first(right(p));
- if (first(left(p)) == 0 || b == 0) return(0);
- return(1);
- }
- error(FATAL, "unknown type %d in first\n", type(p));
- return(-1);
- }
-
- follow(v)
- node *v; /* collects leaves that can follow v into setvec */
- {
- node *p;
-
- if (type(v) == FINAL)
- return;
- p = parent(v);
- switch (type(p)) {
- case STAR:
- case PLUS: first(v);
- follow(p);
- return;
-
- case OR:
- case QUEST: follow(p);
- return;
-
- case CAT: if (v == left(p)) { /* v is left child of p */
- if (first(right(p)) == 0) {
- follow(p);
- return;
- }
- }
- else /* v is right child */
- follow(p);
- return;
- case FINAL: if (setvec[line] != 1) {
- setvec[line] = 1;
- setcnt++;
- }
- return;
- }
- }
-
- member(c, s) /* is c in s? */
- register char c, *s;
- {
- while (*s)
- if (c == *s++)
- return(1);
- return(0);
- }
-
- notin(array, n, prev) /* is setvec in array[0] thru array[n]? */
- int **array;
- int *prev; {
- register i, j;
- int *ptr;
- for (i=0; i<=n; i++) {
- ptr = array[i];
- if (*ptr == setcnt) {
- for (j=0; j < setcnt; j++)
- if (setvec[*(++ptr)] != 1) goto nxt;
- *prev = i;
- return(0);
- }
- nxt: ;
- }
- return(1);
- }
-
- int *add(n) { /* remember setvec */
- int *ptr, *p;
- register i;
- if ((p = ptr = (int *) malloc((n+1)*sizeof(int))) == NULL)
- overflo();
- *ptr = n;
- dprintf("add(%d)\n", n, NULL, NULL);
- for (i=1; i <= line; i++)
- if (setvec[i] == 1) {
- *(++ptr) = i;
- dprintf(" ptr = %o, *ptr = %d, i = %d\n", ptr, *ptr, i);
- }
- dprintf("\n", NULL, NULL, NULL);
- return(p);
- }
-
- struct fa *cgotofn()
- {
- register i, k;
- register int *ptr;
- char c;
- char *p;
- node *cp;
- int j, n, s, ind, numtrans;
- int finflg;
- int curpos, num, prev;
- struct fa *where[NSTATES];
-
- int fatab[257];
- struct fa *pfa;
-
- char index[MAXLIN];
- char iposns[MAXLIN];
- int sposns[MAXLIN];
- int spmax, spinit;
-
- char symbol[NCHARS];
- char isyms[NCHARS];
- char ssyms[NCHARS];
- int ssmax, ssinit;
-
- for (i=0; i<=line; i++) index[i] = iposns[i] = setvec[i] = 0;
- for (i=0; i<NCHARS; i++) isyms[i] = symbol[i] = 0;
- setcnt = 0;
- /* compute initial positions and symbols of state 0 */
- ssmax = 0;
- ptr = state[0] = foll[0];
- spinit = *ptr;
- for (i=0; i<spinit; i++) {
- curpos = *(++ptr);
- sposns[i] = curpos;
- iposns[curpos] = 1;
- cp = point[curpos];
- dprintf("i = %d, spinit = %d, curpos = %d\n", i, spinit, curpos);
- switch (type(cp)) {
- case CHAR:
- k = (int) right(cp);
- if (isyms[k] != 1) {
- isyms[k] = 1;
- ssyms[ssmax++] = k;
- }
- break;
- case DOT:
- for (k=1; k<NCHARS; k++) {
- if (k != HAT) {
- if (isyms[k] != 1) {
- isyms[k] = 1;
- ssyms[ssmax++] = k;
- }
- }
- }
- break;
- case CCL:
- for (p = (char *) right(cp); *p; p++) {
- if (*p != HAT) {
- if (isyms[*p] != 1) {
- isyms[*p] = 1;
- ssyms[ssmax++] = *p;
- }
- }
- }
- break;
- case NCCL:
- for (k=1; k<NCHARS; k++) {
- if (k != HAT && !member(k, (char *) right(cp))) {
- if (isyms[k] != 1) {
- isyms[k] = 1;
- ssyms[ssmax++] = k;
- }
- }
- }
- }
- }
- ssinit = ssmax;
- n = 0;
- for (s=0; s<=n; s++) {
- dprintf("s = %d\n", s, NULL, NULL);
- ind = 0;
- numtrans = 0;
- finflg = 0;
- if (*(state[s] + *state[s]) == line) { /* s final? */
- finflg = 1;
- goto tenter;
- }
- spmax = spinit;
- ssmax = ssinit;
- ptr = state[s];
- num = *ptr;
- for (i=0; i<num; i++) {
- curpos = *(++ptr);
- if (iposns[curpos] != 1 && index[curpos] != 1) {
- index[curpos] = 1;
- sposns[spmax++] = curpos;
- }
- cp = point[curpos];
- switch (type(cp)) {
- case CHAR:
- k = (int) right(cp);
- if (isyms[k] == 0 && symbol[k] == 0) {
- symbol[k] = 1;
- ssyms[ssmax++] = k;
- }
- break;
- case DOT:
- for (k=1; k<NCHARS; k++) {
- if (k != HAT) {
- if (isyms[k] == 0 && symbol[k] == 0) {
- symbol[k] = 1;
- ssyms[ssmax++] = k;
- }
- }
- }
- break;
- case CCL:
- for (p = (char *) right(cp); *p; p++) {
- if (*p != HAT) {
- if (isyms[*p] == 0 && symbol[*p] == 0) {
- symbol[*p] = 1;
- ssyms[ssmax++] = *p;
- }
- }
- }
- break;
- case NCCL:
- for (k=1; k<NCHARS; k++) {
- if (k != HAT && !member(k, (char *) right(cp))) {
- if (isyms[k] == 0 && symbol[k] == 0) {
- symbol[k] = 1;
- ssyms[ssmax++] = k;
- }
- }
- }
- }
- }
- for (j=0; j<ssmax; j++) { /* nextstate(s, ssyms[j]) */
- c = ssyms[j];
- symbol[c] = 0;
- setcnt = 0;
- for (k=0; k<=line; k++) setvec[k] = 0;
- for (i=0; i<spmax; i++) {
- index[sposns[i]] = 0;
- cp = point[sposns[i]];
- if ((k = type(cp)) != FINAL)
- if (k == CHAR && c == (int) right(cp)
- || k == DOT
- || k == CCL && member(c, (char *) right(cp))
- || k == NCCL && !member(c, (char *) right(cp))) {
- ptr = foll[sposns[i]];
- num = *ptr;
- for (k=0; k<num; k++) {
- if (setvec[*(++ptr)] != 1
- && iposns[*ptr] != 1) {
- setvec[*ptr] = 1;
- setcnt++;
- }
- }
- }
- } /* end nextstate */
- if (notin(state, n, &prev)) {
- if (n >= NSTATES) {
- dprintf("cgotofn: notin; state = %d, n = %d\n", state, n, NULL);
- overflo();
- }
- state[++n] = add(setcnt);
- dprintf(" delta(%d,%o) = %d", s,c,n);
- dprintf(", ind = %d\n", ind+1, NULL, NULL);
- fatab[++ind] = c;
- fatab[++ind] = n;
- numtrans++;
- }
- else {
- if (prev != 0) {
- dprintf(" delta(%d,%o) = %d", s,c,prev);
- dprintf(", ind = %d\n", ind+1, NULL, NULL);
- fatab[++ind] = c;
- fatab[++ind] = prev;
- numtrans++;
- }
- }
- }
- tenter:
- if ((pfa = (struct fa *) malloc((numtrans + 1) * sizeof(struct fa))) == NULL)
- overflo();
- where[s] = pfa;
- if (finflg)
- pfa->cch = -1; /* s is a final state */
- else
- pfa->cch = numtrans;
- pfa->st = 0;
- for (i=1, pfa += 1; i<=numtrans; i++, pfa++) {
- pfa->cch = fatab[2*i-1];
- pfa->st = (struct fa *) fatab[2*i];
- }
- }
- for (i=0; i<=n; i++) {
- xfree(state[i]); /* free state[i] */
- pfa = where[i];
- pfa->st = where[0];
- dprintf("state %d: (%o)\n", i, pfa, NULL);
- dprintf(" numtrans = %d, default = %o\n", pfa->cch, pfa->st, NULL);
- for (k=1; k<=pfa->cch; k++) {
- (pfa+k)->st = where[ (int) (pfa+k)->st];
- dprintf(" char = %o, nextstate = %o\n",(pfa+k)->cch, (pfa+k)->st, NULL);
- }
- }
- pfa = where[0];
- if ((num = pfa->cch) < 0)
- return(where[0]);
- for (pfa += num; num; num--, pfa--)
- if (pfa->cch == HAT) {
- return(pfa->st);
- }
- return(where[0]);
- }
-
- match(pfa, p)
- register struct fa *pfa;
- register char *p;
- {
- register count;
- char c;
- if (p == 0) return(0);
- if (pfa->cch == 1) { /* fast test for first character, if possible */
- c = (++pfa)->cch;
- do
- if (c == *p) {
- p++;
- pfa = pfa->st;
- goto adv;
- }
- while (*p++ != 0);
- return(0);
- }
- adv: if ((count = pfa->cch) < 0) return(1);
- do {
- for (pfa += count; count; count--, pfa--)
- if (pfa->cch == *p) {
- break;
- }
- pfa = pfa->st;
- if ((count = pfa->cch) < 0) return(1);
- } while(*p++ != 0);
- return(0);
- }
-