home *** CD-ROM | disk | FTP | other *** search
- /*
- * egrep -- print lines containing (or not containing) a regular expression
- *
- * status returns:
- * 0 - ok, and some matches
- * 1 - ok, but no matches
- * 2 - some error
- */
- %token CHAR DOT CCL NCCL OR CAT STAR PLUS QUEST
- %left OR
- %left CHAR DOT CCL NCCL '('
- %left CAT
- %left STAR PLUS QUEST
-
- %{
- #include <stdio.h>
-
- #define MAXLIN 350
- #define MAXPOS 4000
- #define NCHARS 128
- #define NSTATES 128
- #define FINAL -1
- char gotofn[NSTATES][NCHARS];
- int state[NSTATES];
- char out[NSTATES];
- int line 1;
- int name[MAXLIN];
- int left[MAXLIN];
- int right[MAXLIN];
- int parent[MAXLIN];
- int foll[MAXLIN];
- int positions[MAXPOS];
- char chars[MAXLIN];
- int nxtpos;
- int nxtchar 0;
- int tmpstat[MAXLIN];
- int initstat[MAXLIN];
- int xstate;
- int count;
- int icount;
- char *input;
-
- long lnum;
- int bflag;
- int cflag;
- int fflag;
- int lflag;
- int nflag;
- int hflag = 1;
- int sflag;
- int vflag;
- int nfile;
- long blkno;
- long tln;
- int nsucc;
-
- int f;
- int fname;
- %}
-
- %%
- s: t
- ={ unary(FINAL, $1);
- line--;
- }
- ;
- t: b r
- ={ $$ = node(CAT, $1, $2); }
- | OR b r OR
- ={ $$ = node(CAT, $2, $3); }
- | OR b r
- ={ $$ = node(CAT, $2, $3); }
- | b r OR
- ={ $$ = node(CAT, $1, $2); }
- ;
- b:
- ={ $$ = enter(DOT);
- $$ = unary(STAR, $$); }
- ;
- r: CHAR
- ={ $$ = enter($1); }
- | DOT
- ={ $$ = enter(DOT); }
- | CCL
- ={ $$ = cclenter(CCL); }
- | NCCL
- ={ $$ = cclenter(NCCL); }
- ;
-
- r: r OR r
- ={ $$ = node(OR, $1, $3); }
- | r r %prec CAT
- ={ $$ = node(CAT, $1, $2); }
- | r STAR
- ={ $$ = unary(STAR, $1); }
- | r PLUS
- ={ $$ = unary(PLUS, $1); }
- | r QUEST
- ={ $$ = unary(QUEST, $1); }
- | '(' r ')'
- ={ $$ = $2; }
- | error
- ;
-
- %%
- yyerror(s) {
- fprintf(stderr, "egrep: %s\n", s);
- exit(2);
- }
-
- yylex() {
- extern int yylval;
- int cclcnt, x;
- register char c, d;
- switch(c = nextch()) {
- case '$':
- case '^': c = '\n';
- goto defchar;
- case '|': return (OR);
- case '*': return (STAR);
- case '+': return (PLUS);
- case '?': return (QUEST);
- case '(': return (c);
- case ')': return (c);
- case '.': return (DOT);
- case '\0': return (0);
- case '\n': return (OR);
- case '[':
- x = CCL;
- cclcnt = 0;
- count = nxtchar++;
- if ((c = nextch()) == '^') {
- x = NCCL;
- c = nextch();
- }
- do {
- if (c == '\0') synerror();
- if (c == '-' && cclcnt > 0 && chars[nxtchar-1] != 0) {
- if ((d = nextch()) != 0) {
- c = chars[nxtchar-1];
- while (c < d) {
- if (nxtchar >= MAXLIN) overflo();
- chars[nxtchar++] = ++c;
- cclcnt++;
- }
- continue;
- }
- }
- if (nxtchar >= MAXLIN) overflo();
- chars[nxtchar++] = c;
- cclcnt++;
- } while ((c = nextch()) != ']');
- chars[count] = cclcnt;
- return (x);
- case '\\':
- if ((c = nextch()) == '\0') synerror();
- defchar:
- default: yylval = c; return (CHAR);
- }
- }
- nextch() {
- register char c;
- if (fflag) {
- if ((c = getc(stdin)) == EOF) return(0);
- }
- else c = *input++;
- return(c);
- }
-
- synerror() {
- fprintf(stderr, "egrep: syntax error\n");
- exit(2);
- }
-
- enter(x) int x; {
- if(line >= MAXLIN) overflo();
- name[line] = x;
- left[line] = 0;
- right[line] = 0;
- return(line++);
- }
-
- cclenter(x) int x; {
- register linno;
- linno = enter(x);
- right[linno] = count;
- return (linno);
- }
-
- node(x, l, r) {
- if(line >= MAXLIN) overflo();
- name[line] = x;
- left[line] = l;
- right[line] = r;
- parent[l] = line;
- parent[r] = line;
- return(line++);
- }
-
- unary(x, d) {
- if(line >= MAXLIN) overflo();
- name[line] = x;
- left[line] = d;
- right[line] = 0;
- parent[d] = line;
- return(line++);
- }
- overflo() {
- fprintf(stderr, "egrep: regular expression too long\n");
- exit(2);
- }
-
- cfoll(v) {
- register i;
- if (left[v] == 0) {
- count = 0;
- for (i=1; i<=line; i++) tmpstat[i] = 0;
- follow(v);
- add(foll, v);
- }
- else if (right[v] == 0) cfoll(left[v]);
- else {
- cfoll(left[v]);
- cfoll(right[v]);
- }
- }
- cgotofn() {
- register c, i, k;
- int n, s;
- char symbol[NCHARS];
- int j, nc, pc, pos;
- int curpos, num;
- int number, newpos;
- count = 0;
- for (n=3; n<=line; n++) tmpstat[n] = 0;
- if (cstate(line-1)==0) {
- tmpstat[line] = 1;
- count++;
- out[0] = 1;
- }
- for (n=3; n<=line; n++) initstat[n] = tmpstat[n];
- count--; /*leave out position 1 */
- icount = count;
- tmpstat[1] = 0;
- add(state, 0);
- n = 0;
- for (s=0; s<=n; s++) {
- if (out[s] == 1) continue;
- for (i=0; i<NCHARS; i++) symbol[i] = 0;
- num = positions[state[s]];
- count = icount;
- for (i=3; i<=line; i++) tmpstat[i] = initstat[i];
- pos = state[s] + 1;
- for (i=0; i<num; i++) {
- curpos = positions[pos];
- if ((c = name[curpos]) >= 0) {
- if (c < NCHARS) symbol[c] = 1;
- else if (c == DOT) {
- for (k=0; k<NCHARS; k++)
- if (k!='\n') symbol[k] = 1;
- }
- else if (c == CCL) {
- nc = chars[right[curpos]];
- pc = right[curpos] + 1;
- for (k=0; k<nc; k++) symbol[chars[pc++]] = 1;
- }
- else if (c == NCCL) {
- nc = chars[right[curpos]];
- for (j = 0; j < NCHARS; j++) {
- pc = right[curpos] + 1;
- for (k = 0; k < nc; k++)
- if (j==chars[pc++]) goto cont;
- if (j!='\n') symbol[j] = 1;
- cont:;
- }
- }
- else printf("something's funny\n");
- }
- pos++;
- }
- for (c=0; c<NCHARS; c++) {
- if (symbol[c] == 1) { /* nextstate(s,c) */
- count = icount;
- for (i=3; i <= line; i++) tmpstat[i] = initstat[i];
- pos = state[s] + 1;
- for (i=0; i<num; i++) {
- curpos = positions[pos];
- if ((k = name[curpos]) >= 0)
- if (
- (k == c)
- | (k == DOT)
- | (k == CCL && member(c, right[curpos], 1))
- | (k == NCCL && member(c, right[curpos], 0))
- ) {
- number = positions[foll[curpos]];
- newpos = foll[curpos] + 1;
- for (k=0; k<number; k++) {
- if (tmpstat[positions[newpos]] != 1) {
- tmpstat[positions[newpos]] = 1;
- count++;
- }
- newpos++;
- }
- }
- pos++;
- } /* end nextstate */
- if (notin(n)) {
- if (n >= NSTATES) overflo();
- add(state, ++n);
- if (tmpstat[line] == 1) out[n] = 1;
- gotofn[s][c] = n;
- }
- else {
- gotofn[s][c] = xstate;
- }
- }
- }
- }
- }
-
- cstate(v) {
- register b;
- if (left[v] == 0) {
- if (tmpstat[v] != 1) {
- tmpstat[v] = 1;
- count++;
- }
- return(1);
- }
- else if (right[v] == 0) {
- if (cstate(left[v]) == 0) return (0);
- else if (name[v] == PLUS) return (1);
- else return (0);
- }
- else if (name[v] == CAT) {
- if (cstate(left[v]) == 0 && cstate(right[v]) == 0) return (0);
- else return (1);
- }
- else { /* name[v] == OR */
- b = cstate(right[v]);
- if (cstate(left[v]) == 0 || b == 0) return (0);
- else return (1);
- }
- }
-
-
- member(symb, set, torf) {
- register i, num, pos;
- num = chars[set];
- pos = set + 1;
- for (i=0; i<num; i++)
- if (symb == chars[pos++]) return (torf);
- return (!torf);
- }
-
- notin(n) {
- register i, j, pos;
- for (i=0; i<=n; i++) {
- if (positions[state[i]] == count) {
- pos = state[i] + 1;
- for (j=0; j < count; j++)
- if (tmpstat[positions[pos++]] != 1) goto nxt;
- xstate = i;
- return (0);
- }
- nxt: ;
- }
- return (1);
- }
-
- add(array, n) int *array; {
- register i;
- if (nxtpos + count > MAXPOS) overflo();
- array[n] = nxtpos;
- positions[nxtpos++] = count;
- for (i=3; i <= line; i++) {
- if (tmpstat[i] == 1) {
- positions[nxtpos++] = i;
- }
- }
- }
-
- follow(v) int v; {
- int p;
- if (v == line) return;
- p = parent[v];
- switch(name[p]) {
- case STAR:
- case PLUS: cstate(v);
- follow(p);
- return;
-
- case OR:
- case QUEST: follow(p);
- return;
-
- case CAT: if (v == left[p]) {
- if (cstate(right[p]) == 0) {
- follow(p);
- return;
- }
- }
- else follow(p);
- return;
- case FINAL: if (tmpstat[line] != 1) {
- tmpstat[line] = 1;
- count++;
- }
- return;
- }
- }
-
-
- main(argc, argv)
- char **argv;
- {
- while (--argc > 0 && (++argv)[0][0]=='-')
- switch (argv[0][1]) {
-
- case 's':
- sflag++;
- continue;
-
- case 'h':
- hflag = 0;
- continue;
-
- case 'b':
- bflag++;
- continue;
-
- case 'c':
- cflag++;
- continue;
-
- case 'e':
- argc--;
- argv++;
- goto out;
-
- case 'f':
- fflag++;
- continue;
-
- case 'l':
- lflag++;
- continue;
-
- case 'n':
- nflag++;
- continue;
-
- case 'v':
- vflag++;
- continue;
-
- default:
- fprintf(stderr, "egrep: unknown flag\n");
- continue;
- }
- out:
- if (argc<=0)
- exit(2);
- if (fflag) {
- if (freopen(fname = *argv, "r", stdin) == NULL) {
- fprintf(stderr, "egrep: can't open %s\n", fname);
- exit(2);
- }
- }
- else input = *argv;
- argc--;
- argv++;
-
- yyparse();
-
- cfoll(line-1);
- cgotofn();
- nfile = argc;
- if (argc<=0) {
- if (lflag) exit(1);
- execute(0);
- }
- else while (--argc >= 0) {
- execute(*argv);
- argv++;
- }
- exit(nsucc == 0);
- }
-
- execute(file)
- char *file;
- {
- register char *p;
- register cstat;
- register ccount;
- char buf[1024];
- char *nlp;
- int istat;
- if (file) {
- if ((f = open(file, 0)) < 0) {
- fprintf(stderr, "egrep: can't open %s\n", file);
- exit(2);
- }
- }
- else f = 0;
- ccount = 0;
- lnum = 1;
- tln = 0;
- p = buf;
- nlp = p;
- if ((ccount = read(f,p,512))<=0) goto done;
- blkno = ccount;
- istat = cstat = gotofn[0]['\n'];
- if (out[cstat]) goto found;
- for (;;) {
- cstat = gotofn[cstat][*p&0377]; /* all input chars made positive */
- if (out[cstat]) {
- found: for(;;) {
- if (*p++ == '\n') {
- if (vflag == 0) {
- succeed: nsucc = 1;
- if (cflag) tln++;
- else if (sflag)
- ; /* ugh */
- else if (lflag) {
- printf("%s\n", file);
- close(f);
- return;
- }
- else {
- if (nfile > 1 && hflag) printf("%s:", file);
- if (bflag) printf("%ld:", (blkno-ccount-1)/512);
- if (nflag) printf("%ld:", lnum);
- if (p <= nlp) {
- while (nlp < &buf[1024]) putchar(*nlp++);
- nlp = buf;
- }
- while (nlp < p) putchar(*nlp++);
- }
- }
- lnum++;
- nlp = p;
- if ((out[(cstat=istat)]) == 0) goto brk2;
- }
- cfound:
- if (--ccount <= 0) {
- if (p <= &buf[512]) {
- if ((ccount = read(f, p, 512)) <= 0) goto done;
- }
- else if (p == &buf[1024]) {
- p = buf;
- if ((ccount = read(f, p, 512)) <= 0) goto done;
- }
- else {
- if ((ccount = read(f, p, &buf[1024]-p)) <= 0) goto done;
- }
- if(nlp>p && nlp<=p+ccount)
- nlp = p+ccount;
- blkno += ccount;
- }
- }
- }
- if (*p++ == '\n') {
- if (vflag) goto succeed;
- else {
- lnum++;
- nlp = p;
- if (out[(cstat=istat)]) goto cfound;
- }
- }
- brk2:
- if (--ccount <= 0) {
- if (p <= &buf[512]) {
- if ((ccount = read(f, p, 512)) <= 0) break;
- }
- else if (p == &buf[1024]) {
- p = buf;
- if ((ccount = read(f, p, 512)) <= 0) break;
- }
- else {
- if ((ccount = read(f, p, &buf[1024] - p)) <= 0) break;
- }
- if(nlp>p && nlp<=p+ccount)
- nlp = p+ccount;
- blkno += ccount;
- }
- }
- done: close(f);
- if (cflag) {
- if (nfile > 1)
- printf("%s:", file);
- printf("%ld\n", tln);
- }
- }
-