home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Columbia Kermit
/
kermit.zip
/
archives
/
convergent.tar.gz
/
convergent.tar
/
ctwart.c
< prev
next >
Wrap
C/C++ Source or Header
|
1993-01-20
|
12KB
|
628 lines
/* W A R T
*
* pre-process a lex-like file into a C program.
*
* Jeff Damens, Columbia University Center for Computing Activites, 11/84.
* (Reorganized by Frank da Cruz into a single source module for ease
* of distribution).
* Copyright (C) 1984, Trustees of Columbia University.
* May be copied and used except for explicitly commercial purposes.
*
* input format is:
* lines to be copied | %state <state names...>
* %%
* <state> | <state,state,...> CHAR { actions }
* ...
* %%
*/
/* modified for CTOS C2.0 by Joel Dunn, UNC-CH, October 1986 */
#include <stdio.h>
#include <ctype.h>
/* token types */
#define SEP 1
#define LBRACK 2
#define RBRACK 3
#define WORD 4
#define COMMA 5
/* storage sizes */
#define MAXSTATES 50 /* max number of states */
#define MAXWORD 50 /* max # of chars/word */
#define SBYTES ((MAXSTATES+7)/8) /* # of bytes for state bitmask */
/* name of wart function in generated program */
#ifndef FNAME
#define FNAME "wart"
#endif
/* data structure for state information */
#ifdef PROVX1
typedef unsigned short CHAR;
#else
typedef unsigned char CHAR;
#endif
struct trans { CHAR states[SBYTES]; /* included states */
int anyst; /* true if this good from any state */
CHAR inchr; /* input character */
int actno; /* associated action */
struct trans *nxt; }; /* next transition */
typedef struct trans *Trans;
/* Variables and tables */
int lines,nstates,nacts;
char tokval[MAXWORD];
int tbl[MAXSTATES*128];
char *txt1 = "\n\
#define BEGIN state =\n\
\n\
int state = 0;\n\
\n";
char *fname = FNAME; /* function name goes here */
/* rest of program... */
char *txt2 = "()\n\
{\n\
int c,actno;\n\
extern int tbl[];\n\
while (1) {\n\
c = input();\n\
if ((actno = tbl[c + state*128]) != -1)\n\
switch(actno) {\n";
/* this program's output goes here, followed by final text... */
char *txt3 = "\n }\n }\n\}\n\n";
/*
* turn on the bit associated with the given state
*
*/
setstate(state,t)
int state;
Trans t;
{
int idx,msk;
idx = state/8; /* byte associated with state */
msk = 0x80 >> (state % 8); /* bit mask for state */
t->states[idx] |= msk;
}
/*
* see if the state is involved in the transition
*
*/
teststate(state,t)
int state;
Trans t;
{
int idx,msk;
idx = state/8;
msk = 0x80 >> (state % 8);
return(t->states[idx] & msk);
}
/*
* read input from here...
*
*/
Trans
rdinput(infp,outfp)
FILE *infp,*outfp;
{
Trans x,rdrules();
lines = 1; /* line counter */
nstates = 0; /* no states */
nacts = 0; /* no actions yet */
fprintf(outfp,"\n\
/* WARNING -- This C source program generated by Wart preprocessor. */\n");
fprintf(outfp,"\
/* Do not edit this file; edit the Wart-format source file instead, */\n");
fprintf(outfp,"\
/* and then run it through Wart to produce a new C source file. */\n\n");
initial(infp,outfp); /* read state names, initial defs */
prolog(outfp); /* write out our initial code */
x = rdrules(infp,outfp); /* read rules */
epilogue(outfp); /* write out epilogue code */
return(x);
}
/*
* initial - read initial definitions and state names. Returns
* on EOF or %%.
*
*/
initial(infp,outfp)
FILE *infp,*outfp;
{
int c;
char wordbuf[MAXWORD];
while ((c = getc(infp)) != EOF) {
if (c == '%') {
rdword(infp,wordbuf);
if (strcmp(wordbuf,"states") == 0)
rdstates(infp,outfp);
else if (strcmp(wordbuf,"%") == 0) return;
else fprintf(outfp,"%%%s",wordbuf);
}
else putc(c,outfp);
if (c == '\n') lines++;
}
}
/*
* boolean function to tell if the given character can be part of
* a word.
*
*/
isin(s,c) char *s; int c; {
for (; *s != '\0'; s++)
if (*s == c) return(1);
return(0);
}
isword(c)
int c;
{
static char special[] = ".%_-$@"; /* these are allowable */
return(isalnum(c) || isin(special,c));
}
/*
* read the next word into the given buffer.
*
*/
rdword(fp,buf)
FILE *fp;
char *buf;
{
int len = 0,c;
while (isword(c = getc(fp)) && ++len < MAXWORD) *buf++ = c;
*buf++ = '\0'; /* tie off word */
ungetc(c,fp); /* put break char back */
}
/*
* read state names, up to a newline.
*
*/
rdstates(fp,ofp)
FILE *fp,*ofp;
{
int c;
char wordbuf[MAXWORD];
while ((c = getc(fp)) != EOF && c != '\n')
{
if (isspace(c)) continue; /* skip whitespace */
ungetc(c,fp); /* put char back */
rdword(fp,wordbuf); /* read the whole word */
enter(wordbuf,++nstates); /* put into symbol tbl */
fprintf(ofp,"#define %s %d\n",wordbuf,nstates);
}
lines++;
}
/*
* allocate a new, empty transition node
*
*/
Trans
newtrans()
{
Trans new;
int i;
new = (Trans) malloc(sizeof (struct trans));
for (i=0; i<SBYTES; i++) new->states[i] = 0;
new->anyst = 0;
new->nxt = NULL;
return(new);
}
/*
* read all the rules.
*
*/
Trans
rdrules(fp,out)
FILE *fp,*out;
{
Trans head,cur,prev;
int curtok,i;
head = cur = NULL;
while ((curtok = gettoken(fp)) != SEP)
switch(curtok) {
case LBRACK: if (cur == NULL) cur = newtrans();
else fatal("duplicate state list");
statelist(fp,cur);/* set states */
continue; /* prepare to read char */
case WORD: if (strlen(tokval) != 1)
fatal("multiple chars in state");
if (cur == NULL) {
cur = newtrans();
cur->anyst = 1;
}
cur->actno = ++nacts;
cur->inchr = tokval[0];
if (head == NULL) head = cur;
else prev->nxt = cur;
prev = cur;
cur = NULL;
copyact(fp,out,nacts);
break;
default: fatal("bad input format");
}
return(head);
}
/*
* read a list of (comma-separated) states, set them in the
* given transition.
*
*/
statelist(fp,t)
FILE *fp;
Trans t;
{
int curtok,sval;
curtok = COMMA;
while (curtok != RBRACK) {
if (curtok != COMMA) fatal("missing comma");
if ((curtok = gettoken(fp)) != WORD) fatal("missing state name");
if ((sval = lkup(tokval)) == -1) {
fprintf(stderr,"state %s undefined\n",tokval);
fatal("undefined state");
}
setstate(sval,t);
curtok = gettoken(fp);
}
}
/*
* copy an action from the input to the output file
*
*/
copyact(inp,outp,actno)
FILE *inp,*outp;
int actno;
{
int c,bcnt;
fprintf(outp,"case %d:\n",actno);
while ((c = getc(inp)) != '\n' && isspace(c)); /* skip whitespace */
if (c == '{') {
bcnt = 1;
putc(c,outp);
while (bcnt > 0 && (c = getc(inp)) != EOF) {
if (c == '{') bcnt++;
else if (c == '}') bcnt--;
else if (c == '\n') lines++;
putc(c,outp);
}
if (bcnt > 0) fatal("action doesn't end");
}
else {
while (c != '\n' && c != EOF) {
putc(c,outp);
c = getc(inp);
}
lines++;
}
fprintf(outp,"\nbreak;\n");
}
/*
* find the action associated with a given character and state.
* returns -1 if one can't be found.
*
*/
faction(hd,state,chr)
Trans hd;
int state,chr;
{
while (hd != NULL) {
if (hd->anyst || teststate(state,hd))
if (hd->inchr == '.' || hd->inchr == chr) return(hd->actno);
hd = hd->nxt;
}
return(-1);
}
/*
* empty the table...
*
*/
emptytbl()
{
int i;
for (i=0; i<nstates*128; i++) tbl[i] = -1;
}
/*
* add the specified action to the output for the given state and chr.
*
*/
addaction(act,state,chr)
int act,state,chr;
{
tbl[state*128 + chr] = act;
}
writetbl(fp)
FILE *fp;
{
warray(fp,"tbl",tbl,128*(nstates+1));
}
/*
* write an array to the output file, given its name and size.
*
*/
warray(fp,nam,cont,siz)
FILE *fp;
char *nam;
int cont[],siz;
{
int i;
fprintf(fp,"int %s[] = {\n",nam);
for (i = 0; i < siz; i++) {
fprintf(fp,"%d, ",cont[i]);
if ((i % 20) == 0) putc('\n',fp);
}
fprintf(fp,"};\n");
}
main(argc, argv)
int argc;
char **argv;
{
Trans head;
int state,c;
FILE *infile,*outfile;
if (argc > 1) {
argv++;
if ((infile = fopen(*argv,"r")) == NULL) {
fprintf(stderr,"Can't open %s\n",*argv);
fatal("unreadable input file"); } }
else infile = stdin;
if (argc > 2) {
argv++;
if ((outfile = fopen(*argv,"w")) == NULL) {
fprintf(stderr,"Can't write to %s\n",*argv);
fatal("bad output file"); } }
else outfile = stdout;
clrhash(); /* empty hash table */
head = rdinput(infile,outfile); /* read input file */
emptytbl(); /* empty our tables */
for (state = 0; state <= nstates; state++)
for (c = 1; c < 128; c++)
addaction(faction(head,state,c),state,c); /* find actions, add to tbl */
writetbl(outfile);
copyrest(infile,outfile);
printf("%d states, %d actions\n",nstates,nacts);
#ifdef undef
for (state = 1; state <= nstates; state ++)
for (c = 1; c < 128; c++)
if (tbl[state*128 + c] != -1) printf("state %d, chr %d, act %d\n",
state,c,tbl[state*128 + c]);
#endif
fclose(infile);
fclose(outfile);
exit(0);
}
/*
* fatal error handler
*
*/
fatal(msg)
char *msg;
{
fprintf(stderr,"error in line %d: %s\n",lines,msg);
exit(1);
}
prolog(outfp)
FILE *outfp;
{
int c;
while ((c = *txt1++) != '\0') putc(c,outfp);
while ((c = *fname++) != '\0') putc(c,outfp);
while ((c = *txt2++) != '\0') putc(c,outfp);
}
epilogue(outfp)
FILE *outfp;
{
int c;
while ((c = *txt3++) != '\0') putc(c,outfp);
}
copyrest(in,out)
FILE *in,*out;
{
int c;
while ((c = getc(in)) != EOF) putc(c,out);
}
/*
* gettoken - returns token type of next token, sets tokval
* to the string value of the token if appropriate.
*
*/
gettoken(fp)
FILE *fp;
{
int c;
while (1) { /* loop if reading comments... */
do {
c = getc(fp);
if (c == '\n') lines++;
} while (isspace(c)); /* skip whitespace */
switch(c) {
case EOF: return(SEP);
case '%': if ((c = getc(fp)) == '%') return(SEP);
tokval[0] = '%';
tokval[1] = c;
rdword(fp,tokval+2);
return(WORD);
case '<': return(LBRACK);
case '>': return(RBRACK);
case ',': return(COMMA);
case '/': if ((c = getc(fp)) == '*') {
rdcmnt(fp); /* skip over the comment */
continue; } /* and keep looping */
else {
ungetc(c); /* put this back into input */
c = '/'; } /* put character back, fall thru */
default: if (isword(c)) {
ungetc(c,fp);
rdword(fp,tokval);
return(WORD);
}
else fatal("Invalid character in input");
}
}
}
/*
* skip over a comment
*
*/
rdcmnt(fp)
FILE *fp;
{
int c,star,prcnt;
prcnt = star = 0; /* no star seen yet */
while (!((c = getc(fp)) == '/' && star)) {
if (c == EOF || (prcnt && c == '%')) fatal("Unterminated comment");
prcnt = (c == '%');
star = (c == '*');
if (c == '\n') lines++; }
}
/*
* symbol table management for wart
*
* entry points:
* clrhash - empty hash table.
* enter - enter a name into the symbol table
* lkup - find a name's value in the symbol table.
*
*/
#define HASHSIZE 101 /* # of entries in hash table */
struct sym { char *name; /* symbol name */
int val; /* value */
struct sym *hnxt; } /* next on collision chain */
*htab[HASHSIZE]; /* the hash table */
/*
* empty the hash table before using it...
*
*/
clrhash()
{
int i;
for (i=0; i<HASHSIZE; i++) htab[i] = NULL;
}
/*
* compute the value of the hash for a symbol
*
*/
hash(name)
char *name;
{
int sum;
for (sum = 0; *name != '\0'; name++) sum += (sum + *name);
sum %= HASHSIZE; /* take sum mod hashsize */
if (sum < 0) sum += HASHSIZE; /* disallow negative hash value */
return(sum);
}
/*
* make a private copy of a string...
*
*/
char *
copy(s)
char *s;
{
char *new;
new = (char *) malloc(strlen(s) + 1);
strcpy(new,s);
return(new);
}
/*
* enter state name into the hash table
*
*/
enter(name,svalue)
char *name;
int svalue;
{
int h;
struct sym *cur;
if (lkup(name) != -1) {
fprintf(stderr,"state %s appears twice...\n");
exit(1); }
h = hash(name);
cur = (struct sym *)malloc(sizeof (struct sym));
cur->name = copy(name);
cur->val = svalue;
cur->hnxt = htab[h];
htab[h] = cur;
}
/*
* find name in the symbol table, return its value. Returns -1
* if not found.
*
*/
lkup(name)
char *name;
{
struct sym *cur;
for (cur = htab[hash(name)]; cur != NULL; cur = cur->hnxt)
if (strcmp(cur->name,name) == 0) return(cur->val);
return(-1);
}