home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Power-Programmierung
/
CD2.mdf
/
c
/
compiler
/
miracl
/
slr.c
< prev
Wrap
C/C++ Source or Header
|
1993-01-13
|
9KB
|
377 lines
/* SLR parser generator
given slr grammar in file `grammar', generate characteristic fsm for it
`grammar.c' is generated from cfsm to recognize the grammar
`grammar.o' gives description of state machine
*/
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <system.h>
#define gen(p) fputs(p,output)
FILE *grammar, *output, *des;
int states[100], sprod[1000], sposn[500], togen[500],
tsnext[500], tsposn[500], trn[500], sttype[100],
nstates=0, nterms=0, nnonterms=0, ntogen=1, trans=0, trs[1000];
char *pptr, *prods[100], terms[50], nonterms[50], exnon[50], trc[1000];
void main()
{
char o, ia[100], out[200], rt[50],
*in, *tr, *aptr, *optr, *rtptr, nxtchr;
int check, prct, z, rules=0, rout, flag=0, nxt=0, cflag,
i, stype, rdstate, reduce, start, tf, tt, ot;
grammar=fopen("grammar","r"); output=fopen("grammar.c","w");
des=fopen("grammar.o","w");
if(grammar==NULL || output==NULL || des==NULL)
{
printf("file open failure\n"),
exit(1);
}
for(i=0; i<100; i++) prods[i]=malloc(10);
*terms='\0'; *nonterms='\0';
/**** get grammar from input file */
for(;;)
{
if(fgets(ia,100,grammar)==0) break;
in=ia;
while(isspace(*in)) in++;
tr=prods[rules++]; *tr=*in++;
while(isspace(*in)) in++;
if(!isupper(*tr) || *in++!='-' || *in++!='>')
printf("bad grammar file\n"),
fclose(output),
exit(0);
while(isspace(*in)) in++;
while(*tr++)
if((*tr=*in++)==' ') --tr;
*(tr-2)='\0';
}
fclose(grammar);
/**** show grammar */
fprintf(des,"\ninput grammar : found %d productions ...\n\n",rules);
for(i=0; i<rules; i++)
fprintf(des," %c -> .%s. \n",*prods[i],prods[i]+1);
/**** make list `nonterms' of non-terminals for which productions exist */
for(i=0; i<rules; i++)
{
for(flag=0, check=0; check<nnonterms; check++)
if(prods[i][0]==nonterms[check]) flag=1;
if(!flag) nonterms[nnonterms++]=prods[i][0];
}
nonterms[nnonterms]='\0';
fprintf(des,"\nnon-terminals are .%s.\n",nonterms);
/**** check that every rhs non-terminal has a production,list non-terminals */
for(i=0; i<rules; i++)
{
tr=prods[i];
while(*++tr)
{
flag=0;
if(isupper(*tr))
{
aptr=nonterms;
while(*aptr) if(*aptr++==*tr) flag=1;
if(!flag)
printf("can\'t find production for %c in rule %s\n",
*tr,prods[rout]),
exit(1);
}
else
{
aptr=terms;
while(*aptr) if(*aptr++==*tr) flag=1;
if(!flag)
{ nterms++; *aptr++=*tr; *aptr='\0';}
}
}
}
fprintf(des,"\nterminals are .%s.\n",terms);
/**** find starting S rule ... usually the first, must be unique */
for(start=-1, i=0; i<rules; i++)
if(prods[i][0]=='S')
if(start==-1)
start=i;
else
printf("starting rule must be unique\n"),
exit(1);
/**** expand the grammar into a characteristic fsm */
tsnext[0]=0, tsposn[0]=0, tsnext[1]=-1, tsposn[1]=-1, togen[0]=0;
while(ntogen>nstates) /* while there are states to expand, do nstate'th */
{
if(nstates>0)
{
nxt=states[nstates-1];
while(sprod[nxt++]!=-1);
}
else
nxt=0;
states[nstates]=nxt; exnon[0]='\0';
tf=togen[nstates]; tt=nxt;
rdstate=0; reduce=0;
/* copy unexpanded state to state table */
while(((sprod[tt]=tsnext[tf])&(sposn[tt]=tsposn[tf]))+1) tt++, tf++;
while(nxt<tt) /* expand each following nterm until complete */
{
if(isupper(o=prods[sprod[nxt]][sposn[nxt]+1])) /* non-terminal next? */
{
aptr=exnon;
while(*aptr)
if(*aptr==o)
break;
else
aptr++;
if(*aptr=='\0') /* then we must expand the nterm's productions */
{
*aptr++=o; *aptr='\0';
for(i=0; i<rules; i++) /* insert prods for nterm */
if(prods[i][0]==o)
{
sprod[tt]=i; sposn[tt++]=0;
sprod[tt]=-1; sposn[tt]=-1;
}
}
}
nxt++;
}
ssort(nstates); nxt=states[nstates++];
tf=togen[ntogen-1]; while(tsnext[tf++]+1);
/* now copy each branch out of the state into the unexp table and try it */
while(sprod[nxt]!=-1)
{
nxtchr=prods[sprod[nxt]][sposn[nxt]+1];
tf=togen[ntogen-1];
while(tsnext[tf++]+1);
ot=tf; togen[ntogen]=tf;
if(nxtchr=='\0')
{
trc[trans]=(char)sposn[nxt];
trs[trans]=nxt++;
trn[trans++]=-2; reduce=1;
}
else
{
while((sprod[nxt]!=-1)&& (nxtchr==prods[sprod[nxt]][sposn[nxt]+1]))
{
tsnext[ot]=sprod[nxt];
tsposn[ot++]=sposn[nxt++]+1;
}
tsnext[ot]=-1; tsposn[ot]=-1;
trc[trans]=nxtchr; rdstate=1;
if(z=findstate(ntogen))
trn[trans++]=z;
else
trn[trans++]=ntogen++;
}
}
trn[trans]=-1;
trc[trans++]='0'+rdstate+reduce*2;
}
/**** list cfsm's transitions */
tt=0;
for(i=0; i<nstates; i++)
{
fprintf(des,"\nstate %d \n",i);
while(trn[tt++]!=-1)
if(trn[tt-1]!=-2)
fprintf(des," %c %d \n",trc[tt-1],trn[tt-1]);
else
fprintf(des," $\n");
stype=trc[tt-1]-'0'; sttype[i]=stype;
if(stype==1) fprintf(des,"read\n");
if(stype==2) fprintf(des,"reduce\n");
if(stype==3) fprintf(des,"inadequate\n");
}
/**** generate the parser */
tt=0;
fputs("#include <stdio.h>\n#include <system.h>\n\n",output);
fputs("#define push(a)\t*++sp=a;\n#define pop()",output);
fputs("\t(sp==stack?(exit(0),*sp):*sp--)\n\n",output);
fputs("char stack[1000],*sp,res[5000],ia[100],*in;\n\n",output);
fputs("main()\n{\n\tint state,item,resn=0,finish=0;\n\n",output);
fputs("\tsp=stack; gets(in=ia); push(0);",output);
fputs("\n\n\tfor(;;)\n\t{\n\t\tstate=pop(); push(state);\n",output);
fputs("\t\tif(finish) break;\n\n\t\tswitch(state)\n\t\t{\n",output);
for(i=0; i<nstates; i++) /* generate the cfsm's case statement */
switch(sttype[i])
{
case 1: /* read state */
fprintf(output,"\t\tcase %2d : push(item=*in++);\n",i);
fprintf(output,"\n\t\t\tswitch(item){\n");
while(trn[tt++]!=-1)
fprintf(output,"\t\t\t\tcase \'%c\':push(%d); break;\n",
trc[tt-1],trn[tt-1]);
fprintf(output,"\t\t\t\tdefault :");
fprintf(output,"printf(\"parse failed\");exit(0);\n");
fprintf(output,"\t\t\t} break;\n\n");
break;
case 2: /* reduce state */
fprintf(output,"\t\tcase %2d : ",i);
for(ot=0; ot<trc[tt]; ot++)
fprintf(output,"pop();pop();");
fprintf(output," *--in=\'%c\';\n",prods[sprod[states[i]]][0]);
fprintf(output,"\t\t\tres[resn++]=%d;",sprod[states[i]]);
if(prods[sprod[states[i]]][0]=='S')
fprintf(output," if(*in==\'S\') finish=1;");
fprintf(output," break;\n\n");
tt++;tt++;
break;
case 3: /* inadequate state */
tf=tt; rtptr=rt;
while(trn[tf]!=-1)
if(trn[tf++]!=-2)
*rtptr++=trc[tf-1];
else
o=prods[z=sprod[trs[tf-1]]][0];
*rtptr='\0'; rtptr=rt;
fprintf(output,"\t\tcase %2d : if(*in==\'%c\'",i,*rtptr++);
while(*rtptr)
fprintf(output,"||*in==\'%c\'",*rtptr++);
fprintf(output,"){\n\t\t\t\tpush(item=*in++);");
fprintf(output,"\n\n\t\t\t\tswitch(item){\n"); tf=tt;
while(trn[tt++]!=-1)
if(trn[tt-1]!=-2)
fprintf(output,"\t\t\t\t\tcase \'%c\':push(%d); break;\n",
trc[tt-1],trn[tt-1]);
fprintf(output,"\t\t\t\tdefault :");
fprintf(output,"printf(\"parse failed\");exit(0);}\n");
fprintf(output,"\t\t\t\t} else {\n\t\t\t");
while(trn[tf++]!=-2);
for(ot=0; ot<trc[tf-1]; ot++)
fprintf(output,"pop();pop();");
fprintf(output," *--in=\'%c\';\n",o);
fprintf(output,"\t\t\t\tres[resn++]=%d; ",z);
fprintf(output,"\n\t\t\t} break;\n");
break;
}
fprintf(output,"\n\t\t}\n\t}\n\n\tprintf(\"parsed: \");\n\tfor(item=0;");
fprintf(output,"item<resn;item++)\n\t\tprintf(\" %%2d\",res[item]);\n}\n");
fclose(des);
fclose(output);
}
/**** sort the configuration set for a given state */
ssort(int st)
{
int pstart,nstat,t,la,lb;
for(nstat=0, pstart=states[st]; sprod[pstart++]!=-1; nstat++) ;
if(nstat<2) return;
pstart=states[st];
for(la=0; la<nstat; la++)
for(lb=pstart; lb<(pstart+nstat-1); lb++)
if(1000*(int)prods[sprod[lb]][sposn[lb]+1]+50*(int)sprod[lb]+sposn[lb]>
1000*(int)prods[sprod[lb+1]][sposn[lb+1]+1]+50*(int)sprod[lb+1]+sposn[lb+1])
{
t=sprod[lb]; sprod[lb]=sprod[lb+1]; sprod[lb+1]=t;
t=sposn[lb]; sposn[lb]=sposn[lb+1]; sposn[lb+1]=t;
}
}
/**** compare two unexpanded state sets */
int compare(int f1, int f2)
{
if(f1==f2) return 0;
f1=togen[f1]; f2=togen[f2];
while(tsnext[f1]==tsnext[f2] && tsposn[f1++]==tsposn[f2++])
if(tsnext[f1-1]==-1) return 1;
return 0;
}
/**** find if state already exists */
int findstate(int state)
{
int i;
for(i=0; i<ntogen; i++)
if(compare(state,i)) return i;
return 0;
}