Power-Programmierung

home *** CD-ROM | disk | FTP | other *** search

/ Power-Programmierung / CD2.mdf / c / compiler / miracl / slr.c < prev

Wrap

C/C++ Source or Header | 1993-01-13 | 9KB | 377 lines

/* SLR parser generator given slr grammar in file `grammar', generate characteristic fsm for it `grammar.c' is generated from cfsm to recognize the grammar `grammar.o' gives description of state machine */ #include <stdio.h> #include <ctype.h> #include <string.h> #include <system.h> #define gen(p) fputs(p,output) FILE *grammar, *output, *des; int states[100], sprod[1000], sposn[500], togen[500], tsnext[500], tsposn[500], trn[500], sttype[100], nstates=0, nterms=0, nnonterms=0, ntogen=1, trans=0, trs[1000]; char *pptr, *prods[100], terms[50], nonterms[50], exnon[50], trc[1000]; void main() { char o, ia[100], out[200], rt[50], *in, *tr, *aptr, *optr, *rtptr, nxtchr; int check, prct, z, rules=0, rout, flag=0, nxt=0, cflag, i, stype, rdstate, reduce, start, tf, tt, ot; grammar=fopen("grammar","r"); output=fopen("grammar.c","w"); des=fopen("grammar.o","w"); if(grammar==NULL || output==NULL || des==NULL) { printf("file open failure\n"), exit(1); } for(i=0; i<100; i++) prods[i]=malloc(10); *terms='\0'; *nonterms='\0'; /**** get grammar from input file */ for(;;) { if(fgets(ia,100,grammar)==0) break; in=ia; while(isspace(*in)) in++; tr=prods[rules++]; *tr=*in++; while(isspace(*in)) in++; if(!isupper(*tr) || *in++!='-' || *in++!='>') printf("bad grammar file\n"), fclose(output), exit(0); while(isspace(*in)) in++; while(*tr++) if((*tr=*in++)==' ') --tr; *(tr-2)='\0'; } fclose(grammar); /**** show grammar */ fprintf(des,"\ninput grammar : found %d productions ...\n\n",rules); for(i=0; i<rules; i++) fprintf(des," %c -> .%s. \n",*prods[i],prods[i]+1); /**** make list `nonterms' of non-terminals for which productions exist */ for(i=0; i<rules; i++) { for(flag=0, check=0; check<nnonterms; check++) if(prods[i][0]==nonterms[check]) flag=1; if(!flag) nonterms[nnonterms++]=prods[i][0]; } nonterms[nnonterms]='\0'; fprintf(des,"\nnon-terminals are .%s.\n",nonterms); /**** check that every rhs non-terminal has a production,list non-terminals */ for(i=0; i<rules; i++) { tr=prods[i]; while(*++tr) { flag=0; if(isupper(*tr)) { aptr=nonterms; while(*aptr) if(*aptr++==*tr) flag=1; if(!flag) printf("can\'t find production for %c in rule %s\n", *tr,prods[rout]), exit(1); } else { aptr=terms; while(*aptr) if(*aptr++==*tr) flag=1; if(!flag) { nterms++; *aptr++=*tr; *aptr='\0';} } } } fprintf(des,"\nterminals are .%s.\n",terms); /**** find starting S rule ... usually the first, must be unique */ for(start=-1, i=0; i<rules; i++) if(prods[i][0]=='S') if(start==-1) start=i; else printf("starting rule must be unique\n"), exit(1); /**** expand the grammar into a characteristic fsm */ tsnext[0]=0, tsposn[0]=0, tsnext[1]=-1, tsposn[1]=-1, togen[0]=0; while(ntogen>nstates) /* while there are states to expand, do nstate'th */ { if(nstates>0) { nxt=states[nstates-1]; while(sprod[nxt++]!=-1); } else nxt=0; states[nstates]=nxt; exnon[0]='\0'; tf=togen[nstates]; tt=nxt; rdstate=0; reduce=0; /* copy unexpanded state to state table */ while(((sprod[tt]=tsnext[tf])&(sposn[tt]=tsposn[tf]))+1) tt++, tf++; while(nxt<tt) /* expand each following nterm until complete */ { if(isupper(o=prods[sprod[nxt]][sposn[nxt]+1])) /* non-terminal next? */ { aptr=exnon; while(*aptr) if(*aptr==o) break; else aptr++; if(*aptr=='\0') /* then we must expand the nterm's productions */ { *aptr++=o; *aptr='\0'; for(i=0; i<rules; i++) /* insert prods for nterm */ if(prods[i][0]==o) { sprod[tt]=i; sposn[tt++]=0; sprod[tt]=-1; sposn[tt]=-1; } } } nxt++; } ssort(nstates); nxt=states[nstates++]; tf=togen[ntogen-1]; while(tsnext[tf++]+1); /* now copy each branch out of the state into the unexp table and try it */ while(sprod[nxt]!=-1) { nxtchr=prods[sprod[nxt]][sposn[nxt]+1]; tf=togen[ntogen-1]; while(tsnext[tf++]+1); ot=tf; togen[ntogen]=tf; if(nxtchr=='\0') { trc[trans]=(char)sposn[nxt]; trs[trans]=nxt++; trn[trans++]=-2; reduce=1; } else { while((sprod[nxt]!=-1)&& (nxtchr==prods[sprod[nxt]][sposn[nxt]+1])) { tsnext[ot]=sprod[nxt]; tsposn[ot++]=sposn[nxt++]+1; } tsnext[ot]=-1; tsposn[ot]=-1; trc[trans]=nxtchr; rdstate=1; if(z=findstate(ntogen)) trn[trans++]=z; else trn[trans++]=ntogen++; } } trn[trans]=-1; trc[trans++]='0'+rdstate+reduce*2; } /**** list cfsm's transitions */ tt=0; for(i=0; i<nstates; i++) { fprintf(des,"\nstate %d \n",i); while(trn[tt++]!=-1) if(trn[tt-1]!=-2) fprintf(des," %c %d \n",trc[tt-1],trn[tt-1]); else fprintf(des," $\n"); stype=trc[tt-1]-'0'; sttype[i]=stype; if(stype==1) fprintf(des,"read\n"); if(stype==2) fprintf(des,"reduce\n"); if(stype==3) fprintf(des,"inadequate\n"); } /**** generate the parser */ tt=0; fputs("#include <stdio.h>\n#include <system.h>\n\n",output); fputs("#define push(a)\t*++sp=a;\n#define pop()",output); fputs("\t(sp==stack?(exit(0),*sp):*sp--)\n\n",output); fputs("char stack[1000],*sp,res[5000],ia[100],*in;\n\n",output); fputs("main()\n{\n\tint state,item,resn=0,finish=0;\n\n",output); fputs("\tsp=stack; gets(in=ia); push(0);",output); fputs("\n\n\tfor(;;)\n\t{\n\t\tstate=pop(); push(state);\n",output); fputs("\t\tif(finish) break;\n\n\t\tswitch(state)\n\t\t{\n",output); for(i=0; i<nstates; i++) /* generate the cfsm's case statement */ switch(sttype[i]) { case 1: /* read state */ fprintf(output,"\t\tcase %2d : push(item=*in++);\n",i); fprintf(output,"\n\t\t\tswitch(item){\n"); while(trn[tt++]!=-1) fprintf(output,"\t\t\t\tcase \'%c\':push(%d); break;\n", trc[tt-1],trn[tt-1]); fprintf(output,"\t\t\t\tdefault :"); fprintf(output,"printf(\"parse failed\");exit(0);\n"); fprintf(output,"\t\t\t} break;\n\n"); break; case 2: /* reduce state */ fprintf(output,"\t\tcase %2d : ",i); for(ot=0; ot<trc[tt]; ot++) fprintf(output,"pop();pop();"); fprintf(output," *--in=\'%c\';\n",prods[sprod[states[i]]][0]); fprintf(output,"\t\t\tres[resn++]=%d;",sprod[states[i]]); if(prods[sprod[states[i]]][0]=='S') fprintf(output," if(*in==\'S\') finish=1;"); fprintf(output," break;\n\n"); tt++;tt++; break; case 3: /* inadequate state */ tf=tt; rtptr=rt; while(trn[tf]!=-1) if(trn[tf++]!=-2) *rtptr++=trc[tf-1]; else o=prods[z=sprod[trs[tf-1]]][0]; *rtptr='\0'; rtptr=rt; fprintf(output,"\t\tcase %2d : if(*in==\'%c\'",i,*rtptr++); while(*rtptr) fprintf(output,"||*in==\'%c\'",*rtptr++); fprintf(output,"){\n\t\t\t\tpush(item=*in++);"); fprintf(output,"\n\n\t\t\t\tswitch(item){\n"); tf=tt; while(trn[tt++]!=-1) if(trn[tt-1]!=-2) fprintf(output,"\t\t\t\t\tcase \'%c\':push(%d); break;\n", trc[tt-1],trn[tt-1]); fprintf(output,"\t\t\t\tdefault :"); fprintf(output,"printf(\"parse failed\");exit(0);}\n"); fprintf(output,"\t\t\t\t} else {\n\t\t\t"); while(trn[tf++]!=-2); for(ot=0; ot<trc[tf-1]; ot++) fprintf(output,"pop();pop();"); fprintf(output," *--in=\'%c\';\n",o); fprintf(output,"\t\t\t\tres[resn++]=%d; ",z); fprintf(output,"\n\t\t\t} break;\n"); break; } fprintf(output,"\n\t\t}\n\t}\n\n\tprintf(\"parsed: \");\n\tfor(item=0;"); fprintf(output,"item<resn;item++)\n\t\tprintf(\" %%2d\",res[item]);\n}\n"); fclose(des); fclose(output); } /**** sort the configuration set for a given state */ ssort(int st) { int pstart,nstat,t,la,lb; for(nstat=0, pstart=states[st]; sprod[pstart++]!=-1; nstat++) ; if(nstat<2) return; pstart=states[st]; for(la=0; la<nstat; la++) for(lb=pstart; lb<(pstart+nstat-1); lb++) if(1000*(int)prods[sprod[lb]][sposn[lb]+1]+50*(int)sprod[lb]+sposn[lb]> 1000*(int)prods[sprod[lb+1]][sposn[lb+1]+1]+50*(int)sprod[lb+1]+sposn[lb+1]) { t=sprod[lb]; sprod[lb]=sprod[lb+1]; sprod[lb+1]=t; t=sposn[lb]; sposn[lb]=sposn[lb+1]; sposn[lb+1]=t; } } /**** compare two unexpanded state sets */ int compare(int f1, int f2) { if(f1==f2) return 0; f1=togen[f1]; f2=togen[f2]; while(tsnext[f1]==tsnext[f2] && tsposn[f1++]==tsposn[f2++]) if(tsnext[f1-1]==-1) return 1; return 0; } /**** find if state already exists */ int findstate(int state) { int i; for(i=0; i<ntogen; i++) if(compare(state,i)) return i; return 0; }