The World of Computer Software

home *** CD-ROM | disk | FTP | other *** search

/ The World of Computer Software / World_Of_Computer_Software-02-385-Vol-1of3.iso / p / pccts.zip / antlr / antlr.g < prev next >

Wrap

Text File | 1992-12-08 | 21KB | 671 lines

/*
 * antlr.g  --  PCCTS Version 1.xx ANTLR
 *
 * Parse an antlr input grammar and build a syntax-diagram.
 *
 * Terence Parr
 * Purdue University
 * August 1990
 *
 * Rewritten in itself (needs at least 1.00 to work) May 1992--TJP
 *
 * SOFTWARE RIGHTS
 *
 * We reserve no LEGAL rights to the Purdue Compiler Construction Tool
 * Set (PCCTS) -- PCCTS is in the public domain.  An individual or
 * company may do whatever they wish with source code distributed with
 * PCCTS or the code generated by PCCTS, including the incorporation of
 * PCCTS, or its output, into commerical software.
 *
 * We encourage users to develop software with PCCTS.  However, we do ask
 * that credit is given to us for developing PCCTS.  By "credit",
 * we mean that if you incorporate our source code into one of your
 * programs (commercial product, research project, or otherwise) that you
 * acknowledge this fact somewhere in the documentation, research report,
 * etc...  If you like PCCTS and have developed a nice tool with the
 * output, please mention that you developed it using PCCTS.  In
 * addition, we ask that this header remain intact in our source code.
 * As long as these guidelines are kept, we expect to continue enhancing
 * this system and expect to make other tools available as they are
 * completed.
 *
 * ANTLR 1.06
 * Terence Parr
 * Purdue University
 * 1989-1992
 */

/* Each preprocessor directive of the header action must sit on its own line. */
#header <<#include "set.h"
#include <ctype.h>
#include "syn.h"
#include "hash.h"
#include "generic.h"
#define zzcr_attr(attr,tok,t)
>>

<<
#ifdef __STDC__
static void chkToken(char *, char *, char *);
#else
static void chkToken();
#endif
>>

/*
 * Lexical class for quoted regular expressions / strings.
 * Entered from START on a double quote; the closing quote yields QuotedTerm.
 */
#lexclass STRINGS
#token QuotedTerm "\""      << zzmode(START); >>
#token "\\\""               << zzmore(); >>
#token "\n"                 << zzline++; warn("eoln found in string (in user action)"); zzskip(); >>
#token "\\~[\"]"            << zzmore(); >>
#token "~[\n\"\\]+"         << zzmore(); >>

/*
 * Lexical class for C-style comments at grammar level; everything is
 * skipped, only the line counter is maintained.
 */
#lexclass COMMENTS
#token "\*/"                << zzmode(START); zzskip(); >>
#token "\*"                 << zzskip(); >>
#token "\n"                 << zzline++; zzskip(); >>
#token "~[\n\*]+"           << zzskip(); >>

/*
 * This lexical class accepts actions of type [..] and <<..>>
 *
 * It translates the following special items:
 *
 * $j           --> "zzaArg(current zztasp, j)"
 * $i.j         --> "zzaArg(zztaspi, j)"
 * $i.nondigit  --> "zzaArg(current zztasp, i).nondigit"
 * $$           --> "zzaRet"
 * $alnum       --> "alnum"         (used to ref parameters)
 * $rule        --> "zzaRet"
 * $retval      --> "_retv.retval" if > 1 return values else "_retv"
 * $[token, text] --> "zzconstr_attr(token, text)"
 * $[]          --> "zzempty_attr()"
 *
 * And, for trees:
 *
 * #0           --> "(*_root)"
 * #i           --> "zzastArg(i)"
 * #[args]      --> "zzmk_ast(zzastnew(), args)"
 * #[]          --> "zzastnew()"
 * #( root, child1, ..., childn ) --> "zztmake(root, child1, ...., childn, NULL)"
 * #()          --> "NULL"
 *
 * To escape,
 *
 * \]           --> ]
 * \)           --> )
 * \$           --> $
 * \#           --> #
 *
 * A stack is used to nest action terminators because they can be nested
 * like crazy:  << #[$[..],..] >>
 */
#lexclass ACTIONS
#token Action "\>\>"
    << /* these do not nest */
       zzmode(START);
       NLATEXT[0] = ' ';
       NLATEXT[1] = ' ';
       zzbegexpr[0] = ' ';
       zzbegexpr[1] = ' ';
       if ( zzbufovf ) {
           warn( eMsgd("action buffer overflow; size %d",ZZLEXBUFSIZE));
       }
    >>
#token Pred "\>\>?"
    << /* these do not nest */
       zzmode(START);
       NLATEXT[0] = ' ';
       NLATEXT[1] = ' ';
       zzbegexpr[0] = ' ';
       zzbegexpr[1] = ' ';
       zzbegexpr[2] = ' ';
       if ( zzbufovf ) {
           warn( eMsgd("predicate buffer overflow; size %d",ZZLEXBUFSIZE));
       }
    >>
#token PassAction "\]"
    << if ( topint() == ']' ) {
           popint();
           if ( istackempty() )     /* terminate action */
           {
               zzmode(START);
               NLATEXT[0] = ' ';
               zzbegexpr[0] = ' ';
               if ( zzbufovf ) {
                   warn( eMsgd("parameter buffer overflow; size %d",ZZLEXBUFSIZE));
               }
           }
           else {
               /* terminate $[..] and #[..] */
               zzreplstr(")");
               zzmore();
           }
       }
       else if ( topint() == '|' ) { /* end of simple [...] */
           popint();
           zzmore();
       }
       else zzmore();
    >>
#token "\n"     << zzline++; zzmore(); >>
#token "\>"     << zzmore(); >>
#token "$"      << zzmore(); >>
#token "$$"     << zzreplstr("zzaRet"); zzmore(); >>
#token "$\[\]"  << zzreplstr("zzempty_attr"); zzmore(); >>
#token "$\["
    << pushint(']');
       zzreplstr("zzconstr_attr(");
       zzmore();
    >>
#token "$[0-9]+"
    <<{
       /* 128 bytes: the length guard below still admits 84 digits, which
          together with the fixed text, the block level and the NUL did not
          fit in the former 100 byte buffer */
       static char buf[128];
       if ( strlen(zzbegexpr)>85 )
           fatal("$i attrib ref too big");
       sprintf(buf,"zzaArg(zztasp%d,%s)",
               BlkLevel-1,zzbegexpr+1);
       zzreplstr(buf);
       zzmore();
     }
    >>
#token "$[0-9]+."
    <<{
       /* 128 bytes for the same reason as the plain attribute reference */
       static char buf[128];
       if ( strlen(zzbegexpr)>85 )
           fatal("$i.field attrib ref too big");
       /* blank out the trailing dot; it is emitted again after the call */
       zzbegexpr[strlen(zzbegexpr)-1] = ' ';
       sprintf(buf,"zzaArg(zztasp%d,%s).",
               BlkLevel-1,zzbegexpr+1);
       zzreplstr(buf);
       zzmore();
     }
    >>
#token "$[0-9]+.[0-9]+"
    <<{
       static char buf[100];
       /* 21 bytes each: up to 20 digits are copied before the overflow
          check trips, and the terminating NUL needs one more slot */
       static char i[21], j[21];
       char *p,*q;
       if (strlen(zzbegexpr)>85) fatal("$i.j attrib ref too big");
       for (p=zzbegexpr+1,q= &i[0]; *p!='.'; p++)
       {
           if ( q == &i[20] )
               fatalFL("i of $i.j attrib ref too big",
                       FileStr[CurFile], zzline );
           *q++ = *p;
       }
       *q = '\0';
       for (p++, q= &j[0]; *p!='\0'; p++)
       {
           if ( q == &j[20] )
               fatalFL("j of $i.j attrib ref too big",
                       FileStr[CurFile], zzline );
           *q++ = *p;
       }
       *q = '\0';
       sprintf(buf,"zzaArg(zztasp%s,%s)",i,j);
       zzreplstr(buf);
       zzmore();
     }
    >>
#token "$[_a-zA-Z][_a-zA-Z0-9]*"
    <<{
       static char buf[300];
       zzbegexpr[0] = ' ';
       if ( CurRule != NULL &&
            strcmp(CurRule, &zzbegexpr[1])==0 ) {
           zzreplstr("zzaRet");
       }
       else if ( CurRetDef != NULL ) {
           if ( strmember(CurRetDef, &zzbegexpr[1]) ) {
               if ( HasComma( CurRetDef ) ) {
                   require (strlen(zzbegexpr)<=285,
                            "$retval attrib ref too big");
                   sprintf(buf,"_retv.%s",&zzbegexpr[1]);
                   zzreplstr(buf);
               }
               else zzreplstr("_retv");
           }
           else if ( CurParmDef != NULL ) {
               if ( !strmember(CurParmDef, &zzbegexpr[1]) )
                   warn(eMsg1("$%s not parameter or return value",&zzbegexpr[1]));
           }
           else warn(eMsg1("$%s not parameter or return value",&zzbegexpr[1]));
       }
     }
     zzmore();
    >>
#token "#0"     << zzreplstr("(*_root)"); zzmore(); >>
#token "#\[\]"  << zzreplstr("zzastnew()"); zzmore(); >>
/* empty tree constructor: #() */
#token "#\(\)"  << zzreplstr("NULL"); zzmore(); >>
#token "#[0-9]+"
    <<{
       static char buf[100];
       if ( strlen(zzbegexpr)>85 )
           fatal("#i AST ref too big");
       sprintf(buf,"zzastArg(%s)",zzbegexpr+1);
       zzreplstr(buf);
       zzmore();
     }
    >>
#token "#\["
    << pushint(']');
       zzreplstr("zzmk_ast(zzastnew(),");
       zzmore();
    >>
/* tree constructor: #( root, child, ... ); '}' on the stack marks it */
#token "#\("
    << pushint('}');
       zzreplstr("zztmake(");
       zzmore();
    >>
#token "#"      << zzmore(); >>
/* closing parenthesis: plain one, or the end of a tree constructor */
#token "\)"
    << if ( istackempty() )
           zzmore();
       else if ( topint()==')' ) {
           popint();
       }
       else if ( topint()=='}' ) {
           popint();
           /* terminate #(..) */
           zzreplstr(", NULL)");
       }
       zzmore();
    >>
#token "\["
    << pushint('|');    /* look for '|' to terminate simple [...] */
       zzmore();
    >>
/* opening parenthesis: remember that a plain ')' is expected */
#token "\("
    << pushint(')');
       zzmore();
    >>
#token "\\\]"   << zzreplstr("]"); zzmore(); >>
#token "\\\)"   << zzreplstr(")"); zzmore(); >>
#token "\\>"    << zzreplstr(">"); zzmore(); >>
#token "\\$"    << zzreplstr("$"); zzmore(); >>
#token "\\#"    << zzreplstr("#"); zzmore(); >>
#token "\\\\"   << zzmore(); >>     /* need this for some reason */
#token "\\~[\]\)>$#\\]"
                << zzmore(); >>     /* escaped char, always ignore */
#token "~[\n\)\(\\$#\>\]\[]+"
                << zzmore(); >>

/*
 * Default lexical class: grammar-level text.  Dispatches into ACTIONS,
 * STRINGS and COMMENTS and switches to the next input file at Eof.
 */
#lexclass START
#token "[\t\ ]+"    << zzskip(); >>             /* Ignore White */
#token "[\n\r]"     << zzline++; zzskip(); >>   /* Track Line # */
#token "\["
    << zzmode(ACTIONS); zzmore();
       istackreset();
       pushint(']');
    >>
#token "\<\<"
    << action_file=CurFile; action_line=zzline;
       zzmode(ACTIONS); zzmore();
       istackreset();
       pushint('>');
    >>
#token "\""         << zzmode(STRINGS); zzmore(); >>
#token "/\*"        << zzmode(COMMENTS); zzskip(); >>
#token "\*/"        << warn("Missing /*; found dangling */"); zzskip(); >>
#token "\>\>"       << warn("Missing <<; found dangling \\>\\>"); zzskip(); >>
#token Eof "@"
    << /* L o o k  F o r  A n o t h e r  F i l e */
       {
           FILE *new_input;
           new_input = NextFile();
           if ( new_input == NULL ) return;
           fclose( input );
           input = new_input;
           zzrdstream( input );
           /*zzadvance();  /* Get 1st char of this file */
           zzskip();       /* Skip the Eof (@) char i.e continue */
       }
    >>

/*
 * Get a grammar -- Build a list of rules like:
 *
 *  o-->Rule1--o
 *  |
 *  o-->Rule2--o
 *  |
 *  ...
 *  |
 *  o-->RuleN--o
 */
grammar :   <<Graph g;>>
            {   "#header" Action
                <<
                HdrAction = calloc(strlen(LATEXT(1))+1, sizeof(char));
                require(HdrAction!=NULL, "rule grammar: cannot allocate header action");
                strcpy(HdrAction, LATEXT(1));
                >>
            }
            (   <<char *a;>>
                Action
                <<
                a = calloc(strlen(LATEXT(1))+1, sizeof(char));
                require(a!=NULL, "rule grammar: cannot allocate action");
                strcpy(a, LATEXT(1));
                list_add(&BeforeActions, a);
                >>
            |   laction
            |   aLexclass
            |   token
            |   error
            )*
            rule        <<g=$3; SynDiag = (Junction *) $3.left;>>
            (   rule    <<if ( $1.left!=NULL ) {g.right = NULL; g = Or(g, $1);}>>
            |   aLexclass
            |   token
            |   error
            )*
            (   <<char *a;>>
                Action
                <<
                a = calloc(strlen(LATEXT(1))+1, sizeof(char));
                require(a!=NULL, "rule grammar: cannot allocate action");
                strcpy(a, LATEXT(1));
                list_add(&AfterActions, a);
                >>
            |   laction
            |   error
            )*
            Eof
        ;
        <<CannotContinue=TRUE;>>

/*
 * Build -o-->o-R-o-->o- where -o-R-o- is the block from rule 'block'.
 * Construct the RuleBlk front and EndRule node on the end of the
 * block.  This is used to add FOLLOW pointers to the rule end.  Add the
 * new rule name to the Rname hash table and sets its rulenum.
 * Store the parameter definitions if any are found.
 *
 * Note that locks are required on the RuleBlk and EndRule nodes to thwart
 * infinite recursion.
 *
 * Return the left graph pointer == NULL to indicate error/dupl rule def.
 */
rule    :   <<RuleEntry *q; Junction *p; Graph r; int f, l; ECnode *e;
              char *pdecl=NULL, *ret=NULL, *a;>>
            NonTerminal
            <<q=NULL;
              if ( hash_get(Rname, LATEXT(1))!=NULL ) {
                  warn(eMsg1("duplicate rule definition: '%s'",LATEXT(1)));
                  CannotContinue=TRUE;
              }
              else
              {
                  q = (RuleEntry *)hash_add(Rname,
                                            LATEXT(1),
                                            (Entry *)newRuleEntry(LATEXT(1)));
                  CurRule = q->str;
              }
              CurRuleNode = q;
              f = CurFile; l = zzline;
              NumRules++;
            >>
            {   "!"  <<if ( q!=NULL ) q->noAST = TRUE;>> }
            {   <<;>>
                {"\<"}
                PassAction
                <<
                pdecl = calloc(strlen(LATEXT(1))+1, sizeof(char));
                require(pdecl!=NULL, "rule rule: cannot allocate param decl");
                strcpy(pdecl, LATEXT(1));
                CurParmDef = pdecl;
                >>
            }
            {   "\>"
                PassAction
                <<
                ret = calloc(strlen(LATEXT(1))+1, sizeof(char));
                require(ret!=NULL, "rule rule: cannot allocate ret type");
                strcpy(ret, LATEXT(1));
                CurRetDef = ret;
                >>
            }
            { QuotedTerm <<if ( q!=NULL ) q->egroup=strdup(LATEXT(1));>> }
            <<
            if ( GenEClasseForRules && q!=NULL ) {
                e = newECnode;
                require(e!=NULL, "cannot allocate error class node");
                if ( q->egroup == NULL ) {a = q->str; a[0] = toupper(a[0]);}
                else a = q->egroup;
                if ( Tnum( a ) == 0 )
                {
                    e->tok = addTname( a );
                    list_add(&eclasses, (char *)e);
                    if ( q->egroup == NULL ) a[0] = tolower(a[0]);
                    /* refers to itself */
                    list_add(&(e->elist), strdup(q->str));
                }
                else {
                    warn(eMsg1("default errclass for '%s' would conflict with token/errclass",a));
                    if ( q->egroup == NULL ) a[0] = tolower(a[0]);
                    free(e);
                }
            }
            >>
            <<BlkLevel++;>>
            ":" block
            <<r = makeBlk($7);
              ((Junction *)r.left)->jtype = RuleBlk;
              if ( q!=NULL ) ((Junction *)r.left)->rname = q->str;
              ((Junction *)r.left)->file = f;
              ((Junction *)r.left)->line = l;
              ((Junction *)r.left)->pdecl = pdecl;
              ((Junction *)r.left)->ret = ret;
              ((Junction *)r.left)->lock = makelocks();
              ((Junction *)r.left)->pred_lock = makelocks();
              p = newJunction();    /* add EndRule Node */
              ((Junction *)r.right)->p1 = (Node *)p;
              r.right = (Node *) p;
              p->jtype = EndRule;
              p->lock = makelocks();
              p->pred_lock = makelocks();
              ((Junction *)r.left)->end = p;
              if ( q!=NULL ) q->rulenum = NumRules;
              $7 = r;
            >>
            <<--BlkLevel;>>
            ";"
            {   Action
                <<
                a = calloc(strlen(LATEXT(1))+1, sizeof(char));
                require(a!=NULL, "rule rule: cannot allocate error action");
                strcpy(a, LATEXT(1));
                ((Junction *)r.left)->erraction = a;
                >>
            }
            <<if ( q==NULL ) $0.left = NULL; else $0 = $7;>>
            <<CurRuleNode = NULL;>>
        ;
        <<CannotContinue=TRUE;>>

/* A lexical action: copied and appended to the LexActions list. */
laction :   <<char *a;>>
            "#lexaction"
            Action
            <<
            a = calloc(strlen(LATEXT(1))+1, sizeof(char));
            require(a!=NULL, "rule laction: cannot allocate action");
            strcpy(a, LATEXT(1));
            list_add(&LexActions, a);
            >>
        ;

/* Switch the current lexical class to the named one. */
aLexclass:  "#lexclass" TokenTerm <<lexclass(strdup(LATEXT(1)));>>
        ;

/*
 * Error class definition: a name followed by a brace-enclosed, non-empty
 * list of rule names, token names and quoted expressions.  When the name
 * collides with an existing token or errclass the members are still parsed
 * but not recorded (go==0).
 */
error   :   <<char *t=NULL; ECnode *e; int go=1; TermEntry *p;>>
            "#errclass"
            (<<;>>  TokenTerm   <<t=strdup(LATEXT(1));>>
            |       QuotedTerm  <<t=strdup(LATEXT(1));>>
            )
            <<e = newECnode;
              require(e!=NULL, "cannot allocate error class node");
              e->lexclass = CurrentLexClass;
              if ( Tnum( (t=StripQuotes(t)) ) == 0 )
              {
                  if ( hash_get(Texpr, t) != NULL )
                      warn(eMsg1("errclass name conflicts with regular expression '%s'",t));
                  e->tok = addTname( t );
                  require((p=(TermEntry *)hash_get(Tname, t)) != NULL,
                          "hash table mechanism is broken");
                  p->errclassname = 1;  /* entry is errclass name, not token */
                  list_add(&eclasses, (char *)e);
              }
              else
              {
                  warn(eMsg1("redefinition of errclass or conflict w/token '%s'; ignored",t));
                  free( e );
                  go=0;
              }
            >>
            "\{"
                (   NonTerminal <<if ( go ) t=strdup(LATEXT(1));>>
                |   TokenTerm   <<if ( go ) t=strdup(LATEXT(1));>>
                |   QuotedTerm  <<if ( go ) t=strdup(LATEXT(1));>>
                )
                <<if ( go ) list_add(&(e->elist), t);>>
                (
                    (   NonTerminal <<if ( go ) t=strdup(LATEXT(1));>>
                    |   TokenTerm   <<if ( go ) t=strdup(LATEXT(1));>>
                    |   QuotedTerm  <<if ( go ) t=strdup(LATEXT(1));>>
                    )
                    <<if ( go ) list_add(&(e->elist), t);>>
                )*
            "\}"
        ;

/* Token definition: optional name, optional expression, optional action. */
token   :   <<char *t=NULL, *e=NULL, *a=NULL;>>
            "#token"
            {   TokenTerm   <<t=strdup(LATEXT(1));>> }
            {   QuotedTerm  <<e=strdup(LATEXT(1));>> }
            {   Action
                <<
                a = calloc(strlen(LATEXT(1))+1, sizeof(char));
                require(a!=NULL, "rule token: cannot allocate action");
                strcpy(a, LATEXT(1));
                >>
            }
            <<chkToken(t, e, a);>>
        ;
        <<CannotContinue=TRUE;>>

/*
 * One or more alternatives separated by '|'.  A leading non-predicate
 * action of the first alternative is flagged as the block's init action.
 */
block   :   <<Graph g, b;>>
            alt     <<b = g = $1;>>
            <<
            if ( ((Junction *)g.left)->p1->ntype == nAction )
            {
                if ( !((ActionNode *)(((Junction *)g.left)->p1))->is_predicate )
                {
                    ((ActionNode *)(((Junction *)g.left)->p1))->init_action = TRUE;
                }
            }
            >>
            (   "\|" alt    <<g = Or(g, $2);>>
            )*
            <<$0 = b;>>
        ;
        <<CannotContinue=TRUE;>>

/* A possibly empty sequence of elements concatenated into one graph. */
alt     :   <<int n=0; Graph g; g.left=g.right=NULL;>>
            (   element <<n++; g = Cat(g, $1);>>
            )*
            <<if ( n == 0 ) g = emptyAlt();
              $0 = g;
            >>
        ;
        <<CannotContinue=TRUE;>>

/*
 * A single grammar element: token or rule reference, action, predicate,
 * parenthesised subrule, optional subrule.  The trailing alternatives
 * only diagnose common mistakes.
 */
element :   <<TokNode *p; RuleRefNode *q;>>
            TokenTerm   <<$0 = buildToken(LATEXT(1));>>
            (   <<p = (TokNode *) ((Junction *)$$.left)->p1;>>
                "^" <<p->astnode=ASTroot;>>
            |       <<p->astnode=ASTchild;>>
            |   "!" <<p->astnode=ASTexclude;>>
            )
        |   QuotedTerm  <<$0 = buildToken(LATEXT(1));>>
            (   <<p = (TokNode *) ((Junction *)$$.left)->p1;>>
                "^" <<p->astnode=ASTroot;>>
            |       <<p->astnode=ASTchild;>>
            |   "!" <<p->astnode=ASTexclude;>>
            )
        |   NonTerminal <<$0 = buildRuleRef(LATEXT(1));>>
            {   "!" <<q = (RuleRefNode *) ((Junction *)$$.left)->p1;
                      q->astnode=ASTexclude;>>
            }
            {   {"\<"}
                PassAction <<addParm(((Junction *)$$.left)->p1, LATEXT(1));>>
            }
            {   <<char *a; RuleRefNode *rr=(RuleRefNode *) ((Junction *)$$.left)->p1;
                >>
                "\>"
                PassAction
                <<
                a = calloc(strlen(LATEXT(1))+1, sizeof(char));
                require(a!=NULL, "rule element: cannot allocate assignment");
                strcpy(a, LATEXT(1));
                rr->assign = a;
                >>
            }
        |   Action  <<$0 = buildAction(LATEXT(1),action_file,action_line, 0);>>
        |   Pred    <<$0 = buildAction(LATEXT(1),action_file,action_line, 1);>>
            {   <<char *a; ActionNode *act = (ActionNode *) ((Junction *)$$.left)->p1;>>
                PassAction
                <<
                a = calloc(strlen(LATEXT(1))+1, sizeof(char));
                require(a!=NULL, "rule element: cannot allocate predicate fail action");
                strcpy(a, LATEXT(1));
                act->pred_fail = a;
                >>
            }
        |   <<BlkLevel++;>>
            "\(" block <<$0 = $2; --BlkLevel;>> "\)"
            (   "\*"    <<$$ = makeLoop($$);>>
            |   "\+"    <<$$ = makePlus($$);>>
            |           <<$$ = makeBlk($$);>>
            )
            {   PassAction <<addParm(((Junction *)$$.left)->p1, LATEXT(1));>> }
        |   <<BlkLevel++;>>
            "\{" block <<$0 = makeOpt($2); --BlkLevel;>> "\}"
            {   PassAction <<addParm(((Junction *)$$.left)->p1, LATEXT(1));>> }
        |   ":"     <<warn(eMsg1("missing ';' on rule %s", CurRule));
                      CannotContinue=TRUE;>>
        |   "\*"    <<warn("don't you want a ')' with that '*'?");
                      CannotContinue=TRUE;>>
        |   "\+"    <<warn("don't you want a ')' with that '+'?");
                      CannotContinue=TRUE;>>
        |   "\>"    <<warn("'>' can only appear after a nonterminal");
                      CannotContinue=TRUE;>>
        |   PassAction
                    <<warn("[...] out of context 'rule > [...]'");
                      CannotContinue=TRUE;>>
        ;
        <<CannotContinue=TRUE;>>

#token NonTerminal  "[a-z] [A-Za-z0-9_]*"
#token TokenTerm    "[A-Z] [A-Za-z0-9_]*"
#token "#[A-Za-z0-9_]*"
    <<warn(eMsg1("unknown meta-op: %s",LATEXT(1))); zzskip(); >>

<<
/* semantics of #token */
/*
 * Record one token definition.
 *   t  token name or NULL
 *   e  quoted regular expression or NULL
 *   a  lexical action text or NULL
 * At least one of t and e must be given; otherwise only a warning is issued.
 */
static void
chkToken(t,e,a)
char *t, *e, *a;
{
    if ( t==NULL && e==NULL ) {         /* none found */
        warn("#token requires at least token name or rexpr");
    }
    else if ( t!=NULL && e!=NULL ) {    /* both found */
        Tlink(t, e);
        if ( a!=NULL ) {
            if ( hasAction(e) ) {
                warn(eMsg1("redefinition of action for %s; ignored",e));
            }
            else setHasAction(e, a);
        }
    }
    else if ( t!=NULL ) {               /* only one found */
        if ( Tnum( t ) == 0 ) addTname( t );
        else {
            warn(eMsg1("redefinition of token %s; ignored",t));
        }
        if ( a!=NULL ) {
            warn(eMsg1("action cannot be attached to a token name (%s); ignored",t));
        }
    }
    else if ( e!=NULL ) {
        if ( Tnum( e ) == 0 ) addTexpr( e );
        else {
            if ( hasAction(e) ) {
                warn(eMsg1("redefinition of action for %s; ignored",e));
            }
            else if ( a==NULL ) {
                warn(eMsg1("redefinition of expr %s; ignored",e));
            }
        }
        /* NOTE: the action is attached even after the warning above
           announced that the redefinition is ignored -- confirm intent */
        if ( a!=NULL ) setHasAction(e, a);
    }
}
>>