home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
The World of Computer Software
/
World_Of_Computer_Software-02-385-Vol-1of3.iso
/
p
/
pccts.zip
/
lang
/
c
/
decl.g
next >
Wrap
Text File
|
1992-12-08
|
15KB
|
564 lines
/*
* ANSI C recognizer
*
* Gives some error messages for semantics, but this grammar
* checks mostly syntax. We make no claim that it rigorously follows
* the ANSI C standard, but it's a good start.
*
* Type trees are constructed and maintained in the symbol table.
* Expression trees are constructed and then thrown away. The
* user can presumably do something more useful with them.
*
* Requires PCCTS Version 1.00
*
* Terence Parr
* July 1991
*/
/*
* Header action: text that PCCTS places at the top of the generated C files.
* D_TextSize is presumably the size of the attribute text buffer declared
* through charbuf.h -- confirm against that header.
*/
#header <<
#define D_TextSize 20
#include "charbuf.h"
#include "type.h"
#include "sym.h"
#include "proto.h"
>>
/*
* Default lexical class: blanks are skipped, a newline bumps zzline, and a
* preprocessor line marker (with or without the word "line") resets zzline
* to the number that follows the marker.
* An opening quote switches to the STRINGS or CHARACTERS class; zzmore()
* keeps the quote as the beginning of the token text.
*/
#token "[\ \t]+" << zzskip(); >>
#token "\n" << zzline++; zzskip(); >>
#token "#line [\ \t]+ [0-9]+ ~[\n]*\n"
<< zzline = atoi(zzlextext+5); zzskip(); >>
#token "# [\ \t]+ [0-9]+ ~[\n]*\n"
<< zzline = atoi(zzlextext+1); zzskip(); >>
#token "\"" << zzmode(STRINGS); zzmore(); >>
#token "'" << zzmode(CHARACTERS); zzmore(); >>
/*
* These tokens have no regular expression: they are used only as AST node
* types and are not referenced by any rule in the grammar.
*/
#token Var
#token Func
#token FuncCall
#token Label
#token PostInc
#token PostDec
#token StructPtrRef
#token StructRef
#token AggrTag
/*
* Lexical class for the body of a string literal (entered from the default
* class at an opening double quote).
*
* Every piece is accumulated with zzmore() until the closing quote, which
* produces the STRING token and returns to the START class.
* Escape sequences \n, \r and \t are replaced by the character they name;
* numeric escapes (decimal, octal with a leading 0, hex with a leading 0x)
* are replaced by the character with that code. Any other escaped character
* is kept as written. A raw newline inside the string only bumps zzline.
*
* zzbegexpr points at the start of the text matched by the last regular
* expression, i.e. at the backslash, so the number handed to strtol starts
* one character later. Hex escapes accept the digits A-F/a-f, like HEX_NUM.
*/
#lexclass STRINGS
#token STRING "\""      << zzmode(START); >>
#token "\\\""           << zzmore(); >>
#token "\\n"            << zzreplchar('\n'); zzmore(); >>
#token "\\r"            << zzreplchar('\r'); zzmore(); >>
#token "\\t"            << zzreplchar('\t'); zzmore(); >>
#token "\\[1-9][0-9]*"
        << zzreplchar((char)strtol(zzbegexpr+1,NULL,10)); zzmore(); >>
#token "\\0[0-7]*"      << zzreplchar((char)strtol(zzbegexpr+1,NULL,8)); zzmore(); >>
#token "\\0x[0-9A-Fa-f]+" << zzreplchar((char)strtol(zzbegexpr+1,NULL,16)); zzmore(); >>
#token "\\~[\n\r]"      << zzmore(); >>
#token "[\n\r]"         << zzline++; zzmore(); /* print warning about \n in str */>>
#token "~[\"\n\r\\]+"   << zzmore(); >>
/*
* Lexical class for the body of a character constant (entered from the
* default class at an opening single quote).
*
* Every piece is accumulated with zzmore() until the closing quote, which
* produces the CHARACTER token and returns to the START class.
* Escape sequences \n, \r and \t are replaced by the character they name;
* numeric escapes (decimal, octal with a leading 0, hex with a leading 0x)
* are replaced by the character with that code. Any other escaped character
* is kept as written. A raw newline inside the constant only bumps zzline.
*
* zzbegexpr points at the start of the text matched by the last regular
* expression, i.e. at the backslash, so the number handed to strtol starts
* one character later. Hex escapes accept the digits A-F/a-f, like HEX_NUM.
*/
#lexclass CHARACTERS
#token CHARACTER "'"    << zzmode(START); >>
#token "\\'"            << zzmore(); >>
#token "\\n"            << zzreplchar('\n'); zzmore(); >>
#token "\\r"            << zzreplchar('\r'); zzmore(); >>
#token "\\t"            << zzreplchar('\t'); zzmore(); >>
#token "\\[1-9][0-9]*"
        << zzreplchar((char)strtol(zzbegexpr+1,NULL,10)); zzmore(); >>
#token "\\0[0-7]*"      << zzreplchar((char)strtol(zzbegexpr+1,NULL,8)); zzmore(); >>
#token "\\0x[0-9A-Fa-f]+" << zzreplchar((char)strtol(zzbegexpr+1,NULL,16)); zzmore(); >>
#token "\\~[\n\r]"      << zzmore(); >>
#token "[\n\r]"         << zzline++; zzmore(); /* print warning about \n in str */>>
#token "~[\'\n\r\\]"    << zzmore(); >>
/*
* Back in the default (START) class: numeric literals -- octal, decimal and
* hex, each with a separate token for the L/l-suffixed form, plus floating
* point -- and a few named tokens for operators, brackets and sizeof.
*/
#lexclass START
#token OCT_NUM "[0][0-7]*"
#token L_OCT_NUM "[0][0-7]*[Ll]"
#token INT_NUM "[1-9][0-9]*"
#token L_INT_NUM "[1-9][0-9]*[Ll]"
#token HEX_NUM "[0][Xx][0-9A-Fa-f]+"
#token L_HEX_NUM "[0][Xx][0-9A-Fa-f]+[Ll]"
#token FNUM "([1-9][0-9]*{.[0-9]*} | {[0]}.[0-9]+) {[Ee]{[\+\-]}[0-9]+}"
#token PreInc "\+\+"
#token PreDec "\-\-"
#token LPAREN "\("
#token LBRACK "\["
#token SizeOf "sizeof"
/*
* g l o b a l s -- start rule: a sequence of global declarations and
* function definitions, terminated by end of input ("@").
*
* The init-action opens the Globals scope. Each loop iteration resets
* Params and then matches either
*   - a decl at GLOBAL level, or
*   - a declarator with no type specifier in front of it (its base type is
*     a BaseTypeQ node built with tInt) followed by a function definition;
*     the symbol is recorded with handleSymbol, the arguments are defined
*     in the Params scope, and after the body the tree is passed to
*     english() and Proto() and the Params scope is removed and printed.
* After the loop the Globals scope is removed, printed with pScope and
* handed to ProtoVars.
*
* The empty action that opens the subrule is presumably there so that the
* Params reset is not taken as the subrule's init-action -- confirm.
* NOTE(review): the BaseTypeQ node here is built with scNone in the slot
* where decl passes a cv value -- confirm the two constants are equal.
*/
globals!: <<AST *base, *t; Sym *p;
zzs_scope(&Globals);>>
( <<;>>
<<Params=NULL;>> decl[GLOBAL]
| <<Params=NULL;>>
<<base = #[BaseTypeQ,scNone,0,tInt,NULL];>>
declarator[base]
<<handleSymbol(scNone, $1.text, #1, NULL, GLOBAL);
t = defineArgs(#1, &Params);
>>
func_def[t]
<<english( #(#[SymQ,$1.text,#2], #1) );>>
<<Proto($1.text, #1);
p = zzs_rmscope(&Params);
pScope(p, "parameters\n");
>>
)*
<<p = zzs_rmscope(&Globals);
pScope(p, "globals\n");
ProtoVars(p);>>
"@"
;
/* d e c l -- recognize a declaration or definition.
*
* $level is the scope level handed to handleSymbol and stored in the
* symbol of an aggregate/enum tag.
*
* We handle typedefs in a bizarre way. WORD's are converted
* to TypeName's inside the lexical action for token WORD. So,
* because of the lookahead, we need to get a TypeName into
* the symbol table before the lookahead can get a reference
* to this. e.g. "typedef int I; I i;" We actually add the typedef
* name to the symbol table when we see its definition in
* rule declaration and friends. Aggregate tags are handled in a
* similar fashion by adding them to the symbol table as they
* are declared.
*
* Function definitions always have a FunctionQ node at the root
* of the declarator since anything in front would make a pointer to
* a function or whatever. e.g. int *f(); --> () * int --> "function
* returning pointer to integer." Or, int (*f)() --> * () int -->
* "pointer to function returning integer." The first is a function
* symbol, the 2nd is a variable.
*
* A declarator whose root is FunctionQ but which is not followed by a body
* is a prototype: scExtern is OR'ed into its storage class.
*
* The first action inside the ( "," declarator )* loop is that subrule's
* init-action, so english() for the first declarator is presumably run once
* when the loop is entered, whether or not a comma follows -- confirm
* against the ANTLR version in use.
*
* A base type with no declarator is accepted only for struct/union/enum;
* anything else reports "missing declarator". The tag symbol, if found,
* gets its level set to $level. Anonymous aggregates have no tag name, so
* the symbol lookup is skipped for them in both places where it is done.
*/
decl![int level]
    :   <<int sc=scNone, t=tInt, cv=cvNone;
          AST *base, *d, *init=NULL, *tr;
          char *w;
          Sym *p;>>
        (   (sclass[&sc] | typeq[&cv])+
            (   type[&t]        <<base = #[BaseTypeQ,cv,sc,t,$1.text];>>
            |   aggr[sc,cv]     <<base = #1;>>
            |   enum_def        <<base = #1;>>
            |                   <<base = #[BaseTypeQ,cv,sc,tInt,NULL];>>
            )
        |   type[&t]            <<base = #[BaseTypeQ,cvNone,scNone,t,$1.text];>>
        |   aggr[scNone,cvNone] <<base = #1;>>
        |   enum_def            <<base = #1;>>
        )
        (   declarator[base]    <<d=#1; w=$1.text;>>
            (   { <<init=NULL;>> "=" initialize <<init=#2;>> }
                <<if ( d->nodeType == FunctionQ ) {
                      sc |= scExtern;
                      bottom(d)->data.t.sc |= scExtern;
                  }
                  handleSymbol(sc, w, d, init, $level);>>
                (   <<english( #(#[SymQ,w,init], d) );>>
                    ","
                    declarator[base]
                    { <<init=NULL;>> "=" initialize <<init=#2;>> }
                    <<english( #(#[SymQ,$2.text,init], #2) );>>
                    <<if ( #2->nodeType == FunctionQ ) {
                          sc |= scExtern;
                          bottom(#2)->data.t.sc |= scExtern;
                      }
                      handleSymbol(sc, $2.text, #2, init, $level);>>
                )*
                <<
                  if ( base->data.t.type==tStruct ||
                       base->data.t.type==tUnion ||
                       base->data.t.type==tEnum )
                  {
                      if ( base->data.t.name != NULL )
                      {
                          p = zzs_get(base->data.t.name);
                          if ( p!=NULL ) p->level = $level;
                      }
                  }
                >>
                ";"
            |   <<
                  handleSymbol(sc, w, d, init, $level);
                  tr = defineArgs(d, &Params);
                >>
                func_def[tr]
                <<english( #(#[SymQ,w,#1], d) );>>
                <<Proto(w, d);
                  p = zzs_rmscope(&Params);
                  pScope(p, "block\n");
                >>
            )
        |   ";"
            <<english( base );>>
            <<if ( base->data.t.type==tStruct ||
                   base->data.t.type==tUnion ||
                   base->data.t.type==tEnum )
              {
                  if ( base->data.t.name != NULL )
                  {
                      p = zzs_get(base->data.t.name);
                      if ( p!=NULL ) p->level = $level;
                  }
              }
              else
                  error("missing declarator");
            >>
        )
    ;
/*
* s c l a s s -- match one storage-class keyword and OR the corresponding
* sc* flag into *sc. Flags accumulate across calls; this rule does not
* reject conflicting combinations.
*/
sclass![int *sc]
: "auto" <<*$sc |= scAuto;>>
| "static" <<*$sc |= scStatic;>>
| "register" <<*$sc |= scRegister;>>
| "extern" <<*$sc |= scExtern;>>
| "typedef" <<*$sc |= scTypedef;>>
;
/*
* t y p e q -- match one type qualifier and OR its flag into *cv.
* The caller must initialize *cv, since the flag is OR'ed in.
*/
typeq![int *cv]
: "const" <<*$cv |= cvConst;>>
| "volatile" <<*$cv |= cvVolatile;>>
;
/*
* t y p e -- wrapper around t1: the type code is stored through t and the
* attribute of t1 is returned as this rule's attribute.
*/
type![int *t]
: t1[t] <<$type = $1;>>
;
/*
* t 1 -- match a type specifier and store its code in *type.
*
* The first keyword assigns *type; any keyword that follows it in the same
* alternative is OR'ed in (e.g. "unsigned" then "long" then "int").
* "unsigned" or "signed" may stand alone. For a TypeName, *type becomes
* tTypeName and the rule attribute is set to the token's attribute, so the
* caller can read the name through the attribute text. The other
* alternatives do not assign the rule attribute.
*/
t1![int *type]
: ( "unsigned" <<*$type = tUnsigned;>>
| "signed" <<*$type = tSigned;>>
)
( "char" <<*$type |= tChar;>>
| { "short" <<*$type |= tShort;>>
| "long" <<*$type |= tLong;>>
}
{ "int" <<*$type |= tInt;>>
}
)
| ( "short" <<*$type = tShort;>>
{ "int" <<*$type |= tInt;>>
}
| "long" <<*$type = tLong;>>
{ "int" <<*$type |= tInt;>>
| "float" <<*$type |= tFloat;>>
| "double" <<*$type |= tDouble;>>
}
)
| "void" <<*$type = tVoid;>>
| "char" <<*$type = tChar;>>
| "int" <<*$type = tInt;>>
| "float" <<*$type = tFloat;>>
| "double" <<*$type = tDouble;>>
| TypeName <<*$type = tTypeName; $t1 = $1;>>
;
/* D e c l a r a t o r */
/*
* Build a declarator by appending the base to the bottom of the type-tree
* matched in dcltor1. The node returned by bottom($base) is passed down to
* dcltor1 so that dcltor2 can test its storage class: a typedef name needs
* to be added to the symbol table ASAP.
*
* #0 is the dcltor1 tree with $base attached underneath, or $base itself
* when dcltor1 produced no tree. The rule attribute is the attribute
* returned by dcltor1 (the WORD being declared).
*/
declarator![AST *base]
: dcltor1[bottom($base)] <<#(bottom(#1), $base);
#0 = (#1==NULL)?$base:#1;
$declarator = $1;>>
;
/*
* Match *D1 or D2. Build type-trees for PointerQ (pointer qualifier)
* via:
*
* #0 = D1
* |
* v
* *
*
* where D? is dcltor? in this grammar.
*
* An optional "const" or "volatile" after the "*" is stored in the
* PointerQ node. The pointer node is attached below the tree of the nested
* dcltor1; when that tree is NULL the pointer node itself is the result.
* $base is only passed through. The rule attribute is the attribute of the
* nested rule.
*/
dcltor1![AST *base]
: <<AST *t; int cv=0;>>
"\*"
{ "const" <<cv=cvConst;>>
| "volatile" <<cv=cvVolatile;>>
} <<t = #[PointerQ,cv];>>
dcltor1[$base] <<#(bottom(#3), t); #0=(#3==NULL)?t:#3;
$dcltor1 = $3;>>
| dcltor2[$base] <<#0 = #1; $dcltor1 = $1;>>
;
/*
* For WORD D3 we return the following
*
* $$ = WORD recognized.
* #0 = D3 (array or func modifier)
*
* For ( D1 ) we return
*
* $$ = WORD recognized in D1.
* #0 = D1 (put stuff in (..) above [] or ())
* |
* v
* D3
*
* For instance, (*f)() yields
*
* $$ = f
* #0 = * (pointer to)
* |
* v
* ( ) (a function)
*
* If storage class is scTypedef, we need to add it to the symbol table.
* The storage class is read from the node passed in as $base, and the
* WORD is entered as a TypeName as soon as it is matched.
*
* The locals t and n declared in the init-action are not used by this rule.
*/
dcltor2![AST *base] /* pass in storage class for typedefs */
: <<AST *t; Sym *n;>>
WORD <<if ( $base->data.t.sc&scTypedef )
addsym(TypeName,$1.text,0,NULL,NULL);
>>
dcltor3 <<#0 = #2; $dcltor2 = $1;>>
| "\(" dcltor1[$base] "\)" <<$dcltor2 = $2;>>
dcltor3 <<#(bottom(#2), #4); #0=(#2==NULL)?#4:#2;>>
;
/*
* return #0 = [expr] or = [nodimension]
* or ( ) --> arg1 --> ... --> argn for a function
*
* multiple [1][2][3] yields
*
* #0 = [1] (an 1-element array of)
* |
* v
* [2] (2-element arrays of)
* |
* v
* [3] (3-element arrays)
*
* An array dimension may be followed by further dimensions or by a
* function argument list; nothing follows an argument list in this rule.
* With no modifier at all, #0 is NULL.
*/
dcltor3!: "\[" expr1 "\]" dcltor3 <<#0 = #( #[ArrayQ,#2], #4 );>>
| "\[" "\]" dcltor3 <<#0 = #( #[ArrayQ,NULL], #3 );>>
| "\(" args "\)" <<#0 = #(NULL, #[FunctionQ], #2);>>
| <<#0 = NULL;>>
;
/*
* match a list of arguments.
*
* The arguments are siblings of the FunctionQ node in the type
* tree. e.g.
*
* [FunctionQ]-->[arg1]--> ... -->[argn]
* | |
* v v
* [type1] [type1]
*
* This rule returns only the sibling list arg1 --> ... --> argn; the
* caller puts the FunctionQ node in front. A trailing "..." is appended as
* a BaseTypeQ node of type tEllipsis. An empty argument list matches the
* second (empty) alternative, which does not assign #0.
*/
args! : <<AST *t;>>
arg <<t=#1;>>
( "," arg <<t = #(NULL, t, #2);>>
)*
{ "," "..." <<t=#(NULL,t,#[BaseTypeQ,0,0,tEllipsis,NULL]);>>
}
<<#0 = t;>>
|
;
/*
* a r g -- one argument: either a typename, whose tree is returned as is,
* or a bare WORD, which becomes a SymQ node built from the word's text.
*/
arg! : typename <<#0 = #1;>>
| WORD <<#0 = #[SymQ,$1.text,NULL];>>
;
/*
* match a typename -- (used in type-casting and function prototypes).
* Type-trees look the same as those for decl. But, a symbol is
* optional here because they can be used in argument lists.
*
* cv starts out as cvNone because typeq OR's its flag into it.
* When only qualifiers are given (no type or aggregate follows them) the
* base type defaults to int, as in decl.
*
* If tdecl reports a name (non-empty attribute text), a SymQ node for it
* becomes the root of the returned tree with the type-tree below it;
* otherwise the type-tree alone is returned.
*/
typename!: <<int cv=cvNone, t=tInt; AST *base, *tr=NULL;>>
        (   (typeq[&cv])+
            (   type[&t]        <<base = #[BaseTypeQ,cv,0,t,$1.text];>>
            |   aggr[scNone,cv] <<base = #1;>>
            |                   <<base = #[BaseTypeQ,cv,0,tInt,NULL];>>
            )
        |   type[&t]            <<base = #[BaseTypeQ,0,0,t,$1.text];>>
        |   aggr[scNone,cvNone] <<base = #1;>>
        )
        tdecl[base]     <<if ($2.text[0]!='\0') tr=#[SymQ,$2.text,NULL];
                          #0=#(tr, #2);>>
    ;
/* A g g r e g a t e s */
/*
* match an enum definition; yield following tree:
*
* [BaseTypeQ] --> [elem1] --> ... --> [elemn]
*
* The tag WORD is required by this rule. The BaseTypeQ node has type tEnum
* and carries the tag text; the elements returned by enum_lst follow it as
* siblings.
*/
enum_def!: <<AST *base;>>
"enum" WORD <<base=#[BaseTypeQ,0,0,tEnum,$2.text];>>
enum_lst <<#0 = #(NULL, base, #3);>>
;
/*
* match a list of enumeration elements.
*
* The symbols are siblings of each other:
*
* [elem1] --> ... --> [elemn]
*
* If an element has an initialization, store a pointer to it in the
* AST node.
*
* init is reset to NULL before each element after the first, so an
* initializer never carries over to the next element. When no brace list
* is present (second alternative) the result is NULL.
*/
enum_lst!: <<AST *list, *init=NULL;>>
"\{"
WORD
{ "=" expr1 <<init = #2;>>
} <<list = #[SymQ,$2.text, init];>>
( ","
WORD
{ <<init=NULL;>>
"=" expr1 <<init=#2;>>
} <<list = #(NULL,list,#[SymQ,$2.text, init]);>>
)*
"\}"
<<#0 = list;>>
| <<#0 = NULL;>>
;
/*
* Match a struct/union def.
* Return a tree like this:
*
* [BaseTypeQ]-->[fld1]--> ... -->[fldn]
* | |
* v v
* [type1] [type1]
*
* $sc and $cv are stored in the BaseTypeQ node. The tag may be a WORD or
* a TypeName; its text is copied with strdup into the node's name. The tag
* is entered in the symbol table as an AggrTag only when a field list
* follows it. A tag with no field list yields the bare BaseTypeQ node. An
* aggregate with a field list but no tag leaves the name NULL.
*
* The locals tr and typ are not used by this rule.
*
* BUG: Allows two structs to have same name
*/
aggr![int sc, int cv]
: <<AST *tr, *base; int t; Sym *typ;>>
( "struct" <<t=tStruct;>>
| "union" <<t=tUnion;>>
) <<base = #[BaseTypeQ,$cv,$sc,t,NULL];>>
( ( WORD <<base->data.t.name = strdup($1.text);>>
| TypeName <<base->data.t.name = strdup($1.text);>>
)
( ag[base] <<#0 = #(NULL, base, #1);
addsym(AggrTag, base->data.t.name,
0, base, NULL);
>>
| <<#0 = base;>>
)
| ag[base] <<#0 = #(NULL, base, #1);>>
)
;
/*
* match a field list for a struct/union
*
* The fields are siblings of each other:
*
* [fld1] --> ... --> [fldn]
* | |
* v v
* [type1] [type1]
*
* At least one field definition is required. #0 is the tree of the first
* fdef; each later fdef tree is linked as a sibling after the previous one.
*/
ag![AST *base]
: <<AST *t=NULL;>>
"\{" fdef[$base] <<#0=t=#2;>>
( fdef[$base] <<#(NULL, t, #1); t = #1;>>
)*
"\}"
;
/*
* Match one field definition; make the following tree
*
* [FieldQ]
* |
* v
* [type1]
*
* The incoming base argument is overwritten with the field's own base
* type: a plain type or a nested struct/union (this rule accepts no
* qualifiers and no enum). Several comma-separated fields may share that
* base type; each gets its own FieldQ node, named by the field's attribute
* text, and the nodes are returned as a sibling list.
*/
fdef![AST *base]
: <<int t=tInt; AST *f, *g;>>
( type[&t] <<base = #[BaseTypeQ,0,0,t,$1.text];>>
| aggr[scNone,cvNone] <<base = #1;>>
)
field[$base] <<f = #(#[FieldQ,$2.text], #2);>>
( "," field[$base]<<g = #(#[FieldQ,$2.text], #2);
f = #(NULL, f, g);>>
)*
";"
<<#0 = f;>>
;
/* bitfields are recognized, but not handled 'cause not too many people
* use them
*
* A named field returns the declarator's tree and attribute; the optional
* ":" width expression is parsed and dropped.
* NOTE(review): the second alternative (a bitfield with no name) assigns
* neither #0 nor $field, while fdef reads both -- confirm what the caller
* sees in that case.
*/
field![AST *base]
: declarator[$base] { ":" expr1 } <<#0=#1; $field = $1;>>
| ":" expr1
;
/* T y p e N a m e */
/*
* t d e c l -- declarator used by typename; built like rule declarator.
* $base is attached to the bottom of the tree matched by tdecl1, or is
* itself the result when tdecl1 produced no tree. The rule attribute is
* the attribute returned by tdecl1.
*/
tdecl![AST *base]
: tdecl1 <<#(bottom(#1), $base);
#0 = (#1==NULL)?$base:#1; $tdecl=$1;>>
;
/*
* t d e c l 1 -- pointer level of a typename declarator; mirrors dcltor1.
* "*" with an optional "const" or "volatile" makes a PointerQ node that is
* attached below the tree of the nested tdecl1, or becomes the result when
* that tree is NULL. Otherwise the result is that of tdecl2.
*/
tdecl1! : <<AST *t; int cv=0;>>
"\*"
{ "const" <<cv=cvConst;>>
| "volatile" <<cv=cvVolatile;>>
} <<t = #[PointerQ,cv];>>
tdecl1 <<#(bottom(#3), t); #0=(#3==NULL)?t:#3;
$tdecl1 = $3;>>
| tdecl2 <<#0 = #1; $tdecl1 = $1;>>
;
/*
* t d e c l 2 -- like dcltor2, but the name is optional.
* The init-action clears the text of the rule attribute; typename tests
* that text to decide whether a name was given. The attribute is replaced
* by that of the parenthesized tdecl1 or of the WORD when one is matched.
* The third alternative has no name, only array/function modifiers
* (possibly none).
*
* The local t is not used by this rule.
*/
tdecl2! : <<AST *t=NULL; $tdecl2.text[0] = '\0';>>
"\(" tdecl1 "\)" <<$tdecl2 = $2;>>
tdecl3 <<#(bottom(#2), #4); #0=(#2==NULL)?#4:#2;>>
| WORD tdecl3 <<$tdecl2 = $1; #0 = #2;>>
| tdecl3 <<#0 = #1;>>
;
/*
* t d e c l 3 -- array and function modifiers of a typename declarator;
* builds the same ArrayQ / FunctionQ trees as dcltor3. The last (empty)
* alternative does not assign #0.
*/
tdecl3! : "\[" expr1 "\]" tdecl3<<#0 = #( #[ArrayQ,#2], #4 );>>
| "\[" "\]" tdecl3<<#0 = #( #[ArrayQ,NULL], #3 );>>
| "\(" args "\)" <<#0 = #( NULL, #[FunctionQ], #2 );>>
|
;
/* I n i t e x p r e s s i o n s */
/*
* i n i t i a l i z e -- an initializer is either a brace-enclosed list
* (init2) or a single expression (expr0).
*/
initialize
: init2
| expr0
;
/* Build an initialization expression-tree of the form:
*
* Single-dimensioned array or structure:
*
* "{"
* |
* v
* [exp1] --> ... --> [expn]
*
* Nested structure or multi-dim array:
*
* "{"
* |
* v
* "{" --> ... --> "{"
* | ...
* v ...
* [exp1] --> ... --> [expn]
*
* The opening brace becomes the root of the tree; the commas and the
* closing brace are left out of it. A trailing comma before the closing
* brace is accepted.
*/
init2 : "\{"^ init3 ( ","! init3 )* {","!} "\}"!
;
/*
* i n i t 3 -- one element of an initializer list: a nested brace list
* or an expression (expr1).
*/
init3 : init2
| expr1
;