home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
OS/2 Shareware BBS: 10 Tools
/
10-Tools.zip
/
pccts1.zip
/
ANTLR
/
LEX.C
< prev
next >
Wrap
C/C++ Source or Header
|
1993-09-02
|
15KB
|
594 lines
/*
* lex.c -- Generate all of the lexical type files: parser.dlg tokens.h
*
* $Id: lex.c,v 1.7 1993/08/24 14:44:32 pccts Exp pccts $
* $Revision: 1.7 $
*
* SOFTWARE RIGHTS
*
* We reserve no LEGAL rights to the Purdue Compiler Construction Tool
* Set (PCCTS) -- PCCTS is in the public domain. An individual or
* company may do whatever they wish with source code distributed with
* PCCTS or the code generated by PCCTS, including the incorporation of
* PCCTS, or its output, into commercial software.
*
* We encourage users to develop software with PCCTS. However, we do ask
* that credit is given to us for developing PCCTS. By "credit",
* we mean that if you incorporate our source code into one of your
* programs (commercial product, research project, or otherwise) that you
* acknowledge this fact somewhere in the documentation, research report,
* etc... If you like PCCTS and have developed a nice tool with the
* output, please mention that you developed it using PCCTS. In
* addition, we ask that this header remain intact in our source code.
* As long as these guidelines are kept, we expect to continue enhancing
* this system and expect to make other tools available as they are
* completed.
*
* ANTLR 1.10
* Terence Parr
* Purdue University
* 1989-1993
*/
#include <stdio.h>
#include <ctype.h>
#ifdef __cplusplus
#ifndef __STDC__
#define __STDC__
#endif
#endif
#include "set.h"
#include "syn.h"
#include "hash.h"
#include "generic.h"
/* Message handed to (*zzerr) by the zzerraction() routine that
 * genLexDescr() writes into the DLG description file.
 */
#define DLGErrorString "invalid token"
/* Forward declaration of a file-local helper, in ANSI-prototype or K&R
 * form depending on __STDC__.
 * NOTE(review): the definition is not in the part of the file seen here.
 */
#ifdef __STDC__
static void dumpPredictionExpressions(FILE *);
#else
static void dumpPredictionExpressions();
#endif
/* genLexDescr -- write the DLG scanner description to DlgFileName.
 *
 * Output layout:
 *   1. a <<...>> prologue: banner comment, the #defines/#includes the
 *      scanner needs, and a zzerraction() routine;
 *   2. every action on the LexActions list, each wrapped in <<...>>;
 *   3. the regular-expression rules of every lexical class
 *      (dumpLexClasses), or a warning if there are none;
 *   4. a closing "%%" line.
 *
 * Failure to open the file is reported through require().
 */
void
#ifdef __STDC__
genLexDescr( void )
#else
genLexDescr( )
#endif
{
	ListNode *act;
	int n;
	int renamed;		/* does the parser use a non-default name? */
	FILE *dlgFile;

	dlgFile = fopen(DlgFileName, "w");
	require(dlgFile!=NULL, eMsg1("genLexFile: cannot open %s", DlgFileName) );

	/* banner */
	fputs("<<\n", dlgFile);
	fprintf(dlgFile, "/* %s -- DLG Description of scanner\n", DlgFileName);
	fputs(" *\n", dlgFile);
	fputs(" * Generated from:", dlgFile);
	for (n=0; n<NumFiles; n++)
	{
		fprintf(dlgFile, " %s", FileStr[n]);
	}
	fputs("\n", dlgFile);
	fputs(" *\n", dlgFile);
	fputs(" * Terence Parr, Will Cohen, and Hank Dietz: 1989-1993\n", dlgFile);
	fputs(" * Purdue University Electrical Engineering\n", dlgFile);
	fprintf(dlgFile, " * ANTLR Version %s\n", Version);
	fputs(" */\n", dlgFile);

	/* definitions and includes needed by the generated scanner */
	fputs("#include <stdio.h>\n", dlgFile);
	fprintf(dlgFile, "#define ANTLR_VERSION %s\n", VersionDef);
	renamed = ( strcmp(ParserName, DefaultParserName)!=0 );
	if ( renamed )
	{
		fprintf(dlgFile, "#define %s %s\n", DefaultParserName, ParserName);
		fprintf(dlgFile, "#include \"%s\"\n", RemapFileName);
	}
	if ( HdrAction != NULL )
	{
		dumpAction( HdrAction, dlgFile, 0, -1, 0, 1 );
	}
	if ( FoundGuessBlk )
	{
		fputs("#define ZZCAN_GUESS\n", dlgFile);
		fputs("#include <setjmp.h>\n", dlgFile);
	}
	if ( OutputLL_k > 1 )
	{
		fprintf(dlgFile, "#define LL_K %d\n", OutputLL_k);
	}
	if ( DemandLookahead )
	{
		fputs("#define DEMAND_LOOK\n", dlgFile);
	}
	fputs("#include \"antlr.h\"\n", dlgFile);
	if ( GenAST )
	{
		fputs("#include \"ast.h\"\n", dlgFile);
	}
	fprintf(dlgFile, "#include \"%s\"\n", DefFileName);
	fputs("#include \"dlgdef.h\"\n", dlgFile);
	fputs("LOOKAHEAD\n", dlgFile);

	/* scanner error action */
	fputs("void zzerraction()\n", dlgFile);
	fputs("{\n", dlgFile);
	fprintf(dlgFile, "\t(*zzerr)(\"%s\");\n", DLGErrorString);
	fputs("\tzzadvance();\n", dlgFile);
	fputs("\tzzskip();\n", dlgFile);
	fputs("}\n>>\n\n", dlgFile);

	/* dump all lexical actions; the first list node is skipped */
	if ( LexActions != NULL )
	{
		act = LexActions->next;
		while ( act != NULL )
		{
			fputs("<<\n", dlgFile);
			dumpAction( (char *)act->elem, dlgFile, 0, -1, 0, 1 );
			fputs(">>\n", dlgFile);
			act = act->next;
		}
	}

	/* dump all regular expression rules/actions */
	if ( ExprOrder != NULL )
	{
		dumpLexClasses(dlgFile);
	}
	else
	{
		warnNoFL("no regular expressions found in grammar");
	}

	fprintf(dlgFile, "%%%%\n");
	fclose( dlgFile );
}
/* dumpLexClasses -- write the regular-expression rules of every lexical
 * class to dlgFile.
 *
 * For each lexical class, scan ExprOrder looking for expressions
 * in that lexical class.  Print out only those that match.
 * Each element of the ExprOrder list has both an expr and an lclass
 * field.  The first node of ExprOrder is skipped; the caller must make
 * sure ExprOrder is not NULL.
 *
 * For each rule the expression text is printed without its surrounding
 * quotes, followed by an action that sets NLA to the token (by name if
 * TokenStr has one, by number otherwise) and then the user action, if any.
 * Expressions mapped to EpToken are not printed.
 */
void
#ifdef __STDC__
dumpLexClasses( FILE *dlgFile )
#else
dumpLexClasses( dlgFile )
FILE *dlgFile;
#endif
{
	int i;
	int quoted;
	TermEntry *t;
	ListNode *p;
	Expr *q;

	for (i=0; i<NumLexClasses; i++)
	{
		fprintf(dlgFile, "\n%%%%%s\n\n", lclass[i].classnum);
		for (p=ExprOrder->next; p!=NULL; p=p->next)
		{
			q = (Expr *) p->elem;
			if ( q->lclass != i ) continue;
			lexmode(i);
			t = (TermEntry *) hash_get(Texpr, q->expr);
			require(t!=NULL, eMsg1("genLexDescr: rexpr %s not in hash table",q->expr) );
			if ( t->token == EpToken ) continue;

			/* StripQuotes() only cuts off the last character when the
			 * string starts with a quote, so remember whether it will;
			 * restoring a quote that was never removed would overwrite
			 * the string's terminator.
			 */
			quoted = ( q->expr[0] == '"' );
			fprintf(dlgFile, "%s\n\t<<\n", StripQuotes(q->expr));
			/* replace " killed by StripQuotes() */
			if ( quoted ) q->expr[ strlen(q->expr) ] = '"';

			if ( TokenStr[t->token] != NULL )
				fprintf(dlgFile, "\t\tNLA = %s;\n", TokenStr[t->token]);
			else fprintf(dlgFile, "\t\tNLA = %d;\n", t->token);
			if ( t->action != NULL ) dumpAction( t->action, dlgFile, 2,-1,0,1 );
			fprintf(dlgFile, "\t>>\n\n");
		}
	}
}
/* genDefFile -- create DefFileName and fill it with token definitions.
 *
 * Writes a banner comment, one "#define name number" line for the EOF
 * token (if it has a name) and for every named token from TokenStart up
 * to TokenNum that is neither EpToken nor flagged errclassname in its
 * symbol-table entry, and finally the rule prototypes (and structs for
 * aggregate return values) via GenRulePrototypes().
 *
 * The stream is stored in the global DefFile and is not closed here.
 */
void
#ifdef __STDC__
genDefFile( void )
#else
genDefFile( )
#endif
{
	int tok, n;
	TermEntry *sym;

	DefFile = fopen(DefFileName, "w");
	require(DefFile!=NULL, eMsg1("genDefFile: cannot open %s", DefFileName) );

	/* banner */
	fprintf(DefFile, "/* %s -- List of labelled tokens and stuff\n", DefFileName);
	fputs(" *\n", DefFile);
	fputs(" * Generated from:", DefFile);
	for (n=0; n<NumFiles; n++)
	{
		fprintf(DefFile, " %s", FileStr[n]);
	}
	fputs("\n", DefFile);
	fputs(" *\n", DefFile);
	fputs(" * Terence Parr, Will Cohen, and Hank Dietz: 1989-1993\n", DefFile);
	fputs(" * Purdue University Electrical Engineering\n", DefFile);
	fprintf(DefFile, " * ANTLR Version %s\n", Version);
	fputs(" */\n", DefFile);

	/* token definitions */
	if ( TokenStr[EofToken]!=NULL )
	{
		fprintf(DefFile, "#define %s %d\n", TokenStr[EofToken], EofToken);
	}
	for (tok=TokenStart; tok<TokenNum; tok++)
	{
		if ( TokenStr[tok]==NULL || tok==EpToken ) continue;

		sym = (TermEntry *) hash_get(Tname, TokenStr[tok]);
		require(sym != NULL, "token not in sym tab when it should be");
		if ( sym->errclassname ) continue;

		fprintf(DefFile, "#define %s %d\n", TokenStr[tok], tok);
	}

	GenRulePrototypes(DefFile, SynDiag);
}
/* GenRemapFile -- write RemapFileName, the list of symbol renamings.
 *
 * Does nothing when ParserName equals DefaultParserName.  Otherwise the
 * file gets a banner comment followed by the #define lines produced by
 * GenRuleFuncRedefs(), GenPredefinedSymbolRedefs(), GenASTSymbolRedefs()
 * (only when GenAST is set) and GenSetRedefs().
 */
void
#ifdef __STDC__
GenRemapFile( void )
#else
GenRemapFile( )
#endif
{
	FILE *remap;
	int n;

	/* nothing to remap when the default parser name is in use */
	if ( strcmp(ParserName, DefaultParserName)==0 ) return;

	remap = fopen(RemapFileName, "w");
	require(remap!=NULL, eMsg1("GenRemapFile: cannot open %s", RemapFileName) );

	/* banner */
	fprintf(remap, "/* %s -- List of symbols to remap\n", RemapFileName);
	fputs(" *\n", remap);
	fputs(" * Generated from:", remap);
	for (n=0; n<NumFiles; n++)
	{
		fprintf(remap, " %s", FileStr[n]);
	}
	fputs("\n", remap);
	fputs(" *\n", remap);
	fputs(" * Terence Parr, Will Cohen, and Hank Dietz: 1989-1993\n", remap);
	fputs(" * Purdue University Electrical Engineering\n", remap);
	fprintf(remap, " * ANTLR Version %s\n", Version);
	fputs(" */\n", remap);

	/* the renamings themselves */
	GenRuleFuncRedefs(remap, SynDiag);
	GenPredefinedSymbolRedefs(remap);
	if ( GenAST )
	{
		GenASTSymbolRedefs(remap);
	}
	GenSetRedefs(remap);

	fclose(remap);
}
/* GenRuleFuncRedefs -- emit one "#define rule ParserName_rule" line to f
 * for every junction reached from p by following the p2 links, preceded
 * by a single explanatory comment line.
 */
void
#ifdef __STDC__
GenRuleFuncRedefs( FILE *f, Junction *p )
#else
GenRuleFuncRedefs( f, p )
FILE *f;
Junction *p;
#endif
{
	Junction *rule;

	fputs("\n/* rename rule functions to be 'ParserName_func' */\n", f);
	for (rule = p; rule != NULL; rule = (Junction *)rule->p2)
	{
		fprintf(f, "#define %s %s_%s\n", rule->rname, ParserName, rule->rname);
	}
}
/* GenPredefinedSymbolRedefs -- emit "#define symbol ParserName_symbol"
 * to f for every entry of the NULL-terminated StandardSymbols table
 * (which, per the original note, lives in globals.c), preceded by a
 * single explanatory comment line.
 */
void
#ifdef __STDC__
GenPredefinedSymbolRedefs( FILE *f )
#else
GenPredefinedSymbolRedefs( f )
FILE *f;
#endif
{
	int k;

	fputs("\n/* rename PCCTS-supplied symbols to be 'ParserName_symbol' */\n", f);
	for (k = 0; StandardSymbols[k] != NULL; k++)
	{
		fprintf(f, "#define %s %s_%s\n",
				StandardSymbols[k], ParserName, StandardSymbols[k]);
	}
}
/* GenASTSymbolRedefs -- emit "#define symbol ParserName_symbol" to f for
 * every entry of the NULL-terminated ASTSymbols table (which, per the
 * original note, lives in globals.c), preceded by a single explanatory
 * comment line.
 */
void
#ifdef __STDC__
GenASTSymbolRedefs( FILE *f )
#else
GenASTSymbolRedefs( f )
FILE *f;
#endif
{
	int k;

	fputs("\n/* rename PCCTS-supplied AST symbols to be 'ParserName_symbol' */\n", f);
	for (k = 0; ASTSymbols[k] != NULL; k++)
	{
		fprintf(f, "#define %s %s_%s\n",
				ASTSymbols[k], ParserName, ASTSymbols[k]);
	}
}
/* GenSetRedefs -- redefine all sets generated by ANTLR.
 *
 * Emits "#define setwdN ParserName_setwdN" for N = 1..wordnum, then
 * "#define zzerrN ParserName_errN" for N = 1..esetnum.
 * WARNING: 'zzerr', 'setwd' must match use in bits.c (DumpSetWd() etc...)
 */
void
#ifdef __STDC__
GenSetRedefs( FILE *f )
#else
GenSetRedefs( f )
FILE *f;
#endif
{
	int n;

	n = 1;
	while ( n <= wordnum )
	{
		fprintf(f, "#define setwd%d %s_setwd%d\n", n, ParserName, n);
		n++;
	}

	n = 1;
	while ( n <= esetnum )
	{
		fprintf(f, "#define zzerr%d %s_err%d\n", n, ParserName, n);
		n++;
	}
}
/* GenRulePrototypes -- declare every rule function in f.
 *
 * Walks the junctions reached from p through the p2 links, numbering
 * them from 1.  For each rule:
 *   - if its return spec contains a comma, a "struct _rv<N>" holding the
 *     return values is emitted first (DumpRetValStruct);
 *   - an "#ifdef __STDC__" section gets a full prototype, whose
 *     parameter list is "AST **" (when GenAST is set) followed by the
 *     rule's declared parameters, or "void" if there are neither;
 *   - the "#else" section gets the matching K&R declaration.
 *
 * The return type is "struct _rv<N>" for multiple return values, the
 * type part of the return spec (DumpType) for a single one, and void
 * when the rule returns nothing.  The K&R declaration of a rule without
 * a return value says "extern void" so that it agrees with the ANSI
 * prototype instead of defaulting to int.
 */
void
#ifdef __STDC__
GenRulePrototypes( FILE *f, Junction *p )
#else
GenRulePrototypes( f, p )
FILE *f;
Junction *p;
#endif
{
	int i;

	for (i = 1; p != NULL; p = (Junction *)p->p2, i++)
	{
		/* more than one return value => returned in a struct */
		int aggregate = ( p->ret != NULL && HasComma(p->ret) );

		if ( aggregate ) DumpRetValStruct(f, p->ret, i);

		/* ANSI prototype */
		fprintf(f, "\n#ifdef __STDC__\n");
		if ( p->ret == NULL )
		{
			fprintf(f, "void");
		}
		else if ( aggregate )
		{
			fprintf(f, "extern struct _rv%d", i);
		}
		else
		{
			fprintf(f, "extern ");
			DumpType(p->ret, f);
		}
		fprintf(f, " %s%s(", RulePrefix, p->rname);
		if ( p->pdecl != NULL || GenAST )
		{
			if ( GenAST ) fprintf(f, "AST **%s",(p->pdecl!=NULL)?",":"");
			if ( p->pdecl!=NULL ) fprintf(f, "%s", p->pdecl);
		}
		else fprintf(f, "void");
		fprintf(f, ");\n");

		/* K&R declaration: same return type, empty parameter list */
		fprintf(f, "#else\n");
		if ( p->ret == NULL )
		{
			fprintf(f, "extern void");
		}
		else if ( aggregate )
		{
			fprintf(f, "extern struct _rv%d", i);
		}
		else
		{
			fprintf(f, "extern ");
			DumpType(p->ret, f);
		}
		fprintf(f, " %s%s();\n", RulePrefix, p->rname);
		fprintf(f, "#endif\n");
	}
}
/* DumpListOfParmNames -- given a list of ANSI-style parameter
 * declarations, print a comma-separated list of just the parameter
 * names (no types) to output.
 *
 * Each name is located and printed by DumpNextNameInDef(), which also
 * reports when the last declaration has been handled.  A NULL pdecl
 * prints nothing; a NULL output is rejected through require().
 */
void
#ifdef __STDC__
DumpListOfParmNames( char *pdecl, FILE *output )
#else
DumpListOfParmNames( pdecl, output )
char *pdecl;
FILE *output;
#endif
{
	int last;

	require(output!=NULL, "DumpListOfParmNames: NULL parm");
	if ( pdecl == NULL ) return;

	/* first name has no separator in front of it */
	last = DumpNextNameInDef(&pdecl, output);
	while ( !last )
	{
		putc(',', output);
		last = DumpNextNameInDef(&pdecl, output);
	}
}
/* DumpNextNameInDef -- given a comma-separated list of parameters or
 * return values, dump the name of the next one to output.
 *
 * *q is the place to start; on return it has been moved past the
 * declaration just handled (past its comma, or onto the terminating
 * '\0' for the last one).
 *
 * The name is taken to be the last run of letters, digits and
 * underscores in the declaration.  The search never leaves the current
 * declaration, so an empty declaration, or one without any such
 * characters, prints nothing.
 *
 * Returns 1 if the declaration handled was the last one, 0 if more
 * remain.
 */
int
#ifdef __STDC__
DumpNextNameInDef( char **q, FILE *output )
#else
DumpNextNameInDef( q, output )
char **q;
FILE *output;
#endif
{
	char *start = *q;		/* where did we leave off? */
	char *end;				/* the ',' or '\0' closing this decl */
	char *nameEnd;			/* one past last char of the name */
	char *p;
	int done;

	/* find end of decl */
	end = start;
	while ( *end!='\0' && *end!=',' ) end++;
	done = ( *end == '\0' );

	/* scan back until valid var character (cast keeps isalnum defined
	 * for chars with negative values) */
	p = end;
	while ( p > start && !isalnum((unsigned char)p[-1]) && p[-1]!='_' ) --p;
	nameEnd = p;

	/* scan back until beginning of variable */
	while ( p > start && (isalnum((unsigned char)p[-1]) || p[-1]=='_') ) --p;

	/* print the name */
	while ( p < nameEnd ) {putc(*p, output); p++;}

	/* move past this parameter, but never past the end of the string */
	*q = ( *end == ',' ) ? end+1 : end;
	return done;
}
/* DumpOldStyleParms -- given a list of ANSI-style parameter
 * declarations, dump K&R-style declarations to output, one per line.
 *
 * The text is copied as is, except that every comma becomes ";\n" and
 * the blanks, tabs and newlines right after a comma are dropped.  A
 * final ";\n" closes the last declaration.  A NULL pdecl prints nothing;
 * a NULL output is rejected through require().
 */
void
#ifdef __STDC__
DumpOldStyleParms( char *pdecl, FILE *output )
#else
DumpOldStyleParms( pdecl, output )
char *pdecl;
FILE *output;
#endif
{
	char *c;

	require(output!=NULL, "DumpOldStyleParms: NULL parm");
	if ( pdecl == NULL ) return;

	c = pdecl;
	while ( *c != '\0' )
	{
		if ( *c != ',' )
		{
			/* ordinary character: copy through */
			putc(*c, output);
			c++;
			continue;
		}

		/* comma: end this declaration, skip white space before the next */
		c++;
		fputs(";\n", output);
		for (;;)
		{
			if ( *c!=' ' && *c!='\t' && *c!='\n' ) break;
			c++;
		}
	}
	fputs(";\n", output);
}
/* DumpType -- take in a type definition (type + symbol) and print out
 * the type only.
 *
 * The symbol is taken to be the last run of letters, digits and
 * underscores in s.  Everything in front of it is printed unchanged;
 * from the symbol onward only characters that are not letters, digits
 * or underscores are printed.  E.g. "int *x" prints "int *".
 *
 * If no symbol is found, or fewer than two characters come before it,
 * a warning is issued and nothing is printed.  The backward search is
 * bounded by the start of s, so empty strings and strings consisting of
 * a lone identifier are handled without reading outside the string.
 * A NULL s is rejected through require().
 */
void
#ifdef __STDC__
DumpType( char *s, FILE *f )
#else
DumpType( s, f )
char *s;
FILE *f;
#endif
{
	char *p, *end;

	require(s!=NULL, "DumpType: invalid type string");

	p = s + strlen(s);		/* start at end of string and work back */
	/* scan back until valid variable character (casts keep isalnum
	 * defined for chars with negative values) */
	while ( p > s && !isalnum((unsigned char)p[-1]) && p[-1]!='_' ) --p;
	/* scan back until beginning of variable */
	while ( p > s && (isalnum((unsigned char)p[-1]) || p[-1]=='_') ) --p;

	/* p is now the first char of the variable; need a type before it */
	if ( p - s <= 1 )
	{
		warnNoFL(eMsg1("invalid parameter/return value: '%s'",s));
		return;
	}

	end = p - 1;			/* char just before the variable */
	for (p = s; p != end; p++) putc(*p, f);		/* dump until just before variable */
	for (; *p != '\0'; p++)						/* dump rest w/o variable */
	{
		if ( !isalnum((unsigned char)*p) && *p!='_' ) putc(*p, f);
	}
}
/* strmember -- check to see if string e is a word in string s.
 *
 * A word is a maximal run of letters, digits and underscores.  Returns 1
 * if some word of s, taken from its first character, equals e, and 0
 * otherwise.  An empty e is always a member.  NULL arguments are
 * rejected through require().
 */
int
#ifdef __STDC__
strmember( char *s, char *e )
#else
strmember( s, e )
char *s;
char *e;
#endif
{
	register char *p;

	require(s!=NULL&&e!=NULL, "strmember: NULL string");
	if ( *e=='\0' ) return 1;	/* empty string is always member */

	/* The casts to unsigned char keep isalnum() well defined when plain
	 * char is signed and the text has characters above 127.
	 */
	do {
		/* skip to the start of the next word */
		while ( *s!='\0' && !isalnum((unsigned char)*s) && *s!='_' )
			++s;

		/* compare it with e */
		p = e;
		while ( *p!='\0' && *p==*s ) {p++; s++;}
		if ( *p=='\0' ) {
			/* all of e matched; it is a member only if the word ends here */
			if ( *s=='\0' ) return 1;
			if ( !isalnum((unsigned char)*s) && *s != '_' ) return 1;
		}

		/* no match: skip what is left of this word */
		while ( isalnum((unsigned char)*s) || *s == '_' )
			++s;
	} while ( *s!='\0' );

	return 0;
}
/* HasComma -- return 1 if string s contains a ',' and 0 otherwise.
 * s must not be NULL.
 */
int
#ifdef __STDC__
HasComma( char *s )
#else
HasComma( s )
char *s;
#endif
{
	char *c;

	for (c = s; *c != '\0'; c++)
	{
		if ( *c == ',' ) return 1;
	}
	return 0;
}
/* DumpRetValStruct -- write the definition of "struct _rv<i>" to f,
 * with one member per comma-separated declaration found in ret.
 *
 * Each member goes on its own tab-indented line; blanks and tabs in
 * front of a declaration are dropped and commas become ";\n".  The
 * struct is closed with ";\n};\n".
 */
void
#ifdef __STDC__
DumpRetValStruct( FILE *f, char *ret, int i )
#else
DumpRetValStruct( f, ret, i )
FILE *f;
char *ret;
int i;
#endif
{
	char *c = ret;

	fprintf(f, "\nstruct _rv%d {\n", i);
	while ( *c != '\0' )
	{
		/* ignore white */
		for (; *c==' ' || *c=='\t'; c++)
			;
		putc('\t', f);

		/* copy one declaration */
		for (; *c!='\0' && *c!=','; c++)
		{
			putc(*c, f);
		}

		/* a comma ends this member and starts the next line */
		if ( *c == ',' )
		{
			fputs(";\n", f);
			c++;
		}
	}
	fputs(";\n};\n", f);
}
/* StripQuotes -- given "s" yield s.
 *
 * DESTRUCTIVE: if s starts with a double quote, the last character of s
 * is overwritten with '\0' and a pointer to the character after the
 * opening quote is returned.  Otherwise s is returned untouched.
 */
char *
#ifdef __STDC__
StripQuotes( char *s )
#else
StripQuotes( s )
char *s;
#endif
{
	char *last;

	if ( *s != '"' ) return s;

	last = s + strlen(s) - 1;
	*last = '\0';			/* remove last quote */
	return s + 1;			/* address past initial quote */
}