home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
ftp.ee.pdx.edu
/
2014.02.ftp.ee.pdx.edu.tar
/
ftp.ee.pdx.edu
/
pub
/
users
/
Harry
/
compilers
/
yapp
/
lexer.c
< prev
next >
Wrap
C/C++ Source or Header
|
2003-05-23
|
15KB
|
416 lines
/* lexer.c
**
** The PCAT lexer with modifications for use as a YAPP preprocessor.
**
** Harry Porter, 10/8/98.
** 11/4/98 - YAPP modifications.
**
** This file provides the routine getToken() which returns a single token
** for every call. It returns zero after the source code file
** has been exhausted. It reads from stdin.
**
** This file also contains calls to lexError().
*/
#include <ctype.h>
#include <float.h>
#include <limits.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "lexer.h"
/* Forward declarations of the routines used in this file.  getToken(),
** initKeywords() and strsave() are defined further down; lexError() is
** called from getToken() but its definition is not in the part of the
** file visible here. */
int getToken (void);
void lexError (char *msg);
void initKeywords ();
char *strsave (char *str);
/* Value that accompanies the token most recently returned by getToken():
** getToken() stores into .svalue for strings and identifiers, .ivalue for
** integers and .rvalue for reals.  The union itself is declared outside
** this file (presumably in lexer.h -- confirm). */
union tokenValue tokenValue;
/* Source line counter; getToken() increments it for every newline read. */
int currentLine;
/* NOTE(review): not referenced anywhere in this file; presumably an error
** count maintained by lexError() -- confirm against its definition. */
int errorsDetected;
#define MAX_STR_LEN 255 /* Strings are limited to 255 characters. */
/* Highest index used in the keyword tables; they hold NUM_KEYWORDS+1
** entries (indices 0..NUM_KEYWORDS), all filled in by initKeywords(). */
#define NUM_KEYWORDS 90
/* Parallel arrays: keywords[i] is the interned (strsave'd) spelling of a
** reserved word and keywordTokens[i] is the token code returned for it. */
char * keywords [NUM_KEYWORDS+1];
int keywordTokens [NUM_KEYWORDS+1];
/* Set to 1 by getToken() once initKeywords() has been run. */
int tableInitialized = 0;
/* getToken ()
**
** Scan the next token from stdin and return its token code.  Returns 0
** once the input is exhausted (or if end-of-file is hit inside a comment).
**
** Side-effects:
**   tokenValue  - .svalue is set for STRING tokens, identifiers and
**                 keywords (interned via strsave()); .ivalue for INTEGER;
**                 .rvalue for REAL.
**   currentLine - incremented for every newline read.
**   stdout      - for every newline read outside a comment, "\n-<line> "
**                 is printed (when currentLine is positive).
**
** Lexical errors are reported through lexError(); the offending input is
** skipped or the value is replaced by zero, and scanning continues.
*/
int getToken (void) {
  int ch, ch2;
  int intVal, digit, intOverflow, realOverflow;
  double exp, realVal;
  /* The initializer only reserves space; every message formatted into this
     buffer below is shorter than the placeholder text. */
  char lexError2 [] = "Illegal character xxxxxxx in string ignoredxxxxxx";
  char buffer [MAX_STR_LEN+1];      /* buffer for saving a string */
  int next, i;                      /* index into buffer */

  while (1) {
    ch = getchar ();

    /* Process end-of-file... */
    if (ch == EOF) {
      return 0;

    /* Process newline... */
    } else if (ch == '\n') {
      currentLine ++;
      if (currentLine > 0) {
        printf ("\n-%d ", currentLine);
      }

    /* Process other white space... */
    } else if (ch == ' ' || ch == '\t') {
      /* do nothing */

    /* Process left-parenthesis and comments... */
    } else if (ch == '(') {
      ch2 = getchar ();
      if (ch2 != '*') {        /* Just a left-parenthesis... */
        ungetc (ch2, stdin);
        return '(';
      } else {                 /* A comment... */
        /* Slide a two-character window (ch, ch2) over the input until it
           holds the closing "*)".  Starting ch2 as a blank prevents the
           '*' of the opening "(*" from being reused as part of "*)". */
        ch2 = ' ';
        do {
          ch = ch2;
          ch2 = getchar ();
          if (ch2 == EOF) {
            lexError ("End-of-file encountered within a comment");
            ungetc (ch2, stdin);
            return 0;
          } else if (ch2 == '\n') {
            currentLine++;
          }
        } while (ch != '*' || ch2 != ')');
      }

    /* Process strings... */
    } else if (ch == '"') {
      next = 0;
      while (1) {
        ch2 = getchar ();
        if (ch2 == '"') {
          break;
        } else if (ch2 == '\n') {
          lexError ("End-of-line encountered within a string");
          ungetc (ch2, stdin);   /* let the main loop count the newline */
          break;
        } else if (ch2 == EOF) {
          /* getchar() reports end of input as EOF, not 0.  Testing for 0
             here would send EOF to the "illegal character" branch on every
             iteration and the loop would never terminate.  The next call
             to getToken() sees EOF again and returns 0. */
          lexError ("End-of-file encountered within a string");
          break;
        } else if ((ch2 < 32) || (ch2 > 126)) {
          sprintf (lexError2,
                   "Illegal character \\%03o in string ignored", ch2);
          lexError (lexError2);
        } else {
          if (next >= MAX_STR_LEN) {
            lexError ("Maximum string length (255) exceeded");
          } else {
            buffer [next++] = ch2;
          }
        }
      }
      buffer [next++] = '\0';
      tokenValue.svalue = strsave (buffer);
      return STRING;

    /* Process identifiers... */
    } else if (isalpha (ch)) {
      next = 0;
      while (isalpha (ch) || isdigit (ch)) {
        if (next >= MAX_STR_LEN) {
          lexError ("Maximum identifier length (255) exceeded");
        } else {
          buffer [next++] = ch;
        }
        ch = getchar ();
      }
      ungetc (ch, stdin);
      buffer [next++] = '\0';
      tokenValue.svalue = strsave (buffer);
      if (!tableInitialized) {
        initKeywords ();
        tableInitialized = 1;
      }
      /* Both sides were interned by strsave(), so comparing the pointers
         is equivalent to comparing the spellings. */
      for (i = 0; i <= NUM_KEYWORDS; i++) {
        if (tokenValue.svalue == keywords [i]) {
          return keywordTokens [i];
        }
      }
      return ID;

    /* Process numbers... */
    } else if (isdigit (ch)) {
      intVal = intOverflow = realOverflow = 0;
      exp = 1.0;
      realVal = 0.0;
      while (isdigit (ch)) {
        digit = ch - '0';
        /* Test for overflow BEFORE multiplying: signed overflow is
           undefined, and a wrapped result is not necessarily smaller than
           the previous value, so an after-the-fact comparison is not a
           reliable check. */
        if (!intOverflow) {
          if (intVal > (INT_MAX - digit) / 10) {
            intOverflow = 1;
          } else {
            intVal = intVal * 10 + digit;
          }
        }
        realVal = (realVal * 10.0) + (double) digit;
        if (realVal > DBL_MAX) {    /* true only once realVal is infinite */
          realOverflow = 1;
        }
        ch = getchar ();
      }
      /* If no decimal point, this is an integer so return. */
      if (ch != '.') {
        ungetc (ch, stdin);
        if (intOverflow) {
          lexError ("Integer out of range (0..2147483647)");
          intVal = 0;
        }
        tokenValue.ivalue = intVal;
        return INTEGER;
      }
      /* We have a decimal number; scan the fractional part. */
      ch = getchar ();
      while (isdigit (ch)) {
        exp *= 10.0;
        realVal = realVal + ((float) (ch - '0') / exp);
        ch = getchar ();
      }
      ungetc (ch, stdin);
      if (realOverflow) {
        lexError ("Real number is too large");
        tokenValue.rvalue = 0.0;
      } else {
        tokenValue.rvalue = realVal;
      }
      return REAL;

    /* Check for : and := ... */
    } else if (ch == ':') {
      ch2 = getchar ();
      if (ch2 == '=') {
        return ASSIGN;
      } else {
        ungetc (ch2, stdin);
        return ':';
      }

    /* Check for > >= >] ... */
    } else if (ch == '>') {
      ch2 = getchar ();
      if (ch2 == '=') {
        return GEQ;
      } else if (ch2 == ']') {
        return RBAG;
      } else {
        ungetc (ch2, stdin);
        return '>';
      }

    /* Check for < <= <> ... */
    } else if (ch == '<') {
      ch2 = getchar ();
      if (ch2 == '=') {
        return LEQ;
      } else if (ch2 == '>') {
        return NEQ;
      } else {
        ungetc (ch2, stdin);
        return '<';
      }

    /* Check for [ and [< ... */
    } else if (ch == '[') {
      ch2 = getchar ();
      if (ch2 == '<') {
        return LBAG;
      } else {
        ungetc (ch2, stdin);
        return '[';
      }

    /* Check for remaining single character operator symbols.  strchr()
       also matches the terminating '\0' of its first argument, so a NUL
       byte must be excluded explicitly; otherwise it would be returned as
       token 0, which callers interpret as end-of-file. */
    } else if (ch != '\0' && strchr ("+-*/=;,.()]{}", ch)) {
      return ch;

    /* Otherwise, we have an invalid character; ignore it. */
    } else {
      sprintf (lexError2,
               "Illegal character \\%03o ignored", ch);
      lexError (lexError2);
    }
  }
}
/* initKeywords ()
**
** This routine initializes the "keywords" and the "keywordTokens" arrays.
*/
void initKeywords () {
keywords [0] = strsave ("and"); keywordTokens [0] = AND;
keywords [1] = strsave ("array"); keywordTokens [1] = ARRAY;
keywords [2] = strsave ("begin"); keywordTokens [2] = BEGIN;
keywords [3] = strsave ("by"); keywordTokens [3] = BY;
keywords [4] = strsave ("div"); keywordTokens [4] = DIV;
keywords [5] = strsave ("do"); keywordTokens [5] = DO;
keywords [6] = strsave ("else"); keywordTokens [6] = ELSE;
keywords [7] = strsave ("elseif"); keywordTokens [7] = ELSEIF;
keywords [8] = strsave ("end"); keywordTokens [8] = END;
keywords [9] = strsave ("exit"); keywordTokens [9] = EXIT;
keywords [10] = strsave ("for"); keywordTokens [10] = FOR;
keywords [11] = strsave ("if"); keywordTokens [11] = IF;
keywords [12] = strsave ("is"); keywordTokens [12] = IS;
keywords [13] = strsave ("loop"); keywordTokens [13] = LOOP;
keywords [14] = strsave ("mod"); keywordTokens [14] = MOD;
keywords [15] = strsave ("not"); keywordTokens [15] = NOT;
keywords [16] = strsave ("of"); keywordTokens [16] = OF;
keywords [17] = strsave ("or"); keywordTokens [17] = OR;
keywords [18] = strsave ("procedure"); keywordTokens [18] = PROCEDURE;
keywords [19] = strsave ("program"); keywordTokens [19] = PROGRAM;
keywords [20] = strsave ("read"); keywordTokens [20] = READ;
keywords [21] = strsave ("record"); keywordTokens [21] = RECORD;
keywords [22] = strsave ("return"); keywordTokens [22] = RETURN;
keywords [23] = strsave ("then"); keywordTokens [23] = THEN;
keywords [24] = strsave ("to"); keywordTokens [24] = TO;
keywords [25] = strsave ("type"); keywordTokens [25] = TYPE;
keywords [26] = strsave ("var"); keywordTokens [26] = VAR;
keywords [27] = strsave ("while"); keywordTokens [27] = WHILE;
keywords [28] = strsave ("write"); keywordTokens [28] = WRITE;
keywords [29] = strsave ("E"); keywordTokens [29] = SY_E;
keywords [30] = strsave ("T"); keywordTokens [30] = SY_T;
keywords [31] = strsave ("F"); keywordTokens [31] = SY_F;
keywords [32] = strsave ("prog"); keywordTokens [32] = NT_PROGRAM;
keywords [33] = strsave ("body"); keywordTokens [33] = NT_BODY;
keywords [34] = strsave ("decls"); keywordTokens [34] = NT_DECLS;
keywords [35] = strsave ("stmts"); keywordTokens [35] = NT_STMTS;
keywords [36] = strsave ("varDecls"); keywordTokens [36] = NT_VARDECLS;
keywords [37] = strsave ("typeDecls"); keywordTokens [37] = NT_TYPEDECLS;
keywords [38] = strsave ("procDecls"); keywordTokens [38] = NT_PROCDECLS;
keywords [39] = strsave ("decl"); keywordTokens [39] = NT_DECL;
keywords [40] = strsave ("stmt"); keywordTokens [40] = NT_STMT;
keywords [41] = strsave ("varDecl"); keywordTokens [41] = NT_VARDECL;
keywords [42] = strsave ("typeDecl"); keywordTokens [42] = NT_TYPEDECL;
keywords [43] = strsave ("procDecl"); keywordTokens [43] = NT_PROCDECL;
keywords [44] = strsave ("idList"); keywordTokens [44] = NT_IDLIST;
keywords [45] = strsave ("optionalType"); keywordTokens [45] = NT_OPTIONALTYPE;
keywords [46] = strsave ("expr"); keywordTokens [46] = NT_EXPR;
keywords [47] = strsave ("type2"); keywordTokens [47] = NT_TYPE;
keywords [48] = strsave ("components"); keywordTokens [48] = NT_COMPONENTS;
keywords [49] = strsave ("component"); keywordTokens [49] = NT_COMPONENT;
keywords [50] = strsave ("formalParams"); keywordTokens [50] = NT_FORMALPARAMS;
keywords [51] = strsave ("fpSections"); keywordTokens [51] = NT_FPSECTIONS;
keywords [52] = strsave ("fpSection"); keywordTokens [52] = NT_FPSECTION;
keywords [53] = strsave ("lValues"); keywordTokens [53] = NT_LVALUES;
keywords [54] = strsave ("lValue"); keywordTokens [54] = NT_LVALUE;
keywords [55] = strsave ("actualParams"); keywordTokens [55] = NT_ACTUALPARAMS;
keywords [56] = strsave ("actuals"); keywordTokens [56] = NT_ACTUALS;
keywords [57] = strsave ("writeParams"); keywordTokens [57] = NT_WRITEPARAMS;
keywords [58] = strsave ("writeExprs"); keywordTokens [58] = NT_WRITEEXPRS;
keywords [59] = strsave ("writeExpr"); keywordTokens [59] = NT_WRITEEXPR;
keywords [60] = strsave ("elseIfs"); keywordTokens [60] = NT_ELSEIFS;
keywords [61] = strsave ("optionalElse"); keywordTokens [61] = NT_OPTIONALELSE;
keywords [62] = strsave ("optionalBy"); keywordTokens [62] = NT_OPTIONALBY;
keywords [63] = strsave ("optionalExpr"); keywordTokens [63] = NT_OPTIONALEXPR;
keywords [64] = strsave ("unaryOp"); keywordTokens [64] = NT_UNARYOP;
keywords [65] = strsave ("binaryOp2"); keywordTokens [65] = NT_BINARYOP2;
keywords [66] = strsave ("compValues"); keywordTokens [66] = NT_COMPVALUES;
keywords [67] = strsave ("moreCompValues"); keywordTokens [67] = NT_MORECOMPVALUES;
keywords [68] = strsave ("arrayValues"); keywordTokens [68] = NT_ARRAYVALUES;
keywords [69] = strsave ("moreArrayValues"); keywordTokens [69] = NT_MOREARRAYVALUES;
keywords [70] = strsave ("arrayValue"); keywordTokens [70] = NT_ARRAYVALUE;
keywords [71] = strsave ("moreExpr2"); keywordTokens [71] = NT_MOREEXPR2;
keywords [72] = strsave ("moreExpr3"); keywordTokens [72] = NT_MOREEXPR3;
keywords [73] = strsave ("moreExpr4"); keywordTokens [73] = NT_MOREEXPR4;
keywords [74] = strsave ("binaryOp3"); keywordTokens [74] = NT_BINARYOP3;
keywords [75] = strsave ("binaryOp4"); keywordTokens [75] = NT_BINARYOP4;
keywords [76] = strsave ("expr2"); keywordTokens [76] = NT_EXPR2;
keywords [77] = strsave ("expr3"); keywordTokens [77] = NT_EXPR3;
keywords [78] = strsave ("expr4"); keywordTokens [78] = NT_EXPR4;
keywords [79] = strsave ("expr5"); keywordTokens [79] = NT_EXPR5;
keywords [80] = strsave ("E2"); keywordTokens [80] = SY_E2;
keywords [81] = strsave ("T2"); keywordTokens [81] = SY_T2;
keywords [82] = strsave ("bexpr"); keywordTokens [82] = SY_BEXPR;
keywords [83] = strsave ("bterm"); keywordTokens [83] = SY_BTERM;
keywords [84] = strsave ("bfactor"); keywordTokens [84] = SY_BFACTOR;
keywords [85] = strsave ("true"); keywordTokens [85] = SY_TRUE;
keywords [86] = strsave ("false"); keywordTokens [86] = SY_FALSE;
keywords [87] = strsave ("A"); keywordTokens [87] = SY_A;
keywords [88] = strsave ("a"); keywordTokens [88] = SY_a;
keywords [89] = strsave ("b"); keywordTokens [89] = SY_b;
keywords [90] = strsave ("S"); keywordTokens [90] = SY_S;
/* Make sure NUM_KEYWORDS is the same as the number on the line above. */
}
/* strsave.c
**
** A string saving routine to be used by the lexer.
**
** For CS301
**
** See Aho,Sethi,Ullman pp.435-437 for details.
**
** Jingke Li, 1/20/97.
** Modified: Harry Porter, 10/9/97.
**
*/
#define STRING_TABLE_SIZE 211 /* Number of buckets in the string hash table */

/* One interned string.  Entries that hash to the same bucket are chained
** through "next".  The character data begins at s[0] and runs past the
** declared one-element array into the extra space allocated by strsave().
*/
typedef struct string {
  struct string *next;   /* next entry in the same bucket, or NULL */
  char s[1];             /* first byte of the stored string */
} String;

/* Bucket heads; all NULL until the first string is stored. */
static String *string_table [STRING_TABLE_SIZE];

/*
** strsave()
**
** Intern the '\0'-terminated string "str".  If an equal string has been
** stored before, the pointer to that earlier copy is returned.  Otherwise
** a private copy is allocated, added to the table, and its address is
** returned.  The caller's buffer is never retained.
**
** Because equal strings always yield the same pointer, results of
** strsave() can be compared with == instead of strcmp(), and each distinct
** string is stored only once.
**
** If memory cannot be allocated, a message is written to stderr and the
** program exits with status 1.
*/
char *strsave (char *str)
{
  unsigned hash = 0;
  unsigned high;
  const char *cursor = str;
  String *entry;

  /* Hash the characters: shift in four bits per character and fold the
     top four bits back into the low end whenever they become non-zero. */
  while (*cursor != '\0') {
    hash = (hash << 4) + (*cursor);
    high = hash & 0xf0000000;
    if (high != 0) {
      hash ^= high >> 24;
      hash ^= high;
    }
    cursor++;
  }
  hash %= STRING_TABLE_SIZE;

  /* Search the bucket for an existing copy. */
  entry = string_table[hash];
  while (entry != NULL) {
    if (strcmp (entry->s, str) == 0) {
      return entry->s;
    }
    entry = entry->next;
  }

  /* Not present: allocate a node with room for the text and link it in
     at the head of the bucket. */
  entry = (String *) malloc (sizeof (String) + strlen (str) + 1);
  if (entry == NULL) {
    fprintf (stderr, "***** Compiler Error: Malloc failed in strsave *****\n");
    exit (1);
  }
  strcpy (entry->s, str);
  entry->next = string_table[hash];
  string_table[hash] = entry;
  return entry->s;
}