home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
ftp.ee.pdx.edu
/
2014.02.ftp.ee.pdx.edu.tar
/
ftp.ee.pdx.edu
/
pub
/
users
/
Harry
/
Blitz
/
version-1-0
/
BlitzSrc
/
kpl
/
lexer.cc
< prev
next >
Wrap
C/C++ Source or Header
|
2006-09-25
|
46KB
|
1,523 lines
// lexer.cc -- Routines for lexical analysis
//
// KPL Compiler
//
// Copyright 2002-2006, Harry H. Porter III
//
// This file may be freely copied, modified and compiled, on the sole
// condition that if you modify it...
// (1) Your name and the date of modification is added to this comment
// under "Modifications by", and
// (2) Your name and the date of modification is added to the printHelp()
// routine in file "main.cc" under "Modifications by".
//
// Original Author:
// 06/15/02 - Harry H. Porter III
//
// Modifcations by:
// 03/15/06 - Harry H. Porter III
//
// The primary routines provided in this file are...
//
// initScanner (filename)
// scan ()
//
// Information about the current token will be found in...
//
// token - the type of the token
// token.type - the type of the token
// token.value - additional info about the token
// token.tokenPos - the position of the token (file, line no, char pos)
//
// Look-ahead tokens can also be found in...
//
// token2
// token3
// token4
// token5
//
// The input is obtained from the operating system character-by-character,
// using the following interface:
//
// getc (inputFile)
// ungetc (ch, inputFile)
//
// After all tokens have been scanned, the pseudo-random integer "hashVal" will have been
// computed. Any change to the token stream will (with high probability) result in
// a different value being stored in "hashVal".
#include "main.h"
// initScanner (filename) --> actualFileName
//
// This routine initializes the 5 token look-ahead buffer. The tokens will come from the
// file with the given name. We try to open the file (for read-only) and if that fails,
// we prepend the directory path prefix to the name and try again.
//
// We return the actual file name if we succeed and NULL if problems occurred
// while opening the file.
//
// If 'filename' is NULL, we will open stdin and return "<stdin>".
//
char * initScanner (char * filename) {
char * currentInputFileName = "<filename missing>";
currentLineOfToken = 1;
currentCharPosOfToken = 0;
// newlinePreceedsThisToken = 0;
// newlinePreceedsToken2 = 0;
posOfNextToken = createTokenPos ();
token.type = COLON;
token.value.ivalue = 0;
token.tokenPos = posOfNextToken;
token2 = token;
token3 = token;
token4 = token;
token5 = token;
eofCount = 0;
hashVal = 0x12345678;
hashCount = 20;
if (filename == NULL) {
currentInputFileName = lookupAndAdd ("<stdin>", ID) -> chars;
inputFile = stdin;
} else {
currentInputFileName = filename;
// Open the input file.
inputFile = fopen (currentInputFileName, "r");
// If that fails, prepend the directory path prefix and try again.
if ((inputFile == NULL) && (commandDirectoryName != NULL)) {
currentInputFileName = appendStrings (commandDirectoryName, filename, "");
inputFile = fopen (currentInputFileName, "r");
}
// If that fails, then print an error, set things to the EOF state, and return.
if (inputFile == NULL) {
if (commandDirectoryName == NULL) {
fprintf (stderr, "%s:0: ***** LEXICAL ERROR: File could not be opened for reading\n", filename);
} else {
fprintf (stderr, "%s:0: ***** LEXICAL ERROR: File could not be opened for reading (also tried %s)\n", filename, currentInputFileName);
}
errorsDetected++;
token.type = EOF;
token.tokenPos = posOfNextToken;
tokenMinusOne = token;
token2 = token;
token3 = token;
token4 = token;
token5 = token;
return NULL;
}
}
addToInputFilenames (currentInputFileName);
token5.type = getToken ();
token5.value = currentTokenValue;
token5.tokenPos = posOfNextToken;
addTokenToHash (token5);
token4 = token5;
scan ();
scan ();
scan ();
scan ();
return currentInputFileName;
}
// scan ()
//
// This routine advances to the next token. It shifts the 5 token look-ahead
// buffer ahead by one token. At EOF, the token types will all be EOF.
//
void scan () {
tokenMinusOne = token;
token = token2;
token2 = token3;
token3 = token4;
token4 = token5;
if (token5.type != EOF) {
token5.type = getToken ();
token5.value = currentTokenValue;
token5.tokenPos = posOfNextToken;
addTokenToHash (token5);
} else {
eofCount++;
if (eofCount > 10) {
programLogicError ("Parser is looping on EOF");
}
}
}
// getToken ()
//
// Scan the next token and return it's type. Side-effects the following:
// currentTokenValue
// currentInputFileIndex
// currentLineOfToken
// currentCharPosOfToken
// Returns EOF repeatedly after end-of-file is reached.
//
int getToken (void) {
int ch, ch2, lengthError, numDigits;
int intVal, t, intOverflow, realOverflow, sign;
double realValue, exp, power;
char lexError2 [200]; // buffer for varying error messages
char buffer [MAX_STR_LEN+1]; // buffer for saving a string
int next, i;
while (1) {
ch = getNextChar ();
// Process end-of-file...
if (ch == EOF) {
unGetChar (ch);
posOfNextToken = createTokenPos ();
return EOF;
// Process newline...
} else if (ch == '\n') {
incrLineNumber ();
continue;
// Process other white space...
} else if (ch == ' ' || ch == '\t') {
// do nothing
continue;
}
posOfNextToken = createTokenPos ();
// Process identifiers...
if (isalpha (ch)) {
lengthError = 0;
next = 0;
while (isalpha (ch) || isdigit (ch) || (ch=='_')) {
if (next >= MAX_STR_LEN) {
lengthError = 1;
} else {
buffer [next++] = ch;
}
ch = getNextChar ();
}
unGetChar (ch);
currentTokenValue.svalue = lookupAndAdd2 (buffer, next, ID);
// If already there, then its type may be BOOL, ..., IF, ..., or ID
if (lengthError) {
lexError ("Identifier exceeds maximum allowable length");
}
i = currentTokenValue.svalue->type;
return i;
// Process strings...
} else if (ch == '"') {
next = 0;
lengthError = 0;
while (1) {
ch2 = getNextChar ();
if (ch2 == '"') {
break;
} else if (ch2 == '\n') {
incrLineNumber ();
if (next >= MAX_STR_LEN) {
lengthError = 1;
} else {
buffer [next++] = ch2;
}
} else if (ch2 == EOF) {
lexError ("End-of-file encountered within a string");
unGetChar (ch2);
break;
} else if ((ch2 < 32) || (ch2 > 126)) {
sprintf (lexError2,
"Illegal character 0x%02x in string ignored", ch2);
lexError (lexError2);
} else {
if (ch2 == '\\') {
ch2 = scanEscape ();
}
if (next >= MAX_STR_LEN) {
lengthError = 1;
} else {
buffer [next++] = ch2;
}
}
}
currentTokenValue.svalue = lookupAndAdd2 (buffer, next, ID);
if (lengthError) {
lexError ("Maximum string length exceeded");
}
return STRING_CONST;
// Process operators...
} else if (isOpChar (ch)) {
lengthError = 0;
next = 0;
buffer [next++] = ch;
ch2 = getNextChar ();
// Check for -- comments...
if ((ch == '-') && (ch2 == '-')) {
while (1) {
ch = getNextChar ();
if (ch == EOF) {
lexError ("End-of-file encountered within a -- comment");
unGetChar (ch);
return EOF;
} else if (ch == '\n') {
incrLineNumber ();
break;
}
}
continue;
// Check for /* comments...
} else if ((ch == '/') && (ch2 == '*')) {
ch2 = ' ';
i = 1;
while (1) {
ch = ch2;
ch2 = getNextChar ();
if (ch2 == EOF) {
lexError ("End-of-file encountered within a /* comment");
unGetChar (ch2);
return EOF;
} else if (ch2 == '\n') {
incrLineNumber ();
}
if (ch == '/' && ch2 == '*') {
i++;
ch = ' ';
ch2 = ' ';
}
if (ch == '*' && ch2 == '/') {
if (i <= 1) {
break;
} else {
i--;
ch = ' ';
ch2 = ' ';
}
}
}
continue;
} else if (isOpChar (ch2)) {
buffer [next++] = ch2;
ch = getNextChar ();
while (isOpChar (ch)) {
if (next >= MAX_STR_LEN) {
lengthError = 1;
} else {
buffer [next++] = ch;
}
ch = getNextChar ();
}
unGetChar (ch);
} else {
unGetChar (ch2);
}
currentTokenValue.svalue = lookupAndAdd2 (buffer, next, OPERATOR);
if (lengthError) {
lexError ("Operator exceeds maximum allowable length");
}
return currentTokenValue.svalue->type;
// Process character constants...
} else if (ch == '\'') {
currentTokenValue.ivalue = '?';
ch2 = getNextChar ();
if (ch2 == EOF) {
lexError ("End-of-file encountered within a char constant");
unGetChar (ch2);
return EOF;
} else if (ch2 == '\n') {
lexError ("End-of-line encountered within a char constant");
incrLineNumber ();
return CHAR_CONST;
}
if (ch2 == '\\') {
ch2 = scanEscape ();
}
currentTokenValue.ivalue = ch2;
ch2 = getNextChar ();
if (ch2 != '\'') {
unGetChar (ch2);
lexError ("Expecting closing quote in character constant");
}
return CHAR_CONST;
// Process integer and double constants...
} else if (isdigit (ch)) {
// See if we have 0x...*/
if (ch == '0') {
ch2 = getNextChar ();
if (ch2 == 'x') {
numDigits = 0;
ch = getNextChar ();
if (hexCharToInt (ch) < 0) {
lexError ("Must have a hex digit after 0x");
}
next = intVal = intOverflow = 0;
while (hexCharToInt (ch) >= 0) {
intVal = (intVal << 4) + hexCharToInt (ch);
numDigits++;
ch = getNextChar ();
}
unGetChar (ch);
if (numDigits > 8) {
lexError ("Hex constants must be 8 or fewer digits");
intVal = 0;
}
currentTokenValue.ivalue = intVal;
return INT_CONST;
}
unGetChar (ch2);
}
// Otherwise we have a string of decimal numerals.
intVal = intOverflow = realOverflow = 0;
exp = 1.0;
realValue = 0.0;
while (isdigit (ch)) {
t = intVal * 10 + (ch - '0');
if (t < intVal) {
intOverflow = 1;
}
intVal = t;
realValue = (realValue * 10.0) + (double) (ch - '0');
if (realValue > DBL_MAX) {
realOverflow = 1;
}
ch = getNextChar ();
}
// If we have a real number...
if ((ch == '.') || (ch == 'e') || (ch == 'E')) {
// Read in the fractional part.
if (ch == '.') {
ch = getNextChar ();
if (!isdigit (ch)) {
lexError ("At least one digit is required after decimal point");
}
while (isdigit (ch)) {
exp *= 10.0;
realValue = realValue + ((double) (ch - '0') / exp);
ch = getNextChar ();
}
}
intVal = 0;
sign = 1;
if ((ch == 'e') || (ch == 'E')) {
ch = getNextChar ();
// Process the exponent sign, if there is one.
if (ch == '+') {
ch = getNextChar ();
} else if (ch == '-') {
ch = getNextChar ();
sign = -1;
}
// Read in the exponent integer into intVal.
if (!isdigit (ch)) {
lexError ("Expecting exponent numerals");
} else {
intVal = intOverflow = 0;
while (isdigit (ch)) {
t = intVal * 10 + (ch - '0');
if (t < intVal) {
intOverflow = 1;
}
intVal = t;
ch = getNextChar ();
}
if (intOverflow) {
lexError ("Exponent is out of range");
intVal = 0;
}
}
}
unGetChar (ch);
currentTokenValue.rvalue = 999.888;
// power = pow ((double) 10.0, (double) (sign * intVal));
power = raisedToThePower (sign * intVal);
realValue = realValue * power;
if (realValue > DBL_MAX) {
realOverflow = 1;
}
currentTokenValue.rvalue = realValue;
if (realOverflow) {
lexError ("Real number is out of range");
currentTokenValue.rvalue = 0.0;
}
return DOUBLE_CONST;
} else { // If we have an integer...
unGetChar (ch);
if (intOverflow) {
lexError ("Integer out of range (0..2147483647); use 0x80000000 for -2147483648");
intVal = 0;
}
currentTokenValue.ivalue = intVal;
return INT_CONST;
}
// Check for one character symbols.
} else if (ch == '(') {
return L_PAREN;
} else if (ch == ')') {
return R_PAREN;
} else if (ch == '{') {
return L_BRACE;
} else if (ch == '}') {
return R_BRACE;
} else if (ch == '[') {
return L_BRACK;
} else if (ch == ']') {
return R_BRACK;
} else if (ch == ',') {
return COMMA;
} else if (ch == '.') {
return PERIOD;
} else if (ch == ':') {
return COLON;
} else if (ch == ';') {
return SEMI_COLON;
// // Check for semi-colon and print special message.
// } else if (ch == ';') {
// lexError ("The semi-colon is not used in the KPL programming language");
// Otherwise, we have an invalid character; ignore it.
} else {
if ((ch>=' ') && (ch<='~')) {
sprintf (lexError2, "Illegal character '%c' ignored", ch);
} else {
sprintf (lexError2, "Illegal character 0x%02x ignored", ch);
}
lexError (lexError2);
}
}
}
// lexError (msg)
//
// This routine is called to print an error message and the current line
// number. It returns.
//
void lexError (char *msg) {
fprintf (stderr, "%s", inputFileNames [currentInputFileIndex]);
if (currentLineOfToken >= 65535) {
fprintf (stderr, ":<line number not available>: ");
} else {
fprintf (stderr, ":%d: ", currentLineOfToken);
}
fprintf (stderr, "***** LEXICAL ERROR: %s\n", msg);
errorsDetected++;
}
// initKeywords ()
//
// This routine adds each keyword to the symbol table with the corresponding
// type code.
// This routine initializes the "keywords" and the "keywordTokens" arrays.
//
void initKeywords () {
lookupAndAdd ("=", EQUAL);
lookupAndAdd ("alloc", ALLOC);
lookupAndAdd ("anyType", ANY_TYPE);
lookupAndAdd ("array", ARRAY);
lookupAndAdd ("arraySize", ARRAY_SIZE);
lookupAndAdd ("asInteger", AS_INTEGER);
lookupAndAdd ("asPtrTo", AS_PTR_TO);
lookupAndAdd ("behavior", BEHAVIOR);
lookupAndAdd ("bool", BOOL);
lookupAndAdd ("break", BREAK);
lookupAndAdd ("by", BY);
lookupAndAdd ("case", CASE);
lookupAndAdd ("catch", CATCH);
lookupAndAdd ("char", CHAR);
lookupAndAdd ("class", CLASS);
lookupAndAdd ("code", CODE);
lookupAndAdd ("const", CONST);
lookupAndAdd ("continue", CONTINUE);
lookupAndAdd ("debug", DEBUG);
lookupAndAdd ("default", DEFAULT);
lookupAndAdd ("do", DO);
lookupAndAdd ("double", DOUBLE);
lookupAndAdd ("else", ELSE);
lookupAndAdd ("elseIf", ELSE_IF);
lookupAndAdd ("endBehavior", END_BEHAVIOR);
lookupAndAdd ("endClass", END_CLASS);
lookupAndAdd ("endCode", END_CODE);
lookupAndAdd ("endFor", END_FOR);
lookupAndAdd ("endFunction", END_FUNCTION);
lookupAndAdd ("endHeader", END_HEADER);
lookupAndAdd ("endIf", END_IF);
lookupAndAdd ("endInterface", END_INTERFACE);
lookupAndAdd ("endMethod", END_METHOD);
lookupAndAdd ("endRecord", END_RECORD);
lookupAndAdd ("endSwitch", END_SWITCH);
lookupAndAdd ("endTry", END_TRY);
lookupAndAdd ("endWhile", END_WHILE);
lookupAndAdd ("enum", ENUM);
lookupAndAdd ("errors", ERRORS);
lookupAndAdd ("extends", EXTENDS);
lookupAndAdd ("external", EXTERNAL);
lookupAndAdd ("false", FALSE);
lookupAndAdd ("fields", FIELDS);
lookupAndAdd ("for", FOR);
lookupAndAdd ("free", FREE);
lookupAndAdd ("function", FUNCTION);
lookupAndAdd ("functions", FUNCTIONS);
lookupAndAdd ("header", HEADER);
lookupAndAdd ("if", IF);
lookupAndAdd ("implements", IMPLEMENTS);
lookupAndAdd ("infix", INFIX);
lookupAndAdd ("int", INT);
lookupAndAdd ("interface", INTERFACE);
lookupAndAdd ("isInstanceOf", IS_INSTANCE_OF);
lookupAndAdd ("isKindOf", IS_KIND_OF);
lookupAndAdd ("messages", MESSAGES);
lookupAndAdd ("method", METHOD);
lookupAndAdd ("methods", METHODS);
lookupAndAdd ("new", NEW);
lookupAndAdd ("null", NULL_KEYWORD);
lookupAndAdd ("of", OF);
lookupAndAdd ("prefix", PREFIX);
lookupAndAdd ("ptr", PTR);
lookupAndAdd ("record", RECORD);
lookupAndAdd ("renaming", RENAMING);
lookupAndAdd ("return", RETURN);
lookupAndAdd ("returns", RETURNS);
lookupAndAdd ("self", SELF);
lookupAndAdd ("sizeOf", SIZE_OF);
lookupAndAdd ("super", SUPER);
lookupAndAdd ("superclass", SUPER_CLASS);
lookupAndAdd ("switch", SWITCH);
lookupAndAdd ("throw", THROW);
lookupAndAdd ("to", TO);
lookupAndAdd ("true", TRUE);
lookupAndAdd ("try", TRY);
lookupAndAdd ("type", TYPE);
lookupAndAdd ("typeOfNull", TYPE_OF_NULL);
lookupAndAdd ("until", UNTIL);
lookupAndAdd ("uses", USES);
lookupAndAdd ("var", VAR);
lookupAndAdd ("void", VOID);
lookupAndAdd ("while", WHILE);
}
// lookupAndAdd (givenStr, type)
//
// This routine is passed a pointer to a string of characters, terminated
// by '\0'. It looks it up in the string table. If there is already an entry
// in the table, it returns a pointer to the previously stored entry.
// If not found, it allocates a new table entry, copies the new string into
// the table, and returns a pointer to the new String.
// If the string is new, then its 'type' is set according to newType.
//
String * lookupAndAdd (char * givenStr, int newType) {
return lookupAndAdd2 (givenStr, strlen(givenStr), newType);
}
// lookupAndAdd2 (givenStr, length, type)
//
// This routine is passed a pointer to a sequence of any characters (possibly
// containing \0, of length "length". It looks this string up in the
// string table. If there is already an entry in the table, it returns
// a pointer to the previously created entry. If not found, it allocates
// a new String, copies the new string into the entry, and returns
// a pointer to the new String.
//
// After calling this routine, you can use pointer comparisons to check
// for equality, rather than the more expensive test for character equality.
// Furthermore, each different string will only be stored once, saving space.
//
// If the string is new, then its 'type' is set according to newType.
//
//
String * lookupAndAdd2 (char * givenStr, int length, int newType) {
unsigned hashVal = 0, g;
char * p, * q;
int i;
String * stringPtr;
// Compute the hash value for the givenStr and set hashVal to it.
for ( p = givenStr, i=0;
i < length;
p++, i++ ) {
hashVal = (hashVal << 4) + (*p);
if (g = hashVal & 0xf0000000) {
hashVal = hashVal ^ (g >> 24);
hashVal = hashVal ^ g;
}
}
hashVal %= STRING_TABLE_HASH_SIZE;
// Search the linked list and return if we find it.
for (stringPtr = stringTableIndex [hashVal];
stringPtr;
stringPtr = stringPtr->next) {
if ((length == stringPtr->length) &&
(bytesEqual (stringPtr->chars, givenStr, length))) {
return stringPtr;
}
}
// Create an entry and initialize it.
stringPtr = (String *)
calloc (1, sizeof (String) + length + 1);
if (stringPtr == 0) {
fatalError ("Calloc failed in lookupAndAdd!");
}
for (p=givenStr, q=stringPtr->chars, i=length;
i>0;
p++, q++, i--) {
*q = *p;
}
*q = '\0';
stringPtr->length = length;
stringPtr->type = newType;
stringPtr->primitiveSymbol = 0;
// Add the new entry to the appropriate linked list and return.
stringPtr->next = stringTableIndex [hashVal];
stringTableIndex [hashVal] = stringPtr;
return stringPtr;
}
// bytesEqual (p, q, length)
//
// This function is passed two pointers to blocks of characters, and a
// length. It compares the two sequences of bytes and returns true iff
// they are both equal.
//
int bytesEqual (char * p, char * q, int length) {
for (; length>0; length--, p++, q++) {
if (*p != *q) return 0;
}
return 1;
}
// printStringTable ()
//
// This routine runs through the string table and prints each entry.
//
void printStringTable () {
int hashVal;
String * stringPtr;
printf ("LIST OF ALL STRINGS AND IDS\n");
printf ("===========================\n");
for (hashVal = 0; hashVal<STRING_TABLE_HASH_SIZE; hashVal++) {
for (stringPtr = stringTableIndex [hashVal];
stringPtr;
stringPtr = stringPtr->next) {
printString (stdout, stringPtr);
printf ("\n");
}
}
}
// printString (file, str)
//
// This routine is passed a pointer to a String. It prints the
// string of the given file, translating non-printable characters into
// escape sequences. This routine will not print a terminating newline.
//
void printString (FILE * file, String * str) {
int i = 0;
if (str == NULL) {
fprintf (file, "*****NULL-STRING*****");
return;
}
for (i=0; i<str->length; i++) {
printChar (file, str->chars [i]);
}
}
// printChar (file, int)
//
// This routine is passed a char, which could be any value 0..255.
// It prints it on the given file (e.g. stdout) in a form such as
// a \n \xEF ...etc...
//
void printChar (FILE * file, int c) {
int i, j;
if (c == '\"') {
fprintf (file, "\\\"");
} else if (c == '\'') {
fprintf (file, "\\\'");
} else if (c == '\\') {
fprintf (file, "\\\\");
} else if ((c>=32) && (c<127)) {
fprintf (file, "%c", c);
} else if (c == '\0') {
fprintf (file, "\\0");
} else if (c == '\a') {
fprintf (file, "\\a");
} else if (c == '\b') {
fprintf (file, "\\b");
} else if (c == '\t') {
fprintf (file, "\\t");
} else if (c == '\n') {
fprintf (file, "\\n");
} else if (c == '\v') {
fprintf (file, "\\v");
} else if (c == '\f') {
fprintf (file, "\\f");
} else if (c == '\r') {
fprintf (file, "\\r");
} else {
i = intToHexChar ((c>>4) & 0x0000000f);
j = intToHexChar (c & 0x0000000f);
fprintf (file, "\\x%c%c", i, j);
}
}
// symbolName (s)
//
// This routine is passed a symbol such as BOOL. It returns a pointer
// to a string of characters, such as "BOOL".
//
char * symbolName (int i) {
switch (i) {
case EOF: return "EOF";
case ID: return "ID";
case INT_CONST: return "INT_CONST";
case DOUBLE_CONST: return "DOUBLE_CONST";
case CHAR_CONST: return "CHAR_CONST";
case STRING_CONST: return "STRING_CONST";
case OPERATOR: return "OPERATOR";
// keywords
case ALLOC: return "ALLOC";
case ANY_TYPE: return "ANY_TYPE";
case ARRAY: return "ARRAY";
case ARRAY_SIZE: return "ARRAY_SIZE";
case AS_INTEGER: return "AS_INTEGER";
case AS_PTR_TO: return "AS_PTR_TO";
case BEHAVIOR: return "BEHAVIOR";
case BOOL: return "BOOL";
case BREAK: return "BREAK";
case BY: return "BY";
case CASE: return "CASE";
case CATCH: return "CATCH";
case CHAR: return "CHAR";
case CLASS: return "CLASS";
case CODE: return "CODE";
case CONST: return "CONST";
case CONTINUE: return "CONTINUE";
case DEBUG: return "DEBUG";
case DEFAULT: return "DEFAULT";
case DO: return "DO";
case DOUBLE: return "DOUBLE";
case ELSE: return "ELSE";
case ELSE_IF: return "ELSE_IF";
case END_BEHAVIOR: return "END_BEHAVIOR";
case END_CLASS: return "END_CLASS";
case END_CODE: return "END_CODE";
case END_FOR: return "END_FOR";
case END_FUNCTION: return "END_FUNCTION";
case END_HEADER: return "END_HEADER";
case END_IF: return "END_IF";
case END_INTERFACE: return "END_INTERFACE";
case END_METHOD: return "END_METHOD";
case END_RECORD: return "END_RECORD";
case END_SWITCH: return "END_SWITCH";
case END_TRY: return "END_TRY";
case END_WHILE: return "END_WHILE";
case ENUM: return "ENUM";
case ERRORS: return "ERRORS";
case EXTENDS: return "EXTENDS";
case EXTERNAL: return "EXTERNAL";
case FALSE: return "FALSE";
case FIELDS: return "FIELDS";
case FOR: return "FOR";
case FREE: return "FREE";
case FUNCTION: return "FUNCTION";
case FUNCTIONS: return "FUNCTIONS";
case HEADER: return "HEADER";
case IF: return "IF";
case IMPLEMENTS: return "IMPLEMENTS";
case INFIX: return "INFIX";
case INT: return "INT";
case INTERFACE: return "INTERFACE";
case IS_INSTANCE_OF: return "IS_INSTANCE_OF";
case IS_KIND_OF: return "IS_KIND_OF";
case MESSAGES: return "MESSAGES";
case METHOD: return "METHOD";
case METHODS: return "METHODS";
case NEW: return "NEW";
case NULL_KEYWORD: return "NULL";
case OF: return "OF";
case PREFIX: return "PREFIX";
case PTR: return "PTR";
case RECORD: return "RECORD";
case RENAMING: return "RENAMING";
case RETURN: return "RETURN";
case RETURNS: return "RETURNS";
case SELF: return "SELF";
case SIZE_OF: return "SIZE_OF";
case SUPER: return "SUPER";
case SUPER_CLASS: return "SUPER_CLASS";
case SWITCH: return "SWITCH";
case THROW: return "THROW";
case TO: return "TO";
case TRUE: return "TRUE";
case TRY: return "TRY";
case TYPE: return "TYPE";
case TYPE_OF_NULL: return "TYPE_OF_NULL";
case UNTIL: return "UNTIL";
case USES: return "USES";
case VAR: return "VAR";
case VOID: return "VOID";
case WHILE: return "WHILE";
// Punctuation tokens
case L_PAREN: return "L_PAREN";
case R_PAREN: return "R_PAREN";
case L_BRACE: return "L_BRACE";
case R_BRACE: return "R_BRACE";
case L_BRACK: return "L_BRACK";
case R_BRACK: return "R_BRACK";
case COLON: return "COLON";
case SEMI_COLON: return "SEMI_COLON";
case COMMA: return "COMMA";
case PERIOD: return "PERIOD";
case EQUAL: return "EQUAL";
// Symbols describing nodes in the Abstract Syntax Tree
case ALLOC_EXPR: return "ALLOC_EXPR";
// case ANY_TYPE: return "ANY_TYPE";
case ARGUMENT: return "ARGUMENT";
case ARRAY_ACCESS: return "ARRAY_ACCESS";
case ARRAY_SIZE_EXPR: return "ARRAY_SIZE_EXPR";
case ARRAY_TYPE: return "ARRAY_TYPE";
case AS_INTEGER_EXPR: return "AS_INTEGER_EXPR";
case AS_PTR_TO_EXPR: return "AS_PTR_TO_EXPR";
case ASSIGN_STMT: return "ASSIGN_STMT";
// case BEHAVIOR: return "BEHAVIOR";
case BOOL_CONST: return "BOOL_CONST";
case BOOL_TYPE: return "BOOL_TYPE";
case BREAK_STMT: return "BREAK_STMT";
case CALL_EXPR: return "CALL_EXPR";
case CALL_STMT: return "CALL_STMT";
// case CASE: return "CASE";
// case CATCH: return "CATCH";
// case CHAR_CONST: return "CHAR_CONST";
case CHAR_TYPE: return "CHAR_TYPE";
case CLASS_DEF: return "CLASS_DEF";
case CLASS_FIELD: return "CLASS_FIELD";
case CLOSURE_EXPR: return "CLOSURE_EXPR";
// case CODE: return "CODE";
case CONST_DECL: return "CONST_DECL";
case CONSTRUCTOR: return "CONSTRUCTOR";
case CONTINUE_STMT: return "CONTINUE_STMT";
case COUNT_VALUE: return "COUNT_VALUE";
case DO_STMT: return "DO_STMT";
// case DOUBLE_CONST: return "DOUBLE_CONST";
case DOUBLE_TYPE: return "DOUBLE_TYPE";
case DYNAMIC_CHECK: return "DYNAMIC_CHECK";
case ERROR_DECL: return "ERROR_DECL";
case FIELD_ACCESS: return "FIELD_ACCESS";
case FIELD_INIT: return "FIELD_INIT";
case FOR_STMT: return "FOR_STMT";
// case FUNCTION: return "FUNCTION";
case FUNCTION_PROTO: return "FUNCTION_PROTO";
case FUNCTION_TYPE: return "FUNCTION_TYPE";
case GLOBAL: return "GLOBAL";
// case HEADER: return "HEADER";
case IF_STMT: return "IF_STMT";
// case INT_CONST: return "INT_CONST";
case INT_TYPE: return "INT_TYPE";
// case INTERFACE: return "INTERFACE";
case IS_INSTANCE_OF_EXPR: return "IS_INSTANCE_OF_EXPR";
case IS_KIND_OF_EXPR: return "IS_KIND_OF_EXPR";
case LOCAL: return "LOCAL";
// case METHOD: return "METHOD";
case METHOD_PROTO: return "METHOD_PROTO";
case NAMED_TYPE: return "NAMED_TYPE";
case NEW_EXPR: return "NEW_EXPR";
case NULL_CONST: return "NULL_CONST";
case PARAMETER: return "PARAMETER";
case PTR_TYPE: return "PTR_TYPE";
case RECORD_FIELD: return "RECORD_FIELD";
case RECORD_TYPE: return "RECORD_TYPE";
case RETURN_STMT: return "RETURN_STMT";
case SELF_EXPR: return "SELF_EXPR";
case SEND_EXPR: return "SEND_EXPR";
case SEND_STMT: return "SEND_STMT";
case SIZE_OF_EXPR: return "SIZE_OF_EXPR";
// case STRING_CONST: return "STRING_CONST";
case SUPER_EXPR: return "SUPER_EXPR";
case SWITCH_STMT: return "SWITCH_STMT";
case THROW_STMT: return "THROW_STMT";
case TRY_STMT: return "TRY_STMT";
case TYPE_ARG: return "TYPE_ARG";
case TYPE_DEF: return "TYPE_DEF";
case TYPE_OF_NULL_TYPE: return "TYPE_OF_NULL_TYPE";
case TYPE_PARM: return "TYPE_PARM";
// case USES: return "USES";
// case RENAMING: return "RENAMING";
case VARIABLE_EXPR: return "VARIABLE_EXPR";
case VOID_TYPE: return "VOID_TYPE";
case WHILE_STMT: return "WHILE_STMT";
case FREE_STMT: return "FREE_STMT";
case DEBUG_STMT: return "DEBUG_STMT";
// Symbols used to identify build-in function ids, and mess selectors
case DOUBLE_TO_INT: return "DOUBLE_TO_INT";
case INT_TO_DOUBLE: return "INT_TO_DOUBLE";
case INT_TO_CHAR: return "INT_TO_CHAR";
case CHAR_TO_INT: return "CHAR_TO_INT";
case PTR_TO_BOOL: return "PTR_TO_BOOL";
case POS_INF: return "POS_INF";
case NEG_INF: return "NEG_INF";
case NEG_ZERO: return "NEG_ZERO";
case I_IS_ZERO: return "I_IS_ZERO";
case I_NOT_ZERO: return "I_NOT_ZERO";
case BAR: return "BAR";
case CARET: return "CARET";
case AMP: return "AMP";
case BAR_BAR: return "BAR_BAR";
case AMP_AMP: return "AMP_AMP";
case EQUAL_EQUAL: return "EQUAL_EQUAL";
case NOT_EQUAL: return "NOT_EQUAL";
case LESS: return "LESS";
case LESS_EQUAL: return "LESS_EQUAL";
case GREATER: return "GREATER";
case GREATER_EQUAL: return "GREATER_EQUAL";
case LESS_LESS: return "LESS_LESS";
case GREATER_GREATER: return "GREATER_GREATER";
case GREATER_GREATER_GREATER: return "GREATER_GREATER_GREATER";
case PLUS: return "PLUS";
case MINUS: return "MINUS";
case STAR: return "STAR";
case SLASH: return "SLASH";
case PERCENT: return "PERCENT";
case UNARY_BANG: return "UNARY_BANG";
case UNARY_STAR: return "UNARY_STAR";
case UNARY_AMP: return "UNARY_AMP";
case UNARY_MINUS: return "UNARY_MINUS";
// Symbols identifying primitive functions
case PRIMITIVE_I_ADD: return "PRIMITIVE_I_ADD";
case PRIMITIVE_I_SUB: return "PRIMITIVE_I_SUB";
case PRIMITIVE_I_MUL: return "PRIMITIVE_I_MUL";
case PRIMITIVE_I_DIV: return "PRIMITIVE_I_DIV";
case PRIMITIVE_I_REM: return "PRIMITIVE_I_REM";
case PRIMITIVE_I_SLL: return "PRIMITIVE_I_SLL";
case PRIMITIVE_I_SRA: return "PRIMITIVE_I_SRA";
case PRIMITIVE_I_SRL: return "PRIMITIVE_I_SRL";
case PRIMITIVE_I_LT: return "PRIMITIVE_I_LT";
case PRIMITIVE_I_LE: return "PRIMITIVE_I_LE";
case PRIMITIVE_I_GT: return "PRIMITIVE_I_GT";
case PRIMITIVE_I_GE: return "PRIMITIVE_I_GE";
case PRIMITIVE_I_EQ: return "PRIMITIVE_I_EQ";
case PRIMITIVE_I_NE: return "PRIMITIVE_I_NE";
case PRIMITIVE_I_AND: return "PRIMITIVE_I_AND";
case PRIMITIVE_I_OR: return "PRIMITIVE_I_OR";
case PRIMITIVE_I_XOR: return "PRIMITIVE_I_XOR";
case PRIMITIVE_I_NOT: return "PRIMITIVE_I_NOT";
case PRIMITIVE_I_NEG: return "PRIMITIVE_I_NEG";
case PRIMITIVE_I_IS_ZERO: return "PRIMITIVE_I_IS_ZERO";
case PRIMITIVE_I_NOT_ZERO: return "PRIMITIVE_I_NOT_ZERO";
case PRIMITIVE_D_ADD: return "PRIMITIVE_D_ADD";
case PRIMITIVE_D_SUB: return "PRIMITIVE_D_SUB";
case PRIMITIVE_D_MUL: return "PRIMITIVE_D_MUL";
case PRIMITIVE_D_DIV: return "PRIMITIVE_D_DIV";
case PRIMITIVE_D_LT: return "PRIMITIVE_D_LT";
case PRIMITIVE_D_LE: return "PRIMITIVE_D_LE";
case PRIMITIVE_D_GT: return "PRIMITIVE_D_GT";
case PRIMITIVE_D_GE: return "PRIMITIVE_D_GE";
case PRIMITIVE_D_EQ: return "PRIMITIVE_D_EQ";
case PRIMITIVE_D_NE: return "PRIMITIVE_D_NE";
case PRIMITIVE_D_NEG: return "PRIMITIVE_D_NEG";
case PRIMITIVE_B_AND: return "PRIMITIVE_B_AND";
case PRIMITIVE_B_OR: return "PRIMITIVE_B_OR";
case PRIMITIVE_B_NOT: return "PRIMITIVE_B_NOT";
case PRIMITIVE_B_EQ: return "PRIMITIVE_B_EQ";
case PRIMITIVE_B_NE: return "PRIMITIVE_B_NE";
case PRIMITIVE_OBJECT_EQ: return "PRIMITIVE_OBJECT_EQ";
case PRIMITIVE_OBJECT_NE: return "PRIMITIVE_OBJECT_NE";
case PRIMITIVE_DEREF: return "PRIMITIVE_DEREF";
case PRIMITIVE_ADDRESS_OF: return "PRIMITIVE_ADDRESS_OF";
// Symbols identifying primitive functions
case PRIMITIVE_INT_TO_DOUBLE: return "PRIMITIVE_INT_TO_DOUBLE";
case PRIMITIVE_DOUBLE_TO_INT: return "PRIMITIVE_DOUBLE_TO_INT";
case PRIMITIVE_INT_TO_CHAR: return "PRIMITIVE_INT_TO_CHAR";
case PRIMITIVE_CHAR_TO_INT: return "PRIMITIVE_CHAR_TO_INT";
case PRIMITIVE_PTR_TO_BOOL: return "PRIMITIVE_PTR_TO_BOOL";
case PRIMITIVE_POS_INF: return "PRIMITIVE_POS_INF";
case PRIMITIVE_NEG_INF: return "PRIMITIVE_NEG_INF";
case PRIMITIVE_NEG_ZERO: return "PRIMITIVE_NEG_ZERO";
// case PRIMITIVE_xxx: return "yyy";
// Misc Symbols
case KEYWORD: return "KEYWORD";
case NORMAL: return "NORMAL";
default:
printf ("\nSYMBOL = %d\n", i);
return "***** unknown symbol *****";
}
}
// printSymbol (int)
//
// Print this symbol out. Used for debugging.
//
void printSymbol (int sym) {
printf ("***** Symbol = %s *****\n", symbolName (sym));
}
// hexCharToInt (char)
//
// This routine is passed a character. If it is a hex digit, i.e.,
// 0, 1, 2, ... 9, a, b, ... f, A, B, ... F
// then it returns its value (0..15). Otherwise, it returns -1.
//
int hexCharToInt (char ch) {
if (('0'<=ch) && (ch<='9')) {
return ch-'0';
} else if (('a'<=ch) && (ch<='f')) {
return ch-'a' + 10;
} else if (('A'<=ch) && (ch<='F')) {
return ch-'A' + 10;
} else {
return -1;
}
}
// intToHexChar (int)
//
// This routine is passed an integer 0..15. It returns a char
// from 0 1 2 3 4 5 6 7 8 9 A B C D E F
//
char intToHexChar (int i) {
if (i<10) {
return '0' + i;
} else {
return 'A' + (i-10);
}
}
// raisedToThePower (int i)
//
// This routine returns 10**i. "i" may be positive or negative.
// It uses a brute force approach. There must be a better way...
//
double raisedToThePower (int i) {
double d;
if (i==0) {
return 1.0;
} else if (i > 0) {
for (d=1.0; i>0; i--) {
d = d * 10.0;
}
return d;
} else {
for (d=1.0; i<0; i++) {
d = d / 10.0;
}
return d;
}
}
// isEscapeChar (char) --> ASCII Value
//
// This routine is passed a char, such as 'n'. If this char is one
// of the escape characters, (e.g., \n), then this routine returns the
// ASCII value (e.g., 10). Otherwise, it returns -1.
//
int isEscape (char ch) {
if (ch == '0') {
return 0;
} else if (ch == 'a') {
return '\a';
} else if (ch == 'b') {
return '\b';
} else if (ch == 't') {
return '\t';
} else if (ch == 'n') {
return '\n';
} else if (ch == 'v') {
return '\v';
} else if (ch == 'f') {
return '\f';
} else if (ch == 'r') {
return '\r';
} else if (ch == '\"') {
return '\"';
} else if (ch == '\'') {
return '\'';
} else if (ch == '\\') {
return '\\';
} else {
return -1;
}
}
// scanEscape ()
//
// This routine is called after we have gotten a back-slash. It
// reads whatever characters follow and returns the character. If
// problems arise it prints a message and returns '?'. If EOF is
// encountered, it prints a message, calls unGetChar(EOF) and returns '?'.
//
int scanEscape () {
int ch, ch2, i, j;
ch2 = getNextChar ();
if (ch2 == '\n') {
lexError ("End-of-line encountered after a \\ escape");
incrLineNumber ();
return '?';
}
if (ch2 == EOF) {
lexError ("End-of-file encountered after a \\ escape");
unGetChar (ch2);
return '?';
}
i = isEscape(ch2);
if (i != -1) {
return i;
} else if (ch2 == 'x') {
ch = getNextChar (); // Get 1st hex digit
if (ch == '\n') {
lexError ("End-of-line encountered after a \\x escape");
incrLineNumber ();
return '?';
}
if (ch == EOF) {
lexError ("End-of-file encountered after a \\x escape");
unGetChar (ch);
return '?';
}
i = hexCharToInt (ch);
if (i < 0) {
lexError ("Must have a hex digit after \\x");
return '?';
} else {
ch2 = getNextChar ();
if (ch2 == '\n') {
lexError ("End-of-line encountered after a \\x escape");
incrLineNumber ();
return '?';
}
if (ch2 == EOF) {
lexError ("End-of-file encountered after a \\x escape");
unGetChar (ch2);
return '?';
}
j = hexCharToInt (ch2);
if (j < 0) {
lexError ("Must have two hex digits after \\x");
return '?';
}
return (i<<4) + j;
}
} else {
lexError ("Illegal escape (only \\0, \\a, \\b, \\t, \\n, \\v, \\f, \\r, \\\", \\\', \\\\, and \\xHH allowed)");
return '?';
}
}
// isOpChar (char)
//
// Return true if this char is one of...
// + - * \ / ! @ # $ % ^ & ~ ` | ? < > =
//
int isOpChar (char ch) {
if (ch == '+') {
return 1;
} else if (ch == '-') {
return 1;
} else if (ch == '*') {
return 1;
} else if (ch == '/') {
return 1;
} else if (ch == '\\') {
return 1;
} else if (ch == '!') {
return 1;
} else if (ch == '@') {
return 1;
} else if (ch == '#') {
return 1;
} else if (ch == '$') {
return 1;
} else if (ch == '%') {
return 1;
} else if (ch == '^') {
return 1;
} else if (ch == '&') {
return 1;
} else if (ch == '~') {
return 1;
} else if (ch == '`') {
return 1;
} else if (ch == '|') {
return 1;
} else if (ch == '?') {
return 1;
} else if (ch == '<') {
return 1;
} else if (ch == '>') {
return 1;
} else if (ch == '=') {
return 1;
} else {
return 0;
}
}
// addToHash (int i)
//
// This routine is passed an integer. It adds it into the running "hashVal".
// The "hashVal" is initialized before each file is processed. Every token is used to
// modify this value (by calling this routine) so that, after completely processing a file,\
// the resulting "hashVal" will be a psuedo-random number, dependent on exactly what tokens
// were in the file and what order they were in.
//
// The idea is that any change to the tokens will almost certainly alter the computed
// "hashVal". Comments and formatting changes, however, will not change the value. This
// will be used later to check to make sure that compiled versions of files are consistent
// and that the header files have not been changed. This technique is not fool-proof
// and should not be relied on; the programmer should make sure that header files are
// not changed between the several compilations that go into building an executable.
// Nevertheless, with this technique there is a reasonably good chance that a change
// to a header file will be detected and a runtime error can be signaled during
// program initialization.
//
void addToHash (int i) {
// printf (" Starting: 0x%08x\n", hashVal);
// printf (" i: 0x%08x (%d)\n", i, i);
// Rotate the current hashVal 1 bit to the left.
if (hashVal < 0) {
hashVal = (hashVal << 1) + 1;
} else {
hashVal = hashVal << 1;
}
// printf (" after <<: 0x%08x\n", hashVal);
// Add the current integer in by xor-ing it.
hashVal = hashVal ^ i;
// printf (" After xor: 0x%08x\n", hashVal);
// Next use the sequence number to add more info into the hashVal.
hashCount = hashCount + 1;
hashVal = hashVal ^ (hashCount * i);
// printf (" hashCount: 0x%08x\n", hashCount);
// printf (" hashCount*i: 0x%08x\n", hashCount*i);
// printf (" Ending: 0x%08x\n", hashVal);
}
// addTokenToHash (token)
//
// This routine is passed a token. It adds info about that token into the running
// hashVal computation.
//
void addTokenToHash (Token token) {
int i;
int * p;
String * str;
addToHash (token.type);
switch (token.type) {
case ID:
case OPERATOR:
case STRING_CONST:
str = token.value.svalue;
for (i=0; i<str->length; i++) {
addToHash (str->chars [i]);
}
return;
case CHAR_CONST:
case INT_CONST:
addToHash(token.value.ivalue);
return;
case DOUBLE_CONST:
p = (int *) (& token.value.rvalue);
addToHash(*p);
p++;
addToHash(*p);
return;
}
}
// createTokenPos () --> int
//
// This routine takes the following:
// currentInputFileIndex (FF = 0..255)
// currentLineOfToken (LLLL = 0..65535)
// currentCharPosOfToken (PP = 0..255)
// and combines them into one 32-bit integer, as follows:
// FFLLLLPP
//
int createTokenPos () {
return
((currentInputFileIndex & 0x000000ff) << 24) |
((currentLineOfToken & 0x0000ffff) << 8) |
(currentCharPosOfToken & 0x000000ff);
}
// extractLineNumber (token) --> int
//
// Given a token, this routine returns the line number.
//
// This routine is not reliable for files of size >= 65535.
//
int extractLineNumber (Token token) {
int i = (token.tokenPos & 0x00ffff00) >> 8;
if (i >= 65535) {
return -1;
} else {
return i;
}
}
// extractCharPos (token) --> int
//
// Given a token, this routine returns the character position within the line.
//
int extractCharPos (Token token) {
int i = token.tokenPos & 0x000000ff;
if (i >= 255) {
return -1;
} else {
return i;
}
}
// extractFilename (token) --> char *
//
// Given a token, this routine returns the name of the file.
//
char * extractFilename (Token token) {
int i = (token.tokenPos >> 24) & 0x000000ff;
return inputFileNames [i];
}
// incrLineNumber ()
//
// This routine increments "currentLineOfToken". The largest value (65535) is sticky.
//
void incrLineNumber () {
if (currentLineOfToken >= 65535) {
currentLineOfToken = 65535;
} else {
currentLineOfToken++;
}
currentCharPosOfToken = 0;
return;
}
// getNextChar () --> char
//
// This routine reads and returns the next character from the input file.
// It also increments "currentCharPosOfToken". The largest value (255) is sticky.
//
char getNextChar () {
if (currentCharPosOfToken >= 255) {
currentCharPosOfToken = 255;
} else {
currentCharPosOfToken++;
}
return getc (inputFile);
}
// unGetChar (ch)
//
// This routine returns the given character to the input buffer. It also
// decrements "currentCharPosOfToken". The largest value (255) is sticky.
//
void unGetChar (char ch) {
if (currentCharPosOfToken >= 255) {
currentCharPosOfToken = 255;
} else {
currentCharPosOfToken--;
}
ungetc (ch, inputFile);
return;
}
// addToInputFileNames (filename)
//
// This routine is passed a filename. It addes it to the array "inputFileNames".
// The global variable "currentInputIndex" is:
// -1 = initial value
// 0 = first file
// 1 = second file
// ...
// 255 = max value, indicating too many files
// We keep an array of pointers to the file names. Within the token, we store only
// an 8 bit value, which is used to index into this array. This value can range from
// 0 to 255, but 255 is used to represent problems, and will be <filename not available>.
// The value 255 is sticky; note that MAX_INPUT_FILES = 255.
//
void addToInputFilenames (char * filename) {
// printf ("addToInputFilenames: %s\n", filename);
// printf ("currentInputFileIndex, before: %d\n", currentInputFileIndex);
if (currentInputFileIndex >= MAX_INPUT_FILES-1) {
fprintf (stderr, "%s:0: ***** LEXICAL ERROR: Maximum number of input files (%d) exceeded\n", filename, MAX_INPUT_FILES);
errorsDetected++;
currentInputFileIndex = MAX_INPUT_FILES;
inputFileNames [currentInputFileIndex] = "<filename NOT available>";
} else {
currentInputFileIndex++;
inputFileNames [currentInputFileIndex] = filename;
}
// printf ("currentInputFileIndex, after: %d\n", currentInputFileIndex);
}