ftp.ee.pdx.edu

home *** CD-ROM | disk | FTP | other *** search

/ ftp.ee.pdx.edu / 2014.02.ftp.ee.pdx.edu.tar / ftp.ee.pdx.edu / pub / users / Harry / Blitz / BlitzSrc / main.cc < prev next >

Wrap

C/C++ Source or Header | 2007-09-19 | 51KB | 1,632 lines

// main.cc -- Driver routine for the compiler; basic control routines // // KPL Compiler // // Copyright 2002-2007, Harry H. Porter III // // This file may be freely copied, modified and compiled, on the sole // condition that if you modify it... // (1) Your name and the date of modification is added to this comment // under "Modifications by", and // (2) Your name and the date of modification is added to the printHelp() // routine under "Modifications by". // // Original Author: // 06/15/02 - Harry H. Porter III // // Modifcations by: // 03/15/06 - Harry H. Porter III // #include <signal.h> #include "main.h" //--------------- Global Variables --------------- TokenValue currentTokenValue; // Used in lexer only Token tokenMinusOne, token, token2, token3, token4, token5; int currentInputFileIndex = -1; // These describe the current position in the file int currentLineOfToken; // . int currentCharPosOfToken; // . int posOfNextToken; // Position of the next token int eofCount; // Used to check for looping on EOF char * inputFileNames [MAX_INPUT_FILES+1]; // Array of ptrs to file names int errorsDetected; // Count of errors detected so far int tokenPosOfLastError; // Used to suppress extraneous syntax errors int hashVal = 0; // The running hash code for this file int hashCount = 0; // Used in computing the hashVal char * commandPackageName = NULL; // The package name, NULL = missing char * commandDirectoryName; // The search directory name, NULL = missing char * headerFileName = NULL; // The header file name, NULL = missing char * codeFileName = NULL; // The code file name, NULL = missing char * outputFileName = NULL; // The .s filename, NULL = missing FILE * inputFile; // The input file, e.g., stdin FILE * outputFile; // The output file, e.g., stdout int commandOptionS = 0; // True: print the symbol table int commandOptionP = 0; // True: pretty-print the AST int commandOptionAST = 0; // True: print the full AST int commandOptionTestLexer = 0; // True: stop after lexer & print tokens int commandOptionTestParser = 0; // True: stop after parser & print AST int safe = 1; // True: only allow safe constructs Header * headerList = NULL; // List of all headers Header * headerListLast = NULL; // . Mapping <String, Header> * // Strings --> Headers headerMapping = NULL; // . Code * code = NULL; // The thing being compiled Header * mainHeader = NULL; // The corresponding header Interface * tempInterList = NULL; // Used in topoProcessInterfaces ClassDef * tempClassList = NULL; // Used in topoProcessClasses int changed = 0; // Used in assignOffsets int quo = 0; // Used in integer division int rem = 0; // . ClassDef * currentClass = NULL; // The class we are currently processing Header * currentHeader = NULL; // The header we are currently processing int recursionCounter = 0; // Used to detect recursive types IntConst * memoryStart; // Used to compute memory usage by compiler IR * firstInstruction = NULL; // List of IR instructions IR * lastInstruction = NULL; // . int maxArgBytesSoFar = -1; // Used in setting fun/meth->maxArgBytes DoubleConst * floatList = NULL; // Used during code gen StringConst * stringList = NULL; // Used during code gen MethOrFunction * currentFunOrMeth = NULL; // Used during code gen Offset * firstDispatchOffset = NULL; // Ptr to linked list: 4,8,12,16,... // The String Table, for all Strings and IDs. String * stringTableIndex [STRING_TABLE_HASH_SIZE]; char buffer [BUFF_LEN]; // Misc. use, e.g., "_Person__Constructor" // These are initialized in "initializeConstants"... String * stringUnaryBang; String * stringUnaryStar; String * stringUnaryAmp; String * stringUnaryMinus; String * stringPlus; String * stringMinus; String * stringStar; String * stringSlash; String * stringPercent; String * stringBar; String * stringCaret; String * stringAmp; String * stringBarBar; String * stringAmpAmp; String * stringEqualEqual; String * stringNotEqual; String * stringLess; String * stringLessEqual; String * stringGreater; String * stringGreaterEqual; String * stringLessLess; String * stringGreaterGreater; String * stringGreaterGreaterGreater; String * stringIntToDouble; String * stringDoubleToInt; String * stringIntToChar; String * stringCharToInt; String * stringPtrToBool; String * stringPosInf; String * stringNegInf; String * stringNegZero; String * stringIIsZero; String * stringINotZero; String * stringObject; String * stringMain; CharType * basicCharType; IntType * basicIntType; DoubleType * basicDoubleType; BoolType * basicBoolType; VoidType * basicVoidType; TypeOfNullType * basicTypeOfNullType; AnyType * basicAnyType; PtrType * basicCharArrayPtrType; PtrType * basicVoidPtrType; PtrType * basicAnyPtrType; IntConst * constantIntZero; IntConst * constantIntOne; IntConst * constantIntMinusOne; BoolConst * constantFalse; BoolConst * constantTrue; /***** DoubleExpr * constantDoubleZero; DoubleExpr * constantDoubleOne; CharExpr * constantCharNull; BoolExpr * constantFalse; BoolExpr * constantTrue; NullExpr * constantNull; String * stringUninitialized; String * stringGenericDestructor; String * stringThis; Quad * firstQuad; Quad * lastQuad; StringExpr * stringList; DoubleExpr * floatList; AstNode * currentMethOrFun; Class * currentClass; *****/ // main (argc, argv) // // The compiler main. // main (int argc, char ** argv) { Expression * expr; Header * hdr, * newHeader; Uses * uses; String * newPackName; AstNode * ast; int len; char * fileName; Type * t; int saveSafe; int wantProgress = 0; /***** Function * function; Method * meth; Class * cl; Function * fun; Expression * exp; Statement * stmtList; Type * type; TypeParm * templateParms; int isLone = 0; String *s1, *s2, *s3, *s4, *s5, *s6, *s7, *s8, *s9; Decl * decl; *****/ memoryStart = new IntConst (); // printf ("Starting memory address = 0x%08x\n", (int) memoryStart); errorsDetected = 0; tokenPosOfLastError = -1; // FF=255, LLLL=65535, PP=255 initKeywords (); checkHostCompatibility (); // Debugging: test the "Mapping" class... // testMapping (); processCommandLine (argc, argv); initializeConstants (); // // print various global variables and command line options. // printf ("safe = %d\n", safe); // printf ("commandOptionTestLexer = %d\n", commandOptionTestLexer); // printf ("commandOptionTestParser = %d\n", commandOptionTestParser); // if (commandPackageName == NULL) { // printf ("commandPackageName = NULL\n"); // } else { // printf ("commandPackageName = %s\n", commandPackageName); // } // if (commandDirectoryName == NULL) { // printf ("commandDirectoryName = NULL\n"); // } else { // printf ("commandDirectoryName = %s\n", commandDirectoryName); // } // if (headerFileName == NULL) { // printf ("headerFileName = NULL\n"); // } else { // printf ("headerFileName = %s\n", headerFileName); // } // if (codeFileName == NULL) { // printf ("codeFileName = NULL\n"); // } else { // printf ("codeFileName = %s\n", codeFileName); // } // if (outputFileName == NULL) { // printf ("outputFileName = NULL\n"); // } else { // printf ("outputFileName = %s\n", outputFileName); // } // If -testLexer command line option present... if (commandOptionTestLexer) { printf("file-name\tline\tchar\ttoken-type\tother-info\n"); printf("=========\t====\t====\t==========\t==========\n"); // initScanner ("temp1.h"); // testLexer (); // initScanner ("temp2.h"); // testLexer (); // initScanner ("temp3.h"); // testLexer (); if (initScanner (headerFileName) != NULL) { testLexer (); } terminateCompiler (); } /***** printf ("INPUT FILE NAMES\n"); printf ("================\n"); for (int i=0; i<=currentInputFileIndex; i++) { printf ("%d: %s\n", i, inputFileNames [i]); } terminateCompiler (); *****/ headerMapping = new Mapping<String, Header> (15, NULL); // If -testParser command line option present... if (commandOptionTestParser) { if (initScanner (headerFileName)) { if (token.type == FUNCTION) { ast = parseFunction (1); // 1=expecting ID } else if (token.type == METHOD) { ast = parseMethod (); } else if (token.type == HEADER) { ast = parseHeader (); } else if (token.type == CODE) { ast = parseCode (); } else if (token.type == DO) { scan (); ast = parseExpr ("Expecting test expression after 'do'"); } if (commandOptionAST) { printAst (6, ast); } if (commandOptionP) { printf ("======== PRETTY PRINT ========\n"); pretty (ast); printf ("\n================================\n"); } } terminateCompiler (); } // Make sure the user specified a package name on the command line. if (commandPackageName == NULL) { fprintf (stderr, "***** ERROR: Missing package name on command line\n"); errorsDetected++; terminateCompiler (); } if ((codeFileName == NULL) || (headerFileName == NULL)) { programLogicError ("File names should have been created from package name"); } if (wantProgress) printf ("Parsing...\n"); // Parse the code file... if (initScanner (codeFileName)) { code = parseCode (); // printf ("\n================ CODE ================\n"); // printAst (6, code); // pretty (code); // printf ("================================\n"); } else { code = NULL; } // Parse the main header file... fileName = initScanner (headerFileName); if (fileName != NULL) { // mainHeader = parseHeader (commandPackageName); mainHeader = parseHeader (); headerList = mainHeader; headerListLast = mainHeader; if (mainHeader) { // headerMapping->enter (mainHeader->packageName, mainHeader); headerMapping->enter (lookupAndAdd (commandPackageName, ID), mainHeader); } } else { mainHeader = NULL; headerList = NULL; headerListLast = NULL; } // Run through the headerList. For every header on it, see which // packages it uses. For each, if it has not already been parsed // then parse it and add it to the headerMapping. hdr = headerList; while (hdr != NULL) { // Look at the next "hdr"... // Run through the list of packages this "hdr" uses... uses = hdr->uses; while (uses != NULL) { newPackName = uses->id; // If this package has not been seen before... if (! headerMapping->alreadyDefined (newPackName)) { // Figure out the name of the .h header file... len = strlen (newPackName->chars); fileName = (char *) calloc (1, len + 4); strcpy (fileName, newPackName->chars); fileName [len] = '.'; fileName [len+1] = 'h'; fileName [len+2] = '\0'; fileName = initScanner (fileName); if (fileName != NULL) { // Parse the new package... // newHeader = parseHeader (newPackName->chars); newHeader = parseHeader (); if (newHeader != NULL) { // Add the new package to headerMapping... // (NOTE: the keys will not contain any "-d" directory prefixes.) // printf ("ADDING TO HEADERMAPPING: filename=%s, newHeader=%s\n", // newPackName->chars, newHeader->packageName->chars); headerMapping->enter (newPackName, newHeader); // Add the new package to the end of headerList... headerListLast->next = newHeader; headerListLast = newHeader; } } } uses = uses->next; } // Move to next header on the headerList hdr = hdr->next; } // printAllData (); // dump ("After parsing..."); // Check for package-uses circularity and re-order all packages... if (wantProgress) printf ("Finding a topological-sort order for packages...\n"); topoProcessAllPackages (); // Must stop here if errors, since one error is cyclic package-use. From // here on out, we're assuming our super-packages have already been done first. if (errorsDetected) { terminateCompiler (); } saveSafe = safe; // Run through all headers and process each in turn... for (hdr=headerList; hdr!=NULL; hdr=hdr->next) { if (wantProgress) printf ("Processing package \"%s\"...\n", hdr->packageName->chars); currentHeader = hdr; if (currentHeader == mainHeader) { safe = saveSafe; } else { safe = 0; // Allow unsafe constructs in any header we are using. } // Build the symbol tables... if (wantProgress) printf (" Building symbol tables...\n"); buildPackageMapping (hdr); // Topo-process the interfaces... if (wantProgress) printf (" Topologically sorting interfaces...\n"); topoProcessInterfaces (hdr); // Topo-process the classes... if (wantProgress) printf (" Topologically sorting classes...\n"); topoProcessClasses (hdr); // Bind Types... if (wantProgress) printf (" Binding types...\n"); bindTypeNames (hdr, NULL); // Check TypeDef circularity... if (wantProgress) printf (" Checking type def circularity...\n"); checkTypeDefCircularity (hdr); // Inherit Class Fields... if (wantProgress) printf (" Inheriting class fields...\n"); inheritFields (hdr); // Inherit MethodProtos... if (wantProgress) printf (" Inheriting method prototypes...\n"); inheritMethodProtos (hdr); // Inherit messages for interface "extends" hierarchy... if (wantProgress) printf (" Inheriting messages in interfaces...\n"); inheritMessages (hdr); // Bind variable names... if (wantProgress) printf (" Binding variable names...\n"); bindVarNames (hdr, NULL); // Assign offsets and evaluate expressions... if (wantProgress) printf (" Assigning offsets and evaluating static expressions...\n"); assignOffsetsAndEvalExprs (hdr); // The subtype test is operative after this point. // testSubType (hdr); // break; // Check class-interface "implements" hierarchy... if (wantProgress) printf (" Checking class/interface implements...\n"); checkImplements (hdr); // Check method prototypes... if (wantProgress) printf (" Checking method prototypes...\n"); checkMethodProtos (hdr); // Check interface extends... if (wantProgress) printf (" Checking interface extends...\n"); checkExtends (hdr); // printf ("=================== Before checkTypes =========================\n"); // printAst (6, hdr); // printf ("===============================================================\n"); // Check types... if (wantProgress) printf (" Checking types...\n"); t = checkTypes (hdr); // Evaluate expressions again; to handle anything introduced by checkTypes... if (wantProgress) printf (" Evaluating static expressions...\n"); changed = 1; while (changed) { changed = 0; evalExprsIn (hdr); // The test called "bizarre" will print this message... // if (changed) { // printf ("LOOKS LIKE WE NEEDED A SECOND PASS!!!\n"); // } } // Make sure the recursionCounter is OK... if (recursionCounter != 0) { printf ("recursionCounter = %d\n", recursionCounter); programLogicError ("recursionCounter not incremented and decremented equally"); } // Check flow of control... if (wantProgress) printf (" Checking flow of control...\n"); fallsThru (hdr); // Assign Dispatch Table Offsets... if (wantProgress) printf (" Assigning Dispatch Table Offsets...\n"); assignDispatchTableOffsets (hdr); // Assign offsets to locals and parms in functions and closures... // printf (" Asigning offsets to local variables and parameters...\n"); // assignLocalOffsets (hdr); // printf ("\n========== HERE IS THE PACKAGE WE JUST FINISHED ==========\n"); // hdr->prettyPrint (4); // printf ("\n============================================================\n"); } safe = saveSafe; // Make sure the recursionCounter is OK... if (recursionCounter != 0) { printf ("recursionCounter = %d\n", recursionCounter); programLogicError ("recursionCounter not incremented and decremented equally"); } // Stop here if errors... if (commandOptionAST) { // && errorsDetected) { printAllData (); } if (commandOptionP) { // && errorsDetected) { dump ("After semantic processing..."); } if (errorsDetected) { terminateCompiler (); } // Generating IR code... if (wantProgress) printf (" Generating IR code...\n"); generateIR (); // Assign offsets to locals and parms in functions and closures... if (wantProgress) printf (" Asigning offsets to local variables and parameters...\n"); assignLocalOffsets (mainHeader); // Print the IR code... if (wantProgress) printf (" Writing .s file...\n"); printIR (); // Print the full AST in all detail, if requested... if (commandOptionAST) { printAllData (); } // Pretty-print the AST, if requested... if (commandOptionP) { dump ("After code generation..."); } terminateCompiler (); } // initializeConstants () // // This routine initializes various constants that will be used during the // compilation. // void initializeConstants () { ArrayType * arrayType; // The nodes created here will be positioned at "(null):0:" token.value.svalue = NULL; token.tokenPos = 0; stringUnaryBang = lookupAndAdd ("_prefix_!", OPERATOR); stringUnaryStar = lookupAndAdd ("_prefix_*", OPERATOR); stringUnaryAmp = lookupAndAdd ("_prefix_&", OPERATOR); stringUnaryMinus = lookupAndAdd ("_prefix_-", OPERATOR); stringPlus = lookupAndAdd ("+", OPERATOR); stringMinus = lookupAndAdd ("-", OPERATOR); stringStar = lookupAndAdd ("*", OPERATOR); stringSlash = lookupAndAdd ("/", OPERATOR); stringPercent = lookupAndAdd ("%", OPERATOR); stringBar = lookupAndAdd ("|", OPERATOR); stringCaret = lookupAndAdd ("^", OPERATOR); stringAmp = lookupAndAdd ("&", OPERATOR); stringBarBar = lookupAndAdd ("||", OPERATOR); stringAmpAmp = lookupAndAdd ("&&", OPERATOR); stringEqualEqual = lookupAndAdd ("==", OPERATOR); stringNotEqual = lookupAndAdd ("!=", OPERATOR); stringLess = lookupAndAdd ("<", OPERATOR); stringLessEqual = lookupAndAdd ("<=", OPERATOR); stringGreater = lookupAndAdd (">", OPERATOR); stringGreaterEqual = lookupAndAdd (">=", OPERATOR); stringLessLess = lookupAndAdd ("<<", OPERATOR); stringGreaterGreater = lookupAndAdd (">>", OPERATOR); stringGreaterGreaterGreater = lookupAndAdd (">>>", OPERATOR); stringIntToDouble = lookupAndAdd ("intToDouble", ID); stringDoubleToInt = lookupAndAdd ("doubleToInt", ID); stringIntToChar = lookupAndAdd ("intToChar", ID); stringCharToInt = lookupAndAdd ("charToInt", ID); stringPtrToBool = lookupAndAdd ("ptrToBool", ID); stringPosInf = lookupAndAdd ("posInf", ID); stringNegInf = lookupAndAdd ("negInf", ID); stringNegZero = lookupAndAdd ("negZero", ID); stringIIsZero = lookupAndAdd ("iIsZero", ID); // hidden function stringINotZero = lookupAndAdd ("iNotZero", ID); // hidden function stringObject = lookupAndAdd ("Object", ID); stringMain = lookupAndAdd ("main", ID); // Initialize nodes corresponding to basic types... token.type = CHAR; basicCharType = new CharType (); token.type = INT; basicIntType = new IntType (); token.type = DOUBLE; basicDoubleType = new DoubleType (); token.type = BOOL; basicBoolType = new BoolType (); token.type = VOID; basicVoidType = new VoidType (); token.type = TYPE_OF_NULL; basicTypeOfNullType = new TypeOfNullType (); token.type = ANY_TYPE; basicAnyType = new AnyType (); token.type = ARRAY; // array [*] of char arrayType = new ArrayType (); arrayType->baseType = basicCharType; arrayType->sizeOfElements = 1; token.type = PTR; // ptr to array [*] of char basicCharArrayPtrType = new PtrType (); basicCharArrayPtrType->baseType = arrayType; token.type = PTR; // ptr to void basicVoidPtrType = new PtrType (); basicVoidPtrType->baseType = basicVoidType; token.type = PTR; // ptr to anyType basicAnyPtrType = new PtrType (); basicAnyPtrType->baseType = basicAnyType; /***** printf ("========== Basic Type Nodes ==========\n"); printf ("basicCharType = "); pretty (basicCharType); printf ("basicIntType = "); pretty (basicIntType); printf ("basicDoubleType = "); pretty (basicDoubleType); printf ("basicBoolType = "); pretty (basicBoolType); printf ("basicVoidType = "); pretty (basicVoidType); printf ("basicTypeOfNullType = "); pretty (basicTypeOfNullType); printf ("basicCharArrayPtrType = "); pretty (basicCharArrayPtrType); printf ("basicVoidPtrType = "); pretty (basicVoidPtrType); printf ("basicAnyType = "); pretty (basicAnyType); printf ("========================================\n"); error (basicVoidPtrType, "Testing..."); *****/ // Some functions and messages are "primitives". Examples include "+" and // "doubleToInt". Mark these symbols so that they can be easily tested later. stringUnaryBang->primitiveSymbol = UNARY_BANG; stringUnaryStar->primitiveSymbol = UNARY_STAR; stringUnaryAmp->primitiveSymbol = UNARY_AMP; stringUnaryMinus->primitiveSymbol = UNARY_MINUS; stringPlus->primitiveSymbol = PLUS; stringMinus->primitiveSymbol = MINUS; stringStar->primitiveSymbol = STAR; stringSlash->primitiveSymbol = SLASH; stringPercent->primitiveSymbol = PERCENT; stringBar->primitiveSymbol = BAR; stringCaret->primitiveSymbol = CARET; stringAmp->primitiveSymbol = AMP; stringBarBar->primitiveSymbol = BAR_BAR; stringAmpAmp->primitiveSymbol = AMP_AMP; stringEqualEqual->primitiveSymbol = EQUAL_EQUAL; stringNotEqual->primitiveSymbol = NOT_EQUAL; stringLess->primitiveSymbol = LESS; stringLessEqual->primitiveSymbol = LESS_EQUAL; stringGreater->primitiveSymbol = GREATER; stringGreaterEqual->primitiveSymbol = GREATER_EQUAL; stringLessLess->primitiveSymbol = LESS_LESS; stringGreaterGreater->primitiveSymbol = GREATER_GREATER; stringGreaterGreaterGreater->primitiveSymbol = GREATER_GREATER_GREATER; stringIntToDouble->primitiveSymbol = INT_TO_DOUBLE; stringDoubleToInt->primitiveSymbol = DOUBLE_TO_INT; stringIntToChar->primitiveSymbol = INT_TO_CHAR; stringCharToInt->primitiveSymbol = CHAR_TO_INT; stringPtrToBool->primitiveSymbol = PTR_TO_BOOL; stringPosInf->primitiveSymbol = POS_INF; stringNegInf->primitiveSymbol = NEG_INF; stringNegZero->primitiveSymbol = NEG_ZERO; stringIIsZero->primitiveSymbol = I_IS_ZERO; stringINotZero->primitiveSymbol = I_NOT_ZERO; // Set up some constants... constantIntZero = new IntConst (); constantIntOne = new IntConst (); constantIntOne->ivalue = 1; constantIntMinusOne = new IntConst (); constantIntMinusOne->ivalue = -1; constantFalse = new BoolConst (0); constantTrue = new BoolConst (1); /***** constantDoubleZero = new DoubleExpr (); constantDoubleOne = new DoubleExpr (); constantDoubleOne->rvalue = 1.0; constantCharNull = new CharExpr (); constantNull = new NullExpr (); stringUninitialized = lookupAndAdd ("<uninitialized string>", ID); stringGenericDestructor = lookupAndAdd ("_Generic_Destructor", ID); stringThis = lookupAndAdd ("_this", ID); *****/ } // printAllData () // // This routine prints out all the data structures using printAst(). // in full and gory detail. // void printAllData () { fflush (stdout); printf ("\n================ HEADER LIST ================\n"); printAst (6, headerList); printf ("================================\n"); // printf ("\n================ CODE ================\n"); // printAst (6, code); // printf ("================================\n"); /***** printf ("constantIntZero:\n"); printAst (6, constantIntZero); printf ("constantIntOne:\n"); printAst (6, constantIntOne); printf ("constantIntMinusOne:\n"); printAst (6, constantIntMinusOne); printf ("constantDoubleZero:\n"); printAst (6, constantDoubleZero); printf ("constantDoubleOne:\n"); printAst (6, constantDoubleOne); printf ("constantCharNull:\n"); printAst (6, constantCharNull); printf ("constantFalse:\n"); printAst (6, constantFalse); printf ("constantTrue:\n"); printAst (6, constantTrue); printf ("constantNull:\n"); printAst (6, constantNull); printf ("basicTypeInt:\n"); printAst (6, basicTypeInt); printf ("basicTypeDouble:\n"); printAst (6, basicTypeDouble); printf ("basicTypeChar:\n"); printAst (6, basicTypeChar); printf ("basicTypeBool:\n"); printAst (6, basicTypeBool); printf ("basicTypeVoid:\n"); printAst (6, basicTypeVoid); printf ("basicTypeNull:\n"); printAst (6, basicTypeNull); printf ("basicTypeCharPtr:\n"); printAst (6, basicTypeCharPtr); printf ("basicTypeVoidPtr:\n"); printAst (6, basicTypeVoidPtr); *****/ fflush (stdout); } // dump (message) // // This routine prints out various data structures, using "prettyPrinting". // void dump (char * message) { Header * hdr; fflush (stdout); printf ("\n***************\n"); printf ("** **\n"); printf ("** DUMPING ** %s\n", message); printf ("** **\n"); printf ("***************\n\n"); // Print out the headerList... hdr = headerList; printf ("\n================ HEADER LIST ================\n"); while (hdr) { hdr->prettyPrint (4); // if (hdr->packageMapping) { // hdr->packageMapping->print (4); // } printf ("================================\n"); hdr = hdr->next; } /***** if (code) { printf ("\n================ CODE ================\n"); code->prettyPrint (4); printf ("================================\n"); } *****/ // Print out the headerMapping... // printf ("\n\n"); // headerMapping->print (0); fflush (stdout); } // testLexer () // // This routine can be used to test the lexer portion of the compiler. // void testLexer () { while (1) { printToken (token); /***** printf ("\t\t\t"); printToken (token2); printf ("\t\t\t\t"); printToken (token3); printf ("\t\t\t\t\t"); printToken (token4); printf ("\t\t\t\t\t\t"); printToken (token5); *****/ if (token.type == EOF) { break; } scan (); } // printStringTable (); } // printToken (token) // // This routine prints a token in a form such as... // 7 ID abc // 8 INT 1234 // 9 STRING_CONST "abc" // 10 CHAR_CONST 0x61 97 'a' // 11 WHILE // void printToken (Token token) { int i; printf("%s\t%d\t%d\t%s", extractFilename (token), extractLineNumber (token), extractCharPos (token), symbolName (token.type)); switch (token.type) { case ID: printf("\t"); printString (stdout, token.value.svalue); break; case OPERATOR: printf("\t"); printString (stdout, token.value.svalue); break; case STRING_CONST: printf("\t\""); printString (stdout, token.value.svalue); printf("\""); break; case CHAR_CONST: i = token.value.ivalue; printf("\t%02x\t%d", i, i); if ((i >= ' ') && (i <= '~')) { printf("\t\'%c\'", i); } break; case INT_CONST: printf("\t0x%08x\t%d", token.value.ivalue, token.value.ivalue); break; case DOUBLE_CONST: printf("\t%.16g", token.value.rvalue); break; } printf("\n"); } // programLogicError (msg) // // This routine prints the message and terminates the compiler. // void programLogicError (char * msg) { fprintf (stderr, "********************************************************************\n" "*****\n" "***** PROGRAM LOGIC ERROR\n" "*****\n" "***** It appears that this compiler contains a software bug.\n" "***** I apologize for the inconvenience it causes you.\n" "*****\n" "***** Error message: \"%s\"\n" "*****\n" "********************************************************************\n", msg); errorsDetected++; terminateCompiler (); } // terminateCompiler () // // Print out the number of errors (if any) and terminate of the compiler. If errors, then // remove the output file (if any). If no errors, the close the output file normally. // void terminateCompiler () { // fprintf (stderr, // "Estimated memory usage = %d bytes\n", // ((int) new IntConst)- ((int) memoryStart)); if (errorsDetected == 0) { if (outputFileName != NULL) { fclose (outputFile); } // fprintf (stderr, "\n********** Normal exit **********\n"); exit (0); } else if (errorsDetected == 1) { fprintf (stderr, "\n********** 1 error detected! **********\n"); } else { fprintf (stderr, "\n********** %d errors detected! **********\n", errorsDetected); } if (outputFileName != NULL) { fclose (outputFile); remove (outputFileName); } exit (1); } // fatalError (msg) // // This routine is called to print an error message and the current line // number of the curent token. It aborts the compiler. // void fatalError (char *msg) { errorsDetected++; doMessage (token, "***** FATAL ERROR", msg); terminateCompiler (); } // error (node, msg) // // This routine is called to print an error message. It returns; it // does not terminate the program after printing. The "node" parameter // is used to print additional information about the position of the error. // void error (AstNode * node, char * msg) { errorsDetected++; doMessage (node->tokn, "***** ERROR", msg); } // error2 (node, msg) // // This routine is called to print an error message. It returns; it // does not terminate the program after printing. The "node" parameter // is used to print additional information about the position of the error. // It differs from "error()" in that it does not print "***** ERRROR"; it is // used to print additional info after the initial error message. // void error2 (AstNode * node, char * msg) { // errorsDetected++; doMessage (node->tokn, " ", msg); } // syntaxError (msg) // // This routine is called to print a syntax error message. // // This routine returns; it does not terminate the compiler after printing. // // It uses the current token to print additional information about the // position of the error. // void syntaxError (char * msg) { syntaxErrorWithToken (token, msg); } // syntaxErrorWithToken (tok, msg) // // This routine is called to do the work of printing a syntax error message, // position on 'tok'. // void syntaxErrorWithToken (Token tok, char * msg) { // If the last message was on this token, then suppress this message. if (tok.tokenPos != tokenPosOfLastError) { errorsDetected++; doMessage (tok, "***** SYNTAX ERROR", msg); } tokenPosOfLastError = tok.tokenPos; } // doMessage (tok, prefix, msg) // // Print info about the current token and the given "msg". // void doMessage (Token tok, char * prefix, char * msg) { fprintf (stderr, "%s:%d: %s at ", extractFilename (tok), extractLineNumber (tok), prefix); switch (tok.type) { case ID: fprintf (stderr, "\'"); printString (stderr, tok.value.svalue); fprintf (stderr, "\'"); break; case STRING_CONST: fprintf (stderr, "\""); printString (stderr, tok.value.svalue); fprintf (stderr, "\""); break; case CHAR_CONST: fprintf (stderr, "\'"); printChar (stderr, tok.value.ivalue); fprintf (stderr, "\'"); break; case INT_CONST: fprintf (stderr, "\'%d\'", tok.value.ivalue); break; case DOUBLE_CONST: fprintf (stderr, "%.16g", tok.value.rvalue); break; case OPERATOR: fprintf (stderr, "\""); printString (stderr, tok.value.svalue); fprintf (stderr, "\""); break; default: fprintf (stderr, "%s", symbolName (tok.type)); } fprintf (stderr, ": %s\n", msg); fflush (stderr); if (errorsDetected >= MAX_NUMBER_OF_ERRORS) { fprintf (stderr, "%s:%d: ***** Too many errors - I'm giving up\n", extractFilename (tok), extractLineNumber (tok)); terminateCompiler (); } } // errorWithType (msg, type) // // This routine is called to print an error message. It returns; it does not // terminate the program unless we've had too many errors. // // The "type" parameter is printed after the message. For example, if msg is // "The expected type is" // the following might get printed: // test.c:26: The expected type is: ptr to array [*] of char // // This routine calls "resolveNamedType" so it prints out the underlying // type, getting rid of aliases. // void errorWithType (char * msg, Type * type) { Token tok; if (type == NULL) { tok.tokenPos = 0; } else { tok = type->tokn; } fprintf (stderr, "%s:%d: %s: ", extractFilename (tok), extractLineNumber (tok), msg); fpretty (type); fprintf (stderr, "\n"); fflush (stderr); // errorsDetected++; // if (errorsDetected >= MAX_NUMBER_OF_ERRORS) { // fprintf (stderr, "%s:%d: ***** Too many errors - I'm giving up\n", // extractFilename (tok), // extractLineNumber (tok)); // terminateCompiler (); // } } // checkTokenSkipping (count) // // "count" is the number of tokens we just skipped over. If it exceeds a // threshhold, then print a message, using the current token as the position // of the message. Also, watch out for hitting EOF. // void checkTokenSkipping (int count) { if (count > TOKEN_SKIP_COUNT) { fprintf (stderr, "%s:%d: Skipping %d tokens...\n", extractFilename (token), extractLineNumber (token), count); } if (token.type == EOF) { fprintf (stderr, "%s:%d: ***** SYNTAX ERROR: Unexpected EOF... aborting\n", extractFilename (token), extractLineNumber (token)); terminateCompiler (); } } // processCommandLine (argc, argv) // // This routine processes the command line options. // void processCommandLine (int argc, char ** argv) { int argCount; int badArgs = 0; int len; for (argc--, argv++; argc > 0; argc -= argCount, argv += argCount) { argCount = 1; // Scan the -h option if (!strcmp (*argv, "-h")) { printHelp (); exit (1); // Check for the -s option } else if (!strcmp (*argv, "-s")) { commandOptionS = 1; // Check for the -p option } else if (!strcmp (*argv, "-p")) { commandOptionP = 1; // Check for the -ast option } else if (!strcmp (*argv, "-ast")) { commandOptionAST = 1; // Check for the -testLexer option } else if (!strcmp (*argv, "-testLexer")) { commandOptionTestLexer = 1; // Check for the -testParser option } else if (!strcmp (*argv, "-testParser")) { commandOptionTestParser = 1; // Check for the -unsafe option } else if (!strcmp (*argv, "-unsafe")) { safe = 0; // Check for the -o option, which should be followed by a file name } else if (!strcmp (*argv, "-o")) { if (argc <= 1) { fprintf (stderr, "Expecting filename after -o option. Use -h for help display.\n"); badArgs = 1; } else { argCount++; if (outputFileName == NULL) { outputFileName = *(argv+1); } else { fprintf (stderr, "Invalid command line: Multiple output files. Use -h for help display.\n"); badArgs = 1; } } // Check for the search directory name } else if (!strcmp (*argv, "-d")) { if (argc <= 1) { fprintf (stderr, "Expecting search directory prefix after -d option. Use -h for help display.\n"); badArgs = 1; } else { argCount++; if (commandDirectoryName == NULL) { commandDirectoryName = *(argv+1); } else { fprintf (stderr, "Invalid command line: Multiple search directories. Use -h for help display.\n"); badArgs = 1; } } // Check for the package file name } else if ((*argv)[0] != '-') { if (commandPackageName == NULL) { commandPackageName = *argv; } else { fprintf (stderr, "Invalid command line: Multiple package names. Use -h for help display.\n"); badArgs = 1; } } else { fprintf (stderr, "Invalid command line option (%s). Use -h for help display.\n", *argv); badArgs = 1; } } // If command line problems, then abort now. if (badArgs) { exit (1); } // Figure out the name of the .h header file. if (commandPackageName != NULL) { len = strlen (commandPackageName); headerFileName = (char *) calloc (1, len + 4); strcpy (headerFileName, commandPackageName); headerFileName [len] = '.'; headerFileName [len+1] = 'h'; headerFileName [len+2] = '\0'; } // Figure out the name of the .c code file. if (commandPackageName != NULL) { codeFileName = (char *) calloc (1, len + 4); strcpy (codeFileName, commandPackageName); codeFileName [len] = '.'; codeFileName [len+1] = 'c'; codeFileName [len+2] = '\0'; } // Figure out the name of the .s output file. if (outputFileName == NULL) { if (commandPackageName != NULL) { outputFileName = (char *) calloc (1, len + 4); strcpy (outputFileName, commandPackageName); outputFileName [len] = '.'; outputFileName [len+1] = 's'; outputFileName [len+2] = '\0'; } } // Open the output (.s) file. if (outputFileName == NULL) { outputFile = stdout; } else { outputFile = fopen (outputFileName, "w"); if (outputFile == NULL) { fprintf (stderr, "File \"%s\" could not be opened for writing\n", outputFileName); exit (1); } } } // printHelp () // // This routine prints some documentation. It is invoked whenever // the -h option is used on the command line. // void printHelp () { printf ( "==============================\n" "===== =====\n" "===== The KPL Compiler =====\n" "===== =====\n" "==============================\n" "\n" "Copyright 2002-2007, Harry H. Porter III\n" "========================================\n" " Original Author:\n" " 06/15/02 - Harry H. Porter III\n" " Modifcations by:\n" " 03/15/06 - Harry H. Porter III\n" "\n" "Command Line Options\n" "====================\n" " Command line options may be given in any order.\n" " -h\n" " Print this help info. All other options are ignored.\n" " packageName\n" " Compile the package with this name. The input will come from the files\n" " called \"packageName.h\" and \"packageName.c\". No extension should be\n" " given on the command line. Only one package may be compiled at once.\n" " The packageName is required.\n" " -d directoryPrefix\n" " When looking for header and code files, the default is to look in the\n" " current directory. With this option, the current directory is first\n" " searched. If that fails, then the directoryPrefix is prepended to the\n" " file name and the resulting file name is used. For example:\n" " kpl myPack -d ~harry/BlitzLib/\n" " will first try to open \"myPack.h\" and, if that fails, will try to open\n" " \"~harry/BlitzLib/myPack.h\".\n" " -unsafe\n" " Allow unsafe language constructs.\n" " -o filename\n" " If there are no errors, an assembly code file will be created. This \n" " option can be used to give the output file a specific name. If \n" " missing, the name of the output file will be computed from the name of\n" " the package and appending \".s\". For example:\n" " myPackage --> myPackage.s\n" " COMPILER DEBUGGING: If packageName and output filename are missing,\n" " stdout will be used.\n" " -testLexer\n" " COMPILER DEBUGGING: Scan tokens only, and print tokens out. Input may\n" " come from stdin.\n" " -testParser\n" " COMPILER DEBUGGING: Parse program only, and print data structures out.\n" " Input may come from stdin.\n" " -s\n" " COMPILER DEBUGGING: Print the symbol table on stdout.\n" " -p\n" " COMPILER DEBUGGING: Pretty-print the AST.\n" " -ast\n" " COMPILER DEBUGGING: Dump the full AST.\n" ); } // checkHostCompatibility () // // This routine checks that the host implementation of C++ meets certain // requirements. // // (1) This routine checks that integers are represented using exactly 4 // bytes. // // (2) This routine checks that integers are stored in the expected // Big or Little Endian order. // // (3) This routine checks that integer overflow behavior is as expected // with two's complement arithmetic. // // (4) This routine checks that doubles are implemented using 8-bytes in // the IEEE standard, with the bytes in correct Big/Little Endian order. // // (5) This routine checks that the double->int conversion works as // expected. If this is not the case, then truncateToInt() will need to // be changed. // void checkHostCompatibility () { union fourBytes { char chars [4]; unsigned int i; } fourBytes; double d; char * p, * q; int i, i1, i2, i3; // Check that ints are in the expected Big/Little Endian order. fourBytes.chars[0] = 0x12; fourBytes.chars[1] = 0x34; fourBytes.chars[2] = 0x56; fourBytes.chars[3] = 0x78; if (SWAP_BYTES(fourBytes.i) != 0x12345678) { fatalError ("There is a big/little endian byte ordering problem."); } // Check that we have at least 4 bytes of precision. i = 0x00000001; i <<= 20; i <<= 10; i >>= 20; i >>= 10; if (i != 0x00000001) { fatalError ("This program only runs on computers with 4 byte integers - 1"); } // Check that we have no more than 4 bytes of precision. i = 0x00000001; i <<= 20; i <<= 13; // Some compilers treat <<33 as a nop! i >>= 20; i >>= 13; if (i != 0x00000000) { fatalError ("This program only runs on computers with 4 byte integers - 2"); } // Check that we have the expected overflow behavior for ints. i = -2147483647; i = i - 2; if (i != 2147483647) { fatalError ("This program only runs on computers with 4 byte integers - 3"); } // Check that doubles are represented as we expect. d = 123.456e37; p = (char *) &d; q = p; // If doubles are stored in Big Endian byte order.... if ((*p++ == '\x48') && (*p++ == '\x0d') && (*p++ == '\x06') && (*p++ == '\x3c') && (*p++ == '\xdb') && (*p++ == '\x93') && (*p++ == '\x27') && (*p++ == '\xcf')) { #ifdef BLITZ_HOST_IS_LITTLE_ENDIAN fatalError ("There is a big/little endian byte ordering problem with doubles - 1."); #endif // Else, if doubles are stored in Little Endian byte order... } else if ((*q++ == '\xcf') && (*q++ == '\x27') && (*q++ == '\x93') && (*q++ == '\xdb') && (*q++ == '\x3c') && (*q++ == '\x06') && (*q++ == '\x0d') && (*q++ == '\x48')) { #ifdef BLITZ_HOST_IS_LITTLE_ENDIAN #else fatalError ("There is a big/little endian byte ordering problem with doubles - 2."); #endif // Else, if doubles are stored in some other way... } else { fatalError ("The host implementation of 'double' is not what I expect."); } // There is variation in the way different hosts handle double->int conversion // when the double is too large to represent as an integer. When checking // we must do the conversion in two steps, since some compilers perform the // conversion at compile time, and will do the conversion differently than // the host machine. Truly appalling, isn't it! // On PPC, (int) 9e99 is 0x7fffffff // On PPC, (int) d is 0x7fffffff // On Intel, (int) 9e99 is 0x7fffffff // On Intel, (int) d is 0x80000000 // i = (int) 9e99; // printf ("(int) 9e99 is 0x%08x\n", i); d = 9e99; i = (int) d; // Note: ((int) 9e99 == 0 while ((int) d) == 2147483647)!!! // printf ("(int) d is 0x%08x\n", i); // Check that double->int conversion works as expected. d = 4.9; i1 = (int) d; d = -4.9; i2 = (int) d; d = -9e99; i3 = (int) d; if ((i1 != 4) || (i2 != -4) || (i3 != 0x80000000)) { printf ("%d %d %d %d\n", i1, i2, i3); fatalError ("The host implementation of double->int casting is not what I expect."); } } // appendStrings (char *, char *, char *) // // Allocate and a new char array and fill it in from the // characters in the strings. Return a pointer to it. // char * appendStrings (char * str1, char * str2, char * str3) { int len = strlen (str1) + strlen (str2) + strlen (str3); char * newStr, * to, * from ; newStr = (char *) calloc (1, len+1); to = newStr; for (from=str1; *from != 0; to++, from++) { *to = *from; } for (from=str2; *from != 0; to++, from++) { *to = *from; } for (from=str3; *from != 0; to++, from++) { *to = *from; } *to = 0; return newStr; } // divide (a, b) // // This routine is passed two integers ("a" and "b"). It divides a by b // to get a quotient ("q") and remainder ("r"), such that // // a = b*q + r // // Furthermore, the remainder follows the mathematical definition of the // "modulo" operator, namely that the remainder will have the same sign // as b and that // // 0 <= abs(r) < abs(b) // // Another way to look at this is that the quotient is the real quotient, // rounded down to the nearest integer. // // For example: // // a b q r a = b * q + r a/b rounded // == == == == ================= ==== ======= // 7 3 2 1 7 = 3 * 2 + 1 2.3 2 // -7 3 -3 2 -7 = 3 * -3 + 2 -2.3 -3 // 7 -3 -3 -2 7 = -3 * -3 + -2 -2.3 -3 // -7 -3 2 -1 -7 = -3 * 2 + -1 2.3 2 // // This routine modifies global variables "qqo" and "rem". If b=0 it // sets q and r to zero and returns immediately. // // With this definition of "q" and "r", overflow can and will occur in only // one situation. Assuming that we are using 32-bit signed integers, the // following inputs cause a problem... // a = -2147483648 // b = -1 // The mathematically correct answer is... // q = +2147483648 // r = 0 // Unfortunately, this value of q is not representable. The underlying // implementation of the C operators / and % will normally fail, and will // quietly return the wrong answer... // q = -2147483648 // r = 0 // This routine will simply return these incorrect values. // // The C language does not define the / and % operators precisely, but // only requires that a = b*q + r be true. This routine is designed to // return consistent, "correct" answers, regardless of the underlying // implementation of / and %. // // Typical variations in integer division are... // // (1) "r" is always non-negative. 0 <= r < abs(b) // "q" will be negative when either a or b (but not both) are negative. // a b q r a = b * q + r // == == == == ================= // 7 3 2 1 7 = 3 * 2 + 1 // -7 3 -3 2 -7 = 3 * -3 + 2 // 7 -3 -2 1 7 = -3 * -2 + 1 // -7 -3 3 2 -7 = -3 * 3 + 2 // // (2) Real division, rounded toward zero. // "q" = a/b, rounded toward zero. // "q" will be negative when either a or b (but not both) are negative. // The sign of "r" will be the same as the sign of "a". // a b q r a = b * q + r a/b rounded // == == == == ================= ==== ======= // 7 3 2 1 7 = 3 * 2 + 1 2.3 2 // -7 3 -2 -1 -7 = 3 * -2 + -1 -2.3 -2 // 7 -3 -2 1 7 = -3 * -2 + 1 -2.3 -2 // -7 -3 2 -1 -7 = -3 * 2 + -1 2.3 2 // // (3) Real division, rounded toward negative infinity. // "q" = a/b, rounded toward negative infinity. // This results in "r" being the mathematically correct "modulo". // "q" will be negative when either a or b (but not both) are negative. // "r" will be negative whenever "b" is negative. // // This routine implements option number (3). It works assuming that // the underlying C implementation uses options (1), (2), or (3). // // Overflow cannot occur in this routine, assuming 2's complement // representation of integers. // void divide (int a, int b) { if (b==0) { quo = rem = 0; return; } quo = a/b; rem = a%b; if (b>0) { if (rem<0) { quo--; // Overflow iff q=MIN; but then b=1 and r=0... can't be. rem = rem + b; // r is neg, b is pos; cannot overflow. } } else { if (rem>0) { quo--; // Overflow iff q=MIN; but then b=1 and r=0... can't be. rem = rem + b; // r is pos, b is neg; cannot overflow. } } } // truncateToInt (double) --> int // // This routine is passed a double; it returns an int by truncating the arg // to the next integal value toward zero. For example: // // 4.9 --> 4 // -4.9 --> -4 // 9e99 --> 2,147,483,647 // -9e99 --> -2,147,483,648 // int truncateToInt (double d) { return (int) (d); }