home *** CD-ROM | disk | FTP | other *** search
- /*
- * @(#)scan.c 1.5 10/6/87
- */
- #include "assert.h"
- #include "nodes.h"
- #include "tokens.h"
- #include "system.h"
- #include "MyParser.h"
- #include "keyword.h"
- #include "error.h"
- Token currentToken, nextToken;
- NodePtr currentTokenValue, nextTokenValue;
- extern NodePtr yylval;
-
- #define TNO -1
-
- FILE *inputFile;
- char *currentFileName = NULL;
- int currentLineNumber = 1;
- int nextLineNumber = 0;
- int needIncLineNumber = 1;
-
- static TokenBuffer nextTokenBuffer;
-
/*
 * Forward declarations of the per-character-class scanning routines that
 * populate the charRoutines dispatch table.  All of them belong to a single
 * "static int" declarator list: the list must be comma-separated throughout,
 * otherwise the routines after the break are declared with external linkage
 * and clash with their static definitions further down.
 */
static int
  ScanIllegal(), ScanLetter(), ScanColon(), ScanLparen(),
  ScanRparen(), ScanDigit(), ScanDot(), ScanStringquote(),
  ScanCharquote(), ScanComma(), ScanComment(), ScanOperator(),
  ScanLsquare(), ScanRsquare(),
  ScanLcurly(), ScanRcurly(), ScanDollar(),
  ScanEof(), ScanWhite(), ScanNL();
-
- static void extend(tb)
- register TokenBufferPtr tb;
- {
- register char *newBuffer;
- register int currentLength;
- currentLength = tb->bufferEnd - tb->buffer;
-
- newBuffer = (char *) malloc((unsigned)2 * currentLength);
- bcopy(tb->buffer, newBuffer, currentLength);
- free(tb->buffer);
- tb->buffer = newBuffer;
- tb->fillPtr = tb->buffer + currentLength;
- tb->bufferEnd = tb->buffer + 2 * currentLength;
- currentLength = 2 * currentLength;
- }
-
/*
 * Append character c to token buffer tb, growing the buffer with extend()
 * first when it is already full.
 *
 * The body is wrapped in do { } while (0) so that "collect(tb, c);" is exactly
 * one statement and stays correct as the body of an unbraced if/else.  The
 * capacity check is performed before c is evaluated, and c is evaluated
 * exactly once.
 */
#define collect(tb, c) \
  do { \
    if ((tb)->fillPtr >= (tb)->bufferEnd) { \
      extend(tb); \
    } \
    *(tb)->fillPtr++ = (c); \
  } while (0)
-
/*
 * Return a freshly malloc'ed copy of the NUL-terminated string S, including
 * its terminator.  The caller owns the result.
 *
 * Returns NULL when the allocation fails, instead of copying onto a null
 * pointer.
 */
char *strdup(S)
char *S;
{
  register char *copy;
  register int length;

  length = strlen(S) + 1;	/* + 1 for the terminating NUL */
  copy = (char *) malloc((unsigned) length);
  if (copy == NULL) {
    return(NULL);
  }
  bcopy(S, copy, length);
  return(copy);
}
-
- /* This is the scanner which presents a token stream to its caller. */
-
- static int nextChar = ' ';
- TokenBuffer lineBuffer;
- int currentPosition = 0;
- static int positionInLine = -1;
-
- static void readLine()
- {
- register int c;
- lineBuffer.fillPtr = lineBuffer.buffer;
- positionInLine = -1;
- do {
- c = getc(inputFile);
- collect(&lineBuffer, c);
- } while (c != EOF && c != '\n');
- collect(&lineBuffer, '\0');
- }
-
- #define MgetNextChar() {\
- register int c; \
- if (needIncLineNumber) { \
- nextLineNumber++; \
- readLine(); \
- } \
- c = lineBuffer.buffer[++positionInLine]; \
- if (c == EOF) nextChar = -1; \
- else { \
- nextChar = c; \
- if (c & 0xffffff80) ScanIllegal(); \
- needIncLineNumber = (c == '\n'); \
- } \
- }
-
- void getNextChar()
- {
- MgetNextChar();
- }
-
typedef int CharClasses;
/*
 * We are changing things so that operation names can be made up of strange
 * characters.  Basically these things are now treated like identifiers, with
 * keywords and all.  The legal operator characters are
 *     = ! > < | & + - / * # @ ? ^ ~
 * (the original note also lists '.', but the table below classifies '.' as
 * CDOT).
 *
 * Each class value is also the index of its handler in charRoutines.
 */
#define CILLEGAL 0
#define CLETTER 1
#define CCOLON 2
#define CLPAREN 3
#define CRPAREN 4
#define CDIGIT 5
#define CDOT 6
#define CSTRINGQUOTE 7
#define CCHARQUOTE 8
#define CCOMMA 9
#define CCOMMENT 10
#define COPERATOR 11
#define CLSQUARE 12
#define CRSQUARE 13
#define CLCURLY 14
#define CRCURLY 15
#define CDOLLAR 16
#define CEOF 17
#define CWHITE 18
#define CNL 19

/*
 * No longer used for lookups.  It is kept only because Scanner_Initialize
 * assigns to it; end of file is now covered by the table itself (see below).
 */
static CharClasses junk[1] = { CEOF };

/*
 * Class of every 7-bit character, preceded by one extra slot for the
 * end-of-file value -1.  Keeping that slot inside the same array makes
 * charClass[-1] a well-defined access instead of depending on how the
 * compiler happens to lay out two separate objects.
 */
static CharClasses charClassTable[1 + 128] = {
/*  -1 */ CEOF,
/*   0 */ CILLEGAL,CILLEGAL,CILLEGAL,CILLEGAL,CILLEGAL,CILLEGAL,CILLEGAL,CILLEGAL,
/*   8 */ CILLEGAL,CWHITE,CNL,CILLEGAL,CWHITE,CWHITE,CILLEGAL,CILLEGAL,
/*  16 */ CILLEGAL,CILLEGAL,CILLEGAL,CILLEGAL,CILLEGAL,CILLEGAL,CILLEGAL,CILLEGAL,
/*  24 */ CILLEGAL,CILLEGAL,CILLEGAL,CILLEGAL,CILLEGAL,CILLEGAL,CILLEGAL,CILLEGAL,
/*  32 */ CWHITE,COPERATOR,CSTRINGQUOTE,COPERATOR,CDOLLAR,CCOMMENT,COPERATOR,CCHARQUOTE,
/*  40 */ CLPAREN,CRPAREN,COPERATOR,COPERATOR,CCOMMA,COPERATOR,CDOT,COPERATOR,
/*  48 */ CDIGIT,CDIGIT,CDIGIT,CDIGIT,CDIGIT,CDIGIT,CDIGIT,CDIGIT,
/*  56 */ CDIGIT,CDIGIT,CCOLON,CILLEGAL,COPERATOR,COPERATOR,COPERATOR,COPERATOR,
/*  64 */ COPERATOR,CLETTER,CLETTER,CLETTER,CLETTER,CLETTER,CLETTER,CLETTER,
/*  72 */ CLETTER,CLETTER,CLETTER,CLETTER,CLETTER,CLETTER,CLETTER,CLETTER,
/*  80 */ CLETTER,CLETTER,CLETTER,CLETTER,CLETTER,CLETTER,CLETTER,CLETTER,
/*  88 */ CLETTER,CLETTER,CLETTER,CLSQUARE,CILLEGAL,CRSQUARE,COPERATOR,CLETTER,
/*  96 */ CILLEGAL,CLETTER,CLETTER,CLETTER,CLETTER,CLETTER,CLETTER,CLETTER,
/* 104 */ CLETTER,CLETTER,CLETTER,CLETTER,CLETTER,CLETTER,CLETTER,CLETTER,
/* 112 */ CLETTER,CLETTER,CLETTER,CLETTER,CLETTER,CLETTER,CLETTER,CLETTER,
/* 120 */ CLETTER,CLETTER,CLETTER,CLCURLY,COPERATOR,CRCURLY,COPERATOR,CILLEGAL
};

/* Indexable with -1 (end of file) through 127. */
static CharClasses *charClass = &charClassTable[1];
-
- static int (*charRoutines[])() = {
- /* CILLEGAL */ ScanIllegal,
- /* CLETTER */ ScanLetter,
- /* CCOLON */ ScanColon,
- /* CLPAREN */ ScanLparen,
- /* CRPAREN */ ScanRparen,
- /* CDIGIT */ ScanDigit,
- /* CDOT */ ScanDot,
- /* CSTRINGQUOTE */ ScanStringquote,
- /* CCHARQUOTE */ ScanCharquote,
- /* CCOMMA */ ScanComma,
- /* CCOMMENT */ ScanComment,
- /* COPERATOR */ ScanOperator,
- /* CLSQUARE */ ScanLsquare,
- /* CRSQUARE */ ScanRsquare,
- /* CLCURLY */ ScanLcurly,
- /* CRCURLY */ ScanRcurly,
- /* CDOLLAR */ ScanDollar,
- /* CEOF */ ScanEof,
- /* CWHITE */ ScanWhite,
- /* CNL */ ScanNL };
-
- void Scanner_Accept()
- {
- currentToken = nextToken;
- currentTokenValue = nextTokenValue;
- currentLineNumber = nextLineNumber;
- nextTokenBuffer.fillPtr = nextTokenBuffer.buffer;
- nextTokenValue = (NodePtr) -1;
- scan();
- }
-
- static int ScanIllegal()
- {
- IllegalCharacter(nextChar);
- getNextChar();
- }
-
- static int ScanLetter()
- {
- register Ident id;
- register int cc;
- do {
- collect(&nextTokenBuffer, nextChar);
- getNextChar();
- cc = charClass[nextChar];
- } while (cc == CLETTER || cc == CDIGIT);
- collect(&nextTokenBuffer, '\0');
- id = Ident_Lookup(nextTokenBuffer.buffer,
- nextTokenBuffer.fillPtr - nextTokenBuffer.buffer - 1);
- if (id <= lastKeywordIdent) {
- nextToken = firstKeyword + id;
- } else {
- nextToken = TIDENTIFIER;
- nextTokenValue = NewNode(T_IDENT);
- nextTokenValue->b.ident.ident = id;
- }
- }
-
- static int ScanOperator()
- {
- register Ident id;
- register int cc;
- do {
- collect(&nextTokenBuffer, nextChar);
- getNextChar();
- cc = charClass[nextChar];
- } while (cc == COPERATOR);
- collect(&nextTokenBuffer, '\0');
- id = Ident_Lookup(nextTokenBuffer.buffer,
- nextTokenBuffer.fillPtr - nextTokenBuffer.buffer - 1);
- if (id <= lastKeywordIdent) {
- nextToken = firstKeyword + id;
- } else {
- nextToken = TOPERATOR;
- nextTokenValue = NewNode(T_IDENT);
- nextTokenValue->b.ident.ident = id;
- }
- }
-
- static int ScanColon()
- {
- getNextChar();
- if (nextChar == '=') {
- getNextChar();
- nextToken = TSUGARASSIGN;
- } else {
- nextToken = TCOLON;
- }
- }
-
- static int ScanLparen()
- {
- getNextChar();
- nextToken = TLPAREN;
- }
- static int ScanRparen()
- {
- getNextChar();
- nextToken = TRPAREN;
- }
- static int ScanDigit()
- {
- do {
- collect(&nextTokenBuffer, nextChar);
- getNextChar();
- } while (charClass[nextChar] == CDIGIT);
- if (nextChar == '.') {
- do {
- collect(&nextTokenBuffer, nextChar);
- getNextChar();
- } while (charClass[nextChar] == CDIGIT);
- nextToken = TREALLITERAL;
- } else {
- nextToken = TINTEGERLITERAL;
- }
- collect(&nextTokenBuffer, '\0');
- nextTokenValue = NewNode(T_STRING);
- nextTokenValue->b.string.string = strdup(nextTokenBuffer.buffer);
- }
- static int ScanDot()
- {
- getNextChar();
- nextToken = TDOT;
- }
- static int ScanComma()
- {
- getNextChar();
- nextToken = TCOMMA;
- }
- static int ScanComment()
- {
- do {
- getNextChar();
- } while (charClass[nextChar] != CNL);
- }
- static int ScanLsquare()
- {
- getNextChar();
- nextToken = TLSQUARE;
- }
- static int ScanRsquare()
- {
- getNextChar();
- nextToken = TRSQUARE;
- }
- static int ScanLcurly()
- {
- getNextChar();
- nextToken = TLCURLY;
- }
- static int ScanRcurly()
- {
- getNextChar();
- nextToken = TRCURLY;
- }
- static int ScanDollar()
- {
- getNextChar();
- nextToken = TDOLLAR;
- }
- static int DoAChar()
- {
- register char c = nextChar;
- register int num = 0;
- if (c == '\\') {
- getNextChar();
- if (nextChar == '^') {
- getNextChar();
- num = nextChar & 0x1f;
- getNextChar();
- } else if ('0' <= nextChar && nextChar <= '7') {
- /* a C octal escape */
- num = nextChar - '0';
- getNextChar();
- if ('0' <= nextChar && nextChar <= '7') {
- num *= 8;
- num += nextChar - '0';
- getNextChar();
- if ('0' <= nextChar && nextChar <= '7') {
- num *= 8;
- num += nextChar - '0';
- getNextChar();
- }
- }
- } else {
- switch (nextChar) {
- case 'n':
- num = '\n';
- break;
- case 'b':
- num = '\b';
- break;
- case 't':
- num = '\t';
- break;
- case 'r':
- num = '\r';
- break;
- case 'f':
- num = '\f';
- break;
- default:
- num = nextChar;
- break;
- }
- getNextChar();
- }
- } else {
- num = nextChar;
- getNextChar();
- }
- return(num);
- }
- static int ScanStringquote()
- {
- getNextChar();
- while (1) {
- if (nextChar == -1) {
- UnexpectedEndOfFile();
- break;
- } else if (nextChar == '"') {
- getNextChar();
- break;
- }
- collect(&nextTokenBuffer, DoAChar());
- }
- collect(&nextTokenBuffer, '\0');
- nextToken = TSTRINGLITERAL;
- nextTokenValue = NewNode(T_STRING);
- nextTokenValue->b.string.string = strdup(nextTokenBuffer.buffer);
- }
- static int ScanCharquote()
- {
- getNextChar();
- if (nextChar == -1) {
- UnexpectedEndOfFile();
- }
- collect(&nextTokenBuffer, DoAChar());
- if (nextChar != '\'') {
- BeginSyntaxErrorMessage(1);
- ErrorWrite("expected \"'\"");
- EndErrorMessage();
- }
- getNextChar();
- collect(&nextTokenBuffer, '\0');
- nextToken = TCHARACTERLITERAL;
- nextTokenValue = NewNode(T_STRING);
- nextTokenValue->b.string.string = strdup(nextTokenBuffer.buffer);
- }
-
- static int ScanEof()
- {
- nextToken = TEOF;
- }
- static int ScanWhite()
- {
- do {
- getNextChar();
- } while (charClass[nextChar] == CWHITE);
- }
-
/*
 * Handler for CNL: step over the newline.  No token is produced.
 * Always returns 0.
 */
static int ScanNL()
{
  getNextChar();
  return 0;
}
-
- /* This one scans until it finds the next token, leaving it in nextToken* */
-
- scan()
- {
- nextToken = TNO;
- do {
- currentPosition = positionInLine;
- (*charRoutines[charClass[nextChar]])();
- } while (nextToken == TNO);
- if (nextTokenValue == (NodePtr) -1) nextTokenValue = (NodePtr) nextToken;
- }
-
- void Scanner_Initialize()
- {
- junk[0] = CEOF;
- nextTokenBuffer.buffer = (char *) malloc(80);
- nextTokenBuffer.bufferEnd = nextTokenBuffer.buffer + 80;
- nextTokenBuffer.fillPtr = nextTokenBuffer.buffer;
- lineBuffer.buffer = (char *) malloc(100);
- lineBuffer.bufferEnd = lineBuffer.buffer + 100;
- lineBuffer.fillPtr = lineBuffer.buffer;
- Ident_Initialize();
- Keyword_Initialize();
- }
-
- int yylex()
- {
- Scanner_Accept();
- yylval = nextTokenValue;
- return(nextToken);
- }
-