home *** CD-ROM | disk | FTP | other *** search
Text File | 1991-08-20 | 24.2 KB | 1,070 lines | [TEXT/MPS ] |
- /*
- ** CScanner.cp
- */
-
- #ifndef __CSCANNER__
- #include "CScanner.h"
- #endif
-
- #ifndef __FORMATTING__
- #include "Formatting.h"
- #endif
-
- #ifndef __CTYPE__
- #include <ctype.h>
- #endif
-
- #ifndef __STDIO__
- #include <stdio.h>
- #endif
-
- #ifndef __STRING__
- #include <string.h>
- #endif
-
-
-
-
- /*
- ** TextPtr encapsulates the pointer into the text buffer
- ** The pointee is declared const unsigned char: the text is only read
- ** through a TextPtr, and bytes above 0x7F compare as positive values.
- */
- typedef const unsigned char* TextPtr;
-
-
-
- /*
- ** class SyntacticLex
- ** Common base of the token classes defined in this file. The tokens the
- ** CScanner returns are treated as constant: no inherited method is
- ** expected to modify them, so SaveCopy simply hands back the object
- ** itself. A derived class for which that does not hold must override
- ** SaveCopy.
- */
-
- #pragma segment CScanner
- class SyntacticLex : public Syntactic {
- public:
-     // Forward the major and (optional) minor type to Syntactic.
-     SyntacticLex(int aType, int aMinorType = 0)
-         : Syntactic(aType, aMinorType)
-     {}
-
-     /*
-     ** Return false: the implementation in this class reports that a
-     ** plain lexical token is not a separator.
-     */
-     virtual Boolean IsSeparator() const;
-
-     /*
-     ** Return this object. No copy is made, because the object cannot
-     ** be modified by any of its methods.
-     */
-     virtual const Syntactic *SaveCopy() const;
-
-     /*
-     ** Format and display this syntactic item, using the format passed
-     ** in as the only argument. Pure virtual: each concrete token class
-     ** supplies its own version.
-     */
-     virtual Boolean Display(Formatting *aFormat) = 0;
- };
-
-
- // Tokens of this class are never separators.
- Boolean SyntacticLex::IsSeparator() const
- {
-     const Boolean separates = false;
-     return (separates);
- }
-
-
- // The "copy" of a constant token is the token itself; nothing is allocated.
- const Syntactic *SyntacticLex::SaveCopy() const
- {
-     const Syntactic *self = this;
-     return (self);
- }
-
-
-
-
- /*
- ** class LexicalToken
- ** A token whose text is the byte range [fStart, fEnd). The token keeps
- ** only the two pointers; for tokens produced by the scanner they point
- ** into the CScanner buffer itself, so no copy of the text is held.
- */
-
- #pragma segment CScanner
- class LexicalToken : public SyntacticLex {
- public:
-     // NUL-terminated text, default minor type; the end is found with strlen.
-     LexicalToken(short aType, TextPtr start)
-         : SyntacticLex(aType)
-         , fStart(start)
-         , fEnd(start + strlen((char *)start))
-     {}
-
-     // NUL-terminated text with an explicit minor type.
-     LexicalToken(short aType, short aMinorType, TextPtr start)
-         : SyntacticLex(aType, aMinorType)
-         , fStart(start)
-         , fEnd(start + strlen((char *)start))
-     {}
-
-     // Explicit [start, end) range, default minor type.
-     LexicalToken(short aType, TextPtr start, TextPtr end)
-         : SyntacticLex(aType)
-         , fStart(start)
-         , fEnd(end)
-     {}
-
-     // Explicit [start, end) range with an explicit minor type.
-     LexicalToken(short aType, short aMinorType, TextPtr start, TextPtr end)
-         : SyntacticLex(aType, aMinorType)
-         , fStart(start)
-         , fEnd(end)
-     {}
-
-     /*
-     ** Write the token text through aFormat.
-     */
-     virtual Boolean Display(Formatting *aFormat);
-
- protected:
-     TextPtr fStart;     // first byte of the token text
-     TextPtr fEnd;       // one past the last byte of the token text
- };
-
-
- // Write the bytes [fStart, fEnd) to aFormat. The result is true when the
- // token text is non-empty and false for an empty token.
- Boolean LexicalToken::Display(Formatting *aFormat)
- {
-     aFormat->Write((char *)fStart, fEnd - fStart);
-
-     if (fStart == fEnd)
-         return (false);
-     return (true);
- }
-
-
-
- /*
- ** class PredefinedToken
- ** A token with a fixed spelling, given as a NUL-terminated C string.
- ** The constructors accept the type and the string in either order so
- ** that tables of tokens can be written whichever way reads best.
- */
-
- #pragma segment CScanner
- class PredefinedToken : public LexicalToken {
- public:
-     // (type, string)
-     PredefinedToken(short aType, const char *aString)
-         : LexicalToken(aType, (TextPtr)aString)
-     {}
-
-     // (type, string, minor type)
-     PredefinedToken(short aType, const char *aString, short aMinorType)
-         : LexicalToken(aType, aMinorType, (TextPtr)aString)
-     {}
-
-     // (string, type)
-     PredefinedToken(const char *aString, short aType)
-         : LexicalToken(aType, (TextPtr)aString)
-     {}
-
-     // (string, type, minor type)
-     PredefinedToken(const char *aString, short aType, short aMinorType)
-         : LexicalToken(aType, aMinorType, (TextPtr)aString)
-     {}
-
-     // Return the spelling of the token, i.e. the string it was built from.
-     const char *String() const
-     {
-         return ((const char *)fStart);
-     }
- };
-
-
-
-
- /*
- ** class ReservedWord:
- ** A PredefinedToken used for language keywords. It adds no state or
- ** behaviour of its own; each constructor forwards to the matching
- ** PredefinedToken constructor.
- */
-
- #pragma segment CScanner
- class ReservedWord : public PredefinedToken {
- public:
-     // (type, string)
-     ReservedWord(short aType, const char *aString)
-         : PredefinedToken(aType, aString)
-     {}
-
-     // (type, string, minor type)
-     ReservedWord(short aType, const char *aString, short aMinorType)
-         : PredefinedToken(aType, aString, aMinorType)
-     {}
-
-     // (string, type)
-     ReservedWord(const char *aString, short aType)
-         : PredefinedToken(aType, aString)
-     {}
-
-     // (string, type, minor type)
-     ReservedWord(const char *aString, short aType, short aMinorType)
-         : PredefinedToken(aType, aString, aMinorType)
-     {}
- };
-
-
-
-
- /*
- ** class OperatorToken:
- ** A PredefinedToken used for operators. It adds no state or behaviour
- ** of its own; each constructor forwards to the matching PredefinedToken
- ** constructor.
- */
-
- #pragma segment CScanner
- class OperatorToken : public PredefinedToken {
- public:
-     // (type, string)
-     OperatorToken(short aType, const char *aString)
-         : PredefinedToken(aType, aString)
-     {}
-
-     // (type, string, minor type)
-     OperatorToken(short aType, const char *aString, short aMinorType)
-         : PredefinedToken(aType, aString, aMinorType)
-     {}
-
-     // (string, type)
-     OperatorToken(const char *aString, short aType)
-         : PredefinedToken(aType, aString)
-     {}
-
-     // (string, type, minor type)
-     OperatorToken(const char *aString, short aType, short aMinorType)
-         : PredefinedToken(aType, aString, aMinorType)
-     {}
- };
-
-
-
-
- /*
- ** class WhiteSpaceToken:
- ** Base class for the white-space-like items the formatter still has to
- ** know about: comments, newlines, preprocessor lines, etc. Unlike the
- ** other tokens, these report themselves as separators.
- */
-
- #pragma segment CScanner
- class WhiteSpaceToken : public SyntacticLex {
- public:
-     // Only a major type is taken; the minor type keeps its default.
-     WhiteSpaceToken(int aType)
-         : SyntacticLex(aType)
-     {}
-
-     /*
-     ** Return true: the implementation in this class reports every
-     ** white space token as a separator.
-     */
-     virtual Boolean IsSeparator() const;
-
-     /*
-     ** Format and display this syntactic item, using the format passed
-     ** in as the only argument. Still pure virtual at this level.
-     */
-     virtual Boolean Display(Formatting *aFormat) = 0;
- };
-
-
- // White space tokens always count as separators.
- Boolean WhiteSpaceToken::IsSeparator() const
- {
-     const Boolean separates = true;
-     return (separates);
- }
-
-
-
- /*
- ** class NewLineToken
- ** Represents a new line found in the source. Whether a source new line
- ** reaches the output depends on what the user has said about new line
- ** preservation. The token carries no text of its own; its type alone
- ** decides what Display emits.
- */
-
- #pragma segment CScanner
- class NewLineToken : public WhiteSpaceToken {
- public:
-     NewLineToken(short aType)
-         : WhiteSpaceToken(aType)
-     {}
-
-     // Emit a new line, preceded by a backslash for a kSLex_Null token.
-     virtual Boolean Display(Formatting *aFormat);
- };
-
-
- // Start a new line on aFormat. A token of type kSLex_Null (the type the
- // line-continuation token is created with) first writes the backslash.
- // Always returns true.
- Boolean NewLineToken::Display(Formatting *aFormat)
- {
-     const Boolean isContinuation = (Type() == kSLex_Null);
-
-     if (isContinuation) {
-         aFormat->Putc('\\');
-     }
-     aFormat->NewLine();
-
-     return (true);
- }
-
-
-
- /*
- ** class CommentToken
- ** This class contains comments. Comments are separators and are formatted
- ** by the Formatting class. For implementation purposes, "# preprocessor"
- ** lines are also considered comments.
- ** The token text is the byte range [fStart, fEnd).
- */
-
- #pragma segment CScanner
- class CommentToken : public WhiteSpaceToken {
- public:
-     CommentToken(short aType, TextPtr start, TextPtr end)
-         : WhiteSpaceToken(aType),
-           fStart(start),
-           fEnd(end)
-     {
-         // Check if this is a formatting type of comment. If it is, set the
-         // minor type to the requisite type of change. A "formatting off"
-         // comment is of the form "//ƒ-" or "/*ƒ-", while a "formatting on"
-         // comment is of the form "//ƒ+" or "/*ƒ+"
-         //
-         // Such a marker needs at least four bytes, so shorter tokens (a bare
-         // "//", a lone "#", a truncated "/*") are skipped; this keeps the
-         // test from reading bytes that lie beyond the token.
-         if ((end - start) >= 4 && start[2] == (unsigned char)'ƒ') {
-             if (start[3] == '+')
-                 MinorSexChange(kSLex_CommentFormatOn);
-             else if (start[3] == '-')
-                 MinorSexChange(kSLex_CommentFormatOff);
-         }
-     }
-
-     /*
-     ** Make the Formatting* do the work
-     */
-     virtual Boolean Display(Formatting *aFormat);
-
- private:
-     TextPtr fStart;     // first byte of the comment text
-     TextPtr fEnd;       // one past the last byte of the comment text
- };
-
-
- // Display the comment (or "#" line) through aFormat.
- //   kSLex_Comment   : handed to Formatting::Comment.
- //   kSLex_PoundLine : the "&n" glue is executed first, so that the
- //                     preprocessor line starts on a fresh line, and then
- //                     the text is written verbatim.
- //   any other type  : the text is written verbatim.
- // Returns true when the token text is non-empty.
- Boolean CommentToken::Display(Formatting *aFormat)
- {
-     aFormat->ExecuteGlue((FormatString)"s#");
-
-     const int kind = Type();
-     if (kind == kSLex_Comment) {
-         aFormat->Comment((const char *)fStart, (const char *)fEnd);
-     } else {
-         if (kind == kSLex_PoundLine) {
-             aFormat->ExecuteGlue((FormatString)"&n");
-         }
-         aFormat->Write((char *)fStart, fEnd - fStart);
-     }
-
-     return (fStart != fEnd);
- }
-
-
-
-
- /*ƒ-
- ** Predefined token types
- */
- static unsigned char gErrText[] = "——— Error ———";
- static LexicalToken gErr (kSErr, gErrText, gErrText+13);
- static NewLineToken gNewLine (kSLex_NewLine);
- static NewLineToken gEOF (kSLex_EOF);
- static NewLineToken gContinuation(kSLex_Null);
-
- static PredefinedToken gClassColon (kSLex_ClassColon, "::"); // shared punctuation tokens: one static object per spelling
- static PredefinedToken gColon (kSLex_Colon, ":");
- static PredefinedToken gSemiColon (kSLex_SemiColon, ";");
- static PredefinedToken gLParen (kSLex_LParen, "(");
- static PredefinedToken gRParen (kSLex_RParen, ")");
- static PredefinedToken gLBrace (kSLex_LBrace, "["); // "Brace" in these names is the square bracket
- static PredefinedToken gRBrace (kSLex_RBrace, "]");
- static PredefinedToken gLCurly (kSLex_LCurly, "{"); // curly braces use the "Curly" names
- static PredefinedToken gRCurly (kSLex_RCurly, "}");
- static PredefinedToken gComma (kSLex_Comma, ",");
- static PredefinedToken gEllipsis (kSLex_Ellipsis, "...");
-
- /*
- ** Define the operators
- ** Every operator has the major type kSLex_Op; the third constructor
- ** argument is the minor type that tells the individual operators apart.
- ** The scanner returns pointers to these shared objects rather than
- ** allocating a token per occurrence.
- */
- static OperatorToken gClassStar (kSLex_Op, "::*", kSLex_OpClassStar);
- static OperatorToken gQuestion (kSLex_Op, "?", kSLex_OpQuestion);
- static OperatorToken gPeriod (kSLex_Op, ".", kSLex_OpDot);
- static OperatorToken gPeriodStar (kSLex_Op, ".*", kSLex_OpDotStar);
- static OperatorToken gAdd (kSLex_Op, "+", kSLex_OpAdd);
- static OperatorToken gSub (kSLex_Op, "-", kSLex_OpSub);
- static OperatorToken gMul (kSLex_Op, "*", kSLex_OpMul);
- static OperatorToken gDiv (kSLex_Op, "/", kSLex_OpDiv);
- static OperatorToken gMod (kSLex_Op, "%", kSLex_OpMod);
- static OperatorToken gXor (kSLex_Op, "^", kSLex_OpBXor);
- static OperatorToken gLNot (kSLex_Op, "!", kSLex_OpLNot);
- static OperatorToken gLAnd (kSLex_Op, "&&", kSLex_OpLAnd);
- static OperatorToken gLOr (kSLex_Op, "||", kSLex_OpLOr);
- static OperatorToken gBNot (kSLex_Op, "~", kSLex_OpBNot);
- static OperatorToken gBAnd (kSLex_Op, "&", kSLex_OpBAnd);
- static OperatorToken gBOr (kSLex_Op, "|", kSLex_OpBOr);
- static OperatorToken gLSH (kSLex_Op, "<<", kSLex_OpLSh);
- static OperatorToken gRSH (kSLex_Op, ">>", kSLex_OpRSh);
- static OperatorToken gAssign (kSLex_Op, "=", kSLex_OpAssign);
- static OperatorToken gLT (kSLex_Op, "<", kSLex_OpLT);
- static OperatorToken gLE (kSLex_Op, "<=", kSLex_OpLE);
- static OperatorToken gEQ (kSLex_Op, "==", kSLex_OpEQ);
- static OperatorToken gNE (kSLex_Op, "!=", kSLex_OpNE);
- static OperatorToken gGE (kSLex_Op, ">=", kSLex_OpGE);
- static OperatorToken gGT (kSLex_Op, ">", kSLex_OpGT);
- static OperatorToken gAddAssign (kSLex_Op, "+=", kSLex_OpAssignAdd);
- static OperatorToken gSubAssign (kSLex_Op, "-=", kSLex_OpAssignSub);
- static OperatorToken gMulAssign (kSLex_Op, "*=", kSLex_OpAssignMul);
- static OperatorToken gDivAssign (kSLex_Op, "/=", kSLex_OpAssignDiv);
- static OperatorToken gModAssign (kSLex_Op, "%=", kSLex_OpAssignMod);
- static OperatorToken gXorAssign (kSLex_Op, "^=", kSLex_OpAssignBXor);
- static OperatorToken gBAndAssign (kSLex_Op, "&=", kSLex_OpAssignBAnd);
- static OperatorToken gBOrAssign (kSLex_Op, "|=", kSLex_OpAssignBOr);
- static OperatorToken gLSHAssign (kSLex_Op, "<<=", kSLex_OpAssignLSh);
- static OperatorToken gRSHAssign (kSLex_Op, ">>=", kSLex_OpAssignRSh);
- static OperatorToken gDecr (kSLex_Op, "--", kSLex_OpMinusMinus);
- static OperatorToken gIncr (kSLex_Op, "++", kSLex_OpPlusPlus);
- static OperatorToken gPointer (kSLex_Op, "->", kSLex_OpPointer);
- static OperatorToken gPointerStar (kSLex_Op, "->*", kSLex_OpPointerStar);
-
-
-
- /*ƒ-
- ** Reserved words
- ** Each entry is (spelling, major type [, minor type]). lookup() keeps
- ** pointers to these objects in an alphabetically ordered table and
- ** finds them by binary search, so a word added here must be added
- ** there as well, in the right position.
- ** Besides the usual C and C++ keywords the list contains "extended",
- ** "pascal" and "va_dcl".
- */
- static ReservedWord gAuto ("auto", kSLex_Decl);
- static ReservedWord gBreak ("break", kSLex_Break, kSLex_BreakBreak);
- static ReservedWord gCase ("case", kSLex_Case);
- static ReservedWord gChar ("char", kSLex_Decl);
- static ReservedWord gClass ("class", kSLex_Struct, kSLex_StructClass);
- static ReservedWord gConst ("const", kSLex_Decl, kSLex_DeclConst);
- static ReservedWord gContinue ("continue", kSLex_Break, kSLex_BreakContinue);
- static ReservedWord gDefault ("default", kSLex_Default);
- static ReservedWord gDo ("do", kSLex_Do);
- static ReservedWord gDouble ("double", kSLex_Decl);
- static ReservedWord gElse ("else", kSLex_Else);
- static ReservedWord gEnum ("enum", kSLex_Struct, kSLex_StructEnum);
- static ReservedWord gExtended ("extended", kSLex_Decl);
- static ReservedWord gExtern ("extern", kSLex_Decl);
- static ReservedWord gFor ("for", kSLex_For);
- static ReservedWord gFloat ("float", kSLex_Decl);
- static ReservedWord gFriend ("friend", kSLex_Decl);
- static ReservedWord gGoto ("goto", kSLex_Break, kSLex_BreakGoto);
- static ReservedWord gIf ("if", kSLex_If);
- static ReservedWord gInline ("inline", kSLex_Decl);
- static ReservedWord gInt ("int", kSLex_Decl);
- static ReservedWord gLong ("long", kSLex_Decl);
- static ReservedWord gOperator ("operator", kSLex_DeclOperator, kSLex_DeclOperator); // NOTE(review): major type is kSLex_DeclOperator, whereas "const"/"volatile" use kSLex_Decl with a kSLex_Decl* minor type -- confirm this is intended
- static ReservedWord gPascal ("pascal", kSLex_Decl);
- static ReservedWord gPrivate ("private", kSLex_Public, kSLex_PublicPrivate);
- static ReservedWord gProtected ("protected", kSLex_Public, kSLex_PublicProtected);
- static ReservedWord gPublic ("public", kSLex_Public, kSLex_PublicPublic);
- static ReservedWord gRegister ("register", kSLex_Decl);
- static ReservedWord gReturn ("return", kSLex_Break, kSLex_BreakReturn);
- static ReservedWord gShort ("short", kSLex_Decl);
- static ReservedWord gSigned ("signed", kSLex_Decl);
- static ReservedWord gStatic ("static", kSLex_Decl);
- static ReservedWord gStruct ("struct", kSLex_Struct, kSLex_StructStruct);
- static ReservedWord gSwitch ("switch", kSLex_Switch);
- static ReservedWord gTemplate ("template", kSLex_Struct, kSLex_StructTemplate);
- static ReservedWord gTypedef ("typedef", kSLex_Decl);
- static ReservedWord gUnion ("union", kSLex_Struct, kSLex_StructUnion);
- static ReservedWord gUnsigned ("unsigned", kSLex_Decl);
- static ReservedWord gVa_dcl ("va_dcl", kSLex_Decl);
- static ReservedWord gVirtual ("virtual", kSLex_Decl);
- static ReservedWord gVoid ("void", kSLex_Decl);
- static ReservedWord gVolatile ("volatile", kSLex_Decl, kSLex_DeclVolatile);
- static ReservedWord gWhile ("while", kSLex_While);
- //ƒ+
-
-
- /*
- ** lookup
- ** Classify the identifier text [start, end). If the text is exactly one
- ** of the reserved words, the shared ReservedWord object is returned;
- ** otherwise a new LexicalToken of type kSLex_Id covering the text is
- ** allocated and returned. The text need not be NUL-terminated.
- */
- #pragma segment CScanner
- static Syntactic *lookup(TextPtr start, TextPtr end)
- {
-     int len = end - start;
-
-     // These are the bounds on the lengths of the words in the reservedWordList
-     const int kMinWordLength = 2;
-     const int kMaxWordLength = 9;
-
-     //ƒ- Note that this list is in alphabetical order
-     static ReservedWord* reservedWordList[] = {
-           &gAuto
-         , &gBreak
-         , &gCase
-         , &gChar
-         , &gClass
-         , &gConst
-         , &gContinue
-         , &gDefault
-         , &gDo
-         , &gDouble
-         , &gElse
-         , &gEnum
-         , &gExtended
-         , &gExtern
-         , &gFloat
-         , &gFor
-         , &gFriend
-         , &gGoto
-         , &gIf
-         , &gInline
-         , &gInt
-         , &gLong
-         , &gOperator
-         , &gPascal
-         , &gPrivate
-         , &gProtected
-         , &gPublic
-         , &gRegister
-         , &gReturn
-         , &gShort
-         , &gSigned
-         , &gStatic
-         , &gStruct
-         , &gSwitch
-         , &gTemplate
-         , &gTypedef
-         , &gUnion
-         , &gUnsigned
-         , &gVa_dcl
-         , &gVirtual
-         , &gVoid
-         , &gVolatile
-         , &gWhile
-     };
-
-     //ƒ+
-     // Text outside the length bounds cannot be a reserved word, so the
-     // search is skipped for it.
-     if (len <= kMaxWordLength && len >= kMinWordLength) {
-         int min = 0;
-         int max = sizeof(reservedWordList) / sizeof(reservedWordList[0]) - 1;
-
-         // Binary search; cmp < 0 means the text sorts before the word.
-         while (min <= max) {
-             int mid = (min + max) / 2;
-             const char *word = reservedWordList[mid]->String();
-             int cmp = strncmp((char *)start, word, len);
-
-             // The first len characters agree. If the word is longer, the
-             // text is a proper prefix of it ("do" against "double") and
-             // therefore sorts BEFORE the word, so cmp must come out
-             // negative. (Subtracting the other way round would steer the
-             // search toward the wrong half of the table.)
-             if (cmp == 0)
-                 cmp = len - (int)strlen(word);
-
-             if (cmp == 0)
-                 return (reservedWordList[mid]);
-             else if (cmp < 0)
-                 max = mid - 1;
-             else
-                 min = mid + 1;
-         }
-     }
-
-     // Not a reserved word: an ordinary identifier.
-     return (new LexicalToken(kSLex_Id, start, end));
- }
-
-
- /*
- ** w1
- ** Map a single operator character to its shared token. When assign is
- ** true the character was followed by '=', and the two-character form
- ** (op=, or one of !=, ==, <=, >=) is returned instead. Characters that
- ** have no '=' form ignore assign. '\0' gives the EOF token and any
- ** character not listed gives the error token.
- */
- #pragma segment CScanner
- static Syntactic *w1(int aChar, Boolean assign = false)
- {
-     Syntactic *plain = &gErr;   // token for the character on its own
-     Syntactic *withEq = 0;      // token for character + '=', if there is one
-
-     //ƒ-
-     switch (aChar) {
-         case '+': plain = &gAdd;      withEq = &gAddAssign;  break;
-         case '-': plain = &gSub;      withEq = &gSubAssign;  break;
-         case '*': plain = &gMul;      withEq = &gMulAssign;  break;
-         case '/': plain = &gDiv;      withEq = &gDivAssign;  break;
-         case '%': plain = &gMod;      withEq = &gModAssign;  break;
-         case '^': plain = &gXor;      withEq = &gXorAssign;  break;
-         case '!': plain = &gLNot;     withEq = &gNE;         break;
-         case '&': plain = &gBAnd;     withEq = &gBAndAssign; break;
-         case '|': plain = &gBOr;      withEq = &gBOrAssign;  break;
-         case '=': plain = &gAssign;   withEq = &gEQ;         break;
-         case '<': plain = &gLT;       withEq = &gLE;         break;
-         case '>': plain = &gGT;       withEq = &gGE;         break;
-         case '~': plain = &gBNot;     break;
-         case ':': plain = &gColon;    break;
-         case '.': plain = &gPeriod;   break;
-         case '?': plain = &gQuestion; break;
-         case '\0': plain = &gEOF;     break;
-         default:  break;            // stays the error token
-     }
-     //ƒ+
-
-     if (assign && withEq != 0)
-         return (withEq);
-     return (plain);
- }
-
-
-
- // w2
- //   Map a doubled operator character ("++", "--", "&&", "||", "<<", ">>",
- //   "==", "::") to its shared token, given the repeated character. Any
- //   other character gives the error token.
- #pragma segment CScanner
- static Syntactic *w2(int aChar)
- {
-     Syntactic *doubled = &gErr;
-
-     //ƒ-
-     if      (aChar == '+') doubled = &gIncr;
-     else if (aChar == '-') doubled = &gDecr;
-     else if (aChar == '&') doubled = &gLAnd;
-     else if (aChar == '|') doubled = &gLOr;
-     else if (aChar == '<') doubled = &gLSH;
-     else if (aChar == '>') doubled = &gRSH;
-     else if (aChar == '=') doubled = &gEQ;
-     else if (aChar == ':') doubled = &gClassColon;
-     //ƒ+
-
-     return (doubled);
- }
-
-
-
-
-
- // isodigit
- //   True when ch is an octal digit, '0' through '7'.
- #pragma segment CScanner
- inline Boolean isodigit(int ch)
- {
-     if (ch < '0')
-         return (false);
-     return (ch <= '7');
- }
-
-
-
- // CScanner::ICScanner
- //   Initialization entry point of the scanner. The body does no work of
- //   its own and always reports success (noErr).
- #pragma segment CScanner
- short CScanner::ICScanner()
- {
-     const short result = noErr;
-     return (result);
- }
-
-
- // CScanner::NextToken
- //   Scan the buffer from fBuffer and return the next token.
- //
- //   Punctuation, operators, reserved words, new lines, EOF and the error
- //   token are returned as pointers to the shared static objects defined
- //   above. Identifiers, numbers, string/character constants, comments and
- //   "#" preprocessor lines are returned as newly allocated tokens whose
- //   text is a range of the buffer.
- //
- //   fLastTokenStart is set to the first byte of the token returned (it is
- //   left alone when EOF is returned).
- //
- //   NOTE(review): NextChar(), PushBack() and IsEOF() are declared
- //   elsewhere. The code below relies on NextChar() consuming one byte
- //   from fBuffer and PushBack() un-reading one byte -- confirm against
- //   CScanner.h.
- #pragma segment CScanner
- Syntactic *CScanner::NextToken()
- {
-     int ch;
-     int firstCh;
-     TextPtr start;
-
-     // End of file check
-     if (fBuffer >= fBufferEnd) {
-         return (&gEOF);
-     }
-
-     // Skip over the white space (0xCA is the non-breaking space)
-     while (fBuffer < fBufferEnd) {
-         ch = NextChar();
-         if (ch == '\n') {
-             fLastTokenStart = fBuffer - 1;
-             return (&gNewLine);
-         }
-         if (!isspace(ch) && ch != (unsigned char)0xCA)
-             break;
-     }
-
-     // End of file check: the loop above ran off the end of the buffer
-     // while still skipping. The non-breaking space has to be tested
-     // explicitly, as isspace() need not recognize it; without that test a
-     // buffer ending in 0xCA would produce the error token instead of EOF.
-     if (fBuffer == fBufferEnd && (isspace(ch) || ch == (unsigned char)0xCA)) {
-         return (&gEOF);
-     }
-
-     // Remember the start of the token
-     fLastTokenStart = start = fBuffer - 1;
-
-     // Identifier scanning ('%' and '$' are accepted inside identifiers)
-     if (isalpha(ch) || (ch == '_')) {
-         while (isalnum(ch) || (ch == '_') || (ch == '%') || (ch == '$'))
-             ch = NextChar();
-
-         if (!IsEOF())
-             PushBack();
-         return (lookup(start, fBuffer));
-     }
-
-     // Number scanning
-     if (isdigit(ch)) {
-         if (ch == '0') {
-             ch = NextChar();
-             if (ch == 'x' || ch == 'X') {
-                 // Hexadecimal constant
-                 ch = NextChar();
-                 while (isxdigit(ch))
-                     ch = NextChar();
-
-                 goto numDone;
-             }
-         }
-
-         while (isdigit(ch))
-             ch = NextChar();
-
-         if (ch == '.') {
-             ch = NextChar();
-             if (ch == '.') {
-                 // '..' range symbol: both dots have been read and neither
-                 // belongs to the number. Un-read exactly those two and
-                 // finish here. Going on to numDone would un-read a third
-                 // character (the last digit), which yields an empty token
-                 // and makes the scanner see the same digit forever.
-                 PushBack();
-                 PushBack();
-                 return (new LexicalToken(kSLex_Value, start, fBuffer));
-             }
-
-             // Fraction digits
-             while (isdigit(ch))
-                 ch = NextChar();
-
-             // Optional exponent with optional sign
-             if (ch == 'e' || ch == 'E') {
-                 ch = NextChar();
-                 if (ch == '+' || ch == '-')
-                     ch = NextChar();
-
-                 if (!isdigit(ch))
-                     goto numDone;
-
-                 while (isdigit(ch))
-                     ch = NextChar();
-             }
-         }
-
-     numDone:;
-         // Allow the optional type marker.
-         if (ch == 'l' || ch == 'L')
-             ch = NextChar();
-
-         // ch is the first character after the number; give it back.
-         if (!IsEOF())
-             PushBack();
-         return (new LexicalToken(kSLex_Value, start, fBuffer));
-     }
-
-     firstCh = ch;
-     switch (firstCh) {
-         //ƒ-
-         case ';': return (&gSemiColon);
-         case '(': return (&gLParen);
-         case ')': return (&gRParen);
-         case '{': return (&gLCurly);
-         case '}': return (&gRCurly);
-         case '[': return (&gLBrace);
-         case ']': return (&gRBrace);
-         case ',': return (&gComma);
-         case '?': return (&gQuestion);
-         //ƒ+
-
-         case '"':
-         case '\'':
-             // String or character constant: scan to the matching quote.
-             // A backslash makes the scanner skip the character after it.
-             for (;;) {
-                 ch = NextChar();
-                 if (ch == '\\')
-                     NextChar();
-                 else if (IsEOF() || (ch == firstCh))
-                     break;
-             }
-
-             return (new LexicalToken(kSLex_Value, start, fBuffer));
-             //break;
-
-         case ':':
-             // ":", "::" or "::*"
-             ch = NextChar();
-             if (ch == ':') {
-                 if (NextChar() == '*')
-                     return (&gClassStar);
-                 PushBack();
-                 return (&gClassColon);
-             } else {
-                 PushBack();
-                 return (&gColon);
-             }
-             break;
-
-         case '*':
-         case '%':
-         case '^':
-         case '!':
-             // Operators with only a plain form and an '=' form
-             ch = NextChar();
-             if (ch == '=')
-                 return (w1(firstCh, true));
-             else {
-                 PushBack();
-                 return (w1(firstCh));
-             }
-             break;
-
-         case '+':
-         case '&':
-         case '|':
-         case '=':
-             // Operators with a plain form, an '=' form and a doubled form
-             ch = NextChar();
-             if (ch == '=')
-                 return (w1(firstCh, true));
-             else if (ch == firstCh)
-                 return (w2(firstCh));
-             else {
-                 PushBack();
-                 return (w1(firstCh));
-             }
-             break;
-
-         case '-':
-             // "-", "-=", "--", "->" or "->*"
-             ch = NextChar();
-             if (ch == '=')
-                 return (&gSubAssign);
-             else if (ch == '-')
-                 return (&gDecr);
-             else if (ch == '>') {
-                 ch = NextChar();
-                 if (ch == '*')
-                     return (&gPointerStar);
-                 else {
-                     PushBack();
-                     return (&gPointer);
-                 }
-             } else {
-                 PushBack();
-                 return (&gSub);
-             }
-             break;
-
-         case '<':
-         case '>':
-             // Comparison, shift, or shift-assign
-             ch = NextChar();
-             if (ch == '=')
-                 return (w1(firstCh, true));
-             else if (ch == firstCh) {
-                 ch = NextChar();
-                 if (ch == '=')
-                     return ((firstCh == '<') ? &gLSHAssign : &gRSHAssign);
-                 else {
-                     PushBack();
-                     return (w2(firstCh));
-                 }
-             } else {
-                 PushBack();
-                 return (w1(firstCh));
-             }
-             break;
-
-         case '/':
-             ch = NextChar();
-             switch (ch) {
-                 case '/':
-                     // Copy the comment up to but not including the '\n'
-                     do
-                         ch = NextChar();
-                     while ((ch > 0) && (ch != '\n'));
-                     if (ch == '\n')
-                         PushBack();
-                     return (new CommentToken(kSLex_Comment, start, fBuffer));
-                     //break;
-
-                 case '*':
-                     // Scan to the closing "*/"; a run of '*' is consumed
-                     // as a whole so that "**/" also ends the comment.
-                     while (ch > 0) {
-                         ch = NextChar();
-                         if (ch == '*') {
-                             while (ch == '*')
-                                 ch = NextChar();
-                             if (ch == '/')
-                                 break;
-                         }
-                     }
-                     return (new CommentToken(kSLex_Comment, start, fBuffer));
-                     //break;
-
-                 case '=':
-                     return (&gDivAssign);
-                     //break;
-
-                 default:
-                     PushBack();
-                     return (&gDiv);
-             }
-             break;
-
-         case '.':
-             // ".", ".*" or "..."
-             ch = NextChar();
-             if (ch == '*')
-                 return (&gPeriodStar);
-             else if (ch == '.') {
-                 if (NextChar() == '.')
-                     return (&gEllipsis);
-                 PushBack();
-             }
-
-             PushBack();
-             return (&gPeriod);
-             // break;
-
-         case '#':
-             {
-                 int type = 0;   // Assume other type
-
-                 enum {
-                       kStart            // Look for "if", "elif", "else"
-                     , kText             // Normal text
-                     , kComment          // Within a "/*" comment
-                     , kCommentToEOL     // Within a "//" comment
-                     , kString           // Within a string
-                     , kChar             // Within a character constant
-                 } state = kStart;
-
-                 /*
-                 ** State transitions:
-                 ** kStart -> kText
-                 ** kText -> {kComment, kString, kChar}
-                 ** kComment -> kText
-                 ** kString -> kText
-                 ** kChar -> kText
-                 **
-                 ** Note that we don't allow comments immediately following the
-                 ** "#". If this is a problem, extend the state machine by adding
-                 ** kStartComment, which commutes with kStart. For now, we don't
-                 ** care.
-                 ** The only reason we have a state machine here at all is because
-                 ** there is some code extent which has a multi-line comment at the
-                 ** end of a #define without line continuation characters at the end.
-                 */
-                 for (;;) {
-                     // Get the next character. Strings, character constants and
-                     // line continuations use "\" to strop the character following.
-                     ch = NextChar();
-                     if (ch == '\\') {
-                         NextChar();
-                         continue;
-                     }
-
-                     // Blanks don't matter, but newlines do.
-                     if (isspace(ch) && ch != '\n')
-                         continue;
-
-                     // EOF is fatal in all states
-                     if (IsEOF()) {
-                         Syntactic * aToken = new CommentToken(kSLex_PoundLine, start, fBuffer);
-                         aToken->MinorSexChange(type);
-                         return (aToken);
-                     }
-
-                     switch (state) {
-                         case kStart:
-                             // Determine what type of "#" this is. Following this is text.
-                             // fBuffer - 1 is the character just read, i.e. the first
-                             // non-blank character after the "#".
-                             state = kText;
-
-                             if (strncmp((char *)fBuffer - 1, "if", 2) == 0)
-                                 type = kSLex_PoundIf;
-                             else if (strncmp((char *)fBuffer - 1, "elif", 4) == 0)
-                                 type = kSLex_PoundElif;
-                             else if (strncmp((char *)fBuffer - 1, "else", 4) == 0)
-                                 type = kSLex_PoundElse;
-                             else if (strncmp((char *)fBuffer - 1, "endif", 5) == 0)
-                                 type = kSLex_PoundEndIf;
-                             else
-                                 // Do this character as text.
-                                 goto doText;
-                             break;
-
-                         case kText:
-                             // Switch to one of the subordinate states.
-                         doText: switch (ch) {
-                                 case '"':
-                                     state = kString;
-                                     break;
-                                 case '\'':
-                                     state = kChar;
-                                     break;
-
-                                 case '/':
-                                     switch (NextChar()) {
-                                         case '*':
-                                             state = kComment;
-                                             break;
-                                         case '/':
-                                             state = kCommentToEOL;
-                                             break;
-                                         default:
-                                             PushBack();
-                                             break;
-                                     }
-                                     break;
-
-                                 case '\\':
-                                     // End of line continuation character? Ignore the character
-                                     // following as if it is a newline the continuation hides
-                                     // it and if it isn't a newline it would be incorporated
-                                     // as part of the token.
-                                     // (A backslash is already consumed at the top of the
-                                     // loop, so this case is not reached.)
-                                     NextChar();
-                                     break;
-
-                                 case '\n':
-                                     {
-                                         // The newline ends the "#" line but is not part of it.
-                                         PushBack();
-                                         Syntactic * aToken = new CommentToken(kSLex_PoundLine, start, fBuffer);
-                                         aToken->MinorSexChange(type);
-                                         return (aToken);
-                                     }
-                             }
-                             break;
-
-                         case kComment:
-                             // Check if this is the end of the comment
-                             if (ch == '*') {
-                                 if (NextChar() == '/')
-                                     state = kText;
-                                 else
-                                     PushBack();
-                             }
-                             break;
-
-                         case kCommentToEOL:
-                             // Keep going to the end of the comment line. If there is
-                             // a continuation character, check if the character following
-                             // is a newline. If it is, the state becomes kText. If the
-                             // character is a newline, then change state to kText and let
-                             // doText do the work. Otherwise ignore the character
-                             // (The backslash test is not reached either: see the
-                             // top of the loop.)
-                             if (ch == '\\') {
-                                 if (NextChar() == '\n')
-                                     state = kText;
-                             } else if (ch == '\n') {
-                                 state = kText;
-                                 goto doText;
-                             }
-                             break;
-
-                         case kString:
-                             if (ch == '"')
-                                 state = kText;
-                             break;
-                         case kChar:
-                             if (ch == '\'')
-                                 state = kText;
-                             break;
-                     }
-                 }
-             }
-             break;
-
-         case '\\':
-             // Backslash-newline outside a "#" line is a continuation token;
-             // a backslash followed by anything else is an error.
-             ch = NextChar();
-             if (ch == '\n')
-                 return (&gContinuation);
-             else {
-                 PushBack();
-                 return (&gErr);
-             }
-             break;
-
-         default:
-             return (w1(firstCh));
-     }
- }
-
-
- // CScanner::LineNumber
- //   Return the 1-based line number of the last token: one plus the number
- //   of '\n' bytes found from fBufferStart up to and including
- //   fLastTokenStart.
- #pragma segment CScanner
- size_t CScanner::LineNumber() const
- {
-     size_t newlines = 0;
-
-     for (const unsigned char *scan = fBufferStart; scan <= fLastTokenStart; ++scan) {
-         if (*scan == '\n')
-             ++newlines;
-     }
-
-     return (newlines + 1);
- }
-
-
-