home *** CD-ROM | disk | FTP | other *** search
/ AmigActive 20 / AACD20.BIN / AACD / Programming / Jikes / Source / src / scanner.cpp < prev    next >
Encoding:
C/C++ Source or Header  |  2001-02-24  |  51.4 KB  |  1,582 lines

  1. // $Id: scanner.cpp,v 1.17 2001/01/10 16:49:45 mdejong Exp $
  2. //
  3. // This software is subject to the terms of the IBM Jikes Compiler
  4. // License Agreement available at the following URL:
  5. // http://www.ibm.com/research/jikes.
  6. // Copyright (C) 1996, 1998, International Business Machines Corporation
  7. // and others.  All Rights Reserved.
  8. // You must accept the terms of that agreement to use this software.
  9. //
  10. #include "scanner.h"
  11. #include "control.h"
  12. #include "error.h"
  13.  
  14. #ifdef    HAVE_JIKES_NAMESPACE
  15. namespace Jikes {    // Open namespace Jikes block
  16. #endif
  17.  
  18. int (*Scanner::scan_keyword[13]) (wchar_t *p1) =
  19. {
  20.     ScanKeyword0,
  21.     ScanKeyword0,
  22.     ScanKeyword2,
  23.     ScanKeyword3,
  24.     ScanKeyword4,
  25.     ScanKeyword5,
  26.     ScanKeyword6,
  27.     ScanKeyword7,
  28.     ScanKeyword8,
  29.     ScanKeyword9,
  30.     ScanKeyword10,
  31.     ScanKeyword0,
  32.     ScanKeyword12
  33. };
  34.  
  35.  
  36. //
  37. // The constructor initializes all utility variables.
  38. //
  39. Scanner::Scanner(Control &control_) : control(control_)
  40. {
  41.     //
  42.     // If this assertion fails, the Token structure in stream.h must be redesigned !!!
  43.     //
  44.     assert(NUM_TERMINALS < 128);
  45.  
  46.     //
  47.     // -------------------------------------------------------------------------------
  48.     // We are pulling this code out because we are tired of defending it. We
  49.     // tought it was obvious that either $ should not have been used for compiler
  50.     // generated variables or that users should not be allowed to use in variable names...
  51.     // -------------------------------------------------------------------------------
  52.     //
  53.     // For version 1.1 or above a $ may not be used as part of an identifier name
  54.     // unless the user specifically requests that it be allowed.
  55.     //
  56.     //    if (! control.option.dollar)
  57.     //        Code::SetBadCode(U_DOLLAR);
  58.     //
  59.  
  60.     //
  61.     // CLASSIFY_TOKEN is a mapping from each character into a
  62.     // classification routine that is invoked when that character
  63.     // is the first character encountered in a token.
  64.     //
  65.     for (int c = 0; c < 128; c++)
  66.     {
  67.         if (Code::IsAlpha(c))
  68.              classify_token[c] = &Scanner::ClassifyId;
  69.         else if (Code::IsDigit(c))
  70.              classify_token[c] = &Scanner::ClassifyNumericLiteral;
  71.         else classify_token[c] = &Scanner::ClassifyBadToken;
  72.     }
  73.     classify_token[128] = &Scanner::ClassifyNonAsciiUnicode;
  74.  
  75.     classify_token[U_a] = &Scanner::ClassifyIdOrKeyword;
  76.     classify_token[U_b] = &Scanner::ClassifyIdOrKeyword;
  77.     classify_token[U_c] = &Scanner::ClassifyIdOrKeyword;
  78.     classify_token[U_d] = &Scanner::ClassifyIdOrKeyword;
  79.     classify_token[U_e] = &Scanner::ClassifyIdOrKeyword;
  80.     classify_token[U_f] = &Scanner::ClassifyIdOrKeyword;
  81.     classify_token[U_g] = &Scanner::ClassifyIdOrKeyword;
  82.     classify_token[U_i] = &Scanner::ClassifyIdOrKeyword;
  83.     classify_token[U_l] = &Scanner::ClassifyIdOrKeyword;
  84.     classify_token[U_n] = &Scanner::ClassifyIdOrKeyword;
  85.     classify_token[U_p] = &Scanner::ClassifyIdOrKeyword;
  86.     classify_token[U_r] = &Scanner::ClassifyIdOrKeyword;
  87.     classify_token[U_s] = &Scanner::ClassifyIdOrKeyword;
  88.     classify_token[U_t] = &Scanner::ClassifyIdOrKeyword;
  89.     classify_token[U_v] = &Scanner::ClassifyIdOrKeyword;
  90.     classify_token[U_w] = &Scanner::ClassifyIdOrKeyword;
  91.  
  92.     classify_token[U_SINGLE_QUOTE]       = &Scanner::ClassifyCharLiteral;
  93.     classify_token[U_DOUBLE_QUOTE]       = &Scanner::ClassifyStringLiteral;
  94.  
  95.     classify_token[U_PLUS]               = &Scanner::ClassifyPlus;
  96.     classify_token[U_MINUS]              = &Scanner::ClassifyMinus;
  97.     classify_token[U_EXCLAMATION]        = &Scanner::ClassifyNot;
  98.     classify_token[U_PERCENT]            = &Scanner::ClassifyMod;
  99.     classify_token[U_CARET]              = &Scanner::ClassifyXor;
  100.     classify_token[U_AMPERSAND]          = &Scanner::ClassifyAnd;
  101.     classify_token[U_STAR]               = &Scanner::ClassifyStar;
  102.     classify_token[U_BAR]                = &Scanner::ClassifyOr;
  103.     classify_token[U_TILDE]              = &Scanner::ClassifyComplement;
  104.     classify_token[U_SLASH]              = &Scanner::ClassifySlash;
  105.     classify_token[U_GREATER]            = &Scanner::ClassifyGreater;
  106.     classify_token[U_LESS]               = &Scanner::ClassifyLess;
  107.     classify_token[U_LEFT_PARENTHESIS]   = &Scanner::ClassifyLparen;
  108.     classify_token[U_RIGHT_PARENTHESIS]  = &Scanner::ClassifyRparen;
  109.     classify_token[U_LEFT_BRACE]         = &Scanner::ClassifyLbrace;
  110.     classify_token[U_RIGHT_BRACE]        = &Scanner::ClassifyRbrace;
  111.     classify_token[U_LEFT_BRACKET]       = &Scanner::ClassifyLbracket;
  112.     classify_token[U_RIGHT_BRACKET]      = &Scanner::ClassifyRbracket;
  113.     classify_token[U_SEMICOLON]          = &Scanner::ClassifySemicolon;
  114.     classify_token[U_QUESTION]           = &Scanner::ClassifyQuestion;
  115.     classify_token[U_COLON]              = &Scanner::ClassifyColon;
  116.     classify_token[U_COMMA]              = &Scanner::ClassifyComma;
  117.     classify_token[U_DOT]                = &Scanner::ClassifyPeriod;
  118.     classify_token[U_EQUAL]              = &Scanner::ClassifyEqual;
  119.  
  120.     return;
  121. }
  122.  
  123.  
  124. //
  125. // Associate a lexical stream with this file
  126. //
  127. void Scanner::Initialize(FileSymbol *file_symbol)
  128. {
  129.     lex = new LexStream(control, file_symbol);
  130.     lex -> Reset();
  131.  
  132.     current_token_index = lex -> GetNextToken(0); // Get 0th token !
  133.     current_token = &(lex -> token_stream[current_token_index]);
  134.     current_token -> SetKind(0);
  135.  
  136.     if (control.option.comments)
  137.     {
  138.         LexStream::Comment *current_comment = &(lex -> comment_stream.Next()); // add 0th comment !
  139.         current_comment -> string = NULL;
  140.         current_comment -> length = 0;
  141.         current_comment -> previous_token = -1; // No token precedes this comment
  142.         current_comment -> location = 0;
  143.     }
  144.  
  145.     lex -> line_location.Next() = 0; // mark starting location of line # 0
  146.  
  147.     return;
  148. }
  149.  
  150.  
  151. //
  152. // This is one of the main entry point for the Java lexical analyser.
  153. // Its input is the name of a regular text file. Its output is a stream
  154. // of tokens.
  155. //
  156. void Scanner::SetUp(FileSymbol *file_symbol)
  157. {
  158.     Initialize(file_symbol);
  159.     lex -> CompressSpace();
  160.     file_symbol -> lex_stream = lex;
  161.  
  162.     return;
  163. }
  164.  
  165.  
  166. //
  167. // This is one of the main entry point for the Java lexical analyser.
  168. // Its input is the name of a regular text file. Its output is a stream
  169. // of tokens.
  170. //
  171. void Scanner::Scan(FileSymbol *file_symbol)
  172. {
  173.     Initialize(file_symbol);
  174.  
  175.     lex -> ReadInput();
  176.  
  177.     cursor = lex -> InputBuffer();
  178.     if (cursor)
  179.     {
  180.         Scan();
  181.  
  182.         lex -> CompressSpace();
  183.  
  184.         //
  185.         //
  186.         //
  187.         if (control.option.dump_errors)
  188.         {
  189.             lex -> SortMessages();
  190.             for(int i = 0; i < lex -> bad_tokens.Length(); i++)
  191.                 JikesAPI::getInstance()->reportError(&(lex->bad_tokens[i]));
  192.         }
  193.         lex -> DestroyInput(); // get rid of input buffer
  194.     }
  195.     else
  196.     {
  197.         delete lex;
  198.         lex = NULL;
  199.     }
  200.  
  201.     file_symbol -> lex_stream = lex;
  202.  
  203.     return;
  204. }
  205.  
  206.  
  207. //
  208. // Scan the InputBuffer() and process all tokens and comments.
  209. //
  210. void Scanner::Scan()
  211. {
  212.     wchar_t *input_buffer_tail = &cursor[lex -> InputBufferLength()];
  213.  
  214.     //
  215.     // CURSOR is assumed to point to the next character to be scanned.
  216.     // Using CURSOR,we jump to the proper classification function
  217.     // which scans and classifies the token and returns the location of
  218.     // the character immediately following it.
  219.     //
  220.     do
  221.     {
  222.         SkipSpaces();
  223.  
  224.         //
  225.         // Allocate space for next token and set its location.
  226.         //
  227.         current_token_index = lex -> GetNextToken(cursor - lex -> InputBuffer());
  228.         current_token = &(lex -> token_stream[current_token_index]);
  229.  
  230.         (this ->* classify_token[*cursor < 128 ? *cursor : 128])();
  231.     } while (cursor < input_buffer_tail);
  232.  
  233.     //
  234.     // Add a a gate after the last line.
  235.     //
  236.     lex -> line_location.Next() = input_buffer_tail - lex -> InputBuffer();
  237.  
  238.     //
  239.     // If the brace_stack is not empty, then there are unmatched left
  240.     // braces in the input. Each unmatched left brace should point to
  241.     // the EOF token as a substitute for a matching right brace.
  242.     //
  243.     assert(current_token_index == lex -> token_stream.Length() - 1);
  244.  
  245.     for (LexStream::TokenIndex left_brace = brace_stack.Top(); left_brace; left_brace = brace_stack.Top())
  246.     {
  247.         lex -> token_stream[left_brace].SetRightBrace(current_token_index);
  248.         brace_stack.Pop();
  249.     }
  250.  
  251.     return;
  252. }
  253.  
  254.  
  255. //
  256. // CURSOR points to the starting position of a comment.  Scan the
  257. // the comment and return the location of the character immediately
  258. // following it. CURSOR is advanced accordingly.
  259. //
  260. void Scanner::ScanStarComment()
  261. {
  262.     LexStream::Comment *current_comment = (control.option.comments ? &(lex -> comment_stream.Next()) : new LexStream::Comment());
  263.     current_comment -> string = NULL;
  264.     current_comment -> previous_token = current_token_index; // the token that precedes this comment
  265.     current_comment -> location = cursor - lex -> InputBuffer();
  266.  
  267.     cursor += 2;
  268.  
  269.     //
  270.     // If this comment starts with the prefix "/**" then, it may be a document
  271.     // comment. Check whether or not it contains the deprecated tag and if so,
  272.     // mark the token preceeding it.
  273.     //
  274.     if (*cursor == U_STAR)
  275.     {
  276.         for (;;)
  277.         {
  278.             while (*cursor != U_STAR && (! Code::IsNewline(*cursor)) && *cursor != U_CTL_Z)
  279.             {
  280.                 if (cursor[0] == U_AT &&
  281.                     cursor[1] == U_d &&
  282.                     cursor[2] == U_e &&
  283.                     cursor[3] == U_p &&
  284.                     cursor[4] == U_r &&
  285.                     cursor[5] == U_e &&
  286.                     cursor[6] == U_c &&
  287.                     cursor[7] == U_a &&
  288.                     cursor[8] == U_t &&
  289.                     cursor[9] == U_e &&
  290.                     cursor[10] == U_d)
  291.                 {
  292.                     current_token -> SetDeprecated(); // the token that precedes this comment
  293.                 }
  294.                 cursor++;
  295.             }
  296.  
  297.             if (*cursor == U_STAR) // Potential comment closer
  298.             {
  299.                 while (*++cursor == U_STAR)
  300.                     ;
  301.                 if (*cursor == U_SLASH)
  302.                 {
  303.                     cursor++;
  304.                     current_comment -> length = (cursor - lex -> InputBuffer()) - current_comment -> location;
  305.                     if (! control.option.comments)
  306.                         delete current_comment;
  307.                     return;
  308.                 }
  309.             }
  310.             else if (Code::IsNewline(*cursor)) // Record new line
  311.             {
  312.                 cursor++;
  313.                 lex -> line_location.Next() = cursor - lex -> InputBuffer();
  314.             }
  315.             else break;
  316.         }
  317.     }
  318.     else
  319.     {
  320.         for (;;)
  321.         {
  322.             while (*cursor != U_STAR && (! Code::IsNewline(*cursor)) && *cursor != U_CTL_Z)
  323.                 cursor++;
  324.  
  325.             if (*cursor == U_STAR) // Potential comment closer
  326.             {
  327.                 while (*++cursor == U_STAR)
  328.                     ;
  329.                 if (*cursor == U_SLASH)
  330.                 {
  331.                     cursor++;
  332.                     current_comment -> length = (cursor - lex -> InputBuffer()) - current_comment -> location;
  333.                     if (! control.option.comments)
  334.                         delete current_comment;
  335.                     return;
  336.                 }
  337.             }
  338.             else if (Code::IsNewline(*cursor)) // Record new line
  339.             {
  340.                 cursor++;
  341.                 lex -> line_location.Next() = cursor - lex -> InputBuffer();
  342.             }
  343.             else break;
  344.         }
  345.     }
  346.  
  347.     lex -> bad_tokens.Next().Initialize(StreamError::UNTERMINATED_COMMENT,
  348.                                         current_comment -> location,
  349.                                         (unsigned) (cursor - lex -> InputBuffer()) - 1, lex);
  350.  
  351.     current_comment -> length = (cursor - lex -> InputBuffer()) - current_comment -> location;
  352.  
  353.     if (! control.option.comments)
  354.         delete current_comment;
  355.  
  356.     return;
  357. }
  358.  
  359.  
  360. //
  361. //
  362. //
  363. void Scanner::ScanSlashComment()
  364. {
  365.     if (control.option.comments)
  366.     {
  367.         LexStream::Comment *current_comment = &(lex -> comment_stream.Next());
  368.         current_comment -> string = NULL;
  369.         current_comment -> previous_token = current_token_index;  // the token that precedes this comment
  370.         current_comment -> location = cursor - lex -> InputBuffer();
  371.         for (cursor += 2; ! Code::IsNewline(*cursor); cursor++)  // skip all until \n
  372.             ;
  373.         current_comment -> length = (cursor - lex -> InputBuffer()) - current_comment -> location;
  374.     }
  375.     else
  376.     {
  377.         for (cursor += 2; ! Code::IsNewline(*cursor); cursor++)  // skip all until \n
  378.             ;
  379.     }
  380.  
  381.     return;
  382. }
  383.  
  384.  
  385. //
  386. // This procedure is invoked to skip useless spaces in the input.
  387. // It assumes upon entry that CURSOR points to the next character to
  388. // be scanned.  Before returning it sets CURSOR to the location of the
  389. // first non-space character following its initial position.
  390. //
  391. inline void Scanner::SkipSpaces()
  392. {
  393.     do
  394.     {
  395.         while (Code::IsSpaceButNotNewline(*cursor))
  396.             cursor++;
  397.         while (Code::IsNewline(*cursor))        // starting a new line?
  398.         {
  399.             cursor++;
  400.             lex -> line_location.Next() = cursor - lex -> InputBuffer();
  401.             while (Code::IsSpaceButNotNewline(*cursor))
  402.                 cursor++;
  403.         }
  404.  
  405.         while (*cursor == U_SLASH)
  406.         {
  407.             if (cursor[1] == U_STAR)
  408.                  ScanStarComment();
  409.             else if (cursor[1] == U_SLASH)
  410.                  ScanSlashComment();
  411.             else break;
  412.         }
  413.     } while (Code::IsSpace(*cursor));
  414.  
  415.     return;
  416. }
  417.  
  418.  
  419. /**********************************************************************/
  420. /**********************************************************************/
  421. /**                                                                  **/
  422. /**                           scan_keyword(i):                       **/
  423. /**                                                                  **/
  424. /**********************************************************************/
  425. /**********************************************************************/
  426. /**                                                                  **/
  427. /** Scan an identifier of length I and determine if it is a keyword. **/
  428. /**                                                                  **/
  429. /**********************************************************************/
  430. /**********************************************************************/
  431. int Scanner::ScanKeyword0(wchar_t *p1)
  432. {
  433.     return TK_Identifier;
  434. }
  435.  
  436. int Scanner::ScanKeyword2(wchar_t *p1)
  437. {
  438.     if (p1[0] == U_d && p1[1] == U_o)
  439.         return TK_do;
  440.     else if (p1[0] == U_i && p1[1] == U_f)
  441.         return TK_if;
  442.  
  443.     return TK_Identifier;
  444. }
  445.  
  446. int Scanner::ScanKeyword3(wchar_t *p1)
  447. {
  448.     switch(*p1)
  449.     {
  450.         case U_f:
  451.             if (p1[1] == U_o && p1[2] == U_r)
  452.                 return TK_for;
  453.             break;
  454.         case U_i:
  455.             if (p1[1] == U_n && p1[2] == U_t)
  456.                 return TK_int;
  457.             break;
  458.         case U_n:
  459.             if (p1[1] == U_e && p1[2] == U_w)
  460.                 return TK_new;
  461.             break;
  462.         case U_t:
  463.             if (p1[1] == U_r && p1[2] == U_y)
  464.                 return TK_try;
  465.             break;
  466.     }
  467.  
  468.     return TK_Identifier;
  469. }
  470.  
  471. int Scanner::ScanKeyword4(wchar_t *p1)
  472. {
  473.     switch (*p1)
  474.     {
  475.         case U_b:
  476.             if (p1[1] == U_y && p1[2] == U_t && p1[3] == U_e)
  477.                 return TK_byte;
  478.             break;
  479.         case U_c:
  480.             if (p1[1] == U_a && p1[2] == U_s && p1[3] == U_e)
  481.                 return TK_case;
  482.             else if (p1[1] == U_h && p1[2] == U_a && p1[3] == U_r)
  483.                 return TK_char;
  484.             break;
  485.         case U_e:
  486.             if (p1[1] == U_l && p1[2] == U_s && p1[3] == U_e)
  487.                 return TK_else;
  488.             break;
  489.         case U_g:
  490.             if (p1[1] == U_o && p1[2] == U_t && p1[3] == U_o)
  491.                 return TK_goto;
  492.             break;
  493.         case U_l:
  494.             if (p1[1] == U_o && p1[2] == U_n && p1[3] == U_g)
  495.                 return TK_long;
  496.             break;
  497.         case U_n:
  498.             if (p1[1] == U_u && p1[2] == U_l && p1[3] == U_l)
  499.                 return TK_null;
  500.             break;
  501.         case U_t:
  502.             if (p1[1] == U_h && p1[2] == U_i && p1[3] == U_s)
  503.                 return TK_this;
  504.             else if (p1[1] == U_r && p1[2] == U_u && p1[3] == U_e)
  505.                 return TK_true;
  506.             break;
  507.         case U_v:
  508.             if (p1[1] == U_o && p1[2] == U_i && p1[3] == U_d)
  509.                 return TK_void;
  510.             break;
  511.     }
  512.  
  513.     return TK_Identifier;
  514. }
  515.  
  516. int Scanner::ScanKeyword5(wchar_t *p1)
  517. {
  518.     switch (*p1)
  519.     {
  520.         case U_b:
  521.             if (p1[1] == U_r && p1[2] == U_e &&
  522.                 p1[3] == U_a && p1[4] == U_k)
  523.                 return TK_break;
  524.             break;
  525.         case U_c:
  526.             if (p1[1] == U_a && p1[2] == U_t &&
  527.                 p1[3] == U_c && p1[4] == U_h)
  528.                 return TK_catch;
  529.             else if (p1[1] == U_l && p1[2] == U_a &&
  530.                      p1[3] == U_s && p1[4] == U_s)
  531.                 return TK_class;
  532.             else if (p1[1] == U_o && p1[2] == U_n &&
  533.                      p1[3] == U_s && p1[4] == U_t)
  534.                 return TK_const;
  535.             break;
  536.         case U_f:
  537.             if (p1[1] == U_a && p1[2] == U_l &&
  538.                 p1[3] == U_s && p1[4] == U_e)
  539.                 return TK_false;
  540.             else if (p1[1] == U_i && p1[2] == U_n &&
  541.                      p1[3] == U_a && p1[4] == U_l)
  542.                 return TK_final;
  543.             else if (p1[1] == U_l && p1[2] == U_o &&
  544.                      p1[3] == U_a && p1[4] == U_t)
  545.                 return TK_float;
  546.             break;
  547.         case U_s:
  548.             if (p1[1] == U_h && p1[2] == U_o &&
  549.                 p1[3] == U_r && p1[4] == U_t)
  550.                 return TK_short;
  551.             else if (p1[1] == U_u && p1[2] == U_p &&
  552.                      p1[3] == U_e && p1[4] == U_r)
  553.                 return TK_super;
  554.             break;
  555.         case U_t:
  556.             if (p1[1] == U_h && p1[2] == U_r &&
  557.                 p1[3] == U_o && p1[4] == U_w)
  558.                 return TK_throw;
  559.             break;
  560.         case U_w:
  561.             if (p1[1] == U_h && p1[2] == U_i &&
  562.                 p1[3] == U_l && p1[4] == U_e)
  563.                 return TK_while;
  564.             break;
  565.     }
  566.  
  567.     return TK_Identifier;
  568. }
  569.  
  570. int Scanner::ScanKeyword6(wchar_t *p1)
  571. {
  572.     switch (*p1)
  573.     {
  574.         case U_d:
  575.             if (p1[1] == U_o && p1[2] == U_u &&
  576.                      p1[3] == U_b && p1[4] == U_l && p1[5] == U_e)
  577.                 return TK_double;
  578.             break;
  579.         case U_i:
  580.             if (p1[1] == U_m && p1[2] == U_p &&
  581.                 p1[3] == U_o && p1[4] == U_r && p1[5] == U_t)
  582.                 return TK_import;
  583.             break;
  584.         case U_n:
  585.             if (p1[1] == U_a && p1[2] == U_t &&
  586.                 p1[3] == U_i && p1[4] == U_v && p1[5] == U_e)
  587.                 return TK_native;
  588.             break;
  589.         case U_p:
  590.             if (p1[1] == U_u && p1[2] == U_b &&
  591.                 p1[3] == U_l && p1[4] == U_i && p1[5] == U_c)
  592.                 return TK_public;
  593.             break;
  594.         case U_r:
  595.             if (p1[1] == U_e && p1[2] == U_t &&
  596.                 p1[3] == U_u && p1[4] == U_r && p1[5] == U_n)
  597.                 return TK_return;
  598.             break;
  599.         case U_s:
  600.             if (p1[1] == U_t && p1[2] == U_a &&
  601.                 p1[3] == U_t && p1[4] == U_i && p1[5] == U_c)
  602.                     return TK_static;
  603.             else if (p1[1] == U_w && p1[2] == U_i &&
  604.                      p1[3] == U_t && p1[4] == U_c && p1[5] == U_h)
  605.                 return TK_switch;
  606.             break;
  607.         case U_t:
  608.             if (p1[1] == U_h && p1[2] == U_r &&
  609.                 p1[3] == U_o && p1[4] == U_w && p1[5] == U_s)
  610.                 return TK_throws;
  611.             break;
  612.     }
  613.  
  614.     return TK_Identifier;
  615. }
  616.  
  617. int Scanner::ScanKeyword7(wchar_t *p1)
  618. {
  619.     switch(*p1)
  620.     {
  621.         case U_b:
  622.             if (p1[1] == U_o && p1[2] == U_o && p1[3] == U_l &&
  623.                 p1[4] == U_e && p1[5] == U_a && p1[6] == U_n)
  624.                 return TK_boolean;
  625.         case U_d:
  626.             if (p1[1] == U_e && p1[2] == U_f && p1[3] == U_a &&
  627.                 p1[4] == U_u && p1[5] == U_l && p1[6] == U_t)
  628.                 return TK_default;
  629.             break;
  630.         case U_e:
  631.             if (p1[1] == U_x && p1[2] == U_t && p1[3] == U_e &&
  632.                 p1[4] == U_n && p1[5] == U_d && p1[6] == U_s)
  633.                 return TK_extends;
  634.             break;
  635.         case U_f:
  636.             if (p1[1] == U_i && p1[2] == U_n && p1[3] == U_a &&
  637.                 p1[4] == U_l && p1[5] == U_l && p1[6] == U_y)
  638.                 return TK_finally;
  639.             break;
  640.         case U_p:
  641.             if (p1[1] == U_a && p1[2] == U_c && p1[3] == U_k &&
  642.                 p1[4] == U_a && p1[5] == U_g && p1[6] == U_e)
  643.                 return TK_package;
  644.             else if (p1[1] == U_r && p1[2] == U_i && p1[3] == U_v &&
  645.                      p1[4] == U_a && p1[5] == U_t && p1[6] == U_e)
  646.                 return TK_private;
  647.             break;
  648.     }
  649.  
  650.     return TK_Identifier;
  651. }
  652.  
  653. int Scanner::ScanKeyword8(wchar_t *p1)
  654. {
  655.     switch(*p1)
  656.     {
  657.         case U_a:
  658.             if (p1[1] == U_b && p1[2] == U_s &&
  659.                 p1[3] == U_t && p1[4] == U_r &&
  660.                 p1[5] == U_a && p1[6] == U_c && p1[7] == U_t)
  661.                  return TK_abstract;
  662.             break;
  663.         case U_c:
  664.             if (p1[1] == U_o && p1[2] == U_n &&
  665.                 p1[3] == U_t && p1[4] == U_i &&
  666.                 p1[5] == U_n && p1[6] == U_u && p1[7] == U_e)
  667.                  return TK_continue;
  668.             break;
  669.         case U_s:
  670.             if (p1[1] == U_t && p1[2] == U_r &&
  671.                 p1[3] == U_i && p1[4] == U_c &&
  672.                 p1[5] == U_t && p1[6] == U_f && p1[7] == U_p)
  673.                  return TK_strictfp;
  674.             break;
  675.         case U_v:
  676.             if (p1[1] == U_o && p1[2] == U_l &&
  677.                 p1[3] == U_a && p1[4] == U_t &&
  678.                 p1[5] == U_i && p1[6] == U_l && p1[7] == U_e)
  679.                  return TK_volatile;
  680.             break;
  681.     }
  682.  
  683.     return TK_Identifier;
  684. }
  685.  
  686. int Scanner::ScanKeyword9(wchar_t *p1)
  687. {
  688.     if (p1[0] == U_i && p1[1] == U_n && p1[2] == U_t &&
  689.         p1[3] == U_e && p1[4] == U_r && p1[5] == U_f &&
  690.         p1[6] == U_a && p1[7] == U_c && p1[8] == U_e)
  691.         return TK_interface;
  692.     else if (p1[0] == U_p && p1[1] == U_r && p1[2] == U_o &&
  693.              p1[3] == U_t && p1[4] == U_e && p1[5] == U_c &&
  694.              p1[6] == U_t && p1[7] == U_e && p1[8] == U_d)
  695.         return TK_protected;
  696.     else if (p1[0] == U_t && p1[1] == U_r && p1[2] == U_a &&
  697.              p1[3] == U_n && p1[4] == U_s && p1[5] == U_i &&
  698.              p1[6] == U_e && p1[7] == U_n && p1[8] == U_t)
  699.         return TK_transient;
  700.  
  701.     return TK_Identifier;
  702. }
  703.  
  704. int Scanner::ScanKeyword10(wchar_t *p1)
  705. {
  706.     if (p1[0] == U_i && p1[1] == U_m && p1[2] == U_p &&
  707.         p1[3] == U_l && p1[4] == U_e && p1[5] == U_m &&
  708.         p1[6] == U_e && p1[7] == U_n && p1[8] == U_t && p1[9] == U_s)
  709.         return TK_implements;
  710.     else if (p1[0] == U_i && p1[1] == U_n && p1[2] == U_s &&
  711.              p1[3] == U_t && p1[4] == U_a && p1[5] == U_n &&
  712.              p1[6] == U_c && p1[7] == U_e && p1[8] == U_o && p1[9] == U_f)
  713.         return TK_instanceof;
  714.  
  715.     return TK_Identifier;
  716. }
  717.  
  718. int Scanner::ScanKeyword12(wchar_t *p1)
  719. {
  720.     if (p1[0] == U_s && p1[1] == U_y && p1[2] == U_n &&
  721.         p1[3] == U_c && p1[4] == U_h && p1[5] == U_r &&
  722.         p1[6] == U_o && p1[7] == U_n && p1[8] == U_i &&
  723.         p1[9] == U_z && p1[10] == U_e&& p1[11] == U_d)
  724.         return TK_synchronized;
  725.  
  726.     return TK_Identifier;
  727. }
  728.  
  729. /**********************************************************************/
  730. /*                           CHECK_OctalLiteral:                      */
  731. /**********************************************************************/
  732. /* Verify that an octal token is legal. If not, issue a message.      */
  733. /**********************************************************************/
  734. inline void Scanner::CheckOctalLiteral(wchar_t *cursor, wchar_t *tail)
  735. {
  736.     if (cursor[0] == U_0 && cursor[1] != U_x && cursor[1] != U_X)
  737.     {
  738.         wchar_t *p;
  739.         for (p = cursor + 1; p < tail; p++)
  740.         {
  741.             if (*p == U_8 || *p == U_9)
  742.                 break;
  743.         }
  744.  
  745.         if (p < tail)
  746.             lex -> bad_tokens.Next().Initialize(StreamError::BAD_OCTAL_CONSTANT,
  747.                                                 (unsigned) (cursor - lex -> InputBuffer()),
  748.                                                 (unsigned) (tail - lex -> InputBuffer()) - 1, lex);
  749.     }
  750.  
  751.     return;
  752. }
  753.  
  754.  
  755. /**********************************************************************/
  756. /*                      ClassifyCharLiteral:                          */
  757. /**********************************************************************/
  758. /* This procedure is invoked to scan a character literal or a large   */
  759. /* character literal. A large character literal is preceded by the    */
  760. /* letter L (capital L). After the character literal has been scanned */
  761. /* and classified, it is entered in the table without its closing     */
  762. /* quote but with the opening quote (preceded by L if it's a large    */
  763. /* character literal).                                                */
  764. /**********************************************************************/
  765. void Scanner::ClassifyCharLiteral()
  766. {
  767.     current_token -> SetKind(TK_CharacterLiteral);
  768.  
  769.     wchar_t *ptr = cursor + 1;
  770.  
  771.     while (*ptr != U_SINGLE_QUOTE && (! Code::IsNewline(*ptr)))
  772.     {
  773.         if (*ptr++ == U_BACKSLASH)   // In any case, skip the character
  774.         {                            // If it was a backslash,
  775.             if (! Code::IsNewline(*ptr)) // if the next char is not eol, skip it.
  776.                 ptr++;
  777.         }
  778.     }
  779.  
  780.     int len = ptr - cursor;
  781.     if (*ptr == U_SINGLE_QUOTE)
  782.     {
  783.         if (len == 1)
  784.             lex -> bad_tokens.Next().Initialize(StreamError::EMPTY_CHARACTER_CONSTANT,
  785.                                                 current_token -> Location(),
  786.                                                 (unsigned) (ptr - lex -> InputBuffer()), lex);
  787.         ptr++;
  788.     }
  789.     else
  790.     {
  791.         if (len == 1) /* Definitely, an isolated quote */
  792.             current_token -> SetKind(0);
  793.         lex -> bad_tokens.Next().Initialize(StreamError::UNTERMINATED_CHARACTER_CONSTANT,
  794.                                             current_token -> Location(),
  795.                                             (unsigned) (ptr - lex -> InputBuffer()) - 1, lex);
  796.     }
  797.  
  798.     current_token -> SetSymbol(control.char_table.FindOrInsertLiteral(cursor, ptr - cursor));
  799.  
  800.     cursor = ptr;
  801.     return;
  802. }
  803.  
  804.  
  805. /**********************************************************************/
  806. /*                     CLASSIFY_STRINGLITERAL:                        */
  807. /**********************************************************************/
  808. /* This procedure is invoked to scan a string literal or a large      */
  809. /* string literal. A large string literal is preceded by the letter   */
  810. /* L (capital L). After the string literal has been scanned and       */
  811. /* classified, it is entered in the table without its closing double  */
  812. /* quote but with the opening quote (preceded by L if it's a large    */
  813. /* string literal).                                                   */
  814. /**********************************************************************/
  815. void Scanner::ClassifyStringLiteral()
  816. {
  817.     current_token -> SetKind(TK_StringLiteral);
  818.  
  819.     wchar_t *ptr = cursor + 1;
  820.  
  821.     while (*ptr != U_DOUBLE_QUOTE && (! Code::IsNewline(*ptr)))
  822.     {
  823.         if (*ptr++ == U_BACKSLASH)   // In any case, skip the character
  824.         {                            // If it was a backslash,
  825.             if (! Code::IsNewline(*ptr)) // if the next char is not eol, skip it.
  826.                 ptr++;
  827.         }
  828.     }
  829.  
  830.     if (*ptr == U_DOUBLE_QUOTE)
  831.         ptr++;
  832.     else
  833.     {
  834.         if ((ptr - cursor) == 1) /* Definitely, an isolated double quote */
  835.             current_token -> SetKind(0);
  836.         lex -> bad_tokens.Next().Initialize(StreamError::UNTERMINATED_STRING_CONSTANT,
  837.                                             current_token -> Location(),
  838.                                             (unsigned) (ptr - lex -> InputBuffer()) - 1, lex);
  839.     }
  840.  
  841.     current_token -> SetSymbol(control.string_table.FindOrInsertLiteral(cursor, ptr - cursor));
  842.  
  843.     cursor = ptr;
  844.     return;
  845. }
  846.  
  847.  
  848. /**********************************************************************/
  849. /*                     CLASSIFYIDORKEYWORD:                        */
  850. /**********************************************************************/
  851. /* This procedure is invoked when CURSOR points to one of the         */
  852. /* following characters:                                              */
  853. /*                                                                    */
  854. /*      'a'                                                           */
  855. /*      'b'                                                           */
  856. /*      'c'                                                           */
  857. /*      'd'                                                           */
  858. /*      'e'                                                           */
  859. /*      'f'                                                           */
  860. /*      'g'                                                           */
  861. /*      'i'                                                           */
  862. /*      'l'                                                           */
  863. /*      'n'                                                           */
  864. /*      'o'                                                           */
  865. /*      'p'                                                           */
  866. /*      'r'                                                           */
  867. /*      's'                                                           */
  868. /*      't'                                                           */
  869. /*      'v'                                                           */
  870. /*      'w'                                                           */
  871. /*                                                                    */
  872. /* It scans the identifier and checks whether or not it is a keyword. */
  873. /*                                                                    */
  874. /* NOTE that the use of that check is a time-optimization that is not */
  875. /* required for correctness.                                          */
  876. /**********************************************************************/
  877. void Scanner::ClassifyIdOrKeyword()
  878. {
  879.     wchar_t *ptr = cursor + 1;
  880.  
  881.     while (Code::IsAlnum(*ptr))
  882.         ptr++;
  883.     int len = ptr - cursor;
  884.  
  885.     current_token -> SetKind(len < 13 ? (scan_keyword[len])(cursor) : TK_Identifier);
  886.     if (current_token -> Kind() == TK_Identifier)
  887.     {
  888.         current_token -> SetSymbol(control.FindOrInsertName(cursor, len));
  889.         for (int i = 0; i < control.option.keyword_map.Length(); i++)
  890.         {
  891.             if (control.option.keyword_map[i].length == len && wcsncmp(cursor, control.option.keyword_map[i].name, len) == 0)
  892.                 current_token -> SetKind(control.option.keyword_map[i].key);
  893.         }
  894.     }
  895.     else if (current_token -> Kind() == TK_class || current_token -> Kind() == TK_interface)
  896.     {
  897.         //
  898.         // This type keyword is not nested. When we encounter an occurrence of the keyword
  899.         // class or interface that is not enclosed in at least one set of braces, we keep track
  900.         // of it by adding it to a list.
  901.         //
  902.         if (brace_stack.Size() == 0)
  903.             lex -> type_index.Next() = current_token_index;
  904.     }
  905.  
  906.     cursor = ptr;
  907.  
  908.     return;
  909. }
  910.  
  911. /**********************************************************************/
  912. /*                             CLASSIFY_ID:                           */
  913. /**********************************************************************/
  914. /* This procedure is invoked when CURSOR points to an alphabetic      */
  915. /* character other than the ones identified above or '$' or '_'.      */
  916. /* A token that starts with one of these letters is an identifier.    */
  917. /**********************************************************************/
  918. void Scanner::ClassifyId()
  919. {
  920.     wchar_t *ptr = cursor + 1;
  921.  
  922.     while (Code::IsAlnum(*ptr))
  923.         ptr++;
  924.  
  925.     int len = ptr - cursor;
  926.  
  927.     current_token -> SetKind(TK_Identifier);
  928.     current_token -> SetSymbol(control.FindOrInsertName(cursor, len));
  929.  
  930.     for (int i = 0; i < control.option.keyword_map.Length(); i++)
  931.     {
  932.         if (control.option.keyword_map[i].length == len && wcsncmp(cursor, control.option.keyword_map[i].name, len) == 0)
  933.             current_token -> SetKind(control.option.keyword_map[i].key);
  934.     }
  935.  
  936.     cursor = ptr;
  937.     return;
  938. }
  939.  
  940.  
  941. /**********************************************************************/
  942. /*                     CLASSIFY_NUMERICLITERAL:                       */
  943. /**********************************************************************/
  944. /* This procedure is invoked when CURSOR points directly to one of    */
  945. /* the characters below or to a '.' followed by one of the characters */
  946. /* below:                                                             */
  947. /*                                                                    */
  948. /*        case '0': case '1': case '2': case '3': case '4':           */
  949. /*        case '5': case '6': case '7': case '8': case '9':           */
  950. /*                                                                    */
  951. /* Such a token is classified as a numeric literal:                   */
  952. /*                                                                    */
  953. /*   TK_LongLiteral, TK_IntegerLiteral,                               */
  954. /*   TK_DOUBLELiteral, TK_FloatingPointLiteral                        */
  955. /**********************************************************************/
  956. void Scanner::ClassifyNumericLiteral()
  957. {
  958.     /******************************************************************/
  959.     /* Scan the initial sequence of digits if any.                    */
  960.     /******************************************************************/
  961.     wchar_t *ptr;
  962.     for (ptr = cursor; Code::IsDigit(*ptr); ptr++)
  963.         ;
  964.  
  965.     /******************************************************************/
  966.     /* We now take an initial crack at classifying the numeric token. */
  967.     /* we have four cases to consider.                                */
  968.     /*                                                                */
  969.     /* 1) If the initial (perhaps an empty) sequence of digits is     */
  970.     /*    followed by a period ('.'), we have a floating-constant.    */
  971.     /*    We scan the sequence of digits (if any) that follows the    */
  972.     /*    period.                                                     */
  973.     /*                                                                */
  974.     /* 2) Otherwise, we hava an integer literal.                      */
  975.     /*                                                                */
  976.     /*    If the initial (can't be empty) sequence of digits start    */
  977.     /*    with "0x" or "0X" we have a hexadecimal constant:           */
  978.     /*    continue scanning all hex-digits that follow the 'x'.       */
  979.     /******************************************************************/
  980.     if (*ptr == U_DOT)
  981.     {
  982.         current_token -> SetKind(TK_DoubleLiteral);
  983.         for (ptr++; Code::IsDigit(*ptr); ptr++)
  984.             ;
  985.     }
  986.     else
  987.     {
  988.         current_token -> SetKind(TK_IntegerLiteral);
  989.         if (*cursor == U_0 && (cursor[1] == U_x || cursor[1] == U_X))
  990.         {
  991.             ptr = cursor + 2;
  992.             if (isxdigit(*ptr))
  993.             {
  994.                 for (ptr++; isxdigit(*ptr); ptr++)
  995.                     ;
  996.             }
  997.             else lex -> bad_tokens.Next().Initialize(StreamError::INVALID_HEX_CONSTANT,
  998.                                                      current_token -> Location(),
  999.                                                      (unsigned) (ptr - lex -> InputBuffer()) - 1, lex);
  1000.         }
  1001.     }
  1002.  
  1003.     /******************************************************************/
  1004.     /* If the initial numeric token is followed by an exponent, then  */
  1005.     /* it is a floating-constant. If that's the case, the literal is  */
  1006.     /* reclassified ant the exponent is scanned.                      */
  1007.     /*                                                                */
  1008.     /* NOTE that as 'E' and 'e' are legitimate hexadecimal digits, we */
  1009.     /* don't have to worry about a hexadecimal constant being used as */
  1010.     /* the prefix of a floating-constant. E.g., 0x123e12 is tokenized */
  1011.     /* as a single hexadecimal digit. The string 0x123e+12 gets       */
  1012.     /* broken down as the hex number 0x123e, the operator '+' and the */
  1013.     /* decimal constant 12.                                           */
  1014.     /******************************************************************/
  1015.     if (*ptr == U_e || *ptr == U_E)
  1016.     {
  1017.         current_token -> SetKind(TK_DoubleLiteral);
  1018.  
  1019.         ptr++; /* Skip the 'e' or 'E' */
  1020.  
  1021.         if (*ptr == U_PLUS || *ptr == U_MINUS)
  1022.             ptr++; /* Skip the '+' or '-' */
  1023.  
  1024.         if (Code::IsDigit(*ptr))
  1025.         {
  1026.             for (ptr++; Code::IsDigit(*ptr); ptr++)
  1027.                 ;
  1028.         }
  1029.         else lex -> bad_tokens.Next().Initialize(StreamError::INVALID_FLOATING_CONSTANT_EXPONENT,
  1030.                                                  current_token -> Location(),
  1031.                                                  (unsigned) (ptr - lex -> InputBuffer()) - 1, lex);
  1032.     }
  1033.  
  1034.     /******************************************************************/
  1035.     /* A numeric constant may be suffixed by a letter that further    */
  1036.     /* qualifies what kind of a constant it is. We check for these    */
  1037.     /* suffixes here.                                                 */
  1038.     /******************************************************************/
  1039.     int len;
  1040.  
  1041.     if (*ptr == U_f || *ptr == U_F)
  1042.     {
  1043.         ptr++;
  1044.         len = ptr - cursor;
  1045.         current_token -> SetSymbol(control.float_table.FindOrInsertLiteral(cursor, len));
  1046.         current_token -> SetKind(TK_FloatingPointLiteral);
  1047.     }
  1048.     else if (*ptr == U_d || *ptr == U_D)
  1049.     {
  1050.         ptr++;
  1051.         len = ptr - cursor;
  1052.         current_token -> SetSymbol(control.double_table.FindOrInsertLiteral(cursor, len));
  1053.         current_token -> SetKind(TK_DoubleLiteral);
  1054.     }
  1055.     else if (current_token -> Kind() == TK_IntegerLiteral)
  1056.     {
  1057.         if (*ptr == U_l || *ptr == U_L)
  1058.         {
  1059.             ptr++; /* Skip the 'l' or 'L' */
  1060.             len = ptr - cursor;
  1061.             current_token -> SetSymbol(control.long_table.FindOrInsertLiteral(cursor, len));
  1062.             current_token -> SetKind(TK_LongLiteral);
  1063.         }
  1064.         else
  1065.         {
  1066.             len = ptr - cursor;
  1067.             current_token -> SetSymbol(control.int_table.FindOrInsertLiteral(cursor, len));
  1068.         }
  1069.  
  1070.         CheckOctalLiteral(cursor, ptr);
  1071.     }
  1072.     else
  1073.     {
  1074.         len = ptr - cursor;
  1075.         current_token -> SetSymbol(control.double_table.FindOrInsertLiteral(cursor, len));
  1076.         current_token -> SetKind(TK_DoubleLiteral);
  1077.     }
  1078.  
  1079.     /******************************************************************/
  1080.     /* We now have scanned the complete token and it has been properly*/
  1081.     /* classified. CURSOR points to its first character in the buffer */
  1082.     /* and PTR points to the character immediately following it. We   */
  1083.     /* insert the name into the name table and if the token is an     */
  1084.     /* octal constant, we check that all the digits in its name are   */
  1085.     /* in the range 0-7.                                              */
  1086.     /******************************************************************/
  1087.  
  1088.     cursor = ptr;
  1089.     return;
  1090. }
  1091.  
  1092.  
  1093. /**********************************************************************/
  1094. /*                         CLASSIFY_COLON:                            */
  1095. /**********************************************************************/
  1096. void Scanner::ClassifyColon()
  1097. {
  1098.     current_token -> SetKind(TK_COLON);
  1099.  
  1100.     cursor++;
  1101.  
  1102.     return;
  1103. }
  1104.  
  1105.  
  1106. /**********************************************************************/
  1107. /*                          CLASSIFY_PLUS:                            */
  1108. /**********************************************************************/
  1109. void Scanner::ClassifyPlus()
  1110. {
  1111.     cursor++;
  1112.  
  1113.     if (*cursor == U_PLUS)
  1114.     {
  1115.         cursor++;
  1116.         current_token -> SetKind(TK_PLUS_PLUS);
  1117.     }
  1118.     else if (*cursor == U_EQUAL)
  1119.     {
  1120.         cursor++;
  1121.         current_token -> SetKind(TK_PLUS_EQUAL);
  1122.     }
  1123.     else current_token -> SetKind(TK_PLUS);
  1124.  
  1125.     return;
  1126. }
  1127.  
  1128.  
  1129. /**********************************************************************/
  1130. /*                         CLASSIFY_MINUS:                            */
  1131. /**********************************************************************/
  1132. void Scanner::ClassifyMinus()
  1133. {
  1134.     cursor++;
  1135.  
  1136.     if (*cursor == U_MINUS)
  1137.     {
  1138.         cursor++;
  1139.         current_token -> SetKind(TK_MINUS_MINUS);
  1140.     }
  1141.     else if (*cursor == U_EQUAL)
  1142.     {
  1143.         cursor++;
  1144.         current_token -> SetKind(TK_MINUS_EQUAL);
  1145.     }
  1146.     else current_token -> SetKind(TK_MINUS);
  1147.  
  1148.     return;
  1149. }
  1150.  
  1151.  
  1152. /**********************************************************************/
  1153. /*                          CLASSIFY_STAR:                            */
  1154. /**********************************************************************/
  1155. void Scanner::ClassifyStar()
  1156. {
  1157.     cursor++;
  1158.  
  1159.     if (*cursor == U_EQUAL)
  1160.     {
  1161.         cursor++;
  1162.         current_token -> SetKind(TK_MULTIPLY_EQUAL);
  1163.     }
  1164.     else current_token -> SetKind(TK_MULTIPLY);
  1165.  
  1166.     return;
  1167. }
  1168.  
  1169.  
  1170. /**********************************************************************/
  1171. /*                         CLASSIFY_SLASH:                            */
  1172. /**********************************************************************/
  1173. void Scanner::ClassifySlash()
  1174. {
  1175.     cursor++;
  1176.  
  1177.     if (*cursor == U_EQUAL)
  1178.     {
  1179.         cursor++;
  1180.         current_token -> SetKind(TK_DIVIDE_EQUAL);
  1181.     }
  1182.     else current_token -> SetKind(TK_DIVIDE);
  1183.  
  1184.     return;
  1185. }
  1186.  
  1187.  
  1188. /**********************************************************************/
  1189. /*                         CLASSIFY_LESS:                             */
  1190. /**********************************************************************/
  1191. void Scanner::ClassifyLess()
  1192. {
  1193.     cursor++;
  1194.  
  1195.     if (*cursor == U_EQUAL)
  1196.     {
  1197.         cursor++;
  1198.         current_token -> SetKind(TK_LESS_EQUAL);
  1199.     }
  1200.     else if (*cursor == U_LESS)
  1201.     {
  1202.         cursor++;
  1203.  
  1204.         if (*cursor == U_EQUAL)
  1205.         {
  1206.             cursor++;
  1207.             current_token -> SetKind(TK_LEFT_SHIFT_EQUAL);
  1208.         }
  1209.         else current_token -> SetKind(TK_LEFT_SHIFT);
  1210.     }
  1211.     else current_token -> SetKind(TK_LESS);
  1212.  
  1213.     return;
  1214. }
  1215.  
  1216.  
  1217. /**********************************************************************/
  1218. /*                        CLASSIFY_GREATER:                           */
  1219. /**********************************************************************/
  1220. void Scanner::ClassifyGreater()
  1221. {
  1222.     cursor++;
  1223.  
  1224.     if (*cursor == U_EQUAL)
  1225.     {
  1226.         cursor++;
  1227.         current_token -> SetKind(TK_GREATER_EQUAL);
  1228.     }
  1229.     else if (*cursor == U_GREATER)
  1230.     {
  1231.         cursor++;
  1232.  
  1233.         if (*cursor == U_EQUAL)
  1234.         {
  1235.             cursor++;
  1236.             current_token -> SetKind(TK_RIGHT_SHIFT_EQUAL);
  1237.         }
  1238.         else if (*cursor == U_GREATER)
  1239.         {
  1240.             cursor++;
  1241.  
  1242.             if (*cursor == U_EQUAL)
  1243.             {
  1244.                 cursor++;
  1245.                 current_token -> SetKind(TK_UNSIGNED_RIGHT_SHIFT_EQUAL);
  1246.             }
  1247.             else current_token -> SetKind(TK_UNSIGNED_RIGHT_SHIFT);
  1248.         }
  1249.         else current_token -> SetKind(TK_RIGHT_SHIFT);
  1250.     }
  1251.     else current_token -> SetKind(TK_GREATER);
  1252.  
  1253.     return;
  1254. }
  1255.  
  1256.  
  1257. /**********************************************************************/
  1258. /*                          CLASSIFY_AND:                             */
  1259. /**********************************************************************/
  1260. void Scanner::ClassifyAnd()
  1261. {
  1262.     cursor++;
  1263.  
  1264.     if (*cursor == U_AMPERSAND)
  1265.     {
  1266.         cursor++;
  1267.         current_token -> SetKind(TK_AND_AND);
  1268.     }
  1269.     else if (*cursor == U_EQUAL)
  1270.     {
  1271.         cursor++;
  1272.         current_token -> SetKind(TK_AND_EQUAL);
  1273.     }
  1274.     else current_token -> SetKind(TK_AND);
  1275.  
  1276.     return;
  1277. }
  1278.  
  1279.  
  1280. /**********************************************************************/
  1281. /*                          CLASSIFY_OR:                              */
  1282. /**********************************************************************/
  1283. void Scanner::ClassifyOr()
  1284. {
  1285.     cursor++;
  1286.  
  1287.     if (*cursor == U_BAR)
  1288.     {
  1289.         cursor++;
  1290.         current_token -> SetKind(TK_OR_OR);
  1291.     }
  1292.     else if (*cursor == U_EQUAL)
  1293.     {
  1294.         cursor++;
  1295.         current_token -> SetKind(TK_OR_EQUAL);
  1296.     }
  1297.     else current_token -> SetKind(TK_OR);
  1298.  
  1299.     return;
  1300. }
  1301.  
  1302.  
  1303. /**********************************************************************/
  1304. /*                          CLASSIFY_XOR:                             */
  1305. /**********************************************************************/
  1306. void Scanner::ClassifyXor()
  1307. {
  1308.     cursor++;
  1309.  
  1310.     if (*cursor == U_EQUAL)
  1311.     {
  1312.         cursor++;
  1313.         current_token -> SetKind(TK_XOR_EQUAL);
  1314.     }
  1315.     else current_token -> SetKind(TK_XOR);
  1316.  
  1317.     return;
  1318. }
  1319.  
  1320.  
  1321. /**********************************************************************/
  1322. /*                          CLASSIFY_NOT:                             */
  1323. /**********************************************************************/
  1324. void Scanner::ClassifyNot()
  1325. {
  1326.     cursor++;
  1327.  
  1328.     if (*cursor == U_EQUAL)
  1329.     {
  1330.         cursor++;
  1331.         current_token -> SetKind(TK_NOT_EQUAL);
  1332.     }
  1333.     else current_token -> SetKind(TK_NOT);
  1334.  
  1335.     return;
  1336. }
  1337.  
  1338.  
  1339. /**********************************************************************/
  1340. /*                         CLASSIFY_EQUAL:                            */
  1341. /**********************************************************************/
  1342. void Scanner::ClassifyEqual()
  1343. {
  1344.     cursor++;
  1345.  
  1346.     if (*cursor == U_EQUAL)
  1347.     {
  1348.         cursor++;
  1349.         current_token -> SetKind(TK_EQUAL_EQUAL);
  1350.     }
  1351.     else current_token -> SetKind(TK_EQUAL);
  1352.  
  1353.     return;
  1354. }
  1355.  
  1356.  
  1357. /**********************************************************************/
  1358. /*                          CLASSIFY_MOD:                             */
  1359. /**********************************************************************/
  1360. void Scanner::ClassifyMod()
  1361. {
  1362.     cursor++;
  1363.  
  1364.     if (*cursor == U_EQUAL)
  1365.     {
  1366.         cursor++;
  1367.         current_token -> SetKind(TK_REMAINDER_EQUAL);
  1368.     }
  1369.     else current_token -> SetKind(TK_REMAINDER);
  1370.  
  1371.     return;
  1372. }
  1373.  
  1374.  
  1375. /**********************************************************************/
  1376. /*                         CLASSIFY_PERIOD:                           */
  1377. /**********************************************************************/
  1378. void Scanner::ClassifyPeriod()
  1379. {
  1380.     if (Code::IsDigit(cursor[1])) // Is period immediately followed by digit?
  1381.         ClassifyNumericLiteral();
  1382.     else
  1383.     {
  1384.         current_token -> SetKind(TK_DOT);
  1385.  
  1386.         cursor++;
  1387.     }
  1388.  
  1389.     return;
  1390. }
  1391.  
  1392.  
  1393. /**********************************************************************/
  1394. /*                         CLASSIFY_SEMICOLON:                        */
  1395. /**********************************************************************/
  1396. void Scanner::ClassifySemicolon()
  1397. {
  1398.     current_token -> SetKind(TK_SEMICOLON);
  1399.  
  1400.     cursor++;
  1401.  
  1402.     return;
  1403. }
  1404.  
  1405.  
  1406. /**********************************************************************/
  1407. /*                           CLASSIFY_COMMA:                          */
  1408. /**********************************************************************/
  1409. void Scanner::ClassifyComma()
  1410. {
  1411.     current_token -> SetKind(TK_COMMA);
  1412.  
  1413.     cursor++;
  1414.  
  1415.     return;
  1416. }
  1417.  
  1418.  
  1419. /**********************************************************************/
  1420. /*                           CLASSIFY_LBRACE:                         */
  1421. /**********************************************************************/
  1422. void Scanner::ClassifyLbrace()
  1423. {
  1424.     //
  1425.     // Instead of setting the symbol for a left brace, we keep track of it.
  1426.     // When we encounter its matching right brace, we use the symbol field
  1427.     // to identify its counterpart.
  1428.     //
  1429.     brace_stack.Push(current_token_index);
  1430.  
  1431.     current_token -> SetKind(TK_LBRACE);
  1432.  
  1433.     cursor++;
  1434.  
  1435.     return;
  1436. }
  1437.  
  1438.  
  1439. /**********************************************************************/
  1440. /*                           CLASSIFY_RBRACE:                         */
  1441. /**********************************************************************/
  1442. void Scanner::ClassifyRbrace()
  1443. {
  1444.     //
  1445.     // When a left brace in encountered, it is pushed into the brace_stack.
  1446.     // When its matching right brace in encountered, we pop the left brace
  1447.     // and make it point to its matching right brace.
  1448.     //
  1449.     LexStream::TokenIndex left_brace = brace_stack.Top();
  1450.     if (left_brace) // This right brace is matched by a left one
  1451.     {
  1452.         lex -> token_stream[left_brace].SetRightBrace(current_token_index);
  1453.         brace_stack.Pop();
  1454.     }
  1455.  
  1456.     current_token -> SetKind(TK_RBRACE);
  1457.  
  1458.     cursor++;
  1459.  
  1460.     return;
  1461. }
  1462.  
  1463.  
  1464. /**********************************************************************/
  1465. /*                           CLASSIFY_LPAREN:                         */
  1466. /**********************************************************************/
  1467. void Scanner::ClassifyLparen()
  1468. {
  1469.     current_token -> SetKind(TK_LPAREN);
  1470.  
  1471.     cursor++;
  1472.  
  1473.     return;
  1474. }
  1475.  
  1476.  
  1477. /**********************************************************************/
  1478. /*                           CLASSIFY_RPAREN:                         */
  1479. /**********************************************************************/
  1480. void Scanner::ClassifyRparen()
  1481. {
  1482.     current_token -> SetKind(TK_RPAREN);
  1483.  
  1484.     cursor++;
  1485.  
  1486.     return;
  1487. }
  1488.  
  1489.  
  1490. /**********************************************************************/
  1491. /*                          CLASSIFY_LBRACKET:                        */
  1492. /**********************************************************************/
  1493. void Scanner::ClassifyLbracket()
  1494. {
  1495.     current_token -> SetKind(TK_LBRACKET);
  1496.  
  1497.     cursor++;
  1498.  
  1499.     return;
  1500. }
  1501.  
  1502.  
  1503. /**********************************************************************/
  1504. /*                          CLASSIFY_RBRACKET:                        */
  1505. /**********************************************************************/
  1506. void Scanner::ClassifyRbracket()
  1507. {
  1508.     current_token -> SetKind(TK_RBRACKET);
  1509.  
  1510.     cursor++;
  1511.  
  1512.     return;
  1513. }
  1514.  
  1515.  
  1516. /**********************************************************************/
  1517. /*                         CLASSIFY_COMPLEMENT:                       */
  1518. /**********************************************************************/
  1519. void Scanner::ClassifyComplement()
  1520. {
  1521.     current_token -> SetKind(TK_TWIDDLE);
  1522.  
  1523.     cursor++;
  1524.  
  1525.     return;
  1526. }
  1527.  
  1528.  
  1529. /**********************************************************************/
  1530. /*                        CLASSIFY_BAD_TOKEN:                         */
  1531. /**********************************************************************/
  1532. void Scanner::ClassifyBadToken()
  1533. {
  1534.     if (++cursor < &lex -> InputBuffer()[lex -> InputBufferLength()]) // not the terminating character?
  1535.     {
  1536.          current_token -> SetKind(0);
  1537.          current_token -> SetSymbol(control.FindOrInsertName(cursor - 1, 1));
  1538.  
  1539.          lex -> bad_tokens.Next().Initialize(StreamError::BAD_TOKEN,
  1540.                                              current_token -> Location(),
  1541.                                              current_token -> Location(), lex);
  1542.     }
  1543.     else
  1544.     {
  1545.         current_token -> SetKind(TK_EOF);
  1546.     }
  1547.  
  1548.     return;
  1549. }
  1550.  
  1551.  
  1552. /**********************************************************************/
  1553. /*                        CLASSIFY_QUESTION:                          */
  1554. /**********************************************************************/
  1555. /**********************************************************************/
  1556. void Scanner::ClassifyQuestion()
  1557. {
  1558.     current_token -> SetKind(TK_QUESTION);
  1559.  
  1560.     cursor++;
  1561.  
  1562.     return;
  1563. }
  1564.  
  1565.  
  1566. /**********************************************************************/
  1567. /*                     CLASSIFY_NONASCIIUNICODE:                      */
  1568. /**********************************************************************/
  1569. void Scanner::ClassifyNonAsciiUnicode()
  1570. {
  1571.     if (Code::IsAlpha(*cursor)) // Some kind of non-ascii unicode letter
  1572.         ClassifyId();
  1573.     else 
  1574.         ClassifyBadToken();
  1575.     return;
  1576. }
  1577.  
  1578. #ifdef    HAVE_JIKES_NAMESPACE
  1579. }            // Close namespace Jikes block
  1580. #endif
  1581.  
  1582.