home *** CD-ROM | disk | FTP | other *** search
- // $Id: stream.cpp,v 1.48 2001/02/20 07:47:03 mdejong Exp $
- //
- // This software is subject to the terms of the IBM Jikes Compiler
- // License Agreement available at the following URL:
- // http://www.ibm.com/research/jikes.
- // Copyright (C) 1996, 1998, International Business Machines Corporation
- // and others. All Rights Reserved.
- // You must accept the terms of that agreement to use this software.
- //
-
- #include "stream.h"
- #include "code.h"
- #include "zip.h"
- #include "symbol.h"
- #include "control.h"
- #include "semantic.h"
-
- #ifdef HAVE_CTYPE_H
- #include <ctype.h>
- #endif
-
- #if defined(HAVE_LIB_ICU_UC)
- #ifdef ICU131
- # include <ucnv.h>
- #else
- # include <unicode/ucnv.h>
- #endif
- #elif defined(HAVE_ICONV_H)
- # include <iconv.h>
- # include <errno.h>
- #endif
-
- #ifdef HAVE_JIKES_NAMESPACE
- namespace Jikes { // Open namespace Jikes block
- #endif
-
- // Class StreamError
-
- JikesError::JikesErrorSeverity StreamError::getSeverity ()
- {
- //All Lexical errors are ERRORs.
- return JikesError::JIKES_ERROR;
- }
-
- int StreamError::getLeftLineNo () { return left_line_no ; }
- int StreamError::getLeftColumnNo () { return left_column_no ; }
- int StreamError::getRightLineNo () { return right_line_no ; }
- int StreamError::getRightColumnNo () { return right_column_no ; }
-
- const char *StreamError::getFileName()
- {
- assert(lex_stream);
- return lex_stream -> FileName();
- }
-
- const wchar_t *StreamError::getErrorMessage()
- {
- switch(kind)
- {
- case StreamError::BAD_TOKEN:
- return L"Illegal token\n";
- break;
- case StreamError::BAD_OCTAL_CONSTANT:
- return L"Octal constant contains invalid digit\n";
- break;
- case StreamError::EMPTY_CHARACTER_CONSTANT:
- return L"Empty character constant\n";
- break;
- case StreamError::UNTERMINATED_CHARACTER_CONSTANT:
- return L"Character constant not properly terminated\n";
- break;
- case StreamError::UNTERMINATED_COMMENT:
- return L"Comment not properly terminated\n";
- break;
- case StreamError::UNTERMINATED_STRING_CONSTANT:
- return L"String constant not properly terminated\n";
- break;
- case StreamError::INVALID_HEX_CONSTANT:
- return L"The prefix 0x must be followed by at least one hex digit\n";
- break;
- case StreamError::INVALID_FLOATING_CONSTANT_EXPONENT:
- return L"floating-constant exponent has no digit\n";
- break;
- case StreamError::INVALID_UNICODE_ESCAPE:
- return L"Invalid unicode escape character\n";
- break;
- default:
- assert(false);
- }
-
- return L"Unknown Error\n";
- }
-
- bool StreamError::emacs_style_report=false;
-
- const wchar_t *StreamError::getErrorReport()
- {
- /*
- * We need to use this lazy initialization,
- * because we can't to it in Initialize() method. Reason
- * is that Find* methods are unusalble until
- * LexStream::CompressSpace is called, which
- * is not happend until later after scanning is done
- * and all errors are reported.
- * (lord)
- */
- if(!initialized)
- {
- left_line_no = lex_stream->FindLine ( start_location );
- left_column_no = lex_stream->FindColumn ( start_location );
- right_line_no = lex_stream->FindLine ( end_location );
- right_column_no = lex_stream->FindColumn ( end_location );
- initialized = true;
- }
-
- return emacs_style_report?emacsErrorString():regularErrorString();
- }
-
- wchar_t *StreamError::emacsErrorString()
- {
- ErrorString s;
-
- s << getFileName()
- << ':' << left_line_no << ':' << left_column_no
- << ':' << right_line_no << ':' << right_column_no
- << ": Lexical: " << getErrorMessage();
-
- return s.Array();
- }
-
-
- wchar_t *StreamError::regularErrorString()
- {
- ErrorString s;
-
- assert(lex_stream);
- if(left_line_no == right_line_no)
- PrintSmallSource(s);
- else
- PrintLargeSource(s);
-
- s << "\n*** Lexical Error: "
- << getErrorMessage();
-
- return s.Array();
- }
-
- //
- // This procedure is invoked to print a small message that may
- // only span a single line. The parameter k points to the error
- // message in the error structure.
- //
- void StreamError::PrintSmallSource(ErrorString &s)
- {
- s << "\n\n";
- s.width(6);
- s << left_line_no;
- s << ". ";
- for (int i = lex_stream->LineStart(left_line_no); i <= lex_stream->LineEnd(left_line_no); i++)
- s << lex_stream->InputBuffer()[i];
-
- s.width(left_column_no + 7);
- s << "";
- if (left_column_no == right_column_no)
- s << '^';
- else
- {
- int offset = 0;
- for (size_t i = start_location; i <= end_location; i++)
- {
- if (lex_stream->InputBuffer()[i] > 0xff)
- offset += 5;
- }
-
- s << '<';
- s.width(right_column_no - left_column_no + offset);
- s.fill('-');
- s << ">";
- s.fill(' ');
- }
- }
-
-
- //
- // This procedure is invoked to print a large message that may
- // span more than one line. The parameter message points to the
- // starting line. The parameter k points to the error message in
- // the error structure.
- //
- void StreamError::PrintLargeSource(ErrorString &s)
- {
- if (left_line_no == right_line_no)
- {
- if (left_line_no == 0)
- s << "\n";
- else
- {
- s << "\n\n";
- s.width(6);
- s << left_line_no << ". ";
- for (int i = lex_stream -> LineStart(left_line_no); i <= lex_stream -> LineEnd(left_line_no); i++)
- s << lex_stream -> InputBuffer()[i];
-
- int offset = 0;
- for (size_t j = start_location; j <= end_location; j++)
- {
- if (lex_stream -> InputBuffer()[j] > 0xff)
- offset += 5;
- }
-
- s.width(left_column_no + 8);
- s << "<";
- s.width(right_column_no - left_column_no + offset);
- s.fill('-');
- s << ">";
- s.fill(' ');
- }
- }
- else
- {
- s << "\n\n";
- s.width(left_column_no + 8);
- s << "<";
-
- int segment_size = Tab::Wcslen(lex_stream->input_buffer, start_location,
- lex_stream->LineEnd(lex_stream->FindLine(start_location)));
- s.width(segment_size - 1);
- s.fill('-');
- s << "\n";
- s.fill(' ');
-
- s.width(6);
- s << left_line_no << ". ";
- for (int i = lex_stream -> LineStart(left_line_no); i <= lex_stream -> LineEnd(left_line_no); i++)
- s << lex_stream -> InputBuffer()[i];
-
- if (right_line_no > left_line_no + 1)
- {
- s.width(left_column_no + 7);
- s << " ";
- s << ". . .\n";
- }
-
- s.width(6);
- s << right_line_no << ". ";
-
- int offset = 0;
- for (int j = lex_stream -> LineStart(right_line_no); j <= lex_stream -> LineEnd(right_line_no); j++)
- {
- wchar_t c = lex_stream -> InputBuffer()[j];
- if (c > 0xff)
- offset += 5;
- s << c;
- }
-
- s.width(8);
- s << "";
- s.width(right_column_no - 1 + offset);
- s.fill('-');
- s << ">";
- s.fill(' ');
- }
- }
-
- void StreamError::Initialize(StreamErrorKind kind_, unsigned start_location_, unsigned end_location_, LexStream *l)
- {
- kind = kind_ ;
- start_location = start_location_ ;
- end_location = end_location_ ;
- lex_stream = l ;
- }
-
- StreamError::StreamError():initialized(false)
- {
- }
-
-
- // Class Stream
-
- Stream::Stream()
- : input_buffer(NULL),
- input_buffer_length(0)
- #if defined(HAVE_LIB_ICU_UC)
- ,_converter(NULL)
- #elif defined(HAVE_ICONV_H)
- ,_converter((iconv_t)-1)
- #endif
- {
- }
-
- Stream::~Stream()
- {
- DestroyInput();
- #if defined(HAVE_LIB_ICU_UC) || defined(HAVE_ICONV_H)
- DestroyEncoding();
- #endif
- }
-
- #if defined(HAVE_LIB_ICU_UC) || defined(HAVE_ICONV_H)
-
- // This method will return true is the given encoding
- // can be supported, it is static because we need to
- // be able to query encodings without an instance.
-
- bool Stream::IsSupportedEncoding(char* encoding)
- {
- bool supported;
- // Create a tmp object instead of duplicating
- // the code in SetEncoding and DestroyEncoding
- Stream* tmp = new Stream();
- supported = tmp->SetEncoding(encoding);
- delete tmp;
- return supported;
- }
-
- bool Stream::SetEncoding(char* encoding)
- {
- assert(encoding);
- DestroyEncoding();
-
- #if defined(HAVE_LIB_ICU_UC)
- UErrorCode err = U_ZERO_ERROR;
-
- _converter = ucnv_open(encoding, &err);
- if (!_converter)
- {
- return false;
- }
- else {
- return true;
- }
- #elif defined(HAVE_ICONV_H)
- _converter = iconv_open("utf-16", encoding);
- if (_converter == (iconv_t)-1)
- {
- return false;
- }
- else
- {
- return true;
- }
- #endif
- }
-
- void Stream::DestroyEncoding()
- {
- #if defined(HAVE_LIB_ICU_UC)
- if (_converter)
- {
- ucnv_close(_converter);
- _converter = NULL;
- }
- #elif defined(HAVE_ICONV_H)
- if (_converter != (iconv_t)-1)
- {
- iconv_close(_converter);
- _converter = (iconv_t)-1;
- }
- #endif
- }
-
- #endif // defined(HAVE_LIB_ICU_UC) || defined(HAVE_ICONV_H)
-
-
- // Class LexStream
-
- LexStream::LexStream(Control &control_, FileSymbol *file_symbol_) : file_symbol(file_symbol_),
- #ifdef JIKES_DEBUG
- file_read(0),
- #endif
- tokens(NULL),
- columns(NULL),
- token_stream(12, 16),
- comments(NULL),
- comment_stream(10, 8),
- locations(NULL),
- line_location(12, 8),
- initial_reading_of_input(true),
- comment_buffer(NULL),
- control(control_)
- {
- StreamError::emacs_style_report=!control_.option.errors;
- }
-
- wchar_t *LexStream::KeywordName(int kind)
- {
- switch(kind)
- {
- case TK_abstract: return StringConstant::US_abstract; break;
- case TK_boolean: return StringConstant::US_boolean; break;
- case TK_break: return StringConstant::US_break; break;
- case TK_byte: return StringConstant::US_byte; break;
- case TK_case: return StringConstant::US_case; break;
- case TK_catch: return StringConstant::US_catch; break;
- case TK_char: return StringConstant::US_char; break;
- case TK_class: return StringConstant::US_class; break;
- case TK_const: return StringConstant::US_const; break;
- case TK_continue: return StringConstant::US_continue; break;
- case TK_default: return StringConstant::US_default; break;
- case TK_do: return StringConstant::US_do; break;
- case TK_double: return StringConstant::US_double; break;
- case TK_else: return StringConstant::US_else; break;
- case TK_extends: return StringConstant::US_extends; break;
- case TK_false: return StringConstant::US_false; break;
- case TK_final: return StringConstant::US_final; break;
- case TK_finally: return StringConstant::US_finally; break;
- case TK_float: return StringConstant::US_float; break;
- case TK_for: return StringConstant::US_for; break;
- case TK_goto: return StringConstant::US_goto; break;
- case TK_if: return StringConstant::US_if; break;
- case TK_implements: return StringConstant::US_implements; break;
- case TK_import: return StringConstant::US_import; break;
- case TK_instanceof: return StringConstant::US_instanceof; break;
- case TK_int: return StringConstant::US_int; break;
- case TK_interface: return StringConstant::US_interface; break;
- case TK_long: return StringConstant::US_long; break;
- case TK_native: return StringConstant::US_native; break;
- case TK_new: return StringConstant::US_new; break;
- case TK_null: return StringConstant::US_null; break;
- case TK_package: return StringConstant::US_package; break;
- case TK_private: return StringConstant::US_private; break;
- case TK_protected: return StringConstant::US_protected; break;
- case TK_public: return StringConstant::US_public; break;
- case TK_return: return StringConstant::US_return; break;
- case TK_short: return StringConstant::US_short; break;
- case TK_static: return StringConstant::US_static; break;
- case TK_strictfp: return StringConstant::US_strictfp; break;
- case TK_super: return StringConstant::US_super; break;
- case TK_switch: return StringConstant::US_switch; break;
- case TK_synchronized: return StringConstant::US_synchronized; break;
- case TK_this: return StringConstant::US_this; break;
- case TK_throw: return StringConstant::US_throw; break;
- case TK_throws: return StringConstant::US_throws; break;
- case TK_transient: return StringConstant::US_transient; break;
- case TK_true: return StringConstant::US_true; break;
- case TK_try: return StringConstant::US_try; break;
- case TK_void: return StringConstant::US_void; break;
- case TK_volatile: return StringConstant::US_volatile; break;
- case TK_while: return StringConstant::US_while; break;
-
- case TK_PLUS_PLUS: return StringConstant::US_PLUS_PLUS; break;
- case TK_MINUS_MINUS: return StringConstant::US_MINUS_MINUS; break;
- case TK_EQUAL_EQUAL: return StringConstant::US_EQUAL_EQUAL; break;
- case TK_LESS_EQUAL: return StringConstant::US_LESS_EQUAL; break;
- case TK_GREATER_EQUAL: return StringConstant::US_GREATER_EQUAL; break;
- case TK_NOT_EQUAL: return StringConstant::US_NOT_EQUAL; break;
- case TK_LEFT_SHIFT: return StringConstant::US_LEFT_SHIFT; break;
- case TK_RIGHT_SHIFT: return StringConstant::US_RIGHT_SHIFT; break;
- case TK_UNSIGNED_RIGHT_SHIFT: return StringConstant::US_UNSIGNED_RIGHT_SHIFT; break;
- case TK_PLUS_EQUAL: return StringConstant::US_PLUS_EQUAL; break;
- case TK_MINUS_EQUAL: return StringConstant::US_MINUS_EQUAL; break;
- case TK_MULTIPLY_EQUAL: return StringConstant::US_MULTIPLY_EQUAL; break;
- case TK_DIVIDE_EQUAL: return StringConstant::US_DIVIDE_EQUAL; break;
- case TK_AND_EQUAL: return StringConstant::US_AND_EQUAL; break;
- case TK_OR_EQUAL: return StringConstant::US_OR_EQUAL; break;
- case TK_XOR_EQUAL: return StringConstant::US_XOR_EQUAL; break;
- case TK_REMAINDER_EQUAL: return StringConstant::US_REMAINDER_EQUAL; break;
- case TK_LEFT_SHIFT_EQUAL: return StringConstant::US_LEFT_SHIFT_EQUAL; break;
- case TK_RIGHT_SHIFT_EQUAL: return StringConstant::US_RIGHT_SHIFT_EQUAL; break;
- case TK_UNSIGNED_RIGHT_SHIFT_EQUAL: return StringConstant::US_UNSIGNED_RIGHT_SHIFT_EQUAL; break;
- case TK_OR_OR: return StringConstant::US_OR_OR; break;
- case TK_AND_AND: return StringConstant::US_AND_AND; break;
-
- case TK_PLUS: return StringConstant::US_PLUS; break;
- case TK_MINUS: return StringConstant::US_MINUS; break;
- case TK_NOT: return StringConstant::US_NOT; break;
- case TK_REMAINDER: return StringConstant::US_REMAINDER; break;
- case TK_XOR: return StringConstant::US_XOR; break;
- case TK_AND: return StringConstant::US_AND; break;
- case TK_MULTIPLY: return StringConstant::US_MULTIPLY; break;
- case TK_OR: return StringConstant::US_OR; break;
- case TK_TWIDDLE: return StringConstant::US_TWIDDLE; break;
- case TK_DIVIDE: return StringConstant::US_DIVIDE; break;
- case TK_GREATER: return StringConstant::US_GREATER; break;
- case TK_LESS: return StringConstant::US_LESS; break;
- case TK_LPAREN: return StringConstant::US_LPAREN; break;
- case TK_RPAREN: return StringConstant::US_RPAREN; break;
- case TK_LBRACE: return StringConstant::US_LBRACE; break;
- case TK_RBRACE: return StringConstant::US_RBRACE; break;
- case TK_LBRACKET: return StringConstant::US_LBRACKET; break;
- case TK_RBRACKET: return StringConstant::US_RBRACKET; break;
- case TK_SEMICOLON: return StringConstant::US_SEMICOLON; break;
- case TK_QUESTION: return StringConstant::US_QUESTION; break;
- case TK_COLON: return StringConstant::US_COLON; break;
- case TK_COMMA: return StringConstant::US_COMMA; break;
- case TK_DOT: return StringConstant::US_DOT; break;
- case TK_EQUAL: return StringConstant::US_EQUAL; break;
- case TK_EOF: return StringConstant::US_EOF; break;
- default: break;
- }
-
- return StringConstant::US_EMPTY;
- }
-
-
- LexStream::~LexStream()
- {
- #ifdef JIKES_DEBUG
- control.line_count += (file_read * (line_location.Length() - 3));
- #endif
-
- DestroyInput();
-
- delete [] columns;
- columns = NULL;
- }
-
-
- //
- //
- //
- class LiteralSymbol *LexStream::LiteralSymbol(TokenIndex i)
- {
- Symbol *symbol = tokens[i].additional_info.symbol;
- return (symbol && (Kind(i) != TK_LBRACE) ?
- symbol -> LiteralCast() :
- (class LiteralSymbol *) NULL);
- }
-
-
- //
- //
- //
- class NameSymbol *LexStream::NameSymbol(TokenIndex i)
- {
- Symbol *symbol = tokens[i].additional_info.symbol;
- return (symbol && (Kind(i) != TK_LBRACE) ?
- symbol -> NameCast() :
- (class NameSymbol *) NULL);
- }
-
-
- //
- // Name of input file where the token appeared.
- //
- char *LexStream::FileName() { return file_symbol -> FileName(); }
- size_t LexStream::FileNameLength() { return file_symbol -> FileNameLength(); }
-
-
- void LexStream::InitializeColumns()
- {
- if (! columns)
- {
- columns = new unsigned short[token_stream.Length()];
-
- int start = 0,
- k = 1;
-
- for (size_t i = 0; i < input_buffer_length; i++)
- {
- if (Code::IsNewline(input_buffer[i]))
- start = i;
- else
- {
- if (input_buffer[i] == U_HORIZONTAL_TAB)
- {
- int offset = (i - start) - 1;
- start -= ((Tab::TabSize() - 1) - offset % Tab::TabSize());
- }
- else if (tokens[k].Location() == i)
- {
- int col = i - start;
- columns[k++] = (col < USHRT_MAX ? col : 0);
- }
- }
- }
- }
-
- return;
- }
-
-
- //
- //
- //
- void LexStream::CompressSpace()
- {
- tokens = token_stream .Array();
- comments = comment_stream .Array();
- locations = line_location .Array();
- types = type_index .Array();
-
- if(control.option.dump_errors)
- InitializeColumns();
-
- return;
- }
-
-
- //
- // Find and return the index of the first comment that immediately
- // follows tok. Return 0 if there is not a comment that immediately
- // follows tok.
- //
- LexStream::CommentIndex LexStream::FirstComment(TokenIndex tok)
- {
- unsigned location = Location(tok);
- int lo = 0,
- hi = comment_stream.Length() - 1,
- i = 0;
-
- if (lo < hi)
- {
- do
- {
- int mid = (lo + hi) / 2;
-
- if (comment_stream[mid].location < location)
- lo = mid + 1;
- else hi = mid - 1;
- } while (lo < hi);
-
- //
- // at this stage lo == hi
- //
- i = (comment_stream[lo].location > location ? lo : lo + 1);
- }
-
- return (i < comment_stream.Length() && comment_stream[i].previous_token == tok ? i : 0);
- }
-
-
- unsigned LexStream::FindLine(unsigned location)
- {
- int lo = 0,
- hi = line_location.Length() - 1;
-
- assert(locations);
-
- //
- // we can place the exit test at the bottom of the loop
- // since the line_location array will always contain at least
- // one element.
- //
- do
- {
- int mid = (lo + hi) / 2;
-
- if (locations[mid] == location)
- return mid;
- if (locations[mid] < location)
- lo = mid + 1;
- else hi = mid - 1;
- } while (lo < hi);
-
- return (locations[lo] > location ? lo - 1 : lo);
- }
-
-
- void LexStream::ReadInput()
- {
- if (file_symbol -> buffer)
- {
- ProcessInput(file_symbol -> buffer, strlen(file_symbol -> buffer));
- }
- else if (file_symbol -> IsZip()) {
- ZipFile *zipfile = new ZipFile(file_symbol);
-
- if (zipfile -> Buffer() == NULL)
- {
- fprintf(stderr, "chaos: Don\'t know how to process compressed (\".java\") source in a zip file\n");
- assert(false);
- }
- else if (! file_symbol -> lex_stream) // Once the zip file is loaded, it never changes. So, we only read it the first time
- {
- file_symbol -> lex_stream = this;
- ProcessInput(zipfile -> Buffer(), file_symbol -> uncompressed_size);
- }
- delete zipfile;
- }
- else
- {
- struct stat status;
- JikesAPI::getInstance()->stat(FileName(), &status);
-
- file_symbol -> mtime = status.st_mtime; // actual time stamp of file read
- file_symbol -> lex_stream = this;
-
-
- JikesAPI::FileReader *file = JikesAPI::getInstance()->read(FileName());
- if (file)
- {
- ProcessInput(file->getBuffer(),file->getBufferSize());
- delete file;
- }
- }
-
- initial_reading_of_input = false;
-
- return;
- }
-
- void LexStream::RereadInput()
- {
- if (input_buffer) // if input already available, do nothing
- ;
- #ifdef JIKES_DEBUG
- else if (file_symbol -> buffer)
- {
- fprintf(stderr, "chaos: Don\'t know how to RereadInput a buffer\n");
- assert(false);
- }
- #endif
- else if (file_symbol -> IsZip())
- {
- ZipFile *zipfile = new ZipFile(file_symbol);
-
- if (zipfile -> Buffer() == NULL)
- {
- fprintf(stderr, "chaos: Don\'t know how to process compressed (\".java\") source in a zip file\n");
- assert(false);
- }
- else ProcessInput(zipfile -> Buffer(), file_symbol -> uncompressed_size);
- delete zipfile;
- }
- else
- {
- struct stat status;
- JikesAPI::getInstance()->stat(FileName(), &status);
-
- if (status.st_mtime == file_symbol -> mtime)
- {
- JikesAPI::FileReader *file = JikesAPI::getInstance()->read(FileName());
- if (file)
- {
- ProcessInput(file->getBuffer(),file->getBufferSize());
- delete file;
- }
- }
- else
- {
- // TODO: File has changed !!!
- }
- }
-
- return;
- }
-
-
- int LexStream::hexvalue(wchar_t ch)
- {
- switch(ch)
- {
- case U_a: case U_A:
- return 10;
- case U_b: case U_B:
- return 11;
- case U_c: case U_C:
- return 12;
- case U_d: case U_D:
- return 13;
- case U_e: case U_E:
- return 14;
- case U_f: case U_F:
- return 15;
- default:
- return ch - U_0;
- }
- }
-
- //
- // Read filesize characters from srcfile, convert them to unicode, and
- // store them in input_buffer.
- //
- void LexStream::ProcessInput(const char *buffer, long filesize)
- {
- #if defined(HAVE_LIB_ICU_UC) || defined(HAVE_ICONV_H)
- LexStream::ProcessInputUnicode(buffer,filesize);
- #else
- LexStream::ProcessInputAscii(buffer, filesize);
- #endif
- }
-
- //
- // Read file_size Ascii characters from srcfile, convert them to unicode and
- // store them in input_buffer.
- //
- void LexStream::ProcessInputAscii(const char *buffer, long filesize)
- {
- #ifdef JIKES_DEBUG
- file_read++;
- #endif
-
- wchar_t *input_ptr = AllocateInputBuffer( filesize );
- *input_ptr = U_LINE_FEED; // add an initial '\n';
-
- if (buffer)
- {
- const char *source_ptr = buffer,
- *source_tail = &(buffer[filesize - 1]); // point to last character read from the file.
-
- while(source_ptr <= source_tail)
- {
- *(++input_ptr) = (*source_ptr++) & 0x00ff; // The (& 0x00ff) guarantees that quantity is copied as unsigned value
-
- if (*input_ptr == U_CARRIAGE_RETURN)
- {
- *input_ptr = U_LINE_FEED;
- if (*source_ptr == U_LINE_FEED)
- source_ptr++;
- }
- else if (*input_ptr == U_BACKSLASH)
- {
- if (*source_ptr == U_BACKSLASH)
- *(++input_ptr) = *source_ptr++;
- else if (*source_ptr == U_u)
- {
- const char *u_ptr = source_ptr;
-
- for (source_ptr++; source_ptr <= source_tail && *source_ptr == U_u; source_ptr++)
- ;
- *input_ptr = 0;
- int i;
- for (i = 0; source_ptr <= source_tail && isxdigit(*source_ptr) && i < 4; i++)
- {
- int multiplier[4] = {4096, 256, 16, 1};
-
- const char ch = *source_ptr++;
- switch(ch)
- {
- case U_a: case U_A:
- *input_ptr += (10 * multiplier[i]);
- break;
- case U_b: case U_B:
- *input_ptr += (11 * multiplier[i]);
- break;
- case U_c: case U_C:
- *input_ptr += (12 * multiplier[i]);
- break;
- case U_d: case U_D:
- *input_ptr += (13 * multiplier[i]);
- break;
- case U_e: case U_E:
- *input_ptr += (14 * multiplier[i]);
- break;
- case U_f: case U_F:
- *input_ptr += (15 * multiplier[i]);
- break;
- default:
- *input_ptr += ((ch - U_0) * multiplier[i]);
- }
- }
-
- if (i != 4)
- {
- if (initial_reading_of_input)
- bad_tokens.Next().Initialize(StreamError::INVALID_UNICODE_ESCAPE,
- (unsigned) (input_ptr - input_buffer),
- (unsigned) (input_ptr - input_buffer) + (source_ptr - u_ptr), this);
-
- source_ptr = u_ptr;
- *input_ptr = U_BACKSLASH;
- }
- else if (*input_ptr == U_CARRIAGE_RETURN)
- {
- *input_ptr = U_LINE_FEED;
- if (*source_ptr == U_LINE_FEED)
- source_ptr++;
- else if (*source_ptr == U_BACKSLASH)
- {
- int i;
- for (i = 1; (source_ptr + i) <= source_tail && source_ptr[i] == U_u; i++)
- ;
- if (i > 1 && (source_ptr + i + 3) <= source_tail
- && source_ptr[i] == U_0
- && source_ptr[i + 1] == U_0
- && source_ptr[i + 2] == U_0
- && source_ptr[i + 3] == U_a) // the escape sequence of \n is \u000a
- source_ptr += (i + 4);
- }
- }
- }
- }
- }
-
- //
- // Remove all trailing spaces
- //
- while((input_ptr > input_buffer) && Code::IsSpace(*input_ptr))
- input_ptr--;
- }
-
- //
- // If the very last character is not CTL_Z then add CTL_Z
- //
- if (*input_ptr != U_CTL_Z)
- {
- if (*input_ptr != U_LINE_FEED)
- *(++input_ptr) = U_LINE_FEED; // if the last character is not end-of-line, add end-of-line
- *(++input_ptr) = U_CTL_Z; // Mark end-of-file
- }
- *(++input_ptr) = U_NULL; // add gate
-
- input_buffer_length = input_ptr - input_buffer;
-
- return;
- }
-
- #if defined(HAVE_LIB_ICU_UC) || defined(HAVE_ICONV_H)
- //
- // Read file_size Ascii characters from srcfile, convert them to unicode, and
- // store them in input_buffer.
- //
- void LexStream::ProcessInputUnicode(const char *buffer, long filesize)
- {
- //fprintf(stderr,"LexStream::ProcessInputUnicode called.\n");
- #ifdef JIKES_DEBUG
- file_read++;
- #endif
-
- wchar_t *input_ptr = AllocateInputBuffer( filesize );
- wchar_t *input_tail = input_ptr + filesize;
- *input_ptr = U_LINE_FEED; // add an initial '\n';
-
- if(buffer)
- {
- int escape_value;
- wchar_t *escape_ptr;
-
- const char *source_ptr = buffer;
- const char *source_tail = buffer + filesize - 1; // point to last character read from the file.
-
- UnicodeLexerState saved_state = START;
- UnicodeLexerState state = START;
- #ifdef HAVE_LIB_ICU_UC
- UErrorCode err = U_ZERO_ERROR;
- #endif
- bool oncemore = false;
-
- if(control.option.encoding)
- {
- // The encoding should have been validated by now
- assert( SetEncoding(control.option.encoding) );
- }
-
- while((source_ptr <= source_tail) || oncemore)
- {
- // On each iteration we advance input_ptr maximun 2 postions.
- // Here we check if we are close to the end of input_buffer
- if(input_ptr>=input_tail)
- {
- // If this happen, reallocate it with some more space.
- // This is very rare case, which could happen if
- // one code page character is represened by several
- // unicode characters. One of exaples of such
- // situation is unicode "surrogates".
- //
- // If such reallocation will be required, it will indeed
- // slow down compilation a bit.
- size_t cursize = input_ptr-input_buffer;
- size_t newsize = cursize+cursize/10+4; // add 10%
- wchar_t *tmp = new wchar_t[newsize];
- memcpy (tmp, input_buffer, cursize*sizeof(wchar_t));
- delete [] input_buffer;
- input_buffer = tmp;
- input_tail = input_buffer + newsize - 1;
- input_ptr = input_buffer + cursize;
- }
-
- wchar_t ch;
-
- if(!oncemore)
- {
- if(control.option.encoding)
- {
- const char *before = source_ptr;
-
- #ifdef HAVE_LIB_ICU_UC
- ch=ucnv_getNextUChar (_converter,
- &source_ptr,
- source_tail+1,
- &err);
-
-
- if(U_FAILURE(err))
- {
- fprintf(stderr,"Conversion error: %s at byte %d\n",
- #ifdef ICU131
- errorName(err),
- #else
- u_errorName(err),
- #endif
- int(before-buffer)
- );
- break;
- }
- #else
- # ifdef HAVE_ICONV_H
- u1 chd[2], uni_high, uni_low;
- u1 *chp = chd;
- // Point to 2 bytes with 16 bit type
- wchar_t* wchp = (wchar_t *) chp;
- size_t chl = 2;
- size_t srcl = 1;
- size_t n = iconv(_converter,
- #ifdef HAVE_ERROR_CALL_ICONV_CONST
- (char **)
- #endif
- &source_ptr, &srcl,
- (char **)&chp, &chl
- );
-
- if(n == (size_t) -1)
- {
- fprintf(stderr,"Charset conversion error at offset %d: ", int(before-buffer));
- perror("");
- break;
- }
-
- // FIXME: This seems like a hack, someone should reread the docs
- // and clean this nasty code up -> http://www.netppl.fi/~pp/glibc21/libc_6.html#SEC91
-
- // Operate on chd buffer in endian independent fashion
- uni_high = (u1) (*wchp);
- uni_low = (u1) ((*wchp) >> 8);
- ch = uni_low + (uni_high * 256);
- # endif
- #endif
- if(before==source_ptr)
- {
- //End of conversion
- break;
- }
- }
- else
- {
- ch=*source_ptr++;
- }
- } else oncemore = false;
-
- switch(state)
- {
-
- case QUOTE:
- if(ch==U_BACKSLASH)
- {
- *(++input_ptr) = U_BACKSLASH;
- *(++input_ptr) = U_BACKSLASH;
- state = RAW;
- } else if(ch==U_u)
- {
- escape_ptr = input_ptr;
- state = UNICODE_ESCAPE;
- } else
- {
- *(++input_ptr )= U_BACKSLASH;
- state = RAW;
- oncemore = true;
- }
- break;
-
- case UNICODE_ESCAPE:
- if(isxdigit(ch))
- {
- state=UNICODE_ESCAPE_DIGIT_0;
- escape_value=hexvalue(ch)*16*16*16;
- } else if(ch!=U_u)
- {
- if(initial_reading_of_input)
- bad_tokens.Next().Initialize(StreamError::INVALID_UNICODE_ESCAPE,
- (unsigned) (escape_ptr - input_buffer),
- (unsigned) (input_ptr - input_buffer), this);
- }
- break;
-
- case UNICODE_ESCAPE_DIGIT_0:
- if(isxdigit(ch))
- {
- state=UNICODE_ESCAPE_DIGIT_1;
- escape_value+=hexvalue(ch)*16*16;
- } else
- {
- if(initial_reading_of_input)
- bad_tokens.Next().Initialize(StreamError::INVALID_UNICODE_ESCAPE,
- (unsigned) (escape_ptr - input_buffer),
- (unsigned) (input_ptr - input_buffer), this);
- }
- break;
-
- case UNICODE_ESCAPE_DIGIT_1:
- if(isxdigit(ch))
- {
- state=UNICODE_ESCAPE_DIGIT_2;
- escape_value+=hexvalue(ch)*16;
- } else
- {
- if(initial_reading_of_input)
- bad_tokens.Next().Initialize(StreamError::INVALID_UNICODE_ESCAPE,
- (unsigned) (escape_ptr - input_buffer),
- (unsigned) (input_ptr - input_buffer), this);
- }
- break;
-
- case UNICODE_ESCAPE_DIGIT_2:
- if(isxdigit(ch))
- {
- ch = escape_value+hexvalue(ch);
- state = saved_state;
- saved_state = UNICODE_ESCAPE_DIGIT_2;
- oncemore = true;
- } else
- {
- if(initial_reading_of_input)
- bad_tokens.Next().Initialize(StreamError::INVALID_UNICODE_ESCAPE,
- (unsigned) (escape_ptr - input_buffer),
- (unsigned) (input_ptr - input_buffer), this);
- }
- break;
-
- case CR:
- if (ch == U_LINE_FEED)
- {
- // skip line feed if it comes right after a CR.
- state = RAW;
- } else if (ch == U_CARRIAGE_RETURN)
- {
- // but if CR follows CR then the second CR is a
- // line feed too (and note that state=CR still, afterwards,
- // so that CR-CR-LF will be handled correctly). [CSA]
- *(++input_ptr) = U_LINE_FEED;
- } else if (ch == U_BACKSLASH && saved_state != UNICODE_ESCAPE_DIGIT_2)
- {
- saved_state = CR;
- state = QUOTE;
- } else
- {
- state = RAW;
- *(++input_ptr)=ch;
- }
- // clear saved_state == UNICODE_ESCAPE_DIGIT_2 status
- saved_state = CR;
- break;
-
- case START:
- // if for some reason converter produced or passed
- // byte order mark, it have to be ignored.
- state = RAW;
- if(ch==U_BOM || ch==U_REVERSE_BOM)
- break; //ignore
-
- case RAW:
- if(ch==U_BACKSLASH && saved_state != UNICODE_ESCAPE_DIGIT_2)
- {
- state = QUOTE;
- } else if(ch == U_CARRIAGE_RETURN)
- {
- state = CR;
- *(++input_ptr) = U_LINE_FEED;
- } else
- {
- *(++input_ptr)=ch;
- }
- saved_state = RAW;
- break;
- }
- }
- }
-
- //
- // If the very last character is not CTL_Z then add CTL_Z
- //
- if (*input_ptr != U_CTL_Z)
- {
- if (*input_ptr != U_LINE_FEED)
- *(++input_ptr) = U_LINE_FEED; // if the last character is not end-of-line, add end-of-line
- *(++input_ptr) = U_CTL_Z; // Mark end-of-file
- }
- *(++input_ptr) = U_NULL; // add gate
-
- input_buffer_length = input_ptr - input_buffer;
-
- return;
- }
- #endif // defined(HAVE_LIB_ICU_UC) || defined(HAVE_ICONV_H)
-
- //
- // This procedure uses a quick sort algorithm to sort the stream ERRORS
- // by their locations.
- //
- void LexStream::SortMessages()
- {
- int lower,
- upper,
- lostack[32],
- histack[32];
-
- int top,
- i,
- j;
- StreamError pivot,
- temp;
-
- top = 0;
- lostack[top] = 0;
- histack[top] = bad_tokens.Length() - 1;
-
- while(top >= 0)
- {
- lower = lostack[top];
- upper = histack[top];
- top--;
-
- while(upper > lower)
- {
- //
- // The array is most-likely almost sorted. Therefore,
- // we use the middle element as the pivot element.
- //
- i = (lower + upper) / 2;
- pivot = bad_tokens[i];
- bad_tokens[i] = bad_tokens[lower];
-
- //
- // Split the array section indicated by LOWER and UPPER
- // using ARRAY(LOWER) as the pivot.
- //
- i = lower;
- for (j = lower + 1; j <= upper; j++)
- {
- if (bad_tokens[j].start_location < pivot.start_location)
- {
- temp = bad_tokens[++i];
- bad_tokens[i] = bad_tokens[j];
- bad_tokens[j] = temp;
- }
- }
- bad_tokens[lower] = bad_tokens[i];
- bad_tokens[i] = pivot;
-
- top++;
- if ((i - lower) < (upper - i))
- {
- lostack[top] = i + 1;
- histack[top] = upper;
- upper = i - 1;
- }
- else
- {
- histack[top] = i - 1;
- lostack[top] = lower;
- lower = i + 1;
- }
- }
- }
-
- return;
- }
-
-
- //
- //
- //
- void LexStream::PrintMessages()
- {
- //
- // If control.option.dump_errors then the error messages have already been printed
- //
- if (! control.option.dump_errors)
- {
- RereadInput();
-
- if (control.option.errors)
- {
- char *file_name = FileName();
-
- Coutput << "\nFound " << NumBadTokens() << " lexical error" << (NumBadTokens() == 1 ? "" : "s")
- << " in \""
- << file_name
- << "\":";
-
- if (! input_buffer)
- {
- int length = FileNameLength();
- wchar_t *name = new wchar_t[length + 1];
- for (int i = 0; i < length; i++)
- name[i] = file_name[i];
- name[length] = U_NULL;
- control.system_semantic -> ReportSemError(SemanticError::CANNOT_REOPEN_FILE,
- 0,
- 0,
- name);
- delete [] name;
- }
- else
- {
- for (int i = 0; i < bad_tokens.Length(); i++)
- {
- JikesAPI::getInstance()->reportError(&bad_tokens[i]);
- }
- }
- }
- else
- {
- for (int i = 0; i < bad_tokens.Length(); i++)
- JikesAPI::getInstance()->reportError(&bad_tokens[i]);
- }
-
- DestroyInput();
-
- Coutput.flush();
- }
-
- return;
- }
-
- #ifdef HAVE_JIKES_NAMESPACE
- } // Close namespace Jikes block
- #endif
-
-