home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
PC World Komputer 1997 May
/
Pcwk0597.iso
/
sybase
/
starbuck
/
java.z
/
StreamTokenizer.java
< prev
next >
Wrap
Text File
|
1996-05-03
|
11KB
|
469 lines
/*
* @(#)StreamTokenizer.java 1.10 95/08/15
*
* Copyright (c) 1995 Sun Microsystems, Inc. All Rights reserved Permission to
* use, copy, modify, and distribute this software and its documentation for
* NON-COMMERCIAL purposes and without fee is hereby granted provided that
* this copyright notice appears in all copies. Please refer to the file
* copyright.html for further important copyright and licensing information.
*
* SUN MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF THE
* SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
* IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE,
* OR NON-INFRINGEMENT. SUN SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED BY
* LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THIS SOFTWARE OR
* ITS DERIVATIVES.
*/
package java.io;
import java.io.InputStream;
/**
 * Turns an input stream into a stream of tokens.
 * The lexical syntax is table driven: each of the 256 byte values carries a
 * set of attributes (whitespace, digit, word, quote, comment) that can be
 * adjusted through the configuration methods before or between calls to
 * <code>nextToken</code>. A character with no attributes is "ordinary" and
 * is returned as a token whose type is the character itself.
 * @version 1.10, 15 Aug 1995
 * @author James Gosling
 */
public class StreamTokenizer {
    /**
     * Look-ahead sentinel meaning "nothing buffered, read a fresh character".
     * A value that can never come out of <code>InputStream.read()</code> is
     * used so that the sentinel does not depend on the syntax table.
     */
    private static final int NEED_CHAR = Integer.MAX_VALUE;

    private static final byte CT_WHITESPACE = 1;
    private static final byte CT_DIGIT = 2;
    private static final byte CT_ALPHA = 4;
    private static final byte CT_QUOTE = 8;
    private static final byte CT_COMMENT = 16;

    private final InputStream input;
    /** Scratch buffer for word and string bodies; doubled on demand. */
    private char[] buf = new char[20];
    /** One character of look-ahead, NEED_CHAR if none, negative at end of file. */
    private int peekc = NEED_CHAR;
    private boolean pushedBack;
    private boolean forceLower;
    /** The line number of the last token read */
    private int lineNumber = 1;
    private boolean eolIsSignificantP = false;
    private boolean slashSlashCommentsP = false;
    private boolean slashStarCommentsP = false;
    /** Attribute bits (CT_*) for each of the 256 possible byte values. */
    private final byte[] ctype = new byte[256];

    /**
     * The type of the last token returned. Its value will either
     * be one of the following TT_* constants, or a single
     * character. For example, if '+' is encountered and is
     * not a valid word character, ttype will be '+'. For a quoted
     * string it is the quote character.
     */
    public int ttype;

    /**
     * The End-of-file token.
     */
    public static final int TT_EOF = -1;

    /**
     * The End-of-line token.
     */
    public static final int TT_EOL = '\n';

    /**
     * The number token. This value is in nval.
     */
    public static final int TT_NUMBER = -2;

    /**
     * The word token. This value is in sval.
     */
    public static final int TT_WORD = -3;

    /**
     * The String value of the last word or quoted-string token;
     * null for every other token type.
     */
    public String sval;

    /**
     * The number value of the last TT_NUMBER token.
     */
    public double nval;

    /**
     * Creates a stream tokenizer that parses the specified input
     * stream.
     * By default, it recognizes numbers, Strings quoted with
     * single and double quotes, and all the alphabetics. Bytes 0 through
     * ' ' are whitespace and '/' starts a single line comment.
     * @param I the input stream
     */
    public StreamTokenizer (InputStream I) {
        input = I;
        wordChars('a', 'z');
        wordChars('A', 'Z');
        wordChars(128 + 32, 255);
        whitespaceChars(0, ' ');
        commentChar('/');
        quoteChar('"');
        quoteChar('\'');
        parseNumbers();
    }

    /**
     * Resets the syntax table so that all characters are special.
     */
    public void resetSyntax() {
        for (int i = ctype.length; --i >= 0;) {
            ctype[i] = 0;
        }
    }

    /**
     * Specifies that characters in this range are word characters.
     * The range is inclusive and is clipped to the size of the syntax table.
     * @param low the low end of the range
     * @param hi the high end of the range
     */
    public void wordChars(int low, int hi) {
        if (low < 0) {
            low = 0;
        }
        // Clip to the last valid index, not to the table length.
        if (hi >= ctype.length) {
            hi = ctype.length - 1;
        }
        while (low <= hi) {
            ctype[low++] |= CT_ALPHA;
        }
    }

    /**
     * Specifies that characters in this range are whitespace
     * characters. Any other attribute of those characters is removed.
     * The range is inclusive and is clipped to the size of the syntax table.
     * @param low the low end of the range
     * @param hi the high end of the range
     */
    public void whitespaceChars(int low, int hi) {
        if (low < 0) {
            low = 0;
        }
        if (hi >= ctype.length) {
            hi = ctype.length - 1;
        }
        while (low <= hi) {
            ctype[low++] = CT_WHITESPACE;
        }
    }

    /**
     * Specifies that characters in this range are 'ordinary'.
     * Ordinary characters mean that any significance as words,
     * comments, strings, whitespaces or number characters are removed.
     * When these characters are encountered by the
     * parser, they return a ttype equal to the character.
     * The range is inclusive and is clipped to the size of the syntax table.
     * @param low the low end of the range
     * @param hi the high end of the range
     */
    public void ordinaryChars(int low, int hi) {
        if (low < 0) {
            low = 0;
        }
        if (hi >= ctype.length) {
            hi = ctype.length - 1;
        }
        while (low <= hi) {
            ctype[low++] = 0;
        }
    }

    /**
     * Specifies that this character is 'ordinary': it removes any
     * significance as a word, comment, string, whitespace or number
     * character. When encountered by the parser, it returns a ttype
     * equal to the character. A value outside the syntax table is ignored.
     * @param ch the character
     */
    public void ordinaryChar(int ch) {
        if (inTable(ch)) {
            ctype[ch] = 0;
        }
    }

    /**
     * Specifies that this character starts a single line comment.
     * Any other attribute of the character is removed. A value outside
     * the syntax table is ignored.
     * @param ch the character
     */
    public void commentChar(int ch) {
        if (inTable(ch)) {
            ctype[ch] = CT_COMMENT;
        }
    }

    /**
     * Specifies that matching pairs of this character delimit String
     * constants. When a String constant is recognized, ttype will be
     * the character that delimits the String, and sval will have
     * the body of the String. Any other attribute of the character is
     * removed. A value outside the syntax table is ignored.
     * @param ch the character
     */
    public void quoteChar(int ch) {
        if (inTable(ch)) {
            ctype[ch] = CT_QUOTE;
        }
    }

    /**
     * Specifies that numbers should be parsed. This method accepts
     * double precision floating point numbers and returns a ttype of
     * TT_NUMBER with the value in nval. The digits, '.' and '-' gain the
     * digit attribute in addition to whatever they already had.
     */
    public void parseNumbers() {
        for (int i = '0'; i <= '9'; i++) {
            ctype[i] |= CT_DIGIT;
        }
        ctype['.'] |= CT_DIGIT;
        ctype['-'] |= CT_DIGIT;
    }

    /**
     * If the flag is true, end-of-lines are significant (TT_EOL will
     * be returned by nextToken). If false, they will be treated
     * as whitespace.
     * @param flag whether line ends are reported as tokens
     */
    public void eolIsSignificant(boolean flag) {
        eolIsSignificantP = flag;
    }

    /**
     * If the flag is true, recognize C style (slash-star) comments.
     * This only takes effect while '/' is an ordinary character.
     * @param flag whether slash-star comments are skipped
     */
    public void slashStarComments(boolean flag) {
        slashStarCommentsP = flag;
    }

    /**
     * If the flag is true, recognize C++ style (slash-slash) comments.
     * This only takes effect while '/' is an ordinary character.
     * @param flag whether slash-slash comments are skipped
     */
    public void slashSlashComments(boolean flag) {
        slashSlashCommentsP = flag;
    }

    /**
     * Examines a boolean to decide whether TT_WORD tokens are
     * forced to be lower case.
     * @param fl the boolean flag
     */
    public void lowerCaseMode(boolean fl) {
        forceLower = fl;
    }

    /**
     * Parses a token from the input stream. The return value is
     * the same as the value of ttype. Typical clients of this
     * class first set up the syntax tables and then sit in a loop
     * calling nextToken to parse successive tokens until TT_EOF
     * is returned.
     * @return the type of the token that was read
     * @exception IOException if reading the underlying stream fails
     */
    public int nextToken() throws IOException {
        if (pushedBack) {
            pushedBack = false;
            return ttype;
        }
        sval = null;

        // Each pass either returns a token or skips one comment and retries,
        // so long runs of comments do not deepen the call stack.
        while (true) {
            int c = peekc;
            if (c < 0) {
                return ttype = TT_EOF;
            }
            if (c == NEED_CHAR) {
                c = input.read();
                if (c < 0) {
                    return ttype = TT_EOF;
                }
            }
            // The look-ahead is consumed from here on; every path that ends
            // up holding an unread character stores it back explicitly.
            peekc = NEED_CHAR;

            int kind = classify(c);
            while ((kind & CT_WHITESPACE) != 0) {
                if (c == '\r') {
                    lineNumber++;
                    c = input.read();
                    // Treat "\r\n" as a single line terminator.
                    if (c == '\n') {
                        c = input.read();
                    }
                    if (eolIsSignificantP) {
                        peekc = c;
                        return ttype = TT_EOL;
                    }
                } else {
                    if (c == '\n') {
                        lineNumber++;
                        if (eolIsSignificantP) {
                            return ttype = TT_EOL;
                        }
                    }
                    c = input.read();
                }
                if (c < 0) {
                    return ttype = TT_EOF;
                }
                kind = classify(c);
            }

            if ((kind & CT_DIGIT) != 0) {
                return readNumber(c);
            }
            if ((kind & CT_ALPHA) != 0) {
                return readWord(c);
            }
            if ((kind & CT_COMMENT) != 0) {
                // Leave the line terminator (or EOF) for the next pass so it
                // is counted and, if requested, reported as TT_EOL.
                peekc = skipToEndOfLine();
                continue;
            }
            if ((kind & CT_QUOTE) != 0) {
                return readQuoted(c);
            }
            if (c == '/' && (slashSlashCommentsP || slashStarCommentsP)) {
                c = input.read();
                if (c == '*' && slashStarCommentsP) {
                    if (!skipBlockComment()) {
                        return ttype = TT_EOF;
                    }
                    continue;
                }
                if (c == '/' && slashSlashCommentsP) {
                    peekc = skipToEndOfLine();
                    continue;
                }
                peekc = c;
                return ttype = '/';
            }
            return ttype = c;
        }
    }

    /**
     * Pushes back a stream token: the next call to nextToken returns the
     * current ttype again without reading the stream.
     */
    public void pushBack() {
        pushedBack = true;
    }

    /** Return the current line number. */
    public int lineno() {
        return lineNumber;
    }

    /**
     * Returns the String representation of the stream token.
     */
    public String toString() {
        String ret;
        switch (ttype) {
        case TT_EOF:
            ret = "EOF";
            break;
        case TT_EOL:
            ret = "EOL";
            break;
        case TT_WORD:
            ret = sval;
            break;
        case TT_NUMBER:
            ret = "n=" + nval;
            break;
        default: {
            char[] s = new char[3];
            s[0] = s[2] = '\'';
            s[1] = (char) ttype;
            ret = new String(s);
            break;
        }
        }
        return "Token[" + ret + "], line " + lineNumber;
    }

    /** Tells whether ch is a valid index into the syntax table. */
    private boolean inTable(int ch) {
        return ch >= 0 && ch < ctype.length;
    }

    /** Returns the attribute bits of a non-negative character; values past the table count as word characters. */
    private int classify(int c) {
        return c < 256 ? ctype[c] : CT_ALPHA;
    }

    /** Stores c at position len of the scratch buffer, growing it if needed, and returns the new length. */
    private int append(int len, int c) {
        if (len >= buf.length) {
            char[] bigger = new char[buf.length * 2];
            System.arraycopy(buf, 0, bigger, 0, buf.length);
            buf = bigger;
        }
        buf[len] = (char) c;
        return len + 1;
    }

    /** Scans a number starting at c (a lone '-' is returned as an ordinary token instead). */
    private int readNumber(int c) throws IOException {
        boolean neg = false;
        if (c == '-') {
            c = input.read();
            if (c != '.' && (c < '0' || c > '9')) {
                peekc = c;
                return ttype = '-';
            }
            neg = true;
        }
        double v = 0;
        int decexp = 0;   // number of digits seen after the decimal point
        int seendot = 0;  // becomes 1 once the (single) decimal point is seen
        while (true) {
            if (c == '.' && seendot == 0) {
                seendot = 1;
            } else if ('0' <= c && c <= '9') {
                v = v * 10 + (c - '0');
                decexp += seendot;
            } else {
                break;
            }
            c = input.read();
        }
        peekc = c;
        if (decexp != 0) {
            double denom = 10;
            for (int k = decexp - 1; k > 0; k--) {
                denom *= 10;
            }
            /* do one division of a likely-to-be-more-accurate number */
            v = v / denom;
        }
        nval = neg ? -v : v;
        return ttype = TT_NUMBER;
    }

    /** Scans a word starting at c; it extends over following word and digit characters. */
    private int readWord(int c) throws IOException {
        int len = 0;
        int kind;
        do {
            len = append(len, c);
            c = input.read();
            kind = c < 0 ? CT_WHITESPACE : classify(c);
        } while ((kind & (CT_ALPHA | CT_DIGIT)) != 0);
        peekc = c;
        sval = String.copyValueOf(buf, 0, len);
        if (forceLower) {
            sval = sval.toLowerCase();
        }
        return ttype = TT_WORD;
    }

    /** Scans a string delimited by quote, decoding backslash escapes into sval. */
    private int readQuoted(int quote) throws IOException {
        ttype = quote;
        int len = 0;
        // 'next' is always the first character not yet consumed. Octal
        // escapes need one character of look-ahead, so it is kept in a
        // variable rather than being re-read at the top of the loop.
        int next = input.read();
        while (next >= 0 && next != quote && next != '\n' && next != '\r') {
            int ch;
            if (next == '\\') {
                ch = input.read();
                if (ch < 0) {
                    // Backslash at end of file: nothing to escape.
                    next = ch;
                    break;
                }
                if ('0' <= ch && ch <= '7') {
                    // Up to three octal digits.
                    ch -= '0';
                    next = input.read();
                    if ('0' <= next && next <= '7') {
                        ch = (ch << 3) + (next - '0');
                        next = input.read();
                        if ('0' <= next && next <= '7') {
                            ch = (ch << 3) + (next - '0');
                            next = input.read();
                        }
                    }
                } else {
                    switch (ch) {
                    case 'a':
                        ch = 0x7;
                        break;
                    case 'b':
                        ch = '\b';
                        break;
                    case 'f':
                        ch = 0xC;
                        break;
                    case 'n':
                        ch = '\n';
                        break;
                    case 'r':
                        ch = '\r';
                        break;
                    case 't':
                        ch = '\t';
                        break;
                    case 'v':
                        ch = 0xB;
                        break;
                    default:
                        // Any other escaped character stands for itself.
                        break;
                    }
                    next = input.read();
                }
            } else {
                ch = next;
                next = input.read();
            }
            len = append(len, ch);
        }
        // A closing quote is consumed; a line end or EOF that cut the string
        // short is kept so the next call sees it.
        peekc = (next == quote) ? NEED_CHAR : next;
        sval = String.copyValueOf(buf, 0, len);
        return quote;
    }

    /** Discards input up to a line terminator or EOF and returns that terminating value unconsumed. */
    private int skipToEndOfLine() throws IOException {
        int c;
        do {
            c = input.read();
        } while (c != '\n' && c != '\r' && c >= 0);
        return c;
    }

    /** Discards input through the closing star-slash, counting lines; returns false if EOF comes first. */
    private boolean skipBlockComment() throws IOException {
        int prev = 0;
        int c;
        while ((c = input.read()) != '/' || prev != '*') {
            if (c < 0) {
                return false;
            }
            // Count "\r", "\n" and "\r\n" once each, as the main scanner does.
            if (c == '\r' || (c == '\n' && prev != '\r')) {
                lineNumber++;
            }
            prev = c;
        }
        return true;
    }
}