home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
ftp.cs.arizona.edu
/
ftp.cs.arizona.edu.tar
/
ftp.cs.arizona.edu
/
icon
/
historic
/
v92.tgz
/
v92.tar
/
v92
/
src
/
preproc
/
bldtok.c
next >
Wrap
C/C++ Source or Header
|
1996-03-22
|
21KB
|
769 lines
/*
* This file contains routines for building tokens out of characters from a
* "character source". This source is the top element on the source stack.
*/
#include "::preproc:preproc.h"
#include "::preproc:ptoken.h"
#include <ctype.h>
/*
* Prototypes for the file-local helper functions defined later in this file:
*   pp_tok_id  - look up a name in the table of preprocessing directives
*   chck_wh_sp - build a white space token if the input is at white space
*   pp_number  - build a token for a preprocessing number
*   char_str   - build a token for a character constant or string literal
*   hdr_tok    - build a token for an #include header name
* NOTE(review): hidden, Params and noargs are macros defined outside this
* file; the definitions below use "static" and old-style parameter lists,
* so they presumably exist for portability to pre-ANSI compilers - confirm.
*/
hidden int pp_tok_id Params((char *s));
hidden struct token *chck_wh_sp Params((struct char_src *cs));
hidden struct token *pp_number Params((noargs));
hidden struct token *char_str Params((int delim, int tok_id));
hidden struct token *hdr_tok Params((int delim, int tok_id,
struct char_src *cs));
/*
* whsp_image selects what chck_wh_sp() stores as the image of a white space
* token:
*   NoSpelling - the whole run of white space and comments becomes one space
*   NoComment  - white space characters are kept, each comment becomes a space
*   FullImage  - white space characters and comment text are both kept
*/
int whsp_image = NoSpelling; /* indicate what is in white space tokens */
/*
* Shared PpNumber tokens with the images "0" and "1"; both are created once
* by init_tok().
*/
struct token *zero_tok; /* token for literal 0 */
struct token *one_tok; /* token for literal 1 */
#include "::preproc:pproto.h"
/*
 * IsWhSp(c) - true if c is a white space character: space, new-line,
 *  horizontal tab, vertical tab, or form feed.
 *
 *  The parameter is parenthesized at every use so that an argument containing
 *  operators of lower precedence than == (for example a conditional
 *  expression) is compared as a whole. The argument is still evaluated more
 *  than once, so it must not have side effects.
 */
#define IsWhSp(c) ((c) == ' ' || (c) == '\n' || (c) == '\t' || \
   (c) == '\v' || (c) == '\f')
/*
 * AdvChar() - advance to next character from buffer, filling the buffer
 *  if needed.
 *
 *  Increments next_char and calls fill_cbuf() when it reaches last_char.
 *  The body is wrapped in do { ... } while (0) so that "AdvChar();" is
 *  exactly one statement; a bare "if" here would capture a following
 *  "else" or break an unbraced if/else around the macro.
 */
#define AdvChar() \
   do { \
      if (++next_char == last_char) \
         fill_cbuf(); \
   } while (0)
/*
* State shared by the token building routines in this file.
*   line        - line number stored in new tokens; recomputed from the
*                 character source's line buffer plus its line adjustment
*   fname       - file name stored in new tokens; taken from the character
*                 source at the start of tokenize()
*   tknize_sbuf - characters of the token image are appended here and then
*                 turned into a string with str_install()
*/
static int line; /* current line number */
static char *fname; /* current file name */
static struct str_buf tknize_sbuf; /* string buffer */
/*
* List of preprocessing directives and the corresponding token ids.
*
* The list ends with a {NULL, Invalid} sentinel, so a failed search in
* pp_tok_id() yields Invalid. init_tok() replaces every name with the
* pointer returned by spec_str(), which is why the table is not const and
* why pp_tok_id() can compare names by pointer.
* NOTE(review): PPDirectives is a macro defined outside this file; it
* presumably expands to additional table entries (or to nothing) - confirm.
*/
static struct rsrvd_wrd pp_rsrvd[] = {
PPDirectives
{"if", PpIf},
{"else", PpElse},
{"ifdef", PpIfdef},
{"ifndef", PpIfndef},
{"elif", PpElif},
{"endif", PpEndif},
{"include", PpInclude},
{"define", PpDefine},
{"undef", PpUndef},
{"begdef", PpBegdef},
{"enddef", PpEnddef},
{"line", PpLine},
{"error", PpError},
{"pragma", PpPragma},
{NULL, Invalid}};
/*
 * init_tok - initialize tokenizer.
 *
 *  All work is done on the first call only; later calls return without
 *  doing anything. The first call sets up the string buffer, replaces each
 *  directive name in pp_rsrvd with the pointer returned by spec_str(), and
 *  creates the shared tokens for the literals 0 and 1.
 */
novalue init_tok()
   {
   static int initialized = 0;
   struct rsrvd_wrd *entry;

   if (!initialized) {
      initialized = 1;

      /*
       * Set up the buffer used to accumulate token images.
       */
      init_sbuf(&tknize_sbuf);

      /*
       * Put the directive names in the string table.
       */
      entry = pp_rsrvd;
      while (entry->s != NULL) {
         entry->s = spec_str(entry->s);
         ++entry;
         }

      /*
       * Build the tokens for the literals 0 and 1.
       */
      zero_tok = new_token(PpNumber, spec_str("0"), "", 0);
      one_tok = new_token(PpNumber, spec_str("1"), "", 0);
      }
   }
/*
 * pp_tok_id - see if s is the name of a preprocessing directive.
 *
 *  Returns the token id paired with s in pp_rsrvd, or the id stored in the
 *  table's terminating entry when s is not found. Names are compared by
 *  pointer, not by content, so s must be the same pointer that init_tok()
 *  stored in the table for that name.
 */
static int pp_tok_id(s)
char *s;
   {
   struct rsrvd_wrd *entry = pp_rsrvd;

   while (entry->s != NULL) {
      if (entry->s == s)
         break;
      ++entry;
      }
   return entry->tok_id;
   }
/*
 * chk_eq_sign - look at the next input character; if it is an equal sign,
 *  consume it and return 1, otherwise leave the input alone and return 0.
 *  It is used for processing -D options.
 */
int chk_eq_sign()
   {
   if (*next_char != '=')
      return 0;

   AdvChar();
   return 1;
   }
/*
* chck_wh_sp - If the input is at white space, construct a white space token
* and return it, otherwise return NULL. This function also helps keep track
* of preprocessor directive boundaries.
*
* cs is the character source being read. A comment counts as white space.
* At every new-line cs->dir_state is set to CanStart; if the state was
* Within, the token id becomes PpDirEnd instead of WhiteSpace and scanning
* stops right after that new-line.
*
* NULL is also returned (and the token freed) when the white space is
* immediately followed by ## and does not end a directive.
*
* Side effects: consumes the white space characters, updates the file-level
* variable line, and may change cs->dir_state.
*/
static struct token *chck_wh_sp(cs)
struct char_src *cs;
{
register int c1, c2;
struct token *t;
int tok_id;
/*
* See if we are at white space or a comment.
*/
c1 = *next_char;
if (!IsWhSp(c1) && (c1 != '/' || next_char[1] != '*'))
return NULL;
/*
* Find the line number of the current character in the line number
* buffer, and correct it if we have encountered any #line directives.
*/
line = cs->line_buf[next_char - first_char] + cs->line_adj;
if (c1 == '\n')
--line; /* a new-line really belongs to the previous line */
tok_id = WhiteSpace;
for (;;) {
if (IsWhSp(c1)) {
/*
* The next character is a white space. If we are retaining the
* image of the white space in the token, copy the character to
* the string buffer. If we are in the midst of a preprocessor
* directive and find a new-line, indicate the end of the
* directive.
*/
AdvChar();
if (whsp_image != NoSpelling)
AppChar(tknize_sbuf, c1);
if (c1 == '\n') {
if (cs->dir_state == Within)
tok_id = PpDirEnd;
cs->dir_state = CanStart;
/*
* A directive-ending new-line terminates the token; any further
* white space is left for the next call.
*/
if (tok_id == PpDirEnd)
break;
}
}
else if (c1 == '/' && next_char[1] == '*') {
/*
* Start of comment. If we are retaining the image of comments,
* copy the characters into the string buffer.
*/
if (whsp_image == FullImage) {
AppChar(tknize_sbuf, '/');
AppChar(tknize_sbuf, '*');
}
AdvChar();
AdvChar();
/*
* Look for the end of the comment. c1 is the current character and
* c2 is the one after it; the loop stops with c1 and c2 holding the
* closing star and slash, neither of which has been consumed yet.
*/
c1 = *next_char;
c2 = next_char[1];
while (c1 != '*' || c2 != '/') {
if (c1 == EOF)
errfl1(fname, line, "eof encountered in comment");
AdvChar();
if (whsp_image == FullImage)
AppChar(tknize_sbuf, c1);
c1 = c2;
c2 = next_char[1];
}
/*
* Determine if we are retaining the image of a comment, replacing
* a comment by one space character, or ignoring comments.
*/
if (whsp_image == FullImage) {
AppChar(tknize_sbuf, '*');
AppChar(tknize_sbuf, '/');
}
else if (whsp_image == NoComment)
AppChar(tknize_sbuf, ' ');
/*
* Consume the closing star and slash.
*/
AdvChar();
AdvChar();
}
else
break; /* end of white space */
c1 = *next_char;
}
/*
* If we are not retaining the image of white space, replace it all
* with one space character.
*/
if (whsp_image == NoSpelling)
AppChar(tknize_sbuf, ' ');
t = new_token(tok_id, str_install(&tknize_sbuf), fname, line);
/*
* Look ahead to see if a ## operator is next. Note that the else below
* pairs with the inner if (the test for PpDirEnd), not with the test
* for ##.
*/
if (*next_char == '#' && next_char[1] == '#')
if (tok_id == PpDirEnd)
errt1(t, "## expressions must not cross directive boundaries");
else {
/*
* Discard white space before a ## operator.
*/
free_t(t);
return NULL;
}
return t;
}
/*
 * pp_number - Create a token for a preprocessing number (See ANSI C Standard
 *  for the syntax of such a number).
 *
 *  The caller has already put the leading character(s) of the number in the
 *  string buffer; this routine appends the rest. Digits, letters,
 *  underscores and periods are accepted, and an 'e' or 'E' may be followed
 *  directly by a sign. The first character that does not fit ends the
 *  number and is left in the input.
 */
static struct token *pp_number()
   {
   register int ch;

   for (ch = *next_char; ; ch = *next_char) {
      if (ch == 'e' || ch == 'E') {
         /*
          * Exponent marker; take an immediately following sign with it.
          */
         AppChar(tknize_sbuf, ch);
         AdvChar();
         ch = *next_char;
         if (ch == '+' || ch == '-') {
            AppChar(tknize_sbuf, ch);
            AdvChar();
            }
         }
      else if (isdigit(ch) || ch == '.' || islower(ch) || isupper(ch) ||
               ch == '_') {
         AppChar(tknize_sbuf, ch);
         AdvChar();
         }
      else
         break;   /* not part of the number */
      }

   return new_token(PpNumber, str_install(&tknize_sbuf), fname, line);
   }
/*
 * char_str - construct a token for a character constant or string literal.
 *
 *  delim is the closing delimiter to look for and tok_id is the id given to
 *  the new token. The opening delimiter has already been consumed by the
 *  caller. The token image is the text between the delimiters; a backslash
 *  and the character after it are copied as a pair, so an escaped delimiter
 *  does not end the literal. End-of-file or a new-line inside the literal
 *  is reported with errfl1().
 */
static struct token *char_str(delim, tok_id)
int delim;
int tok_id;
   {
   register int ch;

   ch = *next_char;
   while (ch != EOF && ch != '\n' && ch != delim) {
      AppChar(tknize_sbuf, ch);
      if (ch == '\\') {
         /*
          * Escape sequence: look at the character after the backslash.
          *  End-of-file and new-line are errors, reported below.
          */
         ch = next_char[1];
         if (ch == EOF || ch == '\n')
            break;
         AppChar(tknize_sbuf, ch);
         AdvChar();
         }
      AdvChar();
      ch = *next_char;
      }

   if (ch == EOF)
      errfl1(fname, line, "End-of-file encountered within a literal");
   if (ch == '\n')
      errfl1(fname, line, "New-line encountered within a literal");

   AdvChar();   /* step past the closing delimiter */
   return new_token(tok_id, str_install(&tknize_sbuf), fname, line);
   }
/*
 * hdr_tok - create a token for an #include header. The delimiter may be
 *  > or ".
 *
 *  On entry the input is at the opening delimiter, which is skipped. delim
 *  is the closing delimiter, tok_id is the id given to the new token, and
 *  cs is the character source, used to find the current line number. The
 *  token image is the text between the delimiters, copied without any
 *  escape processing. End-of-file or a new-line before the closing
 *  delimiter is reported with errfl1().
 */
static struct token *hdr_tok(delim, tok_id, cs)
int delim;
int tok_id;
struct char_src *cs;
   {
   register int ch;

   line = cs->line_buf[next_char - first_char] + cs->line_adj;

   AdvChar();   /* skip the opening delimiter */
   while ((ch = *next_char) != delim) {
      if (ch == EOF)
         errfl1(fname, line,
            "End-of-file encountered within a header name");
      if (ch == '\n')
         errfl1(fname, line,
            "New-line encountered within a header name");
      AppChar(tknize_sbuf, ch);
      AdvChar();
      }
   AdvChar();   /* skip the closing delimiter */

   return new_token(tok_id, str_install(&tknize_sbuf), fname, line);
   }
/*
* tokenize - return the next token from the character source on the top
* of the source stack.
*
* A token saved in cs->tok_sav by an earlier look ahead is returned first.
* NULL is returned when the next character is EOF. Otherwise the token is
* one of: a white space or directive-end token from chck_wh_sp(), a token
* naming a preprocessing directive, a wide or ordinary character constant
* or string literal, an identifier, a preprocessing number, or an operator
* or punctuation token. Any character that fits none of these becomes a
* token whose id is the character itself.
*
* Side effects: consumes input, sets the file-level variables line and
* fname, and may update cs->dir_state and cs->tok_sav.
*/
struct token *tokenize()
{
struct char_src *cs;
struct token *t1, *t2;
register int c;
int tok_id;
cs = src_stack->u.cs;
/*
* Check to see if the last call left a token from a look ahead.
*/
if (cs->tok_sav != NULL) {
t1 = cs->tok_sav;
cs->tok_sav = NULL;
return t1;
}
if (*next_char == EOF)
return NULL;
/*
* Find the current line number and file name for the character
* source and check for white space.
*/
line = cs->line_buf[next_char - first_char] + cs->line_adj;
fname = cs->fname;
if ((t1 = chck_wh_sp(cs)) != NULL)
return t1;
c = *next_char; /* look at next character */
AdvChar();
/*
* If the last thing we saw in this character source was white space
* containing a new-line, then we must look for the start of a
* preprocessing directive.
*/
if (cs->dir_state == CanStart) {
cs->dir_state = Reset;
if (c == '#' && *next_char != '#') {
/*
* Assume we are within a preprocessing directive and check
* for white space to discard. Note that the else below pairs
* with the test for PpDirEnd.
*/
cs->dir_state = Within;
if ((t1 = chck_wh_sp(cs)) != NULL)
if (t1->tok_id == PpDirEnd) {
/*
* We found a new-line, this is a null preprocessor directive.
* The directive-end token is saved for the next call.
*/
cs->tok_sav = t1;
AppChar(tknize_sbuf, '#');
return new_token(PpNull, str_install(&tknize_sbuf), fname, line);
}
else
free_t(t1); /* discard white space */
c = *next_char;
if (islower(c) || isupper(c) || c == '_') {
/*
* Tokenize the identifier following the #
*/
t1 = tokenize();
if ((tok_id = pp_tok_id(t1->image)) == Invalid) {
/*
* We have a stringizing operation, not a preprocessing
* directive. Return the # now and save the identifier for
* the next call.
*/
cs->dir_state = Reset;
cs->tok_sav = t1;
AppChar(tknize_sbuf, '#');
return new_token('#', str_install(&tknize_sbuf), fname, line);
}
else {
t1->tok_id = tok_id;
if (tok_id == PpInclude) {
/*
* A header name has to be tokenized specially. Find
* it, then save the token.
*/
if ((t2 = chck_wh_sp(cs)) != NULL)
if (t2->tok_id == PpDirEnd)
errt1(t2, "file name missing from #include");
else
free_t(t2);
/*
* If the next character is neither " nor <, no header token
* is saved here and the rest of the line is left for later
* calls.
*/
c = *next_char;
if (c == '"')
cs->tok_sav = hdr_tok('"', StrLit, cs);
else if (c == '<')
cs->tok_sav = hdr_tok('>', PpHeader, cs);
}
/*
* Return the token indicating the kind of preprocessor
* directive we have started.
*/
return t1;
}
}
else
errfl1(fname, line,
"# must be followed by an identifier or keyword");
}
}
/*
* Check for literals containing wide characters. An L that is not
* followed by a quote falls through and is handled as an identifier.
*/
if (c == 'L') {
if (*next_char == '\'') {
AdvChar();
t1 = char_str('\'', LCharConst);
if (t1->image[0] == '\0')
errt1(t1, "invalid character constant");
return t1;
}
else if (*next_char == '"') {
AdvChar();
return char_str('"', LStrLit);
}
}
/*
* Check for identifier.
*/
if (islower(c) || isupper(c) || c == '_') {
AppChar(tknize_sbuf, c);
c = *next_char;
while (islower(c) || isupper(c) || isdigit(c) || c == '_') {
AppChar(tknize_sbuf, c);
AdvChar();
c = *next_char;
}
return new_token(Identifier, str_install(&tknize_sbuf), fname, line);
}
/*
* Check for number.
*/
if (isdigit(c)) {
AppChar(tknize_sbuf, c);
return pp_number();
}
/*
* Check for character constant. An empty constant is reported as an
* error.
*/
if (c == '\'') {
t1 = char_str(c, CharConst);
if (t1->image[0] == '\0')
errt1(t1, "invalid character constant");
return t1;
}
/*
* Check for string constant.
*/
if (c == '"')
return char_str(c, StrLit);
/*
* Check for operators and punctuation. Anything that does not fit these
* categories is a single character token.
*/
AppChar(tknize_sbuf, c);
switch (c) {
case '.':
c = *next_char;
if (isdigit(c)) {
/*
* Number
*/
AppChar(tknize_sbuf, c);
AdvChar();
return pp_number();
}
else if (c == '.' && next_char[1] == '.') {
/*
* ...
*/
AdvChar();
AdvChar();
AppChar(tknize_sbuf, '.');
AppChar(tknize_sbuf, '.');
return new_token(Ellipsis, str_install(&tknize_sbuf), fname, line);
}
else
return new_token('.', str_install(&tknize_sbuf), fname, line);
case '+':
c = *next_char;
if (c == '+') {
/*
* ++
*/
AppChar(tknize_sbuf, '+');
AdvChar();
return new_token(Incr, str_install(&tknize_sbuf), fname, line);
}
else if (c == '=') {
/*
* +=
*/
AppChar(tknize_sbuf, '=');
AdvChar();
return new_token(PlusAsgn, str_install(&tknize_sbuf), fname, line);
}
else
return new_token('+', str_install(&tknize_sbuf), fname, line);
case '-':
c = *next_char;
if (c == '>') {
/*
* ->
*/
AppChar(tknize_sbuf, '>');
AdvChar();
return new_token(Arrow, str_install(&tknize_sbuf), fname, line);
}
else if (c == '-') {
/*
* --
*/
AppChar(tknize_sbuf, '-');
AdvChar();
return new_token(Decr, str_install(&tknize_sbuf), fname, line);
}
else if (c == '=') {
/*
* -=
*/
AppChar(tknize_sbuf, '=');
AdvChar();
return new_token(MinusAsgn, str_install(&tknize_sbuf), fname,
line);
}
else
return new_token('-', str_install(&tknize_sbuf), fname, line);
case '<':
c = *next_char;
if (c == '<') {
AppChar(tknize_sbuf, '<');
AdvChar();
if (*next_char == '=') {
/*
* <<=
*/
AppChar(tknize_sbuf, '=');
AdvChar();
return new_token(LShftAsgn, str_install(&tknize_sbuf), fname,
line);
}
else
/*
* <<
*/
return new_token(LShft, str_install(&tknize_sbuf), fname, line);
}
else if (c == '=') {
/*
* <=
*/
AppChar(tknize_sbuf, '=');
AdvChar();
return new_token(Leq, str_install(&tknize_sbuf), fname, line);
}
else
return new_token('<', str_install(&tknize_sbuf), fname, line);
case '>':
c = *next_char;
if (c == '>') {
AppChar(tknize_sbuf, '>');
AdvChar();
if (*next_char == '=') {
/*
* >>=
*/
AppChar(tknize_sbuf, '=');
AdvChar();
return new_token(RShftAsgn, str_install(&tknize_sbuf), fname,
line);
}
else
/*
* >>
*/
return new_token(RShft, str_install(&tknize_sbuf), fname, line);
}
else if (c == '=') {
/*
* >=
*/
AppChar(tknize_sbuf, '=');
AdvChar();
return new_token(Geq, str_install(&tknize_sbuf), fname, line);
}
else
return new_token('>', str_install(&tknize_sbuf), fname, line);
case '=':
if (*next_char == '=') {
/*
* ==
*/
AppChar(tknize_sbuf, '=');
AdvChar();
return new_token(Equal, str_install(&tknize_sbuf), fname, line);
}
else
return new_token('=', str_install(&tknize_sbuf), fname, line);
case '!':
if (*next_char == '=') {
/*
* !=
*/
AppChar(tknize_sbuf, '=');
AdvChar();
return new_token(Neq, str_install(&tknize_sbuf), fname, line);
}
else
return new_token('!', str_install(&tknize_sbuf), fname, line);
case '&':
c = *next_char;
if (c == '&') {
/*
* &&
*/
AppChar(tknize_sbuf, '&');
AdvChar();
return new_token(And, str_install(&tknize_sbuf), fname, line);
}
else if (c == '=') {
/*
* &=
*/
AppChar(tknize_sbuf, '=');
AdvChar();
return new_token(AndAsgn, str_install(&tknize_sbuf), fname, line);
}
else
return new_token('&', str_install(&tknize_sbuf), fname, line);
case '|':
c = *next_char;
if (c == '|') {
/*
* ||
*/
AppChar(tknize_sbuf, '|');
AdvChar();
return new_token(Or, str_install(&tknize_sbuf), fname, line);
}
else if (c == '=') {
/*
* |=
*/
AppChar(tknize_sbuf, '=');
AdvChar();
return new_token(OrAsgn, str_install(&tknize_sbuf), fname, line);
}
else
return new_token('|', str_install(&tknize_sbuf), fname, line);
case '*':
if (*next_char == '=') {
/*
* *=
*/
AppChar(tknize_sbuf, '=');
AdvChar();
return new_token(MultAsgn, str_install(&tknize_sbuf), fname, line);
}
else
return new_token('*', str_install(&tknize_sbuf), fname, line);
case '/':
if (*next_char == '=') {
/*
* /=
*/
AppChar(tknize_sbuf, '=');
AdvChar();
return new_token(DivAsgn, str_install(&tknize_sbuf), fname, line);
}
else
return new_token('/', str_install(&tknize_sbuf), fname, line);
case '%':
if (*next_char == '=') {
/*
* %=
*/
AppChar(tknize_sbuf, '=');
AdvChar();
return new_token(ModAsgn, str_install(&tknize_sbuf), fname, line);
}
else
return new_token('%', str_install(&tknize_sbuf), fname, line);
case '^':
if (*next_char == '=') {
/*
* ^=
*/
AppChar(tknize_sbuf, '=');
AdvChar();
return new_token(XorAsgn, str_install(&tknize_sbuf), fname, line);
}
else
return new_token('^', str_install(&tknize_sbuf), fname, line);
case '#':
/*
* Token pasting or stringizing operator.
*/
if (*next_char == '#') {
/*
* ##
*/
AppChar(tknize_sbuf, '#');
AdvChar();
t1 = new_token(PpPaste, str_install(&tknize_sbuf), fname, line);
}
else
t1 = new_token('#', str_install(&tknize_sbuf), fname, line);
/*
* The operand must be in the same preprocessing directive. White
* space after the operator that does not end the directive is
* discarded; the else below pairs with the test for PpDirEnd.
*/
if ((t2 = chck_wh_sp(cs)) != NULL)
if (t2->tok_id == PpDirEnd)
errt2(t2, t1->image,
" preprocessing expression must not cross directive boundary");
else
free_t(t2);
return t1;
default:
/*
* Any other character is a token by itself, with the character as
* its token id.
*/
return new_token(c, str_install(&tknize_sbuf), fname, line);
}
}