home *** CD-ROM | disk | FTP | other *** search
-
- #include <ctype.h>
- #include <stdlib.h>
- #include <stdio.h>
- #include <stddef.h>
- #include "global.h"
- #include "ztype.h"
-
- #define BASE10 1
- #define BASE8 2
- #define BASE16 3
-
- #define GRANULARITY 256
-
- extern char *next_c;
-
- static int tok_flags = 0;
- static TokenP tok_blocks, next_free_tok, pushback_list;
-
- /*
- alloc_token() -- return space for a Token, either from the free list or
- from freshly malloc()'ed memory
- */
- TokenP alloc_token()
- {
- register TokenP T;
- register int i;
-
- if (!next_free_tok) {
- /* allocate several Token's at once for efficiency */
- T = (TokenP) mallok(GRANULARITY * sizeof (Token));
-
- /* the first pointer is used to chain blocks of Token's together */
- T->next = tok_blocks;
- tok_blocks = T;
- /* string the rest together to form a free list */
- for (i = 1; i < GRANULARITY; i++) {
- T[i].next = T + i + 1;
- T[i].pre_ws = T[i].txt = NULL;
- }
- T[GRANULARITY - 1].next = NULL;
- next_free_tok = T + 1;
- }
- T = next_free_tok;
- next_free_tok = T->next;
- T->hashval = T->val = T->type = T->subtype = T->flags = 0;
- T->next = NULL;
- return T;
- }
-
- /* free_token() -- return an allocated Token to the free list */
- void free_token(T)
- TokenP T;
- {
- T->next = NULL;
- free_tlist(T);
- }
-
- /* free_tlist() -- return a list of Token's to the free list */
- void free_tlist(T)
- register TokenP T;
- {
- register TokenP T1;
-
- for (T1 = T; T; T = T1) {
- T1 = T->next;
- free(T->pre_ws);
- T->pre_ws = NULL;
- free(T->txt);
- T->txt = NULL;
- T->next = next_free_tok;
- next_free_tok = T;
- }
- }
-
- /*
- copy_token() -- return a new Token that is a duplicate of the given token
- */
- TokenP copy_token(T1)
- TokenP T1;
- {
- TokenP T2 = alloc_token();
-
- *T2 = *T1;
- T2->pre_ws = strdup(T1->pre_ws);
- T2->txt = strdup(T1->txt);
- T2->next = NULL;
- return T2;
- }
-
- /* copy_tlist() -- create a duplicate of a list of Token's */
- TokenP copy_tlist(T1)
- TokenP T1;
- {
- Token head;
- TokenP T2 = &head;
-
- for (T2->next = NULL; T1; T1 = T1->next, T2 = T2->next)
- T2->next = copy_token(T1);
- return head.next;
- }
-
- /* tok_shutdown() -- free all space allocated for Token's */
- void tok_shutdown()
- {
- register TokenP T, T1;
- register int i;
-
- for (T1 = T = tok_blocks; T; T = T1) {
- T1 = T->next;
- for (i = 1; i < GRANULARITY; i++) {
- if (T[i].pre_ws)
- free(T[i].pre_ws);
- if (T[i].txt)
- free(T[i].txt);
- }
- free(T);
- }
- }
-
- /*
- push_tlist() -- "un-read" the list of Token's |T|; token() will return all
- of these tokens in order before reading another token from the input file
- */
- void push_tlist(T)
- TokenP T;
- {
- register TokenP t;
-
- if (!T)
- return;
- t = T;
- while (t->next)
- t = t->next;
- t->next = pushback_list;
- pushback_list = T;
- }
-
- /* mk_eof() -- makes and returns an EOF_ token */
- static TokenP mk_eof()
- {
- TokenP T = alloc_token();
-
- T->type = EOF_;
- T->pre_ws = mallok(1);
- *T->pre_ws = '\0';
- T->txt = mallok(1);
- *T->txt = '\0';
- return T;
- }
-
- /* mk_eol() -- makes and returns an EOL token */
- TokenP mk_eol(s, n)
- char *s;
- int n;
- {
- TokenP T = alloc_token();
-
- T->pre_ws = mallok(n + 1);
- strncpy(T->pre_ws, s, n);
- T->pre_ws[n] = '\0';
- T->txt = mallok(2);
- T->txt[0] = '\n';
- T->txt[1] = '\0';
- T->type == EOL;
- T->subtype = '\n';
- return T;
- }
-
- /*
- mk_stopper() -- makes and returns a STOP token. See expand_tlist() for
- further information.
- */
- TokenP mk_stopper()
- {
- TokenP T = alloc_token();
-
- T->type = STOP;
- T->pre_ws = mallok(1);
- *T->pre_ws = '\0';
- T->txt = mallok(1);
- *T->txt = '\0';
- return T;
- }
-
- /*
- mk_unmarker() -- makes and returns a special token that informs the
- tokenizer to unmark the macro text associated with token |T|. See
- expand() for further information.
- */
- TokenP mk_unmarker(T)
- TokenP T;
- {
- TokenP T1 = copy_token(T);
-
- T1->type = UNMARK;
- return T1;
- }
-
- /* flush_tokenizer() -- discard all Tokens pushed back by push_tlist() */
- void flush_tokenizer()
- {
- free_tlist(pushback_list);
- pushback_list = NULL;
- }
-
- /*
- number() -- copies from |s| into the token |T| a string of characters
- denoting an integer or floating-point constant. Returns a pointer to the
- first uncopied character.
- */
- static char *number(s, T)
- register char *s;
- TokenP T;
- {
- int numtype = BASE10, fpflag = 0;
- char *t;
-
- T->type = NUMBER;
- if (*s == '0') {
- /* check for octal or hexadecimal constant */
- if ((s[1] == 'x' || s[1] == 'X') && isxdigit(s[2])) {
- numtype = BASE16;
- T->flags |= UNS_VAL;
- } else if (is_octal(s[1])) {
- numtype = BASE8;
- T->flags |= UNS_VAL;
- }
- }
- T->val = strtol(s, &t, 0);
- s = t;
- if (numtype != BASE10 || is_isuff(*s)) {
-
- /*
- if we're not in base 10, or the next characters are integer constant
- suffixes, this can't be a floating-point constant
- */
- while (is_isuff(*s)) {
- if (*s == 'u' || *s == 'U')
- T->flags |= UNS_VAL;
- s++;
- }
- return s;
- }
- /* check to see if the number is actually floating point */
- if (*s == '.') {
- fpflag = 1;
- do
- s++;
- while (isdigit(*s));
- }
- if (*s == 'e' || *s == 'E') {
- register char *t = s;
-
- t++;
- if (*t == '-' || *t == '+')
- t++;
- if (isdigit(*t)) {
- fpflag = 1;
- do
- t++;
- while (isdigit(*t));
- s = t;
- }
- }
- if (fpflag) {
- T->type = FP_NUM;
- if (is_fsuff(*s))
- s++;
- }
- return s;
- }
-
- /*
- char_constant() -- copy from |s| into the token |T| a string of characters
- denoting a character constant. We do not translate escape sequences at
- this point, though we might need to
- */
- static char *char_constant(s, T)
- register char *s;
- TokenP T;
- {
- T->type = CHAR_CON;
- for (; *s; s++) {
- if (*s == '\'')
- return s + 1;
- if (*s == '\\')
- s++;
- }
- error("unterminated character constant");
- return s;
- }
-
- /*
- string_literal() -- copy from |s| into the token |T| a string of
- characters denoting a string literal. We do not translate escape
- sequences at this point, though we might need to
- */
- static char *string_literal(s, T)
- register char *s;
- TokenP T;
- {
- T->type = STR_CON;
- for (; *s; s++) {
- if (*s == '"')
- return s + 1;
- if (*s == '\\')
- s++;
- }
- error("unterminated string literal");
- return s;
- }
-
- /*
- include_name() -- copy from |s| into the token |T| a string of characters
- denoting an #include file specifier enclosed in <>. |s| points to the
- character after the '<'.
- */
- static char *include_name(s, T)
- register char *s;
- TokenP T;
- {
- T->type = INC_NAM;
- for (; *s; s++) {
- if (*s == '>')
- return s + 1;
- }
- error("unterminated include file name");
- }
-
- /* set_mode() -- set the tokenizer flags to |m| */
- void set_mode(m)
- int m;
- {
- tok_flags = m;
- }
-
- /*
- change_mode() -- twiddle the tokenizer flags; in particular, set the flags
- specified in |raise| and clear the flags specified in |lower|
- */
- void change_mode(raise, lower)
- {
- tok_flags |= raise;
- tok_flags &= (~lower);
- }
-
- /* get_mode() -- return the current value of the tokenizer flags */
- int get_mode()
- {
- return tok_flags;
- }
-
- /*
- xlate_token() -- determines the type of the next preprocessor token in the
- string pointed to by |s|. Information about the token found is placed in
- the Token |T|. Returns a pointer to the first character not in the token
- read.
- */
- static char *xlate_token(s, T)
- register char *s;
- TokenP T;
- {
- if (is_ctoks(*s)) {
- char *t;
-
- T->hashval = hash_id(s, &t);
- s = t;
- T->type = ID;
- return t;
- } else if (isdigit(*s))
- return number(s, T);
- else
- switch (*s++) {
- case '.':
- T->subtype = '.';
- if (*s == '.' && s[1] == '.') {
- s += 2;
- T->type = DONT_CARE;
- } else if (isdigit(*s))
- s = number(s - 1, T);
- else
- T->type = DONT_CARE;
- break;
- case '#':
- if (*s == '#') {
- s++;
- T->type = TOK_CAT;
- } else
- T->type = POUND;
- break;
- case '&':
- T->subtype = '&';
- if (*s == '&') {
- s++;
- T->type = L_AND_OP;
- } else if (*s == '=') {
- s++;
- T->type = DONT_CARE;
- } else
- T->type = B_AND_OP;
- break;
- case '|':
- T->subtype = '|';
- if (*s == '|') {
- s++;
- T->type = L_OR_OP;
- } else if (*s == '=') {
- s++;
- T->type = DONT_CARE;
- } else
- T->type = B_OR_OP;
- break;
- case '+':
- T->subtype = '+';
- if (*s == s[-1] || *s == '=') {
- s++;
- T->type = DONT_CARE;
- } else
- T->type = ADD_OP;
- break;
- case '~':
- T->type = UNARY_OP;
- T->subtype = '~';
- break;
- case ',':
- T->type = COMMA;
- T->subtype = ',';
- break;
- case '(':
- T->type = LPAREN;
- T->subtype = '(';
- break;
- case ')':
- T->type = RPAREN;
- T->subtype = ')';
- break;
- case '!':
- T->subtype = '!';
- if (*s == '=')
- T->type = EQ_OP;
- else
- T->type = UNARY_OP;
- break;
- case '=':
- T->subtype = '=';
- if (*s == '=')
- T->type = EQ_OP;
- else
- T->type = DONT_CARE;
- break;
- case '*':
- case '/':
- case '%':
- T->subtype = s[-1];
- if (*s == '=') {
- s++;
- T->type = DONT_CARE;
- } else
- T->type = MUL_OP;
- break;
- case '^':
- T->subtype = '^';
- if (*s == '=') {
- s++;
- T->type = DONT_CARE;
- } else
- T->type = B_XOR_OP;
- break;
- case '-':
- T->subtype = '-';
- if (*s == '-' || *s == '=' || *s == '>') {
- s++;
- T->type = DONT_CARE;
- } else
- T->type = ADD_OP;
- break;
- case '<':
- if (tok_flags & INCLUDE_LINE) {
- s = include_name(s, T);
- break;
- }
- /* else fall through */
- case '>':
- T->subtype = s[-1];
- T->type = REL_OP;
- if (*s == s[-1]) {
- s++;
- T->type = SHIFT_OP;
- }
- if (*s == '=') {
- s++;
- if (T->type == REL_OP)
- T->subtype = (T->subtype == '<' ? '(' : ')');
- else
- T->type = DONT_CARE;
- }
- break;
- case '\'':
- s = char_constant(s, T);
- break;
- case '"':
- s = string_literal(s, T);
- break;
- case '[':
- case ']':
- case '{':
- case '}':
- case ';':
- case ':':
- case '?':
- T->type = DONT_CARE;
- break;
- default:
- T->type = UNKNOWN;
- }
- return s;
- }
-
- /* print_token() -- write token |T| to the output file */
- void print_token(T)
- TokenP T;
- {
- if (T->type == STOP)
- bugchk("STOP token in output stream?");
- fputs(T->pre_ws, outf);
- fputs(T->txt, outf);
- if (T->flags & TRAIL_SPC)
- fputc(' ', outf);
- }
-
- /*
- merge_tokens() -- Perform token pasting on Token's |T1| and |T2|. Returns
- the resulting token.
- */
- TokenP merge_tokens(T1, T2)
- TokenP T1, T2;
- {
- TokenP T = alloc_token();
- char *s, *t;
-
- T->pre_ws = strdup(T1->pre_ws);
- T->txt = mallok(strlen(T1->txt) + strlen(T2->txt) + 1);
- strcpy(T->txt, T1->txt);
- strcat(T->txt, T2->txt);
- t = xlate_token(T->txt, T);
- if (*t != '\0') {
- warning("Invalid token \"%s\" created by concatenation", t);
- T->type = UNKNOWN;
- }
- return T;
- }
-
- TokenP _one_token()
- {
- register char *s = next_c, *t, *u;
- int n;
- TokenP T = alloc_token();
-
- t = suck_ws(s, &(T->pre_ws));
- if (!t || !*t) {
- T->txt = mallok(2);
- T->txt[0] = '\n';
- T->txt[1] = '\0';
- T->type = EOL;
- T->subtype = '\n';
- next_c = t;
- return T;
- }
- u = xlate_token(t, T);
- n = u - t;
- if (T->type == UNKNOWN && w_bad_chars)
- error("Unrecognized character 0x%02x='%c'", *t, *t);
- T->txt = mallok(n + 1);
- strncpy(T->txt, t, n);
- T->txt[n] = '\0';
- next_c = u;
- return T;
- }
-
- void _tokenize_line()
- {
- Token head;
- TokenP T = &head;
-
- head.next = NULL;
- do {
- T = T->next = _one_token();
- } while (T->type != EOL);
- push_tlist(head.next);
- }
-
- TokenP token()
- {
- TokenP T;
- register char *s;
-
- while (pushback_list) {
- T = pushback_list;
- pushback_list = T->next;
- T->next = NULL;
- if (T->type == UNMARK) {
- Macro *M;
-
- M = lookup(T->txt, T->hashval);
- if (!M)
- bugchk("UNMARK on non-macro token %s", T->txt);
- if (!(M->flags & MARKED))
- bugchk("UNMARK on unmarked macro %s", T->txt);
- M->flags ^= MARKED;
- free_token(T);
- continue;
- } else {
- return T;
- }
- }
-
- /*
- if we get to here, the pushback list is empty, and we need to read in
- another line
- */
- next_c = s = getline();
- if (!s)
- return mk_eof();
- T = _one_token();
- if (T->type == EOL) {
- return T;
- }
- if (T->type != POUND || get_mode() & SLURP)
- _tokenize_line();
- return T;
- }
-
- TokenP exp_token()
- {
- TokenP T = token();
- Macro *M;
-
- if (T->type == ID && !(T->flags & BLUEPAINT) && (M = lookup(T->txt, T->hashval))) {
- expand(T, M);
- return exp_token();
- } else
- return T;
- }
-