home *** CD-ROM | disk | FTP | other *** search
- /* File : Token.c
- Author : Richard A. O'Keefe
- Modified by : Deeporn H. Beardsley
- Updated : July 1988
- Purpose : Tokenizer for SB-Prolog.
-
- */
-
- #ifdef vms
- #include stdio
- #else
- #include <stdio.h>
- #endif
-
- /* We used to use an 8-bit character set under VMS, but 7-bit ASCII
- elsewhere. Now that DIS 8859/1 exists (a draft international
- standard for an 8-bit extension of ASCII) we use that, and we are
- in luck: it is almost identical to the VMS character set.
- */
- #define Char unsigned char
- #define AlphabetSize 256
-
- extern char *strcpy(/* char*, char* */);
- #define StrCpy(dst, src) (void)strcpy(dst, src)
- #define Printf (void)printf
- #define Sprintf (void)sprintf
- #define Fprintf (void)fprintf
-
-
- #define InRange(X,L,U) ((unsigned)((X)-(L)) <= (unsigned)((U)-(L)))
- #define IsLayout(X) InRange(InType(X), SPACE, EOLN)
-
-
- /* VERY IMPORTANT NOTE: I assume that the stdio library returns the value
- EOF when character input hits the end of the file, and that this value
- is actually the integer -1. You will note the DigVal(), InType(), and
- OuType() macros below, and there is a ChType() macro used in crack().
- They all depend on this assumption.
- */
-
- #define DIGIT 0 /* 0 .. 9 */
- #define BREAK 1 /* _ */
- #define UPPER 2 /* A .. Z */
- #define LOWER 3 /* a .. z */
- #define SIGN 4 /* -/+*<=>#@$\^&~`:.? */
- #define NOBLE 5 /* !; (don't form compounds) */
- #define PUNCT 6 /* (),[]|{}% */
- #define ATMQT 7 /* ' (atom quote) */
- #define LISQT 8 /* " (list quote) */
- #define STRQT 9 /* $ (string quote) */
- #define CHRQT 10 /* ` (character quote, maybe) */
- #define TILDE 11 /* ~ (like character quote but buggy) */
- #define SPACE 12 /* layout and control chars */
- #define EOLN 13 /* line terminators ^J ^L */
- #define REALO 14 /* floating point number */
- #define EOFCH 15 /* end of file */
- #define ALPHA DIGIT /* any of digit, break, upper, lower */
- #define BEGIN BREAK /* atom left-paren pair */
- #define ENDCL EOLN /* end of clause token */
- #define RREAL 16 /* radix number(real) - overflowed */
- #define RDIGIT 17 /* radix number(int) */
-
- #define InType(c) (intab.chtype+1)[c]
- #define DigVal(c) (digval+1)[c]
-
- Char outqt[EOFCH+1];
-
- struct CHARS
- {
- int eolcom; /* End-of-line comment, default % */
- int endeol; /* early terminator of eolcoms, default none */
- int begcom; /* In-line comment start, default / */
- int astcom; /* In-line comment second, default * */
- int endcom; /* In-line comment finish, default / */
- int radix; /* Radix character, default ' */
- int dpoint; /* Decimal point, default . */
- int escape; /* String escape character, default \ */
- int termin; /* Terminates a clause */
- char chtype[AlphabetSize+1];
- };
-
- struct CHARS intab = /* Special character table */
- {
- '%', /* eolcom: end of line comments */
- -1, /* endeol: early end for eolcoms */
- '/', /* begcom: in-line comments */
- '*', /* astcom: in-line comments */
- '/', /* endcom: in-line comments */
- '\'', /* radix : radix separator */
- '.', /* dpoint: decimal point */
- -1, /* escape: string escape character */
- '.', /* termin: ends clause, sign or solo */
- {
- EOFCH, /* really the -1th element of the table: */
- /* ^@ ^A ^B ^C ^D ^E ^F ^G */
- SPACE, SPACE, SPACE, SPACE, SPACE, SPACE, SPACE, SPACE,
- /* ^H ^I ^J ^K ^L ^M ^N ^O */
- SPACE, SPACE, EOLN, SPACE, EOLN, SPACE, SPACE, SPACE,
- /* ^P ^Q ^R ^S ^T ^U ^V ^W */
- SPACE, SPACE, SPACE, SPACE, SPACE, SPACE, SPACE, SPACE,
- /* ^X ^Y ^Z ^[ ^\ ^] ^^ ^_ */
- SPACE, SPACE, SPACE, SPACE, SPACE, SPACE, SPACE, SPACE,
- /* sp ! " # $ % & ' */
- SPACE, NOBLE, LISQT, SIGN, LOWER, PUNCT, SIGN, ATMQT,
- /* ( ) * + , - . / */
- PUNCT, PUNCT, SIGN, SIGN, PUNCT, SIGN, SIGN, SIGN,
- /* 0 1 2 3 4 5 6 7 */
- DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, DIGIT,
- /* 8 9 : ; < = > ? */
- DIGIT, DIGIT, SIGN, PUNCT, SIGN, SIGN, SIGN, SIGN,
- /* @ A B C D E F G */
- SIGN, UPPER, UPPER, UPPER, UPPER, UPPER, UPPER, UPPER,
- /* H I J K L M N O */
- UPPER, UPPER, UPPER, UPPER, UPPER, UPPER, UPPER, UPPER,
- /* P Q R S T U V W */
- UPPER, UPPER, UPPER, UPPER, UPPER, UPPER, UPPER, UPPER,
- /* X Y Z [ \ ] ^ _ */
- UPPER, UPPER, UPPER, PUNCT, SIGN, PUNCT, SIGN, BREAK,
- /* ` a b c d e f g */
- SIGN, LOWER, LOWER, LOWER, LOWER, LOWER, LOWER, LOWER,
- /* h i j k l m n o */
- LOWER, LOWER, LOWER, LOWER, LOWER, LOWER, LOWER, LOWER,
- /* p q r s t u v w */
- LOWER, LOWER, LOWER, LOWER, LOWER, LOWER, LOWER, LOWER,
- /* x y z { | } ~ ^? */
- LOWER, LOWER, LOWER, PUNCT, PUNCT, PUNCT, SIGN, SPACE,
- /* 128 129 130 131 132 133 134 135 */
- SPACE, SPACE, SPACE, SPACE, SPACE, SPACE, SPACE, SPACE,
- /* 136 137 138 139 140 141 142 143 */
- SPACE, SPACE, SPACE, SPACE, SPACE, SPACE, SPACE, SPACE,
- /* 144 145 146 147 148 149 150 151 */
- SPACE, SPACE, SPACE, SPACE, SPACE, SPACE, SPACE, SPACE,
- /* 152 153 154 155 156 157 158 159 */
- SPACE, SPACE, SPACE, SPACE, SPACE, SPACE, SPACE, SPACE,
- /* NBSP !-inv cents pounds ching yen brobar section */
- SPACE, SIGN, SIGN, SIGN, SIGN, SIGN, SIGN, SIGN,
- /* "accent copyr -a ord << nothook SHY (reg) ovbar */
- SIGN, SIGN, LOWER, SIGN, SIGN, SIGN, SIGN, SIGN,
- /* degrees +/- super 2 super 3 - micron pilcrow - */
- SIGN, SIGN, LOWER, LOWER, SIGN, SIGN, SIGN, SIGN,
- /* , super 1 -o ord >> 1/4 1/2 3/4 ?-inv */
- SIGN, LOWER, LOWER, SIGN, SIGN, SIGN, SIGN, SIGN,
- /* `A 'A ^A ~A "A oA AE ,C */
- UPPER, UPPER, UPPER, UPPER, UPPER, UPPER, UPPER, UPPER,
- /* `E 'E ^E "E `I 'I ^I "I */
- UPPER, UPPER, UPPER, UPPER, UPPER, UPPER, UPPER, UPPER,
- /* ETH ~N `O 'O ^O ~O "O x times */
- #ifdef vms
- UPPER, UPPER, UPPER, UPPER, UPPER, UPPER, UPPER, UPPER,
- #else
- UPPER, UPPER, UPPER, UPPER, UPPER, UPPER, UPPER, SIGN,
- #endif
- /* /O `U 'U ^U "U 'Y THORN ,B */
- UPPER, UPPER, UPPER, UPPER, UPPER, UPPER, UPPER, LOWER,
- /* `a 'a ^a ~a "a oa ae ,c */
- LOWER, LOWER, LOWER, LOWER, LOWER, LOWER, LOWER, LOWER,
- /* `e 'e ^e "e `i 'i ^i "i */
- LOWER, LOWER, LOWER, LOWER, LOWER, LOWER, LOWER, LOWER,
- /* eth ~n `o 'o ^o ~o "o -:- */
- #ifdef vms
- LOWER, LOWER, LOWER, LOWER, LOWER, LOWER, LOWER, LOWER,
- #else
- LOWER, LOWER, LOWER, LOWER, LOWER, LOWER, LOWER, SIGN,
- #endif
- /* /o `u 'u ^u "u 'y thorn "y */
- #ifdef vms
- LOWER, LOWER, LOWER, LOWER, LOWER, LOWER, LOWER, SPACE
- #else
- LOWER, LOWER, LOWER, LOWER, LOWER, LOWER, LOWER, LOWER
- #endif
- }};
-
- char digval[AlphabetSize+1] =
- {
- 99, /* really the -1th element of the table */
- /* ^@ ^A ^B ^C ^D ^E ^F ^G */
- 99, 99, 99, 99, 99, 99, 99, 99,
- /* ^H ^I ^J ^K ^L ^M ^N ^O */
- 99, 99, 99, 99, 99, 99, 99, 99,
- /* ^P ^Q ^R ^S ^T ^U ^V ^W */
- 99, 99, 99, 99, 99, 99, 99, 99,
- /* ^X ^Y ^Z ^[ ^\ ^] ^^ ^_ */
- 99, 99, 99, 99, 99, 99, 99, 99,
- /* sp ! " # $ % & ' */
- 99, 99, 99, 99, 99, 99, 99, 99,
- /* ( ) * + , - . / */
- 99, 99, 99, 99, 99, 99, 99, 99,
- /* 0 1 2 3 4 5 6 7 */
- 0, 1, 2, 3, 4, 5, 6, 7,
- /* 8 9 : ; < = > ? */
- 8, 9, 99, 99, 99, 99, 99, 99,
- /* @ A B C D E F G */
- 99, 10, 11, 12, 13, 14, 15, 99,
- /* H I J K L M N O */
- 99, 99, 99, 99, 99, 99, 99, 99,
- /* P Q R S T U V W */
- 99, 99, 99, 99, 99, 99, 99, 99,
- /* X Y Z [ \ ] ^ _ */
- 99, 99, 99, 99, 99, 99, 99, 0, /*NB*/
- /* ` a b c d e f g */
- 99, 10, 11, 12, 13, 14, 15, 99,
- /* h i j k l m n o */
- 99, 99, 99, 99, 99, 99, 99, 99,
- /* p q r s t u v w */
- 99, 99, 99, 99, 99, 99, 99, 99,
- /* x y z { | } ~ ^? */
- 99, 99, 99, 99, 99, 99, 99, 99,
- /* 128 129 130 131 132 133 134 135 */
- 99, 99, 99, 99, 99, 99, 99, 99,
- /* 136 137 138 139 140 141 142 143 */
- 99, 99, 99, 99, 99, 99, 99, 99,
- /* 144 145 146 147 148 149 150 151 */
- 99, 99, 99, 99, 99, 99, 99, 99,
- /* 152 153 154 155 156 157 158 159 */
- 99, 99, 99, 99, 99, 99, 99, 99,
- /* 160 161 162 163 164 165 166 167 */
- 99, 99, 99, 99, 99, 99, 99, 99,
- /* 168 169 170(-a) 171 172 173 174 175 */
- 99, 99, 99, 99, 99, 99, 99, 99,
- /* 176 177 178(2) 179(3) 180 181 182 183 */
- 99, 99, 2, 3, 99, 99, 99, 99,
- /* 184 185(1) 186(-o) 187 188 189 190 191 */
- 99, 1, 99, 99, 99, 99, 99, 99,
- /* 192 193 194 195 196 197 198 199 */
- 99, 99, 99, 99, 99, 99, 99, 99,
- /* 200 201 202 203 204 205 206 207 */
- 99, 99, 99, 99, 99, 99, 99, 99,
- /* 208 209 210 211 212 213 214 215 */
- 99, 99, 99, 99, 99, 99, 99, 99,
- /* 216 217 218 219 220 221 222 223 */
- 99, 99, 99, 99, 99, 99, 99, 99,
- /* 224 225 226 227 228 229 230 231 */
- 99, 99, 99, 99, 99, 99, 99, 99,
- /* 232 233 234 235 236 237 238 239 */
- 99, 99, 99, 99, 99, 99, 99, 99,
- /* 240 241 242 243 244 245 246 247 */
- 99, 99, 99, 99, 99, 99, 99, 99,
- /* 248 249 250 251 252 253 254 255 */
- 99, 999, 99, 99, 99, 99, 99, 99
- };
-
-
- /* values returned to calling program */
- #define SPECIAL 0 /* puncuation , ( ) [ ] ... */
- #define VARO 1 /* type is a variable */
- #define FUNC 2 /* type is atom( */
- #define NUMBERO 3 /* type is a number */
- #define ATOMO 4 /* type is an atom */
- #define ENDCLS 5 /* END of clause but not file */
- #define USCORE 6 /* underscore '_' */
- #define SEMI 7 /* ; */
- #define BADEND 8 /* END of file, not end of clause */
- #define STRING 9 /* type is a char string */
-
- /* stuff defined to interface with SB-Prolog */
- #include "builtin.h"
- #include <errno.h>
-
- #ifndef AMIGA
- #include <sys/types.h>
- #include <netdb.h>
- #include <sys/socket.h>
- #include <netinet/in.h>
- #include <arpa/inet.h>
- #endif
-
- extern word nil_sym;
- extern word insert();
- extern int errno;
-
- extern word *memory; /* heap, local stack */
- extern word *pspace; /* psc records, instructions, p-names */
- extern word *tstack;
- extern word *local_bottom;
- extern word *heap_bottom;
- extern byte *curr_fence; /* ptr to next free byte in perm space */
- extern word *ereg; /* last activation record */
- extern word *breg; /* last choice point */
- extern word *hreg; /* top of heap */
- extern word *trreg; /* top of trail stack */
- extern int maxmem, maxpspace, maxtrail;
-
- extern byte *curr_fence; /* ptr to next free byte in perm space */
- extern byte *max_fence; /* ptr to last+1 free byte in perm space */
-
- extern FILE *curr_in, *curr_out; /* current input, output streams */
-
- char temp = TEMP;
-
-
- void SyntaxError(message)
- char *message;
- {
- Fprintf(stderr, "Syntax error: %s\n", message);
- exit(1);
- }
-
-
-
- /* GetToken() reads a single token from the input stream and returns
- its type, which is one of
- DIGIT -- a number
- BEGIN -- an atom( pair
- LOWER -- an atom
- UPPER -- a variable
- PUNCT -- a single punctuation mark
- LISQT -- a quoted list of character codes
- STRQT -- a quoted string
- ENDCL -- end of clause (normally '.\n').
- EOFCH -- signifies end-of-file.
- RREAL -- a real, from some radix notation, in double_v.
- RDIGIT -- an integer, from some radix notation, in rad_int.
- In all cases except the last, the text of the token is in AtomStr.
- There are two questions: between which pairs of adjacent tokens is
- a space (a) necessary, (b) desirable? There is an additional
- dummy token type used by the output routines, namely
- NOBLE -- extra space is definitely not needed.
- I leave it as an exercise for the reader to answer question (a).
- Since this program is to produce output I find palatable (even if
- it isn't exactly what I'd write myself), extra spaces are ok. In
- fact, the main use of this program is as an editor command, so it
- is normal to do a bit of manual post-processing. Question (b) is
- the one to worry about then. My answer is that a space is never
- written
- - after PUNCT ( [ { |
- - before PUNCT ) ] } | , <ENDCL>
- is written after comma only sometimes, and is otherwise always
- written. The variable lastput thus takes these values:
- ALPHA -- put a space except before PUNCT
- SIGN -- as alpha, but different so ENDCL knows to put a space.
- NOBLE -- don't put a space
- ENDCL -- just ended a clause
- EOFCH -- at beginning of file
- */
-
- int lastc = ' '; /* previous character */
- #define MaxStrLen 1000
- Char AtomStr[MaxStrLen+20];
- word list_p;
- int rtnint;
- double double_v;
- long rad_int;
-
- char tok2long[] = "token too long";
- char eofinrem[] = "end of file in comment";
- char badexpt[] = "bad exponent";
- char badradix[] = "radix > 36";
-
-
- /* read_character(FILE* card, Char q)
- reads one character from a quoted atom, list, string, or character.
- Doubled quotes are read as single characters, otherwise a
- quote is returned as -1 and lastc is set to the next character.
- If the input syntax has character escapes, they are processed.
- Note that many more character escape sequences are accepted than
- are generated. There is a divergence from C: \xhh sequences are
- two hexadecimal digits long, not three.
- Note that the \c and \<space> sequences combine to make a pretty
- way of continuing strings. Do it like this:
- "This is a string, which \c
- \ has to be continued over \c
- \ several lines.\n".
- */
-
- int read_character(card, q)
- register FILE *card;
- register int q;
- {
- register int c;
-
- c = getc(card);
- BACK: if (c < 0) {
- ERROR: if (q < 0) {
- SyntaxError("end of file in character constant");
- } else {
- char message[80];
- Sprintf(message, "end of file in %cquoted%c constant", q, q);
- SyntaxError(message);
- }
- }
- if (c == q) {
- c = getc(card);
- if (c == q) return c;
- lastc = c;
- return -1;
- } else
- if (c != intab.escape) {
- return c;
- }
- /* If we get here, we have read the "\" of an escape sequence */
- c = getc(card);
- switch (c) {
- case EOF:
- clearerr(curr_in);
- goto ERROR;
- case 'n': case 'N': /* newline */
- return 10;
- case 't': case 'T': /* tab */
- return 9;
- case 'r': case 'R': /* reeturn */
- return 13;
- case 'v': case 'V': /* vertical tab */
- return 11;
- case 'b': case 'B': /* backspace */
- return 8;
- case 'f': case 'F': /* formfeed */
- return 12;
- case 'e': case 'E': /* escape */
- return 27;
- case 'd': case 'D': /* delete */
- return 127;
- case 's': case 'S': /* space */
- return 32;
- case 'a': case 'A': /* alarm */
- return 7;
- case '^': /* control */
- c = getc(card);
- if (c < 0) goto ERROR;
- return c == '?' ? 127 : c&31;
- case 'c': case 'C': /* continuation */
- while (IsLayout(c = getc(card))) ;
- goto BACK;
- case 'x': case 'X': /* hexadecimal */
- { int i, n;
- for (n = 0, i = 2; --i >= 0; n = (n<<4) + DigVal(c))
- if (DigVal(c = getc(card)) >= 16) {
- if (c < 0) goto ERROR;
- (void)ungetc(c, card);
- break;
- }
- return n & 255;
- }
- case 'o': case 'O': /* octal */
- c = getc(card);
- if (DigVal(c) >= 8) {
- if (c < 0) goto ERROR;
- (void) ungetc(c, card);
- return 0;
- }
- case '0': case '1': case '2': case '3':
- case '4': case '5': case '6': case '7':
- { int i, n;
- for (n = c-'0', i = 2; --i >= 0; n = (n<<3) + DigVal(c))
- if (DigVal(c = getc(card)) >= 8) {
- if (c < 0) goto ERROR;
- (void) ungetc(c, card);
- break;
- }
- return n & 255;
- }
- default:
- if (!IsLayout(c)) return c;
- c = getc(card);
- goto BACK;
- }
- }
-
-
-
-
- /* com0plain(card, endeol)
- These comments have the form
- <eolcom> <char>* <newline> {PUNCT}
- or <eolcom><eolcom> <char>* <newline> {SIGN }
- depending on the classification of <eolcom>. Note that we could
- handle ADA comments with no trouble at all. There was a Pop-2
- dialect which had end-of-line comments using "!" where the comment
- could also be terminated by "!". You could obtain the effect of
- including a "!" in the comment by doubling it, but what you had
- then was of course two comments. The endeol parameter of this
- function allows the handling of comments like that which can be
- terminated either by a new-line character or an <endeol>, whichever
- comes first. For ordinary purposes, endeol = -1 will do fine.
- When this is called, the initial <eolcom>s have been consumed.
- We return the first character after the comment.
- If the end of the source file is encountered, we do not treat it
- as an error, but quietly close the comment and return EOF as the
- "following" character.
-
- */
- int com0plain(card, endeol)
- register FILE *card; /* source file */
- register int endeol; /* The closing character "!" */
- {
- register int c;
-
- while ((c = getc(card)) >= 0 && c != '\n' && c != endeol) ;
- if (c >= 0) c = getc(card);
- return c;
- }
-
-
-
- /* The states in the next two functions are
- 0 - after an uninteresting character
- 1 - after an "astcom"
- 2 - after a "begcom"
- Assuming begcom = "(", astom = "#", endcom = ")",
- com2plain will accept "(#)" as a complete comment. This can
- be changed by initialising the state to 0 rather than 1.
- The same is true of com2nest, which accepts "(#(#)#) as a
- complete comment. Changing it would be rather harder.
- Fixing the bug where the closing <astcom> is copied if it is
- not an asterisk may entail rejecting "(#)".
- */
-
- /* com2plain(card, astcom, endcom)
- handles PL/I-style comments, that is, comments which begin with
- a pair of characters <begcom><astcom> and end with a pair of
- chracters <astcom><endcom>, where nesting is not allowed. For
- example, if we take begcom='(', astcom='*', endcom=')' as in
- Pascal, the comment "(* not a (* plain *)^ comment *) ends at
- the "^".
- For this kind of comment, it is perfectly sensible for any of
- the characters to be equal. For example, if all three of the
- bracket characters are "#", then "## stuff ##" is a comment.
- When this is called, the initial <begcom><astcom> has been consumed.
- */
- void com2plain(card, astcom, endcom)
- register FILE *card; /* source file */
- int astcom; /* The asterisk character "*" */
- int endcom; /* The closing character "/" */
- {
- register int c;
- register int state;
-
- for (state = 0; (c = getc(card)) >= 0; ) {
- if (c == endcom && state) break;
- state = c == astcom;
- }
- if (c < 0) SyntaxError(eofinrem);
- }
-
-
- int GetToken()
- {
- register FILE *card = curr_in;
- register Char *s = AtomStr;
- register int c, d;
- long oldv = 0, newv = 0;
- register int n = MaxStrLen;
- word *newpair,*list_head;
-
- c = lastc;
- START:
- switch (InType(c)) {
-
- case DIGIT:
- /* The following kinds of numbers exist:
- (1) unsigned decimal integers: d+
- (2) unsigned based integers: d+Ro+[R]
- (3) unsigned floats: d* [. d*] [e +/-] d+
- (4) characters: 0Rc[R]
- We allow underscores in numbers too, ignoring them.
- */
- do {
- if (c != '_') *s++ = c;
- c = getc(card);
- } while (InType(c) <= BREAK);
- if (c == intab.radix) {
- *s = 0;
- for (d = 0, s = AtomStr; c = *s++; ) {
- d = d*10-'0'+c;
- if (d > 36) SyntaxError(badradix);
- }
- if (d == 0) {
- /* 0'c['] is a character code */
- d = read_character(card, -1);
- Sprintf(AtomStr, "%d", d);
- d = getc(card);
- lastc = d == intab.radix ? getc(card) : d;
- return DIGIT;
- }
- while (c = getc(card), DigVal(c) < 99)
- if (c != '_') {
- oldv = newv;
- newv = newv*d + DigVal(c);
- if (newv < oldv || newv > MAXINT) {
- printf("*** overflow in radix notation *** \n");
- double_v = oldv*1.0*d + DigVal(c);
- while (c = getc(card), DigVal(c) < 99)
- if (c != '_')
- double_v = double_v*d + DigVal(c);
- if (c == intab.radix) c = getc(card);
- lastc = c;
- return RREAL;
- }
- }
- /*
- Sprintf(AtomStr, "%ld", newv);
- */
- rad_int = newv;
- if (c == intab.radix) c = getc(card);
- lastc = c;
- return RDIGIT;
- } else
- if (c == intab.dpoint) {
- d = getc(card);
- if (InType(d) == DIGIT) {
- DECIMAL: *s++ = '.';
- do {
- if (d != '_') *s++ = d;
- d = getc(card);
- } while (InType(d) <= BREAK);
- if ((d | 32) == 'e') {
- *s++ = 'E';
- d = getc(card);
- if (d == '-') *s++ = d, d = getc(card);
- else if (d == '+') d = getc(card);
- if (InType(d) > BREAK) SyntaxError(badexpt);
- do {
- if (d != '_') *s++ = d;
- d = getc(card);
- } while (InType(d) <= BREAK);
- }
- c = d;
- *s = 0, lastc = c;
- return REALO;
- } else {
- ungetc(d, card);
- /* c has not changed */
- }
- }
- *s = 0, lastc = c;
- return DIGIT;
-
- case BREAK: case UPPER:
- do {
- if (--n < 0) SyntaxError(tok2long);
- *s++ = c, c = getc(card);
- } while (InType(c) <= LOWER);
- *s = 0, lastc = c;
- rtnint = (int) (s - AtomStr);
- return UPPER;
-
- case LOWER:
- do {
- if (--n < 0) SyntaxError(tok2long);
- *s++ = c, c = getc(card);
- } while (InType(c) <= LOWER);
- *s = 0;
- SYMBOL: if (c == '(') {
- lastc = getc(card);
- rtnint = (int) (s - AtomStr);
- return BEGIN;
- } else {
- lastc = c;
- rtnint = (int) (s - AtomStr);
- return LOWER;
- }
-
- case SIGN:
- *s = c, d = getc(card);
- if (c == intab.begcom && d == intab.astcom) {
- ASTCOM: com2plain(card, d, intab.endcom);
- c = getc(card);
- goto START;
- } else
- if (c == intab.dpoint && InType(d) == DIGIT) {
- *s++ = '0';
- goto DECIMAL;
- }
- while (InType(d) == SIGN) {
- if (--n == 0) SyntaxError(tok2long);
- *++s = d, d = getc(card);
- }
- *++s = 0;
- if (InType(d) >= SPACE && c == intab.termin && AtomStr[1] == 0) {
- lastc = d;
- return ENDCL; /* i.e. '.' followed by layout */
- }
- c = d;
- goto SYMBOL;
-
- case NOBLE:
- if (c == intab.termin) {
- *s = 0, lastc = ' ';
- return ENDCL;
- } else
- if (c == intab.eolcom) {
- c = com0plain(card, intab.endeol);
- goto START;
- }
- *s++ = c, *s = 0;
- lastc = c = getc(card);
- goto SYMBOL;
-
- case PUNCT:
- if (c == intab.termin) {
- *s = 0, lastc = ' ';
- return ENDCL;
- } else
- if (c == intab.eolcom) {
- c = com0plain(card, intab.endeol);
- goto START;
- }
- d = getc(card);
- if (c == intab.begcom && d == intab.astcom) goto ASTCOM;
-
- /* If we arrive here, c is an ordinary punctuation mark */
- if (c == '(')
- /* need to distingusih between atom( and atom ( */
- *s++ = ' ';
- lastc = d, *s++ = c, *s = 0;
- rtnint = (int) (s - AtomStr);
- return PUNCT;
-
- case CHRQT:
- /* `c[`] is read as an integer.
- Eventually we should treat characters as a distinct
- token type, so they can be generated on output.
- If the character quote, atom quote, list quote,
- or string quote is the radix character, we should
- generate 0'x notation, otherwise `x`.
- */
- d = read_character(card, -1);
- Sprintf(AtomStr, "%d", d);
- d = getc(card);
- lastc = d == c ? getc(card) : d;
- return DIGIT;
-
- case ATMQT: case STRQT:
- while ((d = read_character(card, c)) >= 0) {
- if (--n < 0) SyntaxError(tok2long);
- *s++ = d;
- }
- *s = 0;
- rtnint = (int) (s - AtomStr);
- c = lastc;
- goto SYMBOL;
-
- case LISQT:
- list_head = newpair = hreg;
- while ((d = read_character(card, c)) >= 0) {
- hreg++; hreg++;
- *newpair++ = makeint(d);
- *newpair++ = (word)hreg | LIST_TAG;
- }
- if (list_head == hreg) /* null string */
- list_p = nil_sym;
- else {
- *(--newpair) = nil_sym;
- list_p = (word)list_head | LIST_TAG;
- }
- return LISQT;
-
- case EOLN:
- case SPACE:
- c = getc(card);
- goto START;
-
- case EOFCH:
- clearerr(curr_in);
- return EOFCH;
- }
- fprintf(stderr, "Internal error: InType(%d)==%d\n",
- c, InType(c));
- #ifdef AMIGA
- exit();
- #else
- abort(); /* There is no way we can get here */
- #endif
- /*NOTREACHED*/
- }
-
-
- void b_NEXT_TOKEN()
- {
- register word op;
- register pw top;
- int i, atoi(), oldnum, newnum;
- int len;
- char perm = PERM;
- register FILE *card = curr_in;
- double atof();
- word makefloat(), ptr;
-
-
- i = GetToken();
- switch (i) {
- case LOWER:
- op = gregc(1); deref(op); follow(op) = makeint(ATOMO);
- ptr = insert(AtomStr, rtnint, 0, &perm) | CS_TAG;
- op = gregc(2); deref(op); follow(op) = ptr;
- break;
- case BEGIN:
- op = gregc(1); deref(op); follow(op) = makeint(FUNC);
- ptr = insert(AtomStr, rtnint, 0, &perm) | CS_TAG;
- op = gregc(2); deref(op); follow(op) = ptr;
- break;
- case UPPER:
- if ((AtomStr[0] == '_') && (AtomStr[1] == 0)) {
- op = gregc(1); deref(op); follow(op) = makeint(USCORE);
- } else {
- op = gregc(1); deref(op); follow(op) = makeint(VARO);
- }
- if (rtnint > 256) {
- AtomStr[256] = 0;
- rtnint = 256;
- printf("*** Name of constant too long: %s\n"), AtomStr;
- }
- ptr = insert(AtomStr, rtnint, 0, &perm) | CS_TAG;
- op = gregc(2); deref(op); follow(op) = ptr;
- break;
- case REALO:
- op = gregc(2); deref(op); follow(op) = makefloat(atof(AtomStr));
- op = gregc(1); deref(op); follow(op) = makeint(NUMBERO);
- break;
- case RREAL:
- op = gregc(2); deref(op); follow(op) = makefloat(double_v);
- op = gregc(1); deref(op); follow(op) = makeint(NUMBERO);
- break;
- case RDIGIT:
- op = gregc(1); deref(op); follow(op) = makeint(NUMBERO);
- op = gregc(2); deref(op); follow(op) = makeint(rad_int);
- break;
- case DIGIT:
- op = gregc(1); deref(op); follow(op) = makeint(NUMBERO);
- op = gregc(2); deref(op);
- for(len = oldnum = newnum = 0; AtomStr[len] != 0; len++) {
- oldnum = newnum;
- newnum = newnum * 10 + DigVal(AtomStr[len]);
- if (newnum < oldnum || newnum > MAXINT) {
- printf("*** overflow >> %s\n",AtomStr);
- len = strlen(AtomStr);
- AtomStr[len++] = '.';
- AtomStr[len++] = '0';
- AtomStr[len] = 0;
- follow(op) = makefloat(atof(AtomStr));
- return;
- }
- }
- follow(op) = makeint(newnum);
- break;
- case LISQT:
- op = gregc(1); deref(op); follow(op) = makeint(STRING);
- op = gregc(2); deref(op); follow(op) = list_p;
- break;
- case PUNCT:
- /* there are nine punctuation marks, */
- /* ( , ) [ | ] { ; } */
- /* % is listed as one, but isn't really. */
- if (AtomStr[0] == ';') {
- op = gregc(1); deref(op); follow(op) = makeint(SEMI);
- } else {
- op = gregc(1); deref(op); follow(op) = makeint(SPECIAL);
- ptr = insert(AtomStr, rtnint, 0, &perm) | CS_TAG;
- op = gregc(2); deref(op); follow(op) = ptr;
- }
- break;
- case ENDCL:
- op = gregc(1); deref(op); follow(op) = makeint(ENDCLS);
- break;
- case EOFCH:
- op = gregc(1); deref(op); follow(op) = makeint(BADEND);
- break;
- default:
- Fprintf(stderr, "Internal error %d %s\n", i, AtomStr);
- }
- }
-
-