home *** CD-ROM | disk | FTP | other *** search
- /*
- * Lexical Analysis Functions Takes input stream and returns "tokens".
- * Implements pre-processor, allowing #include's and #define's tokens are
- * character strings.
- */
- #include <stdio.h>
- #include <ctype.h>
- #include <functions.h>
- #include <exec/types.h>
- #include "listmac.h"
- #include "std.h"
- #include "lex.h"
-
-
- struct StringElt {
- struct Nod node;
- char *buf;
- };
-
- struct Token {
- struct Nod node;
- char *buf;
- short type, can_be_macro;
- char chr;
- };
-
- struct Macro {
- struct Nod node;
- struct Lst arguments;
- struct Lst tokens;
- char *name;
- short active, nargs;
- };
-
- struct ActiveMacro {
- struct Nod node;
- struct Macro *mac;
- struct Token *curtok;
- };
-
- struct LexFile {
- struct Nod node;
- FILE *fp;
- short newln;
- short nextchar;
- };
-
- struct IfBlock {
- struct Nod *node;
- short skip_else;
- };
-
-
- struct Token *CopyToken ();
- struct Macro *FindMacro ();
- char *FindString ();
-
-
- #define BUFLEN 100
- static char buf[BUFLEN];
- static short bufpos;
- static struct Token
- rawtok, *lasttok, *retok;
-
- static struct LexFile *toplf;
- static struct Lst macros, activemacs, files, ifblocks;
- static short files_open = 0, init_done = 0;
- static char *includestr, *definestr, *ifdefstr,
- *ifndefstr, *elsestr, *endifstr;
-
- static char *libbase = "hd:include/", libnam[80], basnam[80];
-
- #define NHASH 53
- struct Lst hashTab[NHASH];
-
-
- /*
- * Open a file for lexing - could be an include.
- * Set newline flag to true.
- */
- short OpenLexFile (name)
- char *name;
- {
- struct LexFile *lf;
- struct Macro *mac;
-
- /*
- * on first call, init environment vars
- */
- if (!init_done) {
- NewList (&files);
- NewList (¯os);
- NewList (&ifblocks);
- NewList (&activemacs);
- InitHash ();
- includestr = FindString ("include");
- definestr = FindString ("define");
- ifdefstr = FindString ("ifdef");
- ifndefstr = FindString ("ifndef");
- elsestr = FindString ("else");
- endifstr = FindString ("endif");
- init_done = 1;
- }
-
- /*
- * if there are no files currently open, prepare for a new one
- */
- if (!files_open) {
- while (mac = (struct Macro *) RemHead (¯os))
- FreeMacro (mac);
- ASSERT (!TEST (HEAD (&ifblocks)));
- ASSERT (!TEST (HEAD (&activemacs)));
- retok = NULL;
- }
- lf = NEW (struct LexFile);
- if (!lf) return 0;
-
- /*
- * printf ("opening \"%s\"\n", name);
- */
- lf->fp = fopen (name, "r");
- if (!lf->fp) {
- FREI (lf);
- return 0;
- }
- lf->newln = 1;
- lf->nextchar = getc (lf->fp);
- AddHead (&files, lf);
- toplf = lf;
-
- if (feof (lf->fp)) {
- CloseLexFile ();
- return 0;
- }
- files_open = 1;
- return 1;
- }
-
-
- /*
- * Close top file returning 1 for the last file now closed.
- */
- short CloseLexFile ()
- {
- if (!files_open) return 1;
-
- fclose (toplf->fp);
- Remove (toplf);
- FREI (toplf);
- toplf = HEAD (&files);
- if (TEST (toplf)) return 0;
- files_open = 0;
- toplf = 0;
- return 1;
- }
-
-
- /*
- * free stuff up
- */
- LexCleanup ()
- {
- struct Macro *mac, *nmac;
-
- while (files_open) CloseLexFile ();
-
- FreeHash ();
- for (mac = HEAD (¯os); nmac = NEXT (mac); mac = nmac)
- FreeMacro (mac);
- ASSERT (!TEST (HEAD (&activemacs)));
-
- /*
- * initialization can be done again
- */
- init_done = 0;
- }
-
-
- FreeMacro (mac)
- struct Macro *mac;
- {
- struct Token *tok;
- struct StringElt *se;
-
- while (tok = (struct Token *) RemHead (&mac->tokens))
- FREI (tok);
- while (se = (struct StringElt *) RemHead (&mac->arguments))
- FREI (se);
- FREI (mac);
- }
-
-
- /*
- * Get next raw character from file dealing with backslash escape chars.
- * This is kind of wrong since these really ought to be recognized only
- * inside strings. Doing it this way deals with backslashes before
- * newlines.
- */
- short NextC1 (fp)
- FILE *fp;
- {
- short c, k, radix;
-
- c = getc (fp);
- if (c == '\\') {
- c = getc (fp);
- if (isdigit (c)) {
- radix = 10;
- if (c == '0') radix = 8;
- k = (c - '0');
- while (isdigit (c = getc (fp)))
- k = k * radix + (c - '0');
- ungetc (c, fp);
- c = k;
- } else {
- switch (c) {
- case '\\':
- break;
- case '"':
- c = '.';
- break;
- case 'n':
- c = '\n';
- break;
- case 'b':
- c = '\b';
- break;
- case 't':
- c = '\t';
- break;
- case '\n':
- c = getc (fp);
- }
- }
- }
- return c;
- }
-
-
- /*
- * Get next character from Top Lex File, updating nextchar.
- */
- short NextC ()
- {
- short c;
-
- if (!toplf) return EOF;
- c = toplf->nextchar;
- toplf->nextchar = NextC1 (toplf->fp);
- return c;
- }
-
-
- /*
- * Move the top lex file to the next newline.
- */
- AdvanceEOL ()
- {
- if (!toplf) return;
- while (toplf->nextchar != '\n' && !feof (toplf->fp))
- NextC ();
- }
-
-
- /*
- * Read a raw set of characters updating newline state. Sets the
- * fields in rawtok to the type and string pointer, if any.
- */
- RawToken ()
- {
- short c;
- short nochar = 1, comment, nl;
-
- bufpos = 0;
- rawtok.can_be_macro = 1;
-
- /*
- * get a non-blank char
- */
- while (nochar) {
-
- /*
- * get a character testing for end-of-file if EOF, just
- * return that fact right away
- */
- c = NextC ();
- if (c == EOF) {
- rawtok.type = RT_EOF;
- return;
- }
-
- /*
- * test next char: if it's white space other than newline,
- * just clear the newline flag and continue looking.
- */
- if (isspace (c) && c != '\n')
- toplf->newln = 0;
- else {
-
- /*
- * otherwise, this is potentially interesting, but it
- * might be the start of a comment - if so, scan til
- * the end of the comment (no nested comments ala
- * "C"). If not, exit the loop.
- */
- if (c == '/' && toplf->nextchar == '*') {
- comment = 1;
- while (comment) {
- c = NextC ();
- if (c == '*'
- && toplf->nextchar == '/') {
- c = NextC ();
- comment = 0;
- }
- }
-
- } else nochar = 0;
- }
- }
-
- nl = toplf->newln;
- toplf->newln = 0;
- bufpos = 0;
-
- /*
- * otherwise, read out the cases id symbol starting with char
- */
- if (nl && c == '#')
- rawtok.type = RT_ESC;
-
- else if (c == '\'') {
- rawtok.chr = NextC ();
- c = NextC ();
-
- /*
- * Skip past weird character constants, like 'FOOL',
- * making no attempt to deal with them correctly.
- */
- comment = 0;
- while (c != '\'' && comment < 6) {
- c = NextC ();
- comment++;
- }
- if (comment >= 6) printf ("error in character constant\n");
- rawtok.type = RT_CHRC;
-
- } else if (isalpha (c) || c == '_') {
- buf[bufpos++] = c;
- while (isalnum (toplf->nextchar) || toplf->nextchar == '_')
- buf[bufpos++] = NextC ();
- buf[bufpos] = 0;
- rawtok.type = RT_ID;
- rawtok.buf = FindString (buf);
-
- } else if (c == '\n') {
- toplf->newln = 1;
- rawtok.type = RT_NEWLN;
-
- } else if (isdigit (c)) {
- buf[bufpos++] = c;
- while (isdigit (toplf->nextchar)) buf[bufpos++] = NextC ();
- buf[bufpos] = 0;
- rawtok.buf = FindString (buf);
- rawtok.type = RT_NUM;
-
- } else if (c == '"') {
- while ((c = NextC ()) != '"')
- if (bufpos < BUFLEN)
- buf[bufpos++] = c;
-
- if (bufpos + 1 == BUFLEN) printf ("string overflow\n");
- buf[bufpos] = 0;
- rawtok.buf = FindString (buf);
- rawtok.type = RT_STR;
-
- } else {
- rawtok.chr = c;
- rawtok.type = RT_CHR;
- }
- }
-
-
- /*
- * Main function -- returns string pointer for next token makes include files
- * and macros transparent reads from top open file and returns pointer to
- * string of chars could be a symbol, could be a number, could be a character
- */
- short NextToken (tbuf)
- char **tbuf;
- {
- struct Token *tok;
- struct Macro *mac;
- struct ActiveMacro *amac;
- short i;
- char c;
-
- while () {
-
- /*
- * get raw token from file or active macro
- */
- amac = HEAD (&activemacs);
- if (retok) {
- tok = retok;
- retok = NULL;
- } else if (TEST (amac)) {
- tok = amac->curtok;
- if (!TEST (tok)) {
- amac->mac->active = 0;
- for (i = 0; i < amac->mac->nargs; i++)
- FreeMacro (RemHead (¯os));
- Remove (amac);
- FREI (amac);
- continue;
- }
- amac->curtok = NEXT (amac->curtok);
- } else {
- RawToken ();
- tok = &rawtok;
- }
- switch (tok->type) {
- case RT_EOF:
- if (CloseLexFile ()) {
- lasttok = tok;
- return RT_EOF;
- }
- break;
- case RT_ESC:
- RawToken ();
- if (rawtok.type != RT_ID) {
- printf ("real problem with lexer directive\n");
- AdvanceEOL ();
- break;
- }
- if (rawtok.buf == includestr) Include ();
- else if (rawtok.buf == definestr) Define ();
- else if (rawtok.buf == ifdefstr) IfDef (1);
- else if (rawtok.buf == ifndefstr) IfDef (0);
- else if (rawtok.buf == elsestr) Else ();
- else if (rawtok.buf == endifstr) EndIf ();
- else {
- printf ("unknown directive \"%s\"\n", rawtok.buf);
- AdvanceEOL ();
- }
- break;
-
- case RT_ID:
-
- /*
- * test for known macros (if this token can be one)
- */
- if (tok->can_be_macro) {
- mac = FindMacro (tok->buf);
- if (mac) {
- InstantiateMacro (mac);
- break;
- }
- }
- case RT_STR:
- case RT_NUM:
- *tbuf = tok->buf;
- lasttok = tok;
- return tok->type;
- case RT_CHR:
- case RT_CHRC:
- *tbuf = &tok->chr;
- lasttok = tok;
- return tok->type;
- }
- }
- }
-
-
- /*
- * The given macro has occured in the text, cause it to come into existence
- * with argument substitution.
- */
- InstantiateMacro (mac)
- struct Macro *mac;
- {
- struct ActiveMacro *amac;
- struct StringElt *arg;
- struct Macro *smac;
- struct Token *tok;
- char *buf;
- short vtok, level, endarglist;
-
- if (mac->active) {
- printf ("recursive macro ignored\n");
- return;
- }
-
- /*
- * read arguments, if any, and make them macros in their own right
- * arguments are read recursively using NextToken (right? I'm not
- * sure...)
- */
- arg = HEAD (&mac->arguments);
- if (TEST (arg)) {
-
- /*
- * get first open paren
- */
- vtok = NextToken (&buf);
- if (vtok != RT_CHR || *buf != '(')
- goto argfail;
-
- /*
- * loop through args separated with commas
- */
- endarglist = 0;
- while (TEST (arg)) {
- smac = NEW (struct Macro);
- if (!smac) goto argfail;
- smac->name = arg->buf;
- smac->active = 0;
- smac->nargs = 0;
- NewList (&smac->arguments);
- NewList (&smac->tokens);
-
- /*
- * scan out tokens allowing for nested commas
- */
- level = 0;
- while () {
- vtok = NextToken (&buf);
- if (vtok == RT_CHR) {
- if (*buf == '(' || *buf == '{')
- level++;
- if (*buf == '}')
- level--;
- if (*buf == ')') {
- if (level == 0) {
- endarglist = 1;
- break;
- }
- level--;
- }
- if (*buf == ',' && level == 0)
- break;
- }
- tok = CopyToken (lasttok);
- if (!tok) goto argfail;
- tok->can_be_macro = 0;
- AddTail (&smac->tokens, tok);
- }
- AddHead (¯os, smac);
- if (endarglist) break;
- arg = NEXT (arg);
- }
- if (vtok != RT_CHR || *buf != ')') goto argfail;
- }
-
- /*
- * Macro does not become active in the global list until all
- * arguments have been parsed and interpreted.
- */
- amac = NEW (struct ActiveMacro);
- if (!amac) return; /* what the fuck do I do here? */
- amac->mac = mac;
- amac->curtok = HEAD (&mac->tokens);
- mac->active = 1;
- AddHead (&activemacs, amac);
- return;
-
- argfail:
- printf ("bad problem with arguments\n");
- }
-
-
- /*
- * locate a specified macro in the available macros list
- */
- struct Macro * FindMacro (buf)
- char *buf;
- {
- struct Macro *mac;
-
- for (mac = HEAD (¯os); TEST (mac); mac = NEXT (mac))
- if (mac->name == buf)
- return mac;
-
- return NULL;
- }
-
-
- /*
- * Do an "include" directive
- */
- Include ()
- {
- short pos, ok, global;
- char c;
-
- /*
- * get filename
- */
- RawToken ();
- ok = 0;
- global = 0;
-
- if (rawtok.type == RT_STR) {
- ok = 1;
- strcpy (basnam, rawtok.buf);
- } else if (rawtok.type == RT_CHR && rawtok.chr == '<') {
- ok = 1;
- pos = 0;
- while () {
- c = NextC ();
- if (c == '>') break;
- basnam[pos++] = c;
- }
- basnam[pos] = 0;
- global = 1;
- }
- AdvanceEOL ();
- if (!ok) {
- printf ("no file to include\n");
- return;
- }
- if (!global) if (OpenLexFile (basnam)) return;
- strcpy (libnam, libbase);
- strcat (libnam, basnam);
- if (OpenLexFile (libnam)) return;
- printf ("error open include file <%s>\n", libnam);
- }
-
-
- /*
- * Do the "define" directive.
- */
- Define ()
- {
- struct Macro *mac = NULL;
- struct Token *tok;
- struct StringElt *se;
-
- /*
- * get identifier to define
- */
- RawToken ();
- if (rawtok.type != RT_ID) {
- printf ("error in define - no identifier\n");
- goto escape;
- }
- mac = NEW (struct Macro);
- if (!mac) goto escape;
- mac->name = rawtok.buf;
- mac->active = 0;
- mac->nargs = 0;
- NewList (&mac->arguments);
- NewList (&mac->tokens);
-
- /*
- * Look for parenthized argument list.
- */
- if (toplf->nextchar == '(') {
- RawToken ();
- while () {
- RawToken ();
-
- /*
- * deal with special case of "#define MACRO()"
- */
- if (!mac->nargs && rawtok.type == RT_CHR
- && rawtok.chr == ')')
- break;
- if (rawtok.type != RT_ID) {
- printf ("macro argument not an identifier\n");
- goto escape;
- }
- se = NEW (struct StringElt);
- if (!se) goto escape;
- se->buf = rawtok.buf;
- AddTail (&mac->arguments, se);
- mac->nargs++;
- RawToken ();
- if (rawtok.type != RT_CHR) {
- printf ("macro arg list delimiter not a character\n");
- goto escape;
- }
- if (rawtok.chr == ')') break;
- if (rawtok.chr != ',') {
- printf ("macro arg list delimiter not ',' or ')'\n");
- goto escape;
- }
- }
- }
-
- /*
- * get the sequence of tokens which make up this definition
- */
- while () {
- RawToken ();
- if (rawtok.type == RT_NEWLN || rawtok.type == RT_EOF) break;
- tok = CopyToken (&rawtok);
- if (!tok) goto escape;
- tok->can_be_macro = 1;
- AddTail (&mac->tokens, tok);
- }
-
- AddHead (¯os, mac);
- return;
-
- escape:
- if (mac) FreeMacro (mac);
- AdvanceEOL ();
- }
-
-
- /*
- * Skip over a defined-out section. Returns 1 if it ends with #else, 0
- * if it ends with #endif, -1 for EOF. Keeps track of nested if's
- * being skipped.
- */
- short SkipRemovedSec ()
- {
- short level = 0;
-
- while () {
- RawToken ();
- if (rawtok.type == RT_EOF) return -1;
- if (rawtok.type == RT_ESC) {
- RawToken ();
- if (rawtok.type == RT_ID) {
- if (rawtok.buf == ifdefstr
- || rawtok.buf == ifndefstr) level++;
- if (rawtok.buf == elsestr)
- if (!level) return 1;
- if (rawtok.buf == endifstr)
- if (!level--) return 0;
- }
- }
- }
- }
-
-
- /*
- * Does the "ifdef"/"ifndef" - "else" - "endif" directive. Type is 1
- * for ifdef and 0 for ifndef.
- */
- IfDef (type)
- short type;
- {
- struct Macro *mac;
- struct IfBlock *ib;
- short els;
-
- RawToken ();
- mac = FindMacro (rawtok.buf);
-
- /*
- * If condition is true, add a node to say "skip the next #else
- * section" if false, skip this section. If section ends with an
- * else, add a junk node to pop off for consistency at next #endif.
- */
- if ((mac && type) || (!mac && !type)) {
- ib = NEW (struct IfBlock);
- if (!ib) return;
- ib->skip_else = 1;
- AddHead (&ifblocks, ib);
- } else {
- els = SkipRemovedSec ();
- if (els == 1) {
- ib = NEW (struct IfBlock);
- if (!ib) return;
- ib->skip_else = 0;
- AddHead (&ifblocks, ib);
- }
- }
- }
-
-
- Else ()
- {
- struct IfBlock *ib;
- short els;
-
- ib = HEAD (&ifblocks);
- ASSERT (TEST (ib));
- Remove (ib);
- if (!ib->skip_else) {
- printf ("strange \"else\" block found\n");
- } else {
- els = SkipRemovedSec ();
- if (els == 1 && ib->skip_else) {
- printf ("strange \"else\" block found\n");
- }
- }
- FREI (ib);
- }
-
-
- EndIf ()
- {
- struct IfBlock *ib;
-
- ib = HEAD (&ifblocks);
- ASSERT (TEST (ib));
- Remove (ib);
- FREI (ib);
- }
-
-
- /*
- * Backs up one token
- */
- Backspace ()
- {
- retok = lasttok;
- }
-
-
- struct Token * CopyToken (tok)
- struct Token *tok;
- {
- struct Token *ntok;
-
- ntok = NEW (struct Token);
- if (ntok) *ntok = *tok;
- return ntok;
- }
-
-
-
- /* HASH TABLE STUFF */
-
-
- InitHash ()
- {
- short i;
-
- for (i = 0; i < NHASH; i++)
- NewList (&hashTab[i]);
- }
-
-
- /*
- * Simple but (I hope) effective hash function.
- */
- short HashVal (buf)
- char *buf;
- {
- unsigned long hash;
-
- hash = 0;
- while (*buf) hash = hash * 3 + *buf++;
- return hash % NHASH;
- }
-
-
- /*
- * Finds (or creates) a stored string matching the given one return pointer
- * which acts as unique ident for this string.
- */
- char * FindString (buf)
- char *buf;
- {
- short hash;
- struct Lst *hlist;
- struct StringElt *se;
- char *sto;
-
- hash = HashVal (buf);
- hlist = &hashTab[hash];
-
- for (se = HEAD (hlist); TEST (se); se = NEXT (se)) {
- if (!strcmp (se->buf, buf))
- return se->buf;
- }
- se = NEW (struct StringElt);
- if (!se) return NULL;
- sto = NEW_N (char, strlen (buf) + 1);
- if (!sto) {
- FREI (se);
- return NULL;
- }
- AddTail (hlist, se);
- strcpy (sto, buf);
- se->buf = sto;
- return sto;
- }
-
-
- FreeHash ()
- {
- short i;
- struct StringElt *se, *nse;
-
- for (i = 0; i < NHASH; i++) {
- for (se = HEAD (&hashTab[i]); nse = NEXT (se); se = nse) {
- FREE_N (se->buf, char, strlen (se->buf) + 1);
- FREI (se);
- }
- }
- }
-