home *** CD-ROM | disk | FTP | other *** search
- /*
- * GNU m4 -- A simple macro processor
- * Copyright (C) 1989-1992 Free Software Foundation, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
- /*
- * Handling of different input sources, and lexical analysis.
- */
-
- #include "m4.h"
- /*
- * Unread input can be either files, that should be read (eg. included
- * files), strings, which should be rescanned (eg. macro expansion
- * text), or quoted macro definitions (as returned by the builtin
- * "defn"). Unread input are organised in a stack, implemented with an
- * obstack. Each input source is described by a "struct input_block".
- * The obstack is "input_stack". The top of the input stack is "isp".
- *
- * The macro "m4wrap" places the text to be saved on another input
- * stack, on the obstack "wrapup_stack", whose top is "wsp". When EOF
- * is seen on normal input (eg, when "input_stack" is empty), input is
- * switched over to "wrapup_stack". To make this easier, all references
- * to the current input stack, whether it be "input_stack" or
- * "wrapup_stack", are done through a pointer "current_input", which
- * points to either "input_stack" or "wrapup_stack".
- *
- * Pushing new input on the input stack is done by push_file (),
- * push_string (), push_wrapup () (for wrapup text), and push_macro () (for
- * macro definitions). Because macro expansion needs direct access to
- * the current input obstack (for optimisation), push_string () is split
- * into two functions: push_string_init (), which returns a pointer to
- * the current input stack, and push_string_finish (), which returns a
- * pointer to the final text. The input_block pointer "next" is used to
- * coordinate the different push routines.
- *
- * The current file and line number are stored in two global variables,
- * for use by the error handling functions in m4.c. Whenever a file
- * input_block is pushed, the current file name and line number is saved
- * in the input_block, and the two variables are reset to match the new
- * input file.
- */
-
/* The kinds of input source that can be stacked.  */
typedef enum input_type
{
  INPUT_FILE,                   /* an open FILE being read */
  INPUT_STRING,                 /* a string to be rescanned */
  INPUT_MACRO                   /* a builtin macro definition */
}
input_type;
-
- struct input_block
- {
- struct input_block *prev; /* previous input_block on the input stack */
- input_type type; /* INPUT_FILE, INPUT_STRING or INPUT_MACRO */
- union
- {
- struct
- {
- char *string; /* string value */
- }
- u_s;
- struct
- {
- FILE *file; /* input file handle */
- char *name; /* name of PREVIOUS input file */
- int lineno; /* current line number for do. */
- /* Yet another attack of "The curse of global variables" (sic). */
- int out_lineno; /* current output line number do.*/
- boolean advance_line; /* start_of_input_line from advance_input */
- }
- u_f;
- struct
- {
- builtin_func *func; /* pointer to macros function */
- boolean traced; /* TRUE iff builtin is traced */
- }
- u_m;
- }
- u;
- };
-
- typedef struct input_block input_block;
-
-
- /* Current input file name. */
- char *current_file;
-
- /* Current input line number. */
- int current_line;
-
- /* Obstack for storing individual tokens. */
- static struct obstack token_stack;
-
- /* Normal input stack. */
- static struct obstack input_stack;
-
- /* Wrapup input stack. */
- static struct obstack wrapup_stack;
-
- /* Input or wrapup. */
- static struct obstack *current_input;
-
- /* Bottom of token_stack, for obstack_free. */
- static char *token_bottom;
-
- /* Pointer to top of current_input. */
- static input_block *isp;
-
- /* Pointer to top of wrapup_stack. */
- static input_block *wsp;
-
- /* Aux. for handling split push_string (). */
- static input_block *next;
-
- /* Flag for advance_input to increment current_line. */
- static boolean start_of_input_line;
-
- #define CHAR_EOF 256 /* character return on EOF */
- #define CHAR_MACRO 257 /* character return for MACRO token */
-
- /* Quote chars. */
- char *rquote;
- char *lquote;
-
- /* And their length. */
- int len_rquote;
- int len_lquote;
-
- /* And default quote chars. */
- static char *def_rquote = DEF_RQUOTE;
- static char *def_lquote = DEF_LQUOTE;
-
- /* And comment chars. */
- char *bcomm;
- char *ecomm;
-
- /* And their length. */
- static int len_bcomm;
- static int len_ecomm;
-
- /* And default comment chars. */
- static char *def_bcomm = DEF_BCOMM;
- static char *def_ecomm = DEF_ECOMM;
-
-
- /*
- * push_file () pushes an input file on the input stack, saving the
- * current file name and line number. If next is non-NULL, this push
- * invalidates a call to push_string_init (), whose storage are
- * consequentely released.
- */
- void
- push_file (FILE *fp, const char *title)
- {
- input_block *i;
-
- if (next != NULL)
- {
- obstack_free (current_input, next);
- next = NULL;
- }
-
- if (debug_level & DEBUG_TRACE_INPUT)
- debug_message ("input read from %s", title);
-
- i = (input_block *) obstack_alloc (current_input,
- sizeof (struct input_block));
- i->type = INPUT_FILE;
-
- i->u.u_f.name = current_file;
- i->u.u_f.lineno = current_line;
- i->u.u_f.out_lineno = output_current_line;
- i->u.u_f.advance_line = start_of_input_line;
- current_file = obstack_copy0 (current_input, title, strlen (title));
- current_line = 1;
- output_current_line = -1;
-
- i->u.u_f.file = fp;
- i->prev = isp;
- isp = i;
- }
-
- /*
- * push_macro () pushes a builtin macros definition on the input stack. If
- * next is non-NULL, this push invalidates a call to push_string_init (),
- * whose storage are consequentely released.
- */
- void
- push_macro (builtin_func *func, boolean traced)
- {
- input_block *i;
-
- if (next != NULL)
- {
- obstack_free (current_input, next);
- next = NULL;
- }
-
- i = (input_block *) obstack_alloc (current_input,
- sizeof (struct input_block));
- i->type = INPUT_MACRO;
-
- i->u.u_m.func = func;
- i->u.u_m.traced = traced;
- i->prev = isp;
- isp = i;
- }
-
- /*
- * First half of push_string (). The pointer next points to the new
- * input_block.
- */
- struct obstack *
- push_string_init (void)
- {
- if (next != NULL)
- internal_error ("recursive push_string!");
-
- next = (input_block *) obstack_alloc (current_input,
- sizeof (struct input_block));
- next->type = INPUT_STRING;
- return current_input;
- }
-
- /*
- * Last half of push_string (). If next is now NULL, a call to
- * push_file () has invalidated the previous call to push_string_init (),
- * so we just give up. If the new object is void, we do not push it.
- * The function push_string_finish () returns a pointer to the finished
- * object. This pointer is only for temporary use, since reading the
- * next token might release the memory used for the object.
- */
- char *
- push_string_finish (void)
- {
- char *ret = NULL;
-
- if (next == NULL)
- return NULL;
-
- if (obstack_object_size (current_input) > 0)
- {
- obstack_1grow (current_input, '\0');
- next->u.u_s.string = obstack_finish (current_input);
- next->prev = isp;
- isp = next;
- ret = isp->u.u_s.string; /* for immediate use only */
- }
- else
- obstack_free (current_input, next); /* people might leave garbage on it. */
- next = NULL;
- return ret;
- }
-
- /*
- * The function push_wrapup () pushes a string on the wrapup stack. When
- * he normal input stack gets empty, the wrapup stack will become the
- * input stack, and push_string () and push_file () will operate on
- * wrapup_stack. Push_wrapup should be done as push_string (), but this
- * will suffice, as long as arguments to m4_m4wrap () are moderate in
- * size.
- */
- void
- push_wrapup (char *s)
- {
- input_block *i = (input_block *) obstack_alloc (&wrapup_stack,
- sizeof (struct input_block));
- i->prev = wsp;
- i->type = INPUT_STRING;
- i->u.u_s.string = obstack_copy0 (&wrapup_stack, s, strlen (s));
- wsp = i;
- }
-
-
-
- /*
- * The function pop_input () pops one level of input sources. If the
- * popped input_block is a file, current_file and current_line are reset
- * to the saved values before the memory for the input_block are
- * released.
- */
- static void
- pop_input (void)
- {
- input_block *tmp = isp->prev;
-
- switch (isp->type)
- {
- case INPUT_STRING:
- case INPUT_MACRO:
- break;
- case INPUT_FILE:
- if (debug_level & DEBUG_TRACE_INPUT)
- debug_message ("input reverted to %s, line %d",
- isp->u.u_f.name, isp->u.u_f.lineno);
-
- fclose (isp->u.u_f.file);
- current_file = isp->u.u_f.name;
- current_line = isp->u.u_f.lineno;
- output_current_line = isp->u.u_f.out_lineno;
- start_of_input_line = isp->u.u_f.advance_line;
- if (tmp != NULL)
- output_current_line = -1;
- break;
- default:
- internal_error ("Input stack botch in pop_input ()");
- break;
- }
- obstack_free (current_input, isp);
- next = NULL; /* might be set in push_string_init () */
-
- isp = tmp;
- }
-
- /*
- * To switch input over to the wrapup stack, main () calls pop_wrapup ().
- * Since wrapup text can install new wrapup text, pop_wrapup () returns
- * FALSE when there is no wrapup text on the stack, and TRUE otherwise.
- */
- boolean
- pop_wrapup (void)
- {
- if (wsp == NULL)
- return FALSE;
-
- current_input = &wrapup_stack;
- isp = wsp;
- wsp = NULL;
-
- return TRUE;
- }
-
- /*
- * When a MACRO token is seen, next_token () uses get_macro_func () to
- * retrieve the value of the function pointer.
- */
- static void
- init_macro_token (token_data *td)
- {
- if (isp->type != INPUT_MACRO)
- internal_error ("Bad call to get_macro_func ()");
-
- TOKEN_DATA_TYPE (td) = TOKEN_FUNC;
- TOKEN_DATA_FUNC (td) = isp->u.u_m.func;
- TOKEN_DATA_FUNC_TRACED (td) = isp->u.u_m.traced;
- }
-
-
- /*
- * Low level input is done a character at a time. The function
- * peek_input () is used to look at the next character in the input
- * stream. At any given time, it reads from the input_block on the top
- * of the current input stack.
- */
- int
- peek_input (void)
- {
- register int ch;
-
- while (1)
- {
- if (isp == NULL)
- return CHAR_EOF;
-
- switch (isp->type)
- {
- case INPUT_STRING:
- ch = isp->u.u_s.string[0];
- if (ch != '\0')
- return ch;
- break;
- case INPUT_FILE:
- ch = getc (isp->u.u_f.file);
- if (ch != EOF)
- {
- ungetc (ch, isp->u.u_f.file);
- return ch;
- }
- break;
- case INPUT_MACRO:
- return CHAR_MACRO;
- default:
- internal_error ("Input stack botch in peek_input ()");
- break;
- }
- /* End of input source --- pop one level. */
- pop_input ();
- }
- }
-
- /*
- * The function next_char () is used to read and advance the input to the
- * next character. It also manages line numbers for error messages, so
- * they do not get wrong, due to lookahead. The token consisting of a
- * newline alone is taken as belonging to the line it ends, and the
- * current line number is not incremented until the next character is
- * read.
- */
- static int
- next_char (void)
- {
- register int ch;
-
- if (start_of_input_line)
- {
- start_of_input_line = FALSE;
- current_line++;
- }
-
- while (1)
- {
- if (isp == NULL)
- return CHAR_EOF;
-
- switch (isp->type)
- {
- case INPUT_STRING:
- ch = *isp->u.u_s.string++;
- if (ch != '\0')
- return ch;
- break;
- case INPUT_FILE:
- ch = getc (isp->u.u_f.file);
- if (ch != EOF)
- {
- if (ch == '\n')
- start_of_input_line = TRUE;
- return ch;
- }
- break;
- case INPUT_MACRO:
- pop_input (); /* INPUT_MACRO input sources has only one token */
- return CHAR_MACRO;
- break;
- default:
- internal_error ("Input stack botch in advance_input ()");
- break;
- }
- /* End of input source --- pop one level. */
- pop_input ();
- }
- }
-
- /*
- * skip_line () simply discards all immediately following characters,
- * upto the first newline. It is only used from m4_dnl ().
- */
- void
- skip_line (void)
- {
- int ch;
-
- while ((ch = next_char ()) != CHAR_EOF && ch != '\n')
- ;
- }
-
-
/*
 * match_input () matches the string S against a prefix of the input.
 * If all of S is found at the head of the input, those characters are
 * consumed and 1 is returned.  Otherwise the characters consumed so far
 * are pushed back as a string, and 0 is returned.  S is expected to be
 * non-empty.  Within this file the function is reached only through
 * the MATCH () macro, for delimiters of more than one character.
 */

static int
match_input (char *s)
{
  int matched;                  /* number of characters of S consumed */
  struct obstack *stack;

  if (peek_input () != s[0])
    return 0;                   /* fail, nothing consumed */
  (void) next_char ();

  for (matched = 1; s[matched] != '\0'; matched++)
    {
      if (peek_input () != s[matched])
        {
          /* Failed, push back what was consumed.  obstack_grow () may
             be a macro that evaluates its first operand more than once,
             so push_string_init () must not be called from within its
             argument list: a second call would be reported as a
             recursive push.  */
          stack = push_string_init ();
          obstack_grow (stack, s, matched);
          (void) push_string_finish ();
          return 0;
        }
      (void) next_char ();
    }

  return 1;                     /* all of S matched */
}
-
/*
 * MATCH (C, STR) is true when the delimiter STR is found in the input,
 * given that its first character C has already been read.  The first
 * character is compared inline, so that one-character delimiters never
 * call match_input ().  For a longer delimiter the remainder is matched
 * by match_input (), and after a successful match C is assigned the
 * value of peek_input ().  C must therefore be an lvalue, and both
 * arguments are evaluated more than once.  An empty STR never matches.
 */
#define MATCH(c, str) \
  ((c) != '\0' \
   && (str)[0] == (c) \
   && ((str)[1] == '\0' \
       || (match_input ((str) + 1) && ((c) = peek_input (), 1))))
-
-
- /*
- * Inititialise input stacks, and quote/comment characters.
- */
- void
- input_init (void)
- {
- current_file = "NONE";
- current_line = 0;
-
- obstack_init (&token_stack);
- obstack_init (&input_stack);
- obstack_init (&wrapup_stack);
-
- current_input = &input_stack;
-
- obstack_1grow (&token_stack, '\0');
- token_bottom = obstack_finish (&token_stack);
-
- isp = NULL;
- wsp = NULL;
- next = NULL;
-
- start_of_input_line = FALSE;
-
- set_quotes (NULL, NULL);
- set_comment (NULL, NULL);
- }
-
-
- /*
- * Functions for setting quotes and comment delimiters. Used by
- * m4_changecom () and m4_changequote ().
- */
-
- void
- set_quotes (char *lq, char *rq)
- {
- if (lquote != def_lquote)
- xfree (lquote);
- if (rquote != def_rquote)
- xfree (rquote);
-
- lquote = (lq == NULL) ? def_lquote : xstrdup (lq);
- rquote = (rq == NULL) ? def_rquote : xstrdup (rq);
-
- len_lquote = strlen (lquote);
- len_rquote = strlen (rquote);
- }
-
- void
- set_comment (char *bc, char *ec)
- {
- if (bcomm != def_bcomm)
- xfree (bcomm);
- if (ecomm != def_ecomm)
- xfree (ecomm);
-
- bcomm = (bc == NULL) ? def_bcomm : xstrdup (bc);
- ecomm = (ec == NULL) ? def_ecomm : xstrdup (ec);
-
- len_bcomm = strlen (bcomm);
- len_ecomm = strlen (ecomm);
- }
-
-
- /*
- * Parse and return a single token from the input stream. A token can
- * either be TOKEN_EOF, if the input_stack is empty; it can be
- * TOKEN_STRING for a quoted string; TOKEN_WORD for something that is a
- * potential macro name; and TOKEN_SIMPLE for any single character that
- * is not a part of any of the previous types.
- *
- * Next_token () return the token type, and passes back a pointer to the
- * token data through TD. The token text is collected on the obstack
- * token_stack, which never contains more than one token text at a time.
- * The storage pointed to by the fields in TD is therefore subject to
- * change the next time next_token () is called.
- */
-
- token_type
- next_token (token_data *td)
- {
- int ch;
- int quote_level;
- token_type type;
-
- obstack_free (&token_stack, token_bottom);
- obstack_1grow (&token_stack, '\0');
- token_bottom = obstack_finish (&token_stack);
-
- ch = peek_input ();
- if (ch == CHAR_EOF)
- {
- return TOKEN_EOF;
- #ifdef DEBUG_INPUT
- fprintf (stderr, "next_token -> EOF\n");
- #endif
- }
- if (ch == CHAR_MACRO)
- {
- init_macro_token (td);
- (void) next_char ();
- return TOKEN_MACDEF;
- }
-
- (void) next_char ();
- if (MATCH (ch, bcomm))
- {
-
- obstack_grow (&token_stack, bcomm, len_bcomm);
- while ((ch = next_char ()) != CHAR_EOF && !MATCH (ch, ecomm))
- obstack_1grow (&token_stack, ch);
- if (ch != CHAR_EOF)
- obstack_grow (&token_stack, ecomm, len_ecomm);
- type = TOKEN_STRING;
-
- }
- else if (isalpha (ch) || ch == '_')
- {
-
- obstack_1grow (&token_stack, ch);
- while ((ch = peek_input ()) != CHAR_EOF && (isalnum (ch) || ch == '_'))
- {
- obstack_1grow (&token_stack, ch);
- (void) next_char ();
- }
- type = TOKEN_WORD;
-
- }
- else if (!MATCH (ch, lquote))
- {
-
- type = TOKEN_SIMPLE;
- obstack_1grow (&token_stack, ch);
-
- }
- else
- {
-
- quote_level = 1;
- while (1)
- {
- ch = next_char ();
- if (ch == CHAR_EOF)
- fatal ("EOF in string");
-
- if (MATCH (ch, rquote))
- {
- if (--quote_level == 0)
- break;
- obstack_grow (&token_stack, rquote, len_rquote);
- }
- else if (MATCH (ch, lquote))
- {
- quote_level++;
- obstack_grow (&token_stack, lquote, len_lquote);
- }
- else
- obstack_1grow (&token_stack, ch);
- }
- type = TOKEN_STRING;
- }
-
- obstack_1grow (&token_stack, '\0');
-
- TOKEN_DATA_TYPE (td) = TOKEN_TEXT;
- TOKEN_DATA_TEXT (td) = obstack_finish (&token_stack);
- #ifdef DEBUG_INPUT
- fprintf (stderr, "next_token -> %d (%s)\n", type, TOKEN_DATA_TEXT (td));
- #endif
- return type;
- }
-
-
- #ifdef DEBUG_INPUT
-
- static void
- print_token (char *s, token_type t, token_data *td)
- {
- fprintf (stderr, "%s: ", s);
- switch (t)
- { /* TOKSW */
- case TOKEN_SIMPLE:
- fprintf (stderr, "char:");
- break;
- case TOKEN_WORD:
- fprintf (stderr, "word:");
- break;
- case TOKEN_STRING:
- fprintf (stderr, "string:");
- break;
- case TOKEN_MACDEF:
- fprintf (stderr, "macro: 0x%x\n", TOKEN_DATA_FUNC (td));
- break;
- case TOKEN_EOF:
- fprintf (stderr, "eof\n");
- break;
- }
- fprintf (stderr, "\t\"%s\"\n", TOKEN_DATA_TEXT (td));
- }
-
- static void
- lex_debug (void)
- {
- token_type t;
- token_data td;
-
- while ((t = next_token (&td)) != NULL)
- print_token ("lex", t, &td);
- }
- #endif
-