home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
rtsi.com
/
2014.01.www.rtsi.com.tar
/
www.rtsi.com
/
OS9
/
TOP
/
USR
/
SRC
/
gawk2.0.t.Z
/
gawk2.0.t
/
awk1.c
< prev
next >
Wrap
C/C++ Source or Header
|
1988-12-31
|
20KB
|
881 lines
/*
* awk1 -- Expression tree constructors and main program for gawk.
*
* Copyright (C) 1986 Free Software Foundation Written by Paul Rubin, August
* 1986
*
* $Log: awk1.c,v $
* Revision 1.29 88/12/08 15:57:41 david
* *** empty log message ***
*
* Revision 1.28 88/12/07 20:00:15 david
* changes for incorporating source filename into error messages
*
* Revision 1.27 88/12/01 15:05:26 david
* changes to allow source line number printing in error messages
*
* Revision 1.26 88/11/28 20:12:30 david
* unbuffer stdout if compiled with DEBUG
*
* Revision 1.25 88/11/23 21:39:57 david
* Arnold: set strict if invoked as "awk"
*
* Revision 1.24 88/11/22 13:47:40 david
* Arnold: changes for case-insensitive matching
*
* Revision 1.23 88/11/15 10:18:57 david
* Arnold: cleanup; allow multiple -f options; if invoked as awk disable
* -v option for compatibility
*
* Revision 1.22 88/11/14 21:54:27 david
* Arnold: cleanup
*
* Revision 1.21 88/11/03 15:22:22 david
* revised flags
*
* Revision 1.20 88/11/01 11:47:24 david
* mostly cleanup and code movement
*
* Revision 1.19 88/10/19 21:56:00 david
* replace malloc with emalloc
*
* Revision 1.18 88/10/17 20:57:19 david
* Arnold: purge FAST
*
* Revision 1.17 88/10/14 22:11:24 david
* patch from Hack to get gcc to work
*
* Revision 1.16 88/10/13 21:55:08 david
* purge FAST; clean up error messages
*
* Revision 1.15 88/10/06 21:55:20 david
* be more careful about I/O errors on exit
* Arnold's fixes for command line processing
*
* Revision 1.14 88/10/06 15:52:24 david
* changes from Arnold: use getopt; ifdef -v option; change semantics of = args.
*
* Revision 1.13 88/09/26 10:16:35 david
* cleanup from Arnold
*
* Revision 1.12 88/09/19 20:38:29 david
* added -v option
* set FILENAME to "-" if stdin
*
* Revision 1.11 88/08/09 14:49:23 david
* reorganized handling of command-line arguments and files
*
* Revision 1.10 88/06/13 18:08:25 david
* delete \a and -R options
* separate exit value from flag indicating that exit has been called
* [from Arnold]
*
* Revision 1.9 88/06/05 22:19:41 david
* func_level goes away; param_counter is used to count order of local vars.
*
* Revision 1.8 88/05/31 09:21:56 david
* Arnold's portability changes (vprintf)
* fixed handling of function parameter use inside function
*
* Revision 1.7 88/05/30 09:51:17 david
* exit after yyparse() if any errors were encountered
* mk_re_parse() parses C escapes in regexps.
* do panic() properly with varargs
* clean up and simplify pop_var()
*
* Revision 1.6 88/05/26 22:46:19 david
* minor changes: break out separate case for making regular expressions
* from parser vs. from strings
*
* Revision 1.5 88/05/13 22:02:31 david
* moved BEGIN and END block merging into parse-phase (sorry Arnold)
* if there is only a BEGIN block and nothing else, don't read any files
* cleaned up func_install a bit
*
* Revision 1.4 88/05/04 12:18:28 david
* make_for_loop() now returns a NODE *
* pop_var() now returns the value of the node being popped, to be used
* in func_call() if the variable is an array (call by reference)
*
* Revision 1.3 88/04/15 13:12:19 david
* small fix to arg reading code
*
* Revision 1.2 88/04/14 14:40:25 david
* Arnold's changes to read program from a file
*
* Revision 1.1 88/04/08 15:14:52 david
* Initial revision
* Revision 1.6 88/04/08 14:48:30 david changes from
* Arnold Robbins
*
* Revision 1.5 88/03/28 14:13:33 david *** empty log message ***
*
* Revision 1.4 88/03/23 22:17:33 david mostly delinting -- a couple of bug
* fixes
*
* Revision 1.3 88/03/18 21:00:09 david Baseline -- hopefully all the
* functionality of the new awk added. Just debugging and tuning to do.
*
* Revision 1.2 87/11/19 14:40:17 david added support for user-defined
* functions
*
* Revision 1.1 87/10/27 15:23:26 david Initial revision
*
*/
/*
* GAWK is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY. No author or distributor accepts responsibility to anyone for
* the consequences of using it or for whether it serves any particular
* purpose or works at all, unless he says so in writing. Refer to the GAWK
* General Public License for full details.
*
* Everyone is granted permission to copy, modify and redistribute GAWK, but
* only under the conditions described in the GAWK General Public License. A
* copy of this license is supposed to have been given to you along with GAWK
* so you can know your rights and responsibilities. It should be in a file
* named COPYING. Among other things, the copyright notice and this notice
* must be preserved on all copies.
*
* In other words, go ahead and share GAWK, but don't try to stop anyone else
* from sharing it farther. Help stamp out software hoarding!
*/
#include "awk.h"
/*
 * The parsed program body.  do_file() interprets it once for every record
 * read by inrec(); presumably it is filled in by the parser.
 */
NODE *expression_value;
/* Symbol table: hash buckets filled by install() and searched by lookup() */
struct hashnode *variables[HASHSIZE];
/*
 * Temporary nodes are stored here. ob_dummy is a dummy object used to keep
 * the obstack library from free()ing up the entire stack.
 */
struct obstack temp_strings;
char *ob_dummy;
/*
 * The parse tree and field nodes are stored here. Parse_end is a dummy item
 * used to free up unneeded fields without freeing the program being run.
 * var_stack holds the hash nodes that install() allocates.
 */
struct obstack other_stack;
struct obstack var_stack;
char *parse_end;
int errcount = 0; /* error counter, used by yyerror() */
/* Next ordinal handed out by make_param() to a function parameter */
int param_counter;
/* The global null string */
NODE *Nnull_string;
/* The special variable that contains the name of the current input file */
extern NODE *FILENAME_node;
/* The special variables holding the argument count and argument array */
extern NODE *ARGC_node;
extern NODE *ARGV_node;
/* The name the program was invoked under, for error messages */
char *myname;
/* A block of AWK code to be run before running the program */
NODE *begin_block = 0;
/* A block of AWK code to be run after the last input file */
NODE *end_block = 0;
FILE *input_file; /* Where to read from */
int exiting = 0; /* Was an "exit" statement executed? */
int exit_val = 0; /* optional exit value */
#ifdef DEBUG
/* non-zero means debugging is enabled. Probably not very useful */
int debugging = 0;
#endif
int tempsource = 0; /* source is in a temp file */
char **sourcefile = NULL; /* source file name(s) */
/*
 * Index of the last entry used in sourcefile[]; -1 means no source file
 * has been recorded yet (so the number of files is numfiles + 1).
 */
int numfiles = -1;
int ignorecase = 0; /* global flag for ignoring case */
int strict = 0; /* turn off gnu extensions */
/*
 * main -- gawk entry point.
 *
 * Sets up the regex syntax, the obstacks, the null string and the special
 * variables, processes the command-line options, makes sure the program
 * text lives in a file (named with -f, or saved into a temp file when it
 * is given as an argument), parses it, and then runs the BEGIN block, the
 * rules over every input file, and the END block.
 *
 * Exits with exit_val, or with 1 if exit_val was 0 and flushing or closing
 * the I/O streams failed.  Exits with 1 if the program does not parse.
 */
main(argc, argv)
int argc;
char **argv;
{
#ifdef DEBUG
	/* Print out the parse tree.  For debugging */
	register int dotree = 0;
	extern int yydebug;
#endif
	extern char *lexptr;
	extern char *lexptr_begin;
	extern char *version_string;
	extern FILE *nextfile();
	extern char *alloca();
	FILE *fp, *fopen();
#ifndef OSK
	static char template[] = "/tmp/gawk.XXXXX";
#else
	static char template[] = "/dd/tmp/gawk.XXXXX";
#endif
	char *mktemp ();
	int c;
	extern int opterr, optind, getopt();
	extern char *optarg;
	char *cp, *rindex();
	/*
	 * for strict to work, legal options must be first
	 */
#define EXTENSIONS	4	/* where to clear */
	/*
	 * This must be a writable array, not a pointer to a string literal:
	 * strict mode truncates it in place below.
	 */
#ifdef DEBUG
	static char awk_opts[] = "F:f:ivdD";
#else
	static char awk_opts[] = "F:f:iv";
#endif

#ifdef DEBUG
	/*malloc_debug(2);*/
#endif
	myname = argv[0];
	if (argc < 2)
		usage();

	/* Tell the regex routines how they should work. . . */
	(void) re_set_syntax(RE_SYNTAX_AWK);

	/* Set up the stack for temporary strings */
	obstack_init(&temp_strings);
	ob_dummy = obstack_alloc(&temp_strings, 0);

	/* Set up the other stack for other things */
	obstack_init(&other_stack);
	obstack_init(&var_stack);

	/* initialize the null string */
	Nnull_string = make_string("", 0);
	Nnull_string->numbr = 0.0;
	Nnull_string->type = Node_val;
	Nnull_string->flags = (PERM|STR|NUM);

	/* Set up the special variables */
	/*
	 * Note that this must be done BEFORE arg parsing else -F
	 * breaks horribly
	 */
	init_vars();

	/* one slot per argument is the worst case for the number of -f files */
	sourcefile = (char **) alloca(argc * sizeof(char *));

#ifdef STRICT	/* strict Unix awk compatibility */
	strict = 1;
#else
	/* if invoked as 'awk', also behave strictly */
	if ((cp = rindex(myname, '/')) != NULL)
		cp++;
	else
		cp = myname;
	if (strcmp (cp, "awk") == 0)
		strict = 1;
#endif

	/* in strict mode, cut the extension options off the getopt string */
	if (strict)
		awk_opts[EXTENSIONS] = '\0';

	while ((c = getopt (argc, argv, awk_opts)) != EOF) {
		switch (c) {
#ifdef DEBUG
		case 'd':
			debugging++;
			dotree++;
			break;

		case 'D':
			debugging++;
			yydebug = 2;
			break;
#endif

		case 'F':
			set_fs(optarg);
			break;

		case 'f':
			/*
			 * a la MKS awk, allow multiple -f options.
			 * this makes function libraries real easy.
			 * most of the magic is in the scanner.
			 */
			sourcefile[++numfiles] = optarg;
			break;

		case 'i':
			ignorecase = 1;
			break;

		case 'v':
			fprintf(stderr, "%s", version_string);
			break;

		case '?':
		default:
			/* getopt will print a message for us */
			/* S5R4 awk ignores bad options and keeps going */
			break;
		}
	}
#ifdef DEBUG
	setbuf(stdout, (char *) NULL);	/* make debugging easier */
#endif
	/* No -f option, use next arg */
	/* write to temp file and save sourcefile name */
	if (numfiles == -1) {
		int i;

		if (optind > argc - 1)	/* no args left */
			usage();
		numfiles++;
		sourcefile[0] = mktemp (template);
		i = strlen (argv[optind]);
		if ((fp = fopen (sourcefile[0], "w")) == NULL)
			fatal("could not save source prog in temp file (%s)",
			      sys_errlist[errno]);
		/*
		 * An empty program is legal, so only complain when fewer
		 * bytes than requested were written.
		 */
		if (i > 0 && fwrite (argv[optind], 1, i, fp) != i)
			fatal(
			"could not write source program to temp file (%s)",
			      sys_errlist[errno]);
		/* make sure the saved program ends with a newline */
		if (i == 0 || argv[optind][i-1] != '\n')
			putc ('\n', fp);
		/* buffered write errors only show up when the file is closed */
		if (fclose (fp) != 0)
			fatal(
			"could not write source program to temp file (%s)",
			      sys_errlist[errno]);
		tempsource++;
		optind++;
	}
	init_args(optind, argc, myname, argv);

	/* Read in the program */
	lexptr_begin = lexptr;
	if (yyparse() || errcount)
		exit(1);

	/*
	 * Anything allocated on the other_stack after here will be freed
	 * when the next input line is read.
	 */
	parse_end = obstack_alloc(&other_stack, 0);

#ifdef DEBUG
	if (dotree)
		print_parse_tree(expression_value);
#endif
	/* Set up the field variables */
	init_fields();

	if (begin_block)
		(void) interpret(begin_block);
	/* input is read only if there are rules or an END block to run */
	if (!exiting && (expression_value || end_block)) {
		if(input_file)
			do_file(input_file);
		while ((fp = nextfile()) != NULL) {
			do_file(fp);
			if (exiting)
				break;
		}
	}
	if (end_block)
		(void) interpret(end_block);
	/* an I/O failure on exit turns a zero exit status into 1 */
	if (flush_io() != 0 && exit_val == 0)
		exit_val = 1;
	if (close_io() != 0 && exit_val == 0)
		exit_val = 1;
	exit(exit_val);
}
/*
 * do_file -- run the program over one input stream.
 *
 * Makes FP the current input_file, then keeps interpreting
 * expression_value for each record as long as inrec() returns 0 and
 * interpret() returns non-zero.  FP is closed afterwards unless it is
 * stdin.  (The per-record obstack_free() calls on temp_strings and
 * other_stack are disabled in this version.)
 */
do_file(fp)
FILE *fp;
{
	input_file = fp;

	/* The infamous MAIN LOOP: this is where it spends all its time. */
	if (inrec() == 0)
		while (interpret(expression_value) && inrec() == 0)
			continue;

	if (fp != stdin)
		(void) fclose(fp);
}
/*
 * usage -- print the two command-line synopses on stderr and exit.
 *
 * The -v option is only advertised when not compiled with STRICT.
 * The exit status is 1 under OSK and 11 everywhere else.
 */
usage()
{
#ifdef STRICT
	char *file_form = "[ -Ffs ] -f progfile [ -- ]";
	char *inline_form = "[ -Ffs ] [ -- ] 'program'";
#else
	char *file_form = "[ -v ] [ -Ffs ] -f progfile [ -- ]";
	char *inline_form = "[ -v ] [ -Ffs ] [ -- ] 'program'";
#endif

	fprintf(stderr, "usage: %s %s file ...\n %s %s file ...\n",
		myname, file_form, myname, inline_form);
#ifdef OSK
	exit(1);
#else
	exit(11);
#endif
}
/*
 * node_common -- allocate a NODE of type OP on other_stack and stamp it
 * with the current source position.
 *
 * source_line is always set to lineno.  source_file is set to the current
 * entry of sourcefile[] only when numfiles > 1 and the program did not
 * come from a temp file; otherwise it is NULL.
 *
 * NOTE(review): numfiles is advanced from -1 once per -f option, so
 * "numfiles > 1" only holds with three or more source files -- confirm
 * that this threshold is intended.
 */
NODE *
node_common(op)
NODETYPE op;
{
	extern int lineno;
	extern int numfiles;
	extern int tempsource;
	extern char **sourcefile;
	extern int curinfile;
	register NODE *fresh;

	fresh = (NODE *) obstack_alloc(&other_stack, sizeof(NODE));
	fresh->type = op;
	fresh->source_line = lineno;
	fresh->source_file = (numfiles > 1 && !tempsource)
				? sourcefile[curinfile]
				: NULL;
	return fresh;
}
/*
 * node -- build a parse-tree node of type OP whose lnode is LEFT and whose
 * rnode is RIGHT.  The node comes from node_common(), so it also carries
 * the current source position.
 * This should only be used by yyparse+co while reading in the program.
 */
NODE *
node(left, op, right)
NODE *left, *right;
NODETYPE op;
{
	register NODE *fresh = node_common(op);

	fresh->lnode = left;
	fresh->rnode = right;
	return fresh;
}
/*
 * snode -- build a parse-tree node of type OP whose subnode is SUBN and
 * whose proc is the function pointer PROCP.
 * In every other respect this behaves like node().
 */
NODE *
snode(subn, op, procp)
NODETYPE op;
NODE *(*procp) ();
NODE *subn;
{
	register NODE *fresh = node_common(op);

	fresh->subnode = subn;
	fresh->proc = procp;
	return fresh;
}
/*
 * mkrangenode -- build a Node_line_range node whose condpair is CPAIR.
 *
 * The trigger word is explicitly zeroed here, because calling
 * 'node( foo, Node_line_range, 0)' would not be guaranteed to initialize
 * 'triggered' properly.
 *
 * The node is allocated directly on other_stack rather than through
 * node_common(), so source_line and source_file are left unset.
 */
NODE *
mkrangenode(cpair)
NODE *cpair;
{
	register NODE *range;

	range = (NODE *) obstack_alloc(&other_stack, sizeof(NODE));
	range->type = Node_line_range;
	range->triggered = 0;
	range->condpair = cpair;
	return range;
}
/*
 * mk_re_parse -- translate the C escapes in a regexp taken from the
 * program text, then compile it.
 *
 * S is rewritten in place (the result is never longer than the input).
 * Scanning stops at the terminating NUL or at the first '/' that is not
 * preceded by a backslash.  Recognized escapes are \b \f \n \r \t \v and
 * \ooo (one to three octal digits); any other escaped character is kept
 * together with its backslash.
 *
 * Returns the pattern buffer produced by make_regexp().
 */
struct re_pattern_buffer *
mk_re_parse(s)
char *s;
{
	register char *src, *dest;
	int c;

	for (dest = src = s; *src != '\0'; src++) {
		if (*src == '/')
			break;
		if (*src != '\\') {
			*dest++ = *src;
			continue;
		}

		c = *++src;
		switch (c) {
		case '\0':
			/*
			 * Backslash at the very end: keep it, and step back
			 * so that the loop increment lands on the terminator
			 * instead of running past it.
			 */
			*dest++ = '\\';
			src--;
			break;
		case 'b':
			*dest++ = '\b';
			break;
		case 'f':
			*dest++ = '\f';
			break;
		case 'n':
			*dest++ = '\n';
			break;
		case 'r':
			*dest++ = '\r';
			break;
		case 't':
			*dest++ = '\t';
			break;
		case 'v':
			*dest++ = '\v';
			break;
		case '0':
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
			{
			register int i = c - '0';
			register int count = 0;

			/*
			 * Peek at the next character before consuming it, so
			 * that whatever follows a short escape (including the
			 * terminating NUL) is left for the main loop.
			 */
			while (++count < 3 && src[1] >= '0' && src[1] <= '7') {
				i *= 8;
				i += *++src - '0';
			}
			*dest++ = i;
			}
			break;
		default:
			*dest++ = '\\';
			*dest++ = c;
			break;
		}
	}
	return make_regexp(tmp_string(s, dest-s));
}
/*
 * make_regexp -- compile the string node S into a regular expression.
 *
 * A zeroed pattern buffer is allocated together with an initial 8-byte
 * compile buffer and a 256-byte fastmap.  Any error reported by
 * re_compile_pattern() is passed to fatal() along with the pattern text.
 *
 * Returns the compiled pattern buffer.
 */
struct re_pattern_buffer *
make_regexp(s)
NODE *s;
{
	struct re_pattern_buffer *rp;
	char *errmsg;

	emalloc(rp, struct re_pattern_buffer *, sizeof(*rp), "make_regexp");
	bzero((char *) rp, sizeof(*rp));

	/*
	 * The compile buffer could be obstack allocated, except the regex
	 * routines try to realloc() it, which fails.
	 * Note that this means it may never be freed.  Someone fix, please?
	 */
	emalloc(rp->buffer, char *, 8, "make_regexp");
	rp->allocated = 8;
	emalloc(rp->fastmap, char *, 256, "make_regexp");

	errmsg = re_compile_pattern(s->stptr, s->stlen, rp);
	if (errmsg != NULL)
		fatal("%s: /%s/", errmsg, s->stptr);
	return rp;
}
/*
 * make_for_loop -- bundle the three parts of a for statement.
 *
 * INIT, COND and INCR are stored in a FOR_LOOP_HEADER allocated on
 * other_stack.  The value returned is a carrier node of type Node_illegal
 * whose sub.nodep.r.hd field points at that header.
 */
NODE *
make_for_loop(init, cond, incr)
NODE *init, *cond, *incr;
{
	register FOR_LOOP_HEADER *hdr;
	NODE *carrier;

	hdr = (FOR_LOOP_HEADER *) obstack_alloc(&other_stack,
						sizeof(FOR_LOOP_HEADER));
	carrier = (NODE *) obstack_alloc(&other_stack, sizeof(NODE));

	hdr->init = init;
	hdr->cond = cond;
	hdr->incr = incr;

	carrier->type = Node_illegal;
	carrier->sub.nodep.r.hd = hdr;
	return carrier;
}
/*
 * variable -- return the symbol-table node for the variable NAME.
 * If it is not in the table yet, it is installed as a Node_var whose
 * left child is the global null string.
 */
NODE *
variable(name)
char *name;
{
	NODE *lookup(), *install();
	register NODE *found;

	found = lookup(variables, name);
	if (found == NULL)
		found = install(variables, name,
				node(Nnull_string, Node_var, (NODE *) NULL));
	return found;
}
/*
 * spc_var -- return the symbol-table node for the special variable NAME.
 * If it is not in the table yet, it is installed as a Node_var whose
 * left child is VALUE; an existing entry is returned untouched.
 */
NODE *
spc_var(name, value)
char *name;
NODE *value;
{
	NODE *lookup(), *install();
	register NODE *found;

	found = lookup(variables, name);
	if (found == NULL)
		found = install(variables, name,
				node(value, Node_var, (NODE *) NULL));
	return found;
}
/*
 * install -- enter NAME with VALUE in the hash table TABLE, even if the
 * name is already there.
 *
 * The name ends at the first non-identifier character.  The new entry
 * goes at the head of its bucket, in front of any older entry with the
 * same name.  Callers must check against redefinition themselves if that
 * matters.
 *
 * The hash node and a NUL-terminated copy of the name are allocated as
 * one chunk on var_stack, and VALUE's varname is pointed at that copy.
 * Returns VALUE.
 */
NODE *
install(table, name, value)
HASHNODE **table;
char *name;
NODE *value;
{
	register HASHNODE *entry;
	register char *end;
	register int len, nbytes, slot;

	for (end = name; is_identchar(*end); end++)
		continue;
	len = end - name;

	/* room for the node itself, the name and its terminating NUL */
	nbytes = sizeof(HASHNODE) + len + 1;
	entry = (HASHNODE *) obstack_alloc(&var_stack, nbytes);

	/* the name copy lives right behind the node */
	entry->name = ((char *) entry) + sizeof(HASHNODE);
	bcopy(name, entry->name, len);
	entry->name[len] = '\0';
	entry->length = len;
	entry->value = value;
	entry->value->varname = entry->name;

	/* push onto the front of the bucket chain */
	slot = hashf(name, len, HASHSIZE);
	entry->next = table[slot];
	table[slot] = entry;

	return entry->value;
}
/*
 * lookup -- find the most recently installed entry for NAME in TABLE.
 *
 * The name ends at the first non-identifier character.  An entry matches
 * when its length and its characters are both equal.
 * Returns the entry's value node, or NULL when there is no match.
 */
NODE *
lookup(table, name)
HASHNODE **table;
char *name;
{
	register HASHNODE *entry;
	register char *end;
	register int len;

	end = name;
	while (is_identchar(*end))
		end++;
	len = end - name;

	for (entry = table[hashf(name, len, HASHSIZE)];
	     entry != NULL;
	     entry = entry->next) {
		if (entry->length != len)
			continue;
		if (strncmp(entry->name, name, len) == 0)
			return entry->value;
	}
	return NULL;
}
/* Fold one more character into a running hash value */
#define HASHSTEP(old, c) (((old) << 1) + (c))
#define MAKE_POS(v) ((v) & ~0x80000000) /* make number positive */
/*
 * hashf -- return the hash of the first LEN characters of NAME, reduced
 * to the range 0 .. HASHSIZE-1.
 *
 * The value is accumulated in an unsigned int: shifting a signed int past
 * its range is undefined, while unsigned arithmetic simply wraps.  With a
 * 32-bit int the result is the same as the wrapped signed computation.
 * HASHSIZE must be positive.
 */
int
hashf(name, len, hashsize)
register char *name;
register int len;
int hashsize;
{
	register unsigned int r = 0;

	while (len-- > 0)
		r = HASHSTEP(r, *name++);
	return (int) (MAKE_POS(r) % hashsize);
}
/*
 * append_right -- hang NEW on the end of the rnode chain that starts at
 * LIST, and return LIST.
 *
 * Each call walks the whole chain, so building a list this way takes n^2
 * time; it is not used enough to make optimizing worth it.  LIST must not
 * be NULL.
 */
/* You don't believe me? Profile it yourself! */
NODE *
append_right(list, new)
NODE *list, *new;
{
	register NODE *tail;

	for (tail = list; tail->rnode != NULL; tail = tail->rnode)
		continue;
	tail->rnode = new;
	return list;
}
/*
 * func_install -- define a function.
 *
 * PARAMS is the list whose head names the function, and DEF is the
 * function body.  Every name on PARAMS is first removed from the symbol
 * table with pop_params().  If the function name is still defined after
 * that, it is a fatal error; otherwise the name is installed as a
 * Node_func node with PARAMS on the left and DEF on the right.
 */
func_install(params, def)
NODE *params;
NODE *def;
{
	NODE *lookup();

	pop_params(params);
	if (lookup(variables, params->param) != NULL)
		fatal("function name `%s' previously defined", params->param);
	else
		(void) install(variables, params->param,
			       node(params, Node_func, def));
}
NODE *
pop_var(name)
char *name;
{
register char *bp;
register HASHNODE *bucket, **save;
register int len;
for (bp = name; is_identchar(*bp); bp++)
;
len = bp - name;
save = &(variables[hashf(name, len, HASHSIZE)]);
bucket = *save;
while (bucket) {
if (strncmp(bucket->name, name, len) == 0) {
*save = bucket->next;
return bucket->value;
}
save = &(bucket->next);
bucket = bucket->next;
}
return NULL;
}
/*
 * pop_params -- call pop_var() on the name of every node in the rnode
 * chain that starts at PARAMS.
 */
pop_params(params)
NODE *params;
{
	register NODE *cur = params;

	while (cur != NULL) {
		pop_var(cur->param);
		cur = cur->rnode;
	}
}
/*
 * make_name -- create a node of type TYPE that carries its own copy of
 * NAME, and install it in the symbol table under that name.
 *
 * The name ends at the first non-identifier character.  The node and the
 * NUL-terminated copy of the name are both allocated on other_stack, and
 * the node's rnode starts out NULL.  Returns the installed node.
 */
NODE *
make_name(name, type)
char *name;
NODETYPE type;
{
	register NODE *np;
	register char *end;
	register int len;

	for (end = name; is_identchar(*end); end++)
		continue;
	len = end - name;

	np = (NODE *) obstack_alloc(&other_stack, sizeof(NODE));
	np->param = (char *) obstack_alloc(&other_stack, len + 1);
	if (np->param == NULL)
		abort();
	bcopy(name, np->param, len);
	np->param[len] = '\0';

	np->rnode = NULL;
	np->type = type;
	return install(variables, name, np);
}
/*
 * make_param -- create a Node_param_list node for the parameter NAME and
 * number it with the next value of param_counter.
 */
NODE *make_param(name)
char *name;
{
	NODE *param = make_name(name, Node_param_list);

	param->param_cnt = param_counter++;
	return param;
}
FILE *
nextfile()
{
static int i = 1;
static int files = 0;
char *arg;
char *cp;
FILE *fp;
extern NODE **assoc_lookup();
for (; i < (int) (ARGC_node->lnode->numbr); i++) {
arg = (*assoc_lookup(ARGV_node, tmp_number((AWKNUM) i)))->stptr;
if (*arg == '\0')
continue;
cp = index(arg, '=');
if (cp != NULL) {
*cp++ = '\0';
variable(arg)->var_value = make_string(cp, strlen(cp));
} else {
extern NODE *deref;
files++;
if (strcmp(arg, "-") == 0)
fp = stdin;
else
fp = fopen(arg, "r");
if (fp == NULL)
fatal("cannot open file `%s' for reading (%s)",
arg, sys_errlist[errno]);
/* NOTREACHED */
/* This is a kludge. */
deref = FILENAME_node->var_value;
do_deref();
FILENAME_node->var_value =
make_string(arg, strlen(arg));
FNR_node->var_value->numbr = 0.0;
i++;
return fp;
}
}
if (files == 0) {
files++;
/* no args. -- use stdin */
/* FILENAME is init'ed to "-" */
/* FNR is init'ed to 0 */
return stdin;
}
return NULL;
}