home *** CD-ROM | disk | FTP | other *** search
- /*
- * Bawk main program
- */
- #define MAIN 1
- #include <stdio.h>
- #include "bawk.h"
-
/*
 * File-private input state shared by the character and record readers.
 */
static int ungetc_arg = 0;		/* one pushed-back character for the in-memory program text; 0 = none */
static char *pattern_arg = NULL;	/* when non-NULL, getcharacter() reads from this string instead of Fileptr */
static char eof_seen = 0;		/* set by getline() when the last record was ended by EOF */
static int max_field_count = 0;		/* number of field buffers parse() has allocated so far */
-
- /*
- * Main program
- */
- main( argc, argv )
- register int argc;
- register char **argv;
- {
- register char gotrules = 0, didfile = 0, getstdin = 0;
- register char rule_file_flag = 0;
-
- DBUG_ENTER("main");
- /*
- * Initialize global variables:
- */
- Beginact = 0;
- Endact = 0;
- Rules = 0;
- Rulep = 0;
- Filename = 0;
- Linecount = 0;
- Saw_break = 0;
- Stackptr = Stackbtm - 1;
- Stacktop = Stackbtm + MAXSTACKSZ;
- Nextvar = Vartab;
- init_pop_array();
-
- strcpy( Fieldsep, " \t" );
- strcpy( Recordsep, "\n" );
-
- /*
- * Parse command line
- */
- while ( --argc )
- {
- if ( **(++argv) == '-' )
- {
- /*
- * Process dash options.
- */
- switch ( tolower( argv[0][1] ) )
- {
- case '#':
- DBUG_PUSH(&argv[0][2]);
- continue;
- case 'f':
- if(!gotrules) {
- rule_file_flag++;
- argv++;
- argc--;
- } else
- usage();
- break;
- case 0:
- if(!gotrules)
- rule_file_flag++;
- getstdin++;
- break;
- default: usage();
- }
- }
- if ( gotrules )
- {
- /*
- * Already read rules file - assume this is
- * is a text file for processing.
- */
- if ( ++didfile == 1 && Beginact )
- doaction( Beginact );
- if ( getstdin )
- {
- getstdin--;
- newfile( 0 );
- }
- else
- newfile( *argv );
- process();
- }
- else
- {
- if(rule_file_flag) {
- if ( getstdin )
- {
- getstdin--;
- newfile( 0 );
- }
- else
- newfile( *argv );
- } else
- pattern_arg = *argv;
- compile();
- pattern_arg = NULL;
- gotrules = 1;
- }
- }
- if ( !gotrules )
- usage();
-
- if ( ! didfile )
- {
- /*
- * Didn't process any files yet - process stdin.
- */
- newfile( 0 );
- if ( Beginact )
- doaction( Beginact );
- process();
- }
- if ( Endact )
- doaction( Endact );
- DBUG_RETURN(0);
- }
-
- /*
- * Regular expression/action file compilation routines.
- */
- void compile()
- {
- /*
- * Compile regular expressions and C actions into Rules struct,
- * reading from current input file "Fileptr".
- */
- register int c;
- register EXPR_NODE *root;
-
- DBUG_ENTER("compile");
-
- while ( (c = getcharacter()) != -1 )
- {
- if ( c==' ' || c=='\t' || c=='\n' )
- /* swallow whitespace */
- ;
- else if ( c=='#' )
- {
- /*
- * Swallow comments
- */
- while ( (c=getcharacter()) != -1 && c!='\n' )
- ;
- }
- else if ( c=='{' )
- {
- DBUG_PRINT("compile",("action"));
- /*
- * Compile the action string into a parse tree
- */
- ungetcharacter( (char) '{' );
-
- if ( Rulep && Rulep->action )
- {
- Rulep->nextrule = (RULE *)
- get_clear_memory( sizeof( *Rulep ) );
- Rulep = Rulep->nextrule;
- }
- if ( !Rulep )
- {
- /*
- * This is the first action encountered.
- * Allocate the first Rules structure and
- * initialize it
- */
- Rules = Rulep = (RULE *)
- get_clear_memory( sizeof( *Rulep ) );
- }
- Rulep->action = act_compile( Workbuf );
- }
- else if ( c==',' )
- {
- DBUG_PRINT("compile",("stop pattern"));
- /*
- * It's (hopefully) the second part of a two-part
- * pattern string. Swallow the comma and start
- * compiling an action string.
- */
- if ( !Rulep || !Rulep->pattern.start )
- error( "stop pattern without a start",
- RE_ERROR );
- if ( Rulep->pattern.stop )
- error( "already have a stop pattern",
- RE_ERROR );
- Rulep->pattern.stop = pat_compile( Workbuf );
- }
- else
- {
- /*
- * Assume it's a regular expression pattern
- */
- DBUG_PRINT("compile",("start pattern"));
-
- ungetcharacter( (char) c );
- root = pat_compile( Workbuf );
-
- if ( *Workbuf == T_BEGIN )
- {
- /*
- * Saw a "BEGIN" keyword - compile following
- * action into special "Beginact" parse tree.
- */
- Beginact = act_compile( Workbuf );
- continue;
- }
- if ( *Workbuf == T_END )
- {
- /*
- * Saw an "END" keyword - compile following
- * action into special "Endact" parse tree.
- */
- Endact = act_compile( Workbuf );
- continue;
- }
- if ( Rulep )
- {
- /*
- * Already saw a pattern/action - link in
- * another Rules structure.
- */
- Rulep->nextrule = (RULE *)
- get_clear_memory( sizeof( *Rulep ) );
- Rulep = Rulep->nextrule;
- }
- if ( !Rulep )
- {
- /*
- * This is the first pattern encountered.
- * Allocate the first Rules structure and
- * initialize it
- */
- Rules = Rulep = (RULE *)
- get_clear_memory( sizeof( *Rulep ) );
- }
- if ( Rulep->pattern.start )
- error( "already have a start pattern",
- RE_ERROR );
-
- Rulep->pattern.start = root;
- }
- }
- for(Rulep = Rules; Rulep; Rulep = Rulep->nextrule)
- {
- if(!Rulep->action) {
- pattern_arg = "{printf \"%s\n\", $0}";
- Rulep->action = act_compile( Workbuf );
- pattern_arg = NULL;
- }
- }
- endfile();
- DBUG_VOID_RETURN;
- }
-
- /*
- * Text file main processing loop.
- */
- void process()
- {
- /*
- * Read a line at a time from current input file at "Fileptr",
- * then apply each rule in the Rules chain to the input line.
- */
- register int i;
-
- DBUG_ENTER("process");
-
- Recordcount = 0;
-
- while ( getline() )
- {
- /*
- * Parse the input line.
- */
- if(! *Recordsep )
- strcpy(Fieldsep," \t\n");
- Fieldcount = parse( Linebuf, Fields, Fieldsep );
- DBUG_PRINT("process",( "parsed %d words:", Fieldcount ));
- DBUG_EXECUTE("process",for(i=0; i<Fieldcount; ++i )DBUG_PRINT("process",("<%s>",Fields[i])););
-
- Rulep = Rules;
- while(Rulep)
- {
- if ( ! Rulep->pattern.start )
- {
- /*
- * No pattern given - perform action on
- * every input line.
- */
- doaction( Rulep->action );
- }
- else if ( Rulep->pattern.startseen )
- {
- /*
- * Start pattern already found - perform
- * action then check if line matches
- * stop pattern.
- */
- doaction( Rulep->action );
- if ( dopattern( Rulep->pattern.stop ) )
- Rulep->pattern.startseen = 0;
- }
- else if ( dopattern( Rulep->pattern.start ) )
- {
- /*
- * Matched start pattern - perform action.
- * If a stop pattern was given, set "start
- * pattern seen" flag and process every input
- * line until stop pattern found.
- */
- doaction( Rulep->action );
- if ( Rulep->pattern.stop )
- Rulep->pattern.startseen = 1;
- }
- Rulep = Rulep->nextrule;
- }
- }
- DBUG_VOID_RETURN;
- }
-
- /*
- * Miscellaneous functions
- */
- parse( str, wrdlst, delim )
- register char *str;
- char *wrdlst[];
- char *delim;
- {
- /*
- * Parse the string of words in "str" into the word list at "wrdlst".
- * A "word" is a sequence of characters delimited by one or more
- * of the characters found in the string "delim".
- * Returns the number of words parsed.
- */
- register int wrdcnt;
- register char *cp, *wrdcp, c;
- char wrdbuf[ MAXLINELEN+1 ];
-
- DBUG_ENTER("parse");
- wrdcnt = 0;
- while ( *str )
- {
- while(c = *str++)
- {
- cp = delim;
- while(*cp && c != *cp)
- cp++;
- if(! *cp)
- break;
- }
- str--;
- if ( !*str )
- break;
- wrdcp = wrdbuf;
- while(c = *str++)
- {
- cp = delim;
- while(*cp && c != *cp)
- cp++;
- if(*cp)
- break;
- *wrdcp++ = c;
- }
- str--;
- *wrdcp = 0;
- /*
- * NOTE: allocate a MAXLINELEN sized buffer for every
- * word, just in case user wants to copy a larger string
- * into a field.
- */
- if(wrdcnt == max_field_count)
- {
- wrdlst[ wrdcnt ] = getmemory( MAXLINELEN+1 );
- max_field_count++;
- }
- strcpy( wrdlst[ wrdcnt++ ], wrdbuf );
- }
- DBUG_RETURN(wrdcnt);
- }
-
- void unparse( wrdlst, wrdcnt, str, delim )
- char *wrdlst[];
- register int wrdcnt;
- register char *str;
- char *delim;
- {
- /*
- * Replace all the words in "str" with the words in "wrdlst",
- * maintaining the same word seperation distance as found in
- * the string.
- * A "word" is a sequence of characters delimited by one or more
- * of the characters found in the string "delim".
- */
- register int wc;
- register char *sp, *cp, c;
- char strbuf[ MAXLINELEN+1 ], *start;
-
- DBUG_ENTER("unparse");
- wc = 0; /* next word in "wrdlst" */
- sp = strbuf; /* points to our local string */
- start = str; /* save start address of "str" for later... */
- while ( *str )
- {
- /*
- * Copy the field delimiters from the original string to
- * our local version.
- */
- while(c = *str++)
- {
- cp = delim;
- while(*cp && c != *cp)
- cp++;
- if(!*cp)
- break;
- *sp++ = c;
- }
- str--;
- if ( !*str )
- break;
- /*
- * Skip over the field in the original string and...
- */
- while(c = *str++)
- {
- cp = delim;
- while(*cp && c != *cp)
- cp++;
- if(*cp)
- break;
- }
- str--;
- if ( wc < wrdcnt )
- {
- /*
- * ...copy in the field in the wordlist instead.
- */
- cp = wrdlst[ wc++ ];
- while(*sp++ = *cp++);
- sp--;
- }
- }
- /*
- * Tie off the local string, then copy it back to caller's string.
- */
- *sp = 0;
- strcpy( start, strbuf );
- DBUG_VOID_RETURN;
- }
-
- char *
- getmemory( len )
- register unsigned len;
- {
- register char *cp;
-
- DBUG_ENTER("getmemory");
- if ( cp=malloc( len ) )
- DBUG_RETURN(cp);
- error( "out of memory", MEM_ERROR );
- DBUG_RETURN(NULL);
- }
-
- char *
- get_clear_memory( len )
- register unsigned len;
- {
- register char *cp;
-
- DBUG_ENTER("getmemory");
- if ( cp=calloc( 1, len ) )
- DBUG_RETURN(cp);
- error( "out of memory", MEM_ERROR );
- DBUG_RETURN(NULL);
- }
-
- EXPR_NODE *get_expr_node(operator)
- char operator;
- {
- register EXPR_NODE *node;
-
- DBUG_ENTER("get_expr_node");
- node = (EXPR_NODE *) getmemory(sizeof(EXPR_NODE));
- node->left = node->right = NULL;
- node->operator = operator;
- DBUG_PRINT("get_expr_node",("operator = '%s'",token_name[operator]));
- DBUG_RETURN(node);
- }
-
- void newfile( s )
- register char *s;
- {
- DBUG_ENTER("newfile");
- Linecount = 0;
- if ( Filename = s )
- {
- #ifdef BDS_C
- if ( fopen( s, Fileptr = Curfbuf ) == -1 )
- #else
- if ( !(Fileptr = fopen( s, "r" )) )
- #endif
- error( "file not found", FILE_ERROR );
- }
- else
- {
- /*
- * No file name given - process standard input.
- */
- Fileptr = stdin;
- Filename = "standard input";
- }
- DBUG_VOID_RETURN;
- }
-
- getline()
- {
- /*
- * Read a record from current input file.
- */
- register int rtn, len = 0;
- register char *cp = Linebuf, *last_nl, *sep = Recordsep;
-
- DBUG_ENTER("getline");
- if(eof_seen)
- {
- endfile();
- DBUG_RETURN(0);
- }
- if(*sep)
- {
- while((*cp++ = rtn = getcharacter()) != *sep++ && rtn != -1)
- {
- while(*sep)
- {
- if(rtn == *sep++)
- break;
- }
- if( ++len == MAXLINELEN )
- error("Input record too long", RECORD_ERROR);
- sep = Recordsep;
- }
- } else /* Treat an empty line as record separator. */
- {
- while(1)
- {
- last_nl = cp;
- while((*cp++ = rtn = getcharacter()) != '\n' &&
- rtn != -1)
- {
- if( ++len == MAXLINELEN )
- error("Input record too long",
- RECORD_ERROR);
- }
- if(((cp - last_nl) == 1) || (rtn == -1))
- break;
- }
- }
- *(--cp) = 0;
- if ( rtn == -1 )
- {
- if(len)
- eof_seen = 1;
- else
- {
- endfile();
- DBUG_RETURN(0);
- }
- }
- ++Recordcount;
- DBUG_RETURN(1);
- }
-
- int getcharacter()
- {
- /*
- * Read a character from curren input file.
- * WARNING: your getc() must convert lines that end with CR+LF
- * to LF and CP/M's EOF character (^Z) to a -1.
- * Also, getc() must return a -1 when attempting to read from
- * an unopened file.
- */
- register int c;
-
- DBUG_ENTER("getcharacter");
- if(pattern_arg) {
- if(ungetc_arg) {
- c = ungetc_arg;
- ungetc_arg = 0;
- } else if(*pattern_arg)
- c = *pattern_arg++;
- else
- c = EOF;
- } else {
- #ifdef BDS_C
- /*
- * BDS C doesn't do CR+LF to LF and ^Z to -1 conversions
- * <gag>
- */
- if ( (c = getc( Fileptr )) == '\r' )
- {
- if ( (c = getc( Fileptr )) != '\n' )
- {
- ungetc( c );
- c = '\r';
- }
- }
- else if ( c == 26 ) /* ^Z */
- c = -1;
- #else
- c = getc( Fileptr );
- #endif
-
- if ( c=='\n' )
- ++Linecount;
- }
- DBUG_PRINT("getcharacter",("'%c'", c));
- DBUG_RETURN(c);
- }
-
- ungetcharacter( c )
- register char c;
- {
- /*
- * Push a character back into the input stream.
- * If the character is a record seperator, or a newline character,
- * the record and line counters are adjusted appropriately.
- */
- DBUG_ENTER("ungetcharacter");
- if ( c == *Recordsep )
- --Recordcount;
- if ( c=='\n' )
- --Linecount;
- DBUG_PRINT("ungetcharacter",("'%c'", c));
- if(pattern_arg)
- DBUG_RETURN(ungetc_arg = c);
- DBUG_RETURN(ungetc( c, Fileptr ));
- }
-
- void endfile()
- {
- DBUG_ENTER("endfile");
- fclose( Fileptr );
- eof_seen = 0;
- Filename = NULL;
- Linecount = 0;
- DBUG_VOID_RETURN;
- }
-
- void error( s, severe )
- register char *s;
- register int severe;
- {
- DBUG_ENTER("error");
- if ( Filename )
- fprintf( stderr, "%s:", Filename );
-
- if ( Linecount )
- fprintf( stderr, " line %d:", Linecount );
-
- fprintf( stderr, " %s\n", s );
- if ( severe )
- exit( severe );
- DBUG_VOID_RETURN;
- }
-
- void usage()
- {
- DBUG_ENTER("usage");
- error( "Usage: bawk { action | - | -f <actfile> } <file> ...",
- USAGE_ERROR );
- DBUG_VOID_RETURN;
- }
-