home *** CD-ROM | disk | FTP | other *** search
- /*
- * Bawk C actions compiler
- */
- #include <stdio.h>
- #include "bawk.h"
-
- EXPR_NODE *act_compile( actbuf )
- register char *actbuf;/* where tokenized actions are compiled into */
- {
- DBUG_ENTER("act_compile");
- Where = ACTION;
- stmt_lex( actbuf );
- Actptr = actbuf;
- getoken();
- DBUG_RETURN(stmt_parse());
- }
-
- EXPR_NODE *pat_compile( actbuf )
- register char *actbuf;/* where tokenized actions are compiled into */
- {
- DBUG_ENTER("pat_compile");
- Where = PATTERN;
- stmt_lex( actbuf );
- Actptr = actbuf;
- getoken();
- DBUG_RETURN(stmt_parse());
- }
-
- void stmt_lex( actbuf )
- register char *actbuf;/* where tokenized actions are compiled into */
- {
- /*
- * Read and tokenize C actions from current input file into the
- * action buffer. Strip out comments and whitespace in the
- * process.
- */
- register char *actptr, /* actbuf pointer */
- *cp; /* work pointer */
- char buf[MAXLINELEN+1];/* string buffer */
- register int braces = 0,/* counts '{}' pairs - return when 0 */
- parens = 0, /* counts '()' pairs */
- i, /* temp */
- c, /* current input character */
- finished = 0;
-
- DBUG_ENTER("stmt_lex");
- actptr = actbuf;
- while ( !finished && ((c = getcharacter()) != -1) )
- {
- switch(c) {
- case ' ':
- case '\t':
- case '\n':
- /*
- * Skip over spaces, tabs and newlines
- */
- break;
- case '#':
- /*
- * Skip comments. Comments start with a '#' and
- * end at the next newline.
- */
- while ( (c = getcharacter()) != -1 && c!='\n' )
- ;
- break;
- case '{':
- if ( Where==PATTERN )
- {
- /*
- * We're compiling a pattern. The '{' marks
- * the beginning of an action statement.
- * Push the character back and return.
- */
- ungetcharacter( (char) '{' );
- finished = 1;
- }
- else
- {
- /*
- * We must be compiling an action statement.
- * '{'s mark beginning of action or compound
- * statements.
- */
- ++braces;
- *actptr++ = T_LBRACE;
- }
- break;
- case '}':
- *actptr++ = T_RBRACE;
- finished = (! --braces );
- break;
- case '(':
- ++parens;
- *actptr++ = T_LPAREN;
- break;
- case ')':
- if ( --parens < 0 )
- error( "mismatched '()'", ACT_ERROR );
- *actptr++ = T_RPAREN;
- break;
- case ',':
- if ( !braces && !parens )
- {
- /*
- * found a comma outside of any braces or
- * parens - this must be a regular
- * expression seperator.
- */
- ungetcharacter( (char) ',' );
- finished = 1;
- } else
- *actptr++ = T_COMMA;
- break;
- case '/':
- *actptr++ = T_DIV;
- break;
- case '@':
- *actptr++ = T_REGEXP;
- ungetcharacter( (char) c );
- actptr += re_compile( actptr );
- break;
- case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
- case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
- case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
- case 's': case 't': case 'u': case 'v': case 'w': case 'x':
- case 'y': case 'z':
- case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
- case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
- case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
- case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
- case 'Y': case 'Z':
- case '_':
- /*
- * It's a symbol reference. Copy the symbol into
- * string buffer.
- */
- cp = buf;
- do
- *cp++ = c;
- while ( (c=getcharacter()) != -1 &&
- (isalnum( c ) || (c == '_')));
- ungetcharacter( (char) c );
- *cp = 0;
- /*
- * Check if a keyword, builtin function or variable.
- */
- if ( c = iskeyword( buf ) )
- *actptr++ = c;
- else if ( i = isfunction( buf ) )
- {
- *actptr++ = T_FUNCTION;
- storeint( actptr, i );
- actptr += sizeof( i );
- }
- else
- {
- /*
- * It's a symbol name.
- */
- *actptr++ = T_VARIABLE;
- if ( !(cp = (char *) findvar( buf )) )
- cp = (char *) addvar( buf );
- storeptr( actptr, cp );
- actptr += sizeof( cp );
- }
- break;
- #ifdef QUOTE_STRING_HACK
- case '`':
- #endif
- case '"':
- /*
- * It's a string constant
- */
- *actptr++ = T_STRING;
- actptr = str_compile( actptr, c );
- break;
- case '\'':
- /*
- * It's a character constant
- */
- *actptr++ = T_CONSTANT;
- str_compile( buf, (char) '\'' );
- storeint( actptr, *buf );
- actptr += sizeof( i );
- break;
- case '0': case '1': case '2': case '3': case '4': case '5':
- case '6': case '7': case '8': case '9':
- /*
- * It's a numeric constant
- */
- *actptr++ = T_CONSTANT;
- cp = buf;
- do
- *cp++ = c;
- while ( (c=getcharacter()) != -1 && isdigit(c) );
- ungetcharacter( (char) c );
- *cp = 0;
- storeint( actptr, atoi( buf ) );
- actptr += sizeof( i );
- break;
- case '$':
- *actptr++ = T_DOLLAR;
- break;
- case '=':
- if ( (c=getcharacter()) == '=' )
- *actptr++ = T_EQ;
- else
- {
- ungetcharacter( (char) c );
- *actptr++ = T_ASSIGN;
- }
- break;
- case '!':
- if ( (c=getcharacter()) == '=' )
- *actptr++ = T_NE;
- else
- {
- ungetcharacter( (char) c );
- *actptr++ = T_LNOT;
- }
- break;
- case '<':
- if ( (c=getcharacter()) == '<' )
- *actptr++ = T_SHL;
- else if ( c == '=' )
- *actptr++ = T_LE;
- else
- {
- ungetcharacter( (char) c );
- *actptr++ = T_LT;
- }
- break;
- case '>':
- if ( (c=getcharacter()) == '>' )
- *actptr++ = T_SHR;
- else if ( c == '=' )
- *actptr++ = T_GE;
- else
- {
- ungetcharacter( (char) c );
- *actptr++ = T_GT;
- }
- break;
- case '&':
- if ( (c=getcharacter()) == '&' )
- *actptr++ = T_LAND;
- else
- {
- ungetcharacter( (char) c );
- *actptr++ = T_AND;
- }
- break;
- case '|':
- if ( (c=getcharacter()) == '|' )
- *actptr++ = T_LOR;
- else
- {
- ungetcharacter( (char) c );
- *actptr++ = T_OR;
- }
- break;
- case '+':
- if ( (c=getcharacter()) == '+' )
- *actptr++ = T_INCR;
- else
- {
- ungetcharacter( (char) c );
- *actptr++ = T_ADD;
- }
- break;
- case '-':
- if ( (c=getcharacter()) == '-' )
- *actptr++ = T_DECR;
- else
- {
- ungetcharacter( (char) c );
- *actptr++ = T_SUB;
- }
- break;
- case '[':
- *actptr++ = T_LBRACKET;
- break;
- case ']':
- *actptr++ = T_RBRACKET;
- break;
- case ';':
- *actptr++ = T_SEMICOLON;
- break;
- case '*':
- *actptr++ = T_MUL;
- break;
- case '%':
- *actptr++ = T_MOD;
- break;
- case '^':
- *actptr++ = T_XOR;
- break;
- case '~':
- *actptr++ = T_NOT;
- break;
- default:
- /*
- * Bad character in input line
- */
- error( "lexical error", ACT_ERROR );
- }
- if ( actptr >= Workbuf + MAXWORKBUFLEN )
- error( "action too long", MEM_ERROR );
- }
- if ( braces || parens )
- error( "mismatched '{}' or '()'", ACT_ERROR );
-
- *actptr++ = T_EOF;
-
- DBUG_VOID_RETURN;
- }
-
/*
 * Compile a string from the current input file into the given string
 * buffer.  Stop when the input character is the delimiter in "delim".
 * The result is NUL terminated.
 *
 * Escapes recognised after a backslash:
 *   \b \n \t \f \r      the usual control characters
 *   \o \oo \ooo         octal value; the first digit must be 0-3 and
 *                       at most two further octal digits are taken
 *   \<newline>          line continuation, nothing is stored
 *   \<anything else>    that character itself (e.g. \" \' \\)
 *
 * Returns a pointer to the first character after the stored string
 * (i.e. just past its NUL), or NULL after reporting an error when the
 * input ends before the delimiter is seen.
 *
 * No length limit is applied: the caller must supply a buffer large
 * enough for the whole string.
 */
char *
str_compile( str, delim )
register char *str, delim;
{
	register int c;		/* current input character */
	register int value;	/* accumulates an octal escape */
	register int digits;	/* octal digits consumed so far */
	char msg[32];		/* holds "missing X delimiter" */

	DBUG_ENTER("str_compile");
	while ( (c = getcharacter()) != -1 && c != delim )
	{
		if ( c == '\\' )
		{
			switch ( c = getcharacter() )
			{
			case -1: goto err;
			case 'b': c = '\b'; break;
			case 'n': c = '\n'; break;
			case 't': c = '\t'; break;
			case 'f': c = '\f'; break;
			case 'r': c = '\r'; break;
			case '0':
			case '1':
			case '2':
			case '3':
				/*
				 * Octal escape.  Each character is read
				 * into an int so end of input is detected
				 * reliably, and the first non-octal
				 * character is pushed back instead of
				 * being swallowed.
				 */
				value = c - '0';
				for ( digits = 1; digits < 3; digits++ )
				{
					if ( (c = getcharacter()) == -1 )
						goto err;
					if ( c < '0' || c > '7' )
					{
						ungetcharacter( (char) c );
						break;
					}
					value = value * 8 + (c - '0');
				}
				c = value;
				break;
			case '\n':
				/*
				 * Backslash-newline joins two lines:
				 * drop both and carry on with whatever
				 * follows.
				 */
				continue;
			default:
				/*
				 * Any other escaped character stands for
				 * itself; c already holds it.
				 */
				break;
			}
		}
		*str++ = c;
	}
	if ( c == -1 )
		goto err;	/* input ended before the delimiter */
	*str++ = 0;

	DBUG_RETURN(str);
err:
	/* msg is sized for the whole message; 4 is the original error code */
	sprintf( msg, "missing %c delimiter", delim );
	error( msg, 4 );
	DBUG_RETURN(NULL);
}
-
- void storeint( ip, i )
- char *ip;
- int i;
- {
- DBUG_ENTER("storeint");
- movmem((char *) &i, ip, sizeof(i));
- DBUG_VOID_RETURN;
- }
-
- void storeptr( pp, p )
- char *pp, *p;
- {
- DBUG_ENTER("storeptr");
- movmem((char *) &p, pp, sizeof(p));
- DBUG_VOID_RETURN;
- }
-
/*
 * Read back an integer whose bytes were written at ip by storeint().
 * The bytes are copied out, so ip need not be aligned for an int.
 */
int fetchint( ip )
register char *ip;
{
	int value;

	DBUG_ENTER("fetchint");
	movmem( ip, (char *) &value, sizeof value );
	DBUG_RETURN(value);
}
-
/*
 * Read back a pointer whose bytes were written at pp by storeptr().
 * The bytes are copied out, so pp need not be aligned for a pointer.
 */
char *
fetchptr( pp )
register char *pp;
{
	char *value;

	DBUG_ENTER("fetchptr");
	movmem( pp, (char *) &value, sizeof value );
	DBUG_RETURN(value);
}
-
#ifndef DBUG_OFF
/*
 * Printable token names for debug traces; getoken() indexes this table
 * with the current token value.  Entry 0 is a null pointer.
 * NOTE(review): the order presumably mirrors the token numbering in
 * bawk.h - keep the two in step when adding tokens.
 */
char *token_name[] = {
	0,

	/* names without the T_ prefix */
	"CHAR", "BOL", "EOL", "ANY", "CLASS",
	"NCLASS", "STAR", "PLUS", "MINUS", "ALPHA",
	"DIGIT", "NALPHA", "PUNCT", "RANGE", "ENDPAT",

	/* operands and punctuation */
	"T_STRING", "T_DOLLAR", "T_REGEXP", "T_REGEXP_ARG",
	"T_CONSTANT", "T_VARIABLE", "T_FUNCTION", "T_SEMICOLON",
	"T_EOF", "T_LBRACE", "T_RBRACE", "T_LPAREN",
	"T_RPAREN", "T_LBRACKET", "T_RBRACKET", "T_COMMA",

	/* operators */
	"T_ASSIGN", "T_STAR", "T_MUL", "T_DIV",
	"T_MOD", "T_ADD", "T_UMINUS", "T_SUB",
	"T_SHL", "T_SHR", "T_LT", "T_LE",
	"T_GT", "T_GE", "T_EQ", "T_NE",
	"T_NOT", "T_ADDROF", "T_AND", "T_XOR",
	"T_OR", "T_LNOT", "T_LAND", "T_LOR",
	"T_INCR", "T_DECR", "T_POSTINCR", "T_POSTDECR",

	/* keywords and builtin names */
	"T_IF", "T_ELSE", "T_WHILE", "T_BREAK",
	"T_CHAR", "T_INT", "T_BEGIN", "T_END",
	"T_NF", "T_NR", "T_FS", "T_RS",
	"T_FILENAME",

	/* remaining entries */
	"T_STATEMENT", "T_DECLARE", "T_ARRAY_DECLARE"
};
#endif
-
- char getoken()
- {
- register char *cp;
- register int i;
-
- DBUG_ENTER("getoken");
- switch ( Token = *Actptr++ )
- {
- case T_STRING:
- case T_REGEXP:
- Value.dptr = Actptr;
- Actptr += strlen( Actptr ) + 1;
- break;
- case T_VARIABLE:
- Value.dptr = fetchptr( Actptr );
- Actptr += sizeof( cp );
- break;
- case T_FUNCTION:
- case T_CONSTANT:
- Value.ival = fetchint( Actptr );
- Actptr += sizeof( i );
- break;
- case T_EOF:
- --Actptr;
- default:
- Value.dptr = 0;
- }
-
- DBUG_PRINT("getoken",
- ("Token='%s' (%d), Value=%d",token_name[Token],Token,Value.ival));
- DBUG_RETURN(Token);
- }
-