Black Box 4

home *** CD-ROM | disk | FTP | other *** search

/ Black Box 4 / BlackBox.cdr / progc / snip1091.arj / INTERP.C < prev next >

Wrap

Text File | 1991-09-24 | 16KB | 598 lines

/* * * INTERP.C - An arithmetic expression interpreter in ANSI C. * * Written September, 1991 by Jonathan Guthrie and placed in the public domain * */ #include <setjmp.h> #include <stdlib.h> #include <ctype.h> #include <float.h> #include <math.h> #ifdef DEBUG #include <stdio.h> #endif /* DEBUG */ #include "interp.h" /* This is the EBNF grammar of the language handled by the interpreter. NOTE: What I'm calling a number is slightly different from what C calls a float. I did this to make the number recognizer easier to write. expression = factor {("+"|"-") factor}. factor = power {("*"|"/") power}. power = {value "^"} value. value = number | ("(" expression ")") | ("+"|"-" value). number = digit {digit} ["." {digit}] [("E"|"e") ["+"|"-"] digit {digit}]. Implementation: The evaluator is implemented as a recursive evaluator. I did this because recursive evaluators are straightforward to code and simple to explain. Basically, each one of the productions above, except for number, has a function. If a production involves a nonterminal (a nonterminal is something that appears on the left side of a production) then the function handling the production calls the function that handles the nonterminal's production. Terminals (quoted strings, in the grammar) and numbers are handled by gettoken. Numbers are handled differently because recognizing a number is more properly the domain of a lexical analyzer than a parser. Basically, I only include the numbers in the grammar in order to show what _I_ am calling a number. In the code, numbers are treated as terminals. */ #define PAREN 0x8000 #define RPAREN 0x4000 #define UNARY 0x2000 #define POWER 0x1000 #define MULTOP 0x0800 #define ADDOP 0x0400 #define NUMBER 0x0200 #define ERROR 0x0100 #define ISPAREN(x) ((x) & PAREN) #define ISUNARY(x) ((x) & UNARY) #define ISPOWER(x) ((x) & POWER) #define ISMULTOP(x) ((x) & MULTOP) #define ISADDOP(x) ((x) & ADDOP) #define ISNUMBER(x) ((x) & NUMBER) #define LPAREN PAREN #define UNPLUS UNARY #define UNMINUS UNARY + 1 #define STAR MULTOP #define SLASH MULTOP + 1 #define PLUS ADDOP #define MINUS ADDOP + 1 typedef struct { unsigned type; double value; } TOKEN; #ifndef TRUE #define TRUE 1 #endif #ifndef FALSE #define FALSE 0 #endif /* * For error handling by longjmp() */ static jmp_buf error; /* * To avoid needing an unget(). */ static TOKEN old_token; /* * To avoid passing input_func back and forth. */ static int (*input_func)(void); /* * I need this one prototype because expression is called recursively */ static void expression(TOKEN *out); /* * It's just a big size. Numbers are limited to no more than 100 characters * long. NOTE: The length is checked to prevent buffer overruns. */ #define BUFFLEN 100 /* * getnumber() recognizes a number in the input stream. The number is * returned in value, the first character of the number is passed in * pushback which also passes back the first character after the number. * * getnumber() returns a status code which tells whether the number is * ill-formed or not. * * Implementation: getnumber() is coded as a deterministic finite-state- * machine with six states. State 0 is the start state, and state 5 is * the end state. Each state gets a case in the main switch statement. * The cases are explained as they appear. */ static int getnumber(double *value, int *pushback) { int i, c, state, error; char buff[BUFFLEN]; i = state = error = 0; c = buff[i++] = *pushback; while(state != 5) { c = input_func(); buff[i++] = c; if((BUFFLEN-1) <= i) state = 4; switch(state) { /* In state 0, the function has seen no decimal points or exponents, just an initial string of numbers. */ case 0: if(!isdigit(c)) { if('.' == c) state = 1; else if('E' == toupper(c)) state = 2; else { *pushback = c; state = 5; } } break; /* In state 1, the function has seen a decimal point, but no exponent. */ case 1: if(!isdigit(c)) { if('E' == toupper(c)) state = 2; else { *pushback = c; state = 5; } } break; /* In state 2, the function has seen an exponent, and needs to see either a digit or a + or a - or there's an error. */ case 2: if(isdigit(c)) { state = 4; } else if (('+' == c) || ('-' == c)) { state = 3; } else { *pushback = c; error = 1; state = 5; } break; /* In state 3, the function has seen an exponent and a + or - and it needs to see a digit or there's an error */ case 3: if(isdigit(c)) { state = 4; } else { *pushback = c; error = 1; state = 5; } break; /* In state 4, the function is looking for a trailing string of digits. */ case 4: if(!isdigit(c)) { *pushback = c; state = 5; } break; /* None of the other states are valid. Flag an error if you see them. (State 5 doesn't get a case because it should be filtered out at the top of the loop.) */ default: state = 5; error = 1; } } /* Now, convert the found string to a number */ buff[i] = 0; if(!error) *value = strtod(buff, NULL); return error; } /* * This is the function that handles getting nonterminals or "tokens" * as well as numbers. Since all tokens are either numbers (which * all begin with digits, strangely enough) or single characters, I don't * go to a whole lot of trouble checking things. Whitespace is ignored. * If it sees a character it doesn't recognize, it returns the error * token. * * One bit of wierdness: The unary_flag indicates whether or not "+" and * "-" represent unary operators or binary operators. Since that is * context-dependant, gettoken would otherwise have difficulty telling. * * gettoken() returns TRUE if an error occurred, else it returns FALSE. * The token itself is returned in token. */ static int gettoken(TOKEN *token, int unary_flag) { int c; static int pushback = 0; if(pushback) { c = pushback; } else { do { c = input_func(); } while(isspace(c)); } if(isdigit(c)) { pushback = c; if(getnumber(&(token->value), &pushback)) token->type = ERROR; else token->type = NUMBER; pushback = isspace(pushback) ? 0 : pushback; } else { pushback = 0; switch(c) { case '+': token->type = ((unary_flag) ? UNPLUS : PLUS); break; case '-': token->type = ((unary_flag) ? UNMINUS : MINUS); break; case '*': token->type = STAR; break; case '/': token->type = SLASH; break; case '^': token->type = POWER; break; case '(': token->type = LPAREN; break; case ')': token->type = RPAREN; break; default: token->type = ERROR; } } return ERROR == token->type; } /* * This is the function that handles the value nonterminal. It does * unary operators, parentheses, and getting numbers. In fact, this * function is the one that gets most of the tokens from the list. */ static void value(TOKEN *out) { TOKEN in; gettoken(out, TRUE); if(ISUNARY(out->type)) { value(&in); if(!ISNUMBER(in.type)) longjmp(error, EV_BADVAL); out->value = (UNMINUS == out->type) ? -1 * in.value : in.value; out->type = NUMBER; } else if(ISPAREN(out->type)) { expression(out); if(!ISNUMBER(out->type)) longjmp(error, EV_BADEXP); if(RPAREN != old_token.type) longjmp(error, EV_BADPAREN); /* error */ } else if(!ISNUMBER(out->type)) longjmp(error, EV_BADNUM); /* error */ } /* * This function is the same as a pow() function, but it checks for * overflow and domain errors before executing the calculation. * * NOTE: This function is considerably more limited than C's pow() * function. src1 is not allowed to be negative even if src2 is an * integer. Also it will reject, as overflows, some calculations * that would not overflow pow(). However, it will do most calcs * properly. */ static int bp_pow(double src1, double src2, double *dest) { int exp1; if(0.0 > src1) return TRUE; frexp(src1, &exp1); if(DBL_MAX_EXP < fabs(src2 * exp1)) return TRUE; *dest = pow(src1, src2); return FALSE; } /* * This is the function that handles the power nonterminal. "Powers" are * strings of values separated by "^"s. Note that this function is * recursive because strings of powers are normally evaluated right-to-left * rather than the usual left to right. */ static void power(TOKEN *out) { TOKEN in1, in2; value(out); if(!ISNUMBER(out->type)) longjmp(error, EV_BADVAL); gettoken(&in1, FALSE); if(ISPOWER(in1.type)) { power(&in2); if(!ISNUMBER(out->type)) longjmp(error, EV_BADPOW); if(bp_pow(out->value, in2.value, &(out->value))) longjmp(error, EV_OVERFLOW); out->type = NUMBER; } else old_token = in1; } /* * This function multiplies two numbers together, first checking them * for overflow. If the multiplication would overflow, TRUE is returned. * Else, FALSE is returned and the product is returned in dest. */ static int bp_mul(double src1, double src2, double *dest) { int exp1, exp2; frexp(src1, &exp1); frexp(src2, &exp2); if(DBL_MAX_EXP < abs(exp1 + exp2)) return TRUE; *dest = src1 * src2; return FALSE; } /* * This function divides one number into another, first checking * for overflow and division by zero. If an error is detected, TRUE * is returned. Else, FALSE is returned and the result is returned * in dest. */ int bp_div(double src1, double src2, double *dest) { int exp1, exp2; frexp(src1, &exp1); frexp(src2, &exp2); if((DBL_MAX_EXP < abs(exp1 - exp2)) || (0.0 == src2)) return TRUE; *dest = src1 / src2; return FALSE; } /* * This function handles the factor nonterminal. Factors are strings of * powers separated by "/" and "*". */ static void factor(TOKEN *out) { TOKEN in; int oldtype; power(out); if(!ISNUMBER(out->type)) longjmp(error, EV_BADPOW); while(ISMULTOP(old_token.type)) { oldtype = old_token.type; power(&in); if(!ISNUMBER(in.type)) longjmp(error, EV_BADPOW); if(STAR == oldtype) { if(bp_mul(out->value, in.value, &(out->value))) longjmp(error, EV_OVERFLOW); } else { if(0.0 == in.value) longjmp(error, EV_DIV0); if(bp_div(out->value, in.value, &(out->value))) longjmp(error, EV_OVERFLOW); } out->type = NUMBER; } } /* * This function handles the expression nonterminal. Expressions are * strings of factors separated by "+" and "-". Addition and subtraction * are not checked for overflow because I deem that it would be more * trouble than it's worth. If you know of a simple way to check an * addition or subtraction for overflow, let me know. */ static void expression(TOKEN *out) { TOKEN in; int oldtype; factor(out); if(!ISNUMBER(out->type)) longjmp(error, EV_BADFACT); while(ISADDOP(old_token.type)) { oldtype = old_token.type; factor(&in); if(!ISNUMBER(in.type)) longjmp(error, EV_BADFACT); out->value = (PLUS == oldtype) ? out->value + in.value : out->value - in.value; out->type = NUMBER; } } /* * This is the main function for the evaluator. The two parameters are * a pointer to where the result should be stored and a pointer to the * input function. The input function can be literally anything that * returns a single character every time it's called. The example code * contains an appropriate function for evaluating the expression * stored in a string. * * This function returns an error code as defined in interp.h. */ int eval(double *answer, int (*get_a_char)(void)) { TOKEN result; int errcode; if(EV_RES_OK == (errcode = setjmp(error))) { input_func = get_a_char; expression(&result); if(ISNUMBER(result.type)) { errcode = EV_RES_OK; *answer = result.value; } else errcode = EV_BADEXP; } return errcode; } /* What follows is example/test code */ #ifdef DEBUG int charcount; const char *eval_string; /* This is the testing "input function" */ static int gchar() { return eval_string[charcount++]; } int main() { char buffer[100]; double result; int errcode; fputs("What is the expression? ", stdout); fgets(buffer, 100, stdin); eval_string = buffer; charcount = 0; errcode = eval(&result, gchar); if(errcode) printf("Error: %s\n", EV_results[errcode]); else printf("\n\nThe result is: %f\n", result); return(EXIT_SUCCESS); } #endif /* DEBUG */