home *** CD-ROM | disk | FTP | other *** search
- //------------------------------------------------------------------------
- // ^FILE: fsm.c - implement a finite state machine
- //
- // ^DESCRIPTION:
- // This file implements a finite state machine tailored to the task of
- // parsing syntax strings for command-line arguments.
- //
- // ^HISTORY:
- // 03/27/92 Brad Appleton <brad@ssd.csd.harris.com> Created
- //-^^---------------------------------------------------------------------
-
- #include <stdlib.h>
- #include <iostream.h>
- #include <ctype.h>
- #include <string.h>
-
- #include "fsm.h"
-
// Characters that have a "special" meaning inside a syntax string.
//
// NOTE: there is deliberately no comma after the last enumerator; a
// trailing comma in an enumerator list is only accepted from C++11 on.
enum  {
   c_LBRACE = '[',   // left square brace  (raises the nesting level)
   c_RBRACE = ']',   // right square brace (lowers the nesting level)
   c_ALT    = '|',   // separates an option character from a keyword name
   c_LIST   = '.'    // first character of the "..." list ellipsis
} ;
-
-
- //-------------------
- // ^FUNCTION: SyntaxFSM::skip - skip to the next token
- //
- // ^SYNOPSIS:
- // SyntaxFSM::skip(input)
- //
- // ^PARAMETERS:
- // const char * & input;
- // -- the current "read" position in the syntax string.
- //
- // ^DESCRIPTION:
- // Skip past all whitespace and past square braced (recording the
- // current brace-nesting level and the number of balanced braces
- // parsed).
- //
- // ^REQUIREMENTS:
- // None.
- //
- // ^SIDE-EFFECTS:
- // Updates "input" to point to the next token (or eos)
- //
- // ^RETURN-VALUE:
- // None.
- //
- // ^ALGORITHM:
- // Trivial.
- //-^^----------------
- void
- SyntaxFSM::skip(const char * & input) {
- if ((! input) || (! *input)) return;
-
- while (isspace(*input)) ++input;
- while ((*input == c_LBRACE) || (*input == c_RBRACE)) {
- if (*input == c_LBRACE) {
- ++lev;
- } else {
- if (lev > 0) {
- ++nbpairs;
- } else {
- fsm_state = ERROR;
- cerr << "too many '" << char(c_RBRACE) << "' characters." << endl;
- }
- --lev;
- }
- ++input;
- while (isspace(*input)) ++input;
- }//while
- }
-
-
- //-------------------
- // ^FUNCTION: SyntaxFSM::parse_token - parse a token
- //
- // ^SYNOPSIS:
- // SyntaxFSM::parse_token(input)
- //
- // ^PARAMETERS:
- // const char * & input;
- // -- the current "read" position in the syntax string.
- //
- // ^DESCRIPTION:
- // Get the next token from the input string.
- //
- // ^REQUIREMENTS:
- // input should be non-NULL.
- //
- // ^SIDE-EFFECTS:
- // Updates "input" to point to the next token (or eos)
- //
- // ^RETURN-VALUE:
- // None.
- //
- // ^ALGORITHM:
- // Trivial.
- //-^^----------------
- void
- SyntaxFSM::parse_token(const char * & input)
- {
- while (*input && (! isspace(*input)) &&
- (*input != c_LBRACE) && (*input != c_RBRACE) &&
- ((*input != c_LIST) || (fsm_state == OPTION)))
- {
- ++input;
- }
- }
-
-
- //-------------------
- // ^FUNCTION: SyntaxFSM::operator() - get a token
- //
- // ^SYNOPSIS:
- // SyntaxFSM::operator()(input, token)
- //
- // ^PARAMETERS:
- // const char * & input;
- // -- the current "read" position in the syntax string.
- //
- // token_t & token;
- // -- where to place the token that we will find.
- //
- // ^DESCRIPTION:
- // Get the next token from the input string.
- //
- // ^REQUIREMENTS:
- // None.
- //
- // ^SIDE-EFFECTS:
- // - updates "input" to point to the next token (or eos)
- // - updates "token" to be the token that we found
- //
- // ^RETURN-VALUE:
- // 0 if we are in a non-FINAL state; non-zero otherwise..
- //
- // ^ALGORITHM:
- // It gets complicated so follow along.
- //-^^----------------
- int
- SyntaxFSM::operator()(const char * & input, token_t & token)
- {
- token.set(NULL, 0);
-
- // if inout is NULL or empty - then we are finished
- if ((! input) || (! *input)) {
- if (lev) {
- cerr << "not enough '" << char(c_RBRACE) << "' characters." << endl ;
- fsm_state = ERROR;
- return (fsm_state != FINAL);
- } else {
- fsm_state = FINAL;
- return (fsm_state != FINAL);
- }
- }
-
- skip(input); // skip whitespace
-
- const char * start = input;
-
- // the token we are to parse depends on what state we are in
- switch(fsm_state) {
- case START :
- // We are parsing either an option-character name or a value.
- // If it is an option-character name, the character that stops
- // the input scan will be c_ALT.
- //
- if (*input != c_ALT) ++input;
- if (*input == c_ALT) {
- fsm_state = OPTION;
- if (start != input) token.set(start, 1);
- } else {
- parse_token(input);
- fsm_state = VALUE;
- token.set(start, (input - start));
- }
- ++ntoks;
- break;
-
- case OPTION :
- // We parsed an option-character already so we had better see a keyword
- // name this time around.
- //
- start = ++input; // skip past the '|' character
- if (! isspace(*input)) {
- parse_token(input);
- token.set(start, (input - start));
- }
- fsm_state = KEYWORD;
- ++ntoks;
- break;
-
- case KEYWORD :
- // We parsed a keyword already - if anything is here then it better be a
- // value name.
- //
- if (*input) {
- parse_token(input);
- fsm_state = VALUE;
- token.set(start, (input - start));
- ++ntoks;
- } else {
- fsm_state = FINAL;
- }
- break;
-
- case VALUE :
- // We already parsed a value name - all that could possibly be left
- // (that we be valid) is an ellipsis ("...") indicating a list.
- //
- if (! *input) {
- fsm_state = FINAL;
- } else if (::strncmp(input, "...", 3) == 0) {
- fsm_state = LIST;
- token.set(input, 3);
- input += 3;
- ++ntoks;
- } else {
- fsm_state = ERROR;
- cerr << "unexpected token \"" << input << "\"." << endl ;
- }
- break;
-
- case LIST :
- // We already parsed an ellipsis, there better not be anything left
- if (! *input) {
- fsm_state = FINAL;
- } else {
- fsm_state = ERROR;
- cerr << "unexpected token \"" << input << "\"." << endl ;
- }
- break;
-
- case ERROR :
- case FINAL :
- default :
- break;
- }
-
- if (fsm_state == FINAL) {
- skip(input);
- if ((! *input) && lev) {
- cerr << "not enough '" << char(c_RBRACE) << "' characters." << endl ;
- fsm_state = ERROR;
- } else if (*input) {
- cerr << "unexpected token \"" << input << "\"." << endl ;
- fsm_state = ERROR;
- }
- }
-
- return (fsm_state != FINAL);
- }
-
-