home *** CD-ROM | disk | FTP | other *** search
Text File | 1990-01-02 | 13.1 KB | 653 lines | [TEXT/KAHL] |
- /* parse.y - parser for flex input */
-
- /*
- * Copyright (c) 1989 The Regents of the University of California.
- * All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * Vern Paxson.
- *
- * The United States Government has rights in this work pursuant to
- * contract no. DE-AC03-76SF00098 between the United States Department of
- * Energy and the University of California.
- *
- * Redistribution and use in source and binary forms are permitted
- * provided that the above copyright notice and this paragraph are
- * duplicated in all such forms and that any documentation,
- * advertising materials, and other materials related to such
- * distribution and use acknowledge that the software was developed
- * by the University of California, Berkeley. The name of the
- * University may not be used to endorse or promote products derived
- * from this software without specific prior written permission.
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
- */
-
- %token CHAR NUMBER SECTEND SCDECL XSCDECL WHITESPACE NAME PREVCCL EOF_OP
-
- %{
-
- #include "flexdef.h"
-
- #ifndef lint
-
- static char copyright[] =
- "@(#) Copyright (c) 1989 The Regents of the University of California.\n";
- static char CR_continuation[] = "@(#) All rights reserved.\n";
-
- static char rcsid[] =
- "@(#) $Header: parse.y,v 2.1 89/06/20 17:23:54 vern Exp $ (LBL)";
-
- #endif
-
- int pat, scnum, eps, headcnt, trailcnt, anyccl, lastchar, i, actvp, rulelen;
- int trlcontxt, xcluflg, cclsorted, varlength, variable_trail_rule;
- char clower();
-
- static int madeany = false; /* whether we've made the '.' character class */
- int previous_continued_action; /* whether the previous rule's action was '|' */
-
- %}
-
- %%
- goal : initlex sect1 sect1end sect2 initforrule
- { /* add default rule */
- int def_rule;
-
- pat = cclinit();
- cclnegate( pat );
-
- def_rule = mkstate( -pat );
-
- finish_rule( def_rule, false, 0, 0 );
-
- for ( i = 1; i <= lastsc; ++i )
- scset[i] = mkbranch( scset[i], def_rule );
-
- if ( spprdflt )
- fputs( "YY_FATAL_ERROR( \"flex scanner jammed\" )",
- temp_action_file );
- else
- fputs( "ECHO", temp_action_file );
-
- fputs( ";\n\tYY_BREAK\n", temp_action_file );
- }
- ;
-
- initlex :
- {
- /* initialize for processing rules */
-
- /* create default DFA start condition */
- scinstal( "INITIAL", false );
- }
- ;
-
- sect1 : sect1 startconddecl WHITESPACE namelist1 '\n'
- |
- | error '\n'
- { synerr( "unknown error processing section 1" ); }
- ;
-
- sect1end : SECTEND
- ;
-
- startconddecl : SCDECL
- {
- /* these productions are separate from the s1object
- * rule because the semantics must be done before
- * we parse the remainder of an s1object
- */
-
- xcluflg = false;
- }
-
- | XSCDECL
- { xcluflg = true; }
- ;
-
- namelist1 : namelist1 WHITESPACE NAME
- { scinstal( nmstr, xcluflg ); }
-
- | NAME
- { scinstal( nmstr, xcluflg ); }
-
- | error
- { synerr( "bad start condition list" ); }
- ;
-
- sect2 : sect2 initforrule flexrule '\n'
- |
- ;
-
- initforrule :
- {
- /* initialize for a parse of one rule */
- trlcontxt = variable_trail_rule = varlength = false;
- trailcnt = headcnt = rulelen = 0;
- current_state_type = STATE_NORMAL;
- previous_continued_action = continued_action;
- new_rule();
- }
- ;
-
- flexrule : scon '^' re eol
- {
- pat = link_machines( $3, $4 );
- finish_rule( pat, variable_trail_rule,
- headcnt, trailcnt );
-
- for ( i = 1; i <= actvp; ++i )
- scbol[actvsc[i]] =
- mkbranch( scbol[actvsc[i]], pat );
-
- if ( ! bol_needed )
- {
- bol_needed = true;
-
- if ( performance_report )
- fprintf( stderr,
- "'^' operator results in sub-optimal performance\n" );
- }
- }
-
- | scon re eol
- {
- pat = link_machines( $2, $3 );
- finish_rule( pat, variable_trail_rule,
- headcnt, trailcnt );
-
- for ( i = 1; i <= actvp; ++i )
- scset[actvsc[i]] =
- mkbranch( scset[actvsc[i]], pat );
- }
-
- | '^' re eol
- {
- pat = link_machines( $2, $3 );
- finish_rule( pat, variable_trail_rule,
- headcnt, trailcnt );
-
- /* add to all non-exclusive start conditions,
- * including the default (0) start condition
- */
-
- for ( i = 1; i <= lastsc; ++i )
- if ( ! scxclu[i] )
- scbol[i] = mkbranch( scbol[i], pat );
-
- if ( ! bol_needed )
- {
- bol_needed = true;
-
- if ( performance_report )
- fprintf( stderr,
- "'^' operator results in sub-optimal performance\n" );
- }
- }
-
- | re eol
- {
- pat = link_machines( $1, $2 );
- finish_rule( pat, variable_trail_rule,
- headcnt, trailcnt );
-
- for ( i = 1; i <= lastsc; ++i )
- if ( ! scxclu[i] )
- scset[i] = mkbranch( scset[i], pat );
- }
-
- | scon EOF_OP
- { build_eof_action(); }
-
- | EOF_OP
- {
- /* this EOF applies only to the INITIAL start cond. */
- actvsc[actvp = 1] = 1;
- build_eof_action();
- }
-
- | error
- { synerr( "unrecognized rule" ); }
- ;
-
- scon : '<' namelist2 '>'
- ;
-
- namelist2 : namelist2 ',' NAME
- {
- if ( (scnum = sclookup( nmstr )) == 0 )
- lerrsf( "undeclared start condition %s", nmstr );
-
- else
- actvsc[++actvp] = scnum;
- }
-
- | NAME
- {
- if ( (scnum = sclookup( nmstr )) == 0 )
- lerrsf( "undeclared start condition %s", nmstr );
- else
- actvsc[actvp = 1] = scnum;
- }
-
- | error
- { synerr( "bad start condition list" ); }
- ;
-
- eol : '$'
- {
- if ( trlcontxt )
- {
- synerr( "trailing context used twice" );
- $$ = mkstate( SYM_EPSILON );
- }
- else
- {
- trlcontxt = true;
-
- if ( ! varlength )
- headcnt = rulelen;
-
- ++rulelen;
- trailcnt = 1;
-
- eps = mkstate( SYM_EPSILON );
- $$ = link_machines( eps, mkstate( '\n' ) );
- }
- }
-
- |
- {
- $$ = mkstate( SYM_EPSILON );
-
- if ( trlcontxt )
- {
- if ( varlength && headcnt == 0 )
- /* both head and trail are variable-length */
- variable_trail_rule = true;
- else
- trailcnt = rulelen;
- }
- }
- ;
-
- re : re '|' series
- {
- varlength = true;
-
- $$ = mkor( $1, $3 );
- }
-
- | re2 series
- {
- if ( transchar[lastst[$2]] != SYM_EPSILON )
- /* provide final transition \now/ so it
- * will be marked as a trailing context
- * state
- */
- $2 = link_machines( $2, mkstate( SYM_EPSILON ) );
-
- mark_beginning_as_normal( $2 );
- current_state_type = STATE_NORMAL;
-
- if ( previous_continued_action )
- {
- /* we need to treat this as variable trailing
- * context so that the backup does not happen
- * in the action but before the action switch
- * statement. If the backup happens in the
- * action, then the rules "falling into" this
- * one's action will *also* do the backup,
- * erroneously.
- */
- if ( ! varlength || headcnt != 0 )
- {
- fprintf( stderr,
- "flex: warning - trailing context rule at line %d made variable because\n",
- linenum );
- fprintf( stderr,
- " of preceding '|' action\n" );
- }
-
- /* mark as variable */
- varlength = true;
- headcnt = 0;
- }
-
- if ( varlength && headcnt == 0 )
- { /* variable trailing context rule */
- /* mark the first part of the rule as the accepting
- * "head" part of a trailing context rule
- */
- /* by the way, we didn't do this at the beginning
- * of this production because back then
- * current_state_type was set up for a trail
- * rule, and add_accept() can create a new
- * state ...
- */
- add_accept( $1, num_rules | YY_TRAILING_HEAD_MASK );
- }
-
- $$ = link_machines( $1, $2 );
- }
-
- | series
- { $$ = $1; }
- ;
-
-
- re2 : re '/'
- {
- /* this rule is separate from the others for "re" so
- * that the reduction will occur before the trailing
- * series is parsed
- */
-
- if ( trlcontxt )
- synerr( "trailing context used twice" );
- else
- trlcontxt = true;
-
- if ( varlength )
- /* we hope the trailing context is fixed-length */
- varlength = false;
- else
- headcnt = rulelen;
-
- rulelen = 0;
-
- current_state_type = STATE_TRAILING_CONTEXT;
- $$ = $1;
- }
- ;
-
- series : series singleton
- {
- /* this is where concatenation of adjacent patterns
- * gets done
- */
- $$ = link_machines( $1, $2 );
- }
-
- | singleton
- { $$ = $1; }
- ;
-
- singleton : singleton '*'
- {
- varlength = true;
-
- $$ = mkclos( $1 );
- }
-
- | singleton '+'
- {
- varlength = true;
-
- $$ = mkposcl( $1 );
- }
-
- | singleton '?'
- {
- varlength = true;
-
- $$ = mkopt( $1 );
- }
-
- | singleton '{' NUMBER ',' NUMBER '}'
- {
- varlength = true;
-
- if ( $3 > $5 || $3 < 0 )
- {
- synerr( "bad iteration values" );
- $$ = $1;
- }
- else
- {
- if ( $3 == 0 )
- $$ = mkopt( mkrep( $1, $3, $5 ) );
- else
- $$ = mkrep( $1, $3, $5 );
- }
- }
-
- | singleton '{' NUMBER ',' '}'
- {
- varlength = true;
-
- if ( $3 <= 0 )
- {
- synerr( "iteration value must be positive" );
- $$ = $1;
- }
-
- else
- $$ = mkrep( $1, $3, INFINITY );
- }
-
- | singleton '{' NUMBER '}'
- {
- /* the singleton could be something like "(foo)",
- * in which case we have no idea what its length
- * is, so we punt here.
- */
- varlength = true;
-
- if ( $3 <= 0 )
- {
- synerr( "iteration value must be positive" );
- $$ = $1;
- }
-
- else
- $$ = link_machines( $1, copysingl( $1, $3 - 1 ) );
- }
-
- | '.'
- {
- if ( ! madeany )
- {
- /* create the '.' character class */
- anyccl = cclinit();
- ccladd( anyccl, '\n' );
- cclnegate( anyccl );
-
- if ( useecs )
- mkeccl( ccltbl + cclmap[anyccl],
- ccllen[anyccl], nextecm,
- ecgroup, CSIZE );
-
- madeany = true;
- }
-
- ++rulelen;
-
- $$ = mkstate( -anyccl );
- }
-
- | fullccl
- {
- if ( ! cclsorted )
- /* sort characters for fast searching. We use a
- * shell sort since this list could be large.
- */
- cshell( ccltbl + cclmap[$1], ccllen[$1] );
-
- if ( useecs )
- mkeccl( ccltbl + cclmap[$1], ccllen[$1],
- nextecm, ecgroup, CSIZE );
-
- ++rulelen;
-
- $$ = mkstate( -$1 );
- }
-
- | PREVCCL
- {
- ++rulelen;
-
- $$ = mkstate( -$1 );
- }
-
- | '"' string '"'
- { $$ = $2; }
-
- | '(' re ')'
- { $$ = $2; }
-
- | CHAR
- {
- ++rulelen;
-
- if ( $1 == '\0' )
- synerr( "null in rule" );
-
- if ( caseins && $1 >= 'A' && $1 <= 'Z' )
- $1 = clower( $1 );
-
- $$ = mkstate( $1 );
- }
- ;
-
- fullccl : '[' ccl ']'
- { $$ = $2; }
-
- | '[' '^' ccl ']'
- {
- /* *Sigh* - to be compatible Unix lex, negated ccls
- * match newlines
- */
- #ifdef NOTDEF
- ccladd( $3, '\n' ); /* negated ccls don't match '\n' */
- cclsorted = false; /* because we added the newline */
- #endif
- cclnegate( $3 );
- $$ = $3;
- }
- ;
-
- ccl : ccl CHAR '-' CHAR
- {
- if ( $2 > $4 )
- synerr( "negative range in character class" );
-
- else
- {
- if ( caseins )
- {
- if ( $2 >= 'A' && $2 <= 'Z' )
- $2 = clower( $2 );
- if ( $4 >= 'A' && $4 <= 'Z' )
- $4 = clower( $4 );
- }
-
- for ( i = $2; i <= $4; ++i )
- ccladd( $1, i );
-
- /* keep track if this ccl is staying in alphabetical
- * order
- */
- cclsorted = cclsorted && ($2 > lastchar);
- lastchar = $4;
- }
-
- $$ = $1;
- }
-
- | ccl CHAR
- {
- if ( caseins )
- if ( $2 >= 'A' && $2 <= 'Z' )
- $2 = clower( $2 );
-
- ccladd( $1, $2 );
- cclsorted = cclsorted && ($2 > lastchar);
- lastchar = $2;
- $$ = $1;
- }
-
- |
- {
- cclsorted = true;
- lastchar = 0;
- $$ = cclinit();
- }
- ;
-
- string : string CHAR
- {
- if ( caseins )
- if ( $2 >= 'A' && $2 <= 'Z' )
- $2 = clower( $2 );
-
- ++rulelen;
-
- $$ = link_machines( $1, mkstate( $2 ) );
- }
-
- |
- { $$ = mkstate( SYM_EPSILON ); }
- ;
-
- %%
-
-
- /* build_eof_action - build the "<<EOF>>" action for the active start
- * conditions
- */
-
- build_eof_action()
-
- {
- register int i;
-
- for ( i = 1; i <= actvp; ++i )
- {
- if ( sceof[actvsc[i]] )
- lerrsf( "multiple <<EOF>> rules for start condition %s",
- scname[actvsc[i]] );
-
- else
- {
- sceof[actvsc[i]] = true;
- fprintf( temp_action_file, "case YY_STATE_EOF(%s):\n",
- scname[actvsc[i]] );
- }
- }
-
- line_directive_out( temp_action_file );
- }
-
-
- /* synerr - report a syntax error
- *
- * synopsis
- * char str[];
- * synerr( str );
- */
-
- synerr( str )
- char str[];
-
- {
- syntaxerror = true;
- fprintf( stderr, "Syntax error at line %d: %s\n", linenum, str );
- }
-
-
- /* yyerror - eat up an error message from the parser
- *
- * synopsis
- * char msg[];
- * yyerror( msg );
- */
-
- yyerror( msg )
- char msg[];
-
- {
- }
-