home *** CD-ROM | disk | FTP | other *** search
- Subject: v19i057: Flex, a fast LEX replacement, Part03/07
- Newsgroups: comp.sources.unix
- Sender: sources
- Approved: rsalz@uunet.UU.NET
-
- Submitted-by: Vern Paxson <vern@csam.lbl.gov>
- Posting-number: Volume 19, Issue 57
- Archive-name: flex2/part03
-
- #! /bin/sh
- # This is a shell archive. Remove anything before this line, then unpack
- # it by saving it into a file and typing "sh file". To overwrite existing
- # files, type "sh file -c". You can also feed this as standard input via
- # unshar, or by typing "sh <file", e.g.. If this archive is complete, you
- # will see the following message at the end:
- # "End of archive 3 (of 7)."
- # Contents: flex/main.c flex/nfa.c
- # Wrapped by rsalz@prune.bbn.com on Thu Jun 22 19:01:46 1989
- PATH=/bin:/usr/bin:/usr/ucb ; export PATH
- if test -f 'flex/main.c' -a "${1}" != "-c" ; then
- echo shar: Will not clobber existing file \"'flex/main.c'\"
- else
- echo shar: Extracting \"'flex/main.c'\" \(16556 characters\)
- sed "s/^X//" >'flex/main.c' <<'END_OF_FILE'
- X/* flex - tool to generate fast lexical analyzers
- X *
- X *
- X * Copyright (c) 1989 The Regents of the University of California.
- X * All rights reserved.
- X *
- X * This code is derived from software contributed to Berkeley by
- X * Vern Paxson.
- X *
- X * The United States Government has rights in this work pursuant to
- X * contract no. DE-AC03-76SF00098 between the United States Department of
- X * Energy and the University of California.
- X *
- X * Redistribution and use in source and binary forms are permitted
- X * provided that the above copyright notice and this paragraph are
- X * duplicated in all such forms and that any documentation,
- X * advertising materials, and other materials related to such
- X * distribution and use acknowledge that the software was developed
- X * by the University of California, Berkeley. The name of the
- X * University may not be used to endorse or promote products derived
- X * from this software without specific prior written permission.
- X * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
- X * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
- X * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
- X *
- X */
- X
- X#ifndef lint
- X
- Xstatic char copyright[] =
- X "@(#) Copyright (c) 1989 The Regents of the University of California.\n";
- Xstatic char CR_continuation[] = "@(#) All rights reserved.\n";
- X
- Xstatic char rcsid[] =
- X "@(#) $Header: main.c,v 2.2 89/06/20 16:36:26 vern Exp $ (LBL)";
- X
- X#endif
- X
- X
- X#include "flexdef.h"
- X
- Xstatic char flex_version[] = "2.1 (beta)";
- X
- X
- X/* these globals are all defined and commented in flexdef.h */
- Xint printstats, syntaxerror, eofseen, ddebug, trace, spprdflt;
- Xint interactive, caseins, useecs, fulltbl, usemecs;
- Xint fullspd, gen_line_dirs, performance_report, backtrack_report;
- Xint yymore_used, reject, real_reject, continued_action;
- Xint yymore_really_used, reject_really_used;
- Xint datapos, dataline, linenum;
- XFILE *skelfile = NULL;
- Xchar *infilename = NULL;
- Xint onestate[ONE_STACK_SIZE], onesym[ONE_STACK_SIZE];
- Xint onenext[ONE_STACK_SIZE], onedef[ONE_STACK_SIZE], onesp;
- Xint current_mns, num_rules, current_max_rules, lastnfa;
- Xint *firstst, *lastst, *finalst, *transchar, *trans1, *trans2;
- Xint *accptnum, *assoc_rule, *state_type, *rule_type, *rule_linenum;
- Xint current_state_type;
- Xint variable_trailing_context_rules;
- Xint numtemps, numprots, protprev[MSP], protnext[MSP], prottbl[MSP];
- Xint protcomst[MSP], firstprot, lastprot, protsave[PROT_SAVE_SIZE];
- Xint numecs, nextecm[CSIZE + 1], ecgroup[CSIZE + 1], nummecs, tecfwd[CSIZE + 1];
- Xint tecbck[CSIZE + 1];
- Xint lastsc, current_max_scs, *scset, *scbol, *scxclu, *sceof, *actvsc;
- Xchar **scname;
- Xint current_max_dfa_size, current_max_xpairs;
- Xint current_max_template_xpairs, current_max_dfas;
- Xint lastdfa, *nxt, *chk, *tnxt;
- Xint *base, *def, tblend, firstfree, **dss, *dfasiz;
- Xunion dfaacc_union *dfaacc;
- Xint *accsiz, *dhash, numas;
- Xint numsnpairs, jambase, jamstate;
- Xint lastccl, current_maxccls, *cclmap, *ccllen, *cclng, cclreuse;
- Xint current_max_ccl_tbl_size;
- Xchar *ccltbl;
- Xchar *starttime, *endtime, nmstr[MAXLINE];
- Xint sectnum, nummt, hshcol, dfaeql, numeps, eps2, num_reallocs;
- Xint tmpuses, totnst, peakpairs, numuniq, numdup, hshsave;
- Xint num_backtracking, bol_needed;
- XFILE *temp_action_file;
- XFILE *backtrack_file;
- Xint end_of_buffer_state;
- X#ifndef SHORT_FILE_NAMES
- Xchar action_file_name[] = "/tmp/flexXXXXXX";
- X#else
- Xchar action_file_name[] = "flexXXXXXX.tmp";
- X#endif
- X
- X#ifndef SHORT_FILE_NAMES
- Xstatic char outfile[] = "lex.yy.c";
- X#else
- Xstatic char outfile[] = "lexyy.c";
- X#endif
- Xstatic int outfile_created = 0;
- X
- X
- X/* flex - main program
- X *
- X * synopsis (from the shell)
- X * flex [-v] [file ...]
- X */
- X
- Xmain( argc, argv )
- Xint argc;
- Xchar **argv;
- X
- X {
- X flexinit( argc, argv );
- X
- X readin();
- X
- X if ( syntaxerror )
- X flexend( 1 );
- X
- X if ( yymore_really_used == REALLY_USED )
- X yymore_used = true;
- X else if ( yymore_really_used == REALLY_NOT_USED )
- X yymore_used = false;
- X
- X if ( reject_really_used == REALLY_USED )
- X reject = true;
- X else if ( reject_really_used == REALLY_NOT_USED )
- X reject = false;
- X
- X if ( performance_report )
- X {
- X if ( yymore_used )
- X fprintf( stderr,
- X "yymore() entails a minor performance penalty\n" );
- X
- X if ( interactive )
- X fprintf( stderr,
- X "-I (interactive) entails a minor performance penalty\n" );
- X
- X if ( reject )
- X fprintf( stderr,
- X "REJECT entails a large performance penalty\n" );
- X
- X if ( variable_trailing_context_rules )
- X fprintf( stderr,
- X"Variable trailing context rules entail a large performance penalty\n" );
- X }
- X
- X if ( reject )
- X real_reject = true;
- X
- X if ( variable_trailing_context_rules )
- X reject = true;
- X
- X if ( (fulltbl || fullspd) && reject )
- X {
- X if ( real_reject )
- X flexerror( "REJECT cannot be used with -f or -F" );
- X else
- X flexerror(
- X "variable trailing context rules cannot be used with -f or -F" );
- X }
- X
- X /* convert the ndfa to a dfa */
- X ntod();
- X
- X /* generate the C state transition tables from the DFA */
- X make_tables();
- X
- X /* note, flexend does not return. It exits with its argument as status. */
- X
- X flexend( 0 );
- X
- X /*NOTREACHED*/
- X }
- X
- X
- X/* flexend - terminate flex
- X *
- X * synopsis
- X * int status;
- X * flexend( status );
- X *
- X * status is exit status.
- X *
- X * note
- X * This routine does not return.
- X */
- X
- Xflexend( status )
- Xint status;
- X
- X {
- X int tblsiz;
- X char *flex_gettime();
- X
- X if ( skelfile != NULL )
- X (void) fclose( skelfile );
- X
- X if ( temp_action_file )
- X {
- X (void) fclose( temp_action_file );
- X (void) unlink( action_file_name );
- X }
- X
- X if ( status != 0 && outfile_created )
- X {
- X (void) fclose( stdout );
- X (void) unlink( outfile );
- X }
- X
- X if ( backtrack_report )
- X {
- X if ( num_backtracking == 0 )
- X fprintf( backtrack_file, "No backtracking.\n" );
- X else if ( fullspd || fulltbl )
- X fprintf( backtrack_file,
- X "%d backtracking (non-accepting) states.\n",
- X num_backtracking );
- X else
- X fprintf( backtrack_file, "Compressed tables always backtrack.\n" );
- X
- X (void) fclose( backtrack_file );
- X }
- X
- X if ( printstats )
- X {
- X endtime = flex_gettime();
- X
- X fprintf( stderr, "flex version %s usage statistics:\n", flex_version );
- X fprintf( stderr, " started at %s, finished at %s\n",
- X starttime, endtime );
- X
- X fprintf( stderr, " %d/%d NFA states\n", lastnfa, current_mns );
- X fprintf( stderr, " %d/%d DFA states (%d words)\n", lastdfa,
- X current_max_dfas, totnst );
- X fprintf( stderr, " %d rules\n", num_rules - 1 /* - 1 for def. rule */ );
- X
- X if ( num_backtracking == 0 )
- X fprintf( stderr, " No backtracking\n" );
- X else if ( fullspd || fulltbl )
- X fprintf( stderr, " %d backtracking (non-accepting) states\n",
- X num_backtracking );
- X else
- X fprintf( stderr, " compressed tables always backtrack\n" );
- X
- X if ( bol_needed )
- X fprintf( stderr, " Beginning-of-line patterns used\n" );
- X
- X fprintf( stderr, " %d/%d start conditions\n", lastsc,
- X current_max_scs );
- X fprintf( stderr, " %d epsilon states, %d double epsilon states\n",
- X numeps, eps2 );
- X
- X if ( lastccl == 0 )
- X fprintf( stderr, " no character classes\n" );
- X else
- X fprintf( stderr,
- X " %d/%d character classes needed %d/%d words of storage, %d reused\n",
- X lastccl, current_maxccls,
- X cclmap[lastccl] + ccllen[lastccl],
- X current_max_ccl_tbl_size, cclreuse );
- X
- X fprintf( stderr, " %d state/nextstate pairs created\n", numsnpairs );
- X fprintf( stderr, " %d/%d unique/duplicate transitions\n",
- X numuniq, numdup );
- X
- X if ( fulltbl )
- X {
- X tblsiz = lastdfa * numecs;
- X fprintf( stderr, " %d table entries\n", tblsiz );
- X }
- X
- X else
- X {
- X tblsiz = 2 * (lastdfa + numtemps) + 2 * tblend;
- X
- X fprintf( stderr, " %d/%d base-def entries created\n",
- X lastdfa + numtemps, current_max_dfas );
- X fprintf( stderr, " %d/%d (peak %d) nxt-chk entries created\n",
- X tblend, current_max_xpairs, peakpairs );
- X fprintf( stderr,
- X " %d/%d (peak %d) template nxt-chk entries created\n",
- X numtemps * nummecs, current_max_template_xpairs,
- X numtemps * numecs );
- X fprintf( stderr, " %d empty table entries\n", nummt );
- X fprintf( stderr, " %d protos created\n", numprots );
- X fprintf( stderr, " %d templates created, %d uses\n",
- X numtemps, tmpuses );
- X }
- X
- X if ( useecs )
- X {
- X tblsiz = tblsiz + CSIZE;
- X fprintf( stderr, " %d/%d equivalence classes created\n",
- X numecs, CSIZE );
- X }
- X
- X if ( usemecs )
- X {
- X tblsiz = tblsiz + numecs;
- X fprintf( stderr, " %d/%d meta-equivalence classes created\n",
- X nummecs, CSIZE );
- X }
- X
- X fprintf( stderr, " %d (%d saved) hash collisions, %d DFAs equal\n",
- X hshcol, hshsave, dfaeql );
- X fprintf( stderr, " %d sets of reallocations needed\n", num_reallocs );
- X fprintf( stderr, " %d total table entries needed\n", tblsiz );
- X }
- X
- X#ifndef VMS
- X exit( status );
- X#else
- X exit( status + 1 );
- X#endif
- X }
- X
- X
- X/* flexinit - initialize flex
- X *
- X * synopsis
- X * int argc;
- X * char **argv;
- X * flexinit( argc, argv );
- X */
- X
- Xflexinit( argc, argv )
- Xint argc;
- Xchar **argv;
- X
- X {
- X int i, sawcmpflag, use_stdout;
- X char *arg, *skelname = NULL, *flex_gettime(), clower(), *mktemp();
- X
- X printstats = syntaxerror = trace = spprdflt = interactive = caseins = false;
- X backtrack_report = performance_report = ddebug = fulltbl = fullspd = false;
- X yymore_used = continued_action = reject = false;
- X yymore_really_used = reject_really_used = false;
- X gen_line_dirs = usemecs = useecs = true;
- X
- X sawcmpflag = false;
- X use_stdout = false;
- X
- X /* read flags */
- X for ( --argc, ++argv; argc ; --argc, ++argv )
- X {
- X if ( argv[0][0] != '-' || argv[0][1] == '\0' )
- X break;
- X
- X arg = argv[0];
- X
- X for ( i = 1; arg[i] != '\0'; ++i )
- X switch ( arg[i] )
- X {
- X case 'b':
- X backtrack_report = true;
- X break;
- X
- X case 'c':
- X if ( i != 1 )
- X flexerror( "-c flag must be given separately" );
- X
- X if ( ! sawcmpflag )
- X {
- X useecs = false;
- X usemecs = false;
- X fulltbl = false;
- X sawcmpflag = true;
- X }
- X
- X for ( ++i; arg[i] != '\0'; ++i )
- X switch ( clower( arg[i] ) )
- X {
- X case 'e':
- X useecs = true;
- X break;
- X
- X case 'F':
- X fullspd = true;
- X break;
- X
- X case 'f':
- X fulltbl = true;
- X break;
- X
- X case 'm':
- X usemecs = true;
- X break;
- X
- X default:
- X lerrif( "unknown -c option %c",
- X (int) arg[i] );
- X break;
- X }
- X
- X goto get_next_arg;
- X
- X case 'd':
- X ddebug = true;
- X break;
- X
- X case 'f':
- X useecs = usemecs = false;
- X fulltbl = true;
- X break;
- X
- X case 'F':
- X useecs = usemecs = false;
- X fullspd = true;
- X break;
- X
- X case 'I':
- X interactive = true;
- X break;
- X
- X case 'i':
- X caseins = true;
- X break;
- X
- X case 'L':
- X gen_line_dirs = false;
- X break;
- X
- X case 'p':
- X performance_report = true;
- X break;
- X
- X case 'S':
- X if ( i != 1 )
- X flexerror( "-S flag must be given separately" );
- X
- X skelname = arg + i + 1;
- X goto get_next_arg;
- X
- X case 's':
- X spprdflt = true;
- X break;
- X
- X case 't':
- X use_stdout = true;
- X break;
- X
- X case 'T':
- X trace = true;
- X break;
- X
- X case 'v':
- X printstats = true;
- X break;
- X
- X default:
- X lerrif( "unknown flag %c", (int) arg[i] );
- X break;
- X }
- X
- Xget_next_arg: /* used by -c and -S flags in lieu of a "continue 2" control */
- X ;
- X }
- X
- X if ( (fulltbl || fullspd) && usemecs )
- X flexerror( "full table and -cm don't make sense together" );
- X
- X if ( (fulltbl || fullspd) && interactive )
- X flexerror( "full table and -I are (currently) incompatible" );
- X
- X if ( fulltbl && fullspd )
- X flexerror( "full table and -F are mutually exclusive" );
- X
- X if ( ! skelname )
- X {
- X static char skeleton_name_storage[400];
- X
- X skelname = skeleton_name_storage;
- X (void) strcpy( skelname, DEFAULT_SKELETON_FILE );
- X }
- X
- X if ( ! use_stdout )
- X {
- X FILE *prev_stdout = freopen( outfile, "w", stdout );
- X
- X if ( prev_stdout == NULL )
- X flexerror( "could not create lex.yy.c" );
- X
- X outfile_created = 1;
- X }
- X
- X if ( argc )
- X {
- X if ( argc > 1 )
- X flexerror( "extraneous argument(s) given" );
- X
- X yyin = fopen( infilename = argv[0], "r" );
- X
- X if ( yyin == NULL )
- X lerrsf( "can't open %s", argv[0] );
- X }
- X
- X else
- X yyin = stdin;
- X
- X if ( backtrack_report )
- X {
- X#ifndef SHORT_FILE_NAMES
- X backtrack_file = fopen( "lex.backtrack", "w" );
- X#else
- X backtrack_file = fopen( "lex.bck", "w" );
- X#endif
- X
- X if ( backtrack_file == NULL )
- X flexerror( "could not create lex.backtrack" );
- X }
- X
- X else
- X backtrack_file = NULL;
- X
- X
- X lastccl = 0;
- X lastsc = 0;
- X
- X /* initialize the statistics */
- X starttime = flex_gettime();
- X
- X if ( (skelfile = fopen( skelname, "r" )) == NULL )
- X lerrsf( "can't open skeleton file %s", skelname );
- X
- X (void) mktemp( action_file_name );
- X
- X if ( (temp_action_file = fopen( action_file_name, "w" )) == NULL )
- X lerrsf( "can't open temporary action file %s", action_file_name );
- X
- X lastdfa = lastnfa = num_rules = numas = numsnpairs = tmpuses = 0;
- X numecs = numeps = eps2 = num_reallocs = hshcol = dfaeql = totnst = 0;
- X numuniq = numdup = hshsave = eofseen = datapos = dataline = 0;
- X num_backtracking = onesp = numprots = 0;
- X variable_trailing_context_rules = bol_needed = false;
- X
- X linenum = sectnum = 1;
- X firstprot = NIL;
- X
- X /* used in mkprot() so that the first proto goes in slot 1
- X * of the proto queue
- X */
- X lastprot = 1;
- X
- X if ( useecs )
- X {
- X /* set up doubly-linked equivalence classes */
- X ecgroup[1] = NIL;
- X
- X for ( i = 2; i <= CSIZE; ++i )
- X {
- X ecgroup[i] = i - 1;
- X nextecm[i - 1] = i;
- X }
- X
- X nextecm[CSIZE] = NIL;
- X }
- X
- X else
- X { /* put everything in its own equivalence class */
- X for ( i = 1; i <= CSIZE; ++i )
- X {
- X ecgroup[i] = i;
- X nextecm[i] = BAD_SUBSCRIPT; /* to catch errors */
- X }
- X }
- X
- X set_up_initial_allocations();
- X }
- X
- X
- X/* readin - read in the rules section of the input file(s)
- X *
- X * synopsis
- X * readin();
- X */
- X
- Xreadin()
- X
- X {
- X if ( ddebug )
- X puts( "#define FLEX_DEBUG" );
- X
- X if ( fulltbl )
- X puts( "#define FLEX_FULL_TABLE" );
- X else if ( fullspd )
- X puts( "#define FLEX_FAST_COMPRESSED" );
- X else
- X puts( "#define FLEX_COMPRESSED" );
- X
- X skelout();
- X
- X line_directive_out( stdout );
- X
- X if ( yyparse() )
- X lerrif( "fatal parse error at line %d", linenum );
- X
- X if ( useecs )
- X {
- X numecs = cre8ecs( nextecm, ecgroup, CSIZE );
- X ccl2ecl();
- X }
- X
- X else
- X numecs = CSIZE;
- X
- X }
- X
- X
- X
- X/* set_up_initial_allocations - allocate memory for internal tables */
- X
- Xset_up_initial_allocations()
- X
- X {
- X current_mns = INITIAL_MNS;
- X firstst = allocate_integer_array( current_mns );
- X lastst = allocate_integer_array( current_mns );
- X finalst = allocate_integer_array( current_mns );
- X transchar = allocate_integer_array( current_mns );
- X trans1 = allocate_integer_array( current_mns );
- X trans2 = allocate_integer_array( current_mns );
- X accptnum = allocate_integer_array( current_mns );
- X assoc_rule = allocate_integer_array( current_mns );
- X state_type = allocate_integer_array( current_mns );
- X
- X current_max_rules = INITIAL_MAX_RULES;
- X rule_type = allocate_integer_array( current_max_rules );
- X rule_linenum = allocate_integer_array( current_max_rules );
- X
- X current_max_scs = INITIAL_MAX_SCS;
- X scset = allocate_integer_array( current_max_scs );
- X scbol = allocate_integer_array( current_max_scs );
- X scxclu = allocate_integer_array( current_max_scs );
- X sceof = allocate_integer_array( current_max_scs );
- X scname = allocate_char_ptr_array( current_max_scs );
- X actvsc = allocate_integer_array( current_max_scs );
- X
- X current_maxccls = INITIAL_MAX_CCLS;
- X cclmap = allocate_integer_array( current_maxccls );
- X ccllen = allocate_integer_array( current_maxccls );
- X cclng = allocate_integer_array( current_maxccls );
- X
- X current_max_ccl_tbl_size = INITIAL_MAX_CCL_TBL_SIZE;
- X ccltbl = allocate_character_array( current_max_ccl_tbl_size );
- X
- X current_max_dfa_size = INITIAL_MAX_DFA_SIZE;
- X
- X current_max_xpairs = INITIAL_MAX_XPAIRS;
- X nxt = allocate_integer_array( current_max_xpairs );
- X chk = allocate_integer_array( current_max_xpairs );
- X
- X current_max_template_xpairs = INITIAL_MAX_TEMPLATE_XPAIRS;
- X tnxt = allocate_integer_array( current_max_template_xpairs );
- X
- X current_max_dfas = INITIAL_MAX_DFAS;
- X base = allocate_integer_array( current_max_dfas );
- X def = allocate_integer_array( current_max_dfas );
- X dfasiz = allocate_integer_array( current_max_dfas );
- X accsiz = allocate_integer_array( current_max_dfas );
- X dhash = allocate_integer_array( current_max_dfas );
- X dss = allocate_int_ptr_array( current_max_dfas );
- X dfaacc = allocate_dfaacc_union( current_max_dfas );
- X }
- END_OF_FILE
- if test 16556 -ne `wc -c <'flex/main.c'`; then
- echo shar: \"'flex/main.c'\" unpacked with wrong size!
- fi
- # end of 'flex/main.c'
- fi
- if test -f 'flex/nfa.c' -a "${1}" != "-c" ; then
- echo shar: Will not clobber existing file \"'flex/nfa.c'\"
- else
- echo shar: Extracting \"'flex/nfa.c'\" \(17293 characters\)
- sed "s/^X//" >'flex/nfa.c' <<'END_OF_FILE'
- X/* nfa - NFA construction routines */
- X
- X/*
- X * Copyright (c) 1989 The Regents of the University of California.
- X * All rights reserved.
- X *
- X * This code is derived from software contributed to Berkeley by
- X * Vern Paxson.
- X *
- X * The United States Government has rights in this work pursuant to
- X * contract no. DE-AC03-76SF00098 between the United States Department of
- X * Energy and the University of California.
- X *
- X * Redistribution and use in source and binary forms are permitted
- X * provided that the above copyright notice and this paragraph are
- X * duplicated in all such forms and that any documentation,
- X * advertising materials, and other materials related to such
- X * distribution and use acknowledge that the software was developed
- X * by the University of California, Berkeley. The name of the
- X * University may not be used to endorse or promote products derived
- X * from this software without specific prior written permission.
- X * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
- X * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
- X * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
- X */
- X
- X#ifndef lint
- X
- Xstatic char copyright[] =
- X "@(#) Copyright (c) 1989 The Regents of the University of California.\n";
- Xstatic char CR_continuation[] = "@(#) All rights reserved.\n";
- X
- Xstatic char rcsid[] =
- X "@(#) $Header: nfa.c,v 2.0 89/06/20 15:50:05 vern Locked $ (LBL)";
- X
- X#endif
- X
- X#include "flexdef.h"
- X
- X/* add_accept - add an accepting state to a machine
- X *
- X * synopsis
- X *
- X * add_accept( mach, accepting_number );
- X *
- X * accepting_number becomes mach's accepting number.
- X */
- X
- Xadd_accept( mach, accepting_number )
- Xint mach;
- X
- X {
- X /* hang the accepting number off an epsilon state. if it is associated
- X * with a state that has a non-epsilon out-transition, then the state
- X * will accept BEFORE it makes that transition, i.e., one character
- X * too soon
- X */
- X
- X if ( transchar[finalst[mach]] == SYM_EPSILON )
- X accptnum[finalst[mach]] = accepting_number;
- X
- X else
- X {
- X int astate = mkstate( SYM_EPSILON );
- X accptnum[astate] = accepting_number;
- X mach = link_machines( mach, astate );
- X }
- X }
- X
- X
- X/* copysingl - make a given number of copies of a singleton machine
- X *
- X * synopsis
- X *
- X * newsng = copysingl( singl, num );
- X *
- X * newsng - a new singleton composed of num copies of singl
- X * singl - a singleton machine
- X * num - the number of copies of singl to be present in newsng
- X */
- X
- Xint copysingl( singl, num )
- Xint singl, num;
- X
- X {
- X int copy, i;
- X
- X copy = mkstate( SYM_EPSILON );
- X
- X for ( i = 1; i <= num; ++i )
- X copy = link_machines( copy, dupmachine( singl ) );
- X
- X return ( copy );
- X }
- X
- X
- X/* dumpnfa - debugging routine to write out an nfa
- X *
- X * synopsis
- X * int state1;
- X * dumpnfa( state1 );
- X */
- X
- Xdumpnfa( state1 )
- Xint state1;
- X
- X {
- X int sym, tsp1, tsp2, anum, ns;
- X
- X fprintf( stderr, "\n\n********** beginning dump of nfa with start state %d\n",
- X state1 );
- X
- X /* we probably should loop starting at firstst[state1] and going to
- X * lastst[state1], but they're not maintained properly when we "or"
- X * all of the rules together. So we use our knowledge that the machine
- X * starts at state 1 and ends at lastnfa.
- X */
- X
- X /* for ( ns = firstst[state1]; ns <= lastst[state1]; ++ns ) */
- X for ( ns = 1; ns <= lastnfa; ++ns )
- X {
- X fprintf( stderr, "state # %4d\t", ns );
- X
- X sym = transchar[ns];
- X tsp1 = trans1[ns];
- X tsp2 = trans2[ns];
- X anum = accptnum[ns];
- X
- X fprintf( stderr, "%3d: %4d, %4d", sym, tsp1, tsp2 );
- X
- X if ( anum != NIL )
- X fprintf( stderr, " [%d]", anum );
- X
- X fprintf( stderr, "\n" );
- X }
- X
- X fprintf( stderr, "********** end of dump\n" );
- X }
- X
- X
- X/* dupmachine - make a duplicate of a given machine
- X *
- X * synopsis
- X *
- X * copy = dupmachine( mach );
- X *
- X * copy - holds duplicate of mach
- X * mach - machine to be duplicated
- X *
- X * note that the copy of mach is NOT an exact duplicate; rather, all the
- X * transition states values are adjusted so that the copy is self-contained,
- X * as the original should have been.
- X *
- X * also note that the original MUST be contiguous, with its low and high
- X * states accessible by the arrays firstst and lastst
- X */
- X
- Xint dupmachine( mach )
- Xint mach;
- X
- X {
- X int i, init, state_offset;
- X int state = 0;
- X int last = lastst[mach];
- X
- X for ( i = firstst[mach]; i <= last; ++i )
- X {
- X state = mkstate( transchar[i] );
- X
- X if ( trans1[i] != NO_TRANSITION )
- X {
- X mkxtion( finalst[state], trans1[i] + state - i );
- X
- X if ( transchar[i] == SYM_EPSILON && trans2[i] != NO_TRANSITION )
- X mkxtion( finalst[state], trans2[i] + state - i );
- X }
- X
- X accptnum[state] = accptnum[i];
- X }
- X
- X if ( state == 0 )
- X flexfatal( "empty machine in dupmachine()" );
- X
- X state_offset = state - i + 1;
- X
- X init = mach + state_offset;
- X firstst[init] = firstst[mach] + state_offset;
- X finalst[init] = finalst[mach] + state_offset;
- X lastst[init] = lastst[mach] + state_offset;
- X
- X return ( init );
- X }
- X
- X/* finish_rule - finish up the processing for a rule
- X *
- X * synopsis
- X *
- X * finish_rule( mach, variable_trail_rule, headcnt, trailcnt );
- X *
- X * An accepting number is added to the given machine. If variable_trail_rule
- X * is true then the rule has trailing context and both the head and trail
- X * are variable size. Otherwise if headcnt or trailcnt is non-zero then
- X * the machine recognizes a pattern with trailing context and headcnt is
- X * the number of characters in the matched part of the pattern, or zero
- X * if the matched part has variable length. trailcnt is the number of
- X * trailing context characters in the pattern, or zero if the trailing
- X * context has variable length.
- X */
- X
- Xfinish_rule( mach, variable_trail_rule, headcnt, trailcnt )
- Xint mach, variable_trail_rule, headcnt, trailcnt;
- X
- X {
- X add_accept( mach, num_rules );
- X
- X /* we did this in new_rule(), but it often gets the wrong
- X * number because we do it before we start parsing the current rule
- X */
- X rule_linenum[num_rules] = linenum;
- X
- X fprintf( temp_action_file, "case %d:\n", num_rules );
- X
- X if ( variable_trail_rule )
- X {
- X rule_type[num_rules] = RULE_VARIABLE;
- X
- X if ( performance_report )
- X fprintf( stderr, "Variable trailing context rule at line %d\n",
- X rule_linenum[num_rules] );
- X
- X variable_trailing_context_rules = true;
- X }
- X
- X else
- X {
- X rule_type[num_rules] = RULE_NORMAL;
- X
- X if ( headcnt > 0 || trailcnt > 0 )
- X {
- X /* do trailing context magic to not match the trailing characters */
- X char *scanner_cp = "yy_c_buf_p = yy_cp";
- X char *scanner_bp = "yy_bp";
- X
- X fprintf( temp_action_file,
- X "*yy_cp = yy_hold_char; /* undo effects of setting up yytext */\n" );
- X
- X if ( headcnt > 0 )
- X {
- X if ( headcnt > 0 )
- X fprintf( temp_action_file, "%s = %s + %d;\n",
- X scanner_cp, scanner_bp, headcnt );
- X
- X else
- X fprintf( temp_action_file, "%s = %s;\n",
- X scanner_cp, scanner_bp );
- X }
- X
- X else
- X fprintf( temp_action_file,
- X "%s -= %d;\n", scanner_cp, trailcnt );
- X
- X fprintf( temp_action_file,
- X "YY_DO_BEFORE_ACTION; /* set up yytext again */\n" );
- X }
- X }
- X
- X line_directive_out( temp_action_file );
- X }
- X
- X
- X/* link_machines - connect two machines together
- X *
- X * synopsis
- X *
- X * new = link_machines( first, last );
- X *
- X * new - a machine constructed by connecting first to last
- X * first - the machine whose successor is to be last
- X * last - the machine whose predecessor is to be first
- X *
- X * note: this routine concatenates the machine first with the machine
- X * last to produce a machine new which will pattern-match first first
- X * and then last, and will fail if either of the sub-patterns fails.
- X * FIRST is set to new by the operation. last is unmolested.
- X */
- X
- Xint link_machines( first, last )
- Xint first, last;
- X
- X {
- X if ( first == NIL )
- X return ( last );
- X
- X else if ( last == NIL )
- X return ( first );
- X
- X else
- X {
- X mkxtion( finalst[first], last );
- X finalst[first] = finalst[last];
- X lastst[first] = max( lastst[first], lastst[last] );
- X firstst[first] = min( firstst[first], firstst[last] );
- X
- X return ( first );
- X }
- X }
- X
- X
- X/* mark_beginning_as_normal - mark each "beginning" state in a machine
- X * as being a "normal" (i.e., not trailing context-
- X * associated) states
- X *
- X * synopsis
- X *
- X * mark_beginning_as_normal( mach )
- X *
- X * mach - machine to mark
- X *
- X * The "beginning" states are the epsilon closure of the first state
- X */
- X
- Xmark_beginning_as_normal( mach )
- Xregister int mach;
- X
- X {
- X switch ( state_type[mach] )
- X {
- X case STATE_NORMAL:
- X /* oh, we've already visited here */
- X return;
- X
- X case STATE_TRAILING_CONTEXT:
- X state_type[mach] = STATE_NORMAL;
- X
- X if ( transchar[mach] == SYM_EPSILON )
- X {
- X if ( trans1[mach] != NO_TRANSITION )
- X mark_beginning_as_normal( trans1[mach] );
- X
- X if ( trans2[mach] != NO_TRANSITION )
- X mark_beginning_as_normal( trans2[mach] );
- X }
- X break;
- X
- X default:
- X flexerror( "bad state type in mark_beginning_as_normal()" );
- X break;
- X }
- X }
- X
- X
- X/* mkbranch - make a machine that branches to two machines
- X *
- X * synopsis
- X *
- X * branch = mkbranch( first, second );
- X *
- X * branch - a machine which matches either first's pattern or second's
- X * first, second - machines whose patterns are to be or'ed (the | operator)
- X *
- X * note that first and second are NEITHER destroyed by the operation. Also,
- X * the resulting machine CANNOT be used with any other "mk" operation except
- X * more mkbranch's. Compare with mkor()
- X */
- X
- Xint mkbranch( first, second )
- Xint first, second;
- X
- X {
- X int eps;
- X
- X if ( first == NO_TRANSITION )
- X return ( second );
- X
- X else if ( second == NO_TRANSITION )
- X return ( first );
- X
- X eps = mkstate( SYM_EPSILON );
- X
- X mkxtion( eps, first );
- X mkxtion( eps, second );
- X
- X return ( eps );
- X }
- X
- X
- X/* mkclos - convert a machine into a closure
- X *
- X * synopsis
- X * new = mkclos( state );
- X *
- X * new - a new state which matches the closure of "state"
- X */
- X
- Xint mkclos( state )
- Xint state;
- X
- X {
- X return ( mkopt( mkposcl( state ) ) );
- X }
- X
- X
- X/* mkopt - make a machine optional
- X *
- X * synopsis
- X *
- X * new = mkopt( mach );
- X *
- X * new - a machine which optionally matches whatever mach matched
- X * mach - the machine to make optional
- X *
- X * notes:
- X * 1. mach must be the last machine created
- X * 2. mach is destroyed by the call
- X */
- X
- Xint mkopt( mach )
- Xint mach;
- X
- X {
- X int eps;
- X
- X if ( ! SUPER_FREE_EPSILON(finalst[mach]) )
- X {
- X eps = mkstate( SYM_EPSILON );
- X mach = link_machines( mach, eps );
- X }
- X
- X /* can't skimp on the following if FREE_EPSILON(mach) is true because
- X * some state interior to "mach" might point back to the beginning
- X * for a closure
- X */
- X eps = mkstate( SYM_EPSILON );
- X mach = link_machines( eps, mach );
- X
- X mkxtion( mach, finalst[mach] );
- X
- X return ( mach );
- X }
- X
- X
- X/* mkor - make a machine that matches either one of two machines
- X *
- X * synopsis
- X *
- X * new = mkor( first, second );
- X *
- X * new - a machine which matches either first's pattern or second's
- X * first, second - machines whose patterns are to be or'ed (the | operator)
- X *
- X * note that first and second are both destroyed by the operation
- X * the code is rather convoluted because an attempt is made to minimize
- X * the number of epsilon states needed
- X */
- X
- Xint mkor( first, second )
- Xint first, second;
- X
- X {
- X int eps, orend;
- X
- X if ( first == NIL )
- X return ( second );
- X
- X else if ( second == NIL )
- X return ( first );
- X
- X else
- X {
- X /* see comment in mkopt() about why we can't use the first state
- X * of "first" or "second" if they satisfy "FREE_EPSILON"
- X */
- X eps = mkstate( SYM_EPSILON );
- X
- X first = link_machines( eps, first );
- X
- X mkxtion( first, second );
- X
- X if ( SUPER_FREE_EPSILON(finalst[first]) &&
- X accptnum[finalst[first]] == NIL )
- X {
- X orend = finalst[first];
- X mkxtion( finalst[second], orend );
- X }
- X
- X else if ( SUPER_FREE_EPSILON(finalst[second]) &&
- X accptnum[finalst[second]] == NIL )
- X {
- X orend = finalst[second];
- X mkxtion( finalst[first], orend );
- X }
- X
- X else
- X {
- X eps = mkstate( SYM_EPSILON );
- X
- X first = link_machines( first, eps );
- X orend = finalst[first];
- X
- X mkxtion( finalst[second], orend );
- X }
- X }
- X
- X finalst[first] = orend;
- X return ( first );
- X }
- X
- X
- X/* mkposcl - convert a machine into a positive closure
- X *
- X * synopsis
- X * new = mkposcl( state );
- X *
- X * new - a machine matching the positive closure of "state"
- X */
- X
- Xint mkposcl( state )
- Xint state;
- X
- X {
- X int eps;
- X
- X if ( SUPER_FREE_EPSILON(finalst[state]) )
- X {
- X mkxtion( finalst[state], state );
- X return ( state );
- X }
- X
- X else
- X {
- X eps = mkstate( SYM_EPSILON );
- X mkxtion( eps, state );
- X return ( link_machines( state, eps ) );
- X }
- X }
- X
- X
- X/* mkrep - make a replicated machine
- X *
- X * synopsis
- X * new = mkrep( mach, lb, ub );
- X *
- X * new - a machine that matches whatever "mach" matched from "lb"
- X * number of times to "ub" number of times
- X *
- X * note
- X * if "ub" is INFINITY then "new" matches "lb" or more occurrences of "mach"
- X */
- X
- Xint mkrep( mach, lb, ub )
- Xint mach, lb, ub;
- X
- X {
- X int base_mach, tail, copy, i;
- X
- X base_mach = copysingl( mach, lb - 1 );
- X
- X if ( ub == INFINITY )
- X {
- X copy = dupmachine( mach );
- X mach = link_machines( mach,
- X link_machines( base_mach, mkclos( copy ) ) );
- X }
- X
- X else
- X {
- X tail = mkstate( SYM_EPSILON );
- X
- X for ( i = lb; i < ub; ++i )
- X {
- X copy = dupmachine( mach );
- X tail = mkopt( link_machines( copy, tail ) );
- X }
- X
- X mach = link_machines( mach, link_machines( base_mach, tail ) );
- X }
- X
- X return ( mach );
- X }
- X
- X
- X/* mkstate - create a state with a transition on a given symbol
- X *
- X * synopsis
- X *
- X * state = mkstate( sym );
- X *
- X * state - a new state matching sym
- X * sym - the symbol the new state is to have an out-transition on
- X *
- X * note that this routine makes new states in ascending order through the
- X * state array (and increments LASTNFA accordingly). The routine DUPMACHINE
- X * relies on machines being made in ascending order and that they are
- X * CONTIGUOUS. Change it and you will have to rewrite DUPMACHINE (kludge
- X * that it admittedly is)
- X */
- X
- Xint mkstate( sym )
- Xint sym;
- X
- X {
- X if ( ++lastnfa >= current_mns )
- X {
- X if ( (current_mns += MNS_INCREMENT) >= MAXIMUM_MNS )
- X lerrif( "input rules are too complicated (>= %d NFA states)",
- X current_mns );
- X
- X ++num_reallocs;
- X
- X firstst = reallocate_integer_array( firstst, current_mns );
- X lastst = reallocate_integer_array( lastst, current_mns );
- X finalst = reallocate_integer_array( finalst, current_mns );
- X transchar = reallocate_integer_array( transchar, current_mns );
- X trans1 = reallocate_integer_array( trans1, current_mns );
- X trans2 = reallocate_integer_array( trans2, current_mns );
- X accptnum = reallocate_integer_array( accptnum, current_mns );
- X assoc_rule = reallocate_integer_array( assoc_rule, current_mns );
- X state_type = reallocate_integer_array( state_type, current_mns );
- X }
- X
- X firstst[lastnfa] = lastnfa;
- X finalst[lastnfa] = lastnfa;
- X lastst[lastnfa] = lastnfa;
- X transchar[lastnfa] = sym;
- X trans1[lastnfa] = NO_TRANSITION;
- X trans2[lastnfa] = NO_TRANSITION;
- X accptnum[lastnfa] = NIL;
- X assoc_rule[lastnfa] = num_rules;
- X state_type[lastnfa] = current_state_type;
- X
- X /* fix up equivalence classes base on this transition. Note that any
- X * character which has its own transition gets its own equivalence class.
- X * Thus only characters which are only in character classes have a chance
- X * at being in the same equivalence class. E.g. "a|b" puts 'a' and 'b'
- X * into two different equivalence classes. "[ab]" puts them in the same
- X * equivalence class (barring other differences elsewhere in the input).
- X */
- X
- X if ( sym < 0 )
- X {
- X /* we don't have to update the equivalence classes since that was
- X * already done when the ccl was created for the first time
- X */
- X }
- X
- X else if ( sym == SYM_EPSILON )
- X ++numeps;
- X
- X else
- X {
- X if ( useecs )
- X mkechar( sym, nextecm, ecgroup );
- X }
- X
- X return ( lastnfa );
- X }
- X
- X
- X/* mkxtion - make a transition from one state to another
- X *
- X * synopsis
- X *
- X * mkxtion( statefrom, stateto );
- X *
- X * statefrom - the state from which the transition is to be made
- X * stateto - the state to which the transition is to be made
- X */
- X
- Xmkxtion( statefrom, stateto )
- Xint statefrom, stateto;
- X
- X {
- X if ( trans1[statefrom] == NO_TRANSITION )
- X trans1[statefrom] = stateto;
- X
- X else if ( (transchar[statefrom] != SYM_EPSILON) ||
- X (trans2[statefrom] != NO_TRANSITION) )
- X flexfatal( "found too many transitions in mkxtion()" );
- X
- X else
- X { /* second out-transition for an epsilon state */
- X ++eps2;
- X trans2[statefrom] = stateto;
- X }
- X }
- X
- X/* new_rule - initialize for a new rule
- X *
- X * synopsis
- X *
- X * new_rule();
- X *
- X * the global num_rules is incremented and the any corresponding dynamic
- X * arrays (such as rule_type[]) are grown as needed.
- X */
- X
- Xnew_rule()
- X
- X {
- X if ( ++num_rules >= current_max_rules )
- X {
- X ++num_reallocs;
- X current_max_rules += MAX_RULES_INCREMENT;
- X rule_type = reallocate_integer_array( rule_type, current_max_rules );
- X rule_linenum =
- X reallocate_integer_array( rule_linenum, current_max_rules );
- X }
- X
- X if ( num_rules > MAX_RULE )
- X lerrif( "too many rules (> %d)!", MAX_RULE );
- X
- X rule_linenum[num_rules] = linenum;
- X }
- END_OF_FILE
- if test 17293 -ne `wc -c <'flex/nfa.c'`; then
- echo shar: \"'flex/nfa.c'\" unpacked with wrong size!
- fi
- # end of 'flex/nfa.c'
- fi
- echo shar: End of archive 3 \(of 7\).
- cp /dev/null ark3isdone
- MISSING=""
- for I in 1 2 3 4 5 6 7 ; do
- if test ! -f ark${I}isdone ; then
- MISSING="${MISSING} ${I}"
- fi
- done
- if test "${MISSING}" = "" ; then
- echo You have unpacked all 7 archives.
- rm -f ark[1-9]isdone
- else
- echo You still need to unpack the following archives:
- echo " " ${MISSING}
- fi
- ## End of shell archive.
- exit 0
-