home *** CD-ROM | disk | FTP | other *** search
Text File | 1994-05-29 | 75.7 KB | 2,924 lines |
- Newsgroups: comp.sources.unix
- From: wendt@cs.colostate.edu (alan l wendt)
- Subject: v28i044: grok - match regular expressions and correct errors, V1.0, Part01/01
- Message-id: <1.770288657.8654@gw.home.vix.com>
- Sender: unix-sources-moderator@gw.home.vix.com
- Approved: vixie@gw.home.vix.com
-
- Submitted-By: wendt@cs.colostate.edu (alan l wendt)
- Posting-Number: Volume 28, Issue 44
- Archive-Name: grok/part01
-
- Grok (get regular expression and make OK) is like grep except that it
- corrects its input to instances of the regular expression, using a
- minimum-cost correction. Grok uses flex code. Its only argument is a
- filename containing character-to-character correction costs, a maximum
- acceptable cost for the string, and a regular expression.
-
- The code has been tested on 286 Xenix 2.3.2 (large model) and DEC 3100
- Ultrix 4.2.
-
- This is version 1.0.
-
- Contributors: Alan Wendt, Vern Paxson, Eugene Myers
-
- # This is a shell archive.
- # Remove everything above and including the cut line.
- # Then run the rest of the file through sh.
- #----cut here-----cut here-----cut here-----cut here----#
- #!/bin/sh
- # shar: Shell Archiver
- # Run the following text with /bin/sh to create:
- # Makefile
- # README
- # ccl.c
- # flexdef.h
- # grok.1
- # misc.c
- # ncnb.def
- # nfa.c
- # parse.h
- # parse.y
- # scan.l
- # sym.c
- # test.in
- # test.out
- # This archive created: Tue Nov 17 09:30:20 1992
- cat << \SHAR_EOF > Makefile
-
- #
- # DEC 3100 Ultrix
- #
- LDFLAGS =
- CFLAGS =
-
- #
- # Xenix 286 2.3.2 large model
- #
- #LDFLAGS = -M2l -g -F 4000
- #CFLAGS = -I$I -DUNIX=1 -LARGE -DUSG=1 -DSV
-
- .c.o: ; -$(CC) $(LDFLAGS) $(CFLAGS) -c $*.c
-
- # correction
-
- scan.c: scan.l
- flex -I scan.l
- mv lex.yy.c scan.c
-
- REPAIRFILES = nfa.o ccl.o parse.o sym.o misc.o scan.o
-
- parse.h parse.c : parse.y
- yacc -d parse.y
- @mv y.tab.c parse.c
- @mv y.tab.h parse.h
-
- grok: $(REPAIRFILES)
- cc $(LDFLAGS) -o grok $(REPAIRFILES)
-
- clean:
- rm *.o *.log scan.c parse.c
-
- test: grok
- grok ncnb.def < test.in > test.out.tmp
- diff test.out test.out.tmp
- SHAR_EOF
- cat << \SHAR_EOF > README
-
- Grok (get regular expression and make OK) is like grep except that it
- corrects its input to instances of the regular expression, using a
- minimum-cost correction. Grok uses flex code. Its only argument is
- a filename containing character-to-character correction costs, a
- maximum acceptable cost for the string, and a regular expression.
-
- The code has been tested on 286 Xenix 2.3.2 (large model)
- and DEC 3100 Ultrix 4.2.
-
- Contributors: Alan Wendt, Vern Paxson, Eugene Myers
- SHAR_EOF
- cat << \SHAR_EOF > ccl.c
- /* ccl - routines for character classes */
-
- /*
- * Copyright (c) 1987, the University of California
- *
- * The United States Government has rights in this work pursuant to
- * contract no. DE-AC03-76SF00098 between the United States Department of
- * Energy and the University of California.
- *
- * This program may be redistributed. Enhancements and derivative works
- * may be created provided the new works, if made available to the general
- * public, are made available for use by anyone.
- */
-
- #include "flexdef.h"
-
- /* ccladd - add a single character to a ccl
- *
- * synopsis
- * int cclp;
- * char ch;
- * ccladd( cclp, ch );
- */
-
- ccltest( cclp, ch )
- struct Ccl *cclp;
- char ch;
- {
- /* SUPPRESS 53 */
- return cclp->c[(ch & (CSIZE - 1)) >> 3] & (1 << (ch & 7));
- }
-
- ccladd( cclp, ch )
- struct Ccl *cclp;
- char ch;
- {
- /* SUPPRESS 53 */
- cclp->c[(ch & (CSIZE - 1)) >> 3] |= 1 << (ch & 7);
- }
-
-
- /* cclinit - make an empty ccl
- *
- * synopsis
- * int cclinit();
- * cclinit( &newccl );
- */
-
- void cclinit( ccl )
- struct Ccl *ccl;
- {
- bzero(ccl->c, sizeof(*ccl));
- }
-
-
- /* cclnegate - negate a ccl
- *
- * synopsis
- * int cclp;
- * cclnegate( ccl );
- */
-
- cclnegate( cclp )
- struct Ccl *cclp;
- {
- int i;
- for (i = 0; i < sizeof(*cclp); i++)
- {
- cclp->c[i] ^= -1;
- }
- }
-
-
-
- /* BUGGY IF CCL IS EMPTY!!! */
- int ccl2nfa( cclp )
- struct Ccl *cclp;
- {
- int i, j;
- int result = 0;
-
- for (i = 0; i < CSIZE; i++)
- {
- j = cclp->c[i >> 3];
- if (j & (1 << (i & 7)))
- {
- if (result == 0)
- {
- result = mkstate( i );
- }
- else
- {
- result = mkor( result, mkstate( i ) );
- }
- }
- }
- return result;
- }
- SHAR_EOF
- cat << \SHAR_EOF > flexdef.h
- #include <stdio.h>
-
- #ifdef SV
- #include <string.h>
- #define bzero(s, n) memset((char *)(s), '\000', (unsigned)(n))
- #else
- #include <strings.h>
- #endif
-
- char *sprintf(); /* keep lint happy */
-
- #define min(x,y) (x < y ? x : y)
- #define max(x,y) (x > y ? x : y)
-
- #define true 1
- #define false 0
-
- /* NIL must be 0. If not, its special meaning when making equivalence classes
- * (it marks the representative of a given e.c.) will be unidentifiable
- */
- #define NIL 0
-
- #define NO_TRANSITION NIL
- #define INFINITY -1 /* for x{5,} constructions */
-
- /* variables used in the flex input routines:
- * datapos - characters on current output line
- * dataline - number of contiguous lines of data in current data
- * statement. Used to generate readable -f output
- * skelfile - fd of the skeleton file
- * yyin - input file
- * temp_action_file - temporary file to hold actions
- * action_file_name - name of the temporary file
- * infilename - name of input file
- * linenum - current input line number
- */
-
- extern int linenum;
-
- /* size of input alphabet - should be size of ASCII set */
- #define CSIZE 128
-
- struct Ccl
- {
- char c[CSIZE/8];
- };
-
-
- /* variables for flags:
- * syntaxerror - true if a syntax error has been found
- */
-
- extern int syntaxerror;
-
- #define SYM_EPSILON 0 /* to mark transitions on the symbol epsilon */
-
- /* variables for nfa machine data:
- * current_mns - current maximum on number of NFA states
- * firstst - first physical state of a fragment
- * lastst - last physical state of fragment
- * finalst - last logical state of fragment
- * transchar - transition character that got us into this state
- * accptnum - accepting number
- * lastnfa - last nfa state number created
- * edgeptr - pointer to first Edge out of this state
- * first logical state of a fragment is its machine number
- */
-
- #define MAXIMUM_MNS 30000
-
- struct state {
- struct Edge *edgeptr;
- int firstst;
- int lastst;
- int finalst;
- int ninedges;
- int noutedges;
- int temp;
- unsigned char transchar;
- } *states[15000];
-
- extern int current_mns;
- extern int lastnfa, firstnfa;
-
-
- extern int ccl2nfa();
-
- #if 0
- /*
- * transfrom - from edge state number
- * transto - to state number
- */
- #define INITIAL_MNE 2000 /* default maximum number of nfa edges */
- #define MNE_INCREMENT 1000 /* amount to bump above by if it's not enough */
- #define MAXIMUM_MNE 31999
- extern int current_mne; /* current max # of edges */
- extern int current_ne; /* current # of edges */
- #endif
-
- struct Edge {
- struct Edge *next, *prior; /* links all edges from a given state */
- int from, to; /* from & to state numbers */
- };
-
- /*
- * num_reallocs - number of times it was necessary to realloc() a group
- * of arrays
- */
- extern int num_reallocs;
-
- extern char *malloc(), *realloc(), *sbrk();
-
- #define allocate_integer_array(size) \
- (int *) allocate_array( size, sizeof( int ) )
-
- #define reallocate_integer_array(array,size) \
- (int *) reallocate_array( (char *) array, size, sizeof( int ) )
- char *allocate_array(), *reallocate_array();
-
- struct hash_entry
- {
- struct hash_entry *prev, *next;
- char *name;
- char *str_val;
- int int_val;
- } ;
-
- typedef struct hash_entry *hash_table[];
-
- #define NAME_TABLE_HASH_SIZE 101
- #define START_COND_HASH_SIZE 101
-
- /* maximum line length we'll have to deal with */
- #define MAXLINE BUFSIZ
-
- #define allocate_character_array(size) allocate_array( size, sizeof( char ) )
-
- #define reallocate_character_array(array,size) \
- reallocate_array( array, size, sizeof( char ) )
- /*
- * nmstr - last NAME scanned by the scanner
- */
-
- extern char nmstr[MAXLINE];
-
- /* used to communicate between scanner and parser. The type should really
- * be YYSTYPE, but we can't easily get our hands on it.
- */
- extern int yylval;
-
- char delete_cost[128];
- char insert_cost[128];
- char correct_cost[128][128];
- int tolerance; /* maximum tolerable error */
- SHAR_EOF
- cat << \SHAR_EOF > grok.1
- .\"
- .\" @(#)grok.1 6.5 (CSU) 10/11/91
- .\"
-
- .TH GROK 1 "November 11, 1991
- .SH NAME
- grok \- Match and repair errors in regular expressions
- .SH SYNOPSIS
- .in +\w'\fBgrok \fR'u
- .ti -\w'\fBgrok \fR'u
- \fBgrok \fR \fIdefinition-file\fR
-
- .SH DESCRIPTION
- .I Grok
- matches instances of \fIlex\fR-style regular expressions from the input
- and prints them.
- In addition, it tries to repair input lines to make them instances of the
- regular expression, using single-character insertions, deletions, and
- substitutions.
- Different single-character edits can be assigned different costs,
- and \fIgrok\fR will find a repair of lowest total cost.
-
- The algorithm used to find lowest-cost repairs takes time proportional
- to the product of the length of the input and the cost of the repair.
- A maximum repair cost tolerance must be supplied.
- \fIgrok\fR ignores input lines that cannot be repaired within the tolerance.
- The match is \fIanchored\fR, unlike the matching performed by \fIgrep\fR.
-
- The \fIdefinition-file\fR contains entries to define character
- repair costs, followed by a pair of percent signs alone on a line,
- followed by a single regular expression. For example, the following
- defines repair for lines scanned from a bank statement:
-
- .ne 4
- .nf
- .ft L
- % correct anything to anything else for 3
- %correct . . 3
-
- % a number of common numeric misreads. 4 is often read as (+
- % but we cannot correct string to string.
- %correct "Z" "2" 1
- %correct "z" "2" 1
- %correct "S" "5" 1
- %correct "s" "5" 1
- %correct "l" "1" 1
- %correct "i" "1" 1
- %correct "I" "1" 1
- %correct "o" "0" 1
- %correct "O" "0" 1
- %correct "r" "5" 1
- %correct "?" "9" 1
- %correct "+" "4" 1
-
- % default insertion cost is 2
- %insert . 2
-
- % charge extra for numbers to avoid hallucinating numbers.
- %insert [0-9] 3
-
- % delete anything for 2. correction cost <= insert + delete.
- %delete . 2
- %delete [0-9] 3
-
- % the maximum tolerable error cost
- %tolerance 20
- %%
- ([ ]{20,30}[+ ]([ ]{5,7}[0-9]{5}|"NO ITEM # ")[ ]{3,4}[0-9]{9}
- [ ]{5,7}[0-1][0-9]-[0-3][0-9][ ]{6,13}(""|[1-9][0-9]{0,2}|[1-9]
- [0-9]{0,2}","[0-9]{3})\.[0-9]{2}([ ]{5,10}[+ ]([ ]{5,7}[0-9]{5}|
- "NO ITEM # ")[ ]{3,4}[0-9]{9}[ ]{5,7}[0-1][0-9]-[0-3][0-9][ ]{6,13}
- (""|[1-9][0-9]{0,2}|[1-9][0-9]{0,2}","[0-9]{3})\.[0-9]{2})?)|
- (" ENCLOSED NUMBER NUMBER".*)
-
- .ft R
- .fi
-
- \fB%correct\fR, \fB%insert\fR, and \fB%delete\fR lines supply repair costs
- to replace each character with each other, as well as for insertions
- and deletions. Single-character regular expressions are accepted,
- and later costs overwrite earlier ones.
-
- The regular expression has been broken into several lines for display.
- It defines the expected
- structure of important lines on one style of bank statement.
-
- \fBGrok\fR outputs the repaired lines.
-
- .SH BUGS
- No provision is made for breaking long defining expressions across lines.
- The NFA optimization is too slow.
-
- .SH AUTHOR
- Alan Wendt. Contributors include Eugene Myers (repair algorithm), Vern Paxson (\fBflex\fR code).
- SHAR_EOF
- cat << \SHAR_EOF > misc.c
- /* misc - miscellaneous flex routines */
-
- /*
- * Copyright (c) 1987, the University of California
- *
- * The United States Government has rights in this work pursuant to
- * contract no. DE-AC03-76SF00098 between the United States Department of
- * Energy and the University of California.
- *
- * This program may be redistributed. Enhancements and derivative works
- * may be created provided the new works, if made available to the general
- * public, are made available for use by anyone.
- */
-
- #include <ctype.h>
- #include "flexdef.h"
-
- char *malloc(), *realloc();
-
-
- /* allocate_array - allocate memory for an integer array of the given size */
-
- char *allocate_array( size, element_size )
- int size, element_size;
-
- {
- register char *mem = malloc( (unsigned) (element_size * size) );
-
- if ( mem == NULL )
- flexfatal( "memory allocation failed in allocate_array()" );
-
- return ( mem );
- }
-
-
- /* clower - replace upper-case letter to lower-case
- *
- * synopsis:
- * char clower(), c;
- * c = clower( c );
- */
-
- char clower( c )
- register char c;
-
- {
- return ( isupper(c) ? tolower(c) : c );
- }
-
-
- /* copy_string - returns a dynamically allocated copy of a string
- *
- * synopsis
- * char *str, *copy, *copy_string();
- * copy = copy_string( str );
- */
-
- char *copy_string( str )
- register char *str;
-
- {
- register char *c;
- char *copy;
-
- /* find length */
- for ( c = str; *c; ++c )
- continue;
-
- copy = malloc( (unsigned) ((c - str + 1) * sizeof( char )) );
-
- if ( copy == NULL )
- flexfatal( "dynamic memory failure in copy_string()" );
-
- /* SUPPRESS 560 */
- for ( c = copy; (*c++ = *str++); )
- continue;
-
- return ( copy );
- }
-
-
- /* lerrif - report an error message formatted with one integer argument
- *
- * synopsis
- * char msg[];
- * int arg;
- * lerrif( msg, arg );
- */
-
- lerrif( msg, arg )
- char msg[];
- int arg;
-
- {
- char errmsg[MAXLINE];
- (void) sprintf( errmsg, msg, arg );
- flexerror( errmsg );
- }
-
-
- /* lerrsf - report an error message formatted with one string argument
- *
- * synopsis
- * char msg[], arg[];
- * lerrsf( msg, arg );
- */
-
- lerrsf( msg, arg )
- char msg[], arg[];
-
- {
- char errmsg[MAXLINE];
-
- (void) sprintf( errmsg, msg, arg );
- flexerror( errmsg );
- }
-
-
- /* flexerror - report an error message and terminate
- *
- * synopsis
- * char msg[];
- * flexerror( msg );
- */
-
- flexerror( msg )
- char msg[];
-
- {
- fprintf( stderr, "flex: %s\n", msg );
- exit( 1 );
- }
-
-
- /* flexfatal - report a fatal error message and terminate
- *
- * synopsis
- * char msg[];
- * flexfatal( msg );
- */
-
- flexfatal( msg )
- char msg[];
-
- {
- fprintf( stderr, "flex: fatal internal error %s\n", msg );
- exit( 1 );
- }
-
-
-
-
-
-
- /* myctoi - return the integer represented by a string of digits
- *
- * synopsis
- * char array[];
- * int val, myctoi();
- * val = myctoi( array );
- *
- */
-
- int myctoi( array )
- char array[];
-
- {
- int val = 0;
-
- (void) sscanf( array, "%d", &val );
-
- return ( val );
- }
-
-
- /* myesc - return character corresponding to escape sequence
- *
- * synopsis
- * char array[], c, myesc();
- * c = myesc( array );
- *
- */
-
- char myesc( array )
- char array[];
-
- {
- switch ( array[1] )
- {
- case 'n': return ( '\n' );
- case 't': return ( '\t' );
- case 'f': return ( '\f' );
- case 'r': return ( '\r' );
- case 'b': return ( '\b' );
-
- case '0':
- if ( isdigit(array[2]) )
- { /* \0<octal> */
- char c, esc_char;
- register int sptr = 2;
-
- while ( isdigit(array[sptr]) )
- /* don't increment inside loop control because the
- * macro will expand it to two increments! (Not a
- * problem with the C version of the macro)
- */
- ++sptr;
-
- c = array[sptr];
- array[sptr] = '\0';
-
- esc_char = otoi( array + 2 );
- array[sptr] = c;
-
- if ( esc_char == '\0' )
- {
- synerr( "escape sequence for null not allowed" );
- return ( 1 );
- }
-
- return ( esc_char );
- }
-
- else
- {
- synerr( "escape sequence for null not allowed" );
- return ( 1 );
- }
-
- #ifdef NOTDEF
- case '^':
- {
- register char next_char = array[2];
-
- if ( next_char == '?' )
- return ( 0x7f );
-
- else if ( next_char >= 'A' && next_char <= 'Z' )
- return ( next_char - 'A' + 1 );
-
- else if ( next_char >= 'a' && next_char <= 'z' )
- return ( next_char - 'z' + 1 );
-
- synerr( "illegal \\^ escape sequence" );
-
- return ( 1 );
- }
- #endif
- }
-
- return ( array[1] );
- }
-
-
- /* otoi - convert an octal digit string to an integer value
- *
- * synopsis:
- * int val, otoi();
- * char str[];
- * val = otoi( str );
- */
-
- int otoi( str )
- char str[];
-
- {
- #ifdef FTLSOURCE
- fortran int gctoi()
- int dummy = 1;
-
- return ( gctoi( str, dummy, 8 ) );
- #else
- int result;
-
- (void) sscanf( str, "%o", &result );
-
- return ( result );
- #endif
- }
-
-
-
- #if 0
- /* reallocate_array - increase the size of a dynamic array */
-
- char *reallocate_array( array, size, element_size )
- char *array;
- int size, element_size;
-
- {
- register char *new_array = realloc( array,
- (unsigned) (size * element_size ));
-
- if ( new_array == NULL )
- flexfatal( "attempt to increase array size failed" );
-
- return ( new_array );
- }
-
-
- #endif
- SHAR_EOF
- cat << \SHAR_EOF > ncnb.def
- % correct anything to anything else for 3
- %correct . . 3
-
- % a number of common numeric misreads. 4 is often read as (+
- % but we cannot correct string to string.
- %correct "Z" "2" 1
- %correct "z" "2" 1
- %correct "S" "5" 1
- %correct "s" "5" 1
- %correct "l" "1" 1
- %correct "i" "1" 1
- %correct "I" "1" 1
- %correct "o" "0" 1
- %correct "O" "0" 1
- %correct "r" "5" 1
- %correct "?" "9" 1
- %correct "+" "4" 1
-
- % default insertion cost is 2
- %insert . 2
-
- % charge extra for numbers to avoid hallucinations.
- %insert [0-9] 3
-
- % delete anything for 2. correct cost <= insert + delete.
- %delete . 2
- %delete [0-9] 3
-
- % the maximum tolerable error cost
- %tolerance 20
- %%
- ([ ]{20,30}[+ ]([ ]{5,7}[0-9]{5}|"NO ITEM # ")[ ]{3,4}[0-9]{9}[ ]{5,7}[0-1][0-9]-[0-3][0-9][ ]{6,13}(""|[1-9][0-9]{0,2}|[1-9][0-9]{0,2}","[0-9]{3})\.[0-9]{2}([ ]{5,10}[+ ]([ ]{5,7}[0-9]{5}|"NO ITEM # ")[ ]{3,4}[0-9]{9}[ ]{5,7}[0-1][0-9]-[0-3][0-9][ ]{6,13}(""|[1-9][0-9]{0,2}|[1-9][0-9]{0,2}","[0-9]{3})\.[0-9]{2})?)|(" ENCLOSED NUMBER NUMBER".*)
- SHAR_EOF
- cat << \SHAR_EOF > nfa.c
- /* nfa - NFA construction routines */
-
- /* Construct a state-labelled epsilon nfa for a regular expression.
- * Then read input and output a word from the language defined
- * by the nfa that is as close as possible to the input.
- */
-
- /*
- * Copyright (c) 1987, the University of California
- *
- * The United States Government has rights in this work pursuant to
- * contract no. DE-AC03-76SF00098 between the United States Department of
- * Energy and the University of California.
- *
- * This program may be redistributed. Enhancements and derivative works
- * may be created provided the new works, if made available to the general
- * public, are made available for use by anyone.
- */
-
- /* Changes copyright (c) 1991, Alan Wendt. This work is hereby made
- * available to the general public, for use by anyone. The changes
- * may be redistributed only if this notice is removed and the
- * "Lumberjack Song" substituted in its place, and you must also cut
- * down the largest tree in the forest with a herring.
- */
-
-
- #include "flexdef.h"
-
- #define FIRSTST(x) states[x]->firstst
- #define LASTST(x) states[x]->lastst
- #define FINALST(x) states[x]->finalst
- #define EDGEPTR(x) states[x]->edgeptr
- #define NOUTEDGES(x) states[x]->noutedges
- #define ACCPTNUM(x) ((x) == finalst)
- #define NINEDGES(x) states[x]->ninedges
- #define TRANSCHAR(x) states[x]->transchar
- #define TEMP(x) states[x]->temp
-
- #define EDGE_FROM(x) (x)->from
- #define EDGE_TO(x) (x)->to
-
-
- #define DELETE_COST(x) delete_cost[x]
- #define INSERT_COST(x) insert_cost[x]
- #define CORRECT_COST(x,y) correct_cost[x][y]
-
- #define MAXCOST 31
-
- int lastnfa, finalst;
- int tolerance = 15; /* maximum tolerable error */
-
- #define MAXMEM 32000L
- char *banks[40];
- int nbanks = 0;
- long memx; /* memory allocation counter */
- int globcost; /* cost of repair */
- struct BackEdge *globword; /* the repaired word */
-
- static char *getword(); /* reconstruct the repaired word */
- static void advance();
-
- char *ralloc(n) /* allocate memory */
- int n;
- {
- long result;
-
- retry:
-
- memx += sizeof(int) - 1; /* align */
- memx &= ~(sizeof(int) - 1);
- result = memx;
- memx += n;
- if (nbanks == 0 || memx > MAXMEM) {
- nbanks++;
- if (nbanks > 40)
- {
- fprintf(stderr, "no memory in ralloc\n");
- exit(1);
- }
- memx = 0;
- if (banks[nbanks - 1] == NULL)
- {
- banks[nbanks - 1] = sbrk((int)MAXMEM);
- }
- goto retry;
- }
- return banks[nbanks - 1]+result;
- }
-
-
- /* add_accept - add an accepting state to a machine
- *
- * synopsis
- *
- * add_accept( mach );
- *
- */
-
- add_accept( mach )
- int mach;
- {
- finalst = FINALST(mach);
- /* fprintf(stderr, "final state = %d\n", finalst); */
- }
-
- /* copysingl - make a given number of copies of a singleton machine
- *
- * synopsis
- *
- * newsng = copysingl( singl, num );
- *
- * newsng - a new singleton composed of num copies of singl
- * singl - a singleton machine
- * num - the number of copies of singl to be present in newsng
- */
-
- int copysingl( singl, num )
- int singl, num;
-
- {
- int copy, i;
-
- copy = mkstate( SYM_EPSILON );
-
- for ( i = 1; i <= num; ++i )
- copy = link_machines( copy, dupmachine( singl ) );
-
- return ( copy );
- }
-
-
- /* dumpnfa - debugging routine to write out an nfa
- *
- * synopsis
- * int state1;
- * dumpnfa( state1 );
- */
-
- dumpnfa( state1 )
- int state1;
-
- {
- int ns, j;
- struct Edge *e;
-
- fprintf( stderr, "\n\n********** beginning dump of state-labelled epsilon nfa with start state %d\n",
- state1 );
-
- /* we probably should loop starting at FIRSTST(state1) and going to
- * LASTST(state1), but they're not maintained properly when we "or"
- * all of the rules together. So we use our knowledge that the machine
- * starts at state 1 and ends at lastnfa.
- */
-
- /* for ( ns = FIRSTST(state1); ns <= LASTST(state1); ++ns ) */
- for ( ns = 1; ns <= lastnfa; ++ns )
- {
- fprintf( stderr, "state # %4d:\t %4d", ns, TRANSCHAR(ns));
- if (' ' <= TRANSCHAR(ns) && TRANSCHAR(ns) <= '~')
- fprintf(stderr, "'%c'", TRANSCHAR(ns));
- else
- fprintf(stderr, " ");
-
- fprintf(stderr, ", %d nedges %3d\n", ns == finalst, NOUTEDGES(ns) );
-
- for (e = EDGEPTR(ns), j = 0; j < NOUTEDGES(ns); e = e->next, j++)
- fprintf( stderr, "from %4d to %4d\n", EDGE_FROM(e), EDGE_TO(e));
- }
-
- fprintf( stderr, "********** end of dump\n" );
- }
-
-
- /* dupmachine - make a duplicate of a given machine
- *
- * synopsis
- *
- * copy = dupmachine( mach );
- *
- * copy - holds duplicate of mach
- * mach - machine to be duplicated
- *
- * note that the copy of mach is NOT an exact duplicate; rather, all the
- * transition states values are adjusted so that the copy is self-contained,
- * as the original should have been.
- *
- * also note that the original MUST be contiguous, with its low and high
- * states accessible by the arrays firstst and lastst
- */
-
- int dupmachine( mach )
- int mach;
-
- {
- int i, j, state, init, last = LASTST(mach), state_offset;
- struct Edge *e;
-
- for ( i = FIRSTST(mach); i <= last; ++i )
- {
- state = mkstate( TRANSCHAR(i) );
- }
-
- state_offset = state - i + 1;
- init = mach + state_offset;
-
- for ( i = FIRSTST(mach); i <= last; ++i )
- for (j = 0, e = EDGEPTR(i); j < NOUTEDGES(i); j++, e = e->next)
- mkxtion(e->from + state_offset, e->to + state_offset, __LINE__);
-
- FIRSTST(init) = FIRSTST(mach) + state_offset;
- FINALST(init) = FINALST(mach) + state_offset;
- LASTST(init) = LASTST(mach) + state_offset;
-
- return ( init );
- }
-
-
- /* link_machines - connect two machines together
- *
- * synopsis
- *
- * new = link_machines( first, last );
- *
- * new - a machine constructed by connecting first to last
- * first - the machine whose successor is to be last
- * last - the machine whose predecessor is to be first
- *
- * note: this routine concatenates the machine first with the machine
- * last to produce a machine new which will pattern-match first first
- * and then last, and will fail if either of the sub-patterns fails.
- * FIRST is set to new by the operation. last is unmolested.
- */
-
- int link_machines( first, last )
- int first, last;
-
- {
- #if 0
- fprintf(stderr, "link_machines(%d,%d)\n", first, last);
- #endif
- if ( first == NIL )
- {
- return ( last );
- }
-
- else if ( last == NIL )
- {
- return ( first );
- }
-
- else
- {
- mkxtion( FINALST(first), last, __LINE__ );
- FINALST(first) = FINALST(last);
- LASTST(first) = max( LASTST(first), LASTST(last) );
- FIRSTST(first) = min( FIRSTST(first), FIRSTST(last) );
-
- return ( first );
- }
- }
-
-
- /* mkclos - convert a machine into a closure
- *
- * synopsis
- * new = mkclos( state );
- *
- * new - a new state which matches the closure of "state"
- */
-
- int mkclos( state )
- int state;
-
- {
- return ( mkopt( mkposcl( state ) ) );
- }
-
-
- /* mkopt - make a machine optional
- *
- * synopsis
- *
- * new = mkopt( mach );
- *
- * new - a machine which optionally matches whatever mach matched
- * mach - the machine to make optional
- *
- * notes:
- * 1. mach must be the last machine created
- * 2. mach is destroyed by the call
- */
-
- int mkopt( mach )
- int mach;
-
- {
- return mkor( mach, mkstate( SYM_EPSILON ) );
- }
-
-
- /* mkor - make a machine that matches either one of two machines
- *
- * synopsis
- *
- * new = mkor( first, second );
- *
- * new - a machine which matches either first's pattern or second's
- * first, second - machines whose patterns are to be or'ed (the | operator)
- *
- * note that first and second are both destroyed by the operation
- * the code is rather convoluted because an attempt is made to minimize
- * the number of epsilon states needed
- */
-
- int mkor( first, second )
- int first, second;
-
- {
- int eps, result, newedges = 0;
-
- #if 0
- fprintf(stderr, "mkor(%d,%d)", first, second);
- #endif
- if ( first == NIL )
- return ( second );
-
- else if ( second == NIL )
- return ( first );
-
- eps = mkstate( SYM_EPSILON );
-
- mkxtion( FINALST(first), eps, __LINE__ );
- mkxtion( FINALST(second), eps, __LINE__ );
- FINALST(second) = FINALST(first) = eps;
- LASTST(first) = LASTST(second) = eps;
- newedges += 2;
- eps = mkstate( SYM_EPSILON );
- mkxtion( eps, first, __LINE__ );
- mkxtion( eps, second, __LINE__ );
- LASTST(first) = LASTST(second) = eps;
- newedges += 2;
- result = eps;
-
- FINALST(result) = FINALST(first);
- if ( FIRSTST(first) < FIRSTST(result) )
- FIRSTST(result) = FIRSTST(first);
- if ( FIRSTST(second) < FIRSTST(result) )
- FIRSTST(result) = FIRSTST(second);
- if ( LASTST(first) > LASTST(result) )
- LASTST(result) = LASTST(first);
- if ( LASTST(second) > LASTST(result) )
- LASTST(result) = LASTST(second);
-
- #if 0
- fprintf(stderr, "mkor returns %d\n", result);
- #endif
- return result;
- }
-
-
- /* mkposcl - convert a machine into a positive closure
- *
- * synopsis
- * new = mkposcl( state );
- *
- * new - a machine matching the positive closure of "state"
- */
-
- int mkposcl( state )
- int state;
-
- {
- int eps;
- if (TRANSCHAR(state) == SYM_EPSILON)
- {
- mkxtion( FINALST(state), state, __LINE__ );
- return ( state );
- }
-
- eps = mkstate( SYM_EPSILON );
- mkxtion( FINALST(state), eps, __LINE__ );
- mkxtion( eps, state, __LINE__ );
- FIRSTST(eps) = FIRSTST(state);
- LASTST(eps) = eps;
- FINALST(eps) = FINALST(state);
- return eps;
- }
-
-
- /* mkrep - make a replicated machine
- *
- * synopsis
- * new = mkrep( mach, lb, ub );
- *
- * new - a machine that matches whatever "mach" matched from "lb"
- * number of times to "ub" number of times
- *
- * note
- * if "ub" is INFINITY then "new" matches "lb" or more occurrences of "mach"
- */
-
- int mkrep( mach, lb, ub )
- int mach, lb, ub;
-
- {
- int base, tail, copy, i;
-
- base = copysingl( mach, lb - 1 );
-
- if ( ub == INFINITY )
- {
- copy = dupmachine( mach );
- mach = link_machines( mach, link_machines( base, mkclos( copy ) ) );
- }
-
- else
- {
- tail = mkstate( SYM_EPSILON );
-
- for ( i = lb; i < ub; ++i )
- {
- copy = dupmachine( mach );
- tail = mkopt( link_machines( copy, tail ) );
- }
-
- mach = link_machines( mach, link_machines( base, tail ) );
- }
-
- return ( mach );
- }
-
-
- /* mkstate - create a state with a transition on a given symbol
- *
- * synopsis
- *
- * state = mkstate( sym );
- *
- * state - a new state matching sym
- * sym - the symbol the new state is to have its in-transitions on
- * (state-labelled epsilon-nfa)
- *
- * note that this routine makes new states in ascending order through the
- * state array (and increments LASTNFA accordingly). The routine DUPMACHINE
- * relies on machines being made in ascending order and that they are
- * CONTIGUOUS. Change it and you will have to rewrite DUPMACHINE (kludge
- * that it admittedly is)
- */
-
- int mkstate( sym )
- int sym;
-
- {
- if (++lastnfa >= MAXIMUM_MNS)
- {
- lerrif( "input rules are too complicated (>= %d NFA states)",
- MAXIMUM_MNS );
- }
-
- states[lastnfa] = (struct state *)malloc(sizeof(struct state));
-
- if (!states[lastnfa])
- {
- lerrif("out of memory at line %d\n", __LINE__);
- }
-
- TRANSCHAR(lastnfa) = sym;
- FIRSTST(lastnfa) = lastnfa;
- FINALST(lastnfa) = lastnfa;
- LASTST(lastnfa) = lastnfa;
- EDGEPTR(lastnfa) = NULL;
- NOUTEDGES(lastnfa) = 0;
- NINEDGES(lastnfa) = 0;
-
- return ( lastnfa );
- }
-
- #define INSERT_LIST(head, new, next, prior) { \
- if (head == NULL) { \
- head = new->next = new->prior = new; \
- } \
- else { \
- new->next = head; \
- new->prior = head->prior; \
- new->prior->next = new; \
- new->next->prior = new; \
- head = new; \
- } \
- }
-
- #define UNLINK(x, type) { \
- type *next, *prior; \
- next = x->next; \
- prior = x->prior; \
- prior->next = next; \
- next->prior = prior; \
- }
-
- /* delxtion - delete a transition
- *
- * synopsis
- *
- * delxtion( edgeptr )
- *
- * edgeptr - a pointer to the struct Edge that is to be deleted
- */
- delxtion( edgeptr )
- struct Edge *edgeptr;
- {
- NOUTEDGES(EDGE_FROM(edgeptr))--;
- NINEDGES(EDGE_TO(edgeptr))--;
- UNLINK(edgeptr, struct Edge);
- if (EDGEPTR(EDGE_FROM(edgeptr)) == edgeptr)
- EDGEPTR(EDGE_FROM(edgeptr)) = edgeptr->next;
- if (NOUTEDGES(EDGE_FROM(edgeptr)) == 0)
- EDGEPTR(EDGE_FROM(edgeptr)) = NULL;
- free(edgeptr);
- }
-
- /* mkxtion - make a transition from one state to another
- *
- * synopsis
- *
- * mkxtion( statefrom, stateto );
- *
- * statefrom - the state from which the transition is to be made
- * stateto - the state to which the transition is to be made
- */
-
- /* SUPPRESS 590 */
- mkxtion( statefrom, stateto, fromline )
- int statefrom, stateto;
- {
- struct Edge *newedge;
-
- /*
- fprintf(stderr, "mkxtion(%d,%d,%d)\n", statefrom, stateto, fromline);
- */
-
- newedge = (struct Edge *)malloc(sizeof(struct Edge));
- if (!newedge)
- {
- fprintf(stderr, "no memory in mkxtion\n");
- exit(1);
- }
-
- NOUTEDGES(statefrom)++;
- NINEDGES(stateto)++;
- newedge->from = statefrom;
- newedge->to = stateto;
- INSERT_LIST(EDGEPTR(statefrom), newedge, next, prior)
- }
-
- static int bflen; /* length of the buffer */
- static char bf[512]; /* the buffer */
- extern FILE *yyin;
-
- /* v[j][q] records the cost of the cheapest way to get to state q
- * having consumed j characters.
- */
- static int maxj = 0; /* # of allocated vectors in v */
- static unsigned char *v[MAXLINE + 1];
-
- /* Back-edge is used to record the history of edits made by the system
- * as it finds the cost of edits to get the cheapest match. It is used
- * to reconstruct a word that is a closest match.
- */
- struct BackEdge {
- struct BackEdge *prior;
- char c;
- };
-
- struct Work
- {
- struct Work *next;
- int q; /* a state number */
- int j; /* a number of characters consumed */
- struct BackEdge *backedge;
- } *worklist[MAXCOST+1], *workend[MAXCOST+1];
-
- #define Q_PUT(type, head, tail, state, chars, word) { \
- type *newptr; \
- newptr = (type *)ralloc(sizeof(type)); \
- newptr->q = state; \
- newptr->j = chars; \
- newptr->backedge = word; \
- if (head == NULL) head = newptr; \
- else tail->next = newptr; \
- newptr->next = NULL; \
- tail = newptr; \
- }
-
- #define Q_EMPTY(head) (head == NULL)
-
- /* Get the next element off the queue. Make sure the queue is not
- * empty before you do this.
- */
- #define Q_GET(type, head, new) { \
- type *next; \
- new = *(head); \
- next = (head)->next; \
- (head) = next; \
- }
-
- /* This constructs a state-labelled epsilon-nfa from a regular expression.
- * Then it constructs a two-dimensional array v[j][n]. j corresponds
- * to the number of characters that have been read from the string, so
- * 0 <= j <= strlen(input). n corresponds to the states of the nfa.
- * Entries in v represent the cheapest cost of driving the nfa to state
- * n, having read j characters from the input. Initially the nfa is
- * in state 0, having read 0 characters from the input, so v[0][0] = 0.
- * The algorithm uses a dynamic programming algorithm based upon Dikstra's
- * shortest-path (with discrete bucketing) to find the cheapest path to
- * v[strlen(input)][nf] where nf is the nfa final state. If the cost
- * exceeds the tolerance, the search is abandoned.
- */
- main(argc, argv)
- int argc;
- char **argv;
- {
- int i, j;
-
- if (argc == 2)
- {
- yyin = fopen(argv[1], "r");
- if (!yyin) {
- fprintf(stderr, "cannot open %s\n", argv[1]);
- exit(1);
- }
- }
-
- for (i = 0; i < 128; i++)
- {
- delete_cost[i] = insert_cost[i] = 1;
- for (j = 0; j < 128; j++)
- correct_cost[i][j] = 1;
- }
-
- yyparse();
-
-
- firstnfa = link_machines( mkstate( SYM_EPSILON ), firstnfa );
-
- optimize_nfa();
-
- checkedges(__LINE__, __FILE__);
-
- /*
- fprintf(stderr, "firstnfa %d last %d\n", firstnfa, lastnfa);
- dumpnfa( firstnfa );
- fprintf(stderr, "\n\n");
- */
-
- while (gets( bf ))
- {
- /* fprintf(stderr, "bf = %s\n", bf); */
- bflen = strlen( bf );
- while (maxj <= bflen)
- {
- v[maxj] = (unsigned char *)
- allocate_array( lastnfa + 1, sizeof(*v[0]) );
- maxj++;
- }
-
- for (j = 0; j <= bflen; j++)
- memset((char *)(v[j]), '\377', (unsigned)(lastnfa + 1));
-
- nbanks = memx = 0;
- for (i = 0; i <= MAXCOST; i++)
- {
- worklist[i] = NULL;
- workend[i] = NULL;
- }
- getcosts(tolerance);
- if (globword)
- {
- /*
- fprintf(stderr, "cost = %d\nword = '%s'\n",
- globcost, getword(globword));
- */
- puts(getword(globword));
- }
- }
- /* fprintf(stderr, "done!\n"); */
- }
-
- getcosts(maxcost)
- {
- int cost = 0, modcost;
- struct Work new;
-
- /* We can get to the first nfa state, scanning zero characters,
- * for a cost of 0. Start off by sticking this fact into the queue.
- */
- globword = NULL;
- record(firstnfa, 0, 0, (struct BackEdge *)0);
-
- /* allow increasing costs and see how far we can get */
- for (cost = 0; cost <= maxcost; cost++)
- {
- modcost = cost % (MAXCOST + 1);
-
- /* process until we exhaust the work queue */
- while (!Q_EMPTY(worklist[modcost]))
- {
- Q_GET(struct Work, worklist[modcost], new);
-
- /*
- fprintf(stderr, "Pull chars=%d state=%d cost=%d from queue\n",
- new.j, new.q, cost);
- */
- if ( v[new.j][new.q] < cost )
- {
- /*
- fprintf(stderr, "discard chars=%d state=%d cost=%d\n",
- new.j, new.q, cost);
- */
- continue;
- }
-
- advance(new.q, new.j, cost, new.backedge);
- }
-
- if (globword)
- {
- return;
- }
- }
- }
-
- static char word[MAXLINE];
-
- static char *getword(edge)
- struct BackEdge *edge;
- {
- int first = 0, last = 0;
- int t;
-
- /* trace back and collect the corrected word into a buffer */
- for (; edge; edge = edge->prior)
- {
- word[last++] = edge->c;
- }
-
- word[last--] = 0;
-
- /* reverse the buffer */
- for (; last > first; last--, first++)
- {
- t = word[last]; word[last] = word[first]; word[first] = t;
- }
-
- return word;
- }
-
- /* A call to record says that we can get to a given state for a
- * given cost k. If this is further than we have gotten yet for
- * that much cost we put this info on the end of the worklist[k]
- * and also into v[cost][state].
- */
- record(state, chars, cost, word)
- int state; /* state we can get to for given cost */
- int chars; /* having consumed this many characters */
- int cost; /* cost */
- struct BackEdge *word; /* last node in a word that works */
- {
- int modcost;
-
- /*
- fprintf(stderr, "record state=%d chars=%d cost=%d word='%s'\n",
- state, chars, cost, getword(word));
- */
-
- if (state == finalst && chars == bflen && (!globword || cost < globcost))
- {
- globcost = cost;
- globword = word;
- }
-
- /* Is this a newly-discovered cheaper way to get here? */
- if (cost < v[chars][state])
- {
- v[chars][state] = cost;
- modcost = cost % (MAXCOST + 1);
- Q_PUT(struct Work, worklist[modcost], workend[modcost],
- state, chars, word);
- /*
- fprintf(stderr, "new v[%d][%d]=%d\n\n", chars, state, cost);
- */
- }
- }
-
- #define TRACE_ADVANCE 0
-
- /* A call to advance(state, chars, cost, word) asserts that cost is the
- * cheapest way we know to get to the given state after consuming chars
- * of the input, and demands that the closure of this assertion be
- * taken.
- *
- * Advance goes one more step and enters the new values in v. Advance will
- * eventually get called for everything that it puts in v, so recursive
- * calls are not necessary.
- */
- static void advance(state, chars, cost, word)
- int state; /* we can get to state p */
- int chars; /* after consuming so many characters */
- int cost; /* for a given cost */
- struct BackEdge *word; /* last char in a word that works */
- {
- register q; /* another state */
- struct Edge *e;
- struct BackEdge *new;
-
- #if TRACE_ADVANCE
- fprintf(stderr,
- "got to state %d at cost %d using %d input chars\n",
- state, cost, chars);
- #endif
-
- /* we can always consume one character and stay in the same state,
- * by incurring a "deletion charge".
- */
- if (chars < bflen)
- {
- #if TRACE_ADVANCE
- fprintf(stderr,
- "get to state %d at cost %d using %d chars by deleting 1 char\n",
- state, cost + DELETE_COST(bf[chars]), chars + 1);
- #endif
- record(state, chars + 1, cost + DELETE_COST(bf[chars]), word);
- }
-
- /* for each edge out of state p */
- for (e = EDGEPTR(state); e;)
- {
- q = EDGE_TO(e);
-
- /* If we have an epsilon edge to a new state, we can get
- * there for no additional charge and without consuming
- * any characters.
- */
- if (TRANSCHAR(q) == SYM_EPSILON)
- {
- #if TRACE_ADVANCE
- fprintf(stderr,
- "get to state %d at cost %d using %d chars with e-edge\n",
- q, cost, chars);
- #endif
- record(q, chars, cost, word);
- }
-
- else {
- /* We can incur an "insertion charge" and get to the
- * next state without using up any input characters.
- */
- new = (struct BackEdge *)ralloc(sizeof(struct BackEdge));
- new->c = TRANSCHAR(q);
- new->prior = word;
-
- #if TRACE_ADVANCE
- fprintf(stderr,
- "get to state %d at cost %d using %d chars by insert %c\n",
- q, cost + INSERT_COST(new->c), chars, new->c);
- #endif
-
- record( q, chars, cost + INSERT_COST(new->c), new );
-
- /* If we have a transition to a new state that is labelled
- * correctly, we can get there for no additional charge.
- */
- if (chars < bflen)
- {
- if (new->c == bf[chars])
- {
- #if TRACE_ADVANCE
- fprintf(stderr,
- "get to state %d cost %d using %d chars following %c edge\n",
- q, cost, chars + 1, new->c);
- #endif
- record(q, chars + 1, cost, new);
- }
-
- else {
- /* If we have a transition to a new state that is not
- * labelled correctly, we can get there by incurring a
- * correction cost.
- */
- #if TRACE_ADVANCE
- fprintf(stderr,
- "get to state %d cost %d using %d chars changing %c to %c\n",
- q, cost + CORRECT_COST(bf[chars], new->c),
- chars + 1, bf[chars], new->c);
- #endif
- record( q, chars + 1,
- cost + CORRECT_COST(bf[chars], new->c), new );
- }
- }
- }
-
-
- e = e->next;
- if (e == EDGEPTR(state))
- break;
- }
- }
-
- /* nfa branch chaining. Find a state A with an out-edge that points
- * to an epsilon state B. Remove the edge and duplicate the out-edges
- * of B into A. When you remove all of the in-edges of some state,
- * you can remove the state unless it is the start state.
- *
- * Straightfoward application of this principal makes the nfa
- * slower and larger. What happens is on something like
- * [0-9][0-9] the original machine had an e-state in between 10
- * left-hand states and 10 right-hand states, and this replaced
- * 20 edges with 100 in order to eliminate the e-state. This caused
- * 5 times more calls to record. Now we do not perform this opt
- * unless the product of B's inedges and B's outedges is <= their
- * sum. Optimize_nfa still usually eliminates half of the input
- * states.
- *
- * A worklist algorithm would probably improve the speed.
- */
- optimize_nfa()
- {
- int A, B, C, k, kk, changes;
- struct Edge *e, *nexte, *ee, *nextee;
-
-
- for (changes = 1; changes;)
- {
- changes = 0;
-
-
- changes = 0;
- for (A = 1; A <= lastnfa; A++)
- {
-
-
- /* If an edge from this state goes to an epsilon state */
- for (e = EDGEPTR(A); EDGEPTR(A); e = nexte) {
-
- nexte = e->next;
- B = EDGE_TO(e);
- if (TRANSCHAR(B) == SYM_EPSILON && B != finalst &&
- NINEDGES(B) * NOUTEDGES(B) <= NINEDGES(B) + NOUTEDGES(B)) {
-
- /* delete A->B edge */
- delxtion(e);
-
- /* copy B's out-edges to A */
- for (kk = 0, ee = EDGEPTR(B); kk < NOUTEDGES(B);
- ee = ee->next, kk++)
- {
- mkxtion(A, EDGE_TO(ee), __LINE__);
- /* Fix the case in which there was only one out edge
- * which is being replaced by a different one.
- * If we don't do this, e will point to the deleted
- * edge on the next loop iteration.
- */
- if (e == nexte)
- nexte = EDGEPTR(A);
- }
-
- /* delete B if it has no incoming edges */
- if (NINEDGES(B) == 0)
- {
- delete_state(B);
- A--;
- changes = 1;
- goto nextA; /* avoid second loop in case A=0 */
- }
- changes = 1;
- }
- if (nexte == EDGEPTR(A)) break;
- }
-
- for (k = 0, e = EDGEPTR(A); k < NOUTEDGES(A); k++, e = nexte) {
-
- nexte = e->next;
- B = EDGE_TO(e);
-
- if (TRANSCHAR(B) == SYM_EPSILON
- && NOUTEDGES(B) == 1
- && TRANSCHAR(C = EDGE_TO(EDGEPTR(B))) == SYM_EPSILON
- && ACCPTNUM(B) <= ACCPTNUM(C)) {
- /*
- fprintf(stderr, "%d -> eps %d -> eps %d\n", A, B, C);
- */
-
- delxtion(e);
- mkxtion(A, C, __LINE__);
- if (NINEDGES(B) == 0) {
- delete_state(B);
- A--;
- }
- changes = 1;
- }
- }
- nextA:;
- }
- }
-
- }
-
- /* delete an nfa state which has lost all of its in-edges */
- delete_state(d)
- int d;
- {
- int dd;
- struct Edge *e, *nexte;
-
- retry:
- #if 0
- fprintf(stderr, "delete_state(%d)\n", d);
- #endif
-
- if (firstnfa == lastnfa)
- firstnfa = d;
- if (finalst == lastnfa)
- finalst = d;
-
- /* Delete all of the edges emanating from the losing state.
- * Uncount the in-edge count of each state pointed to.
- */
- /* SUPPRESS 560 */
- while (e = EDGEPTR(d))
- delxtion(e);
-
- states[d] = states[lastnfa--]; /* overwrite the losing state */
-
- /* fix up the edges out of the state that moved */
- for (e = EDGEPTR(d); e; e = nexte)
- {
- nexte = e->next;
- if (EDGE_TO(e) == lastnfa + 1)
- {
- fprintf(stderr, "can't happen at line %d\n", __LINE__, __FILE__);
- exit(1);
- }
- EDGE_FROM(e) = d;
- if (nexte == EDGEPTR(d))
- break;
- }
-
- /* fix up the edges into the state that moved */
- for (dd = 1; dd <= lastnfa; dd++)
- {
- for (e = EDGEPTR(dd); e; e = nexte)
- {
- nexte = e->next;
- if (EDGE_TO(e) == lastnfa + 1)
- {
- EDGE_TO(e) = d;
- }
- if (nexte == EDGEPTR(dd))
- break;
- }
- }
-
-
- /* delete any states (except the start state) which have
- * lost all of their incoming edges.
- */
- for (d = 1; d <= lastnfa ; d++)
- if (NINEDGES(d) == 0 && d != firstnfa)
- goto retry;
-
- }
-
- /* Check the nfa to make sure the in-edge and out-edge counts are
- * correct. If you suspect nfa structure problems, call this from
- * all over the place.
- */
-
- /* SUPPRESS 590 */
- checkedges(fromline, fromfile)
- int fromline;
- char *fromfile;
- {
-
- int i;
- int k;
- struct Edge *e, *nexte;
- for (i = 1; i <= lastnfa; i++)
- TEMP(i) = 0;
-
- /* count the edges into and from each state */
- for (i = 1; i <= lastnfa; i++)
- {
- k = 0;
- for (e = EDGEPTR(i); e; e = nexte)
- {
- k++;
- nexte = e->next;
- TEMP(EDGE_TO(e))++;
- if (EDGE_FROM(e) != i)
- {
- fprintf(stderr, "EDGE_FROM(%d) is %d should be %d\n",
- e, EDGE_FROM(e), i);
- }
- if (nexte == EDGEPTR(i))
- break;
- }
- if (NOUTEDGES(i) != k)
- {
- fprintf(stderr, "noutedges[%d] is %d should be %d line %d %s\n",
- i, NOUTEDGES(i), k, fromline, fromfile);
- }
- }
-
- for (i = 1; i <= lastnfa; i++)
- if (TEMP(i) != NINEDGES(i))
- {
- fprintf(stderr, "ninedges[%d] is %d should be %d line %d %s\n",
- i, NINEDGES(i), TEMP(i), fromline, fromfile);
- }
- }
-
-
- SHAR_EOF
- cat << \SHAR_EOF > parse.h
- # define CHAR 257
- # define NUMBER 258
- # define SECTEND 259
- # define SCDECL 260
- # define XSCDECL 261
- # define WHITESPACE 262
- # define NAME 263
- # define CORRECT 264
- # define STRING 265
- # define DELETE 266
- # define INSERT 267
- # define TOLERANCE 268
- SHAR_EOF
- cat << \SHAR_EOF > parse.y
- /* parse.y - parser for flex input */
-
- /*
- * Copyright (c) 1987, the University of California
- *
- * The United States Government has rights in this work pursuant to
- * contract no. DE-AC03-76SF00098 between the United States Department of
- * Energy and the University of California.
- *
- * This program may be redistributed. Enhancements and derivative works
- * may be created provided the new works, if made available to the general
- * public, are made available for use by anyone.
- */
-
- %token CHAR NUMBER SECTEND SCDECL XSCDECL WHITESPACE NAME CORRECT
- %token STRING DELETE INSERT TOLERANCE
-
- %{
- #include "flexdef.h"
-
- struct Ccl ccl, anyccl, ccl0;
-
- int pat, eps, lastchar, i, firstnfa, finalst;
- char clower();
-
- int linenum, syntaxerror;
-
-
- static void makeany()
- {
- static int madeany = false;
- if ( ! madeany )
- {
- /* create the '.' character class */
- checkedges(__LINE__, __FILE__);
- cclinit( &anyccl );
- ccladd( &anyccl, '\n' );
- checkedges(__LINE__, __FILE__);
- cclnegate( &anyccl );
- checkedges(__LINE__, __FILE__);
-
- madeany = true;
- }
- }
-
-
- %}
-
- %%
- goal : initlex correctionrules sect2
- ;
-
- initlex :
- {
- /* initialize for processing rules */
- }
- ;
-
- correctionrules : correctionrules onerule
- |
- ;
-
- onerule : CORRECT item
- {
- ccl0 = ccl;
- }
- item NUMBER '\n'
- {
- int i, j;
- for (i = 0; i < 128; i++)
- {
- for (j = 0; j < 128; j++)
- {
- if (ccltest( &ccl0, i) && ccltest( &ccl, j))
- {
- correct_cost[i][j] = $5;
- }
- }
- }
- }
-
- | DELETE item NUMBER '\n'
- {
- int i, j;
- for (i = 0; i < 128; i++)
- {
- if (ccltest( &ccl, i))
- {
- delete_cost[i] = $3;
- }
- }
- }
-
- | '\n'
- {
- }
-
- | INSERT item NUMBER '\n'
- {
- int i;
- for (i = 0; i < 128; i++)
- {
- if (ccltest( &ccl, i))
- {
- insert_cost[i] = $3;
- }
- }
- }
-
- | TOLERANCE NUMBER '\n'
- {
- tolerance = $2;
- }
- ;
-
- item : '.'
- {
- makeany();
- ccl = anyccl;
- }
-
- | fullccl
- {
- }
-
- | CHAR
- {
- cclinit( &ccl );
- ccladd( &ccl, $1 );
- }
-
- | '"' CHAR '"'
- {
- cclinit( &ccl );
- ccladd( &ccl, $2 );
- }
-
- sect2 : sect2 initforrule flexrule '\n'
- |
- ;
-
- initforrule :
- {
- /* initialize for a parse of one rule */
- }
- ;
-
- flexrule : '^' re eol
- {
- checkedges(__LINE__, __FILE__);
- pat = link_machines( $2, $3 );
- checkedges(__LINE__, __FILE__);
- add_accept( pat );
-
- checkedges(__LINE__, __FILE__);
- firstnfa = pat;
- }
-
- | re eol
- {
- checkedges(__LINE__, __FILE__);
- pat = link_machines( $1, $2 );
- checkedges(__LINE__, __FILE__);
- add_accept( pat );
- checkedges(__LINE__, __FILE__);
- firstnfa = pat;
- }
-
- | error
- { synerr( "unrecognized rule" ); }
- ;
-
- eol : '$'
- {
- checkedges(__LINE__, __FILE__);
- eps = mkstate( SYM_EPSILON );
- checkedges(__LINE__, __FILE__);
- $$ = link_machines( eps, mkstate( '\n' ) );
- checkedges(__LINE__, __FILE__);
- }
-
- |
- {
- checkedges(__LINE__, __FILE__);
- $$ = mkstate( SYM_EPSILON );
- checkedges(__LINE__, __FILE__);
- }
- ;
-
- re : re '|' series
- {
- checkedges(__LINE__, __FILE__);
- $$ = mkor( $1, $3 );
- checkedges(__LINE__, __FILE__);
- }
-
- | series
- {
- $$ = $1;
- }
- ;
-
-
- series : series singleton
- {
- /* this is where concatenation of adjacent patterns
- * gets done
- */
- checkedges(__LINE__, __FILE__);
- $$ = link_machines( $1, $2 );
- checkedges(__LINE__, __FILE__);
- }
-
- | singleton
- { $$ = $1; }
- ;
-
- singleton : singleton '*'
- {
- checkedges(__LINE__, __FILE__);
- $$ = mkclos( $1 );
- checkedges(__LINE__, __FILE__);
- }
-
- | singleton '+'
- {
- checkedges(__LINE__, __FILE__);
- $$ = mkposcl( $1 );
- checkedges(__LINE__, __FILE__);
- }
-
- | singleton '?'
- {
- checkedges(__LINE__, __FILE__);
- $$ = mkopt( $1 );
- checkedges(__LINE__, __FILE__);
- }
-
- | singleton '{' NUMBER ',' NUMBER '}'
- {
- if ( $3 > $5 || $3 < 0 )
- {
- synerr( "bad iteration values" );
- $$ = $1;
- }
- else {
- if ( $3 == 0 )
- {
- checkedges(__LINE__, __FILE__);
- $$ = mkrep( $1, 1, $5 );
- checkedges(__LINE__, __FILE__);
- $$ = mkopt( $$ );
- checkedges(__LINE__, __FILE__);
- }
- else {
- checkedges(__LINE__, __FILE__);
- $$ = mkrep( $1, $3, $5 );
- checkedges(__LINE__, __FILE__);
- }
- }
- }
-
- | singleton '{' NUMBER ',' '}'
- {
- if ( $3 <= 0 )
- {
- synerr( "iteration value must be positive" );
- $$ = $1;
- }
-
- else
- $$ = mkrep( $1, $3, INFINITY );
- }
-
- | singleton '{' NUMBER '}'
- {
- if ( $3 <= 0 )
- {
- synerr( "iteration value must be positive" );
- $$ = $1;
- }
-
- else {
- checkedges(__LINE__, __FILE__);
- $$ = link_machines( $1, copysingl( $1, $3 - 1 ) );
- checkedges(__LINE__, __FILE__);
- }
- }
-
- | '.'
- {
-
- makeany();
- $$ = ccl2nfa( &anyccl );
- checkedges(__LINE__, __FILE__);
- }
-
- | fullccl
- {
- $$ = ccl2nfa( &ccl );
- }
-
-
- | '"' string '"'
- { $$ = $2; }
-
- | '(' re ')'
- { $$ = $2; }
-
- | CHAR
- {
- if ( $1 == '\0' )
- synerr( "null in rule" );
-
- checkedges(__LINE__, __FILE__);
- $$ = mkstate( $1 );
- checkedges(__LINE__, __FILE__);
- }
- ;
-
- fullccl : '[' ccl ']'
-
- | '[' '^' ccl ']'
- {
- checkedges(__LINE__, __FILE__);
- cclnegate( &ccl );
- checkedges(__LINE__, __FILE__);
- }
- ;
-
- ccl : ccl CHAR '-' CHAR
- {
- if ( $2 > $4 )
- synerr( "negative range in character class" );
-
- else
- {
- for ( i = $2; i <= $4; ++i )
- {
- checkedges(__LINE__, __FILE__);
- ccladd( &ccl, i );
- checkedges(__LINE__, __FILE__);
- }
-
- checkedges(__LINE__, __FILE__);
- lastchar = $4;
- }
-
- $$ = $1;
- }
-
- | ccl CHAR
- {
- checkedges(__LINE__, __FILE__);
- ccladd( &ccl, $2 );
- checkedges(__LINE__, __FILE__);
- lastchar = $2;
- checkedges(__LINE__, __FILE__);
- $$ = $1;
- }
-
- |
- {
- checkedges(__LINE__, __FILE__);
- lastchar = 0;
- checkedges(__LINE__, __FILE__);
- cclinit( &ccl );
- checkedges(__LINE__, __FILE__);
- }
- ;
-
- string : string CHAR
- {
- checkedges(__LINE__, __FILE__);
- $$ = link_machines( $1, mkstate( $2 ) );
- checkedges(__LINE__, __FILE__);
- }
-
- |
- {
- checkedges(__LINE__, __FILE__);
- $$ = mkstate( SYM_EPSILON );
- checkedges(__LINE__, __FILE__);
- }
- ;
-
- %%
-
- /* synerr - report a syntax error
- *
- * synopsis
- * char str[];
- * synerr( str );
- */
-
- synerr( str )
- char str[];
-
- {
- syntaxerror = true;
- fprintf( stderr, "Syntax error at line %d: %s\n", linenum, str );
- }
-
-
- /* yyerror - eat up an error message from the parser
- *
- * synopsis
- * char msg[];
- * yyerror( msg );
- */
-
- yyerror( msg )
- char msg[];
-
- {
- }
-
- SHAR_EOF
- cat << \SHAR_EOF > scan.l
- /* scan.l - scanner for flex input */
-
- /*
- * Copyright (c) 1987, the University of California
- *
- * The United States Government has rights in this work pursuant to
- * contract no. DE-AC03-76SF00098 between the United States Department of
- * Energy and the University of California.
- *
- * This program may be redistributed. Enhancements and derivative works
- * may be created provided the new works, if made available to the general
- * public, are made available for use by anyone.
- */
-
- %{
- #include "flexdef.h"
- #include "parse.h"
-
- char nmstr[MAXLINE];
-
- #define RETURNCHAR \
- yylval = yytext[0]; \
- return ( CHAR );
-
- #define RETURNNAME \
- (void) strcpy( nmstr, yytext ); \
- return ( NAME );
-
- #define RETURNSTRING \
- (void) strcpy( nmstr, yytext ); \
- return ( STRING );
-
- #define PUT_BACK_STRING(str, start) \
- for ( i = strlen( str ) - 1; i >= start; --i ) \
- unput(str[i])
- %}
-
- %x MAIN NUM QUOTE BRACEERROR FIRSTCCL CCL COR QUOTECOR
- %x FIRSTCCLCOR CCLCOR
-
- C ([a-zA-Z0-9_()%<>[\]{},.;&!~*/+\-=^|?:@`$# ])
- cchar ((\\?)({C}|\"))|(\\\')|(\\\\)|(\\([0-7]{1,3}))
- schar ((\\?)({C}|\'))|(\\\")|(\\\\)|(\\([0-7]{1,3}))
- WS [ \t]+
-
- NAME [a-z_][a-z_0-9]*
-
- ESCSEQ \\([^^\n]|"^".|0[0-9]{1,3})
-
- %%
- static int bracelevel, didadef;
- int i;
- char myesc();
-
-
- <INITIAL,COR>^%correct {
- BEGIN(COR);
- return CORRECT;
- }
-
- <INITIAL,COR>^"%"{WS}.*"\n" {
- /* I'm a comment */
- }
-
- <INITIAL,COR>^%tolerance {
- BEGIN(COR);
- return TOLERANCE;
- }
-
- <INITIAL,COR>^%insert {
- BEGIN(COR);
- return INSERT;
- }
-
- <INITIAL,COR>^%delete {
- BEGIN(COR);
- return DELETE;
- }
-
- <COR>{WS} {
- }
-
- <COR>[0-9]+ {
- yylval = myctoi( yytext );
- return ( NUMBER );
- }
-
- <COR>\" {
- BEGIN(QUOTECOR);
- return '"';
- }
-
- <QUOTECOR>\" {
- BEGIN(COR);
- return '"';
- }
-
- <QUOTECOR>. {
- RETURNCHAR;
- }
-
- <COR>\. {
- return '.';
- }
-
- <COR>"\n" {
- return '\n';
- }
-
- <COR>. {
- RETURNCHAR;
- }
-
- <INITIAL,COR>"["([^\\\]\n]|{ESCSEQ})+"]" {
- (void) strcpy( nmstr, yytext );
-
- /* push back everything but the leading bracket
- * so the ccl can be rescanned
- */
- PUT_BACK_STRING(nmstr, 1);
-
- BEGIN(FIRSTCCLCOR);
- return ( '[' );
- }
-
- <FIRSTCCLCOR>"^"/[^-\n] BEGIN(CCLCOR); return ( '^' );
- <FIRSTCCLCOR>"^"/- return ( '^' );
- <FIRSTCCLCOR>- BEGIN(CCLCOR); yylval = '-'; return ( CHAR );
- <FIRSTCCLCOR>. BEGIN(CCLCOR); RETURNCHAR;
- <FIRSTCCLCOR>{ESCSEQ} {
- yylval = myesc( yytext );
- BEGIN(CCLCOR);
- return ( CHAR );
- }
-
- <CCLCOR>-/[^\]\n] return ( '-' );
- <CCLCOR>[^\]\n] RETURNCHAR;
- <CCLCOR>"]" BEGIN(COR); return ( ']' );
-
-
- <INITIAL,COR>^"%%".*\n {
- BEGIN(MAIN);
- }
-
-
- <MAIN>^"^" return ( '^' );
- <MAIN>\" BEGIN(QUOTE); return ( '"' );
- <MAIN>"{"/[0-9] BEGIN(NUM); return ( '{' );
- <MAIN>"{"[^0-9\n][^}\n]* BEGIN(BRACEERROR);
- <MAIN>"$"/[ \t\n] return ( '$' );
-
- <MAIN>{WS} { /* needs to be separate from following rule due to
- * bug with trailing context
- */
- bracelevel = 0;
- return ( '\n' );
- }
-
- <MAIN>"["([^\\\]\n]|{ESCSEQ})+"]" {
- (void) strcpy( nmstr, yytext );
-
- /* push back everything but the leading bracket
- * so the ccl can be rescanned
- */
- PUT_BACK_STRING(nmstr, 1);
-
- BEGIN(FIRSTCCL);
- return ( '[' );
- }
-
- <MAIN>"{"{NAME}"}" {
- register char *nmdefptr;
- char *ndlookup();
-
- (void) strcpy( nmstr, yytext );
- nmstr[yyleng - 1] = '\0'; /* chop trailing brace */
-
- /* lookup from "nmstr + 1" to chop leading brace */
- if ( ! (nmdefptr = ndlookup( nmstr + 1 )) )
- synerr( "undefined {name}" );
-
- else
- { /* push back name surrounded by ()'s */
- unput(')');
- PUT_BACK_STRING(nmdefptr, 0);
- unput('(');
- }
- }
-
- <MAIN>[/|*+?.()] return ( yytext[0] );
- <MAIN>. RETURNCHAR;
- <MAIN>\n ++linenum; return ( '\n' );
-
-
- <QUOTE>[^"\n] RETURNCHAR;
- <QUOTE>\" BEGIN(MAIN); return ( '"' );
-
- <QUOTE>\n {
- synerr( "missing quote" );
- BEGIN(MAIN);
- ++linenum;
- return ( '"' );
- }
-
-
- <FIRSTCCL>"^"/[^-\n] BEGIN(CCL); return ( '^' );
- <FIRSTCCL>"^"/- return ( '^' );
- <FIRSTCCL>- BEGIN(CCL); yylval = '-'; return ( CHAR );
- <FIRSTCCL>. BEGIN(CCL); RETURNCHAR;
- <FIRSTCCL>{ESCSEQ} {
- yylval = myesc( yytext );
- BEGIN(CCL);
- return ( CHAR );
- }
-
- <CCL>-/[^\]\n] return ( '-' );
- <CCL>[^\]\n] RETURNCHAR;
- <CCL>"]" BEGIN(MAIN); return ( ']' );
-
-
-
- <NUM>[0-9]+ {
- yylval = myctoi( yytext );
- return ( NUMBER );
- }
-
- <NUM>"," return ( ',' );
- <NUM>"}" BEGIN(MAIN); return ( '}' );
-
- <NUM>. {
- synerr( "bad character inside {}'s" );
- BEGIN(MAIN);
- return ( '}' );
- }
-
- <NUM>\n {
- synerr( "missing }" );
- BEGIN(MAIN);
- ++linenum;
- return ( '}' );
- }
-
-
- <BRACEERROR>"}" synerr( "bad name in {}'s" ); BEGIN(MAIN);
- <BRACEERROR>\n synerr( "missing }" ); ++linenum; BEGIN(MAIN);
-
-
-
- <INITIAL,MAIN,QUOTE,CCL>{ESCSEQ} {
- yylval = myesc( yytext );
- return ( CHAR );
- }
-
-
-
- %%
- SHAR_EOF
- cat << \SHAR_EOF > sym.c
- /* sym - symbol table routines */
-
- /*
- * Copyright (c) 1987, the University of California
- *
- * The United States Government has rights in this work pursuant to
- * contract no. DE-AC03-76SF00098 between the United States Department of
- * Energy and the University of California.
- *
- * This program may be redistributed. Enhancements and derivative works
- * may be created provided the new works, if made available to the general
- * public, are made available for use by anyone.
- */
-
- #include "flexdef.h"
-
- struct hash_entry *ndtbl[NAME_TABLE_HASH_SIZE];
- struct hash_entry *sctbl[START_COND_HASH_SIZE];
-
- struct hash_entry *findsym();
-
-
- /* addsym - add symbol and definitions to symbol table
- *
- * synopsis
- * char sym[], *str_def;
- * int int_def;
- * hash_table table;
- * int table_size;
- * 0 / -1 = addsym( sym, def, int_def, table, table_size );
- *
- * -1 is returned if the symbol already exists, and the change not made.
- */
-
- int addsym( sym, str_def, int_def, table, table_size )
- register char sym[];
- char *str_def;
- int int_def;
- hash_table table;
- int table_size;
-
- {
- int hash_val = hashfunct( sym, table_size );
- register struct hash_entry *entry = table[hash_val];
- register struct hash_entry *new_entry;
- register struct hash_entry *successor;
- char *malloc();
-
- while ( entry )
- {
- if ( ! strcmp( sym, entry->name ) )
- { /* entry already exists */
- return ( -1 );
- }
-
- entry = entry->next;
- }
-
- /* create new entry */
- new_entry = (struct hash_entry *) malloc( sizeof( struct hash_entry ) );
-
- if ( new_entry == NULL )
- flexfatal( "symbol table memory allocation failed" );
-
- if ( (successor = table[hash_val]) )
- {
- new_entry->next = successor;
- successor->prev = new_entry;
- }
- else
- new_entry->next = NULL;
-
- new_entry->prev = NULL;
- new_entry->name = sym;
- new_entry->str_val = str_def;
- new_entry->int_val = int_def;
-
- table[hash_val] = new_entry;
-
- return ( 0 );
- }
-
-
- /* findsym - find symbol in symbol table
- *
- * synopsis
- * char sym[];
- * hash_table table;
- * int table_size;
- * struct hash_entry *entry, *findsym();
- * entry = findsym( sym, table, table_size );
- */
-
- struct hash_entry *findsym( sym, table, table_size )
- register char sym[];
- hash_table table;
- int table_size;
-
- {
- register struct hash_entry *entry = table[hashfunct( sym, table_size )];
- static struct hash_entry empty_entry =
- {
- (struct hash_entry *) 0, (struct hash_entry *) 0, NULL, NULL, 0,
- } ;
-
- while ( entry )
- {
- if ( ! strcmp( sym, entry->name ) )
- return ( entry );
- entry = entry->next;
- }
-
- return ( &empty_entry );
- }
-
-
- /* hashfunct - compute the hash value for "str" and hash size "hash_size"
- *
- * synopsis
- * char str[];
- * int hash_size, hash_val;
- * hash_val = hashfunct( str, hash_size );
- */
-
- int hashfunct( str, hash_size )
- register char str[];
- int hash_size;
-
- {
- register int hashval;
- register int locstr;
-
- hashval = 0;
- locstr = 0;
-
- while ( str[locstr] )
- hashval = ((hashval << 1) + str[locstr++]) % hash_size;
-
- return ( hashval );
- }
-
-
- /* ndinstal - install a name definition
- *
- * synopsis
- * char nd[], def[];
- * ndinstal( nd, def );
- */
-
- ndinstal( nd, def )
- char nd[], def[];
-
- {
- char *copy_string();
-
- if ( addsym( copy_string( nd ), copy_string( def ), 0,
- ndtbl, NAME_TABLE_HASH_SIZE ) )
- synerr( "name defined twice" );
- }
-
-
- /* ndlookup - lookup a name definition
- *
- * synopsis
- * char nd[], *def;
- * char *ndlookup();
- * def/NULL = ndlookup( nd );
- */
-
- char *ndlookup( nd )
- char nd[];
-
- {
- return ( findsym( nd, ndtbl, NAME_TABLE_HASH_SIZE )->str_val );
- }
-
-
- SHAR_EOF
- cat << \SHAR_EOF > test.in
- LmC"L4m!3
-
- NCNB Nitionil Bank 0 2 - 2 8-9 1
- 2 9 4
-
-
-
-
-
-
-
-
-
-
-
-
-
- DEBITS CHECK REFERENCE DATE CHECK REFERENCE DATE
- ENCLOSED NU4BER ?UMBER POSTED AMOUNT NUMBER NUMBER POSTED AMOUNT
- 56818 060202238 02-21 1,463.96 56885 050406145 02-26 621. 34
- + 56820 061508340 02-19 414.00 56886 051309782 02-25 212.40
- 56821 060561884 OZ-20 380.00 + 56888 060203646 02-26 316.80
- 56322 050308612 02-20 363.60 56839 050000681 OZ-27 349.80
- S6823 OS100SO88 02-19 206.57 56890 060701340 02-26 380.00
- 56824 OS1215839 02-19 190.80 56891 OS2104948 02-25 123.20
- + 56826 051215810 02-19 119.42 56892 osoilOS09 02-27 588.38
- 56827 051009745 02-19 258.00 56893 050106127 02-28 161.60
- 56828 060-,41320 02-20 621.83 56894 OS2104867 C2-2S 168.00
- 56829 060006534 02-20 541.21 56895 061000900 02-26 63.00
- 56830 OS1216101 02-19 19216.00 56896 050307144 02-27 703.52
- + 56832 05111167S 02-19 269.90 56897 0605OS986 02-26 242.04
- 56833 060003876 02-20 1,987.50 56898 050106126 02-28 161.60
- + 56836 051201744 02-21 182,40 56899 060708682 02-26 964,08
- 56837 050202940 02-21 1;219.20 56900 060708683 02-26 64.68
- + 56839 06000451S 02-21 491.10 56901 061308454 02-26 173.70
- 56840 OS020209S 02-25 389.60 S6902 060809S84 02-26 213,38
- 56841 050308595 02-20 91-52 56903 061308450 02-26 1,202.00
- 56842 050007600 02-20 596.75 + 5690S 060114198 02-27 604.18
- 56843 050111266 02-22 372.28 + 56908 OS1103408 02-27 108.80
- 56844 260208095 OZ-21 67.50 56909 051507798 02-27 104.65
- 5684S 060802989 02-20 1,108.8S 56910 050104176 OZ-28 171.70
- + 56847 060109317 02-21 412.70 56911 051606473 02-27 98.48
- 56848 050307489 02-21 914.80 56912 051312820 02-27 676.40
- + 56852 051607269 02-21 562.60 + 56916 050603S74 02-28 15S.20
- 56853 050007191 02-26 161.60 56917 050603564 02-28 530.40
- 56854 OS1103024 02-21 54.00 56918 050603S65 02-28 710.64
- 56855 0511028?5 02-21 161.60 56919 050607308 0 2 - 28 217.90
- + 56857 050513006 OZ-22 105.9z 56920 050310013 02-23 347.65
- 56858 050202096 02-25 108.80 + 56922 OS0310012 02-28 429.8S
- 56859 060808589 O2-2S 356.40 56923 060601777 02-28 617.69
- 56860 050806133 02-22 406.36 56924 060709449 02-28 201.70
- 56861 050808783 02-22 454.8S + 57091 050608090 02-27 210.38
- 56862 060200919 02-25 291.62 57092 060012187 02-25 629.40
- 56863 050207083 02-25 112.40 57093 050202097 02-25 161.60
- 56864 060312068 02-22 640.58 57094 051012134 02-22 168.79
- 56865 OSIO01219 O2-2S 242.80 57095 OSIO04287 02-22 338.00
- 56866 060804384 02-25 732.40 57096 0607042SS 02-22 lSl.20
- 56867 051500756 02-25 312.80 57097 060006924 02-26 200.40
- 56868 060511506 02-25 50.40 57098 050102456 02-26 410.90
- 56369 060508702 02-2s 9S.36 57099 050102583 02-25 8S.75
- + 56872 060525998 02-25 226.00 57100 060100999 02-20 672.00
- + 56874 060212912 02-26 192.00 57101 050906894 02-20 50. 96
- 56875 OS1500744 02-25 39-5.44 S7102 051009816 02-19 468.00
- 56876 050007022 02-27 259.80 57103 050308609 02-20 396.00
- 56877 060006155 02-27 298.80 + 57105 060212020 02-20 161.60
- 56878 050201596 02-27 176.00 S7106 051002151 02-22 397.84
- 56879 250211326 02-27 199.75 57107 060002931 02-20 382.40
- 56880 050406092 02-26 940.89 57108 OS02074S9 02-12 41S.06
- + 56882 050007659 02-27 899.20 57109 051909717 02-11 94.40
- 56883 060213188 02-26 221.10 57110 060503196 OZ-12 119.70
- 56884 061308451 OZ-26 253.60 NO ITEM 9 051204273 02-22 273.80
-
- + INDICATES BREAK IN NUMERICAL SEQUENCE
-
-
-
-
-
-
-
-
- NdnT'rg QPP nw/rDOL, Qlngl C:C)Q INADOOTAPI-R It,([:(-)DRAATir)rll kA,-h., P:ni(,
-
- iNCR!=9 XXXXXXXXXX
- SHAR_EOF
- cat << \SHAR_EOF > test.out
- ENCLOSED NUMBER NUMBER POSTED AMOUNT NUMBER NUMBER POSTED AMOUNT
- 56818 060202238 02-21 1,463.96 56885 050406145 02-26 621.34
- + 56820 061508340 02-19 414.00 56886 051309782 02-25 212.40
- 56821 060561884 02-20 380.00 + 56888 060203646 02-26 316.80
- 56322 050308612 02-20 363.60 56839 050000681 02-27 349.80
- 56823 051005088 02-19 206.57 56890 060701340 02-26 380.00
- 56824 051215839 02-19 190.80 56891 052104948 02-25 123.20
- + 56826 051215810 02-19 119.42 56892 050110509 02-27 588.38
- 56827 051009745 02-19 258.00 56893 050106127 02-28 161.60
- 56828 060841320 02-20 621.83 56894 052104867 12-25 168.00
- 56829 060006534 02-20 541.21 56895 061000900 02-26 63.00
- 56830 051216101 02-19 19,216.00 56896 050307144 02-27 703.52
- + 56832 051111675 02-19 269.90 56897 060505986 02-26 242.04
- 56833 060003876 02-20 1,987.50 56898 050106126 02-28 161.60
- + 56836 051201744 02-21 182.40 56899 060708682 02-26 964.08
- 56837 050202940 02-21 1,219.20 56900 060708683 02-26 64.68
- + 56839 060004515 02-21 491.10 56901 061308454 02-26 173.70
- 56840 050202095 02-25 389.60 56902 060809584 02-26 213.38
- 56841 050308595 02-20 91.52 56903 061308450 02-26 1,202.00
- 56842 050007600 02-20 596.75 + 56905 060114198 02-27 604.18
- 56843 050111266 02-22 372.28 + 56908 051103408 02-27 108.80
- 56844 260208095 02-21 67.50 56909 051507798 02-27 104.65
- 56845 060802989 02-20 1,108.85 56910 050104176 02-28 171.70
- + 56847 060109317 02-21 412.70 56911 051606473 02-27 98.48
- 56848 050307489 02-21 914.80 56912 051312820 02-27 676.40
- + 56852 051607269 02-21 562.60 + 56916 050603574 02-28 155.20
- 56853 050007191 02-26 161.60 56917 050603564 02-28 530.40
- 56854 051103024 02-21 54.00 56918 050603565 02-28 710.64
- 56855 051102895 02-21 161.60 56919 050607308 02-28 217.90
- + 56857 050513006 02-22 105.92 56920 050310013 02-23 347.65
- 56858 050202096 02-25 108.80 + 56922 050310012 02-28 429.85
- 56859 060808589 02-25 356.40 56923 060601777 02-28 617.69
- 56860 050806133 02-22 406.36 56924 060709449 02-28 201.70
- 56861 050808783 02-22 454.85 + 57091 050608090 02-27 210.38
- 56862 060200919 02-25 291.62 57092 060012187 02-25 629.40
- 56863 050207083 02-25 112.40 57093 050202097 02-25 161.60
- 56864 060312068 02-22 640.58 57094 051012134 02-22 168.79
- 56865 051001219 02-25 242.80 57095 051004287 02-22 338.00
- 56866 060804384 02-25 732.40 57096 060704255 02-22 151.20
- 56867 051500756 02-25 312.80 57097 060006924 02-26 200.40
- 56868 060511506 02-25 50.40 57098 050102456 02-26 410.90
- 56369 060508702 02-25 95.36 57099 050102583 02-25 85.75
- + 56872 060525998 02-25 226.00 57100 060100999 02-20 672.00
- + 56874 060212912 02-26 192.00 57101 050906894 02-20 50.96
- 56875 051500744 02-25 395.44 57102 051009816 02-19 468.00
- 56876 050007022 02-27 259.80 57103 050308609 02-20 396.00
- 56877 060006155 02-27 298.80 + 57105 060212020 02-20 161.60
- 56878 050201596 02-27 176.00 57106 051002151 02-22 397.84
- 56879 250211326 02-27 199.75 57107 060002931 02-20 382.40
- 56880 050406092 02-26 940.89 57108 050207459 02-12 415.06
- + 56882 050007659 02-27 899.20 57109 051909717 02-11 94.40
- 56883 060213188 02-26 221.10 57110 060503196 02-12 119.70
- 56884 061308451 02-26 253.60 NO ITEM # 051204273 02-22 273.80
- SHAR_EOF
- # End of shell archive
- exit 0
-