home *** CD-ROM | disk | FTP | other *** search
- Newsgroups: comp.sources.misc
- From: goer@midway.uchicago.edu (Richard L. Goerwitz)
- Subject: v38i049: ibpag2 - Icon-Based Parser Generator, Part05/05
- Message-ID: <1993Jul13.044510.17324@sparky.sterling.com>
- X-Md4-Signature: 0f43d1c51d8d78bfad1938f6ba3bfd09
- Sender: kent@sparky.sterling.com (Kent Landfield)
- Organization: University of Chicago
- Date: Tue, 13 Jul 1993 04:45:10 GMT
- Approved: kent@sparky.sterling.com
-
- Submitted-by: goer@midway.uchicago.edu (Richard L. Goerwitz)
- Posting-number: Volume 38, Issue 49
- Archive-name: ibpag2/part05
- Environment: Icon
-
- #! /bin/sh
- # This is a shell archive. Remove anything before this line, then feed it
- # into a shell via "sh file" or similar. To overwrite existing files,
- # type "sh file -c".
- # Contents: Makefile.dist beta2ref.ibp ibwriter.icn iohno.icn
- # outbits.icn sample.ibp shrnktbl.icn slshupto.icn
- # Wrapped by kent@sparky on Sun Jul 11 18:51:51 1993
- PATH=/bin:/usr/bin:/usr/ucb:/usr/local/bin:/usr/lbin ; export PATH
- echo If this archive is complete, you will see the following message:
- echo ' "shar: End of archive 5 (of 5)."'
- if test -f 'Makefile.dist' -a "${1}" != "-c" ; then
- echo shar: Will not clobber existing file \"'Makefile.dist'\"
- else
- echo shar: Extracting \"'Makefile.dist'\" \(2833 characters\)
- sed "s/^X//" >'Makefile.dist' <<'END_OF_FILE'
- X##########################################################################
- X#
- X PROGNAME = ibpag2
- X#
- X##########################################################################
- X#
- X# User-modifiable section. Read carefully! You will almost
- X# certainly have to change some settings here.
- X#
- X
- X#
- X# Destination directory for binaries files. Owner and group for
- X# public executables. Leave the trailing slash off of directory
- X# names.
- X#
- XOWNER = richard # root
- XGROUP = group # root
- XDESTDIR = /usr/local/bin
- X# Put this path into your LPATH variable (on which, see the Icon
- X# documentation). Make sure that the directory exists.
- XLIBDIR = /usr/local/lib/icon/data
- X
- X#
- X# Name of your icon compiler and compiler flags.
- X#
- XICONC = /usr/icon/v8/bin/icont
- XIFLAGS = -u #-Sc 400 -Sg 400 -Si 2000 -Sn 4000 -SF 40
- X
- XSHELL = /bin/sh
- XSHAR = /usr/local/bin/shar
- XCOMPRESS = /usr/bin/compress
- X# COMPRESS = /usr/local/bin/gzip
- X
- X###########################################################################
- X#
- X# Don't change anything below this line unless you're really sure of
- X# what you're doing.
- X#
- X
- XAUX = slshupto.icn rewrap.icn outbits.icn
- XSRC = $(PROGNAME).icn $(AUX) slrtbls.icn slritems.icn follow.icn \
- X ibutil.icn iohno.icn ibreader.icn ibtokens.icn ibwriter.icn \
- X shrnktbl.icn version.icn
- XPARSER = iiparse.lib
- XGLRPARSER = iiglrpar.lib
- XSHARFILES = $(SRC) $(PARSER) $(GLRPARSER) sample.ibp beta2ref.ibp \
- X iacc.ibp Makefile.dist README
- X
- Xall: $(PROGNAME)
- X
- X$(PROGNAME): $(SRC)
- X $(ICONC) $(IFLAGS) -o $(PROGNAME) $(SRC)
- X
- X
- X##########################################################################
- X#
- X# Pseudo-target names (shar, install, clean, clobber)
- X#
- X
- X#
- X# Assumes you have a shar program like mine.
- X#
- Xshar: $(SHARFILES)
- X @echo ""
- X @echo "Removing any old shars in this directory."
- X @echo ""
- X -rm -f $(PROGNAME).[0-9][0-9].Z
- X @echo ""
- X $(SHAR) -fVc -o$(PROGNAME) -L32 $(SHARFILES)
- X $(COMPRESS) -f $(PROGNAME).[0-9][0-9]
- X @echo ""
- X @echo "Shell archive finished."
- X @echo ""
- X
- X# Pessimistic assumptions regarding the environment (in particular,
- X# I don't assume you have the BSD "install" shell script).
- Xinstall: all
- X @echo ""
- X -test -d $(DESTDIR) || mkdir $(DESTDIR) && chmod 755 $(DESTDIR)
- X cp $(PROGNAME) $(DESTDIR)/$(PROGNAME)
- X -chgrp $(GROUP) $(DESTDIR)/$(PROGNAME)
- X -chown $(OWNER) $(DESTDIR)/$(PROGNAME)
- X -chmod 755 $(DESTDIR)/$(PROGNAME)
- X -test -d $(LIBDIR) || mkdir $(LIBDIR) && chmod 755 $(LIBDIR)
- X cp $(PARSER) $(LIBDIR)/$(PARSER)
- X cp $(GLRPARSER) $(LIBDIR)/$(GLRPARSER)
- X -chgrp $(GROUP) $(LIBDIR)/$(PARSER)
- X -chown $(OWNER) $(LIBDIR)/$(PARSER)
- X -chgrp $(GROUP) $(LIBDIR)/$(GLRPARSER)
- X -chown $(OWNER) $(LIBDIR)/$(GLRPARSER)
- X -chmod 644 $(LIBDIR)/$(PARSER)
- X -chmod 644 $(LIBDIR)/$(GLRPARSER)
- X @echo ""
- X @echo "Done installing."
- X @echo ""
- X
- X#
- X# Cleanup
- X#
- Xclean:
- X -rm -f *~ #*# core *.u[12] $(PROGNAME).output
- Xclobber: clean
- X -rm $(PROGNAME)
- END_OF_FILE
- if test 2833 -ne `wc -c <'Makefile.dist'`; then
- echo shar: \"'Makefile.dist'\" unpacked with wrong size!
- fi
- # end of 'Makefile.dist'
- fi
- if test -f 'beta2ref.ibp' -a "${1}" != "-c" ; then
- echo shar: Will not clobber existing file \"'beta2ref.ibp'\"
- else
- echo shar: Extracting \"'beta2ref.ibp'\" \(2441 characters\)
- sed "s/^X//" >'beta2ref.ibp' <<'END_OF_FILE'
- X#
- X# Ibpag2 source file for OT betacode-to-English converter.
- X#
- X# "Betacode" is the name used for the markers that the Thesaurus
- X# Linguae Graecae uses to segment texts into works, books, chapters,
- X# verses, etc. The Michigan-Claremont scan of the Hebrew OT (BHS)
- X# uses a subset of the betacode "language." This file contains a
- X# parser for that language that converts it into human readable form.
- X#
- X# Reads the standard input. Sends the original text, with betacode
- X# markers converted to human-readable form, to the standard output.
- X#
- X
- X%{
- X
- X# These need to be global, because all of the actions modify them.
- X# Remember that the default scope for a variable used in an action is
- X# that action.
- X#
- Xglobal betavals, blev
- X
- X%}
- X
- X%token INTVAL, STRVAL, LINE
- X
- X%%
- X
- Xbetalines : betalines, betaline
- X | epsilon
- X ;
- X
- Xbetaline : '~', cvalue, xvalue, yvalue, '\n'
- X { if integer(betavals[2]) then {
- X write(betavals[1], " ",
- X betavals[2], ":",
- X betavals[3])
- X }
- X blev := 4 # global
- X }
- X | LINE, '\n' { write($1) }
- X ;
- X
- Xcvalue : 'a', value, 'b', value, 'c', value
- X { betavals[blev := 1] := $6 }
- X | 'c', value { betavals[blev := 1] := $2 }
- X | epsilon
- X ;
- X
- Xxvalue : 'x', value { betavals[blev := 2] := $2 }
- X | 'x' { if integer(betavals[2])
- X then betavals[blev := 2] +:= 1
- X else betavals[blev := 2] := 1
- X }
- X | epsilon { if blev < 2 then
- X betavals[2] := 1
- X }
- X ;
- X
- Xyvalue : 'y', value { betavals[blev := 3] := $2 }
- X | 'y' { betavals[blev := 3] +:= 1 }
- X | epsilon { if blev < 3 then
- X betavals[3] := 1
- X }
- X ;
- X
- Xvalue : INTVAL { return $1 }
- X | STRVAL { return $1 }
- X ;
- X
- X
- X%%
- X
- X
- Xprocedure iilex(infile)
- X
- X local line
- X # betavals is global
- X initial betavals := ["", 0, 0]
- X
- X while line := read(infile) do {
- X line ? {
- X if ="~" then {
- X suspend ord("~")
- X until pos(0) do {
- X case move(1) of {
- X "a" : suspend ord("a")
- X "b" : suspend ord("b")
- X "c" : suspend ord("c")
- X "x" : suspend ord("x")
- X "y" : suspend ord("y")
- X default : stop("betacode error: ", line)
- X }
- X if ="\"" then {
- X iilval := tab(find("\""))
- X suspend STRVAL
- X move(1)
- X } else {
- X if iilval := integer(tab(many(&digits)))
- X then suspend INTVAL
- X }
- X }
- X suspend ord("\n")
- X }
- X else {
- X iilval := line
- X suspend LINE
- X suspend ord("\n")
- X }
- X }
- X }
- X
- Xend
- X
- X
- Xprocedure main()
- X return iiparse(&input)
- Xend
- END_OF_FILE
- if test 2441 -ne `wc -c <'beta2ref.ibp'`; then
- echo shar: \"'beta2ref.ibp'\" unpacked with wrong size!
- fi
- # end of 'beta2ref.ibp'
- fi
- if test -f 'ibwriter.icn' -a "${1}" != "-c" ; then
- echo shar: Will not clobber existing file \"'ibwriter.icn'\"
- else
- echo shar: Extracting \"'ibwriter.icn'\" \(3586 characters\)
- sed "s/^X//" >'ibwriter.icn' <<'END_OF_FILE'
- X############################################################################
- X#
- X# Name: ibwriter.icn
- X#
- X# Title: Ibpag2 parser/library writer
- X#
- X# Author: Richard L. Goerwitz
- X#
- X# Version: 1.5
- X#
- X############################################################################
- X#
- X# Given a grammar, an action table, a goto table, an open output
- X# file, an open iiparser file, and a module name, sends to the output
- X# file a fully loaded LR parser with run-time constructible action
- X# and goto tables. The iiparser file contains the base LR parser
- X# that the output file uses.
- X#
- X############################################################################
- X#
- X# Links: ibtokens, ximage
- X#
- X# See also: iiparse.icn
- X#
- X############################################################################
- X
- X#link ibtokens, ximage
- Xlink ximage
- X
- X# defined in ibtokens.icn
- X# record ib_TOK(sym, str)
- X
- Xprocedure ibwriter(iiparse_file, outfile, grammar, atbl, gtbl, module)
- X
- X local token, next_token, start_symbol, rule_list, ttbl
- X
- X /module := ""
- X start_symbol := grammar.start
- X rule_list := grammar.rules
- X ttbl := grammar.tbl
- X next_token := create ibtokens(iiparse_file)
- X
- X #
- X # Copy tokens in iiparse_file to outfile. Whenever we find a $
- X # (RHSARG), process: If we find $$, output $; If we find $module,
- X # output image(module); and other such stuff. Note that
- X # copy_iiparse_tokens suspends tokens before writing them. It
- X # also blocks writing of any token whose sym field matches the
- X # string given as arg 3.
- X #
- X every token := copy_iiparse_tokens(next_token, outfile, "RHSARG")
- X do {
- X if token.sym == "RHSARG" then {
- X if (token := @next_token).sym == "RHSARG" then {
- X writes(outfile, token.str)
- X next
- X }
- X token.sym == "IDENT" | iohno(60, "line "|| line_number)
- X writes(outfile, " ")
- X case token.str of {
- X # copy $module name over as a literal
- X "module" : writes(outfile, image(module))
- X # use ximage to copy over action, goto, and token tables,
- X # as well as the production list (used only for debugging)
- X "atbl_insertion_point": writes(outfile, ximage(atbl))
- X "gtbl_insertion_point": writes(outfile, ximage(gtbl))
- X "ttbl_insertion_point": writes(outfile, ximage(ttbl))
- X "rule_list_insertion_point" :
- X writes(outfile, ximage(rule_list))
- X # use image to copy the start symbol into the output file
- X "start_symbol_insertion_point" :
- X writes(outfile, image(start_symbol))
- X # add the module name to anything else beginning with $
- X default : writes(outfile, token.str, module, " ")
- X }
- X }
- X }
- X
- X return
- X
- Xend
- X
- X
- X#
- X# copy_iiparse_tokens: coexpression x file x string -> ib_TOK records
- X# (next_token, out, except) -> token records
- X#
- X# Copy Icon code to output stream, also suspending as we go.
- X# Insert separators between tokens where needed. Do not output
- X# any token whose sym field matches except. The point in
- X# suspending tokens as we go is to enable the calling procedure to
- X# look for signal tokens that indicate insertion or termination
- X# points. Fail on EOF.
- X#
- Xprocedure copy_iiparse_tokens(next_token, out, except)
- X
- X local separator, T
- X
- X separator := ""
- X while T := @next_token do {
- X if \T.sym then suspend T
- X if \T.sym == \except then next
- X if any(&digits ++ &letters ++ '_.', \T.str, 1, 2) & \T.sym ~== "DOT"
- X then writes(out, separator)
- X writes(out, T.str)
- X if any(&digits ++ &letters ++ '_.', \T.str, -1, 0) & \T.sym ~== "DOT"
- X then separator := " " else separator := ""
- X }
- X
- Xend
- END_OF_FILE
- if test 3586 -ne `wc -c <'ibwriter.icn'`; then
- echo shar: \"'ibwriter.icn'\" unpacked with wrong size!
- fi
- # end of 'ibwriter.icn'
- fi
- if test -f 'iohno.icn' -a "${1}" != "-c" ; then
- echo shar: Will not clobber existing file \"'iohno.icn'\"
- else
- echo shar: Extracting \"'iohno.icn'\" \(2908 characters\)
- sed "s/^X//" >'iohno.icn' <<'END_OF_FILE'
- X############################################################################
- X#
- X# Name: iohno.icn
- X#
- X# Title: iohno (error handler, with hard-coded messages)
- X#
- X# Author: Richard L. Goerwitz
- X#
- X# Version: 1.20
- X#
- X############################################################################
- X#
- X# This file contains iohno(n, s) - an error handler taking two
- X# arguments: 1) an integer and 2) a string. The string (2) is an
- X# optional error message. The integer (1) is one of several
- X# hard-coded error numbers (see below).
- X#
- X############################################################################
- X#
- X# Links: rewrap
- X#
- X############################################################################
- X
- X#
- X# iohno: print error message s to stderr; abort with exit status n
- X#
- Xprocedure iohno(n, s)
- X
- X local i, msg
- X static errlist
- X initial {
- X errlist := [[100, "unspecified failure"],
- X
- X [2, "can't find iiparse.lib file"],
- X
- X [4, "unexpected EOF"],
- X [5, "unknown associativity value"],
- X
- X [11, "malformed right-hand side"],
- X [12, "unexpected RHS symbol type"],
- X
- X [21, "malformed left-hand side"],
- X
- X [30, "unknown or unimplemented % declaration"],
- X [31, "malformed token declaration"],
- X [32, "start symbol redefined"],
- X [33, "LHS symbol expected"],
- X [34, "colon missing"],
- X [35, "malformed RHS in rule declaration"],
- X [36, "undeclared character literal"],
- X [37, "illegal $integer reference"],
- X [38, "out-of-range $reference"],
- X [39, "unterminated brace { in action"],
- X [43, "bogus precedence"],
- X [44, "superfluous epsilon"],
- X [45, "superfluous %union declaration"],
- X [47, "empty or missing rules section"],
- X [48, "garbled declarations section"],
- X [49, "multiple characters within quotes"],
- X
- X [40, "same prec, different (or perhaps lacking) assoc"],
- X [41, "conflict between nonassociative rules"],
- X [42, "reduce -- reduce conflict"],
- X [46, "unresolvable shift/reduce conflict"],
- X
- X [50, "illegal conflict for nonassociative rules"],
- X [51, "reduce/reduce conflict"],
- X [52, "nonterminal useless and/or declared as a terminal"],
- X
- X [60, "malformed $insertion point in iiparse file"],
- X
- X [70, "bad action format"],
- X [71, "nonexistent rule number specified in old action"],
- X [72, "nonexistent rule number specified in new action"],
- X
- X [80, "conflict in goto table"],
- X
- X [90, "RHS nonterminal appears in no LHS"],
- X [91, "useless nonterminal"]
- X ]
- X }
- X
- X /n := 0
- X every i := 1 to *errlist do
- X if errlist[i][1] = n then msg := errlist[i][2]
- X writes(&errout, "error ", n, " (", msg, ")")
- X if \s then {
- X write(&errout, ": ")
- X every write(&errout, "\t", rewrap(s) | rewrap())
- X }
- X else write(&errout)
- X
- X exit(n)
- X
- Xend
- END_OF_FILE
- if test 2908 -ne `wc -c <'iohno.icn'`; then
- echo shar: \"'iohno.icn'\" unpacked with wrong size!
- fi
- # end of 'iohno.icn'
- fi
- if test -f 'outbits.icn' -a "${1}" != "-c" ; then
- echo shar: Will not clobber existing file \"'outbits.icn'\"
- else
- echo shar: Extracting \"'outbits.icn'\" \(3067 characters\)
- sed "s/^X//" >'outbits.icn' <<'END_OF_FILE'
- X############################################################################
- X#
- X# Name: outbits.icn
- X#
- X# Title: output variable-length characters in byte-size chunks
- X#
- X# Author: Richard L. Goerwitz
- X#
- X# Version: 1.5
- X#
- X############################################################################
- X#
- X# In any number of instances (e.g. when outputting variable-length
- X# characters or fixed-length encoded strings), the programmer must
- X# fit variable and/or non-byte-sized blocks into standard 8-bit
- X# bytes. Outbits() performs this task.
- X#
- X# Pass to outbits(i, len) an integer i, and a length parameter (len),
- X# and outbits will suspend byte-sized chunks of i converted to
- X# characters (most significant bits first) until there is not enough
- X# left of i to fill up an 8-bit character. The remaining portion is
- X# stored in a buffer until outbits() is called again, at which point
- X# the buffer is combined with the new i and then output in the same
- X# manner as before. The buffer is flushed by calling outbits() with
- X# a null i argument. Note that len gives the number of bits there
- X# are in i (or at least the number of bits you want preserved; those
- X# that are discarded are the most significant ones).
- X#
- X# A trivial example of how outbits() might be used:
- X#
- X# outtext := open("some.file.name","w")
- X# l := [1,2,3,4]
- X# every writes(outtext, outbits(!l,3))
- X# writes(outtext, outbits(&null,3)) # flush buffer
- X#
- X# List l may be reconstructed with inbits() (see inbits.icn):
- X#
- X# intext := open("some.file.name")
- X# l := []
- X# while put(l, inbits(intext, 3))
- X#
- X# Note that outbits() is a generator, while inbits() is not.
- X#
- X############################################################################
- X#
- X# Links: none
- X# See also: inbits.icn
- X#
- X############################################################################
- X
- X
- Xprocedure outbits(i, len)
- X
- X local old_part, new_part, window, old_byte_mask
- X static old_i, old_len, byte_length, byte_mask
- X initial {
- X old_i := old_len := 0
- X byte_length := 8
- X byte_mask := (2^byte_length)-1
- X }
- X
- X old_byte_mask := (0 < 2^old_len - 1) | 0
- X window := byte_length - old_len
- X old_part := ishift(iand(old_i, old_byte_mask), window)
- X
- X # If we have a no-arg invocation, then flush buffer (old_i).
- X if /i then {
- X if old_len > 0 then {
- X old_i := old_len := 0
- X return char(old_part)
- X } else {
- X old_i := old_len := 0
- X fail
- X }
- X } else {
- X new_part := ishift(i, window-len)
- X len -:= (len >= window) | {
- X old_len +:= len
- X old_i := ior(ishift(old_part, len-window), i)
- X fail
- X }
- X# For debugging purposes.
- X# write("old_byte_mask = ", old_byte_mask)
- X# write("window = ", image(window))
- X# write("old_part = ", image(old_part))
- X# write("new_part = ", image(new_part))
- X# write("outputting ", image(ior(old_part, new_part)))
- X suspend char(ior(old_part, new_part))
- X }
- X
- X until len < byte_length do {
- X suspend char(iand(ishift(i, byte_length-len), byte_mask))
- X len -:= byte_length
- X }
- X
- X old_len := len
- X old_i := i
- X fail
- X
- Xend
- END_OF_FILE
- if test 3067 -ne `wc -c <'outbits.icn'`; then
- echo shar: \"'outbits.icn'\" unpacked with wrong size!
- fi
- # end of 'outbits.icn'
- fi
- if test -f 'sample.ibp' -a "${1}" != "-c" ; then
- echo shar: Will not clobber existing file \"'sample.ibp'\"
- else
- echo shar: Extracting \"'sample.ibp'\" \(2364 characters\)
- sed "s/^X//" >'sample.ibp' <<'END_OF_FILE'
- X#
- X# Sample Ibpag2 grammar file.
- X#
- X
- X#
- X# The code between %{ and %} gets copied directly. Note the Iconish
- X# comment syntax.
- X#
- X%{
- X
- X# Note: If IIDEBUG is defined in the output file, debugging messages
- X# about the stacks and actions get displayed.
- X#
- X$define IIDEBUG 1
- X
- X%}
- X
- X#
- X# Here we declare the tokens returned by the lexical analyzer.
- X# Precedences increase as we go on. Note how (unlike YACC), tokens
- X# are separated by commas. Note also how UMINUS is used only for its
- X# %prec later.
- X#
- X%token NUMBER
- X%left '+', '-'
- X%left '*', '/'
- X%right UMINUS
- X
- X%%
- X
- X#
- X# After this point, and up to the next %%, we have the grammar itself.
- X# By default, the start symbol is the left-hand side of the first
- X# rule.
- X#
- X
- Xlines : lines, expr, '\n' { write($2) }
- X | lines, '\n'
- X | epsilon # Note use of epsilon/error tokens.
- X | error, '\n' {
- X write("syntax error; try again:")
- X # like YACC's yyerrok macro
- X iierrok
- X }
- X ;
- X
- Xexpr : expr, '+', expr { return $1 + $3 }
- X | expr, '-', expr { return $1 - $3 }
- X | expr, '*', expr { return $1 * $3 }
- X | expr, '/', expr { return $1 / $3 }
- X | '(', expr, ')' { return $2 }
- X | '-', expr %prec UMINUS { return -$2 }
- X | NUMBER { return $1 }
- X ;
- X
- X%%
- X
- X#
- X# From here on, code gets copied directly to the output file. We are
- X# no longer in the grammar proper.
- X#
- X
- X#
- X# The lexical analyzer must be called iilex, with the module name
- X# appended (if there is one). It must take one argument, infile (an
- X# input stream). It must be a generator, and fail on EOF (not return
- X# something <= 0, as is the case for YACC + Lex). Iilval holds the
- X# literal string value of the token just suspended by iilex().
- X#
- Xprocedure iilex(infile)
- X
- X local nextchar, c, num
- X initial {
- X # Here's where you'd initialize any %{ globals %} declared
- X # above.
- X }
- X
- X nextchar := create !(!infile || "\n" || "\n")
- X
- X c := @nextchar | fail
- X repeat {
- X if any(&digits, c) then {
- X if not (\num ||:= c) then
- X num := c
- X } else {
- X if iilval := \num then {
- X suspend NUMBER
- X num := &null
- X }
- X if any('+-*/()\n', c) then {
- X iilval := c
- X suspend ord(c)
- X } else {
- X if not any(' \t', c) then {
- X # deliberate error - will be handled later
- X suspend &null
- X }
- X }
- X }
- X c := @nextchar | break
- X }
- X if iilval := \num then {
- X return NUMBER
- X num := &null
- X }
- X
- Xend
- X
- Xprocedure main()
- X return iiparse(&input, 1)
- Xend
- END_OF_FILE
- if test 2364 -ne `wc -c <'sample.ibp'`; then
- echo shar: \"'sample.ibp'\" unpacked with wrong size!
- fi
- # end of 'sample.ibp'
- fi
- if test -f 'shrnktbl.icn' -a "${1}" != "-c" ; then
- echo shar: Will not clobber existing file \"'shrnktbl.icn'\"
- else
- echo shar: Extracting \"'shrnktbl.icn'\" \(3387 characters\)
- sed "s/^X//" >'shrnktbl.icn' <<'END_OF_FILE'
- X############################################################################
- X#
- X# Name: shrnktbl.icn
- X#
- X# Title: table shrinker
- X#
- X# Author: Richard L. Goerwitz
- X#
- X# Version: 1.4
- X#
- X############################################################################
- X#
- X# Action/goto table shrinking routine.
- X#
- X# Entry point: shrink_tables(start_symbol, st, atbl, gtbl), where
- X# start_symbol is the start symbol for the grammar whose productions
- X# are contained in the list/set st, and where atbl and gtbl are the
- X# action and goto tables, respectively. Returns &null, for lack of
- X# anything better.
- X#
- X# Basically, this routine merges duplicate structures in atbl and
- X# gtbl (if there are any), replaces the nonterminal symbols in the
- X# action table with integers (including the start symbol), then
- X# resets the goto table so that its keys point to these integers,
- X# instead of to the original nonterminal symbol strings.
- X#
- X############################################################################
- X#
- X# Links: structs, outbits
- X#
- X# See also: ibpag2, slrtbls
- X#
- X############################################################################
- X
- X# structs has equiv; outbits is for outputting variable-width integers
- X# as 8-bit characters
- X#
- X# link structs, outbits
- Xlink structs
- X
- X#
- X# shrink_tables
- X#
- Xprocedure shrink_tables(grammar, atbl, gtbl)
- X
- X local t, k, seen, nontermtbl, r, a, action, state, by_rule,
- X rule_len, LHS, keys
- X
- X # Create a table mapping nonterminal symbols to integers.
- X nontermtbl := table()
- X every r := !grammar.rules do
- X # r is a production; production records have LHS, RHS,...no
- X # fields, where the no field contains the rule number; we can
- X # use this as an arbitrary representation for that rule's LHS
- X # nonterminal
- X insert(nontermtbl, r.LHS, r.no)
- X
- X # Replace old start symbol.
- X grammar.start := nontermtbl[grammar.start]
- X
- X # Re-form the goto table to use the new integer values for
- X # nonterminals.
- X keys := set()
- X every insert(keys, key(gtbl))
- X every k := !keys do {
- X # first create a column for the new integer-valued nonterminal
- X insert(gtbl, string(nontermtbl[k]), gtbl[k])
- X # then clobber the old column with a string-valued nonterminal
- X gtbl[k] := &null
- X }
- X
- X # Rewrite actions using a fixed field-width format.
- X every t := !atbl do {
- X every k := key(t) do {
- X a := ""
- X t[k] ? {
- X while action := tab(any('sra')) do {
- X case action of {
- X "s": {
- X outbits(0, 2)
- X state := integer(tab(find(".")))
- X every a ||:= outbits(state, 11)
- X move(1)
- X by_rule := integer(tab(many(&digits)))
- X every a ||:= outbits(by_rule, 11)
- X outbits()
- X }
- X "r": {
- X outbits(1, 2)
- X state := integer(tab(find("<")))
- X every a ||:= outbits(state, 11)
- X move(1)
- X LHS := nontermtbl[tab(find(">"))]
- X every a ||:= outbits(LHS, 11)
- X move(1)
- X rule_len := integer(tab(many(&digits)))
- X every a ||:= outbits(rule_len, 8)
- X outbits()
- X }
- X "a": {
- X outbits(2, 2)
- X a ||:= outbits()
- X }
- X }
- X }
- X }
- X t[k] := a
- X }
- X }
- X
- X #
- X # Turn pointers to identical structures into pointers to the same
- X # structure.
- X #
- X seen := set()
- X every t := atbl | gtbl do {
- X every k := key(t) do {
- X if t[k] := equiv(t[k], !seen)
- X then next else insert(seen, t[k])
- X }
- X }
- X
- X # signal success
- X return &null
- X
- Xend
- END_OF_FILE
- if test 3387 -ne `wc -c <'shrnktbl.icn'`; then
- echo shar: \"'shrnktbl.icn'\" unpacked with wrong size!
- fi
- # end of 'shrnktbl.icn'
- fi
- if test -f 'slshupto.icn' -a "${1}" != "-c" ; then
- echo shar: Will not clobber existing file \"'slshupto.icn'\"
- else
- echo shar: Extracting \"'slshupto.icn'\" \(2247 characters\)
- sed "s/^X//" >'slshupto.icn' <<'END_OF_FILE'
- X############################################################################
- X#
- X# Name: slshupto.icn
- X#
- X# Title: slshupto (upto with backslash escaping)
- X#
- X# Author: Richard L. Goerwitz
- X#
- X# Version: 1.4
- X#
- X############################################################################
- X#
- X# Slshupto works just like upto, except that it ignores backslash
- X# escaped characters. I can't even begin to express how often I've
- X# run into problems applying Icon's string scanning facilities to
- X# to input that uses backslash escaping. Normally, I tokenize first,
- X# and then work with lists. With slshupto() I can now postpone or
- X# even eliminate the traditional tokenizing step, and let Icon's
- X# string scanning facilities to more of the work.
- X#
- X# If you're confused:
- X#
- X# Typically UNIX utilities (and probably others) use backslashes to
- X# "escape" (i.e. remove the special meaning of) metacharacters. For
- X# instance, UNIX shells normally accept "*" as a shorthand for "any
- X# series of zero or more characters. You can make the "*" a literal
- X# "*," with no special meaning, by prepending a backslash. The rou-
- X# tine slshupto() understands these backslashing conventions. You
- X# can use it to find the "*" and other special characters because it
- X# will ignore "escaped" characters.
- X#
- X############################################################################
- X#
- X# Links: none
- X#
- X# See also: slashbal.icn
- X#
- X############################################################################
- X
- X# for compatibility with the original name
- X#
- Xprocedure slashupto(c, s, i, j)
- X suspend slshupto(c, s, i, j)
- Xend
- X
- X#
- X# slshupto: cset x string x integer x integer -> integers
- X# (c, s, i, j) -> Is (a generator)
- X# where Is are the integer positions in s[i:j] before characters
- X# in c that is not preceded by a backslash escape
- X#
- Xprocedure slshupto(c, s, i, j)
- X
- X local c2
- X
- X if /s := &subject
- X then /i := &pos
- X else /i := 1
- X /j := *s + 1
- X
- X /c := &cset
- X c2 := '\\' ++ c
- X s[1:j] ? {
- X tab(i)
- X while tab(upto(c2)) do {
- X if ="\\" then {
- X move(1) | {
- X if find("\\", c)
- X then return &pos - 1
- X }
- X next
- X }
- X suspend .&pos
- X move(1)
- X }
- X }
- X
- Xend
- X
- END_OF_FILE
- if test 2247 -ne `wc -c <'slshupto.icn'`; then
- echo shar: \"'slshupto.icn'\" unpacked with wrong size!
- fi
- # end of 'slshupto.icn'
- fi
- echo shar: End of archive 5 \(of 5\).
- cp /dev/null ark5isdone
- MISSING=""
- for I in 1 2 3 4 5 ; do
- if test ! -f ark${I}isdone ; then
- MISSING="${MISSING} ${I}"
- fi
- done
- if test "${MISSING}" = "" ; then
- echo You have unpacked all 5 archives.
- rm -f ark[1-9]isdone
- else
- echo You still must unpack the following archives:
- echo " " ${MISSING}
- fi
- exit 0
- exit 0 # Just in case...
-