home *** CD-ROM | disk | FTP | other *** search
-
- --reserve_word_hash_pkg_.ada
- --::::::::::
- -------- SIMTEL20 Ada Software Repository Prologue ------------
- -- -*
- -- Unit name : Ada Reserved Word Hash
- -- Version : 1.0
- -- Authors : Mars J. Gralia
- -- : Jerry L. Kashtan
- -- : The Johns Hopkins Universtity
- -- : Laurel, Maryland 20707
- -- DDN Address : GRALIA @ APLVAX
- -- Copyright : (c) 1986 Mars J. Gralia and Jerry L. Kashtan
- -- Date created : November 16, 1985
- -- Release date : January 29, 1986
- -- Last update : January 29, 1986
- -- Machine/System Compiled/Run on : DEC 8600 Cluster, DEC Ada v1.0-7
- -- -*
- ---------------------------------------------------------------
- -- -*
- -- Keywords : ADA RESERVED WORDS
- ----------------: PARSER,TOKEN,TOKENIZER,SYNTAX,HASH
- --
- -- Abstract : This package contains the single function
- ----------------: "is_Ada_reserved_word". It returns with
- ----------------: either a "true" or "false" to the statement
- ----------------: "the input character string is a reserved
- ----------------: word in the Ada language."
- ----------------: The contribution of the function is that it
- ----------------: executes very quickly. It is an implemen-
- ----------------: tation of the algorithm defined by David
- ----------------: Wolverton in "A Perfect Hash Function for
- ----------------: Ada Reserved Words", as published in
- ----------------: Ada Letters, July-August 1984.
- -- -*
- ------------------ Revision history ---------------------------
- -- -*
- -- DATE VERSION AUTHOR HISTORY
- -- 1/29/86 1.0 Gralia & Kashtan Initial Release
- -- -*
- ------------------ Distribution and Copyright -----------------
- -- -*
- -- This prologue must be included in all copies of this software.
- --
- -- This software is copyright by the authors.
- --
- -- This software is released to the Ada community.
- --
- -- Restrictions on use or distribution: NONE
- -- -*
- ------------------ Disclaimer ---------------------------------
- -- -*
- -- This software and its documentation are provided "AS IS" and
- -- without any expressed or implied warranties whatsoever.
- -- No warranties as to performance, merchantability, or fitness
- -- for a particular purpose exist.
- --
- -- Because of the diversity of conditions and hardware under
- -- which this software may be used, no warranty of fitness for
- -- a particular purpose is offered. The user is advised to
- -- test the software thoroughly before relying on it. The user
- -- must assume the entire risk and liability of using this
- -- software.
- --
- -- In no event shall any person or organization of people be
- -- held responsible for any direct, indirect, consequential
- -- or inconsequential damages or lost profits.
- -- -*
- -------------------END-PROLOGUE--------------------------------
-
- --------------------------------------------------------------------------------
- -- R E S E R V E _ W O R D _ H A S H _ P K G --
- -- --
- -- P A C K A G E S P E C I F I C A T I O N --
- --------------------------------------------------------------------------------
-
-
- PACKAGE reserve_word_hash_pkg IS
-
- --| OVERVIEW
- --| This package contains only one subunit, a function named
- --| "is_Ada_reserved_word". It returns with either "true" or "false",
- --| to the statement "the input character string is an Ada reserved word".
- --|
- --| The contribution of the function is that it executes very quickly.
- --| It is an implementation of the algorithm defined by David Alan
- --| Wolverton in "A Perfect Hash Function for Ada Reserved Words",
- --| as published in the ACM's "Ada Letters", vol IV, number 1,
- --| (40-44), July-August 1984.
- --|
- --| This was written by Mars Gralia and Jerry Kashtan, JHU, 1/24/86.
- --|
-
-
- SUBTYPE id_type IS STRING;
- SUBTYPE id_length_type IS INTEGER;
- --| NOTE
- --| The size is unlimited to simplify caller code. A range
- --| of 2..9 is checked inside the package. If the length is
- --| outside this range, then it is clearly FALSE that this
- --| "is_Ada_reserved_word".
-
- FUNCTION is_Ada_reserved_word
- (
- input_id : IN id_type; -- Possible Ada reserved word
- id_length : IN id_length_type -- Logical length of "input_id"
- )
- RETURN BOOLEAN;
-
- --| OVERVIEW
- --| The function "is_Ada_reserved_word" has two inputs, a (fixed length)
- --| character string (input_id) and an integer (id_length).
- --| The character string normally contains either an Ada reserved
- --| word (LRM 2.9) or some other Ada identifier (LRM 2.3).
- --| The function returns either a "true" or "false" to the statement
- --| "input_id(1..id_length) is an Ada reserved word".
- --|
- --| The contribution of the function is that it executes very quickly.
- --| It is an implementation of the algorithm defined by David Alan
- --| Wolverton in "A Perfect Hash Function for Ada Reserved Words",
- --| as published in the ACM's "Ada Letters", vol IV, number 1,
- --| (40-44), July-August 1984.
- --|
- --| It was written by Mars Gralia and Jerry Kashtan of The Johns
- --| Hopkins University on 1/24/86. The system was DEC Ada v1.0-7.
-
- --| EFFECTS
- --| See description above in "Overview".
-
- --| REQUIRES
- --| The input string "input_id" is to be tested for the existence
- --| of an Ada reserved word. It is typically an identifier, as obtained
- --| by a lexical analyzer processing an Ada program or PDL.
- --| The input integer "id_length" specifies how much of the "input_id"
- --| string is to be considered as a possible reserved word. That is,
- --| only the slice input_id(1..id_length) is considered as a possible
- --| reserved word.
-
- --| RAISES
- --| No exceptions are normally raised by the package. Any exceptions
- --| which *do* occur are due to a design, coding or compiler bug.
-
- --| MODIFIES
- --| This package references no global data and therefore modifies none.
-
- --| ERRORS
- --| No errors are logically possible; the input string is either a
- --| reserved word or it is not.
-
-
-
- END reserve_word_hash_pkg;
- --::::::::::
- --reserve_word_hash_pkg.ada
- --::::::::::
- --------------------------------------------------------------------------------
- -- R E S E R V E _ W O R D _ H A S H _ P K G --
- -- --
- -- P A C K A G E B O D Y --
- --------------------------------------------------------------------------------
-
- -- This was written by Mars Gralia and Jerry Kashtan
- -- of The Johns Hopkins University on 1/24/86.
-
-
-
- PACKAGE BODY reserve_word_hash_pkg IS
-
- -- An aside: I had wanted to name this "reserveD_word_hash_pkg",
- -- but that caused an internal error in DEC Ada v1.0-7!
-
-
- SUBTYPE hash_component_type IS INTEGER RANGE -38 .. +56;
- --| NOTE
- --| The range here is the min and max of the individual components of
- --| the hashing function. Reference: "procedure calc_hash_vector",
- --| table "char_value_array".
-
- SUBTYPE hash_type IS INTEGER RANGE -76 .. +171;
- --| NOTE
- --| The range here is the min and max values the hashing function
- --| can produce. It is calculated as follows:
- --| -38 <= xlate_first <= +56
- --| -38-38 <= xlate_first+xlate_last <= +56+56
- --| -76-0 <= xlate_first+xlate_last+2*second_to_last <= +112+(2*25)
- --| -76+2 <= xlate_first+xlate_last+2*second_to_last+length <= +162+9
-
- xlate_first,
- xlate_last,
- second_to_last : hash_component_type;
- hash_value : hash_type;
-
-
-
- FUNCTION is_Ada_reserved_word
- (
- input_id : IN id_type;
- id_length : IN id_length_type
- ) RETURN BOOLEAN IS
-
- identifier : id_type (1..id_length); -- Lower-case version of "input_id"
- --| NOTE
- --| This is fixed to be that of the suspected reserved word.
- --| We therefore don't need to specify its length explicitly. That
- --| is, henceforth "id_length" is effectively a "global variable".
-
- PRAGMA PAGE;
-
-
-
- --| NOTE
- --| Using "PRAGMA INLINE" just might help thing execute faster.
- --| The DEC v1.0-7 compiler refused to allow the following two
- --| routines to be "inline", so I removed them:
- --| PROCEDURE calc_hash_vector
- --| FUNCTION identifier_matches_hash
-
-
- FUNCTION is_alphabetic (c : CHARACTER) RETURN BOOLEAN;
-
- FUNCTION convert_to_lowercase (c : CHARACTER) RETURN CHARACTER;
-
-
-
- PRAGMA INLINE (
- is_alphabetic,
- convert_to_lowercase
- );
-
- PRAGMA PAGE;
-
-
-
- FUNCTION is_alphabetic (c : CHARACTER) RETURN BOOLEAN IS
- BEGIN -- is_alphabetic
- IF (c IN 'a' .. 'z')
- OR ELSE (c in 'A' .. 'Z') THEN
- RETURN TRUE;
- ELSE
- RETURN FALSE;
- END IF;
- END is_alphabetic;
-
-
- FUNCTION convert_to_lowercase (c : CHARACTER) RETURN CHARACTER IS
- conversion_factor : CONSTANT INTEGER
- := CHARACTER'POS('a') - CHARACTER'POS('A');
- BEGIN -- convert_to_lowercase
- IF c IN 'a' .. 'z' THEN
- RETURN c;
- ELSE
- RETURN CHARACTER'VAL ((CHARACTER'POS(c)) + conversion_factor);
- END IF;
- END convert_to_lowercase;
-
-
- PROCEDURE calc_hash_vector
- (
- token : IN id_type;
- xlate_first,
- xlate_last,
- second_to_last : OUT hash_component_type
- ) IS
- --| OVERVIEW
- --| Given a string, compute the components of its hash function.
-
- SUBTYPE lc_chars IS CHARACTER RANGE 'a' .. 'z';
- char_value_array : CONSTANT ARRAY (lc_chars) OF hash_component_type
- --| NOTE
- --| This is called "XLATE array" in the paper.
- :=
-
- ('a' => 0, 'b' => 49, 'c' => 0, 'd' => -7, 'e' => -20,
- 'f' => 18, 'g' => -2, 'h' => -38, 'i' => 33, 'j' => 0,
- 'k' => -9, 'l' => 9, 'm' => 29, 'n' => -9, 'o' => 6,
- 'p' => 26, 'q' => 0, 'r' => 8, 's' => 1, 't' => 1,
- 'u' => -9, 'v' => 0, 'w' => 56, 'x' => -28, 'y' => 11,
- 'z' => 0);
-
- BEGIN -- calc_hash_vector procedure
-
- xlate_first := char_value_array (token(1));
- xlate_last := char_value_array (token (token'LENGTH));
- second_to_last := CHARACTER'POS(token(token'LENGTH - 1)) -
- CHARACTER'POS('a');
-
- END calc_hash_vector;
-
-
- FUNCTION identifier_matches_hash
- (
- hash_value : IN hash_type;
- identifier : IN id_type;
- id_length : IN id_length_type
- ) RETURN BOOLEAN IS
-
- --| OVERVIEW
- --| We know the hash_value for our string, and its (probably) the
- --| same as the hash value for the reserved word. This routine
- --| answers true/false to "the indicated hash_value/identifier
- --| correspond to the proper Ada reserved word".
-
- --| NOTE
- --| At one time, there was some concern that this function might
- --| return the improper value. That can't happen, according to
- --| our analysis. The reason, in general, is that the surrounding
- --| algorithm and the constraints prohibit it. In particular:
- --| a) The body of the "is_Ada_reserved_word" function will continue
- --| to process only strings which contain "is_alphabetic" characters.
- --| b) If the first letter of a proposed reserved word were a blank,
- --| then "calc_hash_vector" would give a constraint error since the
- --| index to its table must be a "lower case character" in the
- --| range 'a' .. 'z'.
-
- hash_table : CONSTANT ARRAY (0..71) OF STRING (1..9) :=
-
- (0 => "else ", 1 => "exit ", 2 => "end ",
- 3 => "at ", 4 => "then ", 5 => "range ",
- 6 => "abs ", 7 => "do ", 8 => "exception",
- 9 => "delay ", 10 => "use ", 11 => "xor ",
- 12 => "select ", 13 => " ", 14 => "declare ",
- 15 => "type ", 16 => "array ", 17 => "limited ",
- 18 => "subtype ", 19 => "elsif ", 20 => "case ",
- 21 => "generic ", 22 => "and ", 23 => "not ",
- 24 => "renames ", 25 => "package ", 26 => "null ",
- 27 => "separate ", 28 => "terminate", 29 => "raise ",
- 30 => "entry ", 31 => "reverse ", 32 => "task ",
- 33 => " ", 34 => "all ", 35 => "constant ",
- 36 => "delta ", 37 => "accept ", 38 => "digits ",
- 39 => "return ", 40 => "abort ", 41 => "record ",
- 42 => "in ", 43 => "access ", 44 => "or ",
- 45 => "function ", 46 => "goto ", 47 => "others ",
- 48 => "rem ", 49 => "procedure", 50 => "out ",
- 51 => "private ", 52 => "is ", 53 => "mod ",
- 54 => "of ", 55 => " ", 56 => "pragma ",
- 57 => "for ", 58 => "new ", 59 => "when ",
- 60 => "with ", 61 => "begin ", 62 => " ",
- 63 => "while ", 64 => " ", 65 => " ",
- 66 => " ", 67 => "loop ", 68 => " ",
- 69 => "if ", 70 => "body ", 71 => " ");
-
- table_value : STRING (1..9);
-
- BEGIN -- identifier_matches_hash function
-
- IF hash_value NOT IN hash_table'RANGE THEN
- RETURN FALSE;
- END IF;
-
- table_value := hash_table (hash_value);
-
- IF table_value (1..id_length) = identifier (1..id_length) THEN
- RETURN (TRUE);
- ELSE
- RETURN (FALSE);
- END IF;
-
- END identifier_matches_hash;
-
-
-
- BEGIN -- is_Ada_reserved_word
- -- (main procedure of package RESERVE_WORD_HASH_PKG)
-
- -- Ada reserved words are 2..9 characters in length. All other tokens --|
- -- are not Ada reserved words. --|
- IF ( (id_length > 9) OR ELSE (id_length < 2) ) THEN
- --| NOTE
- --| "or else" because that usually generates faster code on
- --| compilers with simple optimizers.
- RETURN (FALSE);
- END IF;
-
- -- See if input characters legal and, if so, convert to lower case --|
- FOR j in 1 .. id_length LOOP
-
- IF NOT is_alphabetic(input_id(j)) THEN
- RETURN FALSE; -- Reserved words have alphabetic characters only --|
- END IF;
-
- identifier(j) := convert_to_lowercase(input_id(j));
-
- END LOOP;
-
-
- -- Compute the hash function value. --|
-
- -- Translate token characters into numeric values for the
- -- hash function computation.
- calc_hash_vector (identifier, xlate_first, xlate_last, second_to_last);
-
- hash_value := (xlate_first + xlate_last + (2 * second_to_last)
- + id_length);
-
- -- Look up the reserved word value in the hash table to see --|
- -- if a reserved word was found. Report the status to the --|
- -- calling program and then exit this function. --|
-
- RETURN ( identifier_matches_hash (hash_value, identifier, id_length) );
-
-
- END is_Ada_reserved_word;
-
- END reserve_word_hash_pkg;
- --::::::::::
- --read_me.test
- --::::::::::
- The compilation order for the useful package is:
-
- reserve_word_hash_pkg_.ada -- The package specifications
- reserve_word_hash_pkg.ada -- The package body
-
-
-
- To run the demonstration tests, the compilation order is:
-
- reserve_word_hash_pkg_.ada -- The package specifications
- reserve_word_hash_pkg.ada -- The package body
- driver.ada -- The test driver.
-
- There are three files which are used by the supplied test driver. They
- are:
-
- test_go.txt -- Contains Ada reserved words.
- test_nogo.txt -- Contains identifiers which are NOT Ada reserved words
- test_mixed.txt -- A mixture of reserved words and just plain junk.
-
- Note the supplied test driver will never send the is_ada_reserved_word
- function a string which contains a blank. (This is due to the way
- the driver parses the input stream.)
-
- These programs were developed on DEC Ada, v1.0-7. The machines used were
- 780, 750 and 8600; several were clustered. No other compiler was used
- as a cross-check.
-
-
- MJG
- 1/29/86.
- --::::::::::
- --driver.ada
- --::::::::::
- -- This driver routine tests the RESERVE_WORD_HASH_PKG.
-
- -- The approach is to pass sample words through it; the words come
- -- from disk files.
-
- -- This is the result of a special Johns Hopkins University student project.
-
- -- The design of the implemented algorithm is given in package
- -- RESERVE_WORD_HASH_PKG. The package specification provides a function
- -- called IS_ADA_RESERVED_WORD which takes the token (which is an
- -- unconstrained string variable), the
- -- length of the token (in token characters) as input, and returns a
- -- boolean TRUE or FALSE depending if the token hashes properly into the
- -- reserved word hash table. The package specification also makes a few types
- -- available for use. All others are placed in the package body so
- -- they are not readily accessable.
- --
- --
- -- This program was written by Jerry Kashtan on 11/16/85.
- -- It was modified by Mars Gralia on 1/24/86: modularized and blank tests.
- -- Modified by Mars Gralia on 1/29/86: this header block.
- -- Modified by Mars Gralia on 1/29/86: changed input to text_io.
-
-
-
- WITH RESERVE_WORD_HASH_PKG, TEXT_IO;
- USE TEXT_IO;
-
- PROCEDURE driver IS
-
-
- PACKAGE INT_IO IS NEW TEXT_IO.INTEGER_IO (INTEGER);
- USE INT_IO;
-
- TYPE input_data_file_types IS (Ada_reserved_words,
- non_Ada_reserved_words, mixed_words);
-
- data_file : TEXT_IO.FILE_TYPE;
-
- max_line_length : CONSTANT INTEGER := 80;
- max_page_size : CONSTANT INTEGER := 50;
-
- got_length : INTEGER RANGE 1..max_line_length;
- pos : INTEGER RANGE 1..max_line_length;
- line_count : INTEGER RANGE 1..max_page_size;
- proposed_reserved_word : reserve_word_hash_pkg.id_type(1..max_line_length);
- proposed_res_word_length : reserve_word_hash_pkg.id_length_type;
-
- PROCEDURE write_banner IS
-
- BEGIN -- write_banner procedure
- PUT_LINE ("KEYWORD SEARCH PROGRAM USING PERFECT HASH FUNCTION ALGORITHM");
- PUT_LINE (" PROGRAM ALGORITHM ADOPTED FROM ADA LETTERS JOURNAL");
- PUT_LINE (" ");
- PUT_LINE (" Program Output");
- PUT_LINE (" ");
-
- END write_banner;
-
-
- PROCEDURE special_new_line
- (
- line_count : IN OUT INTEGER
- ) IS
- BEGIN -- special_new_line
- PUT_LINE (" ");
- line_count := line_count + 1;
-
- IF line_count >= max_page_size THEN
- line_count := 1;
- NEW_PAGE;
- write_banner;
- END IF;
- END special_new_line;
-
-
- PROCEDURE clear_token_string IS
- -- Clear the token string variable by loading blanks.
- BEGIN -- clear_token_string
- FOR i IN 1..max_line_length LOOP
- proposed_reserved_word (i) := ' ';
- END LOOP;
- END clear_token_string;
-
-
- PROCEDURE test_one
- (
- proposed_reserved_word : IN reserve_word_hash_pkg.id_type;
- proposed_res_word_length : IN reserve_word_hash_pkg.id_length_type;
- line_count : IN OUT INTEGER
- ) IS
-
- BEGIN -- test_one
- PUT (" token => ");
- PUT (proposed_reserved_word(1..proposed_res_word_length));
- SET_COL (35);
-
- IF reserve_word_hash_pkg.is_Ada_reserved_word
- (proposed_reserved_word,
- proposed_res_word_length) THEN
- PUT (" is an Ada reserved word");
- ELSE
- PUT (" is not an Ada reserved word");
- END IF;
-
- special_new_line (line_count);
-
- END test_one;
-
- BEGIN -- driver procedure
-
- FOR current_file IN input_data_file_types LOOP
-
- CASE current_file IS
- WHEN Ada_reserved_words =>
- TEXT_IO.OPEN (data_file, IN_FILE, "test_go.txt");
- TEXT_IO.SET_INPUT (data_file);
- WHEN non_Ada_reserved_words =>
- TEXT_IO.OPEN (data_file, IN_FILE, "test_nogo.txt");
- TEXT_IO.SET_INPUT (data_file);
- WHEN mixed_words =>
- TEXT_IO.OPEN (data_file, IN_FILE, "test_mixed.txt");
- TEXT_IO.SET_INPUT (data_file);
- END CASE;
-
- NEW_PAGE;
- write_banner;
- line_count := 1;
-
- WHILE NOT END_OF_FILE LOOP
-
- clear_token_string;
-
- TEXT_IO.GET_LINE (proposed_reserved_word, got_length);
-
- -- Search token string variable for first blank character.
- -- The blank character will signal the end of the meaningful
- -- token substring.
- IF (got_length > 0) THEN
- pos := 1;
- WHILE proposed_reserved_word(pos) /= ' ' LOOP
- exit when pos = max_line_length;
- pos := pos + 1;
- END LOOP;
- IF (pos <= max_line_length) THEN
- pos := pos - 1;
-
- proposed_res_word_length := pos;
-
- test_one (
- proposed_reserved_word(1..proposed_res_word_length),
- proposed_res_word_length,
- line_count);
- END IF;
- END IF;
-
- END LOOP;
-
- TEXT_IO.CLOSE (data_file);
-
- END LOOP; -- Go back and open another data file.
-
- -- Try some strings of blanks
- clear_token_string;
- FOR j IN 0..max_line_length LOOP
- IF (reserve_word_hash_pkg.is_Ada_reserved_word
- (proposed_reserved_word, j ) )THEN
- PUT (" ERROR: string of");
- PUT (j);
- PUT (" blanks is a reserved word!");
- PUT (" ");
- special_new_line (line_count);
- END IF;
- END LOOP;
-
-
-
- END DRIVER;
-
- --::::::::::
- --test_go.txt
- --::::::::::
- ABORT
- ABS
- ACCEPT
- access
- all
- and
- ArRaY
- At
- BEGin
- BodY
- casE
- CONSTANt
- declare
- delay
- delta
- digits
- do
- else
- elsif
- end
- ENTRY
- EXCEPTION
- EXIT
- FOR
- FUNCTION
- GENERIC
- GOTO
- IF
- IN
- IS
- LIMITED
- loop
- mod
- new
- not
- null
- of
- or
- others
- Out
- Package
- Pragma
- Private
- Procedure
- Raise
- Range
- Record
- Rem
- renameS
- returN
- reVERse
- select
- SEPARATE
- SUBTYPE
- TASK
- TERMINATE
- THEN
- TYPE
- USE
- when
- while
- with
- xor
- --::::::::::
- --test_nogo.txt
- --::::::::::
- Jerry
- Kashtan
- new_values
- X01_1
- Ada__keyword
- Ada_
- Identifier_1234_5678_90
- T
- z
- off
- often
- last_word_in_this_file
- --::::::::::
- --test_mixed.txt
- --::::::::::
- ABORT
- Jerry
- Kashtan
- Procedure
- FIND_identifier
- OR
- Private
- NeXt_ScReEn
- a3b
- a.b
- a
- 3
- a-b
- package
- pa..age
- ------
-
-