home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Collection of Hack-Phreak Scene Programs
/
cleanhpvac.zip
/
cleanhpvac
/
FUZZY.ZIP
/
PARSER.B
< prev
next >
Wrap
Text File
|
1986-11-30
|
27KB
|
753 lines
-------------------------------------------------------------------------------
-- --
-- Library Unit: Parser --
-- --
-- Author: Bradley L. Richards --
-- --
-- Version Date Notes . . . --
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - --
-- 1.0 22 May 86 Initial Version --
-- 1.1 19 Jun 86 Lotsa revisions due to Prover design --
-- 2.0 20 Jun 86 Version number change only (for consistancy) --
-- 2.05 13 Jul 86 Split spec and body into separate files --
-- 2.1 21 Jul 86 Demonstration Version --
-- 2.2 28 Jul 86 Added parse_read. Initial operational version --
-- 3.0 10 Oct 86 Final thesis product --
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - --
-- --
-- Library units used: Data_def, listing, token, unchecked_deallocation --
-- --
-- Description: This package parses an input program, and constructs an --
-- abstract syntax tree for the program. Procedures appear in --
-- alphabetical order. Start_parser and stop_parser are the --
-- initialization and clean up routines. Parse_file, parse_read, and --
-- parse_request are also externally visible routines. Release is --
-- the routine which deallocated ASTs when they are no longer needed. --
-- --
-------------------------------------------------------------------------------
-- --
-- Package Body --
-- --
-------------------------------------------------------------------------------
package body parser is
--
-- some forward definitions for our parsing routines
--
function parse_element return argument_ptr;
function parse_expression return AST_ptr;
function parse_head return AST_ptr;
function parse_list return p_list_ptr;
function parse_predicate return AST_ptr;
function parse_term return AST_ptr;
--
-- Parse_to_bracket -- Parse to the first unbalanced right bracket to
-- reach the end of the current string expression. Reaching an
-- unbalanced right paren or a period also suffices.
--
procedure parse_to_bracket is
bracket_level : natural := 0;
paren_level : natural := 0;
begin
loop
exit when current_token.is_a = period;
exit when current_token.is_a = end_of_file;
exit when paren_level = 0 and current_token.is_a = right_paren;
exit when bracket_level = 0 and current_token.is_a = right_bracket;
if current_token.is_a = left_paren then
paren_level := paren_level + 1;
elsif current_token.is_a = right_paren then
paren_level := paren_level - 1;
elsif current_token.is_a = left_bracket then
bracket_level := bracket_level + 1;
elsif current_token.is_a = right_bracket then
bracket_level := bracket_level - 1;
end if;
get_token;
end loop;
end parse_to_bracket;
--
-- Parse_to_paren -- Parse to the first unbalanced right paren, or to
-- a period. Skip embedded parentheses pairs.
--
procedure parse_to_paren is
paren_level : natural := 0;
begin
loop
exit when current_token.is_a = period;
exit when current_token.is_a = end_of_file;
exit when paren_level = 0 and current_token.is_a = right_paren;
if current_token.is_a = left_paren then
paren_level := paren_level + 1;
elsif current_token.is_a = right_paren then
paren_level := paren_level - 1;
end if;
get_token;
end loop;
end parse_to_paren;
procedure parse_to_period is
begin
loop
exit when current_token.is_a = period;
exit when current_token.is_a = end_of_file;
get_token;
end loop;
end parse_to_period;
--
-- Release -- Return memory to the system using UNCHECKED_DEALLOCATION.
-- These routines must be filled in for an efficient system
-- since the current Verdix Compiler does not include an
-- automatic garbage collector. Routines deallocate all
-- items within the passed structure UP TO the "stop" value.
--
procedure release( tree, stop : AST_ptr ) is
ptr : AST_ptr := tree;
begin
if ptr /= stop then
case ptr.node_type is
when implication =>
release(ptr.head, stop);
release(ptr.tail, stop);
when binary_operator =>
release(ptr.left_operand, stop);
release(ptr.right_operand, stop);
when unary_operator =>
release(ptr.operand, stop);
when resolution_marker =>
release(ptr.subgoals, stop);
when others => null;
end case;
free_AST(ptr);
end if;
end release;
procedure start_parser( input_file, output_file : in string ) is
begin
start_token(input_file, output_file);
end start_parser;
procedure stop_parser is
begin
stop_token;
end stop_parser;
-------------------------------------------------------------------------------
-- --
-- Parsing Routines --
-- (in alphabetical order) --
-- --
-------------------------------------------------------------------------------
--
-- A general convention honored by the parsing routines is that each will
-- leave current_token pointing to the token after whatever the routine just
-- parsed. In the case of an error where the routine got lost, it calls one
-- of parse_to_bracket, parse_to_paren, or parse_to_period to get the parser
-- back into known territory. Note that these are hierarchical routines in
-- a sense; parse_to_bracket will also stop when it finds an unbalanced
-- right parenthesis or a period, and parse_to_paren will stop when it sees
-- a period. The parsing routines are careful, in this case, not to parse
-- past the token that represents the termination condition for one of the
-- routines higher up in the call stack.
--
--
-- parse_arguments -- parses the argument list associated with a
-- predicate call.
--
function parse_arguments return argument_ptr is
arg, return_list : argument_ptr;
begin
--
-- on entry we know that the current token is a left parenthesis
--
get_token;
arg := parse_element;
return_list := arg; -- we'll return a pointer to the front of the list
loop
if current_token.is_a = comma then -- another argument coming
get_token;
elsif current_token.is_a = right_bracket then
error(pointer, "unbalanced right bracket");
get_token;
elsif current_token.is_a = right_paren then -- end of argument list
get_token;
exit;
elsif current_token.is_a = period then
error(pointer, "no terminating ')' for argument list");
exit;
else -- must be a syntax error
error(pointer,"missing argument separator; comma inserted");
get_token;
exit;
end if;
arg.next_arg := parse_element;
if arg.next_arg /= null then -- parse_element succeeded
arg := arg.next_arg;
end if;
end loop;
return return_list;
end parse_arguments;
function parse_clause return AST_ptr is
clause : AST_ptr := null;
begin
if current_token.is_a /= identifier
then
error(pointer,"invalid predicate name; clause ignored");
parse_to_period;
else
clause := new AST(implication);
clause.head := parse_head;
if current_token.is_a = period then -- just an assertion
clause.tail := new AST'(fuzzy_value,1.0);
elsif current_token.is_a = implication then -- we already knew that
get_token; -- so skip it...
clause.tail := parse_expression;
else
error(pointer,"':-' inserted");
clause.tail := parse_expression;
end if;
end if;
if current_token.is_a /= period then
if current_token.is_a = right_paren then
error(pointer,"unbalanced right parenthesis");
parse_to_period;
else
error(pointer,"unknown parsing error");
end if;
end if;
get_token;
return clause;
end parse_clause;
function parse_element return argument_ptr is
arg, old : argument_ptr := null;
duplicate, error_flg : boolean := false;
begin
case current_token.is_a is
when character_lit => arg := new argument(character_lit);
arg.char := current_token.char;
when float_num => arg := new argument(float_num);
arg.fp_num := current_token.fp_num;
when integer_num => arg := new argument(integer_num);
arg.int_num := current_token.int_num;
when left_bracket => arg := new argument(prolog_list);
arg.list := parse_list;
when identifier => arg := new argument(predicate);
arg.name := current_token.ident_name;
get_token;
if current_token.is_a = left_paren then
arg.p_arguments := parse_arguments;
end if;
when variable => arg := new argument(variable);
arg.v_name := current_token.var_name;
when underline => arg := new argument(variable);
arg.v_name := null; -- it's anonymous
when others => error(pointer,"illegal or missing element");
--
-- if the next token isn't an element
-- separator then we are totally lost,
-- so give up on this argument list.
--
if (current_token.is_a /= bar) and
(current_token.is_a /= comma) then
parse_to_paren;
end if;
error_flg := true;
end case;
if not error_flg then -- we're ok, and arg.is_a has a value
if (arg.is_a /= prolog_list) and (arg.is_a /= predicate) then
get_token;
end if;
end if;
return arg;
end parse_element;
function parse_expression return AST_ptr is
--
-- The basic parsing routines for expressions (which is to say the
-- entire right half of a clause) are "parse_expression" and "parse_term."
-- Basically, the parse grammar looks like this:
--
-- E --> E binary_op T | unary_op T | T |
-- E binary_op E | unary_op E
-- T --> ( E ) | predicate_name
--
--
-- Define a couple of characteristics of operators
--
type ary_ness is (unary, binary);
subtype precedence_value is integer range 0..255;
--
-- Define the relative precedence of operators. In keeping with
-- "Programming in Prolog" by Clocksin and Mellish, lower precedence
-- operators are executed FIRST. This is counter-intuitive, but seemed
-- better than conflicting with the definitions in such a popular book.
-- The values assigned to operators is the same as given in the book.
-- In the case of two operators of the same precedence, left-to-right
-- execution occurs. The left-side/right-side precedence referred to
-- in Clocksin & Mellish is NOT implemented.
--
precedence : array (operators) of precedence_value :=
(semicolon => 254, comma => 253,
bar => 252, hat => 251, rw_spy => 250,
rw_nospy => 250, rw_not => 60, rw_is => 40,
univ => 40, equal => 40, not_equal => 40,
less_than => 40, less_or_equal => 40, greater_or_equal => 40,
greater_than => 40, equality => 40, not_equality => 40,
minus => 31, plus => 31, slash => 21,
asterisk => 21, rw_mod => 11);
--
-- miscellaneous variables
--
left, right : AST_ptr;
operator : token_type := null_token; -- initialize so we can test it later
--
-- Ary -- returns unary or binary as the type of an operator
--
function ary( operator : operators ) return ary_ness is
begin
if operator in binary_operators then return binary;
elsif operator in unary_operators then return unary;
else error(pointer,"parser.expression.ary called with invalid operator");
return binary; -- have to return something
end if;
end ary;
--
-- Get_operator -- Since some operators are reserved words, where the
-- specific word is buried as "current_token.word" this
-- routine digs it out if necessary and returns a
-- consistant item of subtype operators.
--
function get_operator( thing : token_ptr ) return operators is
begin
if thing.is_a in operators then return thing.is_a;
elsif thing.is_a = reserved_word then
if (thing.word = rw_is) or else (thing.word = rw_mod) or else
(thing.word = rw_nospy) or else (thing.word = rw_not) or else
(thing.word = rw_spy) then return thing.word;
end if;
else
error(pointer,"parser.expression.get_operator called with invalid operator");
return comma; -- have to return something
end if;
end get_operator;
--
-- Not_operator -- We need to be able to tell if the current token is or
-- is not an operator. It happens that "not_operator" is
-- the version we need.
--
function not_operator( thing : token_ptr ) return boolean is
begin
if thing.is_a in operators then return false;
elsif thing.is_a = reserved_word then
if (thing.word = rw_is) or else (thing.word = rw_mod) or else
(thing.word = rw_nospy) or else (thing.word = rw_not) or else
(thing.word = rw_spy) then return false;
else return true;
end if;
else return true;
end if;
end not_operator;
--
-- Build_expr -- This is the recursive routine which actually does most of
-- the work. A single pass of this routine takes the
-- input operator and its operands and builds an AST node
-- which is returned as the new left operand.
--
procedure build_expr( pending : precedence_value; left : in out AST_ptr;
in_operator : operators; in_right : AST_ptr ) is
operator : operators := in_operator;
right : AST_ptr := in_right;
op2, next_op : operators;
right2 : AST_ptr;
begin
loop
if not_operator(current_token) then
--
-- At the end of this expression or subexpression?
--
if current_token.is_a = period or
current_token.is_a = right_paren then
if ary(operator) = unary then
left := new AST'(unary_operator, operator, right);
else
left := new AST'(binary_operator, operator, left,right);
end if;
else
--
-- Hmmm...at this point the current token should be an
-- operator of some sort. Since it isn't, give up.
--
error(pointer,"invalid operator");
parse_to_paren;
end if;
exit; -- one way or another, we're done.
else
next_op := get_operator(current_token);
--
-- If the next operator has a higher precedence, then execute
-- it LATER. This means that we go ahead and compress the
-- current operator and its operands into an AST node. But,
-- if the right-hand side is null, it is a unary operator
-- which must be evaluated before we'll even have an operand.
--
if (precedence(next_op) >= precedence(operator)) and
(right /= null) then
if ary(operator) = unary then
left := new AST'(unary_operator, operator, right);
else
left := new AST'(binary_operator, operator, left,right);
end if;
--
-- If the pending operator has a LOWER precedence then
-- it needs executed before the next operator. Hence we
-- exit to allow it to execute.
--
exit when precedence(next_op) >= pending;
--
-- still here, so shift in the next operator and operand
--
operator := next_op;
get_token;
if not_operator(current_token) then
right := parse_term;
elsif ary(get_operator(current_token)) = unary then
right := null; -- unary operator is legitimate operand
else
error(pointer,"illegal use of operator");
end if;
else
--
-- The next operator has to be executed before the current
-- one. Make a recursive call to take care of it.
--
op2 := next_op;
get_token;
if not_operator(current_token) then
right2 := parse_term;
elsif ary(get_operator(current_token)) = unary then
right2 := null; -- unary operator is legitimate operand
else
error(pointer,"illegal use of operator");
end if;
build_expr(precedence(operator), right, op2, right2);
end if;
end if;
end loop;
end build_expr;
begin -- parse_expression
if not_operator(current_token) then -- it must be the left-hand operand of
-- a binary operator
left := parse_term;
else
operator := get_operator(current_token);
if ary(operator) /= unary then
error(pointer,"missing left-hand operand for binary operator");
parse_to_paren;
end if;
end if;
if not_operator(current_token) then
--
-- A predicate name by itself is ok, so if the current token is a
-- period things are fine. If it's anything else (we already know
-- it's not a legal operator) we're lost.
--
if current_token.is_a /= period then
if operator = null_token then
error(pointer,"missing operator");
else
error(pointer,"missing operand");
end if;
parse_to_paren;
end if;
else
operator := get_operator(current_token);
get_token;
if not_operator(current_token) then
right := parse_term;
elsif ary(get_operator(current_token)) = unary then
right := null; -- unary operator is legitimate operand
else
error(pointer,"illegal use of operator");
end if;
build_expr(255, left, operator, right);
end if;
return left;
end parse_expression;
procedure parse_file( abstract_syntax_tree : out AST_ptr ) is
clause, FP_program : AST_ptr := null;
function first_node(node : AST_ptr) return AST_ptr is
temp_node : AST_ptr := node;
begin
if temp_node /= null
then
while temp_node.prev /= null loop
temp_node := temp_node.prev;
end loop;
end if;
return temp_node;
end first_node;
begin
get_token;
while current_token.is_a /= end_of_file loop
clause := parse_clause;
if clause /= null then
--
-- append new node to the last node in FP_program. Note that
-- FP_program points to the last clause parsed. After appending
-- the new node, then set the "next" pointer in the previous node.
--
if FP_program /= null then -- this is not the first node
FP_program.next := clause;
clause.prev := FP_program;
end if;
FP_program := clause;
end if;
end loop;
--
-- Point abstract_syntax_tree to the very first node in FP_program
--
abstract_syntax_tree := first_node(FP_program);
exception
when unexpected_end_of_file =>
error(pointer,"unexpected end of file");
abstract_syntax_tree := first_node(FP_program);
end parse_file;
--
-- Parse_head -- Parse the head of a clause. This is currently limited to
-- parsing a single predicate.
--
function parse_head return AST_ptr is
node : AST_ptr;
begin
node := parse_predicate;
return node;
end parse_head;
--
-- Parse_list -- Parse a Prolog list structure. Note that lists appear
-- only in argument lists. they can contain any of the elements which
-- can occur elsewhere in an argument list. The structure of a list is
-- similar to a LISP "cons" cell.
--
function parse_list return p_list_ptr is
has_tail, need_elt : boolean;
temp_elt : argument_ptr;
root, ptr, ptr2 : p_list_ptr;
begin
--
-- we know the current token is a left bracket
--
get_token;
need_elt := false;
while current_token.is_a /= right_bracket loop
temp_elt := parse_element;
need_elt := false;
if current_token.is_a = comma then
has_tail := false;
need_elt := true;
get_token;
elsif current_token.is_a = bar then
has_tail := true;
need_elt := true;
get_token;
elsif current_token.is_a = right_bracket then
has_tail := false;
elsif (current_token.is_a = period) or
(current_token.is_a = right_paren) then
error(pointer,"no terminating ']' for list");
exit;
else -- we don't have what we expected
error(pointer,"missing separator; comma inserted");
has_tail := false;
need_elt := true;
end if;
if root = null then -- first element
root := new p_list(has_tail);
root.elt := temp_elt;
ptr := root;
else
if ptr.has_tail then -- this element is the tail
ptr.tail := temp_elt;
if need_elt then
error(pointer,"only a single element allowed in a tail");
parse_to_bracket;
need_elt := false;
end if;
else -- a normal continuation of the list
ptr2 := new p_list(has_tail);
ptr2.elt := temp_elt;
ptr.next_elt := ptr2;
ptr := ptr2;
end if;
end if;
end loop;
if need_elt then
error(pointer,"missing element in list");
end if;
if current_token.is_a = right_bracket then -- finish off list
get_token;
end if;
return root;
end parse_list;
--
-- Parse_predicate -- Parse a single predicate call
--
function parse_predicate return AST_ptr is
node, temp_node : AST_ptr := null;
begin
if current_token.is_a = reserved_word then
if current_token.word in reserved_predicates then
node := new AST(reserved_predicate);
node.predicate := current_token.word;
get_token;
if current_token.is_a = left_paren then
node.r_arguments := parse_arguments;
end if;
--
-- This section of code implements special handling for certain
-- Fuzzy Prolog reserved predicates. For example, when the
-- "fuzzy" predicate has an explicit floating point number as
-- its argument, we go ahead and establish a fuzzy_value node.
--
case node.predicate is
when rw_fuzzy =>
if node.r_arguments.is_a = float_num then
temp_node := new AST'(fuzzy_value, node.r_arguments.fp_num);
release(node, null);
node := temp_node;
end if;
when others => -- no special handling (or not implemented)
null;
end case;
else
error(pointer,"illegal use of operator");
end if;
elsif current_token.is_a = identifier then
node := new AST(predicate);
node.name := current_token.ident_name;
get_token;
if current_token.is_a = left_paren then
node.p_arguments := parse_arguments;
end if;
elsif current_token.is_a = cut then
node := new AST'(reserved_predicate,cut,null);
get_token;
else
error(pointer,"illegal use of operator");
end if;
return node;
end parse_predicate;
--
-- Parse_read -- Return a single element for a READ predicate. Disallow
-- variables as they make no sense in this context.
--
procedure parse_read( elt : out argument_ptr; eof : out boolean ) is
temp_elt : argument_ptr;
begin
get_token;
temp_elt := parse_element;
if current_token.is_a = end_of_file then
eof := true;
else
if temp_elt.is_a = variable then
error(pointer, "Variable not allowed on READ");
elt := null;
else
elt := temp_elt;
end if;
eof := false;
end if;
end parse_read;
--
-- Parse_request -- Parse an interactive user request. This is assumed to be
-- some expression (just like the tail of a clause). The
-- short-hand list notation for file consultations is not
-- currently supported.
--
procedure parse_request( abstract_syntax_tree : out AST_ptr;
eof : out boolean ) is
begin
get_token;
abstract_syntax_tree := parse_expression;
eof := (current_token.is_a = end_of_file);
end parse_request;
--
-- Parse_term -- Parse a term within an expression. Terms will either be
-- predicate calls or subexpressions enclosed within
-- parentheses.
--
function parse_term return AST_ptr is
node : AST_ptr;
begin
if current_token.is_a = left_paren then -- parse a subexpression
get_token; -- consume the left parenthesis
node := parse_expression;
if current_token.is_a = right_paren then -- normal condition
get_token; -- consume the right parenthesis
elsif current_token.is_a = period then -- missing right parenthesis
error(pointer,"right parenthesis inserted");
else
error(pointer,"unknown parsing error in parse_term");
end if;
elsif current_token.is_a = identifier then -- a predicate
node := parse_predicate;
elsif current_token.is_a = reserved_word then -- still a predicate
node := parse_predicate;
elsif current_token.is_a = cut then -- the "cut" predicate
node := parse_predicate;
elsif current_token.is_a = integer_num then
node := new AST'(integer_num, current_token.int_num);
get_token;
elsif current_token.is_a = float_num then
node := new AST'(float_num, current_token.fp_num);
get_token;
elsif current_token.is_a = character_lit then
node := new AST'(character_lit, current_token.char);
get_token;
elsif current_token.is_a = variable then
node := new AST'(variable, current_token.var_name);
get_token;
else
error(pointer,"illegal or missing term");
end if;
return node;
end parse_term;
end parser;