home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
cs.rhul.ac.uk
/
www.cs.rhul.ac.uk.zip
/
www.cs.rhul.ac.uk
/
pub
/
rdp
/
rdp_cs3470.tar
/
rdp_gram.c
< prev
next >
Wrap
C/C++ Source or Header
|
1998-05-07
|
16KB
|
529 lines
/****************************************************************************
*
* RDP release 1.50 by Adrian Johnstone (A.Johnstone@rhbnc.ac.uk) 20 December 1997
*
* rdp_gram.c - rdp grammar checking routines
*
* This file may be freely distributed. Please mail improvements to the author.
*
****************************************************************************/
#include <string.h>
#include <ctype.h>
#include "memalloc.h"
#include "scan.h"
#include "set.h"
#include "symbol.h"
#include "textio.h"
#include "rdp_aux.h"
#include "rdp_gram.h"
#include "rdp_prnt.h"
#include "rdp.h"
static int rdp_follow_changed; /* repeat until done flag for follow sets */
void rdp_check_eoln(char * id)
{
if (strcmp(id, "EOLN")== 0)
rdp_dir_newline_visible = 1; /* Grammar contains an explicit EOLN */
}
void rdp_check_token_valid(char * id)
{
if (id == NULL)
return;
if (* id == 0)
text_message(TEXT_ERROR_ECHO, "empty tokens are not allowed: use [ ... ] instead\n");
/* Test for embedded spaces in token */
{
int bad = 0;
while (* id != 0)
{
bad |= !isgraph(* id);
id++;
}
if (bad)
text_message(TEXT_ERROR_ECHO, "tokens must not contain spaces or control characters\n");
}
}
void rdp_check_string_token_valid(char * id)
{
rdp_check_token_valid(id); /* make sure it's not empty or contains spaces */
if (*(id + 2)!= 0)
text_message(TEXT_ERROR_ECHO, "string delimiter tokens must be exactly one character long\n");
}
void rdp_check_comment_token_valid(char * id)
{
rdp_check_token_valid(id); /* make sure it's not empty or contains spaces */
if (!(*(id + 2)== 0 || *(id + 3)== 0))
text_message(TEXT_ERROR_ECHO, "comment delimiter tokens must be less than three characters long\n");
}
void rdp_check_prod_name_valid(char * id)
{
if ((strncmp("rdp_", id, 4)== 0)||
(strncmp("RDP_", id, 4)== 0)||
(strncmp("args_", id, 5)== 0)||
(strncmp("mem_", id, 4)== 0)||
(strncmp("set_", id, 4)== 0)||
(strncmp("scan_", id, 5)== 0)||
(strncmp("SCAN_", id, 5)== 0)||
(strncmp("sym_", id, 4)== 0)||
(strncmp("text_", id, 5)== 0))
text_message(TEXT_ERROR_ECHO, "identifier \'%s\' begins with a reserved name\n", id);
}
static void rdp_count_productions(void * base)
{
unsigned primaries = 0,
internals = 0,
codes = 0;
rdp_data * temp =(rdp_data *) symbol_next_symbol_in_scope(base);
while (temp != NULL)
{
if (temp->kind == K_PRIMARY)
primaries++;
else if (temp->kind == K_CODE)
codes++;
else
internals++;
temp =(rdp_data *) symbol_next_symbol_in_scope(temp);
}
if (rdp_verbose)
text_message(TEXT_INFO, "%u rules, %u tokens, %u actions, %u subrules\n",
primaries, rdp_token_count - SCAN_P_TOP + 1, codes, internals);
}
static void rdp_first(rdp_data * prod)
{
if (prod->in_use) /* something has gone wrong */
{
text_message(TEXT_ERROR, "LL(1) violation - rule \'%s\' is left recursive\n", prod->id); /* and return */
prod->ll1_violation = 1;
return;
}
if (!prod->first_done) /* something to do */
{
rdp_list * list = prod->list; /* set up alternates pointer */
prod->in_use = 1; /* mark this production as being processed */
if (prod->kind == K_SEQUENCE) /* sequences are treated differently */
{
prod->contains_null = 1; /* set up list flag */
while (list != NULL && prod->contains_null) /* scan until non-empty alternate is found */
{
if (!list->production->first_done) /* do first */
rdp_first(list->production);
set_unite_set(& prod->first, & list->production->first); /* add alternate first set to production first set */
prod->contains_null = list->production->contains_null; /* set contains_null flag */
list = list->next;
}
}
else
while (list != NULL) /* scan all alternates */
{
if (!list->production->first_done) /* do first */
rdp_first(list->production);
set_unite_set(& prod->first, & list->production->first); /* add alternate first set to production first set */
prod->contains_null |= list->production->contains_null; /* OR in contains_null flag */
list = list->next;
}
prod->in_use = 0; /* production is no longer in use */
prod->first_done = 1; /* first set is now complete */
/* and set cardinality */
prod->first_cardinality = set_cardinality(& prod->first);
}
}
/* scan a sequence production for follow set changes */
static void rdp_follow_sequence(rdp_data * prod)
{
rdp_list * check = prod->list; /* pointer to sequence list */
while (check != NULL) /* scan entire sequence and add to follow sets */
{
rdp_list * following = check; /* temporary to look at followers */
unsigned old_cardinality = check->production->follow_cardinality;
do /* scan up list adding first sets of trailing productions */
{
following = following->next;
if (following == NULL) /* We hit end of sequence */
set_unite_set(& check->production->follow, & prod->follow);
else
set_unite_set(& check->production->follow, & following->production->first);
}
while (following != NULL && following->production->contains_null);
/* Update cardinality changed flag */
rdp_follow_changed |=((check->production->follow_cardinality =
set_cardinality(& check->production->follow)
)!= old_cardinality);
check = check->next; /* step to next item in sequence */
}
}
/* scan an alternate for follow set changes */
static void rdp_follow_alternate(rdp_data * prod)
{
rdp_list * check = prod->list; /* pointer to alternate list */
while (check != NULL)
{
unsigned old_cardinality = check->production->follow_cardinality;
set_unite_set(& check->production->follow, & prod->follow);
rdp_follow_changed |=((check->production->follow_cardinality =
set_cardinality(& check->production->follow)
)!= old_cardinality);
check = check->next;
}
}
static void rdp_find_follow(void * base)
{
rdp_data * temp;
unsigned follow_pass = 0;
do
{
follow_pass++;
rdp_follow_changed = 0;
temp =(rdp_data *) symbol_next_symbol_in_scope(base);
while (temp != NULL)
{
if (temp->kind == K_SEQUENCE) /* only need to scan sequences */
rdp_follow_sequence(temp);
else
rdp_follow_alternate(temp);
temp =(rdp_data *) symbol_next_symbol_in_scope(temp);
}
}
while (rdp_follow_changed);
if (rdp_verbose)
text_message(TEXT_INFO, "Follow sets stabilised after %u pass%s\n", follow_pass, follow_pass == 1 ? "": "es");
}
static int rdp_check_identifier(char * id)
{
rdp_data * s =(rdp_data *) symbol_lookup_key(rdp, & id, NULL);
if (s != NULL)
{
if (s->kind == K_PRIMARY)
{
text_message(TEXT_ERROR, "identifier \'%s\' is a C++ reserved word or library identifier\n", id);
return 1;
}
}
return 0;
}
static int rdp_check_reserved_words(void)
{
int bad = 0,
temp = 0;
char * rdp_reserved_words[]= {RDP_RESERVED_WORDS, NULL};
while (rdp_reserved_words[temp]!= NULL)
bad |= rdp_check_identifier(rdp_reserved_words[temp++]);
return bad;
}
/* check for empty alternates. mark up code sequences while we're at it */
static int rdp_check_empties(void * base)
{
int bad = 0;
rdp_data * temp =(rdp_data *) symbol_next_symbol_in_scope(base);
while (temp != NULL)
{
rdp_list * list = temp->list;
int k = temp->kind,
bad_alternate = 1;
if (k == K_PRIMARY && temp->call_count == 0 && !temp->comment_only)
text_message(TEXT_WARNING, "rule \'%s\' never called so deleted\n", temp->id);
if (list == NULL && k == K_PRIMARY)
{
text_message(TEXT_ERROR, "rule \'%s\' is empty\n", temp->id);
bad = 1;
}
if (k == K_SEQUENCE) /* check for empty alternates and mark up code */
{
while (list != NULL)
{
if (list->production->kind == K_CODE) /* check code position */
if (list->next == NULL) /* last in list? */
list->production->code_terminator = 1;
else if (list->next->production->kind == K_CODE) /* is the next one code? */
list->next->production->code_successor = 1; /* next one is code successor */
else
list->production->code_terminator = 1; /* this one is code terminator */
if (list->production->kind != K_CODE)
bad_alternate = 0;
list = list->next;
}
}
else
bad_alternate = 0;
if (bad_alternate)
{
if (temp->list == NULL)
{
text_message(TEXT_ERROR, "LL(1) violation - alternate \'%s\' is empty\n", temp->id);
temp->ll1_violation = 1;
}
else
{
temp->code_only = 1;
}
bad = 1;
}
temp =(rdp_data *) symbol_next_symbol_in_scope(temp);
}
/* Now go over again updating primaries to mark code only productions */
temp =(rdp_data *) symbol_next_symbol_in_scope(base);
while (temp != NULL)
{
if (temp->kind == K_PRIMARY && temp->list != NULL)
if (temp->list->next == NULL && temp->list->production->code_only)
temp->code_only = 1;
temp =(rdp_data *) symbol_next_symbol_in_scope(temp);
}
return bad;
}
static void rdp_find_first(void * base)
{
rdp_data * temp =(rdp_data *) symbol_next_symbol_in_scope(base);
while (temp != NULL)
{
rdp_first(temp);
temp =(rdp_data *) symbol_next_symbol_in_scope(temp);
}
}
/* Check if we have a nullable alternate inside a nullable iterator */
static int rdp_check_nested_nullable(void * base)
{
int bad = 0;
rdp_data * temp =(rdp_data *) symbol_next_symbol_in_scope(base);
while (temp != NULL)
{
if (set_includes_element(& rdp_production_set, temp->kind)&& temp->kind != K_SEQUENCE)
{
rdp_list * inner = temp->list;
while (inner != NULL)
{
if (temp->lo == 0 && inner->production->contains_null)
{
text_message(TEXT_ERROR, "LL(1) violation - rule \'%s\'\n is nullable but contains the nullable subrule\n", temp->id);
text_printf(" %s ::= ", inner->production->id);
rdp_print_sub_item(inner->production, 1);
text_printf(".\n");
bad = 1;
temp->ll1_violation = 1;
inner->production->ll1_violation = 1;
}
inner = inner->next;
}
}
temp =(rdp_data *) symbol_next_symbol_in_scope(temp);
}
return bad;
}
static int rdp_check_disjoint(void * base)
{
int bad = 0;
set_ work = SET_NULL;
rdp_data * temp =(rdp_data *) symbol_next_symbol_in_scope(base);
while (temp != NULL)
{
if (set_includes_element(& rdp_production_set, temp->kind)&& temp->kind != K_SEQUENCE)
{
rdp_list * left = temp->list;
while (left != NULL)
{
rdp_list * right = left->next;
while (right != NULL)
{
/* First check for disjoint on epsilon */
if (left->production->contains_null && right->production->contains_null)
{
text_message(TEXT_ERROR, "LL(1) violation - rule \'%s\'\n", temp->id);
text_printf(" productions %s ::= ", left->production->id);
rdp_print_sub_item(left->production, 1);
text_printf(".\n and %s ::= ", right->production->id);
rdp_print_sub_item(right->production, 1);
text_printf(".\n are both nullable \n");
left->production->ll1_violation = 1;
right->production->ll1_violation = 1;
bad = 1;
}
set_assign_set(& work, & left->production->first);
set_intersect_set(& work, & right->production->first);
if (set_cardinality(& work)!= 0)
{
text_message(TEXT_ERROR, "LL(1) violation - rule \'%s\'\n", temp->id);
text_printf(" productions %s ::= ", left->production->id);
rdp_print_sub_item(left->production, 1);
text_printf(".\n and %s ::= ", right->production->id);
rdp_print_sub_item(right->production, 1);
text_printf(".\n share these start tokens: ");
set_print_set(& work, rdp_token_string, 78);
text_printf("\n");
left->production->ll1_violation = 1;
right->production->ll1_violation = 1;
bad = 1;
}
right = right->next;
}
left = left->next;
}
}
temp =(rdp_data *) symbol_next_symbol_in_scope(temp);
}
return bad;
}
static int rdp_check_nullable(void * base)
{
int bad = 0;
rdp_data * temp =(rdp_data *) symbol_next_symbol_in_scope(base);
set_ work = SET_NULL;
while (temp != NULL)
{
if (temp->contains_null &&(temp->kind != K_CODE))
{
set_assign_set(& work, & temp->first);
set_intersect_set(& work, & temp->follow);
if (set_cardinality(& work)!= 0)
{
text_message(TEXT_ERROR, "LL(1) violation - rule\n %s ::= ", temp->id);
rdp_print_sub_item(temp, 1);
text_printf(".\n contains null but first and follow sets both include: ");
set_print_set(& work, rdp_token_string, 78);
text_printf("\n");
temp->ll1_violation = 1;
bad = 1;
}
}
temp =(rdp_data *) symbol_next_symbol_in_scope(temp);
}
return bad;
}
static void rdp_update_follow_sets(void * base)
{
rdp_data * temp =(rdp_data *) symbol_next_symbol_in_scope(base);
while (temp != NULL)
{
if (temp->kind == K_LIST && temp->hi != 1 && temp->supplementary_token == NULL)
{
set_unite_set(& temp->follow, & temp->first);
temp->follow_cardinality = set_cardinality(& temp->follow);
}
temp =(rdp_data *) symbol_next_symbol_in_scope(temp);
}
}
int rdp_bad_grammar(void * base)
{
int bad = 0;
/* Check for empties */
if (rdp_verbose)
text_message(TEXT_INFO, "Checking for empty alternates\n");
bad |= rdp_check_empties(base);
/* Count productions and produce statistics */
rdp_count_productions(base);
/* Check promotion operators on start production */
if (rdp_start_prod->promote_default != PROMOTE_DONT)
text_message(TEXT_WARNING, "default promotion operator \'%s\' on start production \'%s\' will not be applied at top level\n",
rdp_start_prod->promote_default == PROMOTE ? "^":
rdp_start_prod->promote_default == PROMOTE_AND_COPY ? "^^": "??",
rdp_start_prod->id);
/* find first sets */
if (rdp_verbose)
text_message(TEXT_INFO, "Generating first sets\n");
rdp_find_first(base);
/* find follow sets */
if (rdp_verbose)
text_message(TEXT_INFO, "Generating follow sets\n");
rdp_find_follow(base);
/* check for C reserved words */
if (rdp_verbose)
text_message(TEXT_INFO, "Checking for clashes with reserved words\n");
bad |= rdp_check_reserved_words();
/* check that for each production, all alternates have unique start tokens */
if (rdp_verbose)
text_message(TEXT_INFO, "Checking for disjoint first sets\n");
bad |= rdp_check_disjoint(base);
/* check nullable brackets don't contain nullable productions */
if (rdp_verbose)
text_message(TEXT_INFO, "Checking for nested nullable subrules\n");
bad |= rdp_check_nested_nullable(base);
/* check that first(a) - follow (a) is empty for nullable a */
if (rdp_verbose)
text_message(TEXT_INFO, "Checking nullable rules\n");
bad |= rdp_check_nullable(base);
/* add first() to follow() for iterations so that error handling doesn't just eat entire file! */
if (rdp_verbose)
text_message(TEXT_INFO, "Updating follow sets\n");
rdp_update_follow_sets(base);
/* re-close follow sets */
rdp_find_follow(base);
return bad;
}
/* End of rdp_gram.c */