OS/2 Shareware BBS: 10 Tools

home *** CD-ROM | disk | FTP | other *** search

/ OS/2 Shareware BBS: 10 Tools / 10-Tools.zip / jikepg12.zip / jikespg / src / produce.c < prev next >

Wrap

C/C++ Source or Header | 1999-11-04 | 55.2 KB | 1,527 lines

/* $Id: produce.c,v 1.2 1999/11/04 14:02:23 shields Exp $ */ /* This software is subject to the terms of the IBM Jikes Compiler License Agreement available at the following URL: http://www.ibm.com/research/jikes. Copyright (C) 1983, 1999, International Business Machines Corporation and others. All Rights Reserved. You must accept the terms of that agreement to use this software. */ static char hostfile[] = __FILE__; #include <string.h> #include "common.h" #include "header.h" static short *stack, *index_of, *nt_list, *item_list, *item_of, *next_item, *scope_table; static int scope_top = 0; static struct node **direct_produces; static struct scope_elmt { short link, item, index; } *scope_element; static BOOLEAN *symbol_seen; static SET_PTR produces, right_produces, left_produces; static int top; static void compute_produces(int symbol); static void print_name_map(int symbol); static void process_scopes(void); static BOOLEAN is_scope(int item_no); static BOOLEAN scope_check(int lhs_symbol, int target, int source); static insert_prefix(int item_no); static BOOLEAN is_prefix_equal(int item_no, int item_no2); static insert_suffix(int item_no); static BOOLEAN is_suffix_equal(int item_no1, int item_no2); static void print_scopes(void); static get_shift_symbol(int lhs_symbol); /****************************************************************************/ /* PRODUCE: */ /****************************************************************************/ /* This procedure computes for each state the set of non-terminal symbols */ /* that are required as candidates for secondary error recovery. If the */ /* option NAMES=OPTIMIZED is requested, the NAME map is optimized and SYMNO */ /* is updated accordingly. */ /****************************************************************************/ void produce(void) { /*****************************************************************/ /* TOP, STACK, and INDEX are used for the digraph algorithm */ /* in the routines COMPUTE_PRODUCES. */ /* */ /* The array PRODUCES is used to construct two maps: */ /* */ /* 1) PRODUCES, a mapping from each non-terminal A to the set of */ /* non-terminals C such that: */ /* */ /* A =>* x C w */ /* */ /* 2) RIGHT_MOST_PRODUCES, a mapping from each non-terminal A to */ /* the set of non-terminals C such that: */ /* */ /* C =>+ A x and x =>* %empty. */ /* */ /* NOTE: This is really a reverse right-most produces mapping, */ /* since given the above rule, we say that */ /* C right-most produces A. */ /* */ /*****************************************************************/ int state_no, state, nt_root, item_no, item_root, rule_no, symbol, nt, i, n; short *names_map; struct node **goto_domain, *p, *q; BOOLEAN *name_used, end_node; SET_PTR set; stack = Allocate_short_array(num_symbols + 1); index_of = Allocate_short_array(num_symbols + 1); names_map = Allocate_short_array(num_names + 1); name_used = Allocate_boolean_array(num_names + 1); item_list = Allocate_short_array(num_items + 1); nt_list = Allocate_short_array(num_non_terminals + 1); nt_list -= (num_terminals + 1); set = (SET_PTR) calloc(1, non_term_set_size * sizeof(BOOLEAN_CELL)); if (set == NULL) nospace(__FILE__, __LINE__); produces = (SET_PTR) calloc(num_non_terminals, non_term_set_size * sizeof(BOOLEAN_CELL)); if (produces == NULL) nospace(__FILE__, __LINE__); produces -= ((num_terminals + 1) * non_term_set_size); direct_produces = (struct node **) calloc(num_non_terminals, sizeof(struct node *)); if (direct_produces == NULL) nospace(__FILE__, __LINE__); direct_produces -= (num_terminals + 1); goto_domain = (struct node **) calloc(num_states + 1, sizeof(struct node *)); if (goto_domain == NULL) nospace(__FILE__, __LINE__); /*********************************************************************/ /* Note that the space allocated for PRODUCES and DIRECT_PRODUCES */ /* is automatically initialized to 0 by calloc. Logically, this sets */ /* all the sets in the PRODUCES map to the empty set and all the */ /* pointers in DIRECT_PRODUCES are set to NULL. */ /* */ /* Next, PRODUCES is initialized to compute RIGHT_MOST_PRODUCES. */ /* Also, we count the number of error rules and verify that they are */ /* in the right format. */ /*********************************************************************/ item_root = NIL; for ALL_NON_TERMINALS(nt) { for (end_node = ((p = clitems[nt]) == NULL); ! end_node; end_node = (p == clitems[nt])) { p = p -> next; item_no = p -> value; symbol = item_table[item_no].symbol; if (symbol IS_A_NON_TERMINAL) { i = item_table[item_no].suffix_index; if (IS_IN_SET(first, i, empty) && (! IS_IN_NTSET(produces, symbol, nt - num_terminals))) { NTSET_BIT_IN(produces, symbol, nt - num_terminals); q = Allocate_node(); q -> value = nt; q -> next = direct_produces[symbol]; direct_produces[symbol] = q; } } rule_no = item_table[item_no].rule_number; for (i = 0; i < RHS_SIZE(rule_no); i++) { if (item_table[item_no + i].symbol == error_image) break; } item_no += i; symbol = item_table[item_no].symbol; if (symbol == error_image) { if (item_table[item_no + 1].symbol IS_A_NON_TERMINAL && i > 0) { symbol = item_table[item_no + 2].symbol; if (symbol == empty) num_error_rules++; } if (warnings_bit && symbol != empty) { item_list[item_no] = item_root; item_root = item_no; } } symbol = eoft_image; } } /*********************************************************************/ /* If WARNINGS_BIT is on and some error rules are in the wrong, */ /* format, report them. */ /*********************************************************************/ if (warnings_bit && item_root != NIL) { PR_HEADING; if (item_list[item_root] == NIL) fprintf(syslis, "*** This error rule is not in manual format:\n\n"); else fprintf(syslis, "*** These error rules are not in manual format:\n\n"); for (item_no = item_root; item_no != NIL; item_no = item_list[item_no]) { print_item(item_no); } } /********************************************************************/ /* Complete the construction of the RIGHT_MOST_PRODUCES map for */ /* non-terminals using the digraph algorithm. */ /* We make sure that each non-terminal A is not present in its own */ /* PRODUCES set since we are interested in the non-reflexive */ /* (positive) transitive closure. */ /********************************************************************/ for ALL_SYMBOLS(i) index_of[i] = OMEGA; top = 0; for ALL_NON_TERMINALS(nt) { if (index_of[nt] == OMEGA) compute_produces(nt); NTRESET_BIT_IN(produces, nt, nt - num_terminals); } /********************************************************************/ /* Construct the minimum subset of the domain of the GOTO map */ /* needed for automatic secondary level error recovery. For each */ /* state, we start out with the set of all nonterminals on which */ /* there is a transition in that state, and pare it down to a */ /* subset S, by removing all nonterminals B in S such that there */ /* is a goto-reduce action on B by a single production. If the */ /* READ-REDUCE option is not turned on, then, we check whether or */ /* not the goto action on B is to an LR(0) reduce state.Once we have*/ /* our subset S, we further reduce its size as follows. For each */ /* nonterminal A in S such that there exists another nonterminal */ /* B in S, where B ^= A, A ->+ Bx and x =>* %empty, we remove A */ /* from S. */ /* At the end of this process, the nonterminal elements whose */ /* NT_LIST values are still OMEGA are precisely the nonterminal */ /* symbols that are never used as candidates. */ /********************************************************************/ for ALL_NON_TERMINALS(i) nt_list[i] = OMEGA; nt_list[accept_image] = NIL; for ALL_STATES(state_no) { struct goto_header_type go_to; go_to = statset[state_no].go_to; nt_root = NIL; INIT_NTSET(set); for (i = 1; i <= go_to.size; i++) { symbol = GOTO_SYMBOL(go_to, i); state = GOTO_ACTION(go_to, i); if (state < 0) rule_no = -state; else { q = statset[state].kernel_items; item_no = q -> value; if (q -> next != NULL) rule_no = 0; else rule_no = item_table[item_no].rule_number; } if (rule_no == 0 || RHS_SIZE(rule_no) != 1) { nt_list[symbol] = nt_root; nt_root = symbol; NTSET_UNION(set, 0, produces, symbol); } } goto_domain[state_no] = NULL; for (symbol = nt_root; symbol != NIL; symbol = nt_list[symbol]) { if (! IS_ELEMENT(set, symbol - num_terminals)) { q = Allocate_node(); q -> value = symbol; q -> next = goto_domain[state_no]; goto_domain[state_no] = q; gotodom_size++; } } } /********************************************************************/ /* Allocate and construct the permanent goto domain structure: */ /* GD_INDEX and GD_RANGE. */ /********************************************************************/ n = 0; gd_index = Allocate_short_array(num_states + 2); gd_range = Allocate_short_array(gotodom_size + 1); for ALL_STATES(state_no) { gd_index[state_no] = n + 1; for (p = goto_domain[state_no]; p != NULL; q = p, p = p -> next) gd_range[++n] = p -> value; if (goto_domain[state_no] != NULL) free_nodes(goto_domain[state_no], q); } gd_index[num_states + 1] = n + 1; /******************************************************************/ /* Remove names assigned to nonterminals that are never used as */ /* error candidates. */ /******************************************************************/ if (names_opt == OPTIMIZE_PHRASES) { /*****************************************************************/ /* In addition to nonterminals that are never used as candidates,*/ /* if a nullable nonterminal was assigned a name by default */ /* (nonterminals that were "named" by default are identified */ /* with negative indices), that name is also removed. */ /*****************************************************************/ for ALL_NON_TERMINALS(symbol) { if (nt_list[symbol] == OMEGA) symno[symbol].name_index = symno[accept_image].name_index; else if (symno[symbol].name_index < 0) { if (null_nt[symbol]) symno[symbol].name_index = symno[accept_image].name_index; else symno[symbol].name_index = - symno[symbol].name_index; } } /*******************************************************************/ /* Adjust name map to remove unused elements and update SYMNO map. */ /*******************************************************************/ for (i = 1; i <= num_names; i++) name_used[i] = FALSE; for ALL_SYMBOLS(symbol) name_used[symno[symbol].name_index] = TRUE; n = 0; for (i = 1; i <= num_names; i++) { if (name_used[i]) { name[++n] = name[i]; names_map[i] = n; } } num_names = n; for ALL_SYMBOLS(symbol) symno[symbol].name_index = names_map[symno[symbol].name_index]; } /********************************************************************/ /* If the option LIST_BIT is ON, print the name map. */ /********************************************************************/ if (list_bit) { PR_HEADING; fprintf(syslis, "\nName map:\n"); output_line_no += 2; for ALL_SYMBOLS(symbol) { if (symno[symbol].name_index != symno[accept_image].name_index) print_name_map(symbol); } for ALL_SYMBOLS(symbol) { if (symbol != accept_image && symno[symbol].name_index == symno[accept_image].name_index) print_name_map(symbol); } } if (scopes_bit) process_scopes(); ffree(stack); ffree(index_of); ffree(names_map); ffree(name_used); nt_list += (num_terminals + 1); ffree(nt_list); ffree(set); produces += ((num_terminals + 1) * non_term_set_size); ffree(produces); direct_produces += (num_terminals + 1); ffree(direct_produces); ffree(goto_domain); return; } /******************************************************************/ /* COMPUTE_PRODUCES: */ /******************************************************************/ /* This procedure is used to compute the transitive closure of */ /* the PRODUCES, LEFT_PRODUCES and RIGHT_MOST_PRODUCES maps. */ /******************************************************************/ static void compute_produces(int symbol) { int indx; int new_symbol; struct node *p, *q; stack[++top] = symbol; indx = top; index_of[symbol] = indx; for (p = direct_produces[symbol]; p != NULL; q = p, p = p -> next) { new_symbol = p -> value; if (index_of[new_symbol] == OMEGA) /* first time seen? */ compute_produces(new_symbol); index_of[symbol] = MIN(index_of[symbol], index_of[new_symbol]); NTSET_UNION(produces, symbol, produces, new_symbol); } if (direct_produces[symbol] != NULL) free_nodes(direct_produces[symbol], q); if (index_of[symbol] == indx) /*symbol is SCC root */ { for (new_symbol = stack[top]; new_symbol != symbol; new_symbol = stack[--top]) { ASSIGN_NTSET(produces, new_symbol, produces, symbol); index_of[new_symbol] = INFINITY; } index_of[symbol] = INFINITY; top--; } return; } /******************************************************************/ /* PRINT_NAME_MAP: */ /******************************************************************/ /* This procedure prints the name associated with a given symbol. */ /* The same format that was used in the procedure DISPLAY_INPUT */ /* to print aliases is used to print name mappings. */ /******************************************************************/ static void print_name_map(int symbol) { int len; char line[PRINT_LINE_SIZE], tok[SYMBOL_SIZE + 1]; restore_symbol(tok, RETRIEVE_STRING(symbol)); len = PRINT_LINE_SIZE - 5; print_large_token(line, tok, "", len); strcat(line, " ::= "); restore_symbol(tok, RETRIEVE_NAME(symno[symbol].name_index)); if (strlen(line) + strlen(tok) > PRINT_LINE_SIZE - 1) { fprintf(syslis, "\n%s", line); ENDPAGE_CHECK; len = PRINT_LINE_SIZE - 4; print_large_token(line, tok, " ", len); } else strcat(line, tok); fprintf(syslis, "\n%s", line); ENDPAGE_CHECK; return; } /******************************************************************/ /* PROCESS_SCOPES: */ /******************************************************************/ /* Compute set of "scopes" and use it to construct SCOPE map. */ /******************************************************************/ static void process_scopes(void) { short *prefix_index, *suffix_index, *state_index; struct node **states_of; int num_state_sets = 0, i, j, k, n; short max_prefix_length = 0, dot_symbol, nt, symbol, item_root, item_no, rule_no, state_no, nt_root; BOOLEAN end_node; struct node *p, *q; prefix_index = Allocate_short_array(num_items + 1); suffix_index = Allocate_short_array(num_items + 1); item_of = Allocate_short_array(num_non_terminals); item_of -= (num_terminals + 1); next_item = Allocate_short_array(num_items + 1); symbol_seen = Allocate_boolean_array(num_non_terminals); symbol_seen -= (num_terminals + 1); states_of = (struct node **) calloc(num_non_terminals, sizeof(struct node *)); states_of -= (num_terminals + 1); state_index = Allocate_short_array(num_non_terminals); state_index -= (num_terminals + 1); scope_element = (struct scope_elmt *) calloc(num_items + 1, sizeof(struct scope_elmt)); /********************************************************************/ /* Initially, PRODUCES was used to compute the right-most-produces */ /* map. We save that map map and make it reflexive. Recall that */ /* RIGHT_PRODUCES is a mapping from each nonterminal B into the set */ /* of nonterminals A such that: */ /* */ /* A =>rm* B */ /* */ /* Next, reallocate PRODUCES and initialize it in order to */ /* construct the LEFT_PRODUCES map. Initially, CALLOC sets PRODUCES */ /* to the empty map. */ /* LEFT_PRODUCES is a mapping from each nonterminal A into the set */ /* of nonterminals B such that: */ /* */ /* A =>lm* B x */ /* */ /* for some arbitrary string x. */ /* */ /* Since A ->* A for all A, we insert A in PRODUCES(A) (but not */ /* in the linked list). */ /* */ /********************************************************************/ right_produces = produces; produces = (SET_PTR) calloc(num_non_terminals, non_term_set_size * sizeof(BOOLEAN_CELL)); if (produces == NULL) nospace(__FILE__, __LINE__); produces -= ((num_terminals + 1) * non_term_set_size); for ALL_NON_TERMINALS(nt) { NTSET_BIT_IN(right_produces, nt, nt - num_terminals); NTSET_BIT_IN(produces, nt, nt - num_terminals); direct_produces[nt] = NULL; for (end_node = ((p = clitems[nt]) == NULL); ! end_node; end_node = (p == clitems[nt])) { p = p -> next; for (item_no = p -> value; item_table[item_no].symbol IS_A_NON_TERMINAL; item_no++) { symbol = item_table[item_no].symbol; if (! IS_IN_NTSET(produces, nt, symbol - num_terminals)) { NTSET_BIT_IN(produces, nt, symbol - num_terminals); q = Allocate_node(); q -> value = symbol; q -> next = direct_produces[nt]; direct_produces[nt] = q; } if (! null_nt[symbol]) break; } } } /****************************************************************/ /* Complete the construction of the LEFT_produces map for */ /* non_terminals using the digraph algorithm. */ /****************************************************************/ for ALL_NON_TERMINALS(nt) index_of[nt] = OMEGA; top = 0; for ALL_NON_TERMINALS(nt) { if (index_of[nt] == OMEGA) compute_produces(nt); } left_produces = produces; /********************************************************************/ /* Allocate and initialize the PRODUCES array to construct the */ /* PRODUCES map. After allocation, CALLOC sets all sets to empty. */ /* Since A ->* A for all A, we insert A in PRODUCES(A) (but not */ /* in the linked list). */ /********************************************************************/ produces = (SET_PTR) calloc(num_non_terminals, non_term_set_size * sizeof(BOOLEAN_CELL)); if (produces == NULL) nospace(__FILE__, __LINE__); produces -= ((num_terminals + 1) * non_term_set_size); for ALL_NON_TERMINALS(nt) { NTSET_BIT_IN(produces, nt, nt - num_terminals); direct_produces[nt] = NULL; for (end_node = ((p = clitems[nt]) == NULL); ! end_node; end_node = (p == clitems[nt])) { p = p -> next; for (item_no = p -> value; item_table[item_no].symbol != empty; item_no++) { symbol = item_table[item_no].symbol; if (symbol IS_A_NON_TERMINAL) { if (! IS_IN_NTSET(produces, nt, symbol - num_terminals)) { NTSET_BIT_IN(produces, nt, symbol - num_terminals); q = Allocate_node(); q -> value = symbol; q -> next = direct_produces[nt]; direct_produces[nt] = q; } } } } } /****************************************************************/ /* Complete the construction of the PRODUCES map for */ /* non_terminals using the digraph algorithm. */ /* */ /* Since $ACC =>* x A y for all nonterminal A in the grammar, a */ /* single call to COMPUTE_PRODUCES does the trick. */ /****************************************************************/ for ALL_NON_TERMINALS(nt) index_of[nt] = OMEGA; top = 0; compute_produces(accept_image); /********************************************************************/ /* Construct a mapping from each non_terminal A into the set of */ /* items of the form [B -> x . A y]. */ /********************************************************************/ for ALL_NON_TERMINALS(nt) item_of[nt] = NIL; for ALL_ITEMS(item_no) { dot_symbol = item_table[item_no].symbol; if (dot_symbol IS_A_NON_TERMINAL) { next_item[item_no] = item_of[dot_symbol]; item_of[dot_symbol] = item_no; } } /********************************************************************/ /* Construct a list of scoped items in ITEM_LIST. */ /* Scoped items are derived from rules of the form A -> x B y such */ /* that B =>* w A z, %empty not in FIRST(y), and it is not the case */ /* that x = %empty and B ->* A v. */ /* Scoped items may also be identified by the user, using %error */ /* productions. */ /* As scoped items are added to the list, we keep track of the */ /* longest prefix encountered. This is subsequently used to */ /* bucket sort the scoped items in descending order of the length */ /* of their prefixes. */ /********************************************************************/ for ALL_ITEMS(item_no) item_list[item_no] = OMEGA; item_root = NIL; for ALL_ITEMS(item_no) { dot_symbol = item_table[item_no].symbol; if (dot_symbol == error_image) { if (item_table[item_no].dot != 0 && (! IS_IN_SET(first, item_table[item_no].suffix_index, empty))) { if (item_list[item_no] == OMEGA) { item_list[item_no] = item_root; item_root = item_no; max_prefix_length = MAX(max_prefix_length, item_table[item_no].dot); } } } else if (dot_symbol IS_A_NON_TERMINAL) { symbol = rules[item_table[item_no].rule_number].lhs; if (! IS_IN_SET(first, item_table[item_no].suffix_index, empty) && IS_IN_NTSET(produces, dot_symbol, symbol - num_terminals)) { if (is_scope(item_no)) { for (i = item_no + 1; ;i++) { symbol = item_table[i].symbol; if (symbol IS_A_TERMINAL) break; if (! null_nt[symbol]) break; } if (symbol IS_A_NON_TERMINAL) { for ALL_NON_TERMINALS(nt) symbol_seen[nt] = FALSE; symbol = get_shift_symbol(symbol); } if (symbol != empty && item_list[i] == OMEGA) { item_list[i] = item_root; item_root = i; max_prefix_length = MAX(max_prefix_length, item_table[i].dot); } } } } } /*********************************************************************/ /* In this loop, the prefix and suffix string for each scope in */ /* entered into a table. We also use the SYMBOL_SEEN array to */ /* identify the set of left-hand side symbols associated with the */ /* scopes. */ /*********************************************************************/ scope_table = Allocate_short_array(SCOPE_SIZE); for (i = 0; i < SCOPE_SIZE; i++) scope_table[i] = NIL; for ALL_NON_TERMINALS(nt) symbol_seen[nt] = FALSE; for (item_no = item_root; item_no != NIL; item_no = item_list[item_no]) { rule_no = item_table[item_no].rule_number; symbol = rules[rule_no].lhs; num_scopes = num_scopes + 1; symbol_seen[symbol] = TRUE; prefix_index[item_no] = insert_prefix(item_no); suffix_index[item_no] = insert_suffix(item_no); } ffree(scope_table); /*********************************************************************/ /* We now construct a mapping from each nonterminal symbol that is */ /* the left-hand side of a rule containing scopes into the set of */ /* states that has a transition on the nonterminal in question. */ /*********************************************************************/ nt_root = NIL; for ALL_NON_TERMINALS(nt) states_of[nt] = NULL; for ALL_STATES(state_no) { struct goto_header_type go_to; go_to = statset[state_no].go_to; for (i = 1; i <= go_to.size; i++) { symbol = GOTO_SYMBOL(go_to, i); if (symbol_seen[symbol]) { if (states_of[symbol] == NULL) { nt_list[symbol] = nt_root; nt_root = symbol; num_state_sets = num_state_sets + 1; } q = Allocate_node(); q -> value = state_no; q -> next = states_of[symbol]; states_of[symbol] = q; } } } right_produces += ((num_terminals + 1) * non_term_set_size); ffree(right_produces); left_produces += ((num_terminals + 1) * non_term_set_size); ffree(left_produces); /*********************************************************************/ /* Next, we used the optimal partition procedure to compress the */ /* space used by the sets of states, allocate the SCOPE structure */ /* and store the compressed sets of states in it. */ /* We also sort the list of items by the length of their prefixes in */ /* descending order. This is done primarily as an optimization. */ /* If a longer prefix matches prior to a shorter one, the parsing */ /* will terminate quicker. */ /*********************************************************************/ process_scope_states: { SET_PTR collection; short *element_size, *list, *start, *stack, *ordered_symbol, *state_list, *bucket; int state_root, state_no; state_set_size = num_states / SIZEOF_BC + (num_states % SIZEOF_BC ? 1 : 0); collection = (SET_PTR) calloc(num_state_sets + 1, state_set_size * sizeof(BOOLEAN_CELL)); if (collection == NULL) nospace(__FILE__, __LINE__); element_size = Allocate_short_array(num_state_sets + 1); start = Allocate_short_array(num_state_sets + 2); stack = Allocate_short_array(num_state_sets + 1); ordered_symbol = Allocate_short_array(num_state_sets + 1); list = Allocate_short_array(num_state_sets + 1); state_list = Allocate_short_array(num_states + 1); bucket = Allocate_short_array(max_prefix_length + 1); for (symbol = nt_root, i = 1; symbol != NIL; symbol = nt_list[symbol], i++) { list[i] = i; ordered_symbol[i] = symbol; EMPTY_COLLECTION_SET(i); element_size[i] = 0; for (p = states_of[symbol]; p != NULL; p = p -> next) { element_size[i]++; SET_COLLECTION_BIT(i, p -> value); } } partset(collection, element_size, list, start, stack, num_state_sets); for (i = 1; i <= num_state_sets; i++) { symbol = ordered_symbol[i]; state_index[symbol] = ABS(start[i]); } scope_state_size = start[num_state_sets + 1] - 1; scope = (struct scope_type *) calloc(num_scopes + 1, sizeof(struct scope_type)); if (scope == NULL) nospace(__FILE__, __LINE__); scope_right_side = Allocate_short_array(scope_rhs_size + 1); scope_state = Allocate_short_array(scope_state_size + 1); k = 0; for (i = 0; i <= (int) num_states; i++) state_list[i] = OMEGA; for (i = 1; i <= num_state_sets; i++) { if (start[i] > 0) { state_root = 0; state_list[state_root] = NIL; for (end_node = ((j = i) == NIL); ! end_node; end_node = (j == i)) { j = stack[j]; symbol = ordered_symbol[j]; for (p = states_of[symbol]; p != NULL; p = p -> next) { state_no = p -> value; if (state_list[state_no] == OMEGA) { state_list[state_no] = state_root; state_root = state_no; } } } for (state_no = state_root; state_no != NIL; state_no = state_root) { state_root = state_list[state_no]; state_list[state_no] = OMEGA; k++; scope_state[k] = state_no; } } } for (symbol = nt_root; symbol != NIL; symbol = nt_list[symbol]) { for (p = states_of[symbol]; p != NULL; q = p, p = p -> next) ; free_nodes(states_of[symbol], q); } /***********************************************************************/ /* Use the BUCKET array as a base to partition the scoped items */ /* based on the length of their prefixes. The list of items in each */ /* bucket is kept in the NEXT_ITEM array sorted in descending order */ /* of the length of the right-hand side of the item. */ /* Items are kept sorted in that fashion because when two items have */ /* the same prefix, we want the one with the shortest suffix to be */ /* chosen. In other words, if we have two scoped items, say: */ /* */ /* A ::= x . y and B ::= x . z where |y| < |z| */ /* */ /* and both of them are applicable in a given context with similar */ /* result, then we always want A ::= x . y to be used. */ /***********************************************************************/ for (i = 1; i <= max_prefix_length; i++) bucket[i] = NIL; for (item_no = item_root; item_no != NIL; item_no = item_list[item_no]) { int tail; k = item_table[item_no].dot; for (i = bucket[k]; i != NIL; tail = i, i = next_item[i]) { if (RHS_SIZE(item_table[item_no].rule_number) >= RHS_SIZE(item_table[i].rule_number)) break; } next_item[item_no] = i; if (i == bucket[k]) bucket[k] = item_no; /* insert at the beginning */ else next_item[tail] = item_no; /* insert in middle or end */ } /*********************************************************************/ /* Reconstruct list of scoped items in sorted order. Since we want */ /* the items in descending order, we start with the smallest bucket */ /* proceeding to the largest one and insert the items from each */ /* bucket in LIFO order in ITEM_LIST. */ /*********************************************************************/ item_root = NIL; for (k = 1; k <= max_prefix_length; k++) { for (item_no = bucket[k]; item_no != NIL; item_no = next_item[item_no]) { item_list[item_no] = item_root; item_root = item_no; } } ffree(collection); ffree(element_size); ffree(start); ffree(stack); ffree(ordered_symbol); ffree(state_list); ffree(list); ffree(bucket); } /* End PROCESS_SCOPE_STATES */ /*********************************************************************/ /* Next, we initialize the remaining fields of the SCOPE structure. */ /*********************************************************************/ item_no = item_root; for (i = 1; item_no != NIL; i++) { scope[i].prefix = prefix_index[item_no]; scope[i].suffix = suffix_index[item_no]; rule_no = item_table[item_no].rule_number; scope[i].lhs_symbol = rules[rule_no].lhs; symbol = rhs_sym[rules[rule_no].rhs + item_table[item_no].dot]; if (symbol IS_A_TERMINAL) scope[i].look_ahead = symbol; else { for ALL_NON_TERMINALS(j) symbol_seen[j] = FALSE; scope[i].look_ahead = get_shift_symbol(symbol); } scope[i].state_set = state_index[scope[i].lhs_symbol]; item_no = item_list[item_no]; } for (j = 1; j <= scope_top; j++) { if (scope_element[j].item < 0) { item_no = -scope_element[j].item; rule_no = item_table[item_no].rule_number; n = scope_element[j].index; for (k = rules[rule_no].rhs+item_table[item_no].dot - 1; k >= rules[rule_no].rhs; /* symbols before dot*/ k--) scope_right_side[n++] = rhs_sym[k]; } else { item_no = scope_element[j].item; rule_no = item_table[item_no].rule_number; n = scope_element[j].index; for (k = rules[rule_no].rhs + item_table[item_no].dot; k < rules[rule_no + 1].rhs; /* symbols after dot */ k++) { symbol = rhs_sym[k]; if (symbol IS_A_NON_TERMINAL) { if (! null_nt[symbol]) scope_right_side[n++] = rhs_sym[k]; } else if (symbol != error_image) scope_right_side[n++] = rhs_sym[k]; } } scope_right_side[n] = 0; } if (list_bit) print_scopes(); ffree(prefix_index); ffree(suffix_index); item_of += (num_terminals + 1); ffree(item_of); ffree(next_item); symbol_seen += (num_terminals + 1); ffree(symbol_seen); states_of += (num_terminals + 1); ffree(states_of); state_index += (num_terminals + 1); ffree(state_index); ffree(scope_element); return; } /*********************************************************************/ /* IS_SCOPE: */ /*********************************************************************/ /* This procedure checks whether or not an item of the form: */ /* [A -> w B x where B ->* y A z is a valid scope. */ /* */ /* Such an item is a valid scope if the following conditions hold: */ /* */ /* 1) it is not the case that x =>* %empty */ /* 2) either it is not the case that w =>* %empty or it is not the */ /* case that B =>lm* A. */ /* 3) it is not the case that whenever A is introduced through */ /* closure, it is introduced by a nonterminal C where C =>rm* A */ /* and C =>rm+ B. */ /*********************************************************************/ static BOOLEAN is_scope(int item_no) { int i, nt, lhs_symbol, target, symbol; for (i = item_no - item_table[item_no].dot; i < item_no; i++) { symbol = item_table[i].symbol; if (symbol IS_A_TERMINAL) return(TRUE); if (! null_nt[symbol]) return(TRUE); } lhs_symbol = rules[item_table[item_no].rule_number].lhs; target = item_table[item_no].symbol; if (IS_IN_NTSET(left_produces, target, lhs_symbol - num_terminals)) return(FALSE); if (item_table[item_no].dot > 0) return(TRUE); for ALL_NON_TERMINALS(nt) symbol_seen[nt] = FALSE; return(scope_check(lhs_symbol, target, lhs_symbol)); } /*********************************************************************/ /* SCOPE_CHECK: */ /*********************************************************************/ /* Given a nonterminal LHS_SYMBOL and a nonterminal TARGET where, */ /* */ /* LHS_SYMBOL ::= TARGET x */ /* */ /* find out if whenever LHS_SYMBOL is introduced through closure, it */ /* is introduced by a nonterminal SOURCE such that */ /* */ /* SOURCE ->rm* LHS_SYMBOL */ /* */ /* and */ /* */ /* SOURCE ->rm+ TARGET */ /* */ /*********************************************************************/ static BOOLEAN scope_check(int lhs_symbol, int target, int source) { int item_no, rule_no, symbol; symbol_seen[source] = TRUE; if (IS_IN_NTSET(right_produces, target, source - num_terminals) && IS_IN_NTSET(right_produces, lhs_symbol, source - num_terminals)) return(FALSE); for (item_no = item_of[source]; item_no != NIL; item_no = next_item[item_no]) { if (item_table[item_no].dot != 0) return(TRUE); rule_no = item_table[item_no].rule_number; symbol = rules[rule_no].lhs; if (! symbol_seen[symbol]) /* not yet processed */ { if (scope_check(lhs_symbol, target, symbol)) return(TRUE); } } return(FALSE); } /*********************************************************************/ /* INSERT_PREFIX: */ /*********************************************************************/ /* This procedure takes as argument an item and inserts the string */ /* prefix of the item preceeding the "dot" into the scope table, if */ /* that string is not already there. In any case, the index number */ /* associated with the prefix in question is returned. */ /* NOTE that since both prefixes and suffixes are entered in the */ /* table, the prefix of a given item, ITEM_NO, is encoded as */ /* -ITEM_NO, whereas the suffix of that item is encoded as +ITEM_NO. */ /*********************************************************************/ static insert_prefix(int item_no) { int i, j, rule_no; unsigned long hash_address = 0; rule_no = item_table[item_no].rule_number; for (i = rules[rule_no].rhs; /* symbols before dot */ i < rules[rule_no].rhs + item_table[item_no].dot; i++) hash_address += rhs_sym[i]; i = hash_address % SCOPE_SIZE; for (j = scope_table[i]; j != NIL; j = scope_element[j].link) { if (is_prefix_equal(scope_element[j].item, item_no)) return(scope_element[j].index); } scope_top++; scope_element[scope_top].item = -item_no; scope_element[scope_top].index = scope_rhs_size + 1; scope_element[scope_top].link = scope_table[i]; scope_table[i] = scope_top; scope_rhs_size += (item_table[item_no].dot + 1); return(scope_element[scope_top].index); } /******************************************************************/ /* IS_PREFIX_EQUAL: */ /******************************************************************/ /* This boolean function takes two items as arguments and checks */ /* whether or not they have the same prefix. */ /******************************************************************/ static BOOLEAN is_prefix_equal(int item_no, int item_no2) { int item_no1, start, dot, i, j; if (item_no > 0) /* a suffix */ return(FALSE); item_no1 = -item_no; if (item_table[item_no1].dot != item_table[item_no2].dot) return(FALSE); j = rules[item_table[item_no1].rule_number].rhs; start = rules[item_table[item_no2].rule_number].rhs; dot = start + item_table[item_no2].dot - 1; for (i = start; i <= dot; i++) /* symbols before dot */ { if (rhs_sym[i] != rhs_sym[j]) return(FALSE); j++; } return(TRUE); } /*********************************************************************/ /* INSERT_SUFFIX: */ /*********************************************************************/ /* This procedure is analoguous to INSERT_PREFIX. It takes as */ /* argument an item, and inserts the suffix string following the dot */ /* in the item into the scope table, if it is not already there. */ /* In any case, it returns the index associated with the suffix. */ /* When inserting a suffix into the table, all nullable nonterminals */ /* in the suffix are disregarded. */ /*********************************************************************/ static insert_suffix(int item_no) { int i, j, rule_no, num_elements = 0; unsigned long hash_address = 0; rule_no = item_table[item_no].rule_number; for (i = rules[rule_no].rhs + item_table[item_no].dot; i < rules[rule_no + 1].rhs; /* symbols after dot */ i++) { if (rhs_sym[i] IS_A_NON_TERMINAL) { if (! null_nt[rhs_sym[i]]) { hash_address += rhs_sym[i]; num_elements++; } } else if (rhs_sym[i] != error_image) { hash_address += rhs_sym[i]; num_elements++; } } i = hash_address % SCOPE_SIZE; for (j = scope_table[i]; j != NIL; j = scope_element[j].link) { if (is_suffix_equal(scope_element[j].item, item_no)) return(scope_element[j].index); } scope_top++; scope_element[scope_top].item = item_no; scope_element[scope_top].index = scope_rhs_size + 1; scope_element[scope_top].link = scope_table[i]; scope_table[i] = scope_top; scope_rhs_size += (num_elements + 1); return(scope_element[scope_top].index); } /******************************************************************/ /* IS_SUFFIX_EQUAL: */ /******************************************************************/ /* This boolean function takes two items as arguments and checks */ /* whether or not they have the same suffix. */ /******************************************************************/ static BOOLEAN is_suffix_equal(int item_no1, int item_no2) { int rule_no, dot1, dot2, i, j; if (item_no1 < 0) /* a prefix */ return(FALSE); rule_no = item_table[item_no1].rule_number; i = rules[rule_no].rhs + item_table[item_no1].dot; dot1 = rules[rule_no + 1].rhs - 1; rule_no = item_table[item_no2].rule_number; j = rules[rule_no].rhs + item_table[item_no2].dot; dot2 = rules[rule_no + 1].rhs - 1; while (i <= dot1 && j <= dot2) /* non-nullable syms before dot */ { if (rhs_sym[i] IS_A_NON_TERMINAL) { if (null_nt[rhs_sym[i]]) { i++; continue; } } else if (rhs_sym[i] == error_image) { i++; continue; } if (rhs_sym[j] IS_A_NON_TERMINAL) { if (null_nt[rhs_sym[j]]) { j++; continue; } } else if (rhs_sym[j] == error_image) { j++; continue; } if (rhs_sym[i] != rhs_sym[j]) return(FALSE); j++; i++; } for (; i <= dot1; i++) { if (rhs_sym[i] IS_A_NON_TERMINAL) { if (! null_nt[rhs_sym[i]]) return(FALSE); } else if (rhs_sym[i] != error_image) return(FALSE); } for (; j <= dot2; j++) { if (rhs_sym[j] IS_A_NON_TERMINAL) { if (! null_nt[rhs_sym[j]]) return(FALSE); } else if (rhs_sym[j] != error_image) return(FALSE); } return(TRUE); } /******************************************************************/ /* PRINT_SCOPES: */ /******************************************************************/ /* This procedure is similar to the global procedure PTITEM. */ /******************************************************************/ static void print_scopes(void) { int len, offset, i, k, symbol; char line[PRINT_LINE_SIZE + 1], tok[SYMBOL_SIZE + 1], tmp[PRINT_LINE_SIZE]; PR_HEADING; fprintf(syslis, "\nScopes:\n"); output_line_no += 2; for (k=1; k <= num_scopes; k++) { symbol = scope[k].lhs_symbol; restore_symbol(tok, RETRIEVE_STRING(symbol)); len = PRINT_LINE_SIZE - 5; print_large_token(line, tok, "", len); strcat(line, " ::= "); i = (PRINT_LINE_SIZE / 2) - 1; offset = MIN(strlen(line) - 1, i); len = PRINT_LINE_SIZE - (offset + 4); /* locate end of list */ for (i = scope[k].prefix; scope_right_side[i] != 0; i++) ; for (i = i - 1; i >= scope[k].prefix; i--) /* symbols before dot */ { symbol = scope_right_side[i]; restore_symbol(tok, RETRIEVE_STRING(symbol)); if (strlen(line) + strlen(tok) > PRINT_LINE_SIZE - 4) { fprintf(syslis, "\n%s", line); ENDPAGE_CHECK; fill_in(tmp, offset, ' '); print_large_token(line, tok, tmp, len); } else strcat(line, tok); strcat(line, BLANK); } /*********************************************************************/ /* We now add a dot "." to the output line, and print the remaining */ /* symbols in the right hand side. */ /*********************************************************************/ strcat(line, " ."); len = PRINT_LINE_SIZE - (offset + 1); for (i = scope[k].suffix; scope_right_side[i] != 0; i++) { symbol = scope_right_side[i]; restore_symbol(tok, RETRIEVE_STRING(symbol)); if (strlen(line) + strlen(tok) > PRINT_LINE_SIZE - 1) { fprintf(syslis, "\n%s", line); ENDPAGE_CHECK; fill_in(tmp, offset, ' '); print_large_token(line, tok, tmp, len); } else strcat(line, tok); strcat(line, BLANK); } fprintf(syslis, "\n%s", line); ENDPAGE_CHECK; } return; } /*********************************************************************/ /* GET_SHIFT_SYMBOL: */ /*********************************************************************/ /* This procedure takes as parameter a nonterminal, LHS_SYMBOL, and */ /* determines whether or not there is a terminal symbol t such that */ /* LHS_SYMBOL can rightmost produce a string tX. If so, t is */ /* returned, otherwise EMPTY is returned. */ /*********************************************************************/ static get_shift_symbol(int lhs_symbol) { int item_no, rule_no, symbol; BOOLEAN end_node; struct node *p; if (! symbol_seen[lhs_symbol]) { symbol_seen[lhs_symbol] = TRUE; for (end_node = ((p = clitems[lhs_symbol]) == NULL); ! end_node; end_node = (p == clitems[lhs_symbol])) { p = p -> next; item_no = p -> value; rule_no = item_table[item_no].rule_number; if (RHS_SIZE(rule_no) > 0) { symbol = rhs_sym[rules[rule_no].rhs]; if (symbol IS_A_TERMINAL) return(symbol); else { symbol = get_shift_symbol(symbol); if (symbol != empty) return(symbol); } } } } return(empty); }