home *** CD-ROM | disk | FTP | other *** search
- /**********************************************************************\
- *
- * CALCPOS.C
- *
- * Calculates firstpos, lastpos and followpos from syntax tree.
- *
- * Author: Jarmo Ruuth 15-Mar-1988
- *
- * Copyright (C) 1988-90 by Jarmo Ruuth
- * May be freely copied for any non-commercial usage.
- \**********************************************************************/
-
- #include "system.h"
- #include "dfa.h"
-
- /**********************************************************************
- *
- * nullable
- *
- * Sets nullable TRUE or FALSE for given node.
- * Nullable is TRUE for such nodes that are the roots of subexpressions
- * that generate languages that include the empty string.
- */
- static void nullable(REG1 node_t* tnode)
- {
- switch (tnode->type) {
- case EMPTY_ID:
- tnode->nullable = TRUE;
- break;
- case CLASS_ID:
- case ID:
- tnode->nullable = FALSE;
- break;
- case OR:
- tnode->nullable =
- rbuf.tree[tnode->val.next.left].nullable
- || rbuf.tree[tnode->val.next.right].nullable;
- break;
- case CAT:
- tnode->nullable =
- rbuf.tree[tnode->val.next.left].nullable
- && rbuf.tree[tnode->val.next.right].nullable;
- break;
- case CLOSURE:
- tnode->nullable = TRUE;
- break;
- }
- }
-
- /**********************************************************************
- *
- * firstpos
- *
- * Fills firstpos-field in syntax tree. Here we suppose, that all
- * nodes below current node are already visited (depth-first traversal).
- * Firstpos is a set of positions that can match the first symbol of a
- * string generated by the subexpression rooted by node.
- */
- static void firstpos(REG1 int node, REG2 node_t* tnode)
- {
- switch (tnode->type) {
- case EMPTY_ID:
- set_clear(tnode->firstpos, rbuf.setsize);
- break;
- case CLASS_ID:
- case ID:
- add_set(tnode->firstpos, node);
- break;
- case OR:
- set_union(tnode->firstpos,
- rbuf.tree[tnode->val.next.left].firstpos,
- rbuf.tree[tnode->val.next.right].firstpos,
- rbuf.setsize);
- break;
- case CAT:
- if (rbuf.tree[tnode->val.next.left].nullable)
- set_union(tnode->firstpos,
- rbuf.tree[tnode->val.next.left].firstpos,
- rbuf.tree[tnode->val.next.right].firstpos,
- rbuf.setsize);
- else
- set_copy(tnode->firstpos,
- rbuf.tree[tnode->val.next.left].firstpos,
- rbuf.setsize);
- break;
- case CLOSURE:
- set_copy(tnode->firstpos,
- rbuf.tree[tnode->val.next.left].firstpos,
- rbuf.setsize);
- break;
- }
- }
-
- /**********************************************************************
- *
- * lastpos
- *
- * Fills lastpos-field in syntax tree. Here we suppose, that all
- * nodes below current node are already visited (depth-first traversal).
- * Lastpos is a set of positions that can match the last symbol of a
- * string generated by the subexpression rooted by node.
- */
- static void lastpos(int node, REG1 node_t* tnode)
- {
- switch (tnode->type) {
- case EMPTY_ID:
- set_clear(tnode->lastpos, rbuf.setsize);
- break;
- case CLASS_ID:
- case ID:
- add_set(tnode->lastpos, node);
- break;
- case OR:
- set_union(tnode->lastpos,
- rbuf.tree[tnode->val.next.left].lastpos,
- rbuf.tree[tnode->val.next.right].lastpos,
- rbuf.setsize);
- break;
- case CAT:
- if (rbuf.tree[tnode->val.next.right].nullable)
- set_union(tnode->lastpos,
- rbuf.tree[tnode->val.next.left].lastpos,
- rbuf.tree[tnode->val.next.right].lastpos,
- rbuf.setsize);
- else
- set_copy(tnode->lastpos,
- rbuf.tree[tnode->val.next.right].lastpos,
- rbuf.setsize);
- break;
- case CLOSURE:
- set_copy(tnode->lastpos,
- rbuf.tree[tnode->val.next.left].lastpos,
- rbuf.setsize);
- break;
- }
- }
-
- /**********************************************************************
- *
- * set_pos
- *
- * Sets nullable, firstpos and lastpos in whole tree.
- */
- static bool set_pos(void)
- {
- REG1 int i;
- REG2 node_t* tnode = rbuf.tree;
-
- for (i = 0; i <= rbuf.root; i++, tnode++) {
- nullable(tnode);
- if ((tnode->firstpos = calloc(rbuf.setsize, 1)) == NULL)
- return FALSE;
- firstpos(i, tnode);
- if ((tnode->lastpos = calloc(rbuf.setsize, 1)) == NULL)
- return FALSE;
- lastpos(i, tnode);
- }
- return TRUE;
- }
-
- /**********************************************************************
- *
- * followpos
- *
- * Creates followpos array using depth-first traversal.
- * Followpos(i) is the set of positions j such that there is some input
- * string ...cd... such that i corresponds to this occurence of c and
- * j to this occurence of d. So for example for a subexpression cd each
- * lastpos of c is followed by firstpos of d, or for c* each lastpos of
- * c is followed by firstpos in c.
- */
- static bool followpos(void)
- {
- REG1 int i;
- REG2 int j;
- REG3 node_t* inode;
- REG4 node_t* jnode;
- REG5 set_t** followptr;
-
- if ((followptr = calloc((rbuf.root+1) * sizeof(set_t *), 1)) == NULL)
- return FALSE;
- rbuf.follow_array = followptr;
- inode = rbuf.tree;
- for (i = 0; i <= rbuf.root; i++, inode++, followptr++) {
- if (inode->type != ID && inode->type != CLASS_ID ) {
- *followptr = NULL;
- continue;
- }
- if ((*followptr = calloc(rbuf.setsize, 1)) == NULL)
- return FALSE;
- jnode = &rbuf.tree[i+1];
- for (j = i+1; j <= rbuf.root; j++, jnode++) {
- switch (jnode->type) {
- case CAT:
- if (in_set(rbuf.tree[jnode->val.next.left].
- lastpos, i))
- set_union(
- *followptr,
- *followptr,
- rbuf.tree[jnode->val.next.right].firstpos,
- rbuf.setsize);
- break;
- case CLOSURE:
- if (in_set(jnode->lastpos, i))
- set_union(
- *followptr,
- *followptr,
- jnode->firstpos,
- rbuf.setsize);
- break;
- default:
- break;
- }
- }
- }
- return TRUE;
- }
-
- /**********************************************************************
- *
- * free_posmem
- *
- * Frees memory, that is not needed to create transition table.
- */
- static void free_posmem(void)
- {
- REG1 int i;
- REG2 node_t* tnode = &rbuf.tree[rbuf.root];
-
- d_free(&tnode->lastpos);
- --tnode;
- for (i = rbuf.root-1; i-- >= 0; tnode--) {
- d_free(&tnode->firstpos);
- d_free(&tnode->lastpos);
- }
- }
-
- #ifdef TEST
-
- extern int debug;
-
- /**********************************************************************
- *
- * print_tree
- */
- static void print_tree(void)
- {
- REG1 int i,j;
- char* char_image(int c);
-
- for (i = 0; i <= rbuf.root; i++) {
- printf("%2d ",i);
- switch (rbuf.tree[i].type) {
- case CAT:
- printf("CAT %2d %2d",
- rbuf.tree[i].val.next.left,
- rbuf.tree[i].val.next.right);
- break;
- case OR:
- printf("OR %2d %2d",
- rbuf.tree[i].val.next.left,
- rbuf.tree[i].val.next.right);
- break;
- case CLOSURE:
- printf("CLO %2d ",
- rbuf.tree[i].val.next.left);
- break;
- case ID:
- printf("ID %3s ",
- char_image(rbuf.tree[i].val.item));
- break;
- case EMPTY_ID:
- printf("EMPTY_ID ");
- break;
- case CLASS_ID:
- printf("CLASS ID ");
- break;
- }
- printf(" f");
- for (j = 0; j <= rbuf.root; j++)
- if (in_set(rbuf.tree[i].firstpos, j))
- printf(" %2d",j);
- printf("\tl");
- for (j = 0; j <= rbuf.root; j++)
- if (in_set(rbuf.tree[i].lastpos, j))
- printf(" %2d",j);
- printf("\n");
- }
- fflush(stdout);
- }
-
- /**********************************************************************
- *
- * print_followpos
- */
- static void print_followpos(void)
- {
- REG1 int i, j;
-
- printf("Followpositions:\n");
- for (i = 0; i <= rbuf.root; i++) {
- if (rbuf.follow_array[i] == NULL)
- continue;
- printf("%2d:", i);
- for (j = 0; j <= rbuf.root; j++)
- if (in_set(rbuf.follow_array[i], j))
- printf(" %d", j);
- printf("\n");
- }
- fflush(stdout);
- }
-
- #endif /* TEST */
-
- /**********************************************************************
- *
- * calcpos
- */
- bool calcpos(void)
- {
- rbuf.setsize = set_bytesize(rbuf.root+1);
- if (!set_pos() || !followpos()) {
- free_posmem();
- return FALSE;
- }
- #ifdef TEST
- if (debug > 1) {
- print_tree();
- print_followpos();
- }
- #endif
- free_posmem();
- return TRUE;
- }
-