home *** CD-ROM | disk | FTP | other *** search
- /**********************************************************************\
- *
- * REGMUST.C
- *
- * Finds regmust string from syntax tree and stores it to rbuf.regmust.
- *
- * Author: Jarmo Ruuth 15-Mar-1988
- *
- * Copyright (C) 1988-90 by Jarmo Ruuth
- * May be freely copied for any non-commercial usage.
- \**********************************************************************/
-
- #include "system.h"
- #include "dfa.h"
-
- /**********************************************************************
- *
- * save_must
- *
- * Saves regmust.
- */
- static bool save_must(REG1 node_t* tree, REG2 int pos, int len)
- {
- REG3 char* ptr;
- REG4 int left, right;
-
- if (len <= 0) {
- rbuf.regmust = NULL;
- return TRUE;
- }
- if ((ptr = malloc(len+1)) == NULL)
- return FALSE;
- rbuf.regmust = ptr;
- ptr += len;
- *ptr-- = '\0';
- while (tree[pos].type == CAT && len--) {
- left = tree[pos].val.next.left;
- right = tree[pos].val.next.right;
- if (tree[right].type != ID)
- break;
- *ptr-- = tree[right].val.item;
- pos = left;
- }
- if (len && tree[pos].type == ID)
- *ptr-- = tree[pos].val.item;
- if (strlen(rbuf.regmust) == 0)
- d_free(&rbuf.regmust);
- return TRUE;
- }
-
- /**********************************************************************
- *
- * IS_MUST_CH
- */
- #define IS_MUST_CH(c) ((int)(c) < NCHARS)
-
- /**********************************************************************
- *
- * must_len
- *
- * Calculates regmust lenght starting from 'pos'. Length is zero if
- * current position doesn't start any regmust.
- */
- static int must_len(REG1 node_t* tree, REG2 int* pos)
- {
- REG3 int len = 0;
- REG4 int left, right;
-
- while (tree[*pos].type == CAT) {
- left = tree[*pos].val.next.left;
- right = tree[*pos].val.next.right;
- if (tree[right].type != ID)
- return len;
- if (!IS_MUST_CH(tree[right].val.item))
- return len;
- ++len;
- if (tree[left].type == ID) {
- if (IS_MUST_CH(tree[left].val.item))
- return len+1;
- else
- return len;
- }
- *pos = left;
- }
- return len;
- }
-
- /**********************************************************************
- *
- * find_regmust
- *
- * Finds longest regmust, i.e. string that must match exactly in regular
- * expression. A very simple linear algorithm is used: starting from root go
- * always to the left and from every CAT node try to calculate regmust
- * that begins from that position. Routine stops to first node that isn't
- * CAT node. A better algorithm should also check possible common
- * substrings from OR's, now we just give up if we find OR.
- */
- bool find_regmust(REG1 node_t* tree, int root)
- {
- REG2 int len, new_len;
- REG4 int pos, save;
-
- rbuf.regmust = NULL;
- root -= rbuf.search ? 3 : 2;
- if (root == 0) {
- if (rbuf.tree[0].type == ID
- && IS_MUST_CH(rbuf.tree[0].val.item))
- len = 1;
- else
- len = 0;
- return save_must(tree, 0, len);
- }
- len = 0;
- while (tree[root].type == CAT) {
- save = root;
- new_len = must_len(tree, &root);
- if (new_len > len) {
- len = new_len;
- pos = save;
- }
- if (root == save)
- root = tree[root].val.next.left;
- }
- if (tree[root].type == ID && len < 1
- && IS_MUST_CH(rbuf.tree[root].val.item))
- {
- len = 1;
- pos = root;
- }
- return save_must(tree, pos, len > MAXREGMUST ? MAXREGMUST : len);
- }
-