home *** CD-ROM | disk | FTP | other *** search
- #include <stdlib.h>
- #include <string.h>
- #include <limits.h>
- #include <stdio.h>
- #include <ctype.h>
-
- /* Get the interface, including the syntax bits. */
- #include "regex.h"
-
/* Map the legacy BSD memory routines onto their ISO C equivalents.
   Note the BSD argument order: bcopy() takes (source, destination, length).
   bcopy() is required to copy correctly between overlapping regions, so
   it is mapped to memmove(); memcpy() is undefined when regions overlap. */
#define bcopy(s,d,n) memmove((d),(s),(n))
#define bcmp(s1,s2,n) memcmp((s1),(s2),(n))
#define bzero(s,n) memset((s),0,(n))
-
/* Implementation limits */
#define CHAR_NUM (UCHAR_MAX + 1) /* Number of distinct character codes */
#define CHAR_ARR (CHAR_NUM / CHAR_BIT) /* Bytes in a one-bit-per-character array */
-
/* Syntax-class flags, defined here so that \w, \s, etc. can be handled.
   Each class occupies a distinct bit. */
#define Sword  (1 << 0)
#define Sdigit (1 << 1)
#define Sspace (1 << 2)
-
/* Opcodes of a compiled regular expression.  Each opcode occupies one
   byte of the compiled pattern; some are followed by argument bytes,
   whose interpretation is entirely up to the opcode.  Zero bytes may
   appear in a compiled pattern.

   search.c (search_buffer) in emacs needs the value of `exactn', so
   regex.h defines `RE_EXACTN_VALUE' as 1; `exactn' must stay 1 here.  */

enum regexpcode
  {
    unused = 0,

    /* Followed by one byte giving n, then by n literal bytes.  */
    exactn = 1,

    /* Fail unless at beginning of line.  */
    begline = 2,

    /* Fail unless at end of line.  */
    endline = 3,

    /* Followed by two bytes giving the relative address to jump to.  */
    jump = 4,

    /* Followed by two bytes giving the relative address of the place
       to resume at in case of failure.  */
    on_failure_jump = 5,

    /* Throw away the latest failure point and then jump to address.  */
    finalize_jump = 6,

    /* Like jump, but finalize if safe to do so.  Used to jump back to
       the beginning of a repeat.  If the command following this jump
       is clearly incompatible with the one at the beginning of the
       repeat, so that backtracking out of repetitions already
       completed cannot help, then we finalize.  */
    maybe_finalize_jump = 7,

    /* Jump, and push a dummy failure point.  The failure point is
       thrown away if an attempt is made to use it for a failure.  A +
       construct makes this before the first repeat.  Also used as an
       intermediary kind of jump when compiling an or construct.  */
    dummy_failure_jump = 8,

    /* Used like on_failure_jump, except that it has to succeed n
       times; then it gets turned into an on_failure_jump.  The
       relative address following it is useless until then.  The
       address is followed by two bytes containing n.  */
    succeed_n = 9,

    /* Similar to jump, but jump n times only; the relative address is
       in turn followed by two more bytes containing n.  */
    jump_n = 10,

    /* Set the following relative location to the subsequent number.  */
    set_number_at = 11,

    /* Matches any (more or less) one character.  */
    anychar = 12,

    /* Matches any one char belonging to the specified set.  The first
       following byte is the number of bitmap bytes; then come the
       bitmap bytes saying which chars are in.  Bits in each byte are
       ordered low-bit-first.  A character is in the set if its bit is
       1.  A character too large to have a bit in the map is
       automatically not in the set.  */
    charset = 13,

    /* Same parameters as charset, but matches any character that is
       not one of those specified.  */
    charset_not = 14,

    /* Start remembering the text that is matched, for storing in a
       memory register.  Followed by one byte containing the register
       number.  Register numbers must be in the range 0 through
       RE_NREGS.  */
    start_memory = 15,

    /* Stop remembering the text that is matched and store it in a
       memory register.  Followed by one byte containing the register
       number.  Register numbers must be in the range 0 through
       RE_NREGS.  */
    stop_memory = 16,

    /* Match a duplicate of something remembered.  Followed by one
       byte containing the index of the memory register.  */
    duplicate = 17,

    begbuf = 18,        /* Succeeds if at beginning of buffer.  */
    endbuf = 19,        /* Succeeds if at end of buffer.  */
    wordchar = 20,      /* Matches any word-constituent character.  */
    notwordchar = 21,   /* Matches any char that is not a word-constituent.  */
    spacechar = 22,     /* Matches any whitespace character.  */
    notspacechar = 23,  /* Matches any non-whitespace character.  */
    digit = 24,         /* Matches any digit character.  */
    notdigit = 25,      /* Matches any non-digit character.  */
    begword = 26,       /* Succeeds if at beginning of word.  */
    endword = 27,       /* Succeeds if at end of word.  */
    wordbound = 28,     /* Succeeds if at a word boundary.  */
    notwordbound = 29   /* Succeeds if not at a word boundary.  */
  };
-
-
/* Convert X to signed char, so that a byte value with its top bit set
   becomes negative.  */
#define SIGN_EXTEND_CHAR(x) ((signed char)(x))

/* Put into DESTINATION a number stored in two contiguous bytes starting
   at SOURCE: the first byte supplies the low-order 8 bits and the second
   byte, sign-extended, supplies the high-order part.

   The high byte is scaled by multiplying by 256 rather than by `<< 8',
   because left-shifting a negative value is undefined behaviour.
   SOURCE is evaluated more than once, so it must have no side effects.
   The do/while wrapper makes the macro a single statement, so it is
   safe inside an unbraced if/else.  */
#define EXTRACT_NUMBER(destination, source) \
  do \
    { \
      (destination) = *(source) & 0377; \
      (destination) += SIGN_EXTEND_CHAR (*(char *)((source) + 1)) * 256; \
    } \
  while (0)

/* Same as EXTRACT_NUMBER, except that SOURCE is then advanced by two,
   i.e. past both bytes of the number.  Note that SOURCE has to be a
   modifiable value such as p, not, e.g., p + 1.  */
#define EXTRACT_NUMBER_AND_INCR(destination, source) \
  do \
    { \
      EXTRACT_NUMBER (destination, source); \
      (source) += 2; \
    } \
  while (0)
-
/* Case-folding table: upcase[c] is the upper-case equivalent of the
   character code c.  Codes 0x61..0x7A map to 0x41..0x5A; every other
   code maps to itself.  */

char upcase[0x100] =
  {
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
    /* 0x60 is unchanged; 0x61..0x6F fold to 0x41..0x4F.  */
    0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x70..0x7A fold to 0x50..0x5A; 0x7B..0x7F are unchanged.  */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F,
    0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,
    0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
    0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
    0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF
  };
-
/* Forward declarations of the helper routines used by the test program.  */
static void printchar (char c);
static void dump_buffer (struct re_pattern_buffer *bufp);
-
- /* Use this to run interactive tests. */
- int main (void)
- {
- char pat[80];
- struct re_pattern_buffer buf;
- struct re_registers regs;
- int i, j;
- char *p;
- char fastmap[CHAR_NUM];
-
- memset(&buf,0,sizeof(buf));
-
- while (1)
- {
- printf("Pat> ");
- fflush(stdout);
- gets (pat);
-
- if (*pat == 0)
- break;
-
- p = re_compile_pattern (pat, strlen(pat), &buf);
- if (p)
- {
- fprintf(stderr,"Error: %s\n",p);
- continue;
- }
-
- printf("\n--------------------------------------------------\n");
- printf("Buffer: ");
- for (i = 0; i < buf.used; ++i)
- printchar(buf.buffer[i]);
- printf ("\n%d allocated, %d used.\n", buf.allocated, buf.used);
- printf("--------------------------------------------------\n");
- re_compile_fastmap (&buf);
- printf ("Allowed by fastmap: ");
- for (i = 0; i < CHAR_NUM; i++)
- if (fastmap[i]) printchar (i);
- printf("\n--------------------------------------------------\n");
- dump_buffer (&buf);
- printf("--------------------------------------------------\n\n");
-
- while (1)
- {
- printf("Str> ");
- fflush(stdout);
- gets (pat); /* Now read the string to match against */
- if (*pat == 0)
- break;
-
- i = re_search (&buf, pat, strlen (pat), 0, 0, ®s);
- j = regs.end[0] - regs.start[0];
- p = pat + regs.start[0];
- printf ("Found \"%.*s\" at %d.\n", j, p, i);
- }
- }
- }
-
- static void dump_buffer (struct re_pattern_buffer *bufp)
- {
- int op;
- int n, m;
- char *p = bufp->buffer;
- char *end = bufp->buffer + bufp->used;
-
- while (p < end)
- {
- printf("%.8X: ",(unsigned int)p);
- switch (op = *p++)
- {
- case unused:
- printf("Unused");
- break;
- case exactn:
- n = *p++;
- printf("Match ");
- while (n-- > 0)
- printchar(*p++);
- break;
- case begline:
- printf("Start of line");
- break;
- case endline:
- printf("End of line");
- break;
- case jump:
- EXTRACT_NUMBER_AND_INCR(n,p);
- printf("Jump to %.8X", (int)(p + n));
- break;
- case on_failure_jump:
- EXTRACT_NUMBER_AND_INCR(n,p);
- printf("Resume at %.8X on failure", (int)(p + n));
- break;
- case finalize_jump:
- EXTRACT_NUMBER_AND_INCR(n,p);
- printf("Finalise and jump to %.8X", (int)(p + n));
- break;
- case maybe_finalize_jump:
- EXTRACT_NUMBER_AND_INCR(n,p);
- printf("Jump to %.8X (finalise if possible)", (int)(p + n));
- break;
- case dummy_failure_jump:
- EXTRACT_NUMBER_AND_INCR(n,p);
- printf("Jump to %.8X and push a dummy failure", (int)(p + n));
- break;
- case succeed_n:
- EXTRACT_NUMBER_AND_INCR(n,p);
- EXTRACT_NUMBER_AND_INCR(m,p);
- printf("Succeed at least %d time%s, then resume at %.8X on failure",
- m, (m == 1 ? "" : "s"), (int)(p + n - 2));
- break;
- case jump_n:
- EXTRACT_NUMBER_AND_INCR(n,p);
- EXTRACT_NUMBER_AND_INCR(m,p);
- printf("Jump to %.8X, %d time%s", (int)(p + n - 2),
- m, (m == 1 ? "" : "s"));
- break;
- case set_number_at:
- EXTRACT_NUMBER_AND_INCR(n,p);
- EXTRACT_NUMBER_AND_INCR(m,p);
- printf("Set the number at %.8X to %d", (int)(p + n - 5), m);
- break;
- case anychar:
- printf("Any character");
- break;
- case charset:
- printf("Character set: ");
- for (n = *p++ * CHAR_BIT - 1; n >= 0; --n)
- if (p[n / CHAR_BIT] & (1 << (n % CHAR_BIT)))
- printchar(n);
- p += p[-1];
- break;
- case charset_not:
- printf("Negated character set: ");
- for (n = *p++ * CHAR_BIT - 1; n >= 0; --n)
- if (p[n / CHAR_BIT] & (1 << (n % CHAR_BIT)))
- printchar(n);
- p += p[-1];
- break;
- case start_memory:
- printf("Start memory (%d)", *p++);
- break;
- case stop_memory:
- printf("Stop memory (%d)", *p++);
- break;
- case duplicate:
- printf("Duplicate (%d)", *p++);
- break;
- case begword:
- printf("Start of word");
- break;
- case endword:
- printf("End of word");
- break;
- case begbuf:
- printf("Start of buffer");
- break;
- case endbuf:
- printf("End of buffer");
- break;
- case wordchar:
- printf("Word character");
- break;
- case notwordchar:
- printf("Not word character");
- break;
- case spacechar:
- printf("Space character");
- break;
- case notspacechar:
- printf("Not space character");
- break;
- case digit:
- printf("Digit");
- break;
- case notdigit:
- printf("Not digit");
- break;
- case wordbound:
- printf("Word boundary");
- break;
- case notwordbound:
- printf("Not word boundary");
- break;
- default:
- printf("Unknown code (0x%.2X)",op);
- break;
- }
- putchar('\n');
- }
- }
-
/* Write the character C to standard output in a readable form.
   Printable characters are written as themselves.  NUL, \a, \b, \f,
   \n, \r, \t and \v are written as their C escape sequences.  Anything
   else is written as \x followed by the two-digit upper-case hex value
   of the byte.

   C is converted to unsigned char before use: passing a negative value
   to isprint() is undefined, and a sign-extended value would print as
   eight hex digits instead of two.  */
static void printchar (char c)
{
  const unsigned char uc = (unsigned char) c;

  if (isprint (uc))
    {
      putchar (uc);
      return;
    }

  switch (c)
    {
    case '\0':
      printf ("\\0");
      break;
    case '\a':
      printf ("\\a");
      break;
    case '\b':
      printf ("\\b");
      break;
    case '\f':
      printf ("\\f");
      break;
    case '\n':
      printf ("\\n");
      break;
    case '\r':
      printf ("\\r");
      break;
    case '\t':
      printf ("\\t");
      break;
    case '\v':
      printf ("\\v");
      break;
    default:
      printf ("\\x%.2X", (unsigned int) uc);
      break;
    }
}
-
-