/* -*-C-*-

$Id: regex.h,v 1.7 1999/01/02 06:11:34 cph Exp $

Copyright (c) 1987-1999 Massachusetts Institute of Technology

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or (at
your option) any later version.

This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

/* NOTE: This program was created by translation from the regular
   expression code of GNU Emacs; it was translated from the original C to
   68000 assembly language (in 1986), and then translated back from 68000
   assembly language to C (in 1987). Users should be aware that the GNU
   GENERAL PUBLIC LICENSE may apply to this code. A copy of that license
   should have been included along with this file. */

- /* Structure to represent a buffer of text to match against.
- This contains the information that an editor buffer would have
- to supply for the matching process to be executed.
-
- `translation' is an array of MAX_ASCII characters which is used to
- map each character before matching. Both the pattern and the match
- text are mapped. This is normally used to implement case
- insensitive searches.
-
- `syntax_table' describes the syntax of the match text. See the
- syntax table primitives for more information.
-
- `text' points to the beginning of the match text. It is used only
- for translating match text pointers into indices.
-
- `text_start' and `text_end' delimit the match text. They define
- the buffer-start and buffer-end for those matching commands that
- refer to them. Also, all matching must take place within these
- limits.
-
- `gap_start' and `gap_end' delimit a gap in the match text. Editor
- buffers normally have such a gap. For applications without a gap,
- it is recommended that these be set to the same value as
- `text_end'.
-
- Both `text_start' and `gap_start' are inclusive indices, while
- `text_end' and `gap_end' are exclusive.
-
- The following conditions must be true:
-
- (text <= text_start)
- (text_start <= text_end)
- (gap_start <= gap_end)
- (! ((text_start < text_end) &&
- (gap_start < gap_end) &&
- ((text_start == gap_start) || (text_end == gap_end))))
-
- */
-
- struct re_buffer
- {
- unsigned char *translation;
- SYNTAX_TABLE_TYPE syntax_table;
- unsigned char *text;
- unsigned char *text_start;
- unsigned char *text_end;
- unsigned char *gap_start;
- unsigned char *gap_end;
- };
-
/* Structure to store "register" contents data in.

   Pass the address of such a structure as an argument to re_match,
   etc., if you want this information back.

   start[i] and end[i] record the string matched by \( ... \) grouping
   i, for i from 1 to RE_NREGS - 1.

   start[0] and end[0] record the entire string matched. */

/* Number of registers: slot 0 for the whole match plus the groupings
   1 through RE_NREGS - 1. */
#define RE_NREGS 10

struct re_registers
{
  long start[RE_NREGS];   /* start[i]: where the text recorded for i begins */
  long end[RE_NREGS];     /* end[i]: where the text recorded for i ends */
};

/* These are the command codes that appear in compiled regular
   expressions, one per byte. Some command codes are followed by
   argument bytes. A command code can specify any interpretation
   whatever for its arguments. Zero-bytes may appear in the compiled
   regular expression. */

enum regexpcode
{
  regexpcode_unused,
  regexpcode_exact_1,           /* Followed by 1 literal byte */

  /* Followed by one byte giving n, and then by n literal bytes. */
  regexpcode_exact_n,

  regexpcode_line_start,        /* Fails unless at beginning of line */
  regexpcode_line_end,          /* Fails unless at end of line */

  /* Followed by two bytes giving relative address to jump to. */
  regexpcode_jump,

  /* Followed by two bytes giving relative address of place to
     resume at in case of failure. */
  regexpcode_on_failure_jump,

  /* Throw away latest failure point and then jump to address. */
  regexpcode_finalize_jump,

  /* Like jump but finalize if safe to do so. This is used to jump
     back to the beginning of a repeat. If the command that follows
     this jump is clearly incompatible with the one at the beginning
     of the repeat, such that we can be sure that there is no use
     backtracking out of repetitions already completed, then we
     finalize. */
  regexpcode_maybe_finalize_jump,

  /* Jump, and push a dummy failure point. This failure point will
     be thrown away if an attempt is made to use it for a failure.
     A + construct makes this before the first repeat. */
  regexpcode_dummy_failure_jump,

  regexpcode_any_char,          /* Matches any one character */

  /* Matches any one char belonging to specified set. The first
     following byte gives the number of bitmap bytes. Then come the
     bytes of a bit-map saying which chars are in. Bits in each byte
     are ordered low-bit-first. A character is in the set if its bit
     is 1. A character too large to have a bit in the map is
     automatically not in the set. */
  regexpcode_char_set,

  /* Similar but match any character that is NOT one of those
     specified. */
  regexpcode_not_char_set,

  /* Starts remembering the text that is matched and stores it in a
     memory register. Followed by one byte containing the register
     number. Register numbers must be in the range 0 through
     (RE_NREGS - 1) inclusive. */
  regexpcode_start_memory,

  /* Stops remembering the text that is matched and stores it in a
     memory register. Followed by one byte containing the register
     number. Register numbers must be in the range 0 through
     (RE_NREGS - 1) inclusive. */
  regexpcode_stop_memory,

  /* Match a duplicate of something remembered. Followed by one
     byte containing the index of the memory register. */
  regexpcode_duplicate,

  regexpcode_buffer_start,      /* Succeeds if at beginning of buffer */
  regexpcode_buffer_end,        /* Succeeds if at end of buffer */
  regexpcode_word_char,         /* Matches any word-constituent character */

  /* Matches any char that is not a word-constituent. */
  regexpcode_not_word_char,

  regexpcode_word_start,        /* Succeeds if at word beginning */
  regexpcode_word_end,          /* Succeeds if at word end */
  regexpcode_word_bound,        /* Succeeds if at a word boundary */
  regexpcode_not_word_bound,    /* Succeeds if not at a word boundary */

  /* Matches any character whose syntax is specified. Followed by a
     byte which contains a syntax code, Sword or such like. */
  regexpcode_syntax_spec,

  /* Matches any character whose syntax differs from the specified. */
  regexpcode_not_syntax_spec
};

- extern void
- EXFUN (re_buffer_initialize,
- (struct re_buffer *, unsigned char *, SYNTAX_TABLE_TYPE,
- unsigned char *, unsigned long, unsigned long,
- unsigned long, unsigned long));
-
- extern int
- EXFUN (re_compile_fastmap,
- (unsigned char *, unsigned char *, unsigned char *,
- SYNTAX_TABLE_TYPE, unsigned char *));
-
- extern int
- EXFUN (re_match,
- (unsigned char *, unsigned char *, struct re_buffer *,
- struct re_registers *, unsigned char *, unsigned char *));
-
- extern int
- EXFUN (re_search_forward,
- (unsigned char *, unsigned char *, struct re_buffer *,
- struct re_registers *, unsigned char *, unsigned char *));
-
- extern int
- EXFUN (re_search_backward,
- (unsigned char *, unsigned char *, struct re_buffer *,
- struct re_registers *, unsigned char *, unsigned char *));
-