home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
OS/2 Shareware BBS: 10 Tools
/
10-Tools.zip
/
mitsch75.zip
/
scheme-7_5_17-src.zip
/
scheme-7.5.17
/
src
/
microcode
/
regex.h
< prev
next >
Wrap
C/C++ Source or Header
|
1999-01-02
|
8KB
|
212 lines
/* -*-C-*-
$Id: regex.h,v 1.7 1999/01/02 06:11:34 cph Exp $
Copyright (c) 1987-1999 Massachusetts Institute of Technology
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or (at
your option) any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/* NOTE: This program was created by translation from the regular
expression code of GNU Emacs; it was translated from the original C to
68000 assembly language (in 1986), and then translated back from 68000
assembly language to C (in 1987). Users should be aware that the GNU
GENERAL PUBLIC LICENSE may apply to this code. A copy of that license
should have been included along with this file. */
/* Structure to represent a buffer of text to match against.
This contains the information that an editor buffer would have
to supply for the matching process to be executed.
`translation' is an array of MAX_ASCII characters which is used to
map each character before matching. Both the pattern and the match
text are mapped. This is normally used to implement case
insensitive searches.
`syntax_table' describes the syntax of the match text. See the
syntax table primitives for more information.
`text' points to the beginning of the match text. It is used only
for translating match text pointers into indices.
`text_start' and `text_end' delimit the match text. They define
the buffer-start and buffer-end for those matching commands that
refer to them. Also, all matching must take place within these
limits.
`gap_start' and `gap_end' delimit a gap in the match text. Editor
buffers normally have such a gap. For applications without a gap,
it is recommended that these be set to the same value as
`text_end'.
Both `text_start' and `gap_start' are inclusive indices, while
`text_end' and `gap_end' are exclusive.
The following conditions must be true:
(text <= text_start)
(text_start <= text_end)
(gap_start <= gap_end)
(! ((text_start < text_end) &&
(gap_start < gap_end) &&
((text_start == gap_start) || (text_end == gap_end))))
*/
struct re_buffer
{
  /* Character map applied to both the pattern and the match text
     before they are compared (normally used for case-insensitive
     searching). */
  unsigned char *translation;

  /* Describes the syntax of the match text. */
  SYNTAX_TABLE_TYPE syntax_table;

  /* Beginning of the match text; used only to convert match text
     pointers into indices. */
  unsigned char *text;

  /* Limits within which all matching must take place.  `text_start'
     is inclusive, `text_end' is exclusive. */
  unsigned char *text_start;
  unsigned char *text_end;

  /* Gap in the match text.  `gap_start' is inclusive, `gap_end' is
     exclusive.  When there is no gap, the recommendation is to set
     both to the same value as `text_end'. */
  unsigned char *gap_start;
  unsigned char *gap_end;
};
/* Structure to store "register" contents data in.
Pass the address of such a structure as an argument to re_match,
etc., if you want this information back.
start[i] and end[i] record the string matched by \( ... \) grouping
i, for i from 1 to RE_NREGS - 1.
start[0] and end[0] record the entire string matched. */
/* Number of slots in each array of `struct re_registers': slot 0 plus
   groupings 1 through RE_NREGS - 1. */
#define RE_NREGS 10

struct re_registers
{
  /* start[0] / end[0] describe the entire string matched;
     start[i] / end[i] describe what grouping i matched. */
  long start[RE_NREGS];
  long end[RE_NREGS];
};
/* These are the command codes that appear in compiled regular
expressions, one per byte. Some command codes are followed by
argument bytes. A command code can specify any interpretation
whatever for its arguments. Zero-bytes may appear in the compiled
regular expression. */
enum regexpcode
{
  /* The numeric values below are spelled out because these codes are
     stored as bytes inside compiled patterns; they are the same values
     that implicit numbering from zero would produce. */

  regexpcode_unused = 0,

  /* ---- Literal text ---- */

  /* Operand: one literal byte. */
  regexpcode_exact_1 = 1,

  /* Operands: a byte giving n, then n literal bytes. */
  regexpcode_exact_n = 2,

  /* ---- Line anchors ---- */

  /* Fails unless at the beginning of a line. */
  regexpcode_line_start = 3,

  /* Fails unless at the end of a line. */
  regexpcode_line_end = 4,

  /* ---- Jumps and failure points ---- */

  /* Operands: two bytes giving the relative address to jump to. */
  regexpcode_jump = 5,

  /* Operands: two bytes giving the relative address of the place to
     resume at if a failure occurs. */
  regexpcode_on_failure_jump = 6,

  /* Discards the latest failure point, then jumps to the address. */
  regexpcode_finalize_jump = 7,

  /* Like jump, but finalizes when that is safe.  Used for jumping
     back to the beginning of a repeat: when the command following
     this jump is clearly incompatible with the command at the
     beginning of the repeat, so that backtracking out of repetitions
     already completed cannot be of any use, then we finalize. */
  regexpcode_maybe_finalize_jump = 8,

  /* Jumps, and pushes a dummy failure point.  That failure point is
     thrown away if an attempt is made to use it for a failure.  A +
     construct makes this before the first repeat. */
  regexpcode_dummy_failure_jump = 9,

  /* ---- Single-character matchers ---- */

  /* Matches any one character. */
  regexpcode_any_char = 10,

  /* Matches any one character belonging to a specified set.
     Operands: first a byte giving the number of bitmap bytes, then
     the bytes of a bit-map saying which characters are in the set.
     Within each byte the bits are ordered low-bit-first.  A character
     is in the set if its bit is 1; a character too large to have a
     bit in the map is automatically not in the set. */
  regexpcode_char_set = 11,

  /* Similar, but matches any character that is NOT one of those
     specified. */
  regexpcode_not_char_set = 12,

  /* ---- Memory registers ---- */

  /* Starts remembering the text that is matched and stores it in a
     memory register.  Operand: one byte containing the register
     number, which must lie in 0 through (RE_NREGS - 1) inclusive. */
  regexpcode_start_memory = 13,

  /* Stops remembering the text that is matched and stores it in a
     memory register.  Operand: one byte containing the register
     number, which must lie in 0 through (RE_NREGS - 1) inclusive. */
  regexpcode_stop_memory = 14,

  /* Matches a duplicate of something remembered.  Operand: one byte
     containing the index of the memory register. */
  regexpcode_duplicate = 15,

  /* ---- Buffer anchors ---- */

  /* Succeeds if at the beginning of the buffer. */
  regexpcode_buffer_start = 16,

  /* Succeeds if at the end of the buffer. */
  regexpcode_buffer_end = 17,

  /* ---- Words ---- */

  /* Matches any word-constituent character. */
  regexpcode_word_char = 18,

  /* Matches any character that is not a word-constituent. */
  regexpcode_not_word_char = 19,

  /* Succeeds if at a word beginning. */
  regexpcode_word_start = 20,

  /* Succeeds if at a word end. */
  regexpcode_word_end = 21,

  /* Succeeds if at a word boundary. */
  regexpcode_word_bound = 22,

  /* Succeeds if not at a word boundary. */
  regexpcode_not_word_bound = 23,

  /* ---- Syntax classes ---- */

  /* Matches any character whose syntax is the one specified.
     Operand: a byte containing a syntax code, Sword or such like. */
  regexpcode_syntax_spec = 24,

  /* Matches any character whose syntax differs from the one
     specified. */
  regexpcode_not_syntax_spec = 25
};
/* Prototype for re_buffer_initialize: returns nothing and takes a
   `struct re_buffer *' followed by seven further arguments.  The
   prototype is written through the EXFUN macro, which this header
   does not define and so must be supplied by an earlier include.
   NOTE(review): the parameters are unnamed.  Going by the members of
   `struct re_buffer', they are presumably the translation table, the
   syntax table, the text base, and then text_start, text_end,
   gap_start and gap_end given as unsigned long indices -- confirm
   against the definition. */
extern void
EXFUN (re_buffer_initialize,
(struct re_buffer *, unsigned char *, SYNTAX_TABLE_TYPE,
unsigned char *, unsigned long, unsigned long,
unsigned long, unsigned long));
/* Prototype for re_compile_fastmap: returns an int and takes three
   `unsigned char *' arguments, a SYNTAX_TABLE_TYPE, and one more
   `unsigned char *'.  Written through the EXFUN macro, which is
   defined outside this header.
   NOTE(review): the parameters are unnamed and the meaning of the
   return value is not stated in this header.  Presumably the
   arguments are the compiled pattern's start and end, a translation
   table, the syntax table, and the fastmap to fill in -- confirm
   against the definition. */
extern int
EXFUN (re_compile_fastmap,
(unsigned char *, unsigned char *, unsigned char *,
SYNTAX_TABLE_TYPE, unsigned char *));
/* Prototype for re_match: returns an int and takes two
   `unsigned char *' arguments, a `struct re_buffer *', a
   `struct re_registers *', and two more `unsigned char *' arguments.
   Written through the EXFUN macro, which is defined outside this
   header.
   The `struct re_registers *' argument is where register contents
   are handed back; per the description of that structure, pass its
   address if you want that information.
   NOTE(review): the remaining parameters are unnamed and the meaning
   of the return value is not stated in this header.  Presumably the
   first pair delimits the compiled pattern and the last pair delimits
   the region of match text -- confirm against the definition. */
extern int
EXFUN (re_match,
(unsigned char *, unsigned char *, struct re_buffer *,
struct re_registers *, unsigned char *, unsigned char *));
/* Prototype for re_search_forward: returns an int and has the same
   parameter list as re_match (two `unsigned char *', a
   `struct re_buffer *', a `struct re_registers *', two more
   `unsigned char *').  Written through the EXFUN macro, which is
   defined outside this header.
   NOTE(review): the parameters are unnamed and the meaning of the
   return value is not stated in this header.  Presumably the
   arguments play the same roles as in re_match (pattern bounds, match
   buffer, optional register output, search range) -- confirm against
   the definition. */
extern int
EXFUN (re_search_forward,
(unsigned char *, unsigned char *, struct re_buffer *,
struct re_registers *, unsigned char *, unsigned char *));
/* Prototype for re_search_backward: returns an int and has the same
   parameter list as re_match and re_search_forward (two
   `unsigned char *', a `struct re_buffer *', a
   `struct re_registers *', two more `unsigned char *').  Written
   through the EXFUN macro, which is defined outside this header.
   NOTE(review): the parameters are unnamed and the meaning of the
   return value is not stated in this header.  Presumably the
   arguments play the same roles as in re_search_forward, with the
   search proceeding in the opposite direction -- confirm against the
   definition. */
extern int
EXFUN (re_search_backward,
(unsigned char *, unsigned char *, struct re_buffer *,
struct re_registers *, unsigned char *, unsigned char *));