home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
OS/2 Shareware BBS: 10 Tools
/
10-Tools.zip
/
mitsch75.zip
/
scheme-7_5_17-src.zip
/
scheme-7.5.17
/
src
/
microcode
/
regex.c
< prev
next >
Wrap
C/C++ Source or Header
|
2000-12-05
|
33KB
|
1,170 lines
/* -*-C-*-
$Id: regex.c,v 1.20 2000/12/05 21:23:48 cph Exp $
Copyright (c) 1987-2000 Massachusetts Institute of Technology
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or (at
your option) any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/* Regular expression matching and search. */
/* NOTE: This program was created by translation from the regular
expression code of GNU Emacs; it was translated from the original C to
68000 assembly language (in 1986), and then translated back from 68000
assembly language to C (in 1987). Users should be aware that the GNU
GENERAL PUBLIC LICENSE may apply to this code. A copy of that license
should have been included along with this file. */
#include "scheme.h"
#include "syntax.h"
#include "regex.h"
#ifdef STDC_HEADERS
# include <stdlib.h>
#else
extern char * malloc ();
extern char * realloc ();
extern void free ();
#endif
#if defined(__IRIX__) || defined(_AIX)
#define SIGN_EXTEND_CHAR(x) ((((int) (x)) >= 0x80) \
? (((int) (x)) - 0x100) \
: ((int) (x)))
#endif
#ifndef SIGN_EXTEND_CHAR
#define SIGN_EXTEND_CHAR(x) (x)
#endif /* not SIGN_EXTEND_CHAR */
#ifndef SWITCH_ENUM
#define SWITCH_ENUM(enum_type, expression) \
switch ((enum enum_type) (expression))
#endif /* not SWITCH_ENUM */
#ifndef RE_NFAILURES
#define RE_NFAILURES 512
#endif
#define FOR_INDEX_RANGE(index, start, end) \
for (index = (start); (index < (end)); index += 1)
#define FOR_INDEX_BELOW(index, limit) \
FOR_INDEX_RANGE (index, 0, (limit))
#define FOR_ALL_ASCII(index) \
FOR_INDEX_BELOW (index, MAX_ASCII)
#define FOR_ALL_ASCII_SUCH_THAT(index, expression) \
FOR_ALL_ASCII (index) \
if (expression)
#define TRANSLATE_CHAR(ascii) \
((translation == NULL) ? (ascii) : (translation [(ascii)]))
#define WORD_CONSTITUENT_P(ascii) \
(SYNTAX_CONSTITUENT_P (syntaxcode_word, (ascii)))
#define SYNTAX_CONSTITUENT_P(code, ascii) \
((SYNTAX_ENTRY_CODE (SYNTAX_TABLE_REF (syntax_table, (ascii)))) == (code))
#define CHAR_SET_MEMBER_P(length, char_set, ascii) \
(((ascii) < ((length) * ASCII_LENGTH)) && \
(CHAR_SET_MEMBER_P_INTERNAL (char_set, ascii)))
#define CHAR_SET_MEMBER_P_INTERNAL(char_set, ascii) \
((((char_set) [((ascii) / ASCII_LENGTH)]) & \
(1 << ((ascii) % ASCII_LENGTH))) \
!= 0)
#define READ_PATTERN_CHAR(target) do \
{ \
if (pattern_pc >= pattern_end) \
BAD_PATTERN (); \
(target) = (*pattern_pc++); \
} while (0)
#define READ_PATTERN_OFFSET(target) do \
{ \
SIGNED char _fetched; \
if ((pattern_pc + 1) >= pattern_end) \
BAD_PATTERN (); \
(target) = (*pattern_pc++); \
_fetched = (* ((SIGNED char *) (pattern_pc++))); \
(target) += ((SIGN_EXTEND_CHAR (_fetched)) << ASCII_LENGTH); \
if (((pattern_pc + (target)) < pattern_start) || \
((pattern_pc + (target)) > pattern_end)) \
BAD_PATTERN (); \
} while (0)
#define READ_PATTERN_LENGTH(target) do \
{ \
int _len; \
\
if (pattern_pc >= pattern_end) \
BAD_PATTERN (); \
_len = ((int) (*pattern_pc++)); \
if ((pattern_pc + _len) > pattern_end) \
BAD_PATTERN (); \
(target) = _len; \
} while (0)
#define READ_PATTERN_REGISTER(target) do \
{ \
if ((pattern_pc >= pattern_end) || \
(((target) = (*pattern_pc++)) >= RE_NREGS)) \
BAD_PATTERN (); \
} while (0)
#define READ_PATTERN_SYNTAXCODE(target) do \
{ \
if ((pattern_pc >= pattern_end) || \
(((int) ((target) = ((enum syntaxcode) (*pattern_pc++)))) \
>= ((int) syntaxcode_max))) \
BAD_PATTERN (); \
} while (0)
#define BAD_PATTERN() RE_RETURN (-2)
#define PUSH_FAILURE_POINT(pattern_pc, match_pc) do \
{ \
if (stack_pointer == stack_end) \
{ \
long stack_length; \
unsigned char **stack_temporary; \
\
stack_length = ((stack_end - stack_start) * 2); \
if (stack_length > (re_max_failures * 2)) \
RE_RETURN (-4); \
stack_temporary = \
((unsigned char **) \
(realloc \
(stack_start, (stack_length * (sizeof (unsigned char *)))))); \
if (stack_temporary == NULL) \
RE_RETURN (-3); \
stack_end = (& (stack_temporary [stack_length])); \
stack_pointer = \
(& (stack_temporary [(stack_pointer - stack_start)])); \
stack_start = stack_temporary; \
} \
(*stack_pointer++) = (pattern_pc); \
(*stack_pointer++) = (match_pc); \
} while (0)
#define RE_RETURN(value) \
{ \
return_value = (value); \
goto return_point; \
}
void
DEFUN (re_buffer_initialize,
(buffer, translation, syntax_table, text,
text_start_index, text_end_index,
gap_start_index, gap_end_index),
struct re_buffer * buffer
AND unsigned char * translation
AND SYNTAX_TABLE_TYPE syntax_table
AND unsigned char * text
AND unsigned long text_start_index
AND unsigned long text_end_index
AND unsigned long gap_start_index
AND unsigned long gap_end_index)
{
unsigned char *text_start, *text_end, *gap_start, *gap_end;
/* Assumes that
((text_start_index <= gap_start_index) &&
(gap_start_index <= gap_end_index) &&
(gap_end_index <= text_end_index)) */
text_start = (text + text_start_index);
text_end = (text + text_end_index);
gap_start = (text + gap_start_index);
gap_end = (text + gap_end_index);
(buffer -> translation) = translation;
(buffer -> syntax_table) = syntax_table;
(buffer -> text) = text;
(buffer -> text_start) = ((text_start == gap_start) ? gap_end : text_start);
(buffer -> text_end) = ((text_end == gap_end) ? gap_start : text_end);
(buffer -> gap_start) = gap_start;
(buffer -> gap_end) = gap_end;
return;
}
/* Given a compiled pattern between `pattern_start' and `pattern_end',
generate a character set which is true of all characters which can
be the first character of a match.
See the documentation of `struct re_buffer' for a description of
`translation' and `syntax_table'.
`fastmap' is the resulting character set. It is a character array
whose elements are either `FASTMAP_FALSE' or `FASTMAP_TRUE'.
Return values:
0 => pattern cannot match the null string.
1 => pattern can match the null string.
2 => pattern can match the null string, but only at end of match
text or to left of a character in `fastmap'.
-2 => the pattern is improperly formed.
else => undefined. */
#define FASTMAP_FALSE '\0'
#define FASTMAP_TRUE '\1'
int
DEFUN (re_compile_fastmap,
(pattern_start, pattern_end, translation, syntax_table, fastmap),
unsigned char * pattern_start
AND fast unsigned char * pattern_end
AND unsigned char * translation
AND SYNTAX_TABLE_TYPE syntax_table
AND fast unsigned char * fastmap)
{
fast unsigned char *pattern_pc;
unsigned char *stack_start[RE_NFAILURES];
unsigned char **stack_pointer;
int return_value;
pattern_pc = pattern_start;
return_value = 0;
stack_pointer = stack_start;
{
fast int i;
FOR_ALL_ASCII (i)
(fastmap [i]) = FASTMAP_FALSE;
}
loop:
if (pattern_pc >= pattern_end)
RE_RETURN (1);
SWITCH_ENUM (regexpcode, (*pattern_pc++))
{
case regexpcode_unused:
case regexpcode_line_start:
case regexpcode_buffer_start:
case regexpcode_buffer_end:
case regexpcode_word_start:
case regexpcode_word_end:
case regexpcode_word_bound:
case regexpcode_not_word_bound:
goto loop;
case regexpcode_line_end:
{
(fastmap [(TRANSLATE_CHAR ('\n'))]) = FASTMAP_TRUE;
if (return_value == 0)
return_value = 2;
goto next;
}
case regexpcode_exact_1:
{
fast int ascii;
READ_PATTERN_CHAR (ascii);
(fastmap [(TRANSLATE_CHAR (ascii))]) = FASTMAP_TRUE;
goto next;
}
case regexpcode_exact_n:
{
fast int length;
READ_PATTERN_LENGTH (length);
if (length == 0)
goto loop;
(fastmap [(TRANSLATE_CHAR (pattern_pc [0]))]) = FASTMAP_TRUE;
goto next;
}
case regexpcode_any_char:
{
fast int ascii;
FOR_ALL_ASCII_SUCH_THAT (ascii, (ascii != '\n'))
(fastmap [(TRANSLATE_CHAR (ascii))]) = FASTMAP_TRUE;
if (return_value != 0)
goto return_point;
goto next;
}
case regexpcode_char_set:
{
fast int length;
fast int ascii;
READ_PATTERN_LENGTH (length);
length = (length * ASCII_LENGTH);
FOR_INDEX_BELOW (ascii, length)
if (CHAR_SET_MEMBER_P_INTERNAL (pattern_pc, ascii))
(fastmap [(TRANSLATE_CHAR (ascii))]) = FASTMAP_TRUE;
goto next;
}
case regexpcode_not_char_set:
{
fast int length;
fast int ascii;
READ_PATTERN_LENGTH (length);
length = (length * ASCII_LENGTH);
FOR_INDEX_BELOW (ascii, length)
if (! (CHAR_SET_MEMBER_P_INTERNAL (pattern_pc, ascii)))
(fastmap [(TRANSLATE_CHAR (ascii))]) = FASTMAP_TRUE;
FOR_INDEX_RANGE (ascii, length, MAX_ASCII)
(fastmap [(TRANSLATE_CHAR (ascii))]) = FASTMAP_TRUE;
goto next;
}
case regexpcode_word_char:
{
fast int ascii;
FOR_ALL_ASCII_SUCH_THAT (ascii, (WORD_CONSTITUENT_P (ascii)))
(fastmap [(TRANSLATE_CHAR (ascii))]) = FASTMAP_TRUE;
goto next;
}
case regexpcode_not_word_char:
{
fast int ascii;
FOR_ALL_ASCII_SUCH_THAT (ascii, (! (WORD_CONSTITUENT_P (ascii))))
(fastmap [(TRANSLATE_CHAR (ascii))]) = FASTMAP_TRUE;
goto next;
}
case regexpcode_syntax_spec:
{
fast enum syntaxcode code;
fast int ascii;
READ_PATTERN_SYNTAXCODE (code);
FOR_ALL_ASCII_SUCH_THAT (ascii, (SYNTAX_CONSTITUENT_P (code, ascii)))
(fastmap [(TRANSLATE_CHAR (ascii))]) = FASTMAP_TRUE;
goto next;
}
case regexpcode_not_syntax_spec:
{
fast enum syntaxcode code;
fast int ascii;
READ_PATTERN_SYNTAXCODE (code);
FOR_ALL_ASCII_SUCH_THAT (ascii,
(! (SYNTAX_CONSTITUENT_P (code, ascii))))
(fastmap [(TRANSLATE_CHAR (ascii))]) = FASTMAP_TRUE;
goto next;
}
case regexpcode_start_memory:
case regexpcode_stop_memory:
{
fast int register_number;
READ_PATTERN_REGISTER (register_number);
goto loop;
}
case regexpcode_duplicate:
{
fast int register_number;
fast int ascii;
READ_PATTERN_REGISTER (register_number);
FOR_ALL_ASCII (ascii)
(fastmap [ascii]) = FASTMAP_TRUE;
RE_RETURN (1);
}
case regexpcode_jump:
case regexpcode_finalize_jump:
case regexpcode_maybe_finalize_jump:
case regexpcode_dummy_failure_jump:
{
fast int offset;
return_value = 1;
READ_PATTERN_OFFSET (offset);
pattern_pc += offset;
if (offset > 0)
goto loop;
/* Jump backward reached implies we just went through the
body of a loop and matched nothing. Opcode jumped to
should be an on_failure_jump. Just treat it like an
ordinary jump. For a * loop, it has pushed its failure
point already; if so, discard that as redundant. */
if (pattern_pc >= pattern_end)
BAD_PATTERN ();
if (((enum regexpcode) (pattern_pc [0])) !=
regexpcode_on_failure_jump)
goto loop;
READ_PATTERN_OFFSET (offset);
pattern_pc += offset;
if ((stack_pointer != stack_start) &&
((stack_pointer [-1]) == pattern_pc))
stack_pointer -= 1;
goto loop;
}
case regexpcode_on_failure_jump:
{
fast int offset;
READ_PATTERN_OFFSET (offset);
(*stack_pointer++) = (pattern_pc + offset);
goto loop;
}
default:
BAD_PATTERN ();
}
next:
if (stack_pointer != stack_start)
{
pattern_pc = (*--stack_pointer);
goto loop;
}
return_point:
return (return_value);
}
/* Match the compiled pattern described by `pattern_start' and
`pattern_end' against the characters in `buffer' between
`match_start' and `match_end'.
`registers', if not NULL, will be filled with the start and end
indices of the match registers if the match succeeds.
It is assumed that the following is true:
(! ((gap_start < gap_end) &&
(match_start < match_end) &&
((match_start == gap_start) || (match_end == gap_end))))
Return values:
non-negative => the end index (exclusive) of the match.
-1 => no match.
-2 => the pattern is badly formed.
-3 => memory allocation error.
-4 => match stack overflow.
other => undefined. */
#define RE_MATCH_FAILED (-1)
/* This macro is used by search procedures to decide when a match at a
particular place has failed. If true, the search continues by
advancing to the next match point. */
#define RE_MATCH_FAILURE_RESULT(result) \
(((result) == RE_MATCH_FAILED) || ((result) == (-4)))
#define ADDRESS_TO_INDEX(address) \
((((address) > gap_start) ? ((address) - gap_length) : (address)) \
- (buffer -> text))
#define READ_MATCH_CHAR(target) do \
{ \
if (match_pc >= match_end) \
goto re_match_fail; \
(target) = (TRANSLATE_CHAR (*match_pc++)); \
if (match_pc == gap_start) \
match_pc = gap_end; \
} while (0)
static Boolean
DEFUN (beq_translate, (scan1, scan2, length, translation),
unsigned char * scan1 AND
unsigned char * scan2 AND
long length AND
unsigned char * translation)
{
while ((length--) > 0)
if ((TRANSLATE_CHAR (*scan1++)) != (TRANSLATE_CHAR (*scan2++)))
return (false);
return (true);
}
int re_max_failures = 1000;
int
DEFUN (re_match,
(pattern_start, pattern_end, buffer, registers, match_start, match_end),
unsigned char * pattern_start
AND unsigned char * pattern_end
AND struct re_buffer * buffer
AND struct re_registers * registers
AND unsigned char * match_start
AND unsigned char * match_end)
{
fast unsigned char *pattern_pc, *match_pc;
unsigned char *gap_start, *gap_end;
unsigned char *translation;
SYNTAX_TABLE_TYPE syntax_table;
long gap_length;
int return_value;
/* Failure point stack. Each place that can handle a failure
further down the line pushes a failure point on this stack. It
consists of two char *'s. The first one pushed is where to
resume scanning the pattern; the second pushed is where to resume
scanning the match text. If the latter is NULL, the failure
point is a "dummy". If a failure happens and the innermost
failure point is dormant, it discards that failure point and
tries the next one. */
unsigned char **stack_start, **stack_end, **stack_pointer;
/* Information on the "contents" of registers. These are pointers
into the match text; they record just what was matched (on this
attempt) by some part of the pattern. The start_memory command
stores the start of a register's contents and the stop_memory
command stores the end.
At that point, (register_start [regnum]) points to the first
character in the register, and (register_end [regnum]) points to
the first character beyond the end of the register. */
unsigned char *register_start[RE_NREGS];
unsigned char *register_end[RE_NREGS];
pattern_pc = pattern_start;
match_pc = match_start;
gap_start = (buffer -> gap_start);
gap_end = (buffer -> gap_end);
gap_length = (gap_end - gap_start);
translation = (buffer -> translation);
syntax_table = (buffer -> syntax_table);
stack_start =
((unsigned char **) (malloc ((2 * RE_NFAILURES) * (sizeof (char *)))));
if (stack_start == NULL)
RE_RETURN (-3);
stack_end = (& (stack_start [(2 * RE_NFAILURES)]));
stack_pointer = stack_start;
{
fast int i;
FOR_INDEX_BELOW (i, RE_NREGS)
{
(register_start [i]) = NULL;
(register_end [i]) = NULL;
}
}
re_match_loop:
if (pattern_pc >= pattern_end)
{
/* Reaching here indicates that match was successful. */
if (registers != NULL)
{
fast int i;
(register_start [0]) = match_start;
(register_end [0]) = match_pc;
FOR_INDEX_BELOW (i, RE_NREGS)
{
((registers -> start) [i]) =
(((register_start [i]) == NULL)
? -1
: (ADDRESS_TO_INDEX (register_start [i])));
((registers -> end) [i]) =
(((register_end [i]) == NULL)
? -1
: (ADDRESS_TO_INDEX (register_end [i])));
}
}
RE_RETURN (ADDRESS_TO_INDEX (match_pc));
}
SWITCH_ENUM (regexpcode, (*pattern_pc++))
{
case regexpcode_unused:
goto re_match_loop;
case regexpcode_exact_1:
{
fast int ascii;
fast int ascii_p;
READ_MATCH_CHAR (ascii);
READ_PATTERN_CHAR (ascii_p);
if (ascii == ascii_p)
goto re_match_loop;
goto re_match_fail;
}
case regexpcode_exact_n:
{
fast int length;
fast int ascii;
READ_PATTERN_LENGTH (length);
while ((length--) > 0)
{
READ_MATCH_CHAR (ascii);
if (ascii != (*pattern_pc++))
goto re_match_fail;
}
goto re_match_loop;
}
case regexpcode_any_char:
{
fast int ascii;
READ_MATCH_CHAR (ascii);
if (ascii == '\n')
goto re_match_fail;
goto re_match_loop;
}
#define RE_MATCH_CHAR_SET(winning_label, losing_label) \
{ \
fast int ascii; \
fast int length; \
\
READ_MATCH_CHAR (ascii); \
READ_PATTERN_LENGTH (length); \
if (CHAR_SET_MEMBER_P (length, pattern_pc, ascii)) \
{ \
pattern_pc += length; \
goto winning_label; \
} \
else \
{ \
pattern_pc += length; \
goto losing_label; \
} \
}
case regexpcode_char_set:
RE_MATCH_CHAR_SET (re_match_loop, re_match_fail);
case regexpcode_not_char_set:
RE_MATCH_CHAR_SET (re_match_fail, re_match_loop);
#undef RE_MATCH_CHAR_SET
/* \( is represented by a start_memory, \) by a stop_memory. Both
of those commands contain a "register number" argument. The
text matched within the \( and \) is recorded under that
number. Then, \<digit> turns into a `duplicate' command which
is followed by the numeric value of <digit> as the register
number. */
case regexpcode_start_memory:
{
fast int register_number;
READ_PATTERN_REGISTER (register_number);
(register_start [register_number]) = match_pc;
goto re_match_loop;
}
case regexpcode_stop_memory:
{
fast int register_number;
READ_PATTERN_REGISTER (register_number);
(register_end [register_number]) =
((match_pc == gap_end) ? gap_start : match_pc);
goto re_match_loop;
}
case regexpcode_duplicate:
{
fast int register_number;
unsigned char *start, *end, *new_end;
long length;
READ_PATTERN_REGISTER (register_number);
start = (register_start [register_number]);
end = (register_end [register_number]);
length = (end - start);
if (length <= 0)
goto re_match_loop;
new_end = (match_pc + length);
if (new_end > match_end)
goto re_match_fail;
if ((match_pc <= gap_start) && (new_end > gap_start))
{
long length1, length2;
new_end += gap_length;
if (new_end > match_end)
goto re_match_fail;
length1 = (gap_start - match_pc);
length2 = (length - length1);
if (!
((beq_translate (match_pc, start, length1, translation)) &&
(beq_translate (gap_end, (start + length1), length2,
translation))))
goto re_match_fail;
}
else if ((start <= gap_start) && (end > gap_start))
{
long length1, length2;
length1 = (gap_start - start);
length2 = (end - gap_end);
if (!
((beq_translate (match_pc, start, length1, translation)) &&
(beq_translate ((match_pc + length1), gap_end, length2,
translation))))
goto re_match_fail;
}
else
{
if (! (beq_translate (match_pc, start, length, translation)))
goto re_match_fail;
}
match_pc = ((new_end == gap_start) ? gap_end : new_end);
goto re_match_loop;
}
case regexpcode_buffer_start:
{
if ((ADDRESS_TO_INDEX (match_pc))
== (ADDRESS_TO_INDEX (buffer -> text_start)))
goto re_match_loop;
goto re_match_fail;
}
case regexpcode_buffer_end:
{
if ((ADDRESS_TO_INDEX (match_pc))
== (ADDRESS_TO_INDEX (buffer -> text_end)))
goto re_match_loop;
goto re_match_fail;
}
case regexpcode_line_start:
{
if ((ADDRESS_TO_INDEX (match_pc))
== (ADDRESS_TO_INDEX (buffer -> text_start)))
goto re_match_loop;
if ((TRANSLATE_CHAR
(((match_pc == gap_end) ? gap_start : match_pc) [-1]))
== '\n')
goto re_match_loop;
goto re_match_fail;
}
case regexpcode_line_end:
{
if (((ADDRESS_TO_INDEX (match_pc))
== (ADDRESS_TO_INDEX (buffer -> text_end)))
|| ((TRANSLATE_CHAR (match_pc [0])) == '\n'))
goto re_match_loop;
goto re_match_fail;
}
#define RE_MATCH_WORD_BOUND(word_bound_p) \
if ((match_pc == gap_end) \
? (word_bound_p \
(((gap_start != (buffer -> text_start)) \
&& (WORD_CONSTITUENT_P (TRANSLATE_CHAR (gap_start[-1])))), \
((gap_end != (buffer -> text_end)) \
&& (WORD_CONSTITUENT_P (TRANSLATE_CHAR (gap_end[0])))))) \
: (word_bound_p \
(((match_pc != (buffer -> text_start)) \
&& (WORD_CONSTITUENT_P (TRANSLATE_CHAR (match_pc[-1])))), \
((match_pc != (buffer -> text_end)) \
&& (WORD_CONSTITUENT_P (TRANSLATE_CHAR (match_pc[0]))))))) \
goto re_match_loop; \
goto re_match_fail
case regexpcode_word_bound:
#define WORD_BOUND_P(left_p, right_p) ((left_p) != (right_p))
RE_MATCH_WORD_BOUND (WORD_BOUND_P);
#undef WORD_BOUND_P
case regexpcode_not_word_bound:
#define NOT_WORD_BOUND_P(left_p, right_p) ((left_p) == (right_p))
RE_MATCH_WORD_BOUND (NOT_WORD_BOUND_P);
#undef NOT_WORD_BOUND_P
case regexpcode_word_start:
#define WORD_START_P(left_p, right_p) ((! (left_p)) && (right_p))
RE_MATCH_WORD_BOUND (WORD_START_P);
#undef WORD_START_P
case regexpcode_word_end:
#define WORD_END_P(left_p, right_p) ((left_p) && (! (right_p)))
RE_MATCH_WORD_BOUND (WORD_END_P);
#undef WORD_END_P
#undef RE_MATCH_WORD_BOUND
case regexpcode_syntax_spec:
{
fast int ascii;
fast enum syntaxcode code;
READ_MATCH_CHAR (ascii);
READ_PATTERN_SYNTAXCODE (code);
if (SYNTAX_CONSTITUENT_P (code, ascii))
goto re_match_loop;
goto re_match_fail;
}
case regexpcode_not_syntax_spec:
{
fast int ascii;
fast enum syntaxcode code;
READ_MATCH_CHAR (ascii);
READ_PATTERN_SYNTAXCODE (code);
if (! (SYNTAX_CONSTITUENT_P (code, ascii)))
goto re_match_loop;
goto re_match_fail;
}
case regexpcode_word_char:
{
fast int ascii;
READ_MATCH_CHAR (ascii);
if (WORD_CONSTITUENT_P (ascii))
goto re_match_loop;
goto re_match_fail;
}
case regexpcode_not_word_char:
{
fast int ascii;
READ_MATCH_CHAR (ascii);
if (! (WORD_CONSTITUENT_P (ascii)))
goto re_match_loop;
goto re_match_fail;
}
/* "or" constructs ("|") are handled by starting each alternative
with an on_failure_jump that points to the start of the next
alternative. Each alternative except the last ends with a jump
to the joining point. (Actually, each jump except for the last
one really jumps to the following jump, because tensioning the
jumps is a hassle.)
The start of a stupid repeat has an on_failure_jump that points
past the end of the repeat text. This makes a failure point so
that, on failure to match a repetition, matching restarts past
as many repetitions have been found with no way to fail and
look for another one.
A smart repeat is similar but loops back to the on_failure_jump
so that each repetition makes another failure point. */
case regexpcode_on_failure_jump:
{
fast long offset;
READ_PATTERN_OFFSET (offset);
PUSH_FAILURE_POINT ((pattern_pc + offset), match_pc);
goto re_match_loop;
}
/* The end of a smart repeat has a maybe_finalize_jump back.
Change it either to a finalize_jump or an ordinary jump. */
case regexpcode_maybe_finalize_jump:
{
fast long offset;
fast long ascii;
READ_PATTERN_OFFSET (offset);
if (pattern_pc == pattern_end)
goto finalize_jump;
/* Compare what follows with the beginning of the repeat.
If we can establish that there is nothing that they
would both match, we can change to `finalize_jump'. */
SWITCH_ENUM (regexpcode, (pattern_pc [0]))
{
case regexpcode_exact_1:
ascii = (pattern_pc [1]);
break;
case regexpcode_exact_n:
ascii = (pattern_pc [2]);
break;
case regexpcode_line_end:
ascii = ('\n');
break;
default:
goto dont_finalize_jump;
}
/* (pattern_pc [(offset - 3)]) is an `on_failure_jump'.
Examine what follows that. */
SWITCH_ENUM (regexpcode, (pattern_pc [offset]))
{
case regexpcode_exact_1:
{
if (ascii != (pattern_pc [(offset + 1)]))
goto finalize_jump;
goto dont_finalize_jump;
}
case regexpcode_exact_n:
{
if (ascii != (pattern_pc [(offset + 2)]))
goto finalize_jump;
goto dont_finalize_jump;
}
case regexpcode_char_set:
{
if (CHAR_SET_MEMBER_P ((pattern_pc [(offset + 1)]),
(& (pattern_pc [(offset + 2)])),
ascii))
goto dont_finalize_jump;
goto finalize_jump;
}
case regexpcode_not_char_set:
{
if (CHAR_SET_MEMBER_P ((pattern_pc [(offset + 1)]),
(& (pattern_pc [(offset + 2)])),
ascii))
goto finalize_jump;
goto dont_finalize_jump;
}
default:
goto dont_finalize_jump;
}
finalize_jump:
pattern_pc -= 2;
(pattern_pc [-1]) = ((unsigned char) regexpcode_finalize_jump);
goto re_match_finalize_jump;
dont_finalize_jump:
pattern_pc -= 2;
(pattern_pc [-1]) = ((unsigned char) regexpcode_jump);
goto re_match_jump;
}
case regexpcode_finalize_jump:
re_match_finalize_jump:
{
stack_pointer -= 2;
goto re_match_jump;
}
case regexpcode_jump:
re_match_jump:
{
fast long offset;
READ_PATTERN_OFFSET (offset);
pattern_pc += offset;
goto re_match_loop;
}
case regexpcode_dummy_failure_jump:
{
PUSH_FAILURE_POINT (NULL, NULL);
goto re_match_jump;
}
default:
{
BAD_PATTERN ();
}
}
re_match_fail:
if (stack_pointer == stack_start)
RE_RETURN (RE_MATCH_FAILED);
match_pc = (*--stack_pointer);
pattern_pc = (*--stack_pointer);
if (pattern_pc != NULL)
goto re_match_loop;
goto re_match_fail;
return_point:
if (stack_start != NULL)
free (stack_start);
return (return_value);
}
#define DEFINE_RE_SEARCH(name) \
int \
DEFUN (name, \
(pattern_start, pattern_end, buffer, registers, \
match_start, match_end), \
unsigned char * pattern_start \
AND unsigned char * pattern_end \
AND struct re_buffer * buffer \
AND struct re_registers * registers \
AND unsigned char * match_start \
AND unsigned char * match_end)
#define INITIALIZE_RE_SEARCH(pc, limit, gap_limit) \
int can_be_null; \
unsigned char *translation; \
int match_result; \
\
fast unsigned char *match_pc; \
fast unsigned char *match_limit; \
fast unsigned char *gap_limit; \
fast unsigned char *fastmap; \
unsigned char fastmap_array[MAX_ASCII]; \
\
fastmap = &fastmap_array[0]; \
translation = (buffer -> translation); \
can_be_null = \
(re_compile_fastmap \
(pattern_start, pattern_end, translation, \
(buffer -> syntax_table), fastmap)); \
if (can_be_null < 0) \
return (can_be_null); \
\
match_pc = (pc); \
match_limit = (limit); \
gap_limit = (buffer -> gap_limit)
#define RE_SEARCH_TEST(start) \
(re_match \
(pattern_start, pattern_end, buffer, registers, (start), match_end))
#define RE_SEARCH_FORWARD_FAST(limit) do \
{ \
while (true) \
{ \
if (match_pc >= (limit)) \
break; \
\
if ((fastmap [(TRANSLATE_CHAR (*match_pc++))]) == FASTMAP_FALSE) \
continue; \
\
match_result = (RE_SEARCH_TEST (match_pc - 1)); \
if (RE_MATCH_FAILURE_RESULT (match_result)) \
continue; \
\
return (match_result); \
} \
} while (0)
DEFINE_RE_SEARCH (re_search_forward)
{
INITIALIZE_RE_SEARCH (match_start, match_end, gap_start);
if (can_be_null != 1)
{
if ((match_pc < gap_start) && (gap_start < match_limit))
RE_SEARCH_FORWARD_FAST (gap_start);
if (match_pc == gap_start)
match_pc = (buffer -> gap_end);
RE_SEARCH_FORWARD_FAST (match_limit);
return
((can_be_null == 0)
? RE_MATCH_FAILED
: (RE_SEARCH_TEST (match_limit)));
}
else
{
while (true)
{
match_result = (RE_SEARCH_TEST (match_pc));
if (! (RE_MATCH_FAILURE_RESULT (match_result)))
return (match_result);
match_pc += 1;
if (match_pc == gap_start)
match_pc = (buffer -> gap_end);
if (match_pc > match_limit)
return (match_result);
}
}
}
#define RE_SEARCH_BACKWARD_FAST(limit) do \
{ \
while (true) \
{ \
if (match_pc <= (limit)) \
break; \
\
if ((fastmap [(TRANSLATE_CHAR (*--match_pc))]) == FASTMAP_FALSE) \
continue; \
\
match_result = (RE_SEARCH_TEST (match_pc)); \
if (RE_MATCH_FAILURE_RESULT (match_result)) \
continue; \
\
RE_SEARCH_BACKWARD_RETURN (match_pc); \
} \
} while (0)
#define RE_SEARCH_BACKWARD_RETURN(start) \
return \
((match_result < 0) \
? match_result \
: ((((start) > (buffer -> gap_start)) \
? ((start) - (gap_end - (buffer -> gap_start))) \
: (start)) \
- (buffer -> text)))
DEFINE_RE_SEARCH (re_search_backward)
{
INITIALIZE_RE_SEARCH (match_end, match_start, gap_end);
if (can_be_null != 1)
{
if ((match_pc > gap_end) && (gap_end > match_limit))
RE_SEARCH_BACKWARD_FAST (gap_end);
if (match_pc == gap_end)
match_pc = (buffer -> gap_start);
RE_SEARCH_BACKWARD_FAST (match_limit);
if (can_be_null == 0)
return (RE_MATCH_FAILED);
match_result = (RE_SEARCH_TEST (match_limit));
RE_SEARCH_BACKWARD_RETURN (match_limit);
}
else
{
while (true)
{
match_result = (RE_SEARCH_TEST (match_pc));
if (! (RE_MATCH_FAILURE_RESULT (match_result)))
RE_SEARCH_BACKWARD_RETURN (match_pc);
if (match_pc == gap_end)
match_pc = (buffer -> gap_start);
match_pc -= 1;
if (match_pc < match_limit)
RE_SEARCH_BACKWARD_RETURN (match_pc);
}
}
}