home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
BURKS 2
/
BURKS_AUG97.ISO
/
BURKS
/
SOFTWARE
/
LIBS
/
NIHCL1.ZIP
/
NIHCL-3.0
/
LIB
/
REGEX.C
(
.txt
)
< prev
next >
Wrap
C/C++ Source or Header
|
1990-05-20
|
10KB
|
404 lines
/* Regex.c -- implementation of NIHCL class Regex
THIS SOFTWARE FITS THE DESCRIPTION IN THE U.S. COPYRIGHT ACT OF A
"UNITED STATES GOVERNMENT WORK". IT WAS WRITTEN AS A PART OF THE
AUTHOR'S OFFICIAL DUTIES AS A GOVERNMENT EMPLOYEE. THIS MEANS IT
CANNOT BE COPYRIGHTED. THIS SOFTWARE IS FREELY AVAILABLE TO THE
PUBLIC FOR USE WITHOUT A COPYRIGHT NOTICE, AND THERE ARE NO
RESTRICTIONS ON ITS USE, NOW OR SUBSEQUENTLY.
Author:
K. E. Gorlen
Bg. 12A, Rm. 2033
Computer Systems Laboratory
Division of Computer Research and Technology
National Institutes of Health
Bethesda, Maryland 20892
Phone: (301) 496-1111
uucp: uunet!nih-csl!kgorlen
Internet: kgorlen@alw.nih.gov
December, 1987
Function:
Regex is a class derived from String and containing a regular
expression and its compiled form. It implements functions that search
Strings for and match Strings with regular expressions using the
regular expression code from GNU Emacs Version 18.41 (regex.c).
Note that when a Regex is printed, only the String portion is printed,
not the compiled form.
$Log: Regex.c,v $
* Revision 3.0 90/05/20 00:21:01 kgorlen
* Release for 1st edition.
*
*/
#include "Regex.h"
#include "nihclIO.h"
#include <ctype.h>
#include <iomanip.h>
#include <libc.h>
#ifdef SYSV
#include <memory.h>
// BSD-style bcopy() expressed in terms of memcpy(); this definition is only
// compiled when SYSV is defined.  Note the argument order: bcopy takes
// (source, destination, count) while memcpy takes (destination, source, count).
// Unlike BSD bcopy(), memcpy() makes no promise about overlapping regions, so
// callers must pass distinct buffers.
inline void bcopy(void* from, void* to, int sz)
{
    (void) memcpy(to, from, sz);
}
#endif
// Class-description macros for Regex.  BASE is used throughout this file to
// refer to the base class (String).
// NOTE(review): THIS, BASE_CLASSES, MEMBER_CLASSES and VIRTUAL_BASE_CLASSES
// are presumably consumed by the DEFINE_CLASS macro below -- confirm against
// the NIHCL headers.
#define THIS Regex
#define BASE String
#define BASE_CLASSES BASE::desc()
#define MEMBER_CLASSES
#define VIRTUAL_BASE_CLASSES
// The string argument is the RCS $Header$ identification of this file; it is
// maintained by RCS and must not be edited by hand.
DEFINE_CLASS(Regex,1,"$Header: /afs/alw.nih.gov/unix/sun4_40c/usr/local/src/nihcl-3.0/share/lib/RCS/Regex.c,v 3.0 90/05/20 00:21:01 kgorlen Rel $",NULL,NULL);
// Definition of the class constant DEFAULT_BUFSIZE.
// NOTE(review): presumably the default for the constructors' bufsize
// argument (bytes malloc'ed by init()) -- confirm in Regex.h.
const unsigned Regex::DEFAULT_BUFSIZE = 64;
// Error code passed to setError() by errRegex(); defined elsewhere.
extern const int NIHCL_BADREGEX;
/*
extern const char* re_compile_pattern(
const char*, // the address of the pattern string
int size, // the length of the pattern string
struct re_pattern_buffer* bufp);
re_compile_pattern takes a regular-expression descriptor string in the
user's format and converts it into a buffer full of byte commands for
matching.
pattern is the address of the pattern string
size is the length of it.
bufp is a struct re_pattern_buffer * which points to the info
on where to store the byte commands.
This structure contains a char * which points to the
actual space, which should have been obtained with malloc().
compile_pattern may use realloc() to grow the buffer space.
The number of bytes of commands can be found out by looking in the
struct re_pattern_buffer that bufp pointed to, after compile_pattern
returns.
*/
/*
extern int re_match(struct re_pattern_buffer*, const char*, int size,
int pos, struct re_registers*);
extern int re_match_2(
struct re_pattern_buffer* pbufp,
const char* string1, int size1,
const char* string2, int size2,
int pos,
struct re_registers*,
int mstop);
Match the pattern described by `pbufp' against data which is the
virtual concatenation of `string1' and `string2'. `size1' and `size2'
are the sizes of the two data strings. Start the match at position
`pos'. Do not consider matching past the position `mstop'.
If pbufp->fastmap is nonzero, then it had better be up to date.
The reason that the data to match is specified as two components which
are to be regarded as concatenated is so that this function can be
used directly on the contents of an Emacs buffer.
-1 is returned if there is no match. Otherwise the value is the
length of the substring which was matched.
re_match just calls re_match_2 with size1=0 and mstop=size.
*/
/*
extern int re_search(struct re_pattern_buffer*, const char*, int size,
int startpos, int range, struct re_registers*);
extern int re_search_2(
struct re_pattern_buffer*,
const char*, int size1,
const char*, int size2,
int startpos,
int range,
struct re_registers*,
int mstop);
Like re_match_2 but tries first a match starting at index `startpos',
then at startpos + 1, and so on. `range' is the number of places to
try before giving up. If `range' is negative, the starting positions
tried are startpos, startpos - 1, etc. It is up to the caller to make
sure that range is not so large as to take the starting position
outside of the input strings.
The value returned is the position at which the match was found, or -1
if no match was found.
re_search just calls re_search_2 with size1=0 and mstop=size.
*/
// Width of a byte in bits.  fixCopy() uses 1<<BYTEWIDTH as the size in bytes
// of a fastmap when duplicating one.
const unsigned BYTEWIDTH = 8; // width of a byte in bits
void Regex::re_compile_pattern()
// Compile the String part of this Regex into `pattern' with the global
// ::re_compile_pattern().  A non-null return value from that function is an
// error message, which is reported through errRegex().
{
    const char* const msg = ::re_compile_pattern(*this, length(), &pattern);
    if (msg == 0) return;       // compiled without error
    errRegex(msg);
}
int Regex::re_match(const String& str, int pos)
{
int len = ::re_match(&pattern, str, str.length(), pos, ®s);
setGroups(len);
return len;
}
void Regex::init(int bufsize)
// Put this Regex into its initial state: an empty pattern buffer of
// `bufsize' bytes obtained with malloc(), no fastmap, no translate table,
// no matched groups, and every register start/end set to -1.
{
    // Match state: nothing matched yet.
    ngroups = 0;
    for (int r = 0; r < RE_NREGS; r++) {
        regs.start[r] = -1;
        regs.end[r] = -1;
    }

    // Compiled-pattern state.
    pattern.translate = 0;
    pattern.fastmap = 0;
    pattern.used = 0;
    pattern.allocated = bufsize;
    pattern.buffer = (char*)malloc(bufsize);
}
void Regex::fixCopy()
// Give this object private copies of the heap storage referenced by
// `pattern'.  Called after `pattern' has been copied member-wise from
// another Regex, when buffer (and fastmap, if any) still point at the other
// object's storage.
{
    // Duplicate the compiled pattern: same capacity, only the used bytes
    // are copied.
    char* const sharedCode = pattern.buffer;
    pattern.buffer = (char*)malloc(pattern.allocated);
    bcopy(sharedCode, pattern.buffer, pattern.used);

    if (pattern.fastmap == 0) return;   // no fastmap to duplicate

    // A fastmap occupies 1<<BYTEWIDTH bytes.
    const int mapSize = 1 << BYTEWIDTH;
    char* const sharedMap = pattern.fastmap;
    pattern.fastmap = (char*)malloc(mapSize);
    bcopy(sharedMap, pattern.fastmap, mapSize);
}
void Regex::setGroups(int result)
// Set ngroups, the number of groups matched by the last match/search.
// `result' is the value returned by the match/search; -1 means no match, in
// which case ngroups is 0.  Otherwise ngroups is the number of leading
// registers whose start is not -1.
{
    ngroups = 0;
    if (result == -1) return;
    // The index is declared with an explicit type: the original
    // "register i" relied on implicit int, which standard C++ rejects.
    for (int i = 0; i < RE_NREGS; i++) {
        if (regs.start[i] == -1) return;    // first unused register ends the count
        ngroups++;
    }
}
void Regex::errRegex(const char* s) const
// Report a bad regular expression via setError() with code NIHCL_BADREGEX.
// `s' is the error message; this object and the text of its regular
// expression are passed along as well.
{
    // Convert to const char* explicitly before handing it to setError().
    const char* exprText = *this;
    setError(NIHCL_BADREGEX, DEFAULT, s, this, exprText);
}
Regex::Regex(unsigned bufsize)
// Construct a Regex with no regular expression compiled yet; only the
// pattern buffer of `bufsize' bytes is set up.
{
    init(bufsize);
}
Regex::Regex(const char* rexp, unsigned bufsize)
    : BASE(rexp)
// Construct a Regex from the regular expression text `rexp': the String
// part holds the text, and the pattern buffer (initially `bufsize' bytes)
// receives its compiled form.
{
    init(bufsize);
    re_compile_pattern();
}
Regex::Regex(const Regex& rexp) : BASE(rexp)
// Copy constructor.  The compiled pattern, the registers and the group
// count of `rexp' are copied, then fixCopy() gives this object its own heap
// storage for the pattern.
{
    // A copy is more efficient than re_compile_pattern()
    pattern = rexp.pattern;
    regs = rexp.regs;
    // ngroups must travel with regs; it was previously left uninitialized
    // here although operator[] reads it.
    ngroups = rexp.ngroups;
    fixCopy();
}
Regex::~Regex()
// Release the heap storage referenced by `pattern': the fastmap, if there
// is one, and the compiled-pattern buffer.
{
    if (pattern.fastmap != 0) free(pattern.fastmap);
    free(pattern.buffer);
}
Range Regex::operator[](unsigned i) const
// Return the Range (start index, length) of group `i' from the last
// match/search.  For an `i' that is not below ngroups, Range(0,-1) is
// returned.
{
    if (i < ngroups) {
        return Range(regs.start[i], regs.end[i] - regs.start[i]);
    }
    return Range(0,-1); // should raise exception
}
void Regex::operator=(const char* cs)
// Replace the regular expression with the text `cs' and recompile it.
{
    BASE::operator=(cs);        // assign the String part
    re_compile_pattern();
}
void Regex::operator=(const Regex& rexp)
// Assign another Regex: copy its String part, compiled pattern, registers
// and group count.  The pattern storage previously owned by this object is
// released and replaced by private copies made by fixCopy().
{
    if (this == &rexp) return;  // watch out for x = x
    *(String*)this=rexp;        // copy the String part
    // Release everything this object owns before the pointers are
    // overwritten; previously only the buffer was freed, so an existing
    // fastmap leaked.
    free(pattern.buffer);
    if (pattern.fastmap) free(pattern.fastmap);
    pattern = rexp.pattern;
    regs = rexp.regs;
    ngroups = rexp.ngroups;     // keep the group count in step with regs
    fixCopy();
}
void Regex::operator=(const String& str)
// Replace the regular expression with the contents of `str' and recompile it.
{
    BASE::operator=(str);       // assign the String part
    re_compile_pattern();
}
void Regex::operator=(const SubString& substr)
// Replace the regular expression with the contents of `substr' and
// recompile it.
{
    BASE::operator=(substr);    // assign the String part
    re_compile_pattern();
}
void Regex::deepenShallowCopy()
// Called by deepCopy() to convert a shallow copy to a deep copy.
{
BASE::deepenShallowCopy();
fixCopy();
}
void Regex::scanFrom(istream& strm)
// Read the String part from `strm' via the base class, then recompile the
// pattern so that it agrees with the new text.
{
    BASE::scanFrom(strm);
    re_compile_pattern();
}
void Regex::toAscii()
{
String::toAscii();
re_compile_pattern();
}
void Regex::toLower()
{
String::toLower();
re_compile_pattern();
}
void Regex::toUpper()
{
String::toUpper();
re_compile_pattern();
}
void Regex::storer(OIOout& strm) const
// Store this Regex on `strm': the base class part first, then the allocated
// size of the pattern buffer, the group count, and the start/end register
// pair of each group.  The compiled pattern itself is not stored.
{
    BASE::storer(strm);
    strm << pattern.allocated;
    strm << ngroups;
    for (int g = 0; g < ngroups; g++) {
        strm << regs.start[g];
        strm << regs.end[g];
    }
}
Regex::Regex(OIOin& strm)
// Construct a Regex from `strm', reading the fields in the order in which
// storer(OIOout&) writes them.  The compiled pattern is not read; it is
// rebuilt from the String part.
// NOTE(review): the group count read from the stream is not checked against
// RE_NREGS before it is used to index regs -- confirm the input is trusted.
    :
#ifdef MI
    Object(strm),
#endif
    BASE(strm)
{
    int storedSize;
    strm >> storedSize;
    init(storedSize);           // also resets ngroups and the registers
    re_compile_pattern();
    strm >> ngroups;
    for (int g = 0; g < ngroups; g++) {
        strm >> regs.start[g];
        strm >> regs.end[g];
    }
}
void Regex::storer(OIOofd& fd) const
// Store this Regex on `fd': the base class part first, then the allocated
// size of the pattern buffer, the group count, and the start/end register
// pair of each group.  The compiled pattern itself is not stored.
{
    BASE::storer(fd);
    fd << pattern.allocated;
    fd << ngroups;
    int g = 0;
    while (g < ngroups) {
        fd << regs.start[g];
        fd << regs.end[g];
        g++;
    }
}
Regex::Regex(OIOifd& fd)
// Construct a Regex from `fd', reading the fields in the order in which
// storer(OIOofd&) writes them.  The compiled pattern is not read; it is
// rebuilt from the String part.
// NOTE(review): the group count read from `fd' is not checked against
// RE_NREGS before it is used to index regs -- confirm the input is trusted.
    :
#ifdef MI
    Object(fd),
#endif
    BASE(fd)
{
    int storedSize;
    fd >> storedSize;
    init(storedSize);           // also resets ngroups and the registers
    re_compile_pattern();
    fd >> ngroups;
    int g = 0;
    while (g < ngroups) {
        fd >> regs.start[g];
        fd >> regs.end[g];
        g++;
    }
}
bool Regex::match(const String& s, int pos)
// Check for match of Regex at index pos of String s.
{
return re_match(s,pos) !=