home *** CD-ROM | disk | FTP | other *** search
- /*
- Copyright (C) 1988 Free Software Foundation
- written by Doug Lea (dl@rocky.oswego.edu)
-
- This file is part of GNU CC.
-
- GNU CC is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY. No author or distributor
- accepts responsibility to anyone for the consequences of using it
- or for whether it serves any particular purpose or works at all,
- unless he says so in writing. Refer to the GNU CC General Public
- License for full details.
-
- Everyone is granted permission to copy, modify and redistribute
- GNU CC, but only under the conditions described in the
- GNU CC General Public License. A copy of this license is
- supposed to have been given to you along with GNU CC so you
- can know your rights and responsibilities. It should be in a
- file named COPYING. Among other things, the copyright notice
- and this notice must be preserved on all copies.
- */
-
- /*
- String class implementation
- */
-
- #include <String.h>
- #include <std.h>
- #include <ctype.h>
- #include "libconfig.h"
-
- extern "C" {
- #include <regex.h>
- }
-
- void String::error(char* msg)
- {
- (*lib_error_handler)("String", msg);
- }
-
- // globals
-
- StrRep _nilStrRep = { 0, 1, { 0 } }; // nil strings point here
- String _nilString; // nil SubStrings point here
-
-
-
-
- /*
- the following inline fcts are specially designed to work
- in support of String classes, and are not meant as generic replacements
- for libc "str" functions.
-
- inline copy fcts - I like left-to-right from->to arguments.
- all versions assume that `to' argument is non-null
- */
-
// Copy exactly n bytes from `from` to `to` (no terminator is added).
// Nothing is copied when n is not positive or when both pointers are equal.
inline static void ncopy(const char* from, char* to, int n)
{
  if (n <= 0 || from == to)
    return;
  bcopy((void*)from, (void*) to, n);
}
-
// Copy n bytes from `from` to `to`, then store a terminating 0 at to[n].
// The terminator is written even when the copy itself is skipped.
inline static void ncopy0(const char* from, char* to, int n)
{
  const int must_copy = (n > 0) && (from != to);
  if (must_copy)
    bcopy((void*)from, (void*) to, n);
  to[n] = 0;
}
-
// Copy characters from `from` to `to` up to and including the terminating 0.
// A null `from` leaves `to` untouched.
inline static void scopy(const char* from, char* to)
{
  if (from == 0)
    return;
  for (;;)
  {
    const char c = *from++;
    *to++ = c;
    if (c == 0)
      break;
  }
}
-
// Copy n bytes walking downwards in memory: the first byte moved is
// *from -> *to, the next is from[-1] -> to[-1], and so on.
// A null `from` copies nothing.
inline static void revcopy(const char* from, char* to, short n)
{
  if (from == 0)
    return;
  for (short left = n; left > 0; --left)
  {
    *to = *from;
    --to;
    --from;
  }
}
-
-
// Length of a 0-terminated string, not counting the terminator.
// A null pointer counts as length 0.
inline static int slen(const char* t)
{
  if (t == 0)
    return 0;

  int count = 0;
  while (t[count] != 0)
    ++count;
  return count;
}
-
- // minimum and maximum possible rep sizes
- // these are always allocated in blocks of
- // a power of 2 minus MALLOC_OVERHEAD, which
- // is the least wasteful & fastest size for standard versions of malloc
-
- #define MAXStrRep_SIZE (1 << (SHORTBITS - 1) - 1)
- #define MINStrRep_SIZE 16
- #define MALLOC_OVERHEAD 4
-
- inline static StrRep* Snew(int newsiz)
- {
- unsigned int siz = sizeof(StrRep) + newsiz + MALLOC_OVERHEAD;
- unsigned int allocsiz = MINStrRep_SIZE;
- while (allocsiz < siz) allocsiz <<= 1;
- allocsiz -= MALLOC_OVERHEAD;
- if (allocsiz >= MAXStrRep_SIZE)
- (*lib_error_handler)("String", "Requested length out of range");
-
- StrRep* rep = (StrRep *) new char[allocsiz];
- rep->sz = allocsiz - sizeof(StrRep);
- return rep;
- }
-
- StrRep* Salloc(StrRep* old, const char* src, int srclen, int newlen)
- {
- if (old == &_nilStrRep) old = 0;
- if (srclen < 0) srclen = slen(src);
- if (newlen < srclen) newlen = srclen;
- StrRep* rep;
- if (old == 0 || newlen > old->sz)
- rep = Snew(newlen);
- else
- rep = old;
-
- rep->len = newlen;
-
- ncopy0(src, rep->s, srclen);
-
- if (old != rep && old != 0) delete old;
-
- return rep;
- }
-
- StrRep* Sresize(StrRep* old, int newlen)
- {
- if (old == &_nilStrRep) old = 0;
- StrRep* rep;
- if (old == 0)
- rep = Snew(newlen);
- else if (newlen > old->sz)
- {
- rep = Snew(newlen);
- bcopy(old->s, rep->s, old->len);
- delete old;
- }
- else
- rep = old;
-
- rep->len = newlen;
-
- return rep;
- }
-
- StrRep* Scopy(StrRep* old, StrRep* s)
- {
- if (old == &_nilStrRep) old = 0;
- if (s == &_nilStrRep) s = 0;
- if (old == s)
- return (old == 0)? &_nilStrRep : old;
- else if (s == 0)
- {
- old->s[0] = 0;
- old->len = 0;
- return old;
- }
- else
- {
- StrRep* rep;
- int newlen = s->len;
- if (old == 0 || newlen > old->sz)
- {
- rep = Snew(newlen);
- if (old != 0) delete old;
- }
- else
- rep = old;
- rep->len = newlen;
- ncopy0(s->s, rep->s, newlen);
- return rep;
- }
- }
-
- StrRep* Scat(StrRep* old, const char* s, int srclen, const char* t, int tlen)
- {
- if (old == &_nilStrRep) old = 0;
- if (srclen < 0) srclen = slen(s);
- if (tlen < 0) tlen = slen(t);
- int newlen = srclen + tlen;
- StrRep* rep;
- if (old == 0 || newlen > old->sz)
- rep = Snew(newlen);
- else
- rep = old;
-
- rep->len = newlen;
-
- ncopy(s, rep->s, srclen);
- ncopy0(t, &(rep->s[srclen]), tlen);
-
- if (old != rep && old != 0) delete old;
-
- return rep;
- }
-
- StrRep* Sprepend(StrRep* old, const char* t, int tlen)
- {
- char* s;
- int srclen;
- if (old == &_nilStrRep || old == 0)
- {
- s = 0; old = 0; srclen = 0;
- }
- else
- {
- s = old->s; srclen = old->len;
- }
- if (tlen < 0) tlen = slen(t);
- int newlen = srclen + tlen;
- StrRep* rep;
- if (old == 0 || newlen > old->sz || (t >= old->s && t <= old->s+old->len))
- rep = Snew(newlen);
- else
- rep = old;
-
- rep->len = newlen;
-
- revcopy(&(s[srclen]), &(rep->s[newlen]), srclen+1);
- ncopy(t, rep->s, tlen);
-
- if (old != rep && old != 0) delete old;
-
- return rep;
- }
-
-
// string compare: first argument is known to be non-null
//
// Returns 0 when the strings are equal, otherwise the difference of the
// first differing characters taken as unsigned char values. A null `b`
// compares equal only to an empty `a`.
// The difference is kept in an int: storing it in a signed char wrapped
// for large gaps and could report the wrong sign.
inline static int scmp(const char* a, const char* b)
{
  if (b == 0)
    return *a != 0;

  int diff = 0;
  while ((diff = (unsigned char)*a - (unsigned char)*b++) == 0 && *a++ != 0);
  return diff;
}
-
// Compare two character ranges given by pointer and length.
// Returns the difference of the first differing characters (as unsigned
// char values) within the common prefix, or al - bl when one range is a
// prefix of the other.
// The difference is an int so it cannot wrap, and the shorter length is
// picked with a portable conditional instead of the `<?` extension.
inline static int ncmp(const char* a, int al, const char* b, int bl)
{
  int n = (al < bl) ? al : bl;
  while (n-- > 0)
  {
    int diff = (unsigned char)*a++ - (unsigned char)*b++;
    if (diff != 0) return diff;
  }
  return al - bl;
}
-
- int fcompare(String& x, String& y)
- {
- const char* a = x.rep->s;
- const char* b = y.rep->s;
- int al = x.rep->len;
- int bl = y.rep->len;
- int n = al <? bl;
- signed char diff = 0;
- while (n-- > 0)
- {
- char ac = *a++;
- char bc = *b++;
- if ((diff = ac - bc) != 0)
- {
- if (ac >= 'a' && ac <= 'z')
- ac = ac - 'a' + 'A';
- if (bc >= 'a' && bc <= 'z')
- bc = bc - 'a' + 'A';
- if ((diff = ac - bc) != 0)
- return diff;
- }
- }
- return al - bl;
- }
-
- // these are not inline, but pull in the above inlines, so are
- // pretty fast
-
- int compare(String& x, const char* b)
- {
- return scmp(x.rep->s, b);
- }
-
- int compare(String& x, String& y)
- {
- return scmp(x.rep->s, y.rep->s);
- }
-
- int compare(String& x, SubString& y)
- {
- return ncmp(x.rep->s, x.rep->len, &(y.S->rep->s[y.pos]), y.len);
- }
-
- int compare(SubString& x, String& y)
- {
- return ncmp(&(x.S->rep->s[x.pos]), x.len, y.rep->s, y.rep->len);
- }
-
- int compare(SubString& x, SubString& y)
- {
- return ncmp(&(x.S->rep->s[x.pos]), x.len, &(y.S->rep->s[y.pos]), y.len);
- }
-
- int compare(SubString& x, const char* b)
- {
- if (b == 0)
- return x.len;
- else
- {
- const char* a = &(x.S->rep->s[x.pos]);
- int n = x.len;
- signed char diff;
- while (n-- > 0) if ((diff = *a++ - *b++) != 0) return diff;
- return (*b == 0) ? 0 : -1;
- }
- }
-
- /*
- index fcts
- */
-
- int String::search(int start, int sl, char c)
- {
- const char* s = rep->s;
- if (sl > 0)
- {
- if (start >= 0)
- {
- const char* a = &(s[start]);
- const char* lasta = &(s[sl]);
- while (a < lasta) if (*a++ == c) return --a - s;
- }
- else
- {
- const char* a = &(s[sl + start + 1]);
- while (--a >= s) if (*a == c) return a - s;
- }
- }
- return -1;
- }
-
- int String::search(int start, int sl, const char* t, int tl = -1)
- {
- const char* s = rep->s;
- if (tl < 0) tl = slen(t);
- if (sl > 0 && tl > 0)
- {
- if (start >= 0)
- {
- const char* lasts = &(s[sl - tl]);
- const char* lastt = &(t[tl]);
- const char* p = &(s[start]);
-
- while (p <= lasts)
- {
- const char* x = p++;
- const char* y = t;
- while (*x++ == *y++) if (y >= lastt) return --p - s;
- }
- }
- else
- {
- const char* firsts = &(s[tl - 1]);
- const char* lastt = &(t[tl - 1]);
- const char* p = &(s[sl + start + 1]);
-
- while (--p >= firsts)
- {
- const char* x = p;
- const char* y = lastt;
- while (*x-- == *y--) if (y < t) return ++x - s;
- }
- }
- }
- return -1;
- }
-
-
- int String::match(int start, int sl, int exact, const char* t, int tl = -1)
- {
- if (tl < 0) tl = slen(t);
-
- if (start < 0)
- {
- start = sl + start - tl + 1;
- if (start < 0 || (exact && start != 0))
- return 0;
- }
- else if (exact && sl - start != tl)
- return 0;
-
- if (sl == 0 || tl == 0 || sl - start < tl || start >= sl)
- return 0;
-
- int n = tl;
- const char* s = &(rep->s[start]);
- while (n-- > 0) if (*s++ != *t++) return 0;
- return tl;
- }
-
-
-
- void SubString::assign(StrRep* ysrc, const char* ys, int ylen=-1)
- {
- if (S == &_nilString) return;
-
- if (ylen < 0) ylen = slen(ys);
- StrRep* targ = S->rep;
- int sl = targ->len - len + ylen;
-
- if (ysrc == targ || sl >= targ->sz)
- {
- StrRep* oldtarg = targ;
- targ = Sresize(0, sl);
- ncopy(oldtarg->s, targ->s, pos);
- ncopy(ys, &(targ->s[pos]), ylen);
- scopy(&(oldtarg->s[pos + len]), &(targ->s[pos + ylen]));
- delete oldtarg;
- }
- else if (len == ylen)
- ncopy(ys, &(targ->s[pos]), len);
- else if (ylen < len)
- {
- ncopy(ys, &(targ->s[pos]), ylen);
- scopy(&(targ->s[pos + len]), &(targ->s[pos + ylen]));
- }
- else
- {
- revcopy(&(targ->s[targ->len]), &(targ->s[sl]), targ->len-pos-len +1);
- ncopy(ys, &(targ->s[pos]), ylen);
- }
- targ->len = sl;
- S->rep = targ;
- }
-
- // Regex stuff
-
- Regex::~Regex()
- {
- delete(buf->buffer);
- delete(buf->fastmap);
- delete(buf);
- delete(reg);
- }
-
- void Regex::initialize(const char* t, int tlen, int fast, int bufsize,
- const char* transtable)
- {
- if (tlen < 0) tlen = slen(t);
- buf = new re_pattern_buffer;
- reg = new re_registers;
- if (fast)
- buf->fastmap = new char[256];
- else
- buf->fastmap = 0;
- buf->translate = (char*)transtable;
- if (tlen > bufsize)
- bufsize = tlen;
- buf->allocated = bufsize;
- buf->buffer = new char [buf->allocated];
- char* msg = re_compile_pattern((char*)t, tlen, buf);
- if (msg != 0)
- (*lib_error_handler)("Regex", msg);
- else if (fast)
- re_compile_fastmap(buf);
- }
-
- int Regex::match_info(int& start, int& length, int nth = 0)
- {
- if ((unsigned)(nth) >= RE_NREGS)
- return 0;
- else
- {
- start = reg->start[nth];
- length = reg->end[nth] - start;
- return start >= 0 && length >= 0;
- }
- }
-
- int Regex::search(const char* s, int len, int& matchlen, int startpos = 0)
- {
- int matchpos, pos, range;
- if (startpos >= 0)
- {
- pos = startpos;
- range = len - startpos;
- }
- else
- {
- pos = len + startpos;
- range = -pos;
- }
- matchpos = re_search_2(buf, 0, 0, (char*)s, len, pos, range, reg, len);
- if (matchpos >= 0)
- matchlen = reg->end[0] - reg->start[0];
- else
- matchlen = 0;
- return matchpos;
- }
-
- int Regex::match(const char*s, int len, int p = 0)
- {
- if (p < 0)
- {
- p += len;
- if (p >= len)
- return 0;
- return re_match_2(buf, 0, 0, (unsigned char*)s, p, 0, reg, p);
- }
- else if (p >= len)
- return 0;
- else
- return re_match_2(buf, 0, 0, (unsigned char*)s, len, p, reg, len);
- }
-
-
- /*
- * substitution
- */
-
-
- int String::_gsub(const char* pat, int pl, const char* r, int rl)
- {
- int nmatches = 0;
- if (pl < 0) pl = slen(pat);
- if (rl < 0) rl = slen(r);
- int sl = rep->len;
- if (sl <= 0 || pl <= 0 || sl < pl)
- return nmatches;
-
- const char* s = rep->s;
-
- StrRep* nrep = Sresize(0, 2 * sl); // guess size
- char* x = nrep->s;
-
- int si = 0;
- int xi = 0;
- int remaining = sl;
-
- while (remaining >= pl)
- {
- int pos = search(si, sl, pat, pl);
- if (pos < 0)
- break;
- else
- {
- ++nmatches;
- int mustfit = xi + remaining + rl - pl;
- if (mustfit >= nrep->sz)
- {
- nrep = Sresize(nrep, mustfit);
- x = nrep->s;
- }
- pos -= si;
- ncopy(&(s[si]), &(x[xi]), pos);
- ncopy(r, &(x[xi + pos]), rl);
- si += pos + pl;
- remaining -= pos + pl;
- xi += pos + rl;
- }
- }
-
- ncopy0(&(s[si]), &(x[xi]), remaining);
- nrep->len = xi + remaining;
-
- if (nrep->len <= rep->sz) // fit back in if possible
- {
- rep->len = nrep->len;
- ncopy0(nrep->s, rep->s, rep->len);
- delete(nrep);
- }
- else
- {
- delete(rep);
- rep = nrep;
- }
- return nmatches;
- }
-
- int String::_gsub(Regex& pat, const char* r, int rl)
- {
- int nmatches = 0;
- int sl = rep->len;
- if (sl <= 0)
- return nmatches;
-
- if (rl < 0) rl = slen(r);
-
- const char* s = rep->s;
-
- StrRep* nrep = Sresize(0, 2 * sl); // guess size
- char* x = nrep->s;
-
- int si = 0;
- int xi = 0;
- int remaining = sl;
- int pos, pl = 0; // how long is a regular expression?
-
- while (remaining > 0)
- {
- pos = pat.search(s, sl, pl, si); // unlike string search, the pos returned here is absolute
- if (pos < 0 || pl <= 0)
- break;
- else
- {
- ++nmatches;
- int mustfit = xi + remaining + rl - pl;
- if (mustfit >= nrep->sz)
- {
- nrep = Sresize(nrep, mustfit);
- x = nrep->s;
- }
- pos -= si;
- ncopy(&(s[si]), &(x[xi]), pos);
- ncopy(r, &(x[xi + pos]), rl);
- si += pos + pl;
- remaining -= pos + pl;
- xi += pos + rl;
- }
- }
-
- ncopy0(&(s[si]), &(x[xi]), remaining);
- nrep->len = xi + remaining;
-
- if (nrep->len <= rep->sz) // fit back in if possible
- {
- rep->len = nrep->len;
- ncopy0(nrep->s, rep->s, rep->len);
- delete(nrep);
- }
- else
- {
- delete(rep);
- rep = nrep;
- }
- return nmatches;
- }
-
-
- /*
- * deletion
- */
-
- void String::del(int pos, int len)
- {
- if (pos <= 0 || len <= 0 || pos + len > rep->len) return;
- int nlen = rep->len - len;
- int first = pos + len;
- ncopy0(&(rep->s[first]), &(rep->s[pos]), rep->len - first);
- rep->len = nlen;
- }
-
- void String::del(Regex& r, int startpos = 0)
- {
- int mlen;
- int first = r.search(rep->s, rep->len, mlen, startpos);
- del(first, mlen);
- }
-
- void String::del(const char* t, int startpos = 0)
- {
- int tlen = slen(t);
- int p = search(startpos, rep->len, t, tlen);
- del(p, tlen);
- }
-
- /*
- * substring extraction
- */
-
-
- SubString String::at(String& y, int startpos = 0)
- {
- int first = search(startpos, rep->len, y.rep->s, y.rep->len);
- return SubString(this, first, y.rep->len);
- }
-
- SubString String::at(SubString& y, int startpos = 0)
- {
- int first = search(startpos, rep->len, &(y.S->rep->s[y.pos]), y.len);
- return SubString(this, first, y.len);
- }
-
- SubString String::at(Regex& r, int startpos = 0)
- {
- int mlen;
- int first = r.search(rep->s, rep->len, mlen, startpos);
- return SubString(this, first, mlen);
- }
-
- SubString String::at(const char* t, int startpos = 0)
- {
- int tlen = slen(t);
- int first = search(startpos, rep->len, t, tlen);
- return SubString(this, first, tlen);
- }
-
- SubString String::at(char c, int startpos = 0)
- {
- int first = search(startpos, rep->len, c);
- return SubString(this, first, 1);
- }
-
-
- SubString String::before(String& y, int startpos = 0)
- {
- int last = search(startpos, rep->len, y.rep->s, y.rep->len);
- return SubString(this, 0, last);
- }
-
-
- SubString String::before(SubString& y, int startpos = 0)
- {
- int last = search(startpos, rep->len, &(y.S->rep->s[y.pos]), y.len);
- return SubString(this, 0, last);
- }
-
- SubString String::before(Regex& r, int startpos = 0)
- {
- int mlen;
- int first = r.search(rep->s, rep->len, mlen, startpos);
- return SubString(this, 0, first);
- }
-
- SubString String::before(char c, int startpos = 0)
- {
- int last = search(startpos, rep->len, c);
- return SubString(this, 0, last);
- }
-
- SubString String::before(const char* t, int startpos = 0)
- {
- int tlen = slen(t);
- int last = search(startpos, rep->len, t, tlen);
- return SubString(this, 0, last);
- }
-
-
- SubString String::through(String& y, int startpos = 0)
- {
- int last = search(startpos, rep->len, y.rep->s, y.rep->len);
- if (last >= 0) last += y.rep->len;
- return SubString(this, 0, last);
- }
-
-
- SubString String::through(SubString& y, int startpos = 0)
- {
- int last = search(startpos, rep->len, &(y.S->rep->s[y.pos]), y.len);
- if (last >= 0) last += y.len;
- return SubString(this, 0, last);
- }
-
-
- SubString String::through(Regex& r, int startpos = 0)
- {
- int mlen;
- int first = r.search(rep->s, rep->len, mlen, startpos);
- if (first >= 0) first += mlen;
- return SubString(this, 0, first);
- }
-
- SubString String::through(char c, int startpos = 0)
- {
- int last = search(startpos, rep->len, c);
- if (last >= 0) last += 1;
- return SubString(this, 0, last);
- }
-
- SubString String::through(const char* t, int startpos = 0)
- {
- int tlen = slen(t);
- int last = search(startpos, rep->len, t, tlen);
- if (last >= 0) last += tlen;
- return SubString(this, 0, last);
- }
-
-
- SubString String::after(String& y, int startpos = 0)
- {
- int first = search(startpos, rep->len, y.rep->s, y.rep->len);
- if (first >= 0) first += y.rep->len;
- return SubString(this, first, rep->len - first);
- }
-
- SubString String::after(SubString& y, int startpos = 0)
- {
- int first = search(startpos, rep->len, &(y.S->rep->s[y.pos]), y.len);
- if (first >= 0) first += y.len;
- return SubString(this, first, rep->len - first);
- }
-
- SubString String::after(char c, int startpos = 0)
- {
- int first = search(startpos, rep->len, c);
- if (first >= 0) first += 1;
- return SubString(this, first, rep->len - first);
- }
-
- SubString String::after(Regex& r, int startpos = 0)
- {
- int mlen;
- int first = r.search(rep->s, rep->len, mlen, startpos);
- if (first >= 0) first += mlen;
- return SubString(this, first, rep->len - first);
- }
-
- SubString String::after(const char* t, int startpos = 0)
- {
- int tlen = slen(t);
- int first = search(startpos, rep->len, t, tlen);
- if (first >= 0) first += tlen;
- return SubString(this, first, rep->len - first);
- }
-
- SubString String::from(String& y, int startpos = 0)
- {
- int first = search(startpos, rep->len, y.rep->s, y.rep->len);
- return SubString(this, first, rep->len - first);
- }
-
-
- SubString String::from(SubString& y, int startpos = 0)
- {
- int first = search(startpos, rep->len, &(y.S->rep->s[y.pos]), y.len);
- return SubString(this, first, rep->len - first);
- }
-
- SubString String::from(Regex& r, int startpos = 0)
- {
- int mlen;
- int first = r.search(rep->s, rep->len, mlen, startpos);
- return SubString(this, first, rep->len - first);
- }
-
- SubString String::from(char c, int startpos = 0)
- {
- int first = search(startpos, rep->len, c);
- return SubString(this, first, rep->len - first);
- }
-
- SubString String::from(const char* t, int startpos = 0)
- {
- int tlen = slen(t);
- int first = search(startpos, rep->len, t, tlen);
- return SubString(this, first, rep->len - first);
- }
-
- /*
- * split/join
- */
-
-
- int split(String& src, String results[], int n, String& sep)
- {
- String x = src;
- const char* s = x.rep->s;
- int sl = x.rep->len;
- int i = 0;
- int pos = 0;
- while (i < n && pos < sl)
- {
- int p = x.search(pos, sl, sep.rep->s, sep.rep->len);
- if (p < 0)
- p = sl;
- results[i].rep = Salloc(results[i].rep, &(s[pos]), p - pos, p - pos);
- i++;
- pos = p + sep.rep->len;
- }
- return(i);
- }
-
- int split(String& src, String results[], int n, Regex& r)
- {
- String x = src;
- char* s = x.rep->s;
- int sl = x.rep->len;
- int i = 0;
- int pos = 0;
- int p, matchlen;
- while (i < n && pos < sl)
- {
- p = r.search(s, sl, matchlen, pos);
- if (p < 0)
- p = sl;
- results[i].rep = Salloc(results[i].rep, &(s[pos]), p - pos, p - pos);
- i++;
- pos = p + matchlen;
- }
- return(i);
- }
-
-
- StrTmp join(String src[], int n, String& separator)
- {
- String sep = separator;
- int xlen = 0;
- for (int i = 0; i < n; ++i)
- xlen += src[i].rep->len;
- xlen += (n - 1) * sep.rep->len;
-
- StrRep* x = Sresize(0, xlen);
-
- int j = 0;
-
- for (i = 0; i < n - 1; ++i)
- {
- ncopy(src[i].rep->s, &(x->s[j]), src[i].rep->len);
- j += src[i].rep->len;
- ncopy(sep.rep->s, &(x->s[j]), sep.rep->len);
- j += sep.rep->len;
- }
- ncopy0(src[i].rep->s, &(x->s[j]), src[i].rep->len);
- return StrTmp(x);
- }
-
-
- /*
- misc
- */
-
-
- StrRep* Sreverse(StrRep* src, StrRep* dest)
- {
- int n = src->len;
- if (src != dest)
- dest = Salloc(dest, src->s, n, n);
- if (n > 0)
- {
- char* a = dest->s;
- char* b = &(a[n - 1]);
- while (a < b)
- {
- char t = *a;
- *a++ = *b;
- *b-- = t;
- }
- }
- return dest;
- }
-
-
- StrRep* Supcase(StrRep* src, StrRep* dest)
- {
- int n = src->len;
- if (src != dest) dest = Salloc(dest, src->s, n, n);
- char* p = dest->s;
- char* e = &(p[n]);
- for (; p < e; ++p) if (islower(*p)) *p = toupper(*p);
- return dest;
- }
-
- StrRep* Sdowncase(StrRep* src, StrRep* dest)
- {
- int n = src->len;
- if (src != dest) dest = Salloc(dest, src->s, n, n);
- char* p = dest->s;
- char* e = &(p[n]);
- for (; p < e; ++p) if (isupper(*p)) *p = tolower(*p);
- return dest;
- }
-
- StrRep* Scapitalize(StrRep* src, StrRep* dest)
- {
- int n = src->len;
- if (src != dest) dest = Salloc(dest, src->s, n, n);
-
- char* p = dest->s;
- char* e = &(p[n]);
- for (; p < e; ++p)
- {
- int at_word;
- if (at_word = islower(*p))
- *p = toupper(*p);
- else
- at_word = isupper(*p) || isdigit(*p);
-
- if (at_word)
- {
- while (++p < e)
- {
- if (isupper(*p))
- *p = tolower(*p);
- else if (!islower(*p) && !isdigit(*p))
- break;
- }
- }
- }
- return dest;
- }
-
- StrTmp replicate(char c, int n)
- {
- StrRep* w = Sresize(0, n);
- char* p = w->s;
- while (n-- > 0) *p++ = c;
- *p = 0;
- return (w);
- }
-
- StrTmp replicate(String& y, int n)
- {
- int len = y.rep->len;
- StrRep* w = Sresize(0, n * len);
- char* p = w->s;
- while (n-- > 0)
- {
- ncopy(y.rep->s, p, len);
- p += len;
- }
- *p = 0;
- return (w);
- }
-
- StrTmp common_prefix(String& x, String& y, int startpos = 0)
- {
- const char* xs = &(x.rep->s[startpos]);
- const char* ss = xs;
- const char* topx = &(x.rep->s[x.rep->len]);
- const char* ys = &(y.rep->s[startpos]);
- const char* topy = &(y.rep->s[y.rep->len]);
- for (int l = 0; xs < topx && ys < topy && *xs++ == *ys++; ++l);
- return StrTmp(Salloc(0, ss, l, l));
- }
-
-
- StrTmp common_suffix(String& x, String& y, int startpos = -1)
- {
- const char* xs = &(x.rep->s[x.rep->len + startpos]);
- const char* botx = x.rep->s;
- const char* ys = &(y.rep->s[y.rep->len + startpos]);
- const char* boty = y.rep->s;
- for (int l = 0; xs >= botx && ys >= boty && *xs == *ys ; --xs, --ys, ++l);
- return StrTmp(Salloc(0, ++xs, l, l));
- }
-
- // IO
-
- istream& operator>>(istream& s, String& x)
- {
- char ch;
- int i = 0;
- x.rep = Sresize(x.rep, 20);
- s >> WS;
- while (s.good())
- {
- s.get(ch);
- if (isspace(ch))
- break;
- if (i >= x.rep->sz - 1)
- x.rep = Sresize(x.rep, i+1);
- x.rep->s[i++] = ch;
- }
- x.rep->s[i] = 0;
- x.rep->len = i;
- s.failif(i == 0);
- return s;
- }
-
- int readline(istream& s, String& x, char terminator = '\n', int discard = 1)
- {
- char ch;
- int i = 0;
- x.rep = Sresize(x.rep, 80);
- while (s.good())
- {
- s.get(ch);
- if (ch != terminator || !discard)
- {
- if (i >= x.rep->sz - 1)
- x.rep = Sresize(x.rep, i+1);
- x.rep->s[i++] = ch;
- }
- if (ch == terminator)
- break;
- }
- x.rep->s[i] = 0;
- x.rep->len = i;
- return i;
- }
-
-
- ostream& operator<<(ostream& s, SubString& x)
- {
- const char* a = &(x.S->rep->s[x.pos]);
- const char* lasta = &(a[x.len]);
- while (a < lasta)
- s.put(*a++);
- return(s);
- }
-
- // from John.Willis@FAS.RI.CMU.EDU
-
- int String::freq(SubString& y)
- {
- int found = 0;
- for (int i = 0; i < rep->len; i++)
- if (match(i,rep->len,0,&(y.S->rep->s[y.pos]), y.len)) found++;
- return(found);
- }
-
- int String::freq(String& y)
- {
- int found = 0;
- for (int i = 0; i < rep->len; i++)
- if (match(i,rep->len,0,y.rep->s,y.rep->len)) found++;
- return(found);
- }
-
- int String::freq(const char* t)
- {
- int found = 0;
- for (int i = 0; i < rep->len; i++) if (match(i,rep->len,0,t)) found++;
- return(found);
- }
-
- int String::freq(char c)
- {
- int found = 0;
- for (int i = 0; i < rep->len; i++)
- if (match(i,rep->len,0,&c,1)) found++;
- return(found);
- }
-
-
- int String::OK()
- {
- int v = rep != 0; // have a rep
- v &= rep->len <= rep->sz; // string within bounds
- v &= rep->s[rep->len] == 0; // null-terminated
- if (!v) error("invariant failure");
- return v;
- }
-
- int SubString::OK()
- {
- int v = S != 0; // have a String;
- v &= S->OK(); // that is legal
- v &= pos + len >= S->rep->len;// pos and len within bounds
- if (!v) S->error("SubString invariant failure");
- return v;
- }
-
- int Regex::OK()
- {
- // can't verify much, since we've lost the original string
- int v = buf != 0; // have a regex buf
- v &= buf->buffer != 0; // with a pat
- if (!v) (*lib_error_handler)("Regex", "invariant failure");
- return v;
- }
-
- /*
- some built-in Regular expressions
- */
-
- Regex RXwhite("[ \n\t]+", 1);
- Regex RXint("-?[0-9]+", 1);
- Regex RXdouble("-?\\(\\([0-9]+\\.[0-9]*\\)\\|\\([0-9]+\\)\\|\\(\\.[0-9]+\\)\\)\\([eE][---+]?[0-9]+\\)?", 1, 200);
- Regex RXalpha("[A-Za-z]+", 1);
- Regex RXlowercase("[a-z]+", 1);
- Regex RXuppercase("[A-Z]+", 1);
- Regex RXalphanum("[0-9A-Za-z]+", 1);
- Regex RXidentifier("[A-Za-z_][A-Za-z0-9_]*", 1);
-
-