home *** CD-ROM | disk | FTP | other *** search
- /************************************************************************
- * bregex -- The `Better/BuGless/Berg' Regular Expression library *
- * *
- * Copyright (c) 1991-1993, by S.R. van den Berg, The Netherlands *
- * *
- * See bregex.h for more info. *
- * *
- * You are not required to understand the sources in order to *
- * use them (I suspect not many people could use them in that *
- * case). You can, however, admire them as a work of art :-). *
- ************************************************************************/
- #ifdef RCS
- static const char rcsid[]=
- "$Id: bregex.c,v 1.5 1993/10/01 16:48:26 berg Exp $";
- #endif
- #include <sys/types.h> /* size_t */
- #include <stddef.h> /* offsetof() */
- #ifndef RE_nCHAR_CLASS
- #include <string.h> /* strncmp() */
- #ifdef RE_nCTYPE
- #undef RE_nCTYPE
- #endif
- #else
- #ifndef RE_nERROR_REPORT
- #include <string.h> /* strlen() */
- #else
- #ifdef RE_MEMCHR
- #include <string.h> /* memchr() */
- #endif
- #endif
- #endif
- #ifndef RE_nCTYPE
- #include <ctype.h>
- #endif
- #define BREGEX_C_
- #include "bregex.h"
-
- #ifndef offsetof
- #define offsetof(s,m) ((char*)&((s*)sizeof(s))->m-(char*)sizeof(s))
- #endif
- #define ioffsetof(struc,memb) ((int)offsetof(struc,memb))
- #define maxindex(a) (sizeof(a)/sizeof((a)[0])-1)
- #define STRLEN(a) (sizeof(a)-1)
- #define mx(a,b) ((a)>(b)?(a):(b))
- typedef unsigned char uschar;
- #define uchar uschar
- #ifdef RE_nCTYPE
- #define islower(c) ((unsigned)(c)-'a'<='z'-'a')
- #define isupper(c) ((unsigned)(c)-'A'<='Z'-'A')
- #define isprint(c) ((unsigned)(c)-' '<='~'-' ') /* ASCII dependent */
- #define tolower(c) ((c)-'A'+'a')
- #define toupper(c) ((c)+'A'-'a')
- #define TO_LOWER(i) (isupper(i)&&((i)+='a'-'A'))
- #else
- #ifndef const
- #define TO_LOWER(i) ((i)=Tolower(i))
- #else
- #define TO_LOWER(i) (isupper(i)&&((i)=Tolower(i)))
- #endif /* const */
- #endif /* RE_nCTYPE */
-
- #define STARTSTACK 16 /* needed stacksize will solely depend */
- #define INCSTACK 16 /* on the length of the match */
-
- #define BITS_P_CHAR 8 /* if you change this, everything goes with it */
- #define OPB (1<<BITS_P_CHAR) /* except the \x00 and */
- /* \000 encodings */
- #define C_BEG_GROUP '(' /* if you prefer different symbols */
- #define C_OR '|' /* here is your chance */
- #define C_END_GROUP ')'
- #define C_0_OR_MORE '*'
- #define C_0_OR_1 '?'
- #define C_1_OR_MORE '+'
- #define C_DOT '.'
- #define C_BOL '^'
- #define C_EOL '$'
- #define C_BEG_NUMBER '{'
- #define C_SEP_NUMBER ','
- #define C_END_NUMBER '}'
- #define C_BEG_CLASS '['
- #define C_BEG_CCLASS '['
- #define C_DEL_CCLASS ':'
- #define C_END_CCLASS ']'
- #define C_NOT_CLASS '^'
- #define C_RANGE '-'
- #define C_END_CLASS ']'
- #define C_ESCAPE '\\'
-
- #ifndef RE_nCHAR_CLASS /* The order has to correspond with R_CC_* */
- static const char*const cclass[]={"alnum","alpha","blank","cntrl","digit",
- "graph","lower","print","punct","space","upper","xdigit",0};
- #endif /* the opcodes of the non-deterministic automaton */
- /* below OPB are the regular characters */
- #define OPC_CLASS OPB /* a bitmapped character class */
- #define OPC_DOT (OPB+1) /* any character */
- #define OPC_BOL (OPB+2) /* beginning of line */
- #define OPC_EOL (OPB+3) /* end of line */
- #define OPC_EPS (OPB+4) /* epsilon-transition (fork) */
- #define OPC_FIN (OPB+5) /* finish */
- #define OPC_GOP (OPB+6) /* group open */
- #define OPC_GCL (OPB+7) /* group close */
- /* Don't change any opcode here without checking skplen[] */
-
- #define R_BEG_GROUP (OPB|C_BEG_GROUP) /* token values */
- #define R_OR (OPB|C_OR)
- #define R_END_GROUP (OPB|C_END_GROUP)
- #define R_0_OR_MORE (OPB|C_0_OR_MORE)
- #define R_0_OR_1 (OPB|C_0_OR_1)
- #define R_1_OR_MORE (OPB|C_1_OR_MORE)
- #define R_DOT (OPB|C_DOT)
- #define R_BOL (OPB|C_BOL)
- #define R_EOL (OPB|C_EOL)
- #define R_BEG_NUMBER (OPB|C_BEG_NUMBER)
- #define R_END_NUMBER (OPB|C_END_NUMBER)
- #define R_BEG_CLASS (OPB|C_BEG_CLASS)
- #define R_NOT_CLASS (OPB|C_NOT_CLASS)
- #define R_RANGE (OPB|C_RANGE)
- #define R_END_CLASS (OPB|C_END_CLASS)
- #define R_CC_BASE (2*OPB)
- #define R_CC_alnum (R_CC_BASE)
- #define R_CC_alpha (R_CC_BASE+1)
- #define R_CC_blank (R_CC_BASE+2)
- #define R_CC_cntrl (R_CC_BASE+3)
- #define R_CC_digit (R_CC_BASE+4)
- #define R_CC_graph (R_CC_BASE+5)
- #define R_CC_lower (R_CC_BASE+6)
- #define R_CC_print (R_CC_BASE+7)
- #define R_CC_punct (R_CC_BASE+8)
- #define R_CC_space (R_CC_BASE+9)
- #define R_CC_upper (R_CC_BASE+10)
- #define R_CC_xdigit (R_CC_BASE+11)
- #define R_CC_nothing (R_CC_BASE+12) /* has to be the last R_CC_* */
-
- #define bit_type unsigned
- #define bit_bits (sizeof(bit_type)*BITS_P_CHAR)
- #define bit_index(which) ((unsigned)(which)/bit_bits)
- #define bit_mask(which) ((unsigned)1<<(unsigned)(which)%bit_bits)
- #define bit_toggle(name,which) (name[bit_index(which)]^=bit_mask(which))
- #define bit_test(name,which) (!!(name[bit_index(which)]&bit_mask(which)))
- #define bit_set(name,which,value) \
- (value?(name[bit_index(which)]|=bit_mask(which)):\
- (name[bit_index(which)]&=~bit_mask(which)))
- #define bit_mindex(size) (((size)+bit_bits-1)/bit_bits)
- #define bit_field(name,size) bit_type name[bit_mindex(size)]
-
- #ifdef RE_EMPTY_OR
- #ifdef RE_SURE_MINIMAL
- #undef RE_SURE_MINIMAL
- #endif
- #endif
- #ifndef RE_nERROR_DETECT
- #ifdef RE_SURE_MINIMAL
- #undef RE_SURE_MINIMAL
- #endif
- #endif
- #ifdef RE_HEX_CHAR
- #define HDIGIT
- #define BACKSL_CLASS
- #else
- #ifdef RE_OCT_CHAR
- #define HDIGIT
- #define BACKSL_CLASS
- #else
- #ifndef RE_nBRACES
- #define HDIGIT
- #endif
- #ifdef RE_SPEC_CHAR
- #define BACKSL_CLASS
- #endif
- #endif
- #endif
- #ifdef HDIGIT
- #define HEX2NUM
- #else
- #ifndef RE_nERROR_REPORT
- #define HEX2NUM
- #endif
- #endif
- #ifndef RE_nCASE_TABLE
- #define Tolower(c) (igncase[c])
- #undef TO_LOWER
- #define TO_LOWER(i) ((i)=Tolower(i))
- #else /* RE_nCASE_TABLE */
- #define Tolower(c) tolower(c)
- #endif /* RE_nCASE_TABLE */
- #ifndef isblank
- #define isblank(c) ((c)==' '||(c)=='\t')
- #endif
- #define SZ(x) (sizeof(struct x))
- #define Ceps (struct eps*)
- #define geno(to,add) ((char*)(to)+(add))
- #define epso(to,add) (Ceps geno(to,add))
- #define dif(high,low) ((char*)(high)-(char*)(low))
- #define Jtable(p) (((struct aligar*)(p))->table)
- #define SKIP_TOK (1<<0)
- #define CLASS_TOK (1<<1)
- #define ii (aleps.topc)
- #define kk (aleps.tnext)
- #define jj (aleps.ua.jjua)
- #define jjp (aleps.ua.tspawn)
- #define spawn sp.awn
-
- struct eps{unsigned opc;struct eps*stack;
- union evu{struct eps*awn;void*oid;}sp;struct eps*next;};
- struct ALEPS{unsigned topc;struct eps*tnext;
- union everyt{struct eps*tspawn;unsigned jjua;void*Irrelevoid;}ua;};
-
- #ifdef RE_nCONCUR_COMPILE /* some compilation temporaries */
- static struct eps*r; /* target object pointer */
- static uchar*p; /* regexp source pointer */
- static regex_t*gpreg; /* regex_t reference */
- static struct ALEPS aleps; /* general purpose aligner and scratch variables */
- static uchar*cachea,*cachep; /* cache at, cache resulting p */
- static size_t cacher; /* cache resulting delta r */
- static errorno; /* last error found */
- #ifdef RE_NSUBMAX
- static mnmatch; /* REG_NSUBMAX count cache */
- #endif
- #define VSO
- #define VS
- #define VSA
- #define VSD
- #else /* RE_nCONCUR_COMPILE */
- #define VSO vs /* to avoid static variables, use a */
- #define VS VSO, /* fake struct that is passed all the */
- #define VSS struct Vs*const
- #define VSA VSS VSO,
- #define VSD VSS VSO; /* way down the compilation functions */
- struct Vs{struct eps*r_;uchar*p_;regex_t*gpreg_;struct ALEPS aleps_;
- uchar*cachea_,*cachep_;size_t cacher_;int errorno_;
- #ifdef RE_NSUBMAX
- int mnmatch_;
- #endif
- };
- #define r (vs->r_)
- #define p (vs->p_)
- #define gpreg (vs->gpreg_)
- #define aleps (vs->aleps_)
- #define cachea (vs->cachea_)
- #define cachep (vs->cachep_)
- #define cacher (vs->cacher_)
- #define errorno (vs->errorno_)
- #define mnmatch (vs->mnmatch_)
- #endif /* RE_nCONCUR_COMPILE */
-
- #ifndef RE_nCASE_TABLE
- static uchar bothcase[OPB],igncase[OPB];
- #endif
-
- struct mchar{unsigned opcc_;struct eps*next1_;
- struct evoi{struct eps*st_;const void*wh_;}p1_,p2_;};
- struct chclass{unsigned opcc;struct eps*next1;struct evoi pos1,pos2;
- bit_field(c,OPB);};
- struct gop{unsigned opcgop;struct eps*gopnext;
- union{unsigned nr;void*IRrelevoid;}gopp;};
- #define gcl gop
- #ifdef RE_nJUMP_TABLE
- struct aligar{unsigned aopc;bit_field(table,OPB);};
- #else /* RE_nJUMP_TABLE */
- #ifdef RE_SMALL_JUMP_TABLE
- typedef uchar jt_type;
- #else
- typedef unsigned jt_type;
- #endif
- struct aligar{unsigned aopc;jt_type table[OPB];};
- #endif /* RE_nJUMP_TABLE */
- /* lenght array, used by skiplen() */
- static const char skplen[]={SZ(chclass),SZ(mchar),SZ(mchar),SZ(mchar),SZ(eps),
- 0
- #ifndef RE_pNOSUB
- ,SZ(gop),SZ(gcl)
- #endif
- };
-
- #ifndef RE_nERROR_DETECT
- static void error(VS nr)VSD const unsigned nr;
- { if(!errorno||(uchar*)gpreg->re_es.re_stack>p) /* no error yet? earlier? */
- errorno=nr,gpreg->re_es.re_stack=p; /* ok, save it for posterity */
- }
- #else
- #define error(VS nr)
- #endif
-
- #ifdef HEX2NUM
- static const char hex2num[]="0123456789abcdef"; /* to get rid of ASCII */
- #endif
- #ifdef HDIGIT
- static hdigit(d) /* returns 16 if non-xdigit */
- { register const char*chp;
- TO_LOWER(d);
- for(chp=hex2num-1;*++chp&&*chp!=d;);
- return(int)(chp-hex2num);
- }
- #endif
-
- static unsigned tok(VS type)VSD const int type; /* get token */
- { register size_t i= *p;
- #ifndef RE_nEXTENDED
- #ifndef RE_pEXTENDED
- register unsigned extended=gpreg->re_flags®_EXTENDED; /* cache it */
- #endif
- #endif
- if(type&CLASS_TOK) /* inside a class? */
- { switch(i)
- { case '\0':goto case_0;
- case C_NOT_CLASS:case C_RANGE:case C_END_CLASS:goto o_case;
- #ifdef BACKSL_CLASS
- case C_ESCAPE:goto e_case;
- #endif
- #ifndef RE_nCHAR_CLASS
- case C_BEG_CCLASS:
- if(p[1]==C_DEL_CCLASS)
- { ;{ const uchar*cc=p+1;
- do /* find the other delimiter */
- if(!*++cc)
- { error(VS REG_ECTYPE);break; /* oops, end of string */
- }
- while(*cc!=C_DEL_CCLASS||cc[1]!=C_END_CCLASS);
- i=cc-p-2; /* how long was it now? */
- }
- ;{ const char*const*ccp; /* do we have that name here? */
- for(ccp=cclass;strncmp(*ccp,(char*)p+2,i)||(*ccp)[i];)
- if(!*++ccp)
- { error(VS REG_ECTYPE);break; /* oops, past last name */
- }
- if(p[i+=2]&&p[++i]) /* carefully sense the end */
- i++;
- if(type&SKIP_TOK)
- p+=i; /* actually skip it now */
- return R_CC_BASE+ccp-cclass;
- }
- }
- #endif
- }
- }
- switch(i)
- { case '\0':
- case_0: return R_END_GROUP;
- case C_ESCAPE:
- e_case: switch(i=p[1])
- { case '\0':i=C_ESCAPE;error(VS REG_EESCAPE);goto once;
- #ifdef RE_HEX_CHAR
- case 'x': /* \x00 */
- if(hdigit(i)<16)
- { i=hdigit(i);
- if(hdigit(p[3])<16)
- { i=(i<<4)+hdigit(p[3]);
- #ifdef RE_OCT_CHAR
- goto skip2;
- }
- else
- goto skip1;
- #else
- if(type&SKIP_TOK)
- p+=2;
- }
- else if(type&SKIP_TOK)
- p++;
- #endif
- }
- break;
- #endif
- #ifdef RE_OCT_CHAR
- default: /* \000 */
- if(hdigit(i)<8)
- { i=hdigit(i);
- if(hdigit(p[2])<8)
- { i=(i<<3)+hdigit(p[2]);
- if(hdigit(p[3])<8)
- { i=(i<<3)+hdigit(p[3]);
- skip2: if(type&SKIP_TOK)
- p+=2;
- }
- else
- skip1: if(type&SKIP_TOK)
- p++;
- }
- }
- break;
- #endif /* RE_OCT_CHAR */
- #ifdef RE_SPEC_CHAR
- case 'a':i='\a';break;
- case 'b':i='\b';break;
- case 'f':i='\f';break;
- case 'n':i='\n';break;
- case 'r':i='\r';break;
- case 't':i='\t';break;
- case 'v':i='\v';break;
- #endif
- #ifndef RE_pEXTENDED
- case C_0_OR_1:case C_1_OR_MORE:case C_BEG_GROUP:case C_OR:
- case C_END_GROUP:
- #ifndef RE_nBRACES
- case C_BEG_NUMBER:case C_END_NUMBER:
- #endif
- #ifndef RE_nEXTENDED
- if(!extended)
- #endif
- i|=OPB;
- #endif /* RE_pEXTENDED */
- }
- if(type&SKIP_TOK)
- p++;
- break;
- #ifndef RE_nEXTENDED
- case C_0_OR_1:case C_1_OR_MORE:case C_BEG_GROUP:case C_OR:
- case C_END_GROUP:
- #ifndef RE_nBRACES
- case C_BEG_NUMBER:case C_END_NUMBER:
- #endif
- #ifndef RE_pEXTENDED
- if(!extended)
- break;
- #endif
- #endif /* RE_nEXTENDED */
- case C_0_OR_MORE:case C_DOT:case C_BOL:case C_EOL:case C_BEG_CLASS:
- if(!(type&CLASS_TOK))
- o_case: i|=OPB;
- }
- once:
- if(type&SKIP_TOK) /* skip it as well? */
- p++;
- return i; /* return token found */
- }
-
- static unsigned token(VSO)VSD /* normal token */
- { return tok(VS 0);
- }
-
- static unsigned skiptok(VSO)VSD /* skip normal token */
- { return tok(VS SKIP_TOK);
- }
-
- static unsigned classtok(VSO)VSD /* token in class */
- { return tok(VS CLASS_TOK);
- }
-
- static unsigned skipclass(VSO)VSD /* skip token in class */
- { return tok(VS CLASS_TOK|SKIP_TOK);
- }
- /* epsilon transition */
- static void puteps(spot,to,aswell)struct eps*const spot;
- const struct eps*const to,*const aswell;
- { spot->opc=OPC_EPS;spot->next=to!=spot?Ceps to:Ceps aswell;
- spot->spawn=aswell!=spot?Ceps aswell:Ceps to;
- #ifndef RE_SURE_MINIMAL
- spot->stack=0; /* zero stack; cstack(), the optimiser, needs this! */
- #endif
- }
-
- static void putneps(spot,to)struct eps*const spot;const struct eps*const to;
- { puteps(spot,to,spot+1); /* epsilon transition, one branch to the next */
- }
-
- #define Cc(p,memb) (((struct chclass*)(p))->memb)
- #define Go(p,memb) (((struct gop*)(p))->memb)
- #define Gc(p,memb) (((struct gcl*)(p))->memb)
- #define rAc Cc(r,c)
-
- static void bseti(VS fld,i,j)VSD bit_field(fld,OPB);unsigned i;
- const unsigned j;
- { bit_set(fld,i,j); /* mark 'i' as being in the class */
- if(gpreg->re_flags®_ICASE) /* mark the other case too */
- { if(islower(i)) /* lowercase */
- i=toupper(i);
- else
- TO_LOWER(i); /* uppercase */
- bit_set(fld,i,j);
- }
- }
- /* general purpose length routine */
- static struct eps*skiplen(ep)const struct eps*const ep;
- { return epso(ep,ep->opc<OPB?SZ(mchar):skplen[ep->opc-OPB]);
- }
-
- static void initbitm(fld,reset)bit_type*fld;const unsigned reset;
- { register unsigned i=bit_mindex(OPB);
- do fld[--i]=reset?0:~(bit_type)0; /* preset the bit field */
- while(i);
- }
-
- static void initsimp(rp,e)struct eps*rp,*e;
- { Cc(rp,next1)=e;Cc(rp,pos1.st_)=Cc(rp,pos2.st_)=0; /* init thiss & other */
- #ifndef RE_nJUMP_TABLE /* stacks to 0 (regexec() & initjtable() need this) */
- Cc(rp,pos2.wh_)=0;
- #endif
- }
- /* we need one forward declaration to allow recursion */
- static por P((VSA const struct eps*const e));
-
- #define IN_CLASS(CLASS,class) \
- case CLASS:\
- if(class(cc))\
- break;\
- continue
-
- static void psimp(VS e)VSD const struct eps*e; /* put a simple element */
- { register union{unsigned uns;struct eps*epp;}u; /* save stack space */
- register union{unsigned i;void*pold;}v; /* for the recursion */
- switch(token(VSO))
- { case R_BEG_GROUP:skiptok(VSO); /* not so simple after all */
- #ifndef RE_pNOSUB /* are we exceeding REG_NSUBMAX? */
- if(!e||gpreg->re_flags®_NOSUB
- #ifdef RE_NSUBMAX
- ||gpreg->re_flags®_NSUBMAX&&gpreg->re_nsub>=mnmatch
- #endif
- )
- { if(por(VS e))
- error(VS REG_EPAREN);
- #ifndef RE_nBRACES /* we need these fillers, identical subexpressions */
- if(e) /* must have the same lengths (pnorm() depends on it) */
- r->opc=OPC_GOP; /* also, bogus opcodes are not allowed */
- r=epso(r,SZ(gop)); /* e.g. findandrep() depends on it */
- if(!e)
- goto no_e;
- #else
- r=epso(r,SZ(gop)+SZ(gcl));return;
- #endif
- }
- else /* insert the numbered paren pairs */
- { (u.epp=r)->opc=OPC_GOP;r=epso(u.epp,SZ(gop));
- u.epp->stack=epso(u.epp,SZ(gop)); /* remember the spot of the ( */
- Go(u.epp,gopp.nr)= ++gpreg->re_nsub;v.pold=p;por(VS Ceps 0);
- p=v.pold;r->stack=Ceps e;Gc(e=r,gopp.nr)=Go(u.epp,gopp.nr);
- r=epso(u.epp,SZ(gop)); /* match up the ) with the ( */
- if(por(VS e)) /* insert the meat */
- error(VS REG_EPAREN);
- }
- r->opc=OPC_GCL;
- no_e: r=epso(r,SZ(gcl));return;
- #else /* RE_pNOSUB */
- if(por(VS e))
- error(VS REG_EPAREN);
- return;
- #endif /* RE_pNOSUB */
- case R_BEG_CLASS: /* a simple class */
- skiptok(VSO);u.uns=C_NOT_CLASS!=*p; /* `not' modifier? */
- if(e)
- r->opc=OPC_CLASS,initsimp(r,Ceps e),initbitm(rAc,u.uns);
- if(!u.uns) /* skip the `not' modifier */
- { p++;
- #ifndef RE_nNEWLINE
- #ifdef RE_pNEWLINE
- if(e)
- #else
- if(e&&gpreg->re_flags®_NEWLINE) /* newlines are special */
- #endif
- bit_toggle(rAc,'\n');
- #endif
- }
- if(*p==C_END_CLASS) /* right at the start, cannot mean the end */
- { p++;
- if(e)
- v.i=C_END_CLASS,bit_toggle(rAc,C_END_CLASS);
- }
- else if(*p==C_RANGE) /* take it literally */
- { p++;
- if(e)
- v.i=C_RANGE,bit_toggle(rAc,C_RANGE);
- }
- for(;;skipclass(VSO)) /* skip through the class */
- { switch(classtok(VSO))
- { case R_END_GROUP:error(VS REG_EBRACK);
- case R_END_CLASS:skipclass(VSO);r=epso(r,SZ(chclass));
- return;
- #ifndef RE_nCHAR_CLASS
- case R_CC_alnum:case R_CC_alpha:case R_CC_blank:case R_CC_cntrl:
- case R_CC_digit:case R_CC_graph:case R_CC_lower:case R_CC_print:
- case R_CC_punct:case R_CC_space:case R_CC_upper:case R_CC_xdigit:
- if(e)
- { unsigned cc;
- v.i=classtok(VSO);cc=OPB-1;
- do
- { switch(v.i)
- { IN_CLASS(R_CC_alnum,isalnum);
- IN_CLASS(R_CC_alpha,isalpha);
- IN_CLASS(R_CC_blank,isblank);
- IN_CLASS(R_CC_cntrl,iscntrl);
- IN_CLASS(R_CC_digit,isdigit);
- IN_CLASS(R_CC_graph,isgraph);
- IN_CLASS(R_CC_lower,islower);
- IN_CLASS(R_CC_print,isprint);
- IN_CLASS(R_CC_punct,ispunct);
- IN_CLASS(R_CC_space,isspace);
- IN_CLASS(R_CC_upper,isupper);
- default:
- if(!isxdigit(cc))
- continue;
- } /* it's in the character class */
- bseti(VS rAc,cc,u.uns); /* so mark it */
- }
- while(cc--); /* walk through the whole alphabet */
- }
- case R_CC_nothing:v.i=OPB;continue; /* erroneous char class */
- #endif
- case R_RANGE:p++;
- switch(classtok(VSO))
- { default:
- if(e) /* mark all in range */
- { while(++v.i<(classtok(VSO)&(OPB-1))) /* careful */
- bseti(VS rAc,v.i&(OPB-1),u.uns); /* with mask */
- #ifndef RE_nERROR_DETECT /* to stay within bounds */
- if(v.i!=(classtok(VSO)&(OPB-1))
- #ifndef RE_nCHAR_CLASS
- ||classtok(VSO)>=R_CC_BASE
- #endif
- )
- error(VS REG_ERANGE);
- #endif
- }
- break;
- case R_END_CLASS:p--; /* literally */
- case R_END_GROUP:;
- }
- }
- if(e) /* regular character, mark it */
- bseti(VS rAc,v.i=classtok(VSO),u.uns);
- }
- case R_END_GROUP:return;
- case R_DOT: /* matches everything but a newline */
- if(e) /* if REG_NEWLINE is set */
- { u.uns=OPC_DOT;goto fine;
- }
- goto fine2;
- case R_BOL: /* beginning of line */
- if(e)
- { u.uns=OPC_BOL;goto finep;
- }
- goto fine2;
- case R_EOL: /* end of line */
- if(e)
- { u.uns=OPC_EOL;
- finep:
- #ifndef RE_DUMB_ANCHOR
- Cc(r,pos1.wh_)=p;
- #endif
- goto fine;
- }
- goto fine2;
- }
- if(e) /* a regular character */
- #ifndef RE_nERROR_DETECT
- { if((u.uns=token(VSO))>=OPB)
- error(VS REG_BADRPT),u.uns&=OPB-1;
- #else
- { u.uns=token(VSO)&OPB-1;
- #endif
- fine:
- r->opc=u.uns<OPB&&gpreg->re_flags®_ICASE&&isupper(u.uns)?
- Tolower(u.uns):u.uns;
- initsimp(r,Ceps e);
- }
- fine2:
- skiptok(VSO);r=epso(r,SZ(mchar)); /* advance source and target pointers */
- }
-
- #define PROGID const char progid[]=\
- "Bregex library, copyright (c) 1991-1993, S.R. van den Berg, The Netherlands"
-
- #ifndef RE_nBRACES
- static dec(VSO)VSD /* return a decimal for {.. , ..} */
- { unsigned i=0;
- while(hdigit(*p)<10)
- i=i*10+hdigit(*p++);
- return i;
- }
-
- static skipend(VSO)VSD /* check if we are close before the end */
- { switch(token(VSO)) /* anything special? */
- { case R_OR:case R_END_GROUP: /* the end in person */
- goto ret1;
- case R_BEG_NUMBER: /* something complicated */
- for(;;) /* start searching for its complement */
- switch(skiptok(VSO))
- { case R_END_GROUP:goto ret1; /* how unexpected */
- case R_END_NUMBER:goto skippend; /* how predictable */
- }
- case R_0_OR_MORE:case R_1_OR_MORE:case R_0_OR_1:case R_END_NUMBER:
- skiptok(VSO); /* how casual */
- skippend:
- switch(token(VSO)) /* look one step ahead */
- { case R_OR:case R_END_GROUP: /* so it ends here afterall */
- ret1: return 1;
- }
- }
- return 0; /* then again, it doesn't have to */
- }
-
- #define EOS(x) (kk?kk:(x)) /* real end, or continue with the next? */
-
- static void pnorm(VS e)VSD const struct eps*const e; /* put normal text */
- { void*pold;int i;union{struct eps*old;unsigned len;}q;
- for(;;) /* get the length */
- { i=0;pold=p;q.old=r;psimp(VS Ceps 0);q.len=dif(r,q.old); /* store it */
- if(e) /* we really want to generate code */
- r=epso(r,-(int)q.len); /* backup before we start */
- switch(token(VSO)) /* anything special trailing it? */
- {
- #ifndef RE_pNOSUB
- unsigned pin;
- #endif
- case R_0_OR_MORE:ii=0;goto procj0; /* ii indicates the minimum */
- case R_1_OR_MORE:ii=1; /* no. of occurances */
- procj0: jj=0;goto proc; /* jj indicates the maximum */
- case R_0_OR_1:ii=0;goto procj1; /* no. of occurances */
- default:ii=1;
- procj1: jj=1;goto proc;
- case R_BEG_NUMBER:
- #ifndef RE_pNOSUB
- for(;;gpreg->re_nsub=pin) /* restore it from our excursion */
- #else
- for(;;)
- #endif
- { skiptok(VSO);ii=dec(VSO);jj=C_SEP_NUMBER==*p?(p++,dec(VSO)):ii;
- #ifndef RE_nERROR_DETECT
- if(token(VSO)!=R_END_NUMBER)
- error(VS REG_EBRACE);
- #endif
- proc: if(jj)
- {
- #ifndef RE_SURE_MINIMAL
- if(ii>jj)
- { error(VS REG_BADBR);jj=ii;
- }
- #endif
- if(!e) /* only calculating the length? */
- { r=epso(r+jj-ii,q.len*(jj-1));goto eit; /* use a shortcut */
- }
- }
- else if(!e)
- { r++;
- if(ii)
- r=epso(r,q.len*(ii-1)); /* different shortcut */
- goto eit;
- }
- kk=skipend(VSO)?Ceps e:Ceps 0;p=pold;
- if(++i<ii) /* while we're still below the minimum */
- #ifdef RE_pNOSUB
- psimp(VS epso(r,q.len));
- else if(i==ii)
- #else /* keep the paren groups out of our hair */
- { pin=gpreg->re_flags;gpreg->re_flags|=REG_NOSUB;
- psimp(VS epso(r,q.len));gpreg->re_flags=pin;
- } /* keep the paren group number the same */
- else if(pin=gpreg->re_nsub,i==ii) /* while copying */
- #endif
- { if(i==jj) /* last copy */
- { psimp(VS EOS(epso(r,q.len)));goto eit;
- }
- if(!jj) /* maximum is infinit */
- { puteps(epso(r,q.len),r,EOS(epso(r,q.len)+1)); /* loop */
- psimp(VS epso(r,q.len));r++;goto eit;
- }
- psimp(VS epso(r,q.len)); /* plain */
- }
- else if(!jj) /* maximum is infinit */
- { putneps(r,EOS(epso(r+1,q.len)));psimp(VS r++);goto eit;
- }
- else if(i==jj) /* maximum reached */
- { putneps(r,EOS(epso(r+1,q.len)));r++;
- psimp(VS EOS(epso(r,q.len)));
- eit: if(skipend(VSO)) /* real end? */
- return;
- break;
- }
- else /* plain */
- { putneps(r,EOS(epso(r,(q.len+SZ(eps))*(1+jj-i))));
- psimp(VS epso(++r,q.len));
- }
- }
- }
- }
- }
- #else /* RE_nBRACES */
- #define EOS(x) (jjp?jjp:(x))
-
- static void pnorm(VS e)VSD const struct eps*const e; /* put normal text */
- { void*pold;struct eps*rold;
- for(;;) /* skip it first */
- { pold=p;rold=r;psimp(VS Ceps 0);
- if(!e) /* if we're just counting */
- pold=p; /* remember this spot */
- ii=token(VSO);skiptok(VSO);
- if(e)
- { p=pold;pold=r;
- jjp=token(VSO)==R_OR||token(VSO)==R_END_GROUP?Ceps e:Ceps 0;
- }
- switch(ii) /* check for any of the postfix operators */
- { case R_0_OR_MORE:r++;
- if(e) /* first an epsilon, then the rest */
- putneps(rold,EOS(r)),r=rold+1,psimp(VS rold);
- goto incagoon;
- case R_1_OR_MORE: /* first the rest */
- if(e) /* and then an epsilon */
- puteps(r,rold,EOS(r+1)),r=rold,psimp(VS Ceps pold);
- r++;goto incagoon;
- case R_0_OR_1:r++;
- if(e) /* first an epsilon, then the rest */
- putneps(rold,r=EOS(r)),pold=r,r=rold+1,psimp(VS Ceps pold);
- incagoon: switch(skiptok(VSO)) /* at the end of this group already? */
- { case R_OR:case R_END_GROUP:return;
- }
- continue; /* regular end of the group */
- case R_OR:case R_END_GROUP:case '\0':
- if(e)
- r=rold,psimp(VS e);
- return;
- }
- if(e) /* no fancy postfix operators, plain vanilla */
- r=rold,psimp(VS Ceps pold);
- else
- p=pold;
- }
- }
- #endif /* RE_nBRACES */
-
- static PROGID;
-
- static por(VS e)VSD const struct eps*const e; /* put an or-group */
- { uchar*pvold;struct eps*rvold;
- if(!e) /* if we're only counting */
- { rvold=r;
- if(cachea==(pvold=p)) /* check the cache */
- { p=cachep;r=epso(rvold,cacher);goto ret0; /* yep, hit */
- } /* if the cache is taken out, compile time rises exponentially */
- } /* with the number of nested parens */
- for(;;)
- { uchar*pold;struct eps*rold;
- for(pold=p,rold=r;;)
- { switch(token(VSO))
- { default:pnorm(VS Ceps 0);r=rold;continue; /* still in this group */
- case R_END_GROUP: /* found the end of the group */
- #ifdef RE_EMPTY_OR
- if(p==pold) /* empty `or' group */
- { if(e)
- puteps(r,e,e); /* misused epsilon as branch */
- r++;
- }
- else
- #endif
- p=pold,pnorm(VS e); /* normal last group */
- if(!e) /* only counting? refresh the cache */
- { skiptok(VSO);cachea=pvold;cachep=p;cacher=dif(r,rvold);
- goto ret0;
- }
- if(*p)
- { skiptok(VSO);
- ret0: return 0;
- }
- return 1;
- case R_OR:r++;
- #ifdef RE_EMPTY_OR
- if(p==pold) /* empty `or' group */
- { if(e)
- putneps(rold,e); /* special epsilon */
- }
- else
- #endif
- { p=pold;pnorm(VS e); /* normal `or' group, first an */
- if(e) /* epsilon, then the rest */
- putneps(rold,r);
- }
- skiptok(VSO);
- }
- break;
- }
- }
- }
-
- #ifndef RE_SURE_MINIMAL
- static void findandrep(VS old,newv)VSD register struct eps**const old;
- struct eps*const newv;
- { register struct eps*i,*t= *old;
- for(i=r;;i=skiplen(i)) /* change all pointers from *old to newv */
- { switch(i->opc)
- { case OPC_FIN:*old=t;return; /* last one, finished */
- case OPC_EPS:
- if(i->next==t)
- i->next=newv;
- if(i->spawn==t)
- i->spawn=newv;
- continue;
- }
- if(i->stack==t)
- i->stack=newv;
- }
- }
-
- #define drs(m) (*(struct eps**)geno(*stack,ioffsetof(struct eps,m)^ofs))
- #define RETyes 1
- #define RETno 0
-
- #ifndef RE_pNOSUB
- #undef RETyes
- #undef RETno
- #define RETyes ((struct eps**)0)
- #define RETno nst
-
- static struct eps**cstack(VS stack,ofs)VSD struct eps**const stack;
- { struct eps**nst;
- for(nst= &drs(next);;)
- { switch((*nst)->opc)
- { case OPC_GOP:case OPC_GCL:nst= &(*nst)->stack;continue;
- }
- break;
- }
- #else
- #define nst (&drs(next))
- static cstack(VS stack,ofs)VSD struct eps**const stack;
- {
- #endif /* RE_pNOSUB */
- if((*nst)->stack==Ceps p)
- { findandrep(VS *(struct eps***)stack,drs(next));*stack=drs(spawn);
- return RETyes;
- }
- return RETno;
- }
- /* break up any closed epsilon circles, otherwise they can't be executed */
- static fillout(VS stack)VSD struct eps**const stack;
- {
- #ifndef RE_pNOSUB
- struct eps**nst;
- #endif
- if((*stack)->opc!=OPC_EPS||(*stack)->stack)
- return 0;
- (*stack)->stack=Ceps p; /* mark this one as used */
- #ifndef RE_pNOSUB
- #define RECURS(nxt) \
- do\
- { if(!(nst=cstack(VS (struct eps**)stack,\
- ioffsetof(struct eps,nxt)^ioffsetof(struct eps,next))))\
- return 1;\
- }\
- while(fillout(VS nst))
- #else /* RE_pNOSUB */
- #define RECURS(nxt) \
- do\
- if(cstack(VS (struct eps**)stack,\
- ioffsetof(struct eps,nxt)^ioffsetof(struct eps,next)))\
- return 1;\
- while(fillout(VS &(*stack)->nxt))
- #endif /* RE_pNOSUB */
- RECURS(next);RECURS(spawn);return 0; /* recurse */
- }
- #endif /* RE_SURE_MINIMAL */
-
- #define XOR1 \
- (ioffsetof(struct chclass,pos1)^ioffsetof(struct chclass,pos2))
- #define PC(thiss,t) (((struct evoi*)geno(thiss,t))->st_)
- #define PCp(thiss,t) (((struct evoi*)geno(thiss,t))->wh_)
- #define PCs(s,thiss,t) (*(struct eps**)((s)=(void*)&PC(thiss,t)))
- #define PCps(s,thiss,t) ((uchar*)*(const void**)\
- geno(&PCs(s,thiss,t),ioffsetof(struct evoi,wh_)))
- #define Pci(thiss,t) PCs(c,thiss,t)
- #define Pc() (*(struct eps**)c)
- #define Pcp() (*(const void**)(c+=ioffsetof(struct evoi,wh_)))
- #define PcP() (*(const void**)c)
-
- static void initjtable(VSO)VSD
- #ifndef RE_nJUMP_TABLE
- { typedef int goodness;
- goodness bestval;jt_type*bt=Jtable(gpreg->re_finpoint);
- unsigned depth,maxdepth,th1,ot1;
- register struct eps*stack,*thiss,*other,*code,*s;jt_type jt[OPB];
- ;{ register i=OPB-1;
- while(jt[i]=0,i--); /* pre-init our jump table */
- }
- th1=ioffsetof(struct chclass,pos1);ot1=ioffsetof(struct chclass,pos2);
- maxdepth=(jt_type)~(jt_type)0;depth=1;thiss=other=code=(s=r)-1;bestval=0;
- stack=0;goto nostack;
- do /* now flood the tree from the left, counting the distance */
- { th1^=XOR1;ot1^=XOR1;thiss=other;other=code;stack=0;depth++;
- do /* pop next entry off this pc-stack */
- { thiss=PC(s=thiss,th1);PC(s,th1)=0;s=s->stack;goto nostack;
- do
- for(s=stack->spawn,stack=stack->stack;;) /* empty epsilon stack */
- nostack: { switch(s->opc-OPB)
- { case OPC_EPS-OPB:s->stack=stack;s=(stack=s)->next;
- continue;
- #ifndef RE_pNOSUB
- case OPC_GOP-OPB:case OPC_GCL-OPB:s=s->stack;continue;
- #endif /* maxjump determined */
- case OPC_FIN-OPB:maxdepth=depth;break;
- case OPC_DOT-OPB:case OPC_CLASS-OPB:
- { register unsigned c;
- c=OPB-1;
- do
- { if(s->opc==OPC_CLASS)
- { if(!bit_test(Cc(s,c),c))
- continue;
- }
- #ifndef RE_nNEWLINE
- #ifdef RE_pNEWLINE
- else if(c=='\n')
- #else
- else if(c=='\n'&&gpreg->re_flags®_NEWLINE)
- #endif
- continue;
- #endif
- jt[c]=depth;
- }
- while(c--); /* loop through the whole alphabet */
- goto acc_st;
- }
- case OPC_BOL-OPB:case OPC_EOL-OPB:jt['\n']=depth;goto acc_st;
- default:jt[s->opc]=depth; /* if state not yet pushed and */
- acc_st: if(!PC(s,ot1)&&!Cc(s,pos2.wh_)) /* still in the running */
- { PC(s,ot1)=other;other=s;
- if(depth==maxdepth) /* last rounds? */
- Cc(s,pos2.wh_)=s; /* knock him out */
- }
- }
- break;
- }
- while(stack);
- }
- while(thiss!=code); /* still not done with this pass? */
- if(gpreg->re_flags®_ICASE)
- { unsigned i=OPB;
- do
- if(isupper(--i)) /* fill the uppercase values with the */
- jt[i]=jt[Tolower(i)]; /* lowercase ones */
- while(i);
- }
- if(depth==maxdepth) /* already at the limit? */
- depth--; /* keep our depth within bounds */
- ;{ jt_type*jp;unsigned i;goodness better;
- jp=jt+(i=OPB);better=depth;
- do
- if(i--,*--jp>depth) /* out of bounds? */
- *jp=depth;
- else if(isprint(i)) /* statistically relevant? */
- better+=depth-*jp; /* add its weight to the rating */
- while(i);
- if(better-bestval>0) /* is this one better than our old one? */
- { jt_type*jpb; /* it's better, so copy it over */
- bestval=better;gpreg->re_maxjump=depth-1;i=OPB;jpb=bt;
- while(*jpb++=depth-*jp++,--i);
- }
- }
- }
- while(other!=code); /* done with every thread? */
- #ifndef RE_STRLEN
- *bt=0; /* make sure that \0 always matches */
- #endif
- #else /* RE_nJUMP_TABLE */
- { bit_type*jt=Jtable(gpreg->re_finpoint);
- ;{ register struct eps*stack,*s;
- initbitm(jt,1);stack=0;s=r;goto nxt3; /* preset our bitmask */
- do /* now examine all the candidates that could be first */
- for(s=stack->spawn,stack=stack->stack;;)
- nxt3: { switch(s->opc-OPB)
- { case OPC_EPS-OPB:s->stack=stack;s=(stack=s)->next;continue;
- #ifndef RE_pNOSUB
- case OPC_GOP-OPB:case OPC_GCL-OPB:s=s->stack;continue;
- #endif /* pity, mark everything as eligible */
- case OPC_FIN-OPB:case OPC_DOT-OPB:initbitm(jt,0);goto bailout;
- case OPC_CLASS-OPB:
- ;{ size_t i;bit_type*from;
- i=maxindex(Cc(s,c))+1;from=Cc(s,c);
- while(*jt++|=*from++,--i);
- }
- jt-=maxindex(Cc(s,c))+1;break;
- case OPC_BOL-OPB:case OPC_EOL-OPB:bseti(VS jt,'\n',1);break;
- default:bseti(VS jt,s->opc,1);
- }
- break;
- }
- while(stack);
- }
- bailout:
- #ifndef RE_STRLEN
- bseti(VS jt,'\0',1); /* make sure that \0 always matches */
- #endif
- #endif /* RE_nJUMP_TABLE */
- }
-
- #ifndef reg_malloc
- #include <stdlib.h> /* malloc() realloc() free() */
-
- void
- *(*reg_malloc)P((size_t size))=malloc,
- *(*reg_realloc)P((void*ptr,size_t size))=realloc,
- (*reg_free)P((void*ptr))=free;
- #endif
-
- regcomp(preg,pattern,cflags)regex_t*const preg;const char*const pattern;
- { struct eps*s;
- #ifndef RE_nCONCUR_COMPILE
- struct Vs vss; /* virtual static struct :-) */
- #define vs (&vss)
- #endif
- #ifndef RE_nCASE_TABLE
- if(!bothcase[OPB-1]) /* already initialised case tables? */
- { register unsigned c=OPB-1; /* no, so go through the entire alphabet */
- while(igncase[bothcase[c]=c]=isupper(c)?tolower(c):c,c--);
- }
- #endif
- #ifdef RE_NSUBMAX
- if(((gpreg=preg)->re_flags=cflags)®_NSUBMAX) /* max. re_nsub given? */
- mnmatch=preg->re_nsub;
- #else
- (gpreg=preg)->re_flags=cflags; /* store the flags, for later reference */
- #endif
- preg->re_nsub=errorno=(char*)progid-(char*)progid;preg->re_es.re_stack=0;
- p=(uchar*)pattern;r=Ceps&aleps;cachea=0;por(VS Ceps 0); /* 1st a trial run */
- /*
- * Possible ANSI violation here. The assumption is that:
- * (&aleps+a)+b==&aleps+c for any positive integers a,b,c
- * where a+b==c
- */
- ;{ size_t i;
- if(!(r=preg->re_compiled=s=reg_malloc((i=dif(r,&aleps))+ /* allocate */
- mx(ioffsetof(struct eps,stack)+SZ(eps*),SZ(aligar))))) /* the memory */
- return REG_ESPACE;
- p=(uchar*)pattern;preg->re_nsub=0; /* back to the start of the source */
- if(!por(VS epso(s,i))) /* now compile it for real */
- error(VS REG_EPAREN);
- }
- (Ceps(preg->re_finpoint=r))->opc=OPC_FIN; /* add end */
- #ifndef RE_SURE_MINIMAL
- r->stack=0;
- for(r=s;;s=skiplen(s)) /* simplify the compiled code (i.e. */
- { switch(s->opc) /* take out cyclic epsilon references) */
- { case OPC_EPS:p=(uchar*)s;fillout(VS &s); /* check tree */
- default:continue;
- case OPC_FIN:; /* finished */
- }
- break;
- }
- #endif
- cflags|=REG_SIMPLE;
- #ifndef RE_DUMB_ANCHOR
- ;{ register struct eps*stack;
- stack=0;s=r;goto nxt;
- do
- for(s=stack->spawn,stack=stack->stack;;)
- nxt: { switch(s->opc)
- { case OPC_EPS:s->stack=stack;s=(stack=s)->next;continue;
- #ifndef RE_pNOSUB
- case OPC_GOP:case OPC_GCL:s=s->stack;continue;
- #endif
- case OPC_BOL:Cc(s,pos1.wh_)=0; /* found a real OPC_BOL */
- } /* anything else is irrelevant (prune the search to the top) */
- break;
- }
- while(stack);
- }
- for(s=r;;s=skiplen(s)) /* search through it linearly */
- { switch(s->opc)
- { case OPC_EOL:
- { register struct eps*t,*stack;
- stack=0;t=s->stack;goto nxt1;
- do
- for(t=stack->spawn,stack=stack->stack;;)
- nxt1: { switch(t->opc)
- { case OPC_EPS:t->stack=stack;t=(stack=t)->next;continue;
- #ifndef RE_pNOSUB
- case OPC_GOP:case OPC_GCL:t=t->stack;continue;
- #endif
- case OPC_FIN:goto eol_ok; /* ok, reached the end */
- } /* not the end, look elsewhere */
- break;
- }
- while(stack);
- s->opc=C_EOL; /* so it turned out to be a fake OPC_EOL */
- err_anch:
- #ifndef RE_nERROR_DETECT
- p=(void*)Cc(s,pos1.wh_); /* adjust the source pointer */
- #ifndef RE_pEXTENDED
- if(cflags®_EXTENDED)
- #endif
- #endif
- error(VS REG_BADRPT); /* so that the error is at the right */
- continue; /* offset */
- }
- case OPC_BOL:
- if(Cc(s,pos1.wh_)) /* fake OPC_BOL? */
- { s->opc=C_BOL;goto err_anch; /* yep, convert it */
- }
- #ifndef RE_pNOSUB
- case OPC_GOP:case OPC_GCL:
- #endif
- eol_ok: cflags&=~REG_SIMPLE;
- default:continue;
- case OPC_FIN:; /* end of the line, *stop* */
- }
- break;
- }
- #else /* RE_DUMB_ANCHOR */
- for(s=r;;s=skiplen(s)) /* search through it linearly */
- { switch(s->opc)
- { case OPC_FIN:break;
- case OPC_EOL:case OPC_BOL:case OPC_GOP:case OPC_GCL:
- cflags&=~REG_SIMPLE;continue;
- }
- break;
- }
- #endif /* RE_DUMB_ANCHOR */
- if(errorno) /* did we detect any error? */
- { preg->re_erroffset=(const char*)preg->re_es.re_stack-pattern;
- preg->re_flags=cflags|REG_ERROR;
- }
- #ifndef RE_EXEC_ERROR
- else
- #endif
- initjtable(VSO);
- return errorno; /* phew, compilation finished */
- }
-
- #ifdef RE_nCASE_TABLE
- #define CHECKCASE(i) (ign_case&&TO_LOWER(i))
- #else
- #define CHECKCASE(i) ((i)=transcase[i])
- #endif
-
- static struct eps*cleantail(start,code,thiss,th1)const uchar*const start;
- const struct eps*const code;struct eps*thiss;const unsigned th1;
- { register struct eps**t;
- while(thiss!=code) /* not at the end of this pc-stack */
- if(PCps(t,thiss,th1)>start) /* did it start later? */
- thiss= *t,*t=0; /* yep, so throw it out */
- else /* no, proceed carefully */
- { register struct eps**s;
- while(*t!=code) /* not at the end of this pc-stack */
- if(PCps(s,*t,th1)>start) /* did it start later? */
- *t= *s,*s=0; /* yep, so throw it out */
- else
- t=s; /* no, proceed */
- break;
- }
- return thiss;
- }
-
- #ifdef RE_STRLEN
- regexec(preg,string,length,nmatch,pmatch,eflags)/*const*/regex_t*preg;
- const char*const string;size_t length;size_t nmatch;regmatch_t pmatch[];
- #else
- regexec(preg,string,nmatch,pmatch,eflags)/*const*/regex_t*preg;
- const char*const string;size_t nmatch;regmatch_t pmatch[];
- #endif /* RE_STRLEN */
- { register struct eps*code;const uchar*founds,*founde=0;
- #ifdef RE_nCASE_TABLE
- unsigned ign_case;
- #else
- const uchar*transcase;
- #endif
- #ifdef RE_STRLEN
- size_t len;
- #endif
- ;{ register struct eps*other,*thiss;unsigned th1,ot1;const uchar*str;
- struct eps*initstack,*initcode;struct mchar virteol;
- virteol.next1_=preg->re_finpoint;virteol.p1_.st_=virteol.p2_.st_=0;
- #ifdef RE_nCASE_TABLE
- ign_case=(eflags|=preg->re_flags)®_ICASE;
- #else
- transcase=(eflags|=preg->re_flags)®_ICASE?igncase:bothcase;
- #endif
- if((code=preg->re_compiled)->opc==OPC_EPS)
- initcode=(initstack=code)->next; /* epsilon at the start */
- else
- initcode=code,initstack=0; /* regular at the start */
- th1=ioffsetof(struct chclass,pos1);ot1=ioffsetof(struct chclass,pos2);
- /*
- * Possible ANSI violation here. The assumption is that:
- * (struct eps*)malloc(sizeof(struct eps))-1!=0
- */
- thiss=other= --code;str=(uchar*)string-1; /* init pc-stack pointers */
- #ifdef RE_STRLEN
- founds=(uchar*)string+2+(len=length); /* saves us a compare later */
- #else
- founds=0; /* there is no other way in this case */
- #endif
- ;{ register struct eps*s,*stack; /* proper pre-conditioning */
- stack=initstack;s=initcode;goto nxt;
- do /* make sure \n does not match BOL */
- for(s=stack->spawn,stack=stack->stack;;)
- nxt: { switch(s->opc)
- { case OPC_EPS:s->stack=stack;s=(stack=s)->next;continue;
- #ifndef RE_pNOSUB
- case OPC_GOP:case OPC_GCL:s=s->stack;continue;
- #endif
- case OPC_FIN: /* special case */
- #ifndef RE_COMMON_SUBXP
- { register char*c;
- if(!Pci(&virteol,ot1)) /* state not yet pushed */
- Pc()=other,other=Ceps&virteol,Pcp()=string;
- }
- #else /* RE_COMMON_SUBXP */
- if(!PC(&virteol,ot1)) /* state not yet pushed */
- { PC(&virteol,ot1)=other;
- PCp(other=Ceps&virteol,ot1)=string;
- }
- #endif /* RE_COMMON_SUBXP */
- break;
- case OPC_BOL:
- if(!(eflags®_NOTBOL))
- #ifndef RE_COMMON_SUBXP
- { register char*c;
- if(!Pci(s,ot1)) /* state not yet pushed */
- Pc()=other,other=s,Pcp()=str;
- }
- #else /* RE_COMMON_SUBXP */
- if(!PC(s,ot1)) /* state not yet pushed */
- PC(s,ot1)=other,PCp(other=s,ot1)=str;
- #endif /* RE_COMMON_SUBXP */
- }
- break;
- }
- while(stack);
- }
- #ifdef RE_STRLEN
- if(len)
- #endif
- { register struct eps*s,*stack;const uchar*start;unsigned i;
- #ifdef RE_nJUMP_TABLE
- bit_type*jt;
- #else
- jt_type*jt;unsigned maxjump=preg->re_maxjump;
- #endif
- jt=Jtable(preg->re_finpoint);
- #ifndef RE_STRLEN
- while(i= *++str)
- {
- #else
- do
- { i= *++str; /* get the next real-text character */
- #endif /* switch this & other pc-stack */
- CHECKCASE(i);th1^=XOR1;ot1^=XOR1;thiss=other;other=code;
- stack=initstack;
- if(s=initcode)
- { if(thiss==code) /* automaton idle? */
- #ifndef RE_nJUMP_TABLE
- if(maxjump) /* can we jump at all? */
- { register unsigned jump=maxjump;
- register const uchar*jstr=str;
- shortcut: do
- #ifdef RE_STRLEN
- { if(jump>=len) /* want to jump over the edge? */
- { if(jump!=len||jt['\n']) /* virtual edge? */
- goto ret; /* no, the real one */
- jstr+=jump;jump=0;break; /* might be virtual */
- }
- len-=jump; /* make the jump */
- #ifdef RE_ABORT_EXPR
- if(RE_ABORT_EXPR)
- goto ret;
- #endif
- }
- while(jump=(jt[*(jstr+=jump)])); /* determine new jump */
- #else /* RE_STRLEN */
- #ifdef RE_MEMCHR
- {
- switch(jump)
- { case 2:
- if(!jstr[1])
- goto ret;
- break;
- default:
- if(memchr(jstr+1,'\0',(size_t)jump-1))
- goto ret;
- case 1:;
- }
- #else
- { while(--jump) /* jump in little steps */
- if(!*++jstr) /* so as not to overlook the \0 */
- goto ret;
- #endif /* RE_nMEMCHR */
- #ifdef RE_ABORT_EXPR
- if(RE_ABORT_EXPR)
- goto ret;
- #endif
- }
- #ifdef RE_MEMCHR
- while(jump=jt[*(jstr+=jump)]);
- #else
- while(jump=jt[*++jstr]);
- #endif
- if(!*jstr&&jt['\n']) /* virtual \0 ? */
- goto ret;
- #endif /* RE_STRLEN */
- do /* go back on our steps */
- if(jt[*--jstr]>++jump) /* impossible match? */
- { jump=jt[*jstr];goto shortcut; /* jump forward */
- }
- while(jump!=maxjump); /* not at target position */
- i= *(str=jstr); /* load and run */
- }
- else
- #define JT_TEST(jt,i) (jt[i]) /* !maxjump, so misuse jt as a bitmap */
- #else /* RE_nJUMP_TABLE */
- #define JT_TEST(jt,i) (!bit_test(jt,i))
- #endif /* RE_nJUMP_TABLE */
- if(JT_TEST(jt,i))
- { register const uchar*jstr=str;
- do
- {
- #ifdef RE_STRLEN
- if(!--len)
- { len++;break; /* give `$' a chance */
- }
- #endif
- #ifdef RE_ABORT_EXPR
- if(RE_ABORT_EXPR)
- goto ret;
- #endif
- }
- while(jstr++,JT_TEST(jt,*jstr));
- #ifndef RE_STRLEN
- if(!(i= *(str=jstr)))
- i=*--str; /* give `$' a chance */
- #else
- i= *(str=jstr); /* load and run */
- #endif
- }
- start=str;goto nostack;
- }
- else if(thiss==code)
- goto foundit;
- do /* pop next entry off this pc-stack */
- #ifndef RE_COMMON_SUBXP
- { s=thiss->stack;
- ;{ register char*c;
- thiss=Pci(thiss,th1);Pc()=0;start=Pcp();
- }
- #else /* RE_COMMON_SUBXP */
- { thiss=PC(s=thiss,th1);PC(s,th1)=0;start=PCp(s,th1);s=s->stack;
- #endif /* RE_COMMON_SUBXP */
- goto nostack;
- do /* pop next entry off the epsilon-stack */
- for(s=stack->spawn,stack=stack->stack;;)
- nostack: { switch(s->opc-OPB)
- { default:
- if(i==s->opc) /* regular character match */
- goto yep;
- break;
- case OPC_BOL-OPB:
- if(i=='\n') /* reset the start pointer */
- #ifndef RE_COMMON_SUBXP
- { register char*c;
- if(!Pci(s,ot1)) /* state not yet pushed */
- Pc()=other,other=s;
- Pcp()=str;
- #else /* RE_COMMON_SUBXP */
- { if(!PC(s,ot1)) /* state not yet pushed */
- PC(s,ot1)=other,other=s;
- PCp(s,ot1)=str;
- #endif /* RE_COMMON_SUBXP */
- }
- break;
- case OPC_EOL-OPB:
- if(i=='\n') /* only allow OPC_FIN to follow */
- { s=Ceps&virteol;goto yep; /* this one */
- }
- break; /* push spawned branch on the work-stack */
- case OPC_EPS-OPB:s->stack=stack;s=(stack=s)->next;
- continue;
- #ifndef RE_pNOSUB
- case OPC_GOP-OPB:case OPC_GCL-OPB:s=s->stack;continue;
- #endif
- case OPC_FIN-OPB: /* hurray! complete match */
- #ifdef RE_STRLEN
- if(start<=founds) /* will always match once */
- #else
- if(start<=founds||!founds) /* extra compare */
- #endif
- { founde=str; /* one past the start */
- if(!nmatch)
- goto ret; /* only report success */
- thiss=cleantail(founds=start,code,thiss,th1);
- other=cleantail(start,code,other,ot1);
- initcode=initstack=0; /* don't start anything */
- } /* new */
- break;
- case OPC_CLASS-OPB:
- if(bit_test(Cc(s,c),i))
- goto yep; /* character in class */
- break;
- case OPC_DOT-OPB: /* dot-wildcard */
- #ifndef RE_nNEWLINE
- #ifdef RE_pNEWLINE
- if(i!='\n')
- #else
- if(i!='\n'||!(eflags®_NEWLINE))
- #endif
- #endif
- #ifndef RE_COMMON_SUBXP
- { register char*c;
- yep: if(!Pci(s,ot1)) /* state not yet pushed */
- { Pc()=other;other=s;Pcp();
- earlier: PcP()=start;
- }
- else if((uchar*)Pcp()>start)
- goto earlier;
- }
- #else /* RE_COMMON_SUBXP */
- yep: if(!PC(s,ot1)) /* state not yet pushed */
- { PC(s,ot1)=other;other=s;
- earlier: PCp(s,ot1)=start;
- } /* is it not the earliest match? */
- else if((uchar*)PCp(s,ot1)>start)
- goto earlier;
- #endif /* RE_COMMON_SUBXP */
- } /* push location onto other pc-stack */
- break;
- }
- while(stack); /* the work-stack is not empty */
- }
- while(thiss!=code); /* this pc-stack is not empty */
- #ifdef RE_ABORT_EXPR
- if(RE_ABORT_EXPR)
- goto ret;
- #endif
- }
- #ifdef RE_STRLEN
- while(--len); /* still text to search */
- }
- str++; /* one past the end */
- #else
- } /* out of text */
- #endif /* proper post-conditioning */
- ;{ register struct eps*t,*s,*stack;unsigned i,no_eol;
- for(t=other,no_eol=eflags®_NOTEOL;t!=code;t=PC(t,ot1))
- { s=t->stack;i=0;stack=0;goto nxt2;
- do
- for(s=stack->spawn,stack=stack->stack;;)
- nxt2: { switch(s->opc)
- { case OPC_EPS:s->stack=stack;s=(stack=s)->next;continue;
- case OPC_EOL:
- if(no_eol) /* can we match this EOL? */
- break;
- i=1; /* skip one further in that case */
- #ifndef RE_pNOSUB
- case OPC_GOP:case OPC_GCL:
- #endif
- s=s->stack;continue;
- case OPC_FIN:
- { const uchar*start;
- #ifdef RE_STRLEN
- if((start=PCp(t,ot1))<=founds)
- #else
- if((start=PCp(t,ot1))<=founds||!founds)
- #endif /* assign match + offset i */
- founds=start,founde=str+i;
- /*
- * Possible ANSI violation here.
- * The assumption is that `founde' can be
- * assigned a value that is two elements
- * past the last element of `string'.
- */
- }
- }
- break;
- }
- while(stack);
- }
- }
- ret:;
- #ifndef RE_nREUSABLE
- do
- { register struct eps*t;
- while(thiss!=code) /* cleanup this pc-stack */
- thiss=PC(t=thiss,th1),PC(t,th1)=0;
- thiss=other; /* and the other one */
- }
- while((th1^=XOR1)==ot1);
- #endif
- }
- #ifdef RE_ABORT_EXPR
- if(RE_ABORT_EXPR)
- return REG_ABORT;
- #endif
- foundit:
- if(!nmatch||!founde&&eflags®_NOSUB)
- goto retmatch;
- ;{ unsigned lsub=nmatch;
- while(--lsub) /* clear pmatch array first */
- pmatch[lsub].rm_ep=0;
- }
- #define for_CLEAN_PMATCH(nmatch,lsub) \
- for(;;)\
- if(nmatch<=lsub||!pmatch[lsub].rm_ep)\
- lsub--;\
- else if(lsub&&pmatch[lsub].rm_sp>=(char*)str)\
- pmatch[lsub--].rm_ep=0;\
- else
- #define PMATCH_so(str) \
- if(nmatch>(lsub=Go(s,gopp.nr)))\
- pmatch[lsub].rm_sp=(char*)str
- #define PMATCH_eo(str) \
- if(nmatch>Gc(s,gopp.nr))\
- pmatch[Gc(s,gopp.nr)].rm_ep=(char*)str
- /* any match at all? spare us the details? */
- if((pmatch->rm_ep=(char*)founde)&&
- (pmatch->rm_sp=(char*)founds,!(eflags®_SIMPLE)))
- { unsigned sol,eol; /* second automaton, for filling pmatch */
- ;{ struct evoi*cstp,*stp,*estp;register struct eps*s;const uchar*str,*end;
- unsigned i,no_eol;
- #ifndef RE_pNOSUB
- unsigned lsub=0;
- #endif
- #ifdef RE_EXEC_ERROR
- if(eflags®_ERROR)
- { preg->re_flags=eflags&~REG_ERROR;goto newstack;
- }
- #endif
- if(stp=preg->re_es.re_stack)
- estp=(struct evoi*)stp->wh_;
- else
- newstack: estp=STARTSTACK+(stp=reg_malloc(STARTSTACK*SZ(evoi)));
- cstp=stp;s=code+1;end=founde--;
- #ifdef RE_STRLEN
- if((char*)end>string+length)
- end--; /* limit end to the non-virtual area */
- no_eol=(char*)end<string+length&&*end!='\n';
- #else
- if((char*)end>string&&!end[-1])
- end--; /* limit end to the non-virtual area */
- no_eol= *end!='\n'&&*end;
- #endif /* upcoming preincrement, so backup */
- str=founds-1;eol=0;
- if((char*)founds<string) /* match before the start of string? */
- { str++;goto virtual_newline;
- }
- for(sol=0;;s=s->stack) /* update the pc to the next */
- { if(++str<end) /* inside our match? */
- goon: { i= *str; /* read character from stream */
- redo: switch(s->opc-OPB) /* switch to the current opcode */
- { default:case OPC_FIN-OPB:
- CHECKCASE(i);
- if(i==s->opc) /* compare a regular character */
- continue; /* a match, simply go to the next */
- break;
- case OPC_DOT-OPB:
- #ifndef RE_nNEWLINE
- #ifdef RE_pNEWLINE
- if(i!='\n')
- #else
- if(i!='\n'||!(eflags®_NEWLINE))
- #endif
- #endif
- continue;
- break;
- case OPC_CLASS-OPB:
- if(bit_test(Cc(s,c),i))
- continue;
- break;
- case OPC_EPS-OPB: /* push(str,s->spawn); */
- if(estp==cstp)
- { size_t t;
- if(!(cstp=reg_realloc(stp,
- t=dif(cstp,stp)+INCSTACK*SZ(evoi))))
- { eflags|=REG_LOWMEM|REG_OUTOFMEM;goto complete_match;
- }
- stp=cstp;cstp=(estp=(struct evoi*)geno(stp,t))-INCSTACK;
- } /* save the junction on the stack, pick one branch */
- cstp->wh_=str;cstp++->st_=s->spawn;s=s->next;goto redo;
- #ifndef RE_pNOSUB
- case OPC_GOP-OPB:PMATCH_so(str);s=s->stack;goto redo;
- case OPC_GCL-OPB:PMATCH_eo(str);s=s->stack;goto redo;
- #endif
- case OPC_EOL-OPB:
- if(i=='\n'&&str==founde)
- { eol=1;continue;
- }
- break;
- case OPC_BOL-OPB:
- if(i=='\n'&&str==founds)
- { sol=1;continue;
- }
- }
- }
- else
- { register struct eps*stack=0;
- goto nxt3; /* proper post-conditioning again */
- do
- for(s=stack->spawn,stack=stack->stack;;s=s->stack)
- nxt3: { switch(s->opc)
- { case OPC_EPS:s->stack=stack;s=(stack=s)->next;
- goto nxt3;
- case OPC_EOL:
- if(no_eol)
- break;
- continue;
- #ifndef RE_pNOSUB
- case OPC_GOP:PMATCH_so(str);continue;
- case OPC_GCL:PMATCH_eo(str);continue;
- #endif
- case OPC_FIN:goto complete_match; /* yahoe! */
- }
- #ifndef RE_pNOSUB
- for_CLEAN_PMATCH(nmatch,lsub)
- break;
- #endif
- break;
- }
- while(stack);
- }
- str=(--cstp)->wh_;s=cstp->st_; /* pop(str,s); */
- #ifndef RE_pNOSUB
- for_CLEAN_PMATCH(nmatch,lsub)
- #endif
- if((char*)str<string)
- virtual_newline:
- { i='\n';goto redo;
- }
- else
- { if(str<founde)
- eol=0;
- if(str==founds)
- sol=0;
- goto goon;
- }
- }
- complete_match: /* free the runtime stack or prepare it for later */
- preg->re_es.re_stack=eflags®_LOWMEM?
- (reg_free(stp),(struct evoi*)0):(stp->wh_=estp,stp);
- pmatch->rm_ep=(char*)str; /* mark the end of the match */
- }
- if(sol) /* adjust the starting point? (due to a `^') */
- { unsigned lsub=nmatch;
- do
- if(pmatch[--lsub].rm_sp==(char*)founds)
- pmatch[lsub].rm_sp=(char*)founds+1; /* shift it right */
- while(lsub);
- }
- if(eol) /* adjust the ending point? (due to a `$') */
- { unsigned lsub=nmatch;
- do
- if(pmatch[--lsub].rm_ep>(char*)founde)
- pmatch[lsub].rm_ep=(char*)founde; /* shift it left */
- while(lsub);
- }
- }
- ;{ unsigned lsub=nmatch;
- do
- if(!pmatch[--lsub].rm_ep) /* no match on this group? */
- #ifdef RE_SUBSTR_PTR
- pmatch[lsub].rm_sp=0; /* then wipe out the start as well */
- #else
- pmatch[lsub].rm_so=pmatch[lsub].rm_eo= -1; /* same, with offsets */
- else
- { pmatch[lsub].rm_so=(const char*)pmatch[lsub].rm_sp-string;
- pmatch[lsub].rm_eo=(const char*)pmatch[lsub].rm_ep-string;
- }
- #endif
- while(lsub);
- }
- retmatch:
- return eflags®_OUTOFMEM?REG_ESPACE:founde?0:REG_NOMATCH;
- }
-
- #ifdef RE_COPY
- #define cp_s(memb) (dest->memb=src->memb)
- #define cp_e(memb) (d->memb=epso(d,dif(s->memb,s)))
-
- static void copyclass(to,from)bit_type*to;const bit_type*from;
- { register unsigned i=bit_mindex(OPB);
- while(*to++= *from++,--i);
- }
-
- regcopy(dest,src)regex_t*const dest;const regex_t*const src;
- { register struct eps*s,*d;
- cp_s(re_nsub);cp_s(re_flags); /* clone the easy parts */
- #ifndef RE_nJUMP_TABLE
- cp_s(re_maxjump);
- #endif
- dest->re_compiled=0;
- if(src->re_flags®_ERROR)
- cp_s(re_erroffset);
- else
- dest->re_es.re_stack=0;
- if(!(s=src->re_compiled)) /* hmmm..., missing source regexp */
- return REG_BADPAT; /* allocate some space */
- if(!(d=dest->re_compiled=reg_malloc(dif(src->re_finpoint,s)+
- mx(ioffsetof(struct eps,stack)+SZ(eps*),SZ(aligar)))))
- return REG_ESPACE; /* oops */
- for(;;d=skiplen(d),s=skiplen(s)) /* step through them */
- { switch((d->opc=s->opc)-OPB) /* copying opcodes */
- { case OPC_EPS-OPB:cp_e(spawn);cp_e(next);continue;
- case OPC_FIN-OPB:
- #ifdef RE_nJUMP_TABLE
- copyclass(Jtable(dest->re_finpoint=d),Jtable(s));
- #else
- ;{ register jt_type*to;register const jt_type*from;
- register unsigned i;
- i=OPB;to=Jtable(dest->re_finpoint=d);from=Jtable(s);
- while(*to++= *from++,--i);
- }
- #endif
- return 0;
- #ifndef RE_nNOSUB
- case OPC_GOP-OPB:
- #ifndef gcl
- Go(d,gopp.nr)=Go(s,gopp.nr);break;
- #endif
- case OPC_GCL-OPB:
- Gc(d,gopp.nr)=Gc(s,gopp.nr);break;
- #endif /* RE_nNOSUB */
- case OPC_CLASS-OPB:copyclass(Cc(d,c),Cc(s,c));
- default:case OPC_DOT-OPB:case OPC_BOL-OPB:case OPC_EOL-OPB:
- initsimp(d,d);
- }
- cp_e(stack); /* simple next pointers */
- }
- }
- #endif /* RE_COPY */
-
- void regfree(rg)regex_t*rg;
- { if(rg->re_compiled)
- { reg_free(rg->re_compiled);rg->re_compiled=0;
- if(!(rg->re_flags®_ERROR)&&rg->re_es.re_stack)
- reg_free(rg->re_es.re_stack),rg->re_es.re_stack=0;
- }
- }
-
- #ifndef RE_nERROR_REPORT
- static size_t catlim(dest,src,lim)register char*dest;register const char*src;
- register size_t lim;
- { size_t len=strlen(src);
- while(lim&&*dest) /* search for the end of dest */
- dest++,lim--;
- if(lim) /* still space left in buffer */
- { while(--lim&&(*dest++= *src++)); /* append src */
- *dest='\0'; /* make sure it is properly terminated */
- }
- return len; /* merely a counting service */
- }
-
- size_t regerror(errcode,preg,errbuf,errbuf_size)const regex_t*const preg;
- char*const errbuf;size_t errbuf_size;
- { size_t needed,i;
- static const char*const error_msg[]=
- {"Failed to match","Invalid regular expression",
- "Invalid collating element","Invalid character class type","Trailing \\",
- "Number in \\digit invalid","[ ] imbalance","( ) imbalance",
- "{ } imbalance","Content of { } invalid",
- "Invalid endpoint in range expression","Out of memory",
- "Inappropriate use of magic character",
- #ifdef RE_ABORT_EXPR
- "Search aborted"
- #endif
- },atoffset[]=" at offset: ";
- if(errbuf_size) /* any target at all? */
- *errbuf='\0'; /* clear out the target */
- needed=1; /* for the omnipresent terminating \0 */
- if(--errcode>=0&&errcode<=maxindex(error_msg))
- { needed+=catlim(errbuf,error_msg[errcode],errbuf_size); /* regular msg */
- if(errcode&&preg&&(preg->re_flags&(REG_NOOFFSET|REG_ERROR))==REG_ERROR)
- { needed+=catlim(errbuf,atoffset,errbuf_size); /* at offset */
- for(errcode=0,i=preg->re_erroffset;errcode++,i/=10;);
- needed+=errcode;errcode=1;i=preg->re_erroffset;
- do /* fill in the digits */
- if(needed-++errcode<errbuf_size)
- errbuf[needed-errcode]=hex2num[i%10]; /* coding it like this */
- while(i/=10); /* avoids linking in sprintf() */
- errbuf[(needed<=errbuf_size?needed:errbuf_size)-1]='\0';
- }
- }
- return needed;
- }
- #endif /* RE_nERROR_REPORT */
-
- /*
- * It might be hard to believe, but I generally dislike conditionally
- * compiled code (especially when it is done to get the code working
- * on vastly distinct architectures).
- *
- * However, you might have noticed (I guess you could hardly have missed
- * it :-) that I made an exception in this library. I did it because:
- * 1. The conditional compilation constructs in this library have
- * nothing to do with portability issues.
- * 2. This seemed to be the only way to have a relatively compact
- * library, yet easily customisable to your personal taste.
- *
- * But, I hope I never have to program/debug code like this in the
- * future. It comes close to a programmer's worst nightmare :-).
- */
-