home *** CD-ROM | disk | FTP | other *** search
Text File | 1994-08-18 | 82.9 KB | 3,118 lines |
- Newsgroups: comp.sources.unix
- From: Ed.Falk@Eng.Sun.COM (Ed Falk)
- Subject: v28i114: sortmail - kill files and more for your mail, Part01/01
- Message-id: <1.777230853.7352@gw.home.vix.com>
- Sender: unix-sources-moderator@gw.home.vix.com
- Approved: vixie@gw.home.vix.com
-
- Submitted-By: Ed.Falk@Eng.Sun.COM (Ed Falk)
- Posting-Number: Volume 28, Issue 114
- Archive-Name: sortmail/part01
-
- Hi; I post this program because I believe it is the best one I've
- seen (or I wouldn't have written it.) It's written in C rather than
- shell scripts, and runs quite fast.
-
- Overview: sortmail(1) examines incoming mail and sorts it for you based
- on all or part of the header, or even the entire message.
-
- Messages can be filed to your mailbox, to a specific mail folder,
- junked, forwarded to another address or piped through sh(1).
-
- The format of the $HOME/.sortmailrc file is similar to an rn KILL file:
-
- set default=+other
- /MAILER-DAEMON/f:+bounces
- /falk/t:m
- /scuba/s:+scuba
- /marko/f:j
- /^Precedence: junk/h:+other
-
- #! /bin/sh
- # this is a shell archive, meaning:
- # 1. Remove everything above the #! /bin/sh line
- # 2. Save the resulting text in a file.
- # 3. Execute the file with /bin/sh to create the files:
- # sortmail.1
- # sample.forward
- # sample.sortmailrc
- # Makefile
- # sortmail.h
- # regex.h
- # sortmail.c
- # parse.c
- # regex.c
- # This archive created: Wed Aug 17 13:42:30 PDT 1994 by falk
- #
- #
- export PATH; PATH=/bin:$PATH
- #
-
- if test -f sortmail.1
- then
- echo shar: will not over-write existing file sortmail.1
- else
- echo shar: extracting 'sortmail.1', 7787 characters
- sed 's/^X//' > sortmail.1 <<'SHAR_EOF'
- X.\" %Z%%M% %I% %E%; Copyright (c) 1990 - Sun Microsystems
- X.\"
- X.TH SORTMAIL 1 "21 Apr 1991"
- X.SH NAME
- Xsortmail \- classify incoming mail
- X.SH SYNOPSIS
- X.B sortmail
- X[
- X.B \-v
- X] [
- X.B \-home
- X.I path
- X] [
- X.B \-mailbox
- X.I path
- X] [
- X.B \-mailrc
- X.I initfile
- X] [
- X.B \-sortmailrc
- X.I initfile
- X]
- X.I username
- X.SH DESCRIPTION
- XCreate this \fB.forward\fP file in your home directory:
- X
- X "| /\fIpath\fP/sortmail \fIuser\fP"
- X
- XWhere "/\fIpath\fP/sortmail" is the full path where you installed
- Xsortmail, and \fIuser\fP is your own userid. The userid must be
- Xspecified because when mail arrives \fBsortmail\fP could be run as
- Xroot, daemon, or any number of other id's.
- X
- XOnce your \fB.forward\fP file is set up,
- X.B sortmail
- Xwill classify incoming mail according to the patterns in $HOME/.sortmailrc.
- XYour \fB.sortmailrc\fP file is similar to a news KILL file, but somewhat
- Xmore powerful. You can discard mail, have it delivered to your mailbox,
- Xhave it filed into a folder, forward it to another address
- Xor even pipe it through a shell command.
- X
- XWhen \fBsortmail\fP starts up, it first reads your \fB.mailrc\fP file to
- Xfind the value of mail(1) variables that are also used by \fBsortmail\fP.
- XIt then reads your \fB.sortmailrc\fP file for additional variable settings,
- Xif any, and for search patterns. (See below.)
- X
- X.SH OPTIONS
- X.TP 15
- X.B \-v
- XVerbose. A message is printed on /dev/console for every mail message. A
- Xsecond
- X.B \-v
- Xcauses a lot more information to be printed.
- X.TP 15
- X\fB\-home\fP \fIpath\fP
- XSet user's home directory, overriding the default taken from the user's
- Xpasswd entry.
- X.TP 15
- X\fB\-mailbox\fP \fIpath\fP
- XSet the user's system mailbox, overriding the default of
- X\fB/usr/spool/mail/\fP\fIuser\fP.
- X.TP 15
- X\fB\-mailrc\fP \fIpath\fP
- XSet the user's .mailrc file, overriding the default of
- X\fI~user/\fP\fB.mailrc\fP.
- X.TP 15
- X\fB\-sortmailrc\fP \fIpath\fP
- XSet the user's .sortmailrc file, overriding the default of
- X\fI~user/\fP\fB.sortmailrc\fP.
- X
- X.SH SORTMAILRC FORMAT
- X
- XYour .sortmailrc file is a series of lines in the form
- X
- X \fBset\fP \fIvariable=value\fP
- X
- Xand search patterns take the form
- X
- X /\fIregular-expression\fP/\fImodifier\fP:\fIcommand\fP
- X
- Xwhere \fIregular-expression\fP is any \fBed(1)\fP-style regular
- Xexpression, \fImodifier\fP is one of \fBt, f, s, h, a\fP, and
- X\fIcommand\fP is one of \fBm, j, v, f\fP\ \fIfile\fP, \fB+\fP\fIfile\fP,
- Xor \fB|\fP\ \fIcommand\fP.
- X
- X.SH MODIFIERS
- X.LP
- XThese modifiers affect how the regular expression is applied to the
- Xincoming mail. The default is 's'.
- X.TP 6
- X.B s
- XTest the "Subject:\ " line of the incoming mail against the regular expression.
- X.TP 6
- X.B t
- XTest the "To:\ ", "Cc:\ " and "Apparently-To:\ " lines of the incoming
- Xmail against the regular expression.
- X.TP 6
- X.B f
- XTest the "From:\ " line of the incoming mail against the regular expression.
- X.TP 6
- X.B h
- XTest the entire header of the incoming mail against the regular expression.
- X.TP 6
- X.B a
- XTest the entire incoming mail message against the regular expression.
- X
- X.SH COMMANDS
- X.TP 10
- X.B m
- XSend the message to the user's mailbox.
- X.TP
- X\fBm\fP \fIaddress\fP
- XForward the mail to the specified address.
- X.TP 10
- X.B j
- XDelete the message ("junk" it.)
- X.TP 10
- X\fBf\fP \fIfolder\fP
- XSave the message in the given mail folder. \fIfolder\fP may be in
- Xthe formats ~/\fIpath\fP, /\fIabspath\fP, ~\fIuser/path\fP, or
- X+\fIname\fP. The latter form expands to ~/\fIfolder/name\fP where
- X\fIfolder\fP is the value specified for the \fBfolder\fP variable
- X(default is "\fBfolders\fP".)
- X.TP 10
- X\fB+\fP\fIfolder\fP
- XShorthand for "\fBf +\fP\fIfolder\fP".
- X.TP 10
- X\fB|\fP \fIcommand\fP
- XPipe the mail message through the given \fIshell-command\fP.
- Xsh(1) is used.
- X
- X.SH VARIABLES
- X.LP
- X\fBsortmail\fP uses the following variables, which may be changed in
- Xyour \fB.mailrc\fP or \fB.sortmailrc\fP files. Some variables may also
- Xbe set on the command line.
- X.TP 10
- X.B user
- XThe user on behalf of whom \fBsortmail\fP is running. This value must
- Xbe specified on the command line, but may be changed later. It is used
- Xto determine the user's home directory, among other things.
- X.TP 10
- X.B home
- XThe user's home directory. Used to find initialization files and the
- Xuser's \fIfolders\fP directory. Default is ~\fIuser\fP.
- X.TP 10
- X.B mailbox
- XThe user's mail box. Default is \fB/usr/spool/mail/\fP\fIuser\fP.
- X.TP 10
- X.B mailrc
- XFull path of the user's \fB.mailrc\fP file. There's no real point in
- Xchanging this.
- X.TP 10
- X.B sortmailrc
- XFull path of the user's \fB.sortmailrc\fP file. There's no real point in
- Xchanging this.
- X.TP 10
- X.B mbox
- XFull path of the user's \fBmbox\fP file. Not currently used by \fBsortmail\fP.
- X.TP 10
- X.B folder
- XThe user's \fImail folders\fP directory. Mail folders are identified by the
- Xleading '+' in their name, and are stored in ~\fIuser/folder\fP/.
- XDefault is "\fBfolders\fP".
- X.TP 10
- X.B default
- XName of the file into which unclassifiable mail is sent. The default
- Xis \fImailbox\fP. Another reasonable value might be "\fB+other\fP".
- X.TP 10
- X.B vacation
- XIf set, mail to \fIuser\fP will also be piped through /usr/ucb/vacation.
- X.TP 10
- X.B logfilename
- XMessages printed with -v are sent to the specified log file. Default is
- X/dev/console, or (if /dev/console cannot be opened) stderr.
- X
- X.SH EXAMPLES
- X.LP
- XHere is a sample \fB.sortmailrc\fP file:
- X.LP
- X.RS
- X.nf
- X.ft B
- Xset default=+other
- X/MAILER-DAEMON/f:+bounces
- X/falk/t:m
- X/bldg8/t:m
- X/joe/f:m
- X/scuba@sun/t:+scuba
- X/scuba/s:+scuba
- X/mwicks/f:j
- X/homework/t:m falk@kestrel
- X/jim@apple/f:| /home/falk/bin/fixjim
- X/^Precedence: junk/h:+other
- X.ft
- X.fi
- X.RE
- X.LP
- XIn this example, the folder directory and other variables have whatever
- Xvalues were specified in \fB.mailrc\fP. Unclassifiable mail is sent to
- Xthe folder "+other". Mail from "MAILER-DAEMON" is sent to the folder
- X"+bounces". Mail to "falk" or "bldg8" or from my friend joe is sent
- Xdirectly to my mailbox. Mail to the scuba club or with "scuba" in the
- Xsubject line is sent to the "+scuba" folder. Mail from mwicks is
- Xthrown away without my ever seeing it. Mail to the "homework" alias
- Xis forwarded to my account on another machine.
- XMail from my friend jim, who
- Xformats his mail in a funny way, is passed through a shell script which
- Xcleans up his messages and appends them to my mailbox. Mail messages
- Xwith "^Precedence:\ junk" anywhere in the header are filed in +other.
- X
- XNote that the patterns are applied in the order given; it is important,
- Xfor example, that the "MAILER-DAEMON" pattern precede the "falk" pattern
- Xso that mail from MAILER-DAEMON is filed in +bounces even if directed
- Xto me personally. On the other hand, mail from mwicks will be placed
- Xin my mailbox if addressed to me personally, otherwise it will
- Xbe junked.
- X
- X.SH NOTES
- XRemember that sortmail can be executed under any userid, depending on
- Xwho sent the mail, and whether or not it came from the local machine.
- XBecause of this, you cannot depend on any user environment to be available,
- Xespecially environment variables and path. All filenames and program
- Xnames should be specified as full paths, except that "~", "~user" and
- X"+folder" forms are understood. The permissions of sortmail and every
- Xdirectory along its path should be such that any user can run it.
- X
- XIf you pipe incoming mail through a program, that program should not
- Xgenerate any output to stdout or stderr whatsoever. If it does, that
- Xoutput will be sent back to the originator of the mail as if the mail
- Xhad bounced.
- X
- X.SH SEE ALSO
- Xed(1), mail(1)
- X.SH COPYRIGHT
- X Copyright (c) 1990 by Sun Microsystems, Inc.
- X Edward A. Falk (falk@sun.com)
- X
- XPermission to use, copy, modify, and distribute this software and its
- Xdocumentation for any purpose and without fee is hereby granted,
- Xprovided that the above copyright notice appear in all copies and that
- Xboth that copyright notice and this permission notice appear in
- Xsupporting documentation.
- X
- Xregex.[ch] is covered by the GNU copyleft.
- X
- SHAR_EOF
- len=`wc -c < sortmail.1`
- if test $len != 7787 ; then
- echo error: sortmail.1 was $len bytes long, should have been 7787
- fi
- fi # end of overwriting check
-
- if test -f sample.forward
- then
- echo shar: will not over-write existing file sample.forward
- else
- echo shar: extracting 'sample.forward', 41 characters
- sed 's/^X//' > sample.forward <<'SHAR_EOF'
- X"| /home/joeshmoe/bin/sortmail joeshmoe"
- SHAR_EOF
- len=`wc -c < sample.forward`
- if test $len != 41 ; then
- echo error: sample.forward was $len bytes long, should have been 41
- fi
- fi # end of overwriting check
-
- if test -f sample.sortmailrc
- then
- echo shar: will not over-write existing file sample.sortmailrc
- else
- echo shar: extracting 'sample.sortmailrc', 1067 characters
- sed 's/^X//' > sample.sortmailrc <<'SHAR_EOF'
- X# This is a sample .sortmailrc file.
- X#
- X# The destination for unclassifiable mail is the +other folder.
- X# The directory containing the folders is Mail
- X# vacation mode is not currently enabled
- X#
- X# all mail from Mailer-Daemon goes to the +bounces folder, otherwise
- X# all mail to me (joeshmoe) or to anybody in my building, or from
- X# my boss fred goes directly to me.
- X# all mail from mwicks gets ignored.
- X# all mail from my friend jim who never formats his mail goes through
- X# my "fixjim" filter.
- X# all mail to the scuba club or with "scuba" in the subject goes into
- X# my +scuba folder.
- X# all mail to the testing alias is forwarded to my account on another machine
- X# all mail with "Precedence: junk" in the header goes to +other.
- X# all unclassified mail goes to +other.
- X#
- Xset default=+other
- Xset folder='Mail'
- X#set vacation
- X/Mailer-Daemon/f:+bounces
- X/MAILER-DAEMON/f:+bounces
- X/joeshmoe/t:m
- X/Joeshmoe/t:m
- X/bldg18/t:m
- X/fred/f:m
- X/mwicks/f:j
- X/jim@apple/f:| /home/joeshmoe/bin/fixjim
- X/scuba@sun/t:+scuba
- X/scuba/s:+scuba
- X/testing/t:m joeshmoe@lab
- X/^Precedence: junk/h:+other
- SHAR_EOF
- len=`wc -c < sample.sortmailrc`
- if test $len != 1067 ; then
- echo error: sample.sortmailrc was $len bytes long, should have been 1067
- fi
- fi # end of overwriting check
-
- if test -f Makefile
- then
- echo shar: will not over-write existing file Makefile
- else
- echo shar: extracting 'Makefile', 460 characters
- sed 's/^X//' > Makefile <<'SHAR_EOF'
- X
- X#DBG = -g -DDEBUG
- XDBG = -O
- X
- X# SunOS 4.x
- X#CFLAGS = ${DBG}
- X#LDFLAGS = -n -Bdynamic -s
- X
- X# SunOS 5.x
- XCFLAGS = ${DBG} -DSVr4
- XLDFLAGS =
- X
- X#CC = gcc
- X
- XSRCS = sortmail.c parse.c regex.c
- X
- XHDRS = sortmail.h regex.h
- X
- XOBJS = $(SRCS:.c=.o)
- X
- XDOC = sortmail.1 sample.forward sample.sortmailrc
- X
- Xsortmail: $(OBJS)
- X $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJS) $(LIB)
- X
- Xarchive: Makefile $(HDRS) $(SRCS) $(DOC)
- X shar $(DOC) Makefile $(HDRS) $(SRCS) > archive
- X
- Xclean:
- X -rm *.o archive
- SHAR_EOF
- len=`wc -c < Makefile`
- if test $len != 460 ; then
- echo error: Makefile was $len bytes long, should have been 460
- fi
- fi # end of overwriting check
-
- if test -f sortmail.h
- then
- echo shar: will not over-write existing file sortmail.h
- else
- echo shar: extracting 'sortmail.h', 1038 characters
- sed 's/^X//' > sortmail.h <<'SHAR_EOF'
- X
- X
- X /* sortmail variables */
- X
- Xextern char *user ; /* userid the mail is being sorted for */
- Xextern char *home ; /* user's home directory */
- Xextern char *mailbox ; /* system mailbox, default = SPOOLDIR followed by user */
- Xextern char *mailrc ; /* mail(1) init file, default = $HOME/.mailrc */
- Xextern char *sortmailrc ; /* initfile, default = $HOME/.sortmailrc */
- Xextern char *mbox ; /* mbox file, default = MBOX */
- Xextern char *deflt ; /* where unclassifiable mail goes; NULL means mailbox */
- Xextern char *folder ; /* folder directory name, default = FOLDER */
- Xextern char *vacation ; /* if non-NULL, also pipe through vacation(1) */
- Xextern int verbose ; /* incremented once for each -v option */
- Xextern char *logfilename ; /* name of the log file; NULL until set */
- Xextern FILE *logfile ; /* stream that log and error messages are written to */
- X
- X#define SPOOLDIR "/usr/spool/mail/"
- X#define MBOX "mbox"
- X#define FOLDER "folders"
- X
- Xtypedef struct patinfo { /* one pattern rule; rules are chained through next */
- X struct patinfo *next ; /* following rule, NULL at the end of the list */
- X char *pattern ; /* pattern to match */
- X enum {P_SUBJECT, P_TO, P_FROM, P_HEADER, P_ALL} flags ; /* part of the message tested */
- X enum {C_MAIL, C_FILE, C_PIPE, C_JUNK} command ; /* action taken on a match */
- X char *dest ; /* destination, if any: address, file or shell command */
- X } PatInfo ;
- X
- Xextern PatInfo *patterns, *lastpat ;
- X
- X/* True if a begins with b. b must be a string literal or char array: its length is taken with sizeof. */
- X#define strmatch(a,b) (!strncmp((a),(b),sizeof(b)-1))
- X
- SHAR_EOF
- len=`wc -c < sortmail.h`
- if test $len != 1038 ; then
- echo error: sortmail.h was $len bytes long, should have been 1038
- fi
- fi # end of overwriting check
-
- if test -f regex.h
- then
- echo shar: will not over-write existing file regex.h
- else
- echo shar: extracting 'regex.h', 8080 characters
- sed 's/^X//' > regex.h <<'SHAR_EOF'
- X/* Definitions for data structures callers pass the regex library.
- X Copyright (C) 1985 Free Software Foundation, Inc.
- X
- X This program is free software; you can redistribute it and/or modify
- X it under the terms of the GNU General Public License as published by
- X the Free Software Foundation; either version 1, or (at your option)
- X any later version.
- X
- X This program is distributed in the hope that it will be useful,
- X but WITHOUT ANY WARRANTY; without even the implied warranty of
- X MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- X GNU General Public License for more details.
- X
- X You should have received a copy of the GNU General Public License
- X along with this program; if not, write to the Free Software
- X Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- X
- XIn other words, you are welcome to use, share and improve this program.
- XYou are forbidden to forbid anyone else to use, share and improve
- Xwhat you give them. Help stamp out software-hoarding! */
- X
- X
- X/* Define number of parens for which we record the beginnings and ends.
- X This affects how much space the `struct re_registers' type takes up. */
- X#ifndef RE_NREGS
- X#define RE_NREGS 10
- X#endif
- X
- X/* These bits are used in the obscure_syntax variable to choose among
- X alternative regexp syntaxes. */
- X
- X/* 1 means plain parentheses serve as grouping, and backslash
- X parentheses are needed for literal searching.
- X 0 means backslash-parentheses are grouping, and plain parentheses
- X are for literal searching. */
- X#define RE_NO_BK_PARENS 1
- X
- X/* 1 means plain | serves as the "or"-operator, and \| is a literal.
- X 0 means \| serves as the "or"-operator, and | is a literal. */
- X#define RE_NO_BK_VBAR 2
- X
- X/* 0 means plain + or ? serves as an operator, and \+, \? are literals.
- X 1 means \+, \? are operators and plain +, ? are literals. */
- X#define RE_BK_PLUS_QM 4
- X
- X/* 1 means | binds tighter than ^ or $.
- X 0 means the contrary. */
- X#define RE_TIGHT_VBAR 8
- X
- X/* 1 means treat \n as an _OR operator
- X 0 means treat it as a normal character */
- X#define RE_NEWLINE_OR 16
- X
- X/* 0 means that special characters (such as *, ^, and $) always have
- X their special meaning regardless of the surrounding context.
- X 1 means that special characters may act as normal characters in some
- X contexts. Specifically, this applies to:
- X ^ - only special at the beginning, or after ( or |
- X $ - only special at the end, or before ) or |
- X *, +, ? - only special when not after the beginning, (, or | */
- X#define RE_CONTEXT_INDEP_OPS 32
- X
- X/* Now define combinations of bits for the standard possibilities. */
- X#define RE_SYNTAX_AWK (RE_NO_BK_PARENS | RE_NO_BK_VBAR | RE_CONTEXT_INDEP_OPS)
- X#define RE_SYNTAX_EGREP (RE_SYNTAX_AWK | RE_NEWLINE_OR)
- X#define RE_SYNTAX_GREP (RE_BK_PLUS_QM | RE_NEWLINE_OR)
- X#define RE_SYNTAX_EMACS 0
- X
- X/* This data structure is used to represent a compiled pattern. */
- X
- Xstruct re_pattern_buffer
- X {
- X char *buffer; /* Space holding the compiled pattern commands. */
- X int allocated; /* Size of space that buffer points to */
- X int used; /* Length of portion of buffer actually occupied */
- X char *fastmap; /* Pointer to fastmap, if any, or zero if none. */
- X /* re_search uses the fastmap, if there is one,
- X to skip quickly over totally implausible characters */
- X char *translate; /* Translate table to apply to all characters before comparing.
- X Or zero for no translation.
- X The translation is applied to a pattern when it is compiled
- X and to data when it is matched. */
- X char fastmap_accurate;
- X /* Set to zero when a new pattern is stored,
- X set to one when the fastmap is updated from it. */
- X char can_be_null; /* Set to one by compiling fastmap
- X if this pattern might match the null string.
- X It does not necessarily match the null string
- X in that case, but if this is zero, it cannot.
- X 2 as value means can match null string
- X but at end of range or before a character
- X listed in the fastmap. */
- X };
- X
- X/* Structure to store "register" contents data in.
- X
- X Pass the address of such a structure as an argument to re_match, etc.,
- X if you want this information back.
- X
- X start[i] and end[i] record the string matched by \( ... \) grouping i,
- X for i from 1 to RE_NREGS - 1.
- X start[0] and end[0] record the entire string matched. */
- X
- Xstruct re_registers
- X {
- X int start[RE_NREGS];
- X int end[RE_NREGS];
- X };
- X
- X/* These are the command codes that appear in compiled regular expressions, one per byte.
- X Some command codes are followed by argument bytes.
- X A command code can specify any interpretation whatever for its arguments.
- X Zero-bytes may appear in the compiled regular expression. */
- X
- Xenum regexpcode
- X {
- X unused,
- X exactn, /* followed by one byte giving n, and then by n literal bytes */
- X begline, /* fails unless at beginning of line */
- X endline, /* fails unless at end of line */
- X jump, /* followed by two bytes giving relative address to jump to */
- X on_failure_jump, /* followed by two bytes giving relative address of place
- X to resume at in case of failure. */
- X finalize_jump, /* Throw away latest failure point and then jump to address. */
- X maybe_finalize_jump, /* Like jump but finalize if safe to do so.
- X This is used to jump back to the beginning
- X of a repeat. If the command that follows
- X this jump is clearly incompatible with the
- X one at the beginning of the repeat, such that
- X we can be sure that there is no use backtracking
- X out of repetitions already completed,
- X then we finalize. */
- X dummy_failure_jump, /* jump, and push a dummy failure point.
- X This failure point will be thrown away
- X if an attempt is made to use it for a failure.
- X A + construct makes this before the first repeat. */
- X anychar, /* matches any one character */
- X charset, /* matches any one char belonging to specified set.
- X First following byte is # bitmap bytes.
- X Then come bytes for a bit-map saying which chars are in.
- X Bits in each byte are ordered low-bit-first.
- X A character is in the set if its bit is 1.
- X A character too large to have a bit in the map
- X is automatically not in the set */
- X charset_not, /* similar but match any character that is NOT one of those specified */
- X start_memory, /* starts remembering the text that is matched
- X and stores it in a memory register.
- X followed by one byte containing the register number.
- X Register numbers must be in the range 0 through NREGS. */
- X stop_memory, /* stops remembering the text that is matched
- X and stores it in a memory register.
- X followed by one byte containing the register number.
- X Register numbers must be in the range 0 through NREGS. */
- X duplicate, /* match a duplicate of something remembered.
- X Followed by one byte containing the index of the memory register. */
- X before_dot, /* Succeeds if before dot */
- X at_dot, /* Succeeds if at dot */
- X after_dot, /* Succeeds if after dot */
- X begbuf, /* Succeeds if at beginning of buffer */
- X endbuf, /* Succeeds if at end of buffer */
- X wordchar, /* Matches any word-constituent character */
- X notwordchar, /* Matches any char that is not a word-constituent */
- X wordbeg, /* Succeeds if at word beginning */
- X wordend, /* Succeeds if at word end */
- X wordbound, /* Succeeds if at a word boundary */
- X notwordbound, /* Succeeds if not at a word boundary */
- X syntaxspec, /* Matches any character whose syntax is specified.
- X followed by a byte which contains a syntax code, Sword or such like */
- X notsyntaxspec /* Matches any character whose syntax differs from the specified. */
- X };
- X
- Xextern char *re_compile_pattern ();
- X/* Is this really advertised? */
- Xextern void re_compile_fastmap ();
- Xextern int re_search (), re_search_2 ();
- Xextern int re_match (), re_match_2 ();
- X
- X/* 4.2 bsd compatibility (yuck) */
- Xextern char *re_comp ();
- Xextern int re_exec ();
- X
- X#ifdef SYNTAX_TABLE
- Xextern char *re_syntax_table;
- X#endif
- SHAR_EOF
- len=`wc -c < regex.h`
- if test $len != 8080 ; then
- echo error: regex.h was $len bytes long, should have been 8080
- fi
- fi # end of overwriting check
-
- if test -f sortmail.c
- then
- echo shar: will not over-write existing file sortmail.c
- else
- echo shar: extracting 'sortmail.c', 6369 characters
- sed 's/^X//' > sortmail.c <<'SHAR_EOF'
- X
- X
- X#include <stdio.h>
- X#include <varargs.h>
- X#include <errno.h>
- X#include <alloca.h>
- X#include <sys/param.h> /* defines MAXPATHLEN */
- X
- X#include "sortmail.h"
- X#include "regex.h" /* use gnu regex */
- X
- X FILE *logfile ;
- X
- X/*
- X * Hand 'pat' to re_comp() so that later re_exec() calls test against it.
- X * Returns 0 when the pattern was accepted.  When re_comp() reports an
- X * error the message is written to the log file and 1 is returned.
- X */
- Xstatic int
- XReCompile(pat)
- X    char *pat ;
- X{
- X    char *problem = re_comp(pat) ;
- X
- X    if( problem == NULL )
- X        return 0 ;
- X
- X    fprintf(logfile, "sortmail: pattern %s: %s\n", pat, problem) ;
- X    return 1 ;
- X}
- X
- Xextern int sys_nerr ;
- Xextern char *sys_errlist[] ;
- X
- X
- X
- X char *user = NULL ;
- X char *home = NULL ;
- X char *mailbox = NULL ;
- X char *mailrc = NULL ;
- X char *sortmailrc = NULL ;
- X char *mbox = NULL ;
- X char *deflt = NULL ;
- X char *vacation = NULL ;
- X char *folder = NULL ;
- X char *logfilename = NULL ;
- X int verbose = 0 ;
- X
- X PatInfo *patterns = NULL, *lastpat = NULL ;
- X
- Xstatic char tmpfilename[256] ;
- X
- Xstatic void dispose() ;
- Xstatic void append() ;
- Xstatic void put_pipe() ;
- Xstatic void forward() ;
- X
- X/*
- X * Test whether the command-line word 'argv' is the option named 'str'.
- X *
- X * Returns 0 when it is not.  When it is, 'nargs' (the number of words
- X * the option consumes) is subtracted from the caller's count of
- X * remaining words, *argc.  If that leaves nothing for the option's
- X * arguments the program exits through die(); otherwise 1 is returned.
- X */
- Xstatic int
- Xargcheck(str, argv, nargs, argc)
- X    char *str, *argv ;
- X    int nargs ;
- X    int *argc ;
- X{
- X    if( strcmp(str, argv) != 0 )
- X        return 0 ;
- X    /* die() prints the "sortmail: " prefix itself, so it is not repeated here */
- X    if( (*argc -= nargs) <= 0 )
- X        die("%s option requires %d arguments\n", str, nargs) ;
- X    return 1 ;
- X}
- X
- X
- X
- X/*
- X * Program entry point.
- X *
- X * Parses the command line, reads the init files, copies the message
- X * arriving on stdin into a temporary file, and then tries each pattern
- X * in turn.  The first pattern that matches decides what happens to the
- X * message; if none matches, the message is appended to 'deflt' (or to
- X * the mailbox when no default is set).  When the 'vacation' variable is
- X * set and the user is named on a To:, Cc: or Apparently-To: line, the
- X * message is also piped through /usr/ucb/vacation.
- X */
- Xmain(argc,argv)
- X    int argc ;
- X    char **argv ;
- X{
- X    FILE *tmpfile ;
- X    char line[1024] ;
- X    PatInfo *pat ;
- X    int firstline ;
- X    int cont = 0 ;      /* set when the line just tested did not match */
- X    int foundit = 0 ;   /* set once a pattern has disposed of the message */
- X    int open_errno ;
- X
- X    /* log to the console when it can be opened, otherwise to stderr */
- X    logfile = fopen("/dev/console","w") ;
- X    if( logfile == NULL )
- X        logfile = stderr ;
- X
- X    /* any word that is not a recognised option is taken as the user name */
- X    while( --argc > 0 )
- X    {
- X        ++argv ;
- X        if( argcheck("-v", *argv, 0, &argc) )
- X            ++verbose ;
- X        else if( argcheck("-home", *argv, 1, &argc) )
- X            home = *++argv ;
- X        else if( argcheck("-mailbox", *argv, 1, &argc) )
- X            mailbox = *++argv ;
- X        else if( argcheck("-mailrc", *argv, 1, &argc) )
- X            mailrc = *++argv ;
- X        else if( argcheck("-sortmailrc", *argv, 1, &argc) )
- X            sortmailrc = *++argv ;
- X        else
- X            user = *argv ;
- X    }
- X
- X#ifdef DEBUG
- X    verbose = 1 ;
- X#endif /* DEBUG */
- X
- X    read_initfiles() ;
- X
- X    /* NOTE(review): the temporary file name is predictable and is opened
- X     * with plain fopen(); consider mkstemp() if /tmp is shared with
- X     * untrusted users.
- X     */
- X    sprintf(tmpfilename, "/tmp/sortmail%d", getpid()) ;
- X    if( (tmpfile = fopen(tmpfilename, "w+")) == NULL )
- X    {
- X        /* keep errno: the fprintf() calls below may change it */
- X        open_errno = errno ;
- X        /* report the file NAME; tmpfile itself is a NULL FILE pointer here */
- X        fprintf(stderr, "sortmail: can't open %s, mail to %s may be lost\n",
- X            tmpfilename, user) ;
- X        fprintf(logfile, "sortmail: can't open %s, %s\n",
- X            tmpfilename, sys_errlist[open_errno]) ;
- X        append(stdin, mailbox) ; /* try to rescue the situation */
- X        exit(1) ;
- X    }
- X
- X    /* Spool the message.  Every line after the first that starts with
- X     * "From " gets a '>' put in front of it.
- X     */
- X    firstline = 1 ;
- X    while( fgets(line, sizeof(line), stdin) != NULL ) {
- X        if( !firstline && strmatch(line, "From ") )
- X            putc('>', tmpfile) ;
- X        fputs(line, tmpfile) ;
- X        firstline = 0 ;
- X    }
- X#ifdef DEBUG
- X    fprintf(logfile, "got a message in %s\n", tmpfilename) ;
- X#endif /* DEBUG */
- X
- X    for( pat = patterns; pat != NULL && !foundit ; pat = pat->next )
- X    {
- X        if( !ReCompile(pat->pattern) )
- X        {
- X            rewind(tmpfile) ;
- X            while( fgets(line, sizeof(line), tmpfile) != NULL ) {
- X                /* the first empty line ends the header; only P_ALL reads on */
- X                if( line[0] == '\n' && pat->flags != P_ALL )
- X                    break ;
- X                if( pat->flags == P_HEADER ||
- X                    pat->flags == P_SUBJECT && strmatch(line, "Subject: ") ||
- X                    pat->flags == P_FROM && strmatch(line, "From: ") ||
- X                    pat->flags == P_TO &&
- X                      (strmatch(line, "To: ") || strmatch(line, "Cc: ") ||
- X                       strmatch(line, "Apparently-To: ") ) ||
- X                    isspace(line[0]) && cont ||
- X                    pat->flags == P_ALL )
- X                {
- X                    if( re_exec(line) )
- X                    {
- X                        dispose(pat,tmpfile) ;
- X                        foundit = 1 ;
- X                        break ;
- X                    }
- X                    else
- X                        /* if next line is a continuation line, test it too */
- X                        cont = 1 ;
- X                }
- X                else
- X                    cont = 0 ;
- X            }
- X        }
- X    }
- X
- X    if( vacation != NULL )
- X    {
- X        rewind(tmpfile) ;
- X        /* the user name itself is used as the pattern */
- X        if( !ReCompile(user) )
- X        {
- X            while( fgets(line, sizeof(line), tmpfile) != NULL ) {
- X                if( line[0] == '\n' )
- X                    break ;
- X                if( strmatch(line, "To: ") || strmatch(line, "Cc: ") ||
- X                    strmatch(line, "Apparently-To: ") )
- X                {
- X                    if( re_exec(line) )
- X                    {
- X                        /* line is reused to hold the command string */
- X                        sprintf(line, "/usr/ucb/vacation %s", user) ;
- X                        put_pipe(tmpfile, line) ;
- X                        if( verbose )
- X                            fprintf(logfile, "message piped through %s\n", line) ;
- X                        break ;
- X                    }
- X                }
- X            }
- X        }
- X    }
- X
- X    /* the name is removed here; the open stream is still read below */
- X    unlink(tmpfilename) ;
- X
- X    if( !foundit )
- X        append(tmpfile, deflt != NULL ? deflt : mailbox) ;
- X    exit(0) ;
- X}
- X
- X
- X/*
- X * Report a fatal error and exit with status 1.  The first argument is
- X * a printf-style format, the rest are its arguments.  The message is
- X * written to the log file after a "sortmail: " prefix.
- X */
- X/*VARARGS0*/
- Xdie(va_alist)
- X    va_dcl
- X{
- X    va_list ap ;
- X    char *format ;
- X
- X    fprintf(logfile, "sortmail: ") ;
- X
- X    va_start(ap) ;
- X    format = va_arg(ap, char *) ;
- X    vfprintf(logfile, format, ap) ;
- X    va_end(ap) ;
- X
- X    exit(1) ;
- X}
- X
- X
- X /* Deal with the message held in 'file' in the manner requested by
- X * the matching pattern 'pat':
- X *   C_MAIL  append to the mailbox, or forward when a destination is given
- X *   C_FILE  append to the named file
- X *   C_PIPE  pipe through the named command
- X *   C_JUNK  throw the message away
- X */
- X
- Xstatic void
- Xdispose(pat, file)
- Xregister PatInfo *pat ;
- Xregister FILE *file ;
- X{
- X    switch( pat->command ) {
- X      case C_MAIL: /* append to user's mail file, or forward */
- X        if( pat->dest == NULL || pat->dest[0] == '\0' )
- X            append(file, mailbox) ;
- X        else
- X            forward(file, pat->dest) ;
- X        break ;
- X
- X      case C_FILE: /* append to named file. */
- X        append(file, pat->dest) ;
- X        break ;
- X
- X      case C_PIPE: /* pipe through named command */
- X        put_pipe(file, pat->dest) ;
- X        if( verbose )
- X            fprintf(logfile, "message piped through %s\n", pat->dest) ;
- X        break ;
- X
- X      case C_JUNK: /* throw it away */
- X        if( verbose )
- X            fprintf(logfile, "message deleted\n") ;
- X        break ;
- X
- X      default: /* no other command values are defined; nothing to do */
- X        break ;
- X    }
- X}
- X
- X
- X/*
- X * Append the whole of 'file' (rewound first) to the file named by
- X * 'outfile', followed by one empty line.  'outfile' is passed through
- X * filename_expand() before it is opened.
- X *
- X * If the file cannot be opened the failure is logged and the message
- X * is appended to the mailbox instead.  If the mailbox itself cannot be
- X * opened a warning is printed on stderr.
- X */
- Xstatic void
- Xappend(file, outfile)
- X    FILE *file ;
- X    char *outfile ;
- X{
- X    FILE *ofile ;
- X    char line[1024] ;   /* first the expanded file name, then the copy buffer */
- X    int open_errno ;
- X
- X    filename_expand(line, outfile) ;
- X    if( verbose )
- X        fprintf(logfile, "message filed to %s\n", line) ;
- X
- X    rewind(file) ;
- X
- X    if( (ofile = fopen(line, "a")) == NULL )
- X    {
- X        /* keep errno: the fprintf() calls below may change it */
- X        open_errno = errno ;
- X        fprintf(logfile, "sortmail: can't open %s, %s\n",
- X            line,sys_errlist[open_errno]);
- X        if( strcmp(outfile, mailbox) == 0 )
- X            fprintf(stderr,
- X                "sortmail: Cannot open %s, %s. Mail to %s may be lost\n",
- X                mailbox, sys_errlist[open_errno], user) ;
- X        else
- X            append(file, mailbox) ;     /* fall back to the mailbox */
- X    }
- X    else
- X    {
- X        while( fgets(line, sizeof(line), file) != NULL )
- X            fputs(line, ofile) ;
- X        fputs("\n", ofile) ;
- X        fclose(ofile) ;
- X    }
- X}
- X
- X
- X/*
- X * Feed the whole of 'file' (rewound first) to the standard input of
- X * the shell command 'dest'.  If the pipe cannot be opened the failure
- X * is logged and the message is appended to the mailbox instead.
- X */
- Xstatic void
- Xput_pipe(file, dest)
- X    FILE *file ;
- X    char *dest ;
- X{
- X    FILE *cmd ;
- X    char buf[1024] ;
- X
- X    cmd = popen(dest, "w") ;
- X    if( cmd == NULL )
- X    {
- X        fprintf(logfile, "sortmail: cannot open pipe to command %s\n",
- X            dest ) ;
- X        append(file, mailbox) ;
- X        return ;
- X    }
- X
- X    rewind(file) ;
- X    while( fgets(buf, sizeof(buf), file) != NULL )
- X        fputs(buf, cmd) ;
- X    pclose(cmd) ;
- X}
- X
- X
- X/*
- X * Forward the message in 'file' to the address 'dest' by piping it
- X * through /usr/lib/sendmail.
- X */
- Xstatic void
- Xforward(file, dest)
- X    FILE *file ;
- X    char *dest ;
- X{
- X    char sendmail_cmd[1024] ;
- X
- X    sprintf(sendmail_cmd, "/usr/lib/sendmail %s", dest) ;
- X    put_pipe(file, sendmail_cmd) ;
- X
- X    if( !verbose )
- X        return ;
- X    fprintf(logfile, "message mailed to %s\n", dest) ;
- X}
- SHAR_EOF
- len=`wc -c < sortmail.c`
- if test $len != 6369 ; then
- echo error: sortmail.c was $len bytes long, should have been 6369
- fi
- fi # end of overwriting check
-
- if test -f parse.c
- then
- echo shar: will not over-write existing file parse.c
- else
- echo shar: extracting 'parse.c', 7864 characters
- sed 's/^X//' > parse.c <<'SHAR_EOF'
- X
- X
- X#include <stdio.h>
- X#include <stdlib.h> /* defines getenv(3) */
- X#include <pwd.h>
- X#include <ctype.h>
- X#include <string.h>
- X#include <sys/param.h>
- X
- X#include "sortmail.h"
- X
- X#define MAXLINE 256
- X
- X#ifdef DEBUG
- X#define register
- X#endif
- X
- Xstatic char spooldir[] = SPOOLDIR ;
- X
- Xstatic void read_initfile() ;
- Xstatic void parse_var() ;
- Xstatic void set_var() ;
- Xstatic void add_pat() ;
- Xstatic void get_dest() ;
- X
- X#define NN(str) ((str)==NULL ? "-none-" : (str))
- X
- X/*
- X * Return a newly allocated string holding 'head' followed by 'tail'.
- X * Exits through die() when no memory is available.
- X */
- Xstatic char *
- Xconcat_or_die(head, tail)
- X    char *head, *tail ;
- X{
- X    char *joined = (char *) malloc(strlen(head) + strlen(tail) + 1) ;
- X
- X    if( joined == NULL )
- X        die("out of memory") ;
- X    strcpy(joined, head) ;
- X    strcat(joined, tail) ;
- X    return joined ;
- X}
- X
- X/*
- X * Fill in a default for every variable that was not set on the command
- X * line, then read the .mailrc file followed by the .sortmailrc file.
- X * With two or more -v options the resulting values are written to the
- X * log file.  Exits through die() if the user name or home directory
- X * cannot be determined, or if memory runs out.
- X */
- Xread_initfiles()
- X{
- X    struct passwd *passwd ;
- X
- X    /* first, try to set up some of the defaults */
- X
- X    if( user == NULL ) {
- X        if( (user = (char *) malloc(L_cuserid)) == NULL )
- X            die("out of memory") ;
- X        if( cuserid(user) == NULL )
- X            die("can't determine username") ;
- X    }
- X
- X    if( home == NULL ) {
- X        if( (passwd = getpwnam(user)) != NULL ) {
- X            /* copy it: a copy cannot be changed by later passwd lookups */
- X            if( (home = strdup(passwd->pw_dir)) == NULL )
- X                die("out of memory") ;
- X        }
- X        else if( (home = getenv("HOME")) == NULL )
- X            die("can't determine %s's directory",user) ;
- X    }
- X
- X    if( mailbox == NULL )
- X        mailbox = concat_or_die(spooldir, user) ;
- X
- X    if( mailrc == NULL )
- X        mailrc = concat_or_die(home, "/.mailrc") ;
- X
- X    if( sortmailrc == NULL )
- X        sortmailrc = concat_or_die(home, "/.sortmailrc") ;
- X
- X    if( mbox == NULL )
- X        mbox = MBOX ;
- X
- X    if( folder == NULL )
- X        folder = FOLDER ;
- X
- X    /* .mailrc is read first, so .sortmailrc is processed after it */
- X    read_initfile(mailrc) ;
- X    read_initfile(sortmailrc) ;
- X    if( verbose >= 2 ) {
- X        fprintf(logfile,"$USER = %s\n", NN(user)) ;
- X        fprintf(logfile,"$HOME = %s\n", NN(home)) ;
- X        fprintf(logfile,"mailbox = %s\n", NN(mailbox)) ;
- X        fprintf(logfile,"mailrc = %s\n", NN(mailrc)) ;
- X        fprintf(logfile,"sortmailrc = %s\n", NN(sortmailrc)) ;
- X        fprintf(logfile,"mbox = %s\n", NN(mbox)) ;
- X        fprintf(logfile,"deflt = %s\n", NN(deflt)) ;
- X        fprintf(logfile,"vacation = %s\n", NN(vacation)) ;
- X        fprintf(logfile,"folder = %s\n", NN(folder)) ;
- X    }
- X}
- X
- X
- X/*
- X * Read one init file and act on each of its lines:
- X *   - a line whose first character is '#' is a comment
- X *   - a line that starts (after leading blanks) with "set" is handed
- X *     to parse_var()
- X *   - a line that starts (after leading blanks) with '/' is handed
- X *     to add_pat()
- X *   - anything else is ignored
- X * A file that cannot be opened is silently skipped.
- X *
- X * A line longer than the buffer is delivered by fgets() in several
- X * pieces.  Only the first piece is interpreted; the remaining pieces
- X * are discarded so that the tail of a long line is never mistaken for
- X * a new "set" or pattern line.
- X */
- Xstatic void
- Xread_initfile(file)
- X    char *file ;
- X{
- Xregister FILE *ifile ;
- X    char line[MAXLINE] ;
- Xregister char *ptr ;
- X    int in_tail = 0 ;       /* nonzero while inside the tail of a long line */
- X    int complete ;          /* nonzero if this piece ends its line */
- X
- X    if( (ifile = fopen(file, "r")) == NULL )
- X        return ;
- X
- X    while( fgets(line, MAXLINE, ifile) != NULL ) {
- X        complete = (strchr(line, '\n') != NULL) ;
- X
- X        if( !in_tail && line[0] != '#' )
- X        {
- X            /* the cast keeps isspace() defined for 8-bit characters */
- X            for( ptr = line; isspace((unsigned char) *ptr); ++ptr );
- X
- X            if( strmatch(ptr,"set") )       /* variable */
- X                parse_var(ptr) ;
- X            else if( *ptr == '/' )          /* pattern */
- X                add_pat(ptr) ;
- X            /* else ignore it */
- X        }
- X
- X        in_tail = !complete ;
- X    }
- X    fclose(ifile) ;
- X}
- X
- X
- X/*
- X * Handle one "set name = value" line from an init file.
- X *
- X * ptr points at the word "set".  The word must be followed by white
- X * space and then one of the known variable names; set_var() parses
- X * the rest of the line and stores the value.  Setting "logfile" also
- X * opens the named file for appending and makes it the log stream; if
- X * it cannot be opened, logging goes to stderr.
- X *
- X * Unknown names and malformed lines are silently ignored.
- X */
- Xstatic void
- Xparse_var(ptr)
- Xregister char *ptr ;
- X{
- X    ptr += 3 ;                      /* skip "set" */
- X
- X    /* the casts keep isspace() defined for 8-bit characters */
- X    if( !isspace((unsigned char) *ptr) )
- X        return ;                    /* something was wrong */
- X
- X    while( isspace((unsigned char) *ptr) )
- X        ++ptr ;
- X
- X    if( strmatch(ptr,"user") )
- X        set_var(&user, ptr+4) ;
- X    else if( strmatch(ptr,"home") )
- X        set_var(&home, ptr+4) ;
- X    else if( strmatch(ptr,"mailbox") )
- X        set_var(&mailbox, ptr+7) ;
- X    else if( strmatch(ptr,"mailrc") )
- X        set_var(&mailrc, ptr+6) ;
- X    else if( strmatch(ptr,"sortmailrc") )
- X        set_var(&sortmailrc, ptr+10) ;
- X    else if( strmatch(ptr,"mbox") )
- X        set_var(&mbox, ptr+4) ;
- X    else if( strmatch(ptr,"folder") )
- X        set_var(&folder, ptr+6) ;
- X    else if( strmatch(ptr,"default") )
- X        set_var(&deflt, ptr+7) ;
- X    else if( strmatch(ptr,"vacation") )
- X        set_var(&vacation, ptr+8) ;
- X    else if( strmatch(ptr,"logfile") ) {
- X        set_var(&logfilename, ptr+7) ;
- X        if( (logfile = fopen(logfilename, "a")) == NULL )
- X            logfile = stderr ;
- X    }
- X    /* else ignore it */
- X}
- X
- X/*
- X * Assign a value to the configuration variable *var.
- X *
- X * ptr points just past the variable name in a "set name = value"
- X * line.  If no '=' follows (after optional blanks) the variable is set
- X * to the empty string.  Otherwise the value is the text that follows
- X * the '=' and any blanks:
- X *   'text' or "text"   everything up to the closing quote, or up to
- X *                      the end of the line if the quote is missing
- X *   text               everything up to the end of the line
- X *
- X * The value is stored in memory obtained from strdup().  The previous
- X * value of *var is not freed.
- X * NOTE(review): presumably because it may be a compile-time default
- X * rather than malloc'ed memory -- confirm before adding a free().
- X *
- X * Calls die() if memory runs out.
- X */
- Xstatic void
- Xset_var(var, ptr)
- Xregister char **var ;
- Xregister char *ptr ;
- X{
- X    char quote = '\0' ;             /* closing quote to look for, if any */
- X
- X    /* the casts keep isspace() defined for 8-bit characters */
- X    while( isspace((unsigned char) *ptr) )  /* consume blanks before '=' */
- X        ++ptr ;
- X
- X    if( *ptr != '=' )
- X    {
- X        *var = "" ;
- X        return ;
- X    }
- X
- X    ++ptr ;
- X    while( isspace((unsigned char) *ptr) )  /* consume blanks after '=' */
- X        ++ptr ;
- X
- X    if( *ptr == '\'' || *ptr == '"' )
- X        quote = *ptr++ ;
- X
- X    if( (*var = strdup(ptr)) == NULL )
- X        die("out of memory") ;
- X
- X    /* cut the copy at the closing quote or at the end of the line */
- X    for(ptr = *var; *ptr != '\0' && *ptr != '\n'; ++ptr)
- X        if( quote != '\0' && *ptr == quote )
- X            break ;
- X    *ptr = '\0' ;
- X}
- X
- X
- X
- X /* try to make sense out of patterns in the form
- X /regular expression/modifs:command
- X */
- X
- X/*
- X * Parse one pattern line of the form
- X *      /regular expression/modifiers:command
- X * and append the resulting PatInfo to the pattern list.
- X *
- X * ptr points at the leading '/'.  Inside the expression a backslash
- X * quotes the next character (both characters are kept) and a '/'
- X * inside [...] does not end the expression.
- X *
- X * Modifiers select pat->flags: t = P_TO, f = P_FROM, s = P_SUBJECT
- X * (the default), h = P_HEADER, a = P_ALL.  Other modifier characters
- X * are skipped.
- X *
- X * Commands: m = C_MAIL (the default), j = C_JUNK, f = C_FILE,
- X * + = C_FILE with the '+' kept as part of the destination,
- X * | = C_PIPE.  For every command except j the rest of the line is
- X * stored as the destination by get_dest().
- X *
- X * A line with a missing closing '/' or ']', or one that ends in a
- X * backslash, is reported to logfile and ignored.  Calls die() if
- X * memory runs out.
- X */
- Xstatic void
- Xadd_pat(ptr)
- Xregister char *ptr ;
- X{
- X    char buffer[1024] ;             /* KLUDGE! should be unlimited */
- Xregister char *optr = buffer ;
- Xregister PatInfo *pat ;
- Xregister int done, done2 ;
- X
- X    /* the copy below never grows, so this bounds every store */
- X    if( strlen(ptr) >= sizeof(buffer) ) {
- X        fprintf(logfile, "sortmail: pattern too long\n") ;
- X        return ;
- X    }
- X
- X    ++ptr ;                         /* skip leading '/' */
- X
- X    for(done=0;!done;)
- X      switch( *ptr ) {
- X        case '\0':
- X          fprintf(logfile, "sortmail: missing '/' in pattern\n") ;
- X          return ;
- X        case '\\':                  /* '\' quotes anything */
- X          if( ptr[1] == '\0' ) {
- X            /* nothing left to quote: do not step past the NUL */
- X            fprintf(logfile, "sortmail: missing '/' in pattern\n") ;
- X            return ;
- X          }
- X          *optr++ = *ptr++ ;
- X          *optr++ = *ptr++ ;
- X          break ;
- X        case '/':                   /* '/' terminates */
- X          ++ptr ;
- X          done = 1 ;
- X          break ;
- X        case '[':                   /* '[' starts a set */
- X          *optr++ = *ptr++ ;
- X          for(done2=0; !done2;)
- X            switch(*ptr) {
- X              case '\0':            /* premature EOS, reject */
- X                fprintf(logfile, "sortmail: missing ']' in pattern\n") ;
- X                return ;
- X              case ']':             /* ']' terminates a set */
- X                done2=1 ;
- X                *optr++ = *ptr++ ;
- X                break ;
- X              default:
- X                *optr++ = *ptr++ ;
- X                break ;
- X            }
- X          break ;
- X        default:
- X          *optr++ = *ptr++ ;
- X      }
- X    *optr = '\0' ;
- X
- X    pat = (PatInfo *) malloc(sizeof(PatInfo)) ;
- X    if( pat == NULL || (pat->pattern = strdup(buffer)) == NULL )
- X        die("out of memory") ;
- X    pat->next = NULL ;
- X    pat->flags = P_SUBJECT ;
- X    pat->command = C_MAIL ;
- X    pat->dest = NULL ;
- X
- X    if( patterns == NULL )
- X        patterns = pat ;
- X    else
- X        lastpat->next = pat ;
- X    lastpat = pat ;
- X
- X    /* parse modifiers part */
- X
- X    for(done=0;!done;)
- X      switch( *ptr ) {
- X        case '\0': done = 1 ; break ;
- X        case ':': ++ptr ; done = 1 ; break ;
- X        case 't': pat->flags = P_TO ; ++ptr ; break ;
- X        case 'f': pat->flags = P_FROM ; ++ptr ; break ;
- X        case 's': pat->flags = P_SUBJECT ; ++ptr ; break ;
- X        case 'h': pat->flags = P_HEADER ; ++ptr ; break ;
- X        case 'a': pat->flags = P_ALL ; ++ptr ; break ;
- X        default: ++ptr ; break ;
- X      }
- X
- X    /* the cast keeps isspace() defined for 8-bit characters */
- X    while( isspace((unsigned char) *ptr) )
- X        ++ptr ;
- X
- X    /* parse command part */
- X
- X    switch( *ptr ) {
- X      case 'm':
- X        pat->command = C_MAIL ;
- X        get_dest(pat, ptr+1) ;
- X        break ;
- X      case 'j':
- X        pat->command = C_JUNK ;
- X        break ;
- X      case 'f':
- X        pat->command = C_FILE ;
- X        get_dest(pat, ptr+1) ;
- X        break ;
- X      case '+':                     /* the '+' stays in the destination */
- X        pat->command = C_FILE ;
- X        get_dest(pat, ptr) ;
- X        break ;
- X      case '|':
- X        pat->command = C_PIPE ;
- X        get_dest(pat, ptr+1) ;
- X        break ;
- X      default:                      /* nothing, or unknown: keep defaults */
- X        break ;
- X    }
- X}
- X
- X
- X/*
- X * Store the destination part of a pattern line in pat->dest.
- X *
- X * Leading blanks in ptr are skipped; the rest is copied with strdup()
- X * and the copy is cut at the first newline.  Calls die() if memory
- X * runs out.
- X */
- Xstatic void
- Xget_dest(pat,ptr)
- Xregister PatInfo *pat ;
- Xregister char *ptr ;
- X{
- X    /* the cast keeps isspace() defined for 8-bit characters */
- X    while( isspace((unsigned char) *ptr) )
- X        ++ptr ;
- X
- X    if( (pat->dest = strdup(ptr)) == NULL )
- X        die("out of memory") ;
- X
- X    if( (ptr = strchr(pat->dest, '\n')) != NULL )
- X        *ptr = '\0' ;
- X}
- X
- X
- X
- X
- X/*
- X * Expand the file name SRC into DEST.
- X *
- X *   /path          copied unchanged
- X *   +name          home/folder/name
- X *   $HOME/path     home/path
- X *   ~/path         home/path
- X *   ~user[/path]   that user's passwd home directory, then /path
- X *   anything else  taken relative to home
- X *
- X * Returns 0 on success, or -1 when getpwnam() finds no entry for the
- X * name after '~'.
- X *
- X * DEST must be large enough for the result; no length check is made
- X * here.  SRC is modified temporarily while the user name is looked up
- X * but holds its original contents again on return.  The expanded
- X * name is no longer copied back into SRC, which could overrun it.
- X */
- Xint
- Xfilename_expand(dest, src)
- X    char *dest ;
- X    char *src ;                     /* file to match */
- X{
- X    char *dirstr ;                  /* first '/' in a "~user/..." name */
- X    struct passwd *passent ;
- X
- X    if( src[0] == '/' ) {           /* absolute */
- X        strcpy(dest, src) ;         /* that was easy */
- X        return 0 ;
- X    }
- X
- X    if( src[0] == '+' ) {           /* folder name */
- X        strcpy(dest, home) ;
- X        strcat(dest, "/") ;
- X        strcat(dest, folder) ;
- X        strcat(dest, "/") ;
- X        strcat(dest, src+1) ;
- X        return 0 ;
- X    }
- X
- X    if( strmatch(src, "$HOME/") )
- X    {
- X        strcpy(dest, home) ;
- X        strcat(dest, src+5) ;       /* src+5 is the '/' after "$HOME" */
- X        return 0 ;
- X    }
- X
- X    if( src[0] != '~' ) {           /* relative path, assume relative */
- X        strcpy(dest, home) ;        /* to $HOME */
- X        strcat(dest, "/") ;
- X        strcat(dest, src) ;
- X        return 0 ;
- X    }
- X
- X    /* what remains starts with '~':
- X     *   "~/..."   home directory
- X     *   "~xx..."  look up user xx
- X     */
- X
- X    if( src[1] == '/' )
- X    {
- X        strcpy(dest, home) ;
- X        strcat(dest, src+1) ;
- X        return 0 ;
- X    }
- X
- X    /* cut src at the first '/' so that src+1 is just the user name */
- X    dirstr = strchr(src, '/') ;
- X    if( dirstr != NULL )
- X        *dirstr = '\0' ;
- X    passent = getpwnam(src+1) ;
- X    if( dirstr != NULL )
- X        *dirstr = '/' ;
- X    if( passent == NULL )
- X        return -1 ;
- X
- X    strcpy(dest, passent->pw_dir) ;
- X    if( dirstr != NULL )
- X        strcat(dest, dirstr) ;
- X    return 0 ;
- X}
- SHAR_EOF
- len=`wc -c < parse.c`
- if test $len != 7864 ; then
- echo error: parse.c was $len bytes long, should have been 7864
- fi
- fi # end of overwriting check
-
- if test -f regex.c
- then
- echo shar: will not over-write existing file regex.c
- else
- echo shar: extracting 'regex.c', 44540 characters
- sed 's/^X//' > regex.c <<'SHAR_EOF'
- X/* Extended regular expression matching and search.
- X Copyright (C) 1985 Free Software Foundation, Inc.
- X
- X This program is free software; you can redistribute it and/or modify
- X it under the terms of the GNU General Public License as published by
- X the Free Software Foundation; either version 1, or (at your option)
- X any later version.
- X
- X This program is distributed in the hope that it will be useful,
- X but WITHOUT ANY WARRANTY; without even the implied warranty of
- X MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- X GNU General Public License for more details.
- X
- X You should have received a copy of the GNU General Public License
- X along with this program; if not, write to the Free Software
- X Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- X
- XIn other words, you are welcome to use, share and improve this program.
- XYou are forbidden to forbid anyone else to use, share and improve
- Xwhat you give them. Help stamp out software-hoarding! */
- X
- X
- X/* To test, compile with -Dtest.
- X This Dtestable feature turns this into a self-contained program
- X which reads a pattern, describes how it compiles,
- X then reads a string and searches for it. */
- X
- X
- X#ifdef SVr4
- X#define bcopy(s,d,n) memmove(d,s,n)
- X#define bzero(d,n) memset(d,0,n)
- X#define bcmp memcmp
- X#endif
- X
- X#ifdef emacs
- X
- X/* The `emacs' switch turns on certain special matching commands
- X that make sense only in emacs. */
- X
- X#include "config.h"
- X#include "lisp.h"
- X#include "buffer.h"
- X#include "syntax.h"
- X
- X#else /* not emacs */
- X
- X/*
- X * Define the syntax stuff, so we can do the \<...\> things.
- X */
- X
- X#ifndef Sword /* must be non-zero in some of the tests below... */
- X#define Sword 1
- X#endif
- X
- X#define SYNTAX(c) re_syntax_table[c]
- X
- X#ifdef SYNTAX_TABLE
- X
- Xchar *re_syntax_table;
- X
- X#else
- X
- Xstatic char re_syntax_table[256];
- X
- X/* Fill in re_syntax_table the first time this is called: the entries
- X   for the ASCII letters and digits become Sword and every other entry
- X   is zero.  Later calls do nothing.  */
- Xstatic void
- Xinit_syntax_once ()
- X{
- X  static int initialized = 0;
- X  static char bounds[3][2] = { {'a', 'z'}, {'A', 'Z'}, {'0', '9'} };
- X  register int ch;
- X  register int i;
- X
- X  if (!initialized)
- X    {
- X      bzero (re_syntax_table, sizeof re_syntax_table);
- X
- X      for (i = 0; i < 3; i++)
- X	for (ch = bounds[i][0]; ch <= bounds[i][1]; ch++)
- X	  re_syntax_table[ch] = Sword;
- X
- X      initialized = 1;
- X    }
- X}
- X
- X#endif /* SYNTAX_TABLE */
- X#endif /* not emacs */
- X
- X#include <alloca.h>
- X#include "regex.h"
- X
- X/* Number of failure points to allocate space for initially,
- X when matching. If this number is exceeded, more space is allocated,
- X so it is not a hard limit. */
- X
- X#ifndef NFAILURES /* may be predefined, e.g. with -DNFAILURES=n */
- X#define NFAILURES 80
- X#endif /* NFAILURES */
- X
- X/* width of a byte in bits */
- X
- X#define BYTEWIDTH 8
- X
- X#ifndef SIGN_EXTEND_CHAR
- X#define SIGN_EXTEND_CHAR(x) (x)
- X#endif
- X
- Xstatic int obscure_syntax = 0;
- X
- X/* Specify the precise syntax of regexp for compilation.
- X This provides for compatibility for various utilities
- X which historically have different, incompatible syntaxes.
- X
- X The argument SYNTAX is a bit-mask containing the two bits
- X RE_NO_BK_PARENS and RE_NO_BK_VBAR. */
- X
- X/* Make SYNTAX the syntax bit-mask used by later calls to
- X   re_compile_pattern.  Returns the mask that was in effect before.  */
- Xint
- Xre_set_syntax (syntax)
- X     int syntax;
- X{
- X  int ret;
- X
- X  ret = obscure_syntax;
- X  obscure_syntax = syntax;
- X  return ret;
- X}
- X
- X/* re_compile_pattern takes a regular-expression string
- X and converts it into a buffer full of byte commands for matching.
- X
- X PATTERN is the address of the pattern string
- X SIZE is the length of it.
- X BUFP is a struct re_pattern_buffer * which points to the info
- X on where to store the byte commands.
- X This structure contains a char * which points to the
- X actual space, which should have been obtained with malloc.
- X re_compile_pattern may use realloc to grow the buffer space.
- X
- X The number of bytes of commands can be found out by looking in
- X the struct re_pattern_buffer that bufp pointed to,
- X after re_compile_pattern returns.
- X*/
- X
- X#define PATPUSH(ch) (*b++ = (char) (ch)) /* store one byte at b and advance b */
- X
- X#define PATFETCH(c) /* next pattern byte into c, translated if a translate table is set; jumps to end_of_pattern when none is left */ \
- X {if (p == pend) goto end_of_pattern; \
- X c = * (unsigned char *) p++; \
- X if (translate) c = translate[c]; }
- X
- X#define PATFETCH_RAW(c) /* like PATFETCH but never translates */ \
- X {if (p == pend) goto end_of_pattern; \
- X c = * (unsigned char *) p++; }
- X
- X#define PATUNFETCH p-- /* step back over the byte just fetched */
- X
- X#define EXTEND_BUFFER /* double bufp->allocated (at most 1<<16, else goto too_big), realloc, then shift b, fixup_jump, laststart, begalt and pending_exact by the distance the buffer moved; overwrites c */ \
- X { char *old_buffer = bufp->buffer; \
- X if (bufp->allocated == (1<<16)) goto too_big; \
- X bufp->allocated *= 2; \
- X if (bufp->allocated > (1<<16)) bufp->allocated = (1<<16); \
- X if (!(bufp->buffer = (char *) realloc (bufp->buffer, bufp->allocated))) \
- X goto memory_exhausted; \
- X c = bufp->buffer - old_buffer; \
- X b += c; \
- X if (fixup_jump) \
- X fixup_jump += c; \
- X if (laststart) \
- X laststart += c; \
- X begalt += c; \
- X if (pending_exact) \
- X pending_exact += c; \
- X }
- X
- Xstatic int store_jump (), insert_jump (); /* defined after re_compile_pattern */
- X
- X/* Compile the SIZE-byte regular expression PATTERN into the command
- X   buffer described by BUFP, using the syntax bits last given to
- X   re_set_syntax.
- X
- X   Returns 0 on success, with bufp->used set to the number of command
- X   bytes produced.  On failure returns a constant string describing
- X   the error.  bufp->fastmap_accurate is cleared in either case.  */
- X
- Xchar *
- Xre_compile_pattern (pattern, size, bufp)
- X     char *pattern;
- X     int size;
- X     struct re_pattern_buffer *bufp;
- X{
- X  register char *b = bufp->buffer;
- X  register char *p = pattern;
- X  char *pend = pattern + size;
- X  register unsigned c, c1;
- X  char *p1;
- X  unsigned char *translate = (unsigned char *) bufp->translate;
- X
- X  /* address of the count-byte of the most recently inserted "exactn" command.
- X     This makes it possible to tell whether a new exact-match character
- X     can be added to that command or requires a new "exactn" command. */
- X
- X  char *pending_exact = 0;
- X
- X  /* address of the place where a forward-jump should go
- X     to the end of the containing expression.
- X     Each alternative of an "or", except the last, ends with a forward-jump
- X     of this sort. */
- X
- X  char *fixup_jump = 0;
- X
- X  /* address of start of the most recently finished expression.
- X     This tells postfix * where to find the start of its operand. */
- X
- X  char *laststart = 0;
- X
- X  /* In processing a repeat, 1 means zero matches is allowed */
- X
- X  char zero_times_ok;
- X
- X  /* In processing a repeat, 1 means many matches is allowed */
- X
- X  char many_times_ok;
- X
- X  /* address of beginning of regexp, or inside of last \( */
- X
- X  char *begalt = b;
- X
- X  /* Stack of information saved by \( and restored by \).
- X     Four stack elements are pushed by each \(:
- X       First, the value of b.
- X       Second, the value of fixup_jump.
- X       Third, the value of regnum.
- X       Fourth, the value of begalt. */
- X
- X  int stackb[40];
- X  int *stackp = stackb;
- X  int *stacke = stackb + 40;
- X  int *stackt;
- X
- X  /* Counts \('s as they are encountered.  Remembered for the matching \),
- X     where it becomes the "register number" to put in the stop_memory command */
- X
- X  int regnum = 1;
- X
- X  bufp->fastmap_accurate = 0;
- X
- X#ifndef emacs
- X#ifndef SYNTAX_TABLE
- X  /*
- X   * Initialize the syntax table.
- X   */
- X   init_syntax_once();
- X#endif
- X#endif
- X
- X  if (bufp->allocated == 0)
- X    {
- X      bufp->allocated = 28;
- X      if (bufp->buffer)
- X	/* EXTEND_BUFFER loses when bufp->allocated is 0 */
- X	bufp->buffer = (char *) realloc (bufp->buffer, 28);
- X      else
- X	/* Caller did not allocate a buffer.  Do it for him */
- X	bufp->buffer = (char *) malloc (28);
- X      if (!bufp->buffer) goto memory_exhausted;
- X      begalt = b = bufp->buffer;
- X    }
- X
- X  while (p != pend)
- X    {
- X      if (b - bufp->buffer > bufp->allocated - 10)
- X	/* Note that EXTEND_BUFFER clobbers c */
- X	EXTEND_BUFFER;
- X
- X      PATFETCH (c);
- X
- X      switch (c)
- X	{
- X	case '$':
- X	  if (obscure_syntax & RE_TIGHT_VBAR)
- X	    {
- X	      if (! (obscure_syntax & RE_CONTEXT_INDEP_OPS) && p != pend)
- X		goto normal_char;
- X	      /* Make operand of last vbar end before this `$'.  */
- X	      if (fixup_jump)
- X		store_jump (fixup_jump, jump, b);
- X	      fixup_jump = 0;
- X	      PATPUSH (endline);
- X	      break;
- X	    }
- X
- X	  /* $ means succeed if at end of line, but only in special contexts.
- X	    If randomly in the middle of a pattern, it is a normal character. */
- X	  if (p == pend || *p == '\n'
- X	      || (obscure_syntax & RE_CONTEXT_INDEP_OPS)
- X	      || (obscure_syntax & RE_NO_BK_PARENS
- X		  ? *p == ')'
- X		  : *p == '\\' && p[1] == ')')
- X	      || (obscure_syntax & RE_NO_BK_VBAR
- X		  ? *p == '|'
- X		  : *p == '\\' && p[1] == '|'))
- X	    {
- X	      PATPUSH (endline);
- X	      break;
- X	    }
- X	  goto normal_char;
- X
- X	case '^':
- X	  /* ^ means succeed if at beg of line, but only if no preceding pattern. */
- X
- X	  if (laststart && p[-2] != '\n'
- X	      && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
- X	    goto normal_char;
- X	  if (obscure_syntax & RE_TIGHT_VBAR)
- X	    {
- X	      if (p != pattern + 1
- X		  && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
- X		goto normal_char;
- X	      PATPUSH (begline);
- X	      begalt = b;
- X	    }
- X	  else
- X	    PATPUSH (begline);
- X	  break;
- X
- X	case '+':
- X	case '?':
- X	  if (obscure_syntax & RE_BK_PLUS_QM)
- X	    goto normal_char;
- X	handle_plus:
- X	case '*':
- X	  /* If there is no previous pattern, char not special. */
- X	  if (!laststart && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
- X	    goto normal_char;
- X	  /* If there is a sequence of repetition chars,
- X	     collapse it down to equivalent to just one.  */
- X	  zero_times_ok = 0;
- X	  many_times_ok = 0;
- X	  while (1)
- X	    {
- X	      zero_times_ok |= c != '+';
- X	      many_times_ok |= c != '?';
- X	      if (p == pend)
- X		break;
- X	      PATFETCH (c);
- X	      if (c == '*')
- X		;
- X	      else if (!(obscure_syntax & RE_BK_PLUS_QM)
- X		       && (c == '+' || c == '?'))
- X		;
- X	      else if ((obscure_syntax & RE_BK_PLUS_QM)
- X		       && c == '\\')
- X		{
- X		  int c1;
- X		  PATFETCH (c1);
- X		  if (!(c1 == '+' || c1 == '?'))
- X		    {
- X		      PATUNFETCH;
- X		      PATUNFETCH;
- X		      break;
- X		    }
- X		  c = c1;
- X		}
- X	      else
- X		{
- X		  PATUNFETCH;
- X		  break;
- X		}
- X	    }
- X
- X	  /* Star, etc. applied to an empty pattern is equivalent
- X	     to an empty pattern.  */
- X	  if (!laststart)
- X	    break;
- X
- X	  /* Now we know whether 0 matches is allowed,
- X	     and whether 2 or more matches is allowed.  */
- X	  if (many_times_ok)
- X	    {
- X	      /* If more than one repetition is allowed,
- X		 put in a backward jump at the end.  */
- X	      store_jump (b, maybe_finalize_jump, laststart - 3);
- X	      b += 3;
- X	    }
- X	  insert_jump (on_failure_jump, laststart, b + 3, b);
- X	  pending_exact = 0;
- X	  b += 3;
- X	  if (!zero_times_ok)
- X	    {
- X	      /* At least one repetition required: insert before the loop
- X		 a skip over the initial on-failure-jump instruction */
- X	      insert_jump (dummy_failure_jump, laststart, laststart + 6, b);
- X	      b += 3;
- X	    }
- X	  break;
- X
- X	case '.':
- X	  laststart = b;
- X	  PATPUSH (anychar);
- X	  break;
- X
- X	case '[':
- X	  while (b - bufp->buffer
- X		 > bufp->allocated - 3 - (1 << BYTEWIDTH) / BYTEWIDTH)
- X	    /* Note that EXTEND_BUFFER clobbers c */
- X	    EXTEND_BUFFER;
- X
- X	  laststart = b;
- X	  /* Look at *p only if the pattern has not ended; otherwise a
- X	     stray '^' just past the pattern would push p beyond pend.  */
- X	  if (p != pend && *p == '^')
- X	    PATPUSH (charset_not), p++;
- X	  else
- X	    PATPUSH (charset);
- X	  p1 = p;
- X
- X	  PATPUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
- X	  /* Clear the whole map */
- X	  bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
- X	  /* Read in characters and ranges, setting map bits */
- X	  while (1)
- X	    {
- X	      PATFETCH (c);
- X	      if (c == ']' && p != p1 + 1) break;
- X	      if (*p == '-' && p[1] != ']')
- X		{
- X		  PATFETCH (c1);
- X		  PATFETCH (c1);
- X		  while (c <= c1)
- X		    b[c / BYTEWIDTH] |= 1 << (c % BYTEWIDTH), c++;
- X		}
- X	      else
- X		{
- X		  b[c / BYTEWIDTH] |= 1 << (c % BYTEWIDTH);
- X		}
- X	    }
- X	  /* Discard any bitmap bytes that are all 0 at the end of the map.
- X	     Decrement the map-length byte too. */
- X	  while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
- X	    b[-1]--;
- X	  b += b[-1];
- X	  break;
- X
- X	case '(':
- X	  if (! (obscure_syntax & RE_NO_BK_PARENS))
- X	    goto normal_char;
- X	  else
- X	    goto handle_open;
- X
- X	case ')':
- X	  if (! (obscure_syntax & RE_NO_BK_PARENS))
- X	    goto normal_char;
- X	  else
- X	    goto handle_close;
- X
- X	case '\n':
- X	  if (! (obscure_syntax & RE_NEWLINE_OR))
- X	    goto normal_char;
- X	  else
- X	    goto handle_bar;
- X
- X	case '|':
- X	  if (! (obscure_syntax & RE_NO_BK_VBAR))
- X	    goto normal_char;
- X	  else
- X	    goto handle_bar;
- X
- X	case '\\':
- X	  if (p == pend) goto invalid_pattern;
- X	  PATFETCH_RAW (c);
- X	  switch (c)
- X	    {
- X	    case '(':
- X	      if (obscure_syntax & RE_NO_BK_PARENS)
- X		goto normal_backsl;
- X	    handle_open:
- X	      if (stackp == stacke) goto nesting_too_deep;
- X	      if (regnum < RE_NREGS)
- X		{
- X		  PATPUSH (start_memory);
- X		  PATPUSH (regnum);
- X		}
- X	      *stackp++ = b - bufp->buffer;
- X	      *stackp++ = fixup_jump ? fixup_jump - bufp->buffer + 1 : 0;
- X	      *stackp++ = regnum++;
- X	      *stackp++ = begalt - bufp->buffer;
- X	      fixup_jump = 0;
- X	      laststart = 0;
- X	      begalt = b;
- X	      break;
- X
- X	    case ')':
- X	      if (obscure_syntax & RE_NO_BK_PARENS)
- X		goto normal_backsl;
- X	    handle_close:
- X	      if (stackp == stackb) goto unmatched_close;
- X	      begalt = *--stackp + bufp->buffer;
- X	      if (fixup_jump)
- X		store_jump (fixup_jump, jump, b);
- X	      if (stackp[-1] < RE_NREGS)
- X		{
- X		  PATPUSH (stop_memory);
- X		  PATPUSH (stackp[-1]);
- X		}
- X	      stackp -= 2;
- X	      fixup_jump = 0;
- X	      if (*stackp)
- X		fixup_jump = *stackp + bufp->buffer - 1;
- X	      laststart = *--stackp + bufp->buffer;
- X	      break;
- X
- X	    case '|':
- X	      if (obscure_syntax & RE_NO_BK_VBAR)
- X		goto normal_backsl;
- X	    handle_bar:
- X	      insert_jump (on_failure_jump, begalt, b + 6, b);
- X	      pending_exact = 0;
- X	      b += 3;
- X	      if (fixup_jump)
- X		store_jump (fixup_jump, jump, b);
- X	      fixup_jump = b;
- X	      b += 3;
- X	      laststart = 0;
- X	      begalt = b;
- X	      break;
- X
- X#ifdef emacs
- X	    case '=':
- X	      PATPUSH (at_dot);
- X	      break;
- X
- X	    case 's':
- X	      laststart = b;
- X	      PATPUSH (syntaxspec);
- X	      PATFETCH (c);
- X	      PATPUSH (syntax_spec_code[c]);
- X	      break;
- X
- X	    case 'S':
- X	      laststart = b;
- X	      PATPUSH (notsyntaxspec);
- X	      PATFETCH (c);
- X	      PATPUSH (syntax_spec_code[c]);
- X	      break;
- X#endif /* emacs */
- X
- X	    case 'w':
- X	      laststart = b;
- X	      PATPUSH (wordchar);
- X	      break;
- X
- X	    case 'W':
- X	      laststart = b;
- X	      PATPUSH (notwordchar);
- X	      break;
- X
- X	    case '<':
- X	      PATPUSH (wordbeg);
- X	      break;
- X
- X	    case '>':
- X	      PATPUSH (wordend);
- X	      break;
- X
- X	    case 'b':
- X	      PATPUSH (wordbound);
- X	      break;
- X
- X	    case 'B':
- X	      PATPUSH (notwordbound);
- X	      break;
- X
- X	    case '`':
- X	      PATPUSH (begbuf);
- X	      break;
- X
- X	    case '\'':
- X	      PATPUSH (endbuf);
- X	      break;
- X
- X	    case '1':
- X	    case '2':
- X	    case '3':
- X	    case '4':
- X	    case '5':
- X	    case '6':
- X	    case '7':
- X	    case '8':
- X	    case '9':
- X	      c1 = c - '0';
- X	      if (c1 >= regnum)
- X		goto normal_char;
- X	      for (stackt = stackp - 2;  stackt > stackb;  stackt -= 4)
- X		if (*stackt == c1)
- X		  goto normal_char;
- X	      laststart = b;
- X	      PATPUSH (duplicate);
- X	      PATPUSH (c1);
- X	      break;
- X
- X	    case '+':
- X	    case '?':
- X	      if (obscure_syntax & RE_BK_PLUS_QM)
- X		goto handle_plus;
- X	      /* otherwise fall through: the character is ordinary */
- X
- X	    default:
- X	    normal_backsl:
- X	      /* You might think it would be useful for \ to mean
- X		 not to translate; but if we don't translate it
- X		 it will never match anything.  */
- X	      if (translate) c = translate[c];
- X	      goto normal_char;
- X	    }
- X	  break;
- X
- X	default:
- X	normal_char:
- X	  if (!pending_exact || pending_exact + *pending_exact + 1 != b
- X	      || *pending_exact == 0177 || *p == '*' || *p == '^'
- X	      || ((obscure_syntax & RE_BK_PLUS_QM)
- X		  ? *p == '\\' && (p[1] == '+' || p[1] == '?')
- X		  : (*p == '+' || *p == '?')))
- X	    {
- X	      laststart = b;
- X	      PATPUSH (exactn);
- X	      pending_exact = b;
- X	      PATPUSH (0);
- X	    }
- X	  PATPUSH (c);
- X	  (*pending_exact)++;
- X	}
- X    }
- X
- X  if (fixup_jump)
- X    store_jump (fixup_jump, jump, b);
- X
- X  if (stackp != stackb) goto unmatched_open;
- X
- X  bufp->used = b - bufp->buffer;
- X  return 0;
- X
- X invalid_pattern:
- X  return "Invalid regular expression";
- X
- X unmatched_open:
- X  return "Unmatched \\(";
- X
- X unmatched_close:
- X  return "Unmatched \\)";
- X
- X end_of_pattern:
- X  return "Premature end of regular expression";
- X
- X nesting_too_deep:
- X  return "Nesting too deep";
- X
- X too_big:
- X  return "Regular expression too big";
- X
- X memory_exhausted:
- X  return "Memory exhausted";
- X}
- X
- X/* Store where `from' points a jump operation to jump to where `to' points.
- X `opcode' is the opcode to store. */
- X
- X/* Write a 3-byte jump instruction at FROM: the opcode OPCODE followed
- X   by the 16-bit displacement to TO, low-order byte first.  The
- X   displacement is measured from the end of the instruction (FROM + 3)
- X   and is negative for a backward jump.  Always returns 0.  */
- Xstatic int
- Xstore_jump (from, opcode, to)
- X     char *from, *to;
- X     char opcode;
- X{
- X  int disp = to - (from + 3);
- X
- X  from[0] = opcode;
- X  from[1] = disp & 0377;
- X  from[2] = disp >> 8;
- X  return 0;
- X}
- X
- X/* Open up space at char FROM, and insert there a jump to TO.
- X CURRENT_END gives the end of the storage now in use,
- X so we know how much data to copy up.
- X OP is the opcode of the jump to insert.
- X
- X If you call this function, you must zero out pending_exact. */
- X
- X/* Insert a 3-byte jump instruction at FROM.  The bytes from FROM up to
- X   CURRENT_END are first moved up by three, then the jump with opcode OP
- X   and target TO is stored in the gap.  The caller must make sure there
- X   is room for three more bytes after CURRENT_END.  Always returns 0.  */
- Xstatic int
- Xinsert_jump (op, from, to, current_end)
- X     char op;
- X     char *from, *to, *current_end;
- X{
- X  register char *pto = current_end + 3;
- X  register char *pfrom = current_end;
- X
- X  /* copy from the top down so the overlapping regions do not clash */
- X  while (pfrom != from)
- X    *--pto = *--pfrom;
- X  store_jump (from, op, to);
- X  return 0;
- X}
- X
- X/* Given a pattern, compute a fastmap from it.
- X The fastmap records which of the (1 << BYTEWIDTH) possible characters
- X can start a string that matches the pattern.
- X This fastmap is used by re_search to skip quickly over totally implausible text.
- X
- X The caller must supply the address of a (1 << BYTEWIDTH)-byte data area
- X as bufp->fastmap.
- X The other components of bufp describe the pattern to be used. */
- X
- X/* Compute bufp->fastmap for the compiled pattern in BUFP: entry C is
- X   set to 1 if character C can start a match.  Also sets
- X   bufp->fastmap_accurate to 1 and bufp->can_be_null to 0, 1 or 2.
- X
- X   Alternatives still to be examined are kept on a stack of NFAILURES
- X   entries.  Should that stack fill up, the function stops being
- X   precise: it marks every character as a possible start and sets
- X   can_be_null to 1, which makes re_search_2 try every position.  */
- X
- Xvoid
- Xre_compile_fastmap (bufp)
- X     struct re_pattern_buffer *bufp;
- X{
- X  unsigned char *pattern = (unsigned char *) bufp->buffer;
- X  int size = bufp->used;
- X  register char *fastmap = bufp->fastmap;
- X  register unsigned char *p = pattern;
- X  register unsigned char *pend = pattern + size;
- X  register int j, k;
- X  unsigned char *translate = (unsigned char *) bufp->translate;
- X
- X  /* stackb[0] is never stored into; stackp == stackb means "empty" */
- X  unsigned char *stackb[NFAILURES];
- X  unsigned char **stackp = stackb;
- X
- X  bzero (fastmap, (1 << BYTEWIDTH));
- X  bufp->fastmap_accurate = 1;
- X  bufp->can_be_null = 0;
- X
- X  while (p)
- X    {
- X      if (p == pend)
- X	{
- X	  bufp->can_be_null = 1;
- X	  break;
- X	}
- X#ifdef SWITCH_ENUM_BUG
- X      switch ((int) ((enum regexpcode) *p++))
- X#else
- X      switch ((enum regexpcode) *p++)
- X#endif
- X	{
- X	case exactn:
- X	  if (translate)
- X	    fastmap[translate[p[1]]] = 1;
- X	  else
- X	    fastmap[p[1]] = 1;
- X	  break;
- X
- X        case begline:
- X        case before_dot:
- X	case at_dot:
- X	case after_dot:
- X	case begbuf:
- X	case endbuf:
- X	case wordbound:
- X	case notwordbound:
- X	case wordbeg:
- X	case wordend:
- X	  continue;
- X
- X	case endline:
- X	  if (translate)
- X	    fastmap[translate['\n']] = 1;
- X	  else
- X	    fastmap['\n'] = 1;
- X	  if (bufp->can_be_null != 1)
- X	    bufp->can_be_null = 2;
- X	  break;
- X
- X	case finalize_jump:
- X	case maybe_finalize_jump:
- X	case jump:
- X	case dummy_failure_jump:
- X	  bufp->can_be_null = 1;
- X	  j = *p++ & 0377;
- X	  j += SIGN_EXTEND_CHAR (*(char *)p) << 8;
- X	  p += j + 1;		/* The 1 compensates for missing ++ above */
- X	  if (j > 0)
- X	    continue;
- X	  /* Jump backward reached implies we just went through
- X	     the body of a loop and matched nothing.
- X	     Opcode jumped to should be an on_failure_jump.
- X	     Just treat it like an ordinary jump.
- X	     For a * loop, it has pushed its failure point already;
- X	     if so, discard that as redundant.  */
- X	  if ((enum regexpcode) *p != on_failure_jump)
- X	    continue;
- X	  p++;
- X	  j = *p++ & 0377;
- X	  j += SIGN_EXTEND_CHAR (*(char *)p) << 8;
- X	  p += j + 1;		/* The 1 compensates for missing ++ above */
- X	  if (stackp != stackb && *stackp == p)
- X	    stackp--;
- X	  continue;
- X
- X	case on_failure_jump:
- X	  j = *p++ & 0377;
- X	  j += SIGN_EXTEND_CHAR (*(char *)p) << 8;
- X	  p++;
- X	  if (stackp == stackb + NFAILURES - 1)
- X	    {
- X	      /* No room to remember another alternative.  Fall back to
- X		 the safe answer: any character may start a match and
- X		 the pattern may match the empty string.  */
- X	      for (j = 0; j < (1 << BYTEWIDTH); j++)
- X		fastmap[j] = 1;
- X	      bufp->can_be_null = 1;
- X	      return;
- X	    }
- X	  *++stackp = p + j;
- X	  continue;
- X
- X	case start_memory:
- X	case stop_memory:
- X	  p++;
- X	  continue;
- X
- X	case duplicate:
- X	  bufp->can_be_null = 1;
- X	  fastmap['\n'] = 1;
- X	  /* fall through */
- X	case anychar:
- X	  for (j = 0; j < (1 << BYTEWIDTH); j++)
- X	    if (j != '\n')
- X	      fastmap[j] = 1;
- X	  if (bufp->can_be_null)
- X	    return;
- X	  /* Don't return; check the alternative paths
- X	     so we can set can_be_null if appropriate.  */
- X	  break;
- X
- X	case wordchar:
- X	  for (j = 0; j < (1 << BYTEWIDTH); j++)
- X	    if (SYNTAX (j) == Sword)
- X	      fastmap[j] = 1;
- X	  break;
- X
- X	case notwordchar:
- X	  for (j = 0; j < (1 << BYTEWIDTH); j++)
- X	    if (SYNTAX (j) != Sword)
- X	      fastmap[j] = 1;
- X	  break;
- X
- X#ifdef emacs
- X	case syntaxspec:
- X	  k = *p++;
- X	  for (j = 0; j < (1 << BYTEWIDTH); j++)
- X	    if (SYNTAX (j) == (enum syntaxcode) k)
- X	      fastmap[j] = 1;
- X	  break;
- X
- X	case notsyntaxspec:
- X	  k = *p++;
- X	  for (j = 0; j < (1 << BYTEWIDTH); j++)
- X	    if (SYNTAX (j) != (enum syntaxcode) k)
- X	      fastmap[j] = 1;
- X	  break;
- X#endif /* emacs */
- X
- X	case charset:
- X	  for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
- X	    if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
- X	      {
- X		if (translate)
- X		  fastmap[translate[j]] = 1;
- X		else
- X		  fastmap[j] = 1;
- X	      }
- X	  break;
- X
- X	case charset_not:
- X	  /* Chars beyond end of map must be allowed */
- X	  for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
- X	    if (translate)
- X	      fastmap[translate[j]] = 1;
- X	    else
- X	      fastmap[j] = 1;
- X
- X	  for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
- X	    if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
- X	      {
- X		if (translate)
- X		  fastmap[translate[j]] = 1;
- X		else
- X		  fastmap[j] = 1;
- X	      }
- X	  break;
- X	}
- X
- X      /* Get here means we have successfully found the possible starting characters
- X	 of one path of the pattern.  We need not follow this path any farther.
- X	 Instead, look at the next alternative remembered in the stack. */
- X      if (stackp != stackb)
- X	p = *stackp--;
- X      else
- X	break;
- X    }
- X}
- X
- X/* Like re_search_2, below, but only one string is specified. */
- X
- X/* Search the single string STRING of SIZE bytes by calling re_search_2
- X   with an empty first string and SIZE as the match limit.  Returns
- X   whatever re_search_2 returns.
- X
- X   The empty first string is passed as an explicit null pointer: the
- X   call is not covered by a prototype here, so a plain 0 would be
- X   passed as an int, which need not have the size of a pointer.  */
- Xint
- Xre_search (pbufp, string, size, startpos, range, regs)
- X     struct re_pattern_buffer *pbufp;
- X     char *string;
- X     int size, startpos, range;
- X     struct re_registers *regs;
- X{
- X  return re_search_2 (pbufp, (char *) 0, 0, string, size,
- X		      startpos, range, regs, size);
- X}
- X
- X/* Like re_match_2 but tries first a match starting at index STARTPOS,
- X then at STARTPOS + 1, and so on.
- X RANGE is the number of places to try before giving up.
- X If RANGE is negative, the starting positions tried are
- X STARTPOS, STARTPOS - 1, etc.
- X It is up to the caller to make sure that range is not so large
- X as to take the starting position outside of the input strings.
- X
- XThe value returned is the position at which the match was found,
- X or -1 if no match was found,
- X or -2 if error (such as failure stack overflow). */
- X
- Xint
- Xre_search_2 (pbufp, string1, size1, string2, size2, startpos, range, regs, mstop)
- X struct re_pattern_buffer *pbufp; /* compiled pattern; its fastmap is rebuilt here if it is stale */
- X char *string1, *string2; /* the two pieces of text, searched as one concatenated string */
- X int size1, size2; /* lengths of string1 and string2 */
- X int startpos; /* first position to try */
- X register int range; /* how many further positions to try; negative means search backward */
- X struct re_registers *regs; /* handed unchanged to re_match_2 */
- X int mstop; /* handed unchanged to re_match_2 */
- X{
- X register char *fastmap = pbufp->fastmap;
- X register unsigned char *translate = (unsigned char *) pbufp->translate;
- X int total = size1 + size2;
- X int val; /* result of the latest re_match_2 call */
- X
- X /* Update the fastmap now if not correct already */
- X if (fastmap && !pbufp->fastmap_accurate)
- X re_compile_fastmap (pbufp);
- X
- X /* Don't waste time in a long search for a pattern
- X that says it is anchored. */
- X if (pbufp->used > 0 && (enum regexpcode) pbufp->buffer[0] == begbuf
- X && range > 0)
- X {
- X if (startpos > 0)
- X return -1;
- X else
- X range = 1;
- X }
- X
- X while (1)
- X {
- X /* If a fastmap is supplied, skip quickly over characters
- X that cannot possibly be the start of a match.
- X Note, however, that if the pattern can possibly match
- X the null string, we must test it at each starting point
- X so that we take the first null string we get. */
- X
- X if (fastmap && startpos < total && pbufp->can_be_null != 1)
- X {
- X if (range > 0) /* forward search: skip ahead to the next plausible start */
- X {
- X register int lim = 0; /* value of range at which the scan must stop */
- X register unsigned char *p;
- X int irange = range;
- X if (startpos < size1 && startpos + range >= size1)
- X lim = range - (size1 - startpos); /* stop the scan at the end of string1 */
- X
- X p = ((unsigned char *)
- X &(startpos >= size1 ? string2 - size1 : string1)[startpos]);
- X
- X if (translate)
- X {
- X while (range > lim && !fastmap[translate[*p++]])
- X range--;
- X }
- X else
- X {
- X while (range > lim && !fastmap[*p++])
- X range--;
- X }
- X startpos += irange - range; /* advance by the number of characters skipped */
- X }
- X else /* range <= 0: test only the character at startpos */
- X {
- X register unsigned char c;
- X if (startpos >= size1)
- X c = string2[startpos - size1];
- X else
- X c = string1[startpos];
- X c &= 0xff;
- X if (translate ? !fastmap[translate[c]] : !fastmap[c])
- X goto advance;
- X }
- X }
- X
- X if (range >= 0 && startpos == total
- X && fastmap && pbufp->can_be_null == 0)
- X return -1; /* at the end of the text and the pattern cannot match the empty string */
- X
- X val = re_match_2 (pbufp, string1, size1, string2, size2, startpos, regs,
- X mstop);
- X /* Propagate error indication if worse than mere failure. */
- X if (val == -2)
- X return -2;
- X /* Return position on success. */
- X if (0 <= val)
- X return startpos;
- X
- X#ifdef C_ALLOCA
- X alloca (0);
- X#endif /* C_ALLOCA */
- X
- X advance:
- X if (!range) break; /* no positions left to try */
- X if (range > 0) range--, startpos++; else range++, startpos--;
- X }
- X return -1;
- X}
- X
- X#ifndef emacs /* emacs never uses this */
- Xint
- Xre_match (pbufp, string, size, pos, regs)
- X     struct re_pattern_buffer *pbufp;
- X     char *string;
- X     int size, pos;
- X     struct re_registers *regs;
- X{
- X  /* Single-string front end to re_match_2: the first string is empty,
- X     STRING is handed over as the second string, and SIZE doubles as the
- X     stop position.  The result of re_match_2 is returned unchanged.
- X
- X     The empty first string is passed as a typed null pointer.  The call
- X     is made without a prototype, so a bare 0 would travel as an int and
- X     need not have the size or representation of the pointer the callee
- X     reads.  */
- X  return re_match_2 (pbufp, (unsigned char *) 0, 0, string, size, pos, regs,
- X                     size);
- X}
- X#endif /* emacs */
- X
- X/* Maximum size of failure stack. Beyond this, overflow is an error. */
- X
- Xint re_max_failures = 2000;
- X
- Xstatic int bcmp_translate();
- X/* Match the pattern described by PBUFP
- X   against data which is the virtual concatenation of STRING1 and STRING2.
- X   SIZE1 and SIZE2 are the sizes of the two data strings.
- X   Start the match at position POS.
- X   Do not consider matching past the position MSTOP.
- X
- X   If pbufp->fastmap is nonzero, then it had better be up to date.
- X
- X   The reason that the data to match are specified as two components
- X   which are to be regarded as concatenated
- X   is so this function can be used directly on the contents of an Emacs buffer.
- X
- X   If REGS is nonzero, a successful match stores into it the start and end
- X   indices of the whole match (register 0) and of registers 1 through
- X   RE_NREGS - 1; registers for which no \) was executed get -1 for both.
- X
- X   -1 is returned if there is no match.  -2 is returned if there is
- X   an error (such as match stack overflow).  Otherwise the value is the length
- X   of the substring which was matched.
- X
- X   Executing a maybe_finalize_jump overwrites that opcode in the compiled
- X   pattern with finalize_jump or jump, so the pattern buffer is modified.  */
- X
- Xint
- Xre_match_2 (pbufp, string1, size1, string2, size2, pos, regs, mstop)
- X     struct re_pattern_buffer *pbufp;
- X     unsigned char *string1, *string2;
- X     int size1, size2;
- X     int pos;
- X     struct re_registers *regs;
- X     int mstop;
- X{
- X  register unsigned char *p = (unsigned char *) pbufp->buffer;
- X  register unsigned char *pend = p + pbufp->used;
- X  /* End of first string */
- X  unsigned char *end1;
- X  /* End of second string */
- X  unsigned char *end2;
- X  /* Pointer just past last char to consider matching */
- X  unsigned char *end_match_1, *end_match_2;
- X  register unsigned char *d, *dend;
- X  register int mcnt;
- X  unsigned char *translate = (unsigned char *) pbufp->translate;
- X
- X  /* Failure point stack.  Each place that can handle a failure further down the line
- X     pushes a failure point on this stack.  It consists of two char *'s.
- X     The first one pushed is where to resume scanning the pattern;
- X     the second pushed is where to resume scanning the strings.
- X     If the latter is zero, the failure point is a "dummy".
- X     If a failure happens and the innermost failure point is dormant,
- X     it discards that failure point and tries the next one. */
- X
- X  unsigned char *initial_stack[2 * NFAILURES];
- X  unsigned char **stackb = initial_stack;
- X  unsigned char **stackp = stackb, **stacke = &stackb[2 * NFAILURES];
- X
- X  /* Information on the "contents" of registers.
- X     These are pointers into the input strings; they record
- X     just what was matched (on this attempt) by some part of the pattern.
- X     The start_memory command stores the start of a register's contents
- X     and the stop_memory command stores the end.
- X
- X     At that point, regstart[regnum] points to the first character in the register,
- X     regend[regnum] points to the first character beyond the end of the register,
- X     regstart_seg1[regnum] is true iff regstart[regnum] points into string1,
- X     and regend_seg1[regnum] is true iff regend[regnum] points into string1. */
- X
- X  unsigned char *regstart[RE_NREGS];
- X  unsigned char *regend[RE_NREGS];
- X  unsigned char regstart_seg1[RE_NREGS], regend_seg1[RE_NREGS];
- X
- X  /* Set up pointers to ends of strings.
- X     Don't allow the second string to be empty unless both are empty. */
- X  if (!size2)
- X    {
- X      string2 = string1;
- X      size2 = size1;
- X      string1 = 0;
- X      size1 = 0;
- X    }
- X  end1 = string1 + size1;
- X  end2 = string2 + size2;
- X
- X  /* Compute where to stop matching, within the two strings */
- X  if (mstop <= size1)
- X    {
- X      end_match_1 = string1 + mstop;
- X      end_match_2 = string2;
- X    }
- X  else
- X    {
- X      end_match_1 = end1;
- X      end_match_2 = string2 + mstop - size1;
- X    }
- X
- X  /* Initialize \) text positions to -1
- X     to mark ones that no \( or \) has been seen for. */
- X
- X  for (mcnt = 0; mcnt < sizeof (regend) / sizeof (*regend); mcnt++)
- X    regend[mcnt] = (unsigned char *) -1;
- X
- X  /* `p' scans through the pattern as `d' scans through the data.
- X     `dend' is the end of the input string that `d' points within.
- X     `d' is advanced into the following input string whenever necessary,
- X     but this happens before fetching;
- X     therefore, at the beginning of the loop,
- X     `d' can be pointing at the end of a string,
- X     but it cannot equal string2. */
- X
- X  if (pos <= size1)
- X    d = string1 + pos, dend = end_match_1;
- X  else
- X    d = string2 + pos - size1, dend = end_match_2;
- X
- X/* Write PREFETCH; just before fetching a character with *d. */
- X#define PREFETCH \
- X while (d == dend) \
- X { if (dend == end_match_2) goto fail; /* end of string2 => failure */ \
- X d = string2; /* end of string1 => advance to string2. */ \
- X dend = end_match_2; }
- X
- X  /* This loop loops over pattern commands.
- X     It exits by returning from the function if match is complete,
- X     or it drops through if match fails at this starting point in the input data. */
- X
- X  while (1)
- X    {
- X      if (p == pend)
- X        /* End of pattern means we have succeeded! */
- X        {
- X          /* If caller wants register contents data back, convert it to indices */
- X          if (regs)
- X            {
- X              regs->start[0] = pos;
- X              if (dend == end_match_1)
- X                regs->end[0] = d - string1;
- X              else
- X                regs->end[0] = d - string2 + size1;
- X              for (mcnt = 1; mcnt < RE_NREGS; mcnt++)
- X                {
- X                  if (regend[mcnt] == (unsigned char *) -1)
- X                    {
- X                      regs->start[mcnt] = -1;
- X                      regs->end[mcnt] = -1;
- X                      continue;
- X                    }
- X                  if (regstart_seg1[mcnt])
- X                    regs->start[mcnt] = regstart[mcnt] - string1;
- X                  else
- X                    regs->start[mcnt] = regstart[mcnt] - string2 + size1;
- X                  if (regend_seg1[mcnt])
- X                    regs->end[mcnt] = regend[mcnt] - string1;
- X                  else
- X                    regs->end[mcnt] = regend[mcnt] - string2 + size1;
- X                }
- X            }
- X          if (dend == end_match_1)
- X            return (d - string1 - pos);
- X          else
- X            return d - string2 + size1 - pos;
- X        }
- X
- X      /* Otherwise match next pattern command */
- X#ifdef SWITCH_ENUM_BUG
- X      switch ((int) ((enum regexpcode) *p++))
- X#else
- X      switch ((enum regexpcode) *p++)
- X#endif
- X        {
- X
- X        /* \( is represented by a start_memory, \) by a stop_memory.
- X           Both of those commands contain a "register number" argument.
- X           The text matched within the \( and \) is recorded under that number.
- X           Then, \<digit> turns into a `duplicate' command which
- X           is followed by the numeric value of <digit> as the register number. */
- X
- X        case start_memory:
- X          regstart[*p] = d;
- X          regstart_seg1[*p++] = (dend == end_match_1);
- X          break;
- X
- X        case stop_memory:
- X          regend[*p] = d;
- X          regend_seg1[*p++] = (dend == end_match_1);
- X          break;
- X
- X        case duplicate:
- X          {
- X            int regno = *p++;   /* Get which register to match against */
- X            register unsigned char *d2, *dend2;
- X
- X            /* Don't allow matching a register that hasn't been used.
- X               An entry of regend[] that still holds the (unsigned char *) -1
- X               marker stored at function entry has seen no \).  Compare with
- X               the marker itself: converting the pointer to int would
- X               truncate it where pointers are wider than int, and could
- X               reject a valid register whose address has the int sign bit
- X               set.  */
- X            if (regend[regno] == (unsigned char *) -1)
- X              goto fail;
- X
- X            d2 = regstart[regno];
- X            dend2 = ((regstart_seg1[regno] == regend_seg1[regno])
- X                     ? regend[regno] : end_match_1);
- X            while (1)
- X              {
- X                /* Advance to next segment in register contents, if necessary */
- X                while (d2 == dend2)
- X                  {
- X                    if (dend2 == end_match_2) break;
- X                    if (dend2 == regend[regno]) break;
- X                    d2 = string2, dend2 = regend[regno];  /* end of string1 => advance to string2. */
- X                  }
- X                /* At end of register contents => success */
- X                if (d2 == dend2) break;
- X
- X                /* Advance to next segment in data being matched, if necessary */
- X                PREFETCH;
- X
- X                /* mcnt gets # consecutive chars to compare */
- X                mcnt = dend - d;
- X                if (mcnt > dend2 - d2)
- X                  mcnt = dend2 - d2;
- X                /* Compare that many; failure if mismatch, else skip them. */
- X                if (translate ? bcmp_translate (d, d2, mcnt, translate) : bcmp (d, d2, mcnt))
- X                  goto fail;
- X                d += mcnt, d2 += mcnt;
- X              }
- X          }
- X          break;
- X
- X        case anychar:
- X          /* fetch a data character */
- X          PREFETCH;
- X          /* Match anything but a newline. */
- X          if ((translate ? translate[*d++] : *d++) == '\n')
- X            goto fail;
- X          break;
- X
- X        case charset:
- X        case charset_not:
- X          {
- X            /* Nonzero for charset_not */
- X            int not = 0;
- X            register int c;
- X            if (*(p - 1) == (unsigned char) charset_not)
- X              not = 1;
- X
- X            /* fetch a data character */
- X            PREFETCH;
- X
- X            if (translate)
- X              c = translate [*d];
- X            else
- X              c = *d;
- X
- X            /* *p is the length of the bitmap in bytes; characters beyond
- X               the bitmap are not members of the set.  */
- X            if (c < *p * BYTEWIDTH
- X                && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
- X              not = !not;
- X
- X            p += 1 + *p;
- X
- X            if (!not) goto fail;
- X            d++;
- X            break;
- X          }
- X
- X        case begline:
- X          if (d == string1 || d[-1] == '\n')
- X            break;
- X          goto fail;
- X
- X        case endline:
- X          if (d == end2
- X              || (d == end1 ? (size2 == 0 || *string2 == '\n') : *d == '\n'))
- X            break;
- X          goto fail;
- X
- X        /* "or" constructs ("|") are handled by starting each alternative
- X           with an on_failure_jump that points to the start of the next alternative.
- X           Each alternative except the last ends with a jump to the joining point.
- X           (Actually, each jump except for the last one really jumps
- X           to the following jump, because tensioning the jumps is a hassle.) */
- X
- X        /* The start of a stupid repeat has an on_failure_jump that points
- X           past the end of the repeat text.
- X           This makes a failure point so that, on failure to match a repetition,
- X           matching restarts past as many repetitions have been found
- X           with no way to fail and look for another one. */
- X
- X        /* A smart repeat is similar but loops back to the on_failure_jump
- X           so that each repetition makes another failure point. */
- X
- X        case on_failure_jump:
- X          if (stackp == stacke)
- X            {
- X              /* Stack is full: double it, unless it is already past the
- X                 re_max_failures limit.  */
- X              unsigned char **stackx;
- X              if (stacke - stackb > re_max_failures)
- X                return -2;
- X              stackx = (unsigned char **) alloca (2 * (stacke - stackb)
- X                                                  * sizeof (char *));
- X              bcopy (stackb, stackx, (stacke - stackb) * sizeof (char *));
- X              stackp = stackx + (stackp - stackb);
- X              stacke = stackx + 2 * (stacke - stackb);
- X              stackb = stackx;
- X            }
- X          /* The jump offset is two bytes, low byte first, high byte signed. */
- X          mcnt = *p++ & 0377;
- X          mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8;
- X          p++;
- X          *stackp++ = mcnt + p;
- X          *stackp++ = d;
- X          break;
- X
- X        /* The end of a smart repeat has a maybe_finalize_jump back.
- X           Change it either to a finalize_jump or an ordinary jump. */
- X
- X        case maybe_finalize_jump:
- X          mcnt = *p++ & 0377;
- X          mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8;
- X          p++;
- X          /* Compare what follows with the beginning of the repeat.
- X             If we can establish that there is nothing that they would
- X             both match, we can change to finalize_jump */
- X          if (p == pend)
- X            p[-3] = (unsigned char) finalize_jump;
- X          else if (*p == (unsigned char) exactn
- X                   || *p == (unsigned char) endline)
- X            {
- X              register int c = *p == (unsigned char) endline ? '\n' : p[2];
- X              register unsigned char *p1 = p + mcnt;
- X              /* p1[0] ... p1[2] are an on_failure_jump.
- X                 Examine what follows that */
- X              if (p1[3] == (unsigned char) exactn && p1[5] != c)
- X                p[-3] = (unsigned char) finalize_jump;
- X              else if (p1[3] == (unsigned char) charset
- X                       || p1[3] == (unsigned char) charset_not)
- X                {
- X                  int not = p1[3] == (unsigned char) charset_not;
- X                  if (c < p1[4] * BYTEWIDTH
- X                      && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
- X                    not = !not;
- X                  /* not is 1 if c would match */
- X                  /* That means it is not safe to finalize */
- X                  if (!not)
- X                    p[-3] = (unsigned char) finalize_jump;
- X                }
- X            }
- X          /* Back up to the offset bytes so the shared jump code below
- X             can decode them again.  */
- X          p -= 2;
- X          if (p[-1] != (unsigned char) finalize_jump)
- X            {
- X              p[-1] = (unsigned char) jump;
- X              goto nofinalize;
- X            }
- X          /* Fall through: the opcode is now a finalize_jump.  */
- X
- X        /* The end of a stupid repeat has a finalize-jump
- X           back to the start, where another failure point will be made
- X           which will point after all the repetitions found so far. */
- X
- X        case finalize_jump:
- X          stackp -= 2;
- X          /* Fall through to take the jump.  */
- X
- X        case jump:
- X        nofinalize:
- X          mcnt = *p++ & 0377;
- X          mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8;
- X          p += mcnt + 1;        /* The 1 compensates for missing ++ above */
- X          break;
- X
- X        case dummy_failure_jump:
- X          if (stackp == stacke)
- X            {
- X              /* Grow the stack under the same re_max_failures limit that
- X                 on_failure_jump applies, so that this path cannot keep
- X                 doubling the alloca'd stack without bound.  */
- X              unsigned char **stackx;
- X              if (stacke - stackb > re_max_failures)
- X                return -2;
- X              stackx = (unsigned char **) alloca (2 * (stacke - stackb)
- X                                                  * sizeof (char *));
- X              bcopy (stackb, stackx, (stacke - stackb) * sizeof (char *));
- X              stackp = stackx + (stackp - stackb);
- X              stacke = stackx + 2 * (stacke - stackb);
- X              stackb = stackx;
- X            }
- X          *stackp++ = 0;
- X          *stackp++ = 0;
- X          goto nofinalize;
- X
- X        case wordbound:
- X          if (d == string1  /* Points to first char */
- X              || d == end2  /* Points to end */
- X              || (d == end1 && size2 == 0)) /* Points to end */
- X            break;
- X          if ((SYNTAX (d[-1]) == Sword)
- X              != (SYNTAX (d == end1 ? *string2 : *d) == Sword))
- X            break;
- X          goto fail;
- X
- X        case notwordbound:
- X          if (d == string1  /* Points to first char */
- X              || d == end2  /* Points to end */
- X              || (d == end1 && size2 == 0)) /* Points to end */
- X            goto fail;
- X          if ((SYNTAX (d[-1]) == Sword)
- X              != (SYNTAX (d == end1 ? *string2 : *d) == Sword))
- X            goto fail;
- X          break;
- X
- X        case wordbeg:
- X          if (d == end2  /* Points to end */
- X              || (d == end1 && size2 == 0) /* Points to end */
- X              || SYNTAX (* (d == end1 ? string2 : d)) != Sword) /* Next char not a letter */
- X            goto fail;
- X          if (d == string1  /* Points to first char */
- X              || SYNTAX (d[-1]) != Sword)  /* prev char not letter */
- X            break;
- X          goto fail;
- X
- X        case wordend:
- X          if (d == string1  /* Points to first char */
- X              || SYNTAX (d[-1]) != Sword)  /* prev char not letter */
- X            goto fail;
- X          if (d == end2  /* Points to end */
- X              || (d == end1 && size2 == 0) /* Points to end */
- X              || SYNTAX (d == end1 ? *string2 : *d) != Sword) /* Next char not a letter */
- X            break;
- X          goto fail;
- X
- X#ifdef emacs
- X        case before_dot:
- X          if (PTR_CHAR_POS (d) + 1 >= point)
- X            goto fail;
- X          break;
- X
- X        case at_dot:
- X          if (PTR_CHAR_POS (d) + 1 != point)
- X            goto fail;
- X          break;
- X
- X        case after_dot:
- X          if (PTR_CHAR_POS (d) + 1 <= point)
- X            goto fail;
- X          break;
- X
- X        case wordchar:
- X          mcnt = (int) Sword;
- X          goto matchsyntax;
- X
- X        case syntaxspec:
- X          mcnt = *p++;
- X        matchsyntax:
- X          PREFETCH;
- X          if (SYNTAX (*d++) != (enum syntaxcode) mcnt) goto fail;
- X          break;
- X
- X        case notwordchar:
- X          mcnt = (int) Sword;
- X          goto matchnotsyntax;
- X
- X        case notsyntaxspec:
- X          mcnt = *p++;
- X        matchnotsyntax:
- X          PREFETCH;
- X          if (SYNTAX (*d++) == (enum syntaxcode) mcnt) goto fail;
- X          break;
- X#else
- X        case wordchar:
- X          PREFETCH;
- X          if (SYNTAX (*d++) == 0) goto fail;
- X          break;
- X
- X        case notwordchar:
- X          PREFETCH;
- X          if (SYNTAX (*d++) != 0) goto fail;
- X          break;
- X#endif /* not emacs */
- X
- X        case begbuf:
- X          if (d == string1)     /* Note, d cannot equal string2 */
- X            break;              /* unless string1 == string2. */
- X          goto fail;
- X
- X        case endbuf:
- X          if (d == end2 || (d == end1 && size2 == 0))
- X            break;
- X          goto fail;
- X
- X        case exactn:
- X          /* Match the next few pattern characters exactly.
- X             mcnt is how many characters to match. */
- X          mcnt = *p++;
- X          if (translate)
- X            {
- X              do
- X                {
- X                  PREFETCH;
- X                  if (translate[*d++] != *p++) goto fail;
- X                }
- X              while (--mcnt);
- X            }
- X          else
- X            {
- X              do
- X                {
- X                  PREFETCH;
- X                  if (*d++ != *p++) goto fail;
- X                }
- X              while (--mcnt);
- X            }
- X          break;
- X        }
- X      continue;    /* Successfully matched one pattern command; keep matching */
- X
- X      /* Jump here if any matching operation fails. */
- X    fail:
- X      if (stackp != stackb)
- X        /* A restart point is known.  Restart there and pop it. */
- X        {
- X          if (!stackp[-2])
- X            {   /* If innermost failure point is dormant, flush it and keep looking */
- X              stackp -= 2;
- X              goto fail;
- X            }
- X          d = *--stackp;
- X          p = *--stackp;
- X          /* A restart position inside string1 must again be bounded by
- X             the end of string1.  */
- X          if (d >= string1 && d <= end1)
- X            dend = end_match_1;
- X        }
- X      else break;   /* Matching at this starting point really fails! */
- X    }
- X  return -1;         /* Failure to match */
- X}
- X
- Xstatic int
- Xbcmp_translate (s1, s2, len, translate)
- X     unsigned char *s1, *s2;
- X     register int len;
- X     unsigned char *translate;
- X{
- X  /* Compare the first LEN bytes of S1 and S2 after mapping each byte
- X     through TRANSLATE.  Returns 0 when every mapped pair is equal and
- X     1 at the first pair that differs.  */
- X  register unsigned char *left = s1;
- X  register unsigned char *right = s2;
- X
- X  for (; len != 0; len--, left++, right++)
- X    {
- X      if (translate[*left] != translate[*right])
- X        return 1;
- X    }
- X  return 0;
- X}
- X
- X/* Entry points compatible with bsd4.2 regex library */
- X
- X#ifndef emacs
- X
- Xstatic struct re_pattern_buffer re_comp_buf;
- X
- Xchar *
- Xre_comp (s)
- X     char *s;
- X{
- X  /* Compile S into the file-static pattern buffer re_comp_buf.
- X     Returns 0 on success, otherwise a message string.  A null S compiles
- X     nothing: it only reports whether a pattern buffer already exists.  */
- X  if (s == 0)
- X    return re_comp_buf.buffer ? 0 : "No previous regular expression";
- X
- X  /* The pattern buffer and the fastmap are allocated the first time
- X     through, i.e. while re_comp_buf.buffer is still null.  */
- X  if (re_comp_buf.buffer == 0)
- X    {
- X      re_comp_buf.buffer = (char *) malloc (200);
- X      if (re_comp_buf.buffer == 0)
- X        return "Memory exhausted";
- X      re_comp_buf.allocated = 200;
- X
- X      re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
- X      if (re_comp_buf.fastmap == 0)
- X        return "Memory exhausted";
- X    }
- X
- X  return re_compile_pattern (s, strlen (s), &re_comp_buf);
- X}
- X
- Xint
- Xre_exec (s)
- X     char *s;
- X{
- X  /* Search all of S with the pattern held in re_comp_buf.
- X     Returns 1 if re_search reports a position (a value >= 0), else 0;
- X     negative results from re_search are therefore reported as 0.
- X
- X     The final argument is passed as a typed null pointer: the call has
- X     no prototype, so a bare 0 would be passed as an int rather than as
- X     the pointer the callee expects there.  */
- X  int len = strlen (s);
- X  return 0 <= re_search (&re_comp_buf, s, len, 0, len,
- X                         (struct re_registers *) 0);
- X}
- X
- X#endif /* emacs */
- X
- X#ifdef test
- X
- X#include <stdio.h>
- X
- X/* Indexed by a character, gives the upper case equivalent of the character.
- X   Only the entries for 0141 through 0172 (lower-case ASCII letters) differ
- X   from their own index; they hold 0101 through 0132.  Every other entry
- X   maps to itself. */
- X
- Xstatic char upcase[0400] =
- X { 000, 001, 002, 003, 004, 005, 006, 007,
- X 010, 011, 012, 013, 014, 015, 016, 017,
- X 020, 021, 022, 023, 024, 025, 026, 027,
- X 030, 031, 032, 033, 034, 035, 036, 037,
- X 040, 041, 042, 043, 044, 045, 046, 047,
- X 050, 051, 052, 053, 054, 055, 056, 057,
- X 060, 061, 062, 063, 064, 065, 066, 067,
- X 070, 071, 072, 073, 074, 075, 076, 077,
- X 0100, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
- X 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
- X 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
- X 0130, 0131, 0132, 0133, 0134, 0135, 0136, 0137,
- X 0140, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
- X 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
- X 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
- X 0130, 0131, 0132, 0173, 0174, 0175, 0176, 0177,
- X 0200, 0201, 0202, 0203, 0204, 0205, 0206, 0207,
- X 0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217,
- X 0220, 0221, 0222, 0223, 0224, 0225, 0226, 0227,
- X 0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237,
- X 0240, 0241, 0242, 0243, 0244, 0245, 0246, 0247,
- X 0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
- X 0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267,
- X 0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277,
- X 0300, 0301, 0302, 0303, 0304, 0305, 0306, 0307,
- X 0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317,
- X 0320, 0321, 0322, 0323, 0324, 0325, 0326, 0327,
- X 0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
- X 0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347,
- X 0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357,
- X 0360, 0361, 0362, 0363, 0364, 0365, 0366, 0367,
- X 0370, 0371, 0372, 0373, 0374, 0375, 0376, 0377
- X };
- X
- X/* Read one line from standard input into LINE, which holds SIZE bytes,
- X   and drop the trailing newline if there is one.  A line longer than
- X   SIZE - 1 characters is split: the remainder is left for the next read.
- X   Returns 1 if a line was read, 0 at end of input or on a read error.  */
- Xstatic int
- Xread_test_line (line, size)
- X     char *line;
- X     int size;
- X{
- X  int len;
- X
- X  if (!fgets (line, size, stdin))
- X    return 0;
- X  len = strlen (line);
- X  if (len > 0 && line[len - 1] == '\n')
- X    line[len - 1] = '\0';
- X  return 1;
- X}
- X
- X/* Interactive test driver.  Repeatedly reads a pattern line and a subject
- X   line from standard input.  A non-empty pattern line is compiled and
- X   dumped together with its fastmap; an empty one keeps the pattern already
- X   in the buffer.  The subject line is then matched from position 0 and the
- X   result printed.  An optional first argument sets obscure_syntax.
- X
- X   Lines are read with a bounded fgets rather than gets, so long input
- X   cannot overrun the 80-byte buffer, and the loop ends (returning 0) at
- X   end of input instead of spinning on it.  */
- Xint
- Xmain (argc, argv)
- X     int argc;
- X     char **argv;
- X{
- X  char pat[80];
- X  struct re_pattern_buffer buf;
- X  int i;
- X  char fastmap[(1 << BYTEWIDTH)];
- X  char *compile_error;
- X
- X  /* Allow a command argument to specify the style of syntax. */
- X  if (argc > 1)
- X    obscure_syntax = atoi (argv[1]);
- X
- X  buf.allocated = 40;
- X  buf.buffer = (char *) malloc (buf.allocated);
- X  if (!buf.buffer)
- X    {
- X      puts ("Memory exhausted");
- X      return 1;
- X    }
- X  /* Start with an empty pattern so that an empty first pattern line does
- X     not match against an uninitialized length.  */
- X  buf.used = 0;
- X  buf.fastmap = fastmap;
- X  buf.translate = upcase;
- X
- X  while (1)
- X    {
- X      if (!read_test_line (pat, (int) sizeof (pat)))
- X        break;
- X
- X      if (*pat)
- X        {
- X          compile_error = re_compile_pattern (pat, strlen (pat), &buf);
- X          if (compile_error)
- X            {
- X              /* Nothing sensible to dump or match; ask for a new pattern. */
- X              puts (compile_error);
- X              continue;
- X            }
- X
- X          for (i = 0; i < buf.used; i++)
- X            printchar (buf.buffer[i]);
- X
- X          putchar ('\n');
- X
- X          printf ("%d allocated, %d used.\n", buf.allocated, buf.used);
- X
- X          re_compile_fastmap (&buf);
- X          printf ("Allowed by fastmap: ");
- X          for (i = 0; i < (1 << BYTEWIDTH); i++)
- X            if (fastmap[i]) printchar (i);
- X          putchar ('\n');
- X        }
- X
- X      /* Now read the string to match against */
- X      if (!read_test_line (pat, (int) sizeof (pat)))
- X        break;
- X
- X      /* No register data is wanted; pass a typed null pointer because the
- X         call is made without a prototype.  */
- X      i = re_match (&buf, pat, strlen (pat), 0, (struct re_registers *) 0);
- X      printf ("Match value %d.\n", i);
- X    }
- X  return 0;
- X}
- X
- X#ifdef NOTDEF
- Xprint_buf (bufp)
- X     struct re_pattern_buffer *bufp;
- X{
- X  /* Dump BUFP to standard output: the compiled pattern bytes, the
- X     allocated/used sizes, the characters whose fastmap entry is nonzero,
- X     the characters whose translate entry is nonzero (when a translate
- X     table is present), and the fastmap_accurate and can_be_null flags.  */
- X  int table_size = 1 << BYTEWIDTH;
- X  char *accuracy_suffix;
- X  char *null_word;
- X  int idx;
- X
- X  printf ("buf is :\n----------------\n");
- X  idx = 0;
- X  while (idx < bufp->used)
- X    {
- X      printchar (bufp->buffer[idx]);
- X      idx++;
- X    }
- X
- X  printf ("\n%d allocated, %d used.\n", bufp->allocated, bufp->used);
- X
- X  printf ("Allowed by fastmap: ");
- X  for (idx = 0; idx < table_size; idx++)
- X    {
- X      if (bufp->fastmap[idx])
- X        printchar (idx);
- X    }
- X
- X  printf ("\nAllowed by translate: ");
- X  if (bufp->translate)
- X    {
- X      for (idx = 0; idx < table_size; idx++)
- X        {
- X          if (bufp->translate[idx])
- X            printchar (idx);
- X        }
- X    }
- X
- X  if (bufp->fastmap_accurate)
- X    accuracy_suffix = "";
- X  else
- X    accuracy_suffix = "n't";
- X  printf ("\nfastmap is%s accurate\n", accuracy_suffix);
- X
- X  if (bufp->can_be_null)
- X    null_word = "";
- X  else
- X    null_word = "not";
- X  printf ("can %s be null\n----------", null_word);
- X}
- X#endif
- X
- Xprintchar (c)
- X     char c;
- X{
- X  /* Write C to standard output.  Values from 041 up to, but not
- X     including, 0177 are written as they are; anything else is written
- X     as a backslash followed by three octal digits.  */
- X  if (c >= 041 && c < 0177)
- X    putchar (c);
- X  else
- X    {
- X      int shift;
- X
- X      putchar ('\\');
- X      /* The leading digit carries only two bits.  */
- X      putchar ('0' + ((c >> 6) & 3));
- X      for (shift = 3; shift >= 0; shift -= 3)
- X        putchar ('0' + ((c >> shift) & 7));
- X    }
- X}
- X
- Xerror (string)
- X     char *string;
- X{
- X  /* Print STRING and a newline on standard output, then terminate the
- X     program with exit status 1.  */
- X  fputs (string, stdout);
- X  putchar ('\n');
- X  exit (1);
- X}
- X
- X#endif test
- SHAR_EOF
- len=`wc -c < regex.c`
- if test $len != 44540 ; then
- echo error: regex.c was $len bytes long, should have been 44540
- fi
- fi # end of overwriting check
- exit 0
- --
- -ed falk, sun microsystems
- sun!falk, falk@sun.com
- He who dies with the most friends, wins.
-