home *** CD-ROM | disk | FTP | other *** search
- Subject: v19i073: NN, a Usenet news reader, Part12/15
- Newsgroups: comp.sources.unix
- Sender: sources
- Approved: rsalz@uunet.UU.NET
-
- Submitted-by: storm@texas.dk (Kim F. Storm)
- Posting-number: Volume 19, Issue 73
- Archive-name: nn/part12
-
- #!/bin/sh
- # this is part 12 of a multipart archive
- # do not concatenate these parts, unpack them in order with /bin/sh
- # file pack_date.c continued
- # (each here-document line below loses one leading 'X' through sed before it is written)
- CurArch=12 # number of this part; must match the number read from s2_seq_.tmp
- if test ! -r s2_seq_.tmp # no sequence file: unpacking has not been started with part 1
- then echo "Please unpack part 1 first!"
- exit 1; fi
- ( read Scheck # Scheck = first line of s2_seq_.tmp
- if test "$Scheck" != $CurArch
- then echo "Please unpack part $Scheck next!"
- exit 1;
- else exit 0; fi
- ) < s2_seq_.tmp || exit 1
- echo "x - Continuing file pack_date.c"
- sed 's/^X//' << 'NO_NEWS_IS_GOOD_NEWS' >> pack_date.c
- X * as long as the "ordering" is ok.
- X *
- X * The result is NOT a time_t value, i.e. ctime() will
- X * not produce the original Date string.
- X *
- X * The date must have format: [D]D Mmm YY hh:mm:ss GMT
- X */
- X
- X/*
- X * pack_date(destp, date)
- X *
- X * Parse "[D]D Mmm YY hh:mm..." from date and store an ordering value in
- X * *destp: minutes since the start of 1987, counting every month as 31
- X * days. *destp is left as 0 when date is NULL, when the day is missing
- X * or zero, or when the month name is not recognized or is cut short.
- X * Seconds and the time zone are not looked at.
- X */
- Xpack_date(destp, date)
- Xtime_stamp *destp;
- Xchar *date;
- X{
- X    time_stamp res;
- X    register int min, hour, day, mon, year;
- X
- X    *destp = 0;
- X    if (date == NULL) return;
- X
- X    if ((day = next_int(&date)) == 0) return;
- X
- X    /* the month tests below look at date[1] and date[2]; make sure */
- X    /* all three letters are there before indexing past a NUL */
- X    if (date[0] == '\0' || date[1] == '\0' || date[2] == '\0') return;
- X
- X    /* months are told apart by the fewest letters that distinguish them */
- X    switch (*date) {
- X    case 'J':
- X        if (date[1] == 'a') { mon = 0; break; }
- X        if (date[2] == 'n') { mon = 5; break; }
- X        mon = 6; break;
- X    case 'F':
- X        mon = 1; break;
- X    case 'M':
- X        if (date[2] == 'r') { mon = 2; break; }
- X        mon = 4; break;
- X    case 'A':
- X        if (date[1] == 'p') { mon = 3; break; }
- X        mon = 7; break;
- X    case 'S':
- X        mon = 8; break;
- X    case 'O':
- X        mon = 9; break;
- X    case 'N':
- X        mon = 10; break;
- X    case 'D':
- X        mon = 11; break;
- X    default:
- X        return;
- X    }
- X
- X    date += 3;                  /* skip "Mmm" ... */
- X    if (*date) date++;          /* ... and the separator, but never the NUL */
- X    while (isspace((unsigned char)*date)) date++;
- X
- X    year = next_int(&date);
- X    hour = next_int(&date);
- X    min = next_int(&date);
- X
- X    year -= 87; /* base is 1987 */
- X    if (year < 0) year += 100;
- X
- X    res = (year * 12 + mon) * 31 + day - 1;
- X    res *= 24 * 60;
- X    res += (hour * 60) + min;
- X
- X    *destp = res;
- X}
- X
- X
- X/*
- X * next_int(dp)
- X *
- X * Read an unsigned decimal number at *dp, then skip any white space and
- X * ':' characters following it. *dp is advanced past everything consumed.
- X * Returns the number, or 0 when *dp does not start with a digit.
- X */
- Xstatic next_int(dp)
- Xchar **dp;
- X{
- X    register char *str = *dp;
- X    register int i;
- X
- X    i = 0;
- X    /* ctype functions need an unsigned char value; both loops stop at NUL */
- X    while (isdigit((unsigned char)*str))
- X        i = (i * 10) + (*str++ - '0');
- X
- X    while (isspace((unsigned char)*str) || *str == ':') str++;
- X
- X    *dp = str;
- X    return i;
- X}
- X
- X
- X#ifdef DATE_TEST
- X
- X/*
- X * Test driver: for every input line containing a ':', pack the text
- X * after the colon and print the packed value followed by that text.
- X */
- Xmain()
- X{
- X    char line[128];
- X    char *field;
- X    unsigned long stamp;
- X
- X    for (;;) {
- X        if (!fgets(line, 128, stdin)) break;
- X
- X        field = strchr(line, ':');
- X        if (field != NULL) {
- X            /* step over the colon and the white space after it */
- X            for (field++; isspace(*field); field++)
- X                ;
- X            pack_date(&stamp, field);
- X            printf("%lu\t%s\n", stamp, field);
- X        }
- X    }
- X
- X    nn_exit(0);
- X}
- X
- X#endif
- NO_NEWS_IS_GOOD_NEWS
- echo "File pack_date.c is complete"
- chmod 0644 pack_date.c || echo "restore of pack_date.c fails"
- set `wc -c pack_date.c`;Sum=$1 # Sum = byte count reported by wc
- if test "$Sum" != "1938" # size mismatch is only reported; unpacking continues
- then echo original size 1938, current size $Sum;fi
- echo "x - extracting pack_name.c (Text)"
- sed 's/^X//' << 'NO_NEWS_IS_GOOD_NEWS' > pack_name.c &&
- X/*
- X * pack_name(packed, name, length)
- X * pack sender's name into something sensible, return in packed
- X *
- X */
- X
- X#include "config.h"
- X
- X
- X#define SEP_DOT 0 /* . */
- X#define SEP_PERCENT 1 /* % */
- X#define SEP_SCORE 2 /* _ */
- X#define SEP_AMPERSAND 3 /* @ (the at-sign, despite the macro name) */
- X#define SEP_BANG 4 /* ! */
- X#define SEP_MAXIMUM 5 /* number of separator kinds; size of separator[] in pack_name */
- X
- X
- X#define CL_OK 0x0100 /* letter or digit (Class[] also gives this to some punctuation) */
- X#define CL_SPACE 0x0200 /* cvt to space */
- X#define CL_IGNORE 0x0400 /* ignore */
- X#define CL_RANGE(c) 0x0800+c /* space range, end with c; c is kept in the low byte */
- X#define CL_HYPHEN 0x1000 /* convert to - */
- X#define CL_STOP 0x2000 /* discard rest of name */
- X#define CL_SEP | 0x4000 + /* address separator; written between a class and a SEP_ index, which ends up in the low byte */
- X
- X#define IS_OK(c) (Class[c] & CL_OK)
- X#define IS_SPACE(c) (Class[c] & CL_SPACE)
- X#define IGNORE(c) (c & 0x80 || Class[c] & CL_IGNORE) /* characters with bit 7 set are ignored without indexing Class[] */
- X#define BEGIN_RANGE(c) (Class[c] & CL_RANGE(0))
- X#define END_RANGE(c) (Class[c] & 0xff) /* low byte: the range-end character stored by CL_RANGE */
- X#define IS_HYPHEN(c) (Class[c] & CL_HYPHEN)
- X#define IS_STOP(c) (Class[c] & CL_STOP)
- X#define IS_SEPARATOR(c) (Class[c] & (0 CL_SEP 0)) /* (0 CL_SEP 0) expands to (0 | 0x4000 + 0) */
- X
- Xstatic short Class[128] = { /* one entry per 7-bit character code: CL_ class bits, plus a SEP_ index or range-end character in the low byte */
- X /* NUL */ CL_STOP ,
- X /* SOH */ CL_IGNORE ,
- X /* STX */ CL_IGNORE ,
- X /* ETX */ CL_IGNORE ,
- X /* EOT */ CL_IGNORE ,
- X /* ENQ */ CL_IGNORE ,
- X /* ACK */ CL_IGNORE ,
- X /* BEL */ CL_IGNORE ,
- X /* BS */ CL_IGNORE ,
- X /* TAB */ CL_SPACE ,
- X /* NL */ CL_IGNORE ,
- X /* VT */ CL_IGNORE ,
- X /* FF */ CL_IGNORE ,
- X /* CR */ CL_IGNORE ,
- X /* SO */ CL_IGNORE ,
- X /* SI */ CL_IGNORE ,
- X /* DLE */ CL_IGNORE ,
- X /* DC1 */ CL_IGNORE ,
- X /* DC2 */ CL_IGNORE ,
- X /* DC3 */ CL_IGNORE ,
- X /* DC4 */ CL_IGNORE ,
- X /* NAK */ CL_IGNORE ,
- X /* SYN */ CL_IGNORE ,
- X /* ETB */ CL_IGNORE ,
- X /* CAN */ CL_IGNORE ,
- X /* EM */ CL_IGNORE ,
- X /* SUB */ CL_IGNORE ,
- X /* ESC */ CL_IGNORE ,
- X /* FS */ CL_IGNORE ,
- X /* GS */ CL_IGNORE ,
- X /* RS */ CL_IGNORE ,
- X /* US */ CL_IGNORE ,
- X
- X /* space */ CL_SPACE ,
- X /* ! */ CL_SPACE CL_SEP SEP_BANG,
- X /* " */ CL_RANGE( '"' ) ,
- X /* # */ CL_OK ,
- X /* $ */ CL_OK ,
- X /* % */ CL_OK CL_SEP SEP_PERCENT,
- X /* & */ CL_OK ,
- X /* ' */ CL_OK ,
- X /* ( */ CL_RANGE( ')' ) ,
- X /* ) */ CL_IGNORE ,
- X /* * */ CL_HYPHEN ,
- X /* + */ CL_HYPHEN ,
- X /* , */ CL_STOP ,
- X /* - */ CL_HYPHEN ,
- X /* . */ CL_SPACE CL_SEP SEP_DOT,
- X /* / */ CL_OK ,
- X /* 0 */ CL_OK ,
- X /* 1 */ CL_OK ,
- X /* 2 */ CL_OK ,
- X /* 3 */ CL_OK ,
- X /* 4 */ CL_OK ,
- X /* 5 */ CL_OK ,
- X /* 6 */ CL_OK ,
- X /* 7 */ CL_OK ,
- X /* 8 */ CL_OK ,
- X /* 9 */ CL_OK ,
- X /* : */ CL_IGNORE ,
- X /* ; */ CL_STOP ,
- X /* < */ CL_IGNORE ,
- X /* = */ CL_HYPHEN ,
- X /* > */ CL_IGNORE ,
- X /* ? */ CL_IGNORE ,
- X /* @ */ CL_OK CL_SEP SEP_AMPERSAND,
- X /* A */ CL_OK ,
- X /* B */ CL_OK ,
- X /* C */ CL_OK ,
- X /* D */ CL_OK ,
- X /* E */ CL_OK ,
- X /* F */ CL_OK ,
- X /* G */ CL_OK ,
- X /* H */ CL_OK ,
- X /* I */ CL_OK ,
- X /* J */ CL_OK ,
- X /* K */ CL_OK ,
- X /* L */ CL_OK ,
- X /* M */ CL_OK ,
- X /* N */ CL_OK ,
- X /* O */ CL_OK ,
- X /* P */ CL_OK ,
- X /* Q */ CL_OK ,
- X /* R */ CL_OK ,
- X /* S */ CL_OK ,
- X /* T */ CL_OK ,
- X /* U */ CL_OK ,
- X /* V */ CL_OK ,
- X /* W */ CL_OK ,
- X /* X */ CL_OK ,
- X /* Y */ CL_OK ,
- X /* Z */ CL_OK ,
- X /* [ */ CL_OK ,
- X /* \ */ CL_OK ,
- X /* ] */ CL_OK ,
- X /* ^ */ CL_IGNORE ,
- X /* _ */ CL_SPACE CL_SEP SEP_SCORE,
- X /* ` */ CL_IGNORE ,
- X /* a */ CL_OK ,
- X /* b */ CL_OK ,
- X /* c */ CL_OK ,
- X /* d */ CL_OK ,
- X /* e */ CL_OK ,
- X /* f */ CL_OK ,
- X /* g */ CL_OK ,
- X /* h */ CL_OK ,
- X /* i */ CL_OK ,
- X /* j */ CL_OK ,
- X /* k */ CL_OK ,
- X /* l */ CL_OK ,
- X /* m */ CL_OK ,
- X /* n */ CL_OK ,
- X /* o */ CL_OK ,
- X /* p */ CL_OK ,
- X /* q */ CL_OK ,
- X /* r */ CL_OK ,
- X /* s */ CL_OK ,
- X /* t */ CL_OK ,
- X /* u */ CL_OK ,
- X /* v */ CL_OK ,
- X /* w */ CL_OK ,
- X /* x */ CL_OK ,
- X /* y */ CL_OK ,
- X /* z */ CL_OK ,
- X /* { */ CL_OK ,
- X /* | */ CL_OK ,
- X /* } */ CL_OK ,
- X /* ~ */ CL_HYPHEN ,
- X /* DEL */ CL_IGNORE
- X} ;
- X
- X
- X/*
- X * pack_name(dest, source, length)
- X *
- X * Build a short display name from the sender text in source and store it,
- X * NUL terminated, in dest. At most length characters are stored, so dest
- X * needs room for length + 1 bytes. Returns strlen(dest); returns 0 with
- X * dest empty when source is NULL or empty or nothing usable is collected.
- X *
- X * Outline:
- X * - collect source into namebuf (dropping ignored characters, restarting
- X * after the first '(', stopping at '<' once something is collected) and
- X * record the first separator of each kind in separator[];
- X * - if that does not fit in length, cut addresses at '%', '@', '.' or '!',
- X * or normalize a personal name and reduce/remove middle and first names;
- X * - copy to dest, skipping TABs (used as "deleted" markers) and
- X * collapsing runs of spaces.
- X */
- Xpack_name(dest, source, length)
- Xchar *dest, *source;
- Xint length;
- X{
- X register char *p, *q, *r, c;
- X register int n;
- X char namebuf[129], *name;
- X char *maxq;
- X int lname, lfirst, lmiddle, llast, sep, i;
- X int drop_space, prev_space;
- X char *separator[SEP_MAXIMUM];
- X
- X dest[0] = NUL;
- X
- X if (source == NULL || source[0] == NUL)
- X return 0;
- X
- X p = source, q = namebuf, n = 0;
- X
- Xnew_partition:
- X for (i = SEP_MAXIMUM; --i >= 0; separator[i] = NULL);
- X
- X while ( c = *p++ ) {
- X if (c == '<') {
- X while (q > namebuf && q[-1] == SP) q--;
- X if (q == namebuf) continue;
- X break;
- X }
- X /* NOTE(review): Class[')'] is CL_IGNORE, so a ')' is dropped here */
- X /* and the c == ')' test further down looks unreachable -- confirm */
- X if (IGNORE(c)) continue;
- X if (q == namebuf && IS_SPACE(c)) continue;
- X if (c == '(') {
- X if (*p == ')') {
- X p++;
- X continue;
- X }
- X if (n++ == 0) {
- X q = namebuf;
- X goto new_partition;
- X }
- X continue;
- X }
- X if (c == ')') {
- X if (--n == 0) break;
- X continue;
- X }
- X if (n > 1) continue;
- X /* namebuf is full: stop collecting, leaving room for the NUL stored */
- X /* after the loop (source may be longer than namebuf) */
- X if (q >= namebuf + sizeof(namebuf) - 1) break;
- X *q++ = c;
- X if (IS_SEPARATOR(c)) {
- X switch (sep = (Class[c] & 0xff)) {
- X
- X case SEP_DOT:
- X if (separator[SEP_AMPERSAND] && q - namebuf <= length)
- X break;
- X continue;
- X
- X case SEP_BANG:
- X if (separator[SEP_AMPERSAND]) continue;
- X break;
- X
- X default:
- X if (separator[sep]) continue;
- X break;
- X }
- X
- X separator[sep] = q - 1;
- X }
- X }
- X
- X *q = NUL;
- X
- X if (namebuf[0] == NUL) return 0;
- X
- X name = namebuf;
- X
- X if (name[0] == '"') {
- X name++;
- X if (q[-1] == '"') *--q = NUL;
- X }
- X
- X if (q - name <= length) goto name_ok;
- X
- X /* sorry for the many goto's -- the 3B2 C compiler does not */
- X /* make correct code for complicated logical expressions!! */
- X /* not even without -O */
- X
- X /* We must pack the name to make it fit */
- X
- X /* Name_of_person%... -> Name_of_person */
- X
- X if (r = separator[SEP_PERCENT]) {
- X if (!(q = separator[SEP_SCORE]) || q > r )
- X goto no_percent;
- X if ((q = separator[SEP_AMPERSAND]) && q < r)
- X goto no_percent;
- X if ((q = separator[SEP_BANG]) && q < r)
- X goto no_percent;
- X *r = NUL;
- X goto parse_name;
- X }
- X
- X no_percent:
- X
- X /* name@site.domain -> name@site */
- X
- X if (r = separator[SEP_AMPERSAND]) {
- X
- X if ((q = separator[SEP_PERCENT]) && q < r) {
- X *r = NUL;
- X if (r - name <= length) goto name_ok;
- X
- X *q = NUL;
- X
- X if (((p = separator[SEP_BANG]) && p < q)
- X || ((p = strrchr(name, '!')) && p < q)) {
- X name = p + 1;
- X }
- X
- X if (strchr(name, '.'))
- X goto parse_name;
- X
- X goto name_ok;
- X }
- X
- X if (q = separator[SEP_DOT]) {
- X *q = NUL;
- X goto name_ok;
- X }
- X
- X *r = NUL;
- X if (r - name <= length) goto name_ok;
- X
- X if ((q = separator[SEP_BANG]) && q < r) {
- X name = q + 1;
- X goto name_ok;
- X }
- X
- X#ifdef NOTDEF
- X if (strchr(name, '!') == NULL)
- X goto parse_name; /* take the chance ... */
- X#endif
- X goto name_ok; /* can't do it any better */
- X }
- X
- X
- X /* Phase 1: Normalization (remove superfluous characters) */
- X /* lname counts the characters that will be output; TAB marks a deleted position */
- X
- X parse_name:
- X
- X for (p = name, lname = 0, prev_space = 0; c = *p; p++) {
- X
- X/*
- X if (IGNORE(c)) {
- X *p = TAB;
- X if (p == name) name++;
- X continue;
- X }
- X*/
- X
- X if (IS_OK(c)) {
- X lname++;
- X prev_space = 0;
- X continue;
- X }
- X
- X if (IS_HYPHEN(c)) {
- X if (p == name) {
- X name++;
- X continue;
- X }
- X if (prev_space)
- X *p = TAB;
- X else {
- X *p = '-';
- X lname++;
- X }
- X continue;
- X }
- X
- X if (BEGIN_RANGE(c)) {
- X
- X if (p == name) {
- X name++;
- X continue;
- X }
- X
- X c = END_RANGE(c);
- X for (q = p+1; *q && *q != c; q++);
- X if (*q) {
- X if (p[-1] != ' ') lname++;
- X while (p <= q) *p++ = ' ';
- X p--;
- X prev_space++;
- X continue;
- X }
- X c = ' ';
- X }
- X
- X if (IS_SPACE(c)) {
- X *p = ' ';
- X if (p == name)
- X name++;
- X else
- X if (!prev_space) {
- X lname++;
- X prev_space++;
- X }
- X continue;
- X }
- X
- X if (IS_STOP(c)) {
- X *p = NUL;
- X break;
- X }
- X }
- X drop_last_name:
- X while (p > name && (*--p == ' ' || *p == TAB)) *p = NUL;
- X
- X if (lname < length) goto name_ok;
- X
- X
- X /* Phase 2: Reduce middle names */
- X
- X for (r = p, llast = 0; r > name && *r != ' '; r--)
- X if (*r != TAB) llast++;
- X
- X /* r points to space before last name */
- X
- X if (strncmp(r, " Jr", 3) == 0 || strncmp(r, " II", 3) == 0) {
- X p = r+1;
- X lname -= llast;
- X goto drop_last_name;
- X }
- X
- X if (r == name) goto phase6; /* only last name */
- X
- X for (q = name, lfirst = 0; *q && *q != ' '; q++)
- X if (*q != TAB) lfirst++;
- X
- X /* q points at space after first name */
- X
- X for (p = q, lmiddle = 0; p < r; ) {
- X /* find next middle name */
- X while (p < r && (*p == ' ' || *p == TAB)) p++;
- X
- X if (p >= r) break; /* found last name */
- X
- X p++; /* skip first char of middle name */
- X for (;*p != ' '; p++) { /* remove rest */
- X if (*p == TAB) continue;
- X *p = TAB;
- X lname--;
- X }
- X lmiddle += 2; /* initial + space */
- X }
- X
- X if (lname < length) goto name_ok;
- X
- X /* If removing middle names is not enough, but reducing first name instead is, do it that way */
- X
- X if (lname - lmiddle >= length && lname - lfirst + 1 < length) goto phase4;
- X
- X
- X /* Phase 3: Remove middle names */
- X
- X for (p = q; p < r; p++) {
- X if (*p == TAB) continue;
- X if (*p == ' ') continue;
- X *p = TAB;
- X lname -= 2;
- X }
- X
- X if (lname < length) goto name_ok;
- X
- X
- X /* Phase 4: Reduce first name */
- X
- X phase4:
- X for (p = name+1; p < q; p++) {
- X if (*p == TAB) continue;
- X if (*p == ' ') continue;
- X *p = TAB;
- X lname--;
- X }
- X
- X if (lname < length) goto name_ok;
- X
- X /* Phase 5: Remove first name */
- X
- X name = r+1;
- X lname--;
- X
- X if (lname < length) goto name_ok;
- X
- X /* Phase 6: Cut last name */
- X /* (nothing to do here: the copy loop below stops after length characters) */
- X phase6:
- X goto name_ok;
- X
- X name_ok:
- X
- X q = dest;
- X maxq = q + length;
- X
- X drop_space = 1;
- X
- X for (p = name; *p && q < maxq ; p++) {
- X if (*p == TAB) continue;
- X
- X if ( *p == ' ' ) {
- X if (!drop_space) {
- X drop_space = 1;
- X *q++ = ' ';
- X }
- X continue;
- X }
- X drop_space = 0;
- X *q++ = *p;
- X }
- X
- X *q = NUL;
- X
- X return strlen(dest);
- X}
- X
- NO_NEWS_IS_GOOD_NEWS
- chmod 0644 pack_name.c || echo "restore of pack_name.c fails"
- set `wc -c pack_name.c`;Sum=$1 # Sum = byte count reported by wc
- if test "$Sum" != "10093" # size mismatch is only reported; unpacking continues
- then echo original size 10093, current size $Sum;fi
- echo "x - extracting pack_subject.c (Text)"
- sed 's/^X//' << 'NO_NEWS_IS_GOOD_NEWS' > pack_subject.c &&
- X/*
- X * pack subject by eliminating RE prefixes and - (nf) suffixes
- X */
- X
- X#include "config.h"
- X
- X/*
- X * pack_subject(dest, src, re_counter_ptr, max_length)
- X *
- X * Copy the subject in src to dest without leading white space and
- X * without leading "Re:", "Re " and "Re^N:" prefixes. Copying stops at a
- X * "- (nf" suffix or after max_length characters; dest must have room
- X * for max_length characters plus the terminating NUL.
- X *
- X * The reply count (one for each plain "Re", N for each "Re^N") is stored
- X * in *re_counter_ptr after being cut down to a char.
- X *
- X * Returns the number of characters stored in dest (0 when src is NULL).
- X */
- Xpack_subject(dest, src, re_counter_ptr, max_length)
- Xregister char *dest, *src;
- Xint *re_counter_ptr, max_length;
- X{
- X    int re;
- X    char *start_dest;
- X    register char *max_dest;
- X
- X    re = 0;
- X    start_dest = dest;  /* needed for the return value even when src is NULL */
- X
- X    if (src) {
- X        max_dest = dest + max_length;
- X
- X        while (*src) {
- X            if (isspace((unsigned char)*src)) {
- X                src++;
- X                continue;
- X            }
- X
- X            /* count and remove 'Re: Re: ...' */
- X            /* the prefix is recognized by looking ahead in src, so nothing */
- X            /* is written to dest before the bounded copy loop below */
- X
- X            if (*src != 'R' && *src != 'r') break;
- X            if (src[1] != 'e' && src[1] != 'E') break;
- X
- X            if (src[2] == ':' || src[2] == ' ') {
- X                src += 3;
- X                re++;
- X                continue;
- X            }
- X
- X            if (src[2] != '^') break;
- X
- X            src += 3;
- X
- X            if (isdigit((unsigned char)*src)) {
- X                re += atoi(src);        /* atoi stops at the first non-digit */
- X                while (isdigit((unsigned char)*src)) src++;
- X            } else
- X                re++;                   /* "Re^" without a number counts as one */
- X
- X            if (*src == ':') src++;
- X        }
- X
- X        while (*src && dest < max_dest) {
- X            /* only the first 5 characters, "- (nf", are compared */
- X            if (*src == '-' && strncmp("- (nf)", src, 5) == 0) break;
- X            *dest++ = *src++;
- X        }
- X    }
- X
- X    *dest = NUL;
- X    *re_counter_ptr = (char)re;
- X
- X    return dest - start_dest;
- X}
- NO_NEWS_IS_GOOD_NEWS
- chmod 0644 pack_subject.c || echo "restore of pack_subject.c fails"
- set `wc -c pack_subject.c`;Sum=$1 # Sum = byte count reported by wc
- if test "$Sum" != "1207" # size mismatch is only reported; unpacking continues
- then echo original size 1207, current size $Sum;fi
- echo "x - extracting patchlevel.h (Text)"
- sed 's/^X//' << 'NO_NEWS_IS_GOOD_NEWS' > patchlevel.h &&
- X/*
- X * Current patch level (initial level is zero)
- X *
- X * Modification history:
- X *
- X * 1988-07-20: Beta-test release 6.0 (Denmark)
- X * 1988-11-01: Distributed release 6.1 (Europe)
- X * 1989-03-21: Distributed release 6.2beta (FTP)
- X * 1989-05-30: Distributed release 6.3 (World)
- X */
- X
- X#define PATCHLEVEL 0
- X
- NO_NEWS_IS_GOOD_NEWS
- chmod 0644 patchlevel.h || echo "restore of patchlevel.h fails"
- set `wc -c patchlevel.h`;Sum=$1 # Sum = byte count reported by wc
- if test "$Sum" != "305" # size mismatch is only reported; unpacking continues
- then echo original size 305, current size $Sum;fi
- echo "x - extracting prefix.sh (Text)"
- sed 's/^X//' << 'NO_NEWS_IS_GOOD_NEWS' > prefix.sh &&
- XWARNING: DON'T CHANGE THE ORDER OR CONTENTS OF THE FOLLOWING LINES
- X
- X#include "config.h"
- X#include "patchlevel.h"
- X#include "update.h"
- X
- X--------CUT PREFIX HERE--------
- X&!/bin/sh
- X
- X& Release RELEASE,VERSION,PATCHLEVEL, No. UPDATE
- X
- X& Do not edit this file directly.
- X& It is generated from the corresponding .sh file.
- X
- X
- XSPOOL=NEWS_DIRECTORY
- X
- XTMP=TMP_DIRECTORY
- X
- XLIB=LIB_DIRECTORY
- X
- XDB=DB_DIRECTORY
- X
- XINEWS=INEWS_PATH
- X
- XRECMAIL=REC_MAIL
- X
- X#ifdef APPEND_SIGNATURE
- XAPPENDSIG=true
- X#else
- XAPPENDSIG=false
- X#endif
- X
- XPG=PAGER
- X
- X#ifdef NNTP
- X#undef NNTP
- XNNTP=true
- XACTIVE=$DB/ACTIVE
- X#else
- XNNTP=false
- XACTIVE=NEWS_ACTIVE
- X#endif
- NO_NEWS_IS_GOOD_NEWS
- chmod 0644 prefix.sh || echo "restore of prefix.sh fails"
- set `wc -c prefix.sh`;Sum=$1 # Sum = byte count reported by wc
- if test "$Sum" != "600" # size mismatch is only reported; unpacking continues
- then echo original size 600, current size $Sum;fi
- echo "x - extracting rc.c (Text)"
- sed 's/^X//' << 'NO_NEWS_IS_GOOD_NEWS' > rc.c &&
- X/*
- X * rc management routines
- X */
- X
- X#include "config.h"
- X#include "term.h"
- X#include "debug.h"
- X
- Xexport int keep_rc_backup = 1; /* copy the rc file to rc.bak while it is read (visit_rc_file) */
- Xexport int no_update = 0; /* when set, no backup is made and rc entries are not rewritten */
- Xexport int use_newsrc = 0; /* read .newsrc in visit_rc_file and rewrite it in close_rc */
- X
- Xexport long unread_articles; /* estimate of unread articles */
- Xexport int unread_groups; /* number of groups included in unread_articles (add_unread) */
- X
- X
- Xstatic FILE *rc = NULL; /* rc_file descriptor */
- X
- Xstatic char RC[] = "rc";
- Xstatic char BAK[] = "rc.bak";
- Xstatic char NEWSRC[] = ".newsrc";
- X
- Xstatic int has_newsrc = 0; /* set when visit_rc_file has opened the .newsrc file */
- X
- X/* RC lines have the format: */
- X/* SUBSCR space LASTART space NAME (as written by write_rc_entry; GROUPNAMEPOS leaves no room for a group number field) */
- X
- X#define SUBSCRZ 1 /* width of the subscription mark ('+' or '!') */
- X#define SUBSCRPOS 0
- X#define SUBSCR(buf) buf[0]
- X
- X/* NOTICE THAT LASTARTZ IS HARDCODED IN A printf FORMAT STRING LATER ON */
- X
- X#define LASTARTZ 6 /* width of the last-article field ("%06ld" in write_rc_entry) */
- X#define LASTARTPOS (SUBSCRPOS + SUBSCRZ + 1)
- X#define LASTART(buf) atol(buf + LASTARTPOS)
- X
- X#define GROUPNAMEPOS (LASTARTPOS + LASTARTZ + 1)
- X#define GROUPNAME(buf) (buf + GROUPNAMEPOS)
- X
- X#define NEW_OFFSET ((off_t)1) /* append to rc_file when written */
- X
- X/*
- X * read rc file info to group headers
- X * master file has been read in
- X */
- X
- X#define G_OLD G_NEW /* inverse use during rc reading */
- X#define G_RENUM G_DONE /* borrowed in visit_rc_file to mark groups whose rc entry must be rewritten */
- X
- X
- X/*
- X * visit_rc_file()
- X *
- X * Open the rc file and load its entries into the group headers.
- X *
- X * - An empty or missing rc file is restored from rc.bak when that exists;
- X * otherwise .newsrc is consulted and the "welcome" help is shown.
- X * - When .newsrc is read and is newer than the rc file, the rc file is
- X * rebuilt from it (the old rc file becomes rc.bak).
- X * - Every rc line is copied to rc.bak when a backup is wanted, then parsed:
- X * subscription mark, last article read, group name.
- X * - Groups without an rc entry are marked new and subscribed; groups whose
- X * recorded article number is beyond the group's last article are
- X * rewritten.
- X *
- X * On return the rc stream is open for update, or closed when no_update
- X * is set.
- X */
- Xvisit_rc_file()
- X{
- X FILE *bak, *newsrc;
- X register int c;
- X register group_header *gh;
- X register char *cp;
- X char line[512];
- X off_t rcpos; /* position in rc */
- X int warn_duplicates = 0, mk_bak, rd_newsrc, bak_used;
- X time_t m_rc, m_newsrc;
- X char rc_path[FILENAME], bak_path[FILENAME];
- X
- X strcpy(rc_path, relative(nn_directory, RC));
- X strcpy(bak_path, relative(nn_directory, BAK));
- X
- X rc = NULL; /* open rc-file */
- X rewind_rc(rc_path, OPEN_READ);
- X
- X m_rc = 0; /* stays 0 when the rc file is missing or empty */
- X if (rc != NULL) {
- X fseek(rc, (off_t)0, 2);
- X if (ftell(rc) > (off_t)0) {
- X fseek(rc, 0L, 0);
- X m_rc = m_time(rc);
- X }
- X }
- X
- X rd_newsrc = use_newsrc;
- X m_newsrc = 0;
- X mk_bak = keep_rc_backup;
- X bak_used = 0;
- X
- X if (m_rc == 0) { /* rc empty (i.e. new or corrupted) */
- X if ((bak = open_file(bak_path, OPEN_READ)) != NULL) {
- X m_rc = m_time(bak);
- X /* NOTE(review): m_newsrc is still 0 here, so this test looks always true -- confirm */
- X if (m_rc >= m_newsrc) {
- X printf("\nRestoring %s from %s\n", RC, BAK);
- X rewind_rc(rc_path, OPEN_CREATE | MUST_EXIST);
- X while ((c = getc(bak)) != EOF) putc(c, rc);
- X rewind_rc(rc_path, OPEN_READ | MUST_EXIST);
- X fclose(bak);
- X mk_bak = 0;
- X bak_used = 1;
- X }
- X } else {
- X rd_newsrc = 1;
- X display_help("welcome");
- X }
- X }
- X
- X if (rd_newsrc) {
- X newsrc = open_file(relative(home_directory, NEWSRC), OPEN_READ);
- X if (newsrc != NULL) {
- X m_newsrc = m_time(newsrc);
- X has_newsrc = 1;
- X }
- X }
- X
- X if (has_newsrc) {
- X if (m_rc < m_newsrc) {
- X if (bak_used) {
- X printf("\n%s is newer than %s -- use %s ? ",
- X NEWSRC, BAK, NEWSRC);
- X fl;
- X if (!yes(0)) m_newsrc = 0;
- X }
- X
- X if (m_newsrc != 0) {
- X
- X printf("\nReading from %s\n", NEWSRC);
- X
- X if (m_rc != 0) {
- X fclose(rc);
- X rc = NULL;
- X
- X unlink(bak_path);
- X if (link(rc_path, bak_path) < 0 || unlink(rc_path) < 0)
- X user_error("Cannot backup %s file\n", RC);
- X
- X mk_bak = 0;
- X }
- X
- X rewind_rc(rc_path, OPEN_CREATE | MUST_EXIST);
- X read_newsrc(newsrc);
- X rewind_rc(rc_path, OPEN_READ | MUST_EXIST);
- X }
- X }
- X
- X fclose(newsrc);
- X }
- X
- X if (no_update) mk_bak = 0;
- X
- X bak = mk_bak ? open_file(bak_path, OPEN_CREATE | MUST_EXIST) : NULL;
- X
- X /* rc was opened without MUST_EXIST above and may still be NULL */
- X /* (no rc file, no backup, nothing read from .newsrc): nothing to parse */
- X if (rc == NULL) goto endloop;
- X
- X for(;;) {
- X rcpos = ftell(rc); /* offset of this line, kept for rewriting it in place */
- X
- X c = getc(rc);
- X
- X cp = line;
- X while (c != NL) {
- X if (c == EOF) goto endloop;
- X /* over-long lines are truncated instead of overrunning line[] */
- X if (cp < line + sizeof(line) - 1) *cp++ = c;
- X c = getc(rc);
- X }
- X *cp = NUL;
- X if (bak != NULL) {
- X fputs(line, bak);
- X fputc(NL, bak);
- X }
- X
- X if (SUBSCR(line) != '+' && SUBSCR(line) != '!') {
- X /* unrecognized line */
- X continue;
- X }
- X
- X /* too short to hold a group name: GROUPNAME(line) would point past the NUL */
- X if (cp - line < GROUPNAMEPOS) continue;
- X
- X if ((gh = lookup(GROUPNAME(line))) == NULL) continue;
- X
- X if (gh->group_flag & G_OLD) {
- X printf("Duplicated entry in rc file: %s\n", gh->group_name);
- X warn_duplicates++;
- X }
- X gh->rc_offset = rcpos;
- X
- X gh->group_flag |= G_OLD;
- X if (SUBSCR(line) == '+')
- X gh->group_flag |= G_SUBSCRIPTION;
- X
- X gh->last_article = LASTART(line);
- X
- X if (gh->last_article > gh->last_l_article)
- X gh->group_flag |= G_RENUM; /* mark for use below */
- X
- X if (gh->first_l_article > gh->last_article ||
- X gh->last_article > gh->last_l_article)
- X gh->last_article = gh->first_l_article - 1;
- X
- X if (gh->last_article < 0) gh->last_article = 0;
- X }
- X
- Xendloop:
- X if (warn_duplicates) {
- X printf("You can repair this using \"nntidy\"\n");
- X any_key(0);
- X }
- X
- X rewind_rc(rc_path, OPEN_UPDATE | MUST_EXIST);
- X
- X Loop_Groups_Header(gh) {
- X if (gh->group_flag & G_OLD) {
- X if (gh->group_flag & G_RENUM) /* group is renumbered */
- X write_rc_entry(gh, 0);
- X gh->group_flag &= ~(G_NEW | G_RENUM);
- X } else {
- X /* no rc entry: new group, appended to the rc file when first written */
- X gh->group_flag |= G_SUBSCRIPTION | G_NEW;
- X gh->last_article = gh->first_l_article - 1;
- X if (gh->last_article < 0) gh->last_article = 0;
- X gh->rc_offset = NEW_OFFSET;
- X }
- X gh->first_article = gh->last_article;
- X }
- X
- X if (bak != NULL) fclose(bak);
- X
- X if (no_update) {
- X fclose(rc);
- X rc = NULL;
- X } else
- X fflush(rc);
- X}
- X
- X
- X/*
- X * rewind_rc(path, mode)
- X *
- X * Close the rc stream if it is open, then reopen path with open_file()
- X * in the given mode; the result (possibly NULL) becomes the rc stream.
- X */
- Xrewind_rc(path, mode)
- Xchar *path;
- Xint mode;
- X{
- X    if (rc)
- X        fclose(rc);
- X
- X    rc = open_file(path, mode);
- X}
- X
- X
- X/*
- X * restore_bak()
- X *
- X * Replace the rc file by rc.bak after asking the user for confirmation.
- X * Returns 1 when the backup was restored (or at once when no_update is
- X * set), 0 when no backup is kept or the user declines. If the restore
- X * fails, a message is printed and nn_exit(1) is called.
- X */
- Xrestore_bak()
- X{
- X    if (no_update) return 1;
- X
- X    if (!keep_rc_backup) {
- X        msg("No %s file ('backup' is not set)", BAK);
- X        return 0;
- X    }
- X
- X    prompt("Are you sure? ");
- X    if (!yes(1)) return 0;
- X
- X    fclose(rc); /* cannot use close_rc() since it would update .newsrc */
- X    rc = NULL;
- X
- X    /* each step is only tried when the one before it succeeded */
- X    if (chdir(nn_directory) >= 0
- X        && unlink(RC) >= 0
- X        && link(BAK, RC) >= 0
- X        && unlink(BAK) >= 0)
- X        return 1;
- X
- X    clrdisp();
- X    printf("Restore of %s file failed\n\n", RC);
- X    printf("Check state of %s and %s files\n", RC, BAK);
- X    nn_exit(1);
- X    /*NOTREACHED*/
- X}
- X
- X/*
- X * update_rc(gh)
- X *
- X * Take the group out of the unread counts and, unless no_update is set
- X * or the group is flagged G_RC_UPDATED, record its last article as read
- X * in the rc file and flag the group G_READ.
- X */
- Xupdate_rc(gh)
- Xregister group_header *gh;
- X{
- X    add_unread(gh, -1);
- X
- X    if (no_update) return;
- X    if (gh->group_flag & G_RC_UPDATED) return;
- X
- X    gh->last_article = gh->last_l_article;
- X
- X#ifdef RC_TEST
- X    if (Debug & RC_TEST)
- X        fprintf(stderr, "upd_rc(%s) pos=%ld, artno=%ld\n",
- X                gh->group_name, gh->rc_offset, gh->last_article);
- X#endif
- X
- X    write_rc_entry(gh, 0);
- X
- X    gh->group_flag |= G_READ;
- X}
- X
- X
- X/*
- X * restore_rc(gh, count)
- X *
- X * Mark articles of the group unread again and rewrite its rc entry.
- X * With count > 0 the last count articles become unread; with count == 0
- X * the article number saved in first_article is put back, provided the
- X * group is flagged G_RC_UPDATED and G_READ.
- X *
- X * Returns 1 when the rc entry was rewritten, 0 otherwise.
- X */
- Xrestore_rc(gh, count)
- Xregister group_header *gh;
- Xlong count;
- X{
- X    if (no_update) return 0;
- X
- X    if (count == 0 && !(gh->group_flag & G_RC_UPDATED)) return 0;
- X
- X    if (!(gh->group_flag & G_READ) && count <= 0) return 0;
- X
- X    add_unread(gh, -1);
- X
- X    if (count <= 0)
- X        gh->last_article = gh->first_article;
- X    else {
- X        gh->last_article = gh->last_l_article - count;
- X        if (gh->last_article < gh->first_l_article)
- X            gh->last_article = gh->first_l_article - 1;
- X        gh->first_article = gh->last_article;
- X    }
- X
- X#ifdef RC_TEST
- X    if (Debug & RC_TEST)
- X        fprintf(stderr, "restore_rc(%s) pos=%ld, artno=%ld\n",
- X                gh->group_name, gh->rc_offset, gh->last_article);
- X#endif
- X
- X    write_rc_entry(gh, 0);
- X
- X    gh->group_flag &= ~(G_READ|G_RC_UPDATED);
- X
- X    add_unread(gh, 1);
- X
- X    return 1;
- X}
- X
- X
- X/*
- X * close_rc()
- X *
- X * Close the rc stream; does nothing when it is not open. When use_newsrc
- X * is set, .newsrc is rewritten first and the rc file is then written to
- X * ("#\n" if it is empty, otherwise a newline at its last byte).
- X */
- Xclose_rc()
- X{
- X    off_t size;
- X
- X    if (rc == NULL) return;
- X
- X    if (use_newsrc) {
- X        write_newsrc();
- X
- X        fflush(rc);
- X        fseek(rc, 0L, 2); /* touch rc file */
- X        size = ftell(rc);
- X        if (size != 0) {
- X            fflush(rc);
- X            fseek(rc, size - 1, 0);
- X            fputc(NL, rc);
- X        } else
- X            fprintf(rc, "#\n");
- X    }
- X
- X    fclose(rc);
- X    rc = NULL;
- X}
- X
- X
- X
- X/*
- X * count_unread_articles(trace)
- X *
- X * Recompute unread_articles and unread_groups from scratch over all
- X * subscribed groups that have articles beyond the last one read.
- X * When trace is non-zero, the amount each group adds is printed.
- X */
- Xcount_unread_articles(trace)
- Xint trace;
- X{
- X    register group_header *gh;
- X    long n;
- X
- X    unread_articles = 0;
- X    unread_groups = 0;
- X
- X    Loop_Groups_Header(gh) {
- X        gh->group_flag &= ~G_UNREAD_COUNT;
- X
- X        if ((gh->group_flag & G_SUBSCRIPTION) == 0) continue;
- X
- X        if (gh->last_l_article > gh->last_article) {
- X            n = unread_articles;
- X            add_unread(gh, 1);
- X            if (trace) /* the difference is a long, so it is printed with %ld */
- X                printf("%s: %ld\n", gh->group_name, unread_articles - n);
- X        }
- X    }
- X}
- X
- X
- X/*
- X * prt_unread(format)
- X *
- X * Print format on stdout, expanding:
- X * %u "N unread article(s)" %U N (unread_articles)
- X * %g "N group(s)" %G N (unread_groups)
- X * %i "is" or "are", depending on unread_articles
- X * Any other character after '%' is dropped together with the '%'.
- X * A NULL format prints "No News (is good news)".
- X */
- Xprt_unread(format)
- Xregister char *format;
- X{
- X    if (format == NULL) {
- X        printf("No News (is good news)\n");
- X        return;
- X    }
- X
- X    while (*format) {
- X        if (*format != '%') {
- X            putchar(*format++);
- X            continue;
- X        }
- X        format++;
- X        /* a lone '%' at the end of format: stop here instead of */
- X        /* stepping past the terminating NUL */
- X        if (*format == NUL) break;
- X        switch (*format++) {
- X        case 'u':
- X            printf("%ld unread article%s",
- X                   unread_articles,
- X                   unread_articles == 1 ? "" : "s");
- X            continue;
- X        case 'g':
- X            printf("%d group%s",
- X                   unread_groups,
- X                   unread_groups == 1 ? "" : "s");
- X            continue;
- X        case 'i':
- X            printf("%s", unread_articles == 1 ? "is" : "are");
- X            continue;
- X        case 'U':
- X            printf("%ld", unread_articles);
- X            continue;
- X        case 'G':
- X            printf("%d", unread_groups);
- X            continue;
- X        }
- X    }
- X}
- X
- X
- X/*
- X * add_unread(gh, mode)
- X *
- X * Add (mode > 0) or subtract (otherwise) the group's unread articles
- X * to or from unread_articles / unread_groups. G_UNREAD_COUNT in the
- X * group flags records whether the group is currently counted, so that
- X * a group is never added or subtracted twice.
- X *
- X * Returns the previous G_UNREAD_COUNT flag value when the counts were
- X * changed, 0 when nothing was done.
- X */
- Xadd_unread(gh, mode)
- Xgroup_header *gh;
- Xint mode; /* +1 => add, -1 => subtract */
- X{
- X    long pending;
- X    int counted;
- X
- X    counted = (gh->group_flag & G_UNREAD_COUNT);
- X    pending = gh->last_l_article - gh->last_article;
- X
- X    if (mode > 0) {
- X        if (counted) return 0;          /* already included */
- X        gh->group_flag |= G_UNREAD_COUNT;
- X        unread_groups++;
- X        unread_articles += pending;
- X        return counted;
- X    }
- X
- X    if (!counted) return 0;             /* was not included */
- X    gh->group_flag &= ~G_UNREAD_COUNT;
- X    unread_groups--;
- X    unread_articles -= pending;
- X    return counted;
- X}
- X
- X
- X/*
- X * write one line on rc_file
- X */
- X
- X/*
- X * write_rc_entry(gh, new)
- X *
- X * Write the subscription mark and last article number of the group to
- X * the rc file. An existing entry is overwritten in place at
- X * gh->rc_offset; a new entry is written in full (with the group name)
- X * and gh->rc_offset is set to where it was written. A group whose
- X * rc_offset is NEW_OFFSET is always appended at the end of the file.
- X */
- Xwrite_rc_entry(gh, new)
- Xgroup_header *gh;
- Xint new; /* 0 => old, 1 => quick append, 2 => normal append */
- X{
- X    int append;
- X
- X    append = (gh->rc_offset == NEW_OFFSET) ? 2 : new;
- X
- X    if (append == 0) {
- X        if (fseek(rc, gh->rc_offset, 0) < 0)
- X            user_error("Seek error on %s file", RC);
- X    } else {
- X        if (append == 2)
- X            fseek(rc, (off_t)0, 2);
- X        gh->rc_offset = ftell(rc);
- X    }
- X
- X    /*
- X     * the 'last article' is not updated in the rc file
- X     * when a group is unsubscribed; if it is later resubscribed,
- X     * the present articles will still be unread (if they exist)
- X     */
- X
- X    /* MUST CHANGE IF LASTARTZ CHANGES */
- X    fprintf(rc, "%c %06ld",
- X            (gh->group_flag & G_SUBSCRIPTION) ? '+' : '!',
- X            (long)(gh->last_article));
- X
- X    if (append != 0) {
- X        /* only a new entry carries the group name and line end */
- X        fputc(' ', rc);
- X        fputs(gh->group_name, rc);
- X        fputc(NL, rc);
- X    }
- X
- X    fflush(rc);
- X}
- X
- X
- X/*
- X * Old-style .newsrc support
- X */
- X
- X/*
- X * read_newsrc(newsrc)
- X *
- X * Load the groups' state from an open .newsrc stream: copy_newsrc()
- X * is called without an output file.
- X */
- Xstatic read_newsrc(newsrc)
- XFILE *newsrc;
- X{
- X    FILE *no_output;
- X
- X    no_output = (FILE *)NULL;
- X    copy_newsrc(newsrc, no_output);
- X}
- X
- X/*
- X * write_newsrc()
- X *
- X * Rewrite .newsrc in the home directory. When a .newsrc was read
- X * earlier (has_newsrc), it is first renamed to .newsrc.bak and used
- X * as input for copy_newsrc(). If a .newsrc is still present after
- X * that, nothing is written and the problem is logged and reported.
- X */
- Xstatic write_newsrc()
- X{
- X    char newsrc_path[FILENAME], bak_path[FILENAME];
- X    FILE *newsrc, *bak;
- X
- X    strcpy(newsrc_path, relative(home_directory, NEWSRC));
- X    sprintf(bak_path, "%s.bak", newsrc_path);
- X
- X    bak = NULL;
- X    if (has_newsrc) {
- X        unlink(bak_path);
- X        if (link(newsrc_path, bak_path) < 0 || unlink(newsrc_path) < 0)
- X            user_error("Cannot backup %s file\n", newsrc_path);
- X
- X        bak = open_file(bak_path, OPEN_READ | MUST_EXIST);
- X    }
- X
- X    if (!file_exist(newsrc_path, (char *)NULL)) {
- X        newsrc = open_file(newsrc_path, OPEN_CREATE | MUST_EXIST);
- X        copy_newsrc(bak, newsrc);
- X        if (bak != NULL) fclose(bak);
- X        fclose(newsrc);
- X        return;
- X    }
- X
- X    /* This is real paranoia ... don't let people lose their .newsrc */
- X    /* This should not happen - but it has been seen */
- X    log_entry('E', "failed to backup %s", newsrc_path);
- X    fprintf(stderr, "PROBLEM... YOUR %s WAS NOT UPDATED\n", NEWSRC);
- X    if (bak != NULL) fclose(bak);
- X}
- X
- Xstatic copy_newsrc(old_rc, new_rc) /*
- X * Works in one of two modes, selected by new_rc:
- X *
- X * new_rc == NULL (read mode): each recognized line of old_rc sets the
- X * group's last_article and subscription flag and appends an entry to
- X * the rc file through write_rc_entry(). Afterwards rc_offset,
- X * last_article and the non-master flags of ALL groups are reset.
- X *
- X * new_rc != NULL (write mode): old_rc (may be NULL) is copied to new_rc;
- X * lines for known, subscribed groups are regenerated with
- X * write_newsrc_entry(), all other lines are copied unchanged, and a
- X * second line for an already written group is dropped. Groups not seen
- X * in old_rc are written afterwards.
- X *
- X * Always returns 1.
- X */
- XFILE *old_rc, *new_rc;
- X{
- X char buf[2048];
- X char *sub, *last, subscr;
- X long atol();
- X register group_header *gh;
- X
- X Loop_Groups_Header(gh)
- X gh->group_flag &= ~G_DONE; /* G_DONE marks groups already written to new_rc */
- X
- X if (old_rc != NULL) {
- X /* NEWSRC lines have the following format */
- X /* NAME(n)SUBSCR(1) space NUM[,NUM][-NUM]... */
- X
- X while (fgets(buf, 2048, old_rc) != NULL) {
- X subscr = 0;
- X if (sub = strchr(buf, ':')) /* ':' = subscribed, '!' = unsubscribed */
- X subscr = 1;
- X else
- X sub = strchr(buf, '!');
- X
- X if (sub == NULL) {
- X if (new_rc != NULL) goto output_unchanged;
- X continue;
- X }
- X
- X *sub = NUL; /* cut the line at the mark for the lookup, then put the mark back */
- X gh = lookup(buf);
- X *sub++ = subscr ? ':' : '!';
- X
- X if (gh == NULL) {
- X if (new_rc != NULL) goto output_unchanged;
- X continue;
- X }
- X
- X if (new_rc != NULL) {
- X if (gh->group_flag & G_DONE) continue;
- X gh->group_flag |= G_DONE;
- X if (!subscr) goto output_unchanged;
- X write_newsrc_entry(new_rc, gh, (*sub == NL) ? 1 : 0);
- X continue;
- X }
- X
- X /* Notice: unread articles before the last read article are lost */
- X
- X if (*sub == NL) /* new group */
- X continue;
- X
- X if (subscr) { /* the last number on the line follows the last '-', ',' or ' ' */
- X last = strrchr(sub, '-');
- X if (last == NULL) last = strrchr(sub, ',');
- X if (last == NULL) last = strrchr(sub, ' ');
- X if (last == NULL) last = "0"; else last++;
- X
- X gh->last_article = atol(last);
- X gh->group_flag |= G_SUBSCRIPTION;
- X } else
- X gh->last_article = 0;
- X
- X gh->rc_offset = NEW_OFFSET; /* makes write_rc_entry append the entry */
- X
- X write_rc_entry(gh, 0);
- X continue;
- X
- X output_unchanged:
- X fputs(buf, new_rc);
- X }
- X }
- X
- X Loop_Groups_Header(gh) {
- X if (new_rc != NULL) {
- X if (gh->group_flag & G_DONE) continue;
- X write_newsrc_entry(new_rc, gh, -1);
- X } else {
- X gh->rc_offset = 0;
- X gh->last_article = 0;
- X gh->group_flag &= G_MASTER_FLAGS;
- X }
- X }
- X
- X return 1;
- X}
- X
- X/*
- X * write_newsrc_entry(newsrc, gh, also_new)
- X *
- X * Write one .newsrc line for the group: its name, ':' (subscribed) or
- X * '!' (unsubscribed), and the range of articles read.
- X *
- X * also_new only matters for a group that is flagged G_NEW and not G_READ:
- X * < 0 write nothing at all
- X * > 0 write the name and mark only, without article numbers
- X * 0 write a complete line
- X * For all other groups a complete line is written.
- X */
- Xwrite_newsrc_entry(newsrc, gh, also_new)
- XFILE *newsrc;
- Xregister group_header *gh;
- Xint also_new;
- X{
- X    if ((gh->group_flag & G_READ) == 0 && (gh->group_flag & G_NEW)) {
- X        if (also_new < 0) return;
- X    } else
- X        also_new = 0;
- X
- X    fprintf(newsrc, "%s%c", gh->group_name,
- X            (gh->group_flag & G_SUBSCRIPTION) ? ':' : '!');
- X
- X    if (also_new) {
- X        fputc(NL, newsrc);
- X        return;
- X    }
- X
- X    /* article numbers are cast to long and printed with %ld, as in */
- X    /* write_rc_entry(), so the output does not depend on their type */
- X    if (gh->first_l_article > gh->last_article)
- X        fprintf(newsrc, " %s%ld\n",
- X                gh->first_l_article > 2 ? "1-" : "",
- X                (long)(gh->first_l_article - 1));
- X    else
- X        fprintf(newsrc, " %ld-%ld\n",
- X                (long)(gh->first_l_article), (long)(gh->last_article));
- X}
- NO_NEWS_IS_GOOD_NEWS
- chmod 0644 rc.c || echo "restore of rc.c fails"
- set `wc -c rc.c`;Sum=$1 # Sum = byte count reported by wc
- if test "$Sum" != "13314" # size mismatch is only reported; unpacking continues
- then echo original size 13314, current size $Sum;fi
- echo "x - extracting regexp.c (Text)"
- sed 's/^X//' << 'NO_NEWS_IS_GOOD_NEWS' > regexp.c &&
- X/*
- X * regexp.c - regular expression matching
- X *
- X * NOTICE: THIS CODE HAS BEEN MODIFIED TO FIT THE NN ENVIRONMENT.
- X *
- X * DESCRIPTION
- X *
- X * This source was taken from the pax posting in comp.sources.unix.
- X *
- X * Underneath the reformatting and comment blocks which were added to
- X * make it consistent with the rest of the code, you will find a
- X * modified version of Henry Spencer's regular expression library.
- X * Henry's functions were modified to provide the minimal regular
- X * expression matching, as required by P1003. Henry's code was
- X * copyrighted, and copy of the copyright message and restrictions
- X * are provided, verbatim, below:
- X *
- X * Copyright (c) 1986 by University of Toronto.
- X * Written by Henry Spencer. Not derived from licensed software.
- X *
- X * Permission is granted to anyone to use this software for any
- X * purpose on any computer system, and to redistribute it freely,
- X * subject to the following restrictions:
- X *
- X * 1. The author is not responsible for the consequences of use of
- X * this software, no matter how awful, even if they arise
- X * from defects in it.
- X *
- X * 2. The origin of this software must not be misrepresented, either
- X * by explicit claim or by omission.
- X *
- X * 3. Altered versions must be plainly marked as such, and must not
- X * be misrepresented as being the original software.
- X *
- X * Beware that some of this code is subtly aware of the way operator
- X * precedence is structured in regular expressions. Serious changes in
- X * regular-expression syntax might require a total rethink.
- X *
- X * AUTHORS
- X *
- X * Mark H. Colburn, NAPS International (mark@jhereg.mn.org)
- X * Henry Spencer, University of Toronto (henry@utzoo.edu)
- X *
- X * Sponsored by The USENIX Association for public distribution.
- X *
- X * $Log: regexp.c,v $
- X * Revision 1.1 88/12/23 18:02:32 mark
- X * Initial revision
- X *
- X */
- X
- X#define NN
- X
- X/* Headers */
- X
- X#ifdef NN
- X#include "config.h"
- X#include "regexp.h"
- X#else
- X#include "pax.h"
- X
- X#ifndef lint
- Xstatic char *Ident = "$Id: regexp.c,v 1.1 88/12/23 18:02:32 mark Rel $";
- X#endif
- X#endif
- X
- X/*
- X * The "internal use only" fields in regexp.h are present to pass info from
- X * compile to execute that permits the execute phase to run lots faster on
- X * simple cases. They are:
- X *
- X * regstart char that must begin a match; '\0' if none obvious
- X * reganch is the match anchored (at beginning-of-line only)?
- X * regmust string (pointer into program) that match must include, or NULL
- X * regmlen length of regmust string
- X *
- X * Regstart and reganch permit very fast decisions on suitable starting points
- X * for a match, cutting down the work a lot. Regmust permits fast rejection
- X * of lines that cannot possibly match. The regmust tests are costly enough
- X * that regcomp() supplies a regmust only if the r.e. contains something
- X * potentially expensive (at present, the only such thing detected is * or +
- X * at the start of the r.e., which can involve a lot of backup). Regmlen is
- X * supplied because the test in regexec() needs it and regcomp() is computing
- X * it anyway.
- X */
- X
- X/*
- X * Structure for regexp "program". This is essentially a linear encoding
- X * of a nondeterministic finite-state machine (aka syntax charts or
- X * "railroad normal form" in parsing technology). Each node is an opcode
- X * plus a "nxt" pointer, possibly plus an operand. "Nxt" pointers of
- X * all nodes except BRANCH implement concatenation; a "nxt" pointer with
- X * a BRANCH on both ends of it is connecting two alternatives. (Here we
- X * have one of the subtle syntax dependencies: an individual BRANCH (as
- X * opposed to a collection of them) is never concatenated with anything
- X * because of operator precedence.) The operand of some types of node is
- X * a literal string; for others, it is a node leading into a sub-FSM. In
- X * particular, the operand of a BRANCH node is the first node of the branch.
- X * (NB this is *not* a tree structure: the tail of the branch connects
- X * to the thing following the set of BRANCHes.) The opcodes are:
- X */
- X
- X/* definition number opnd? meaning */
- X#define END 0 /* no End of program. */
- X#define BOL 1 /* no Match "" at beginning of line. */
- X#define EOL 2 /* no Match "" at end of line. */
- X#define ANY 3 /* no Match any one character. */
- X#define ANYOF 4 /* str Match any character in this string. */
- X#define ANYBUT 5 /* str Match any character not in this
- X * string. */
- X#define BRANCH 6 /* node Match this alternative, or the
- X * nxt... */
- X#define BACK 7 /* no Match "", "nxt" ptr points backward. */
- X#define EXACTLY 8 /* str Match this string. */
- X#define NOTHING 9 /* no Match empty string. */
- X#define STAR 10 /* node Match this (simple) thing 0 or more
- X * times. */
- X#define OPEN 20 /* no Mark this point in input as start of
- X * #n. */
- X /* OPEN+1 is number 1, etc. */
- X#define CLOSE 30 /* no Analogous to OPEN. */
- X
- X/*
- X * Opcode notes:
- X *
- X * BRANCH The set of branches constituting a single choice are hooked
- X * together with their "nxt" pointers, since precedence prevents
- X * anything being concatenated to any individual branch. The
- X * "nxt" pointer of the last BRANCH in a choice points to the
- X * thing following the whole choice. This is also where the
- X * final "nxt" pointer of each individual branch points; each
- X * branch starts with the operand node of a BRANCH node.
- X *
- X * BACK Normal "nxt" pointers all implicitly point forward; BACK
- X * exists to make loop structures possible.
- X *
- X * STAR complex '*', are implemented as circular BRANCH structures
- X * using BACK. Simple cases (one character per match) are
- X * implemented with STAR for speed and to minimize recursive
- X * plunges.
- X *
- X * OPEN,CLOSE ...are numbered at compile time.
- X */
- X
- X/*
- X * A node is one char of opcode followed by two chars of "nxt" pointer.
- X * "Nxt" pointers are stored as two 8-bit pieces, high order first. The
- X * value is a positive offset from the opcode of the node containing it.
- X * An operand, if any, simply follows the node. (Note that much of the
- X * code generation knows about this implicit relationship.)
- X *
- X * Using two bytes for the "nxt" pointer is vast overkill for most things,
- X * but allows patterns to get big without disasters.
- X */
- X#define OP(p) (*(p))
- X#define NEXT(p) (((*((p)+1)&0377)<<8) + (*((p)+2)&0377))
- X#define OPERAND(p) ((p) + 3)
- X
- X/*
- X * Utility definitions.
- X */
- X
- X#define FAIL(m) { regerror(m); return(NULL); }
- X#define ISMULT(c) ((c) == '*')
- X#define META "^$.[()|*\\"
- X#ifndef CHARBITS
- X#define UCHARAT(p) ((int)*(unsigned char *)(p))
- X#else
- X#define UCHARAT(p) ((int)*(p)&CHARBITS)
- X#endif
- X
- X/*
- X * Flags to be passed up and down.
- X */
- X#define HASWIDTH 01 /* Known never to match null string. */
- X#define SIMPLE 02 /* Simple enough to be STAR operand. */
- X#define SPSTART 04 /* Starts with * */
- X#define WORST 0 /* Worst case. */
- X
- X/*
- X * Global work variables for regcomp().
- X */
- Xstatic char *regparse; /* Input-scan pointer. */
- Xstatic int regnpar; /* () count. */
- Xstatic char regdummy; /* Placeholder target for regcode while only sizing. */
- Xstatic char *regcode; /* Code-emit pointer; &regdummy = don't. */
- Xstatic long regsize; /* Code size. */
- X
- X/*
- X * Forward declarations for regcomp()'s friends.
- X */
- X#ifndef STATIC
- X#define STATIC static
- X#endif
- XSTATIC char *reg();
- XSTATIC char *regbranch();
- XSTATIC char *regpiece();
- XSTATIC char *regatom();
- XSTATIC char *regnode();
- XSTATIC char *regnext();
- XSTATIC void regc();
- XSTATIC void reginsert();
- XSTATIC void regtail();
- XSTATIC void regoptail();
- X#ifdef STRCSPN
- XSTATIC int strcspn();
- X#endif
- X
- X/*
- X - regcomp - compile a regular expression into internal code
- X *
- X * We can't allocate space until we know how big the compiled form will be,
- X * but we can't compile it (and thus know how big it is) until we've got a
- X * place to put the code. So we cheat: we compile it twice, once with code
- X * generation turned off and size counting turned on, and once "for real".
- X * This also means that we don't allocate space until we are sure that the
- X * thing really will compile successfully, and we never have to move the
- X * code and thus invalidate pointers into it. (Note that it has to be in
- X * one piece because free() must be able to free it all.)
- X *
- X * Beware that the optimization-preparation code in here knows about some
- X * of the structure of the compiled regexp.
- X *
- X * PARAMETERS
- X *
- X *    exp - text of the regular expression to compile
- X *
- X * RETURNS
- X *
- X *    A pointer to a malloc()ed regexp holding the compiled program and
- X *    the regstart/reganch/regmust/regmlen hints, or NULL if exp is
- X *    NULL, does not parse, is too big, or memory is exhausted (FAIL
- X *    reports these through regerror()).
- X */
- Xregexp *regcomp(exp)
- Xchar *exp;
- X{
- X    register regexp *r;
- X    register char *scan;
- X    register char *longest;
- X    register int len;
- X    int flags;
- X    extern char *malloc();
- X
- X    if (exp == NULL)
- X        FAIL("NULL argument");
- X
- X    /* First pass: determine size, legality. */
- X    regparse = exp;
- X    regnpar = 1;
- X    regsize = 0L;
- X    regcode = &regdummy;        /* Emitters only count while pointed here. */
- X    regc(MAGIC);
- X    if (reg(0, &flags) == NULL)
- X        return (NULL);
- X
- X    /* Small enough for pointer-storage convention? */
- X    if (regsize >= 32767L)      /* Probably could be 65535L. */
- X        FAIL("regexp too big");
- X
- X    /* Allocate space. */
- X    r = (regexp *) malloc(sizeof(regexp) + (unsigned) regsize);
- X    if (r == NULL)
- X        FAIL("out of space");
- X
- X    /* Second pass: emit code. */
- X    regparse = exp;
- X    regnpar = 1;
- X    regcode = r->program;
- X    regc(MAGIC);
- X    /* NOTE(review): r is not released if this second pass fails. */
- X    if (reg(0, &flags) == NULL)
- X        return (NULL);
- X
- X    /* Dig out information for optimizations. */
- X    r->regstart = '\0';         /* Worst-case defaults. */
- X    r->reganch = 0;
- X    r->regmust = NULL;
- X    r->regmlen = 0;
- X    scan = r->program + 1;      /* First BRANCH. */
- X    if (OP(regnext(scan)) == END) {     /* Only one top-level choice. */
- X        scan = OPERAND(scan);
- X
- X        /* Starting-point info. */
- X        if (OP(scan) == EXACTLY)
- X            r->regstart = *OPERAND(scan);
- X        else if (OP(scan) == BOL)
- X            r->reganch++;
- X
- X        /*
- X         * If there's something expensive in the r.e., find the longest
- X         * literal string that must appear and make it the regmust. Resolve
- X         * ties in favor of later strings, since the regstart check works
- X         * with the beginning of the r.e. and avoiding duplication
- X         * strengthens checking. Not a strong reason, but sufficient in the
- X         * absence of others.
- X         */
- X        if (flags & SPSTART) {
- X            longest = NULL;
- X            len = 0;
- X            for (; scan != NULL; scan = regnext(scan))
- X                if (OP(scan) == EXACTLY && strlen(OPERAND(scan)) >= len) {
- X                    longest = OPERAND(scan);
- X                    len = strlen(OPERAND(scan));
- X                }
- X            r->regmust = longest;
- X            r->regmlen = len;
- X        }
- X    }
- X    return (r);
- X}
- X
- X/*
- X - reg - regular expression, i.e. main body or parenthesized thing
- X *
- X * The caller has already consumed the opening parenthesis, if any.
- X * Parses a '|'-separated list of branches, chains the BRANCH nodes
- X * together and ties every branch tail to a closing node (CLOSE+n for
- X * a parenthesized group, END for the main body).
- X *
- X * *flagp is HASWIDTH unless some branch lacks it, plus SPSTART if any
- X * branch reports it.  Returns the first node, or NULL on error.
- X */
- Xstatic char *reg(paren, flagp)
- Xint paren;                      /* Parenthesized? */
- Xint *flagp;
- X{
- X    register char *first;
- X    register char *branch;
- X    register char *closer;
- X    register int group;
- X    int brflags;
- X
- X    *flagp = HASWIDTH;          /* Until a branch proves otherwise. */
- X
- X    /* A parenthesized group starts with its own OPEN node. */
- X    first = NULL;
- X    if (paren) {
- X        if (regnpar >= NSUBEXP)
- X            FAIL("too many ()");
- X        group = regnpar++;
- X        first = regnode(OPEN + group);
- X    }
- X
- X    /* Collect the alternatives, one per pass round the loop. */
- X    for (;;) {
- X        branch = regbranch(&brflags);
- X        if (branch == NULL)
- X            return (NULL);
- X
- X        if (first == NULL)
- X            first = branch;             /* Nothing precedes it. */
- X        else
- X            regtail(first, branch);     /* OPEN or BRANCH -> BRANCH. */
- X
- X        if ((brflags & HASWIDTH) == 0)
- X            *flagp &= ~HASWIDTH;
- X        *flagp |= brflags & SPSTART;
- X
- X        if (*regparse != '|')
- X            break;
- X        regparse++;
- X    }
- X
- X    /* Terminate the chain and point each branch's tail at the closer. */
- X    closer = regnode(paren ? CLOSE + group : END);
- X    regtail(first, closer);
- X    for (branch = first; branch != NULL; branch = regnext(branch))
- X        regoptail(branch, closer);
- X
- X    /* The input must now hold ')' for a group, or be exhausted. */
- X    if (paren) {
- X        if (*regparse++ != ')')
- X            FAIL("unmatched ()");
- X    } else if (*regparse != '\0') {
- X        if (*regparse == ')')
- X            FAIL("unmatched ()");
- X        FAIL("junk on end");            /* "Can't happen". */
- X        /* NOTREACHED */
- X    }
- X    return (first);
- X}
- X
- X/*
- X - regbranch - one alternative of an | operator
- X *
- X * Emits a BRANCH node and then concatenates pieces until the input
- X * reaches end of string, '|' or ')'.  An empty alternative gets a
- X * NOTHING node as its body.
- X *
- X * *flagp gets HASWIDTH if any piece has it, and SPSTART if the first
- X * piece has it.  Returns the BRANCH node, or NULL on error.
- X */
- Xstatic char *regbranch(flagp)
- Xint *flagp;
- X{
- X    register char *head;
- X    register char *prev;
- X    register char *piece;
- X    int pflags;
- X
- X    *flagp = WORST;             /* Until the pieces say more. */
- X
- X    head = regnode(BRANCH);
- X    prev = NULL;
- X    for (;;) {
- X        if (*regparse == '\0' || *regparse == '|' || *regparse == ')')
- X            break;
- X
- X        piece = regpiece(&pflags);
- X        if (piece == NULL)
- X            return (NULL);
- X
- X        *flagp |= pflags & HASWIDTH;
- X        if (prev != NULL)
- X            regtail(prev, piece);
- X        else                    /* Only the first piece sets SPSTART. */
- X            *flagp |= pflags & SPSTART;
- X        prev = piece;
- X    }
- X
- X    if (prev == NULL)           /* No pieces at all. */
- X        regnode(NOTHING);
- X
- X    return (head);
- X}
- X
- X/*
- X - regpiece - something followed by possible [*]
- X *
- X * Parses one atom and, if a '*' follows, wraps it in a repetition.
- X * A SIMPLE atom becomes the operand of a STAR node; anything else is
- X * turned into a BRANCH/BACK loop whose second alternative is NOTHING.
- X * That NOTHING node doubles as the end marker of the branch list, so
- X * it cannot be dropped.
- X *
- X * Without a '*' the atom's flags are passed on unchanged; with one,
- X * *flagp becomes WORST|SPSTART.  Returns the first node of the piece,
- X * or NULL on error.
- X */
- Xstatic char *regpiece(flagp)
- Xint *flagp;
- X{
- X    register char *atom;
- X    int aflags;
- X
- X    atom = regatom(&aflags);
- X    if (atom == NULL)
- X        return (NULL);
- X
- X    /* ISMULT accepts only '*', so no operator means a plain atom. */
- X    if (!ISMULT(*regparse)) {
- X        *flagp = aflags;
- X        return (atom);
- X    }
- X
- X    if ((aflags & HASWIDTH) == 0)
- X        FAIL("* operand could be empty");
- X    *flagp = (WORST | SPSTART);
- X
- X    if (aflags & SIMPLE) {
- X        reginsert(STAR, atom);
- X    } else {
- X        /* Emit x* as (x&|), where & means "self". */
- X        reginsert(BRANCH, atom);                /* Either x */
- X        regoptail(atom, regnode(BACK));         /* and loop */
- X        regoptail(atom, atom);                  /* back */
- X        regtail(atom, regnode(BRANCH));         /* or */
- X        regtail(atom, regnode(NOTHING));        /* null. */
- X    }
- X
- X    regparse++;                 /* Step over the '*'. */
- X    if (ISMULT(*regparse))
- X        FAIL("nested *");
- X
- X    return (atom);
- X}
- X
- X/*
- X - regatom - the lowest level
- X *
- X * Optimization: gobbles an entire sequence of ordinary characters so that
- X * it can turn them into a single node, which is smaller to store and
- X * faster to run. Backslashed characters are exceptions, each becoming a
- X * separate node; the code is simpler that way and it's not worth fixing.
- X *
- X * Consumes input at regparse, emits the node(s) for one atom and returns
- X * the first of them. *flagp starts as WORST and gains HASWIDTH, SIMPLE
- X * and (from a parenthesized group) SPSTART as appropriate. Returns NULL
- X * on a syntax error (FAIL calls regerror()) or when a nested reg() fails.
- X */
- Xstatic char *regatom(flagp)
- Xint *flagp;
- X{
- X register char *ret;
- X int flags;
- X
- X *flagp = WORST; /* Tentatively. */
- X
- X switch (*regparse++) {
- X case '^':
- X ret = regnode(BOL);
- X break;
- X case '$':
- X ret = regnode(EOL);
- X break;
- X case '.':
- X ret = regnode(ANY);
- X *flagp |= HASWIDTH | SIMPLE;
- X break;
- X case '[':{
- X register int class;
- X register int classend;
- X
- X if (*regparse == '^') { /* Complement of range. */
- X ret = regnode(ANYBUT);
- X regparse++;
- X } else
- X ret = regnode(ANYOF);
- X if (*regparse == ']' || *regparse == '-') /* A leading ']' or '-' is literal. */
- X regc(*regparse++);
- X while (*regparse != '\0' && *regparse != ']') {
- X if (*regparse == '-') {
- X regparse++;
- X if (*regparse == ']' || *regparse == '\0')
- X regc('-'); /* A '-' just before the end is literal. */
- X else {
- X class = UCHARAT(regparse - 2) + 1; /* One past the char before '-'. */
- X classend = UCHARAT(regparse);
- X if (class > classend + 1)
- X FAIL("invalid [] range");
- X for (; class <= classend; class++)
- X regc(class);
- X regparse++;
- X }
- X } else
- X regc(*regparse++);
- X }
- X regc('\0'); /* Terminate the operand string. */
- X if (*regparse != ']')
- X FAIL("unmatched []");
- X regparse++;
- X *flagp |= HASWIDTH | SIMPLE;
- X }
- X break;
- X case '(':
- X ret = reg(1, &flags);
- X if (ret == NULL)
- X return (NULL);
- X *flagp |= flags & (HASWIDTH | SPSTART);
- X break;
- X case '\0':
- X case '|':
- X case ')':
- X FAIL("internal urp"); /* Supposed to be caught earlier. */
- X break;
- X case '*':
- X FAIL("* follows nothing");
- X break;
- X case '\\':
- X if (*regparse == '\0')
- X FAIL("trailing \\");
- X ret = regnode(EXACTLY);
- X regc(*regparse++); /* The escaped character, taken literally. */
- X regc('\0');
- X *flagp |= HASWIDTH | SIMPLE;
- X break;
- X default:{
- X register int len;
- X register char ender;
- X
- X regparse--; /* Un-read the character the switch consumed. */
- X len = strcspn(regparse, META); /* Length of the run of non-META characters. */
- X if (len <= 0)
- X FAIL("internal disaster");
- X ender = *(regparse + len);
- X if (len > 1 && ISMULT(ender))
- X len--; /* Back off clear of * operand. */
- X *flagp |= HASWIDTH;
- X if (len == 1)
- X *flagp |= SIMPLE;
- X ret = regnode(EXACTLY);
- X while (len > 0) {
- X regc(*regparse++);
- X len--;
- X }
- X regc('\0');
- X }
- X break;
- X }
- X
- X return (ret);
- X}
- X
- X/*
- X - regnode - emit a node
- X *
- X * Appends a three-byte node at regcode: the opcode `op' followed by a
- X * zeroed two-byte "nxt" pointer, and advances regcode past it.  While
- X * regcode points at regdummy (the sizing pass) nothing is stored and
- X * regsize is increased by 3 instead.
- X *
- X * Returns the address of the node, which is &regdummy when sizing.
- X */
- Xstatic char *regnode(op)
- Xchar op;
- X{
- X    register char *ret;
- X    register char *ptr;
- X
- X    ret = regcode;
- X    if (ret == &regdummy) {     /* Sizing pass: count only. */
- X        regsize += 3;
- X        return (ret);
- X    }
- X    ptr = ret;
- X    *ptr++ = op;
- X    *ptr++ = '\0';              /* Null "nxt" pointer. */
- X    *ptr++ = '\0';
- X    regcode = ptr;
- X
- X    return (ret);
- X}
- X
- X/*
- X - regc - emit (if appropriate) a byte of code
- X *
- X * Stores `b' at regcode and advances it, unless regcode points at
- X * regdummy (the sizing pass), in which case only regsize is bumped.
- X */
- Xstatic void regc(b)
- Xchar b;
- X{
- X    if (regcode != &regdummy)
- X        *regcode++ = b;
- X    else
- X        regsize++;
- X}
- X
- X/*
- X - reginsert - insert an operator in front of already-emitted operand
- X *
- X * Means relocating the operand: every byte from `opnd' up to regcode
- X * is moved three bytes higher, and a node with opcode `op' and a null
- X * "nxt" pointer is written where the operand used to start.  During
- X * the sizing pass (regcode pointing at regdummy) only regsize is
- X * increased by 3.
- X */
- Xstatic void reginsert(op, opnd)
- Xchar op;
- Xchar *opnd;
- X{
- X    register char *src;
- X    register char *dst;
- X    register char *place;
- X
- X    if (regcode == &regdummy) { /* Sizing pass: count only. */
- X        regsize += 3;
- X        return;
- X    }
- X
- X    /* Copy from the top down so the overlapping move is safe. */
- X    src = regcode;
- X    regcode += 3;
- X    dst = regcode;
- X    while (src > opnd)
- X        *--dst = *--src;
- X
- X    place = opnd;               /* Op node, where operand used to be. */
- X    *place++ = op;
- X    *place++ = '\0';
- X    *place++ = '\0';
- X}
- X
- X/*
- X - regtail - set the next-pointer at the end of a node chain
- X *
- X * Follows the "nxt" pointers from `p' to the last node of the chain
- X * and stores in it the distance to `val' as two bytes, high order
- X * first.  For a BACK node the distance is measured backward
- X * (scan - val), otherwise forward (val - scan).  Does nothing when
- X * `p' is &regdummy, i.e. during the sizing pass.
- X */
- Xstatic void regtail(p, val)
- Xchar *p;
- Xchar *val;
- X{
- X    register char *scan;
- X    register char *temp;
- X    register int offset;
- X
- X    if (p == &regdummy)         /* Sizing pass: no code to patch. */
- X        return;
- X
- X    /* Find last node. */
- X    scan = p;
- X    for (;;) {
- X        temp = regnext(scan);
- X        if (temp == NULL)
- X            break;
- X        scan = temp;
- X    }
- X
- X    if (OP(scan) == BACK)
- X        offset = scan - val;
- X    else
- X        offset = val - scan;
- X    *(scan + 1) = (offset >> 8) & 0377;
- X    *(scan + 2) = offset & 0377;
- X}
- X
- X/*
- X - regoptail - regtail on operand of first argument; nop if operandless
- NO_NEWS_IS_GOOD_NEWS
- echo "End of part 12"
- echo "File regexp.c is continued in part 13"
- echo "13" > s2_seq_.tmp
- exit 0
- ---
- Kim F. Storm storm@texas.dk Tel +45 429 174 00
- Texas Instruments, Marielundvej 46E, DK-2730 Herlev, Denmark
- No news is good news, but nn is better!
-
-