home *** CD-ROM | disk | FTP | other *** search
- /******************************************************************************
-
- NAME
- vh.c --- retrieval primitives for vh-format text
-
- SYNOPSIS
- vh [-icm] [textfile] [indexfile]
-
- DESCRIPTION
- Contains display-independent primitives for implementing a
- simple hypertext browser.
-
- AUTHORS
- Adapted by Eric S. Raymond <eric@snark.thyrsus.com> from the 1.1 version
- of Raymond Gardner's MS-DOS browser, October 1991.
- Please see the READ.ME in this directory for license terms.
-
- PORTING NOTES
- Some effort has been made to make this code independent of whether the
- underlying OS uses \n or \r\n as a line terminator. If CRLFSIZE is defined
- to 1, \n only is assumed; if it is 2, \r\n is assumed.
-
- BUGS
- Multi-file database support is only half implemented.
-
- **************************************************************************/
- /*LINTLIBRARY*/
- #include <stdio.h>
- #include <string.h>
- #include <ctype.h>
- #include <assert.h>
- #include <time.h>
-
- typedef int bool;
- #define FALSE 0
- #define TRUE 1
-
- #include "vh.h"
-
- #ifndef F_OK
- #define F_OK 0
- #endif /* F_OK */
-
- #define VHPATHSIZ 128
-
- #define TAB 0x09 /* tab */
- #define BS 0x08 /* backspace */
- #define CR 0x0D /* carriage return */
- #define LF 0x0A /* line feed */
- #define SP 0x20 /* space */
- #define DEL 0x7f /* delete */
-
- #define max(a,b) ((a) > (b) ? (a) : (b))
-
- #ifdef ATT
- #define srand(n) srand48(n)
- #define rand() lrand48()
- extern long lrand48();
- extern void srand48();
- /*
- * Try this if ungetch() fails to resolve.
- *
- * #define ungetch ungetc
- */
- #endif /* ATT */
-
- extern char *malloc();
- extern long atol();
-
- extern int LINES, COLS; /* initialize screen size into these */
-
- #ifdef UNIX
- #define CRLFSIZE 1 /* \n only */
- #endif /* UNIX */
-
- #ifndef CRLFSIZE
- #define CRLFSIZE 2 /* \r\n as in MS-DOS, etc. */
- #endif /* CRLFSIZE */
-
- #define WARNSIZE 75 /* warn of lines longer than this */
- #define ENTRYMARK ':'
- #define SEPARATOR (' ' - 2)
- #define STRSZ 80 /* for various strings */
-
- #ifndef VHPATH
- #ifdef MSDOS
- #define VHPATH ".;/usr/lib/vh"
- #else
- #ifndef AMIGA
- #define VHPATH ".:/usr/lib/vh"
- #else
- #define VHPATH "s:;jargon:"
- #endif
- #endif /* MSDOS */
- #endif /* VHPATH */
- #define TXT ".txt"
- #define IDX ".idx"
- #define TXTLEN 4
-
- /*
- * This was carefully tuned using prof(1) under UNIX SVr3. Unless you have
- * profiling tools at least as good, best not mess with it!
- */
- #define DBBUF 4096 /* optimal buffer size */
-
- #ifdef MSDOS
- #ifdef DBBUF
- #undef DBBUF
- #endif
- #define DBBUF 512 /* better size for MSDOS/Borland C -- rdg */
- #endif
-
- #define IS_TAG(s, x) (s[x] == LTAG || (x == 0 && s[0] == ENTRYMARK))
-
- /*******************************************************************
- *
- * All message texts declared here for internationalization purposes
- *
- ******************************************************************/
-
- #define FORMTYPE "vh: format type is %d, "
- #define OUTASYNC "vh: index is out of sync with text.\n"
- #define BADMAGIC "vh: index file %s doesn't look like an index\n"
- #define CANTFIND "vh: can't find database files for %s\n"
- #define CANTOPEN "vh: can't open %s\n"
- #define NOMEMORY "vh: out of memory.\n"
- #define TOOMANY "vh: too many index items.\n"
- #define DANGLING "\"%s%s\", line %ld: {%s} dangling reference\n"
- #define OUTERROR "vh: error writing output -- disk full?\n"
- #define LENGTHZERO "vh: line length 0 -- ignored.\n"
- #define TOOLONG "vh: line too long\n"
- #define CONTROLZ "vh: last line is Control-Z(s) -- ignored\n"
- #define NOCRLF "vh: last line not terminated -- CR/LF added\n"
- #define NOINPUT "vh: can't open input file %s\n"
- #define NOOUTPUT "vh: can't open output file %s\n"
- #define FILLING "filling index\n"
- #define LONGLINE "\"%s%s\", line %ld: line too long (%d)\n"
- #define REREADING "vh: rereading, "
- #define SORTING "sorting, "
- #define WRITING "writing, "
- #define BADREFS "checking for bad references...\n"
- #define DONE "done.\n"
-
- /*******************************************************************
- *
- * Global data
- *
- ******************************************************************/
-
- static FILEINFO textblk;
-
- FILEINFO vhi, *vht = &textblk;
-
- static char *xp[ITEMSMAX]; /* index pointer table */
- static int nitems; /* xp item count */
-
- #if defined(__TURBOC__) && defined(FFGETS)
- /*******************************************************************
- *
- * ffgets() -- faster replacement for fgets() (very Borland-specific!)
- * depends on Borland's FILE structure & usage
- * similar method may be useful on other systems
- *
- * Note that implementing this doesn't require having any library
- * source. All you need to know is in the getc() macro in stdio.h.
- *
- ******************************************************************/
- char *ffgets(s, n, f)
- char *s;
- int n;
- FILE *f;
- {
- char *s0;
- int c;
-
- s0 = s;
- --n;
- while ( n )
- {
- register int k;
- register char *p;
-
- k = f->level;
- if ( k > n )
- k = n;
- if ( k == 0 )
- {
- c = fgetc(f);
- if ( c == EOF )
- {
- *s = '\0';
- return NULL;
- }
- *s++ = c;
- --n;
- if ( c == '\n' )
- break;
- }
- else
- {
- p = f->curp;
- while ( *p++ != '\n' && --k )
- ;
- k = p - f->curp;
- memmove(s, f->curp, k);
- f->level -= k;
- f->curp += k;
- n -= k;
- s += k;
- if ( s[-1] == '\n' )
- break;
- }
- }
- *s = '\0';
- return s0;
- }
- #if 00
- /* another approach ... */
- char *ffgets(s, n, f)
- char *s;
- int n;
- FILE *f;
- {
- char *s0, *p;
- int k, c;
-
- s0 = s;
- n--;
- while ( n )
- {
- k = f->level;
- if ( k > n )
- k = n;
- if ( k < 1 )
- {
- c = fgetc(f);
- if ( c == EOF )
- {
- *s = '\0';
- return NULL;
- }
- *s++ = c;
- --n;
- if ( c == '\n' )
- break;
- }
- else
- {
- memmove(s, f->curp, k);
- s[k] = '\0';
- p = strchr(s, '\n');
- if ( p )
- n = k = p - s + 1;
- f->level -= k;
- f->curp += k;
- s += k;
- n -= k;
- }
- }
- *s = '\0';
- return s0;
- }
- #endif
- #endif /* defined(__TURBOC__) && defined(FFGETS) */
-
- #ifndef MSDOS
- /*******************************************************************
- *
- * Emulations of Borland C library functions
- *
- ******************************************************************/
-
/*
 * strnicmp() -- case-insensitive, length-limited string compare
 *
 * Compares at most n characters of s1 and s2, ignoring case.  Returns 0
 * when they match (or n <= 0), otherwise the difference between the first
 * pair of differing characters after case folding.
 *
 * The return type is spelled int; that is what this file's `bool' is.
 * Characters are fetched as unsigned char because the <ctype.h> functions
 * are undefined for negative values, and tolower() is only applied to
 * upper-case letters so that one body serves BSD and non-BSD libraries.
 */
static int strnicmp(s1, s2, n)
register char *s1, *s2;
register int n;
{
    int c1, c2;

    while (--n >= 0)
    {
        c1 = (unsigned char)*s1++;
        c2 = (unsigned char)*s2++;
        if (isupper(c1))
            c1 = tolower(c1);
        if (isupper(c2))
            c2 = tolower(c2);
        if (c1 != c2)
            return(c1 - c2);
        if (c1 == '\0')         /* both strings ended together */
            return(0);
    }
    return(0);
}
-
/*
 * strlwr() -- force string s to lower case, in place
 *
 * Bytes are handled as unsigned char because tolower()/isupper() are
 * undefined for negative arguments.  Only upper-case letters are passed
 * to tolower(), so the same code works on BSD libraries too.
 */
static void strlwr(s)
char *s;
{
    register unsigned char *cp;

    for (cp = (unsigned char *)s; *cp != '\0'; cp++)
        if (isupper(*cp))
            *cp = (unsigned char)tolower(*cp);
}
-
- #ifndef linux
- static char *strstr(t, s)
- /* find s in t */
- char *s, *t;
- {
- char *cp;
-
- for (cp = t; *cp; cp++)
- if (strncmp(cp, s, strlen(s)) == 0)
- return(cp);
- return((char *)NULL);
- }
- #endif /* !linux */
- #endif /* MSDOS */
-
- /*******************************************************************
- *
- * All file-type dependent stuff
- *
- ******************************************************************/
-
- static int format; /* markup type, defaults to ORIGINAL296 */
- #define ORIGINAL296 0 /* Jargon File 2.9.6 version */
- #define COLON297 1 /* Jargon File 2.9.7+ with colons */
-
- #define HDOFF(s) (s[0] == ENTRYMARK)
-
- static void getformat(fp)
- /* figure out what format we're looking at */
- FILE *fp;
- {
- (void) fseek(fp, 0L, SEEK_SET);
- if (fgetc(fp) != '=')
- format = COLON297;
- }
-
- char *headword(ln)
- /* is this line an entry? */
- char *ln;
- {
- char *p;
-
- if (ln[0] == ' ' || (format > ORIGINAL296 && (ln[0] != ENTRYMARK)))
- return((char *)NULL);
-
- for (p = ln + HDOFF(ln); p = strchr(p, ENTRYMARK); ++p)
- if (isspace(p[1]) || (p[1] == ENTRYMARK && isspace(p[2])))
- break;
-
- return(p);
- }
-
- /*******************************************************************
- *
- * Sequential entry access
- *
- * This supports the following entry points:
- * getnextln() --- get line beginning at given position
- * getprevln() --- get *previous* line from given position
- *
- ******************************************************************/
-
/*
 * detab() -- expand tabs in s, in place; tab stops every 8 columns
 *
 * The caller must supply a buffer large enough for the expanded text;
 * no size is passed in, so none can be checked here.
 *
 * The tail is shifted with memmove(), and the search resumes after the
 * blanks just written instead of rescanning from the start of s.
 */
static void detab(s)
char *s;
{
    char *tab;
    int col, stop;

    tab = strchr(s, '\t');
    while (tab != NULL)
    {
        col = tab - s;                  /* column of the tab */
        stop = (col / 8 + 1) * 8;       /* next tab stop */
        /* move the text after the tab, NUL included, up to the stop */
        memmove(s + stop, tab + 1, strlen(tab + 1) + 1);
        memset(tab, ' ', stop - col);   /* blank fill */
        tab = strchr(s + stop, '\t');
    }
}
-
- /* daddr_t getnextln(FILE *fp, daddr_t pos, char *ln) -- get next line of file
- ** takes position where a line starts, gets line into buffer ln
- ** removes trailing CR/LF and expands tabs
- ** returns position of next byte after line (i.e. start of next line)
- ** returns NOWHERE at EOF
- */
- daddr_t getnextln(fp, pos, ln)
- FILE *fp;
- daddr_t pos;
- char *ln;
- {
- int k;
- char *p;
- if (ftell(fp) != pos) /* slight optimization for Borland & Zortech*/
- (void) fseek(fp, pos, SEEK_SET);
- if (fgets(ln, LNSZ, fp))
- {
- k = strlen(ln);
- ln[k - 1] = '\0';
- #if CRLFSIZE > 1
- if (ln[k - 2] == CR)
- ln[k - 2] = '\0'; /* fixed bug here 11/28/91 rdg */
- #endif /* CRLFSIZE > 1 */
- pos += k;
- }
- else
- pos = NOWHERE;
- detab(ln);
- if (p = strchr(ln, SEPARATOR))
- *p = '\0';
- return(pos);
- }
-
- /* daddr_t getprevln(FILE *fp, daddr_t pos, char *ln) -- get prev line in file
- ** takes position where a line starts, gets _previous_ line into buffer ln
- ** removes trailing CR/LF and expands tabs
- ** returns position of line returned
- ** returns NOWHERE at BOF
- */
- daddr_t getprevln(fp, pos, ln)
- FILE *fp;
- daddr_t pos;
- char *ln;
- {
- int n;
- char *p;
-
- assert(pos > 0);
- n = LNSZ;
- if (pos < LNSZ)
- n = pos;
- (void) fseek(fp, pos - n, SEEK_SET);
- (void) fread(ln, 1, n, fp);
- p = &ln[n-1];
- while (p != ln)
- {
- int i;
-
- --p;
- if (*p == LF)
- {
- ++p;
- n = &ln[n] - p;
- for (i = 0; i < n; i++)
- ln[i] = p[i];
- /* memmove(ln, p, n); */
- break;
- }
- }
- assert(p > ln || (p == ln && pos == n));
- ln[n - 1] = '\0';
- #if CRLFSIZE > 1
- if (ln[n - 2] == CR)
- ln[n - 2] = '\0';
- #endif /* CRLFSIZE > 1 */
- pos -= n;
- detab(ln);
- if (p = strchr(ln, SEPARATOR))
- *p = '\0';
- return(pos);
- }
-
- /*******************************************************************
- *
- * Fortune-cookie mode
- *
- * This supports the following entry points:
- * jrandom() -- return the offset of a random entry
- *
- ******************************************************************/
-
- daddr_t jrandom()
- /* grab a fortune cookie */
- {
- int r = rand() % nitems;
-
- return(atol(xp[r] + strlen(xp[r]) + 1));
- }
-
- /*******************************************************************
- *
- * Entry-access by name
- *
- * This supports the following entry points:
- * xlocate() --- go to entry by name
- * ilocate() --- go to entry by name (incremental)
- * ffind() --- find string in file
- * ifind() --- find string in file (incremental)
- *
- ******************************************************************/
-
- static daddr_t ixlocate(s, isincrsearch)
- /* binary-search index for x; return offset if found, else -offset of next */
- char *s;
- bool isincrsearch;
- {
- int lo, mid, hi, k, hit;
- daddr_t pos;
-
- strlwr(s);
- #define NOTFOUND (-1)
- lo = 0;
- hi = nitems - 1;
- hit = NOTFOUND; /* your basic binary search */
- while (hit == NOTFOUND && lo <= hi)
- {
- mid = (lo + hi) / 2;
- if ((k = strcmp(s, xp[mid])) < 0)
- hi = mid - 1;
- else if (k > 0)
- lo = mid + 1;
- else
- hit = mid;
- }
-
- /*
- * if not found, and item wanted exceeds item found,
- * and there's room to move up, go to next higher item
- */
- if (hit==NOTFOUND && strcmp(s, xp[mid]) > 0 && mid < nitems-1)
- {
- ++mid;
- assert(strcmp(s, xp[mid]) < 0);
- }
- /* the file position is stored right after the index string */
- pos = atol(xp[mid] + strlen(xp[mid]) + 1);
-
- if (isincrsearch)
- {
- if (strncmp(s, xp[mid], strlen(s)))
- pos = -pos;
- }
- else
- {
- /* if no hit, return negative pos */
- if (hit == NOTFOUND)
- pos = -pos;
- }
- return(pos);
- }
-
- daddr_t xlocate(s)
- char *s;
- {
- return(ixlocate(s, FALSE));
- }
-
- daddr_t ilocate(c)
- /* incremental-lookup through entry key list */
- char c;
- {
- static char key[LNSZ], *ep = key;
- static int oldhit;
-
- if (c == BS)
- {
- if (ep > key)
- *--ep = '\0';
- }
- else if (isprint(c)) /* incremental-search for given character */
- {
- *ep++ = tolower(c);
- *ep = '\0';
- return(ixlocate(key, TRUE));
- }
- else if (c == DEL) /* reset from previous incremental lookup */
- {
- key[0] = '\0';
- ep = key;
- oldhit = 0;
- }
- return(NOWHERE);
- }
-
- daddr_t ffind(fp, pos, ss)
- /* case-blind search forward for ss; return offset if found, else NOWHERE */
- FILE *fp;
- daddr_t pos;
- char *ss;
- {
- char s[LNSZ], ln[LNSZ];
-
- (void) strcpy(s, ss);
- strlwr(s);
- (void) fseek(fp, pos, SEEK_SET);
- while (fgets(ln, LNSZ, fp))
- {
- strlwr(ln);
- if (strstr(ln, s))
- return(pos);
- pos = ftell(fp); /* save pos before reading next line */
- }
- return(NOWHERE);
- }
-
- daddr_t ifind(fp, pos, c)
- /* incremental-search forward */
- FILE *fp;
- daddr_t pos;
- char c;
- {
- static char key[LNSZ], *ep = key;
- static int oldhit;
-
- if (c == BS)
- {
- if (ep > key)
- *--ep = '\0';
- return(pos);
- }
- else if (isprint(c)) /* incremental-search for given character */
- {
- *ep++ = tolower(c);
- *ep = '\0';
- return(ffind(fp, pos, key));
- }
- else if (c == DEL) /* reset from previous incremental search */
- {
- key[0] = '\0';
- ep = key;
- oldhit = 0;
- return(pos);
- }
- return(NOWHERE); /* should never reach here */
- }
-
- /*******************************************************************
- *
- * Screen-fetch and line-retrieval code
- *
- * Entry points:
- * iflink() --- test for presence of a reference
- * findnextsel() --- find next link on current screen
- * findprevsel() --- find previous link on current screen
- *
- * These functions expect to be able to call:
- * readscr() --- get text from screen
- *
- ******************************************************************/
-
- /* left tag and right tag of textual references */
- #define LTAG '{'
- #define RTAG '}'
-
- /* daddr_t iflink() -- test for link at screen position, find right tag pos
- */
- daddr_t iflink(x, y, xrtag, yrtag, isindex)
- int x,y;
- int *xrtag, *yrtag;
- bool isindex;
- {
- char *cp, s[MAXWIDTH];
- char term[STRSZ];
- int i;
- daddr_t pos;
-
- readscr(0, y, COLS-1, y, s); /* get line on screen */
-
- if (isindex) /* if index, x is col 0 */
- {
- (void) strcpy(term, s);
- for (cp = term + strlen(term) - 1; *cp == ' '; cp--)
- *cp = '\0';
- x = strlen(term) - 1;
- }
- else if ((cp = headword(s)) && x < cp - s) /* a headword? */
- {
- i = *cp;
- *cp = '\0';
- (void) strcpy(term, s + HDOFF(s));
- *cp = i;
- x = (cp - s) - 1;
- }
- else /* not index or headword, find left tag */
- {
- int x0 = x;
-
- while (x >= 0 && s[x] != LTAG)
- --x;
- if (x < 0) /* if no left tag, then no link */
- return(0L);
- assert(IS_TAG(s, x));
-
- /* find next non-LTAG char */
- while (x < COLS && s[x] == LTAG)
- ++x;
- if (x >= COLS) /* if none, no tag */
- return(0L);
-
- /* scan the link text up to RTAG, put into term[] */
- i = 0;
- while (x < COLS-1 && s[x] != RTAG)
- term[i++] = s[x++];
- --x;
- if (x < x0) /* if we are left of start point, return 0 */
- return(0L); /* in case user clicks to right of link */
- else if (x >= COLS-2) /* in case link wraps across line */
- {
- ++y;
- if (y > LASTLINE)
- return(0L);
- readscr(0, y, COLS-2, y, s);
- while (i > 0 && term[i - 1] == ' ')
- --i;
- term[i++] = ' ';
- x = 0;
- while (x < COLS-1 && s[x] == ' ')
- ++x;
- while (x < COLS-1 && s[x] != RTAG)
- term[i++] = s[x++];
- --x;
- if (x >= COLS)
- return(0L);
- }
-
- term[i] = '\0'; /* terminate the term */
- }
-
- /* look up in index table */
- pos = xlocate(term);
- if (pos > 0) /* if found, set the right tag x/y coords */
- {
- *yrtag = y;
- *xrtag = x;
- }
- return(pos); /* return position of link target, or 0L if none */
- }
-
- /* region findnextsel() -- find next selection (i.e. link reference)
- ** given x/y coords of a link selection on the screen, find the next
- ** one onscreen searching left to right, then down, wrapping last line
- ** to top line, until back to original position if only one link onscreen;
- ** coordinates are placed in the result.
- */
- region findnextsel(x, y, isindex)
- int x, y;
- bool isindex;
- {
- char s[MAXWIDTH];
- int x0, y0, j;
- daddr_t pos;
- region res;
-
- if (isindex) /* if index, just go to next row */
- {
- ++y;
- if (y > LASTLINE) /* wrap line to top if at end */
- y = 0;
- res.yl = res.yr = y;
- res.xl = 0;
- res.xr = COLS - 2;
- return(res);
- }
-
- if (y < 0) /* if no selection currently, look for one */
- y = 0;
-
- x0 = x;
- y0 = y;
- readscr(0, y, COLS-1, y, s); /* get screen line */
- while (x < COLS && IS_TAG(s, x))
- ++x;
- for (;;)
- {
- for (; x < COLS; ++x)
- {
- if (IS_TAG(s, x))
- {
- while (x < COLS && s[x] == LTAG || (x == 0 && s[0] == ENTRYMARK))
- ++x;
- /* look up link */
- if (iflink(x, y, &res.xr, &res.yr, isindex) > 0)
- {
- res.xl = x; /* if found, set x/y and return */
- res.yl = y;
- return(res);
- }
- }
- }
- x = 0; /* past end of row; reset x pos */
- ++y; /* step to next line, wrap around if at end */
- if (y > LASTLINE)
- y = 0;
- if (y == y0 && x == x0) /* return if back at start point */
- {
- res.xl = res.yl = NOPLACE;
- return(res);
- }
- readscr(0, y, COLS-1, y, s); /* get screen line */
- }
- }
-
- /* void findprevsel() -- find prev selection (i.e. link reference)
- ** given x/y coords of a link selection on the screen, find the previous
- ** one onscreen searching right to left, then up, wrapping top line
- ** to last line, until back to original position if only one link onscreen;
- ** coordinates are placed in the result.
- */
- region findprevsel(x, y, isindex)
- int x, y;
- bool isindex;
- {
- char s[MAXWIDTH];
- int x0, y0, j;
- daddr_t pos;
- region res;
-
- if (isindex) /* if index, get prev row */
- {
- --y;
- if (y < 0) /* wrap line to last if at top */
- y = LASTLINE;
- res.yl = res.yr = y;
- res.xl = 0;
- res.xr = COLS - 2;
- return(res);
- }
-
- x0 = x;
- y0 = y;
- readscr(0, y, COLS-1, y, s); /* get screen line */
- while (x >= 0 && IS_TAG(s, x))
- --x;
- for (;;)
- {
- for (; x >= 0; --x)
- {
- if (IS_TAG(s, x))
- {
- while (x < COLS && IS_TAG(s, x))
- ++x;
- --x; /* back up to LTAG; look up link */
- if (iflink(x, y, &res.xr, &res.yr, isindex) > 0)
- {
- res.xl = ++x; /* if found, set x/y and return */
- res.yl = y;
- return(res);
- }
- }
- }
- x = COLS - 1; /* past beginning of row; reset x pos */
- --y; /* step to prev line, wrap around if at top */
- if (y < 0)
- y = LASTLINE;
- if (y == y0 && x == x0) /* return if back at start point */
- {
- res.xl = res.yl = NOPLACE;
- return(res);
- }
- readscr(0, y, COLS-1, y, s); /* get prev line, continue */
- }
- }
-
- /*******************************************************************
- *
- * Position stack management
- *
- * This supports the following entry points:
- * enqueue() --- push a placemark
- * dequeue() --- pop a placemark
- *
- ******************************************************************/
-
- void enqueue(f)
- /* add file position & link info to backtrack stack */
- FILEINFO *f;
- {
- int k;
-
- k = f->btscnt;
-
- /* if full, move it up to make room */
- if (k == BTSMAX)
- {
- int n;
- for ( n = (BTSMAX-1) * sizeof(f->bts[0]); n--; )
- {
- memcpy(&f->bts[0], &f->bts[1], sizeof(f->bts[0]));
- memcpy(&f->selbts[0], &f->selbts[1], sizeof(f->selbts[0]));
- }
- --k;
- f->btscnt = k; /* 9/27/91 rdg fixed bts overflow bug */
- }
- f->bts[k] = f->toppos;
- f->selbts[k].xl = f->sel.xl;
- f->selbts[k].yl = f->sel.yl;
- f->selbts[k].xr = f->sel.xr;
- f->selbts[k].yr = f->sel.yr;
- #ifdef DEBUG
- (void) fprintf(stderr, "enqueue[%d]: x = %d, y = %d, pos = %ld.\n",
- k, f->sel.xl, f->sel.yl, f->toppos);
- #endif /* DEBUG */
-
- if (k == 0 || f->bts[k] != f->bts[k-1] || /* only enqueue if changed */
- memcmp(&f->selbts[k], &f->selbts[k-1], sizeof(f->selbts[k])) != 0)
- ++f->btscnt;
- #ifdef DEBUG
- else
- (void) fprintf(stderr, "enqueue[%d]: is duplicate, popped.\n", k);
- #endif /* DEBUG */
- }
-
- void dequeue(f)
- /* pull file position & link info from stack */
- FILEINFO *f;
- {
- if (f->btscnt) /* don't attempt pop if empty */
- {
- --f->btscnt;
- f->toppos = f->bts[f->btscnt];
- f->sel.xl = f->selbts[f->btscnt].xl;
- f->sel.yl = f->selbts[f->btscnt].yl;
- f->sel.xr = f->selbts[f->btscnt].xr;
- f->sel.yr = f->selbts[f->btscnt].yr;
-
- #ifdef DEBUG
- (void) fprintf(stderr, "dequeue[%d]: x = %d, y = %d, pos = %ld.\n",
- f->btscnt, f->sel.xl, f->sel.yl, f->toppos);
- #endif /* DEBUG */
- }
- #ifdef DEBUG
- else
- (void) fprintf(stderr, "dequeue: failed, no stack space.\n");
- #endif /* DEBUG */
- }
-
- /*******************************************************************
- *
- * Browse support
- *
- * Entry points:
- * initbrowse() --- set up in-core structures for given file pair
- * setlastpage() --- set lastpagetoppos members
- *
- ******************************************************************/
-
- /* validate_synchronization(FILE *fp, char *ln)
- ** check file against index table, to try to be sure they match up
- ** will sample 10 index entries, evenly spaced, including first and last
- ** for each entry, looks at text file to see if text matches index
- */
- static void validate_synchronization(fp, ln)
- FILE *fp;
- char *ln;
- {
- #define nsamples 10
- int i, k;
- daddr_t pos;
-
- for (i = 0; i < nsamples; ++i)
- {
- k = (i * (nitems - 1)) / (nsamples - 1); /* sample index */
- pos = xlocate(xp[k]); /* look up pos */
- assert(pos > 0);
- if (fseek(fp, pos, SEEK_SET) < 0) /* seek in file */
- break;
- getnextln(fp, pos, ln); /* get text line */
- if (strnicmp(ln + HDOFF(ln), xp[k], strlen(xp[k])) != 0) /* match it */
- break;
- }
- /* exit if any mismatch */
- if (i < nsamples)
- {
- (void) fprintf(stderr, OUTASYNC);
- exit(1);
- }
- }
-
- static bool idxsearch(name, path)
- /* look for database along defined search path */
- char *name, *path;
- {
- extern char *getenv();
- char *srch, *cp;
-
- if ((srch = getenv("VHPATH")) == (char *)NULL)
- srch = VHPATH;
-
- #ifdef MSDOS
- #define LISTSEP ";"
- #define PATHSEP "\\"
- #else
- #ifndef AMIGA
- #define LISTSEP ":"
- #else
- #define LISTSEP ";"
- #endif /* AMIGA */
- #define PATHSEP "/"
- #endif /* MSDOS */
-
- cp = strtok(srch, LISTSEP);
- do {
- (void) strcpy(path, cp);
- #ifdef AMIGA
- if (path[strlen(path) - 1] != ':') { /* e.g. "DH0:" */
- #endif
- if (path[strlen(path) - 1] != PATHSEP[0])
- (void) strcat(path, PATHSEP);
- #ifdef AMIGA
- }
- #endif
- (void) strcat(path, name);
- (void) strcat(path, IDX);
- if (access(path, F_OK) == 0)
- return(TRUE);
- } while
- (cp = strtok((char *)NULL, LISTSEP));
- return(FALSE);
- }
-
- bool initbrowse(name)
- char *name;
- {
- char ln[LNSZ + 1], path[PATHLEN], *cp;
- #ifdef VHHDR
- vhhdr hdr;
- #endif /* VHHDR */
- int k, fno;
-
- /* look for the database files */
- if (!idxsearch(name, path))
- {
- (void) fprintf(stderr, CANTFIND, name);
- return(FALSE);
- }
-
- if ((vhi.fp = fopen(path, "rb"))==(FILE*)NULL) /* open index file */
- {
- (void) fprintf(stderr, CANTOPEN, path);
- return(FALSE);
- }
- #ifndef BSD
- (void) setvbuf(vhi.fp, (char *)NULL, _IOFBF, DBBUF);
- #endif /* BSD */
-
- vhi.btscnt = 0;
- vhi.sel.xl = NOPLACE;
-
- #ifdef VHHDR
- (void) fread(&hdr, sizeof(vhhdr), 1, vhi.fp);
- if (hdr.magic != VHMAGIC)
- {
- (void) fprintf(stderr, BADMAGIC, path);
- return(FALSE);
- }
-
- /* read in text file names */
- for (fno = 0 ; fno < hdr.nfiles; fno++)
- (void) fread(ln, 1, VHPATHSIZ, vhi.fp);
- #endif /* VHHDR */
-
- /* fill index table */
- for (nitems = 0; nitems < ITEMSMAX; ++nitems)
- {
- if (fgets(ln, LNSZ, vhi.fp) == NULL)
- break;
- k = strlen(ln) - 1;
- ln[k] = '\0';
- #if CRLFSIZE > 1
- if (ln[k-1] == '\r')
- ln[--k] = '\0';
- #endif /* CRLFSIZE > 1 */
- if ((xp[nitems] = malloc(k + 1)) == NULL)
- {
- (void) fprintf(stderr, NOMEMORY);
- return(TRUE);
- }
- strcpy(xp[nitems], ln);
- assert(strchr(ln, SEPARATOR)); /* better have separator */
- *strchr(xp[nitems], SEPARATOR) = '\0'; /* replace it w/ null byte */
- strlwr(xp[nitems]); /* force items in table to lowercase */
- }
-
- /* did we overflow? */
- if (nitems == ITEMSMAX)
- {
- (void) fprintf(stderr, TOOMANY);
- return(FALSE);
- }
-
- vhi.toppos = 0;
-
- /* this will go inside a loop */
-
- vht->btscnt = 0;
- vht->sel.xl = NOPLACE;
-
- (void) strcpy(ln, path);
- (void) strcpy(ln + strlen(ln) - TXTLEN, TXT);
- if ((vht->fp = fopen(ln, "rb"))==(FILE*)NULL) /* open text file */
- {
- (void) fprintf(stderr, CANTOPEN, path);
- return(FALSE);
- }
- #ifndef BSD
- (void) setvbuf(vht->fp, (char *)NULL, _IOFBF, DBBUF);
- #endif /* BSD */
-
- getformat(vht->fp); /* what file format are we looking at? */
- vht->toppos = 0;
-
- /* set random-number seed, in case we're after a fortune cookie */
- srand(time((time_t *)0));
-
- return(TRUE);
- }
-
- void setlastpage()
- /* this needs to be called *after* initscr() */
- {
- char ln[LNSZ + 1];
- int i;
-
- /* set up last page top pos for index */
- (void) fseek(vhi.fp, 0L, SEEK_END); /* seek end of index file */
- vhi.dsptoppos = vhi.lastpagetoppos = vhi.dspnextpos = vhi.endpos =ftell(vhi.fp);
- for (i = 0; i <= LASTLINE; ++i)
- if (vhi.lastpagetoppos)
- vhi.lastpagetoppos = getprevln(vhi.fp, vhi.lastpagetoppos, ln);
- if (vhi.lastpagetoppos == 0L) /* avoid divide-by-zero error */
- vhi.lastpagetoppos = 1L;
- (void) fseek(vhi.fp, 0L, SEEK_SET); /* seek start of index file */
-
- (void) fseek(vht->fp, 0L, SEEK_END); /* seek eof */
- vht->dsptoppos = vht->lastpagetoppos = vht->dspnextpos = vht->endpos =ftell(vht->fp);
- for (i = 0; i <= LASTLINE; ++i)
- if (vht->lastpagetoppos)
- vht->lastpagetoppos = getprevln(vht->fp, vht->lastpagetoppos, ln);
- if (vht->lastpagetoppos == 0L) /* avoid divide-by-zero error */
- vht->lastpagetoppos = 1L;
- (void) fseek(vht->fp, 0L, SEEK_SET); /* back to top of file */
- }
-
- /*******************************************************************
- *
- * Consistency checking
- *
- ******************************************************************/
-
- void chkindex(name)
- /* look for over-long lines, dangling references, self-references */
- char *name;
- {
- long lnum = 1;
- char refbuf[LNSZ + 1], *refpt = refbuf;
- int c, len = 0, depth = 0;
-
- /* time for consistency check */
- initbrowse(name);
-
- (void) fseek(vht->fp, 0L, SEEK_SET);
- while ((c = fgetc(vht->fp)) != EOF)
- if (c == '\n')
- {
- if (len > WARNSIZE + CRLFSIZE)
- (void) printf(LONGLINE, name, TXT, lnum, len);
- len = 0;
-
- ++lnum;
- if (depth != 0 && refpt[-1] != ' ')
- *refpt++ = ' ';
- }
- else if (c == LTAG)
- {
- ++len;
- ungetc(c = fgetc(vht->fp), vht->fp);
- if (isprint(c))
- ++depth;
- }
- else if (depth > 0 && c == RTAG)
- {
- ++len;
- --depth;
- if (depth == 0 && refpt != refbuf)
- {
- *refpt = '\0';
- if (xlocate(refbuf) < 0)
- (void) printf(DANGLING, name, TXT, lnum, refbuf);
- refpt = refbuf;
- }
- }
- else if (depth != 0)
- {
- ++len;
- if (!isspace(c) || refpt[-1] != ' ')
- *refpt++ = c;
- }
-
- validate_synchronization(vht->fp, refbuf); /* validate */
- }
-
- /*******************************************************************
- *
- * File indexing code
- *
- * Entry points:
- * mkindex(name) -- generate index file from text
- *
- ******************************************************************/
-
- static char ln[LNSZ + 1];
-
- typedef struct list_struct /* one index line, kept on a singly-linked list while sorting */
- {
- struct list_struct *next; /* following node; NULL ends the list */
- unsigned int len; /* length of the text in ln[], terminator excluded (set by readfile) */
- char ln[1]; /* start of the line text; readfile() allocates k + sizeof(list) so the whole line and its NUL fit */
- }
- list;
-
- static list *listp = NULL;
-
- static char *mmalloc(n)
- /* malloc(3) with error message and abort */
- unsigned n;
- {
- char *p;
-
- if ((p = malloc(n)) == NULL)
- {
- (void) fprintf(stderr, NOMEMORY);
- exit(1);
- }
- return(p);
- }
-
/* fetch the character at cp as a non-negative int, safe for <ctype.h> */
#define UNSCH(cp) ((int)(*((unsigned char *)(cp))))

/*
 * strlcmp() -- compare two strings, smashing case
 *
 * Returns a value less than, equal to or greater than zero as a sorts
 * before, equal to or after b once both are folded to lower case.
 */
static int strlcmp(a, b)
char *a, *b;
{
#ifdef BSD
    return(strnicmp(a,b,max(strlen(a),strlen(b))));
#else
    int diff;

    for (;; a++, b++)
    {
        diff = tolower(UNSCH(a)) - tolower(UNSCH(b));
        /* stop at the first difference, or when both strings end */
        if (diff != 0 || *a == '\0')
            return(diff);
    }
#endif
}
-
- static int keycmp(p, q)
- /* compare keys for equality */
- list *p, *q;
- {
- return(strlcmp(p->ln, q->ln));
- }
-
- static list *lmerge (p, q)
- /* merge 2 lists under dummy head item */
- list *p, *q;
- {
- list *r, head;
-
- for (r = &head; p && q;)
- {
- if (keycmp(p, q) < 0)
- {
- r = r->next = p;
- p = p->next;
- }
- else
- {
- r = r->next = q;
- q = q->next;
- }
- }
- r->next = (p ? p : q);
- return(head.next);
- }
-
- static list *lsort (p)
- /* split list into 2 parts, sort each recursively, merge */
- list *p;
- {
- list *q, *r;
-
- if (p)
- {
- q = p;
- for (r = q->next; r && (r = r->next) != NULL; r = r->next)
- q = q->next;
- r = q->next;
- q->next = NULL;
- if (r)
- p = lmerge(lsort(p), lsort(r));
- }
- return(p);
- }
-
- static void readfile(inf)
- /* read file into linked list of lines */
- FILE *inf;
- {
- int k;
- list *p;
-
- while (fgets(ln, LNSZ, inf))
- {
- k = strlen(ln);
- if (k == 0)
- {
- (void) fprintf(stderr, LENGTHZERO);
- continue;
- }
- if (ln[k-1] == '\n')
- {
- k--;
- #if CRLFSIZE > 1
- if (ln[k-1] == '\r')
- k--;
- #endif /* CRLFSIZE > 1 */
- ln[k] = '\0';
- }
- else
- {
- if (k == LNSZ)
- {
- (void) fprintf(stderr, TOOLONG);
- exit(1);
- }
- if (ln[0] == 26)
- {
- (void) fprintf(stderr, CONTROLZ);
- continue;
- }
- else
- {
- (void) fprintf(stderr, NOCRLF);
- continue;
- }
- }
-
- p = (list *)mmalloc(k + sizeof(list));
- p->len = k;
- memcpy(p->ln, ln, k+1);
- p->next = listp;
- listp = p;
- }
- }
-
- static void writefile (outf)
- /* write file from in-core list */
- FILE *outf;
- {
- list *p;
-
- for (p = listp; p; p = p->next)
- {
- (void) fwrite(p->ln, 1, p->len, outf);
- #if CRLFSIZE > 1
- (void) fwrite("\r", 1, 1, outf);
- #endif /* CRLFSIZE > 1 */
- (void) fwrite("\n", 1, 1, outf);
- }
- }
-
- #ifdef AMIGA
/*
 * setthebufback() -- give stdout a full-size buffer again
 *
 * Registered with atexit() by mkindex(), which switches stdout to
 * unbuffered output for its progress messages.
 */
void setthebufback(void)
{
    static char outbuf[BUFSIZ];

    /* same effect as setbuf(stdout, outbuf) */
    (void) setvbuf(stdout, outbuf, _IOFBF, BUFSIZ);
}
- #endif /* AMIGA */
-
- void mkindex(argc, argv)
- /* make index from given files */
- int argc;
- char *argv[];
- {
- char source[PATHLEN], target[PATHLEN];
- char ln[LNSZ], prevln[LNSZ];
- FILE *inf, *outf;
- daddr_t pos;
- char *infilen, *outfilen, *p, *s;
- unsigned int buffersize;
- int i, len, fno;
- #ifdef VHHDR
- vhhdr hdr;
- #endif /* VHHDR */
-
- (void) strcpy(target, argv[0]);
- (void) strcat(target, IDX);
- if ((outf = fopen(target, "wb")) == (FILE *)NULL)
- {
- (void) fprintf(stderr, NOOUTPUT, target);
- exit(1);
- }
-
- setbuf(stdout, (char *)NULL);
- #ifdef AMIGA
- atexit(setthebufback);
- #endif /* AMIGA */
-
- for (fno = 0 ; fno < argc; fno++)
- {
- (void) strcpy(source, argv[fno]);
- (void) strcat(source, TXT);
- if ((inf = fopen(source, "rb"))==(FILE *)NULL)
- {
- (void) fprintf(stderr, NOINPUT, source);
- exit(1);
- }
-
- getformat(inf);
-
- (void) printf(FORMTYPE, format);
-
- *prevln = 0;
-
- (void) printf(FILLING);
-
- pos = 0L;
- while (fgets(ln, LNSZ, inf))
- {
- if (p = headword(ln))
- {
- *p = 0;
- if (strlcmp(ln, prevln) != 0)
- (void) fprintf(outf, "%s%c%ld\r\n",
- ln + HDOFF(ln), SEPARATOR, pos);
- (void) strcpy(prevln, ln);
- }
- pos = ftell(inf);
- }
- (void) fclose(inf);
- }
-
- (void) fclose(outf);
-
- /* now sort the file */
- inf = fopen(target, "rb");
- if (inf == NULL)
- {
- fprintf(stderr, NOINPUT);
- exit(1);
- }
-
- (void) printf(REREADING);
-
- #ifndef AOS
- setvbuf(inf, NULL, _IOFBF, LNSZ + 1);
- #endif /* AOS */
- readfile(inf);
- (void) fclose(inf);
-
- (void) printf(SORTING);
- listp = lsort(listp);
-
- (void) printf(WRITING);
- outf = fopen(target, "wb");
- if (outf == NULL)
- {
- (void) fprintf(stderr, NOOUTPUT);
- exit(1);
- }
- #ifndef AOS
- setvbuf(outf, NULL, _IOFBF, LNSZ + 1);
- #endif /* AOS */
-
- #ifdef VHHDR
- hdr.magic = VHMAGIC;
- hdr.nfiles = argc;
- (void) fwrite(&hdr, sizeof(vhhdr), 1, outf);
-
- for (fno = 0 ; fno < argc; fno++)
- (void) fwrite(argv[fno], 1, VHPATHSIZ, outf);
- #endif /* VHHDR */
-
- writefile(outf);
- (void) fclose(outf);
-
- (void) printf(DONE);
- }
-
- /* vh.c ends here */
-