home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
rtsi.com
/
2014.01.www.rtsi.com.tar
/
www.rtsi.com
/
OS9
/
OSK
/
MISC
/
vh_1.4.lzh
/
VH
/
vh.c
< prev
next >
Wrap
Text File
|
1994-04-22
|
35KB
|
1,535 lines
/******************************************************************************
NAME
vh.c --- retrieval primitives for vh-format text
SYNOPSIS
vh [-icm] [textfile] [indexfile]
DESCRIPTION
Contains display-independent primitives for implementing a
simple hypertext browser.
AUTHORS
Adapted by Eric S. Raymond <eric@snark.thyrsus.com> from the 1.1 version
of Raymond Gardner's MS-DOS browser, October 1991.
Please see the READ.ME in this directory for license terms.
PORTING NOTES
Some effort has been made to make this code independent of whether the
underlying OS uses \n or \r\n as a line terminator. If CRLFSIZE is defined
to 1, \n only is assumed; if it is 2, \r\n is assumed.
BUGS
Multi-file database support is only half implemented.
**************************************************************************/
/*LINTLIBRARY*/
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <assert.h>
#include <time.h>
/* Local boolean type; this file does not use <stdbool.h>. */
typedef int bool;
#define FALSE 0
#define TRUE 1
#include "vh.h"
/* Mode argument passed to access() in idxsearch(); supplied if headers lack it. */
#ifndef F_OK
#define F_OK 0
#endif /* F_OK */
/* Size in bytes of each text-file name record read/written under VHHDR. */
#define VHPATHSIZ 128
/* Character codes referred to by name below. */
#define TAB 0x09 /* tab */
#define BS 0x08 /* backspace */
#define CR 0x0D /* carriage return */
#define LF 0x0A /* line feed */
#define SP 0x20 /* space */
#define DEL 0x7f /* delete */
/* NOTE: evaluates each argument twice; avoid arguments with side effects. */
#define max(a,b) ((a) > (b) ? (a) : (b))
/* With ATT defined, rand()/srand() are mapped onto lrand48()/srand48(). */
#ifdef ATT
#define srand(n) srand48(n)
#define rand() lrand48()
extern long lrand48();
extern void srand48();
/*
* Try this if ungetch() fails to resolve.
*
* #define ungetch ungetc
*/
#endif /* ATT */
/* Old-style declarations; this file does not include <stdlib.h>. */
extern char *malloc();
extern long atol();
extern int LINES, COLS; /* initialize screen size into these */
/* CRLFSIZE is the number of bytes in a line terminator (see PORTING NOTES). */
#ifdef UNIX
#define CRLFSIZE 1 /* \n only */
#endif /* UNIX */
#ifndef CRLFSIZE
#define CRLFSIZE 2 /* \r\n as in MS-DOS, etc. */
#endif /* CRLFSIZE */
#define WARNSIZE 75 /* warn of lines longer than this */
#define ENTRYMARK ':' /* tested by HDOFF() and headword() to recognise entry lines */
#define SEPARATOR (' ' - 2) /* written between key and offset in each index line */
#define STRSZ 80 /* for various strings */
/*
* Default search path used by idxsearch() when the VHPATH environment
* variable is unset. On OSK it is a writable array rather than a macro
* (idxsearch() tokenizes the search path in place with strtok()).
*/
#ifndef VHPATH
#ifdef MSDOS
#define VHPATH ".;/usr/lib/vh"
#else /* !MSDOS */
#ifndef AMIGA
#ifndef OSK
#define VHPATH ".:/usr/lib/vh"
#else /* OSK */
char VHPATH[] = ".:/h0/vh";
#endif /* OSK */
#else /* AMIGA */
#define VHPATH "s:;jargon:"
#endif /* AMIGA */
#endif /* MSDOS */
#endif /* VHPATH */
/* File-name extensions for the text and index files; both are TXTLEN long. */
#define TXT ".txt"
#define IDX ".idx"
#define TXTLEN 4
/*
* This was carefully tuned using prof(1) under UNIX SVr3. Unless you have
* profiling tools at least as good, best not mess with it!
*/
#define DBBUF 4096 /* optimal buffer size */
#ifdef MSDOS
#ifdef DBBUF
#undef DBBUF
#endif
#define DBBUF 512 /* better size for MSDOS/Borland C -- rdg */
#endif
/*
* True when s[x] is a left tag, or when x is column 0 and the line starts
* with ENTRYMARK. LTAG is defined further down the file, which is fine
* because the macro is only expanded at its points of use.
* NOTE: the arguments are not parenthesized and x is evaluated more than once.
*/
#define IS_TAG(s, x) (s[x] == LTAG || (x == 0 && s[0] == ENTRYMARK))
/*******************************************************************
*
* All message texts declared here for internationalization purposes
*
* These are passed as printf-style format strings; any message that
* contains a conversion (%s, %d, %ld) needs matching arguments at the
* call site.
*
******************************************************************/
#define FORMTYPE "vh: format type is %d, "
#define OUTASYNC "vh: index is out of sync with text.\n"
#define BADMAGIC "vh: index file %s doesn't look like an index\n"
#define CANTFIND "vh: can't find database files for %s\n"
#define CANTOPEN "vh: can't open %s\n"
#define NOMEMORY "vh: out of memory.\n"
#define TOOMANY "vh: too many index items.\n"
#define DANGLING "\"%s%s\", line %ld: {%s} dangling reference\n"
#define OUTERROR "vh: error writing output -- disk full?\n"
#define LENGTHZERO "vh: line length 0 -- ignored.\n"
#define TOOLONG "vh: line too long\n"
#define CONTROLZ "vh: last line is Control-Z(s) -- ignored\n"
#define NOCRLF "vh: last line not terminated -- CR/LF added\n"
#define NOINPUT "vh: can't open input file %s\n"
#define NOOUTPUT "vh: can't open output file %s\n"
#define FILLING "filling index\n"
#define LONGLINE "\"%s%s\", line %ld: line too long (%d)\n"
#define REREADING "vh: rereading, "
#define SORTING "sorting, "
#define WRITING "writing, "
#define BADREFS "checking for bad references...\n"
#define DONE "done.\n"
/*******************************************************************
*
* Global data
*
******************************************************************/
static FILEINFO textblk; /* storage that vht points at */
/* vhi holds the state of the index file, *vht that of the text file
* (see initbrowse(), which opens the two streams into them). */
FILEINFO vhi, *vht = &textblk;
/* Each xp[i] holds a lowercased key, a NUL, then the decimal file offset. */
static char *xp[ITEMSMAX]; /* index pointer table */
static int nitems; /* xp item count */
#if defined(__TURBOC__) && defined(FFGETS)
/*******************************************************************
*
* ffgets() -- faster replacement for fgets() (very Borland-specific!)
* depends on Borland's FILE structure & usage
* similar method may be useful on other systems
*
* Note that implementing this doesn't require having any library
* source. All you need to know is in the getc() macro in stdio.h.
*
******************************************************************/
/*
* Read at most n-1 characters from f into s, stopping after a newline,
* and NUL-terminate the result. Returns s, or NULL when fgetc() reports
* EOF (s is then terminated at the point reached).
*
* f->level is used here as the number of characters available in the
* stream buffer and f->curp as the pointer to the next one; both are
* Borland FILE internals, per the header comment above.
*/
char *ffgets(s, n, f)
char *s;
int n;
FILE *f;
{
char *s0;
int c;
s0 = s;
--n; /* reserve room for the terminating NUL */
while ( n )
{
register int k;
register char *p;
k = f->level;
if ( k > n )
k = n;
if ( k == 0 )
{
/* nothing buffered: let fgetc() refill, and take one character */
c = fgetc(f);
if ( c == EOF )
{
*s = '\0';
return NULL;
}
*s++ = c;
--n;
if ( c == '\n' )
break;
}
else
{
/* scan the buffered characters for a newline, at most k of them */
p = f->curp;
while ( *p++ != '\n' && --k )
;
k = p - f->curp; /* number of characters to take, newline included */
memmove(s, f->curp, k);
f->level -= k;
f->curp += k;
n -= k;
s += k;
if ( s[-1] == '\n' )
break;
}
}
*s = '\0';
return s0;
}
#if 00
/* another approach ... */
/*
* Disabled alternative (inside #if 00): copies the whole available
* buffer span first, then looks for the newline in the copy with
* strchr(). Same contract as the active ffgets() above.
*/
char *ffgets(s, n, f)
char *s;
int n;
FILE *f;
{
char *s0, *p;
int k, c;
s0 = s;
n--; /* reserve room for the terminating NUL */
while ( n )
{
k = f->level;
if ( k > n )
k = n;
if ( k < 1 )
{
/* nothing buffered: take one character through fgetc() */
c = fgetc(f);
if ( c == EOF )
{
*s = '\0';
return NULL;
}
*s++ = c;
--n;
if ( c == '\n' )
break;
}
else
{
memmove(s, f->curp, k);
s[k] = '\0'; /* terminate so strchr() stops inside the copy */
p = strchr(s, '\n');
if ( p )
n = k = p - s + 1; /* n == k makes "n -= k" below end the loop */
f->level -= k;
f->curp += k;
s += k;
n -= k;
}
}
*s = '\0';
return s0;
}
#endif
#endif /* defined(__TURBOC__) && defined(FFGETS) */
#ifndef MSDOS
/*******************************************************************
*
* Emulations of Borland C library functions
*
******************************************************************/
bool strnicmp(s1, s2, n)
/*
 * Case-insensitive, length-limited string compare.
 *
 * Compares at most n characters of s1 and s2, ignoring case.  Returns 0
 * when the compared prefixes are equal (or n <= 0), otherwise the
 * difference between the first pair of differing lowercased characters.
 * The return type is the file's int-typedef'd `bool`, but the value is a
 * three-way comparison result, not just TRUE/FALSE.
 *
 * Characters are taken as unsigned char before being handed to the
 * <ctype.h> functions, whose behavior is undefined for negative values.
 * tolower() is applied only to characters isupper() accepts, which also
 * suits libraries where tolower() is defined only for uppercase input;
 * one code path therefore serves every platform.
 */
register char *s1, *s2;
register int n;
{
    register int c1, c2;

    while (--n >= 0)
    {
        c1 = (unsigned char)*s1++;
        c2 = (unsigned char)*s2++;
        if (isupper(c1))
            c1 = tolower(c1);
        if (isupper(c2))
            c2 = tolower(c2);
        if (c1 != c2)
            return(c1 - c2);
        if (c1 == '\0')         /* both strings ended together */
            return(0);
    }
    return(0);                  /* first n characters all matched */
}
void strlwr(s)
/*
 * Force the NUL-terminated string s to lower case, in place.
 *
 * Characters are read as unsigned char so that bytes above 0x7f are never
 * passed to the <ctype.h> functions as negative values (undefined
 * behavior).  Under BSD only characters isupper() accepts are converted.
 */
char *s;
{
    register unsigned char *cp;

    for (cp = (unsigned char *)s; *cp; cp++)
#ifdef BSD
        if (isupper(*cp))
#endif
            *cp = tolower(*cp);
}
#ifndef linux
char *strstr(t, s)
/*
 * Find the first occurrence of s in t (fallback for C libraries that
 * lack strstr()).  Returns a pointer into t, or NULL when s does not
 * occur.  An empty s matches at the start of t, even when t is itself
 * empty, as the standard function does.
 *
 * The comparison is done in-line so the length of s is not recomputed
 * at every candidate position.
 */
char *s, *t;
{
    char *cp, *a, *b;

    if (*s == '\0')
        return(t);
    for (cp = t; *cp; cp++)
    {
        /* advance while the needle matches; a NUL in t ends the match */
        for (a = cp, b = s; *b && *a == *b; a++, b++)
            continue;
        if (*b == '\0')         /* whole needle consumed: found */
            return(cp);
    }
    return((char *)NULL);
}
#endif /* !linux */
#endif /* MSDOS */
/*******************************************************************
*
* All file-type dependent stuff
*
******************************************************************/
/* Markup type of the text file; set by getformat(), tested by headword(). */
static int format; /* markup type, defaults to ORIGINAL296 */
#define ORIGINAL296 0 /* Jargon File 2.9.6 version */
#define COLON297 1 /* Jargon File 2.9.7+ with colons */
/* Offset of the key within a line: 1 if the line starts with ENTRYMARK, else 0. */
#define HDOFF(s) (s[0] == ENTRYMARK)
void getformat(fp)
/* figure out what format we're looking at */
FILE *fp;
{
(void) fseek(fp, 0L, SEEK_SET);
if (fgetc(fp) != '=')
format = COLON297;
}
char *headword(ln)
/*
 * Is this line an entry?  Returns a pointer to the ENTRYMARK that ends
 * the headword, or NULL when the line is not an entry.
 *
 * A line is rejected when it starts with a space, or, in any format newer
 * than ORIGINAL296, when it does not start with ENTRYMARK.  Otherwise the
 * line is scanned (past a leading ENTRYMARK, if any) for an ENTRYMARK
 * followed by whitespace, or by a second ENTRYMARK and then whitespace.
 *
 * Characters are cast to unsigned char before isspace(), whose behavior
 * is undefined for negative values.
 */
char *ln;
{
    char *p;

    if (ln[0] == ' ' || (format > ORIGINAL296 && (ln[0] != ENTRYMARK)))
        return((char *)NULL);

    p = ln + HDOFF(ln);
    while ((p = strchr(p, ENTRYMARK)) != NULL)
    {
        if (isspace((unsigned char)p[1])
            || (p[1] == ENTRYMARK && isspace((unsigned char)p[2])))
            break;
        ++p;
    }
    return(p);
}
/*******************************************************************
*
* Sequential entry access
*
* This supports the following entry points:
* getnextln() --- get line beginning at given position
* getprevln() --- get *previous* line from given position
*
******************************************************************/
void detab(s)
/*
 * Expand every tab in s to spaces, in place, with tab stops every 8
 * columns.  The string grows as tabs are expanded; the caller must supply
 * a buffer large enough for the expanded text (no bound is checked here).
 */
char *s;
{
    char *tab, *src, *dst;
    int col, stop, taillen;

    for (;;)
    {
        tab = strchr(s, TAB);
        if (tab == NULL)
            break;

        col = tab - s;                  /* column of the tab */
        stop = (col / 8 + 1) * 8;       /* column of the next tab stop */

        /* slide the tail (NUL included) right, copying from the far end */
        taillen = strlen(tab + 1);
        src = tab + 1 + taillen;
        dst = s + stop + taillen;
        while (src > tab)
            *dst-- = *src--;

        memset(tab, ' ', stop - col);   /* blank fill */
    }
}
/* daddr_t getnextln(FILE *fp, daddr_t pos, char *ln) -- get next line of file
** takes position where a line starts, gets line into buffer ln
** (at most LNSZ-1 characters are read), removes the trailing line
** terminator if there is one, expands tabs, and cuts the line at the
** first SEPARATOR character
** returns position of next byte after line (i.e. start of next line)
** returns NOWHERE at EOF, leaving ln untouched
**
** The terminator is stripped only when it is really there, so an
** unterminated last line (or a line longer than the buffer) no longer
** loses its final character, and a one-byte line no longer causes a
** read before the start of ln.
*/
daddr_t getnextln(fp, pos, ln)
FILE *fp;
daddr_t pos;
char *ln;
{
    int k;
    char *p;

    if (ftell(fp) != pos)       /* slight optimization for Borland & Zortech*/
        (void) fseek(fp, pos, SEEK_SET);

    /* at EOF the buffer holds no new data, so do not post-process it */
    if (fgets(ln, LNSZ, fp) == NULL)
        return(NOWHERE);

    k = strlen(ln);
    pos += k;                   /* advance by the bytes actually consumed */
    if (k > 0 && ln[k - 1] == '\n')
        ln[--k] = '\0';
#if CRLFSIZE > 1
    if (k > 0 && ln[k - 1] == CR)
        ln[--k] = '\0';
#endif /* CRLFSIZE > 1 */

    detab(ln);
    if ((p = strchr(ln, SEPARATOR)) != NULL)
        *p = '\0';
    return(pos);
}
/* daddr_t getprevln(FILE *fp, daddr_t pos, char *ln) -- get prev line in file
** takes position where a line starts, gets _previous_ line into buffer ln
** removes trailing CR/LF and expands tabs
** returns position of line returned
** returns NOWHERE at BOF
**
** NOTE(review): the code below asserts pos > 0 and always returns pos - n;
** no path visibly returns NOWHERE -- confirm the BOF contract with callers.
*/
daddr_t getprevln(fp, pos, ln)
FILE *fp;
daddr_t pos;
char *ln;
{
int n;
char *p;
assert(pos > 0);
/* read the n bytes that precede pos: LNSZ of them, or all there are */
n = LNSZ;
if (pos < LNSZ)
n = pos;
(void) fseek(fp, pos - n, SEEK_SET);
(void) fread(ln, 1, n, fp);
/* start at the last byte read; the first --p below steps over it, so the
* previous line's own terminator is never taken for the line boundary */
p = &ln[n-1];
while (p != ln)
{
int i;
--p;
#ifndef OSK
if (*p == LF)
#else
if (*p == '\n')
#endif
{
/* the wanted line starts just after this terminator */
++p;
n = &ln[n] - p; /* length of the wanted line, terminator included */
/* move the line to the front of the buffer (forward copy, dest < src) */
for (i = 0; i < n; i++)
ln[i] = p[i];
/* memmove(ln, p, n); */
break;
}
}
/* either a boundary was found, or the line starts at the top of the file */
assert(p > ln || (p == ln && pos == n));
ln[n - 1] = '\0'; /* overwrite the terminator's last byte */
#if CRLFSIZE > 1
if (ln[n - 2] == CR)
ln[n - 2] = '\0';
#endif /* CRLFSIZE > 1 */
pos -= n; /* file offset at which the returned line starts */
detab(ln);
if (p = strchr(ln, SEPARATOR))
*p = '\0';
return(pos);
}
/*******************************************************************
*
* Fortune-cookie mode
*
* This supports the following entry points:
* jrandom() -- return the offset of a random entry
*
******************************************************************/
daddr_t jrandom()
/* grab a fortune cookie */
{
int r = rand() % nitems;
return(atol(xp[r] + strlen(xp[r]) + 1));
}
/*******************************************************************
*
* Entry-access by name
*
* This supports the following entry points:
* xlocate() --- go to entry by name
* ilocate() --- go to entry by name (incremental)
* ffind() --- find string in file
* ifind() --- find string in file (incremental)
*
******************************************************************/
daddr_t ixlocate(s, isincrsearch)
/* binary-search index for x; return offset if found, else -offset of next */
char *s;
bool isincrsearch;
{
int lo, mid, hi, k, hit;
daddr_t pos;
strlwr(s);
#define NOTFOUND (-1)
lo = 0;
hi = nitems - 1;
hit = NOTFOUND; /* your basic binary search */
while (hit == NOTFOUND && lo <= hi)
{
mid = (lo + hi) / 2;
if ((k = strcmp(s, xp[mid])) < 0)
hi = mid - 1;
else if (k > 0)
lo = mid + 1;
else
hit = mid;
}
/*
* if not found, and item wanted exceeds item found,
* and there's room to move up, go to next higher item
*/
if (hit==NOTFOUND && strcmp(s, xp[mid]) > 0 && mid < nitems-1)
{
++mid;
assert(strcmp(s, xp[mid]) < 0);
}
/* the file position is stored right after the index string */
pos = atol(xp[mid] + strlen(xp[mid]) + 1);
if (isincrsearch)
{
if (strncmp(s, xp[mid], strlen(s)))
pos = -pos;
}
else
{
/* if no hit, return negative pos */
if (hit == NOTFOUND)
pos = -pos;
}
return(pos);
}
/*
* Go to entry by name: a plain (non-incremental) ixlocate() lookup.
* Like ixlocate(), it lowercases s in place and returns a negated
* offset when there is no exact match.
*/
daddr_t xlocate(s)
char *s;
{
return(ixlocate(s, FALSE));
}
daddr_t ilocate(c)
/*
 * Incremental lookup through the entry key list, one keystroke at a time.
 *
 *   printable c  append its lowercase form to the key typed so far and
 *                return ixlocate()'s incremental-search result;
 *   BS           drop the last key character, return NOWHERE;
 *   DEL          clear the key, return NOWHERE;
 *   anything else is ignored, return NOWHERE.
 *
 * The key lives in a static buffer of LNSZ bytes.  Once it is full,
 * further characters are dropped (the lookup is still performed) instead
 * of being written past the end of the buffer.  c is cast to unsigned
 * char for the <ctype.h> calls, which are undefined for negative values.
 */
char c;
{
    static char key[LNSZ], *ep = key;

    if (c == BS)
    {
        if (ep > key)
            *--ep = '\0';
    }
    else if (isprint((unsigned char)c)) /* incremental-search for given character */
    {
        if (ep < key + LNSZ - 1)        /* keep room for the NUL */
        {
            *ep++ = tolower((unsigned char)c);
            *ep = '\0';
        }
        return(ixlocate(key, TRUE));
    }
    else if (c == DEL)          /* reset from previous incremental lookup */
    {
        key[0] = '\0';
        ep = key;
    }
    return(NOWHERE);
}
daddr_t ffind(fp, pos, ss)
/*
 * Case-blind search forward for ss, starting at file offset pos.
 * Returns the offset of the start of the first line that contains ss,
 * or NOWHERE when no line from pos to end of file does.
 *
 * ss is copied (and lowercased) into a local buffer; a search string
 * longer than LNSZ-1 characters is truncated to fit rather than being
 * copied past the end of that buffer.
 */
FILE *fp;
daddr_t pos;
char *ss;
{
    char s[LNSZ], ln[LNSZ];
    unsigned n;

    n = strlen(ss);
    if (n > LNSZ - 1)
        n = LNSZ - 1;
    (void) memcpy(s, ss, n);
    s[n] = '\0';
    strlwr(s);

    (void) fseek(fp, pos, SEEK_SET);
    while (fgets(ln, LNSZ, fp))
    {
        strlwr(ln);
        if (strstr(ln, s))
            return(pos);
        pos = ftell(fp);        /* save pos before reading next line */
    }
    return(NOWHERE);
}
daddr_t ifind(fp, pos, c)
/*
 * Incremental search forward in the file, one keystroke at a time.
 *
 *   printable c  append its lowercase form to the search string typed so
 *                far and return ffind()'s result from pos;
 *   BS           drop the last character of the search string, return pos;
 *   DEL          clear the search string, return pos;
 *   anything else returns NOWHERE.
 *
 * The search string lives in a static buffer of LNSZ bytes.  Once it is
 * full, further characters are dropped (the search is still performed)
 * instead of being written past the end of the buffer.  c is cast to
 * unsigned char for the <ctype.h> calls, which are undefined for negative
 * values.
 */
FILE *fp;
daddr_t pos;
char c;
{
    static char key[LNSZ], *ep = key;

    if (c == BS)
    {
        if (ep > key)
            *--ep = '\0';
        return(pos);
    }
    else if (isprint((unsigned char)c)) /* incremental-search for given character */
    {
        if (ep < key + LNSZ - 1)        /* keep room for the NUL */
        {
            *ep++ = tolower((unsigned char)c);
            *ep = '\0';
        }
        return(ffind(fp, pos, key));
    }
    else if (c == DEL)          /* reset from previous incremental search */
    {
        key[0] = '\0';
        ep = key;
        return(pos);
    }
    return(NOWHERE);            /* should never reach here */
}
/*******************************************************************
*
* Screen-fetch and line-retrieval code
*
* Entry points:
* iflink() --- test for presence of a reference
* findnextsel() --- find next link on current screen
* findprevsel() --- find previous link on current screen
*
* These functions expect to be able to call:
* readscr() --- get text from screen
*
******************************************************************/
/* left tag and right tag of textual references */
#define LTAG '{'
#define RTAG '}'
/* daddr_t iflink() -- test for link at screen position, find right tag pos
**
** Reads screen row y and works out which term, if any, the column x
** belongs to:
**   - in the index display, the whole row with trailing blanks removed;
**   - on an entry line, the headword, when x lies left of its end mark;
**   - otherwise the text between the nearest LTAG at or left of x and
**     the following RTAG, continuing onto row y+1 when the reference
**     runs to the edge of the screen.
** The term is then looked up with xlocate().
**
** Returns the file offset of the target (> 0), having stored the screen
** position of the end of the link text in *xrtag / *yrtag;
** returns 0L when there is no reference at x;
** returns xlocate()'s negative result when the term is not in the index.
**
** term[] is sized for two screen rows so that neither a wide index row
** nor a wrapped reference can overrun it (this assumes COLS <= MAXWIDTH,
** as the s[MAXWIDTH] row buffer already does -- TODO confirm), and the
** trailing-blank trim stops at the start of the buffer on an empty row.
*/
daddr_t iflink(x, y, xrtag, yrtag, isindex)
int x,y;
int *xrtag, *yrtag;
bool isindex;
{
    char *cp, s[MAXWIDTH];
    char term[2 * MAXWIDTH + STRSZ];
    int i;
    daddr_t pos;

    readscr(0, y, COLS-1, y, s);        /* get line on screen */
    if (isindex)                        /* if index, x is col 0 */
    {
        (void) strcpy(term, s);
        i = strlen(term);
        while (i > 0 && term[i - 1] == ' ')
            term[--i] = '\0';
        x = i - 1;
    }
    else if ((cp = headword(s)) && x < cp - s)  /* a headword? */
    {
        i = *cp;                        /* cut at the end mark, copy, restore */
        *cp = '\0';
        (void) strcpy(term, s + HDOFF(s));
        *cp = i;
        x = (cp - s) - 1;
    }
    else        /* not index or headword, find left tag */
    {
        int x0 = x;

        while (x >= 0 && s[x] != LTAG)
            --x;
        if (x < 0)              /* if no left tag, then no link */
            return(0L);
        assert(IS_TAG(s, x));
        /* find next non-LTAG char */
        while (x < COLS && s[x] == LTAG)
            ++x;
        if (x >= COLS)          /* if none, no tag */
            return(0L);
        /* scan the link text up to RTAG, put into term[] */
        i = 0;
        while (x < COLS-1 && s[x] != RTAG)
            term[i++] = s[x++];
        --x;
        if (x < x0)             /* if we are left of start point, return 0 */
            return(0L);         /* in case user clicks to right of link */
        else if (x >= COLS-2)   /* in case link wraps across line */
        {
            ++y;
            if (y > LASTLINE)
                return(0L);
            readscr(0, y, COLS-2, y, s);
            /* join the two rows with exactly one blank */
            while (i > 0 && term[i - 1] == ' ')
                --i;
            term[i++] = ' ';
            x = 0;
            while (x < COLS-1 && s[x] == ' ')
                ++x;
            while (x < COLS-1 && s[x] != RTAG)
                term[i++] = s[x++];
            --x;
            if (x >= COLS)
                return(0L);
        }
        term[i] = '\0';         /* terminate the term */
    }
    /* look up in index table */
    pos = xlocate(term);
    if (pos > 0)        /* if found, set the right tag x/y coords */
    {
        *yrtag = y;
        *xrtag = x;
    }
    return(pos);        /* position of link target; not > 0 if none */
}
/* region findnextsel() -- find next selection (i.e. link reference)
** given x/y coords of a link selection on the screen, find the next
** one onscreen searching left to right, then down, wrapping last line
** to top line, until back to original position if only one link onscreen;
** coordinates are placed in the result.
*/
region findnextsel(x, y, isindex)
int x, y;
bool isindex;
{
char s[MAXWIDTH];
int x0, y0, j;
daddr_t pos;
region res;
if (isindex) /* if index, just go to next row */
{
++y;
if (y > LASTLINE) /* wrap line to top if at end */
y = 0;
res.yl = res.yr = y;
res.xl = 0;
res.xr = COLS - 2;
return(res);
}
if (y < 0) /* if no selection currently, look for one */
y = 0;
x0 = x;
y0 = y;
readscr(0, y, COLS-1, y, s); /* get screen line */
while (x < COLS && IS_TAG(s, x))
++x;
for (;;)
{
for (; x < COLS; ++x)
{
if (IS_TAG(s, x))
{
while (x < COLS && s[x] == LTAG || (x == 0 && s[0] == ENTRYMARK))
++x;
/* look up link */
if (iflink(x, y, &res.xr, &res.yr, isindex) > 0)
{
res.xl = x; /* if found, set x/y and return */
res.yl = y;
return(res);
}
}
}
x = 0; /* past end of row; reset x pos */
++y; /* step to next line, wrap around if at end */
if (y > LASTLINE)
y = 0;
if (y == y0 && x == x0) /* return if back at start point */
{
res.xl = res.yl = NOPLACE;
return(res);
}
readscr(0, y, COLS-1, y, s); /* get screen line */
}
}
/* void findprevsel() -- find prev selection (i.e. link reference)
** given x/y coords of a link selection on the screen, find the previous
** one onscreen searching right to left, then up, wrapping top line
** to last line, until back to original position if only one link onscreen;
** coordinates are placed in the result.
*/
region findprevsel(x, y, isindex)
int x, y;
bool isindex;
{
char s[MAXWIDTH];
int x0, y0, j;
daddr_t pos;
region res;
if (isindex) /* if index, get prev row */
{
--y;
if (y < 0) /* wrap line to last if at top */
y = LASTLINE;
res.yl = res.yr = y;
res.xl = 0;
res.xr = COLS - 2;
return(res);
}
x0 = x;
y0 = y;
readscr(0, y, COLS-1, y, s); /* get screen line */
while (x >= 0 && IS_TAG(s, x))
--x;
for (;;)
{
for (; x >= 0; --x)
{
if (IS_TAG(s, x))
{
while (x < COLS && IS_TAG(s, x))
++x;
--x; /* back up to LTAG; look up link */
if (iflink(x, y, &res.xr, &res.yr, isindex) > 0)
{
res.xl = ++x; /* if found, set x/y and return */
res.yl = y;
return(res);
}
}
}
x = COLS - 1; /* past beginning of row; reset x pos */
--y; /* step to prev line, wrap around if at top */
if (y < 0)
y = LASTLINE;
if (y == y0 && x == x0) /* return if back at start point */
{
res.xl = res.yl = NOPLACE;
return(res);
}
readscr(0, y, COLS-1, y, s); /* get prev line, continue */
}
}
/*******************************************************************
*
* Position stack management
*
* This supports the following entry points:
* enqueue() --- push a placemark
* dequeue() --- pop a placemark
*
******************************************************************/
void enqueue(f)
/*
 * Add the current file position (f->toppos) and selection (f->sel) to
 * the backtrack stack of f.
 *
 * When the stack is full the oldest placemark is discarded and the rest
 * move down one slot, making room at the top.  (The earlier code copied
 * slot 1 onto slot 0 over and over, so nothing moved and the newest
 * placemark was overwritten instead.)
 *
 * An entry identical to the one already on top, in both position and
 * selection, is not pushed a second time.
 */
FILEINFO *f;
{
    int k, n;

    k = f->btscnt;
    /* if full, move it up to make room */
    if (k == BTSMAX)
    {
        for (n = 0; n < BTSMAX - 1; n++)
        {
            memcpy(&f->bts[n], &f->bts[n + 1], sizeof(f->bts[0]));
            memcpy(&f->selbts[n], &f->selbts[n + 1], sizeof(f->selbts[0]));
        }
        --k;
        f->btscnt = k;  /* 9/27/91 rdg fixed bts overflow bug */
    }
    /* store the candidate in the free slot; it counts only if it differs */
    f->bts[k] = f->toppos;
    f->selbts[k].xl = f->sel.xl;
    f->selbts[k].yl = f->sel.yl;
    f->selbts[k].xr = f->sel.xr;
    f->selbts[k].yr = f->sel.yr;
#ifdef DEBUG
    (void) fprintf(stderr, "enqueue[%d]: x = %d, y = %d, pos = %ld.\n",
                   k, f->sel.xl, f->sel.yl, f->toppos);
#endif /* DEBUG */
    if (k == 0 || f->bts[k] != f->bts[k-1] ||   /* only enqueue if changed */
        memcmp(&f->selbts[k], &f->selbts[k-1], sizeof(f->selbts[k])) != 0)
        ++f->btscnt;
#ifdef DEBUG
    else
        (void) fprintf(stderr, "enqueue[%d]: is duplicate, popped.\n", k);
#endif /* DEBUG */
}
void dequeue(f)
/*
 * Pop the top placemark off the backtrack stack of f, restoring
 * f->toppos and the four coordinates of f->sel from it.
 * Does nothing when the stack is empty.
 */
FILEINFO *f;
{
    int top;

    if (f->btscnt == 0)         /* nothing to pop */
    {
#ifdef DEBUG
        (void) fprintf(stderr, "dequeue: failed, no stack space.\n");
#endif /* DEBUG */
        return;
    }

    top = --f->btscnt;
    f->toppos = f->bts[top];
    f->sel.xl = f->selbts[top].xl;
    f->sel.yl = f->selbts[top].yl;
    f->sel.xr = f->selbts[top].xr;
    f->sel.yr = f->selbts[top].yr;
#ifdef DEBUG
    (void) fprintf(stderr, "dequeue[%d]: x = %d, y = %d, pos = %ld.\n",
                   f->btscnt, f->sel.xl, f->sel.yl, f->toppos);
#endif /* DEBUG */
}
/*******************************************************************
*
* Browse support
*
* Entry points:
* initbrowse() --- set up in-core structures for given file pair
* setlastpage() --- set lastpagetoppos members
*
******************************************************************/
/* validate_synchronization(FILE *fp, char *ln)
** check file against index table, to try to be sure they match up
** will sample 10 index entries, evenly spaced, including first and last
** for each entry, looks at text file to see if text matches index
**
** ln is scratch space for one text line.  On any mismatch (or a failed
** seek) the OUTASYNC message is printed and the program exits with
** status 1.  An empty index has nothing to sample and passes trivially;
** without that check the entry table would be read at slot 0 although
** nothing had been stored there.
*/
void validate_synchronization(fp, ln)
FILE *fp;
char *ln;
{
#define nsamples 10
    int i, k;
    daddr_t pos;

    if (nitems <= 0)
        return;

    for (i = 0; i < nsamples; ++i)
    {
        k = (i * (nitems - 1)) / (nsamples - 1);        /* sample index */
        pos = xlocate(xp[k]);                           /* look up pos */
        assert(pos > 0);
        if (fseek(fp, pos, SEEK_SET) < 0)               /* seek in file */
            break;
        getnextln(fp, pos, ln);                         /* get text line */
        /* the key must be a case-blind prefix of the entry line */
        if (strnicmp(ln + HDOFF(ln), xp[k], strlen(xp[k])) != 0)
            break;
    }
    /* exit if any mismatch */
    if (i < nsamples)
    {
        (void) fprintf(stderr, OUTASYNC);
        exit(1);
    }
}
bool idxsearch(name, path)
/* look for database along defined search path */
char *name, *path;
{
extern char *getenv();
char *srch, *cp;
if ((srch = getenv("VHPATH")) == (char *)NULL)
srch = VHPATH;
#ifdef MSDOS
#define LISTSEP ";"
#define PATHSEP "\\"
#else
#ifndef AMIGA
#define LISTSEP ":"
#else
#define LISTSEP ";"
#endif /* AMIGA */
#define PATHSEP "/"
#endif /* MSDOS */
cp = strtok(srch, LISTSEP);
do {
(void) strcpy(path, cp);
#ifdef AMIGA
if (path[strlen(path) - 1] != ':') { /* e.g. "DH0:" */
#endif
if (path[strlen(path) - 1] != PATHSEP[0])
(void) strcat(path, PATHSEP);
#ifdef AMIGA
}
#endif
(void) strcat(path, name);
(void) strcat(path, IDX);
if (access(path, F_OK) == 0)
return(TRUE);
} while
(cp = strtok((char *)NULL, LISTSEP));
return(FALSE);
}
bool initbrowse(name)
char *name;
{
char ln[LNSZ + 1], path[PATHLEN], *cp;
#ifdef VHHDR
vhhdr hdr;
#endif /* VHHDR */
int k, fno;
/* look for the database files */
if (!idxsearch(name, path))
{
(void) fprintf(stderr, CANTFIND, name);
return(FALSE);
}
#ifndef OSK
if ((vhi.fp = fopen(path, "rb"))==(FILE*)NULL) /* open index file */
#else
if ((vhi.fp = fopen(path, "r"))==(FILE*)NULL) /* open index file */
#endif
{
(void) fprintf(stderr, CANTOPEN, path);
return(FALSE);
}
#ifndef BSD
(void) setvbuf(vhi.fp, (char *)NULL, _IOFBF, DBBUF);
#endif /* BSD */
vhi.btscnt = 0;
vhi.sel.xl = NOPLACE;
#ifdef VHHDR
(void) fread(&hdr, sizeof(vhhdr), 1, vhi.fp);
if (hdr.magic != VHMAGIC)
{
(void) fprintf(stderr, BADMAGIC, path);
return(FALSE);
}
/* read in text file names */
for (fno = 0 ; fno < hdr.nfiles; fno++)
(void) fread(ln, 1, VHPATHSIZ, vhi.fp);
#endif /* VHHDR */
/* fill index table */
for (nitems = 0; nitems < ITEMSMAX; ++nitems)
{
if (fgets(ln, LNSZ, vhi.fp) == NULL)
break;
k = strlen(ln) - 1;
ln[k] = '\0';
#if CRLFSIZE > 1
if (ln[k-1] == '\r')
ln[--k] = '\0';
#endif /* CRLFSIZE > 1 */
if ((xp[nitems] = malloc(k + 1)) == NULL)
{
(void) fprintf(stderr, NOMEMORY);
return(TRUE);
}
strcpy(xp[nitems], ln);
assert(strchr(ln, SEPARATOR)); /* better have separator */
*strchr(xp[nitems], SEPARATOR) = '\0'; /* replace it w/ null byte */
strlwr(xp[nitems]); /* force items in table to lowercase */
}
/* did we overflow? */
if (nitems == ITEMSMAX)
{
(void) fprintf(stderr, TOOMANY);
return(FALSE);
}
vhi.toppos = 0;
/* this will go inside a loop */
vht->btscnt = 0;
vht->sel.xl = NOPLACE;
(void) strcpy(ln, path);
(void) strcpy(ln + strlen(ln) - TXTLEN, TXT);
#ifndef OSK
if ((vht->fp = fopen(ln, "rb"))==(FILE*)NULL) /* open text file */
#else
if ((vht->fp = fopen(ln, "r"))==(FILE*)NULL) /* open text file */
#endif
{
(void) fprintf(stderr, CANTOPEN, path);
return(FALSE);
}
#ifndef BSD
(void) setvbuf(vht->fp, (char *)NULL, _IOFBF, DBBUF);
#endif /* BSD */
getformat(vht->fp); /* what file format are we looking at? */
vht->toppos = 0;
/* set random-number seed, in case we're after a fortune cookie */
srand(time((time_t *)0));
return(TRUE);
}
void setlastpage()
/* this needs to be called *after* initscr() */
{
char ln[LNSZ + 1];
int i;
/* set up last page top pos for index */
(void) fseek(vhi.fp, 0L, SEEK_END); /* seek end of index file */
vhi.dsptoppos = vhi.lastpagetoppos = vhi.dspnextpos = vhi.endpos =ftell(vhi.fp);
for (i = 0; i <= LASTLINE; ++i)
if (vhi.lastpagetoppos)
vhi.lastpagetoppos = getprevln(vhi.fp, vhi.lastpagetoppos, ln);
if (vhi.lastpagetoppos == 0L) /* avoid divide-by-zero error */
vhi.lastpagetoppos = 1L;
(void) fseek(vhi.fp, 0L, SEEK_SET); /* seek start of index file */
(void) fseek(vht->fp, 0L, SEEK_END); /* seek eof */
vht->dsptoppos = vht->lastpagetoppos = vht->dspnextpos = vht->endpos =ftell(vht->fp);
for (i = 0; i <= LASTLINE; ++i)
if (vht->lastpagetoppos)
vht->lastpagetoppos = getprevln(vht->fp, vht->lastpagetoppos, ln);
if (vht->lastpagetoppos == 0L) /* avoid divide-by-zero error */
vht->lastpagetoppos = 1L;
(void) fseek(vht->fp, 0L, SEEK_SET); /* back to top of file */
}
/*******************************************************************
*
* Consistency checking
*
******************************************************************/
void chkindex(name)
/*
 * Consistency check of the database called name: look for over-long
 * lines and dangling references, then verify that index and text agree.
 *
 * The text file is read character by character.  Text between a left
 * tag (one followed by a printable character) and its matching right tag
 * is collected, with runs of whitespace and line breaks collapsed to one
 * blank, and looked up in the index; a reference that is not there is
 * reported with DANGLING.  A line longer than WARNSIZE + CRLFSIZE
 * characters is reported with LONGLINE.
 *
 * Differences from the earlier code:
 *   - returns at once when initbrowse() fails (it has already printed
 *     why) instead of reading from a stream that was never opened;
 *   - every character of a line is counted towards its length; only
 *     characters belonging to references used to be counted, so plain
 *     long lines were never reported;
 *   - the byte before refbuf is no longer examined when the reference
 *     buffer is empty, and text beyond the buffer's capacity is dropped
 *     rather than written past its end.
 */
char *name;
{
    long lnum = 1;
    char refbuf[LNSZ + 1], *refpt = refbuf;
    char *reflim = refbuf + LNSZ;       /* last slot, kept for the NUL */
    int c, len = 0, depth = 0;

    /* time for consistency check */
    if (!initbrowse(name))
        return;
    (void) fseek(vht->fp, 0L, SEEK_SET);
    while ((c = fgetc(vht->fp)) != EOF)
    {
        if (c == '\n')
        {
            if (len > WARNSIZE + CRLFSIZE)
                (void) printf(LONGLINE, name, TXT, lnum, len);
            len = 0;
            ++lnum;
            /* a line break inside a reference counts as one blank */
            if (depth != 0 && (refpt == refbuf || refpt[-1] != ' ')
                && refpt < reflim)
                *refpt++ = ' ';
            continue;
        }

        ++len;
        if (c == LTAG)
        {
            /* peek: only a tag followed by a printable opens a reference */
            ungetc(c = fgetc(vht->fp), vht->fp);
            if (isprint(c))
                ++depth;
        }
        else if (depth > 0 && c == RTAG)
        {
            --depth;
            if (depth == 0 && refpt != refbuf)
            {
                *refpt = '\0';
                if (xlocate(refbuf) < 0)
                    (void) printf(DANGLING, name, TXT, lnum, refbuf);
                refpt = refbuf;
            }
        }
        else if (depth != 0)
        {
            /* store the character, but never two blanks in a row */
            if ((!isspace(c) || refpt == refbuf || refpt[-1] != ' ')
                && refpt < reflim)
                *refpt++ = c;
        }
    }
    validate_synchronization(vht->fp, refbuf);  /* validate */
}
/*******************************************************************
*
* File indexing code
*
* Entry points:
* mkindex(name) -- generate index file from text
*
******************************************************************/
/* Line buffer used by readfile(); mkindex() declares its own local ln. */
static char ln[LNSZ + 1];
/*
* One index line held in core. Nodes are allocated by readfile() with
* room for the whole text after the header, so ln[] is really len+1
* bytes long (text plus NUL), not 1.
*/
typedef struct list_struct
{
struct list_struct *next; /* next node, NULL at end of list */
unsigned int len; /* length of the text in ln, NUL not counted */
char ln[1]; /* start of the line text */
}
list;
/* Head of the in-core list built by readfile(), sorted by lsort(). */
static list *listp = NULL;
char *mmalloc(n)
/*
 * malloc(3) that never returns NULL: when n bytes cannot be had, the
 * NOMEMORY message goes to stderr and the program exits with status 1.
 */
unsigned n;
{
    char *block;

    block = malloc(n);
    if (block == NULL)
    {
        (void) fprintf(stderr, NOMEMORY);
        exit(1);
    }
    return(block);
}
/* The character at cp, read as unsigned char and widened to int. */
#define UNSCH(cp) ((int)(*((unsigned char *)(cp))))
int strlcmp(a, b)
/*
 * Compare two strings, smashing case.  Returns a value less than, equal
 * to, or greater than zero as a sorts before, the same as, or after b
 * once both are lowercased.
 */
char *a, *b;
{
#ifdef BSD
    return(strnicmp(a,b,max(strlen(a),strlen(b))));
#else
    int diff;

    while (*a != '\0' && *b != '\0')
    {
        diff = tolower(UNSCH(a)) - tolower(UNSCH(b));
        if (diff != 0)
            return diff;
        ++a;
        ++b;
    }
    /* at least one string has ended; the shorter one sorts first */
    return(tolower(UNSCH(a)) - tolower(UNSCH(b)));
#endif
}
int keycmp(p, q)
/*
 * Order two list nodes by the case-blind comparison of their line text;
 * the result has the sign strlcmp() gives for p's text against q's.
 */
list *p, *q;
{
    char *left = p->ln;
    char *right = q->ln;

    return(strlcmp(left, right));
}
list *lmerge (p, q)
/*
 * Merge two lists that are each sorted by keycmp() into one sorted list
 * and return its head.  A node of p is taken only when it sorts strictly
 * before the head of q; on a tie the node from q goes first.
 */
list *p, *q;
{
    list *first = NULL;
    list **tail = &first;       /* where the next node gets linked in */

    while (p != NULL && q != NULL)
    {
        if (keycmp(p, q) < 0)
        {
            *tail = p;
            tail = &p->next;
            p = p->next;
        }
        else
        {
            *tail = q;
            tail = &q->next;
            q = q->next;
        }
    }
    /* one list is exhausted; hang the rest of the other on the end */
    *tail = (p != NULL) ? p : q;
    return(first);
}
list *lsort (p)
/*
 * Merge sort of a linked list: split it into two halves, sort each
 * recursively, merge the results.  Returns the head of the sorted list
 * (NULL for an empty list).
 */
list *p;
{
    list *slow, *fast, *rest;

    if (p == NULL)
        return(p);

    /* fast moves two nodes for each one slow moves: slow ends mid-list */
    slow = p;
    fast = p->next;
    while (fast != NULL && fast->next != NULL)
    {
        slow = slow->next;
        fast = fast->next->next;
    }

    /* cut the list after slow */
    rest = slow->next;
    slow->next = NULL;

    if (rest != NULL)           /* more than one node: sort the halves */
        p = lmerge(lsort(p), lsort(rest));
    return(p);
}
void readfile(inf)
/*
 * Read the file open on inf into the linked list headed by listp, one
 * node per line with the line terminator removed.  Each new node is put
 * at the head of the list, so the list ends up in reverse file order.
 *
 * Exits with status 1 when a line does not fit the line buffer or when
 * memory runs out (see mmalloc()).
 *
 * Differences from the earlier code:
 *   - a line filling the buffer without a terminator is detected as too
 *     long; fgets() stores at most LNSZ-1 characters, so the earlier test
 *     against LNSZ could never succeed;
 *   - an unterminated last line is kept, as the NOCRLF message says (the
 *     terminator is supplied when the list is written out); it used to
 *     be dropped;
 *   - a line consisting of the terminator alone no longer causes a read
 *     before the start of the buffer.
 */
FILE *inf;
{
    int k;
    list *p;

    while (fgets(ln, LNSZ, inf))
    {
        k = strlen(ln);
        if (k == 0)
        {
            (void) fprintf(stderr, LENGTHZERO);
            continue;
        }
        if (ln[k-1] == '\n')
        {
            k--;
#if CRLFSIZE > 1
            if (k > 0 && ln[k-1] == '\r')
                k--;
#endif /* CRLFSIZE > 1 */
            ln[k] = '\0';
        }
        else
        {
            if (k == LNSZ - 1)          /* buffer full, no terminator seen */
            {
                (void) fprintf(stderr, TOOLONG);
                exit(1);
            }
            if (ln[0] == 26)
            {
                (void) fprintf(stderr, CONTROLZ);
                continue;
            }
            (void) fprintf(stderr, NOCRLF);     /* and keep the line */
        }
        /* the node header already holds one byte of text: room for the NUL */
        p = (list *)mmalloc(k + sizeof(list));
        p->len = k;
        memcpy(p->ln, ln, k+1);
        p->next = listp;
        listp = p;
    }
}
void writefile (outf)
/* write file from in-core list */
FILE *outf;
{
list *p;
for (p = listp; p; p = p->next)
{
(void) fwrite(p->ln, 1, p->len, outf);
#if CRLFSIZE > 1
(void) fwrite("\r", 1, 1, outf);
#endif /* CRLFSIZE > 1 */
(void) fwrite("\n", 1, 1, outf);
}
}
#ifdef AMIGA
/*
* Give stdout a static BUFSIZ buffer. mkindex() makes stdout unbuffered
* and, on AMIGA, registers this function with atexit().
*/
void setthebufback(void)
{
static char mybuf[BUFSIZ];
setbuf(stdout, mybuf);
}
#endif /* AMIGA */
void mkindex(argc, argv)
/*
 * Make an index from the given text files.
 *
 * argv[0..argc-1] are base names without extension.  Every
 * "<argv[i]>.txt" is scanned for entry lines, and for each headword that
 * differs (ignoring case) from the previous one a line
 * "<key><SEPARATOR><offset>" is written to "<argv[0]>.idx".  The index is
 * then read back, sorted case-blind, and rewritten in place (preceded by
 * a header and the file names when VHHDR is defined).  Progress goes to
 * stdout; any failure prints a message on stderr and exits with status 1.
 *
 * Differences from the earlier code:
 *   - the two "can't open" messages issued while re-opening the index
 *     now receive the file name their %s conversion expects;
 *   - each input file is rewound after getformat(), so the first line is
 *     read from offset 0, matching the offset recorded for it;
 *   - a base name too long for the PATHLEN path buffers is rejected
 *     instead of overflowing them;
 *   - under VHHDR each file name is written from a zero-padded
 *     VHPATHSIZ-byte record instead of reading past the end of argv[];
 *   - unused locals are gone.
 */
int argc;
char *argv[];
{
    char source[PATHLEN], target[PATHLEN];
    char ln[LNSZ], prevln[LNSZ];
    FILE *inf, *outf;
    daddr_t pos;
    char *p;
    int fno;
#ifdef VHHDR
    vhhdr hdr;
    char namebuf[VHPATHSIZ];
#endif /* VHHDR */

    if (strlen(argv[0]) + strlen(IDX) >= sizeof(target))
    {
        (void) fprintf(stderr, NOOUTPUT, argv[0]);
        exit(1);
    }
    (void) strcpy(target, argv[0]);
    (void) strcat(target, IDX);
#ifndef OSK
    if ((outf = fopen(target, "wb")) == (FILE *)NULL)
#else
    if ((outf = fopen(target, "w")) == (FILE *)NULL)
#endif
    {
        (void) fprintf(stderr, NOOUTPUT, target);
        exit(1);
    }
    setbuf(stdout, (char *)NULL);       /* show progress as it happens */
#ifdef AMIGA
    atexit(setthebufback);
#endif /* AMIGA */
    for (fno = 0 ; fno < argc; fno++)
    {
        if (strlen(argv[fno]) + strlen(TXT) >= sizeof(source))
        {
            (void) fprintf(stderr, NOINPUT, argv[fno]);
            exit(1);
        }
        (void) strcpy(source, argv[fno]);
        (void) strcat(source, TXT);
#ifndef OSK
        if ((inf = fopen(source, "rb"))==(FILE *)NULL)
#else
        if ((inf = fopen(source, "r"))==(FILE *)NULL)
#endif
        {
            (void) fprintf(stderr, NOINPUT, source);
            exit(1);
        }
        getformat(inf);
        (void) fseek(inf, 0L, SEEK_SET);        /* getformat() read a byte */
        (void) printf(FORMTYPE, format);
        *prevln = 0;
        (void) printf(FILLING);
        pos = 0L;               /* offset of the line about to be read */
        while (fgets(ln, LNSZ, inf))
        {
            if ((p = headword(ln)) != NULL)
            {
                *p = 0;         /* cut the line at the end of the headword */
                if (strlcmp(ln, prevln) != 0)
#ifndef OSK
                    (void) fprintf(outf, "%s%c%ld\r\n",
#else
                    (void) fprintf(outf, "%s%c%ld\n",
#endif
                                   ln + HDOFF(ln), SEPARATOR, (long) pos);
                (void) strcpy(prevln, ln);
            }
            pos = ftell(inf);
        }
        (void) fclose(inf);
    }
    (void) fclose(outf);
    /* now sort the file */
#ifndef OSK
    inf = fopen(target, "rb");
#else
    inf = fopen(target, "r");
#endif
    if (inf == NULL)
    {
        (void) fprintf(stderr, NOINPUT, target);
        exit(1);
    }
    (void) printf(REREADING);
#ifndef AOS
    setvbuf(inf, NULL, _IOFBF, LNSZ + 1);
#endif /* AOS */
    readfile(inf);
    (void) fclose(inf);
    (void) printf(SORTING);
    listp = lsort(listp);
    (void) printf(WRITING);
#ifndef OSK
    outf = fopen(target, "wb");
#else
    outf = fopen(target, "w");
#endif
    if (outf == NULL)
    {
        (void) fprintf(stderr, NOOUTPUT, target);
        exit(1);
    }
#ifndef AOS
    setvbuf(outf, NULL, _IOFBF, LNSZ + 1);
#endif /* AOS */
#ifdef VHHDR
    hdr.magic = VHMAGIC;
    hdr.nfiles = argc;
    (void) fwrite(&hdr, sizeof(vhhdr), 1, outf);
    for (fno = 0 ; fno < argc; fno++)
    {
        /* fixed-size, zero-padded name record */
        memset(namebuf, 0, sizeof(namebuf));
        strncpy(namebuf, argv[fno], sizeof(namebuf) - 1);
        (void) fwrite(namebuf, 1, VHPATHSIZ, outf);
    }
#endif /* VHHDR */
    writefile(outf);
    (void) fclose(outf);
    (void) printf(DONE);
}
/* vh.c ends here */