home *** CD-ROM | disk | FTP | other *** search
- From: lee@sq.sq.com (Liam R. E. Quin)
- Newsgroups: alt.sources
- Subject: lq-text Full Text Retrieval Database Part 08/13
- Message-ID: <1991Mar4.020723.16680@sq.sq.com>
- Date: 4 Mar 91 02:07:23 GMT
-
- : cut here --- cut here --
- : To unbundle, sh this file
- #! /bin/sh
- : part 08
- echo x - lq-text/src/lqtext/lqword.c 1>&2
- sed 's/^X//' >lq-text/src/lqtext/lqword.c <<'@@@End of lq-text/src/lqtext/lqword.c'
- X/* lqword.c -- Copyright 1989 Liam R. Quin. All Rights Reserved.
- X * This code is NOT in the public domain.
- X * See the file COPYRIGHT for full details.
- X */
- X
- X/* lqword -- simple program to print information about individual words.
- X *
- X * $Id: lqword.c,v 2.8 90/10/06 00:51:00 lee Rel1-10 $
- X */
- X
- X#include "globals.h" /* defines and declarations for database filenames */
- X
- X#include <stdio.h>
- X#include <sys/types.h>
- X#include <malloc.h>
- X#include <fcntl.h> /* for fileinfo.h */
- X#include <ctype.h>
- X
- X#ifdef BSD
- X# define USI_MAX ((unsigned int) -1)
- X#else
- X# include <limits.h>
- X /* for USI_MAX, the largest unsigned integer.
- X * 4.3 BSD doesn't seem to have this. I don't know how to get this
- X * on BSD systems.
- X */
- X#endif
- X
- X#include "fileinfo.h"
- X#include "wordinfo.h"
- X#include "smalldb.h"
- X#include "pblock.h"
- X#include "wordrules.h"
- X#include "emalloc.h"
- X
- X/*** Declarations: ***/
- X/** System calls and library routines: **/
- Xextern void exit();
- X
- X/** System calls: **/
- X
- X/** Unix Library Functions: **/
- Xextern char *strncpy();
- X#ifndef tolower
- X extern int tolower();
- X#endif
- X
- X/** lqtext library functions: **/
- Xextern char *UnFlag();
- Xextern t_WordInfo *WID2WordInfo();
- Xextern int TooCommon();
- Xextern void cleanupdb();
- Xextern void SetDefaults();
- Xextern void DefaultUsage();
- Xextern void DeleteWord();
- X
- X/** functions defined within this file: */
- Xvoid PrintWordInfo(), AllWordInfo();
- Xvoid Display(), ShowWordList();
- Xvoid dbmmarch();
- X
- X/** Macros and variable definitions **/
- X
- X#define DISPLAY_ALL 1
- X#define DISPLAY_NAME 2
- X /* These are the possible DisplayMode values -- see main() */
- X
- Xchar *progname = 0;
- X /* Used for error messages */
- X
- Xint SilentMode = 0;
- X /* Set if we were invoked with the -s option. In this mode, we behave
- X * like grep -s, and exit with a zero exit status if one or more of
- X * the words were found in the database.
- X */
- X
- Xint ListMode = 0;
- X /* Set if we are to provide a terser output format suitable for use
- X * with lqshow(1L).
- X */
- X
- Xint AsciiTrace = 0;
- X /* If this is non-zero, we provide debugging information. The lqtext
- X * library also uses this variable. Setting it to values greater
- X * than 1 or 2 will generally provide large amounts of debugging
- X * information. If the library was compiled with -UASCIITRACE,
- X * however, there will be much less diagnostic output at higher
- X * levels.
- X */
- X
- Xstatic char *Revision = "lqword 2.2";
- X
- X/** end of declarations... **/
- X
- X
- X/* main() -- parse the command line, then either report on each word
- X * named as an argument or, when no words are given, list the database.
- X *
- X * Exit status: 1 after a usage message, or when -s was given and no
- X * word produced a match; otherwise the value of SilentMode (so 0 when
- X * -s was not given).  In silent mode a successful match exits with 0
- X * from inside PrintWordInfo().
- X */
- Xint
- Xmain(argc, argv)
- X int argc;
- X char *argv[];
- X{
- X extern int optind, getopt(); /* For getopt(3) */
- X extern char *optarg; /* For getopt(3) */
- X int ch; /* For getopt(3) */
- X int ErrorFlag = 0; /* For getopt(3) */
- X int DisplayMode = 0;
- X /* DisplayMode indicates what kind of information we are to
- X * print in response to queries. The values understood are
- X * the DISPLAY_* constants. Perhaps this should be an enum.
- X */
- X
- X progname = argv[0];
- X /* I see this as a library program, so I am leaving the full
- X * path. lqaddfile(1L) and lqphrase(1L) set progname to be
- X * the filename of the command, rather than the full pathname.
- X */
- X
- X SetDefaults(argc, argv);
- X /* Deal with any arguments that are understood by all lqtext
- X * programs.
- X */
- X
- X while ((ch = getopt(argc, argv, "aAD:lsVxZz:")) != EOF) {
- X switch (ch) {
- X case 'a':
- X DisplayMode = DISPLAY_NAME;
- X break;
- X case 'A':
- X DisplayMode = DISPLAY_ALL;
- X break;
- X case 'D':
- X DeleteWord(optarg); /* MISFEATURE */
- X /* This actually removes all entries for the given word
- X * from the database. You need write permission, of
- X * course.
- X */
- X break;
- X case 'l':
- X ListMode = 1;
- X break;
- X case 's':
- X SilentMode = 1;
- X break;
- X case 'V':
- X /* Report the version, then carry on processing as usual. */
- X fprintf(stderr, "%s version %s\n", progname, Revision);
- X break;
- X case 'x':
- X /* -x simply forces the usage message below. */
- X ErrorFlag++;
- X break;
- X case '?':
- X ErrorFlag++;
- X break;
- X case 'z':
- X case 'Z':
- X break; /* done by SetDefaults(); */
- X }
- X }
- X
- X /* Normally put call to lrqError here to give a helpful message,
- X * but not yet ready to ship the error handling package, sorry
- X */
- X if (ErrorFlag) {
- X fprintf(stderr, "%s: options are:\n", progname);
- X fputs("\
- X -D Word -- delete the named word (DANGEROUS!)\n\
- X -l -- list mode, for use with lqshow\n\
- X -s -- silent mode (like grep -s)\n", stderr);
- X DefaultUsage();
- X /* DefaultUsage() prints the list of the standard options. */
- X fputs("\n\
- XIn addition, if no words are given, the following are understood:\n\
- X -a -- print all words\n\
- X -A -- print all matches to all words\n", stderr);
- X exit(1);
- X }
- X
- X if (optind >= argc) {
- X if (SilentMode) exit(1);
- X /* if there were no words given, none of them matched.
- X * It could be argued that this case should be an error.
- X */
- X if (DisplayMode) {
- X AllWordInfo(DisplayMode);
- X } else {
- X /* In this case, there were no command-line options and no
- X * display-mode flags, so we do the default thing.
- X * This happens to be to print every word in the database.
- X * This is probably bogus behaviour -- there should be a better
- X * way of finding words that match a given pattern than using
- X * lqword | grep
- X * which is what this allows.
- X */
- X dbmmarch();
- X }
- X } else {
- X if (!SilentMode && !ListMode) {
- X /* Print some pretty headers */
- X printf(" WID | Where | Total | Word\n");
- X puts(
- X"===========|=========|=========|============================================");
- X }
- X
- X while (optind < argc) {
- X PrintWordInfo(argv[optind++]);
- X }
- X }
- X cleanupdb();
- X /* close database files. This is particularly important if we are
- X * updating the database -- the horrible -D option -- but should
- X * probably be done by liblqtext itself.
- X */
- X exit(SilentMode); /* 0 or 1 (this is a little devious) */
- X#ifdef lint
- X /*NOTREACHED*/
- X return 1;
- X /* this is for versions of lint and gcc that don't understand
- X * that exit() doesn't return -- or, if it does, that there is
- X * nothing that can be done about it!
- X */
- X#endif
- X}
- X
- X/* PrintWordInfo() -- look up one word and report what the index holds
- X * for it.
- X *
- X * Word is folded to lower case IN PLACE (the caller's string is
- X * modified) and then handed to WordRoot() to obtain the form that is
- X * actually looked up.
- X *
- X * Unless SilentMode is set, words that are too common, or that have no
- X * index entry, are reported on stderr and a found word is printed with
- X * Display().  In SilentMode nothing is printed at all: the program
- X * exits with status 0 as soon as a word with at least one match is
- X * found, and otherwise this function simply returns.
- X */
- Xvoid
- XPrintWordInfo(Word)
- X    char *Word;
- X{
- X    extern t_WID Word2WID();
- X    extern char *WordRoot();
- X
- X    register char *p;
- X    t_WordInfo *WordInfo;
- X    t_WID WID;
- X    t_WordInfo Root;
- X
- X    Root.WordPlace.Flags = 0;
- X
- X    /* Remember whether the word originally started with an upper case
- X     * letter.  The <ctype.h> routines are only defined for values in
- X     * the range of unsigned char (and EOF), hence the casts.
- X     */
- X    if (isupper((unsigned char) *Word)) {
- X        Root.WordPlace.Flags |= WPF_UPPERCASE;
- X    }
- X
- X    /* Convert to lower case and measure the length at the same time: */
- X    for (p = Word; *p; p++) {
- X        if (isupper((unsigned char) *p)) {
- X            *p = tolower((unsigned char) *p);
- X        }
- X    }
- X
- X    Root.Length = p - Word;
- X    Root.Word = Word;
- X
- X    /* Now call WordRoot() to find the canonical form: */
- X    Word = WordRoot(&Root);
- X
- X    /** See if the canonical word is too common to list: **/
- X    if (TooCommon(&Root)) {
- X        /* It is in the common word list, so don't look it up at all */
- X        if (!SilentMode) {
- X            fprintf(stderr, "No index information for: %s (too common)\n",
- X                                                                Word);
- X        }
- X        return;
- X    }
- X
- X    /** It is not too common, so look it up: **/
- X    if (((WID = Word2WID(Word, Root.Length)) == (t_WID) 0) ||
- X            (WordInfo = WID2WordInfo(WID)) == (t_WordInfo *) 0) {
- X        if (!SilentMode) {
- X            if (WID) {
- X                /* The word has a non-zero WID but no entry in the word
- X                 * index.  This might happen if the word is being
- X                 * deleted (or added) by someone else at this very
- X                 * moment, or if the database is corrupt.
- X                 */
- X                fprintf(stderr, "No index information for: %s (WID %lu)\n",
- X                                            Word, (unsigned long) WID);
- X            } else {
- X                /* Neither listed as common nor found in the database.
- X                 * Either it was spelt differently there or it isn't
- X                 * there at all.
- X                 */
- X                fprintf(stderr, "No index information for: %s\n", Word);
- X            }
- X        }
- X        return;
- X    }
- X
- X    if (SilentMode) {
- X        /* Silent mode never prints anything.  If this word has matches
- X         * we already know enough to exit; with a lot of words on the
- X         * command line that can be a big efficiency win.  If it has
- X         * none, keep quiet and let the caller try the next word.
- X         */
- X        if (WordInfo->NumberOfWordPlaces > 0) {
- X            exit(0);
- X        }
- X        SlayWordInfo(WordInfo);
- X        return;
- X    }
- X
- X    /** We have the database entry for the word, so print it... **/
- X    Display(WordInfo, DISPLAY_ALL);
- X
- X    /** ...and return the storage used. **/
- X    SlayWordInfo(WordInfo);
- X}
- X
- X/* Display() -- print information about a single word.
- X *
- X * Unless ListMode is set, a one-line summary (WID, Offset, number of
- X * places, and the word itself) is printed first.  The individual
- X * matches are then printed with ShowWordList() if the word has any and
- X * either ListMode is set or Verbose is DISPLAY_ALL.
- X */
- Xvoid
- XDisplay(WordInfo, Verbose)
- X    t_WordInfo *WordInfo;
- X    int Verbose;
- X{
- X    if (!ListMode) {
- X        /* Words in a t_WordInfo might not be null terminated, since the
- X         * storage overhead and the work of putting the nulls there
- X         * might be significant...  so print a terminated copy rather
- X         * than WordInfo->Word itself.
- X         */
- X        char *Buf = emalloc(WordInfo->Length + 1);
- X
- X        (void) strncpy(Buf, WordInfo->Word, WordInfo->Length);
- X        Buf[WordInfo->Length] = '\0';
- X
- X        /* The casts make the arguments agree with the %lu conversions
- X         * whatever the underlying field types are.
- X         */
- X        printf("%10lu | %7lu | %7lu | %s\n",
- X                (unsigned long) WordInfo->WID,
- X                (unsigned long) WordInfo->Offset,
- X                (unsigned long) WordInfo->NumberOfWordPlaces,
- X                Buf);
- X
- X        (void) efree(Buf);
- X            /* reclaim storage */
- X    }
- X
- X    if ((ListMode || Verbose == DISPLAY_ALL) && WordInfo->NumberOfWordPlaces) {
- X        /* There are occurrences in the database (there might not be if
- X         * the word has been deleted, or has only just been added), and
- X         * we want all the matches, so print them in the appropriate
- X         * format:
- X         */
- X        ShowWordList(WordInfo);
- X    }
- X}
- X
- X/* ShowWordList() -- print one line for every occurrence recorded for
- X * WordInfo.
- X *
- X * In ListMode each line is "BlockInFile WordInBlock FileName", and
- X * occurrences whose file name cannot be found are left out.  Otherwise
- X * a wider line is printed, including the word as rebuilt by UnFlag(),
- X * the flags and the StuffBefore value; occurrences with no file name
- X * are then shown with their FID instead.
- X *
- X * The places come from WordInfo->WordPlaces when they all fit in the
- X * index entry, and from Getpblock() otherwise.  Nothing is printed if
- X * Getpblock() fails.
- X */
- Xvoid
- XShowWordList(WordInfo)
- X    t_WordInfo *WordInfo;
- X{
- X    extern t_pblock *Getpblock();
- X    t_FileInfo *GetFileInfo();
- X
- X    t_FileInfo *FileInfo = (t_FileInfo *) 0;
- X    t_pblock *pblock = (t_pblock *) 0;
- X    t_WordPlace *PP = (t_WordPlace *) 0;
- X    int Place;
- X    char *LastRoot = "[internal error lqword.c 392]";
- X        /* the message is in case I make a coding error!.  The number
- X         * was once the line number of the message, but it only needs to
- X         * be a distinct enough message to search for.
- X         */
- X
- X    if (WordInfo->WordPlacesInHere >= WordInfo->NumberOfWordPlaces) {
- X        /* The match info all fits in the index, so it does not matter
- X         * whether automatic pre-fetching from the overflow file "data"
- X         * happens or not (i.e. with Lazy Evaluation it doesn't happen,
- X         * but that makes no difference in this case).
- X         */
- X        PP = WordInfo->WordPlaces;
- X    } else if ((pblock = Getpblock(WordInfo)) != (t_pblock *) 0) {
- X        /* If Lazy Evaluation is enabled, liblqtext might not have
- X         * fetched all of the match information from the overflow
- X         * database, in which case we must do it now.
- X         */
- X        PP = pblock->WordPlaces;
- X    }
- X
- X    if (PP) {
- X        t_FID LastFID = USI_MAX;
- X            /* This is not a plausible FID (File IDentifier), so it
- X             * will force a call to GetFileInfo() in the loop below.
- X             */
- X        unsigned int LastFlags = 256 * 2;
- X            /* Similarly, this is an impossible flag value, since the
- X             * flags are constrained to fit in a single byte.
- X             */
- X
- X        /* cycle through the Places... */
- X        for (Place = 0; Place < WordInfo->NumberOfWordPlaces; Place++) {
- X            char BIF[100];
- X            char WIB[100];
- X            register char *p;
- X            char *Bp, *Wp;
- X            long l;
- X
- X            if (LastFlags != PP[Place].Flags) {
- X                /* UnFlag() takes a canonical (singular, lower-case)
- X                 * word and a set of flags, and reverses the
- X                 * transformations implied by the flags.  For example,
- X                 * if WordInfo->Word is "boy" and flags contain the
- X                 * Plural flag, you should get "boys" returned.
- X                 * Since we don't remember whether a word was in all
- X                 * caps or had only the first letter capitalised (at
- X                 * the moment, anyway), the routine will return Boys
- X                 * even if the input was BOYS or BoYs.
- X                 * Possessives (the boy's books) may also be indicated.
- X                 */
- X                LastFlags = PP[Place].Flags;
- X                LastRoot = UnFlag(WordInfo, LastFlags);
- X            }
- X
- X            if (LastFID != PP[Place].FID || FileInfo == (t_FileInfo *) 0) {
- X                /* The first part of the test means we don't call the
- X                 * function to retrieve the file name lots of times if
- X                 * there are multiple matches in the same data file.
- X                 * This turns out to be a common case.
- X                 */
- X
- X                /* Reclaim storage from the previous file */
- X                if (FileInfo) {
- X                    if (FileInfo->Name) {
- X                        (void) efree(FileInfo->Name);
- X                    }
- X                    (void) efree(FileInfo);
- X                }
- X
- X                /* Find the file name from the FID.  This routine should
- X                 * be called FID2FileName(), and may in fact be renamed
- X                 * in the future.
- X                 */
- X                LastFID = PP[Place].FID;
- X                FileInfo = GetFileInfo(LastFID);
- X                if (FileInfo == (t_FileInfo *) 0) {
- X                    /* No filename information available.  This
- X                     * sometimes happens if you run lqword during an
- X                     * lqaddfile session and match a word in one of the
- X                     * new files.  Note that if the output is for reuse,
- X                     * we don't want to include references to files
- X                     * whose names we don't have!
- X                     */
- X                    if (!ListMode) {
- X                        printf("%20s | %-.5lu/%-.3lu | [FID %lu]\n",
- X                                LastRoot,
- X                                (unsigned long) PP[Place].BlockInFile,
- X                                (unsigned long) PP[Place].WordInBlock,
- X                                (unsigned long) PP[Place].FID);
- X                    }
- X                    continue;
- X                }
- X            }
- X
- X            /* This is an inline printf, because otherwise this call
- X             * to printf takes over 20% of the execution time, and
- X             * nearly 40% for a frequent word (e.g. over 1000 places) !!
- X             * The digits are generated backwards from the end of each
- X             * buffer.
- X             */
- X            p = &BIF[sizeof(BIF) - 1];
- X            *p = '\0';
- X            if (PP[Place].BlockInFile == 0) {
- X                *--p = '0';
- X            } else {
- X                for (l = PP[Place].BlockInFile; l; l /= 10) {
- X                    *--p = "0123456789"[l % 10];
- X                }
- X            }
- X            Bp = p;
- X
- X            p = &WIB[sizeof(WIB) - 1];
- X            *p = '\0';
- X            {
- X                register int i = PP[Place].WordInBlock;
- X
- X                if (i == 0) {
- X                    *--p = '0';
- X                } else {
- X                    for (; i; i /= 10) {
- X                        *--p = "0123456789"[i % 10];
- X                    }
- X                }
- X                Wp = p;
- X            }
- X
- X            if (ListMode) {
- X                fputs(Bp, stdout);
- X                putchar(' ');
- X                fputs(Wp, stdout);
- X                putchar(' ');
- X                puts(FileInfo->Name);
- X            } else {
- X                /* Well, if we are not reusing the output, maybe the
- X                 * speed is not quite so critical...  The casts make
- X                 * each argument agree with its conversion.
- X                 */
- X                printf("%20s | %5lu/%3lu F=%3u S=%3u | %s\n",
- X                        LastRoot,
- X                        (unsigned long) PP[Place].BlockInFile,
- X                        (unsigned long) PP[Place].WordInBlock,
- X                        (unsigned int) PP[Place].Flags, /* XXX */
- X                        (unsigned int) PP[Place].StuffBefore,
- X                        FileInfo->Name);
- X            }
- X        }
- X    }
- X
- X    /* Release the file information left over from the last place. */
- X    if (FileInfo) {
- X        if (FileInfo->Name) {
- X            (void) efree(FileInfo->Name);
- X        }
- X        (void) efree(FileInfo);
- X    }
- X
- X    if (pblock) {
- X        /* If we had to go and get the matches ourselves, we had better
- X         * release the storage.  The WordInfo itself belongs to the
- X         * caller.
- X         */
- X        (void) efree(pblock);
- X    }
- X}
- X
- X/* AllWordInfo() -- call Display() for every word in the database.
- X *
- X * Every WID from 1 up to the value returned by GetMaxWID() is tried in
- X * turn; Verbose is passed straight through to Display().  Unless
- X * ListMode is set, a final line reports the maximum WID.
- X */
- Xvoid
- XAllWordInfo(Verbose)
- X    int Verbose;
- X{
- X    extern char *WID2Word();
- X    extern t_WID GetMaxWID();
- X
- X    t_WID LastWID = GetMaxWID();
- X    t_WID Current;
- X
- X    for (Current = (t_WID) 1; Current <= LastWID; Current++) {
- X        char *Name = WID2Word(Current);
- X        t_WordInfo *Entry;
- X
- X        if (Name == (char *) 0) {
- X            /* This WID is unused.  There might be gaps if a word was
- X             * deleted.
- X             */
- X            continue;
- X        }
- X
- X        Entry = WID2WordInfo(Current);
- X        if (Entry != (t_WordInfo *) 0) {
- X            Display(Entry, Verbose);
- X            SlayWordInfo(Entry);
- X        } else if (!ListMode) {
- X            /* The word is known, but there is no further information
- X             * about it.  In the current implementation, this cannot
- X             * happen unless someone else is updating the database and
- X             * replacing a WID whose word had been deleted.
- X             * In list mode the output is probably wanted by another
- X             * program, so this (useless) entry is not printed there.
- X             */
- X            printf("%10lu | %7lu | | ?? %s\n", Current, 0L, Name);
- X        }
- X
- X        /* Reclaim the storage used for the name */
- X        (void) efree(Name);
- X    }
- X
- X    if (!ListMode) {
- X        printf("Maximum WID is %lu\n", LastWID);
- X    }
- X}
- X
- X/* dbmmarch() -- write every key of the WordIndex dbm database to the
- X * standard output, one per line.  If the database is being updated at
- X * the same time, some keys might be missed.
- X *
- X * Exits with status 1 if the database cannot be opened.
- X */
- Xvoid
- Xdbmmarch()
- X{
- X    DBM *db = startdb(WordIndex);
- X    datum Key;
- X
- X    if (db == (DBM *) 0) {
- X        /* WordIndex is the list of words, defined in "globals.h".
- X         * If we didn't open it, the user probably has not set
- X         * $LQTEXTDIR, or didn't use the -d database-dir option that
- X         * is handled by SetDefaults() called from main().
- X         */
- X        fprintf(stderr, "Can't open database file \"%s\"\n", WordIndex);
- X        exit(1);
- X    }
- X
- X    /* The word database maps words to WIDs, with entries that look like
- X     *     (key = "Word", content = WID)
- X     * so walking the keys visits each word.
- X     */
- X    Key = dbm_firstkey(db);
- X    while (Key.dsize != 0) {
- X        register int n;
- X
- X        /* IMPORTANT NOTE:
- X         * The words are not nul-terminated in the database, so they
- X         * are written a byte at a time rather than with printf() or
- X         * puts().
- X         */
- X        for (n = 0; n < Key.dsize; n++) {
- X            putchar(Key.dptr[n]);
- X        }
- X        putchar('\n');
- X
- X        Key = dbm_nextkey(db);
- X    }
- X    enddb(db);
- X}
- X
- X/*
- X * $Log: lqword.c,v $
- X * Revision 2.8 90/10/06 00:51:00 lee
- X * Prepared for first beta release.
- X *
- X * Revision 2.7 90/08/29 21:45:37 lee
- X * Alpha release
- X *
- X * Revision 2.6 90/08/08 22:22:53 lee
- X * Added heavy comments. Cleaned up dbmmarch() and made some other
- X * minor fixes.
- X *
- X * Revision 2.5 90/08/08 21:06:21 lee
- X * Added -x option; removed rude message about getpts bugs.
- X *
- X * Revision 2.4 90/04/21 18:50:38 lee
- X * fixed a serious bug in the -l mode -- now prints the entire match!
- X *
- X * Revision 2.3 90/03/27 13:20:57 lee
- X * now passes gcc -Wall
- X *
- X * Revision 2.2 89/10/08 20:47:23 lee
- X * Working version of nx-text engine. Addfile and wordinfo work OK.
- X *
- X * Revision 2.1 89/10/02 01:16:10 lee
- X * New index format, with Block/WordInBlock/Flags/BytesSkipped info.
- X *
- X * Revision 1.3 89/09/17 23:04:42 lee
- X * Various fixes; NumberInBlock now a short...
- X *
- X * Revision 1.2 89/09/16 21:18:50 lee
- X * First demonstratable version.
- X *
- X * Revision 1.1 89/09/07 21:06:14 lee
- X * Initial revision
- X *
- X */
- @@@End of lq-text/src/lqtext/lqword.c
- echo x - lq-text/src/lqtext/matchword.sh 1>&2
- sed 's/^X//' >lq-text/src/lqtext/matchword.sh <<'@@@End of lq-text/src/lqtext/matchword.sh'
- X:
- X# matchword pattern [...] -- grep for words in the database
- X#
- X# matchword -- Copyright 1990 Liam R. Quin. All Rights Reserved.
- X# This code is NOT in the public domain.
- X# See the file ../COPYRIGHT for full details.
- X#
- X# $Id: matchword.sh,v 1.2 90/10/06 00:51:02 lee Rel1-10 $
- X#
- X
- X# "echo" portability test:
- XN=; C='\c'; if [ x"`echo -n hello`" = x"hello" ]; then N=-n;C=; fi
- Xexport N C
- X
- X# Prompt for patterns until the user types "q" or the input ends.
- Xans=no
- Xwhile [ x"$ans" != x"q" ]
- Xdo
- X    echo $N "Enter a word or pattern: $C"
- X    # Stop at end of input; otherwise "read" keeps failing and the
- X    # loop would prompt for ever.
- X    if read pattern
- X    then :
- X    else
- X	echo
- X	break
- X    fi
- X    if [ x"$pattern" = x"q" ]
- X    then
- X	break
- X    fi
- X    WORDS=`lqword | grep "^${pattern}\$"`
- X    if [ "$WORDS" = "" ]
- X    then echo "(no match in the database for ${pattern})"
- X    else echo `echo "$WORDS" | wc -l` words found:
- X	echo "$WORDS" | sort -d | rs | ${PAGER-more}
- X	# If you don't have rs, you could use cat instead.
- X	# PAGER could also be "pg -nse", or "less -q".
- X    fi
- Xdone
- X
- @@@End of lq-text/src/lqtext/matchword.sh
- echo x - lq-text/src/lqtext/sizes.c 1>&2
- sed 's/^X//' >lq-text/src/lqtext/sizes.c <<'@@@End of lq-text/src/lqtext/sizes.c'
- X/* sizes.c -- Copyright 1990 Liam R. Quin. All Rights Reserved.
- X * This code is NOT in the public domain.
- X * See the file COPYRIGHT for full details.
- X */
- X
- X#ifndef lint
- X static char *Rcs = "$Id: sizes.c,v 1.3 90/10/06 00:51:03 lee Rel1-10 $";
- X#endif
- X
- X#include "globals.h" /* defines and declarations for database filenames */
- X
- X#include <stdio.h>
- X#include <sys/types.h>
- X#include "fileinfo.h"
- X#include "wordinfo.h"
- X#include "pblock.h"
- X#include "wordrules.h"
- X#include "wordindex.h"
- X
- X/* main() -- print the size in bytes of each of the main lq-text data
- X * structures, one per line, and exit with status 0.
- X */
- Xint
- Xmain()
- X{
- X    /* sizeof yields a size_t, which need not have the same width as
- X     * unsigned int, so convert explicitly to match the %u conversions.
- X     */
- X    printf("FileInfo %u bytes\n", (unsigned int) sizeof(t_FileInfo));
- X    printf("WordInfo %u bytes\n", (unsigned int) sizeof(t_WordInfo));
- X    printf("WordPlace %u bytes\n", (unsigned int) sizeof(t_WordPlace));
- X    printf("pblock %u bytes\n", (unsigned int) sizeof(t_pblock));
- X    return 0;
- X}
- @@@End of lq-text/src/lqtext/sizes.c
- echo x - lq-text/src/lqtext/wordtable.c 1>&2
- sed 's/^X//' >lq-text/src/lqtext/wordtable.c <<'@@@End of lq-text/src/lqtext/wordtable.c'
- X/* wordtable.c -- Copyright 1989, 1990 Liam R. Quin. All Rights Reserved.
- X * This code is NOT in the public domain.
- X * See the file ../COPYRIGHT for full details.
- X */
- X
- X/* Symbol Table Interface to text retrieval database.
- X * Handles both the internal and external indexes.
- X *
- X * This originally used a linked list. Converting to a hash table reduced
- X * the time to index comp.os.vms from nearly an hour to one and a half
- X * minutes...
- X *
- X * Liam Quin, 1989
- X */
- X
- X/*
- X * $Id: wordtable.c,v 2.11 91/02/20 19:07:37 lee Rel1-10 $
- X */
- X
- X#ifndef lint
- X static char *Rcs = "$Id: wordtable.c,v 2.11 91/02/20 19:07:37 lee Rel1-10 $";
- X#endif
- X
- X#include "globals.h" /* defines and declarations for database filenames */
- X
- X#ifdef SYSV
- Xextern int _filbuf();
- X#endif
- X#include <stdio.h>
- X#include <malloc.h>
- X#include <ctype.h>
- X#include <sys/types.h>
- X#include <fcntl.h> /* for O_RDWR wtc */
- X#include "smalldb.h"
- X#include "fileinfo.h"
- X#include "wordinfo.h"
- X#include "pblock.h"
- X#include "wordrules.h"
- X#include "emalloc.h"
- X
- X#define HASHSIZ 32768 /* MUST be a power of two */
- X
- X#ifndef MAXWORDSINCACHE
- X# define MAXWORDSINCACHE (HASHSIZ * 10)
- X#endif
- Xint MaxWordsInCache = MAXWORDSINCACHE;
- X
- Xextern int AsciiTrace;
- X
- X/* useful macros */
- X#define NumberOfElements(array, type) (sizeof(array)/sizeof(type))
- X#define STRCMP(a,b) ((*(a) > *(b)) ? 1 : ((*(a) < *(b)) ? -1 : strcmp(a,b)) )
- X/* #define Hash(WordInfo) \
- X * (dbm_hash(WordInfo->Word, WordInfo->Length) % HashSize)
- X */
- X
- X/** System calls and library functions used in this file: **/
- X
- X/** Lqtext calls */
- Xextern unsigned int Putpblock();
- Xextern void DeleteWordPlaces();
- X
- X/** System calls: */
- X
- X/** Library Functions: */
- Xextern char *strncpy();
- Xextern int strcmp();
- Xextern void perror();
- Xextern void exit();
- X/**/
- X
- X#define enew(var, type) (var = (type *) emalloc(sizeof (type)))
- X
- Xextern char *progname;
- Xstatic int HashSize = HASHSIZ; /* MUST be a power of two */
- X
- X#ifdef NEWSYM
- X
- X#define NPLACES 7
- X/* This is small to optimise the common case -- by far the majority of
- X * words are used less than 10 times. In the cases where we've gone
- X * wrong, well, there'll be a few thousand.
- X */
- X
- X/* One element of the in-memory word cache.  The elements in the
- X * SymbolTable array are the hash slots themselves; further elements
- X * are chained from a slot through Next when a word has more than
- X * NPLACES occurrences (see AddWord()).
- X */
- Xtypedef struct s_HashEl {
- X char *Word; /* private copy of the word; zero marks an empty slot */
- X t_WID WID; /* (t_WID) -1 until the WID has been looked up */
- X int PlacesUsed; /* number of entries of Places[] that are in use */
- X t_WordPlace Places[NPLACES];
- X struct s_HashEl *Next; /* overflow element for the same word, or zero */
- X} t_HashEl;
- X
- Xstatic t_HashEl *SymbolTable; /* the slots; allocated by StartHash() */
- Xstatic t_HashEl *LastEl; /* one past the last slot; zero until StartHash() */
- Xstatic int WordsInCache = 0; /* bumped by AddWord(), reset by DumpCache() */
- X
- X/* StartHash() -- allocate the symbol table and mark every slot empty.
- X *
- X * If MaxWordsInCache is non-zero the table has MaxWordsInCache / 16
- X * slots; otherwise HashSize keeps its current value.  On return LastEl
- X * points one past the last slot and MaxWordsInCache has been set to
- X * the number of slots.
- X */
- XStartHash()
- X{
- X    register t_HashEl *Slot;
- X
- X    if (MaxWordsInCache) {
- X        HashSize = MaxWordsInCache / 16;
- X    }
- X
- X    SymbolTable = (t_HashEl *) emalloc(sizeof(t_HashEl) * HashSize);
- X    LastEl = &SymbolTable[HashSize];
- X
- X    /* Only the Word pointers need initialising: a zero Word marks the
- X     * slot as empty, and the rest is filled in when the slot is used.
- X     */
- X    for (Slot = SymbolTable; Slot != LastEl; Slot++) {
- X        Slot->Word = (char *) 0;
- X    }
- X
- X    MaxWordsInCache = HashSize;
- X}
- X
- X/* SetElEmpty() -- put a hash element into its initial, unused state:
- X * no word, no places, no overflow chain, and a WID of (t_WID) -1.
- X */
- XSetElEmpty(El)
- X    t_HashEl *El;
- X{
- X    El->Next = (t_HashEl *) 0;
- X    El->PlacesUsed = 0;
- X    /* The WID is NOT set to zero, so that we can distinguish between
- X     * unknown and "haven't looked".
- X     */
- X    El->WID = (t_WID) -1;
- X    El->Word = (char *) 0;
- X}
- X
- Xvoid DumpCache();
- X
- X/* AddWord() -- record one occurrence of a word in the in-memory cache.
- X *
- X * WordInfo->Word names the word and WordInfo->WordPlace is the
- X * occurrence to remember.  A null or empty word is reported on stderr
- X * and ignored.  Words made up of a "q" followed only by zero or more
- X * "x"s are silently discarded.
- X *
- X * The table is created on the first call.  It is flushed with
- X * DumpCache(1) when the call counter WordsInCache passes
- X * MaxWordsInCache, or when no free slot can be found for a new word.
- X */
- Xvoid
- XAddWord(WordInfo)
- X    t_WordInfo *WordInfo;
- X{
- X    extern char *strcpy();
- X    extern t_WID Word2WID();
- X
- X    register t_HashEl *HashEl;
- X    int Slot;
- X    t_HashEl *FirstEl;
- X
- X    if (!WordInfo || !WordInfo->Word || !WordInfo->Word[0]) {
- X        (void) fprintf(stderr, "%s: warning: Null Word in AddWord\n", progname);
- X        return;
- X    }
- X
- X    if (!LastEl) {
- X        StartHash();
- X    } else if (MaxWordsInCache && ++WordsInCache > MaxWordsInCache) {
- X        DumpCache(1);
- X    }
- X
- X    if (WordInfo->Word[0] == 'q') {
- X        register char *xp;
- X
- X        for (xp = &WordInfo->Word[1]; *xp && *xp == 'x'; xp++) {
- X            /*NULLBODY*/
- X        }
- X        if (!*xp) {
- X            /* "q", "qx", "qxx", ... are never indexed */
- X            if (AsciiTrace >= 10) {
- X                (void) fprintf(stderr, "Discard %s\n", WordInfo->Word);
- X            }
- X            return;
- X        }
- X    }
- X
- X    Slot = Hash(WordInfo);
- X    FirstEl = HashEl = &SymbolTable[Slot];
- X
- X    /* Probe forward from the hashed slot, wrapping at the end of the
- X     * table, until we find either this word or an empty slot.
- X     */
- X    for (;;) {
- X        if (!HashEl->Word) {
- X            if (AsciiTrace > 9) {
- X                (void) fputs("New ", stderr);
- X            }
- X            /* make a new element */
- X            SetElEmpty(HashEl);
- X            HashEl->Word = emalloc(WordInfo->Length + 1);
- X            /* NOTE(review): this assumes WordInfo->Word is terminated
- X             * and no longer than WordInfo->Length -- confirm callers.
- X             */
- X            (void) strcpy(HashEl->Word, WordInfo->Word);
- X            HashEl->WID = Word2WID(HashEl->Word, WordInfo->Length);
- X            break;
- X        } else if (STREQ(HashEl->Word, WordInfo->Word)) {
- X            break;
- X        }
- X
- X        if (++HashEl == LastEl) HashEl = SymbolTable;
- X
- X        if (HashEl == FirstEl) {
- X            /* The table is full: dump the cache and start again */
- X            DumpCache(1);
- X            AddWord(WordInfo);
- X            return;
- X        }
- X    }
- X
- X    /* If we get here, all we need to do is add the WordPlace */
- X    if (AsciiTrace > 9) {
- X        (void) fprintf(stderr, "AddWord %s\n", WordInfo->Word);
- X    }
- X    FirstEl = HashEl;
- X
- X    /* Find an element on this word's chain that still has room */
- X    while (HashEl->PlacesUsed >= NPLACES && HashEl->Next != (t_HashEl *) 0) {
- X        HashEl = HashEl->Next;
- X    }
- X
- X    if (HashEl->PlacesUsed >= NPLACES) {
- X        /* Every element is full, so link a new one in directly after
- X         * the head of the chain.  It comes from emalloc() so that a
- X         * failed allocation is caught, and so that it matches the
- X         * efree() done by DumpCache().
- X         */
- X        t_HashEl *New;
- X
- X        New = (t_HashEl *) emalloc(sizeof(t_HashEl));
- X        SetElEmpty(New);
- X
- X        New->Next = FirstEl->Next;
- X        FirstEl->Next = HashEl = New;
- X    }
- X    HashEl->Places[HashEl->PlacesUsed] = WordInfo->WordPlace; /* structure copy */
- X    HashEl->PlacesUsed++;
- X    return;
- X}
- X
- X/* DumpCache() -- write every word held in the in-memory cache out to
- X * the index, and reset the WordsInCache counter.
- X *
- X * If CallFree is non-zero, the storage for each word is released as it
- X * is written and its slot is marked empty.  If CallFree is zero
- X * nothing is released, and the table contents are left as they were.
- X *
- X * With AsciiTrace greater than 1 a progress character is written to
- X * stderr roughly every sixteenth of the way through the table.
- X */
- Xvoid
- XDumpCache(CallFree)
- X    int CallFree;
- X{
- X    extern t_WordInfo *MakeWordInfo();
- X    extern t_WID Word2WID();
- X        /* Must be declared here: it does not return an int. */
- X    static char Ticks[] = " 01234567890ABCDEFGHIJKL";
- X
- X    register t_HashEl *HashEl, *MeNext;
- X    int Progress = 0;
- X
- X    for (HashEl = SymbolTable; HashEl != LastEl; HashEl++) {
- X        if (HashEl->Word) {
- X            unsigned len;
- X            t_WordInfo *WP;
- X
- X            /* We are going to make a new index entry for the word.
- X             * There are two cases -- depending on whether the word
- X             * is already indexed or not.
- X             * In the former case we must merge the new information.
- X             * In the latter case we don't have to read the old info,
- X             * but we must make a new entry in the WID Index.
- X             */
- X
- X            len = strlen(HashEl->Word);
- X            if (HashEl->WID == (t_WID) -1) {
- X                HashEl->WID = Word2WID(HashEl->Word, len);
- X            }
- X            WP = MakeWordInfo(HashEl->WID, len, HashEl->Word);
- X
- X            if (HashEl->WID == (t_WID) 0) {
- X                NewEntry(HashEl, WP);
- X            } else {
- X                UpdateEntry(HashEl, WP);
- X            }
- X
- X            /* Reclaim storage */
- X            if (CallFree) {
- X                extern void SlayWordInfo();
- X                register t_HashEl *FreeMe = HashEl;
- X
- X                (void) SlayWordInfo(WP);
- X
- X                efree(HashEl->Word);
- X                FreeMe->Word = (char *) 0;
- X                /* The first element is part of the table itself, so
- X                 * only the overflow elements chained from it are freed.
- X                 */
- X                FreeMe = FreeMe->Next;
- X                while (FreeMe) {
- X                    MeNext = FreeMe->Next;
- X                    (void) efree((char *) FreeMe);
- X                    FreeMe = MeNext;
- X                }
- X            }
- X        }
- X        if (AsciiTrace > 1) {
- X            if (HashEl - SymbolTable >= Progress * (HashSize / 16)) {
- X                /* HashSize need not be a multiple of 16, so there can
- X                 * be more steps than progress characters; never index
- X                 * past the end of the string.
- X                 */
- X                if (Progress < (int) sizeof(Ticks) - 1) {
- X                    fputc(Ticks[Progress], stderr);
- X                }
- X                ++Progress;
- X            }
- X        }
- X    }
- X    WordsInCache = 0;
- X}
- X
- X/* NewEntry() -- create the index entry for a word that is not yet in
- X * the database.
- X *
- X * HashEl is the head of the word's chain of cached places and WP is
- X * the t_WordInfo made for it by the caller.  A new WID is assigned to
- X * WP, the cached places are gathered into a pblock, and the result is
- X * written to the index -- with the places going through Putpblock()
- X * when MkWIB() has not put them all into the index entry.
- X *
- X * If the index cannot be written, a message is printed and the program
- X * exits with status 1.  The caller *must* do SlayWordInfo(WP).
- X */
- XNewEntry(HashEl, WP)
- X    t_HashEl *HashEl;
- X    t_WordInfo *WP;
- X{
- X    extern t_WID GetNextWID();
- X    t_pblock *pblock;
- X    long MatchCount;
- X    t_HashEl *Ep;
- X
- X    /** Assign a new WID */
- X    WP->WID = GetNextWID();
- X
- X    /** make a WIDIndex entry and mark it as invalid (NOTDONE) */
- X
- X    /* In order to do this, we must make a "pblock", a structure that
- X     * reflects the physical database.  This is fairly low-level stuff
- X     * for efficiency's sake...
- X     */
- X
- X    /* count the total number of entries we're adding: */
- X    for (Ep = HashEl, MatchCount = 0; Ep; Ep = Ep->Next) {
- X        MatchCount += Ep->PlacesUsed;
- X    }
- X
- X    /* allocate a pblock structure.  These are rather devious things, a
- X     * structure with an array tacked onto the end.
- X     */
- X    pblock = (t_pblock *) emalloc(sizeof(t_pblock) +
- X                                MatchCount * sizeof(t_WordPlace));
- X
- X    pblock->WID = WP->WID;
- X    pblock->ChainStart = 0L; /* address on disk -- not there yet, so 0! */
- X    pblock->NumberOfWordPlaces = WP->NumberOfWordPlaces = MatchCount;
- X
- X    /* fill in the WordPlaces */
- X    for (Ep = HashEl, MatchCount = 0; Ep; Ep = Ep->Next) {
- X        register int i;
- X
- X        for (i = 0; i < Ep->PlacesUsed; i++) {
- X            pblock->WordPlaces[MatchCount++] = Ep->Places[i]; /* struct copy */
- X        }
- X    }
- X
- X    /* Now fill in enough of WP to let us use the low-level routines: */
- X    WP->FID = (t_FID) 0;
- X    WP->Next = (t_WordInfo *) 0;
- X    WP->DataBlock = (char *) 0;
- X    WP->WordPlaceStart = (char *) 0;
- X    WP->WordPlaces = (t_WordPlace *) 0;
- X    WP->WordPlacesInHere = 0;
- X    WP->WordPlace.FID = 0;
- X    WP->WordPlace.Flags = 0;
- X    WP->Offset = 0;
- X
- X    /* First, let's make an index entry: */
- X    /* NOTE(review): the parentheses below divide only (Length+2) by 3;
- X     * confirm that (WIDBLOCKSIZE-(Length+2))/3 was not intended.
- X     */
- X#ifndef MaxWordPlacesInAWordBlock
- X# define MaxWordPlacesInAWordBlock ((WIDBLOCKSIZE-(WP->Length+2)/3))
- X#endif
- X    if (pblock->NumberOfWordPlaces <= MaxWordPlacesInAWordBlock) {
- X        (void) MkWIB(WP, pblock);
- X    }
- X
- X    /** write out the new entry */
- X    if (WP->WordPlacesInHere == pblock->NumberOfWordPlaces) {
- X        /* In this case it all fits into the main index */
- X        if (PutWordInfoIntoIndex(WP, (unsigned long) 0L) < 0) {
- X            extern int errno;
- X            int e = errno;
- X
- X            fprintf(stderr, "%s: Couldn't insert word \"%s\" into the index: ",
- X                                            progname, WP->Word);
- X            /* fprintf() may have changed errno, so put back the value
- X             * that describes the real failure before reporting it.
- X             */
- X            errno = e;
- X            perror("");
- X            exit(1);
- X        }
- X    } else {
- X        (void) Putpblock(WP, pblock);
- X        if (PutWordInfoIntoIndex(WP, pblock->ChainStart) < 0) {
- X            extern int errno;
- X            int e = errno;
- X
- X            fprintf(stderr, "%s: Couldn't re-insert word \"%s\" into the index: ",
- X                                            progname, WP->Word);
- X            errno = e; /* as above */
- X            perror("");
- X            exit(1);
- X        }
- X    }
- X
- X    /** mark it as valid (NOTDONE) */
- X
- X    /** reclaim storage */
- X    (void) efree((char *) pblock);
- X    /* the caller *must* do SlayWordInfo(WP) */
- X}
- X
- X/* UpdateEntry() -- merge newly cached places into the existing index
- X * entry for a word.
- X *
- X * HashEl is the head of the word's chain of cached places and WP is
- X * the t_WordInfo made for it by the caller, with WP->WID set.  The old
- X * entry is read, the cached places are appended to its pblock, the old
- X * places are deleted from disk and the merged entry is written back.
- X * If the old entry has vanished, NewEntry() is used instead.
- X *
- X * If the index cannot be written, a message is printed and the program
- X * exits with status 1.  The caller *must* do SlayWordInfo(WP).
- X */
- XUpdateEntry(HashEl, WP)
- X    t_HashEl *HashEl;
- X    t_WordInfo *WP;
- X{
- X    extern t_pblock *Getpblock();
- X    extern t_WordInfo *WID2WordInfo();
- X    t_pblock *pblock;
- X    long MatchCount;
- X    t_HashEl *Ep;
- X    t_WordInfo *Wpp;
- X
- X    /** Mark the old entry as invalid (NOTDONE) */
- X
- X    /** get the old entry */
- X    if ((Wpp = WID2WordInfo(WP->WID)) == (t_WordInfo *) 0) {
- X        /* someone else has just deleted it! */
- X        NewEntry(HashEl, WP);
- X        return;
- X    }
- X    /* It would be best if we could append to the old entry... which is
- X     * what I had in mind when I designed the disk storage stuff... but
- X     * you can't.
- X     */
- X    /* NOTE(review): Getpblock() can return zero (ShowWordList() in
- X     * lqword.c tests for it); that case is not handled here -- confirm.
- X     */
- X    pblock = Getpblock(Wpp);
- X
- X    /** merge the old and new entries */
- X
- X    /* count the total number of entries we're adding: */
- X    for (Ep = HashEl, MatchCount = 0; Ep; Ep = Ep->Next) {
- X        MatchCount += Ep->PlacesUsed;
- X    }
- X
- X    /* make room for the old places plus the new ones */
- X    pblock = (t_pblock *) erealloc((char *) pblock, sizeof(t_pblock) +
- X        (Wpp->NumberOfWordPlaces + MatchCount) * sizeof(t_WordPlace));
- X
- X    /* delete the old entry from disk */
- X    if (Wpp->Offset) {
- X        DeleteWordPlaces(Wpp->Offset, Wpp->WID);
- X    }
- X
- X    /* append the new WordPlaces after the old ones */
- X    for (Ep = HashEl; Ep; Ep = Ep->Next) {
- X        register int i;
- X
- X        for (i = 0; i < Ep->PlacesUsed; i++) {
- X            pblock->WordPlaces[pblock->NumberOfWordPlaces++] =
- X                                        Ep->Places[i]; /* struct copy */
- X        }
- X    }
- X
- X    Wpp->Offset = 0L; /* it's invalid now... */
- X    Wpp->WordPlacesInHere = 0;
- X
- X    /* First, let's make an index entry: */
- X    /* NOTE(review): MkWIB() is given WP here, but it is Wpp that is
- X     * tested and written out below -- confirm which was intended.
- X     */
- X    if (pblock->NumberOfWordPlaces <= MaxWordPlacesInAWordBlock) {
- X        (void) MkWIB(WP, pblock);
- X    }
- X
- X    /** write out the new entry */
- X    if (Wpp->WordPlacesInHere == pblock->NumberOfWordPlaces) {
- X        /* In this case it all fits into the main index */
- X        if (PutWordInfoIntoIndex(Wpp, (unsigned long) 0L) < 0) {
- X            extern int errno;
- X            int e = errno;
- X
- X            fprintf(stderr, "%s: Couldn't insert word \"%s\" into the index: ",
- X                                            progname, Wpp->Word);
- X            /* fprintf() may have changed errno, so put back the value
- X             * that describes the real failure before reporting it.
- X             */
- X            errno = e;
- X            perror("");
- X            exit(1);
- X        }
- X    } else {
- X        (void) Putpblock(Wpp, pblock);
- X        if (PutWordInfoIntoIndex(Wpp, pblock->ChainStart) < 0) {
- X            extern int errno;
- X            int e = errno;
- X
- X            fprintf(stderr, "%s: Couldn't re-insert word \"%s\" into the index: ",
- X                                            progname, Wpp->Word);
- X            errno = e; /* as above */
- X            perror("");
- X            exit(1);
- X        }
- X    }
- X
- X    /** mark it as valid (NOTDONE) */
- X
- X    /** reclaim storage */
- X    (void) efree((char *)pblock);
- X    /* the caller *must* do SlayWordInfo(WP) */
- X    (void) SlayWordInfo(Wpp);
- X}
- X
- X#else /* NEWSYM */
- X/* The word cache: HASHSIZ chains of t_WordPlaceList, one per hash slot.
- X * AddWord() picks the slot with Hash(); DumpCache() walks every slot.
- X */
- Xstatic t_WordPlaceList *SymbolTable[HASHSIZ]; /* static --> initialised to 0 */
- X#endif /* NEWSYM */
- X
- X#ifndef Hash
- X/* Hash -- return the hash slot for a word.
- X *
- X * The first WordInfo->Length bytes of WordInfo->Word are folded into an
- X * unsigned long with  n = c + 65599 * n,  and the result is reduced
- X * modulo HashSize.  With DUFF defined the loop is unrolled eight ways
- X * (Duff's device); both variants compute the same value.
- X *
- X * The whole definition, including the optional "inline", is skipped when
- X * Hash is already defined as a macro; the inline guard used to sit
- X * outside this #ifndef and would then have attached itself to the next
- X * declaration in the file.
- X *
- X * NOTE(review): the guard tests __GNU__, whereas gcc itself predefines
- X * __GNUC__; left unchanged so that linkage is not altered -- confirm
- X * which was meant.
- X */
- X#ifdef __GNU__
- Xinline
- X#endif
- Xint
- XHash(WordInfo)
- X    t_WordInfo *WordInfo;
- X{
- X    register unsigned long n = 0;
- X    register int len = WordInfo->Length;
- X    register char *str = WordInfo->Word;
- X
- X#ifdef DUFF /* clever stuff for speedup... dmr-approved!... */
- X
- X#define HASHC n = *str++ + 65599 * n
- X
- X    if (len > 0) {
- X        /* number of passes through the unrolled body, rounding up */
- X        register int loop = (len + 8 - 1) >> 3;
- X
- X        /* jump into the middle of the first pass to eat len % 8 bytes */
- X        switch(len & (8 - 1)) {
- X        case 0: do {
- X                HASHC; case 7: HASHC;
- X        case 6: HASHC; case 5: HASHC;
- X        case 4: HASHC; case 3: HASHC;
- X        case 2: HASHC; case 1: HASHC;
- X            } while (--loop);
- X        }
- X
- X    }
- X#else /* DUFF */
- X    while (len--)
- X        n = *str++ + 65599 * n;
- X#endif /* DUFF */
- X    /**
- X    return n & (HashSize - 1);
- X    **/
- X    return n % HashSize;
- X}
- X#endif /* !Hash */
- X
- X/* HashOK is 0 until InitHash() has run; the non-NEWSYM AddWord() tests it
- X * and calls InitHash() itself on first use.
- X */
- Xstatic int HashOK = 0;
- X
- X/* InitHash -- mark the hash table as initialised.  All it does is set
- X * HashOK to 1.
- X */
- Xvoid
- XInitHash()
- X{
- X HashOK = 1;
- X}
- X
- X#ifndef NEWSYM
- Xstatic int WordsInCache = 0;
- X
- X/* FIXME: this ought to take a WordInfo and a WordPlaceList instead.
- X * Using a hash table means that we can end up with really pathological
- X * paging behaviour. Nearly all of lqaddfile is resident when running
- X * on a Sun. Hence, I shall be replacing this code entirely soon with
- X * something that has less memory fragmentation, perhaps by coalescing
- X * list members or with a tree.
- X * For now, MaxWordsInCache is a parameter that you can set to zero if
- X * you want.
- X *
- X * Also, the cache structure should be clever enough to avoid writing
- X * out the more common words if it can, so as to minimise the number
- X * of data _fetches_ that have to be done.
- X * You could also argue that it should be more efficient to add new data,
- X * of course. I couldn't disagree.
- X *
- X * Next change required is to make AddWord do a little more of the work --
- X * in particular, to call Word2WID for each new word, in an attempt to
- X * make cache dumping faster.
- X */
- X
- X/* AddWord -- add one occurrence of a word to the in-memory cache.
- X *
- X * WordInfo supplies the word (Word, Length) and where it was seen
- X * (WordPlace).  A new t_WordPlaceList node is linked into the chain for
- X * the word's hash slot; nodes for the same word sit next to each other
- X * and share a single copy of the string.
- X *
- X * Words consisting of a 'q' followed only by 'x's are silently dropped.
- X * A null WordInfo, null Word or empty Word is reported on stderr and
- X * ignored.
- X *
- X * Every node added is counted in WordsInCache.  If MaxWordsInCache is
- X * non-zero and the count exceeds it, the cache is written and freed with
- X * DumpCache(1).  The count used to be skipped entirely when
- X * MaxWordsInCache was 0, which left WordsInCache at 0 and made every
- X * later DumpCache() return without writing anything.
- X */
- XAddWord(WordInfo) /* old version */
- X    t_WordInfo *WordInfo;
- X{
- X    int Slot;
- X    int GreaterOrLess = 1;
- X    t_WordPlaceList *SaveOldNext;
- X    t_WordPlaceList **WPL;
- X
- X    if (!HashOK) InitHash();
- X
- X    /* The following are all awfully serious internal errors.
- X     * They will only happen if I make a huge coding error, whereupon
- X     * they tend to happen for every word in the input...
- X     */
- X    if (!WordInfo) {
- X        fprintf(stderr, "AddWord(0)\n");
- X        return;
- X    } else if (!WordInfo->Word) {
- X        fprintf(stderr, "AddWord(Word=0)\n");
- X        return;
- X    } else if (!WordInfo->Word[0]) {
- X        fprintf(stderr, "AddWord(Word[0]=0)\n");
- X        return;
- X#ifdef ASCIITRACE
- X    } else if (AsciiTrace > 20) {
- X        fprintf(stderr, "[%s.len %d]\n", WordInfo->Word, WordInfo->Length);
- X#endif
- X    }
- X
- X    Slot = Hash(WordInfo);
- X
- X#ifdef ASCIITRACE
- X    if (AsciiTrace > 10) {
- X        fprintf(stderr, "H %d %s\n", Slot, WordInfo->Word);
- X    }
- X#endif
- X
- X    if (WordInfo->Word[0] == 'q') {
- X        register char *p = WordInfo->Word;
- X
- X        /* Words of the form qxxxxx* are not indexed. This is so the filters
- X         * can preprocess the files without upsetting the word counts.
- X         * If you can think of a better way to do this, well, tell me!
- X         * Lee
- X         */
- X
- X        /* skip the run of 'x's that follows the leading 'q' */
- X        for (++p; p - WordInfo->Word < WordInfo->Length; p++) {
- X            if (*p != 'x') break;
- X        }
- X
- X        /* reached the end: the word was nothing but q and x's */
- X        if (p - WordInfo->Word == WordInfo->Length) {
- X#ifdef ASCIITRACE
- X            if (AsciiTrace > 10) {
- X                (void) fprintf(stderr, "rejected %s (too boring)\n",
- X                    WordInfo->Word);
- X            }
- X#endif
- X            return;
- X        }
- X    }
- X
- X    /* Walk the chain until an entry that does not compare greater than
- X     * the new word; GreaterOrLess ends up 0 only on an exact match.
- X     */
- X    for (WPL = &SymbolTable[Slot]; *WPL; WPL = &((*WPL)->Next)) {
- X        if ((GreaterOrLess = STRCMP((*WPL)->Word, WordInfo->Word)) <= 0) {
- X            break;
- X        }
- X    }
- X
- X    /* Insert the new word at the head of the Word Chain,
- X     * i.e. at the start of the group of similar words
- X     */
- X    SaveOldNext = *WPL;
- X
- X    enew(*WPL, t_WordPlaceList);
- X    (*WPL)->WordPlace = WordInfo->WordPlace; /* structure copy, FID too */
- X    (*WPL)->Next = SaveOldNext;
- X
- X    if (GreaterOrLess || !SaveOldNext) {
- X        /* first node for this word: keep a private copy of the string */
- X        (*WPL)->Word = emalloc(WordInfo->Length + 1);
- X        (void) strncpy((*WPL)->Word, WordInfo->Word, (int) WordInfo->Length);
- X        (*WPL)->Word[WordInfo->Length] = '\0';
- X    } else {
- X        /* The word is already saved, so we only need to link to it */
- X        (*WPL)->Word = SaveOldNext->Word;
- X    }
- X
- X    /* Count unconditionally so that DumpCache() knows there is work to
- X     * do even when no limit is set (MaxWordsInCache == 0).
- X     */
- X    ++WordsInCache;
- X    if (MaxWordsInCache && WordsInCache > MaxWordsInCache) {
- X        void DumpCache();
- X
- X        DumpCache(1);
- X        WordsInCache = 0;
- X    }
- X}
- X
- X/* DumpCache -- write the cached word chains out with WriteWordChain().
- X *
- X * CallFree, when non-zero, also frees every node and word string, empties
- X * each slot that was written and resets WordsInCache to 0.  When it is
- X * zero the cache is left intact.
- X *
- X * Nothing is done if WordsInCache is 0.  With AsciiTrace set, progress and
- X * slot-usage statistics are printed on stderr.
- X */
- Xvoid
- XDumpCache(CallFree)
- X int CallFree; /* call efree() if non-zero */
- X{
- X extern int WriteWordChain();
- X
- X register int Slot;
- X register t_WordPlaceList *WordPlaceList;
- X int WordsLeft = WordsInCache;
- X int EmptySlots = 0, UsedSlots = 0;
- X int Progress = 0;
- X
- X if (WordsInCache == 0) return; /* save some work maybe */
- X
- X if (AsciiTrace) {
- X fprintf(stderr, "Writing%s%d words\n",
- X (CallFree) ? " and freeing " : " ", WordsInCache);
- X }
- X
- X /* Stop early once WordsLeft reaches 0; slots after that point are not
- X * visited and so are counted neither as used nor as empty.
- X */
- X for (Slot = 0; WordsLeft > 0 && Slot < HASHSIZ; Slot++) {
- X
- X if (AsciiTrace > 1) {
- X /* one progress character per HASHSIZ/16 slots */
- X if (Slot >= Progress * (HASHSIZ / 16)) {
- X fputc(" 01234567890ABCDEFGHIJKL"[Progress], stderr);
- X ++Progress;
- X }
- X }
- X if (SymbolTable[Slot] == (t_WordPlaceList *) 0) {
- X ++EmptySlots;
- X continue;
- X } else {
- X char *LastFreed = (char *) 0;
- X
- X ++UsedSlots;
- X WordPlaceList = SymbolTable[Slot];
- X /* presumably WriteWordChain() returns how many entries it wrote
- X * -- TODO confirm; its result is what drives WordsLeft down.
- X */
- X WordsLeft -= WriteWordChain(WordPlaceList);
- X
- X if (CallFree) {
- X while (WordPlaceList) {
- X register t_WordPlaceList *SavePointer;
- X
- X /* Adjacent nodes for the same word share one string
- X * (see AddWord), so free it only when the pointer
- X * differs from the one freed last.
- X */
- X if (WordPlaceList->Word &&
- X WordPlaceList->Word != LastFreed) {
- X efree(WordPlaceList->Word);
- X LastFreed = WordPlaceList->Word;
- X }
- X
- X /* fetch Next before the node itself is freed */
- X SavePointer = WordPlaceList->Next;
- X efree((char *) WordPlaceList);
- X WordPlaceList = SavePointer;
- X }
- X SymbolTable[Slot] = (t_WordPlaceList *) 0;
- X }
- X }
- X }
- X
- X if (AsciiTrace) {
- X /* percentage of the visited slots that held a chain */
- X double d = UsedSlots;
- X d /= (EmptySlots + UsedSlots);
- X d *= 100.0;
- X
- X fprintf(stderr, "%4.3f%% cache used -- %d out of (%d <= %d)\n",
- X d, UsedSlots, UsedSlots + EmptySlots, HASHSIZ);
- X#ifdef MALLOCTRACE
- X mallocmap();
- X#endif
- X }
- X
- X if (WordsInCache != 0 && CallFree) {
- X WordsInCache = 0;
- X }
- X}
- X
- X#endif /*!NEWSYM*/
- X
- X/*
- X * $Log: wordtable.c,v $
- X * Revision 2.11 91/02/20 19:07:37 lee
- X * The qxxx fix only worked if ASCIITRACE was defined!
- X *
- X * Revision 2.10 90/10/06 00:51:05 lee
- X * Prepared for first beta release.
- X *
- X * Revision 2.9 90/10/05 23:44:30 lee
- X * Major experimentation with new symbol table failed...
- X *
- X * Revision 2.8 90/09/26 19:45:02 lee
- X * Added call to mallocmap() in ifdef MALLTRACE.
- X *
- X * Revision 2.7 90/09/20 18:58:25 lee
- X * Added some comments, and deleted a needless test. Reorderered a loop
- X * in the (probably vain) hope of a speed-up in the face of paging...
- X *
- X * Revision 2.6 90/09/19 20:25:44 lee
- X * Don't index "qxxxxxxxx" words (this is a hook for filters...)
- X *
- X * Revision 2.5 90/08/29 21:46:11 lee
- X * Alpha release
- X *
- X * Revision 2.4 90/08/09 19:17:37 lee
- X * BSD lint and Saber
- X *
- X * Revision 2.3 90/03/21 17:32:31 lee
- X * new hashing function, masses, masses better -- the old one only ever
- X * used abuot 6% of the available values!
- X *
- X * Revision 2.2 89/10/08 20:47:47 lee
- X * Working version of nx-text engine. Addfile and wordinfo work OK.
- X *
- X * Revision 2.1 89/10/02 01:16:22 lee
- X * New index format, with Block/WordInBlock/Flags/BytesSkipped info.
- X *
- X * Revision 1.3 89/09/17 23:05:15 lee
- X * Various fixes; NumberInBlock now a short...
- X *
- X * Revision 1.2 89/09/16 21:18:55 lee
- X * First demonstratable version.
- X *
- X * Revision 1.1 89/09/07 21:06:20 lee
- X * Initial revision
- X *
- X */
- @@@End of lq-text/src/lqtext/wordtable.c
- echo x - lq-text/src/menu/Makefile 1>&2
- sed 's/^X//' >lq-text/src/menu/Makefile <<'@@@End of lq-text/src/menu/Makefile'
- X# Makefile for simple curses-based menu interface.
- X#
- X# $Id: Makefile,v 1.3 90/10/06 01:28:02 lee Rel1-10 $
- X
- XPWD=menu
- X
- X# PERFORMANCE makes curses go faster
- XEXTRA=-I../h -DPERFORMANCE
- XOPT=-O -g
- XDEFS= -DASCIITRACE -UBSD -DSYSV
- XWHICHDBM=sdbm
- X# change the next three lines to be the same as the lq-text definitions.
- XDBMLIBS=$(LIBDIR)/libsdbm.a
- XBCOPY=bcopy.o
- X# DBMLIBS=-lndbm -linet # 386/ix with hbtcpip provides a good bcopy()
- X
- XCFLAGS= $(OPT) $(DEFS) -UBSD -DSYSV $(GCCF) -D$(WHICHDBM) $(EXTRA)
- XCC=gcc
- XTERMCAP=-lcurses
- XRANLIB=ranlib
- X
- XTEXT=lqtext
- XPROG=m # a simple example of using the library...
- XPROGOBJS=example.o
- XPROGSRC=example.c
- XLIBDIR=../lib
- XBINDIR=../bin
- XLIAMLIB=$(LIBDIR)/liblq.a
- XLQTEXTLIB=$(LIBDIR)/liblqtext.a
- XMENULIB=liblqmenu.a
- X
- XPROGS=$(PROG) $(TEXT) # removed by make clean
- X
- XOBJS=menu.o error.o stringbox.o OldCurses.o
- XSRCS=menu.c error.c stringbox.c OldCurses.c
- X
- Xall: $(TEXT) m
- X
- Xinstall: $(MENULIB) $(TEXT)
- X cp $(MENULIB) $(LIBDIR)/$(MENULIB)
- X cp $(TEXT) $(BINDIR)/$(TEXT)
- X strip $(BINDIR)/$(TEXT)
- X
- X$(TEXT): text.o $(LQTEXTLIB) $(LIAMLIB) $(MENULIB) $(BCOPY)
- X $(CC) $(CFLAGS) -o $(TEXT) text.o $(BCOPY) \
- X $(MENULIB) $(LQTEXTLIB) $(DBMLIBS) $(LIAMLIB) $(TERMCAP)
- X
- X$(MENULIB): $(OBJS)
- X rm -f $(MENULIB)
- X ar rv $(MENULIB) $(OBJS)
- X $(RANLIB) $(MENULIB)
- X
- X# The example program links the menu objects and bcopy as well as its own
- X# object, so all of them are prerequisites.
- X$(PROG): $(PROGOBJS) $(OBJS) $(BCOPY)
- X $(CC) $(CFLAGS) -o $(PROG) $(OBJS) $(PROGOBJS) $(BCOPY) $(TERMCAP)
- X
- X# Run lint over the library sources; the output is kept in lint$(PROG).
- X# (The example's source list is PROGSRC; "PROGSRCS" was never defined.)
- Xlint$(PROG): $(OBJS) $(SRCS) $(PROGSRC)
- X lint $(CFLAGS) $(SRCS) $(TERMCAP) 2>&1 | tee lint$(PROG)
- X
- X# Tidy should leave the final executables, but otherwise remove all
- X# generated files
- Xtidy:
- X /bin/rm -f *.o core make.log .mk m.log
- X
- X# Clean should revert to a distribution state as far as possible
- Xclean:
- X /bin/rm -f *.o core *.a $(PROGS) $(CHARGEN) make.log .mk m.log
- X
- Xtext.o: text.c
- X $(CC) $(CFLAGS) $(TEXTINC) -c text.c
- X
- X
- Xdepend:
- X mkdep $(CFLAGS) *.c
- X
- X#
- X# $Log: Makefile,v $
- X# Revision 1.3 90/10/06 01:28:02 lee
- X# deleted mkdep output.
- X#
- X# Revision 1.2 90/10/01 20:33:09 lee
- X# Added BSD compatibility hooks and improved "make clean".
- X#
- X# Revision 1.1 90/08/29 21:48:48 lee
- X# Initial revision
- X#
- X# Revision 2.1 89/08/07 13:52:22 lee
- X# First fully working release; this is the basis for all
- X# future development.
- X#
- X# Revision 1.2 89/08/04 17:59:23 lee
- X# Fully working with Basic Functionality.
- X# Scrolling menubar, scrolling menus, moveable Info windows.
- X#
- X# Revision 1.1 89/07/27 11:41:39 lee
- X# Initial revision
- X#
- X
- X# DO NOT DELETE THIS LINE -- mkdep uses it.
- X# DO NOT PUT ANYTHING AFTER THIS LINE, IT WILL GO AWAY.
- X
- X# IF YOU PUT ANYTHING HERE IT WILL GO AWAY
- @@@End of lq-text/src/menu/Makefile
- echo x - lq-text/src/menu/README 1>&2
- sed 's/^X//' >lq-text/src/menu/README <<'@@@End of lq-text/src/menu/README'
- XThis directory contains as much as necessary of my curses menu library
- Xto demonstrate lq-text. (and I just saw some more files I could remove...)
- XThis is a fairly simple curses-based front end to the lq-text text retrieval
- Xsoftware.
- X
- XIt has only been tested on System V Release 3.2, and almost certainly will
- Xnot work on anything else without at least a little effort.
- X
- XSee the notes on porting below if you want to try...
- X
- XPlease do not ask me for the rest of the package. If I get the time and the
- Xnecessary facilities, I will make it available. I am hoping to have a
- Xversion which will run with no source changes under X windows as well as
- Xunder Curses, but it is not trivial... What you see here is a hacked version,
- Xin order to minimise what I post. Sorry.
- X
- X
- XTo install:
- X
- X(1) You need to have lq-text already.
- X Make it and test that it works, for example by indexing the unix man pages.
- X In particular, check that lqphrase works with two-word phrases. If it
- X doesn't, there is little point in proceeding.
- X
- X(2) You will need to edit Makefile in this directory to point to the directory
- X containing the lq-text source:
- X Change the definition of $(NX) as appropriate -- for example,
- X NX=../../../src/lq-text/src
- X or something.
- X
- X(3) make text
- X
- X(4) ln text lqtext (if you want)
- X
- X(5) try it. If you don't have working function keys, you can use ESC
- X followed by a digit (e.g. ESC 1 is the same as F1), and when you are
- X entering phrases, ^D at the start of the line will take you back to the
- X main menu just as F1 does.
- X
- X **>>> You will need to have lqshow in your path for this to work. <<<**
- X **>>> You will want to set $LQTEXTDIR, or use the -d option.
- X see the man page for lq-text for command-line options to "text".
- X
- X Try
- X from the File menu, select "new words"
- X (you can type 'x' for an explanation at any point in the menus)
- X
- X type some words or phrases
- X
- X select "match all" from the "All Words" menu
- X
- X the numbers by the phrases indicate the number of matches;
- X you can then do "browse all" from the "All Words" menu.
- X
- X To exit, press "q" from the main menu, or select "Finish" from the
- X Main Menu.
- X
- X When you have typed 'x', a box will appear containing an explanation.
- X You can type 'x' at this point for an explanation of what to do with
- X the box... for example, you can move the explain-box around the screen
- X or resize it if you want. I have no idea why you would want to do
- X this, but it can be a little fun for people who are bored... and is
- X a facility that came for free from my curses/menu package...
- X
- X(6) You might also like to try making "m", and running examples/vsh, which
- X is a simple shell-script. It is not meant to be a useful shell -- just
- X a tiny demo I wrote at home in some spare time...
- X
- X
- X(7) Now investigate internal.h and menu.h if you want to change things.
- X If you change these, go back to step (3)
- X
- XLee
- Xsq.com
- XThu Dec 14 21:06:34 EST 1989
- @@@End of lq-text/src/menu/README
- echo x - lq-text/src/menu/bcopy.c 1>&2
- sed 's/^X//' >lq-text/src/menu/bcopy.c <<'@@@End of lq-text/src/menu/bcopy.c'
- X#ifdef BCOPYTEST
- X# include <stdio.h>
- X#endif
- X
- X/* this is a simple replacement for bcopy() where the native bcopy()
- X * does not handle overlapping blocks.
- X * do
- X * cc -DBCOPYTEST -o bcopy bcopy.c
- X * and run "./bcopy" for a simple test. You should get three
- X * identical lines of output.
- X */
- X
- X/* bcopy -- copy nbytes bytes from src to dest; the two regions may
- X * overlap.  Note the argument order: source first, destination second.
- X * A count of zero or less copies nothing.
- X */
- Xbcopy(src, dest, nbytes)
- X    char *dest;
- X    char *src;
- X    int nbytes;
- X{
- X    /* We have to be clever about this...
- X     * If src < dest then we copy from the top down
- X     * otherwise, copy from the bottom up...
- X     */
- X
- X    register char *p, *q;
- X
- X    if (nbytes <= 0) {
- X        return; /* nothing to do; also stops a negative count running away */
- X    }
- X
- X    if (src < dest) {
- X        /* start one past the end and pre-decrement, so that no pointer
- X         * is ever formed in front of either buffer
- X         */
- X        p = src + nbytes;
- X        q = dest + nbytes;
- X        while (nbytes-- > 0) {
- X            *--q = *--p;
- X        }
- X    } else {
- X        p = src;
- X        q = dest;
- X        while (nbytes-- > 0) {
- X            *q++ = *p++;
- X        }
- X    }
- X}
- X
- X#ifdef BCOPYTEST
- X/* Self-test: print the string, then copy it down and back up through
- X * overlapping parts of one buffer.  All three lines are printed in the
- X * same "[...]" form, so a working bcopy() gives three identical lines.
- X */
- Xmain()
- X{
- X    char buffer[4096];
- X    char *s = "The naked children hugged each other";
- X
- X    printf("[%s]\n", s); /* first line */
- X    (void) sprintf(&buffer[12], "%s", s);
- X    bcopy(&buffer[12], buffer, strlen(s) + 1); /* overlapping, src > dest */
- X    printf("[%s]\n", buffer); /* 2nd line */
- X    bcopy(buffer, &buffer[12], strlen(s) + 1); /* overlapping, src < dest */
- X    printf("[%s]\n", &buffer[12]); /* 3rd line */
- X    return 0;
- X}
- X#endif
- @@@End of lq-text/src/menu/bcopy.c
- echo x - lq-text/src/menu/OldCurses.c 1>&2
- sed 's/^X//' >lq-text/src/menu/OldCurses.c <<'@@@End of lq-text/src/menu/OldCurses.c'
- X/* Compatibility routines for older versions of curses...
- X * $Id: OldCurses.c,v 1.2 90/10/04 16:27:58 lee Rel1-10 $
- X *
- X */
- X
- X#include <curses.h>
- X#include <ctype.h>
- X
- X#ifndef A_STANDOUT
- X#include "oldcurses.h"
- X
- X#undef CONTROL
- X#define CONTROL(c) (c ^ 64)
- X
- X#undef wgetch
- X
- X/* Lqwgetch -- read a key from window w and map it onto the KEY_* codes
- X * for curses versions that have no keypad support.
- X *
- X * Printable characters are returned unchanged.  A handful of control
- X * characters stand for the cursor keys, Home and Help.  ESC followed by
- X * a digit gives F0..F9, ESC followed by a..f (either case) gives
- X * F10..F15, and ESC h gives Help.  Anything else is returned as read; for
- X * an unrecognised ESC sequence that is the character after the ESC.
- X *
- X * The digits are matched as the characters '0'..'9'; the cases used to
- X * be the numeric values 0..9, which no digit key produces.
- X */
- Xchtype
- XLqwgetch(w)
- X    WINDOW *w;
- X{
- X    int ch = wgetch(w);
- X
- X    if (isprint(ch)) return ch;
- X
- X    switch (ch) {
- X    case CONTROL('^'): return KEY_HOME;
- X    case CONTROL('P'): return KEY_UP;
- X    case CONTROL('N'): return KEY_DOWN;
- X    case CONTROL('B'): return KEY_LEFT;
- X    case CONTROL('F'): return KEY_RIGHT;
- X    case CONTROL('X'): return KEY_HELP; /* Xplain.... (groan) */
- X    case '\033': /* Escape */
- X        /* acknowledge the ESC and ring the bell while waiting */
- X        (void) fprintf(stderr, "ESC\007");
- X        (void) fflush(stderr);
- X
- X        switch (ch = wgetch(w)) {
- X        case '0': return KEY_F0;
- X        case '1': return KEY_F(1);
- X        case '2': return KEY_F(2);
- X        case '3': return KEY_F(3);
- X        case '4': return KEY_F(4);
- X        case '5': return KEY_F(5);
- X        case '6': return KEY_F(6);
- X        case '7': return KEY_F(7);
- X        case '8': return KEY_F(8);
- X        case '9': return KEY_F(9);
- X        case 'a': case 'A': return KEY_F(10);
- X        case 'b': case 'B': return KEY_F(11);
- X        case 'c': case 'C': return KEY_F(12);
- X        case 'd': case 'D': return KEY_F(13);
- X        case 'e': case 'E': return KEY_F(14);
- X        case 'f': case 'F': return KEY_F(15);
- X        case 'h': return KEY_HELP;
- X        default: break;
- X        }
- X        break;
- X    default:
- X        break;
- X    }
- X    return ch;
- X}
- X
- X/* beep -- ring the terminal bell by writing BEL (octal 007) to stderr.
- X * (This used to write '\b', which is backspace.)
- X */
- Xvoid
- Xbeep()
- X{
- X    (void) putc('\007', stderr);
- X    (void) fflush(stderr);
- X}
- X
- X/* Wrapper round box() that supplies default border characters.
- X *
- X * oldcurses.h has "#define box LqBox", so the name in the definition
- X * below is rewritten and the function is really compiled as LqBox().
- X * The #undef inside the body removes that macro again, so the inner call
- X * is to a plain box() -- presumably the one from the curses library.
- X *
- X * A vert or hor of 0 is replaced by ACS_VLINE or ACS_HLINE respectively.
- X */
- Xvoid
- Xbox(win, vert, hor)
- X WINDOW *win;
- X int vert;
- X int hor;
- X{
- X#undef box
- X if (hor == 0) hor = ACS_HLINE;
- X if (vert == 0) vert = ACS_VLINE;
- X box(win, vert, hor);
- X}
- X
- X/* Lqattrset -- two-state substitute for wattrset(): a zero attr switches
- X * standout mode off for win, any other value switches it on.
- X */
- Xvoid
- XLqattrset(win, attr)
- X    WINDOW *win;
- X    int attr;
- X{
- X    if (attr == 0) {
- X        wstandend(win);
- X        return;
- X    }
- X    wstandout(win);
- X}
- X
- X/* Stand-in for wnoutrefresh(): all it does is call touchwin(win).
- X * It is declared without a type (so implicitly int) and returns no value.
- X * NOTE(review): callers must not rely on the result -- confirm none do.
- X */
- Xwnoutrefresh(win)
- X WINDOW *win;
- X{
- X touchwin(win);
- X}
- X#endif
- X
- X/* $Log: OldCurses.c,v $
- X * Revision 1.2 90/10/04 16:27:58 lee
- X * SysV compat improved.
- X *
- X * Revision 1.1 90/10/03 21:54:04 lee
- X * Initial revision
- X *
- X *
- X */
- @@@End of lq-text/src/menu/OldCurses.c
- echo x - lq-text/src/menu/oldcurses.h 1>&2
- sed 's/^X//' >lq-text/src/menu/oldcurses.h <<'@@@End of lq-text/src/menu/oldcurses.h'
- X/* oldcurses.h -- compatibility with pre-System V.3 curses...
- X * $Id: oldcurses.h,v 1.2 90/10/04 16:28:31 lee Rel1-10 $
- X */
- X
- X/* The character-plus-attributes type of newer curses; a plain int here. */
- Xtypedef int chtype;
- X
- X/* Plain ASCII stand-ins for the alternate character set.
- X * The arrows point the way their names say: LARROW left, RARROW right.
- X */
- X#define ACS_LARROW '<'
- X#define ACS_RARROW '>'
- X#define ACS_HLINE '='
- X#define ACS_VLINE '|'
- X#define ACS_LRCORNER '+'
- X#define ACS_LLCORNER '+'
- X
- X/* Line drawing.  The four letters give the line style on the top, right,
- X * bottom and left arm of the character: B for blank, S for single.
- X */
- X#define ACS_BSSS '+' /* T-piece */
- X#define ACS_SBSS '+' /* -| */
- X#define ACS_SSBS '+' /* inverted T-piece */
- X#define ACS_SSSB '+' /* |- */
- X#define ACS_BBSS '+' /* top right corner */
- X#define ACS_BSSB '+' /* top left corner */
- X
- X/* Key codes returned by Lqwgetch(); all lie above the 8-bit range. */
- X#define KEY_DOWN 257
- X#define KEY_UP 258
- X#define KEY_LEFT 259
- X#define KEY_RIGHT 260
- X#define KEY_HELP 261
- X#define KEY_HOME 262
- X#define KEY_F0 300
- X#define KEY_F(n) (KEY_F0+(n)) /* argument parenthesised: it may be an expression */
- X
- X/* Route all keyboard input through Lqwgetch(). */
- X#undef getch
- X#define getch() Lqwgetch(stdscr)
- X#define wgetch Lqwgetch
- X
- X/* box() is replaced by the LqBox() wrapper in OldCurses.c. */
- X#undef box
- X#define box LqBox
- X
- X/* Attributes: standout is the only one available, via Lqattrset(). */
- X#define A_STANDOUT 1
- X#undef standout
- X#undef standend
- X#define wattrset Lqattrset
- X#define attrset(a) Lqattrset(stdscr, a)
- X#define keypad(win, bool) 1 /* ignore this one please */
- X
- X/* $Log: oldcurses.h,v $
- X * Revision 1.2 90/10/04 16:28:31 lee
- X * SysV compat improved.
- X *
- X * Revision 1.1 90/10/03 21:56:32 lee
- X * Initial revision
- X *
- X *
- X */
- @@@End of lq-text/src/menu/oldcurses.h
- echo end of part 08
- --
- Liam R. E. Quin, lee@sq.com, SoftQuad Inc., Toronto, +1 (416) 963-8337
-