home *** CD-ROM | disk | FTP | other *** search
- /*
- * indexer: tiny database index generation utility
- *
- * Author: HIRAHO Satoshi
- * (C) 1989 Halca Computer Science Laboratory TM
- *
- * Edition History:
- * 1.1 89/07/14 Halca.Hirano creation for hterm set-up help system
- * simple linear search version (but fast enough compared with using grep)
- * 1.2 89/07/29 Halca.Hirano permit to include comment line
- * change command system
- * add -s strip option to strip indexer command
- * 1.3 89/08/14 Halca.Hirano binary-tree version
- * ---- V2.4.0 distribution ----
- * 1.4 90/05/27 Halca.Hirano
- * compress text by mad@keio
- * add include command '@filename'
- * 1.5 90/07/13 Halca.Hirano
- * add version number to header
- *
- *
- * Description:
- * Indexer generates an index used to look up variable-length records by key.
- * There are three components for indexer.
- *
- * Record text; This is a plain text file including keys and appropriate
- * records. e.g,
- *
- * comment
- * > "key1"
- * this is a text line. '>' means key.
- * And this is also text for key1.
- * < end key1
- * comment
- * > "key2"
- * text lines for key2.
- * < end key2
- * Lines in a record that begin with the # character are written to the
- * output file with the leading # character removed.
- *
- * Database file; This is generated by indexer from Record text.
- * Index information is included in head part of the file.
- *
- * header
- * records
- * index part (key and index to records)
- *
- * Library; indexlib.c is an access facility for the database file.
- * It should be linked with the user's application program to use the database.
- *
- */
-
/* Revision-control identification string embedded in the binary. */
static char version[] = "$Header: indexer.cv 1.8 90/07/04 01:07:40 hirano Exp $";

/*
 * Usage text printed on stderr for -h, -?, unknown options, a wrong
 * argument count, or a file that cannot be opened.
 *
 * Built from adjacent string literals (one per output line) instead of
 * backslash line continuations, so that nothing at the start of a source
 * line can leak into the message.
 */
char help[] =
    "Function: generate index file\n"
    "Syntax: indexer [options] <input text file> <output file>\n"
    "Options: -s strip indexer command\n"
    "Example: indexer help.doc help.db\n";
-
- #include <stdio.h>
- #include <ctype.h>
- #include <string.h>
- #include <stdlib.h>
- #include "indexer.h"
-
/* Boolean values used for flags and int "success" returns in this file. */
#define YES 1
#define NO 0

/* Size of the buffer that holds one input line. */
#define MAX_LINE 256

/*
 * Advance the pointer variable x past any blanks and tabs.
 * x is evaluated more than once and is modified, so it must be a plain
 * pointer lvalue.  Wrapped in do { } while (0) so the macro behaves as a
 * single statement, including in front of an `else`.
 */
#define skipSpace(x) do { while (*(x) == ' ' || *(x) == '\t') (x)++; } while (0)

/* Capacity of the keys[] table. */
#define MAX_KEY 500

/* Values of `state`: outside a record / inside a record. */
#define S_COMMENT 1
#define S_RECORD 2

/* fopen() mode used for the database output file. */
#ifdef MSDOS
#define OUT_ATTR "wb"
#else
#define OUT_ATTR "w"
#endif /* MSDOS */
-
-
- char headerString[] = "Generated by indexer %d.%d. Don't edit this file.\n";
-
- FILE *ifp; /* input file */
- FILE *ofp; /* output file */
- FILE *tfp; /* temporary file */
- long root; /* binary-tree root */
- Index keys[MAX_KEY]; /* keys */
- int numKey; /* keys index */
- int lineNo;
- char *fileName; /* input file name */
- char line[MAX_LINE];
- struct _header header;
- int state;
- int stripMode = NO; /* default non stip mode */
- long RecordSize; /* Size of current record. */
-
/*
 * One saved input context.  newFile() stores the interrupted input here
 * when an include line is met, and oldFile() restores it afterwards.
 */
struct _fileStack {
    FILE *ifp;              /* stream of the suspended file */
    int lineNo;             /* its line counter at the time of the include */
    char fileName[50];      /* copy of its name */
};

/* Saved contexts; the first fileStackPtr entries are in use. */
struct _fileStack fileStack[30];
int fileStackPtr = 0;

/* Byte type used for the character argument of WriteChar(). */
typedef unsigned char u_char;
-
/*
 * Forward declarations.
 *
 * Every function below is defined `static` later in this file, so the
 * prototypes are `static` as well: a static definition that follows a
 * declaration with external linkage is a constraint violation.
 */
static void stripper(void);
static void indexer(void);
static void keyStart(void);
static void keyEnd(char *line);
static void text(char *s);
static void cutKey(char *p, char *key);
static void newFile(char *file);
static int oldFile(void);
static void error(char *s, char *line);
static void bInsert(char *s, int indexNo);
static void ResetWrite(void);
static int EndWrite(FILE *fp);
static int WriteChar(unsigned char c, FILE *fp);
#ifdef COMPRESS_DB
/* These two are only defined when the compression code is compiled in. */
static int WriteCode(unsigned short code, FILE *fp, int bits);
static int SearchCode(unsigned short code, unsigned char c);
#endif /* COMPRESS_DB */
-
- main(argc, argv)
- char **argv;
- {
- register char *p;
- while (--argc > 0)
- if (*(p = *++argv) == '-')
- for (++p; *p; p++)
- switch (tolower(*p)) {
- case 's': stripMode = YES; break;
- case 'h':
- case '?':
- default:
- fprintf(stderr, help);
- exit(1);
- }
- else
- break;
- if (argc != 2) {
- fprintf(stderr, help);
- exit(1);
- }
-
- if ((ifp = fopen(*argv, "r")) == NULL) {
- fprintf(stderr, help);
- fprintf(stderr, "indexer: can't open input file %s\n", *argv);
- exit(1);
- }
- fileName = *argv;
- fprintf(stderr, "File %s\n", fileName);
-
- if ((ofp = fopen(*++argv, stripMode ? "w" : OUT_ATTR)) == NULL) {
- fprintf(stderr, help);
- fprintf(stderr, "can't open output file %s\n", *argv);
- exit(1);
- }
-
- if (stripMode)
- stripper();
- else
- indexer();
- fclose(ifp);
- fclose(ofp);
- }
-
- static void stripper()
- /*
- * strip mode; strip indexer command
- */
- {
- do {
- while (fgets(line, MAX_LINE, ifp) > 0) {
- switch (line[0]) {
- case KEY:
- case KEY_END:
- case STRIP:
- break;
- case INCLUDE:
- *strchr(line, '\n') = '\0';
- newFile(&line[1]);
- default:
- fputs(line, ofp);
- }
- }
- } while (oldFile());
- }
-
- static void indexer()
- /*
- * indexer: generate indexed file
- */
- {
- int i;
-
- lineNo = 0;
- state = S_COMMENT;
- numKey = 0;
- root = -1;
- for (i = 0; i < MAX_KEY; i++)
- keys[i].right = keys[i].left = -1;
-
- sprintf(header.header, "incomplete\n");
- fwrite(&header, sizeof(header), 1, ofp);
- ResetWrite();
-
- do {
- while (fgets(line, MAX_LINE, ifp) > 0) {
- lineNo++;
- if (line[0] == KEY)
- keyStart();
- else if (line[0] == KEY_END)
- keyEnd(line);
- else if (line[0] == INCLUDE) {
- *strchr(line, '\n') = '\0';
- newFile(&line[1]);
- } else if (state == S_RECORD)
- text(line);
- /* else ignore */
- }
- } while (oldFile());
-
- if (state == S_RECORD)
- keyEnd(line);
-
- sprintf(header.header, headerString, I_VERSION, I_REVISION);
- fflush(ofp); /* need because MSC5.1 ftell() bug */
- header.numIndex = numKey;
- header.indexOffset = ftell(ofp);
- header.version = I_VERSION;
- header.revision = I_REVISION;
- fseek(ofp, 0L, 0); /* head of file */
- fwrite(&header, sizeof(header), 1, ofp);
-
- fseek(ofp, 0L, 2); /* end of file */
- /*
- * We may sort index here.
- */
- for (i = 0; i < numKey; i++) {
- keys[i].right = keys[i].right * sizeof(Index) + header.indexOffset;
- keys[i].left = keys[i].left * sizeof(Index) + header.indexOffset;
- fwrite(&keys[i], sizeof(keys[i]), 1, ofp);
- }
- }
-
- static void keyStart()
- {
- if (state == S_RECORD) {
- fprintf(stderr, "warning: missing '<' line %d, assume end of record\n", line);
- keyEnd(line);
- }
- if (numKey > MAX_KEY) {
- fprintf(stderr, "indexer: sorry, too many key (max %d)\n", MAX_KEY);
- exit(1);
- }
- keys[numKey].offset = ftell(ofp);
- cutKey(&line[1], keys[numKey].key);
- if (strlen(keys[numKey].key) == 0)
- error("key length is zero", line);
- bInsert(keys[numKey].key, numKey);
- fprintf(stderr, "%d %d: '%s'", numKey+1, lineNo, keys[numKey].key);
- numKey++;
- state = S_RECORD;
- }
-
- static void keyEnd(line)
- char *line;
- {
- if (state != S_RECORD)
- error("no key", line);
- EndWrite(ofp);
- keys[numKey-1].size = RecordSize;
- if (keys[numKey-1].size == 0)
- fprintf(stderr, "warning: no record for key '%s'\n", keys[numKey-1].key);
- cutKey(&line[1], keys[numKey-1].nextKey);
- fprintf(stderr, " -> '%s'\n", keys[numKey-1].nextKey);
- RecordSize = 0;
- state = S_COMMENT;
- }
-
- static void text(s)
- char *s;
- {
- if (*s == STRIP)
- s++; /* remove force strip command */
- while (*s) {
- RecordSize++;
- WriteChar(*s++, ofp);
- }
- }
-
- static void cutKey(p, key)
- char *p;
- char *key;
- {
- char term = '\n';
- char *k = key;
- int i;
-
- skipSpace(p);
- if (*p == KEY_PAREN1 || *p == KEY_PAREN2) {
- term = *p;
- p++;
- }
- for (i = 0; *p != term; i++) {
- if (i >= KEY_LEN)
- error("key too long line", line);
- *k++ = *p++;
- }
- *k = '\0';
- }
-
- static void newFile(file)
- char *file;
- {
- fileStack[fileStackPtr].ifp = ifp;
- fileStack[fileStackPtr].lineNo = lineNo;
- strcpy(fileStack[fileStackPtr].fileName, fileName);
- fileStackPtr++;
- if ((ifp = fopen(file, "r")) == NULL) {
- fprintf(stderr, "indexer: can't open input file %s\n", file);
- exit(1);
- }
- fprintf(stderr, "File %s\n", file);
- lineNo = 1;
- }
-
- static int oldFile()
- {
- if (--fileStackPtr < 0)
- return(NO);
- fclose(ifp);
- ifp = fileStack[fileStackPtr].ifp;
- lineNo = fileStack[fileStackPtr].lineNo;
- fileName = fileStack[fileStackPtr].fileName;
- return(YES);
- }
-
- static void error(s, line)
- char *s, *line;
- {
- fprintf(stderr, "indexer: %s\n", s);
- fprintf(stderr, "%s(%d): %s", fileName, lineNo, line);
- exit(1);
- }
-
- static void bInsert(s, indexNo)
- /*
- * binary-tree insert
- */
- char *s; /* new key string */
- int indexNo; /* new key no */
- {
- register long *p = &root;
- register int bs;
-
- while (*p != -1) {
- if ((bs = strcmp(s, keys[*p].key)) > 0)
- p = &(keys[*p].right);
- else if (bs < 0)
- p = &(keys[*p].left);
- else {
- fprintf(stderr, "duplicated key %s\n", s);
- exit(1);
- }
- }
- *p = (long)indexNo;
- }
-
- #ifdef COMPRESS_DB
/*
 * Record compression: dictionary coder producing 9-bit codes that widen
 * to 10 bits once code 512 has been assigned.  Codes 0..255 stand for
 * single bytes; codes from 256 up to 1023 stand for (code, byte) pairs
 * kept in a hash table.  All state is reset at the end of every record.
 */

/* #define TABLESIZE 1500 */
#define TABLESIZE 1000

static unsigned short MaxCode;      /* highest code number assigned so far */
static int BitLength;               /* width in bits of the codes being written */
static int BitsLeft;                /* number of pending bits held in BitBuff */
static unsigned long BitBuff;       /* pending bits not yet written as a byte */
static unsigned short LastCode;     /* code for the input matched so far */
static int NotFirstTime;            /* 0 until the first byte of a record is seen */

/* mask[n] selects the n low-order bits. */
static unsigned short mask[] =
{
    0, 1, 3, 7, 0xf, 0x1f, 0x3f, 0x7f, 0xff, 0x1ff, 0x3ff
};

/* Hash table of known pairs; NewCode == 0 marks an empty slot. */
static struct {
    unsigned short code;            /* prefix code */
    unsigned char next;             /* byte that follows the prefix */
    unsigned short NewCode;         /* code assigned to (code, next) */
} table[TABLESIZE];

/*
 * Convert n-bit code to 8 bit stream and write to file.
 *
 * Writes one byte, or two when at least 8 bits are still pending
 * afterwards.  Returns the result of the last putc().
 */
static int WriteCode(unsigned short code, FILE *fp, int bits)
{
    int value;
    int NextBitsLeft;

    if (code > MaxCode) {
        fprintf(stderr, "I'm sorry -- %d, MaxCode = %d\n",
            (int)code, (int) MaxCode);
    }
    /* Pending bits plus the high bits of code fill one byte. */
    NextBitsLeft = BitsLeft + bits - 8;
    value = putc((int)(((BitBuff << (8 - BitsLeft)) +
                        (code >> NextBitsLeft)) & 0xff), fp);
    BitBuff = code & mask[NextBitsLeft];
    BitsLeft = NextBitsLeft;

    if (BitsLeft >= 8) {
        BitsLeft -= 8;
        value = putc((int)((BitBuff >> BitsLeft) & 0xff), fp);
        BitBuff = BitBuff & mask[BitsLeft];
    }
    return value;
}

/*
 * Put the coder into its initial state and empty the pair table.
 * LastCode is cleared too, so that ending a record that received no
 * bytes writes code 0 instead of a code left over from the previous
 * record (which could also exceed the reset MaxCode).
 */
static void ResetWrite(void)
{
    int i;

    BitsLeft = 0;
    BitBuff = 0;
    NotFirstTime = 0;
    MaxCode = 255;
    BitLength = 9;
    LastCode = 0;
    for (i = 0; i < TABLESIZE; i++) {
        table[i].code = 0;
        table[i].next = 0;
        table[i].NewCode = 0;
    }
}

/*
 * Finish the current record: write the pending code followed by code 0,
 * then reset the coder for the next record.  Writing the trailing code
 * also pushes out every bit of the pending code; bits of the trailing
 * code that do not fill a byte are discarded by the reset.
 * Returns the result of the last putc().
 */
static int EndWrite(FILE *fp)
{
    int value;

    WriteCode(LastCode, fp, BitLength);
    value = WriteCode((unsigned short) 0, fp, BitLength);
    ResetWrite();
    return value;
}

/*
 * Search hash table and return index to the table.
 * The result entry may be empty or may have code and char
 * equal to args.
 *
 * code ^ c can be as large as 1023, so it is reduced modulo TABLESIZE,
 * and the probe wraps at TABLESIZE, keeping every index inside table[].
 */
static int SearchCode(unsigned short code, unsigned char c)
{
    unsigned start = (unsigned)(code ^ c) % TABLESIZE;
    unsigned i = start;

    do {
        if (table[i].NewCode == 0)
            /* This entry is empty. */
            return (int)i;
        if (table[i].code == code && table[i].next == c)
            return (int)i;
        if (++i >= TABLESIZE)
            i = 0;
    } while (i != start);

    /* This can never happen because table can never be full. */
    return 0;
}

/*
 * Feed one byte to the coder.
 *
 * Returns the result of WriteCode() when a code was written, otherwise
 * the byte itself.
 */
static int WriteChar(unsigned char c, FILE *fp)
{
    int i;
    int value;

    if (! NotFirstTime) {
        /* First byte of a record: nothing to pair it with yet. */
        NotFirstTime = 1;
        LastCode = c;
        return (int) c;
    }
    i = SearchCode(LastCode, c);
    if (table[i].NewCode == MaxCode) {
        /* This code is not yet known to uncompressor. */
        value = WriteCode(LastCode, fp, BitLength);
        /*
         * A code number is used up without a table entry.  The width
         * must still follow the code count, exactly as in the branch
         * below; otherwise codes above 511 would be written in 9 bits.
         */
        MaxCode++;
        if (MaxCode == 512)
            BitLength = 10;
        LastCode = c;
        return value;
    }
    if (table[i].NewCode == 0) {
        /* New code */
        value = WriteCode(LastCode, fp, BitLength);
        if (MaxCode < 1023) {
            MaxCode++;
            if (MaxCode == 512)
                BitLength = 10;
            table[i].NewCode = MaxCode;
            table[i].code = LastCode;
            table[i].next = c;
        }
        LastCode = c;
        return value;
    } else {
        /* Known pair: keep extending the match. */
        LastCode = table[i].NewCode;
        return c;
    }
}
- #else
/*
 * Uncompressed variants of the record output routines.  Signatures match
 * the forward declarations so both configurations are called alike.
 */

/* Nothing to reset when records are stored uncompressed. */
static void ResetWrite(void)
{
}

/* Nothing is buffered, so there is nothing to finish.  Returns 0. */
static int EndWrite(FILE *fp)
{
    (void)fp;
    return 0;
}

/* Write the byte c to fp.  Returns c, or EOF when the write failed. */
static int WriteChar(unsigned char c, FILE *fp)
{
    return putc(c, fp);
}
- #endif /* COMPRESS_DB */
-