home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
World of Shareware - Software Farm 2
/
wosw_2.zip
/
wosw_2
/
CPROG
/
US20SRC.ZIP
/
CDICT.C
< prev
next >
Wrap
C/C++ Source or Header
|
1992-06-26
|
5KB
|
218 lines
/* CDICT: Compress Dictionary utility program for
MicroSPELL 2.0
(C)opyright May 1987,1992 by Daniel Lawrence
All Rights Reserved
*/
#include <stdio.h>
#include "dopt.h"
#include "dsfx.h"
/* ---- file-scope state shared by every routine in this file ---- */

/* file names, copied from the command line by main() */
char mdfile[NSTRING];		/* text (uncompressed) dictionary to read */
char mcfile[NSTRING];		/* compressed dictionary to create */

/* open streams; a NULL pointer means "not open" */
FILE *mdptr = NULL;		/* stream on the text dictionary */
FILE *mcptr = NULL;		/* stream on the compressed output */

/* lookup tables filled in by main() before compression starts */
unsigned char lcase[128];	/* maps 'A'..'Z' to 'a'..'z', all else to itself */
long letter_offset[ALPHASIZE];	/* output file position recorded per first letter */
int sflen[NSUFFIX];		/* strlen() of each entry of the sfx[] suffix table */
/*
 * main: compress a text dictionary into the MicroSPELL compressed format.
 *
 * Usage: CDICT <text dictionary> <compressed dictionary>
 *
 * The output file starts with a table of ALPHASIZE longs (letter_offset),
 * followed by one compressed record per word (see cmpsword()).  The table
 * is written once as a placeholder to reserve the space, and rewritten
 * with the real offsets after every word has been emitted.
 *
 * Returns 0 on success; exits with EXBADOPT on a bad command line and
 * with EXMDICT when a dictionary file can not be opened or written.
 */
int main(argc, argv)

int argc;	/* # of command line arguments */
char **argv;	/* text of command line arguments */

{
	register char *word;		/* current word */
	register int suffix;		/* suffix index */
	register int first;		/* lower-cased first char of word */
	register int cur_first_letter;	/* first letter of the current run */
	register int i;			/* loop index */
	long total_words;		/* number of words compressed */
	char lastword[NSTRING];		/* previous word in dictionary */
	char tempword[NSTRING];		/* untouched copy of the current word */
	char *nxtmword();
	int help(), mopen(), mclose(), cmpsword();

	printf("CDICT Dictionary Compression Utility for MicroSPELL v%s\n",
		VERSION);

	if (argc != 3) {
		help();
		exit(EXBADOPT);
	}

	/* the name buffers are fixed size: refuse names that do not fit */
	if (strlen(argv[1]) >= NSTRING || strlen(argv[2]) >= NSTRING) {
		printf("%%File name too long\n");
		exit(EXBADOPT);
	}
	strcpy(mdfile, argv[1]);
	strcpy(mcfile, argv[2]);

	if (mopen() != TRUE) {
		printf("%%Can not open text dictionary file\n");
		exit(EXMDICT);
	}

	/* init the lower case table */
	for (i = 0; i < 128; i++) {
		if ('A' <= i && i <= 'Z')
			lcase[i] = i - 'A' + 'a';
		else
			lcase[i] = i;
	}

	/* open the output compressed dictionary file */
	mcptr = fopen(mcfile, "wb");
	if (mcptr == NULL) {
		printf("%%Can not open compressed dictionary output file\n");
		exit(EXMDICT);
	}

	/* reserve room for the letter offset table at the front of the file */
	if (fwrite((char *)&(letter_offset[0]), sizeof(long), ALPHASIZE, mcptr)
	    != ALPHASIZE) {
		printf("%%Can not write compressed dictionary output file\n");
		exit(EXMDICT);
	}

	/* prepare the suffix length table */
	for (suffix = 0; suffix < NSUFFIX; suffix++)
		sflen[suffix] = strlen(sfx[suffix]);

	printf("[Compressing %s => %s]\n", mdfile, mcfile);
	lastword[0] = 0;	/* null last word */

	/* scan the dictionary, compressing */
	cur_first_letter = 0;
	total_words = 0L;
	for (word = nxtmword(); word != NULL; word = nxtmword()) {

		/* a blank line is not a word: emit nothing for it */
		if (word[0] == 0)
			continue;

		/* mask to 0..255 so a signed char can never index
		   lcase[] negatively; only 0..127 is in the table */
		first = word[0] & 0xFF;
		if (first < 128)
			first = lcase[first];

		if (first != cur_first_letter) {
			cur_first_letter = first;
			/* only letters that own a slot are recorded */
			if (first >= 'a' && first - 'a' < ALPHASIZE)
				letter_offset[first - 'a'] = ftell(mcptr);
		}

		/* cmpsword() modifies word, so keep a pristine copy
		   to serve as the next "previous word" */
		strcpy(tempword, word);
		cmpsword(lastword, word);
		total_words++;
		strcpy(lastword, tempword);
	}

	/* write out letter offset table to front of file; the offset must
	   be a long (0L) and whence 0 means "from start of file" */
	if (ferror(mcptr) || fseek(mcptr, 0L, 0) != 0 ||
	    fwrite((char *)&(letter_offset[0]), sizeof(long), ALPHASIZE, mcptr)
	    != ALPHASIZE) {
		printf("%%Can not write compressed dictionary output file\n");
		exit(EXMDICT);
	}

	/* close things up; fclose() flushes, so its result matters */
	mclose();
	if (fclose(mcptr) != 0) {
		printf("%%Can not write compressed dictionary output file\n");
		exit(EXMDICT);
	}
	printf("[%ld words in dictionary compressed]\n", total_words);
	return(0);
}
/* help: print the command line synopsis for CDICT on standard output */
help()

{
	fputs("\nUsage:\n\n", stdout);
	fputs(" CDICT <text dictionary> <compressed dictionary>\n", stdout);
}
/*
 * mopen: (re)open the text dictionary named by mdfile for reading.
 *
 * Any stream already held in mdptr is closed first.  Returns TRUE when
 * the open succeeded and FALSE when it did not (mdptr is then NULL).
 */
mopen()

{
	/* drop a previously opened stream before reopening */
	if (mdptr != NULL)
		fclose(mdptr);

	mdptr = fopen(mdfile, "r");
	return((mdptr != NULL) ? TRUE : FALSE);
}
/*
 * mclose: close the text dictionary if it is open and mark it closed.
 * Safe to call when the stream has already been closed.
 */
mclose()

{
	FILE *open_dict;	/* stream that was open on entry, if any */

	open_dict = mdptr;
	mdptr = NULL;
	if (open_dict != NULL)
		fclose(open_dict);
}
/*
 * nxtmword: read the next line of the text dictionary.
 *
 * Returns a pointer to a static buffer holding the line with its
 * trailing newline and any trailing spaces removed; the buffer is
 * overwritten by the next call.  A blank line yields an empty string.
 * Returns NULL when the dictionary is not open or no more lines can be
 * read, in which case the stream is closed and mdptr is reset to NULL.
 */
char *nxtmword()

{
	static char word[NSTRING];	/* word to return */
	register int len;		/* current length of word */

	/* is it already closed? */
	if (mdptr == NULL)
		return(NULL);

	/* get the next word */
	if (fgets(word, NSTRING - 1, mdptr) == NULL) {
		/* no more left: close out */
		fclose(mdptr);
		mdptr = NULL;
		return(NULL);
	}

	len = strlen(word);

	/* drop the newline only when there is one: the last line of a
	   file may lack it, and must not lose a real character */
	if (len > 0 && word[len - 1] == '\n')
		--len;

	/* drop trailing spaces, never looking in front of the buffer */
	while (len > 0 && word[len - 1] == ' ')
		--len;

	word[len] = 0;
	return(word);
}
/*
 * cmpsword: write one word to mcptr in compressed form.
 *
 * The record written is:
 *   1 byte   'A' + number of leading characters shared with lastword,
 *            with 128 added when the word's first character was changed
 *            by lower-casing (i.e. it was capitalized)
 *   n bytes  the remaining characters of the word, after the shared
 *            prefix and with any recognised suffix removed
 *   1 byte   index of the removed suffix in sfx[], or NSUFFIX when no
 *            suffix matched; 128 is or'ed in when ASCII is set
 *
 * word is modified in place (suffix truncated, first character
 * lower-cased); lastword is only read.
 */
cmpsword(lastword, word)	/* compress the given word */

char *lastword;	/* previous dictionary word */
char *word;	/* current dictionary word */

{
	register int index;		/* index into current word */
	register int suffix;		/* suffix code */
	register int wlen;		/* length of current word */
	register int orig_first;	/* original first character (0..255) */
	register int lower_first;	/* first character after lower-casing */

	/* scan for common suffixes; the first match is cut off */
	wlen = strlen(word);
	for (suffix = 0; suffix < NSUFFIX; suffix++) {
		if (wlen < sflen[suffix])
			continue;
		if (strcmp(&word[wlen - sflen[suffix]], sfx[suffix]) == 0) {
			word[wlen - sflen[suffix]] = 0;	/* trunc it */
			break;
		}
	}
	/* If there is no suffix...suffix ends up as NSUFFIX */

	/* lower-case the lead char, remembering what it was.  The value
	   is masked to 0..255 because char may be signed, and lcase[]
	   only has 128 entries: anything above passes through unchanged */
	orig_first = word[0] & 0xFF;
	lower_first = (orig_first < 128) ? lcase[orig_first] : orig_first;
	word[0] = lower_first;

	/* count the leading characters shared with the previous word */
	index = 0;
	while (lastword[index] && lastword[index] == word[index])
		index++;

#if	ASCII
	suffix |= 128;
#endif

	if (orig_first == lower_first)
		fprintf(mcptr, "%c%s%c", 'A'+index, &word[index], suffix);
	else
		fprintf(mcptr, "%c%s%c", 'A'+index+128, &word[index], suffix);
}
#if	CMS
#undef	fopen
/*
 * cmsopen: open a file on CMS through the real library fopen().
 *
 * The IBM 30xx runtime reports failed opens on its own; quiet(1) is
 * called first so that the callers can handle the failure themselves.
 * NOTE(review): fopen is presumably redirected to this routine by a
 * macro in a project header, hence the #undef above -- confirm.
 */
FILE *cmsopen(file, mode)

char *file;	/* name of file to open */
char *mode;	/* mode to open it in */

{
	FILE *stream;	/* result of the real fopen() */

	quiet(1);
	stream = fopen(file, mode);
	return(stream);
}
#endif