home *** CD-ROM | disk | FTP | other *** search
- From: gtoal@tharr.UUCP (Graham Toal)
- Newsgroups: alt.sources
- Subject: Spelling utilities; crossword-helper & typo fixer.
- Message-ID: <814@tharr.UUCP>
- Date: 29 Jun 90 09:49:17 GMT
-
- Archive-Name: dawgutils/update1.shar
-
- To people who received the last post and got a bit annoyed at the
- lack of a makefile, I apologise. It was my first net posting. I'll
- know better next time. Meanwhile here's an emergency fix;
- cc dawg.c; mv a.out dawg
- cc pdawg.c; mv a.out pdawg
- cc pack.c; mv a.out pack
- cc ppack.c; mv a.out ppack
- cc dwgcheck.c; mv a.out dwgcheck
- cc pckcheck.c; mv a.out pckcheck
- cc tell.c; mv a.out tell
- cc proot.c; mv a.out proot
- dawg /usr/dict/words dict
- pack dict.dwg dict.pck
- dwgcheck this and that wroang wurd and another wurd
- pckcheck this and that wroang wurd and another wurd
- proot meta
- tell stoopid speled wurds
-
- Hope that helps a bit. Now for today's additions:
- cc typo.c; mv a.out typo
- cc cross.c; mv a.out cross
- typo spel
- cross p\?zz\?e
-
- #!/bin/sh-----cut here-----cut here-----cut here-----cut here-----
- # shar: Shell Archiver
- # Run the following text with /bin/sh to create:
- # README # cross.c # typo.c
- cat - << \SHAR_EOF > README
- This is probably rather early for a follow up to my posting on
- spelling checker utilities, but a couple of people have asked for
- these...
-
- (These programs need dawgutils/* as posted a couple of days ago)
-
- typo.c
- A sample program showing how to use the dawg structure to efficiently
- correct typos.
-
- cross.c
- A 'crossword puzzle' solver - effectively the same as grepping the
- word list allowing '?' as a single wild char, eg cross p?zz?e
- would return 'puzzle'.
-
- I've also had some interest in my throwaway comments on a replacement
- for Soundex; enough to convince me to document it properly and post it
- here (a week away at the earliest). Meanwhile, may I ask again:
- If anyone has a dictionary of words and their phonetic representations
- could they get in touch please? Many thanks if you can.
-
- Share & Enjoy,
-
- Graham Toal (grA@m tOl) <gtoal%uk.ac.ed@nsfnet-relay.ac.uk>
- SHAR_EOF
- cat - << \SHAR_EOF > cross.c
- /* On brain-dead PC's, with MICROSOFT, link with /ST:30000 */
- /*
-
- File: cross.c
- Author: Graham Toal
- Purpose: match words with single-char wildcards (cr?ssw?rd p?zzl?s)
- Creation date: 28/06/90 14:01:34
- Lastedit: 28/06/90 14:05:45
-
- Description:
-
- (The nice thing about the dawg structure is that it makes utilities
- like this easy to write; none of these small programs has taken
- more than about an hour. This one took five minutes ;-))
-
- */
-
-
- /* Mandatory header files */
- #include <stdio.h>
- #include "dawg.h"
- #include "grope.h"
- #include "utils.c"
-
- /* Headers here as needed on per-program basis */
-
- /* Spelling library utilities */
- #include "init.c" /* Loading dicts */
-
- #ifdef SYS_MAC
- /* To compile with THINK C 4.0, place all the relevant .h and .c
- files in a folder. Then create a project which contains this main.c
- and the libraries unix and ANSI.
- */
- #include <unix.h>
- #include <stdlib.h>
- #include <console.h>
- #endif
-
- /* This one just gets wrong letters */
- int
- #ifdef PROTOTYPES
- fix_cross(
- NODE PCCRAP *dawg, INDEX i,
- char *word, char *res,
- int len, int *found)
- #else
- fix_cross(dawg, i, word, res, len, found)
- NODE PCCRAP *dawg;
- INDEX i;
- char *word;
- char *res;
- int len;
- int *found;
- #endif
- {
- int endsword, last, ch, target;
- NODE node;
- INDEX link;
-
- for (;;) {
- node = dawg[i++];
- ch = (int)((node >> V_LETTER) & M_LETTER);
- last = ((node & (INDEX)M_END_OF_NODE) != 0);
- endsword = ((node & M_END_OF_WORD) != 0);
- link = node & M_NODE_POINTER;
-
- res[len] = ch; res[len+1] = '\0';
- target = ((int)*word)&255;
- if (ch != 0) {
- if (ch == target || target == '?') {
- if (endsword && *(word+1) == '\0') {
- fprintf(stdout, "word: %s\n", res); (*found)++;
- }
- if (*(word+1) != '\0' && link != 0)
- (void) fix_cross(dawg, link, word+1, res, len+1, found);
- }
- }
- if (last) break;
- }
- return(0==0);
- }
-
- int
- #ifdef PROTOTYPES
- crossword(NODE PCCRAP *dawg, char *word)
- #else
- crossword(dawg, word)
- NODE PCCRAP *dawg;
- char *word;
- #endif
- {
- char result[MAX_WORD_LEN];
- int i = 0;
- (void)fix_cross(dawg, (INDEX)ROOT_NODE, word, result, 0, &i);
- return(i);
- }
-
- int
- #ifdef PROTOTYPES
- main(int argc, char **argv)
- #else
- main(argc, argv)
- int argc;
- char **argv;
- #endif
- {
- NODE PCCRAP *dawg;
- INDEX edges;
- int each;
-
- #ifdef SYS_MAC
- argc = ccommand(&argv);
- #endif
-
- /* Your program goes here... */
- if (argc == 1) {
- fprintf(stderr, "usage: %s mispeled wurdz\n", argv[0]);
- exit(EXIT_ERROR);
- }
- if (!dawg_init("", &dawg, &edges)) exit(EXIT_ERROR);
- for (each = 1; each < argc; each++) {
- fprintf(stderr, "* Matches:\n");
- if (!crossword(dawg, argv[each])) {
- fprintf(stderr, "(none found)\n");
- }
- if (each+1 != argc) fprintf(stderr, "\n");
- }
-
- exit(EXIT_OK);
- }
- SHAR_EOF
- cat - << \SHAR_EOF > typo.c
- /* On brain-dead PC's, with MICROSOFT, link with /ST:30000 */
- /*
-
- File: typo.c
- Author: Graham Toal
- Purpose: offer correct spelling
- Creation date: 27/06/90
- Lastedit: 28/06/90 13:27:03
-
- Description:
-
- Like the unix 'spelltell' command but only fixes typos
- rather than soundslike errors.
-
- See my 'tell' program if you want soundex, or wait a few weeks
- for my new proper phonetic algorithm.
-
- It is a design decision that some of the functions will return the word
- presented if it is in fact correct. You can call this a bug if you
- prefer.
-
- (The nice thing about the dawg structure is that it makes utilities
- like this easy to write; none of these small programs has taken
- more than about an hour.)
-
- */
-
-
- /* Mandatory header files */
- #include <stdio.h>
- #include "dawg.h"
- #include "grope.h"
- #include "utils.c"
-
- /* Headers here as needed on per-program basis */
-
- /* Spelling library utilities */
- #include "init.c" /* Loading dicts */
- #include "check.c" /* Simple word-check */
-
- #ifdef SYS_MAC
- /* To compile with THINK C 4.0, place all the relevant .h and .c
- files in a folder. Then create a project which contains this main.c
- and the libraries unix and ANSI.
- */
- #include <unix.h>
- #include <stdlib.h>
- #include <console.h>
- #endif
-
- /* This one just gets wrong letters */
- int
- #ifdef PROTOTYPES
- fix_typos(
- NODE PCCRAP *dawg, INDEX i,
- char *word, char *res,
- int len, int errs_allowed, int *found)
- #else
- fix_typos(dawg, i, word, res, len, errs_allowed, found)
- NODE PCCRAP *dawg;
- INDEX i;
- char *word;
- char *res;
- int len;
- int errs_allowed;
- int *found;
- #endif
- {
- int endsword, last, ch;
- NODE node;
- INDEX link;
-
- for (;;) {
- node = dawg[i++];
- ch = (int)((node >> V_LETTER) & M_LETTER);
- last = ((node & (INDEX)M_END_OF_NODE) != 0);
- endsword = ((node & M_END_OF_WORD) != 0);
- link = node & M_NODE_POINTER;
-
- res[len] = ch; res[len+1] = '\0';
-
- if (ch != 0) {
- if (ch == ((int)*word)&255) {
- if (endsword && *(word+1) == '\0') {
- fprintf(stdout, "word: %s\n", res); (*found)++;
- }
- if (*(word+1) != '\0' && link != 0)
- (void) fix_typos(dawg, link, word+1, res, len+1, errs_allowed, found);
- } else {
- /* Try a different letter here instead? */
- if (errs_allowed > 0) {
- if (endsword && *(word+1) == '\0') {
- fprintf(stdout, "word: %s\n", res); (*found)++;
- }
- if (*(word+1) != '\0' && link != 0)
- (void) fix_typos(
- dawg, link, word+1, res, len+1, errs_allowed-1, found);
- }
- }
- }
- if (last) break;
- }
- return(0==0);
- }
-
-
- /* And this one corrects omitted letters by inserting one. */
-
- int
- #ifdef PROTOTYPES
- fix_insert(
- NODE PCCRAP *dawg, INDEX i,
- char *word, char *res,
- int len, int errs_allowed, int *found)
- #else
- fix_insert(dawg, i, word, res, len, errs_allowed, found)
- NODE PCCRAP *dawg;
- INDEX i;
- char *word;
- char *res;
- int len;
- int errs_allowed;
- int *found;
- #endif
- {
- int endsword, last, ch;
- NODE node;
- INDEX link;
-
- for (;;) {
- node = dawg[i++];
- ch = (int)((node >> V_LETTER) & M_LETTER);
- endsword = ((node & M_END_OF_WORD) != 0);
- last = ((node & M_END_OF_NODE) != 0);
-
- link = node & M_NODE_POINTER;
-
- res[len] = ch; res[len+1] = '\0';
-
- if (ch != 0) {
-
- if (endsword && *word == '\0' && errs_allowed > 0) {
- fprintf(stdout, "word: %s\n", res); (*found)++;
- }
-
- if (ch == ((int)*word)&255) {
- if (endsword && *(word+1) == '\0') {
- fprintf(stdout, "word: %s\n", res); (*found)++;
- /*return(0==0);*/
- }
- if (*word == '\0') {
- if (errs_allowed > 0)
- (void) fix_insert(
- dawg, link, word+1, res, len+1, errs_allowed, found);
- } else {
- if (link != 0)
- (void) fix_insert(
- dawg, link, word+1, res, len+1, errs_allowed, found);
- }
- }
- /* Insert this letter (len+1) and see if rest matches */
- if (errs_allowed > 0) {
- if (link != 0)
- (void) fix_insert(dawg, link, word, res, len+1, errs_allowed-1, found);
- }
- }
- if (last) break;
- }
- return(0==0);
- }
-
-
- /* And finally catch inserted letters by deleting one */
-
- int
- #ifdef PROTOTYPES
- fix_delete(
- NODE PCCRAP *dawg, INDEX i,
- char *word, char *res,
- int len, int errs_allowed, int *found)
- #else
- fix_delete(dawg, i, word, res, len, errs_allowed, found)
- NODE PCCRAP *dawg;
- INDEX i;
- char *word;
- char *res;
- int len;
- int errs_allowed;
- int *found;
- #endif
- {
- int endsword, last, ch;
- NODE node;
- INDEX link;
-
- if (errs_allowed > 0) {
- if (*(word+1) != '\0') {
- (void) fix_delete(dawg, i, word+1, res, len, errs_allowed-1, found);
- } else {
- /* Shouldn't get this far, but does :-( */
- return(0==0);
- }
- }
- for (;;) {
- node = dawg[i++];
- ch = (int)((node >> V_LETTER) & M_LETTER);
- endsword = ((node & M_END_OF_WORD) != 0);
- last = ((node & M_END_OF_NODE) != 0);
-
- link = node & M_NODE_POINTER;
-
- res[len] = ch; res[len+1] = '\0';
-
- if (ch != 0) {
- if (ch == ((int)*word)&255) {
-
- if (errs_allowed > 0 &&
- endsword &&
- *(word+1) != '\0' &&
- *(word+2) == '\0') {
- fprintf(stdout, "word: %s\n", res); (*found)++;
- }
-
- if (endsword && *(word+1) == '\0') {
- fprintf(stdout, "word: %s\n", res); (*found)++;
- return(0==0);
- }
- if (*(word+1) != '\0' && link != 0)
- (void) fix_delete(dawg, link, word+1, res, len+1, errs_allowed, found);
- }
- }
-
- if (last) break;
- }
- return(0==0);
- }
-
-
- int
- #ifdef PROTOTYPES
- dawg_typo(NODE PCCRAP *dawg, char *word)
- #else
- dawg_typo(dawg, word)
- NODE PCCRAP *dawg;
- char *word;
- #endif
- {
- char result[MAX_WORD_LEN];
- int i = 0;
- (void)fix_typos(dawg, (INDEX)ROOT_NODE, word, result, 0, 1, &i);
- return(i);
- }
-
- int
- #ifdef PROTOTYPES
- dawg_insert(NODE PCCRAP *dawg, char *word)
- #else
- dawg_insert(dawg, word)
- NODE PCCRAP *dawg;
- char *word;
- #endif
- {
- char result[MAX_WORD_LEN];
- int i = 0;
- (void)fix_insert(dawg, (INDEX)ROOT_NODE, word, result, 0, 1, &i);
- return(i);
- }
-
- int
- #ifdef PROTOTYPES
- dawg_delete(NODE PCCRAP *dawg, char *word)
- #else
- dawg_delete(dawg, word)
- NODE PCCRAP *dawg;
- char *word;
- #endif
- {
- char result[MAX_WORD_LEN];
- int i = 0;
- (void)fix_delete(dawg, (INDEX)ROOT_NODE, word, result, 0, 1, &i);
- return(i);
- }
-
- int
- #ifdef PROTOTYPES
- dawg_transpose(NODE PCCRAP *dawg, char *word)
- #else
- dawg_transpose(dawg, word)
- NODE PCCRAP *dawg;
- char *word;
- #endif
- {
- int i = 0, l, c;
- for (l = 0; word[l+1] != '\0'; l++) {
- c = word[l]; word[l] = word[l+1]; word[l+1] = c;
- if (dawg_check(dawg, word)) {
- i++;
- fprintf(stdout, "word: %s\n", word);
- }
- c = word[l]; word[l] = word[l+1]; word[l+1] = c;
- }
- return(i /* != 0 */);
- }
-
-
- int
- #ifdef PROTOTYPES
- main(int argc, char **argv)
- #else
- main(argc, argv)
- int argc;
- char **argv;
- #endif
- {
- NODE PCCRAP *dawg;
- INDEX edges;
- int each;
-
- #ifdef SYS_MAC
- argc = ccommand(&argv);
- #endif
-
- /* Your program goes here... */
- if (argc == 1) {
- fprintf(stderr, "usage: %s mispeled wurdz\n", argv[0]);
- exit(EXIT_ERROR);
- }
- if (!dawg_init("", &dawg, &edges)) exit(EXIT_ERROR);
- for (each = 1; each < argc; each++) {
- fprintf(stderr, "* Wrong char:\n");
- if (!dawg_typo(dawg, argv[each])) {
- fprintf(stderr, "(none found)\n");
- }
- fprintf(stderr, "* Omitted char\n");
- if (!dawg_insert(dawg, argv[each])) {
- fprintf(stderr, "(none found)\n");
- }
- fprintf(stderr, "* Inserted char:\n");
- if (!dawg_delete(dawg, argv[each])) {
- fprintf(stderr, "(none found)\n");
- }
- fprintf(stderr, "* Transposed char:\n");
- if (!dawg_transpose(dawg, argv[each])) {
- fprintf(stderr, "(none found)\n");
- }
- if (each+1 != argc) fprintf(stderr, "\n");
- }
-
- exit(EXIT_OK);
- }
- SHAR_EOF
-