home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
DP Tool Club 8
/
CDASC08.ISO
/
VRAC
/
CUJJUN93.ZIP
/
SUER01.C
< prev
next >
Wrap
C/C++ Source or Header
|
1993-04-12
|
27KB
|
813 lines
/*****************************************************/
/* NATURAL.C Copyright (c) 1993 Russell Suereth */
/*****************************************************/
/*****************************************************/
/* This is the original natural language processor */
/* code plus expansions for tense and number. */
/*****************************************************/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include "natural.h"
/* Forward declarations for every function defined in this file. */

/* Start-up and per-sentence state handling. */
void initialize(void);
void reset_sentence(void);
/* Dictionary lookup for one input word and the fixed-column field */
/* extractors that work on the current dictionary record.          */
void get_record(char *);
char *extract_word(void);
int match_record(char *, int);
char *extract_root(void);
/* Sentence structure recognition; both return 0 on a match. */
int check_underlying(void);
int check_type(char *,int);
/* Auxiliary lookup in the dictionary. */
void get_aux(void);
int match_aux(void);
/* Extraction of subject, action, place, tense/usage and number */
/* from a sentence whose structure was recognized.              */
void check_subject(void);
void check_action(void);
void check_place(void);
void check_aux_verb(void);
void check_number(void);
/* Response generation from earlier sentences. */
void make_response(void);
void make_answer(int);
void get_verb(char, char, char);
int match_verb(char, char, char);
/* Dictionary file (opened in main) and the record most recently */
/* read from it with fgets.                                       */
FILE *infile;
char dic_record[80];
/* Index of the sentence being processed (main stops after 20) */
/* and index/count of the words stored for that sentence.       */
int sentence;
int word_ct;
/* Per-word data for the current sentence, cleared by          */
/* reset_sentence: the word text, the verb root, the type and  */
/* phrase assigned by check_underlying, and up to five         */
/* candidate types found in the dictionary.                    */
char word_array[10][25];
char root_array[10][25];
char prime_types[10][11];
char phrases[10][11];
char type_array[10][5][11];
/* Per-sentence history filled by check_subject, check_action */
/* and check_place.                                            */
char subjects[20][25];
char actions[20][25];
char places[20][31];
/* Text printed by main after each sentence. */
char response[200];
/* Tense, number and usage codes copied from the dictionary */
/* record of the most recently matched VERB (match_record). */
unsigned char verb_tense[5];
unsigned char verb_number[5];
unsigned char verb_usage;
/* The same codes for the sentence auxiliary (match_aux). */
unsigned char aux_tense[5];
unsigned char aux_number[5];
unsigned char aux_usage;
/* Number code of the subject: taken from a PRON record in */
/* match_record, or set to SINGULAR for a NAME.            */
unsigned char subject_number;
/* Per-sentence results: tense, number, usage, subject kind */
/* (NAME or PRONOUN), auxiliary meaning code and auxiliary  */
/* text.                                                    */
unsigned char tenses[20];
unsigned char numbers[20];
unsigned char usages[20];
unsigned char subjects_type[20];
unsigned char aux_meaning[20][5];
char auxiliaries[20][25];
/*****************************************************/
/* Program entry point. Opens the dictionary file    */
/* "diction", then reads sentences from standard     */
/* input until an empty line, end of input, or 20    */
/* sentences. Each sentence is split on blanks,      */
/* looked up word by word, analyzed, and answered.   */
/* Returns 0 on normal termination; exits with       */
/* EXIT_FAILURE if the dictionary cannot be opened.  */
/*****************************************************/
int main(void)
{
    char *cur_word;
    char in_sentence[80];
    size_t len;
    int ch;

    initialize();
    if ((infile = fopen("diction", "r+")) == NULL) {
        printf ("\nError opening dictionary\n");
        exit(EXIT_FAILURE);
    }
    printf("\nSentence: ");
    /* fgets bounds the read to the buffer; gets() could not. */
    while (fgets(in_sentence, sizeof in_sentence, stdin) != NULL) {
        len = strcspn(in_sentence, "\r\n");
        if (in_sentence[len] == '\0') {
            /* No line terminator was stored: the line was longer  */
            /* than the buffer (or input ended). Discard the rest  */
            /* so it is not mistaken for the next sentence.        */
            while ((ch = getchar()) != EOF && ch != '\n')
                ;
        }
        in_sentence[len] = '\0';
        if (in_sentence[0] == '\0') break;
        reset_sentence();
        cur_word = strtok(in_sentence, " ");
        while (cur_word != NULL) {
            get_record(cur_word);
            cur_word = strtok(NULL, " ");
            /* The per-word arrays hold at most 10 words. */
            if (++word_ct > 9) break;
        }
        if (check_underlying() == 0) {
            check_subject();
            check_action();
            check_place();
            check_aux_verb();
            check_number();
        }
        make_response();
        printf("Response: %s\n\nSentence: ", response);
        /* The per-sentence arrays hold at most 20 sentences. */
        if (++sentence > 19) break;
    } /* end while */
    fclose(infile);
    return 0;
}
/*****************************************************/
/* One-time start-up: begin at sentence 0 and zero   */
/* every per-sentence history table.                 */
/*****************************************************/
void initialize()
{
    /* Character tables holding text per sentence. */
    memset(subjects, '\0', sizeof subjects);
    memset(actions, '\0', sizeof actions);
    memset(places, '\0', sizeof places);
    memset(auxiliaries, '\0', sizeof auxiliaries);

    /* Code tables holding one entry per sentence. */
    memset(tenses, '\0', sizeof tenses);
    memset(numbers, '\0', sizeof numbers);
    memset(usages, '\0', sizeof usages);
    memset(subjects_type, '\0', sizeof subjects_type);
    memset(aux_meaning, '\0', sizeof aux_meaning);

    sentence = 0;
}
/*****************************************************/
/* Per-sentence reset: clear the word tables, the    */
/* word counter and the response text before a new   */
/* input sentence is processed.                      */
/*****************************************************/
void reset_sentence()
{
    memset(type_array, '\0', sizeof type_array);
    memset(phrases, '\0', sizeof phrases);
    memset(prime_types, '\0', sizeof prime_types);
    memset(root_array, '\0', sizeof root_array);
    memset(word_array, '\0', sizeof word_array);
    *response = '\0';
    word_ct = 0;
}
/*****************************************************/
/* Scan the dictionary for every record matching the */
/* passed word and store each matching type in       */
/* type_array for the current word. If no record     */
/* matches, the word is typed "NAME" when it starts  */
/* with an upper-case letter, otherwise "NOTFOUND".  */
/* The word itself is saved in word_array.           */
/*                                                   */
/* pass_word: NUL-terminated input word.             */
/*****************************************************/
void get_record(char *pass_word)
{
    int types = 0;
    /* Number of type slots available for one word. */
    int max_types = (int) (sizeof type_array[0] / sizeof type_array[0][0]);
    size_t word_room = sizeof word_array[0] - 1;

    rewind(infile);
    /* Loop on the fgets result so that a final record without a   */
    /* newline is still examined and a read error ends the scan.   */
    /* Stop once every type slot is used; another match would be   */
    /* written past the end of the word's slots.                   */
    while (types < max_types &&
           fgets(dic_record, sizeof dic_record, infile) != NULL) {
        if (match_record(pass_word, types) == 0)
            types++;
    }
    if (types == 0) {
        /* <ctype.h> functions need an unsigned char value. */
        if (isupper((unsigned char) pass_word[0]))
            strcpy(type_array[word_ct][types], "NAME");
        else
            strcpy(type_array[word_ct][types],
                   "NOTFOUND");
    }
    /* Input words can be longer than a word_array slot, so copy */
    /* at most what fits and terminate explicitly.               */
    strncpy(word_array[word_ct], pass_word, word_room);
    word_array[word_ct][word_room] = '\0';
    return;
}
/*****************************************************/
/* Compare the passed word with the word in the */
/* current dictionary record. If they are the same, */
/* then extract the type (NOUN, VERB, etc.). If the */
/* type is PRON, then extract pronoun information. */
/* If the type is VERB, then extract verb */
/* information. */
/*****************************************************/
int match_record(char *pass_word, int types)
{
int i, j;
char *root;
char *dic_word;
dic_word = extract_word();
/* Check if passed word equals dictionary word */
if (strcmpi(pass_word, dic_word) != 0) return(1);
/* Word found, get the type */
for (i=24,j=0; i<28; i++) {
if (isspace(dic_record[i])) break;
type_array[word_ct][types][j++] = dic_record[i];
}
/* Trim the type */
type_array[word_ct][types][j] = '\0';
if (strcmp(type_array[word_ct][types],
"PRON") == 0)
subject_number = dic_record[41];
if (strcmp(type_array[word_ct][types],
"VERB") == 0) {
root = extract_root();
strcpy(root_array[word_ct], root);
verb_usage = dic_record[29];
for (i=30,j=0; i<34; i++,j++) {
if (isspace(dic_record[i])) break;
verb_tense[j] = dic_record[i];
}
verb_tense[j] = '\0';
for (i=41,j=0; i<43; i++,j++) {
if (isspace(dic_record[i])) break;
verb_number[j] = dic_record[i];
}
verb_number[j] = '\0';
}
return(0);
}
/*****************************************************/
/* Extract the word from the current dictionary      */
/* record. The word occupies the first 24 characters */
/* of the record; trailing white space is removed.   */
/*                                                   */
/* Returns a pointer to a static buffer that is      */
/* overwritten by the next call.                     */
/*****************************************************/
char *extract_word()
{
    /* Static storage: the caller uses the result after this */
    /* function has returned, so it must not be automatic.   */
    static char dic_word[25];
    size_t len;

    strncpy(dic_word, dic_record, 24);
    /* strncpy does not terminate when all 24 bytes are used. */
    dic_word[24] = '\0';

    /* Trim from the real end of the text so that records     */
    /* shorter than 24 characters lose their newline as well. */
    len = strlen(dic_word);
    while (len > 0 && isspace((unsigned char) dic_word[len - 1]))
        dic_word[--len] = '\0';

    return(dic_word);
}
/*****************************************************/
/* Extract the root from the current dictionary      */
/* record. The root identifies a group of similar    */
/* words (the root for run, ran, runs and running is */
/* run). It is at most 14 characters long and starts */
/* in column 47; it ends at the first white space or */
/* at the end of the record.                         */
/*                                                   */
/* Returns a pointer to a static buffer that is      */
/* overwritten by the next call.                     */
/*****************************************************/
char *extract_root()
{
    /* Static storage: the caller uses the result after this */
    /* function has returned, so it must not be automatic.   */
    static char root[15];
    int col;
    int len = 0;

    for (col = 46; col < 60; col++) {
        unsigned char ch = (unsigned char) dic_record[col];
        /* A NUL means the record ended before column 60. */
        if (ch == '\0' || isspace(ch)) break;
        root[len++] = (char) ch;
    }
    root[len] = '\0';
    return(root);
}
/*****************************************************/
/* Determine whether the input sentence begins with  */
/* a known underlying structure. The structures are  */
/* tried in table order; for the first one whose     */
/* types are all present, the type and phrase of     */
/* each word are recorded. When the structure has    */
/* auxiliaries (always the words right after the     */
/* first one), they are joined with single blanks    */
/* into auxiliaries[sentence] and get_aux is called. */
/*                                                   */
/* Returns 0 when a structure matched, 1 otherwise.  */
/*****************************************************/
int check_underlying()
{
    struct underlying_pattern {
        int word_total;        /* words in the structure          */
        int aux_total;         /* AUX words starting at index 1   */
        char *word_types[8];   /* required type of each word      */
        char *word_phrases[8]; /* phrase assigned to each word    */
    };
    static struct underlying_pattern patterns[] = {
        /* WH-AUX-PRON-VERB */
        { 4, 1,
          { "WH", "AUX", "PRON", "VERB" },
          { "WHQUESTION", "VERBPHRASE", "NOUNPHRASE", "VERBPHRASE" } },
        /* PRON-AUX-VERB-PREP-DET-NOUN */
        { 6, 1,
          { "PRON", "AUX", "VERB", "PREP", "DET", "NOUN" },
          { "NOUNPHRASE", "VERBPHRASE", "VERBPHRASE",
            "PREPPHRASE", "PREPPHRASE", "PREPPHRASE" } },
        /* WH-AUX-NAME-VERB */
        { 4, 1,
          { "WH", "AUX", "NAME", "VERB" },
          { "WHQUESTION", "VERBPHRASE", "NOUNPHRASE", "VERBPHRASE" } },
        /* NAME-AUX-AUX-AUX-VERB-PREP-DET-NOUN */
        { 8, 3,
          { "NAME", "AUX", "AUX", "AUX", "VERB", "PREP", "DET", "NOUN" },
          { "NOUNPHRASE", "VERBPHRASE", "VERBPHRASE", "VERBPHRASE",
            "VERBPHRASE", "PREPPHRASE", "PREPPHRASE", "PREPPHRASE" } },
        /* NAME-AUX-AUX-VERB-PREP-DET-NOUN */
        { 7, 2,
          { "NAME", "AUX", "AUX", "VERB", "PREP", "DET", "NOUN" },
          { "NOUNPHRASE", "VERBPHRASE", "VERBPHRASE", "VERBPHRASE",
            "PREPPHRASE", "PREPPHRASE", "PREPPHRASE" } },
        /* NAME-AUX-VERB-PREP-DET-NOUN */
        { 6, 1,
          { "NAME", "AUX", "VERB", "PREP", "DET", "NOUN" },
          { "NOUNPHRASE", "VERBPHRASE", "VERBPHRASE",
            "PREPPHRASE", "PREPPHRASE", "PREPPHRASE" } },
        /* NAME-VERB-PREP-DET-NOUN */
        { 5, 0,
          { "NAME", "VERB", "PREP", "DET", "NOUN" },
          { "NOUNPHRASE", "VERBPHRASE",
            "PREPPHRASE", "PREPPHRASE", "PREPPHRASE" } }
    };
    int pattern_total = (int) (sizeof patterns / sizeof patterns[0]);
    size_t aux_size = sizeof auxiliaries[0];
    char *aux;
    int p, w;

    for (p = 0; p < pattern_total; p++) {
        /* Every position must offer the required type. */
        for (w = 0; w < patterns[p].word_total; w++) {
            if (check_type(patterns[p].word_types[w], w) != 0)
                break;
        }
        if (w < patterns[p].word_total)
            continue;

        for (w = 0; w < patterns[p].word_total; w++) {
            strcpy(prime_types[w], patterns[p].word_types[w]);
            strcpy(phrases[w], patterns[p].word_phrases[w]);
        }

        if (patterns[p].aux_total > 0) {
            /* Bounded appends: several auxiliaries joined together */
            /* must not run past the end of the entry.              */
            aux = auxiliaries[sentence];
            aux[0] = '\0';
            for (w = 1; w <= patterns[p].aux_total; w++) {
                if (w > 1)
                    strncat(aux, " ", aux_size - strlen(aux) - 1);
                strncat(aux, word_array[w],
                        aux_size - strlen(aux) - 1);
            }
            get_aux();
        }
        return(0);
    }
    return(1);
}
/*****************************************************/
/* Compare the passed type with all the types stored */
/* for one word in type_array.                       */
/*                                                   */
/* pass_type:   type name to look for ("VERB", ...). */
/* pass_number: index of the word in the sentence.   */
/*                                                   */
/* Returns 0 if the type is found, 1 if it is not or */
/* if pass_number is outside the word table.         */
/*****************************************************/
int check_type(char *pass_type, int pass_number)
{
    int word_limit = (int) (sizeof type_array / sizeof type_array[0]);
    int slot_limit = (int) (sizeof type_array[0] / sizeof type_array[0][0]);
    int slot;

    if (pass_number < 0 || pass_number >= word_limit)
        return(1);

    /* The list ends at the first empty slot, but a word whose  */
    /* slots are all filled has no empty slot, so the scan is   */
    /* also bounded by the slot count.                          */
    for (slot = 0;
         slot < slot_limit && type_array[pass_number][slot][0];
         slot++) {
        if (strcmp(type_array[pass_number][slot],
                   pass_type) == 0)
            /* Passed type is found in array */
            return(0);
    }
    /* Passed type is not found in array */
    return(1);
}
/*****************************************************/
/* Record the subject of the sentence: the first     */
/* word whose assigned type is "NAME" or "PRON" is   */
/* copied to the subjects table and its kind is      */
/* noted. A name also sets the subject number to     */
/* SINGULAR.                                         */
/*****************************************************/
void check_subject()
{
    int w;

    for (w = 0; w < word_ct; w++) {
        if (strcmp(prime_types[w], "NAME") == 0) {
            subjects_type[sentence] = NAME;
            subject_number = SINGULAR;
        }
        else if (strcmp(prime_types[w], "PRON") == 0) {
            subjects_type[sentence] = PRONOUN;
        }
        else {
            continue;
        }
        /* Only the first subject word is kept. */
        strcpy(subjects[sentence], word_array[w]);
        return;
    }
}
/*****************************************************/
/* Record the action of the sentence: the root of    */
/* the first word whose assigned type is "VERB" is   */
/* copied from the root table to the actions table.  */
/*****************************************************/
void check_action()
{
    int w = 0;

    /* Advance to the first verb, if there is one. */
    while (w < word_ct && strcmp(prime_types[w], "VERB") != 0)
        w++;

    if (w < word_ct)
        strcpy(actions[sentence], root_array[w]);
}
/*****************************************************/
/* If the phrase is a "PREPPHRASE", then all the */
/* words in the phrase refer to a place. Concatenate */
/* these words to the places array. */
/*****************************************************/
void check_place()
{
int i;
for (i=0; i<word_ct; i++) {
if (strcmp(phrases[i], "PREPPHRASE") == 0) {
strcat(places[sentence], " ");
strcat(places[sentence], word_array[i]);
}
}
return;
}
/*****************************************************/
/* Look back over at most the three preceding        */
/* sentences for one with the same subject and       */
/* action, a non-empty place, and a tense contained  */
/* in the candidate string. On success that tense is */
/* copied to the current sentence and 1 is returned; */
/* otherwise 0 is returned.                          */
/*****************************************************/
static int recall_recent_tense(char *candidates)
{
    int oldest = sentence - 3;
    int prev;

    for (prev = sentence - 1; prev >= 0 && prev >= oldest; prev--) {
        if (strcmpi(subjects[prev], subjects[sentence]) != 0)
            continue;
        if (strcmpi(actions[prev], actions[sentence]) != 0)
            continue;
        if (strchr(candidates, tenses[prev]) == NULL)
            continue;
        if (strlen(places[prev]) == 0)
            continue;
        tenses[sentence] = tenses[prev];
        return 1;
    }
    return 0;
}

/*****************************************************/
/* Determine the tense and usage of the sentence.    */
/* With an auxiliary: usage must agree between       */
/* auxiliary and verb, and the tense is chosen from  */
/* the codes they share. Without one: the verb alone */
/* decides. When several tenses remain possible, a   */
/* matching recent sentence is consulted and PRESENT */
/* is the fallback.                                  */
/*****************************************************/
void check_aux_verb()
{
    char shared[5];
    char *hit;
    int a;
    int count;

    /* No auxiliary in sentence */
    if (strlen(auxiliaries[sentence]) == 0) {
        usages[sentence] = verb_usage;
        if (strchr((char *) verb_tense, PAST) != NULL) {
            tenses[sentence] = PAST;
            return;
        }
        /* Verb tense is present or future */
        if (!recall_recent_tense((char *) verb_tense))
            tenses[sentence] = PRESENT;
        return;
    }

    /* Auxiliary in sentence */
    if (aux_usage != verb_usage) {
        tenses[sentence] = UNKNOWN;
        usages[sentence] = UNKNOWN;
        return;
    }

    /* Collect the tense codes present in both lists. */
    count = 0;
    for (a = 0; aux_tense[a]; a++) {
        hit = strchr((char *) verb_tense, aux_tense[a]);
        if (hit != NULL)
            shared[count++] = *hit;
    }
    shared[count] = '\0';

    if (count == 0) {
        tenses[sentence] = UNKNOWN;
        usages[sentence] = UNKNOWN;
        return;
    }

    usages[sentence] = aux_usage;
    if (count == 1) {
        tenses[sentence] = shared[0];
        return;
    }

    if (!recall_recent_tense(shared))
        tenses[sentence] = PRESENT;
}
/*****************************************************/
/* Decide the number of the sentence. The subject    */
/* number is accepted when it appears among the verb */
/* number codes and, if the sentence has an          */
/* auxiliary, among the auxiliary number codes too.  */
/* Otherwise the number is UNKNOWN.                  */
/*****************************************************/
void check_number()
{
    if (strchr(verb_number, subject_number) != NULL &&
        (strlen(auxiliaries[sentence]) == 0 ||
         strchr(aux_number, subject_number) != NULL))
        numbers[sentence] = subject_number;
    else
        numbers[sentence] = UNKNOWN;
}
/*****************************************************/
/* Read the dictionary from the start until a record */
/* matches the sentence auxiliary; match_aux         */
/* extracts the auxiliary information from it.       */
/*****************************************************/
void get_aux()
{
    rewind(infile);
    /* Loop on the fgets result so that a final record without a */
    /* newline is still examined and a read error ends the scan. */
    while (fgets(dic_record, sizeof dic_record, infile) != NULL) {
        if (match_aux() == 0)
            break;
    }
    return;
}
/*****************************************************/
/* If the sentence auxiliary equals (ignoring case)  */
/* the word in the current dictionary record, copy   */
/* the auxiliary information from the record: usage  */
/* (column 30), tense codes (columns 31-34), number  */
/* codes (columns 42-43) and meaning code (columns   */
/* 45-47). Each field ends at the first white space. */
/*                                                   */
/* Returns 0 when the record matches, 1 otherwise.   */
/*****************************************************/
int match_aux()
{
    int i,j;
    char *dic_word;

    dic_word = extract_word();
    if (strcmpi(auxiliaries[sentence], dic_word) != 0)
        return(1);

    aux_usage = dic_record[29];

    /* <ctype.h> functions need an unsigned char value. */
    for (i=30,j=0; i<34; i++,j++) {
        if (isspace((unsigned char) dic_record[i])) break;
        aux_tense[j] = dic_record[i];
    }
    /* Trim the tense */
    aux_tense[j] = '\0';

    for (i=41,j=0; i<43; i++,j++) {
        if (isspace((unsigned char) dic_record[i])) break;
        aux_number[j] = dic_record[i];
    }
    /* Trim the number */
    aux_number[j] = '\0';

    for (i=44,j=0; i<47; i++,j++) {
        if (isspace((unsigned char) dic_record[i])) break;
        aux_meaning[sentence][j] = dic_record[i];
    }
    /* Trim the meaning, as is done for the two fields above,  */
    /* rather than relying on the entry having been zeroed.    */
    aux_meaning[sentence][j] = '\0';

    return(0);
}
/*****************************************************/
/* Generate a response with information from a */
/* matching, previous sentence. */
/*****************************************************/
void make_response()
{
int i;
/***************************************************/
/* Input sentence is not asking for information. */
/***************************************************/
if (strcmpi(word_array[0], "where") != 0) {
strcpy(response, "OK");
return;
}
/***************************************************/
/* Match subject, action, tense, and meaning. */
/***************************************************/
for (i=sentence-1; i>=0; i--) {
if ((strcmpi(subjects[i],subjects[sentence])==0) &&
(strcmpi(actions[i], actions[sentence]) ==0) &&
(strlen(places[i]) > 0) &&
(tenses[i] == tenses[sentence]) &&
(strpbrk(aux_meaning[i],aux_meaning[sentence])
!= NULL)) {
make_answer(i);
return;
}
}
/***************************************************/
/* Match subject, action, and tense. */
/***************************************************/
for (i=sentence-1; i>=0; i--) {
if ((strcmpi(subjects[i],subjects[sentence])==0) &&
(strcmpi(actions[i], actions[sentence]) ==0) &&
(strlen(places[i]) > 0) &&
(tenses[i] == tenses[sentence])) {
make_answer(i);
return;
}
}
/***************************************************/
/* Match subject, action, and meaning. */
/***************************************************/
for (i=sentence-1; i>=0; i--) {
if ((strcmpi(subjects[i],subjects[sentence])==0) &&
(strcmpi(actions[i], actions[sentence]) ==0) &&
(strlen(places[i]) > 0) &&
(strpbrk(aux_meaning[i],aux_meaning[sentence])
!= NULL)) {
strcpy(response, "I'm not sure, but ");
make_answer(i);
return;
}
}
/***************************************************/
/* Match subject and action. */
/***************************************************/
for (i=sentence-1; i>=0; i--) {
if ((strcmpi(subjects[i],subjects[sentence])==0) &&
(strcmpi(actions[i], actions[sentence]) ==0) &&
(strlen(places[i]) > 0)) {
strcpy(response, "I'm not sure, but ");
make_answer(i);
return;
}
}
strcpy(response, "I don't know");
return;
}
/*****************************************************/
/* Append an answer built from an earlier sentence   */
/* to the response: subject, auxiliary (if any), the */
/* verb form selected by get_verb, and the place. A  */
/* pronoun subject is capitalized when it starts the */
/* response and lower-cased otherwise; the stored    */
/* subject text itself is modified.                  */
/*                                                   */
/* prev_sentence: index of the earlier sentence.     */
/*****************************************************/
void make_answer(int prev_sentence)
{
    char *who = subjects[prev_sentence];
    char *helper = auxiliaries[prev_sentence];

    if (subjects_type[prev_sentence] == PRONOUN) {
        if (response[0] == '\0')
            who[0] = (char) toupper(who[0]);
        else
            who[0] = (char) tolower(who[0]);
    }

    strcat(response, who);
    strcat(response, " ");

    if (helper[0] != '\0') {
        strcat(response, helper);
        strcat(response, " ");
    }

    get_verb(tenses[prev_sentence],
             numbers[prev_sentence],
             usages[prev_sentence]);

    strcat(response, places[prev_sentence]);
}
/*****************************************************/
/* Read the dictionary from the start until          */
/* match_verb accepts a record for the passed tense, */
/* number and usage (match_verb appends the verb to  */
/* the response).                                    */
/*****************************************************/
void get_verb(char pass_tense,
              char pass_number, char pass_usage)
{
    rewind(infile);
    /* Loop on the fgets result so that a final record without a */
    /* newline is still examined and a read error ends the scan. */
    while (fgets(dic_record, sizeof dic_record, infile) != NULL) {
        if (match_verb(pass_tense,
                       pass_number, pass_usage) == 0)
            break;
    }
    return;
}
/*****************************************************/
/* If the verb information in the current record */
/* matches the passed information, then move the */
/* correct verb to the response. */
/*****************************************************/
int match_verb(char pass_tense,
char pass_number, char pass_usage)
{
int i;
char *root;
char *dic_word;
root = extract_root();
/* Match verb with root */
if (strcmpi(actions[sentence], root) == 0) {
/* Match verb with tense */
for (i=30; i<34; i++) {
if (isspace(dic_record[i])) return(1);
if (dic_record[i] == pass_tense) break;
}
/* Match verb with number */
for (i=41; i<43; i++) {
if (isspace(dic_record[i])) return(1);
if (dic_record[i] == pass_number) break;
}
/* Match verb with usage */
if (dic_record[29] == pass_usage) {
dic_word = extract_word();
strcat(response, dic_word);
return(0);
}
}
return(1);
}
/* End of file */