DP Tool Club 8

home *** CD-ROM | disk | FTP | other *** search

/ DP Tool Club 8 / CDASC08.ISO / VRAC / CUJJUN93.ZIP / SUER01.C < prev next >

Wrap

C/C++ Source or Header | 1993-04-12 | 27KB | 813 lines

/*****************************************************/ /* NATURAL.C Copyright (c) 1993 Russell Suereth */ /*****************************************************/ /*****************************************************/ /* This is the original natural language processor */ /* code plus expansions for tense and number. */ /*****************************************************/ #include <stdlib.h> #include <stdio.h> #include <string.h> #include <ctype.h> #include "natural.h" void initialize(void); void reset_sentence(void); void get_record(char *); char *extract_word(void); int match_record(char *, int); char *extract_root(void); int check_underlying(void); int check_type(char *,int); void get_aux(void); int match_aux(void); void check_subject(void); void check_action(void); void check_place(void); void check_aux_verb(void); void check_number(void); void make_response(void); void make_answer(int); void get_verb(char, char, char); int match_verb(char, char, char); FILE *infile; char dic_record[80]; int sentence; int word_ct; char word_array[10][25]; char root_array[10][25]; char prime_types[10][11]; char phrases[10][11]; char type_array[10][5][11]; char subjects[20][25]; char actions[20][25]; char places[20][31]; char response[200]; unsigned char verb_tense[5]; unsigned char verb_number[5]; unsigned char verb_usage; unsigned char aux_tense[5]; unsigned char aux_number[5]; unsigned char aux_usage; unsigned char subject_number; unsigned char tenses[20]; unsigned char numbers[20]; unsigned char usages[20]; unsigned char subjects_type[20]; unsigned char aux_meaning[20][5]; char auxiliaries[20][25]; void main() { char *cur_word; char in_sentence[80]; initialize(); if ((infile = fopen("diction", "r+")) == NULL) { printf ("\nError opening dictionary\n"); exit(0); } printf("\nSentence: "); while(gets(in_sentence)) { if (in_sentence[0] == '\0') break; reset_sentence(); cur_word = strtok(in_sentence, " "); while(cur_word != NULL) { get_record(cur_word); cur_word = strtok(NULL, " "); if (++word_ct > 9) break; } if (check_underlying() == 0) { check_subject(); check_action(); check_place(); check_aux_verb(); check_number(); } make_response(); printf("Response: %s\n\nSentence: ", response); if (++sentence > 19) break; } /* end while */ fclose(infile); return; } /*****************************************************/ /* Initialize variables. */ /*****************************************************/ void initialize() { sentence = 0; memset(subjects, '\0', 500); memset(actions, '\0', 500); memset(places, '\0', 620); memset(tenses, '\0', 20); memset(numbers, '\0', 20); memset(usages, '\0', 20); memset(subjects_type, '\0', 20); memset(aux_meaning, '\0', 100); memset(auxiliaries, '\0', 500); return; } /*****************************************************/ /* These variables are initialized for each new */ /* input sentence. */ /*****************************************************/ void reset_sentence() { word_ct = 0; memset(word_array, '\0', 250); memset(root_array, '\0', 250); memset(prime_types, '\0', 110); memset(phrases, '\0', 110); memset(type_array, '\0', 550); response[0] = '\0'; return; } /*****************************************************/ /* Get all the records from the dictionary. If the */ /* passed word is not in the dictionary, then the */ /* word could be a name. */ /*****************************************************/ void get_record(char *pass_word) { int types = 0; rewind (infile); fgets(dic_record, 80, infile); while (! feof(infile)) { if (match_record(pass_word, types) == 0) types++; fgets(dic_record, 80, infile); } if (types == 0) { if (isupper( (int) pass_word[0])) strcpy(type_array[word_ct][types], "NAME"); else strcpy(type_array[word_ct][types], "NOTFOUND"); } strcpy(word_array[word_ct], pass_word); return; } /*****************************************************/ /* Compare the passed word with the word in the */ /* current dictionary record. If they are the same, */ /* then extract the type (NOUN, VERB, etc.). If the */ /* type is PRON, then extract pronoun information. */ /* If the type is VERB, then extract verb */ /* information. */ /*****************************************************/ int match_record(char *pass_word, int types) { int i, j; char *root; char *dic_word; dic_word = extract_word(); /* Check if passed word equals dictionary word */ if (strcmpi(pass_word, dic_word) != 0) return(1); /* Word found, get the type */ for (i=24,j=0; i<28; i++) { if (isspace(dic_record[i])) break; type_array[word_ct][types][j++] = dic_record[i]; } /* Trim the type */ type_array[word_ct][types][j] = '\0'; if (strcmp(type_array[word_ct][types], "PRON") == 0) subject_number = dic_record[41]; if (strcmp(type_array[word_ct][types], "VERB") == 0) { root = extract_root(); strcpy(root_array[word_ct], root); verb_usage = dic_record[29]; for (i=30,j=0; i<34; i++,j++) { if (isspace(dic_record[i])) break; verb_tense[j] = dic_record[i]; } verb_tense[j] = '\0'; for (i=41,j=0; i<43; i++,j++) { if (isspace(dic_record[i])) break; verb_number[j] = dic_record[i]; } verb_number[j] = '\0'; } return(0); } /*****************************************************/ /* Extract the word from the dictionary. The word is */ /* 24 characters in length and starts in column 1. */ /*****************************************************/ char *extract_word() { int i; char dic_word[25]; strncpy(dic_word, dic_record, 24); for (i=23; i>=0; i--) { if (isspace(dic_word[i])) { dic_word[i] = '\0'; continue; } break; } return(dic_word); } /*****************************************************/ /* Extract the root from the dictionary. It */ /* identifies a group of similar words (the root for */ /* run, ran, runs and running is run). It is 14 */ /* characters in length and starts in column 47. */ /*****************************************************/ char *extract_root() { int i, j; char root[15]; for (i=46,j=0; i<60; i++) { if (isspace(dic_record[i])) break; root[j++] = dic_record[i]; } /* Trim the root */ root[j] = '\0'; return(root); } /*****************************************************/ /* Determine if the input sentence contains a known, */ /* underlying structure. If it does, then assign the */ /* correct types and phrases for the words. */ /*****************************************************/ int check_underlying() { int i = 0; /* Structure WH-AUX-PRON-VERB */ if ( (check_type("WH", i) == 0) && (check_type("AUX", i+1) == 0) && (check_type("PRON", i+2) == 0) && (check_type("VERB", i+3) == 0) ) { strcpy(prime_types[i], "WH"); strcpy(prime_types[i+1], "AUX"); strcpy(prime_types[i+2], "PRON"); strcpy(prime_types[i+3], "VERB"); strcpy(phrases[i], "WHQUESTION"); strcpy(phrases[i+1], "VERBPHRASE"); strcpy(phrases[i+2], "NOUNPHRASE"); strcpy(phrases[i+3], "VERBPHRASE"); strcpy(auxiliaries[sentence], word_array[i+1]); get_aux(); return(0); } /* Structure PRON-AUX-VERB-PREP-DET-NOUN */ if ( (check_type("PRON", i) == 0) && (check_type("AUX", i+1) == 0) && (check_type("VERB", i+2) == 0) && (check_type("PREP", i+3) == 0) && (check_type("DET", i+4) == 0) && (check_type("NOUN", i+5) == 0) ) { strcpy(prime_types[i], "PRON"); strcpy(prime_types[i+1], "AUX"); strcpy(prime_types[i+2], "VERB"); strcpy(prime_types[i+3], "PREP"); strcpy(prime_types[i+4], "DET"); strcpy(prime_types[i+5], "NOUN"); strcpy(phrases[i], "NOUNPHRASE"); strcpy(phrases[i+1], "VERBPHRASE"); strcpy(phrases[i+2], "VERBPHRASE"); strcpy(phrases[i+3], "PREPPHRASE"); strcpy(phrases[i+4], "PREPPHRASE"); strcpy(phrases[i+5], "PREPPHRASE"); strcpy(auxiliaries[sentence], word_array[i+1]); get_aux(); return(0); } /* Structure WH-AUX-NAME-VERB */ if ( (check_type("WH", i) == 0) && (check_type("AUX", i+1) == 0) && (check_type("NAME", i+2) == 0) && (check_type("VERB", i+3) == 0) ) { strcpy(prime_types[i], "WH"); strcpy(prime_types[i+1], "AUX"); strcpy(prime_types[i+2], "NAME"); strcpy(prime_types[i+3], "VERB"); strcpy(phrases[i], "WHQUESTION"); strcpy(phrases[i+1], "VERBPHRASE"); strcpy(phrases[i+2], "NOUNPHRASE"); strcpy(phrases[i+3], "VERBPHRASE"); strcpy(auxiliaries[sentence], word_array[i+1]); get_aux(); return(0); } /* Structure NAME-AUX-AUX-AUX-VERB-PREP-DET-NOUN */ if ( (check_type("NAME", i) == 0) && (check_type("AUX", i+1) == 0) && (check_type("AUX", i+2) == 0) && (check_type("AUX", i+3) == 0) && (check_type("VERB", i+4) == 0) && (check_type("PREP", i+5) == 0) && (check_type("DET", i+6) == 0) && (check_type("NOUN", i+7) == 0) ) { strcpy(prime_types[i], "NAME"); strcpy(prime_types[i+1], "AUX"); strcpy(prime_types[i+2], "AUX"); strcpy(prime_types[i+3], "AUX"); strcpy(prime_types[i+4], "VERB"); strcpy(prime_types[i+5], "PREP"); strcpy(prime_types[i+6], "DET"); strcpy(prime_types[i+7], "NOUN"); strcpy(phrases[i], "NOUNPHRASE"); strcpy(phrases[i+1], "VERBPHRASE"); strcpy(phrases[i+2], "VERBPHRASE"); strcpy(phrases[i+3], "VERBPHRASE"); strcpy(phrases[i+4], "VERBPHRASE"); strcpy(phrases[i+5], "PREPPHRASE"); strcpy(phrases[i+6], "PREPPHRASE"); strcpy(phrases[i+7], "PREPPHRASE"); strcpy(auxiliaries[sentence], word_array[i+1]); strcat(auxiliaries[sentence], " "); strcat(auxiliaries[sentence], word_array[i+2]); strcat(auxiliaries[sentence], " "); strcat(auxiliaries[sentence], word_array[i+3]); get_aux(); return(0); } /* Structure NAME-AUX-AUX-VERB-PREP-DET-NOUN */ if ( (check_type("NAME", i) == 0) && (check_type("AUX", i+1) == 0) && (check_type("AUX", i+2) == 0) && (check_type("VERB", i+3) == 0) && (check_type("PREP", i+4) == 0) && (check_type("DET", i+5) == 0) && (check_type("NOUN", i+6) == 0) ) { strcpy(prime_types[i], "NAME"); strcpy(prime_types[i+1], "AUX"); strcpy(prime_types[i+2], "AUX"); strcpy(prime_types[i+3], "VERB"); strcpy(prime_types[i+4], "PREP"); strcpy(prime_types[i+5], "DET"); strcpy(prime_types[i+6], "NOUN"); strcpy(phrases[i], "NOUNPHRASE"); strcpy(phrases[i+1], "VERBPHRASE"); strcpy(phrases[i+2], "VERBPHRASE"); strcpy(phrases[i+3], "VERBPHRASE"); strcpy(phrases[i+4], "PREPPHRASE"); strcpy(phrases[i+5], "PREPPHRASE"); strcpy(phrases[i+6], "PREPPHRASE"); strcpy(auxiliaries[sentence], word_array[i+1]); strcat(auxiliaries[sentence], " "); strcat(auxiliaries[sentence], word_array[i+2]); get_aux(); return(0); } /* Structure NAME-AUX-VERB-PREP-DET-NOUN */ if ( (check_type("NAME", i) == 0) && (check_type("AUX", i+1) == 0) && (check_type("VERB", i+2) == 0) && (check_type("PREP", i+3) == 0) && (check_type("DET", i+4) == 0) && (check_type("NOUN", i+5) == 0) ) { strcpy(prime_types[i], "NAME"); strcpy(prime_types[i+1], "AUX"); strcpy(prime_types[i+2], "VERB"); strcpy(prime_types[i+3], "PREP"); strcpy(prime_types[i+4], "DET"); strcpy(prime_types[i+5], "NOUN"); strcpy(phrases[i], "NOUNPHRASE"); strcpy(phrases[i+1], "VERBPHRASE"); strcpy(phrases[i+2], "VERBPHRASE"); strcpy(phrases[i+3], "PREPPHRASE"); strcpy(phrases[i+4], "PREPPHRASE"); strcpy(phrases[i+5], "PREPPHRASE"); strcpy(auxiliaries[sentence], word_array[i+1]); get_aux(); return(0); } /* Structure NAME-VERB-PREP-DET-NOUN */ if ( (check_type("NAME", i) == 0) && (check_type("VERB", i+1) == 0) && (check_type("PREP", i+2) == 0) && (check_type("DET", i+3) == 0) && (check_type("NOUN", i+4) == 0) ) { strcpy(prime_types[i], "NAME"); strcpy(prime_types[i+1], "VERB"); strcpy(prime_types[i+2], "PREP"); strcpy(prime_types[i+3], "DET"); strcpy(prime_types[i+4], "NOUN"); strcpy(phrases[i], "NOUNPHRASE"); strcpy(phrases[i+1], "VERBPHRASE"); strcpy(phrases[i+2], "PREPPHRASE"); strcpy(phrases[i+3], "PREPPHRASE"); strcpy(phrases[i+4], "PREPPHRASE"); return(0); } return(1); } /*****************************************************/ /* Compare the passed type with all the types for */ /* this word in the type_array. If the type is */ /* found, then return 0. The pass_number parameter */ /* identifies the word in the input sentence. */ /*****************************************************/ int check_type(char *pass_type, int pass_number) { int i; for (i=0; type_array[pass_number][i][0]; i++) { if (strcmp(type_array[pass_number][i], pass_type) == 0) /* Passed type is found in array */ return(0); } /* Passed type is not found in array */ return(1); } /*****************************************************/ /* If the correct type is "NAME" or "PRON" then the */ /* word refers to a subject so copy the word to the */ /* subjects array. */ /*****************************************************/ void check_subject() { int i; for (i=0; i<word_ct; i++) { if (strcmp(prime_types[i], "NAME") == 0) { strcpy(subjects[sentence], word_array[i]); subject_number = SINGULAR; subjects_type[sentence] = NAME; break; } if (strcmp(prime_types[i], "PRON") == 0) { strcpy(subjects[sentence], word_array[i]); subjects_type[sentence] = PRONOUN; break; } } return; } /*****************************************************/ /* If the correct type is "VERB", then the word */ /* refers to an action so copy the word's root from */ /* the root array to the actions array. */ /*****************************************************/ void check_action() { int i; for (i=0; i<word_ct; i++) { if (strcmp(prime_types[i], "VERB") == 0) { strcpy(actions[sentence], root_array[i]); break; } } return; } /*****************************************************/ /* If the phrase is a "PREPPHRASE", then all the */ /* words in the phrase refer to a place. Concatenate */ /* these words to the places array. */ /*****************************************************/ void check_place() { int i; for (i=0; i<word_ct; i++) { if (strcmp(phrases[i], "PREPPHRASE") == 0) { strcat(places[sentence], " "); strcat(places[sentence], word_array[i]); } } return; } /*****************************************************/ /* Determine the sentence tense and usage by */ /* matching auxiliary and verb information, or by */ /* matching previous sentence information. */ /*****************************************************/ void check_aux_verb() { int i, j, matches; char *result; char temp_tenses[5]; /**************************************************/ /* Auxiliary in sentence */ /**************************************************/ if (strlen(auxiliaries[sentence]) > 0) { if (aux_usage != verb_usage) { tenses[sentence] = UNKNOWN; usages[sentence] = UNKNOWN; return; } for (i=0,j=0,matches=0; aux_tense[i]; i++) { if ((result = strchr(verb_tense,aux_tense[i])) != NULL) { temp_tenses[j++] = *result; matches++; } } temp_tenses[j] = '\0'; if (matches == 0) { tenses[sentence] = UNKNOWN; usages[sentence] = UNKNOWN; return; } usages[sentence] = aux_usage; if (matches == 1) { tenses[sentence] = temp_tenses[0]; return; } for (i=sentence-1; i>=0 && i>=sentence-3; i--) { if ((strcmpi(subjects[i], subjects[sentence]) == 0) && (strcmpi(actions[i], actions[sentence]) == 0) && (strchr(temp_tenses, tenses[i]) != NULL) && (strlen(places[i]) > 0)) { tenses[sentence] = tenses[i]; return; } } tenses[sentence] = PRESENT; return; } /**************************************************/ /* No auxiliary in sentence */ /**************************************************/ usages[sentence] = verb_usage; if (strchr(verb_tense, PAST) != NULL) { tenses[sentence] = PAST; return; } /**************************************************/ /* No auxiliary, verb tense is present or future */ /**************************************************/ for (i=sentence-1; i>=0 && i>=sentence-3; i--) { if ((strcmpi(subjects[i], subjects[sentence]) == 0) && (strcmpi(actions[i], actions[sentence]) == 0) && (strchr(verb_tense, tenses[i]) != NULL) && (strlen(places[i]) > 0)) { tenses[sentence] = tenses[i]; return; } } tenses[sentence] = PRESENT; return; } /*****************************************************/ /* Match the subject, verb, and auxiliary number. */ /* If the match is successful, then the sentence */ /* number is the matched number. */ /*****************************************************/ void check_number() { if (strchr(verb_number, subject_number) == NULL) { numbers[sentence] = UNKNOWN; return; } if ((strlen(auxiliaries[sentence]) > 0) && (strchr(aux_number, subject_number) == NULL)) { numbers[sentence] = UNKNOWN; return; } numbers[sentence] = subject_number; return; } /*****************************************************/ /* Read the dictionary to extract the auxiliary */ /* information. */ /*****************************************************/ void get_aux() { rewind(infile); fgets(dic_record, 80, infile); while (! feof(infile)) { if (match_aux() == 0) return; fgets(dic_record, 80, infile); } return; } /*****************************************************/ /* If the sentence auxiliary matches the word in the */ /* current dictionary record, then extract the */ /* auxiliary information from the dictionary. */ /*****************************************************/ int match_aux() { int i,j; char *dic_word; dic_word = extract_word(); if (strcmpi(auxiliaries[sentence], dic_word) != 0) return(1); aux_usage = dic_record[29]; for (i=30,j=0; i<34; i++,j++) { if (isspace(dic_record[i])) break; aux_tense[j] = dic_record[i]; } /* Trim the tense */ aux_tense[j] = '\0'; for (i=41,j=0; i<43; i++,j++) { if (isspace(dic_record[i])) break; aux_number[j] = dic_record[i]; } /* Trim the number */ aux_number[j] = '\0'; for (i=44,j=0; i<47; i++,j++) { if (isspace(dic_record[i])) break; aux_meaning[sentence][j] = dic_record[i]; } return(0); } /*****************************************************/ /* Generate a response with information from a */ /* matching, previous sentence. */ /*****************************************************/ void make_response() { int i; /***************************************************/ /* Input sentence is not asking for information. */ /***************************************************/ if (strcmpi(word_array[0], "where") != 0) { strcpy(response, "OK"); return; } /***************************************************/ /* Match subject, action, tense, and meaning. */ /***************************************************/ for (i=sentence-1; i>=0; i--) { if ((strcmpi(subjects[i],subjects[sentence])==0) && (strcmpi(actions[i], actions[sentence]) ==0) && (strlen(places[i]) > 0) && (tenses[i] == tenses[sentence]) && (strpbrk(aux_meaning[i],aux_meaning[sentence]) != NULL)) { make_answer(i); return; } } /***************************************************/ /* Match subject, action, and tense. */ /***************************************************/ for (i=sentence-1; i>=0; i--) { if ((strcmpi(subjects[i],subjects[sentence])==0) && (strcmpi(actions[i], actions[sentence]) ==0) && (strlen(places[i]) > 0) && (tenses[i] == tenses[sentence])) { make_answer(i); return; } } /***************************************************/ /* Match subject, action, and meaning. */ /***************************************************/ for (i=sentence-1; i>=0; i--) { if ((strcmpi(subjects[i],subjects[sentence])==0) && (strcmpi(actions[i], actions[sentence]) ==0) && (strlen(places[i]) > 0) && (strpbrk(aux_meaning[i],aux_meaning[sentence]) != NULL)) { strcpy(response, "I'm not sure, but "); make_answer(i); return; } } /***************************************************/ /* Match subject and action. */ /***************************************************/ for (i=sentence-1; i>=0; i--) { if ((strcmpi(subjects[i],subjects[sentence])==0) && (strcmpi(actions[i], actions[sentence]) ==0) && (strlen(places[i]) > 0)) { strcpy(response, "I'm not sure, but "); make_answer(i); return; } } strcpy(response, "I don't know"); return; } /*****************************************************/ /* Move information from a previous sentence to the */ /* response. */ /*****************************************************/ void make_answer(int prev_sentence) { if (subjects_type[prev_sentence] == PRONOUN) { if (strlen(response) == 0) { subjects[prev_sentence][0] = (char) toupper(subjects[prev_sentence][0]); } else { subjects[prev_sentence][0] = (char) tolower(subjects[prev_sentence][0]); } } strcat(response, subjects[prev_sentence]); strcat(response, " "); if (strlen(auxiliaries[prev_sentence]) > 0) { strcat(response, auxiliaries[prev_sentence]); strcat(response, " "); } get_verb(tenses[prev_sentence], numbers[prev_sentence], usages[prev_sentence]); strcat(response, places[prev_sentence]); return; } /*****************************************************/ /* Get the correct verb from the dictionary. */ /*****************************************************/ void get_verb(char pass_tense, char pass_number, char pass_usage) { rewind(infile); fgets(dic_record, 80, infile); while (! feof(infile)) { if (match_verb(pass_tense, pass_number, pass_usage) == 0) break; fgets(dic_record, 80, infile); } return; } /*****************************************************/ /* If the verb information in the current record */ /* matches the passed information, then move the */ /* correct verb to the response. */ /*****************************************************/ int match_verb(char pass_tense, char pass_number, char pass_usage) { int i; char *root; char *dic_word; root = extract_root(); /* Match verb with root */ if (strcmpi(actions[sentence], root) == 0) { /* Match verb with tense */ for (i=30; i<34; i++) { if (isspace(dic_record[i])) return(1); if (dic_record[i] == pass_tense) break; } /* Match verb with number */ for (i=41; i<43; i++) { if (isspace(dic_record[i])) return(1); if (dic_record[i] == pass_number) break; } /* Match verb with usage */ if (dic_record[29] == pass_usage) { dic_word = extract_word(); strcat(response, dic_word); return(0); } } return(1); } /* End of file */