home *** CD-ROM | disk | FTP | other *** search
- /* Speech Recognizer */
-
- /* bj gleason, Upsala College, Computer Science Department */
- /* East Orange, nj 07019 (201)-998-1037 */
-
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>

#include <conio.h>
-
/* ---- Sizing constants ------------------------------------------- */
#define RAWBUFFERSIZE 150     /* 1.5 seconds at 100 samples/second   */
#define VOCABSIZE     10      /* one entry per digit, 0 through 9    */
#define NUMSAMPLES    16      /* samples kept in each template       */
#define NUMBANDS      4       /* High/Low Freq and High/Low Energy   */
#define BIGNUM        32767   /* "worst" value for min diff. search  */

/* ---- Speech data ------------------------------------------------ */
int rawspeech[RAWBUFFERSIZE][NUMBANDS];        /* raw speech as read */
int template[VOCABSIZE][NUMSAMPLES][NUMBANDS]; /* trained templates  */
int unknown[NUMSAMPLES][NUMBANDS];             /* template to match  */

/* ---- Recognizer state ------------------------------------------- */
int index[VOCABSIZE];      /* non-zero once a digit has been trained */
int min_diff[VOCABSIZE];   /* difference of unknown from each digit  */
int sam_size;              /* number of samples in current utterance */
int debug;                 /* non-zero: print debugging information  */
-
- /* getspeech will read in a file from disk. The length in bytes */
- /* will be returned. The rawspeech buffer will be modified. */
- int getspeech()
- {
- FILE *fptr;
- int i,j;
- char fname[80];
-
- if (debug) printf("\nReading in Speech");
- printf("\nEnter name of file?");
- gets(fname);
- if ((fptr=fopen(fname,"rt"))==NULL)
- {
- printf("\nCant find file %s",fname);
- return(0);
- }
- else
- {
- for(i=0;i<=RAWBUFFERSIZE;i++)
- for(j=0;j < NUMBANDS;j++)
- {
- if ((fscanf(fptr,"%i",&rawspeech[i][j]))==EOF)
- {
- fclose(fptr);
- return(i);
- }
- }
- }
- }
-
- int plot_it()
- {
- int i,j,x,y;
-
- printf("\n\n");
- for (i=0;i < sam_size;i++)
- {
- for (j=NUMBANDS-1; j >= 0; j--)
- {
- x=rawspeech[i][j]+(j*2);
- gotoxy(x,wherey());
- putchar(j+48);
- }
- printf("\n");
- }
- }
-
- /* the closest match routine compares the unknown template with */
- /* known templates. It builds a minimim difference list that is */
- /* the difference between unknown and each known. We then scan */
- /* list to find the closest match. */
- int closest_match()
- {
- int p,i,j;
- int low,next_low,digit,next_digit;
-
- if (debug) printf("\nFinding Closest Match");
-
- for (p = 0; p < VOCABSIZE; p++)
- if (index[p] != 0)
- {
- min_diff[p] = 0;
- for(i = 0; i < NUMSAMPLES; i++)
- for(j = 0; j < NUMBANDS; j++)
- /* for each digit, find the absoulte difference */
- /* between known and unknown templates */
- min_diff[p] = min_diff[p] + abs(unknown[i][j]
- -template[p][i][j]);
- }
- else
- {
- min_diff[p]=BIGNUM; /* put in a big number if digit not */
- } /* trained. */
-
- /* min_diff now has the difference for each template. Search */
- /* to find the smallest difference. This will be our digit. */
- /* Find the next lowest match to calculate the delta diff. */
- digit = -1;
- next_digit = -1;
- low = BIGNUM;
- next_low = BIGNUM;
- if (debug) printf("\nTP# Diff Low Digit");
- for (p = 0; p < VOCABSIZE; p++)
- {
- if(min_diff[p] < low)
- {
- next_low = low;
- next_digit = digit;
- digit = p;
- low = min_diff[p];
- }
- if (debug) printf("\n%3i %5i %5i %2i",p,min_diff[p],low,digit);
- }
- if (debug == 1)
- {
- printf("\nMinimun Difference was %i, Digit is %i",low,digit);
- printf("\nNext Closest Diff was %i, Digit is %i",next_low
- ,next_digit);
- printf("\nWith the delta difference of %i",next_low-low);
- }
-
- /* it would be right here where your would add the code */
- /* to set a rejection limit or a delta difference limit. */
- /* If the digit is rejected, send back error, such as -1. */
- return(digit);
- }
-
- /* Extract template will extract a template from the raw */
- /* speech buffer. This is to reduce the size of the */
- /* template and to elimate time warping. */
- /* the rate is kept in floating point to prevent truncation */
- /* errors. */
- int extract_template()
- {
- int i,j,p;
- float rate,x;
-
- if (debug) printf("\nExtracting Template");
- rate = (float) sam_size / NUMSAMPLES;
- p = 0;
- if (debug)
- {
- printf("\nExtracting %i elements from Raw Speech",
- NUMSAMPLES);
- printf("\nTake every %f element", rate);
- printf("\n\n UN RS");
- }
- for (x = 0; x < sam_size ; x = x + rate)
- {
- for (j = 0; j < NUMBANDS; j++)
- unknown[p][j] = rawspeech[(int)x][j];
- if (debug) printf("\n%3i %3i",p,(int)x);
- p++;
- }
- }
-
- /* During the training phase, this will take the extracted template */
- /* and store it in the array of known templates. */
- int store_template(int position)
- {
- int i,j;
-
- if (debug) printf("\nStoring template at position %i",position);
- for (i = 0; i < NUMSAMPLES ; i++)
- for (j = 0; j < NUMBANDS; j++)
- {
- template[position][i][j] = unknown[i][j];
- }
- }
-
- /* Perform - Get the speech, extract an unknown template, compare */
- /* against the rest, and print the resulting digit. */
- int perform()
- {
- int digit;
-
- sam_size = getspeech();
- if (debug) plot_it();
- if (debug) printf("\nSize of Sample = %i",sam_size);
-
- extract_template(); /* break raw buffer up and */
- /* place into unknown template */
-
- digit = closest_match();
- printf("\nDigit spoken was %i",digit);
-
- }
-
- /* Training - Get the speech, extract an unknown template, */
- /* find from the user what digit it was, then store it in */
- /* the known template array. */
- int train()
- {
- char ans[10];
- int digit;
-
- sam_size = getspeech();
- if (debug) plot_it();
- if (debug) printf("\nSize of Sample = %i",sam_size);
- printf("\nEnter the digit spoken ?");
- gets(ans);
- digit = atoi(ans);
-
- index[digit] = 1; /* indicate this digit is trained */
-
- extract_template(); /* break raw buffer up and */
- /* place into unknown template */
-
- store_template(digit); /* store the template */
-
- }
-
- /* Eztrain - This is to quickly load in files a0 - a9 */
- int eztrain(char fname[80], int digit)
- {
- FILE *fptr;
- int i,j;
-
- if ((fptr=fopen(fname,"rt"))!=NULL)
- {
- sam_size = 0;
- printf("\nReading file %s", fname);
- for(i=0;i<=RAWBUFFERSIZE;i++)
- for(j=0;j < NUMBANDS;j++)
- if ((fscanf(fptr,"%i",&rawspeech[i][j]))!=EOF)
- sam_size = i;
- fclose(fptr);
- if (debug) plot_it();
- if (debug) printf("\nSize of Sample = %i",sam_size);
- index[digit] = 1;
- extract_template();
- store_template(digit);
- }
- }
-
-
- main()
- {
- int i;
- char ans[80];
- char choice;
-
- /* clear the training index... nothing has been entered */
- for (i=0; i<VOCABSIZE; i++)
- index[i] = 0;
-
-
- printf("\nWelcome to Speech Recognition Demo, Version 1.0\n");
-
- debug = 1; /* display debugging information */
- do
- {
- printf("\n\nTrain, Perform, Load A or B, Debug ");
- if (debug) printf("Off"); else printf("On");
- printf(", or Quit? (T/P/A/B/D/Q)");
- gets(ans);
- choice = toupper(ans[0]);
- if (choice == 'A')
- {
- eztrain("a0",0); eztrain("a1",1); eztrain("a2",2);
- eztrain("a3",3); eztrain("a4",4); eztrain("a5",5);
- eztrain("a6",6); eztrain("a7",7); eztrain("a8",8);
- eztrain("a9",9);
- }
- if (choice == 'B')
- {
- eztrain("b0",0); eztrain("b1",1); eztrain("b2",2);
- eztrain("b3",3); eztrain("b4",4); eztrain("b5",5);
- eztrain("b6",6); eztrain("b7",7); eztrain("b8",8);
- eztrain("b9",9);
- }
- if (choice == 'D')
- {
- debug = !debug;
- printf("\n Debugging Trace ");
- if (debug) printf("On"); else printf("Off");
- }
- if (choice == 'T') train();
- if (choice == 'P') perform();
- }
- while(choice != 'Q');
- printf("\n\nAll done.");
- }