/*
 * Source:  The C Users' Group Library, August 1994 CD-ROM
 *          (wc-cdrom-cusersgrouplibrary-1994-08.iso)
 * Path:    listings/v_08_03/8n03090a
 * Listing: text file, dated 1990-03-20, 8KB, 287 lines
 */
/* Speech Recognizer */
/* bj gleason, Upsala College, Computer Science Department */
/* East Orange, nj 07019 (201)-998-1037 */
#include <conio.h>
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
/* Sizing constants shared by every routine in this file. */
#define RAWBUFFERSIZE 150 /* rows in rawspeech: 100 samples/second 1.5 seconds */
#define VOCABSIZE 10 /* 10 digits, 0 - 9 */
#define NUMSAMPLES 16 /* Number of samples (rows) kept in each template */
#define NUMBANDS 4 /* values per row: High, Low Freq, High, Low Energy */
#define BIGNUM 32767 /* large number for min diff. calc; also marks */
                     /* an untrained digit in min_diff */
/* Global working storage. */
int rawspeech[RAWBUFFERSIZE][NUMBANDS]; /* to hold raw speech as read from a file */
int index[VOCABSIZE]; /* indicates if digit trained: nonzero once a */
                      /* template has been stored for that digit */
int template[VOCABSIZE][NUMSAMPLES][NUMBANDS]; /* known templates, one per digit */
int unknown[NUMSAMPLES][NUMBANDS]; /* unknown voice template, filled by */
                                   /* extract_template */
int min_diff[VOCABSIZE]; /* min diff. each digit, filled by closest_match */
int sam_size; /* current sample size, set by the file loaders */
int debug; /* show debugging info when nonzero */
/* getspeech - prompt for a file name and load it into rawspeech.     */
/*                                                                    */
/* The file is read as whitespace-separated integers, NUMBANDS per    */
/* row, for at most RAWBUFFERSIZE rows.  Reading stops at end of      */
/* file, at the first value that cannot be parsed, or when the        */
/* buffer is full.                                                    */
/*                                                                    */
/* Returns the number of complete rows stored (not a byte count), or  */
/* 0 if no file name could be read or the file could not be opened.   */
/* The rawspeech buffer is modified.                                  */
int getspeech()
{
    FILE *fptr;
    int i, j, k;
    char fname[80];

    if (debug) printf("\nReading in Speech");
    printf("\nEnter name of file?");

    /* fgets bounds the read to the buffer; gets() could overrun it. */
    if (fgets(fname, sizeof fname, stdin) == NULL)
        return(0);

    /* fgets keeps the line terminator; cut the name off there. */
    for (k = 0; fname[k] != '\0'; k++)
    {
        if (fname[k] == '\n' || fname[k] == '\r')
        {
            fname[k] = '\0';
            break;
        }
    }

    /* "r" already means text mode; "rt" is a compiler extension. */
    if ((fptr = fopen(fname, "r")) == NULL)
    {
        printf("\nCant find file %s", fname);
        return(0);
    }

    /* i must stay below RAWBUFFERSIZE: row RAWBUFFERSIZE does not exist. */
    for (i = 0; i < RAWBUFFERSIZE; i++)
    {
        for (j = 0; j < NUMBANDS; j++)
        {
            /* Anything other than one converted value (EOF or bad */
            /* text) ends the data; a partial row is not counted.   */
            if (fscanf(fptr, "%i", &rawspeech[i][j]) != 1)
            {
                fclose(fptr);
                return(i);
            }
        }
    }

    /* Buffer filled completely: still close the file and report it. */
    fclose(fptr);
    return(i);
}
/* plot_it - debugging aid that draws the loaded sample on the text   */
/* screen.  One screen line is produced per rawspeech row (the first  */
/* sam_size rows); on that line each band is marked with its own      */
/* number, placed at column value + 2 * band.  Bands are drawn from   */
/* the highest number down to band 0.                                 */
int plot_it()
{
    int row, band, column;

    printf("\n\n");

    row = 0;
    while (row < sam_size)
    {
        band = NUMBANDS;
        while (band-- > 0)
        {
            column = rawspeech[row][band] + (band * 2);
            gotoxy(column, wherey());   /* stay on the current line */
            putchar(48 + band);         /* 48 is the code for '0'   */
        }
        printf("\n");
        row++;
    }
}
/* closest_match - compare the unknown template with every known      */
/* template.                                                          */
/*                                                                    */
/* For each trained digit (index[p] != 0) min_diff[p] receives the    */
/* sum of absolute differences between unknown and template[p];       */
/* untrained digits receive BIGNUM.  The list is then scanned for     */
/* the smallest difference and for the runner-up, which is needed to  */
/* report the delta between the two best candidates.                  */
/*                                                                    */
/* Returns the digit with the smallest difference, or -1 when no      */
/* entry is below BIGNUM (for example when nothing is trained).       */
int closest_match()
{
    int p, i, j;
    int low, next_low, digit, next_digit;

    if (debug) printf("\nFinding Closest Match");

    for (p = 0; p < VOCABSIZE; p++)
    {
        if (index[p] == 0)
        {
            /* put in a big number if digit not trained */
            min_diff[p] = BIGNUM;
            continue;
        }

        /* for each digit, find the absolute difference */
        /* between known and unknown templates          */
        min_diff[p] = 0;
        for (i = 0; i < NUMSAMPLES; i++)
            for (j = 0; j < NUMBANDS; j++)
                min_diff[p] += abs(unknown[i][j] - template[p][i][j]);
    }

    /* min_diff now has the difference for each template.  Search */
    /* to find the smallest difference.  This will be our digit.  */
    /* Track the next lowest match to calculate the delta diff.   */
    digit = -1;
    next_digit = -1;
    low = BIGNUM;
    next_low = BIGNUM;

    if (debug) printf("\nTP# Diff Low Digit");
    for (p = 0; p < VOCABSIZE; p++)
    {
        if (min_diff[p] < low)
        {
            /* new best: the previous best becomes the runner-up */
            next_low = low;
            next_digit = digit;
            digit = p;
            low = min_diff[p];
        }
        else if (min_diff[p] < next_low)
        {
            /* not the best, but better than the current runner-up; */
            /* without this branch a runner-up that appears after   */
            /* the best entry would never be recorded               */
            next_low = min_diff[p];
            next_digit = p;
        }
        if (debug) printf("\n%3i %5i %5i %2i", p, min_diff[p], low, digit);
    }

    if (debug)
    {
        printf("\nMinimun Difference was %i, Digit is %i", low, digit);
        printf("\nNext Closest Diff was %i, Digit is %i", next_low,
               next_digit);
        printf("\nWith the delta difference of %i", next_low - low);
    }

    /* It would be right here where you would add the code to set */
    /* a rejection limit or a delta difference limit.  If the     */
    /* digit is rejected, send back an error, such as -1.         */
    return(digit);
}
/* extract_template - reduce the raw speech buffer to a fixed-size    */
/* template.  This is to reduce the size of the template and to       */
/* eliminate time warping.                                            */
/*                                                                    */
/* NUMSAMPLES rows are picked from the first sam_size rows of         */
/* rawspeech at an even spacing of sam_size / NUMSAMPLES and copied   */
/* into unknown.  The spacing is kept in floating point to prevent    */
/* truncation errors.  When sam_size is 0 nothing is copied and       */
/* unknown is left as it was.                                         */
/*                                                                    */
/* Returns the number of rows written to unknown.                     */
int extract_template()
{
    int j, p, src;
    float rate;

    if (debug) printf("\nExtracting Template");
    rate = (float) sam_size / NUMSAMPLES;

    if (debug)
    {
        printf("\nExtracting %i elements from Raw Speech",
               NUMSAMPLES);
        printf("\nTake every %f element", rate);
        printf("\n\n UN RS");
    }

    /* Drive the loop by the destination row so p can never run past */
    /* the end of unknown, whatever the floating point spacing does. */
    for (p = 0; p < NUMSAMPLES; p++)
    {
        src = (int)(p * rate);
        if (src >= sam_size)    /* also covers an empty sample */
            break;

        for (j = 0; j < NUMBANDS; j++)
            unknown[p][j] = rawspeech[src][j];
        if (debug) printf("\n%3i %3i", p, src);
    }

    return(p);
}
/* During the training phase, this will take the extracted template */
/* and store it in the array of known templates. */
int store_template(int position)
{
int i,j;
if (debug) printf("\nStoring template at position %i",position);
for (i = 0; i < NUMSAMPLES ; i++)
for (j = 0; j < NUMBANDS; j++)
{
template[position][i][j] = unknown[i][j];
}
}
/* perform - get the speech, extract an unknown template, compare it  */
/* against the known templates, and print the resulting digit.        */
/*                                                                    */
/* Returns the recognized digit, or -1 when no speech data could be   */
/* loaded or no template matched.                                     */
int perform()
{
    int digit;

    sam_size = getspeech();
    if (sam_size <= 0)
    {
        /* Nothing was loaded: matching would only compare whatever */
        /* was left in the unknown template by an earlier run.      */
        printf("\nNo speech data read");
        return(-1);
    }

    if (debug) plot_it();
    if (debug) printf("\nSize of Sample = %i", sam_size);

    extract_template();     /* break raw buffer up and     */
                            /* place into unknown template */
    digit = closest_match();
    if (digit < 0)
    {
        /* closest_match found no candidate below its threshold */
        printf("\nNo digit recognized");
        return(-1);
    }

    printf("\nDigit spoken was %i", digit);
    return(digit);
}
/* Training - Get the speech, extract an unknown template, */
/* find from the user what digit it was, then store it in */
/* the known template array. */
int train()
{
char ans[10];
int digit;
sam_size = getspeech();
if (debug) plot_it();
if (debug) printf("\nSize of Sample = %i",sam_size);
printf("\nEnter the digit spoken ?");
gets(ans);
digit = atoi(ans);
index[digit] = 1; /* indicate this digit is trained */
extract_template(); /* break raw buffer up and */
/* place into unknown template */
store_template(digit); /* store the template */
}
/* Eztrain - This is to quickly load in files a0 - a9 */
int eztrain(char fname[80], int digit)
{
FILE *fptr;
int i,j;
if ((fptr=fopen(fname,"rt"))!=NULL)
{
sam_size = 0;
printf("\nReading file %s", fname);
for(i=0;i<=RAWBUFFERSIZE;i++)
for(j=0;j < NUMBANDS;j++)
if ((fscanf(fptr,"%i",&rawspeech[i][j]))!=EOF)
sam_size = i;
fclose(fptr);
if (debug) plot_it();
if (debug) printf("\nSize of Sample = %i",sam_size);
index[digit] = 1;
extract_template();
store_template(digit);
}
}
/* main - interactive menu loop for the speech recognition demo.      */
/*                                                                    */
/* Commands (first letter of the input line, either case):            */
/*   T  train one digit from a file named by the user                 */
/*   P  perform recognition on a file named by the user               */
/*   A  train digits 0 - 9 from the files a0 .. a9                    */
/*   B  train digits 0 - 9 from the files b0 .. b9                    */
/*   D  toggle the debugging trace                                    */
/*   Q  quit                                                          */
/* End of input on stdin also ends the program.  Returns 0.           */
int main(void)
{
    int i;
    char ans[80];
    char name[3];
    char choice;

    /* clear the training index... nothing has been entered */
    for (i = 0; i < VOCABSIZE; i++)
        index[i] = 0;

    printf("\nWelcome to Speech Recognition Demo, Version 1.0\n");
    debug = 1;  /* display debugging information */

    do
    {
        printf("\n\nTrain, Perform, Load A or B, Debug ");
        if (debug) printf("Off"); else printf("On");
        printf(", or Quit? (T/P/A/B/D/Q)");

        /* fgets bounds the read; stop on end of input instead of */
        /* looping forever on a stale answer.                     */
        if (fgets(ans, sizeof ans, stdin) == NULL)
            break;

        /* toupper requires a value representable as unsigned char */
        choice = (char)toupper((unsigned char)ans[0]);

        if (choice == 'A' || choice == 'B')
        {
            /* file names are the set letter followed by one digit, */
            /* so at most ten files can be named this way           */
            name[0] = (choice == 'A') ? 'a' : 'b';
            name[2] = '\0';
            for (i = 0; i < VOCABSIZE && i < 10; i++)
            {
                name[1] = (char)('0' + i);
                eztrain(name, i);
            }
        }
        if (choice == 'D')
        {
            debug = !debug;
            printf("\n Debugging Trace ");
            if (debug) printf("On"); else printf("Off");
        }
        if (choice == 'T') train();
        if (choice == 'P') perform();
    }
    while (choice != 'Q');

    printf("\n\nAll done.");
    return 0;
}