Black Box 4

home *** CD-ROM | disk | FTP | other *** search

/ Black Box 4 / BlackBox.cdr / progc / cujv8a.arj / 8N03090A < prev next >

Wrap

Text File | 1990-03-20 | 7.9 KB | 287 lines

/* Speech Recognizer */ /* bj gleason, Upsala College, Computer Science Department */ /* East Orange, nj 07019 (201)-998-1037 */ #include <stdio.h> #include <conio.h> #define RAWBUFFERSIZE 150 /* 100 samples/second 1.5 seconds */ #define VOCABSIZE 10 /* 10 digits, 0 - 9 */ #define NUMSAMPLES 16 /* Number of samples to extract */ #define NUMBANDS 4 /* High, Low Freq, High, Low Energy */ #define BIGNUM 32767 /* large number for min diff. calc */ int rawspeech[RAWBUFFERSIZE][NUMBANDS]; /* to hold raw speech */ int index[VOCABSIZE]; /* inidicate if digit trained */ int template[VOCABSIZE][NUMSAMPLES][NUMBANDS]; /* known templates */ int unknown[NUMSAMPLES][NUMBANDS]; /* unknown voice template */ int min_diff[VOCABSIZE]; /* min diff. each digit */ int sam_size; /* current sample size */ int debug; /* show debugging info */ /* getspeech will read in a file from disk. The length in bytes */ /* will be returned. The rawspeech buffer will be modified. */ int getspeech() { FILE *fptr; int i,j; char fname[80]; if (debug) printf("\nReading in Speech"); printf("\nEnter name of file?"); gets(fname); if ((fptr=fopen(fname,"rt"))==NULL) { printf("\nCant find file %s",fname); return(0); } else { for(i=0;i<=RAWBUFFERSIZE;i++) for(j=0;j < NUMBANDS;j++) { if ((fscanf(fptr,"%i",&rawspeech[i][j]))==EOF) { fclose(fptr); return(i); } } } } int plot_it() { int i,j,x,y; printf("\n\n"); for (i=0;i < sam_size;i++) { for (j=NUMBANDS-1; j >= 0; j--) { x=rawspeech[i][j]+(j*2); gotoxy(x,wherey()); putchar(j+48); } printf("\n"); } } /* the closest match routine compares the unknown template with */ /* known templates. It builds a minimim difference list that is */ /* the difference between unknown and each known. We then scan */ /* list to find the closest match. */ int closest_match() { int p,i,j; int low,next_low,digit,next_digit; if (debug) printf("\nFinding Closest Match"); for (p = 0; p < VOCABSIZE; p++) if (index[p] != 0) { min_diff[p] = 0; for(i = 0; i < NUMSAMPLES; i++) for(j = 0; j < NUMBANDS; j++) /* for each digit, find the absoulte difference */ /* between known and unknown templates */ min_diff[p] = min_diff[p] + abs(unknown[i][j] -template[p][i][j]); } else { min_diff[p]=BIGNUM; /* put in a big number if digit not */ } /* trained. */ /* min_diff now has the difference for each template. Search */ /* to find the smallest difference. This will be our digit. */ /* Find the next lowest match to calculate the delta diff. */ digit = -1; next_digit = -1; low = BIGNUM; next_low = BIGNUM; if (debug) printf("\nTP# Diff Low Digit"); for (p = 0; p < VOCABSIZE; p++) { if(min_diff[p] < low) { next_low = low; next_digit = digit; digit = p; low = min_diff[p]; } if (debug) printf("\n%3i %5i %5i %2i",p,min_diff[p],low,digit); } if (debug == 1) { printf("\nMinimun Difference was %i, Digit is %i",low,digit); printf("\nNext Closest Diff was %i, Digit is %i",next_low ,next_digit); printf("\nWith the delta difference of %i",next_low-low); } /* it would be right here where your would add the code */ /* to set a rejection limit or a delta difference limit. */ /* If the digit is rejected, send back error, such as -1. */ return(digit); } /* Extract template will extract a template from the raw */ /* speech buffer. This is to reduce the size of the */ /* template and to elimate time warping. */ /* the rate is kept in floating point to prevent truncation */ /* errors. */ int extract_template() { int i,j,p; float rate,x; if (debug) printf("\nExtracting Template"); rate = (float) sam_size / NUMSAMPLES; p = 0; if (debug) { printf("\nExtracting %i elements from Raw Speech", NUMSAMPLES); printf("\nTake every %f element", rate); printf("\n\n UN RS"); } for (x = 0; x < sam_size ; x = x + rate) { for (j = 0; j < NUMBANDS; j++) unknown[p][j] = rawspeech[(int)x][j]; if (debug) printf("\n%3i %3i",p,(int)x); p++; } } /* During the training phase, this will take the extracted template */ /* and store it in the array of known templates. */ int store_template(int position) { int i,j; if (debug) printf("\nStoring template at position %i",position); for (i = 0; i < NUMSAMPLES ; i++) for (j = 0; j < NUMBANDS; j++) { template[position][i][j] = unknown[i][j]; } } /* Perform - Get the speech, extract an unknown template, compare */ /* against the rest, and print the resulting digit. */ int perform() { int digit; sam_size = getspeech(); if (debug) plot_it(); if (debug) printf("\nSize of Sample = %i",sam_size); extract_template(); /* break raw buffer up and */ /* place into unknown template */ digit = closest_match(); printf("\nDigit spoken was %i",digit); } /* Training - Get the speech, extract an unknown template, */ /* find from the user what digit it was, then store it in */ /* the known template array. */ int train() { char ans[10]; int digit; sam_size = getspeech(); if (debug) plot_it(); if (debug) printf("\nSize of Sample = %i",sam_size); printf("\nEnter the digit spoken ?"); gets(ans); digit = atoi(ans); index[digit] = 1; /* indicate this digit is trained */ extract_template(); /* break raw buffer up and */ /* place into unknown template */ store_template(digit); /* store the template */ } /* Eztrain - This is to quickly load in files a0 - a9 */ int eztrain(char fname[80], int digit) { FILE *fptr; int i,j; if ((fptr=fopen(fname,"rt"))!=NULL) { sam_size = 0; printf("\nReading file %s", fname); for(i=0;i<=RAWBUFFERSIZE;i++) for(j=0;j < NUMBANDS;j++) if ((fscanf(fptr,"%i",&rawspeech[i][j]))!=EOF) sam_size = i; fclose(fptr); if (debug) plot_it(); if (debug) printf("\nSize of Sample = %i",sam_size); index[digit] = 1; extract_template(); store_template(digit); } } main() { int i; char ans[80]; char choice; /* clear the training index... nothing has been entered */ for (i=0; i<VOCABSIZE; i++) index[i] = 0; printf("\nWelcome to Speech Recognition Demo, Version 1.0\n"); debug = 1; /* display debugging information */ do { printf("\n\nTrain, Perform, Load A or B, Debug "); if (debug) printf("Off"); else printf("On"); printf(", or Quit? (T/P/A/B/D/Q)"); gets(ans); choice = toupper(ans[0]); if (choice == 'A') { eztrain("a0",0); eztrain("a1",1); eztrain("a2",2); eztrain("a3",3); eztrain("a4",4); eztrain("a5",5); eztrain("a6",6); eztrain("a7",7); eztrain("a8",8); eztrain("a9",9); } if (choice == 'B') { eztrain("b0",0); eztrain("b1",1); eztrain("b2",2); eztrain("b3",3); eztrain("b4",4); eztrain("b5",5); eztrain("b6",6); eztrain("b7",7); eztrain("b8",8); eztrain("b9",9); } if (choice == 'D') { debug = !debug; printf("\n Debugging Trace "); if (debug) printf("On"); else printf("Off"); } if (choice == 'T') train(); if (choice == 'P') perform(); } while(choice != 'Q'); printf("\n\nAll done."); }