OS/2 Shareware BBS: 8 Other

home *** CD-ROM | disk | FTP | other *** search

/ OS/2 Shareware BBS: 8 Other / 08-Other.zip / bytewarp.zip / NBENCH1.C < prev next >

Wrap

C/C++ Source or Header | 1995-11-30 | 106KB | 4,307 lines

/* ** nbench1.c */ /******************************** ** BYTEmark (tm) ** ** BYTE NATIVE MODE BENCHMARKS ** ** VERSION 2 ** ** ** ** Included in this source ** ** file: ** ** Numeric Heapsort ** ** String Heapsort ** ** Bitfield test ** ** Floating point emulation ** ** Fourier coefficients ** ** Assignment algorithm ** ** IDEA Encyption ** ** Huffman compression ** ** Back prop. neural net ** ** LU Decomposition ** ** (linear equations) ** ** ---------- ** ** Rick Grehan, BYTE Magazine ** ********************************* ** ** BYTEmark (tm) ** BYTE's Native Mode Benchmarks ** Rick Grehan, BYTE Magazine ** ** Creation: ** Revision: 3/95;10/95 ** 10/95 - Removed allocation that was taking place inside ** the LU Decomposition benchmark. Though it didn't seem to ** make a difference on systems we ran it on, it nonetheless ** removes an operating system dependency that probably should ** not have been there. ** ** DISCLAIMER ** The source, executable, and documentation files that comprise ** the BYTEmark benchmarks are made available on an "as is" basis. ** This means that we at BYTE Magazine have made every reasonable ** effort to verify that the there are no errors in the source and ** executable code. We cannot, however, guarantee that the programs ** are error-free. Consequently, McGraw-HIll and BYTE Magazine make ** no claims in regard to the fitness of the source code, executable ** code, and documentation of the BYTEmark. ** Furthermore, BYTE Magazine, McGraw-Hill, and all employees ** of McGraw-Hill cannot be held responsible for any damages resulting ** from the use of this code or the results obtained from using ** this code. */ /* ** INCLUDES */ #include <stdio.h> #include <stdlib.h> #include <string.h> #include <math.h> #include "nmglobal.h" #include "nbench1.h" #include "wordcat.h" /********************* ** NUMERIC HEAPSORT ** ********************** ** This test implements a heapsort algorithm, performed on an ** array of longs. */ /************** ** DoNumSort ** *************** ** This routine performs the CPU numeric sort test. ** NOTE: Last version incorrectly stated that the routine ** returned result in # of longword sorted per second. ** Not so; the routine returns # of iterations per sec. */ void DoNumSort(void) { SortStruct *numsortstruct; /* Local pointer to global struct */ farlong *arraybase; /* Base pointers of array */ long accumtime; /* Accumulated time */ double iterations; /* Iteration counter */ char *errorcontext; /* Error context string pointer */ int systemerror; /* For holding error codes */ /* ** Link to global structure */ numsortstruct=&global_numsortstruct; /* ** Set the error context string. */ errorcontext="CPU:Numeric Sort"; /* ** See if we need to do self adjustment code. */ if(numsortstruct->adjust==0) { /* ** Self-adjustment code. The system begins by sorting 1 ** array. If it does that in no time, then two arrays ** are built and sorted. This process continues until ** enough arrays are built to handle the tolerance. */ numsortstruct->numarrays=1; while(1) { /* ** Allocate space for arrays */ arraybase=(farlong *)AllocateMemory(sizeof(long) * numsortstruct->numarrays * numsortstruct->arraysize, &systemerror); if(systemerror) { ReportError(errorcontext,systemerror); FreeMemory((farvoid *)arraybase, &systemerror); ErrorExit(); } /* ** Do an iteration of the numeric sort. If the ** elapsed time is less than or equal to the permitted ** minimum, then allocate for more arrays and ** try again. */ if(DoNumSortIteration(arraybase, numsortstruct->arraysize, numsortstruct->numarrays)>global_min_ticks) break; /* We're ok...exit */ FreeMemory((farvoid *)arraybase,&systemerror); if(numsortstruct->numarrays++>NUMNUMARRAYS) { printf("CPU:NSORT -- NUMNUMARRAYS hit.\n"); ErrorExit(); } } } else { /* ** Allocate space for arrays */ arraybase=(farlong *)AllocateMemory(sizeof(long) * numsortstruct->numarrays * numsortstruct->arraysize, &systemerror); if(systemerror) { ReportError(errorcontext,systemerror); FreeMemory((farvoid *)arraybase, &systemerror); ErrorExit(); } } /* ** All's well if we get here. Repeatedly perform sorts until the ** accumulated elapsed time is greater than # of seconds requested. */ accumtime=0L; iterations=(double)0.0; do { accumtime+=DoNumSortIteration(arraybase, numsortstruct->arraysize, numsortstruct->numarrays); iterations+=(double)1.0; } while(TicksToSecs(accumtime)<numsortstruct->request_secs); /* ** Clean up, calculate results, and go home. Be sure to ** show that we don't have to rerun adjustment code. */ FreeMemory((farvoid *)arraybase,&systemerror); numsortstruct->sortspersec=iterations * (double)numsortstruct->numarrays / TicksToFracSecs(accumtime); if(numsortstruct->adjust==0) numsortstruct->adjust=1; return; } /*********************** ** DoNumSortIteration ** ************************ ** This routine executes one iteration of the numeric ** sort benchmark. It returns the number of ticks ** elapsed for the iteration. */ static ulong DoNumSortIteration(farlong *arraybase, ulong arraysize, uint numarrays) { ulong elapsed; /* Elapsed ticks */ ulong i; /* ** Load up the array with random numbers */ LoadNumArrayWithRand(arraybase,arraysize,numarrays); /* ** Start the stopwatch */ elapsed=StartStopwatch(); /* ** Execute a heap of heapsorts */ for(i=0;i<numarrays;i++) NumHeapSort(arraybase+i*arraysize,0L,arraysize-1L); /* ** Get elapsed time */ elapsed=StopStopwatch(elapsed); #ifdef DEBUG { for(i=0;i<arraysize-1;i++) { /* ** Compare to check for proper ** sort. */ if(arraybase[i+1]<arraybase[i]) { printf("Sort Error\n"); break; } } } #endif return(elapsed); } /************************* ** LoadNumArrayWithRand ** ************************** ** Load up an array with random longs. */ static void LoadNumArrayWithRand(farlong *array, /* Pointer to arrays */ ulong arraysize, uint numarrays) /* # of elements in array */ { long i; /* Used for index */ farlong *darray; /* Destination array pointer */ /* ** Initialize the random number generator */ randnum(13L); /* ** Load up first array with randoms */ for(i=0L;i<arraysize;i++) array[i]=randnum(0L); /* ** Now, if there's more than one array to load, copy the ** first into each of the others. */ darray=array; while(--numarrays) { darray+=arraysize; for(i=0L;i<arraysize;i++) darray[i]=array[i]; } return; } /**************** ** NumHeapSort ** ***************** ** Pass this routine a pointer to an array of long ** integers. Also pass in minimum and maximum offsets. ** This routine performs a heap sort on that array. */ static void NumHeapSort(farlong *array, ulong bottom, /* Lower bound */ ulong top) /* Upper bound */ { ulong temp; /* Used to exchange elements */ ulong i; /* Loop index */ /* ** First, build a heap in the array */ for(i=(top/2L); i>0; --i) NumSift(array,i,top); /* ** Repeatedly extract maximum from heap and place it at the ** end of the array. When we get done, we'll have a sorted ** array. */ for(i=top; i>0; --i) { NumSift(array,bottom,i); temp=*array; /* Perform exchange */ *array=*(array+i); *(array+i)=temp; } return; } /************ ** NumSift ** ************* ** Peforms the sift operation on a numeric array, ** constructing a heap in the array. */ static void NumSift(farlong *array, /* Array of numbers */ ulong i, /* Minimum of array */ ulong j) /* Maximum of array */ { unsigned long k; long temp; /* Used for exchange */ while((i+i)<=j) { k=i+i; if(k<j) if(array[k]<array[k+1L]) ++k; if(array[i]<array[k]) { temp=array[k]; array[k]=array[i]; array[i]=temp; i=k; } else i=j+1; } return; } /******************** ** STRING HEAPSORT ** ********************/ /***************** ** DoStringSort ** ****************** ** This routine performs the CPU string sort test. ** Arguments: ** requested_secs = # of seconds to execute test ** stringspersec = # of strings per second sorted (RETURNED) */ void DoStringSort(void) { SortStruct *strsortstruct; /* Local for sort structure */ faruchar *arraybase; /* Base pointer of char array */ long accumtime; /* Accumulated time */ double iterations; /* # of iterations */ char *errorcontext; /* Error context string pointer */ int systemerror; /* For holding error code */ /* ** Link to global structure */ strsortstruct=&global_strsortstruct; /* ** Set the error context */ errorcontext="CPU:String Sort"; /* ** See if we have to perform self-adjustment code */ if(strsortstruct->adjust==0) { /* ** Initialize the number of arrays. */ strsortstruct->numarrays=1; while(1) { /* ** Allocate space for array. We'll add an extra 100 ** bytes to protect memory as strings move around ** (this can happen during string adjustment) */ arraybase=(faruchar *)AllocateMemory((strsortstruct->arraysize+100L) * (long)strsortstruct->numarrays,&systemerror); if(systemerror) { ReportError(errorcontext,systemerror); ErrorExit(); } /* ** Do an iteration of the string sort. If the ** elapsed time is less than or equal to the permitted ** minimum, then de-allocate the array, reallocate a ** an additional array, and try again. */ if(DoStringSortIteration(arraybase, strsortstruct->numarrays, strsortstruct->arraysize)>global_min_ticks) break; /* We're ok...exit */ FreeMemory((farvoid *)arraybase,&systemerror); strsortstruct->numarrays+=1; } } else { /* ** We don't have to perform self adjustment code. ** Simply allocate the space for the array. */ arraybase=(faruchar *)AllocateMemory((strsortstruct->arraysize+100L) * (long)strsortstruct->numarrays,&systemerror); if(systemerror) { ReportError(errorcontext,systemerror); ErrorExit(); } } /* ** All's well if we get here. Repeatedly perform sorts until the ** accumulated elapsed time is greater than # of seconds requested. */ accumtime=0L; iterations=(double)0.0; do { accumtime+=DoStringSortIteration(arraybase, strsortstruct->numarrays, strsortstruct->arraysize); iterations+=(double)strsortstruct->numarrays; } while(TicksToSecs(accumtime)<strsortstruct->request_secs); /* ** Clean up, calculate results, and go home. ** Set flag to show we don't need to rerun adjustment code. */ FreeMemory((farvoid *)arraybase,&systemerror); strsortstruct->sortspersec=iterations / (double)TicksToFracSecs(accumtime); if(strsortstruct->adjust==0) strsortstruct->adjust=1; return; } /************************** ** DoStringSortIteration ** *************************** ** This routine executes one iteration of the string ** sort benchmark. It returns the number of ticks ** Note that this routine also builds the offset pointer ** array. */ static ulong DoStringSortIteration(faruchar *arraybase, uint numarrays,ulong arraysize) { farulong *optrarray; /* Offset pointer array */ unsigned long elapsed; /* Elapsed ticks */ unsigned long nstrings; /* # of strings in array */ int syserror; /* System error code */ unsigned int i; /* Index */ farulong *tempobase; /* Temporary offset pointer base */ faruchar *tempsbase; /* Temporary string base pointer */ /* ** Load up the array(s) with random numbers */ optrarray=LoadStringArray(arraybase,numarrays,&nstrings,arraysize); /* ** Set temp base pointers...they will be modified as the ** benchmark proceeds. */ tempobase=optrarray; tempsbase=arraybase; /* ** Start the stopwatch */ elapsed=StartStopwatch(); /* ** Execute heapsorts */ for(i=0;i<numarrays;i++) { StrHeapSort(tempobase,tempsbase,nstrings,0L,nstrings-1); tempobase+=nstrings; /* Advance base pointers */ tempsbase+=arraysize+100; } /* ** Record elapsed time */ elapsed=StopStopwatch(elapsed); #ifdef DEBUG { unsigned long i; for(i=0;i<nstrings-1;i++) { /* ** Compare strings to check for proper ** sort. */ if(str_is_less(optrarray,arraybase,nstrings,i+1,i)) { printf("Sort Error\n"); break; } } } #endif /* ** Release the offset pointer array built by ** LoadStringArray() */ FreeMemory((farvoid *)optrarray,&syserror); /* ** Return elapsed ticks. */ return(elapsed); } /******************** ** LoadStringArray ** ********************* ** Initialize the string array with random strings of ** varying sizes. ** Returns the pointer to the offset pointer array. ** Note that since we're creating a number of arrays, this ** routine builds one array, then copies it into the others. */ static farulong *LoadStringArray(faruchar *strarray, /* String array */ uint numarrays, /* # of arrays */ ulong *nstrings, /* # of strings */ ulong arraysize) /* Size of array */ { faruchar *tempsbase; /* Temporary string base pointer */ farulong *optrarray; /* Local for pointer */ farulong *tempobase; /* Temporary offset pointer base pointer */ unsigned long curroffset; /* Current offset */ int fullflag; /* Indicates full array */ unsigned char stringlength; /* Length of string */ unsigned char i; /* Index */ unsigned long j; /* Another index */ unsigned int k; /* Yet another index */ unsigned int l; /* Ans still one more index */ int systemerror; /* For holding error code */ /* ** Initialize random number generator. */ randnum(13L); /* ** Start with no strings. Initialize our current offset pointer ** to 0. */ *nstrings=0L; curroffset=0L; fullflag=0; do { /* ** Allocate a string with a random length no ** shorter than 4 bytes and no longer than ** 80 bytes. Note we have to also make sure ** there's room in the array. */ stringlength=(unsigned char)(1+abs_randwc(76L) & 0xFFL); if((unsigned long)stringlength+curroffset+1L>=arraysize) { stringlength=(unsigned char)((arraysize-curroffset-1L) & 0xFF); fullflag=1; /* Indicates a full */ } /* ** Store length at curroffset and advance current offset. */ *(strarray+curroffset)=stringlength; curroffset++; /* ** Fill up the rest of the string with random bytes. */ for(i=0;i<stringlength;i++) { *(strarray+curroffset)= (unsigned char)(abs_randwc((long)0xFE)); curroffset++; } /* ** Increment the # of strings counter. */ *nstrings+=1L; } while(fullflag==0); /* ** We now have initialized a single full array. If there ** is more than one array, copy the original into the ** others. */ k=1; tempsbase=strarray; while(k<numarrays) { tempsbase+=arraysize+100; /* Set base */ for(l=0;l<arraysize;l++) tempsbase[l]=strarray[l]; k++; } /* ** Now the array is full, allocate enough space for an ** offset pointer array. */ optrarray=(farulong *)AllocateMemory(*nstrings * sizeof(unsigned long) * numarrays, &systemerror); if(systemerror) { ReportError("CPU:Stringsort",systemerror); FreeMemory((void *)strarray,&systemerror); ErrorExit(); } /* ** Go through the newly-built string array, building ** offsets and putting them into the offset pointer ** array. */ curroffset=0; for(j=0;j<*nstrings;j++) { *(optrarray+j)=curroffset; curroffset+=(unsigned long)(*(strarray+curroffset))+1L; } /* ** As above, we've made one copy of the offset pointers, ** so duplicate this array in the remaining ones. */ k=1; tempobase=optrarray; while(k<numarrays) { tempobase+=*nstrings; for(l=0;l<*nstrings;l++) tempobase[l]=optrarray[l]; k++; } /* ** All done...go home. Pass local pointer back. */ return(optrarray); } /************** ** stradjust ** *************** ** Used by the string heap sort. Call this routine to adjust the ** string at offset i to length l. The members of the string array ** are moved accordingly and the length of the string at offset i ** is set to l. */ static void stradjust(farulong *optrarray, /* Offset pointer array */ faruchar *strarray, /* String array */ ulong nstrings, /* # of strings */ ulong i, /* Offset to adjust */ uchar l) /* New length */ { unsigned long nbytes; /* # of bytes to move */ unsigned long j; /* Index */ int direction; /* Direction indicator */ unsigned char adjamount; /* Adjustment amount */ /* ** If new length is less than old length, the direction is ** down. If new length is greater than old length, the ** direction is up. */ direction=(int)l - (int)*(strarray+*(optrarray+i)); adjamount=(unsigned char)abs(direction); /* ** See if the adjustment is being made to the last ** string in the string array. If so, we don't have to ** do anything more than adjust the length field. */ if(i==(nstrings-1L)) { *(strarray+*(optrarray+i))=l; return; } /* ** Calculate the total # of bytes in string array from ** location i+1 to end of array. Whether we're moving "up" or ** down, this is how many bytes we'll have to move. */ nbytes=*(optrarray+nstrings-1L) + (unsigned long)*(strarray+*(optrarray+nstrings-1L)) + 1L - *(optrarray+i+1L); /* ** Calculate the source and the destination. Source is ** string position i+1. Destination is string position i+l ** (i+"ell"...don't confuse 1 and l). ** Hand this straight to memmove and let it handle the ** "overlap" problem. */ MoveMemory((farvoid *)(strarray+*(optrarray+i)+l+1), (farvoid *)(strarray+*(optrarray+i+1)), (unsigned long)nbytes); /* ** We have to adjust the offset pointer array. ** This covers string i+1 to numstrings-1. */ for(j=i+1;j<nstrings;j++) if(direction<0) *(optrarray+j)=*(optrarray+j)-adjamount; else *(optrarray+j)=*(optrarray+j)+adjamount; /* ** Store the new length and go home. */ *(strarray+*(optrarray+i))=l; return; } /**************** ** strheapsort ** ***************** ** Pass this routine a pointer to an array of unsigned char. ** The array is presumed to hold strings occupying at most ** 80 bytes (counts a byte count). ** This routine also needs a pointer to an array of offsets ** which represent string locations in the array, and ** an unsigned long indicating the number of strings ** in the array. */ static void StrHeapSort(farulong *optrarray, /* Offset pointers */ faruchar *strarray, /* Strings array */ ulong numstrings, /* # of strings in array */ ulong bottom, /* Region to sort...bottom */ ulong top) /* Region to sort...top */ { unsigned char temp[80]; /* Used to exchange elements */ unsigned char tlen; /* Temp to hold length */ unsigned long i; /* Loop index */ /* ** Build a heap in the array */ for(i=(top/2L); i>0; --i) strsift(optrarray,strarray,numstrings,i,top); /* ** Repeatedly extract maximum from heap and place it at the ** end of the array. When we get done, we'll have a sorted ** array. */ for(i=top; i>0; --i) { strsift(optrarray,strarray,numstrings,0,i); /* temp = string[0] */ tlen=*strarray; MoveMemory((farvoid *)&temp[0], /* Perform exchange */ (farvoid *)strarray, (unsigned long)(tlen+1)); /* string[0]=string[i] */ tlen=*(strarray+*(optrarray+i)); stradjust(optrarray,strarray,numstrings,0,tlen); MoveMemory((farvoid *)strarray, (farvoid *)(strarray+*(optrarray+i)), (unsigned long)(tlen+1)); /* string[i]=temp */ tlen=temp[0]; stradjust(optrarray,strarray,numstrings,i,tlen); MoveMemory((farvoid *)(strarray+*(optrarray+i)), (farvoid *)&temp[0], (unsigned long)(tlen+1)); } return; } /**************** ** str_is_less ** ***************** ** Pass this function: ** 1) A pointer to an array of offset pointers ** 2) A pointer to a string array ** 3) The number of elements in the string array ** 4) Offsets to two strings (a & b) ** This function returns TRUE if string a is < string b. */ static int str_is_less(farulong *optrarray, /* Offset pointers */ faruchar *strarray, /* String array */ ulong numstrings, /* # of strings */ ulong a, ulong b) /* Offsets */ { int slen; /* String length */ /* ** Determine which string has the minimum length. Use that ** to call strncmp(). If they match up to that point, the ** string with the longer length wins. */ slen=(int)*(strarray+*(optrarray+a)); if(slen > (int)*(strarray+*(optrarray+b))) slen=(int)*(strarray+*(optrarray+b)); slen=strncmp((char *)(strarray+*(optrarray+a)), (char *)(strarray+*(optrarray+b)),slen); if(slen==0) { /* ** They match. Return true if the length of a ** is greater than the length of b. */ if(*(strarray+*(optrarray+a)) > *(strarray+*(optrarray+b))) return(TRUE); return(FALSE); } if(slen<0) return(TRUE); /* a is strictly less than b */ return(FALSE); /* Only other possibility */ } /************ ** strsift ** ************* ** Pass this function: ** 1) A pointer to an array of offset pointers ** 2) A pointer to a string array ** 3) The number of elements in the string array ** 4) Offset within which to sort. ** Sift the array within the bounds of those offsets (thus ** building a heap). */ static void strsift(farulong *optrarray, /* Offset pointers */ faruchar *strarray, /* String array */ ulong numstrings, /* # of strings */ ulong i, ulong j) /* Offsets */ { unsigned long k; /* Temporaries */ unsigned char temp[80]; unsigned char tlen; /* For string lengths */ while((i+i)<=j) { k=i+i; if(k<j) if(str_is_less(optrarray,strarray,numstrings,k,k+1L)) ++k; if(str_is_less(optrarray,strarray,numstrings,i,k)) { /* temp=string[k] */ tlen=*(strarray+*(optrarray+k)); MoveMemory((farvoid *)&temp[0], (farvoid *)(strarray+*(optrarray+k)), (unsigned long)(tlen+1)); /* string[k]=string[i] */ tlen=*(strarray+*(optrarray+i)); stradjust(optrarray,strarray,numstrings,k,tlen); MoveMemory((farvoid *)(strarray+*(optrarray+k)), (farvoid *)(strarray+*(optrarray+i)), (unsigned long)(tlen+1)); /* string[i]=temp */ tlen=temp[0]; stradjust(optrarray,strarray,numstrings,i,tlen); MoveMemory((farvoid *)(strarray+*(optrarray+i)), (farvoid *)&temp[0], (unsigned long)(tlen+1)); i=k; } else i=j+1; } return; } /************************ ** BITFIELD OPERATIONS ** *************************/ /************* ** DoBitops ** ************** ** Perform the bit operations test portion of the CPU ** benchmark. Returns the iterations per second. */ void DoBitops(void) { BitOpStruct *locbitopstruct; /* Local bitop structure */ farulong *bitarraybase; /* Base of bitmap array */ farulong *bitoparraybase; /* Base of bitmap operations array */ ulong nbitops; /* # of bitfield operations */ ulong accumtime; /* Accumulated time in ticks */ double iterations; /* # of iterations */ char *errorcontext; /* Error context string */ int systemerror; /* For holding error codes */ /* ** Link to global structure. */ locbitopstruct=&global_bitopstruct; /* ** Set the error context. */ errorcontext="CPU:Bitfields"; /* ** See if we need to run adjustment code. */ if(locbitopstruct->adjust==0) { bitarraybase=(farulong *)AllocateMemory(locbitopstruct->bitfieldarraysize * sizeof(ulong),&systemerror); if(systemerror) { ReportError(errorcontext,systemerror); ErrorExit(); } /* ** Initialize bitfield operations array to [2,30] elements */ locbitopstruct->bitoparraysize=30L; while(1) { /* ** Allocate space for operations array */ bitoparraybase=(farulong *)AllocateMemory(locbitopstruct->bitoparraysize*2L* sizeof(ulong), &systemerror); if(systemerror) { ReportError(errorcontext,systemerror); FreeMemory((farvoid *)bitarraybase,&systemerror); ErrorExit(); } /* ** Do an iteration of the bitmap test. If the ** elapsed time is less than or equal to the permitted ** minimum, then de-allocate the array, reallocate a ** larger version, and try again. */ if(DoBitfieldIteration(bitarraybase, bitoparraybase, locbitopstruct->bitoparraysize, &nbitops)>global_min_ticks) break; /* We're ok...exit */ FreeMemory((farvoid *)bitoparraybase,&systemerror); locbitopstruct->bitoparraysize+=100L; } } else { /* ** Don't need to do self adjustment, just allocate ** the array space. */ bitarraybase=(farulong *)AllocateMemory(locbitopstruct->bitfieldarraysize * sizeof(ulong),&systemerror); if(systemerror) { ReportError(errorcontext,systemerror); ErrorExit(); } bitoparraybase=(farulong *)AllocateMemory(locbitopstruct->bitoparraysize*2L* sizeof(ulong), &systemerror); if(systemerror) { ReportError(errorcontext,systemerror); FreeMemory((farvoid *)bitarraybase,&systemerror); ErrorExit(); } } /* ** All's well if we get here. Repeatedly perform bitops until the ** accumulated elapsed time is greater than # of seconds requested. */ accumtime=0L; iterations=(double)0.0; do { accumtime+=DoBitfieldIteration(bitarraybase, bitoparraybase, locbitopstruct->bitoparraysize,&nbitops); iterations+=(double)nbitops; } while(TicksToSecs(accumtime)<locbitopstruct->request_secs); /* ** Clean up, calculate results, and go home. ** Also, set adjustment flag to show that we don't have ** to do self adjusting in the future. */ FreeMemory((farvoid *)bitarraybase,&systemerror); FreeMemory((farvoid *)bitoparraybase,&systemerror); locbitopstruct->bitopspersec=iterations /TicksToFracSecs(accumtime); if(locbitopstruct->adjust==0) locbitopstruct->adjust=1; return; } /************************ ** DoBitfieldIteration ** ************************* ** Perform a single iteration of the bitfield benchmark. ** Return the # of ticks accumulated by the operation. */ static ulong DoBitfieldIteration(farulong *bitarraybase, farulong *bitoparraybase, long bitoparraysize, ulong *nbitops) { long i; /* Index */ ulong bitoffset; /* Offset into bitmap */ ulong elapsed; /* Time to execute */ /* ** Clear # bitops counter */ *nbitops=0L; /* ** Construct a set of bitmap offsets and run lengths. ** The offset can be any random number from 0 to the ** size of the bitmap (in bits). The run length can ** be any random number from 1 to the number of bits ** between the offset and the end of the bitmap. ** Note that the bitmap has 8192 * 32 bits in it. ** (262,144 bits) */ for (i=0;i<bitoparraysize;i++) { /* First item is offset */ *(bitoparraybase+i+i)=bitoffset=abs_randwc(262140L); /* Next item is run length */ *nbitops+=*(bitoparraybase+i+i+1L)=abs_randwc(262140L-bitoffset); } /* ** Array of offset and lengths built...do an iteration of ** the test. ** Start the stopwatch. */ elapsed=StartStopwatch(); /* ** Loop through array off offset/run length pairs. ** Execute operation based on modulus of index. */ for(i=0;i<bitoparraysize;i++) { switch(i % 3) { case 0: /* Set run of bits */ ToggleBitRun(bitarraybase, *(bitoparraybase+i+i), *(bitoparraybase+i+i+1), 1); break; case 1: /* Clear run of bits */ ToggleBitRun(bitarraybase, *(bitoparraybase+i+i), *(bitoparraybase+i+i+1), 0); break; case 2: /* Complement run of bits */ FlipBitRun(bitarraybase, *(bitoparraybase+i+i), *(bitoparraybase+i+i+1)); break; } } /* ** Return elapsed time */ return(StopStopwatch(elapsed)); } /***************************** ** ToggleBitRun * ****************************** ** Set or clear a run of nbits starting at ** bit_addr in bitmap. */ static void ToggleBitRun(farulong *bitmap, /* Bitmap */ ulong bit_addr, /* Address of bits to set */ ulong nbits, /* # of bits to set/clr */ uint val) /* 1 or 0 */ { unsigned long bindex; /* Index into array */ unsigned long bitnumb; /* Bit number */ while(nbits--) { #ifdef LONG64 bindex=bit_addr>>6; /* Index is number /64 */ bindex=bit_addr % 64; /* Bit number in word */ #else bindex=bit_addr>>5; /* Index is number /32 */ bitnumb=bit_addr % 32; /* bit number in word */ #endif if(val) bitmap[bindex]|=(1L<<bitnumb); else bitmap[bindex]&=~(1L<<bitnumb); bit_addr++; } return; } /*************** ** FlipBitRun ** **************** ** Complements a run of bits. */ static void FlipBitRun(farulong *bitmap, /* Bit map */ ulong bit_addr, /* Bit address */ ulong nbits) /* # of bits to flip */ { unsigned long bindex; /* Index into array */ unsigned long bitnumb; /* Bit number */ while(nbits--) { #ifdef LONG64 bindex=bit_addr>>6; /* Index is number /64 */ bitnumb=bit_addr % 32; /* Bit number in longword */ #else bindex=bit_addr>>5; /* Index is number /32 */ bitnumb=bit_addr % 32; /* Bit number in longword */ #endif bitmap[bindex]^=(1L<<bitnumb); bit_addr++; } return; } /***************************** ** FLOATING-POINT EMULATION ** *****************************/ /************** ** DoEmFloat ** *************** ** Perform the floating-point emulation routines portion of the ** CPU benchmark. Returns the operations per second. */ void DoEmFloat(void) { EmFloatStruct *locemfloatstruct; /* Local structure */ InternalFPF *abase; /* Base of A array */ InternalFPF *bbase; /* Base of B array */ InternalFPF *cbase; /* Base of C array */ ulong accumtime; /* Accumulated time in ticks */ double iterations; /* # of iterations */ ulong tickcount; /* # of ticks */ char *errorcontext; /* Error context string pointer */ int systemerror; /* For holding error code */ ulong loops; /* # of loops */ /* ** Link to global structure */ locemfloatstruct=&global_emfloatstruct; /* ** Set the error context */ errorcontext="CPU:Floating Emulation"; /* ** Test the emulation routines. */ #ifdef DEBUG #endif abase=(InternalFPF *)AllocateMemory(locemfloatstruct->arraysize*sizeof(InternalFPF), &systemerror); if(systemerror) { ReportError(errorcontext,systemerror); ErrorExit(); } bbase=(InternalFPF *)AllocateMemory(locemfloatstruct->arraysize*sizeof(InternalFPF), &systemerror); if(systemerror) { ReportError(errorcontext,systemerror); FreeMemory((farvoid *)abase,&systemerror); ErrorExit(); } cbase=(InternalFPF *)AllocateMemory(locemfloatstruct->arraysize*sizeof(InternalFPF), &systemerror); if(systemerror) { ReportError(errorcontext,systemerror); FreeMemory((farvoid *)abase,&systemerror); FreeMemory((farvoid *)bbase,&systemerror); ErrorExit(); } /* ** Set up the arrays */ SetupCPUEmFloatArrays(abase,bbase,cbase,locemfloatstruct->arraysize); /* ** See if we need to do self-adjusting code. */ if(locemfloatstruct->adjust==0) { locemfloatstruct->loops=0; /* ** Do an iteration of the tests. If the elapsed time is ** less than minimum, increase the loop count and try ** again. */ for(loops=1;loops<CPUEMFLOATLOOPMAX;loops+=loops) { tickcount=DoEmFloatIteration(abase,bbase,cbase, locemfloatstruct->arraysize, loops); if(tickcount>global_min_ticks) { locemfloatstruct->loops=loops; break; } } } /* ** Verify that selft adjustment code worked. */ if(locemfloatstruct->loops==0) { printf("CPU:EMFPU -- CMPUEMFLOATLOOPMAX limit hit\n"); FreeMemory((farvoid *)abase,&systemerror); FreeMemory((farvoid *)bbase,&systemerror); FreeMemory((farvoid *)cbase,&systemerror); ErrorExit(); } /* ** All's well if we get here. Repeatedly perform floating ** tests until the accumulated time is greater than the ** # of seconds requested. ** Each iteration performs arraysize * 3 operations. */ accumtime=0L; iterations=(double)0.0; do { accumtime+=DoEmFloatIteration(abase,bbase,cbase, locemfloatstruct->arraysize, locemfloatstruct->loops); iterations+=(double)1.0; } while(TicksToSecs(accumtime)<locemfloatstruct->request_secs); /* ** Clean up, calculate results, and go home. ** Also, indicate that adjustment is done. */ FreeMemory((farvoid *)abase,&systemerror); FreeMemory((farvoid *)bbase,&systemerror); FreeMemory((farvoid *)cbase,&systemerror); locemfloatstruct->emflops=(iterations*(double)locemfloatstruct->loops)/ (double)TicksToFracSecs(accumtime); if(locemfloatstruct->adjust==0) locemfloatstruct->adjust=1; return; } /************************* ** FOURIER COEFFICIENTS ** *************************/ /************** ** DoFourier ** *************** ** Perform the transcendental/trigonometric portion of the ** benchmark. This benchmark calculates the first n ** fourier coefficients of the function (x+1)^x defined ** on the interval 0,2. */ void DoFourier(void) { FourierStruct *locfourierstruct; /* Local fourier struct */ fardouble *abase; /* Base of A[] coefficients array */ fardouble *bbase; /* Base of B[] coefficients array */ unsigned long accumtime; /* Accumulated time in ticks */ double iterations; /* # of iterations */ char *errorcontext; /* Error context string pointer */ int systemerror; /* For error code */ /* ** Link to global structure */ locfourierstruct=&global_fourierstruct; /* ** Set error context string */ errorcontext="FPU:Transcendental"; /* ** See if we need to do self-adjustment code. */ if(locfourierstruct->adjust==0) { locfourierstruct->arraysize=100L; /* Start at 100 elements */ while(1) { abase=(fardouble *)AllocateMemory(locfourierstruct->arraysize*sizeof(double), &systemerror); if(systemerror) { ReportError(errorcontext,systemerror); ErrorExit(); } bbase=(fardouble *)AllocateMemory(locfourierstruct->arraysize*sizeof(double), &systemerror); if(systemerror) { ReportError(errorcontext,systemerror); FreeMemory((void *)abase,&systemerror); ErrorExit(); } /* ** Do an iteration of the tests. If the elapsed time is ** less than or equal to the permitted minimum, re-allocate ** larger arrays and try again. */ if(DoFPUTransIteration(abase,bbase, locfourierstruct->arraysize)>global_min_ticks) break; /* We're ok...exit */ /* ** Make bigger arrays and try again. */ FreeMemory((farvoid *)abase,&systemerror); FreeMemory((farvoid *)bbase,&systemerror); locfourierstruct->arraysize+=50L; } } else { /* ** Don't need self-adjustment. Just allocate the ** arrays, and go. */ abase=(fardouble *)AllocateMemory(locfourierstruct->arraysize*sizeof(double), &systemerror); if(systemerror) { ReportError(errorcontext,systemerror); ErrorExit(); } bbase=(fardouble *)AllocateMemory(locfourierstruct->arraysize*sizeof(double), &systemerror); if(systemerror) { ReportError(errorcontext,systemerror); FreeMemory((void *)abase,&systemerror); ErrorExit(); } } /* ** All's well if we get here. Repeatedly perform integration ** tests until the accumulated time is greater than the ** # of seconds requested. */ accumtime=0L; iterations=(double)0.0; do { accumtime+=DoFPUTransIteration(abase,bbase,locfourierstruct->arraysize); iterations+=(double)locfourierstruct->arraysize*(double)2.0-(double)1.0; } while(TicksToSecs(accumtime)<locfourierstruct->request_secs); /* ** Clean up, calculate results, and go home. ** Also set adjustment flag to indicate no adjust code needed. */ FreeMemory((farvoid *)abase,&systemerror); FreeMemory((farvoid *)bbase,&systemerror); locfourierstruct->fflops=iterations/(double)TicksToFracSecs(accumtime); if(locfourierstruct->adjust==0) locfourierstruct->adjust=1; return; } /************************ ** DoFPUTransIteration ** ************************* ** Perform an iteration of the FPU Transcendental/trigonometric ** benchmark. Here, an iteration consists of calculating the ** first n fourier coefficients of the function (x+1)^x on ** the interval 0,2. n is given by arraysize. ** NOTE: The # of integration steps is fixed at ** 200. */ static ulong DoFPUTransIteration(fardouble *abase, /* A coeffs. */ fardouble *bbase, /* B coeffs. */ ulong arraysize) /* # of coeffs */ { double omega; /* Fundamental frequency */ unsigned long i; /* Index */ unsigned long elapsed; /* Elapsed time */ /* ** Start the stopwatch */ elapsed=StartStopwatch(); /* ** Calculate the fourier series. Begin by ** calculating A[0]. */ *abase=TrapezoidIntegrate((double)0.0, (double)2.0, 200, (double)0.0, /* No omega * n needed */ 0 )/(double)2.0; /* ** Calculate the fundamental frequency. ** ( 2 * pi ) / period...and since the period ** is 2, omega is simply pi. */ omega=(double)3.1415926535897932; for(i=1;i<arraysize;i++) { /* ** Calculate A[i] terms. Note, once again, that we ** can ignore the 2/period term outside the integral ** since the period is 2 and the term cancels itself ** out. */ *(abase+i)=TrapezoidIntegrate((double)0.0, (double)2.0, 200, omega * (double)i, 1); /* ** Calculate the B[i] terms. */ *(bbase+i)=TrapezoidIntegrate((double)0.0, (double)2.0, 200, omega * (double)i, 2); } /* ** All done, stop the stopwatch */ return(StopStopwatch(elapsed)); } /*********************** ** TrapezoidIntegrate ** ************************ ** Perform a simple trapezoid integration on the ** function (x+1)**x. ** x0,x1 set the lower and upper bounds of the ** integration. ** nsteps indicates # of trapezoidal sections ** omegan is the fundamental frequency times ** the series member # ** select = 0 for the A[0] term, 1 for cosine terms, and ** 2 for sine terms. ** Returns the value. */ static double TrapezoidIntegrate( double x0, /* Lower bound */ double x1, /* Upper bound */ int nsteps, /* # of steps */ double omegan, /* omega * n */ int select) { double x; /* Independent variable */ double dx; /* Stepsize */ double rvalue; /* Return value */ /* ** Initialize independent variable */ x=x0; /* ** Calculate stepsize */ dx=(x1 - x0) / (double)nsteps; /* ** Initialize the return value. */ rvalue=thefunction(x0,omegan,select)/(double)2.0; /* ** Compute the other terms of the integral. */ if(nsteps!=1) { --nsteps; /* Already done 1 step */ while(--nsteps ) { x+=dx; rvalue+=thefunction(x,omegan,select); } } /* ** Finish computation */ rvalue=(rvalue+thefunction(x1,omegan,select)/(double)2.0)*dx; return(rvalue); } /**************** ** thefunction ** ***************** ** This routine selects the function to be used ** in the Trapezoid integration. ** x is the independent variable ** omegan is omega * n ** select chooses which of the sine/cosine functions ** are used. note the special case for select=0. */ static double thefunction(double x, /* Independent variable */ double omegan, /* Omega * term */ int select) /* Choose term */ { /* ** Use select to pick which function we call. */ switch(select) { case 0: return(pow(x+(double)1.0,x)); case 1: return(pow(x+(double)1.0,x) * cos(omegan * x)); case 2: return(pow(x+(double)1.0,x) * sin(omegan * x)); } /* ** We should never reach this point, but the following ** keeps compilers from issuing a warning message. */ return(0.0); } /************************* ** ASSIGNMENT ALGORITHM ** *************************/ /************* ** DoAssign ** ************** ** Perform an assignment algorithm. ** The algorithm was adapted from the step by step guide found ** in "Quantitative Decision Making for Business" (Gordon, ** Pressman, and Cohn; Prentice-Hall) ** ** ** NOTES: ** 1. Even though the algorithm distinguishes between ** ASSIGNROWS and ASSIGNCOLS, as though the two might ** be different, it does presume a square matrix. ** I.E., ASSIGNROWS and ASSIGNCOLS must be the same. ** This makes for some algorithmically-correct but ** probably non-optimal constructs. ** */ void DoAssign(void) { AssignStruct *locassignstruct; /* Local structure ptr */ farlong *arraybase; char *errorcontext; int systemerror; ulong accumtime; double iterations; /* ** Link to global structure */ locassignstruct=&global_assignstruct; /* ** Set the error context string. */ errorcontext="CPU:Assignment"; /* ** See if we need to do self adjustment code. */ if(locassignstruct->adjust==0) { /* ** Self-adjustment code. The system begins by working on 1 ** array. If it does that in no time, then two arrays ** are built. This process continues until ** enough arrays are built to handle the tolerance. */ locassignstruct->numarrays=1; while(1) { /* ** Allocate space for arrays */ arraybase=(farlong *) AllocateMemory(sizeof(long)* ASSIGNROWS*ASSIGNCOLS*locassignstruct->numarrays, &systemerror); if(systemerror) { ReportError(errorcontext,systemerror); FreeMemory((farvoid *)arraybase, &systemerror); ErrorExit(); } /* ** Do an iteration of the assignment alg. If the ** elapsed time is less than or equal to the permitted ** minimum, then allocate for more arrays and ** try again. */ if(DoAssignIteration(arraybase, locassignstruct->numarrays)>global_min_ticks) break; /* We're ok...exit */ FreeMemory((farvoid *)arraybase, &systemerror); locassignstruct->numarrays++; } } else { /* ** Allocate space for arrays */ arraybase=(farlong *)AllocateMemory(sizeof(long)* ASSIGNROWS*ASSIGNCOLS*locassignstruct->numarrays, &systemerror); if(systemerror) { ReportError(errorcontext,systemerror); FreeMemory((farvoid *)arraybase, &systemerror); ErrorExit(); } } /* ** All's well if we get here. Do the tests. */ accumtime=0L; iterations=(double)0.0; do { accumtime+=DoAssignIteration(arraybase, locassignstruct->numarrays); iterations+=(double)1.0; } while(TicksToSecs(accumtime)<locassignstruct->request_secs); /* ** Clean up, calculate results, and go home. Be sure to ** show that we don't have to rerun adjustment code. */ FreeMemory((farvoid *)arraybase,&systemerror); locassignstruct->iterspersec=iterations * (double)locassignstruct->numarrays / TicksToFracSecs(accumtime); if(locassignstruct->adjust==0) locassignstruct->adjust=1; return; } /********************** ** DoAssignIteration ** *********************** ** This routine executes one iteration of the assignment test. ** It returns the number of ticks elapsed in the iteration. */ static ulong DoAssignIteration(farlong *arraybase, ulong numarrays) { longptr abase; /* local pointer */ ulong elapsed; /* Elapsed ticks */ ulong i; /* ** Set up local pointer */ abase.ptrs.p=arraybase; /* ** Load up the arrays with a random table. */ LoadAssignArrayWithRand(arraybase,numarrays); /* ** Start the stopwatch */ elapsed=StartStopwatch(); /* ** Execute assignment algorithms */ for(i=0;i<numarrays;i++) { abase.ptrs.p+=i*ASSIGNROWS*ASSIGNCOLS; Assignment(*abase.ptrs.ap); } /* ** Get elapsed time */ return(StopStopwatch(elapsed)); } /**************************** ** LoadAssignArrayWithRand ** ***************************** ** Load the assignment arrays with random numbers. All positive. ** These numbers represent costs. */ static void LoadAssignArrayWithRand(farlong *arraybase, ulong numarrays) { longptr abase,abase1; /* Local for array pointer */ ulong i; /* ** Set local array pointer */ abase.ptrs.p=arraybase; abase1.ptrs.p=arraybase; /* ** Set up the first array. Then just copy it into the ** others. */ LoadAssign(*(abase.ptrs.ap)); if(numarrays>1) for(i=1;i<numarrays;i++) { abase1.ptrs.p+=i*ASSIGNROWS*ASSIGNCOLS; CopyToAssign(*(abase.ptrs.ap),*(abase1.ptrs.ap)); } return; } /*************** ** LoadAssign ** **************** ** The array given by arraybase is loaded with positive random ** numbers. Elements in the array are capped at 5,000,000. */ static void LoadAssign(farlong arraybase[][ASSIGNCOLS]) { ushort i,j; /* ** Reset random number generator so things repeat. */ randnum(13L); for(i=0;i<ASSIGNROWS;i++) for(j=0;j<ASSIGNROWS;j++) arraybase[i][j]=abs_randwc(5000000L); return; } /***************** ** CopyToAssign ** ****************** ** Copy the contents of one array to another. This is called by ** the routine that builds the initial array, and is used to copy ** the contents of the intial array into all following arrays. */ static void CopyToAssign(farlong arrayfrom[ASSIGNROWS][ASSIGNCOLS], farlong arrayto[ASSIGNROWS][ASSIGNCOLS]) { ushort i,j; for(i=0;i<ASSIGNROWS;i++) for(j=0;j<ASSIGNCOLS;j++) arrayto[i][j]=arrayfrom[i][j]; return; } /*************** ** Assignment ** ***************/ static void Assignment(farlong arraybase[][ASSIGNCOLS]) { short assignedtableau[ASSIGNROWS][ASSIGNCOLS]; /* ** First, calculate minimum costs */ calc_minimum_costs(arraybase); /* ** Repeat following until the number of rows selected ** equals the number of rows in the tableau. */ while(first_assignments(arraybase,assignedtableau)!=ASSIGNROWS) { second_assignments(arraybase,assignedtableau); } #ifdef DEBUG { int i,j; printf("Column choices for each row\n"); for(i=0;i<ASSIGNROWS;i++) { for(j=0;j<ASSIGNCOLS;j++) if(assignedtableau[i][j]!=0) printf("%d ",j); } } #endif return; } /*********************** ** calc_minimum_costs ** ************************ ** Revise the tableau by calculating the minimum costs on a ** row and column basis. These minima are subtracted from ** their rows and columns, creating a new tableau. */ static void calc_minimum_costs(long tableau[][ASSIGNCOLS]) { ushort i,j; /* Index variables */ long currentmin; /* Current minimum */ /* ** Determine minimum costs on row basis. This is done by ** subtracting -- on a row-per-row basis -- the minum value ** for that row. */ for(i=0;i<ASSIGNROWS;i++) { currentmin=MAXPOSLONG; /* Initialize minimum */ for(j=0;j<ASSIGNCOLS;j++) if(tableau[i][j]<currentmin) currentmin=tableau[i][j]; for(j=0;j<ASSIGNCOLS;j++) tableau[i][j]-=currentmin; } /* ** Determine minimum cost on a column basis. This works ** just as above, only now we step through the array ** column-wise */ for(j=0;j<ASSIGNCOLS;j++) { currentmin=MAXPOSLONG; /* Initialize minimum */ for(i=0;i<ASSIGNROWS;i++) if(tableau[i][j]<currentmin) currentmin=tableau[i][j]; /* ** Here, we'll take the trouble to see if the current ** minimum is zero. This is likely worth it, since the ** preceding loop will have created at least one zero in ** each row. We can save ourselves a few iterations. */ if(currentmin!=0) for(i=0;i<ASSIGNROWS;i++) tableau[i][j]-=currentmin; } return; } /********************** ** first_assignments ** *********************** ** Do first assignments. ** The assignedtableau[] array holds a set of values that ** indicate the assignment of a value, or its elimination. ** The values are: ** 0 = Item is neither assigned nor eliminated. ** 1 = Item is assigned ** 2 = Item is eliminated ** Returns the number of selections made. If this equals ** the number of rows, then an optimum has been determined. */ static int first_assignments(long tableau[][ASSIGNCOLS], short assignedtableau[][ASSIGNCOLS]) { ushort i,j,k; /* Index variables */ ushort numassigns; /* # of assignments */ ushort totnumassigns; /* Total # of assignments */ ushort numzeros; /* # of zeros in row */ int selected; /* Flag used to indicate selection */ /* ** Clear the assignedtableau, setting all members to show that ** no one is yet assigned, eliminated, or anything. */ for(i=0;i<ASSIGNROWS;i++) for(j=0;j<ASSIGNCOLS;j++) assignedtableau[i][j]=0; totnumassigns=0; do { numassigns=0; /* ** Step through rows. For each one that is not currently ** assigned, see if the row has only one zero in it. If so, ** mark that as an assigned row/col. Eliminate other zeros ** in the same column. */ for(i=0;i<ASSIGNROWS;i++) { numzeros=0; for(j=0;j<ASSIGNCOLS;j++) if(tableau[i][j]==0L) if(assignedtableau[i][j]==0) { numzeros++; selected=j; } if(numzeros==1) { numassigns++; totnumassigns++; assignedtableau[i][selected]=1; for(k=0;k<ASSIGNROWS;k++) if((k!=i) && (tableau[k][selected]==0)) assignedtableau[k][selected]=2; } } /* ** Step through columns, doing same as above. Now, be careful ** of items in the other rows of a selected column. */ for(j=0;j<ASSIGNCOLS;j++) { numzeros=0; for(i=0;i<ASSIGNROWS;i++) if(tableau[i][j]==0L) if(assignedtableau[i][j]==0) { numzeros++; selected=i; } if(numzeros==1) { numassigns++; totnumassigns++; assignedtableau[selected][j]=1; for(k=0;k<ASSIGNCOLS;k++) if((k!=j) && (tableau[selected][k]==0)) assignedtableau[selected][k]=2; } } /* ** Repeat until no more assignments to be made. */ } while(numassigns!=0); /* ** See if we can leave at this point. */ if(totnumassigns==ASSIGNROWS) return(totnumassigns); /* ** Now step through the array by row. If you find any unassigned ** zeros, pick the first in the row. Eliminate all zeros from ** that same row & column. This occurs if there are multiple optima... ** possibly. */ for(i=0;i<ASSIGNROWS;i++) { selected=-1; for(j=0;j<ASSIGNCOLS;j++) if((tableau[i][j]==0L) && (assignedtableau[i][j]==0)) { selected=j; break; } if(selected!=-1) { assignedtableau[i][selected]=1; totnumassigns++; for(k=0;k<ASSIGNCOLS;k++) if((k!=selected) && (tableau[i][k]==0L)) assignedtableau[i][k]=2; for(k=0;k<ASSIGNROWS;k++) if((k!=i) && (tableau[k][selected]==0L)) assignedtableau[k][selected]=2; } } return(totnumassigns); } /*********************** ** second_assignments ** ************************ ** This section of the algorithm creates the revised ** tableau, and is difficult to explain. I suggest you ** refer to the algorithm's source, mentioned in comments ** toward the beginning of the program. */ static void second_assignments(long tableau[][ASSIGNCOLS], short assignedtableau[][ASSIGNCOLS]) { int i,j; /* Indexes */ short linesrow[ASSIGNROWS]; short linescol[ASSIGNCOLS]; long smallest; /* Holds smallest value */ ushort numassigns; /* Number of assignments */ ushort newrows; /* New rows to be considered */ /* ** Clear the linesrow and linescol arrays. */ for(i=0;i<ASSIGNROWS;i++) linesrow[i]=0; for(i=0;i<ASSIGNCOLS;i++) linescol[i]=0; /* ** Scan rows, flag each row that has no assignment in it. */ for(i=0;i<ASSIGNROWS;i++) { numassigns=0; for(j=0;j<ASSIGNCOLS;j++) if(assignedtableau[i][j]==1) { numassigns++; break; } if(numassigns==0) linesrow[i]=1; } do { newrows=0; /* ** For each row checked above, scan for any zeros. If found, ** check the associated column. */ for(i=0;i<ASSIGNROWS;i++) { if(linesrow[i]==1) for(j=0;j<ASSIGNCOLS;j++) if(tableau[i][j]==0) linescol[j]=1; } /* ** Now scan checked columns. If any contain assigned zeros, check ** the associated row. */ for(j=0;j<ASSIGNCOLS;j++) if(linescol[j]==1) for(i=0;i<ASSIGNROWS;i++) if((assignedtableau[i][j]==1) && (linesrow[i]!=1)) { linesrow[i]=1; newrows++; } } while(newrows!=0); /* ** linesrow[n]==0 indicate rows covered by imaginary line ** linescol[n]==1 indicate cols covered by imaginary line ** For all cells not covered by imaginary lines, determine smallest ** value. */ smallest=MAXPOSLONG; for(i=0;i<ASSIGNROWS;i++) if(linesrow[i]!=0) for(j=0;j<ASSIGNCOLS;j++) if(linescol[j]!=1) if(tableau[i][j]<smallest) smallest=tableau[i][j]; /* ** Subtract smallest from all cells in the above set. */ for(i=0;i<ASSIGNROWS;i++) if(linesrow[i]!=0) for(j=0;j<ASSIGNCOLS;j++) if(linescol[j]!=1) tableau[i][j]-=smallest; /* ** Add smallest to all cells covered by two lines. */ for(i=0;i<ASSIGNROWS;i++) if(linesrow[i]==0) for(j=0;j<ASSIGNCOLS;j++) if(linescol[j]==1) tableau[i][j]+=smallest; return; } /******************** ** IDEA Encryption ** ********************* ** IDEA - International Data Encryption Algorithm. ** Based on code presented in Applied Cryptography by Bruce Schneier. ** Which was based on code developed by Xuejia Lai and James L. Massey. ** Other modifications made by Colin Plumb. ** */ /*********** ** DoIDEA ** ************ ** Perform IDEA encryption. Note that we time encryption & decryption ** time as being a single loop. */ void DoIDEA(void) { IDEAStruct *locideastruct; /* Loc pointer to global structure */ int i; IDEAkey Z,DK; u16 userkey[8]; ulong accumtime; double iterations; char *errorcontext; int systemerror; faruchar *plain1; /* First plaintext buffer */ faruchar *crypt1; /* Encryption buffer */ faruchar *plain2; /* Second plaintext buffer */ /* ** Link to global data */ locideastruct=&global_ideastruct; /* ** Set error context */ errorcontext="CPU:IDEA"; /* ** Re-init random-number generator. */ randnum(3L); /* ** Build an encryption/decryption key */ for (i=0;i<8;i++) userkey[i]=(u16)(abs_randwc(60000L) & 0xFFFF); for(i=0;i<KEYLEN;i++) Z[i]=0; /* ** Compute encryption/decryption subkeys */ en_key_idea(userkey,Z); de_key_idea(Z,DK); /* ** Allocate memory for buffers. We'll make 3, called plain1, ** crypt1, and plain2. It works like this: ** plain1 >>encrypt>> crypt1 >>decrypt>> plain2. ** So, plain1 and plain2 should match. ** Also, fill up plain1 with sample text. */ plain1=(faruchar *)AllocateMemory(locideastruct->arraysize,&systemerror); if(systemerror) { ReportError(errorcontext,systemerror); ErrorExit(); } crypt1=(faruchar *)AllocateMemory(locideastruct->arraysize,&systemerror); if(systemerror) { ReportError(errorcontext,systemerror); FreeMemory((farvoid *)plain1,&systemerror); ErrorExit(); } plain2=(faruchar *)AllocateMemory(locideastruct->arraysize,&systemerror); if(systemerror) { ReportError(errorcontext,systemerror); FreeMemory((farvoid *)plain1,&systemerror); FreeMemory((farvoid *)crypt1,&systemerror); ErrorExit(); } /* ** Note that we build the "plaintext" by simply loading ** the array up with random numbers. */ for(i=0;i<locideastruct->arraysize;i++) plain1[i]=(uchar)(abs_randwc(255) & 0xFF); /* ** See if we need to perform self adjustment loop. */ if(locideastruct->adjust==0) { /* ** Do self-adjustment. This involves initializing the ** # of loops and increasing the loop count until we ** get a number of loops that we can use. */ for(locideastruct->loops=100L; locideastruct->loops<MAXIDEALOOPS; locideastruct->loops+=10L) if(DoIDEAIteration(plain1,crypt1,plain2, locideastruct->arraysize, locideastruct->loops, Z,DK)>global_min_ticks) break; } /* ** All's well if we get here. Do the test. */ accumtime=0L; iterations=(double)0.0; do { accumtime+=DoIDEAIteration(plain1,crypt1,plain2, locideastruct->arraysize, locideastruct->loops,Z,DK); iterations+=(double)locideastruct->loops; } while(TicksToSecs(accumtime)<locideastruct->request_secs); /* ** Clean up, calculate results, and go home. Be sure to ** show that we don't have to rerun adjustment code. */ FreeMemory((farvoid *)plain1,&systemerror); FreeMemory((farvoid *)crypt1,&systemerror); FreeMemory((farvoid *)plain2,&systemerror); locideastruct->iterspersec=iterations / TicksToFracSecs(accumtime); if(locideastruct->adjust==0) locideastruct->adjust=1; return; } /******************** ** DoIDEAIteration ** ********************* ** Execute a single iteration of the IDEA encryption algorithm. ** Actually, a single iteration is one encryption and one ** decryption. */ static ulong DoIDEAIteration(faruchar *plain1, faruchar *crypt1, faruchar *plain2, ulong arraysize, ulong nloops, IDEAkey Z, IDEAkey DK) { register ulong i; register ulong j; ulong elapsed; /* ** Start the stopwatch. */ elapsed=StartStopwatch(); /* ** Do everything for nloops. */ for(i=0;i<nloops;i++) { for(j=0;j<arraysize;j+=(sizeof(u16)*4)) cipher_idea((u16 *)(plain1+j),(u16 *)(crypt1+j),Z); /* Encrypt */ for(j=0;j<arraysize;j+=(sizeof(u16)*4)) cipher_idea((u16 *)(crypt1+j),(u16 *)(plain2+j),DK); /* Decrypt */ } #ifdef DEBUG for(j=0;j<arraysize;j++) if(*(plain1+j)!=*(plain2+j)) printf("IDEA Error! \n"); #endif /* ** Get elapsed time. */ return(StopStopwatch(elapsed)); } /******** ** mul ** ********* ** Performs multiplication, modulo (2**16)+1. This code is structured ** on the assumption that untaken branches are cheaper than taken ** branches, and that the compiler doesn't schedule branches. */ static u16 mul(register u16 a, register u16 b) { register u32 p; if(a) { if(b) { p=(u32)(a*b); b=low16(p); a=(u16)(p>>16); return(b-a+(b<a)); } else return(1-a); } else return(1-b); } /******** ** inv ** ********* ** Compute multiplicative inverse of x, modulo (2**16)+1 ** using Euclid's GCD algorithm. It is unrolled twice ** to avoid swapping the meaning of the registers. And ** some subtracts are changed to adds. */ static u16 inv(u16 x) { u16 t0, t1; u16 q, y; if(x<=1) return(x); /* 0 and 1 are self-inverse */ t1=0x10001 / x; y=0x10001 % x; if(y==1) return(low16(1-t1)); t0=1; do { q=x/y; x=x%y; t0+=q*t1; if(x==1) return(t0); q=y/x; y=y%x; t1+=q*t0; } while(y!=1); return(low16(1-t1)); } /**************** ** en_key_idea ** ***************** ** Compute IDEA encryption subkeys Z */ static void en_key_idea(u16 *userkey, u16 *Z) { int i,j; /* ** shifts */ for(j=0;j<8;j++) Z[j]=*userkey++; for(i=0;j<KEYLEN;j++) { i++; Z[i+7]=(Z[i&7]<<9)| (Z[(i+1) & 7] >> 7); Z+=i&8; i&=7; } return; } /**************** ** de_key_idea ** ***************** ** Compute IDEA decryption subkeys DK from encryption ** subkeys Z. */ static void de_key_idea(IDEAkey Z, IDEAkey DK) { IDEAkey TT; int j; u16 t1, t2, t3; u16 *p; p=(u16 *)(TT+KEYLEN); t1=inv(*Z++); t2=-*Z++; t3=-*Z++; *--p=inv(*Z++); *--p=t3; *--p=t2; *--p=t1; for(j=1;j<ROUNDS;j++) { t1=*Z++; *--p=*Z++; *--p=t1; t1=inv(*Z++); t2=-*Z++; t3=-*Z++; *--p=inv(*Z++); *--p=t2; *--p=t3; *--p=t1; } t1=*Z++; *--p=*Z++; *--p=t1; t1=inv(*Z++); t2=-*Z++; t3=-*Z++; *--p=inv(*Z++); *--p=t3; *--p=t2; *--p=t1; /* ** Copy and destroy temp copy */ for(j=0,p=TT;j<KEYLEN;j++) { *DK++=*p; *p++=0; } return; } /* ** MUL(x,y) ** This #define creates a macro that computes x=x*y modulo 0x10001. ** Requires temps t16 and t32. Also requires y to be strictly 16 ** bits. Here, I am using the simplest form. May not be the ** fastest. -- RG */ /* #define MUL(x,y) (x=mul(low16(x),y)) */ /**************** ** cipher_idea ** ***************** ** IDEA encryption/decryption algorithm. */ static void cipher_idea(u16 in[4], u16 out[4], register IDEAkey Z) { register u16 x1, x2, x3, x4, t1, t2; /* register u16 t16; register u16 t32; */ int r=ROUNDS; x1=*in++; x2=*in++; x3=*in++; x4=*in; do { MUL(x1,*Z++); x2+=*Z++; x3+=*Z++; MUL(x4,*Z++); t2=x1^x3; MUL(t2,*Z++); t1=t2+(x2^x4); MUL(t1,*Z++); t2=t1+t2; x1^=t1; x4^=t2; t2^=x2; x2=x3^t1; x3=t2; } while(--r); MUL(x1,*Z++); *out++=x1; *out++=x3+*Z++; *out++=x2+*Z++; MUL(x4,*Z); *out=x4; return; } /************************ ** HUFFMAN COMPRESSION ** ************************/ /************** ** DoHuffman ** *************** ** Execute a huffman compression on a block of plaintext. ** Note that (as with IDEA encryption) an iteration of the ** Huffman test includes a compression AND a decompression. ** Also, the compression cycle includes building the ** Huffman tree. */ void DoHuffman(void) { HuffStruct *lochuffstruct; /* Loc pointer to global data */ char *errorcontext; int systemerror; ulong accumtime; double iterations; farchar *comparray; farchar *decomparray; farchar *plaintext; /* ** Link to global data */ lochuffstruct=&global_huffstruct; /* ** Set error context. */ errorcontext="CPU:Huffman"; /* ** Allocate memory for the plaintext and the compressed text. ** We'll be really pessimistic here, and allocate equal amounts ** for both (though we know...well, we PRESUME) the compressed ** stuff will take less than the plain stuff. ** Also note that we'll build a 3rd buffer to decompress ** into, and we preallocate space for the huffman tree. ** (We presume that the Huffman tree will grow no larger ** than 512 bytes. This is actually a super-conservative ** estimate...but, who cares?) */ plaintext=(farchar *)AllocateMemory(lochuffstruct->arraysize,&systemerror); if(systemerror) { ReportError(errorcontext,systemerror); ErrorExit(); } comparray=(farchar *)AllocateMemory(lochuffstruct->arraysize,&systemerror); if(systemerror) { ReportError(errorcontext,systemerror); FreeMemory(plaintext,&systemerror); ErrorExit(); } decomparray=(farchar *)AllocateMemory(lochuffstruct->arraysize,&systemerror); if(systemerror) { ReportError(errorcontext,systemerror); FreeMemory(plaintext,&systemerror); FreeMemory(comparray,&systemerror); ErrorExit(); } hufftree=(huff_node *)AllocateMemory(sizeof(huff_node) * 512, &systemerror); if(systemerror) { ReportError(errorcontext,systemerror); FreeMemory(plaintext,&systemerror); FreeMemory(comparray,&systemerror); FreeMemory(decomparray,&systemerror); ErrorExit(); } /* ** Build the plaintext buffer. Since we want this to ** actually be able to compress, we'll use the ** wordcatalog to build the plaintext stuff. */ create_text_block(plaintext,lochuffstruct->arraysize-1,(ushort)500); plaintext[lochuffstruct->arraysize-1L]='\0'; plaintextlen=lochuffstruct->arraysize; /* ** See if we need to perform self adjustment loop. */ if(lochuffstruct->adjust==0) { /* ** Do self-adjustment. This involves initializing the ** # of loops and increasing the loop count until we ** get a number of loops that we can use. */ for(lochuffstruct->loops=100L; lochuffstruct->loops<MAXHUFFLOOPS; lochuffstruct->loops+=10L) if(DoHuffIteration(plaintext, comparray, decomparray, lochuffstruct->arraysize, lochuffstruct->loops, hufftree)>global_min_ticks) break; } /* ** All's well if we get here. Do the test. */ accumtime=0L; iterations=(double)0.0; do { accumtime+=DoHuffIteration(plaintext, comparray, decomparray, lochuffstruct->arraysize, lochuffstruct->loops, hufftree); iterations+=(double)lochuffstruct->loops; } while(TicksToSecs(accumtime)<lochuffstruct->request_secs); /* ** Clean up, calculate results, and go home. Be sure to ** show that we don't have to rerun adjustment code. */ FreeMemory((farvoid *)plaintext,&systemerror); FreeMemory((farvoid *)comparray,&systemerror); FreeMemory((farvoid *)decomparray,&systemerror); FreeMemory((farvoid *)hufftree,&systemerror); lochuffstruct->iterspersec=iterations / TicksToFracSecs(accumtime); if(lochuffstruct->adjust==0) lochuffstruct->adjust=1; } /********************* ** create_text_line ** ********************** ** Create a random line of text, stored at *dt. The line may be ** no more than nchars long. */ static void create_text_line(farchar *dt, long nchars) { long charssofar; /* # of characters so far */ long tomove; /* # of characters to move */ char myword[40]; /* Local buffer for words */ farchar *wordptr; /* Pointer to word from catalog */ charssofar=0; do { /* ** Grab a random word from the wordcatalog */ wordptr=wordcatarray[abs_randwc((long)WORDCATSIZE)]; MoveMemory((farvoid *)myword, (farvoid *)wordptr, (unsigned long)strlen(wordptr)+1); /* ** Append a blank. */ tomove=strlen(myword)+1; myword[tomove-1]=' '; /* ** See how long it is. If its length+charssofar > nchars, we have ** to trim it. */ if((tomove+charssofar)>nchars) tomove=nchars-charssofar; /* ** Attach the word to the current line. Increment counter. */ MoveMemory((farvoid *)dt,(farvoid *)myword,(unsigned long)tomove); charssofar+=tomove; dt+=tomove; /* ** If we're done, bail out. Otherwise, go get another word. */ } while(charssofar<nchars); return; } /********************** ** create_text_block ** *********************** ** Build a block of text randomly loaded with words. The words ** come from the wordcatalog (which must be loaded before you ** call this). ** *tb points to the memory where the text is to be built. ** tblen is the # of bytes to put into the text block ** maxlinlen is the maximum length of any line (line end indicated ** by a carriage return). */ static void create_text_block(farchar *tb, ulong tblen, ushort maxlinlen) { ulong bytessofar; /* # of bytes so far */ ulong linelen; /* Line length */ bytessofar=0L; do { /* ** Pick a random length for a line and fill the line. ** Make sure the line can fit (haven't exceeded tablen) and also ** make sure you leave room to append a carriage return. */ linelen=abs_randwc(maxlinlen-6)+6; if((linelen+bytessofar)>tblen) linelen=tblen-bytessofar; if(linelen>1) { create_text_line(tb,linelen); } tb+=linelen-1; /* Add the carriage return */ *tb++='\n'; bytessofar+=linelen; } while(bytessofar<tblen); } /******************** ** DoHuffIteration ** ********************* ** Perform the huffman benchmark. This routine ** (a) Builds the huffman tree ** (b) Compresses the text ** (c) Decompresses the text and verifies correct decompression */ static ulong DoHuffIteration(farchar *plaintext, farchar *comparray, farchar *decomparray, ulong arraysize, ulong nloops, huff_node *hufftree) { int i; /* Index */ long j; /* Bigger index */ int root; /* Pointer to huffman tree root */ float lowfreq1, lowfreq2; /* Low frequency counters */ int lowidx1, lowidx2; /* Indexes of low freq. elements */ long bitoffset; /* Bit offset into text */ long textoffset; /* Char offset into text */ long maxbitoffset; /* Holds limit of bit offset */ long bitstringlen; /* Length of bitstring */ int c; /* Character from plaintext */ char bitstring[30]; /* Holds bitstring */ ulong elapsed; /* For stopwatch */ /* ** Start the stopwatch */ elapsed=StartStopwatch(); /* ** Do everything for nloops */ while(nloops--) { /* ** Calculate the frequency of each byte value. Store the ** results in what will become the "leaves" of the ** Huffman tree. Interior nodes will be built in those ** nodes greater than node #255. */ for(i=0;i<256;i++) { hufftree[i].freq=(float)0.0; hufftree[i].c=(unsigned char)i; } for(j=0;j<arraysize;j++) hufftree[plaintext[j]].freq+=(float)1.0; for(i=0;i<256;i++) if(hufftree[i].freq != (float)0.0) hufftree[i].freq/=(float)arraysize; /* ** Build the huffman tree. First clear all the parent ** pointers and left/right pointers. Also, discard all ** nodes that have a frequency of true 0. */ for(i=0;i<512;i++) { if(hufftree[i].freq==(float)0.0) hufftree[i].parent=EXCLUDED; else hufftree[i].parent=hufftree[i].left=hufftree[i].right=-1; } /* ** Go through the tree. Finding nodes of really low ** frequency. */ root=255; /* Starting root node-1 */ while(1) { lowfreq1=(float)2.0; lowfreq2=(float)2.0; lowidx1=-1; lowidx2=-1; /* ** Find first lowest frequency. */ for(i=0;i<=root;i++) if(hufftree[i].parent<0) if(hufftree[i].freq<lowfreq1) { lowfreq1=hufftree[i].freq; lowidx1=i; } /* ** Did we find a lowest value? If not, the ** tree is done. */ if(lowidx1==-1) break; /* ** Find next lowest frequency */ for(i=0;i<=root;i++) if((hufftree[i].parent<0) && (i!=lowidx1)) if(hufftree[i].freq<lowfreq2) { lowfreq2=hufftree[i].freq; lowidx2=i; } /* ** If we could only find one item, then that ** item is surely the root, and (as above) the ** tree is done. */ if(lowidx2==-1) break; /* ** Attach the two new nodes to the current root, and ** advance the current root. */ root++; /* New root */ hufftree[lowidx1].parent=root; hufftree[lowidx2].parent=root; hufftree[root].freq=lowfreq1+lowfreq2; hufftree[root].left=lowidx1; hufftree[root].right=lowidx2; hufftree[root].parent=-2; /* Show root */ } /* ** Huffman tree built...compress the plaintext */ bitoffset=0L; /* Initialize bit offset */ for(i=0;i<arraysize;i++) { c=(int)plaintext[i]; /* Fetch character */ /* ** Build a bit string for byte c */ bitstringlen=0; while(hufftree[c].parent!=-2) { if(hufftree[hufftree[c].parent].left==c) bitstring[bitstringlen]='0'; else bitstring[bitstringlen]='1'; c=hufftree[c].parent; bitstringlen++; } /* ** Step backwards through the bit string, setting ** bits in the compressed array as you go. */ while(bitstringlen--) { SetCompBit((u8 *)comparray,(u32)bitoffset,bitstring[bitstringlen]); bitoffset++; } } /* ** Compression done. Perform de-compression. */ maxbitoffset=bitoffset; bitoffset=0; textoffset=0; do { i=root; while(hufftree[i].left!=-1) { if(GetCompBit((u8 *)comparray,(u32)bitoffset)==0) i=hufftree[i].left; else i=hufftree[i].right; bitoffset++; } decomparray[textoffset]=hufftree[i].c; #ifdef DEBUG if(hufftree[i].c != plaintext[textoffset]) { /* Show error */ printf("Error at textoffset %d\n",textoffset); } #endif textoffset++; } while(bitoffset<maxbitoffset); } /* End the big while(nloops--) from above */ /* ** All done */ return(StopStopwatch(elapsed)); } /*************** ** SetCompBit ** **************** ** Set a bit in the compression array. The value of the ** bit is set according to char bitchar. */ static void SetCompBit(u8 *comparray, u32 bitoffset, char bitchar) { u32 byteoffset; int bitnumb; /* ** First calculate which element in the comparray to ** alter. and the bitnumber. */ byteoffset=bitoffset>>3; bitnumb=bitoffset % 8; /* ** Set or clear */ if(bitchar=='1') comparray[byteoffset]|=(1<<bitnumb); else comparray[byteoffset]&=~(1<<bitnumb); return; } /*************** ** GetCompBit ** **************** ** Return the bit value of a bit in the comparession array. ** Returns 0 if the bit is clear, nonzero otherwise. */ static int GetCompBit(u8 *comparray, u32 bitoffset) { u32 byteoffset; int bitnumb; /* ** Calculate byte offset and bit number. */ byteoffset=bitoffset>>3; bitnumb=bitoffset % 8; /* ** Fetch */ return((1<<bitnumb) & comparray[byteoffset] ); } /******************************** ** BACK PROPAGATION NEURAL NET ** ********************************* ** This code is a modified version of the code ** that was submitted to BYTE Magazine by ** Maureen Caudill. It accomanied an article ** that I CANNOT NOW RECALL. ** The author's original heading/comment was ** as follows: ** ** Backpropagation Network ** Written by Maureen Caudill ** in Think C 4.0 on a Macintosh ** ** (c) Maureen Caudill 1988-1991 ** This network will accept 5x7 input patterns ** and produce 8 bit output patterns. ** The source code may be copied or modified without restriction, ** but no fee may be charged for its use. ** ** ++++++++++++++ ** I have modified the code so that it will work ** on systems other than a Macintosh -- RG */ /*********** ** DoNNet ** ************ ** Perform the neural net benchmark. ** Note that this benchmark is one of the few that ** requires an input file. That file is "NNET.DAT" and ** should be on the local directory (from which the ** benchmark program in launched). */ void DoNNET(void) { NNetStruct *locnnetstruct; /* Local ptr to global data */ char *errorcontext; int systemerror; ulong accumtime; double iterations; /* ** Link to global data */ locnnetstruct=&global_nnetstruct; /* ** Set error context */ errorcontext="CPU:NNET"; /* ** Init random number generator. ** NOTE: It is important that the random number generator ** be re-initialized for every pass through this test. ** The NNET algorithm uses the random number generator ** to initialize the net. Results are sensitive to ** the initial neural net state. */ randnum(3L); /* ** Read in the input and output patterns. We'll do this ** only once here at the beginning. These values don't ** change once loaded. */ if(read_data_file()!=0) ErrorExit(); /* ** See if we need to perform self adjustment loop. */ if(locnnetstruct->adjust==0) { /* ** Do self-adjustment. This involves initializing the ** # of loops and increasing the loop count until we ** get a number of loops that we can use. */ for(locnnetstruct->loops=1L; locnnetstruct->loops<MAXNNETLOOPS; locnnetstruct->loops++) { randnum(3L); if(DoNNetIteration(locnnetstruct->loops) >global_min_ticks) break; } } /* ** All's well if we get here. Do the test. */ accumtime=0L; iterations=(double)0.0; do { randnum(3L); /* Gotta do this for Neural Net */ accumtime+=DoNNetIteration(locnnetstruct->loops); iterations+=(double)locnnetstruct->loops; } while(TicksToSecs(accumtime)<locnnetstruct->request_secs); /* ** Clean up, calculate results, and go home. Be sure to ** show that we don't have to rerun adjustment code. */ locnnetstruct->iterspersec=iterations / TicksToFracSecs(accumtime); if(locnnetstruct->adjust==0) locnnetstruct->adjust=1; return; } /******************** ** DoNNetIteration ** ********************* ** Do a single iteration of the neural net benchmark. ** By iteration, we mean a "learning" pass. */ static ulong DoNNetIteration(ulong nloops) { ulong elapsed; /* Elapsed time */ int patt; /* ** Run nloops learning cycles. Notice that, counted with ** the learning cycle is the weight randomization and ** zeroing of changes. This should reduce clock jitter, ** since we don't have to stop and start the clock for ** each iteration. */ elapsed=StartStopwatch(); while(nloops--) { randomize_wts(); zero_changes(); iteration_count=1; learned = F; numpasses = 0; while (learned == F) { for (patt=0; patt<numpats; patt++) { worst_error = 0.0; /* reset this every pass through data */ move_wt_changes(); /* move last pass's wt changes to momentum array */ do_forward_pass(patt); do_back_pass(patt); iteration_count++; } numpasses ++; learned = check_out_error(); } #ifdef DEBUG printf("Learned in %d passes\n",numpasses); #endif } return(StopStopwatch(elapsed)); } /************************* ** do_mid_forward(patt) ** ************************** ** Process the middle layer's forward pass ** The activation of middle layer's neurode is the weighted ** sum of the inputs from the input pattern, with sigmoid ** function applied to the inputs. **/ static void do_mid_forward(int patt) { double sum; int neurode, i; for (neurode=0;neurode<MID_SIZE; neurode++) { sum = 0.0; for (i=0; i<IN_SIZE; i++) { /* compute weighted sum of input signals */ sum += mid_wts[neurode][i]*in_pats[patt][i]; } /* ** apply sigmoid function f(x) = 1/(1+exp(-x)) to weighted sum */ sum = 1.0/(1.0+exp(-sum)); mid_out[neurode] = sum; } return; } /********************* ** do_out_forward() ** ********************** ** process the forward pass through the output layer ** The activation of the output layer is the weighted sum of ** the inputs (outputs from middle layer), modified by the ** sigmoid function. **/ static void do_out_forward() { double sum; int neurode, i; for (neurode=0; neurode<OUT_SIZE; neurode++) { sum = 0.0; for (i=0; i<MID_SIZE; i++) { /* ** compute weighted sum of input signals ** from middle layer */ sum += out_wts[neurode][i]*mid_out[i]; } /* ** Apply f(x) = 1/(1+exp(-x)) to weighted input */ sum = 1.0/(1.0+exp(-sum)); out_out[neurode] = sum; } return; } /************************* ** display_output(patt) ** ************************** ** Display the actual output vs. the desired output of the ** network. ** Once the training is complete, and the "learned" flag set ** to TRUE, then display_output sends its output to both ** the screen and to a text output file. ** ** NOTE: This routine has been disabled in the benchmark ** version. -- RG **/ /* void display_output(int patt) { int i; fprintf(outfile,"\n Iteration # %d",iteration_count); fprintf(outfile,"\n Desired Output: "); for (i=0; i<OUT_SIZE; i++) { fprintf(outfile,"%6.3f ",out_pats[patt][i]); } fprintf(outfile,"\n Actual Output: "); for (i=0; i<OUT_SIZE; i++) { fprintf(outfile,"%6.3f ",out_out[i]); } fprintf(outfile,"\n"); return; } */ /********************** ** do_forward_pass() ** *********************** ** control function for the forward pass through the network ** NOTE: I have disabled the call to display_output() in ** the benchmark version -- RG. **/ static void do_forward_pass(int patt) { do_mid_forward(patt); /* process forward pass, middle layer */ do_out_forward(); /* process forward pass, output layer */ /* display_output(patt); ** display results of forward pass */ return; } /*********************** ** do_out_error(patt) ** ************************ ** Compute the error for the output layer neurodes. ** This is simply Desired - Actual. **/ static void do_out_error(int patt) { int neurode; double error,tot_error, sum; tot_error = 0.0; sum = 0.0; for (neurode=0; neurode<OUT_SIZE; neurode++) { out_error[neurode] = out_pats[patt][neurode] - out_out[neurode]; /* ** while we're here, also compute magnitude ** of total error and worst error in this pass. ** We use these to decide if we are done yet. */ error = out_error[neurode]; if (error <0.0) { sum += -error; if (-error > tot_error) tot_error = -error; /* worst error this pattern */ } else { sum += error; if (error > tot_error) tot_error = error; /* worst error this pattern */ } } avg_out_error[patt] = sum/OUT_SIZE; tot_out_error[patt] = tot_error; return; } /*********************** ** worst_pass_error() ** ************************ ** Find the worst and average error in the pass and save it **/ static void worst_pass_error() { double error,sum; int i; error = 0.0; sum = 0.0; for (i=0; i<numpats; i++) { if (tot_out_error[i] > error) error = tot_out_error[i]; sum += avg_out_error[i]; } worst_error = error; average_error = sum/numpats; return; } /******************* ** do_mid_error() ** ******************** ** Compute the error for the middle layer neurodes ** This is based on the output errors computed above. ** Note that the derivative of the sigmoid f(x) is ** f'(x) = f(x)(1 - f(x)) ** Recall that f(x) is merely the output of the middle ** layer neurode on the forward pass. **/ static void do_mid_error() { double sum; int neurode, i; for (neurode=0; neurode<MID_SIZE; neurode++) { sum = 0.0; for (i=0; i<OUT_SIZE; i++) sum += out_wts[i][neurode]*out_error[i]; /* ** apply the derivative of the sigmoid here ** Because of the choice of sigmoid f(I), the derivative ** of the sigmoid is f'(I) = f(I)(1 - f(I)) */ mid_error[neurode] = mid_out[neurode]*(1-mid_out[neurode])*sum; } return; } /********************* ** adjust_out_wts() ** ********************** ** Adjust the weights of the output layer. The error for ** the output layer has been previously propagated back to ** the middle layer. ** Use the Delta Rule with momentum term to adjust the weights. **/ static void adjust_out_wts() { int weight, neurode; double learn,delta,alph; learn = BETA; alph = ALPHA; for (neurode=0; neurode<OUT_SIZE; neurode++) { for (weight=0; weight<MID_SIZE; weight++) { /* standard delta rule */ delta = learn * out_error[neurode] * mid_out[weight]; /* now the momentum term */ delta += alph * out_wt_change[neurode][weight]; out_wts[neurode][weight] += delta; /* keep track of this pass's cum wt changes for next pass's momentum */ out_wt_cum_change[neurode][weight] += delta; } } return; } /************************* ** adjust_mid_wts(patt) ** ************************** ** Adjust the middle layer weights using the previously computed ** errors. ** We use the Generalized Delta Rule with momentum term **/ static void adjust_mid_wts(int patt) { int weight, neurode; double learn,alph,delta; learn = BETA; alph = ALPHA; for (neurode=0; neurode<MID_SIZE; neurode++) { for (weight=0; weight<IN_SIZE; weight++) { /* first the basic delta rule */ delta = learn * mid_error[neurode] * in_pats[patt][weight]; /* with the momentum term */ delta += alph * mid_wt_change[neurode][weight]; mid_wts[neurode][weight] += delta; /* keep track of this pass's cum wt changes for next pass's momentum */ mid_wt_cum_change[neurode][weight] += delta; } } return; } /******************* ** do_back_pass() ** ******************** ** Process the backward propagation of error through network. **/ void do_back_pass(int patt) { do_out_error(patt); do_mid_error(); adjust_out_wts(); adjust_mid_wts(patt); return; } /********************** ** move_wt_changes() ** *********************** ** Move the weight changes accumulated last pass into the wt-change ** array for use by the momentum term in this pass. Also zero out ** the accumulating arrays after the move. **/ static void move_wt_changes() { int i,j; for (i = 0; i<MID_SIZE; i++) for (j = 0; j<IN_SIZE; j++) { mid_wt_change[i][j] = mid_wt_cum_change[i][j]; /* ** Zero it out for next pass accumulation. */ mid_wt_cum_change[i][j] = 0.0; } for (i = 0; i<OUT_SIZE; i++) for (j=0; j<MID_SIZE; j++) { out_wt_change[i][j] = out_wt_cum_change[i][j]; out_wt_cum_change[i][j] = 0.0; } return; } /********************** ** check_out_error() ** *********************** ** Check to see if the error in the output layer is below ** MARGIN*OUT_SIZE for all output patterns. If so, then ** assume the network has learned acceptably well. This ** is simply an arbitrary measure of how well the network ** has learned -- many other standards are possible. **/ static int check_out_error() { int result,i,error; result = T; error = F; worst_pass_error(); /* identify the worst error in this pass */ /* #ifdef DEBUG printf("\n Iteration # %d",iteration_count); #endif */ for (i=0; i<numpats; i++) { /* printf("\n Error pattern %d: Worst: %8.3f; Average: %8.3f", i+1,tot_out_error[i], avg_out_error[i]); fprintf(outfile, "\n Error pattern %d: Worst: %8.3f; Average: %8.3f", i+1,tot_out_error[i]); */ if (worst_error >= STOP) result = F; if (tot_out_error[i] >= 16.0) error = T; } if (error == T) result = ERR; #ifdef DEBUG /* printf("\n Error this pass thru data: Worst: %8.3f; Average: %8.3f", worst_error,average_error); */ /* fprintf(outfile, "\n Error this pass thru data: Worst: %8.3f; Average: %8.3f", worst_error, average_error); */ #endif return(result); } /******************* ** zero_changes() ** ******************** ** Zero out all the wt change arrays **/ static void zero_changes() { int i,j; for (i = 0; i<MID_SIZE; i++) { for (j=0; j<IN_SIZE; j++) { mid_wt_change[i][j] = 0.0; mid_wt_cum_change[i][j] = 0.0; } } for (i = 0; i< OUT_SIZE; i++) { for (j=0; j<MID_SIZE; j++) { out_wt_change[i][j] = 0.0; out_wt_cum_change[i][j] = 0.0; } } return; } /******************** ** randomize_wts() ** ********************* ** Intialize the weights in the middle and output layers to ** random values between -0.25..+0.25 ** Function rand() returns a value between 0 and 32767. ** ** NOTE: Had to make alterations to how the random numbers were ** created. -- RG. **/ static void randomize_wts() { int neurode,i; double value; /* ** Following not used int benchmark version -- RG ** ** printf("\n Please enter a random number seed (1..32767): "); ** scanf("%d", &i); ** srand(i); */ for (neurode = 0; neurode<MID_SIZE; neurode++) { for(i=0; i<IN_SIZE; i++) { value=(double)abs_randwc(100000L); value=value/(double)100000.0 - (double) 0.5; mid_wts[neurode][i] = value/2; } } for (neurode=0; neurode<OUT_SIZE; neurode++) { for(i=0; i<MID_SIZE; i++) { value=(double)abs_randwc(100000L); value=value/(double)10000.0 - (double) 0.5; out_wts[neurode][i] = value/2; } } return; } /********************* ** read_data_file() ** ********************** ** Read in the input data file and store the patterns in ** in_pats and out_pats. ** The format for the data file is as follows: ** ** line# data expected ** ----- ------------------------------ ** 1 In-X-size,in-y-size,out-size ** 2 number of patterns in file ** 3 1st X row of 1st input pattern ** 4.. following rows of 1st input pattern pattern ** in-x+2 y-out pattern ** 1st X row of 2nd pattern ** etc. ** ** Each row of data is separated by commas or spaces. ** The data is expected to be ascii text corresponding to ** either a +1 or a 0. ** ** Sample input for a 1-pattern file (The comments to the ** right may NOT be in the file unless more sophisticated ** parsing of the input is done.): ** ** 5,7,8 input is 5x7 grid, output is 8 bits ** 1 one pattern in file ** 0,1,1,1,0 beginning of pattern for "O" ** 1,0,0,0,1 ** 1,0,0,0,1 ** 1,0,0,0,1 ** 1,0,0,0,1 ** 1,0,0,0,0 ** 0,1,1,1,0 ** 0,1,0,0,1,1,1,1 ASCII code for "O" -- 0100 1111 ** ** Clearly, this simple scheme can be expanded or enhanced ** any way you like. ** ** Returns -1 if any file error occurred, otherwise 0. **/ static int read_data_file() { FILE *infile; int xinsize,yinsize,youtsize; int patt, element, i, row; int vals_read; int val1,val2,val3,val4,val5,val6,val7,val8; /* printf("\n Opening and retrieving data from file."); */ infile = fopen(inpath, "r"); if (infile == NULL) { printf("\n CPU:NNET--error in opening file!"); return -1 ; } vals_read =fscanf(infile,"%d %d %d",&xinsize,&yinsize,&youtsize); if (vals_read != 3) { printf("\n CPU:NNET -- Should read 3 items in line one; did read %d",vals_read); return -1; } vals_read=fscanf(infile,"%d",&numpats); if (vals_read !=1) { printf("\n CPU:NNET -- Should read 1 item in line 2; did read &d",vals_read); return -1; } if (numpats > MAXPATS) numpats = MAXPATS; for (patt=0; patt<numpats; patt++) { element = 0; for (row = 0; row<yinsize; row++) { vals_read = fscanf(infile,"%d %d %d %d %d", &val1, &val2, &val3, &val4, &val5); if (vals_read != 5) { printf ("\n CPU:NNET -- failure in reading input!"); return -1; } element=row*xinsize; in_pats[patt][element] = (double) val1; element++; in_pats[patt][element] = (double) val2; element++; in_pats[patt][element] = (double) val3; element++; in_pats[patt][element] = (double) val4; element++; in_pats[patt][element] = (double) val5; element++; } for (i=0;i<IN_SIZE; i++) { if (in_pats[patt][i] >= 0.9) in_pats[patt][i] = 0.9; if (in_pats[patt][i] <= 0.1) in_pats[patt][i] = 0.1; } element = 0; vals_read = fscanf(infile,"%d %d %d %d %d %d %d %d", &val1, &val2, &val3, &val4, &val5, &val6, &val7, &val8); out_pats[patt][element] = (double) val1; element++; out_pats[patt][element] = (double) val2; element++; out_pats[patt][element] = (double) val3; element++; out_pats[patt][element] = (double) val4; element++; out_pats[patt][element] = (double) val5; element++; out_pats[patt][element] = (double) val6; element++; out_pats[patt][element] = (double) val7; element++; out_pats[patt][element] = (double) val8; element++; } /* printf("\n Closing the input file now. "); */ fclose(infile); return(0); } /********************* ** initialize_net() ** ********************** ** Do all the initialization stuff before beginning */ static int initialize_net() { int err_code; randomize_wts(); zero_changes(); err_code = read_data_file(); iteration_count = 1; return(err_code); } /********************** ** display_mid_wts() ** *********************** ** Display the weights on the middle layer neurodes ** NOTE: This routine is not used in the benchmark ** test -- RG **/ /* static void display_mid_wts() { int neurode, weight, row, col; fprintf(outfile,"\n Weights of Middle Layer neurodes:"); for (neurode=0; neurode<MID_SIZE; neurode++) { fprintf(outfile,"\n Mid Neurode # %d",neurode); for (row=0; row<IN_Y_SIZE; row++) { fprintf(outfile,"\n "); for (col=0; col<IN_X_SIZE; col++) { weight = IN_X_SIZE * row + col; fprintf(outfile," %8.3f ", mid_wts[neurode][weight]); } } } return; } */ /********************** ** display_out_wts() ** *********************** ** Display the weights on the output layer neurodes ** NOTE: This code is not used in the benchmark ** test -- RG */ /* void display_out_wts() { int neurode, weight; fprintf(outfile,"\n Weights of Output Layer neurodes:"); for (neurode=0; neurode<OUT_SIZE; neurode++) { fprintf(outfile,"\n Out Neurode # %d \n",neurode); for (weight=0; weight<MID_SIZE; weight++) { fprintf(outfile," %8.3f ", out_wts[neurode][weight]); } } return; } */ /*********************** ** LU DECOMPOSITION ** ** (Linear Equations) ** ************************ ** These routines come from "Numerical Recipes in Pascal". ** Note that, as in the assignment algorithm, though we ** separately define LUARRAYROWS and LUARRAYCOLS, the two ** must be the same value (this routine depends on a square ** matrix). */ /********* ** DoLU ** ********** ** Perform the LU decomposition benchmark. */ void DoLU(void) { LUStruct *loclustruct; /* Local pointer to global data */ char *errorcontext; int systemerror; fardouble *a; fardouble *b; fardouble *abase; fardouble *bbase; LUdblptr ptra; int n; int i; ulong accumtime; double iterations; /* ** Link to global data */ loclustruct=&global_lustruct; /* ** Set error context. */ errorcontext="FPU:LU"; /* ** Our first step is to build a "solvable" problem. This ** will become the "seed" set that all others will be ** derived from. (I.E., we'll simply copy these arrays ** into the others. */ a=(fardouble *)AllocateMemory(sizeof(double) * LUARRAYCOLS * LUARRAYROWS, &systemerror); b=(fardouble *)AllocateMemory(sizeof(double) * LUARRAYROWS, &systemerror); n=LUARRAYROWS; /* ** We need to allocate a temp vector that is used by the LU ** algorithm. This removes the allocation routine from the ** timing. */ LUtempvv=(fardouble *)AllocateMemory(sizeof(double)*LUARRAYROWS, &systemerror); /* ** Build a problem to be solved. */ ptra.ptrs.p=a; /* Gotta coerce linear array to 2D array */ build_problem(*ptra.ptrs.ap,n,b); /* ** Now that we have a problem built, see if we need to do ** auto-adjust. If so, repeatedly call the DoLUIteration routine, ** increasing the number of solutions per iteration as you go. */ if(loclustruct->adjust==0) { loclustruct->numarrays=0; for(i=1;i<=MAXLUARRAYS;i++) { abase=(fardouble *)AllocateMemory(sizeof(double) * LUARRAYCOLS*LUARRAYROWS*(i+1),&systemerror); if(systemerror) { ReportError(errorcontext,systemerror); LUFreeMem(a,b,(fardouble *)NULL,(fardouble *)NULL); ErrorExit(); } bbase=(fardouble *)AllocateMemory(sizeof(double) * LUARRAYROWS*(i+1),&systemerror); if(systemerror) { ReportError(errorcontext,systemerror); LUFreeMem(a,b,abase,(fardouble *)NULL); ErrorExit(); } if(DoLUIteration(a,b,abase,bbase,i)>global_min_ticks) { loclustruct->numarrays=i; break; } /* ** Not enough arrays...free them all and try again */ FreeMemory((farvoid *)abase,&systemerror); FreeMemory((farvoid *)bbase,&systemerror); } /* ** Were we able to do it? */ if(loclustruct->numarrays==0) { printf("FPU:LU -- Array limit reached\n"); LUFreeMem(a,b,abase,bbase); ErrorExit(); } } else { /* ** Don't need to adjust -- just allocate the proper ** number of arrays and proceed. */ abase=(fardouble *)AllocateMemory(sizeof(double) * LUARRAYCOLS*LUARRAYROWS*loclustruct->numarrays, &systemerror); if(systemerror) { ReportError(errorcontext,systemerror); LUFreeMem(a,b,(fardouble *)NULL,(fardouble *)NULL); ErrorExit(); } bbase=(fardouble *)AllocateMemory(sizeof(double) * LUARRAYROWS*loclustruct->numarrays,&systemerror); if(systemerror) { ReportError(errorcontext,systemerror); LUFreeMem(a,b,abase,(fardouble *)NULL); ErrorExit(); } } /* ** All's well if we get here. Do the test. */ accumtime=0L; iterations=(double)0.0; do { accumtime+=DoLUIteration(a,b,abase,bbase, loclustruct->numarrays); iterations+=(double)loclustruct->numarrays; } while(TicksToSecs(accumtime)<loclustruct->request_secs); /* ** Clean up, calculate results, and go home. Be sure to ** show that we don't have to rerun adjustment code. */ loclustruct->iterspersec=iterations / TicksToFracSecs(accumtime); if(loclustruct->adjust==0) loclustruct->adjust=1; LUFreeMem(a,b,abase,bbase); return; } /************** ** LUFreeMem ** *************** ** Release memory associated with LU benchmark. */ static void LUFreeMem(fardouble *a, fardouble *b, fardouble *abase,fardouble *bbase) { int systemerror; FreeMemory((farvoid *)a,&systemerror); FreeMemory((farvoid *)b,&systemerror); FreeMemory((farvoid *)LUtempvv,&systemerror); if(abase!=(fardouble *)NULL) FreeMemory((farvoid *)abase,&systemerror); if(bbase!=(fardouble *)NULL) FreeMemory((farvoid *)bbase,&systemerror); return; } /****************** ** DoLUIteration ** ******************* ** Perform an iteration of the LU decomposition benchmark. ** An iteration refers to the repeated solution of several ** identical matrices. */ static ulong DoLUIteration(fardouble *a,fardouble *b, fardouble *abase, fardouble *bbase, ulong numarrays) { fardouble *locabase; fardouble *locbbase; LUdblptr ptra; /* For converting ptr to 2D array */ ulong elapsed; ulong j,i; /* Indexes */ /* ** Move the seed arrays (a & b) into the destination ** arrays; */ for(j=0;j<numarrays;j++) { locabase=abase+j*LUARRAYROWS*LUARRAYCOLS; locbbase=bbase+j*LUARRAYROWS; for(i=0;i<LUARRAYROWS*LUARRAYCOLS;i++) *(locabase+i)=*(a+i); for(i=0;i<LUARRAYROWS;i++) *(locbbase+i)=*(b+i); } /* ** Do test...begin timing. */ elapsed=StartStopwatch(); for(i=0;i<numarrays;i++) { locabase=abase+i*LUARRAYROWS*LUARRAYCOLS; locbbase=bbase+i*LUARRAYROWS; ptra.ptrs.p=locabase; lusolve(*ptra.ptrs.ap,LUARRAYROWS,locbbase); } return(StopStopwatch(elapsed)); } /****************** ** build_problem ** ******************* ** Constructs a solvable set of linear equations. It does this by ** creating an identity matrix, then loading the solution vector ** with random numbers. After that, the identity matrix and ** solution vector are randomly "scrambled". Scrambling is ** done by (a) randomly selecting a row and multiplying that ** row by a random number and (b) adding one randomly-selected ** row to another. */ static void build_problem(double a[][LUARRAYCOLS], int n, double b[LUARRAYROWS]) { long i,j,k,k1; /* Indexes */ double rcon; /* Random constant */ /* ** Reset random number generator */ randnum(13L); /* ** Build an identity matrix. ** We'll also use this as a chance to load the solution ** vector. */ for(i=0;i<n;i++) { b[i]=(double)(abs_randwc(100L)+1L); for(j=0;j<n;j++) if(i==j) a[i][j]=(double)(abs_randwc(1000L)+1L); else a[i][j]=(double)0.0; } #ifdef DEBUG for(i=0;i<n;i++) { for(j=0;j<n;j++) printf("%6.2f ",a[i][j]); printf(":: %6.2f\n",b[i]); } #endif /* ** Scramble. Do this 8n times. See comment above for ** a description of the scrambling process. */ for(i=0;i<8*n;i++) { /* ** Pick a row and a random constant. Multiply ** all elements in the row by the constant. */ /* k=abs_randwc((long)n); rcon=(double)(abs_randwc(20L)+1L); for(j=0;j<n;j++) a[k][j]=a[k][j]*rcon; b[k]=b[k]*rcon; */ /* ** Pick two random rows and add second to ** first. Note that we also occasionally multiply ** by minus 1 so that we get a subtraction operation. */ k=abs_randwc((long)n); k1=abs_randwc((long)n); if(k!=k1) { if(k<k1) rcon=(double)1.0; else rcon=(double)-1.0; for(j=0;j<n;j++) a[k][j]+=a[k1][j]*rcon;; b[k]+=b[k1]*rcon; } } return; } /*********** ** ludcmp ** ************ ** From the procedure of the same name in "Numerical Recipes in Pascal", ** by Press, Flannery, Tukolsky, and Vetterling. ** Given an nxn matrix a[], this routine replaces it by the LU ** decomposition of a rowwise permutation of itself. a[] and n ** are input. a[] is output, modified as follows: ** -- -- ** | b(1,1) b(1,2) b(1,3)... | ** | a(2,1) b(2,2) b(2,3)... | ** | a(3,1) a(3,2) b(3,3)... | ** | a(4,1) a(4,2) a(4,3)... | ** | ... | ** -- -- ** ** Where the b(i,j) elements form the upper triangular matrix of the ** LU decomposition, and the a(i,j) elements form the lower triangular ** elements. The LU decomposition is calculated so that we don't ** need to store the a(i,i) elements (which would have laid along the ** diagonal and would have all been 1). ** ** indx[] is an output vector that records the row permutation ** effected by the partial pivoting; d is output as +/-1 depending ** on whether the number of row interchanges was even or odd, ** respectively. ** Returns 0 if matrix singular, else returns 1. */ static int ludcmp(double a[][LUARRAYCOLS], int n, int indx[], int *d) { double big; /* Holds largest element value */ double sum; double dum; /* Holds dummy value */ int i,j,k; /* Indexes */ int imax; /* Holds max index value */ double tiny; /* A really small number */ int systemerror; tiny=(double)1.0e-20; *d=1; /* No interchanges yet */ for(i=0;i<n;i++) { big=(double)0.0; for(j=0;j<n;j++) if((double)fabs(a[i][j]) > big) big=fabs(a[i][j]); /* Bail out on singular matrix */ if(big==(double)0.0) return(0); LUtempvv[i]=1.0/big; } /* ** Crout's algorithm...loop over columns. */ for(j=0;j<n;j++) { if(j!=0) for(i=0;i<j;i++) { sum=a[i][j]; if(i!=0) for(k=0;k<i;k++) sum-=(a[i][k]*a[k][j]); a[i][j]=sum; } big=(double)0.0; for(i=j;i<n;i++) { sum=a[i][j]; if(j!=0) for(k=0;k<j;k++) sum-=a[i][k]*a[k][j]; a[i][j]=sum; dum=LUtempvv[i]*fabs(sum); if(dum>=big) { big=dum; imax=i; } } if(j!=imax) /* Interchange rows if necessary */ { for(k=0;k<n;k++) { dum=a[imax][k]; a[imax][k]=a[j][k]; a[j][k]=dum; } *d=-*d; /* Change parity of d */ dum=LUtempvv[imax]; LUtempvv[imax]=LUtempvv[j]; /* Don't forget scale factor */ LUtempvv[j]=dum; } indx[j]=imax; /* ** If the pivot element is zero, the matrix is singular ** (at least as far as the precision of the machine ** is concerned.) We'll take the original author's ** recommendation and replace 0.0 with "tiny". */ if(a[j][j]==(double)0.0) a[j][j]=tiny; if(j!=(n-1)) { dum=1.0/a[j][j]; for(i=j+1;i<n;i++) a[i][j]=a[i][j]*dum; } } return(1); } /*********** ** lubksb ** ************ ** Also from "Numerical Recipes in Pascal". ** This routine solves the set of n linear equations A X = B. ** Here, a[][] is input, not as the matrix A, but as its ** LU decomposition, created by the routine ludcmp(). ** Indx[] is input as the permutation vector returned by ludcmp(). ** b[] is input as the right-hand side an returns the ** solution vector X. ** a[], n, and indx are not modified by this routine and ** can be left in place for different values of b[]. ** This routine takes into account the possibility that b will ** begin with many zero elements, so it is efficient for use in ** matrix inversion. */ static void lubksb( double a[][LUARRAYCOLS], int n, int indx[LUARRAYROWS], double b[LUARRAYROWS]) { int i,j; /* Indexes */ int ip; /* "pointer" into indx */ int ii; double sum; /* ** When ii is set to a positive value, it will become ** the index of the first nonvanishing element of b[]. ** We now do the forward substitution. The only wrinkle ** is to unscramble the permutation as we go. */ ii=-1; for(i=0;i<n;i++) { ip=indx[i]; sum=b[ip]; b[ip]=b[i]; if(ii!=-1) for(j=ii;j<i;j++) sum=sum-a[i][j]*b[j]; else /* ** If a nonzero element is encountered, we have ** to do the sums in the loop above. */ if(sum!=(double)0.0) ii=i; b[i]=sum; } /* ** Do backsubstitution */ for(i=(n-1);i>=0;i--) { sum=b[i]; if(i!=(n-1)) for(j=(i+1);j<n;j++) sum=sum-a[i][j]*b[j]; b[i]=sum/a[i][i]; } return; } /************ ** lusolve ** ************* ** Solve a linear set of equations: A x = b ** Original matrix A will be destroyed by this operation. ** Returns 0 if matrix is singular, 1 otherwise. */ static int lusolve(double a[][LUARRAYCOLS], int n, double b[LUARRAYROWS]) { int indx[LUARRAYROWS]; int d; int i,j; if(ludcmp(a,n,indx,&d)==0) return(0); /* Matrix not singular -- proceed */ lubksb(a,n,indx,b); #ifdef DEBUG for(i=0;i<n;i++) { for(j=0;j<n;j++) printf("%6.2f ",a[i][j]); */ printf("%6.2f ",b[i]); } printf("\n"); #endif return(1); }