rtsi.com

home *** CD-ROM | disk | FTP | other *** search

/ rtsi.com / 2014.01.www.rtsi.com.tar / www.rtsi.com / OS9 / BENCHMARKS / flops.c < prev next >

Wrap

C/C++ Source or Header | 2009-11-06 | 14KB | 507 lines

/*****************************/
/*          FLOPS.c          */
/* Version 1.2,  29 Feb 1992 */
/*         Al Aburto         */
/*      'ala' on BIX         */
/*  aburto@marlin.nosc.mil   */
/*****************************/

/*
   FLOPS.c is a 'c' program which attempts to estimate your system's
   scalar floating-point 'MFLOP' rating for the scalar +, -, *, and /
   operations based on a specific 'instruction mix'.

   It attempts to measure the time for an FADD and the time to do an
   empty 'for' loop in microseconds.

   It calculates pi based on the series expansion for atan(1.0). A few
   correction terms are applied to achieve at most a 16 digit accurate
   result.

   It calculates the area under the sin(x) curve from 0 to pi/3 using
   the Trapezoidal rule. Sin(x) is approximated by a polynomial series
   accurate to approximately 1.0e-14 over the specified range.

   The program is designed for double precision.

   The program does not work with arrays and thus it is not an
   appropriate program for an important group of processors (array,
   vector, and possibly other type machines). It gives a Scalar (one
   dimensional) MFLOP rating, not an N-Scalar or vector MFLOP rating.
   It was originally intended for PC's but it is applicable for a wide
   variety of different machines.

   The Scalar MFLOPS rating is based on a count of 52 total scalar
   floating-point operations per loop (iteration): five times the
   7 operations of Module 2 plus the 17 operations of Module 3.
   The instruction mix is:
      FADD: 40.38, FSUB: 23.08, FMUL: 26.92, and FDIV: 9.62 percent
   (21 +, 12 -, 14 *, and 5 / out of 52).
   This is based on instruction mix data found in the Dhrystone paper
   by Reinhold P. Weicker, Communications of the ACM, Oct 1984, Vol 27,
   Number 10, Page 1013 (Table IV). This is the best available
   information I found in which to base the instruction mix.

   Version 1.2 corrects some problems encountered with Version 1.1.
   A timer routine for UNIX using 'getrusage()' has been added by
   Markku Kolkka of Tampere University of Technology, Finland. This
   eliminates problems with the definition of 'HZ' in the old routine
   (UNIX_Old).

   I eliminated the 'w' sum in the second loop as it did nothing.

   The FADD time is now estimated (the printf is commented out, but
   you can uncomment it to see what it says) and I use it solely to
   estimate the number of loops to conduct: 20 million loops when the
   program estimates the FADD is faster than 1/3 usec, 0.1 million
   loops for FADD slower than 4 usec, and 1 million loops otherwise.

   If the number of loops conducted is not properly related to 'loops'
   then the program prints out a message saying the program and
   results are invalid.

   Bo Thide' of the Swedish Institute of Space Physics, Sweden caught
   an error in the value for 'piref' and that has been corrected.

   The time to do an empty 'for' loop is also estimated somewhat
   better since there are now automatic longer loops.

   The V1.2 results are not expected to show any significant
   difference relative to the V1.1 results. I verified this with
   several machines running in the 1 to 6 MFLOP range, but you might
   want to recheck the faster machines.

   The results do vary even when doing 20 million loops. The maximum
   standard deviation I observed in the systems I checked was 0.07
   MFLOP at the 6 MFLOP range.

   See below for the 'register' and 'timer' options available in the
   program.

   Example UNIX compilation is: 'cc -DUNIX -O2 flops.c -o flops', or
   'cc -DUNIX -DROPT flops.c -o flops', ... , etc.

   NOTE: Please do not remove any of the printouts.

   Al Aburto
*/

/*
   Maintenance notes (not part of the original V1.2 text):
     - main() is declared 'int main(void)' and dtime() has a prototype
       and ANSI-style definitions.
     - When none of the timer macros below is defined, a fallback
       dtime() based on the standard C clock() function is compiled.
     - The TURBO_C timer includes the hour field and the Amiga timer
       includes the day count, so intervals that cross an hour
       (respectively midnight) boundary are no longer negative.
     - The final MFLOPS division is guarded against a zero or negative
       benchmark time.
     - The FSUB percentage in the text above was corrected to match the
       operation counts documented in Modules 2 and 3.
*/

#include <stdio.h>
#include <math.h>
                                  /* 'Uncomment' the line below to run   */
                                  /* with 'register double' variables    */
                                  /* defined, or compile with the        */
                                  /* '-DROPT' option. Don't need this if */
                                  /* registers used automatically.       */
/* #define ROPT */

                                  /* 'Uncomment' one of the statements   */
                                  /* below to access the right timer     */
                                  /* routine for your system, or compile */
                                  /* with '-DUNIX' (for example). You    */
                                  /* may need to write your own similar  */
                                  /* timer routine if you have another   */
                                  /* system or compiler not covered by   */
                                  /* the options below. I have not tested*/
                                  /* the 'MSC' option.                   */
/* #define Amiga    */
/* #define UNIX     */
/* #define UNIX_Old */
/* #define TURBO_C  */
/* #define MSC      */

#ifdef Amiga
#include <ctype.h>
#define HZ 50
#endif

#ifdef _OSK
#include <time.h>
clock_t tnow;
#endif

#ifdef UNIX
#include <sys/time.h>
#include <sys/resource.h>
struct rusage rusage;
#endif

#ifdef UNIX_Old
#include <sys/types.h>
#include <sys/times.h>
#include <sys/param.h>
#ifndef HZ
#define HZ 60
#endif
struct tms tms;
#endif

#ifdef TURBO_C
#include <ctype.h>
#include <dos.h>
#include <time.h>
#define HZ 100
struct time now;
#endif

#ifdef MSC
#include <time.h>
#include <ctype.h>
#define HZ CLK_TCK
clock_t tnow;
#endif

                                  /* No timer selected: fall back to the */
                                  /* standard C clock() routine so the   */
                                  /* program still builds and links.     */
#if !defined(Amiga) && !defined(_OSK) && !defined(UNIX) && \
    !defined(UNIX_Old) && !defined(TURBO_C) && !defined(MSC)
#define FLOPS_STDC_TIMER
#include <time.h>
#endif

/*
   dtime(p): p[2] holds the time stamp (seconds) of the previous call;
   on return p[2] is the new time stamp and p[1] = new - previous.
   p[0] is not used. Always returns 0.
*/
int dtime(double p[]);

double TimeArray[3];              /* Time Array needed for 'dtime()'.    */
                                  /* Starts zeroed, so the first call of */
                                  /* a pair only records a time stamp.   */

double T[25];                     /* Global Array used to hold timing    */
                                  /* results and other information.      */
                                  /*   T[19]..T[22] first loop/FADD est. */
                                  /*   T[1]..T[3],T[23] second estimate  */
                                  /*   T[4]..T[6]  Module 2 (pi) timing  */
                                  /*   T[7],T[8]   Module 3 (area) timing*/
                                  /*   T[9]  time per 52-op iteration    */
                                  /*   T[10] MFLOPS = 1 / T[9]           */
                                  /*   T[24] scale: 1.0e+06 / loop count */

double sa,sb,sc,sd,one,two,three;
double four,five,piref,piprg;
double pierr,delta;

                                  /* Coefficients of the sin(x) series   */
                                  /* used in Module 3. Signs of A3 and   */
                                  /* A5 are applied in the expression.   */
double A0 = 1.0;
double A1 =-0.1666666666671334;
double A2 = 0.833333333809067E-2;
double A3 = 0.198412715551283E-3;
double A4 = 0.27557589750762E-5;
double A5 = 0.2507059876207E-7;
double A6 = 0.164105986683E-9;

/*
   Runs the benchmark and prints the results. The loop counts 'm' and
   'n' are deliberately recomputed from floating-point results after
   each loop, so each loop count depends on the arithmetic in the loop
   before it. The final check (sd * m == loops) reports when that chain
   has been broken. Do not reorder or simplify the statements below.
*/
int main(void)
{

#ifdef ROPT
   register double s,u,v,w,x;
#else
   double s,u,v,w,x;
#endif

   long i, loops, m, n;

   printf("\n");
   printf(" FLOPS C Program (Double Precision)\n");
   printf(" Version 1.2, 29 Feb 1992\n\n");

                                  /******************************/
   loops = 1000000;               /* Number of loops conducted. */
                                  /******************************/

   /****************************************************/
   /* Set Global Variable Values. T[24] References     */
   /* All Timing Results Relative To 1 Million Loops.  */
   /****************************************************/
   T[24] = 1.0e+06/(double)loops;

   piref = 3.14159265358979324;
   one   = 1.0;
   two   = 2.0;
   three = 3.0;
   four  = 4.0;
   five  = 5.0;
   sd    = one;

   /**********************************/
   /* Module 1.  Estimate Loop time  */
   /*            and FADD time.      */
   /**********************************/

   u = 0.0;                       /*********************/
   v = one;                       /*      Loop 1.      */
                                  /*********************/
   dtime(TimeArray);
   for( i = 1 ; i<= loops ; i++ )
   {
      u = u + v;
   }
   dtime(TimeArray);
   T[19] = T[24] * TimeArray[1];

   m = (long)u;
                                  /*********************/
   u = 0.0;                       /*      Loop 2.      */
   s = 0.0;                       /*********************/
   dtime(TimeArray);
   for( i = 1 ; i<= m ; i++ )
   {
      u = u + v;
      s = s + u;
   }
   dtime(TimeArray);
   T[20] = T[24] * TimeArray[1];
                                          /********************/
   T[21] = two * T[19] - T[20];           /* Loop Time (usec) */
   if ( T[21] < 0.0 ) T[21] = 0.0;        /* First Estimate.  */
                                          /********************/

   /* s is the sum 1 + 2 + ... + m, so 2*s/u - 1 recovers m. */
   n = (long)( two * ( s / u ) - one );
                                          /********************/
   T[22] = T[20] - T[19];                 /* FADD Time (usec) */
   if ( T[22] < 0.0 ) T[22] = 0.0;        /* First Estimate.  */
                                          /********************/
   m = n;

   /* Fast FADD: run 20 times as many loops. */
   if ( T[22] < (one / three) )
   {
      sd = one / 20.0;
      m  = 20 * n;
      T[24] = 1.0e+06/(double)m;
   }

   /* Slow FADD: run one tenth as many loops. */
   if ( T[22] > four )
   {
      sd = 10.0;
      m  = n / 10;
      T[24] = 1.0e+06/(double)m;
   }

                                  /*********************/
   u = 0.0;                       /*      Loop 3.      */
                                  /*********************/
   dtime(TimeArray);
   for( i = 1 ; i<= m ; i++ )
   {
      u = u + v;
   }
   dtime(TimeArray);
   T[1] = T[24] * TimeArray[1];

   m = (long)u;
                                  /*********************/
   u = 0.0;                       /*      Loop 4.      */
   s = 0.0;                       /*********************/
   dtime(TimeArray);
   for( i = 1 ; i<= m ; i++ )
   {
      u = u + v;
      s = s + u;
   }
   dtime(TimeArray);
   T[2] = T[24] * TimeArray[1];
                                          /********************/
   T[3] = two * T[1] - T[2];              /* Loop Time (usec) */
   if ( T[3] < 0.0 ) T[3] = 0.0;          /* Second Estimate. */
                                          /********************/
   m = (long)( two * ( s / u ) - v );
                                          /********************/
   T[23] = T[2] - T[1];                   /* FADD Time (usec) */
   if ( T[23] < 0.0 ) T[23] = 0.0;        /* Second Estimate. */
                                          /********************/

   /* Average the first and second estimates. */
   T[3]  = ( T[3]  + T[21] ) / two;
   T[23] = ( T[23] + T[22] ) / two;

   /*
   printf(" Loop Time (usec) = %10.4lf\n",T[3]);
   printf(" FADD Time (usec) = %10.4lf\n\n",T[23]);
   */

   /*******************************************************/
   /* Module 2.  Calculate Value Of PI From Taylor Series */
   /*            Expansion Of atan(1.0).  There Are 7     */
   /*            Double Precision Operations Per Loop     */
   /*            ( 3 +, 2 -, 1 *, and 1 / ) That are      */
   /*            Included in The Timing.                  */
   /*******************************************************/

   s  =-five;                     /********************/
   sa =-one;                      /*      Loop 5.     */
                                  /********************/
   dtime(TimeArray);
   for ( i = 1 ; i <= m ; i++ )
   {
      s  =-s;
      sa = sa + s;
   }
   dtime(TimeArray);
   T[4] = T[24] * TimeArray[1];
   if ( T[4] < 0.0 ) T[4] = 0.0;

   sc = (double)m;

   u = sa;                        /*********************/
   v = 0.0;                       /*      Loop 6.      */
   w = 0.0;                       /*********************/
   x = 0.0;
   dtime(TimeArray);
   for ( i = 1 ; i <= m ; i++)
   {
      s  =-s;
      sa = sa + s;
      u  = u + two;
      x  = x +(s - u);
      v  = v - s * u;
      w  = w + s / u;
   }
   dtime(TimeArray);
   T[5] = T[24] * TimeArray[1];

   /* Loop 6 time minus Loop 5 time: the 7 timed operations. */
   T[6] = T[5] - T[4];
                                       /*********************/
   m  = (long)( sa * x  / sc );        /*     PI Results    */
   sa = four * w / five;               /*********************/
   sb = sa + five / v;
   sc = 31.25;
   piprg = sb - sc / (v * v * v);
   pierr = piprg - piref;

                                       /*********************/
                                       /*   DO NOT REMOVE   */
                                       /*  THESE PRINTOUTS! */
                                       /*********************/
   printf(" PI: Program = %20.17lf\n",piprg);
   printf(" PI: Reference = %20.17lf\n",piref);
   printf(" PI: Error =%13.4le\n\n",pierr);

   /*******************************************************/
   /* Module 3.  Calculate Area Under sin(x) Curve From   */
   /*            0.0 To PI/3.0 Using Trapezoidal Method.  */
   /*            Result is 0.5 .  There Are 17 Double     */
   /*            Precision Operations Per Loop ( 6 +, 2 -,*/
   /*            9 *, and 0 / ) Included In The Timing.   */
   /*******************************************************/

   delta = piref / ( three * (double)m );
                                  /*********************/
   s = 0.0;                       /*      Loop 7.      */
   v = 0.0;                       /*********************/
   dtime(TimeArray);
   for( i = 1 ; i <= m ; i++ )
   {
      v = v + one;
      u = v * delta;
      w = u * u;
      s = s + u * ((((((A6*w-A5)*w+A4)*w-A3)*w+A2)*w+A1)*w+A0);
   }
   dtime(TimeArray);
   T[7] = T[24] * TimeArray[1];

   /* Half of the end-point term sin(pi/3), as the Trapezoidal rule needs. */
   u  = piref / three;
   w  = u * u;
   sa = u * ((((((A6*w-A5)*w+A4)*w-A3)*w+A2)*w+A1)*w+A0) / two;

   m  = (long)v;
                                       /*********************/
   sa = delta * ( s - sa );            /*   Area Results.   */
   sb = 0.5;                           /*********************/
   sc = sa - sb;
                                       /*********************/
                                       /*   DO NOT REMOVE   */
                                       /*  THESE PRINTOUTS! */
                                       /*********************/
   printf(" Area: Program = %20.17lf\n",sa);
   printf(" Area: Reference = %20.17lf\n",sb);
   printf(" Area: Error =%13.4le\n\n",sc);

   /* Remove the empty-loop time, then average over the 52 operations. */
   T[8]  = T[7] - T[3];
   T[9]  = ( five * T[6] + T[8] ) / 52.0;

   /* A timer that is too coarse can leave T[9] zero or negative; report */
   /* 0.0 MFLOPS in that case instead of dividing by it.                 */
   if ( T[9] > 0.0 )
   {
      T[10] = one / T[9];
   }
   else
   {
      T[10] = 0.0;
   }

                                       /*********************/
                                       /*   DO NOT REMOVE   */
                                       /*  THESE PRINTOUTS! */
                                       /*********************/
   printf(" Iterations = %9ld\n",m);
   printf(" BenchTime(usec) = %9.4lf\n",T[9]);
   printf(" Scalar MFLOPS = %9.4lf\n\n",T[10]);

   /* sd undoes the 20x or 1/10 loop scaling, so sd * m must equal 'loops'. */
   x = sd * (double)m;
   if ( (long)x != loops )
   {
      printf(" Results are bogus! It appears that an optimization\n");
      printf(" was done which invalidates the program and results.\n\n");
   }

   return 0;
}

/********************************************************/
/* dtime () outputs the elapsed time in p[1] from the   */
/* first call of dtime() to the second call of dtime(). */
/* p[2] keeps the time stamp of the most recent call.   */
/********************************************************/

#ifdef Amiga
/* NOTE(review): DateStamp() is called without a declaration, as in the */
/* original; confirm the right system header for your Amiga compiler.   */
int dtime(double p[])
{
   double q;

   struct tt
   {
      long days;
      long minutes;
      long ticks;
   } tt;

   q = p[2];

   DateStamp(&tt);

   /* Seconds within the current day (HZ ticks per second) ...          */
   p[2] = ( (double)(tt.ticks + (tt.minutes * 60L * 50L)) ) / (double)HZ;
   /* ... plus whole days, so an interval may span midnight.            */
   p[2] = p[2] + 86400.0 * (double)(tt.days);
   p[1] = p[2] - q;

   return 0;
}
#endif

#ifdef UNIX
/* User CPU time of this process from getrusage(). */
int dtime(double p[])
{
   double q;

   q = p[2];

   getrusage(RUSAGE_SELF,&rusage);

   p[2] = (double)(rusage.ru_utime.tv_sec);
   p[2] = p[2] + (double)(rusage.ru_utime.tv_usec) / 1.0e+06;
   p[1] = p[2] - q;

   return 0;
}
#endif

#ifdef UNIX_Old
/* User CPU time from times(), converted with HZ ticks per second. */
int dtime(double p[])
{
   double q;

   q = p[2];

   times(&tms);

   p[2] = (double)(tms.tms_utime) / (double)HZ;
   p[1] = p[2] - q;

   return 0;
}
#endif

#ifdef TURBO_C
/* Time of day from gettime(). The hour is included so an interval may */
/* span an hour boundary; an interval spanning midnight still wraps.   */
int dtime(double p[])
{
   double q;

   q = p[2];

   gettime(&now);

   p[2] = 3600.0 * (double)(now.ti_hour);
   p[2] = p[2] + 60.0 * (double)(now.ti_min);
   p[2] = p[2] + (double)(now.ti_sec);
   p[2] = p[2] + (double)(now.ti_hund)/(double)HZ;
   p[1] = p[2] - q;

   return 0;
}
#endif

#ifdef MSC
/* clock() value converted with HZ (defined above as CLK_TCK). */
int dtime(double p[])
{
   double q;

   q = p[2];

   tnow = clock();

   p[2] = (double)tnow / (double)HZ;
   p[1] = p[2] - q;

   return 0;
}
#endif

#ifdef _OSK
/* clock() value converted with CLK_TCK. */
int dtime(double p[])
{
   double q;

   q = p[2];

   tnow = clock();

   p[2] = (double)tnow / (double)CLK_TCK;
   p[1] = p[2] - q;

   return 0;
}
#endif

#ifdef FLOPS_STDC_TIMER
/* Fallback used when no timer macro is defined: standard C clock(). */
int dtime(double p[])
{
   double q;

   q = p[2];

   p[2] = (double)clock() / (double)CLOCKS_PER_SEC;
   p[1] = p[2] - q;

   return 0;
}
#endif

/*------------------------ End flops.c code ---------------------------*/