/*
 * Retrieved from the rtsi.com archive (2014.01.www.rtsi.com.tar):
 *   www.rtsi.com/OS9/BENCHMARKS/flops.c
 * C/C++ source, dated 2009-11-06, 14KB, 507 lines.
 */
/*****************************/
/* FLOPS.c */
/* Version 1.2, 29 Feb 1992 */
/* Al Aburto */
/* 'ala' on BIX */
/* aburto@marlin.nosc.mil */
/*****************************/
/*
FLOPS.c is a 'c' program which attempts to estimate your system's scalar
floating-point 'MFLOP' rating for the scalar +, -, *, and / operations
based on a specific 'instruction mix'. It attempts to measure the time
for an FADD and the time to do an empty 'for' loop in microseconds. It
calculates pi based on the series expansion for atan(1.0). A few
correction terms are applied to achieve at most a 16 digit accurate
result. It calculates the area under the sin(x) curve from 0 to pi/3
using the Trapezoidal rule. Sin(x) is approximated by a polynomial
series accurate to approximately 1.0e-14 over the specified range. The
program is designed for double precision. The program does not work
with arrays and thus it is not an appropriate program for an important
group of processors (array, vector, and possibly other type machines).
It gives a Scalar (one dimensional) MFLOP rating, not an N-Scalar or
vector MFLOP rating. It was originally intended for PC's but it is
applicable for a wide variety of different machines.
The Scalar MFLOPS rating is based on a count of 52 total scalar
floating-point operations per loop (iteration). The instruction mix is:
FADD: 40.38, FSUB: 23.08, FMUL: 26.92, and FDIV: 9.62 percent. This is
based on instruction mix data found in the Dhrystone paper by Reinhold
P. Weicker, Communications of the ACM, Oct 1984, Vol 27, Number 10,
Page 1013 (Table IV). This is the best available information I found on
which to base the instruction mix.
Version 1.2 corrects some problems encountered with Version 1.1. A timer
routine for UNIX using 'getrusage()' has been added by Markku Kolkka of
Tampere University of Technology, Finland. This eliminates problems with
the definition of 'HZ' in the old routine (UNIX_Old). I eliminated the
'w' sum in the second loop as it did nothing. The FADD time is now
estimated (the printf is commented out, but you can uncomment it to see
what it says) and I use it solely to estimate the number of loops to
conduct: 20 million loops when the program estimates the FADD is faster
than 1/3 usec, 0.1 million loops for FADD slower than 4 usec, and
1 million loops otherwise. If the number of loops conducted is not
properly related to 'loops' then the program prints out a message saying
the program and results are invalid. Bo Thide' of the Swedish Institute
of Space Physics, Sweden caught an error in the value for 'piref' and
that has been corrected. The time to do an empty 'for' loop is also
estimated somewhat better since there are now automatic longer loops.
The V1.2 results are not expected to show any significant difference
relative to the V1.1 results. I verified this with several machines
running in the 1 to 6 MFLOP range, but you might want to recheck the
faster machines. The results do vary even when doing 20 million loops.
The maximum standard deviation I observed in the systems I checked was
0.07 MFLOP at the 6 MFLOP range.
See the next page for 'register' and 'timer' options available in the
program. Example UNIX compilation is: 'cc -DUNIX -O2 flops.c -o flops',
or 'cc -DUNIX -DROPT flops.c -o flops', ... , etc.
NOTE: Please do not remove any of the printouts.
Al Aburto
*/
#include <stdio.h>
#include <math.h>
/* 'Uncomment' the line below to run */
/* with 'register double' variables */
/* defined, or compile with the */
/* '-DROPT' option. Don't need this if */
/* registers used automatically. */
/* #define ROPT */
/* 'Uncomment' one of the statements */
/* below to access the right timer */
/* routine for your system, or compile */
/* with '-DUNIX' (for example). You */
/* may need to write your own similar */
/* timer routine if you have another */
/* system or compiler not covered by */
/* the options below. I have not tested*/
/* the 'MSC' option. */
/* #define Amiga */
/* #define UNIX */
/* #define UNIX_Old */
/* #define TURBO_C */
/* #define MSC */
/* Timer configuration: each section below supplies the headers, the  */
/* HZ divisor and/or the global scratch object used by the matching   */
/* dtime() variant defined after main().                              */

/* Amiga: dtime() divides the DateStamp() tick count by HZ. */
#ifdef Amiga
#include <ctype.h>
#define HZ 50
#endif
/* _OSK: dtime() reads clock() into tnow and divides by CLK_TCK.       */
/* NOTE(review): _OSK is not among the user-selectable options listed  */
/* above; presumably it is predefined by the OS-9 compiler -- confirm. */
#ifdef _OSK
#include <time.h>
clock_t tnow;
#endif
/* UNIX: dtime() reads the user CPU time through getrusage(). */
#ifdef UNIX
#include <sys/time.h>
#include <sys/resource.h>
struct rusage rusage;
#endif
/* UNIX_Old: dtime() reads tms_utime through times() and divides by   */
/* HZ; HZ falls back to 60 when <sys/param.h> does not define it.     */
#ifdef UNIX_Old
#include <sys/types.h>
#include <sys/times.h>
#include <sys/param.h>
#ifndef HZ
#define HZ 60
#endif
struct tms tms;
#endif
/* TURBO_C: dtime() reads the time of day through gettime(); the      */
/* ti_hund field is divided by HZ.                                    */
#ifdef TURBO_C
#include <ctype.h>
#include <dos.h>
#include <time.h>
#define HZ 100
struct time now;
#endif
/* MSC: dtime() reads clock() into tnow and divides by HZ (CLK_TCK). */
#ifdef MSC
#include <time.h>
#include <ctype.h>
#define HZ CLK_TCK
clock_t tnow;
#endif
/* Scratch array handed to dtime(): dtime() keeps its last timestamp   */
/* in element [2] and writes the elapsed time into element [1].        */
double TimeArray[3];

/* Timing results and derived figures, indexed by fixed slot numbers   */
/* assigned in main().                                                 */
double T[25];

/* Working sums and results shared by the benchmark modules. */
double sa;
double sb;
double sc;
double sd;

/* Small constants; main() assigns them at run time (1.0 .. 5.0). */
double one;
double two;
double three;
double four;
double five;

/* Reference value of pi, computed value of pi and their difference. */
double piref;
double piprg;
double pierr;

/* Integration step width used by the sin(x) area module. */
double delta;

/* Coefficients of the polynomial in w = x*x that main() evaluates as  */
/* ((((((A6*w-A5)*w+A4)*w-A3)*w+A2)*w+A1)*w+A0) to approximate sin(x)/x. */
double A0 = 1.0;
double A1 = -0.1666666666671334;
double A2 = 0.833333333809067E-2;
double A3 = 0.198412715551283E-3;
double A4 = 0.27557589750762E-5;
double A5 = 0.2507059876207E-7;
double A6 = 0.164105986683E-9;
/*
 * dtime() is defined after main() in one of several platform-specific
 * variants.  Declare it here so the calls below are checked against a
 * prototype instead of relying on an implicit declaration.
 */
int dtime(double p[]);

/*
 * Benchmark driver.
 *
 * Module 1 times simple accumulation loops to estimate the empty-loop
 * time and the FADD time, and uses the FADD estimate to pick the number
 * of iterations for the rest of the run.  Module 2 computes pi from the
 * series for atan(1.0); Module 3 integrates sin(x) from 0 to pi/3 with
 * the trapezoidal rule.  The two timed kernels are combined into a
 * time-per-operation figure (T[9]) and a MFLOPS rating (T[10]).
 *
 * Slots of T[] used here:
 *   T[1],  T[2]   second-pass timings of the one-add / two-add loops
 *   T[3]          loop time estimate (average of both passes)
 *   T[4] .. T[6]  Module 2 timings (T[6] = T[5] - T[4])
 *   T[7],  T[8]   Module 3 timing and the same minus the loop time
 *   T[9],  T[10]  time per floating-point operation and its reciprocal
 *   T[19]..T[22]  first-pass timings and loop / FADD estimates
 *   T[23]         FADD time estimate (average of both passes)
 *   T[24]         scale factor 1.0e+06 / (iteration count)
 *
 * The iteration counts are repeatedly recomputed from the floating-point
 * results of the loops; the final check compares the recovered count
 * with 'loops' and reports when they disagree.
 *
 * Returns 0.
 */
int main(void)
{
#ifdef ROPT
    register double s,u,v,w,x;
#else
    double s,u,v,w,x;
#endif
    long i, loops, m, n;

    printf("\n");
    printf(" FLOPS C Program (Double Precision)\n");
    printf(" Version 1.2, 29 Feb 1992\n\n");

    /******************************/
    loops = 1000000;                 /* Number of loops conducted. */
    /******************************/

    /****************************************************/
    /* Set Global Variable Values. T[24] References     */
    /* All Timing Results Relative To 1 Million Loops.  */
    /****************************************************/
    T[24] = 1.0e+06/(double)loops;
    piref = 3.14159265358979324;
    one = 1.0;
    two = 2.0;
    three= 3.0;
    four = 4.0;
    five = 5.0;
    sd = one;                        /* factor mapping the final count back to 'loops' */

    /**********************************/
    /* Module 1. Estimate Loop time   */
    /* and FADD time.                 */
    /**********************************/

    /* Loop 1: one add per iteration. */
    u = 0.0;
    v = one;
    dtime(TimeArray);
    for( i = 1 ; i<= loops ; i++ )
    {
        u = u + v;
    }
    dtime(TimeArray);
    T[19] = T[24] * TimeArray[1];
    m = (long)u;                     /* iteration count taken from the sum itself */

    /* Loop 2: two adds per iteration. */
    u = 0.0;
    s = 0.0;
    dtime(TimeArray);
    for( i = 1 ; i<= m ; i++ )
    {
        u = u + v;
        s = s + u;
    }
    dtime(TimeArray);
    T[20] = T[24] * TimeArray[1];

    /* Loop Time (usec), First Estimate. */
    T[21] = two * T[19] - T[20];
    if ( T[21] < 0.0 ) T[21] = 0.0;

    /* With v == 1, u ends at m and s at m*(m+1)/2, so 2*s/u - 1 is m. */
    n = (long)( two * ( s / u ) - one );

    /* FADD Time (usec), First Estimate. */
    T[22] = T[20] - T[19];
    if ( T[22] < 0.0 ) T[22] = 0.0;

    /* Choose the iteration count: 20x for a fast FADD, 1/10 for a slow one. */
    m = n;
    if ( T[22] < (one / three) )
    {
        sd = one / 20.0;
        m = 20 * n;
        T[24] = 1.0e+06/(double)m;
    }
    if ( T[22] > four )
    {
        sd = 10.0;
        m = n / 10;
        T[24] = 1.0e+06/(double)m;
    }

    /* Loop 3: one add per iteration, using the chosen count. */
    u = 0.0;
    dtime(TimeArray);
    for( i = 1 ; i<= m ; i++ )
    {
        u = u + v;
    }
    dtime(TimeArray);
    T[1] = T[24] * TimeArray[1];
    m = (long)u;

    /* Loop 4: two adds per iteration. */
    u = 0.0;
    s = 0.0;
    dtime(TimeArray);
    for( i = 1 ; i<= m ; i++ )
    {
        u = u + v;
        s = s + u;
    }
    dtime(TimeArray);
    T[2] = T[24] * TimeArray[1];

    /* Loop Time (usec), Second Estimate. */
    T[3] = two * T[1] - T[2];
    if ( T[3] < 0.0 ) T[3] = 0.0;

    m = (long)( two * ( s / u ) - v );

    /* FADD Time (usec), Second Estimate. */
    T[23] = T[2] - T[1];
    if ( T[23] < 0.0 ) T[23] = 0.0;

    /* Average the first and second estimates. */
    T[3] = ( T[3] + T[21] ) / two;
    T[23] = ( T[23] + T[22] ) / two;
    /*
    printf(" Loop Time (usec) = %10.4lf\n",T[3]);
    printf(" FADD Time (usec) = %10.4lf\n\n",T[23]);
    */

    /*******************************************************/
    /* Module 2. Calculate Value Of PI From Taylor Series  */
    /*           Expansion Of atan(1.0). There Are 7       */
    /*           Double Precision Operations Per Loop      */
    /*           ( 3 +, 2 -, 1 *, and 1 / ) That are       */
    /*           Included in The Timing.                   */
    /*******************************************************/

    /* Loop 5: only the sign flip and one add; its time (T[4]) is */
    /* subtracted from Loop 6 so those operations are not counted. */
    s =-five;
    sa =-one;
    dtime(TimeArray);
    for ( i = 1 ; i <= m ; i++ )
    {
        s =-s;
        sa = sa + s;
    }
    dtime(TimeArray);
    T[4] = T[24] * TimeArray[1];
    if ( T[4] < 0.0 ) T[4] = 0.0;

    /* Loop 6: the timed series summation. */
    sc = (double)m;
    u = sa;
    v = 0.0;
    w = 0.0;
    x = 0.0;
    dtime(TimeArray);
    for ( i = 1 ; i <= m ; i++)
    {
        s =-s;
        sa = sa + s;
        u = u + two;
        x = x +(s - u);
        v = v - s * u;
        w = w + s / u;
    }
    dtime(TimeArray);
    T[5] = T[24] * TimeArray[1];
    T[6] = T[5] - T[4];

    /* PI Results.  m is recomputed from the loop's sums. */
    m = (long)( sa * x / sc );
    sa = four * w / five;
    sb = sa + five / v;              /* correction terms applied to the raw series sum */
    sc = 31.25;
    piprg = sb - sc / (v * v * v);
    pierr = piprg - piref;

    /*********************/
    /*   DO NOT REMOVE   */
    /* THESE PRINTOUTS!  */
    /*********************/
    printf(" PI: Program = %20.17lf\n",piprg);
    printf(" PI: Reference = %20.17lf\n",piref);
    printf(" PI: Error =%13.4le\n\n",pierr);

    /*******************************************************/
    /* Module 3. Calculate Area Under sin(x) Curve From    */
    /*           0.0 To PI/3.0 Using Trapezoidal Method.   */
    /*           Result is 0.5 . There Are 17 Double       */
    /*           Precision Operations Per Loop ( 6 +, 2 -, */
    /*           9 *, and 0 / ) Included In The Timing.    */
    /*******************************************************/

    /* Loop 7: sum the polynomial approximation of sin(x) at each step. */
    delta = piref / ( three * (double)m );
    s = 0.0;
    v = 0.0;
    dtime(TimeArray);
    for( i = 1 ; i <= m ; i++ )
    {
        v = v + one;
        u = v * delta;
        w = u * u;
        s = s + u * ((((((A6*w-A5)*w+A4)*w-A3)*w+A2)*w+A1)*w+A0);
    }
    dtime(TimeArray);
    T[7] = T[24] * TimeArray[1];

    /* Half of the end-point value at pi/3, removed from the sum below. */
    u = piref / three;
    w = u * u;
    sa = u * ((((((A6*w-A5)*w+A4)*w-A3)*w+A2)*w+A1)*w+A0) / two;
    m = (long)v;

    /* Area Results. */
    sa = delta * ( s - sa );
    sb = 0.5;
    sc = sa - sb;

    /*********************/
    /*   DO NOT REMOVE   */
    /* THESE PRINTOUTS!  */
    /*********************/
    printf(" Area: Program = %20.17lf\n",sa);
    printf(" Area: Reference = %20.17lf\n",sb);
    printf(" Area: Error =%13.4le\n\n",sc);

    /* Combine: five Module 2 loops (7 ops each) plus one Module 3 loop */
    /* (17 ops, less the loop time) give the 52-operation mix.          */
    T[8] = T[7] - T[3];
    T[9] = ( five * T[6] + T[8] ) / 52.0;
    T[10] = one / T[9];

    /*********************/
    /*   DO NOT REMOVE   */
    /* THESE PRINTOUTS!  */
    /*********************/
    printf(" Iterations = %9ld\n",m);
    printf(" BenchTime(usec) = %9.4lf\n",T[9]);
    printf(" Scalar MFLOPS = %9.4lf\n\n",T[10]);

    /* The count recovered from the arithmetic, rescaled by sd, must */
    /* equal the original 'loops'.                                   */
    x = sd * (double)m;
    if ( (long)x != loops )
    {
        printf(" Results are bogus! It appears that an optimization\n");
        printf(" was done which invalidates the program and results.\n\n");
    }

    return 0;
}
/********************************************************/
/* dtime() stores in p[1] the time elapsed between the  */
/* previous call of dtime() and the current call.       */
/********************************************************/
#ifdef Amiga
/*
 * dtime() for the Amiga, based on DateStamp().
 *
 * p[2] holds the timestamp of the previous call on entry and of this
 * call on return; p[1] receives the difference between the two.
 * The day count is included in the timestamp so that an interval that
 * spans midnight does not come out negative.
 *
 * Always returns 0.
 */
int dtime(double p[])
{
    /* Receives the three values written by DateStamp(). */
    struct tt {
        long days;
        long minutes;
        long ticks;
    } tt;
    double q = p[2];

    DateStamp(&tt);
    p[2] = (double)tt.days * 86400.0
         + (double)tt.minutes * 60.0
         + (double)tt.ticks / (double)HZ;
    p[1] = p[2] - q;
    return 0;
}
#endif
#ifdef UNIX
/*
 * dtime() for UNIX, based on getrusage().
 *
 * p[2] holds the timestamp of the previous call on entry and of this
 * call on return; p[1] receives the difference between the two, i.e.
 * the user CPU time in seconds consumed since the previous call.
 *
 * Returns 0 on success.  If getrusage() fails, p[] is left untouched
 * and -1 is returned.
 */
int dtime(double p[])
{
    struct rusage usage;    /* local, so the function needs no file-scope state */
    double q = p[2];

    if (getrusage(RUSAGE_SELF, &usage) != 0)
    {
        return -1;
    }

    p[2] = (double)(usage.ru_utime.tv_sec)
         + (double)(usage.ru_utime.tv_usec) / 1.0e+06;
    p[1] = p[2] - q;
    return 0;
}
#endif
#ifdef UNIX_Old
/*
 * dtime() for older UNIX systems, based on times().
 *
 * p[2] holds the timestamp of the previous call on entry and of this
 * call on return; p[1] receives the difference between the two.
 * The timestamp is tms_utime divided by HZ, so the result is only as
 * correct as the HZ value in effect for this build.
 *
 * Always returns 0.
 */
int dtime(double p[])
{
    double q = p[2];

    times(&tms);
    p[2] = (double)(tms.tms_utime) / (double)HZ;
    p[1] = p[2] - q;
    return 0;
}
#endif
#ifdef TURBO_C
/*
 * dtime() for Turbo C, based on gettime().
 *
 * p[2] holds the timestamp of the previous call on entry and of this
 * call on return; p[1] receives the difference between the two.
 * The timestamp is the time of day in seconds.  The hour field is
 * included so that an interval crossing an hour boundary does not come
 * out negative; an interval crossing midnight still does.
 *
 * Always returns 0.
 */
int dtime(double p[])
{
    double q = p[2];

    gettime(&now);
    p[2] = 3600.0 * (double)(now.ti_hour)
         + 60.0 * (double)(now.ti_min)
         + (double)(now.ti_sec)
         + (double)(now.ti_hund) / (double)HZ;
    p[1] = p[2] - q;
    return 0;
}
#endif
#ifdef MSC
/*
 * dtime() for Microsoft C, based on clock().
 *
 * p[2] holds the timestamp of the previous call on entry and of this
 * call on return; p[1] receives the difference between the two.
 * The timestamp is the clock() value divided by HZ.
 *
 * Always returns 0.
 */
int dtime(double p[])
{
    double q = p[2];

    tnow = clock();
    p[2] = (double)tnow / (double)HZ;
    p[1] = p[2] - q;
    return 0;
}
#endif
#ifdef _OSK
/*
 * dtime() for builds where _OSK is defined, based on clock().
 *
 * p[2] holds the timestamp of the previous call on entry and of this
 * call on return; p[1] receives the difference between the two.
 * The timestamp is the clock() value divided by CLK_TCK.
 *
 * NOTE(review): written with a prototype-style parameter list; confirm
 * the target OS-9 compiler accepts ANSI C function definitions.
 *
 * Always returns 0.
 */
int dtime(double p[])
{
    double q = p[2];

    tnow = clock();
    p[2] = (double)tnow / (double)CLK_TCK;
    p[1] = p[2] - q;
    return 0;
}
#endif
/*------------------------ End flops.c code ---------------------------*/