home *** CD-ROM | disk | FTP | other *** search
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <unistd.h>

#include "conv.h"
-
/* --------- The following definitions change
   according to precision required -------- */

/*
 * Exactly one of DOUBLE, SINGLE, ZOMPLEX or COMPLEX must be defined by the
 * build (e.g. -DDOUBLE).  Each variant provides:
 *
 *   this_type          element type of the vx / vy / vz work vectors
 *   OPS_PER_ITER       weight applied to each inner iteration when the
 *                      driver converts iteration counts to flop counts
 *   MY_* / ORNL_* / SIMPLE_*
 *                      external kernel entry points the driver installs into
 *                      its function pointers
 *   MY_INIT / MY_ONE   routines the driver calls to fill vx and vy
 *   zero, one          scalar constants passed by address to the FIR kernels
 *
 * Without this guard a missing or doubled selection only shows up as
 * unrelated-looking errors much further down the file.
 */
#if (defined(DOUBLE) + defined(SINGLE) + defined(ZOMPLEX) + defined(COMPLEX)) != 1
#error "Define exactly one of DOUBLE, SINGLE, ZOMPLEX or COMPLEX"
#endif

#ifdef DOUBLE /* real double precision */

typedef double this_type;

#define OPS_PER_ITER 2

void dfir1d_(), ornl_dfir1d_();
void diir1d_(), ornl_diir1d_();
void dcor1d_(), ornl_dcor1d_();
void simple_dfir1d_(), simple_diir1d_(), simple_dcor1d_();
#define SIMPLE_FIR simple_dfir1d_
#define SIMPLE_IIR simple_diir1d_
#define SIMPLE_COR simple_dcor1d_
#define ORNL_FIR ornl_dfir1d_
#define ORNL_IIR ornl_diir1d_
#define ORNL_COR ornl_dcor1d_
#define MY_COR dcor1d_
#define MY_FIR dfir1d_
#define MY_IIR diir1d_

#define MY_INIT dinit_
#define MY_ONE done_
#define THIS_REAL
this_type zero = 0.;
this_type one = 1.;
#endif

#ifdef SINGLE /* real single precision */

typedef float this_type;

#define OPS_PER_ITER 2

void sfir1d_(), ornl_sfir1d_();
void siir1d_(), ornl_siir1d_();
void scor1d_(), ornl_scor1d_();
void simple_sfir1d_(), simple_siir1d_(), simple_scor1d_();
#define SIMPLE_FIR simple_sfir1d_
#define SIMPLE_IIR simple_siir1d_
#define SIMPLE_COR simple_scor1d_
#define ORNL_FIR ornl_sfir1d_
#define ORNL_IIR ornl_siir1d_
#define ORNL_COR ornl_scor1d_
#define MY_COR scor1d_
#define MY_FIR sfir1d_
#define MY_IIR siir1d_
#define MY_INIT sinit_
#define MY_ONE sone_
#define THIS_REAL
this_type zero = 0.;
this_type one = 1.;
#endif

#ifdef ZOMPLEX /* complex double precision */

typedef struct {double real, imag;} this_type;

#define OPS_PER_ITER 8

void zfir1d_(), ornl_zfir1d_();
void ziir1d_(), ornl_ziir1d_();
void zcor1d_(), ornl_zcor1d_();
void simple_zfir1d_(), simple_ziir1d_(), simple_zcor1d_();
#define SIMPLE_FIR simple_zfir1d_
#define SIMPLE_IIR simple_ziir1d_
#define SIMPLE_COR simple_zcor1d_
#define ORNL_FIR ornl_zfir1d_
#define ORNL_IIR ornl_ziir1d_
#define ORNL_COR ornl_zcor1d_
#define MY_COR zcor1d_
#define MY_FIR zfir1d_
#define MY_IIR ziir1d_
#define MY_INIT zinit_
#define MY_ONE zone_
#define THIS_COMPLEX
this_type zero = { 0., 0.};
this_type one = { 1., 0.};
#endif

#ifdef COMPLEX /* complex single precision */

typedef struct {float real, imag;} this_type;

#define OPS_PER_ITER 8

void cfir1d_(), ornl_cfir1d_();
void ciir1d_(), ornl_ciir1d_();
void ccor1d_(), ornl_ccor1d_();
void simple_cfir1d_(), simple_ciir1d_(), simple_ccor1d_();
#define SIMPLE_FIR simple_cfir1d_
#define SIMPLE_IIR simple_ciir1d_
#define SIMPLE_COR simple_ccor1d_
#define ORNL_FIR ornl_cfir1d_
#define ORNL_IIR ornl_ciir1d_
#define ORNL_COR ornl_ccor1d_
#define MY_COR ccor1d_
#define MY_FIR cfir1d_
#define MY_IIR ciir1d_
#define MY_INIT cinit_
#define MY_ONE cone_
#define THIS_COMPLEX
this_type zero = { 0., 0.};
this_type one = { 1., 0.};
#endif
-
- /* ---------- The rest is the same ---------------- */
-
/*
 * Driver tuning constants.
 *
 *   MAX_STRIDE  stride used for the non-unit-stride pass; also the factor by
 *               which the work vectors are over-allocated in main()
 *   MIN_OPS     minimum number of operations per timed measurement; each
 *               kernel is repeated until at least this much work is done
 *
 * MAX_SIZE, INC_STRIDE and MAX_TIMES are not referenced in this file's
 * visible code; they are kept for anything else that may rely on them.
 */
#define MAX_SIZE 111
#define MAX_STRIDE 7
#define INC_STRIDE 2
#define MAX_TIMES 3
#define MIN_OPS 5.e+6

/*
 * Both macros evaluate their arguments more than once, so do not pass
 * expressions with side effects you are not prepared to see repeated.
 * Every use of an argument is parenthesised so that low-precedence
 * expressions (e.g. an assignment) expand correctly.
 */
#define ABS(a) (((a) > 0) ? (a) : -(a))
#define MAX(a,b) (((a) < (b)) ? (b) : (a))
-
- void (*ornl_fir)(), (*my_fir)();
- void (*ornl_iir)(), (*my_iir)();
- void (*ornl_cor)(), (*my_cor)();
- void (*simple_fir)();
- void (*simple_iir)();
- void (*simple_cor)();
-
- void GetArguments();
- double second();
-
- int parallel;
- int is_speedup;
- int is_optimal;
- int n_trials;
- int all_run;
- int len = 4;
-
- int ldx, size, ldy, n_trials, n_times, nx, ny;
- int min_size, max_size, inc_size, xsize, ysize, zsize;
- this_type *vx, *vy, *vz;
-
- double t, x, y, z;
- double total_flops;
-
- main(argc,argv)
- int argc;
- char *argv[];
- {
- int i, j, k;
-
- /* ******************************************************* */
- GetArguments( argc, argv);
- /* ******************************************************* */
-
- if( is_speedup)
- fprintf( stderr, "Measuring Relative Performances Lib_version/Simple_version \n");
- else if( is_optimal)
- fprintf( stderr, "Measuring Performances for Lib_version (Mflops)\n");
- else
- fprintf( stderr, "Measuring Performances for Simple_version (Mflops)\n");
-
- fprintf( stderr,
- "Size FIR(1) IIR(1) COR(1) FIR(%d) IIR(%d) COR(%d)\n\n",
- MAX_STRIDE, MAX_STRIDE, MAX_STRIDE);
-
- xsize = MAX(max_size, nx) * MAX_STRIDE;
- ysize = MAX(max_size, ny) * MAX_STRIDE;
- zsize = xsize + ysize;
-
- vx = (this_type *)malloc( xsize * sizeof( this_type));
- vy = (this_type *)malloc( ysize * sizeof( this_type));
- vz = (this_type *)malloc( zsize * sizeof( this_type));
-
- if( (vx == (this_type *)0) || (vy == (this_type *)0)) {
- fprintf( stderr, "Malloc problem ... Exiting");
- exit( -2);
- }
- MY_INIT( (&xsize), vx);
- MY_ONE( (&ysize), vy);
-
- for( size = min_size; size <= max_size ; size += inc_size ) {
- ysize = (ny == 0) ? size : ny;
- xsize = (nx == 0) ? size : nx;
- zsize = xsize+ysize;
- printf("%4d ", size);
- do_it();
-
- printf("\n", x);
- fflush(stdout);
- }
-
- free ( vx);
- free ( vy);
-
- return(0);
- }
-
-
- do_it()
- {
- int ii, jj, inc, i1, i2, j1, j2, k1, k2;
- int l1, l2;
- i1 = 0; i2 = xsize;
- j1 = 0; j2 = ysize;
- k1=i1+j1; k2=i2+j2-1;
- l1 = -(j1+j2-1);
- l2 = j2;
-
- /*
- *
- * Do it first with UNIT stride
- *
- */
- inc = 1;
-
- /*
- * Finite Impulse Response Filter
- */
- total_flops = xsize * ysize * OPS_PER_ITER;
- n_times = MIN_OPS / total_flops;
- if( n_times < 1)
- n_times = 1;
- total_flops *= n_times;
-
- if( is_speedup || is_optimal ) {
- t = second();
- for( ii = 0 ; ii < n_times ; ii++)
- my_fir(vx,&inc,&i1,&i2,vy,&inc,&j1,&j2, vz,&inc,&k1,&k2,
- &one, &zero);
- t = second() - t;
- x = total_flops * 1.e-6 / t;
- }
- if( is_speedup || !is_optimal) {
- x = second();
- for( ii = 0 ; ii < n_times ; ii++)
- simple_fir(&i2,&j2,vx,vy,vz);
- x = second() - x;
- if( is_speedup )
- x = x / t;
- else
- x = total_flops * 1.e-6 / x;
- }
- printf (" %8.3f ", x);
- /*
- * Infinite Impulse Response Filter
- */
- if( xsize < ysize)
- total_flops = .5 * xsize * xsize * OPS_PER_ITER;
- else
- total_flops = ysize * ( xsize - .5 * ysize) * OPS_PER_ITER;
- n_times = MIN_OPS / total_flops;
- if( n_times < 1)
- n_times = 1;
- total_flops *= n_times;
-
- if( is_speedup || is_optimal ) {
- t = second();
- for( ii = 0 ; ii < n_times ; ii++)
- my_iir(vx,&inc,&i1,&i2,vy,&inc,&j1,&j2,vz,&inc,&k1,&i2);
- t = second() - t;
- x = total_flops * 1.e-6 / t;
- }
- if( is_speedup || !is_optimal) {
- x = second();
- for( ii = 0 ; ii < n_times ; ii++)
- simple_iir(&i2,&j2,vx,vy,vz);
- x = second() - x;
- if( is_speedup )
- x = x / t;
- else
- x = total_flops * 1.e-6 / x;
- }
- printf (" %8.3f ", x);
- /*
- * CORRELATION
- */
- total_flops = xsize * ysize * OPS_PER_ITER;
- n_times = MIN_OPS / total_flops;
- if( n_times < 1)
- n_times = 1;
- total_flops *= n_times;
-
- if( is_speedup || is_optimal ) {
- t = second();
- for( ii = 0 ; ii < n_times ; ii++)
- my_cor(vx,&inc,&i1,&i2,vy,&inc,&l1,&l2,vz,&inc,&k1,&k2);
- t = second() - t;
- x = total_flops * 1.e-6 / t;
- }
- if( is_speedup || !is_optimal) {
- x = second();
- for( ii = 0 ; ii < n_times ; ii++)
- simple_cor(&i2,&j2,vx,vy,vz);
- x = second() - x;
- if( is_speedup )
- x = x / t;
- else
- x = total_flops * 1.e-6 / x;
- }
- printf (" %8.3f ", x);
- /*
- *
- * Now do it again with NON_UNIT stride
- *
- */
- inc = MAX_STRIDE;
-
- /*
- * Finite Impulse Response Filter
- */
- total_flops = xsize * ysize * OPS_PER_ITER;
- n_times = MIN_OPS / total_flops;
- if( n_times < 1)
- n_times = 1;
- total_flops *= n_times;
-
- if( is_speedup || is_optimal ) {
- t = second();
- for( ii = 0 ; ii < n_times ; ii++)
- my_fir(vx,&inc,&i1,&i2,vy,&inc,&j1,&j2,vz,&inc,&k1,&k2,
- &one, &zero);
- t = second() - t;
- x = total_flops * 1.e-6 / t;
- }
- if( is_speedup || !is_optimal) {
- x = second();
- for( ii = 0 ; ii < n_times ; ii++)
- ornl_fir(vx,&inc,&i1,&i2,vy,&inc,&j1,&j2,vz,&inc,&k1,&k2,
- &one, &zero);
- x = second() - x;
- if( is_speedup )
- x = x / t;
- else
- x = total_flops * 1.e-6 / x;
- }
- printf (" %8.3f ", x);
- /*
- * Infinite Impulse Response Filter
- */
- if( xsize < ysize)
- total_flops = .5 * xsize * xsize * OPS_PER_ITER;
- else
- total_flops = ysize * ( xsize - .5 * ysize) * OPS_PER_ITER;
- n_times = MIN_OPS / total_flops;
- if( n_times < 1)
- n_times = 1;
- total_flops *= n_times;
-
- if( is_speedup || is_optimal ) {
- t = second();
- for( ii = 0 ; ii < n_times ; ii++)
- my_iir(vx,&inc,&i1,&i2,vy,&inc,&j1,&j2,vz,&inc,&k1,&i2);
- t = second() - t;
- x = total_flops * 1.e-6 / t;
- }
- if( is_speedup || !is_optimal) {
- x = second();
- for( ii = 0 ; ii < n_times ; ii++)
- ornl_iir(vx,&inc,&i1,&i2,vy,&inc,&j1,&j2,vz,&inc,&k1,&i2);
- x = second() - x;
- if( is_speedup )
- x = x / t;
- else
- x = total_flops * 1.e-6 / x;
- }
- printf (" %8.3f ", x);
- /*
- * CORRELATION
- */
- total_flops = xsize * ysize * OPS_PER_ITER;
- n_times = MIN_OPS / total_flops;
- if( n_times < 1)
- n_times = 1;
- total_flops *= n_times;
-
- if( is_speedup || is_optimal ) {
- t = second();
- for( ii = 0 ; ii < n_times ; ii++)
- my_cor(vx,&inc,&i1,&i2,vy,&inc,&l1,&l2,vz,&inc,&k1,&k2);
- t = second() - t;
- x = total_flops * 1.e-6 / t;
- }
- if( is_speedup || !is_optimal) {
- x = second();
- for( ii = 0 ; ii < n_times ; ii++)
- ornl_cor(vx,&inc,&i1,&i2,vy,&inc,&l1,&l2,vz,&inc,&k1,&k2);
- x = second() - x;
- if( is_speedup )
- x = x / t;
- else
- x = total_flops * 1.e-6 / x;
- }
- printf (" %8.3f ", x);
- }
-
- void GetArguments( argc, argv)
- int argc;
- char *argv[];
- {
- int i, j, k;
- int nerror = 0;
-
- srandom( (123*getpid()) | 0x01);
-
- #define ON 1
-
- is_speedup = 0;
- is_optimal = 0;
- ny = 0;
- nx = 0;
- all_run = 0;
-
- min_size = 16;
- max_size = 1024;
- inc_size = 8;
- ornl_fir = ORNL_FIR;
- ornl_iir = ORNL_IIR;
- ornl_cor = ORNL_COR;
- simple_fir = SIMPLE_FIR;
- simple_iir = SIMPLE_IIR;
- simple_cor = SIMPLE_COR;
- my_fir = MY_FIR;
- my_iir = MY_IIR;
- my_cor = MY_COR;
- /* ******************************************************* */
- for ( i = 1 ; (i < argc) && (nerror != ON) ; i ++ ) {
- if( argv[i][0] == '-') {
- switch ( argv[i][1]) {
-
- case 'x' :
- case 'X' :
- nx = atoi( argv[++i]);
- break;
- case 'y' :
- case 'Y' :
- ny = atoi( argv[++i]);
- break;
- case 'z' :
- case 'Z' :
- is_speedup = 1;
- is_optimal = 0;
- break;
- case 'p' :
- case 'P' :
- case 's' :
- case 'S' :
- is_optimal = 1;
- is_speedup = 0;
- break;
- default : nerror = ON;
- }
- }
- else {
- if( i+1 > argc)
- nerror = ON;
- else {
- min_size = atoi( argv[i]);i++;
- max_size = atoi( argv[i]);i++;
- inc_size = atoi( argv[i]);
- }
- }
- }
- if( nerror == ON) {
- fprintf( stderr,
- "Usage : %s [-s / -z] [-x nx] [-y ny] <min max inc>\n", argv[0]);
- exit(-1);
- }
- }
- /* **********************************************************************
-
- give the elapsed wall clock time
-
- ********************************************************************** */
/*
 * Return the wall-clock time, in seconds, elapsed since the first call to
 * this function.  The first call records the origin and returns 0.0.
 *
 * An explicit flag marks the origin as set: using the stored value itself as
 * the "unset" sentinel would misfire whenever the first call lands on a
 * microsecond count of exactly zero.
 *
 * Returns 0.0 if the clock cannot be read.
 */
double second(void)
{
    static int have_origin = 0;
    static struct timeval origin;
    struct timeval now;

    /* The timezone argument is obsolete; POSIX callers pass a null pointer. */
    if (gettimeofday(&now, NULL) != 0)
        return 0.0;

    if (!have_origin) {
        origin = now;
        have_origin = 1;
    }

    /* Subtract the integer fields first so precision is not lost on large
     * absolute timestamps. */
    return (double)(now.tv_sec - origin.tv_sec)
         + 1.0e-6 * (double)(now.tv_usec - origin.tv_usec);
}
-
-