DP Tool Club 26

home *** CD-ROM | disk | FTP | other *** search

/ DP Tool Club 26 / CD_ASCQ_26_1295.iso / vrac / laipe.zip / CBANDED.C next >

Wrap

C/C++ Source or Header | 1995-07-31 | 7KB | 266 lines

/* ; ; Program to demo parallel performance of the following two LAIPE functions ; (1) Decompose_CSP_S that decomposes a constantly-banded, symmetric, ; and positive definite matrix into the product of [L][L]^T. ; (2) Substitute_CSP_S that implements forward and backward substitutions. ; ; If a multiple-processor computer is available, these two functions will ; be implemented in a loop. The loop bound begins with one to the number ; of system processors, and each loop sets the loop counter as the number ; of employed processors. Such that, each loop executes on different ; number of processors. Keep computer STANDING ALONE while running this ; program, and elapsed time with respect to different number of processors ; will be reported. ; ; Running this program, user has to provide the matrix order, i.e., 10000, ; and the lower bandwidth, i.e., 600. Large problem may have a better ; performance. It is inappropriate to try this program for a small problem ; where no sufficient operations may be distributed onto multiple ; processors. For a small bandwidth problem, try a multiple-entry solver. ; */ /* initial values */ #define LIMIT 6100000 /* head file */ #include <stdlib.h> #include <stdio.h> /* function protype */ float crandom( int *seed); void GenerateData( float *A, float *X, float *Solution, int *N, int *M ); /* main function */ main() { static float A[LIMIT]; float Zero, Lower, Upper, R4TEMP; int N, M, IndexA, IndexX, IndexSolution; int processors, CPUs, KK, JJ; int NoGood, I; float second; Zero = 0.0001; /* enter order of the system */ printf("Enter order (i.e., 10000 or something else): "); scanf("%d", &N); /* enter half bandwidth */ printf("Enter half bandwidth (i.e., 600 or something else): "); scanf("%d", &M); /* memory distribution */ IndexA = 0; IndexX = IndexA + (N - 1) * M + N; IndexSolution = IndexX + N; /* check memory space */ if( IndexSolution + N >= LIMIT ) { printf("Memory overflow\n"); exit; } /* get number of system processors */ GetCPUs( &CPUs ); /* set number of processor to execute the following statements */ for ( processors = 1; processors <= CPUs; processors ++ ) { SetEmployedProcessors( &processors ); /* ; generate left side matrix and right side vector */ printf("\n"); printf("Generate data in a pseudo-random procedure..."); GenerateData( &A[IndexA], &A[IndexX], &A[IndexSolution], &N, &M); /* output number of employed processors */ printf("\nNumber of processors employed: %d\n", processors); /* start collecting elapsed time */ CollectElapsedTime(); /* decompose in parallel */ Decompose_CSP_S( &A[IndexA], &N, &M, &Zero, &NoGood ); /* output elapsed time */ GetElapsedTime(&second); printf("Decompose>>> Elapsed Time (seconds): %8.2f\n", second); /* stop if the system is not positive definite */ if( NoGood == 1) { printf("The system is not positive definite.\n"); exit; } /* start collecting elapsed time */ CollectElapsedTime(); /* substitute in parallel */ Substitute_CSP_S( &A[IndexA], &N, &M, &A[IndexX] ); /* output elapsed time */ GetElapsedTime(&second); printf("Substitute>>> Elapsed Time (seconds): %8.2f\n", second); /* calculate upper and lower bounds of relative error */ KK = IndexX; JJ = IndexSolution; R4TEMP = ( A[KK] - A[JJ] ) / A[JJ]; Lower = R4TEMP; Upper = R4TEMP; for ( I = 2; I <= N; I ++) { JJ ++; KK ++; R4TEMP = ( A[KK]-A[JJ] ) / A[JJ]; if( R4TEMP < Lower) { Lower = R4TEMP; } else if (R4TEMP > Upper ) { Upper = R4TEMP; } } /* output lower bound and upper bound of the relative error */ printf("Lower and upper bound of the relative error:\n"); printf("%6.12f %6.12f\n", Lower, Upper); } } void GenerateData( float *A, float *X, float *Solution, int *N, int *M ) /* ; ; routine to generate the left side matrix, and the right side vector ; Parameters: ; 1.A: <R4> left side matrix, dimension((N-1)*M+N) ; 2.X: <R4> right side vector, dimension(N) ; 3.Solution: <R4> return the solution for checking accuracy, ; dimension(N) ; 4.N: <I4> order ; 5.M: <I4> half band width ; */ { int SEED, K, J, I, II, IJ; SEED = 456789; /* left side matrix */ for ( I = 0; I < (( *N - 1 ) * *M + *N ); I ++ ) A[I] = crandom( &SEED ) * 100.0; for ( I = 1; I <= *N; I ++) { II = (I - 1) * ( *M + 1); K = I - *M; if( K < 1 ) K = 1; for ( J = K; J < I; J ++ ) { IJ = ( J - 1 ) * *M + I - 1; A[II] = A[II] + A[IJ]; } K = I + *M; if( K > *N ) K = *N; IJ = (I - 1) * *M + I; for ( J = I + 1; J <= K; J ++ ) { A[II] = A[II] + A[IJ]; IJ ++; } A[II] = A[II] * 100.0; } /* solution */ for ( I = 0; I < *N; I ++ ) { Solution[I] = crandom( &SEED ) * 500.0; } /* right side vector */ II = 0; for ( I = 1; I <= *N; I ++ ) { II = (I - 1) * ( *M + 1); X[I-1] = A[II] * Solution[I-1]; K = I - *M; if( K < 1 ) K = 1; for ( J = K; J < I; J ++) { IJ = ( J - 1) * *M + I - 1; X[I-1] = X[I-1] + A[IJ] * Solution[J-1]; } K = I + *M; if( K > *N ) K = *N; IJ= (I - 1) * *M + I; for ( J = I + 1; J <= K; J ++) { X[I-1] = X[I-1] + A[IJ] * Solution[J-1]; IJ ++; } } return; } float crandom(int *seed) { #define m 2147483647 #define a 16807 double rtempa, rtempb; float rtemp; int itemp; rtempa = (double) *seed * a; itemp = (int) (rtempa / (double) m); rtempb = (double) m * (double) itemp; *seed = (int) (rtempa - rtempb); rtemp = (double) *seed / (double) m; return rtemp; }