home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
DP Tool Club 26
/
CD_ASCQ_26_1295.iso
/
vrac
/
laipe.zip
/
CBANDED.C
next >
Wrap
C/C++ Source or Header
|
1995-07-31
|
7KB
|
266 lines
/*
;
; Program to demo parallel performance of the following two LAIPE functions
; (1) Decompose_CSP_S that decomposes a constantly-banded, symmetric,
; and positive definite matrix into the product of [L][L]^T.
; (2) Substitute_CSP_S that implements forward and backward substitutions.
;
; If a multiple-processor computer is available, these two functions will
; be implemented in a loop. The loop bound begins with one to the number
; of system processors, and each loop sets the loop counter as the number
; of employed processors. Such that, each loop executes on different
; number of processors. Keep computer STANDING ALONE while running this
; program, and elapsed time with respect to different number of processors
; will be reported.
;
; Running this program, user has to provide the matrix order, i.e., 10000,
; and the lower bandwidth, i.e., 600. Large problem may have a better
; performance. It is inappropriate to try this program for a small problem
; where no sufficient operations may be distributed onto multiple
; processors. For a small bandwidth problem, try a multiple-entry solver.
;
*/
/* initial values */
#define LIMIT 6100000
/* head file */
#include <stdlib.h>
#include <stdio.h>
/* function protype */
float crandom( int *seed);
void GenerateData( float *A,
float *X,
float *Solution,
int *N,
int *M
);
/* main function */
main()
{
static float A[LIMIT];
float Zero, Lower, Upper, R4TEMP;
int N, M, IndexA, IndexX, IndexSolution;
int processors, CPUs, KK, JJ;
int NoGood, I;
float second;
Zero = 0.0001;
/* enter order of the system */
printf("Enter order (i.e., 10000 or something else): ");
scanf("%d", &N);
/* enter half bandwidth */
printf("Enter half bandwidth (i.e., 600 or something else): ");
scanf("%d", &M);
/* memory distribution */
IndexA = 0;
IndexX = IndexA + (N - 1) * M + N;
IndexSolution = IndexX + N;
/* check memory space */
if( IndexSolution + N >= LIMIT )
{
printf("Memory overflow\n");
exit;
}
/* get number of system processors */
GetCPUs( &CPUs );
/* set number of processor to execute the following statements */
for ( processors = 1; processors <= CPUs; processors ++ )
{
SetEmployedProcessors( &processors );
/*
; generate left side matrix and right side vector
*/
printf("\n");
printf("Generate data in a pseudo-random procedure...");
GenerateData( &A[IndexA],
&A[IndexX],
&A[IndexSolution],
&N,
&M);
/* output number of employed processors */
printf("\nNumber of processors employed: %d\n", processors);
/* start collecting elapsed time */
CollectElapsedTime();
/* decompose in parallel */
Decompose_CSP_S( &A[IndexA],
&N,
&M,
&Zero,
&NoGood
);
/* output elapsed time */
GetElapsedTime(&second);
printf("Decompose>>> Elapsed Time (seconds): %8.2f\n", second);
/* stop if the system is not positive definite */
if( NoGood == 1)
{
printf("The system is not positive definite.\n");
exit;
}
/* start collecting elapsed time */
CollectElapsedTime();
/* substitute in parallel */
Substitute_CSP_S( &A[IndexA],
&N,
&M,
&A[IndexX]
);
/* output elapsed time */
GetElapsedTime(&second);
printf("Substitute>>> Elapsed Time (seconds): %8.2f\n", second);
/* calculate upper and lower bounds of relative error */
KK = IndexX;
JJ = IndexSolution;
R4TEMP = ( A[KK] - A[JJ] ) / A[JJ];
Lower = R4TEMP;
Upper = R4TEMP;
for ( I = 2; I <= N; I ++)
{
JJ ++;
KK ++;
R4TEMP = ( A[KK]-A[JJ] ) / A[JJ];
if( R4TEMP < Lower)
{
Lower = R4TEMP;
}
else if (R4TEMP > Upper )
{
Upper = R4TEMP;
}
}
/* output lower bound and upper bound of the relative error */
printf("Lower and upper bound of the relative error:\n");
printf("%6.12f %6.12f\n", Lower, Upper);
}
}
void GenerateData( float *A,
float *X,
float *Solution,
int *N,
int *M
)
/*
;
; routine to generate the left side matrix, and the right side vector
; Parameters:
; 1.A: <R4> left side matrix, dimension((N-1)*M+N)
; 2.X: <R4> right side vector, dimension(N)
; 3.Solution: <R4> return the solution for checking accuracy,
; dimension(N)
; 4.N: <I4> order
; 5.M: <I4> half band width
;
*/
{
int SEED, K, J, I, II, IJ;
SEED = 456789;
/* left side matrix */
for ( I = 0; I < (( *N - 1 ) * *M + *N ); I ++ )
A[I] = crandom( &SEED ) * 100.0;
for ( I = 1; I <= *N; I ++)
{
II = (I - 1) * ( *M + 1);
K = I - *M;
if( K < 1 ) K = 1;
for ( J = K; J < I; J ++ )
{
IJ = ( J - 1 ) * *M + I - 1;
A[II] = A[II] + A[IJ];
}
K = I + *M;
if( K > *N ) K = *N;
IJ = (I - 1) * *M + I;
for ( J = I + 1; J <= K; J ++ )
{
A[II] = A[II] + A[IJ];
IJ ++;
}
A[II] = A[II] * 100.0;
}
/* solution */
for ( I = 0; I < *N; I ++ )
{
Solution[I] = crandom( &SEED ) * 500.0;
}
/* right side vector */
II = 0;
for ( I = 1; I <= *N; I ++ )
{
II = (I - 1) * ( *M + 1);
X[I-1] = A[II] * Solution[I-1];
K = I - *M;
if( K < 1 ) K = 1;
for ( J = K; J < I; J ++)
{
IJ = ( J - 1) * *M + I - 1;
X[I-1] = X[I-1] + A[IJ] * Solution[J-1];
}
K = I + *M;
if( K > *N ) K = *N;
IJ= (I - 1) * *M + I;
for ( J = I + 1; J <= K; J ++)
{
X[I-1] = X[I-1] + A[IJ] * Solution[J-1];
IJ ++;
}
}
return;
}
float crandom(int *seed)
{
#define m 2147483647
#define a 16807
double rtempa, rtempb;
float rtemp;
int itemp;
rtempa = (double) *seed * a;
itemp = (int) (rtempa / (double) m);
rtempb = (double) m * (double) itemp;
*seed = (int) (rtempa - rtempb);
rtemp = (double) *seed / (double) m;
return rtemp;
}