home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
DP Tool Club 26
/
CD_ASCQ_26_1295.iso
/
vrac
/
laipe.zip
/
FBANDED.FOR
< prev
next >
Wrap
Text File
|
1995-07-31
|
7KB
|
258 lines
C
C
C Program to demo parallel performance of the following LAIPE subroutines
C (1) Decompose_CSP_S that decomposes a constantly-banded, symmetric,
C positive definitive matrix into the produce of [L][L]^T.
C (2) Substitute_CSP_S that implements forward and backward substitutions.
C
C If a multiple-processor computer is available, these two subroutines
C will be implemented in a loop. The loop bound begins with one to the
C number of system processors, and each loop sets the loop counter
C as the number of employed processors. Such that, each loop executes on
C different number of processors. Keep computer STANDING ALONE while
C running this program, and elapsed time with respect to different number
C of processors will be reported. And, the speedup can be seen.
C
C Running this program, user has to provide the matrix order, i.e., 10000,
C and the lower bandwidth, i.e., 600. Larger problem may have a better
C performance. It is inappropriate to try this program for a small problem
C where no sufficient operations may be distributed onto multiple
C processors. For a small bandwidth problem, try a multiple-entry solver.
C
C define global variable
C
PARAMETER (LIMIT=6100000)
REAL*4 A(LIMIT)
C
C variables for number of processors
C where Processors : number of employed processors
C CPUs: number of system processors
C
INTEGER*4 Processors,CPUs
C
C working variables
C
INTEGER*4 I,JJ,KK
C
C variables for calculating relative error
C where Lower : lower bound of relative error
C Upper : upper bound of relative error
C R4TEMP : working variable
C
REAL*4 Lower,Upper,R4TEMP
C
C variable for collecting elapsed time
C
REAL*4 Second
C
C variable for problem size
C where N : matrix order
C M : lower bandwidth
C
INTEGER*4 N,M
C
C variables for memory distribution
C where IndexA : index to the left side matrix [A]
C IndexX : index to the right side vector {X} and
C the solution computed
C IndexSolution : index to the exact solution for checking accuracy
C
INTEGER*4 IndexA,IndexX,IndexSolution
C
C execution flag
C
LOGICAL*4 NoGood
C
C numerical zero
C
REAL*4 ZERO
DATA ZERO/0.0001/
C
C enter order of the system
C
WRITE(*,'('' Enter order (i.e. 10000 or something else): '',$)')
READ(*,*) N
C
C enter half bandwidth
C
WRITE(*,
& '('' Enter half bandwidth (i.e. 600 or something else): '',
& $)')
READ(*,*) M
C
C memory distribution
C
IndexA=1
IndexX=IndexA+(N-1)*M+N
IndexSolution=IndexX+N
C
C check memory space
C
IF(IndexSolution+N.GT.LIMIT) THEN
WRITE(*,'('' Memory overflow'')')
STOP
END IF
C
C get number of CPUs on board
C
CALL GetCPUs(CPUs)
C
C set number of processor to execute the following statements
C
DO Processors=1,CPUs
CALL SetEmployedProcessors(Processors)
C
C generate left side matrix and right side vector
C
Write(*,*)
Write(*,'('' Generate data in a pseudo-random procedure...'',
& $)')
CALL GenerateData(A(IndexA),A(IndexX),A(IndexSolution),N,M)
C
C output number of employed processors
C
WRITE(*,'(/,'' Number of processors employed: '',I2)')
& Processors
C
C start collecting timing result spent in decomposition
C
CALL CollectElapsedTime
C
C decompose in parallel
C
CALL Decompose_CSP_S
& (
& A(IndexA),
& N,
& M,
& Zero,
& NoGood
& )
C
C output elapsed time
C
CALL GetElapsedTime(Second)
WRITE(*,'('' Decompose>>> Elapsed Time (Seconds): '',
& F8.2)') Second
C
C stop if the system is not positive definite
C
IF(NoGood) THEN
WRITE(*,'('' The system is not positive definite.'')')
STOP
END IF
C
C start collecting timing result spent in substitutions
C
CALL CollectElapsedTime
C
C substitute in parallel
C
CALL Substitute_CSP_S(A(IndexA),N,M,A(IndexX))
C
C output elapsed time
C
CALL GetElapsedTime(Second)
WRITE(*,'('' Substitute>>> Elapsed Time (Seconds): '',
& F8.2)') Second
C
C calculate relative error
C
KK=IndexX
JJ=IndexSolution
R4TEMP=(A(KK)-A(JJ))/A(JJ)
Lower=R4TEMP
Upper=R4TEMP
DO I=2,N
JJ=JJ+1
KK=KK+1
R4TEMP=(A(KK)-A(JJ))/A(JJ)
IF(R4TEMP.LT.Lower) THEN
Lower=R4TEMP
ELSE IF(R4TEMP.GT.Upper) THEN
Upper=R4TEMP
END IF
END DO
C
C output lower bound and upper bound of the solution
C
WRITE(*,*) 'Lower and upper bound of the relative error:'
WRITE(*,*) Lower,Upper
END DO
STOP
END
SUBROUTINE GenerateData(A,X,Solution,N,M)
C
C
C routine to generate the left side matrix, the solution,
C and the right side vector
C (A)FORTRAN CALL: CALL GenerateData(A,X,Solution,N,M)
C 1.A: <R4> left side matrix, dimension((N-1)*M+N)
C 2.X: <R4> right side vector, dimension(N)
C 3.Solution: <R4> return the solution for checking accuracy,
C dimension(N)
C 4.N: <I4> order
C 5.M: <I4> half band width
C
INTEGER*4 N,M,SEED
REAL*4 A(M,1),X(1),Solution(1)
C
C left side matrix
C
SEED=456789
DO I=1,(N-1)*M+N
A(I,1)=RAN(SEED)*100.0
END DO
DO I=1,N
DO J=MAX0(1,I-M),I-1
A(I,I)=A(I,I)+A(I,J)
END DO
DO J=I+1,MIN0(I+M,N)
A(I,I)=A(I,I)+A(J,I)
END DO
A(I,I)=A(I,I)*100.0
END DO
C
C solution
C
DO I=1,N
Solution(I)=Ran(Seed)*500.0
END DO
C
C right side vector
C
DO I=1,N
X(I)=A(I,I)*Solution(I)
DO J=MAX0(1,I-M),I-1
X(I)=X(I)+A(I,J)*Solution(J)
END DO
DO J=I+1,MIN0(I+M,N)
X(I)=X(I)+A(J,I)*Solution(J)
END DO
END DO
C
RETURN
END
REAL*4 FUNCTION RAN(SEED)
C
C
C function to generate a pseudo-random number
C (A)FORTRAN ENTRY: RAN(SEED)
C 1.RAN: <R4> return a random number
C 2.SEED: <I4> a seed (updated)
C
INTEGER*4 SEED,A
REAL*8 RTEMPA,RTEMPB
DATA M/2147483647/,A/16807/
C
C generate a random number
C
RTEMPA=DFLOAT(SEED)*A
ITEMP=INT(RTEMPA/DFLOAT(M))
RTEMPB=DFLOAT(M)*DFLOAT(ITEMP)
SEED=INT(RTEMPA-RTEMPB)
RAN=DFLOAT(SEED)/DFLOAT(M)
C
RETURN
END