home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
The C Users' Group Library 1994 August
/
wc-cdrom-cusersgrouplibrary-1994-08.iso
/
vol_100
/
181_01
/
carne_fp.cod
< prev
next >
Wrap
Text File
|
1986-01-09
|
22KB
|
716 lines
Reprinted from: Micro/Systems Journal, Volume 1. No. 5. Nov/Dec 1985
Article Title: "Faster Floating Point Math"
Author: Ted Carnevale
-----------------------------------------------------------------
Copy of back issue may be obtained for $4.50 (foreign $6) from:
Micro/Systems Journal
Box 1192
Mountainside NJ 07092
-----------------------------------------------------------------
Copyright 1986
Micro/Systems Journal, Box 1192, Mountainside NJ 07092
This software is released into the public domain for
non-commercial use only.
-----------------------------------------------------------------
Listing 1
=========
/* fpc.c--tests conversion to/from amd9511 fpp format */
#include fprintf.h
#include c80def.h
#define MESSAGE "\nFloating-point format conversion program")
#define DASHES "\n\n----------------------------------------")
#define MAX 6 /* how many different values to feed
to the format conversion routines */
/* replace the next two functions with
your own format conversion routines as needed */
extern long c2amd(); /* link .rel file containing these */
extern float amd2c(); /* to fpc.rel */
main()
{
int i;
static int sx=10;
float x;
static float dx=1.0;
printf(MESSAGE);
printf(DASHES);
/* a geometric series of positive values */
for (i = 0, x = 100.0; i<=MAX; x /= sx, i++) showbits(x);
printf(DASHES);
/* a geometric series of negative values */
for (i = 0, x = -100.0; i<=MAX; x /= sx, i++) showbits(x);
printf(DASHES);
/* a linear series of positive and negative values */
for (i = -MAX, x=(float)(-MAX); i<=MAX; x += dx, i++) showbits(x);
}
/* show bit patterns used by C80 and AMD FPP floating point formats
to represent the float n */
showbits(n)
float n;
{
int i;
union {
float f;
long l;
} x,z; /* unions are easier to use here than pointers */
long y;
x.f=n;
/* show C80's preconversion bit pattern */
printf("\n\nx = %e,\t\tBCDE = ",x.f);
prntlong(x.l);
/* eliminate next few lines if you just want to check the format
used by your version of C */
/* show AMD formatted data */
y=c2amd(x.f);
printf("\n\t\t\t\t AMD = ");
prntlong(y);
/* convert back to C80's format */
z.f=amd2c(y);
printf("\nz = %e,\t\tBCDE = ",z.f);
prntlong(z.l);
/* end of C80 -> AMD -> C80 format conversion test */
}
/* print bit pattern of a long, starting with the high-order bit
of the most significant byte, working down from left to right */
prntlong(k)
long k;
{
int i;
union {
long l;
char b[4];
} datum;
datum.l=k;
for (i=3; i>=0; i--) {
prntbyt(datum.b[i]);
printf(" ");
}
}
/* print the bit pattern for a byte from left to right,
high order bit first (does the dirty work for prntlong) */
prntbyte(i)
int i;
{
int j;
char bit;
for (j=0x80; j>0; j=j>>1) {
if (i & j) bit='1';
else bit='0';
printf("%c",bit);
}
}
Listing 2
=========
TITLE FLTLB
PAGE 64
; Floating point library
; C/80 3.0 (7/7/83) - (c) 1983 Walter Bilofsky
;MODIFIED 8/84 to use amd9511 for floating point multiply/divide
;and MATHLIB functions as well.
;Replaces modules FLTLIB and MATHLIB
; -- N.T.Carnevale
;
;
;these were gleaned from LIB's listing of FLIBRARY.REL:
;
ENTRY Bf.Bl,Bf.Hc,Bf.Hi,Bf.Hu,Bl.Bf
ENTRY cf.eq,cf.ge,cf.gt,cf.le,cf.lt,cf.ne
ENTRY div10.,mul10.
ENTRY dum_
ENTRY errcod
ENTRY F.add,F.div,F.mul,F.neg,F.not,F.sub
ENTRY facl_,facl_1,facl_2,fac_,fac_1
ENTRY fadd.,fadda.,fcomp.,fdiv_a,fdiv_b,fdiv_c,fdiv_g
ENTRY flneg.,float.,flt.0,flt_pk
ENTRY fmlt_1,fmlt_2
ENTRY Hc.Bf,Hi.Bf,Hu.Bf
ENTRY inxhr.
ENTRY movfm.,movfr.,movmf.,movrf.
ENTRY pushf.,qint.,save_,save_1,sign.,zero.
;
EXTRN c.neg,c.sxt,eq.4,g.,Hc.Bl,Hi.Bl,Hu.Bl,L.add,L.neg
EXTRN llong.,movrm.,neq.4,slong.
;
;this preserves the 15 byte data area revealed by LIB:
DSEG
;
facl_: DB 0
facl_1: DB 0
facl_2: DB 0
fac_: DB 0
fac_1: DB 0
save_: DB 0
fmlt_1: DB 0
fmlt_2: DB 0
dum_: DB 0
save_1: DB 0
errcod: DB 0
fdiv_a: DB 0
fdiv_b: DB 0
fdiv_c: DB 0
fdiv_g: DB 0
;
; CSEG
;
flt_pk: DS 0
F.add: XRA A
JMP Dual
F.sub: MVI A,1
JMP Dual
F.mul: JMP fpmul ;jump to new routines
; MVI A,2
; JMP Dual
F.div: JMP fpdiv
; MVI A,3
Dual: CALL movfr.
Listing 3
=========
Ftab: DW fadd.
DW fsub ;next two addresses not used
; DW fpmul
; DW fpdiv
Listing 4
=========
;<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
;
;
;What follows replaces the original code in the FLTLIB section of
;FLOATLIB.ASM that started with fmult3: and ended just before pophrt:.
;
;
CR EQU 0DH
LF EQU 0AH
BDOS EQU 5
BOOT EQU 0
PSTRNG EQU 9
;
;
;send message to console
PRMSG:
PUSH PSW
PUSH B
PUSH D
PUSH H
MVI C,PSTRNG
CALL BDOS
POP H
POP D
POP B
POP PSW
RET
;
;**************************************************************
;
;
;Port addresses
;
BASE EQU 050H ;base address of Compupro SS1 board
CREG EQU BASE+9 ;location of 9511's control & data ports
DREG EQU BASE+8
;
;
;FPP error codes
;
ERRBITS EQU 1EH ;the status register's error bits
OVRFLO EQU 2 ;overflow
UNRFLO EQU 4 ;underflow
NEGARG EQU 8 ;negative argument to sqrt or log function
DIVZER EQU 10H ;divide by zero
TOOBIG EQU 18H ;arg of asin, acos, or exp too big
;
;
;FPP command codes
;
FMUL EQU 12H
FDIV EQU 13H
XCHF EQU 19H ;swap top two locations in fpp's stack
FSQRT EQU 1
FSIN EQU 2
FCOS EQU 3
FTAN EQU 4
FASIN EQU 5
FACOS EQU 6
FATAN EQU 7
FLOG EQU 8 ;log base 10
FLN EQU 9 ;natural logarithm
FEXP EQU 0AH ;e^x
FPWR EQU 0BH ;x^y
;
;**************************************************************
;**************************************************************
;
;
;note: timing indicated for some of the following
;
;This block converts c80 float in BD to FPP format, then loads it into FPP.
;If out of range for FPP, aborts with warning.
;
;Data format conversion routine C2AMD--
;converts c80's float to amd's fp format.
;Based on suggestions by J.Shook, Electronics Lab, Dept.of Chemistry,
;SUNY Stony Brook
;
;Floating point formats
;----------------------
;C80 stores floats as:
; mantissa sign in C7
; mantissa = 24 bit two's complement in CDE,
; with bit 23 assumed = 1.
; exponent is added to 128 (80H) and stored in B.
; If the number is 0, B=0.
;
;FPP stores floats as:
; mantissa sign in B7
; mantissa = 24 bit two's complement in CDE,
; with bit 23 = 1. However,
; the value 0 is represented by BD=0.
; exponent sign in B6
; exponent in B5-B0 (only 6 bits).
;
;
;Call with value to convert in BD.
C2AMD:
MOV A,B ; 4
ORA A ; 4
JZ FPPZERO ;10--B=0 implies x=0
;take care of register B
SUI 80H ; 7--corrects exponent
;does exponent lie in FPP's range?
CPI 64 ; 7
JP EXPHI ;10--exceeds 2^63
CPI -64 ; 7
JM EXPLO ;10--smaller than 2^-64
;exponent ok, proceed
ANI 7FH ; 7--mask out hi bit of B
MOV B,A ; 4-- and save til later
MOV A,C ; 4
ANI 80H ; 7--isolate mantissa sign bit
ORA B ; 4--fix mantissa sign bit for FPP
MOV B,A ; 4--done with B
;take care of C
MVI A,80H ; 7
ORA C ; 4--set hi bit of C
MOV C,A ; 4
JMP LOADFPP ;10--put data in FPP
;114 t cycles @ 4mHz = 28.5usec
;
FPPZERO: ;x=0
XRA A
MOV C,A
MOV D,A
MOV E,A
;a short cut, time not calculated
;fall through to next routine
;
;
;Data transfer routine
;LOADFPP puts BD into fpp
LOADFPP:
MOV A,E ; 4
OUT DREG ;11
MOV A,D ; 4
OUT DREG ;11
MOV A,C ; 4
OUT DREG ;11
MOV A,B ; 4
OUT DREG ;11
RET ;10
;70 t cycles = 17.5usec
;
;TOTAL TIME for C2AMD & LOADFPP = 46usec
;
;**************************************************************
;
;
;C2AMD found float to be out of range for FPP--warn & abort
EXPHI:
LXI D,HIMSG
JMP ERREXIT
EXPLO:
LXI D,LOMSG
ERREXIT:
CALL PRMSG
JMP BOOT ;bail out!
;
;
HIMSG: DB CR,LF,'float --> FPP exponent overflow ( > 2^63)$'
LOMSG: DB CR,LF,'float --> FPP exponent underflow ( < 2^-64)$'
;
;**************************************************************
;**************************************************************
;
;
;
;FLD1 gets 1 float from the stack, puts it into fpp,
;and DOES NOT restore stack. Called by "intrinsic"
;float arithmetic operations, e.g. fpmul and fpdiv.
FLD1:
POP H ;10--return addr for this block
SHLD RETADR ;16
POP H ;10--return addr for calling function
POP D ;10
POP B ;10--BD = argument
CALL C2AMD ;17--convert format & load FPP
;
PUSH H ;11--restore return address of calling function
LHLD RETADR ;16--return to calling function
PCHL ; 4-- without disturbing stack
;104 t cycles = 26usec, + 46 for C2AMD = 72usec
;
;**************************************************************
;
;
;FLOAD1 gets 1 float from the stack, puts it into fpp,
;and restores stack for c80. Called by "user-defined"
;functions, not by fpmul or fpdiv.
FLOAD1:
POP H ;10--return addr for this block
SHLD RETADR ;16
POP H ;10--return addr for calling function
POP D ;10
POP B ;10--BD = argument
CALL C2AMD ;17--convert format & load FPP
;fix stack for c80
PUSH H ;11
PUSH H ;11
PUSH H ;11--restore return address of calling function
LHLD RETADR ;16--return to calling function
PCHL ; 4-- without disturbing stack
;126 t cycles = 31.5usec, + 46 for C2AMD = 77.5usec
;
;**************************************************************
;
;
;FLOAD2 gets 2 floats from the stack, puts them into fpp,
; and restores the stack for c80
FLOAD2:
POP H ;10--return addr for this block
SHLD RETADR ;16
POP H ;10--return addr for calling function
POP D ;10
POP B ;10--BD = second argument
CALL C2AMD ;17--convert format & load FPP
POP D ;10
POP B ;10--BD = first argument
CALL C2AMD ;17
;fix stack for c80
PUSH H ;11 x 5 = 55
PUSH H
PUSH H
PUSH H
PUSH H ;restore return address of calling function
LHLD RETADR ;16--return to calling function
PCHL ; 4-- without disturbing stack
;185 t cycles = 46.25usec, + 2 * 46 (for C2AMD) = 138.25usec
;
RETADR: DS 2 ;used by all FPP load routines
;
;**************************************************************
;**************************************************************
;
;
;
;fpmask() allows the user to specify which error bits
;are tested for error detection. The argument to fpmask
;becomes the ERRMASK that is ANDed with the status word
;to detect a hardware (FPP) error.
PUBLIC fpmask
fpmask:
POP H ;return address
POP B ;argument
PUSH B ;fix stack
PUSH H
MOV A,C ;get the user-specified error mask
ANI ERRBITS ; and mask out all but the genuine error bits
STA ERRTEST+1
RET
;
;**************************************************************
;
;
;FPP Error handling routine
;print message according to what sort of error occurred
FPERR:
LXI D,EMSG0
CALL PRMSG ;print general message
MOV C,A ;save error code
MVI B,MSGPTR-ETYPES ;# of codes to check
LXI H,ETYPES ;M=first code in the list
LXI D,MSGPTR ;(DE) = address that contains
; location of message for first error code
ERCHEK:
ANA M
JMP ERMATCH
MOV A,C ;restore error code
INX H ;advance to next code and message
INX D
INX D
DCR B
JNZ ERCHEK ;more codes to test
;should never fall through, but if it does, will print 'huh?'
;
ERMATCH:
XCHG ;(HL) holds address of error message
MOV E,M
INX H
MOV D,M
JMP ERREXIT
;
;
EMSG0: DB CR,LF,,LF,'FPP error: $'
EMSG1: DB 'overflow$'
EMSG2: DB 'underflow$'
EMSG3: DB 'sqrt or log of negative number$'
EMSG4: DB 'divide by 0$'
EMSG5: DB 'argument of asin, acos, or exp too large$'
EMSGX: DB 'huh?$' ;should never occur!
;
;
ETYPES: DB OVRFLO,UNRFLO,TOOBIG,NEGARG,DIVZER
MSGPTR: DW EMSG1,EMSG2,EMSG5,EMSG3,EMSG4,EMSGX
;
;**************************************************************
;**************************************************************
;
;
;Next come the hardware floating point routines themselves.
;The multiplcation routine contains most of what the
;other routines use, so it is listed first.
PUBLIC fpmul
fpmul:
;Note: for fp multiply and divide,
;c80 passes the first arg in the stack, the second in BD.
;For the intrinsic multiply and divide operations,
;the calling program does NOT have to remove args
;from the stack upon return! This is quite different from
;the situation with user-defined functions.
;
;take 2nd arg from BD, convert to amd format, and pass to fpp
CALL C2AMD ;17
;take 1st arg from stack, convert format, and pass to fpp
CALL FLD1 ;17
;send command, test for error, return with result in BD
MVI A,FMUL ; 7
;fall through to DOIT-ERRTEST-GETIT-AMD2C
;
;
;DOIT-GETIT starts an FPP operation, tests for error,
;gets the result & converts to c80 format
;
;send command in A to FPP
DOIT: OUT CREG ;11--send command
WAIT: IN CREG ;10
ORA A ; 4
JM WAIT ;10--until done
;11 + n*24 t cycles = 2.75 + 6n usec (X)
;
;A contains error code
ERRTEST:
ANI ERRBITS ; 7--default = test all error bits
JNZ FPERR ;10--if error found, else fall through to GETIT
;
;get top of fpp's stack into BD
GETIT:
IN DREG ;10
MOV B,A ; 4
IN DREG ;10
MOV C,A ; 4
IN DREG ;10
MOV D,A ; 4
IN DREG ;10
MOV E,A ; 4
;
;Fall through to AMD2C, which converts FPP format in BD
;to c80 float
;AMD2C is based on suggestions by J.Shook, Electronics Lab,
;Dept.of Chemistry, SUNY Stony Brook
;
AMD2C:
MOV A,C ; 4
ORA A ; 4
JP C80ZERO ;10--bit 7=0 implies value is 0
;take care of mantissa sign
XRA B ; 4--complements bit 7 of B, among other things
ANI 80H ; 7--mask out all the garbage
XRA C ; 4--hi bit of C was 1 in AMD format, so this
MOV C,A ; 4-- sets the hi bit of C to mantissa sign
;take care of exponent sign
MVI A,40H ; 7
ANA B ; 4--is exponent negative?
MOV A,B ; 4
JZ POSEXP ;10--no, set hi bit to 1 (add 128)
ANI 7FH ; 7-- yes, mask out hi bit
MOV B,A ; 4
RET ;10
;
POSEXP:
ORI 80H ;set hi bit for positive exponent
MOV B,A
RET
;format conversion delay identical whether exponent is + or -
;
C80ZERO:
XRA A
MOV B,A
RET
;short cut--no time calculated
;
;(DOIT-ERRTEST-GETIT-AMD2C takes 156 t cycles
; = 39usec, + X (for operation))
;
;41 t cycles = 10.25usec, + 46 (C2AMD) + 72 (FLD1) + 39
; + X (DOIT) = 167.25 + X usec
;
;float sqrt(x) float x;
PUBLIC sqrt
sqrt:
CALL FLOAD1
MVI A,FSQRT
JMP DOIT
;float sin(x) float x;
PUBLIC sin
sin:
CALL FLOAD1
MVI A,FSIN
JMP DOIT
;float cos(x) float x;
PUBLIC cos
cos:
CALL FLOAD1
MVI A,FCOS
JMP DOIT
;float tan(x) float x;
PUBLIC tan
tan:
CALL FLOAD1
MVI A,FTAN
JMP DOIT
;float asin(x) float x; /* arc sin in radians */
PUBLIC asin
asin:
CALL FLOAD1
MVI A,FASIN
JMP DOIT
;float acos(x) float x;
PUBLIC acos
acos:
CALL FLOAD1
MVI A,FACOS
JMP DOIT
;float atan(x) float x;
PUBLIC atan
atan:
CALL FLOAD1
MVI A,FATAN
JMP DOIT
;float log(x) float x; /* log10 */
PUBLIC log
log:
CALL FLOAD1
MVI A,FLOG
JMP DOIT
;float ln(x) float x; /* log e */
PUBLIC ln
ln:
CALL FLOAD1
MVI A,FLN
JMP DOIT
;float exp(x) float x;
PUBLIC exp
exp:
CALL FLOAD1
MVI A,FEXP
JMP DOIT
;float pwr(x,y) float x,y; /* x to the yth power */
PUBLIC pwr
pwr:
CALL FLOAD2
;
MVI A,XCHF
OUT CREG ;11--send command
PWAIT: IN CREG ;10
ORA A ; 4
JM PWAIT ;10--until done
;
MVI A,FPWR
JMP DOIT
;
;<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
;briefly back to the original source for fltlib--
;
;
pophrt: POP H
RET
;
;since div10.: is called by an external routine, I left it alone.
div10.: CALL pushf.
LXI B,8420H
LXI D,0000H
CALL movfr.
fdivt: POP B
POP D
;
;<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
;the fdiv: routine is replaced by fpdiv:, and muldiv: is not needed
PUBLIC fpdiv
fpdiv:
;take 2nd arg from BD, convert to amd format, and pass to fpp
CALL C2AMD ;17
;take 1st arg from stack, convert format, and pass to fpp
CALL FLD1 ;17
;swap fpp's A and B registers
MVI A,XCHF ; 7
OUT CREG ;11--send command
DWAIT: IN CREG ;10
ORA A ; 4
JM DWAIT ;10--until done
;send command, test for error, return with result in BD
MVI A,FDIV ; 7
JMP DOIT ;10
;takes 169.75usec + X plus delay for register swap
;END OF MODIFICATIONS TO FLTLIB.MAC<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
;back to the original code again at mul10.:
mul10.: CALL movrf.