home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
C!T ROM 2
/
ctrom_ii_b.zip
/
ctrom_ii_b
/
PROGRAM
/
PASCAL
/
TPL60N19
/
ARISOURC
/
FP48KER.ASM
< prev
next >
Wrap
Assembly Source File
|
1993-01-25
|
41KB
|
899 lines
; *******************************************************
; * *
; * Turbo Pascal Runtime Library Version 6.0 *
; * Real Kernel Routines (Add,Sub,Mul,Div,Sqr) *
; * *
; * Copyright (C) 1989-1992 Norbert Juffa *
; * *
; *******************************************************
TITLE FP48KER
INCLUDE SE.ASM
;-------------------------------------------------------------------------------
;
; Turbo Pascal REAL floating-point format
;
; 47 46 8 7 0
; +--+------------------------------+--------+
; |S | Mantissa |Exponent|
; +--+------------------------------+--------+
;
; 47 31 15 7 0
; +-------------+------------+------+--------+
; | DX | BX | AH | AL |
; +-------------+------------+------+--------+
;
; 47 31 15 7 0
; +-------------+------------+------+--------+
; | DI | SI | CH | CL |
; +-------------+------------+------+--------+
;
; value = 1^(-S) * Mantissa/2^40 * 2^(Exponent - 129)
;
;-------------------------------------------------------------------------------
CODE SEGMENT BYTE PUBLIC
ASSUME CS:CODE
; Externals
EXTRN HaltError:NEAR
; Publics
PUBLIC RealAdd,RealSub,RealMul,RealDiv
PUBLIC RealSqr,RealSqrNoChk,RealDivRev
PUBLIC RealMulNoChk,RealMulNChk2
PUBLIC RAdd,RSub,RMul,RDiv,RSqr,ROverflow
;-------------------------------------------------------------------------------
; RealAdd and RealSub are the routines for adding and subtracting two numbers
; in the Turbo Pascal 6 byte floating point format. They are practically ident-
; ical, since subtraction is implemented as an addition with a negated second
; addend. If underflow occurs, zero is returned. On overflow the carry flag
; will be set. The rounding of these routines complies with the IEEE "round to
; nearest or even" mode. Guard and sticky flags are therefore fully implemented.
;
; INPUT: DX:BX:AX first addend
; DI:SI:CX second addend
;
; OUTPUT: DX:BX:AX sum
; CF set if overflow occured, else cleared
;
; DESTROYS: AX,BX,CX,DX,SI,DI,Flags
;-------------------------------------------------------------------------------
AddExt PROC NEAR
$ret_second: XCHG AX, CX ; load second addend
MOV BX, SI ; into DX:BX:AX (DX currently loaded)
RET ; done
AddExt ENDP
RealSub PROC NEAR
XOR DI, 8000h ; negate second argument
RealSub ENDP
RealAdd PROC NEAR
CMP CL, AL ; second addend bigger ?
JAE $bigger ; yes
XCHG AX, CX ; no,
XCHG BX, SI ; exchange
XCHG DX, DI ; addends
$bigger: XCHG DX, DI ; DX = msb of second addend
NEG AL ; smaller addend zero ?
JZ $ret_second ; yes, return other addend
ADD AL, CL ; compute difference of exponents
CMP AL, 41 ; difference too big ?
JA $ret_second ; yes, add/sub will not change bigger arg
PUSH BP ; save TURBO-Pascal frame pointer
MOV BP, 0FF00h ; load mask for msb
AND BP, CX ; save msb of second addend
MOV CH, 80h ; mask for sign bit
AND CH, DH ; sign bit of second addend
PUSH CX ; save sign and exponent
XOR CX, DI ; test if operands have different sign
PUSHF ; save sign indicator
OR DH, 80h ; set implicit bit in second addend
XCHG DX, DI ; DX = msb of first addend
OR DH, 80h ; set implicit bit in first addend
XOR CX, CX ; set guard and sticky bytes to 0
IFDEF USE386
.386
XCHG CL, AL ; shift counter in CX
SHL EDX, 16
MOV DX, BX
XOR EBX, EBX
SHRD EBX, EAX, 16
XOR EAX, EAX ; mantissa in EDX:EBX:EAX
CMP CX, 32
JB $do_32
XCHG EAX, EDX
XCHG EAX, EBX
$do_32: SHRD EAX, EBX, CL
SHRD EBX, EDX, CL
SHR EDX, CL
NEG EAX
SBB CX, CX
OR CX, BX
MOV BX, DX
SHR EDX, 16
SHLD EAX, EBX, 16
.8086
ELSE
comment #
XCHG CL, AL
$try_16: CMP CL, 16
JB $try_1
OR CH, AL
OR CH, AH
MOV AX, BX
MOV BX, DX
XOR DX, DX
SUB CL, 16
JMP $try_16
$try_1: PUSH SI
NEG CH
SBB SI, SI
SHRD SI, AX, CL
SHRD AX, BX, CL
SHRD BX, DX, CL
SHR DX, CL
NEG SI
SBB CH, CH
POP SI
MOV CL, CH
ELSE
#
XCHG AL, CH ; DX:BX:AX = mantissa, CH = shift counter
$test_shift: CMP CH, 4 ; less than 4 bit shifts necessary ?
JB $bit_shift ; yes, do it one bit at a time
CMP CH, 8 ; between 4 and 7 bit shifts necessary ?
JB $4bit_shift ; yes, do 4 bit shift first
CMP CH, 16 ; between 8 and 15 bit shifts necessary ?
JB $byte_shift ; yes, do byte shift first
OR CL, AL ; accumulate
OR CL, AH ; sticky byte
XCHG AX, DX ; shift
XCHG AX, BX ; mantissa 16 bits
XOR DX, DX ; to the right
SUB CH, 16 ; decrement shift counter by 16
JMP $test_shift ; test remaining shifts
$byte_shift: OR CL, AL ; accumulate sticky byte
MOV AL, AH ; shift
MOV AH, BL ; mantissa
MOV BL, BH ; eight
MOV BH, DL ; bits
MOV DL, DH ; to the
XOR DH, DH ; right
TEST CH, 4 ; 4 bit shift possible ?
JZ $bit_shift ; no, try single bit shifts
$4bit_shift: NEG CL ; set sticky flag = FFh
SBB CL, CL ; if <> 0 before
OR CL, AL ; accumulate
AND CL, 0Fh ; sticky flag
SHR DX, 1 ; shift
RCR BX, 1 ; mantissa
RCR AX, 1 ; 1 bit to the right
SHR DX, 1 ; shift
RCR BX, 1 ; mantissa
RCR AX, 1 ; 1 bit to the right
SHR DX, 1 ; shift
RCR BX, 1 ; mantissa
RCR AX, 1 ; 1 bit to the right
SHR DX, 1 ; shift
RCR BX, 1 ; mantissa
RCR AX, 1 ; 1 bit to the right
$bit_shift: AND CH, 3 ; compute number of single bit shifts
JZ $shift_done ; no shifts necessary, mantissas aligned
NEG CL ; set sticky flag to FFh
SBB CL, CL ; if <> 0 before
ALIGN 4
$bit_loop: SHR DX, 1 ; shift
RCR BX, 1 ; mantissa
RCR AX, 1 ; 1 bit to the right
ADC CL, CL ; accumulate sticky byte
DEC CH ; decrement shift counter
JNZ $bit_loop ; until shift counter zero
ENDIF
$shift_done: POPF ; signs of addends different ?
JS $subtract ; sign of addends differ
ADD AX, BP ; add
ADC BX, SI ; mantissas
ADC DX, DI ; of two addends
MOV BP, CX ; get sticky byte
POP CX ; get exponent and sign
JNC $no_overf ; no mantissa overflow
SHR DX, 1 ; divide
RCR BX, 1 ; mantissa
RCR AX, 1 ; by two
INC CX ; adjust exponent
$no_overf: DEC CX ; exponent-1
JMP $add_sub_end ; do rounding
$ret_first: POP BP ; restore TURBO-Pascal frame pointer
RET ; done
$subtract: XCHG AX, BP ; exchange
XCHG BX, SI ; addends
XCHG DX, DI ; for correct order
NEG CX ; set carry if sticky byte <> 0
SBB AX, BP ; subtract
SBB BX, SI ; the two
SBB DX, DI ; mantissas
MOV BP, CX ; get sticky byte
POP CX ; get exponent and sign of result
JNC $no_negate ; no negative result
XOR CH, 80h ; result has other sign than 2. addend
NOT DX ; negate
NOT BX ; number
NEG AX ; in
SBB BX, -1 ; DX:BX:AX
SBB DX, -1 ; "
$no_negate: JS $no_overf ; mantissa normalized
JZ $test_z1 ; first mantissa word is zero
ALIGN 4
$shift_l: DEC CX ; adjust exponent
ADD AX, AX ; multiply
ADC BX, BX ; mantissa
ADC DX, DX ; by two
JNS $shift_l ; normalized? no
DEC CX ; exponent-1
JMP $add_sub_end ; do rounding
$test_z1: XCHG BX, AX ; do a 16-bit
XCHG DX, AX ; left shift of the mantissa
SUB CX, 16 ; adjust exponent
OR DX, DX ; first mantissa word zero?
JG $shift_l ; not zero, no sign
JS $no_overf ; mantissa normalized
XCHG DX, BX ; shift mantissa 16 bits left (AX=0)
SUB CX, 16 ; adjust exponent
OR DX, DX ; first mantissa word zero ?
JG $shift_l ; not zero, no sign
JS $no_overf ; mantissa normalized
POP BP ; mantissa zero, return DX:BX:AX=0
RET ; done
RealAdd ENDP
;-------------------------------------------------------------------------------
; RealMul multiplies two numbers in the Turbo Pascal 6 byte floating point
; format. If underflow occurs, zero is returned. On overflow the carry flag
; will be set. The routine multiplies the mantissas by computing nine partial
; products using the 80x86 MUL instruction. RealMulNoChk is the same routine
; as RealMul but does not check the operand in DI:SI:CX for zero. The fastest
; multiplication routine, RealMulNChk2, does not check either operand for zero.
; The rounding of this routine complies with IEEE "round to nearest or even"
; mode. For this purpose, guard and sticky flags are implemented.
;
; INPUT: DX:BX:AX multiplicand
; DI:SI:CX multiplicator
;
; OUTPUT: DX:BX:AX product
; CF set if overflow occured, else cleared
;
; DESTROYS: AX,BX,CX,DX,SI,DI,Flags
;-------------------------------------------------------------------------------
ALIGN 4
RealMul PROC NEAR
OR CL, CL ; multiplicator = 0 ?
JZ $zero_res ; result will be 0
RealMulNoChk PROC NEAR
OR AL, AL ; multiplicand = 0 ?
JZ $zero_res ; result is zero
RealMulNChk2 PROC NEAR
PUSH BP ; save TURBO-framepointer
XCHG BX, DI ; BX = b1, DI = a2
MOV BP, DX ; get sign of multiplicant
XOR BP, BX ; compute sign of result
AND BP, 8000h ; mask out sign bit
XCHG AL, CH ; save b3
ADD CL, CH ; sum of biased exponents
SBB CH, CH ; clear msb
NEG CH ; and put possible overflow in CH
OR CX, BP ; zap in sign bit
PUSH CX ; save new exponent and sign bit
XOR CX, CX ; clear lo-bytes of a3 and b3
OR DH, 80h ; set implicit bit of multipicand
OR BH, 80h ; set implicit bit of multiplicator
OR SI, SI ; b2 = 0 ?
JZ $test_short ; yes, test if b3 = 0
OR DI, DI ; a2 = 0 ?
JNZ $full_mult ; no, use full multiplication
OR AH, AH ; a3 = 0 ?
JNZ $full_mult ; no, use full multiplication
XCHG AH, AL ; swap a3 and b3
XCHG DI, SI ; swap a2 and b2
XCHG DX, BX ; swap a1 and b1
$test_short: OR AL, AL ; b3 = 0 ?
JNZ $full_mult ; no, use full multiplication
MOV SI, DX ; save a1
MUL BX ; b1 * a3
MOV BP, AX ; generate sticky byte = 0
XCHG AX, DX ; AX = msw of product
XCHG AX, DI ; save msw of product, get a2
MUL BX ; b1 * a2
XCHG AX, BX ; save lsw of product, get b1
XCHG DX, SI ; save msw of product, get a1
ADD BX, DI ; add product
ADC SI, CX ; to FPA
MUL DX ; b1 * a1
ADD AX, SI ; add product
ADC DX, CX ; result in DX:AX:BX
JMP $end_mantiss ; handle exponent
$zero_res: JMP $zero_prod2 ; result is 0
ALIGN 4
IFDEF USE386
.386
$full_mult: XCHG AL, CH ; CH = b3, AL = 0
; b1 = BX
; b2 = SI
; b3 = CX
; a1 = DX
; a2 = DI
; a3 = AX
SHL ESI, 16
SHRD ESI, EBX, 16 ; b1:b2 in ESI
SHL EDI, 16
SHRD EDI, EDX, 16 ; a1:a2 in EDI
MOV BX, AX ;
MUL CX ; a3*b3
SHL EBX, 16 ; a3
SHL ECX, 16 ; b3
SHL EAX, 16
SHRD EAX, EDX, 16 ; result in EAX
XCHG EAX, EBX ; save a3*b3
MUL ESI ; a3*(b1:b2)
ADD EBX, EAX
XCHG EAX, ECX ; get b3
MOV ECX, 0
ADC ECX, EDX ; ECX:EBX
MUL EDI ; b3*(a1:a2)
ADD EBX, EAX
MOV EAX, 0
ADC ECX, EDX
XCHG EAX, EDI
ADC EDI, EDI
MUL ESI
ADD EAX, ECX
ADC EDX, EDI ; EDX:EAX:EBX
NEG EBX
SBB BP, BP
OR BP, AX
SHLD EBX, EAX, 16
XCHG AX, DX
SHR EDX, 16
JMPS $end_mantiss
$sqr_end: ADC DI, DX ; to SI:DI:BX
ADC SI, 0 ; FPA
MUL CX ; a1 * b1
ADD AX, DI
ADC DX, SI ; result in DX:AX:BX
.8086
ELSEIFDEF FASTOPS
ALIGN 4
$full_mult: XCHG AL, CH ; CH = b3, AL = 0
PUSH BX ; save b1
PUSH DX ; save a1
MOV BP, DX ; save a1
; b1 = BX
; b2 = SI
; b3 = CX
; a1 = DX
; a2 = DI
; a3 = AX
MUL BX ; b1 * a3
XOR BX, BX ; clear FPA
XCHG AX, CX ; get b3, save LSW (b1*a3)
XCHG DX, BP ; get a1, save MSW (b1*a3)
MUL DX ; a1 * b3
ADD CX, AX ; add
ADC BP, DX ; result
ADC BX, BX ; to FPA
MOV AX, SI ; b2
MUL DI ; a2 * b2
ADC CX, AX
ADC BP, DX
ADC BX, 0
XOR CX, CX ; FPA = CX:BX:BP
XCHG AX, SI ; get b2
POP SI ; get a1
MUL SI ; a1 * b2
ADD BP, AX ; add
ADC BX, DX ; result
ADC CX, CX ; to FPA
XCHG AX, DI ; get a2
POP DI ; get b1
MUL DI ; a2 * b1
ADD BP, AX ; add result
XCHG AX, DI ; get a1
XCHG CX, SI ; CX = b1
MOV DI, BX ; FPA = SI:DI:BX
MOV BX, BP ;
$sqr_end: ADC DI, DX ; to SI:DI:BX
ADC SI, 0 ; FPA
MUL CX ; a1 * b1
ADD AX, DI
ADC DX, SI ; result in DX:AX:BX
ELSE
$full_mult: PUSH BX ; save b1
PUSH DI ; save a2
PUSH SI ; save b2
PUSH DX ; save a1
PUSH BX ; save b1
MOV BX, CX ; clear most significant word of FPA
XCHG AL, CH ; CH = b3, AL = 0
MOV BP, AX ; a3
MOV AL, CH ; b3
MUL AH ; a3 * b3
XCHG AX, DI ; store to FPA, get a2
MUL CX ; a2 * b3
ADD DI, AX ; add result
ADC DX, BX ; to FPA
XCHG AX, DX ; and
XCHG AX, SI ; get b2
MUL BP ; a3 * b2
ADD DI, AX ; add result
ADC SI, DX ; to
ADC BX, BX ; FPA
XCHG AX, BP ; get a3
MOV BP, DI ; generate sticky flag
XOR DI, DI ; FPA = DI:BX:SI
POP DX ; get b1
MUL DX ; a3 * b1
ADD SI, AX ; add result to
ADC BX, DX ; FPA, no overflow possible
XCHG AX, CX ; b3
POP CX ; a1
MUL CX ; a1 * b3
ADD SI, AX ; add
ADC BX, DX ; result to
ADC DI, DI ; FPA
POP AX ; b2
POP DX ; a2
PUSH DX ; save a2
PUSH AX ; save b2
MUL DX ; a2 * b2
ADD SI, AX ; add
ADC BX, DX ; result
ADC DI, 0 ; to FPA
OR BP, SI ; accumulate sticky flag
XOR SI, SI ; FPA = SI:DI:BX
POP AX ; b2
MUL CX ; a1 * b2
ADD BX, AX ; add
ADC DI, DX ; result
ADC SI, SI ; to FPA
POP AX ; a2
POP DX ; get b1
PUSH DX ; save b1
MUL DX ; a2 * b1
ADD BX, AX ; add -------+
POP AX ; get b1 !
$sqr_end: ADC DI, DX ; result <-+
ADC SI, 0 ; to FPA
MUL CX ; a1 * b1
ADD AX, DI ; add result to FPA
ADC DX, SI ; DX:AX:BX = result
ENDIF
$end_mantiss:POP CX ; CH = exponent CL = sign
XCHG AX, BX ; DX:BX:AX = result
; SUB CX, 81h ; compute new exponent-1
$div_end: ; OR DX, DX ; is mantissa normalized ?
; JS $add_sub_end ; yes
js $$1
ADD AX, AX ; no, shift
ADC BX, BX ; FPA 1 bit
ADC DX, DX ; to the left
DEC CX ; adjust exponent
$$1: sub cx, 81h
$add_sub_end:ADD AX, 80h ; round
ADC BX, 0 ; up
ADC DX, 0 ; mantissa
ADC CX, 0 ; increment exponent if mantissa overfl.
IFNDEF FASTOPS
OR AL, AL ; tie case ?
JZ $tie_case ; tie case possible if sticky = 0, too
ENDIF
$round_done: POP BP ; restore caller's frame pointer
TEST CH, 40H ; test if (exponent-1) negative
JNZ $zero_prod2 ; yes, underflow, return zero
INC CX ; new exponent
MOV AL, CL ; store exponent
AND DH, 7Fh ; force MSB of mantissa to 0
OR DH, CH ; fill in sign bit
IFDEF NOOVERFLOW
ROR CH, 1 ; test if exponent overflow
ROL CH, 1 ; restore sign flag
ELSE
SHR CH, 1 ; test if exponent overflow (> FFh)
ENDIF
RET ; done
$zero_prod2: XOR AX, AX ; load
MOV BX, AX ; a
CWD ; zero
RET ; done
$tie_case: OR BP, BP ; sticky flag = 0 (tie case) ?
JNZ $round_done ; no, round up was correct
AND AH, 0FEh ; tie case, make mantissa even
JMP $round_done ; IEEE rounding done
RealMulNChk2 ENDP
RealMulNoChk ENDP
RealMul ENDP
;-------------------------------------------------------------------------------
; RealSqr computes the square of a number in the Turbo Pascal 6-byte floating
; point format. If underflow occurs, zero is returned. On overflow the carry
; flag will be set. Since squaring allows for some optimizations in code when
; compared with normal multiplication, RealSqr is implemented as a self con-
; tained routine and not as a call to RealMul. The routine exits thru RealMul.
; RealSqrNoChk does not check the argument for zero before squaring. Rounding
; complies with the IEEE "round to nearest or even" mode, so guard and sticky
; flags are provided.
;
; INPUT: DX:BX:AX argument
;
; OUTPUT: DX:BX:AX square of argument
; CF set if overflow occured, else cleared
;
; DESTROYS: AX,BX,CX,DX,SI,DI,Flags
;-------------------------------------------------------------------------------
RealSqr PROC NEAR
OR AL, AL ; argument = 0 ?
JZ $zero_prod2 ; result is zero
RealSqrNoChk PROC NEAR
XOR CX, CX ; clear register
XCHG CL, AL ; exponent in CL, AL = 0
ADD CX, CX ; new exponent, sign always positive (0)
PUSH BP ; save TURBO-Pascal frame pointer
PUSH CX ; save sign and exponent
OR DH, 80h ; set implicit bit of argument
MOV SI, AX ; a2 and
OR SI, BX ; a3 = 0 ?
JNZ $full_sqr ; no, do full multiplication
MOV AX, DX ; load a1
MUL DX ; a1 * a1
or dx, dx
JMPS $end_mantiss ; result in DX:AX:BX
ALIGN 2
$full_sqr: PUSH BX ; save a2
XOR DI, DI ; load zero
MOV CX, DX ; save a1
MOV BP, AX ; save a3
MOV AL, AH ; load a3
MUL AL ; a3 * a3
XCHG AX, BX ; save product, get a2
MUL BP ; a2 * a3
XCHG AX, BP ; get a3, BP = save lo-word a2*a3
MOV SI, DX ; save hi-word a2*a3
ADD BX, BP ; add a3*a3 to
ADC SI, DI ; a2*a3 (result in SI:BX, no overflow)
ADD BP, BX ; add a2*a3 lo-word to result
MOV BX, DI ; BX = 0
ADC SI, DX ; add a2*a3 hi-word
ADC DI, DI ; to result (DI:SI:BP)
XCHG DI, BX ; FPA = DI:BX:SI, BP = sticky byte
MUL CX ; a1 * a3
ADD SI, AX ; add product
ADC BX, DX ; to FPA (no overflow possible)
ADD SI, AX ; add
ADC BX, DX ; product to
ADC DI, DI ; FPA another time
POP AX ; get a2
PUSH AX ; save a2
MUL AX ; a2 * a2
ADD SI, AX ; add
ADC BX, DX ; product to
ADC DI, 0 ; FPA
OR BP, SI ; accumulate sticky byte
XOR SI, SI ; FPA = SI:DI:BX
POP AX ; get a2
MUL CX ; a1 * a2
ADD BX, AX ; add
ADC DI, DX ; resulting
ADC SI, SI ; product
ADD BX, AX ; to FPA twice
MOV AX, CX ; AX = CX = a1
JMP $sqr_end ; exit thru REAL_MUL
RealSqrNoChk ENDP
RealSqr ENDP
;-------------------------------------------------------------------------------
; RealDiv divides two numbers in the Turbo Pascal 6 byte floating point
; format. If underflow occurs, zero is returned. On overflow the carry flag
; will be set. The routine exits through the REAL_MUL routine. It makes use
; of the 80x86 DIV instruction in an estimate and correct algorithm. In each
; of the three steps, an estimation of a part of the quotient is produced by
; dividing the first 32 bits of the current remainder by the first 16 bits of
; the divisor using a machine instruction. Then the divisor is multiplied by
; the result and this product subtracted from the current remainder. If the sum
; is negative, the partial quotient must be decremented until the new remainder
; is positive. RealDivRev is an additional routine which exchanges the operands
; before performing the division. The rounding provided complies with IEEE
; "round to nearest or even" mode. For this purpose, guard and sticky flags
; are implemented.
;
; INPUT: DX:BX:AX dividend
; DI:SI:CX divisor
;
; OUTPUT: DX:BX:AX quotient
; CF set if overflow occured, else cleared
;
; DESTROYS: AX,BX,CX,DX,SI,DI,Flags
;-------------------------------------------------------------------------------
RealDivRev PROC NEAR
XCHG AX, CX ; exchange
XCHG BX, SI ; divisor and
XCHG DX, DI ; dividend
RealDivRev ENDP
RealDiv PROC NEAR
OR AL, AL ; dividend = 0 ?
JZ $zero_prod2 ; result is zero
PUSH BP ; save TURBO-Pascal framepointer
MOV BP, DX ; get msw of dividend
XOR BP, DI ; xor with msw of divisor to make sign
AND BP, 8000h ; isolate sign bit of result
OR DH, 80h ; set implicit bit in dividend
XCHG DX, DI ; DX = divisor msw, DI = dividend msw
OR DH, 80h ; set implicit bit in divisor
SUB AL, CL ; subtract exponents ----------+
MOV CL, 0 ; clear lsb of divisor lsw |
PUSH SI ; save divisor middle word |
PUSH CX ; and lsw on stack |
MOV CX, BP ; get sign |
XCHG AL, CL ; AL = 0, CL = new exponent |
SBB CH, AL ; put carry here <-------------+
add cx,101h
MOV BP, SP ; access divisor on stack via BP
SUB BP, 6 ; leave room for three pushes
SHR DI, 1 ; divide dividend
RCR BX, 1 ; by 2 to prevent
RCR AX, 1 ; an overflow condition
IFDEF FASTOPS ; di:bx:ax = dividend
; dx:[bp+8]:[bp+6] = divisor
PUSH CX ; save sign and exponent
XCHG AX, BX ; di:ax:bx = dividend
XCHG DX, DI ; dx:ax:bx = dividend, di:[bp+8]:[bp+6]
DIV DI ; AX = quotient1, DX = remainder msw
MOV CX, AX ; save quotient 1
MOV SI, DX ; save remainder = SI:BX
MUL WORD PTR [BP+8] ; quotient1 * divisor middle word
SUB BX, AX ;
SBB SI, DX ;
SBB DX, DX ; DX:SI:BX = remainder
PUSH DX ; save msw of divisor
MOV AX, [BP+6] ; divisor msw
MUL CX ;
NEG AX ;
SBB BX, DX ;
SBB SI, 0 ;
POP DX ;
SBB DX, 0 ; DX:SI:BX:AX = remainder, CX = quot 1
MOV DX, [BP+6]
MOV BP, [BP+8]
JZ $sub_ok ;
$add_twice: DEC CX ;
ADD AX, DX ;
ADC BX, BP ;
ADC SI, DI ;
JNC $add_twice ; until remainder positive
$sub_ok: MOV DX, SI ; SI:BX:AX = remainder
XCHG AX, BX ; DX:AX:BX = remainder
PUSH CX ; save quotient 1
XOR SI, SI
CMP DI, DX ; division overflow ?
JE $div_overfl
DIV DI ;
$cont: PUSH AX ; save quot2
MOV CX, DX ; CX:BX = remainder
MUL BP ;
SUB BX, AX
SBB CX, DX
SBB SI, 0
POP SI ; quot2
JNC $sub_ok2 ;
$add_twice2: DEC SI
ADD BX, BP
ADC CX, DI
JNC $add_twice2
$sub_ok2: MOV DX, CX
MOV AX, BX
CMP DX, DI
JE $div_overfl2
DIV DI
$cont2: POP DX ; get quotient 1
MOV BX, SI ; quotient = DX:BX:AX
ELSE
ALIGN 4
$divide_loop:PUSH CX ; save sign & exponent resp. part. quot.
MOV CX, DX ; get msw of divisor
XCHG AX, BX ; create new dividend
XCHG AX, DI ; by shifting remainder
XCHG AX, SI ; 16 bits to the left
CMP CX, SI ; overflow possible on division ?
JE $div_overfl ; yes
MOV DX, SI ; get msw of dividend
XCHG AX, DI ; second word of dividend
DIV CX ; compute partial quotient
XOR SI, SI ; subtract product of divisor high word
MOV DI, DX ; and partial quotient from dividend
$comp_rem: XCHG AX, CX ; AX = divisor high word, CX = quotient
PUSH AX ; save divisor high word
MOV AX, [BP+8] ; get middle word of divisor
MUL CX ; multiply by partial quotient
SUB BX, AX ; subtract the product of
SBB DI, DX ; divisor middle word and partial
SBB SI, 0 ; quotient from dividend
MOV AX, [BP+6] ; get lsw of divisor
MUL CX ; multiply by partial quotient
NEG AX ; subtract the product
SBB BX, DX ; of divisor LSW
SBB DI, 0 ; and partial
SBB SI, 0 ; quotient from dividend
POP DX ; get back msw of divisor
JZ $sub_ok ; remainder must be positive
$add_twice: DEC CX ; quotient to high, decrement it
ADD AX, [BP+6] ; adjust
ADC BX, [BP+8] ; quotient and
ADC DI, DX ; remainder
JNC $add_twice ; until remainder positive
$sub_ok: CMP BP, SP ; two partial quotients saved already ?
JNE $divide_loop ; no, continue (carry set !!!)
MOV BP, AX ; accumulate
OR BP, BX ; sticky
OR BP, DI ; byte
XCHG AX, CX ; get last partial quotient
POP BX ; get other
POP DX ; partial quotients
ENDIF
POP CX ; get sign and exponent
ADD SP, 4 ; remove saved divisor from stack
; ADD CX, 80h ; adjust new exponent for bias
; add cx, 101h
or dx, dx
JMP $div_end ; normalize mantissa and round
IFDEF FASTOPS
$div_overfl: MOV DX, AX
ADD DX, DI
ADC SI, SI
MOV AX, -1
JMP $cont
$div_overfl2:
MOV AX, -1
JMP $cont2
ELSE
$div_overfl: XOR SI, SI ; remainder - 10000h * divisor
ADD DI, CX ; remainder -
ADC SI, SI ; FFFFh * divisor
MOV AX, -1 ; quotient = FFFFh
JMP $comp_rem ; continue computation of remainder
ENDIF
RealDiv ENDP
ALIGN 4
RAdd PROC FAR
CALL RealAdd ; perform addition
JC ROverflow ; overflow error
RET ; done
RAdd ENDP
ALIGN 4
RSub PROC FAR
CALL RealSub ; perform subtraction
JC ROverflow ; overflow error
RET ; done
RSub ENDP
ALIGN 4
RSqr PROC FAR
CALL RealSqr ; perform squaring
JC ROverflow ; overflow error
RET ; done
RSqr ENDP
ALIGN 4
RMul PROC FAR
CALL RealMul ; perform multiplication
JC ROverflow ; overflow error
RET ; done
RMul ENDP
ALIGN 4
RDiv PROC FAR
OR CL, CL ; divisor zero ?
JZ RDivZero ; yes, error
CALL RealDiv ; perform division
JC ROverflow ; overflow error
RET ; done
RDiv ENDP
IFDEF NOOVERFLOW
ROverflow: MOV AX, 0FFFFh ; load
MOV BX, 0FFFFh ; largest
MOV DX, 07FFFh ; REAL number
OR DH, CH ; stuff in sign bit
RETF ; done
RDivZero: MOV CH, DH ; get dividend's sign
XOR CX, DI ; make sign of result
JMP ROverflow ; return largest REAL number
ELSE
ROverflow: MOV AX, 0CDh ; error code 205 (fp overflow)
JMP HaltError ; execute error handler
RDivZero: MOV AX, 0C8h ; error code 200 (division by zero)
JMP HaltError ; execute error handler
ENDIF
ALIGN 4
CODE ENDS
END