Virtual Reality Homebrewer's Handbook

home *** CD-ROM | disk | FTP | other *** search

/ Virtual Reality Homebrewer's Handbook / vr.iso / vr386 / sqrti.asm < prev next >

Wrap

Assembly Source File | 1996-03-19 | 7KB | 441 lines

TITLE SQRTI - INTEGER SQRT AND MAGNITUDE

COMMENT $

/* Routines for integer sqrt and magnitude for REND386 */

// All code by Dave Stampe, last updated 23/12/93
// These routines are (c) 1993 by Dave Stampe

/*
 This code is part of the VR-386 project, created by Dave Stampe.
 VR-386 is a desendent of REND386, created by Dave Stampe and
 Bernie Roehl.  Almost all the code has been rewritten by Dave
 Stampre for VR-386.

 Copyright (c) 1994 by Dave Stampe:
 May be freely used to write software for release into the public domain
 or for educational use; all commercial endeavours MUST contact Dave Stampe
 (dstampe@psych.toronto.edu) for permission to incorporate any part of
 this software or source code into their products!  Usually there is no
 charge for under 50-100 items for low-cost or shareware products, and terms
 are reasonable.  Any royalties are used for development, so equipment is
 often acceptable payment.

 ATTRIBUTION:  If you use any part of this source code or the libraries
 in your projects, you must give attribution to VR-386 and Dave Stampe,
 and any other authors in your documentation, source code, and at startup
 of your program.  Let's keep the freeware ball rolling!

 DEVELOPMENT: VR-386 is a effort to develop the process started by
 REND386, improving programmer access by rewriting the code and supplying
 a standard API.  If you write improvements, add new functions rather
 than rewriting current functions.  This will make it possible to
 include you improved code in the next API release.  YOU can help advance
 VR-386.  Comments on the API are welcome.

 CONTACT: dstampe@psych.toronto.edu
*/

$

;-----------------------------------------------------------------------
; Syntax:  MASM/TASM (Intel operand order), 386 instructions in
;          16-bit segments.
; ABI:     C-callable FAR procedures, large model.  Arguments are
;          read from the stack relative to BP; with the 4-byte
;          "push ebp" and the 4-byte far return address the first
;          argument sits at [bp+8].  The caller removes arguments.
;          32-bit results are returned in EAX and also in DX:AX.
;-----------------------------------------------------------------------

        .MODEL large

        .CODE INTMATH


;-----------------------------------------------------------------------
; SQRT32 - one iteration (one result bit) of the 32->16 bit square root
;
; In/Out: edx:eax = remainder : unconsumed argument bits (left-aligned)
;         ebx     = root estimate so far
; Clobb:  ecx, flags
;
; Shifts the next two argument bits into the remainder, doubles the
; estimate, and appends a 1 bit to it when remainder > 2*estimate,
; i.e. when remainder >= 2*estimate + 1 (which is then subtracted).
; Remainder and trial value are never negative, so the comparison
; is unsigned.
;-----------------------------------------------------------------------

SQRT32  MACRO
        LOCAL   skip
        shld    edx,eax,2               ;; next 2 argument bits -> remainder
        shl     eax,2
        add     ebx,ebx                 ;; estimate <<= 1
        mov     ecx,ebx
        add     ecx,ecx                 ;; ecx = 2 * estimate
        cmp     edx,ecx                 ;; remainder > 2*estimate ?
        jbe     skip                    ;; no: new result bit stays 0
        inc     ebx                     ;; yes: new result bit is 1
        inc     ecx                     ;; ecx = 2*estimate + 1
        sub     edx,ecx                 ;; remainder -= 2*estimate + 1
skip:
        ENDM


;-----------------------------------------------------------------------
; long squareroot32(long arg)
;
; Integer square root of a 32-bit argument, giving a 16-bit result.
; The argument is treated as unsigned.
;
; In:    [bp+8]  arg (dword)
; Out:   eax = root, also returned in dx:ax
; Clobb: ebx, edx, flags (ecx and esi are saved and restored)
;
; Leading zero bytes of the argument are skipped by pre-shifting it
; and entering the unrolled iterations late: label doNN runs the
; iterations for the remaining NN argument bits (2 bits each).
;
; Original author's estimate: about 220 clocks worst case,
; 3 us on 486/66 and 10 us on 386/25.
;-----------------------------------------------------------------------

larg    equ     DWORD PTR [bp+8]

        PUBLIC  _squareroot32

_squareroot32 proc far

        .386
        push    ebp
        mov     ebp,esp
        push    esi
        push    ecx

        xor     edx,edx                 ; remainder = 0
        xor     ebx,ebx                 ; estimate  = 0
        mov     eax,DWORD PTR larg

        test    eax,0FFFF0000h          ; upper 16 bits all zero?
        jne     hasupper
        shl     eax,16                  ; yes: skip them
        test    eax,0FF000000h          ; next 8 bits zero as well?
        jne     do16
        shl     eax,8                   ; yes: skip them too
        jmp     do8                     ; only 8 argument bits remain

hasupper:
        test    eax,0FF000000h          ; top 8 bits all zero?
        jne     do32
        shl     eax,8                   ; yes: skip them
        jmp     do24

do32:
        SQRT32
        SQRT32
        SQRT32
        SQRT32
do24:
        SQRT32
        SQRT32
        SQRT32
        SQRT32
do16:
        SQRT32
        SQRT32
        SQRT32
        SQRT32
do8:
        SQRT32
        SQRT32
        SQRT32
        SQRT32

        mov     eax,ebx
        shld    edx,eax,16              ; returns in both eax and dx:ax

        pop     ecx
        pop     esi
        mov     esp,ebp
        pop     ebp
        ret

_squareroot32 endp


;-----------------------------------------------------------------------
; SQRT64 - one iteration (one result bit) of the 62->31 bit square root
;
; In/Out: edi     = remainder
;         edx:eax = unconsumed argument bits (left-aligned)
;         ebx     = root estimate so far
; Clobb:  ecx, flags
;
; Same step as SQRT32 with a 64-bit argument register pair.  The
; comparison is unsigned because remainder and trial value reach
; 2^31 and above for large arguments.
; NOTE(review): the remainder is kept in 32 bits only; for arguments
; of about 2^60 and above the two-bit shift into edi appears able to
; overflow, so results for such arguments should not be relied on -
; confirm before using the full 62-bit range.
;-----------------------------------------------------------------------

SQRT64  MACRO
        LOCAL   skip
        shld    edi,edx,2               ;; next 2 argument bits -> remainder
        shld    edx,eax,2
        shl     eax,2
        add     ebx,ebx                 ;; estimate <<= 1
        mov     ecx,ebx
        add     ecx,ecx                 ;; ecx = 2 * estimate
        cmp     edi,ecx                 ;; remainder > 2*estimate ?
        jbe     skip                    ;; no: new result bit stays 0
        inc     ebx                     ;; yes: new result bit is 1
        inc     ecx                     ;; ecx = 2*estimate + 1
        sub     edi,ecx                 ;; remainder -= 2*estimate + 1
skip:
        ENDM


;-----------------------------------------------------------------------
; long squareroot62(long hiarg, long loarg)
;
; Integer square root of the 62-bit value hiarg:loarg, giving a
; 31-bit result.  When hiarg is zero the work is handed to
; squareroot32.
;
; In:    [bp+8]  hiarg (dword, upper half)
;        [bp+12] loarg (dword, lower half)
; Out:   eax = root, also returned in dx:ax
; Clobb: ebx, edx, flags (ecx, esi, edi are saved and restored on
;        the long path)
;
; The argument is first shifted left by 2 so that its 62 bits are
; left-aligned in edx:eax; this is why one iteration fewer than 32
; is executed (31 in total from do64).  Label doNN is entered when
; NN bits of the 64-bit register pair remain to be consumed.
;
; Original author's estimate: about 500 clocks worst case,
; 8 us on 486/66 and 20 us on 386/25.
;-----------------------------------------------------------------------

hiarg   equ     DWORD PTR [bp+8]
loarg   equ     DWORD PTR [bp+12]

        PUBLIC  _squareroot62

_squareroot62 proc far

        .386
        push    ebp
        mov     ebp,esp

        mov     edx,DWORD PTR hiarg
        mov     eax,DWORD PTR loarg
        or      edx,edx                 ; upper dword zero?
        jne     dohigh

        push    eax                     ; yes: the 32-bit root is enough
        call    _squareroot32
        add     esp,4                   ; drop the pushed argument
        mov     esp,ebp
        pop     ebp
        ret

dohigh:
        push    ecx                     ; have to process both dwords
        push    esi
        push    edi

        xor     edi,edi                 ; remainder = 0
        xor     ebx,ebx                 ; estimate  = 0

        shld    edx,eax,2               ; left-align the 62-bit argument
        shl     eax,2

        test    edx,0FFFF0000h          ; upper 16 bits all zero?
        jne     hashigh
        shld    edx,eax,16              ; yes: skip them
        shl     eax,16
        test    edx,0FF000000h          ; next 8 bits zero as well?
        jne     do48                    ; no: 48 bits remain
        shld    edx,eax,8               ; yes: skip them too
        shl     eax,8
        jmp     do40                    ; 40 bits remain

hashigh:
        test    edx,0FF000000h          ; top 8 bits all zero?
        jne     do64
        shld    edx,eax,8               ; yes: skip them
        shl     eax,8
        jmp     do56

do64:
        SQRT64
        SQRT64
        SQRT64
        SQRT64
do56:
        SQRT64
        SQRT64
        SQRT64
        SQRT64
do48:
        SQRT64
        SQRT64
        SQRT64
        SQRT64
do40:
        SQRT64
        SQRT64
        SQRT64
        SQRT64

        SQRT64
        SQRT64
        SQRT64
        SQRT64

        SQRT64
        SQRT64
        SQRT64
        SQRT64

        SQRT64
        SQRT64
        SQRT64
        SQRT64

        SQRT64
        SQRT64
        SQRT64                          ; one missing because of prescale

        mov     eax,ebx
        pop     edi
        pop     esi
        pop     ecx
        shld    edx,eax,16              ; returns in both eax and dx:ax
        mov     esp,ebp
        pop     ebp
        ret

_squareroot62 endp


;-----------------------------------------------------------------------
; long magnitude32(long x, long y, long z)
;
; Magnitude of a vector: square root of x*x + y*y + z*z.  No scaling
; or shortcuts: three signed 32x32->64 bit multiplies summed into a
; 64-bit value that is passed to squareroot62.
;
; In:    [bp+8] x, [bp+12] y, [bp+16] z (dwords)
; Out:   eax = magnitude, also returned in dx:ax
; Clobb: ebx, edx, flags (ecx is saved and restored)
;
; Original author's estimate: worst case 650 clocks, best case 200;
; 3 to 10 us on 486/66, 8 to 25 us on 386/25.
;-----------------------------------------------------------------------

x       equ     DWORD PTR [bp+8]
y       equ     DWORD PTR [bp+12]
z       equ     DWORD PTR [bp+16]

        PUBLIC  _magnitude32

_magnitude32 proc far

        .386
        push    ebp
        mov     ebp,esp
        push    ecx

        mov     eax,x                   ; ecx:ebx = x*x
        imul    x
        mov     ebx,eax
        mov     ecx,edx

        mov     eax,y                   ; ecx:ebx += y*y
        imul    y
        add     ebx,eax
        adc     ecx,edx

        mov     eax,z                   ; ecx:ebx += z*z
        imul    z
        add     ebx,eax
        adc     ecx,edx

        push    ebx                     ; loarg
        push    ecx                     ; hiarg
        call    _squareroot62
        add     esp,8

        pop     ecx
        mov     esp,ebp
        pop     ebp
        ret

_magnitude32 endp


;-----------------------------------------------------------------------
; long magnitude16(int x, int y, int z)
;
; Magnitude of a vector of 16-bit components: square root of
; x*x + y*y + z*z.  No scaling or shortcuts: three signed 16x16->32
; bit multiplies summed into a 32-bit value that is passed to
; squareroot32.
;
; In:    [bp+8] x, [bp+10] y, [bp+12] z (words)
; Out:   eax = magnitude, also returned in dx:ax
; Clobb: ebx, edx, flags (ecx is saved and restored)
;
; Original author's estimate: worst case 300 clocks, best case 150;
; 2 to 5 us on 486/66, 6 to 12 us on 386/25.
;-----------------------------------------------------------------------

x       equ     WORD PTR [bp+8]
y       equ     WORD PTR [bp+10]
z       equ     WORD PTR [bp+12]

        PUBLIC  _magnitude16

_magnitude16 proc far

        .386
        push    ebp
        mov     ebp,esp
        push    ecx

        mov     ax,x                    ; cx:bx = x*x
        imul    x
        mov     bx,ax
        mov     cx,dx

        mov     ax,y                    ; cx:bx += y*y
        imul    y
        add     bx,ax
        adc     cx,dx

        mov     ax,z                    ; cx:bx += z*z
        imul    z
        add     bx,ax
        adc     cx,dx

        push    cx                      ; high word first, then low word:
        push    bx                      ; together one dword argument
        call    _squareroot32
        add     esp,4

        pop     ecx
        mov     esp,ebp
        pop     ebp
        ret

_magnitude16 endp


;-----------------------------------------------------------------------
; void set_vector_magnitude32(long length, long *xp, long *yp, long *zp)
;
; Rescales the vector (*xp, *yp, *zp) in place so that its magnitude
; becomes length: each component is replaced by
; component * length / magnitude32(x, y, z), using a 64-bit
; intermediate product.  A vector of magnitude zero is left
; unchanged.
;
; In:    [bp+8]  length (dword)
;        [bp+12] xp, [bp+16] yp, [bp+20] zp (far pointers to dwords)
; Out:   *xp, *yp, *zp updated
; Clobb: eax, ebx, edx, es, flags (ecx, edi, esi are saved and
;        restored)
;-----------------------------------------------------------------------

length  equ     DWORD PTR [bp+8]
xp      equ     DWORD PTR [bp+12]
yp      equ     DWORD PTR [bp+16]
zp      equ     DWORD PTR [bp+20]

        PUBLIC  _set_vector_magnitude32

_set_vector_magnitude32 proc far

        .386
        push    ebp
        mov     ebp,esp
        push    ecx
        push    edi
        push    esi

        les     bx,xp                   ; compute current magnitude
        push    DWORD PTR es:[bx]
        les     bx,yp
        push    DWORD PTR es:[bx]
        les     bx,zp
        push    DWORD PTR es:[bx]
        call    _magnitude32
        add     esp,12

        mov     esi,eax                 ; esi = magnitude (the divisor)
        or      eax,eax
        je      zero_magnitude          ; nothing to scale, avoid divide by 0

        les     bx,xp                   ; *xp = *xp * length / magnitude
        mov     eax,es:[bx]
        imul    length
        idiv    esi
        mov     es:[bx],eax

        les     bx,yp                   ; *yp = *yp * length / magnitude
        mov     eax,es:[bx]
        imul    length
        idiv    esi
        mov     es:[bx],eax

        les     bx,zp                   ; *zp = *zp * length / magnitude
        mov     eax,es:[bx]
        imul    length
        idiv    esi
        mov     es:[bx],eax

zero_magnitude:
        pop     esi
        pop     edi
        pop     ecx
        mov     esp,ebp
        pop     ebp
        ret

_set_vector_magnitude32 endp

        end