home *** CD-ROM | disk | FTP | other *** search
- Listing One:
- ;DeSmet function isqrt(source);
- ; ...source is a long integer (32 bit)...
- ;Returns square root of source in ax, using Newton's method; see Scanlon's
- ;8086 book for similar function (Scanlon's is not sufficiently general, and has
- ;an error)...
- ;
- ;REGISTER USAGE cx:bx ...stores copy of 32-bit source throughout...
- ; di ...``last''estimate of isqrt(source)...
- ; si ...current estimate of isqrt(source)...
- ; dx:ax ...used to divide source by di...
-
- cseg
- public isqrt_
- isqrt_: push bp
- mov bp,sp
- mov bx, [bp+4] ;Store a copy of source in cx:bx;
- mov cx, [bp+6] ;cx:bx preserved till almostdone...
- ;-----Start block to determine initial estimate; base estimate on most---------
- ;-----significant non-zero byte of cx:bx---------------------------------------
- cmp ch,0 ;Note 46341 covers largest positive
- je test_cl ;long that most compilers will pass.
- mov di,46341 ;If you use 32-bit unsigned, replace
- jmp load_si ;with 65535, and if cx>=FFFEH, jump
- ;------------ ;out and set result= 65535.
- test_cl: cmp cl,0
- je text_bh
- mov di,2896
- jmp load_si
- ;------------
- test_bh: cmp bh,0
- je its_bl
- mov di,181
- jmp load_si
- ;------------
- its_bl: mov di,8
- load_si: mov si,di
- ;-----End block to determine initial estimate----------------------------------
-
- ;-----Begin loop to refine the estimate----------------------------------------
- refine: mov dx,cx ;Load dx:ax pair with source in prep
- mov ax,bx ;for divide by di=estimate...
- div di
- ;------Block to average quotient and last estimate-------
- shr ax,1 ;We can't just add di,ax then
- adc di,0 ;shr di,1 because sum of di and ax
- shr di,1 ;may exceed 65535...
- ;-----------
- sub si,di ;Obtain difference betw. old (si)
- jz almostdone ;and new estimates; if 0, we're
- cmp si,1 ;almost done...Else if diff. is
- je almostdone ;1 or -1 we're almost done...
- cmp si,-1
- je almostdone
- mov si,di ;Store current value di in si as
- ;``old'' estimate for next iteration.
- jmp refine
- ;-----End loop to refine the estimate------------------------------------------
- almostdone: mov ax,di ;Check to see if estimate*estimate
- mul di ;is less than cx:bx; this step is
- sub bx,ax ;for fussbudgets who demand final
- sbb cx,dx ;integer sqrt be < real sqrt; ditch
- jns done ;it and save approx. 60 clocks...
- dec di ;If product >cx:bx, subtract 1...
- done: mov ax,di ;DeSmet looks for result in ax...
- mov sp,bp
- pop bp
- ret
-
- /* Test driver for isqrt()... */
-
- main()
- {
- long start,i,NumSqrts;
- printf(``ENTER start: '');
- scanf(``%D'',&start);
- printf(``ENTER NumSqrts: '');
- scanf(``%D'',&NumSqrts);
- printf(``isqrt(%D)= %u\n'',start,isqrt(start));
- putchar('\007');
- for(i=start;i<start+NumSqrts;++i)isqrt(i); /* Remove this isqrt() to find */
- putchar('\007'); /* time taken by loop itself...*/
- }
-
-
- /* Another short driver; this one better for verifying algorithm */
-
- main()
- {
- long source;
- unsigned result;
- printf(``ENTER # for sqrt; negative exits\n'');
- while(printf(``ENTER #: ''),scanf(``%D'',&source),source>=OL){
- result=isqrt(source);
- printf(``result= SQRT(%D)= %u\n'',source,result);
- printf(``result*result= %D\n'',((long)result)*((long)result));
- printf(``(result+1)*(result+1)= %D\n\n'',(result+1L)*(result+1L));
- }
- }
- -----------------------------------------------------------------------------
- Listing Two: Bit-Shifting Method (Slower)
-
- ;DeSmet function 1sqrt(); takes a long (32-bit) integer as an argument, returns
- ;a short (16-bit) integer square root. Function result returned in ax.
- ;Modified after 68000 code published in DDJ #109, Nov. 1985, p. 90. Comments
- ;give roughly analogous 68000 instructions; correspondence with 68000 registers
- ;is:
- ; D0 = sp:si (initially holds argument)
- ; D1 = dx:di (Error term)
- ; D2 = ax (Running estimate)
- ; D3 = bx (High bracket; may exceed 16 bits on last iteration)
- ; D4 = cx (Loop counter)
- ;
- ;Note sp is used as a general register, so can't push or pop between
- ;``mov bp,sp'' and ``mov sp,bp''...also, we must disable DOS's timer
- ;interrupt, because it manipulates the stack every 55 milliseconds...
-
- cseg
- public lsqrt_
-
- lsqrt_: push bp
- mov bp,sp
-
- ;------Now can address stack; 4-byte argument starts at bp+4------------------
- cli ;Clear interrupts (lock out)...
- mov si,word [bp+4] ;Place argument in sp:si= ``DO''...
- mov sp,word [bp+6]
-
- xor di,di ;Zero ``D1'' and ``D2''...
- mov dx,di
- mov ax,di
- mov cx,16 ;Loop counter cx= ``D4''...
-
- sqrt1: shl si,1
- rcl sp,1 ;``asl.l #1,D0''...
- rcl di,1 ;``roxl.l #1,D1''...
- rcl dx,1
-
- shl si,1
- rcl sp,1 ;Repeat shift and rotate...
- rcl di,1
- rcl dx,1
-
- shl ax,1 ;``asl.l #1,D2''...
-
- mov bx,ax ;``move.1 D2,D3''...
-
- shl bx,1 ;``asl.l #1,D3''...
- jc is_carry ;Jump out of loop if new ``D3''exceeds
- ;16 bits (may happen on last
- ;iteration)...
- cmp dx,0
- jb sqrt2 ;``cmp.l D3,D1''...
- ja past1 ;``bls sqrt2''...
- cmp di,bx
- jbe sqrt2
-
- past1: inc ax ;``addq.1 #1,D2''...
- inc bx ;``addq.1 #1,D3''...
-
- sub di,bx ;``sub.1 D3,D1''...
- sbb dx,0
-
- sqrt2: loop sqrt1 ;``dbra D4,sqrt1''...
-
- jmp past3 ;Skip 'is_carry' block if we finished
- ;loop through 16 iterations (no jc,
- ;``D3'' stayed <17 bits)...
- ;------------------------------------------------------------------------------
- is_carry; cmp dx,0001H ;If we got here, there was a carry
- ja past2 ;for shl bx,1 on last iteration of
- jb past3 ;loop; compare upper word ``D1''
- cmp di,bx ;against upper word ``D3'', which is
- jbe past3 ;now 0001H; then compare lower words
- ;of ``D1'' and ``D3''...
- past2: inc ax ;``addq.1 #1,D2''...
- ;------------------------------------------------------------------------------
- past3: sti ;Restore interrupts...
- mov sp,bp ;Restore frame, function result
- pop bp ;is already in ax....
- ret
-
- *************************************************************************
- Listing Three:
-
-
- * *
- * Integer Square Root (32 to 16 bit). *
- * *
- * (Exact method, not approximate). *
- * *
- * Call with: *
- * DO.L = Unsigned number. *
- * *
- * Returns: *
- * DO.L = SQRT(DO.L) *
- * *
- * Notes: Result fits in DO.W, but is valid in longword. *
- * Takes from 122 to 1272 cycles (including rts). *
- * Averages 610 cycles measured over first 65535 roots. *
- * Averages 1104 cycles measured over first 500000 roots. *
- * *
- *************************************************************************
-
- .globl lsqrt
- * Cycles
- lsqrt tst.l d0 (4) ; Skip doing zero.
- beg.s done (10/8)
- cmp.l #$10000,d0 (14) ; If is a longword, use the long routine.
- bhs.s glsqrt (10/8)
- cmp.w #625,d0 (8) ; Would the short word routine be quicker?
- bhi.s gsqrt (10/8) ; No, use general purpose word routine.
- * ; Otherwise fall into special routine.
- *
- * For speed, we use three exit points.
- * This is cheesy, but this is a speed-optimized subroutine!
-
- *************************************************************************
- * *
- * Faster Integer Square Root (16 to 8 bit). For small arguments. *
- * *
- * (Exact method, not approximate). *
- * *
- * Call with: *
- * DO.W = Unsigned number. *
- * *
- * Returns: *
- * DO.W = SQRT(DO.W) *
- * *
- * Notes: Result fits in DO.B, but is valid in word. *
- * Takes from 72 (d0=1) to 504 (d0=625) cycles *
- * (including rts). *
- * *
- * Algorithm supplied by Motorola. *
- * *
- *************************************************************************
-
- * Use the theorem that a perfect square is the sum of the first
- * sqrt(arg) number of odd integers.
-
- * Cycles
- move.w d1,-(sp) (8)
- move.w #-1,d1 (8)
- qsqrt1 addq.w #2,d1 (4)
- sub.w d1,d0 (4)
- bpl qsqrt1 (10/8)
- asr.w #1,d1 (8)
- move.w d1,d0 (4)
- move.w (sp)+,d1 (12)
- done rts (16)
-
- *************************************************************************
- * *
- * Integer Square Root (16 to 8 bit). *
- * *
- * (Exact method, not approximate). *
- * *
- * Call with: *
- * DO.W = Unsigned number. *
- * *
- * Returns: *
- * DO.L = SQRT(DO.W) *
- * *
- * Uses: D1-D4 as temporaries -- *
- * D1 = Error term; *
- * D2 = Running estimate; *
- * D3 = High bracket; *
- * D4 = Loop counter *
- * *
- * Notes: Result fits in DO.B, but is valid in word. *
- * *
- * Takes from 512 to 592 cycles (including rts). *
- * *
- * Instruction times for branch-type instructions *
- * listed as (X/Y) are for (taken/not taken). *
- * *
- *************************************************************************
-
- * Cycles
- gsqrt movem.w d1-d4,-(sp) (24)
- move.w #7,d4 (8) ; Loop count (bits-1 of result).
- clr.w d1 (4) ; Error term in D1.
- clr.w d2 (4)
- sqrt1 add.w d0,d0 (4) ; Get 2 leading bits a time and add
- addx.w d1,d1 (4) ; into Error term for interpolation.
- add.w d0,d0 (4) ; (Classical method, easy in binary).
- addx.w d1,d1 (4)
- add.w d2,d2 (4) ; Running estimate * 2.
- move.w d2,d3 (4)
- add.w d3,d3 (4)
- cmp.w d3,d1 (4)
- bls.s sqrt2 (10/8) ; New Error term > 2* Running estimate?
- addq.w #1,d2 (4) ; Yes, we want a `1' bit then.
- addq.w #1,d3 (4) ; Fix up new Error term.
- sub.w d3,d1 (4)
- sqrt2 dbra d4,sqrt1 (10/14) ; Do all 8 bit-pairs.
- move.w d2,d0 (4)
- movem.w (sp)+,d1-d4 (28)
- rts (16)
-
- *************************************************************************
- * *
- * Integer Square Root (32 to 16 bit). *
- * *
- * (Exact Method, not approximate). *
- * *
- * Call with: *
- * DO.L = Unsigned number. *
- * *
- * Returns: *
- * DO.L = SQRT(DO.L) *
- * *
- * Uses: D1-D4 as temporaries -- *
- * D1 = Error term; *
- * D2 = Running estimate; *
- * D3 = High bracket; *
- * D4 = Loop counter. *
- * *
- * Notes: Result fits in DO.W, but is valid in longword. *
- * *
- * Takes from 1080 to 1236 cycles (including rts.) *
- * *
- * Two of the 16 passes are unrolled from the loop so that *
- * quicker instructions may be used where there is no *
- * danger of overflow (in the early passes). *
- * *
- * Instruction times for branch-type instructions *
- * listed as (X/Y) are for (taken/not taken). *
- * *
- *************************************************************************
-
- * Cycles
- glsqrt movem.l d1-d4,-(sp) (40)
- moveq #13,d4 (4) ; Loop count (bits-1 of result).
- moveq #0,d1 (4) ; Error term in D1.
- moveq #0,d2 (4)
- lsqrt1 add.l d0,d0 (8) ; Get 2 leading bits a time and add
- addx.w d1,d1 (4) ; into Error term for interpolation.
- add.l d0,d0 (8) ; (Classical method, easy in binary).
- addx.w d1,d1 (4)
- add.w d2,d2 (4) ; Running estimate * 2.
- move.w d2,d3 (4)
- add.w d3,d3 (4)
- cmp.w d3,d1 (4)
- bls.s lsqrt2 (10/8) ; New Error term > 2* Running estimate?
- addq.w #1,d2 (4) ; Yes, we want a `1' bit then.
- addq.w #1,d3 (4) ; Fix up new Error term.
- sub.w d3,d1 (4)
- lsqrt2 dbra d4,lsqrt1 (10/14) ; Do first 14 bit-pairs.
-
- add.l d0,d0 (8) ; Do 15-th bit-pair.
- addx.w d1,d1 (4)
- add.l d0,d0 (8)
- addx.l d1,d1 (8)
- add.w d2,d2 (4)
- move.l d2,d3 (4)
- add.w d3,d3 (4)
- cmp.l d3,d1 (6)
- bls.s lsqrt3 (10/8)
- addq.w #1,d2 (4)
- addq.w #1,d3 (4)
- sub.l d3,d1 (8)
-
- lsqrt3 add.l d0,d0 (8) ; Do 16-th bit-pair.
- addx.l d1,d1 (8)
- add.l d0,d0 (8)
- addx.l d1,d1 (8)
- add.w d2,d2 (4)
- move.l d2,d3 (4)
- add.l d3,d3 (8)
- cmp.l d3,d1 (6)
- bls.s lsqrt4 (10/8)
- addq.w #1,d2 (4)
- lsqrt4 move.w d2,d0 (4)
- movem.l (sp)+,d1-d4 (44)
- rts (16)
-
- end
-
-
-
- *************************************************************************
- Listing Four:
-
- * *
- * Integer Square Root (32 to 16 bit). *
- * *
- * (Newton-Raphson method). *
- * *
- * Call with: *
- * DO.L = Unsigned number. *
- * *
- * Returns: *
- * DO.L = SQRT(DO.L) *
- * *
- * Notes: Result fits in DO.W, but is valid in longword. *
- * Takes from 338 cycles (1 shift, 1 division) to *
- * 1580 cycles (16 shifts, 4 divisions) (including rts). *
- * Averages 854 cycles measured over first 65535 roots. *
- * Averages 992 cycles measured over first 500000 roots. *
- * *
- *************************************************************************
-
- .globl lsqrt
- * Cycles
- lsqrt movem.l d1-d2,-(sp) (24)
- move.l d0,d1 (4) ; Set up for guessing algorithm.
- beq.s return (10/8) ; Don't process zero.
- moveq #1,d2 (4)
-
- guess cmp.l d2,d1 (6) ; Get a guess that is guaranteed to be
- bls.s newton (10/8) ; too high, but not by much, by dividing the
- add.l d2,d2 (8) ; argument by two and multiplying a 1 by 2
- lsr,l #1,d1 (10) ; until the power of two passes the modified
- bra.s guess (10) ; argument, then average these two numbers.
-
- newton add.l d1,d2 (8) ; Average the two guesses.
- lsr.l #1,d2 (10)
- move.l d0,d1 (4) ; Generate the next approximation(s)
- divu d2,d1 (140) ; via the Newton-Raphson method.
- bvs.s done (10/8) ; Handle out-of-range input (cheats!)
- cmp.w d1,d2 (4) ; Have we converged?
- bls.s done (10/8)
- swap d1 (4) ; No, kill the remainder so the
- clr.w d1 (4) ; next average comes out right.
- swap d1 (4)
- bra.s newton (10)
-
- done clr.w d0 (4) ; Return a word answer in longword.
- swap d0 (4)
- move.w d2,d0 (4)
- return movem.l (sp)+,d1-d2 (28)
- rts (16)
-
- end
- **************************************************************************
- Listing Five:
-
- Program Squareroot;
- {
- Squareroot algorithm & testprogram; DDJ March 1986, p.122
-
- Features: - sqrt routine in 68000 machine language;
- - long integer loopcount;
- }
-
- const { Iteration count for test loop }
- NNR = 6E4; { real, for printing of statistics }
- NNS = '60000'; { string, for assignment to long integer }
-
- type long = record
- low,high : integer;
- end;
-
- var finished : boolean; { flag for loop }
- number, limit : long; { loop count, loop limit }
- sqrrt, { square root result }
- sqrrto, { last displayed square root }
- t1,t2 : integer; { parameters for system time }
- time1, time2 : real; { start, end time }
-
- { --- ROUTINES FOR LONG (32-BIT) INTEGER SUPPORT --- }
-
- procedure lg_clr(var l:long); external;
- { Clears long integer l }
-
- procedure lg_asn_DU(s:string; var l:long); external;
- { Assigns the unsigned decimal string s to the long integer l }
-
- procedure lg_inc1(var l:long); external;
- { Increments long integer l by 1 }
-
- procedure lg_grt(a,b:long; var flag:boolean); external;
- { Compares long integers a and b and assign result to flag }
-
- { --- SQUAREROOT ROUTINE --- }
-
- procedure sqrt(number:long; var result:integer); external;
- { Calculates square root of 'number' and returns it in 'result' }
-
- begin { main }
-
- sqrrto := 100;
-
- lg_clr(number); { number := 0 }
- lg_asn_DU(NNS,limit); {limit := NNS }
- finished := false;
-
- write('Start...');
- time(t1,t2); time1 := t2; { get start time }
-
- while not finished do { calculate sqrt(number) }
- begin
- sqrt(number,sqrrt);
- {
- if sqrrt <> sqrrto
- then begin
- write ('Number = ',number.high:6,' | ',number.low:6);
- writeln(' --- Sqrt = ',sqrrt:4);
- sqrrto := sqrrt;
- end;
- }
- lg_inc1(number); { number := number + 1 }
- lg_grt(number,limit,finished); { finished := (number > limit) }
- end;
-
- time(t1,t2); time2 := t2; { get end time }
-
- writeln('finished !'); writeln;
- writeln('Time: ',(time2-time1)/60,' seconds');
-
- end.
- *******************************************************************************
- Listing Six:
-
- 1 *
- 2 * Squareroot algorithm; DDJ March 1986, p.122
- 3 *
- 4 * 68000 assembly language version
- 5 *
- 6 * Features: - equivalent to compiler-generated code;
- 7 *
- 8 *
- 9 * procedure sqrt(number:long; var result:integer);
- 10 *
- 11 * Calculates integer square root of 'number' and returns it in 'result';
- 12 *
- 13 *
- 14 * Register usage:
- 15 * --------------
- 16 *
- 17 * D0 : word register A0 : parameter stack pointer
- 18 * D1 : number A1 : scratch register
- 19 * D2 : guess1
- 20 * D3 : guess2
- 21 * D4 : error
- 22 *
- 23 proc sqrt,2 ;2 words of parameters
- 24 *
- 25 * Get parameters from stack
- 26 *
- 27 move.l (sp)+,a0 ;get return address
- 28 move.w 2(sp),a1 ;get ^number
- 29 move.l (a1),d1 ;get number
- 30 *
- 31 * bra Q15 ;--- for timing only
- 32 *
- 33 beq Q8 ;if number=0, skip
- 34 *
- 35 * Set initial values
- 36 *
- 37 Q7 moveq #1,d2 ;guess1 := 1
- 38 move.1 d1,d3 ;guess2 := number
- 39 *
- 40 * Do shifts until guess1 ~~~ guess2
- 41 *
- 42 Q9 move.l d2,d0 ;guess1 to work register
- 43 asl.l #1,d0 ;guess1 * 2
- 44 cmp.l d3,d0 ;compare with guess2
- 45 bge Q11 ;branch if guess1 * 2 >= guess2
- 46 asl.l #1,d2 ;guess1 := guess1 * 2
- 47 asr.l #1,d3 ;guess2 := guess2 / 2
- 48 bra Q9
- 49 *
- 50 * Now do divisions
- 51 *
- 52 Q11
- 53 Q13 add.l d3,d2 ;guess := guess1 + guess2
- 54 asr.l #1,d2 ;guess1 := (guess1+guess2)/2
- 55 move.l d1,d0 ;number to work register
- 56 divs d2,d0 ;number / guess1
- 57 move.w d0,d3 ;guess2 := number/guess1
- 58 ext.l d3 ;extend to 32 bits
- 59 move.l d2,d0 ;guess1 to work register
- 60 sub.l d3,d0 ;guess1-guess2
- 61 move.l d0,d4 ;error := guess1-guess2
- 62 ble Q14 ;if error <= 0
- 63 bra Q13 ;loop back if error > 0
- 64 Q14 move.l d2,d0 ;result := guess1
- 65 bra Q15
- 66 Q8 moveq #0,d0 ;result := 0
- 67 *
- 68 * Set result & return to caller
- 69 *
- 70 Q15 movea.w (sp)+,a1 ;get ^result
- 71 move.w d0,(a1) ;store result
- 72 *
- 73 addq.l #2,sp ;drop ^number
- 74 jmp (a0) ;return to caller
- 75 *
- 76 .nolist
-
-
- ******************************************************************************
- Listing Seven:
-
- 1 *
- 2 * Squareroot algorithm; DDJ March 1986, p.122
- 3 *
- 4 * 68000 assembly language version
- 5 *
- 6 * Features: - hand-optimized machine code;
- 7 *
- 8 *
- 9 * procedure sqrt(number:integer; var result:integer);
- 10 *
- 11 * Calculates square root of 'number' and returns it in 'result';
- 12 *
- 13 *
- 14 * Register usage:
- 15 * --------------
- 16 *
- 17 * D0 : work register, error A0 : ^result
- 18 * D1 : number A1 : ^number
- 19 * D2 : guess1,result
- 20 * D3 : guess2
- 21 * D4 : temporary storage for return address
- 22 *
- 23 *
- 24 .proc sqrt,2 ;2 words of parameters
- 25 *
- 26 * Get parameters from stack
- 27 *
- 28 moveq #0,d2 ;result := 0 --- remove for timing
- 29 *
- 30 move.l (sp)+,d4 ;get return address
- 31 move.w (sp)+,a0 ;get ^result
- 32 move.w (sp)+,a1 ;get ^number
- 33 move.l (a1),d1 ;get number
- 34 *
- 35 * bra Q15 ;--- for timing only
- 36 *
- 37 beq Q15 ;if number=0, then result=0
- 38 *
- 39 * Set initial values
- 40 *
- 41 Q7 moveq #1,d2 ;guess1 := 1
- 42 move.l d1,d3 ;guess2 := number
- 43 *
- 44 * Do shifts until guess1 ~ guess2
- 45 *
- 46 Q9 asl.l #1,d2 ;guess1 := guess1 * 2
- 47 cmp.l d3,d2 ;compare with guess2
- 48 bge Q11 ;branch if guess1 * 2 >= guess2
- 49 asr.l #1,d3 ;guess2 := guess2 / 2
- 50 bra Q9
- 51 *
- 52 Q11 asr.l #1,d2 ;adjust guess1
- 53 *
- 54 * Now do divisions
- 55 *
- 56 Q13 add.l d3,d2 ;guess1 := guess1 + guess2
- 57 asr.l #1,d2 ;guess1 := (guess1+guess2)/2
- 58 move.l d1,d3 ;guess2 := number
- 59 divs d2,d3 ;guess2 := number / guess1
- 60 ext.l d3 ;extend guess2 to 32 bits
- 61 move.l d2,d0 ;guess1 to work register
- 62 sub.l d3,d0 ;error := guess1 - guess2
- 63 bgt Q13 ;loop back if error > 0
- 64 *
- 65 * Store result and return to caller
- 66 *
- 67 Q15 move.w d2,(a0) ;store result
- 68 *
- 69 movea.l d4,a0 ;move return address to adr-reg
- 70 jmp (a0) ;return to caller
- 71 *
- 72 .nolist
-
-
- ***************************************************************************
-
- Listing Eight:
-
- 1 *
- 2 * Squareroot algorithm; DDJ November 1985, p.88
- 3 *
- 4 * 68000 assembly language
- 5 *
- 6 * procedure sqrt(number:long; var result:integer);
- 7 *
- 8 * Calculates the integer squareroot of 'number' and returns it in 'result'
- 9 *
- 10 *
- 11 * Register usage
- 12 * --------------
- 13 *
- 14 * D0 : number A0 : scratch, for pointers
- 15 * D1 : error term A1 : scratch, for pointers
- 16 * D2 : estimate
- 17 * D3 : corrector term
- 18 * D4 : loop counter
- 19 *
- 20 *
- 21 .proc sqrt,2 ;2 words of parameters
- 22 *
- 23 move.w 6(sp),a0 ;get ^number
- 24 move.l (a0),d0 ;get number into d0
- 25 *
- 26 * bra exit ;--- for timing only
- 27 *
- 28 moveq #16-1,d4 ;set loopcount,16*2 = 32 bits
- 29 moveq #0,d1 ;clear error term
- 30 moveq #0,d2 ;clear estimate
- 31 *
- 32 * Calculate squareroot
- 33 *
- 34 sqrtl asl.l #1,d0 ;get 2 leading bits,
- 35 roxl.l #1,d1 ;one at a time, into
- 36 asl.l #1,d0 ;the error term
- 37 roxl.l #1,d1
- 38 asl.l #1,d2 ;estimate * 2
- 39 move.l d2,d3
- 40 asl.l #1,d3 ;corrector = 2 * new estimate
- 41 cmp.l d3,d1
- 42 bls sqrt2 ;branch if error term <= corrector
- 43 addq.l #1,d2 ;otherwise, add low bit to estimate
- 44 addq.l #1,d3
- 45 sub.l d3,d1 ;and calculate new error term
- 46 *
- 47 sqrt2 dbra d4,sqrtl ;do all 16 bit-pairs
- 48 *
- 49 * Set result & return to caller
- 50 *
- 51 exit move.l (sp)+,a0 ;get return address
- 52 move.w (sp)+,a1 ;get ^result
- 53 move.w d2,(a1) ;store integer result
- 54 addq.l #2,sp ;drop ^number
- 55 jmp (a0) ;return to caller
- 56 *
- 57 .nolist
-
-