The C Users' Group Library 1994 August

home *** CD-ROM | disk | FTP | other *** search

/ The C Users' Group Library 1994 August / wc-cdrom-cusersgrouplibrary-1994-08.iso / vol_200 / 247_01 / bnmuldv.any < prev next >

Wrap

Text File | 1989-04-19 | 12KB | 378 lines

/* * MIRACL routine muldiv * bnmuldv.c * * calculates (a*b+c)/m and (a*b+c)%m as quickly as * possible. Should ideally be written in assembly * language of target machine for speed and/or to allow * largest possible MAXBASE. See mirdef.h. The problem is * to avoid overflow in the calculation of the intermediate * product a*b+c. Note that this routine needs only work * for UNSIGNED parameters * * First are shown two straight-forward portable approaches, * which needs a long data type capable of holding * the product of two int types (i.e. twice as long). * The second method uses the 'ldiv' function which * is defined in the new ANSI standard for C. If you are * using a C compiler which complies with this standard, this * should be faster, as it calculates quotient and remainder * simultaneously (somebody up there heard me!). * * This is followed by various other assembly language * implementations for popular processors, computers and * compilers. * * Since parameter passing and returning is time-consuming, * this routine should be generated 'inline', if compiler * allows it. * int muldiv(a,b,c,m,rp) int a,b,c,m,*rp; { long p; int q; p=(long)a*b+c; q=p/m; *rp=p-(long)q*m; return q; } #include <stdlib.h> int muldiv(a,b,c,m,rp) int a,b,c,m,*rp; { ldiv_t r; r=ldiv((long)a*b+c,(long)m); *rp=(int)r.rem; return (int)r.quot; } ; ; VAX11 version for Dec C compiler ; with 32 bit int using 64-bit quadword ; for the intermediate product. ; .entry muldiv,0 subl #4,sp emul 4(ap),8(ap),12(ap),r0 ;a*b+c ediv 16(ap),r0,r0,@20(ap) ;quo. in r0, rem. in *rp ret .end ; ; IBM-PC versions - small memory model only ; Easily modified for other memory models ; ; Microsoft C compiler V4.0+ ; Written for MS macro-assembler ; ASSUME CS:_TEXT _TEXT SEGMENT BYTE PUBLIC 'CODE' PUBLIC _muldiv _muldiv PROC NEAR push bp ;standard C linkage mov bp,sp mov ax,[bp+4] ;get a mul WORD PTR [bp+6] ;multiply by b add ax,[bp+8] ;add c to low word adc dx,0h ;add carry to high word div WORD PTR [bp+10] ;divide by m mov bx,[bp+12] ;get address for remainder mov [bx],dx ;store remainder pop bp ;standard C return ret ;quotient in ax _muldiv endP _TEXT ENDS END /* * Turbo C compiler V1.0. Uses inline assembly feature * Generates code identical to above Microsoft version */ int muldiv(a,b,c,m,rp) int a,b,c,m,*rp; { asm mov ax,a ;/* get a */ asm mul WORD PTR b ;/* multiply by b */ asm add ax,c ;/* add c to low word */ asm adc dx,0h ;/* add carry to high word */ asm div WORD PTR m ;/* divide by m */ asm mov bx,rp ;/* get address for remainder */ asm mov [bx],dx ;/* store remainder */ return (_AX); } /* Replace last two asm lines when using large data memory models */ /* asm les bx, DWORD PTR rp ; get address for remainder */ /* asm mov WORD PTR es:[bx],dx ; store remainder */ ; ; Zorland C compiler Version 1.1 (Lattice C compatible) ; Note that later versions of Zortech C (and C++) use the MS version ; Written for MS macro-assembler ; PGROUP GROUP PROG PROG SEGMENT BYTE PUBLIC 'PROG' ASSUME CS:PGROUP PUBLIC muldiv muldiv PROC NEAR push bp ;standard C linkage mov bp,sp mov ax,[bp+4] ;get a mul WORD PTR [bp+6] ;multiply by b add ax,[bp+8] ;add c to low word adc dx,0h ;add carry to high word div WORD PTR [bp+10] ;divide by m mov bx,[bp+12] ;get address for remainder mov [bx],dx ;store remainder pop bp ;standard C return ret ;quotient in ax muldiv endP PROG ENDS END ; ; Aztec C compiler V3.4 ; CODESEG SEGMENT BYTE PUBLIC 'CODESEG' ASSUME CS:CODESEG PUBLIC muldiv_ muldiv_ PROC NEAR push bp ;standard C linkage mov bp,sp mov ax,[bp+4] ;get a mul WORD PTR [bp+6] ;multiply by b add ax,[bp+8] ;add c to low word adc dx,0h ;add carry to high word div WORD PTR [bp+10] ;divide by m mov bx,[bp+12] ;get address for remainder mov [bx],dx ;store remainder pop bp ;standard C return ret ;quotient in ax muldiv_ endP CODESEG ENDS END / / Mark Williams C compiler V2.0 / .globl muldiv_ muldiv_: push bp /standard C linkage mov bp,sp mov ax,4(bp) /get a mul 6(bp) /multiply by b add ax,8(bp) /add c to low word adc dx,$0x00 /add carry to high word div 10(bp) /divide by m mov bx,12(bp) /get address for remainder mov (bx),dx /store remainder pop bp /standard C return ret /quotient in ax ; ; IBM-PC-8087 for Microsoft C compiler V4.0+ ; with 'small' defined as 'long' (32 bit). See mirdef.h ; This allows IBM-PC to look like 32-bit computer ; (which it isn't). To make use of this option: ; ; (1) Must have 8087 Maths Co-processor (for speed and to hold 64-bit ; intermediate product). ; ; (2) Must use 'ANSI' enhanced type C compiler, e.g. Microsoft V3.0+ ; and must use header 'miracl.h' which declares function ; parameter types. Changes will be necessary in 'mirdef.h' ; ; Note: many compilation warnings may be generated - ignore them. ; ; Note: This is NOT, in most cases, faster, but it does allow ; very high precision calculations, e.g. 1000! ; ASSUME CS:_TEXT _TEXT SEGMENT BYTE PUBLIC 'CODE' PUBLIC _MULDIV _muldiv PROC NEAR push si ;standard C linkage push bp mov bp,sp finit ;initialise 8087 fild DWORD PTR [bp+6] ;get a fimul DWORD PTR [bp+0ah];multiply by b fiadd DWORD PTR [bp+0eh];add c fild DWORD PTR [bp+12h];get m fld st(1) ;duplicate a*b+c on stack fprem ;get remainder fist DWORD PTR [bp+0ah];store remainder in b fsubr st,st(2) ;subtract rem from total fdiv st,st(1) ;divide by m fist DWORD PTR [bp+6] ;store quotient in a wait mov si,[bp+22] ;get address for remainder mov ax,[bp+10] mov dx,[bp+12] ;get remainder mov [si],ax mov [si+2],dx ;store remainder mov ax,[bp+6] mov dx,[bp+8] ;get quotient in dx:ax pop bp ;standard C return pop si ret _muldiv endP _TEXT ENDS END ; ; IBM-80386 version - for Microsoft C V5.0+ ; Written for MS macro-assembler V5.0+ by Andrej Sauer ; Same comments apply as above (except for 8087 requirement) ; .386 ASSUME CS:_TEXT _TEXT SEGMENT USE16 PUBLIC 'CODE' PUBLIC _MULDIV _muldiv PROC NEAR push bp ;standard C linkage mov ebp,esp mov eax,[ebp+4] ;get a mul DWORD PTR [ebp+8] ;multiply by b add eax,DWORD PTR [ebp+12] ;add c to low word adc edx,0h ;add carry to high word div DWORD PTR [ebp+16] ;divide by m movzx ebx,WORD PTR [ebp+20] ;get address for remainder, zero high mov [ebx],edx ;store remainder shld edx,eax,16 ;shift higher half of quotient ;into lower half of edx pop bp ;standard C return ret ;quotient: high bits in dx, lows in ax _muldiv endP _TEXT ENDS END int muldiv(a,b,c,m,rp) int a,b,c,m,*rp; { asm { ; ; MACintosh version for Megamax C compiler ; with 16-bit int, 68000 processor ; move a(A6),D1 ;get a mulu b(A6),D1 ;multiply by b move c(A6),D0 ;get c ext.l D0 ;extend to longword add.l D0,D1 ;D1 contains a*b+c divu m(A6),D1 ;divide by m move D1,D0 ;return with quotient in D0 swap D1 ;get remainder move.l rp(A6),A0 ;get address for remainder move D1,(A0) ;store remainder } } int muldiv(a,b,c,m,rp) int a,b,c,m,*rp; { asm { ; ; 32016 processor version for BBC Master Scientific ; with 32-bit int, by Dudley Long, Rutherford-Appleton Labs. ; movd a,0 ;move a to R0 meid b,0 ;multiply by b, result extended addd c,0 ;add c to extended number in R0 & R1 addcd #0,1 deid m,0 ;divide by m movd 0,0(rp) ;remainder to *rp movd 1,0 ;quotient returned in R0 } } ; ; Acorn ARM Risc version (32-bit) for Archimedes micro ; unsigned only ; Wingpass Macro Assembler ; .INCLUDE "A.REGNAMES" .AREA C$$code, .CODE, .READONLY muldiv:: MOV ip, sp ;standard linkage STMFD sp!, {v1-v4} CMPS a2,#0x40000000 ;check for b=MAXBASE MOVEQ v3,a1,LSL #30 ;this idea is quicker because MOVEQ v4,a1,LSR #2 ;of ARM barrel shifting capability BEQ addin MOV v1,a1,LSR #16 ;do it the hard way MOV v2,a2,LSR #16 BIC a1,a1,v1,LSL #16 BIC a2,a2,v2,LSL #16 MUL v3,a1,a2 ;form partial products of a*b MUL v4,v1,v2 SUB v1,v1,a1 SUB v2,a2,v2 MLA v1,v2,v1,v3 ;look - only 3 MULs! ADD v1,v1,v4 ADDS v3,v3,v1,LSL #16 ADC v4,v4,v1,LSR #16 addin: ADDS v3,v3,a3 ;now add in c ADCCS v4,v4,#0 CMPS a4,#0x40000000 ;check for m=MAXBASE MOVEQ a1,v3,LSR #30 ADDEQ a1,a1,v4,LSL #2 BICEQ v4,v3,#0xC0000000 BEQ leave MOV a1,#0 ;do long division by m divlp: .REPEAT 32 ;2xfaster than a loop! MOVS v3,v3,ASL #1 ;get next bit into carry ADC v4,v4,v4 ;accumulate remainder CMPS v4,a4 SUBCS v4,v4,a4 ADC a1,a1,a1 ;accumulate quotient .ENDREPEAT leave: LDR v3,[ip] STR v4,[v3] ;store remainder LDMFD sp!, {v1-v4} MOVS pc,lr */