RISC DISC 2

home *** CD-ROM | disk | FTP | other *** search

/ RISC DISC 2 / RISC_DISC_2.iso / pd_share / utilities / cli / pgp2 / src / s / arm next >

Wrap

Text File | 1995-06-08 | 5.3 KB | 216 lines

; multiple-precision arithmetic routines ; the ARM is a 32-bit little-endian RISC processor r0 RN 0 r1 RN 1 r2 RN 2 r3 RN 3 r4 RN 4 r5 RN 5 r6 RN 6 r7 RN 7 r8 RN 8 r9 RN 9 r10 RN 10 r11 RN 11 r12 RN 12 r14 RN 14 sp RN 13 lr RN 14 pc RN 15 AREA |A$$Code|, CODE, READONLY IMPORT global_precision gp_ DCD global_precision EXPORT _p_addc ; _p_addc ; r0,r1 point to the numbers, r2 is carry (0 or 1) ; return with number at r0 increased, new carry in r0 _p_addc STMFD sp!,{r4,r5,lr} LDR r3,gp_ ; addr of gp variable LDR r3,[r3] ; value of gp variable MOVS r3,r3,LSL#16 ; <<16 [it's a short] MOVEQ r0,r2 ; precision = 0 ??! LDMEQFD sp!,{r4,r5,pc}^ MOVS r2,r2,LSR#1 ; carry into C flag pa1 LDR r4,[r0] LDR r5,[r1],#4 ADCS r4,r4,r5 STR r4,[r0],#4 SUB r3,r3,#1<<16 TEQ r3,#0 BNE pa1 ADC r0,r3,#0 ; r3==0 here, so r0 = carry LDMFD sp!,{r4,r5,pc}^ EXPORT _p_subb ; _p_subb ; r0,r1 point to the numbers, r2 is borrow (0 or 1) ; return with number at r1 decreased, new borrow in r0 _p_subb STMFD sp!,{r4,r5,lr} LDR r3,gp_ ; addr of gp variable LDR r3,[r3] ; value of gp variable MOVS r3,r3,LSL#16 ; <<16 [it's a short] MOVEQ r0,r2 ; precision = 0 ??! LDMEQFD sp!,{r4,r5,pc}^ EOR r2,r2,#1 ; carry = 1-borrow MOVS r2,r2,LSR#1 ; carry into C flag ps1 LDR r4,[r0] LDR r5,[r1],#4 SBCS r4,r4,r5 STR r4,[r0],#4 SUB r3,r3,#1<<16 TEQ r3,#0 BNE ps1 ADC r0,r3,#0 ; r3==0 here, so r0 = carry EOR r0,r0,#1 ; borrow = 1-carry LDMFD sp!,{r4,r5,pc}^ EXPORT _p_rol ; _p_rol ; r0 points to a number, r1 is carry. ; rotate left 1 bit, return new carry _p_rol LDR r3,gp_ LDR r3,[r3] MOVS r3,r3,LSL#16 MOVEQ r0,r1 MOVEQS pc,lr MOVS r1,r1,LSR#1 ; carry into C flag pr1 LDR r2,[r0] ADCS r2,r2,r2 ; left shift 1b with carry -> b0 STR r2,[r0],#4 SUB r3,r3,#1<<16 TEQ r3,#0 BNE pr1 ADC r0,r3,#0 MOVS pc,lr EXPORT _p_smul ; _p_smul ; r0:prod r1:multiplicand r2=multiplier ; accumulate into prod _p_smul STMFD sp!,{r5-r9,lr} LDR r3,gp_ LDR r3,[r3] MOVS r3,r3,LSL#16 LDMEQFD sp!,{r5-r9,pc}^ MOV r14,#0 ; carried-over high word MOV r7,r2,LSR#16 ; high1 in r7 pm1 LDR r5,[r1],#4 ; get a digit ; now a 32x32->64 multiply... ;NBNBNB factor out splitting of r2, since it doesn't change MOV r6,r5,LSR#16 ; high0 in r6 SUB r5,r5,r6,LSL#16 ; low0 in r5 SUB r8,r2,r7,LSL#16 ; low1 in r8 MUL r9,r5,r8 ; ll in r9 MUL r8,r6,r8 ; hl in r8 MUL r5,r7,r5 ; lh in r5 MUL r6,r7,r6 ; hh in r6 ADDS r8,r5,r8 ; sum of cross terms in r8 ADDCS r6,r6,#&10000 ; bit 16 of hh ADDS r5,r9,r8,LSL#16 ; ll + low(cross) ADC r8,r6,r8,LSR#16 ; hh + high(cross) + carry ; now low in r5, high in r8 ADDS r5,r5,r14 ; add in carry ADC r8,r8,#0 LDR r14,[r0] ; digit from PROD ADDS r5,r5,r14 ; add it in. There will be at most 1 ADC r14,r8,#0 ; carry between prev add and this. STR r5,[r0],#4 ; store a digit LDR r5,[r0] SUBS r3,r3,#1<<16 BNE pm1 ADD r5,r5,r14 STR r5,[r0] ; add final carry back in LDMFD sp!,{r5-r9,pc}^ ; I really ought to implement mp_dmul in assembler too. ; It's pretty speed-critical. ; Actually if I do this my effort on mp_smul will have been ; wasted :-) ; Well, here goes... EXPORT _p_dmul ; _p_dmul ; r0 points to double-length space. r1,r2 point to bignums. _p_dmul STMFD sp!,{r4-r11,lr} ; next few instructions are nasty hack so I can just drop in ; my existing code... MOV r4,r0 MOV r0,r2 LDR r3,gp_ LDR r3,[r3] MOV r3,r3,LSL#16 MOV r3,r3,LSR#16 MOV r2,r3 ; now for the code itself mul9999 MOV r6,#0 MOV r7,r4 ; save start of answer ANDS r5,r3,#3 BEQ m99l1 ; multiple of 4 words CMP r5,#2 STR r6,[r4],#4 STRGE r6,[r4],#4 STRGT r6,[r4],#4 m99l1 BICS r5,r3,#3 BEQ m99l3 MOV r8,#0 MOV r9,#0 MOV r10,#0 m99l2 STMIA r4!,{r6,r8,r9,r10} SUBS r5,r5,#4 BGT m99l2 ; destination now has lots of 0s ; it's worth setting out the register allocation here: ; r0,r1 are digit pointers. r0 points to a[i], r1 to b[0] ; r2,r3 are lengths. r2 decrements, r3 doesn't ; r4 points to c[i] ; r5 is the digit (of a) we're multiplying by ; r6 is the carry ; r7 is the inner loop counter ("j") ; r8,9,10,11,12 are used by the 32x32 multiply ; r13 is SP ; r14 is the (unmodified) length of the first number m99l3 ;STMFD sp!,{r14} MOV r4,r7 ; restore c MOV r14,r2 ; save la m99l4 LDR r5,[r0],#4 ; f=a[i++] START OF OUTER LOOP MOV r6,#0 ; ca=0 MOV r7,#0 ; j=0 m99l5 LDR r8,[r1,r7,LSL#2] ; b[j] START OF INNER LOOP ; start of multiply routine -- mul3232 with all regs +8 MOV r10,r8,LSR#16 ; high halfwords in r10,r11 MOV r11,r5,LSR#16 SUB r8,r8,r10,LSL#16 ; low halfwords in r8,r9 SUB r9,r5,r11,LSL#16 MUL r12,r8,r9 ; ll in r12 MUL r9,r10,r9 ; hl in r9 MUL r8,r11,r8 ; lh in r8 MUL r10,r11,r10 ; hh in r10 ADDS r9,r8,r9 ; sum of cross terms in r9; now r8 spare ADDCS r10,r10,#&10000 ; insert carry ADDS r8,r12,r9,LSL#16 ; low bits in r8 ADC r9,r10,r9,LSR#16 ; high bits in r9 ; end of multiply routine. H in r9, L in r8 LDR r10,[r4,r7,LSL#2] ; c[i+j] ADDS r8,r8,r10 ; f*b[j] + c[i+j] ADC r9,r9,#0 ; including high bits ADDS r8,r8,r6 ; ... + ca ADC r6,r9,#0 ; high bits are new carry STR r8,[r4,r7,LSL#2] ; low bits are new value for c[i+j] ADD r7,r7,#1 ; ++j CMP r7,r3 ; if (j<lb) BLT m99l5 ; then continue with inner loop ; END OF INNER LOOP STR r6,[r4,r7,LSL#2] ; c[i+j]=ca ADD r4,r4,#4 ; advance &c[i] SUBS r2,r2,#1 ; finished outer loop? BGT m99l4 ; nope ; END OF OUTER LOOP LDMFD sp!,{r4-r11,pc}^ END