RISC DISC 2

home *** CD-ROM | disk | FTP | other *** search

/ RISC DISC 2 / RISC_DISC_2.iso / pd_share / utilities / cli / pgp2 / src / s / ideaARM < prev next >

Wrap

Text File | 1995-06-07 | 5.1 KB | 258 lines

; ARM code version of IDEA stuff, (1) for speed and (2) because
; Cv5 actually compiles ideaExpandKey *wrongly*.
; Unfortunately IDEA assumes MSB first, so we have to reverse everything.
; Please see the note at the start of ideaExpandKey regarding
; alignment.
;
; Syntax: Acorn ObjAsm.  One statement per line; labels start in
; column 0, everything else is indented.
;
; Two properties of the code that the reader should be aware of:
;  * Subkeys are fetched with a word LDR whose pointer is advanced by 2
;    each time, so every other fetch is from a non-word-aligned address.
;    Only the low 16 bits of the loaded value are ever relied upon.
;    NOTE(review): this presumably depends on the processor rotating
;    unaligned word loads (rather than faulting) - confirm for any new
;    target.
;  * Both routines return with LDMFD ...,pc}^.
;    NOTE(review): this looks like the 26-bit-mode idiom that restores
;    the caller's flags together with the PC - confirm before assembling
;    for a 32-bit mode.

IdeaRounds      EQU     8

; Register roles in ideaCipher
inbuf           RN      0
outbuf          RN      1
key             RN      2
round           RN      3
x1              RN      4
x2              RN      5
x3              RN      6
x4              RN      7
t1              RN      8
t2              RN      9
s2              RN      10
s3              RN      11

sp              RN      13
lr              RN      14
pc              RN      15

; Plain register names, used by ideaExpandKey
r0              RN      0
r1              RN      1
r2              RN      2
r3              RN      3
r4              RN      4
r5              RN      5
r6              RN      6
r7              RN      7
r8              RN      8
ip              RN      12

;-----------------------------------------------------------------------
; IdeaMul x, done
;
; x = x (*) t1, the IDEA multiplication modulo 65537 in which a 16-bit
; zero stands for 65536.
;
; In:    t1   = subkey in its low 16 bits (upper bits are ignored)
;        x    = operand in its low 16 bits (upper bits are ignored)
;        done = label placed by the caller directly after the macro;
;               the two zero-operand cases branch to it
; Out:   x    = product; only the low 16 bits are meaningful
; Clobb: t1, t2, flags
;
; Operand order in the MUL matters: the destination is the same
; register as the last operand, never the same as the first.
;-----------------------------------------------------------------------
        MACRO
        IdeaMul $x, $done
        MOV     t1,t1,LSL#16            ; isolate the 16-bit subkey ...
        MOVS    t1,t1,LSR#16            ; ... and test it for zero
        RSBEQ   $x,$x,#1                ; subkey is zero: x = 1 - x
        BEQ     $done
        MOV     $x,$x,LSL#16            ; isolate the 16-bit operand ...
        MOVS    $x,$x,LSR#16            ; ... and test it for zero
        RSBEQ   $x,t1,#1                ; operand is zero: x = 1 - subkey
        BEQ     $done
        MUL     $x,t1,$x                ; 32-bit product
        MOV     t1,$x,LSR#16            ; t1 = high half
        BIC     t2,$x,t1,LSL#16         ; t2 = low half
        SUBS    $x,t2,t1                ; x = low - high
        ADDLO   $x,$x,#1                ; add 1 back if that borrowed
        MEND

        AREA    |A$$Code|, CODE, READONLY

        EXPORT  ideaCipher
        EXPORT  ideaExpandKey

;-----------------------------------------------------------------------
; ideaCipher
;
; Runs the IDEA block transformation (IdeaRounds rounds plus the output
; stage) on one 8-byte block using an already expanded key.
;
; In:    r0 -> inbuf  (8 bytes)
;        r1 -> outbuf (8 bytes)
;        r2 -> key    (expanded key; 6 halfwords are consumed per round
;                      and 4 by the output stage, 52 in total)
; Out:   8 bytes written to outbuf
; Saved: r4-r11 (pushed on entry, popped on exit)
; Clobb: r0-r3
;
; inbuf is read with two word loads and outbuf written with two word
; stores.
; NOTE(review): that presumably requires both buffers to be
; word-aligned - confirm against the callers.
;-----------------------------------------------------------------------
ideaCipher
        STMFD   sp!,{x1-s3,lr}          ; change if reg nums change!!!!
        MOV     round,#IdeaRounds

        ; Load the block as four 16-bit values, most significant byte
        ; first.
        MOV     s2,#&FF
        LDR     t1,[inbuf],#4
        AND     x1,s2,t1                ; 1H
        AND     t2,s2,t1,LSR#8          ; 1L
        ADD     x1,t2,x1,LSL#8          ; x1
        AND     x2,s2,t1,LSR#16         ; 2H
        AND     t2,s2,t1,LSR#24         ; 2L
        ADD     x2,t2,x2,LSL#8          ; x2
        LDR     t1,[inbuf]
        AND     x3,s2,t1                ; 3H
        AND     t2,s2,t1,LSR#8          ; 3L
        ADD     x3,t2,x3,LSL#8          ; x3
        AND     x4,s2,t1,LSR#16         ; 4H
        AND     t2,s2,t1,LSR#24         ; 4L
        ADD     x4,t2,x4,LSL#8          ; x4

ic1     ; start of DO loop
        ; MUL(x1,*key++)
        LDR     t1,[key],#2
        IdeaMul x1,md1
md1
        ; x2 += *key++; x3 += *key++
        ; (the upper halves of the sums are garbage; every later use
        ; either masks to 16 bits or shifts them out)
        LDR     t1,[key],#2
        ADD     x2,x2,t1
        LDR     t1,[key],#2
        ADD     x3,x3,t1

        ; MUL(x4,*key++)
        LDR     t1,[key],#2
        IdeaMul x4,md2
md2
        ; s3=x3; x3^=x1
        MOV     s3,x3
        EOR     x3,x3,x1

        ; MUL(x3,*key++)
        LDR     t1,[key],#2
        IdeaMul x3,md3
md3
        ; s2=x2; x2^=x4; x2+=x3
        MOV     s2,x2
        EOR     x2,x2,x4
        ADD     x2,x2,x3

        ; MUL(x2,*key++)
        LDR     t1,[key],#2
        IdeaMul x2,md4
md4
        ; x3+=x2; x1^=x2; x4^=x3; x2^=s3; x3^=s2;
        ADD     x3,x3,x2
        EOR     x1,x1,x2
        EOR     x4,x4,x3
        EOR     x2,x2,s3
        EOR     x3,x3,s2

        ; while (--r);
        SUBS    round,round,#1
        BNE     ic1

        ; Output stage.
        ; MUL(x1,*key++)
        LDR     t1,[key],#2
        IdeaMul x1,md5
md5
        ; x3 += *key++; x2+=*key++
        LDR     t1,[key],#2
        ADD     x3,x3,t1
        LDR     t1,[key],#2
        ADD     x2,x2,t1

        ; MUL(x4,*key)
        LDR     t1,[key]
        IdeaMul x4,md6
md6
        ; store x1..x4 in outbuf.
        ; Memory order is x1, x3, x2, x4, each most significant byte
        ; first.  s2 was reused as a temporary inside the loop, so the
        ; byte mask is rebuilt here.
        MOV     s2,#&FF
        ORR     s2,s2,s2,LSL#16         ; s2 = 00 FF 00 FF
        MOV     x1,x1,LSL#16            ; x1 = 1H 1L 00 00
        MOV     x3,x3,LSL#16            ; x3 = 3H 3L 00 00
        ORR     t1,x3,x1,LSR#16         ; t1 = 3H 3L 1H 1L
        AND     s3,s2,t1                ; s3 = 00 3L 00 1L
        AND     t1,s2,t1,LSR#8          ; t1 = 00 3H 00 1H
        ADD     t1,t1,s3,LSL#8          ; t1 = 3L 3H 1L 1H
        STR     t1,[outbuf],#4
        MOV     x2,x2,LSL#16            ; x2 = 2H 2L 00 00
        MOV     x4,x4,LSL#16            ; x4 = 4H 4L 00 00
        ORR     t1,x4,x2,LSR#16         ; t1 = 4H 4L 2H 2L
        AND     s3,s2,t1                ; s3 = 00 4L 00 2L
        AND     t1,s2,t1,LSR#8          ; t1 = 00 4H 00 2H
        ADD     t1,t1,s3,LSL#8          ; t1 = 4L 4H 2L 2H
        STR     t1,[outbuf]

        ; and we're done. I think.
        LDMFD   sp!,{x1-s3,pc}^         ; change if reg nums change!!!!

;-----------------------------------------------------------------------
; ideaExpandKey
;
; Expands a 16-byte user key into the 52-halfword key schedule read by
; ideaCipher.
;
; In:    r0 -> userkey (byte const *)
;        r1 -> EK (word16 *)
; Out:   EK[0..51] written (104 bytes)
; Saved: r4-r8 (pushed on entry, popped on exit)
; Clobb: r0-r3, ip
;
; NB: this *requires* that EK be 4-aligned. This is always the case
; as ideaExpandKey is actually used. However, userkey is often
; *not* 4-aligned; it need not even be 2-aligned.
; I apologise for the fact that alignment is relevant at all;
; this is entirely the result of my stupidity.
;
; (userkey is only ever read a byte at a time; EK is accessed with
; STR, LDMIA and STMIA.)
;-----------------------------------------------------------------------
ideaExpandKey
        STMFD   sp!,{r4-r8,lr}

        ; First loop: put user key into first 8 EK entries,
        ; swapping bytes because of endianness conflict.
        ; Each pass packs four key bytes b0..b3 into one word:
        ;   low halfword  = b0:b1  (even-numbered EK entry)
        ;   high halfword = b2:b3  (odd-numbered EK entry)
        MOV     r4,#4
l1      LDRB    r3,[r0],#1
        LDRB    r2,[r0],#1
        ADD     r3,r2,r3,LSL#8
        LDRB    r2,[r0],#1
        ADD     r3,r3,r2,LSL#24         ; sic
        LDRB    r2,[r0],#1
        ADD     r3,r3,r2,LSL#16         ; sic
        STR     r3,[r1],#4
        SUBS    r4,r4,#1
        BGT     l1

        ; now we've added 16 bytes to r0 (no longer needed)
        ; and r1 (needs resetting).
        SUB     r0,r1,#16               ; so r0 is now EK

        ; Second loop: we could certainly optimise this way further,
        ; but it's not done very often so we don't bother.
        ; Each pass rotates the 128-bit value held in r1..r4 left by
        ; 25 bits to give the next eight halfwords.  Six passes are
        ; made: the first five store all eight halfwords, the sixth
        ; falls out of the loop and stores only four.
        MOV     ip,#6
        LDMIA   r0!,{r1,r2,r3,r4}
        ; r1: EK[0,1]
        ; r2: EK[2,3]
        ; r3: EK[4,5]
        ; r4: EK[6,7]
l2      MOV     r1,r1,ROR#16
        MOV     r2,r2,ROR#16
        MOV     r3,r3,ROR#16
        MOV     r4,r4,ROR#16
        ; now we have, always, high 16bits first.
        MOV     r5,r1,LSL#25
        ADD     r5,r5,r2,LSR#7          ; r5: 1,2 2,3
        MOV     r6,r2,LSL#25
        ADD     r6,r6,r3,LSR#7          ; r6: 3,4 4,5
        MOV     r7,r3,LSL#25
        ADD     r7,r7,r4,LSR#7          ; r7: 5,6 6,7
        MOV     r8,r4,LSL#25
        ADD     r8,r8,r1,LSR#7          ; r8: 7,0 0,1
        ; get order right again
        MOV     r1,r5,ROR#16
        MOV     r2,r6,ROR#16
        MOV     r3,r7,ROR#16
        MOV     r4,r8,ROR#16
        SUBS    ip,ip,#1
        STMGTIA r0!,{r1,r2,r3,r4}
        BGT     l2
        STMIA   r0!,{r1,r2}             ; last time only write 4 halfwords

        ; Done. (Phew!)
        LDMFD   sp!,{r4-r8,pc}^

        END