home *** CD-ROM | disk | FTP | other *** search
- ; ARM code version of IDEA stuff, (1) for speed and (2) because
- ; Cv5 actually compiles ideaExpandKey *wrongly*.
- ; Unfortunately IDEA assumes MSB first, so we have to reverse everything.
-
- ; Please see the note at the start of ideaExpandKey regarding
- ; alignment.
-
- IdeaRounds EQU 8 ; number of passes ideaCipher makes through its round loop
-
- inbuf RN 0 ; ideaCipher: pointer to the 8-byte input block (advanced while reading)
- outbuf RN 1 ; ideaCipher: pointer to the 8-byte output block (advanced while writing)
- key RN 2 ; ideaCipher: pointer to the next 16-bit subkey, stepped by 2 per subkey
- round RN 3 ; ideaCipher: count of round-loop passes still to run
- x1 RN 4 ; x1-x4: the four working values of the block; only bits 0-15 are meaningful
- x2 RN 5
- x3 RN 6
- x4 RN 7
- t1 RN 8 ; t1, t2: scratch (loaded subkey, halves of a product, bytes being assembled)
- t2 RN 9
- s2 RN 10 ; s2, s3: per-round saved copies of x2 and x3; also reused as masks/scratch outside the round body
- s3 RN 11
- sp RN 13
- lr RN 14
- pc RN 15
-
- r0 RN 0 ; plain register names; ideaExpandKey uses these instead of the role names above
- r1 RN 1
- r2 RN 2
- r3 RN 3
- r4 RN 4
- r5 RN 5
- r6 RN 6
- r7 RN 7
- r8 RN 8
- ip RN 12 ; used by ideaExpandKey as its second-loop counter
-
- AREA |A$$Code|, CODE, READONLY
- EXPORT ideaCipher
- EXPORT ideaExpandKey
-
- ; ideaCipher: transform one 8-byte block. r0 -> inbuf (8 bytes, fetched with two word LDRs before anything is written)
- ; r1 -> outbuf (8 bytes, written with two word STRs). r0-r3 are modified; r4-r11 are saved and restored.
- ; r2 -> key (16-bit subkeys read in order: 6 per pass for IdeaRounds passes, then 4 more for the output transform)
- ideaCipher
- STMFD sp!,{x1-s3,lr} ; saves r4-r11 and lr; change if reg nums change!!!!
- MOV round,#IdeaRounds
- MOV s2,#&FF ; s2 serves as a byte mask until the first round overwrites it
- LDR t1,[inbuf],#4 ; NOTE(review): word load - presumably needs inbuf 4-aligned; confirm against callers
- AND x1,s2,t1 ; 1H (bits 0-7 of the loaded word)
- AND t2,s2,t1,LSR#8 ; 1L
- ADD x1,t2,x1,LSL#8 ; x1 = 1H:1L
- AND x2,s2,t1,LSR#16 ; 2H
- AND t2,s2,t1,LSR#24 ; 2L
- ADD x2,t2,x2,LSL#8 ; x2 = 2H:2L
- LDR t1,[inbuf]
- AND x3,s2,t1 ; 3H
- AND t2,s2,t1,LSR#8 ; 3L
- ADD x3,t2,x3,LSL#8 ; x3 = 3H:3L
- AND x4,s2,t1,LSR#16 ; 4H
- AND t2,s2,t1,LSR#24 ; 4L
- ADD x4,t2,x4,LSL#8 ; x4 = 4H:4L
- ic1 ; start of DO loop: one round per pass, consuming six subkeys
- ; MUL(x1,*key++): bits 0-15 of x1 become the product of x1 and the subkey modulo 65537, with 0 standing for 65536
- LDR t1,[key],#2 ; word load, pointer steps one halfword; NOTE(review): relies on bits 0-15 holding the halfword at key even when key is not word-aligned - confirm for the target CPU
- MOV t1,t1,LSL#16
- MOVS t1,t1,LSR#16 ; t1 = subkey masked to 16 bits; Z set if it is zero
- RSBEQ x1,x1,#1 ; subkey is 0: result is 1 - x1
- BEQ md1
- MOV x1,x1,LSL#16
- MOVS x1,x1,LSR#16 ; x1 masked to 16 bits; Z set if it is zero
- RSBEQ x1,t1,#1 ; x1 is 0: result is 1 - subkey
- BEQ md1
- MUL x1,t1,x1 ; 32-bit product of the two 16-bit values
- MOV t1,x1,LSR#16 ; t1 = high half of the product
- BIC t2,x1,t1,LSL#16 ; t2 = low half of the product
- SUBS x1,t2,t1 ; x1 = low - high
- ADDLO x1,x1,#1 ; add 1 when the subtraction borrowed
- md1 ; x2 += *key++; x3 += *key++
- LDR t1,[key],#2
- ADD x2,x2,t1 ; t1 is not masked, so bits 16-31 of x2 now hold junk; they never affect bits 0-15
- LDR t1,[key],#2
- ADD x3,x3,t1 ; same for x3; junk is masked at MUL inputs and shifted out before the final store
- ; MUL(x4,*key++): same sequence as the MUL on x1 above
- LDR t1,[key],#2
- MOV t1,t1,LSL#16
- MOVS t1,t1,LSR#16
- RSBEQ x4,x4,#1
- BEQ md2
- MOV x4,x4,LSL#16
- MOVS x4,x4,LSR#16
- RSBEQ x4,t1,#1
- BEQ md2
- MUL x4,t1,x4
- MOV t1,x4,LSR#16
- BIC t2,x4,t1,LSL#16
- SUBS x4,t2,t1
- ADDLO x4,x4,#1
- md2 ; s3=x3; x3^=x1
- MOV s3,x3 ; keep the pre-mix x3 for the end of the round
- EOR x3,x3,x1
- ; MUL(x3,*key++): same sequence as the MUL on x1 above
- LDR t1,[key],#2
- MOV t1,t1,LSL#16
- MOVS t1,t1,LSR#16
- RSBEQ x3,x3,#1
- BEQ md3
- MOV x3,x3,LSL#16
- MOVS x3,x3,LSR#16
- RSBEQ x3,t1,#1
- BEQ md3
- MUL x3,t1,x3
- MOV t1,x3,LSR#16
- BIC t2,x3,t1,LSL#16
- SUBS x3,t2,t1
- ADDLO x3,x3,#1
- md3 ; s2=x2; x2^=x4; x2+=x3
- MOV s2,x2 ; keep the pre-mix x2 for the end of the round (the byte mask in s2 is no longer needed)
- EOR x2,x2,x4
- ADD x2,x2,x3
- ; MUL(x2,*key++): same sequence as the MUL on x1 above
- LDR t1,[key],#2
- MOV t1,t1,LSL#16
- MOVS t1,t1,LSR#16
- RSBEQ x2,x2,#1
- BEQ md4
- MOV x2,x2,LSL#16
- MOVS x2,x2,LSR#16
- RSBEQ x2,t1,#1
- BEQ md4
- MUL x2,t1,x2
- MOV t1,x2,LSR#16
- BIC t2,x2,t1,LSL#16
- SUBS x2,t2,t1
- ADDLO x2,x2,#1
- md4 ; x3+=x2; x1^=x2; x4^=x3; x2^=s3; x3^=s2;
- ADD x3,x3,x2
- EOR x1,x1,x2
- EOR x4,x4,x3
- EOR x2,x2,s3 ; x2 takes the saved x3 ...
- EOR x3,x3,s2 ; ... and x3 takes the saved x2, so the two middle values cross over each round
- ; while (--r);
- SUBS round,round,#1
- BNE ic1
- ; Output transform. MUL(x1,*key++): same sequence as the MUL on x1 in the round loop
- LDR t1,[key],#2
- MOV t1,t1,LSL#16
- MOVS t1,t1,LSR#16
- RSBEQ x1,x1,#1
- BEQ md5
- MOV x1,x1,LSL#16
- MOVS x1,x1,LSR#16
- RSBEQ x1,t1,#1
- BEQ md5
- MUL x1,t1,x1
- MOV t1,x1,LSR#16
- BIC t2,x1,t1,LSL#16
- SUBS x1,t2,t1
- ADDLO x1,x1,#1
- md5 ; x3 += *key++; x2+=*key++ (x3 first here, unlike the round loop)
- LDR t1,[key],#2
- ADD x3,x3,t1
- LDR t1,[key],#2
- ADD x2,x2,t1
- ; MUL(x4,*key): last subkey, so the pointer is not advanced
- LDR t1,[key]
- MOV t1,t1,LSL#16
- MOVS t1,t1,LSR#16
- RSBEQ x4,x4,#1
- BEQ md6
- MOV x4,x4,LSL#16
- MOVS x4,x4,LSR#16
- RSBEQ x4,t1,#1
- BEQ md6
- MUL x4,t1,x4
- MOV t1,x4,LSR#16
- BIC t2,x4,t1,LSL#16
- SUBS x4,t2,t1
- ADDLO x4,x4,#1
- md6 ; store x1..x4 in outbuf, in the order x1, x3, x2, x4, each with its two bytes swapped
- MOV s2,#&FF
- ORR s2,s2,s2,LSL#16 ; s2 = 00 FF 00 FF
- MOV x1,x1,LSL#16 ; x1 = 1H 1L 00 00 (also discards the junk in bits 16-31)
- MOV x3,x3,LSL#16 ; x3 = 3H 3L 00 00
- ORR t1,x3,x1,LSR#16 ; t1 = 3H 3L 1H 1L
- AND s3,s2,t1 ; s3 = 00 3L 00 1L
- AND t1,s2,t1,LSR#8 ; t1 = 00 3H 00 1H
- ADD t1,t1,s3,LSL#8 ; t1 = 3L 3H 1L 1H
- STR t1,[outbuf],#4 ; NOTE(review): word store - presumably needs outbuf 4-aligned; confirm against callers
- MOV x2,x2,LSL#16 ; x2 = 2H 2L 00 00
- MOV x4,x4,LSL#16 ; x4 = 4H 4L 00 00
- ORR t1,x4,x2,LSR#16 ; t1 = 4H 4L 2H 2L
- AND s3,s2,t1 ; s3 = 00 4L 00 2L
- AND t1,s2,t1,LSR#8 ; t1 = 00 4H 00 2H
- ADD t1,t1,s3,LSL#8 ; t1 = 4L 4H 2L 2H
- STR t1,[outbuf]
- ; and we're done. I think.
- LDMFD sp!,{x1-s3,pc}^ ; change if reg nums change!!!! NOTE(review): the ^ with pc in the list looks like a 26-bit-style return that also restores the saved flags - confirm before building for a 32-bit-mode target
-
- ; r0 -> userkey (byte const *); 16 bytes are read, one LDRB at a time
- ; r1 -> EK (word16 *); 52 entries are written, as 26 whole words via STR/STMIA
- ; NB: this *requires* that EK be 4-aligned. This is always the case
- ; as ideaExpandKey is actually used. However, userkey is often
- ; *not* 4-aligned; it need not even be 2-aligned.
- ; I apologise for the fact that alignment is relevant at all;
- ; this is entirely the result of my stupidity.
- ideaExpandKey
- STMFD sp!,{r4-r8,lr} ; r4-r8 are work registers below; r0-r3 and ip are used without being saved
- ; First loop: put user key into first 8 EK entries,
- ; swapping bytes because of endianness conflict.
- MOV r4,#4 ; 4 passes, each packing 4 key bytes into one word (two EK entries)
- l1 LDRB r3,[r0],#1 ; 1st byte -> bits 8-15
- LDRB r2,[r0],#1 ; 2nd byte -> bits 0-7
- ADD r3,r2,r3,LSL#8 ; low halfword = 1st:2nd (the even-numbered EK entry)
- LDRB r2,[r0],#1
- ADD r3,r3,r2,LSL#24 ; sic: 3rd byte is the high byte of the upper halfword
- LDRB r2,[r0],#1
- ADD r3,r3,r2,LSL#16 ; sic: 4th byte is the low byte of the upper halfword (the odd-numbered EK entry)
- STR r3,[r1],#4
- SUBS r4,r4,#1
- BGT l1
- ; now we've added 16 bytes to r0 (no longer needed)
- ; and r1 (needs resetting).
- SUB r0,r1,#16 ; so r0 is now EK
- ; Second loop: we could certainly optimise this way further,
- ; but it's not done very often so we don't bother.
- MOV ip,#6 ; 6 passes: the first 5 store 8 entries each, the last stores 4
- LDMIA r0!,{r1,r2,r3,r4} ; reload the 8 entries just written; r0 now points at EK[8]
- ; r1: EK[0,1]
- ; r2: EK[2,3]
- ; r3: EK[4,5]
- ; r4: EK[6,7]
- l2 MOV r1,r1,ROR#16 ; swap halfwords so the even-numbered entry sits in bits 16-31
- MOV r2,r2,ROR#16
- MOV r3,r3,ROR#16
- MOV r4,r4,ROR#16
- ; now we have, always, high 16bits first. r1:r2:r3:r4 is rotated left 25 bits as one 128-bit value into r5:r6:r7:r8
- MOV r5,r1,LSL#25
- ADD r5,r5,r2,LSR#7 ; r5: 1,2 2,3
- MOV r6,r2,LSL#25
- ADD r6,r6,r3,LSR#7 ; r6: 3,4 4,5
- MOV r7,r3,LSL#25
- ADD r7,r7,r4,LSR#7 ; r7: 5,6 6,7
- MOV r8,r4,LSL#25
- ADD r8,r8,r1,LSR#7 ; r8: 7,0 0,1 (wraps round to the top bits of the old r1)
- ; get order right again (even-numbered entry back in bits 0-15, as stored in memory)
- MOV r1,r5,ROR#16
- MOV r2,r6,ROR#16
- MOV r3,r7,ROR#16
- MOV r4,r8,ROR#16
- SUBS ip,ip,#1
- STMGTIA r0!,{r1,r2,r3,r4} ; passes 1-5: store all 8 new entries; they are also the input to the next pass
- BGT l2
- STMIA r0!,{r1,r2} ; last time only write 4 halfwords
- ; Done. (Phew!)
- LDMFD sp!,{r4-r8,pc}^ ; NOTE(review): the ^ with pc in the list looks like a 26-bit-style return that also restores the saved flags - confirm before building for a 32-bit-mode target
-
- END
-