home *** CD-ROM | disk | FTP | other *** search
- * $VER: c2p2.s (22.11.96) 33.2
- *
- * - c2p for chunky duplets
- * - this routine is public domain, use it as you want
- * - based on the new merge rout by Mikael Kalms
- *
- * Version history:
- *
- * 33.1 21.11.96 Aki Laukkanen (amlaukka@cc.helsinki.fi)
- *
- * should work, not tested
- *
- * 33.2 22.11.96 laukkanen
- *
- * - now it even works. :=)
- * - misc cleanups
- *
-
- xdef _c2p2
-
- ; from:
- ; 0 2 4 6 8 10 12 14 16 18 20 22 24 26 28 30
- ; a1a0 b1b0 c1c0 d1d0 e1e0 f1f0 g1g0 h1h0 i1i0 j1j0 k1k0 l1l0 m1m0 n1n0 o1o0 p1p0
- ; q1q0 r1r0 s1s0 t1t0 u1u0 v1v0 w1w0 x1x0 y1y0 z1z0 A1A0 B1B0 C1C0 D1D0 E1E0 F1F0
-
- ; 1st pass 16x1
- ; a1a0 b1b0 c1c0 d1d0 e1e0 f1f0 g1g0 h1h0 q1q0 r1r0 s1s0 t1t0 u1u0 v1v0 w1w0 x1x0
- ; i1i0 j1j0 k1k0 l1l0 m1m0 n1n0 o1o0 p1p0 y1y0 z1z0 A1A0 B1B0 C1C0 D1D0 E1E0 F1F0
-
- ; 2nd pass 8x1
-
- ; a1a0 b1b0 c1c0 d1d0 i1i0 j1j0 k1k0 l1l0 q1q0 r1r0 s1s0 t1t0 y1y0 z1z0 A1A0 B1B0
- ; e1e0 f1f0 g1g0 h1h0 m1m0 n1n0 o1o0 p1p0 u1u0 v1v0 w1w0 x1x0 C1C0 D1D0 E1E0 F1F0
-
- ; 3rd pass 4x1
-
- ; a1a0 b1b0 e1e0 f1f0 i1i0 j1j0 m1m0 n1n0 q1q0 r1r0 u1u0 v1v0 y1y0 z1z0 C1C0 D1D0
- ; c1c0 d1d0 g1g0 h1h0 k1k0 l1l0 o1o0 p1p0 s1s0 t1t0 w1w0 x1x0 A1A0 B1B0 E1E0 F1F0
-
- ; 4th pass 2x1
- ; a1a0 c1c0 e1e0 g1g0 i1i0 k1k0 m1m0 o1o0 q1q0 s1s0 u1u0 w1w0 y1y0 A1A0 C1C0 E1E0
- ; b1b0 d1d0 f1f0 h1h0 j1j0 l1l0 n1n0 p1p0 r1r0 t1t0 v1v0 x1x0 z1z0 B1B0 D1D0 F1F0
-
- ; last pass 1x1
- ; to:
- ; a1b1 c1d1 e1f1 g1h1 i1j1 k1l1 m1n1 o1p1 q1r1 s1t1 u1v1 w1x1 y1z1 A1B1 C1D1 E1F1
- ; a0b0 c0d0 e0f0 g0h0 i0j0 k0l0 m0n0 o0p0 q0r0 s0t0 u0v0 w0x0 y0z0 A0B0 C0D0 E0F0
-
- include "exec/types.i"
-
;-----------------------------------------------------------------------
; c2p2 / _c2p2 - chunky-to-planar conversion for 2-bit pixels (duplets)
;
; In:      a0 = chunky start
;          a1 = bitplane1 (receives bit 0 of every pixel)
;          a2 = bitplane2 (receives bit 1 of every pixel)
;          a3 = chunky end (address just past the last chunky byte)
; Out:     -
; Trashes: d0-d1/a0-a1 and the condition codes.
;          d2-d7/a2-a4 are saved on entry and restored on exit.
;
; Every group of 8 chunky bytes (32 duplets) produces one longword for
; each bitplane.  The loop terminates only when a0 becomes exactly equal
; to a3, so the chunky length MUST be a multiple of 8 bytes.  An empty
; range (a0 = a3) returns immediately without touching memory.
;
; The five merge passes are the ones listed in the pass table at the top
; of this file.  The routine is software pipelined: the results of one
; group are parked in d6/a4 and written out while the next group is
; being merged, which spreads the two bitplane writes apart.
;-----------------------------------------------------------------------

_c2p2                                   ; symbol named by the file's xdef
c2p2
        cmp.l   a0,a3                   ; empty chunky range?
        beq     .exit                   ; yes - nothing to read or write

        movem.l d2-d7/a2-a4,-(sp)       ; a4 is used as a holding register
                                        ; below, so it must be saved too

        move.l  #$00FF00FF,d2           ; mask for the  8-bit merge (2nd pass)
        move.l  #$0F0F0F0F,d3           ; mask for the  4-bit merge (3rd pass)
        move.l  #$33333333,d4           ; mask for the  2-bit merge (4th pass)
        move.l  #$55555555,d5           ; mask for the  1-bit merge (5th pass)

; --- prologue: convert the first group, keep its results pending --------
.start
        move.l  (a0)+,d0                ; duplets  0..15
        move.l  (a0)+,d1                ; duplets 16..31

        swap    d1                      ; 1st pass: exchange the low word of
        move.w  d0,d7                   ; d0 with the high word of d1
        move.w  d1,d0
        move.w  d7,d1
        swap    d1

        move.l  d1,d7                   ; 2nd pass: exchange the low byte of
        lsr.l   #8,d7                   ; each d0 word with the high byte of
        eor.l   d0,d7                   ; each d1 word (xor-swap under mask)
        and.l   d2,d7
        eor.l   d7,d0
        lsl.l   #8,d7
        eor.l   d7,d1

        move.l  d1,d7                   ; 3rd pass: same exchange on nibbles
        lsr.l   #4,d7
        eor.l   d0,d7
        and.l   d3,d7
        eor.l   d7,d0
        lsl.l   #4,d7
        eor.l   d7,d1

        move.l  d1,d7                   ; 4th pass: same exchange on bit pairs
        lsr.l   #2,d7
        eor.l   d0,d7
        and.l   d4,d7
        eor.l   d7,d0
        lsl.l   #2,d7
        eor.l   d7,d1

        move.l  d1,d7                   ; 5th pass: same exchange on single
        lsr.l   #1,d7                   ; bits; afterwards d0 holds bit 1 and
        eor.l   d0,d7                   ; d1 holds bit 0 of all 32 duplets
        and.l   d5,d7
        eor.l   d7,d0
        add.l   d7,d7                   ; d7 << 1
        eor.l   d7,d1

        move.l  d0,d6                   ; pending longword for bitplane2
        move.l  d1,a4                   ; pending longword for bitplane1

        cmp.l   a0,a3                   ; was that the only group?
        beq.s   .end

; --- main loop: merge the next group while flushing the pending one -----
; The trailing "1" / "sOEP" notes are the original author's per-
; instruction timing annotations (presumably 68060 pipeline slots).
.loop
        move.l  (a0)+,d0
        move.l  (a0)+,d1

        move.l  d6,(a2)+                ; flush pending bitplane2 longword

        swap    d1                      ; 1st pass      1
        move.w  d0,d7                   ;               1
        move.w  d1,d0                   ;               sOEP
        move.w  d7,d1                   ;               1
        swap    d1                      ;               1

        move.l  d1,d7                   ; 2nd pass      1
        lsr.l   #8,d7                   ;               1
        eor.l   d0,d7                   ;               1
        and.l   d2,d7                   ;               1
        eor.l   d7,d0                   ;               1
        lsl.l   #8,d7                   ;               1
        eor.l   d7,d1                   ;               1

        move.l  d1,d7                   ; 3rd pass      1
        lsr.l   #4,d7                   ;               1
        eor.l   d0,d7                   ;               1
        and.l   d3,d7                   ;               1
        eor.l   d7,d0                   ;               1
        lsl.l   #4,d7                   ;               1
        eor.l   d7,d1                   ;               1 = 18 cycles between
                                        ; chip writes; per the original note
                                        ; this should be free from 040/25 up
        move.l  a4,(a1)+                ; flush pending bitplane1 longword

        move.l  d1,d7                   ; 4th pass
        lsr.l   #2,d7
        eor.l   d0,d7
        and.l   d4,d7
        eor.l   d7,d0
        lsl.l   #2,d7
        eor.l   d7,d1

        move.l  d1,d7                   ; 5th pass
        lsr.l   #1,d7
        eor.l   d0,d7
        and.l   d5,d7
        eor.l   d7,d0
        add.l   d7,d7                   ; d7 << 1
        eor.l   d7,d1

        move.l  d0,d6                   ; new pending bitplane2 longword
        move.l  d1,a4                   ; new pending bitplane1 longword

        cmp.l   a0,a3                   ; more chunky data left?
        bne     .loop

; --- epilogue: write out the results of the last group ------------------
.end
        move.l  d6,(a2)
        move.l  a4,(a1)

        movem.l (sp)+,d2-d7/a2-a4
.exit
        rts
-