Brotikasten

home *** CD-ROM | disk | FTP | other *** search

/ Brotikasten / BROTCD01.iso / amiga / frodov13.lha / src / c2p4.asm < prev next >

Wrap

Assembly Source File | 1995-02-07 | 15KB | 638 lines

; c2p4.asm - 4-plane chunky-to-planar conversion (exports _c2p4, Initc2p4, Exitc2p4)

                XDEF    _c2p4
                XDEF    Initc2p4
                XDEF    Exitc2p4

; ---------------------------------------------------------------------

; void __asm c2p4 (register __a2 UBYTE *fBUFFER,
;                  register __a3 UBYTE *fBUFFER_CMP,
;                  register __a4 PLANEPTR *planes,
;                  register __a5 struct Task *task,
;                  register __d0 ULONG signals);
;
; 4-plane unpacked chunky to planar converter.
; Optimised for 68020/30 with fastmem.
;
; Author: Peter McGavin (e-mail peterm@maths.grace.cri.nz), 6 April 1994
; Based on James McCoull's 4-pass blitter algorithm.
;
; This code is public domain.
;
; Use chunky comparison buffer.  Return immediately if no diffs found.
; Perform first 2 passes (Fast->Chip) with the CPU (in 1 pass).
; Update chunky comparison buffer.
; Perform passes 3 & 4 with QBlit().
; Return immediately after launching blits.
; Signal task from CleanUp() on completion.
; Task should wait for signal before next call to c2p4().
;
; (Unimplemented speedup idea:  Might be possible to signal task after pass 3,
; but will probably need another Wait() somewhere.)
;
; Approx timing (A4000/030, 320x200x4):
;   CPU pass max 18ms (then return)
;   Asynchronous blitter passes add 31ms
;
; Example usage:
;
;   /* clear fBUFFER, fBUFFER_CMP, and planes here */
;   if ((sigbit = AllocSignal(-1)) == -1)
;       die ("Can't allocate signal!\n");
;   safe = TRUE;
;   for (;;) {
;       ... /* render to fBUFFER here */
;       if (!safe) {
;           Wait (1<<sigbit);   // wait for previous c2p4 to finish
;           safe = TRUE;
;       }
;       c2p4 (fBUFFER, fBUFFER_CMP, &RASTPORT->BitMap->Planes[0],
;             FindTask(NULL), 1<<sigbit);
;       safe = FALSE;
;   }
;   if (!safe)
;       Wait (1<<sigbit);       // wait for last c2p4 to finish
;   FreeSignal(sigbit);
;
; <20 Jan 95: Adapted for the Frodo C64 emulator and for OCS
;  by Christian Bauer>

; Geometry of the converted image.  With the values below:
;   pixels = $180*$110 = 104448 chunky bytes (one pixel per byte)
;   plsiz  = ($180/8)*$110 = 13056 bytes per bitplane

width           equ     $180            ; must be a multiple of 32
height          equ     $110
toplinestoskip  equ     0
plsiz           equ     (width/8)*height
pixels          equ     width*height
offset          equ     (width/8)*toplinestoskip

; NOTE(review): 'cleanup' is not referenced in the visible part of this file;
; presumably it is a blit-node flag used further down - confirm.
cleanup         equ     $40

                INCLUDE "exec/types.i"
                INCLUDE "exec/macros.i"
                INCLUDE "exec/memory.i"
                INCLUDE "graphics/gfxbase.i"
                INCLUDE "hardware/custom.i"

                XREF    _SysBase
                XREF    _GfxBase

                SECTION "CODE",CODE

; ---------------------------------------------------------------------
; Initc2p4
;
; Determine the installed chipset (OCS/ECS) and allocate buff2.
; Returns: d0 != 0: OK  (d0 is the AllocVec() result, which is also
;                        stored in buff2ptr)
;
; If GFXB_BIG_BLITS is clear in gb_ChipRevBits0 (OCS path):
;   qblitfunc/initblitfunc := blit43, wehaveocs := -1,
;   buff2 is allocated with MEMF_ANY.
; If GFXB_BIG_BLITS is set (label 1$):
;   qblitfunc/initblitfunc := blit31, wehaveocs := 0,
;   buff2 is allocated with MEMF_CHIP.
; Both paths allocate pixels/2 bytes and preserve a6.
; ---------------------------------------------------------------------

Initc2p4        move.l  _GfxBase,a0
                btst    #GFXB_BIG_BLITS,gb_ChipRevBits0(a0)
                bne     1$

                ; OCS: only the last blitter pass is queued (see 'done' in _c2p4)
                move.l  #blit43,qblitfunc
                move.l  #blit43,initblitfunc
                move.w  #-1,wehaveocs

                move.l  a6,-(sp)
                move.l  _SysBase,a6
                move.l  #pixels/2,d0
                move.l  #MEMF_ANY,d1
                JSRLIB  AllocVec
                move.l  d0,buff2ptr     ; d0 is left untouched as return value
                move.l  (sp)+,a6
                rts

                ; big blits available: blitter passes start at blit31
1$              move.l  #blit31,qblitfunc
                move.l  #blit31,initblitfunc
                clr.w   wehaveocs

                move.l  a6,-(sp)
                move.l  _SysBase,a6
                move.l  #pixels/2,d0
                move.l  #MEMF_CHIP,d1
                JSRLIB  AllocVec
                move.l  d0,buff2ptr     ; d0 is left untouched as return value
                move.l  (sp)+,a6
                rts

; ---------------------------------------------------------------------
; Exitc2p4
;
; Free buff2 with FreeVec() if buff2ptr is non-zero.  Preserves a6.
;
; NOTE(review): buff2ptr is not cleared after the FreeVec() call, so a
; second call to Exitc2p4 would pass the same pointer to FreeVec() again.
; ---------------------------------------------------------------------

Exitc2p4        move.l  a6,-(sp)
                move.l  _SysBase,a6
                move.l  buff2ptr,d0
                beq     1$              ; nothing allocated
                move.l  d0,a1
                JSRLIB  FreeVec
1$              move.l  (sp)+,a6
                rts

; ---------------------------------------------------------------------
; _c2p4
;
; In:  a2 = chunky buffer, a3 = chunky compare buffer, a4 = planes,
;      a5 = task, d0 = signals   (see prototype at the top of the file)
;
; a2, a4, a5 and d0 are stored in mybltnode; a3 is only used directly by
; the compare/convert loops below.
; ---------------------------------------------------------------------

_c2p4:          movem.l d2-d7/a2-a6,-(sp)

; save arguments

                move.l  #mybltnode,a0
                move.l  a2,(chunky-mybltnode,a0)
                move.l  a4,(planes-mybltnode,a0)
                move.l  a5,(task-mybltnode,a0)
                move.l  d0,(signals-mybltnode,a0)

;-------------------------------------------------
;original chunky data
;0              ........a3a2a1a0 ........b3b2b1b0
;2              ........c3c2c1c0 ........d3d2d1d0
;4              ........e3e2e1e0 ........f3f2f1f0
;6              ........g3g2g1g0 ........h3h2h1h0
;8              ........i3i2i1i0 ........j3j2j1j0
;10             ........k3k2k1k0 ........l3l2l1l0
;12             ........m3m2m1m0 ........n3n2n1n0
;14             ........o3o2o1o0 ........p3p2p1p0
;16             ........q3q2q1q0 ........r3r2r1r0
;18             ........s3s2s1s0 ........t3t2t1t0
;20             ........u3u2u1u0 ........v3v2v1v0
;22             ........w3w2w1w0 ........x3x2x1x0
;24             ........y3y2y1y0 ........z3z2z1z0
;26             ........A3A2A1A0 ........B3B2B1B0
;28             ........C3C2C1C0 ........D3D2D1D0
;30             ........E3E2E1E0 ........F3F2F1F0
;-------------------------------------------------

                move.l  buff2ptr,a4     ; a4 -> buff2
                move.l  #$00ff00ff,d7   ; constant
                move.w  #pixels/32,d6   ; loop counter
                bra.b   end_pass1loop   ; enter at the dbra so exactly pixels/32 groups run

                CNOP    0,4

; main loop (starts here) processes 32 chunky pixels at a time
; compare next 32 pixels with compare page, looking for differences
;
; Each cmpm.l advances a2 and a3 by 4 bytes; on the first mismatch the
; matching fixN label rewinds both pointers to the start of the group.
; a0 still points at mybltnode while this loop runs.

initpass1loop:
                cmpm.l  (a2)+,(a3)+
                bne.w   fix1
                cmpm.l  (a2)+,(a3)+
                bne.w   fix2
                cmpm.l  (a2)+,(a3)+
                bne.b   fix3
                cmpm.l  (a2)+,(a3)+
                bne.b   fix4
                cmpm.l  (a2)+,(a3)+
                bne.b   fix5
                cmpm.l  (a2)+,(a3)+
                bne.b   fix6
                cmpm.l  (a2)+,(a3)+
                bne.b   fix7
                cmpm.l  (a2)+,(a3)+
                bne.b   fix8
                addq.l  #8,a4           ; skip 8 bytes in output
end_pass1loop:
                dbra    d6,initpass1loop

; If we get to here then no difference was found.
; Signal the task and return.
;
; NOTE(review): the exec base is fetched from absolute address 4 here,
; whereas Initc2p4/Exitc2p4 load it from _SysBase.

                move.l  (task-mybltnode,a0),a1
                move.l  (signals-mybltnode,a0),d0
                move.l  (4).w,a6
                JSRLIB  Signal
                movem.l (sp)+,d2-d7/a2-a6
                rts

; This becomes the main loop after the first difference is found
; (it falls out to 'done' instead of signalling and returning)

pass1loop:
                cmpm.l  (a2)+,(a3)+
                bne.b   fix1
                cmpm.l  (a2)+,(a3)+
                bne.b   fix2
                cmpm.l  (a2)+,(a3)+
                bne.b   fix3
                cmpm.l  (a2)+,(a3)+
                bne.b   fix4
                cmpm.l  (a2)+,(a3)+
                bne.b   fix5
                cmpm.l  (a2)+,(a3)+
                bne.b   fix6
                cmpm.l  (a2)+,(a3)+
                bne.b   fix7
                cmpm.l  (a2)+,(a3)+
                bne.b   fix8
                addq.l  #8,a4           ; skip 8 bytes in output
                dbra    d6,pass1loop
                bra.w   done

; difference found, restore a2 and a3
; (fixN is reached after N compares, i.e. with both pointers advanced by
; 4*N bytes; even-numbered entries subtract 4 and fall into the next one)

fix8:           subq.l  #4,a2
                subq.l  #4,a3
fix7:           sub.w   #28,a2
                sub.w   #28,a3
                bra.b   go_c2p
fix6:           subq.l  #4,a2
                subq.l  #4,a3
fix5:           sub.w   #20,a2
                sub.w   #20,a3
                bra.b   go_c2p
fix4:           subq.l  #4,a2
                subq.l  #4,a3
fix3:           sub.w   #12,a2
                sub.w   #12,a3
                bra.b   go_c2p
fix2:           subq.l  #4,a2
                subq.l  #4,a3
fix1:           subq.l  #4,a2
                subq.l  #4,a3

; convert 32 pixels (passes 1 and 2 combined)
;
; The 32 source bytes are loaded into d0-d3 (first 16) and a0/a1/a5/a6
; (last 16); this overwrites a0, which held the mybltnode pointer.
; Each half produces two longwords: one stored at (a4)+ and one stored
; pixels/4 bytes further into buff2.
;
; NOTE(review): only d0-d3 are masked with $0f0f0f0f before the 32 bytes
; are written to the compare buffer; a0/a1/a5/a6 are written unmasked.
; If source bytes can have non-zero upper nibbles, the first 16 bytes of
; such a group would mismatch the compare buffer on every call - confirm
; whether this asymmetry is intended.

go_c2p:         movem.l (a2)+,d0-d3/a0/a1/a5/a6
                                        ; ABCD EFGH IJKL MNOP QRST UVWX YZ01 2345

                move.l  #$0f0f0f0f,d4   ;<clear upper nibbles>
                and.l   d4,d0
                and.l   d4,d1
                and.l   d4,d2
                and.l   d4,d3

                movem.l d0-d3/a0/a1/a5/a6,(a3)  ; update compare buffer
                adda.w  #32,a3

                lsl.l   #4,d0           ; A.B.C.D.
                move.l  d0,d4           ; A.B.C.D.
                and.l   d7,d4           ; ..B...D.
                eor.l   d4,d0           ; A...C...
                move.l  d1,d5           ; .E.F.G.H
                and.l   d7,d5           ; ...F...H
                eor.l   d5,d1           ; .E...G..
                or.l    d1,d0           ; AE..CG..
                or.l    d5,d4           ; ..BF..DH

                move.l  d2,d1           ; .I.J.K.L
                and.l   d7,d1           ; ...J...L
                move.l  d3,d5           ; .M.N.O.P
                and.l   d7,d5           ; ...N...P

                lsl.l   #4,d4           ; .BF..DH.
                or.l    d1,d4           ; .BFJ.DHL
                lsl.l   #4,d4           ; BFJ.DHL.
                or.l    d5,d4           ; BFJNDHLP
                move.l  d4,(pixels/4,a4)

                eor.l   d5,d3           ; .M...O..
                lsr.l   #4,d3           ; ..M...O.
                eor.l   d1,d2           ; .I...K..
                or.l    d3,d2           ; .IM..KO.
                lsr.l   #4,d2           ; ..IM..KO
                or.l    d2,d0           ; AEIMCGKO

                ; bring the second 16 bytes into data registers; the first
                ; result longword is stored in between
                move.l  a6,d3
                move.l  a5,d2
                move.l  a1,d1
                move.l  d0,(a4)+
                move.l  a0,d0

                move.l  #$0f0f0f0f,d4   ;<clear upper nibbles>
                and.l   d4,d0
                and.l   d4,d1
                and.l   d4,d2
                and.l   d4,d3

                lsl.l   #4,d0           ; Q.R.S.T.
                move.l  d0,d4           ; Q.R.S.T.
                and.l   d7,d4           ; ..R...T.
                eor.l   d4,d0           ; Q...S...
                move.l  d1,d5           ; .U.V.W.X
                and.l   d7,d5           ; ...V...X
                eor.l   d5,d1           ; .U...W..
                or.l    d1,d0           ; QU..SW..
                or.l    d5,d4           ; ..RV..TX

                move.l  d2,d1           ; .Y.Z.0.1
                and.l   d7,d1           ; ...Z...1
                move.l  d3,d5           ; .2.3.4.5
                and.l   d7,d5           ; ...3...5

                lsl.l   #4,d4           ; .RV..TX.
                or.l    d1,d4           ; .RVZ.TX1
                lsl.l   #4,d4           ; RVZ.TX1.
                or.l    d5,d4           ; RVZ3TX15
                move.l  d4,(pixels/4,a4)

                eor.l   d5,d3           ; .2...4..
                lsr.l   #4,d3           ; ..2...4.
                eor.l   d1,d2           ; .Y...0..
                or.l    d3,d2           ; .Y2..04.
                lsr.l   #4,d2           ; ..Y2..04
                or.l    d2,d0           ; QUY2SW04

                move.l  d0,(a4)+

                dbra    d6,pass1loop

; start the blitter in the background for passes 3 & 4
; <OCS: pass 4 only, pass 3 is done by CPU>
;
; wehaveocs == 0 skips straight to the QBlit() call at 3$.
; Otherwise two CPU loops of pixels/8 iterations each fill buff3 from buff2:
;   loop 1$ walks forward, combining the $cccc bits of the two words of
;           each buff2 longword into one word of the lower part of buff3;
;   loop 2$ walks backward, combining the $3333 bits the same way into
;           the part of buff3 ending at buff3+pixels/2.

done:           tst.w   wehaveocs
                beq     3$

                move.w  #pixels/8-1,d7  ;blit31
                move.l  buff2ptr,a0
                move.l  buff2ptr,a1
                addq.l  #2,a1           ; a1 reads the second word of each longword
                lea     buff3,a2
                move.w  #$cccc,d2
1$              move.w  (a0)+,d0
                addq.l  #2,a0           ; step over the word read through a1
                and.w   d2,d0
                move.w  (a1)+,d1
                addq.l  #2,a1           ; step over the word read through a0
                and.w   d2,d1
                lsr.w   #2,d1
                or.w    d1,d0
                move.w  d0,(a2)+
                dbra    d7,1$

                move.w  #pixels/8-1,d7  ;blit32
                move.l  buff2ptr,a0
                add.l   #pixels/2-2,a0
                move.l  buff2ptr,a1
                add.l   #pixels/2,a1
                lea     buff3+pixels/2,a2
                move.w  #$3333,d2
2$              move.w  -(a0),d0
                subq.l  #2,a0           ; step over the word read through a1
                and.w   d2,d0
                lsl.w   #2,d0
                move.w  -(a1),d1
                subq.l  #2,a1           ; step over the word read through a0
                and.w   d2,d1
                or.w    d1,d0
                move.w  d0,-(a2)
                dbra    d7,2$

                ; queue the blit node; the remaining passes run asynchronously
3$              lea     mybltnode,a1
                move.l  _GfxBase,a6
                JSRLIB  QBlit
; NOTE(review): the visible source ends in the middle of the routine here;
; the text that follows this point has been left exactly as found.
                movem.l (sp)+,d2-d7