home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Brotikasten
/
BROTCD01.iso
/
amiga
/
frodov13.lha
/
src
/
c2p4.asm
< prev
next >
Wrap
Assembly Source File
|
1995-02-07
|
15KB
|
638 lines
XDEF _c2p4
XDEF Initc2p4
XDEF Exitc2p4
; ---------------------------------------------------------------------
; void __asm c2p4 (register __a2 UBYTE *fBUFFER,
; register __a3 UBYTE *fBUFFER_CMP,
; register __a4 PLANEPTR *planes,
; register __a5 struct Task *task,
; register __d0 ULONG signals);
;
; 4-plane unpacked chunky to planar converter.
; Optimised for 68020/30 with fastmem.
;
; Author: Peter McGavin (e-mail peterm@maths.grace.cri.nz), 6 April 1994
; Based on James McCoull's 4-pass blitter algorithm.
;
; This code is public domain.
;
; Use chunky comparison buffer. Return immediately if no diffs found.
; Perform first 2 passes (Fast->Chip) with the CPU (in 1 pass).
; Update chunky comparison buffer.
; Perform passes 3 & 4 with QBlit().
; Return immediately after launching blits.
; Signal task from CleanUp() on completion.
; Task should wait for signal before next call to c2p4().
;
; (Unimplemented speedup idea: Might be possible to signal task after pass 3,
; but will probably need another Wait() somewhere.)
;
; Approx timing (A4000/030, 320x200x4):
; CPU pass max 18ms (then return)
; Asynchronous blitter passes add 31ms
;
; Example usage:
;
; /* clear fBUFFER, fBUFFER_CMP, and planes here */
; if ((sigbit = AllocSignal(-1)) == -1)
; die ("Can't allocate signal!\n");
; safe = TRUE;
; for (;;) {
; ... /* render to fBUFFER here */
; if (!safe) {
; Wait (1<<sigbit); // wait for previous c2p4 to finish
; safe = TRUE;
; }
; c2p4 (fBUFFER, fBUFFER_CMP, &RASTPORT->BitMap->Planes[0],
; FindTask(NULL), 1<<sigbit);
; safe = FALSE;
; }
; if (!safe)
; Wait (1<<sigbit); // wait for last c2p4 to finish
; FreeSignal(sigbit);
;
; <20 Jan 95: Adapted for the Frodo C64 emulator and for OCS
; by Christian Bauer>
; Display geometry and the sizes derived from it.
width           equ     384                     ; pixels per line ($180); must be a multiple of 32
height          equ     272                     ; number of lines ($110)
toplinestoskip  equ     0                       ; lines skipped at the top of the display
plsiz           equ     height*(width/8)        ; height * (width/8); presumably bytes per bitplane - confirm
pixels          equ     height*width            ; total number of chunky pixels
offset          equ     toplinestoskip*(width/8) ; (width/8) bytes for every skipped top line
cleanup         equ     $40                     ; presumably the bltnode CLEANUP flag - confirm against hardware/blit.i
INCLUDE "exec/types.i"
INCLUDE "exec/macros.i"
INCLUDE "exec/memory.i"
INCLUDE "graphics/gfxbase.i"
INCLUDE "hardware/custom.i"
XREF _SysBase
XREF _GfxBase
SECTION "CODE",CODE
; Detect the installed chip set (OCS/ECS) and allocate buff2.
;
; GFXB_BIG_BLITS clear (OCS): blit43 is installed as the blit routine,
;                             wehaveocs is set to -1 and buff2 may come
;                             from any memory type.
; GFXB_BIG_BLITS set   (ECS): blit31 is installed as the blit routine,
;                             wehaveocs is cleared and buff2 is taken
;                             from chip memory.
;
; Return: d0 != 0: OK (d0 is the AllocVec result, also stored in buff2ptr)
Initc2p4        move.l  a6,-(sp)                ; a6 is needed for the exec call below
                move.l  _GfxBase,a0
                btst    #GFXB_BIG_BLITS,gb_ChipRevBits0(a0)
                bne     1$

                move.l  #blit43,d0              ; no big blits: OCS set-up
                move.w  #-1,wehaveocs
                move.l  #MEMF_ANY,d1
                bra     2$

1$              move.l  #blit31,d0              ; big blits available: ECS set-up
                clr.w   wehaveocs
                move.l  #MEMF_CHIP,d1

; common tail: d0 = blit routine, d1 = memory requirements for buff2
2$              move.l  d0,qblitfunc
                move.l  d0,initblitfunc
                move.l  #pixels/2,d0            ; buff2 size in bytes
                move.l  _SysBase,a6
                JSRLIB  AllocVec
                move.l  d0,buff2ptr             ; NULL here means the allocation failed
                move.l  (sp)+,a6
                rts
; Release the buffer whose address is stored in buff2ptr, if there is one.
;
; Does nothing when buff2ptr is NULL. buff2ptr is cleared before the
; buffer is handed back, so calling Exitc2p4 again cannot free the same
; block a second time.
;
; a6 is preserved.
Exitc2p4        move.l  a6,-(sp)
                move.l  _SysBase,a6
                move.l  buff2ptr,d0             ; via d0 so that Z reflects a NULL pointer
                beq     1$                      ; nothing allocated, nothing to free
                clr.l   buff2ptr                ; drop the stale pointer: no double free
                move.l  d0,a1
                JSRLIB  FreeVec
1$              move.l  (sp)+,a6
                rts
_c2p4: movem.l d2-d7/a2-a6,-(sp)
; save arguments
move.l #mybltnode,a0
move.l a2,(chunky-mybltnode,a0)
move.l a4,(planes-mybltnode,a0)
move.l a5,(task-mybltnode,a0)
move.l d0,(signals-mybltnode,a0)
;-------------------------------------------------
;original chunky data
;0 ........a3a2a1a0 ........b3b2b1b0
;2 ........c3c2c1c0 ........d3d2d1d0
;4 ........e3e2e1e0 ........f3f2f1f0
;6 ........g3g2g1g0 ........h3h2h1h0
;8 ........i3i2i1i0 ........j3j2j1j0
;10 ........k3k2k1k0 ........l3l2l1l0
;12 ........m3m2m1m0 ........n3n2n1n0
;14 ........o3o2o1o0 ........p3p2p1p0
;16 ........q3q2q1q0 ........r3r2r1r0
;18 ........s3s2s1s0 ........t3t2t1t0
;20 ........u3u2u1u0 ........v3v2v1v0
;22 ........w3w2w1w0 ........x3x2x1x0
;24 ........y3y2y1y0 ........z3z2z1z0
;26 ........A3A2A1A0 ........B3B2B1B0
;28 ........C3C2C1C0 ........D3D2D1D0
;30 ........E3E2E1E0 ........F3F2F1F0
;-------------------------------------------------
move.l buff2ptr,a4 ; a4 -> buff2
move.l #$00ff00ff,d7 ; constant
move.w #pixels/32,d6 ; loop counter
bra.b end_pass1loop
CNOP 0,4
; main loop (starts here) processes 32 chunky pixels at a time
; compare next 32 pixels with compare page, looking for differences
initpass1loop: cmpm.l (a2)+,(a3)+
bne.w fix1
cmpm.l (a2)+,(a3)+
bne.w fix2
cmpm.l (a2)+,(a3)+
bne.b fix3
cmpm.l (a2)+,(a3)+
bne.b fix4
cmpm.l (a2)+,(a3)+
bne.b fix5
cmpm.l (a2)+,(a3)+
bne.b fix6
cmpm.l (a2)+,(a3)+
bne.b fix7
cmpm.l (a2)+,(a3)+
bne.b fix8
addq.l #8,a4 ; skip 8 bytes in output
end_pass1loop: dbra d6,initpass1loop
; If we get to here then no difference was found.
; Signal the task and return.
move.l (task-mybltnode,a0),a1
move.l (signals-mybltnode,a0),d0
move.l (4).w,a6
JSRLIB Signal
movem.l (sp)+,d2-d7/a2-a6
rts
; This becomes the main loop after the first difference is found
pass1loop: cmpm.l (a2)+,(a3)+
bne.b fix1
cmpm.l (a2)+,(a3)+
bne.b fix2
cmpm.l (a2)+,(a3)+
bne.b fix3
cmpm.l (a2)+,(a3)+
bne.b fix4
cmpm.l (a2)+,(a3)+
bne.b fix5
cmpm.l (a2)+,(a3)+
bne.b fix6
cmpm.l (a2)+,(a3)+
bne.b fix7
cmpm.l (a2)+,(a3)+
bne.b fix8
addq.l #8,a4 ; skip 8 bytes in output
dbra d6,pass1loop
bra.w done
; difference found, restore a2 and a3
fix8: subq.l #4,a2
subq.l #4,a3
fix7: sub.w #28,a2
sub.w #28,a3
bra.b go_c2p
fix6: subq.l #4,a2
subq.l #4,a3
fix5: sub.w #20,a2
sub.w #20,a3
bra.b go_c2p
fix4: subq.l #4,a2
subq.l #4,a3
fix3: sub.w #12,a2
sub.w #12,a3
bra.b go_c2p
fix2: subq.l #4,a2
subq.l #4,a3
fix1: subq.l #4,a2
subq.l #4,a3
; convert 32 pixels (passes 1 and 2 combined)
go_c2p: movem.l (a2)+,d0-d3/a0/a1/a5/a6 ; ABCD EFGH IJKL MNOP QRST UVWX YZ01 2345
move.l #$0f0f0f0f,d4 ;<Obere Nibbles l÷schen>
and.l d4,d0
and.l d4,d1
and.l d4,d2
and.l d4,d3
movem.l d0-d3/a0/a1/a5/a6,(a3) ; update compare buffer
adda.w #32,a3
lsl.l #4,d0 ; A.B.C.D.
move.l d0,d4 ; A.B.C.D.
and.l d7,d4 ; ..B...D.
eor.l d4,d0 ; A...C...
move.l d1,d5 ; .E.F.G.H
and.l d7,d5 ; ...F...H
eor.l d5,d1 ; .E...G..
or.l d1,d0 ; AE..CG..
or.l d5,d4 ; ..BF..DH
move.l d2,d1 ; .I.J.K.L
and.l d7,d1 ; ...J...L
move.l d3,d5 ; .M.N.O.P
and.l d7,d5 ; ...N...P
lsl.l #4,d4 ; .BF..DH.
or.l d1,d4 ; .BFJ.DHL
lsl.l #4,d4 ; BFJ.DHL.
or.l d5,d4 ; BFJNDHLP
move.l d4,(pixels/4,a4)
eor.l d5,d3 ; .M...O..
lsr.l #4,d3 ; ..M...O.
eor.l d1,d2 ; .I...K..
or.l d3,d2 ; .IM..KO.
lsr.l #4,d2 ; ..IM..KO
or.l d2,d0 ; AEIMCGKO
move.l a6,d3
move.l a5,d2
move.l a1,d1
move.l d0,(a4)+
move.l a0,d0
move.l #$0f0f0f0f,d4 ;<Obere Nibbles l÷schen>
and.l d4,d0
and.l d4,d1
and.l d4,d2
and.l d4,d3
lsl.l #4,d0 ; Q.R.S.T.
move.l d0,d4 ; Q.R.S.T.
and.l d7,d4 ; ..R...T.
eor.l d4,d0 ; Q...S...
move.l d1,d5 ; .U.V.W.X
and.l d7,d5 ; ...V...X
eor.l d5,d1 ; .U...W..
or.l d1,d0 ; QU..SW..
or.l d5,d4 ; ..RV..TX
move.l d2,d1 ; .Y.Z.0.1
and.l d7,d1 ; ...Z...1
move.l d3,d5 ; .2.3.4.5
and.l d7,d5 ; ...3...5
lsl.l #4,d4 ; .RV..TX.
or.l d1,d4 ; .RVZ.TX1
lsl.l #4,d4 ; RVZ.TX1.
or.l d5,d4 ; RVZ3TX15
move.l d4,(pixels/4,a4)
eor.l d5,d3 ; .2...4..
lsr.l #4,d3 ; ..2...4.
eor.l d1,d2 ; .Y...0..
or.l d3,d2 ; .Y2..04.
lsr.l #4,d2 ; ..Y2..04
or.l d2,d0 ; QUY2SW04
move.l d0,(a4)+
dbra d6,pass1loop
; start the blitter in the background for passes 3 & 4
; <OCS: pass 4 only, pass 3 is done by CPU>
done: tst.w wehaveocs
beq 3$
move.w #pixels/8-1,d7 ;blit31
move.l buff2ptr,a0
move.l buff2ptr,a1
addq.l #2,a1
lea buff3,a2
move.w #$cccc,d2
1$ move.w (a0)+,d0
addq.l #2,a0
and.w d2,d0
move.w (a1)+,d1
addq.l #2,a1
and.w d2,d1
lsr.w #2,d1
or.w d1,d0
move.w d0,(a2)+
dbra d7,1$
move.w #pixels/8-1,d7 ;blit32
move.l buff2ptr,a0
add.l #pixels/2-2,a0
move.l buff2ptr,a1
add.l #pixels/2,a1
lea buff3+pixels/2,a2
move.w #$3333,d2
2$ move.w -(a0),d0
subq.l #2,a0
and.w d2,d0
lsl.w #2,d0
move.w -(a1),d1
subq.l #2,a1
and.w d2,d1
or.w d1,d0
move.w d0,-(a2)
dbra d7,2$
3$ lea mybltnode,a1
move.l _GfxBase,a6
JSRLIB QBlit
movem.l (sp)+,d2-d7