home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Frozen Fish 1: Amiga
/
FrozenFish-Apr94.iso
/
bbs
/
alib
/
d5xx
/
d528
/
cpublit.lha
/
CpuBlit
/
src
/
scroll.s
< prev
next >
Wrap
Text File
|
1991-08-05
|
43KB
|
1,167 lines
***************************************************************** :ts=8 *****
*
* SCROLL.S
*
* (C) Copyright Eddy Carroll, January 1991.
*
* Replaces BltBitMap with a routine that uses the CPU (preferably
* 68030). This increases speed by a factor of about 2.8 on the A3000
* when the cache is enabled or 2.0 when the cache is disabled.
*
*****************************************************************************
include "exec/types.i"
include "exec/execbase.i"
include "exec/nodes.i"
include "graphics/gfx.i"
XDEF _NewBltBitMap
XDEF _BltBitMapAddress
XDEF _OnlySingle
XDEF _UsageCount
XDEF _Broken
XDEF _BlitFunc
XDEF _MinTaskPri
XDEF _StartBlit
XDEF _ExitBlit
XDEF _ShareBlit
XDEF _Friend1
XDEF _Friend2
XREF _SysBase
XREF _GfxBase
XREF _LVOWait
XREF _LVOWaitBlit
SECTION Scroll,CODE
*****************************************************************************
*
* NewBltBitMap()
*
* Replacement BltBitMap which uses the 68030 instead of the blitter.
* The following conditions must hold for the CPU routine to be used:
*
* o Bitmaps aligned on same longword bit offset
* (i.e. XSrc % 32 == XDest % 32)
*
* o If source bitmap == destination bitmap, then YSrc != YDest
*
* o Blitter minterm = $Cx (i.e. straight copy)
*
* If any of these conditions doesn't hold, then the original BltBitMap
* is called instead.
*
* Input:
* D0 - X Source
* D1 - Y Source
* D2 - X Dest
* D3 - Y Dest
* D4 - X Size
* D5 - Y Size
* D6 - Minterm
* D7 - Mask, indicating which planes are to be affected
* A0 - Pointer to source bitmap structure
* A1 - Pointer to destination bitmap structure
* A2 - Pointer to temporary bitmap structure (not used)
*
* Output:
* D0 - Number of planes actually copied
*
* The copy routine works as follows. Everything is done in longword
* units. If the bitmap being copied fits horizontally into a single
* longword, then the CopySingle() routine is used which copies a
* single column of longwords, masked out as appropriate. Otherwise,
* there are at least two longwords involved (the left and right edges
* of the bitmap), with possibly some longwords in between as well.
* CopyMultiple() is called to perform this copy; it uses two mask
* values to identify which bits in the left and right longwords should
* be copied. The longwords (if any) in between are copied verbatim.
*
* Note that using longwords gives a big win on the A3000 since it can
* access CHIP ram via the 32 bit bus. This relies on the data being
* longword aligned of course. In the worst case (where a bitmap width
* is not a multiple of 4), one out of every two rows will be longword
* aligned, which is not too bad. In the more common case, every row
* is longword aligned. For overscan users, it's best to have your
* screen width a multiple of 32.
*
*****************************************************************************
PreExit:
; Exit ladder back to the original BltBitMap. Each label undoes one more
; piece of the state set up by _NewBltBitMap and the test functions and
; then falls through to the next label.
move.w (sp)+,d0 ; Pop the D0 word that _Friend2 pushed
exg d6,a0 ; A0 is already correct here; this cancels the exg below
_ExitBlit:
exg d6,a0 ; Restore original A0 register
DoOldBlt:
subq.l #1,_UsageCount ; Decrement number of callers in code
oldblt2:
move.l (sp)+,d6 ; Restore original minterm pushed by _NewBltBitMap
oldblit:
jmp dummy ; Filled in with correct address later
_BltBitMapAddress equ oldblit+2
dummy: rts
_NewBltBitMap:
;
;       Replacement entry point for BltBitMap.
;
;       In:     D0/D1 = XSrc/YSrc       D2/D3 = XDest/YDest
;               D4/D5 = XSize/YSize     D6 = minterm    D7 = plane mask
;               A0 = source bitmap      A1 = destination bitmap
;
;       Any request the CPU copy cannot handle is passed on, registers
;       intact, to the original routine via oldblit (nothing on the stack)
;       or oldblt2 (caller's D6 on the stack). Requests that pass every
;       test leave here through _BlitFunc with the caller's D6 on the
;       stack, the caller's A0 parked in D6 and _UsageCount incremented.
;
        tst.w   d4                      ; Check if width is zero
        beq.s   dummy                   ; If it is, don't do anything
        tst.w   d5                      ; Zero or negative height?
        ble.s   oldblit                 ; The row loops are DBF counted and
                                        ; would wrap round to 65536 rows, so
                                        ; leave such blits to the system
        cmp.l   a0,a1                   ; Copying within the same bitmap?
        bne.s   nb1                     ; If not, go check _OnlySingle
        cmp.w   d1,d3                   ; Same bitmap and same Y row means
        beq.s   oldblit                 ; a sideways blit: use system routine
        bra.s   nb2                     ; Else skip to next check
nb1:
        tst.l   _OnlySingle             ; Restricted to same-bitmap blits?
        bne.s   oldblit                 ; If so, use standard system blit
nb2:
        move.l  d6,-(sp)                ; Save minterm; D6 is scratch from here
        and.b   #$f0,d6                 ; Keep the high nibble only
        cmp.b   #$c0,d6                 ; Is it standard COPY minterm ($Cx)?
        bne.s   oldblt2                 ; If not, exit
        move.l  d0,d6                   ; See if XSrc % 32 == XDest % 32
        eor.l   d2,d6                   ; Low 5 bits are zero if they match
        and.b   #$1f,d6                 ;
        bne.s   oldblt2                 ; If not, then have to do normal blit
        tst.l   _Broken                 ; Are we accommodating broken s/w?
        bne.s   nb3                     ; If so, skip the bitmap sanity checks
        tst.b   bm_Flags(a0)            ; Is source standard Amiga bitmap?
        bne.s   oldblt2                 ; If not, use system blit routine
        tst.w   bm_Pad(a0)              ;
        bne.s   oldblt2                 ;
        tst.b   bm_Flags(a1)            ; How about destination?
        bne.s   oldblt2                 ; If it isn't, use system blit
        tst.w   bm_Pad(a1)              ;
        bne.s   oldblt2                 ;
nb3:
        addq.l  #1,_UsageCount          ; Increment usage count
        exg     d6,a0                   ; Park caller's A0 in D6
        move.l  _BlitFunc,a0            ; Get pointer to selected test func
        jmp     (a0)                    ; And branch to it
;
; Checks the usage count for the blitter code, to see if anyone else
; is currently executing it. If so, use the blitter instead (hence
; CPU does one blit while blitter does the other blit; multiprocessing!)
;
_ShareBlit:
; Entered from _NewBltBitMap's dispatch with the caller's A0 in D6 and
; the caller's D6 on the stack.
; NOTE(review): _NewBltBitMap increments _UsageCount before dispatching,
; so the value tested here already includes the current caller. Confirm
; against the definition of _UsageCount (not visible here) that the test
; can ever fall through to the CPU path.
exg d6,a0 ; Restore old A0
move.l _UsageCount,d6 ; Check if someone already in code
bne DoOldBlt ; If there is, use blitter instead
bra.s sblit2 ; Else skip to use CPU
;
; Checks to see if there is more than one task ready to run. If so,
; use the blitter, else use the CPU. Note that for the most common case
; of scrolling (in a CLI/console window), the task outputting the text
; that causes the scroll will be "Ready to Run" since it is pre-empted
; by the console device before it has a chance to go into a Wait
; condition.
;
; If there is more than one task ready to run, but the second task
; in the queue has priority < MinTaskPri, then we can use the CPU
; anyway (since the second task is a background task that can be
; ignored).
;
_Friend2:
; Entered with the caller's A0 in D6. A0 is used to walk the TaskReady
; list; D0's low word is borrowed and kept on the stack meanwhile.
move.l _SysBase,a0 ; Get pointer to ExecBase
lea.l TaskReady(a0),a0 ; Get ptr to TaskReady list
cmp.l 8(a0),a0 ; Empty list?
beq.s _StartBlit ; If yes, do blit
move.w d0,-(sp) ; Grab a register temporarily
move.l (a0),a0 ; Get pointer to first waiting task
move.l (a0),a0 ; Get pointer to second task
move.b LN_PRI(a0),d0 ; Get its priority (if it exists)
move.l (a0),a0 ; And final link ptr (NULL if at end)
exg d6,a0 ; Restore previous A0; D6 = that link ptr
tst.l d6 ; More than 1 task?
beq.s F2Okay ; If no, we can use the CPU anyway
cmp.b _MinTaskPri,d0 ; Should we make way for waiting task?
bge PreExit ; If so, use blitter instead (PreExit pops D0)
F2Okay:
move.w (sp)+,d0 ; Else restore D0
bra.s sblit2 ; And skip to start blit
;
; Checks to see if there are _any_ other tasks ready to run. If there
; are and their task priority is >= MinTaskPri, then uses system blit
; instead of CPU.
;
_Friend1:
; Entered with the caller's A0 in D6; only A0 and D6 are used.
move.l _SysBase,a0 ; Get pointer to ExecBase
lea.l TaskReady(a0),a0 ; Get ptr to TaskReady list, head node
cmp.l 8(a0),a0 ; Empty list?
beq.s _StartBlit ; If yes, we can safely blit
move.l (a0),a0 ; Get pointer to first task
move.w LN_TYPE(a0),a0 ; Read ln_Type and ln_Pri as one word
exg d6,a0 ; Restore a0; D6 = the word just read
cmp.b _MinTaskPri,d6 ; Low byte of D6 is compared: ignore this task?
bge DoOldBlt ; If not, then use blitter instead
bra.s sblit2 ; Else skip to use CPU
;----------------------------------------------------------------------------
; Where the action starts. Initialises everything and then performs
; the blits using the CPU. At this stage, all registers are exactly
; as they were on entry to BltBitMap, except for D6 and A0, and these
; two are restored to the correct values immediately on entry.
;----------------------------------------------------------------------------
_StartBlit:
exg d6,a0 ; Restore A0
sblit2: ; Alternative entry point (A0 already restored)
;
; Now we need to determine the masks to be used for clipping, along
; with the start offset of the area within each bitplane and the
; modulo of each bitplane (the amount added onto the end of each
; copied row address to get to the start of the next one). Then loop
; over all the selected bitplanes, copying those requested.
;
; D6 is not saved here; the caller's value is already on the stack and
; is restored at doneblt.
;
movem.l d1-d5/d7/a0-a6,-(sp) ; Save rest of the registers
;
; Next, we need to make sure that the blitter is free. This is because
; some other blitter operation that operates on the bitmaps we've been
; passed may have started but not yet finished. Operations that
; depend on the blitter are guaranteed to occur in the right order
; (since the blitter can't multitask with itself) but when we start
; doing some of them with the CPU, we need to be a bit more careful.
;
; Note: Since we are now "in" graphics.library, a6 holds GfxBase.
; WaitBlit() is documented as preserving all registers.
;
jsr _LVOWaitBlit(a6) ; Wait for blitter to become free
ext.l d0 ; Sign-extend the word parameters to long
ext.l d1 ;
ext.l d2 ;
ext.l d3 ;
ext.l d4 ;
ext.l d5 ;
cmp d1,d3 ; Compare YDest with YSrc
bhi bltdown ; YDest higher: copy from the last row backwards
;
; Since YDest <= YSrc, we are copying the bitmap upwards in memory
; therefore start at the beginning and work down. (This is only
; important if the source and destination bitmaps are the same, but
; it doesn't do any harm to check when they are different also.)
;
bltup:
; Top-down copy. Leaves A2/A3 = +BytesPerRow of source/dest and
; A4/A5 = byte offset of the first longword to copy in source/dest.
move.w bm_BytesPerRow(a0),d6 ; Get width of source bitmap
ext.l d6 ; Extend to full integer
move.l d6,a2 ; Initialise modulo for source bitmap
muls d6,d1 ; Row offset = BytesPerRow * YSrc
move.l d0,d6 ; Get XSrc
lsr.l #3,d6 ; Get #bytes offset of XSrc
and.b #$fc,d6 ; Round down to longword boundary
add.l d6,d1 ; Add on x offset to get bitmap offset
move.l d1,a4 ; Now A4 = offset into source bitmap
;
; Repeat for dest bitmap
;
move.w bm_BytesPerRow(a1),d6 ; Get width of dest bitmap
ext.l d6 ; Extend to full integer
move.l d6,a3 ; Initialise modulo for dest bitmap
muls d6,d3 ; Row offset = BytesPerRow * YDest
move.l d2,d6 ; Get XDest
lsr.l #3,d6 ; (Converted to longword aligned
and.b #$fc,d6 ; byteoffset)
add.l d6,d3 ; Add on xoffset to get bitmap offset
move.l d3,a5 ; Now A5 = offset into dest bitmap
bra.s contblit ; Skip to rest of blitcopy
;
; If we get here, YDest > YSrc, so we are copying the bitmap downwards
; which means we need to start from the end and work back. We also
; need to initialise the modulo to -BytesPerRow instead of BytesPerRow.
;
bltdown:
; Bottom-up copy. Leaves A2/A3 = -BytesPerRow of source/dest and
; A4/A5 = byte offset of the first longword of the LAST row to copy.
add.l d5,d1 ; Add YSize+YSrc to get last row addr
subq.l #1,d1 ; Adjust (so we don't have last_row+1)
move.w bm_BytesPerRow(a0),d6 ; Get width of source bitmap
ext.l d6 ; Extend to full longword
muls d6,d1 ; Calculate row offset
neg.l d6 ; Negate mod. since copying backwards
move.l d6,a2 ; Initialise modulo for source bitmap
move.l d0,d6 ; Get XSrc
lsr.l #3,d6 ; Get #bytes offset of XSrc
and.b #$fc,d6 ; Round down to longword boundary
add.l d6,d1 ; Add on x offset to get bitmap offset
move.l d1,a4 ; Now A4 = offset into source bitmap
;
; Do same calculations for dest bitmap
;
add.l d5,d3 ; Add YSize+YDest to get last row addr
subq.l #1,d3 ; Adjust (so we don't have last_row+1)
move.w bm_BytesPerRow(a1),d6 ; Get width of dest bitmap
ext.l d6 ; Extend to full longword
muls d6,d3 ; Calculate row offset
neg.l d6 ; Negate, since copying backwards
move.l d6,a3 ; Initialise modulo for dest bitmap
move.l d2,d6 ; Get XDest
lsr.l #3,d6 ; (Converted to longword aligned
and.b #$fc,d6 ; byteoffset)
add.l d6,d3 ; Add on xoffset to get bitmap offset
move.l d3,a5 ; Now A5 = offset into dest bitmap
;
; Now calculate the mask values
;
contblit:
; Works out the edge masks, the longword count and the per-row modulos.
; On exit: D0 = YSize, D1 = left mask, D2 = right mask,
; D3 = (longwords per row) - 2, D4/D5 = addresses of the source/dest
; bm_Planes arrays, D6 = (planes to visit) - 1, D7 = plane mask,
; A2/A3 = source/dest byte offsets, A4/A5 = source/dest modulos.
and.w #$1f,d0 ; Calculate XSrc longword bit offset
add.l d0,d4 ; XSize + bit offset = bit just past right edge
move.l d4,d1 ; Calculate its longword bit offset
and.w #$1f,d1 ;
lsr.l #5,d4 ; Calc # of longwords needed for copy
add.l d1,d1 ; Scale right-edge bit offset to longword index
add.l d1,d1 ; into the bitmask array
bne.s contb1 ; If zero, the right edge ends on a boundary
subq.l #1,d4 ; so decrement longword count
contb1:
lea RightMask(PC),a6 ; Get address of right mask table
move.l 0(a6,d1.w),d2 ; Get right bitmask
add.l d0,d0 ; Scale XSrc bits to longword index
add.l d0,d0 ; And again
contb2:
lea LeftMask(PC),a6 ; Get address of left mask table
move.l 0(a6,d0.w),d1 ; Get left bitmask
;
; Calculate minimum number of bitplanes to copy
;
moveq.l #0,d6 ; Zero out high bits of D6
move.b bm_Depth(a0),d6 ; Get depth of source bitmap
cmp.b bm_Depth(a1),d6 ; If source depth is below dest depth
blo.s contb3 ; keep the source depth
move.b bm_Depth(a1),d6 ; Else use dest bitmap depth instead
contb3:
subq.l #1,d6 ; Adjust depth to 0-based, not 1-based
move.l d4,d0 ; Copy longword count (last longword index)
addq.l #1,d0 ; Adjust positively
add.l d0,d0 ; Convert longword count to byte count
add.l d0,d0 ;
sub.l d0,a2 ; Calculate correct modulo for source
sub.l d0,a3 ; Calculate correct modulo for dest.
exg a2,a4 ; Setup A2/A3 = bitmap offsets
exg a3,a5 ; and A4/A5 = bitmap modulos
subq.l #1,d4 ; Longwords between the edges (-1 if only one longword)
move.l d4,d3 ; Move to right reg for Copy routine
move.l d5,d0 ; Copy YSize to right place also
lea.l bm_Planes(a0),a0 ; Get pointer to source bitplane array
lea.l bm_Planes(a1),a1 ; Get pointer to dest bitplane array
move.l a0,d4 ; Stash bitplane pointers here
move.l a1,d5 ;
move.l 20(sp),d7 ; Read saved D7 (plane mask): 6th long of the movem frame
;
; Now build a list of bitmaps to be copied on the stack. To this end,
; we reserve 8 * 8 = 64 bytes of stack for source/destination bitmap
; pointers.
;
lea -64(sp),sp ; Reserve space for bitmap ptrs
move.l sp,a6 ; And point to it using A6
;
; Loop through the bitplanes, building the list of source/destination
; pointer pairs for the planes selected in the copy mask. Planes whose
; source and/or destination pointer is NULL or -1 get handled
; immediately (new for WB 2.0). All others get stored on the stack.
;
; In the loop D4/D5 hold the addresses of the current entries in the
; source/dest bm_Planes arrays and D6 counts the planes left to visit.
;
move.w d7,-(sp) ; Save plane mask (low word) as temporary value
moveq.l #0,d7 ; Clear bitmap plane count
cmultlp:
lsr.w (sp) ; Shift mask right; carry = bit for this plane
bcc.s cmultx ; If clear, skip over code
addq #1,d7 ; Count the plane (also counts NULL / -1 planes)
move.l d4,a0 ; Get pointer to source bitplane ptr
move.l d5,a1 ; And destination bitplane ptr
move.l (a0),d4 ; Read pointers to bitplanes
move.l (a1),d5 ;
not.l d5 ; Check if dest is -1
beq skipfill ; If so, don't copy anything
not.l d5 ; Check if dest is zero
beq skipfill ; If so, don't copy anything
not.l d4 ; Check if source is -1
beq fillones ; If so, fill dest with 1's
not.l d4 ; Check if source is 0
beq fillzeros ; If so, fill dest with 0's
exg d4,a0 ; A0/A1 = plane data, D4/D5 = array entries again
exg d5,a1 ;
add.l a2,a0 ; Add in correct offset for src ptr
add.l a3,a1 ; Add in correct offset for dest ptr
move.l a0,(a6)+ ; Store bitmap pointers on the stack
move.l a1,(a6)+ ;
cmultx:
addq.l #4,d4 ; Bump bitplane pointers
addq.l #4,d5 ;
dbf d6,cmultlp ; Repeat for remaining bitplanes
addq.l #2,sp ; Pop plane mask from stack
;
; Now copy all the bitmaps we accumulated on the stack. There will be
; between 1 and 8 of them. We copy them in groups of 1 to 4, so two
; operations may be required.
;
; A quick recap on what the various registers contain:
;
; D0 - Number of rows to copy
; D1 - Mask for left edge of bitmap
; D2 - Mask for right edge of bitmap
; D3 - Number of longwords _between_ left edge and right edge
; D7 - Total number of bitplanes copied (including 0 & -1 ptrs)
; A4 - Modulo of source bitplanes
; A5 - Modulo of dest bitplanes
; A6 - Points to end of source/dest bitplane pointers
; SP - Points to start of source/dest bitplane pointers
;
sub.l sp,a6 ; Bytes of pointer pairs stored (8 per plane)
move.l a6,d6 ; Halve it to get 4 per plane, i.e. a
lsr.l #1,d6 ; longword table index for each plane
subq #4,d6 ; Adjust to zero based (-4 to 28)
bpl.s cmultx2 ; If negative, no bitplanes to copy
lea 64(sp),sp ; so pop bitplane pointers from stack
bra doneblt ; and exit without doing any work
cmultx2:
cmpi.w #12,d6 ; More than 4 bitplanes to copy?
bhi.s cmult_db ; If so, skip to do in two goes
move.l d3,d3 ; Does bitmap fit in one longword? (sets flags only)
bpl.s cmult_mm ; If not, skip to multiple longwords
;
; We have between 1 and 4 bitplanes to copy, each a single
; longword wide.
;
and.l d2,d1 ; Create composite mask
addq #8,d6 ; Add 16 in all so the index selects the
addq #8,d6 ; CopySingle() entries, then fall through.
;
; We have between 1 and 4 bitplanes to copy, each at least two
; longwords wide.
;
cmult_mm:
move.l FuncTab(pc,d6),a6 ; Fetch routine for this plane count
jsr (a6) ;
lea 64(sp),sp ; Pop everything off the stack
bra doneblt ; And skip to end of blit
cmult_db:
move.l d3,d3 ; Does bitplane fit in one longword? (sets flags only)
bpl.s cmult_dbm ; If not, skip to multiple copy
;
; We have between 5 and 8 bitplanes to copy, each just one
; longword wide. D6 (16 to 28) is left unadjusted here, so at
; cmult_dbm2 it already indexes the CopySingle() entries rather
; than the CopyMultiple() ones.
;
and.l d2,d1 ; Create composite mask
bsr Copy4Single ; Copy first four bitplanes
bra.s cmult_dbm2 ; Skip to exit with correct fn index
;
; We have between 5 and 8 bitplanes to copy, each at least two
; longwords wide.
;
cmult_dbm:
bsr Copy4Multiple ; Copy first four bitmaps in one gulp
subi.w #16,d6 ; Index for the planes still to copy (0 to 12)
cmult_dbm2:
lea 32(sp),sp ; Pop first four bitmaps off stack
move.l FuncTab(pc,d6),a6 ; Copy remaining bitmaps
jsr (a6) ;
lea 32(sp),sp ; Pop remaining bitmaps
bra doneblt ; And skip to end of blit
;
; Index to table of functions for copying from 1 to 4 multiple and
; single longword bitmaps.
;
FuncTab:
        dc.l    Copy1Multiple           ; +0   one plane, two or more longwords
        dc.l    Copy2Multiple           ; +4
        dc.l    Copy3Multiple           ; +8
        dc.l    Copy4Multiple           ; +12
        dc.l    Copy1Single             ; +16  one plane, a single longword
        dc.l    Copy2Single             ; +20
        dc.l    Copy3Single             ; +24
        dc.l    Copy4Single             ; +28
;
; Skip past current bitplane without doing anything to bitplane data
; (used when destination bitmap ptr is 0 or -1).
;
skipfill:
; Reached from cmultlp with D4/D5 holding the plane pointers just read
; and A0/A1 the addresses of the bm_Planes entries; swap them back so
; that cmultx can step D4/D5 to the next entries.
exg d4,a0 ; Restore original pointers
exg d5,a1 ;
bra cmultx ; Skip back to do next bitplane
;
; Fill bitplane with one's (source bitplane pointer is -1)
;
fillones:
exg d4,a0 ; Restore register order
exg d5,a1 ; A1 = destination plane pointer
add.l a3,a1 ; Add in correct offset into bitplane
bsr Fill_1s ; Fill the bitplane
bra cmultx ; Skip back to do next bitplane
;
; Fill bitplane with zero's (source bitplane pointer is NULL)
;
fillzeros:
exg d4,a0 ; Restore register order
exg d5,a1 ; A1 = destination plane pointer
add.l a3,a1 ; Add in correct offset into bitplane
bsr Fill_0s ; Fill the bitplane
bra cmultx ; Skip back to do next bitplane
;
; That's it -- we're done! Now just pop remaining values off the stack
; and return to the caller with d0 = number of bitplanes copied.
;
doneblt:
; Common exit of the CPU blit. Expects the 64-byte pointer area to have
; been popped already, leaving the movem frame and the saved D6 on top.
move.l d7,d0 ; Set return value = #bitplanes copied
subq.l #1,_UsageCount ; Decrement number of callers in code
movem.l (sp)+,d1-d5/d7/a0-a6 ; Restore registers saved at sblit2
move.l (sp)+,d6 ; And the D6 pushed by _NewBltBitMap
rts ; Return to caller
*****************************************************************************
*
* CopyMultiple()
*
* The following routines copy from 1 to 4 bitplanes which span more
* than one longword boundary horizontally (i.e. the start and finish
* bitplanes are in different longwords).
*
* The routines are constructed mainly out of macros, to keep the source
* code down to size (and also more manageable). All routines take the
* following parameters:
*
* Input:
* D0 - Number of rows to copy
* D1 - Mask for left edge of source (000xxx)
* D2 - Mask for right edge of source (xxx000)
* D3 - Number of longwords to copy
* A4 - Modulo of source (positive or negative)
* A5 - Modulo of destination (positive or negative)
*
* In addition, pointers to the source/destination bitplanes are pushed
* onto the stack, such that 4(SP) = src bp1, 8(SP) = dest bp1,
* 12(SP) = src bp2, 16(SP) = dest bp2 etc.
*
* Output:
* None
*
*****************************************************************************
*****************************************************************************
*
* Macros used by the copy routines
*
*****************************************************************************
;-----------------------------------------------------------------------------
; Init_Mult Label
;
; This macro is the standard entry to each CopyMultiple() routine. It
; checks to see whether the bitplane being copied contains at least
; one full longword. If not, it branches to a separate routine
; (loop?edges) which is smaller; doing this at the start saves having
; to check for zero longwords each time through the main loop.
; Label is the name of the routine to perform the separate copy.
;-----------------------------------------------------------------------------
Init_Mult macro
        move.l  d1,d4                   ; d4 = destination mask for the
        not.l   d4                      ;   left edge (complement of d1)
        move.l  d2,d5                   ; d5 = destination mask for the
        not.l   d5                      ;   right edge (complement of d2)
        subq.l  #1,d0                   ; Row count becomes a DBF counter
        subq.l  #1,d3                   ; So does the inner longword count
        bmi     \1                      ; No inner longwords: edges-only loop
        endm
;-----------------------------------------------------------------------------
; Left_Mult src,dest
;
; Copies the left hand side of the bitplane from register src to the
; bitplane pointed to by dest, using the masks in d1/d4
;-----------------------------------------------------------------------------
Left_Mult macro
        move.l  (\2),d7                 ; Fetch current left longword of dest
        and.l   d4,d7                   ; Keep only the bits to be preserved
        move.l  (\1)+,d6                ; Fetch left longword of source
        and.l   d1,d6                   ; Keep only the bits to be copied
        or.l    d6,d7                   ; Combine the two
        move.l  d7,(\2)+                ; Write back and step to next longword
        endm
;-----------------------------------------------------------------------------
; Copy_Mult src,dest
;
; Copies all the full longwords between the left and right extremities
; of the bitplane row from src to dest. Note that for 68010 upwards, it
; is faster to copy using MOVE.L/DBF than to play tricks with MOVEM;
; since this program will only be of use to systems with fast CPU's
; anyway, this is the route we take.
;-----------------------------------------------------------------------------
Copy_Mult macro
        move.l  d3,d6                   ; Fresh DBRA counter for this row
cm_next\@:
        move.l  (\1)+,(\2)+             ; Whole longwords go across unmasked
        dbra    d6,cm_next\@            ; Until the inner longwords are done
        endm
;-----------------------------------------------------------------------------
; Right_Mult src,dest
;
; Copies the right hand side of the bitplane from register src to the
; bitplane pointed to by dest, using the masks in d2/d5
;-----------------------------------------------------------------------------
Right_Mult macro
        move.l  (\2),d7                 ; Fetch current right longword of dest
        and.l   d5,d7                   ; Keep only the bits to be preserved
        move.l  (\1)+,d6                ; Fetch right longword of source
        and.l   d2,d6                   ; Keep only the bits to be copied
        or.l    d6,d7                   ; Combine the two
        move.l  d7,(\2)+                ; Write back and step past the row
        endm
;-----------------------------------------------------------------------------
; Advance src,dest
;
; This macro advances the source and destination pointers to point to
; the next row in the bitplane.
;-----------------------------------------------------------------------------
Advance macro
        adda.l  a5,\2                   ; Dest pointer to start of next row
        adda.l  a4,\1                   ; Source pointer to start of next row
        endm
;-----------------------------------------------------------------------------
; Copy_Quick src,dest
;
; This macro copies the left and right edges in one go, when there
; are no complete longwords in between. It's quicker than having to
; check for zero longwords each time through the main loop. The masks
; used are d1/d4 for the left edge of the bitplane, d2/d5 for the
; right edge.
;-----------------------------------------------------------------------------
Copy_Quick macro
;
;       A row with no whole longwords between the edges is simply the
;       left edge followed immediately by the right edge, so reuse the
;       two edge macros; they expand to the masked read-merge-write
;       sequences for d1/d4 and d2/d5 respectively.
;
        Left_Mult \1,\2                 ; Left edge under masks d1/d4
        Right_Mult \1,\2                ; Right edge under masks d2/d5
        endm
*****************************************************************************
*
* The actual copy routines, Copy1Multiple() ... Copy4Multiple()
*
*****************************************************************************
;-----------------------------------------------------------------------------
;
; Copies a single bitplane
;
;-----------------------------------------------------------------------------
Copy1Multiple:
;
;       One bitplane, at least two longwords per row.
;       Stack on entry: 4(sp) = source pointer, 8(sp) = dest pointer.
;
        movem.l d0-d7/a0-a1/a6,-(sp)    ; 11 registers = 44 bytes
        lea.l   48(sp),a6               ; Step over them and the return addr
        movem.l (a6)+,a0-a1             ; a0 = source row, a1 = dest row
        Init_Mult Copy1Quick            ; Build masks, pick the loop to use
c1m_loop:
        Left_Mult a0,a1                 ; Masked left edge
        Copy_Mult a0,a1                 ; Whole longwords in the middle
        Right_Mult a0,a1                ; Masked right edge
        Advance a0,a1                   ; On to the next row
        dbra    d0,c1m_loop             ; Until every row is done
        movem.l (sp)+,d0-d7/a0-a1/a6    ; Put everything back
        rts
;
;       Same again for rows that consist of the two edges only
;
Copy1Quick:
        Copy_Quick a0,a1                ; Both masked edges
        Advance a0,a1                   ; On to the next row
        dbf     d0,Copy1Quick           ; Until every row is done
        movem.l (sp)+,d0-d7/a0-a1/a6    ; Put everything back
        rts
;-----------------------------------------------------------------------------
;
; Copies 2 bitplanes simultaneously
;
;-----------------------------------------------------------------------------
Copy2Multiple:
;
;       Two bitplanes, at least two longwords per row.
;       Stack on entry: 4(sp)/8(sp) = plane 1 source/dest,
;       12(sp)/16(sp) = plane 2 source/dest.
;
        movem.l d0-d7/a0-a3/a6,-(sp)    ; 13 registers = 52 bytes
        lea.l   56(sp),a6               ; Step over them and the return addr
        movem.l (a6),a0-a3              ; a0/a1 = plane 1, a2/a3 = plane 2
        Init_Mult Copy2Quick            ; Build masks, pick the loop to use
c2m_loop:
        Left_Mult a0,a1                 ; Plane 1, masked left edge
        Left_Mult a2,a3                 ; Plane 2, masked left edge
        Copy_Mult a0,a1                 ; Plane 1, middle longwords
        Copy_Mult a2,a3                 ; Plane 2, middle longwords
        Right_Mult a0,a1                ; Plane 1, masked right edge
        Right_Mult a2,a3                ; Plane 2, masked right edge
        Advance a0,a1                   ; Plane 1 to next row
        Advance a2,a3                   ; Plane 2 to next row
        dbra    d0,c2m_loop             ; Until every row is done
        movem.l (sp)+,d0-d7/a0-a3/a6    ; Put everything back
        rts
;
;       Same again for rows that consist of the two edges only
;
Copy2Quick:
        Copy_Quick a0,a1                ; Plane 1, both masked edges
        Copy_Quick a2,a3                ; Plane 2, both masked edges
        Advance a0,a1                   ; Plane 1 to next row
        Advance a2,a3                   ; Plane 2 to next row
        dbf     d0,Copy2Quick           ; Until every row is done
        movem.l (sp)+,d0-d7/a0-a3/a6    ; Put everything back
        rts
;-----------------------------------------------------------------------------
;
; Copies 3 bitplanes simultaneously
;
;-----------------------------------------------------------------------------
Copy3Multiple:
; Planes 1 and 2 live in A0-A3 throughout. Plane 3's pointers stay in the
; caller's stack slots (addressed through A6) and are loaded into A2/A3,
; used for one row and written back on every pass of the loop.
movem.l a0-a3/a6/d0-d7,-(sp) ; Save registers (52 bytes)
lea.l 56(sp),a6 ; Get pointer to bitplanes (past return address)
movem.l (a6)+,a0-a3 ; Load bitplane ptrs 1 & 2 off stack; A6 -> plane 3 pair
Init_Mult Copy3Quick ; Setup registers
c3m_loop:
Left_Mult a0,a1 ; Copy left edge of bitplane 1
Left_Mult a2,a3 ; Copy left edge of bitplane 2
Copy_Mult a0,a1 ; Copy middle of bitplane 1
Copy_Mult a2,a3 ; Copy middle of bitplane 2
Right_Mult a0,a1 ; Copy right edge of bitplane 1
Right_Mult a2,a3 ; Copy right edge of bitplane 2
Advance a0,a1 ; Increment bitplane 1 ptrs
Advance a2,a3 ; Increment bitplane 2 ptrs
move.l a3,-(sp) ; Save bitplane 2 ptrs
move.l a2,-(sp) ;
move.l (a6)+,a2 ; Load bitplane 3 ptrs
move.l (a6),a3 ;
Left_Mult a2,a3 ; Copy left edge of bitplane 3
Copy_Mult a2,a3 ; Copy middle of bitplane 3
Right_Mult a2,a3 ; Copy right edge of bitplane 3
Advance a2,a3 ; Increment bitplane 3 ptrs
move.l a3,(a6) ; Save bitplane 3 ptrs
move.l a2,-(a6) ; (A6 points at the plane 3 pair again)
move.l (sp)+,a2 ; Restore bitplane 2 ptrs
move.l (sp)+,a3 ;
dbf d0,c3m_loop ; Repeat for remaining rows
movem.l (sp)+,a0-a3/a6/d0-d7 ; Restore registers
rts ; Return to caller
;
; Handle inner longword count of zero
;
Copy3Quick:
Copy_Quick a0,a1 ; Copy left/right edge of bitplane 1
Copy_Quick a2,a3 ; Copy left/right edge of bitplane 2
Advance a0,a1 ; Increment bitplane 1 ptrs
Advance a2,a3 ; Increment bitplane 2 ptrs
move.l a3,-(sp) ; Save bitplane 2 ptrs
move.l a2,-(sp) ;
move.l (a6)+,a2 ; Load bitplane 3 ptrs
move.l (a6),a3 ;
Copy_Quick a2,a3 ; Copy left/right edge of bitplane 3
Advance a2,a3 ; Increment bitplane 3 ptrs
move.l a3,(a6) ; Save bitplane 3 ptrs
move.l a2,-(a6) ; (A6 points at the plane 3 pair again)
move.l (sp)+,a2 ; Restore bitplane 2 ptrs
move.l (sp)+,a3 ;
dbra d0,Copy3Quick ; Repeat for all rows
movem.l (sp)+,a0-a3/a6/d0-d7 ; Restore registers
rts ; Return to caller
;-----------------------------------------------------------------------------
;
; Copies 4 bitplanes simultaneously
;
;-----------------------------------------------------------------------------
Copy4Multiple:
; Planes are processed two at a time through A0-A3. The pointers for
; planes 1 and 2 are parked on the stack while planes 3 and 4 are loaded
; from, and written back to, the caller's stack slots addressed by A6.
movem.l a0-a3/a6/d0-d7,-(sp) ; Save registers (52 bytes)
lea.l 56(sp),a6 ; Get pointer to bitplanes (past return address)
movem.l (a6)+,a0-a3 ; Load bitplane ptrs 1 & 2 off stack; A6 -> plane 3 pair
Init_Mult Copy4Quick ; Setup registers
c4m_loop:
Left_Mult a0,a1 ; Copy left edge of bitplane 1
Left_Mult a2,a3 ; Copy left edge of bitplane 2
Copy_Mult a0,a1 ; Copy middle of bitplane 1
Copy_Mult a2,a3 ; Copy middle of bitplane 2
Right_Mult a0,a1 ; Copy right edge of bitplane 1
Right_Mult a2,a3 ; Copy right edge of bitplane 2
Advance a0,a1 ; Increment bitplane 1 ptrs
Advance a2,a3 ; Increment bitplane 2 ptrs
movem.l a0-a3,-(sp) ; Save bitplane 1,2 ptrs
movem.l (a6),a0-a3 ; Load bitplane 3,4 ptrs
Left_Mult a0,a1 ; Copy left edge of bitplane 3
Left_Mult a2,a3 ; Copy left edge of bitplane 4
Copy_Mult a0,a1 ; Copy middle of bitplane 3
Copy_Mult a2,a3 ; Copy middle of bitplane 4
Right_Mult a0,a1 ; Copy right edge of bitplane 3
Right_Mult a2,a3 ; Copy right edge of bitplane 4
Advance a0,a1 ; Increment bitplane 3 ptrs
Advance a2,a3 ; Increment bitplane 4 ptrs
movem.l a0-a3,(a6) ; Save bitplane 3,4 ptrs
movem.l (sp)+,a0-a3 ; Restore bitplane 1,2 ptrs
dbf d0,c4m_loop ; Repeat for remaining rows
movem.l (sp)+,a0-a3/a6/d0-d7 ; Restore registers
rts ; Return to caller
;
; Handle inner longword count of zero
;
Copy4Quick:
Copy_Quick a0,a1 ; Copy left/right edge of bitplane 1
Copy_Quick a2,a3 ; Copy left/right edge of bitplane 2
Advance a0,a1 ; Increment bitplane 1 ptrs
Advance a2,a3 ; Increment bitplane 2 ptrs
movem.l a0-a3,-(sp) ; Save bitplane 1,2 ptrs
movem.l (a6),a0-a3 ; Load bitplane 3,4 ptrs
Copy_Quick a0,a1 ; Copy left/right edge of bitplane 3
Copy_Quick a2,a3 ; Copy left/right edge of bitplane 4
Advance a0,a1 ; Increment bitplane 3 ptrs
Advance a2,a3 ; Increment bitplane 4 ptrs
movem.l a0-a3,(a6) ; Save bitplane 3,4 ptrs
movem.l (sp)+,a0-a3 ; Restore bitplane 1,2 ptrs
dbra d0,Copy4Quick ; Repeat for all rows
movem.l (sp)+,a0-a3/a6/d0-d7 ; Restore registers
rts ; Return to caller
*****************************************************************************
*
* CopySingle()
*
* The following routines copy from 1 to 4 bitplanes that start and end
* (horizontally) within a single longword. CopyMultiple can't be used
* for such cases, since it always copies at least two longwords (one
* for the left edge and one for the right).
*
* Input:
* D0 - Number of rows to copy
* D1 - Mask of bits to be copied from source (000xxx000)
* A4 - Modulo of source bitplane
* A5 - Modulo of dest bitplane
*
* In addition, pointers to the source/destination bitplanes are pushed
* onto the stack, such that 4(SP) = src bp1, 8(SP) = dest bp1,
* 12(SP) = src bp2, 16(SP) = dest bp2 etc.
*
* Output:
* None
*
*****************************************************************************
*****************************************************************************
*
* Macros used by the copy routines
*
*****************************************************************************
;-----------------------------------------------------------------------------
; Init_Sing
;
; This macro is the standard entry to each CopySingle() routine. It
; creates the complement mask used for masking source/destination
; and adjusts the row counter to be zero based.
;-----------------------------------------------------------------------------
Init_Sing macro
        move.l  d1,d2                   ; d2 = destination mask, i.e. the
        not.l   d2                      ;   complement of the source mask d1
        subq.l  #1,d0                   ; Row count becomes a DBRA counter
        endm
;-----------------------------------------------------------------------------
; Copy_Dual src,dest
;
; Copies one longword from src to dest, taking the bits selected by the
; source mask in D1 and keeping the dest bits selected by the mask in D2.
; D3 and D4 are used as scratch.
;-----------------------------------------------------------------------------
Copy_Dual macro
        move.l  (\2),d4                 ; Fetch current dest longword
        and.l   d2,d4                   ; Keep only the bits to be preserved
        move.l  (\1)+,d3                ; Fetch source longword
        and.l   d1,d3                   ; Keep only the bits to be copied
        or.l    d3,d4                   ; Combine the two
        move.l  d4,(\2)+                ; Write the merged longword back
        endm
*****************************************************************************
*
* The actual copy routines, Copy1Single() ... Copy4Single()
*
*****************************************************************************
;-----------------------------------------------------------------------------
; Copy1Single
;
; Copies one bitplane whose affected columns all lie inside a single
; longword. For every row the destination longword becomes
; (src AND D1) OR (dest AND NOT D1).
;
; In:   D0    = number of rows
;       D1    = mask of source bits to copy
;       4(SP) = source plane pointer, 8(SP) = dest plane pointer
;       A4/A5 = source/dest modulo as described in the CopySingle header
;               (the Advance macro is defined elsewhere in this file)
; Out:  nothing; every register touched here is restored on exit.
;-----------------------------------------------------------------------------
C1S_ARGS        equ     8*4+4           ; 8 saved registers + return address
Copy1Single:
        movem.l d0-d4/a0-a1/a6,-(sp)    ; Preserve everything we touch
        Init_Sing                       ; D0 = rows-1, D2 = NOT D1
        lea     C1S_ARGS(sp),a6         ; A6 -> caller's plane pointers
        movea.l (a6)+,a0                ; A0 = source plane
        movea.l (a6),a1                 ; A1 = dest plane
copy1slp:
        Copy_Dual a0,a1                 ; Merge one masked longword
        Advance a0,a1                   ; Step both pointers to the next row
        dbra    d0,copy1slp             ; Loop over the remaining rows
        movem.l (sp)+,d0-d4/a0-a1/a6    ; Put the registers back
        rts
;-----------------------------------------------------------------------------
; Copy2Single
;
; Two-plane version of the single-longword copy. Both planes share the
; same masks (D1 / NOT D1) and the same row count.
;
; In:   D0 = number of rows, D1 = mask of source bits to copy
;       4(SP)..16(SP) = src1, dest1, src2, dest2 plane pointers
; Out:  nothing; every register touched here is restored on exit.
;-----------------------------------------------------------------------------
C2S_ARGS        equ     10*4+4          ; 10 saved registers + return address
Copy2Single:
        movem.l d0-d4/a0-a3/a6,-(sp)    ; Preserve everything we touch
        Init_Sing                       ; D0 = rows-1, D2 = NOT D1
        lea     C2S_ARGS(sp),a6         ; A6 -> caller's plane pointers
        movem.l (a6)+,a0-a3             ; A0/A1 = plane 1, A2/A3 = plane 2
copy2slp:
        Copy_Dual a0,a1                 ; Plane 1: merge masked longword
        Advance a0,a1                   ;          and step to next row
        Copy_Dual a2,a3                 ; Plane 2: merge masked longword
        Advance a2,a3                   ;          and step to next row
        dbra    d0,copy2slp             ; Loop over the remaining rows
        movem.l (sp)+,d0-d4/a0-a3/a6    ; Put the registers back
        rts
;-----------------------------------------------------------------------------
; Copy3Single
;
; Three-plane version of the single-longword copy. Planes 1 and 2 keep
; their pointers in A0-A3 for the whole loop. The plane 3 pointers stay
; in their stack slots and are swapped into A0/A1 once per row, with the
; plane 1 pointers parked on the stack meanwhile.
;
; Side effect: the caller's stacked plane 3 pointers are advanced in
; place, one row per iteration.
;
; In:   D0 = number of rows, D1 = mask of source bits to copy
;       4(SP)..24(SP) = src1, dest1, src2, dest2, src3, dest3
; Out:  nothing; every register touched here is restored on exit.
;-----------------------------------------------------------------------------
C3S_ARGS        equ     10*4+4          ; 10 saved registers + return address
Copy3Single:
        movem.l d0-d4/a0-a3/a6,-(sp)    ; Preserve everything we touch
        Init_Sing                       ; D0 = rows-1, D2 = NOT D1
        lea     C3S_ARGS(sp),a6         ; A6 -> caller's plane pointers
        movem.l (a6)+,a0-a3             ; Planes 1 and 2; A6 -> plane 3 slots
copy3slp:
        Copy_Dual a0,a1                 ; Plane 1: merge masked longword
        Advance a0,a1                   ;          and step to next row
        Copy_Dual a2,a3                 ; Plane 2: merge masked longword
        Advance a2,a3                   ;          and step to next row
        move.l  a1,-(sp)                ; Park plane 1 dest pointer
        move.l  a0,-(sp)                ; Park plane 1 source pointer
        movea.l (a6)+,a0                ; A0 = plane 3 source
        movea.l (a6),a1                 ; A1 = plane 3 dest
        Copy_Dual a0,a1                 ; Plane 3: merge masked longword
        Advance a0,a1                   ;          and step to next row
        move.l  a1,(a6)                 ; Write back plane 3 dest pointer
        move.l  a0,-(a6)                ; Write back plane 3 source; A6 rewound
        movea.l (sp)+,a0                ; Recover plane 1 source pointer
        movea.l (sp)+,a1                ; Recover plane 1 dest pointer
        dbra    d0,copy3slp             ; Loop over the remaining rows
        movem.l (sp)+,d0-d4/a0-a3/a6    ; Put the registers back
        rts
;-----------------------------------------------------------------------------
; Copy4Single
;
; Four-plane version of the single-longword copy. A0-A3 alternate
; between the plane 1/2 pointers and the plane 3/4 pointers on every
; row: planes 1/2 are parked on the stack while planes 3/4 are loaded
; from, and written back to, their stack slots.
;
; Side effect: the caller's stacked plane 3 and plane 4 pointers are
; advanced in place, one row per iteration.
;
; In:   D0 = number of rows, D1 = mask of source bits to copy
;       4(SP)..32(SP) = src1, dest1, src2, dest2, src3, dest3, src4, dest4
; Out:  nothing; every register touched here is restored on exit.
;-----------------------------------------------------------------------------
C4S_ARGS        equ     10*4+4          ; 10 saved registers + return address
Copy4Single:
        movem.l d0-d4/a0-a3/a6,-(sp)    ; Preserve everything we touch
        Init_Sing                       ; D0 = rows-1, D2 = NOT D1
        lea     C4S_ARGS(sp),a6         ; A6 -> caller's plane pointers
        movem.l (a6)+,a0-a3             ; Planes 1 and 2; A6 -> plane 3/4 slots
copy4slp:
        Copy_Dual a0,a1                 ; Plane 1: merge masked longword
        Advance a0,a1                   ;          and step to next row
        Copy_Dual a2,a3                 ; Plane 2: merge masked longword
        Advance a2,a3                   ;          and step to next row
        movem.l a0-a3,-(sp)             ; Park plane 1/2 pointers
        movem.l (a6),a0-a3              ; Load plane 3/4 pointers from slots
        Copy_Dual a0,a1                 ; Plane 3: merge masked longword
        Advance a0,a1                   ;          and step to next row
        Copy_Dual a2,a3                 ; Plane 4: merge masked longword
        Advance a2,a3                   ;          and step to next row
        movem.l a0-a3,(a6)              ; Write plane 3/4 pointers back
        movem.l (sp)+,a0-a3             ; Recover plane 1/2 pointers
        dbra    d0,copy4slp             ; Loop over the remaining rows
        movem.l (sp)+,d0-d4/a0-a3/a6    ; Put the registers back
        rts
*****************************************************************************
*
* Fill_1s(), Fill_0s
*
* Handles the case, new for Workbench 2.0, where the source bitplane
* pointer stands for an array of all ones (ptr = $FFFFFFFF) or an
* array of all zeros (ptr = $00000000).
*
* Input:
* D0 - Number of rows to copy
* D1 - Mask for left edge of source (000xxx)
* D2 - Mask for right edge of source (xxx000)
* D3 - Number of full longwords between the left and right edge
*      longwords (negative means both edges share a single longword)
* A1 - Pointer to dest bitplane
* A5 - Modulo of dest bitplane
*
* Output:
* None
*
*****************************************************************************
;
; Fill_1s: set the selected region of the destination plane to ones.
;
; Three row shapes are handled, chosen once from D3 before looping:
;   D3 < 0  - both edges lie in one longword; OR in (D1 AND D2)
;   D3 = 0  - left edge longword followed directly by right edge longword
;   D3 > 0  - left edge, D3 full longwords of ones, right edge
; After each row A5 is added to the destination pointer. All registers
; are restored on exit.
;
Fill_1s:
        movem.l d0/d3/d6-d7/a1,-(sp)    ; Preserve everything we touch
        moveq   #-1,d7                  ; D7 = all ones for interior longwords
        subq.l  #1,d0                   ; Row count becomes zero-based for DBRA
        tst.l   d3                      ; Negative interior count?
        bmi.s   Fill_1single            ; Yes: edges share one longword
        subq.w  #1,d3                   ; Interior count zero-based for DBRA
        bmi.s   Fill_1quick             ; Was zero: only the two edges
;
; General case: left edge, run of full longwords, right edge.
;
fill_1lp1:
        or.l    d1,(a1)+                ; Set the left edge bits
        move.l  d3,d6                   ; Fresh interior counter for this row
fill_1lp2:
        move.l  d7,(a1)+                ; Interior longword = all ones
        dbra    d6,fill_1lp2            ;
        or.l    d2,(a1)+                ; Set the right edge bits
        adda.l  a5,a1                   ; Skip to the start of the next row
        dbra    d0,fill_1lp1            ; Loop over the remaining rows
        bra.s   Fill_1done
;
; Two longwords per row: just the left and right edges.
;
Fill_1quick:
        or.l    d1,(a1)+                ; Set the left edge bits
        or.l    d2,(a1)+                ; Set the right edge bits
        adda.l  a5,a1                   ; Skip to the start of the next row
        dbra    d0,Fill_1quick          ; Loop over the remaining rows
        bra.s   Fill_1done
;
; One longword per row: only bits inside both edge masks are set.
;
Fill_1single:
        move.l  d1,d6                   ; D6 = left mask
        and.l   d2,d6                   ;      AND right mask
Fill_1s2:
        or.l    d6,(a1)+                ; Set the selected bits
        adda.l  a5,a1                   ; Skip to the start of the next row
        dbra    d0,Fill_1s2             ; Loop over the remaining rows
;
; Common exit for all three shapes.
;
Fill_1done:
        movem.l (sp)+,d0/d3/d6-d7/a1    ; Put the registers back
        rts                             ; Return to caller
;-----------------------------------------------------------------------------
; Fill_0s: clear the selected region of the destination plane to zeros.
;
; Takes the same inputs as Fill_1s. The edge masks in D1/D2 are inverted
; on entry so they can be ANDed into the destination, clearing exactly
; the bits the original masks selected. Three row shapes are handled:
;   D3 < 0  - both edges lie in one longword; AND with NOT (D1 AND D2)
;   D3 = 0  - left edge longword followed directly by right edge longword
;   D3 > 0  - left edge, D3 full longwords of zeros, right edge
; After each row A5 is added to the destination pointer. All registers,
; including D1 and D2, are restored on exit.
;-----------------------------------------------------------------------------
Fill_0s:
        movem.l d0-d3/d6-d7/a1,-(sp)    ; Preserve everything we touch
        not.l   d1                      ; Left mask now selects bits to keep
        not.l   d2                      ; Right mask now selects bits to keep
        moveq   #0,d7                   ; D7 = zero for interior longwords
        subq.l  #1,d0                   ; Row count becomes zero-based for DBRA
        tst.l   d3                      ; Negative interior count?
        bmi.s   Fill_0single            ; Yes: edges share one longword
        subq.w  #1,d3                   ; Interior count zero-based for DBRA
        bmi.s   Fill_0quick             ; Was zero: only the two edges
;
; General case: left edge, run of full longwords, right edge.
;
fill_0lp1:
        and.l   d1,(a1)+                ; Clear the left edge bits
        move.l  d3,d6                   ; Fresh interior counter for this row
fill_0lp2:
        move.l  d7,(a1)+                ; Interior longword = all zeros
        dbra    d6,fill_0lp2            ;
        and.l   d2,(a1)+                ; Clear the right edge bits
        adda.l  a5,a1                   ; Skip to the start of the next row
        dbra    d0,fill_0lp1            ; Loop over the remaining rows
        bra.s   Fill_0done
;
; Two longwords per row: just the left and right edges.
;
Fill_0quick:
        and.l   d1,(a1)+                ; Clear the left edge bits
        and.l   d2,(a1)+                ; Clear the right edge bits
        adda.l  a5,a1                   ; Skip to the start of the next row
        dbra    d0,Fill_0quick          ; Loop over the remaining rows
        bra.s   Fill_0done
;
; One longword per row: keep every bit outside either edge mask.
;
Fill_0single:
        move.l  d1,d6                   ; D6 = inverted left mask
        or.l    d2,d6                   ;      OR inverted right mask
Fill_0s2:
        and.l   d6,(a1)+                ; Clear the selected bits
        adda.l  a5,a1                   ; Skip to the start of the next row
        dbra    d0,Fill_0s2             ; Loop over the remaining rows
;
; Common exit for all three shapes.
;
Fill_0done:
        movem.l (sp)+,d0-d3/d6-d7/a1    ; Put the registers back
        rts                             ; Return to caller
*****************************************************************************
*
* These two tables give the mask values used when copying the
* bits at the edge of each bitplane row. Note that a right edge
* of zero bits in width is handled as a special case in the code
* (it gets converted to a bitmap which is one longword narrower
* but has a right edge 32 bits wide).
*
*****************************************************************************
; LeftMask: 32 longword entries. Entry n (n = 0..31) has its n most
; significant bits clear and the remaining 32-n bits set, so entry 0
; selects the whole longword and entry 31 only the lowest bit.
; NOTE(review): presumably indexed by the left edge's bit offset within
; its longword; the lookup code is outside this section -- confirm there.
LeftMask:
dc.l $ffffffff,$7fffffff,$3fffffff,$1fffffff
dc.l $0fffffff,$07ffffff,$03ffffff,$01ffffff
dc.l $00ffffff,$007fffff,$003fffff,$001fffff
dc.l $000fffff,$0007ffff,$0003ffff,$0001ffff
dc.l $0000ffff,$00007fff,$00003fff,$00001fff
dc.l $00000fff,$000007ff,$000003ff,$000001ff
dc.l $000000ff,$0000007f,$0000003f,$0000001f
dc.l $0000000f,$00000007,$00000003,$00000001
; RightMask: 32 longword entries. Entry n for n = 1..31 has its n most
; significant bits set and the rest clear. Entry 0 is all ones, i.e. it
; is treated as a full 32-bit-wide right edge rather than an empty one.
; NOTE(review): presumably indexed by the right edge's bit position
; within its longword; the lookup code is outside this section.
RightMask:
dc.l $ffffffff,$80000000,$c0000000,$e0000000
dc.l $f0000000,$f8000000,$fc000000,$fe000000
dc.l $ff000000,$ff800000,$ffc00000,$ffe00000
dc.l $fff00000,$fff80000,$fffc0000,$fffe0000
dc.l $ffff0000,$ffff8000,$ffffc000,$ffffe000
dc.l $fffff000,$fffff800,$fffffc00,$fffffe00
dc.l $ffffff00,$ffffff80,$ffffffc0,$ffffffe0
dc.l $fffffff0,$fffffff8,$fffffffc,$fffffffe
*****************************************************************************
*
* Variables used by the code. _UsageCount is only ever updated
* atomically (since the replacement code must be re-entrant), and
* _BlitFunc is initialised by the startup code.
*
*****************************************************************************
SECTION Scroll,DATA
cnop 0,4 ; Start the variables on a longword boundary
_UsageCount: dc.l -1 ; Number of callers currently in code (initially -1)
_BlitFunc: dc.l _StartBlit ; Address of function for blitter test (defaults to _StartBlit)
_OnlySingle: dc.l 0 ; Only use CPU when src bm == dest bm? (initially 0)
_Broken: dc.l 0 ; Accommodate broken software? (initially 0)
_MinTaskPri: dc.b 0 ; Ignore tasks with pri <= this (single byte)
Pad dc.b 0,0,0 ; Three bytes after _MinTaskPri to round up to a LW boundary
END