home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
The World of Computer Software
/
World_Of_Computer_Software-02-385-Vol-1of3.iso
/
x
/
xibm.zip
/
apa16
/
fs.spp
< prev
next >
Wrap
Text File
|
1991-12-17
|
11KB
|
607 lines
# This file has 4 functions. The first is solid fill spans,
# the second is tile fill spans (currently not used because
# it doesn't work), the third draws vertical rectangles, and
# the fourth draws horizontal rectangles ("vertical" and
# "horizontal" refer to the way these routines are optimized).
# Fill spans (int *pwidth, struct {short x,y} *ppts,
# int nspans, int rop)
# Tile fill spans (int *pwidth, struct {short x,y} *ppts,
# int nspans, int rop, int *pTile, int w, int h,
# int pw, int xorg, int yorg)
# {vert,horz} fill rects (int *px, int *py, int *pw, int *ph, n, rop)
# notes:
# read from bit map (0xf4dxxxxx) takes 3.1 us
# write to bit map takes 2.5 us
# read from I/O register (0xf00xxxxx) takes 2.7 us
# write to I/O register takes 2.2 us
# A lot of code here would seem to be inefficient except that
# it overlaps I/O to the apa16.
# This function used to draw lines for FillSpans, but that code was
# broken; in addition, it is expensive to switch from queue to frame
# buffer, and the break-even point is large (greater than 100 pixels).
# registers used in fill spans and tile fill spans:
# r0 -1 or tile value
# r2 pwidth
# r3 ppts
# r4 nspans
# r5 rop << 4 (const part of MR)
# r6 Mode register value
# r7 screen addr
# r8 x
# r9 y
# r10 width
# r11 tmp (width % 16)
# r12 tmp (bit offset in word, MR)
# r13 queue length
# r14 tile line pointer (tile FS only)
# r15 &queue_counter (solid), x (tile)
# constants
# Absolute addresses shared by every routine in this file.  Per the
# timing notes above, 0xf4dxxxxx addresses are bit map accesses and
# 0xf00xxxxx addresses are I/O register accesses.
.set screen_addr,0xf4d80000 # base to which the x and y byte offsets are added
.set queue_counter,0xf4d9f804 # halfword polled by queue_wait until it reads 0
.set queue_pointer,0xf4d9f806 # not referenced by the routines visible here
.set mode_shadow,0xf4d9f812 # halfword loaded into r6 on routine entry
.set mode_register,0xf0000d10 # halfword written whenever the mode value changes
# apa16FastFS: solid fill spans.
# C view (from the prototype at the top of the file):
#   Fill spans (int *pwidth, struct {short x,y} *ppts, int nspans, int rop)
# Entry registers: r2 = pwidth, r3 = ppts, r4 = nspans, r5 = rop.
# The register roles inside the loop are listed in the table at the
# top of the file.  Every span is written as 16 bit words of all ones
# (r0 = -1); the mode register value selects the merge function, the bit
# offset of the left edge (low 4 bits) and the write mask for the final
# word (width % 16, shifted left 8).
# Branch mnemonics ending in "x" (bx, bnex, beqx, brx) are followed by one
# instruction that is executed along with the branch; the code below
# relies on that in several places.
# The label "ret" is the common epilogue and is also the exit point of
# the other routines in this file.
.globl .oVncs
.data
.align 2
.globl _apa16FastFS
# _apa16FastFS is a data word holding the address of the code entry point
_apa16FastFS: .long _.apa16FastFS
.text
.align 2
.globl _.apa16FastFS
_.apa16FastFS:
# save registers starting at r6 and open a 0x64 byte frame; the matching
# restore is "lm r6,8(sp)" in the epilogue (-0x5c + 0x64 = 8)
stm r6,-0x5c(sp)
cal sp,-0x64(sp)
# assume that it is valid to dereference the width pointer
# even if there are 0 spans
ls r10,0(r2) # prefetch first width
lda r15,queue_counter
lhs r13,0(r15) # r13 = queue length, sampled once on entry
inc r2,4 # pwidth++
sli r5,4 # convert rop to mode register value
setbl r5,0 # set horizontal access bit
lhs r8,0(r3) # prefetch first x
lh r9,2(r3) # prefetch first y [stall 4]
lda r7,screen_addr
cis r4,0
loadh r6,mode_shadow # [stall 1]
jle ret # note that epilogue requires r6 valid
# so this test can't be moved earlier
# enter the loop past the span fetch (already prefetched above);
# the instruction after bx sets the fill value r0 = -1
bx 0f
cal r0,-1(r0)
loop:
# get span info
ls r10,0(r2) # width = *pwidth++
inc r2,4
lhs r8,0(r3) # x = ppts->x
lda r7,screen_addr
lh r9,2(r3) # y = ppts->y
0:
inc r3,4 # ppts++
nilz r12,r8,15 # bit offset within word
nilz r11,r10,15 # width % 16
# "o r12,r5" below always runs (it follows bnex); the width adjustment
# runs only when width % 16 == 0
bnex 0f
o r12,r5 # new mode register
cal r10,-16(r10) # "partial" last word is really full word
0:
sri r8,3 # byte offset in scan line
clrbl r8,15 # with low bit forced to 0
a r7,r8
cis r13,0 # graphics processor active?
# "sli r9,7" always runs (it follows beqx): y * 128 bytes per scan line
beqx 0f
sli r9,7 # byte offset of scan line
bali r0,queue_wait # wait for queue to drain
cal r0,-1(r0) # r0 held the return address; restore fill value -1
0:
sri r10,4 # number of full words to write
# "a r7,r9" always runs (it follows beqx)
beqx short # 1 word is fast
a r7,r9 # addr of left end of line
# write r10 + 1 full words plus r11 bits
# bit offset is in r12
c r12,r6 # mode register unchanged? [stall 8]
jeq write_bits
# r6 is first used as the third operand of storeh and then set to the
# new mode value, so it keeps tracking what was last written
storeh r12,mode_register,r6
mr r6,r12
write_bits:
sths r0,0(r7)
inc r7,2
sis r10,1
jh write_bits
short:
# final (or only) word: add the write mask for the trailing bits
sli r11,8
o r12,r11 # set write mask
c r6,r12
jeq 0f
storeh r12,mode_register,r6
mr r6,r12
0:
sths r0,0(r7)
sis r4,1 # --nspans
jh loop
ret:
# reset merge mode to copy
cal16 r11,0x90(r0)
store r11,_apa16Qvars+8,r15 # apa16Qmerge_mode_old
setbl r11,0 # horizontal access bit
c r6,r11 # skip the register write when r6 already equals it
jeq 2f
storeh r11,mode_register,r15
2:
# restore the saved registers (including r15), then return through r15;
# the frame is released by the instruction following brx
lm r6,8(sp)
brx r15
cal sp,0x64(sp)
#####
queue_wait:
# Spin until the queue counter halfword reads 0.
# inputs:
# r0 = return address
# r5 = value whose bits 0x00f0 are copied into the mode value
#      (rop << 4 in the fill spans callers)
# r6 = mode register value currently in effect
# r15 = &queue_counter
# outputs:
# r13 = 0
# r6 = mode register value in effect on return
# r12 is used as a temporary
# mode register must be 80x0 to read queue counter
nilz r13,r6,0xff0f # drop the 0x00f0 field from the current mode
cal16 r12,0x8000(r0)
c r13,r12
jeq 0f # already of the form 80x0: no write needed
nilz r6,r5,0x00f0
o r12,r6 # r12 = 0x8000 | (r5 & 0x00f0)
storeh r12,mode_register,r6
mr r6,r12 # keep r6 equal to what was written
0:
lhs r13,0(r15)
cis r13,0
jne 0b
br r0
# NOTE(review): data word following the routine; its purpose is not
# evident from this file - confirm before moving or removing it
.long 0xdf02df00
#ifdef FAST_TILE
# apa16FastTileFS is similar to apa16TileFS except that it
# has to compute the value to write each word. Register use
# is a bit different because of this.
# Note: this does not depend on the tile being a multiple of
# 32 bits wide. It does require that the tile be at least 16 bits
# wide.
# apa16FastTileFS: tile fill spans.
# C view (from the prototype at the top of the file):
#   Tile fill spans (int *pwidth, struct {short x,y} *ppts, int nspans,
#                    int rop, int *pTile, int w, int h, int pw,
#                    int xorg, int yorg)
# Entry registers: r2 = pwidth, r3 = ppts, r4 = nspans, r5 = rop; the
# remaining arguments are read from the frame by tile_line_init and
# tile_get_word.  Inside the loop r14 = tile scan line pointer and
# r15 = x of the next word to fetch from the tile.
# Branch mnemonics ending in "x" are followed by one instruction that is
# executed along with the branch.
# Changes relative to the earlier version of this routine:
# - r6 is loaded from mode_shadow before the empty-span exit, because
#   the shared epilogue at "ret" compares r6 with the value it restores
# - the empty-span exit uses jle, as the solid fill routine does
# - r15 is advanced by 16 after each word in the multi-word loop so that
#   successive words sample successive tile bits
# NOTE(review): tile_line_init and tile_get_word use r13 as a temporary,
# so the "cis r13,0" queue test below no longer sees the queue length
# sampled on entry; the effect appears to be an extra call to queue_wait
# per span - confirm and reload the counter when that matters.
.globl _apa16FastTileFS
.globl _.apa16FastTileFS
.data
.align 2
# data word holding the address of the code entry point
_apa16FastTileFS:
.long _.apa16FastTileFS
.text
.align 2
_.apa16FastTileFS:
# same frame as the solid routine so that the shared epilogue applies
stm r6,-0x5c(sp)
cal sp,-0x64(sp)
ls r10,0(r2) # prefetch first width
loadh r13,queue_counter
inc r2,4 # pwidth++
lhs r8,0(r3) # prefetch first x
sli r5,4 # convert rop to mode register value
setbl r5,0 # set horizontal access bit
loadh r6,mode_shadow # epilogue requires r6 valid, so load before exit test
cis r4,0
jle ret # nothing to do
lh r9,2(r3) # prefetch first y
lda r7,screen_addr
j 0f
tile_loop:
# get span info
ls r10,0(r2) # width = *pwidth++
inc r2,4
lhs r8,0(r3) # x = ppts->x
lda r7,screen_addr
lh r9,2(r3) # y = ppts->y
0:
inc r3,4 # ppts++
# compute r14 for this y; "mr r15,r8" runs with the branch and
# seeds r15 with the x of the first word
bx tile_line_init
mr r15,r8
tile_line_init_ret:
nilz r12,r8,15 # bit offset within word
nilz r11,r10,15 # width % 16
# "o r12,r5" always runs; the adjustment only when width % 16 == 0
bnex 0f
o r12,r5 # new mode register
cal r10,-16(r10) # "partial" last word is really full word
0:
sri r8,3 # byte offset in scan line
clrbl r8,15 # with low bit forced to 0
a r7,r8
cis r13,0
# "sli r9,7" always runs (it follows beqx)
beqx 0f
sli r9,7 # byte offset of scan line
# queue_wait needs r15 = &queue_counter and clobbers r12, so park
# r15 and r12 in the frame around the call
sts r15,4(sp)
lda r15,queue_counter
balix r0,queue_wait
sts r12,0(sp)
ls r12,0(sp)
ls r15,4(sp)
0:
sri r10,4 # number of full words to write
# "a r7,r9" always runs (it follows beqx)
beqx tile_short # 1 word is fast
a r7,r9 # addr of left end of line
# write r10 + 1 full words plus r11 bits
# bit offset is in r12
c r12,r6 # mode register unchanged? [stall 8]
jeq tile_write_bits
storeh r12,mode_register,r6
mr r6,r12
tile_write_bits:
bali r0,tile_get_word # r0 = tile bits for x = r15
sths r0,0(r7)
cal r15,16(r15) # x += 16 for the next word
sis r10,1
# "inc r7,2" always runs (it follows bhx)
bhx tile_write_bits
inc r7,2
tile_short:
# final (or only) word: add the write mask for the trailing bits
sli r11,8
o r12,r11 # set write mask
c r6,r12
jeq 0f
storeh r12,mode_register,r6
mr r6,r12
0:
bali r0,tile_get_word
sths r0,0(r7)
sis r4,1 # --nspans
jh tile_loop
b ret
# The next 2 routines use r13 and r0 as temporary registers
tile_line_init:
# Compute the address of the start of the tile scan line
# and save it in r14.
# inputs:
# r9 = y
# 0x64(sp) = tile pointer
# 0x68(sp) = tile w (bits)
# 0x6c(sp) = tile h
# 0x70(sp) = tile padded w (bytes)
# 0x74(sp) = x origin of tile
# 0x78(sp) = y origin of tile
# output:
# r14 = tile pointer + (((y - yorg) % h) * pw), rounded down to a
#       multiple of 4
# r0, r13 and the MQ register are clobbered.  The routine is entered by
# a plain branch and leaves by jumping to tile_line_init_ret; no link
# register is involved.
# In the mts/mfs instructions below "r10" is the register that the
# neighbouring comments call mq, not general register r10.
l r0,0x78(sp)
l r13,0x6c(sp)
l r14,0x64(sp) # load tile pointer
sf r0,r9 # r0 = (y - yorg)
sli16 r0,0 # r0 = (y - yorg) << 16
mts r10,r0
lis r0,0 # r0:mq = y << 16
# 16 divide steps
d r0,r13 # compute (y - yorg) % h
d r0,r13
d r0,r13
d r0,r13
d r0,r13
d r0,r13
d r0,r13
d r0,r13
d r0,r13
d r0,r13
d r0,r13
d r0,r13
d r0,r13
d r0,r13
d r0,r13
d r0,r13
# remainder correction after the last divide step
jc0 0f
a r0,r13
0:
mts r10,r0 # mq = (y - yorg) % h
l r13,0x70(sp) # r13 = pw
s r0,r0 # r0 = 0
# 8 multiply steps
m r0,r13
m r0,r13
m r0,r13
m r0,r13
m r0,r13
m r0,r13
m r0,r13
m r0,r13 # mq = ((y % h) * pw) << 16
mfs r10,r0
sri16 r0,0 # (y % h) * pw
nilo r0,r0,0xfffc # word align
cas r14,r0,r14 # r14 = tile pointer + byte offset of the scan line
# NOTE(review): the two trap instructions look like a sanity check that
# r14 lies in [0x10000000, 0x20000000) - confirm the trap conditions
get r0,$0x10000000
tlt r14,r0
get r0,$0x20000000
tgte r14,r0
j tile_line_init_ret
# tile_get_word:
# Fetch the 16 tile bits that start at bit ((x - xorg) % w) of the
# current tile scan line.
# inputs:
# r14 = start of scan line
# r15 = x
# 0x68(sp) = w (tile width in bits)
# 0x74(sp) = xorg
# r0 = return addr
# output:
# r0 = word to write
# r13 is used as a temporary
# r2 is preserved through 4(sp); the return address is kept in 0(sp).
# tile_split also saves r3 at -4(sp), i.e. just below the frame.
# The frame offsets follow the layout documented in tile_line_init
# (0x64 tile pointer, 0x68 w, 0x6c h, 0x70 pw, 0x74 xorg, 0x78 yorg).
# The width used to be read from 0x6c(sp), which is the tile height in
# that layout; it is now read from 0x68(sp).
# In the mts instruction below "r10" is the register that the comments
# call mq, not general register r10.
tile_get_word:
sts r0,0(sp) # save return address
sts r2,4(sp) # save caller r2 (pwidth in the fill loop)
l r0,0x74(sp) # xorg
l r13,0x68(sp) # w
sf r0,r15 # (x - xorg)
sli16 r0,0 # (x - xorg) << 16
mts r10,r0
lis r0,0 # r0:mq = x << 16
# 16 divide steps: r0 = (x - xorg) % w after the correction below
d r0,r13
d r0,r13
d r0,r13
d r0,r13
d r0,r13
d r0,r13
d r0,r13
d r0,r13
d r0,r13
d r0,r13
d r0,r13
d r0,r13
d r0,r13
d r0,r13
d r0,r13
d r0,r13
jc0 0f
a r0,r13
0:
exts r0,r0 # clear high bits of r0 = (x - xorg) % w
ail r2,r0,16 # r2 = bit position just past the 16 bits wanted
c r2,r13
jh tile_split # wrap around
# Tile access does not wrap. Read two halfwords and combine them.
sri r0,3 # make a byte count
clrbl r0,15 # force even
cas r13,r0,r14 # r13 = address of the halfword holding the first bit
nilz r2,r2,15 # (x - xorg) % 16
# "lhs r0,0(r13)" always runs (it follows beqx), so r0 already holds the
# result when the offset is a multiple of 16
beqx tile_halfword
lhs r0,0(r13)
lh r13,2(r13)
# 16 of the bits in r0||r13, starting with bit #r2, are the ones we want
# shift r0 left r2 bits, shift r13 right (16 - r2) bits
sl r0,r2
sfi r2,r2,16
sr r13,r2
l r2,4(sp) # restore r2
o r0,r13
ls r13,0(sp) # return address
br r13
# this is separate to avoid reading past the end of the tile
# bitmap if the last 16 bits of the tile are to be read
tile_halfword:
ls r13,0(sp) # return address
# "ls r2,4(sp)" always runs (it follows brx) and restores r2
brx r13
ls r2,4(sp)
# Tile access wraps around.
# Compute 16 bit word by concatenating last # bits of
# tile with first (16-#) bits of tile.
tile_split:
st r3,-4(sp) # save r3 below the frame
lhs r2,0(r14)
nilz r3,r0,0xfff0
sri r3,3
cas r3,r3,r14
lhs r3,0(r3)
# r2 = left end of tile, r3 = right end of tile
# r13 - r0 is number of bits to take from r3
s r13,r0
sr r2,r13 # clear high bits of r2
sfi r13,r13,16
sl r3,r13 # clear low bits of r3
o r2,r3
ls r13,0(sp) # return address
l r3,-4(sp) # restore r3
mr r0,r2 # result goes back in r0
# "ls r2,4(sp)" always runs (it follows brx) and restores r2
brx r13
ls r2,4(sp)
# NOTE(review): data word following the routine; its purpose is not
# evident from this file - confirm before moving or removing it
.long 0xdf02df00
#endif
#ifdef FAST_RECT
# draw a vertical rectangle
# apa16FastVertFS: fill n rectangles, one 1 pixel wide column at a time.
# C view (from the prototype at the top of the file):
#   vert fill rects (int *px, int *py, int *pw, int *ph, n, rop)
# Entry registers: r2 = px, r3 = py, r4 = pw, r5 = ph.
# Loop registers: r0 = -1 (data written), r6 = mode value last written,
# r7 = screen address, r10 = columns left, r11 = h, then h / 16,
# r12 = mode value being built, r13 = h % 16, r14 = current x,
# r15 = rectangles left.
# Exits through the shared epilogue "ret".
# NOTE(review): the four arrays are read with halfword loads and stepped
# by 2 bytes, although the prototype above says int * - confirm the
# element type with the caller.
# NOTE(review): n is read from 0x68(sp) and rop from 0x64(sp), which is
# the reverse of the (n, rop) order in the prototype - confirm.
# NOTE(review): queue_wait builds its mode value from r5 & 0x00f0, and
# r5 is the ph pointer here rather than rop << 4 - confirm this is
# harmless.
.globl _apa16FastVertFS
.globl _.apa16FastVertFS
.data
.align 2
# data word holding the address of the code entry point
_apa16FastVertFS:
.long _.apa16FastVertFS
.text
.align 2
_.apa16FastVertFS:
# same frame as the solid fill spans routine, so "ret" applies
stm r6,-0x5c(sp)
cal sp,-0x64(sp)
lda r15,queue_counter
lhs r13,0(r15) # queue length
loadh r6,mode_shadow
cis r13,0
jeq 0f
bali r0,queue_wait # wait for queue to drain
0:
l r15,0x68(sp) # n
cal r0,-1(r0) # data written to the screen: all ones
nloop:
lhs r10,0(r4) # w
lhs r14,0(r2) # x
xloop:
l r7,0x64(sp) # rop
lhs r9,0(r3) # y
mr r8,r14
nilz r12,r8,15 # x & 15
lhs r11,0(r5) # h
sli r7,4
o r12,r7 # mode register
sri r8,3 # x >> 3
clrbl r8,15 # byte offset forced even
lda r7,screen_addr(r8)
sli r9,7 # y * 128 = byte offset of scan line
a r7,r9
sri r9,7
nilz r13,r11,0xf # r13 = h % 16
jne 1f
# h % 16 == 0: count one fewer full word; the last word is full
ail r11,r11,-16
jc0 veloop # h was 0?
1:
sri r11,4 # number of full words before the last one
jeq vshort # only one word
c r6,r12 # mode register unchanged?
jeq 2f
storeh r12,mode_register,r6
mr r6,r12
2:
sths r0,0(r7)
cal r7,2048(r7) # advance 2048 bytes = 16 scan lines of 128 bytes
sis r11,1
jh 2b
vshort:
# NOTE(review): the solid fill spans routine shifts its mask left 8
# before this OR; here r13 is ORed in unshifted - confirm
o r12,r13 # bit mask
c r6,r12
jeq 3f
storeh r12,mode_register,r6
mr r6,r12
3:
sths r0,0(r7)
sis r10,1 # while (--width > 0)
# "inc r14,1" always runs (it follows bhx)
bhx xloop
inc r14,1 # x++
veloop:
sis r15,1 # --n
jnh ret
inc r2,2 # px++
inc r3,2 # py++
inc r4,2 # pw++
# "inc r5,2" always runs (it follows bx)
bx nloop
inc r5,2 # ph++
b ret
# apa16FastHorzFS: fill n rectangles, one scan line at a time.
# C view (from the prototype at the top of the file):
#   horz fill rects (int *px, int *py, int *pw, int *ph, n, rop)
# Entry registers: r2 = px, r3 = py, r4 = pw, r5 = ph.
# Loop registers: r0 = -1 (data written), r6 = mode value last written,
# r7 = screen address, r10 = w, then number of full words,
# r11 = scan lines left, r12 = mode value being built, r13 = w % 16,
# r14 = current y, r15 = rectangles left.
# Exits through the shared epilogue "ret".
# Changes relative to the earlier version of this routine:
# - the full word loop counts down r10 (words); it used to decrement
#   r11, which is the scan line counter of the outer loop
# - the width is reduced by 16 when w % 16 == 0 (the test was inverted
#   compared with the solid fill spans and vertical routines)
# - the write mask is shifted left 8 before being ORed into the mode
#   value, as in the solid fill spans routine; unshifted it landed on
#   the bit offset in the low 4 bits
# - a rectangle of width 0 is skipped, mirroring the "h was 0" test
#   of the vertical routine
# NOTE(review): the four arrays are read with halfword loads and stepped
# by 2 bytes, although the prototype above says int * - confirm the
# element type with the caller.
# NOTE(review): n is read from 0x68(sp) and rop from 0x64(sp), which is
# the reverse of the (n, rop) order in the prototype - confirm.
.globl _apa16FastHorzFS
.globl _.apa16FastHorzFS
.data
.align 2
# data word holding the address of the code entry point
_apa16FastHorzFS:
.long _.apa16FastHorzFS
.text
.align 2
_.apa16FastHorzFS:
# same frame as the solid fill spans routine, so "ret" applies
stm r6,-0x5c(sp)
cal sp,-0x64(sp)
get r15,$queue_counter
lhs r13,0(r15) # queue length
loadh r6,mode_shadow
cis r13,0
jeq 0f
bali r0,queue_wait # wait for queue to drain
0:
l r15,0x68(sp) # n
cal r0,-1(r0) # data written to the screen: all ones
hnloop:
lhs r11,0(r5) # h
lhs r14,0(r3) # y
hyloop:
l r7,0x64(sp) # rop
lhs r8,0(r2) # x
lhs r10,0(r4) # w
mr r9,r14 # y
nilz r12,r8,15 # x & 15
sli r7,4
setbl r7,0 # horizontal access bit
o r12,r7 # mode register
sri r8,3 # x >> 3
clrbl r8,15 # byte offset forced even
get r7,$screen_addr
a r7,r8
sli r9,7 # y * 128 = byte offset of scan line
a r7,r9
sri r9,7
nilz r13,r10,0xf # r13 = w % 16
jne 1f
# w % 16 == 0: count one fewer full word; the last word is full
ail r10,r10,-16
jc0 heloop # w was 0?
1:
sri r10,4 # number of full words before the last one
jeq hshort # only one word
c r6,r12 # mode register unchanged?
jeq 2f
storeh r12,mode_register,r6
mr r6,r12
2:
sths r0,0(r7)
inc r7,2
sis r10,1 # one full word done
jh 2b
hshort:
sli r13,8 # move w % 16 into the write mask position
o r12,r13 # bit mask
c r6,r12
jeq 3f
storeh r12,mode_register,r6
mr r6,r12
3:
sths r0,0(r7)
sis r11,1 # while (--height > 0)
# "inc r14,1" always runs (it follows bhx)
bhx hyloop
inc r14,1 # y++
heloop:
sis r15,1 # --n
bnh ret
inc r2,2 # px++
inc r3,2 # py++
inc r4,2 # pw++
# "inc r5,2" always runs (it follows bx)
bx hnloop
inc r5,2 # ph++
b ret
#endif