The World of Computer Software

home *** CD-ROM | disk | FTP | other *** search

/ The World of Computer Software / World_Of_Computer_Software-02-385-Vol-1of3.iso / x / xibm.zip / apa16 / fs.spp < prev next >

Wrap

Text File | 1991-12-17 | 11KB | 607 lines

# This file has 4 functions. The first is solid fill spans, # the second is a tile fill spans (currently not used because # it doesn't work), the third draws vertical rectangles, and # the fourth draws horizontal rectangles ("vertical" and # "horizontal" refer to the way these routines are optimized). # Fill spans (int *pwidth, struct {short x,y} *ppts, # int nspans, int rop) # Tile fill spans (int *pwidth, struct {short x,y} *ppts, # int nspans, int rop, int *pTile, int w, int h, # int pw, int xorg, int yorg) # {vert,horz} fill rects (int *px, int *py, int *pw, int *ph, n, rop) # notes: # read from bit map (0xf4dxxxxx) takes 3.1 us # write to bit map takes 2.5 us # read from I/O register (0xf00xxxxx) takes 2.7 us # write to I/O register takes 2.2 us # A lot of code here would seem to be inefficient except that # it overlaps I/O to the apa16. # This function used to draw lines for FillSpans, but that code was # broken, it is expensive to switch from queue to frame buffer, and # the break-even point is large (greater than 100 pixels). 
# registers used in fill spans and tile fill spans:
#	r0	-1 or tile value
#	r2	pwidth
#	r3	ppts
#	r4	nspans
#	r5	rop << 4 (const part of MR)
#	r6	Mode register value
#	r7	screen addr
#	r8	x
#	r9	y
#	r10	width
#	r11	tmp (width % 16)
#	r12	tmp (bit offset in word, MR)
#	r13	queue length
#	r14	tile line pointer (tile FS only)
#	r15	&queue_counter (solid), x (tile)

# constants
	.set	screen_addr,0xf4d80000
	.set	queue_counter,0xf4d9f804
	.set	queue_pointer,0xf4d9f806
	.set	mode_shadow,0xf4d9f812
	.set	mode_register,0xf0000d10

	.globl	.oVncs

	.data
	.align	2
	.globl	_apa16FastFS
_apa16FastFS:
	.long	_.apa16FastFS

	.text
	.align	2
	.globl	_.apa16FastFS

# _.apa16FastFS -- solid fill spans
#	Fill spans (int *pwidth, struct {short x,y} *ppts, int nspans, int rop)
# in:	r2 = pwidth, r3 = ppts, r4 = nspans, r5 = rop
# r6-r15 are saved in the frame on entry and restored at "ret".
# 0(sp) is used as a scratch slot while queue_wait is called.
# For each span a mode register value (horizontal access bit, rop,
# bit offset of x within its 16 bit word, write mask for the last word)
# is built in r12 and written only when it differs from the tracked
# value in r6; words of all ones (r0 = -1) are then stored to the
# frame buffer.  Spans whose width is <= 0 are skipped.
_.apa16FastFS:
	stm	r6,-0x5c(sp)
	cal	sp,-0x64(sp)
# assume that it is valid to dereference the width pointer
# even if there are 0 spans
	ls	r10,0(r2)		# prefetch first width
	lda	r15,queue_counter
	lhs	r13,0(r15)		# r13 = queue length
	inc	r2,4			# pwidth++
	sli	r5,4			# convert rop to mode register value
	setbl	r5,0			# set horizontal access bit
	lhs	r8,0(r3)		# prefetch first x
	lh	r9,2(r3)		# prefetch first y [stall 4]
	lda	r7,screen_addr
	cis	r4,0
	loadh	r6,mode_shadow		# [stall 1]
	jle	ret			# note that epilogue requires r6 valid
					# so this test can't be moved earlier
	bx	0f
	cal	r0,-1(r0)		# (executed with the branch) r0 = -1

loop:
# get span info
	ls	r10,0(r2)		# width = *pwidth++
	inc	r2,4
	lhs	r8,0(r3)		# x = ppts->x
	lda	r7,screen_addr
	lh	r9,2(r3)		# y = ppts->y
0:
	inc	r3,4			# ppts++
# An empty (or negative) span must not reach the word count computation
# below: width - 16 shifted right would not be a sensible loop count.
	cis	r10,0
	jle	skip_span
	nilz	r12,r8,15		# bit offset within word
	nilz	r11,r10,15		# width % 16
	bnex	0f
	o	r12,r5			# (executed on both paths) new mode register
	cal	r10,-16(r10)		# "partial" last word is really full word
0:
	sri	r8,3			# byte offset in scan line
	clrbl	r8,15			# with low bit forced to 0
	a	r7,r8
	cis	r13,0			# graphics processor active?
	beqx	0f
	sli	r9,7			# (executed on both paths) byte offset of scan line
# queue_wait overwrites r12 (and r13).  r12 already holds this span's
# mode register value, so keep it in the frame across the call.
	balix	r0,queue_wait		# wait for queue to drain
	sts	r12,0(sp)		# (executed before the call) save MR value
	ls	r12,0(sp)		# restore MR value
	cal	r0,-1(r0)		# r0 held the return address; make it -1 again
0:
	sri	r10,4			# number of full words to write
	beqx	short			# 1 word is fast
	a	r7,r9			# (executed on both paths) addr of left end of line
# write r10 + 1 full words plus r11 bits
# bit offset is in r12
	c	r12,r6			# mode register unchanged? [stall 8]
	jeq	write_bits
	storeh	r12,mode_register,r6
	mr	r6,r12			# r6 tracks what was written
write_bits:
	sths	r0,0(r7)
	inc	r7,2
	sis	r10,1
	jh	write_bits
short:
	sli	r11,8
	o	r12,r11			# set write mask
	c	r6,r12
	jeq	0f
	storeh	r12,mode_register,r6
	mr	r6,r12
0:
	sths	r0,0(r7)
skip_span:
	sis	r4,1			# while (--nspans > 0)
	jh	loop

# Common epilogue, also branched to by the other routines in this file.
# Requires r6 = mode register value currently in effect.
ret:
# reset merge mode to copy
	cal16	r11,0x90(r0)
	store	r11,_apa16Qvars+8,r15	# apa16Qmerge_mode_old
	setbl	r11,0			# horizontal access bit
	c	r6,r11
	jeq	2f			# mode register already has this value
	storeh	r11,mode_register,r15
2:
	lm	r6,8(sp)		# restore r6-r15
	brx	r15
	cal	sp,0x64(sp)		# (executed with the return) pop frame

#####
# queue_wait: poll the halfword at 0(r15) until it reads zero.
# in:	r0  = return address
#	r5  = bits 0x00f0 are merged into the mode register value
#	      written here when a write is needed
#	r6  = mode register value currently in effect
#	r15 = &queue_counter
# out:	r13 = 0
#	r6  = mode register value now in effect
# clobbers r12 and r13 -- callers that need r12 must save it.
queue_wait:
# mode register must be 80x0 to read queue counter
	nilz	r13,r6,0xff0f
	cal16	r12,0x8000(r0)
	c	r13,r12
	jeq	0f			# already of the form 80x0
	nilz	r6,r5,0x00f0
	o	r12,r6
	storeh	r12,mode_register,r6
	mr	r6,r12
0:
	lhs	r13,0(r15)
	cis	r13,0
	jne	0b			# spin until the counter is zero
	br	r0
# NOTE(review): purpose of this constant is not visible in this file
# (possibly an end-of-routine marker for a debugger) -- confirm.
	.long	0xdf02df00

#ifdef FAST_TILE
# apa16FastTileFS is similar to apa16TileFS except that it
# has to compute the value to write each word.  Register use
# is a bit different because of this.
# Note: this does not depend on the tile being a multiple of
# 32 bits wide.  It does require that the tile be at least 16 bits
# wide.
# Function descriptor and entry point for tile fill spans.
	.globl	_apa16FastTileFS
	.globl	_.apa16FastTileFS

	.data
	.align	2
_apa16FastTileFS:
	.long	_.apa16FastTileFS

	.text
	.align	2

# _.apa16FastTileFS -- tile fill spans
#	Tile fill spans (int *pwidth, struct {short x,y} *ppts,
#		int nspans, int rop, int *pTile, int w, int h,
#		int pw, int xorg, int yorg)
# in:	r2 = pwidth, r3 = ppts, r4 = nspans, r5 = rop,
#	remaining arguments in the frame (see tile_line_init)
# Same structure as the solid fill spans loop, except that the word
# stored for each 16 pixel step comes from tile_get_word and r15
# carries the x coordinate of the word being fetched.
# 0(sp) and 4(sp) are scratch slots (queue wait, tile_get_word).
# NOTE(review): the file header says this routine is currently unused
# because it does not work; the corrections below address the defects
# visible in this file but the routine has not been run on hardware.
# NOTE(review): tile_line_init and tile_get_word overwrite r13, so the
# "cis r13,0" queue test below no longer sees the queue length read at
# entry; the effect appears to be an unnecessary queue_wait call per
# span rather than a wrong result -- confirm.
_.apa16FastTileFS:
	stm	r6,-0x5c(sp)
	cal	sp,-0x64(sp)
	ls	r10,0(r2)		# prefetch first width
	loadh	r13,queue_counter
	inc	r2,4			# pwidth++
	lhs	r8,0(r3)		# prefetch first x
	sli	r5,4
	setbl	r5,0			# set horizontal access bit
	lh	r9,2(r3)		# prefetch first y
	lda	r7,screen_addr
# The epilogue at "ret" compares r6 with the mode register value it
# wants, so r6 must be loaded before the empty-list exit is taken
# (same cis / loadh / jle order as the solid fill spans entry).
	cis	r4,0
	loadh	r6,mode_shadow		# [stall 2]
	jle	ret
	j	0f

tile_loop:
# get span info
	ls	r10,0(r2)		# width = *pwidth++
	inc	r2,4
	lhs	r8,0(r3)		# x = ppts->x
	lda	r7,screen_addr
	lh	r9,2(r3)		# y = ppts->y
0:
	inc	r3,4			# ppts++
	bx	tile_line_init		# r14 = tile scan line for y
	mr	r15,r8			# (executed with the branch) r15 = x
tile_line_init_ret:
	nilz	r12,r8,15		# bit offset within word
	nilz	r11,r10,15		# width % 16
	bnex	0f
	o	r12,r5			# (executed on both paths) new mode register
	cal	r10,-16(r10)		# "partial" last word is really full word
0:
	sri	r8,3			# byte offset in scan line
	clrbl	r8,15			# with low bit forced to 0
	a	r7,r8
	cis	r13,0
	beqx	0f
	sli	r9,7			# (executed on both paths) byte offset of scan line
# queue_wait needs r15 = &queue_counter and overwrites r12,
# so both are kept in the frame across the call.
	sts	r15,4(sp)
	lda	r15,queue_counter
	balix	r0,queue_wait
	sts	r12,0(sp)		# (executed before the call)
	ls	r12,0(sp)
	ls	r15,4(sp)
0:
	sri	r10,4			# number of full words to write
	beqx	tile_short		# 1 word is fast
	a	r7,r9			# (executed on both paths) addr of left end of line
# write r10 + 1 full words plus r11 bits
# bit offset is in r12
	c	r12,r6			# mode register unchanged? [stall 8]
	jeq	tile_write_bits
	storeh	r12,mode_register,r6
	mr	r6,r12
tile_write_bits:
	bali	r0,tile_get_word
	sths	r0,0(r7)
# Each stored word covers the next 16 pixels, so the x used for the
# following tile fetch has to move with it.
	cal	r15,16(r15)		# x += 16
	sis	r10,1
	bhx	tile_write_bits
	inc	r7,2			# (executed on both paths) next screen word
tile_short:
	sli	r11,8
	o	r12,r11			# set write mask
	c	r6,r12
	jeq	0f
	storeh	r12,mode_register,r6
	mr	r6,r12
0:
	bali	r0,tile_get_word
	sths	r0,0(r7)
	sis	r4,1			# while (--nspans > 0)
	jh	tile_loop
	b	ret

# The next 2 routines use r13 and r0 as temporary registers

tile_line_init:
# Compute the address of the start of the tile scan line
# and save it in r14.
# inputs:
#	r9 = y
#	0x64(sp) = tile pointer
#	0x68(sp) = tile w (bits)
#	0x6c(sp) = tile h
#	0x70(sp) = tile padded w (bytes)
#	0x74(sp) = x origin of tile
#	0x78(sp) = y origin of tile
# Returns by jumping to tile_line_init_ret (it is not called with a
# link register).  The sixteen "d" and eight "m" instructions are
# divide and multiply steps.
	l	r0,0x78(sp)
	l	r13,0x6c(sp)
	l	r14,0x64(sp)		# load tile pointer
	sf	r0,r9			# r0 = (y - yorg)
	sli16	r0,0			# r0 = (y - yorg) << 16
	mts	r10,r0
	lis	r0,0			# r0:mq = y << 16
	d	r0,r13			# compute (y - yorg) % h
	d	r0,r13
	d	r0,r13
	d	r0,r13
	d	r0,r13
	d	r0,r13
	d	r0,r13
	d	r0,r13
	d	r0,r13
	d	r0,r13
	d	r0,r13
	d	r0,r13
	d	r0,r13
	d	r0,r13
	d	r0,r13
	d	r0,r13
	jc0	0f
	a	r0,r13
0:
	mts	r10,r0			# r10 = y % h
	l	r13,0x70(sp)		# r13 = pw
	s	r0,r0
	m	r0,r13
	m	r0,r13
	m	r0,r13
	m	r0,r13
	m	r0,r13
	m	r0,r13
	m	r0,r13
	m	r0,r13
# mq = ((y % h) * w) << 16
	mfs	r10,r0
	sri16	r0,0			# (y % h) * pw
	nilo	r0,r0,0xfffc		# word align
	cas	r14,r0,r14
# trap if the computed address is outside 0x10000000..0x1fffffff
	get	r0,$0x10000000
	tlt	r14,r0
	get	r0,$0x20000000
	tgte	r14,r0
	j	tile_line_init_ret

# tile_get_word:
# inputs:
#	r14 = start of scan line
#	r15 = x
#	0x68(sp) = w  (tile width in bits, same slot tile_line_init documents)
#	0x74(sp) = xorg
#	r0 = return addr
# output:
#	r0 = word to write
# r13 is used as a temporary
# r2 is saved at 4(sp) and r3 at -4(sp) while they are used as scratch.
tile_get_word:
	sts	r0,0(sp)		# save return address
	sts	r2,4(sp)
	l	r0,0x74(sp)		# xorg
# w lives at 0x68(sp); 0x6c(sp) is the tile height used by tile_line_init
	l	r13,0x68(sp)		# w
	sf	r0,r15			# (x - xorg)
	sli16	r0,0			# (x - xorg) << 16
	mts	r10,r0
	lis	r0,0			# r0:mq = x << 16
	d	r0,r13
	d	r0,r13
	d	r0,r13
	d	r0,r13
	d	r0,r13
	d	r0,r13
	d	r0,r13
	d	r0,r13
	d	r0,r13
	d	r0,r13
	d	r0,r13
	d	r0,r13
	d	r0,r13
	d	r0,r13
	d	r0,r13
	d	r0,r13
	jc0	0f
	a	r0,r13
0:
	exts	r0,r0			# clear high bits of r0 = (x - xorg) % w
	ail	r2,r0,16
	c	r2,r13
	jh	tile_split		# wrap around
# Tile access does not wrap.  Read two halfwords and combine them.
	sri	r0,3			# make a byte count
	clrbl	r0,15			# force even
	cas	r13,r0,r14
	nilz	r2,r2,15		# (x - xorg) % 16
	beqx	tile_halfword
	lhs	r0,0(r13)		# (executed on both paths) first halfword
	lh	r13,2(r13)
# 16 of the bits in r0||r13, starting with bit #r2, are the ones we want
# shift r0 left r2 bits, shift r13 right (16 - r2) bits
	sl	r0,r2
	sfi	r2,r2,16
	sr	r13,r2
	l	r2,4(sp)		# restore r2
	o	r0,r13
	ls	r13,0(sp)		# return address
	br	r13
# this is separate to avoid reading past the end of the tile
# bitmap if the last 16 bits of the tile are to be read
tile_halfword:
	ls	r13,0(sp)
	brx	r13
	ls	r2,4(sp)		# (executed with the return) restore r2
# Tile access wraps around.
# Compute 16 bit word by concatenating last # bits of
# tile with first (16-#) bits of tile.
tile_split:
	st	r3,-4(sp)
	lhs	r2,0(r14)
	nilz	r3,r0,0xfff0
	sri	r3,3
	cas	r3,r3,r14
	lhs	r3,0(r3)
# r2 = left end of tile, r3 = right end of tile
# r13 - r0 is number of bits to take from r3
	s	r13,r0
	sr	r2,r13			# clear high bits of r2
	sfi	r13,r13,16
	sl	r3,r13			# clear low bits of r2
	o	r2,r3
	ls	r13,0(sp)		# return address
	l	r3,-4(sp)		# restore r3
	mr	r0,r2
	brx	r13
	ls	r2,4(sp)		# (executed with the return) restore r2
	.long	0xdf02df00
#endif

#ifdef FAST_RECT
# draw a vertical rectangle
#	vert fill rects (int *px, int *py, int *pw, int *ph, n, rop)
# in:	r2 = px, r3 = py, r4 = pw, r5 = ph,
#	0x68(sp) = n, 0x64(sp) = rop (as read by the code below)
# Each rectangle is drawn one pixel column at a time; within a column
# every store covers 16 scan lines (the address advances by 2048 bytes)
# and the last store of the column is limited by the write mask h % 16.
# NOTE(review): the coordinate arrays are read with halfword loads and
# stepped by 2 bytes although the prototype says int * -- confirm the
# element type with the caller.
# NOTE(review): queue_wait is entered with r5 = ph rather than
# rop << 4; it only uses r5 for the merge bits of a temporary mode
# register value, which is tracked in r6 and rewritten before drawing.
	.globl	_apa16FastVertFS
	.globl	_.apa16FastVertFS

	.data
	.align	2
_apa16FastVertFS:
	.long	_.apa16FastVertFS

	.text
	.align	2
_.apa16FastVertFS:
	stm	r6,-0x5c(sp)
	cal	sp,-0x64(sp)
	lda	r15,queue_counter
	lhs	r13,0(r15)
	loadh	r6,mode_shadow
	cis	r13,0
	jeq	0f			# queue already empty
	bali	r0,queue_wait
0:
	l	r15,0x68(sp)		# n
	cal	r0,-1(r0)		# r0 = -1 (fill data)
nloop:
	lhs	r10,0(r4)		# w
	lhs	r14,0(r2)		# x
xloop:
	l	r7,0x64(sp)		# rop
	lhs	r9,0(r3)		# y
	mr	r8,r14
	nilz	r12,r8,15		# x & 15
	lhs	r11,0(r5)		# h
	sli	r7,4
	o	r12,r7			# mode register
	sri	r8,3			# x >> 3
	clrbl	r8,15
	lda	r7,screen_addr(r8)
	sli	r9,7
	a	r7,r9
	sri	r9,7
	nilz	r13,r11,0xf		# h % 16
	jne	1f
	ail	r11,r11,-16		# last word is a full word
	jc0	veloop			# h was 0?
1:
	sri	r11,4			# number of full words to write
	jeq	vshort			# only one word
	c	r6,r12
	jeq	2f
	storeh	r12,mode_register,r6
	mr	r6,r12
2:
	sths	r0,0(r7)
	cal	r7,2048(r7)		# 16 scan lines down
	sis	r11,1
	jh	2b
vshort:
# The write mask belongs in the same field the solid fill spans code
# uses (count << 8); OR-ing it in unshifted would alter the bit offset.
	sli	r13,8
	o	r12,r13			# bit mask
	c	r6,r12
	jeq	3f
	storeh	r12,mode_register,r6
	mr	r6,r12
3:
	sths	r0,0(r7)
	sis	r10,1			# while (--width > 0)
	bhx	xloop
	inc	r14,1			# (executed on both paths) x++
veloop:
	sis	r15,1			# while (--n > 0)
	jnh	ret
	inc	r2,2			# px++
	inc	r3,2			# py++
	inc	r4,2			# pw++
	bx	nloop
	inc	r5,2			# (executed with the branch) ph++
	b	ret

# draw a horizontal rectangle
#	horz fill rects (int *px, int *py, int *pw, int *ph, n, rop)
# in:	r2 = px, r3 = py, r4 = pw, r5 = ph,
#	0x68(sp) = n, 0x64(sp) = rop (as read by the code below)
# Each rectangle is drawn one scan line at a time with the horizontal
# access bit set: w / 16 full words followed by one word limited by
# the write mask w % 16.
# NOTE(review): same halfword-element caveat as the vertical routine.
	.globl	_apa16FastHorzFS
	.globl	_.apa16FastHorzFS

	.data
	.align	2
_apa16FastHorzFS:
	.long	_.apa16FastHorzFS

	.text
	.align	2
_.apa16FastHorzFS:
	stm	r6,-0x5c(sp)
	cal	sp,-0x64(sp)
	get	r15,$queue_counter
	lhs	r13,0(r15)
	loadh	r6,mode_shadow
	cis	r13,0
	jeq	0f			# queue already empty
	bali	r0,queue_wait
0:
	l	r15,0x68(sp)		# n
	cal	r0,-1(r0)		# r0 = -1 (fill data)
hnloop:
	lhs	r11,0(r5)		# h
	lhs	r14,0(r3)		# y
hyloop:
	l	r7,0x64(sp)		# rop
	lhs	r8,0(r2)		# x
	lhs	r10,0(r4)		# w
	mr	r9,r14			# y
	nilz	r12,r8,15		# x & 15
	sli	r7,4
	setbl	r7,0			# horizontal access bit
	o	r12,r7			# mode register
	sri	r8,3			# x >> 3
	clrbl	r8,15
	get	r7,$screen_addr
	a	r7,r8
	sli	r9,7
	a	r7,r9
	sri	r9,7
	nilz	r13,r10,0xf		# w % 16
# Only when w is a multiple of 16 is the last word a full word, so
# that is the case in which 16 is taken off before counting words
# (same rule as the fill spans and vertical rectangle code).
	jne	1f
	ail	r10,r10,-16
	jc0	heloop			# w was 0?
1:
	sri	r10,4			# number of full words to write
	jeq	hshort			# only one word
	c	r6,r12
	jeq	2f
	storeh	r12,mode_register,r6
	mr	r6,r12
2:
	sths	r0,0(r7)
	inc	r7,2
	sis	r10,1			# count words here; r11 counts scan lines
	jh	2b
hshort:
# The write mask belongs in the same field the solid fill spans code
# uses (count << 8); OR-ing it in unshifted would alter the bit offset.
	sli	r13,8
	o	r12,r13			# bit mask
	c	r6,r12
	jeq	3f
	storeh	r12,mode_register,r6
	mr	r6,r12
3:
	sths	r0,0(r7)
	sis	r11,1			# while (--height > 0)
	bhx	hyloop
	inc	r14,1			# (executed on both paths) y++
heloop:
	sis	r15,1			# while (--n > 0)
	bnh	ret
	inc	r2,2			# px++
	inc	r3,2			# py++
	inc	r4,2			# pw++
	bx	hnloop
	inc	r5,2			# (executed with the branch) ph++
	b	ret
#endif