home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Club Amiga de Montreal - CAM
/
CAM_CD_1.iso
/
files
/
620.lha
/
CopyMemQuicker_v2.1
/
CopyMemQuicker.asm
< prev
next >
Wrap
Assembly Source File
|
1992-02-10
|
6KB
|
274 lines
nolist
**********************************************************************
* CopyMemQuicker 2.1 - (C) 1991, 1992 Arthur Hagen *
* Parts of code: (C) 1985-1991 Commodore Business Machines Ltd. *
* Posted to the Public Domain *
**********************************************************************
xref _LVOOldOpenLibrary
xref _LVOCloseLibrary
xref _LVOOutput
xref _LVOWrite
xref _LVOAllocMem
xref _LVOFreeMem
xref _LVOSetFunction
xref _LVOCopyMem
xref _LVOCopyMemQuick
include 'exec/execbase.i'
public _QuickMem
list
**********************************************************************
* _QuickMem - program entry point: install or remove the patches
*
* Prints the title, then inspects the routine the CopyMem vector
* currently points to.  If the longword just in front of that routine
* is '*Art', the patch is already active: the two saved vectors are
* put back with SetFunction (the resident copy is deliberately NOT
* freed, see below).  Otherwise CopyStart..CopyEnd is copied into
* newly allocated memory, the movem thresholds are tuned for the CPU
* and both CopyMem and CopyMemQuick are redirected into the copy.
*
* Differences from the 2.1 original:
*  - a failed OldOpenLibrary is detected instead of calling through a
*    null DOSBase
*  - nothing is written when Output() returned no file handle
*  - the resident copy is allocated as MEMF_PUBLIC, because every task
*    that calls CopyMem ends up executing it
*  - the long forward branches are word-sized; CopyMemQuicker sat
*    right at the limit of what bsr.s can reach
*  - d0 holds a defined exit code when the program returns
*
* Out:   d0 = QM_RC_OK after installing or removing,
*             QM_RC_FAIL if dos.library or the memory was unavailable
* Regs:  a6 = ExecBase, a5 = DOSBase (exchanged around DOS calls),
*        a3 = base of the resident copy, d4 = its size in bytes,
*        d6 = exit code, d7 = output file handle (0 = none)
*
* NOTE(review): the copy is written and patched as data and then run
* as code; no cache flush is performed here - confirm this is
* acceptable on processors with separate instruction/data caches.
**********************************************************************
QM_MEMF_PUBLIC	equ	1		; exec MEMF_PUBLIC (exec/memory.i is not included)
QM_RC_OK	equ	0		; AmigaDOS RETURN_OK
QM_RC_FAIL	equ	20		; AmigaDOS RETURN_FAIL

_QuickMem
	movea.l	(4).w,a6		; ExecBase
	lea	dosname(pc),a1
	jsr	_LVOOldOpenLibrary(a6)	; Any version will do
	tst.l	d0
	bne.s	gotdos
	moveq	#QM_RC_FAIL,d0		; no DOS: cannot even print a message
	rts
gotdos	movea.l	d0,a5			; a5 = DOSBase
	moveq	#QM_RC_OK,d6		; exit code unless something fails
	exg.l	a5,a6
	jsr	_LVOOutput(a6)
	exg.l	a5,a6
	move.l	d0,d7			; output handle, checked in wrt
	moveq	#title_end-title,d3
	lea	title(pc),a0
	bsr.w	wrt
	move.l	#(CopyEnd-CopyStart),d4
	movea.l	_LVOCopyMem+2(a6),a0	; address held in the CopyMem vector
	lea	CopyStart-CopyMemQuicker(a0),a3	; where CopyStart would be if it is ours
	move.l	Identity-CopyStart(a3),d1
	cmpi.l	#'*Art',d1
	beq.w	isquicker		; already patched -> remove
	move.l	d4,d0
	moveq	#QM_MEMF_PUBLIC,d1	; code shared by all tasks
	jsr	_LVOAllocMem(a6)
	tst.l	d0
	beq.s	nomem
	movea.l	d0,a3			; a3 = resident copy
	movea.l	a3,a1
	lea	CopyStart(pc),a0
	move.l	d4,d0
	bsr.w	CopyMemQuicker		; beats jsr _LVOCopyMem(a6)
* The movem-trick uses some extra cycles for setting up, so
* if we run the loop < n times, we will actually slow things down.
* For 68000: n = 2; for 68010: n = 8; for 68020+: n = 4
* The reason is that the 68010 has a loop mode for dbf-loops, but no
* cache, whereas the 68020+'es run all code faster.
	moveq	#44*2,d0		; 68000 threshold
	btst	#AFB_68020,AttnFlags+1(a6)
	beq.s	tst10
	add.w	d0,d0			; We have a 020 or higher: 44*4
	bra.s	cmpset
tst10	btst	#AFB_68010,AttnFlags+1(a6)
	bne.s	cmpok			; We have a 010: keep the assembled 44*8
cmpset	move.w	d0,CmpValS-CopyStart(a3)	; patch both cmpi.l immediates
	move.w	d0,CmpValQ-CopyStart(a3)	; in the resident copy
* The offset of CopyMemQuickest is above 127, so it cannot be loaded
* with a plain moveq; load its byte complement and invert the byte.
cmpok	moveq	#255-(CopyMemQuickest-CopyStart),d0
	not.b	d0
	add.l	a3,d0			; d0 = resident CopyMemQuickest
	bsr.s	setcmq
	move.l	d0,(a3)			; OldCopyMemQuick = previous vector
	moveq	#CopyMemQuicker-CopyStart,d0
	add.l	a3,d0			; d0 = resident CopyMemQuicker
	bsr.s	setcm
	move.l	d0,4(a3)		; OldCopyMem = previous vector
quit	movea.l	a5,a1
	jsr	_LVOCloseLibrary(a6)
	move.l	d6,d0			; defined exit code for the caller
	rts
nomem	moveq	#QM_RC_FAIL,d6
	moveq	#memerr_end-memerr,d3
	lea	memerr(pc),a0
	pea	quit(pc)		; faster than 'bsr.s wrt' + 'bra.s quit'
* wrt - write d3 bytes starting at a0 to the handle in d7.
* Does nothing when d7 is 0.  Uses d1 and d2; a5/a6 are swapped back.
wrt	move.l	a0,d2
	move.l	d7,d1
	beq.s	wrtx			; no output handle: stay silent
	exg.l	a5,a6
	jsr	_LVOWrite(a6)
	exg.l	a5,a6
wrtx	rts
isquicker
	moveq	#already_end-already,d3
	lea	already(pc),a0
	bsr.s	wrt
	move.l	4(a3),d0		; saved original CopyMem
	bsr.s	setcm
	move.l	(a3),d0			; saved original CopyMemQuick
* Too darn dangerous! Some other program might be using the routine
* in the background, and freeing the code it is running just won't do!
;	bsr.s	setcmq
;	move.l	d4,d0
;	movea.l	a3,a1
;	jsr	_LVOFreeMem(a6)
;	bra.s	quit
	pea	quit(pc)		; faster than 'bsr.s setcmq' + 'bra.s quit'
* setcmq / setcm - point CopyMemQuick / CopyMem of exec at d0.
* The code above stores the d0 that comes back as the previous entry.
setcmq	lea	(_LVOCopyMemQuick).w,a0
	bra.s	setit
setcm	lea	(_LVOCopyMem).w,a0
setit	movea.l	a6,a1			; the library being patched is exec itself
	jmp	_LVOSetFunction(a6)	; its rts returns to our caller
************************************************************
* Header of the image CopyStart..CopyEnd that the installer copies
* into allocated memory.  The installer addresses these three
* longwords by their offset from CopyStart, and finds an installed
* copy by reading the longword directly in front of CopyMemQuicker,
* so their order and size must not change.
CopyStart
OldCopyMemQuick
dc.l 0 ; previous CopyMemQuick entry, stored by the installer at 0(a3)
OldCopyMem
dc.l 0 ; previous CopyMem entry, stored by the installer at 4(a3)
Identity
dc.l '*Art' ; signature the installer compares against
* CopyMemQuicker - replacement installed on the CopyMem vector.
*
* In:    a0 = source, a1 = destination, d0 = number of bytes
* Uses:  d0, d1, a0, a1.  d2-d7/a2-a4 are saved and restored on the
*        one path that needs them.
*
* Fewer than 12 bytes are copied bytewise.  Otherwise the source is
* made even first (one byte copied if needed).  If the destination is
* then even as well, CopyMemEvenQuicker takes over.  If not, blocks of
* at least 108 bytes go through the movem-and-rotate loop below and
* shorter ones are copied bytewise.
CopyMemQuicker
moveq #12,d1
cmp.l d1,d0
bcs.s tinycpy ; too small to gain anything
move.w a0,d1
lsr.b #1,d1 ; fastest test for evenness
bcc.s evena0
move.b (a0)+,(a1)+ ; odd source: one byte makes a0 even
subq.l #1,d0
evena0 move.w a1,d1
lsr.b #1,d1
bcc.s CopyMemEvenQuicker ; both pointers even now
moveq #36*3,d1 ; a0 even, a1 odd from here on
cmp.l d1,d0
bcs.s tinycpy ; fewer than three 36-byte rounds: bytewise
* This is tricky! They said it couldn't be done...
* Each round reads 32 bytes with one aligned movem, writes the first
* byte alone so that a1 becomes even, shifts the other 31 bytes up by
* one position across d0-d7, appends the 33rd source byte and writes
* the 32 bytes with one aligned movem.  Three single bytes complete
* the 36 bytes of a round, leaving a0 even and a1 odd again.
unevcpy movem.l a2-a4/d2-d7,-(sp)
moveq #32,d1 ; 8 registers of 4 bytes
move.w d1,a3 ; a3 = 32, added to a1 after the movem store
moveq #36,d1 ; as above plus 4 "roundoff" bytes
move.w d1,a4 ; a4 = 36, bytes per round
sub.l d1,d0
move.l d0,a2 ; a2 = bytes left after this round (d0-d7 get overwritten)
uloop movem.l (a0)+,d0-d7
rol.l #8,d0 ; bring the first byte of every longword down to bits 0-7
rol.l #8,d1
rol.l #8,d2
rol.l #8,d3
rol.l #8,d4
rol.l #8,d5
rol.l #8,d6
rol.l #8,d7
move.b d0,(a1)+ ; source byte 0 goes out alone, a1 is even afterwards
move.b d1,d0 ; d0 = bytes 1-4: low byte replaced by d1's first byte
move.b d2,d1 ; and so on up the register chain
move.b d3,d2
move.b d4,d3
move.b d5,d4
move.b d6,d5
move.b d7,d6
move.b (a0)+,d7 ; byte 32 completes the last longword
movem.l d0-d7,(a1) ; bytes 1-32 in one aligned store
adda.w a3,a1 ; that movem does not advance a1
move.b (a0)+,(a1)+ ; even up to next longword
move.b (a0)+,(a1)+
move.b (a0)+,(a1)+
move.l a2,d0
sub.l a4,d0 ; room for another 36-byte round?
movea.l d0,a2 ; movea leaves the flags of the sub alone
bcc.s uloop
add.w a4,d0 ; undo the last subtraction: d0.w = 0..35 bytes left
movem.l (sp)+,a2-a4/d2-d7 ; movem leaves the flags alone as well
subq.b #1,d0 ; pre-decrement for the first pass through tloop
bcs.s tdone ; nothing left
tloop move.b (a0)+,(a1)+
tinycpy dbf d0,tloop ; entered here with the plain byte count in d0.w
tdone rts
* CopyMemEvenQuicker - copy d0 bytes from (a0) to (a1); CopyMemQuicker
* branches here once both pointers are even.
*
* Counts at or above the threshold first go through a loop that moves
* 44 bytes per round with two movem instructions.  What is left (or
* the whole count, if it was below the threshold) is copied as
* longwords followed by a word and/or a byte, selected from the two
* low bits of the count.
*
* The first instruction is assembled by hand so that the low word of
* its immediate carries the label CmpValS; the installer overwrites
* that word in the resident copy according to the CPU type.
* Uses:  d0, d1, a0, a1; d2-d7/a2-a6 are saved and restored.
CopyMemEvenQuicker
dc.w $0c80 ; cmpi.l #nnnn,d0
dc.w 0 ; high word of nnnn
CmpValS dc.w 44*8 ; low word of nnnn: need 8 loops to be economical on 68010
; (on 68000 this will be set to 44*2 and on 68020+ to 44*4)
bcs.s smlmov ; below the threshold: skip the movem loop
moveq #44,d1 ; 11 registers of 4 bytes
sub.l d1,d0 ; d0 = bytes left after the first round
movem.l d2-d7/a2-a6,-(sp) ; all eleven serve as copy buffer
bigmov movem.l (a0)+,d2-d7/a2-a6
movem.l d2-d7/a2-a6,(a1)
adda.w d1,a1 ; the store above does not advance a1
sub.l d1,d0
bcc.s bigmov ; no borrow: another full round fits
add.w d1,d0 ; undo the last subtraction: d0.w = 0..43 bytes left
movem.l (sp)+,d2-d7/a2-a6
smlmov lsr.w #1,d0 ; C = bit 0 of the count, Z = count was 0 or 1
beq.s even01
bcs.s sm13 ; odd count
lsr.w #1,d0 ; C = bit 1, d0 = number of longwords
beq.s even2 ; exactly 2 bytes
bcs.s sm2 ; longwords plus one word
sm0 subq.w #1,d0 ; dbf counts down to -1
loop0 move.l (a0)+,(a1)+
dbf d0,loop0
even0 rts
sm2 subq.w #1,d0
loop2 move.l (a0)+,(a1)+
dbf d0,loop2
even2 move.w (a0),(a1) ; trailing word
rts
sm13 lsr.w #1,d0 ; C = bit 1, d0 = number of longwords
beq.s even3 ; exactly 3 bytes
bcs.s sm3 ; longwords plus word plus byte
sm1 subq.w #1,d0
loop1 move.l (a0)+,(a1)+
dbf d0,loop1
even1 move.b (a0),(a1) ; trailing byte
rts
sm3 subq.w #1,d0
loop3 move.l (a0)+,(a1)+
dbf d0,loop3
even3 move.w (a0)+,(a1)+ ; trailing word ...
move.b (a0),(a1) ; ... and byte
rts
even01 bcs.s even1 ; count was 1: copy the single byte
rts
**********************************************************************
* CopyMemQuickest - routine the installer puts on the CopyMemQuick
* vector.
*
* In:    a0 = source, a1 = destination, d0 = number of bytes
* Uses:  d0, d1, a0, a1; d2-d7/a2-a6 are preserved.
*
* Only whole longwords are copied: the tail loop moves d0/4
* longwords and ignores the two low bits of the count.
*
* The leading cmpi.l is spelled out with dc.w so that the low word
* of its immediate can carry the label CmpValQ.  The installer
* overwrites that word in the resident copy with 44*2 on a 68000 and
* 44*4 on a 68020 or better; a 68010 keeps the assembled 44*8.
**********************************************************************
CopyMemQuickest
	dc.w	$0c80			; cmpi.l #nnnn,d0 - opcode word
	dc.w	0			;   high word of nnnn
CmpValQ	dc.w	44*8			;   low word of nnnn (patched at install time)
	bcs.s	qtail			; below the threshold: longword loop only
	movem.l	d2-d7/a2-a6,-(sp)	; eleven registers become the copy buffer
	moveq	#44,d1			; 11 registers of 4 bytes per round
	sub.l	d1,d0			; d0 = bytes left once this round is done
qblock	movem.l	(a0)+,d2-d7/a2-a6
	movem.l	d2-d7/a2-a6,(a1)
	adda.w	d1,a1			; the store does not step a1 by itself
	sub.l	d1,d0
	bcc.s	qblock			; still 44 or more to go
	movem.l	(sp)+,d2-d7/a2-a6
	add.w	d1,d0			; back to the true remainder, 0..43
qtail	lsr.w	#2,d0			; bytes -> longwords
	beq.s	qexit
	subq.w	#1,d0			; dbf runs until the counter reaches -1
qlong	move.l	(a0)+,(a1)+
	dbf	d0,qlong
qexit	rts
CopyEnd
************************************************************
* Leave this for the 2.0 Version function!!!
* 'version' has no terminator of its own, so '$VER: ' and the title
* that follows form one continuous string in the file.
* The installer loads each message length (xxx_end - xxx) with moveq,
* so no message may grow beyond 127 bytes.
version dc.b '$VER: '
title dc.b 'CopyMemQuicker 2.1 (8 Feb 1992)',$0A,$0D
dc.b 'Copyright ',$A9,' 1992 Arthur Hagen.',$0A
title_end
; written when the patch is found installed and the vectors are put back
already dc.b 'Restoring vectors.',$0A
already_end
; written when AllocMem fails
memerr dc.b 'No memory!',$0A
memerr_end
; null-terminated name passed to OldOpenLibrary
dosname dc.b 'dos.library',0
even
end