home *** CD-ROM | disk | FTP | other *** search
- | new version of bzero and memset
- | uses movem to set 256-byte blocks faster.
- | Alexander Lehmann alexlehm@iti.informatik.th-darmstadt.de
- | sort of inspired by jrb's bcopy
- | has to be run through the C preprocessor: the offsets of memset's
- | int parameter and length depend on __MSHORT__ (16 or 32 bit int)
-
- .text
- .even
- .globl _bzero
- .globl __bzero
- .globl _memset
-
- | void *memset( void *dest, int val, size_t len );
- | returns dest
- | two versions for 16/32 bits
-
- _memset:
- | Entry point for memset: loads dest into a0, the fill byte into d0.b and
- | the length into d1.l, then hands over to the shared fill code at do_set.
- | A zero length skips the fill and goes straight to exit.
- movl sp@(4),a0 | dest
- #ifdef __MSHORT__
- movb sp@(9),d0 | value (last byte of the 16 bit int at sp@(8))
- movl sp@(10),d1 | length
- #else
- movb sp@(11),d0 | value (last byte of the 32 bit int at sp@(8))
- movl sp@(12),d1 | length
- #endif
- jeq exit | length==0? (size_t) - tests the flags set by the movl into d1
- jra do_set
-
- | void bzero( void *dest, size_t length );
- | void _bzero( void *dest, size_t length );
- | return value not used (returns dest)
-
- _bzero:
- __bzero:
- | Both names share this entry: load dest into a0 and the length into d1.l,
- | select 0 as the fill byte and fall through into do_set.
- | A zero length skips the fill and goes straight to exit.
- movl sp@(4),a0 | dest
- movl sp@(8),d1 | length
- jeq exit | length==0? (size_t) - tests the flags set by the movl into d1
- clrb d0 | value
-
- do_set: | a0 dest, d0.b byte, d1.l length
- | Shared fill code. Works backwards from the end of the area with
- | predecrement stores: one byte if the end address is odd, then 256 byte
- | blocks via movem, then longwords, then a remaining word and/or byte.
- | d2 is saved here and restored at exit_d2; d3-d7/a2-a6 are saved and
- | restored only around the 256 byte loop.
- movel d2,sp@-
-
- addl d1,a0 | a0 points to end of area, needed for predec
-
- movw a0,d2 | test for alignment
- btst #0,d2 | odd ?
- jeq even
- movb d0,a0@- | set one byte, now we are even
- subql #1,d1
- even:
- movb d0,d2 | keep a copy of the fill byte
- lslw #8,d0 | move the byte into the upper half of d0.w
- movb d2,d0 | d0.w has byte now two times
- movw d0,d2
- swap d2
- movw d0,d2 | d2 has byte now four times
-
- clrw d0 | save length less 256
- movb d1,d0 | d0.w = remaining length & 255
- lsrl #8,d1 | number of 256 bytes blocks
- jeq less256
- movml d3-d7/a2-a6,sp@- | d2 is already saved
- movl d2,d3 | copy the fill pattern into every register stored by movem
- movl d2,d4
- movl d2,d5
- movl d2,d6
- movl d2,d7
- movl d2,a2
- movl d2,a3
- movl d2,a4
- movl d2,a5
- movl d2,a6
- set256:
- movml d2-d7/a2-a6,a0@- | set 5*44+36=256 bytes
- movml d2-d7/a2-a6,a0@-
- movml d2-d7/a2-a6,a0@-
- movml d2-d7/a2-a6,a0@-
- movml d2-d7/a2-a6,a0@-
- movml d2-d7/a2-a4,a0@-
- subql #1,d1
- jne set256 | next, please
- movml sp@+,d3-d7/a2-a6
- less256: | fewer than 256 bytes left, set them in 16 bytes blocks
- movw d0,sp@- | save length below 256 for last 3 bytes
- lsrw #2,d0 | number of 4 bytes blocks
- jeq less4 | less than 4 bytes left
- movw d0,d1
- negw d1
- andw #3,d1 | d1 = number of movl to skip on the first pass, (-n)&3
- subqw #1,d0
- lsrw #2,d0 | dbra count: number of passes through set16, minus 1
- addw d1,d1 | offset in code (each movl is two bytes long)
- jmp pc@(2,d1:w) | jmp into loop at set16 + d1
- set16:
- movl d2,a0@-
- movl d2,a0@-
- movl d2,a0@-
- movl d2,a0@-
- dbra d0,set16
- less4:
- movw sp@+,d0 | get back the saved length below 256
- btst #1,d0 | a word left to set ?
- jeq less2
- movw d2,a0@-
- less2:
- btst #0,d0 | a byte left to set ?
- jeq none
- movb d2,a0@-
- none:
- exit_d2:
- movl sp@+,d2 | restore d2 saved at do_set
- exit:
- movl sp@(4),d0 | return dest (for memset only)
- rts
-
-