home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
OS/2 Shareware BBS: 9 Archive
/
09-Archive.zip
/
zip22.zip
/
amiga
/
deflate.a
< prev
next >
Wrap
Text File
|
1997-10-15
|
35KB
|
1,046 lines
; This is a 680x0 assembly language translation of the Info-ZIP source file
; deflate.c, by Paul Kienitz. No rights reserved. The function longest_match
; is based in part on match.a by Carsten Steger, which in turn is partly based
; on match.s for 386 by Jean-loup Gailly and Kai Uwe Rommel. Mostly, however,
; this material is based on deflate.c, by Gailly, Rommel, and Igor Mandrichenko.
; This code is not commented very much; see deflate.c for comments that explain
; what the functions are doing.
;
; The symbols that can be used to select different versions are as follows:
;
; CPU020 if defined, use 68020 instructions always.
;
; CPUTEST if defined, check at runtime for CPU type. Another symbol
; specifying the platform-specific test must be used with this.
; If neither of these is defined, use 68000 instructions only.
; Runtime test is nonportable; it is different for each OS.
;
; AMIGA use Amiga-specific test for 68020, if CPUTEST defined. Also
; tells it that registers d0/a0/d1/a1 are not preserved by
; function calls. At present, if AMIGA is not defined, it
; causes functions to preserve all registers. ALL OF THIS CODE
; CURRENTLY ASSUMES THAT REGISTERS D2-D7/A2-A6 WILL BE PRESERVED
; BY ANY FUNCTIONS THAT IT CALLS.
;
; DYN_ALLOC should be defined here if it is defined for C source; tells us
; that big arrays are allocated instead of static.
;
; WSIZE must be defined as the same number used for WSIZE in the C
; source, and must be a power of two <= 32768. As elsewhere,
; the default value is 32768.
;
; INT16 define this if ints are 16 bits; otherwise 32 bit ints assumed.
;
; SMALL_MEM define this if it is defined in the C source; otherwise it uses
; the MEDIUM_MEM model. BIG_MEM and MMAP are *not* supported.
; The FULL_SEARCH option in deflate.c is also not supported.
;
; DEBUG activates some tracing output, as in the C source.
;
; QUADLONG this selects a different version of the innermost longest_match
; loop code for 68020 operations, comparing bytes four at a time
; instead of two at a time. It seems to be a tiny bit faster on
; average, but it's slower often enough that one can't generalize.
;
; This code currently assumes that function results are returned in D0 for
; all platforms. It assumes that args to functions are pushed onto the stack,
; last arg first. It also currently assumes that all C symbols have an
; underscore prepended when referenced from assembly.
IFND CPU020
IFND CPUTEST
CPU000 equ 1
ENDC
ENDC
; Use these macros for accessing variables of type int:
IFD INT16
MOVINT MACRO
move.w \1,\2
ENDM
CLRINT MACRO
clr.w \1
ENDM
INTSIZE equ 2
ELSE ; !INT16
MOVINT MACRO
move.l \1,\2
ENDM
CLRINT MACRO
clr.l \1
ENDM
INTSIZE equ 4
ENDC
IFD DYN_ALLOC
BASEPTR MACRO
move.l \1,\2
ENDM
ELSE
BASEPTR MACRO
lea \1,\2
ENDM
ENDC
; constants we use, many of them adjustable:
MAX_MATCH equ 258
MIN_MATCH equ 3
TOO_FAR equ 4096
IFND WSIZE
WSIZE equ 32768
ENDC
WMASK equ WSIZE-1
MAX_DIST equ WSIZE-MAX_MATCH-MIN_MATCH-1
MIN_LOOKAHEAD equ MAX_MATCH+MIN_MATCH+1
; IFD BIG_MEM ; NOT supported -- type Pos needs to be 32 bits
;HASH_BITS equ 15
; ELSE
IFD SMALL_MEM
HASH_BITS equ 13
ELSE
HASH_BITS equ 14 ; default -- MEDIUM_MEM
ENDC
; ENDC ; BIG_MEM
HASH_SIZE equ 1<<HASH_BITS
HASH_MASK equ HASH_SIZE-1
H_SHIFT equ (HASH_BITS+MIN_MATCH-1)/MIN_MATCH
B_SLOW equ 1
B_FAST equ 2
ZE_MEM equ 4
EOF equ -1
; struct config is defined by these offsets:
Good_length equ 0
Max_lazy equ 2
Nice_length equ 4
Max_chain equ 6
Sizeof_config equ 8
; external functions we call:
xref _ct_tally ; int ct_tally(int, int)
xref _flush_block ; unsigned long F(char *, unsigned long, int)
xref _ziperr ; void ziperr(int, char *)
xref _error ; void error(char *)
xref _calloc ; stdlib function: void *calloc(size_t, size_t)
xref _free ; stdlib function: void free(void *)
IFD DEBUG
xref _fputc ; stdio function: int fputc(int, FILE *)
xref _stderr ; pointer to FILE, which we pass to fputc
ENDC
; our entry points:
xdef _lm_init ; void lm_init(int level, unsigned short *flags)
xdef _lm_free ; void lm_free(void)
xdef _deflate ; void deflate(void) ...the big one
xdef _fill_window ; this line is just for debugging
; ============================================================================
; Here is where we have our global variables.
section deflatevars,data
; external global variables we reference:
xref _verbose ; signed int
xref _level ; signed int
xref _read_buf ; int (*read_buf)(char *, unsigned int)
; global variables we make available:
xdef _window
xdef _prev
xdef _head
xdef _window_size
xdef _block_start
xdef _strstart
IFD DYN_ALLOC
_prev: ds.l 1 ; pointer to calloc()'d unsigned short array
_head: ds.l 1 ; pointer to calloc()'d unsigned short array
_window: ds.l 1 ; pointer to calloc()'d unsigned char array
ELSE ; !DYN_ALLOC
_prev: ds.w WSIZE ; array of unsigned short
_head: ds.w HASH_SIZE ; array of unsigned short
_window: ds.b 2*WSIZE ; array of unsigned char
ENDC ; ?DYN_ALLOC
_window_size: ds.l 1 ; unsigned long
_block_start: ds.l 1 ; unsigned long
_strstart: ds.w INTSIZE/2 ; unsigned int
; Now here are our private variables:
IFD CPUTEST
is020: ds.w 1 ; bool: CPU type is '020 or higher
ENDC
ins_h: ds.w 1 ; unsigned short
sliding: ds.w 1 ; bool: the file is read a piece at a time
eofile: ds.w 1 ; bool: we have read in the end of the file
max_lazy_match: ds.w 1 ; unsigned short
lookahead: ds.w 1 ; unsigned short
; These are NOT DECLARED AS STATIC in deflate.c, but currently could be:
max_chain_len: ds.w 1 ; unsigned short (unsigned int in deflate.c)
prev_length: ds.w 1 ; unsigned short (unsigned int in deflate.c)
good_match: ds.w 1 ; unsigned short (unsigned int in deflate.c)
nice_match: ds.w 1 ; unsigned short (signed int in deflate.c)
match_start: ds.w 1 ; unsigned short (unsigned int in deflate.c)
; This array of struct config is a constant and could be in the code section:
config_table: dc.w 0,0,0,0 ; level 0: store uncompressed
dc.w 4,4,8,4 ; level 1: fastest, loosest compression
dc.w 4,5,16,8 ; level 2
dc.w 4,6,32,32 ; level 3: highest to use deflate_fast
dc.w 4,4,16,16 ; level 4: lowest to use lazy matches
dc.w 8,16,32,32 ; level 5
dc.w 8,16,128,128 ; level 6: the default level
dc.w 8,32,128,256 ; level 7
dc.w 32,128,258,1024 ; level 8
dc.w 32,258,258,4096 ; level 9: maximum compression, slow
;;CAL_SH MACRO ; macro for calling zcalloc()
;; IFD INT16
;; move.w #2,-(sp)
;; move.w #\1,-(sp)
;; jsr _zcalloc
;; addq #4,sp
;; ELSE
;; pea 2
;; pea \1
;; jsr _zcalloc
;; addq #8,sp
;; ENDC
;; ENDM
CAL_SH MACRO ; Okay, we're back to using regular calloc()...
pea 2
pea \1
jsr _calloc
addq #8,sp
ENDM
; ============================================================================
; And here we begin our functions. match_init is for internal use only:
section deflate,code
match_init:
IFD CPUTEST ; now check for platform type
IFD AMIGA ; Amiga specific test for '020 CPU:
xref _SysBase
NOLIST
INCLUDE 'exec/execbase.i'
LIST
clr.w is020 ; default value is 68000
move.l _SysBase,a0
btst #AFB_68020,AttnFlags+1(a0)
beq.s cheap
move.w #1,is020
cheap:
ELSE ; !AMIGA
FAIL Write an '020-detector for your system here!
; On the Macintosh, I believe GetEnvironment() provides the information.
ENDC ; AMIGA
ENDC ; CPUTEST
rts ; match_init consists only of rts if CPUTEST unset
; ============================================================================
; Here is longest_match(), the function that the rest of this was built up
; from, the hottest hot spot in the program and therefore the most heavily
; optimized. It has two different versions, one for '020 and higher CPUs, and
; one for 68000/68010. It can test at runtime which version to use if you
; create a test function in match_init for your platform. Currently such a
; test is implemented for the Amiga. It can also be assembled to use '000 or
; '020 code only.
Cur_Match equr d0 ; unsigned int, kept valid as long
Best_Len equr d1 ; unsigned int, kept valid as long
Scan_Start equr d3 ; pair of bytes
Scan_End equr d4 ; pair of bytes
Limit equr d5 ; unsigned int
Chain_Length equr d6 ; unsigned int
Scan_Test equr d7 ; counter, pair of bytes sometimes
Scan equr a0 ; pointer to unsigned char
Match equr a1 ; pointer to unsigned char
Prev_Address equr a2 ; pointer to unsigned short
Scan_Ini equr a3 ; pointer to unsigned char
Match_Ini equr a5 ; pointer to unsigned char
; Note: "pair of bytes" means the two low order bytes of the register in
; 68020 code, but means the lowest and third lowest bytes on the 68000.
IFD AMIGA
SAVEREGS reg d3-d7/a2/a3/a5 ; don't protect d0/d1/a0/a1
ELSE ; !AMIGA
SAVEREGS reg d1/d3-d7/a0-a3/a5 ; protect all except d0 return val
ENDC ; ?AMIGA
; d2, a4, a6 not used... on Amiga, a4 is used by small-data memory model
longest_match:
movem.l SAVEREGS,-(sp)
; setup steps common to byte and word versions:
IFD INT16
and.l #$0000FFFF,Cur_Match ; upper half must be zero!
; we use an and.l down here for the sake of ATSIGN/REGARGS.
moveq #0,Limit ; so adding to Scan_Ini works
ENDC
move.w max_chain_len,Chain_Length
move.w prev_length,Best_Len
MOVINT _strstart,Limit
BASEPTR _prev,Prev_Address
BASEPTR _window,Match_Ini
move.l Match_Ini,Scan_Ini
addq #MIN_MATCH,Match_Ini ; optimizes inner loop
add.l Limit,Scan_Ini
sub.w #MAX_DIST,Limit
bhi.s limit_ok
moveq #0,Limit
limit_ok:
cmp.w good_match,Best_Len
blo.s length_ok
lsr.w #2,Chain_Length
length_ok:
subq.w #1,Chain_Length
IFD CPUTEST
tst.w is020 ; can we use '020 stuff today?
bne WORD_match
ENDC
IFND CPU020
; for 68000 or 68010, use byte operations:
moveq #0,Scan_Start ; clear 2nd & 4th bytes, use 1st & 3rd
moveq #0,Scan_End ; likewise
moveq #0,Scan_Test ; likewise
move.b (Scan_Ini),Scan_Start
swap Scan_Start ; swap is faster than 8 bit shift
move.b 1(Scan_Ini),Scan_Start
move.b -1(Scan_Ini,Best_Len.w),Scan_End
swap Scan_End
move.b 0(Scan_Ini,Best_Len.w),Scan_End
bra.s bdo_scan
blong_loop:
move.b -1(Scan_Ini,Best_Len.w),Scan_End
swap Scan_End
move.b 0(Scan_Ini,Best_Len.w),Scan_End
bshort_loop:
add.w Cur_Match,Cur_Match ; assert value before doubling < 32K
IFNE 32768-WSIZE
and.w #(WMASK*2),Cur_Match
ENDC
move.w (Prev_Address,Cur_Match.l),Cur_Match
cmp.w Limit,Cur_Match
dbls Chain_Length,bdo_scan
bra return
bdo_scan:
move.l Match_Ini,Match
add.l Cur_Match,Match
move.b -MIN_MATCH-1(Match,Best_Len.w),Scan_Test
swap Scan_Test
move.b -MIN_MATCH(Match,Best_Len.w),Scan_Test
cmp.l Scan_Test,Scan_End
bne.s bshort_loop
move.b -MIN_MATCH(Match),Scan_Test
swap Scan_Test
move.b -MIN_MATCH+1(Match),Scan_Test
cmp.l Scan_Test,Scan_Start
bne.s bshort_loop
move.w #(MAX_MATCH-3),Scan_Test
lea MIN_MATCH(Scan_Ini),Scan ; offset optimizes inner loop
bscan_loop:
cmp.b (Match)+,(Scan)+
dbne Scan_Test,bscan_loop
subq #1,Scan
sub.l Scan_Ini,Scan ; assert difference is 16 bits
cmp.w Best_Len,Scan
bls.s bshort_loop
MOVINT Scan,Best_Len
move.w Cur_Match,match_start
cmp.w nice_match,Best_Len
blo.s blong_loop
IFD CPUTEST
bra return
ENDC
ENDC ; !CPU020
IFND CPU000
MACHINE MC68020
; for 68020 or higher, use word operations even on odd addresses:
WORD_match:
move.w (Scan_Ini),Scan_Start
move.w -1(Scan_Ini,Best_Len.w),Scan_End
bra.s wdo_scan
wlong_loop:
move.w -1(Scan_Ini,Best_Len.w),Scan_End
wshort_loop:
and.w #WMASK,Cur_Match
move.w (Prev_Address,Cur_Match.w*2),Cur_Match ; '020 addressing mode
cmp.w Limit,Cur_Match
dbls Chain_Length,wdo_scan
bra.s return
wdo_scan:
move.l Match_Ini,Match
add.l Cur_Match,Match
cmp.w -MIN_MATCH-1(Match,Best_Len.w),Scan_End
bne.s wshort_loop
cmp.w -MIN_MATCH(Match),Scan_Start
bne.s wshort_loop
IFD QUADLONG
; By some measurements, this version of the code is a little tiny bit faster.
; But on some files it's slower. It probably pays off only when there are
; long match strings, and costs in the most common case of three-byte matches.
moveq #((MAX_MATCH-MIN_MATCH)/16),Scan_Test ; value = 15
lea MIN_MATCH(Scan_Ini),Scan ; offset optimizes inner loop
wscan_loop:
cmp.l (Match)+,(Scan)+ ; test four bytes at a time
bne.s odd
cmp.l (Match)+,(Scan)+
bne.s odd
cmp.l (Match)+,(Scan)+
bne.s odd
cmp.l (Match)+,(Scan)+
dbne Scan_Test,wscan_loop ; '020 can cache a bigger loop
odd:
subq #4,Scan
subq #4,Match
cmp.b (Match)+,(Scan)+ ; find good bytes in bad longword
bne.s even
cmp.b (Match)+,(Scan)+
bne.s even
cmp.b (Match)+,(Scan)+
beq.s steven
even: subq #1,Scan
ELSE ; !QUADLONG
moveq #((MAX_MATCH-MIN_MATCH)/2),Scan_Test ; value = 127
lea MIN_MATCH(Scan_Ini),Scan ; offset optimizes inner loop
wscan_loop:
cmp.w (Match)+,(Scan)+
dbne Scan_Test,wscan_loop
subq #2,Scan
move.b -2(Match),Scan_Test
cmp.b (Scan),Scan_Test
bne.s steven
addq #1,Scan
ENDC ; ?QUADLONG
steven:
sub.l Scan_Ini,Scan ; assert: difference is 16 bits
cmp.w Best_Len,Scan
bls.s wshort_loop
MOVINT Scan,Best_Len
move.w Cur_Match,match_start
cmp.w nice_match,Best_Len
blo.s wlong_loop
MACHINE MC68000
ENDC ; !CPU000
return:
MOVINT Best_Len,d0 ; return value (upper half should be clear)
movem.l (sp)+,SAVEREGS
rts
; =============================================================================
; This is the deflate() function itself, our main entry point. It calls
; longest_match, above, and some outside functions. It is a hot spot, but not
; as hot as longest_match. It uses no special '020 code.
; ================== Several macros used in deflate() and later functions:
; Arg 1 is D-reg that new ins_h value is to be left in,
; arg 2 is the byte value to be hashed into it, which must not be the same reg
UP_HASH MACRO
move.w ins_h,\1
asl.w #H_SHIFT,\1
eor.b \2,\1
and.w #HASH_MASK,\1 ; ((ins_h << H_SHIFT) ^ c) & HASH_MASK
move.w \1,ins_h ; ins_h = that
ENDM
; Arg 1 is scratch A, arg 2 is scratch D
IN_STR MACRO
move.l Strst,\2
addq.w #MIN_MATCH-1,\2
move.b (Window,\2.l),\2 ; window[strstart + MIN_MATCH - 1]
UP_HASH Head,\2
add.l Head,Head ; assert upper word is zero before add
BASEPTR _head,\1
add.l Head,\1
move.w (\1),Head ; hash_head = head[ins_h]
move.w Strst,(\1) ; head[ins_h] = strstart
move.l Strst,\2
IFNE WSIZE-32768
and.w #WMASK,\2
ENDC
add.w \2,\2 ; masks implicitly when WSIZE == 32768
move.w Head,(Prev,\2.l) ; prev[str_start & WMASK] = hash_head
ENDM
; Arg 1 is bool (int) EOF flag, flush_block result is in d0, trashes d1/a0/a1
FLUSH_B MACRO
IFC '\1','#0'
CLRINT -(sp)
ELSE
MOVINT \1,-(sp)
ENDC
move.l _block_start,d0
blt.s nenu\@
move.l Window,a0
add.l d0,a0
bra.s nun\@
nenu\@: sub.l a0,a0 ; if block_start < 0, push NULL
nun\@: sub.l Strst,d0
neg.l d0
move.l d0,-(sp)
move.l a0,-(sp)
jsr _flush_block
lea 8+INTSIZE(sp),sp
ENDM
; This expands to nothing unless DEBUG is defined.
; Arg 1 is a byte to be trace-outputted -- if it is d0 it must be a valid int
TRACE_C MACRO
IFD DEBUG
cmp.w #1,_verbose+INTSIZE-2 ; test lower word only
ble.s qui\@
IFNC '\1','d0'
moveq #0,d0
move.b \1,d0
ENDC
move.l _stderr,-(sp)
MOVINT d0,-(sp)
jsr _fputc
addq #4+INTSIZE,sp
qui\@:
ENDC ; DEBUG
ENDM
; ================== Here are the register vars we use, and deflate() itself:
Window equr a2 ; cached address of window[]
Prev equr a3 ; cached address of prev[]
Strst equr d7 ; strstart cached as a longword
Look equr d6 ; lookahead cached as short
Head equr d5 ; local variable hash_head, short
PrevL equr d4 ; prev_length cached as short
MatchL equr d3 ; local variable match_length, unsigned short
Avail equr d2 ; local variable available_match, bool
PrevM equr a5 ; local variable prev_match, int in an A-reg
IFD AMIGA
DEFREGS reg d2-d7/a2/a3/a5
ELSE
DEFREGS reg d0-d7/a0/a2/a3/a5 ; play it safe, preserve all regs
ENDC
_deflate: ; first, setup steps common to deflate and deflate_fast:
movem.l DEFREGS,-(sp)
IFD INT16
moveq #0,Strst ; make sure strstart is valid as a long
ENDC
moveq #0,Head ; ditto for hash_head
MOVINT _strstart,Strst
move.w lookahead,Look
move.w prev_length,PrevL
BASEPTR _window,Window
BASEPTR _prev,Prev
MOVINT _level,d0
cmp.w #3,d0
ble deflate_fast
moveq #MIN_MATCH-1,MatchL
moveq #0,Avail
look_loop:
tst.w Look
beq last_tally
IN_STR a0,d0
move.w MatchL,PrevL
move.w match_start,PrevM
move.w #MIN_MATCH-1,MatchL
tst.w Head
beq.s no_new_match
cmp.w max_lazy_match,PrevL
bhs.s no_new_match
move.w Strst,d0
sub.w Head,d0
cmp.w #MAX_DIST,d0
bhi.s no_new_match
move.w PrevL,prev_length ; longest_match reads these variables
MOVINT Strst,_strstart
MOVINT Head,d0 ; parm for longest_match
bsr longest_match ; sets match_start
cmp.w Look,d0 ; does length exceed valid data?
bls.s stml
move.w Look,d0
stml: move.w d0,MatchL ; valid length of match
cmp.w #MIN_MATCH,MatchL ; is the match only three bytes?
bne.s no_new_match
move.w match_start,d0
sub.w Strst,d0
cmp.w #-TOO_FAR,d0
bge.s no_new_match
moveq #MIN_MATCH-1,MatchL ; mark the current match as no good
no_new_match:
cmp.w #MIN_MATCH,PrevL
blo literal
cmp.w MatchL,PrevL
blo literal
; CHECK_MATCH Strst-1,PrevM,PrevL
MOVINT Strst,_strstart ; ct_tally reads this variable
move.l PrevL,d0
subq.w #MIN_MATCH,d0
MOVINT d0,-(sp)
move.l Strst,d0
sub.w PrevM,d0
subq.w #1,d0
MOVINT d0,-(sp)
jsr _ct_tally ; sets d0 true if we have to flush
addq #2*INTSIZE,sp
subq.w #3,PrevL ; convert for dbra (prev_length - 2)
sub.w PrevL,Look
subq.w #2,Look
insertmatch:
addq.w #1,Strst
IN_STR a0,d1 ; don't clobber d0
dbra PrevL,insertmatch
moveq #0,Avail
moveq #0,PrevL ; not needed?
moveq #MIN_MATCH-1,MatchL
addq.w #1,Strst
tst.w d0
beq refill
FLUSH_B #0
move.l Strst,_block_start
bra.s refill
literal:
tst.w Avail
bne.s yeslit
moveq #1,Avail
bra.s skipliteral
yeslit: TRACE_C <-1(Window,Strst.l)>
MOVINT Strst,_strstart ; ct_tally reads this variable
moveq #0,d0
move.b -1(Window,Strst.l),d0
MOVINT d0,-(sp)
CLRINT -(sp)
jsr _ct_tally
addq #2*INTSIZE,sp
tst.w d0
beq.s skipliteral
FLUSH_B #0
move.l Strst,_block_start
skipliteral:
addq.w #1,Strst
subq.w #1,Look
refill:
cmp.w #MIN_LOOKAHEAD,Look
bhs look_loop
bsr fill_window
bra look_loop
last_tally:
tst.w Avail
beq last_flush
MOVINT Strst,_strstart ; ct_tally reads this variable
moveq #0,d0
move.b -1(Window,Strst.l),d0
MOVINT d0,-(sp)
CLRINT -(sp)
jsr _ct_tally
addq #2*INTSIZE,sp
last_flush:
FLUSH_B #1
bra deflate_exit
; ================== This is another version used for low compression levels:
deflate_fast:
moveq #0,MatchL
moveq #MIN_MATCH-1,PrevL
flook_loop:
tst.w Look
beq flast_flush
IN_STR a0,d0
tst.w Head
beq.s fno_new_match
move.w Strst,d0
sub.w Head,d0
cmp.w #MAX_DIST,d0
bhi.s fno_new_match
move.w PrevL,prev_length ; longest_match reads these variables
MOVINT Strst,_strstart
MOVINT Head,d0 ; parm for longest_match
bsr longest_match ; sets match_start
cmp.w Look,d0 ; does length exceed valid data?
bls.s fstml
move.w Look,d0
fstml: move.w d0,MatchL ; valid length of match
fno_new_match:
cmp.w #MIN_MATCH,MatchL
blo fliteral
; CHECK_MATCH Strst,match_start,MatchL
MOVINT Strst,_strstart ; ct_tally reads this variable
move.l MatchL,d0
subq.w #MIN_MATCH,d0
MOVINT d0,-(sp)
move.l Strst,d0
sub.w match_start,d0
MOVINT d0,-(sp)
jsr _ct_tally ; sets d0 true if we have to flush
addq #2*INTSIZE,sp
sub.w MatchL,Look
cmp.w max_lazy_match,MatchL
bhi ftoolong
subq.w #2,MatchL
finsertmatch:
addq.w #1,Strst
IN_STR a0,d1 ; preserve d0
dbra MatchL,finsertmatch
moveq #0,MatchL ; not needed?
addq.w #1,Strst
bra.s flushfill
ftoolong:
add.w MatchL,Strst
moveq #0,MatchL
moveq #0,d1 ; preserve d0
move.b (Window,Strst.l),d1
move.w d1,ins_h
; My assembler objects to passing <1(Window,Strst.l)> directly to UP_HASH...
move.b 1(Window,Strst.l),Avail ; Avail is not used in deflate_fast
UP_HASH d1,Avail ; preserve d0
IFNE MIN_MATCH-3
FAIL needs to UP_HASH another MIN_MATCH-3 times, but with what arg?
ENDC
bra.s flushfill
fliteral:
TRACE_C <(Window,Strst.l)>
MOVINT Strst,_strstart ; ct_tally reads this variable
moveq #0,d0
move.b (Window,Strst.l),d0
MOVINT d0,-(sp)
CLRINT -(sp)
jsr _ct_tally ; d0 set if we need to flush
addq #2*INTSIZE,sp
addq.w #1,Strst
subq.w #1,Look
flushfill:
tst.w d0
beq.s frefill
FLUSH_B #0
move.l Strst,_block_start
frefill:
cmp.w #MIN_LOOKAHEAD,Look
bhs flook_loop
bsr fill_window
bra flook_loop
flast_flush:
FLUSH_B #1 ; sets our return value
deflate_exit:
MOVINT Strst,_strstart ; save back cached values
move.w PrevL,prev_length
move.w Look,lookahead
movem.l (sp)+,DEFREGS
rts
; =========================================================================
; void fill_window(void) calls the input function to refill the sliding
; window that we use to find substring matches in.
More equr Head ; local variable in fill_window
WindTop equr Prev ; local variable used for sliding
SlidIx equr PrevL ; local variable used for sliding
IFD AMIGA
FWREGS reg d2-d5/a2-a6 ; does NOT include Look and Strst
ELSE
FWREGS reg d1-d5/a0-a6 ; likewise
ENDC
; all registers available to be clobbered by the sliding operation:
; we exclude More, WindTop, SlidIx, Look, Strst, Window, a4 and a7.
SPAREGS reg d0-d3/a0-a1/a5-a6
SPCOUNT equ 8 ; number of registers in SPAREGS
_fill_window: ; C-callable entry point
movem.l Strst/Look,-(sp)
IFD INT16
moveq #0,Strst ; Strst must be valid as a long
ENDC
MOVINT _strstart,Strst
move.w lookahead,Look
BASEPTR _window,Window
bsr.s fill_window
MOVINT Strst,_strstart
move.w Look,lookahead
movem.l (sp)+,Strst/Look
rts
; strstart, lookahead, and window must be cached in Strst, Look, and Window:
fill_window: ; asm-callable entry point
movem.l FWREGS,-(sp)
tst.w eofile ; we put this up here for speed
bne fwdone
and.l #$FFFF,Look ; make sure Look is valid as long
fw_refill:
move.l _window_size,More ; <= 64K
sub.l Look,More
sub.l Strst,More ; Strst is already valid as long
cmp.w #EOF,More
bne.s notboundary
subq.w #1,More
bra checkend
notboundary:
tst.w sliding
beq checkend
cmp.w #WSIZE+MAX_DIST,Strst
blo checkend
IFGT 32768-WSIZE
lea WSIZE(Window),WindTop ; WindTop is aligned when Window is
ELSE
move.l Window,WindTop
add.l #WSIZE,WindTop
ENDC
move.l Window,d0
and.w #3,d0
beq.s isaligned
subq.w #1,d0
align: move.b (WindTop)+,(Window)+ ; copy up to a longword boundary
dbra d0,align
isaligned:
; This is faster than a simple move.l (WindTop)+,(Window)+ / dbra loop:
move.w #(WSIZE-1)/(4*SPCOUNT),SlidIx
slide: movem.l (WindTop)+,SPAREGS ; copy, 32 bytes at a time!
movem.l SPAREGS,(Window) ; a slight overshoot doesn't matter.
lea 4*SPCOUNT(Window),Window ; can't use (aN)+ as movem.l dest
dbra SlidIx,slide
BASEPTR _window,Window ; restore cached value
sub.w #WSIZE,match_start
sub.w #WSIZE,Strst
sub.l #WSIZE,_block_start
add.w #WSIZE,More
BASEPTR _head,a0
move.w #HASH_SIZE-1,d0
fixhead:
move.w (a0),d1
sub.w #WSIZE,d1
bpl.s headok
moveq #0,d1
headok: move.w d1,(a0)+
dbra d0,fixhead
BASEPTR _prev,a0
move.w #WSIZE-1,d0
fixprev:
move.w (a0),d1
sub.w #WSIZE,d1
bpl.s prevok
moveq #0,d1
prevok: move.w d1,(a0)+
dbra d0,fixprev
TRACE_C #'.'
checkend: ; assert eofile is false
MOVINT More,-(sp) ; assert More's upper word is zero
move.l Strst,d0
add.w Look,d0
add.l Window,d0
move.l d0,-(sp)
move.l _read_buf,a0
jsr (a0) ; refill the upper part of the window
addq #4+INTSIZE,sp
tst.w d0
beq.s iseof
cmp.w #EOF,d0
beq.s iseof
add.w d0,Look
cmp.w #MIN_LOOKAHEAD,Look
blo fw_refill ; eofile is still false
bra.s fwdone
iseof: move.w #1,eofile
fwdone: movem.l (sp)+,FWREGS
rts
; =========================================================================
; void lm_free(void) frees dynamic arrays in the DYN_ALLOC version.
xdef _lm_free ; the entry point
_lm_free:
IFD DYN_ALLOC
move.l _window,d0
beq.s lf_no_window
move.l d0,-(sp)
jsr _free
addq #4,sp
clr.l _window
lf_no_window:
move.l _prev,d0
beq.s lf_no_prev
move.l d0,-(sp)
jsr _free
move.l _head,(sp) ; reuse the same stack arg slot
jsr _free
addq #4,sp
clr.l _prev
clr.l _head
lf_no_prev:
ENDC
rts
; ============================================================================
; void lm_init(int pack_level, unsigned short *flags) allocates dynamic arrays
; if any, and initializes all variables so that deflate() is ready to go.
xdef _lm_init ; the entry point
Level equr d2
;Window equr a2 ; as in deflate()
IFD AMIGA
INIREGS reg d2/a2
ELSE
INIREGS reg d0-d2/a0-a1
ENDC
_lm_init:
MOVINT 4(sp),d0
move.l 4+INTSIZE(sp),a0
movem.l INIREGS,-(sp)
move.w d0,Level
cmp.w #1,Level
blt.s levelerr
bgt.s try9
bset.b #B_FAST,1(a0)
try9: cmp.w #9,Level
bgt.s levelerr
blt.s levelok
bset.b #B_SLOW,1(a0)
bra.s levelok
levelerr:
pea level_message
jsr _error ; never returns
levelok:
clr.w sliding
tst.l _window_size
bne.s gotawindowsize
move.w #1,sliding
move.l #2*WSIZE,_window_size
gotawindowsize:
BASEPTR _window,Window
IFD DYN_ALLOC
move.l Window,d0 ; fake tst.l
bne.s gotsomewind
CAL_SH WSIZE
move.l d0,Window
move.l d0,_window
bne.s gotsomewind
pea window_message
MOVINT #ZE_MEM,-(sp)
jsr _ziperr ; never returns
gotsomewind:
tst.l _prev
bne.s gotsomehead
CAL_SH WSIZE
move.l d0,_prev
beq.s nohead
CAL_SH HASH_SIZE
move.l d0,_head
bne.s gotfreshhead ; newly calloc'd memory is zeroed
nohead: pea hash_message
MOVINT #ZE_MEM,-(sp)
jsr _ziperr ; never returns
gotsomehead:
ENDC ; DYN_ALLOC
move.w #(HASH_SIZE/2)-1,d0 ; two shortwords per loop
BASEPTR _head,a0
wipeh: clr.l (a0)+
dbra d0,wipeh
gotfreshhead:
move.l Level,d0
IFEQ Sizeof_config-8
asl.l #3,d0
ELSE
mulu #Sizeof_config,d0
ENDC
lea config_table,a0
add.l d0,a0
move.w Max_lazy(a0),max_lazy_match
move.w Good_length(a0),good_match
move.w Nice_length(a0),nice_match
move.w Max_chain(a0),max_chain_len
CLRINT _strstart
clr.l _block_start
bsr match_init
clr.w eofile
MOVINT #WSIZE,-(sp) ; We read only 32K because lookahead is short
move.l Window,-(sp) ; even when int size is long, as if deflate.c
move.l _read_buf,a0 ; were compiled with MAXSEG_64K defined.
jsr (a0)
addq #4+INTSIZE,sp
move.w d0,lookahead
beq.s noread
cmp.w #EOF,d0
bne.s irefill
noread: move.w #1,eofile
clr.w lookahead
bra.s init_done
irefill:
move.w lookahead,d0
cmp.w #MIN_LOOKAHEAD,d0
bhs.s hashify
bsr _fill_window ; use the C-callable version
hashify:
clr.w ins_h
moveq #MIN_MATCH-2,d0
hash1: move.b (Window)+,d1
UP_HASH Level,d1
dbra d0,hash1
init_done:
movem.l (sp)+,INIREGS
rts
; strings for error messages:
hash_message dc.b 'hash table allocation',0
window_message dc.b 'window allocation',0
level_message dc.b 'bad pack level',0
end