home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Windoware
/
WINDOWARE_1_6.iso
/
source
/
stream13
/
lzwstrea.asm
< prev
next >
Wrap
Assembly Source File
|
1992-02-11
|
19KB
|
680 lines
PAGE 60, 132
TITLE 12 bit LZW Compression Scheme
LOCALS @@
Comment *
This unit is a modified version of Wilbert van Leijen's LZW unit,
to implement a stream type that automatically compresses data.
The original documentation:
This modules implements a 12-bit Lempel-Zev-Welch "crunch" (compress
data) and "uncrunch" (restore data in its original form) routines.
InBuffer and OutBuffer are untyped; you must pass pointers to arrays
(0..MaxRange) of Char or Byte to it, whereby MaxRange is limited to
2^16-16+1 = 65521 bytes.
You must also supply the size of the input buffer.
The LZW technique is well explained in the following reference:
Terry A. Welch, "A Technique for High Performance Data Compression"
IEEE Computer
Vol. 17, no. 6 (June 1984), pp. 8-19
Incorporate these routines as follows in a TP unit:
[ deleted ]
(C) Copyright Wilbert van Leijen, Amsterdam 1990.
Released to the Public Domain under the condition that this program
will not be sold for a profit except with written permission from
the author.
Stream additions (c) copyright D.J. Murdoch, 1991.
*
MaxStack = 4096; ; Decompression stack size
TableSize = MaxStack-1; ; Upper bound of 12 bit tables
HalfFull = MaxStack / 2
ProbeValue = 131; ; Preferably a prime number
True = 1
False = 0
EndList = -1; ; Marks end of a list
NoPrev = -2; ; Code for no previous character
Empty = -3; ; Indicates empty
DataSize = [BP+18] ; Number of bytes input
OutputSize = word ptr [BP+16] ; Max number to output
InputBuf = [BP+12] ; Input data buffer
OutputBuf = [BP+8] ; Output data buffer
Tables = [BP+4] ; Where the tables are
Table STRUC
Collision DB MaxStack DUP(?) ; Hash table entries
PrefixTable DW MaxStack DUP(?) ; Code for preceding stringf
SuffixTable DB MaxStack DUP(?) ; Code for current character
ChildTable DW MaxStack DUP(?) ; Next duplicate in collision list
CharStack DB MaxStack DUP(?) ; Decompression stack
StackPtr DW ? ; Decompression stack depth
Prefix DW ? ; Previous code string
TableUsed DW ? ; # string table entries used
InputPos DW ? ; Index in input buffer
OutputPos DW ? ; Index in output buffer
LastHit DW ? ; Last empty slot in collision table
CodeBuf DW ? ; Temporary code buffer
SaveIP DW ? ; Saved registers between calls
SaveAX DW ?
SaveCX DW ?
SaveDX DW ?
NotFound DB ? ; Character combination found flag
Table ENDS
; CH register is set aside for final output character (= Suffix)
Code SEGMENT Word Public
ASSUME CS:Code
Public Initialise
Public PutSignature
Public Crunch
Public FlushLZW
Public GetSignature
Public Uncrunch
; Initialise variables, fill tables
Initialise PROC near
PUSH BP
MOV BP,SP
PUSH DS ; save DS
LDS BX,Tables ; get DS:BX to point to the tables
XOR AX, AX
MOV [BX].InputPos, AX
MOV [BX].OutputPos, AX
MOV [BX].TableUsed, AX
MOV [BX].StackPtr, AX
MOV [BX].CodeBuf, Empty
; Loop:
; Clear collision table
; Set prefix to 'no previous character' code
; Set child nodes to 'end of list' code
MOV DX, Tablesize
@@1: MOV DI, DX
MOV [BX+DI+Collision], 0
SHL DI, 1
MOV [BX+DI+PrefixTable], NoPrev
MOV [BX+DI+ChildTable], EndList
DEC DX
JNS @@1
; Loop:
; Enter all single characters into the hash table
MOV DX, 255
MOV [BX].Prefix, NoPrev
@@2: MOV CH, DL
CALL MakeEntry
DEC DX
JNS @@2
MOV [BX].SaveCX,CX
POP DS
POP BP
RETN 4
Initialise ENDP
; Hash function: 'fold' number
; AX := (Prefix shl 5 xor Suffix) and TableSize
; uses CL,DX
HashNumber MACRO
XOR DX, DX
MOV DL, CH
MOV AX, [BX].Prefix
MOV CL, 5
SHL AX, CL
XOR AX, DX
AND AX, TableSize
ENDM
; Store a character combination in the hash table
MakeEntry PROC Near
PUSH DX
HashNumber
; Rehash is necessary if entry in the hash table is occupied
MOV DI, AX
CMP [BX+DI+Collision], False
JE @@6
; Loop:
; Advance index of ChildTable to last empty list
@@1: MOV DI, AX
SHL DI, 1
CMP [BX+DI+ChildTable], EndList
JE @@2
MOV AX, [BX+DI+ChildTable]
JMP Short @@1
; If the hash table is less than 50% loaded
; Increase index with the probing value
@@2: CMP [BX].TableUsed, HalfFull
JAE @@3
MOV SI, AX
ADD SI, ProbeValue
AND SI, TableSize
JMP Short @@4
; Else deal with the clustering problem
; A simple yet effective solution is to start probing at the
; empty slot of the hash table found during the previous run
@@3: MOV SI, [BX].LastHit
; Loop:
; Probe hash table until an empty slot is found
@@4: CMP [BX+SI+Collision], False
JE @@5
INC SI
AND SI, TableSize
JMP Short @@4
; Found an empty slot, save index in ChildTable
; Store index in LastHit
@@5: MOV DI, AX
SHL DI, 1
MOV [BX+DI+ChildTable], SI
MOV DI, SI
MOV [BX].LastHit, SI
; Return:
; Indicate hash table slot's been occupied
; Store character combination in PrefixTable, SuffixTable
; Indicate 'end of list' in ChildTable
; Bump table load counter
@@6: MOV [BX+DI+Collision], True
MOV [BX+DI+SuffixTable], CH
SHL DI, 1
MOV [BX+DI+ChildTable], EndList
MOV AX, [BX].Prefix
MOV [BX+DI+PrefixTable], AX
INC [BX].TableUsed
POP DX
RETN
MakeEntry ENDP
; Lookup a character combination in the hash table
LookupStr PROC Near
HashNumber
XCHG SI, AX
; Search through list of hash collision entries for one that match
; Loop
; Entry is found if prefix and suffix match
; If no match, advance index to entry in ChildTable
; Until entry is found or no such list exists in ChildTable
@@1: MOV DI, SI
MOV AL, [BX+DI+SuffixTable]
CMP AL, CH
JNE @@2
SHL DI, 1
MOV AX, [BX+DI+PrefixTable]
CMP AX, [BX].Prefix
JE @@3
@@2: MOV DI, SI
SHL DI, 1
MOV SI, [BX+DI+ChildTable]
CMP SI, EndList
JNE @@1
; Return index from ChildTable in AX
@@3: XCHG AX, SI
RETN
LookupStr ENDP
; Retrieve next character from the input buffer
GetChar MACRO
LES DI, InputBuf
ADD DI, [BX].InputPos
MOV AL, ES:[DI]
INC [BX].InputPos
ENDM
; Store next character in AL into the output buffer
PutChar MACRO
LES DI, OutputBuf
ADD DI, [BX].OutputPos
STOSB
INC [BX].OutputPos
ENDM
; Retrieve compressed code from input buffer
GetCode PROC Near
MOV SI, [BX].CodeBuf
; Get first character and store it in a temporary buffer
GetChar
XOR AH, AH
MOV DX, AX
; If input code is empty
CMP SI, Empty
JNE @@1
; Get next character
; Return 8 bits from first character + 4 bits from second character
; Save the remaining 4 bits of the second character for next time
GetChar
MOV SI, AX
MOV CL, 4
SHR AX, CL
MOV DI, AX
MOV AX, DX
SHL AX, CL
ADD AX, DI
JMP @@2
; Else
; Get the last 4 bits from the input code + 8 bits from the second char
@@1: MOV AX, SI
XCHG AH, AL
AND AH, 0Fh
ADD AX, DX
MOV SI, Empty
@@2:
MOV [BX].CodeBuf,SI
RETN
GetCode ENDP
; Store compressed code in the output buffer
PutCode PROC Near
MOV DI, [BX].CodeBuf
MOV DX, [BX].Prefix
; If output code is empty
CMP DI, Empty
JNE @@1
; Store first 8 bits in the output buffer
; Save last 4 bits for the next time through
MOV AX, DX
MOV CL, 4
SHR AX, CL
PutChar
MOV DI, DX
JMP @@2
; Else
; Put out last 4 bits of previous code + first 4 bits of this code
; Next, put out last 8 bits of this code
; Indicate output code as empty
@@1: MOV AX, DI
MOV CL, 4
SHL AX, CL
ADD AL, DH
PutChar
MOV AL, DL
PutChar
MOV DI, Empty
@@2: MOV [BX].CodeBuf,DI
RETN
PutCode ENDP
; Start the compressor by putting 'LZ'
PutSignature PROC Near
PUSH BP
MOV BP,SP
PUSH DS ; save DS
LDS BX,Tables ; get DS:BX to point to the tables
MOV [BX].OutputPos, 0
; Get first character and store it in Suffix
; There are no character combinations yet
MOV AL, 'L'
MOV CH, AL
MOV [BX].Prefix, NoPrev
CALL LookupStr
MOV [BX].Prefix, AX
; Get next character
MOV AL, 'Z'
MOV CH, AL
MOV [BX].SaveCX,CX
MOV AL,True ; Return success
POP DS
POP BP
RET 4
PutSignature ENDP
Crunch PROC Near
PUSH BP
MOV BP,SP
PUSH DS ; save DS
LDS BX,Tables ; get DS:BX to point to the tables
MOV CX,[BX].SaveCX ; get saved registers
MOV [BX].InputPos, 0
MOV [BX].OutputPos, 0
DEC OutputSize ; sometimes we write 2 bytes per loop,
; so reduce this for safety
; Loop:
; Process all characters from input buffer
@@1: MOV DX, [BX].InputPos
MOV AX, [BX].OutputPos
CMP DX, DataSize
JAE @@5
CMP AX, OutputSize
JAE @@5
; Lookup the character combination
; Store if not found and if empty slots are available in the hash table
CALL LookupStr
MOV DI, AX
CMP DI, EndList
JNE @@3
PUSH DI
CMP [BX].TableUsed, TableSize
JA @@2
CALL MakeEntry
; Store code in the output buffer
; If string is in table, keep looking for longer strings
@@2: CALL PutCode
POP DI
MOV [BX].Prefix, NoPrev
XCHG DX, DI
CALL LookupStr
XCHG DI, DX
MOV [BX].Prefix, AX
JMP Short @@4
; Get next character
@@3: MOV [BX].Prefix, DI
@@4: GetChar
MOV CH, AL
JMP Short @@1
@@5: MOV [BX].SaveCX,CX
; return number written in AX
; and number used in DX
POP DS
POP BP
RET 14
Crunch ENDP
; Make sure the last code will be written out
; If the output code <> Empty, store the last 4 pending bits
FlushLZW Proc Near
PUSH BP
MOV BP,SP
PUSH DS ; save DS
LDS BX,Tables ; get DS:BX to point to the tables
MOV CX,[BX].SaveCX ; get saved registers
MOV [BX].InputPos, 0
MOV [BX].OutputPos, 0
@@5: CALL PutCode
MOV SI,[BX].CodeBuf
CMP SI, Empty
JE @@7
MOV AX, SI
MOV CL, 4
SHL AX, CL
PutChar
; Return the number of bytes written to the output buffer
@@7: MOV AX, [BX].OutputPos
MOV [BX].SaveCX,CX
POP DS
POP BP
RET 8
FlushLZW ENDP
; Run through code extracting single characters from code string until
; no more characters can be removed. Push these onto the stack.
; They will be entered in reverse order, and will come out in forwards order
; when popped off
PushChar PROC Near
@@1: MOV DI, SI
SHL DI, 1
CMP [BX+DI+PrefixTable], NoPrev
JNE @@2
RETN
@@2: MOV AL, [BX+SI+SuffixTable]
INC [BX].StackPtr
MOV DI, [BX].StackPtr
MOV [BX+DI+CharStack], AL
SHL SI, 1
MOV SI, [BX+SI+PrefixTable]
JMP Short @@1
PushChar ENDP
; While StackPtr > 0
; Pop a character from the stack
PopChar PROC Near
PutChar
MOV DI, [BX].StackPtr
OR DI, DI
JE @@1
MOV AL, [BX+DI+CharStack]
DEC [BX].StackPtr
RETN
@@1: MOV AX, Empty
RETN
PopChar ENDP
; Check for correct start of buffer, and initialize registers
GetSignature PROC Near
PUSH BP
MOV BP, SP
PUSH DS ; save DS
LDS BX,Tables ; get DS:BX to point to the tables
; Get first string and check the corresponding character
; Keep a copy of this code
CALL GetCode
MOV [BX].Prefix, AX
MOV SI, AX
MOV CH, [BX+SI+SuffixTable]
CMP CH, 'L'
JNE @@2
CALL GetCode
MOV [BX].SaveDX, AX
; Do half a run through the old Uncrunch loop
; ( skip size check )
MOV [BX].Notfound, False
MOV SI, AX
; ( skip collision test, & PushChar call )
MOV CH, [BX+SI+SuffixTable]
CMP CH, 'Z'
JNE @@2
; ( do PopChar's MOV AX, Empty )
MOV [BX].SaveAX,Empty
MOV AL,True ; Success! We're ready for the loop.
JMP @@1
@@2: MOV AL,False ; It failed!
@@1: MOV [BX].SaveCX,CX
MOV [BX].SaveIP,OFFSET MainLoop
POP DS
POP BP
RET 14
GetSignature ENDP
UnCrunch PROC Near
PUSH BP
MOV BP,SP
PUSH DS ; save DS
LDS BX,Tables ; get DS:BX to point to the tables
MOV AX,[BX].SaveAX ; get saved registers
MOV CX,[BX].SaveCX
MOV DX,[BX].SaveDX
MOV [BX].InputPos, 0 ; set up string pointers & sizes
MOV [BX].OutputPos, 0
DEC Word Ptr DataSize ; sometimes we need to read two
JMP [BX].SaveIP
; Loop:
; Process all characters from input buffer
; While the stack is not empty, remove and output all characters from
; stack which are rest of characters in the string
@@1: MOV DI,[BX].OutputPos ; Check if there's room for more characters
CMP DI,OutputSize
JB @@3
CALL ExitLoop ; Exit from Uncrunch
Mainloop:
@@3: CMP AX, Empty
JE @@4
CALL PopChar
JMP Short @@1
; If code isn't known, store the follower character of last
; character of string
@@4: CMP [BX].NotFound, False
JE @@5
PUSH AX
MOV AL, CL
MOV CH, AL
PutChar
POP AX
; Check whether there's enough space for another char
@@8: MOV DI,[BX].OutputPos
CMP DI,OutputSize
JB @@5
; No space, so quit
CALL ExitLoop
; If the hash table is not full
; Enter code into table
; Make current code the previous code
; Get next code
@@5: CMP [BX].TableUsed, TableSize
JA @@6
CALL MakeEntry
@@6: MOV [BX].Prefix, DX
CALL GetCode
XCHG DX, AX
; Old top of loop
MOV AX, [BX].InputPos
CMP AX, DataSize
JB @@10
; Out of data, so exit
CALL ExitLoop
; Retrieve character from string
; Keep a copy of it in CL
@@10: MOV [BX].NotFound, False
MOV SI, DX
CMP [BX+SI+Collision], False
JNE @@2
MOV AL, CH
MOV CL, AL
MOV SI, [BX].Prefix
MOV [BX].NotFound, True
; Get first character from string
@@2: CALL PushChar
MOV AL, [BX+SI+SuffixTable]
MOV CH, AL
CALL PopChar
JMP @@1
UnCrunch ENDP
ExitLoop PROC Near ; allows exit from UnCrunch loop
; and resumption at several places.
POP [BX].SaveIP ; Get address
MOV [BX].SaveAX,AX ; Save registers
MOV [BX].SaveCX,CX
MOV [BX].SaveDX,DX
; Return the number of bytes written to the output buffer in AX
; and the number of bytes used in DX
MOV AX, [BX].OutputPos
MOV DX, [BX].InputPos
POP DS
POP BP
RETN 14
ExitLoop ENDP
Code ENDS
END