home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
The World of Computer Software
/
World_Of_Computer_Software-02-385-Vol-1of3.iso
/
k
/
kboom11.zip
/
KBOOM11.ASM
next >
Wrap
Assembly Source File
|
1993-01-08
|
55KB
|
1,372 lines
.MODEL small
COMMENT #
"AP-SLIDER" (LZW) DYNAMIC DICTIONARY COMPRESSION
uses 13 bit pointers and
8k dictionary
w/update
(oldest leaf is deleted)
(one-at-a-time)
Memory needed: 100k
Written in MASM 6.0
Miles Pawski, 1992
THIS PROGRAM IS FREE
To assemble & link under MASM 6.0 (use ML.EXE): ML kboom11.asm
Requirements: 8086+
Please note: Stack frames are generated automatically by
procedures called by the INVOKE directive.
The stack is automatically cleaned up by the
RET instruction. Local variables are automatically
referenced and coded. You can eliminate the INVOKE
directive by passing parameters in variables or
registers. The USES list automatically pushes (on entry)
and pops (on ret) the indicated registers. You can
eliminate this with the appropriate code within the
procedure.
.WHILE .IF .ELSEIF .ELSE, etc. are automatic generations
of the cmp/jne/jmp... routines. An .IF .ENDIF block
is basically a cmp/jne routine. Adding an .ELSEIF
puts in a cmp/jump-around, etc.
.STARTUP generates the following code (if default near-
stack is used - this places the stack in DGROUP so ss:sp
needs to be adjusted). Only the first two lines are
required if using using FARSTACK (stack in its own
segment). FARSTACK can be specified by indicating
it in the .MODEL directive: .MODEL small,farstack
mov dx,DGROUP
mov ds,dx
mov bx,ss
sub bx,dx
shl bx,1
shl bx,1
shl bx,1
shl bx,1
cli
mov ss,dx
add sp,bx
sti
.EXIT generates terminate program code using int 21h,
function 04ch.
The ASSUME assembler directive was used to automatically
provide various segment over-rides for various array
variables. For examble, ASSUME es:seg parfield generates
this code: es:parfield.
THIS PROGRAM is based on James Storer's 'squeeze' source
code found in his book DATA COMPRESSION, METHODS AND
THEORY, 1988 - Computer Science Press. Modification
and methods were based on experimentation, general
tree theory of data structures and ideas put forth in
Mr. Storer's book. Mr. Storer's original source code
is in PASCAL. This program will hopefully provide a
better understanding of the LZW method and complex,
linked tree structures and there use.
#
;---------------------------MACROS---------------------------------------
child macro mainptr,char
xor ax,ax ;;returns pointer value in ax
mov cx,char
mov bx,mainptr
shl bx,1 ;;make mainptr word indexer
mov ax,lcfield[bx] ;;get child from lcfield
mov bx,ax ;;put in bx to index
shl bx,1 ;;make word ptr
.WHILE ax != NILPTR && cx != ctrfield[bx]
mov ax,rsibfield[bx] ;;cx=indexed char
mov bx,ax ;;if first child <> NILPTR and
shl bx,1 ;;char <> indexed char then walk the
.ENDW ;;linked list of siblings of child
endm ;;until we find a match
;;no match returns NILPTR
parent macro mainptr
xor ax,ax ;returns pointer value in ax
mov bx,mainptr
shl bx,1
mov ax,es:parfield[bx]
endm
ctr1 macro mainptr
xor ax,ax ;returns pointer value in ax
mov bx,mainptr
shl bx,1
mov ax,ctrfield[bx]
endm
print macro char
mov ah,02h
forc arg1,<char>
mov dl,"&arg1"
int 21h ;;print string to screen: print HELLO
endm
endm
close macro handle
mov ah,3eh
mov bx,handle ;;file closure
int 21h
endm
writestring macro string,length
mov ah,40h
mov bx,1
mov cx,length ;;write string to screen
lea dx,string
int 21h
endm
writefile macro var,handle,len
mov ah,40h
mov bx,handle ;;write to file handle from variable
mov cx,len
lea dx,var
int 21h
endm
readfile macro var,handle,len
mov ah,3fh
mov bx,handle ;;read from file handle into variable
mov cx,len
lea dx,var
int 21h
endm
;------------------------------------------------------------------------
; ADD_LEAF macro. Adds a leaf to the tree.
; Input: Parent node, node (code) to be added, character (mval)
; ** See James Storer's DATA COMPRESSION, METHODS AND THEORY
; for more explanations,theory and original PASCAL code.
;------------------------------------------------------------------------
add_leaf macro
.IF prevptr != NILPTR
push dx
push si
push di
mov bx,trieptr ;new code
mov dx,bx ;save a copy of new code in dx
mov ax,mval ;match character
mov di,prevptr ;parent node in di
shl bx,1
mov ctrfield[bx],ax ;ctrfield(mainptr)=char
mov parfield[bx],di ;parfield(mainptr)=parptr
mov lcfield[bx],NILPTR ;lcfield(mainptr)=NILPTR
mov lsibfield[bx],NILPTR
shl di,1 ;make parptr word indexer
mov si,lcfield[di] ;si=lcfield(parptr)
mov rsibfield[bx],si ;rsibfield(mainptr)=lcfield(parptr)
.IF si != NILPTR ;si=lcfield(parptr)
shl si,1
mov lsibfield[si],dx ;lsibfield(lcfield(parptr))=mainptr
.ENDIF
mov lcfield[di],dx ;lcfield(parptr)=ptr
pop di
pop si
pop dx
.ENDIF
endm
;----------------------------------------------------------------------
; DELETE_LEAF macro. Deletes a leaf from the tree.
; Input: Node (code) to be deleted. Passed in variable (trieptr)
; If node has siblings, they will be connected to each other properly
; after the node is deleted.
; ** See James Storer's DATA COMPRESSION, METHODS AND THEORY
; for more explanations,theory and original PASCAL code
;----------------------------------------------------------------------
delete_leaf macro
push si
push di
mov bx,trieptr ;bx=code to be deleted
shl bx,1
mov ax,rsibfield[bx] ;ax=rsibfield(mainptr)
.IF lsibfield[bx] != NILPTR
mov di,lsibfield[bx] ;connect left sibling and right sibling
shl di,1 ;through rsibfield of left sibling (huh?)
mov rsibfield[di],ax ;rsibfield[lsibfield(mainptr))=
.ELSE ; rsibfield(mainptr)
mov di,parfield[bx] ;di=parfield(mainptr)
shl di,1
mov lcfield[di],ax ;lcfield(parfield(mainptr))=
.ENDIF ; rsibfield(mainptr)
.IF ax != NILPTR
mov di,lsibfield[bx] ;di=lsibfield(mainptr)
mov si,ax ;make left sibling of deleted leaf the
shl si,1 ;left sibling of deleted leaf's right sib.
mov lsibfield[si],di ;lsibfield[rsibfield(mainptr))=
.ENDIF ; lsibfield(mainptr)
pop di
pop si
endm
;--------------------------------------------------------------------------
;MASM 6.0 is 1-pass assembly. Procedures called with the INVOKE
;directive that are physically located AFTER the call in the source
;code must be declared with PROTO. Far procedures must be declared
;also although this code has none.
cmdline PROTO near C,arg1:WORD,arg2:WORD,arg3:WORD
caps PROTO near C,arg1:WORD,arg2:WORD
.STACK
.DATA
dictsize WORD 00h
curptr WORD 00h
curlen WORD 00h
prevptr WORD 00h
trieptr WORD 00h
mval WORD 00h
mindex WORD 00h
prevlen WORD 00h
mstart WORD 00h
outcount WORD 00h
incount WORD 00h
bytes_read WORD ?
eight_count BYTE 00h
sixteen_count BYTE 16
all_over BYTE 00h
bigplace WORD 256
filename BYTE 50 dup (00)
tempfile BYTE 50 dup (00)
parabuf BYTE 50 dup (00)
paraflag BYTE 00h
compflag BYTE 00h
decompflag BYTE 00h
file_handle WORD ?
file_handle2 WORD ?
inputbyte WORD 00h
end_flag BYTE 00h
init_size DWORD ?
master_size DWORD 00h
filelen WORD ?
filelen2 WORD ?
comp_size DWORD ?
dictflag BYTE 00h
incflag BYTE 00h
work_code BYTE 0,0,0,0
ascii_string BYTE 10 dup (00h)
count WORD 00h
xdate WORD ?
xtime WORD ?
nologo_flag BYTE 00h
comp_flag BYTE 00h
decomp_flag BYTE 00h
comp BYTE "Compressing: "
spc BYTE 70 dup (20h)
fsize BYTE "Comp. size: "
isize BYTE "Orig. size: "
ratio BYTE "Ratio: "
perc BYTE "%"
crlf BYTE 13,10
expand BYTE "Expanding: "
arrow BYTE " --> "
fnl BYTE "Exp. Size: "
kboom_code BYTE "¿MP¿"
sorry0 BYTE "Sorry..."
sorry1 BYTE " was not compressed by Kboom11"
logo BYTE 13,10," Kboom11 LZW File Encoding/Decoding Utility featuring",13,10,
" 8k Dynamic Dictionary w/discrete update",13,10,
" Miles Pawski CIS: 70473,527 1992",13,10,13,10
format BYTE "FORMAT: kboom11 [input file] [output file] [/c][/d][/n]",13,10,
" /c - Compression /d - Decompression /n - No logo"
;Assembly-time constants
MAXINCREMENT EQU 4 ;AP - All prefixes
MAXMATCH EQU 100 ;max string match=100
STATICSIZE EQU 256 ;constant dictionary gaurantees first match
DICTIONARY EQU 8192
MAXPTR EQU DICTIONARY-2 ;highest pointer
NILPTR EQU DICTIONARY-1 ;end code - root code
;For experimentation. If the DICTIONARY size is changed, this
;text sequence automatically changes CODESIZE and BITSHIFT to fit
;the DICTIONARY. CODESIZE and BITSHIFT are used in the code to
;read and write pointers from and to the bit stream.
;Use only the below DICTIONARY values.
IF DICTIONARY EQ 512
CODESIZE EQU 9
BITSHIFT EQU 7
ELSEIF DICTIONARY EQ 1024
CODESIZE EQU 10
BITSHIFT EQU 6
ELSEIF DICTIONARY EQ 2048
CODESIZE EQU 11
BITSHIFT EQU 5
ELSEIF DICTIONARY EQ 4096
CODESIZE EQU 12
BITSHIFT EQU 4
ELSEIF DICTIONARY EQ 8192
CODESIZE EQU 13
BITSHIFT EQU 3
ENDIF
; I split the array fields between the near and far BSS (un-
; initialized data) segments. I couldn't get all the variables
; and arrays into one data segment, especially when using an 8k
; dictionary.
.DATA? ;DATA? is a default BSS segment in DGROUP
;-------------------------------------------
fielder label WORD
ctrfield WORD DICTIONARY dup (?)
lcfield WORD DICTIONARY dup (?)
;-------------------------------------------
match BYTE 100 dup (?),?
OUTSEG SEGMENT PARA PRIVATE 'FAR_BSS'
;----------------------------------------
fielder_two label WORD
parfield WORD DICTIONARY dup (?)
rsibfield WORD DICTIONARY dup (?)
lsibfield WORD DICTIONARY dup (?)
;----------------------------------------
outbuffer BYTE 8000 dup (?) ;file i/o buffers
inbuffer BYTE 8000 dup (?)
OUTSEG ENDS
.CODE
.STARTUP
;-----------------------------MAIN PROGRAM------------------------------
invoke cmdline, ADDR filename,ADDR tempfile,ADDR parabuf
cmp filelen,00h ;if source filename length=0,
je form ;quit with FORMAT display
cmp paraflag,01h ;if no parameters, quit with
jne form ;FORMAT display
call initialize ;set variables
mov di,seg outbuffer
mov es,di ;set es=far (output) segment
ASSUME es:seg parfield,es:seg lsibfield,es:seg rsibfield
;ASSUME usage:
;sets assembler for automatic segment
;over-rides for the listed variables
invoke caps,ADDR parabuf,50 ;capitalize parameters
mov di, offset parabuf ;read parameters
mov cx,50
@@:
mov ax,word ptr [di]
.IF ax=="N/"
mov nologo_flag,01h
.ELSEIF ax=="C/"
mov comp_flag,01h
.ELSEIF ax=="D/"
mov decomp_flag,01h
.ENDIF
inc di
loop @B
.IF comp_flag == 01h
call compression
jmp final_exit
.ELSEIF decomp_flag == 01h
call decompression
jmp final_exit
.ENDIF
form:
writestring format,SIZEOF format
final_exit:
.EXIT
;-------------------------------------------------------------------------
;╔══════════════════════════════════════════════════════════════════════╗
;║ ║
;║ ║
;║ PROCEDURE SECTION ║
;║ ║
;║ ║
;╚══════════════════════════════════════════════════════════════════════╝
;------------------------------------------------------------------------
; |
; MAIN COMPRESSION SEQUENCE |
; |
;------------------------------------------------------------------------
compression proc near PRIVATE
call open_source_file ;open source file
.IF CARRY?
jmp final
.ENDIF
push es
mov ah,4eh ;function 4eh
mov cx,10h ;file attribute
mov dx,offset filename ;point to filespec
int 21h ;"first match" interrupt
mov ah,2fh
int 21h ;get dta
mov ax,word ptr es:[bx+1ah] ;get file size from dta
mov dx,word ptr es:[bx+1ah][2]
mov word ptr [init_size][0],ax ;record in variable
mov word ptr [init_size][2],dx
mov ax,word ptr es:[bx+16h] ;get file time
mov xtime,ax
mov ax,word ptr es:[bx+18h] ;get file date
mov xdate,ax
pop es
call open_second_file ;open target file
.IF CARRY?
jmp final
.ENDIF
.IF nologo_flag != 01h
writestring logo,SIZEOF logo
.ENDIF
writestring comp,SIZEOF comp ;"Compressing: "
writestring filename,filelen ; [filename]
writefile kboom_code,file_handle2,4 ;write kboom code to file in
;first 4 bytes so we won't try to
;expand any files not generated by
;this program: ¿MP¿
writefile init_size,file_handle2,4 ;write next 4 bytes to file
;which represent what file size
;should be when expansion is
;finished
writefile xdate,file_handle2,2 ;write date to file
writefile xtime,file_handle2,2 ;write time to file
mov di,offset outbuffer ;set output buffer. es already set
call read_file ;read file into buffer
mov al, es:[si] ;get first byte from buffer
inc si
inc incount
xor ah,ah ;convert to word
mov inputbyte,ax ;move byte into variable
;--------- MAIN COMPRESSION LOOP ----------
.WHILE 1 ;loop indefinitely
mov ax,inputbyte ;same as: child NILPTR, inputbyte
mov trieptr,ax ;first ptr is always character and
;gets us into the root level
mov ax,curptr
mov prevptr,ax ;prevptr=curptr
mov ax,curlen
mov prevlen,ax ;prevlen=curlen
mov curlen,0000h
.WHILE trieptr != NILPTR ;first trieptr is never NILPTR
mov ax,trieptr ;curptr=trieptr
mov curptr,ax
mov bx,curlen
mov ax,inputbyte
mov match[bx],al ;record byte in array starting
inc curlen ;at postion 00. Curlen indexes in.
;This is used by update process.
;First byte in loop is always a match
mov ax,incount
cmp ax,bytes_read ;see if we'er at end of input buffer
jae read_leave ;if not, keep reading
goon:
mov al,es:[si] ;get new byte
inc si
inc incount
xor ah,ah
mov inputbyte,ax ;place in variable
child curptr,inputbyte ;see if new byte is child of curptr
mov trieptr,ax ;result in ax -> trieptr
;keep walking down the tree if
.ENDW ;we have a match (trieptr <> NILPTR)
leaving:
mov mstart,00h
mov bx,curptr ;mov curptr in bx
xor ax,ax
mov al,sixteen_count ;get number of bit positions left
repeat BITSHIFT
shl bx,1 ;CODESIZE bit pointer will now start
endm ;at left end of word. We want to
;output curptr into bit stream.
xor cx,cx
bigloop:
cmp cx,CODESIZE ;check code bit counter
je outbig ;done? then leave. Shift both in
shl bx,1 ;unison. dx holds bits.
rcl dx,1 ;carry added to dl if set by bx.
inc cx ;We are copying bx to dx bit by
dec ax ;bit. ax=dx bit counter
jnz bigloop
;---------------- JUMP-OUTS & FALL-THROUGHS ----------------------
; conditional branches ;
; kept out of main loops for speed ;
; ;
mov ax,dx ;
stosw ;store word in buffer ;
xor dx,dx ;clear out dl ;
mov ax,16 ;reset output bit counter ;
inc outcount ;record byte output ;
inc outcount ;
cmp outcount,8000 ;see if we're at end of output ;
jae next6 ;buffer. If not, go on. ;
;if at end, flush the buffer ;
next5: ;
cmp cx,CODESIZE-8 ;if we have enough bits left ;
ja bigloop ;and dx is clear, then we can ;
mov dl,bh ;move a whole byte (bh) at once. ;
mov bh,bl ;move bl over to left of word ;
mov ax,8 ;adjust output bit counter ;
add cx,8 ;adjust bit counter for CODESIZE ;
jmp bigloop ;
;
next6: ;
call write_file ;flush buffer to file ;
jmp next5 ;continue ;
;
read_leave: ;
.IF end_flag==01h ;check for eof. End_flag set if eof.;
mov all_over,01h ;set another flag so we can process ;
jmp leaving ;the last byte ;
.ENDIF ;
call read_file ;read more from file into buffer ;
jmp goon ;
; ;
;-----------------------------------------------------------------
outbig:
mov sixteen_count,al ;save bit count
cmp all_over,01h ;last byte? then leave
je endup
call update ;update the tree
.ENDW
;-------------- END MAIN COMPRESSION LOOP ------------
endup:
.IF sixteen_count > 00 ;see if we have leftover in dx
mov cl,sixteen_count ;if so, shift them all to the
shl dx,cl ;left so we don't mess up the
mov ax,dx ;bit stream.
stosw
inc outcount ;record final bytes
inc outcount
.ENDIF
call write_file ;flush the buffer
;--------- INFO/DECORATION ---------------
writestring crlf,2
writestring isize,SIZEOF isize ;"Orig. Size: "
mov ax,word ptr init_size[0]
mov dx,word ptr init_size[2]
call convert ;Convert to decimal string.
writestring crlf,2 ;value is passed to process in
;dx:ax pair
writestring fsize,SIZEOF fsize
mov ax,word ptr master_size[0]
mov dx,word ptr master_size[2]
call convert
;"Comp. size; "
writestring crlf,2 ; carriage return-line feed
writestring ratio,SIZEOF ratio ;"Ratio: "
;Ratio is not % saved but, rather
;the new file length divided by the old.
;However, to achieve this, I multiply the
;new file length by 100 first, then divide
;it by the old length.
xor bx,bx
mov bx,100
xor dx,dx ;file length.
mov ax,word ptr master_size[0] ;get low word
mul bx ;multiply by 100
mov cx,dx ;save dx in cx
mov si,ax ;save low word in si
xor dx,dx
mov ax,word ptr master_size[2] ;get high word dx ax
mul bx ;multiply by 100 bx
add dx,cx ;add saved high word ---------
add dx,ax ;add high to high (bx*ax) cx si
mov ax,si ;get back low (bx*dx) dx ax
; -------------
mov bx,word ptr init_size[0] ;Oh well, -> dx+ax+cx,si
mov cx,word ptr init_size[2]
;dx:ax now has the new file size multiplied by 100
;cx:bx has original file size
;---------DIVISION ROUTINE---------
div_num_1:
or cx,cx ; Divide both numbers by 2
je div_num_2 ; until high word of
shr dx,1 ; divisor (cx) is zero.
rcr ax,1 ; if cf is set by dx shift, the bit
; will be added to the left of ax during
shr cx,1 ; ax rcl
rcr bx,1
jmp div_num_1
div_num_2:
push ax ;Save low word
mov ax,dx
xor dx,dx
div bx ;Divide high word
mov cx,ax ;Save high quotent
pop ax
add ax,dx ;add remainder to low word
div bx ;Divide low word
mov si,dx ;save remainder in si
mov dx,cx ;386 code is faster and easier and I would
;much rather use it than going through all
;this BS. 32-bit registers FOREVER!
push dx ;save high word
push ax ;save low word
xor dx,dx ;zero dx
mov ax,si ;get back last remainder
mov cx,10 ;multiply by 10
mul cx
div bx ;divide by divisor again
mov si,ax ;save last quotient in si
;this will be first digit after decimal
mov ax,dx ;put latest remainder into ax
xor dx,dx ;zero dx
mul cx ;multiply by 10
div bx ;divide by divisor
;this will be second digit after decimal
mov bx,ax ;save quotient in bx
;------- END DIVISION ROUTINES ----------
pop ax ;get back low word
pop dx ;get back high word
push bx ;save second decimal digit
push si ;save first decimal digit
call convert ;write main ratio
mov ah,02h
mov dl,"." ;write decimal point "."
int 21h
pop si ;get back first decimal digit
pop bx ;get back second decimal digit
mov dx,si ;put first digit into dx
xor dh,dh
add dl,48 ;convert to ascii
int 21h ;write first digit after decimal to screen
mov dl,bl ;put second digit into dl
add dl,48 ;convert to ascii
int 21h ;write second digit after decimal to screen
mov dl,"%" ;"%"
int 21h
;---------- END INFO DECORATION -----------
final:
ret
compression endp
;------------------------------------------------------------------------
; |
; MAIN DECOMPRESSION SEQUENCE |
; |
;------------------------------------------------------------------------
decompression proc near PRIVATE
call open_source_file ;open source file
.IF CARRY?
jmp final
.ENDIF
push es
mov ah,4eh ;function 4eh
mov cx,10h ;file attribute
mov dx,offset filename ;point to filespec
int 21h ;"first match" interrupt
mov ah,2fh
int 21h ;get dta
mov ax,word ptr es:[bx+1ah] ;get file size from dta
mov dx,word ptr es:[bx+1ah][2]
sub ax,12 ;subtract 12 bytes because file size
sbb dx,0000h ;and code are contained in these bytes
mov word ptr [comp_size][0],ax ;record compressed file size (from dta)
mov word ptr [comp_size][2],dx ;date and time also take 4 bytes
pop es
readfile work_code,file_handle,4 ;first four bytes are compression code
mov si,offset work_code ;check to see if code is ¿MP¿
mov ax, word ptr [si] ;check first pair
cmp ax,"M¿"
jne @F
add si,2
mov ax,word ptr [si] ;if ok, check second pair
cmp ax,"¿P"
je code_ok
@@:
writestring sorry0,SIZEOF sorry0
writestring filename,filelen
writestring sorry1,SIZEOF sorry1
jmp final
code_ok:
call open_second_file ;open target file
.IF CARRY?
jmp final
.ENDIF
.IF nologo_flag != 01h
writestring logo,SIZEOF logo
.ENDIF
writestring expand,SIZEOF expand ;"Expanding [] --> []
writestring filename,filelen
writestring arrow,SIZEOF arrow
writestring tempfile,filelen2
writestring crlf,2
readfile init_size,file_handle,4 ;second four bytes are file size
readfile xdate,file_handle,2 ;read file date into variable
readfile xtime,file_handle,2 ;read file time into variable
mov di,offset outbuffer
call read_file ;read file into buffer
mov dx,word ptr es:[si] ;load dx with first word
inc si
inc si
inc incount
inc incount
;--------------------MAIN DECOMPRESSION ROUTINE---------------------
.WHILE 1 ;loop indefinitely
xor ax,ax
mov al,sixteen_count ;get saved bit count
xor cx,cx ;We need to read CODESIZE bits from input stream.
xor bx,bx ;We read a bit at a time from dx into bx. The
bigloop: ;leftover is left in dx for next time.
cmp cx,CODESIZE ;see if CODESIZE bits have been read
je outbig ;if not,read more bits
;if so, we have full CODESIZE bit pointer. Leave.
shl dx,1
rcl bx,1 ;bx will have pointer. Shift in unison with
inc cx ;dx. Ax keeps bit count ofn dx. If dx is empty,
dec ax ;fall through and fill dx again.
jnz bigloop
;-------------------JUMP-OUTS & FALL-THROUGHS ----------------------------
; ;
mov ax,incount ;check byte count ;
cmp ax,bytes_read ;see if all bytes have been processed from buffer
jae check_end ;if not, proceed ;
next0: ;
mov dx, word ptr es:[si] ;put new word into dl ;
inc incount ;count it ;
inc incount ;
inc si ;
inc si ;
mov ax,16 ;reset bit count ;
jmp bigloop ;
;
check_end: ;jump-out from above loop ;
.IF end_flag==01h ;
mov all_over,01h ;
jmp outbig ;
.ENDIF ;
call read_file ;read more of file into buffer ;
jmp next0 ;
; ;
;-----------------------------------------------------------------------;
outbig:
mov sixteen_count,al ;save bit count
mov trieptr,bx ;put completed pointer into trieptr
;GOT POINTER, NOW START
mov ax,curptr
mov prevptr,ax ;prevptr=curptr
mov ax,curlen
mov prevlen,ax ;prevlen=curlen
mov ax,trieptr
mov curptr,ax ;curptr=trieptr
mov curlen,0000h
.WHILE trieptr != NILPTR
ctr1 trieptr ;get character for trieptr (ret in ax)
mov bx,MAXMATCH
sub bx,curlen ;index to end of match array. Place
mov match[bx],al ;character there. Match array is
inc curlen ;written back to front.
parent trieptr ;get parent of trieptr (ret in ax)
mov trieptr,ax ;Move into trieptr. Walk up the tree
.ENDW ;reading the nodes until we reach the
;root (NILPTR).
mov ax,MAXMATCH ;curlen has length of string
sub ax,curlen ;move back in match array to where
inc ax ;string starts.
mov mstart,ax ;move index into mstart. This is used
xor bx,bx ;by update routine.
mov bx,ax ;index into array
.WHILE bx <= MAXMATCH ;max match length is 100 letters
mov al,match[bx] ;write the string in the match array
stosb ;to the output buffer.
inc outcount
cmp outcount,8000 ;buffer holds 8000. When full,
jae chkout ;flush buffer. Do this outside of
chkout_ret: ;loop. Hate to put jump-arounds
inc bx ;inside loop. Fall-through is faster
.ENDW
cmp all_over,01h
je endup
call update
.ENDW
chkout:
call write_file ;jumpout from above loop
jmp chkout_ret
endup:
call adjust
call write_file
writestring fsize,SIZEOF fsize
mov ax,word ptr comp_size[0] ;"Init. Size: "
mov dx,word ptr comp_size[2]
call convert
writestring crlf,2
writestring fnl,SIZEOF fnl ;"Final Size: "
mov ax,word ptr master_size[0]
mov dx,word ptr master_size[2]
call convert
mov ah,57h
mov al,01h
mov bx,file_handle2 ;set date and time
mov cx,xtime
mov dx,xdate
int 21h
final:
ret
decompression endp
;----------------------------------------------------------------------
; INITIALIZE procedure. Initializes variables and the first 256 nodes
; of the tree. The first 256 nodes represent the first level of the
; tree and they will never be deleted in this particular data structure.
;----------------------------------------------------------------------
initialize proc near
mov di,ds
mov es,di
mov ax,NILPTR
mov di,offset fielder ;fill fields with NILPTR
mov cx,DICTIONARY*2
rep stosw
mov ax,NILPTR
mov di,seg rsibfield ;now do far segment
mov es,di
mov di,offset fielder_two
mov cx,DICTIONARY*3
rep stosw
mov dictsize,00h
xor bx,bx
.WHILE dictsize < STATICSIZE ;fill first 256 bytes of ctr
mov ax,dictsize ;(character) field with usual
mov ctrfield[bx],ax ;dos characters 00-255.
inc bx ;These will comprise the non-
inc bx ;erasable potion of the dictionary.
inc dictsize
.ENDW
mov curptr,NILPTR
mov curlen,00h
ret
initialize endp
;-----------------------------------------------------------------------
; UPDATE procedure. Updates the tree by adding and deleting appropriate
; leaves. Also determines which codewords will be re-used and which
; leaves will be deleted from the tree.
; Input: from main program - trieptr, prevptr, match, etc.
;------------------------------------------------------------------------
update proc near
LOCAL update_flag:BYTE
cmp prevptr,NILPTR
je update_out
mov update_flag,00h
mov mindex,00h
.WHILE 1 ;loop indefinitely
mov ax,curlen ;Don't loop more than current
cmp mindex,ax ;match length. Leaf nodes are
jae update_out ;added to previous pointer.
cmp mindex,MAXINCREMENT ;MAXINCREMENT is number of pre-
jae update_out ;fixes to add. Read on...
mov ax,prevlen ;Maximum match length is 100
add ax,mindex ;in this code. This routine
cmp ax,MAXMATCH ;adds prefixes one-at-a-time
jae update_out ;per loop. Variable mindex is
;the loop counter.
mov bx,mstart ;mstart from main program
add bx,mindex ;index into match array
xor ah,ah ;make into word
mov al,match[bx] ;get letter from array
mov mval,ax ;record in mval
cmp update_flag,01h ;skip the child search after first
je skip_this ;leaf added since the rest will be
;new and without children
child prevptr,mval ;see if mval is child of prevptr
mov trieptr,ax ;record result in trieptr
;If no match was found, we will
;walk down the tree from prevptr and
;add leaves consisting of the
;matched prefixes. The number
;added is regulated by MAXINCREMENT.
.IF trieptr == NILPTR ;if not child, then update tree
mov update_flag,01h
skip_this:
.IF dictsize <= MAXPTR
mov ax,dictsize ;get free unused pointer
mov trieptr,ax ;move pointer into trieptr
inc dictsize ;increment to next available pointer
.ELSE ;No more unused pointers?
mov cx,bigplace ;Get saved place. Will loop around
bigtop: ;from 256 to 8190 progressively and
cmp cx,prevptr ;re-use pointers. Don't use prevptr.
je around ;We'll get the oldest leaf without
cmp cx,MAXPTR ;children, delete the leaf,update the
ja bigtop2 ;tree with recycled pointer.Over max?
bigtop1: ;Then reset to 256 and start over.
mov bx,cx ;put into bx
shl bx,1 ;word index
cmp lcfield[bx],NILPTR ;see if pointer has children
;NILPTR means no children for this
je bigtopout ;node and we can erase & re-use it,
around: ;else keep trying
inc cx ;go to next pointer
jmp bigtop ;repeat
bigtop2: ;jump-out from above loop
mov cx,STATICSIZE ;reset to 256
jmp bigtop1
bigtopout:
mov trieptr,cx ;put leaf node in trieptr
inc cx ;advance for progressive usage
mov bigplace,cx ;Record in bigplace. We'll start
;here next time we look for a pointer
;to re-use.
delete_leaf ;delete leaf node and,if necessary,
;reconnect siblings
.ENDIF
add_leaf ;add leaf to tree
.ENDIF
mov ax,trieptr
mov prevptr,ax
inc mindex
.ENDW
update_out:
ret
update endp
;----------------------------------------------------------------------
; READ_FILE procedure. Reads the file into a buffer for processing
; by the program.
; Input: None
; Output: eof flag
;-----------------------------------------------------------------------
read_file proc near USES dx cx bx ax
mov bytes_read,0000h
mov ah,3fh
mov bx,file_handle
mov cx, 8000
push ds
ASSUME ds:seg inbuffer
mov dx,es
mov ds,dx
mov dx,offset inbuffer
int 21h
ASSUME ds:@data
pop ds
mov bytes_read,ax ;Number of bytes read will be in ax. Less
.IF ax < 8000 ;than requested bytes indicates eof.
mov end_flag,01h
.ENDIF
mov si,offset inbuffer
mov incount,0000h
ret
read_file endp
;---------------------------------------------------------------------
; WRITE_FILE procedure. Writes the output buffer to disk.
; Input: none
;---------------------------------------------------------------------
write_file proc near USES ax bx cx dx
mov cx,outcount
add word ptr master_size[0],cx ;keeps track of bytes written
adc word ptr master_size[2],0000h ;to file in variable master_size
mov ah,40h ;This variable will be referenced
mov bx,file_handle2 ;later by adjustment routine in
push ds ;the decoder. Encoder doesn't
mov dx,es ;use this variable so no harm done
mov ds,dx ;there.
ASSUME ds:seg outbuffer
mov dx, offset outbuffer
int 21h
ASSUME ds:@data
pop ds
mov di,offset outbuffer
mov outcount,0000h
ret
write_file endp
;--------------------------------------------------------------------------
; CMDLINE procedure. Parses the command line for files.
;-------------------------------------------------------------------------
cmdline proc near C USES ax bx es si,arg1:WORD,arg2:WORD,arg3:WORD
LOCAL go_around_flag:BYTE
mov go_around_flag,00h
mov ah,51h ;get psp
int 21h
mov es,bx ;move psp seg into es
xor cx,cx
xor bx,bx
.IF es:[80h]==cl ;check for command line arguments
ret ;value at 80h of the psp is number of bytes
.ENDIF ;in command line argument.
mov bx,82h ;find start of command line
mov si,arg1 ;buffer for command line
@@:
mov al,es:[bx] ;get letter from dta (command line)
.IF al==13 || al==00
ret
.ENDIF
.IF al < 33 && go_around_flag==0
mov si,arg2 ;buffer for output file
inc go_around_flag
mov filelen,cx
xor cx,cx
jmp skp
.ENDIF
.IF al < 33 && go_around_flag==1
mov si,arg3 ;buffer for parameter line
mov filelen2,cx
mov paraflag,01h
inc go_around_flag
.ENDIF
mov [si],al
inc si
skp:
inc bx
inc cx ;byte count of file name
jmp @B
cmdline endp
;--------------------------------------------------------------------------
; |
; ADJUSTMENT FOR DECOMPRESSION FINAL OUTPUT |
; |
;--------------------------------------------------------------------------
adjust proc near PRIVATE
;NOTES:
;This decompression routine may add on an extra byte or two
;on the end. This is usually the letter coded as 00 since the
;encoder has to flush the final byte to the file, whether it is
;full or not. These extra bytes are removed by this routine. The
;decoder never generates fewer bytes than what the final size
;of the file should be. The encoder writes the original file
;size to the file at bytes 5-8. The decoder keeps track of the
;number of bytes generated and compares it with this figure.
;This routine then subtacts the extra byte(s) off the end. Don't
;worry, they're never needed and are really extra bytes. This
;is one way to do things and it works without fail.
mov dx,word ptr [master_size][2] ;load high word of byte count
mov ax,word ptr [master_size][0] ;load low word of byte count
add ax,[outcount] ;add the last count
adc dx,0000h ;add the carry flag to high word
cmp dx,word ptr [init_size][2] ;see if high word of byte count=
je nxt ;high word of header info.
cmp dx,word ptr [init_size][2] ;see if high word of byte count is
jb lving ;lower than high word of header info
sub ax,word ptr [init_size][0] ;If so , leave without adjusting
sbb dx,word ptr [init_size][2] ;otherwise, if byte count is greater
;than header info, subtact the two
sub [outcount],ax ;and take the difference off the end
jmp short lving ;so that the file size will be correct
nxt:
cmp ax,word ptr [init_size] ;compare low words
jbe lving ;if expanded file is smaller, leave
sub ax,word ptr [init_size] ;without adjusting, otherwise, subtract
sub [outcount],ax ;the two and take the difference off
;the end again
lving:
ret
adjust endp
;---------------------------------------------------------------------------
; |
; OPENS SOURCE FILE |
; Input: Nothing |
; Output: Carry flag set if error |
;---------------------------------------------------------------------------
open_source_file proc near
mov ah,3dh
mov al,00h
mov dx,offset filename ;open input file
int 21h
.IF CARRY? ;check to see if carry flag is set
print error
stc
jmp @F
.ENDIF
mov file_handle,ax
@@:
ret
open_source_file endp
;---------------------------------------------------------------------------
; |
; OPENS OUTPUT FILE |
; Input: Nothing |
; Output: Carry flag set if error |
;---------------------------------------------------------------------------
open_second_file proc near
mov ah,3ch
mov cx,00h ;create/truncate output file
mov dx,offset tempfile
int 21h
.IF CARRY? ;check to see if carry flag is set
print error
stc
jmp @F
.ENDIF
mov file_handle2,ax
@@:
ret
open_second_file endp
;--------------------------------------------------------------------------
; CAPS procedure |
; Capitalizes any string |
; Input: near pointer to string,length of buffer |
;--------------------------------------------------------------------------
caps proc near C PUBLIC USES si cx ax,arg1:WORD,arg2:WORD
mov si,arg1
xor cx,cx
mov cx,arg2
cap_loop:
mov al,[si]
cmp al,61h ;below a?
jb capon
cmp al,7ah ;above z?
ja capon
AND al,0dfh ;clear the 6th bit
mov [si],al
capon:
inc si
loop cap_loop
ret
caps endp
;--------------------------------------------------------------------------
; CONVERT procedure |
; Converts any Hex number to displayable decimal string. |
; Input: Doubleword in dx:ax |
; Output: To screen automatically (from variable: ascii_string) |
;--------------------------------------------------------------------------
convert proc near
mov di,ds
mov es,di ;load ds into es
mov si,10 ;load base 10 into si
xor bx,bx ;zero bx
xor cx,cx ;zero cx
mov bx,ax ;move low word into bx
mov cx,dx ;load high word into cx
mov dx,-1 ;set end-of-string flag
push dx ;save it as last string byte
hex_loop_1:
xchg ax,cx ;load ax with high word
xor dx,dx ;zero dx
div si ;divde by 10, leave rem. in dx
xchg cx,ax ;save high word back in cx
xchg ax,bx ;load ax with low word
div si ;divide by ten
xchg bx,ax ;save low word back in bx
add dl,30h ;convert remainder to ascii
push dx ;save as digit
or bx,bx ;see if low word is zero
jne hex_loop_1
or cx,cx ;see if high word is zero
jne hex_loop_1
xor cx,cx
xor dx,dx
lea di, ascii_string ;point di to string
hex_loop_2:
pop ax ;pop first digit
cmp ax,-1 ;see if end-of-string
je write_it
stosb ;write it to string
inc cx ;increase digit counter
jmp hex_loop_2
write_it:
cmp cx,1 ;see if string is 1 digit long
jne write ;if not, write string
lea di, ascii_string ;point di to string
mov al,[di] ;move only digit to al
mov ah,"0" ;move "0" into ah
mov [di+1],al ;write "0" as first letter
mov [di],ah ;write digit as second
mov cx,2 ;adjust counter to two
write:
mov [count],cx
writestring ascii_string,count
ret
convert endp
END