home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Power-Programmierung
/
CD1.mdf
/
basic
/
library
/
pb
/
library5
/
huffman.bas
< prev
next >
Wrap
BASIC Source File
|
1990-07-14
|
4KB
|
128 lines
CLS
InFile$="A SIMPLE STRING TO BE ENCODED USING A MINIMAL NUMBER OF BITS"
CALL Huffman(InFile$,OutFile$,NewFile$)
print:print:print
PRINT "In: ";LEN(InFile$);InFile$
PRINT "Out: ";LEN(OutFile$)
PRINT "New: ";LEN(NewFile$);NewFile$
input,r
END
'*****************************************************************************
' Huffman Encoding File Compression Technique
'
' From: R Sedgwick. Algorithms. Reading, MA: Addison-Wesley.
' 1984. Second Ed. pp 286 / 93.
'
' Converted to Power Basic by M. Rosenberg CI$: [73707,2545]
'
SUB Huffman(InText$,OutText$,NewText$)
SHARED N%,Heap%(),Count%()
DIM Count%(1024),Heap%(1024),Dad%(1024),Code%(256),Leng%(256)
' Count the frequency of each character in the message to be encoded (P. 287)
FOR I%=0 to 255 : Count%(I%)=0 : NEXT I%
Csr%=0 : DO : INCR Csr% : X%=ASC(MID$(InText$,Csr%,1)) : INCR Count%(X%)
LOOP UNTIL Csr%=LEN(InText$)
' Initialize the heap array to point to non-zero frequency counts (P. 290)
N%=0 : FOR I%=0 to 255 : IF Count%(I%)<>0 THEN INCR N% : Heap%(N%)=I%
NEXT I%
' Construct an indirect heap on the frequency values (P. 289)
FOR K% = N% TO 1 STEP -1 : CALL PqDownHeap(K%) : NEXT K%
' Construct the trie (P. 290)
DO : T%=Heap%(1) : Heap%(1)=Heap%(N%) : DECR N%
CALL PqDownHeap(1)
Count%(255+N%)=Count%(Heap%(1))+Count%(T%)
Dad%(T%)=255+N% : Dad%(Heap%(1))=-255-N%
Heap%(1)=255+N% : CALL PqDownHeap(1)
LOOP UNTIL N%=1
Dad%(255+N%)=0
' Reconstruct the information from the representation of the coding tree (P.291)
' computed during the sifting process.
FOR K% = 0 TO 255
IF Count%(K%)=0 THEN
Code%(K%)=0 : Leng%(K%)=0
ELSE
I%=0 : J&=1 : T%=Dad%(K%) : X%=0
DO : IF T%<0 THEN X%=X%+J& : T%=0-T%
T%=Dad%(T%) : J&=J&+J& : INCR I%
LOOP UNTIL T%=0
Code%(K%)=X% : Leng%(K%)=I%
END IF
NEXT K%
' Use the computed representations of the code to encode the string (P. 292)
J%=0 : OutText$="" : Hold$=""
DO : INCR J%
Char%=ASC(MID$(InText$,J%,1)) : Compr$=BIN$(Code%(Char%))
DO WHILE LEN(Compr$)< Leng%(Char%) : Compr$="0"+Compr$ : LOOP
Hold$=Hold$+Compr$
IF LEN(Hold$)>8 THEN
OutText$=OutText$+CHR$(Bin2Int(LEFT$(Hold$,8)))
Hold$=RIGHT$(Hold$,LEN(Hold$)-8)
END IF
LOOP UNTIL J%=LEN(InText$)
' Add a byte at the end that contains any left-over bits
IF LEN(Hold$)>0 THEN
Hold$=Hold$+STRING$(8-LEN(Hold$),"0")
OutText$=OutText$+CHR$(Bin2Int(LEFT$(Hold$,8)))
END IF
'*****************************************************************************
' Unpack compressed string into character representation of binary
J%=0 : UnCompr$="" : NewText$=""
DO : INCR J%
Hold$=MID$(OutText$,J%,1) : Hold$=BIN$(ASC(Hold$))
DO WHILE LEN(Hold$)<8 : Hold$="0"+Hold$ : LOOP
UnCompr$=UnCompr$+Hold$
LOOP UNTIL J%=LEN(OutText$)
' Decode compressed string
DO : FOR K%=1 TO 256
IF K%=256 THEN EXIT LOOP 'All done
IF Leng%(K%)>0 THEN
IF Bin2Int(LEFT$(UnCompr$,Leng%(K%)))=Code%(K%) THEN
UnCompr$=RIGHT$(UnCompr$,LEN(UnCompr$)-Leng%(K%))
NewText$=NewText$+CHR$(K%) : EXIT FOR
END IF
END IF
NEXT K%
LOOP UNTIL LEN(UnCompr$) = 0
END SUB 'Huffman
SUB PqDownHeap(K%)
' Build and maintain an indirect heap on the frequency values (P. 139)
' reversing the inequalities since we want the smallest values first.
SHARED N%,Heap%(),Count%()
LOCAL J%,V%,Limit%
V%=Heap%(K%) : Limit% = N%/2
DO WHILE K% <= Limit%
J%=K%+K%
IF J%<N% THEN IF Count%(Heap%(J%)) > Count%(Heap%(J%+1)) THEN INCR J%
IF Count%(V%)<=Count%(Heap%(J%)) THEN Heap%(K%)=V% : EXIT SUB
Heap%(K%)=Heap%(J%) : Heap%(J%)=V% : K%=J%
LOOP
END SUB 'PqDownHeap
'*****************************************************************************
FUNCTION Bin2Int(X$)
X$=RTRIM$(X$) :X$=LTRIM$(X$) : Ll%=LEN(X$) : Ex%=0 : Tot%=0 : I%=Ll%
DO WHILE I% > 0
IF MID$(X$,I%,1)="1" THEN Tot&=Tot&+(2^Ex&)
INCR Ex& : DECR I% : WEND
Bin2Int=Tot&
END FUNCTION 'Bin2Int