home *** CD-ROM | disk | FTP | other *** search
- '/////////////////////////////////////////////////////////////////////////////
- '/ SLENCODE.BAS v1.0 /
- '/ An Implemention of the LZSS Pattern Compression Algorithm /
- '/ By Rich Geldreich 1992 /
- '/ Original C Program by Haruhiko Okumura 4/6/1989 /
- '/ /
- '/ Anyone may freely distribute and use this program, as long as proper /
- '/ credit is given. Thanks! /
- '/ /
- '/ Any bugs, problems, questions, write/call: /
- '/ Rich Geldreich /
- '/ 410 Market St. /
- '/ Gloucester City, NJ 08030 /
- '/ (609)-742-8752 or (609)-456-0721 /
- '/ /
- '/ The two programs SLDECODE.ASM and SLDECODE.BAS decompress files created /
- '/ with this program. /
- '/////////////////////////////////////////////////////////////////////////////
-
- 'ToDo list: add more terse error checking, move code from main module to
- 'a callable subroutine, CRC-32 routine, add a statistical modeling scheme
- 'for greater compression(dynamic Huffman coding is a very likely choice-
- 'arithmitic coding in QB might not work too well).
-
- 'Warning: Do NOT press CTRL+Break and then continue this program while
- 'you are running it in the environment! At best, the resulting compressed
- 'file will be invalid. At worst, your machine will lock up.
-
- 'QuickBASIC 4.5 users: As this program is, it will not work correctly with
- 'QB 4.5. To make it QB 4.5 compatible, simply change all of the "SSEG" strings
- 'in this program to "VARSEG" strings with the search and replace function.
- 'Note that the SLDECODE.ASM routine should not require any changes for it to
- 'work properly.
-
- DEFINT A-Z
-
- CONST BufferSize = 4096 'Ring buffer's size
- CONST MaxMatch = 74 'Maximum match length
- CONST Threshold = 2 'Minimum match length
- CONST Null = BufferSize
-
- DECLARE SUB InitTree () 'Initializes the multiple binary trees
- DECLARE SUB InsertNode (R) 'Inserts string R to R+MaxMatch-1 into tree
- DECLARE SUB DeleteNode (P) 'Deletes node P from tree
- DECLARE SUB FillInputBuffer () 'Fills input buffer from disk
- DECLARE SUB PutData (Stuff, Codesize) 'Writes multibit codes to output buffer
-
- 'Last Buffersize characters from input file go into ring buffer for searching
- DIM SHARED Ring.Buffer((BufferSize - 1) + (MaxMatch - 1))
- DIM SHARED Dad(BufferSize) 'Father to each entry
- DIM SHARED LeftSon(BufferSize) 'Left son of each entry
- DIM SHARED RightSon(BufferSize + 1 + 255) 'Right son of each entry+root of each
- 'binary tree.
- 'Note that node 4096 is a "null" node
-
- 'Maximum match length and position returned by InsertNode
- DIM SHARED Match.Position, Match.Length
-
- 'Input & output buffer stuff
- DIM SHARED InBuffer$, Iseg, IAddress, IEndAddress
- DIM SHARED OutBuffer$, Oseg, OAddress, OStartAddress, OEndAddress
-
- 'PutData stuff
- DIM SHARED Shift(7) AS LONG, Char&, CurrentBit
-
- PRINT "SLENCODE.BAS v1.0 - LZSS Encoder in QuickBASIC 4.5"
- PRINT "By Rich Geldreich 1992"
- A$ = COMMAND$
- IF A$ = "" THEN INPUT "File to compress"; A$
- IF A$ = "" THEN END
- IF INSTR(A$, "OUTPUT.SL1") THEN
- PRINT "Cannot compress output file."
- END
- END IF
-
- 'Why use DATA/READ when we can just plop the values in ourselves?
- Shift(0) = 1
- Shift(1) = 2
- Shift(2) = 4
- Shift(3) = 8
- Shift(4) = 16
- Shift(5) = 32
- Shift(6) = 64
- Shift(7) = 128
-
- 'Initialize the input and output buffers
- InBuffer$ = SPACE$(4096)
- IAddress = 0: IEndAddress = 1
-
- OutBuffer$ = SPACE$(4096)
- 'Make sure that the offset used to address the OutBuffer$ is always an
- 'integer for speed.
- A& = SADD(OutBuffer$)
- A& = A& - 65536 * (A& < 0)
- Oseg& = SSEG(OutBuffer$) + (A& \ 16)
- IF Oseg& > 32767 THEN Oseg = Oseg& - 65536 ELSE Oseg = Oseg&
- OAddress = A& AND 15
- OStartAddress = OAddress
- OEndAddress = OAddress + 4096
-
- 'Input input file
- OPEN A$ FOR BINARY AS #1
- BytesLeft& = LOF(1)
- IF BytesLeft& = 0 THEN
- PRINT A$; " not found or null."
- CLOSE #1
- KILL A$
- END
- END IF
- PRINT "Compressing "; A$; " -"; : xpos = POS(0)
-
- 'Open output file
- OPEN "output.sl1" FOR BINARY AS #2
- IF LOF(2) <> 0 THEN
- CLOSE #2
- KILL "output.sl1"
- OPEN "output.sl1" FOR BINARY AS #2
- END IF
-
- GOSUB UpdatePercent
-
- 'Put my little header
- A$ = "RG": PUT #2, , A$
- PUT #2, , BytesLeft&
-
- 'Initialize the ring buffer with space characters.
- InitTree
- S = 0: R = BufferSize - MaxMatch
- FOR Work = 0 TO R - 1
- Ring.Buffer(Work) = 32
- NEXT
-
- 'Attempt to get MaxMatch characters from the file, and put them in the ring
- 'buffer.
- FOR LookAheadLength = 0 TO MaxMatch - 1
- IAddress = IAddress + 1
- IF IAddress = IEndAddress THEN FillInputBuffer
- IF BytesLeft& = 0 THEN EXIT FOR
- Ring.Buffer(R + LookAheadLength) = PEEK(IAddress)
- BytesLeft& = BytesLeft& - 1
- NEXT
-
- 'Insert the characters into the tree.
- FOR Work = 1 TO MaxMatch
- InsertNode R - Work
- NEXT
- InsertNode R
- DO
- 'Match.Length must always be less than the look ahead buffer's length
- IF Match.Length > LookAheadLength THEN Match.Length = LookAheadLength
-
- 'Does the match length exceed the break even point?
- IF Match.Length <= Threshold THEN
- CodesOut& = CodesOut& + 1
-
- Match.Length = 1
- PutData Ring.Buffer(R) * 2, 9 'Send 1 character unencoded
- ELSE
- CodesOut& = CodesOut& + 1
-
- 'Send a position and match length pair
- IF Match.Length <= 10 THEN 'do we send 3 or 6 bits for match length?
- PutData 1, 2 'send 1 0
- PutData (Match.Length - (Threshold + 1)), 3
- ELSE
- PutData 3, 2 'send 1 1
- PutData (Match.Length - 11), 6
- END IF
-
- 'Send match position.
- PutData Match.Position, 12
- END IF
-
- 'Get Match.Length chars from the input file and put them into the
- 'ring buffer.
- Last.Match.Length = Match.Length
- FOR Work = 0 TO Last.Match.Length - 1
- 'Check to see if any bytes left, and get a byte if there is.
- 'Otherwise exit this loop.
- IF BytesLeft& = 0 THEN EXIT FOR
- IAddress = IAddress + 1
- IF IAddress = IEndAddress THEN FillInputBuffer
- C = PEEK(IAddress)
- BytesLeft& = BytesLeft& - 1
-
- DeleteNode S
- Ring.Buffer(S) = C
-
- 'Copy the char to a ghost buffer at the end of the table in case
- 'it's near the beginning. This simplifies the InsertNode
- 'procedure, because the ring buffer comparison does not have to
- 'increment the comparison positions modulo BufferSize.
-
- IF S < (MaxMatch - 1) THEN Ring.Buffer(S + BufferSize) = C
-
- S = (S + 1) AND (BufferSize - 1)
- R = (R + 1) AND (BufferSize - 1)
- InsertNode R
- NEXT
-
- 'At end of file, but still characters left in the look ahead buffer.
- FOR Work = Work TO Last.Match.Length - 1
- DeleteNode S
- S = (S + 1) AND (BufferSize - 1)
- R = (R + 1) AND (BufferSize - 1)
- LookAheadLength = LookAheadLength - 1
- IF LookAheadLength > 0 THEN InsertNode R
- NEXT
-
- IF Tc = 0 THEN GOSUB UpdatePercent
- Tc = (Tc + 1) AND 255
-
- LOOP WHILE LookAheadLength > 0
-
- 'Flush output buffer
- PutData 0, 12
- OutBuffer$ = LEFT$(OutBuffer$, OAddress - OStartAddress)
-
- PUT #2, , OutBuffer$
- PUT #2, 3, CodesOut& 'Store # of codes sent
-
- 'Report compression
- LOCATE , 1
- PRINT "Bytes in:"; LOF(1); "Bytes out:"; LOF(2);
- PRINT "Compression:";
- PRINT 100 - (LOF(2) * 100&) \ LOF(1); "%"; SPACE$(16)
- CLOSE #1, #2
- END
-
- UpdatePercent:
- LOCATE , xpos
- PRINT (100& * (LOF(1) - BytesLeft&)) \ LOF(1); "% complete";
- RETURN
-
- 'Deletes node P from binary tree
- SUB DeleteNode (P)
- 'not in tree yet?
- IF Dad(P) = Null THEN EXIT SUB
-
- L = LeftSon(P)
- R = RightSon(P)
-
- IF R = Null THEN 'no right son?
- Q = L 'use left son then
- ELSEIF L = Null THEN 'no left son?
- Q = R 'use right son then
- ELSE
- Q = L 'great, it has two sons! find a place for
- 'one of them...
-
- IF RightSon(Q) <> Null THEN
- 'find a leaf branch
- DO
- Q = RightSon(Q)
- LOOP WHILE RightSon(Q) <> Null
- 'RightSon(Q)=Null now
-
- 'Make right son of Q's dad point tward the left son of Q
-
- D = Dad(Q)
- RightSon(D) = LeftSon(Q)
- Dad(LeftSon(Q)) = D
-
- 'The left son of P is now the left son of Q
- L = LeftSon(P)
- LeftSon(Q) = L
- Dad(L) = Q
- END IF
- 'The right son of P is now the right son of Q
- R = RightSon(P)
- RightSon(Q) = R
- Dad(R) = Q
- 'now P has no children
- END IF
-
- 'Delete node P- replace it with node Q
- Dad(Q) = Dad(P)
- IF RightSon(Dad(P)) = P THEN
- RightSon(Dad(P)) = Q 'which way was P's father pointing twards
- ELSE 'P? Right or left?
- LeftSon(Dad(P)) = Q
- END IF
- Dad(P) = Null 'P bites the dust here
-
- END SUB
-
- SUB FillInputBuffer
- GET #1, , InBuffer$
- A& = SADD(InBuffer$): A& = A& - 65536 * (A& < 0)
- Iseg& = SSEG(InBuffer$) + (A& \ 16)
- IF Iseg& > 32767 THEN Iseg = Iseg& - 65536 ELSE Iseg = Iseg&
- IAddress = A& AND 15
- IEndAddress = IAddress + 4096
- DEF SEG = Iseg
- END SUB
-
- SUB InitTree
- 'Initialize RightSon(FirstChar)...RightSon(LastChar) to null
- '(These make up the roots to the multiple binary search trees.)
- FOR Work = BufferSize + 1 TO BufferSize + 256
- RightSon(Work) = Null
- NEXT
- 'Clear ring buffer's father pointers
- FOR Work = 0 TO BufferSize - 1
- Dad(Work) = Null
- NEXT
- END SUB
-
- 'Inserts string Ring.Buffer(R) into tree- finds maximum match length and
- 'position too. If match length is the maximum match length, then the old
- 'node will be deleted for the new one to avoid having two nodes of the
- 'same string in the tree.
- SUB InsertNode (R)
- Test = 1
- P = (BufferSize + 1) + Ring.Buffer(R) 'Find first tree
- RightSon(R) = Null
- LeftSon(R) = Null
- Match.Length = 0
- DO
- IF Test >= 0 THEN 'which way did he go?
- IF RightSon(P) <> Null THEN
- P = RightSon(P)
- ELSE
- RightSon(P) = R
- Dad(R) = P
- EXIT SUB
- END IF
- ELSE
- IF LeftSon(P) <> Null THEN
- P = LeftSon(P)
- ELSE
- LeftSon(P) = R
- Dad(R) = P
- EXIT SUB
- END IF
- END IF
-
- 'Do an alphabetical comparison on Ring.Buffer(R) & Ring.Buffer(P)
- FOR Work = 1 TO MaxMatch - 1 'Compare
- Test = Ring.Buffer(R + Work) - Ring.Buffer(P + Work)
- IF Test <> 0 THEN EXIT FOR 'Exit if not equal
- NEXT
- IF Work > Match.Length THEN 'higher than current match length?
- Match.Position = P 'save match position and match length
- Match.Length = Work
- END IF
-
- LOOP UNTIL Work >= MaxMatch
-
- 'The following code is only executed when the string found is of the
- 'maximum match size.
- 'The old string is deleted so the binary tree doesn't have two copies
- 'of the same string for efficiency.
-
- 'delete node P for node R
-
- Dad(R) = Dad(P)
- LeftSon(R) = LeftSon(P)
- RightSon(R) = RightSon(P)
- Dad(LeftSon(P)) = R
- Dad(RightSon(P)) = R
-
- 'make P's father point twards R
-
- IF RightSon(Dad(P)) = P THEN
- RightSon(Dad(P)) = R
- ELSE
- LeftSon(Dad(P)) = R
- END IF
-
- 'delete P
-
- Dad(P) = Null
-
- END SUB
-
- 'Sends a multibit code to the output file.
- SUB PutData (Stuff, Codesize) STATIC
-
- DEF SEG = Oseg 'switch to output segment
-
- Char& = Char& + Stuff * Shift(CurrentBit) 'attach bits to bit buffer
- CurrentBit = CurrentBit + Codesize '+Codesize more bits now
-
-
- DO WHILE CurrentBit > 7 'we have at least one byte?
- IF OAddress = OEndAddress THEN 'At end of output buffer?
- PUT #2, , OutBuffer$
- OAddress = OStartAddress
- END IF
- POKE OAddress, Char& AND 255 'save 8 bits
- OAddress = OAddress + 1
-
- Char& = Char& \ 256
- CurrentBit = CurrentBit - 8 '8 less bits now
- LOOP
-
- DEF SEG = Iseg 'switch to input segment
-
- END SUB
-
-