home *** CD-ROM | disk | FTP | other *** search
- ; COMP2.ASM as of April 29, 1981
- ;
- ; This routine is extracted and CP/Mified from an article
- ; 'An Inroduction to Data Compression' by Harold Corbin,
- ; that appeared in the April 1981 issue of Byte magazine.
- ;
- ; CP/Mified by: Kelly Smith, CP/M-Net "SYSOP" April 29, 1981
- ;
- ; COMP2 (text compression routine) takes keyboard text
- ; (uppercase letters ONLY) and compresses them using the
- ; Huffman coding technique. The first two bytes in the data
- ; buffer (dbuf) are the bit count. Encoded data is stored in
- ; the data buffer in packed form, 8 bits to the data byte.
- ;
- ; The Huffman Code was derived for the following table, by
- ; the relative frequency of occurence in a random sampling
- ; of English text. Frequently used characters are assigned
- ; shorter bit 'code' patterns, and seldom used characters
- ; are assigned longer bit 'code' patterns. Note: with any
- ; set of codes generated, it is important that no code has a
- ; shorter code as part of its beginning. For example, if the
- ; letter 'E' is 100, then 10010 cannot be the code for
- ; another letter...because in scanning the bit stream from
- ; left to right (using EXP2, the text expansion routine),
- ; the decoding algorithm would think that 10010 is 'E' (100)
- ; followed by 10 and NOT the different letter that was
- ; intended.
- ;
- ; Letter Frequency (%) Huffman Code
- ;
- ; E 13.0 100
- ; T 10.5 001
- ; A 8.1 1111
- ; O 7.9 1110
- ; N 7.1 1100
- ; R 6.8 1011
- ; I 6.3 1010
- ; S 6.1 0110
- ; H 5.2 0101
- ; D 3.8 11011
- ; L 3.4 01111
- ; F 2.9 01001
- ; C 2.7 01000
- ; M 2.5 00011
- ; U 2.4 00010
- ; G 2.0 00001
- ; Y 1.9 00000
- ; P 1.9 110101
- ; W 1.5 011101
- ; B 1.4 011100
- ; V .9 1101001
- ; K .4 110100011
- ; X .15 110100001
- ; J .13 110100000
- ; Q .11 1101000101
- ; Z .07 1101000100
- ;
- ; The COMP2 program takes characters entered via the
- ; keyboard, checks for a legal character, finds the Huffman
- ; Code corresponding to the entered character, and the
- ; serial bit stream that results from the encoding process
- ; is packed and stored 8 bits to the byte.
- ;
- ; The heart of the program's operation is the table lookup
- ; and shifting function. Based upon a letter ASCII code, an
- ; index is computed that is then added to the base address
- ; of the encoding table. This table has the following
- ; format: two 8 bit words are required for each letter to be
- ; encoded; the low order 4 bits of the first word in memory
- ; contain a count of the number of bits required to encode
- ; the letter. The remaining 12 bits, 8 in the second byte
- ; followed by 4 in the top half of the first byte are used
- ; to store the compressed code. The code is stored left-
- ; justified in the 12 bit word.
- ;
- ; With the compressing code located, it is serialized by
- ; shifting left according to the count in the 4 bit part of
- ; the table. As each bit is shifted out, the total bit count
- ; in the buffer is updated. The processing of the input
- ; stream continues until a period (.) is detected, and
- ; control returns to CP/M. The data buffer at address 4000
- ; Hex then remains intact for expansion by EXP2.
- ;
- ;
- ;
- true equ -1 ; define true
- false equ not true; define false
- ;
- stdcpm equ true ; true if regular CP/M (base address 0000h)
- altcpm equ false ; true if alternate CP/M (base address 4200h)
- ;
- if stdcpm ; if standard CP/M...
- base equ 0000h ; bsae for standard CP/M system
- endif ; end if...
- ;
- if altcpm ; if alternate CP/M...
- base equ 4200h ; base for H8 or TRS-80 CP/M system
- endif ; end if...
- ;
- bdos equ base+5 ; CP/M BDOS entry address for function call
- ;
- rdcon equ 1 ; read console character
- ;
- dbuf equ 04000h ; data buffer for compressed data
- ;
- org base+100h
-
- start: lxi h,0 ; save "old" CP/M stack pointer
- shld dbuf ; clear compressed bit count
- dad sp
- shld oldstk
- lxi sp,stack; make "new" stack pointer
- lxi h,dbuf+2; store pointer to next bit location in data buffer
- shld dadd
- xra a ; clear bit position
- sta pos
- ;
- ; get character from keyboard, check if end of text '.' character
- ;
- get$character:
- ;
- push b ; save reigisters
- push d
- push h
- mvi c,rdcon ; read console character function
- call bdos
- pop h ; restore registers
- pop d
- pop b
- ani 07fh ; mask-off parity bit
- cpi '.' ; end of text?
- jnz process ; if not, process this character
- lhld dadd ; clean up partial byte remaining
- lda pos ; compute remaining shift count
- mov b,a
- mvi a,8
- sub b
- ani 7
- mov b,a ; keep shift count
- mov a,m ; get compressed byte
- shift: jz exit ; exit to CP/M, if all bytes processed
- ral
- dcr b
- mov m,a ; replace compressed byte
- jmp shift ; loop 'till done
- process:sui 'A' ; character < 'A'?
- jc get$character
- cpi 'Z'-'A'+1 ; character > 'Z'?
- jnc get$character
- add a ; multiply by 2 to get table index
- mov c,a
- mvi b,0 ; make index bias to table
- lxi h,compression$table ; point to base of table
- dad b ; add index bias to table pointer
- mov e,m ; get encoded value
- inx h
- mov d,m
- mov a,e ; get bit count for this character
- ani 00fh ; mask-off high nibble
- mov b,a ; keep bit count
- xchg ; swap pointer for encoded value
- next: xra a
- dad h ; shift out bit stream...
- ral ; ...MSB first
- ani 1 ; mask-off all but output bit
- push h
- lhld dadd ; get pointer to next bit location
- mov d,a
- lda pos ; get current bit position
- mov e,a ; keep it
- mov a,m ; get "old" compressed data
- ral ; make room...
- ora d ; ...compress
- mov m,a ; save it, done with this compression
- inr e ; update position
- mov a,e
- cpi 8 ; full byte processed?
- jnz stor
- xra a ; clear position
- inx h ; bump for next bit location
- shld dadd
- stor: sta pos
- lhld dbuf ; update compressed bit count
- inx h
- shld dbuf
- pop h
- dcr b ; de-bump count
- jnz next ; continue compression, if not done
- jmp get$character ; get next character, and compress
- ;
- exit: lhld oldstk ; get "old" CP/M stack pointer
- sphl ; and restore...
- ret ; return to CP/M
- ;
- compression$table:
- ;
- dw 0f004h ; 'A'
- dw 07006h ; 'B'
- dw 04005h ; 'C'
- dw 0d805h ; 'D'
- dw 08003h ; 'E'
- dw 04805h ; 'F'
- dw 00805h ; 'G'
- dw 05004h ; 'H'
- dw 0a004h ; 'I'
- dw 0d009h ; 'J'
- dw 0d189h ; 'K'
- dw 07805h ; 'L'
- dw 01805h ; 'M'
- dw 0c004h ; 'N'
- dw 0e004h ; 'O'
- dw 0d406h ; 'P'
- dw 0d14ah ; 'Q'
- dw 0b004h ; 'R'
- dw 06004h ; 'S'
- dw 02003h ; 'T'
- dw 01005h ; 'U'
- dw 0d207h ; 'V'
- dw 07406h ; 'W'
- dw 0d089h ; 'X'
- dw 00005h ; 'Y'
- dw 0d10ah ; 'Z'
- ;
- dadd: dw 0 ; next bit location
- ;
- pos: ds 1 ; current bit location
- ;
- oldstk: ds 2 ; storage for "old" CP/M stack pointer
- ds 32 ; storage for 16 level stack
- stack equ $ ; top of local stack
- ;
- end start
-