home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Frozen Fish 2: PC
/
frozenfish_august_1995.bin
/
bbs
/
d01xx
/
d0128.lha
/
MRBackup
/
Compress.c
< prev
next >
Wrap
C/C++ Source or Header
|
1988-01-02
|
18KB
|
790 lines
/* MRBackup: File Compression/Decompression Routines
* Filename: Compress.c
* History: (most recent change first)
*
* 09/19/87 -MRR- Fixed bugs in decompression routine, introduced by
* new buffering scheme.
*
* 09/04/87 -MRR- This package now uses MRBackup's buffer to hold the
* compressed data and uses AmigaDOS I/O.
*/
/*
* Compress.c - data compression/decompression routines.
* This is an adaptation of the public domain Un*x compress v4.0.
*/
#include "MRBackup.h"
#define min(a,b) ((a>b) ? b : a)
#define INBUFSIZE 4L*1024L /* input buffer size */
#ifdef DEBUG
#define DBG(x) x
#else
#define DBG(x)
#endif
extern int errno;
/*
* Set USERMEM to the maximum amount of physical user memory available
* in bytes. USERMEM is used to determine the maximum BITS that can be used
* for compression.
*
* SACREDMEM is the amount of physical memory saved for others; compress
* will hog the rest.
*/
#ifndef SACREDMEM
#define SACREDMEM 0
#endif
#ifndef USERMEM
# define USERMEM 450000 /* default user memory */
#endif
# define MAXFILES 100 /* arbitrary maximum - see note below */
# define BITS 12 /* > 12 crashes system (?) */
# undef USERMEM
long filesize();
char *scdir();
#ifdef USERMEM
# if USERMEM >= (433484+SACREDMEM)
# define PBITS 16
# else
# if USERMEM >= (229600+SACREDMEM)
# define PBITS 15
# else
# if USERMEM >= (127536+SACREDMEM)
# define PBITS 14
# else
# if USERMEM >= (73464+SACREDMEM)
# define PBITS 13
# else
# define PBITS 12
# endif
# endif
# endif
# endif
# undef USERMEM
#endif /* USERMEM */
#ifdef PBITS /* Preferred BITS for this memory size */
# ifndef BITS
# define BITS PBITS
# endif BITS
#endif /* PBITS */
#if BITS == 16
# define HSIZE 69001L /* 95% occupancy */
#endif
#if BITS == 15
# define HSIZE 35023L /* 94% occupancy */
#endif
#if BITS == 14
# define HSIZE 18013L /* 91% occupancy */
#endif
#if BITS == 13
# define HSIZE 9001L /* 91% occupancy */
#endif
#if BITS <= 12
# define HSIZE 5003L /* 80% occupancy */
#endif
/*
* a code_int must be able to hold 2**BITS values of type int, and also -1
*/
#if BITS > 15
typedef long int code_int;
#else
typedef int code_int;
#endif
#ifdef SIGNED_COMPARE_SLOW
typedef unsigned long int count_int;
typedef unsigned short int count_short;
#else
typedef long int count_int;
#endif
#ifdef NO_UCHAR
typedef char char_type;
#else
typedef unsigned char char_type;
#endif /* UCHAR */
char_type magic_header[]= {
"\037\235"
}; /* 1F 9D */
/* Defines for third byte of header */
#define BIT_MASK 0x1f
#define BLOCK_MASK 0x80
/* Masks 0x40 and 0x20 are free. I think 0x20 should mean that there is
a fourth header byte (for expansion).
*/
#define INIT_BITS 9 /* initial number of bits/code */
/*
* compress.c - File compression ala IEEE Computer, June 1984.
*
* Authors:
* Spencer W. Thomas (decvax!harpo!utah-cs!utah-gr!thomas)
* Jim McKie (decvax!mcvax!jim)
* Steve Davies (decvax!vax135!petsd!peora!srd)
* Ken Turkowski (decvax!decwrl!turtlevax!ken)
* James A. Woods (decvax!ihnp4!ames!jaw)
* Joe Orost (decvax!vax135!petsd!joe)
* Mark R. Rinfret (mods) (mark@unisec.USI.COM)
*/
static unsigned endInput; /* true => input file exhausted */
static int n_bits; /* number of bits/code */
static int maxbits = BITS; /* user settable max # bits/code */
static code_int maxcode; /* maximum code, given n_bits */
static code_int maxmaxcode = 1 << BITS; /* NEVER generate this code */
#define MAXCODE(n_bits) ((1 << (n_bits)) - 1)
static count_int htab [HSIZE];
static USHORT codetab [HSIZE];
#define htabof(i) htab[i]
#define codetabof(i) codetab[i]
static code_int hsize = HSIZE; /* for dynamic table sizing */
static count_int fsize;
static struct FileHandle *infile, *outfile;
static char iname[256], oname[256];
static UBYTE inbuf[INBUFSIZE], *inbufptr;
static LONG inbufsize;
static UBYTE *outbufptr;
/*
* To save much memory, we overlay the table used by compress() with those
* used by decompress(). The tab_prefix table is the same size and type
* as the codetab. The tab_suffix table needs 2**BITS characters. We
* get this from the beginning of htab. The output stack uses the rest
* of htab, and contains characters. There is plenty of room for any
* possible stack (stack used to be 8000 characters).
*/
#define tab_prefixof(i) codetabof(i)
#define tab_suffixof(i) ((char_type *)(htab))[i]
#define de_stack ((char_type *)&tab_suffixof(1<<BITS))
static code_int free_ent = 0; /* first unused entry */
static int status = 0;
code_int getcode();
static int nomagic = 0; /* Use a 3-byte magic number header,
unless old file */
static int zcat_flg = 0; /* Write output on stdout, suppress
messages */
static int quiet = 1; /* don't tell me about compression */
/*
* block compression parameters -- after all codes are used up,
* and compression rate changes, start over.
*/
static int block_compress = BLOCK_MASK;
static int clear_flg = 0;
static long int ratio = 0;
#define CHECK_GAP 10000 /* ratio check interval */
static count_int checkpoint = CHECK_GAP;
/*
* the next two codes should not be changed lightly, as they must not
* lie within the contiguous general code space.
*/
#define FIRST 257 /* first free entry */
#define CLEAR 256 /* table clear output code */
static int force = 0;
static char ofname [100];
#ifdef DEBUG
static int verbose = 0;
#endif /* DEBUG */
int (*bgnd_flag)();
static int do_decomp = 0;
static int offset;
static LONG in_count; /* length of input */
static LONG bytes_out; /* length of compressed output */
static LONG out_count; /* # of codes output (for debugging) */
static LONG outbufcount; /* # bytes in output buffer */
/*****************************************************************
*
* Algorithm from "A Technique for High Performance Data Compression",
* Terry A. Welch, IEEE Computer Vol 17, No 6 (June 1984), pp 8-19.
*
* Algorithm:
* Modified Lempel-Ziv method (LZW). Basically finds common
* substrings and replaces them with a variable size code. This is
* deterministic, and can be done on the fly. Thus, the decompression
* procedure needs no input table, but tracks the way the table was built.
*/
static void
comp_init()
{
if(maxbits < INIT_BITS)
maxbits = INIT_BITS;
if (maxbits > BITS)
maxbits = BITS;
maxmaxcode = 1 << maxbits;
outbufcount = 0;
outbufptr = buffer;
}
/*
* compress "from" to "to"
*
* Algorithm: use open addressing double hashing (no chaining) on the
* prefix code / next character combination. We do a variant of Knuth's
* algorithm D (vol. 3, sec. 6.4) along with G. Knott's relatively-prime
* secondary probe. Here, the modular division first probe is gives way
* to a faster exclusive-or manipulation. Also do block compression with
* an adaptive reset, whereby the code table is cleared when the compression
* ratio decreases, but after the table fills. The variable-length output
* codes are re-sized at this point, and a special CLEAR code is generated
* for the decompressor. Late addition: construct the table according to
* file size for noticeable speed improvement on small files. Please direct
* questions about this implementation to ames!jaw.
*/
int
compress(from, to)
char *from, *to;
{
register long fcode;
register code_int i = 0;
register int c;
register code_int ent;
register int disp;
register code_int hsize_reg;
register int hshift;
char *s;
char *err_msg[256];
comp_init();
inbufsize = 0;
endInput = false;
/*
* tune hash table size for small files -- ad hoc,
* but the sizes match earlier #defines, which
* serve as upper bounds on the number of output codes.
*/
hsize = HSIZE;
if (fsize < (1L << 12))
hsize = min (5003L,HSIZE );
else
if (fsize < (1L << 13))
hsize = min (9001L,HSIZE );
else
if (fsize < (1L << 14))
hsize = min (18013L,HSIZE );
else
if (fsize < (1L << 15))
hsize = min (35023L,HSIZE );
else
if (fsize < 47000L )
hsize = min (50021L,HSIZE );
if (!(infile = Open(from, MODE_OLDFILE))) {
return IoErr();
}
if (!(outfile = Open(to, MODE_NEWFILE))) {
status = IoErr();
Close(infile);
return status;
}
if (nomagic == 0){
putcbuf(magic_header[0]);
putcbuf(magic_header[1]);
putcbuf((char)(maxbits | block_compress));
if(status) {
cleanup:
Close(infile);
Close(outfile);
return status;
}
}
offset = 0;
bytes_out = 3; /* includes 3-byte header mojo */
out_count = 0;
clear_flg = 0;
ratio = 0L;
in_count = 1;
checkpoint = CHECK_GAP;
maxcode = MAXCODE(n_bits = INIT_BITS);
free_ent = ((block_compress)?FIRST :256 );
ent = ReadChar();
hshift = 0;
for (fcode = (long) hsize; fcode < 65536L; fcode *= 2L )
hshift++;
hshift = 8 - hshift; /* set hash code range bound */
hsize_reg = hsize;
cl_hash((count_int)hsize_reg); /* clear hash table */
/*while ((c = getc(infile))!= EOF ){*/
while ( ( c = ReadChar() ) != EOF) {
in_count++;
fcode = (long)(((long)c << maxbits)+ ent);
i = ((c << hshift)^ ent); /* xor hashing */
if (htabof (i)== fcode ){
ent = codetabof (i);
continue;
}
else
if ((long)htabof (i)< 0 )/* empty slot */
goto nomatch;
disp = hsize_reg - i; /* secondary hash (after G. Knott) */
if (i == 0 )
disp = 1;
probe:
if ((i -= disp)< 0 )
i += hsize_reg;
if (htabof (i)== fcode ){
ent = codetabof (i);
continue;
}
if ((long)htabof (i)> 0 )
goto probe;
nomatch:
output ((code_int)ent );
out_count++;
ent = c;
if (free_ent < maxmaxcode ){
codetabof (i)= free_ent++;/* code -> hashtable */
htabof (i)= fcode;
}
else
if ((count_int)in_count >= checkpoint && block_compress )
cl_block ();
}
/*
* Put out the final code.
*/
output((code_int)ent );
out_count++;
output((code_int)-1 );
/*
* Print out stats on stderr
*/
if(zcat_flg == 0 && !quiet){
#ifdef DEBUG
fprintf(stderr,
"%ld chars in, %ld codes (%ld bytes) out, compression factor: ",
in_count,out_count,bytes_out );
prratio(stderr,in_count,bytes_out );
fprintf(stderr,"\n");
fprintf(stderr,"\tCompression as in compact: " );
prratio(stderr,in_count-bytes_out,in_count );
fprintf(stderr,"\n");
fprintf(stderr,"\tLargest code (of last block) was %d (%d bits)\n",
free_ent - 1,n_bits );
#endif /* DEBUG */
}
flushbuf(); /* dump the last block */
if(bytes_out > in_count) /* exit(2) if no savings */
status = 2;
goto cleanup;
}
/*****************************************************************
* TAG( output )
*
* Output the given code.
* Inputs:
* code: A n_bits-bit integer. If == -1, then EOF. This assumes
* that n_bits =< (long)wordsize - 1.
* Outputs:
* Outputs code to the file.
* Assumptions:
* Chars are 8 bits long.
* Algorithm:
* Maintain a BITS character long buffer (so that 8 codes will
* fit in it exactly). Use the VAX insv instruction to insert each
* code in turn. When the buffer fills up empty it and start over.
*/
static char buf[BITS];
char_type lmask[9]= {
0xff,0xfe,0xfc,0xf8,0xf0,0xe0,0xc0,0x80,0x00
};
char_type rmask[9]= {
0x00,0x01,0x03,0x07,0x0f,0x1f,0x3f,0x7f,0xff
};
output(code)
code_int code;
{
#ifdef DEBUG
static int col = 0;
#endif /* DEBUG */
register int r_off = offset, bits = n_bits;
register char * bp = buf;
int count;
#ifdef DEBUG
if (verbose )
fprintf(stderr,"%5d%c",code,
(col+=6)>= 74 ?(col = 0,'\n'):' ' );
#endif /* DEBUG */
if (code >= 0 ){
/*
* byte/bit numbering on the VAX is simulated by the following code
*/
/*
* Get to the first byte.
*/
bp += (r_off >> 3);
r_off &= 7;
/*
* Since code is always >= 8 bits, only need to mask the first
* hunk on the left.
*/
*bp = (*bp & rmask[r_off])| (code << r_off)& lmask[r_off];
bp++;
bits -= (8 - r_off);
code >>= 8 - r_off;
/* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
if (bits >= 8 ){
*bp++= code;
code >>= 8;
bits -= 8;
}
/* Last bits. */
if(bits)
*bp = code;
offset += n_bits;
if (offset == (n_bits << 3)){
bp = buf;
bits = n_bits;
bytes_out += bits;
do
putcbuf(*bp++);
while(--bits);
offset = 0;
}
/*
* If the next entry is going to be too big for the code size,
* then increase it, if possible.
*/
if (free_ent > maxcode || (clear_flg > 0)){
/*
* Write the whole buffer, because the input side won't
* discover the size increase until after it has read it.
*/
if (offset > 0 ){
putmcbuf(buf, n_bits);
if (status) return 1;
bytes_out += n_bits;
}
offset = 0;
if (clear_flg ){
maxcode = MAXCODE (n_bits = INIT_BITS);
clear_flg = 0;
}
else {
n_bits++;
if (n_bits == maxbits )
maxcode = maxmaxcode;
else
maxcode = MAXCODE(n_bits);
}
#ifdef DEBUG
if (debug ){
fprintf(stderr,"\nChange to %d bits\n",n_bits );
col = 0;
}
#endif /* DEBUG */
}
}
else {
/*
* At EOF, write the rest of the buffer.
*/
coun= MAXCODE (n_bits = INIT_BITS);
clear_flg = 0;
}
size = 0;
while (size < n_bits) {
if ((c = ReadChar()) == EOF || status ) break;
buf[size++] = c;
}
if (size == 0 || status) {
return EOF; /* end of file */
}
offset = 0;
/* Round size down to integral number of codes */
size = (size << 3)- (n_bits - 1);
}
r_off = offset;
bits = n_bits;
/*
* Get to the first byte.
*/
bp += (r_off >> 3);
r_off &= 7;
/* Get first part (low order bits) */
#ifdef NO_UCHAR
code = ((*bp++ >> r_off)& rmask[8 - r_off]) & 0xff;
#else
code = (*bp++ >> r_off);
#endif /* NO_UCHAR */
bits -= (8 - r_off);
r_off = 8 - r_off; /* now, offset into code word */
/* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
if (bits >= 8 ){
#ifdef NO_UCHAR
code |= (*bp++ & 0xff) << r_off;
#else
code |= *bp++ << r_off;
#endif /* NO_UCHAR */
r_off += 8;
bits -= 8;
}
/* high order bits. */
code |= (*bp & rmask[bits]) << r_off;
offset += n_bits;
return code;
}
#ifndef AZTEC_C
char *
rindex(s,c)
register char *s,c;
{
char *p;
for (p = NULL;*s;s++)
if (*s == c)
p = s;
return(p);
}
#endif
/*
* This routine returns 1 if we are running in the foreground and stderr
* is a tty.
*/
foreground()
{
/* I don't know how to test for background, yet. */
return (isatty(2));
}
onintr()
{
unlink ( ofname );
exit (1 );
}
/* wild pointer -- assume bad input */
oops ()
{
if ( do_decomp == 1 )
fprintf (stderr,"uncompress: corrupt input\n" );
unlink ( ofname );
exit ( 1 );
}
/* table clear for block compress */
cl_block ()
{
register long int rat;
checkpoint = in_count + CHECK_GAP;
#ifdef DEBUG
if (debug ){
fprintf (stderr,"count: %ld, ratio: ",in_count );
prratio (stderr,in_count,bytes_out );
fprintf (stderr,"\n");
}
#endif /* DEBUG */
if(in_count > 0x007fffff){ /* shift will overflow */
rat = bytes_out >> 8;
if(rat == 0){ /* Don't divide by zero */
rat = 0x7fffffff;
}
else {
rat = in_count / rat;
}
}
else {
rat = (in_count << 8) / bytes_out; /* 8 fractional bits */
}
if (rat > ratio ){
ratio = rat;
}
else {
ratio = 0;
#ifdef DEBUG
if(verbose)
dump_tab(); /* dump string table */
#endif
cl_hash ((count_int)hsize );
free_ent = FIRST;
clear_flg = 1;
output ((code_int)CLEAR );
#ifdef DEBUG
if(debug)
fprintf (stderr,"clear\n" );
#endif /* DEBUG */
}
}
cl_hash(hsize) /* reset code table */
register count_int hsize;
{
register count_int *htab_p = htab+hsize;
register long i;
register long m1 = -1;
i = hsize - 16;
do { /* might use Sys V memset(3) here */
*(htab_p-16)= m1;
*(htab_p-15)= m1;
*(htab_p-14)= m1;
*(htab_p-13)= m1;
*(htab_p-12)= m1;
*(htab_p-11)= m1;
*(htab_p-10)= m1;
*(htab_p-9)= m1;
*(htab_p-8)= m1;
*(htab_p-7)= m1;
*(htab_p-6)= m1;
*(htab_p-5)= m1;
*(htab_p-4)= m1;
*(htab_p-3)= m1;
*(htab_p-2)= m1;
*(htab_p-1)= m1;
htab_p -= 16;
} while ((i -= 16) >= 0);
for (i += 16; i > 0; i--)
*--htab_p = m1;
}
#ifdef DEBUG
prratio(stream,num,den)
FILE *stream;
long int num,den;
{
register int q; /* Doesn't need to be long */
if(num > 214748L){ /* 2147483647/10000 */
q = num / (den / 10000L);
}
else {
q = (10000L*num)/ den; /* Long calculations, though */
}
if (q < 0){
putc('-',stream);
q = -q;
}
fprintf(stream,"%d.%02d%%",q / 100,q % 100);
}
#endif
/* Flush the output buffer. */
flushbuf()
{
if (Write(outfile, buffer, outbufcount) != outbufcount) {
status = IoErr();
}
outbufcount = 0;
outbufptr = buffer;
}
/* Put a character into the output buffer. If the buffer is full,
* output the buffer, then reset its count to zero.
* Called with:
* c: character to be output
* Returns:
* status code (0 => OK)
*/
int
putcbuf(c)
{
if (outbufcount >= bufsize) flushbuf();
*outbufptr++ = c;
++outbufcount;
return status;
}
/* Put multiple characters in the output buffer.
* Called with:
* buf: address of character array
* count: number of characters to output
* Returns:
* status
*/
putmcbuf(buf, count)
char *buf; int count;
{
for (; count; --count) if ( putcbuf( *buf++ ) ) break;
return status;
}
/* Read 1 character from the input file.
* Returns:
* character code or -1 (EOF)
*/
int
ReadChar()
{
again:
if (inbufsize == 0) {
if (endInput)
return EOF;
if ((inbufsize = Read(infile, inbuf, INBUFSIZE)) == -1L) {
status = IoErr();
return EOF;
}
inbufptr = inbuf; /* reset buffer pointer */
if (inbufsize < INBUFSIZE) {
endInput = true;
goto again;
}
}
++in_count;
--inbufsize;
return (*inbufptr++);
}