Fresh Fish 5

home *** CD-ROM | disk | FTP | other *** search

/ Fresh Fish 5 / FreshFish_July-August1994.bin / bbs / gnu / gzip-1.2.4-src.lha / src / amiga / gzip-1.2.4 / deflate.c < prev next >

Wrap

C/C++ Source or Header | 1993-08-13 | 29KB | 764 lines

/* deflate.c -- compress data using the deflation algorithm * Copyright (C) 1992-1993 Jean-loup Gailly * This is free software; you can redistribute it and/or modify it under the * terms of the GNU General Public License, see the file COPYING. */ /* * PURPOSE * * Identify new text as repetitions of old text within a fixed- * length sliding window trailing behind the new text. * * DISCUSSION * * The "deflation" process depends on being able to identify portions * of the input text which are identical to earlier input (within a * sliding window trailing behind the input currently being processed). * * The most straightforward technique turns out to be the fastest for * most input files: try all possible matches and select the longest. * The key feature of this algorithm is that insertions into the string * dictionary are very simple and thus fast, and deletions are avoided * completely. Insertions are performed at each input character, whereas * string matches are performed only when the previous match ends. So it * is preferable to spend more time in matches to allow very fast string * insertions and avoid deletions. The matching algorithm for small * strings is inspired from that of Rabin & Karp. A brute force approach * is used to find longer strings when a small match has been found. * A similar algorithm is used in comic (by Jan-Mark Wams) and freeze * (by Leonid Broukhis). * A previous version of this file used a more sophisticated algorithm * (by Fiala and Greene) which is guaranteed to run in linear amortized * time, but has a larger average cost, uses more memory and is patented. * However the F&G algorithm may be faster for some highly redundant * files if the parameter max_chain_length (described below) is too large. * * ACKNOWLEDGEMENTS * * The idea of lazy evaluation of matches is due to Jan-Mark Wams, and * I found it in 'freeze' written by Leonid Broukhis. * Thanks to many info-zippers for bug reports and testing. * * REFERENCES * * APPNOTE.TXT documentation file in PKZIP 1.93a distribution. * * A description of the Rabin and Karp algorithm is given in the book * "Algorithms" by R. Sedgewick, Addison-Wesley, p252. * * Fiala,E.R., and Greene,D.H. * Data Compression with Finite Windows, Comm.ACM, 32,4 (1989) 490-595 * * INTERFACE * * void lm_init (int pack_level, ush *flags) * Initialize the "longest match" routines for a new file * * ulg deflate (void) * Processes a new input file and return its compressed length. Sets * the compressed length, crc, deflate flags and internal file * attributes. */ #include <stdio.h> #include "tailor.h" #include "gzip.h" #include "lzw.h" /* just for consistency checking */ #ifdef RCSID static char rcsid[] = "$Id: deflate.c,v 0.15 1993/06/24 10:53:53 jloup Exp $"; #endif /* =========================================================================== * Configuration parameters */ /* Compile with MEDIUM_MEM to reduce the memory requirements or * with SMALL_MEM to use as little memory as possible. Use BIG_MEM if the * entire input file can be held in memory (not possible on 16 bit systems). * Warning: defining these symbols affects HASH_BITS (see below) and thus * affects the compression ratio. The compressed output * is still correct, and might even be smaller in some cases. */ #ifdef SMALL_MEM # define HASH_BITS 13 /* Number of bits used to hash strings */ #endif #ifdef MEDIUM_MEM # define HASH_BITS 14 #endif #ifndef HASH_BITS # define HASH_BITS 15 /* For portability to 16 bit machines, do not use values above 15. */ #endif /* To save space (see unlzw.c), we overlay prev+head with tab_prefix and * window with tab_suffix. Check that we can do this: */ #if (WSIZE<<1) > (1<<BITS) error: cannot overlay window with tab_suffix and prev with tab_prefix0 #endif #if HASH_BITS > BITS-1 error: cannot overlay head with tab_prefix1 #endif #define HASH_SIZE (unsigned)(1<<HASH_BITS) #define HASH_MASK (HASH_SIZE-1) #define WMASK (WSIZE-1) /* HASH_SIZE and WSIZE must be powers of two */ #define NIL 0 /* Tail of hash chains */ #define FAST 4 #define SLOW 2 /* speed options for the general purpose bit flag */ #ifndef TOO_FAR # define TOO_FAR 4096 #endif /* Matches of length 3 are discarded if their distance exceeds TOO_FAR */ /* =========================================================================== * Local data used by the "longest match" routines. */ typedef ush Pos; typedef unsigned IPos; /* A Pos is an index in the character window. We use short instead of int to * save space in the various tables. IPos is used only for parameter passing. */ /* DECLARE(uch, window, 2L*WSIZE); */ /* Sliding window. Input bytes are read into the second half of the window, * and move to the first half later to keep a dictionary of at least WSIZE * bytes. With this organization, matches are limited to a distance of * WSIZE-MAX_MATCH bytes, but this ensures that IO is always * performed with a length multiple of the block size. Also, it limits * the window size to 64K, which is quite useful on MSDOS. * To do: limit the window size to WSIZE+BSZ if SMALL_MEM (the code would * be less efficient). */ /* DECLARE(Pos, prev, WSIZE); */ /* Link to older string with same hash index. To limit the size of this * array to 64K, this link is maintained only for the last 32K strings. * An index in this array is thus a window index modulo 32K. */ /* DECLARE(Pos, head, 1<<HASH_BITS); */ /* Heads of the hash chains or NIL. */ ulg window_size = (ulg)2*WSIZE; /* window size, 2*WSIZE except for MMAP or BIG_MEM, where it is the * input file length plus MIN_LOOKAHEAD. */ long block_start; /* window position at the beginning of the current output block. Gets * negative when the window is moved backwards. */ local unsigned ins_h; /* hash index of string to be inserted */ #define H_SHIFT ((HASH_BITS+MIN_MATCH-1)/MIN_MATCH) /* Number of bits by which ins_h and del_h must be shifted at each * input step. It must be such that after MIN_MATCH steps, the oldest * byte no longer takes part in the hash key, that is: * H_SHIFT * MIN_MATCH >= HASH_BITS */ unsigned int near prev_length; /* Length of the best match at previous step. Matches not greater than this * are discarded. This is used in the lazy match evaluation. */ unsigned near strstart; /* start of string to insert */ unsigned near match_start; /* start of matching string */ local int eofile; /* flag set at end of input file */ local unsigned lookahead; /* number of valid bytes ahead in window */ unsigned near max_chain_length; /* To speed up deflation, hash chains are never searched beyond this length. * A higher limit improves compression ratio but degrades the speed. */ local unsigned int max_lazy_match; /* Attempt to find a better match only when the current match is strictly * smaller than this value. This mechanism is used only for compression * levels >= 4. */ #define max_insert_length max_lazy_match /* Insert new strings in the hash table only if the match length * is not greater than this length. This saves time but degrades compression. * max_insert_length is used only for compression levels <= 3. */ local int compr_level; /* compression level (1..9) */ unsigned near good_match; /* Use a faster search when the previous match is longer than this */ /* Values for max_lazy_match, good_match and max_chain_length, depending on * the desired pack level (0..9). The values given below have been tuned to * exclude worst case performance for pathological files. Better values may be * found for specific files. */ typedef struct config { ush good_length; /* reduce lazy search above this match length */ ush max_lazy; /* do not perform lazy search above this match length */ ush nice_length; /* quit search above this match length */ ush max_chain; } config; #ifdef FULL_SEARCH # de