home *** CD-ROM | disk | FTP | other *** search
/ Power-Programmierung / CD2.mdf / c / library / dos / packer / c_lzw / lzw.c
Encoding:
C/C++ Source or Header  |  1992-02-10  |  19.6 KB  |  582 lines

  1. /*
  2.  *
  3.  *
  4.  */
  5.  
  6. #ifndef TYPEDEF
  7. typedef long int                count_int;
  8. typedef short int               code_int;
  9. typedef unsigned short int      ucode_int;
  10. typedef long int                buff_int;
  11. typedef unsigned char           char_type;
  12. #endif
  13.  
  14. #define CHK_POINT 1000
  15. #define HSIZE 5003
  16. #define LZ_MAXBITS    12
  17. #define MAXMAXCODE (short int)(1<<LZ_MAXBITS)   /* should NEVER generate this code */
  18. #define BLCOMP  0x80                            /* if 0 htable won't be refresed
  19.                                          automaticaly */
  20.  
  21. typedef struct {
  22.         code_int  n_bits;                    /* number of bits/code */
  23.         code_int  maxcode;                   /* maximum code, given n_bits */
  24.         count_int htab[HSIZE];
  25.         unsigned short  codetab╒HSIZEσ;
  26.         code_int hsize_reg;
  27.         code_int hshift;
  28.         code_int free_ent ;                  /* first unused entry */
  29.  
  30.         code_int   clear_flg ;               /*flag to clear tables */
  31.         count_int  ratio ;
  32.         count_int  checkpoint;
  33.  
  34.         count_int    in_count ;              /* length of input record */
  35.         count_int    bytes_out_all;          /* count all bytes in file */
  36.         count_int    out_count;              /* # of codes output  */
  37.  
  38.         short int    new_file_flag;          /* if 0 refresh tables and turn to 1*/
  39.                                             /* mast be set while lzopen()
  40. */
  41.         code_int  offset;                       /* do not warry, it's for bits packeges */
  42.         buff_int bytes_out;                     /* length of compressed output */
  43.         buff_int bytes_in;
  44.  
  45. }  LZ_COMP;
  46.  
  47. /*=====================================================================================*
  48. *Functions:                                                                            *
  49. *  void lzopen (&hendle) - allocate memory for compress process               *
  50. *                                                                                      *
  51. *  void lzcomp (p_in,&bytes_in, p_out, &bytes_out, handle) -                       *
  52. *           compress p-in which is the length of bytes_in in p_out which                     *
  53. *           is the length of bytes_out                                                       *
  54. *                                                                                      *
  55. *  void lzdecomp (p_out, &bytes_out, p_in, &bites_in, handle) -                     *
  56. *            decompress p_out which is the length of bytes_out in p_in which           *
  57. *            is the length of bytes_in                                                 *
  58. *                                                                                      *
  59. *Parameters:                                                                           *
  60. *  LZ_COMP *handle     - pointer to tables of current compression stream                        *
  61. *                                                                                      *
  62. *  char_type *p_in - pointer to input record to be compressed for compression          *
  63. *  function pointer on decompressed record for decompression function                  *
  64. *                                                                                      *
  65. *  unsigned short bytes_in - nuber of bytes in the p_in record                         *
  66. *                                                                                      *
  67. *  char_type *p_out    - pointer on compressed record in compressed and                *
  68. *  decompressed functions                                                                *
  69. *                                                                                      *
  70. *  unsigned short bytes_out- nuber of bytes in the p_out record                        *
  71. *                                                                                      *
  72. *Do not forget:                                                                        *
  73. *                                                                                      *
  74. *#include <stdio.h>                                                                    *
  75. *#include <string.h>                                                                   *
  76. *#include "lz.h"                                                                       *
  77. *                                                                                      *
  78. *======================================================================================*/
  79. /*------------------------------------------------------------------*/
  80.  
  81. lzopen(handle)
  82. LZ_COMP **handle;
  83.  {
  84.         *handle=(LZ_COMP *)malloc(sizeof(LZ_COMP));
  85.         (*handle)->new_file_flag=1;
  86.         return;
  87.  }
  88.  
  89. /*------------------------------------------------------------------*/
  90.  
  91. lzclose(handle)
  92. LZ_COMP *handle;
  93. {
  94.         free(handle);
  95.         return;
  96.  }
  97.  
  98. /*------------------------------------------------------------------*/
  99. lzcomp(p_in, in_length, p_out, out_length, handle)
  100.  
  101. char_type       *p_in,     *p_out;
  102. buff_int        *in_length, *out_length;
  103. LZ_COMP *handle;
  104. {
  105.  
  106.         long int        fcode;
  107.  
  108.         register code_int  disp;
  109.         register code_int  c;
  110.         static code_int  ent;
  111.         register int    i ;
  112.  
  113.         handle->bytes_in = *in_length;
  114.  
  115.  
  116.   if( handle->new_file_flag == 1 ){   /* if first record - refresh tables */
  117.  
  118.         handle->new_file_flag=0;
  119.  
  120.                 handle->offset = 0;
  121.                 handle->n_bits = 9;
  122.                 handle->bytes_out_all = 0;
  123.                 handle->out_count = 0;
  124.                 handle->clear_flg = 0;
  125.                 handle->ratio = 0;
  126.                 handle->in_count = 1;
  127.                 handle->checkpoint =  CHK_POINT;
  128.                 handle->maxcode = ((1 << (handle->n_bits = 9)) - 1);
  129.                 handle->free_ent = ((BLCOMP) ? 257 : 256 );
  130.  
  131.                 handle->hshift = 0;
  132.                 for ( fcode = (long) HSIZE; fcode < 65536L; fcode *= 2L )
  133.                         handle->hshift++;
  134.                 handle->hshift = 8 - handle->hshift;            /* set hash code range bound */
  135.  
  136.                 handle->hsize_reg = HSIZE;
  137.                 cl_hash(handle);             /* clear hash table */
  138.  
  139.         }
  140.  
  141.         ent = *p_in++;
  142.         handle->bytes_in--;
  143.         handle->bytes_out = 0;
  144.  
  145.         while (handle->bytes_in-- > 0) {
  146.  
  147.                 c = *p_in++;
  148.                 handle->in_count++;
  149.                 fcode = (long) (((long) c << LZ_MAXBITS) + ent);
  150.                 i = ((c << handle->hshift) ^ ent);      /* xor hashing */
  151.  
  152.                 if ( handle->htab╒iσ == fcode ) {
  153.                         ent = handle->codetab╒iσ;
  154.                         continue;
  155.                 } else if ( (long)handle->htab╒iσ < 0 ) /* empty slot */
  156.                         goto nomatch;
  157.                 disp = handle->hsize_reg - i;           /* secondary hash (after G. Knott) */
  158.                 if ( i == 0 )
  159.                         disp = 1;
  160. probe:
  161.                 if ( (i -= disp) < 0 )
  162.                         i += handle->hsize_reg;
  163.  
  164.                 if ( handle->htab╒iσ == fcode ) {
  165.                         ent = handle->codetab╒iσ;
  166.                         continue;
  167.                 }
  168.                 if ( (long)handle->htab╒iσ > 0 )
  169.                         goto probe;
  170. nomatch:
  171.                 p_out += output (p_out, (code_int) ent , handle);
  172.                 handle->out_count++;
  173.                 ent = c;
  174.  
  175.                 if ( handle->free_ent < MAXMAXCODE ) {
  176.                         handle->codetab╒iσ = handle->free_ent++;        /* code -> hashtable */
  177.                         handle->htab╒iσ = fcode;
  178.                 } else if ( ((count_int)handle->in_count >= handle->checkpoint) &&
  179.                     BLCOMP )
  180.                         p_out += cl_block (p_out, handle);
  181.         }
  182.  
  183.         /* Put out the final code. */
  184.         p_out += output(p_out, (code_int)ent , handle);
  185.         handle->out_count++;
  186.         p_out += output(p_out, (code_int) - 1 , handle);
  187.         handle->bytes_out_all += handle->bytes_out;
  188.  
  189.         *out_length=handle->bytes_out;
  190.         return;
  191. }
  192.  
  193.  
  194. /*****************************************************************/
  195.  
  196. output(p_out, code, handle)
  197. char_type  *p_out;
  198. code_int   code;
  199. LZ_COMP *handle;
  200. {
  201.  
  202. static char_type        buf╒12σ;
  203.  
  204. static char_type lmask╒9σ = {
  205.         0xff, 0xfe, 0xfc, 0xf8, 0xf0, 0xe0, 0xc0, 0x80, 0x00            };
  206.  
  207. static char_type rmask╒9σ = {
  208.         0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff            };
  209.  
  210.  
  211.         register code_int       r_off = handle->offset;
  212.         code_int bits = handle->n_bits;
  213.         register char_type      *bp = p_out;
  214.  
  215.         register buff_int       nw = 0;
  216.  
  217.         if ( code >= 0 ) {
  218.                 /*
  219.  * Get to the first byte.
  220.  */
  221.                 bp += (r_off >> 3);
  222.                 r_off &= 7;
  223.                 /*
  224.  * Since code is always >= 8 bits, only need to mask the first
  225.  * hunk on the left.
  226.  */
  227.                 *bp = (*bp & rmask╒r_offσ) | (code << r_off) &
  228.                     lmask╒r_offσ;
  229.                 bp++;
  230.                 bits -= (8 - r_off);
  231.                 code >>= 8 - r_off;
  232.                 /* Get any 8 bit parts in the middle  */
  233.                 if ( bits >= 8 ) {
  234.                         *bp++ = code;
  235.                         code >>= 8;
  236.                         bits -= 8;
  237.                 }
  238.                 /* Last bits. */
  239.                 if (bits)
  240.                         *bp = code;
  241.                 handle->offset += handle->n_bits;
  242.                 if ( handle->offset == (handle->n_bits << 3) ) {
  243.                         bp = buf;
  244.                         bits = handle->n_bits;
  245.                         handle->bytes_out += bits;
  246.                         nw = bits;
  247.                         handle->offset = 0;
  248.                 }
  249.  
  250.                 /*
  251.  * If the next entry is going to be too big for the code size,
  252.  * then increase it, if possible.
  253.  */
  254.                 if ( handle->free_ent > handle->maxcode || (handle->clear_flg > 0)) {
  255.                         /*
  256.  * Write the whole buffer, because the input side won't
  257.  * discover the size increase until after it has read it.
  258.  */
  259.                         if ( handle->offset > 0 ) {
  260.                                 nw = handle->n_bits;
  261.                                 handle->bytes_out += handle->n_bits;
  262.                         }
  263.                         handle->offset = 0;
  264.  
  265.                         if ( handle->clear_flg ) {
  266.                                 handle->maxcode = ((1 << (handle->n_bits = 9)) - 1);
  267.                                 handle->clear_flg = 0;
  268.                         } else {
  269.                                 handle->n_bits++;
  270.                                 if ( handle->n_bits == LZ_MAXBITS )
  271.                                         handle->maxcode = MAXMAXCODE;
  272.                                 else
  273.                                         handle->maxcode = ((1 << (handle->n_bits)) - 1);
  274.                         }
  275.  
  276.                 }
  277.         } else {
  278.                 /*
  279.  * Write the rest of the buffer.
  280.  */
  281.                 if ( handle->offset > 0 ) {
  282.                         handle->bytes_out += (handle->offset + 7) / 8;
  283.                         nw = (handle->offset + 7) / 8;
  284.                 }
  285.                 handle->offset = 0;
  286.  
  287.         }
  288.         return(nw);
  289. }
  290.  
  291.  
  292. /*----------------------------------------------------------------*/
  293.  
  294. lzdecomp(p_in, in_length, p_out, out_length, handle)
  295. char_type       *p_in, *p_out;
  296. buff_int        *in_length,*out_length;
  297.  
  298. LZ_COMP *handle;
  299.  
  300. {
  301.         static char_type *stackp;
  302.         code_int finchar, oldcode;
  303.         code_int code;
  304.         code_int incode;
  305.         int     stack_size = 0;
  306.         int     i;
  307.  
  308.         handle->bytes_out = 0;
  309.  
  310.  if(handle->new_file_flag== 1){
  311.  
  312.        handle->new_file_flag=0;
  313.        handle->offset = 0;
  314.  
  315.                 /*
  316.  * As above, initialize the first 256 entries in the table.
  317.  */
  318.                 handle->maxcode = ((1 << (handle->n_bits = 9)) - 1);
  319.                 for ( code = 256; code > 0; code--) {
  320.  
  321.                         handle->codetab╒code-1σ = 0;
  322.                         ((char_type * )(handle->htab))╒code-1σ = (char_type)(code - 1);
  323.                 }
  324.                 handle->free_ent = ((BLCOMP) ? 257 : 256 );
  325.  
  326.  
  327.         }
  328.  
  329.         handle->bytes_in = *in_length;
  330.  
  331.         code = getcode(handle,&p_in);
  332.         incode = code;
  333.         stackp = p_out;
  334.  
  335.         while ( code >= 256 ) {
  336.                 *stackp++ = ((char_type * )(handle->htab))╒codeσ;
  337.                 stack_size++;
  338.                 code = handle->codetab╒codeσ;
  339.         }
  340.         *stackp++ = finchar = ((char_type * )(handle->htab))╒codeσ;
  341.         stack_size++;
  342.  
  343.         for (i = 0; i < (stack_size + 1) / 2; i++) {
  344.                 char_type ch;
  345.  
  346.                 ch = *(p_out + i);
  347.                 *(p_out + i) = *(stackp - i - 1);
  348.                 *(stackp - i - 1) = ch;
  349.         }
  350.         handle->bytes_out += stack_size;
  351.         stack_size = 0;
  352.         p_out = stackp;
  353.  
  354.         oldcode = incode;
  355.  
  356.         while ( (code = getcode(handle,&p_in)) > -1 ) {
  357.  
  358.  
  359.                 if ( (code == 256) && BLCOMP ) {
  360.                         for ( code = 255; code >= 0; code--)
  361.                                 handle->codetab╒codeσ = 0;
  362.                         handle->clear_flg = 1;
  363.                         handle->free_ent = 256;
  364.  
  365.                         if ( (code = getcode (handle,&p_in)) <= -1 )
  366.                                 /* O, untimely death! */
  367.                                 break;
  368.                 }
  369.                 incode = code;
  370.                 /*
  371.  * Special case for KwKwK string.
  372.  */
  373.                 if ( code >= handle->free_ent ) {
  374.                         *stackp++ = finchar;
  375.                         stack_size++;
  376.                         code = oldcode;
  377.                 }
  378.  
  379.                 /*
  380.  * Generate output characters in reverse order
  381.  */
  382.  
  383.                 while ( code >= 256 ) {
  384.                         *stackp++ = ((char_type * )(handle->htab))╒codeσ;
  385.                         stack_size++;
  386.                         code = handle->codetab╒codeσ;
  387.                 }
  388.                 *stackp++ = finchar = ((char_type * )(handle->htab))╒codeσ;
  389.                 stack_size++;
  390.  
  391.                 /*
  392.  * And put them out in forward order
  393.  */
  394.  
  395.                 for (i = 0; i < (stack_size + 1) / 2; i++) {
  396.                         char_type ch;
  397.  
  398.                         ch = *(p_out + i);
  399.                         *(p_out + i) = *(stackp - i - 1);
  400.                         *(stackp - i - 1) = ch;
  401.                 }
  402.                 handle->bytes_out += stack_size;
  403.                 stack_size = 0;
  404.                 p_out = stackp;
  405.  
  406.                 /*
  407.  * Generate the new entry.
  408.  */
  409.  
  410.                 if ( (code = handle->free_ent) < MAXMAXCODE ) {
  411.  
  412.                         handle->codetab╒codeσ = (unsigned short)oldcode;
  413.  
  414.                         ((char_type * )(handle->htab))╒codeσ = finchar;
  415.  
  416.                         handle->free_ent = code + 1;
  417.                 }
  418.                 /*
  419.  * Remember previous code.
  420.  */
  421.                 oldcode = incode;
  422.         }
  423.  
  424.         *out_length=handle->bytes_out;
  425.         return;
  426.  
  427. }
  428.  
  429.  
  430. /******************************************************************/
  431. getcode(handle,pp_in)
  432. LZ_COMP *handle;
  433. char_type **pp_in;
  434.  
  435. {
  436. static char_type        buf╒12σ;
  437. static char_type rmask╒9σ = {
  438.         0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff            };
  439.  
  440.         register code_int code;
  441.         register char_type *bp = buf;
  442.  
  443.         static code_int  size = 0;
  444.         static code_int r_off, bits;
  445.  
  446.         if ( handle->clear_flg > 0 || handle->offset >= size || handle->free_ent >
  447.             handle->maxcode ) {
  448.                 /*
  449.  * If the next entry will be too big for the current code
  450.  * size, then we must increase the size.  This implies reading
  451.  * a new buffer full, too.
  452.  */
  453.                 if ( handle->free_ent > handle->maxcode ) {
  454.                         handle->n_bits++;
  455.                         if ( handle->n_bits == LZ_MAXBITS )
  456.                                 handle->maxcode = MAXMAXCODE;
  457.                                 /* won't get any bigger now */
  458.                         else
  459.                                 handle->maxcode = ((1 << (handle->n_bits)) - 1);
  460.                 }
  461.                 if ( handle->clear_flg > 0) {
  462.                         handle->maxcode = ((1 << (handle->n_bits = 9)) - 1);
  463.                         handle->clear_flg = 0;
  464.  
  465.                 }
  466.  
  467.  
  468.                 if ( handle->bytes_in == 0)
  469.                         return - 1;
  470.  
  471.                 if ( handle->n_bits <= handle->bytes_in ) {
  472.  
  473.                         bp = buf;
  474.  
  475.                         (void) memcpy(bp,*pp_in, handle->n_bits);
  476.                         size= handle->n_bits;
  477.                         *pp_in+=size;
  478.                         handle->bytes_in-=handle->n_bits;
  479.  
  480.  
  481.                 }  else {
  482.  
  483.                         bp = buf;
  484.                         (void) memcpy(bp,*pp_in, handle->bytes_in);
  485.                         size= handle->bytes_in;
  486.                         *pp_in+=size;
  487.                         handle->bytes_in=0;
  488.  
  489.                 }
  490.  
  491.                 handle->offset = 0;
  492.                 /* Round size down to integral number of codes */
  493.                 size = (size << 3) - (handle->n_bits - 1);
  494.         }
  495.         r_off = handle->offset;
  496.         bits = handle->n_bits;
  497.         /*
  498.          * Get to the first byte.
  499.          */
  500.         bp += (r_off >> 3);
  501.         r_off &= 7;
  502.         /* Get first part (low order bits) */
  503.  
  504.         code = (*bp++ >> r_off);
  505.         bits -= (8 - r_off);
  506.         r_off = 8 - r_off;              /* now, offset into code word */
  507.         /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
  508.         if ( bits >= 8 ) {
  509.  
  510.                 code |= *bp++ << r_off;
  511.                 r_off += 8;
  512.                 bits -= 8;
  513.         }
  514.         /* high order bits. */
  515.         code |= (*bp & rmask╒bitsσ) << r_off;
  516.         handle->offset += handle->n_bits;
  517.  
  518.         return code;
  519. }
  520.  
  521.  
  522. /*------------------------------------------------------------------*/
  523.  
  524. cl_block (p_out, handle)         /* table clear for block compress */
  525. char_type  *p_out;
  526. LZ_COMP *handle;
  527. {
  528.         register long int       rat;
  529.  
  530.         buff_int nw = 0;
  531.  
  532.         handle->checkpoint = handle->in_count + CHK_POINT;
  533.  
  534.  
  535.         if (handle->in_count > 0x007fffff) {    /* shift will overflow */
  536.                 rat = (handle->bytes_out_all + handle->bytes_out) >> 8;
  537.                 if (rat == 0) {         /* Don't divide by zero */
  538.                         rat = 0x7fffffff;
  539.                 } else {
  540.                         rat = handle->in_count / rat;
  541.                 }
  542.         } else {
  543.                 rat = (handle->in_count << 8) / (handle->bytes_out_all + handle->bytes_out);
  544.                 /* 8 fractional bits */
  545.         }
  546.  
  547.  
  548.         if ( rat > handle->ratio ) {
  549.                 handle->ratio = rat;
  550.                 nw = 0;
  551.  
  552.         } else {
  553.                 handle->ratio = 0;
  554.  
  555.                 cl_hash ( handle );
  556.                 handle->free_ent = 257;
  557.                 handle->clear_flg = 1;
  558.                 putchar('/');
  559.                 nw = output ( p_out, (code_int) 256, handle );
  560.  
  561.         }
  562.         return nw;
  563. }
  564.  
  565.  
  566. /*------------------------------------------------------------------*/
  567. cl_hash(handle)           /* reset code table */
  568. LZ_COMP *handle;
  569. {
  570.  
  571.  
  572.         register count_int *htab_p = handle->htab;
  573.         register long   i;
  574.         register long   m1 = -1;
  575.  
  576.  
  577.         for (i = 0; i < HSIZE; i++)
  578.                 *htab_p++ = m1;
  579.  
  580. }
  581.  
  582.