Simtel MSDOS 1992 June

home *** CD-ROM | disk | FTP | other *** search

/ Simtel MSDOS 1992 June / SIMTEL_0692.cdr / msdos / ddjmag / ddj8910.arc / NELSON.LST < prev next >

Wrap

File List | 1989-09-07 | 10.1 KB | 307 lines

_LZW Data Compression_ by Mark R. Nelson [LISTING ONE] /**************************************************************************** ** LZW data compression/expansion demonstration program. ** Mark R. Nelson *****************************************************************************/ #include <stdio.h> #define BITS 12 /* Setting the number of bits to 12, 13 */ #define HASHING_SHIFT BITS-8 /* or 14 affects several constants. */ #define MAX_VALUE (1 << BITS) - 1 /* Note that MS-DOS machines need to */ #define MAX_CODE MAX_VALUE - 1 /* compile their code in large model if */ /* 14 bits are selected. */ #if BITS == 14 #define TABLE_SIZE 18041 /* The string table size needs to be a */ #endif /* prime number that is somwhat larger */ #if BITS == 13 /* than 2**BITS. */ #define TABLE_SIZE 9029 #endif #if BITS <= 12 #define TABLE_SIZE 5021 #endif void *malloc(); int *code_value; /* This is the code value array */ unsigned int *prefix_code; /* This array holds the prefix codes */ unsigned char *append_character; /* This array holds the appended chars */ unsigned char decode_stack[4000]; /* This array holds the decoded string */ /**************************************************************************** ** This program gets a file name from the command line. It compresses the ** file, placing its output in a file named test.lzw. It then expands ** test.lzw into test.out. Test.out should then be an exact duplicate of ** the input file. ****************************************************************************/ main(int argc, char *argv[]) { FILE *input_file; FILE *output_file; FILE *lzw_file; char input_file_name[81]; /* ** The three buffers are needed for the compression phase. */ code_value=malloc(TABLE_SIZE*sizeof(unsigned int)); prefix_code=malloc(TABLE_SIZE*sizeof(unsigned int)); append_character=malloc(TABLE_SIZE*sizeof(unsigned char)); if (code_value==NULL || prefix_code==NULL || append_character==NULL) { printf("Fatal error allocating table space!\n"); exit(); } /* ** Get the file name, open it up, and open up the lzw output file. */ if (argc>1) strcpy(input_file_name,argv[1]); else { printf("Input file name? "); scanf("%s",input_file_name); } input_file=fopen(input_file_name,"rb"); lzw_file=fopen("test.lzw","wb"); if (input_file==NULL || lzw_file==NULL) { printf("Fatal error opening files.\n"); exit(); }; /* ** Compress the file. */ compress(input_file,lzw_file); fclose(input_file); fclose(lzw_file); free(code_value); /* ** Now open the files for the expansion. */ lzw_file=fopen("test.lzw","rb"); output_file=fopen("test.out","wb"); if (lzw_file==NULL || output_file==NULL) { printf("Fatal error opening files.\n"); exit(); }; /* ** Expand the file. */ expand(lzw_file,output_file); fclose(lzw_file); fclose(output_file); free(prefix_code); free(append_character); } /* ** This is the compression routine. The code should be a fairly close ** match to the algorithm accompanying the article. ** */ compress(FILE *input,FILE *output) { unsigned int next_code; unsigned int character; unsigned int string_code; unsigned int index; int i; next_code=256; /* Next code is the next available string code*/ for (i=0;i<TABLE_SIZE;i++) /* Clear out the string table before starting */ code_value[i]=-1; i=0; printf("Compressing...\n"); string_code=getc(input); /* Get the first code*/ /* ** This is the main loop where it all happens. This loop runs util all of ** the input has been exhausted. Note that it stops adding codes to the ** table after all of the possible codes have been defined. */ while ((character=getc(input)) != (unsigned)EOF) { if (++i==1000) /* Print a * every 1000 */ { /* input characters. This */ i=0; /* is just a pacifier. */ printf("*"); } index=find_match(string_code,character); /* See if the string is in */ if (code_value[index] != -1) /* the table. If it is, */ string_code=code_value[index]; /* get the code value. If */ else /* the string is not in the*/ { /* table, try to add it. */ if (next_code <= MAX_CODE) { code_value[index]=next_code++; prefix_code[index]=string_code; append_character[index]=character; } output_code(output,string_code); /* When a string is found */ string_code=character; /* that is not in the table*/ } /* I output the last string*/ } /* after adding the new one*/ /* ** End of the main loop. */ output_code(output,string_code); /* Output the last code */ output_code(output,MAX_VALUE); /* Output the end of buffer code */ output_code(output,0); /* This code flushes the output buffer*/ printf("\n"); } /* ** This is the hashing routine. It tries to find a match for the prefix+char ** string in the string table. If it finds it, the index is returned. If ** the string is not found, the first available index in the string table is ** returned instead. */ find_match(int hash_prefix,unsigned int hash_character) { int index; int offset; index = (hash_character << HASHING_SHIFT) ^ hash_prefix; if (index == 0) offset = 1; else offset = TABLE_SIZE - index; while (1) { if (code_value[index] == -1) return(index); if (prefix_code[index] == hash_prefix && append_character[index] == hash_character) return(index); index -= offset; if (index < 0) index += TABLE_SIZE; } } /* ** This is the expansion routine. It takes an LZW format file, and expands ** it to an output file. The code here should be a fairly close match to ** the algorithm in the accompanying article. */ expand(FILE *input,FILE *output) { unsigned int next_code; unsigned int new_code; unsigned int old_code; int character; int counter; unsigned char *string; char *decode_string(unsigned char *buffer,unsigned int code); next_code=256; /* This is the next available code to define */ counter=0; /* Counter is used as a pacifier. */ printf("Expanding...\n"); old_code=input_code(input); /* Read in the first code, initialize the */ character=old_code; /* character variable, and send the first */ putc(old_code,output); /* code to the output file */ /* ** This is the main expansion loop. It reads in characters from the LZW file ** until it sees the special code used to inidicate the end of the data. */ while ((new_code=input_code(input)) != (MAX_VALUE)) { if (++counter==1000) /* This section of code prints out */ { /* an asterisk every 1000 characters*/ counter=0; /* It is just a pacifier. */ printf("*"); } /* ** This code checks for the special STRING+CHARACTER+STRING+CHARACTER+STRING ** case which generates an undefined code. It handles it by decoding ** the last code, adding a single character to the end of the decode string. */ if (new_code>=next_code) { *decode_stack=character; string=decode_string(decode_stack+1,old_code); } /* ** Otherwise we do a straight decode of the new code. */ else string=decode_string(decode_stack,new_code); /* ** Now we output the decoded string in reverse order. */ character=*string; while (string >= decode_stack) putc(*string--,output); /* ** Finally, if possible, add a new code to the string table. */ if (next_code <= MAX_CODE) { prefix_code[next_code]=old_code; append_character[next_code]=character; next_code++; } old_code=new_code; } printf("\n"); } /* ** This routine simply decodes a string from the string table, storing ** it in a buffer. The buffer can then be output in reverse order by ** the expansion program. */ char *decode_string(unsigned char *buffer,unsigned int code) { int i; i=0; while (code > 255) { *buffer++ = append_character[code]; code=prefix_code[code]; if (i++>=4094) { printf("Fatal error during code expansion.\n"); exit(); } } *buffer=code; return(buffer); } /* ** The following two routines are used to output variable length ** codes. They are written strictly for clarity, and are not ** particularly efficient. */ input_code(FILE *input) { unsigned int return_value; static int input_bit_count=0; static unsigned long input_bit_buffer=0L; while (input_bit_count <= 24) { input_bit_buffer |= (unsigned long) getc(input) << (24-input_bit_count); input_bit_count += 8; } return_value=input_bit_buffer >> (32-BITS); input_bit_buffer <<= BITS; input_bit_count -= BITS; return(return_value); } output_code(FILE *output,unsigned int code) { static int output_bit_count=0; static unsigned long output_bit_buffer=0L; output_bit_buffer |= (unsigned long) code << (32-BITS-output_bit_count); output_bit_count += BITS; while (output_bit_count >= 8) { putc(output_bit_buffer >> 24,output); output_bit_buffer <<= 8; output_bit_count -= 8; } }