/* Basic LZW Data Compression program published in DDJ October 1989 issue.
* Original Author: Mark R. Nelson
* Updated by: Shawn M. Regan, January 1990
* Added: - Method to clear table when compression ratio degrades
* - Self adjusting code size capability (up to 14 bits)
* Updated functions are marked with "MODIFIED". main() has been updated also
* Compile with -ml (large model) for MAX_BITS == 14 only
*/
#include <stdio.h>
#define INIT_BITS 9
#define MAX_BITS 14 /* Do not exceed 14 with this program */
#define HASHING_SHIFT MAX_BITS - 8
#if MAX_BITS == 14 /* Set the table size. Must be a prime */
#define TABLE_SIZE 18041 /* number somewhat larger than 2^MAX_BITS.*/
#elif MAX_BITS == 13
#define TABLE_SIZE 9029
#else
#define TABLE_SIZE 5021
#endif
#define CLEAR_TABLE 256 /* Code to flush the string table */
#define TERMINATOR 257 /* To mark EOF Condition, instead of MAX_VALUE */
#define FIRST_CODE 258 /* First available code for code_value table */
#define CHECK_TIME 100 /* Check comp ratio every CHECK_TIME chars input */
#define MAXVAL(n) (( 1 <<( n )) -1) /* max_value formula macro */
unsigned input_code();
void *malloc();
int *code_value; /* This is the code value array */
unsigned int *prefix_code; /* This array holds the prefix codes */
unsigned char *append_character; /* This array holds the appended chars */
unsigned char decode_stack[4000]; /* This array holds the decoded string */
int num_bits=INIT_BITS; /* Starting with 9 bit codes */
unsigned long bytes_in=0,bytes_out=0; /* Used to monitor compression ratio */
int max_code; /* old MAX_CODE */
unsigned long checkpoint=CHECK_TIME; /* For compression ratio monitoring */
main(int argc, char *argv[])
{
FILE *input_file, *output_file, *lzw_file;
char input_file_name[81];
/* The three buffers for the compression phase. */
code_value=malloc(TABLE_SIZE*sizeof(unsigned int));
prefix_code=malloc(TABLE_SIZE*sizeof(unsigned int));
append_character=malloc(TABLE_SIZE*sizeof(unsigned char));
if (code_value==NULL || prefix_code==NULL || append_character==NULL) {
printf("Error allocating table space!\n");
exit(1);
}
/* Get the file name, open it, and open the LZW output file. */
if (argc>1)
strcpy(input_file_name,argv[1]);
else {
printf("Input file name: ");
scanf("%s",input_file_name);
}
input_file=fopen(input_file_name,"rb");
lzw_file=fopen("test.lzw","wb");
if (input_file == NULL || lzw_file == NULL) {
printf("Error opening files\n");
exit(1);
}
max_code = MAXVAL(num_bits); /* Initialize max_value & max_code */
compress(input_file,lzw_file); /* Call compression routine */
fclose(input_file);
fclose(lzw_file);
free(code_value); /* Needed only for compression */
lzw_file=fopen("test.lzw","rb");
output_file=fopen("test.out","wb");
if (lzw_file == NULL || output_file == NULL) {
printf("Error opening files\n");
exit(1);
}
num_bits=INIT_BITS; /* Re-initialize for expansion */
max_code = MAXVAL(num_bits);
expand(lzw_file,output_file); /* Call expansion routine */
fclose(lzw_file); /* Clean it all up */
fclose(output_file);
free(prefix_code);
free(append_character);
}
/* MODIFIED This is the new compression routine. The first two 9-bit codes
* have been reserved for communication between the compressor and expander.
*/
compress(FILE *input, FILE *output)
{
unsigned int next_code=FIRST_CODE;
unsigned int character;
unsigned int string_code;
unsigned int index;
int i, /* All purpose integer */
ratio_new, /* New compression ratio as a percentage */
ratio_old=100; /* Original ratio at 100% */
for (i=0;i<TABLE_SIZE;i++) /* Initialize the string table first */
code_value[i]=-1;
printf("Compressing\n");
string_code=getc(input); /* Get the first code */
/* This is the main compression loop. Notice when the table is full we try
* to increment the code size. Only when num_bits == MAX_BITS and the code
* value table is full do we start to monitor the compression ratio.
*/
while((character=getc(input)) != (unsigned)EOF) {
if (!(++bytes_in % 1000)) { /* Count input bytes and pacifier */
putchar('.');
}
index=find_match(string_code,character);
if (code_value[index] != -1)
string_code=code_value[index];
else {
if (next_code <= max_code ) {
code_value[index]=next_code++;
prefix_code[index]=string_code;
append_character[index]=character;
}
output_code(output,string_code); /* Send out current code */
string_code=character;
if (next_code > max_code) { /* Is table Full? */
if ( num_bits < MAX_BITS) { /* Any more bits? */
putchar('+');
max_code = MAXVAL(++num_bits); /* Increment code size then */
}
else if (bytes_in > checkpoint) { /* At checkpoint? */
if (num_bits == MAX_BITS ) {
ratio_new = bytes_out*100/bytes_in; /* New compression ratio */
if (ratio_new > ratio_old) { /* Has ratio degraded? */
output_code(output,CLEAR_TABLE); /* YES,flush string table */
putchar('C');
num_bits=INIT_BITS;
next_code=FIRST_CODE; /* Reset to FIRST_CODE */
max_code = MAXVAL(num_bits); /* Re-Initialize this stuff */
bytes_in = bytes_out = 0;
ratio_old=100; /* Reset compression ratio */
for (i=0;i<TABLE_SIZE;i++) /* Reset code value array */
code_value[i]=-1;
}
else /* NO, then save new */
ratio_old = ratio_new; /* compression ratio */
}
checkpoint = bytes_in + CHECK_TIME; /* Set new checkpoint */
}
}
}
}
output_code(output,string_code); /* Output the last code */
if (next_code == max_code) { /* Handles special case for bit */
++num_bits; /* increment on EOF */
putchar('+');
}
output_code(output,TERMINATOR); /* Output the end of buffer code */
output_code(output,0); /* Flush the output buffer */
output_code(output,0);
output_code(output,0);
putchar('\n');
}
/* UNCHANGED from original
* This is the hashing routine.
*/
find_match(int hash_prefix, unsigned int hash_character)
{
int index, offset;
index = (hash_character << HASHING_SHIFT ) ^ hash_prefix;
if (index == 0 )
offset=1;
else
offset = TABLE_SIZE - index;
while(1) {
if (code_value[index] == -1 )
return(index);
if (prefix_code[index] == hash_prefix &&
append_character[index] == hash_character)
return(index);
index -= offset;
if (index < 0)
index += TABLE_SIZE;
}
}
/* MODIFIED This is the modified expansion routine. It must now check for the
* CLEAR_TABLE code and know when to increment the code size.
*/
expand(FILE *input, FILE *output)
{
unsigned int next_code=FIRST_CODE;
unsigned int new_code;
unsigned int old_code;
int character,
counter=0,
clear_flag=1; /* Need to clear the code value array */
unsigned char *string;
char *decode_string(unsigned char *buffer, unsigned int code);
printf("Expanding\n");
while((new_code=input_code(input)) != TERMINATOR) {
if (clear_flag) { /* Initialize or Re-Initialize */
clear_flag=0;
old_code=new_code; /* The next three lines have been moved */
character=old_code; /* from the original */
putc(old_code,output);
continue;
}
if (new_code == CLEAR_TABLE) { /* Clear string table */
clear_flag=1;
num_bits=INIT_BITS;
next_code=FIRST_CODE;
putchar('C');
max_code = MAXVAL(num_bits);
continue;
}
if (++counter == 1000) { /* Pacifier */
counter=0;
putchar('.');
}
if (new_code >= next_code) { /* Check for string+char+string */
*decode_stack=character;
string=decode_string(decode_stack+1,old_code);
}
else
string=decode_string(decode_stack,new_code);
character = *string; /* Output decoded string in reverse */
while (string >= decode_stack)
putc(*string--,output);
if (next_code <= max_code) { /* Add to string table if not full */
prefix_code[next_code]=old_code;
append_character[next_code++]=character;
if (next_code == max_code && num_bits < MAX_BITS) {
putchar('+');
max_code = MAXVAL(++num_bits);
}
}
old_code=new_code;
}
putchar('\n');
}
/* UNCHANGED from original
* Decode a string from the string table, storing it in a buffer.
* The buffer can then be output in reverse order by the expansion
* program.
*/
char *decode_string(unsigned char *buffer, unsigned int code)
{
int i=0;
while(code > 255 ) {
*buffer++ = append_character[code];
code=prefix_code[code];
if (i++ >= 4000 ) {
printf("Error during code expansion\n");
exit(1);
}
}
*buffer=code;
return(buffer);
}
/* UNCHANGED from original
* Input a variable length code.
*/
unsigned input_code(FILE *input)
{
unsigned int return_value;
static int input_bit_count=0;
static unsigned long input_bit_buffer=0L;
while (input_bit_count <= 24 ) {
input_bit_buffer |= (unsigned long) getc(input) << (24 - input_bit_count);
input_bit_count += 8;
}
return_value=input_bit_buffer >> (32-num_bits);
input_bit_buffer <<= num_bits;
input_bit_count -= num_bits;
return(return_value);
}
/* MODIFIED Output a variable length code.
*/
output_code(FILE *output, unsigned int code)
{
static int output_bit_count=0;
static unsigned long output_bit_buffer=0L;
output_bit_buffer |= (unsigned long) code << (32 - num_bits -
output_bit_count);
output_bit_count += num_bits;
while (output_bit_count >= 8) {
putc(output_bit_buffer >> 24, output);
output_bit_buffer <<= 8;
output_bit_count -= 8;
bytes_out++; /* ADDED for compression monitoring */
}
}
unit LZRW;
interface
uses SysUtils, Classes;
{$IFDEF WIN32}
type
Int16 = SmallInt;
{$ELSE}
type
Int16 = Integer;
{$ENDIF}
const
BufferMaxSize = 32768;
BufferMax = BufferMaxSize - 1;
FLAG_Copied = $80;
FLAG_Compress = $40;
FSignature = 'LZRW';
type
BufferIndex = 0..BufferMax + 15;
BufferSize = 0..BufferMaxSize;
{ extra bytes needed here if compression fails *dh *}
BufferArray = array [BufferIndex] of BYTE;
BufferPtr = ^BufferArray;
ELzrw1KHCompressor = class(Exception);
function Compression(Source, Dest: BufferPtr; SourceSize: BufferSize): BufferSize;
function Decompression(Source, Dest: BufferPtr; SourceSize: BufferSize): BufferSize;
procedure CompressStream(InStream, OutStream: TStream; InSize: LongInt);
procedure DeCompressStream(InStream, OutStream: TStream);
implementation
type
HashTable = array [0..4095] of Int16;
HashTabPtr = ^Hashtable;
var
Hash: HashTabPtr;
{ check if this string has already been seen }
{ in the current 4 KB window }
function GetMatch(Source: BufferPtr; X: BufferIndex;
SourceSize: BufferSize; Hash: HashTabPtr; var Size: WORD;
var Pos: BufferIndex): BOOLEAN;
var
HashValue: WORD;
TmpHash: Int16;
begin
HashValue := (40543 * ((((Source^[X] shl 4) xor Source^[X + 1]) shl 4) xor
Source^[X + 2]) shr 4) and $0FFF;
Result := False;
TmpHash := Hash^[HashValue];
if (TmpHash <> -1) and (X - TmpHash < 4096) then
begin
Pos := TmpHash;
Size := 0;
while ((Size < 18) and (Source^[X + Size] = Source^[Pos + Size]) and
(X + Size < SourceSize)) do
begin
INC(Size);
end;
Result := (Size >= 3)
end;
Hash^[HashValue] := X
end;
{ compress a buffer of max. 32 KB }
function Compression(Source, Dest: BufferPtr;
SourceSize: BufferSize): BufferSize;
var
Bit, Command, Size: WORD;
Key: Word;
X, Y, Z, Pos: BufferIndex;
begin
FillChar(Hash^, SizeOf(Hashtable), $FF);
Dest^[0] := FLAG_Compress;
X := 0;
Y := 3;
Z := 1;
Bit := 0;
Command := 0;
while (X < SourceSize) and (Y <= SourceSize) do
begin
if (Bit > 15) then
begin
Dest^[Z] := HI(Command);
Dest^[Z + 1] := LO(Command);
Z := Y;
Bit := 0;
INC(Y, 2)
end;
Size := 1;
while ((Source^[X] = Source^[X + Size]) and (Size < $FFF) and
(X + Size < SourceSize)) do
begin
INC(Size);
end;
if (Size >= 16) then
begin
Dest^[Y] := 0;
Dest^[Y + 1] := HI(Size - 16);
Dest^[Y + 2] := LO(Size - 16);
Dest^[Y + 3] := Source^[X];
INC(Y, 4);
INC(X, Size);
Command := (Command shl 1) + 1;
end
else
begin { not size >= 16 }
if (GetMatch(Source, X, SourceSize, Hash, Size, Pos)) then
begin
Key := ((X - Pos) shl 4) + (Size - 3);
Dest^[Y] := HI(Key);
Dest^[Y + 1] := LO(Key);
INC(Y, 2);
INC(X, Size);
Command := (Command shl 1) + 1
end
else
begin
Dest^[Y] := Source^[X];
INC(Y);
INC(X);
Command := Command shl 1
end;
end; { size <= 16 }
INC(Bit);
end; { while x < sourcesize ... }
Command := Command shl (16 - Bit);
Dest^[Z] := HI(Command);
Dest^[Z + 1] := LO(Command);
if (Y > SourceSize) then
begin
MOVE(Source^[0], Dest^[1], SourceSize);
Dest^[0] := FLAG_Copied;
Y := SUCC(SourceSize)
end;
Result := Y
end;
{ decompress a buffer of max 32 KB }
function Decompression(Source, Dest: BufferPtr;
SourceSize: BufferSize): BufferSize;
var
X, Y, Pos: BufferIndex;
Command, Size, K: WORD;
Bit: BYTE;
SaveY: BufferIndex; { * dh * unsafe for-loop variable Y }
begin
if (Source^[0] = FLAG_Copied) then
begin
for Y := 1 to PRED(SourceSize) do
begin
Dest^[PRED(Y)] := Source^[Y];
SaveY := Y;
end;
Y := SaveY;
end
else
begin
Y := 0;
X := 3;
Command := (Source^[1] shl 8) + Source^[2];
Bit := 16;
while (X < SourceSize) do
begin
if (Bit = 0) then
begin
Command := (Source^[X] shl 8) + Source^[X + 1];
Bit := 16;
INC(X, 2)
end;
if ((Command and $8000) = 0) then
begin
Dest^[Y] := Source^[X];
INC(X);
INC(Y)
end
else
begin { command and $8000 }
Pos := ((Source^[X] shl 4) + (Source^[X + 1] shr 4));
if (Pos = 0) then
begin
Size := (Source^[X + 1] shl 8) + Source^[X + 2] + 15;
for K := 0 to Size do
begin
Dest^[Y + K] := Source^[X + 3];
end;
INC(X, 4);
INC(Y, Size + 1)
end
else
begin { pos = 0 }
Size := (Source^[X + 1] and $0F) + 2;
for K := 0 to Size do
Dest^[Y + K] := Dest^[Y - Pos + K];
INC(X, 2);
INC(Y, Size + 1)
end; { pos = 0 }
end; { command and $8000 }
Command := Command shl 1;
DEC(Bit)
end { while x < sourcesize }
end;
Result := Y
end; { decompression }
{
Unit "Finalization" as Delphi 2.0 would have it
}
procedure CompressStream(InStream, OutStream: TStream; InSize: LongInt);
var
InBuffer, OutBuffer: BufferArray;
CompressedSize, BytesRead, FinalPos, SizePos, TotalSize: LongInt;
begin
TotalSize := 0;
OutStream.WriteBuffer(FSignature, SizeOf(FSignature));
SizePos := OutStream.Position;
OutStream.WriteBuffer(TotalSize, SizeOf(TotalSize));
while InSize > 0 do
begin
BytesRead := InStream.Read(InBuffer, SizeOf(InBuffer));
CompressedSize := Compression(@InBuffer, @OutBuffer, BytesRead);
OutStream.WriteBuffer(CompressedSize, SizeOf(CompressedSize));
OutStream.WriteBuffer(OutBuffer, CompressedSize);
TotalSize := TotalSize + CompressedSize + SizeOf(CompressedSize);
InSize := InSize - BytesRead;
end;
FinalPos := OutStream.Position;
OutStream.Position := SizePos;
OutStream.WriteBuffer(TotalSize, SizeOf(TotalSize));
OutStream.Position := FinalPos;
end;
procedure DeCompressStream(InStream, OutStream: TStream);
var
InBuffer, OutBuffer: BufferArray;
CompressedSize, UnCompressedSize, InSize: LongInt;
Sig: array[0..SizeOf(FSignature) - 1] of Char;
begin
InStream.ReadBuffer(Sig, SizeOf(FSignature));
if Sig <> FSignature then
raise Exception.Create('Wrong file type');
InStream.ReadBuffer(InSize, SizeOf(InSize));
while InSize > 0 do
begin
InStream.ReadBuffer(CompressedSize, SizeOf(CompressedSize));
InStream.ReadBuffer(InBuffer, CompressedSize);
UnCompressedSize := DeCompression(@InBuffer, @OutBuffer, CompressedSize);
OutStream.WriteBuffer(OutBuffer, UnCompressedSize);
InSize := InSize - CompressedSize - SizeOf(CompressedSize);
end;
end;
var
ExitSave: Pointer;
procedure Cleanup; far;
begin
ExitProc := ExitSave;
if (Hash <> nil) then
Freemem(Hash, Sizeof(HashTable));
end;
initialization
Hash := nil;
try
Getmem(Hash, Sizeof(Hashtable));
except
raise ELzrw1KHCompressor.Create('LZRW1KH : no memory for HASH table');
end;
ExitSave := ExitProc;
ExitProc := @Cleanup;
end.
评论