登录  
 加关注
   显示下一条  |  关闭
温馨提示!由于新浪微博认证机制调整,您的新浪微博帐号绑定已过期,请重新绑定!立即重新绑定新浪微博》  |  关闭

BCB-DG's Blog

...

 
 
 

日志

 
 

Basic LZW Data Compression  

2008-11-25 14:11:53|  分类: Delphi |  标签: |举报 |字号 订阅

  下载LOFTER 我的照片书  |

/* Basic LZW Data Compression program published in DDJ October 1989 issue.
 * Original Author: Mark R. Nelson
 * Updated by: Shawn M. Regan, January 1990
 * Added: - Method to clear table when compression ratio degrades
 *        - Self adjusting code size capability (up to 14 bits)
 * Updated functions are marked with "MODIFIED". main() has been updated also
 * Compile with -ml (large model) for MAX_BITS == 14 only
 */

#include <stdio.h>

#define INIT_BITS 9
#define MAX_BITS 14           /* Do not exceed 14 with this program */
#define HASHING_SHIFT MAX_BITS - 8

#if MAX_BITS == 14            /* Set the table size. Must be a prime    */
#define TABLE_SIZE 18041      /* number somewhat larger than 2^MAX_BITS.*/
#elif MAX_BITS == 13
#define TABLE_SIZE 9029
#else
#define TABLE_SIZE 5021
#endif

#define CLEAR_TABLE 256    /* Code to flush the string table */
#define TERMINATOR  257    /* To mark EOF Condition, instead of MAX_VALUE */
#define FIRST_CODE  258    /* First available code for code_value table */
#define CHECK_TIME  100    /* Check comp ratio every CHECK_TIME chars input */

#define MAXVAL(n) (( 1 <<( n )) -1)   /* max_value formula macro */

unsigned input_code();
void *malloc();

int *code_value;                      /* This is the code value array */
unsigned int *prefix_code;            /* This array holds the prefix codes */
unsigned char *append_character;      /* This array holds the appended chars */
unsigned char decode_stack[4000];     /* This array holds the decoded string */

int num_bits=INIT_BITS;               /* Starting with 9 bit codes */
unsigned long bytes_in=0,bytes_out=0; /* Used to monitor compression ratio */
int max_code;                         /* old MAX_CODE */
unsigned long checkpoint=CHECK_TIME;  /* For compression ratio monitoring */

main(int argc, char *argv[])
{
   FILE *input_file, *output_file, *lzw_file;
   char input_file_name[81];
 /* The three buffers for the compression phase.  */
   code_value=malloc(TABLE_SIZE*sizeof(unsigned int));
   prefix_code=malloc(TABLE_SIZE*sizeof(unsigned int));
   append_character=malloc(TABLE_SIZE*sizeof(unsigned char));

   if (code_value==NULL || prefix_code==NULL || append_character==NULL) {
      printf("Error allocating table space!\n");
      exit(1);
   }
 /* Get the file name, open it, and open the LZW output file. */
   if (argc>1)
      strcpy(input_file_name,argv[1]);
   else {
      printf("Input file name: ");
      scanf("%s",input_file_name);
   }
   input_file=fopen(input_file_name,"rb");
   lzw_file=fopen("test.lzw","wb");
   if (input_file == NULL || lzw_file == NULL) {
      printf("Error opening files\n");
      exit(1);
   }
   max_code = MAXVAL(num_bits);     /* Initialize max_value & max_code */
   compress(input_file,lzw_file);       /* Call compression routine */

   fclose(input_file);
   fclose(lzw_file);
   free(code_value);                    /* Needed only for compression */

   lzw_file=fopen("test.lzw","rb");
   output_file=fopen("test.out","wb");
   if (lzw_file == NULL || output_file == NULL) {
      printf("Error opening files\n");
      exit(1);
   }
   num_bits=INIT_BITS;                  /* Re-initialize for expansion */
   max_code = MAXVAL(num_bits);
   expand(lzw_file,output_file);        /* Call expansion routine */

   fclose(lzw_file);                    /* Clean it all up */
   fclose(output_file);
   free(prefix_code);
   free(append_character);
}
/* MODIFIED This is the new compression routine. The first two 9-bit codes
 * have been reserved for communication between the compressor and expander.
 */
compress(FILE *input, FILE *output)
{
   unsigned int next_code=FIRST_CODE;
   unsigned int character;
   unsigned int string_code;
   unsigned int index;
   int i,             /* All purpose integer */
   ratio_new,         /* New compression ratio as a percentage */
   ratio_old=100;     /* Original ratio at 100% */

   for (i=0;i<TABLE_SIZE;i++)   /* Initialize the string table first */
      code_value[i]=-1;
   printf("Compressing\n");
   string_code=getc(input);     /* Get the first code */

 /* This is the main compression loop. Notice when the table is full we try
  * to increment the code size. Only when num_bits == MAX_BITS and the code
  * value table is full do we start to monitor the compression ratio.
  */
   while((character=getc(input)) != (unsigned)EOF) {
      if (!(++bytes_in % 1000)) {     /* Count input bytes and pacifier */
         putchar('.');
      }
      index=find_match(string_code,character);
      if (code_value[index] != -1)
         string_code=code_value[index];
      else {
         if (next_code <= max_code ) {
            code_value[index]=next_code++;
            prefix_code[index]=string_code;
            append_character[index]=character;
         }
         output_code(output,string_code);   /* Send out current code */
         string_code=character;
         if (next_code > max_code) {      /* Is table Full? */
            if ( num_bits < MAX_BITS) {     /* Any more bits? */
               putchar('+');
               max_code = MAXVAL(++num_bits);  /* Increment code size then */
            }
            else if (bytes_in > checkpoint) {         /* At checkpoint? */
               if (num_bits == MAX_BITS ) {
                ratio_new = bytes_out*100/bytes_in; /* New compression ratio */
                if (ratio_new > ratio_old) {        /* Has ratio degraded? */
                  output_code(output,CLEAR_TABLE); /* YES,flush string table */
                  putchar('C');
                  num_bits=INIT_BITS;
                  next_code=FIRST_CODE;        /* Reset to FIRST_CODE */
                  max_code = MAXVAL(num_bits); /* Re-Initialize this stuff */
                  bytes_in = bytes_out = 0;
                  ratio_old=100;               /* Reset compression ratio */
                  for (i=0;i<TABLE_SIZE;i++)   /* Reset code value array */
                        code_value[i]=-1;
               }
               else                                /* NO, then save new */
               ratio_old = ratio_new;            /* compression ratio */
            }
            checkpoint = bytes_in + CHECK_TIME;    /* Set new checkpoint */
            }
         }
      }
   }
   output_code(output,string_code);   /* Output the last code */
   if (next_code == max_code) {       /* Handles special case for bit */
      ++num_bits;                     /* increment on EOF */
      putchar('+');
   }
   output_code(output,TERMINATOR);    /* Output the end of buffer code */
   output_code(output,0);             /* Flush the output buffer */
   output_code(output,0);
   output_code(output,0);
   putchar('\n');
}
/* UNCHANGED from original
 * This is the hashing routine.
 */
find_match(int hash_prefix, unsigned int hash_character)
{
   int index, offset;

   index = (hash_character << HASHING_SHIFT ) ^ hash_prefix;
   if (index == 0 )
      offset=1;
   else
      offset = TABLE_SIZE - index;
   while(1) {
      if (code_value[index] == -1 )
         return(index);
      if (prefix_code[index] == hash_prefix &&
                                     append_character[index] == hash_character)
         return(index);
      index -= offset;
      if (index < 0)
         index += TABLE_SIZE;
   }
}

/* MODIFIED This is the modified expansion routine. It must now check for the
 * CLEAR_TABLE code and know when to increment the code size.
 */
expand(FILE *input, FILE *output)
{
   unsigned int next_code=FIRST_CODE;
   unsigned int new_code;
   unsigned int old_code;
   int character,
   counter=0,
   clear_flag=1;          /* Need to clear the code value array */
   unsigned char *string;
   char *decode_string(unsigned char *buffer, unsigned int code);

   printf("Expanding\n");

   while((new_code=input_code(input)) != TERMINATOR) {
      if (clear_flag) {       /* Initialize or Re-Initialize */
         clear_flag=0;
         old_code=new_code;   /* The next three lines have been moved */
         character=old_code;  /* from the original */
         putc(old_code,output);
         continue;
      }
      if (new_code == CLEAR_TABLE) {     /* Clear string table */
         clear_flag=1;
         num_bits=INIT_BITS;
         next_code=FIRST_CODE;
         putchar('C');
         max_code = MAXVAL(num_bits);
         continue;
      }
      if (++counter == 1000) {           /* Pacifier */
         counter=0;
         putchar('.');
      }
      if (new_code >= next_code) {       /* Check for string+char+string */
         *decode_stack=character;
         string=decode_string(decode_stack+1,old_code);
      }
      else
         string=decode_string(decode_stack,new_code);

      character = *string;              /* Output decoded string in reverse */
      while (string >= decode_stack)
         putc(*string--,output);

      if (next_code <= max_code) {      /* Add to string table if not full */
         prefix_code[next_code]=old_code;
         append_character[next_code++]=character;
         if (next_code == max_code && num_bits < MAX_BITS) {
            putchar('+');
            max_code = MAXVAL(++num_bits);
         }
      }
      old_code=new_code;
   }
   putchar('\n');
}
/* UNCHANGED from original
 * Decode a string from the string table, storing it in a buffer.
 * The buffer can then be output in reverse order by the expansion
 * program.
 */
char *decode_string(unsigned char *buffer, unsigned int code)
{
   int i=0;

   while(code > 255 ) {
      *buffer++ = append_character[code];
      code=prefix_code[code];
      if (i++ >= 4000 ) {
         printf("Error during code expansion\n");
         exit(1);
      }
   }
   *buffer=code;
   return(buffer);
}

/* UNCHANGED from original
 * Input a variable length code.
 */
unsigned input_code(FILE *input)
{
   unsigned int return_value;
   static int input_bit_count=0;
   static unsigned long input_bit_buffer=0L;

   while (input_bit_count <= 24 ) {
     input_bit_buffer |= (unsigned long) getc(input) << (24 - input_bit_count);
     input_bit_count += 8;
   }
   return_value=input_bit_buffer >> (32-num_bits);
   input_bit_buffer <<= num_bits;
   input_bit_count -= num_bits;
   return(return_value);
}
/* MODIFIED Output a variable length code.
 */
output_code(FILE *output, unsigned int code)
{
   static int output_bit_count=0;
   static unsigned long output_bit_buffer=0L;

   output_bit_buffer |= (unsigned long) code << (32 - num_bits -
                                                             output_bit_count);
   output_bit_count += num_bits;
   while (output_bit_count >= 8) {
      putc(output_bit_buffer >> 24, output);
      output_bit_buffer <<= 8;
      output_bit_count -= 8;
      bytes_out++;                    /* ADDED for compression monitoring */
   }
}

unit LZRW;

interface

uses SysUtils, Classes;

{$IFDEF WIN32}
type
  Int16 = SmallInt;
{$ELSE}
type
  Int16 = Integer;
{$ENDIF}

const
  BufferMaxSize = 32768;
  BufferMax     = BufferMaxSize - 1;
  FLAG_Copied   = $80;
  FLAG_Compress = $40;
  FSignature = 'LZRW';
 
type
  BufferIndex = 0..BufferMax + 15;
  BufferSize  = 0..BufferMaxSize;
  { extra bytes needed here if compression fails      *dh *}
  BufferArray = array [BufferIndex] of BYTE;
  BufferPtr   = ^BufferArray;


  ELzrw1KHCompressor = class(Exception);


function Compression(Source, Dest: BufferPtr; SourceSize: BufferSize): BufferSize;
function Decompression(Source, Dest: BufferPtr; SourceSize: BufferSize): BufferSize;

procedure CompressStream(InStream, OutStream: TStream; InSize: LongInt);
procedure DeCompressStream(InStream, OutStream: TStream);

implementation

type
  HashTable  = array [0..4095] of Int16;
  HashTabPtr = ^Hashtable;

var
  Hash: HashTabPtr;

  { check if this string has already been seen }
  { in the current 4 KB window }
 
function GetMatch(Source: BufferPtr; X: BufferIndex;
  SourceSize: BufferSize; Hash: HashTabPtr; var Size: WORD;
  var Pos: BufferIndex): BOOLEAN;
var
  HashValue: WORD;
  TmpHash: Int16;
begin
  HashValue := (40543 * ((((Source^[X] shl 4) xor Source^[X + 1]) shl 4) xor
    Source^[X + 2]) shr 4) and $0FFF;
  Result := False;
  TmpHash := Hash^[HashValue];
  if (TmpHash <> -1) and (X - TmpHash < 4096) then
  begin
    Pos  := TmpHash;
    Size := 0;
    while ((Size < 18) and (Source^[X + Size] = Source^[Pos + Size]) and
      (X + Size < SourceSize)) do
    begin
      INC(Size);
    end;
    Result := (Size >= 3)
  end;
  Hash^[HashValue] := X
end;

{ compress a buffer of max. 32 KB }

function Compression(Source, Dest: BufferPtr;
  SourceSize: BufferSize): BufferSize;
var
  Bit, Command, Size: WORD;
  Key: Word;
  X, Y, Z, Pos: BufferIndex;
begin
  FillChar(Hash^, SizeOf(Hashtable), $FF);
  Dest^[0] := FLAG_Compress;
  X := 0;
  Y := 3;
  Z := 1;
  Bit := 0;
  Command := 0;
  while (X < SourceSize) and (Y <= SourceSize) do
  begin
    if (Bit > 15) then
    begin
      Dest^[Z] := HI(Command);
      Dest^[Z + 1] := LO(Command);
      Z := Y;
      Bit := 0;
      INC(Y, 2)
    end;
    Size := 1;
    while ((Source^[X] = Source^[X + Size]) and (Size < $FFF) and
      (X + Size < SourceSize)) do
    begin
      INC(Size);
    end;
    if (Size >= 16) then
    begin
      Dest^[Y] := 0;
      Dest^[Y + 1] := HI(Size - 16);
      Dest^[Y + 2] := LO(Size - 16);
      Dest^[Y + 3] := Source^[X];
      INC(Y, 4);
      INC(X, Size);
      Command := (Command shl 1) + 1;
    end
    else
    begin { not size >= 16 }
      if (GetMatch(Source, X, SourceSize, Hash, Size, Pos)) then
      begin
        Key := ((X - Pos) shl 4) + (Size - 3);
        Dest^[Y] := HI(Key);
        Dest^[Y + 1] := LO(Key);
        INC(Y, 2);
        INC(X, Size);
        Command := (Command shl 1) + 1
      end
      else
      begin
        Dest^[Y] := Source^[X];
        INC(Y);
        INC(X);
        Command := Command shl 1
      end;
    end; { size <= 16 }
    INC(Bit);
  end; { while x < sourcesize ... }
  Command := Command shl (16 - Bit);
  Dest^[Z] := HI(Command);
  Dest^[Z + 1] := LO(Command);
  if (Y > SourceSize) then
  begin
    MOVE(Source^[0], Dest^[1], SourceSize);
    Dest^[0] := FLAG_Copied;
    Y        := SUCC(SourceSize)
  end;
  Result := Y
end;

{ decompress a buffer of max 32 KB }

function Decompression(Source, Dest: BufferPtr;
  SourceSize: BufferSize): BufferSize;
var
  X, Y, Pos: BufferIndex;
  Command, Size, K: WORD;
  Bit: BYTE;
  SaveY: BufferIndex; { * dh * unsafe for-loop variable Y }
begin
  if (Source^[0] = FLAG_Copied) then
  begin
    for Y := 1 to PRED(SourceSize) do
    begin
      Dest^[PRED(Y)] := Source^[Y];
      SaveY := Y;
    end;
    Y := SaveY;
  end
  else
  begin
    Y       := 0;
    X       := 3;
    Command := (Source^[1] shl 8) + Source^[2];
    Bit     := 16;
    while (X < SourceSize) do
    begin
      if (Bit = 0) then
      begin
        Command := (Source^[X] shl 8) + Source^[X + 1];
        Bit := 16;
        INC(X, 2)
      end;
      if ((Command and $8000) = 0) then
      begin
        Dest^[Y] := Source^[X];
        INC(X);
        INC(Y)
      end
      else
      begin  { command and $8000 }
        Pos := ((Source^[X] shl 4) + (Source^[X + 1] shr 4));
        if (Pos = 0) then
        begin
          Size := (Source^[X + 1] shl 8) + Source^[X + 2] + 15;
          for K := 0 to Size do
          begin
            Dest^[Y + K] := Source^[X + 3];
          end;
          INC(X, 4);
          INC(Y, Size + 1)
        end
        else
        begin  { pos = 0 }
          Size := (Source^[X + 1] and $0F) + 2;
          for K := 0 to Size do
            Dest^[Y + K] := Dest^[Y - Pos + K];
          INC(X, 2);
          INC(Y, Size + 1)
        end; { pos = 0 }
      end;   { command and $8000 }
      Command := Command shl 1;
      DEC(Bit)
    end { while x < sourcesize }
  end;
  Result := Y
end;  { decompression }

{
  Unit "Finalization" as Delphi 2.0 would have it
}

procedure CompressStream(InStream, OutStream: TStream; InSize: LongInt);
var
  InBuffer, OutBuffer: BufferArray;
  CompressedSize, BytesRead, FinalPos, SizePos, TotalSize: LongInt;
begin
  TotalSize := 0;
  OutStream.WriteBuffer(FSignature, SizeOf(FSignature));
  SizePos := OutStream.Position;
  OutStream.WriteBuffer(TotalSize, SizeOf(TotalSize));
  while InSize > 0 do
  begin
    BytesRead      := InStream.Read(InBuffer, SizeOf(InBuffer));
    CompressedSize := Compression(@InBuffer, @OutBuffer, BytesRead);
    OutStream.WriteBuffer(CompressedSize, SizeOf(CompressedSize));
    OutStream.WriteBuffer(OutBuffer, CompressedSize);
    TotalSize := TotalSize + CompressedSize + SizeOf(CompressedSize);
    InSize    := InSize - BytesRead;
  end;
  FinalPos := OutStream.Position;
  OutStream.Position := SizePos;
  OutStream.WriteBuffer(TotalSize, SizeOf(TotalSize));
  OutStream.Position := FinalPos;
end;

procedure DeCompressStream(InStream, OutStream: TStream);
var
  InBuffer, OutBuffer: BufferArray;
  CompressedSize, UnCompressedSize, InSize: LongInt;
  Sig: array[0..SizeOf(FSignature) - 1] of Char;
begin
  InStream.ReadBuffer(Sig, SizeOf(FSignature));
  if Sig <> FSignature then
    raise Exception.Create('Wrong file type');
  InStream.ReadBuffer(InSize, SizeOf(InSize));
  while InSize > 0 do
  begin
    InStream.ReadBuffer(CompressedSize, SizeOf(CompressedSize));
    InStream.ReadBuffer(InBuffer, CompressedSize);
    UnCompressedSize := DeCompression(@InBuffer, @OutBuffer, CompressedSize);
    OutStream.WriteBuffer(OutBuffer, UnCompressedSize);
    InSize := InSize - CompressedSize - SizeOf(CompressedSize);
  end;
end;

var
  ExitSave: Pointer;

procedure Cleanup; far;
begin
  ExitProc := ExitSave;
  if (Hash <> nil) then
    Freemem(Hash, Sizeof(HashTable));
end;

initialization
  Hash := nil;
  try
    Getmem(Hash, Sizeof(Hashtable));
  except
    raise ELzrw1KHCompressor.Create('LZRW1KH : no memory for HASH table');
  end;
  ExitSave := ExitProc;
  ExitProc := @Cleanup;
end.

  评论这张
 
阅读(1177)| 评论(0)

历史上的今天

评论

<#--最新日志,群博日志--> <#--推荐日志--> <#--引用记录--> <#--博主推荐--> <#--随机阅读--> <#--首页推荐--> <#--历史上的今天--> <#--被推荐日志--> <#--上一篇,下一篇--> <#-- 热度 --> <#-- 网易新闻广告 --> <#--右边模块结构--> <#--评论模块结构--> <#--引用模块结构--> <#--博主发起的投票-->
 
 
 
 
 
 
 
 
 
 
 
 
 
 

页脚

网易公司版权所有 ©1997-2018