/*
Simple LZSS implementation v1.0 by Jeremy Collake
jeremy@bitsum.com
http://www.bitsum.com
synopsis:
This is a very simple implementation of the LZSS
compression algorithm. I wrote it some time ago and
have had it lying around, not going to much good use.
So, I figured I'd make it public in hopes that someone
may be able to learn from it. Unfortunatly, my comments
are non-existent.
This implementation uses a 4095 (12 bit) window, with
maximum phrase sizes of 17 bytes, and a minimum of 2 bytes,
to allow storage within a nibble. Therefore, codewords are a
nice and easy 16bits. Lazy-evaluation is performed when
selecting the best phrase to encode. Control bits are
stored conveniently in groups of 8. The end of stream marker
is indicated by a null index.
Please remember that this code is by no means optimal, nor
is it intended to be used for any data compression purposes.
*/
#include<stdio.h>
#include<stdlib.h>
#include<time.h>
#define CONTROL_LITERAL 0
#define CONTROL_CODEWORD 1
#define MAX_LENGTH 17
#define MAX_WINDOWSIZE 4095
#define ITERATIONS_BEFORE_CALLBACK 64
typedef unsigned int (_cdecl *PFNCOMPRESSCALLBACK)(unsigned char *,unsigned char *,unsigned char *);
unsigned long CompressData(unsigned char *src, unsigned char *dest,
unsigned long src_length,
unsigned long windowsize,PFNCOMPRESSCALLBACK);
unsigned long DecompressData(unsigned char *src, unsigned char *dest);
unsigned long DecompressData(unsigned char *src, unsigned char *dest) {
unsigned char control;
unsigned int phrase_index,control_count=0;
int phrase_length=0;
unsigned char *dest_start=dest,*temp;
unsigned short codeword=0;
control=*src;
src++;
while(1) {
if((control>>7)==CONTROL_LITERAL) {
*dest=*src;
dest++;
src++;
}
else {
codeword=*src;
codeword<<=8;
codeword|=*(src+1);
phrase_index=codeword>>4;
if(!phrase_index) break;
temp=dest-phrase_index;
for(phrase_length=((codeword&0x0f)+2);
phrase_length>0;
phrase_length--,temp++,dest++) {
*dest=*temp;
}
src+=2;
}
control=control<<1;
control_count++;
if(control_count>=8) {
control=*src;
*src++;
control_count=0;
}
}
return (unsigned long)(dest-dest_start)-1;
}
unsigned int _cdecl CompressCallback(unsigned char *src,unsigned char *src_end,unsigned char *p) {
printf("\r%d%% complete. ",
((((unsigned int)p-(unsigned int)src)*100)/((unsigned int)src_end-(unsigned int)src)));
return 1;
}
unsigned long DataCompare(unsigned char *str1,unsigned char *str2,
unsigned long maxlength) {
unsigned long length=1;
for(;*str1==*str2 && length<maxlength;length++,str1++,str2++);
return length;
}
unsigned char *SearchForPhrase(unsigned char *str, unsigned char *src,
unsigned long maxlength,
unsigned long *bestlength) {
unsigned char *p=str, *best=NULL;
unsigned long curlength;
*bestlength=1;
p--;
while(p>=src) {
if(*p==*str) {
curlength=DataCompare((p+1),(str+1),maxlength);
if(curlength>*bestlength) {
*bestlength=curlength;
best=p;
}
}
p--;
}
return best;
}
unsigned long CompressData(unsigned char *src, unsigned char *dest,
unsigned long src_length,
unsigned long windowsize, PFNCOMPRESSCALLBACK CallbackProc) {
unsigned char *src_end=src+src_length,
*dest_end=dest+src_length,
*phrase_ptr=NULL,
*lazy_ptr,
*control_ptr,
*start_dest=dest,
*temp,
*p=src;
unsigned long phrase_length,maxlength,lazy_length,control_counter=0,
iterationcnt=0;
unsigned short phrase_index;
unsigned char control=0;
control_ptr=dest;
dest++;
while(p<=src_end && dest<=dest_end) {
control_counter++;
if(control_counter==9) {
*control_ptr=control;
control_ptr=dest;
dest++;
control=0;
control_counter=1;
}
maxlength=(unsigned long)(src_end-src);
if(maxlength>windowsize) maxlength=windowsize;
if(maxlength>MAX_LENGTH) maxlength=MAX_LENGTH;
temp=p-windowsize;
if(temp<src) temp=src;
if(!phrase_ptr) phrase_ptr=SearchForPhrase(p,temp,maxlength,&phrase_length);
if(maxlength>1)
lazy_ptr=SearchForPhrase((p+1),(temp+1),maxlength--,&lazy_length);
if(((lazy_ptr!=NULL) && lazy_length>phrase_length) ||
!phrase_ptr || !maxlength) {
phrase_ptr=lazy_ptr;
phrase_length=lazy_length;
control<<=1;
control|=CONTROL_LITERAL;
*dest=*p;
dest++;
p++;
}
else {
control<<=1;
control|=CONTROL_CODEWORD;
phrase_index=(unsigned short)(p-phrase_ptr);
phrase_index=phrase_index<<4;
phrase_index=phrase_index|(((unsigned short)phrase_length-2)&0x0f);
*dest=(phrase_index>>8);
dest++;
*dest=(phrase_index&0xff);
dest++;
p+=phrase_length;
phrase_ptr=NULL;
}
iterationcnt++;
if(iterationcnt==ITERATIONS_BEFORE_CALLBACK) {
iterationcnt=0;
CallbackProc(src,src_end,p);
}
}
control_counter++;
control<<=1;
control|=CONTROL_CODEWORD;
*dest=0;dest++;
*dest=0;dest++;
while(control_counter!=8) {
control<<=1;
control_counter++;
}
*control_ptr=control;
CompressCallback(src,src_end,src_end);
return (unsigned long)(dest-start_dest);
}
int main(int argc,char **argv) {
FILE *infile,*outfile;
unsigned long newsize=0;
time_t time1,time2;
fpos_t filesize=0;
void *srcmem,*destmem;
printf("\nSimple LZSS implementation v1.0, by Jeremy Collake");
printf("\nWritten for demonstrative/learning purposes only.");
printf("\nhttp://www.bitsum.com - jeremy@bitsum.com");
printf("\n---------------------------------------------------");
if(argc!=4) {
printf("\nInvalid parameters.");
printf("\nUsage: LZSS [c|d] inputfile outputfile\n");
return 1;
}
if(toupper(argv[1][0])=='C') {
printf("\nCompression mode.\n");
if((infile=fopen(argv[2],"rb"))==NULL) {
printf("\nError opening %s for input!",argv[2]);
return 2;
}
if((outfile=fopen(argv[3],"wb"))==NULL) {
printf("\nError opening %s for output!",argv[3]);
return 3;
}
fseek(infile,0,SEEK_END);
fgetpos(infile,&filesize);
fseek(infile,0,SEEK_SET);
srcmem=malloc((size_t)filesize);
destmem=malloc((size_t)filesize);
fread(srcmem,1,(size_t)filesize,infile);
fwrite((void *)&filesize,1,4,outfile);
time(&time1);
newsize=CompressData((unsigned char *)srcmem,(unsigned char *)destmem,(unsigned long)filesize,MAX_WINDOWSIZE,&CompressCallback);
time(&time2);
fwrite(destmem,1,(size_t)newsize,outfile);
printf("\nOriginal size: %u\tCompressed Size: %u\tTiming: %.0f seconds.\n",
(unsigned int)filesize,newsize+4,difftime(time2,time1));
}
else {
printf("\nDecompression mode.");
if((infile=fopen(argv[2],"rb"))==NULL) {
printf("\nError opening %s for input!",argv[2]);
return 2;
}
if((outfile=fopen(argv[3],"wb"))==NULL) {
printf("\nError opening %s for output!",argv[3]);
return 3;
}
fseek(infile,0,SEEK_END);
fgetpos(infile,&filesize);
fseek(infile,0,SEEK_SET);
fread((void *)&newsize,1,4,infile);
destmem=malloc((size_t)newsize+1);
srcmem=malloc((size_t)filesize);
fread(srcmem,1,(size_t)filesize-4,infile);
newsize=DecompressData((unsigned char *)srcmem,(unsigned char *)destmem);
fwrite(destmem,1,(size_t)newsize,outfile);
printf("\nCompressed size: %u Decompressed size: %u",(unsigned int)filesize,newsize);
}
free(srcmem);
free(destmem);
fclose(infile);
fclose(outfile);
return 0;
}