/* Simple LZSS implementation v1.0 by Jeremy Collake jeremy@bitsum.com http://www.bitsum.com synopsis: This is a very simple implementation of the LZSS compression algorithm. I wrote it some time ago and have had it lying around, not going to much good use. So, I figured I'd make it public in hopes that someone may be able to learn from it. Unfortunatly, my comments are non-existent. This implementation uses a 4095 (12 bit) window, with maximum phrase sizes of 17 bytes, and a minimum of 2 bytes, to allow storage within a nibble. Therefore, codewords are a nice and easy 16bits. Lazy-evaluation is performed when selecting the best phrase to encode. Control bits are stored conveniently in groups of 8. The end of stream marker is indicated by a null index. Please remember that this code is by no means optimal, nor is it intended to be used for any data compression purposes. */ #include<stdio.h> #include<stdlib.h> #include<time.h> #define CONTROL_LITERAL 0 #define CONTROL_CODEWORD 1 #define MAX_LENGTH 17 #define MAX_WINDOWSIZE 4095 #define ITERATIONS_BEFORE_CALLBACK 64 typedef unsigned int (_cdecl *PFNCOMPRESSCALLBACK)(unsigned char *,unsigned char *,unsigned char *); unsigned long CompressData(unsigned char *src, unsigned char *dest, unsigned long src_length, unsigned long windowsize,PFNCOMPRESSCALLBACK); unsigned long DecompressData(unsigned char *src, unsigned char *dest); unsigned long DecompressData(unsigned char *src, unsigned char *dest) { unsigned char control; unsigned int phrase_index,control_count=0; int phrase_length=0; unsigned char *dest_start=dest,*temp; unsigned short codeword=0; control=*src; src++; while(1) { if((control>>7)==CONTROL_LITERAL) { *dest=*src; dest++; src++; } else { codeword=*src; codeword<<=8; codeword|=*(src+1); phrase_index=codeword>>4; if(!phrase_index) break; temp=dest-phrase_index; for(phrase_length=((codeword&0x0f)+2); phrase_length>0; phrase_length--,temp++,dest++) { *dest=*temp; } src+=2; } control=control<<1; control_count++; if(control_count>=8) { control=*src; *src++; control_count=0; } } return (unsigned long)(dest-dest_start)-1; } unsigned int _cdecl CompressCallback(unsigned char *src,unsigned char *src_end,unsigned char *p) { printf("\r%d%% complete. ", ((((unsigned int)p-(unsigned int)src)*100)/((unsigned int)src_end-(unsigned int)src))); return 1; } unsigned long DataCompare(unsigned char *str1,unsigned char *str2, unsigned long maxlength) { unsigned long length=1; for(;*str1==*str2 && length<maxlength;length++,str1++,str2++); return length; } unsigned char *SearchForPhrase(unsigned char *str, unsigned char *src, unsigned long maxlength, unsigned long *bestlength) { unsigned char *p=str, *best=NULL; unsigned long curlength; *bestlength=1; p--; while(p>=src) { if(*p==*str) { curlength=DataCompare((p+1),(str+1),maxlength); if(curlength>*bestlength) { *bestlength=curlength; best=p; } } p--; } return best; } unsigned long CompressData(unsigned char *src, unsigned char *dest, unsigned long src_length, unsigned long windowsize, PFNCOMPRESSCALLBACK CallbackProc) { unsigned char *src_end=src+src_length, *dest_end=dest+src_length, *phrase_ptr=NULL, *lazy_ptr, *control_ptr, *start_dest=dest, *temp, *p=src; unsigned long phrase_length,maxlength,lazy_length,control_counter=0, iterationcnt=0; unsigned short phrase_index; unsigned char control=0; control_ptr=dest; dest++; while(p<=src_end && dest<=dest_end) { control_counter++; if(control_counter==9) { *control_ptr=control; control_ptr=dest; dest++; control=0; control_counter=1; } maxlength=(unsigned long)(src_end-src); if(maxlength>windowsize) maxlength=windowsize; if(maxlength>MAX_LENGTH) maxlength=MAX_LENGTH; temp=p-windowsize; if(temp<src) temp=src; if(!phrase_ptr) phrase_ptr=SearchForPhrase(p,temp,maxlength,&phrase_length); if(maxlength>1) lazy_ptr=SearchForPhrase((p+1),(temp+1),maxlength--,&lazy_length); if(((lazy_ptr!=NULL) && lazy_length>phrase_length) || !phrase_ptr || !maxlength) { phrase_ptr=lazy_ptr; phrase_length=lazy_length; control<<=1; control|=CONTROL_LITERAL; *dest=*p; dest++; p++; } else { control<<=1; control|=CONTROL_CODEWORD; phrase_index=(unsigned short)(p-phrase_ptr); phrase_index=phrase_index<<4; phrase_index=phrase_index|(((unsigned short)phrase_length-2)&0x0f); *dest=(phrase_index>>8); dest++; *dest=(phrase_index&0xff); dest++; p+=phrase_length; phrase_ptr=NULL; } iterationcnt++; if(iterationcnt==ITERATIONS_BEFORE_CALLBACK) { iterationcnt=0; CallbackProc(src,src_end,p); } } control_counter++; control<<=1; control|=CONTROL_CODEWORD; *dest=0;dest++; *dest=0;dest++; while(control_counter!=8) { control<<=1; control_counter++; } *control_ptr=control; CompressCallback(src,src_end,src_end); return (unsigned long)(dest-start_dest); } int main(int argc,char **argv) { FILE *infile,*outfile; unsigned long newsize=0; time_t time1,time2; fpos_t filesize=0; void *srcmem,*destmem; printf("\nSimple LZSS implementation v1.0, by Jeremy Collake"); printf("\nWritten for demonstrative/learning purposes only."); printf("\nhttp://www.bitsum.com - jeremy@bitsum.com"); printf("\n---------------------------------------------------"); if(argc!=4) { printf("\nInvalid parameters."); printf("\nUsage: LZSS [c|d] inputfile outputfile\n"); return 1; } if(toupper(argv[1][0])=='C') { printf("\nCompression mode.\n"); if((infile=fopen(argv[2],"rb"))==NULL) { printf("\nError opening %s for input!",argv[2]); return 2; } if((outfile=fopen(argv[3],"wb"))==NULL) { printf("\nError opening %s for output!",argv[3]); return 3; } fseek(infile,0,SEEK_END); fgetpos(infile,&filesize); fseek(infile,0,SEEK_SET); srcmem=malloc((size_t)filesize); destmem=malloc((size_t)filesize); fread(srcmem,1,(size_t)filesize,infile); fwrite((void *)&filesize,1,4,outfile); time(&time1); newsize=CompressData((unsigned char *)srcmem,(unsigned char *)destmem,(unsigned long)filesize,MAX_WINDOWSIZE,&CompressCallback); time(&time2); fwrite(destmem,1,(size_t)newsize,outfile); printf("\nOriginal size: %u\tCompressed Size: %u\tTiming: %.0f seconds.\n", (unsigned int)filesize,newsize+4,difftime(time2,time1)); } else { printf("\nDecompression mode."); if((infile=fopen(argv[2],"rb"))==NULL) { printf("\nError opening %s for input!",argv[2]); return 2; } if((outfile=fopen(argv[3],"wb"))==NULL) { printf("\nError opening %s for output!",argv[3]); return 3; } fseek(infile,0,SEEK_END); fgetpos(infile,&filesize); fseek(infile,0,SEEK_SET); fread((void *)&newsize,1,4,infile); destmem=malloc((size_t)newsize+1); srcmem=malloc((size_t)filesize); fread(srcmem,1,(size_t)filesize-4,infile); newsize=DecompressData((unsigned char *)srcmem,(unsigned char *)destmem); fwrite(destmem,1,(size_t)newsize,outfile); printf("\nCompressed size: %u Decompressed size: %u",(unsigned int)filesize,newsize); } free(srcmem); free(destmem); fclose(infile); fclose(outfile); return 0; }