/*
 Simple LZSS implementation v1.0 by Jeremy Collake
 jeremy@bitsum.com
 http://www.bitsum.com

 synopsis: 
 This is a very simple implementation of the LZSS 
 compression algorithm. I wrote it some time ago and
 have had it lying around, not going to much good use.
 So, I figured I'd make it public in hopes that someone 
 may be able to learn from it. Unfortunatly, my comments
 are non-existent.
 
 This implementation uses a 4095 (12 bit) window, with
 maximum phrase sizes of 17 bytes, and a minimum of 2 bytes,
 to allow storage within a nibble. Therefore, codewords are a 
 nice and easy 16bits. Lazy-evaluation is performed when 
 selecting the best phrase to encode. Control bits are 
 stored conveniently in groups of 8. The end of stream marker
 is indicated by a null index.

 Please remember that this code is by no means optimal, nor
 is it intended to be used for any data compression purposes.

*/
#include<stdio.h>
#include<stdlib.h>
#include<time.h>

#define CONTROL_LITERAL 0
#define CONTROL_CODEWORD 1

#define MAX_LENGTH 17
#define MAX_WINDOWSIZE 4095
#define ITERATIONS_BEFORE_CALLBACK 64

typedef unsigned int (_cdecl *PFNCOMPRESSCALLBACK)(unsigned char *,unsigned char *,unsigned char *);

unsigned long CompressData(unsigned char *src, unsigned char *dest, 
						   unsigned long src_length, 
						   unsigned long windowsize,PFNCOMPRESSCALLBACK);
unsigned long DecompressData(unsigned char *src, unsigned char *dest);

unsigned long DecompressData(unsigned char *src, unsigned char *dest) {
	unsigned char control;
	unsigned int phrase_index,control_count=0;
	int phrase_length=0;
	unsigned char *dest_start=dest,*temp;
	unsigned short codeword=0;	
	control=*src;
	src++;
	while(1) {				
		if((control>>7)==CONTROL_LITERAL) {						
			*dest=*src;
			dest++;
			src++;
		}		
		else { 								
			codeword=*src;			
			codeword<<=8;			
			codeword|=*(src+1);			
			phrase_index=codeword>>4;
			if(!phrase_index) break;
			temp=dest-phrase_index;									
			for(phrase_length=((codeword&0x0f)+2);
				phrase_length>0;
				phrase_length--,temp++,dest++) {
					*dest=*temp;
			}
			src+=2;
		}
		control=control<<1;
		control_count++;
		if(control_count>=8) {
			control=*src;
			*src++;
			control_count=0;
		}
	}
	return (unsigned long)(dest-dest_start)-1;
}

unsigned int _cdecl CompressCallback(unsigned char *src,unsigned char *src_end,unsigned char *p) {
	printf("\r%d%% complete.   ",
		((((unsigned int)p-(unsigned int)src)*100)/((unsigned int)src_end-(unsigned int)src)));
	return 1;
}

unsigned long DataCompare(unsigned char *str1,unsigned char *str2,
						  unsigned long maxlength) {
	unsigned long length=1;
	for(;*str1==*str2 && length<maxlength;length++,str1++,str2++);	
	return length;
}

unsigned char *SearchForPhrase(unsigned char *str, unsigned char *src, 
					  unsigned long maxlength,
					  unsigned long *bestlength) {
	unsigned char *p=str, *best=NULL;
	unsigned long curlength;
	*bestlength=1;	
	p--;
	while(p>=src) {
		if(*p==*str) {			
			curlength=DataCompare((p+1),(str+1),maxlength);
			if(curlength>*bestlength) {
				*bestlength=curlength;
				best=p;
			}
		}
		p--;
	}
	return best;
}

unsigned long CompressData(unsigned char *src, unsigned char *dest, 
						   unsigned long src_length, 
						   unsigned long windowsize, PFNCOMPRESSCALLBACK CallbackProc) {
	unsigned char *src_end=src+src_length,
		*dest_end=dest+src_length,
		*phrase_ptr=NULL,
		*lazy_ptr,
		*control_ptr,
		*start_dest=dest,
		*temp,
		*p=src;					
	unsigned long phrase_length,maxlength,lazy_length,control_counter=0,
		iterationcnt=0;
	unsigned short phrase_index;
	unsigned char control=0;
	control_ptr=dest;
	dest++;
	while(p<=src_end && dest<=dest_end) {
		control_counter++;		
		if(control_counter==9) {								
				*control_ptr=control;
				control_ptr=dest;
				dest++;
				control=0;	
				control_counter=1;
		}
		maxlength=(unsigned long)(src_end-src);
		if(maxlength>windowsize) maxlength=windowsize;
		if(maxlength>MAX_LENGTH) maxlength=MAX_LENGTH;
		temp=p-windowsize;
		if(temp<src) temp=src;						
		if(!phrase_ptr) phrase_ptr=SearchForPhrase(p,temp,maxlength,&phrase_length);		
		if(maxlength>1)		
		lazy_ptr=SearchForPhrase((p+1),(temp+1),maxlength--,&lazy_length);
		if(((lazy_ptr!=NULL) && lazy_length>phrase_length) || 
			!phrase_ptr || !maxlength) {
			phrase_ptr=lazy_ptr;
			phrase_length=lazy_length;
			control<<=1;			
			control|=CONTROL_LITERAL;			
			*dest=*p;
			dest++;
			p++;					
		}
		else {
			control<<=1;			
			control|=CONTROL_CODEWORD;
			phrase_index=(unsigned short)(p-phrase_ptr);		
			phrase_index=phrase_index<<4;
			phrase_index=phrase_index|(((unsigned short)phrase_length-2)&0x0f);			
			*dest=(phrase_index>>8);
			dest++;			
			*dest=(phrase_index&0xff);
			dest++;
			p+=phrase_length;			
			phrase_ptr=NULL;
		}		
		iterationcnt++;
		if(iterationcnt==ITERATIONS_BEFORE_CALLBACK) {
			iterationcnt=0;
			CallbackProc(src,src_end,p);
		}

	}
	control_counter++;
	control<<=1;
	control|=CONTROL_CODEWORD;	
	*dest=0;dest++;
	*dest=0;dest++;
	while(control_counter!=8) {
		control<<=1;		
		control_counter++;
	}
	*control_ptr=control;	
	CompressCallback(src,src_end,src_end);
	return (unsigned long)(dest-start_dest);	
}

int main(int argc,char **argv) {
	FILE *infile,*outfile;
	unsigned long newsize=0;
	time_t time1,time2;
	fpos_t filesize=0;
	void *srcmem,*destmem;
	printf("\nSimple LZSS implementation v1.0, by Jeremy Collake");
	printf("\nWritten for demonstrative/learning purposes only.");
	printf("\nhttp://www.bitsum.com - jeremy@bitsum.com");
	printf("\n---------------------------------------------------");
	if(argc!=4) {
		printf("\nInvalid parameters.");
		printf("\nUsage: LZSS [c|d] inputfile outputfile\n");
		return 1;
	}
	if(toupper(argv[1][0])=='C') {
		printf("\nCompression mode.\n");
		if((infile=fopen(argv[2],"rb"))==NULL) {
			printf("\nError opening %s for input!",argv[2]);
			return 2;
		}				
		if((outfile=fopen(argv[3],"wb"))==NULL) {
			printf("\nError opening %s for output!",argv[3]);
			return 3;
		}
		fseek(infile,0,SEEK_END);
		fgetpos(infile,&filesize);
		fseek(infile,0,SEEK_SET);		
		srcmem=malloc((size_t)filesize);
		destmem=malloc((size_t)filesize);
		fread(srcmem,1,(size_t)filesize,infile);
		fwrite((void *)&filesize,1,4,outfile);
		time(&time1);
		newsize=CompressData((unsigned char *)srcmem,(unsigned char *)destmem,(unsigned long)filesize,MAX_WINDOWSIZE,&CompressCallback);						
		time(&time2);
		fwrite(destmem,1,(size_t)newsize,outfile);		
		printf("\nOriginal size: %u\tCompressed Size: %u\tTiming: %.0f seconds.\n",
			(unsigned int)filesize,newsize+4,difftime(time2,time1));				
	}	
	else {
		printf("\nDecompression mode.");
		if((infile=fopen(argv[2],"rb"))==NULL) {
			printf("\nError opening %s for input!",argv[2]);
			return 2;
		}				
		if((outfile=fopen(argv[3],"wb"))==NULL) {
			printf("\nError opening %s for output!",argv[3]);
			return 3;
		}		
		fseek(infile,0,SEEK_END);
		fgetpos(infile,&filesize);
		fseek(infile,0,SEEK_SET);		
		fread((void *)&newsize,1,4,infile);
		destmem=malloc((size_t)newsize+1);
		srcmem=malloc((size_t)filesize);
		fread(srcmem,1,(size_t)filesize-4,infile);		
		newsize=DecompressData((unsigned char *)srcmem,(unsigned char *)destmem);
		fwrite(destmem,1,(size_t)newsize,outfile);
		printf("\nCompressed size: %u  Decompressed size: %u",(unsigned int)filesize,newsize);		
	}
	free(srcmem);
	free(destmem);
	fclose(infile);
	fclose(outfile);
	return 0;
}