lzhuf.c revision 677833bc953b6cb418c701facbdcf4aa18d6c44e
/*
----------------------------------------------------------------------------
M. LZHuf Compression
This is the LZHuf compression algorithm as used in DPBOX and F6FBB.
----------------------------------------------------------------------------
*/
/**************************************************************
written by Haruyasu Yoshizaki 11/20/1988
some minor changes 4/6/1989
comments translated by Haruhiko Okumura 4/7/1989
minor beautifications and adjustments for compiling under Linux
by Markus Gutschke <gutschk@math.uni-muenster.de>
1997-01-27
Modifications to allow use as a filter by Ken Yap <ken_yap@users.sourceforge.net>.
1997-07-01
Small mod to cope with running on big-endian machines
by Jim Hague <jim.hague@acm.org)
1998-02-06
Make compression statistics report shorter
by Ken Yap <ken_yap@users.sourceforge.net>.
2001-04-25
**************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <errno.h>
#ifndef VERBOSE
#define Fprintf(x)
#define wterr 0
#else
static char wterr[] = "Can't write.";
#ifdef ENCODE
static unsigned long int codesize = 0;
#endif
static unsigned long int printcount = 0;
#endif
#endif
#ifndef MAIN
extern
#endif
static unsigned long int textsize = 0;
{
}
/* These will be a complete waste of time on a lo-endian */
/* system, but it only gets done once so WTF. */
static unsigned long i86ul_to_host(unsigned long ul)
{
unsigned long res = 0;
int i;
union
{
unsigned char c[4];
unsigned long ul;
} u;
for (i = 3; i >= 0; i--)
return res;
}
static unsigned long host_to_i86ul(unsigned long ul)
{
int i;
union
{
unsigned char c[4];
unsigned long ul;
} u;
for (i = 0; i < 4; i++)
{
u.c[i] = ul & 0xff;
ul >>= 8;
}
return u.ul;
}
#endif
/********** LZSS compression **********/
#define N 4096 /* buffer size */
/* Attention: When using this file for f6fbb-type compressed data exchange,
set N to 2048 ! (DL8HBS) */
#define F 60 /* lookahead buffer size */
#define THRESHOLD 2
#define NIL N /* leaf of tree */
static unsigned char
text_buf[N + F - 1];
#endif
#ifdef ENCODE
static int match_position, match_length,
static void InitTree(void) /* initialize trees */
{
int i;
for (i = N + 1; i <= N + 256; i++)
for (i = 0; i < N; i++)
}
static void InsertNode(int r) /* insert to tree */
{
int i, p, cmp;
unsigned char *key;
unsigned c;
cmp = 1;
p = N + 1 + key[0];
match_length = 0;
for ( ; ; ) {
if (cmp >= 0) {
p = rson[p];
else {
rson[p] = r;
dad[r] = p;
return;
}
} else {
p = lson[p];
else {
lson[p] = r;
dad[r] = p;
return;
}
}
for (i = 1; i < F; i++)
break;
if (i > THRESHOLD) {
if (i > match_length) {
if ((match_length = i) >= F)
break;
}
if (i == match_length) {
match_position = c;
}
}
}
}
else
}
static void DeleteNode(int p) /* remove from tree */
{
int q;
return; /* not registered */
q = lson[p];
else
q = rson[p];
else {
q = lson[p];
do {
q = rson[q];
}
}
else
}
#endif
/* Huffman coding */
/* kinds of characters (character code = 0..N_CHAR-1) */
#define R (T - 1) /* position of root */
/* root frequency comes to this value. */
typedef unsigned char uchar;
/* table for encoding and decoding the upper 6 bits of position */
/* for encoding */
#ifdef ENCODE
0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x05,
0x05, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x06,
0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08
};
0x00, 0x20, 0x30, 0x40, 0x50, 0x58, 0x60, 0x68,
0x70, 0x78, 0x80, 0x88, 0x90, 0x94, 0x98, 0x9C,
0xA0, 0xA4, 0xA8, 0xAC, 0xB0, 0xB4, 0xB8, 0xBC,
0xC0, 0xC2, 0xC4, 0xC6, 0xC8, 0xCA, 0xCC, 0xCE,
0xD0, 0xD2, 0xD4, 0xD6, 0xD8, 0xDA, 0xDC, 0xDE,
0xE0, 0xE2, 0xE4, 0xE6, 0xE8, 0xEA, 0xEC, 0xEE,
0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF
};
#endif
#ifdef DECODE
/* for decoding */
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09,
0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A,
0x0B, 0x0B, 0x0B, 0x0B, 0x0B, 0x0B, 0x0B, 0x0B,
0x0C, 0x0C, 0x0C, 0x0C, 0x0D, 0x0D, 0x0D, 0x0D,
0x0E, 0x0E, 0x0E, 0x0E, 0x0F, 0x0F, 0x0F, 0x0F,
0x10, 0x10, 0x10, 0x10, 0x11, 0x11, 0x11, 0x11,
0x12, 0x12, 0x12, 0x12, 0x13, 0x13, 0x13, 0x13,
0x14, 0x14, 0x14, 0x14, 0x15, 0x15, 0x15, 0x15,
0x16, 0x16, 0x16, 0x16, 0x17, 0x17, 0x17, 0x17,
0x18, 0x18, 0x19, 0x19, 0x1A, 0x1A, 0x1B, 0x1B,
0x1C, 0x1C, 0x1D, 0x1D, 0x1E, 0x1E, 0x1F, 0x1F,
0x20, 0x20, 0x21, 0x21, 0x22, 0x22, 0x23, 0x23,
0x24, 0x24, 0x25, 0x25, 0x26, 0x26, 0x27, 0x27,
0x28, 0x28, 0x29, 0x29, 0x2A, 0x2A, 0x2B, 0x2B,
0x2C, 0x2C, 0x2D, 0x2D, 0x2E, 0x2E, 0x2F, 0x2F,
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
};
0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
};
#endif
/* elements [T..T + N_CHAR - 1] which are used to get */
/* the positions of leaves corresponding to the codes. */
static int son[T]; /* pointers to child nodes (son[], son[] + 1) */
#endif
#ifdef DECODE
static unsigned getbuf = 0;
static int GetBit(void) /* get one bit */
{
int i;
while (getlen <= 8) {
getlen += 8;
}
i = getbuf;
getbuf <<= 1;
getlen--;
return ((signed short)i < 0);
}
static int GetByte(void) /* get one byte */
{
unsigned short i;
while (getlen <= 8) {
getlen += 8;
}
i = getbuf;
getbuf <<= 8;
getlen -= 8;
return i >> 8;
}
#endif
#ifdef ENCODE
static unsigned putbuf = 0;
static void Putcode(int l, unsigned c) /* output c bits of code */
{
if ((putlen += l) >= 8) {
}
}
#ifdef VERBOSE
codesize += 2;
#endif
putlen -= 8;
} else {
putbuf <<= 8;
#ifdef VERBOSE
codesize++;
#endif
}
}
}
#endif
/* initialization of tree */
static void StartHuff(void)
{
int i, j;
for (i = 0; i < N_CHAR; i++) {
freq[i] = 1;
son[i] = i + T;
prnt[i + T] = i;
}
i = 0; j = N_CHAR;
while (j <= R) {
son[j] = i;
i += 2; j++;
}
freq[T] = 0xffff;
prnt[R] = 0;
}
/* reconstruction of tree */
static void reconst(void)
{
int i, j, k;
unsigned f, l;
/* collect leaf nodes in the first half of the table */
/* and replace the freq by (freq + 1) / 2. */
j = 0;
for (i = 0; i < T; i++) {
if (son[i] >= T) {
j++;
}
}
/* begin constructing tree by connecting sons */
for (i = 0, j = N_CHAR; j < T; i += 2, j++) {
k = i + 1;
for (k = j - 1; f < freq[k]; k--);
k++;
l = (j - k) * 2;
freq[k] = f;
son[k] = i;
}
/* connect prnt */
for (i = 0; i < T; i++) {
if ((k = son[i]) >= T) {
prnt[k] = i;
} else {
}
}
}
/* increment frequency of given code by one, and update tree */
static void update(int c)
{
int i, j, k, l;
reconst();
}
c = prnt[c + T];
do {
k = ++freq[c];
/* if the order is disturbed, exchange nodes */
if (k > freq[l = c + 1]) {
while (k > freq[++l]);
l--;
freq[l] = k;
i = son[c];
prnt[i] = l;
if (i < T) prnt[i + 1] = l;
j = son[l];
son[l] = i;
prnt[j] = c;
if (j < T) prnt[j + 1] = c;
son[c] = j;
c = l;
}
} while ((c = prnt[c]) != 0); /* repeat up to root */
}
#endif
#ifdef ENCODE
#if 0
#endif
static void EncodeChar(unsigned c)
{
unsigned i;
int j, k;
i = 0;
j = 0;
k = prnt[c + T];
/* travel from leaf to root */
do {
i >>= 1;
/* if node's address is odd-numbered, choose bigger brother node */
if (k & 1) i += 0x8000;
j++;
} while ((k = prnt[k]) != R);
Putcode(j, i);
#if 0
code = i;
len = j;
#endif
update(c);
}
static void EncodePosition(unsigned c)
{
unsigned i;
/* output upper 6 bits by table lookup */
i = c >> 6;
/* output lower 6 bits verbatim */
}
static void EncodeEnd(void)
{
if (putlen) {
}
#ifdef VERBOSE
codesize++;
#endif
}
}
#endif
#ifdef DECODE
static int DecodeChar(void)
{
unsigned c;
c = son[R];
/* travel from root to leaf, */
/* choosing the smaller child node (son[]) if the read bit is 0, */
/* the bigger (son[]+1} if 1 */
while (c < T) {
c += GetBit();
c = son[c];
}
c -= T;
update(c);
return c;
}
static int DecodePosition(void)
{
unsigned i, j, c;
/* recover upper 6 bits from table */
i = GetByte();
c = (unsigned)d_code[i] << 6;
j = d_len[i];
/* read lower 6 bits verbatim */
j -= 2;
while (j--) {
i = (i << 1) + GetBit();
}
return c | (i & 0x3f);
}
#endif
#ifdef ENCODE
/* compression */
void Encode(void) /* compression */
{
int i, c, len, r, s, last_match_length;
unsigned long tw;
#ifdef VERBOSE
if ((signed long)textsize < 0)
#endif
if (textsize == 0)
return;
textsize = 0; /* rewind and re-read */
StartHuff();
InitTree();
s = 0;
r = N - F;
for (i = s; i < r; i++)
text_buf[i] = ' ';
for (i = 1; i <= F; i++)
InsertNode(r - i);
InsertNode(r);
do {
if (match_length > len)
match_length = len;
if (match_length <= THRESHOLD) {
match_length = 1;
EncodeChar(text_buf[r]);
} else {
}
for (i = 0; i < last_match_length &&
DeleteNode(s);
text_buf[s] = c;
if (s < F - 1)
text_buf[s + N] = c;
s = (s + 1) & (N - 1);
r = (r + 1) & (N - 1);
InsertNode(r);
}
if ((textsize += i) > printcount) {
#if defined(VERBOSE) && defined(EXTRAVERBOSE)
#endif
printcount += 1024;
}
while (i++ < last_match_length) {
DeleteNode(s);
s = (s + 1) & (N - 1);
r = (r + 1) & (N - 1);
if (--len) InsertNode(r);
}
} while (len > 0);
EncodeEnd();
#ifdef LONG_REPORT
#else
#endif
}
#endif
#ifdef DECODE
void Decode(void) /* recover */
{
int i, j, k, r, c;
unsigned long int count;
unsigned long tw;
if (textsize == 0)
return;
StartHuff();
for (i = 0; i < N - F; i++)
text_buf[i] = ' ';
r = N - F;
c = DecodeChar();
if (c < 256) {
}
text_buf[r++] = c;
r &= (N - 1);
count++;
} else {
j = c - 255 + THRESHOLD;
for (k = 0; k < j; k++) {
c = text_buf[(i + k) & (N - 1)];
}
text_buf[r++] = c;
r &= (N - 1);
count++;
}
}
if (count > printcount) {
#if defined(VERBOSE) && defined(EXTRAVERBOSE)
#endif
printcount += 1024;
}
}
}
#endif
#ifdef MAIN
{
char *s;
FILE *f;
int c;
if (argc == 2) {
perror("tmpfile");
return EXIT_FAILURE;
}
fputc(c, f);
}
else if (argc != 4) {
"'lzhuf d file2 file1' decodes file2 into file1.\n"));
return EXIT_FAILURE;
}
if (argc == 4) {
return EXIT_FAILURE;
}
}
Encode();
else
Decode();
return EXIT_SUCCESS;
}
#endif