#pragma prototyped
/*
* Glenn Fowler
* AT&T Research
* 1999-06-23
*/
/*-
* Copyright (c) 1985, 1986, 1992, 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Diomidis Spinellis and James A. Woods, derived from original
* work by Spencer Thomas and Joseph Orost.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*-
* fcompress.c - File compression ala IEEE Computer, June 1984.
*
* Compress authors:
* Spencer W. Thomas (decvax!utah-cs!thomas)
* Jim McKie (decvax!mcvax!jim)
* Steve Davies (decvax!vax135!petsd!peora!srd)
* Ken Turkowski (decvax!decwrl!turtlevax!ken)
* James A. Woods (decvax!ihnp4!ames!jaw)
* Joe Orost (decvax!vax135!petsd!joe)
*
* Cleaned up and converted to library returning I/O streams by
* Diomidis Spinellis <dds@doc.ic.ac.uk>.
*/
#include <sfio_t.h>
#include <ast.h>
#include <sfdcgzip.h>
/* A code_int must be able to hold 2**BITS values of type int, and also -1. */
typedef long code_int;
typedef long count_int;
{ 0x1f, 0x9d };
/*
* Masks 0x40 and 0x20 are free. I think 0x20 should mean that there is
* a fourth header byte (for expansion).
*/
typedef struct s_zstate {
enum {
/*
* Block compression parameters -- after all codes are used up,
* and compression rate changes, start over.
*/
int zs_block_compress;
int zs_clear_flg;
long zs_ratio;
int zs_offset;
union {
struct {
long zs_fcode;
int zs_hshift;
} w; /* Write paramenters */
struct {
int zs_finchar;
} r; /* Read parameters */
} u;
} LZW_t;
/* Definitions to retain old variable names */
/*
* To save much memory, we overlay the table used by compress() with those
* used by decompress(). The tab_prefix table is the same size and type as
* the codetab. The tab_suffix table needs 2**BITS characters. We get this
* from the beginning of htab. The output stack uses the rest of htab, and
* contains characters. There is plenty of room for any possible stack
* (stack used to be 8000 characters).
*/
/*
* the next two codes should not be changed lightly, as they must not
* lie within the contiguous general code space.
*/
/*-
* Algorithm from "A Technique for High Performance Data Compression",
* Terry A. Welch, IEEE Computer Vol 17, No 6 (June 1984), pp 8-19.
*
* Algorithm:
* Modified Lempel-Ziv method (LZW). Basically finds common
* substrings and replaces them with a variable size code. This is
* deterministic, and can be done on the fly. Thus, the decompression
* procedure needs no input table, but tracks the way the table was built.
*/
/*-
* Output the given code.
* Inputs:
* code: A n_bits-bit integer. If == -1, then EOF. This assumes
* that n_bits =< (long)wordsize - 1.
* Outputs:
* Outputs code to the file.
* Assumptions:
* Chars are 8 bits long.
* Algorithm:
* Maintain a BITS character long buffer (so that 8 codes will
* fit in it exactly). Use the VAX insv instruction to insert each
* code in turn. When the buffer fills up empty it and start over.
*/
{0xff, 0xfe, 0xfc, 0xf8, 0xf0, 0xe0, 0xc0, 0x80, 0x00};
{0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff};
static int
{
if (ocode >= 0) {
/* Get to the first byte. */
r_off &= 7;
/*
* Since ocode is always >= 8 bits, only need to mask the first
* hunk on the left.
*/
bp++;
/* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
if (bits >= 8) {
ocode >>= 8;
bits -= 8;
}
/* Last bits. */
if (bits)
return (-1);
bits = 0;
offset = 0;
}
/*
* If the next entry is going to be too big for the ocode size,
* then increase it, if possible.
*/
/*
* Write the whole buffer, because the input side won't
* discover the size increase until after it has read it.
*/
if (offset > 0) {
return (-1);
}
offset = 0;
if (clear_flg) {
clear_flg = 0;
} else {
n_bits++;
else
}
}
} else {
/* At EOF, write the rest of the buffer. */
if (offset > 0) {
return (-1);
}
offset = 0;
}
return (0);
}
/*-
* Read one code from the standard input. If EOF, return -1.
* Inputs:
* stdin
* Outputs:
* code or -1 is returned.
*/
static code_int
{
/*
* If the next entry will be too big for the current gcode
* size, then we must increase the size. This implies reading
* a new buffer full, too.
*/
n_bits++;
else
}
if (clear_flg > 0) {
clear_flg = 0;
}
if (size <= 0) /* End of file. */
return (-1);
roffset = 0;
/* Round size down to integral number of codes. */
}
/* Get to the first byte. */
r_off &= 7;
/* Get first part (low order bits). */
/* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
if (bits >= 8) {
r_off += 8;
bits -= 8;
}
/* High order bits. */
return (gcode);
}
static void
{
register long i, m1;
m1 = -1;
i = cl_hsize - 16;
do { /* Might use Sys V memset(3) here. */
htab_p -= 16;
} while ((i -= 16) >= 0);
for (i += 16; i > 0; i--)
}
static int
{
register long rat;
if (rat == 0) /* Don't divide by zero. */
rat = 0x7fffffff;
else
} else
else {
ratio = 0;
clear_flg = 1;
return (-1);
}
return (0);
}
/*
* only sync is implemented right now
* see sfdcgzip() for full implementation
*/
static Sfoff_t
{
#if 1
static int nosync;
if (off == -1)
{
if (!nosync)
if (nosync > 0)
return 0;
}
#endif
return -1;
if (off == -1)
return 0;
return -1;
}
/*
* lzw exception handler
* free on close
*/
static int
{
int flags;
int r;
NoP(f);
switch (op)
{
case SF_ATEXIT:
sfdisc(f, SF_POPDISC);
return 0;
case SF_CLOSING:
case SF_DPOP:
case SF_FINAL:
r = 0;
{
r = -1;
else
{
out_count++;
r = -1;
}
}
if (op != SF_CLOSING)
return r;
case SF_DBUFFER:
return 1;
case SF_READ:
case SF_WRITE:
case SF_SYNC:
case SFGZ_HANDLE:
case SFGZ_GETPOS:
case SFGZ_SETPOS:
}
return 0;
}
/*-
* compress write
*
* Algorithm: use open addressing double hashing (no chaining) on the
* prefix code / next character combination. We do a variant of Knuth's
* algorithm D (vol. 3, sec. 6.4) along with G. Knott's relatively-prime
* secondary probe. Here, the modular division first probe is gives way
* to a faster exclusive-or manipulation. Also do block compression with
* an adaptive reset, whereby the code table is cleared when the compression
* ratio decreases, but after the table fills. The variable-length output
* codes are re-sized at this point, and a special CLEAR code is generated
* for the decompressor. Late addition: construct the table according to
* file size for noticeable speed improvement on small files. Please direct
* questions about this implementation to ames!jaw.
*/
static ssize_t
{
register code_int i;
register int c, disp;
int count;
if (num == 0)
return (0);
goto middle;
return (-1);
return (-1);
offset = 0;
out_count = 0;
clear_flg = 0;
ratio = 0;
in_count = 1;
--count;
hshift = 0;
hshift++;
c = *bp++;
in_count++;
continue;
} else if ((long)htabof(i) < 0) /* Empty slot. */
goto nomatch;
if (i == 0)
disp = 1;
i += hsize_reg;
continue;
}
if ((long)htabof(i) >= 0)
goto probe;
return (-1);
out_count++;
ent = c;
if (free_ent < maxmaxcode) {
checkpoint && block_compress) {
return (-1);
}
}
return (num);
}
/*
* Decompress read. This routine adapts to the codes in the file building
* the "string" table on-the-fly; requiring no table to be stored in the
* compressed file. The tables used herein are shared with those of the
* compress() routine. See the definitions above.
*/
static ssize_t
{
if (num == 0)
return (0);
switch (state) {
case S_START:
break;
case S_MIDDLE:
goto middle;
case S_EOF:
goto eof;
}
/* Check the magic number */
return (-1);
return (-1);
/* As above, initialize the first 256 entries in the table. */
tab_prefixof(code) = 0;
}
return (0); /* Get out of here */
/* First code must be 8 bits = char. */
count--;
tab_prefixof(code) = 0;
clear_flg = 1;
break;
}
/* Special case for KwKwK string. */
}
/* Generate output characters in reverse order. */
while (code >= 256) {
}
/* And put them out in forward order. */
middle: do {
if (count-- == 0)
return (num);
/* Generate the new entry. */
}
/* Remember previous code. */
}
}
/*
* create and push the sfio lzw discipline
* for SF_READ streams only
*
* (flags&SFGZ_VERIFY) return
* >0 is an lzw file
* 0 not an lzw file
* <0 error
* otherwise return
* >0 discipline pushed (gzip or lzw)
* 0 discipline not needed
* <0 error
*/
int
{
{
register unsigned char* s;
register int n;
/*
* peek the first 2 bytes to verify the magic
*
* 0x1f8b sfdcgzip gzip
* 0x1f9d sfdclzw compress
*/
if (!n)
if (!s)
return -1;
if (*s != 0x1f)
n = -1;
else if (*(s + 1) == 0x8b)
n = 1;
else if (*(s + 1) == 0x9d)
n = 0;
else
n = -1;
sfread(f, s, 0);
if (n < 0)
return 0;
if (flags & SFGZ_VERIFY)
return n >= 0;
if (n > 0)
}
return -1;
else
free_ent = 0; /* First unused entry. */
clear_flg = 0;
ratio = 0;
out_count = 0; /* # of codes output (for debugging). */
roffset = 0;
size = 0;
{
return -1;
}
return 1;
}