#pragma prototyped
/*-----------------------------------------------------------*/
/*--- A block-sorting, lossless compressor bzip2.c ---*/
/*-----------------------------------------------------------*/
/*--
This file is a part of bzip2 and/or libbzip2, a program and
library for lossless, block-sorting data compression.
Copyright (C) 1996-1998 Julian R Seward. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. The origin of this software must not be misrepresented; you must
not claim that you wrote the original software. If you use this
software in a product, an acknowledgment in the product
documentation would be appreciated but is not required.
3. Altered source versions must be plainly marked as such, and must
not be misrepresented as being the original software.
4. The name of the author may not be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Julian Seward, Guildford, Surrey, UK.
jseward@acm.org
This program is based on (at least) the work of:
Mike Burrows
David Wheeler
Peter Fenwick
Alistair Moffat
Radford Neal
Ian H. Witten
Robert Sedgewick
Jon L. Bentley
For more information on these sources, see the manual.
--*/
/*----------------------------------------------------*/
/*--- IMPORTANT ---*/
/*----------------------------------------------------*/
/*--
WARNING:
This program and library (attempts to) compress data by
performing several non-trivial transformations on it.
Unless you are 100% familiar with *all* the algorithms
contained herein, and with the consequences of modifying them,
you should NOT meddle with the compression or decompression
machinery. Incorrect changes can and very likely *will*
lead to disastrous loss of data.
DISCLAIMER:
I TAKE NO RESPONSIBILITY FOR ANY LOSS OF DATA ARISING FROM THE
USE OF THIS PROGRAM, HOWSOEVER CAUSED.
Every compression of a file implies an assumption that the
compressed file can be decompressed to reproduce the original.
Great efforts in design, coding and testing have been made to
ensure that this program works correctly. However, the
complexity of the algorithms, and, in particular, the presence
of various special cases in the code which occur with very low
but non-zero probability make it impossible to rule out the
possibility of bugs remaining in the program. DO NOT COMPRESS
ANY DATA WITH THIS PROGRAM UNLESS YOU ARE PREPARED
TO ACCEPT THE POSSIBILITY, HOWEVER SMALL, THAT THE DATA WILL
NOT BE RECOVERABLE.
That is not to say this program is inherently unreliable.
It has been carefully constructed and extensively tested.
PATENTS:
To the best of my knowledge, this program and library do not use any
patented algorithms. However, I do not have the resources
available to carry out a full patent search. Therefore I cannot
give any guarantee of the above statement.
--*/
/*----------------------------------------------------*/
/*--- and now for something much more pleasant :-) ---*/
/*----------------------------------------------------*/
/*---------------------------------------------*/
/*--
Some stuff for all platforms.
--*/
#include <bzlib.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <signal.h>
#include <math.h>
/*---------------------------------------------*/
/*--
Platform-specific stuff.
--*/
#if BZ_UNIX
# if _PACKAGE_ast
# include <ast.h>
# include <ls.h>
# include <tm.h>
# else
# include <unistd.h>
# endif
#endif
#if BZ_LCCWIN32
# include <io.h>
# include <fcntl.h>
# if 0
/*-- lcc-win32 seems to expand wildcards itself --*/
do { \
if ((spec)[0] == '-') { \
} else { \
struct _finddata_t c_file; \
long hFile; \
if ( hFile == -1L ) { \
} else { \
int anInt = 0; \
while ( anInt == 0 ) { \
} \
} \
} \
} while ( 0 )
# else
# endif
do { \
O_BINARY ); \
ERROR_IF_MINUS_ONE ( retVal ); \
} while ( 0 )
#endif
/*---------------------------------------------*/
/*--
Some more stuff for all platforms :-)
--*/
/*--
   Scalar type aliases used throughout this file.  The start-up
   sanity message later in the file states that Int32, Int16 and
   Char are required to be 4, 2 and 1 bytes respectively.
--*/
typedef char Char;              /*-- plain character --*/
typedef unsigned char Bool;     /*-- flag type, held in an unsigned char --*/
typedef unsigned char UChar;    /*-- unsigned character --*/
typedef int Int32;              /*-- signed; must be 4 bytes (see note above) --*/
typedef unsigned int UInt32;    /*-- unsigned counterpart of Int32 --*/
typedef short Int16;            /*-- signed; must be 2 bytes (see note above) --*/
typedef unsigned short UInt16;  /*-- unsigned counterpart of Int16 --*/
/*--
IntNative is your platform's `native' int size.
Only here to avoid probs with 64-bit platforms.
--*/
typedef int IntNative;
/*---------------------------------------------------*/
/*--- Misc (file handling) data decls ---*/
/*---------------------------------------------------*/
/*-- source modes; F==file, I==stdin, O==stdout --*/
/*-- operation modes --*/
/*--
   Forward declarations of the error-reporting routines, so that
   code earlier in the file can call them ahead of their
   definitions.  ioError, outOfMemory, crcError and
   compressedStreamEOF are defined further down in this file.
   NOTE(review): no definitions of blockOverrun, badBlockHeader,
   badBGLengths or bitStreamEOF are visible in this file --
   confirm whether these declarations are still needed.
--*/
void ioError ( void );
void outOfMemory ( void );
void blockOverrun ( void );
void badBlockHeader ( void );
void badBGLengths ( void );
void crcError ( void );
void bitStreamEOF ( void );
/*-- callers in this file pass 1, 2 or 3 as the argument --*/
void cleanUpAndFail ( Int32 );
void compressedStreamEOF ( void );
/*---------------------------------------------------*/
/*--- Processing of complete files and streams ---*/
/*---------------------------------------------------*/
/*---------------------------------------------*/
{
ungetc ( c, f );
return False;
}
/*---------------------------------------------*/
{
while (True) {
}
}
if (verbosity >= 1)
"%5.2f%% saved, %d in, %d out.\n",
(float)nbytes_in / (float)nbytes_out,
);
return;
switch (bzerr) {
case BZ_MEM_ERROR:
outOfMemory ();
case BZ_IO_ERROR:
ioError(); break;
default:
panic ( "compress:unexpected error" );
}
panic ( "compress:end" );
/*notreached*/
}
/*---------------------------------------------*/
{
nUnused = 0;
streamNo = 0;
while (True) {
bzf = bzReadOpen (
);
streamNo++;
}
}
if (ret != 0) goto errhandler_io;
}
return True;
switch (bzerr) {
case BZ_IO_ERROR:
ioError(); break;
case BZ_DATA_ERROR:
crcError();
case BZ_MEM_ERROR:
outOfMemory();
case BZ_UNEXPECTED_EOF:
case BZ_DATA_ERROR_MAGIC:
if (streamNo == 1) {
return False;
} else {
"\n%s: %s: trailing garbage after EOF ignored\n",
return True;
}
default:
panic ( "decompress:unexpected error" );
}
panic ( "decompress:end" );
return True; /*notreached*/
}
/*---------------------------------------------*/
{
nUnused = 0;
streamNo = 0;
while (True) {
bzf = bzReadOpen (
);
streamNo++;
}
}
return True;
switch (bzerr) {
case BZ_IO_ERROR:
ioError(); break;
case BZ_DATA_ERROR:
"\n%s: data integrity (CRC) error in data\n",
inName );
return False;
case BZ_MEM_ERROR:
outOfMemory();
case BZ_UNEXPECTED_EOF:
"\n%s: file ends unexpectedly\n",
inName );
return False;
case BZ_DATA_ERROR_MAGIC:
if (streamNo == 1) {
"\n%s: bad magic number (ie, not created by bzip2)\n",
inName );
return False;
} else {
"\n%s: %s: trailing garbage after EOF ignored\n",
return True;
}
default:
panic ( "test:unexpected error" );
}
panic ( "test:end" );
return True; /*notreached*/
}
/*---------------------------------------------------*/
/*--- Error [non-] handling grunge ---*/
/*---------------------------------------------------*/
/*---------------------------------------------*/
/*--
   Print a general hint that the compressed file(s) may be
   corrupted, pointing the user at the -tvv option and at the
   `bzip2recover' program.  Takes no arguments, returns nothing;
   the text goes to stderr.
--*/
void cadvise ( void )
{
   /*-- The first argument of fprintf is the stream; the advice
        text is the format (it contains no conversions). --*/
   fprintf (
      stderr,
      "\nIt is possible that the compressed file(s) have become corrupted.\n"
      "You can use the -tvv option to test integrity of such files.\n\n"
      "You can use the `bzip2recover' program to *attempt* to recover\n"
      "data from undamaged sections of corrupted files.\n\n"
   );
}
/*---------------------------------------------*/
/*--
   Reports the current input and output file names using the
   format "\tInput file = %s, output file = %s\n".
   NOTE(review): as written, the fprintf call below has no stream
   argument and supplies no values for the two %s conversions;
   lines appear to be missing here -- restore them before building.
--*/
void showFileNames ( void )
{
fprintf (
"\tInput file = %s, output file = %s\n",
);
}
/*---------------------------------------------*/
{
if (outputHandleJustInCase != NULL)
if (retVal != 0)
"%s: WARNING: deletion of output file (apparently) failed.\n",
progName );
}
"%s: WARNING: some files have not been processed:\n"
"\t%d specified on command line, %d not processed yet.\n\n",
}
}
/*---------------------------------------------*/
{
"\n%s: PANIC -- internal consistency error:\n"
"\t%s\n"
"\tThis is a BUG. Please report it to me at:\n"
"\tjseward@acm.org\n",
progName, s );
cleanUpAndFail( 3 );
}
/*---------------------------------------------*/
/*--
   Report a data-integrity error found while decompressing:
   print the message (prefixed with progName) to stderr, print
   the general corruption advice via cadvise(), then hand over
   to cleanUpAndFail with code 2.
   Declared with ( void ) to agree with the prototype earlier
   in this file.
--*/
void crcError ( void )
{
   fprintf ( stderr,
             "\n%s: Data integrity error when decompressing.\n",
             progName );
   cadvise();
   cleanUpAndFail( 2 );
}
/*---------------------------------------------*/
/*--
   Report that the compressed input ended before it should have:
   print the message (prefixed with progName) to stderr, print
   the general corruption advice via cadvise(), then hand over
   to cleanUpAndFail with code 2.
--*/
void compressedStreamEOF ( void )
{
   fprintf ( stderr,
             "\n%s: Compressed file ends unexpectedly;\n\t"
             "perhaps it is corrupted? *Possible* reason follows.\n",
             progName );
   cadvise();
   cleanUpAndFail( 2 );
}
/*---------------------------------------------*/
/*--
   Report an I/O (or other system-level) failure: print the
   message (prefixed with progName) to stderr, follow it with
   the system's description of the current errno, then hand
   over to cleanUpAndFail with code 1.
   Declared with ( void ) to agree with the prototype earlier
   in this file.
--*/
void ioError ( void )
{
   fprintf ( stderr,
             "\n%s: I/O or other error, bailing out. Possible reason follows.\n",
             progName );
   /*-- The message promises a reason; perror supplies it from errno. --*/
   perror ( progName );
   cleanUpAndFail( 1 );
}
/*---------------------------------------------*/
{
"\n%s: Control-C (or similar) caught, quitting.\n",
progName );
cleanUpAndFail(1);
}
/*---------------------------------------------*/
{
"\n%s: Caught a SIGSEGV or SIGBUS whilst compressing,\n"
"\twhich probably indicates a bug in bzip2. Please\n"
"\treport it to me at: jseward@acm.org\n",
progName );
else
"\n%s: Caught a SIGSEGV or SIGBUS whilst decompressing,\n"
"\twhich probably indicates that the compressed data\n"
"\tis corrupted.\n",
progName );
cleanUpAndFail( 3 ); else
}
/*---------------------------------------------*/
/*--
   Report an allocation failure: print the message (prefixed
   with progName) to stderr, then hand over to cleanUpAndFail
   with code 1.
--*/
void outOfMemory ( void )
{
   fprintf ( stderr,
             "\n%s: couldn't allocate enough memory\n",
             progName );
   cleanUpAndFail(1);
}
/*---------------------------------------------------*/
/*--- The main driver machinery ---*/
/*---------------------------------------------------*/
/*---------------------------------------------*/
{
Int32 i;
}
/*---------------------------------------------*/
{
fprintf (
"bzip2: file name\n`%s'\nis suspiciously (> 1024 chars) long.\n"
"Try using a reasonable file name instead. Sorry! :)\n",
);
exit(1);
}
}
/*---------------------------------------------*/
{
return exists;
}
/*---------------------------------------------*/
/*--
if in doubt, return True
--*/
{
IntNative i;
if (i != 0) return True;
return True;
}
/*---------------------------------------------*/
{
#if BZ_UNIX
#if !_PACKAGE_ast
#endif
ERROR_IF_NOT_ZERO ( retVal );
#if !_PACKAGE_ast
#endif
ERROR_IF_NOT_ZERO ( retVal );
/* Not sure if this is really portable or not. Causes
problems on my x86-Linux Redhat 5.0 box. Decided
to omit it from 0.9.0. JRS, 27 June 98. If you
understand Unix file semantics and portability issues
well enough to fix this properly, drop me a line
at jseward@acm.org.
ERROR_IF_NOT_ZERO ( retVal );
*/
#if _PACKAGE_ast
#else
#endif
ERROR_IF_NOT_ZERO ( retVal );
#endif
}
/*---------------------------------------------*/
{
#if BZ_UNIX
ERROR_IF_NOT_ZERO ( retVal );
#endif
}
/*---------------------------------------------*/
{
if (n <= 4) return False;
return
}
/*---------------------------------------------*/
{
return cdc;
}
/*---------------------------------------------*/
{
panic ( "compress: bad modes\n" );
switch (srcMode) {
}
return;
}
return;
}
return;
}
return;
}
return;
}
switch ( srcMode ) {
case SM_I2O:
"%s: I won't write compressed data to a terminal.\n",
progName );
return;
};
break;
case SM_F2O:
"%s: I won't write compressed data to a terminal.\n",
progName );
return;
};
return;
};
break;
case SM_F2F:
return;
}
return;
};
break;
default:
panic ( "compress: bad srcMode" );
break;
}
if (verbosity >= 1) {
}
/*--- Now the input and output handles are sane. Do the Biz. ---*/
/*--- If there was an I/O error, we won't get here. ---*/
if ( !keepInputFiles ) {
ERROR_IF_NOT_ZERO ( retVal );
}
}
}
/*---------------------------------------------*/
{
panic ( "uncompress: bad modes\n" );
switch (srcMode) {
break;
}
return;
}
return;
}
"%s: Input file name %s doesn't end in `.bz2', skipping.\n",
return;
}
return;
}
return;
}
switch ( srcMode ) {
case SM_I2O:
"%s: I won't read compressed data from a terminal.\n",
progName );
return;
};
break;
case SM_F2O:
return;
};
break;
case SM_F2F:
return;
}
return;
};
break;
default:
panic ( "uncompress: bad srcMode" );
break;
}
if (verbosity >= 1) {
}
/*--- Now the input and output handles are sane. Do the Biz. ---*/
/*--- If there was an I/O error, we won't get here. ---*/
if ( magicNumberOK ) {
if ( !keepInputFiles ) {
ERROR_IF_NOT_ZERO ( retVal );
}
}
} else {
ERROR_IF_NOT_ZERO ( retVal );
}
}
if ( magicNumberOK ) {
if (verbosity >= 1)
} else {
if (verbosity >= 1)
"%s: %s is not a bzip2 file, skipping.\n",
}
}
/*---------------------------------------------*/
{
panic ( "testf: bad modes\n" );
switch (srcMode) {
}
return;
}
return;
}
"%s: Input file name %s doesn't end in `.bz2', skipping.\n",
return;
}
return;
}
switch ( srcMode ) {
case SM_I2O:
"%s: I won't read compressed data from a terminal.\n",
progName );
return;
};
break;
return;
};
break;
default:
panic ( "testf: bad srcMode" );
break;
}
if (verbosity >= 1) {
}
/*--- Now the input handle is sane. Do the Biz. ---*/
}
/*---------------------------------------------*/
/*--
   Print the program banner (name, version, date), the copyright
   line and the short licence / no-warranty notice to stderr.
   Takes no arguments and returns nothing.
--*/
void license ( void )
{
   /*-- The text is a single format string with no conversions;
        the stream must be given explicitly as the first argument. --*/
   fprintf ( stderr,
      "bzip2, a block-sorting file compressor. "
      "Version 0.9.0c, 18-Oct-98.\n"
      " \n"
      " Copyright (C) 1996, 1997, 1998 by Julian Seward.\n"
      " \n"
      " This program is free software; you can redistribute it and/or modify\n"
      " it under the terms set out in the LICENSE file, which is included\n"
      " in the bzip2-0.9.0c source distribution.\n"
      " \n"
      " This program is distributed in the hope that it will be useful,\n"
      " but WITHOUT ANY WARRANTY; without even the implied warranty of\n"
      " MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n"
      " LICENSE file for more details.\n"
      " \n"
   );
}
/*---------------------------------------------*/
{
fprintf (
"bzip2, a block-sorting file compressor. "
"Version 0.9.0c, 18-Oct-98.\n"
"\n usage: %s [flags and input files in any order]\n"
"\n"
" -h --help print this message\n"
" -d --decompress force decompression\n"
" -z --compress force compression\n"
" -k --keep keep (don't delete) input files\n"
" -f --force overwrite existing output filess\n"
" -t --test test compressed file integrity\n"
" -c --stdout output to standard out\n"
" -v --verbose be verbose (a 2nd -v gives more)\n"
" -L --license display software version & license\n"
" -V --version display software version & license\n"
" -s --small use less memory (at most 2500k)\n"
" -1 .. -9 set block size to 100k .. 900k\n"
" --repetitive-fast compress repetitive blocks faster\n"
" --repetitive-best compress repetitive blocks better\n"
"\n"
" If invoked as `bzip2', default action is to compress.\n"
" as `bunzip2', default action is to decompress.\n"
" as `bz2cat', default action is to decompress to stdout.\n"
"\n"
" If no file names are given, bzip2 compresses or decompresses\n"
" from standard input to standard output. You can combine\n"
" short flags, so `-v -4' means the same as -v4 or -4v, &c.\n"
#if BZ_UNIX
"\n"
#endif
,
);
}
/*---------------------------------------------*/
/*--
All the garbage from here to main() is purely to
implement a linked list of command-line arguments,
into which main() copies argv[1 .. argc-1].
The purpose of this ridiculous exercise is to
facilitate the expansion of wildcard characters
* and ? in filenames for halfwitted OSs like
MSDOS, Windows 95 and NT.
The actual Dirty Work is done by the platform-specific
macro APPEND_FILESPEC.
--*/
/*--
   Cell: node type for the linked list of command-line
   arguments described in the comment above.
   NOTE(review): no members are visible in this struct body;
   presumably a name field and a link to the next Cell belong
   here -- confirm and restore before building.
--*/
typedef
struct zzzz {
}
Cell;
/*---------------------------------------------*/
{
void* p;
if (p == NULL) outOfMemory ();
return p;
}
/*---------------------------------------------*/
{
Cell *c;
return c;
}
/*---------------------------------------------*/
{
return tmp;
} else {
return root;
}
}
/*---------------------------------------------*/
{
Int32 i, j;
/*-- Be really really really paranoid :-) --*/
"bzip2: I'm not configured correctly for this platform!\n"
"\tI require Int32, Int16 and Char to have sizes\n"
"\tof 4, 2 and 1 bytes to run properly, and they don't.\n"
"\tProbably you can fix this by defining them correctly,\n"
"\tand recompiling. Bye!\n" );
exit(3);
}
/*-- Set up signal handlers --*/
#if BZ_UNIX
#endif
/*-- Initialise --*/
verbosity = 0;
blockSize100k = 9;
numFileNames = 0;
numFilesProcessed = 0;
workFactor = 30;
progName = &progNameReally[0];
/*-- Expand filename wildcards in arg list --*/
/*-- Find the length of the longest filename --*/
longestFileName = 7;
numFileNames = 0;
numFileNames++;
}
/*-- Determine source modes; flag handling may change this too. --*/
if (numFileNames == 0)
/*-- Determine what to do (compress/uncompress/test/cat). --*/
/*-- Note that subsequent flag handling may change this. --*/
for (;;)
{
switch (*tmp++)
{
case 0:
break;
case 'u':
case 'U':
{
break;
}
continue;
case 'z':
case 'Z':
{
break;
}
continue;
default:
continue;
}
break;
}
/*-- Look at the flags. --*/
case 'V':
case 'L': license(); break;
case 'v': verbosity++; break;
exit ( 1 );
break;
exit ( 1 );
break;
}
/*-- And again ... --*/
else
exit ( 1 );
}
}
progName );
exit ( 1 );
}
progName );
exit ( 1 );
}
else
}
} else
uncompress ( NULL );
else
}
} else {
else
}
if (testFailsExist) {
"\n"
"You can use the `bzip2recover' program to *attempt* to recover\n"
"data from undamaged sections of corrupted files.\n\n"
);
exit(2);
}
}
return 0;
}
/*-----------------------------------------------------------*/
/*--- end bzip2.c ---*/
/*-----------------------------------------------------------*/