bdiff.c revision 7c478bd95313f5f23a4c958a745db2134aa03244
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
/*
* Copyright 2004 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <fatal.h>
#include <signal.h>
#include <sys/types.h>
#include <unistd.h>
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <stdlib.h>
#include <wait.h>
/* Exclusive upper bound on the signal numbers that setsig() tries to catch. */
#define ONSIG 16
/*
* This program segments two files into pieces of <= seglim lines
* (which is passed as a third argument or defaulted to some number)
* and then executes diff upon the pieces. The output of
* 'diff' is then processed to make it look as if 'diff' had
* processed the files whole. The reason for all this is that seglim
* is a reasonable upper limit on the size of files that diff can
* process.
* NOTE -- by segmenting the files in this manner, it cannot be
* guaranteed that the 'diffing' of the segments will generate
* a minimal set of differences.
* This process is most definitely not equivalent to 'diffing'
* the files whole, assuming 'diff' could handle such large files.
*
* 'diff' is executed by a child process, generated by forking,
* and communicates with this program through pipes.
*/
/* Scratch buffer used to format messages that are then handed to fatal(). */
static char Error[128];
static int seglim; /* limit of size of file segment to be generated */
/* Path of the program that is exec'ed on each pair of segment files. */
static char diff[] = "/usr/bin/diff";
static char tempskel[] = "/tmp/bdXXXXXX"; /* used to generate temp file names */
/* Name of the temp file written by saverest() and read back by putsave(). */
static char tempfile[32];
/* Names of the temp files holding the current old and new segments. */
static char otmp[32], ntmp[32];
/* Bit mask of FTL* flags (see <fatal.h>) that drives what fatal() does. */
static int fflags;
static int fatal_num = 1; /* exit number for fatal exit */
/*
 * Number of leading lines already accounted for. fixnum() adds it to every
 * line number found in the output of 'diff'.
 */
static offset_t linenum;
/* Current sizes of the old-line, new-line and diff-line buffers of main(). */
static size_t obufsiz, nbufsiz, dbufsiz;
/* Forward declarations of file-local functions. */
static char *readline(char **, size_t *, FILE *);
static void addgen(char **, size_t *, FILE *);
static void delgen(char **, size_t *, FILE *);
static void fixnum(char *);
static void fatal(char *);
static void setsig(void);
static void setsig1(int);
static char *satoi(char *, offset_t *);
static FILE *maket(char *);
/* argv[0]; used by fatal() as the prefix of error messages. */
static char *prognam;
/*
 * Entry point.
 *
 * Arguments, as parsed below:
 *	argv[1]		old file, or "-" for standard input
 *	argv[2]		new file, or "-" for standard input (not both)
 *	argv[3]		optional: either a positive segment limit, or an
 *			argument starting with "-s"
 *	argv[4]		optional: an argument starting with "-s" (only
 *			looked at when argv[3] was a segment limit)
 *
 * "-s" clears FTLMSG so that fatal() prints no message.
 *
 * Returns 0 when both files have been processed completely. The program
 * also leaves through addgen()/delgen() (exit status 0) and through
 * fatal() (exit status fatal_num).
 */
int
main(int argc, char *argv[])
{
	FILE *poldfile, *pnewfile;
	char *oline, *nline, *diffline;
	char *olp, *nlp, *dp;
	int otcnt, ntcnt;
	pid_t i;
	int pfd[2];
	FILE *poldtemp, *pnewtemp, *pipeinp;
	int status;

	prognam = argv[0];

	/*
	 * Set flags for 'fatal' so that it will clean up,
	 * produce a message, and terminate.
	 */
	fflags = FTLMSG | FTLCLN | FTLEXIT;

	setsig();

	if (argc < 3 || argc > 5)
		fatal("arg count");

	if (strcmp(argv[1], "-") == 0 && strcmp(argv[2], "-") == 0)
		fatal("both files standard input");

	if (strcmp(argv[1], "-") == 0) {
		poldfile = stdin;
	} else if ((poldfile = fopen(argv[1], "r")) == NULL) {
		(void) snprintf(Error, sizeof (Error),
		    "Can not open '%s'", argv[1]);
		fatal(Error);
	}

	if (strcmp(argv[2], "-") == 0) {
		pnewfile = stdin;
	} else if ((pnewfile = fopen(argv[2], "r")) == NULL) {
		(void) snprintf(Error, sizeof (Error),
		    "Can not open '%s'", argv[2]);
		fatal(Error);
	}

	seglim = 3500;

	if (argc > 3) {
		if (argv[3][0] == '-' && argv[3][1] == 's') {
			fflags &= ~FTLMSG;
		} else {
			if ((seglim = atoi(argv[3])) == 0)
				fatal("non-numeric limit");
			/*
			 * A negative limit would leave both segment files
			 * empty on every pass, so the segment loop below
			 * would never consume any input and never end.
			 */
			if (seglim < 0)
				fatal("limit must be positive");
			if (argc == 5 && argv[4][0] == '-' &&
			    argv[4][1] == 's')
				fflags &= ~FTLMSG;
		}
	}

	linenum = 0;

	/* Allocate the buffers and initialize their lengths */
	obufsiz = BUFSIZ;
	nbufsiz = BUFSIZ;
	dbufsiz = BUFSIZ;

	if ((oline = (char *)malloc(obufsiz)) == NULL ||
	    (nline = (char *)malloc(nbufsiz)) == NULL ||
	    (diffline = (char *)malloc(dbufsiz)) == NULL)
		fatal("Out of memory");

	/*
	 * The following loop will prevent any lines
	 * common to the beginning of both files from being
	 * sent to 'diff'. Since the running time of 'diff' is
	 * non-linear, this will help improve performance.
	 * If, during this process, both files reach EOF, then
	 * the files are equal and the program will terminate.
	 * If either file reaches EOF before the other, the
	 * program will generate the appropriate 'diff' output
	 * itself, since this can be easily determined and will
	 * avoid executing 'diff' completely.
	 */
	for (;;) {
		olp = readline(&oline, &obufsiz, poldfile);
		nlp = readline(&nline, &nbufsiz, pnewfile);

		if (!olp && !nlp)	/* EOF found on both: files equal */
			return (0);

		if (!olp) {
			/*
			 * The entire old file is a prefix of the
			 * new file. Generate the appropriate "append"
			 * 'diff'-like output, which is of the form:
			 *	nan,n
			 * where 'n' represents a line-number.
			 * addgen() does not return.
			 */
			addgen(&nline, &nbufsiz, pnewfile);
		}

		if (!nlp) {
			/*
			 * The entire new file is a prefix of the
			 * old file. Generate the appropriate "delete"
			 * 'diff'-like output, which is of the form:
			 *	n,ndn
			 * where 'n' represents a line-number.
			 * delgen() does not return.
			 */
			delgen(&oline, &obufsiz, poldfile);
		}

		if (strcmp(olp, nlp) == 0)
			linenum++;
		else
			break;
	}

	/*
	 * Here, first 'linenum' lines are equal.
	 * The following loop segments both files into
	 * seglim segments, forks and executes 'diff' on the
	 * segments, and processes the resulting output of
	 * 'diff', which is read from a pipe.
	 */
	for (;;) {
		/* If both files are at EOF, everything is done. */
		if (!olp && !nlp)	/* finished */
			return (0);

		if (!olp) {
			/*
			 * Generate appropriate "append"
			 * output without executing 'diff'.
			 */
			addgen(&nline, &nbufsiz, pnewfile);
		}

		if (!nlp) {
			/*
			 * Generate appropriate "delete"
			 * output without executing 'diff'.
			 */
			delgen(&oline, &obufsiz, poldfile);
		}

		/*
		 * Create a temporary file to hold a segment
		 * from the old file, and write it.
		 */
		poldtemp = maket(otmp);
		otcnt = 0;
		while (olp && otcnt < seglim) {
			(void) fputs(oline, poldtemp);
			if (ferror(poldtemp) != 0) {
				fflags |= FTLMSG;
				fatal("Can not write to temporary file");
			}
			olp = readline(&oline, &obufsiz, poldfile);
			otcnt++;
		}
		/* Buffered data is written at close time; check that too. */
		if (fclose(poldtemp) != 0) {
			fflags |= FTLMSG;
			fatal("Can not write to temporary file");
		}

		/*
		 * Create a temporary file to hold a segment
		 * from the new file, and write it.
		 */
		pnewtemp = maket(ntmp);
		ntcnt = 0;
		while (nlp && ntcnt < seglim) {
			(void) fputs(nline, pnewtemp);
			if (ferror(pnewtemp) != 0) {
				fflags |= FTLMSG;
				fatal("Can not write to temporary file");
			}
			nlp = readline(&nline, &nbufsiz, pnewfile);
			ntcnt++;
		}
		if (fclose(pnewtemp) != 0) {
			fflags |= FTLMSG;
			fatal("Can not write to temporary file");
		}

		/* Create pipes and fork. */
		if ((pipe(pfd)) == -1)
			fatal("Can not create pipe");

		if ((i = fork()) < (pid_t)0) {
			(void) close(pfd[0]);
			(void) close(pfd[1]);
			fatal("Can not fork, try again");
		} else if (i == (pid_t)0) {	/* child process */
			/* Make the write end of the pipe the child's stdout. */
			(void) close(pfd[0]);
			(void) close(1);
			(void) dup(pfd[1]);
			(void) close(pfd[1]);

			/*
			 * Execute 'diff' on the segment files. The argument
			 * list of a variadic exec must end with a null
			 * pointer of type char *, not a plain int 0.
			 */
			(void) execlp(diff, diff, otmp, ntmp, (char *)NULL);

			/*
			 * Exit code here must be > 1.
			 * Parent process treats exit code of 1 from the child
			 * as non-error because the child process "diff" exits
			 * with a status of 1 when a difference is encountered.
			 * The error here is a true error--the parent process
			 * needs to detect it and exit with a non-zero status.
			 */
			(void) close(1);
			(void) snprintf(Error, sizeof (Error),
			    "Can not execute '%s'", diff);
			fatal_num = 2;
			fatal(Error);
		} else {			/* parent process */
			(void) close(pfd[1]);
			pipeinp = fdopen(pfd[0], "r");
			if (pipeinp == NULL) {
				(void) close(pfd[0]);
				fatal("Can not open pipe for reading");
			}

			/*
			 * Process 'diff' output: lines that start with a
			 * digit carry line numbers and go through fixnum();
			 * everything else is copied unchanged.
			 */
			while ((dp = readline(&diffline, &dbufsiz, pipeinp))) {
				if (isdigit((unsigned char)*dp))
					fixnum(diffline);
				else
					(void) printf("%s", diffline);
			}

			(void) fclose(pipeinp);

			/*
			 * EOF on pipe. Any wait status other than "exited
			 * with 0" or "exited with 1" counts as a failure.
			 */
			if (wait(&status) == -1 || (status & ~0x100) != 0) {
				(void) snprintf(Error, sizeof (Error),
				    "'%s' failed", diff);
				fatal(Error);
			}
		}

		linenum += seglim;

		/* Remove temporary files. */
		(void) unlink(otmp);
		(void) unlink(ntmp);
	}
}
/* Routine to save remainder of a file. */
static void
saverest(char **linep, size_t *bufsizp, FILE *iptr)
{
char *lp;
FILE *temptr;
temptr = maket(tempfile);
lp = *linep;
while (lp) {
(void) fputs(*linep, temptr);
linenum++;
lp = readline(linep, bufsizp, iptr);
}
(void) fclose(temptr);
}
/* Routine to write out data saved by 'saverest' and to remove the file. */
static void
putsave(char **linep, size_t *bufsizp, char type)
{
FILE *temptr;
if ((temptr = fopen(tempfile, "r")) == NULL) {
(void) snprintf(Error, sizeof (Error),
"Can not open tempfile ('%s')", tempfile); fatal(Error);
}
while (readline(linep, bufsizp, temptr))
(void) printf("%c %s", type, *linep);
(void) fclose(temptr);
(void) unlink(tempfile);
}
/*
 * Print a 'diff' change-command line (for example "3,5c7,8\n") with
 * 'linenum' added to every decimal number in it. The characters 'a', 'c',
 * 'd', ',' and newline are copied through unchanged.
 *
 * Any other non-digit character is also copied through unchanged. Without
 * that, satoi() would consume nothing and the loop would never advance.
 */
static void
fixnum(char *lp)
{
	offset_t num;
	char *next;

	while (*lp != '\0') {
		switch (*lp) {
		case 'a':
		case 'c':
		case 'd':
		case ',':
		case '\n':
			(void) printf("%c", *lp);
			lp++;
			break;
		default:
			next = satoi(lp, &num);
			if (next == lp) {
				/* Not a digit: pass it through and move on. */
				(void) printf("%c", *lp);
				lp++;
				break;
			}
			lp = next;
			num += linenum;
			(void) printf("%lld", num);
			break;
		}
	}
}
static void
addgen(char **lpp, size_t *bufsizp, FILE *fp)
{
offset_t oldline;
(void) printf("%llda%lld", linenum, linenum+1);
/* Save lines of new file. */
oldline = linenum + 1;
saverest(lpp, bufsizp, fp);
if (oldline < linenum)
(void) printf(",%lld\n", linenum);
else
(void) printf("\n");
/* Output saved lines, as 'diff' would. */
putsave(lpp, bufsizp, '>');
exit(0);
}
static void
delgen(char **lpp, size_t *bufsizp, FILE *fp)
{
offset_t savenum;
(void) printf("%lld", linenum+1);
savenum = linenum;
/* Save lines of old file. */
saverest(lpp, bufsizp, fp);
if (savenum +1 != linenum)
(void) printf(",%lldd%lld\n", linenum, savenum);
else
(void) printf("d%lld\n", savenum);
/* Output saved lines, as 'diff' would. */
putsave(lpp, bufsizp, '<');
exit(0);
}
/*
 * Remove every temporary file this program may have created. The results
 * of unlink() are ignored, so names that do not exist are harmless.
 */
static void
clean_up(void)
{
	char *const doomed[] = { tempfile, otmp, ntmp };
	size_t k;

	for (k = 0; k < sizeof (doomed) / sizeof (doomed[0]); k++)
		(void) unlink(doomed[k]);
}
/*
 * Create a temporary file from the 'tempskel' template.
 *
 * The generated name is stored in 'file', which must be large enough to
 * hold a copy of 'tempskel'. Returns a stream opened "w+" on the new file;
 * calls fatal() if the file cannot be created or the stream not opened.
 */
static FILE *
maket(char *file)
{
	FILE *stream = NULL;
	int tmpfd;

	(void) strcpy(file, tempskel);

	tmpfd = mkstemp(file);
	if (tmpfd != -1)
		stream = fdopen(tmpfd, "w+");

	if (stream == NULL) {
		(void) snprintf(Error, sizeof (Error),
		    "Can not open/create temp file ('%s')", file);
		fatal(Error);
	}

	return (stream);
}
/*
 * General purpose error handler, driven by the "fflags" global word
 * (see <fatal.h>):
 *
 *	FTLMSG	write "<prognam>: <msg>" and a newline to stderr
 *	FTLCLN	call clean_up()
 *	FTLEXIT	exit with status 'fatal_num'
 *
 * The three actions are tried in that order. If FTLEXIT is not set the
 * function returns to its caller.
 */
static void
fatal(char *msg)
{
	if ((fflags & FTLMSG) != 0) {
		(void) fprintf(stderr, "%s: %s\n", prognam, msg);
	}

	if ((fflags & FTLCLN) != 0) {
		clean_up();
	}

	if ((fflags & FTLEXIT) != 0) {
		exit(fatal_num);
	}
}
/*
 * Signal setup routine.
 *
 * For every signal number from 1 up to (but not including) ONSIG, setsig1
 * is installed as the handler. It stays installed only where the previous
 * disposition was SIG_DFL; any other previous disposition is put back.
 * Signals for which signal() reports SIG_ERR are skipped.
 */
static void
setsig(void)
{
	void (*previous)(int);
	int signo = 1;

	while (signo < ONSIG) {
		previous = signal(signo, setsig1);

		/* Only a formerly-default signal keeps the new handler. */
		if (previous != SIG_ERR && previous != SIG_DFL)
			(void) signal(signo, previous);

		signo++;
	}
}
/*
 * Signal handler installed by setsig().
 *
 * Sets the caught signal to be ignored, removes the temporary files and
 * terminates the process with status 1.
 *
 * _exit() is used instead of exit(): exit() runs stdio cleanup and is not
 * async-signal-safe, so calling it here could deadlock or corrupt state if
 * the signal arrived in the middle of a stdio call. The consequence is that
 * output still sitting in stdio buffers is not flushed.
 */
static void
setsig1(int sig)
{
	(void) signal(sig, SIG_IGN);
	clean_up();
	_exit(1);
}
/*
 * Convert the run of decimal digits at the start of 'p' to a number.
 *
 * The value is stored in *ip (0 when 'p' does not start with a digit) and
 * the return value points at the first character after the digits. No
 * sign or white space is accepted and no overflow check is performed.
 */
static char *
satoi(char *p, offset_t *ip)
{
	offset_t sum = 0;

	/*
	 * The cast keeps a negative 'char' value from reaching isdigit(),
	 * whose argument must be representable as unsigned char (or EOF).
	 */
	while (isdigit((unsigned char)*p)) {
		sum = sum * 10 + (*p - '0');
		p++;
	}

	*ip = sum;
	return (p);
}
/*
 * Read a line of data from a file into the heap buffer *bufferp, whose
 * current size is *bufsizp. If the buffer is not large enough to contain
 * the line, its size is doubled with realloc() and reading continues, so
 * both *bufferp and *bufsizp may change across a call.
 *
 * Returns *bufferp, holding the line (with its newline if one was read),
 * or NULL when the first read attempt yields nothing. fatal() is called
 * when memory runs out or when a read fails, other than by reaching end
 * of file, while a long line is being extended.
 */
static char *
readline(char **bufferp, size_t *bufsizp, FILE *filep)
{
	char *grown;
	size_t oldsize;
	size_t newsize;		/* number of bytes to make buffer */

	/*
	 * Plant markers in the last two bytes. fgets() stores its '\0' in
	 * the very last byte only when it fills the whole buffer, and in
	 * that case the byte before it holds the last character read.
	 */
	(*bufferp)[*bufsizp - 1] = '\t';	/* arbitrary non-zero char */
	(*bufferp)[*bufsizp - 2] = ' ';		/* arbitrary non-newline char */

	if (fgets(*bufferp, *bufsizp, filep) == NULL)
		return (NULL);

	/*
	 * Buffer full and not ending in a newline: the line continues.
	 * The strlen() test rejects the case where the '\0' in the last
	 * byte is preceded by an earlier '\0' in the data.
	 */
	while ((*bufferp)[*bufsizp - 1] == '\0' &&
	    (*bufferp)[*bufsizp - 2] != '\n' &&
	    strlen(*bufferp) == *bufsizp - 1) {
		oldsize = *bufsizp;
		newsize = 2 * oldsize;
		if (newsize <= oldsize)		/* size_t wrapped around */
			fatal("Out of memory");

		grown = (char *)realloc((void *)*bufferp, newsize);
		if (grown == NULL)
			fatal("Out of memory");

		*bufferp = grown;
		*bufsizp = newsize;
		grown[newsize - 1] = '\t';
		grown[newsize - 2] = ' ';

		/*
		 * Continue reading on top of the old terminating '\0';
		 * oldsize + 1 bytes remain from there to the buffer end.
		 */
		if (fgets(grown + oldsize - 1, oldsize + 1, filep) == NULL) {
			/* End of file right after a full buffer is fine. */
			if (feof(filep))
				break;
			fatal("Read error");
		}
	}

	return (*bufferp);
}