/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2005 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
/*
* University Copyright- Copyright (c) 1982, 1986, 1988
* The Regents of the University of California
* All Rights Reserved
*
* University Acknowledgment- Portions of this document are derived from
* software developed by the University of California, Berkeley, and its
* contributors.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
/*
* diff - differential file comparison
*
* Uses an algorithm which finds
* a pair of longest identical subsequences in the two
* files.
*
* The major goal is to generate the match vector J.
* J[i] is the index of the line in file1 corresponding
* to line i file0. J[i] = 0 if there is no
* such line in file1.
*
* Lines are hashed so as to work in core. All potential
* matches are located by sorting the lines of each file
* on the hash (called value). In particular, this
* collects the equivalence classes in file1 together.
* Subroutine equiv replaces the value of each line in
* file0 by the index of the first element of its
* matching equivalence in (the reordered) file1.
* To save space equiv squeezes file1 into a single
* array member in which the equivalence classes
* are simply concatenated, except that their first
* members are flagged by changing sign.
*
* Next the indices that point into member are unsorted into
* array class according to the original order of file0.
*
* The cleverness lies in routine stone. This marches
* through the lines of file0, developing a vector klist
* of "k-candidates". At step i a k-candidate is a matched
* pair of lines x,y (x in file0 y in file1) such that
* there is a common subsequence of lenght k
* between the first i lines of file0 and the first y
* lines of file1, but there is no such subsequence for
* any smaller y. x is the earliest possible mate to y
* that occurs in such a subsequence.
*
* Whenever any of the members of the equivalence class of
* lines in file1 matable to a line in file0 has serial number
* less than the y of some k-candidate, that k-candidate
* with the smallest such y is replaced. The new
* k-candidate is chained (via pred) to the current
* k-1 candidate so that the actual subsequence can
* be recovered. When a member has serial number greater
* that the y of all k-candidates, the klist is extended.
* At the end, the longest subsequence is pulled out
* and placed in the array J by unravel.
*
* With J in hand, the matches there recorded are
* checked against reality to assure that no spurious
* matches have crept in due to hashing. If they have,
* they are broken, and "jackpot " is recorded--a harmless
* matter except that a true match for a spuriously
* mated line may now be unnecessarily reported as a change.
*
* Much of the complexity of the program comes simply
* from trying to minimize core utilization and
* maximize the range of doable problems by dynamically
* allocating what is needed and reusing what is not.
* The core requirements for problems larger than somewhat
* are (in words) 2*length(file0) + length(file1) +
* 3*(number of k-candidates installed), typically about
* 6n words for files of length n.
*/
#include <stdio.h>
#include <wchar.h>
#include <ctype.h>
#include <stdlib.h>
#include <limits.h>
#include <unistd.h>
#include <signal.h>
#include <fcntl.h>
#include <dirent.h>
#include <locale.h>
#include <stdarg.h>
#include <errno.h>
#include <string.h>
#include "diff.h"
#define max(a, b) ((a) < (b) ? (b) : (a))
#define min(a, b) ((a) > (b) ? (b) : (a))
int clen = 0;
int *J; /* will be overlaid on class */
static int mbcurmax;
static void error(const char *);
static void unravel(int);
static void check(void);
static void output(void);
static void change(int, int, int, int);
static void range(int, int, char *);
static void fetch(long *, int, int, int, char *, int);
static void dump_context_vec(void);
static void diffdir(char **);
static void setfile(char **, char **, char *);
char *, char *, char *);
static void prepare(int, char *);
static void prune(void);
static void done(void);
static void noroom(void);
static void usage(void);
static void resetbuf(int);
static int stone(int *, int, int *, int *);
static int newcand(int, int, int);
static int search(int *, int, int);
static int skipline(int);
static int calldiff(char *);
static int binary(int);
static int filebinary(FILE *);
static int isbinary(char *, int);
static int useless(char *);
static char *copytemp(char *);
static wint_t getbufwchar(int, int *);
static long ftellbuf(int);
/*
* error message string constants
*/
static void *
{
void *p;
p = malloc(n);
if (p == NULL)
noroom();
return (p);
}
static void *
{
void *q;
#if 0
free(p);
#endif
q = realloc(p, n);
if (q == NULL)
noroom();
return (q);
}
int
{
int k;
char *argp;
int i, j;
#if !defined(TEXT_DOMAIN) /* Should be defined by cc -D */
#endif
(void) textdomain(TEXT_DOMAIN);
whichtemp = 0;
switch (flag) {
case 'D':
wantelses = 1;
ifdef1 = "";
break;
case 'b':
bflag = 1;
break;
case 'C':
case 'U':
context = 0;
if (*argp)
if (flag == 'U')
uflag++;
else
uflag = 0;
break;
case 'c':
case 'u':
context = 3;
if (flag == 'u')
uflag++;
else
uflag = 0;
break;
case 'e':
break;
case 'f':
break;
case 'h':
hflag++;
break;
case 'i':
iflag = 1;
break;
case 'l':
lflag = 1;
break;
case 'n':
opt = D_NREVERSE;
break;
case 'r':
rflag = 1;
break;
case 'S':
break;
case 's':
sflag = 1;
break;
case 't':
tflag = 1;
break;
case 'w':
wflag = 1;
break;
case '?':
usage();
break;
default:
/* Not sure how it would get here, but just in case */
usage();
}
}
uflag = 0;
if (argc != 2)
if (hflag) {
if (opt) {
gettext("-h doesn't support -e, -f, -n, -c, or -I"));
} else {
diffargv[0] = "diffh";
status = 2;
done();
}
}
else {
perror("stdin");
done();
}
done();
}
else {
else {
perror("stdin");
done();
}
}
done();
}
done();
}
status = 2;
done();
}
status = 2;
done();
}
goto notsame;
for (;;) {
status = 2;
done();
}
if (i != j)
goto notsame;
if (i == 0 && j == 0) {
/* files are the same; diff -D needs to print one */
}
status = 0;
goto same; /* files don't differ */
}
for (j = 0; j < i; j++)
goto notsame;
}
status = 1;
status = 2;
done();
}
done();
}
prune();
check();
output();
same:
done();
/*NOTREACHED*/
return (0);
}
static int
stone(int *a, int n, int *b, int *c)
{
int i, k, y;
int j, l;
int oldl;
k = 0;
c[0] = newcand(0, 0, 0);
for (i = 1; i <= n; i++) {
j = a[i];
if (j == 0)
continue;
y = -b[j];
oldl = 0;
oldc = c[0];
do {
continue;
l = search(c, k, y);
if (l != oldl+1)
oldc = c[l-1];
if (l <= k) {
if (clist[c[l]].y <= y)
continue;
tc = c[l];
oldl = l;
} else {
k++;
break;
}
} while ((y = b[++j]) > 0);
}
return (k);
}
static int
{
struct cand *q;
q->x = x;
q->y = y;
return (clen - 1);
}
static int
search(int *c, int k, int y)
{
int i, j, l;
int t;
if (clist[c[k]].y < y) /* quick look for typical case */
return (k + 1);
i = 0;
j = k+1;
while ((l = (i + j) / 2) > i) {
t = clist[c[l]].y;
if (t > y)
j = l;
else if (t < y)
i = l;
else
return (l);
}
return (l + 1);
}
static void
unravel(int p)
{
int i;
struct cand *q;
for (i = 0; i <= len[0]; i++)
J[i] = i <= pref ? i :
0;
}
/*
* check does double duty:
* 1. ferret out any fortuitous correspondences due to confounding by
* hashing (which result in "jackpot")
* 2. collect random access indexes to the two files
*/
static void
check(void)
{
wint_t c, d;
int i, j;
/* int jackpot; */
int mlen;
resetbuf(0);
resetbuf(1);
j = 1;
/* jackpot = 0; */
/*
* ctold and ctnew are byte positions within the file (suitable for
* lseek()). After we get a character with getwc(), instead of
* just incrementing the byte position by 1, we have to determine
* how many bytes the character actually is. This is the reason for
* the wctomb() calls here and in skipline().
*/
for (i = 1; i <= len[0]; i++) {
if (J[i] == 0) {
continue;
}
while (j < J[i]) {
j++;
}
for (;;) {
c = getbufwchar(0, &mlen);
while (iswspace(c)) {
if (c == '\n' || c == WEOF)
break;
c = getbufwchar(0, &mlen);
}
while (iswspace(d)) {
if (d == '\n' || d == WEOF)
break;
}
} else if (wflag) {
while (iswspace(c) && c != '\n') {
c = getbufwchar(0, &mlen);
}
while (iswspace(d) && d != '\n') {
}
}
if (c != d) {
/* jackpot++; */
J[i] = 0;
if (c != '\n' && c != WEOF)
if (d != '\n' && d != WEOF)
break;
}
break;
} else {
/* jackpot++; */
J[i] = 0;
if (c != '\n')
if (d != '\n')
break;
}
if (c == '\n')
break;
}
}
} else {
for (;;) {
c = getbufwchar(0, &mlen);
if (c != d) {
/* jackpot++; */
J[i] = 0;
if (c != '\n' && c != WEOF)
if (d != '\n' && d != WEOF)
break;
}
if (c == '\n' || c == WEOF)
break;
}
}
j++;
}
for (; j <= len[1]; j++) {
}
/* if(jackpot) */
/* fprintf(stderr, "diff: jackpot\n"); */
}
static int
skipline(int f)
{
int i;
wint_t c;
int mlen;
if (c == '\n' || c == WEOF)
return (i);
i += mlen;
}
return (i);
}
static void
output(void)
{
int m;
int j0;
int mlen;
resetbuf(0);
resetbuf(1);
m = len[0];
J[0] = 0;
i0++;
i1++;
i0--;
i1--;
}
if (m == 0)
for (;;) {
return;
}
}
}
/*
* indicate that there is a difference between lines a and b of the from file
* to get to lines c to d of the to file.
* If a is greater then b then there are no lines in the from file involved
* and this means that there were lines appended (beginning at b).
* If c is greater than d then there are lines missing from the to file.
*/
static void
change(int a, int b, int c, int d)
{
char *dcmsg;
return;
if (anychange == 0) {
anychange = 1;
/*
* TRANSLATION_NOTE_FOR_DC
* This message is the format of file
* timestamps written with the -C and
* -c options.
* %a -- locale's abbreviated weekday name
* %b -- locale's abbreviated month name
* %e -- day of month [1,31]
* %T -- Time as %H:%M:%S
* %Y -- Year, including the century
*/
if (uflag)
time_buf);
else
time_buf);
if (uflag)
time_buf);
else
time_buf);
context_vec_start = (struct context_vec *)
sizeof (struct context_vec));
if (context_vec_start == NULL)
}
}
/*
* if this new change is within 'context' lines of
* the previous change, just add it to the change
* record. If the record is full or if this
* change is more than 'context' lines from the previous
* change, dump the record, reset it & add the new change.
*/
if (context_vec_ptr >= context_vec_end ||
(context_vec_ptr >= context_vec_start &&
context_vec_ptr->a = a;
context_vec_ptr->b = b;
context_vec_ptr->c = c;
context_vec_ptr->d = d;
return;
}
switch (opt) {
case D_NORMAL:
case D_EDIT:
range(a, b, ",");
(void) printf("\n");
break;
case D_REVERSE:
range(a, b, " ");
(void) printf("\n");
break;
case D_NREVERSE:
if (a > b)
else {
if (!(c > d))
/* add changed lines */
}
break;
}
(void) prints("---\n");
}
(void) prints(".\n");
if (inifdef) {
inifdef = 0;
}
}
static void
{
(void) printf("%d", a > b ? b : a);
if (a < b) {
}
}
static void
{
int i;
int col;
int nc;
int mlen = 0;
/*
* When doing #ifdef's, copy down to current line
* if this is the first file, so that stuff makes it to output.
*/
/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
(void) putchar('\n');
break;
} else {
}
}
}
if (a > b)
return;
if (inifdef)
else {
if (oneflag) {
/* There was only one ifdef given */
if (oldfile)
"#ifndef %s\n", endifname);
else
"#ifdef %s\n", endifname);
} else {
"#ifdef %s\n", endifname);
}
}
}
for (i = a; i <= b; i++) {
(void) prints(s);
col = 0;
do
(void) putchar(' ');
while (++col & 7);
else {
col++;
}
} else
break;
}
(void) putchar('\n');
}
}
/*
* hashing has the effect of
* arranging line in 7-bit bytes and then
* summing 1-s complement in 16-bit hunks
*/
static int
{
long sum;
unsigned int shift;
int space;
int t;
int mlen;
sum = 1;
space = 0;
if (iflag)
if (mbcurmax == 1) {
/* In this case, diff doesn't have to take */
/* care of multibyte characters. */
shift += 7) {
if (t == EOF) {
if (shift) {
break;
} else
return (0);
}
}
} else {
/* In this case, diff needs to take care of */
/* multibyte characters. */
for (shift = 0;
shift += 7) {
if (shift) {
break;
} else
return (0);
}
}
}
else
/* In this case, diff doesn't have to take care of */
/* multibyte characters. */
if (t == EOF) {
if (shift) {
break;
} else
return (0);
}
}
} else {
/* In this case, diff needs to take care of */
/* multibyte characters. */
for (shift = 0; ; ) {
space++;
continue;
} else {
switch (wt) {
case WEOF:
if (shift) {
break;
} else
return (0);
default:
shift += 7;
space = 0;
}
shift += 7;
continue;
case L'\n':
break;
}
}
break;
}
}
return (sum);
}
/* dump accumulated "context" diff changes */
static void
dump_context_vec(void)
{
int a, b, c, d;
char ch;
int do_output;
if (cvp > context_vec_ptr)
return;
if (uflag) {
(void) printf("@@ -%d,%d +%d,%d @@\n",
} else {
(void) printf("***************\n*** ");
(void) printf(" ****\n");
}
/*
* output changes to the "old" file. The first loop suppresses
* output if there were no changes to the "old" file (we'll see
* the "old" lines as context in the "new" list).
*/
if (uflag)
do_output = 1;
else
do_output++;
break;
}
if (do_output) {
while (cvp <= context_vec_ptr) {
if (a <= b && c <= d)
ch = 'c';
else
if (ch == 'a') {
/* The last argument should not affect */
/* the behavior of fetch() */
if (uflag)
} else if (ch == 'd') {
" ", 1);
} else {
/* The last argument should not affect */
/* the behavior of fetch() */
1);
if (uflag) {
} else
}
lowa = b + 1;
cvp++;
}
/* The last argument should not affect the behavior */
/* of fetch() */
}
if (uflag) {
return;
}
/* output changes to the "new" file */
(void) printf("--- ");
(void) printf(" ----\n");
do_output = 0;
do_output++;
break;
}
if (do_output) {
while (cvp <= context_vec_ptr) {
if (a <= b && c <= d)
ch = 'c';
else
if (ch == 'd')
/* The last argument should not affect */
/* the behavior of fetch() */
else {
/* The last argument should not affect */
/* the behavior of fetch() */
}
lowc = d + 1;
cvp++;
}
/* The last argument should not affect the behavior */
/* of fetch() */
}
}
/*
* diff - directory comparison
*/
int header;
static void
{
int i;
int cmp;
"warning: should not give -s or -l with -e\n"));
}
dirstatus = 0;
title[0] = 0;
continue; /* Skip -S and its argument */
}
}
;
d1++;
continue;
}
d2++;
continue;
}
cmp = 1;
cmp = -1;
else
if (cmp < 0) {
if (lflag)
d1++;
if (dirstatus == 0)
dirstatus = 1;
} else if (cmp == 0) {
d1++;
d2++;
} else {
if (lflag)
d2++;
if (dirstatus == 0)
dirstatus = 1;
}
}
if (lflag) {
gettext("Common identical files in %.*s and %.*s"),
gettext("Binary files which differ in %.*s and %.*s"),
gettext("Common subdirectories of %.*s and %.*s"),
}
if (rflag) {
(void) printf("\f");
continue;
}
}
}
static void
{
char *cp;
if (*fpp == 0) {
exit(1);
}
continue;
*cp++ = '/';
*cp = 0;
}
static void
{
int titled = 0;
continue;
if (titled == 0) {
if (header == 0)
header = 1;
else
(void) printf("\n");
(void) printf(":\n");
titled = 1;
}
}
}
static void
{
}
int entcmp();
static struct dir *
{
int nitems;
int size;
done();
}
nitems = 0;
if (dp == 0)
if (size > 0) {
}
if (dp == 0)
}
(int (*)(const void *, const void *))entcmp);
return (dp);
}
static int
{
}
static int
{
int i, j;
int result;
return (2);
}
return (2);
}
if (f1 < 0) {
return (2);
}
}
if (f2 < 0) {
return (2);
}
}
switch (fmt1) {
case S_IFDIR:
goto closem;
"Common subdirectories: %s and %s\n"),
goto closem;
case S_IFCHR:
case S_IFBLK:
goto same;
"Special files %s and %s differ\n"),
break;
case S_IFLNK:
"diff: cannot read link\n"));
return (2);
}
"diff: cannot read link\n"));
return (2);
}
if (i == j) {
goto same;
}
"Symbolic links %s and %s differ\n"),
break;
case S_IFIFO:
goto same;
"Named pipes %s and %s differ\n"),
break;
}
} else {
if (lflag)
/*
* TRANSLATION_NOTE
* The second and fourth parameters will take the gettext'ed string
* of one of the following:
* a directory
* a character special file
* a block special file
* a plain file
* a named pipe
* a socket
* a door
* an event port
* an unknown type
*/
(void) printf(
gettext("File %s is %s while file %s is %s\n"),
}
}
return (1);
}
goto notsame;
for (;;) {
if (i < 0 || j < 0) {
return (2);
}
if (i != j)
goto notsame;
if (i == 0 && j == 0)
goto same;
for (j = 0; j < i; j++)
goto notsame;
}
same:
if (sflag == 0)
goto closem;
if (lflag)
else
return (0);
if (lflag)
(void) printf(
gettext("Binary files %s and %s differ\n"),
return (1);
}
anychange = 1;
if (lflag) {
} else {
else
(void) printf("w\nq\n-*-END-*-\n");
}
return (result);
}
static int
{
if (wantpr) {
if (pid == 0) {
(void) close(0);
done();
}
}
if (pid == 0) {
if (wantpr) {
(void) close(1);
}
done();
}
if (wantpr) {
}
continue;
continue;
if ((diffstatus&0177) != 0)
return (2);
else
}
static char *
{
/*
* TRANSLATION_NOTE
* The following 9 messages will be used in the second and
* the fourth parameters of the message
* "File %s is %s while file %s is %s\n"
*/
switch (fmt) {
case S_IFDIR:
return (gettext("a directory"));
break;
case S_IFCHR:
return (gettext("a character special file"));
break;
case S_IFBLK:
return (gettext("a block special file"));
break;
case S_IFREG:
return (gettext("a plain file"));
break;
case S_IFIFO:
return (gettext("a named pipe"));
break;
case S_IFSOCK:
return (gettext("a socket"));
break;
case S_IFDOOR:
return (gettext("a door"));
break;
case S_IFPORT:
return (gettext("an event port"));
break;
default:
return (gettext("an unknown type"));
break;
}
}
static int
binary(int f)
{
int cnt;
if (cnt < 0)
return (1);
}
static int
{
int cnt;
if (ferror(f))
return (1);
}
/*
* We consider a "binary" file to be one that:
* contains a null character ("diff" doesn't handle them correctly, and
* neither do many other UNIX text-processing commands).
* Characters with their 8th bit set do NOT make a file binary; they may be
* legitimate text characters, or parts of same.
*/
static int
{
char *cp;
while (--cnt >= 0)
if (*cp++ == '\0')
return (1);
return (0);
}
/*
* THIS IS CRUDE.
*/
static int
{
if (cp[0] == '.') {
return (1); /* directory "." */
return (1); /* directory ".." */
}
return (1);
return (0);
}
void
{
struct line w;
int j, m;
int k;
for (j = 1, m = 0; j <= n; j *= 2)
m = 2 * j - 1;
for (m /= 2; m != 0; m /= 2) {
k = n - m;
for (j = 1; j <= k; j++) {
break; /* wraparound */
break;
}
}
}
}
static void
{
int *a;
int i;
a = (int *)talloc((l + 1) * sizeof (int));
for (i = 1; i <= l; i++)
for (i = 1; i <= l; i++)
b[i] = a[i];
free((char *)a);
}
static void
{
else
"no more memory - try again later\n"));
status = 2;
done();
}
;
"no more memory - try again later\n"));
status = 2;
done();
}
done();
}
done();
}
}
}
static char *
{
int i;
/*
* ... let's hope this goes away soon!
*/
done();
}
done();
}
done();
}
}
static void
{
struct line *p;
int j, h;
p[j].value = h;
}
len[i] = j;
file[i] = p;
}
static void
prune(void)
{
int i, j;
pref++)
;
suff++)
;
/* decremnt suff by 2 iff suff >= 2, ensure that suff is never < 0 */
if (suff >= 2)
suff -= 2;
for (j = 0; j < 2; j++) {
for (i = 0; i <= slen[j]; i++)
}
}
static void
{
int i, j;
i = j = 1;
while (i <= n && j <= m) {
a[i++].value = 0;
a[i++].value = j;
else
j++;
}
while (i <= n)
a[i++].value = 0;
b[m+1].value = 0; j = 0;
while (++j <= m) {
c[j] = -b[j].serial;
j++;
c[j] = b[j].serial;
}
}
c[j] = -1;
}
static void
done(void)
{
}
static void
noroom(void)
{
done();
}
static void
error(const char *s)
{
done();
}
static void
usage(void)
{
"usage: diff [-bitw] [-c | -e | -f | -h | -n | -u] file1 "
"file2\n"
" diff [-bitw] [-C number | -U number] file1 file2\n"
" diff [-bitw] [-D string] file1 file2\n"
" diff [-bitw] [-c | -e | -f | -h | -n | -u] [-l] [-r] "
"[-s] [-S name] directory1 directory2\n"));
status = 2;
done();
}
struct buff {
};
/*
* Initializes the buff structure for specified
* I/O stream. Also sets the specified offset
*/
static void
{
}
/*
* Reset a buff structure, and rewind the associated file.
*/
static void
{
}
/*
* Returns the current offset in the file
*/
static long
{
}
static wint_t
{
unsigned char *p;
int n;
if (n > 0) {
p = (unsigned char *)mbs;
while (n--) {
}
return (wc);
} else if (n < 0) {
return (wc & 0xff);
} else {
/* this should not happen */
return (WEOF);
}
}
/*
* Reads one wide-character from the file associated with filen.
* If multibyte locales, the input is buffered.
*
* Input: filen the file number (0 or 1)
* Output: *len number of bytes to make wide-character
* Return: wide-character
*/
static wint_t
{
if (mbcurmax == 1) {
/* If sigle byte locale, use getc() */
int ch;
*len = 1;
} else {
} else {
}
}
} else {
}
/* Not buffered */
status = 2;
done();
}
if (num == 0)
return (WEOF);
}
}
status = 2;
done();
}
}
}
if (clen <= 0) {
*len = 1;
} else {
}
}