/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 1995 Sun Microsystems, Inc. All Rights Reserved
*
* module:
* anal.c
*
* purpose:
* routines to analyze the file trees and figure out what has changed
* and queue files for reconciliation. It also contains tree enumeration
* routines for other purposes (pruning and link location).
*
* contents:
*
* change analysis:
* analyze .... (top level) analyze all files in the tree for changes
* summary .... print out change/reconciliation statistics for each base
* check_file . (static) look for changes and queue file for reconciliation
* check_changes (static) figure out if a particular file has changed
* queue_file . (static) add a file to the reconciliation list
*
* other tree enumeration functions:
* prune_file . (static) recursive descent and actual pruning
* prune ...... (top level) initiate pruning analysis for nonexistent files
* find_link .. look for other files to which a file may be a link
* link_update. propagate changed stat info to all other links
* same_name .. (static) figure out if two nodes describe same file
*
* misc:
* push_name .. maintain a running full pathname as we descend
* pop_name ... maintain a running full pathname as we pop back
* get_name ... return full pathname for the current file
*
* notes:
* analysis is limited to files that were evaluated in the previous
* pass ... since we don't have complete information about files that
* were not evaluated in the previous pass.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <stdio.h>
#include <stdlib.h>
#include <strings.h>
#include "messages.h"
#include "filesync.h"
#include "database.h"
#include "debug.h"
/*
* routines:
*/
/* add one name component to the running pathname as the tree walk descends */
void push_name(const char *name);
/*
 * drop the last component as the tree walk pops back up; declared with
 * (void) so that this is a real prototype matching the definition below
 */
void pop_name(void);
/*
* globals
*/
/*
* routine:
* analyze
*
* purpose:
* top level routine for the analysis/reconciliation process
*
* parameters:
* none
*
* returns:
* error mask
*
* notes:
* a critical side effect of this routine is the creation of
* the reconciliation list, an ordered list of files that
* need to be processed in the subsequent reconciliation pass
*/
analyze()
/* accumulated error mask; this is the value handed back to the caller */
int errs = 0;
int err;
int percentage;
/*
* run through all bases and directories looking for files
* that have been renamed. This must be done before the
* difference analysis because a directory rename can introduce
* radical restructuring into a name-based tree.
*/
}
/*
* run through all bases and files looking for candidates
* note, however that we only descend into trees that have
* the evaluate flag turned on. As a result of new rules or
* restriction arguments, we may be deliberately ignoring
* large amounts of the baseline. This means we won't do
* any stats to update the information in those nodes, and
* they will be written back just as they were.
*
* note that there is code to prune out baseline nodes for
* files that no longer exist, but that code is in reconcile
* and will never get a chance to run on nodes that aren't
* analyzed.
*
* we also want to run through all nodes with STAT errors
* so that we can put them on the reconciliation list.
*/
}
/*
* my greatest fear is that someday, somehow, by messing with
* variables or baselines or who-knows-what, that someone will
* run a reconciliation against a large tree that doesn't correspond
* to the baseline, and I will infer that a bazillion files have
* been deleted and will propagate the slaughter before anyone
* can say somebody stop that maniac.
*
* in order to prevent such a possibility, we have a few different
* sanity checks. There is, of course, a tradeoff here between
* danger and irritation. The current set of heuristics for whether
* or not to generate a warning are (any of)
*
* at least CONFIRM_MIN files have been deleted AND
* CONFIRM_PCT of all files have been deleted
*
* the inode number on a LISTed directory has changed
*
* a non-empty directory has been deleted.
*/
/* start with an empty message; msgbuf[0] is tested again below */
msgbuf[0] = 0;
else if (inum_changes > 0)
else if (est_rmdirs)
if (msgbuf[0])
/*
* TRICK:
* the change list contains both files that have changed
* (and probably warrant reconciliation) and files that
* we couldn't get up-to-date stat information on. The
* latter files should just be flagged as being in conflict
* so they can be reported in the summary. The same is
* true of all subsequent files if we abort reconciliation.
*/
/* if it isn't in the baseline yet, don't add it */
/* flag the unresolved file in the error mask we return */
errs |= ERR_UNRESOLVED;
if (opt_verbose)
: V_nostat),
fp->f_fullname);
} else {
}
}
return (errs);
}
/*
* routine:
* prune_file
*
* purpose:
* to look for file entries that should be pruned from baseline
* prune the current file if it needs pruning, and recursively
* descend if it is a directory.
*
* parameters:
* pointer to file node
*/
static int
/* count of prunes noted by this call; this is the return value */
int prunes = 0;
/* if node hasn't been evaluated, mark it for removal */
prunes++;
}
/* now check our children */
return (prunes);
}
/*
* routine:
* prune
*
* purpose:
* to prune the baseline of entries that no longer correspond to
* existing rules.
*
* notes:
* This routine just calls prune_file on the top of each base tree.
*/
int
prune()
/* count of pruned entries reported back to the caller */
int prunes = 0;
}
return (prunes);
}
/*
* routine:
* summary
*
* purpose:
* to print out statistics and conflict lists
*/
void
summary()
/* defined elsewhere; tested once at the end of this routine */
extern bool_t need_super;
/* see if this base was irrelevant */
continue;
/* print out a summary for this base */
/* the unresolved count is only reported when it is non-zero */
if (bp->b_unresolved)
bp->b_unresolved);
/* print out a list of unreconciled files for this base */
continue;
continue;
}
}
if (need_super)
}
/*
* routine:
* check_file
*
* purpose:
* figure out if a file requires reconciliation and recursively
* descend into all sub-files and directories
*
* parameters:
* base pointer
* file pointer
*
* returns:
* error mask
* built up changes needed list
* updated statistics
*
* notes:
* this routine builds up a path name as it descends through
* the tree (see push_name, pop_name, get_name).
*/
static errmask_t
/* error mask returned to the caller on every exit path */
int errs = 0;
/* see if the source has changed */
/* see if the destination has changed */
/* if nobody thinks the file exists, baseline needs pruning */
}
/* keep track of possible deletions to look for trouble */
est_deletes++;
/* see if file is (or has been) a non-empty directory */
est_rmdirs++;
}
}
/* if we found differences, queue the file for reconciliation */
queue_file(fp);
}
}
/* bump the total file count */
total_files++;
/* if this is not a directory, we're done */
return (errs);
/*
* If this is a directory, we need to recursively analyze
* our children, but only children who have been evaluated.
* If a node has not been evaluated, then we don't have
* updated stat information and there is nothing to analyze.
*
* we also want to run through all nodes with STAT errors
* so that we can put them on the reconciliation list.
* If a directory is unreadable on one side, all files
* under that directory (ON BOTH SIDES) must be marked as
* blocked by stat errors.
*/
}
/* done with this directory; remove its name from the running pathname */
pop_name();
return (errs);
}
/*
* routine:
* check_changes
*
* purpose:
* to figure out what has changed for a specific file
*
* parameters:
* file pointer
* the reference info
* the info to be checked for changes
*
* returns:
* diff mask
*
* notes:
* this routine doesn't pretend to understand what happened.
* it merely enumerates the ways in which the files differ.
*/
static diffmask_t
/* accumulated difference mask; this is the value returned */
int mask = 0;
int type;
if (type == 0)
else
/*
*/
/*
* for directories, we don't look directly at
* the contents, so these fields don't mean
* anything. If the directories have changed
* in any interesting way, we'll find it by
* walking the tree.
*/
}
return (mask);
}
/*
* routine:
* same_name
*
* purpose:
* to figure out whether or not two database nodes actually refer to
* the same file.
*
* parameters:
* pointers to two file description nodes
* which side we should check
*
* returns:
*
* notes:
* if a single directory is specified in multiple base pairs, it
* is possible to have multiple nodes in the database describing
* the same file. This routine is supposed to detect those cases.
*
* what should be a trivial string comparison is complicated by
* the possibility that the two nodes might describe the same file
* from base directories at different depths. Thus, rather than
* comparing two strings, we really want to compare the concatenation
* of two pairs of strings. Unfortunately calling full_name would
* be awkward right now, so instead we have our own comparison
* routine that automatically skips from the first string to
* the second.
*/
static bool_t
{
} else {
}
/*
* Compare the two names, and if they differ before they end
* this is a non-match. If they both end at the same time,
* this is a match.
*
* The trick here is that each string is actually the logical
* concatenation of two strings, and we need to automatically
* wrap from the first to the second string in each pair. There
* is no requirement that the two (concatenated) strings be
* broken at the same point, so we have a slightly baroque
* comparison loop.
*/
/*
* strings have been identical so far, so advance the
* pointers and continue the comparison. The trick
* is that when either string ends, we have to wrap
* over to its extension.
*/
continue;
/*
* at least one of the strings has ended.
* there is an implicit slash between the string
* and its extension, and this has to be matched
* against the other string.
*/
s2++;
s1++;
else
/* the disagreement doesn't come at a slash */
break;
}
/*
* if either string has ended, wrap to its extension
*/
/* presumably clears the first extension once it has been used */
x1 = 0;
}
/* likewise for the second string's extension */
x2 = 0;
}
}
}
/*
* routine:
* find_link
*
* purpose:
* to figure out if there is a file to which we should
* be creating a link (rather than making a copy)
*
* parameters:
* file node for the file to be created (that we hope is merely a link)
*
* return:
* 0 no link is appropriate
* else pointer to file node for link referent
*
* notes:
* there are a few strange heuristics in this routine and I
* wouldn't bet my soul that I got all of them right. The general
* theory is that when a new file is created, we look to see if it
* is a link to another file on the changed side, and if it is, we
* find the corresponding file on the unchanged side.
*
* cases we want to be able to handle:
* 1. one or more links are created to a preexisting file
* 2. a preexisting only link is renamed
* 3. a rename of one of multiple links to a preexisting file
* 4. a single file is created with multiple links
*/
struct file *
/* chg = side on which the change was noticed */
/* tgt = side to which the change is to be propagated */
/*
* cases 1 and 3
*
* When a new link is created, we should be able to find
* another file in the changed hierarchy that has the same
* I-node number. We expect it to be on the changed list
* because the link count will have gone up or because all
* of the copies are new. If we find one, then the new file
* on the receiving side should be a link to the corresponding
* existing file.
*
* case 4
*
* the first link will be dealt with as a copy, but all
* subsequent links should find an existing file analogous
* to one of the links on the changed side, and create
* corresponding links on the other side.
*
* in each of these cases, there should be multiple links
* on the changed side. If the linkcount on the changed
* side is one, we needn't bother searching for other links.
*/
/* finding the same node doesn't count */
continue;
/*
* if the file doesn't already exist on the target side
* we cannot make a link to it
*/
continue;
/*
* if this is indeed a link, then the prospective file on
* the changed side must refer to the same I-node as the file
* we are looking for
*/
continue;
continue;
continue;
/*
* if the target side is already a link to this file,
* then there is no new link to be created
* FIX: how does this interact with copies over links
*/
continue;
/*
* there is a pathological situation where a single file
* might appear under multiple base directories. This is
* damned awkward to detect in any other way, so we must
* check to see if we have just found another database
* instance for the same file (on the changed side).
*/
continue;
/* every test passed: this candidate is the link referent */
return (lp);
}
/*
* case 2: a simple rename of the only link
*
* In this case, there may not be any other existing file on
* the changed side that has the same I-node number. There
* might, however, be a record of such a file in the baseline.
* If we can find an identical file with a different name that
* has recently disappeared, we have a likely rename.
*/
/* finding the same node doesn't count */
continue;
/*
* if the file still exists on the changed side this is
* not a simple rename, and in fact the previous pass
* would have found it.
*/
continue;
/*
* the inode number for the new link on the changed
* side must match the inode number for the old link
* from the baseline.
*/
continue;
continue;
continue;
/* finding a file we are already linked to doesn't help */
continue;
/*
* there is a danger that we will confuse an
* inode reallocation with a rename. We should
* only consider this to be a rename if the
* new file is identical to the old one
*/
continue;
continue;
continue;
continue;
continue;
/* every test passed: treat this baseline entry as the renamed file */
return (lp);
}
/* neither search produced a candidate: no link is appropriate */
return (0);
}
/*
* routine:
* has_other_links
*
* purpose:
* to determine whether or not there is more than one link to a
* particular file. We are willing to delete a link to a file
* that has changed if we will still have other links to it.
* The trick here is that we only care about links under our
* dominion.
*
* parameters:
* file pointer to node we are interested in
* which side we are looking for additional links on
*
* returns:
* TRUE if there are multiple links
* FALSE if this is the only one we know of
*/
/* if the link count is one, there couldn't be others */
return (FALSE);
/* look for any other files for the same inode */
/* finding the same node doesn't count */
continue;
/*
* file must still exist on this side
*/
continue;
/*
* if this is indeed a link, then the prospective file on
* this side must refer to the same inode as the file
* we are looking for
*/
continue;
continue;
continue;
/*
* we have found at least one other link
*/
return (TRUE);
}
/* no other candidate passed the tests: this is the only link we know of */
return (FALSE);
}
/*
* routine:
* link_update
*
* purpose:
* to propagate a stat change to all other file nodes that
* correspond to the same I-node on the changed side
*
* parameters:
* file pointer for the updated file
* which side was changed
*
* returns:
* void
*
* notes:
* if we have copied onto a file, we have copied onto all
* of its links, but since we do all stats before we do any
* copies, the stat information recently collected for links
* is no longer up-to-date, and this would result in incorrect
* reconciliation (redundant copies).
*
* There is an assumption here that all links to a changed
* file will be in the change list. This is true for almost
* all cases not involving restriction. If we do fail to
* update the baseline for a file that was off the change list,
* the worst that is likely to happen is that we will think
* it changed later (but will almost surely find that both
* copies agree).
*/
void
/* finding the current entry doesn't count */
continue;
/* look for same i#, maj, min on changed side */
continue;
continue;
continue;
/*
* this appears to be another link to the same file
* so the updated stat information for one must be
* correct for the other.
*/
/* format of the debug trace reporting the link whose info was updated */
"STAT: UPDATE LINK, file=%s, mod=%08lx.%08lx\n",
}
}
/*
* routine:
* queue_file
*
* purpose:
* append a file to the list of needed reconciliations
*
* parameters:
* pointer to file
*
* notes:
* when a request is appended to the reconciliation list,
* we fill in the full name. We delayed this in hopes that
* it wouldn't be necessary (saving cycles and memory)
*
* There is some funny business with modification times.
* In general, we queue files in order of the latest modification
* time so that propagations preserve relative ordering. There
* are, however, a few important exceptions:
* 1. all directory creations happen at time zero,
* so that they are created before any files can
* be added to them.
* 2. all directory deletions happen at time infinity-depth,
* so that everything else can be removed before the
* directories themselves are removed.
* 3. all file deletions happen at time infinity-depth
* so that (in renames) the links will precede the unlinks.
*/
static void
/*
* figure out the modification time for sequencing purposes
*/
/*
* deletions are performed last, and depth first
*/
/*
* for most files we use the latest mod time
*/
}
} else {
/*
* new directory creations need to happen before anything
* else and are automatically sequenced in traversal order
*/
}
/*
* insertion is time ordered, and for equal times,
* insertion is in (pre-order) traversal order
*/
continue;
break;
break;
}
}
/*
* routines:
*
* purpose:
* maintain a name stack so we can form name of a particular file
* as the concatenation of all of the names between it and the
* (known to be fully qualified) base directory.
*
* notes:
* we go to this trouble because most files never change and
* so we don't need to associate full names with every one.
* This stack is maintained during analysis, and if we decide
* to add a file to the reconciliation list, we can use the
* stack to generate a fully qualified name at that time.
*
* we compress out '/./' when we return a name. Given that the
* stack was built by a tree walk, the only place a /./ should
* appear is at the first level after the base ... but there
* are legitimate ways for them to appear there.
*
* these names can get deep, so we dynamically size our name buffer
*/
/*
 * namedepth bounds the loop in get_name that assembles the name, and
 * pop_name's sanity check treats a negative value as an error.
 */
static int namedepth = 0;
/*
 * presumably the combined length of the names on the stack (TODO confirm);
 * pop_name's sanity check expects it to be zero when namedepth is zero.
 */
static int namelen = 0;
/* push_name: extend the running pathname as the tree walk descends */
void
{
/* make sure we don't overflow our name stack */
}
}
/* pop_name: shorten the running pathname as the tree walk pops back up */
void
pop_name(void)
{
#ifdef DBG_ERRORS
/* just a little sanity check here */
if (namedepth <= 0) {
/* a negative depth means more pops than pushes */
if (namedepth < 0) {
/* depth is exactly zero here, so no name length should remain */
} else if (namelen != 0) {
}
}
#endif
}
/* get_name: return the full pathname for the current file */
char
{ int i;
/*
 * the buffer is static, so it persists across calls; the pointer we
 * return refers to it and each call starts by resetting its contents
 */
static char *namebuf = 0;
static int buflen = 0;
/* make sure we have an adequate buffer */
if (buflen < i) {
}
/* assemble the name */
namebuf[0] = 0;
/* one pass per level currently on the name stack */
for (i = 0; i < namedepth; i++) {
}
}
return (namebuf);
}