dhcp_network.c revision 7c478bd95313f5f23a4c958a745db2134aa03244
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2000 by Sun Microsystems, Inc.
* All rights reserved.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
/*
* This file contains public functions for managing DHCP network
* containers. For the semantics of these functions, please see the
* Enterprise DHCP Architecture Document.
*
* This module uses synchronization guarantees provided by dsvclockd(1M);
* please see $SRC/lib/libdhcpsvc/private/README.synch for details.
*
* Big Theory Statement for the SUNWbinfiles DHCP Network Module
* =============================================================
*
* 1. On-disk Structure
*
* Each container consists of two basic pieces on-disk: a header and an
* array of records. In order to provide fast client IP lookup, the array
* of records is directly indexed by client IP address (using a simple
* mapping function). In order to provide fast client id lookup, each
* in-use record is also on exactly one doubly-linked client id hash chain
* (the hash chain heads are contained in the header). For all other
* lookups, we can restrict our search to only the in-use records by merely
* walking all of the hash chains. Here's a crude illustration of what
* this looks like on-disk (note that hash chains 2 and 3 are empty):
*
*                _______________________________________________
*               | container info   | hash chain heads (buckets) |
*    header     |                  | 1 | 2 | 3 |  [ .... ]  | N |
*               |                  | | |   |   |            | | |
*               |__________________|_|________________________|_|
*               | rec1      | rec2   |  | rec3      | rec4    | |
*               |           |        +--->          |         | |
*               | unused    | unused    | hash1     | unused  | |
*               |___________|___________|________^|_|_________|_|
*               | rec5      | rec6      | rec7   |v | rec8    | |
*               |           |           |          ->         | |
*    records    | unused    | hashN     | hash1    <- hash1   | |
*               |___________|________^|_|___________|_________|_|
*               |           :        :: :           :         : |
*               |           :        :: : [ more records... ] : |
*               |           :        :: :           :         : |
*               |___________:________::_:___________:_________:_|
*               | recN-3    | recN-2 || | recN-1    | recN    v |
*               |           |        |+-->         ->           |
*               | unused    | unused +--- hashN    <- hashN     |
*               |___________|___________|___________|___________|
*
* Note that the actual on-disk format is a bit more complicated than this
* due to robustness issues; see section 3 below for details.
*
* 2. Robustness Requirements
*
* This module has been designed to be as efficient as possible while still
* retaining the robustness minimally required for an enterprise-level
* environment. In particular, it is designed to handle the following
* failure situations:
*
* 1. An update operation (add, modify, delete) on a container is
* unable to complete due to an unexpected internal error at
* any point in the update code.
*
* 2. An update operation (add, modify, delete) on a container is
* unable to complete due to unexpected program termination while
* at any point in the update code.
*
* If either of these situations occurs, the container in question must be
* left in a consistent (and viable) state. In addition, only the pending
* transaction (at most) may be lost.
*
* 3. Robustness Techniques
*
* This module uses a few different techniques to meet our robustness goals
* while maintaining high performance. The biggest problem we encounter
* when trying to achieve robustness is updating the client id hash chain.
* In particular, it is not possible to atomically add, move, or delete an
* item from a doubly linked list, thus creating a window where a crash
* could leave our hash chains in an inconsistent state.
*
* To address this problem, we actually maintain two images (copies) of all
* the hash chains in the container. At any point in time, exactly one of
* the two images is active (and thus considered authoritative), as
* indicated by a byte in the container header. When performing an update
* operation, all hash chain modifications are done on the *inactive*
* image, then, once the inactive image has completed the hash chain
* operations required by the update, the active and inactive images are
* atomically switched, making the formerly-inactive image authoritative.
* After the image switch, the update code then updates the formerly-active
* image's hash chains to match the active image's hash chains.
*
* This approach has the nice property that internal container consistency
* can always be restored after a crash by just resynchronizing the
* inactive image's hash chains with the active image's chains. Note that
* the atomic image switch serves as the "commit point" for the operation:
* if we crash before this point, we roll back the operation upon recovery
* and it appears as though the operation never happened; if we crash after
* this point, we roll forward the rest of the operation upon recovery as
* if the crash had not happened.
*
* This technique is enough to robustly implement our add and delete
* operations, but modify has an additional complication due to our direct
* mapping of client IP addresses to records. In particular, unless the
* record modification includes changing the client IP address, the
* modified record must be written at the same location as the original
* record -- however, if the modify operation fails part way through
* writing out the new client record, the record will be corrupt and we
* will have no way to return the record to a consistent state. To address
* this issue, we allocate a spare record in the container header called
* the "temporary" record. Upon a modification of this type, we first
* write the modified record to the temporary record and indicate that the
* temporary record is currently proxying for the actual record. We then
* copy the temporary record to the actual record and make the temporary
* record available again for future use. If a crash occurs before the
* copy to the temporary record is complete, then we just roll back as if
* the modify never happened (since we have not modified the actual
* record). If a crash occurs after copying the temporary record, we roll
* forward and complete the copy operation as if the crash never happened.
* Note that there are some additional subtle complications here; see the
* comments in the code for details.
*/
#include <unistd.h>
#include <sys/isa_defs.h>
#include <dhcp_svc_public.h>
#include <stdlib.h>
#include <dirent.h>
#include <string.h>
#include <libgen.h>
#include <errno.h>
#include <stddef.h>
#include <assert.h>
#include "dhcp_network.h"
#include "util.h"
static int check_dn(dn_handle_t *);
static int write_header(int, dn_header_t *);
int
{
dn_header_t header = { 0 };
char dnpath[MAXPATHLEN];
int i, retval;
return (DSVC_NO_MEMORY);
/*
* As a safeguard, check that the size of a dn_header_t hasn't
* changed (since it contains a dn_rec_t, this will probably catch
* a change in that structure as well). If it has, bail rather
* than totally corrupting the container (by continuing). Note
* that this situation indicates an internal programming error,
* which is why we prefer assert() to just returning DSVC_INTERNAL.
*/
/* CONSTCOND */
if (retval != DSVC_SUCCESS) {
return (retval);
}
if (flags & DSVC_CREATE) {
/*
* We just created the per-network container; initialize
* the header and put it out on disk. Note that we leave
* `dnh_version' zero until the entire header has been
* written, so we can detect partial failure.
*/
header.dnh_version = 0;
header.dnh_errors = 0;
header.dnh_checks = 0;
for (i = 0; i < DN_CIDHASHSZ; i++) {
}
return (retval);
}
/*
* Virtually reserve all the space we're going to need for
* the dn_rec_t's ahead of time, so that we don't have to
* worry about "growing" the file later (though it may
* increase in size as we fill in holes). We're guaranteed
* that we'll read these holes as zeros, which we take
* advantage of since a dn_filerec_t with a rec_prev of
* DN_NOREC (which is 0) indicates that a record is unused.
*/
if (retval != DSVC_SUCCESS) {
return (retval);
}
/*
* Set the version field on the container, effectively
* making it available for use.
*/
if (retval != DSVC_SUCCESS) {
return (retval);
}
} else {
/*
* Container already exists; sanity check against the
* header that's on-disk. If we detect a problem then
* either someone scribbled on our container or we
* terminated abnormally when creating the container.
*/
return (retval);
}
return (DSVC_INTERNAL);
}
}
return (DSVC_SUCCESS);
}
int
{
return (DSVC_INTERNAL);
return (DSVC_SUCCESS);
}
int
{
char dnpath[MAXPATHLEN];
return (syserr_to_dsvcerr(errno));
return (DSVC_SUCCESS);
}
int
{
int retval = DSVC_SUCCESS;
return (DSVC_ACCESS);
return (syserr_to_dsvcerr(errno));
/*
* It's possible that a previous update to this container failed
* part-way through. In general, this is fine since we always keep
* our active image's hash chains correct and only swap to the
* alternate image when the other image is completely safe to use.
* However, for reasons explained in modify_dn(), it's possible
* that a record being modified was not completely updated before a
* failure occurred. In this case, the actual data for that record
* is contained in the temporary record in the header. We need to
* be careful to use that temporary record anywhere we'd otherwise
* refer to the partially updated record. Note that we do this
* rather than attempting to restore the consistency of the
* container because we're MT-hot here.
*/
}
/*
* Lookup scenario 1: Caller has requested a QN_CIP
* query lookup; set `recid' to the only possible
* entry (which may not be in-use).
*/
if (n != 0)
break;
/*
* Lookup scenario 2: Caller has requested a
* QN_CID-based lookup. Walk the `cidhash' chain
* (one call at a time) and set `recid' to hash
* bucket candidates.
*
* Note that it's possible for the client id value
* 00 to appear more than once, and it's not
* impossible for other duplicate client ids to
* occur, so continue until we reach `nrecords'.
*/
if (n == 0) {
== -1)
return (syserr_to_dsvcerr(errno));
} else {
/* sanity check */
break;
}
} else {
/*
* Lookup scenario 3: Caller has requested any
* other type of search. Walk all the client id
* hashes.
*/
if (n == 0) {
hash = 0;
return (syserr_to_dsvcerr(errno));
} else {
/* sanity check */
break;
}
}
/*
* No more records; bail.
*/
break;
if (recid == temp_recid) {
/*
* The temporary record is actually authoritative
* for this record's contents; use it instead.
*/
recid = DN_TEMPREC;
}
break;
}
/*
* If the record isn't in-use, then skip...
*/
continue;
/*
* See if we've got a match...
*/
continue;
/*
* Caller just wants a count of the number of matching
* records, not the records themselves; continue.
*/
nrecords++;
continue;
}
/*
* Allocate the record and fill it in.
*/
if (!partial)
break;
}
/*
* Chuck the record on the list and up the counter.
*/
if (new_records == NULL) {
if (!partial)
break;
}
nrecords++;
}
if (retval == DSVC_SUCCESS) {
return (DSVC_SUCCESS);
}
return (retval);
}
/*
* Compares `dnp' to the target `targetp', using `query' to decide what
* fields to compare. Returns B_TRUE if `dnp' matches `targetp', B_FALSE
* if not.
*/
static boolean_t
{
unsigned int i;
unsigned int query0;
/*
* As an optimization, skip any checks if the query is empty.
*/
return (B_TRUE);
return (B_FALSE);
return (B_FALSE);
return (B_FALSE);
return (B_FALSE);
return (B_FALSE);
return (B_FALSE);
return (B_FALSE);
return (B_FALSE);
return (B_FALSE);
return (B_FALSE);
for (i = 0; i < sizeof (qflags) / sizeof (unsigned int); i++) {
return (B_FALSE);
return (B_FALSE);
}
return (B_TRUE);
}
int
{
int retval;
return (DSVC_ACCESS);
if (retval != DSVC_SUCCESS)
return (retval);
/*
* Get the active image.
*/
if (retval != DSVC_SUCCESS)
return (retval);
/*
* Doublecheck to make sure this entry doesn't exist already.
*/
return (syserr_to_dsvcerr(errno));
return (DSVC_EXISTS);
/*
* We're going to insert `rec' at the head of the `hash' hash
* chain; get it ready-to-go. Note that we update the alternate
* image's hash record id pointers so that the record will
* atomically become in-use when we switch to the alternate image.
*/
return (syserr_to_dsvcerr(errno));
/*
* If there's a record currently on the hash chain (i.e., we're
* not the first) then load the record.
*/
return (syserr_to_dsvcerr(errno));
}
/*
* Before we update any information on disk, mark the container as
* dirty so that there's no chance the container is inconsistent
* without us knowing about it.
*/
if (retval != DSVC_SUCCESS)
return (retval);
/*
* Update the new record on-disk; note that it's not yet reachable
* via hash.
*/
return (syserr_to_dsvcerr(errno));
/*
* Update the alternate image's on-disk hash pointers. We need to
* do this before we switch to the alternate image so we cannot
* abort with an inconsistent active image.
*/
return (syserr_to_dsvcerr(errno));
}
return (syserr_to_dsvcerr(errno));
/*
* Activate the alternate image. This is our commit point -- if we
* fail after this point, we will roll forward on recovery.
*/
if (retval != DSVC_SUCCESS)
return (retval);
/*
* Update the old record id pointers to match.
*/
return (syserr_to_dsvcerr(errno));
return (syserr_to_dsvcerr(errno));
}
return (syserr_to_dsvcerr(errno));
/*
* Update the signature on the record handed back to the caller.
*/
/*
* Finally, mark the container as clean.
*/
}
int
{
int retval;
return (DSVC_ACCESS);
if (retval != DSVC_SUCCESS)
return (retval);
/*
* Get the active image.
*/
if (retval != DSVC_SUCCESS)
return (retval);
/*
* Find the original entry in the network table, make sure the
* record is in-use, and check the signature field (to guard
* against collisions).
*/
return (syserr_to_dsvcerr(errno));
return (DSVC_NOENT);
/*
* The signatures must match to delete a record, *except* when
* delp->dn_sig == 0. This is so records can be deleted that
* weren't retrieved via lookup_dn().
*/
return (DSVC_COLLISION);
/*
* Read our neighboring records.
*/
return (syserr_to_dsvcerr(errno));
}
return (syserr_to_dsvcerr(errno));
}
/*
* Before we update the alternate image's on-disk hash pointers,
* mark the container as dirty so that there's no chance the
* container is inconsistent without us knowing about it.
*/
if (retval != DSVC_SUCCESS)
return (retval);
/*
* Update the alternate image's on-disk hash pointers. We need to
* do this before we switch to the alternate image so we do not
* abort with an inconsistent active image. Also reset the
* record's alternate image record id pointers, so that the old
* record will not be in-use when we switch to the alternate image.
*/
return (syserr_to_dsvcerr(errno));
}
return (syserr_to_dsvcerr(errno));
} else {
return (syserr_to_dsvcerr(errno));
}
return (syserr_to_dsvcerr(errno));
/*
* Activate the alternate image. This is our commit point -- if we
* fail after this point, we will roll forward on recovery.
*/
if (retval != DSVC_SUCCESS)
return (retval);
/*
* Update the old record id pointers to match.
*/
return (syserr_to_dsvcerr(errno));
}
return (syserr_to_dsvcerr(errno));
} else {
== -1)
return (syserr_to_dsvcerr(errno));
}
return (syserr_to_dsvcerr(errno));
/*
* Finally, mark the container as clean.
*/
}
int
{
int retval;
return (DSVC_ACCESS);
if (retval != DSVC_SUCCESS)
return (retval);
/*
* Get the active image.
*/
if (retval != DSVC_SUCCESS)
return (retval);
/*
* Find the original entry in the network table, make sure the
* entry is in-use, and check the signature field (to guard against
* collisions).
*/
return (syserr_to_dsvcerr(errno));
return (DSVC_NOENT);
return (DSVC_COLLISION);
/*
* Check if the record id is changing (as a result of modifying the
* IP address). If it is, then make sure the new one is available
* (if not, fail with DSVC_EXISTS).
*/
return (syserr_to_dsvcerr(errno));
return (DSVC_EXISTS);
}
/*
* Update the record with the new information.
*/
/*
* Find out if our hash chain is changing. If so, then update the
* new record's record id pointers to be on the new chain;
* otherwise just take the original record's pointers. Note that
* in either case, only update the alternate image pointers, so
* that the new record becomes in-use when we switch to the
* alternate image.
*/
} else {
return (syserr_to_dsvcerr(errno));
}
/*
* Write the record out; if this means overwriting the old record,
* then write to a temporary record instead.
*/
== -1)
return (syserr_to_dsvcerr(errno));
/*
* Mark the container as dirty so that there's no chance the
* container is inconsistent without us knowing about it.
*/
if (retval != DSVC_SUCCESS)
return (retval);
/*
* If we've changed either the hash chain or the record id, then
* update our neighboring records' record id pointers. If we're
* changing hash chains, then remove ourselves from the old
* hash chain and insert ourselves on the new one -- otherwise, if
* we're changing record id's, then update our neighbors with our
* new record id. Note that we only apply these changes to the
* alternate image for now so that we can recover upon failure.
*/
return (syserr_to_dsvcerr(errno));
}
return (syserr_to_dsvcerr(errno));
}
} else {
}
return (syserr_to_dsvcerr(errno));
}
return (syserr_to_dsvcerr(errno));
} else {
!image) == -1)
return (syserr_to_dsvcerr(errno));
}
/*
* If our hash is changing, update the alternate image
* record id pointers to point to our moved record.
*/
if (recid_head != DN_NOREC) {
return (syserr_to_dsvcerr(errno));
return (syserr_to_dsvcerr(errno));
}
== -1)
return (syserr_to_dsvcerr(errno));
}
/*
* If our record id is changing, reset the old record's
* alternate image record id pointers, so that the old
* record will not be in-use once we switch over to the
* alternate image.
*/
return (syserr_to_dsvcerr(errno));
}
}
/*
* If we're using the temporary record, then set `dnh_tempimage' to
* the image that will be active when we're done. This piece of
* state is critical in the case of failure, since it indicates
* both that the temporary record is valid, and tells us whether we
* failed before or after activating the alternate image (below).
* If we failed before activating the alternate image, then the
* failure code can just reset `dnh_tempimage' to DN_NOIMAGE and
* resynchronize the pointers. Otherwise, we failed somewhere
* after making the alternate image active but before we completed
* copying the temporary record over to the actual record, which
* the recovery code will then complete on our behalf before
* resynchronizing the pointers.
*/
!image);
if (retval != DSVC_SUCCESS)
return (retval);
}
/*
* Activate the alternate image. This is our commit point -- if we
* fail after this point, we will roll forward on recovery.
*/
if (retval != DSVC_SUCCESS)
return (retval);
/*
* If we used the temporary record, copy the data into the actual
* record. Once finished, reset `dnh_tempimage' to DN_NOIMAGE
* since the temporary record no longer needs to be used.
*/
return (syserr_to_dsvcerr(errno));
if (retval != DSVC_SUCCESS)
return (retval);
}
/*
* Update the old record id pointers to match.
*/
return (syserr_to_dsvcerr(errno));
return (syserr_to_dsvcerr(errno));
}
return (syserr_to_dsvcerr(errno));
} else {
!image) == -1)
return (syserr_to_dsvcerr(errno));
}
/*
* If our hash changed, update the alternate image record
* id pointers to point to our moved record.
*/
if (recid_head != DN_NOREC) {
return (syserr_to_dsvcerr(errno));
}
== -1)
return (syserr_to_dsvcerr(errno));
}
/*
* If our record id changed, then finish marking the old
* record as "not in use".
*/
return (syserr_to_dsvcerr(errno));
}
}
/*
* Update the signature on the new record handed back to the caller.
*/
/*
* Finally, mark the container as clean.
*/
}
int
{
char ipaddr[INET_ADDRSTRLEN];
unsigned int i, count = 0;
char conver[4];
int error;
switch (errno) {
case EACCES:
case EPERM:
return (DSVC_ACCESS);
case ENOENT:
return (DSVC_NO_LOCATION);
default:
break;
}
return (DSVC_INTERNAL);
}
/*
* Compile a regular expression matching "SUNWbinfilesX_" (where X
* is a container version number) followed by an IP address
* (roughly speaking). Note that the $N constructions allow us to
* get the container version and IP address when calling regex(3C).
*/
"(([0-9]{1,3}_){3}[0-9]{1,3})$1$", (char *)0);
return (DSVC_NO_MEMORY);
for (;;) {
/*
* readdir_r() is very broken; see 4329196 -- in the
* meantime, work around it as best we can.
*/
break;
continue;
for (i = 0; ipaddr[i] != '\0'; i++)
if (ipaddr[i] == '_')
ipaddr[i] = '.';
(sizeof (char **)) * (count + 1));
if (new_listpp == NULL) {
goto fail;
}
listpp = new_listpp;
goto fail;
}
count++;
}
}
return (DSVC_SUCCESS);
fail:
for (i = 0; i < count; i++)
return (error);
}
/*
* Check (a la fsck) that a given DHCP network container is in a consistent
* state. If not, then attempt to restore internal consistency; this should
* always be possible unless the container has been externally corrupted.
*/
static int
{
int retval;
/*
* Reading the whole header is a very expensive operation; only do
* it once we're sure the container is actually dirty. On an
* E4500, this optimization lowers the wall-clock cost of creating
* a 5000-record datastore by 20 percent.
*/
if (retval != DSVC_SUCCESS)
return (retval);
if (dirty == 0)
return (DSVC_SUCCESS);
return (syserr_to_dsvcerr(errno));
/*
* If `dnh_tempimage' matches the current working image, then we
* crashed in the middle of a modify_dn() operation. Complete
* writing out the temporary record before restoring internal
* consistency. This is a bit of a kludge but there doesn't seem
* to be another way.
*/
return (syserr_to_dsvcerr(errno));
}
/*
* Blindly update all the header hashhead pointers since we're
* going to have to re-write the header anyway.
*/
}
/*
* Synchronize the record pointers of all in-use records. We do
* this instead of just walking the hashheads because not all dirty
* records are hashed (for instance, we may have failed part way
* through an add_dn()).
*/
return (syserr_to_dsvcerr(errno));
/*
* Verify the pointers match. If not, then correct
* the record and write it back to disk.
*/
header.dnh_errors++;
return (syserr_to_dsvcerr(errno));
}
}
header.dnh_checks++;
return (syserr_to_dsvcerr(errno));
/*
* Clear the dirty bit on the container.
*/
}
/*
* Given a buffer `path' of `pathlen' bytes, fill it in with a path to the
* DHCP Network table for IP network `ip' located in directory `dir'.
*/
static void
{
ip & 0xff);
}
/*
* Given a `cid' that's `cidlen' bytes long, hash it to a value between 0
* and DN_CIDHASHSZ - 1. We use CRC16 for our hash since it's known to be
* very evenly distributed.
*/
static uint16_t
{
while (cidlen-- != 0) {
if (result & 1)
else
result >>= 1;
}
}
return (result % DN_CIDHASHSZ);
}
/*
* Convert the dn_filerec_t pointed to by `rec' from native (host) to
* network order or the other way.
*/
/* ARGSUSED */
static void
{
#ifdef _LITTLE_ENDIAN
#endif
}
/*
* Convert the header pointed to by `hdrp' from native (host) to network
* order or the other way. If `hash' is false, then don't bother
* converting the hash chains.
*/
/* ARGSUSED */
static void
{
#ifdef _LITTLE_ENDIAN
unsigned int i;
if (hash) {
for (i = 0; i < DN_CIDHASHSZ; i++) {
}
}
#endif
}
/*
* Read the dn_filerec_t identified by `recid' from open container `fd'
* into `rec'. Returns 0 on success, -1 on failure (errno is set).
*/
static int
{
return (-1);
return (0);
}
/*
* Write the dn_filerec_t `rec' identified by `recid' into the open
* container `fd'. Returns 0 on success, -1 on failure (errno is set).
*/
static int
{
int retval;
return (retval);
}
/*
* Read the dn_header_t from the open container `fd' into the dn_header_t
* pointed to by `hdrp'; if `hash' is not set, then skip reading the
* dn_header_t hash chains. Returns 0 on success, -1 on failure (errno is
* set).
*/
static int
{
return (-1);
return (0);
}
/*
* Write the dn_header_t pointed to by `hdrp' into open container `fd'.
* Returns 0 on success, -1 on failure (errno is set).
*/
static int
{
int retval;
return (retval);
}
/*
* Read in the head of the `cidhash' hash chain from open container `fd'
* into `recid_headp', using image `image'. Returns 0 on success, -1 on
* failure (errno is set).
*/
static int
{
return (-1);
return (0);
}
/*
* Write out the head of the `cidhash' hash chain into open container `fd'
* from `recid_head', using image `image'. Returns 0 on success, -1 on
* failure (errno is set).
*/
static int
{
}
/*
* Get the byte `offset' bytes into open file `fd', and store in `bytep'.
* Returns a DSVC_* return code.
*/
static int
{
case 1:
return (DSVC_SUCCESS);
case -1:
return (syserr_to_dsvcerr(errno));
default:
break;
}
return (DSVC_INTERNAL);
}
/*
* Set the byte `offset' bytes into open file `fd' to `byte'. Returns a
* DSVC_* return code.
*/
static int
{
case 1:
return (DSVC_SUCCESS);
case -1:
return (syserr_to_dsvcerr(errno));
default:
break;
}
return (DSVC_INTERNAL);
}