vplat.c revision 108322fb1c3ed341aba9c80c9774df0ed9e35768
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2005 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
/*
* This module contains functions used to bring up and tear down the
* Virtual Platform: [un]mounting file-systems, [un]plumbing network
* interfaces, [un]configuring devices, establishing resource controls,
* and creating/destroying the zone in the kernel. These actions, on
* the way up, ready the zone; on the way down, they halt the zone.
* See the much longer block comment at the beginning of zoneadmd.c
* for a bigger picture of how the whole program functions.
*
* This module also has primary responsibility for the layout of "scratch
* zones." These are mounted, but inactive, zones that are used during
* operating system upgrade and potentially other administrative action. The
* scratch zone environment is similar to the miniroot environment. The zone's
* actual root is mounted read-write on /a, and the standard paths (/usr,
* /sbin, /lib) all lead to read-only copies of the running system's binaries.
* This allows the administrative tools to manipulate the zone using "-R /a"
* without relying on any binaries in the zone itself.
*
* If the scratch zone is on an alternate root (Live Upgrade [LU] boot
* environment), then we must resolve the lofs mounts used there to uncover
* writable (unshared) resources. Shared resources, though, are always
* read-only. In addition, if the "same" zone with a different root path is
* currently running, then "/b" inside the zone points to the running zone's
* root. This allows LU to synchronize configuration files during the upgrade
* process.
*
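* To construct this environment, this module creates a tmpfs mount on the
* zone's scratch root (of the form .../lu). Inside it, the miniroot-like
* environment described above is constructed on the fly. The zone is then
* created using that scratch root as its root.
*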
* Note that scratch zones are inactive. The zone's bits are not running and
* likely cannot be run correctly until upgrade is done. Init is not running
* there, nor is SMF. Because of this, the "mounted" state of a scratch zone
* is not a part of the usual halt/ready/boot state machine.
*/
#include <netdb.h>
#include <stdio.h>
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>
#include <rctl.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <wait.h>
#include <limits.h>
#include <libgen.h>
#include <zone.h>
#include <assert.h>
#include <pool.h>
#include <libzonecfg.h>
#include "zoneadmd.h"
#define V4_ADDR_LEN 32
#define V6_ADDR_LEN 128
/* 0755 is the default directory mode. */
#define DEFAULT_DIR_MODE \
#define IPD_DEFAULT_OPTS \
/*
* A list of directories which should be created.
*/
struct dir_info {
char *dir_name;		/* path of a directory that should be created */
/*
 * NOTE(review): the table entry that follows this struct supplies two
 * values per element (a path and a mode such as 0755); confirm that the
 * member list here is complete.
 */
};
/*
* The pathnames below are relative to the zonepath
*/
{ "/dev", 0755 },
};
/*
* A list of symbolic links which should be created.
*/
struct symlink_info {
char *sl_source;
char *sl_target;
};
/*
* The "source" paths are relative to the zonepath
*/
static struct symlink_info dev_symlinks[] = {
};
/* for routing socket */
static int rts_seqno = 0;
/* mangled zone name when mounting in an alternate root environment */
static char kernzone[ZONENAME_MAX];
/* array of cached mount entries for resolve_lofs */
/* from libsocket, not in any header file */
/*
* An optimization for build_mnttable: reallocate (and potentially copy the
* data) only once every N times through the loop.
*/
#define MNTTAB_HUNK 32
/*
* Private autofs system call
*/
extern int _autofssys(int, void *);
static int
{
/*
* Ask autofs to unmount all trigger nodes in the given zone.
*/
}
static void
{
uint_t i;
return;
for (i = 0; i < nelem; i++) {
}
}
/*
* Build the mount table for the zone rooted at "zroot", storing the resulting
* array of struct mnttabs in "mnt_arrayp" and the number of elements in the
* array in "nelemp".
*/
static int
{
nmnt = 0;
continue;
if (nmnt % MNTTAB_HUNK == 0) {
return (-1);
}
}
/*
* Zero out any fields we're not using.
*/
return (-1);
}
}
*mnt_arrayp = mnts;
return (0);
}
/*
* This is an optimization. The resolve_lofs function is used quite frequently
* to manipulate file paths, and on a machine with a large number of zones,
* there will be a huge number of mounted file systems. Thus, we trigger a
* reread of the list of mount points only after the cached copy has been
* discarded.
*/
static void
lofs_discard_mnttab(void)
{
}
static int
{
return (-1);
&nmnts) == -1) {
return (-1);
}
return (0);
}
/*
* This function loops over potential loopback mounts and symlinks in a given
* path and resolves them all down to an absolute path.
*/
static void
{
const char *altroot;
char tmppath[MAXPATHLEN];
return;
/* This happens once per zoneadmd operation. */
return;
altroot = zonecfg_get_root();
for (;;) {
mnp++) {
continue;
break;
}
if (mnp >= resolve_lofs_mnt_max)
break;
if (outside_altroot) {
char *cp;
/*
* If we run into a read-only mount outside of the
* alternate root environment, then the user doesn't
* want this path to be made read-write.
*/
NULL &&
break;
}
} else if (arlen > 0 &&
}
/* use temporary buffer because new path might be longer */
break;
}
}
/*
* For a regular mount, check if a replacement lofs mount is needed because the
* referenced device is already mounted somewhere.
*/
static int
{
/* This happens once per zoneadmd operation. */
return (-1);
/*
* If this special node isn't already in use, then it's ours alone;
* no need to worry about conflicting mounts.
*/
mnp++) {
break;
}
if (mnp >= resolve_lofs_mnt_max)
return (0);
/*
* Convert this duplicate mount into a lofs mount.
*/
sizeof (fsptr->zone_fs_special));
sizeof (fsptr->zone_fs_type));
/*
* Discard all but one of the original options and set that to be the
* same set of options used for inherit package directory resources.
*/
fsptr->zone_fs_dir);
return (-1);
}
} else {
}
}
return (0);
}
static int
{
char path[MAXPATHLEN];
sizeof (path)) {
subdir);
return (-1);
}
/*
* We don't check the file mode since presumably the zone
* administrator may have had good reason to change the mode,
* and we don't need to second guess him.
*/
return (-1);
}
"a read-only file system in this local zone.\nMake "
else
return (-1);
}
return (0);
}
/*
* Make /dev and various directories underneath it.
*/
static int
{
int i;
return (-1);
}
return (0);
}
/*
* Make various sym-links underneath /dev.
*/
static int
{
int i;
for (i = 0; i < sizeof (dev_symlinks) / sizeof (struct symlink_info);
i++) {
char dev[MAXPATHLEN];
dev_symlinks[i].sl_source);
/*
* Try not to call unlink(2) on directories, since that
* makes UFS unhappy.
*/
return (-1);
}
}
dev_symlinks[i].sl_target);
return (-1);
}
}
return (0);
}
/*
* Create various directories and sym-links under /dev.
*/
static int
{
char zonepath[MAXPATHLEN];
return (-1);
}
if (zonecfg_in_alt_root())
return (-1);
return (-1);
return (0);
}
static void
free_remote_fstypes(char **types)
{
uint_t i;
return;
}
static char **
{
char buf[MAXPATHLEN];
char fstype[MAXPATHLEN];
uint_t i;
return (NULL);
}
/*
* Count the number of lines
*/
lines++;
if (lines == 0) /* didn't read anything; empty file */
goto out;
/*
* Allocate enough space for a NULL-terminated array.
*/
goto out;
}
i = 0;
/* LINTED - fstype is big enough to hold buf */
goto out;
}
goto out;
}
i++;
}
out:
return (types);
}
static boolean_t
{
uint_t i;
if (remote_fstypes == NULL)
return (B_FALSE);
for (i = 0; remote_fstypes[i] != NULL; i++) {
return (B_TRUE);
}
return (B_FALSE);
}
/*
* This converts a zone root path (normally of the form .../root) to a Live
* Upgrade scratch zone root (of the form .../lu).
*/
static void
{
if (!isresolved && zonecfg_in_alt_root())
}
/*
* The general strategy for unmounting filesystems is as follows:
*
* - Remote filesystems may be dead, and attempting to contact them as
* part of a regular unmount may hang forever; we want to always try to
* forcibly unmount such filesystems and only fall back to regular
* unmounts if the filesystem doesn't support forced unmounts.
*
* - We don't want to unnecessarily corrupt metadata on local
* filesystems (ie UFS), so we want to start off with graceful unmounts,
* and only escalate to doing forced unmounts if we get stuck.
*
* We start off walking backwards through the mount table. This doesn't
* give us strict ordering but ensures that we try to unmount submounts
* first. We thus limit the number of failed umount2(2) calls.
*
* The mechanism for determining if we're stuck is to count the number
* of failed unmounts each iteration through the mount table. This
* gives us an upper bound on the number of filesystems which remain
* mounted (autofs trigger nodes are dealt with separately). If at the
* end of one unmount+autofs_cleanup cycle we still have the same number
* of mounts that we started out with, we're stuck and try a forced
* unmount. If that fails (filesystem doesn't support forced unmounts)
* then we bail and are unable to teardown the zone. If it succeeds,
* we're no longer stuck so we continue with our policy of trying
* graceful unmounts first.
*
* Zone must be down (ie, no processes or threads active).
*/
static int
{
int error = 0;
char **remote_fstypes = NULL;
return (-1);
}
if (unmount_cmd)
return (-1);
}
/*
* Use our hacky mntfs ioctl so we see everything, even mounts with
* MS_NOMNTTAB.
*/
error++;
goto out;
}
/*
* Build the list of remote fstypes so we know which ones we
* should forcibly unmount.
*/
for (; /* ever */; ) {
char *path;
uint_t i;
nmnt = 0;
/*
* MNTTAB gives us a way to walk through mounted
* filesystems; we need to be able to walk them in
* reverse order, so we build a list of all mounted
* filesystems.
*/
&nmnt) != 0) {
error++;
goto out;
}
for (i = 0; i < nmnt; i++) {
/*
* Try forced unmount first for remote filesystems.
*
* Not all remote filesystems support forced unmounts,
* so if this fails (ENOTSUP) we'll continue on
* and try a regular unmount.
*/
}
/*
* Try forced unmount if we're stuck.
*/
if (stuck) {
} else {
/*
* The first failure indicates a
* mount we won't be able to get
* rid of automatically, so we
* bail.
*/
error++;
"unable to unmount '%s'", path);
goto out;
}
}
/*
* Try regular unmounts for everything else.
*/
newcount++;
}
if (newcount == 0)
break;
/*
* Last round didn't unmount anything; we're stuck and
* should start trying forced unmounts.
*/
}
/*
* Autofs doesn't let you unmount its trigger nodes from
* userland so we have to tell the kernel to cleanup for us.
*/
if (autofs_cleanup(zoneid) != 0) {
error++;
goto out;
}
}
out:
return (error ? -1 : 0);
}
static int
{
}
/*
* Fork and exec (and wait for) the mentioned binary with the provided
* arguments. Returns (-1) if something went wrong with fork(2) or exec(2),
* returns the exit status otherwise.
*
* If we were unable to exec the provided pathname (for whatever
* reason), we return the special token ZEXIT_EXEC. The current value
* of ZEXIT_EXEC doesn't conflict with legitimate exit codes of the
* consumers of this function; any future consumers must make sure this
* remains the case.
*/
static int
{
int child_status = 0;
/*
* Do not let another thread localize a message while we are forking.
*/
(void) mutex_lock(&msglock);
(void) mutex_unlock(&msglock);
if (child_pid == -1) {
return (-1);
} else if (child_pid == 0) {
closefrom(0);
/*
* Since we are in the child, there is no point calling zerror()
* since there is nobody waiting to consume it. So exit with a
* special code that the parent will recognize and call zerror()
* accordingly.
*/
} else {
}
if (WIFSIGNALED(child_status)) {
return (-1);
}
return (-1);
}
return (WEXITSTATUS(child_status));
}
static int
{
char cmdbuf[MAXPATHLEN];
char *argv[4];
int status;
/*
*/
> sizeof (cmdbuf)) {
return (-1);
}
argv[0] = "fsck";
return (status);
return (-1);
}
static int
{
char cmdbuf[MAXPATHLEN];
char *argv[6];
int status;
/*
*/
> sizeof (cmdbuf)) {
return (-1);
}
argv[0] = "mount";
if (opts[0] == '\0') {
} else {
}
return (status);
if (opts[0] == '\0')
"failed with exit code %d",
else
"failed with exit code %d",
return (-1);
}
/*
* Make sure if a given path exists, it is not a sym-link, and is a directory.
*/
static int
{
char respath[MAXPATHLEN];
int res;
return (0);
return (-1);
}
return (-1);
}
return (-1);
}
return (-1);
}
/*
* We don't like ".."s and "."s throwing us off
*/
return (-1);
}
return (0);
}
/*
* If a component of the path exists but isn't the canonical path to a
* directory, that component is returned in badpath, which is assumed to be at
* least of size MAXPATHLEN.
*
* Relpath must begin with '/'.
*/
static boolean_t
{
/*
* Make sure abspath has at least one '/' after its rootpath
* component, and ends with '/'.
*/
sizeof (abspath)) {
relpath);
return (B_FALSE);
}
do {
*slashp = '\0';
return (B_FALSE);
*slashp = '/';
slashp++;
return (B_TRUE);
}
static int
{
char path[MAXPATHLEN];
char specpath[MAXPATHLEN];
char optstr[MAX_MNTOPT_STR];
return (-1);
}
DEFAULT_DIR_MODE) != 0)
return (-1);
fsptr->zone_fs_dir);
/*
* A zero-length special is how we distinguish IPDs from
* general-purpose FSs. Make sure it mounts from a place that
* can be seen via the alternate zone's root.
*/
sizeof (specpath)) {
return (-1);
}
if (zonecfg_in_alt_root())
specpath);
return (-1);
}
return (0);
}
/*
* In general the strategy here is to do just as much verification as
* necessary to avoid crashing or otherwise doing something bad; if the
* administrator initiated the operation via zoneadm(1m), he'll get
* auto-verification which will let him know what's wrong. If he
* modifies the zone configuration of a running zone and doesn't attempt
* to verify that it's OK we won't crash but won't bother trying to be
* too helpful either. zoneadm verify is only a couple keystrokes away.
*/
return (-1);
}
/*
* If we're looking at an alternate root environment, then construct
* read-only loopback mounts as necessary. For all lofs mounts, make
* sure that the 'special' entry points inside the alternate root. (We
* don't do this with other mounts, as devfs isn't in the alternate
* root, and we need to assume the device environment is roughly the
* same.)
*/
if (zonecfg_in_alt_root()) {
return (-1);
sizeof (specpath)) {
"too long in alternate root",
return (-1);
}
sizeof (fsptr->zone_fs_special));
}
}
/*
* Run 'fsck -m' if there's a device to fsck.
*/
return (-1);
/*
* Build up mount option string.
*/
optstr[0] = '\0';
sizeof (optstr));
sizeof (optstr));
}
}
}
static void
{
uint_t i;
return;
for (i = 0; i < nelem; i++)
}
/*
* This function constructs the miniroot-like "scratch zone" environment. If
* it returns B_FALSE, then the error has already been logged.
*/
static boolean_t
const char *zonepath)
{
char luroot[MAXPATHLEN];
const char **cpp;
static const char *mkdirs[] = {
"/a", NULL
};
static const char *localdirs[] = {
};
static const char *loopdirs[] = {
"/usr", NULL
};
static const char *tmpdirs[] = {
};
char *altstr;
/*
* Construct a small Solaris environment, including the zone root
* mounted on '/a' inside that environment.
*/
/*
* These are mostly special mount points; not handled here. (See
* zone_mount_early.)
*/
return (B_FALSE);
}
}
/*
* These are mounted read-write from the zone undergoing upgrade. We
* must be careful not to 'leak' things from the main system into the
* zone, and this accomplishes that goal.
*/
*cpp);
return (B_FALSE);
}
*cpp);
return (B_FALSE);
}
}
/*
* These are things mounted read-only from the running system because
* they contain binaries that must match system.
*/
return (B_FALSE);
}
return (B_FALSE);
}
/*
* Ignore any non-directories encountered. These are
* things that have been converted into symlinks and no longer need
* any fixup.
*/
continue;
}
tmp) != 0) {
*cpp);
return (B_FALSE);
}
}
/*
* These are things with tmpfs mounted inside.
*/
return (B_FALSE);
}
return (B_FALSE);
}
}
/*
* This is here to support lucopy. If there's an instance of this same
* zone on the current running system, then we mount its root up as
* read-only inside the scratch zone.
*/
return (B_FALSE);
}
zonecfg_set_root("");
return (B_FALSE);
}
tmp) != 0) {
fromdir);
return (B_FALSE);
}
}
return (B_FALSE);
}
}
return (B_FALSE);
return (B_TRUE);
}
static int
{
char rootpath[MAXPATHLEN];
char zonepath[MAXPATHLEN];
int num_fs = 0, i;
struct zone_fstab *fsp;
"zone must be in '%s' or '%s' state to mount file-systems",
goto bad;
}
goto bad;
}
goto bad;
}
"could not get zone configuration handle");
goto bad;
}
goto bad;
}
/*
* /dev in the zone is loopback'd from the external /dev repository,
* in order to provide a largely read-only semantic. But because
* processes in the zone need to be able to chown, chmod, etc. zone
* /dev files, we can't use a 'ro' lofs mount. Instead we use a
* special mode just for zones, "zonedevfs".
*
* In the future we should front /dev with a full-fledged filesystem.
*/
num_fs++;
num_fs--;
goto bad;
}
/*
* Note that mount_one will prepend the alternate root to
* zone_fs_special and do the necessary resolution, so all that is
* needed here is to strip the root added by zone_get_zonepath.
*/
sizeof (fsp->zone_fs_type));
goto bad;
}
/*
* Iterate through the rest of the filesystems, first the IPDs, then
* the general FSs. Sort them all, then mount them in sorted order.
* This is to make sure the higher level directories (e.g., /usr)
* get mounted before any beneath them (e.g., /usr/local).
*/
goto bad;
}
num_fs++;
num_fs--;
(void) zonecfg_endipdent(handle);
goto bad;
}
/*
* IPDs logically only have a mount point; all other properties
* are implied.
*/
}
(void) zonecfg_endipdent(handle);
goto bad;
}
num_fs++;
num_fs--;
(void) zonecfg_endfsent(handle);
goto bad;
}
sizeof (fsp->zone_fs_special));
sizeof (fsp->zone_fs_raw));
sizeof (fsp->zone_fs_type));
}
(void) zonecfg_endfsent(handle);
/*
* If we're mounting a zone for administration, then we need to set up
* the "/a" environment inside the zone so that the commands that run
* in there have access to both the running system's utilities and the
* to-be-modified zone's files.
*/
if (mount_cmd &&
goto bad;
for (i = 0; i < num_fs; i++) {
/* /dev is special and always goes at the top */
goto bad;
continue;
}
goto bad;
}
/*
* Everything looks fine.
*/
return (0);
bad:
return (-1);
}
/* caller makes sure neither parameter is NULL */
static int
{
int prefixlen;
return (1);
while (prefixlen > 0) {
if (prefixlen >= 8) {
*maskstr++ = 0xFF;
prefixlen -= 8;
continue;
}
prefixlen--;
}
return (0);
}
/*
* Tear down all interfaces belonging to the given zone. This should
* be called with the zone in a state other than "running", so that
* interfaces can't be assigned to the zone after this returns.
*
* If anything goes wrong, log an error message and return an error.
*/
static int
{
ret_code = -1;
goto bad;
}
"could not determine number of interfaces");
ret_code = -1;
goto bad;
}
ret_code = -1;
goto bad;
}
ret_code = -1;
goto bad;
}
(void) close(s);
0) {
ret_code = -1;
continue;
}
"%s: could not determine zone interface belongs to",
ret_code = -1;
continue;
}
"%s: could not remove interface",
ret_code = -1;
continue;
}
}
}
bad:
if (s > 0)
(void) close(s);
if (buf)
return (ret_code);
}
static union sockunion {
struct sockaddr_in sin;
struct sockaddr_dl sdl;
struct sockaddr_in6 sin6;
static struct {
char space[512];
} rtmsg;
static int
{
case AF_INET:
return (sizeof (struct sockaddr_in));
case AF_LINK:
return (sizeof (struct sockaddr_dl));
case AF_INET6:
return (sizeof (struct sockaddr_in6));
default:
return (sizeof (struct sockaddr));
}
}
/*
* Round the length "a" up to the next multiple of sizeof (long). A length
* that is zero or negative yields sizeof (long). Note that "a" is evaluated
* more than once, so it must not have side effects.
*/
#define ROUNDUP_LONG(a) \
((a) > 0 ? ((((a) - 1) / sizeof (long)) + 1) * sizeof (long) : sizeof (long))
/*
* Look up which zone is using a given IP address. The address in question
* is expected to have been stuffed into the structure to which lifr points
* via a previous SIOCGLIFADDR ioctl().
*
* This is done using black router socket magic.
*
* Return the name of the zone on success or NULL on failure.
*
* This is a lot of code for a simple task; a new ioctl request to take care
* of this might be a useful RFE.
*/
static char *
{
static char answer[ZONENAME_MAX];
int s, rlen, l, i;
char save_if_name[LIFNAMSIZ];
answer[0] = '\0';
return (NULL);
}
struct sockaddr_in *sin4;
} else {
struct sockaddr_in6 *sin6;
}
cp += l;
cp += l;
return (NULL);
"write to routing socket got only %d for len\n", rlen);
return (NULL);
}
do {
if (l < 0) {
return (NULL);
}
"routing message version %d not understood",
return (NULL);
}
"expected %d bytes, returned %d bytes",
return (NULL);
}
return (NULL);
}
return (NULL);
}
for (i = 1; i != 0; i <<= 1) {
/* LINTED E_BAD_PTR_CAST_ALIGN */
if (i != RTA_IFP) {
continue;
}
break;
}
return (NULL);
}
/*
* We need to set the I/F name to what we got above, then do the
* appropriate ioctl to get its zone name. But lifr->lifr_name is
* used by the calling function to do a REMOVEIF, so if we leave the
* "good" zone's I/F name in place, *that* I/F will be removed instead
* of the bad one. So we save the old (bad) I/F name before over-
* writing it and doing the ioctl, then restore it after the ioctl.
*/
if (i < 0) {
"%s: could not determine the zone interface belongs to",
return (NULL);
}
lifr->lifr_zoneid);
return (answer);
return (NULL);
}
typedef struct mcast_rtmsg_s {
union {
struct {
struct sockaddr_in m_dst;
struct sockaddr_in m_gw;
struct sockaddr_in m_netmask;
} m_v4;
struct {
struct sockaddr_in6 m_dst;
struct sockaddr_in6 m_gw;
struct sockaddr_in6 m_netmask;
} m_v6;
} m_u;
/*
* Configures a single interface: a new virtual interface is added, based on
* the physical interface nwiftabptr->zone_nwif_physical, with the address
* specified in nwiftabptr->zone_nwif_address, for zone zone_id. Note that
* the "address" can be an IPv6 address (with a /prefixlength required), an
* IPv4 address (with a /prefixlength optional), or a name; for the latter,
* an IPv4 name-to-address resolution will be attempted.
*
* A default interface route for multicast is created on the first IPv4 and
* IPv6 interfaces (that have the IFF_MULTICAST flag set), respectively.
* This should really be done in the init scripts if we ever allow zones to
* modify the routing tables.
*
* If anything goes wrong, we log a detailed error message, attempt to tear
* down whatever we set up and return an error.
*/
static int
{
struct sockaddr_in netmask4;
struct sockaddr_in6 netmask6;
int s;
int rs;
int rlen;
char addrstr4[INET_ADDRSTRLEN];
int res;
return (-1);
}
else
return (-1);
}
(void) close(s);
return (-1);
}
"%s: could not set IP address to %s",
goto bad;
}
/* Preserve literal IPv4 address for later potential printing. */
goto bad;
}
} else {
/*
* The IPv4 netmask can be determined either
* directly if a prefix length was supplied with
* the address or via the netmasks database. Not
* being able to determine it is a common failure,
* but it often is not fatal to operation of the
* interface. In that case, a warning will be
* printed after the rest of the interface's
* parameters have been configured.
*/
*slashp = '/';
"%s: invalid prefix length in %s",
goto bad;
}
} else if (getnetmaskbyaddr(in4,
}
if (got_netmask) {
sizeof (netmask4));
}
} else {
*slashp = '/';
"%s: invalid prefix length in %s",
goto bad;
}
sizeof (netmask6));
}
if (got_netmask &&
goto bad;
}
/*
* This doesn't set the broadcast address at all. Rather, it
* gets, then sets the interface's address, relying on the fact
* that resetting the address will reset the broadcast address.
*/
goto bad;
}
"%s: could not reset broadcast address",
goto bad;
}
}
goto bad;
}
int save_errno = errno;
char *zone_using;
/*
* If we failed with something other than EADDRNOTAVAIL,
* then skip to the end. Otherwise, look up our address,
* then call a function to determine which zone is already
* using that address.
*/
if (errno != EADDRNOTAVAIL) {
goto bad;
}
goto bad;
}
errno = save_errno;
if (zone_using == NULL)
else
goto bad;
}
if (rs < 0) {
goto bad;
}
sizeof (struct sockaddr_in6));
} else {
}
if (rlen < 0) {
"default interface for multicast",
} else {
}
goto bad;
}
} else {
}
}
if (!got_netmask) {
/*
* A common, but often non-fatal problem, is that the system
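* cannot find the netmask for an interface address. This is
* often caused by it being only in /etc/inet/netmasks, but
* /etc/nsswitch.conf says to use NIS or NIS+ and it's not
* in that. This doesn't show up at boot because the netmask
* is obtained from /etc/inet/netmasks when no network
* interfaces are up, but isn't consulted when NIS/NIS+ is
* available. We warn the user here that something like this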
* has happened and we're just running with a default and
* possibly incorrect netmask.
*/
char buffer[INET6_ADDRSTRLEN];
void *addr;
addr = &((struct sockaddr_in *)
else
addr = &((struct sockaddr_in6 *)
/* Find out what netmask interface is going to be using */
goto bad;
"WARNING: %s: no matching subnet found in netmasks(4) for "
"%s; using default of %s.",
}
(void) close(s);
return (Z_OK);
bad:
(void) close(s);
return (-1);
}
/*
* Sets up network interfaces based on information from the zone configuration.
* An IPv4 loopback interface is set up "for free", modeling the global system.
* If any of the configuration interfaces were IPv6, then an IPv6 loopback
* address is set up as well.
*
* If anything goes wrong, we log a general error message, attempt to tear down
* whatever we set up, and return an error.
*/
static int
{
return (-1);
}
return (-1);
}
return (-1);
}
for (;;) {
break;
Z_OK) {
(void) zonecfg_endnwifent(handle);
return (-1);
}
&in6) == 1)
}
(void) zonecfg_endnwifent(handle);
}
sizeof (loopback_iftab.zone_nwif_physical));
sizeof (loopback_iftab.zone_nwif_address));
!= Z_OK) {
return (-1);
}
if (saw_v6) {
sizeof (loopback_iftab.zone_nwif_address));
return (-1);
}
}
return (0);
}
static int
{
int fd;
int error;
return (-1);
}
return (0);
return (-1);
}
static int
{
struct sockaddr_storage l, r;
int error;
/*
* Abort IPv4 connections.
*/
local = (struct sockaddr_in *)&l;
remote = (struct sockaddr_in *)&r;
return (error);
/*
* Abort IPv6 connections.
*/
local6 = (struct sockaddr_in6 *)&l;
remote6 = (struct sockaddr_in6 *)&r;
return (error);
return (0);
}
static int
{
char *argv[4];
int status;
return (status);
return (-1);
}
static int
{
/*
* Ready the zone's devices.
*/
}
static int
{
}
static int
{
char *nvl_packed = NULL;
int rctlcount = 0;
int error = -1;
struct zone_rctltab rctltab;
*bufsizep = 0;
return (-1);
}
return (-1);
}
goto out;
}
goto out;
}
goto out;
}
struct zone_rctlvaltab *rctlval;
/* zoneadm should have already warned about unknown rctls. */
if (!zonecfg_is_rctl(name)) {
continue;
}
count = 0;
count++;
}
if (count == 0) { /* ignore */
continue; /* Nothing to free */
}
goto out;
i = 0;
"nvlist_alloc");
goto out;
}
!= Z_OK) {
"(priv=%s,limit=%s,action=%s)",
goto out;
}
"(priv=%s,limit=%s,action=%s) is not a "
"valid value for rctl '%s'",
name);
goto out;
}
rctlblk_get_privilege(rctlblk)) != 0) {
"nvlist_add_uint64");
goto out;
}
rctlblk_get_value(rctlblk)) != 0) {
"nvlist_add_uint64");
goto out;
}
!= 0) {
"nvlist_add_uint64");
goto out;
}
}
!= 0) {
"nvlist_add_nvlist_array");
goto out;
}
for (i = 0; i < count; i++)
nvlist_free(nvlv[i]);
rctlcount++;
}
(void) zonecfg_endrctlent(handle);
if (rctlcount == 0) {
error = 0;
goto out;
}
!= 0) {
goto out;
}
error = 0;
*bufp = nvl_packed;
out:
return (error);
}
static int
{
int error;
return (-1);
}
return (-1);
}
return (error);
}
static int
{
char poolname[MAXPATHLEN];
int status;
int error;
/*
* Find the pool mentioned in the zone configuration, and bind to it.
*/
/*
* The property is not set on the zone, so the pool
* should be bound to the default pool. But that's
* already done by the kernel, so we can just return.
*/
return (0);
}
/*
* Not an error, even though it shouldn't be happening.
*/
"WARNING: unable to retrieve default pool.");
return (0);
}
/*
* Don't do anything if pools aren't enabled.
*/
"zone will not be bound to pool '%s'.", poolname);
return (0);
}
/*
* Try to provide a sane error message if the requested pool doesn't
* exist.
*/
return (-1);
}
PO_SUCCESS) {
return (-1);
}
(void) pool_conf_close(poolconf);
"using default pool.", poolname);
return (0);
}
/*
* Bind the zone to the pool.
*/
"using default pool.", poolname);
}
return (0);
}
int
return (0);
}
/*
* Look for zones running on the main system that are using this root (or any
* subdirectory of it). Return B_TRUE and print an error if a conflicting zone
* is found or if we can't tell.
*/
static boolean_t
{
char zroot[MAXPATHLEN];
char zonename[ZONENAME_MAX];
for (;;) {
nzids += 10;
return (B_TRUE);
}
break;
}
while (nzids > 0) {
/*
* Ignore errors; they just mean that the zone has disappeared
* while we were busy.
*/
sizeof (zroot)) == -1)
continue;
sizeof (zonename)) == -1)
"zone root %s already in use by zone %s",
break;
}
}
return (retv);
}
/*
* Search for loopback mounts that use this same source node (same device and
* inode). Return B_TRUE if there is one or if we can't tell.
*/
static boolean_t
{
return (B_TRUE);
}
return (B_TRUE);
continue;
/* We're looking at a loopback mount. Stat it. */
"zone root %s is reachable through %s",
return (B_TRUE);
}
}
return (B_FALSE);
}
{
char rootpath[MAXPATHLEN];
int xerr;
char *kzone;
return (-1);
}
if (zonecfg_in_alt_root())
return (-1);
}
goto error;
}
goto error;
}
/*
* We must do this scan twice. First, we look for zones running on the
* main system that are using this root (or any subdirectory of it).
* Next, we reduce to the shortest path and search for loopback mounts
* that use this same source node (same device and inode).
*/
goto error;
goto error;
if (mount_cmd) {
/*
* Forge up a special root for this zone. When a zone is
* mounted, we can't let the zone have its own root because the
* tools that will be used in this "scratch zone" need access
* to both the zone's resources and the running machine's
* executables.
*
* Note that the mkdir here also catches read-only filesystems.
*/
goto error;
}
goto error;
}
if (zonecfg_in_alt_root()) {
/*
* If we are mounting up a zone in an alternate root partition,
* then we have some additional work to do before starting the
* zone. First, resolve the root path down so that we're not
* fooled by duplicates. Then forge up an internal name for
* the zone.
*/
goto error;
}
if (zonecfg_lock_scratch(fp) != 0) {
goto error;
}
NULL, 0) == 0) {
goto error;
}
/* This is the preferred name */
0) == 0) {
/* This is just an arbitrary name; note "." usage */
}
}
xerr = 0;
if (xerr == ZE_AREMOUNTS) {
"An unknown file-system is mounted on "
"a subdirectory of %s", rootpath);
} else {
"These file-systems are mounted on "
"subdirectories of %s:", rootpath);
(void) zonecfg_find_mounts(rootpath,
}
} else if (xerr == ZE_CHROOTED) {
"cannot create a zone from a chrooted "
"environment", "zone_create");
} else {
}
goto error;
}
if (zonecfg_in_alt_root() &&
zonecfg_get_root()) == -1) {
goto error;
}
/*
* The following is a warning, not an error, and is not performed when
* merely mounting a zone for administrative use.
*/
"requested pool; using default pool.");
zoneid = -1;
if (zoneid != -1)
(void) zone_destroy(zoneid);
return (rval);
}
int
{
if (create_dev_files(zlogp) != 0 ||
return (-1);
}
configure_network_interfaces(zlogp) != 0)) {
return (-1);
}
return (0);
}
static int
{
char zroot[MAXPATHLEN];
return (-1);
}
/*
* At this point, the processes are gone, the filesystems (save the
* root) are unmounted, and the zone is on death row. But there may
* still be creds floating about in the system that reference the
* zone_t, and which pin down zone_rootvp causing this call to fail
* with EBUSY. Thus, we try for a little while before just giving up.
* (How I wish this were not true, and umount2 just did the right
* thing, or tmpfs supported MS_FORCE. This is a gross hack.)
*/
goto unmounted;
int tries = 10;
while (--tries >= 0) {
(void) sleep(1);
goto unmounted;
break;
}
}
return (-1);
}
/*
* Only zones in an alternate root environment have scratch zone
* entries.
*/
if (zonecfg_in_alt_root()) {
int retv;
return (-1);
}
retv = -1;
if (zonecfg_lock_scratch(fp) != 0)
else
retv = 0;
return (retv);
} else {
return (0);
}
}
int
{
char *kzone;
if (zonecfg_in_alt_root()) {
goto error;
}
goto error;
}
}
if (!bringup_failure_recovery)
if (unmount_cmd)
(void) lu_root_teardown(zlogp);
goto error;
}
if (zone_shutdown(zoneid) != 0) {
goto error;
}
goto error;
if (!unmount_cmd &&
"unable to unconfigure network interfaces in zone");
goto error;
}
goto error;
}
"unable to unmount file systems in zone");
goto error;
}
if (zone_destroy(zoneid) != 0) {
goto error;
}
/*
* Special teardown for alternate boot environments: remove the tmpfs
* root for the zone and then remove it from the map file.
*/
goto error;
if (!unmount_cmd)
return (0);
return (-1);
}