/*
* This file and its contents are supplied under the terms of the
* Common Development and Distribution License ("CDDL"), version 1.0.
* You may only use this file in accordance with the terms of version
* 1.0 of the CDDL.
*
* A full copy of the text of the CDDL should have accompanied this
* source. A copy of the CDDL is also available via the Internet at
* http://www.illumos.org/license/CDDL.
*/
/*
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
*/
/*
* ipd: Internet packet disturber
*
* The purpose of ipd is to simulate congested and lossy networks when they
* don't actually exist. The features of these congested and lossy networks are
* events that end up leading to retransmits. Since obtaining a real lossy or
* congested network on demand can be problematic, we instead simulate this
* behavior.
*
* 1. ipd's operations and restrictions
*
* ipd currently has facilities to cause IP traffic to be:
*
* - Corrupted with some probability.
* - Delayed for a set number of microseconds.
* - Dropped with some probability.
*
* Each of these features is enabled on a per-zone basis. The current
* implementation restricts this specifically to exclusive stack zones.
* Enabling ipd on a given zone causes pfhooks to be installed for that zone's
* netstack. Because of the nature of ipd, it currently only supports exclusive
* stack zones and as a further restriction, it only allows the global zone
* administrative access. ipd can be enabled for the global zone, but doing so
* will cause all shared-stack zones to also be affected.
*
* 2. General architecture and Locking
*
* ipd consists of a few components. There is a per netstack data structure that
* is created and destroyed with the creation and destruction of each exclusive
* stack zone. Each of these netstacks is stored in a global list which is
* accessed for control of ipd via ioctls. The following diagram touches on the
* data structures that are used throughout ipd.
*
* ADMINISTRATIVE DATA PATH
*
* +--------+ +------+ +------+
* | ipdadm | | ip | | nics |
* +--------+ +------+ +------+
* | ^ | |
* | | ioctl(2) | |
* V | V V
* +----------+ +-------------------------+
* +----------+ +-------------------------+
* | |
* | |
* V |
* +----------------+ |
* | list_t ipd_nsl |------+ |
* +----------------+ | |
* | |
* V per netstack V
* +----------------------------+
* | ipd_nestack_t |
* +----------------------------+
*
* ipd has two different entry points, one is administrative, the other is the
* data path. The administrative path is accessed by a userland component
* called ipdadm(1M).
* If the administrative path enables a specific zone, then the data path will
* become active for that zone. Any packet that leaves that zone's IP stack or
* is going to enter it, comes through the callback specified in the hook_t(9S)
* structure. This will cause each packet to go through ipd_hook().
*
* While the locking inside of ipd should be straightforward, unfortunately, the
* pfhooks subsystem necessarily complicates this a little bit. There are
* currently three different sets of locks in ipd.
*
* - Global lock N on the netstack list.
* - Global lock A on the active count.
* - Per-netstack data structure lock Z.
*
* # Locking rules
*
* L.1a N must always be acquired first and released last
*
* If you need to acquire the netstack list lock, either for reading or writing,
* then N must be acquired first and before any other locks. It may not be
* dropped before any other lock.
*
* L.1b N must only be acquired from the administrative path and zone creation,
* shutdown, and destruct callbacks.
*
* The data path, e.g. receiving the per-packet callbacks, should never be
* grabbing the list lock. If it is, then the architecture here needs to be
* reconsidered.
*
* L.2 Z cannot be held across calls to the pfhooks subsystem if packet hooks
* are active.
*
* The way the pfhooks subsystem is designed is that a reference count is
* present on the hook_t while it is active. As long as that reference count is
* non-zero, a call to net_hook_unregister will block until it is lowered.
* Because the callbacks want the same lock for the netstack that is held by the
* administrative path calling into net_hook_unregister, we deadlock.
*
* ioctl from ipdadm remove hook_t cb (from nic) hook_t cb (from IP)
* ----------------------- -------------------- -------------------
* | | |
* | bump hook_t refcount |
* mutex_enter(ipd_nsl_lock); enter ipd_hook() bump hook_t refcount
* mutex acquired mutex_enter(ins->ipdn_lock); |
* | mutex acquired enter ipd_hook()
* mutex_enter(ins->ipdn_lock); | mutex_enter(ins->ipdn_lock);
* | | |
* | | |
* | mutex_exit(ins->ipdn_lock); |
* | | |
* mutex acquired leave ipd_hook() |
* | decrement hook_t refcount |
* | | |
* ipd_teardown_hooks() | |
* net_hook_unregister() | |
* cv_wait() if refcount | |
* | | |
* ---------------------------------------------------------------------------
*
* At this point, we can see that the second hook callback still doesn't have
* the mutex, but it has bumped the hook_t refcount. However, it will never
* acquire the mutex that it needs to finish its operation and decrement the
* refcount.
*
* Obviously, deadlocking is not acceptable, thus the following corollary to the
* second locking rule:
*
* L.2 Corollary: If Z is being released across a call to the pfhooks subsystem,
* N must be held.
*
* There is currently only one path where we have to worry about this. That is
* when we are removing a hook, but the zone is not being shutdown, then hooks
* are currently active. The only place that this currently happens is in
* ipd_check_hooks().
*
*/
#include <sys/sysmacros.h>
#include <sys/netstack.h>
#include <sys/hook_event.h>
/*
* These flags are used to determine whether or not the hooks are registered.
*/
/*
* Per-netstack kstats.
*/
typedef struct ipd_nskstat {
/*
* Different parts of this structure have different locking semantics. The list
* node is not normally referenced, if it is, one has to hold the ipd_nsl_lock.
* The following members are read only: ipdn_netid and ipdn_zoneid. The members
* of the kstat structure are always accessible in the data path, but the
* counters must be bumped with atomic operations. The ipdn_lock protects every
* other aspect of this structure. Please see the big theory statement on the
* requirements for lock ordering.
*/
typedef struct ipd_netstack {
/*
* ipd internal variables
*/
/*
* Note that this random number implementation is based upon the old BSD 4.1
* rand. It's good enough for us!
*/
static int
{
}
static void
{
}
/*
* This is where all the magic actually happens. The way that this works is we
* grab the ins lock to basically get a copy of all the data that we need to do
* our job and then let it go to minimize contention. In terms of actual work on
* the packet we do them in the following order:
*
* - drop
* - delay
* - corrupt
*/
/*ARGSUSED*/
static int
{
unsigned char *crp;
/*
* This probably cannot happen, but we'll do an extra guard just in
* case.
*/
if (status & IPDN_STATUS_CONDEMNED)
return (0);
return (1);
}
if (dwait != 0) {
else
}
/*
* Since we're corrupting the mblk, just corrupt everything in
* the chain. While we could corrupt the entire packet, that's a
* little strong. Instead we're going to just change one of the
* bytes in each mblock.
*/
continue;
/*
* While pfhooks probably won't send us anything else,
* let's just be extra careful. The stack probably isn't
* as resiliant to corruption of control messages.
*/
continue;
}
}
return (0);
}
/*
* Sets up and registers all the proper hooks needed for the netstack to capture
* packets. Callers are assumed to already be holding the ipd_netstack_t's lock.
* If there is a failure in setting something up, it is the responsibility of
* this function to clean it up. Once this function has been called, it should
* not be called until a corresponding call to tear down the hooks has been
* done.
*/
static int
{
goto cleanup;
goto cleanup;
goto cleanup;
goto cleanup;
goto cleanup;
ins->ipdn_v4out) != 0)
goto cleanup;
goto cleanup;
goto cleanup;
goto cleanup;
ins->ipdn_v6out) != 0)
goto cleanup;
ipd_nactive++;
return (0);
ins->ipdn_v6out);
ins->ipdn_v4out);
return (1);
}
static void
{
ins->ipdn_v6out) == 0);
ins->ipdn_v4out) == 0);
ipd_nactive--;
}
static int
{
if (enable)
else
/*
* If hooks were previously enabled.
*/
if (rval != 0) {
return (rval);
}
return (0);
}
/*
* We have to drop the lock here, lest we cause a deadlock.
* Unfortunately, there may be hooks that are running and are
* actively in flight and we have to call the unregister
* function. Due to the hooks framework, if there is an inflight
* hook (most likely right now), and we are holding the
* netstack's lock, those hooks will never return. This is
* unfortunate.
*
* Because we only come into this path holding the list lock, we
* know that only way that someone else can come in and get to
* this structure is via the hook callbacks which are going to
* only be doing reads. They'll also see that everything has
* been disabled and return. So while this is unfortunate, it
* should be relatively safe.
*/
return (0);
}
/*
* Othwerise, nothing should have changed here.
*/
return (0);
}
static int
{
int rval;
return (ERANGE);
/*
* If we've been asked to set the value to a value that we already have,
* great, then we're done.
*/
return (0);
/*
* If ipd_check_hooks_failed, that must mean that we failed to set up
* the hooks, so we are going to effectively zero out and fail the
* request to enable corruption.
*/
if (rval != 0)
ins->ipdn_corrupt = 0;
return (rval);
}
static int
{
int rval;
if (delay > ipd_max_delay)
return (ERANGE);
/*
* If we've been asked to set the value to a value that we already have,
* great, then we're done.
*/
return (0);
/*
* If ipd_check_hooks_failed, that must mean that we failed to set up
* the hooks, so we are going to effectively zero out and fail the
* request to enable corruption.
*/
if (rval != 0)
ins->ipdn_delay = 0;
return (rval);
}
static int
{
int rval;
return (ERANGE);
/*
* If we've been asked to set the value to a value that we already have,
* great, then we're done.
*/
return (0);
/*
* If ipd_check_hooks_failed, that must mean that we failed to set up
* the hooks, so we are going to effectively zero out and fail the
* request to enable corruption.
*/
if (rval != 0)
return (rval);
}
static int
{
int rval = 0;
/*
* If the zone that we're coming from is not the GZ, then we ignore it
* completely and then instead just set the zoneid to be that of the
* caller. If the zoneid is that of the GZ, then we don't touch this
* value.
*/
if (zid != GLOBAL_ZONEID)
zid != GLOBAL_ZONEID)
return (EPERM);
/*
* We need to hold the ipd_nsl_lock throughout the entire operation,
* otherwise someone else could come in and remove us from the list and
* free us, e.g. the netstack destroy handler. By holding the lock, we
* stop it from being able to do anything wrong.
*/
break;
}
return (EINVAL);
}
goto cleanup;
}
switch (cmd) {
case IPDIOC_CORRUPT:
break;
case IPDIOC_DELAY:
break;
case IPDIOC_DROP:
break;
}
return (rval);
}
static int
{
int rval = 0;
/*
* See ipd_ioctl_perturb for the rational here.
*/
if (zid != GLOBAL_ZONEID)
zid != GLOBAL_ZONEID)
return (EPERM);
break;
}
return (EINVAL);
}
/*
* If this is condemned, that means it's very shortly going to be torn
* down. In that case, there's no reason to actually do anything here,
* as it will all be done rather shortly in the destroy function.
* Furthermore, because condemned corresponds with it having hit
* shutdown, we know that no more packets can be received by this
* netstack. All this translates to a no-op.
*/
rval = 0;
goto cleanup;
}
/*
* Go through and disable the requested pieces. We can safely ignore the
* return value of ipd_check_hooks because the removal case should never
* fail, we verify that in the hook teardown case.
*/
ins->ipdn_corrupt = 0;
rval = 0;
}
ins->ipdn_delay = 0;
rval = 0;
}
rval = 0;
}
return (rval);
}
/*
* When this function is called, the value of the ipil_nzones argument controls
* how this function works. When called with a value of zero, then we treat that
* as the caller asking us what's a reasonable number of entries for me to
* allocate memory for. If the zone is the global zone, then we tell them how
* many folks are currently active and add a fudge factor. Otherwise the answer
* is always one.
*
* In the non-zero case, we give them that number of zone ids. While this isn't
* quite ideal as it might mean that someone misses something, this generally
* won't be an issue, as it involves a rather tight race condition in the
* current ipdadm implementation.
*/
static int
{
int rval = 0;
STRUCT_DECL(ipd_ioc_list, h);
STRUCT_INIT(h, get_udatamodel());
STRUCT_SIZE(h), 0) != 0)
return (EFAULT);
if (rzones == 0) {
if (zid == GLOBAL_ZONEID) {
} else {
rzones = 1;
}
STRUCT_SIZE(h), 0) != 0)
return (EFAULT);
return (0);
}
if (zid == GLOBAL_ZONEID) {
} else {
azones = 1;
}
cur = 0;
if (ins->ipdn_enabled == 0)
continue;
++cur;
}
break;
}
if (cur == 0)
STRUCT_FSET(h, ipil_nzones, 0);
else
if (nzones > 0) {
}
return (EFAULT);
return (rval);
}
static void *
{
sizeof (ipd_nskstat_t) / sizeof (kstat_named_t),
}
return (ins);
}
static void
{
}
/*ARGSUSED*/
static void
{
/*
* At this point none of the hooks should be able to fire because the
* zone has been shutdown and we are in the process of destroying it.
* Thus it should not be possible for someone else to come in and grab
* our ipd_netstack_t for this zone. Because of that, we know that we
* are the only ones who could be running here.
*/
if (ins->ipdn_hooked)
}
/*ARGSUSED*/
static int
{
return (EINVAL);
return (EINVAL);
return (EINVAL);
return (EPERM);
return (0);
}
/*ARGSUSED*/
static int
{
int rval;
switch (cmd) {
case IPDIOC_CORRUPT:
case IPDIOC_DELAY:
case IPDIOC_DROP:
0) != 0)
return (EFAULT);
return (rval);
case IPDIOC_REMOVE:
0) != 0)
return (EFAULT);
return (rval);
case IPDIOC_LIST:
/*
* Because the list ioctl doesn't have a fixed-size struct due
* to needing to pass around a pointer, we instead delegate the
* copyin logic to the list code.
*/
default:
break;
}
return (ENOTTY);
}
/*ARGSUSED*/
static int
{
return (0);
}
static int
{
if (cmd != DDI_ATTACH)
return (DDI_FAILURE);
return (DDI_FAILURE);
DDI_PSEUDO, 0) == DDI_FAILURE)
return (DDI_FAILURE);
return (DDI_FAILURE);
}
/*
* Note that these global structures MUST be initialized before we call
* net_instance_register, as that will instantly cause us to drive into
* the ipd_nin_create callbacks.
*/
/* Note, net_instance_alloc sets the version. */
}
return (DDI_SUCCESS);
}
/*ARGSUSED*/
static int
{
int error;
switch (infocmd) {
case DDI_INFO_DEVT2DEVINFO:
error = DDI_SUCCESS;
break;
case DDI_INFO_DEVT2INSTANCE:
error = DDI_SUCCESS;
default:
error = DDI_FAILURE;
break;
}
return (error);
}
static int
{
if (cmd != DDI_DETACH)
return (DDI_FAILURE);
if (ipd_nactive > 0) {
return (EBUSY);
}
}
return (DDI_SUCCESS);
}
ipd_open, /* open */
ipd_close, /* close */
nodev, /* strategy */
nodev, /* print */
nodev, /* dump */
nodev, /* read */
nodev, /* write */
ipd_ioctl, /* ioctl */
nodev, /* devmap */
nodev, /* mmap */
nodev, /* segmap */
nochpoll, /* poll */
ddi_prop_op, /* cb_prop_op */
NULL, /* streamtab */
CB_REV, /* rev */
nodev, /* aread */
nodev /* awrite */
};
DEVO_REV, /* devo_rev */
0, /* refcnt */
ipd_getinfo, /* get_dev_info */
nulldev, /* identify */
nulldev, /* probe */
ipd_attach, /* attach */
ipd_detach, /* detach */
nodev, /* reset */
&ipd_cb_ops, /* driver operations */
NULL, /* bus operations */
nodev, /* dev power */
ddi_quiesce_not_needed /* quiesce */
};
"Internet packet disturber",
};
};
int
_init(void)
{
return (mod_install(&modlinkage));
}
int
{
}
int
_fini(void)
{
return (mod_remove(&modlinkage));
}