/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2016, Joyent, Inc. All rights reserved.
*/
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/vm.h>
#include <sys/proc.h>
#include <sys/tuneable.h>
#include <sys/systm.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/sdt.h>
#include <sys/mutex.h>
#include <sys/bitmap.h>
#include <sys/atomic.h>
#include <sys/kobj.h>
#include <sys/disp.h>
#include <vm/seg_kmem.h>
#include <sys/zone.h>
#include <sys/netstack.h>
/*
* The key we register with the zones framework so that it can tell us
* about new zones, for which we then create new stack instances.
*/
static zone_key_t netstack_zone_key;
static int netstack_initialized = 0;
/*
* Track the registered netstacks.
* The global lock protects
* - ns_reg
* - the list starting at netstack_head and following the netstack_next
* pointers.
*/
static kmutex_t netstack_g_lock;
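/*
* Lock ordering note (a summary of the pattern used throughout this
* file, inferred from the code below rather than stated anywhere): when
* both locks are needed, netstack_g_lock is always acquired before a
* netstack_t's netstack_lock:
*
* mutex_enter(&netstack_g_lock);
* mutex_enter(&ns->netstack_lock);
* ...
* mutex_exit(&ns->netstack_lock);
* mutex_exit(&netstack_g_lock);
*
* The wait_for_* functions below drop netstack_lock before reacquiring
* the global lock in order to preserve this order.
*/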
/*
* Registry of netstacks with their create/shutdown/destroy functions.
*/
static struct netstack_registry ns_reg[NS_MAX];
/*
* Global list of existing stacks. We use this when a new zone with
* an exclusive IP instance is created.
*
* Note that in some cases a netstack_t needs to stay around after the zone
* has gone away. This is because there might be outstanding references
* (from TCP TIME_WAIT connections, IPsec state, etc). The netstack_t data
* structure and all the foo_stack_t's hanging off of it will be cleaned up
* when the last reference to it is dropped.
* However, the same zone might be rebooted. That is handled using the
* assumption that the zones framework picks a new zoneid each time a zone
* is (re)booted. We assert for that condition in netstack_zone_create().
* Thus the old netstack_t can take its time for things to time out.
*/
static netstack_t *netstack_head;
/*
* To support kstat_create_netstack() using kstat_zone_add we need
* to track both
* - all zoneids that use the global/shared stack
* - all kstats that have been added for the shared stack
*/
struct shared_zone_list {
struct shared_zone_list *sz_next;
zoneid_t sz_zoneid;
};
struct shared_kstat_list {
struct shared_kstat_list *sk_next;
kstat_t *sk_kstat;
};
static kmutex_t netstack_shared_lock; /* protects the following two */
static struct shared_zone_list *netstack_shared_zones;
static struct shared_kstat_list *netstack_shared_kstats;
static void *netstack_zone_create(zoneid_t zoneid);
static void netstack_zone_shutdown(zoneid_t zoneid, void *arg);
static void netstack_zone_destroy(zoneid_t zoneid, void *arg);
static void netstack_shared_zone_add(zoneid_t zoneid);
static void netstack_shared_zone_remove(zoneid_t zoneid);
static void netstack_shared_kstat_add(kstat_t *ks);
static void netstack_shared_kstat_remove(kstat_t *ks);
typedef boolean_t applyfn_t(kmutex_t *, netstack_t *, int);
static void apply_all_netstacks(int, applyfn_t *);
static void apply_all_modules(netstack_t *, applyfn_t *);
static void apply_all_modules_reverse(netstack_t *, applyfn_t *);
static boolean_t netstack_apply_create(kmutex_t *, netstack_t *, int);
static boolean_t netstack_apply_shutdown(kmutex_t *, netstack_t *, int);
static boolean_t netstack_apply_destroy(kmutex_t *, netstack_t *, int);
static boolean_t wait_for_zone_creator(netstack_t *, kmutex_t *);
static boolean_t wait_for_nms_inprogress(netstack_t *, nm_state_t *,
kmutex_t *);
void
netstack_init(void)
{
mutex_init(&netstack_g_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&netstack_shared_lock, NULL, MUTEX_DEFAULT, NULL);
netstack_initialized = 1;
/*
* We want to be informed each time a zone is created or
* destroyed in the kernel, so we can maintain the
* stack instance information.
*/
zone_key_create(&netstack_zone_key, netstack_zone_create,
netstack_zone_shutdown, netstack_zone_destroy);
}
/*
* Register a new module with the framework.
* This registers interest in changes to the set of netstacks.
* The createfn and destroyfn are required, but the shutdownfn can be
* NULL.
* Note that due to the current zsd implementation, when the create
* function is called the zone isn't fully present, thus functions
* like zone_find_by_* will fail, hence the create function cannot
* use many zones kernel functions, including zcmn_err().
*/
void
netstack_register(int moduleid,
void *(*module_create)(netstackid_t, netstack_t *),
void (*module_shutdown)(netstackid_t, void *),
void (*module_destroy)(netstackid_t, void *))
{
netstack_t *ns;
ASSERT(netstack_initialized);
ASSERT(moduleid >= 0 && moduleid < NS_MAX);
ASSERT(module_create != NULL);
/*
* Make instances created after this point in time run the create
* callback.
*/
mutex_enter(&netstack_g_lock);
ASSERT(ns_reg[moduleid].nr_create == NULL);
ASSERT(ns_reg[moduleid].nr_flags == 0);
ns_reg[moduleid].nr_create = module_create;
ns_reg[moduleid].nr_shutdown = module_shutdown;
ns_reg[moduleid].nr_destroy = module_destroy;
ns_reg[moduleid].nr_flags = NRF_REGISTERED;
/*
* Determine the set of stacks that exist before we drop the lock.
* Set NSS_CREATE_NEEDED for each of those.
* Netstacks which are being deleted may still have NSS_CREATE_COMPLETED
* set, so also check NSF_CLOSING to make sure we skip them.
*/
for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
nm_state_t *nms = &ns->netstack_m_state[moduleid];
mutex_enter(&ns->netstack_lock);
if (!(ns->netstack_flags & NSF_CLOSING) &&
(nms->nms_flags & NSS_CREATE_ALL) == 0) {
nms->nms_flags |= NSS_CREATE_NEEDED;
DTRACE_PROBE2(netstack__create__needed,
netstack_t *, ns, int, moduleid);
}
mutex_exit(&ns->netstack_lock);
}
mutex_exit(&netstack_g_lock);
/*
* At this point in time a new instance can be created or an instance
* can be destroyed, or some other module can register or unregister.
* Make sure we either run all the create functions for this moduleid
* or we wait for any other creators for this moduleid.
*/
apply_all_netstacks(moduleid, netstack_apply_create);
}
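/*
* Example (a sketch, not taken from an actual consumer): a module "foo"
* (cf. the foo_stack_t mentioned earlier) typically registers from its
* _init() routine. NS_FOO and the foo_* names are placeholders:
*
* static void *
* foo_stack_init(netstackid_t stackid, netstack_t *ns)
* {
* foo_stack_t *fs = kmem_zalloc(sizeof (*fs), KM_SLEEP);
* fs->fs_stackid = stackid;
* return (fs);
* }
*
* netstack_register(NS_FOO, foo_stack_init, foo_stack_shutdown,
* foo_stack_fini);
*
* The create function must return non-NULL; netstack_apply_create()
* asserts this.
*/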
void
netstack_unregister(int moduleid)
{
netstack_t *ns;
ASSERT(moduleid >= 0 && moduleid < NS_MAX);
ASSERT(ns_reg[moduleid].nr_create != NULL);
ASSERT(ns_reg[moduleid].nr_flags & NRF_REGISTERED);
mutex_enter(&netstack_g_lock);
/*
* Determine the set of stacks that exist before we drop the lock.
* Set NSS_SHUTDOWN_NEEDED and NSS_DESTROY_NEEDED for each of those.
* That ensures that when we return all the callbacks for existing
* instances have completed. And since we set NRF_DYING no new
* instances can use this module.
*/
for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
boolean_t created = B_FALSE;
nm_state_t *nms = &ns->netstack_m_state[moduleid];
mutex_enter(&ns->netstack_lock);
/*
* We need to be careful here. We could actually have a netstack
* being created as we speak waiting for us to let go of this
* lock to proceed. It may have set NSS_CREATE_NEEDED, but not
* have gotten to the point of completing it yet. If
* NSS_CREATE_NEEDED, we can safely just remove it here and
* never create the module. However, if NSS_CREATE_INPROGRESS is
* set, we need to still flag this module for shutdown and
* deletion, just as though it had reached NSS_CREATE_COMPLETED.
*
* It is safe to do that because of two different guarantees
* that exist in the system. The first is that before we do a
* create, shutdown, or destroy, we ensure that nothing else is
* in progress in the system for this netstack and wait for it
* to complete. Secondly, because the zone is being created, we
* know that the following call to apply_all_netstacks will block
* on the zone finishing its initialization.
*/
if (nms->nms_flags & NSS_CREATE_NEEDED)
nms->nms_flags &= ~NSS_CREATE_NEEDED;
if (nms->nms_flags & NSS_CREATE_INPROGRESS ||
nms->nms_flags & NSS_CREATE_COMPLETED)
created = B_TRUE;
if (ns_reg[moduleid].nr_shutdown != NULL && created &&
(nms->nms_flags & NSS_CREATE_COMPLETED) &&
(nms->nms_flags & NSS_SHUTDOWN_ALL) == 0) {
nms->nms_flags |= NSS_SHUTDOWN_NEEDED;
DTRACE_PROBE2(netstack__shutdown__needed,
netstack_t *, ns, int, moduleid);
}
if ((ns_reg[moduleid].nr_flags & NRF_REGISTERED) &&
ns_reg[moduleid].nr_destroy != NULL && created &&
(nms->nms_flags & NSS_DESTROY_ALL) == 0) {
nms->nms_flags |= NSS_DESTROY_NEEDED;
DTRACE_PROBE2(netstack__destroy__needed,
netstack_t *, ns, int, moduleid);
}
mutex_exit(&ns->netstack_lock);
}
/*
* Prevent any new netstack from calling the registered create
* function, while keeping the function pointers in place until the
* shutdown and destroy callbacks are complete.
*/
ns_reg[moduleid].nr_flags |= NRF_DYING;
mutex_exit(&netstack_g_lock);
apply_all_netstacks(moduleid, netstack_apply_shutdown);
apply_all_netstacks(moduleid, netstack_apply_destroy);
/*
* Clear the nms_flags so that we can handle this module
* being loaded again.
* Also remove the registered functions.
*/
mutex_enter(&netstack_g_lock);
ASSERT(ns_reg[moduleid].nr_flags & NRF_REGISTERED);
ASSERT(ns_reg[moduleid].nr_flags & NRF_DYING);
for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
nm_state_t *nms = &ns->netstack_m_state[moduleid];
mutex_enter(&ns->netstack_lock);
if (nms->nms_flags & NSS_DESTROY_COMPLETED) {
nms->nms_flags = 0;
DTRACE_PROBE2(netstack__destroy__done,
netstack_t *, ns, int, moduleid);
}
mutex_exit(&ns->netstack_lock);
}
ns_reg[moduleid].nr_create = NULL;
ns_reg[moduleid].nr_shutdown = NULL;
ns_reg[moduleid].nr_destroy = NULL;
ns_reg[moduleid].nr_flags = 0;
mutex_exit(&netstack_g_lock);
}
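/*
* Example (sketch, continuing the hypothetical "foo" module above):
* the matching teardown, typically from the module's _fini() routine:
*
* netstack_unregister(NS_FOO);
*
* On return, the shutdown and destroy callbacks have run for every
* existing stack and the moduleid slot can be reused by a later reload.
*/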
/*
* Lookup and/or allocate a netstack for this zone.
*/
static void *
netstack_zone_create(zoneid_t zoneid)
{
netstackid_t stackid;
netstack_t *ns;
netstack_t **nsp;
zone_t *zone;
int i;
ASSERT(netstack_initialized);
zone = zone_find_by_id_nolock(zoneid);
ASSERT(zone != NULL);
if (zone->zone_flags & ZF_NET_EXCL) {
stackid = zoneid;
} else {
/* Look for the stack instance shared with the global zone */
stackid = GLOBAL_NETSTACKID;
}
/* Allocate even if it isn't needed; simplifies locking */
ns = (netstack_t *)kmem_zalloc(sizeof (netstack_t), KM_SLEEP);
/* Look if there is a matching stack instance */
mutex_enter(&netstack_g_lock);
for (nsp = &netstack_head; *nsp != NULL;
nsp = &((*nsp)->netstack_next)) {
if ((*nsp)->netstack_stackid == stackid) {
/*
* Should never find a pre-existing exclusive stack
*/
VERIFY(stackid == GLOBAL_NETSTACKID);
kmem_free(ns, sizeof (netstack_t));
ns = *nsp;
mutex_enter(&ns->netstack_lock);
ns->netstack_numzones++;
mutex_exit(&ns->netstack_lock);
mutex_exit(&netstack_g_lock);
DTRACE_PROBE1(netstack__inc__numzones,
netstack_t *, ns);
/* Record that we have a new shared stack zone */
netstack_shared_zone_add(zoneid);
zone->zone_netstack = ns;
return (ns);
}
}
/* Not found */
mutex_init(&ns->netstack_lock, NULL, MUTEX_DEFAULT, NULL);
cv_init(&ns->netstack_cv, NULL, CV_DEFAULT, NULL);
ns->netstack_stackid = zoneid;
ns->netstack_numzones = 1;
ns->netstack_refcnt = 1; /* Decremented by netstack_zone_destroy */
ns->netstack_flags = NSF_UNINIT;
*nsp = ns;
zone->zone_netstack = ns;
mutex_enter(&ns->netstack_lock);
/*
* Mark this netstack as having a CREATE running so
* any netstack_register/netstack_unregister waits for
* the existing create callbacks to complete in moduleid order
*/
ns->netstack_flags |= NSF_ZONE_CREATE;
/*
* Determine the set of module create functions that need to be
* called before we drop the lock.
* Set NSS_CREATE_NEEDED for each of those.
* Skip any with NRF_DYING set, since those are in the process of
* going away, by checking for flags being exactly NRF_REGISTERED.
*/
for (i = 0; i < NS_MAX; i++) {
nm_state_t *nms = &ns->netstack_m_state[i];
cv_init(&nms->nms_cv, NULL, CV_DEFAULT, NULL);
if ((ns_reg[i].nr_flags == NRF_REGISTERED) &&
(nms->nms_flags & NSS_CREATE_ALL) == 0) {
nms->nms_flags |= NSS_CREATE_NEEDED;
DTRACE_PROBE2(netstack__create__needed,
netstack_t *, ns, int, i);
}
}
mutex_exit(&ns->netstack_lock);
mutex_exit(&netstack_g_lock);
apply_all_modules(ns, netstack_apply_create);
/* Tell any waiting netstack_register/netstack_unregister to proceed */
mutex_enter(&ns->netstack_lock);
ns->netstack_flags &= ~NSF_UNINIT;
ASSERT(ns->netstack_flags & NSF_ZONE_CREATE);
ns->netstack_flags &= ~NSF_ZONE_CREATE;
cv_broadcast(&ns->netstack_cv);
mutex_exit(&ns->netstack_lock);
return (ns);
}
/* ARGSUSED */
static void
netstack_zone_shutdown(zoneid_t zoneid, void *arg)
{
netstack_t *ns = (netstack_t *)arg;
int i;
ASSERT(arg != NULL);
mutex_enter(&ns->netstack_lock);
ASSERT(ns->netstack_numzones > 0);
if (ns->netstack_numzones != 1) {
/* Stack instance being used by another zone */
mutex_exit(&ns->netstack_lock);
ASSERT(ns->netstack_stackid == GLOBAL_NETSTACKID);
return;
}
mutex_exit(&ns->netstack_lock);
mutex_enter(&netstack_g_lock);
mutex_enter(&ns->netstack_lock);
/*
* Mark this netstack as having a SHUTDOWN running so
* any netstack_register/netstack_unregister waits for
* the existing shutdown callbacks to complete in moduleid order
*/
ASSERT(!(ns->netstack_flags & NSF_ZONE_INPROGRESS));
ns->netstack_flags |= NSF_ZONE_SHUTDOWN;
/*
* Determine the set of modules whose create callbacks have completed
* before we drop the lock. Set NSS_SHUTDOWN_NEEDED for each of those.
*/
for (i = 0; i < NS_MAX; i++) {
nm_state_t *nms = &ns->netstack_m_state[i];
if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
ns_reg[i].nr_shutdown != NULL &&
(nms->nms_flags & NSS_CREATE_COMPLETED) &&
(nms->nms_flags & NSS_SHUTDOWN_ALL) == 0) {
nms->nms_flags |= NSS_SHUTDOWN_NEEDED;
DTRACE_PROBE2(netstack__shutdown__needed,
netstack_t *, ns, int, i);
}
}
mutex_exit(&ns->netstack_lock);
mutex_exit(&netstack_g_lock);
/*
* Call the shutdown function for all registered modules for this
* netstack.
*/
apply_all_modules_reverse(ns, netstack_apply_shutdown);
/* Tell any waiting netstack_register/netstack_unregister to proceed */
mutex_enter(&ns->netstack_lock);
ASSERT(ns->netstack_flags & NSF_ZONE_SHUTDOWN);
ns->netstack_flags &= ~NSF_ZONE_SHUTDOWN;
cv_broadcast(&ns->netstack_cv);
mutex_exit(&ns->netstack_lock);
}
/*
* Common routine to release a zone.
* If this was the last zone using the stack instance then prepare for
* the refcnt dropping to zero to free the stack instance.
*/
/* ARGSUSED */
static void
netstack_zone_destroy(zoneid_t zoneid, void *arg)
{
netstack_t *ns = (netstack_t *)arg;
ASSERT(arg != NULL);
mutex_enter(&ns->netstack_lock);
ASSERT(ns->netstack_numzones > 0);
ns->netstack_numzones--;
if (ns->netstack_numzones != 0) {
/* Stack instance being used by another zone */
mutex_exit(&ns->netstack_lock);
ASSERT(ns->netstack_stackid == GLOBAL_NETSTACKID);
/* Record that a shared stack zone has gone away */
netstack_shared_zone_remove(zoneid);
return;
}
/*
* Set CLOSING so that netstack_find_by will not find it.
*/
ns->netstack_flags |= NSF_CLOSING;
mutex_exit(&ns->netstack_lock);
DTRACE_PROBE1(netstack__dec__numzones, netstack_t *, ns);
/* No other thread can call zone_destroy for this stack */
/*
* Decrease refcnt to account for the one set in netstack_zone_create()
*/
netstack_rele(ns);
}
/*
* Called when the reference count drops to zero.
* Call the destroy functions for each registered module.
*/
static void
netstack_stack_inactive(netstack_t *ns)
{
int i;
mutex_enter(&netstack_g_lock);
mutex_enter(&ns->netstack_lock);
/*
* Mark this netstack as having a DESTROY running so
* any netstack_register/netstack_unregister waits for
* the existing destroy callbacks to complete in reverse moduleid order
*/
ASSERT(!(ns->netstack_flags & NSF_ZONE_INPROGRESS));
ns->netstack_flags |= NSF_ZONE_DESTROY;
/*
* If the shutdown callback wasn't called earlier (e.g., if this is
* a netstack shared between multiple zones), then we schedule it now.
*
* Determine the set of modules that need their shutdown and destroy
* functions called before we drop the lock. Set NSS_SHUTDOWN_NEEDED and
* NSS_DESTROY_NEEDED for each of those. That ensures that when we
* return all the callbacks for existing instances have completed.
*/
for (i = 0; i < NS_MAX; i++) {
nm_state_t *nms = &ns->netstack_m_state[i];
if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
ns_reg[i].nr_shutdown != NULL &&
(nms->nms_flags & NSS_CREATE_COMPLETED) &&
(nms->nms_flags & NSS_SHUTDOWN_ALL) == 0) {
nms->nms_flags |= NSS_SHUTDOWN_NEEDED;
DTRACE_PROBE2(netstack__shutdown__needed,
netstack_t *, ns, int, i);
}
if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
ns_reg[i].nr_destroy != NULL &&
(nms->nms_flags & NSS_CREATE_COMPLETED) &&
(nms->nms_flags & NSS_DESTROY_ALL) == 0) {
nms->nms_flags |= NSS_DESTROY_NEEDED;
DTRACE_PROBE2(netstack__destroy__needed,
netstack_t *, ns, int, i);
}
}
mutex_exit(&ns->netstack_lock);
mutex_exit(&netstack_g_lock);
/*
* Call the shutdown and destroy functions for all registered modules
* for this netstack.
*
* Since there are some ordering dependencies between the modules we
* tear them down in the reverse order of what was used to create them.
*
* Since a netstack_t is never reused (when a zone is rebooted it gets
* a new zoneid == netstackid i.e. a new netstack_t is allocated) we
* leave nms_flags the way it is i.e. with NSS_DESTROY_COMPLETED set.
* That is different than in the netstack_unregister() case.
*/
apply_all_modules_reverse(ns, netstack_apply_shutdown);
apply_all_modules_reverse(ns, netstack_apply_destroy);
/* Tell any waiting netstack_register/netstack_unregister to proceed */
mutex_enter(&ns->netstack_lock);
ASSERT(ns->netstack_flags & NSF_ZONE_DESTROY);
ns->netstack_flags &= ~NSF_ZONE_DESTROY;
cv_broadcast(&ns->netstack_cv);
mutex_exit(&ns->netstack_lock);
}
/*
* Apply a function to all netstacks for a particular moduleid.
*
* If there is any zone activity (due to a zone being created, shutdown,
* or destroyed) we wait for that to complete before we proceed. This ensures
* that the moduleids are processed in order when a zone is created or
* destroyed.
*
* The applyfn has to drop netstack_g_lock if it does some work.
* In that case we don't follow netstack_next,
* even if it is possible to do so without any hazards. This is
* because we want the design to allow for the list of netstacks threaded
* by netstack_next to change in any arbitrary way during the time the
* lock was dropped.
*
* It is safe to restart the loop at netstack_head since the applyfn
* changes netstack_m_state as it processes things, so a subsequent
* pass through will have no effect in applyfn, hence the loop will terminate
* in at worst O(N^2).
*/
static void
apply_all_netstacks(int moduleid, applyfn_t *applyfn)
{
netstack_t *ns;
mutex_enter(&netstack_g_lock);
ns = netstack_head;
while (ns != NULL) {
if (wait_for_zone_creator(ns, &netstack_g_lock)) {
/* Lock dropped - restart at head */
ns = netstack_head;
} else if ((applyfn)(&netstack_g_lock, ns, moduleid)) {
/* Lock dropped - restart at head */
ns = netstack_head;
} else {
ns = ns->netstack_next;
}
}
mutex_exit(&netstack_g_lock);
}
/*
* Apply a function to all moduleids for a particular netstack.
*
* Since the netstack linkage doesn't matter in this case we can
* ignore whether the function drops the lock.
*/
static void
apply_all_modules(netstack_t *ns, applyfn_t *applyfn)
{
int i;
mutex_enter(&netstack_g_lock);
for (i = 0; i < NS_MAX; i++) {
/*
* We don't care whether the lock was dropped
* since we are not iterating over netstack_head.
*/
(void) (applyfn)(&netstack_g_lock, ns, i);
}
mutex_exit(&netstack_g_lock);
}
/* Like the above but in reverse moduleid order */
static void
apply_all_modules_reverse(netstack_t *ns, applyfn_t *applyfn)
{
int i;
mutex_enter(&netstack_g_lock);
for (i = NS_MAX-1; i >= 0; i--) {
/*
* We don't care whether the lock was dropped
* since we are not iterating over netstack_head.
*/
(void) (applyfn)(&netstack_g_lock, ns, i);
}
mutex_exit(&netstack_g_lock);
}
/*
* Call the create function for the ns and moduleid if CREATE_NEEDED
* is set.
* If some other thread gets here first and sets *_INPROGRESS, then
* we wait for that thread to complete so that we can ensure that
* all the callbacks are done when we've looped over all netstacks/moduleids.
*
* When we call the create function, we temporarily drop the global lock
* held by the caller, and return true to tell the caller it needs to
* re-evaluate the state.
*/
static boolean_t
netstack_apply_create(kmutex_t *lockp, netstack_t *ns, int moduleid)
{
void *result;
netstackid_t stackid;
nm_state_t *nms = &ns->netstack_m_state[moduleid];
boolean_t dropped = B_FALSE;
ASSERT(MUTEX_HELD(lockp));
mutex_enter(&ns->netstack_lock);
if (wait_for_nms_inprogress(ns, nms, lockp))
dropped = B_TRUE;
if (nms->nms_flags & NSS_CREATE_NEEDED) {
nms->nms_flags &= ~NSS_CREATE_NEEDED;
nms->nms_flags |= NSS_CREATE_INPROGRESS;
DTRACE_PROBE2(netstack__create__inprogress,
netstack_t *, ns, int, moduleid);
mutex_exit(&ns->netstack_lock);
mutex_exit(lockp);
dropped = B_TRUE;
ASSERT(ns_reg[moduleid].nr_create != NULL);
stackid = ns->netstack_stackid;
DTRACE_PROBE2(netstack__create__start,
netstackid_t, stackid,
netstack_t *, ns);
result = (ns_reg[moduleid].nr_create)(stackid, ns);
DTRACE_PROBE2(netstack__create__end,
void *, result, netstack_t *, ns);
ASSERT(result != NULL);
mutex_enter(lockp);
mutex_enter(&ns->netstack_lock);
ns->netstack_modules[moduleid] = result;
nms->nms_flags &= ~NSS_CREATE_INPROGRESS;
nms->nms_flags |= NSS_CREATE_COMPLETED;
cv_broadcast(&nms->nms_cv);
DTRACE_PROBE2(netstack__create__completed,
netstack_t *, ns, int, moduleid);
mutex_exit(&ns->netstack_lock);
return (dropped);
} else {
mutex_exit(&ns->netstack_lock);
return (dropped);
}
}
/*
* Call the shutdown function for the ns and moduleid if SHUTDOWN_NEEDED
* is set.
* If some other thread gets here first and sets *_INPROGRESS, then
* we wait for that thread to complete so that we can ensure that
* all the callbacks are done when we've looped over all netstacks/moduleids.
*
* When we call the shutdown function, we temporarily drop the global
* lock held by the caller, and return true to tell the caller it needs
* to re-evaluate the state.
*/
static boolean_t
netstack_apply_shutdown(kmutex_t *lockp, netstack_t *ns, int moduleid)
{
netstackid_t stackid;
void *netstack_module;
nm_state_t *nms = &ns->netstack_m_state[moduleid];
boolean_t dropped = B_FALSE;
ASSERT(MUTEX_HELD(lockp));
mutex_enter(&ns->netstack_lock);
if (wait_for_nms_inprogress(ns, nms, lockp))
dropped = B_TRUE;
if (nms->nms_flags & NSS_SHUTDOWN_NEEDED) {
nms->nms_flags &= ~NSS_SHUTDOWN_NEEDED;
nms->nms_flags |= NSS_SHUTDOWN_INPROGRESS;
DTRACE_PROBE2(netstack__shutdown__inprogress,
netstack_t *, ns, int, moduleid);
mutex_exit(&ns->netstack_lock);
mutex_exit(lockp);
dropped = B_TRUE;
ASSERT(ns_reg[moduleid].nr_shutdown != NULL);
stackid = ns->netstack_stackid;
netstack_module = ns->netstack_modules[moduleid];
DTRACE_PROBE2(netstack__shutdown__start,
netstackid_t, stackid,
void *, netstack_module);
(ns_reg[moduleid].nr_shutdown)(stackid, netstack_module);
DTRACE_PROBE1(netstack__shutdown__end,
netstack_t *, ns);
mutex_enter(lockp);
mutex_enter(&ns->netstack_lock);
nms->nms_flags &= ~NSS_SHUTDOWN_INPROGRESS;
nms->nms_flags |= NSS_SHUTDOWN_COMPLETED;
cv_broadcast(&nms->nms_cv);
DTRACE_PROBE2(netstack__shutdown__completed,
netstack_t *, ns, int, moduleid);
mutex_exit(&ns->netstack_lock);
return (dropped);
} else {
mutex_exit(&ns->netstack_lock);
return (dropped);
}
}
/*
* Call the destroy function for the ns and moduleid if DESTROY_NEEDED
* is set.
* If some other thread gets here first and sets *_INPROGRESS, then
* we wait for that thread to complete so that we can ensure that
* all the callbacks are done when we've looped over all netstacks/moduleids.
*
* When we call the destroy function, we temporarily drop the global
* lock held by the caller, and return true to tell the caller it needs
* to re-evaluate the state.
*/
static boolean_t
netstack_apply_destroy(kmutex_t *lockp, netstack_t *ns, int moduleid)
{
netstackid_t stackid;
void *netstack_module;
nm_state_t *nms = &ns->netstack_m_state[moduleid];
boolean_t dropped = B_FALSE;
ASSERT(MUTEX_HELD(lockp));
mutex_enter(&ns->netstack_lock);
if (wait_for_nms_inprogress(ns, nms, lockp))
dropped = B_TRUE;
if (nms->nms_flags & NSS_DESTROY_NEEDED) {
nms->nms_flags &= ~NSS_DESTROY_NEEDED;
nms->nms_flags |= NSS_DESTROY_INPROGRESS;
DTRACE_PROBE2(netstack__destroy__inprogress,
netstack_t *, ns, int, moduleid);
mutex_exit(&ns->netstack_lock);
mutex_exit(lockp);
dropped = B_TRUE;
ASSERT(ns_reg[moduleid].nr_destroy != NULL);
stackid = ns->netstack_stackid;
netstack_module = ns->netstack_modules[moduleid];
DTRACE_PROBE2(netstack__destroy__start,
netstackid_t, stackid,
void *, netstack_module);
(ns_reg[moduleid].nr_destroy)(stackid, netstack_module);
DTRACE_PROBE1(netstack__destroy__end,
netstack_t *, ns);
mutex_enter(lockp);
mutex_enter(&ns->netstack_lock);
ns->netstack_modules[moduleid] = NULL;
nms->nms_flags &= ~NSS_DESTROY_INPROGRESS;
nms->nms_flags |= NSS_DESTROY_COMPLETED;
cv_broadcast(&nms->nms_cv);
DTRACE_PROBE2(netstack__destroy__completed,
netstack_t *, ns, int, moduleid);
mutex_exit(&ns->netstack_lock);
return (dropped);
} else {
mutex_exit(&ns->netstack_lock);
return (dropped);
}
}
/*
* If somebody is creating the netstack (due to a new zone being created)
* then we wait for them to complete. This ensures that any additional
* netstack_register() doesn't cause the create functions to run out of
* order.
* Note that we do not need such a global wait in the case of the shutdown
* and destroy callbacks, since in that case it is sufficient for both
* threads to set NEEDED and wait for INPROGRESS to ensure ordering.
* Returns true if lockp was temporarily dropped while waiting.
*/
static boolean_t
wait_for_zone_creator(netstack_t *ns, kmutex_t *lockp)
{
boolean_t dropped = B_FALSE;
mutex_enter(&ns->netstack_lock);
while (ns->netstack_flags & NSF_ZONE_CREATE) {
DTRACE_PROBE1(netstack__wait__zone__inprogress,
netstack_t *, ns);
if (lockp != NULL) {
dropped = B_TRUE;
mutex_exit(lockp);
}
cv_wait(&ns->netstack_cv, &ns->netstack_lock);
if (lockp != NULL) {
/* First drop netstack_lock to preserve order */
mutex_exit(&ns->netstack_lock);
mutex_enter(lockp);
mutex_enter(&ns->netstack_lock);
}
}
mutex_exit(&ns->netstack_lock);
return (dropped);
}
/*
* Wait for any INPROGRESS flag to be cleared for the netstack/moduleid
* combination.
* Returns true if lockp was temporarily dropped while waiting.
*/
static boolean_t
wait_for_nms_inprogress(netstack_t *ns, nm_state_t *nms, kmutex_t *lockp)
{
boolean_t dropped = B_FALSE;
while (nms->nms_flags & NSS_ALL_INPROGRESS) {
DTRACE_PROBE2(netstack__wait__nms__inprogress,
netstack_t *, ns, nm_state_t *, nms);
if (lockp != NULL) {
dropped = B_TRUE;
mutex_exit(lockp);
}
cv_wait(&nms->nms_cv, &ns->netstack_lock);
if (lockp != NULL) {
/* First drop netstack_lock to preserve order */
mutex_exit(&ns->netstack_lock);
mutex_enter(lockp);
mutex_enter(&ns->netstack_lock);
}
}
return (dropped);
}
/*
* Get the stack instance used in the caller's zone.
* Increases the reference count, caller must do a netstack_rele.
* It can't be called after zone_destroy() has started.
*/
netstack_t *
netstack_get_current(void)
{
netstack_t *ns;
ns = curproc->p_zone->zone_netstack;
ASSERT(ns != NULL);
if (ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))
return (NULL);
netstack_hold(ns);
return (ns);
}
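/*
* Example (sketch): a caller pairs the lookup with netstack_rele() and
* must handle the NULL (uninitialized/closing) case; NS_FOO and
* foo_stack_t are the placeholders used earlier:
*
* netstack_t *ns = netstack_get_current();
* if (ns != NULL) {
* foo_stack_t *fs = ns->netstack_modules[NS_FOO];
* ... use fs ...
* netstack_rele(ns);
* }
*/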
/*
* Find a stack instance given the cred.
* This is used by modules to allow for a future in which something
* other than the zoneid determines the stack.
*/
netstack_t *
netstack_find_by_cred(const cred_t *cr)
{
zoneid_t zoneid = crgetzoneid(cr);
/* Handle the case when cr_zone is NULL */
if (zoneid == (zoneid_t)-1)
zoneid = GLOBAL_ZONEID;
/* For performance ... */
if (curproc->p_zone->zone_id == zoneid)
return (netstack_get_current());
else
return (netstack_find_by_zoneid(zoneid));
}
/*
* Find a stack instance given the zoneid.
* Increases the reference count if found; caller must do a
* netstack_rele().
*
* If there is no exact match then assume the shared stack instance
* matches.
*
* Skip the uninitialized ones.
*/
netstack_t *
netstack_find_by_zoneid(zoneid_t zoneid)
{
netstack_t *ns;
zone_t *zone;
zone = zone_find_by_id(zoneid);
if (zone == NULL)
return (NULL);
ns = zone->zone_netstack;
ASSERT(ns != NULL);
if (ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))
ns = NULL;
else
netstack_hold(ns);
zone_rele(zone);
return (ns);
}
/*
* Find a stack instance given the zoneid. Can only be called from
* the create callback. See the comments in zone_find_by_id_nolock for
* why that limitation exists.
*
* Increases the reference count if found; caller must do a
* netstack_rele().
*
* If there is no exact match then assume the shared stack instance
* matches.
*
* Skip the uninitialized ones.
*/
netstack_t *
netstack_find_by_zoneid_nolock(zoneid_t zoneid)
{
netstack_t *ns;
zone_t *zone;
zone = zone_find_by_id_nolock(zoneid);
if (zone == NULL)
return (NULL);
ns = zone->zone_netstack;
ASSERT(ns != NULL);
if (ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))
ns = NULL;
else
netstack_hold(ns);
/* zone_find_by_id_nolock does not have a hold on the zone */
return (ns);
}
/*
* Find a stack instance given the stackid, requiring an exact match.
* Increases the reference count if found; caller must do a
* netstack_rele().
*
* Skip the uninitialized ones.
*/
netstack_t *
netstack_find_by_stackid(netstackid_t stackid)
{
netstack_t *ns;
mutex_enter(&netstack_g_lock);
for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
mutex_enter(&ns->netstack_lock);
if (ns->netstack_stackid == stackid &&
!(ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))) {
mutex_exit(&ns->netstack_lock);
netstack_hold(ns);
mutex_exit(&netstack_g_lock);
return (ns);
}
mutex_exit(&ns->netstack_lock);
}
mutex_exit(&netstack_g_lock);
return (NULL);
}
boolean_t
netstack_inuse_by_stackid(netstackid_t stackid)
{
netstack_t *ns;
boolean_t rval = B_FALSE;
mutex_enter(&netstack_g_lock);
for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
if (ns->netstack_stackid == stackid) {
rval = B_TRUE;
break;
}
}
mutex_exit(&netstack_g_lock);
return (rval);
}
void
netstack_rele(netstack_t *ns)
{
netstack_t **nsp;
boolean_t found;
int refcnt, numzones;
int i;
mutex_enter(&ns->netstack_lock);
ASSERT(ns->netstack_refcnt > 0);
ns->netstack_refcnt--;
/*
* As we drop the lock additional netstack_rele()s can come in
* and decrement the refcnt to zero and free the netstack_t.
* Store the counts in local variables; if we were not the last
* reference, don't touch the netstack_t after dropping the lock.
*/
refcnt = ns->netstack_refcnt;
numzones = ns->netstack_numzones;
DTRACE_PROBE1(netstack__dec__ref, netstack_t *, ns);
mutex_exit(&ns->netstack_lock);
if (refcnt == 0 && numzones == 0) {
/*
* Time to call the destroy functions and free up
* the structure
*/
netstack_stack_inactive(ns);
/* Make sure nothing increased the references */
ASSERT(ns->netstack_refcnt == 0);
ASSERT(ns->netstack_numzones == 0);
/* Finally remove from list of netstacks */
mutex_enter(&netstack_g_lock);
found = B_FALSE;
for (nsp = &netstack_head; *nsp != NULL;
nsp = &(*nsp)->netstack_next) {
if (*nsp == ns) {
*nsp = ns->netstack_next;
ns->netstack_next = NULL;
found = B_TRUE;
break;
}
}
ASSERT(found);
mutex_exit(&netstack_g_lock);
/* Make sure nothing increased the references */
ASSERT(ns->netstack_refcnt == 0);
ASSERT(ns->netstack_numzones == 0);
ASSERT(ns->netstack_flags & NSF_CLOSING);
for (i = 0; i < NS_MAX; i++) {
nm_state_t *nms = &ns->netstack_m_state[i];
cv_destroy(&nms->nms_cv);
}
mutex_destroy(&ns->netstack_lock);
cv_destroy(&ns->netstack_cv);
kmem_free(ns, sizeof (*ns));
}
}
void
netstack_hold(netstack_t *ns)
{
mutex_enter(&ns->netstack_lock);
ns->netstack_refcnt++;
ASSERT(ns->netstack_refcnt > 0);
mutex_exit(&ns->netstack_lock);
DTRACE_PROBE1(netstack__inc__ref, netstack_t *, ns);
}
/*
* To support kstat_create_netstack() using kstat_zone_add we need
* to track both
* - all zoneids that use the global/shared stack
* - all kstats that have been added for the shared stack
*/
kstat_t *
kstat_create_netstack(char *ks_module, int ks_instance, char *ks_name,
char *ks_class, uchar_t ks_type, uint_t ks_ndata, uchar_t ks_flags,
netstackid_t ks_netstackid)
{
kstat_t *ks;
if (ks_netstackid == GLOBAL_NETSTACKID) {
ks = kstat_create_zone(ks_module, ks_instance, ks_name,
ks_class, ks_type, ks_ndata, ks_flags, GLOBAL_ZONEID);
if (ks != NULL)
netstack_shared_kstat_add(ks);
return (ks);
} else {
zoneid_t zoneid = ks_netstackid;
return (kstat_create_zone(ks_module, ks_instance, ks_name,
ks_class, ks_type, ks_ndata, ks_flags, zoneid));
}
}
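/*
* Example (sketch): a module creating a per-stack kstat, typically from
* its create callback, so that a shared-stack kstat becomes visible in
* every zone using the shared stack; the "foo" names are placeholders:
*
* ks = kstat_create_netstack("foo", 0, "foo_stats", "net",
* KSTAT_TYPE_NAMED, nstats, 0, stackid);
* if (ks != NULL)
* kstat_install(ks);
*
* The matching removal is kstat_delete_netstack(ks, stackid).
*/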
void
kstat_delete_netstack(kstat_t *ks, netstackid_t ks_netstackid)
{
if (ks_netstackid == GLOBAL_NETSTACKID) {
netstack_shared_kstat_remove(ks);
}
kstat_delete(ks);
}
static void
netstack_shared_zone_add(zoneid_t zoneid)
{
struct shared_zone_list *sz;
struct shared_kstat_list *sk;
sz = (struct shared_zone_list *)kmem_zalloc(sizeof (*sz), KM_SLEEP);
sz->sz_zoneid = zoneid;
/* Insert in list */
mutex_enter(&netstack_shared_lock);
sz->sz_next = netstack_shared_zones;
netstack_shared_zones = sz;
/*
* Perform kstat_zone_add for each existing shared stack kstat.
* Note: Holds netstack_shared_lock lock across kstat_zone_add.
*/
for (sk = netstack_shared_kstats; sk != NULL; sk = sk->sk_next) {
kstat_zone_add(sk->sk_kstat, zoneid);
}
mutex_exit(&netstack_shared_lock);
}
static void
netstack_shared_zone_remove(zoneid_t zoneid)
{
struct shared_zone_list **szp, *sz;
struct shared_kstat_list *sk;
/* Find in list */
mutex_enter(&netstack_shared_lock);
sz = NULL;
for (szp = &netstack_shared_zones; *szp != NULL;
szp = &((*szp)->sz_next)) {
if ((*szp)->sz_zoneid == zoneid) {
sz = *szp;
break;
}
}
/* We must find it */
ASSERT(sz != NULL);
*szp = sz->sz_next;
sz->sz_next = NULL;
/*
* Perform kstat_zone_remove for each existing shared stack kstat.
* Note: Holds netstack_shared_lock lock across kstat_zone_remove.
*/
for (sk = netstack_shared_kstats; sk != NULL; sk = sk->sk_next) {
kstat_zone_remove(sk->sk_kstat, zoneid);
}
mutex_exit(&netstack_shared_lock);
kmem_free(sz, sizeof (*sz));
}
static void
netstack_shared_kstat_add(kstat_t *ks)
{
struct shared_zone_list *sz;
struct shared_kstat_list *sk;
sk = (struct shared_kstat_list *)kmem_zalloc(sizeof (*sk), KM_SLEEP);
sk->sk_kstat = ks;
/* Insert in list */
mutex_enter(&netstack_shared_lock);
sk->sk_next = netstack_shared_kstats;
netstack_shared_kstats = sk;
/*
* Perform kstat_zone_add for each existing shared stack zone.
* Note: Holds netstack_shared_lock lock across kstat_zone_add.
*/
for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
kstat_zone_add(ks, sz->sz_zoneid);
}
mutex_exit(&netstack_shared_lock);
}
static void
netstack_shared_kstat_remove(kstat_t *ks)
{
struct shared_zone_list *sz;
struct shared_kstat_list **skp, *sk;
/* Find in list */
mutex_enter(&netstack_shared_lock);
sk = NULL;
for (skp = &netstack_shared_kstats; *skp != NULL;
skp = &((*skp)->sk_next)) {
if ((*skp)->sk_kstat == ks) {
sk = *skp;
break;
}
}
/* Must find it */
ASSERT(sk != NULL);
*skp = sk->sk_next;
sk->sk_next = NULL;
/*
* Perform kstat_zone_remove for each existing shared stack zone.
* Note: Holds netstack_shared_lock lock across kstat_zone_remove.
*/
for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
kstat_zone_remove(ks, sz->sz_zoneid);
}
mutex_exit(&netstack_shared_lock);
kmem_free(sk, sizeof (*sk));
}
/*
* Return true if the zoneid is one that uses the shared (global) stack.
*/
static boolean_t
netstack_find_shared_zoneid(zoneid_t zoneid)
{
struct shared_zone_list *sz;
mutex_enter(&netstack_shared_lock);
for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
if (sz->sz_zoneid == zoneid) {
mutex_exit(&netstack_shared_lock);
return (B_TRUE);
}
}
mutex_exit(&netstack_shared_lock);
return (B_FALSE);
}
/*
* Hide the fact that zoneids and netstackids are allocated from
* the same space in the current implementation.
* We currently do not check that the stackid/zoneid is valid, since
* there is no need for that. However, these conversions should only be
* applied to ids that are known to be valid.
*/
zoneid_t
netstackid_to_zoneid(netstackid_t stackid)
{
return (stackid);
}
netstackid_t
zoneid_to_netstackid(zoneid_t zoneid)
{
if (netstack_find_shared_zoneid(zoneid))
return (GLOBAL_NETSTACKID);
else
return (zoneid);
}
zoneid_t
netstack_get_zoneid(netstack_t *ns)
{
return (netstackid_to_zoneid(ns->netstack_stackid));
}
/*
* Simplistic support for walking all the handles.
* Example usage:
* netstack_handle_t nh;
* netstack_t *ns;
*
* netstack_next_init(&nh);
* while ((ns = netstack_next(&nh)) != NULL) {
* do something;
* netstack_rele(ns);
* }
* netstack_next_fini(&nh);
*/
void
netstack_next_init(netstack_handle_t *handle)
{
*handle = 0;
}
/* ARGSUSED */
void
netstack_next_fini(netstack_handle_t *handle)
{
}
netstack_t *
netstack_next(netstack_handle_t *handle)
{
netstack_t *ns;
int i, end;
end = *handle;
/* Walk the list, skipping the first *handle instances */
mutex_enter(&netstack_g_lock);
ns = netstack_head;
for (i = 0; i < end; i++) {
if (ns == NULL)
break;
ns = ns->netstack_next;
}
/* Skip those that aren't really here (uninitialized or closing) */
while (ns != NULL) {
mutex_enter(&ns->netstack_lock);
if ((ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING)) == 0) {
mutex_exit(&ns->netstack_lock);
break;
}
mutex_exit(&ns->netstack_lock);
end++;
ns = ns->netstack_next;
}
if (ns != NULL) {
*handle = end + 1;
netstack_hold(ns);
}
mutex_exit(&netstack_g_lock);
return (ns);
}