sockfilter.c revision e82bc0ba9649a7146fdab88089eaa4b8502b2da4
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
*/
#include <sys/sysmacros.h>
#include <sys/socketvar.h>
/*
* Socket Filter Framework
*
* Socket filter entry (sof_entry_t):
*
* There exists one entry for each configured filter (done via soconfig(1M)),
* and they are all in sof_entry_list. In addition to the global list, each
* sockparams entry maintains a list of filters that are interested in that
* particular socket type. So the filter entry may be referenced by multiple
* sockparams. The set of sockparams referencing a filter may change as
* socket types are added and/or removed from the system. Both sof_entry_list
* and the sockparams list are protected by sockconf_lock.
*
* Each filter entry has a ref count which is incremented whenever a filter
* is attached to a socket. An entry is marked SOFEF_CONDEMED when it is
* unconfigured, which will result in the entry being freed when its ref
* count reaches zero.
*
* Socket filter module (sof_module_t):
*
* Modules are created by sof_register() and placed in sof_module_list,
* which is protected by sof_module_lock. Each module has a reference count
* that is incremented when a filter entry is using the module. A module
* can be destroyed by sof_unregister() only when its ref count is zero.
*
* Socket filter instance (sof_instance_t):
*
* Whenever a filter is attached to a socket (sonode), a new instance is
* created. The socket is guaranteed to be single threaded when filters are
* being attached/detached. The instance uses the sonode's so_lock for
* protection.
*
* The lifetime of an instance is the same as the socket it's attached to.
*
* How things link together:
*
*      sockparams.sp_{auto,prog}_filters -> sp_filter_t -> sp_filter_t
*      ^                                    |              |
*      |                                    |              |
*   sonode.so_filter_top -> sof_instance_t  |              |
*                                     |     |              |
*                                     v     v              v
* sof_entry_list -> sof_entry_t -> sof_entry_t -> ... -> sof_entry_t
*                                     |
*                                     v
*             sof_module_list -> sof_module_t -> ... -> sof_module_t
*/
static sof_kstat_t sof_stat;
static kstat_t *sof_stat_ksp;
#ifdef DEBUG
static int socket_filter_debug = 0;
#endif
/*
* A connection that has been deferred for more than `sof_defer_drop_time'
* ticks can be dropped to make room for new connections. A connection that
* is to be dropped is moved over to `sof_close_deferred_list' where it will
* be closed by sof_close_deferred() (which is running on a taskq). Connections
* will not be moved over to the close list if it grows larger than
* `sof_close_deferred_max_backlog'.
*/
static void sof_close_deferred(void *);
static void sof_module_rele(sof_module_t *);
static sof_module_t *sof_module_hold_by_name(const char *, const char *);
static int sof_entry_load_module(sof_entry_t *);
static void sof_entry_hold(sof_entry_t *);
static void sof_entry_rele(sof_entry_t *);
static int sof_entry_kstat_create(sof_entry_t *);
static void sof_entry_kstat_destroy(sof_entry_t *);
static void sof_instance_destroy(sof_instance_t *);
/*
* Presumably a kstat update callback: the statistics are read-only, so a
* KSTAT_WRITE request is refused with EACCES; any other request returns 0.
*
* NOTE(review): the function name and parameter list are not visible in
* this view -- confirm against the complete file.
*/
static int
{
if (rw == KSTAT_WRITE)
return (EACCES);
return (0);
}
/*
* Framework initialization.
*
* Returns early when sof_stat_ksp is NULL, in which case none of the
* statistics below are set up. The named statistics referenced here
* ("defer_close_backlog" and "defer_close_failed_backlog_too_big") are
* declared as 64-bit unsigned values (KSTAT_DATA_UINT64).
*
* NOTE(review): the statement that assigns sof_stat_ksp and the beginning
* of each statistic-initialization call are not visible in this view --
* confirm against the complete file.
*/
void
sof_init(void)
{
/* No global kstat available; nothing further to initialize */
if (sof_stat_ksp == NULL)
return;
"defer_close_backlog", KSTAT_DATA_UINT64);
"defer_close_failed_backlog_too_big", KSTAT_DATA_UINT64);
}
/*
* Process filter options.
*/
static int
{
int error;
/*
* Is the filter in a state where filters can be attached?
*/
return (EINVAL);
if (option_name == FIL_ATTACH) {
/*
* Make sure there isn't already another instance of the
* same filter attached to the socket.
*/
(const char *)optval, SOF_MAXNAMELEN) == 0)
return (EEXIST);
}
/* Look up the filter. */
SOF_MAXNAMELEN) == 0)
break;
}
/* No such filter */
return (ENOENT);
}
/* Failed to create an instance; must be out of memory */
return (ENOMEM);
/*
* This might be the first time the filter is being used,
* so try to load the module if it's not already registered.
*/
return (error);
}
/* Module loaded OK, so there must be an ops vector */
if (rval != SOF_RVAL_CONTINUE) {
switch (rval) {
case SOF_RVAL_DETACH:
/*
* Filter does not want to attach.
* An error is returned so the user
* knows the request did not go
* through.
*/
break;
default:
/* Not a valid rval for active attach */
break;
}
return (error);
}
}
return (0);
} else if (option_name == FIL_DETACH) {
SOF_MAXNAMELEN) == 0)
break;
}
return (ENXIO);
/* automatic filters cannot be detached */
return (EINVAL);
return (0);
} else {
return (EINVAL);
}
}
int
{
int error;
/*
* By grabbing the lock as a writer we ensure that no other socket
* operations can start while the filter stack is being manipulated.
*
* We do a tryenter so that in case there is an active thread we
* ask the caller to try again instead of blocking here until the
* other thread is done (which could be indefinitely in case of recv).
*/
return (EAGAIN);
}
/* Bail out if a fallback has taken place */
else
return (error);
}
/*
* Get filter socket options.
*/
static int
{
int i;
if (option_name == FIL_LIST) {
return (EINVAL);
cnt++;
}
return (0);
} else {
return (EINVAL);
}
}
int
{
int error;
/*
* The fallback lock is used here to serialize set and get
* filter operations.
*/
else
cr);
return (error);
}
/*
* The socket `so' wants to inherit the filter stack from `pso'.
* Returns 0 if all went well or an errno otherwise.
*/
int
{
int error;
/*
* Make sure there is enough room to retrieve the addresses
*/
return (ENOMEM);
return (ENOMEM);
}
} else {
}
if (error != 0)
goto out;
if (error != 0)
goto out;
/*
* The stack is built bottom up. Filters are allowed to modify the
* foreign and local addresses during attach.
*/
goto out;
}
/*
* The filter module must be loaded since it's already
* attached to the listener.
*/
&inst->sofi_cookie);
if (rval != SOF_RVAL_CONTINUE) {
if (rval == SOF_RVAL_DEFER) {
} else if (rval == SOF_RVAL_DETACH) {
} else {
/*
* Filters whose attach callback has been called
* will be destroyed when the socket goes away,
* after detach is called.
*/
goto out;
}
}
}
}
out:
}
return (error);
}
/*
* Attach any automatic filters to sonode `so'. Returns 0 if all went well
* and an errno otherwise.
*/
int
{
int error;
/*
* A created instance is added to the top of the sonode's filter
* stack, so traverse the config list in reverse order.
*/
goto free_all;
}
}
/*
* Notify each filter that it's being attached.
*/
/*
* This might be the first time the filter is being used,
* so try to load the module if it's not already registered.
*/
goto free_detached;
/* Module loaded OK, so there must be an ops vector */
if (rval != SOF_RVAL_CONTINUE) {
switch (rval) {
case SOF_RVAL_DETACH:
/* filter does not want to attach */
break;
default:
/* Not a valid rval for active attach */
goto free_detached;
}
}
}
}
return (0);
/*
* Destroy all filters for which attach was not called. The other
* filters will be destroyed (and detach called) when the socket
* is freed.
*/
do {
inst = t;
return (error);
}
/*
* Detaches and frees all filters attached to sonode `so'.
*/
void
{
}
}
/*
* Notifies all active filters attached to `so' about the `event', where
* `arg' is an event-specific argument.
*/
void
{
}
}
/*
* The socket `so' is closing. Notify filters and make sure that there
* are no pending tx operations.
*/
void
{
/*
* Notify filters that the socket is being closed. It's OK for
* filters to inject data.
*/
/*
* Stop any future attempts to inject data, and wait for any
* pending operations to complete. This has to be done to ensure
* that no data is sent down to the protocol once a close
* downcall has been made.
*/
while (so->so_filter_tx > 0)
}
/*
* Called when socket `so' wants to get rid of a deferred connection.
* Returns TRUE if a connection was dropped.
*/
{
return (B_FALSE);
}
so->so_acceptq_len--;
} else {
return (B_FALSE);
}
if (!sof_close_deferred_running) {
} else {
}
return (B_TRUE);
}
/*
* Called from a taskq to close connections that have been deferred for
* too long.
*/
void
sof_close_deferred(void *unused)
{
/*
* NOTE(review): most of this body is not visible in this view. The
* visible skeleton suggests a loop that pulls connections (`drop') off
* a list while sof_close_deferred_running guards against concurrent
* runs -- confirm against the complete file.
*/
if (!sof_close_deferred_running) {
while ((drop =
}
/* Nothing may remain accounted for in the backlog at this point */
ASSERT(sof_close_deferred_backlog == 0);
}
}
/*
* Creates a new filter instance from the entry `ent' and attaches
* it to the sonode `so'. On success, return a pointer to the created
* instance.
*
* The new instance will be placed on the top of the filter stack.
*
* The caller is responsible for assigning the instance's ops vector and
* calling the filter's attach callback.
*
* No locks are held while manipulating the sonode fields because we are
* guaranteed that this operation is serialized.
*
* We can be sure that the entry `ent' will not disappear, because the
* caller is either holding sockconf_lock (in case of an active open), or is
* already holding a reference (in case of a passive open, the listener has
* one).
*/
static sof_instance_t *
{
return (NULL);
else
so->so_filter_active++;
return (inst);
}
/*
* Destroys the filter instance `inst' and unlinks it from the sonode.
*
* Any filter private state must be destroyed (via the detach callback)
* before the instance is destroyed.
*/
static void
{
else
else
so->so_filter_active--;
}
}
/*
* Look up the filter entry identified by `name'. Returns a pointer to the
* matching entry, or NULL if no entry matches.
*
* NOTE(review): the search loop itself is not visible in this view;
* presumably it walks sof_entry_list -- confirm against the complete file.
*/
static sof_entry_t *
sof_entry_find(const char *name)
{
return (ent);
}
return (NULL);
}
void
{
}
if (ent->sofe_socktuple_cnt > 0) {
ent->sofe_socktuple_cnt = 0;
}
}
/*
* Presumably the kstat update callback for a filter entry: the statistics
* are read-only, so a KSTAT_WRITE request is refused with EACCES; any other
* request returns 0.
*
* NOTE(review): the function name and parameter list are not visible in
* this view -- confirm against the complete file.
*/
static int
{
if (rw == KSTAT_WRITE)
return (EACCES);
return (0);
}
/*
* Create the kstat for filter entry `ent'.
*/
static int
{
sizeof (sof_entry_kstat_t) / sizeof (kstat_named_t),
return (ENOMEM);
"tot_active_attach", KSTAT_DATA_UINT64);
"tot_passive_attach", KSTAT_DATA_UINT64);
"attach_failures", KSTAT_DATA_UINT64);
return (0);
}
/*
* Destroys the kstat for filter entry `ent'.
*/
static void
{
}
}
/*
* Increment the reference count for `ent'.
*/
static void
{
ent->sofe_refcnt++;
}
/*
* Decrement the reference count for `ent'. The entry will
* drop its reference on the filter module whenever its
* ref count reaches zero.
*/
static void
{
if (--ent->sofe_refcnt == 0) {
} else {
}
} else {
}
}
/*
* Loads the module used by `ent'
*/
static int
{
ent->sofe_modname);
return (EINVAL);
/* Another thread might have already loaded the module */
} else {
}
return (0);
}
/*
* Add filter entry `ent' to the global list and attach it to all sockparam
* entries which the filter is interested in. Upon successful return the filter
* will be available for applications to use.
*/
int
{
int error;
/*
* We hold sockconf_lock as a WRITER for the whole operation,
* so all operations must be non-blocking.
*/
return (EEXIST);
}
/* The entry is unique; create the kstats */
if (sof_entry_kstat_create(ent) != 0) {
return (ENOMEM);
}
/*
* Attach the filter to sockparams of interest.
*/
return (error);
}
/*
* Everything is OK; insert in global list.
*/
return (0);
}
/*
* Removes the filter entry named `name' from the global list and all
* sockparams.
*/
sof_entry_remove_by_name(const char *name)
{
return (NULL);
}
return (ent);
}
/*
* Filter entry `ent' will process sockparams entry `sp' to determine whether
* it should be attached to the sockparams. It should be called whenever a new
* filter or sockparams is being added. Returns zero either if the filter is
* not interested in the sockparams or if it successfully attached to the
* sockparams. On failure an errno is returned.
*/
int
{
uint_t i;
/* Only interested in non-TPI sockets */
return (0);
for (i = 0; i < ent->sofe_socktuple_cnt; i++) {
break;
}
/* This filter is not interested in the sockparams entry */
if (i == ent->sofe_socktuple_cnt)
return (0);
return (ENOMEM);
/* placement is irrelevant for programmatic filters */
return (0);
} else {
/*
* If the filter specifies a placement hint, then make sure
* it can be satisfied.
*/
case SOF_HINT_TOP:
break;
return (0);
case SOF_HINT_BOTTOM:
break;
return (0);
case SOF_HINT_BEFORE:
case SOF_HINT_AFTER:
SOF_MAXNAMELEN) == 0)
break;
}
break;
} else {
break;
}
return (0);
}
/*FALLTHRU*/
case SOF_HINT_NONE:
/*
* Insert the new filter at the beginning as long as it
* does not violate a TOP hint, otherwise insert in the
* next suitable location.
*/
new);
} else {
}
return (0);
}
/* Failed to insert the filter */
return (ENOSPC);
}
}
/*
* Remove all filter entries attached to the sockparams entry `sp'.
*/
void
{
}
/*
* A new sockparams is being added. Walk all filters and attach those that
* are interested in the entry.
*
* It should be called when the sockparams entry is about to be made available
* for use and while holding the sockconf_lock.
*/
int
{
return (ENOMEM);
}
}
return (0);
}
/*
* Look up the filter module identified by `name'. Returns a pointer to the
* matching module, or NULL if no module matches.
*
* NOTE(review): the search loop itself is not visible in this view;
* presumably it walks sof_module_list -- confirm against the complete file.
*/
static sof_module_t *
sof_module_find(const char *name)
{
return (ent);
return (NULL);
}
/*
* Returns a pointer to a module identified by `name' with its ref count
* bumped. An attempt to load the module is done if it's not found in the
* global list.
*/
{
char *modpath;
int error;
/*
* We'll go through the loop at most two times, which will only
* happen if the module needs to be loaded.
*/
for (;;) {
break;
modname);
/* Failed to load, then bail */
"Failed to load socket filter module: %s (err %d)",
return (NULL);
}
}
mod->sofm_refcnt++;
(void) ddi_modclose(handle);
/*
* The module was loaded, but the filter module could not be
* found. It's likely a misconfigured filter.
*/
"Socket filter module %s was loaded, but did not" \
"register. Filter %s is likely misconfigured.",
}
}
return (mod);
}
/*
* Drop one reference on the filter module `mod'.
*/
void
{
mod->sofm_refcnt--;
}
/*
* Translate a filter return value into an errno.
*
* Values greater than SOF_RVAL_CONTINUE are returned unchanged (cast to
* int); any other value is rejected and EINVAL is returned instead.
*/
int
{
if (rval > SOF_RVAL_CONTINUE) {
/* presumably rval already holds an errno value -- pass it through */
return ((int)rval);
} else {
/*
* NOTE(review): the statement guarded by socket_filter_debug is
* not visible in this view. As shown, a DEBUG build would make
* the return below conditional -- confirm against the complete
* file.
*/
#ifdef DEBUG
if (socket_filter_debug)
#endif
return (EINVAL);
}
}
/*
* Walk through all the filters attached to `so' and allow each filter
* to process the data using its data_out callback. `mp' is a b_cont chain.
*
* Returns the processed mblk, or NULL if mblk was consumed. The mblk might
* have been consumed as a result of an error, in which case `errp' is set to
* the appropriate errno.
*/
mblk_t *
{
continue;
break;
}
}
return (mp);
}
/*
* Walk through all the filters attached to `so' and allow each filter
* to process the data using its data_in_proc callback. `mp' is the start of
* a possible b_next chain, and `lastmp' points to the last mblk in the chain.
*
* Returns the processed mblk, or NULL if all mblks in the chain were
* consumed. `lastmp' is updated to point to the last mblk in the processed
* chain.
*/
mblk_t *
{
do {
continue;
break;
}
continue;
;
else
/*
* The size of the chain has changed; make sure the rcv queue
* stays consistent and check if the flow control state should
* change.
*/
if (diff != 0) {
/* so_check_flow_control drops so_lock */
(void) so_check_flow_control(so);
}
return (retmp);
}
int
{
}
int
{
}
int
{
}
int
{
continue;
(sof_rval_t), rval);
if (rval != SOF_RVAL_CONTINUE) {
return (sof_rval2errno(rval));
}
}
return (-1);
}
int
{
}
int
{
}
int
{
}
int
{
}
int
{
}
int
{
}
/*
* sof_register(version, name, ops, flags)
*
* Register a socket filter identified by name `name' and which should use
* the ops vector `ops' for event notification. `flags' should be set to 0.
* On success 0 is returned, otherwise an errno is returned.
*/
int
{
if (version != SOF_VERSION)
return (EINVAL);
return (EEXIST);
}
return (0);
}
/*
* sof_unregister(name)
*
* Try to unregister the socket filter identified by `name'. If the filter
* is successfully unregistered, then 0 is returned, otherwise an errno is
* returned.
*/
int
sof_unregister(const char *name)
{
/*
* NOTE(review): the module lookup and the removal itself are not
* visible in this view -- confirm against the complete file.
*/
/* Only a module with no outstanding references reports success */
if (mod->sofm_refcnt == 0) {
return (0);
} else {
/* The module is still referenced */
return (EBUSY);
}
}
/* presumably reached when no module named `name' was found */
return (ENXIO);
}
/*
* sof_newconn_ready(handle)
*
* The filter `handle' no longer wants to defer the socket it is attached
* to. A newconn notification will be generated if there is no other filter
* that wants the socket deferred.
*/
void
{
return;
}
/*
* Check if any other filter has deferred the socket. The last
* filter to remove its DEFER flag will be the one generating the
* wakeup.
*/
/* Still deferred; nothing to do */
return;
}
}
/*
* The socket is no longer deferred; move it over to the regular
* accept list and notify the user. However, it is possible that
* the socket is being dropped by sof_sonode_drop_deferred(), so
* first make sure the socket is on the deferred list.
*/
return;
}
}
/*
* sof_bypass(handle)
*
* Stop generating callbacks for `handle'.
*/
void
{
so->so_filter_active--;
}
}
/*
* sof_rcv_flowctrl(handle, enable)
*
* If `enable' is TRUE, then recv side flow control will be asserted for
* the socket associated with `handle'. When `enable' is FALSE the filter
* indicates that it no longer wants to assert flow control, however, the
* condition will not be removed until there are no other filters asserting
* flow control and there is space available in the receive buffer.
*/
void
{
if (enable) {
} else {
/* another filter is asserting flow control */
return;
}
}
/* so_check_flow_control drops so_lock */
(void) so_check_flow_control(so);
}
}
/*
* sof_snd_flowctrl(handle, enable)
*
* If `enable' is TRUE, then send side flow control will be asserted for
* the socket associated with `handle'. When `enable' is FALSE the filter
* indicates that it no longer wants to assert flow control, however, the
* condition will not be removed until there are no other filters asserting
* flow control and there are tx buffers available.
*/
void
{
if (enable) {
} else {
return;
}
}
/*
* Wake up writer if the socket is no longer flow controlled.
*/
if (!SO_SND_FLOWCTRLD(so)) {
/* so_notify_writable drops so_lock */
return;
}
}
}
/*
* sof_get_cookie(handle)
*
* Returns the cookie used by `handle'.
*/
void *
{
}
/*
* sof_cas_cookie(handle, old, new)
*
* Compare-and-swap the cookie used by `handle'.
*/
void *
{
}
/*
* sof_inject_data_out(handle, mp, msg, flowctrld)
*
* Submit `mp' for transmission. `msg' cannot be NULL, and may contain
* ancillary data and destination address. Returns 0 when successful
* in which case `flowctrld' is updated. If flow controlled, no new data
* should be injected until a SOF_EV_INJECT_DATA_OUT_OK event is observed.
* In case of failure, an errno is returned.
*
* Filters that are lower in the stack than `handle' will see the data
* before it is transmitted and may end up modifying or freeing the data.
*/
int
{
int error;
return (EPIPE);
}
so->so_filter_tx++;
so->so_filter_tx--;
error = 0;
} else {
}
return (error);
}
/*
* sof_inject_data_in(handle, mp, len, flags, flowctrld)
*
* Enqueue `mp' which contains `len' bytes of M_DATA onto the socket
* associated with `handle'. `flags' should be set to 0. Returns 0 when
* successful in which case `flowctrld' is updated. If flow controlled,
* no new data should be injected until a SOF_EV_INJECT_DATA_IN_OK event
* is observed. In case of failure, an errno is returned.
*
* Filters that are higher in the stack than `handle' will see the data
* before it is enqueued on the receive queue and may end up modifying or
* freeing the data.
*/
int
{
int error = 0;
/* fallback should never happen when there is an active filter */
return (error);
}
/*
* sof_newconn_move(handle, newparent)
*
* Private interface only to be used by KSSL.
*
* Moves the socket associated with `handle' from its current listening
* socket to the listener associated with `newparent'. The socket being
* moved must be in a deferred state and it is up to the consumer of the
* interface to ensure that the `newparent' does not go away while this
* operation is pending.
*/
{
return (B_FALSE);
old->so_acceptq_len--;
new->so_acceptq_len++;
return (B_TRUE);
}