/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include <sys/mac_impl.h>
#include <sys/mac_client_impl.h>
#include <sys/mac_stat.h>
#include <sys/dls_impl.h>
#include <sys/mac_soft_ring.h>
#include <sys/ethernet.h>
#include <sys/pool_pset.h>
typedef struct flow_stats_s {
} flow_stats_t;
/* global flow table, will be a per exclusive-zone table later */
typedef struct {
const char *fs_name;
};
/*
* Checks whether a flow mask is legal.
*/
static void
{
int i;
}
}
static int
{
int i;
if (rw != KSTAT_READ)
return (EACCES);
for (i = 0; i < fep->fe_rx_srs_cnt; i++) {
break;
}
goto done;
done:
}
return (0);
}
static void
{
/*
* For now, flow entries are only manipulated and visible from the
* global zone.
*/
return;
}
void
{
}
}
/*
* Initialize the flow table
*/
void
{
}
/*
* Cleanup and release the flow table
*/
void
{
}
/*
* mac_create_flow(): create a flow_entry_t.
*/
int
{
int err = 0;
if (err != 0)
return (err);
}
/* Initialize the receiver function to a safe routine */
}
/* This is an initial flow, will be configured later */
return (0);
}
/* Save flow desc */
/*
* We have already set fe_resource_props for a Link.
*/
sizeof (mac_resource_props_t));
}
/*
* The effective resource list should reflect the priority
* that we set implicitly.
*/
else
sizeof (mac_resource_props_t));
}
return (0);
}
/*
* Validate flow entry and add it to a flow table.
*/
int
{
int err;
/*
* Check for invalid bits in mask.
*/
return (EOPNOTSUPP);
/*
* Validate flent.
*/
return (err);
}
/*
* Flent is valid. Now calculate the hash and insert it
* into the hash table.
*/
/*
* We do not need a lock up until now because we were
* not accessing the flow table.
*/
/*
* Check for duplicate flow.
*/
if ((*p)->fe_flow_desc.fd_mask !=
continue;
return (EALREADY);
}
}
/*
* Insert flow to hash list.
*/
if (err != 0) {
return (err);
}
/*
* Save the hash index so it can be used by mac_flow_remove().
*/
/*
* Save the flow tab back reference.
*/
ft->ft_flow_count++;
return (0);
}
/*
* Remove a flow from a mac client's subflow table
*/
void
{
/*
* The interface is not yet plumbed and mac_client_flow_add
* was not done.
*/
if (FLOW_TAB_EMPTY(ft)) {
}
} else {
}
}
/*
* Add a flow to a mac client's subflow table and instantiate the flow
* in the mac by creating the associated SRSs etc.
*/
int
{
int err;
return (err);
/*
* If the subflow table exists already just add the new subflow
* to the existing table, else we create a new subflow table below.
*/
/*
* Try to create a new table and then add the subflow to the
* newly created subflow table
*/
return (EOPNOTSUPP);
}
ft_created = B_TRUE;
}
if (err != 0) {
if (ft_created)
return (err);
}
if (instantiate_flow) {
/* Now activate the flow by creating its SRSs */
if (err != 0) {
if (ft_created)
return (err);
}
} else {
}
if (ft_created) {
if (instantiate_flow)
}
return (0);
}
/*
* Remove flow entry from flow table.
*/
void
{
return;
/*
* If this is a permanent removal from the flow table, mark it
* CONDEMNED to prevent future references. If this is a temporary
* removal from the table, say to update the flow descriptor, then
* we don't mark it CONDEMNED.
*/
if (!temp)
/*
* Locate the specified flent.
*/
/*
* The flent must exist. Otherwise it's a bug.
*/
/*
* Reset fe_index to -1 so any attempt to call mac_flow_remove()
* on a flent that is supposed to be in the table (FE_FLOW_TAB)
* will panic.
*/
ft->ft_flow_count--;
}
/*
* This is the flow lookup routine used by the mac sw classifier engine.
*/
int
{
flow_state_t s;
int i, err;
/*
* Walk the list of predeclared accept functions.
* Each of these would accumulate enough state to allow the next
* accept routine to make progress.
*/
/*
* ENOBUFS indicates that the mp could be too short
* and may need a pullup.
*/
return (err);
/*
* The pullup is done on the last processed mblk, not
* the starting one. pullup is not done if the mblk
* has references or if b_cont is NULL.
*/
return (EINVAL);
flow_state_t *, &s);
goto retry;
}
}
/*
* The packet is considered sane. We may now attempt to
* find the corresponding flent.
*/
if (err != 0)
continue;
return (0);
}
}
return (ENOENT);
}
/*
* Walk flow table.
* The caller is assumed to have proper perimeter protection.
*/
int
void *arg)
{
return (0);
cnt++;
if (err != 0)
return (err);
}
}
return (0);
}
/*
* Same as the above except a mutex is used for protection here.
*/
int
void *arg)
{
int err;
return (0);
return (err);
}
/*
* Destroy a flow entry. Called when the last reference on a flow is released.
*/
void
{
} else {
}
}
/*
* XXX eric
* The MAC_FLOW_PRIORITY checks in mac_resource_ctl_set() (and its
* counterparts) should eventually be moved into the two functions
* below. This would consolidate all the mac property
* checking in one place. I'm leaving this alone for now since it's
* out of scope of the new flows work.
*/
/* ARGSUSED */
{
int i;
} else {
}
}
} else {
}
}
/* modify fanout */
break;
}
/*
* The new set of cpus passed is exactly
* the same as the existing set.
*/
return (changed_mask);
}
}
changed_mask |= MRP_CPUS;
}
/*
* Modify the rings property.
*/
changed_mask |= MRP_POOL;
else
}
return (changed_mask);
}
void
{
/* Update the cached values inside the subflow entry */
/*
* Push the changed parameters to the scheduling code in the
* SRS's, to take effect right away.
*/
if (changed_mask & MRP_MAXBW) {
/*
* If bandwidth is changed, we may have to change
* the number of soft rings to be used for fanout.
* Call mac_flow_update_fanout() if MAC_BIND_CPU
* is not set and there is no user supplied cpu
* info. This applies only to links at this time.
*/
!(changed_mask & MRP_CPUS) &&
}
}
if (changed_mask & MRP_CPUS)
NULL);
pool_lock();
cpupart);
pool_unlock();
}
}
/*
* This function waits for a certain condition to be met and is generally
* used before a destructive or quiescing operation.
*/
void
{
switch (event) {
case FLOW_DRIVER_UPCALL:
/*
* We want to make sure the driver upcalls have finished before
* we signal the Rx SRS worker to quit.
*/
break;
case FLOW_USER_REF:
/*
* Wait for the fe_user_refcnt to drop to 0. The flow has
* been removed from the global flow hash.
*/
while (flent->fe_user_refcnt != 0)
break;
default:
ASSERT(0);
}
}
static boolean_t
{
return (B_TRUE);
}
void
{
} else {
}
/* This is a multicast or broadcast flow entry */
}
}
/*
* In the normal case fe_rx_srs_cnt is 1. However in the error case
* when mac_unicast_add fails we may not have set up any SRS
* in which case fe_rx_srs_cnt will be zero.
*/
if (flent->fe_rx_srs_cnt != 0) {
flent->fe_rx_srs_cnt = 0;
}
}
void
{
/*
* Grab the fe_lock to see a self-consistent fe_flow_desc.
* Updates to the fe_flow_desc happen under the fe_lock
* after removing the flent from the flow table
*/
}
/*
* Update a field of a flow entry. The mac perimeter ensures that
* this is the only thread doing a modify operation on this mac end point.
* So the flow table can't change or disappear. The ft_lock protects access
* to the flow entry, and holding the lock ensures that there isn't any thread
* accessing the flow entry or attempting a flow table lookup. However
* data threads that are using the flow entry based on the old descriptor
* will continue to use the flow entry. If strong coherence is required
* then the flow will have to be quiesced before the descriptor can be
* changed.
*/
void
{
int err;
/*
* The flow hasn't yet been inserted into the table,
* so only the caller knows about this flow, however for
* uniformity we grab the fe_lock here.
*/
}
/*
* Need to remove the flow entry from the table and reinsert it
* into a potentially different hash line. The hash depends on
* the new descriptor fields. However access to fe_desc itself
* is always under the fe_lock. This helps log and stat functions
* see a self-consistent fe_flow_desc.
*/
/*
* The add failed, say due to an invalid flow descriptor.
* Undo the update.
*/
}
}
void
{
/*
* The flow hasn't yet been inserted into the table,
* so only the caller knows about this flow
*/
} else {
}
}
/*
* Return the client-private cookie that was associated with
* the flow when it was created.
*/
void *
{
return (flent->fe_client_cookie);
}
/*
* Forward declarations.
*/
/*
* Create flow table.
*/
void
{
/*
* We make a copy of the ops vector instead of just pointing to it
* because we might want to customize the ops vector on a per table
* basis (e.g. for optimization).
*/
/*
* Optimizations for DL_ETHER media.
*/
}
}
void
{
}
/*
* Destroy flow table.
*/
void
{
return;
}
/*
* Add a new flow entry to the global flow hash table
*/
int
{
int err;
if (err != 0) {
return (EEXIST);
}
/* Mark as inserted into the global flow hash table */
return (err);
}
/*
* Remove a flow entry from the global flow hash table
*/
void
{
/* Clear the mark that says inserted into the global flow hash table */
}
/*
* Retrieve a flow entry from the global flow hash table.
*/
int
{
int err;
(mod_hash_val_t *)&flent);
if (err != 0) {
return (ENOENT);
}
return (0);
}
/*
* Initialize or release mac client flows by walking the subflow table.
*/
static int
{
} else {
}
return (0);
}
void
{
/*
* If mac client had subflow(s) configured before plumb, change
* function to mac_rx_srs_subflow_process and in case of hardware
* classification, disable polling.
*/
}
{
return (B_TRUE);
return (B_FALSE);
}
static int
{
return (0);
}
void
{
/*
* Change the mci_flent callback back to mac_rx_srs_process()
* because flows are about to be deactivated.
*/
}
void
{
}
}
/*
* mac_link_flow_init()
* Internal flow interface used for allocating SRSs and related
* data structures. Not meant to be used by mac clients.
*/
int
{
int err;
return (err);
return (0);
}
/*
* mac_link_flow_add()
* Used by flowadm(1m) or kernel mac clients for creating flows.
*/
int
{
int err;
if (err == 0) {
return (EEXIST);
}
/*
* First create a flow entry given the description provided
* by the caller.
*/
if (err != 0)
return (err);
/*
* We've got a local variable referencing this flow now, so we need
* to hold it. We'll release this flow before returning.
* All failures until we return will undo any action that may have
* internally held the flow, so the last REFRELE will ensure a clean
* freeing of resources.
*/
if (err != 0) {
return (err);
}
/*
* dls will eventually be merged with mac so it's ok
* to call dls' internal functions.
*/
if (err != 0)
goto bail;
/*
* Add the flow to the global flow table, this table will be per
* exclusive zone so each zone can have its own flow namespace.
* RFE 6625651 will fix this.
*
*/
goto bail;
hash_added = B_TRUE;
/*
* do not allow flows to be configured on an anchor VNIC
*/
goto bail;
}
/*
* Add the subflow to the subflow table. Also instantiate the flow
* in the mac if there is an active user (we check if the MAC client's
* datapath has been setup).
*/
if (err != 0)
goto bail;
return (0);
bail:
if (hash_added)
if (link_held)
/*
* Wait for any transient global flow hash refs to clear
* and then release the creation reference on the flow
*/
return (err);
}
/*
* mac_link_flow_clean()
* Internal flow interface used for freeing SRSs and related
* data structures. Not meant to be used by mac clients.
*/
void
{
/*
* This sub flow entry may fail to be fully initialized by
* mac_link_flow_init(). If so, simply return.
*/
return;
/*
* Tear down the data path
*/
/*
* Delete the SRSs associated with this subflow. If this is being
* driven by flowadm(1M) then the subflow will be deleted by
* dls_rem_flow. However if this is a result of the interface being
* unplumbed then the subflow itself won't be deleted.
*/
/*
* If all the subflows are gone, re-enable some of the stuff
* we disabled when adding a subflow, polling etc.
*/
if (last_subflow) {
/*
* The subflow table itself is not protected by any locks or
* refcnts. Hence quiesce the client upfront before clearing
* mci_subflow_tab.
*/
}
}
/*
* mac_link_flow_remove()
* Used by flowadm(1m) or kernel mac clients for removing flows.
*/
int
{
int err;
if (err != 0)
return (err);
/*
* The perim must be acquired before acquiring any other references
* to maintain the lock and perimeter hierarchy. Please note the
* FLOW_REFRELE above.
*/
if (err != 0)
return (err);
/*
* Note the second lookup of the flow, because a concurrent thread
* may have removed it already while we were waiting to enter the
* link's perimeter.
*/
if (err != 0) {
return (err);
}
/*
* Remove the flow from the subflow table and deactivate the flow
* by quiescing and removing its SRSs.
*/
/*
* Finally, remove the flow from the global table.
*/
/*
* Wait for any transient global flow hash refs to clear
* and then release the creation reference on the flow
*/
return (0);
}
/*
* mac_link_flow_modify()
* Modifies the properties of a flow identified by its name.
*/
int
{
int err = 0;
if (err != 0)
return (err);
if (err != 0)
return (err);
/*
* The perim must be acquired before acquiring any other references
* to maintain the lock and perimeter hierarchy. Please note the
* FLOW_REFRELE above.
*/
if (err != 0)
return (err);
/*
* Note the second lookup of the flow, because a concurrent thread
* may have removed it already while we were waiting to enter the
* link's perimeter.
*/
if (err != 0) {
return (err);
}
/*
* If this flow is attached to a MAC client, then pass the request
* along to the client.
* Otherwise, just update the cached values.
*/
} else {
}
} else {
}
done:
return (err);
}
/*
* State structure and misc functions used by mac_link_flow_walk().
*/
typedef struct {
void *ws_arg;
static void
{
}
static int
{
int err;
return (err);
}
/*
* mac_link_flow_walk()
* Invokes callback 'func' for all flows belonging to the specified link.
*/
int
{
int err;
if (err != 0)
return (err);
if (err != 0) {
return (err);
}
return (err);
}
/*
* mac_link_flow_info()
* Retrieves information about a specific flow.
*/
int
{
int err;
if (err != 0)
return (err);
return (0);
}
/*
* Hash function macro that takes an Ethernet address and VLAN id as input.
*/
#define HASH_ETHER_VID(a, v, s) \
/*
* Generic layer-2 address hashing function that takes an address and address
* length as input. This is the DJB hash function.
*/
static uint32_t
{
size_t i;
for (i = 0; i < addrlen; i++)
}
return (EINVAL); \
\
} \
}
/* ARGSUSED */
static boolean_t
{
}
/*
* Layer 2 hash function.
* Must be paired with flow_l2_accept() within a set of flow_ops
* because it assumes the dest address is already extracted.
*/
static uint32_t
{
}
/*
* This is the generic layer 2 accept function.
* It makes use of mac_header_info() to extract the header length,
* sap, vlan ID and destination address.
*/
static int
{
int err;
return (err);
}
((s->fs_flags & FLOW_IGNORE_VLAN) == 0)) {
return (ENOBUFS);
} else {
}
return (0);
}
/*
* flow_ether_hash()/accept() are optimized versions of flow_l2_hash()/
* accept(). The notable difference is that dest address is now extracted
* by hash() rather than by accept(). This saves a few memory references
* for flow tables that do not care about mac addresses.
*/
static uint32_t
{
}
static uint32_t
{
}
/* ARGSUSED */
static int
{
return (ENOBUFS);
((s->fs_flags & FLOW_IGNORE_VLAN) == 0)) {
return (ENOBUFS);
} else {
}
return (0);
}
/*
* Validates a layer 2 flow entry.
*/
static int
{
/*
* Dest address is mandatory, and 0 length addresses are not yet
* supported.
*/
return (EINVAL);
/*
* VLAN flows are only supported over ethernet macs.
*/
return (EINVAL);
return (EINVAL);
}
return (0);
}
/*
* Calculates hash index of flow entry.
*/
static uint32_t
{
}
/*
* This is used for duplicate flow checking.
*/
/* ARGSUSED */
static boolean_t
{
}
/*
* Generic flow entry insertion function.
* Used by flow tables that do not have ordering requirements.
*/
/* ARGSUSED */
static int
{
}
return (0);
}
/*
* IP version independent DSField matching function.
*/
/* ARGSUSED */
static boolean_t
{
switch (l3info->l3_version) {
case IPV4_VERSION: {
return ((ipha->ipha_type_of_service &
}
case IPV6_VERSION: {
}
default:
return (B_FALSE);
}
}
/*
* IP v4 and v6 address matching.
* The netmask only needs to be applied on the packet but not on the
* flow_desc since fd_local_addr/fd_remote_addr are premasked subnets.
*/
/* ARGSUSED */
static boolean_t
{
}
}
/* ARGSUSED */
static boolean_t
{
fd->fd_local_addr));
}
}
/* ARGSUSED */
static boolean_t
{
}
static uint32_t
{
if ((mask & FLOW_IP_LOCAL) != 0) {
} else if ((mask & FLOW_IP_REMOTE) != 0) {
} else if ((mask & FLOW_IP_DSFIELD) != 0) {
/*
* DSField flents are arranged as a single list.
*/
return (0);
}
/*
* IP addr flents are hashed into two lists, v4 or v6.
*/
}
static uint32_t
{
}
/* ARGSUSED */
static int
{
/*
* Adjust start pointer if we're at the end of an mblk.
*/
return (EINVAL);
switch (sap) {
case ETHERTYPE_IP: {
return (ENOBUFS);
break;
}
case ETHERTYPE_IPV6: {
return (ENOBUFS);
}
break;
}
default:
return (EINVAL);
}
return (0);
}
/* ARGSUSED */
static int
{
switch (fd->fd_protocol) {
case IPPROTO_TCP:
case IPPROTO_UDP:
case IPPROTO_SCTP:
case IPPROTO_ICMP:
case IPPROTO_ICMPV6:
return (0);
default:
return (EINVAL);
}
}
/* ARGSUSED */
static int
{
/*
* DSField does not require an IP version.
*/
if (fd->fd_dsfield_mask == 0)
return (EINVAL);
return (0);
}
/*
* IP addresses must come with a version to avoid ambiguity.
*/
return (EINVAL);
return (EINVAL);
switch (mask) {
case FLOW_IP_LOCAL:
break;
case FLOW_IP_REMOTE:
break;
default:
return (EINVAL);
}
/*
* Apply netmask onto specified address.
*/
if (version == IPV4_VERSION) {
return (EINVAL);
} else {
if (IN6_IS_ADDR_UNSPECIFIED(addr) ||
return (EINVAL);
}
return (0);
}
static uint32_t
{
}
static uint32_t
{
/*
* DSField flents are arranged as a single list.
*/
return (0);
/*
* IP addr flents are hashed into two lists, v4 or v6.
*/
}
/* ARGSUSED */
static boolean_t
{
}
/* ARGSUSED */
static boolean_t
{
}
/*
* flow_ip_accept_fe() already validated the version.
*/
return (B_FALSE);
case FLOW_IP_LOCAL:
break;
case FLOW_IP_REMOTE:
break;
default:
/*
* This is unreachable given the checks in
* flow_ip_accept_fe().
*/
return (B_FALSE);
}
} else {
}
}
static int
{
int bits;
int i;
for (i = 3; i >= 0; i--) {
plen -= 32;
continue;
}
if (bits == 0)
break;
}
return (plen);
}
/* ARGSUSED */
static int
{
flow_entry_t **p = headp;
/*
* No special ordering needed for dsfield.
*/
if (*p != NULL) {
}
*p = flent;
return (0);
}
/*
* IP address flows are arranged in descending prefix length order.
*/
fd = &(*p)->fe_flow_desc;
/*
* Normally a dsfield flent shouldn't end up on the same
* list as an IP address because flow tables are (for now)
* disjoint. If we decide to support both IP and dsfield
* in the same table in the future, this check will allow
* for that.
*/
continue;
/*
* We also allow for the mixing of local and remote address
* flents within one list.
*/
plen = flow_ip_mask2plen(m);
break;
}
if (*p != NULL) {
}
*p = flent;
return (0);
}
/*
* Transport layer protocol and port matching functions.
*/
/* ARGSUSED */
static boolean_t
{
}
/* ARGSUSED */
static boolean_t
{
}
/*
* Transport hash function.
* Since we only support either local or remote port flows,
* we only need to extract one of the ports to be used for
* matching.
*/
static uint32_t
{
} else {
}
}
/*
* Unlike other accept() functions above, we do not need to get the header
* size because this is our highest layer so far. If we want to support
* other higher layer protocols, we would need to save the l4_hdrsize
* in the code below.
*/
/* ARGSUSED */
static int
{
/*
* Adjust start pointer if we're at the end of an mblk.
*/
return (EINVAL);
return (EINVAL);
switch (proto) {
case IPPROTO_TCP: {
return (ENOBUFS);
break;
}
case IPPROTO_UDP: {
return (ENOBUFS);
break;
}
case IPPROTO_SCTP: {
return (ENOBUFS);
break;
}
default:
return (EINVAL);
}
return (0);
}
/*
* Validates transport flow entry.
* The protocol field must be present.
*/
/* ARGSUSED */
static int
{
if ((mask & FLOW_IP_PROTOCOL) == 0)
return (EINVAL);
switch (fd->fd_protocol) {
case IPPROTO_TCP:
case IPPROTO_UDP:
case IPPROTO_SCTP:
break;
default:
return (EINVAL);
}
switch (mask & ~FLOW_IP_PROTOCOL) {
case FLOW_ULP_PORT_LOCAL:
if (fd->fd_local_port == 0)
return (EINVAL);
break;
case FLOW_ULP_PORT_REMOTE:
if (fd->fd_remote_port == 0)
return (EINVAL);
break;
case 0:
/*
* Transport-only flows conflict with our table type.
*/
return (EOPNOTSUPP);
default:
return (EINVAL);
}
return (0);
}
static uint32_t
{
}
/* ARGSUSED */
static boolean_t
{
return (B_FALSE);
return (B_TRUE);
}
};
};
};
};
};
#define FLOW_MAX_TAB_INFO \
((sizeof (flow_tab_info_list)) / sizeof (flow_tab_info_t))
static flow_tab_info_t *
{
int i;
for (i = 0; i < FLOW_MAX_TAB_INFO; i++) {
return (&flow_tab_info_list[i]);
}
return (NULL);
}