mac_client.c revision fc4e975d651c96c6d30d506e8d2cb1f70b36fab7
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * CDDL HEADER START
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * The contents of this file are subject to the terms of the
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * Common Development and Distribution License (the "License").
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * You may not use this file except in compliance with the License.
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * See the License for the specific language governing permissions
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * and limitations under the License.
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * When distributing Covered Code, include this CDDL HEADER in each
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * If applicable, add the following below this CDDL HEADER, with the
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * fields enclosed by brackets "[]" replaced with your own identifying
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * information: Portions Copyright [yyyy] [name of copyright owner]
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * CDDL HEADER END
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * Use is subject to license terms.
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * - General Introduction:
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * This file contains the implementation of the MAC client kernel
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * API and related code. The MAC client API allows a kernel module
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * to gain access to a MAC instance (physical NIC, link aggregation, etc).
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * It allows a MAC client to associate itself with a MAC address,
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * VLANs, callback functions for data traffic and for promiscuous mode.
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * The MAC client API is also used to specify the properties associated
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * with a MAC client, such as bandwidth limits, priority, CPUS, etc.
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * These properties are further used to determine the hardware resources
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * to allocate to the various MAC clients.
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * - Primary MAC clients:
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * The MAC client API refers to "primary MAC clients". A primary MAC
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * client is a client which "owns" the primary MAC address of
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * the underlying MAC instance. The primary MAC address is called out
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * since it is associated with specific semantics: the primary MAC
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * address is the MAC address which is assigned to the IP interface
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * when it is plumbed, and the primary MAC address is assigned
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * to VLAN data-links. The primary address of a MAC instance can
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * also change dynamically from under the MAC client, for example
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * as a result of a change of state of a link aggregation. In that
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * case the MAC layer automatically updates all data-structures which
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * refer to the current value of the primary MAC address. Typical
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * primary MAC clients are dls, aggr, and xnb. A typical non-primary
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * MAC client is the vnic driver.
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * - Virtual Switching:
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * The MAC layer implements a virtual switch between the MAC clients
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * (primary and non-primary) defined on top of the same underlying
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * NIC (physical, link aggregation, etc). The virtual switch is
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * VLAN-aware, i.e. it allows multiple MAC clients to be members
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * of one or more VLANs, and the virtual switch will distribute
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * multicast tagged packets only to the members of the corresponding VLANs.
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * - Upper vs Lower MAC:
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * Creating a VNIC on top of a MAC instance effectively causes
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * two MAC instances to be layered on top of each other, one for
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * the VNIC(s), one for the underlying MAC instance (physical NIC,
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * link aggregation, etc). In the code below we refer to the
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * underlying NIC as the "lower MAC", and we refer to VNICs as
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * the "upper MAC".
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * - Pass-through for VNICs:
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * When VNICs are created on top of an underlying MAC, this causes
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * a layering of two MAC instances. Since the lower MAC already
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * does the switching and demultiplexing to its MAC clients, the
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * upper MAC would simply have to pass packets to the layer below
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * or above it, which would introduce overhead. In order to avoid
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * this overhead, the MAC layer implements a pass-through mechanism
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * for VNICs. When a VNIC opens the lower MAC instance, it saves
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * the MAC client handle it obtains from the MAC layer. When a MAC
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * client opens a VNIC (upper MAC), the MAC layer detects that
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * the MAC being opened is a VNIC, and gets the MAC client handle
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * that the VNIC driver obtained from the lower MAC. This exchange
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * is done through a private capability between the MAC layer
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * and the VNIC driver. The upper MAC then returns that handle
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * directly to its MAC client. Any operation done by the upper
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * MAC client is now done on the lower MAC client handle, which
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * allows the VNIC driver to be completely bypassed for the
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * performance sensitive data-path.
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarksstatic boolean_t mac_client_single_rcvr(mac_client_impl_t *);
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarksstatic flow_entry_t *mac_client_swap_mciflent(mac_client_impl_t *);
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarksstatic flow_entry_t *mac_client_get_flow(mac_client_impl_t *,
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarksstatic void mac_client_remove_flow_from_list(mac_client_impl_t *,
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarksstatic void mac_client_add_to_flow_list(mac_client_impl_t *, flow_entry_t *);
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarksstatic void mac_rename_flow_names(mac_client_impl_t *, const char *);
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks/* ARGSUSED */
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarksi_mac_client_impl_ctor(void *buf, void *arg, int kmflag)
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks mutex_init(&mcip->mci_tx_cb_lock, NULL, MUTEX_DRIVER, NULL);
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks mcip->mci_tx_notify_cb_info.mcbi_lockp = &mcip->mci_tx_cb_lock;
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks for (i = 0; i <= mac_tx_percpu_cnt; i++) {
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks return (0);
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks/* ARGSUSED */
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks for (i = 0; i <= mac_tx_percpu_cnt; i++) {
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks/* ARGSUSED */
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarksi_mac_promisc_impl_ctor(void *buf, void *arg, int kmflag)
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks mpip->mpi_mci_link.mcb_objsize = sizeof (mac_promisc_impl_t);
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks mpip->mpi_mi_link.mcb_objsize = sizeof (mac_promisc_impl_t);
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks return (0);
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks/* ARGSUSED */
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks ASSERT(mpip->mpi_mci_link.mcb_objsize == sizeof (mac_promisc_impl_t));
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks ASSERT(mpip->mpi_mi_link.mcb_objp == mpip->mpi_mci_link.mcb_objp);
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks ASSERT(mpip->mpi_mi_link.mcb_objsize == sizeof (mac_promisc_impl_t));
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks mac_client_impl_cache = kmem_cache_create("mac_client_impl_cache",
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks mac_promisc_impl_cache = kmem_cache_create("mac_promisc_impl_cache",
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks sizeof (mac_promisc_impl_t), 0, i_mac_promisc_impl_ctor,
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * Return the lower MAC client handle from the VNIC driver for the
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * specified VNIC MAC instance.
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks VERIFY(i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_VNIC, &cap));
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * Return the MAC client handle of the primary MAC client for the
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * specified MAC instance, or NULL otherwise.
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks if (MCIP_DATAPATH_SETUP(mcip) && mac_is_primary_client(mcip))
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * Open a MAC specified by its MAC name.
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * Look up its entry in the global hash table.
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * Hold the dip associated with the MAC to prevent it from being
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * detached. For a softmac, its underlying dip is held by the
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * mi_open() callback.
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * This is done to be more tolerant with some defective drivers,
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * which incorrectly handle mac_unregister() failure in their
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * xxx_detach() routine. For example, some drivers ignore the
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * failure of mac_unregister() and free all resources that
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * are needed for data transmission.
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks return (0);
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * The mac perimeter is used in both mac_open and mac_close by the
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * framework to single thread the MC_OPEN/MC_CLOSE of drivers.
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks if (mip->mi_oref != 1 || ((err = mip->mi_open(mip->mi_driver)) == 0)) {
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks return (0);
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * Open a MAC specified by its linkid.
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarksmac_open_by_linkid(datalink_id_t linkid, mac_handle_t *mhp)
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * Open a MAC specified by its link name.
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarksmac_open_by_linkname(const char *link, mac_handle_t *mhp)
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * Close the specified MAC.
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * The mac perimeter is used in both mac_open and mac_close by the
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * framework to single thread the MC_OPEN/MC_CLOSE of drivers.
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * Misc utility functions to retrieve various information about a MAC
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * instance or a MAC client.
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarksconst char *
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * Return the VID associated with a MAC client. This function should
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * be called for clients which are associated with only one VID.
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks ASSERT(MCIP_DATAPATH_SETUP(mcip) && mac_client_single_rcvr(mcip));
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * Return the link speed associated with the specified MAC client.
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * The link speed of a MAC client is equal to the smallest value of
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * 1) the current link speed of the underlying NIC, or
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * 2) the bandwidth limit set for the MAC client.
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * Note that the bandwidth limit can be higher than the speed
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * of the underlying NIC. This is allowed to avoid spurious
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * administration action failures or artificially lowering the
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * bandwidth limit of a link that may have temporarily lowered
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks * its link speed due to hardware problem or administrator action.
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks nic_speed = mac_stat_get((mac_handle_t)mip, MAC_STAT_IFSPEED);
5a5eeccada4b11bc692e9a5015d5f4a4f188226cmarks return (0);
static uint64_t
mui_next) {
return (LINK_STATE_UP);
switch (stat) {
case MAC_STAT_LINK_STATE:
case MAC_STAT_LINK_UP:
case MAC_STAT_PROMISC:
case MAC_STAT_IFSPEED:
case MAC_STAT_MULTIRCV:
case MAC_STAT_BRDCSTRCV:
case MAC_STAT_MULTIXMT:
case MAC_STAT_BRDCSTXMT:
case MAC_STAT_OBYTES:
case MAC_STAT_OPACKETS:
case MAC_STAT_OERRORS:
case MAC_STAT_IPACKETS:
case MAC_STAT_RBYTES:
case MAC_STAT_IERRORS:
return (val);
int ret;
switch (stat) {
case MAC_STAT_LINK_STATE:
case MAC_STAT_LINK_UP:
case MAC_STAT_PROMISC:
if (ret != 0) {
return (val);
return (VLAN_ID_NONE);
return (B_FALSE);
return (B_TRUE);
int err;
return (ENOTSUP);
return (EACCES);
return (EBUSY);
return (ENOTSUP);
if (err != 0) {
return (err);
int err;
return (EINVAL);
return (EBUSY);
if (err == 0)
if (err != 0) {
return (err);
if (err == 0)
return (err);
static mac_unicast_impl_t *
return (muip);
static boolean_t
return (B_TRUE);
return (B_FALSE);
return (EINVAL);
if (prefix_len > 0) {
addr_len)) {
return (EINVAL);
*diag = 0;
int err = 0;
return (EINVAL);
return (err);
&linkid)) != 0) {
goto done;
err = 0;
goto done;
goto done;
if (err != 0)
goto done;
if (no_hwrings)
if (req_hwrings)
if (share_desired) {
done:
return (err);
return (B_FALSE);
return (B_TRUE);
int err = 0;
if (err != 0)
return (err);
int err;
if (vid != 0) {
if (is_primary)
if (first_flow) {
return (err);
if (!first_flow)
if (add) {
int err = 0;
goto bail;
if (err != 0)
goto bail;
goto bail;
goto bail;
goto bail;
goto bail;
goto bail;
if (nactiveclients_added)
bail:
if (bcast_added)
if (mac_started)
if (nactiveclients_added)
return (err);
static mac_client_impl_t *
return (mcip);
return (NULL);
int err;
!is_vnic_primary) {
MAC_CLIENT_FLAGS_MULTI_PRIMARY) == 0 ||
MAC_CLIENT_FLAGS_PASSIVE_PRIMARY) != 0) {
return (EBUSY);
if (passive_client)
return (EAGAIN);
return (ENXIO);
if (err != 0)
return (err);
if (fastpath_disabled)
return (EBUSY);
return (EBUSY);
return (EBUSY);
MCIS_IS_AGGR_PORT))) {
sizeof (mac_resource_props_t));
goto bail_out;
goto bail_out;
if (is_unicast_hw) {
!is_unicast_hw ||
goto bail_out;
goto bail_out;
MAC_CLIENT_FLAGS_PASSIVE_PRIMARY) == 0);
if (err != 0)
goto bail_out;
if (fastpath_disabled)
return (err);
return (err);
return (err);
return (err);
if (!isprimary) {
int err = 0;
return (err);
if (err != 0)
return (err);
int rc;
return (rc);
return (rc);
(error) = 0; \
int error;
int error;
if (error != 0) {
return (NULL);
int err = 0;
if (err != 0) {
goto done;
goto done;
done:
if (is_subflow)
return (cookie);
int err;
if (err != 0)
return (B_TRUE);
return (B_FALSE);
return (blocked);
int err = 0;
return (err);
void *arg)
int err = 0;
return (err);
int err = 0;
if (err != 0) {
return (err);
return (err);
static uint16_t
static boolean_t
return (B_FALSE);
if (is_mcast &&
if (is_sender ||
!is_mcast) {
typedef struct i_mac_info_state_s {
const char *mi_name;
static uint_t
return (MH_WALK_CONTINUE);
return (MH_WALK_CONTINUE);
return (MH_WALK_TERMINATE);
return (B_FALSE);
return (B_TRUE);
return (B_FALSE);
switch (cap) {
case MAC_CAPAB_NO_NATIVEVLAN:
case MAC_CAPAB_NO_ZCOPY:
return (B_TRUE);
case MAC_CAPAB_LEGACY:
case MAC_CAPAB_HCKSUM:
return (B_FALSE);
mblk_t *
mhip));
mblk_t *
return (NULL);
return (mp);
mblk_t *
return (NULL);
return (mp);
if (is_user_flow) {
int err = 0;
if (err != 0)
return (err);
return (err);
err =
if (err == 0) {
sizeof (mac_resource_props_t));
return (err);
int err;
return (err);
mrp);
goto done;
(void *)(&aggr_cap));
done:
return (B_TRUE);
return (B_FALSE);
static flow_entry_t *
return (NULL);
return (flent);
return (NULL);
static flow_entry_t *
int err;
sizeof (flow_desc_t));
return (flent1);
static boolean_t
return (EINVAL);
return (EINVAL);
return (EINVAL);
return (EINVAL);
int rv;
rv = 0;
if (rv == 0)
return (EINVAL);
return (EINVAL);
return (ENOENT);
return (EBUSY);
int rv;
goto bail;
goto bail;
goto bail;
bail:
if (exclusive)
return (rv);
name_len);
if (index > 0)
*n_clnts = i;