mac_util.c revision 5cd376e8b7030707d78315f63adb4bb2b4d9963e
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/*
* MAC Services Module - misc utilities
*/
#include <sys/types.h>
#include <sys/mac.h>
#include <sys/mac_impl.h>
#include <sys/mac_client_priv.h>
#include <sys/mac_client_impl.h>
#include <sys/mac_soft_ring.h>
#include <sys/strsubr.h>
#include <sys/strsun.h>
#include <sys/vlan.h>
#include <sys/pattr.h>
#include <sys/pci_tools.h>
#include <inet/ip.h>
#include <inet/ip_impl.h>
#include <inet/ip6.h>
#include <sys/vtrace.h>
#include <sys/dlpi.h>
#include <sys/sunndi.h>
#include <inet/ipsec_impl.h>
#include <inet/sadb.h>
#include <inet/ipsecesp.h>
#include <inet/ipsecah.h>
/*
* Copy an mblk, preserving its hardware checksum flags.
*/
static mblk_t *
mac_copymsg_cksum(mblk_t *mp)
{
mblk_t *mp1;
uint32_t start, stuff, end, value, flags;
mp1 = copymsg(mp);
if (mp1 == NULL)
return (NULL);
hcksum_retrieve(mp, NULL, NULL, &start, &stuff, &end, &value, &flags);
(void) hcksum_assoc(mp1, NULL, NULL, start, stuff, end, value,
flags, KM_NOSLEEP);
return (mp1);
}
/*
* Copy an mblk chain, preserving the hardware checksum flags of the
* individual mblks.
*/
mblk_t *
mac_copymsgchain_cksum(mblk_t *mp)
{
mblk_t *nmp = NULL;
mblk_t **nmpp = &nmp;
for (; mp != NULL; mp = mp->b_next) {
if ((*nmpp = mac_copymsg_cksum(mp)) == NULL) {
freemsgchain(nmp);
return (NULL);
}
nmpp = &((*nmpp)->b_next);
}
return (nmp);
}
/*
* Process the specified mblk chain for proper handling of hardware
* checksum offload. This routine is invoked for loopback traffic
* between MAC clients.
* The function handles a NULL mblk chain passed as argument.
*/
mblk_t *
mac_fix_cksum(mblk_t *mp_chain)
{
mblk_t *mp, *prev = NULL, *new_chain = mp_chain, *mp1;
uint32_t flags, start, stuff, end, value;
for (mp = mp_chain; mp != NULL; prev = mp, mp = mp->b_next) {
uint16_t len;
uint32_t offset;
struct ether_header *ehp;
uint16_t sap;
hcksum_retrieve(mp, NULL, NULL, &start, &stuff, &end, &value,
&flags);
if (flags == 0)
continue;
/*
* Since the processing of checksum offload for loopback
* traffic requires modification of the packet contents,
* make sure that we are always modifying our own copy.
*/
if (DB_REF(mp) > 1) {
mp1 = copymsg(mp);
if (mp1 == NULL)
continue;
mp1->b_next = mp->b_next;
mp->b_next = NULL;
freemsg(mp);
if (prev != NULL)
prev->b_next = mp1;
else
new_chain = mp1;
mp = mp1;
}
/*
* Ethernet, and optionally VLAN header.
*/
/* LINTED: improper alignment cast */
ehp = (struct ether_header *)mp->b_rptr;
if (ntohs(ehp->ether_type) == VLAN_TPID) {
struct ether_vlan_header *evhp;
ASSERT(MBLKL(mp) >= sizeof (struct ether_vlan_header));
/* LINTED: improper alignment cast */
evhp = (struct ether_vlan_header *)mp->b_rptr;
sap = ntohs(evhp->ether_type);
offset = sizeof (struct ether_vlan_header);
} else {
sap = ntohs(ehp->ether_type);
offset = sizeof (struct ether_header);
}
if (MBLKL(mp) <= offset) {
offset -= MBLKL(mp);
if (mp->b_cont == NULL) {
/* corrupted packet, skip it */
if (prev != NULL)
prev->b_next = mp->b_next;
else
new_chain = mp->b_next;
mp1 = mp->b_next;
mp->b_next = NULL;
freemsg(mp);
mp = mp1;
continue;
}
mp = mp->b_cont;
}
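/*
* 'offset' now gives the start of the IP header relative to the
* current mblk's b_rptr (which may be b_cont if the Ethernet
* header filled the first mblk).
*/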
if (flags & (HCK_FULLCKSUM | HCK_IPV4_HDRCKSUM)) {
ipha_t *ipha = NULL;
/*
* In order to compute the full and header
* checksums, we need to find and parse
* the IP and/or ULP headers.
*/
sap = (sap < ETHERTYPE_802_MIN) ? 0 : sap;
/*
* IP header.
*/
if (sap != ETHERTYPE_IP)
continue;
ASSERT(MBLKL(mp) >= offset + sizeof (ipha_t));
/* LINTED: improper alignment cast */
ipha = (ipha_t *)(mp->b_rptr + offset);
if (flags & HCK_FULLCKSUM) {
ipaddr_t src, dst;
uint32_t cksum;
uint16_t *up;
uint8_t proto;
/*
* Pointer to checksum field in ULP header.
*/
proto = ipha->ipha_protocol;
ASSERT(ipha->ipha_version_and_hdr_length ==
IP_SIMPLE_HDR_VERSION);
switch (proto) {
case IPPROTO_TCP:
/* LINTED: improper alignment cast */
up = IPH_TCPH_CHECKSUMP(ipha,
IP_SIMPLE_HDR_LENGTH);
break;
case IPPROTO_UDP:
/* LINTED: improper alignment cast */
up = IPH_UDPH_CHECKSUMP(ipha,
IP_SIMPLE_HDR_LENGTH);
break;
default:
cmn_err(CE_WARN, "mac_fix_cksum: "
"unexpected protocol: %d", proto);
continue;
}
/*
* Pseudo-header checksum.
*/
src = ipha->ipha_src;
dst = ipha->ipha_dst;
len = ntohs(ipha->ipha_length) -
IP_SIMPLE_HDR_LENGTH;
cksum = (dst >> 16) + (dst & 0xFFFF) +
(src >> 16) + (src & 0xFFFF);
cksum += htons(len);
/*
* The checksum value stored in the packet needs
* to be correct. Compute it here.
*/
*up = 0;
cksum += (((proto) == IPPROTO_UDP) ?
IP_UDP_CSUM_COMP : IP_TCP_CSUM_COMP);
cksum = IP_CSUM(mp, IP_SIMPLE_HDR_LENGTH +
offset, cksum);
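/*
* A computed checksum of zero is stored as 0xFFFF; a zero value
* in the checksum field means "no checksum" (notably for UDP).
*/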
*(up) = (uint16_t)(cksum ? cksum : ~cksum);
/*
* Flag the packet so that it appears
* that the checksum has already been
* verified by the hardware.
*/
flags &= ~HCK_FULLCKSUM;
flags |= HCK_FULLCKSUM_OK;
value = 0;
}
if (flags & HCK_IPV4_HDRCKSUM) {
ASSERT(ipha != NULL);
ipha->ipha_hdr_checksum =
(uint16_t)ip_csum_hdr(ipha);
flags &= ~HCK_IPV4_HDRCKSUM;
flags |= HCK_IPV4_HDRCKSUM_OK;
}
}
if (flags & HCK_PARTIALCKSUM) {
uint16_t *up, partial, cksum;
uchar_t *ipp; /* ptr to beginning of IP header */
if (mp->b_cont != NULL) {
mblk_t *mp1;
mp1 = msgpullup(mp, offset + end);
if (mp1 == NULL)
continue;
mp1->b_next = mp->b_next;
mp->b_next = NULL;
freemsg(mp);
if (prev != NULL)
prev->b_next = mp1;
else
new_chain = mp1;
mp = mp1;
}
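/*
* Emulate the partial checksum offload in software: fold the
* bytes from 'start' to 'end' (relative to the IP header) into
* the pseudo-header sum the stack stored at offset 'stuff',
* then take the one's complement of the result.
*/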
ipp = mp->b_rptr + offset;
/* LINTED: cast may result in improper alignment */
up = (uint16_t *)((uchar_t *)ipp + stuff);
partial = *up;
*up = 0;
cksum = IP_BCSUM_PARTIAL(mp->b_rptr + offset + start,
end - start, partial);
cksum = ~cksum;
*up = cksum ? cksum : ~cksum;
/*
* Since we already computed the whole checksum,
* indicate to the stack that it has already
* been verified by the hardware.
*/
flags &= ~HCK_PARTIALCKSUM;
flags |= HCK_FULLCKSUM_OK;
value = 0;
}
(void) hcksum_assoc(mp, NULL, NULL, start, stuff, end,
value, flags, KM_NOSLEEP);
}
return (new_chain);
}
/*
* Add VLAN tag to the specified mblk.
*/
mblk_t *
mac_add_vlan_tag(mblk_t *mp, uint_t pri, uint16_t vid)
{
mblk_t *hmp;
struct ether_vlan_header *evhp;
struct ether_header *ehp;
uint32_t start, stuff, end, value, flags;
ASSERT(pri != 0 || vid != 0);
/*
* Allocate an mblk for the new tagged ethernet header,
* and copy the MAC addresses and ethertype from the
* original header.
*/
hmp = allocb(sizeof (struct ether_vlan_header), BPRI_MED);
if (hmp == NULL) {
freemsg(mp);
return (NULL);
}
evhp = (struct ether_vlan_header *)hmp->b_rptr;
ehp = (struct ether_header *)mp->b_rptr;
bcopy(ehp, evhp, (ETHERADDRL * 2));
evhp->ether_type = ehp->ether_type;
evhp->ether_tpid = htons(ETHERTYPE_VLAN);
hmp->b_wptr += sizeof (struct ether_vlan_header);
mp->b_rptr += sizeof (struct ether_header);
/*
* Free the original message if it's now empty. Link the
* rest of the messages to the header message.
*/
hcksum_retrieve(mp, NULL, NULL, &start, &stuff, &end, &value, &flags);
(void) hcksum_assoc(hmp, NULL, NULL, start, stuff, end, value, flags,
KM_NOSLEEP);
if (MBLKL(mp) == 0) {
hmp->b_cont = mp->b_cont;
freeb(mp);
} else {
hmp->b_cont = mp;
}
ASSERT(MBLKL(hmp) >= sizeof (struct ether_vlan_header));
/*
* Initialize the new TCI (Tag Control Information).
*/
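/* TCI layout: 3-bit priority, 1-bit CFI (zero here), 12-bit VID. */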
evhp->ether_tci = htons(VLAN_TCI(pri, 0, vid));
return (hmp);
}
/*
* Adds a VLAN tag with the specified VID and priority to each mblk of
* the specified chain.
*/
mblk_t *
mac_add_vlan_tag_chain(mblk_t *mp_chain, uint_t pri, uint16_t vid)
{
mblk_t *next_mp, **prev, *mp;
mp = mp_chain;
prev = &mp_chain;
while (mp != NULL) {
next_mp = mp->b_next;
mp->b_next = NULL;
if ((mp = mac_add_vlan_tag(mp, pri, vid)) == NULL) {
freemsgchain(next_mp);
break;
}
*prev = mp;
prev = &mp->b_next;
mp = mp->b_next = next_mp;
}
return (mp_chain);
}
/*
* Strip VLAN tag
*/
mblk_t *
mac_strip_vlan_tag(mblk_t *mp)
{
mblk_t *newmp;
struct ether_vlan_header *evhp;
evhp = (struct ether_vlan_header *)mp->b_rptr;
if (ntohs(evhp->ether_tpid) == ETHERTYPE_VLAN) {
ASSERT(MBLKL(mp) >= sizeof (struct ether_vlan_header));
if (DB_REF(mp) > 1) {
newmp = copymsg(mp);
if (newmp == NULL)
return (NULL);
freemsg(mp);
mp = newmp;
}
evhp = (struct ether_vlan_header *)mp->b_rptr;
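/*
* Slide the destination and source MAC addresses forward by
* VLAN_TAGSZ so that they overwrite the TPID/TCI, then advance
* b_rptr past the now-stale leading four bytes.
*/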
ovbcopy(mp->b_rptr, mp->b_rptr + VLAN_TAGSZ, 2 * ETHERADDRL);
mp->b_rptr += VLAN_TAGSZ;
}
return (mp);
}
/*
* Strip VLAN tag from each mblk of the chain.
*/
mblk_t *
mac_strip_vlan_tag_chain(mblk_t *mp_chain)
{
mblk_t *mp, *next_mp, **prev;
mp = mp_chain;
prev = &mp_chain;
while (mp != NULL) {
next_mp = mp->b_next;
mp->b_next = NULL;
if ((mp = mac_strip_vlan_tag(mp)) == NULL) {
freemsgchain(next_mp);
break;
}
*prev = mp;
prev = &mp->b_next;
mp = mp->b_next = next_mp;
}
return (mp_chain);
}
/*
* Default callback function. Used when the datapath is not yet initialized.
*/
/* ARGSUSED */
void
mac_pkt_drop(void *arg, mac_resource_handle_t resource, mblk_t *mp,
boolean_t loopback)
{
mblk_t *mp1 = mp;
while (mp1 != NULL) {
mp1->b_prev = NULL;
mp1->b_queue = NULL;
mp1 = mp1->b_next;
}
freemsgchain(mp);
}
/*
* Determines the IPv6 header length accounting for all the optional IPv6
* headers (hop-by-hop, destination, routing and fragment). The header length
* and next header value (a transport header) are captured.
*
* Returns B_FALSE if the IP headers are not all in the same mblk; otherwise
* returns B_TRUE.
*/
boolean_t
mac_ip_hdr_length_v6(ip6_t *ip6h, uint8_t *endptr, uint16_t *hdr_length,
uint8_t *next_hdr, ip6_frag_t **fragp)
{
uint16_t length;
uint_t ehdrlen;
uint8_t *whereptr;
uint8_t *nexthdrp;
ip6_dest_t *desthdr;
ip6_rthdr_t *rthdr;
ip6_frag_t *fraghdr;
if (((uchar_t *)ip6h + IPV6_HDR_LEN) > endptr)
return (B_FALSE);
ASSERT(IPH_HDR_VERSION(ip6h) == IPV6_VERSION);
length = IPV6_HDR_LEN;
whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */
if (fragp != NULL)
*fragp = NULL;
nexthdrp = &ip6h->ip6_nxt;
while (whereptr < endptr) {
/* Is there enough left for len + nexthdr? */
if (whereptr + MIN_EHDR_LEN > endptr)
break;
switch (*nexthdrp) {
case IPPROTO_HOPOPTS:
case IPPROTO_DSTOPTS:
/* Assumes the headers are identical for hbh and dst */
desthdr = (ip6_dest_t *)whereptr;
ehdrlen = 8 * (desthdr->ip6d_len + 1);
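/* ip6d_len counts 8-byte units beyond the first 8 bytes. */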
if ((uchar_t *)desthdr + ehdrlen > endptr)
return (B_FALSE);
nexthdrp = &desthdr->ip6d_nxt;
break;
case IPPROTO_ROUTING:
rthdr = (ip6_rthdr_t *)whereptr;
ehdrlen = 8 * (rthdr->ip6r_len + 1);
if ((uchar_t *)rthdr + ehdrlen > endptr)
return (B_FALSE);
nexthdrp = &rthdr->ip6r_nxt;
break;
case IPPROTO_FRAGMENT:
fraghdr = (ip6_frag_t *)whereptr;
ehdrlen = sizeof (ip6_frag_t);
if ((uchar_t *)&fraghdr[1] > endptr)
return (B_FALSE);
nexthdrp = &fraghdr->ip6f_nxt;
if (fragp != NULL)
*fragp = fraghdr;
break;
case IPPROTO_NONE:
/* No next header means we're finished */
default:
*hdr_length = length;
*next_hdr = *nexthdrp;
return (B_TRUE);
}
length += ehdrlen;
whereptr += ehdrlen;
*hdr_length = length;
*next_hdr = *nexthdrp;
}
switch (*nexthdrp) {
case IPPROTO_HOPOPTS:
case IPPROTO_DSTOPTS:
case IPPROTO_ROUTING:
case IPPROTO_FRAGMENT:
/*
* If any known extension headers are still to be processed,
* the packet is malformed (or at least all of the IP headers are
* not in the same mblk - and that should never happen).
*/
return (B_FALSE);
default:
/*
* If we get here, we know that all of the IP headers were in
* the same mblk, even if the ULP header is in the next mblk.
*/
*hdr_length = length;
*next_hdr = *nexthdrp;
return (B_TRUE);
}
}
/*
* The following set of routines takes care of interrupt
* re-targeting for legacy (fixed) interrupts. Some older versions
* of popular NICs, such as e1000g, do not support MSI-X interrupts
* and instead reserve fixed interrupts for their RX/TX rings. To
* re-target these interrupts, PCITOOL ioctls are used.
*/
typedef struct mac_dladm_intr {
int ino;
int cpu_id;
char driver_path[MAXPATHLEN];
char nexus_path[MAXPATHLEN];
} mac_dladm_intr_t;
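/*
* Call flow: mac_client_set_intr_cpu() -> mac_check_interrupt_binding(),
* which locates the nexus "intr" minor node via mac_get_nexus_node() and
* then issues PCITOOL ioctls through an LDI handle on that node
* (mac_validate_intr()/mac_set_intr()).
*/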
/* Bind the interrupt to cpu_num */
static int
mac_set_intr(ldi_handle_t lh, processorid_t cpu_num, int oldcpuid, int ino)
{
pcitool_intr_set_t iset;
int err;
iset.old_cpu = oldcpuid;
iset.ino = ino;
iset.cpu_id = cpu_num;
iset.user_version = PCITOOL_VERSION;
err = ldi_ioctl(lh, PCITOOL_DEVICE_SET_INTR, (intptr_t)&iset, FKIOCTL,
kcred, NULL);
return (err);
}
/*
* Search the interrupt information. iget_p is filled in with the
* info to search for.
*/
static boolean_t
mac_search_intrinfo(pcitool_intr_get_t *iget_p, mac_dladm_intr_t *dln)
{
int i;
char driver_path[2 * MAXPATHLEN];
for (i = 0; i < iget_p->num_devs; i++) {
(void) strlcpy(driver_path, iget_p->dev[i].path, MAXPATHLEN);
(void) snprintf(&driver_path[strlen(driver_path)], MAXPATHLEN,
":%s%d", iget_p->dev[i].driver_name,
iget_p->dev[i].dev_inst);
/* Match against the device path we are looking for */
if (strcmp(driver_path, dln->driver_path) == 0) {
dln->ino = iget_p->ino;
dln->cpu_id = iget_p->cpu_id;
return (B_TRUE);
}
}
return (B_FALSE);
}
/*
* Get information about ino, i.e. whether this is the interrupt for
* our device, where it is bound, etc.
*/
static boolean_t
mac_get_single_intr(ldi_handle_t lh, int oldcpuid, int ino,
mac_dladm_intr_t *dln)
{
pcitool_intr_get_t *iget_p;
int ipsz;
int nipsz;
int err;
uint8_t inum;
/*
* Check if SLEEP is OK, i.e. if we could get here in response to
* changing the fanout due to some callback from the driver, say
* a link speed change.
*/
ipsz = PCITOOL_IGET_SIZE(0);
iget_p = kmem_zalloc(ipsz, KM_SLEEP);
iget_p->num_devs_ret = 0;
iget_p->user_version = PCITOOL_VERSION;
iget_p->cpu_id = oldcpuid;
iget_p->ino = ino;
err = ldi_ioctl(lh, PCITOOL_DEVICE_GET_INTR, (intptr_t)iget_p,
FKIOCTL, kcred, NULL);
if (err != 0) {
kmem_free(iget_p, ipsz);
return (B_FALSE);
}
if (iget_p->num_devs == 0) {
kmem_free(iget_p, ipsz);
return (B_FALSE);
}
inum = iget_p->num_devs;
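/*
* The first ioctl was issued with num_devs_ret set to zero, so it
* mainly reports how many devices share this ino. Reallocate with
* room for all of them and reissue the ioctl.
*/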
if (iget_p->num_devs_ret < iget_p->num_devs) {
/* Reallocate */
nipsz = PCITOOL_IGET_SIZE(iget_p->num_devs);
kmem_free(iget_p, ipsz);
ipsz = nipsz;
iget_p = kmem_zalloc(ipsz, KM_SLEEP);
iget_p->num_devs_ret = inum;
iget_p->cpu_id = oldcpuid;
iget_p->ino = ino;
iget_p->user_version = PCITOOL_VERSION;
err = ldi_ioctl(lh, PCITOOL_DEVICE_GET_INTR, (intptr_t)iget_p,
FKIOCTL, kcred, NULL);
if (err != 0) {
kmem_free(iget_p, ipsz);
return (B_FALSE);
}
/* defensive */
if (iget_p->num_devs != iget_p->num_devs_ret) {
kmem_free(iget_p, ipsz);
return (B_FALSE);
}
}
if (mac_search_intrinfo(iget_p, dln)) {
kmem_free(iget_p, ipsz);
return (B_TRUE);
}
kmem_free(iget_p, ipsz);
return (B_FALSE);
}
/*
* Get the interrupts and check each one to see if it is for our device.
*/
static int
mac_validate_intr(ldi_handle_t lh, mac_dladm_intr_t *dln, processorid_t cpuid)
{
pcitool_intr_info_t intr_info;
int err;
int ino;
int oldcpuid;
err = ldi_ioctl(lh, PCITOOL_SYSTEM_INTR_INFO, (intptr_t)&intr_info,
FKIOCTL, kcred, NULL);
if (err != 0)
return (-1);
for (oldcpuid = 0; oldcpuid < intr_info.num_cpu; oldcpuid++) {
for (ino = 0; ino < intr_info.num_intr; ino++) {
if (mac_get_single_intr(lh, oldcpuid, ino, dln)) {
if (dln->cpu_id == cpuid)
return (0);
return (1);
}
}
}
return (-1);
}
/*
* Obtain the nexus parent node info for mdip.
*/
static dev_info_t *
mac_get_nexus_node(dev_info_t *mdip, mac_dladm_intr_t *dln)
{
struct dev_info *tdip = (struct dev_info *)mdip;
struct ddi_minor_data *minordata;
int circ;
dev_info_t *pdip;
char pathname[MAXPATHLEN];
while (tdip != NULL) {
/*
* The netboot code could call this function while walking the
* device tree so we need to use ndi_devi_tryenter() here to
* avoid deadlock.
*/
if (ndi_devi_tryenter((dev_info_t *)tdip, &circ) == 0)
break;
for (minordata = tdip->devi_minor; minordata != NULL;
minordata = minordata->next) {
if (strncmp(minordata->ddm_node_type, DDI_NT_INTRCTL,
strlen(DDI_NT_INTRCTL)) == 0) {
pdip = minordata->dip;
(void) ddi_pathname(pdip, pathname);
(void) snprintf(dln->nexus_path, MAXPATHLEN,
"/devices%s:intr", pathname);
(void) ddi_pathname_minor(minordata, pathname);
ndi_devi_exit((dev_info_t *)tdip, circ);
return (pdip);
}
}
ndi_devi_exit((dev_info_t *)tdip, circ);
tdip = tdip->devi_parent;
}
return (NULL);
}
/*
* For a primary MAC client, if the user has set a list of CPUs or
* we have obtained it implicitly, we try to retarget the interrupt
* for that device onto one of the CPUs in the list.
* We assign the interrupt to the same CPU as the poll thread.
*/
static boolean_t
mac_check_interrupt_binding(dev_info_t *mdip, int32_t cpuid)
{
ldi_handle_t lh = NULL;
ldi_ident_t li = NULL;
int err;
int ret;
mac_dladm_intr_t dln;
dev_info_t *dip;
struct ddi_minor_data *minordata;
dln.nexus_path[0] = '\0';
dln.driver_path[0] = '\0';
minordata = ((struct dev_info *)mdip)->devi_minor;
while (minordata != NULL) {
if (minordata->type == DDM_MINOR)
break;
minordata = minordata->next;
}
if (minordata == NULL)
return (B_FALSE);
(void) ddi_pathname_minor(minordata, dln.driver_path);
dip = mac_get_nexus_node(mdip, &dln);
/* defensive */
if (dip == NULL)
return (B_FALSE);
err = ldi_ident_from_major(ddi_driver_major(dip), &li);
if (err != 0)
return (B_FALSE);
err = ldi_open_by_name(dln.nexus_path, FREAD|FWRITE, kcred, &lh, li);
if (err != 0)
return (B_FALSE);
ret = mac_validate_intr(lh, &dln, cpuid);
if (ret < 0) {
(void) ldi_close(lh, FREAD|FWRITE, kcred);
return (B_FALSE);
}
/* cmn_note? */
if (ret != 0)
if ((err = (mac_set_intr(lh, cpuid, dln.cpu_id, dln.ino)))
!= 0) {
(void) ldi_close(lh, FREAD|FWRITE, kcred);
return (B_FALSE);
}
(void) ldi_close(lh, FREAD|FWRITE, kcred);
return (B_TRUE);
}
void
mac_client_set_intr_cpu(void *arg, mac_client_handle_t mch, int32_t cpuid)
{
dev_info_t *mdip = (dev_info_t *)arg;
mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
mac_resource_props_t *mrp;
mac_perim_handle_t mph;
flow_entry_t *flent = mcip->mci_flent;
mac_soft_ring_set_t *rx_srs;
mac_cpus_t *srs_cpu;
if (!mac_check_interrupt_binding(mdip, cpuid))
cpuid = -1;
mac_perim_enter_by_mh((mac_handle_t)mcip->mci_mip, &mph);
mrp = MCIP_RESOURCE_PROPS(mcip);
mrp->mrp_rx_intr_cpu = cpuid;
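/*
* fe_rx_srs_cnt == 2 indicates a single exclusive RX ring; its
* SRS (fe_rx_srs[1]) also records the interrupt CPU.
*/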
if (flent != NULL && flent->fe_rx_srs_cnt == 2) {
rx_srs = flent->fe_rx_srs[1];
srs_cpu = &rx_srs->srs_cpu;
srs_cpu->mc_rx_intr_cpu = cpuid;
}
mac_perim_exit(mph);
}
int32_t
mac_client_intr_cpu(mac_client_handle_t mch)
{
mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
mac_cpus_t *srs_cpu;
mac_soft_ring_set_t *rx_srs;
flow_entry_t *flent = mcip->mci_flent;
mac_resource_props_t *mrp = MCIP_RESOURCE_PROPS(mcip);
mac_ring_t *ring;
mac_intr_t *mintr;
/*
* Check if we need to retarget the interrupt. We do this only
* for the primary MAC client, and only if it has the sole
* exclusive ring in the group.
*/
if (mac_is_primary_client(mcip) && flent->fe_rx_srs_cnt == 2) {
rx_srs = flent->fe_rx_srs[1];
srs_cpu = &rx_srs->srs_cpu;
ring = rx_srs->srs_ring;
mintr = &ring->mr_info.mri_intr;
/*
* If ddi_handle is present or the poll CPU is
* already bound to the interrupt CPU, return -1.
*/
if (mintr->mi_ddi_handle != NULL ||
((mrp->mrp_ncpus != 0) &&
(mrp->mrp_rx_intr_cpu == srs_cpu->mc_rx_pollid))) {
return (-1);
}
return (srs_cpu->mc_rx_pollid);
}
return (-1);
}
void *
mac_get_devinfo(mac_handle_t mh)
{
mac_impl_t *mip = (mac_impl_t *)mh;
return ((void *)mip->mi_dip);
}
#define PKT_HASH_2BYTES(x) ((x)[0] ^ (x)[1])
#define PKT_HASH_4BYTES(x) ((x)[0] ^ (x)[1] ^ (x)[2] ^ (x)[3])
#define PKT_HASH_MAC(x) ((x)[0] ^ (x)[1] ^ (x)[2] ^ (x)[3] ^ (x)[4] ^ (x)[5])
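/* Each helper folds a field into the hash by XORing its bytes. */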
uint64_t
mac_pkt_hash(uint_t media, mblk_t *mp, uint8_t policy, boolean_t is_outbound)
{
struct ether_header *ehp;
uint64_t hash = 0;
uint16_t sap;
uint_t skip_len;
uint8_t proto;
boolean_t ip_fragmented;
/*
* We may want to have one of these per MAC type plugin in the
* future. For now, only Ethernet is supported.
*/
if (media != DL_ETHER)
return (0L);
/* for now we support only outbound packets */
ASSERT(is_outbound);
ASSERT(IS_P2ALIGNED(mp->b_rptr, sizeof (uint16_t)));
ASSERT(MBLKL(mp) >= sizeof (struct ether_header));
/* compute L2 hash */
ehp = (struct ether_header *)mp->b_rptr;
if ((policy & MAC_PKT_HASH_L2) != 0) {
uchar_t *mac_src = ehp->ether_shost.ether_addr_octet;
uchar_t *mac_dst = ehp->ether_dhost.ether_addr_octet;
hash = PKT_HASH_MAC(mac_src) ^ PKT_HASH_MAC(mac_dst);
policy &= ~MAC_PKT_HASH_L2;
}
if (policy == 0)
goto done;
/* skip ethernet header */
sap = ntohs(ehp->ether_type);
if (sap == ETHERTYPE_VLAN) {
struct ether_vlan_header *evhp;
mblk_t *newmp = NULL;
skip_len = sizeof (struct ether_vlan_header);
if (MBLKL(mp) < skip_len) {
/* the vlan tag spills into the next mblk, pull up first */
newmp = msgpullup(mp, -1);
if ((newmp == NULL) || (MBLKL(newmp) < skip_len)) {
goto done;
}
evhp = (struct ether_vlan_header *)newmp->b_rptr;
} else {
evhp = (struct ether_vlan_header *)mp->b_rptr;
}
sap = ntohs(evhp->ether_type);
freemsg(newmp);
} else {
skip_len = sizeof (struct ether_header);
}
/* if ethernet header is in its own mblk, skip it */
if (MBLKL(mp) <= skip_len) {
skip_len -= MBLKL(mp);
mp = mp->b_cont;
if (mp == NULL)
goto done;
}
sap = (sap < ETHERTYPE_802_MIN) ? 0 : sap;
/* compute IP src/dst addresses hash and skip IPv{4,6} header */
switch (sap) {
case ETHERTYPE_IP: {
ipha_t *iphp;
/*
* If the header is not aligned or the header doesn't fit
* in the mblk, bail now. Note that this may cause packet
* reordering.
*/
iphp = (ipha_t *)(mp->b_rptr + skip_len);
if (((unsigned char *)iphp + sizeof (ipha_t) > mp->b_wptr) ||
!OK_32PTR((char *)iphp))
goto done;
proto = iphp->ipha_protocol;
skip_len += IPH_HDR_LENGTH(iphp);
/* Check if the packet is fragmented. */
ip_fragmented = ntohs(iphp->ipha_fragment_offset_and_flags) &
IPH_OFFSET;
/*
* For fragmented packets, use addresses in addition to
* the frag_id to generate the hash, in order to get
* better distribution.
*/
if (ip_fragmented || (policy & MAC_PKT_HASH_L3) != 0) {
uint8_t *ip_src = (uint8_t *)&(iphp->ipha_src);
uint8_t *ip_dst = (uint8_t *)&(iphp->ipha_dst);
hash ^= (PKT_HASH_4BYTES(ip_src) ^
PKT_HASH_4BYTES(ip_dst));
policy &= ~MAC_PKT_HASH_L3;
}
if (ip_fragmented) {
uint8_t *identp = (uint8_t *)&iphp->ipha_ident;
hash ^= PKT_HASH_2BYTES(identp);
goto done;
}
break;
}
case ETHERTYPE_IPV6: {
ip6_t *ip6hp;
ip6_frag_t *frag = NULL;
uint16_t hdr_length;
/*
* If the header is not aligned or the header doesn't fit
* in the mblk, bail now. Note that this may cause packet
* reordering.
*/
ip6hp = (ip6_t *)(mp->b_rptr + skip_len);
if (((unsigned char *)ip6hp + IPV6_HDR_LEN > mp->b_wptr) ||
!OK_32PTR((char *)ip6hp))
goto done;
if (!mac_ip_hdr_length_v6(ip6hp, mp->b_wptr, &hdr_length,
&proto, &frag))
goto done;
skip_len += hdr_length;
/*
* For fragmented packets, use addresses in addition to
* the frag_id to generate the hash, in order to get
* better distribution.
*/
if (frag != NULL || (policy & MAC_PKT_HASH_L3) != 0) {
uint8_t *ip_src = &(ip6hp->ip6_src.s6_addr8[12]);
uint8_t *ip_dst = &(ip6hp->ip6_dst.s6_addr8[12]);
hash ^= (PKT_HASH_4BYTES(ip_src) ^
PKT_HASH_4BYTES(ip_dst));
policy &= ~MAC_PKT_HASH_L3;
}
if (frag != NULL) {
uint8_t *identp = (uint8_t *)&frag->ip6f_ident;
hash ^= PKT_HASH_4BYTES(identp);
goto done;
}
break;
}
default:
goto done;
}
if (policy == 0)
goto done;
/* if ip header is in its own mblk, skip it */
if (MBLKL(mp) <= skip_len) {
skip_len -= MBLKL(mp);
mp = mp->b_cont;
if (mp == NULL)
goto done;
}
/* parse ULP header */
again:
switch (proto) {
case IPPROTO_TCP:
case IPPROTO_UDP:
case IPPROTO_ESP:
case IPPROTO_SCTP:
/*
* These Internet Protocols are intentionally designed
* for hashing from the get-go. Port numbers are in the first
* word for transports, the SPI is first for ESP.
*/
if (mp->b_rptr + skip_len + 4 > mp->b_wptr)
goto done;
hash ^= PKT_HASH_4BYTES((mp->b_rptr + skip_len));
break;
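/*
* AH is not hashed directly; skip over it and hash the
* encapsulated header instead.
*/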
case IPPROTO_AH: {
ah_t *ah = (ah_t *)(mp->b_rptr + skip_len);
uint_t ah_length = AH_TOTAL_LEN(ah);
if ((unsigned char *)ah + sizeof (ah_t) > mp->b_wptr)
goto done;
proto = ah->ah_nexthdr;
skip_len += ah_length;
/* if AH header is in its own mblk, skip it */
if (MBLKL(mp) <= skip_len) {
skip_len -= MBLKL(mp);
mp = mp->b_cont;
if (mp == NULL)
goto done;
}
goto again;
}
}
done:
return (hash);
}