/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
*/
#include <sys/types.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/ksynch.h>
#include <sys/byteorder.h>
#include <sys/ib/clients/eoib/eib_impl.h>
/*
* Declarations private to this file
*/
static int eib_fip_make_login(eib_t *, eib_vnic_t *, eib_wqe_t *, int *);
static int eib_fip_make_update(eib_t *, eib_vnic_t *, eib_wqe_t *, int, int *);
static int eib_fip_make_table(eib_t *, eib_vnic_t *, eib_wqe_t *, int *);
static int eib_fip_make_ka(eib_t *, eib_vnic_t *, eib_wqe_t *, int *);
static int eib_fip_make_logout(eib_t *, eib_vnic_t *, eib_wqe_t *, int *);
static int eib_fip_send_login(eib_t *, eib_vnic_t *, eib_wqe_t *, int *);
static int eib_fip_send_update(eib_t *, eib_vnic_t *, eib_wqe_t *,
uint_t, int *);
static int eib_fip_send_table(eib_t *, eib_vnic_t *, eib_wqe_t *, int *);
static int eib_fip_send_ka(eib_t *, eib_vnic_t *, eib_wqe_t *, int *);
static int eib_fip_send_logout(eib_t *, eib_vnic_t *, eib_wqe_t *, int *);
static int eib_fip_parse_vhub_table(uint8_t *, eib_vnic_t *);
static int eib_fip_parse_vhub_update(uint8_t *, eib_vnic_t *);
static void eib_fip_update_eport_state(eib_t *, eib_vhub_table_t *,
eib_vhub_update_t *, boolean_t, uint8_t);
static void eib_fip_queue_tbl_entry(eib_vhub_table_t *, eib_vhub_map_t *,
uint32_t, uint8_t);
static void eib_fip_queue_upd_entry(eib_vhub_update_t *, eib_vhub_map_t *,
uint32_t, uint8_t);
static void eib_fip_queue_gw_entry(eib_vnic_t *, eib_vhub_table_t *, uint32_t,
uint8_t);
static int eib_fip_apply_updates(eib_t *, eib_vhub_table_t *,
eib_vhub_update_t *);
static void eib_fip_dequeue_tbl_entry(eib_vhub_table_t *, uint8_t *, uint32_t,
uint8_t);
static eib_vhub_map_t *eib_fip_get_vhub_map(void);
/*
* Definitions private to this file
*/
const char eib_vendor_mellanox[] = {
0x4d, 0x65, 0x6c, 0x6c, 0x61, 0x6e, 0x6f, 0x78
};
/*
* The three requests to the gateway - a vHUB table request, a vHUB
* update request (aka keepalive) and a vNIC logout - all send the
* same vnic identity descriptor, just with different flag settings.
*
* vHUB table: R=1, U=0, TUSN=last, subcode=KEEPALIVE
* keepalive/vHUB update: R=0, U=1, TUSN=last, subcode=KEEPALIVE
* vNIC logout: R=0, U=0, TUSN=0, subcode=LOGOUT
*/
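/*
* The EIB_UPD_REQ_* codes below select which of these three settings
* eib_fip_make_update() applies when it builds the request.
*/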
#define EIB_UPD_REQ_TABLE 1
#define EIB_UPD_REQ_KA 2
#define EIB_UPD_REQ_LOGOUT 3
int
eib_fip_login(eib_t *ss, eib_vnic_t *vnic, int *err)
{
eib_wqe_t *swqe;
int ret;
int ntries = 0;
do {
if ((swqe = eib_rsrc_grab_swqe(ss, EIB_WPRI_LO)) == NULL) {
EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_login: "
"no swqe available, not sending "
"vnic login request");
*err = ENOMEM;
return (EIB_E_FAILURE);
}
ret = eib_fip_make_login(ss, vnic, swqe, err);
if (ret != EIB_E_SUCCESS) {
eib_rsrc_return_swqe(ss, swqe, NULL);
return (EIB_E_FAILURE);
}
ret = eib_fip_send_login(ss, vnic, swqe, err);
if (ret != EIB_E_SUCCESS) {
eib_rsrc_return_swqe(ss, swqe, NULL);
return (EIB_E_FAILURE);
}
ret = eib_vnic_wait_for_login_ack(ss, vnic, err);
if (ret == EIB_E_SUCCESS)
break;
} while ((*err == ETIME) && (ntries++ < EIB_MAX_LOGIN_ATTEMPTS));
return (ret);
}
int
eib_fip_vhub_table(eib_t *ss, eib_vnic_t *vnic, int *err)
{
eib_wqe_t *swqe;
int ret;
int ntries = 0;
do {
if ((swqe = eib_rsrc_grab_swqe(ss, EIB_WPRI_LO)) == NULL) {
EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_vhub_table: "
"no swqe available, not sending "
"vhub table request");
*err = ENOMEM;
return (EIB_E_FAILURE);
}
ret = eib_fip_make_table(ss, vnic, swqe, err);
if (ret != EIB_E_SUCCESS) {
eib_rsrc_return_swqe(ss, swqe, NULL);
return (EIB_E_FAILURE);
}
ret = eib_fip_send_table(ss, vnic, swqe, err);
if (ret != EIB_E_SUCCESS) {
eib_rsrc_return_swqe(ss, swqe, NULL);
return (EIB_E_FAILURE);
}
ret = eib_vnic_wait_for_table(ss, vnic, err);
if (ret == EIB_E_SUCCESS) {
return (EIB_E_SUCCESS);
}
/*
* If we failed to construct a proper vhub table above, the vnic
* login state will have been set to EIB_LOGIN_TBL_FAILED.
* We need to clean up any pending entries from the vhub
* table and vhub update structures and reset the vnic state
* to EIB_LOGIN_ACK_RCVD before we can try again.
*/
eib_vnic_fini_tables(ss, vnic, B_FALSE);
mutex_enter(&vnic->vn_lock);
vnic->vn_state = EIB_LOGIN_ACK_RCVD;
mutex_exit(&vnic->vn_lock);
} while ((*err == ETIME) && (ntries++ < EIB_MAX_VHUB_TBL_ATTEMPTS));
return (EIB_E_FAILURE);
}
int
eib_fip_heartbeat(eib_t *ss, eib_vnic_t *vnic, int *err)
{
eib_wqe_t *swqe;
int ntries = 0;
int ret;
/*
* Even if we're running low on the wqe resource, we want to be
* able to grab a wqe to send the keepalive, to avoid getting
* logged out by the gateway, so we use EIB_WPRI_HI.
*/
if ((swqe = eib_rsrc_grab_swqe(ss, EIB_WPRI_HI)) == NULL) {
EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_heartbeat: "
"no swqe available, not sending heartbeat");
return (EIB_E_FAILURE);
}
while (ntries++ < EIB_MAX_KA_ATTEMPTS) {
ret = eib_fip_make_ka(ss, vnic, swqe, err);
if (ret != EIB_E_SUCCESS)
continue;
ret = eib_fip_send_ka(ss, vnic, swqe, err);
if (ret == EIB_E_SUCCESS)
break;
}
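/*
* A successfully posted keepalive consumes the swqe; it will be
* reaped back into the free pool when the send completes. We only
* return the swqe ourselves if every attempt above failed.
*/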
if (ret != EIB_E_SUCCESS)
eib_rsrc_return_swqe(ss, swqe, NULL);
return (ret);
}
int
eib_fip_logout(eib_t *ss, eib_vnic_t *vnic, int *err)
{
eib_wqe_t *swqe;
int ret;
/*
* This routine is only called after the vnic has successfully
* logged in to the gateway. If that's really the case, there
* is nothing in terms of resources we need to release: the swqe
* that was acquired during login has already been posted, the
* work has been completed and the swqe has also been reaped back
* into the free pool. The only thing we need to rollback is the
* fact that we're logged in to the gateway at all -- and the way
* to do this is to send a logout request.
*/
if ((swqe = eib_rsrc_grab_swqe(ss, EIB_WPRI_LO)) == NULL) {
EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_logout: "
"no swqe available, not sending logout");
return (EIB_E_FAILURE);
}
ret = eib_fip_make_logout(ss, vnic, swqe, err);
if (ret != EIB_E_SUCCESS) {
eib_rsrc_return_swqe(ss, swqe, NULL);
return (EIB_E_FAILURE);
}
ret = eib_fip_send_logout(ss, vnic, swqe, err);
if (ret != EIB_E_SUCCESS) {
eib_rsrc_return_swqe(ss, swqe, NULL);
return (EIB_E_FAILURE);
}
return (EIB_E_SUCCESS);
}
int
eib_fip_parse_login_ack(eib_t *ss, uint8_t *pkt, eib_login_data_t *ld)
{
fip_login_ack_t *ack;
fip_basic_hdr_t *hdr;
fip_desc_iba_t *iba;
fip_desc_vnic_login_t *login;
fip_desc_partition_t *partition;
ib_guid_t guid;
uint32_t syn_ctl_qpn;
uint16_t sl_portid;
uint16_t flags_vlan;
uint16_t opcode;
uint8_t subcode;
/*
* Note that 'pkt' is always at least double-word aligned
* when it is passed to us, so we can cast it without any
* problems.
*/
ack = (fip_login_ack_t *)(void *)pkt;
hdr = &(ack->ak_fip_header);
/*
* Verify that the opcode is EoIB
*/
if ((opcode = ntohs(hdr->hd_opcode)) != FIP_OPCODE_EOIB) {
EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_login_ack: "
"unsupported opcode 0x%x in login ack, ignoring",
opcode);
return (EIB_E_FAILURE);
}
/*
* The admin qp in the EoIB driver should receive only the login
* acknowledgements
*/
subcode = hdr->hd_subcode;
if (subcode != FIP_SUBCODE_G_VNIC_LOGIN_ACK) {
EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_login_ack: "
"unexpected subcode 0x%x received by adm qp, ignoring",
subcode);
return (EIB_E_FAILURE);
}
/*
* Verify that the descriptor list length in the received packet is
* valid, unless the workaround to skip this check is in effect.
*/
if (!eib_wa_no_desc_list_len) {
uint_t pkt_data_sz;
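/*
* hd_desc_list_len counts 4-byte words and excludes the first two
* words of the frame, so (len + 2) << 2 recovers the total packet
* size in bytes (mirroring how it is set in eib_fip_make_login()).
*/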
pkt_data_sz = (ntohs(hdr->hd_desc_list_len) + 2) << 2;
if (pkt_data_sz < sizeof (fip_login_ack_t)) {
EIB_DPRINTF_WARN(ss->ei_instance,
"eib_fip_parse_login_ack: "
"login ack desc list len (0x%lx) too small "
"(min 0x%lx)",
pkt_data_sz, sizeof (fip_login_ack_t));
return (EIB_E_FAILURE);
}
}
/*
* Validate all the header and descriptor types and lengths
*/
if (hdr->hd_type != FIP_DESC_TYPE_VENDOR_ID ||
hdr->hd_len != FIP_DESC_LEN_VENDOR_ID) {
EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_login_ack: "
"invalid type/len in basic hdr: expected (0x%x,0x%x), "
"got (0x%x,0x%x)", FIP_DESC_TYPE_VENDOR_ID,
FIP_DESC_LEN_VENDOR_ID, hdr->hd_type, hdr->hd_len);
return (EIB_E_FAILURE);
}
iba = &(ack->ak_iba);
if (iba->ia_type != FIP_DESC_TYPE_IBA ||
iba->ia_len != FIP_DESC_LEN_IBA) {
EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_login_ack: "
"invalid type/len in iba desc: expected (0x%x,0x%x), "
"got (0x%x,0x%x)", FIP_DESC_TYPE_IBA, FIP_DESC_LEN_IBA,
iba->ia_type, iba->ia_len);
return (EIB_E_FAILURE);
}
login = &(ack->ak_vnic_login);
if (login->vl_type != FIP_DESC_TYPE_VNIC_LOGIN ||
login->vl_len != FIP_DESC_LEN_VNIC_LOGIN) {
EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_login_ack: "
"invalid type/len in login desc: expected (0x%x,0x%x), "
"got (0x%x,0x%x)", FIP_DESC_TYPE_VNIC_LOGIN,
FIP_DESC_LEN_VNIC_LOGIN, login->vl_type, login->vl_len);
return (EIB_E_FAILURE);
}
partition = &(ack->ak_vhub_partition);
if (partition->pn_type != FIP_DESC_TYPE_PARTITION ||
partition->pn_len != FIP_DESC_LEN_PARTITION) {
EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_login_ack: "
"invalid type/len in partition desc: expected (0x%x,0x%x), "
"got (0x%x,0x%x)", FIP_DESC_TYPE_PARTITION,
FIP_DESC_LEN_PARTITION, partition->pn_type,
partition->pn_len);
return (EIB_E_FAILURE);
}
/*
* Note that we'll return the vnic id as-is. The msb is not actually
* part of the vnic id in our internal records, so we'll mask it out
* later before we do our searches.
*/
ld->ld_vnic_id = ntohs(login->vl_vnic_id);
syn_ctl_qpn = ntohl(login->vl_syndrome_ctl_qpn);
/*
* If the syndrome indicates a nack, we're done. No need to collect
* any more information
*/
ld->ld_syndrome = (uint8_t)((syn_ctl_qpn & FIP_VL_SYN_MASK) >>
FIP_VL_SYN_SHIFT);
if (ld->ld_syndrome) {
return (EIB_E_SUCCESS);
}
/*
* Let's get the rest of the information out of the login ack
*/
sl_portid = ntohs(iba->ia_sl_portid);
ld->ld_gw_port_id = sl_portid & FIP_IBA_PORTID_MASK;
ld->ld_gw_sl = (sl_portid & FIP_IBA_SL_MASK) >> FIP_IBA_SL_SHIFT;
ld->ld_gw_data_qpn = ntohl(iba->ia_qpn) & FIP_IBA_QPN_MASK;
ld->ld_gw_lid = ntohs(iba->ia_lid);
bcopy(iba->ia_guid, &guid, sizeof (ib_guid_t));
ld->ld_gw_guid = ntohll(guid);
ld->ld_vhub_mtu = ntohs(login->vl_mtu);
bcopy(login->vl_mac, ld->ld_assigned_mac, ETHERADDRL);
bcopy(login->vl_gw_mgid_prefix, ld->ld_gw_mgid_prefix,
FIP_MGID_PREFIX_LEN);
ld->ld_n_rss_mcgid = login->vl_flags_rss & FIP_VL_N_RSS_MCGID_MASK;
ld->ld_n_mac_mcgid = login->vl_n_mac_mcgid & FIP_VL_N_MAC_MCGID_MASK;
ld->ld_gw_ctl_qpn = (syn_ctl_qpn & FIP_VL_CTL_QPN_MASK);
flags_vlan = ntohs(login->vl_flags_vlan);
ld->ld_assigned_vlan = flags_vlan & FIP_VL_VLAN_MASK;
ld->ld_vlan_in_packets = (flags_vlan & FIP_VL_FLAGS_VP) ? 1 : 0;
bcopy(login->vl_vnic_name, ld->ld_vnic_name, FIP_VNIC_NAME_LEN);
ld->ld_vhub_pkey = ntohs(partition->pn_pkey);
return (EIB_E_SUCCESS);
}
int
eib_fip_parse_ctl_pkt(uint8_t *pkt, eib_vnic_t *vnic)
{
eib_t *ss = vnic->vn_ss;
fip_vhub_pkt_t *vhb;
fip_basic_hdr_t *hdr;
uint16_t opcode;
uint8_t subcode;
uint_t vnic_state;
int ret = EIB_E_FAILURE;
/*
* Note that 'pkt' is always at least double-word aligned when it is
* passed to us, so we can cast it without any problems.
*/
vhb = (fip_vhub_pkt_t *)(void *)pkt;
hdr = &(vhb->hb_fip_header);
/*
* Verify that the opcode is EoIB
*/
if ((opcode = ntohs(hdr->hd_opcode)) != FIP_OPCODE_EOIB) {
EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_ctl_pkt: "
"unsupported opcode 0x%x in ctl pkt, ignoring",
opcode);
return (EIB_E_FAILURE);
}
mutex_enter(&vnic->vn_lock);
vnic_state = vnic->vn_state;
mutex_exit(&vnic->vn_lock);
/*
* The ctl qp in the EoIB driver should receive only vHUB messages
*/
subcode = hdr->hd_subcode;
if (subcode == FIP_SUBCODE_G_VHUB_UPDATE) {
if (vnic_state != EIB_LOGIN_TBL_WAIT &&
vnic_state != EIB_LOGIN_TBL_INPROG &&
vnic_state != EIB_LOGIN_TBL_DONE &&
vnic_state != EIB_LOGIN_DONE) {
EIB_DPRINTF_WARN(ss->ei_instance,
"eib_fip_parse_ctl_pkt: unexpected vnic state "
"(0x%lx) for subcode (VHUB_UPDATE 0x%x)",
vnic_state, subcode);
return (EIB_E_FAILURE);
}
ret = eib_fip_parse_vhub_update(pkt, vnic);
} else if (subcode == FIP_SUBCODE_G_VHUB_TABLE) {
if ((vnic_state != EIB_LOGIN_TBL_WAIT) &&
(vnic_state != EIB_LOGIN_TBL_INPROG)) {
EIB_DPRINTF_WARN(ss->ei_instance,
"eib_fip_parse_ctl_pkt: unexpected vnic state "
"(0x%lx) for subcode (VHUB_TABLE 0x%x)",
vnic_state, subcode);
return (EIB_E_FAILURE);
}
ret = eib_fip_parse_vhub_table(pkt, vnic);
} else {
EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_ctl_pkt: "
"unexpected subcode 0x%x for ctl pkt", subcode);
}
if (ret == EIB_E_SUCCESS) {
/*
* Update last gateway heartbeat received time and
* gateway eport state. The eport state should only
* be updated if the vnic's vhub table has been fully
* constructed.
*/
mutex_enter(&ss->ei_vnic_lock);
ss->ei_gw_last_heartbeat = ddi_get_lbolt64();
if (vnic_state == EIB_LOGIN_TBL_DONE ||
vnic_state == EIB_LOGIN_DONE) {
ss->ei_gw_eport_state =
vnic->vn_vhub_table->tb_eport_state;
}
mutex_exit(&ss->ei_vnic_lock);
}
return (ret);
}
static int
eib_fip_make_login(eib_t *ss, eib_vnic_t *vnic, eib_wqe_t *swqe, int *err)
{
fip_login_t *login;
fip_proto_t *proto;
fip_basic_hdr_t *hdr;
fip_desc_iba_t *iba;
fip_desc_vnic_login_t *vlg;
ib_gid_t port_gid;
ib_guid_t port_guid;
uint16_t sl_portid;
uint16_t flags_vlan;
uint16_t gw_portid = ss->ei_gw_props->pp_gw_portid;
uint16_t sl = ss->ei_gw_props->pp_gw_sl;
uint8_t *pkt = (uint8_t *)(uintptr_t)(swqe->qe_sgl.ds_va);
uint_t pktsz = swqe->qe_sgl.ds_len;
uint_t login_sz = sizeof (fip_login_t);
if (pktsz < login_sz) {
*err = EINVAL;
EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_make_login: "
"send buffer size (0x%lx) too small to send"
"login request (min 0x%lx)",
pktsz, login_sz);
return (EIB_E_FAILURE);
}
/*
* Lint complains that there may be an alignment issue here,
* but we know that the "pkt" is at least double-word aligned,
* so it's ok.
*/
login = (fip_login_t *)(void *)pkt;
bzero(pkt, login_sz);
/*
* Fill in the FIP protocol version
*/
proto = &login->lg_proto_version;
proto->pr_version = FIP_PROTO_VERSION;
/*
* Fill in the basic header
*/
hdr = &login->lg_fip_header;
hdr->hd_opcode = htons(FIP_OPCODE_EOIB);
hdr->hd_subcode = FIP_SUBCODE_H_VNIC_LOGIN;
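/* desc list length is in 4-byte words, excluding the first two words */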
hdr->hd_desc_list_len = htons((login_sz >> 2) - 2);
hdr->hd_flags = 0;
hdr->hd_type = FIP_DESC_TYPE_VENDOR_ID;
hdr->hd_len = FIP_DESC_LEN_VENDOR_ID;
bcopy(eib_vendor_mellanox, hdr->hd_vendor_id, FIP_VENDOR_LEN);
/*
* Fill in the Infiniband Address descriptor
*/
iba = &login->lg_iba;
iba->ia_type = FIP_DESC_TYPE_IBA;
iba->ia_len = FIP_DESC_LEN_IBA;
bcopy(eib_vendor_mellanox, iba->ia_vendor_id, FIP_VENDOR_LEN);
iba->ia_qpn = htonl(vnic->vn_data_chan->ch_qpn);
sl_portid = (gw_portid & FIP_IBA_PORTID_MASK) |
((sl << FIP_IBA_SL_SHIFT) & FIP_IBA_SL_MASK);
iba->ia_sl_portid = htons(sl_portid);
iba->ia_lid = htons(ss->ei_props->ep_blid);
port_gid = ss->ei_props->ep_sgid;
port_guid = htonll(port_gid.gid_guid);
bcopy(&port_guid, iba->ia_guid, FIP_GUID_LEN);
/*
* Now, fill in the vNIC Login descriptor
*/
vlg = &login->lg_vnic_login;
vlg->vl_type = FIP_DESC_TYPE_VNIC_LOGIN;
vlg->vl_len = FIP_DESC_LEN_VNIC_LOGIN;
bcopy(eib_vendor_mellanox, vlg->vl_vendor_id, FIP_VENDOR_LEN);
/*
* Only for physlink instance 0 do we ask the gateway to assign
* the mac address and a VLAN (tagless, actually). For this vnic
* only, we do not set the H bit. All other vnics are created by
* Solaris admin and will have the H bit set. Note also that we
* need to clear the vnic id's most significant bit for those that
* are administered by the gateway, so vnic0's vnic_id's msb should
* be 0 as well.
*/
if (vnic->vn_instance == 0) {
vlg->vl_vnic_id = htons(vnic->vn_id);
flags_vlan = vnic->vn_vlan & FIP_VL_VLAN_MASK;
} else {
vlg->vl_vnic_id = htons(vnic->vn_id | FIP_VL_VNIC_ID_MSBIT);
flags_vlan = (vnic->vn_vlan & FIP_VL_VLAN_MASK) |
FIP_VL_FLAGS_H | FIP_VL_FLAGS_M;
if (vnic->vn_vlan & FIP_VL_VLAN_MASK)
flags_vlan |= (FIP_VL_FLAGS_V | FIP_VL_FLAGS_VP);
}
vlg->vl_flags_vlan = htons(flags_vlan);
bcopy(vnic->vn_macaddr, vlg->vl_mac, ETHERADDRL);
/*
* We aren't ready to enable rss, so we clear the RSS bit and set
* the n_rss_mcgid field to 0. Set the n_mac_mcgid field to 0 as well.
*/
vlg->vl_flags_rss = 0;
vlg->vl_n_mac_mcgid = 0;
/*
* Set the syndrome to 0 and pass the control qpn
*/
vlg->vl_syndrome_ctl_qpn =
htonl(vnic->vn_ctl_chan->ch_qpn & FIP_VL_CTL_QPN_MASK);
/*
* Try to set as unique a name as possible for this vnic
*/
(void) snprintf((char *)(vlg->vl_vnic_name), FIP_VNIC_NAME_LEN,
"eoib_%02x_%02x", ss->ei_instance, vnic->vn_instance);
/*
* Adjust the ds_len in the sgl to indicate the size of this
* request before returning
*/
swqe->qe_sgl.ds_len = login_sz;
return (EIB_E_SUCCESS);
}
static int
eib_fip_make_update(eib_t *ss, eib_vnic_t *vnic, eib_wqe_t *swqe, int req,
int *err)
{
fip_keep_alive_t *ka;
fip_proto_t *proto;
fip_basic_hdr_t *hdr;
fip_desc_vnic_identity_t *vid;
ib_gid_t port_gid;
ib_guid_t port_guid;
uint32_t flags_vhub_id;
uint8_t *pkt = (uint8_t *)(uintptr_t)(swqe->qe_sgl.ds_va);
uint_t pktsz = swqe->qe_sgl.ds_len;
uint_t ka_sz = sizeof (fip_keep_alive_t);
if (pktsz < ka_sz) {
*err = EINVAL;
EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_make_update: "
"send buffer size (0x%lx) too small to send"
"keepalive/update request (min 0x%lx)",
pktsz, ka_sz);
return (EIB_E_FAILURE);
}
/*
* Lint complains that there may be an alignment issue here,
* but we know that the "pkt" is at least double-word aligned,
* so it's ok.
*/
ka = (fip_keep_alive_t *)(void *)pkt;
bzero(pkt, ka_sz);
/*
* Fill in the FIP protocol version
*/
proto = &ka->ka_proto_version;
proto->pr_version = FIP_PROTO_VERSION;
/*
* Fill in the basic header
*/
hdr = &ka->ka_fip_header;
hdr->hd_opcode = htons(FIP_OPCODE_EOIB);
hdr->hd_subcode = (req == EIB_UPD_REQ_LOGOUT) ?
FIP_SUBCODE_H_VNIC_LOGOUT : FIP_SUBCODE_H_KEEP_ALIVE;
hdr->hd_desc_list_len = htons((ka_sz >> 2) - 2);
hdr->hd_flags = 0;
hdr->hd_type = FIP_DESC_TYPE_VENDOR_ID;
hdr->hd_len = FIP_DESC_LEN_VENDOR_ID;
bcopy(eib_vendor_mellanox, hdr->hd_vendor_id, FIP_VENDOR_LEN);
/*
* Fill in the vNIC Identity descriptor
*/
vid = &ka->ka_vnic_identity;
vid->vi_type = FIP_DESC_TYPE_VNIC_IDENTITY;
vid->vi_len = FIP_DESC_LEN_VNIC_IDENTITY;
bcopy(eib_vendor_mellanox, vid->vi_vendor_id, FIP_VENDOR_LEN);
flags_vhub_id = vnic->vn_login_data.ld_vhub_id;
if (vnic->vn_login_data.ld_vlan_in_packets) {
flags_vhub_id |= FIP_VI_FLAG_VP;
}
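/*
* Per the flag summary at the top of this file: set R for a vhub
* table request, U for a keepalive/update request, and neither
* (with a zero TUSN) for a logout.
*/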
if (req == EIB_UPD_REQ_TABLE) {
flags_vhub_id |= FIP_VI_FLAG_R;
} else if (req == EIB_UPD_REQ_KA) {
flags_vhub_id |= FIP_VI_FLAG_U;
}
vid->vi_flags_vhub_id = htonl(flags_vhub_id);
vid->vi_tusn = (req != EIB_UPD_REQ_LOGOUT) ?
htonl(vnic->vn_vhub_table->tb_tusn) : 0;
vid->vi_vnic_id = htons(vnic->vn_login_data.ld_vnic_id);
bcopy(vnic->vn_login_data.ld_assigned_mac, vid->vi_mac, ETHERADDRL);
port_gid = ss->ei_props->ep_sgid;
port_guid = htonll(port_gid.gid_guid);
bcopy(&port_guid, vid->vi_port_guid, FIP_GUID_LEN);
bcopy(vnic->vn_login_data.ld_vnic_name, vid->vi_vnic_name,
FIP_VNIC_NAME_LEN);
/*
* Adjust the ds_len in the sgl to indicate the size of this
* request before returning
*/
swqe->qe_sgl.ds_len = ka_sz;
return (EIB_E_SUCCESS);
}
static int
eib_fip_make_table(eib_t *ss, eib_vnic_t *vnic, eib_wqe_t *swqe, int *err)
{
return (eib_fip_make_update(ss, vnic, swqe, EIB_UPD_REQ_TABLE, err));
}
static int
eib_fip_make_ka(eib_t *ss, eib_vnic_t *vnic, eib_wqe_t *swqe, int *err)
{
return (eib_fip_make_update(ss, vnic, swqe, EIB_UPD_REQ_KA, err));
}
static int
eib_fip_make_logout(eib_t *ss, eib_vnic_t *vnic, eib_wqe_t *swqe, int *err)
{
return (eib_fip_make_update(ss, vnic, swqe, EIB_UPD_REQ_LOGOUT, err));
}
static int
eib_fip_send_login(eib_t *ss, eib_vnic_t *vnic, eib_wqe_t *swqe, int *err)
{
eib_avect_t *av;
eib_chan_t *chan = ss->ei_admin_chan;
ibt_status_t ret;
/*
* Get an address vector for this destination
*/
if ((av = eib_ibt_hold_avect(ss, ss->ei_gw_props->pp_gw_lid,
ss->ei_gw_props->pp_gw_sl)) == NULL) {
*err = ENOMEM;
EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_send_login: "
"eib_ibt_hold_avect(gw_lid=0x%x, sl=0x%x) failed",
ss->ei_gw_props->pp_gw_lid, ss->ei_gw_props->pp_gw_sl);
return (EIB_E_FAILURE);
}
/*
* Modify the UD destination handle to the gateway
*/
ret = ibt_modify_ud_dest(swqe->qe_dest, EIB_FIP_QKEY,
ss->ei_gw_props->pp_gw_ctrl_qpn, &av->av_vect);
eib_ibt_release_avect(ss, av);
if (ret != IBT_SUCCESS) {
*err = EINVAL;
EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_send_login: "
"ibt_modify_ud_dest(gw_ctl_qpn=0x%lx, qkey=0x%lx) failed, "
"ret=%d", ss->ei_gw_props->pp_gw_ctrl_qpn,
EIB_FIP_QKEY, ret);
return (EIB_E_FAILURE);
}
/*
* Send the login packet to the destination gateway. Posting
* the login and setting the login state to wait-for-ack should
* ideally be atomic to avoid a race.
*/
mutex_enter(&vnic->vn_lock);
ret = ibt_post_send(chan->ch_chan, &(swqe->qe_wr.send), 1, NULL);
if (ret != IBT_SUCCESS) {
mutex_exit(&vnic->vn_lock);
*err = EINVAL;
EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_send_login: "
"ibt_post_send() failed for vnic id 0x%x, ret=%d",
vnic->vn_id, ret);
return (EIB_E_FAILURE);
}
vnic->vn_state = EIB_LOGIN_ACK_WAIT;
mutex_enter(&chan->ch_tx_lock);
chan->ch_tx_posted++;
mutex_exit(&chan->ch_tx_lock);
mutex_exit(&vnic->vn_lock);
return (EIB_E_SUCCESS);
}
static int
eib_fip_send_update(eib_t *ss, eib_vnic_t *vnic, eib_wqe_t *swqe,
uint_t nxt_state, int *err)
{
eib_login_data_t *ld = &vnic->vn_login_data;
eib_chan_t *chan = vnic->vn_ctl_chan;
eib_avect_t *av;
ibt_status_t ret;
/*
* Get an address vector for this destination
*/
if ((av = eib_ibt_hold_avect(ss, ld->ld_gw_lid,
ld->ld_gw_sl)) == NULL) {
*err = ENOMEM;
EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_send_update: "
"eib_ibt_hold_avect(gw_lid=0x%x, sl=0x%x) failed",
ld->ld_gw_lid, ld->ld_gw_sl);
return (EIB_E_FAILURE);
}
/*
* Modify the UD destination handle to the destination appropriately
*/
ret = ibt_modify_ud_dest(swqe->qe_dest, EIB_FIP_QKEY,
ld->ld_gw_ctl_qpn, &av->av_vect);
eib_ibt_release_avect(ss, av);
if (ret != IBT_SUCCESS) {
*err = EINVAL;
EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_send_update: "
"ibt_modify_ud_dest(gw_ctl_qpn=0x%lx, qkey=0x%lx) failed, "
"ret=%d", ld->ld_gw_ctl_qpn, EIB_FIP_QKEY, ret);
return (EIB_E_FAILURE);
}
/*
* Send the update packet to the destination. Posting the update
* request and setting the login state to wait-for-vhub_table need
* to be atomic to avoid a race.
*/
mutex_enter(&vnic->vn_lock);
ret = ibt_post_send(chan->ch_chan, &(swqe->qe_wr.send), 1, NULL);
if (ret != IBT_SUCCESS) {
mutex_exit(&vnic->vn_lock);
*err = EINVAL;
EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_send_update: "
"ibt_post_send() failed for vnic id 0x%x, ret=%d",
vnic->vn_id, ret);
return (EIB_E_FAILURE);
}
vnic->vn_state = nxt_state;
mutex_enter(&chan->ch_tx_lock);
chan->ch_tx_posted++;
mutex_exit(&chan->ch_tx_lock);
mutex_exit(&vnic->vn_lock);
return (EIB_E_SUCCESS);
}
static int
eib_fip_send_table(eib_t *ss, eib_vnic_t *vnic, eib_wqe_t *swqe, int *err)
{
return (eib_fip_send_update(ss, vnic, swqe, EIB_LOGIN_TBL_WAIT, err));
}
static int
eib_fip_send_ka(eib_t *ss, eib_vnic_t *vnic, eib_wqe_t *swqe, int *err)
{
return (eib_fip_send_update(ss, vnic, swqe, EIB_LOGIN_DONE, err));
}
static int
eib_fip_send_logout(eib_t *ss, eib_vnic_t *vnic, eib_wqe_t *swqe, int *err)
{
return (eib_fip_send_update(ss, vnic, swqe, EIB_LOGOUT_DONE, err));
}
static int
eib_fip_parse_vhub_table(uint8_t *pkt, eib_vnic_t *vnic)
{
fip_vhub_table_t *tbl;
fip_desc_vhub_table_t *desc_tbl;
fip_vhub_table_entry_t *entry;
fip_basic_hdr_t *hdr;
eib_t *ss = vnic->vn_ss;
eib_login_data_t *ld = &vnic->vn_login_data;
eib_vhub_table_t *etbl = vnic->vn_vhub_table;
eib_vhub_update_t *eupd = vnic->vn_vhub_update;
eib_vhub_map_t *newmap;
uint32_t *ipkt;
uint32_t init_checksum = 0;
uint32_t tusn;
uint32_t vhub_id;
uint_t entries_in_pkt;
uint_t ndx;
uint_t i;
/*
* If we're here receiving vhub table messages, we certainly should
* have the vhub table structure allocated and present at this point.
*/
if (etbl == NULL) {
EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_vhub_table: "
"vhub table missing for vnic id 0x%x", vnic->vn_id);
return (EIB_E_FAILURE);
}
/*
* Note that 'pkt' is always at least double-word aligned when it is
* passed to us, so we can cast it without any problems.
*/
ipkt = (uint32_t *)(void *)pkt;
tbl = (fip_vhub_table_t *)(void *)pkt;
hdr = &(tbl->vt_fip_header);
/*
* Validate all the header and descriptor types and lengths
*/
if (hdr->hd_type != FIP_DESC_TYPE_VENDOR_ID ||
hdr->hd_len != FIP_DESC_LEN_VENDOR_ID) {
EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_vhub_table: "
"invalid type/len in fip basic header, "
"exp (0x%x,0x%x), got (0x%x,0x%x)",
FIP_DESC_TYPE_VENDOR_ID, FIP_DESC_LEN_VENDOR_ID,
hdr->hd_type, hdr->hd_len);
return (EIB_E_FAILURE);
}
desc_tbl = &(tbl->vt_vhub_table);
if (desc_tbl->tb_type != FIP_DESC_TYPE_VHUB_TABLE) {
EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_vhub_table: "
"invalid type in vhub desc, exp 0x%x, got 0x%x",
FIP_DESC_TYPE_VHUB_TABLE, desc_tbl->tb_type);
return (EIB_E_FAILURE);
}
/*
* Verify that the vhub id is ok for this vnic
*/
vhub_id = ntohl(desc_tbl->tb_flags_vhub_id) & FIP_TB_VHUB_ID_MASK;
if (vhub_id != ld->ld_vhub_id) {
EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_vhub_table: "
"invalid vhub id in vhub table pkt: exp 0x%x, got 0x%x",
ld->ld_vhub_id, vhub_id);
return (EIB_E_FAILURE);
}
/*
* Count the number of vhub table entries in this packet. The
* descriptor length is in 4-byte words, so stripping the fixed
* vhub table descriptor words leaves the words taken by the entries.
*/
entries_in_pkt = (desc_tbl->tb_len - FIP_DESC_VHUB_TABLE_WORDS) /
FIP_VHUB_TABLE_ENTRY_WORDS;
/*
* While we're here, also compute the 32-bit 2's complement carry-
* discarded checksum of the vHUB table descriptor in this packet
* up to the first vhub table entry.
*/
for (i = 0; i < FIP_DESC_VHUB_TABLE_WORDS; i++)
init_checksum += ipkt[i];
/*
* Initialize the vhub's Table Update Sequence Number (tusn),
* checksum, and record the total number of entries in the table
* if this is the first pkt of the table.
*/
tusn = ntohl(desc_tbl->tb_tusn);
if (desc_tbl->tb_hdr & FIP_TB_HDR_FIRST) {
etbl->tb_entries_in_table = ntohs(desc_tbl->tb_table_size);
etbl->tb_tusn = tusn;
etbl->tb_checksum = 0;
mutex_enter(&vnic->vn_lock);
vnic->vn_state = EIB_LOGIN_TBL_INPROG;
mutex_exit(&vnic->vn_lock);
}
/*
* Whether this is the first, middle or last packet, the table TUSN
* we have recorded must match this packet's TUSN.
*/
if (etbl->tb_tusn != tusn) {
EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_vhub_table: "
"unexpected TUSN (0x%lx) during vhub table construction, "
"expected 0x%lx", etbl->tb_tusn, tusn);
goto vhub_table_fail;
}
/*
* See if we've overrun/underrun our original entries count
*/
if ((etbl->tb_entries_seen + entries_in_pkt) >
etbl->tb_entries_in_table) {
EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_vhub_table: "
"vhub table overrun, total_exp=%d, so_far=%d, this_pkt=%d",
etbl->tb_entries_in_table, etbl->tb_entries_seen,
entries_in_pkt);
goto vhub_table_fail;
} else if (((etbl->tb_entries_seen + entries_in_pkt) <
etbl->tb_entries_in_table) &&
(desc_tbl->tb_hdr & FIP_TB_HDR_LAST)) {
EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_vhub_table: "
"vhub table underrun, total_exp=%d, so_far=%d, last_pkt=%d",
etbl->tb_entries_in_table, etbl->tb_entries_seen,
entries_in_pkt);
goto vhub_table_fail;
}
/*
* Process and add the entries we have in this packet
*/
etbl->tb_checksum += init_checksum;
entry = (fip_vhub_table_entry_t *)(void *)
((uint8_t *)desc_tbl + FIP_DESC_VHUB_TABLE_SZ);
for (ndx = 0; ndx < entries_in_pkt; ndx++, entry++) {
/*
* Allocate a eib_vhub_map_t, copy the current entry details
* and chain it to the appropriate queue.
*/
if ((newmap = eib_fip_get_vhub_map()) == NULL) {
EIB_DPRINTF_WARN(ss->ei_instance,
"eib_fip_parse_vhub_table: no memory for vhub "
"table entry, ignoring this vhub table packet");
goto vhub_table_fail;
}
ASSERT((entry->te_v_rss_type & FIP_TE_VALID) == FIP_TE_VALID);
newmap->mp_v_rss_type = entry->te_v_rss_type;
bcopy(entry->te_mac, newmap->mp_mac, ETHERADDRL);
newmap->mp_qpn = (ntohl(entry->te_qpn) & FIP_TE_QPN_MASK);
newmap->mp_sl = (entry->te_sl & FIP_TE_SL_MASK);
newmap->mp_lid = ntohs(entry->te_lid);
newmap->mp_tusn = tusn;
newmap->mp_next = NULL;
/*
* The vhub table messages do not provide status on eport
* state, so we'll simply assume that the eport is up.
*/
eib_fip_queue_tbl_entry(etbl, newmap, tusn, FIP_EPORT_UP);
/*
* Update table checksum with this entry's computed checksum
*/
ipkt = (uint32_t *)entry;
for (i = 0; i < FIP_VHUB_TABLE_ENTRY_WORDS; i++)
etbl->tb_checksum += ipkt[i];
}
etbl->tb_entries_seen += entries_in_pkt;
/*
* If this is the last packet of this vhub table, complete the
* table by verifying the checksum and applying all the vhub
* updates that may have come in while we were constructing it.
*/
if (desc_tbl->tb_hdr & FIP_TB_HDR_LAST) {
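/*
* The 32-bit word following the last table entry carries the
* gateway's checksum of the table. A mismatch is only reported
* here; it does not fail table construction.
*/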
ipkt = (uint32_t *)entry;
if (!eib_wa_no_good_vhub_cksum) {
if (*ipkt != etbl->tb_checksum) {
EIB_DPRINTF_VERBOSE(ss->ei_instance,
"eib_fip_parse_vhub_table: "
"vhub table checksum invalid, "
"computed=0x%lx, found=0x%lx",
etbl->tb_checksum, *ipkt);
}
}
/*
* Per the EoIB specification, the gateway is supposed to
* include its address information for data messages in the
* vhub table. But we've observed that it doesn't do this
* (with the current version). If this is the case, we'll
* hand-create and add a vhub map for the gateway from the
* information we got in the login ack.
*/
if (etbl->tb_gateway == NULL)
eib_fip_queue_gw_entry(vnic, etbl, tusn, FIP_EPORT_UP);
/*
* Apply pending vhub updates and reset table counters needed
* during table construction.
*/
if (eib_fip_apply_updates(ss, etbl, eupd) != EIB_E_SUCCESS)
goto vhub_table_fail;
etbl->tb_entries_seen = 0;
etbl->tb_entries_in_table = 0;
eib_vnic_vhub_table_done(vnic, EIB_LOGIN_TBL_DONE);
}
return (EIB_E_SUCCESS);
vhub_table_fail:
eib_vnic_vhub_table_done(vnic, EIB_LOGIN_TBL_FAILED);
return (EIB_E_FAILURE);
}
static int
eib_fip_parse_vhub_update(uint8_t *pkt, eib_vnic_t *vnic)
{
fip_vhub_update_t *upd;
fip_desc_vhub_update_t *desc_upd;
fip_vhub_table_entry_t *entry;
fip_basic_hdr_t *hdr;
eib_t *ss = vnic->vn_ss;
eib_login_data_t *ld = &vnic->vn_login_data;
eib_vhub_table_t *etbl = vnic->vn_vhub_table;
eib_vhub_update_t *eupd = vnic->vn_vhub_update;
eib_vhub_map_t *newmap;
boolean_t vhub_tbl_done;
uint32_t eport_vp_vhub_id;
uint32_t vhub_id;
uint32_t tusn;
uint32_t prev_tusn;
uint8_t eport_state;
/*
* We should have the vhub table allocated as long as we're receiving
* vhub control messages.
*/
if (etbl == NULL) {
EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_vhub_update: "
"vhub table missing for vnic id 0x%x", vnic->vn_id);
return (EIB_E_FAILURE);
}
mutex_enter(&vnic->vn_lock);
vhub_tbl_done = ((vnic->vn_state == EIB_LOGIN_TBL_DONE) ||
(vnic->vn_state == EIB_LOGIN_DONE)) ? B_TRUE : B_FALSE;
mutex_exit(&vnic->vn_lock);
/*
* Note that 'pkt' is always at least double-word aligned when it is
* passed to us, so we can cast it without any problems.
*/
upd = (fip_vhub_update_t *)(void *)pkt;
hdr = &(upd->vu_fip_header);
/*
* Validate all the header and descriptor types and lengths
*/
if (hdr->hd_type != FIP_DESC_TYPE_VENDOR_ID ||
hdr->hd_len != FIP_DESC_LEN_VENDOR_ID) {
EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_vhub_update: "
"invalid type/len in fip basic header, "
"exp (0x%x,0x%x), got (0x%x,0x%x)",
FIP_DESC_TYPE_VENDOR_ID, FIP_DESC_LEN_VENDOR_ID,
hdr->hd_type, hdr->hd_len);
return (EIB_E_FAILURE);
}
desc_upd = &(upd->vu_vhub_update);
if (desc_upd->up_type != FIP_DESC_TYPE_VHUB_UPDATE ||
desc_upd->up_len != FIP_DESC_LEN_VHUB_UPDATE) {
EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_vhub_update: "
"invalid type/len in vhub update desc: "
"exp (0x%x,0x%x), got (0x%x,0x%x)",
FIP_DESC_TYPE_VHUB_UPDATE, FIP_DESC_LEN_VHUB_UPDATE,
desc_upd->up_type, desc_upd->up_len);
return (EIB_E_FAILURE);
}
/*
* Verify that the vhub id is ok for this vnic and save the eport state
*/
eport_vp_vhub_id = ntohl(desc_upd->up_eport_vp_vhub_id);
vhub_id = eport_vp_vhub_id & FIP_UP_VHUB_ID_MASK;
if (vhub_id != ld->ld_vhub_id) {
EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_vhub_update: "
"invalid vhub id in vhub update pkt: exp 0x%x, got 0x%x",
ld->ld_vhub_id, vhub_id);
return (EIB_E_FAILURE);
}
eport_state = (uint8_t)((eport_vp_vhub_id >> FIP_UP_EPORT_STATE_SHIFT) &
FIP_UP_EPORT_STATE_MASK);
/*
* If this is the first update we receive, any tusn is ok. Otherwise,
* make sure the tusn we see in the packet is appropriate.
*/
tusn = ntohl(desc_upd->up_tusn);
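/*
* While the vhub table is still under construction, updates are
* tracked against the pending update list's TUSN; once the table
* is done, we compare against the table's own TUSN.
*/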
prev_tusn = vhub_tbl_done ? etbl->tb_tusn : eupd->up_tusn;
if (prev_tusn != 0) {
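/*
* A repeated TUSN brings no new entry; just refresh the eport
* state and we're done.
*/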
if (tusn == prev_tusn) {
eib_fip_update_eport_state(ss, etbl, eupd,
vhub_tbl_done, eport_state);
return (EIB_E_SUCCESS);
}
if (tusn != (prev_tusn + 1)) {
EIB_DPRINTF_WARN(ss->ei_instance,
"eib_fip_parse_vhub_update: "
"out of order TUSN received (exp 0x%lx, "
"got 0x%lx), dropping pkt", prev_tusn + 1, tusn);
return (EIB_E_FAILURE);
}
}
/*
* EoIB expects only type 0 (vnic address) entries to maintain the
* context table
*/
entry = &(desc_upd->up_tbl_entry);
ASSERT((entry->te_v_rss_type & FIP_TE_TYPE_MASK) == FIP_TE_TYPE_VNIC);
/*
* If the vHUB table has already been fully constructed and if we've
* now received a notice to remove a vnic entry from it, do it.
*/
if ((vhub_tbl_done) &&
((entry->te_v_rss_type & FIP_TE_VALID) == 0)) {
eib_fip_dequeue_tbl_entry(etbl, entry->te_mac,
tusn, eport_state);
if (bcmp(entry->te_mac, ld->ld_assigned_mac, ETHERADDRL) == 0) {
uint8_t *mymac;
mymac = entry->te_mac;
EIB_DPRINTF_WARN(ss->ei_instance,
"eib_fip_parse_vhub_update: "
"vhub update pkt received to kill self "
"(%x:%x:%x:%x:%x:%x)", mymac[0], mymac[1], mymac[2],
mymac[3], mymac[4], mymac[5]);
return (EIB_E_FAILURE);
}
return (EIB_E_SUCCESS);
}
/*
* Otherwise, allocate a new eib_vhub_map_t and fill it in with
* the details of the new entry
*/
if ((newmap = eib_fip_get_vhub_map()) == NULL) {
EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_vhub_update: "
"no memory for vhub update entry, will be ignoring"
"this vhub update packet");
return (EIB_E_FAILURE);
}
newmap->mp_v_rss_type = entry->te_v_rss_type;
bcopy(entry->te_mac, newmap->mp_mac, ETHERADDRL);
newmap->mp_qpn = (ntohl(entry->te_qpn) & FIP_TE_QPN_MASK);
newmap->mp_sl = (entry->te_sl & FIP_TE_SL_MASK);
newmap->mp_lid = ntohs(entry->te_lid);
newmap->mp_tusn = tusn;
newmap->mp_next = NULL;
/*
* Update the full vhub table or chain it to the list of pending
* updates depending on if the vhub table construction is over
* or not.
*/
if (vhub_tbl_done) {
eib_fip_queue_tbl_entry(etbl, newmap, tusn, eport_state);
} else {
eib_fip_queue_upd_entry(eupd, newmap, tusn, eport_state);
}
return (EIB_E_SUCCESS);
}
static void
eib_fip_update_eport_state(eib_t *ss, eib_vhub_table_t *tbl,
eib_vhub_update_t *upd, boolean_t tbl_done, uint8_t eport_state)
{
if (tbl_done) {
mutex_enter(&tbl->tb_lock);
if (tbl->tb_eport_state != eport_state) {
EIB_DPRINTF_DEBUG(ss->ei_instance,
"eib_fip_update_eport_state: "
"eport state changing from %d to %d",
tbl->tb_eport_state, eport_state);
tbl->tb_eport_state = eport_state;
}
mutex_exit(&tbl->tb_lock);
} else {
mutex_enter(&upd->up_lock);
if (upd->up_eport_state != eport_state) {
EIB_DPRINTF_DEBUG(ss->ei_instance,
"eib_fip_update_eport_state: "
"eport state changing from %d to %d",
upd->up_eport_state, eport_state);
upd->up_eport_state = eport_state;
}
mutex_exit(&upd->up_lock);
}
}
static void
eib_fip_queue_tbl_entry(eib_vhub_table_t *tbl, eib_vhub_map_t *map,
uint32_t tusn, uint8_t eport_state)
{
uint8_t bkt;
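/*
* Vnic and multicast entries are hashed by the last byte of the
* mac address into one of EIB_TB_NBUCKETS buckets; the gateway,
* unicast-miss and vhub-multicast entries each occupy a single
* dedicated slot.
*/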
mutex_enter(&tbl->tb_lock);
switch (map->mp_v_rss_type & FIP_TE_TYPE_MASK) {
case FIP_TE_TYPE_GATEWAY:
if (tbl->tb_gateway) {
kmem_free(tbl->tb_gateway,
sizeof (eib_vhub_map_t));
}
tbl->tb_gateway = map;
break;
case FIP_TE_TYPE_UNICAST_MISS:
if (tbl->tb_unicast_miss) {
kmem_free(tbl->tb_unicast_miss,
sizeof (eib_vhub_map_t));
}
tbl->tb_unicast_miss = map;
break;
case FIP_TE_TYPE_VHUB_MULTICAST:
if (tbl->tb_vhub_multicast) {
kmem_free(tbl->tb_vhub_multicast,
sizeof (eib_vhub_map_t));
}
tbl->tb_vhub_multicast = map;
break;
case FIP_TE_TYPE_MULTICAST_ENTRY:
/*
* Unless the workaround to skip special handling of multicast
* entry types is in effect, file them into their own buckets;
* otherwise fall through and treat them like regular vnic
* addresses.
*/
if (!eib_wa_no_mcast_entries) {
bkt = (map->mp_mac[ETHERADDRL-1]) % EIB_TB_NBUCKETS;
map->mp_next = tbl->tb_mcast_entry[bkt];
tbl->tb_mcast_entry[bkt] = map;
break;
}
/*FALLTHROUGH*/
case FIP_TE_TYPE_VNIC:
bkt = (map->mp_mac[ETHERADDRL-1]) % EIB_TB_NBUCKETS;
map->mp_next = tbl->tb_vnic_entry[bkt];
tbl->tb_vnic_entry[bkt] = map;
break;
}
tbl->tb_tusn = tusn;
tbl->tb_eport_state = eport_state;
mutex_exit(&tbl->tb_lock);
}
static void
eib_fip_queue_upd_entry(eib_vhub_update_t *upd, eib_vhub_map_t *map,
uint32_t tusn, uint8_t eport_state)
{
eib_vhub_map_t *tail;
/*
* The eib_vhub_update_t list is only touched/traversed when the
* control cq handler is parsing either update or table message,
* or by the table cleanup routine when we aren't attached to any
* control mcgs. Bottom line is that this list traversal is always
* single-threaded and we could probably do away with the lock.
*/
mutex_enter(&upd->up_lock);
for (tail = upd->up_vnic_entry; tail != NULL; tail = tail->mp_next) {
if (tail->mp_next == NULL)
break;
}
if (tail) {
tail->mp_next = map;
} else {
upd->up_vnic_entry = map;
}
upd->up_tusn = tusn;
upd->up_eport_state = eport_state;
mutex_exit(&upd->up_lock);
}
static void
eib_fip_queue_gw_entry(eib_vnic_t *vnic, eib_vhub_table_t *tbl, uint32_t tusn,
uint8_t eport_state)
{
eib_t *ss = vnic->vn_ss;
eib_vhub_map_t *newmap;
eib_login_data_t *ld = &vnic->vn_login_data;
if ((newmap = eib_fip_get_vhub_map()) == NULL) {
EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_queue_gw_entry: "
"no memory to queue gw entry, transactions could fail");
return;
}
newmap->mp_v_rss_type = FIP_TE_VALID | FIP_TE_TYPE_GATEWAY;
bcopy(eib_zero_mac, newmap->mp_mac, ETHERADDRL);
newmap->mp_qpn = ld->ld_gw_data_qpn;
newmap->mp_sl = ld->ld_gw_sl;
newmap->mp_lid = ld->ld_gw_lid;
newmap->mp_tusn = tusn;
newmap->mp_next = NULL;
eib_fip_queue_tbl_entry(tbl, newmap, tusn, eport_state);
}
static int
eib_fip_apply_updates(eib_t *ss, eib_vhub_table_t *tbl, eib_vhub_update_t *upd)
{
eib_vhub_map_t *list;
eib_vhub_map_t *map;
eib_vhub_map_t *nxt;
uint32_t tbl_tusn = tbl->tb_tusn;
/*
* Take the update list out
*/
mutex_enter(&upd->up_lock);
list = upd->up_vnic_entry;
upd->up_vnic_entry = NULL;
mutex_exit(&upd->up_lock);
/*
* Skip any updates with older/same tusn as our vhub table
*/
nxt = NULL;
for (map = list; (map) && (map->mp_tusn <= tbl_tusn); map = nxt) {
nxt = map->mp_next;
kmem_free(map, sizeof (eib_vhub_map_t));
}
if (map == NULL)
return (EIB_E_SUCCESS);
/*
* If we missed any updates between table tusn and the first
* update tusn we got, we need to fail.
*/
if (map->mp_tusn > (tbl_tusn + 1)) {
EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_apply_updates: "
"vhub update missed tusn(s), expected=0x%lx, got=0x%lx",
(tbl_tusn + 1), map->mp_tusn);
for (; map != NULL; map = nxt) {
nxt = map->mp_next;
kmem_free(map, sizeof (eib_vhub_map_t));
}
return (EIB_E_FAILURE);
}
/*
* If everything is fine, apply all the updates we received
*/
for (; map != NULL; map = nxt) {
nxt = map->mp_next;
map->mp_next = NULL;
if (map->mp_v_rss_type & FIP_TE_VALID) {
eib_fip_queue_tbl_entry(tbl, map, upd->up_tusn,
upd->up_eport_state);
} else {
eib_fip_dequeue_tbl_entry(tbl, map->mp_mac,
upd->up_tusn, upd->up_eport_state);
kmem_free(map, sizeof (eib_vhub_map_t));
}
}
return (EIB_E_SUCCESS);
}
static void
eib_fip_dequeue_tbl_entry(eib_vhub_table_t *tbl, uint8_t *mac, uint32_t tusn,
uint8_t eport_state)
{
uint8_t bkt;
eib_vhub_map_t *prev;
eib_vhub_map_t *elem;
bkt = (mac[ETHERADDRL-1]) % EIB_TB_NBUCKETS;
mutex_enter(&tbl->tb_lock);
/*
* Note that for EoIB, the vhub table is maintained using only
* vnic entry updates
*/
prev = NULL;
for (elem = tbl->tb_vnic_entry[bkt]; elem; elem = elem->mp_next) {
if (bcmp(elem->mp_mac, mac, ETHERADDRL) == 0)
break;
prev = elem;
}
if (prev && elem) {
prev->mp_next = elem->mp_next;
kmem_free(elem, sizeof (eib_vhub_map_t));
}
tbl->tb_tusn = tusn;
tbl->tb_eport_state = eport_state;
mutex_exit(&tbl->tb_lock);
}
static eib_vhub_map_t *
eib_fip_get_vhub_map(void)
{
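/*
* KM_NOSLEEP here: callers run from the control cq handler path
* and are written to cope with a NULL return.
*/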
return (kmem_zalloc(sizeof (eib_vhub_map_t), KM_NOSLEEP));
}