/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
*/
#include <sys/byteorder.h>
/*
* Declarations private to this file
*/
uint_t, int *);
uint8_t);
uint8_t);
static eib_vhub_map_t *eib_fip_get_vhub_map(void);
/*
* Definitions private to this file
*/
/*
* The eight bytes spelling "Mellanox" (ASCII), stored without a
* terminating NUL.
*/
const char eib_vendor_mellanox[] = {
'M', 'e', 'l', 'l', 'a', 'n', 'o', 'x'
};
/*
* The three requests to the gateway - request a vHUB table, request a
* vHUB update (aka keepalive) and vNIC logout - all need the same
* vnic identity descriptor to be sent with different flag settings.
*
* vHUB table: R=1, U=0, TUSN=last, subcode=KEEPALIVE
* vNIC logout: R=0, U=0, TUSN=0, subcode=LOGOUT
*/
/*
* NOTE(review): the signature line and most call statements of this
* routine are not visible here. Judging by the remaining warning text
* it is presumably the vnic login request path -- confirm.
*
* What the visible lines show: a do-loop with an attempt counter
* (ntries) that returns EIB_E_FAILURE when no swqe is available or
* when either intermediate step does not yield EIB_E_SUCCESS, breaks
* out once ret is EIB_E_SUCCESS, and finally returns ret.
*/
int
{
int ret;
int ntries = 0;
do {
"no swqe available, not sending "
"vnic login request");
return (EIB_E_FAILURE);
}
if (ret != EIB_E_SUCCESS) {
return (EIB_E_FAILURE);
}
if (ret != EIB_E_SUCCESS) {
return (EIB_E_FAILURE);
}
/* Stop retrying as soon as an attempt succeeds */
if (ret == EIB_E_SUCCESS)
break;
return (ret);
}
/*
* NOTE(review): the signature line and most call statements of this
* routine are not visible here. Judging by the remaining warning text
* it is presumably the vhub table request path -- confirm.
*
* What the visible lines show: a do-loop with an attempt counter
* (ntries) that returns EIB_E_FAILURE when no swqe is available or
* when an intermediate step fails, returns EIB_E_SUCCESS as soon as
* ret is EIB_E_SUCCESS, and otherwise ends in EIB_E_FAILURE.
*/
int
{
int ret;
int ntries = 0;
do {
"no swqe available, not sending "
"vhub table request");
return (EIB_E_FAILURE);
}
if (ret != EIB_E_SUCCESS) {
return (EIB_E_FAILURE);
}
if (ret != EIB_E_SUCCESS) {
return (EIB_E_FAILURE);
}
if (ret == EIB_E_SUCCESS) {
return (EIB_E_SUCCESS);
}
/*
* If we'd failed in constructing a proper vhub table above,
* the vnic login state would be set to EIB_LOGIN_TBL_FAILED.
* We need to clean up any pending entries from the vhub
* table and vhub update structures and reset the vnic state
* to EIB_LOGIN_ACK_RCVD before we can try again.
*/
return (EIB_E_FAILURE);
}
/*
* NOTE(review): the signature line and most call statements of this
* routine are not visible here. Judging by the remaining warning text
* it is presumably the heartbeat (keepalive) path -- confirm.
*
* What the visible lines show: EIB_E_FAILURE is returned when no swqe
* is available; otherwise up to EIB_MAX_KA_ATTEMPTS iterations are
* made, moving to the next attempt when the first step fails and
* leaving the loop once ret is EIB_E_SUCCESS. A non-success ret is
* returned to the caller; the success return is not visible here.
*/
int
{
int ntries = 0;
int ret;
/*
* Even if we're running low on the wqe resource, we want to be
* able to grab a wqe to send the keepalive, to avoid getting
* logged out by the gateway, so we use EIB_WPRI_HI.
*/
"no swqe available, not sending heartbeat");
return (EIB_E_FAILURE);
}
while (ntries++ < EIB_MAX_KA_ATTEMPTS) {
if (ret != EIB_E_SUCCESS)
continue;
if (ret == EIB_E_SUCCESS)
break;
}
if (ret != EIB_E_SUCCESS)
return (ret);
}
/*
* NOTE(review): the signature line and most call statements of this
* routine are not visible here. Judging by the comment and warning
* text below it is presumably the vnic logout path -- confirm.
*
* What the visible lines show: EIB_E_FAILURE is returned when no swqe
* is available or when either intermediate step does not yield
* EIB_E_SUCCESS; otherwise EIB_E_SUCCESS is returned.
*/
int
{
int ret;
/*
* This routine is only called after the vnic has successfully
* logged in to the gateway. If that's really the case, there
* is nothing in terms of resources we need to release: the swqe
* that was acquired during login has already been posted, the
* work has been completed and the swqe has also been reaped back
* into the free pool. The only thing we need to rollback is the
* fact that we're logged in to the gateway at all -- and the way
* to do this is to send a logout request.
*/
"no swqe available, not sending logout");
return (EIB_E_FAILURE);
}
if (ret != EIB_E_SUCCESS) {
return (EIB_E_FAILURE);
}
if (ret != EIB_E_SUCCESS) {
return (EIB_E_FAILURE);
}
return (EIB_E_SUCCESS);
}
/*
* Parse a login acknowledgement packet (function name taken from the
* log text below; NOTE(review): the signature line is not visible).
*
* Visible checks, each returning EIB_E_FAILURE on mismatch: the opcode,
* the subcode (must be FIP_SUBCODE_G_VNIC_LOGIN_ACK), the descriptor
* list length (skipped when eib_wa_no_desc_list_len is set), and the
* vendor-id, vnic-login and partition descriptors. A non-zero
* ld->ld_syndrome ends parsing early with EIB_E_SUCCESS; otherwise the
* remaining fields are collected and EIB_E_SUCCESS is returned.
*/
int
{
/*
* Note that 'pkt' is always at least double-word aligned
* when it is passed to us, so we can cast it without any
* problems.
*/
/*
* Verify that the opcode is EoIB
*/
"unsupported opcode 0x%x in login ack, ignoring",
opcode);
return (EIB_E_FAILURE);
}
/*
* The admin qp in the EoIB driver should receive only the login
* acknowledgements
*/
if (subcode != FIP_SUBCODE_G_VNIC_LOGIN_ACK) {
"unexpected subcode 0x%x received by adm qp, ignoring",
subcode);
return (EIB_E_FAILURE);
}
/*
* Verify that the descriptor list length in the received packet is
* valid, unless the workaround that disables this check is set.
*/
if (!eib_wa_no_desc_list_len) {
if (pkt_data_sz < sizeof (fip_login_ack_t)) {
"eib_fip_parse_login_ack: "
"login ack desc list len (0x%lx) too small "
"(min 0x%lx)",
pkt_data_sz, sizeof (fip_login_ack_t));
return (EIB_E_FAILURE);
}
}
/*
* Validate all the header and descriptor types and lengths
*/
"got (0x%x,0x%x)", FIP_DESC_TYPE_VENDOR_ID,
return (EIB_E_FAILURE);
}
return (EIB_E_FAILURE);
}
"got (0x%x,0x%x)", FIP_DESC_TYPE_VNIC_LOGIN,
return (EIB_E_FAILURE);
}
"got (0x%x,0x%x)", FIP_DESC_TYPE_PARTITION,
return (EIB_E_FAILURE);
}
/*
* Note that we'll return the vnic id as-is. The msb is not actually
* part of the vnic id in our internal records, so we'll mask it out
* later before we do our searches.
*/
/*
* If the syndrome indicates a nack, we're done. No need to collect
* any more information
*/
if (ld->ld_syndrome) {
return (EIB_E_SUCCESS);
}
/*
* Let's get the rest of the information out of the login ack
*/
return (EIB_E_SUCCESS);
}
/*
* Parse a packet received on the control qp (function name taken from
* the log text below; NOTE(review): the signature line is not visible).
*
* Visible checks: an unsupported opcode returns EIB_E_FAILURE. A
* VHUB_UPDATE subcode is accepted only in vnic states
* EIB_LOGIN_TBL_WAIT, EIB_LOGIN_TBL_DONE or EIB_LOGIN_DONE; a
* VHUB_TABLE subcode only in EIB_LOGIN_TBL_WAIT or
* EIB_LOGIN_TBL_INPROG; any other state returns EIB_E_FAILURE. Any
* other subcode takes the "unexpected subcode" branch. The function
* returns ret.
*/
int
{
/*
* Note that 'pkt' is always at least double-word aligned when it is
* passed to us, so we can cast it without any problems.
*/
/*
* Verify that the opcode is EoIB
*/
"unsupported opcode 0x%x in ctl pkt, ignoring",
opcode);
return (EIB_E_FAILURE);
}
/*
* The ctl qp in the EoIB driver should receive only vHUB messages
*/
if (subcode == FIP_SUBCODE_G_VHUB_UPDATE) {
if (vnic_state != EIB_LOGIN_TBL_WAIT &&
vnic_state != EIB_LOGIN_TBL_DONE &&
vnic_state != EIB_LOGIN_DONE) {
"eib_fip_parse_ctl_pkt: unexpected vnic state "
"(0x%lx) for subcode (VHUB_UPDATE 0x%x)",
return (EIB_E_FAILURE);
}
} else if (subcode == FIP_SUBCODE_G_VHUB_TABLE) {
if ((vnic_state != EIB_LOGIN_TBL_WAIT) &&
(vnic_state != EIB_LOGIN_TBL_INPROG)) {
"eib_fip_parse_ctl_pkt: unexpected vnic state "
"(0x%lx) for subcode (VHUB_TABLE 0x%x)",
return (EIB_E_FAILURE);
}
} else {
"unexpected subcode 0x%x for ctl pkt", subcode);
}
if (ret == EIB_E_SUCCESS) {
/*
* Update last gateway heartbeat received time and
* gateway eport state. The eport state should only
* be updated if the vnic's vhub table has been fully
* constructed.
*/
if (vnic_state == EIB_LOGIN_TBL_DONE ||
vnic_state == EIB_LOGIN_DONE) {
}
}
return (ret);
}
/*
* NOTE(review): the signature line and most statements of this routine
* are not visible here. Judging by the comments and warning text it
* presumably builds the vnic login request in the send buffer --
* confirm.
*
* What the visible lines show: EIB_E_FAILURE is returned when the send
* buffer is too small; vl_flags_rss and vl_n_mac_mcgid of the login
* descriptor are set to 0; vn_instance == 0 is handled separately from
* all other instances; EIB_E_SUCCESS is returned at the end.
*/
static int
{
/*
* Adjacent string literals are concatenated with no separator, so
* the first piece must end in a space to keep "send" and "login"
* apart in the logged message.
*/
"send buffer size (0x%lx) too small to send "
"login request (min 0x%lx)",
return (EIB_E_FAILURE);
}
/*
* Lint complains that there may be an alignment issue here,
* but we know that the "pkt" is at least double-word aligned,
* so it's ok.
*/
/*
* Fill in the FIP protocol version
*/
/*
* Fill in the basic header
*/
/*
* Fill in the Infiniband Address descriptor
*/
/*
* Now, fill in the vNIC Login descriptor
*/
/*
* Only for the physlink instance 0, we ask the gateway to assign
* the mac address and a VLAN (tagless, actually). For this vnic
* only, we do not set the H bit. All other vnics are created by
* Solaris admin and will have the H bit set. Note also that we
* need to clear the vnic id's most significant bit for those that
* are administered by the gateway, so vnic0's vnic_id's msb should
* be 0 as well.
*/
if (vnic->vn_instance == 0) {
} else {
}
/*
* We aren't ready to enable rss, so we set the RSS bit and
* the n_rss_mcgid field to 0. Set the mac mcgid to 0 as well.
*/
vlg->vl_flags_rss = 0;
vlg->vl_n_mac_mcgid = 0;
/*
* Set the syndrome to 0 and pass the control qpn
*/
/*
* Try to set as unique a name as possible for this vnic
*/
/*
* Adjust the ds_len in the sgl to indicate the size of this
* request before returning
*/
return (EIB_E_SUCCESS);
}
/*
* NOTE(review): only the tail of the signature ("int *err") and a few
* statements of this routine are visible here. Judging by the comments
* it presumably builds an update request carrying the vNIC Identity
* descriptor -- confirm.
*
* What the visible lines show: EIB_E_FAILURE is returned when the send
* buffer is too small; the request kind 'req' is distinguished between
* EIB_UPD_REQ_TABLE and EIB_UPD_REQ_KA; EIB_E_SUCCESS is returned at
* the end.
*/
static int
int *err)
{
"send buffer size (0x%lx) too small to send"
return (EIB_E_FAILURE);
}
/*
* Lint complains that there may be an alignment issue here,
* but we know that the "pkt" is at least double-word aligned,
* so it's ok.
*/
/*
* Fill in the FIP protocol version
*/
/*
* Fill in the basic header
*/
/*
* Fill in the vNIC Identity descriptor
*/
}
if (req == EIB_UPD_REQ_TABLE) {
} else if (req == EIB_UPD_REQ_KA) {
}
/*
* Adjust the ds_len in the sgl to indicate the size of this
* request before returning
*/
return (EIB_E_SUCCESS);
}
static int
{
}
static int
{
}
static int
{
}
/*
* NOTE(review): the signature line and the call statements of this
* routine are not visible here. Judging by the comments it presumably
* posts the login packet to the gateway -- confirm.
*
* What the visible lines show: EIB_E_FAILURE is returned when the
* address vector cannot be obtained, when modifying the UD destination
* does not return IBT_SUCCESS, or when the post does not return
* IBT_SUCCESS. On success chan->ch_tx_posted is incremented and
* EIB_E_SUCCESS is returned.
*/
static int
{
/*
* Get an address vector for this destination
*/
"eib_ibt_hold_avect(gw_lid=0x%x, sl=0x%x) failed",
return (EIB_E_FAILURE);
}
/*
* Modify the UD destination handle to the gateway
*/
if (ret != IBT_SUCCESS) {
"ibt_modify_ud_dest(gw_ctl_qpn=0x%lx, qkey=0x%lx) failed, "
EIB_FIP_QKEY, ret);
return (EIB_E_FAILURE);
}
/*
* Send the login packet to the destination gateway. Posting
* the login and setting the login state to wait-for-ack should
* ideally be atomic to avoid race.
*/
if (ret != IBT_SUCCESS) {
"ibt_post_send() failed for vnic id 0x%x, ret=%d",
return (EIB_E_FAILURE);
}
/* Count the successfully posted send on this channel */
chan->ch_tx_posted++;
return (EIB_E_SUCCESS);
}
/*
* NOTE(review): the signature line and the call statements of this
* routine are not visible here. Judging by the comments it presumably
* posts the update packet to its destination -- confirm.
*
* What the visible lines show: EIB_E_FAILURE is returned when the
* address vector cannot be obtained, when modifying the UD destination
* does not return IBT_SUCCESS, or when the post does not return
* IBT_SUCCESS. On success chan->ch_tx_posted is incremented and
* EIB_E_SUCCESS is returned.
*/
static int
{
/*
* Get an address vector for this destination
*/
"eib_ibt_hold_avect(gw_lid=0x%x, sl=0x%x) failed",
return (EIB_E_FAILURE);
}
/*
* Modify the UD destination handle to the destination appropriately
*/
if (ret != IBT_SUCCESS) {
"ibt_modify_ud_dest(gw_ctl_qpn=0x%lx, qkey=0x%lx) failed, "
return (EIB_E_FAILURE);
}
/*
* Send the update packet to the destination. Posting the update request
* and setting the login state to wait-for-vhub_table needs to be atomic
* to avoid race.
*/
if (ret != IBT_SUCCESS) {
"ibt_post_send() failed for vnic id 0x%x, ret=%d",
return (EIB_E_FAILURE);
}
/* Count the successfully posted send on this channel */
chan->ch_tx_posted++;
return (EIB_E_SUCCESS);
}
static int
{
}
static int
{
}
static int
{
}
/*
* Parse one packet of a vhub table (function name taken from the log
* text below; NOTE(review): the signature line, several conditions and
* the vhub_table_fail label are not visible here).
*
* Visible behavior: header/descriptor, vhub id and TUSN mismatches are
* rejected; the words of the table descriptor are summed into
* init_checksum; entry-count overrun and underrun are treated as
* failures; each entry in the packet gets a newly allocated map; on
* the last packet the checksum is verified (unless
* eib_wa_no_good_vhub_cksum is set), pending updates are applied and
* tb_entries_seen / tb_entries_in_table are reset to 0.
*/
static int
{
uint_t i;
/*
* If we're here receiving vhub table messages, we certainly should
* have the vhub table structure allocated and present at this point.
*/
return (EIB_E_FAILURE);
}
/*
* Note that 'pkt' is always at least double-word aligned when it is
* passed to us, so we can cast it without any problems.
*/
/*
* Validate all the header and descriptor types and lengths
*/
"exp (0x%x,0x%x), got (0x%x,0x%x)",
return (EIB_E_FAILURE);
}
"invalid type in vhub desc, exp 0x%x, got 0x%x",
return (EIB_E_FAILURE);
}
/*
* Verify that the vhub id is ok for this vnic
*/
"invalid vhub id in vhub table pkt: exp 0x%x, got 0x%x",
return (EIB_E_FAILURE);
}
/*
* Count the number of vhub table entries in this packet
*/
/*
* While we're here, also compute the 32-bit 2's complement carry-
* discarded checksum of the vHUB table descriptor in this packet
* till the first vhub table entry.
*/
for (i = 0; i < FIP_DESC_VHUB_TABLE_WORDS; i++)
init_checksum += ipkt[i];
/*
* Initialize the vhub's Table Update Sequence Number (tusn),
* checksum and record the total number of entries in the table
* if this is the first pkt of the table.
*/
etbl->tb_checksum = 0;
}
/*
* First, middle or last, the current table TUSN we have must match this
* packet's TUSN.
*/
"unexpected TUSN (0x%lx) during vhub table construction, "
goto vhub_table_fail;
}
/*
* Check for overrun or underrun against the expected total number
* of table entries.
*/
"vhub table overrun, total_exp=%d, so_far=%d, this_pkt=%d",
goto vhub_table_fail;
etbl->tb_entries_in_table) &&
"vhub table underrun, total_exp=%d, so_far=%d, last_pkt=%d",
goto vhub_table_fail;
}
/*
* Process and add the entries we have in this packet
*/
entry = (fip_vhub_table_entry_t *)(void *)
/*
* Allocate a eib_vhub_map_t, copy the current entry details
* and chain it to the appropriate queue.
*/
"eib_fip_parse_vhub_table: no memory for vhub "
"table entry, ignoring this vhub table packet");
goto vhub_table_fail;
}
/*
* The vhub table messages do not provide status on eport
* state, so we'll simply assume that the eport is up.
*/
/*
* Update table checksum with this entry's computed checksum
*/
for (i = 0; i < FIP_VHUB_TABLE_ENTRY_WORDS; i++)
}
/*
* If this is the last packet of this vhub table, complete vhub
* table by verifying checksum and applying all the vhub updates
* that may have come in while we were constructing this table.
*/
if (!eib_wa_no_good_vhub_cksum) {
"eib_fip_parse_vhub_table: "
"vhub table checksum invalid, "
"computed=0x%lx, found=0x%lx",
}
}
/*
* Per the EoIB specification, the gateway is supposed to
* include its address information for data messages in the
* vhub table. But we've observed that it doesn't do this
* (with the current version). If this is the case, we'll
* hand-create and add a vhub map for the gateway from the
* information we got in login ack.
*/
/*
* Apply pending vhub updates and reset table counters needed
* during table construction.
*/
goto vhub_table_fail;
etbl->tb_entries_seen = 0;
etbl->tb_entries_in_table = 0;
}
return (EIB_E_SUCCESS);
return (EIB_E_FAILURE);
}
/*
* Parse a vhub update packet (function name taken from the log text
* below; NOTE(review): the signature line and several conditions are
* not visible here).
*
* Visible behavior: header/descriptor and vhub id mismatches return
* EIB_E_FAILURE; when prev_tusn is non-zero the packet's TUSN is
* checked, with an out-of-order TUSN returning EIB_E_FAILURE; once the
* vhub table is complete (vhub_tbl_done) a removal notice is handled
* in place, and a notice to remove this vnic itself returns
* EIB_E_FAILURE; otherwise a new map is allocated (EIB_E_FAILURE when
* no memory is available) and is either applied to the table or
* chained to the pending updates depending on vhub_tbl_done.
*/
static int
{
/*
* We should have the vhub table allocated as long as we're receiving
* vhub control messages.
*/
return (EIB_E_FAILURE);
}
/*
* Note that 'pkt' is always at least double-word aligned when it is
* passed to us, so we can cast it without any problems.
*/
/*
* Validate all the header and descriptor types and lengths
*/
"exp (0x%x,0x%x), got (0x%x,0x%x)",
return (EIB_E_FAILURE);
}
"exp (0x%x,0x%x), got (0x%x,0x%x)",
return (EIB_E_FAILURE);
}
/*
* Verify that the vhub id is ok for this vnic and save the eport state
*/
"invalid vhub id in vhub update pkt: exp 0x%x, got 0x%x",
return (EIB_E_FAILURE);
}
/*
* If this is the first update we receive, any tusn is ok. Otherwise,
* make sure the tusn we see in the packet is appropriate.
*/
if (prev_tusn != 0) {
return (EIB_E_SUCCESS);
}
"eib_fip_parse_vhub_update: "
"out of order TUSN received (exp 0x%lx, "
return (EIB_E_FAILURE);
}
}
/*
* EoIB expects only type 0 (vnic address) entries to maintain the
* context table
*/
/*
* If the vHUB table has already been fully constructed and if we've
* now received a notice to remove a vnic entry from it, do it.
*/
if ((vhub_tbl_done) &&
tusn, eport_state);
"eib_fip_parse_vhub_update: "
"vhub update pkt received to kill self "
return (EIB_E_FAILURE);
}
return (EIB_E_SUCCESS);
}
/*
* Otherwise, allocate a new eib_vhub_map_t and fill it in with
* the details of the new entry
*/
/*
* Adjacent string literals are concatenated with no separator, so
* the first piece must end in a space to keep "ignoring" and "this"
* apart in the logged message.
*/
"no memory for vhub update entry, will be ignoring "
"this vhub update packet");
return (EIB_E_FAILURE);
}
/*
* Update the full vhub table or chain it to the list of pending
* updates depending on if the vhub table construction is over
* or not.
*/
if (vhub_tbl_done) {
} else {
}
return (EIB_E_SUCCESS);
}
/*
* Record a change of eport state (function name taken from the log
* text below; NOTE(review): the signature line and the assignments are
* not visible here). The visible lines branch on tbl_done and emit an
* "eport state changing from %d to %d" message in either branch.
*/
static void
{
if (tbl_done) {
"eib_fip_update_eport_state: "
"eport state changing from %d to %d",
}
} else {
"eib_fip_update_eport_state: "
"eport state changing from %d to %d",
}
}
}
/*
* NOTE(review): the signature line, the switch header, some case labels
* and the statements of this routine are not visible here. It appears
* to dispatch on a vhub table entry type -- confirm.
*
* What the visible lines show: the FIP_TE_TYPE_GATEWAY and
* FIP_TE_TYPE_UNICAST_MISS cases (and one further case keyed on
* tbl->tb_vhub_multicast) first deal with an already-present map of
* sizeof (eib_vhub_map_t); multicast entry types fall through to the
* FIP_TE_TYPE_VNIC handling when eib_wa_no_mcast_entries is set.
*/
static void
{
case FIP_TE_TYPE_GATEWAY:
if (tbl->tb_gateway) {
sizeof (eib_vhub_map_t));
}
break;
case FIP_TE_TYPE_UNICAST_MISS:
if (tbl->tb_unicast_miss) {
sizeof (eib_vhub_map_t));
}
break;
if (tbl->tb_vhub_multicast) {
sizeof (eib_vhub_map_t));
}
break;
/*
* If multicast entry types are not to be specially
* processed, treat them like regular vnic addresses.
*/
if (!eib_wa_no_mcast_entries) {
break;
}
/*FALLTHROUGH*/
case FIP_TE_TYPE_VNIC:
break;
}
}
/*
* NOTE(review): the signature line and the statements of this routine
* are not visible here. The visible lines walk to a 'tail' element and
* then branch on whether a tail exists, which suggests an append to a
* list -- confirm.
*/
static void
{
/*
* NOTE(review): the opening words of the sentence below are not
* present in the file.
* control cq handler is parsing either update or table message,
* or by the table cleanup routine when we aren't attached to any
* control mcgs. Bottom line is that this list traversal is always
* single-threaded and we could probably do away with the lock.
*/
break;
}
if (tail) {
} else {
}
}
/*
* NOTE(review): the signature line and the statements of this routine
* are not visible here. Judging by the warning text it presumably
* queues a gateway entry -- confirm. The visible lines return early,
* without queueing anything, when no memory is available for the
* entry.
*/
static void
{
"no memory to queue gw entry, transactions could fail");
return;
}
}
/*
* NOTE(review): the signature line and most statements of this routine
* are not visible here. Judging by the comments it presumably applies
* the queued vhub updates to the vhub table -- confirm.
*
* What the visible lines show: the update list is detached first; one
* early path returns EIB_E_SUCCESS; a gap between the table TUSN and
* the first update TUSN returns EIB_E_FAILURE; otherwise all received
* updates are applied and EIB_E_SUCCESS is returned.
*/
static int
{
/*
* Take the update list out
*/
/*
*/
}
return (EIB_E_SUCCESS);
/*
* If we missed any updates between table tusn and the first
* update tusn we got, we need to fail.
*/
"vhub update missed tusn(s), expected=0x%lx, got=0x%lx",
}
return (EIB_E_FAILURE);
}
/*
* If everything is fine, apply all the updates we received
*/
} else {
}
}
return (EIB_E_SUCCESS);
}
/*
* NOTE(review): the signature line and the statements of this routine
* are not visible here; only a loop-exit 'break' and the comment below
* remain, so its exact effect cannot be stated from this view.
*/
static void
{
/*
* Note that for EoIB, the vhub table is maintained using only
* vnic entry updates
*/
break;
}
}
}
/*
* Returns a pointer to an eib_vhub_map_t and takes no arguments.
*
* NOTE(review): no statements are visible in this body. A function
* declared to return eib_vhub_map_t * must return a value on every
* path -- confirm the body against the full file.
*/
static eib_vhub_map_t *
eib_fip_get_vhub_map(void)
{
}