/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* hermon.c
* Hermon (InfiniBand) HCA Driver attach/detach Routines
*
* Implements all the routines necessary for the attach, setup,
* initialization (and subsequent possible teardown and detach) of the
* Hermon InfiniBand HCA driver.
*/
#include <sys/types.h>
#include <sys/file.h>
#include <sys/open.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/stat.h>
#include <sys/pci.h>
#include <sys/pci_cap.h>
#include <sys/bitmap.h>
#include <sys/policy.h>
#include <sys/ib/adapters/hermon/hermon.h>
/* The following works around a problem in pre-2_7_000 firmware. */
#define HERMON_FW_WORKAROUND
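/*
 * (With the workaround in effect, hermon_size_icm() below reserves a
 * fixed 2GB for the AUXC table instead of sizing it from the QP count.)
 */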
int hermon_verbose = 0;
/* Hermon HCA State Pointer */
void *hermon_statep;
int debug_vpd = 0;
/* Set nonzero to disable the internal error-check polling thread */
int hermon_no_inter_err_chk = 0;
/*
 * The Hermon "userland resources database" is common to all instances of
 * the Hermon HCA driver.  This structure, "hermon_userland_rsrc_db",
 * contains all the necessary information to maintain it.
*/
hermon_umap_db_t hermon_userland_rsrc_db;
static int hermon_attach(dev_info_t *, ddi_attach_cmd_t);
static int hermon_detach(dev_info_t *, ddi_detach_cmd_t);
static int hermon_open(dev_t *, int, int, cred_t *);
static int hermon_close(dev_t, int, int, cred_t *);
static int hermon_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int hermon_drv_init(hermon_state_t *state, dev_info_t *dip,
int instance);
static void hermon_drv_fini(hermon_state_t *state);
static void hermon_drv_fini2(hermon_state_t *state);
static int hermon_isr_init(hermon_state_t *state);
static void hermon_isr_fini(hermon_state_t *state);
static int hermon_hw_init(hermon_state_t *state);
static void hermon_hw_fini(hermon_state_t *state,
hermon_drv_cleanup_level_t cleanup);
static int hermon_soft_state_init(hermon_state_t *state);
static void hermon_soft_state_fini(hermon_state_t *state);
static int hermon_icm_config_setup(hermon_state_t *state,
hermon_hw_initqueryhca_t *inithca);
static void hermon_icm_tables_init(hermon_state_t *state);
static void hermon_icm_tables_fini(hermon_state_t *state);
static int hermon_icm_dma_init(hermon_state_t *state);
static void hermon_icm_dma_fini(hermon_state_t *state);
static void hermon_inithca_set(hermon_state_t *state,
hermon_hw_initqueryhca_t *inithca);
static int hermon_hca_port_init(hermon_state_t *state);
static int hermon_hca_ports_shutdown(hermon_state_t *state, uint_t num_init);
static int hermon_internal_uarpg_init(hermon_state_t *state);
static void hermon_internal_uarpg_fini(hermon_state_t *state);
static int hermon_special_qp_contexts_reserve(hermon_state_t *state);
static void hermon_special_qp_contexts_unreserve(hermon_state_t *state);
static int hermon_sw_reset(hermon_state_t *state);
static int hermon_mcg_init(hermon_state_t *state);
static void hermon_mcg_fini(hermon_state_t *state);
static int hermon_fw_version_check(hermon_state_t *state);
static void hermon_device_info_report(hermon_state_t *state);
static int hermon_pci_capability_list(hermon_state_t *state,
ddi_acc_handle_t hdl);
static void hermon_pci_capability_vpd(hermon_state_t *state,
ddi_acc_handle_t hdl, uint_t offset);
static int hermon_pci_read_vpd(ddi_acc_handle_t hdl, uint_t offset,
uint32_t addr, uint32_t *data);
static int hermon_intr_or_msi_init(hermon_state_t *state);
static int hermon_add_intrs(hermon_state_t *state, int intr_type);
static int hermon_intr_or_msi_fini(hermon_state_t *state);
void hermon_pci_capability_msix(hermon_state_t *state, ddi_acc_handle_t hdl,
uint_t offset);
static uint64_t hermon_size_icm(hermon_state_t *state);
/* X86 fastreboot support */
static ushort_t get_msix_ctrl(dev_info_t *);
static size_t get_msix_tbl_size(dev_info_t *);
static size_t get_msix_pba_size(dev_info_t *);
static void hermon_set_msix_info(hermon_state_t *);
static int hermon_intr_disable(hermon_state_t *);
static int hermon_quiesce(dev_info_t *);
/* Character/Block Operations */
static struct cb_ops hermon_cb_ops = {
hermon_open, /* open */
hermon_close, /* close */
nodev, /* strategy (block) */
nodev, /* print (block) */
nodev, /* dump (block) */
nodev, /* read */
nodev, /* write */
hermon_ioctl, /* ioctl */
hermon_devmap, /* devmap */
NULL, /* mmap */
nodev, /* segmap */
nochpoll, /* chpoll */
ddi_prop_op, /* prop_op */
NULL, /* streams */
D_NEW | D_MP |
D_64BIT | /* D_HOTPLUG | */
D_DEVMAP, /* flags */
CB_REV /* rev */
};
/* Driver Operations */
static struct dev_ops hermon_ops = {
DEVO_REV, /* struct rev */
0, /* refcnt */
hermon_getinfo, /* getinfo */
nulldev, /* identify */
nulldev, /* probe */
hermon_attach, /* attach */
hermon_detach, /* detach */
nodev, /* reset */
&hermon_cb_ops, /* cb_ops */
NULL, /* bus_ops */
nodev, /* power */
hermon_quiesce, /* devo_quiesce */
};
/* Module Driver Info */
static struct modldrv hermon_modldrv = {
&mod_driverops,
"ConnectX IB Driver",
&hermon_ops
};
/* Module Linkage */
static struct modlinkage hermon_modlinkage = {
MODREV_1,
&hermon_modldrv,
NULL
};
/*
* This extern refers to the ibc_operations_t function vector that is defined
* in the hermon_ci.c file.
*/
extern ibc_operations_t hermon_ibc_ops;
/*
* _init()
*/
int
_init()
{
int status;
status = ddi_soft_state_init(&hermon_statep, sizeof (hermon_state_t),
(size_t)HERMON_INITIAL_STATES);
if (status != 0) {
return (status);
}
status = ibc_init(&hermon_modlinkage);
if (status != 0) {
ddi_soft_state_fini(&hermon_statep);
return (status);
}
status = mod_install(&hermon_modlinkage);
if (status != 0) {
ibc_fini(&hermon_modlinkage);
ddi_soft_state_fini(&hermon_statep);
return (status);
}
/* Initialize the Hermon "userland resources database" */
hermon_umap_db_init();
return (status);
}
/*
* _info()
*/
int
_info(struct modinfo *modinfop)
{
int status;
status = mod_info(&hermon_modlinkage, modinfop);
return (status);
}
/*
* _fini()
*/
int
_fini()
{
int status;
status = mod_remove(&hermon_modlinkage);
if (status != 0) {
return (status);
}
/* Destroy the Hermon "userland resources database" */
hermon_umap_db_fini();
ibc_fini(&hermon_modlinkage);
ddi_soft_state_fini(&hermon_statep);
return (status);
}
/*
* hermon_getinfo()
*/
/* ARGSUSED */
static int
hermon_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
{
dev_t dev;
hermon_state_t *state;
minor_t instance;
switch (cmd) {
case DDI_INFO_DEVT2DEVINFO:
dev = (dev_t)arg;
instance = HERMON_DEV_INSTANCE(dev);
state = ddi_get_soft_state(hermon_statep, instance);
if (state == NULL) {
return (DDI_FAILURE);
}
*result = (void *)state->hs_dip;
return (DDI_SUCCESS);
case DDI_INFO_DEVT2INSTANCE:
dev = (dev_t)arg;
instance = HERMON_DEV_INSTANCE(dev);
*result = (void *)(uintptr_t)instance;
return (DDI_SUCCESS);
default:
break;
}
return (DDI_FAILURE);
}
/*
* hermon_open()
*/
/* ARGSUSED */
static int
hermon_open(dev_t *devp, int flag, int otyp, cred_t *credp)
{
hermon_state_t *state;
hermon_rsrc_t *rsrcp;
hermon_umap_db_entry_t *umapdb, *umapdb2;
minor_t instance;
uint64_t key, value;
uint_t hr_indx;
dev_t dev;
int status;
instance = HERMON_DEV_INSTANCE(*devp);
state = ddi_get_soft_state(hermon_statep, instance);
if (state == NULL) {
return (ENXIO);
}
/*
* Only allow driver to be opened for character access, and verify
* whether exclusive access is allowed.
*/
if ((otyp != OTYP_CHR) || ((flag & FEXCL) &&
secpolicy_excl_open(credp) != 0)) {
return (EINVAL);
}
/*
* Search for the current process PID in the "userland resources
* database". If it is not found, then attempt to allocate a UAR
* page and add the ("key", "value") pair to the database.
* Note: As a last step we always return a devp appropriate for
* the open. Either we return a new minor number (based on the
* instance and the UAR page index) or we return the current minor
* number for the given client process.
*
* We also add an entry to the database to allow for lookup from
* "dev_t" to the current process PID. This is necessary because,
* under certain circumstance, the process PID that calls the Hermon
* close() entry point may not be the same as the one who called
* open(). Specifically, this can happen if a child process calls
* the Hermon's open() entry point, gets a UAR page, maps it out (using
* mmap()), and then exits without calling munmap(). Because mmap()
* adds a reference to the file descriptor, at the exit of the child
* process the file descriptor is "inherited" by the parent (and will
* be close()'d by the parent's PID only when it exits).
*
* Note: We use the hermon_umap_db_find_nolock() and
* hermon_umap_db_add_nolock() database access routines below (with
* an explicit mutex_enter of the database lock - "hdl_umapdb_lock")
* to ensure that the multiple accesses (in this case searching for,
* and then adding _two_ database entries) can be done atomically.
*/
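	/*
	 * In short, each open() creates two database entries:
	 *    (PID   -> UAR page resource), under MLNX_UMAP_UARPG_RSRC
	 *    (dev_t -> opening PID),       under MLNX_UMAP_PID_RSRC
	 * and hermon_close() looks them up and removes them in the
	 * reverse order.
	 */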
key = ddi_get_pid();
mutex_enter(&hermon_userland_rsrc_db.hdl_umapdb_lock);
status = hermon_umap_db_find_nolock(instance, key,
MLNX_UMAP_UARPG_RSRC, &value, 0, NULL);
if (status != DDI_SUCCESS) {
		/*
		 * If we are in 'maintenance mode', we cannot alloc a UAR page.
		 * But we still need some rsrcp value, and a mostly unique
		 * hr_indx value.  So we set rsrcp to NULL for maintenance
		 * mode, and use a rolling count for hr_indx.  The field
		 * 'hs_open_ar_indx' is used only in this maintenance mode
		 * condition.
		 *
		 * Otherwise, if we are in operational mode then we allocate
		 * the UAR page as normal, and use the rsrcp value and hr_indx
		 * value from that allocation.
		 */
if (!HERMON_IS_OPERATIONAL(state->hs_operational_mode)) {
rsrcp = NULL;
hr_indx = state->hs_open_ar_indx++;
} else {
/* Allocate a new UAR page for this process */
status = hermon_rsrc_alloc(state, HERMON_UARPG, 1,
HERMON_NOSLEEP, &rsrcp);
if (status != DDI_SUCCESS) {
mutex_exit(
&hermon_userland_rsrc_db.hdl_umapdb_lock);
return (EAGAIN);
}
hr_indx = rsrcp->hr_indx;
}
/*
* Allocate an entry to track the UAR page resource in the
* "userland resources database".
*/
umapdb = hermon_umap_db_alloc(instance, key,
MLNX_UMAP_UARPG_RSRC, (uint64_t)(uintptr_t)rsrcp);
if (umapdb == NULL) {
mutex_exit(&hermon_userland_rsrc_db.hdl_umapdb_lock);
/* If in "maintenance mode", don't free the rsrc */
if (HERMON_IS_OPERATIONAL(state->hs_operational_mode)) {
hermon_rsrc_free(state, &rsrcp);
}
return (EAGAIN);
}
/*
* Create a new device number. Minor number is a function of
* the UAR page index (15 bits) and the device instance number
* (3 bits).
*/
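		/*
		 * For example, assuming HERMON_MINORNUM_SHIFT is 3 (per the
		 * 3-bit instance field above), UAR page index 5 on instance
		 * 2 yields minor number (5 << 3) | 2 == 0x2a.
		 */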
dev = makedevice(getmajor(*devp), (hr_indx <<
HERMON_MINORNUM_SHIFT) | instance);
/*
* Allocate another entry in the "userland resources database"
* to track the association of the device number (above) to
* the current process ID (in "key").
*/
umapdb2 = hermon_umap_db_alloc(instance, dev,
MLNX_UMAP_PID_RSRC, (uint64_t)key);
if (umapdb2 == NULL) {
mutex_exit(&hermon_userland_rsrc_db.hdl_umapdb_lock);
hermon_umap_db_free(umapdb);
/* If in "maintenance mode", don't free the rsrc */
if (HERMON_IS_OPERATIONAL(state->hs_operational_mode)) {
hermon_rsrc_free(state, &rsrcp);
}
return (EAGAIN);
}
/* Add the entries to the database */
hermon_umap_db_add_nolock(umapdb);
hermon_umap_db_add_nolock(umapdb2);
} else {
		/*
		 * Return the same device number as on the original open()
		 * call.  This was calculated as a function of the UAR page
		 * index (upper 15 bits) and the device instance number
		 * (lower 3 bits), as above.
		 */
rsrcp = (hermon_rsrc_t *)(uintptr_t)value;
dev = makedevice(getmajor(*devp), (rsrcp->hr_indx <<
HERMON_MINORNUM_SHIFT) | instance);
}
mutex_exit(&hermon_userland_rsrc_db.hdl_umapdb_lock);
*devp = dev;
return (0);
}
/*
* hermon_close()
*/
/* ARGSUSED */
static int
hermon_close(dev_t dev, int flag, int otyp, cred_t *credp)
{
hermon_state_t *state;
hermon_rsrc_t *rsrcp;
hermon_umap_db_entry_t *umapdb;
hermon_umap_db_priv_t *priv;
minor_t instance;
uint64_t key, value;
int status, reset_status = 0;
instance = HERMON_DEV_INSTANCE(dev);
state = ddi_get_soft_state(hermon_statep, instance);
if (state == NULL) {
return (ENXIO);
}
/*
* Search for "dev_t" in the "userland resources database". As
* explained above in hermon_open(), we can't depend on using the
* current process ID here to do the lookup because the process
* that ultimately closes may not be the same one who opened
* (because of inheritance).
* So we lookup the "dev_t" (which points to the PID of the process
* that opened), and we remove the entry from the database (and free
* it up). Then we do another query based on the PID value. And when
* we find that database entry, we free it up too and then free the
* Hermon UAR page resource.
*
* Note: We use the hermon_umap_db_find_nolock() database access
* routine below (with an explicit mutex_enter of the database lock)
* to ensure that the multiple accesses (which attempt to remove the
* two database entries) can be done atomically.
*
* This works the same in both maintenance mode and HCA mode, except
* for the call to hermon_rsrc_free(). In the case of maintenance mode,
* this call is not needed, as it was not allocated in hermon_open()
* above.
*/
key = dev;
mutex_enter(&hermon_userland_rsrc_db.hdl_umapdb_lock);
status = hermon_umap_db_find_nolock(instance, key, MLNX_UMAP_PID_RSRC,
&value, HERMON_UMAP_DB_REMOVE, &umapdb);
if (status == DDI_SUCCESS) {
/*
* If the "hdb_priv" field is non-NULL, it indicates that
* some "on close" handling is still necessary. Call
* hermon_umap_db_handle_onclose_cb() to do the handling (i.e.
* to invoke all the registered callbacks). Then free up
* the resources associated with "hdb_priv" and continue
* closing.
*/
priv = (hermon_umap_db_priv_t *)umapdb->hdbe_common.hdb_priv;
if (priv != NULL) {
reset_status = hermon_umap_db_handle_onclose_cb(priv);
kmem_free(priv, sizeof (hermon_umap_db_priv_t));
umapdb->hdbe_common.hdb_priv = (void *)NULL;
}
hermon_umap_db_free(umapdb);
/*
* Now do another lookup using PID as the key (copy it from
* "value"). When this lookup is complete, the "value" field
* will contain the hermon_rsrc_t pointer for the UAR page
* resource.
*/
key = value;
status = hermon_umap_db_find_nolock(instance, key,
MLNX_UMAP_UARPG_RSRC, &value, HERMON_UMAP_DB_REMOVE,
&umapdb);
if (status == DDI_SUCCESS) {
hermon_umap_db_free(umapdb);
/* If in "maintenance mode", don't free the rsrc */
if (HERMON_IS_OPERATIONAL(state->hs_operational_mode)) {
rsrcp = (hermon_rsrc_t *)(uintptr_t)value;
hermon_rsrc_free(state, &rsrcp);
}
}
}
mutex_exit(&hermon_userland_rsrc_db.hdl_umapdb_lock);
return (reset_status);
}
/*
* hermon_attach()
* Context: Only called from attach() path context
*/
static int
hermon_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
hermon_state_t *state;
ibc_clnt_hdl_t tmp_ibtfpriv;
ibc_status_t ibc_status;
int instance;
int status;
#ifdef __lock_lint
(void) hermon_quiesce(dip);
#endif
switch (cmd) {
case DDI_ATTACH:
instance = ddi_get_instance(dip);
status = ddi_soft_state_zalloc(hermon_statep, instance);
if (status != DDI_SUCCESS) {
cmn_err(CE_NOTE, "hermon%d: driver failed to attach: "
"attach_ssz_fail", instance);
goto fail_attach_nomsg;
}
state = ddi_get_soft_state(hermon_statep, instance);
if (state == NULL) {
ddi_soft_state_free(hermon_statep, instance);
cmn_err(CE_NOTE, "hermon%d: driver failed to attach: "
"attach_gss_fail", instance);
goto fail_attach_nomsg;
}
/* clear the attach error buffer */
HERMON_ATTACH_MSG_INIT(state->hs_attach_buf);
/* Save away devinfo and instance before hermon_fm_init() */
state->hs_dip = dip;
state->hs_instance = instance;
hermon_fm_init(state);
/*
* Initialize Hermon driver and hardware.
*
* Note: If this initialization fails we may still wish to
* create a device node and remain operational so that Hermon
* firmware can be updated/flashed (i.e. "maintenance mode").
* If this is the case, then "hs_operational_mode" will be
* equal to HERMON_MAINTENANCE_MODE. We will not attempt to
* attach to the IBTF or register with the IBMF (i.e. no
* InfiniBand interfaces will be enabled).
*/
status = hermon_drv_init(state, dip, instance);
if ((status != DDI_SUCCESS) &&
(HERMON_IS_OPERATIONAL(state->hs_operational_mode))) {
goto fail_attach;
}
/*
* Change the Hermon FM mode
*/
if ((hermon_get_state(state) & HCA_PIO_FM) &&
HERMON_IS_OPERATIONAL(state->hs_operational_mode)) {
			/*
			 * Wait 50ms to give Solaris FMA an opportunity to
			 * report any HW errors, then check whether one was
			 * detected.  If a HW error is found, the Hermon
			 * attach must fail.
			 */
delay(drv_usectohz(50000));
if (hermon_init_failure(state)) {
hermon_drv_fini(state);
HERMON_WARNING(state, "unable to "
"attach Hermon due to a HW error");
HERMON_ATTACH_MSG(state->hs_attach_buf,
"hermon_attach_failure");
goto fail_attach;
}
			/*
			 * No HW errors were detected during attach, so
			 * switch the Hermon FM state to ereport-only mode.
			 */
if (hermon_fm_ereport_init(state) != DDI_SUCCESS) {
/* unwind the resources */
hermon_drv_fini(state);
HERMON_ATTACH_MSG(state->hs_attach_buf,
"hermon_attach_failure");
goto fail_attach;
}
}
/* Create the minor node for device */
status = ddi_create_minor_node(dip, "devctl", S_IFCHR, instance,
DDI_PSEUDO, 0);
if (status != DDI_SUCCESS) {
hermon_drv_fini(state);
HERMON_ATTACH_MSG(state->hs_attach_buf,
"attach_create_mn_fail");
goto fail_attach;
}
/*
* If we are in "maintenance mode", then we don't want to
* register with the IBTF. All InfiniBand interfaces are
* uninitialized, and the device is only capable of handling
* requests to update/flash firmware (or test/debug requests).
*/
if (HERMON_IS_OPERATIONAL(state->hs_operational_mode)) {
cmn_err(CE_NOTE, "!Hermon is operational\n");
/* Attach to InfiniBand Transport Framework (IBTF) */
ibc_status = ibc_attach(&tmp_ibtfpriv,
&state->hs_ibtfinfo);
if (ibc_status != IBC_SUCCESS) {
cmn_err(CE_CONT, "hermon_attach: ibc_attach "
"failed\n");
ddi_remove_minor_node(dip, "devctl");
hermon_drv_fini(state);
HERMON_ATTACH_MSG(state->hs_attach_buf,
"attach_ibcattach_fail");
goto fail_attach;
}
/*
* Now that we've successfully attached to the IBTF,
* we enable all appropriate asynch and CQ events to
* be forwarded to the IBTF.
*/
HERMON_ENABLE_IBTF_CALLB(state, tmp_ibtfpriv);
ibc_post_attach(state->hs_ibtfpriv);
/* Register agents with IB Mgmt Framework (IBMF) */
status = hermon_agent_handlers_init(state);
if (status != DDI_SUCCESS) {
(void) ibc_pre_detach(tmp_ibtfpriv, DDI_DETACH);
HERMON_QUIESCE_IBTF_CALLB(state);
if (state->hs_in_evcallb != 0) {
HERMON_WARNING(state, "unable to "
"quiesce Hermon IBTF callbacks");
}
ibc_detach(tmp_ibtfpriv);
ddi_remove_minor_node(dip, "devctl");
hermon_drv_fini(state);
HERMON_ATTACH_MSG(state->hs_attach_buf,
"attach_agentinit_fail");
goto fail_attach;
}
}
/* Report attach in maintenance mode, if appropriate */
if (!(HERMON_IS_OPERATIONAL(state->hs_operational_mode))) {
cmn_err(CE_NOTE, "hermon%d: driver attached "
"(for maintenance mode only)", state->hs_instance);
hermon_fm_ereport(state, HCA_IBA_ERR, HCA_ERR_DEGRADED);
}
/* Report that driver was loaded */
ddi_report_dev(dip);
/* Send device information to log file */
hermon_device_info_report(state);
/* DEBUG PRINT */
cmn_err(CE_CONT, "!Hermon attach complete\n");
return (DDI_SUCCESS);
case DDI_RESUME:
/* Add code here for DDI_RESUME XXX */
return (DDI_FAILURE);
default:
cmn_err(CE_WARN, "hermon_attach: unknown cmd (0x%x)\n", cmd);
break;
}
fail_attach:
cmn_err(CE_NOTE, "hermon%d: driver failed to attach: %s", instance,
state->hs_attach_buf);
if (hermon_get_state(state) & HCA_EREPORT_FM) {
hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
}
hermon_drv_fini2(state);
hermon_fm_fini(state);
ddi_soft_state_free(hermon_statep, instance);
fail_attach_nomsg:
return (DDI_FAILURE);
}
/*
* hermon_detach()
* Context: Only called from detach() path context
*/
static int
hermon_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
hermon_state_t *state;
ibc_clnt_hdl_t tmp_ibtfpriv;
ibc_status_t ibc_status;
int instance, status;
instance = ddi_get_instance(dip);
state = ddi_get_soft_state(hermon_statep, instance);
if (state == NULL) {
return (DDI_FAILURE);
}
switch (cmd) {
case DDI_DETACH:
/*
* If we are in "maintenance mode", then we do not want to
* do teardown for any of the InfiniBand interfaces.
* Specifically, this means not detaching from IBTF (we never
* attached to begin with) and not deregistering from IBMF.
*/
if (HERMON_IS_OPERATIONAL(state->hs_operational_mode)) {
/* Unregister agents from IB Mgmt Framework (IBMF) */
status = hermon_agent_handlers_fini(state);
if (status != DDI_SUCCESS) {
return (DDI_FAILURE);
}
/*
* Attempt the "pre-detach" from InfiniBand Transport
* Framework (IBTF). At this point the IBTF is still
* capable of handling incoming asynch and completion
* events. This "pre-detach" is primarily a mechanism
* to notify the appropriate IBTF clients that the
* HCA is being removed/offlined.
*/
ibc_status = ibc_pre_detach(state->hs_ibtfpriv, cmd);
if (ibc_status != IBC_SUCCESS) {
status = hermon_agent_handlers_init(state);
if (status != DDI_SUCCESS) {
HERMON_WARNING(state, "failed to "
"restart Hermon agents");
}
return (DDI_FAILURE);
}
/*
* Before we can fully detach from the IBTF we need to
* ensure that we have handled all outstanding event
* callbacks. This is accomplished by quiescing the
* event callback mechanism. Note: if we are unable
* to successfully quiesce the callbacks, then this is
* an indication that something has probably gone
* seriously wrong. We print out a warning, but
* continue.
*/
tmp_ibtfpriv = state->hs_ibtfpriv;
HERMON_QUIESCE_IBTF_CALLB(state);
if (state->hs_in_evcallb != 0) {
HERMON_WARNING(state, "unable to quiesce "
"Hermon IBTF callbacks");
}
/* Complete the detach from the IBTF */
ibc_detach(tmp_ibtfpriv);
}
/* Remove the minor node for device */
ddi_remove_minor_node(dip, "devctl");
/*
* Only call hermon_drv_fini() if we are in Hermon HCA mode.
* (Because if we are in "maintenance mode", then we never
* successfully finished init.) Only report successful
* detach for normal HCA mode.
*/
if (HERMON_IS_OPERATIONAL(state->hs_operational_mode)) {
/* Cleanup driver resources and shutdown hardware */
hermon_drv_fini(state);
cmn_err(CE_CONT, "!Hermon driver successfully "
"detached\n");
}
hermon_drv_fini2(state);
hermon_fm_fini(state);
ddi_soft_state_free(hermon_statep, instance);
return (DDI_SUCCESS);
case DDI_SUSPEND:
/* Add code here for DDI_SUSPEND XXX */
return (DDI_FAILURE);
default:
cmn_err(CE_WARN, "hermon_detach: unknown cmd (0x%x)\n", cmd);
break;
}
return (DDI_FAILURE);
}
/*
* hermon_dma_attr_init()
* Context: Can be called from interrupt or base context.
*/
/* ARGSUSED */
void
hermon_dma_attr_init(hermon_state_t *state, ddi_dma_attr_t *dma_attr)
{
_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dma_attr))
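	/*
	 * These attributes allow DMA anywhere in the 64-bit address space,
	 * in byte-granular transfers of effectively unlimited size, with
	 * buffers aligned to the HCA page size.
	 */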
dma_attr->dma_attr_version = DMA_ATTR_V0;
dma_attr->dma_attr_addr_lo = 0;
dma_attr->dma_attr_addr_hi = 0xFFFFFFFFFFFFFFFFull;
dma_attr->dma_attr_count_max = 0xFFFFFFFFFFFFFFFFull;
dma_attr->dma_attr_align = HERMON_PAGESIZE; /* default 4K */
dma_attr->dma_attr_burstsizes = 0x3FF;
dma_attr->dma_attr_minxfer = 1;
dma_attr->dma_attr_maxxfer = 0xFFFFFFFFFFFFFFFFull;
dma_attr->dma_attr_seg = 0xFFFFFFFFFFFFFFFFull;
dma_attr->dma_attr_sgllen = 0x7FFFFFFF;
dma_attr->dma_attr_granular = 1;
dma_attr->dma_attr_flags = 0;
}
/*
* hermon_dma_alloc()
* Context: Can be called from base context.
*/
int
hermon_dma_alloc(hermon_state_t *state, hermon_dma_info_t *dma_info,
uint16_t opcode)
{
ddi_dma_handle_t dma_hdl;
ddi_dma_attr_t dma_attr;
ddi_acc_handle_t acc_hdl;
ddi_dma_cookie_t cookie;
uint64_t kaddr;
uint64_t real_len;
uint_t ccount;
int status;
hermon_dma_attr_init(state, &dma_attr);
/* Allocate a DMA handle */
status = ddi_dma_alloc_handle(state->hs_dip, &dma_attr, DDI_DMA_SLEEP,
NULL, &dma_hdl);
if (status != DDI_SUCCESS) {
IBTF_DPRINTF_L2("DMA", "alloc handle failed: %d", status);
cmn_err(CE_CONT, "DMA alloc handle failed(status %d)", status);
return (DDI_FAILURE);
}
/* Allocate DMA memory */
status = ddi_dma_mem_alloc(dma_hdl, dma_info->length,
&state->hs_reg_accattr, DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL,
(caddr_t *)&kaddr, (size_t *)&real_len, &acc_hdl);
if (status != DDI_SUCCESS) {
ddi_dma_free_handle(&dma_hdl);
IBTF_DPRINTF_L2("DMA", "memory alloc failed: %d", status);
cmn_err(CE_CONT, "DMA memory alloc failed(status %d)", status);
return (DDI_FAILURE);
}
bzero((caddr_t)(uintptr_t)kaddr, real_len);
/* Bind the memory to the handle */
status = ddi_dma_addr_bind_handle(dma_hdl, NULL,
(caddr_t)(uintptr_t)kaddr, (size_t)real_len, DDI_DMA_RDWR |
DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL, &cookie, &ccount);
if (status != DDI_SUCCESS) {
ddi_dma_mem_free(&acc_hdl);
ddi_dma_free_handle(&dma_hdl);
IBTF_DPRINTF_L2("DMA", "bind handle failed: %d", status);
cmn_err(CE_CONT, "DMA bind handle failed(status %d)", status);
return (DDI_FAILURE);
}
/* Package the hermon_dma_info contents and return */
dma_info->vaddr = kaddr;
dma_info->dma_hdl = dma_hdl;
dma_info->acc_hdl = acc_hdl;
/* Pass the mapping information to the firmware */
status = hermon_map_cmd_post(state, dma_info, opcode, cookie, ccount);
if (status != DDI_SUCCESS) {
char *s;
hermon_dma_free(dma_info);
switch (opcode) {
case MAP_ICM:
s = "MAP_ICM";
break;
case MAP_FA:
s = "MAP_FA";
break;
case MAP_ICM_AUX:
s = "MAP_ICM_AUX";
break;
default:
s = "UNKNOWN";
}
cmn_err(CE_NOTE, "Map cmd '%s' failed, status %08x\n",
s, status);
return (DDI_FAILURE);
}
return (DDI_SUCCESS);
}
/*
* hermon_dma_free()
* Context: Can be called from base context.
*/
void
hermon_dma_free(hermon_dma_info_t *info)
{
/* Unbind the handles and free the memory */
(void) ddi_dma_unbind_handle(info->dma_hdl);
ddi_dma_mem_free(&info->acc_hdl);
ddi_dma_free_handle(&info->dma_hdl);
}
/* These macros are valid for use only in hermon_icm_alloc/hermon_icm_free. */
#define HERMON_ICM_ALLOC(rsrc) \
hermon_icm_alloc(state, rsrc, index1, index2)
#define HERMON_ICM_FREE(rsrc) \
hermon_icm_free(state, rsrc, index1, index2)
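/*
 * Note that these macros expand in place and rely on the local variables
 * "state", "index1", and "index2" in the calling function.
 */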
/*
* hermon_icm_alloc()
* Context: Can be called from base context.
*
* Only one thread can be here for a given hermon_rsrc_type_t "type".
*/
int
hermon_icm_alloc(hermon_state_t *state, hermon_rsrc_type_t type,
uint32_t index1, uint32_t index2)
{
hermon_icm_table_t *icm;
hermon_dma_info_t *dma_info;
uint8_t *bitmap;
int status;
if (hermon_verbose) {
IBTF_DPRINTF_L2("hermon", "hermon_icm_alloc: rsrc_type (0x%x) "
"index1/2 (0x%x/0x%x)", type, index1, index2);
}
icm = &state->hs_icm[type];
switch (type) {
case HERMON_QPC:
status = HERMON_ICM_ALLOC(HERMON_CMPT_QPC);
if (status != DDI_SUCCESS) {
return (status);
}
status = HERMON_ICM_ALLOC(HERMON_RDB);
if (status != DDI_SUCCESS) { /* undo icm_alloc's */
HERMON_ICM_FREE(HERMON_CMPT_QPC);
return (status);
}
status = HERMON_ICM_ALLOC(HERMON_ALTC);
if (status != DDI_SUCCESS) { /* undo icm_alloc's */
HERMON_ICM_FREE(HERMON_RDB);
HERMON_ICM_FREE(HERMON_CMPT_QPC);
return (status);
}
status = HERMON_ICM_ALLOC(HERMON_AUXC);
if (status != DDI_SUCCESS) { /* undo icm_alloc's */
HERMON_ICM_FREE(HERMON_ALTC);
HERMON_ICM_FREE(HERMON_RDB);
HERMON_ICM_FREE(HERMON_CMPT_QPC);
return (status);
}
break;
case HERMON_SRQC:
status = HERMON_ICM_ALLOC(HERMON_CMPT_SRQC);
if (status != DDI_SUCCESS) {
return (status);
}
break;
case HERMON_CQC:
status = HERMON_ICM_ALLOC(HERMON_CMPT_CQC);
if (status != DDI_SUCCESS) {
return (status);
}
break;
case HERMON_EQC:
status = HERMON_ICM_ALLOC(HERMON_CMPT_EQC);
if (status != DDI_SUCCESS) { /* undo icm_alloc's */
return (status);
}
break;
}
/* ensure existence of bitmap and dmainfo, sets "dma_info" */
hermon_bitmap(bitmap, dma_info, icm, index1);
/* Set up the DMA handle for allocation and mapping */
dma_info = icm->icm_dma[index1] + index2;
_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dma_info))
dma_info->length = icm->span << icm->log_object_size;
dma_info->icmaddr = icm->icm_baseaddr +
(((index1 << icm->split_shift) +
(index2 << icm->span_shift)) << icm->log_object_size);
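	/*
	 * That is, the ICM address of this span is the table base plus the
	 * two-level index (index1 selects the split, index2 the span within
	 * it), scaled by the per-object size.
	 */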
if (hermon_verbose) {
IBTF_DPRINTF_L2("hermon", "alloc DMA: "
"rsrc (0x%x) index (%x, %x) "
"icm_addr/len (%llx/%x) bitmap %p", type, index1, index2,
(longlong_t)dma_info->icmaddr, dma_info->length, bitmap);
}
/* Allocate and map memory for this span */
status = hermon_dma_alloc(state, dma_info, MAP_ICM);
if (status != DDI_SUCCESS) {
IBTF_DPRINTF_L2("hermon", "hermon_icm_alloc: DMA "
"allocation failed, status 0x%x", status);
switch (type) {
case HERMON_QPC:
HERMON_ICM_FREE(HERMON_AUXC);
HERMON_ICM_FREE(HERMON_ALTC);
HERMON_ICM_FREE(HERMON_RDB);
HERMON_ICM_FREE(HERMON_CMPT_QPC);
break;
case HERMON_SRQC:
HERMON_ICM_FREE(HERMON_CMPT_SRQC);
break;
case HERMON_CQC:
HERMON_ICM_FREE(HERMON_CMPT_CQC);
break;
case HERMON_EQC:
HERMON_ICM_FREE(HERMON_CMPT_EQC);
break;
}
return (DDI_FAILURE);
}
if (hermon_verbose) {
IBTF_DPRINTF_L2("hermon", "hermon_icm_alloc: mapping ICM: "
"rsrc_type (0x%x) index (0x%x, 0x%x) alloc length (0x%x) "
"icm_addr (0x%lx)", type, index1, index2, dma_info->length,
dma_info->icmaddr);
}
/* Set the bit for this slot in the table bitmap */
HERMON_BMAP_BIT_SET(icm->icm_bitmap[index1], index2);
return (DDI_SUCCESS);
}
/*
* hermon_icm_free()
* Context: Can be called from base context.
*
* ICM resources have been successfully returned from hermon_icm_alloc().
* Associated dma_info is no longer in use. Free the ICM backing memory.
*/
void
hermon_icm_free(hermon_state_t *state, hermon_rsrc_type_t type,
uint32_t index1, uint32_t index2)
{
hermon_icm_table_t *icm;
hermon_dma_info_t *dma_info;
int status;
icm = &state->hs_icm[type];
ASSERT(icm->icm_dma[index1][index2].icm_refcnt == 0);
if (hermon_verbose) {
IBTF_DPRINTF_L2("hermon", "hermon_icm_free: rsrc_type (0x%x) "
"index (0x%x, 0x%x)", type, index1, index2);
}
dma_info = icm->icm_dma[index1] + index2;
/* The following only happens if attach() is failing. */
if (dma_info == NULL)
return;
/* Unmap the ICM allocation, then free the backing DMA memory */
status = hermon_unmap_icm_cmd_post(state, dma_info);
if (status != DDI_SUCCESS) {
HERMON_WARNING(state, "UNMAP_ICM failure");
}
hermon_dma_free(dma_info);
/* Clear the bit in the ICM table bitmap */
HERMON_BMAP_BIT_CLR(icm->icm_bitmap[index1], index2);
switch (type) {
case HERMON_QPC:
HERMON_ICM_FREE(HERMON_AUXC);
HERMON_ICM_FREE(HERMON_ALTC);
HERMON_ICM_FREE(HERMON_RDB);
HERMON_ICM_FREE(HERMON_CMPT_QPC);
break;
case HERMON_SRQC:
HERMON_ICM_FREE(HERMON_CMPT_SRQC);
break;
case HERMON_CQC:
HERMON_ICM_FREE(HERMON_CMPT_CQC);
break;
case HERMON_EQC:
HERMON_ICM_FREE(HERMON_CMPT_EQC);
break;
}
}
/*
* hermon_drv_init()
* Context: Only called from attach() path context
*/
/* ARGSUSED */
static int
hermon_drv_init(hermon_state_t *state, dev_info_t *dip, int instance)
{
int status;
/*
* Check and set the operational mode of the device. If the driver is
* bound to the Hermon device in "maintenance mode", then this generally
* means that either the device has been specifically jumpered to
* start in this mode or the firmware boot process has failed to
* successfully load either the primary or the secondary firmware
* image.
*/
if (HERMON_IS_HCA_MODE(state->hs_dip)) {
state->hs_operational_mode = HERMON_HCA_MODE;
state->hs_cfg_profile_setting = HERMON_CFG_MEMFREE;
} else if (HERMON_IS_MAINTENANCE_MODE(state->hs_dip)) {
HERMON_FMANOTE(state, HERMON_FMA_MAINT);
state->hs_operational_mode = HERMON_MAINTENANCE_MODE;
return (DDI_FAILURE);
} else {
state->hs_operational_mode = 0; /* invalid operational mode */
HERMON_FMANOTE(state, HERMON_FMA_PCIID);
HERMON_WARNING(state, "unexpected device type detected");
return (DDI_FAILURE);
}
	/*
	 * Initialize the Hermon hardware.
	 *
	 * Note: If this routine returns an error, it is often a reasonably
	 * good indication that something firmware-related or HW-related has
	 * caused the failure.  (There are also a few ways SW can cause the
	 * failure, e.g. a SW resource shortage, but the majority of cases
	 * are due to either a firmware or a HW error.)  In order to give
	 * the user an opportunity (if desired) to update or reflash the
	 * Hermon firmware image, we set the "hs_operational_mode" flag
	 * (described above) to indicate that we wish to enter maintenance
	 * mode in the case of a firmware-related issue.
	 */
status = hermon_hw_init(state);
if (status != DDI_SUCCESS) {
cmn_err(CE_NOTE, "hermon%d: error during attach: %s", instance,
state->hs_attach_buf);
return (DDI_FAILURE);
}
/*
* Now that the ISR has been setup, arm all the EQs for event
* generation.
*/
status = hermon_eq_arm_all(state);
if (status != DDI_SUCCESS) {
cmn_err(CE_NOTE, "EQ Arm All failed\n");
hermon_hw_fini(state, HERMON_DRV_CLEANUP_ALL);
return (DDI_FAILURE);
}
/* test interrupts and event queues */
status = hermon_nop_post(state, 0x0, 0x0);
if (status != DDI_SUCCESS) {
cmn_err(CE_NOTE, "Interrupts/EQs failed\n");
hermon_hw_fini(state, HERMON_DRV_CLEANUP_ALL);
return (DDI_FAILURE);
}
/* Initialize Hermon softstate */
status = hermon_soft_state_init(state);
if (status != DDI_SUCCESS) {
cmn_err(CE_NOTE, "Failed to init soft state\n");
hermon_hw_fini(state, HERMON_DRV_CLEANUP_ALL);
return (DDI_FAILURE);
}
return (DDI_SUCCESS);
}
/*
* hermon_drv_fini()
* Context: Only called from attach() and/or detach() path contexts
*/
static void
hermon_drv_fini(hermon_state_t *state)
{
/* Cleanup Hermon softstate */
hermon_soft_state_fini(state);
/* Cleanup Hermon resources and shutdown hardware */
hermon_hw_fini(state, HERMON_DRV_CLEANUP_ALL);
}
/*
* hermon_drv_fini2()
* Context: Only called from attach() and/or detach() path contexts
*/
static void
hermon_drv_fini2(hermon_state_t *state)
{
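	/*
	 * Each handle below is freed only if still set and is cleared once
	 * freed, so this routine is safe from both the failed-attach and
	 * the detach paths.
	 */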
if (state->hs_fm_poll_thread) {
ddi_periodic_delete(state->hs_fm_poll_thread);
state->hs_fm_poll_thread = NULL;
}
/* HERMON_DRV_CLEANUP_LEVEL1 */
if (state->hs_fm_cmdhdl) {
hermon_regs_map_free(state, &state->hs_fm_cmdhdl);
state->hs_fm_cmdhdl = NULL;
}
if (state->hs_reg_cmdhdl) {
ddi_regs_map_free(&state->hs_reg_cmdhdl);
state->hs_reg_cmdhdl = NULL;
}
/* HERMON_DRV_CLEANUP_LEVEL0 */
if (state->hs_msix_tbl_entries) {
kmem_free(state->hs_msix_tbl_entries,
state->hs_msix_tbl_size);
state->hs_msix_tbl_entries = NULL;
}
if (state->hs_msix_pba_entries) {
kmem_free(state->hs_msix_pba_entries,
state->hs_msix_pba_size);
state->hs_msix_pba_entries = NULL;
}
if (state->hs_fm_msix_tblhdl) {
hermon_regs_map_free(state, &state->hs_fm_msix_tblhdl);
state->hs_fm_msix_tblhdl = NULL;
}
if (state->hs_reg_msix_tblhdl) {
ddi_regs_map_free(&state->hs_reg_msix_tblhdl);
state->hs_reg_msix_tblhdl = NULL;
}
if (state->hs_fm_msix_pbahdl) {
hermon_regs_map_free(state, &state->hs_fm_msix_pbahdl);
state->hs_fm_msix_pbahdl = NULL;
}
if (state->hs_reg_msix_pbahdl) {
ddi_regs_map_free(&state->hs_reg_msix_pbahdl);
state->hs_reg_msix_pbahdl = NULL;
}
if (state->hs_fm_pcihdl) {
hermon_pci_config_teardown(state, &state->hs_fm_pcihdl);
state->hs_fm_pcihdl = NULL;
}
if (state->hs_reg_pcihdl) {
pci_config_teardown(&state->hs_reg_pcihdl);
state->hs_reg_pcihdl = NULL;
}
}
/*
* hermon_isr_init()
* Context: Only called from attach() path context
*/
static int
hermon_isr_init(hermon_state_t *state)
{
int status;
int intr;
for (intr = 0; intr < state->hs_intrmsi_allocd; intr++) {
/*
* Add a handler for the interrupt or MSI
*/
status = ddi_intr_add_handler(state->hs_intrmsi_hdl[intr],
hermon_isr, (caddr_t)state, (void *)(uintptr_t)intr);
if (status != DDI_SUCCESS) {
return (DDI_FAILURE);
}
/*
* Enable the software interrupt. Note: depending on the value
* returned in the capability flag, we have to call either
* ddi_intr_block_enable() or ddi_intr_enable().
*/
if (state->hs_intrmsi_cap & DDI_INTR_FLAG_BLOCK) {
status = ddi_intr_block_enable(
&state->hs_intrmsi_hdl[intr], 1);
if (status != DDI_SUCCESS) {
return (DDI_FAILURE);
}
} else {
status = ddi_intr_enable(state->hs_intrmsi_hdl[intr]);
if (status != DDI_SUCCESS) {
return (DDI_FAILURE);
}
}
}
	/*
	 * Now that the ISR has been enabled, we defer arming all of the EQs
	 * for event generation until later, in case MSI-X is enabled.
	 */
return (DDI_SUCCESS);
}
/*
* hermon_isr_fini()
* Context: Only called from attach() and/or detach() path contexts
*/
static void
hermon_isr_fini(hermon_state_t *state)
{
int intr;
for (intr = 0; intr < state->hs_intrmsi_allocd; intr++) {
/* Disable the software interrupt */
if (state->hs_intrmsi_cap & DDI_INTR_FLAG_BLOCK) {
(void) ddi_intr_block_disable(
&state->hs_intrmsi_hdl[intr], 1);
} else {
(void) ddi_intr_disable(state->hs_intrmsi_hdl[intr]);
}
/*
* Remove the software handler for the interrupt or MSI
*/
(void) ddi_intr_remove_handler(state->hs_intrmsi_hdl[intr]);
}
}
/*
 * hermon_size_icm()
 *    Context: Only called from attach() path context
 *
 * Returns the sum of the configured ICM table sizes:
 *    cMPT, dMPT, MTT, QPC, SRQC, RDB, CQC, ALTC, AUXC, EQC, MCG
 */
static uint64_t
hermon_size_icm(hermon_state_t *state)
{
hermon_hw_querydevlim_t *devlim;
hermon_cfg_profile_t *cfg;
uint64_t num_cmpts, num_dmpts, num_mtts;
uint64_t num_qpcs, num_srqc, num_rdbs;
#ifndef HERMON_FW_WORKAROUND
uint64_t num_auxc;
#endif
uint64_t num_cqcs, num_altc;
uint64_t num_eqcs, num_mcgs;
uint64_t size;
devlim = &state->hs_devlim;
cfg = state->hs_cfg_profile;
/* number of respective entries */
num_cmpts = (uint64_t)0x1 << cfg->cp_log_num_cmpt;
num_mtts = (uint64_t)0x1 << cfg->cp_log_num_mtt;
num_dmpts = (uint64_t)0x1 << cfg->cp_log_num_dmpt;
num_qpcs = (uint64_t)0x1 << cfg->cp_log_num_qp;
num_srqc = (uint64_t)0x1 << cfg->cp_log_num_srq;
num_rdbs = (uint64_t)0x1 << cfg->cp_log_num_rdb;
num_cqcs = (uint64_t)0x1 << cfg->cp_log_num_cq;
num_altc = (uint64_t)0x1 << cfg->cp_log_num_qp;
#ifndef HERMON_FW_WORKAROUND
num_auxc = (uint64_t)0x1 << cfg->cp_log_num_qp;
#endif
num_eqcs = (uint64_t)0x1 << cfg->cp_log_num_eq;
num_mcgs = (uint64_t)0x1 << cfg->cp_log_num_mcg;
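	/*
	 * The total is each table's entry count times its entry size as
	 * reported by QUERY_DEV_CAP (with a fixed 2GB reserved for AUXC
	 * when the firmware workaround is enabled).
	 */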
size =
num_cmpts * devlim->cmpt_entry_sz +
num_dmpts * devlim->dmpt_entry_sz +
num_mtts * devlim->mtt_entry_sz +
num_qpcs * devlim->qpc_entry_sz +
num_srqc * devlim->srq_entry_sz +
num_rdbs * devlim->rdmardc_entry_sz +
num_cqcs * devlim->cqc_entry_sz +
num_altc * devlim->altc_entry_sz +
#ifdef HERMON_FW_WORKAROUND
0x80000000ull +
#else
num_auxc * devlim->aux_entry_sz +
#endif
num_eqcs * devlim->eqc_entry_sz +
num_mcgs * HERMON_MCGMEM_SZ(state);
return (size);
}
/*
* hermon_hw_init()
* Context: Only called from attach() path context
*/
static int
hermon_hw_init(hermon_state_t *state)
{
hermon_drv_cleanup_level_t cleanup;
sm_nodeinfo_t nodeinfo;
uint64_t clr_intr_offset;
int status;
	uint32_t fw_size; /* in pages */
uint64_t offset;
/* This is where driver initialization begins */
cleanup = HERMON_DRV_CLEANUP_LEVEL0;
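	/*
	 * "cleanup" records how far initialization has progressed; every
	 * failure path below passes it to hermon_hw_fini(), which unwinds
	 * from exactly that level (the cases fall through downward).
	 */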
/* Setup device access attributes */
state->hs_reg_accattr.devacc_attr_version = DDI_DEVICE_ATTR_V0;
state->hs_reg_accattr.devacc_attr_endian_flags = DDI_STRUCTURE_BE_ACC;
state->hs_reg_accattr.devacc_attr_dataorder = DDI_STRICTORDER_ACC;
state->hs_reg_accattr.devacc_attr_access = DDI_DEFAULT_ACC;
/* Setup fma-protected access attributes */
state->hs_fm_accattr.devacc_attr_version =
hermon_devacc_attr_version(state);
state->hs_fm_accattr.devacc_attr_endian_flags = DDI_STRUCTURE_BE_ACC;
state->hs_fm_accattr.devacc_attr_dataorder = DDI_STRICTORDER_ACC;
/* set acc err protection type */
state->hs_fm_accattr.devacc_attr_access =
hermon_devacc_attr_access(state);
/* Setup for PCI config read/write of HCA device */
status = hermon_pci_config_setup(state, &state->hs_fm_pcihdl);
if (status != DDI_SUCCESS) {
hermon_hw_fini(state, cleanup);
HERMON_ATTACH_MSG(state->hs_attach_buf,
"hw_init_PCI_config_space_regmap_fail");
/* This case is not the degraded one */
return (DDI_FAILURE);
}
/* Map PCI config space and MSI-X tables/pba */
hermon_set_msix_info(state);
/* Map in Hermon registers (CMD, UAR, MSIX) and setup offsets */
status = hermon_regs_map_setup(state, HERMON_CMD_BAR,
&state->hs_reg_cmd_baseaddr, 0, 0, &state->hs_fm_accattr,
&state->hs_fm_cmdhdl);
if (status != DDI_SUCCESS) {
hermon_hw_fini(state, cleanup);
HERMON_ATTACH_MSG(state->hs_attach_buf,
"hw_init_CMD_BAR_regmap_fail");
/* This case is not the degraded one */
return (DDI_FAILURE);
}
cleanup = HERMON_DRV_CLEANUP_LEVEL1;
/*
* We defer UAR-BAR mapping until later. Need to know if
* blueflame mapping is to be done, and don't know that until after
* we get the dev_caps, so do it right after that
*/
	/*
	 * There is a third BAR defined for Hermon - it is for MSI-X.
	 *
	 * Will need to explore its possible need/use with Mellanox.
	 * [es] Temporary mapping, maybe.
	 */
#ifdef HERMON_SUPPORTS_MSIX_BAR
status = ddi_regs_map_setup(state->hs_dip, HERMON_MSIX_BAR,
&state->hs_reg_msi_baseaddr, 0, 0, &state->hs_reg_accattr,
&state->hs_reg_msihdl);
if (status != DDI_SUCCESS) {
hermon_hw_fini(state, cleanup);
HERMON_ATTACH_MSG(state->hs_attach_buf,
"hw_init_MSIX_BAR_regmap_fail");
/* This case is not the degraded one */
return (DDI_FAILURE);
}
#endif
cleanup = HERMON_DRV_CLEANUP_LEVEL2;
/*
* Save interesting registers away. The offsets of the first two
* here (HCR and sw_reset) are detailed in the PRM, the others are
* derived from values in the QUERY_FW output, so we'll save them
* off later.
*/
/* Host Command Register (HCR) */
state->hs_cmd_regs.hcr = (hermon_hw_hcr_t *)
((uintptr_t)state->hs_reg_cmd_baseaddr + HERMON_CMD_HCR_OFFSET);
state->hs_cmd_toggle = 0; /* initialize it for use */
/* Software Reset register (sw_reset) and semaphore */
state->hs_cmd_regs.sw_reset = (uint32_t *)
((uintptr_t)state->hs_reg_cmd_baseaddr +
HERMON_CMD_SW_RESET_OFFSET);
state->hs_cmd_regs.sw_semaphore = (uint32_t *)
((uintptr_t)state->hs_reg_cmd_baseaddr +
HERMON_CMD_SW_SEMAPHORE_OFFSET);
/* Retrieve PCI device, vendor and rev IDs */
state->hs_vendor_id = HERMON_GET_VENDOR_ID(state->hs_dip);
state->hs_device_id = HERMON_GET_DEVICE_ID(state->hs_dip);
state->hs_revision_id = HERMON_GET_REVISION_ID(state->hs_dip);
/* make sure init'd before we start filling things in */
bzero(&state->hs_hcaparams, sizeof (struct hermon_hw_initqueryhca_s));
/* Initialize the Phase1 configuration profile */
status = hermon_cfg_profile_init_phase1(state);
if (status != DDI_SUCCESS) {
hermon_hw_fini(state, cleanup);
HERMON_ATTACH_MSG(state->hs_attach_buf,
"hw_init_cfginit1_fail");
/* This case is not the degraded one */
return (DDI_FAILURE);
}
cleanup = HERMON_DRV_CLEANUP_LEVEL3;
/* Do a software reset of the adapter to ensure proper state */
status = hermon_sw_reset(state);
if (status != HERMON_CMD_SUCCESS) {
hermon_hw_fini(state, cleanup);
HERMON_ATTACH_MSG(state->hs_attach_buf,
"hw_init_sw_reset_fail");
/* This case is not the degraded one */
return (DDI_FAILURE);
}
/* Initialize mailboxes */
status = hermon_rsrc_init_phase1(state);
if (status != DDI_SUCCESS) {
hermon_hw_fini(state, cleanup);
HERMON_ATTACH_MSG(state->hs_attach_buf,
"hw_init_rsrcinit1_fail");
/* This case is not the degraded one */
return (DDI_FAILURE);
}
cleanup = HERMON_DRV_CLEANUP_LEVEL4;
/* Post QUERY_FW */
status = hermon_cmn_query_cmd_post(state, QUERY_FW, 0, 0, &state->hs_fw,
sizeof (hermon_hw_queryfw_t), HERMON_CMD_NOSLEEP_SPIN);
if (status != HERMON_CMD_SUCCESS) {
cmn_err(CE_NOTE, "QUERY_FW command failed: %08x\n", status);
hermon_hw_fini(state, cleanup);
HERMON_ATTACH_MSG(state->hs_attach_buf,
"hw_init_query_fw_cmd_fail");
/* This case is not the degraded one */
return (DDI_FAILURE);
}
	/* Validate that the Hermon FW version is appropriate */
status = hermon_fw_version_check(state);
if (status != DDI_SUCCESS) {
HERMON_FMANOTE(state, HERMON_FMA_FWVER);
if (state->hs_operational_mode == HERMON_HCA_MODE) {
cmn_err(CE_CONT, "Unsupported Hermon FW version: "
"expected: %04d.%04d.%04d, "
"actual: %04d.%04d.%04d\n",
HERMON_FW_VER_MAJOR,
HERMON_FW_VER_MINOR,
HERMON_FW_VER_SUBMINOR,
state->hs_fw.fw_rev_major,
state->hs_fw.fw_rev_minor,
state->hs_fw.fw_rev_subminor);
} else {
cmn_err(CE_CONT, "Unsupported FW version: "
"%04d.%04d.%04d\n",
state->hs_fw.fw_rev_major,
state->hs_fw.fw_rev_minor,
state->hs_fw.fw_rev_subminor);
}
state->hs_operational_mode = HERMON_MAINTENANCE_MODE;
hermon_hw_fini(state, cleanup);
HERMON_ATTACH_MSG(state->hs_attach_buf,
"hw_init_checkfwver_fail");
/* This case is the degraded one */
return (HERMON_CMD_BAD_NVMEM);
}
/*
* Save off the rest of the interesting registers that we'll be using.
* Setup the offsets for the other registers.
*/
	/*
	 * Hermon gives the intr_offset relative to the BAR - technically we
	 * should get the BAR info from the response, but the PRM says it is
	 * from BAR0-1, which for us is the CMD BAR.
	 */
clr_intr_offset = state->hs_fw.clr_intr_offs & HERMON_CMD_OFFSET_MASK;
/* Save Clear Interrupt address */
state->hs_cmd_regs.clr_intr = (uint64_t *)
(uintptr_t)(state->hs_reg_cmd_baseaddr + clr_intr_offset);
	/*
	 * Also save the error buffer address in the structure - it is used
	 * in hermon_event.c to check for internal errors on the HCA that
	 * are not reported in an EQE or (necessarily) by interrupt.
	 */
state->hs_cmd_regs.fw_err_buf = (uint32_t *)(uintptr_t)
(state->hs_reg_cmd_baseaddr + state->hs_fw.error_buf_addr);
/*
* Invoke a polling thread to check the error buffer periodically.
*/
if (!hermon_no_inter_err_chk) {
state->hs_fm_poll_thread = ddi_periodic_add(
hermon_inter_err_chk, (void *)state, FM_POLL_INTERVAL,
DDI_IPL_0);
}
cleanup = HERMON_DRV_CLEANUP_LEVEL5;
/*
* Allocate, map, and run the HCA Firmware.
*/
/* Allocate memory for the firmware to load into and map it */
/* get next higher power of 2 */
fw_size = 1 << highbit(state->hs_fw.fw_pages);
state->hs_fw_dma.length = fw_size << HERMON_PAGESHIFT;
status = hermon_dma_alloc(state, &state->hs_fw_dma, MAP_FA);
if (status != DDI_SUCCESS) {
cmn_err(CE_NOTE, "FW alloc failed\n");
hermon_hw_fini(state, cleanup);
HERMON_ATTACH_MSG(state->hs_attach_buf,
"hw_init_dma_alloc_fw_fail");
/* This case is not the degraded one */
return (DDI_FAILURE);
}
cleanup = HERMON_DRV_CLEANUP_LEVEL6;
/* Invoke the RUN_FW cmd to run the firmware */
status = hermon_run_fw_cmd_post(state);
if (status != DDI_SUCCESS) {
cmn_err(CE_NOTE, "RUN_FW command failed: 0x%08x\n", status);
if (status == HERMON_CMD_BAD_NVMEM) {
state->hs_operational_mode = HERMON_MAINTENANCE_MODE;
}
hermon_hw_fini(state, cleanup);
HERMON_ATTACH_MSG(state->hs_attach_buf, "hw_init_run_fw_fail");
/*
* If the status is HERMON_CMD_BAD_NVMEM, it's likely the
* firmware is corrupted, so the mode falls into the
* maintenance mode.
*/
return (status == HERMON_CMD_BAD_NVMEM ? HERMON_CMD_BAD_NVMEM :
DDI_FAILURE);
}
/*
* QUERY DEVICE LIMITS/CAPABILITIES
* NOTE - in Hermon, the command is changed to QUERY_DEV_CAP,
* but for familiarity we have kept the structure name the
* same as Tavor/Arbel
*/
status = hermon_cmn_query_cmd_post(state, QUERY_DEV_CAP, 0, 0,
&state->hs_devlim, sizeof (hermon_hw_querydevlim_t),
HERMON_CMD_NOSLEEP_SPIN);
if (status != HERMON_CMD_SUCCESS) {
cmn_err(CE_NOTE, "QUERY_DEV_CAP command failed: 0x%08x\n",
status);
hermon_hw_fini(state, cleanup);
HERMON_ATTACH_MSG(state->hs_attach_buf, "hw_init_devcap_fail");
/* This case is not the degraded one */
return (DDI_FAILURE);
}
	state->hs_devlim.num_rsvd_eq = max(state->hs_devlim.num_rsvd_eq,
	    (4 * state->hs_devlim.num_rsvd_uar)); /* greater of the two */
/* now we have enough info to map in the UAR BAR */
	/*
	 * First, we figure out how to map the BAR for UAR - use only the
	 * lower half if BlueFlame is enabled.  In that case the mapped
	 * length is half the BAR: 2^(log_max_uar_sz + 20) bytes, i.e.
	 * log_max_uar_sz is in units of 1MB (hence the "+ 20").
	 */
if (state->hs_devlim.blu_flm) { /* Blue Flame Enabled */
offset = (uint64_t)1 << (state->hs_devlim.log_max_uar_sz + 20);
} else {
offset = 0; /* a zero length means map the whole thing */
}
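	/*
	 * For example, with log_max_uar_sz == 10 each half of the BAR is
	 * 2^(10 + 20) bytes (1GB): UAR pages are mapped in the lower half
	 * and (below) BlueFlame in the upper half.
	 */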
status = hermon_regs_map_setup(state, HERMON_UAR_BAR,
&state->hs_reg_uar_baseaddr, 0, offset, &state->hs_fm_accattr,
&state->hs_fm_uarhdl);
if (status != DDI_SUCCESS) {
HERMON_ATTACH_MSG(state->hs_attach_buf, "UAR BAR mapping");
/* This case is not the degraded one */
return (DDI_FAILURE);
}
/* and if BlueFlame is enabled, map the other half there */
if (state->hs_devlim.blu_flm) { /* Blue Flame Enabled */
offset = (uint64_t)1 << (state->hs_devlim.log_max_uar_sz + 20);
status = ddi_regs_map_setup(state->hs_dip, HERMON_UAR_BAR,
&state->hs_reg_bf_baseaddr, offset, offset,
&state->hs_reg_accattr, &state->hs_reg_bfhdl);
if (status != DDI_SUCCESS) {
HERMON_ATTACH_MSG(state->hs_attach_buf,
"BlueFlame BAR mapping");
/* This case is not the degraded one */
return (DDI_FAILURE);
}
/* This will be used in hw_fini if we fail to init. */
state->hs_bf_offset = offset;
}
cleanup = HERMON_DRV_CLEANUP_LEVEL7;
/* Hermon has a couple of things needed for phase 2 in query port */
status = hermon_cmn_query_cmd_post(state, QUERY_PORT, 0, 0x01,
&state->hs_queryport, sizeof (hermon_hw_query_port_t),
HERMON_CMD_NOSLEEP_SPIN);
if (status != HERMON_CMD_SUCCESS) {
cmn_err(CE_NOTE, "QUERY_PORT command failed: 0x%08x\n",
status);
hermon_hw_fini(state, cleanup);
HERMON_ATTACH_MSG(state->hs_attach_buf,
"hw_init_queryport_fail");
/* This case is not the degraded one */
return (DDI_FAILURE);
}
/* Initialize the Phase2 Hermon configuration profile */
status = hermon_cfg_profile_init_phase2(state);
if (status != DDI_SUCCESS) {
cmn_err(CE_NOTE, "CFG phase 2 failed: 0x%08x\n", status);
hermon_hw_fini(state, cleanup);
HERMON_ATTACH_MSG(state->hs_attach_buf,
"hw_init_cfginit2_fail");
/* This case is not the degraded one */
return (DDI_FAILURE);
}
/* Determine and set the ICM size */
state->hs_icm_sz = hermon_size_icm(state);
status = hermon_set_icm_size_cmd_post(state);
if (status != DDI_SUCCESS) {
cmn_err(CE_NOTE, "Hermon: SET_ICM_SIZE cmd failed: 0x%08x\n",
status);
hermon_hw_fini(state, cleanup);
HERMON_ATTACH_MSG(state->hs_attach_buf,
"hw_init_seticmsz_fail");
/* This case is not the degraded one */
return (DDI_FAILURE);
}
/* alloc icm aux physical memory and map it */
state->hs_icma_dma.length = 1 << highbit(state->hs_icma_sz);
status = hermon_dma_alloc(state, &state->hs_icma_dma, MAP_ICM_AUX);
if (status != DDI_SUCCESS) {
cmn_err(CE_NOTE, "failed to alloc (0x%llx) bytes for ICMA\n",
(longlong_t)state->hs_icma_dma.length);
hermon_hw_fini(state, cleanup);
HERMON_ATTACH_MSG(state->hs_attach_buf,
"hw_init_dma_alloc_icm_aux_fail");
/* This case is not the degraded one */
return (DDI_FAILURE);
}
cleanup = HERMON_DRV_CLEANUP_LEVEL8;
cleanup = HERMON_DRV_CLEANUP_LEVEL9;
/* Allocate an array of structures to house the ICM tables */
state->hs_icm = kmem_zalloc(HERMON_NUM_ICM_RESOURCES *
sizeof (hermon_icm_table_t), KM_SLEEP);
/* Set up the ICM address space and the INIT_HCA command input */
status = hermon_icm_config_setup(state, &state->hs_hcaparams);
if (status != HERMON_CMD_SUCCESS) {
cmn_err(CE_NOTE, "ICM configuration failed\n");
hermon_hw_fini(state, cleanup);
HERMON_ATTACH_MSG(state->hs_attach_buf,
"hw_init_icm_config_setup_fail");
/* This case is not the degraded one */
return (DDI_FAILURE);
}
cleanup = HERMON_DRV_CLEANUP_LEVEL10;
/* Initialize the adapter with the INIT_HCA cmd */
status = hermon_init_hca_cmd_post(state, &state->hs_hcaparams,
HERMON_CMD_NOSLEEP_SPIN);
if (status != HERMON_CMD_SUCCESS) {
cmn_err(CE_NOTE, "INIT_HCA command failed: %08x\n", status);
hermon_hw_fini(state, cleanup);
HERMON_ATTACH_MSG(state->hs_attach_buf, "hw_init_hca_fail");
/* This case is not the degraded one */
return (DDI_FAILURE);
}
cleanup = HERMON_DRV_CLEANUP_LEVEL11;
/* Enter the second phase of init for Hermon configuration/resources */
status = hermon_rsrc_init_phase2(state);
if (status != DDI_SUCCESS) {
hermon_hw_fini(state, cleanup);
HERMON_ATTACH_MSG(state->hs_attach_buf,
"hw_init_rsrcinit2_fail");
/* This case is not the degraded one */
return (DDI_FAILURE);
}
cleanup = HERMON_DRV_CLEANUP_LEVEL12;
/* Query the adapter via QUERY_ADAPTER */
status = hermon_cmn_query_cmd_post(state, QUERY_ADAPTER, 0, 0,
&state->hs_adapter, sizeof (hermon_hw_queryadapter_t),
HERMON_CMD_NOSLEEP_SPIN);
if (status != HERMON_CMD_SUCCESS) {
cmn_err(CE_NOTE, "Hermon: QUERY_ADAPTER command failed: %08x\n",
status);
hermon_hw_fini(state, cleanup);
HERMON_ATTACH_MSG(state->hs_attach_buf,
"hw_init_query_adapter_fail");
/* This case is not the degraded one */
return (DDI_FAILURE);
}
/* Allocate protection domain (PD) for Hermon internal use */
status = hermon_pd_alloc(state, &state->hs_pdhdl_internal,
HERMON_SLEEP);
if (status != DDI_SUCCESS) {
cmn_err(CE_NOTE, "failed to alloc internal PD\n");
hermon_hw_fini(state, cleanup);
HERMON_ATTACH_MSG(state->hs_attach_buf,
"hw_init_internal_pd_alloc_fail");
/* This case is not the degraded one */
return (DDI_FAILURE);
}
cleanup = HERMON_DRV_CLEANUP_LEVEL13;
/* Setup UAR page for kernel use */
status = hermon_internal_uarpg_init(state);
if (status != DDI_SUCCESS) {
cmn_err(CE_NOTE, "failed to setup internal UAR\n");
hermon_hw_fini(state, cleanup);
HERMON_ATTACH_MSG(state->hs_attach_buf,
"hw_init_internal_uarpg_alloc_fail");
/* This case is not the degraded one */
return (DDI_FAILURE);
}
cleanup = HERMON_DRV_CLEANUP_LEVEL14;
/* Query and initialize the Hermon interrupt/MSI information */
status = hermon_intr_or_msi_init(state);
if (status != DDI_SUCCESS) {
cmn_err(CE_NOTE, "failed to setup INTR/MSI\n");
hermon_hw_fini(state, cleanup);
HERMON_ATTACH_MSG(state->hs_attach_buf,
"hw_init_intr_or_msi_init_fail");
/* This case is not the degraded one */
return (DDI_FAILURE);
}
cleanup = HERMON_DRV_CLEANUP_LEVEL15;
status = hermon_isr_init(state); /* set up the isr */
if (status != DDI_SUCCESS) {
cmn_err(CE_NOTE, "failed to init isr\n");
hermon_hw_fini(state, cleanup);
HERMON_ATTACH_MSG(state->hs_attach_buf,
"hw_init_isrinit_fail");
/* This case is not the degraded one */
return (DDI_FAILURE);
}
cleanup = HERMON_DRV_CLEANUP_LEVEL16;
/* Setup the event queues */
status = hermon_eq_init_all(state);
if (status != DDI_SUCCESS) {
cmn_err(CE_NOTE, "failed to init EQs\n");
hermon_hw_fini(state, cleanup);
HERMON_ATTACH_MSG(state->hs_attach_buf,
"hw_init_eqinitall_fail");
/* This case is not the degraded one */
return (DDI_FAILURE);
}
cleanup = HERMON_DRV_CLEANUP_LEVEL17;
/* Reserve contexts for QP0 and QP1 */
status = hermon_special_qp_contexts_reserve(state);
if (status != DDI_SUCCESS) {
cmn_err(CE_NOTE, "failed to init special QPs\n");
hermon_hw_fini(state, cleanup);
HERMON_ATTACH_MSG(state->hs_attach_buf,
"hw_init_rsrv_sqp_fail");
/* This case is not the degraded one */
return (DDI_FAILURE);
}
cleanup = HERMON_DRV_CLEANUP_LEVEL18;
/* Initialize for multicast group handling */
status = hermon_mcg_init(state);
if (status != DDI_SUCCESS) {
cmn_err(CE_NOTE, "failed to init multicast\n");
hermon_hw_fini(state, cleanup);
HERMON_ATTACH_MSG(state->hs_attach_buf,
"hw_init_mcg_init_fail");
/* This case is not the degraded one */
return (DDI_FAILURE);
}
cleanup = HERMON_DRV_CLEANUP_LEVEL19;
/* Initialize the Hermon IB port(s) */
status = hermon_hca_port_init(state);
if (status != DDI_SUCCESS) {
cmn_err(CE_NOTE, "failed to init HCA Port\n");
hermon_hw_fini(state, cleanup);
HERMON_ATTACH_MSG(state->hs_attach_buf,
"hw_init_hca_port_init_fail");
/* This case is not the degraded one */
return (DDI_FAILURE);
}
cleanup = HERMON_DRV_CLEANUP_ALL;
/* Determine NodeGUID and SystemImageGUID */
status = hermon_getnodeinfo_cmd_post(state, HERMON_CMD_NOSLEEP_SPIN,
&nodeinfo);
if (status != HERMON_CMD_SUCCESS) {
cmn_err(CE_NOTE, "GetNodeInfo command failed: %08x\n", status);
hermon_hw_fini(state, cleanup);
HERMON_ATTACH_MSG(state->hs_attach_buf,
"hw_init_getnodeinfo_cmd_fail");
/* This case is not the degraded one */
return (DDI_FAILURE);
}
/*
* If the NodeGUID value was set in OBP properties, then we use that
* value. But we still print a message if the value we queried from
* firmware does not match this value.
*
* Otherwise if OBP value is not set then we use the value from
* firmware unconditionally.
*/
if (state->hs_cfg_profile->cp_nodeguid) {
state->hs_nodeguid = state->hs_cfg_profile->cp_nodeguid;
} else {
state->hs_nodeguid = nodeinfo.NodeGUID;
}
if (state->hs_nodeguid != nodeinfo.NodeGUID) {
cmn_err(CE_NOTE, "!NodeGUID value queried from firmware "
"does not match value set by device property");
}
/*
* If the SystemImageGUID value was set in OBP properties, then we use
* that value. But we still print a message if the value we queried
* from firmware does not match this value.
*
* Otherwise if OBP value is not set then we use the value from
* firmware unconditionally.
*/
if (state->hs_cfg_profile->cp_sysimgguid) {
state->hs_sysimgguid = state->hs_cfg_profile->cp_sysimgguid;
} else {
state->hs_sysimgguid = nodeinfo.SystemImageGUID;
}
if (state->hs_sysimgguid != nodeinfo.SystemImageGUID) {
cmn_err(CE_NOTE, "!SystemImageGUID value queried from firmware "
"does not match value set by device property");
}
/* Get NodeDescription */
status = hermon_getnodedesc_cmd_post(state, HERMON_CMD_NOSLEEP_SPIN,
(sm_nodedesc_t *)&state->hs_nodedesc);
if (status != HERMON_CMD_SUCCESS) {
cmn_err(CE_CONT, "GetNodeDesc command failed: %08x\n", status);
hermon_hw_fini(state, cleanup);
HERMON_ATTACH_MSG(state->hs_attach_buf,
"hw_init_getnodedesc_cmd_fail");
/* This case is not the degraded one */
return (DDI_FAILURE);
}
return (DDI_SUCCESS);
}
/*
* hermon_hw_fini()
* Context: Only called from attach() and/or detach() path contexts
*/
static void
hermon_hw_fini(hermon_state_t *state, hermon_drv_cleanup_level_t cleanup)
{
uint_t num_ports;
int i, status;
/*
* JBDB - We might not want to run these returns in all cases of
* Bad News. We should still attempt to free all of the DMA memory
* resources... This needs to be worked last, after all allocations
* are implemented. For now, and possibly for later, this works.
*/
switch (cleanup) {
/*
* If we add more driver initialization steps that should be cleaned
* up here, we need to ensure that HERMON_DRV_CLEANUP_ALL is still the
* first entry (i.e. corresponds to the last init step).
*/
case HERMON_DRV_CLEANUP_ALL:
/* Shutdown the Hermon IB port(s) */
num_ports = state->hs_cfg_profile->cp_num_ports;
(void) hermon_hca_ports_shutdown(state, num_ports);
/* FALLTHROUGH */
case HERMON_DRV_CLEANUP_LEVEL19:
/* Teardown resources used for multicast group handling */
hermon_mcg_fini(state);
/* FALLTHROUGH */
case HERMON_DRV_CLEANUP_LEVEL18:
/* Unreserve the special QP contexts */
hermon_special_qp_contexts_unreserve(state);
/* FALLTHROUGH */
case HERMON_DRV_CLEANUP_LEVEL17:
/*
* Attempt to teardown all event queues (EQ). If we fail
* here then print a warning message and return. Something
* (either in HW or SW) has gone seriously wrong.
*/
status = hermon_eq_fini_all(state);
if (status != DDI_SUCCESS) {
HERMON_WARNING(state, "failed to teardown EQs");
return;
}
/* FALLTHROUGH */
case HERMON_DRV_CLEANUP_LEVEL16:
/* Teardown Hermon interrupts */
hermon_isr_fini(state);
/* FALLTHROUGH */
case HERMON_DRV_CLEANUP_LEVEL15:
status = hermon_intr_or_msi_fini(state);
if (status != DDI_SUCCESS) {
HERMON_WARNING(state, "failed to free intr/MSI");
return;
}
/* FALLTHROUGH */
case HERMON_DRV_CLEANUP_LEVEL14:
/* Free the resources for the Hermon internal UAR pages */
hermon_internal_uarpg_fini(state);
/* FALLTHROUGH */
case HERMON_DRV_CLEANUP_LEVEL13:
/*
* Free the PD that was used internally by Hermon software. If
* we fail here then print a warning and return. Something
* (probably software-related, but perhaps HW) has gone wrong.
*/
status = hermon_pd_free(state, &state->hs_pdhdl_internal);
if (status != DDI_SUCCESS) {
HERMON_WARNING(state, "failed to free internal PD");
return;
}
/* FALLTHROUGH */
case HERMON_DRV_CLEANUP_LEVEL12:
/* Cleanup all the phase2 resources first */
hermon_rsrc_fini(state, HERMON_RSRC_CLEANUP_ALL);
/* FALLTHROUGH */
case HERMON_DRV_CLEANUP_LEVEL11:
/* LEVEL11 is after INIT_HCA */
/* FALLTHROUGH */
case HERMON_DRV_CLEANUP_LEVEL10:
/*
* Unmap the ICM memory area with UNMAP_ICM command.
*/
status = hermon_unmap_icm_cmd_post(state, NULL);
if (status != DDI_SUCCESS) {
cmn_err(CE_WARN,
"hermon_hw_fini: failed to unmap ICM\n");
}
/* Free the initial ICM DMA handles */
hermon_icm_dma_fini(state);
/* Free the ICM table structures */
hermon_icm_tables_fini(state);
/* Free the ICM table handles */
kmem_free(state->hs_icm, HERMON_NUM_ICM_RESOURCES *
sizeof (hermon_icm_table_t));
/* FALLTHROUGH */
case HERMON_DRV_CLEANUP_LEVEL9:
/*
* Unmap the ICM Aux memory area with UNMAP_ICM_AUX command.
*/
status = hermon_unmap_icm_aux_cmd_post(state);
if (status != HERMON_CMD_SUCCESS) {
cmn_err(CE_NOTE,
"hermon_hw_fini: failed to unmap ICMA\n");
}
/* FALLTHROUGH */
case HERMON_DRV_CLEANUP_LEVEL8:
/*
* Deallocate ICM Aux DMA memory.
*/
hermon_dma_free(&state->hs_icma_dma);
/* FALLTHROUGH */
case HERMON_DRV_CLEANUP_LEVEL7:
if (state->hs_fm_uarhdl) {
hermon_regs_map_free(state, &state->hs_fm_uarhdl);
state->hs_fm_uarhdl = NULL;
}
if (state->hs_reg_uarhdl) {
ddi_regs_map_free(&state->hs_reg_uarhdl);
state->hs_reg_uarhdl = NULL;
}
if (state->hs_bf_offset != 0 && state->hs_reg_bfhdl) {
ddi_regs_map_free(&state->hs_reg_bfhdl);
state->hs_reg_bfhdl = NULL;
}
for (i = 0; i < HERMON_MAX_PORTS; i++) {
if (state->hs_pkey[i]) {
kmem_free(state->hs_pkey[i], (1 <<
state->hs_cfg_profile->cp_log_max_pkeytbl) *
sizeof (ib_pkey_t));
state->hs_pkey[i] = NULL;
}
if (state->hs_guid[i]) {
kmem_free(state->hs_guid[i], (1 <<
state->hs_cfg_profile->cp_log_max_gidtbl) *
sizeof (ib_guid_t));
state->hs_guid[i] = NULL;
}
}
/* FALLTHROUGH */
case HERMON_DRV_CLEANUP_LEVEL6:
/*
* Unmap the firmware memory area with UNMAP_FA command.
*/
status = hermon_unmap_fa_cmd_post(state);
if (status != HERMON_CMD_SUCCESS) {
cmn_err(CE_NOTE,
"hermon_hw_fini: failed to unmap FW\n");
}
/*
* Deallocate firmware DMA memory.
*/
hermon_dma_free(&state->hs_fw_dma);
/* FALLTHROUGH */
case HERMON_DRV_CLEANUP_LEVEL5:
/* stop the poll thread */
if (state->hs_fm_poll_thread) {
ddi_periodic_delete(state->hs_fm_poll_thread);
state->hs_fm_poll_thread = NULL;
}
/* FALLTHROUGH */
case HERMON_DRV_CLEANUP_LEVEL4:
/* Then cleanup the phase1 resources */
hermon_rsrc_fini(state, HERMON_RSRC_CLEANUP_PHASE1_COMPLETE);
/* FALLTHROUGH */
case HERMON_DRV_CLEANUP_LEVEL3:
/* Teardown any resources allocated for the config profile */
hermon_cfg_profile_fini(state);
/* FALLTHROUGH */
case HERMON_DRV_CLEANUP_LEVEL2:
#ifdef HERMON_SUPPORTS_MSIX_BAR
/*
* unmap 3rd BAR, MSIX BAR
*/
if (state->hs_reg_msihdl) {
ddi_regs_map_free(&state->hs_reg_msihdl);
state->hs_reg_msihdl = NULL;
}
/* FALLTHROUGH */
#endif
case HERMON_DRV_CLEANUP_LEVEL1:
case HERMON_DRV_CLEANUP_LEVEL0:
/*
* LEVEL1 and LEVEL0 resources are freed in
* hermon_drv_fini2().
*/
break;
default:
HERMON_WARNING(state, "unexpected driver cleanup level");
return;
}
}
/*
* hermon_soft_state_init()
* Context: Only called from attach() path context
*/
static int
hermon_soft_state_init(hermon_state_t *state)
{
ibt_hca_attr_t *hca_attr;
uint64_t maxval, val;
ibt_hca_flags_t caps = IBT_HCA_NO_FLAGS;
ibt_hca_flags2_t caps2 = IBT_HCA2_NO_FLAGS;
int status;
int max_send_wqe_bytes;
int max_recv_wqe_bytes;
/*
* The ibc_hca_info_t struct is passed to the IBTF. This is the
* routine where we initialize it. Many of the init values come from
* either configuration variables or successful queries of the Hermon
* hardware capabilities.
*/
state->hs_ibtfinfo.hca_ci_vers = IBCI_V3;
state->hs_ibtfinfo.hca_dip = state->hs_dip;
state->hs_ibtfinfo.hca_handle = (ibc_hca_hdl_t)state;
state->hs_ibtfinfo.hca_ops = &hermon_ibc_ops;
hca_attr = kmem_zalloc(sizeof (ibt_hca_attr_t), KM_SLEEP);
state->hs_ibtfinfo.hca_attr = hca_attr;
hca_attr->hca_fw_major_version = state->hs_fw.fw_rev_major;
hca_attr->hca_fw_minor_version = state->hs_fw.fw_rev_minor;
hca_attr->hca_fw_micro_version = state->hs_fw.fw_rev_subminor;
/* CQ interrupt moderation maximums - each limited to 16 bits */
hca_attr->hca_max_cq_mod_count = 0xFFFF;
hca_attr->hca_max_cq_mod_usec = 0xFFFF;
/* CQ relocation to other EQs - change when multiple MSI-Xs are used */
hca_attr->hca_max_cq_handlers = 1;
/*
* Determine HCA capabilities:
* No default support for IBT_HCA_RD, IBT_HCA_RAW_MULTICAST,
* IBT_HCA_ATOMICS_GLOBAL, IBT_HCA_RESIZE_CHAN, IBT_HCA_INIT_TYPE,
* or IBT_HCA_SHUTDOWN_PORT
* But IBT_HCA_AH_PORT_CHECK, IBT_HCA_SQD_RTS_PORT, IBT_HCA_SI_GUID,
* IBT_HCA_RNR_NAK, IBT_HCA_CURRENT_QP_STATE, IBT_HCA_PORT_UP,
* IBT_HCA_SRQ, IBT_HCA_RESIZE_SRQ and IBT_HCA_FMR are always
* supported
* All other features are conditionally supported, depending on the
* status returned by the Hermon HCA in QUERY_DEV_LIM.
*/
if (state->hs_devlim.ud_multi) {
caps |= IBT_HCA_UD_MULTICAST;
}
if (state->hs_devlim.atomic) {
caps |= IBT_HCA_ATOMICS_HCA;
}
if (state->hs_devlim.apm) {
caps |= IBT_HCA_AUTO_PATH_MIG;
}
if (state->hs_devlim.pkey_v) {
caps |= IBT_HCA_PKEY_CNTR;
}
if (state->hs_devlim.qkey_v) {
caps |= IBT_HCA_QKEY_CNTR;
}
if (state->hs_devlim.ipoib_cksm) {
caps |= IBT_HCA_CKSUM_FULL;
caps2 |= IBT_HCA2_IP_CLASS;
}
if (state->hs_devlim.mod_wr_srq) {
caps |= IBT_HCA_RESIZE_SRQ;
}
if (state->hs_devlim.lif) {
caps |= IBT_HCA_LOCAL_INVAL_FENCE;
}
if (state->hs_devlim.reserved_lkey) {
caps2 |= IBT_HCA2_RES_LKEY;
hca_attr->hca_reserved_lkey = state->hs_devlim.rsv_lkey;
}
if (state->hs_devlim.local_inv && state->hs_devlim.remote_inv &&
state->hs_devlim.fast_reg_wr) { /* fw needs to be >= 2.6.636 */
if (state->hs_fw.fw_rev_major > 2)
caps2 |= IBT_HCA2_MEM_MGT_EXT;
else if (state->hs_fw.fw_rev_major == 2)
if (state->hs_fw.fw_rev_minor > 6)
caps2 |= IBT_HCA2_MEM_MGT_EXT;
else if (state->hs_fw.fw_rev_minor == 6)
if (state->hs_fw.fw_rev_subminor >= 636)
caps2 |= IBT_HCA2_MEM_MGT_EXT;
}
if (state->hs_devlim.zb) {
caps |= IBT_HCA_ZERO_BASED_VA;
}
if (state->hs_devlim.mps) {
caps |= IBT_HCA_MULT_PAGE_SZ_MR;
}
caps |= (IBT_HCA_AH_PORT_CHECK | IBT_HCA_SQD_SQD_PORT |
IBT_HCA_SI_GUID | IBT_HCA_RNR_NAK | IBT_HCA_CURRENT_QP_STATE |
IBT_HCA_PORT_UP | IBT_HCA_RC_SRQ | IBT_HCA_UD_SRQ | IBT_HCA_FMR);
if (state->hs_devlim.log_max_gso_sz) {
hca_attr->hca_max_lso_size =
(1 << state->hs_devlim.log_max_gso_sz);
/* More work needed in hermon_post_send for larger values */
hca_attr->hca_max_lso_hdr_size = 0x2c; /* IPv4 only */
}
caps |= IBT_HCA_WQE_SIZE_INFO;
max_send_wqe_bytes = state->hs_devlim.max_desc_sz_sq;
max_recv_wqe_bytes = state->hs_devlim.max_desc_sz_rq;
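/*
* WQE sizing note: each scatter/gather entry occupies 16 bytes of
* WQE space, hence the divide by 16 below. The "- 4" and "- 1"
* correspond to the WQE segments that precede the SGL: a 16-byte
* control segment plus, for UD sends, a 48-byte datagram (address
* vector) segment. Likewise, the rdma_sgl_overhead of 1 reflects
* the extra 16-byte remote-address segment used by RDMA operations.
*/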
hca_attr->hca_ud_send_sgl_sz = (max_send_wqe_bytes / 16) - 4;
hca_attr->hca_conn_send_sgl_sz = (max_send_wqe_bytes / 16) - 1;
hca_attr->hca_conn_rdma_sgl_overhead = 1;
hca_attr->hca_recv_sgl_sz = max_recv_wqe_bytes / 16;
/* We choose not to support "inline" unless it improves performance */
hca_attr->hca_max_inline_size = 0;
hca_attr->hca_ud_send_inline_sz = 0;
hca_attr->hca_conn_send_inline_sz = 0;
hca_attr->hca_conn_rdmaw_inline_overhead = 4;
hca_attr->hca_flags = caps;
hca_attr->hca_flags2 = caps2;
/*
* Set hca_attr's IDs
*/
hca_attr->hca_vendor_id = state->hs_vendor_id;
hca_attr->hca_device_id = state->hs_device_id;
hca_attr->hca_version_id = state->hs_revision_id;
/*
* Determine number of available QPs and max QP size. Number of
* available QPs is determined by subtracting the number of
* "reserved QPs" (i.e. reserved for firmware use) from the
* total number configured.
*/
val = ((uint64_t)1 << state->hs_cfg_profile->cp_log_num_qp);
hca_attr->hca_max_qp = val - ((uint64_t)1 <<
state->hs_devlim.log_rsvd_qp);
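/*
* For example (values illustrative only): cp_log_num_qp = 17 and
* log_rsvd_qp = 7 would yield 131072 - 128 = 130944 usable QPs.
*/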
maxval = ((uint64_t)1 << state->hs_devlim.log_max_qp_sz);
val = ((uint64_t)1 << state->hs_cfg_profile->cp_log_max_qp_sz);
if (val > maxval) {
kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
HERMON_ATTACH_MSG(state->hs_attach_buf,
"soft_state_init_maxqpsz_toobig_fail");
return (DDI_FAILURE);
}
/* we need to reduce this by the max space needed for headroom */
hca_attr->hca_max_qp_sz = (uint_t)val - (HERMON_QP_OH_SIZE >>
HERMON_QP_WQE_LOG_MINIMUM) - 1;
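/*
* The subtraction above converts HERMON_QP_OH_SIZE (the headroom,
* in bytes) into minimum-sized WQE units, so the reported maximum
* excludes the WQE slots consumed by headroom.
*/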
/*
* Determine max scatter-gather size in WQEs. The HCA has split
* the max sgl into rec'v Q and send Q values. Use the least.
*
* This is mainly useful for legacy clients. Smart clients
* such as IPoIB will use the IBT_HCA_WQE_SIZE_INFO sgl info.
*/
if (state->hs_devlim.max_sg_rq <= state->hs_devlim.max_sg_sq) {
maxval = state->hs_devlim.max_sg_rq;
} else {
maxval = state->hs_devlim.max_sg_sq;
}
val = state->hs_cfg_profile->cp_wqe_max_sgl;
if (val > maxval) {
kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
HERMON_ATTACH_MSG(state->hs_attach_buf,
"soft_state_init_toomanysgl_fail");
return (DDI_FAILURE);
}
/* If the rounded value for max SGL is too large, cap it */
if (state->hs_cfg_profile->cp_wqe_real_max_sgl > maxval) {
state->hs_cfg_profile->cp_wqe_real_max_sgl = (uint32_t)maxval;
val = maxval;
} else {
val = state->hs_cfg_profile->cp_wqe_real_max_sgl;
}
hca_attr->hca_max_sgl = (uint_t)val;
hca_attr->hca_max_rd_sgl = 0; /* zero because RD is unsupported */
/*
* Determine number of available CQs and max CQ size. Number of
* available CQs is determined by subtracting the number of
* "reserved CQs" (i.e. reserved for firmware use) from the
* total number configured.
*/
val = ((uint64_t)1 << state->hs_cfg_profile->cp_log_num_cq);
hca_attr->hca_max_cq = val - ((uint64_t)1 <<
state->hs_devlim.log_rsvd_cq);
maxval = ((uint64_t)1 << state->hs_devlim.log_max_cq_sz);
val = ((uint64_t)1 << state->hs_cfg_profile->cp_log_max_cq_sz) - 1;
if (val > maxval) {
kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
HERMON_ATTACH_MSG(state->hs_attach_buf,
"soft_state_init_maxcqsz_toobig_fail");
return (DDI_FAILURE);
}
hca_attr->hca_max_cq_sz = (uint_t)val;
/*
* Determine number of available SRQs and max SRQ size. Number of
* available SRQs is determined by subtracting the number of
* "reserved SRQs" (i.e. reserved for firmware use) from the
* total number configured.
*/
val = ((uint64_t)1 << state->hs_cfg_profile->cp_log_num_srq);
hca_attr->hca_max_srqs = val - ((uint64_t)1 <<
state->hs_devlim.log_rsvd_srq);
maxval = ((uint64_t)1 << state->hs_devlim.log_max_srq_sz);
val = ((uint64_t)1 << state->hs_cfg_profile->cp_log_max_srq_sz);
if (val > maxval) {
kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
HERMON_ATTACH_MSG(state->hs_attach_buf,
"soft_state_init_maxsrqsz_toobig_fail");
return (DDI_FAILURE);
}
hca_attr->hca_max_srqs_sz = (uint_t)val;
val = hca_attr->hca_recv_sgl_sz - 1; /* SRQ has a list link */
maxval = state->hs_devlim.max_sg_rq - 1;
if (val > maxval) {
kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
HERMON_ATTACH_MSG(state->hs_attach_buf,
"soft_state_init_toomanysrqsgl_fail");
return (DDI_FAILURE);
}
hca_attr->hca_max_srq_sgl = (uint_t)val;
/*
* Determine supported HCA page sizes
* XXX
* For now we simply return the system pagesize as the only supported
* pagesize
*/
hca_attr->hca_page_sz = ((PAGESIZE == (1 << 13)) ? IBT_PAGE_8K :
IBT_PAGE_4K);
/*
* Determine number of available MemReg, MemWin, and their max size.
* Number of available MRs and MWs is determined by subtracting
* the number of "reserved MPTs" (i.e. reserved for firmware use)
* from the total number configured for each.
*/
val = ((uint64_t)1 << state->hs_cfg_profile->cp_log_num_dmpt);
hca_attr->hca_max_memr = val - ((uint64_t)1 <<
state->hs_devlim.log_rsvd_dmpt);
hca_attr->hca_max_mem_win = state->hs_devlim.mem_win ? (val -
((uint64_t)1 << state->hs_devlim.log_rsvd_dmpt)) : 0;
maxval = state->hs_devlim.log_max_mrw_sz;
val = state->hs_cfg_profile->cp_log_max_mrw_sz;
if (val > maxval) {
kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
HERMON_ATTACH_MSG(state->hs_attach_buf,
"soft_state_init_maxmrwsz_toobig_fail");
return (DDI_FAILURE);
}
hca_attr->hca_max_memr_len = ((uint64_t)1 << val);
/* Determine RDMA/Atomic properties */
val = ((uint64_t)1 << state->hs_cfg_profile->cp_log_num_rdb);
hca_attr->hca_max_rsc = (uint_t)val;
val = state->hs_cfg_profile->cp_hca_max_rdma_in_qp;
hca_attr->hca_max_rdma_in_qp = (uint8_t)val;
val = state->hs_cfg_profile->cp_hca_max_rdma_out_qp;
hca_attr->hca_max_rdma_out_qp = (uint8_t)val;
hca_attr->hca_max_rdma_in_ee = 0;
hca_attr->hca_max_rdma_out_ee = 0;
/*
* Determine maximum number of raw IPv6 and Ether QPs. Set to 0
* because neither type of raw QP is supported
*/
hca_attr->hca_max_ipv6_qp = 0;
hca_attr->hca_max_ether_qp = 0;
/* Determine max number of MCGs and max QP-per-MCG */
val = ((uint64_t)1 << state->hs_cfg_profile->cp_log_num_qp);
hca_attr->hca_max_mcg_qps = (uint_t)val;
val = ((uint64_t)1 << state->hs_cfg_profile->cp_log_num_mcg);
hca_attr->hca_max_mcg = (uint_t)val;
val = state->hs_cfg_profile->cp_num_qp_per_mcg;
hca_attr->hca_max_qp_per_mcg = (uint_t)val;
/* Determine max number partitions (i.e. PKeys) */
maxval = ((uint64_t)state->hs_cfg_profile->cp_num_ports <<
state->hs_queryport.log_max_pkey);
val = ((uint64_t)state->hs_cfg_profile->cp_num_ports <<
state->hs_cfg_profile->cp_log_max_pkeytbl);
if (val > maxval) {
kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
HERMON_ATTACH_MSG(state->hs_attach_buf,
"soft_state_init_toomanypkey_fail");
return (DDI_FAILURE);
}
hca_attr->hca_max_partitions = (uint16_t)val;
/* Determine number of ports */
maxval = state->hs_devlim.num_ports;
val = state->hs_cfg_profile->cp_num_ports;
if ((val > maxval) || (val == 0)) {
kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
HERMON_ATTACH_MSG(state->hs_attach_buf,
"soft_state_init_toomanyports_fail");
return (DDI_FAILURE);
}
hca_attr->hca_nports = (uint8_t)val;
/* Copy NodeGUID and SystemImageGUID from softstate */
hca_attr->hca_node_guid = state->hs_nodeguid;
hca_attr->hca_si_guid = state->hs_sysimgguid;
/*
* Determine local ACK delay. Use the value suggested by the Hermon
* hardware (from the QUERY_DEV_CAP command)
*/
hca_attr->hca_local_ack_delay = state->hs_devlim.ca_ack_delay;
/* Determine max SGID table and PKey table sizes */
val = ((uint64_t)1 << state->hs_cfg_profile->cp_log_max_gidtbl);
hca_attr->hca_max_port_sgid_tbl_sz = (uint_t)val;
val = ((uint64_t)1 << state->hs_cfg_profile->cp_log_max_pkeytbl);
hca_attr->hca_max_port_pkey_tbl_sz = (uint16_t)val;
/* Determine max number of PDs */
maxval = ((uint64_t)1 << state->hs_devlim.log_max_pd);
val = ((uint64_t)1 << state->hs_cfg_profile->cp_log_num_pd);
if (val > maxval) {
kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
HERMON_ATTACH_MSG(state->hs_attach_buf,
"soft_state_init_toomanypd_fail");
return (DDI_FAILURE);
}
hca_attr->hca_max_pd = (uint_t)val;
/* Determine max number of Address Handles (NOT IN ARBEL or HERMON) */
hca_attr->hca_max_ah = 0;
/* No RDDs or EECs (since Reliable Datagram is not supported) */
hca_attr->hca_max_rdd = 0;
hca_attr->hca_max_eec = 0;
/* Initialize lock for reserved UAR page access */
mutex_init(&state->hs_uar_lock, NULL, MUTEX_DRIVER,
DDI_INTR_PRI(state->hs_intrmsi_pri));
/* Initialize the flash fields */
state->hs_fw_flashstarted = 0;
mutex_init(&state->hs_fw_flashlock, NULL, MUTEX_DRIVER,
DDI_INTR_PRI(state->hs_intrmsi_pri));
/* Initialize the lock for the info ioctl */
mutex_init(&state->hs_info_lock, NULL, MUTEX_DRIVER,
DDI_INTR_PRI(state->hs_intrmsi_pri));
/* Initialize the AVL tree for QP number support */
hermon_qpn_avl_init(state);
/* Initialize the kstat info structure */
status = hermon_kstat_init(state);
if (status != DDI_SUCCESS) {
hermon_qpn_avl_fini(state);
mutex_destroy(&state->hs_info_lock);
mutex_destroy(&state->hs_fw_flashlock);
mutex_destroy(&state->hs_uar_lock);
kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
HERMON_ATTACH_MSG(state->hs_attach_buf,
"soft_state_init_kstatinit_fail");
return (DDI_FAILURE);
}
return (DDI_SUCCESS);
}
/*
* hermon_soft_state_fini()
* Context: Called only from detach() path context
*/
static void
hermon_soft_state_fini(hermon_state_t *state)
{
/* Teardown the kstat info */
hermon_kstat_fini(state);
/* Teardown the AVL tree for QP number support */
hermon_qpn_avl_fini(state);
/* Free up info ioctl mutex */
mutex_destroy(&state->hs_info_lock);
/* Free up flash mutex */
mutex_destroy(&state->hs_fw_flashlock);
/* Free up the UAR page access mutex */
mutex_destroy(&state->hs_uar_lock);
/* Free up the hca_attr struct */
kmem_free(state->hs_ibtfinfo.hca_attr, sizeof (ibt_hca_attr_t));
}
/*
* hermon_icm_config_setup()
* Context: Only called from attach() path context
*/
static int
hermon_icm_config_setup(hermon_state_t *state,
hermon_hw_initqueryhca_t *inithca)
{
hermon_hw_querydevlim_t *devlim;
hermon_cfg_profile_t *cfg;
hermon_icm_table_t *icm_p[HERMON_NUM_ICM_RESOURCES];
hermon_icm_table_t *icm;
hermon_icm_table_t *tmp;
uint64_t icm_addr;
uint64_t icm_size;
int status, i, j;
/* Bring in local devlims, cfg_profile and hs_icm table list */
devlim = &state->hs_devlim;
cfg = state->hs_cfg_profile;
icm = state->hs_icm;
/*
* Assign each ICM table's entry size from data in the devlims,
* except for RDB and MCG sizes, which are not returned in devlims
* but do have a fixed size, and the UAR context entry size, which
* we determine. For this, we use the "cp_num_pgs_per_uce" value
* from our hs_cfg_profile.
*/
icm[HERMON_CMPT].object_size = devlim->cmpt_entry_sz;
icm[HERMON_CMPT_QPC].object_size = devlim->cmpt_entry_sz;
icm[HERMON_CMPT_SRQC].object_size = devlim->cmpt_entry_sz;
icm[HERMON_CMPT_CQC].object_size = devlim->cmpt_entry_sz;
icm[HERMON_CMPT_EQC].object_size = devlim->cmpt_entry_sz;
icm[HERMON_MTT].object_size = devlim->mtt_entry_sz;
icm[HERMON_DMPT].object_size = devlim->dmpt_entry_sz;
icm[HERMON_QPC].object_size = devlim->qpc_entry_sz;
icm[HERMON_CQC].object_size = devlim->cqc_entry_sz;
icm[HERMON_SRQC].object_size = devlim->srq_entry_sz;
icm[HERMON_EQC].object_size = devlim->eqc_entry_sz;
icm[HERMON_RDB].object_size = devlim->rdmardc_entry_sz *
cfg->cp_hca_max_rdma_in_qp;
icm[HERMON_MCG].object_size = HERMON_MCGMEM_SZ(state);
icm[HERMON_ALTC].object_size = devlim->altc_entry_sz;
icm[HERMON_AUXC].object_size = devlim->aux_entry_sz;
/* Assign each ICM table's log2 number of entries */
icm[HERMON_CMPT].log_num_entries = cfg->cp_log_num_cmpt;
icm[HERMON_CMPT_QPC].log_num_entries = cfg->cp_log_num_qp;
icm[HERMON_CMPT_SRQC].log_num_entries = cfg->cp_log_num_srq;
icm[HERMON_CMPT_CQC].log_num_entries = cfg->cp_log_num_cq;
icm[HERMON_CMPT_EQC].log_num_entries = HERMON_NUM_EQ_SHIFT;
icm[HERMON_MTT].log_num_entries = cfg->cp_log_num_mtt;
icm[HERMON_DMPT].log_num_entries = cfg->cp_log_num_dmpt;
icm[HERMON_QPC].log_num_entries = cfg->cp_log_num_qp;
icm[HERMON_SRQC].log_num_entries = cfg->cp_log_num_srq;
icm[HERMON_CQC].log_num_entries = cfg->cp_log_num_cq;
icm[HERMON_EQC].log_num_entries = HERMON_NUM_EQ_SHIFT;
icm[HERMON_RDB].log_num_entries = cfg->cp_log_num_qp;
icm[HERMON_MCG].log_num_entries = cfg->cp_log_num_mcg;
icm[HERMON_ALTC].log_num_entries = cfg->cp_log_num_qp;
icm[HERMON_AUXC].log_num_entries = cfg->cp_log_num_qp;
/* Initialize the ICM tables */
hermon_icm_tables_init(state);
/*
* ICM tables must be aligned on their size in the ICM address
* space. So, here we order the tables from largest total table
* size to the smallest. All tables are a power of 2 in size, so
* this will ensure that all tables are aligned on their own size
* without wasting space in the ICM.
*
* In order to easily set the ICM addresses without needing to
* worry about the ordering of our table indices as relates to
* the hermon_rsrc_type_t enum, we will use a list of pointers
* representing the tables for the sort, then assign ICM addresses
* below using it.
*/
for (i = 0; i < HERMON_NUM_ICM_RESOURCES; i++) {
icm_p[i] = &icm[i];
}
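/*
* Simple bubble sort of the pointer list, descending by table_size.
* The cMPT sub-tables (QPC/SRQC/CQC/EQC) never receive their own
* ICM ranges; their base addresses are carved out of the
* HERMON_CMPT region in the assignment loop below.
*/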
for (i = HERMON_NUM_ICM_RESOURCES; i > 0; i--) {
switch (i) {
case HERMON_CMPT_QPC:
case HERMON_CMPT_SRQC:
case HERMON_CMPT_CQC:
case HERMON_CMPT_EQC:
continue;
}
for (j = 1; j < i; j++) {
if (icm_p[j]->table_size > icm_p[j - 1]->table_size) {
tmp = icm_p[j];
icm_p[j] = icm_p[j - 1];
icm_p[j - 1] = tmp;
}
}
}
/* Initialize the ICM address and ICM size */
icm_addr = icm_size = 0;
/*
* Set the ICM base address of each table, using our sorted
* list of pointers from above.
*/
for (i = 0; i < HERMON_NUM_ICM_RESOURCES; i++) {
j = icm_p[i]->icm_type;
switch (j) {
case HERMON_CMPT_QPC:
case HERMON_CMPT_SRQC:
case HERMON_CMPT_CQC:
case HERMON_CMPT_EQC:
continue;
}
if (icm[j].table_size) {
/*
* Set the ICM base address in the table, save the
* ICM offset in the rsrc pool and increment the
* total ICM allocation.
*/
icm[j].icm_baseaddr = icm_addr;
if (hermon_verbose) {
IBTF_DPRINTF_L2("ICMADDR", "rsrc %x @ %p"
" size %llx", j, icm[j].icm_baseaddr,
icm[j].table_size);
}
icm_size += icm[j].table_size;
}
/* Verify that we don't exceed maximum ICM size */
if (icm_size > devlim->max_icm_size) {
/* free the ICM table memory resources */
hermon_icm_tables_fini(state);
cmn_err(CE_WARN, "ICM configuration exceeds maximum "
"configuration: max (0x%lx) requested (0x%lx)\n",
(ulong_t)devlim->max_icm_size, (ulong_t)icm_size);
HERMON_ATTACH_MSG(state->hs_attach_buf,
"icm_config_toobig_fail");
return (DDI_FAILURE);
}
/* assign address to the 4 pieces of the CMPT */
if (j == HERMON_CMPT) {
uint64_t cmpt_size = icm[j].table_size >> 2;
#define init_cmpt_icm_baseaddr(rsrc, indx) \
icm[rsrc].icm_baseaddr = icm_addr + (indx * cmpt_size);
init_cmpt_icm_baseaddr(HERMON_CMPT_QPC, 0);
init_cmpt_icm_baseaddr(HERMON_CMPT_SRQC, 1);
init_cmpt_icm_baseaddr(HERMON_CMPT_CQC, 2);
init_cmpt_icm_baseaddr(HERMON_CMPT_EQC, 3);
}
/* Increment the ICM address for the next table */
icm_addr += icm[j].table_size;
}
/* Populate the structure for the INIT_HCA command */
hermon_inithca_set(state, inithca);
/*
* Prior to invoking INIT_HCA, we must have ICM memory in place
* for the reserved objects in each table. We will allocate and map
* this initial ICM memory here. Note that given the assignment
* of span_size above, tables that are smaller or equal in total
* size to the default span_size will be mapped in full.
*/
status = hermon_icm_dma_init(state);
if (status != DDI_SUCCESS) {
/* free the ICM table memory resources */
hermon_icm_tables_fini(state);
HERMON_WARNING(state, "Failed to allocate initial ICM");
HERMON_ATTACH_MSG(state->hs_attach_buf,
"icm_config_dma_init_fail");
return (DDI_FAILURE);
}
return (DDI_SUCCESS);
}
/*
* hermon_inithca_set()
* Context: Only called from attach() path context
*/
static void
hermon_inithca_set(hermon_state_t *state, hermon_hw_initqueryhca_t *inithca)
{
hermon_cfg_profile_t *cfg;
hermon_icm_table_t *icm;
int i;
/* Populate the INIT_HCA structure */
icm = state->hs_icm;
cfg = state->hs_cfg_profile;
/* set version */
inithca->version = 0x02; /* PRM 0.36 */
/* set cacheline - log2 in 16-byte chunks */
inithca->log2_cacheline = 0x2; /* optimized for 64 byte cache */
/* we need to update the inithca info with the UAR info too */
inithca->uar.log_max_uars = highbit(cfg->cp_log_num_uar);
inithca->uar.uar_pg_sz = PAGESHIFT - HERMON_PAGESHIFT;
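/*
* Note: uar_pg_sz above encodes the UAR page size as a log2
* multiple of the HCA's native 4KB page, i.e. PAGESHIFT minus
* HERMON_PAGESHIFT.
*/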
/* Set endianness */
#ifdef _LITTLE_ENDIAN
inithca->big_endian = 0;
#else
inithca->big_endian = 1;
#endif
/* Port Checking is on by default */
inithca->udav_port_chk = HERMON_UDAV_PORTCHK_ENABLED;
/* Enable IPoIB checksum */
if (state->hs_devlim.ipoib_cksm)
inithca->chsum_en = 1;
/* Set each ICM table's attributes */
for (i = 0; i < HERMON_NUM_ICM_RESOURCES; i++) {
switch (icm[i].icm_type) {
case HERMON_CMPT:
inithca->tpt.cmpt_baseaddr = icm[i].icm_baseaddr;
break;
case HERMON_MTT:
inithca->tpt.mtt_baseaddr = icm[i].icm_baseaddr;
break;
case HERMON_DMPT:
inithca->tpt.dmpt_baseaddr = icm[i].icm_baseaddr;
inithca->tpt.log_dmpt_sz = icm[i].log_num_entries;
inithca->tpt.pgfault_rnr_to = 0; /* just in case */
break;
case HERMON_QPC:
inithca->context.log_num_qp = icm[i].log_num_entries;
inithca->context.qpc_baseaddr_h =
icm[i].icm_baseaddr >> 32;
inithca->context.qpc_baseaddr_l =
(icm[i].icm_baseaddr & 0xFFFFFFFF) >> 5;
break;
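/*
* Note: each *_baseaddr_l field in these cases holds bits [31:5]
* of the ICM base address, hence the ">> 5"; the low five bits
* are implicitly zero given the tables' alignment.
*/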
case HERMON_CQC:
inithca->context.log_num_cq = icm[i].log_num_entries;
inithca->context.cqc_baseaddr_h =
icm[i].icm_baseaddr >> 32;
inithca->context.cqc_baseaddr_l =
(icm[i].icm_baseaddr & 0xFFFFFFFF) >> 5;
break;
case HERMON_SRQC:
inithca->context.log_num_srq = icm[i].log_num_entries;
inithca->context.srqc_baseaddr_h =
icm[i].icm_baseaddr >> 32;
inithca->context.srqc_baseaddr_l =
(icm[i].icm_baseaddr & 0xFFFFFFFF) >> 5;
break;
case HERMON_EQC:
inithca->context.log_num_eq = icm[i].log_num_entries;
inithca->context.eqc_baseaddr_h =
icm[i].icm_baseaddr >> 32;
inithca->context.eqc_baseaddr_l =
(icm[i].icm_baseaddr & 0xFFFFFFFF) >> 5;
break;
case HERMON_RDB:
inithca->context.rdmardc_baseaddr_h =
icm[i].icm_baseaddr >> 32;
inithca->context.rdmardc_baseaddr_l =
(icm[i].icm_baseaddr & 0xFFFFFFFF) >> 5;
inithca->context.log_num_rdmardc =
icm[i].log_num_entries;
break;
case HERMON_MCG:
inithca->multi.mc_baseaddr = icm[i].icm_baseaddr;
inithca->multi.log_mc_tbl_sz = icm[i].log_num_entries;
inithca->multi.log_mc_tbl_ent =
highbit(HERMON_MCGMEM_SZ(state)) - 1;
inithca->multi.log_mc_tbl_hash_sz =
cfg->cp_log_num_mcg_hash;
inithca->multi.mc_hash_fn = HERMON_MCG_DEFAULT_HASH_FN;
break;
case HERMON_ALTC:
inithca->context.altc_baseaddr = icm[i].icm_baseaddr;
break;
case HERMON_AUXC:
inithca->context.auxc_baseaddr = icm[i].icm_baseaddr;
break;
default:
break;
}
}
}
/*
* hermon_icm_tables_init()
* Context: Only called from attach() path context
*
* Dynamic ICM breaks the various ICM tables into "span_size" chunks
* to enable allocation of backing memory on demand. Arbel used a
* fixed-size ARBEL_ICM_SPAN_SIZE (initially 512KB) as the
* span_size for all ICM chunks. Hermon has other considerations,
* so the span_size used differs from Arbel.
*
* The basic considerations for why Hermon differs are:
*
* 1) ICM memory is in units of HERMON pages.
*
* 2) The AUXC table is approximately 1 byte per QP.
*
* 3) ICM memory for AUXC, ALTC, and RDB is allocated when
* the ICM memory for the corresponding QPC is allocated.
*
* 4) ICM memory for the CMPT corresponding to the various primary
* resources (QPC, SRQC, CQC, and EQC) is allocated when the ICM
* memory for the primary resource is allocated.
*
* One HERMON page (4KB) would typically map 4K QPs worth of AUXC.
* So, the minimum chunk for the various QPC related ICM memory should
* all be allocated to support the 4K QPs. Currently, this means the
* amount of memory for the various QP chunks is:
*
* QPC 256*4K bytes
* RDB 128*4K bytes
* CMPT 64*4K bytes
* ALTC 64*4K bytes
* AUXC 1*4K bytes
*
* The span_size chosen for the QP resource is 4KB of AUXC entries,
* or 1 HERMON_PAGESIZE worth, which is the minimum ICM mapping size.
*
* Other ICM resources can have their span_size be more arbitrary.
* This is 4K (HERMON_ICM_SPAN), except for MTTs because they are tiny.
*/
/* macro to make the code below cleaner */
#define init_dependent(rsrc, dep) \
icm[dep].span = icm[rsrc].span; \
icm[dep].num_spans = icm[rsrc].num_spans; \
icm[dep].split_shift = icm[rsrc].split_shift; \
icm[dep].span_mask = icm[rsrc].span_mask; \
icm[dep].span_shift = icm[rsrc].span_shift; \
icm[dep].rsrc_mask = icm[rsrc].rsrc_mask; \
if (hermon_verbose) { \
IBTF_DPRINTF_L2("hermon", "tables_init: " \
"rsrc (0x%x) size (0x%lx) span (0x%x) " \
"num_spans (0x%x)", dep, icm[dep].table_size, \
icm[dep].span, icm[dep].num_spans); \
IBTF_DPRINTF_L2("hermon", "tables_init: " \
"span_shift (0x%x) split_shift (0x%x)", \
icm[dep].span_shift, icm[dep].split_shift); \
IBTF_DPRINTF_L2("hermon", "tables_init: " \
"span_mask (0x%x) rsrc_mask (0x%x)", \
icm[dep].span_mask, icm[dep].rsrc_mask); \
}
static void
hermon_icm_tables_init(hermon_state_t *state)
{
hermon_icm_table_t *icm;
int i, k;
uint32_t per_split;
icm = state->hs_icm;
for (i = 0; i < HERMON_NUM_ICM_RESOURCES; i++) {
icm[i].icm_type = i;
icm[i].num_entries = 1 << icm[i].log_num_entries;
icm[i].log_object_size = highbit(icm[i].object_size) - 1;
icm[i].table_size = icm[i].num_entries <<
icm[i].log_object_size;
/* deal with "dependent" resource types */
switch (i) {
case HERMON_AUXC:
#ifdef HERMON_FW_WORKAROUND
icm[i].table_size = 0x80000000ull;
/* FALLTHROUGH */
#endif
case HERMON_CMPT_QPC:
case HERMON_RDB:
case HERMON_ALTC:
init_dependent(HERMON_QPC, i);
continue;
case HERMON_CMPT_SRQC:
init_dependent(HERMON_SRQC, i);
continue;
case HERMON_CMPT_CQC:
init_dependent(HERMON_CQC, i);
continue;
case HERMON_CMPT_EQC:
init_dependent(HERMON_EQC, i);
continue;
}
icm[i].span = HERMON_ICM_SPAN; /* default #rsrc's in 1 span */
if (i == HERMON_MTT) /* Alloc enough MTTs to map 256MB */
icm[i].span = HERMON_ICM_SPAN * 16;
icm[i].num_spans = icm[i].num_entries / icm[i].span;
if (icm[i].num_spans == 0) {
icm[i].span = icm[i].num_entries;
per_split = 1;
icm[i].num_spans = icm[i].num_entries / icm[i].span;
} else {
per_split = icm[i].num_spans / HERMON_ICM_SPLIT;
if (per_split == 0) {
per_split = 1;
}
}
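/*
* per_split is the number of spans backing each of the
* HERMON_ICM_SPLIT top-level divisions of this table; it feeds
* into split_shift below.
*/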
if (hermon_verbose)
IBTF_DPRINTF_L2("ICM", "rsrc %x span %x num_spans %x",
i, icm[i].span, icm[i].num_spans);
/*
* Ensure a minimum table size of an ICM page, and a
* maximum span size of the ICM table size. This ensures
* that we don't have less than an ICM page to map, which is
* impossible, and that we will map an entire table at
* once if its total size is less than the span size.
*/
icm[i].table_size = max(icm[i].table_size, HERMON_PAGESIZE);
icm[i].span_shift = 0;
for (k = icm[i].span; k != 1; k >>= 1)
icm[i].span_shift++;
icm[i].split_shift = icm[i].span_shift;
for (k = per_split; k != 1; k >>= 1)
icm[i].split_shift++;
icm[i].span_mask = (1 << icm[i].split_shift) -
(1 << icm[i].span_shift);
icm[i].rsrc_mask = (1 << icm[i].span_shift) - 1;
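/*
* With these shifts and masks an object index decomposes,
* roughly, as:
* split = index >> split_shift;
* span = (index & span_mask) >> span_shift;
* offset within span = index & rsrc_mask;
* (a sketch of how the ICM alloc/free path consumes them).
*/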
/* Initialize the table lock */
mutex_init(&icm[i].icm_table_lock, NULL, MUTEX_DRIVER,
DDI_INTR_PRI(state->hs_intrmsi_pri));
cv_init(&icm[i].icm_table_cv, NULL, CV_DRIVER, NULL);
if (hermon_verbose) {
IBTF_DPRINTF_L2("hermon", "tables_init: "
"rsrc (0x%x) size (0x%lx)", i, icm[i].table_size);
IBTF_DPRINTF_L2("hermon", "tables_init: "
"span (0x%x) num_spans (0x%x)",
icm[i].span, icm[i].num_spans);
IBTF_DPRINTF_L2("hermon", "tables_init: "
"span_shift (0x%x) split_shift (0x%x)",
icm[i].span_shift, icm[i].split_shift);
IBTF_DPRINTF_L2("hermon", "tables_init: "
"span_mask (0x%x) rsrc_mask (0x%x)",
icm[i].span_mask, icm[i].rsrc_mask);
}
}
}
/*
* hermon_icm_tables_fini()
* Context: Only called from attach() path context
*
* Clean up all icm_tables. Free the bitmap and dma_info arrays.
*/
static void
hermon_icm_tables_fini(hermon_state_t *state)
{
hermon_icm_table_t *icm;
int nspans;
int i, j;
icm = state->hs_icm;
for (i = 0; i < HERMON_NUM_ICM_RESOURCES; i++) {
mutex_enter(&icm[i].icm_table_lock);
nspans = icm[i].num_spans;
for (j = 0; j < HERMON_ICM_SPLIT; j++) {
if (icm[i].icm_dma[j])
/* Free the ICM DMA slots */
kmem_free(icm[i].icm_dma[j],
nspans * sizeof (hermon_dma_info_t));
if (icm[i].icm_bitmap[j])
/* Free the table bitmap */
kmem_free(icm[i].icm_bitmap[j],
(nspans + 7) / 8);
}
/* Destroy the table lock */
cv_destroy(&icm[i].icm_table_cv);
mutex_exit(&icm[i].icm_table_lock);
mutex_destroy(&icm[i].icm_table_lock);
}
}
/*
* hermon_icm_dma_init()
* Context: Only called from attach() path context
*/
static int
hermon_icm_dma_init(hermon_state_t *state)
{
hermon_icm_table_t *icm;
hermon_rsrc_type_t type;
int status;
/*
* This routine will allocate initial ICM DMA resources for ICM
* tables that have reserved ICM objects. This is the only routine
* where we should have to allocate ICM outside of hermon_rsrc_alloc().
* We need to allocate ICM here explicitly, rather than in
* hermon_rsrc_alloc(), because we've not yet completed the resource
* pool initialization. When the resource pools are initialized
* (in hermon_rsrc_init_phase2(), see hermon_rsrc.c for more
* information), resource preallocations will be invoked to match
* the ICM allocations seen here. We will then be able to use the
* normal allocation path. Note we don't need to set a refcnt on
* these initial allocations because that will be done in the calls
* to hermon_rsrc_alloc() from hermon_hw_entries_init() for the
* "prealloc" objects (see hermon_rsrc.c for more information).
*/
for (type = 0; type < HERMON_NUM_ICM_RESOURCES; type++) {
/* ICM for these is allocated within hermon_icm_alloc() */
switch (type) {
case HERMON_CMPT:
case HERMON_CMPT_QPC:
case HERMON_CMPT_SRQC:
case HERMON_CMPT_CQC:
case HERMON_CMPT_EQC:
case HERMON_AUXC:
case HERMON_ALTC:
case HERMON_RDB:
continue;
}
icm = &state->hs_icm[type];
mutex_enter(&icm->icm_table_lock);
status = hermon_icm_alloc(state, type, 0, 0);
mutex_exit(&icm->icm_table_lock);
if (status != DDI_SUCCESS) {
while (type--) {
icm = &state->hs_icm[type];
mutex_enter(&icm->icm_table_lock);
hermon_icm_free(state, type, 0, 0);
mutex_exit(&icm->icm_table_lock);
}
return (DDI_FAILURE);
}
if (hermon_verbose) {
IBTF_DPRINTF_L2("hermon", "hermon_icm_dma_init: "
"table (0x%x) index (0x%x) allocated", type, 0);
}
}
return (DDI_SUCCESS);
}
/*
* hermon_icm_dma_fini()
* Context: Only called from attach() path context
*
* ICM has been completely unmapped. We just free the memory here.
*/
static void
hermon_icm_dma_fini(hermon_state_t *state)
{
hermon_icm_table_t *icm;
hermon_dma_info_t *dma_info;
hermon_rsrc_type_t type;
int index1, index2;
for (type = 0; type < HERMON_NUM_ICM_RESOURCES; type++) {
icm = &state->hs_icm[type];
for (index1 = 0; index1 < HERMON_ICM_SPLIT; index1++) {
dma_info = icm->icm_dma[index1];
if (dma_info == NULL)
continue;
for (index2 = 0; index2 < icm->num_spans; index2++) {
if (dma_info[index2].dma_hdl)
hermon_dma_free(&dma_info[index2]);
dma_info[index2].dma_hdl = NULL;
}
}
}
}
/*
* hermon_hca_port_init()
* Context: Only called from attach() path context
*/
static int
hermon_hca_port_init(hermon_state_t *state)
{
hermon_hw_set_port_t *portinits, *initport;
hermon_cfg_profile_t *cfgprof;
uint_t num_ports;
int i = 0, status;
uint64_t maxval, val;
uint64_t sysimgguid, nodeguid, portguid;
cfgprof = state->hs_cfg_profile;
/* Get number of HCA ports */
num_ports = cfgprof->cp_num_ports;
/* Allocate space for Hermon set port struct(s) */
portinits = (hermon_hw_set_port_t *)kmem_zalloc(num_ports *
sizeof (hermon_hw_set_port_t), KM_SLEEP);
/* Post commands to initialize each Hermon HCA port */
/*
* In Hermon, the process is different than in previous HCAs.
* Here, you have to:
* QUERY_PORT - to get basic information from the HCA
* set the fields accordingly
* SET_PORT - to change/set everything as desired
* INIT_PORT - to bring the port up
*
* Needs to be done for each port in turn
*/
for (i = 0; i < num_ports; i++) {
bzero(&state->hs_queryport, sizeof (hermon_hw_query_port_t));
status = hermon_cmn_query_cmd_post(state, QUERY_PORT, 0,
(i + 1), &state->hs_queryport,
sizeof (hermon_hw_query_port_t), HERMON_CMD_NOSLEEP_SPIN);
if (status != HERMON_CMD_SUCCESS) {
cmn_err(CE_CONT, "Hermon: QUERY_PORT (port %02d) "
"command failed: %08x\n", i + 1, status);
goto init_ports_fail;
}
initport = &portinits[i];
state->hs_initport = &portinits[i];
bzero(initport, sizeof (hermon_hw_set_port_t));
/*
* Determine whether we need to override the firmware's
* default SystemImageGUID setting.
*/
sysimgguid = cfgprof->cp_sysimgguid;
if (sysimgguid != 0) {
initport->sig = 1;
initport->sys_img_guid = sysimgguid;
}
/*
* Determine whether we need to override the firmware's
* default NodeGUID setting.
*/
nodeguid = cfgprof->cp_nodeguid;
if (nodeguid != 0) {
initport->ng = 1;
initport->node_guid = nodeguid;
}
/*
* Determine whether we need to override the firmware's
* default PortGUID setting.
*/
portguid = cfgprof->cp_portguid[i];
if (portguid != 0) {
initport->g0 = 1;
initport->guid0 = portguid;
}
/* Validate max MTU size */
maxval = state->hs_queryport.ib_mtu;
val = cfgprof->cp_max_mtu;
if (val > maxval) {
goto init_ports_fail;
}
/* Validate the max port width */
maxval = state->hs_queryport.ib_port_wid;
val = cfgprof->cp_max_port_width;
if (val > maxval) {
goto init_ports_fail;
}
/* Validate max VL cap size */
maxval = state->hs_queryport.max_vl;
val = cfgprof->cp_max_vlcap;
if (val > maxval) {
goto init_ports_fail;
}
/* Validate max GID table size */
maxval = ((uint64_t)1 << state->hs_queryport.log_max_gid);
val = ((uint64_t)1 << cfgprof->cp_log_max_gidtbl);
if (val > maxval) {
goto init_ports_fail;
}
initport->max_guid = (uint16_t)val;
initport->mg = 1;
/* Validate max PKey table size */
maxval = ((uint64_t)1 << state->hs_queryport.log_max_pkey);
val = ((uint64_t)1 << cfgprof->cp_log_max_pkeytbl);
if (val > maxval) {
goto init_ports_fail;
}
initport->max_pkey = (uint16_t)val;
initport->mp = 1;
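/*
* The mg and mp bits mark the max_guid and max_pkey fields as
* valid for SET_PORT, analogous to the sig/ng/g0 override flags
* set above.
*/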
/*
* Post the SET_PORT cmd to Hermon firmware. This sets
* the parameters of the port.
*/
status = hermon_set_port_cmd_post(state, initport, i + 1,
HERMON_CMD_NOSLEEP_SPIN);
if (status != HERMON_CMD_SUCCESS) {
cmn_err(CE_CONT, "Hermon: SET_PORT (port %02d) command "
"failed: %08x\n", i + 1, status);
goto init_ports_fail;
}
/* issue another SET_PORT cmd - performance fix/workaround */
/* XXX - need to discuss with Mellanox */
bzero(initport, sizeof (hermon_hw_set_port_t));
initport->cap_mask = 0x02500868;
status = hermon_set_port_cmd_post(state, initport, i + 1,
HERMON_CMD_NOSLEEP_SPIN);
if (status != HERMON_CMD_SUCCESS) {
cmn_err(CE_CONT, "Hermon: SET_PORT (port %02d) command "
"failed: %08x\n", i + 1, status);
goto init_ports_fail;
}
}
/*
* Finally, do the INIT_PORT for each port in turn.
* When this command completes, the corresponding Hermon port
* will be physically "Up" and initialized.
*/
for (i = 0; i < num_ports; i++) {
status = hermon_init_port_cmd_post(state, i + 1,
HERMON_CMD_NOSLEEP_SPIN);
if (status != HERMON_CMD_SUCCESS) {
cmn_err(CE_CONT, "Hermon: INIT_PORT (port %02d) "
"comman failed: %08x\n", i + 1, status);
goto init_ports_fail;
}
}
/* Free up the memory for Hermon port init struct(s), return success */
kmem_free(portinits, num_ports * sizeof (hermon_hw_set_port_t));
return (DDI_SUCCESS);
init_ports_fail:
/*
* Free up the memory for Hermon port init struct(s), shutdown any
* successfully initialized ports, and return failure
*/
kmem_free(portinits, num_ports * sizeof (hermon_hw_set_port_t));
(void) hermon_hca_ports_shutdown(state, i);
return (DDI_FAILURE);
}
/*
* hermon_hca_ports_shutdown()
* Context: Only called from attach() and/or detach() path contexts
*/
static int
hermon_hca_ports_shutdown(hermon_state_t *state, uint_t num_init)
{
int i, status;
/*
* Post commands to shutdown all init'd Hermon HCA ports. Note: if
* any of these commands fail for any reason, it would be entirely
* unexpected and probably indicative of a serious problem (HW or SW).
* Although the caller generally ignores our return status, this type
* of failure should not go unreported. That is why we have the
* warning message.
*/
for (i = 0; i < num_init; i++) {
status = hermon_close_port_cmd_post(state, i + 1,
HERMON_CMD_NOSLEEP_SPIN);
if (status != HERMON_CMD_SUCCESS) {
HERMON_WARNING(state, "failed to shutdown HCA port");
return (status);
}
}
return (HERMON_CMD_SUCCESS);
}
/*
* hermon_internal_uarpg_init
* Context: Only called from attach() path context
*/
static int
hermon_internal_uarpg_init(hermon_state_t *state)
{
int status;
hermon_dbr_info_t *info;
/*
* Allocate the UAR page for kernel use. This UAR page is
* the privileged UAR page through which all kernel generated
* doorbells will be rung. There are a number of UAR pages
* reserved by hardware at the front of the UAR BAR, indicated
* by DEVCAP.num_rsvd_uar, which we have already allocated. So,
* the kernel page, or UAR page index num_rsvd_uar, will be
* allocated here for kernel use.
*/
status = hermon_rsrc_alloc(state, HERMON_UARPG, 1, HERMON_SLEEP,
&state->hs_uarkpg_rsrc);
if (status != DDI_SUCCESS) {
return (DDI_FAILURE);
}
/* Setup pointer to kernel UAR page */
state->hs_uar = (hermon_hw_uar_t *)state->hs_uarkpg_rsrc->hr_addr;
/* need to set up DBr tracking as well */
status = hermon_dbr_page_alloc(state, &info);
if (status != DDI_SUCCESS) {
return (DDI_FAILURE);
}
state->hs_kern_dbr = info;
return (DDI_SUCCESS);
}
/*
* hermon_internal_uarpg_fini
* Context: Only called from attach() and/or detach() path contexts
*/
static void
hermon_internal_uarpg_fini(hermon_state_t *state)
{
/* Free up Hermon UAR page #1 (kernel driver doorbells) */
hermon_rsrc_free(state, &state->hs_uarkpg_rsrc);
}
/*
* hermon_special_qp_contexts_reserve()
* Context: Only called from attach() path context
*/
static int
hermon_special_qp_contexts_reserve(hermon_state_t *state)
{
hermon_rsrc_t *qp0_rsrc, *qp1_rsrc, *qp_resvd;
int status;
/* Initialize the lock used for special QP rsrc management */
mutex_init(&state->hs_spec_qplock, NULL, MUTEX_DRIVER,
DDI_INTR_PRI(state->hs_intrmsi_pri));
/*
* Reserve contexts for QP0. These QP contexts will be set up to
* act as aliases for the real QP0. Note: We are required to grab
* two QPs (one per port) even if we are operating in single-port
* mode.
*/
status = hermon_rsrc_alloc(state, HERMON_QPC, 2,
HERMON_SLEEP, &qp0_rsrc);
if (status != DDI_SUCCESS) {
mutex_destroy(&state->hs_spec_qplock);
return (DDI_FAILURE);
}
state->hs_spec_qp0 = qp0_rsrc;
/*
* Reserve contexts for QP1. These QP contexts will be set up to
* act as aliases for the real QP1. Note: We are required to grab
* two QPs (one per port) even if we are operating in single-port
* mode.
*/
status = hermon_rsrc_alloc(state, HERMON_QPC, 2,
HERMON_SLEEP, &qp1_rsrc);
if (status != DDI_SUCCESS) {
hermon_rsrc_free(state, &qp0_rsrc);
mutex_destroy(&state->hs_spec_qplock);
return (DDI_FAILURE);
}
state->hs_spec_qp1 = qp1_rsrc;
status = hermon_rsrc_alloc(state, HERMON_QPC, 4,
HERMON_SLEEP, &qp_resvd);
if (status != DDI_SUCCESS) {
hermon_rsrc_free(state, &qp1_rsrc);
hermon_rsrc_free(state, &qp0_rsrc);
mutex_destroy(&state->hs_spec_qplock);
return (DDI_FAILURE);
}
state->hs_spec_qp_unused = qp_resvd;
return (DDI_SUCCESS);
}
/*
* hermon_special_qp_contexts_unreserve()
* Context: Only called from attach() and/or detach() path contexts
*/
static void
hermon_special_qp_contexts_unreserve(hermon_state_t *state)
{
/* Unreserve contexts for spec_qp_unused */
hermon_rsrc_free(state, &state->hs_spec_qp_unused);
/* Unreserve contexts for QP1 */
hermon_rsrc_free(state, &state->hs_spec_qp1);
/* Unreserve contexts for QP0 */
hermon_rsrc_free(state, &state->hs_spec_qp0);
/* Destroy the lock used for special QP rsrc management */
mutex_destroy(&state->hs_spec_qplock);
}
/*
* hermon_sw_reset()
* Context: Currently called only from attach() path context
*/
static int
hermon_sw_reset(hermon_state_t *state)
{
ddi_acc_handle_t hdl = hermon_get_pcihdl(state);
ddi_acc_handle_t cmdhdl = hermon_get_cmdhdl(state);
uint32_t reset_delay;
int status, i;
uint32_t sem;
uint_t offset;
uint32_t data32; /* for devctl & linkctl */
int loopcnt;
/* initialize the FMA retry loop */
hermon_pio_init(fm_loop_cnt, fm_status, fm_test);
hermon_pio_init(fm_loop_cnt2, fm_status2, fm_test2);
/*
* If the configured software reset delay is set to zero, then we
* will not attempt a software reset of the Hermon device.
*/
reset_delay = state->hs_cfg_profile->cp_sw_reset_delay;
if (reset_delay == 0) {
return (DDI_SUCCESS);
}
/* the FMA retry loop starts. */
hermon_pio_start(state, cmdhdl, pio_error, fm_loop_cnt, fm_status,
fm_test);
hermon_pio_start(state, hdl, pio_error2, fm_loop_cnt2, fm_status2,
fm_test2);
/* Query the PCI capabilities of the HCA device */
/* but don't process the VPD until after reset */
status = hermon_pci_capability_list(state, hdl);
if (status != DDI_SUCCESS) {
cmn_err(CE_NOTE, "failed to get pci capabilities list(0x%x)\n",
status);
return (DDI_FAILURE);
}
/*
* Read all PCI config info (reg0...reg63). Note: According to the
* Hermon software reset application note, we should not read or
* restore the values in reg22 and reg23.
* NOTE: For Hermon (and Arbel too) it says to restore the command
* register LAST, and technically, you need to restore the
* PCIE Capability "device control" and "link control" (word-sized,
* at offsets 0x08 and 0x10 from the capability ID respectively).
* We hold off restoring the command register - offset 0x4 - till last
*/
/* 1st, wait for the semaphore to assure accessibility - per PRM */
status = -1;
for (i = 0; i < NANOSEC/MICROSEC /* 1sec timeout */; i++) {
sem = ddi_get32(cmdhdl, state->hs_cmd_regs.sw_semaphore);
if (sem == 0) {
status = 0;
break;
}
drv_usecwait(1);
}
/* Check if timeout happens */
if (status == -1) {
/*
* Remove this acc handle from Hermon, then log
* the error.
*/
hermon_pci_config_teardown(state, &hdl);
cmn_err(CE_WARN, "hermon_sw_reset timeout: "
"failed to get the semaphore(0x%p)\n",
(void *)state->hs_cmd_regs.sw_semaphore);
hermon_fm_ereport(state, HCA_IBA_ERR, HCA_ERR_NON_FATAL);
return (DDI_FAILURE);
}
for (i = 0; i < HERMON_SW_RESET_NUMREGS; i++) {
if ((i != HERMON_SW_RESET_REG22_RSVD) &&
(i != HERMON_SW_RESET_REG23_RSVD)) {
state->hs_cfg_data[i] = pci_config_get32(hdl, i << 2);
}
}
/*
* Perform the software reset (by writing 1 at offset 0xF0010)
*/
ddi_put32(cmdhdl, state->hs_cmd_regs.sw_reset, HERMON_SW_RESET_START);
/*
* This delay is required so as not to cause a panic here. If the
* device is accessed too soon after reset it will not respond to
* config cycles, causing a Master Abort and panic.
*/
drv_usecwait(reset_delay);
/*
* Poll waiting for the device to finish resetting.
*/
loopcnt = 100; /* 100 times @ 100 usec - total delay 10 msec */
while ((pci_config_get32(hdl, 0) & 0x0000FFFF) != PCI_VENID_MLX) {
drv_usecwait(HERMON_SW_RESET_POLL_DELAY);
if (--loopcnt == 0)
break; /* just in case, break and go on */
}
if (loopcnt == 0)
cmn_err(CE_CONT, "!Never see VEND_ID - read == %X",
pci_config_get32(hdl, 0));
/*
* Restore the config info
*/
for (i = 0; i < HERMON_SW_RESET_NUMREGS; i++) {
if (i == 1) continue; /* skip the status/ctrl reg */
if ((i != HERMON_SW_RESET_REG22_RSVD) &&
(i != HERMON_SW_RESET_REG23_RSVD)) {
pci_config_put32(hdl, i << 2, state->hs_cfg_data[i]);
}
}
/*
* PCI Express Capability - we saved during capability list, and
* we'll restore them here.
*/
offset = state->hs_pci_cap_offset;
data32 = state->hs_pci_cap_devctl;
pci_config_put32(hdl, offset + HERMON_PCI_CAP_DEV_OFFS, data32);
data32 = state->hs_pci_cap_lnkctl;
pci_config_put32(hdl, offset + HERMON_PCI_CAP_LNK_OFFS, data32);
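/*
* Finally restore the command register (offset 0x4); OR-ing in
* 0x0006 ensures the Memory Space Enable and Bus Master Enable
* bits are set.
*/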
pci_config_put32(hdl, 0x04, (state->hs_cfg_data[1] | 0x0006));
/* the FMA retry loop ends. */
hermon_pio_end(state, hdl, pio_error2, fm_loop_cnt2, fm_status2,
fm_test2);
hermon_pio_end(state, cmdhdl, pio_error, fm_loop_cnt, fm_status,
fm_test);
return (DDI_SUCCESS);
pio_error2:
/* fall through */
pio_error:
hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_NON_FATAL);
return (DDI_FAILURE);
}
/*
* hermon_mcg_init()
* Context: Only called from attach() path context
*/
static int
hermon_mcg_init(hermon_state_t *state)
{
uint_t mcg_tmp_sz;
/*
* Allocate space for the MCG temporary copy buffer. This is
* used by the Attach/Detach Multicast Group code
*/
mcg_tmp_sz = HERMON_MCGMEM_SZ(state);
state->hs_mcgtmp = kmem_zalloc(mcg_tmp_sz, KM_SLEEP);
/*
* Initialize the multicast group mutex. This ensures atomic
* access to add, modify, and remove entries in the multicast
* group hash lists.
*/
mutex_init(&state->hs_mcglock, NULL, MUTEX_DRIVER,
DDI_INTR_PRI(state->hs_intrmsi_pri));
return (DDI_SUCCESS);
}
/*
* hermon_mcg_fini()
* Context: Only called from attach() and/or detach() path contexts
*/
static void
hermon_mcg_fini(hermon_state_t *state)
{
uint_t mcg_tmp_sz;
/* Free up the space used for the MCG temporary copy buffer */
mcg_tmp_sz = HERMON_MCGMEM_SZ(state);
kmem_free(state->hs_mcgtmp, mcg_tmp_sz);
/* Destroy the multicast group mutex */
mutex_destroy(&state->hs_mcglock);
}
/*
* hermon_fw_version_check()
* Context: Only called from attach() path context
*/
static int
hermon_fw_version_check(hermon_state_t *state)
{
uint_t hermon_fw_ver_major;
uint_t hermon_fw_ver_minor;
uint_t hermon_fw_ver_subminor;
#ifdef FMA_TEST
if (hermon_test_num == -1) {
return (DDI_FAILURE);
}
#endif
/*
* Depending on which version of the driver we have attached, and which
* HCA we've attached, the firmware version checks will be different.
* We set up the comparison values for the Hermon HCA.
*/
switch (state->hs_operational_mode) {
case HERMON_HCA_MODE:
hermon_fw_ver_major = HERMON_FW_VER_MAJOR;
hermon_fw_ver_minor = HERMON_FW_VER_MINOR;
hermon_fw_ver_subminor = HERMON_FW_VER_SUBMINOR;
break;
default:
return (DDI_FAILURE);
}
/*
* If FW revision major number is less than acceptable,
* return failure, else if greater return success. If
* the major numbers are equal then check the minor number
*/
if (state->hs_fw.fw_rev_major < hermon_fw_ver_major) {
return (DDI_FAILURE);
} else if (state->hs_fw.fw_rev_major > hermon_fw_ver_major) {
return (DDI_SUCCESS);
}
/*
* Do the same check as above, except for minor revision numbers
* If the minor numbers are equal then check the subminor number
*/
if (state->hs_fw.fw_rev_minor < hermon_fw_ver_minor) {
return (DDI_FAILURE);
} else if (state->hs_fw.fw_rev_minor > hermon_fw_ver_minor) {
return (DDI_SUCCESS);
}
/*
* Once again we do the same check as above, except for the subminor
* revision number. If the subminor numbers are equal here, then
* these are the same firmware version, return success
*/
if (state->hs_fw.fw_rev_subminor < hermon_fw_ver_subminor) {
return (DDI_FAILURE);
} else if (state->hs_fw.fw_rev_subminor > hermon_fw_ver_subminor) {
return (DDI_SUCCESS);
}
return (DDI_SUCCESS);
}
/*
* hermon_device_info_report()
* Context: Only called from attach() path context
*/
static void
hermon_device_info_report(hermon_state_t *state)
{
cmn_err(CE_CONT, "?hermon%d: FW ver: %04d.%04d.%04d, "
"HW rev: %02d\n", state->hs_instance, state->hs_fw.fw_rev_major,
state->hs_fw.fw_rev_minor, state->hs_fw.fw_rev_subminor,
state->hs_revision_id);
cmn_err(CE_CONT, "?hermon%d: %64s (0x%016" PRIx64 ")\n",
state->hs_instance, state->hs_nodedesc, state->hs_nodeguid);
}
/*
* hermon_pci_capability_list()
* Context: Only called from attach() path context
*/
static int
hermon_pci_capability_list(hermon_state_t *state, ddi_acc_handle_t hdl)
{
uint_t offset, data;
uint32_t data32;
state->hs_pci_cap_offset = 0; /* make sure it's cleared */
/*
* Check for the "PCI Capabilities" bit in the "Status Register".
* Bit 4 in this register indicates the presence of a "PCI
* Capabilities" list.
*
* PCI-Express requires this bit to be set to 1.
*/
data = pci_config_get16(hdl, 0x06);
if ((data & 0x10) == 0) {
return (DDI_FAILURE);
}
/*
* Starting from offset 0x34 in PCI config space, find the
* head of "PCI capabilities" list, and walk the list. If
* capabilities of a known type are encountered (e.g.
* "PCI-X Capability"), then call the appropriate handler
* function.
*/
offset = pci_config_get8(hdl, 0x34);
while (offset != 0x0) {
data = pci_config_get8(hdl, offset);
/*
* Check for known capability types. Hermon has the
* following:
* o Power Mgmt (0x01)
* o VPD Capability (0x03)
* o PCI-E Capability (0x10)
* o MSIX Capability (0x11)
*/
switch (data) {
case 0x01:
/* power mgmt handling */
break;
case 0x03:
/*
* Reading the PCIe VPD has been unreliable: it sometimes causes
* problems on (mostly) X64, and we have also seen problems with
* Sparc and Tavor. Until this is root caused, delay briefly
* before attempting the read as a workaround.
*/
#ifdef HERMON_VPD_WORKS
hermon_pci_capability_vpd(state, hdl, offset);
#else
delay(100);
hermon_pci_capability_vpd(state, hdl, offset);
#endif
break;
case 0x10:
/*
* PCI Express Capability - save offset & contents
* for later in reset
*/
state->hs_pci_cap_offset = offset;
data32 = pci_config_get32(hdl,
offset + HERMON_PCI_CAP_DEV_OFFS);
state->hs_pci_cap_devctl = data32;
data32 = pci_config_get32(hdl,
offset + HERMON_PCI_CAP_LNK_OFFS);
state->hs_pci_cap_lnkctl = data32;
break;
case 0x11:
/*
* MSIX support - nothing to do, taken care of in the
* MSI/MSIX interrupt framework
*/
break;
default:
/* just go on to the next */
break;
}
/* Get offset of next entry in list */
offset = pci_config_get8(hdl, offset + 1);
}
return (DDI_SUCCESS);
}
/*
* hermon_pci_read_vpd()
* Context: Only called from attach() path context
* utility routine for hermon_pci_capability_vpd()
*/
static int
hermon_pci_read_vpd(ddi_acc_handle_t hdl, uint_t offset, uint32_t addr,
uint32_t *data)
{
int retry = 40; /* retry counter for EEPROM poll */
uint32_t val;
int vpd_addr = offset + 2;
int vpd_data = offset + 4;
/*
* In order to read a 32-bit value from VPD, we are to write down
* the address (offset in the VPD itself) to the address register.
* To signal the read, we also clear bit 31. We then poll on bit 31
* and when it is set, we can then read our 4 bytes from the data
* register.
*/
(void) pci_config_put32(hdl, offset, addr << 16);
do {
drv_usecwait(1000);
val = pci_config_get16(hdl, vpd_addr);
if (val & 0x8000) { /* flag bit set */
*data = pci_config_get32(hdl, vpd_data);
return (DDI_SUCCESS);
}
} while (--retry);
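/* 40 polls at 1ms apiece gives the VPD EEPROM up to ~40ms to respond */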
/* The read of the flag bit failed; log one message but count all failures */
if (debug_vpd == 0)
cmn_err(CE_NOTE,
"!Failed to see flag bit after VPD addr write\n");
debug_vpd++;
return (DDI_FAILURE);
}
/*
* hermon_pci_capability_vpd()
* Context: Only called from attach() path context
*/
static void
hermon_pci_capability_vpd(hermon_state_t *state, ddi_acc_handle_t hdl,
uint_t offset)
{
uint8_t name_length;
uint8_t pn_length;
int i, err = 0;
int vpd_str_id = 0;
int vpd_ro_desc;
int vpd_ro_pn_desc;
#ifdef _BIG_ENDIAN
uint32_t data32;
#endif /* _BIG_ENDIAN */
union {
uint32_t vpd_int[HERMON_VPD_HDR_DWSIZE];
uchar_t vpd_char[HERMON_VPD_HDR_BSIZE];
} vpd;
/*
* Read in the Vital Product Data (VPD) to the extent needed
* by the fwflash utility
*/
for (i = 0; i < HERMON_VPD_HDR_DWSIZE; i++) {
err = hermon_pci_read_vpd(hdl, offset, i << 2, &vpd.vpd_int[i]);
if (err != DDI_SUCCESS) {
cmn_err(CE_NOTE, "!VPD read failed\n");
goto out;
}
}
#ifdef _BIG_ENDIAN
/* Need to swap bytes for big endian. */
for (i = 0; i < HERMON_VPD_HDR_DWSIZE; i++) {
data32 = vpd.vpd_int[i];
vpd.vpd_char[(i << 2) + 3] =
(uchar_t)((data32 & 0xFF000000) >> 24);
vpd.vpd_char[(i << 2) + 2] =
(uchar_t)((data32 & 0x00FF0000) >> 16);
vpd.vpd_char[(i << 2) + 1] =
(uchar_t)((data32 & 0x0000FF00) >> 8);
vpd.vpd_char[i << 2] = (uchar_t)(data32 & 0x000000FF);
}
#endif /* _BIG_ENDIAN */
/* Check for VPD String ID Tag */
if (vpd.vpd_char[vpd_str_id] == 0x82) {
/* get the product name */
name_length = (uint8_t)vpd.vpd_char[vpd_str_id + 1];
if (name_length >= sizeof (state->hs_hca_name)) {
cmn_err(CE_NOTE, "!VPD name too large (0x%x)\n",
name_length);
goto out;
}
(void) memcpy(state->hs_hca_name, &vpd.vpd_char[vpd_str_id + 3],
name_length);
state->hs_hca_name[name_length] = 0;
/* get the part number */
vpd_ro_desc = name_length + 3; /* read-only tag location */
vpd_ro_pn_desc = vpd_ro_desc + 3; /* P/N keyword location */
/* Verify read-only tag and Part Number keyword. */
if (vpd.vpd_char[vpd_ro_desc] != 0x90 ||
(vpd.vpd_char[vpd_ro_pn_desc] != 'P' ||
vpd.vpd_char[vpd_ro_pn_desc + 1] != 'N')) {
cmn_err(CE_NOTE, "!VPD Part Number not found\n");
goto out;
}
pn_length = (uint8_t)vpd.vpd_char[vpd_ro_pn_desc + 2];
/* use '>=' to leave room for the terminating NUL */
if (pn_length >= sizeof (state->hs_hca_pn)) {
cmn_err(CE_NOTE, "!VPD part number too large (0x%x)\n",
pn_length);
goto out;
}
(void) memcpy(state->hs_hca_pn,
&vpd.vpd_char[vpd_ro_pn_desc + 3],
pn_length);
state->hs_hca_pn[pn_length] = 0;
state->hs_hca_pn_len = pn_length;
cmn_err(CE_CONT, "!vpd %s\n", state->hs_hca_pn);
} else {
/* Wrong VPD String ID Tag */
cmn_err(CE_NOTE, "!VPD String ID Tag not found, tag: %02x\n",
vpd.vpd_char[0]);
goto out;
}
return;
out:
state->hs_hca_pn_len = 0;
}
/*
* hermon_intr_or_msi_init()
* Context: Only called from attach() path context
*/
static int
hermon_intr_or_msi_init(hermon_state_t *state)
{
int status;
/* Query for the list of supported interrupt event types */
status = ddi_intr_get_supported_types(state->hs_dip,
&state->hs_intr_types_avail);
if (status != DDI_SUCCESS) {
return (DDI_FAILURE);
}
/*
 * If Hermon supports MSI-X in this system (and it hasn't been
 * overridden by a configuration variable), the default behavior
 * is to use a single MSI-X vector.  If MSI-X is chosen but fails
 * for whatever reason, try MSI next.
 */
if ((state->hs_cfg_profile->cp_use_msi_if_avail != 0) &&
(state->hs_intr_types_avail & DDI_INTR_TYPE_MSIX)) {
status = hermon_add_intrs(state, DDI_INTR_TYPE_MSIX);
if (status == DDI_SUCCESS) {
state->hs_intr_type_chosen = DDI_INTR_TYPE_MSIX;
return (DDI_SUCCESS);
}
}
/*
 * If Hermon supports MSI in this system (and it hasn't been
 * overridden by a configuration variable), the next choice is a
 * single MSI.  If MSI is chosen but fails for whatever reason,
 * fall back to using legacy (fixed) interrupts.
 */
if ((state->hs_cfg_profile->cp_use_msi_if_avail != 0) &&
(state->hs_intr_types_avail & DDI_INTR_TYPE_MSI)) {
status = hermon_add_intrs(state, DDI_INTR_TYPE_MSI);
if (status == DDI_SUCCESS) {
state->hs_intr_type_chosen = DDI_INTR_TYPE_MSI;
return (DDI_SUCCESS);
}
}
/*
 * MSI-X/MSI allocation failed or was unavailable.  Fall back to
 * legacy (fixed) interrupt support.
 */
if (state->hs_intr_types_avail & DDI_INTR_TYPE_FIXED) {
status = hermon_add_intrs(state, DDI_INTR_TYPE_FIXED);
if (status == DDI_SUCCESS) {
state->hs_intr_type_chosen = DDI_INTR_TYPE_FIXED;
return (DDI_SUCCESS);
}
}
/*
 * None of MSI-X, MSI, or legacy interrupts could be allocated.
 * Return failure.
 */
return (DDI_FAILURE);
}
/*
* hermon_add_intrs()
* Context: Only called from attach() path context
*/
static int
hermon_add_intrs(hermon_state_t *state, int intr_type)
{
int status;
/* Get number of interrupts/MSI supported */
status = ddi_intr_get_nintrs(state->hs_dip, intr_type,
&state->hs_intrmsi_count);
if (status != DDI_SUCCESS) {
return (DDI_FAILURE);
}
/* Get number of available interrupts/MSI */
status = ddi_intr_get_navail(state->hs_dip, intr_type,
&state->hs_intrmsi_avail);
if (status != DDI_SUCCESS) {
return (DDI_FAILURE);
}
/* Ensure that we have at least one (1) usable MSI or interrupt */
if ((state->hs_intrmsi_avail < 1) || (state->hs_intrmsi_count < 1)) {
return (DDI_FAILURE);
}
/* Attempt to allocate the maximum number of interrupt/MSI handles */
status = ddi_intr_alloc(state->hs_dip, &state->hs_intrmsi_hdl[0],
intr_type, 0, min(HERMON_MSIX_MAX, state->hs_intrmsi_avail),
&state->hs_intrmsi_allocd, DDI_INTR_ALLOC_NORMAL);
if (status != DDI_SUCCESS) {
return (DDI_FAILURE);
}
/* Ensure that we have allocated at least one (1) MSI or interrupt */
if (state->hs_intrmsi_allocd < 1) {
return (DDI_FAILURE);
}
state->hs_eq_dist = state->hs_intrmsi_allocd - 1; /* zero-based count for EQ distribution */
/*
* Extract the priority for the allocated interrupt/MSI. This
* will be used later when initializing certain mutexes.
*/
status = ddi_intr_get_pri(state->hs_intrmsi_hdl[0],
&state->hs_intrmsi_pri);
if (status != DDI_SUCCESS) {
/* Free the allocated interrupt/MSI handle */
(void) ddi_intr_free(state->hs_intrmsi_hdl[0]);
return (DDI_FAILURE);
}
/* Make sure the interrupt/MSI priority is below 'high level' */
if (state->hs_intrmsi_pri >= ddi_intr_get_hilevel_pri()) {
/* Free the allocated interrupt/MSI handle */
(void) ddi_intr_free(state->hs_intrmsi_hdl[0]);
return (DDI_FAILURE);
}
/* Get add'l capability information regarding interrupt/MSI */
status = ddi_intr_get_cap(state->hs_intrmsi_hdl[0],
&state->hs_intrmsi_cap);
if (status != DDI_SUCCESS) {
/* Free the allocated interrupt/MSI handle */
(void) ddi_intr_free(state->hs_intrmsi_hdl[0]);
return (DDI_FAILURE);
}
return (DDI_SUCCESS);
}
/*
* hermon_intr_or_msi_fini()
* Context: Only called from attach() and/or detach() path contexts
*/
static int
hermon_intr_or_msi_fini(hermon_state_t *state)
{
int status;
int intr;
for (intr = 0; intr < state->hs_intrmsi_allocd; intr++) {
/* Free the allocated interrupt/MSI handle */
status = ddi_intr_free(state->hs_intrmsi_hdl[intr]);
if (status != DDI_SUCCESS) {
return (DDI_FAILURE);
}
}
return (DDI_SUCCESS);
}
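/*
 * hermon_pci_capability_msix()
 *    Debug helper: decodes and prints the MSI-X capability structure
 *    contents (message control, table info, and PBA info).
 */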
/*ARGSUSED*/
void
hermon_pci_capability_msix(hermon_state_t *state, ddi_acc_handle_t hdl,
uint_t offset)
{
uint32_t msix_data;
uint16_t msg_cntr;
uint32_t t_offset; /* table offset */
uint32_t t_bir;
uint32_t p_offset; /* pba */
uint32_t p_bir;
int t_size; /* size in entries - each is 4 dwords */
/* come in with offset pointing at the capability structure */
msix_data = pci_config_get32(hdl, offset);
cmn_err(CE_CONT, "Full cap structure dword = %X\n", msix_data);
msg_cntr = pci_config_get16(hdl, offset+2);
cmn_err(CE_CONT, "MSIX msg_control = %X\n", msg_cntr);
offset += 4;
msix_data = pci_config_get32(hdl, offset); /* table info */
t_offset = msix_data & 0xFFFFFFF8; /* per PCI spec: low 3 (BIR) bits masked */
t_bir = msix_data & 0x07;
offset += 4;
cmn_err(CE_CONT, " table %X --offset = %X, bir(bar) = %X\n",
msix_data, t_offset, t_bir);
msix_data = pci_config_get32(hdl, offset); /* PBA info */
p_offset = msix_data & 0xFFFFFFF8; /* same decoding as the table info */
p_bir = msix_data & 0x07;
cmn_err(CE_CONT, " PBA %X --offset = %X, bir(bar) = %X\n",
msix_data, p_offset, p_bir);
t_size = (msg_cntr & 0x7FF) + 1; /* low eleven bits encode (entries - 1) */
cmn_err(CE_CONT, " table size = %X entries\n", t_size);
offset = t_offset; /* reuse this for offset from BAR */
#ifdef HERMON_SUPPORTS_MSIX_BAR
{
int i, j; /* loop indices for the table dump below */

cmn_err(CE_CONT, "First 2 table entries behind BAR2\n");
for (i = 0; i < 2; i++) {
for (j = 0; j < 4; j++, offset += 4) {
msix_data = ddi_get32(state->hs_reg_msihdl,
(uint32_t *)((uintptr_t)state->hs_reg_msi_baseaddr
+ offset));
cmn_err(CE_CONT, "MSI table entry %d, dword %d == %X\n",
i, j, msix_data);
}
}
}
#endif
}
/*
* X86 fastreboot support functions.
* These functions are used to save/restore MSI-X table/PBA and also
* to disable MSI-X interrupts in hermon_quiesce().
*/
/* Return the message control for MSI-X */
static ushort_t
get_msix_ctrl(dev_info_t *dip)
{
ushort_t msix_ctrl = 0, caps_ctrl = 0;
hermon_state_t *state = ddi_get_soft_state(hermon_statep,
DEVI(dip)->devi_instance);
ddi_acc_handle_t pci_cfg_hdl = hermon_get_pcihdl(state);
ASSERT(pci_cfg_hdl != NULL);
if ((PCI_CAP_LOCATE(pci_cfg_hdl,
PCI_CAP_ID_MSI_X, &caps_ctrl) == DDI_SUCCESS)) {
if ((msix_ctrl = PCI_CAP_GET16(pci_cfg_hdl, NULL, caps_ctrl,
PCI_MSIX_CTRL)) == PCI_CAP_EINVAL16)
return (0);
}
ASSERT(msix_ctrl != 0);
return (msix_ctrl);
}
/* Return the MSI-X table size */
static size_t
get_msix_tbl_size(dev_info_t *dip)
{
ushort_t msix_ctrl = get_msix_ctrl(dip);
ASSERT(msix_ctrl != 0);
return (((msix_ctrl & PCI_MSIX_TBL_SIZE_MASK) + 1) *
PCI_MSIX_VECTOR_SIZE);
}
/* Return the MSI-X PBA size */
static size_t
get_msix_pba_size(dev_info_t *dip)
{
ushort_t msix_ctrl = get_msix_ctrl(dip);
ASSERT(msix_ctrl != 0);
return (((msix_ctrl & PCI_MSIX_TBL_SIZE_MASK) + 64) / 64 * 8);
}
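/*
 * Worked example for the two size computations above: a device
 * advertising 128 vectors encodes 127 in the msix_ctrl size field.
 * The table size is then (127 + 1) * PCI_MSIX_VECTOR_SIZE =
 * 128 * 16 = 2048 bytes, and the PBA size is (127 + 64) / 64 * 8 =
 * 16 bytes - one pending bit per vector, rounded up to a whole
 * 64-bit qword.
 */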
/* Set up the MSI-X table/PBA save area */
static void
hermon_set_msix_info(hermon_state_t *state)
{
uint_t rnumber, breg, nregs;
ushort_t caps_ctrl, msix_ctrl;
pci_regspec_t *rp;
int reg_size, addr_space, offset, *regs_list, i;
/*
 * MSI-X BIR Index Table:
 * maps a BAR Indicator Register (BIR) value to the config-space
 * offset of the corresponding Base Address Register.
 */
uchar_t pci_msix_bir_index[8] = {0x10, 0x14, 0x18, 0x1c,
0x20, 0x24, 0xff, 0xff};
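/*
 * BIR values 0-5 select BAR0-BAR5 at config offsets 0x10-0x24;
 * values 6 and 7 are reserved, hence the 0xff sentinels.
 */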
/* Fastreboot data access attribute */
ddi_device_acc_attr_t dev_attr = {
0, /* version */
DDI_STRUCTURE_LE_ACC,
DDI_STRICTORDER_ACC, /* attr access */
0
};
ddi_acc_handle_t pci_cfg_hdl = hermon_get_pcihdl(state);
ASSERT(pci_cfg_hdl != NULL);
if ((PCI_CAP_LOCATE(pci_cfg_hdl,
PCI_CAP_ID_MSI_X, &caps_ctrl) == DDI_SUCCESS)) {
if ((msix_ctrl = PCI_CAP_GET16(pci_cfg_hdl, NULL, caps_ctrl,
PCI_MSIX_CTRL)) == PCI_CAP_EINVAL16)
return;
} else {
/* no MSI-X capability; nothing to save for fastreboot */
return;
}
ASSERT(msix_ctrl != 0);
state->hs_msix_tbl_offset = PCI_CAP_GET32(pci_cfg_hdl, NULL, caps_ctrl,
PCI_MSIX_TBL_OFFSET);
/* Get the BIR for MSI-X table */
breg = pci_msix_bir_index[state->hs_msix_tbl_offset &
PCI_MSIX_TBL_BIR_MASK];
ASSERT(breg != 0xFF);
/* Set the MSI-X table offset */
state->hs_msix_tbl_offset = state->hs_msix_tbl_offset &
~PCI_MSIX_TBL_BIR_MASK;
/* Set the MSI-X table size */
state->hs_msix_tbl_size = ((msix_ctrl & PCI_MSIX_TBL_SIZE_MASK) + 1) *
PCI_MSIX_VECTOR_SIZE;
if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, state->hs_dip,
DDI_PROP_DONTPASS, "reg", (int **)&regs_list, &nregs) !=
DDI_PROP_SUCCESS) {
return;
}
reg_size = sizeof (pci_regspec_t) / sizeof (int);
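/*
 * Each "reg" property entry is one pci_regspec_t (five ints).  The
 * pci_phys_hi word encodes both the address-space type and the
 * config-space offset of the BAR it maps, so the loops below scan
 * for the entry matching the BIR-selected BAR (skipping entry 0,
 * which describes config space itself).
 */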
/* Check the register number for MSI-X table */
for (i = 1, rnumber = 0; i < nregs/reg_size; i++) {
rp = (pci_regspec_t *)&regs_list[i * reg_size];
addr_space = rp->pci_phys_hi & PCI_ADDR_MASK;
offset = PCI_REG_REG_G(rp->pci_phys_hi);
if ((offset == breg) && ((addr_space == PCI_ADDR_MEM32) ||
(addr_space == PCI_ADDR_MEM64))) {
rnumber = i;
break;
}
}
ASSERT(rnumber != 0);
state->hs_msix_tbl_rnumber = rnumber;
/* Set device attribute version and access according to Hermon FM */
dev_attr.devacc_attr_version = hermon_devacc_attr_version(state);
dev_attr.devacc_attr_access = hermon_devacc_attr_access(state);
/* Map the entire MSI-X vector table */
if (hermon_regs_map_setup(state, state->hs_msix_tbl_rnumber,
(caddr_t *)&state->hs_msix_tbl_addr, state->hs_msix_tbl_offset,
state->hs_msix_tbl_size, &dev_attr,
&state->hs_fm_msix_tblhdl) != DDI_SUCCESS) {
ddi_prop_free(regs_list);
return;
}
state->hs_msix_pba_offset = PCI_CAP_GET32(pci_cfg_hdl, NULL, caps_ctrl,
PCI_MSIX_PBA_OFFSET);
/* Get the BIR for MSI-X PBA */
breg = pci_msix_bir_index[state->hs_msix_pba_offset &
PCI_MSIX_PBA_BIR_MASK];
ASSERT(breg != 0xFF);
/* Set the MSI-X PBA offset */
state->hs_msix_pba_offset = state->hs_msix_pba_offset &
~PCI_MSIX_PBA_BIR_MASK;
/* Set the MSI-X PBA size */
state->hs_msix_pba_size =
((msix_ctrl & PCI_MSIX_TBL_SIZE_MASK) + 64) / 64 * 8;
/* Check the register number for MSI-X PBA */
for (i = 1, rnumber = 0; i < nregs/reg_size; i++) {
rp = (pci_regspec_t *)&regs_list[i * reg_size];
addr_space = rp->pci_phys_hi & PCI_ADDR_MASK;
offset = PCI_REG_REG_G(rp->pci_phys_hi);
if ((offset == breg) && ((addr_space == PCI_ADDR_MEM32) ||
(addr_space == PCI_ADDR_MEM64))) {
rnumber = i;
break;
}
}
ASSERT(rnumber != 0);
state->hs_msix_pba_rnumber = rnumber;
/* Map in the MSI-X Pending Bit Array */
if (hermon_regs_map_setup(state, state->hs_msix_pba_rnumber,
(caddr_t *)&state->hs_msix_pba_addr, state->hs_msix_pba_offset,
state->hs_msix_pba_size, &dev_attr,
&state->hs_fm_msix_pbahdl) != DDI_SUCCESS) {
hermon_regs_map_free(state, &state->hs_fm_msix_tblhdl);
state->hs_fm_msix_tblhdl = NULL;
ddi_prop_free(regs_list);
return;
}
/* Set the MSI-X table save area */
state->hs_msix_tbl_entries = kmem_alloc(state->hs_msix_tbl_size,
KM_SLEEP);
/* Set the MSI-X PBA save area */
state->hs_msix_pba_entries = kmem_alloc(state->hs_msix_pba_size,
KM_SLEEP);
/* Done with the "reg" property; free the DDI-allocated array */
ddi_prop_free(regs_list);
}
/* Disable Hermon interrupts */
static int
hermon_intr_disable(hermon_state_t *state)
{
ushort_t msix_ctrl = 0, caps_ctrl = 0;
ddi_acc_handle_t pci_cfg_hdl = hermon_get_pcihdl(state);
ddi_acc_handle_t msix_tblhdl = hermon_get_msix_tblhdl(state);
int i, j;
ASSERT(pci_cfg_hdl != NULL && msix_tblhdl != NULL);
ASSERT(state->hs_intr_types_avail &
(DDI_INTR_TYPE_FIXED | DDI_INTR_TYPE_MSI | DDI_INTR_TYPE_MSIX));
/*
 * Check if MSI-X interrupts are in use.  If so, disable MSI-X
 * interrupts.  If not, then since Hermon doesn't use MSI, assume
 * the legacy interrupt is in use instead and disable that.
 */
if ((state->hs_cfg_profile->cp_use_msi_if_avail != 0) &&
(state->hs_intr_types_avail & DDI_INTR_TYPE_MSIX)) {
if ((PCI_CAP_LOCATE(pci_cfg_hdl,
PCI_CAP_ID_MSI_X, &caps_ctrl) == DDI_SUCCESS)) {
if ((msix_ctrl = PCI_CAP_GET16(pci_cfg_hdl, NULL,
caps_ctrl, PCI_MSIX_CTRL)) == PCI_CAP_EINVAL16)
return (DDI_FAILURE);
}
ASSERT(msix_ctrl != 0);
if (!(msix_ctrl & PCI_MSIX_ENABLE_BIT))
return (DDI_SUCCESS);
/* Clear all inums in MSI-X table */
for (i = 0; i < get_msix_tbl_size(state->hs_dip);
i += PCI_MSIX_VECTOR_SIZE) {
for (j = 0; j < PCI_MSIX_VECTOR_SIZE; j += 4) {
char *addr = state->hs_msix_tbl_addr + i + j;
ddi_put32(msix_tblhdl,
(uint32_t *)(uintptr_t)addr, 0x0);
}
}
/* Disable MSI-X interrupts */
msix_ctrl &= ~PCI_MSIX_ENABLE_BIT;
PCI_CAP_PUT16(pci_cfg_hdl, NULL, caps_ctrl, PCI_MSIX_CTRL,
msix_ctrl);
} else {
uint16_t cmdreg = pci_config_get16(pci_cfg_hdl, PCI_CONF_COMM);
ASSERT(state->hs_intr_types_avail & DDI_INTR_TYPE_FIXED);
/* Disable the legacy interrupts */
cmdreg |= PCI_COMM_INTX_DISABLE;
pci_config_put16(pci_cfg_hdl, PCI_CONF_COMM, cmdreg);
}
return (DDI_SUCCESS);
}
/* Hermon quiesce(9F) entry */
static int
hermon_quiesce(dev_info_t *dip)
{
hermon_state_t *state = ddi_get_soft_state(hermon_statep,
DEVI(dip)->devi_instance);
ddi_acc_handle_t pcihdl = hermon_get_pcihdl(state);
ddi_acc_handle_t cmdhdl = hermon_get_cmdhdl(state);
ddi_acc_handle_t msix_tbl_hdl = hermon_get_msix_tblhdl(state);
ddi_acc_handle_t msix_pba_hdl = hermon_get_msix_pbahdl(state);
uint32_t sem, reset_delay = state->hs_cfg_profile->cp_sw_reset_delay;
uint64_t data64;
uint32_t data32;
int status, i, j, loopcnt;
uint_t offset;
ASSERT(state != NULL);
/* start fastreboot */
state->hs_quiescing = B_TRUE;
/* If it's in maintenance mode, do nothing but return with SUCCESS */
if (!HERMON_IS_OPERATIONAL(state->hs_operational_mode)) {
return (DDI_SUCCESS);
}
/* suppress Hermon FM ereports */
if (hermon_get_state(state) & HCA_EREPORT_FM) {
hermon_clr_state_nolock(state, HCA_EREPORT_FM);
}
/* Shutdown HCA ports */
if (hermon_hca_ports_shutdown(state,
state->hs_cfg_profile->cp_num_ports) != HERMON_CMD_SUCCESS) {
state->hs_quiescing = B_FALSE;
return (DDI_FAILURE);
}
/* Close HCA */
if (hermon_close_hca_cmd_post(state, HERMON_CMD_NOSLEEP_SPIN) !=
HERMON_CMD_SUCCESS) {
state->hs_quiescing = B_FALSE;
return (DDI_FAILURE);
}
/* Disable interrupts */
if (hermon_intr_disable(state) != DDI_SUCCESS) {
state->hs_quiescing = B_FALSE;
return (DDI_FAILURE);
}
/*
* Query the PCI capabilities of the HCA device, but don't process
* the VPD until after reset.
*/
if (hermon_pci_capability_list(state, pcihdl) != DDI_SUCCESS) {
state->hs_quiescing = B_FALSE;
return (DDI_FAILURE);
}
/*
 * Read all PCI config info (reg0...reg63).  Note: According to the
 * Hermon software reset application note, we should not read or
 * restore the values in reg22 and reg23.
 * NOTE: For Hermon (and Arbel too) it says to restore the command
 * register LAST; technically, you also need to restore the PCIe
 * Capability "device control" and "link control" registers
 * (word-sized, at offsets 0x08 and 0x10 from the capability ID,
 * respectively).  We hold off restoring the command register -
 * offset 0x4 - until last.
 */
/* First, wait for the semaphore to assure accessibility - per the PRM */
status = -1;
for (i = 0; i < MICROSEC /* 1sec timeout: 1us per poll */; i++) {
sem = ddi_get32(cmdhdl, state->hs_cmd_regs.sw_semaphore);
if (sem == 0) {
status = 0;
break;
}
drv_usecwait(1);
}
/* Check if timeout happens */
if (status == -1) {
state->hs_quiescing = B_FALSE;
return (DDI_FAILURE);
}
/* If MSI-X interrupts are used, save the MSI-X table */
if (msix_tbl_hdl && msix_pba_hdl) {
/* save MSI-X table */
for (i = 0; i < get_msix_tbl_size(state->hs_dip);
i += PCI_MSIX_VECTOR_SIZE) {
for (j = 0; j < PCI_MSIX_VECTOR_SIZE; j += 4) {
char *addr = state->hs_msix_tbl_addr + i + j;
data32 = ddi_get32(msix_tbl_hdl,
(uint32_t *)(uintptr_t)addr);
*(uint32_t *)(uintptr_t)(state->
hs_msix_tbl_entries + i + j) = data32;
}
}
/* save MSI-X PBA */
for (i = 0; i < get_msix_pba_size(state->hs_dip); i += 8) {
char *addr = state->hs_msix_pba_addr + i;
data64 = ddi_get64(msix_pba_hdl,
(uint64_t *)(uintptr_t)addr);
*(uint64_t *)(uintptr_t)(state->
hs_msix_pba_entries + i) = data64;
}
}
/* save PCI config space */
for (i = 0; i < HERMON_SW_RESET_NUMREGS; i++) {
if ((i != HERMON_SW_RESET_REG22_RSVD) &&
(i != HERMON_SW_RESET_REG23_RSVD)) {
state->hs_cfg_data[i] =
pci_config_get32(pcihdl, i << 2);
}
}
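/*
 * (HERMON_SW_RESET_REG22_RSVD and HERMON_SW_RESET_REG23_RSVD are
 * presumably the dword indices 22 and 23 - byte offsets 0x58 and
 * 0x5C - that the software reset application note says to skip.)
 */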
/* SW-reset HCA */
ddi_put32(cmdhdl, state->hs_cmd_regs.sw_reset, HERMON_SW_RESET_START);
/*
* This delay is required so as not to cause a panic here. If the
* device is accessed too soon after reset it will not respond to
* config cycles, causing a Master Abort and panic.
*/
drv_usecwait(reset_delay);
/* Poll waiting for the device to finish resetting */
loopcnt = 100; /* 100 times @ 100 usec - total delay 10 msec */
while ((pci_config_get32(pcihdl, 0) & 0x0000FFFF) != PCI_VENID_MLX) {
drv_usecwait(HERMON_SW_RESET_POLL_DELAY);
if (--loopcnt == 0)
break; /* just in case, break and go on */
}
if (loopcnt == 0) {
state->hs_quiescing = B_FALSE;
return (DDI_FAILURE);
}
/* Restore the config info */
for (i = 0; i < HERMON_SW_RESET_NUMREGS; i++) {
if (i == 1) continue; /* skip the command/status reg - restored last */
if ((i != HERMON_SW_RESET_REG22_RSVD) &&
(i != HERMON_SW_RESET_REG23_RSVD)) {
pci_config_put32(pcihdl, i << 2, state->hs_cfg_data[i]);
}
}
/* If MSI-X interrupts are used, restore the MSI-X table */
if (msix_tbl_hdl && msix_pba_hdl) {
/* restore MSI-X PBA */
for (i = 0; i < get_msix_pba_size(state->hs_dip); i += 8) {
char *addr = state->hs_msix_pba_addr + i;
data64 = *(uint64_t *)(uintptr_t)
(state->hs_msix_pba_entries + i);
ddi_put64(msix_pba_hdl,
(uint64_t *)(uintptr_t)addr, data64);
}
/* restore MSI-X table */
for (i = 0; i < get_msix_tbl_size(state->hs_dip);
i += PCI_MSIX_VECTOR_SIZE) {
for (j = 0; j < PCI_MSIX_VECTOR_SIZE; j += 4) {
char *addr = state->hs_msix_tbl_addr + i + j;
data32 = *(uint32_t *)(uintptr_t)
(state->hs_msix_tbl_entries + i + j);
ddi_put32(msix_tbl_hdl,
(uint32_t *)(uintptr_t)addr, data32);
}
}
}
/*
 * PCI Express Capability - restore the device control and link
 * control contents that we saved during the capability list scan.
 */
offset = state->hs_pci_cap_offset;
data32 = state->hs_pci_cap_devctl;
pci_config_put32(pcihdl, offset + HERMON_PCI_CAP_DEV_OFFS, data32);
data32 = state->hs_pci_cap_lnkctl;
pci_config_put32(pcihdl, offset + HERMON_PCI_CAP_LNK_OFFS, data32);
/*
 * Restore the command register last, forcing the memory space and
 * bus master enable bits (0x0006) on.
 */
pci_config_put32(pcihdl, 0x04, (state->hs_cfg_data[1] | 0x0006));
return (DDI_SUCCESS);
}