hermon_fm.h revision 9e39c5ba00a55fa05777cc94b148296af305e135
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_IB_ADAPTERS_HERMON_FM_H
#define _SYS_IB_ADAPTERS_HERMON_FM_H
/*
* hermon_fm.h
*/
#include <sys/ddifm.h>
#include <sys/fm/protocol.h>
#include <sys/fm/util.h>
#include <sys/fm/io/ddi.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
* HCA FMA compile note.
*
* FMA_TEST is used for HCA function tests, and
* the macro can be turned on by changing the Makefile.
*
* in case of DEBUG
* FMA_TEST is on
*
* in case of non-DEBUG (DEBUG is off)
* FMA_TEST is off
*/
/*
* HCA FM common data structure
*/
/*
 * HCA FM Structure
 * This structure is used to catch HCA HW errors.
 *
 * It holds a reference count, a mutex, a pointer to an i_hca_acc_handle
 * structure and the kmem cache used for HCA acc handles.
 */
struct i_hca_fm {
	uint32_t ref_cnt; /* the number of instances referring to this */
	/*
	 * NOTE(review): the comment on the next member mentions last_err,
	 * which is not a member of this structure; confirm what this lock
	 * actually protects.
	 */
	kmutex_t lock; /* protection for last_err & polling thread */
	/*
	 * Presumably the head of the list chained through
	 * i_hca_acc_handle::next -- TODO confirm against hermon_fm.c.
	 */
	struct i_hca_acc_handle *hdl; /* HCA FM acc handle structure */
	struct kmem_cache *fm_acc_cache; /* HCA acc handle cache */
};
/*
 * HCA FM acc handle structure
 * This structure is holding ddi_acc_handle_t and other members
 * to deal with HCA PIO FM.
 *
 * Structures of this type can be chained through the next member.
 * The fm_acc_hdl() macro reads save_hdl out of a pointer to this structure.
 */
struct i_hca_acc_handle {
	struct i_hca_acc_handle *next; /* next structure */
	ddi_acc_handle_t save_hdl; /* acc handle */
	kmutex_t lock; /* mutex lock for thread count */
	uint32_t thread_cnt; /* number of threads issuing PIOs */
};
/* lint/warlock annotation: save_hdl is declared as safely shared */
_NOTE(SCHEME_PROTECTS_DATA("safe sharing", i_hca_acc_handle::save_hdl))
/*
 * fm_acc_hdl() treats its argument as a pointer to struct i_hca_acc_handle
 * and yields the save_hdl member stored in it.
 */
#define	fm_acc_hdl(hdl)	\
	(((struct i_hca_acc_handle *)(hdl))->save_hdl)

/* FM polling interval: 10ms, expressed in nanoseconds */
#define	FM_POLL_INTERVAL	(10 * 1000 * 1000)
/*
 * HCA FM function test structure
 * This structure can be used to test the basic fm function test for HCA.
 * The test code is included if the FMA_TEST macro is defined.
 *
 * The HCA_TEST_* constants defined inside the structure body are the
 * values for the member they follow: HCA_TEST_PIO/HCA_TEST_IBA for type,
 * the remaining ones for trigger.
 */
struct i_hca_fm_test {
	int num; /* serial number */
	int type; /* PIO or Hermon specific errors */
	/* HCA_TEST_PIO is the type REGISTER_PIO_TEST() registers with */
#define HCA_TEST_PIO 0x1
#define HCA_TEST_IBA 0x2
	int trigger; /* how to trigger a HW error */
#define HCA_TEST_TRANSIENT 0x0001
#define HCA_TEST_PERSISTENT 0x0002
#define HCA_TEST_ATTACH 0x0010
#define HCA_TEST_START 0x0100
#define HCA_TEST_END 0x0200
	/* callback receiving this test item and a ddi_fm_error_t */
	void (*pio_injection)(struct i_hca_fm_test *, ddi_fm_error_t *);
	int errcnt; /* how many transient error occurs */
	int line_num; /* line number in the source code */
	char *file_name; /* source filename */
	char *hash_key; /* hash table for test items */
	void *private; /* private data */
};
/*
 * Hermon FM data structure
 *
 * Hermon-specific type names for the i_hca_* structures declared above.
 */
typedef struct i_hca_fm hermon_hca_fm_t;	/* HCA FM structure */
typedef struct i_hca_acc_handle hermon_acc_handle_t;	/* acc handle */
typedef struct i_hca_fm_test hermon_test_t;	/* FMA_TEST test item */
/*
 * Supplementary device error reporting.
 *
 * Wherever the planned FMA error matrix calls for an ereport, the driver
 * currently issues HERMON_FMANOTE() instead, which logs a CE_NOTE message
 * through cmn_err().  The message carries the instance number taken from
 * state->hs_instance followed by the supplied message string.
 */
#define	HERMON_FMANOTE(state, string)					\
	cmn_err(CE_NOTE, "hermon%d: Device Error: %s",			\
	    (state)->hs_instance, (string))
/* CQE Syndrome errors - see hermon_cq.c */
#define HERMON_FMA_LOCLEN "CQE local length error"
#define HERMON_FMA_LOCQPOP "CQE local qp operation error"
#define HERMON_FMA_LOCPROT "CQE local protection error"
#define HERMON_FMA_WQFLUSH "CQE wqe flushed in error"
#define HERMON_FMA_MWBIND "CQE memory window bind error"
#define HERMON_FMA_RESP "CQE bad response"
#define HERMON_FMA_LOCACC "CQE local access error"
#define HERMON_FMA_REMREQ "CQE remote invalid request error"
#define HERMON_FMA_REMACC "CQE remote access error"
#define HERMON_FMA_REMOP "CQE remote operation error"
#define HERMON_FMA_XPORTCNT "CQE transport retry counter exceeded"
#define HERMON_FMA_RNRCNT "CQE RNR retry counter exceeded"
#define HERMON_FMA_REMABRT "CQE remote aborted error"
#define HERMON_FMA_UNKN "CQE unknown/reserved error returned"
/* event errors - see hermon_event.c */
#define HERMON_FMA_OVERRUN "EQE cq overrun or protection error"
#define HERMON_FMA_LOCCAT "EQE local work queue catastrophic error"
#define HERMON_FMA_QPCAT "EQE local queue pair catastrophic error"
#define HERMON_FMA_PATHMIG "EQE path migration failed"
#define HERMON_FMA_LOCINV "EQE invalid request - local work queue"
#define HERMON_FMA_LOCACEQ "EQE local access violation"
#define HERMON_FMA_SRQCAT "EQE shared received queue catastrophic"
#define HERMON_FMA_INTERNAL "EQE hca internal error"
/* HCR device failure returns - see hermon_cmd.c */
#define HERMON_FMA_HCRINT "HCR internal error processing command"
#define HERMON_FMA_NVMEM "HCR NVRAM checksum/CRC failure"
#define HERMON_FMA_TOTOG "HCR Timeout waiting for command toggle"
#define HERMON_FMA_GOBIT "HCR Timeout waiting for command go bit"
#define HERMON_FMA_RSRC "HCR Command insufficient resources"
#define HERMON_FMA_CMDINV "HCR Command invalid status returned"
/* HCA initialization errors - see hermon.c */
#define HERMON_FMA_FWVER "HCA firmware not at minimum version"
#define HERMON_FMA_PCIID "HCA PCIe devid not supported"
#define HERMON_FMA_MAINT "HCA device set to memory controller mode"
#define HERMON_FMA_BADNVMEM "HCR bad NVMEM error"
/*
 * HCA FM constants
 */

/* HCA FM state: value meaning that HCA FM is not supported */
#define	HCA_NO_FM		0x0000

/*
 * HCA FM state flags.  HCA_PIO_FM is the flag hermon_pio_start() tests
 * in hs_fm_state before doing any PIO error checking.
 */
#define	HCA_PIO_FM		0x0001	/* PIO is fma-protected */
#define	HCA_DMA_FM		0x0002	/* DMA is fma-protected */
#define	HCA_EREPORT_FM		0x0004	/* FMA ereport is available */
#define	HCA_ERRCB_FM		0x0010	/* FMA error callback is supported */
#define	HCA_ATTCH_FM		0x0100	/* HCA FM attach mode */
#define	HCA_RUNTM_FM		0x0200	/* HCA FM runtime mode */

/* HCA ereport type (second argument of hermon_fm_ereport()) */
#define	HCA_SYS_ERR		0x0001	/* HW error reported by Solaris FMA */
#define	HCA_IBA_ERR		0x0002	/* IB specific HW error */

/* HCA ereport detail (third argument of hermon_fm_ereport()) */
#define	HCA_ERR_TRANSIENT	0x0010	/* HCA temporary error */
#define	HCA_ERR_NON_FATAL	0x0020	/* HCA persistent error */
#define	HCA_ERR_SRV_LOST	0x0040	/* HCA attach failure */
#define	HCA_ERR_DEGRADED	0x0080	/* HCA maintenance mode */
#define	HCA_ERR_FATAL		0x0100	/* HCA critical situation */
#define	HCA_ERR_IOCTL		0x0200	/* EIO */

/*
 * Ignore HCA HW error check.  hermon_pio_start() stores this value in the
 * status variable when HCA_PIO_FM is not set, and hermon_pio_end() skips
 * its checks when it sees it.
 */
#define	HCA_SKIP_HW_CHK		(-1)

/* HCA FM pio retry operation state */
#define	HCA_PIO_OK		0	/* No HW errors */
#define	HCA_PIO_TRANSIENT	1	/* transient error */
#define	HCA_PIO_PERSISTENT	2	/* persistent error */

/* Initial value of the PIO retry counter */
#define	HCA_PIO_RETRY_CNT	3
/*
 * Hermon FM macros
 *
 * TEST_DECLARE(), REGISTER_PIO_TEST(), PIO_START() and PIO_END() come in
 * two flavours.  Without FMA_TEST the first two expand to nothing and the
 * hermon_PIO_start()/hermon_PIO_end() calls receive NULL as their test
 * item.  With FMA_TEST a hermon_test_t pointer is declared, filled in by
 * hermon_test_register() with the caller's file and line, and handed to
 * the same two routines.  PIO_END() always passes the address of cnt.
 */
#ifndef FMA_TEST
#define	TEST_DECLARE(tst)
#define	REGISTER_PIO_TEST(st, tst)
#define	PIO_START(st, hdl, tst)		hermon_PIO_start(st, hdl, NULL)
#define	PIO_END(st, hdl, cnt, tst)	hermon_PIO_end(st, hdl, &(cnt), NULL)
#else	/* FMA_TEST */
#define	TEST_DECLARE(tst)		hermon_test_t *tst;
#define	REGISTER_PIO_TEST(st, tst)					\
	(tst) = hermon_test_register(st, __FILE__, __LINE__, HCA_TEST_PIO)
#define	PIO_START(st, hdl, tst)		hermon_PIO_start(st, hdl, tst)
#define	PIO_END(st, hdl, cnt, tst)	hermon_PIO_end(st, hdl, &(cnt), tst)
#endif	/* FMA_TEST */
/*
 * hermon_pio_init() declares and initializes the local variables used by
 * the hermon_pio_start()/hermon_pio_end() pair: the test item (only when
 * TEST_DECLARE() expands to something), the retry counter, preset to
 * HCA_PIO_RETRY_CNT, and the status, preset to HCA_PIO_OK.
 * The caller supplies the terminating semicolon.
 */
#define	hermon_pio_init(cnt, status, tst)				\
	TEST_DECLARE(tst)						\
	int cnt = HCA_PIO_RETRY_CNT;					\
	int status = HCA_PIO_OK
/*
 * hermon_pio_start() is one of a pair of macros checking HW errors
 * at PIO requests, which should be called before the requests are issued.
 *
 * Arguments:
 *	st	pointer to the driver state (hs_fm_state and
 *		hs_fm_async_fatal are read from it)
 *	hdl	access handle passed on to PIO_START()
 *	label	goto target taken when the PIO must not be attempted
 *	cnt	retry counter variable, reset to HCA_PIO_RETRY_CNT
 *	status	status variable, set to HCA_SKIP_HW_CHK when HCA_PIO_FM is
 *		not set in hs_fm_state
 *	tst	test item, only used when FMA_TEST is defined
 *
 * When HCA_PIO_FM is set the macro jumps to label if hs_fm_async_fatal is
 * set (after posting an HCA_SYS_ERR/HCA_ERR_NON_FATAL ereport) or if
 * PIO_START() returns HCA_PIO_PERSISTENT.
 *
 * The macro ends with an open "do {"; the matching hermon_pio_end() closes
 * the loop, so the two must always be used together in the same scope.
 *
 * The st, hdl, cnt and status arguments are parenthesized in the expansion
 * so that expressions such as "&state" can be passed safely.
 */
#define	hermon_pio_start(st, hdl, label, cnt, status, tst)		\
	if ((st)->hs_fm_state & HCA_PIO_FM) {				\
		if ((st)->hs_fm_async_fatal) {				\
			hermon_fm_ereport((st), HCA_SYS_ERR,		\
			    HCA_ERR_NON_FATAL);				\
			goto label;					\
		} else {						\
			REGISTER_PIO_TEST((st), tst);			\
			(cnt) = HCA_PIO_RETRY_CNT;			\
			if (PIO_START((st), (hdl), tst) ==		\
			    HCA_PIO_PERSISTENT) {			\
				goto label;				\
			}						\
		}							\
	} else {							\
		(status) = HCA_SKIP_HW_CHK;				\
	}								\
	do {
/*
 * hermon_pio_end() is the other of a pair of macros checking HW errors
 * at PIO requests, which should be called after the requests end.
 * If a HW error is detected and can be isolated well, these macros
 * retry the operation to determine if the error is persistent or not.
 *
 * Arguments are the same as for hermon_pio_start().  Unless status is
 * HCA_SKIP_HW_CHK, the macro:
 *	- jumps to label when hs_fm_async_fatal is set (after posting an
 *	  HCA_SYS_ERR/HCA_ERR_NON_FATAL ereport);
 *	- stores the result of PIO_END() in status and jumps to label when
 *	  it is HCA_PIO_PERSISTENT;
 *	- posts an HCA_SYS_ERR/HCA_ERR_TRANSIENT ereport when it is
 *	  HCA_PIO_TRANSIENT.
 *
 * The macro closes the "do {" opened by hermon_pio_start(); the enclosed
 * statements are repeated for as long as status is HCA_PIO_TRANSIENT.
 * The caller supplies the terminating semicolon.
 *
 * The st, hdl and status arguments are parenthesized in the expansion so
 * that expressions such as "&state" can be passed safely.
 */
#define	hermon_pio_end(st, hdl, label, cnt, status, tst)		\
	if ((status) != HCA_SKIP_HW_CHK) {				\
		if ((st)->hs_fm_async_fatal) {				\
			hermon_fm_ereport((st), HCA_SYS_ERR,		\
			    HCA_ERR_NON_FATAL);				\
			goto label;					\
		}							\
		if (((status) = PIO_END((st), (hdl), cnt, tst)) ==	\
		    HCA_PIO_PERSISTENT) {				\
			goto label;					\
		} else if ((status) == HCA_PIO_TRANSIENT) {		\
			hermon_fm_ereport((st), HCA_SYS_ERR,		\
			    HCA_ERR_TRANSIENT);				\
		}							\
	}								\
	} while ((status) == HCA_PIO_TRANSIENT)
/*
 * Hermon FM routines.  hermon_state_t and hermon_cmd_post_t are not
 * declared in this header and have to be visible before it is included.
 */
extern void hermon_fm_init(hermon_state_t *);
extern void hermon_fm_fini(hermon_state_t *);
extern int hermon_fm_ereport_init(hermon_state_t *);
extern void hermon_fm_ereport_fini(hermon_state_t *);
extern int hermon_get_state(hermon_state_t *);
extern boolean_t hermon_init_failure(hermon_state_t *);
extern boolean_t hermon_cmd_retry_ok(hermon_cmd_post_t *, int);
/*
 * hermon_fm_ereport() is called by hermon_pio_start()/hermon_pio_end()
 * with an ereport type (HCA_SYS_ERR) and an ereport detail (HCA_ERR_*).
 */
extern void hermon_fm_ereport(hermon_state_t *, int, int);
extern int hermon_regs_map_setup(hermon_state_t *, uint_t, caddr_t *, offset_t,
    offset_t, ddi_device_acc_attr_t *, ddi_acc_handle_t *);
extern void hermon_regs_map_free(hermon_state_t *, ddi_acc_handle_t *);
extern int hermon_pci_config_setup(hermon_state_t *, ddi_acc_handle_t *);
extern void hermon_pci_config_teardown(hermon_state_t *, ddi_acc_handle_t *);
extern ushort_t hermon_devacc_attr_version(hermon_state_t *);
extern uchar_t hermon_devacc_attr_access(hermon_state_t *);
/*
 * hermon_PIO_start()/hermon_PIO_end() are reached through the PIO_START()
 * and PIO_END() macros.  PIO_END() passes the address of the caller's cnt
 * variable as the int * argument; the hermon_test_t * argument is NULL
 * unless FMA_TEST is defined.
 */
extern int hermon_PIO_start(hermon_state_t *, ddi_acc_handle_t,
    hermon_test_t *);
extern int hermon_PIO_end(hermon_state_t *, ddi_acc_handle_t, int *,
    hermon_test_t *);
/* Routines returning a ddi_acc_handle_t for the given driver state */
extern ddi_acc_handle_t hermon_rsrc_alloc_uarhdl(hermon_state_t *);
extern ddi_acc_handle_t hermon_get_uarhdl(hermon_state_t *);
extern ddi_acc_handle_t hermon_get_cmdhdl(hermon_state_t *);
extern ddi_acc_handle_t hermon_get_msix_tblhdl(hermon_state_t *);
extern ddi_acc_handle_t hermon_get_msix_pbahdl(hermon_state_t *);
extern ddi_acc_handle_t hermon_get_pcihdl(hermon_state_t *);
extern void hermon_clr_state_nolock(hermon_state_t *, int);
extern void hermon_inter_err_chk(void *);
#ifdef FMA_TEST
/*
 * Test support, only available when FMA_TEST is defined.
 * hermon_test_register() is invoked by REGISTER_PIO_TEST() with __FILE__,
 * __LINE__ and HCA_TEST_PIO.
 */
extern hermon_test_t *hermon_test_register(hermon_state_t *, char *, int, int);
extern void hermon_test_deregister(void);
extern int hermon_test_num;
#endif /* FMA_TEST */
#ifdef __cplusplus
}
#endif
#endif /* _SYS_IB_ADAPTERS_HERMON_FM_H */