eib_impl.h revision b494511a9cf72b1fc4eb13a0e593f55c624ab829
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
*/
#ifndef _SYS_IB_EOIB_EIB_IMPL_H
#define _SYS_IB_EOIB_EIB_IMPL_H
#ifdef __cplusplus
extern "C" {
#endif
/*
* Driver specific constants
*/
/*
 * Return codes and general driver limits.
 */
#define EIB_E_SUCCESS               0
#define EIB_E_FAILURE               (-1)    /* parenthesized so it is one operand anywhere */
#define EIB_MAX_LINE                128
#define EIB_MAX_SGL                 59
#define EIB_MAX_POST_MULTIPLE       4
#define EIB_MAX_PAYLOAD_HDR_SZ      160
#define EIB_LOGIN_TIMEOUT_USEC      8000000 /* 8,000,000 usec */
#define EIB_RWR_CHUNK_SZ            8
#define EIB_IPHDR_ALIGN_ROOM        32
#define EIB_IP_HDR_ALIGN            2
#define EIB_MAX_RX_PKTS_ONINTR      0x800   /* 2048 */
#define EIB_MAX_LOGIN_ATTEMPTS      3
#define EIB_MAX_VHUB_TBL_ATTEMPTS   3
#define EIB_MAX_KA_ATTEMPTS         3
#define EIB_MAX_ATTEMPTS            10
#define EIB_DELAY_HALF_SECOND       500000  /* presumably usec, going by the name */
#define EIB_GRH_SZ                  (sizeof (ib_grh_t))
/*
* Debug messages
*/
/* Message categories; each one occupies its own bit. */
#define EIB_MSGS_CRIT       0x01
#define EIB_MSGS_ERR        0x02
#define EIB_MSGS_WARN       0x04
#define EIB_MSGS_DEBUG      0x08
#define EIB_MSGS_ARGS       0x10
#define EIB_MSGS_PKT        0x20
#define EIB_MSGS_VERBOSE    0x40

/* Default log size: 0x20000 == 131072 (units not stated here). */
#define EIB_LOGSZ_DEFAULT   0x20000
/*
 * Logging entry points.
 *
 * Critical, error and warning messages always map to their
 * eib_dprintf_*() routine. The remaining levels map to a routine only
 * when EIB_DEBUG is defined. Otherwise each macro expands to "0 &&", so
 * EIB_DPRINTF_DEBUG(args) becomes "0 && (args)" and the arguments are
 * never evaluated.
 *
 * Every level must be defined in both branches, so that a call site
 * compiles the same way with and without EIB_DEBUG.
 */
#define EIB_DPRINTF_CRIT eib_dprintf_crit
#define EIB_DPRINTF_ERR eib_dprintf_err
#define EIB_DPRINTF_WARN eib_dprintf_warn
#ifdef EIB_DEBUG
#define EIB_DPRINTF_DEBUG eib_dprintf_debug
#define EIB_DPRINTF_ARGS eib_dprintf_args
#define EIB_DPRINTF_PKT eib_dprintf_pkt
#define EIB_DPRINTF_VERBOSE eib_dprintf_verbose
#else
#define EIB_DPRINTF_DEBUG 0 &&
#define EIB_DPRINTF_ARGS 0 &&
#define EIB_DPRINTF_PKT 0 &&
#define EIB_DPRINTF_VERBOSE 0 &&
#endif
/*
* EoIB threads to provide various services
*/
/* Name strings, one per service thread. */
#define EIB_EVENTS_HDLR         "eib_events_handler"
#define EIB_RWQES_REFILLER      "eib_rwqes_refiller"
#define EIB_VNIC_CREATOR        "eib_vnic_creator"
#define EIB_TXWQES_MONITOR      "eib_txwqe_monitor"     /* string says "txwqe", not "txwqes" */
#define EIB_LSOBUFS_MONITOR     "eib_lsobufs_monitor"
/*
* Macro for finding the least significant bit set in a 64-bit unsigned int
*/
/*
* LSO buffers
*
* Under normal circumstances we should never need to use any buffer
* that's larger than MTU. Unfortunately, IB HCA has limitations
* on the length of SGL that are much smaller than those for regular
* ethernet NICs. Since the network layer doesn't care to limit the
* number of mblk fragments in any send mp chain, we end up having to
* use these larger buffers occasionally.
*/
#define EIB_LSO_MAXLEN      65536   /* 64 * 1024 */
#define EIB_LSO_BUFSZ       8192    /* size of one LSO buffer; 8 * 1024 */
#define EIB_LSO_NUM_BUFS    1024    /* number of LSO buffers */
/*
 * Descriptor for one LSO buffer. Descriptors chain through lb_next.
 */
typedef struct eib_lsobuf_s {
	struct eib_lsobuf_s	*lb_next;	/* next descriptor in the chain */
	int			lb_isfree;	/* presumably non-zero while unused */
} eib_lsobuf_t;
typedef struct eib_lsobkt_s {
} eib_lsobkt_t;
/* LSO buffer bucket event bits (distinct single-bit values). */
#define EIB_LBUF_SHORT          0x1
#define EIB_LBUF_MONITOR_DIE    0x2
/*
* The admin partition is only used for sending login and logout messages
* and receiving login acknowledgements from the gateway. While packets
* going out on several vlans at the same time could result in multiple
* vnic creations happening at the same time (and therefore multiple login
* packets), we serialize the vnic creation via the vnic creator thread, so
* we shouldn't need a lot of send wqes or receive wqes. Note also that we
* keep the cq size request to slightly less than a 2^n boundary to allow
* the alloc cq routine to return the closest 2^n boundary as the real cq
* size without wasting too much memory.
*/
#define EIB_ADMIN_MAX_SWQE  30  /* admin qp: send wqe limit */
#define EIB_ADMIN_MAX_RWQE  30  /* admin qp: receive wqe limit */
/*
* The control qp is per vhub partition, and is used to send and receive
* update response and vnic alive messages. While the vhub table response
* and vhub update messages might take a few rwqes, the vhub table request
* is made only once per vnic, and the vnic alive message is periodic
* and uses a single swqe as well. Per vnic, we should certainly not need
* many swqes or rwqes.
*/
#define EIB_CTL_MAX_SWQE    30  /* control qp: send wqe limit */
#define EIB_CTL_MAX_RWQE    30  /* control qp: receive wqe limit */
/*
* For the vNIC's data channel, there are three items that are of importance:
* the constraints defined below, the hca_max_chan_sz attribute and a third
* value that is not named in this comment. The effective swqe/rwqe limit
* presumably depends on all three of these values.
*
* While the total number of RWQEs posted to the data channel of any vNIC will
* not exceed EIB_DATA_MAX_RWQE, we also do not want to acquire and post all of
* it during the data channel initialization, since that is a lot of wqes for
* one vnic to consume when we don't even know if the vnic will need it at all.
* We post an initial set of EIB_DATA_RWQE_BKT rwqes, and slowly post more and
* more sets as we see them being consumed, until we hit the hard limit of
* EIB_DATA_MAX_RWQE.
*/
#define EIB_DATA_MAX_SWQE   4000    /* data channel: send wqe limit */
#define EIB_DATA_MAX_RWQE   4000    /* data channel: hard limit on posted rwqes */
#define EIB_DATA_RWQE_BKT   512     /* rwqes posted per set */
/*
* vNIC data channel CQ moderation parameters
*/
/* Tx side: completion count and interval (usec). */
#define EIB_TX_COMP_COUNT   10
#define EIB_TX_COMP_USEC    300
/* Rx side: completion count and interval (usec). */
#define EIB_RX_COMP_COUNT   4
#define EIB_RX_COMP_USEC    10
/*
* qe_info masks (blk:ndx:type:flags)
*/
/*
 * Four 8-bit fields packed into 32 bits. Shift right by the field's
 * SHIFT and then apply its MASK:
 *
 *	blk   : bits 31..24
 *	ndx   : bits 23..16
 *	type  : bits 15..8
 *	flags : bits  7..0
 */
#define EIB_WQEBLK_SHIFT    24
#define EIB_WQEBLK_MASK     0xFF

#define EIB_WQENDX_SHIFT    16
#define EIB_WQENDX_MASK     0xFF

#define EIB_WQETYP_SHIFT    8
#define EIB_WQETYP_MASK     0xFF

#define EIB_WQEFLGS_SHIFT   0
#define EIB_WQEFLGS_MASK    0xFF
/*
* Macros to get the bit fields from qe_info
*/
/*
* Values for type and flags in qe_info
*/
/* Type values. */
#define EIB_WQE_TX                  0x1
#define EIB_WQE_RX                  0x2
/*
 * Flag bits. Going by the names, these two presumably record who
 * currently holds the wqe (the HCA or the network layer) -- confirm
 * against the users.
 */
#define EIB_WQE_FLG_POSTED_TO_HCA   0x1
#define EIB_WQE_FLG_WITH_NW         0x2
/*
 * Flag bits describing the kind of buffer attached to the wqe.
 */
#define EIB_WQE_FLG_BUFTYPE_LSO     0x4
#define EIB_WQE_FLG_BUFTYPE_MAPPED  0x8
/*
 * A single work queue element (wqe).
 */
typedef struct eib_wqe_s {
	struct eib_wqe_pool_s	*qe_pool;	/* wqe pool reference */
	int			qe_vnic_inst;	/* presumably the vnic instance number */
	struct eib_wqe_s	*qe_nxt_post;	/* link to another wqe */
	struct eib_chan_s	*qe_chan;	/* channel reference */
} eib_wqe_t;
/*
* Wqe pool bookkeeping.
*
* Each set of 64 wqes (a "wqe block") is managed by a single 64-bit
* integer bitmap. The free status of a set of 64 such wqe blocks (a
* "wqe pool") is managed by one 64-bit integer bitmap (if any wqe in
* the wqe block is free, the bit in the map is 1, otherwise it is 0).
*
* The maximum pool size is 4096 wqes (64 blocks of 64 wqes), but this
* can easily be extended to support more wqes using additional pools
* of wqes.
*
* Note that an entire pool of wqes is allocated via a single allocation;
* the other per-pool resources are likewise allocated via a single
* allocation.
*/
#define EIB_BLKS_PER_POOL   64                      /* wqe blocks in one wqe pool */
#define EIB_WQE_SZ          (sizeof (eib_wqe_t))    /* size of one wqe descriptor */
typedef struct eib_wqe_pool_s {
struct eib_wqe_pool_s *wp_next;
int wp_type;
/*
* Values for wp_type
*/
#define EIB_WP_TYPE_TX  0x1     /* wp_type: tx */
#define EIB_WP_TYPE_RX  0x2     /* wp_type: rx */
/*
* Values for wp_status (bit fields)
*/
/*
* The low-water-mark is an indication of when wqe grabs for low-priority
* qps should start to get refused (swqe grabs for control messages such
* as keepalives and rwqe grabs for posting back to control qps will still
* be allowed). The high-water-mark is an indication of when normal
* behavior should resume.
*/
/*
* The "rwqes low" is used to determine when we should start using allocb()
* to copy and send received mblks in the rx path. It should be a little
* above the rwqes low-water-mark, but less than the high-water-mark.
*/
#define EIB_NFREE_RWQES_LOW \
/*
* Multicast GID Layout: the multicast gid is specified in big-endian
* representation, as a collection of different-sized fields in the
* EoIB specification. On Solaris, the multicast gid is represented
* as a collection of two 8-byte fields (in ib_gid_t).
*/
typedef struct eib_mgid_spec_s {
/*
* Values for sp_type in mgid as per EoIB specification
*/
#define EIB_MGID_VHUB_DATA      0x0
#define EIB_MGID_VHUB_UPDATE    0x2
#define EIB_MGID_VHUB_TABLE     0x3     /* note: 0x1 is not defined here */
typedef union eib_mgid_s {
} eib_mgid_t;
/*
* Gateway properties handed over to us by the EoIB nexus
*/
typedef struct eib_gw_props_s {
/*
* Port-specific properties
*/
typedef struct eib_props_s {
} eib_props_t;
/*
* Capabilities derived from HCA attributes
*/
typedef struct eib_caps_s {
	int	cp_resv_lkey_capab;	/* presumably reserved-lkey capability -- confirm */
} eib_caps_t;
/*
* List of multicast groups the vnic joined
*/
typedef struct eib_mcg_s {
} eib_mcg_t;
/*
 * Channel state. A channel holds references to up to three multicast
 * group entries (vhub table, vhub update, vhub data) and can be
 * linked to another channel through ch_rxpost_next.
 */
typedef struct eib_chan_s {
	int			ch_vnic_inst;		/* presumably owning vnic's instance */
	struct eib_wqe_s	*ch_tx_tail;		/* wqe reference; tail, going by the name */
	struct eib_mcg_s	*ch_vhub_table;		/* mcg entry: vhub table */
	struct eib_mcg_s	*ch_vhub_update;	/* mcg entry: vhub update */
	struct eib_mcg_s	*ch_vhub_data;		/* mcg entry: vhub data */
	struct eib_chan_s	*ch_rxpost_next;	/* next channel in a chain */
} eib_chan_t;
/*
* States for vNIC state machine during login
*/
/* Consecutive values, 0 through 10. */
#define EIB_LOGIN_INIT          0
#define EIB_LOGIN_ACK_WAIT      1
#define EIB_LOGIN_ACK_RCVD      2
#define EIB_LOGIN_NACK_RCVD     3
#define EIB_LOGIN_TBL_WAIT      4
#define EIB_LOGIN_TBL_INPROG    5
#define EIB_LOGIN_TBL_DONE      6
#define EIB_LOGIN_TBL_FAILED    7
#define EIB_LOGIN_DONE          8
#define EIB_LOGIN_TIMED_OUT     9
#define EIB_LOGOUT_DONE         10
typedef struct eib_login_data_s {
/*
* Map to translate between DMAC and {qpn, lid, sl}
*/
typedef struct eib_vhub_map_s {
struct eib_vhub_map_s *mp_next;
/*
* Per-vNIC vHUB Table
*/
#define EIB_TB_NBUCKETS     13  /* bucket count for the vHUB table */
typedef struct eib_vhub_table_s {
struct eib_vhub_map_s *tb_gateway;
struct eib_vhub_map_s *tb_unicast_miss;
struct eib_vhub_map_s *tb_vhub_multicast;
typedef struct eib_vhub_update_s {
typedef struct eib_ether_hdr_s {
int eh_tagless;
/*
* vNIC Information
*/
typedef struct eib_vnic_s {
	int				vn_instance;	/* vnic instance number */
	struct eib_login_data_s		vn_login_data;	/* login data, embedded by value */
	struct eib_vhub_table_s		*vn_vhub_table;	/* vHUB table reference */
	struct eib_vhub_update_s	*vn_vhub_update;	/* vHUB update reference */
} eib_vnic_t;
/*
* The node state carries "starting" and "stopping" bits. Access to
* the rest of the mac state is protected by these two bits.
*/
#define EIB_NIC_STARTING    0x01    /* transition bit: start */
#define EIB_NIC_STOPPING    0x02    /* transition bit: stop */
#define EIB_NIC_STARTED     0x80    /* kept apart from the two bits above */
typedef struct eib_node_state_s {
/*
* MIB-II statistics to report to the mac layer
*/
typedef struct eib_stats_s {
} eib_stats_t;
/*
* Cache of address vectors with dlid as the key. Currently we use
* eib state structure's ei_lock to protect the individual address
* vector's fields. This is a lock granularity that's slightly
* bigger than ideal, but it should do for now.
*/
#define EIB_AV_NBUCKETS     17  /* bucket count for the address vector cache */

typedef struct eib_avect_s {
	struct eib_avect_s	*av_next;	/* next address vector entry */
} eib_avect_t;
/*
* vNIC creation and deletion are serialized by a non-zero value
* to the ei_vnic_state member (i.e. only one vnic may be created
* or deleted at a time). The ei_active_vnics member is accessed
* only after a successful setting of ei_vnic_state.
*/
#define EIB_VN_BEING_CREATED    0x01    /* both values are non-zero */
#define EIB_VN_BEING_DELETED    0x02
/*
* All possible EoIB event work items that need to be handled
*/
/* Consecutive values, 0 through 12. */
#define EIB_EV_NONE             0
#define EIB_EV_PORT_DOWN        1
#define EIB_EV_PORT_UP          2
#define EIB_EV_PKEY_CHANGE      3
#define EIB_EV_SGID_CHANGE      4
#define EIB_EV_CLNT_REREG       5
#define EIB_EV_GW_EPORT_DOWN    6
#define EIB_EV_GW_DOWN          7
#define EIB_EV_GW_UP            8
#define EIB_EV_GW_INFO_UPDATE   9
#define EIB_EV_MCG_DELETED      10
#define EIB_EV_MCG_CREATED      11
#define EIB_EV_SHUTDOWN         12
/*
 * One queued event work item. Items chain through ev_next.
 */
typedef struct eib_event_s {
	struct eib_event_s	*ev_next;	/* next work item */
	void			*ev_arg;	/* opaque argument for the event */
} eib_event_t;
/*
* Work element for new vnic creation
*/
typedef struct eib_vnic_req_s {
struct eib_vnic_req_s *vr_next;
/*
* Values for vr_req
*/
#define EIB_CR_REQ_NEW_VNIC     1
#define EIB_CR_REQ_FLUSH        2
#define EIB_CR_REQ_DIE          3
/*
* Work element for vnics kept alive by the keepalive manager thread
* and bitfield values for ei_ka_vnics_event.
*/
typedef struct eib_ka_vnics_s {
struct eib_ka_vnics_s *ka_next;
struct eib_vnic_s *ka_vnic;
#define EIB_KA_VNICS_DIE        0x1     /* bitfield value */
#define EIB_KA_VNICS_TIMED_OUT  0x2     /* bitfield value */
/*
* EoIB per-instance state
*/
/*
 * Top-level per-instance structure. Every member shown is a pointer
 * to one of the structures declared in this header.
 */
typedef struct eib_s {
	/* properties, capabilities, statistics and node state */
	struct eib_gw_props_s	*ei_gw_props;
	struct eib_props_s	*ei_props;
	struct eib_caps_s	*ei_caps;
	struct eib_stats_s	*ei_stats;
	struct eib_node_state_s	*ei_node_state;

	/* admin channel, wqe pools and LSO bucket */
	struct eib_chan_s	*ei_admin_chan;
	struct eib_wqe_pool_s	*ei_tx;
	struct eib_wqe_pool_s	*ei_rx;
	struct eib_lsobkt_s	*ei_lso;

	/* pending work */
	struct eib_vnic_s	*ei_vnic_pending;
	struct eib_event_s	*ei_event;
	struct eib_chan_s	*ei_rxpost;

	/* vnic creation requests, by list */
	struct eib_vnic_req_s	*ei_vnic_req;
	struct eib_vnic_req_s	*ei_failed_vnic_req;
	struct eib_vnic_req_s	*ei_pending_vnic_req;

	/* keepalive work elements */
	struct eib_ka_vnics_s	*ei_ka_vnics;
} eib_t;
/*
* Private read-only datalink properties
*/
/* Property name strings; each one starts with "_eib_". */
#define EIB_DLPROP_GW_EPORT_STATE   "_eib_eport_state"
#define EIB_DLPROP_HCA_GUID         "_eib_hca_guid"
#define EIB_DLPROP_PORT_GUID        "_eib_port_guid"
/*
* FUNCTION PROTOTYPES FOR CROSS-FILE LINKAGE
*/
/*
* FIP protocol related
*/
/*
* Service threads and other handlers
*
* Every routine here takes the per-instance eib_t and returns nothing.
* The first six are presumably the bodies of the service threads; the
* eib_stop_*() routines are their same-named counterparts.
*
* NOTE(review): no eib_stop_*() counterpart for eib_monitor_lso_bufs()
* is declared in this list -- confirm whether one exists elsewhere.
*/
extern void eib_events_handler(eib_t *);
extern void eib_refill_rwqes(eib_t *);
extern void eib_vnic_creator(eib_t *);
extern void eib_monitor_tx_wqes(eib_t *);
extern void eib_monitor_lso_bufs(eib_t *);
extern void eib_manage_keepalives(eib_t *);
extern void eib_stop_events_handler(eib_t *);
extern void eib_stop_refill_rwqes(eib_t *);
extern void eib_stop_vnic_creator(eib_t *);
extern void eib_stop_monitor_tx_wqes(eib_t *);
extern void eib_stop_manage_keepalives(eib_t *);
extern void eib_flush_vnic_reqs(eib_t *);
/*
* Admin QP related
*
* eib_adm_setup_qp() returns an int and takes an additional int
* pointer (presumably an error out-parameter -- confirm against the
* definition). eib_rb_adm_setup_qp() is its "rb" counterpart,
* presumably the rollback of the setup.
*/
extern int eib_adm_setup_qp(eib_t *, int *);
extern void eib_rb_adm_setup_qp(eib_t *);
/*
* Control QP related
*/
/*
* Data QP related
*/
extern void eib_data_rx_recycle(caddr_t);
boolean_t *);
eib_ether_hdr_t *);
/*
* Resource related
*
* eib_rsrc_setup_bufs() returns an int and takes an additional int
* pointer (presumably an error out-parameter -- confirm). The two
* *_needed() routines take only the eib_t and return nothing.
*/
extern int eib_rsrc_setup_bufs(eib_t *, int *);
extern void eib_rsrc_txwqes_needed(eib_t *);
extern void eib_rsrc_lsobufs_needed(eib_t *);
/*
* IBT related
*
* eib_ibt_hca_init() is the only routine in this group that returns
* a status (int).
*/
extern int eib_ibt_hca_init(eib_t *);
extern void eib_ibt_link_mod(eib_t *);
extern void eib_ibt_free_avects(eib_t *);
/*
* Chan related
*
* eib_chan_init() takes no arguments and returns an eib_chan_t
* pointer; eib_chan_fini() takes such a pointer and returns nothing.
*/
extern eib_chan_t *eib_chan_init(void);
extern void eib_chan_fini(eib_chan_t *);
/*
* Mac layer related
*
* eib_mac_start() returns an int status; eib_mac_stop() returns
* nothing.
*/
extern int eib_mac_start(eib_t *);
extern void eib_mac_stop(eib_t *);
/*
* VNIC related
*/
boolean_t, int *);
extern void eib_vnic_rejoin_mcgs(eib_t *);
/*
* Logging and other stuff
*
* The eib_dprintf_*() routines take an int followed by a printf-style
* format string and variable arguments. The crit/err/warn routines
* are always declared; the others only when EIB_DEBUG is defined.
*
* NOTE(review): EIB_DPRINTF_PKT maps to eib_dprintf_pkt under
* EIB_DEBUG, but no prototype for eib_dprintf_pkt appears in this
* list -- confirm where it is declared.
*/
extern void eib_debug_init(void);
extern void eib_debug_fini(void);
extern void eib_dprintf_crit(int, const char *fmt, ...);
extern void eib_dprintf_err(int, const char *fmt, ...);
extern void eib_dprintf_warn(int, const char *fmt, ...);
#ifdef EIB_DEBUG
extern void eib_dprintf_debug(int, const char *fmt, ...);
extern void eib_dprintf_args(int, const char *fmt, ...);
extern void eib_dprintf_verbose(int, const char *fmt, ...);
#endif
extern int eib_get_props(eib_t *);
extern void eib_rb_get_props(eib_t *);
/*
* EoIB specific global variables
*
* These are declarations only; the definitions (and the array sizes)
* live in a source file.
*/
extern ib_gid_t eib_reserved_gid;
extern uint8_t eib_zero_mac[];
extern uint8_t eib_broadcast_mac[];
extern int eib_setbit_mod67[];
extern char *eib_pvt_props[];
/*
* Global int switches named eib_wa_no_*. Going by the "wa" prefix
* these are presumably workaround toggles -- confirm against the
* file that defines them.
*/
extern int eib_wa_no_desc_list_len;
extern int eib_wa_no_cksum_offload;
extern int eib_wa_no_lso;
extern int eib_wa_no_mcast_entries;
extern int eib_wa_no_av_discover;
extern int eib_wa_no_good_vp_flag;
extern int eib_wa_no_good_vhub_cksum;
/*
* Miscellaneous externs
*
* Symbols defined outside this driver and declared here directly.
* NOTE(review): confirm that no system header already declares them.
*/
extern void freemsgchain(mblk_t *);
extern pri_t minclsyspri;
#ifdef __cplusplus
}
#endif
#endif /* _SYS_IB_EOIB_EIB_IMPL_H */