/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
*/
#ifndef _SYS_IB_MGT_IBCM_IBCM_IMPL_H
#define _SYS_IB_MGT_IBCM_IBCM_IMPL_H
/*
*
* This file contains all of the internal data structures and
* definitions for IBCM.
*
* The general state transition processing of CM is achieved by the
* following callgraph:
*
* CM INIT : Register for hca attach and detach callbacks, and other asyncs
*
* On new HCA attach: Register with IBMF on all ports of upcoming HCA
* Specify CM callback and callback "per HCA arg"
* Register with SA, allocate AVL trees etc.
*
* IBMF Callback
* Validate combination of method and attribute Id in the generic MAD hdr
* -> Call CM Connection state transition function based on attribute ID
* Handle duplicate messages and MRA to adjust timers etc.
* Handle stale connections
* Allocate reply MADs
* Optionally fill up some fields of response MAD
* Post reply MADs
* Store reply MADs and reply MAD address, if necessary
* Initialize timeouts for the message
* Change CM state
* Deallocate reply MADs
*
* NOTES:
* o There are *NO* explicit CM allocation and deallocation routines for
* CM MADs and state data structures
* o CM timeouts are scheduled using timeout(9f), and cancelled using
* untimeout(9f)
* o svc_id allocation scheme
* A new counter for svcid is maintained in ibcm_hca_info_t
* which is used to allocate svcid. The svcids are incremented
* sequentially and allocated (with wrap around on overflow) with
* these considerations:
* The WellKnown service id's and locally allocated svcid's
* could be maintained in separate lists, thus allowing the
* lists to be kept apart and sorted easily.
* The insertions are done at the end of the list
* o reqid allocation scheme
* The list is a sorted one (as reqid's are allocated sequentially).
* If code is required to handle wrap around, it would search for
* a reqid from the head of the list.
* The insertions are always done at the end of the lists
* o XXX svc_id allocation scheme and req_id allocation scheme will
* be revisited.
*/
#include <sys/sysmacros.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
* Defines for all CM state machine states, as defined in
* section 12.9.7. IBCM_REJ_SENT is a state not defined in
* the spec and is added for implementation purposes.
*/
typedef enum ibcm_conn_state_e {
/* Initial states */
IBCM_STATE_IDLE = 0,
/* States during connection establishment */
/* States during connection establishment failures */
/* Established state */
/* States during connection teardown */
/* states for UD side of things */
/* states common to RC and UD, during state resource deletion */
typedef enum ibcm_ap_state_e {
/*
*/
typedef enum ibcm_event_type_e {
/* remote comid */
/*
* IBMF calls back into CM on only the first 11 events defined in
* ibcm_event_type_t. CM has pre-defined functions for these 11 events
*
*/
/*
* CM message attribute IDs begin at this "base ID". The first 11 event types
* in ibcm_event_type_t are CM protocol messages that are posted to IBMF by
* adding the "base_id" to the respective event type value. By subtracting
* the "base_id" from the CM MAD's attribute ID in the IBMF callback, the
* message type is recovered.
*/
/*
* Maximum number of com ids / req ids that can be active at any given time
* MUST ENSURE THAT (INITIAL ID + MAX IDS - 1), for any of the IDs, does not
* exceed the maximum 32-bit value
*/
/* An hca can have max of 2^24 -2 RC connections */
/*
* Defines the CM Mode of operation for a connection
*/
typedef enum ibcm_mode_e {
} ibcm_mode_t;
/* different IBCM return values */
typedef enum ibcm_status_e {
/*
* Struct definition for addressing information that CM maintains for
* each of the incoming MADs
*/
typedef struct ibcm_mad_addr {
/* or on which MAD shall be */
/* sent out */
typedef enum ibcm_abort_flag_e {
typedef enum ibcm_isync_e {
} ibcm_isync_t;
/*
* Define a connection state structure, used by the IBTF CM
* to maintain state about connected QPs.
*
* state : CM connection state
* state_mutex : lock for this structure
* channel : Channel associated with this RC state structure
* ref_cnt : Number of active threads that may reference this
* state structure
* svcid : Service ID
* cm_handler : Client handler callback address
* stored_reply_addr : Address for replying using the stored mad
* hcap : A pointer to the HCA's entry
* mra_msg : Stores the response MRA MAD
* dreq_msg : Stores the DREQ MAD
* drep_msg : Stores the DREP MAD
* detect duplicate LAP messages
* local_comid : Local communication id
* local_hca_guid : Local HCA GUID
* local_qpn : Local QPN
*
* remote_comid : Remote communication id
* remote_hca_guid : Remote HCA GUID
* remote_qpn : Remote QPN
*
* timerid : Timer id for the timeout either for re-sending the
* stored mad or deleting the stored mad
* A REP response to an incoming REQ
* An outgoing REQ on active connection side
* timer_value : Time for any of the above timers in HZ
* pkt_life_time : pkt life time from source to destination
* remote_ack_delay : Remote hca's ack delay in clock_t
* rc_alt_pkt_lt : Life time for new ALT path specified in LAP
* stale_clock : clock used to detect stale vs duplicate REQs
* timer_stored_state : state of connection for timeout() validation
* timer_stored_ap_state: CM ap_state for timeout validation
* remaining_retry_count: Remaining count for retries ie., posting stored MADs
* delete_mra_msg : Set to TRUE for deletion, if MRA re-send in progress
* resend_mra_mad : B_TRUE, if an MRA mad re-send is in progress
* cep_retry_cnt : Retry count for CEP.
* stale : B_TRUE, if connection has become stale
* blocking_done : B_TRUE, if cv_signal has been issued to block_client_cv
* clnt_hdl : Clnt_hdl passed in ibt_open_channel
* return_data : RC return args, valid for blocking
* ibt_open_channel
* drep_priv_data : The pointer to client specified outgoing private
* data, from close channel API call
* drep_priv_data_len : The length of DREP private data that client would
* like to be returned from close channel API call
* delete_state_data : B_TRUE, if CM decides to delete state data, but
* there is some thread that could access state data
*
* avl_active_link : For inserting this state-data into active AVL tree
* avl_passive_link : For inserting this state-data into passive AVL tree
* Note : All timer values that are of type "clock_t" below are in usecs
*/
typedef struct ibcm_state_data_s {
/* for AVL tree */
/* remote stuff */
/* local stuff */
/* ref_cnt so others cannot delete a statep that may be referenced */
int ref_cnt;
void *defer_cm_msg;
/* timeout related stuff */
/* some cep stuff, stored here temporarily during connection est */
/* Clients' information */
void *state_cm_private;
/* pointer to service info */
/* Data for recycle function */
/* Return data pointers in various cm api calls */
/* for queuing of open_rc_channel requests */
/* for queuing of non-blocking close_rc_channel requests */
/* For ibt_ofuvcm_get_req_data() */
void *req_msgp;
/* Stored RNR retry count from incoming REQ or REP */
conn_trace}))
/*
* Definitions for send mad flags. Respective bits in send_mad_flags or
* ud_send_mad_flags are set to 1, during MAD transmission, and reset in
* ibmf send completion callback or on completion of a blocking ibmf mad post.
*/
/* MADs that are retransmitted only because of a timeout */
#define IBCM_REF_CNT_DECR(s) \
ibcm_add_tlist(s);\
} \
/*
*/
/*
* handles, using the CM private data. These call into IBTL.
* The WAIT and RELEASE macros deal with related issues that
* require use of the same lock within IBTL.
*/
s = ibtl_cm_get_chan_private(ch); \
} else \
s = NULL;
ibtl_cm_set_chan_private(ch, (void *)(s)); \
}
}
/* In future, if we intend to change it to realtime_timeout, it's easy */
extern void ibcm_close_enter(void);
extern void ibcm_close_exit(void);
extern void ibcm_lapr_enter(void);
extern void ibcm_lapr_exit(void);
extern void ibcm_check_for_opens(void);
extern void ibcm_check_for_async_close(void);
extern void ibcm_run_tlist_thread(void);
/*
* Structures & defines for SIDR
*/
/*
* Define a connection state structure, used for SIDR REQ and REP
* (ibcm_ud_state_data_t - struct for SIDR connection)
*
* ud_state: CM connection state (See ibcm_conn_state_t)
* ud_req_id: Request ID
* ud_svcid: Service ID
* ud_state_mutex: Lock for this structure
*
* ud_max_cm_retries: Max retry count for sending a SIDR REQ
* ud_ref_cnt: State ref count for not deleting accidentally
* ud_remaining_retry_count: Remaining count for retries ie., posting
* stored MADs
* ud_cm_handler: Server's handler callback address
*
* ud_nextp: CM link for IBTF list
* ud_hcap: A pointer to the HCA's entry
*
* ud_timerid: Timer id for the timeout either for re-sending the
* stored mad or deleting the stored mad
* Ex: A SIDR REP response for an incoming SIDR REQ
* An outgoing SIDR REQ on active connection side
* ud_timer_value: Time for any of the above timers in HZ
* ud_pkt_life_time: pkt life time from source to destination
* ud_stored_reply_addr: Address for replying using the stored mad
*
* ud_sidr_req_lid: SIDR REQ sender's port LID
* ud_sidr_req_gid: SIDR REQ sender's port GID
* ud_grh_exists: TRUE if GRH exists in the incoming SIDR REQ
*
* ud_passive_qpn: QPN allocated by server for a SIDR REQ
* ud_passive_qpn_qkey: QPN's QKEY allocated by server
*
* ud_block_client_cv: CV condition variable on which ibt_ud_get_dqpn() waits,
* if called in blocking mode.
* ud_return_data: UD return args, valid for blocking ibt_ud_get_dqpn
* ud_timer_stored_state: State stored for timeout handling
* ud_blocking_done : Tells if cv_wait is needed or not. To handle the
* case where a cv_signal is received prior to its
* cv_wait().
* Note : All timer values that are of type "clock_t" below are in usec
*/
typedef struct ibcm_ud_state_data_s {
int ud_ref_cnt;
/* timeout related stuff */
/* SIDR REQ side related */
/* Clients' information */
void *ud_state_cm_private;
/* The following fields are not used by server side connection */
/*
* Structure used to specify the SIDR search parameters
*/
typedef struct ibcm_sidr_srch_s {
/*
*/
#define IBCM_UD_REF_CNT_DECR(s) \
ibcm_add_ud_tlist(s);\
} \
ASSERT(s->ud_ref_cnt >= 0);
/*
* Structure to store the Service Registration and Service Bind entries.
*
* Well known service id's are unique on a given HCA, but can be registered
* only at some GID's. Hence there can be multiple GID's per Service ID. For each
* such GID and PKEY combination registered, there will be an ibcm_svc_info_t
* entry in the CM global service list.
*
* Annex A of the spec constrains that there shall be one service provider per
* service id, which implies same svc_rc_handler for all such entries
* There can be multiple transport types (svc_tran_type) per Service ID. For
* each such transport type, there will be an ibcm_svc_info_t entry in the
* CM global service list and cm handler can be different
*
* For locally allocated service id's (maintained by OS), there can be only
* one GID, where the service can be registered
*
* svc_id: Service ID
* svc_num_sids: Number (Range) of service-ids supported
* svc_flags: Service flags specified at registration time
* svc_link: Global AVL tree of ibcm_svc_info_t structs
* svc_rc_handler: Server handler for RC (only one is valid at a time)
* svc_ud_handler: Server handler for UD (only one is valid at a time)
* svc_ref_cnt: Reference count
* svc_to_delete: If 1, then the entry is marked to be deleted
*
* sbind_gid: GID
* sbind_pkey: P_Key
* sbind_lease: Service Lease
* sbind_name: Service Name
*/
typedef struct ibcm_svc_info_s {
int svc_ref_cnt;
int svc_to_delete;
int svc_num_sids;
typedef struct ibcm_svc_bind_s {
void *sbind_cm_private;
/* sbind_data is assumed to be 8-byte aligned */
/*
* is the master, etc.). When any of the above occurs, a PORT_UP
* async event is supposed to occur, at which point we mark all of
* our service record information as stale (REWRITE_NEEDED), and
* subsequently make the necessary sa_update calls to get the
*
* Values for sbind_rewrite_state follow. This field is protected by
* ibcm_svc_info_lock. ibt_unbind_service has to wait until a service
* binding is either idle or needed, sleeping on ibcm_svc_info_cv if
* busy (rewrite in progress).
*/
#define IBCM_REWRITE_IDLE 0
typedef struct ibcm_port_up_s {
/* arg is a pointer to ibcm_port_up_t */
extern void ibcm_service_record_rewrite_task(void *);
if (--((svcinfop)->svc_ref_cnt) == 0 && \
(svcinfop)->svc_to_delete) \
/* for avl tree search */
typedef struct ibcm_svc_lookup_s {
int num_sids;
typedef struct ibcm_ar_ref_s {
typedef struct ibcm_ar_s {
} ibcm_ar_t;
/* ar_flags */
#define IBCM_AR_SUCCESS 0
/*
* These flags are used for adding (if an entry does not exist) or
* for just looking one up
*/
typedef enum ibcm_lookup_flag_e {
/* lookup failed */
typedef enum ibcm_finit_state_e {
/*
* Identifies HCA's state. Used in the definition of ibcm_hca_info_t
* If HCA is in ACTIVE state only does CM allow any MAD processing.
*/
typedef enum ibcm_hca_state_e {
/* QP information per pkey, stored in port information */
typedef struct ibcm_qp_list_s {
/*
* port information per HCA
* port_ibmf_hdl - contains IBMF handle for that port if valid
* otherwise is NULL
* port_ibmf_saa_hdl - contains SA Access handle for that port if valid
* otherwise is NULL
*/
typedef struct ibcm_port_info_s {
port_hcap}))
/* Value to indicate to exit the timeout list processing thread */
/*
* IBCM code relies on AVL routines already in kernel for faster lookups.
* AVL was chosen over the mod hashing mechanism because of the latter's
* internal limitations in the kernel (no support for over 100,000 keys).
*
* IBCM uses two AVL trees on the passive side and one on active side per HCA.
* The two trees are needed on the passive side because the tree lookup criteria
* changes based on the type of message being processed. On passive side it is
* based on remote_qpn and remote_hca_guid for only incoming REQ message and
* for all other messages the search criteria is based upon remote_comid.
* On active side the lookup criteria remains static based upon local_comid.
*
* AVL tree insertions are done by grabbing the writer lock (hca_state_rwlock)
* and lookups are done by grabbing the reader lock.
*/
/*
* CM's per HCA data structure.
*
* respectively.
*
* Comids are used for all connections. Req ids are used for SIDR REQ and
* SIDR REP messages. These are simple counters that wrap around INT_MAX.
* NOTE: The starting value for comid, per HCA, is 2.
*
* hca_state: HCA's current state (ibcm_hca_state_t) - whether
* IBT_HCA_ACTIVE, IBT_HCA_NOT_ACTIVE,
* hca_guid: Active HCA guid
* hca_caps: HCA capability mask
* hca_ack_delay: HCA ack delay
* hca_max_rdma_rd: Max RDMA in Reads
* hca_max_rdma_dpt: Max RDMA out Reads
* CM based on communication id ONLY.
* Passive Side CM based on remote_qpn and remote_hca_guid.
* hca_passive_comid_tree:
* Passive Side CM based on remote_comid and
* remote_hca_guid.
* for hca_active_tree
* for hca_passive_tree
* for hca_next_comid
* hca_sidr_list: List for UD side
* hca_sidr_list_lock: List lock for UD side
* for hca_sidr_list
* for hca_next_reqid
* hca_next_reqid: Next active ReqId
* hca_next_comid: Next active ComID
* hca_next: Pointer to the next HCA
* hca_svc_cnt: A count of services registered on this hca
* hca_acc_cnt: A count of active references to this ibcm_hca_info_t
* hca_res_cnt: A count of client's active resources on this hca
* hca_num_ports: Number of ports that this HCA has
*
* Note : The global mutex ibcm_global_hca_mutex declared in CM is used for
* accesses to the following fields :
* hca_acc_cnt, hca_res_cnt, hca_svc_cnt, hca_state
*/
typedef struct ibcm_hca_info_s {
/* services allocated */
/* Are we on Tavor HCA */
/*
* called to ensure that HCA is in "attached" state and is willing to
* process connections etc.
*/
/*
* Passive AVL tree lookup info (for hca_passive_tree)
* CM needs this structure as passive tree lookups are based on
* QPN and HCA GUID.
*/
typedef struct ibcm_passive_node_info_s {
/*
* Passive Com ID AVL tree lookup info (for hca_passive_comid_tree)
* CM needs this structure as passive comid tree lookups are based on
* Remote Com ID and Remote HCA GUID.
*/
typedef struct ibcm_passive_comid_node_info_s {
/* CM proceed task args structure definition */
typedef struct ibcm_proceed_targs_s {
union tst_t {
struct rc_s {
} rc;
struct ud_s {
} ud;
} tst;
/* keep priv_data as the last field */
/*
* Function prototypes for the AVL tree comparison routines.
*
* Each routine takes two untyped (const void *) pointers and returns an int.
* NOTE(review): judging by the names, these presumably serve, in order, the
* active tree (lookups keyed on local_comid), the passive tree (remote_qpn
* and remote_hca_guid) and the passive comid tree (remote_comid and
* remote_hca_guid) -- confirm against the code that creates the trees.
*/
int ibcm_active_node_compare(const void *, const void *);
int ibcm_passive_node_compare(const void *, const void *);
int ibcm_passive_comid_node_compare(const void *, const void *);
/*
*/
/* function prototypes to Manage CM's IBMF QP's */
/*
* function prototypes to allocate and free outgoing CM messages
*/
/*
* Definition for CM state transition processing function
*/
/*
* CM REQ Message structure
*
* Request for communication.
*
* Things of interest are:-
* ib_qpn_t cannot be used - it is typecast to uint32_t but is 24 bits
* ib_eecn_t cannot be used - it is typecast to uint32_t but is 24 bits
*
* (See Table 85 REQ Message Contents - chapter 12 in IB Spec v1.0a)
*
*/
typedef struct ibcm_req_msg_s {
/* 32 bits */
/* local side QPN - 24 bits */
/* Offered responder */
/* resources - 8 bits */
/* Local side EECN - 24 bits */
/* Offered initiator */
/* depth - 8 bits */
/* Remote side EECN - 24 bits */
/* Remote CM timeout - 5 bits */
/* Transport srvtype - 2 bits */
/* End-to-End flow - 1 bit */
/* Starting PSN - 24 bits */
/* Local CM timeout - 5 bits */
/* Retry count - 3 bits */
/* Path Pkt MTU - 4 bits */
/* Does RDC exist? - 1 bit */
/* RNR retry count - 3 bits */
/* Max CM retries - 4 bits */
/* SRQ Exists - 1 bit */
/* Reserved2 - 3 bits */
/* Prim. flow label - 20 bits */
/* Reserved3 - 6 bits */
/* Primary rate - 6 bits */
/* Primary Traffic class */
/* Primary SL - 4 bits */
/* Prim. subnet local - 1 bit */
/* Reserved4 - 3 bits */
/* Primary local */
/* timeout - 5 bits */
/* Reserved5 - 3 bits */
/* Note: req_alt_l_port_gid/req_alt_r_port_gid are not 8-byte aligned */
/* Alt flow label - 20 bits */
/* Reserved6 - 6 bits */
/* Alternate rate - 6 bits */
/* Alternate SL - 4 bits */
/* Alt subnet local - 1 bit */
/* Reserved7 - 3 bits */
/* Alt Local ACK */
/* timeout - 5 bits */
/* Reserved8 - 3 bits */
/* Private data */
/*
* The following set of defines are short-cuts to CEP_PATH or GRH info
*/
/*
* The following set of defines are short-cuts to ibt_cm_event_t
*/
/*
* The following set of defines are short-cuts to qp_attrs or qp_info
*/
/* The following set of defines are short-cuts to RC and SIDR MAD HDRs */
/*
* CM MRA Message structure
*
* Message Receipt Acknowledgement (MRA).
*
* NOTE: IB hosts and targets are required to be able to receive and
* act upon an MRA, but the ability to send an MRA is optional.
*/
typedef struct ibcm_mra_msg_s {
/* Reserved1 - 6 bits */
/* Service timeout - 5 bits */
/* Reserved2 - 3 bits */
/* Private data */
/*
* CM REJ Message structure
* REJ indicates that the sender will not continue through the communication
* establishment sequence and the reason why it will not.
*
* of rejection reasons supported.
*/
typedef struct ibcm_rej_msg_s {
/* Msg being REJed - 2 bits */
/* Reserved1 - 6 bits */
/* Rej. Info Length - 7 bits */
/* Reserved2 - 1 bit */
/* Additional Reject Info */
/* Private data */
/*
* CM REP Message structure
*
* REP is returned in response to REQ, indicating that the respondent
* accepts the Service-ID, proposed primary port, and any parameters
* specified in the PrivateData of the REQ.
*/
typedef struct ibcm_rep_msg_s {
/* Local side QPN - 24 bits */
/* Reserved1 - 8 bits */
/* Local side EECN - 24 bits */
/* Reserved2 - 8 bits */
/* Starting PSN - 24 bits */
/* Reserved3 - 8 bits */
/* Target ACK delay - 5 bits */
/* Failover accepted - 2 bits */
/* End-to-End flow control - */
/* 1 bit */
/* RNR retry count - 3 bits */
/* SRQ Exists - 1 bit */
/* Reserved4 - 4 bits */
/* Private data */
/*
* CM RTU Message structure
*
* RTU indicates that the connection is established, and that the
* recipient may begin transmitting.
*/
typedef struct ibcm_rtu_msg_s {
/* Private data */
/*
* CM DREQ Message structure
*
* DREQ is sent to initiate the connection release sequence.
*/
typedef struct ibcm_dreq_msg_s {
/* reserved - 8 bits */
/* Private data */
/*
* CM DREP Message structure
*
* DREP is sent in response to DREQ, and signifies that the sender has
* received DREQ.
*/
typedef struct ibcm_drep_msg_s {
/* Private Data */
/*
* CM LAP Message structure
*
* NOTE: LAP and APR messages are optional. These are needed if CM
* accepts REQ messages and agrees to perform Automatic Path Migration.
*
* This message is used to change the alternate path information for a
* specific connection.
*/
typedef struct ibcm_lap_msg_s {
/* Remote CM response */
/* timeout - 5 bits */
/* Reserved1 - 3 bits */
/* Alt flow label - 20 bits */
/* Reserved3 - 4 bits */
/* Alt traffic class - 8 bits */
/* Alt. static rate - 6 bits */
/* Alternate SL - 4 bits */
/* Alt subnet local - 1 bit */
/* Reserved5 - 3 bits */
/* Alt Local ACK */
/* timeout - 5 bits */
/* Reserved6 - 3 bits */
/* Private data */
/*
* CM APR Message structure
*
* APR is sent in response to a LAP request. MRA may be sent to allow
* processing of the LAP.
*/
typedef struct ibcm_apr_msg_s {
/* Additional Information */
/* Private data */
/*
* CM SIDR_REQ Message structure
*
* NOTE: SIDR_REQ and SIDR_REP messages are conditionally required.
* These are needed if non-management services are provided on the Channel
* Adapter other than fixed QPNs. Management services include those
* provided thru Subnet Manager Packets or thru General Management Packets.
*
* SIDR_REQ requests that the recipient return the information necessary
* to communicate via UD messages with the entity specified by
* SIDR_REQ:ServiceID
*/
typedef struct ibcm_sidr_req_msg_s {
/* Private Data */
/*
* CM SIDR_REP Message structure
*
* SIDR_REP returns the information necessary to communicate via UD
* messages with the entity specified by SIDR_REQ:ServiceID
*/
typedef struct ibcm_sidr_rep_msg_s {
/* since the 64-bit SID is not aligned, treat it as a byte array */
/* Class Port Info */
/* aka., add'l info */
/* Private data */
typedef struct ibcm_classportinfo_msg_s {
/* resptime value : 5 bits */
/* SL: 4 bits */
/* Flow label: 20 bits */
/* QPN: 24 bits */
/* All msgs are readonly on receiving side */
/* Prototype definitions for CM implementation functions */
/*
* The callback from IBMF to CM. This routine calls one of the CM
*
* ibmf_handle : IBMF handle on which CM MAD was received
* pktp : MAD packet
* args : IBMF receive mad callback arg
*/
/*
* Prototypes for CM state transition handling functions
*/
/*
* The following are the CM state processing functions called on an
* (Also handled are SIDR_REP and SIDR_REQ)
* The brief description of these functions
* Search based on CM message fields in CM's HCA entry.
* Handle duplicate messages and state transitions
* Set and Cancel timeouts
* Handle stale connections
* Change CM connection state
* Call CM CEP state transition functions to update CEP state
* and set CEP attributes
*
* INPUTS:
* hcap: - IBMF callback argument
* cm_input_mad: - ibmf message pointer of incoming MAD
* cm_mad_addr - CM MAD address
*
* The state transition processing is specified in different functions based
* on incoming message type rather than as one function because, the CM
* processing is different for each of them.
*
* A global call table is initialized with these function addresses
* (is defined in ibcm_impl.c), and invoked from ibcm_recv_cb
*/
typedef enum ibcm_proceed_error_e {
/* Encapsulates the information that client returns back from CM callback */
typedef struct ibcm_clnt_reply_info_s {
void *priv_data;
/* Encapsulates the information that UD client returns back from CM callback */
typedef struct ibcm_ud_clnt_reply_info_s {
void *priv_data;
/*
* Prototypes for CM CEP state transition handling functions. These are
* called from CM connection state transition handling functions.
*
* The brief description of these functions :
* Validate CEP related attributes in the messages
* Change CEP state
* Set CEP attributes (modify CEP)
* Fill up the response MADs
*
* The arguments are :
* statep: Connection state structure
* cm_output_mad : The response CM MAD with some of the fields filled in
* The cm output mad is allocated by CM state transition
* functions and has generic MAD header
* Certain fields like com id, etc., are filled by CM
* connection state transition functions that are above
*/
/* QP state transition function called for an incoming REQ on passive side */
/* Processes QP state machine based on return values from cm handler */
/* Processes CM state machine based on return values from ibcm_cep_state_req */
/* QP state transition function called for an incoming REP on active side */
/* Processes QP state machine based on return values from cm handler */
/* Processes CM state machine based on return values from ibcm_cep_state_rep */
/* QP state transition function called for an incoming RTU on passive side */
/* QP state transition func for an incoming REJ on active side in est state */
/*
* QP state transition function called for an outgoing RTU on active side,
*/
/* QP state transition function called for an incoming LAP */
/* Processes QP state machine based on return value from cm handler for LAP */
/* Processes CM state machine based on return value from cm handler */
/* Processes CM UD state machine based on return values from cm handler */
/*
* Entry points that take a single untyped argument and return nothing.
* NOTE(review): targs is presumably a pointer to the CM proceed task args
* structure (ibcm_proceed_targs_t), with the RC and UD variants handled by
* the respective routine -- confirm in the implementation.
*/
void ibcm_proceed_via_taskq(void *targs);
void ibcm_ud_proceed_via_taskq(void *targs);
/*
* Builds the reply MAD address based on "incoming mad addr" that is
* supplied to it as an arg.
* Swaps the source and destination lids in ibmf_addr_info_t
* Swaps the source and destination gids in ib_grh_t
*
* INPUTS:
* incoming_cm_mad_addr - Address information in the incoming MAD
* reply_cm_mad_addr - Derived address for the reply MAD
* The reply MAD address is derived based on the
* address information of incoming CM MAD
*/
/* Posts RC CM MAD using IBMF */
/* Posts UD CM MAD using IBMF */
/* Posts CM MAD using IBMF */
/* Post REJ MAD */
/* Post REP MAD */
/* Post RTU MAD */
/* Post DREQ MAD */
void ibcm_post_dreq_mad(void *statep);
/* Post LAP MAD */
/*
* Posts CM SIDR MAD using IBMF in blocking mode
*
* INPUTS:
* ud_statep: UD statep which is posting the mad
* cm_mad_addr: Address information for the MAD to be posted
* status: SIDR status
*/
/* prototypes to resend RC mad and UD MAD */
/* Helper function used in connection abort processing */
/*
* Prototypes for CM functions that lookup for a connection state structure
*/
/*
* ibcm_lookup_msg:
*
* Retrieves an existing state structure or creates a new one if none found.
* This function is used during passive side of connection establishment for
* This function is used during active side of connection establishment for
* This function is used during active side of connection establishment for
* an outgoing REQ.
*
* NOTE: IBCM_LOOKUP_FAIL is only returned if a new entry wasn't created and
* a match wasn't found.
*
* Arguments are:-
* ibcm_event_type_t - what type of message
* incoming REQ, REP, REJ, MRA, RTU, DREQ, DREP
* local_comid - ONLY *NOT* valid for incoming REQ.
* needed for others
* remote_qpn - Remote CM's QP number
* remote_hca_guid - ONLY VALID FOR incoming REQ.
* Ignored for others
* hcap - HCA entry table pointer
* statep - "return"ed state pointer
*
* Return Values:
* IBCM_LOOKUP_NEW - new statep allocated
* IBCM_LOOKUP_EXISTS - found an existing entry
* IBCM_LOOKUP_FAIL - failed to find an entry
* IBCM_MEMORY_FAILURE - failed to get memory
* iff flags != IBT_CHAN_BLOCKING
*/
/*
* Routines for CM SIDR state structure list manipulation
* Wherever possible, the list routines of ibtl are used
* for list manipulation
*/
/*
* Finds an entry based on lid, gid and grh exists fields
* lid: LID of incoming SIDR REQ
* gid: GID of incoming SIDR REQ
* grh_exists: TRUE if GRH exists in the incoming SIDR REQ
* hcap: CM State HCA entry ptr to search for SIDR state structure
* statep: Returns a valid state structure, if one exists based
* on lid, gid and grh_exists fields
* flag: whether to just look OR to look and add if it doesn't exist.
*/
/*
* Deletes a given state structure, from both hca state and passive trees
* If ref cnt is zero, deallocates all buffers and memory of state data
*/
/*
* Deallocates all the buffers and memory of state data.
* This function must be called, only when ref_cnt is zero.
*/
/*
* Deletes a given UD state structure, from SIDR list.
* The routine acquires and releases the SIDR list lock.
*/
/*
* Service ID entry create and lookup functions
*/
/*
* This global table is defined in ibcm_impl.c.
*
* svc_info_list_lock must be held for RW_READER by caller of
* ibcm_find_svc_entry().
*
* Arguments are:-
* sid - service id
* num_sids - Number (Range) of service-ids
*
* Return values:
* Pointer to ibcm_svc_info_t on success, otherwise NULL.
*/
/*
* The following are the function prototypes for various id initialization,
* allocation, free and destroy operations. The cm id allocations are based
* on vmem operations
* The service id's are maintained globally per host
* The com id and req id's are maintained per hca
* To maintain compatibility with Intel, service ids are allocated on a 32 bit
* range, though spec has 64 bit range for service id's
*
* ibcm_fini_ids() takes no arguments and returns nothing. It is declared
* with an explicit (void) parameter list so that this is a true prototype:
* an empty "()" list leaves the parameters unspecified and would let calls
* that pass arguments compile unchecked.
*/
void ibcm_fini_ids(void);
ibcm_status_t ibcm_ar_init(void);
ibcm_status_t ibcm_ar_fini(void);
/* IP Addressing API debugging */
extern int ibcm_printip; /* set to 1 to enable IBTF DPRINTFs */
if (ibcm_printip) { \
}
/*
* These functions are called to do timeout processing from CM connection
* state transitions. (Also for SIDR REQ and SIDR REP processing)
*
* Brief description :
* If retry count is below max retry value, then post the stored response
* MAD using IBMF in blocking mode, adjusts remaining retry counters.
* If retry counter reaches max value, then retry failure handling is
* done here
*
* CM will ensure that the state data structure of the associated
* timeout is valid when this timeout function is called.
* (See timer_stored_state in ibcm_state_data_t and
* ud_timer_stored_state in ibcm_ud_state_data_t)
*/
void ibcm_timeout_cb(void *arg);
void ibcm_sidr_timeout_cb(void *arg);
/*
* function prototypes for IBMF send completion callbacks on non-blocking
* MAD posts
*/
void *args);
void *args); /* MRA Rcvd on active side */
void *args);
void *args); /* MRA Rcvd on passive side */
void *args);
void *args);
void *args);
void *args);
void *args);
void *args); /* MRA Rcvd for LAP on active side */
void *args); /* for MRA sender */
void *args);
/*
* ibcm_find_hca_entry:
* Given a HCA's GUID find out ibcm_hca_info_t entry for that HCA
*
* NOTE: This entry is not removed from the "ibcm_hca_listp".
* And this function is called with ibcm_hca_list_mutex mutex held.
*
* INPUTS:
* hca_guid - HCA's guid
*
* RETURN VALUE:
* hcap - if a match is found, else NULL
*/
/* Routines that manage the hca's temporary access count */
/* Routines that manage the hca's resource count */
/* Routines that manage the hca's service count */
/* Routine to fetch the saa_handle */
/* Allow some flow control of RC connection initiations */
void ibcm_flow_inc(void);
/* Allow some flow control of SA requests */
void ibcm_sa_access_enter(void);
void ibcm_sa_access_exit(void);
/*
* ibcm_cep_to_error_state:
* Helper function to transition a CEP to ERROR state
*
* NOTE: This function checks if ch_qp is valid or ch_eec and calls
* into IBTL to transition the CEP.
*
* INPUTS:
* statep - Connection state pointer
*
* RETURN VALUE:
* IBT_SUCCESS - if CEP transition succeeded; else error
*/
/*
* Processes the pending stateps in a linked list. The operations are to
* invoke a cm handler or delete statep
* When the above operations are required on statep from a timeout handler,
* they are linked for later processing by an independent thread
*
* Takes no arguments and returns nothing. Declared with an explicit (void)
* parameter list so that this is a true prototype and calls passing
* arguments are rejected at compile time.
*/
void ibcm_process_tlist(void);
/* Links RC stateps to an RC timeout processing list */
/*
* operation
*/
void ibcm_process_rc_recycle(void *recycle_arg);
/*
* Helper function to handle endianness in case of Service Data.
* Used by ibt_bind_service() and ibt_get_paths().
*/
/* Misc ibcm global variables */
extern char cmlog[];
extern ibt_clnt_hdl_t ibcm_ibt_handle;
extern taskq_t *ibcm_taskq;
extern ibcm_state_handler_t ibcm_sm_funcs_tbl[];
extern uint8_t ibcm_timeout_list_flags;
extern ibcm_classportinfo_msg_t ibcm_clpinfo;
/* Global lists */
/* Default global retry counts */
extern uint8_t ibcm_max_retries;
extern uint32_t ibcm_max_sa_retries;
extern int ibcm_sa_timeout_delay; /* in ticks */
/* Various default global timers */
extern clock_t ibcm_remote_response_time;
extern ib_time_t ibcm_max_sidr_rep_proctime;
extern uint32_t ibcm_adj_btime;
extern uint32_t ibcm_sw_delay;
extern ib_time_t ibcm_max_ib_pkt_lt;
extern ib_time_t ibcm_max_ib_mad_pkt_lt;
/* Global locks */
extern kmutex_t ibcm_svc_info_lock;
extern kmutex_t ibcm_mcglist_lock;
extern kmutex_t ibcm_global_hca_lock;
extern kmutex_t ibcm_qp_list_lock;
extern kmutex_t ibcm_timeout_list_lock;
extern kmutex_t ibcm_recv_mutex;
/* Global cond variables */
extern kcondvar_t ibcm_global_hca_cv;
extern kcondvar_t ibcm_svc_info_cv;
extern kcondvar_t ibcm_timeout_list_cv;
/*
* miscellaneous defines for retries, times etc.
*/
/* in usecs */
/* in usecs */
/* versions for CM MADs */
/* for Class_Port_Info stuff - see section 16.7.3.1 in Vol1 IB Spec */
/* RDMA CM IP Service's Private Data Format. */
#ifdef _BIG_ENDIAN
typedef struct ibcm_ip_pvtdata_s {
#else
typedef struct ibcm_ip_pvtdata_s {
ip_MajV:4;
#endif
/*
* for debug purposes
*/
#ifdef DEBUG
/*
* DEBUG builds only: the ibcm_test_mode variable and the dump routines for
* a raw message buffer and for SA service, path and node records.
*/
extern int ibcm_test_mode;
void ibcm_dump_raw_message(uchar_t *);
void ibcm_dump_srvrec(sa_service_record_t *);
void ibcm_dump_pathrec(sa_path_record_t *);
void ibcm_dump_noderec(sa_node_record_t *);
#else
/*
* Non-DEBUG builds: the dump macros expand to nothing, so their argument
* is discarded and never evaluated.
*/
#define IBCM_DUMP_RAW_MSG(x)
#define IBCM_DUMP_SERVICE_REC(x)
#define IBCM_DUMP_PATH_REC(x)
#define IBCM_DUMP_NODE_REC(x)
#endif
void ibcm_path_cache_init(void);
void ibcm_path_cache_fini(void);
void ibcm_path_cache_purge(void);
#ifdef __cplusplus
}
#endif
#endif /* _SYS_IB_MGT_IBCM_IBCM_IMPL_H */