/* rpcib.c revision 737d277a27d4872543f597e35c470e7510f61f03 */
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
/*
* The rpcib plugin. Implements the interface for RDMATF's
* interaction with IBTF.
*/
#include <sys/sysmacros.h>
#include <sys/pathname.h>
#include <sys/isa_defs.h>
#include <sys/pathname.h>
extern char *inet_ntop(int, const void *, char *, int);
/*
* Prototype declarations for driver ops
*/
void *, void **);
/* rpcib cb_ops */
/*
 * Character/block driver entry points for rpcib.  The plugin is driven
 * through the RDMATF framework rather than through device-node I/O, so
 * every entry point is stubbed out with nulldev/nodev.
 */
static struct cb_ops rpcib_cbops = {
nulldev, /* open */
nulldev, /* close */
nodev, /* strategy */
nodev, /* print */
nodev, /* dump */
nodev, /* read */
nodev, /* write */
nodev, /* ioctl */
nodev, /* devmap */
nodev, /* mmap */
nodev, /* segmap */
nochpoll, /* poll */
ddi_prop_op, /* prop_op */
NULL, /* stream */
D_MP, /* cb_flag */
CB_REV, /* rev */
nodev, /* int (*cb_aread)() */
nodev /* int (*cb_awrite)() */
};
/*
* Device options
*/
DEVO_REV, /* devo_rev, */
0, /* refcnt */
rpcib_getinfo, /* info */
nulldev, /* identify */
nulldev, /* probe */
rpcib_attach, /* attach */
rpcib_detach, /* detach */
nodev, /* reset */
&rpcib_cbops, /* driver ops - devctl interfaces */
NULL, /* bus operations */
NULL /* power */
};
/*
* Module linkage information.
*/
/*
 * Loadable-driver linkage: ties the human-readable name/version string
 * to rpcib's dev_ops vector.  "%I%" is an SCCS keyword expanded at
 * build time into the file's version number.
 */
static struct modldrv rib_modldrv = {
&mod_driverops, /* Driver module */
"RPCIB plugin driver, ver %I%", /* Driver name and version */
&rpcib_ops, /* Driver ops */
};
static struct modlinkage rib_modlinkage = {
(void *)&rib_modldrv,
};
/*
* rib_stat: private data pointer used when registering
* with the IBTF. It is returned to the consumer
* in all callbacks.
*/
/*
 * Tunables.
 * NOTE(review): the uses of RNR_RETRIES and MAX_PORTS are not visible
 * in this chunk -- presumably RNR (receiver-not-ready) retry count for
 * RC channels and the per-HCA port limit; confirm against full source.
 */
#define RNR_RETRIES 2
#define MAX_PORTS 2
/* Number of RECV buffers pre-posted on a server QP at connect time. */
int preposted_rbufs = 16;
/* NOTE(review): send_threshold's consumer is not visible here -- confirm. */
int send_threshold = 1;
/*
* State of the plugin.
* ACCEPT = accepting new connections and requests.
* NO_ACCEPT = not accepting new connection and requests.
* This should eventually move to rpcib_state_t structure, since this
* will tell in which state the plugin is for a particular type of service
* like NFS, NLM or v4 Callback deamon. The plugin might be in accept
* state for one and in no_accept state for the other.
*/
/* Current plugin state: ACCEPT or NO_ACCEPT (see block comment above). */
int plugin_state;
/*
* RPCIB RDMATF operations
*/
struct mrc *buf_handle);
struct mrc buf_handle);
static void rib_deregister_ats();
static void rib_stop_services(rib_hca_t *);
/*
* RPCIB addressing operations
*/
char ** get_ip_addrs(int *count);
int get_ibd_ipaddr(rpcib_ibd_insts_t *);
void rib_get_ibd_insts(rpcib_ibd_insts_t *);
/*
* RDMA operations the RPCIB module exports
*/
};
/*
* RDMATF RPCIB plugin details
*/
/*
 * Registration record handed to the RDMATF framework (via
 * rdma_register_mod()): identifies this plugin by the api name "ibtf"
 * and exports the rib_ops operation vector.
 */
static rdma_mod_t rib_mod = {
"ibtf", /* api name */
0,
&rib_ops, /* rdma op vector for ibtf */
};
/*
 * Completion-queue callback handlers: send (scq) and receive (rcq)
 * completions, for the client and server sides respectively.
 */
static void rib_svc_scq_handler(ibt_cq_hdl_t, void *);
static void rib_clnt_scq_handler(ibt_cq_hdl_t, void *);
static void rib_clnt_rcq_handler(ibt_cq_hdl_t, void *);
static void rib_svc_rcq_handler(ibt_cq_hdl_t, void *);
ibt_mr_hdl_t *, ibt_mr_desc_t *);
rib_qp_t **);
rib_qp_t **);
static int rib_free_sendwait(struct send_wid *);
static void rdma_done_rem_list(rib_qp_t *);
static void rib_async_handler(void *,
static int rib_free_svc_recv(struct svc_recv *);
static void rib_free_wid(struct recv_wid *);
static void rib_detach_hca(rib_hca_t *);
ibt_path_info_t *);
/*
* Registration with IBTF as a consumer
*/
/*
 * IBTF client registration info: supplies the async event handler and
 * an (absent) memory region handler.
 * NOTE(review): ibt_clnt_modinfo_s normally carries additional fields
 * (IBTI version, client class, client name); initializer lines appear
 * to be missing in this copy -- confirm against the full source.
 */
static struct ibt_clnt_modinfo_s rib_modinfo = {
rib_async_handler, /* async event handler */
NULL, /* Memory Region Handler */
};
/*
* Global strucuture
*/
/*
 * Per-driver global soft state.
 * NOTE(review): the member declarations appear to have been lost in
 * this copy -- the struct body is empty here; confirm against the
 * full source before relying on it.
 */
typedef struct rpcib_s {
} rpcib_t;
/*
* debugging in rpcib kernel module.
* Set it to values greater that 1 to control
* the amount of debugging messages required.
*/
/* Debug verbosity: 0 = off; values > 1 enable progressively more output. */
int rib_debug = 0;
/* Nonzero once the ATS (address translation service) is registered. */
static int ats_running = 0;
int
_init(void)
{
int error;
if (error != 0) {
/*
* Could not load module
*/
return (error);
}
return (0);
}
int
_fini()
{
int status;
return (EBUSY);
}
/*
* Remove module
*/
(void) rdma_register_mod(&rib_mod);
return (status);
}
return (0);
}
int
{
}
/*
* rpcib_getinfo()
* Given the device number, return the devinfo pointer or the
* instance number.
* Note: always succeed DDI_INFO_DEVT2INSTANCE, even before attach.
*/
/*ARGSUSED*/
static int
{
int ret = DDI_SUCCESS;
switch (cmd) {
case DDI_INFO_DEVT2DEVINFO:
else {
ret = DDI_FAILURE;
}
break;
case DDI_INFO_DEVT2INSTANCE:
break;
default:
ret = DDI_FAILURE;
}
return (ret);
}
static int
{
switch (cmd) {
case DDI_ATTACH:
break;
case DDI_RESUME:
return (DDI_SUCCESS);
default:
return (DDI_FAILURE);
}
return (DDI_FAILURE);
}
/*
* Create the "rpcib" minor-node.
*/
if (ddi_create_minor_node(dip,
/* Error message, no cmn_err as they print on console */
return (DDI_FAILURE);
}
}
return (DDI_FAILURE);
}
if (ibt_status != IBT_SUCCESS) {
return (DDI_FAILURE);
}
return (DDI_FAILURE);
}
/*
* Register with rdmatf
*/
return (DDI_FAILURE);
}
return (DDI_SUCCESS);
}
/*ARGSUSED*/
static int
{
switch (cmd) {
case DDI_DETACH:
break;
case DDI_SUSPEND:
default:
return (DDI_FAILURE);
}
/*
* Detach the hca and free resources
*/
return (DDI_SUCCESS);
}
static void
{
/*
* deregister the Address Translation Service.
*/
if (ibt_status != IBT_SUCCESS) {
#ifdef DEBUG
if (rib_debug) {
"ibt_deregister_ar FAILED"
" status: %d", ibt_status);
}
#endif
} else {
ats_running = 0;
#ifdef DEBUG
if (rib_debug) {
"Successfully unregistered"
" ATS service: %s",
}
#endif
}
}
}
static void rib_rbufpool_free(rib_hca_t *, int);
static void rib_rbufpool_deregister(rib_hca_t *, int);
/*
* One CQ pair per HCA
*/
static rdma_stat
{
&real_size);
if (status != IBT_SUCCESS) {
" status=%d", status);
error = RDMA_FAILED;
goto fail;
}
/*
* Enable CQ callbacks. CQ Callbacks are single shot
* (e.g. you have to call ibt_enable_cq_notify()
* after each callback to get another one).
*/
if (status != IBT_SUCCESS) {
"enable_cq_notify failed, status %d", status);
error = RDMA_FAILED;
goto fail;
}
return (error);
fail:
if (cq->rib_cq_hdl)
if (cq)
return (error);
}
static rdma_stat
{
int i;
/*
* Open a hca and setup for RDMA
*/
if (ibt_status != IBT_SUCCESS) {
"returned %d", i, ibt_status);
continue;
}
/*
* query HCA info
*/
if (ibt_status != IBT_SUCCESS) {
"returned %d (hca_guid 0x%llx)",
goto fail1;
}
/*
* One PD (Protection Domain) per HCA.
* A qp is allowed to access a memory region
* only when it's in the same PD as that of
* the memory region.
*/
if (ibt_status != IBT_SUCCESS) {
"returned %d (hca_guid 0x%llx)",
goto fail1;
}
/*
* query HCA ports
*/
if (ibt_status != IBT_SUCCESS) {
"ibt_query_hca_ports returned %d "
"(hca_guid 0x%llx)",
goto fail2;
}
/*
* Create 2 pairs of cq's (1 pair for client
* and the other pair for server) on this hca.
* If number of qp's gets too large, then several
* cq's will be needed.
*/
if (status != RDMA_SUCCESS) {
goto fail3;
}
if (status != RDMA_SUCCESS) {
goto fail3;
}
if (status != RDMA_SUCCESS) {
goto fail3;
}
if (status != RDMA_SUCCESS) {
goto fail3;
}
/*
* Create buffer pools.
* Note rib_rbuf_create also allocates memory windows.
*/
goto fail3;
}
goto fail3;
}
/*
* Initialize the registered service list and
* the lock
*/
/*
* XXX One hca only. Add multi-hca functionality if needed
* later.
*/
ribstat->nhca_inited++;
break;
}
return (RDMA_SUCCESS);
else
return (RDMA_FAILED);
}
/*
* Callback routines
*/
/*
* SCQ handlers
*/
/* ARGSUSED */
static void
{
int i;
/*
* Re-enable cq notify here to avoid missing any
* completion queue notification.
*/
while (ibt_status != IBT_CQ_EMPTY) {
if (ibt_status != IBT_SUCCESS)
return;
/*
* Got a send completion
*/
case IBT_WC_SUCCESS:
break;
case IBT_WC_WR_FLUSHED_ERR:
break;
default:
/*
* RC Send Q Error Code Local state Remote State
* ==================== =========== ============
* IBT_WC_BAD_RESPONSE_ERR ERROR None
* IBT_WC_LOCAL_LEN_ERR ERROR None
* IBT_WC_LOCAL_CHAN_OP_ERR ERROR None
* IBT_WC_LOCAL_PROTECT_ERR ERROR None
* IBT_WC_MEM_WIN_BIND_ERR ERROR None
* IBT_WC_REMOTE_INVALID_REQ_ERR ERROR ERROR
* IBT_WC_REMOTE_ACCESS_ERR ERROR ERROR
* IBT_WC_REMOTE_OP_ERR ERROR ERROR
* IBT_WC_RNR_NAK_TIMEOUT_ERR ERROR None
* IBT_WC_TRANS_TIMEOUT_ERR ERROR None
* IBT_WC_WR_FLUSHED_ERR None None
*/
#ifdef DEBUG
if (rib_debug > 1) {
"WR completed in error, wc.wc_status:%d, "
}
}
#endif
/*
* Channel in error state. Set connection to
* ERROR and cleanup will happen either from
* conn_release or from rib_conn_get
*/
break;
}
/*
* Notify poster
*/
} else {
/*
* Poster not waiting for notification.
* Free the send buffers and send_wid
*/
}
(void) rib_free_sendwait(wd);
}
}
}
}
/* ARGSUSED */
static void
{
int i;
/*
* Re-enable cq notify here to avoid missing any
* completion queue notification.
*/
while (ibt_status != IBT_CQ_EMPTY) {
if (ibt_status != IBT_SUCCESS)
return;
/*
* Got a send completion
*/
#ifdef DEBUG
"wc.wc_status:%d, wc_id:%llX",
}
#endif
/*
* Update completion status and notify poster
*/
else
} else {
/*
* Poster not waiting for notification.
* Free the send buffers and send_wid
*/
}
(void) rib_free_sendwait(wd);
}
}
}
}
/*
* RCQ handler
*/
/* ARGSUSED */
static void
{
/*
* Re-enable cq notify here to avoid missing any
* completion queue notification.
*/
while (ibt_status != IBT_CQ_EMPTY) {
if (ibt_status != IBT_SUCCESS)
return;
struct reply *r;
/*
* Treat xid as opaque (xid is the first entity
* in the rpc rdma message).
*/
/* Skip xid and set the xdr position accordingly. */
if (vers != RPCRDMA_VERS) {
/*
* Set connection to ERROR state and bail out.
*/
continue;
}
find_xid = 1;
switch (op) {
case RDMA_MSG:
case RDMA_NOMSG:
case RDMA_MSGP:
r->status = RDMA_SUCCESS;
break;
default:
break;
}
break;
}
}
if (find_xid == 0) {
/* RPC caller not waiting for reply */
#ifdef DEBUG
if (rib_debug) {
"NO matching xid %u!\n", xid);
}
#endif
}
/*
* Connection being flushed. Just free
* the posted buffer
*/
} else {
/*
* RC Recv Q Error Code Local state Remote State
* ==================== =========== ============
* IBT_WC_LOCAL_ACCESS_ERR ERROR ERROR when NAK recvd
* IBT_WC_LOCAL_LEN_ERR ERROR ERROR when NAK recvd
* IBT_WC_LOCAL_PROTECT_ERR ERROR ERROR when NAK recvd
* IBT_WC_LOCAL_CHAN_OP_ERR ERROR ERROR when NAK recvd
* IBT_WC_REMOTE_INVALID_REQ_ERR ERROR ERROR when NAK recvd
* IBT_WC_WR_FLUSHED_ERR None None
*/
/*
* Channel in error state. Set connection
* in ERROR state.
*/
}
}
}
/* Server side */
/* ARGSUSED */
static void
{
/*
* Re-enable cq notify here to avoid missing any
* completion queue notification.
*/
while (ibt_status != IBT_CQ_EMPTY) {
if (ibt_status != IBT_SUCCESS)
return;
qp->n_posted_rbufs--;
if (qp->n_posted_rbufs == 0)
/* s_recvp->vaddr stores data */
/*
* Treat xid as opaque (xid is the first entity
* in the rpc rdma message).
*/
/* Skip xid and set the xdr position accordingly. */
#ifdef DEBUG
"xdr_u_int failed for qp %p, wc_id=%llx",
#endif
(void) rib_free_svc_recv(s_recvp);
continue;
}
if (vers != RPCRDMA_VERS) {
/*
*/
(void) rib_free_svc_recv(s_recvp);
continue;
}
/*
* Is this for RDMA_DONE?
*/
/*
* Wake up the thread waiting on
* a RDMA_DONE for xid
*/
(void) rib_free_svc_recv(s_recvp);
continue;
}
if (plugin_state == ACCEPT) {
/*
* Plugin is in accept state, hence the master
* transport queue for this is still accepting
* requests. Hence we can call svc_queuereq to
* queue this recieved msg.
*/
} else {
/*
* The master transport for this is going
* away and the queue is not accepting anymore
* requests for krpc, so don't do anything, just
* free the msg.
*/
}
} else {
}
(void) rib_free_svc_recv(s_recvp);
}
}
/*
* Handles DR event of IBT_HCA_DETACH_EVENT.
*/
/* ARGSUSED */
static void
{
switch (code) {
case IBT_HCA_ATTACH_EVENT:
/* ignore */
break;
case IBT_HCA_DETACH_EVENT:
{
#ifdef DEBUG
#endif
break;
}
#ifdef DEBUG
case IBT_EVENT_PATH_MIGRATED:
break;
case IBT_EVENT_SQD:
break;
case IBT_EVENT_COM_EST:
break;
break;
"IBT_ERROR_INVALID_REQUEST_CHAN\n");
break;
"IBT_ERROR_ACCESS_VIOLATION_CHAN\n");
break;
break;
case IBT_ERROR_CQ:
break;
case IBT_ERROR_PORT_DOWN:
break;
case IBT_EVENT_PORT_UP:
break;
case IBT_ASYNC_OPAQUE1:
break;
case IBT_ASYNC_OPAQUE2:
break;
case IBT_ASYNC_OPAQUE3:
break;
case IBT_ASYNC_OPAQUE4:
break;
#endif
default:
break;
}
}
/*
* Client's reachable function.
*/
static rdma_stat
{
/*
* First check if a hca is still attached
*/
return (RDMA_FAILED);
}
if (status == RDMA_SUCCESS) {
/*
* Register the Address translation service
*/
if (ats_running == 0) {
== RDMA_SUCCESS) {
ats_running = 1;
return (RDMA_SUCCESS);
} else {
return (RDMA_FAILED);
}
} else {
return (RDMA_SUCCESS);
}
} else {
if (rib_debug > 2)
return (RDMA_FAILED);
}
}
/* Client side qp creation */
static rdma_stat
{
/*
* Initialize
*/
return (RDMA_SUCCESS);
}
/* Server side qp creation */
static rdma_stat
{
/*
* Create the qp handle
*/
&chan_sizes);
} else {
goto fail;
}
if (ibt_status != IBT_SUCCESS) {
"ibt_alloc_rc_channel failed, ibt_status=%d.",
goto fail;
}
kqp->q = q; /* server ONLY */
/*
* Set the private data area to qp to be used in callbacks
*/
return (RDMA_SUCCESS);
fail:
if (kqp)
return (RDMA_FAILED);
}
void
{
if (rib_debug > 1) {
}
}
/* ARGSUSED */
{
/* got a connection close event */
case IBT_CM_EVENT_CONN_CLOSED:
{
/* check reason why connection was closed */
case IBT_CM_CLOSED_DREP_RCVD:
case IBT_CM_CLOSED_DUP:
case IBT_CM_CLOSED_ABORT:
case IBT_CM_CLOSED_ALREADY:
/*
* These cases indicate the local end initiated
* the closing of the channel. Nothing to do here.
*/
break;
default:
/*
* Reason for CONN_CLOSED event must be one of
* IBT_CM_CLOSED_DREQ_RCVD or IBT_CM_CLOSED_REJ_RCVD
* or IBT_CM_CLOSED_STALE. These indicate cases were
* the remote end is closing the channel. In these
* cases free the channel and transition to error
* state
*/
break;
}
/*
* Free the rc_channel. Channel has already
* transitioned to ERROR state and WRs have been
* FLUSHED_ERR already.
*/
/*
* Free the conn if c_ref is down to 0 already
*/
/*
* Remove from list and free conn
*/
(void) rib_disconnect_channel(conn,
&hca->cl_conn_list);
} else {
}
#ifdef DEBUG
if (rib_debug)
"(CONN_CLOSED) channel disconnected");
#endif
break;
}
break;
}
default:
break;
}
return (IBT_CM_ACCEPT);
}
/* Check if server has done ATS registration */
{
struct sockaddr_in *sin4;
struct sockaddr_in6 *sin6;
/*
* Construct svc name
*/
switch (addr_type) {
case AF_INET:
break;
case AF_INET6:
break;
default:
return (RDMA_INVAL);
}
/*
* Attempt a path to the server on an ATS-registered port.
* Try all ATS-registered ports until one succeeds.
* The first one that succeeds will be used to connect
* to the server. If none of them succeed, return RDMA_FAILED.
*/
if (ibt_status == IBT_SUCCESS ||
ibt_status == IBT_INSUFF_DATA) {
for (i = 0; i < npaths; i++) {
if (paths[i].pi_hca_guid) {
/*
* do ibt_query_ar()
*/
sgid =
&ar_result);
if (ibt_status == IBT_SUCCESS) {
#ifdef DEBUG
if (rib_debug > 1)
rib_dump_pathrec(&paths[i]);
#endif
sizeof (ibt_path_info_t));
return (RDMA_SUCCESS);
}
#ifdef DEBUG
if (rib_debug) {
"ibt_query_ar FAILED, return\n");
}
#endif
}
}
}
}
}
return (RDMA_FAILED);
}
/*
* Connect to the server.
*/
{
/* Alloc a RC channel */
&chan_sizes);
} else {
return (RDMA_FAILED);
}
if (ibt_status != IBT_SUCCESS) {
#ifdef DEBUG
"failed, ibt_status=%d.", ibt_status);
#endif
return (RDMA_FAILED);
}
/* Connect to the Server */
if (ibt_status != IBT_SUCCESS) {
#ifdef DEBUG
if (rib_debug)
" failed for qp %p, status=%d, "
"ret_args.rc_status=%d\n",
#endif
/*
* Got IBT_CM_CONN_STALE probably because of stale
* data on the passive end of a channel that existed
* prior to reboot. Retry establishing a channel
* REFRESH_ATTEMPTS times, during which time the
* stale conditions on the server might clear up.
*/
goto refresh;
}
return (RDMA_FAILED);
}
/*
* Set the private data area to qp to be used in callbacks
*/
return (RDMA_SUCCESS);
}
{
struct sockaddr_in *sin4;
struct sockaddr_in6 *sin6;
/*
* Conctruct svc name
*/
switch (addr_type) {
case AF_INET:
break;
case AF_INET6:
break;
default:
#ifdef DEBUG
if (rib_debug) {
}
#endif
return (RDMA_INVAL);
}
if (ibt_status != IBT_SUCCESS) {
if (rib_debug > 1) {
" status=%d\n", ibt_status);
}
} else if (path.pi_hca_guid) {
return (RDMA_SUCCESS);
}
return (RDMA_FAILED);
}
/*
* Close channel, remove from connection list and
* free up resources allocated for that channel.
*/
{
/*
* c_ref == 0 and connection is in C_DISCONN_PEND
*/
/*
* If the channel has not been establised,
* ibt_flush_channel is called to flush outstanding WRs
* on the Qs. Otherwise, ibt_close_rc_channel() is
* called. The channel is then freed.
*/
else
while (qp->n_posted_rbufs)
}
(void) rib_rem_replylist(qp);
}
}
}
/*
* If HCA has been DETACHED and the srv/clnt_conn_list is NULL,
* then the hca is no longer being used.
*/
}
}
}
}
return (RDMA_SUCCESS);
}
/*
* Wait for send completion notification. Only on receiving a
* notification be it a successful or error completion, free the
* send_wid.
*/
static rdma_stat
{
int i;
/*
* Wait for send to complete
*/
;
switch (cv_wait_ret) {
case -1: /* timeout */
#ifdef DEBUG
if (rib_debug > 2)
"timed out qp %p\n", (void *)qp);
#endif
break;
default: /* got send completion */
break;
}
} else {
;
switch (cv_wait_ret) {
case -1: /* timeout */
#ifdef DEBUG
if (rib_debug > 2)
"timed out qp %p\n", (void *)qp);
#endif
break;
case 0: /* interrupted */
#ifdef DEBUG
if (rib_debug > 2)
" interrupted on qp %p\n",
(void *)qp);
#endif
break;
default: /* got send completion */
break;
}
}
}
/* got send completion */
error = RDMA_FAILED;
}
}
(void) rib_free_sendwait(wd);
} else {
}
return (error);
}
static struct send_wid *
{
return (wd);
}
static int
{
return (0);
}
static rdma_stat
{
return (RDMA_SUCCESS);
}
return (RDMA_FAILED);
}
/*
* Send buffers are freed here only in case of error in posting
* on QP. If the post succeeded, the send buffers are freed upon
* send completion in rib_sendwait() or in the scq_handler.
*/
{
int i, nds;
nds = 0;
total_msg_size = 0;
" too small!");
return (RDMA_FAILED);
}
nds++;
}
if (send_sig) {
/* Set SEND_SIGNAL flag. */
} else {
}
for (i = 0; i < nds; i++) {
}
}
ibt_status != IBT_SUCCESS) {
for (i = 0; i < nds; i++) {
}
(void) rib_free_sendwait(wdesc);
#ifdef DEBUG
"failed! wr_id %llx on qpn %p, status=%d!",
#endif
return (RDMA_FAILED);
}
if (send_sig) {
if (cv_sig) {
/*
* cv_wait for send to complete.
* We can fail due to a timeout or signal or
* unsuccessful send.
*/
#ifdef DEBUG
if (rib_debug > 2)
if (ret != 0) {
"FAILED, rdma stat=%d, wr_id %llx, qp %p!",
}
#endif
return (ret);
}
}
return (RDMA_SUCCESS);
}
{
/* send-wait & cv_signal */
return (ret);
}
/*
* Server interface (svc_rdma_ksend).
* Send RPC reply and wait for RDMA_DONE.
*/
{
struct rdma_done_list *rd;
/* No cv_signal (whether send-wait or no-send-wait) */
if (ret != RDMA_SUCCESS) {
#ifdef DEBUG
#endif
goto done;
}
/*
* Wait for RDMA_DONE from remote end
*/
timout);
if (cv_wait_ret < 0) {
#ifdef DEBUG
if (rib_debug > 1) {
" recv'd for qp %p, xid:%u\n",
}
#endif
ret = RDMA_TIMEDOUT;
goto done;
}
done:
return (ret);
}
static struct recv_wid *
{
return (rwid);
}
static void
{
}
{
int nds;
/*
* rdma_clnt_postrecv uses RECV_BUFFER.
*/
nds = 0;
ret = RDMA_FAILED;
goto done;
}
nds++;
}
if (nds != 1) {
ret = RDMA_FAILED;
goto done;
}
if (rwid) {
} else {
goto done;
}
if (!rep) {
goto done;
}
}
ibt_status != IBT_SUCCESS) {
#ifdef DEBUG
"ibt_post_recv(), msgid=%d, status=%d",
#endif
ret = RDMA_FAILED;
goto done;
}
return (RDMA_SUCCESS);
done:
}
return (ret);
}
{
int nds;
nds = 0;
return (RDMA_FAILED);
}
nds++;
}
if (nds != 1) {
return (RDMA_FAILED);
}
/* Use s_recvp's addr as wr id */
}
ibt_status != IBT_SUCCESS) {
#ifdef DEBUG
"ibt_post_recv(), status=%d",
(void *)qp, ibt_status);
#endif
(void) rib_free_svc_recv(s_recvp);
return (RDMA_FAILED);
}
return (RDMA_SUCCESS);
}
/* Client */
{
}
/* Server */
{
qp->n_posted_rbufs++;
return (RDMA_SUCCESS);
}
return (RDMA_FAILED);
}
/*
* Client side only interface to "recv" the rpc reply buf
* posted earlier by rib_post_resp(conn, cl, msgid).
*/
{
/*
* Find the reply structure for this msgid
*/
break;
}
/*
* If message not yet received, wait.
*/
timout = ddi_get_lbolt() +
switch (cv_wait_ret) {
case -1: /* timeout */
ret = RDMA_TIMEDOUT;
break;
case 0:
break;
default:
break;
}
}
/*
* Got message successfully
*/
} else {
/*
* Got error in reply message. Free
* recv buffer here.
*/
}
}
} else {
/*
* No matching reply structure found for given msgid on the
* reply wait list.
*/
ret = RDMA_INVAL;
#ifdef DEBUG
#endif
}
/*
* Done.
*/
return (ret);
}
/*
* RDMA write a buffer to the remote address.
*/
{
int nds;
int cv_sig;
return (RDMA_FAILED);
}
/*
* Remote address is at the head chunk item in list.
*/
nds = 0;
return (RDMA_FAILED);
}
nds++;
}
if (wait) {
cv_sig = 1;
} else {
cv_sig = 0;
}
}
ibt_status != IBT_SUCCESS) {
(void) rib_free_sendwait(wdesc);
return (RDMA_FAILED);
}
/*
* Wait for send to complete
*/
if (wait) {
if (ret != 0) {
return (ret);
}
}
return (RDMA_SUCCESS);
}
/*
* RDMA Read a buffer from the remote address.
*/
{
int nds;
int cv_sig;
return (RDMA_FAILED);
}
/*
* Remote address is at the head chunk item in list.
*/
nds = 0;
return (RDMA_FAILED);
}
nds++;
}
if (wait) {
cv_sig = 1;
} else {
cv_sig = 0;
}
}
ibt_status != IBT_SUCCESS) {
#ifdef DEBUG
" wr_id %llx on qp %p, status=%d",
#endif
(void) rib_free_sendwait(wdesc);
return (RDMA_FAILED);
}
/*
* Wait for send to complete
*/
if (wait) {
if (ret != 0) {
return (ret);
}
}
return (RDMA_SUCCESS);
}
int
{
for (i = 0; i < (ATS_AR_DATA_LEN - size); i++)
return (zero == 0);
}
/*
* rib_srv_cm_handler()
* Connection Manager callback to handle RC connection requests.
*/
/* ARGSUSED */
static ibt_cm_status_t
{
queue_t *q;
int i;
/* got a connection request */
case IBT_CM_EVENT_REQ_RCV:
/*
* If the plugin is in the NO_ACCEPT state, bail out.
*/
if (plugin_state == NO_ACCEPT) {
return (IBT_CM_REJECT);
}
/*
* Need to send a MRA MAD to CM so that it does not
* timeout on us.
*/
q = rib_stat->q;
if (status) {
#ifdef DEBUG
"create_channel failed %d", status);
#endif
return (IBT_CM_REJECT);
}
#ifdef DEBUG
if (rib_debug > 2) {
"server recv'ed IBT_CM_EVENT_REQ_RCV\n");
"\t\t Remote GID:(prefix:%llx,guid:%llx)\n",
}
if (rib_debug > 2) {
== IBT_SUCCESS) {
}
}
#endif
/*
* Pre-posts RECV buffers
*/
for (i = 0; i < preposted_rbufs; i++) {
"No RECV_BUFFER buf!\n");
return (IBT_CM_REJECT);
}
if (status != RDMA_SUCCESS) {
"posting RPC_REQ buf to qp %p!", (void *)qp);
return (IBT_CM_REJECT);
}
}
/*
* Get the address translation service record from ATS
*/
return (IBT_CM_REJECT);
}
for (i = 0; i < hca->hca_nports; i++) {
if (ibt_status != IBT_SUCCESS) {
if (rib_debug) {
"ibt_get_port_state FAILED!"
"status = %d\n", ibt_status);
}
} else {
/*
* do ibt_query_ar()
*/
&ar_result);
if (ibt_status != IBT_SUCCESS) {
if (rib_debug) {
"ibt_query_ar FAILED!"
"status = %d\n", ibt_status);
}
} else {
if (is_for_ipv4(&ar_result)) {
struct sockaddr_in *s;
int sin_size = sizeof (struct sockaddr_in);
KM_SLEEP);
s->sin_family = AF_INET;
/*
* For IPv4, the IP addr is stored in
* the last four bytes of ar_data.
*/
if (rib_debug > 1) {
char print_addr[INET_ADDRSTRLEN];
"remote clnt_addr: %s\n", print_addr);
}
} else {
struct sockaddr_in6 *s6;
int sin6_size = sizeof (struct sockaddr_in6);
KM_SLEEP);
/* sin6_addr is stored in ar_data */
sizeof (struct in6_addr));
if (rib_debug > 1) {
char print_addr[INET6_ADDRSTRLEN];
"remote clnt_addr: %s\n", print_addr);
}
}
return (IBT_CM_ACCEPT);
}
}
}
if (rib_debug > 1) {
"address record query failed!");
}
break;
case IBT_CM_EVENT_CONN_CLOSED:
{
case IBT_CM_CLOSED_DREP_RCVD:
case IBT_CM_CLOSED_DUP:
case IBT_CM_CLOSED_ABORT:
case IBT_CM_CLOSED_ALREADY:
/*
* These cases indicate the local end initiated
* the closing of the channel. Nothing to do here.
*/
break;
default:
/*
* Reason for CONN_CLOSED event must be one of
* IBT_CM_CLOSED_DREQ_RCVD or IBT_CM_CLOSED_REJ_RCVD
* or IBT_CM_CLOSED_STALE. These indicate cases were
* the remote end is closing the channel. In these
* cases free the channel and transition to error
* state
*/
break;
}
/*
* Free the rc_channel. Channel has already
* transitioned to ERROR state and WRs have been
* FLUSHED_ERR already.
*/
/*
* Free the conn if c_ref goes down to 0
*/
/*
* Remove from list and free conn
*/
(void) rib_disconnect_channel(conn,
&hca->srv_conn_list);
} else {
}
#ifdef DEBUG
if (rib_debug)
" (CONN_CLOSED) channel disconnected");
#endif
break;
}
break;
}
case IBT_CM_EVENT_CONN_EST:
/*
* RTU received, hence connection established.
*/
if (rib_debug > 1)
"(CONN_EST) channel established");
break;
default:
if (rib_debug > 2) {
/* Let CM handle the following events. */
"server recv'ed IBT_CM_EVENT_REP_RCV\n");
"server recv'ed IBT_CM_EVENT_LAP_RCV\n");
"server recv'ed IBT_CM_EVENT_MRA_RCV\n");
"server recv'ed IBT_CM_EVENT_APR_RCV\n");
"server recv'ed IBT_CM_EVENT_FAILURE\n");
}
}
return (IBT_CM_REJECT);
}
/* accept all other CM messages (i.e. let the CM handle them) */
return (IBT_CM_ACCEPT);
}
static rdma_stat
{
/*
* Query all ports for the given HCA
*/
} else {
return (RDMA_FAILED);
}
if (ibt_status != IBT_SUCCESS) {
#ifdef DEBUG
if (rib_debug) {
"ibt_query_hca_ports, status = %d\n", ibt_status);
}
#endif
return (RDMA_FAILED);
}
#ifdef DEBUG
if (rib_debug > 1) {
"%d\n", num_ports);
for (i = 0; i < num_ports; i++) {
"Port #: %d INACTIVE\n", i+1);
} else if (port_infop[i].p_linkstate ==
"Port #: %d ACTIVE\n", i+1);
}
}
}
#endif
ibds.rib_ibd_cnt = 0;
sizeof (rpcib_ats_t), KM_SLEEP);
if (ibds.rib_ibd_cnt == 0) {
sizeof (rpcib_ats_t));
return (RDMA_FAILED);
}
/*
* Get the IP addresses of active ports and
* register them with ATS. IPv4 addresses
* have precedence over IPv6 addresses.
*/
if (get_ibd_ipaddr(&ibds) != 0) {
#ifdef DEBUG
if (rib_debug > 1) {
"get_ibd_ipaddr failed");
}
#endif
sizeof (rpcib_ats_t));
return (RDMA_FAILED);
}
/*
* Start ATS registration for active ports on this HCA.
*/
nbinds = 0;
new_service = NULL;
for (i = 0; i < num_ports; i++) {
continue;
continue;
/*
* store the sin[6]_addr in ar_data
*/
/*
* The ipv4 addr goes into the last
* four bytes of ar_data.
*/
sizeof (struct in_addr);
sizeof (struct in_addr));
sizeof (struct in6_addr));
} else
continue;
if (ibt_status == IBT_SUCCESS) {
#ifdef DEBUG
if (rib_debug > 1) {
"ibt_register_ar OK on port %d", i+1);
}
#endif
/*
* Allocate and prepare a service entry
*/
KM_SLEEP);
/*
* Add to the service list for this HCA
*/
new_service = NULL;
nbinds ++;
} else {
#ifdef DEBUG
if (rib_debug > 1) {
"ibt_register_ar FAILED on port %d", i+1);
}
#endif
}
}
}
}
#ifdef DEBUG
if (rib_debug > 1) {
}
}
#endif
if (nbinds == 0) {
#ifdef DEBUG
if (rib_debug > 1) {
}
#endif
return (RDMA_FAILED);
}
return (RDMA_SUCCESS);
}
static rdma_stat
{
char **addrs;
int addr_count;
/*
* Query all ports for the given HCA
*/
} else {
return (RDMA_FAILED);
}
if (ibt_status != IBT_SUCCESS) {
#ifdef DEBUG
"ibt_query_hca_ports, status = %d\n", ibt_status);
#endif
return (RDMA_FAILED);
}
#ifdef DEBUG
if (rib_debug > 1) {
"%d\n", num_ports);
for (i = 0; i < num_ports; i++) {
"Port #: %d INACTIVE\n", i+1);
} else if (port_infop[i].p_linkstate ==
"Port #: %d ACTIVE\n", i+1);
}
}
}
#endif
/*
* Get all the IP addresses on this system to register the
* given "service type" on all DNS recognized IP addrs.
* Each service type such as NFS will have all the systems
* IP addresses as its different names. For now the only
* type of service we support in RPCIB is NFS.
*/
#ifdef DEBUG
if (rib_debug) {
"get_ip_addrs failed\n");
}
#endif
return (RDMA_FAILED);
}
#ifdef DEBUG
if (rib_debug > 1) {
for (i = 0; i < addr_count; i++)
}
#endif
/*
* Start registering and binding service to active
* on active ports on this HCA.
*/
nbinds = 0;
new_service = NULL;
/*
* We use IP addresses as the service names for
* service registration. Register each of them
* with CM to obtain a svc_id and svc_hdl. We do not
* register the service with machine's loopback address.
*/
for (j = 1; j < addr_count; j++) {
if (ibt_status != IBT_SUCCESS) {
#ifdef DEBUG
if (rib_debug) {
"ibt_register_service FAILED, status "
"= %d\n", ibt_status);
}
#endif
/*
* No need to go on, since we failed to obtain
* a srv_id and srv_hdl. Move on to the next
* IP addr as a service name.
*/
continue;
}
for (i = 0; i < num_ports; i++) {
continue;
/*
* Allocate and prepare a service entry
*/
KM_SLEEP);
sizeof (ibt_sbind_hdl_t), KM_SLEEP);
KM_SLEEP);
/*
* Bind the service, specified by the IP address,
* from ibt_register_service().
*/
#ifdef DEBUG
if (rib_debug > 1) {
"binding service using name: %s\n",
}
#endif
if (ibt_status != IBT_SUCCESS) {
#ifdef DEBUG
if (rib_debug) {
" in ibt_bind_service, status = %d\n",
}
#endif
sizeof (ibt_sbind_hdl_t));
sizeof (rib_service_t));
new_service = NULL;
continue;
}
#ifdef DEBUG
if (rib_debug > 1) {
if (ibt_status == IBT_SUCCESS)
"Serv: %s REGISTERED on port: %d",
}
#endif
/*
* Add to the service list for this HCA
*/
new_service = NULL;
nbinds ++;
}
}
}
}
#ifdef DEBUG
if (rib_debug > 1) {
/*
* Change this print to a more generic one, as rpcib
* is supposed to handle multiple service types.
*/
" %d\n"
}
}
#endif
for (i = 0; i < addr_count; i++) {
if (addrs[i])
}
if (nbinds == 0) {
#ifdef DEBUG
if (rib_debug) {
"bind_service FAILED!\n");
}
#endif
return (RDMA_FAILED);
} else {
/*
* Put this plugin into accept state, since atleast
* one registration was successful.
*/
return (RDMA_SUCCESS);
}
}
void
{
/*
* First check if a hca is still attached
*/
return;
}
/*
* Register the Address translation service
*/
if (ats_running == 0) {
#ifdef DEBUG
if (rib_debug) {
"rib_listen(): ats registration failed!");
}
#endif
return;
} else {
ats_running = 1;
}
}
/*
* Right now the only service type is NFS. Hence force feed this
* value. Ideally to communicate the service type it should be
* passed down in rdma_svc_data.
*/
if (status != RDMA_SUCCESS) {
return;
}
/*
* Service active on an HCA, check rd->err_code for more
* explainable errors.
*/
}
/* XXXX */
/* ARGSUSED */
static void
{
/*
* KRPC called the RDMATF to stop the listeners, this means
* stop sending incomming or recieved requests to KRPC master
* transport handle for RDMA-IB. This is also means that the
* master transport handle, responsible for us, is going away.
*/
/*
* First check if a hca is still attached
*/
return;
}
}
/*
* Traverse the HCA's service list to unbind and deregister services.
* Instead of unbinding the service for a service handle by
* all the services for the service handle by making only one
* call to ibt_unbind_all_services(). Then, we deregister the
* service for the service handle.
*
* When traversing the entries in service_list, we compare the
* srv_hdl of the current entry with that of the next. If they
* are different or if the next entry is NULL, the current entry
* marks the last binding of the service handle. In this case,
* call ibt_unbind_all_services() and deregister the service for
* the service handle. If they are the same, the current and the
* next entries are bound to the same service handle. In this
* case, move on to the next entry.
*/
static void
{
/*
* unbind and deregister the services for this service type.
* Right now there is only one service type. In future it will
* be passed down to this function.
*/
if (ibt_status != IBT_SUCCESS) {
"ibt_unbind_all_services FAILED"
" status: %d\n", ibt_status);
}
if (ibt_status != IBT_SUCCESS) {
"ibt_deregister_service FAILED"
" status: %d\n", ibt_status);
}
#ifdef DEBUG
if (rib_debug > 1) {
if (ibt_status == IBT_SUCCESS)
"Successfully stopped and"
" UNREGISTERED service: %s\n",
}
#endif
}
sizeof (ibt_sbind_hdl_t));
}
}
static struct svc_recv *
{
recvp->bytes_xfer = 0;
return (recvp);
}
static int
{
return (0);
}
static struct reply *
{
return (NULL);
}
rep->bytes_xfer = 0;
}
qp->rep_list_size++;
if (rib_debug > 1)
return (rep);
}
static rdma_stat
{
struct reply *r, *n;
n = r->next;
(void) rib_remreply(qp, r);
}
return (RDMA_SUCCESS);
}
static int
{
}
}
qp->rep_list_size--;
if (rib_debug > 1)
return (0);
}
struct mrc *buf_handle)
{
/*
* Note: ALL buffer pools use the same memory type RDMARW.
*/
if (status == RDMA_SUCCESS) {
} else {
buf_handle->mrc_lmr = 0;
buf_handle->mrc_rmr = 0;
}
return (status);
}
static rdma_stat
{
} else {
return (RDMA_FAILED);
}
if (ibt_status != IBT_SUCCESS) {
"(spec:%d) failed for addr %llX, status %d",
return (RDMA_FAILED);
}
return (RDMA_SUCCESS);
}
{
/*
* Non-coherent memory registration.
*/
&mr_desc);
if (status == RDMA_SUCCESS) {
} else {
buf_handle->mrc_lmr = 0;
buf_handle->mrc_rmr = 0;
}
return (status);
}
/* ARGSUSED */
{
/*
* Allow memory deregistration even if HCA is
* getting detached. Need all outstanding
* memory registrations to be deregistered
* before HCA_DETACH_EVENT can be accepted.
*/
return (RDMA_SUCCESS);
}
/* ARGSUSED */
{
return (RDMA_SUCCESS);
}
/* ARGSUSED */
{
if (cpu) {
/* make incoming data visible to memory */
} else {
/* make memory changes visible to IO */
}
} else {
return (RDMA_FAILED);
}
if (status == IBT_SUCCESS)
return (RDMA_SUCCESS);
else {
#ifdef DEBUG
status);
#endif
return (RDMA_FAILED);
}
}
/*
* XXXX ????
*/
static rdma_stat
{
/*
* XXXX Hack!
*/
return (RDMA_SUCCESS);
}
{
int i, j;
switch (ptype) {
case SEND_BUFFER:
/* mem_attr.mr_flags |= IBT_MR_ENABLE_WINDOW_BIND; */
break;
case RECV_BUFFER:
/* mem_attr.mr_flags |= IBT_MR_ENABLE_WINDOW_BIND; */
break;
default:
goto fail;
}
/*
* Register the pool.
*/
sizeof (ibt_mr_hdl_t), KM_SLEEP);
sizeof (ibt_mr_desc_t), KM_SLEEP);
goto fail;
}
if (ibt_status != IBT_SUCCESS) {
for (j = 0; j < i; j++) {
}
goto fail;
}
}
}
return (rbp);
fail:
if (bp) {
}
if (rbp) {
}
return (NULL);
}
static void
{
int i;
/*
* Obtain pool address based on type of pool
*/
switch (ptype) {
case SEND_BUFFER:
break;
case RECV_BUFFER:
break;
default:
return;
}
return;
/*
* Deregister the pool memory and free it.
*/
}
}
static void
{
/*
* Obtain pool address based on type of pool
*/
switch (ptype) {
case SEND_BUFFER:
break;
case RECV_BUFFER:
break;
default:
return;
}
return;
/*
* Free the pool memory.
*/
}
void
{
/*
* Deregister the pool memory and free it.
*/
}
/*
* Fetch a buffer from the pool of type specified in rdbuf->type.
*/
static rdma_stat
{
case SEND_BUFFER:
break;
case RECV_BUFFER:
break;
default:
}
return (RDMA_SUCCESS);
} else
return (RDMA_FAILED);
}
/*
* Fetch a buffer of specified type.
* Note that rdbuf->handle is mw's rkey.
*/
static void *
{
void *buf;
int i;
/*
* Obtain pool address based on type of pool
*/
switch (ptype) {
case SEND_BUFFER:
break;
case RECV_BUFFER:
break;
default:
return (NULL);
}
return (NULL);
return (NULL);
}
/* XXXX put buf, rdbuf->handle.mrc_rmr, ... in one place. */
if (rib_debug > 1)
return (buf);
}
}
return (NULL);
}
static void
{
}
static void
{
/*
* Obtain pool address based on type of pool
*/
switch (ptype) {
case SEND_BUFFER:
break;
case RECV_BUFFER:
break;
default:
return;
}
return;
/*
* Should never happen
*/
"too many frees!", ptype);
} else {
if (rib_debug > 1)
}
}
static rdma_stat
{
}
return (RDMA_SUCCESS);
}
static rdma_stat
{
}
}
return (RDMA_SUCCESS);
}
/*
* Connection management.
* IBTF does not support recycling of channels. So connections are only
* in four states - C_CONN_PEND, or C_CONNECTED, or C_ERROR or
* C_DISCONN_PEND state. No C_IDLE state.
* C_CONN_PEND state: Connection establishment in progress to the server.
* C_CONNECTED state: A connection when created is in C_CONNECTED state.
* It has an RC channel associated with it. ibt_post_send/recv are allowed
* only in this state.
* C_ERROR state: A connection transitions to this state when WRs on the
* channel are completed in error or an IBT_CM_EVENT_CONN_CLOSED event
* happens on the channel or a IBT_HCA_DETACH_EVENT occurs on the HCA.
* C_DISCONN_PEND state: When a connection is in C_ERROR state and when
* c_ref drops to 0 (this indicates that RPC has no more references to this
* connection), the connection should be destroyed. A connection transitions
* into this state when it is being destroyed.
*/
static rdma_stat
{
int status = RDMA_SUCCESS;
/*
* First, clear up any connection in the ERROR state
*/
/*
* Remove connection from list and destroy it.
*/
(void) rib_disconnect_channel(cn,
&hca->cl_conn_list);
goto again;
}
continue;
continue;
}
/*
* Our connection. Give up conn list lock
* as we are done traversing the list.
*/
return (status);
}
/*
* Hold a reference to this conn before
* we give up the lock.
*/
timout = ddi_get_lbolt() +
;
if (cv_stat == 0) {
return (RDMA_INTR);
}
if (cv_stat < 0) {
return (RDMA_TIMEDOUT);
}
return (status);
} else {
return (RDMA_TIMEDOUT);
}
}
}
}
if (status != RDMA_SUCCESS) {
#ifdef DEBUG
if (rib_debug) {
"No server ATS record!");
}
#endif
return (RDMA_FAILED);
}
/*
* Channel to server doesn't exist yet, create one.
*/
return (RDMA_FAILED);
}
/*
* Add to conn list.
* We had given up the READER lock. In the time since then,
* another thread might have created the connection we are
* trying here. But for now, that is quiet alright - there
* might be two connections between a pair of hosts instead
* of one. If we really want to close that window,
* then need to check the list after acquiring the
* WRITER lock.
*/
if (status == RDMA_SUCCESS) {
} else {
#ifdef DEBUG
if (rib_debug) {
" a channel!");
}
#endif
}
return (status);
}
static rdma_stat
{
/*
* If a conn is C_ERROR, close the channel.
* If it's CONNECTED, keep it that way.
*/
(void) rib_disconnect_channel(conn,
else
(void) rib_disconnect_channel(conn,
return (RDMA_SUCCESS);
}
return (RDMA_SUCCESS);
}
/*
* Add at front of list
*/
static struct rdma_done_list *
{
struct rdma_done_list *rd;
return (rd);
}
static void
{
struct rdma_done_list *r;
if (r != NULL) {
}
if (r != NULL) {
} else {
}
}
static void
{
struct rdma_done_list *r, *n;
n = r->next;
rdma_done_rm(qp, r);
}
}
static void
{
while (r) {
cv_signal(&r->rdma_done_cv);
return;
} else {
r = r->next;
}
}
if (rib_debug > 1) {
}
}
{
int i;
return (atsp);
}
}
return (NULL);
}
int
{
if (i_ddi_devi_attached(dip) &&
}
dip, 0, "hca-guid", 0)) == 0) ||
0, "port-number", 0)) == 0) ||
"port-pkey", IB_PKEY_INVALID_LIMITED)) <=
return (DDI_WALK_CONTINUE);
}
ibds->rib_ibd_cnt++;
}
return (DDI_WALK_CONTINUE);
}
void
{
}
/*
* Return ibd interfaces and ibd instances.
*/
int
{
int k, ip_cnt;
&kvp) == 0) {
} else {
}
}
&kvp6) == 0) {
} else {
}
}
return (-1);
/* Get ibd ip's */
ip_cnt = 0;
/* IPv4 */
sizeof (struct sockaddr_in));
ip_cnt++;
continue;
}
}
/* Try IPv6 */
sizeof (struct sockaddr_in6));
ip_cnt++;
}
}
}
}
}
if (ip_cnt == 0)
return (-1);
else
return (0);
}
char **
get_ip_addrs(int *count)
{
int num_of_ifs;
char **addresses;
int return_code;
/*
* Open a device for doing down stream kernel ioctls
*/
if (return_code != 0) {
*count = -1;
return (NULL);
}
if (return_code != 0) {
*count = -1;
return (NULL);
}
/*
* Perform the first ioctl to get the number of interfaces
*/
if (return_code != 0 || num_of_ifs == 0) {
*count = -1;
return (NULL);
}
/*
* Perform the second ioctl to get the address on each interface
* found.
*/
if (return_code <= 0) {
*count = -1;
return (NULL);
}
*count = return_code;
return (addresses);
}
int
{
int return_code;
/*
* Prep the number of interfaces request buffer for ioctl
*/
if_buf.lifn_flags = 0;
/*
* Prep the kernel ioctl buffer and send it down stream
*/
if (return_code != 0) {
*num = -1;
return (-1);
}
#ifdef DEBUG
if (rib_debug > 1)
#endif
return (0);
}
int
{
struct lifreq *if_data_buf;
struct sockaddr_in *sin4;
struct sockaddr_in6 *sin6;
int i, count, return_code;
/*
* Prep the buffer for requesting all interface's info
*/
lifc.lifc_flags = 0;
KM_SLEEP);
/*
* Prep the kernel ioctl buffer and send it down stream
*/
if (return_code != 0) {
return (-1);
}
/*
* Extract addresses and fill them in the requested array
* IB_SVC_NAME_LEN is defined to be 64 so it covers both IPv4 &
* IPv6. Here count is the number of IP addresses collected.
*/
count = 0;
if_data_buf++) {
count ++;
}
count ++;
}
}
return (count);
}
/*
* Goes through all connections and closes the channel
* This will cause all the WRs on those channels to be
* flushed.
*/
static void
{
/*
* Live connection in CONNECTED state.
* Call ibt_close_rc_channel in nonblocking mode
* with no callbacks.
*/
} else {
/*
* Connection in ERROR state but
* channel is not yet freed.
*/
NULL, 0);
}
}
}
}
/*
* Frees up all connections that are no longer being referenced
*/
static void
{
top:
/*
* At this point connection is either in ERROR
* or DISCONN_PEND state. If in DISCONN_PEND state
* then some other thread is culling that connection.
* If not and if c_ref is 0, then destroy the connection.
*/
/*
* Cull the connection
*/
goto top;
} else {
/*
* conn disconnect already scheduled or will
* happen from conn_release when c_ref drops to 0.
*/
}
}
/*
* At this point, only connections with c_ref != 0 are on the list
*/
}
/*
* Cleans and closes up all uses of the HCA
*/
static void
{
/*
* Stop all services on the HCA
* Go through cl_conn_list and close all rc_channels
* Go through svr_conn_list and close all rc_channels
* Free connections whose c_ref has dropped to 0
* Destroy all CQs
* Deregister and released all buffer pool memory after all
* connections are destroyed
* Free the protection domain
* ibt_close_hca()
*/
return;
}
rib_stat->nhca_inited--;
/*
* conn_lists are NULL, so destroy
* buffers, close hca and be done.
*/
}
/*
* conn_lists are now NULL, so destroy
* buffers, close hca and be done.
*/
}
}