clnt_rdma.c revision 7c478bd95313f5f23a4c958a745db2134aa03244
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2004 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
/*
* Portions of this source code were derived from Berkeley
* 4.3 BSD under license from the Regents of the University of
* California.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/sysmacros.h>
#include <sys/isa_defs.h>
#include <rpc/rpc_rdma.h>
static void clnt_rdma_kabort(CLIENT *);
static void clnt_rdma_kdestroy(CLIENT *);
/*
* Operations vector for RDMA based RPC
*
* The entries are positional. Each client handle built by this file
* has its cl_ops member pointed at this table, so the order of the
* seven handlers below must stay in step with the layout of
* struct clnt_ops.
*/
static struct clnt_ops rdma_clnt_ops = {
clnt_rdma_kcallit, /* do rpc call */
clnt_rdma_kabort, /* abort call */
clnt_rdma_kerror, /* return error status */
clnt_rdma_kfreeres, /* free results */
clnt_rdma_kdestroy, /* destroy rpc handle */
clnt_rdma_kcontrol, /* the ioctl() of rpc */
clnt_rdma_ksettimers, /* set retry timers */
};
/*
* The size of the preserialized RPC header information.
*/
#define CKU_HDRSIZE 20
/*
* Per RPC RDMA endpoint details
*/
typedef struct cku_private {
void *cku_rd_handle; /* underlying RDMA device */
int cku_addrfmly; /* for finding addr_type */
static int clnt_rdma_min_delay = CLNT_RDMA_DELAY;
struct {
} rdmarcstat = {
{ "calls", KSTAT_DATA_UINT64 },
{ "badcalls", KSTAT_DATA_UINT64 },
{ "badxids", KSTAT_DATA_UINT64 },
{ "timeouts", KSTAT_DATA_UINT64 },
{ "newcreds", KSTAT_DATA_UINT64 },
{ "badverfs", KSTAT_DATA_UINT64 },
{ "timers", KSTAT_DATA_UINT64 },
{ "cantconn", KSTAT_DATA_UINT64 },
{ "nomem", KSTAT_DATA_UINT64 },
{ "interrupts", KSTAT_DATA_UINT64 },
{ "longrpc", KSTAT_DATA_UINT64 }
};
#ifdef DEBUG
/*
* Debug switch, present only in DEBUG builds and off (0) by default.
* Code in this file tests it before printing diagnostic messages.
*/
int rdma_clnt_debug = 0;
#endif
#ifdef accurate_stats
#define RCSTAT_INCR(x) \
#else
#define RCSTAT_INCR(x) \
#endif
/*
* ptoh: given a pointer p to a cku_private, yield the address of its
* cku_client member, i.e. the client handle kept inside the private data.
*/
#define ptoh(p) (&((p)->cku_client))
/*
* Build a client handle for RPC over RDMA and return it through *cl.
*
* A zero-filled cku_private is allocated, the RDMATF plugin list that
* starts at rdma_mod_head is consulted, and the CLIENT obtained with
* ptoh() is given rdma_clnt_ops, a back pointer to the private data
* and an auth handle from authkern_create().
*
* Returns 0 once *cl has been set. Every failure exit returns EINVAL;
* the exits taken after the allocation release what was set up so far
* before returning.
*/
int
{
CLIENT *h;
struct cku_private *p;
return (EINVAL);
/* KM_SLEEP allocation; the result is used without a NULL check. */
p = kmem_zalloc(sizeof (*p), KM_SLEEP);
/*
* Find underlying RDMATF plugin
*/
rp = rdma_mod_head;
else {
p->cku_rd_handle = handle;
break;
}
}
if (p->cku_rd_mod == NULL) {
/*
* Should not happen.
* No matching RDMATF plugin.
*/
kmem_free(p, sizeof (struct cku_private));
return (EINVAL);
}
/* Wire the embedded CLIENT to this transport's ops and private data. */
h = ptoh(p);
h->cl_ops = &rdma_clnt_ops;
h->cl_private = (caddr_t)p;
h->cl_auth = authkern_create();
/* call message, just used to pre-serialize below */
/* pre-serialize call message header */
/* Undo the setup: XDR stream, auth handle, then the private data. */
XDR_DESTROY(&p->cku_outxdr);
auth_destroy(h->cl_auth);
kmem_free(p, sizeof (struct cku_private));
return (EINVAL);
}
/*
* Set up the rpc information
*/
p->cku_addrfmly = family;
/* Hand the finished handle back to the caller. */
*cl = h;
return (0);
}
/*
* Destroy a client handle: the cku_private behind h is located with
* htop() and handed back to the kernel allocator with kmem_free().
* The CLIENT lives inside that structure (see ptoh), so h must not be
* used after this returns.
*/
static void
{
struct cku_private *p = htop(h);
kmem_free(p, sizeof (*p));
}
/*
* Re-initialize an existing client handle h.
*
* The plugin recorded in the private data is cleared and looked up
* again starting from rdma_mod_head, the transaction id is reset to 0
* and rdma_clnt_ops is installed as the handle's operations vector.
* Nothing is returned to the caller.
*/
void
{
struct cku_private *p = htop(h);
/*
* Find underlying RDMATF plugin
*/
/* Forget the previous plugin before searching the list again. */
p->cku_rd_mod = NULL;
rp = rdma_mod_head;
else {
p->cku_rd_handle = handle;
break;
}
}
/*
* Set up the rpc information
*/
/* The call path tests cku_xid against 0 before getting a unique xid. */
p->cku_xid = 0;
}
h->cl_ops = &rdma_clnt_ops;
}
/* ARGSUSED */
static enum clnt_stat
{
cku_private_t *p = htop(h);
int status;
int msglen;
/*
* Get unique xid
*/
if (p->cku_xid == 0)
/*
* Connect failed to server. Could be because of one
* of several things. In some cases we don't want
* the caller to retry immediately - delay before
* returning to caller.
*/
switch (status) {
case RDMA_TIMEDOUT:
/*
* Already timed out. No need to delay
* some more.
*/
break;
case RDMA_INTR:
/*
* Failed because of a signal. Very likely
* the caller will not retry.
*/
break;
default:
/*
* All other failures - server down or service
* down or temporary resource failure. Delay before
* returning to caller.
*/
if (h->cl_nosignal == TRUE) {
} else {
}
}
break;
}
}
/*
* Get the size of the rpc call message. Need this
* to determine if the rpc call message will fit in
* the pre-allocated RDMA buffers. If the rpc call
* message length is greater than the pre-allocated
* buffers, then it is a Long RPC. A one time use
* buffer is allocated and registered for the Long
* RPC call.
*/
if (msglen > RPC_MSG_SZ) {
/*
* Long RPC. Allocate one time use custom buffer.
*/
op = RDMA_NOMSG;
} else {
/*
* Get a pre-allocated buffer for rpc call
*/
"clnt_rdma_kcallit: no buffers!");
goto done;
}
}
} else {
/*
* For RPCSEC_GSS, since we cannot accurately presize the
* buffer required for encoding, we assume that it's going
* to be a Long RPC to start with. We also create the
* XDR stream with min_chunk set to 0, which instructs
* the XDR layer to not chunk the incoming byte stream.
*/
/*
* Long RPC. Allocate one time use custom buffer.
*/
XDR_ENCODE, NULL);
op = RDMA_NOMSG;
}
/*
* Copy in the preserialized RPC header
* information.
*/
/*
* transaction id is the 1st thing in the output
* buffer.
*/
/* LINTED pointer alignment */
/* Skip the preserialized stuff. */
/* Serialize dynamic stuff into the output buffer. */
if (cle)
"clnt_rdma_kcallit: XDR_PUTINT32/AUTH_MARSHAL/xdr_args failed");
goto done;
}
} else {
XDR_SETPOS(xdrs, 0);
/* Serialize the procedure number and the arguments. */
}
"clnt_rdma_kcallit: AUTH_WRAP failed");
goto done;
}
/*
* If we had to allocate a new buffer while encoding
* then update the addr and len.
*/
}
/*
* If it so happens that the encoded message is after all
* not long enough to be a Long RPC then allocate a
* SEND_BUFFER and copy the encoded message into it.
*/
if (p->cku_outsz > RPC_MSG_SZ) {
} else {
/*
* Get a pre-allocated buffer for rpc call
*/
"clnt_rdma_kcallit: no buffers!");
goto done;
}
}
}
/*
* Update the chunk size information for the Long RPC msg.
*/
/*
* Set up the RDMA chunk message
*/
vers = RPCRDMA_VERS;
goto done;
}
xdrs = &p->cku_outxdr;
/*
* Treat xid as opaque (xid is the first entity
* in the rpc rdma message).
*/
/* Skip xid and set the xdr position accordingly. */
/*
* Now XDR the chunk list
*/
/*
* Register the chunks in the list
*/
if (status != RDMA_SUCCESS) {
"clnt_rdma_kcallit: clist register failed");
clist_free(cl);
goto done;
}
}
/*
* Start with the RDMA header and clist (if any)
*/
/*
* Put the RPC call message in the send list if small RPC
*/
} else {
/* Long RPC already in chunk list */
}
/*
* Set up a reply buffer ready for the reply
*/
if (status != RDMA_SUCCESS) {
if (cl) {
clist_free(cl);
}
goto done;
}
/*
* sync the memory for dma
*/
if (status != RDMA_SUCCESS) {
clist_free(cl);
goto done;
}
}
/*
* Send the call message to the server
*/
if (status != RDMA_SUCCESS) {
if (cl) {
clist_free(cl);
/*
* If this was a long RPC message, need
* to free that buffer.
*/
}
goto done;
} else {
/*
* RDMA plugin now owns the send msg buffers.
* Clear them out and don't free them here.
*/
}
#ifdef DEBUG
if (rdma_clnt_debug) {
}
#endif
/*
* Recv rpc reply
*/
/*
* Deregister chunks sent. Do this only after the reply
* is received as that is a sure indication that the
* remote end has completed RDMA of the chunks.
*/
/*
* Deregister the chunks
*/
clist_free(cl);
/*
* If long RPC free chunk
*/
}
/*
* Now check recv status
*/
if (status != 0) {
#ifdef DEBUG
if (rdma_clnt_debug)
"clnt_rdma_kcallit: reply failed %u status %d",
#endif
} else if (status == RPC_TIMEDOUT) {
} else {
}
goto done;
}
#ifdef DEBUG
if (rdma_clnt_debug)
#endif
/*
* Process the reply message.
*
* First the chunk list (if any)
*/
/*
* Treat xid as opaque (xid is the first entity
* in the rpc rdma message).
*/
/* Skip xid and set the xdr position accordingly. */
/*
* Now the RPC reply message itself. If the reply
* came as a chunk item, then RDMA the reply over.
*/
/*
* Register the rpc reply chunk destination
*/
if (status) {
"clnt_rdma_kcallit: clist_register failed");
goto rdma_done;
}
/*
* Now read rpc reply in
*/
#ifdef DEBUG
if (rdma_clnt_debug)
printf("clnt_rdma_kcallit: read chunk, len %d, xid %u, \
#endif
if (status) {
"clnt_rdma_kcallit: RDMA_READ failed");
goto rdma_done;
}
/*
* sync the memory for dma
*/
if (status != RDMA_SUCCESS) {
goto rdma_done;
}
/*
* Deregister the Long RPC chunk
*/
XDR_DECODE, conn);
} else {
}
/*
* xdr_results will be done in AUTH_UNWRAP.
*/
/*
* Decode and validate the response.
*/
if (re_status == RPC_SUCCESS) {
/*
* Reply is good, check auth.
*/
if (!AUTH_VALIDATE(h->cl_auth,
"clnt_rdma_kcallit: AUTH_VALIDATE failed");
xdr_results, resultsp)) {
"clnt_rdma_kcallit: AUTH_UNWRAP failed");
}
} else {
/* set errno in case we can't recover */
if (re_status != RPC_VERSMISMATCH &&
re_status != RPC_AUTHERROR &&
if (re_status == RPC_AUTHERROR) {
/*
* Map recoverable and unrecoverable
* authentication errors to appropriate
* errno
*/
case AUTH_BADCRED:
case AUTH_BADVERF:
case AUTH_INVALIDRESP:
case AUTH_TOOWEAK:
case AUTH_FAILED:
case RPCSEC_GSS_NOCRED:
case RPCSEC_GSS_FAILED:
break;
case AUTH_REJECTEDCRED:
case AUTH_REJECTEDVERF:
default:
break;
}
"authentication failed with "
"RPC_AUTHERROR of type %d\n",
}
"clnt_rdma_kcallit: RPC failed");
}
} else {
}
/*
* If rpc reply is in a chunk, free it now.
*/
/*
* Free the list holding the chunk info
*/
if (cl) {
clist_free(cl);
}
/*
* Tell the server that the reads are done
*/
goto done;
}
xdrs = &p->cku_outxdr;
vers = RPCRDMA_VERS;
/*
* Treat xid as opaque (xid is the first entity
* in the rpc rdma message).
*/
/* Skip xid and set the xdr position accordingly. */
"clnt_rdma_kcallit: xdr_u_int failed");
goto done;
}
if (status != RDMA_SUCCESS) {
"clnt_rdma_kcallit: RDMA_SEND failed xid %u",
p->cku_xid);
}
#ifdef DEBUG
else {
if (rdma_clnt_debug)
printf("clnt_rdma_kcallit: sent RDMA_DONE xid %u\n",
p->cku_xid);
}
#endif
}
done:
if (cxdrp)
if (rxdrp) {
}
if (recvlist) {
}
}
}
/*
* The body is empty, so calling this routine has no effect.
* NOTE(review): by position and return type this appears to be the
* clnt_rdma_kabort ("abort call") entry of rdma_clnt_ops - confirm.
*/
/* ARGSUSED */
static void
{
}
/*
* Looks up the cku_private that belongs to h with htop().
* NOTE(review): by position this appears to be the clnt_rdma_kerror
* ("return error status") entry of rdma_clnt_ops - confirm.
*/
static void
{
struct cku_private *p = htop(h);
}
/*
* Takes the address of the cku_outxdr stream kept in the private data
* of h (found with htop()).
* NOTE(review): by position this appears to be the clnt_rdma_kfreeres
* ("free results") entry of rdma_clnt_ops - confirm.
*/
static bool_t
{
struct cku_private *p = htop(h);
/* Work on the XDR stream stored in the private data. */
xdrs = &(p->cku_outxdr);
}
/*
* Always returns TRUE and does nothing else.
* NOTE(review): by position this appears to be the clnt_rdma_kcontrol
* ("the ioctl() of rpc") entry of rdma_clnt_ops - confirm.
*/
/* ARGSUSED */
static bool_t
{
return (TRUE);
}
/*
* Always returns 0 and does nothing else.
* NOTE(review): by position this appears to be the clnt_rdma_ksettimers
* ("set retry timers") entry of rdma_clnt_ops - confirm.
*/
/* ARGSUSED */
static int
{
return (0);
}
/*
* Probe the RDMA plugins, loading them first if that has not been done.
*
* Returns 0 as soon as a status of RDMA_SUCCESS is seen. Returns -1
* when the current process is not in the global zone, when
* rdma_modload() reports an error, when rdma_dev_available is clear,
* or when the end of the function is reached without a success.
*/
int
{
struct knetconfig *knc;
char *pf, *p;
int error = 0;
/* Only processes in the global zone are served. */
if (!INGLOBALZONE(curproc))
return (-1);
/*
* modload the RDMA plugins if not already done.
*/
/*
* NOTE(review): rdma_modloaded is tested twice in a row; presumably
* a lock is acquired between the two tests - confirm.
*/
if (!rdma_modloaded) {
if (!rdma_modloaded) {
error = rdma_modload();
}
if (error)
return (-1);
}
/* Without an available RDMA device there is nothing to probe. */
if (!rdma_dev_available)
return (-1);
/* Start from the head of the plugin list. */
rp = rdma_mod_head;
&handle);
if (status == RDMA_SUCCESS) {
KM_SLEEP);
return (0);
}
}
return (-1);
}