/* rpc_rdma.h revision 7c478bd95313f5f23a4c958a745db2134aa03244 */
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2005 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _RPC_RPC_RDMA_H
#define _RPC_RPC_RDMA_H
#pragma ident "%Z%%M% %I% %E% SMI"
#include <rpc/rpc.h>
#include <rpc/rpc_sztypes.h>
#include <sys/sunddi.h>
#include <sys/sunldi.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Version numbers.
 *
 * RPCRDMA_VERS identifies the RPC over RDMA protocol itself, while the
 * RDMATF_* values identify the API that RPC uses to talk to the RDMA
 * plugins. RDMATF_VERS and RDMATF_VERS_1 currently have the same value.
 * NOTE(review): presumably these are what rdma_mod.rdma_version is
 * checked against (see RDMA_BADVERS) -- confirm in the registration code.
 */
#define RPCRDMA_VERS 0 /* Version of the RPC over RDMA protocol */
#define RDMATF_VERS 1 /* Version of the API used by RPC for RDMA */
#define RDMATF_VERS_1 1 /* Current version of RDMATF */
/*
 * The size of an RPC call or reply message.
 * NOTE(review): the unit for all sizes in this group is presumably
 * bytes -- confirm against the users of these constants.
 */
#define RPC_MSG_SZ 1024
/*
 * Storage for a chunk list
 */
#define RPC_CL_SZ 1024
/*
 * Chunk size.
 * NOTE(review): looks like the compile-time counterpart of the
 * rdma_minchunk variable declared in the _KERNEL section of this
 * header -- confirm.
 */
#define MINCHUNK 1024
/*
 * Size of receive buffer
 */
#define RPC_BUF_SIZE 2048
/*
 * Completion-wait selectors.
 * NOTE(review): presumably the values passed as the "wait" argument of
 * RDMA_READ() and RDMA_WRITE() -- confirm against callers.
 */
#define NOWAIT 0 /* don't wait for operation to complete */
#define WAIT 1 /* wait and ensure that operation is complete */
/*
 * RDMA xdr buffer control and other control flags. Add new flags here,
 * and set them in the private structure for xdr over RDMA in xdr_rdma.c.
 */
#define RDMA_NOCHUNK 0x1 /* NOTE(review): name suggests "do not chunk" -- confirm */
/*
 * Return codes from RDMA operations.
 *
 * rdma_stat is the return type of most entries in the rdmaops vector
 * and of several of the general RDMA routines declared in this header.
 * Every value is assigned explicitly: RDMA_SUCCESS is 0 and the
 * remaining codes describe why an operation did not succeed.
 */
typedef enum {
RDMA_SUCCESS = 0, /* successful operation */
RDMA_INVAL = 1, /* invalid parameter */
RDMA_TIMEDOUT = 2, /* operation timed out */
RDMA_INTR = 3, /* operation interrupted */
RDMA_NORESOURCE = 4, /* insufficient resource */
/*
 * connection errors
 */
RDMA_REJECT = 5, /* connection req rejected */
RDMA_NOLISTENER = 6, /* no listener on server */
RDMA_UNREACHABLE = 7, /* host unreachable */
RDMA_CONNLOST = 8, /* connection lost */
RDMA_XPRTFAILED = 9, /* RDMA transport failed */
RDMA_PROTECTERR = 10, /* memory protection error */
RDMA_OVERRUN = 11, /* transport overrun */
RDMA_RECVQEMPTY = 12, /* incoming pkt dropped, recv q empty */
RDMA_PROTFAILED = 13, /* RDMA protocol failed */
RDMA_NOTSUPP = 14, /* requested feature not supported */
RDMA_REMOTERR = 15, /* error at remote end */
/*
 * RDMATF errors
 */
RDMA_BADVERS = 16, /* mismatch RDMATF versions */
RDMA_REG_EXIST = 17, /* RDMATF registration already exists */
/*
 * fallback error
 */
RDMA_FAILED = 18 /* generic error */
} rdma_stat;
/*
 * Memory region context. This is an RDMA provider generated
 * handle for a registered arbitrary size contiguous virtual
 * memory. The RDMA Interface Adapter needs this for local or
 * remote memory access.
 *
 * The mrc_rmr field holds the remote memory region context
 * which is sent over-the-wire to provide the remote host
 * with RDMA access to the memory region.
 *
 * The local side of the handle lives in the nested lhdl.mr
 * structure. The lhdl union currently has that structure as its
 * only member.
 */
struct mrc {
uint32_t mrc_rmr; /* Remote MR context, sent OTW */
union {
struct mr {
uint32_t lmr; /* Local MR context */
uint64_t linfo; /* Local memory info */
} mr;
} lhdl;
};
/*
 * Shorthand for the nested local-handle fields, so that code can write
 * h.mrc_lmr / h.mrc_linfo. Being plain macros, these names are replaced
 * wherever they appear in any file that includes this header.
 */
#define mrc_lmr lhdl.mr.lmr
#define mrc_linfo lhdl.mr.linfo
/*
 * Chunk list entry.
 *
 * The XDR offset value is used by the XDR
 * routine to identify the position in the
 * RPC message where the opaque object would
 * normally occur. Neither the data content
 * of the chunk, nor its size field are included
 * in the RPC message. The XDR offset is calculated
 * as if the chunks were present.
 *
 * The remaining fields identify the chunk of data. Each chunk
 * carries a source and a destination description: the
 * c_smemhandle/c_dmemhandle fields identify registered RDMA memory
 * regions, and the c_saddr/c_daddr fields together with c_len
 * identify the chunk within them. Entries are chained through
 * c_next to form a singly linked list.
 */
struct clist {
uint32 c_xdroff; /* XDR offset */
uint32 c_len; /* Length */
struct mrc c_smemhandle; /* src memory handle */
uint64 c_ssynchandle; /* src sync handle */
uint64 c_saddr; /* src address */
struct mrc c_dmemhandle; /* dst memory handle */
uint64 c_dsynchandle; /* dst sync handle */
uint64 c_daddr; /* dst address */
struct clist *c_next; /* Next chunk */
};
/* Allows the list entry to be named without the struct keyword */
typedef struct clist clist;
/*
 * Kinds of RDMA message, distinguished by what follows in the message.
 * NOTE(review): these look like the RPC over RDMA header procedure
 * values, in which case the numbers are fixed by the protocol -- confirm
 * before renumbering.
 */
enum rdma_proc {
RDMA_MSG = 0, /* chunk list and RPC msg follow */
RDMA_NOMSG = 1, /* only chunk list follows */
RDMA_MSGP = 2, /* chunk list and RPC msg with padding follow */
RDMA_DONE = 3 /* signal completion of chunk transfer */
};
/*
 * Listener information for a service.
 *
 * This is the argument type of the rdma_svc_listen and rdma_svc_stop
 * entries of the rdmaops vector. Note that the queue is embedded by
 * value, not referenced through a pointer.
 */
struct rdma_svc_data {
queue_t q; /* queue_t to place incoming pkts */
int active; /* If active, after registration startup */
rdma_stat err_code; /* Error code from plugin layer */
int32_t svcid; /* RDMA based service identifier */
};
/*
 * Per RDMA plugin module information.
 * Will be populated by each plugin
 * module during its initialization.
 *
 * This is the argument type of rdma_register_mod() and
 * rdma_unregister_mod(). Each CONN points back at its module through
 * c_rdmamod, which is how the RDMA_* wrapper macros locate the
 * operations vector.
 */
typedef struct rdma_mod {
char *rdma_api; /* "kvipl", "ibtf", etc */
uint_t rdma_version; /* RDMATF API version */
int rdma_count; /* # of devices */
struct rdmaops *rdma_ops; /* rdma op vector for api */
} rdma_mod_t;
/*
 * Registry of RDMA plugins.
 *
 * One node of a singly linked list, chained through r_next. In the
 * kernel the list head is rdma_mod_head, and rdma_lock is declared as
 * the lock protecting that list.
 */
typedef struct rdma_registry {
rdma_mod_t *r_mod; /* plugin mod info */
struct rdma_registry *r_next; /* next registered RDMA plugin */
} rdma_registry_t;
/*
 * RDMA transport information.
 *
 * This is the argument type of the rdma_getinfo entry of the rdmaops
 * vector (see RDMA_GETINFO()).
 */
typedef struct rdma_info {
uint_t addrlen; /* address length */
uint_t mts; /* max transfer size */
uint_t mtu; /* native mtu size of underlying network */
} rdma_info_t;
/*
 * RDMA Connection information.
 *
 * A CONN is the handle passed to almost every entry of the rdmaops
 * vector. Connections are kept on a doubly linked list through
 * c_next/c_prev. c_lock is documented as protecting c_state and c_ref.
 */
typedef struct conn {
rdma_mod_t *c_rdmamod; /* RDMA transport info for conn */
struct netbuf c_raddr; /* remote address */
struct netbuf c_laddr; /* local address */
int c_ref; /* no. of clients of connection */
struct conn *c_next; /* next in list of connections */
struct conn *c_prev; /* prev in list of connections */
caddr_t c_private; /* transport specific stuff */
/*
 * Values for c_state. Each one is a distinct bit taken from the top
 * of a 32-bit word. Although they are written inside the structure
 * body, they are ordinary macros and are visible to the rest of any
 * file that includes this header.
 * NOTE(review): whether more than one bit may be set at a time is not
 * visible here -- confirm against the plugin code.
 */
#define C_IDLE 0x80000000
#define C_CONN_PEND 0x40000000
#define C_CONNECTED 0x20000000
#define C_ERROR 0x10000000
#define C_DISCONN_PEND 0x08000000
#define C_REMOTE_DOWN 0x04000000
uint_t c_state; /* state of connection */
kmutex_t c_lock; /* protect c_state and c_ref fields */
kcondvar_t c_cv; /* to signal when pending is done */
} CONN;
/*
 * Memory management for the RDMA buffers
 */

/*
 * RDMA buffer types.
 *
 * Tag stored in the "type" member of rdma_buf_t. The numbering is
 * spelled out explicitly, in the same way as the other enumerations
 * in this header; the values are the ones the compiler would assign
 * by default.
 */
typedef enum {
	/* buf for send msg */
	SEND_BUFFER = 0,
	/* buf used for send msg descriptor in plugins only */
	SEND_DESCRIPTOR = 1,
	/* buf for recv msg */
	RECV_BUFFER = 2,
	/* buf used for recv msg descriptor in plugins only */
	RECV_DESCRIPTOR = 3,
	/* chunk buf used in RDMATF only and not in plugins */
	CHUNK_BUFFER = 4
} rdma_btype;
/*
 * RDMA buffer information.
 *
 * Describes one buffer: what it is used for, where it is, how long it
 * is and the memory registration handle that goes with it. This is the
 * argument type of the rdma_buf_alloc and rdma_buf_free entries of the
 * rdmaops vector.
 */
typedef struct rdma_buf {
rdma_btype type; /* buffer type */
int len; /* length of buffer */
caddr_t addr; /* buffer address */
struct mrc handle; /* buffer registration handle */
} rdma_buf_t;
/*
 * Data transferred from plugin interrupt to svc_queuereq()
 */
struct recv_data {
CONN *conn; /* connection the message belongs to */
int status; /* NOTE(review): presumably an rdma_stat value -- confirm */
rdma_buf_t rpcmsg; /* buffer describing the RPC message (held by value) */
};
/*
 * Operations vector for RDMA transports.
 *
 * Each plugin supplies one of these through rdma_mod.rdma_ops. Callers
 * normally do not dereference the vector directly but go through the
 * RDMA_* wrapper macros defined after this structure; the argument
 * names used in those macros are repeated in the comments here because
 * most of the prototypes leave their parameters unnamed.
 *
 * The order of the members is part of the interface between RPC and
 * the plugins and must not be changed.
 */
typedef struct rdmaops {
/* Network */
/* args: addr_type, addr, handle */
rdma_stat (*rdma_reachable)(int addr_type, struct netbuf *,
void **handle);
/* Connection */
/* args: addr, addr_type, handle, conn (conn is an out parameter) */
rdma_stat (*rdma_get_conn)(struct netbuf *, int addr_type,
void *, CONN **);
rdma_stat (*rdma_rel_conn)(CONN *);
/* Server side listener start and stop routines */
void (*rdma_svc_listen)(struct rdma_svc_data *);
void (*rdma_svc_stop)(struct rdma_svc_data *);
/* Memory */
/* args: conn, buff, len, handle */
rdma_stat (*rdma_regmem)(CONN *, caddr_t, uint_t, struct mrc *);
/* args: conn, buff, handle (the handle is passed by value) */
rdma_stat (*rdma_deregmem)(CONN *, caddr_t, struct mrc);
/* args: conn, buff, len, handle, synchandle */
rdma_stat (*rdma_regmemsync)(CONN *, caddr_t, uint_t,
struct mrc *, void **);
/* args: conn, buff, handle, synchandle (the handle is passed by value) */
rdma_stat (*rdma_deregmemsync)(CONN *, caddr_t, struct mrc,
void *);
/* args: conn, handle, buff, len, direction */
rdma_stat (*rdma_syncmem)(CONN *, void *, caddr_t, int, int);
/* Buffer */
/* args: conn, rbuf */
rdma_stat (*rdma_buf_alloc)(CONN *, rdma_buf_t *);
void (*rdma_buf_free)(CONN *, rdma_buf_t *);
/* Transfer */
/* args: conn, sendlist, xid */
rdma_stat (*rdma_send)(CONN *, clist *, uint32_t);
rdma_stat (*rdma_send_resp)(CONN *, clist *, uint32_t);
/* args: conn, cl, xid */
rdma_stat (*rdma_clnt_recvbuf)(CONN *, clist *, uint32_t);
/* args: conn, cl */
rdma_stat (*rdma_svc_recvbuf)(CONN *, clist *);
/* args: conn, recvlist, xid */
rdma_stat (*rdma_recv)(CONN *, clist **, uint32_t);
/* RDMA */
/* args: conn, cl, wait */
rdma_stat (*rdma_read)(CONN *, clist *, int);
rdma_stat (*rdma_write)(CONN *, clist *, int);
/* INFO */
rdma_stat (*rdma_getinfo)(rdma_info_t *info);
} rdmaops_t;
/*
 * RDMA operations.
 *
 * Thin wrappers that dispatch through a plugin's operations vector.
 * The first two take the vector itself, RDMA_GETINFO() takes the
 * rdma_mod_t, and all the others find the vector by following
 * conn->c_rdmamod->rdma_ops.
 *
 * The conn argument of the connection based wrappers is expanded
 * twice (once to locate the vector and once as the first argument of
 * the call), so it must not have side effects.
 */

/* Dispatch directly through an operations vector */
#define RDMA_REACHABLE(rdma_ops, addr_type, addr, handle) \
	((rdma_ops)->rdma_reachable((addr_type), (addr), (handle)))

#define RDMA_GET_CONN(rdma_ops, addr, addr_type, handle, conn) \
	((rdma_ops)->rdma_get_conn((addr), (addr_type), (handle), (conn)))

/* Connection */
#define RDMA_REL_CONN(conn) \
	((conn)->c_rdmamod->rdma_ops->rdma_rel_conn((conn)))

/* Memory registration and synchronization */
#define RDMA_REGMEM(conn, buff, len, handle) \
	((conn)->c_rdmamod->rdma_ops->rdma_regmem((conn), (buff), \
	    (len), (handle)))

#define RDMA_DEREGMEM(conn, buff, handle) \
	((conn)->c_rdmamod->rdma_ops->rdma_deregmem((conn), (buff), \
	    (handle)))

#define RDMA_REGMEMSYNC(conn, buff, len, handle, synchandle) \
	((conn)->c_rdmamod->rdma_ops->rdma_regmemsync((conn), (buff), \
	    (len), (handle), (synchandle)))

#define RDMA_DEREGMEMSYNC(conn, buff, handle, synchandle) \
	((conn)->c_rdmamod->rdma_ops->rdma_deregmemsync((conn), (buff), \
	    (handle), (synchandle)))

#define RDMA_SYNCMEM(conn, handle, buff, len, direction) \
	((conn)->c_rdmamod->rdma_ops->rdma_syncmem((conn), (handle), \
	    (buff), (len), (direction)))

/* Buffers */
#define RDMA_BUF_ALLOC(conn, rbuf) \
	((conn)->c_rdmamod->rdma_ops->rdma_buf_alloc((conn), (rbuf)))

#define RDMA_BUF_FREE(conn, rbuf) \
	((conn)->c_rdmamod->rdma_ops->rdma_buf_free((conn), (rbuf)))

/* Message transfer */
#define RDMA_SEND(conn, sendlist, xid) \
	((conn)->c_rdmamod->rdma_ops->rdma_send((conn), (sendlist), (xid)))

#define RDMA_SEND_RESP(conn, sendlist, xid) \
	((conn)->c_rdmamod->rdma_ops->rdma_send_resp((conn), (sendlist), \
	    (xid)))

#define RDMA_CLNT_RECVBUF(conn, cl, xid) \
	((conn)->c_rdmamod->rdma_ops->rdma_clnt_recvbuf((conn), (cl), (xid)))

#define RDMA_SVC_RECVBUF(conn, cl) \
	((conn)->c_rdmamod->rdma_ops->rdma_svc_recvbuf((conn), (cl)))

#define RDMA_RECV(conn, recvlist, xid) \
	((conn)->c_rdmamod->rdma_ops->rdma_recv((conn), (recvlist), (xid)))

/* RDMA read and write */
#define RDMA_READ(conn, cl, wait) \
	((conn)->c_rdmamod->rdma_ops->rdma_read((conn), (cl), (wait)))

#define RDMA_WRITE(conn, cl, wait) \
	((conn)->c_rdmamod->rdma_ops->rdma_write((conn), (cl), (wait)))

/* Transport information, dispatched through an rdma_mod_t */
#define RDMA_GETINFO(rdma_mod, info) \
	((rdma_mod)->rdma_ops->rdma_getinfo((info)))
#ifdef _KERNEL
/*
 * Kernel-only global state and entry points of the RDMA transport
 * framework. Everything in this section is a declaration; the
 * definitions live elsewhere.
 */
extern rdma_registry_t *rdma_mod_head;	/* head of the plugin registry list */
extern krwlock_t rdma_lock;		/* protects rdma_mod_head list */
extern int rdma_modloaded;		/* flag for loading RDMA plugins */
extern int rdma_dev_available;		/* rdma device is loaded or not */
extern kmutex_t rdma_modload_lock;	/* protects rdma_modloaded flag */
extern uint_t rdma_minchunk;
extern ldi_ident_t rpcmod_li;		/* needed by layered driver framework */

/*
 * General RDMA routines
 *
 * The clist_* routines operate on a chunk list. For those taking a
 * "src" flag, NOTE(review): it presumably selects the source (c_s*) or
 * destination (c_d*) half of each entry -- confirm in the
 * implementation.
 */
extern void clist_add(struct clist **clp, uint32_t xdroff, int len,
    struct mrc *shandle, caddr_t saddr,
    struct mrc *dhandle, caddr_t daddr);
extern void clist_free(struct clist *cl);
extern int clist_register(CONN *conn, struct clist *cl, bool_t src);
extern int clist_deregister(CONN *conn, struct clist *cl, bool_t src);
extern rdma_stat rdma_clnt_postrecv(CONN *conn, uint32_t xid);
extern rdma_stat rdma_svc_postrecv(CONN *conn);
extern rdma_stat clist_syncmem(CONN *conn, struct clist *cl, bool_t src);
extern rdma_stat rdma_register_mod(rdma_mod_t *mod);
extern rdma_stat rdma_unregister_mod(rdma_mod_t *mod);
extern void rdma_buf_free(CONN *conn, rdma_buf_t *rbuf);
/*
 * Declared with (void) so that this is a real prototype: an empty
 * parameter list would leave the arguments unchecked by the compiler.
 */
extern int rdma_modload(void);

/*
 * RDMA XDR
 */
extern void xdrrdma_create(XDR *, caddr_t, uint_t, int, struct clist *,
    enum xdr_op, CONN *);
extern void xdrrdma_destroy(XDR *);
extern struct clist *xdrrdma_clist(XDR *);
extern uint_t xdrrdma_getpos(XDR *);
extern bool_t xdrrdma_setpos(XDR *, uint_t);
extern bool_t xdr_clist(XDR *, clist *);
extern bool_t xdr_do_clist(XDR *, clist **);
extern uint_t xdr_getbufsize(XDR *);
extern unsigned int xdrrdma_sizeof(xdrproc_t func, void *data, int min_chunk);
extern unsigned int xdrrdma_authsize(AUTH *auth, struct cred *cred,
    int min_chunk);
#endif /* _KERNEL */
#ifdef __cplusplus
}
#endif
#endif /* _RPC_RPC_RDMA_H */