#This patch was developed both in-house and from outside. We plan to submit it
#upstream, but do not yet have a target date for doing so
#
# HG changeset patch
# Parent b5e87c2d333b6adb2743deb20bd5d68a754ea03f
Add solaris specific changes for librdmacm
Added the changes required to compile and work with the Solaris
system.
diff -r b5e87c2d333b Makefile.am
--- a/Makefile.am Wed Nov 18 11:01:02 2015 -0800
+++ b/Makefile.am Thu Nov 19 11:18:58 2015 -0800
@@ -143,6 +143,7 @@
+ man/cmtime.1 \
if ENABLE_RSOCKET
diff -r b5e87c2d333b Makefile.in
--- a/Makefile.in Wed Nov 18 11:01:02 2015 -0800
+++ b/Makefile.in Thu Nov 19 11:18:58 2015 -0800
@@ -484,6 +484,7 @@
+ man/cmtime.1 \
$(am__append_4)
diff -r b5e87c2d333b examples/cmatose.c
--- a/examples/cmatose.c Wed Nov 18 11:01:02 2015 -0800
+++ b/examples/cmatose.c Thu Nov 19 11:18:58 2015 -0800
@@ -40,7 +40,9 @@
#include <netinet/in.h>
#include <sys/socket.h>
#include <netdb.h>
+#if !(defined(__SVR4) && defined(__sun))
#include <byteswap.h>
+#endif
#include <getopt.h>
#include <rdma/rdma_cma.h>
diff -r b5e87c2d333b examples/common.c
--- a/examples/common.c Wed Nov 18 11:01:02 2015 -0800
+++ b/examples/common.c Thu Nov 19 11:18:58 2015 -0800
@@ -40,7 +40,9 @@
#include <netinet/in.h>
#include <sys/socket.h>
#include <netdb.h>
+#if !(defined(__SVR4) && defined(__sun))
#include <byteswap.h>
+#endif
#include <rdma/rdma_cma.h>
#include "common.h"
diff -r b5e87c2d333b examples/common.h
--- a/examples/common.h Wed Nov 18 11:01:02 2015 -0800
+++ b/examples/common.h Thu Nov 19 11:18:58 2015 -0800
@@ -34,7 +34,9 @@
#include <stdlib.h>
#include <sys/types.h>
+#if !(defined(__SVR4) && defined(__sun))
#include <byteswap.h>
+#endif
#include <poll.h>
#include <rdma/rdma_cma.h>
diff -r b5e87c2d333b examples/mckey.c
--- a/examples/mckey.c Wed Nov 18 11:01:02 2015 -0800
+++ b/examples/mckey.c Thu Nov 19 11:18:58 2015 -0800
@@ -41,7 +41,9 @@
#include <arpa/inet.h>
#include <sys/socket.h>
#include <netdb.h>
+#if !(defined(__SVR4) && defined(__sun))
#include <byteswap.h>
+#endif
#include <unistd.h>
#include <getopt.h>
@@ -329,6 +331,16 @@
while (1) {
ret = rdma_get_cm_event(test.channel, &event);
+
+/*
+ * Solaris returns EBADF if we close the channel while we're waiting
+ * for any events to occur. It is safe to ignore EBADF here.
+ */
+#if defined(__SVR4) && defined(__sun)
+ if (ret && (errno == EBADF))
+ break;
+#endif
+
if (ret) {
perror("rdma_get_cm_event");
break;
@@ -461,6 +473,8 @@
return ret;
}
+/* Solaris does not yet support family AF_IB */
+#if !(defined(__SVR4) && defined(__sun))
static int get_dst_addr(char *dst, struct sockaddr *addr)
{
struct sockaddr_ib *sib;
@@ -474,6 +488,7 @@
inet_pton(AF_INET6, dst, &sib->sib_addr);
return 0;
}
+#endif
static int run(void)
{
@@ -486,7 +501,12 @@
return ret;
}
+/* Solaris does not yet support family AF_IB */
+#if defined(__SVR4) && defined(__sun)
+ ret = get_addr(dst_addr, (struct sockaddr *) &test.dst_in);
+#else
ret = get_dst_addr(dst_addr, (struct sockaddr *) &test.dst_in);
+#endif
if (ret)
return ret;
diff -r b5e87c2d333b examples/rping.c
--- a/examples/rping.c Wed Nov 18 11:01:02 2015 -0800
+++ b/examples/rping.c Thu Nov 19 11:18:58 2015 -0800
@@ -40,11 +40,17 @@
#include <netinet/in.h>
#include <sys/socket.h>
#include <netdb.h>
+#if !(defined(__SVR4) && defined(__sun))
#include <byteswap.h>
+#endif
#include <semaphore.h>
#include <arpa/inet.h>
#include <pthread.h>
#include <inttypes.h>
+#if defined(__SVR4) && defined(__sun)
+#include <unistd.h>
+#include <libgen.h>
+#endif
#include <rdma/rdma_cma.h>
#include <infiniband/arch.h>
@@ -572,9 +578,15 @@
while (1) {
ret = rdma_get_cm_event(cb->cm_channel, &event);
- if (ret) {
+ /*
+ * If the retry of read() syscall returned EBADF, as the
+ * file was closed on process exit. Ignore this error.
+ */
+ if (ret && errno != EBADF) {
perror("rdma_get_cm_event");
exit(ret);
+ } else if (ret && errno == EBADF) {
+ exit(0);
}
ret = rping_cma_event_handler(event->id, event);
rdma_ack_cm_event(event);
@@ -596,8 +608,14 @@
pthread_testcancel();
ret = ibv_get_cq_event(cb->channel, &ev_cq, &ev_ctx);
- if (ret) {
+ /*
+ * If the retry of write() syscall returned EBADF, as the
+ * file was closed on process exit. Ignore this error.
+ */
+ if (ret && errno != EBADF) {
fprintf(stderr, "Failed to get cq event!\n");
+ pthread_exit(NULL);
+ } else if (ret && errno == EBADF) {
pthread_exit(NULL);
}
if (ev_cq != cb->cq) {
diff -r b5e87c2d333b examples/udaddy.c
--- a/examples/udaddy.c Wed Nov 18 11:01:02 2015 -0800
+++ b/examples/udaddy.c Thu Nov 19 11:18:58 2015 -0800
@@ -40,7 +40,9 @@
#include <netinet/in.h>
#include <sys/socket.h>
#include <netdb.h>
+#if !(defined(__SVR4) && defined(__sun))
#include <byteswap.h>
+#endif
#include <getopt.h>
#include <rdma/rdma_cma.h>
diff -r b5e87c2d333b include/infiniband/ib.h
--- a/include/infiniband/ib.h Wed Nov 18 11:01:02 2015 -0800
+++ b/include/infiniband/ib.h Thu Nov 19 11:18:58 2015 -0800
@@ -33,7 +33,11 @@
#if !defined(_RDMA_IB_H)
#define _RDMA_IB_H
+#if !(defined(__SVR4) && defined(__sun))
#include <linux/types.h>
+#else
+#include <infiniband/ofa_solaris.h>
+#endif
#include <string.h>
#ifndef AF_IB
diff -r b5e87c2d333b include/rdma/rdma_cma_abi.h
--- a/include/rdma/rdma_cma_abi.h Wed Nov 18 11:01:02 2015 -0800
+++ b/include/rdma/rdma_cma_abi.h Thu Nov 19 11:18:58 2015 -0800
@@ -113,6 +113,7 @@
__u64 response;
struct sockaddr_in6 addr;
__u32 id;
+ uint32_t reserved;
};
struct ucma_abi_bind {
@@ -291,6 +292,7 @@
__u64 uid;
struct sockaddr_in6 addr;
__u32 id;
+ uint32_t reserved;
};
struct ucma_abi_join_mcast {
diff -r b5e87c2d333b man/cmtime.1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/man/cmtime.1 Thu Nov 19 11:18:58 2015 -0800
@@ -0,0 +1,45 @@
+.TH "CMTIME" 1 "2015-11-19" "librdmacm" "librdmacm" librdmacm
+.SH NAME
+cmtime \- sample program that times rdma cm calls
+.SH SYNOPSIS
+.sp
+.nf
+\fIcmtime\fR [-s server_address] [-b bind_address] [-c connections]
+ [-p port_number] [-t timeout_ms]
+\fIcmtime\fR -s server_address [-b bind_address] [-c connections]
+ [-p port_number] [-t timeout_ms]
+.fi
+.SH "DESCRIPTION"
+Measures how long it takes for each step in the RDMA connection
+process to complete. It can be used to analyze the performance of
+the various CM steps.
+.SH "OPTIONS"
+.TP
+\-s server_address
+The IP address of the server system listening for connections.
+The used address must route over an RDMA device.This option must be
+specified by the client.
+.TP
+\-b bind_address
+The local network address to bind to.
+.TP
+\-c connections
+The number of connections to establish between the client and server.
+(default 100)
+.TP
+\-p port_number
+Specifies the port number that the server listens on. By default the server
+listens on port 7471.
+.TP
+\-t timeout_ms
+Specifies the timeout value in milliseconds for which it would try to
+resolve the given IP address. By default, the value is 2000ms
+.SH "NOTES"
+Basic usage is to start cmtime on a server system, then run
+cmtime -s server_name on a client system.
+.P
+Because this test maps RDMA resources to userspace, users must ensure
+that they have available system resources and permissions. See the
+libibverbs README file for additional details.
+.SH "SEE ALSO"
+rdma_cm(7), udaddy(1), rping(1), ucmatose(1), mckey(1)
diff -r b5e87c2d333b man/rdma_cm.7
--- a/man/rdma_cm.7 Wed Nov 18 11:01:02 2015 -0800
+++ b/man/rdma_cm.7 Thu Nov 19 11:18:58 2015 -0800
@@ -19,7 +19,7 @@
API defined by the libibverbs library. The libibverbs library provides the
underlying interfaces needed to send and receive data.
.P
-The RDMA CM can operate asynchronously or synchronously. The mode of
+The RDMA CM can operate asynchronously. The mode of
operation is controlled by the user through the use of the rdma_cm event channel
parameter in specific calls. If an event channel is provided, an rdma_cm identifier
will report its event data (results of connecting, for example), on that channel.
@@ -31,44 +31,10 @@
of the more commonly used verbs funcationality. The full set of abstracted
verb calls are:
.P
-rdma_reg_msgs - register an array of buffers for sending and receiving
-rdma_reg_read - registers a buffer for RDMA read operations
-rdma_reg_write - registers a buffer for RDMA write operations
-rdma_dereg_mr - deregisters a memory region
-rdma_post_recv - post a buffer to receive a message
-rdma_post_send - post a buffer to send a message
-rdma_post_read - post an RDMA to read data into a buffer
-rdma_post_write - post an RDMA to send data from a buffer
-rdma_post_recvv - post a vector of buffers to receive a message
-rdma_post_sendv - post a vector of buffers to send a message
-rdma_post_readv - post a vector of buffers to receive an RDMA read
-rdma_post_writev - post a vector of buffers to send an RDMA write
-rdma_post_ud_send - post a buffer to send a message on a UD QP
-rdma_get_send_comp - get completion status for a send or RDMA operation
-rdma_get_recv_comp - get information about a completed receive
.SH "CLIENT OPERATION"
This section provides a general overview of the basic operation for the active,
or client, side of communication. This flow assume asynchronous operation with
-low level call details shown. For
-synchronous operation, calls to rdma_create_event_channel, rdma_get_cm_event,
-rdma_ack_cm_event, and rdma_destroy_event_channel
-would be eliminated. Abstracted calls, such as rdma_create_ep encapsulate
-serveral of these calls under a single API.
-Users may also refer to the example applications for
+low level call details shown. Users may also refer to the example applications for
code samples. A general connection flow would be:
.IP rdma_getaddrinfo
retrieve address information of the destination
@@ -178,12 +144,9 @@
rdma_ack_cm_event(3),
rdma_bind_addr(3),
rdma_connect(3),
-rdma_create_ep(3),
rdma_create_event_channel(3),
rdma_create_id(3),
rdma_create_qp(3),
-rdma_dereg_mr(3),
-rdma_destroy_ep(3),
rdma_destroy_event_channel(3),
rdma_destroy_id(3),
rdma_destroy_qp(3),
@@ -196,27 +159,11 @@
rdma_get_dst_port(3),
rdma_get_local_addr(3),
rdma_get_peer_addr(3),
-rdma_get_recv_comp(3),
-rdma_get_request(3),
-rdma_get_send_comp(3),
rdma_get_src_port(3),
rdma_join_multicast(3),
rdma_leave_multicast(3),
rdma_listen(3),
-rdma_migrate_id(3),
rdma_notify(3),
-rdma_post_read(3)
-rdma_post_readv(3),
-rdma_post_recv(3),
-rdma_post_recvv(3),
-rdma_post_send(3),
-rdma_post_sendv(3),
-rdma_post_ud_send(3),
-rdma_post_write(3),
-rdma_post_writev(3),
-rdma_reg_msgs(3),
-rdma_reg_read(3),
-rdma_reg_write(3),
rdma_reject(3),
rdma_resolve_addr(3),
rdma_resolve_route(3),
diff -r b5e87c2d333b man/rdma_set_option.3
--- a/man/rdma_set_option.3 Wed Nov 18 11:01:02 2015 -0800
+++ b/man/rdma_set_option.3 Thu Nov 19 11:18:58 2015 -0800
@@ -14,16 +14,26 @@
.IP "id" 12
RDMA identifier.
.IP "level" 12
-Protocol level of the option to set.
+Protocol level of the option to set. Currently level RDMA_OPTION_ID is supported.
.IP "optname" 12
-Name of the option, relative to the level, to set.
+Name of the option, relative to the level, to set. The only supported option isRDMA_OPTION_ID_REUSEADDR for level RDMA_OPTION_ID.
.IP "optval" 12
-Reference to the option data. The data is dependent on the level and optname.
+Reference to the option data. The data is dependent on the level and optname. For the option RDMA_OPTION_ID_REUSEADDR, an integer is passed.
.IP "optlen" 12
The size of the %optval buffer.
.SH "DESCRIPTION"
Sets communication options for an rdma_cm_id. This call is used to override
the default system settings.
+.sp
+The RDMA_OPTION_ID_REUSEADDR option can be used to enable or
+disable REUSEADDR option for a CMID. A value of 0 disables
+the option and a non-zero value enables the option. This
+option can be set before calling rdma_bind_addr(3) or the
+rdma_resolve_addr(3) API. Listening for connection requests,
+using rdma_listen(3), is not supported for CMIDs set with
+this option. This option enables multiple connections to share
+the same source IP Port on the active side of the connection.
+.sp
.SH "RETURN VALUE"
Returns 0 on success, or -1 on error. If an error occurs, errno will be
set to indicate the failure reason.
diff -r b5e87c2d333b src/addrinfo.c
--- a/src/addrinfo.c Wed Nov 18 11:01:02 2015 -0800
+++ b/src/addrinfo.c Thu Nov 19 11:18:58 2015 -0800
@@ -132,9 +132,16 @@
(*dst)->sib_family = AF_IB;
(*dst)->sib_pkey = 0xFFFF;
(*dst)->sib_flowinfo = src->sin6_flowinfo;
+#if !(defined(__SVR4) && defined(__sun))
ib_addr_set(&(*dst)->sib_addr, src->sin6_addr.s6_addr32[0],
src->sin6_addr.s6_addr32[1], src->sin6_addr.s6_addr32[2],
src->sin6_addr.s6_addr32[3]);
+#else
+ ib_addr_set(&(*dst)->sib_addr, src->sin6_addr._S6_un._S6_u32[0],
+ src->sin6_addr._S6_un._S6_u32[1], src->sin6_addr._S6_un._S6_u32[2],
+ src->sin6_addr._S6_un._S6_u32[3]);
+#endif
+
ucma_set_sid(ps, (struct sockaddr *) src, *dst);
(*dst)->sib_scope_id = src->sin6_scope_id;
diff -r b5e87c2d333b src/cma.c
--- a/src/cma.c Wed Nov 18 11:01:02 2015 -0800
+++ b/src/cma.c Thu Nov 19 11:18:58 2015 -0800
@@ -44,8 +44,13 @@
#include <poll.h>
#include <unistd.h>
#include <pthread.h>
+#if defined(__SVR4) && defined(__sun)
+#include <sys/stat.h>
+#include <sys/mkdev.h>
+#else
#include <endian.h>
#include <byteswap.h>
+#endif
#include <stddef.h>
#include <netdb.h>
#include <syslog.h>
@@ -53,6 +58,7 @@
#include "cma.h"
#include "indexer.h"
+#include <infiniband/arch.h>
#include <infiniband/driver.h>
#include <infiniband/marshall.h>
#include <rdma/rdma_cma.h>
@@ -154,7 +160,6 @@
fastlock_destroy(&idm_lock);
free(cma_dev_array);
- cma_dev_cnt = 0;
}
}
@@ -173,12 +178,17 @@
* backports, assume the most recent version of the ABI. If
* we're wrong, we'll simply fail later when calling the ABI.
*/
+ fprintf(stderr, "librdmacm: couldn't read ABI version.\n");
+ fprintf(stderr, "librdmacm: assuming: %d\n", abi_ver);
return 0;
}
abi_ver = strtol(value, NULL, 10);
if (abi_ver < RDMA_USER_CM_MIN_ABI_VERSION ||
abi_ver > RDMA_USER_CM_MAX_ABI_VERSION) {
+ fprintf(stderr, "librdmacm: kernel ABI version %d "
+ "doesn't match library version %d.\n",
+ abi_ver, RDMA_USER_CM_MAX_ABI_VERSION);
return -1;
}
return 0;
@@ -232,11 +242,13 @@
dev_list = ibv_get_device_list(&dev_cnt);
if (!dev_list) {
+ printf("CMA: unable to get RDMA device list\n");
ret = ERR(ENODEV);
goto err1;
}
if (!dev_cnt) {
+ printf("CMA: no RDMA devices found\n");
ret = ERR(ENODEV);
goto err2;
}
@@ -272,6 +284,7 @@
dev_list = ibv_get_device_list(NULL);
if (!dev_list) {
+ fprintf(stderr, PFX "Fatal: unable to get RDMA device list\n");
return NULL;
}
@@ -282,6 +295,9 @@
}
}
+ if (!verbs)
+ fprintf(stderr, PFX "Fatal: unable to open RDMA device\n");
+
ibv_free_device_list(dev_list);
return verbs;
}
@@ -301,6 +317,7 @@
ret = ibv_query_device(cma_dev->verbs, &attr);
if (ret) {
+ fprintf(stderr, PFX "Fatal: unable to query RDMA device\n");
ret = ERR(ret);
goto err;
}
@@ -396,8 +413,18 @@
if (!channel)
return NULL;
+#if defined(__SVR4) && defined(__sun)
+ channel->fd = open("/dev/infiniband/ofs/rdma_cm", O_RDWR | O_CLOEXEC);
+#else
channel->fd = open("/dev/infiniband/rdma_cm", O_RDWR | O_CLOEXEC);
+#endif
+
if (channel->fd < 0) {
+#if defined(__SVR4) && defined(__sun)
+ printf("CMA: unable to open /dev/infiniband/ofs/rdma_cm\n");
+#else
+ printf("CMA: unable to open /dev/infiniband/rdma_cm\n");
+#endif
goto err;
}
return channel;
@@ -1388,6 +1415,14 @@
if (ret)
return ret;
+#if defined(__SVR4) && defined(__sun)
+ /*
+ * The 31st bit of sq_sig_all is set for QPs allocated
+ * using librdmacm. Consumers use sq_sig_all 0 /1.
+ */
+ attr->sq_sig_all |= LIB_RDMACM_QP_BIT;
+#endif
+
if (!attr->send_cq)
attr->send_cq = id->send_cq;
if (!attr->recv_cq)
@@ -2018,6 +2053,9 @@
CMA_INIT_CMD(&cmd, sizeof cmd, ACCEPT);
cmd.id = id_priv->handle;
+#if defined(__SVR4) && defined(__sun)
+ cmd.conn_param.qp_num = ((id_priv->id).qp)->qp_num;
+#endif
ret = write(id_priv->id.channel->fd, &cmd, sizeof cmd);
if (ret != sizeof cmd) {
@@ -2300,7 +2338,23 @@
CMA_INIT_CMD_RESP(&cmd, sizeof cmd, MIGRATE_ID, &resp, sizeof resp);
cmd.id = id_priv->handle;
+
+#if !(defined(__SVR4) && defined(__sun))
cmd.fd = id->channel->fd;
+#else
+ {
+ /* Solaris has no way of mapping fd back to the cloned device
+ * created during open() system call. It can only map the minor
+ * number to cloned device.
+ */
+ struct stat fstat_buf;
+ int rc;
+
+ if ((rc = fstat(id->channel->fd, &fstat_buf)) != 0)
+ return (ERR(ENODATA));
+ cmd.fd = minor(fstat_buf.st_rdev);
+ }
+#endif
ret = write(channel->fd, &cmd, sizeof cmd);
if (ret != sizeof cmd) {
diff -r b5e87c2d333b src/cma.h
--- a/src/cma.h Wed Nov 18 11:01:02 2015 -0800
+++ b/src/cma.h Thu Nov 19 11:18:58 2015 -0800
@@ -40,8 +40,11 @@
#include <stdlib.h>
#include <errno.h>
+#if !(defined(__SVR4) && defined(__sun))
#include <endian.h>
#include <byteswap.h>
+#endif
+
#include <semaphore.h>
#include <rdma/rdma_cma.h>
@@ -60,6 +63,7 @@
#define PFX "librdmacm: "
+#if !(defined(__SVR4) && defined(__sun))
#if __BYTE_ORDER == __LITTLE_ENDIAN
static inline uint64_t htonll(uint64_t x) { return bswap_64(x); }
static inline uint64_t ntohll(uint64_t x) { return bswap_64(x); }
@@ -67,6 +71,7 @@
static inline uint64_t htonll(uint64_t x) { return x; }
static inline uint64_t ntohll(uint64_t x) { return x; }
#endif
+#endif
#define max(a, b) ((a) > (b) ? a : b)
#define min(a, b) ((a) < (b) ? a : b)