components/open-fabrics/librdmacm/patches/002-librdmacm-solaris_port.patch
author Drew Fisher <drew.fisher@oracle.com>
Thu, 13 Oct 2016 12:49:00 -0700
changeset 7108 e30b3b76a5ab
parent 5708 49b43e37ce26
permissions -rw-r--r--
23329921 cloudbase-init creates C:\\cfn on Solaris

#This patch was developed both in-house and from outside. We plan to submit it
#upstream, but do not yet have a target date for doing so
#
# HG changeset patch
# Parent  b5e87c2d333b6adb2743deb20bd5d68a754ea03f
Add solaris specific changes for librdmacm

Added the changes required to compile and work with the Solaris
system.

diff -r b5e87c2d333b Makefile.am
--- a/Makefile.am	Wed Nov 18 11:01:02 2015 -0800
+++ b/Makefile.am	Thu Nov 19 11:18:58 2015 -0800
@@ -143,6 +143,7 @@
 	man/rdma_client.1 \
 	man/rdma_xserver.1 \
 	man/rdma_xclient.1 \
+	man/cmtime.1 \
 	man/rdma_cm.7 
 
 if ENABLE_RSOCKET
diff -r b5e87c2d333b Makefile.in
--- a/Makefile.in	Wed Nov 18 11:01:02 2015 -0800
+++ b/Makefile.in	Thu Nov 19 11:18:58 2015 -0800
@@ -484,6 +484,7 @@
 	man/rdma_client.1 \
 	man/rdma_xserver.1 \
 	man/rdma_xclient.1 \
+	man/cmtime.1 \
 	man/rdma_cm.7 \
 	$(am__append_4)
 
diff -r b5e87c2d333b examples/cmatose.c
--- a/examples/cmatose.c	Wed Nov 18 11:01:02 2015 -0800
+++ b/examples/cmatose.c	Thu Nov 19 11:18:58 2015 -0800
@@ -40,7 +40,9 @@
 #include <netinet/in.h>
 #include <sys/socket.h>
 #include <netdb.h>
+#if !(defined(__SVR4) && defined(__sun))
 #include <byteswap.h>
+#endif
 #include <getopt.h>
 
 #include <rdma/rdma_cma.h>
diff -r b5e87c2d333b examples/common.c
--- a/examples/common.c	Wed Nov 18 11:01:02 2015 -0800
+++ b/examples/common.c	Thu Nov 19 11:18:58 2015 -0800
@@ -40,7 +40,9 @@
 #include <netinet/in.h>
 #include <sys/socket.h>
 #include <netdb.h>
+#if !(defined(__SVR4) && defined(__sun))
 #include <byteswap.h>
+#endif
 
 #include <rdma/rdma_cma.h>
 #include "common.h"
diff -r b5e87c2d333b examples/common.h
--- a/examples/common.h	Wed Nov 18 11:01:02 2015 -0800
+++ b/examples/common.h	Thu Nov 19 11:18:58 2015 -0800
@@ -34,7 +34,9 @@
 
 #include <stdlib.h>
 #include <sys/types.h>
+#if !(defined(__SVR4) && defined(__sun))
 #include <byteswap.h>
+#endif
 #include <poll.h>
 
 #include <rdma/rdma_cma.h>
diff -r b5e87c2d333b examples/mckey.c
--- a/examples/mckey.c	Wed Nov 18 11:01:02 2015 -0800
+++ b/examples/mckey.c	Thu Nov 19 11:18:58 2015 -0800
@@ -41,7 +41,9 @@
 #include <arpa/inet.h>
 #include <sys/socket.h>
 #include <netdb.h>
+#if !(defined(__SVR4) && defined(__sun))
 #include <byteswap.h>
+#endif
 #include <unistd.h>
 #include <getopt.h>
 
@@ -329,6 +331,16 @@
 
 	while (1) {
 		ret = rdma_get_cm_event(test.channel, &event);
+
+/* 
+ * Solaris returns EBADF if we close the channel while we're waiting
+ * for any events to occur. It is safe to ignore EBADF here.
+ */
+#if defined(__SVR4) && defined(__sun)
+		if (ret && (errno == EBADF))
+			break;
+#endif
+
 		if (ret) {
 			perror("rdma_get_cm_event");
 			break;
@@ -461,6 +473,8 @@
 	return ret;
 }
 
+/* Solaris does not yet support family AF_IB */
+#if !(defined(__SVR4) && defined(__sun))
 static int get_dst_addr(char *dst, struct sockaddr *addr)
 {
 	struct sockaddr_ib *sib;
@@ -474,6 +488,7 @@
 	inet_pton(AF_INET6, dst, &sib->sib_addr);
 	return 0;
 }
+#endif
 
 static int run(void)
 {
@@ -486,7 +501,12 @@
 			return ret;
 	}
 
+/* Solaris does not yet support family AF_IB */
+#if defined(__SVR4) && defined(__sun)
+	ret = get_addr(dst_addr, (struct sockaddr *) &test.dst_in);
+#else
 	ret = get_dst_addr(dst_addr, (struct sockaddr *) &test.dst_in);
+#endif
 	if (ret)
 		return ret;
 
diff -r b5e87c2d333b examples/rping.c
--- a/examples/rping.c	Wed Nov 18 11:01:02 2015 -0800
+++ b/examples/rping.c	Thu Nov 19 11:18:58 2015 -0800
@@ -40,11 +40,17 @@
 #include <netinet/in.h>
 #include <sys/socket.h>
 #include <netdb.h>
+#if !(defined(__SVR4) && defined(__sun))
 #include <byteswap.h>
+#endif
 #include <semaphore.h>
 #include <arpa/inet.h>
 #include <pthread.h>
 #include <inttypes.h>
+#if defined(__SVR4) && defined(__sun)
+#include <unistd.h>
+#include <libgen.h>
+#endif
 
 #include <rdma/rdma_cma.h>
 #include <infiniband/arch.h>
@@ -572,9 +578,15 @@
 
 	while (1) {
 		ret = rdma_get_cm_event(cb->cm_channel, &event);
-		if (ret) {
+		/*
+		 * If the retry of read() syscall returned EBADF, as the
+		 * file was closed on process exit. Ignore this error.
+		 */
+		if (ret && errno != EBADF) {
 			perror("rdma_get_cm_event");
 			exit(ret);
+		} else if (ret && errno == EBADF) {
+			exit(0);
 		}
 		ret = rping_cma_event_handler(event->id, event);
 		rdma_ack_cm_event(event);
@@ -596,8 +608,14 @@
 		pthread_testcancel();
 
 		ret = ibv_get_cq_event(cb->channel, &ev_cq, &ev_ctx);
-		if (ret) {
+		/*
+		 * If the retry of write() syscall returned EBADF, as the
+		 * file was closed on process exit. Ignore this error.
+		 */
+		if (ret && errno != EBADF) {
 			fprintf(stderr, "Failed to get cq event!\n");
+			 pthread_exit(NULL);
+		} else if (ret && errno == EBADF) {
 			pthread_exit(NULL);
 		}
 		if (ev_cq != cb->cq) {
diff -r b5e87c2d333b examples/udaddy.c
--- a/examples/udaddy.c	Wed Nov 18 11:01:02 2015 -0800
+++ b/examples/udaddy.c	Thu Nov 19 11:18:58 2015 -0800
@@ -40,7 +40,9 @@
 #include <netinet/in.h>
 #include <sys/socket.h>
 #include <netdb.h>
+#if !(defined(__SVR4) && defined(__sun))
 #include <byteswap.h>
+#endif
 #include <getopt.h>
 
 #include <rdma/rdma_cma.h>
diff -r b5e87c2d333b include/infiniband/ib.h
--- a/include/infiniband/ib.h	Wed Nov 18 11:01:02 2015 -0800
+++ b/include/infiniband/ib.h	Thu Nov 19 11:18:58 2015 -0800
@@ -33,7 +33,11 @@
 #if !defined(_RDMA_IB_H)
 #define _RDMA_IB_H
 
+#if !(defined(__SVR4) && defined(__sun))
 #include <linux/types.h>
+#else
+#include <infiniband/ofa_solaris.h>
+#endif
 #include <string.h>
 
 #ifndef AF_IB
diff -r b5e87c2d333b include/rdma/rdma_cma_abi.h
--- a/include/rdma/rdma_cma_abi.h	Wed Nov 18 11:01:02 2015 -0800
+++ b/include/rdma/rdma_cma_abi.h	Thu Nov 19 11:18:58 2015 -0800
@@ -113,6 +113,7 @@
 	__u64 response;
 	struct sockaddr_in6 addr;
 	__u32 id;
+	uint32_t	reserved;
 };
 
 struct ucma_abi_bind {
@@ -291,6 +292,7 @@
 	__u64 uid;
 	struct sockaddr_in6 addr;
 	__u32 id;
+	uint32_t	reserved;
 };
 
 struct ucma_abi_join_mcast {
diff -r b5e87c2d333b man/cmtime.1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/man/cmtime.1	Thu Nov 19 11:18:58 2015 -0800
@@ -0,0 +1,45 @@
+.TH "CMTIME" 1 "2015-11-19" "librdmacm" "librdmacm" librdmacm
+.SH NAME
+cmtime \- sample program that times rdma cm calls
+.SH SYNOPSIS
+.sp
+.nf
+\fIcmtime\fR [-s server_address] [-b bind_address] [-c connections]
+		[-p port_number] [-t timeout_ms]
+\fIcmtime\fR -s server_address [-b bind_address] [-c connections]
+		[-p port_number] [-t timeout_ms]
+.fi
+.SH "DESCRIPTION"
+Measures how long it takes for each step in the RDMA connection
+process to complete. It can be used to analyze the performance of
+the various CM steps.
+.SH "OPTIONS"
+.TP
+\-s server_address
+The IP address of the server system listening for connections.
+The used address must route over an RDMA device.This option must be 
+specified by the client.
+.TP
+\-b bind_address
+The local network address to bind to.
+.TP
+\-c connections
+The number of connections to establish between the client and server.
+(default 100)
+.TP
+\-p port_number
+Specifies the port number that the server listens on.  By default the server
+listens on port 7471.
+.TP
+\-t timeout_ms
+Specifies the timeout value in milliseconds for which it would try to 
+resolve the given IP address. By default, the value is 2000ms
+.SH "NOTES"
+Basic usage is to start cmtime on a server system, then run
+cmtime -s server_name on a client system.
+.P
+Because this test maps RDMA resources to userspace, users must ensure
+that they have available system resources and permissions.  See the
+libibverbs README file for additional details.
+.SH "SEE ALSO"
+rdma_cm(7), udaddy(1), rping(1), ucmatose(1), mckey(1)
diff -r b5e87c2d333b man/rdma_cm.7
--- a/man/rdma_cm.7	Wed Nov 18 11:01:02 2015 -0800
+++ b/man/rdma_cm.7	Thu Nov 19 11:18:58 2015 -0800
@@ -19,7 +19,7 @@
 API defined by the libibverbs library.  The libibverbs library provides the
 underlying interfaces needed to send and receive data.
 .P
-The RDMA CM can operate asynchronously or synchronously.  The mode of
+The RDMA CM can operate asynchronously.  The mode of
 operation is controlled by the user through the use of the rdma_cm event channel
 parameter in specific calls.  If an event channel is provided, an rdma_cm identifier
 will report its event data (results of connecting, for example), on that channel.
@@ -31,44 +31,10 @@
 of the more commonly used verbs funcationality.  The full set of abstracted
 verb calls are:
 .P
-rdma_reg_msgs  - register an array of buffers for sending and receiving
-.P
-rdma_reg_read  - registers a buffer for RDMA read operations
-.P
-rdma_reg_write - registers a buffer for RDMA write operations
-.P
-rdma_dereg_mr  - deregisters a memory region
-.P
-rdma_post_recv  - post a buffer to receive a message
-.P
-rdma_post_send  - post a buffer to send a message
-.P
-rdma_post_read  - post an RDMA to read data into a buffer
-.P
-rdma_post_write - post an RDMA to send data from a buffer
-.P
-rdma_post_recvv  - post a vector of buffers to receive a message
-.P
-rdma_post_sendv  - post a vector of buffers to send a message
-.P
-rdma_post_readv  - post a vector of buffers to receive an RDMA read
-.P
-rdma_post_writev - post a vector of buffers to send an RDMA write
-.P
-rdma_post_ud_send - post a buffer to send a message on a UD QP
-.P
-rdma_get_send_comp - get completion status for a send or RDMA operation
-.P
-rdma_get_recv_comp - get information about a completed receive
 .SH "CLIENT OPERATION"
 This section provides a general overview of the basic operation for the active,
 or client, side of communication.  This flow assume asynchronous operation with
-low level call details shown.  For
-synchronous operation, calls to rdma_create_event_channel, rdma_get_cm_event,
-rdma_ack_cm_event, and rdma_destroy_event_channel
-would be eliminated.  Abstracted calls, such as rdma_create_ep encapsulate
-serveral of these calls under a single API.
-Users may also refer to the example applications for
+low level call details shown.  Users may also refer to the example applications for
 code samples.  A general connection flow would be:
 .IP rdma_getaddrinfo
 retrieve address information of the destination
@@ -178,12 +144,9 @@
 rdma_ack_cm_event(3),
 rdma_bind_addr(3),
 rdma_connect(3),
-rdma_create_ep(3),
 rdma_create_event_channel(3),
 rdma_create_id(3),
 rdma_create_qp(3),
-rdma_dereg_mr(3),
-rdma_destroy_ep(3),
 rdma_destroy_event_channel(3),
 rdma_destroy_id(3),
 rdma_destroy_qp(3),
@@ -196,27 +159,11 @@
 rdma_get_dst_port(3),
 rdma_get_local_addr(3),
 rdma_get_peer_addr(3),
-rdma_get_recv_comp(3),
-rdma_get_request(3),
-rdma_get_send_comp(3),
 rdma_get_src_port(3),
 rdma_join_multicast(3),
 rdma_leave_multicast(3),
 rdma_listen(3),
-rdma_migrate_id(3),
 rdma_notify(3),
-rdma_post_read(3)
-rdma_post_readv(3),
-rdma_post_recv(3),
-rdma_post_recvv(3),
-rdma_post_send(3),
-rdma_post_sendv(3),
-rdma_post_ud_send(3),
-rdma_post_write(3),
-rdma_post_writev(3),
-rdma_reg_msgs(3),
-rdma_reg_read(3),
-rdma_reg_write(3),
 rdma_reject(3),
 rdma_resolve_addr(3),
 rdma_resolve_route(3),
diff -r b5e87c2d333b man/rdma_set_option.3
--- a/man/rdma_set_option.3	Wed Nov 18 11:01:02 2015 -0800
+++ b/man/rdma_set_option.3	Thu Nov 19 11:18:58 2015 -0800
@@ -14,16 +14,26 @@
 .IP "id" 12
 RDMA identifier.
 .IP "level" 12
-Protocol level of the option to set.
+Protocol level of the option to set.  Currently level RDMA_OPTION_ID is supported.
 .IP "optname" 12
-Name of the option, relative to the level, to set.
+Name of the option, relative to the level, to set.  The only supported option isRDMA_OPTION_ID_REUSEADDR for level RDMA_OPTION_ID.
 .IP "optval" 12
-Reference to the option data.  The data is dependent on the level and optname.
+Reference to the option data.  The data is dependent on the level and optname.  For the option RDMA_OPTION_ID_REUSEADDR, an integer is passed.
 .IP "optlen" 12
 The size of the %optval buffer.
 .SH "DESCRIPTION"
 Sets communication options for an rdma_cm_id.  This call is used to override
 the default system settings.
+.sp
+The RDMA_OPTION_ID_REUSEADDR option can be used to enable or
+disable REUSEADDR option for a CMID. A value of 0 disables
+the option and a non-zero value enables the option. This
+option can be set before calling rdma_bind_addr(3) or the
+rdma_resolve_addr(3) API. Listening for connection requests,
+using rdma_listen(3), is not supported for CMIDs set with
+this option. This option enables multiple connections to share
+the same source IP Port on the active side of the connection.
+.sp
 .SH "RETURN VALUE"
 Returns 0 on success, or -1 on error.  If an error occurs, errno will be
 set to indicate the failure reason.
diff -r b5e87c2d333b src/addrinfo.c
--- a/src/addrinfo.c	Wed Nov 18 11:01:02 2015 -0800
+++ b/src/addrinfo.c	Thu Nov 19 11:18:58 2015 -0800
@@ -132,9 +132,16 @@
 	(*dst)->sib_family = AF_IB;
 	(*dst)->sib_pkey = 0xFFFF;
 	(*dst)->sib_flowinfo = src->sin6_flowinfo;
+#if !(defined(__SVR4) && defined(__sun))
 	ib_addr_set(&(*dst)->sib_addr, src->sin6_addr.s6_addr32[0],
 		    src->sin6_addr.s6_addr32[1], src->sin6_addr.s6_addr32[2],
 		    src->sin6_addr.s6_addr32[3]);
+#else
+	ib_addr_set(&(*dst)->sib_addr, src->sin6_addr._S6_un._S6_u32[0],
+		    src->sin6_addr._S6_un._S6_u32[1], src->sin6_addr._S6_un._S6_u32[2],
+		    src->sin6_addr._S6_un._S6_u32[3]);
+#endif
+
 	ucma_set_sid(ps, (struct sockaddr *) src, *dst);
 	(*dst)->sib_scope_id = src->sin6_scope_id;
 
diff -r b5e87c2d333b src/cma.c
--- a/src/cma.c	Wed Nov 18 11:01:02 2015 -0800
+++ b/src/cma.c	Thu Nov 19 11:18:58 2015 -0800
@@ -44,8 +44,13 @@
 #include <poll.h>
 #include <unistd.h>
 #include <pthread.h>
+#if defined(__SVR4) && defined(__sun)
+#include <sys/stat.h>
+#include <sys/mkdev.h>
+#else
 #include <endian.h>
 #include <byteswap.h>
+#endif
 #include <stddef.h>
 #include <netdb.h>
 #include <syslog.h>
@@ -53,6 +58,7 @@
 
 #include "cma.h"
 #include "indexer.h"
+#include <infiniband/arch.h>
 #include <infiniband/driver.h>
 #include <infiniband/marshall.h>
 #include <rdma/rdma_cma.h>
@@ -154,7 +160,6 @@
 
 		fastlock_destroy(&idm_lock);
 		free(cma_dev_array);
-		cma_dev_cnt = 0;
 	}
 }
 
@@ -173,12 +178,17 @@
 		 * backports, assume the most recent version of the ABI.  If
 		 * we're wrong, we'll simply fail later when calling the ABI.
 		 */
+		fprintf(stderr, "librdmacm: couldn't read ABI version.\n");
+		fprintf(stderr, "librdmacm: assuming: %d\n", abi_ver);
 		return 0;
 	}
 
 	abi_ver = strtol(value, NULL, 10);
 	if (abi_ver < RDMA_USER_CM_MIN_ABI_VERSION ||
 	    abi_ver > RDMA_USER_CM_MAX_ABI_VERSION) {
+		fprintf(stderr, "librdmacm: kernel ABI version %d "
+				"doesn't match library version %d.\n",
+				abi_ver, RDMA_USER_CM_MAX_ABI_VERSION);
 		return -1;
 	}
 	return 0;
@@ -232,11 +242,13 @@
 
 	dev_list = ibv_get_device_list(&dev_cnt);
 	if (!dev_list) {
+		printf("CMA: unable to get RDMA device list\n");
 		ret = ERR(ENODEV);
 		goto err1;
 	}
 
 	if (!dev_cnt) {
+		printf("CMA: no RDMA devices found\n");
 		ret = ERR(ENODEV);
 		goto err2;
 	}
@@ -272,6 +284,7 @@
 
 	dev_list = ibv_get_device_list(NULL);
 	if (!dev_list) {
+		fprintf(stderr, PFX "Fatal: unable to get RDMA device list\n");
 		return NULL;
 	}
 
@@ -282,6 +295,9 @@
 		}
 	}
 
+	if (!verbs)
+		fprintf(stderr, PFX "Fatal: unable to open RDMA device\n");
+
 	ibv_free_device_list(dev_list);
 	return verbs;
 }
@@ -301,6 +317,7 @@
 
 	ret = ibv_query_device(cma_dev->verbs, &attr);
 	if (ret) {
+		fprintf(stderr, PFX "Fatal: unable to query RDMA device\n");
 		ret = ERR(ret);
 		goto err;
 	}
@@ -396,8 +413,18 @@
 	if (!channel)
 		return NULL;
 
+#if defined(__SVR4) && defined(__sun)
+	channel->fd = open("/dev/infiniband/ofs/rdma_cm", O_RDWR | O_CLOEXEC);
+#else
 	channel->fd = open("/dev/infiniband/rdma_cm", O_RDWR | O_CLOEXEC);
+#endif
+
 	if (channel->fd < 0) {
+#if defined(__SVR4) && defined(__sun)
+		printf("CMA: unable to open /dev/infiniband/ofs/rdma_cm\n");
+#else
+		printf("CMA: unable to open /dev/infiniband/rdma_cm\n");
+#endif
 		goto err;
 	}
 	return channel;
@@ -1388,6 +1415,14 @@
 	if (ret)
 		return ret;
 
+#if defined(__SVR4) && defined(__sun)
+	/*
+	 * The 31st bit of sq_sig_all is set for QPs allocated
+	 * using librdmacm. Consumers use sq_sig_all 0 /1.
+	 */
+	attr->sq_sig_all |= LIB_RDMACM_QP_BIT;
+#endif
+
 	if (!attr->send_cq)
 		attr->send_cq = id->send_cq;
 	if (!attr->recv_cq)
@@ -2018,6 +2053,9 @@
 
 	CMA_INIT_CMD(&cmd, sizeof cmd, ACCEPT);
 	cmd.id = id_priv->handle;
+#if defined(__SVR4) && defined(__sun)
+	cmd.conn_param.qp_num = ((id_priv->id).qp)->qp_num;
+#endif
 
 	ret = write(id_priv->id.channel->fd, &cmd, sizeof cmd);
 	if (ret != sizeof cmd) {
@@ -2300,7 +2338,23 @@
 
 	CMA_INIT_CMD_RESP(&cmd, sizeof cmd, MIGRATE_ID, &resp, sizeof resp);
 	cmd.id = id_priv->handle;
+
+#if !(defined(__SVR4) && defined(__sun))
 	cmd.fd = id->channel->fd;
+#else
+	{
+		/* Solaris has no way of mapping fd back to the cloned device 
+		 * created during open() system call. It can only map the minor 
+		 * number to cloned device.
+		 */
+		struct stat	fstat_buf;
+		int		rc;
+
+		if ((rc = fstat(id->channel->fd, &fstat_buf)) != 0)
+			return (ERR(ENODATA));
+		cmd.fd = minor(fstat_buf.st_rdev);
+	}
+#endif
 
 	ret = write(channel->fd, &cmd, sizeof cmd);
 	if (ret != sizeof cmd) {
diff -r b5e87c2d333b src/cma.h
--- a/src/cma.h	Wed Nov 18 11:01:02 2015 -0800
+++ b/src/cma.h	Thu Nov 19 11:18:58 2015 -0800
@@ -40,8 +40,11 @@
 
 #include <stdlib.h>
 #include <errno.h>
+#if !(defined(__SVR4) && defined(__sun))
 #include <endian.h>
 #include <byteswap.h>
+#endif
+
 #include <semaphore.h>
 
 #include <rdma/rdma_cma.h>
@@ -60,6 +63,7 @@
 
 #define PFX "librdmacm: "
 
+#if !(defined(__SVR4) && defined(__sun))
 #if __BYTE_ORDER == __LITTLE_ENDIAN
 static inline uint64_t htonll(uint64_t x) { return bswap_64(x); }
 static inline uint64_t ntohll(uint64_t x) { return bswap_64(x); }
@@ -67,6 +71,7 @@
 static inline uint64_t htonll(uint64_t x) { return x; }
 static inline uint64_t ntohll(uint64_t x) { return x; }
 #endif
+#endif
 
 #define max(a, b) ((a) > (b) ? a : b)
 #define min(a, b) ((a) < (b) ? a : b)