components/open-fabrics/librdmacm/patches/base.patch
author Boris Chiu <boris.chiu@oracle.com>
Tue, 01 Jul 2014 14:49:32 -0700
branch s11-update
changeset 3195 cf6a5a756b74
parent 2532 5b3dc1c8b85e
child 3227 281a940a917d
permissions -rw-r--r--
15695368 SUNBT7017250 man page from rds-stress has incorrect entry for rdma bytes transmi; 17489343 Update solaris rds-tools to 2.0.7-1.12; 18382333 libibverbs ibv_cmd_get_context() fails when mcxnex driver UAR space is exhausted; 18422470 modify librdmacm on solaris to avoid opening device context per hca on startup; 18728045 Include local copy of rds.h file to build NRM rds-tools

diff -r -u /tmp/librdmacm-1.0.14.1/configure librdmacm-1.0.14.1/configure
--- /tmp/librdmacm-1.0.14.1/configure	Tue Feb 15 17:12:14 2011
+++ librdmacm-1.0.14.1/configure	Wed Apr 30 10:52:05 2014
@@ -7625,6 +7625,7 @@
  	esac ;;
       esac
       link_all_deplibs=yes
+      hardcode_libdir_flag_spec=
       ;;
 
     sunos4*)
diff -r -u /tmp/librdmacm-1.0.14.1/Makefile.in librdmacm-1.0.14.1/Makefile.in
--- /tmp/librdmacm-1.0.14.1/Makefile.in	Tue Feb 15 17:12:13 2011
+++ librdmacm-1.0.14.1/Makefile.in	Wed Apr 30 10:52:05 2014
@@ -69,7 +69,7 @@
 	"$(DESTDIR)$(man1dir)" "$(DESTDIR)$(man3dir)" \
 	"$(DESTDIR)$(man7dir)" "$(DESTDIR)$(infinibandincludedir)" \
 	"$(DESTDIR)$(librdmacmincludedir)"
-libLTLIBRARIES_INSTALL = $(INSTALL)
+libLTLIBRARIES_INSTALL = $(INSTALL) -m 755
 LTLIBRARIES = $(lib_LTLIBRARIES)
 src_librdmacm_la_LIBADD =
 am_src_librdmacm_la_OBJECTS = src_librdmacm_la-cma.lo \
@@ -76,7 +76,7 @@
 	src_librdmacm_la-addrinfo.lo src_librdmacm_la-acm.lo
 src_librdmacm_la_OBJECTS = $(am_src_librdmacm_la_OBJECTS)
 am__dirstamp = $(am__leading_dot)dirstamp
-binPROGRAMS_INSTALL = $(INSTALL_PROGRAM)
+binPROGRAMS_INSTALL = $(INSTALL_PROGRAM) -m 755
 PROGRAMS = $(bin_PROGRAMS)
 am_examples_mckey_OBJECTS = mckey.$(OBJEXT)
 examples_mckey_OBJECTS = $(am_examples_mckey_OBJECTS)
diff -r -u /tmp/librdmacm-1.0.14.1/src/cma.h librdmacm-1.0.14.1/src/cma.h
--- /tmp/librdmacm-1.0.14.1/src/cma.h	Mon Oct  4 17:00:18 2010
+++ librdmacm-1.0.14.1/src/cma.h	Tue May  6 11:32:19 2014
@@ -40,8 +40,10 @@
 
 #include <stdlib.h>
 #include <errno.h>
+#if !(defined(__SVR4) && defined(__sun))
 #include <endian.h>
 #include <byteswap.h>
+#endif
 
 #include <rdma/rdma_cma.h>
 
@@ -58,14 +60,6 @@
 
 #define PFX "librdmacm: "
 
-#if __BYTE_ORDER == __LITTLE_ENDIAN
-static inline uint64_t htonll(uint64_t x) { return bswap_64(x); }
-static inline uint64_t ntohll(uint64_t x) { return bswap_64(x); }
-#else
-static inline uint64_t htonll(uint64_t x) { return x; }
-static inline uint64_t ntohll(uint64_t x) { return x; }
-#endif
-
 #define min(a, b) (a < b ? a : b)
 
 static inline int ERR(int err)
@@ -74,7 +68,7 @@
 	return -1;
 }
 
-int ucma_init();
+int ucma_init(void);
 extern int af_ib_support;
 
 #define RAI_ROUTEONLY		0x01000000
diff -r -u /tmp/librdmacm-1.0.14.1/src/cma.c librdmacm-1.0.14.1/src/cma.c
--- /tmp/librdmacm-1.0.14.1/src/cma.c	Fri Dec 10 12:05:34 2010
+++ librdmacm-1.0.14.1/src/cma.c	Tue May  6 11:30:54 2014
@@ -46,12 +46,18 @@
 #include <poll.h>
 #include <unistd.h>
 #include <pthread.h>
+#if defined(__SVR4) && defined(__sun)
+#include <sys/stat.h>
+#include <sys/mkdev.h>
+#else
 #include <endian.h>
 #include <byteswap.h>
+#endif
 #include <stddef.h>
 #include <netdb.h>
 
 #include "cma.h"
+#include <infiniband/arch.h>
 #include <infiniband/driver.h>
 #include <infiniband/marshall.h>
 #include <rdma/rdma_cma.h>
@@ -100,6 +106,8 @@
 	struct ibv_pd	   *pd;
 	uint64_t	    guid;
 	int		    port_cnt;
+	int		    refcnt;
+	int		    max_qpsize;
 	uint8_t		    max_initiator_depth;
 	uint8_t		    max_responder_resources;
 };
@@ -143,6 +151,7 @@
 
 static struct cma_device *cma_dev_array;
 static int cma_dev_cnt;
+static int cma_init_cnt;
 static pthread_mutex_t mut = PTHREAD_MUTEX_INITIALIZER;
 static int abi_ver = RDMA_USER_CM_MAX_ABI_VERSION;
 int af_ib_support;
@@ -156,12 +165,16 @@
 
 	if (cma_dev_cnt) {
 		while (cma_dev_cnt--) {
-			ibv_dealloc_pd(cma_dev_array[cma_dev_cnt].pd);
+			if (!cma_dev_array[cma_dev_cnt].verbs)
+				continue;
+
+			if (cma_dev_array[cma_dev_cnt].refcnt)
+				ibv_dealloc_pd(cma_dev_array[cma_dev_cnt].pd);
 			ibv_close_device(cma_dev_array[cma_dev_cnt].verbs);
+			cma_init_cnt--;
 		}
 
 		free(cma_dev_array);
-		cma_dev_cnt = 0;
 	}
 }
 
@@ -228,9 +241,7 @@
 int ucma_init(void)
 {
 	struct ibv_device **dev_list = NULL;
-	struct cma_device *cma_dev;
-	struct ibv_device_attr attr;
-	int i, ret, dev_cnt, ib;
+	int i, ret, dev_cnt;
 
 	/* Quick check without lock to see if we're already initialized */
 	if (cma_dev_cnt)
@@ -253,46 +264,20 @@
 		goto err1;
 	}
 
-	cma_dev_array = malloc(sizeof *cma_dev * dev_cnt);
+	if (!dev_cnt) {
+		printf("CMA: no RDMA devices found\n");
+		ret = ERR(ENODEV);
+		goto err2;
+	}
+	cma_dev_array = calloc(dev_cnt, sizeof *cma_dev_array);
 	if (!cma_dev_array) {
 		ret = ERR(ENOMEM);
 		goto err2;
 	}
 
-	for (i = 0, ib = 0; dev_list[i];) {
-		cma_dev = &cma_dev_array[i];
+	for (i = 0; dev_list[i]; i++)
+		cma_dev_array[i].guid = ibv_get_device_guid(dev_list[i]);
 
-		cma_dev->guid = ibv_get_device_guid(dev_list[i]);
-		cma_dev->verbs = ibv_open_device(dev_list[i]);
-		if (!cma_dev->verbs) {
-			printf("CMA: unable to open RDMA device\n");
-			ret = ERR(ENODEV);
-			goto err3;
-		}
-
-		cma_dev->pd = ibv_alloc_pd(cma_dev->verbs);
-		if (!cma_dev->pd) {
-			ibv_close_device(cma_dev->verbs);
-			ret = ERR(ENOMEM);
-			goto err3;
-		}
-
-		i++;
-		ret = ibv_query_device(cma_dev->verbs, &attr);
-		if (ret) {
-			printf("CMA: unable to query RDMA device\n");
-			ret = ERR(ret);
-			goto err3;
-		}
-
-		cma_dev->port_cnt = attr.phys_port_cnt;
-		cma_dev->max_initiator_depth = (uint8_t) attr.max_qp_init_rd_atom;
-		cma_dev->max_responder_resources = (uint8_t) attr.max_qp_rd_atom;
-		ib += (cma_dev->verbs->device->transport_type == IBV_TRANSPORT_IB);
-	}
-
-	if (ib)
-		ucma_ib_init();
 	cma_dev_cnt = dev_cnt;
 	ucma_set_af_ib_support();
 	pthread_mutex_unlock(&mut);
@@ -299,12 +284,6 @@
 	ibv_free_device_list(dev_list);
 	return 0;
 
-err3:
-	while (i--) {
-		ibv_dealloc_pd(cma_dev_array[i].pd);
-		ibv_close_device(cma_dev_array[i].verbs);
-	}
-	free(cma_dev_array);
 err2:
 	ibv_free_device_list(dev_list);
 err1:
@@ -312,12 +291,93 @@
 	return ret;
 }
 
+static struct ibv_context *ucma_open_device(uint64_t guid)
+{
+	struct ibv_device **dev_list;
+	struct ibv_context *verbs = NULL;
+	int i;
+
+	dev_list = ibv_get_device_list(NULL);
+	if (!dev_list) {
+		fprintf(stderr, PFX "Fatal: unable to get RDMA device list\n");
+		return NULL;
+	}
+
+	for (i = 0; dev_list[i]; i++) {
+		if (ibv_get_device_guid(dev_list[i]) == guid) {
+			verbs = ibv_open_device(dev_list[i]);
+			break;
+		}
+	}
+
+	if (!verbs)
+		fprintf(stderr, PFX "Fatal: unable to open RDMA device\n");
+
+	ibv_free_device_list(dev_list);
+	return verbs;
+}
+
+static int ucma_init_device(struct cma_device *cma_dev)
+{
+	struct ibv_device_attr attr;
+	int ret;
+
+	if (cma_dev->verbs)
+		return 0;
+
+	cma_dev->verbs = ucma_open_device(cma_dev->guid);
+	if (!cma_dev->verbs)
+		return ERR(ENODEV);
+
+	ret = ibv_query_device(cma_dev->verbs, &attr);
+	if (ret) {
+		fprintf(stderr, PFX "Fatal: unable to query RDMA device\n");
+		ret = ERR(ret);
+		goto err;
+	}
+
+	cma_dev->port_cnt = attr.phys_port_cnt;
+	cma_dev->max_qpsize = attr.max_qp_wr;
+	cma_dev->max_initiator_depth = (uint8_t) attr.max_qp_init_rd_atom;
+	cma_dev->max_responder_resources = (uint8_t) attr.max_qp_rd_atom;
+	cma_init_cnt++;
+	return 0;
+
+err:
+	ibv_close_device(cma_dev->verbs);
+	cma_dev->verbs = NULL;
+	return ret;
+}
+
+static int ucma_init_all(void)
+{
+	int i, ret = 0;
+
+	if (!cma_dev_cnt) {
+		ret = ucma_init();
+		if (ret)
+			return ret;
+	}
+
+	if (cma_init_cnt == cma_dev_cnt)
+		return 0;
+
+	pthread_mutex_lock(&mut);
+	for (i = 0; i < cma_dev_cnt; i++) {
+		ret = ucma_init_device(&cma_dev_array[i]);
+		if (ret)
+			break;
+	}
+	pthread_mutex_unlock(&mut);
+	return ret;
+}
+
 struct ibv_context **rdma_get_devices(int *num_devices)
 {
 	struct ibv_context **devs = NULL;
 	int i;
 
-	if (ucma_init())
+	if (ucma_init_all())
 		goto out;
 
 	devs = malloc(sizeof *devs * (cma_dev_cnt + 1));
@@ -354,9 +414,18 @@
 	if (!channel)
 		return NULL;
 
+#if defined(__SVR4) && defined(__sun)
+	channel->fd = open("/dev/infiniband/ofs/rdma_cm", O_RDWR);
+#else
 	channel->fd = open("/dev/infiniband/rdma_cm", O_RDWR);
+#endif
+
 	if (channel->fd < 0) {
+#if defined(__SVR4) && defined(__sun)
+		printf("CMA: unable to open /dev/infiniband/ofs/rdma_cm\n");
+#else
 		printf("CMA: unable to open /dev/infiniband/rdma_cm\n");
+#endif
 		goto err;
 	}
 	return channel;
@@ -374,18 +443,33 @@
 static int ucma_get_device(struct cma_id_private *id_priv, uint64_t guid)
 {
 	struct cma_device *cma_dev;
-	int i;
+	int i, ret;
 
 	for (i = 0; i < cma_dev_cnt; i++) {
 		cma_dev = &cma_dev_array[i];
-		if (cma_dev->guid == guid) {
-			id_priv->cma_dev = cma_dev;
-			id_priv->id.verbs = cma_dev->verbs;
-			return 0;
-		}
+		if (cma_dev->guid == guid)
+			goto match;
 	}
-
+ 
 	return ERR(ENODEV);
+match:
+	pthread_mutex_lock(&mut);
+	if ((ret = ucma_init_device(cma_dev)))
+		goto out;
+ 
+	if (!cma_dev->refcnt++) {
+		cma_dev->pd = ibv_alloc_pd(cma_dev->verbs);
+		if (!cma_dev->pd) {
+			cma_dev->refcnt--;
+			ret = ERR(ENOMEM);
+			goto out;
+		}
+	}
+	id_priv->cma_dev = cma_dev;
+	id_priv->id.verbs = cma_dev->verbs;
+out:
+	pthread_mutex_unlock(&mut);
+	return ret;
 }
 
 static void ucma_free_id(struct cma_id_private *id_priv)
@@ -1186,6 +1270,10 @@
 	if (ret)
 		return ret;
 
+#if defined(__SVR4) && defined(__sun)
+	qp_init_attr->sq_sig_all |= LIB_RDMACM_QP_BIT;
+#endif
+
 	qp = ibv_create_qp(pd, qp_init_attr);
 	if (!qp) {
 		ret = ERR(ENOMEM);
@@ -1787,6 +1875,9 @@
 
 	CMA_CREATE_MSG_CMD(msg, cmd, UCMA_CMD_ACCEPT, size);
 	cmd->id = id_priv->handle;
+#if defined(__SVR4) && defined(__sun)
+	cmd->conn_param.qp_num = ((id_priv->id).qp)->qp_num;
+#endif
 
 	ret = write(id_priv->id.channel->fd, msg, size);
 	if (ret != size) {
@@ -2051,8 +2142,20 @@
 
 	CMA_CREATE_MSG_CMD_RESP(msg, cmd, resp, UCMA_CMD_MIGRATE_ID, size);
 	cmd->id = id_priv->handle;
+
+#if !(defined(__SVR4) && defined(__sun))
 	cmd->fd = id->channel->fd;
+#else
+	{
+		struct stat	fstat_buf;
+		int		rc;
 
+		if ((rc = fstat(id->channel->fd, &fstat_buf)) != 0)
+			return (ERR(ENODATA));
+		cmd->fd = minor(fstat_buf.st_rdev);
+	}
+#endif
+
 	ret = write(channel->fd, msg, size);
 	if (ret != size)
 		return (ret >= 0) ? ERR(ENODATA) : -1;
diff -r -u /tmp/librdmacm-1.0.14.1/man/rdma_create_id.3 librdmacm-1.0.14.1/man/rdma_create_id.3
--- /tmp/librdmacm-1.0.14.1/man/rdma_create_id.3	Mon Oct  4 17:00:18 2010
+++ librdmacm-1.0.14.1/man/rdma_create_id.3	Wed Apr 30 10:52:04 2014
@@ -31,9 +31,7 @@
 explicitly binding to a specified RDMA device before communication
 can occur, and most operations are asynchronous in nature.  Asynchronous
 communication events on an rdma_cm_id are reported through the associated
-event channel.  If the channel parameter is NULL, the rdma_cm_id will
-be placed into synchronous operation.  While operating synchronously,
-calls that result in an event will block until the operation completes.
+event channel.
 The event will be returned to the user through the rdma_cm_id structure,
 and be available for access until another rdma_cm call is made.
 .P
diff -r -u /tmp/librdmacm-1.0.14.1/man/rdma_create_qp.3 librdmacm-1.0.14.1/man/rdma_create_qp.3
--- /tmp/librdmacm-1.0.14.1/man/rdma_create_qp.3	Fri Dec 10 12:05:34 2010
+++ librdmacm-1.0.14.1/man/rdma_create_qp.3	Wed Apr 30 11:48:07 2014
@@ -33,8 +33,7 @@
 the rdma_cm_id will be created using a default protection domain.  One
 default protection domain is allocated per RDMA device.
 .P
-The initial QP attributes are specified by the qp_init_attr parameter.  The
-send_cq and recv_cq fields in the ibv_qp_init_attr are optional.  If
+The initial QP attributes are specified by the qp_init_attr parameter.  If
 a send or receive completion queue is not specified, then a CQ will be
 allocated by the rdma_cm for the QP, along with corresponding completion
 channels.  Completion channels and CQ data created by the rdma_cm are
diff -r -u /tmp/librdmacm-1.0.14.1/man/rdma_cm.7 librdmacm-1.0.14.1/man/rdma_cm.7
--- /tmp/librdmacm-1.0.14.1/man/rdma_cm.7	Mon Oct  4 17:00:18 2010
+++ librdmacm-1.0.14.1/man/rdma_cm.7	Wed Apr 30 11:48:07 2014
@@ -19,7 +19,7 @@
 API defined by the libibverbs library.  The libibverbs library provides the
 underlying interfaces needed to send and receive data.
 .P
-The RDMA CM can operate asynchronously or synchronously.  The mode of
+The RDMA CM operates asynchronously.  The mode of
 operation is controlled by the user through the use of the rdma_cm event channel
 parameter in specific calls.  If an event channel is provided, an rdma_cm identifier
 will report its event data (results of connecting, for example), on that channel.
@@ -63,12 +63,7 @@
 .SH "CLIENT OPERATION"
 This section provides a general overview of the basic operation for the active,
 or client, side of communication.  This flow assume asynchronous operation with
-low level call details shown.  For
-synchronous operation, calls to rdma_create_event_channel, rdma_get_cm_event,
-rdma_ack_cm_event, and rdma_destroy_event_channel
-would be eliminated.  Abstracted calls, such as rdma_create_ep encapsulate
-serveral of these calls under a single API.
-Users may also refer to the example applications for
+low level call details shown.  Users may also refer to the example applications for
 code samples.  A general connection flow would be:
 .IP rdma_getaddrinfo
 retrieve address information of the destination
@@ -178,12 +173,9 @@
 rdma_ack_cm_event(3),
 rdma_bind_addr(3),
 rdma_connect(3),
-rdma_create_ep(3),
 rdma_create_event_channel(3),
 rdma_create_id(3),
 rdma_create_qp(3),
-rdma_dereg_mr(3),
-rdma_destroy_ep(3),
 rdma_destroy_event_channel(3),
 rdma_destroy_id(3),
 rdma_destroy_qp(3),
@@ -196,27 +188,11 @@
 rdma_get_dst_port(3),
 rdma_get_local_addr(3),
 rdma_get_peer_addr(3),
-rdma_get_recv_comp(3),
-rdma_get_request(3),
-rdma_get_send_comp(3),
 rdma_get_src_port(3),
 rdma_join_multicast(3),
 rdma_leave_multicast(3),
 rdma_listen(3),
-rdma_migrate_id(3),
 rdma_notify(3),
-rdma_post_read(3)
-rdma_post_readv(3),
-rdma_post_recv(3),
-rdma_post_recvv(3),
-rdma_post_send(3),
-rdma_post_sendv(3),
-rdma_post_ud_send(3),
-rdma_post_write(3),
-rdma_post_writev(3),
-rdma_reg_msgs(3),
-rdma_reg_read(3),
-rdma_reg_write(3),
 rdma_reject(3),
 rdma_resolve_addr(3),
 rdma_resolve_route(3),
diff -r -u /tmp/librdmacm-1.0.14.1/include/infiniband/ib.h librdmacm-1.0.14.1/include/infiniband/ib.h
--- /tmp/librdmacm-1.0.14.1/include/infiniband/ib.h	Mon Oct  4 17:00:18 2010
+++ librdmacm-1.0.14.1/include/infiniband/ib.h	Wed Apr 30 10:52:05 2014
@@ -33,7 +33,11 @@
 #if !defined(_RDMA_IB_H)
 #define _RDMA_IB_H
 
+#if !(defined(__SVR4) && defined(__sun))
 #include <linux/types.h>
+#else
+#include <infiniband/ofa_solaris.h>
+#endif
 #include <string.h>
 
 #ifndef AF_IB
diff -r -u /tmp/librdmacm-1.0.14.1/include/rdma/rdma_cma_abi.h librdmacm-1.0.14.1/include/rdma/rdma_cma_abi.h
--- /tmp/librdmacm-1.0.14.1/include/rdma/rdma_cma_abi.h	Mon Oct  4 17:00:18 2010
+++ librdmacm-1.0.14.1/include/rdma/rdma_cma_abi.h	Wed Apr 30 10:52:05 2014
@@ -104,6 +104,9 @@
 	__u64 response;
 	struct sockaddr_in6 addr;
 	__u32 id;
+#if defined(__SVR4) && defined(__sun)
+	uint32_t	reserved;
+#endif
 };
 
 struct ucma_abi_bind {
@@ -243,6 +246,9 @@
 	__u64 uid;
 	struct sockaddr_in6 addr;
 	__u32 id;
+#if defined(__SVR4) && defined(__sun)
+	uint32_t	reserved;
+#endif
 };
 
 struct ucma_abi_join_mcast {
diff -r -u /tmp/librdmacm-1.0.14.1/examples/udaddy.c librdmacm-1.0.14.1/examples/udaddy.c
--- /tmp/librdmacm-1.0.14.1/examples/udaddy.c	Mon Oct  4 17:00:18 2010
+++ librdmacm-1.0.14.1/examples/udaddy.c	Wed Apr 30 11:48:05 2014
@@ -40,7 +40,9 @@
 #include <netinet/in.h>
 #include <sys/socket.h>
 #include <netdb.h>
+#if !(defined(__SVR4) && defined(__sun))
 #include <byteswap.h>
+#endif
 #include <getopt.h>
 
 #include <rdma/rdma_cma.h>
diff -r -u /tmp/librdmacm-1.0.14.1/examples/mckey.c librdmacm-1.0.14.1/examples/mckey.c
--- /tmp/librdmacm-1.0.14.1/examples/mckey.c	Mon Oct  4 17:00:18 2010
+++ librdmacm-1.0.14.1/examples/mckey.c	Wed Apr 30 11:48:05 2014
@@ -41,7 +41,9 @@
 #include <arpa/inet.h>
 #include <sys/socket.h>
 #include <netdb.h>
+#if !(defined(__SVR4) && defined(__sun))
 #include <byteswap.h>
+#endif
 #include <unistd.h>
 #include <getopt.h>
 
@@ -329,6 +331,16 @@
 
 	while (1) {
 		ret = rdma_get_cm_event(test.channel, &event);
+
+/* 
+ * Solaris returns EBADF if we close the channel while we're waiting
+ * for any events to occur. It is safe to ignore EBADF here.
+ */
+#if defined(__SVR4) && defined(__sun)
+		if (ret && (errno == EBADF))
+			break;
+#endif
+
 		if (ret) {
 			perror("rdma_get_cm_event");
 			break;
@@ -461,6 +473,7 @@
 	return ret;
 }
 
+#if !(defined(__SVR4) && defined(__sun))
 static int get_dst_addr(char *dst, struct sockaddr *addr)
 {
 	struct sockaddr_ib *sib;
@@ -474,6 +487,7 @@
 	inet_pton(AF_INET6, dst, &sib->sib_addr);
 	return 0;
 }
+#endif
 
 static int run(void)
 {
@@ -486,7 +500,12 @@
 			return ret;
 	}
 
+/* Solaris does not yet support family AF_IB */
+#if defined(__SVR4) && defined(__sun)
+	ret = get_addr(dst_addr, (struct sockaddr *) &test.dst_in);
+#else
 	ret = get_dst_addr(dst_addr, (struct sockaddr *) &test.dst_in);
+#endif
 	if (ret)
 		return ret;
 
diff -r -u /tmp/librdmacm-1.0.14.1/examples/cmatose.c librdmacm-1.0.14.1/examples/cmatose.c
--- /tmp/librdmacm-1.0.14.1/examples/cmatose.c	Mon Oct  4 17:00:18 2010
+++ librdmacm-1.0.14.1/examples/cmatose.c	Wed Apr 30 11:48:03 2014
@@ -40,7 +40,9 @@
 #include <netinet/in.h>
 #include <sys/socket.h>
 #include <netdb.h>
+#if !(defined(__SVR4) && defined(__sun))
 #include <byteswap.h>
+#endif
 #include <getopt.h>
 
 #include <rdma/rdma_cma.h>
diff -r -u /tmp/librdmacm-1.0.14.1/examples/rping.c librdmacm-1.0.14.1/examples/rping.c
--- /tmp/librdmacm-1.0.14.1/examples/rping.c	Tue Feb 15 17:10:48 2011
+++ librdmacm-1.0.14.1/examples/rping.c	Wed Apr 30 11:48:04 2014
@@ -40,11 +40,17 @@
 #include <netinet/in.h>
 #include <sys/socket.h>
 #include <netdb.h>
+#if !(defined(__SVR4) && defined(__sun))
 #include <byteswap.h>
+#endif
 #include <semaphore.h>
 #include <arpa/inet.h>
 #include <pthread.h>
 #include <inttypes.h>
+#if defined(__SVR4) && defined(__sun)
+#include <unistd.h>
+#include <libgen.h>
+#endif
 
 #include <rdma/rdma_cma.h>
 #include <infiniband/arch.h>
@@ -85,6 +91,13 @@
 	ERROR
 };
 
+enum disconnect_state {
+	DISCONNECT_NONE,
+	CALLING_DISCONNECT = 1,
+	DISCONNECT_CALLED,
+	DISCONNECT_DONE
+};
+
 struct rping_rdma_info {
 	uint64_t buf;
 	uint32_t rkey;
@@ -143,6 +156,9 @@
 	enum test_state state;		/* used for cond/signalling */
 	sem_t sem;
 
+	enum disconnect_state  dis_state;
+	sem_t dis_sem;
+
 	struct sockaddr_storage sin;
 	uint16_t port;			/* dst port in NBO */
 	int verbose;			/* verbose logging */
@@ -218,6 +234,8 @@
 		fprintf(stderr, "%s DISCONNECT EVENT...\n",
 			cb->server ? "server" : "client");
 		sem_post(&cb->sem);
+		cb->dis_state = DISCONNECT_DONE;
+		sem_post(&cb->dis_sem);
 		break;
 
 	case RDMA_CM_EVENT_DEVICE_REMOVAL:
@@ -285,6 +303,29 @@
 					"cq completion failed status %d\n",
 					wc.status);
 				ret = -1;
+			} else {
+				/*
+				 * A FLUSH error can be polled before RDMA-CM
+				 * DISCONNECT is notified, so check dis_state
+				 * before treating it as a failure. Sleep for
+				 * a while if disconnect has not been called:
+				 * the flushed WR may be caused by the remote
+				 * end initiating the disconnect.
+				 */
+				if (cb->dis_state == DISCONNECT_NONE)
+					sleep(2);
+
+				if (cb->dis_state == DISCONNECT_DONE)
+					return (0);
+
+				/* Wait if disconnect is called. */
+				if (cb->dis_state == DISCONNECT_CALLED) {
+					sem_wait(&cb->dis_sem);
+					if (cb->dis_state == DISCONNECT_DONE)
+						return (0);
+					else
+						goto error;
+				}
 			}
 			goto error;
 		}
@@ -571,9 +612,15 @@
 
 	while (1) {
 		ret = rdma_get_cm_event(cb->cm_channel, &event);
-		if (ret) {
+		/*
+		 * A retried read() syscall returns EBADF when the file
+		 * was closed on process exit. Ignore this error.
+		 */
+		if (ret && errno != EBADF) {
 			perror("rdma_get_cm_event");
 			exit(ret);
+		} else if (ret && errno == EBADF) {
+			exit(0);
 		}
 		ret = rping_cma_event_handler(event->id, event);
 		rdma_ack_cm_event(event);
@@ -595,8 +642,14 @@
 		pthread_testcancel();
 
 		ret = ibv_get_cq_event(cb->channel, &ev_cq, &ev_ctx);
-		if (ret) {
+		/*
+		 * A retried write() syscall returns EBADF when the file
+		 * was closed on process exit. Ignore this error.
+		 */
+		if (ret && errno != EBADF) {
 			fprintf(stderr, "Failed to get cq event!\n");
+			 pthread_exit(NULL);
+		} else if (ret && errno == EBADF) {
 			pthread_exit(NULL);
 		}
 		if (ev_cq != cb->cq) {
@@ -801,11 +854,13 @@
 	}
 
 	rping_test_server(cb);
+	cb->dis_state = CALLING_DISCONNECT;
+	sem_post(&cb->dis_sem);
 	rdma_disconnect(cb->child_cm_id);
-	rping_free_buffers(cb);
-	rping_free_qp(cb);
 	pthread_cancel(cb->cqthread);
 	pthread_join(cb->cqthread, NULL);
+	rping_free_buffers(cb);
+	rping_free_qp(cb);
 	rdma_destroy_id(cb->child_cm_id);
 	free_cb(cb);
 	return NULL;
@@ -889,6 +944,8 @@
 	}
 
 	rping_test_server(cb);
+	cb->dis_state = CALLING_DISCONNECT;
+	sem_post(&cb->dis_sem);
 	rdma_disconnect(cb->child_cm_id);
 	rdma_destroy_id(cb->child_cm_id);
 err2:
@@ -1056,6 +1113,8 @@
 	}
 
 	rping_test_client(cb);
+	cb->dis_state = CALLING_DISCONNECT;
+	sem_post(&cb->dis_sem);
 	rdma_disconnect(cb->cm_id);
 err2:
 	rping_free_buffers(cb);
@@ -1123,6 +1182,7 @@
 	cb->sin.ss_family = PF_INET;
 	cb->port = htons(7174);
 	sem_init(&cb->sem, 0, 0);
+	sem_init(&cb->dis_sem, 0, 0);
 
 	opterr = 0;
 	while ((op=getopt(argc, argv, "a:Pp:C:S:t:scvVd")) != -1) {