18382333 libibverbs ibv_cmd_get_context() fails when mcxnex driver UAR space is exhausted
18422470 modify librdmacm on solaris to avoid opening device context per hca on startup
18720315 open-fabrics needs TPNOs in pkg manifest
diff -r -u /tmp/librdmacm-1.0.14.1/configure librdmacm-1.0.14.1/configure
--- /tmp/librdmacm-1.0.14.1/configure Tue Feb 15 17:12:14 2011
+++ librdmacm-1.0.14.1/configure Thu Feb 24 08:39:24 2011
@@ -7625,6 +7625,7 @@
esac ;;
esac
link_all_deplibs=yes
+ hardcode_libdir_flag_spec=
;;
sunos4*)
diff -r -u /tmp/librdmacm-1.0.14.1/Makefile.in librdmacm-1.0.14.1/Makefile.in
--- /tmp/librdmacm-1.0.14.1/Makefile.in Tue Feb 15 17:12:13 2011
+++ librdmacm-1.0.14.1/Makefile.in Mon Mar 28 16:49:13 2011
@@ -69,7 +69,7 @@
"$(DESTDIR)$(man1dir)" "$(DESTDIR)$(man3dir)" \
"$(DESTDIR)$(man7dir)" "$(DESTDIR)$(infinibandincludedir)" \
"$(DESTDIR)$(librdmacmincludedir)"
-libLTLIBRARIES_INSTALL = $(INSTALL)
+libLTLIBRARIES_INSTALL = $(INSTALL) -m 755
LTLIBRARIES = $(lib_LTLIBRARIES)
src_librdmacm_la_LIBADD =
am_src_librdmacm_la_OBJECTS = src_librdmacm_la-cma.lo \
@@ -76,7 +76,7 @@
src_librdmacm_la-addrinfo.lo src_librdmacm_la-acm.lo
src_librdmacm_la_OBJECTS = $(am_src_librdmacm_la_OBJECTS)
am__dirstamp = $(am__leading_dot)dirstamp
-binPROGRAMS_INSTALL = $(INSTALL_PROGRAM)
+binPROGRAMS_INSTALL = $(INSTALL_PROGRAM) -m 755
PROGRAMS = $(bin_PROGRAMS)
am_examples_mckey_OBJECTS = mckey.$(OBJEXT)
examples_mckey_OBJECTS = $(am_examples_mckey_OBJECTS)
diff -r -u /tmp/librdmacm-1.0.14.1/src/cma.h librdmacm-1.0.14.1/src/cma.h
--- /tmp/librdmacm-1.0.14.1/src/cma.h Mon Oct 4 17:00:18 2010
+++ librdmacm-1.0.14.1/src/cma.h Fri Feb 11 04:08:57 2011
@@ -40,8 +40,10 @@
#include <stdlib.h>
#include <errno.h>
+#if !(defined(__SVR4) && defined(__sun))
#include <endian.h>
#include <byteswap.h>
+#endif
#include <rdma/rdma_cma.h>
@@ -58,14 +60,6 @@
#define PFX "librdmacm: "
-#if __BYTE_ORDER == __LITTLE_ENDIAN
-static inline uint64_t htonll(uint64_t x) { return bswap_64(x); }
-static inline uint64_t ntohll(uint64_t x) { return bswap_64(x); }
-#else
-static inline uint64_t htonll(uint64_t x) { return x; }
-static inline uint64_t ntohll(uint64_t x) { return x; }
-#endif
-
#define min(a, b) (a < b ? a : b)
static inline int ERR(int err)
@@ -74,7 +68,7 @@
return -1;
}
-int ucma_init();
+int ucma_init(void);
extern int af_ib_support;
#define RAI_ROUTEONLY 0x01000000
diff -r -u /tmp/librdmacm-1.0.14.1/src/cma.c librdmacm-1.0.14.1/src/cma.c
--- /tmp/librdmacm-1.0.14.1/src/cma.c Fri Dec 10 12:05:34 2010
+++ librdmacm-1.0.14.1/src/cma.c Mon Mar 28 16:44:55 2011
@@ -46,12 +46,18 @@
#include <poll.h>
#include <unistd.h>
#include <pthread.h>
+#if defined(__SVR4) && defined(__sun)
+#include <sys/stat.h>
+#include <sys/mkdev.h>
+#else
#include <endian.h>
#include <byteswap.h>
+#endif
#include <stddef.h>
#include <netdb.h>
#include "cma.h"
+#include <infiniband/arch.h>
#include <infiniband/driver.h>
#include <infiniband/marshall.h>
#include <rdma/rdma_cma.h>
@@ -100,6 +106,8 @@
struct ibv_pd *pd;
uint64_t guid;
int port_cnt;
+ int refcnt;
+ int max_qpsize;
uint8_t max_initiator_depth;
uint8_t max_responder_resources;
};
@@ -143,6 +151,7 @@
static struct cma_device *cma_dev_array;
static int cma_dev_cnt;
+static int cma_init_cnt;
static pthread_mutex_t mut = PTHREAD_MUTEX_INITIALIZER;
static int abi_ver = RDMA_USER_CM_MAX_ABI_VERSION;
int af_ib_support;
@@ -156,12 +165,16 @@
if (cma_dev_cnt) {
while (cma_dev_cnt--) {
- ibv_dealloc_pd(cma_dev_array[cma_dev_cnt].pd);
+ if (!cma_dev_array[cma_dev_cnt].verbs)
+ continue;
+
+ if (cma_dev_array[cma_dev_cnt].refcnt)
+ ibv_dealloc_pd(cma_dev_array[cma_dev_cnt].pd);
ibv_close_device(cma_dev_array[cma_dev_cnt].verbs);
+ cma_init_cnt--;
}
free(cma_dev_array);
- cma_dev_cnt = 0;
}
}
@@ -228,9 +241,7 @@
int ucma_init(void)
{
struct ibv_device **dev_list = NULL;
- struct cma_device *cma_dev;
- struct ibv_device_attr attr;
- int i, ret, dev_cnt, ib;
+ int i, ret, dev_cnt;
/* Quick check without lock to see if we're already initialized */
if (cma_dev_cnt)
@@ -253,46 +264,20 @@
goto err1;
}
- cma_dev_array = malloc(sizeof *cma_dev * dev_cnt);
+ if (!dev_cnt) {
+ printf("CMA: no RDMA devices found\n");
+ ret = ERR(ENODEV);
+ goto err2;
+ }
+ cma_dev_array = calloc(dev_cnt, sizeof *cma_dev_array);
if (!cma_dev_array) {
ret = ERR(ENOMEM);
goto err2;
}
- for (i = 0, ib = 0; dev_list[i];) {
- cma_dev = &cma_dev_array[i];
+ for (i = 0; dev_list[i]; i++)
+ cma_dev_array[i].guid = ibv_get_device_guid(dev_list[i]);
- cma_dev->guid = ibv_get_device_guid(dev_list[i]);
- cma_dev->verbs = ibv_open_device(dev_list[i]);
- if (!cma_dev->verbs) {
- printf("CMA: unable to open RDMA device\n");
- ret = ERR(ENODEV);
- goto err3;
- }
-
- cma_dev->pd = ibv_alloc_pd(cma_dev->verbs);
- if (!cma_dev->pd) {
- ibv_close_device(cma_dev->verbs);
- ret = ERR(ENOMEM);
- goto err3;
- }
-
- i++;
- ret = ibv_query_device(cma_dev->verbs, &attr);
- if (ret) {
- printf("CMA: unable to query RDMA device\n");
- ret = ERR(ret);
- goto err3;
- }
-
- cma_dev->port_cnt = attr.phys_port_cnt;
- cma_dev->max_initiator_depth = (uint8_t) attr.max_qp_init_rd_atom;
- cma_dev->max_responder_resources = (uint8_t) attr.max_qp_rd_atom;
- ib += (cma_dev->verbs->device->transport_type == IBV_TRANSPORT_IB);
- }
-
- if (ib)
- ucma_ib_init();
cma_dev_cnt = dev_cnt;
ucma_set_af_ib_support();
pthread_mutex_unlock(&mut);
@@ -299,12 +284,6 @@
ibv_free_device_list(dev_list);
return 0;
-err3:
- while (i--) {
- ibv_dealloc_pd(cma_dev_array[i].pd);
- ibv_close_device(cma_dev_array[i].verbs);
- }
- free(cma_dev_array);
err2:
ibv_free_device_list(dev_list);
err1:
@@ -312,12 +291,93 @@
return ret;
}
+static struct ibv_context *ucma_open_device(uint64_t guid)
+{
+ struct ibv_device **dev_list;
+ struct ibv_context *verbs = NULL;
+ int i;
+
+ dev_list = ibv_get_device_list(NULL);
+ if (!dev_list) {
+ fprintf(stderr, PFX "Fatal: unable to get RDMA device list\n");
+ return NULL;
+ }
+
+ for (i = 0; dev_list[i]; i++) {
+ if (ibv_get_device_guid(dev_list[i]) == guid) {
+ verbs = ibv_open_device(dev_list[i]);
+ break;
+ }
+ }
+
+ if (!verbs)
+ fprintf(stderr, PFX "Fatal: unable to open RDMA device\n");
+
+ ibv_free_device_list(dev_list);
+ return verbs;
+}
+
+static int ucma_init_device(struct cma_device *cma_dev)
+{
+ struct ibv_device_attr attr;
+ int ret;
+
+ if (cma_dev->verbs)
+ return 0;
+
+ cma_dev->verbs = ucma_open_device(cma_dev->guid);
+ if (!cma_dev->verbs)
+ return ERR(ENODEV);
+
+ ret = ibv_query_device(cma_dev->verbs, &attr);
+ if (ret) {
+ fprintf(stderr, PFX "Fatal: unable to query RDMA device\n");
+ ret = ERR(ret);
+ goto err;
+ }
+
+ cma_dev->port_cnt = attr.phys_port_cnt;
+ cma_dev->max_qpsize = attr.max_qp_wr;
+ cma_dev->max_initiator_depth = (uint8_t) attr.max_qp_init_rd_atom;
+ cma_dev->max_responder_resources = (uint8_t) attr.max_qp_rd_atom;
+ cma_init_cnt++;
+ return 0;
+
+err:
+ ibv_close_device(cma_dev->verbs);
+ cma_dev->verbs = NULL;
+ return ret;
+}
+
+static int ucma_init_all(void)
+{
+ int i, ret = 0;
+
+ if (!cma_dev_cnt) {
+ ret = ucma_init();
+ if (ret)
+ return ret;
+ }
+
+ if (cma_init_cnt == cma_dev_cnt)
+ return 0;
+
+ pthread_mutex_lock(&mut);
+ for (i = 0; i < cma_dev_cnt; i++) {
+ ret = ucma_init_device(&cma_dev_array[i]);
+ if (ret)
+ break;
+ }
+ pthread_mutex_unlock(&mut);
+ return ret;
+}
+
struct ibv_context **rdma_get_devices(int *num_devices)
{
struct ibv_context **devs = NULL;
int i;
- if (ucma_init())
+ if (ucma_init_all())
goto out;
devs = malloc(sizeof *devs * (cma_dev_cnt + 1));
@@ -354,9 +414,18 @@
if (!channel)
return NULL;
+#if defined(__SVR4) && defined(__sun)
+ channel->fd = open("/dev/infiniband/ofs/rdma_cm", O_RDWR);
+#else
channel->fd = open("/dev/infiniband/rdma_cm", O_RDWR);
+#endif
+
if (channel->fd < 0) {
+#if defined(__SVR4) && defined(__sun)
+ printf("CMA: unable to open /dev/infiniband/ofs/rdma_cm\n");
+#else
printf("CMA: unable to open /dev/infiniband/rdma_cm\n");
+#endif
goto err;
}
return channel;
@@ -374,18 +443,33 @@
static int ucma_get_device(struct cma_id_private *id_priv, uint64_t guid)
{
struct cma_device *cma_dev;
- int i;
+ int i, ret;
for (i = 0; i < cma_dev_cnt; i++) {
cma_dev = &cma_dev_array[i];
- if (cma_dev->guid == guid) {
- id_priv->cma_dev = cma_dev;
- id_priv->id.verbs = cma_dev->verbs;
- return 0;
- }
+ if (cma_dev->guid == guid)
+ goto match;
}
-
+
return ERR(ENODEV);
+match:
+ pthread_mutex_lock(&mut);
+ if ((ret = ucma_init_device(cma_dev)))
+ goto out;
+
+ if (!cma_dev->refcnt++) {
+ cma_dev->pd = ibv_alloc_pd(cma_dev->verbs);
+ if (!cma_dev->pd) {
+ cma_dev->refcnt--;
+ ret = ERR(ENOMEM);
+ goto out;
+ }
+ }
+ id_priv->cma_dev = cma_dev;
+ id_priv->id.verbs = cma_dev->verbs;
+out:
+ pthread_mutex_unlock(&mut);
+ return ret;
}
static void ucma_free_id(struct cma_id_private *id_priv)
@@ -1186,6 +1270,10 @@
if (ret)
return ret;
+#if defined(__SVR4) && defined(__sun)
+ qp_init_attr->sq_sig_all |= LIB_RDMACM_QP_BIT;
+#endif
+
qp = ibv_create_qp(pd, qp_init_attr);
if (!qp) {
ret = ERR(ENOMEM);
@@ -1787,6 +1875,9 @@
CMA_CREATE_MSG_CMD(msg, cmd, UCMA_CMD_ACCEPT, size);
cmd->id = id_priv->handle;
+#if defined(__SVR4) && defined(__sun)
+ cmd->conn_param.qp_num = ((id_priv->id).qp)->qp_num;
+#endif
ret = write(id_priv->id.channel->fd, msg, size);
if (ret != size) {
@@ -2051,8 +2142,20 @@
CMA_CREATE_MSG_CMD_RESP(msg, cmd, resp, UCMA_CMD_MIGRATE_ID, size);
cmd->id = id_priv->handle;
+
+#if !(defined(__SVR4) && defined(__sun))
cmd->fd = id->channel->fd;
+#else
+ {
+ struct stat fstat_buf;
+ int rc;
+ if ((rc = fstat(id->channel->fd, &fstat_buf)) != 0)
+ return (ERR(ENODATA));
+ cmd->fd = minor(fstat_buf.st_rdev);
+ }
+#endif
+
ret = write(channel->fd, msg, size);
if (ret != size)
return (ret >= 0) ? ERR(ENODATA) : -1;
diff -r -u /tmp/librdmacm-1.0.14.1/man/rdma_create_id.3 librdmacm-1.0.14.1/man/rdma_create_id.3
--- /tmp/librdmacm-1.0.14.1/man/rdma_create_id.3 Mon Oct 4 17:00:18 2010
+++ librdmacm-1.0.14.1/man/rdma_create_id.3 Mon Mar 28 03:11:48 2011
@@ -31,9 +31,7 @@
explicitly binding to a specified RDMA device before communication
can occur, and most operations are asynchronous in nature. Asynchronous
communication events on an rdma_cm_id are reported through the associated
-event channel. If the channel parameter is NULL, the rdma_cm_id will
-be placed into synchronous operation. While operating synchronously,
-calls that result in an event will block until the operation completes.
+event channel.
The event will be returned to the user through the rdma_cm_id structure,
and be available for access until another rdma_cm call is made.
.P
diff -r -u /tmp/librdmacm-1.0.14.1/man/rdma_create_qp.3 librdmacm-1.0.14.1/man/rdma_create_qp.3
--- /tmp/librdmacm-1.0.14.1/man/rdma_create_qp.3 Fri Dec 10 12:05:34 2010
+++ librdmacm-1.0.14.1/man/rdma_create_qp.3 Mon Mar 28 03:11:48 2011
@@ -33,8 +33,7 @@
the rdma_cm_id will be created using a default protection domain. One
default protection domain is allocated per RDMA device.
.P
-The initial QP attributes are specified by the qp_init_attr parameter. The
-send_cq and recv_cq fields in the ibv_qp_init_attr are optional. If
+The initial QP attributes are specified by the qp_init_attr parameter. If
a send or receive completion queue is not specified, then a CQ will be
allocated by the rdma_cm for the QP, along with corresponding completion
channels. Completion channels and CQ data created by the rdma_cm are
diff -r -u /tmp/librdmacm-1.0.14.1/man/rdma_cm.7 librdmacm-1.0.14.1/man/rdma_cm.7
--- /tmp/librdmacm-1.0.14.1/man/rdma_cm.7 Mon Oct 4 17:00:18 2010
+++ librdmacm-1.0.14.1/man/rdma_cm.7 Mon Mar 28 03:11:47 2011
@@ -19,7 +19,7 @@
API defined by the libibverbs library. The libibverbs library provides the
underlying interfaces needed to send and receive data.
.P
-The RDMA CM can operate asynchronously or synchronously. The mode of
+The RDMA CM operates asynchronously. The mode of
operation is controlled by the user through the use of the rdma_cm event channel
parameter in specific calls. If an event channel is provided, an rdma_cm identifier
will report its event data (results of connecting, for example), on that channel.
@@ -63,12 +63,7 @@
.SH "CLIENT OPERATION"
This section provides a general overview of the basic operation for the active,
or client, side of communication. This flow assume asynchronous operation with
-low level call details shown. For
-synchronous operation, calls to rdma_create_event_channel, rdma_get_cm_event,
-rdma_ack_cm_event, and rdma_destroy_event_channel
-would be eliminated. Abstracted calls, such as rdma_create_ep encapsulate
-serveral of these calls under a single API.
-Users may also refer to the example applications for
+low level call details shown. Users may also refer to the example applications for
code samples. A general connection flow would be:
.IP rdma_getaddrinfo
retrieve address information of the destination
@@ -178,12 +173,9 @@
rdma_ack_cm_event(3),
rdma_bind_addr(3),
rdma_connect(3),
-rdma_create_ep(3),
rdma_create_event_channel(3),
rdma_create_id(3),
rdma_create_qp(3),
-rdma_dereg_mr(3),
-rdma_destroy_ep(3),
rdma_destroy_event_channel(3),
rdma_destroy_id(3),
rdma_destroy_qp(3),
@@ -196,27 +188,11 @@
rdma_get_dst_port(3),
rdma_get_local_addr(3),
rdma_get_peer_addr(3),
-rdma_get_recv_comp(3),
-rdma_get_request(3),
-rdma_get_send_comp(3),
rdma_get_src_port(3),
rdma_join_multicast(3),
rdma_leave_multicast(3),
rdma_listen(3),
-rdma_migrate_id(3),
rdma_notify(3),
-rdma_post_read(3)
-rdma_post_readv(3),
-rdma_post_recv(3),
-rdma_post_recvv(3),
-rdma_post_send(3),
-rdma_post_sendv(3),
-rdma_post_ud_send(3),
-rdma_post_write(3),
-rdma_post_writev(3),
-rdma_reg_msgs(3),
-rdma_reg_read(3),
-rdma_reg_write(3),
rdma_reject(3),
rdma_resolve_addr(3),
rdma_resolve_route(3),
diff -r -u /tmp/librdmacm-1.0.14.1/include/infiniband/ib.h librdmacm-1.0.14.1/include/infiniband/ib.h
--- /tmp/librdmacm-1.0.14.1/include/infiniband/ib.h Mon Oct 4 17:00:18 2010
+++ librdmacm-1.0.14.1/include/infiniband/ib.h Fri Feb 11 04:08:56 2011
@@ -33,7 +33,11 @@
#if !defined(_RDMA_IB_H)
#define _RDMA_IB_H
+#if !(defined(__SVR4) && defined(__sun))
#include <linux/types.h>
+#else
+#include <infiniband/ofa_solaris.h>
+#endif
#include <string.h>
#ifndef AF_IB
diff -r -u /tmp/librdmacm-1.0.14.1/include/rdma/rdma_cma_abi.h librdmacm-1.0.14.1/include/rdma/rdma_cma_abi.h
--- /tmp/librdmacm-1.0.14.1/include/rdma/rdma_cma_abi.h Mon Oct 4 17:00:18 2010
+++ librdmacm-1.0.14.1/include/rdma/rdma_cma_abi.h Fri Feb 11 04:08:48 2011
@@ -104,6 +104,9 @@
__u64 response;
struct sockaddr_in6 addr;
__u32 id;
+#if defined(__SVR4) && defined(__sun)
+ uint32_t reserved;
+#endif
};
struct ucma_abi_bind {
@@ -243,6 +246,9 @@
__u64 uid;
struct sockaddr_in6 addr;
__u32 id;
+#if defined(__SVR4) && defined(__sun)
+ uint32_t reserved;
+#endif
};
struct ucma_abi_join_mcast {
diff -r -u /tmp/librdmacm-1.0.14.1/examples/udaddy.c librdmacm-1.0.14.1/examples/udaddy.c
--- /tmp/librdmacm-1.0.14.1/examples/udaddy.c Mon Oct 4 17:00:18 2010
+++ librdmacm-1.0.14.1/examples/udaddy.c Fri Feb 11 04:08:48 2011
@@ -40,7 +40,9 @@
#include <netinet/in.h>
#include <sys/socket.h>
#include <netdb.h>
+#if !(defined(__SVR4) && defined(__sun))
#include <byteswap.h>
+#endif
#include <getopt.h>
#include <rdma/rdma_cma.h>
diff -r -u /tmp/librdmacm-1.0.14.1/examples/mckey.c librdmacm-1.0.14.1/examples/mckey.c
--- /tmp/librdmacm-1.0.14.1/examples/mckey.c Mon Oct 4 17:00:18 2010
+++ librdmacm-1.0.14.1/examples/mckey.c Fri Feb 11 04:08:48 2011
@@ -41,7 +41,9 @@
#include <arpa/inet.h>
#include <sys/socket.h>
#include <netdb.h>
+#if !(defined(__SVR4) && defined(__sun))
#include <byteswap.h>
+#endif
#include <unistd.h>
#include <getopt.h>
@@ -329,6 +331,16 @@
while (1) {
ret = rdma_get_cm_event(test.channel, &event);
+
+/*
+ * Solaris returns EBADF if we close the channel while we're waiting
+ * for any events to occur. It is safe to ignore EBADF here.
+ */
+#if defined(__SVR4) && defined(__sun)
+ if (ret && (errno == EBADF))
+ break;
+#endif
+
if (ret) {
perror("rdma_get_cm_event");
break;
@@ -461,6 +473,7 @@
return ret;
}
+#if !(defined(__SVR4) && defined(__sun))
static int get_dst_addr(char *dst, struct sockaddr *addr)
{
struct sockaddr_ib *sib;
@@ -474,6 +487,7 @@
inet_pton(AF_INET6, dst, &sib->sib_addr);
return 0;
}
+#endif
static int run(void)
{
@@ -486,7 +500,12 @@
return ret;
}
+/* Solaris does not yet support family AF_IB */
+#if defined(__SVR4) && defined(__sun)
+ ret = get_addr(dst_addr, (struct sockaddr *) &test.dst_in);
+#else
ret = get_dst_addr(dst_addr, (struct sockaddr *) &test.dst_in);
+#endif
if (ret)
return ret;
diff -r -u /tmp/librdmacm-1.0.14.1/examples/cmatose.c librdmacm-1.0.14.1/examples/cmatose.c
--- /tmp/librdmacm-1.0.14.1/examples/cmatose.c Mon Oct 4 17:00:18 2010
+++ librdmacm-1.0.14.1/examples/cmatose.c Fri Feb 11 04:08:48 2011
@@ -40,7 +40,9 @@
#include <netinet/in.h>
#include <sys/socket.h>
#include <netdb.h>
+#if !(defined(__SVR4) && defined(__sun))
#include <byteswap.h>
+#endif
#include <getopt.h>
#include <rdma/rdma_cma.h>
diff -r -u /tmp/librdmacm-1.0.14.1/examples/rping.c librdmacm-1.0.14.1/examples/rping.c
--- /tmp/librdmacm-1.0.14.1/examples/rping.c Tue Feb 15 17:10:48 2011
+++ librdmacm-1.0.14.1/examples/rping.c Wed Apr 23 10:37:57 2014
@@ -40,11 +40,17 @@
#include <netinet/in.h>
#include <sys/socket.h>
#include <netdb.h>
+#if !(defined(__SVR4) && defined(__sun))
#include <byteswap.h>
+#endif
#include <semaphore.h>
#include <arpa/inet.h>
#include <pthread.h>
#include <inttypes.h>
+#if defined(__SVR4) && defined(__sun)
+#include <unistd.h>
+#include <libgen.h>
+#endif
#include <rdma/rdma_cma.h>
#include <infiniband/arch.h>
@@ -85,6 +91,13 @@
ERROR
};
+enum disconnect_state {
+ DISCONNECT_NONE,
+ CALLING_DISCONNECT = 1,
+ DISCONNECT_CALLED,
+ DISCONNECT_DONE
+};
+
struct rping_rdma_info {
uint64_t buf;
uint32_t rkey;
@@ -143,6 +156,9 @@
enum test_state state; /* used for cond/signalling */
sem_t sem;
+ enum disconnect_state dis_state;
+ sem_t dis_sem;
+
struct sockaddr_storage sin;
uint16_t port; /* dst port in NBO */
int verbose; /* verbose logging */
@@ -218,6 +234,8 @@
fprintf(stderr, "%s DISCONNECT EVENT...\n",
cb->server ? "server" : "client");
sem_post(&cb->sem);
+ cb->dis_state = DISCONNECT_DONE;
+ sem_post(&cb->dis_sem);
break;
case RDMA_CM_EVENT_DEVICE_REMOVAL:
@@ -285,6 +303,29 @@
"cq completion failed status %d\n",
wc.status);
ret = -1;
+ } else {
+ /*
+ * A FLUSH error can be polled before the RDMA-CM
+ * DISCONNECT event is notified. Ensure that the
+ * disconnect state is checked in such a case.
+ * Sleep for some time if disconnect has not
+ * been called; the flushed WR may be because
+ * the remote end initiated the disconnect.
+ */
+ if (cb->dis_state == DISCONNECT_NONE)
+ sleep(2);
+
+ if (cb->dis_state == DISCONNECT_DONE)
+ return (0);
+
+ /* Wait if disconnect is called. */
+ if (cb->dis_state == DISCONNECT_CALLED) {
+ sem_wait(&cb->dis_sem);
+ if (cb->dis_state == DISCONNECT_DONE)
+ return (0);
+ else
+ goto error;
+ }
}
goto error;
}
@@ -571,9 +612,15 @@
while (1) {
ret = rdma_get_cm_event(cb->cm_channel, &event);
- if (ret) {
+ /*
+ * The retry of the read() syscall returns EBADF if the
+ * file was closed on process exit. Ignore this error.
+ */
+ if (ret && errno != EBADF) {
perror("rdma_get_cm_event");
exit(ret);
+ } else if (ret && errno == EBADF) {
+ exit(0);
}
ret = rping_cma_event_handler(event->id, event);
rdma_ack_cm_event(event);
@@ -595,8 +642,14 @@
pthread_testcancel();
ret = ibv_get_cq_event(cb->channel, &ev_cq, &ev_ctx);
- if (ret) {
+ /*
+ * The retry of the write() syscall returns EBADF if the
+ * file was closed on process exit. Ignore this error.
+ */
+ if (ret && errno != EBADF) {
fprintf(stderr, "Failed to get cq event!\n");
+ pthread_exit(NULL);
+ } else if (ret && errno == EBADF) {
pthread_exit(NULL);
}
if (ev_cq != cb->cq) {
@@ -801,11 +854,13 @@
}
rping_test_server(cb);
+ cb->dis_state = CALLING_DISCONNECT;
+ sem_post(&cb->dis_sem);
rdma_disconnect(cb->child_cm_id);
- rping_free_buffers(cb);
- rping_free_qp(cb);
pthread_cancel(cb->cqthread);
pthread_join(cb->cqthread, NULL);
+ rping_free_buffers(cb);
+ rping_free_qp(cb);
rdma_destroy_id(cb->child_cm_id);
free_cb(cb);
return NULL;
@@ -889,6 +944,8 @@
}
rping_test_server(cb);
+ cb->dis_state = CALLING_DISCONNECT;
+ sem_post(&cb->dis_sem);
rdma_disconnect(cb->child_cm_id);
rdma_destroy_id(cb->child_cm_id);
err2:
@@ -1056,6 +1113,8 @@
}
rping_test_client(cb);
+ cb->dis_state = CALLING_DISCONNECT;
+ sem_post(&cb->dis_sem);
rdma_disconnect(cb->cm_id);
err2:
rping_free_buffers(cb);
@@ -1123,6 +1182,7 @@
cb->sin.ss_family = PF_INET;
cb->port = htons(7174);
sem_init(&cb->sem, 0, 0);
+ sem_init(&cb->dis_sem, 0, 0);
opterr = 0;
while ((op=getopt(argc, argv, "a:Pp:C:S:t:scvVd")) != -1) {