PSARC/2017/028 OFUV Exafusion support: XRC and RDMA_OPTION_ID_TOS
25759055 OFUV (Userland) support for XRC APIs
22595881 defer librdmacm allocation of PD on ADDRESS_RESOLVED event
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/components/open-fabrics/libibverbs/patches/004-libibverbs-explorer-hangs-running-ibis-with-Titan-cards.patch Thu Apr 13 20:30:48 2017 -0700
@@ -0,0 +1,18 @@
+#This patch was developed both in-house and from outside. We plan to submit it
+#upstream, but do not yet have a target date for doing so
+#
+# HG changeset patch
+# Parent b116e18142b1d4ec433b67c77f389bf975cc8c42
+22741696 Explorer hangs running ibis on system with Titan cards
+
+diff -r b116e18142b1 src/init.c
+--- a/src/init.c Tue Apr 19 10:06:00 2016 -0700
++++ b/src/init.c Tue Apr 19 10:17:34 2016 -0700
+@@ -603,6 +603,7 @@
+ }
+ free(sysfs_dev);
+ }
++ sysfs_dev_list = NULL;
+
+ return num_devices;
+ }
--- a/components/open-fabrics/libibverbs/patches/004-libibverbs-man-changes-for-no-xrc.patch Thu Apr 13 13:20:29 2017 -0700
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,231 +0,0 @@
-#This patch was developed both in-house and from outside. We plan to submit it
-#upstream, but do not yet have a target date for doing so
-#
-# HG changeset patch
-# Parent 3903116dd520bb552df4e8fa55f573d1cb9b9097
-remove xrc man pages
-
-diff -r 3903116dd520 Makefile.am
---- a/Makefile.am Wed Jan 13 09:20:30 2016 -0800
-+++ b/Makefile.am Wed Jan 13 09:32:13 2016 -0800
-@@ -63,8 +63,7 @@
- man/ibv_query_pkey.3 man/ibv_query_port.3 man/ibv_query_qp.3 \
- man/ibv_query_srq.3 man/ibv_rate_to_mult.3 man/ibv_reg_mr.3 \
- man/ibv_req_notify_cq.3 man/ibv_resize_cq.3 man/ibv_rate_to_mbps.3 \
-- man/ibv_create_qp_ex.3 man/ibv_create_srq_ex.3 man/ibv_open_xrcd.3 \
-- man/ibv_get_srq_num.3 man/ibv_open_qp.3
-+ man/ibv_open_xrcd.3 man/ibv_open_qp.3
-
- DEBIAN = debian/changelog debian/compat debian/control debian/copyright \
- debian/ibverbs-utils.install debian/libibverbs1.install \
-diff -r 3903116dd520 Makefile.in
---- a/Makefile.in Wed Jan 13 09:20:30 2016 -0800
-+++ b/Makefile.in Wed Jan 13 09:32:13 2016 -0800
-@@ -494,8 +494,7 @@
- man/ibv_query_pkey.3 man/ibv_query_port.3 man/ibv_query_qp.3 \
- man/ibv_query_srq.3 man/ibv_rate_to_mult.3 man/ibv_reg_mr.3 \
- man/ibv_req_notify_cq.3 man/ibv_resize_cq.3 man/ibv_rate_to_mbps.3 \
-- man/ibv_create_qp_ex.3 man/ibv_create_srq_ex.3 man/ibv_open_xrcd.3 \
-- man/ibv_get_srq_num.3 man/ibv_open_qp.3
-+ man/ibv_open_xrcd.3 man/ibv_open_qp.3
-
- DEBIAN = debian/changelog debian/compat debian/control debian/copyright \
- debian/ibverbs-utils.install debian/libibverbs1.install \
-diff -r 3903116dd520 man/ibv_create_qp_ex.3
---- a/man/ibv_create_qp_ex.3 Wed Jan 13 09:20:30 2016 -0800
-+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
-@@ -1,83 +0,0 @@
--.\" -*- nroff -*-
--.\"
--.TH IBV_CREATE_QP_EX 3 2013-06-26 libibverbs "Libibverbs Programmer's Manual"
--.SH "NAME"
--ibv_create_qp_ex, ibv_destroy_qp \- create or destroy a queue pair (QP)
--.SH "SYNOPSIS"
--.nf
--.B #include <infiniband/verbs.h>
--.sp
--.BI "struct ibv_qp *ibv_create_qp_ex(struct ibv_context " "*context" ,
--.BI " struct ibv_qp_init_attr_ex " "*qp_init_attr" );
--.sp
--.BI "int ibv_destroy_qp(struct ibv_qp " "*qp" );
--.fi
--.SH "DESCRIPTION"
--.B ibv_create_qp_ex()
--creates a queue pair (QP) associated with the protection domain
--.I pd\fR.
--The argument
--.I qp_init_attr_ex
--is an ibv_qp_init_attr_ex struct, as defined in <infiniband/verbs.h>.
--.PP
--.nf
--struct ibv_qp_init_attr_ex {
--.in +8
--void *qp_context; /* Associated context of the QP */
--struct ibv_cq *send_cq; /* CQ to be associated with the Send Queue (SQ) */
--struct ibv_cq *recv_cq; /* CQ to be associated with the Receive Queue (RQ) */
--struct ibv_srq *srq; /* SRQ handle if QP is to be associated with an SRQ, otherwise NULL */
--struct ibv_qp_cap cap; /* QP capabilities */
--enum ibv_qp_type qp_type; /* QP Transport Service Type: IBV_QPT_RC, IBV_QPT_UC, IBV_QPT_UD or IBV_QPT_RAW_PACKET */
--int sq_sig_all; /* If set, each Work Request (WR) submitted to the SQ generates a completion entry */
--uint32_t comp_mask; /* Identifies valid fields */
--struct ibv_pd *pd; /* PD to be associated with the QP */
--struct ibv_xrcd *xrcd; /* XRC domain to be associated with the target QP */
--enum ibv_qp_create_flags create_flags; /* Creation flags for this QP */
--.in -8
--};
--.sp
--.nf
--struct ibv_qp_cap {
--.in +8
--uint32_t max_send_wr; /* Requested max number of outstanding WRs in the SQ */
--uint32_t max_recv_wr; /* Requested max number of outstanding WRs in the RQ */
--uint32_t max_send_sge; /* Requested max number of scatter/gather (s/g) elements in a WR in the SQ */
--uint32_t max_recv_sge; /* Requested max number of s/g elements in a WR in the SQ */
--uint32_t max_inline_data;/* Requested max number of data (bytes) that can be posted inline to the SQ, otherwise 0 */
--.in -8
--};
--.fi
--.PP
--The function
--.B ibv_create_qp_ex()
--will update the
--.I qp_init_attr_ex\fB\fR->cap
--struct with the actual \s-1QP\s0 values of the QP that was created;
--the values will be greater than or equal to the values requested.
--.PP
--.B ibv_destroy_qp()
--destroys the QP
--.I qp\fR.
--.SH "RETURN VALUE"
--.B ibv_create_qp_ex()
--returns a pointer to the created QP, or NULL if the request fails.
--Check the QP number (\fBqp_num\fR) in the returned QP.
--.PP
--.B ibv_destroy_qp()
--returns 0 on success, or the value of errno on failure (which indicates the failure reason).
--.SH "NOTES"
--.PP
--The attributes max_recv_wr and max_recv_sge are ignored by
--.B ibv_create_qp_ex()
--if the QP is to be associated with an SRQ.
--.PP
--.B ibv_destroy_qp()
--fails if the QP is attached to a multicast group.
--.SH "SEE ALSO"
--.BR ibv_alloc_pd (3),
--.BR ibv_modify_qp (3),
--.BR ibv_query_qp (3)
--.SH "AUTHORS"
--.TP
--Yishai Hadas <[email protected]>
-diff -r 3903116dd520 man/ibv_create_srq_ex.3
---- a/man/ibv_create_srq_ex.3 Wed Jan 13 09:20:30 2016 -0800
-+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
-@@ -1,71 +0,0 @@
--.\" -*- nroff -*-
--.\"
--.TH IBV_CREATE_SRQ_EX 3 2013-06-26 libibverbs "Libibverbs Programmer's Manual"
--.SH "NAME"
--ibv_create_srq_ex, ibv_destroy_srq \- create or destroy a shared receive queue (SRQ)
--.SH "SYNOPSIS"
--.nf
--.B #include <infiniband/verbs.h>
--.sp
--.BI "struct ibv_srq *ibv_create_srq_ex(struct ibv_context " "*context" ", struct "
--.BI " ibv_srq_init_attr_ex " "*srq_init_attr_ex" );
--.sp
--.BI "int ibv_destroy_srq(struct ibv_srq " "*srq" );
--.fi
--.SH "DESCRIPTION"
--.B ibv_create_srq_ex()
--creates a shared receive queue (SRQ) supporting both basic and xrc modes.
--The argument
--.I srq_init_attr_ex
--is an ibv_srq_init_attr_ex struct, as defined in <infiniband/verbs.h>.
--.PP
--.nf
--struct ibv_srq_init_attr_ex {
--.in +8
--void *srq_context; /* Associated context of the SRQ */
--struct ibv_srq_attr attr; /* SRQ attributes */
--uint32_t comp_mask; /* Identifies valid fields */
--enum ibv_srq_type srq_type; /* Basic / XRC */
--struct ibv_pd *pd; /* PD associated with the SRQ */
--struct ibv_xrcd *xrcd; /* XRC domain to associate with the SRQ */
--struct ibv_cq *cq; /* CQ to associate with the SRQ for XRC mode */
--.in -8
--};
--.sp
--.nf
--struct ibv_srq_attr {
--.in +8
--uint32_t max_wr; /* Requested max number of outstanding work requests (WRs) in the SRQ */
--uint32_t max_sge; /* Requested max number of scatter elements per WR */
--uint32_t srq_limit; /* The limit value of the SRQ */
--.in -8
--};
--.fi
--.PP
--The function
--.B ibv_create_srq_ex()
--will update the
--.I srq_init_attr_ex
--struct with the original values of the SRQ that was created; the
--values of max_wr and max_sge will be greater than or equal to the
--values requested.
--.PP
--.B ibv_destroy_srq()
--destroys the SRQ
--.I srq\fR.
--.SH "RETURN VALUE"
--.B ibv_create_srq_ex()
--returns a pointer to the created SRQ, or NULL if the request fails.
--.PP
--.B ibv_destroy_srq()
--returns 0 on success, or the value of errno on failure (which indicates the failure reason).
--.SH "NOTES"
--.B ibv_destroy_srq()
--fails if any queue pair is still associated with this SRQ.
--.SH "SEE ALSO"
--.BR ibv_alloc_pd (3),
--.BR ibv_modify_srq (3),
--.BR ibv_query_srq (3)
--.SH "AUTHORS"
--.TP
--Yishai Hadas <[email protected]>
-diff -r 3903116dd520 man/ibv_get_srq_num.3
---- a/man/ibv_get_srq_num.3 Wed Jan 13 09:20:30 2016 -0800
-+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
-@@ -1,32 +0,0 @@
--.\" -*- nroff -*-
--.\"
--.TH IBV_GET_SRQ_NUM 3 2013-06-26 libibverbs "Libibverbs Programmer's Manual"
--.SH "NAME"
--ibv_get_srq_num \- return srq number associated with the given shared receive queue (SRQ)
--.SH "SYNOPSIS"
--.nf
--.B #include <infiniband/verbs.h>
--.sp
--.BI "int ibv_get_srq_num(struct ibv_srq " "*srq" ,
--.BI " uint32_t " "*srq_num" );
--.fi
--.SH "DESCRIPTION"
--.B ibv_get_srq_num()
--return srq number associated with the given shared receive queue
--The argument
--.I srq
--is an ibv_srq struct, as defined in <infiniband/verbs.h>.
--.I srq_num
--is an output parameter that holds the returned srq number.
--.PP
--.nf
--.SH "RETURN VALUE"
--.B ibv_get_srq_num()
--returns 0 on success, or the value of errno on failure (which indicates the failure reason).
--.SH "SEE ALSO"
--.BR ibv_alloc_pd (3),
--.BR ibv_modify_srq (3),
--.BR ibv_create_srq_ex (3)
--.SH "AUTHORS"
--.TP
--Yishai Hadas <[email protected]>
--- a/components/open-fabrics/libibverbs/patches/005-libibverbs-explorer-hangs-running-ibis-with-Titan-cards.patch Thu Apr 13 13:20:29 2017 -0700
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,18 +0,0 @@
-#This patch was developed both in-house and from outside. We plan to submit it
-#upstream, but do not yet have a target date for doing so
-#
-# HG changeset patch
-# Parent b116e18142b1d4ec433b67c77f389bf975cc8c42
-22741696 Explorer hangs running ibis on system with Titan cards
-
-diff -r b116e18142b1 src/init.c
---- a/src/init.c Tue Apr 19 10:06:00 2016 -0700
-+++ b/src/init.c Tue Apr 19 10:17:34 2016 -0700
-@@ -603,6 +603,7 @@
- }
- free(sysfs_dev);
- }
-+ sysfs_dev_list = NULL;
-
- return num_devices;
- }
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/components/open-fabrics/libibverbs/patches/005-libibverbs-xrc.patch Thu Apr 13 20:30:48 2017 -0700
@@ -0,0 +1,832 @@
+#This patch was developed both in-house and from outside. We plan to submit it
+#upstream, but do not yet have a target date for doing so
+#
+# HG changeset patch
+# Parent f8684a1d3f02b9cc10a686daa8659805384ba51a
+25759055 OFUV (Userland) support for XRC APIs
+
+diff -r f8684a1d3f02 Makefile.am
+--- a/Makefile.am Mon Nov 21 11:48:20 2016 -0800
++++ b/Makefile.am Mon Mar 20 14:32:42 2017 -0700
+@@ -45,7 +45,8 @@
+
+ libibverbsinclude_HEADERS = include/infiniband/arch.h include/infiniband/driver.h \
+ include/infiniband/kern-abi.h include/infiniband/opcode.h include/infiniband/verbs.h \
+- include/infiniband/sa-kern-abi.h include/infiniband/sa.h include/infiniband/marshall.h
++ include/infiniband/sa-kern-abi.h include/infiniband/sa.h include/infiniband/marshall.h \
++ include/infiniband/ofa_verbs.h
+
+ man_MANS = man/ibv_asyncwatch.1 man/ibv_devices.1 man/ibv_devinfo.1 \
+ man/ibv_shpd_pingpong.1 \
+@@ -64,7 +65,8 @@
+ man/ibv_query_srq.3 man/ibv_rate_to_mult.3 man/ibv_reg_mr.3 \
+ man/ibv_req_notify_cq.3 man/ibv_resize_cq.3 man/ibv_rate_to_mbps.3 \
+ man/ibv_create_qp_ex.3 man/ibv_create_srq_ex.3 man/ibv_open_xrcd.3 \
+- man/ibv_get_srq_num.3 man/ibv_open_qp.3
++ man/ibv_get_srq_num.3 man/ibv_open_qp.3 man/ibv_create_xsrq.3 \
++ man/ibv_xsrq_pingpong.1
+
+ DEBIAN = debian/changelog debian/compat debian/control debian/copyright \
+ debian/ibverbs-utils.install debian/libibverbs1.install \
+diff -r f8684a1d3f02 Makefile.in
+--- a/Makefile.in Mon Nov 21 11:48:20 2016 -0800
++++ b/Makefile.in Mon Mar 20 14:32:42 2017 -0700
+@@ -476,7 +476,8 @@
+ libibverbsincludedir = $(includedir)/infiniband
+ libibverbsinclude_HEADERS = include/infiniband/arch.h include/infiniband/driver.h \
+ include/infiniband/kern-abi.h include/infiniband/opcode.h include/infiniband/verbs.h \
+- include/infiniband/sa-kern-abi.h include/infiniband/sa.h include/infiniband/marshall.h include/infiniband/ofa_solaris.h
++ include/infiniband/sa-kern-abi.h include/infiniband/sa.h include/infiniband/marshall.h \
++ include/infiniband/ofa_solaris.h include/infiniband/ofa_verbs.h
+
+ man_MANS = man/ibv_asyncwatch.1 man/ibv_devices.1 man/ibv_devinfo.1 \
+ man/ibv_shpd_pingpong.1 \
+@@ -495,7 +496,8 @@
+ man/ibv_query_srq.3 man/ibv_rate_to_mult.3 man/ibv_reg_mr.3 \
+ man/ibv_req_notify_cq.3 man/ibv_resize_cq.3 man/ibv_rate_to_mbps.3 \
+ man/ibv_create_qp_ex.3 man/ibv_create_srq_ex.3 man/ibv_open_xrcd.3 \
+- man/ibv_get_srq_num.3 man/ibv_open_qp.3
++ man/ibv_get_srq_num.3 man/ibv_open_qp.3 man/ibv_create_xsrq.3 \
++ man/ibv_xsrq_pingpong.1
+
+ DEBIAN = debian/changelog debian/compat debian/control debian/copyright \
+ debian/ibverbs-utils.install debian/libibverbs1.install \
+diff -r f8684a1d3f02 include/infiniband/ofa_verbs.h
+--- /dev/null Thu Jan 01 00:00:00 1970 +0000
++++ b/include/infiniband/ofa_verbs.h Mon Mar 20 14:32:42 2017 -0700
+@@ -0,0 +1,140 @@
++/*
++ * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
++ * Copyright (c) 2004, 2011-2012 Intel Corporation. All rights reserved.
++ * Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved.
++ * Copyright (c) 2005 PathScale, Inc. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses. You may choose to be licensed under the terms of the GNU
++ * General Public License (GPL) Version 2, available from the file
++ * COPYING in the main directory of this source tree, or the
++ * OpenIB.org BSD license below:
++ *
++ * Redistribution and use in source and binary forms, with or
++ * without modification, are permitted provided that the following
++ * conditions are met:
++ *
++ * - Redistributions of source code must retain the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials
++ * provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ */
++
++#ifndef INFINIBAND_OFA_VERBS_H
++#define INFINIBAND_OFA_VERBS_H
++
++struct ibv_srq_init_attr;
++struct ibv_cq;
++struct ibv_pd;
++struct ibv_qp_init_attr;
++struct ibv_qp_attr;
++
++
++#ifdef __GNUC__
++#define DEPRECATED __attribute__((deprecated))
++#else
++#define DEPRECATED
++#endif
++
++/* XRC compatibility layer */
++#define LEGACY_XRC_SRQ_HANDLE 0xffffffff
++
++struct ibv_xrc_domain { /* legacy XRC domain type returned by ibv_open_xrc_domain() */
++ struct ibv_context *context;
++ uint32_t handle;
++};
++
++struct ibv_srq_legacy { /* legacy SRQ wrapper allocated and filled in by ibv_create_xrc_srq() */
++ struct ibv_context *context;
++ void *srq_context;
++ struct ibv_pd *pd;
++ uint32_t handle; /* LEGACY_XRC_SRQ_HANDLE marks this as a legacy structure */
++
++ uint32_t events_completed;
++ /* Binary-compatibility copies of the XRC SRQ number, domain and CQ. */
++ uint32_t xrc_srq_num_bin_compat;
++ struct ibv_xrc_domain *xrc_domain_bin_compat;
++ struct ibv_cq *xrc_cq_bin_compat;
++
++ pthread_mutex_t mutex;
++ pthread_cond_t cond;
++
++ void *ibv_srq; /* internal struct ibv_srq to use in place of this legacy structure */
++ /*
++ * Below fields are for legacy source compatibility. They reside
++ * on the same offset as of those fields in struct ibv_srq.
++ */
++ uint32_t xrc_srq_num;
++ struct ibv_xrc_domain *xrc_domain;
++ struct ibv_cq *xrc_cq;
++};
++
++/**
++ * ibv_open_xrc_domain - open an XRC domain
++ * Returns a reference to an XRC domain.
++ *
++ * @context: Device context
++ * @fd: descriptor for inode associated with the domain
++ * If fd == -1, no inode is associated with the domain; in this case,
++ * the only legal value for oflag is O_CREAT
++ *
++ * @oflag: oflag values are constructed by OR-ing flags from the following list
++ *
++ * O_CREAT
++ * If a domain belonging to device named by context is already associated
++ * with the inode, this flag has no effect, except as noted under O_EXCL
++ * below. Otherwise, a new XRC domain is created and is associated with
++ * inode specified by fd.
++ *
++ * O_EXCL
++ * If O_EXCL and O_CREAT are set, open will fail if a domain associated with
++ * the inode exists. The check for the existence of the domain and creation
++ * of the domain if it does not exist is atomic with respect to other
++ * processes executing open with fd naming the same inode.
++ */
++struct ibv_xrc_domain *ibv_open_xrc_domain(struct ibv_context *context,
++ int fd, int oflag) DEPRECATED;
++
++/**
++ * ibv_create_xrc_srq - Creates a SRQ associated with the specified protection
++ * domain and xrc domain.
++ * @pd: The protection domain associated with the SRQ.
++ * @xrc_domain: The XRC domain associated with the SRQ.
++ * @xrc_cq: CQ to report completions for XRC packets on.
++ *
++ * @srq_init_attr: A list of initial attributes required to create the SRQ.
++ *
++ * srq_attr->max_wr and srq_attr->max_sge are read to determine the
++ * requested size of the SRQ, and set to the actual values allocated
++ * on return. If ibv_create_xrc_srq() succeeds, then max_wr and max_sge
++ * will always be at least as large as the requested values.
++ */
++struct ibv_srq *ibv_create_xrc_srq(struct ibv_pd *pd,
++ struct ibv_xrc_domain *xrc_domain,
++ struct ibv_cq *xrc_cq,
++ struct ibv_srq_init_attr *srq_init_attr) DEPRECATED;
++
++/**
++ * ibv_close_xrc_domain - close an XRC domain
++ * If this is the last reference, destroys the domain.
++ *
++ * @d: reference to XRC domain to close
++ *
++ * close is implicitly performed at process exit.
++ */
++int ibv_close_xrc_domain(struct ibv_xrc_domain *d) DEPRECATED;
++
++#endif
+diff -r f8684a1d3f02 include/infiniband/verbs.h
+--- a/include/infiniband/verbs.h Mon Nov 21 11:48:20 2016 -0800
++++ b/include/infiniband/verbs.h Mon Mar 20 14:32:42 2017 -0700
+@@ -42,6 +42,7 @@
+ #include <errno.h>
+ #if defined(__SVR4) && defined(__sun)
+ #include <infiniband/ofa_solaris.h>
++#include <infiniband/ofa_verbs.h>
+ #endif
+
+ #ifdef __cplusplus
+@@ -252,6 +253,8 @@
+ struct ibv_srq *srq;
+ int port_num;
+ union ibv_gid gid;
++ /* For source compatibility with legacy API */
++ uint32_t xrc_qp_num;
+ } element;
+ enum ibv_event_type event_type;
+ };
+@@ -507,6 +510,7 @@
+ IBV_QPT_RC = 2,
+ IBV_QPT_UC,
+ IBV_QPT_UD,
++ IBV_QPT_XRC, /* XRC legacy compatible type */
+ IBV_QPT_RAW_PACKET = 8,
+ IBV_QPT_XRC_SEND = 9,
+ IBV_QPT_XRC_RECV
+@@ -536,6 +540,8 @@
+ struct ibv_qp_cap cap;
+ enum ibv_qp_type qp_type;
+ int sq_sig_all;
++ /* Below is needed for legacy compatibility */
++ struct ibv_xrc_domain *xrc_domain;
+ };
+
+ enum ibv_qp_init_attr_mask {
+@@ -692,10 +698,14 @@
+ } ud;
+ } wr;
+ union {
+- struct {
+- uint32_t remote_srqn;
+- } xrc;
+- } qp_type;
++ union {
++ struct {
++ uint32_t remote_srqn;
++ } xrc;
++ } qp_type;
++
++ uint32_t xrc_remote_srq_num;
++ };
+ };
+
+ struct ibv_recv_wr {
+@@ -723,6 +733,25 @@
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
+ uint32_t events_completed;
++
++ /*
++ * Below is for source compatibility with legacy XRC APIs.
++ * Padding is based on ibv_srq_legacy.
++ */
++ uint32_t xrc_srq_num_bin_compat_padding;
++ struct ibv_xrc_domain *xrc_domain_bin_compat_padding;
++ struct ibv_cq *xrc_cq_bin_compat_padding;
++ void *ibv_srq_padding;
++
++ /* legacy fields */
++ uint32_t xrc_srq_num;
++ struct ibv_xrc_domain *xrc_domain;
++ struct ibv_cq *xrc_cq;
++};
++
++/* XRC source compat layer */
++enum ibv_event_flags {
++ IBV_XRC_QP_EVENT_FLAG = 0x80000000,
+ };
+
+ struct ibv_qp {
+@@ -996,6 +1025,8 @@
+
+ struct verbs_context {
+ /* "grows up" - new fields go here */
++ void * (*drv_get_legacy_xrc) (struct ibv_srq *ibv_srq);
++ void (*drv_set_legacy_xrc) (struct ibv_srq *ibv_srq, void *legacy_xrc);
+ int (*drv_ibv_destroy_flow) (struct ibv_flow *flow);
+ int (*lib_ibv_destroy_flow) (struct ibv_flow *flow);
+ struct ibv_flow * (*drv_ibv_create_flow) (struct ibv_qp *qp,
+diff -r f8684a1d3f02 man/ibv_create_qp_ex.3
+--- a/man/ibv_create_qp_ex.3 Mon Nov 21 11:48:20 2016 -0800
++++ b/man/ibv_create_qp_ex.3 Mon Mar 20 14:32:42 2017 -0700
+@@ -28,7 +28,7 @@
+ struct ibv_cq *recv_cq; /* CQ to be associated with the Receive Queue (RQ) */
+ struct ibv_srq *srq; /* SRQ handle if QP is to be associated with an SRQ, otherwise NULL */
+ struct ibv_qp_cap cap; /* QP capabilities */
+-enum ibv_qp_type qp_type; /* QP Transport Service Type: IBV_QPT_RC, IBV_QPT_UC, IBV_QPT_UD or IBV_QPT_RAW_PACKET */
++enum ibv_qp_type qp_type; /* QP Transport Service Type: IBV_QPT_RC, IBV_QPT_XRC_SEND, IBV_QPT_XRC_RECV, IBV_QPT_UC, IBV_QPT_UD or IBV_QPT_RAW_PACKET */
+ int sq_sig_all; /* If set, each Work Request (WR) submitted to the SQ generates a completion entry */
+ uint32_t comp_mask; /* Identifies valid fields */
+ struct ibv_pd *pd; /* PD to be associated with the QP */
+diff -r f8684a1d3f02 man/ibv_create_xsrq.3
+--- /dev/null Thu Jan 01 00:00:00 1970 +0000
++++ b/man/ibv_create_xsrq.3 Mon Mar 20 14:32:42 2017 -0700
+@@ -0,0 +1,87 @@
++.\" -*- nroff -*-
++.\"
++.TH IBV_CREATE_XSRQ 3 2011-06-17 libibverbs "Libibverbs Programmer's Manual"
++.SH "NAME"
++ibv_create_xsrq, ibv_destroy_srq \- create or destroy a shared receive queue (SRQ)
++.SH "SYNOPSIS"
++.nf
++.B #include <infiniband/verbs.h>
++.sp
++.BI "struct ibv_srq *ibv_create_xsrq(struct ibv_pd " "*pd" ", struct "
++.BI " ibv_srq_init_attr " "*srq_init_attr"
++);
++.sp
++.BI "int ibv_destroy_srq(struct ibv_srq " "*srq" );
++.fi
++.SH "DESCRIPTION"
++.B ibv_create_xsrq()
++creates a shared receive queue (SRQ) associated with the protection domain
++.I pd\fR.
++The argument
++.I srq_init_attr
++is an ibv_srq_init_attr struct, as defined in <infiniband/verbs.h>.
++.PP
++.nf
++struct ibv_srq_init_attr {
++.in +8
++void *srq_context; /* Associated context of the SRQ
++*/
++struct ibv_srq_attr attr; /* SRQ attributes */
++enum ibv_srq_type srq_type; /* Specifies type of SRQ to create
++*/
++union {
++.in +8
++struct {
++.in +8
++struct ibv_xrcd *xrcd; /* XRC domain associated with an XRC SRQ */
++struct ibv_cq *cq; /* completion queue for an XRC SRQ*/
++.in -8
++} xrc; /* Extended attributes for IBV_SRQT_XRC type SRQs */
++.in -8
++} ext;
++.in -8
++};
++.sp
++.nf
++struct ibv_srq_attr {
++.in +8
++uint32_t max_wr; /* Requested max number of
++outstanding work requests (WRs) in the SRQ */
++uint32_t max_sge; /* Requested max number of scatter
++elements per WR */
++uint32_t srq_limit; /* The limit value of the SRQ
++(ignored for ibv_create_srq) */
++.in -8
++};
++.fi
++.PP
++The function
++.B ibv_create_xsrq()
++will update the
++.I srq_init_attr
++struct with the original values of the SRQ that was created; the
++values of max_wr and max_sge will be greater than or equal to the
++values requested.
++.PP
++.B ibv_destroy_srq()
++destroys the SRQ
++.I srq\fR.
++.SH "RETURN VALUE"
++.B ibv_create_xsrq()
++returns a pointer to the created SRQ, or NULL if the request fails.
++.PP
++.B ibv_destroy_srq()
++returns 0 on success, or the value of errno on failure (which indicates
++the failure reason).
++.SH "NOTES"
++.B ibv_destroy_srq()
++fails if any queue pair is still associated with this SRQ.
++.SH "SEE ALSO"
++.BR ibv_alloc_pd (3),
++.BR ibv_create_cq (3),
++.BR ibv_open_xrcd (3),
++.BR ibv_modify_srq (3),
++.BR ibv_query_srq (3)
++.SH "AUTHORS"
++.TP
++Sean Hefty
+diff -r f8684a1d3f02 man/ibv_xsrq_pingpong.1
+--- /dev/null Thu Jan 01 00:00:00 1970 +0000
++++ b/man/ibv_xsrq_pingpong.1 Mon Mar 20 14:32:42 2017 -0700
+@@ -0,0 +1,71 @@
++.TH IBV_XSRQ_PINGPONG 1 "May 24, 2016" "libibverbs" "USER COMMANDS"
++
++.SH NAME
++ibv_xsrq_pingpong \- simple InfiniBand shared receive queue test
++
++.SH SYNOPSIS
++.B ibv_xsrq_pingpong
++[\-p port] [\-d device] [\-i ib port] [\-s size] [\-m mtu] [\-c clients]
++[\-n num_tests] [\-l sl] [\-e] \fBHOSTNAME\fR
++
++.B ibv_xsrq_pingpong
++[\-p port] [\-d device] [\-i ib port] [\-s size] [\-m mtu] [\-c clients]
++[\-n num_tests] [\-l sl] [\-e]
++
++.SH DESCRIPTION
++.PP
++Run a simple ping-pong test over InfiniBand via the extended reliable
++connected (XRC) transport service, using a shared receive queue (SRQ).
++
++.SH OPTIONS
++
++.PP
++.TP
++\fB\-p\fR, \fB\-\-port\fR=\fIPORT\fR
++use TCP port \fIPORT\fR for initial synchronization (default 18515)
++.TP
++\fB\-d\fR, \fB\-\-ib\-dev\fR=\fIDEVICE\fR
++use IB device \fIDEVICE\fR (default first device found)
++.TP
++\fB\-i\fR, \fB\-\-ib\-port\fR=\fIPORT\fR
++use IB port \fIPORT\fR (default port 1)
++.TP
++\fB\-s\fR, \fB\-\-size\fR=\fISIZE\fR
++ping-pong messages of size \fISIZE\fR (default 4096)
++.TP
++\fB\-m\fR, \fB\-\-mtu\fR=\fIMTU\fR
++use path mtu of size \fIMTU\fR (default 2048)
++.TP
++\fB\-c\fR, \fB\-\-clients\fR=\fICLIENTS\fR
++number of clients \fICLIENTS\fR (on server only, default 1)
++.TP
++\fB\-n\fR, \fB\-\-num\-tests\fR=\fINUM_TESTS\fR
++perform \fINUM_TESTS\fR tests per client (default 5)
++.TP
++\fB\-l\fR, \fB\-\-sl\fR=\fISL\fR
++use \fISL\fR as the service level value (default 0)
++.TP
++\fB\-e\fR, \fB\-\-events\fR
++sleep while waiting for work completion events (default is to poll for
++completions)
++
++.SH SEE ALSO
++.BR ibv_rc_pingpong (1),
++.BR ibv_uc_pingpong (1),
++.BR ibv_ud_pingpong (1),
++.BR ibv_srq_pingpong (1)
++
++.SH AUTHORS
++.TP
++Roland Dreier
++.RI < [email protected] >
++.TP
++Jarod Wilson
++.RI < [email protected] >
++
++.SH BUGS
++The network synchronization between client and server instances is
++weak, and does not prevent incompatible options from being used on the
++two instances. The method used for retrieving work completions is not
++strictly correct, and race conditions may cause failures on some
++systems.
+diff -r f8684a1d3f02 src/cmd.c
+--- a/src/cmd.c Mon Nov 21 11:48:20 2016 -0800
++++ b/src/cmd.c Mon Mar 20 14:32:42 2017 -0700
+@@ -815,6 +815,7 @@
+ cmd->user_handle = (uintptr_t) qp;
+
+ if (attr_ex->comp_mask & IBV_QP_INIT_ATTR_XRCD) {
++ /* XRC receive side */
+ vxrcd = container_of(attr_ex->xrcd, struct verbs_xrcd, xrcd);
+ cmd->pd_handle = vxrcd->handle;
+ } else {
+@@ -824,7 +825,9 @@
+ cmd->pd_handle = attr_ex->pd->handle;
+ cmd->send_cq_handle = attr_ex->send_cq->handle;
+
+- if (attr_ex->qp_type != IBV_QPT_XRC_SEND) {
++ /* XRC sender doesn't have a receive cq */
++ if (attr_ex->qp_type != IBV_QPT_XRC_SEND &&
++ attr_ex->qp_type != IBV_QPT_XRC) {
+ cmd->recv_cq_handle = attr_ex->recv_cq->handle;
+ cmd->srq_handle = attr_ex->srq ? attr_ex->srq->handle : 0;
+ }
+@@ -847,7 +850,8 @@
+ #else
+ cmd->sq_sig_all = attr_ex->sq_sig_all;
+ #endif
+- cmd->qp_type = attr_ex->qp_type;
++ cmd->qp_type = (attr_ex->qp_type == IBV_QPT_XRC) ?
++ IBV_QPT_XRC_SEND : attr_ex->qp_type;
+ cmd->is_srq = !!attr_ex->srq;
+ cmd->reserved = 0;
+
+@@ -1215,6 +1219,9 @@
+ tmp->wr.ud.remote_qpn = i->wr.ud.remote_qpn;
+ tmp->wr.ud.remote_qkey = i->wr.ud.remote_qkey;
+ } else {
++ if (ibqp->qp_type == IBV_QPT_XRC_SEND)
++ tmp->qp_type.xrc.remote_srqn =
++ i->qp_type.xrc.remote_srqn;
+ switch (i->opcode) {
+ case IBV_WR_RDMA_WRITE:
+ case IBV_WR_RDMA_WRITE_WITH_IMM:
+diff -r f8684a1d3f02 src/device.c
+--- a/src/device.c Mon Nov 21 11:48:20 2016 -0800
++++ b/src/device.c Mon Mar 20 14:32:42 2017 -0700
+@@ -261,6 +261,9 @@
+ struct ibv_async_event *event)
+ {
+ struct ibv_kern_async_event ev;
++ struct verbs_context *vctx;
++ struct ibv_srq_legacy *ibv_srq_legacy = NULL;
++ struct ibv_qp *qp;
+
+ if (read(context->async_fd, &ev, sizeof ev) != sizeof ev)
+ return -1;
+@@ -281,11 +284,24 @@
+ case IBV_EVENT_PATH_MIG_ERR:
+ case IBV_EVENT_QP_LAST_WQE_REACHED:
+ event->element.qp = (void *) (uintptr_t) ev.element;
++ qp = ibv_find_xrc_qp(event->element.qp->qp_num);
++ if (qp) {
++ /* This is an XRC receive QP created by the legacy API */
++ event->event_type |= IBV_XRC_QP_EVENT_FLAG;
++ event->element.qp = NULL;
++ event->element.xrc_qp_num = qp->qp_num;
++ }
+ break;
+
+ case IBV_EVENT_SRQ_ERR:
+ case IBV_EVENT_SRQ_LIMIT_REACHED:
+- event->element.srq = (void *) (uintptr_t) ev.element;
++ vctx = verbs_get_ctx_op(context, drv_get_legacy_xrc);
++ if (vctx)
++ ibv_srq_legacy =
++ vctx->drv_get_legacy_xrc((void *) (uintptr_t) ev.element);
++
++ event->element.srq = (ibv_srq_legacy) ? (void *)ibv_srq_legacy :
++ (void *) (uintptr_t) ev.element;
+ break;
+ case IBV_EVENT_GID_AVAIL:
+ case IBV_EVENT_GID_UNAVAIL:
+@@ -310,6 +326,12 @@
+
+ void __ibv_ack_async_event(struct ibv_async_event *event)
+ {
++ int is_legacy_xrc = 0;
++ if (event->event_type & IBV_XRC_QP_EVENT_FLAG) {
++ event->event_type ^= IBV_XRC_QP_EVENT_FLAG;
++ is_legacy_xrc = 1;
++ }
++
+ switch (event->event_type) {
+ case IBV_EVENT_CQ_ERR:
+ {
+@@ -334,6 +356,16 @@
+ {
+ struct ibv_qp *qp = event->element.qp;
+
++ if (is_legacy_xrc) {
++ qp = ibv_find_xrc_qp(event->element.xrc_qp_num);
++ if (!qp || qp->qp_num != event->element.xrc_qp_num) {
++ fprintf(stderr, PFX "Warning: ibv_ack_async_event, "
++ "XRC qpn=%u wasn't found\n",
++ event->element.xrc_qp_num);
++ return;
++ }
++ }
++
+ pthread_mutex_lock(&qp->mutex);
+ ++qp->events_completed;
+ pthread_cond_signal(&qp->cond);
+@@ -347,6 +379,12 @@
+ {
+ struct ibv_srq *srq = event->element.srq;
+
++ if (srq->handle == LEGACY_XRC_SRQ_HANDLE) {
++ struct ibv_srq_legacy *ibv_srq_legacy =
++ (struct ibv_srq_legacy *) srq;
++ srq = ibv_srq_legacy->ibv_srq;
++ }
++
+ pthread_mutex_lock(&srq->mutex);
+ ++srq->events_completed;
+ pthread_cond_signal(&srq->cond);
+diff -r f8684a1d3f02 src/ibverbs.h
+--- a/src/ibverbs.h Mon Nov 21 11:48:20 2016 -0800
++++ b/src/ibverbs.h Mon Mar 20 14:32:42 2017 -0700
+@@ -85,6 +85,7 @@
+ extern HIDDEN int abi_ver;
+
+ HIDDEN int ibverbs_init(struct ibv_device ***list);
++HIDDEN struct ibv_qp *ibv_find_xrc_qp(uint32_t qpn);
+
+ #define IBV_INIT_CMD(cmd, size, opcode) \
+ do { \
+diff -r f8684a1d3f02 src/libibverbs.map
+--- a/src/libibverbs.map Mon Nov 21 11:48:20 2016 -0800
++++ b/src/libibverbs.map Mon Mar 20 14:32:42 2017 -0700
+@@ -124,4 +124,7 @@
+ ibv_cmd_create_qp_ex;
+ ibv_cmd_open_qp;
+
++ ibv_open_xrc_domain;
++ ibv_create_xrc_srq;
++ ibv_close_xrc_domain;
+ } IBVERBS_1.0;
+diff -r f8684a1d3f02 src/verbs.c
+--- a/src/verbs.c Mon Nov 21 11:48:20 2016 -0800
++++ b/src/verbs.c Mon Mar 20 14:32:42 2017 -0700
+@@ -41,6 +41,7 @@
+ #include <stdlib.h>
+ #include <errno.h>
+ #include <string.h>
++#include <search.h>
+ #if defined(__SVR4) && defined(__sun)
+ #include <fcntl.h>
+ #include <sys/stat.h>
+@@ -854,3 +855,208 @@
+ return qp->context->ops.detach_mcast(qp, gid, lid);
+ }
+ default_symver(__ibv_detach_mcast, ibv_detach_mcast);
++
++
++/* XRC compatibility layer */
++struct ibv_xrc_domain *ibv_open_xrc_domain(struct ibv_context *context,
++ int fd, int oflag)
++{
++ /* Legacy entry point: passes fd and oflag on to ibv_open_xrcd(). */
++ struct ibv_xrcd *ibv_xrcd;
++ struct ibv_xrcd_init_attr xrcd_init_attr;
++
++ memset(&xrcd_init_attr, 0, sizeof(xrcd_init_attr));
++
++ xrcd_init_attr.fd = fd;
++ xrcd_init_attr.oflags = oflag;
++ /* Flag both the fd and oflags fields in comp_mask. */
++ xrcd_init_attr.comp_mask = IBV_XRCD_INIT_ATTR_FD |
++ IBV_XRCD_INIT_ATTR_OFLAGS;
++
++ ibv_xrcd = ibv_open_xrcd(context, &xrcd_init_attr);
++ if (!ibv_xrcd)
++ return NULL;
++ /* The legacy domain pointer is the ibv_xrcd pointer, recast. */
++ return (struct ibv_xrc_domain *)ibv_xrcd;
++
++}
++
++
++struct ibv_srq *ibv_create_xrc_srq(struct ibv_pd *pd,
++ struct ibv_xrc_domain *xrc_domain,
++ struct ibv_cq *xrc_cq,
++ struct ibv_srq_init_attr *srq_init_attr)
++{
++ /* Legacy XRC SRQ creation: builds an XRC SRQ with ibv_create_srq_ex() and returns an ibv_srq_legacy wrapper cast to ibv_srq. */
++ struct ibv_srq_init_attr_ex ibv_srq_init_attr_ex;
++ struct ibv_srq_legacy *ibv_srq_legacy;
++ struct ibv_srq *ibv_srq;
++ uint32_t xrc_srq_num;
++ struct verbs_context *vctx;
++ /* The driver must provide drv_set_legacy_xrc; without it the call fails with errno = ENOSYS. */
++ vctx = verbs_get_ctx_op(pd->context, drv_set_legacy_xrc);
++ if (!vctx) {
++ errno = ENOSYS;
++ return NULL;
++ }
++ memset(&ibv_srq_init_attr_ex, 0, sizeof ibv_srq_init_attr_ex);
++
++ ibv_srq_init_attr_ex.xrcd = (struct ibv_xrcd *)xrc_domain;
++ ibv_srq_init_attr_ex.comp_mask = IBV_SRQ_INIT_ATTR_XRCD |
++ IBV_SRQ_INIT_ATTR_TYPE |
++ IBV_SRQ_INIT_ATTR_CQ | IBV_SRQ_INIT_ATTR_PD;
++
++ ibv_srq_init_attr_ex.cq = xrc_cq;
++ ibv_srq_init_attr_ex.pd = pd;
++ ibv_srq_init_attr_ex.srq_type = IBV_SRQT_XRC;
++
++ ibv_srq_init_attr_ex.attr.max_sge = srq_init_attr->attr.max_sge;
++ ibv_srq_init_attr_ex.attr.max_wr = srq_init_attr->attr.max_wr;
++ ibv_srq_init_attr_ex.attr.srq_limit = srq_init_attr->attr.srq_limit;
++ ibv_srq_init_attr_ex.srq_context = srq_init_attr->srq_context;
++
++ ibv_srq = ibv_create_srq_ex(pd->context, &ibv_srq_init_attr_ex);
++ if (!ibv_srq)
++ return NULL;
++
++ if (ibv_srq->handle == LEGACY_XRC_SRQ_HANDLE) {
++ /* Wrappers are recognised by this handle value, so a real SRQ carrying it is replaced by a second one. */
++ struct ibv_srq *ibv_srq_tmp = ibv_srq;
++ int ret;
++
++ ibv_srq = ibv_create_srq_ex(pd->context, &ibv_srq_init_attr_ex);
++
++ ret = ibv_destroy_srq(ibv_srq_tmp);
++ if (ret) {
++ fprintf(stderr, PFX "ibv_create_xrc_srq, "
++ "fail to destroy intermediate srq\n");
++ return NULL;
++ }
++ /* NOTE(review): the return above leaves the second SRQ (if it was created) undestroyed - confirm intended. */
++ if (!ibv_srq)
++ return NULL;
++ /* If the second SRQ got the marker handle as well, destroy it and give up with EAGAIN. */
++ if (ibv_srq->handle == LEGACY_XRC_SRQ_HANDLE) {
++ ret = ibv_destroy_srq(ibv_srq);
++ if (ret)
++ fprintf(stderr, PFX "ibv_create_xrc_srq, "
++ "fail to destroy intermediate srq\n");
++ errno = EAGAIN;
++ return NULL;
++ }
++ }
++
++ ibv_srq_legacy = calloc(1, sizeof(*ibv_srq_legacy));
++ if (!ibv_srq_legacy) {
++ errno = ENOMEM;
++ goto err;
++ }
++
++ if (ibv_get_srq_num(ibv_srq, &xrc_srq_num))
++ goto err_free;
++
++ ibv_srq_legacy->ibv_srq = ibv_srq;
++ ibv_srq_legacy->xrc_srq_num = xrc_srq_num;
++
++ /* Setting the bin compat fields */
++ ibv_srq_legacy->xrc_srq_num_bin_compat = xrc_srq_num;
++ ibv_srq_legacy->xrc_domain_bin_compat = xrc_domain;
++ ibv_srq_legacy->xrc_cq_bin_compat = xrc_cq;
++ ibv_srq_legacy->context = pd->context;
++ ibv_srq_legacy->srq_context = srq_init_attr->srq_context;
++ ibv_srq_legacy->pd = pd;
++ /*
++ * Set an indication that this is a legacy structure. For legacy structures,
++ * we should use the internal ibv_srq.
++ */
++ ibv_srq_legacy->handle = LEGACY_XRC_SRQ_HANDLE;
++ ibv_srq_legacy->xrc_domain = xrc_domain;
++ ibv_srq_legacy->xrc_cq = xrc_cq;
++ ibv_srq_legacy->events_completed = 0;
++ /* Copy max_wr/max_sge from the extended attributes back into the caller's structure. */
++ srq_init_attr->attr.max_wr = ibv_srq_init_attr_ex.attr.max_wr;
++ srq_init_attr->attr.max_sge = ibv_srq_init_attr_ex.attr.max_sge;
++ /* Hand the wrapper to the driver through its drv_set_legacy_xrc hook. */
++ vctx->drv_set_legacy_xrc(ibv_srq, ibv_srq_legacy);
++ return (struct ibv_srq *)(ibv_srq_legacy);
++ /* Failure unwinding: err_free releases the wrapper, err destroys the underlying SRQ. */
++err_free:
++ free(ibv_srq_legacy);
++err:
++ ibv_destroy_srq(ibv_srq);
++ return NULL;
++
++}
++
++static pthread_mutex_t xrc_tree_mutex = PTHREAD_MUTEX_INITIALIZER; /* held around every tfind/tdelete/tsearch on ibv_xrc_qp_tree */
++static void *ibv_xrc_qp_tree; /* tsearch(3) tree root; keys are pointers to QP numbers */
++
++static int xrc_qp_compare(const void *a, const void *b)
++{
++ /* Comparator for the tsearch(3) family: a and b point at uint32_t keys; returns -1, 1 or 0 for <, >, ==. */
++ if ((*(uint32_t *) a) < (*(uint32_t *) b))
++ return -1;
++ else if ((*(uint32_t *) a) > (*(uint32_t *) b))
++ return 1;
++ else
++ return 0;
++
++}
++
++struct ibv_qp *ibv_find_xrc_qp(uint32_t qpn)
++{
++ uint32_t **qpn_ptr;
++ struct ibv_qp *ibv_qp = NULL;
++ /* Look qpn up in ibv_xrc_qp_tree under xrc_tree_mutex; returns the matching ibv_qp or NULL when absent. */
++ pthread_mutex_lock(&xrc_tree_mutex);
++ qpn_ptr = tfind(&qpn, &ibv_xrc_qp_tree, xrc_qp_compare);
++ if (!qpn_ptr)
++ goto end;
++ /* Keys are inserted as the address of a QP's qp_num field, so step back to the enclosing ibv_qp. */
++ ibv_qp = container_of(*qpn_ptr, struct ibv_qp, qp_num);
++
++end:
++ pthread_mutex_unlock(&xrc_tree_mutex);
++ return ibv_qp;
++}
++
++static int ibv_clear_xrc_qp(uint32_t qpn)
++{
++ uint32_t **qpn_ptr;
++ int ret = 0;
++ /* Remove qpn from ibv_xrc_qp_tree under xrc_tree_mutex; returns 0, or EINVAL when tdelete() returns NULL. */
++ pthread_mutex_lock(&xrc_tree_mutex);
++ qpn_ptr = tdelete(&qpn, &ibv_xrc_qp_tree, xrc_qp_compare);
++ if (!qpn_ptr)
++ ret = EINVAL;
++
++ pthread_mutex_unlock(&xrc_tree_mutex);
++ return ret;
++}
++
++static int ibv_store_xrc_qp(struct ibv_qp *qp)
++{
++ uint32_t **qpn_ptr;
++ int ret = 0;
++ /* Insert qp into ibv_xrc_qp_tree keyed by &qp->qp_num; returns EEXIST if already stored, EINVAL if tsearch() returns NULL, else 0. */
++ if (ibv_find_xrc_qp(qp->qp_num)) {
++ fprintf(stderr, PFX "ibv_store_xrc_qp failed, qpn=%u is already stored\n",
++ qp->qp_num);
++ return EEXIST;
++ }
++ /* NOTE(review): the mutex is released between the lookup above and the insert below - confirm callers serialise. */
++ pthread_mutex_lock(&xrc_tree_mutex);
++ qpn_ptr = tsearch(&qp->qp_num, &ibv_xrc_qp_tree, xrc_qp_compare);
++ if (!qpn_ptr)
++ ret = EINVAL;
++
++ pthread_mutex_unlock(&xrc_tree_mutex);
++ return ret;
++
++}
++
++int ibv_close_xrc_domain(struct ibv_xrc_domain *d)
++{
++ struct ibv_xrcd *ibv_xrcd = (struct ibv_xrcd *)d; /* undo the cast made by ibv_open_xrc_domain() */
++ return ibv_close_xrcd(ibv_xrcd);
++}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/components/open-fabrics/libmlx4/patches/004-libmlx4-blueflame.patch Thu Apr 13 20:30:48 2017 -0700
@@ -0,0 +1,20 @@
+#This patch was developed both in-house and from outside. We plan to submit it
+#upstream, but do not yet have a target date for doing so
+#
+# HG changeset patch
+# Parent 710c2a999a0a5469c1dd4ea1c21b279debb26a3b
+24617163 Missing code in libmlx4 when upgrading from ofed-1.5.3 to ofed-3.18
+
+diff -r 710c2a999a0a src/qp.c
+--- a/src/qp.c Fri Sep 16 10:24:06 2016 -0700
++++ b/src/qp.c Tue Sep 20 08:12:11 2016 -0700
+@@ -405,7 +405,8 @@
+ out:
+ ctx = to_mctx(ibqp->context);
+
+- if (nreq == 1 && inl && size > 1 && size <= ctx->bf_buf_size / 16) {
++ if (ctx->ibv_ctx.device->blueflame_enabled && nreq == 1 && inl &&
++ size > 1 && size <= ctx->bf_buf_size / 16) {
+ ctrl->owner_opcode |= htonl((qp->sq.head & 0xffff) << 8);
+ *(uint32_t *) ctrl->reserved |= qp->doorbell_qpn;
+ /*
--- a/components/open-fabrics/libmlx4/patches/004-libmlx4-remove-xrc.patch Thu Apr 13 13:20:29 2017 -0700
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,115 +0,0 @@
-#This patch was developed both in-house and from outside. We plan to submit it
-#upstream, but do not yet have a target date for doing so
-#
-# HG changeset patch
-# Parent 56ebd417e6ed24cfa11c23bd564417ec7d2e5812
-Disable xrc routines in libmlx4
-
-diff -r 56ebd417e6ed src/srq.c
---- a/src/srq.c Wed Dec 02 15:27:52 2015 -0800
-+++ b/src/srq.c Wed Dec 02 15:47:02 2015 -0800
-@@ -286,6 +286,13 @@
- struct ibv_srq *mlx4_create_xrc_srq(struct ibv_context *context,
- struct ibv_srq_init_attr_ex *attr_ex)
- {
-+#if defined(__SVR4) && defined(__sun)
-+ /*
-+ * Not supported by Solaris kernel driver. When/if supported
-+ * this routine will need to be ported.
-+ */
-+ return NULL;
-+#else
- struct mlx4_create_xsrq cmd;
- struct mlx4_create_srq_resp resp;
- struct mlx4_srq *srq;
-@@ -362,10 +369,18 @@
- err:
- free(srq);
- return NULL;
-+#endif
- }
-
- int mlx4_destroy_xrc_srq(struct ibv_srq *srq)
- {
-+#if defined(__SVR4) && defined(__sun)
-+ /*
-+ * Not supported by Solaris kernel driver. When/if supported
-+ * this routine will need to be ported.
-+ */
-+ return NULL;
-+#else
- struct mlx4_context *mctx = to_mctx(srq->context);
- struct mlx4_srq *msrq = to_msrq(srq);
- struct mlx4_cq *mcq;
-@@ -391,4 +406,5 @@
- free(msrq);
-
- return 0;
-+#endif
- }
-diff -r 56ebd417e6ed src/verbs.c
---- a/src/verbs.c Wed Dec 02 15:27:52 2015 -0800
-+++ b/src/verbs.c Wed Dec 02 15:47:02 2015 -0800
-@@ -151,6 +151,13 @@
- struct ibv_xrcd *mlx4_open_xrcd(struct ibv_context *context,
- struct ibv_xrcd_init_attr *attr)
- {
-+#if defined(__SVR4) && defined(__sun)
-+ /*
-+ * Not supported by Solaris kernel driver. When/if supported
-+ * this routine will need to be ported.
-+ */
-+ return NULL;
-+#else
- struct ibv_open_xrcd cmd;
- struct ibv_open_xrcd_resp resp;
- struct verbs_xrcd *xrcd;
-@@ -170,10 +177,18 @@
- err:
- free(xrcd);
- return NULL;
-+#endif
- }
-
- int mlx4_close_xrcd(struct ibv_xrcd *ib_xrcd)
- {
-+#if defined(__SVR4) && defined(__sun)
-+ /*
-+ * Not supported by Solaris kernel driver. When/if supported
-+ * this routine will need to be ported.
-+ */
-+ return NULL;
-+#else
- struct verbs_xrcd *xrcd = container_of(ib_xrcd, struct verbs_xrcd, xrcd);
- int ret;
-
-@@ -182,6 +197,7 @@
- free(xrcd);
-
- return ret;
-+#endif
- }
-
- struct ibv_mr *mlx4_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
-@@ -1031,6 +1047,13 @@
-
- struct ibv_qp *mlx4_open_qp(struct ibv_context *context, struct ibv_qp_open_attr *attr)
- {
-+#if defined(__SVR4) && defined(__sun)
-+ /*
-+ * Not supported by Solaris kernel driver. When/if supported
-+ * this routine will need to be ported.
-+ */
-+ return NULL;
-+#else
- struct ibv_open_qp cmd;
- struct ibv_create_qp_resp resp;
- struct mlx4_qp *qp;
-@@ -1050,6 +1073,7 @@
- err:
- free(qp);
- return NULL;
-+#endif
- }
-
- int mlx4_query_qp(struct ibv_qp *ibqp, struct ibv_qp_attr *attr,
--- a/components/open-fabrics/libmlx4/patches/005-libmlx4-blueflame.patch Thu Apr 13 13:20:29 2017 -0700
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,20 +0,0 @@
-#This patch was developed both in-house and from outside. We plan to submit it
-#upstream, but do not yet have a target date for doing so
-#
-# HG changeset patch
-# Parent 710c2a999a0a5469c1dd4ea1c21b279debb26a3b
-24617163 Missing code in libmlx4 when upgrading from ofed-1.5.3 to ofed-3.18
-
-diff -r 710c2a999a0a src/qp.c
---- a/src/qp.c Fri Sep 16 10:24:06 2016 -0700
-+++ b/src/qp.c Tue Sep 20 08:12:11 2016 -0700
-@@ -405,7 +405,8 @@
- out:
- ctx = to_mctx(ibqp->context);
-
-- if (nreq == 1 && inl && size > 1 && size <= ctx->bf_buf_size / 16) {
-+ if (ctx->ibv_ctx.device->blueflame_enabled && nreq == 1 && inl &&
-+ size > 1 && size <= ctx->bf_buf_size / 16) {
- ctrl->owner_opcode |= htonl((qp->sq.head & 0xffff) << 8);
- *(uint32_t *) ctrl->reserved |= qp->doorbell_qpn;
- /*
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/components/open-fabrics/libmlx4/patches/005-libmlx4-xrc.patch Thu Apr 13 20:30:48 2017 -0700
@@ -0,0 +1,423 @@
+#This patch was developed both in-house and from outside. We plan to submit it
+#upstream, but do not yet have a target date for doing so
+#
+# HG changeset patch
+# Parent 90d898abcac39d3fc4a631a678f0bb7bbe28d877
+25759055 OFUV (Userland) support for XRC APIs
+
+diff -r 90d898abcac3 src/mlx4.c
+--- a/src/mlx4.c Mon Nov 21 11:48:10 2016 -0800
++++ b/src/mlx4.c Mon Mar 20 14:22:58 2017 -0700
+@@ -274,6 +274,8 @@
+ verbs_set_ctx_op(verbs_ctx, open_qp, mlx4_open_qp);
+ verbs_set_ctx_op(verbs_ctx, drv_ibv_create_flow, ibv_cmd_create_flow);
+ verbs_set_ctx_op(verbs_ctx, drv_ibv_destroy_flow, ibv_cmd_destroy_flow);
++ verbs_set_ctx_op(verbs_ctx, drv_set_legacy_xrc, mlx4_set_legacy_xrc);
++ verbs_set_ctx_op(verbs_ctx, drv_get_legacy_xrc, mlx4_get_legacy_xrc);
+
+ return 0;
+
+diff -r 90d898abcac3 src/mlx4.h
+--- a/src/mlx4.h Mon Nov 21 11:48:10 2016 -0800
++++ b/src/mlx4.h Mon Mar 20 14:22:58 2017 -0700
+@@ -233,6 +233,7 @@
+ uint32_t *db;
+ uint16_t counter;
+ uint8_t ext_srq;
++ struct ibv_srq_legacy *ibv_srq_legacy;
+ };
+
+ struct mlx4_wq {
+@@ -464,4 +465,7 @@
+ struct mlx4_ah *ah);
+ void mlx4_free_av(struct mlx4_ah *ah);
+
++void *mlx4_get_legacy_xrc(struct ibv_srq *srq);
++void mlx4_set_legacy_xrc(struct ibv_srq *srq, void *legacy_xrc_srq);
++
+ #endif /* MLX4_H */
+diff -r 90d898abcac3 src/qp.c
+--- a/src/qp.c Mon Nov 21 11:48:10 2016 -0800
++++ b/src/qp.c Mon Mar 20 14:22:58 2017 -0700
+@@ -247,6 +247,7 @@
+
+ switch (ibqp->qp_type) {
+ case IBV_QPT_XRC_SEND:
++ case IBV_QPT_XRC:
+ ctrl->srcrb_flags |= MLX4_REMOTE_SRQN_FLAGS(wr);
+ /* fall through */
+ case IBV_QPT_RC:
+@@ -559,6 +560,7 @@
+ break;
+
+ case IBV_QPT_XRC_SEND:
++ case IBV_QPT_XRC:
+ case IBV_QPT_RC:
+ size += sizeof (struct mlx4_wqe_raddr_seg);
+ /*
+@@ -596,9 +598,11 @@
+ qp->buf.buf = qpbuf;
+ qp->buf.length = buflen;
+
+- qp->sq.wrid = malloc(qp->sq.wqe_cnt * sizeof (uint64_t));
+- if (!qp->sq.wrid)
+- return -1;
++ if (qp->sq.wqe_cnt) {
++ qp->sq.wrid = malloc(qp->sq.wqe_cnt * sizeof (uint64_t));
++ if (!qp->sq.wrid)
++ return -1;
++ }
+
+ if (qp->rq.wqe_cnt) {
+ qp->rq.wrid = malloc(qp->rq.wqe_cnt * sizeof (uint64_t));
+@@ -628,16 +632,20 @@
+ qp->sq.offset = 0;
+ }
+
+- if ((long int)qp->buf.length < (long int)qp->buf_size) {
+- fprintf(stderr, PFX "QP kernel buffer size %lu < user buf "
+- "size %d\n", (unsigned long)qp->buf.length, qp->buf_size);
+- }
+- if ((!rq_off && qp->rq.offset) || (!sq_off && qp->sq.offset)) {
+- fprintf(stderr, PFX "QP kernel and user out of sync on "
+- "buffer order\n");
+- }
++ if (qp->buf_size) {
++ if ((long int)qp->buf.length < (long int)qp->buf_size) {
++ fprintf(stderr, PFX "QP kernel buffer size %lu < user "
++ "buf size %d\n", (unsigned long)qp->buf.length,
++ qp->buf_size);
++ }
++ if ((!rq_off && qp->rq.offset) || (!sq_off && qp->sq.offset)) {
++ fprintf(stderr, PFX "QP kernel and user out of sync on "
++ "buffer order\n");
++ }
+
+- memset(qp->buf.buf, 0, qp->buf_size);
++ memset(qp->buf.buf, 0, qp->buf_size);
++ } else
++ qp->buf.buf = NULL;
+ return 0;
+ }
+ #endif
+@@ -705,6 +713,7 @@
+ break;
+
+ case IBV_QPT_XRC_SEND:
++ case IBV_QPT_XRC:
+ case IBV_QPT_UC:
+ case IBV_QPT_RC:
+ wqe_size -= sizeof (struct mlx4_wqe_raddr_seg);
+diff -r 90d898abcac3 src/srq.c
+--- a/src/srq.c Mon Nov 21 11:48:10 2016 -0800
++++ b/src/srq.c Mon Mar 20 14:22:58 2017 -0700
+@@ -66,13 +66,17 @@
+ struct ibv_recv_wr *wr,
+ struct ibv_recv_wr **bad_wr)
+ {
+- struct mlx4_srq *srq = to_msrq(ibsrq);
++ struct mlx4_srq *srq;
+ struct mlx4_wqe_srq_next_seg *next;
+ struct mlx4_wqe_data_seg *scat;
+ int err = 0;
+ int nreq;
+ int i;
+
++ if (ibsrq->handle == LEGACY_XRC_SRQ_HANDLE)
++ ibsrq = (struct ibv_srq *)(((struct ibv_srq_legacy *) ibsrq)->ibv_srq);
++
++ srq = to_msrq(ibsrq);
+ pthread_spin_lock(&srq->lock);
+
+ for (nreq = 0; wr; ++nreq, wr = wr->next) {
+@@ -290,6 +294,9 @@
+ struct mlx4_create_srq_resp resp;
+ struct mlx4_srq *srq;
+ int ret;
++#if defined(__SVR4) && defined(__sun)
++ void *srqbuf;
++#endif
+
+ /* Sanity check SRQ size before proceeding */
+ if (attr_ex->attr.max_wr > 1 << 16 || attr_ex->attr.max_sge > 64)
+@@ -342,9 +349,67 @@
+ attr_ex,
+ &cmd.ibv_cmd, sizeof cmd,
+ &resp.ibv_resp, sizeof resp);
++
++#if defined(__SVR4) && defined(__sun)
++ if (ret) {
++ goto err;
++ }
++
++ /*
++ * The kernel driver passes back mmap information for mapping the
++ * SRQ work queue memory it allocated and the doorbell for
++ * posting.
++ */
++ if (resp.mdd.msrq_rev < 1) {
++ fprintf(stderr, PFX "libmlx4_create_xrc_srq libmlx4/hermon umap "
++ "rev mismatch (kernel rev=%d)\n", resp.mdd.msrq_rev);
++ goto err_destroy;
++ }
++
++ srqbuf = mmap64((void *)0, resp.mdd.msrq_maplen, (PROT_READ | PROT_WRITE),
++ MAP_SHARED, attr_ex->pd->context->mmap_fd, resp.mdd.msrq_mapoffset);
++
++ if (srqbuf == MAP_FAILED) {
++ goto err_destroy;
++ }
++
++ srq->buf.buf = srqbuf;
++ srq->buf.length = resp.mdd.msrq_maplen;
++ srq->max = resp.ibv_resp.max_wr;
++ srq->max_gs = resp.ibv_resp.max_sge;
++ srq->verbs_srq.srq_num = srq->srqn = resp.mdd.msrq_srqnum;
++ srq->counter = 0;
++
++ srq->db = mlx4_alloc_db(to_mctx(attr_ex->pd->context),
++ resp.mdd.msrq_rdbr_mapoffset,
++ resp.mdd.msrq_rdbr_maplen,
++ resp.mdd.msrq_rdbr_offset);
++ if (srq->db == NULL) {
++ goto err_unmap;
++ }
++
++ /*
++ * The following call only initializes memory and control structures,
++ * it utilizes the memory allocated by the kernel.
++ * It also allocates the srq->wrid memory.
++ */
++ if (mlx4_set_srq_buf(attr_ex->pd, srq, resp.mdd.msrq_wqesz,
++ resp.mdd.msrq_numwqe)) {
++ goto err_db;
++ }
++
++ /*
++ * The returned max wr will have been rounded up to the nearest
++ * power of 2, subtracting 1 from that and reporting that value
++ * as the max will give us the required free WR in the queue, as
++ * in OFED.
++ */
++ attr_ex->attr.max_wr -= 1;
++#else
+ if (ret)
+ goto err_db;
+
++#endif
+ ret = mlx4_store_xsrq(&to_mctx(context)->xsrq_table,
+ srq->verbs_srq.srq_num, srq);
+ if (ret)
+@@ -352,13 +417,35 @@
+
+ return &srq->verbs_srq.srq;
+
+-err_destroy:
+- ibv_cmd_destroy_srq(&srq->verbs_srq.srq);
+ err_db:
+ mlx4_free_db(to_mctx(context), MLX4_DB_TYPE_RQ, srq->db);
++
++#if defined(__SVR4) && defined(__sun)
++ if (srq->wrid)
++ free(srq->wrid);
++err_unmap:
++ mlx4_free_buf(&srq->buf);
++
++err_destroy:
++ /*
++ * Calling ibv_cmd_destroy_srq() will try and take the ibv_srq
++ * mutex that is initialised by the ibv_create_srq() entry point
++ * that called us AFTER we return, so it's not initialised yet.
++ * So initialise it here so the destroy call doesn't hang.
++ */
++ pthread_mutex_init(&(srq->verbs_srq.srq.mutex), NULL);
++ pthread_cond_init(&(srq->verbs_srq.srq.cond), NULL);
++ srq->verbs_srq.srq.events_completed = 0;
++
++ ibv_cmd_destroy_srq(&srq->verbs_srq.srq);
++#else
++err_destroy:
++ ibv_cmd_destroy_srq(&srq->verbs_srq.srq);
+ err_free:
+ free(srq->wrid);
+ mlx4_free_buf(&srq->buf);
++#endif
++
+ err:
+ free(srq);
+ return NULL;
+diff -r 90d898abcac3 src/verbs.c
+--- a/src/verbs.c Mon Nov 21 11:48:10 2016 -0800
++++ b/src/verbs.c Mon Mar 20 14:22:58 2017 -0700
+@@ -549,6 +549,21 @@
+ return 0;
+ }
+
++void *mlx4_get_legacy_xrc(struct ibv_srq *srq)
++{
++ struct mlx4_srq *msrq = to_msrq(srq);
++ /* Return the legacy wrapper pointer kept in the mlx4 SRQ's ibv_srq_legacy field. */
++ return msrq->ibv_srq_legacy;
++}
++
++void mlx4_set_legacy_xrc(struct ibv_srq *srq, void *legacy_xrc_srq)
++{
++ struct mlx4_srq *msrq = to_msrq(srq);
++ /* Store the legacy wrapper pointer in the mlx4 SRQ so mlx4_get_legacy_xrc() can return it. */
++ msrq->ibv_srq_legacy = legacy_xrc_srq;
++ return;
++}
++
+ struct ibv_srq *mlx4_create_srq(struct ibv_pd *pd,
+ struct ibv_srq_init_attr *attr)
+ {
+@@ -564,7 +579,7 @@
+ if (attr->attr.max_wr > 1 << 16 || attr->attr.max_sge > 64)
+ return NULL;
+
+- srq = malloc(sizeof *srq);
++ srq = calloc(1, sizeof *srq);
+ if (!srq)
+ return NULL;
+
+@@ -724,6 +739,9 @@
+ {
+ struct ibv_modify_srq cmd;
+
++ if (srq->handle == LEGACY_XRC_SRQ_HANDLE)
++ srq = (struct ibv_srq *)(((struct ibv_srq_legacy *) srq)->ibv_srq);
++
+ #if !(defined(__SVR4) && defined(__sun))
+ return ibv_cmd_modify_srq(srq, attr, attr_mask, &cmd, sizeof cmd);
+ #else
+@@ -741,6 +759,9 @@
+ {
+ struct ibv_query_srq cmd;
+
++ if (srq->handle == LEGACY_XRC_SRQ_HANDLE)
++ srq = (struct ibv_srq *)(((struct ibv_srq_legacy *) srq)->ibv_srq);
++
+ #if !(defined(__SVR4) && defined(__sun))
+ return ibv_cmd_query_srq(srq, attr, &cmd, sizeof cmd);
+ #else
+@@ -757,9 +778,23 @@
+ int mlx4_destroy_srq(struct ibv_srq *srq)
+ {
+ int ret;
++ struct ibv_srq *legacy_srq = NULL;
+
+- if (to_msrq(srq)->ext_srq)
+- return mlx4_destroy_xrc_srq(srq);
++ if (srq->handle == LEGACY_XRC_SRQ_HANDLE) {
++ legacy_srq = srq;
++ srq = (struct ibv_srq *)(((struct ibv_srq_legacy *) srq)->ibv_srq);
++ }
++
++ if (to_msrq(srq)->ext_srq) {
++ ret = mlx4_destroy_xrc_srq(srq);
++ if (ret)
++ return ret;
++
++ if (legacy_srq)
++ free(legacy_srq);
++
++ return 0;
++ }
+
+ ret = ibv_cmd_destroy_srq(srq);
+ if (ret)
+@@ -783,7 +818,7 @@
+ struct ibv_create_qp_resp resp;
+ #else
+ struct mlx4_create_qp_resp resp;
+- void *qpbuf;
++ void *qpbuf = NULL;
+ #endif
+
+ /* Sanity check QP size before proceeding */
+@@ -813,7 +848,8 @@
+ }
+
+ if (attr->srq || attr->qp_type == IBV_QPT_XRC_SEND ||
+- attr->qp_type == IBV_QPT_XRC_RECV) {
++ attr->qp_type == IBV_QPT_XRC_RECV ||
++ attr->qp_type == IBV_QPT_XRC) {
+ attr->cap.max_recv_wr = qp->rq.wqe_cnt = attr->cap.max_recv_sge = 0;
+ } else {
+ #if !(defined(__SVR4) && defined(__sun))
+@@ -900,18 +936,22 @@
+ "rev mismatch (kernel rev=%d)\n", resp.mdd.mqp_rev);
+ goto err_destroy;
+ }
+- qpbuf = mmap64((void *)0, resp.mdd.mqp_maplen, (PROT_READ | PROT_WRITE),
+- MAP_SHARED, context->mmap_fd, resp.mdd.mqp_mapoffset);
+-
+- if (qpbuf == MAP_FAILED)
+- goto err_destroy;
+
+- /*
+- * Need to set qp->buf here in case alloc_db fails then
+- * we'll call mlx4_free_buf() to umap.
+- */
+- qp->buf.buf = qpbuf;
+- qp->buf.length = resp.mdd.mqp_maplen;
++ if (resp.mdd.mqp_maplen != 0) {
++ qpbuf = mmap64((void *)0, resp.mdd.mqp_maplen,
++ (PROT_READ | PROT_WRITE), MAP_SHARED, context->mmap_fd,
++ resp.mdd.mqp_mapoffset);
++
++ if (qpbuf == MAP_FAILED)
++ goto err_destroy;
++
++ /*
++ * Need to set qp->buf here in case alloc_db fails then
++ * we'll call mlx4_free_buf() to umap.
++ */
++ qp->buf.buf = qpbuf;
++ qp->buf.length = resp.mdd.mqp_maplen;
++ }
+
+ if (attr->cap.max_recv_sge) {
+ qp->db = mlx4_alloc_db(to_mctx(context),
+@@ -934,10 +974,12 @@
+ qp->sq_spare_wqes = resp.mdd.mqp_sq_headroomwqes;
+ qp->sq.wqe_cnt = resp.mdd.mqp_sq_numwqe;
+
+- if (attr->srq)
+- qp->rq.wqe_cnt = 0;
++ if (attr->srq || attr->qp_type == IBV_QPT_XRC ||
++ attr->qp_type == IBV_QPT_XRC_SEND ||
++ attr->qp_type == IBV_QPT_XRC_RECV)
++ qp->rq.wqe_cnt = 0;
+ else
+- qp->rq.wqe_cnt = resp.mdd.mqp_rq_numwqe;
++ qp->rq.wqe_cnt = resp.mdd.mqp_rq_numwqe;
+
+ if (mlx4_set_qp_buf(attr->pd, qp, qpbuf, resp.mdd.mqp_maplen,
+ resp.mdd.mqp_rq_wqesz, resp.mdd.mqp_rq_off,
+@@ -1020,12 +1062,23 @@
+ struct ibv_qp_init_attr_ex attr_ex;
+ struct ibv_qp *qp;
+
+- memcpy(&attr_ex, attr, sizeof *attr);
++ /* We should copy below only the shared fields excluding the xrc_domain field.
++ * Otherwise we may have an ABI issue with applications that were compiled
++ * without the xrc_domain field. The xrc_domain has no effect on the
++ * sender side anyway, so there is no need to copy it in/out.
++ */
++ int init_attr_base_size = offsetof(struct ibv_qp_init_attr,
++ xrc_domain);
++
++ memset(&attr_ex, 0, sizeof(attr_ex)); /* pre-set all fields to zero */
++ /* copying only shared fields */
++ memcpy(&attr_ex, attr, init_attr_base_size);
+ attr_ex.comp_mask = IBV_QP_INIT_ATTR_PD;
+ attr_ex.pd = pd;
++
+ qp = mlx4_create_qp_ex(pd->context, &attr_ex);
+ if (qp)
+- memcpy(attr, &attr_ex, sizeof *attr);
++ memcpy(attr, &attr_ex, init_attr_base_size);
+ return qp;
+ }
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/components/open-fabrics/librdmacm/patches/004-librdmacm-fix-core-dump-in-rping-client-without-running-server.patch Thu Apr 13 20:30:48 2017 -0700
@@ -0,0 +1,63 @@
+#This patch was developed both in-house and from outside. We plan to submit it
+#upstream, but do not yet have a target date for doing so
+# HG changeset patch
+# Parent 68a7383fdd511ce1ea9a0dfc24404b3d74e67055
+
+diff -r 68a7383fdd51 examples/rping.c
+--- a/examples/rping.c Tue Mar 15 19:57:38 2016 -0700
++++ b/examples/rping.c Thu Mar 17 00:08:03 2016 -0700
+@@ -88,6 +88,7 @@
+ RDMA_READ_COMPLETE,
+ RDMA_WRITE_ADV,
+ RDMA_WRITE_COMPLETE,
++ CALLING_DISCONNECT,
+ DISCONNECTED,
+ ERROR
+ };
+@@ -290,6 +291,20 @@
+
+ if (wc.status) {
+ if (wc.status == IBV_WC_WR_FLUSH_ERR) {
++ /*
++ * FLUSH Error can be polled before RDMA-CM
++ * DISCONNECT is notified. Ensure that cb_state
++ * is set appropriately in such a case.
++ * Sleep for some time if Disconnect has not
++ * been called. The FLUSH WR can be because
++ * the remote end initiated the disconnect.
++ */
++ if (!(cb->state == CALLING_DISCONNECT || cb->state == DISCONNECTED))
++ sleep(2);
++
++ if (cb->state == DISCONNECTED)
++ return (0);
++
+ flushed = 1;
+ continue;
+
+@@ -824,7 +839,9 @@
+ }
+
+ rping_test_server(cb);
++ cb->state = CALLING_DISCONNECT;
+ rdma_disconnect(cb->child_cm_id);
++ pthread_cancel(cb->cqthread);
+ pthread_join(cb->cqthread, NULL);
+ rping_free_buffers(cb);
+ rping_free_qp(cb);
+@@ -943,6 +960,7 @@
+
+ ret = 0;
+ err3:
++ cb->state = CALLING_DISCONNECT;
+ rdma_disconnect(cb->child_cm_id);
+ pthread_join(cb->cqthread, NULL);
+ rdma_destroy_id(cb->child_cm_id);
+@@ -1122,6 +1140,7 @@
+
+ ret = 0;
+ err4:
++ cb->state = CALLING_DISCONNECT;
+ rdma_disconnect(cb->cm_id);
+ err3:
+ pthread_join(cb->cqthread, NULL);
--- a/components/open-fabrics/librdmacm/patches/004-librdmacm-remove-comments-for-XRC-support-from-man-pages.patch Thu Apr 13 13:20:29 2017 -0700
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,36 +0,0 @@
-#This patch was developed both in-house and from outside. We plan to submit it
-#upstream, but do not yet have a target date for doing so
-#
-# HG changeset patch
-# Parent 9b6cc8c68b03b28d0b674dbf0fec4e6e143bd185
-Added comments for XRC support not available in the man pages
-
-diff -r 9b6cc8c68b03 man/rdma_create_id.3
---- a/man/rdma_create_id.3 Thu Nov 19 11:19:08 2015 -0800
-+++ b/man/rdma_create_id.3 Fri Dec 04 17:46:54 2015 -0800
-@@ -48,7 +48,7 @@
- Provides unreliable, connectionless QP communication. Supports both datagram
- and multicast communication.
- .IP RDMA_PS_IB
--Provides for any IB services (UD, UC, RC, XRC, etc.).
-+Provides for any IB services (UD, UC, RC, etc.). Currently, it does not support XRC.
- .SH "SEE ALSO"
- rdma_cm(7), rdma_create_event_channel(3), rdma_destroy_id(3), rdma_get_devices(3),
- rdma_bind_addr(3), rdma_resolve_addr(3), rdma_connect(3), rdma_listen(3),
-diff -r 9b6cc8c68b03 man/rdma_create_srq.3
---- a/man/rdma_create_srq.3 Thu Nov 19 11:19:08 2015 -0800
-+++ b/man/rdma_create_srq.3 Fri Dec 04 17:46:54 2015 -0800
-@@ -35,10 +35,12 @@
- allocated by the rdma_cm for the SRQ, along with corresponding completion
- channels. Completion channels and CQ data created by the rdma_cm are
- exposed to the user through the rdma_cm_id structure.
-+Currently, the creation of XRC SRQs are not supported by this function in
-+the Solaris specific implementation.
- .P
- The actual capabilities and properties of the created SRQ will be
- returned to the user through the attr parameter. An rdma_cm_id
--may only be associated with a single SRQ.
-+may only be associated with a single SRQ.
- .SH "SEE ALSO"
- rdma_bind_addr(3), rdma_resolve_addr(3), rdma_create_ep(3),
- rdma_destroy_srq(3), ibv_create_srq(3), ibv_create_xsrq(3)
--- a/components/open-fabrics/librdmacm/patches/005-librdmacm-fix-core-dump-in-rping-client-without-running-server.patch Thu Apr 13 13:20:29 2017 -0700
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,63 +0,0 @@
-#This patch was developed both in-house and from outside. We plan to submit it
-#upstream, but do not yet have a target date for doing so
-# HG changeset patch
-# Parent 68a7383fdd511ce1ea9a0dfc24404b3d74e67055
-
-diff -r 68a7383fdd51 examples/rping.c
---- a/examples/rping.c Tue Mar 15 19:57:38 2016 -0700
-+++ b/examples/rping.c Thu Mar 17 00:08:03 2016 -0700
-@@ -88,6 +88,7 @@
- RDMA_READ_COMPLETE,
- RDMA_WRITE_ADV,
- RDMA_WRITE_COMPLETE,
-+ CALLING_DISCONNECT,
- DISCONNECTED,
- ERROR
- };
-@@ -290,6 +291,20 @@
-
- if (wc.status) {
- if (wc.status == IBV_WC_WR_FLUSH_ERR) {
-+ /*
-+ * FLUSH Error can be polled before RDMA-CM
-+ * DISCONNECT is notified. Ensure that cb_state
-+ * is set appropriately in such a case.
-+ * sleep for sometime if Disconnect has not
-+ * been called. The FLUSH WR can be because
-+ * the remote end initiated the disconnect.
-+ */
-+ if (!(cb->state == CALLING_DISCONNECT || cb->state == DISCONNECTED))
-+ sleep(2);
-+
-+ if (cb->state == DISCONNECTED)
-+ return (0);
-+
- flushed = 1;
- continue;
-
-@@ -824,7 +839,9 @@
- }
-
- rping_test_server(cb);
-+ cb->state = CALLING_DISCONNECT;
- rdma_disconnect(cb->child_cm_id);
-+ pthread_cancel(cb->cqthread);
- pthread_join(cb->cqthread, NULL);
- rping_free_buffers(cb);
- rping_free_qp(cb);
-@@ -943,6 +960,7 @@
-
- ret = 0;
- err3:
-+ cb->state = CALLING_DISCONNECT;
- rdma_disconnect(cb->child_cm_id);
- pthread_join(cb->cqthread, NULL);
- rdma_destroy_id(cb->child_cm_id);
-@@ -1122,6 +1140,7 @@
-
- ret = 0;
- err4:
-+ cb->state = CALLING_DISCONNECT;
- rdma_disconnect(cb->cm_id);
- err3:
- pthread_join(cb->cqthread, NULL);
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/components/open-fabrics/librdmacm/patches/005-librdmacm-xrc-and-22595881.patch Thu Apr 13 20:30:48 2017 -0700
@@ -0,0 +1,202 @@
+#This patch was developed both in-house and from outside. We plan to submit it
+#upstream, but do not yet have a target date for doing so
+#
+# HG changeset patch
+# Parent 217eb28f33861f64f8a7c4b78d8209e7465bbd83
+25759055 OFUV (Userland) support for XRC APIs
+22595881 defer librdmacm allocation of PD on ADDRESS_RESOLVED event
+
+diff -r 217eb28f3386 examples/rdma_xclient.c
+--- a/examples/rdma_xclient.c Mon Nov 21 11:48:29 2016 -0800
++++ b/examples/rdma_xclient.c Mon Mar 20 14:24:32 2017 -0700
+@@ -148,7 +148,11 @@
+ case 'r':
+ break;
+ case 'x':
+- hints.ai_port_space = RDMA_PS_IB;
++#if defined(__SVR4) && defined(__sun)
++ hints.ai_port_space = RDMA_PS_TCP;
++#else
++ hints.ai_port_space = RDMA_PS_IB;
++#endif
+ hints.ai_qp_type = IBV_QPT_XRC_SEND;
+ break;
+ default:
+@@ -167,7 +171,7 @@
+
+ err:
+ printf("usage: %s\n", argv[0]);
+- printf("\t[-s server]\n");
++ printf("\t[-s server_address]\n");
+ printf("\t[-p port_number]\n");
+ printf("\t[-c communication type]\n");
+ printf("\t r - RC: reliable-connected (default)\n");
+diff -r 217eb28f3386 examples/rdma_xserver.c
+--- a/examples/rdma_xserver.c Mon Nov 21 11:48:29 2016 -0800
++++ b/examples/rdma_xserver.c Mon Mar 20 14:24:32 2017 -0700
+@@ -162,7 +162,11 @@
+ case 'r':
+ break;
+ case 'x':
++#if defined(__SVR4) && defined(__sun)
++ hints.ai_port_space = RDMA_PS_TCP;
++#else
+ hints.ai_port_space = RDMA_PS_IB;
++#endif
+ hints.ai_qp_type = IBV_QPT_XRC_RECV;
+ break;
+ default:
+diff -r 217eb28f3386 man/rdma_set_option.3
+--- a/man/rdma_set_option.3 Mon Nov 21 11:48:29 2016 -0800
++++ b/man/rdma_set_option.3 Mon Mar 20 14:24:32 2017 -0700
+@@ -16,9 +16,9 @@
+ .IP "level" 12
+ Protocol level of the option to set. Currently level RDMA_OPTION_ID is supported.
+ .IP "optname" 12
+-Name of the option, relative to the level, to set. The only supported option isRDMA_OPTION_ID_REUSEADDR for level RDMA_OPTION_ID.
++Name of the option, relative to the level, to set. The supported options are RDMA_OPTION_ID_REUSEADDR and RDMA_OPTION_ID_TOS for level RDMA_OPTION_ID.
+ .IP "optval" 12
+-Reference to the option data. The data is dependent on the level and optname. For the option RDMA_OPTION_ID_REUSEADDR, an integer is passed.
++Reference to the option data. The data is dependent on the level and optname. For the options RDMA_OPTION_ID_REUSEADDR and RDMA_OPTION_ID_TOS, an integer is passed.
+ .IP "optlen" 12
+ The size of the %optval buffer.
+ .SH "DESCRIPTION"
+@@ -33,6 +33,9 @@
+ using rdma_listen(3), is not supported for CMIDs set with
+ this option. This option enables multiple connections to share
+ the same source IP Port on the active side of the connection.
++The RDMA_OPTION_ID_TOS option can be used to set the Type of Service
++level. A value of 0 disables the option and a non-zero value
++enables the option.
+ .sp
+ .SH "RETURN VALUE"
+ Returns 0 on success, or -1 on error. If an error occurs, errno will be
+diff -r 217eb28f3386 src/cma.c
+--- a/src/cma.c Mon Nov 21 11:48:29 2016 -0800
++++ b/src/cma.c Mon Mar 20 14:24:32 2017 -0700
+@@ -456,17 +456,8 @@
+ if ((ret = ucma_init_device(cma_dev)))
+ goto out;
+
+- if (!cma_dev->refcnt++) {
+- cma_dev->pd = ibv_alloc_pd(cma_dev->verbs);
+- if (!cma_dev->pd) {
+- cma_dev->refcnt--;
+- ret = ERR(ENOMEM);
+- goto out;
+- }
+- }
+ id_priv->cma_dev = cma_dev;
+ id_priv->id.verbs = cma_dev->verbs;
+- id_priv->id.pd = cma_dev->pd;
+ out:
+ pthread_mutex_unlock(&mut);
+ return ret;
+@@ -475,11 +466,12 @@
+ static void ucma_put_device(struct cma_device *cma_dev)
+ {
+ pthread_mutex_lock(&mut);
+- if (!--cma_dev->refcnt) {
+- ibv_dealloc_pd(cma_dev->pd);
+- if (cma_dev->xrcd)
+- ibv_close_xrcd(cma_dev->xrcd);
++ if (cma_dev->pd && !--cma_dev->refcnt) {
++ ibv_dealloc_pd(cma_dev->pd);
++ cma_dev->pd = NULL;
+ }
++ if (cma_dev->xrcd)
++ ibv_close_xrcd(cma_dev->xrcd);
+ pthread_mutex_unlock(&mut);
+ }
+
+@@ -613,7 +605,7 @@
+ enum ibv_qp_type qp_type;
+
+ qp_type = (ps == RDMA_PS_IPOIB || ps == RDMA_PS_UDP) ?
+- IBV_QPT_UD : IBV_QPT_RC;
++ IBV_QPT_UD : IBV_QPT_RC;
+ return rdma_create_id2(channel, id, context, ps, qp_type);
+ }
+
+@@ -1391,9 +1383,26 @@
+ return ERR(EINVAL);
+
+ id_priv = container_of(id, struct cma_id_private, id);
+- if (!(attr->comp_mask & IBV_QP_INIT_ATTR_PD) || !attr->pd) {
++ if (!attr->pd || !(attr->comp_mask & IBV_QP_INIT_ATTR_PD)) {
++ struct cma_device *cma_dev;
++ pthread_mutex_lock(&id_priv->mut);
++ cma_dev = id_priv->cma_dev;
++ if (!cma_dev->pd && !cma_dev->refcnt++) {
++ pthread_mutex_unlock(&id_priv->mut);
++ cma_dev->pd = ibv_alloc_pd(cma_dev->verbs);
++ if (!cma_dev->pd) {
++ pthread_mutex_lock(&id_priv->mut);
++ cma_dev->refcnt--;
++ pthread_mutex_unlock(&id_priv->mut);
++ return ERR(ENOMEM);
++ }
++ pthread_mutex_lock(&id_priv->mut);
++ id_priv->id.pd = cma_dev->pd;
++ }
++ pthread_mutex_unlock(&id_priv->mut);
++
+ attr->comp_mask |= IBV_QP_INIT_ATTR_PD;
+- attr->pd = id->pd;
++ attr->pd = (id->pd) ? id->pd : cma_dev->pd;
+ } else if (id->verbs != attr->pd->context)
+ return ERR(EINVAL);
+
+@@ -1457,12 +1466,49 @@
+ {
+ struct ibv_qp_init_attr_ex attr_ex;
+ int ret;
+-
+- memcpy(&attr_ex, qp_init_attr, sizeof *qp_init_attr);
++ int init_attr_base_size;
++
++ /*
++ * XRC binary compatibility patches to libibverbs add 'xrc_domain'
++ * field to the end of "struct ibv_qp_init_attr" in libibverbs,
++ * so it is not completely isomorphic to initial fields in
++ * "struct ibv_qp_init_attr_ex".
++ *
++ * We should copy below only the shared fields excluding the
++ * xrc_domain field from "struct ibv_qp_init_attr" into the
++ * "struct ibv_qp_init_attr_ex", otherwise it clobbers the field
++ * immediately following the isomorphic initial fields.
++ *
++ * (The xrc_domain has no effect on the sender side, so
++ * there is no need to copy it anyway!)
++ */
++ init_attr_base_size = offsetof(struct ibv_qp_init_attr, xrc_domain);
++
++ memset(&attr_ex, 0, sizeof(attr_ex)); /* pre-set all fields to zero */
++ /* copy only common fields */
++ memcpy(&attr_ex, qp_init_attr, init_attr_base_size);
+ attr_ex.comp_mask = IBV_QP_INIT_ATTR_PD;
+ attr_ex.pd = pd ? pd : id->pd;
++
++ if (qp_init_attr->qp_type == IBV_QPT_XRC) {
++ /*
++ * another private handshake to indicate
++ * XRC send or receive side endpoint
++ */
++ if (qp_init_attr->cap.max_send_wr == 0) {
++ attr_ex.qp_type = IBV_QPT_XRC_RECV;
++ if (qp_init_attr->xrc_domain) {
++ attr_ex.comp_mask |= IBV_QP_INIT_ATTR_XRCD;
++ attr_ex.xrcd = (struct ibv_xrcd *)
++ qp_init_attr->xrc_domain;
++ }
++ } else {
++ attr_ex.qp_type = IBV_QPT_XRC_SEND;
++ }
++ }
+ ret = rdma_create_qp_ex(id, &attr_ex);
+- memcpy(qp_init_attr, &attr_ex, sizeof *qp_init_attr);
++ /* copy only common fields */
++ memcpy(qp_init_attr, &attr_ex, init_attr_base_size);
+ return ret;
+ }
+
--- a/components/open-fabrics/open-fabrics.p5m Thu Apr 13 13:20:29 2017 -0700
+++ b/components/open-fabrics/open-fabrics.p5m Thu Apr 13 20:30:48 2017 -0700
@@ -83,12 +83,15 @@
file path=usr/bin/ibv_rc_pingpong
file path=usr/bin/ibv_srq_pingpong
file path=usr/bin/ibv_ud_pingpong
+file path=usr/bin/ibv_xsrq_pingpong
file path=usr/bin/mckey
file path=usr/bin/qperf
file path=usr/bin/rdma_bw
file path=usr/bin/rdma_client
file path=usr/bin/rdma_lat
file path=usr/bin/rdma_server
+file path=usr/bin/rdma_xclient
+file path=usr/bin/rdma_xserver
file path=usr/bin/rds-info
file path=usr/bin/rds-ping
file path=usr/bin/rds-stress
@@ -101,6 +104,7 @@
file path=usr/include/infiniband/mad.h
file path=usr/include/infiniband/mad_osd.h
file path=usr/include/infiniband/ofa_solaris.h
+file path=usr/include/infiniband/ofa_verbs.h
file path=usr/include/infiniband/sa.h
file path=usr/include/infiniband/umad.h
file path=usr/include/infiniband/verbs.h
@@ -241,12 +245,15 @@
file path=usr/share/man/man1/ibv_rc_pingpong.1
file path=usr/share/man/man1/ibv_srq_pingpong.1
file path=usr/share/man/man1/ibv_ud_pingpong.1
+file path=usr/share/man/man1/ibv_xsrq_pingpong.1
file path=usr/share/man/man1/mckey.1
file path=usr/share/man/man1/qperf.1
file path=usr/share/man/man1/rdma_bw.1
file path=usr/share/man/man1/rdma_client.1
link path=usr/share/man/man1/rdma_lat.1 target=rdma_bw.1
file path=usr/share/man/man1/rdma_server.1
+file path=usr/share/man/man1/rdma_xclient.1
+file path=usr/share/man/man1/rdma_xserver.1
file path=usr/share/man/man1/rds-info.1
file path=usr/share/man/man1/rds-ping.1
file path=usr/share/man/man1/rds-stress.1
@@ -264,7 +271,10 @@
file path=usr/share/man/man3/ibv_create_comp_channel.3
file path=usr/share/man/man3/ibv_create_cq.3
file path=usr/share/man/man3/ibv_create_qp.3
+file path=usr/share/man/man3/ibv_create_qp_ex.3
file path=usr/share/man/man3/ibv_create_srq.3
+file path=usr/share/man/man3/ibv_create_srq_ex.3
+file path=usr/share/man/man3/ibv_create_xsrq.3
link path=usr/share/man/man3/ibv_dealloc_pd.3 target=ibv_alloc_pd.3
link path=usr/share/man/man3/ibv_dereg_mr.3 target=ibv_reg_mr.3
link path=usr/share/man/man3/ibv_destroy_ah.3 target=ibv_create_ah.3
@@ -282,6 +292,7 @@
file path=usr/share/man/man3/ibv_get_device_guid.3
file path=usr/share/man/man3/ibv_get_device_list.3
file path=usr/share/man/man3/ibv_get_device_name.3
+file path=usr/share/man/man3/ibv_get_srq_num.3
file path=usr/share/man/man3/ibv_gid_reachable.3
link path=usr/share/man/man3/ibv_init_ah_from_wc.3 \
target=ibv_create_ah_from_wc.3
@@ -289,6 +300,7 @@
file path=usr/share/man/man3/ibv_modify_srq.3
file path=usr/share/man/man3/ibv_node_type_str.3
file path=usr/share/man/man3/ibv_open_device.3
+file path=usr/share/man/man3/ibv_open_xrcd.3
file path=usr/share/man/man3/ibv_poll_cq.3
file path=usr/share/man/man3/ibv_port_state_str.3
file path=usr/share/man/man3/ibv_post_recv.3