# HG changeset patch # User Sharath M Srinivasan # Date 1492140648 25200 # Node ID 22ec3267b2a31f2a8d3174423fcb599b980a0b72 # Parent f11e8d81786a7c6f8a21a6e5eed6f868a0484f75 PSARC/2017/028 OFUV Exafusion support: XRC and RDMA_OPTION_ID_TOS 25759055 OFUV (Userland) support for XRC APIs 22595881 defer librdmacm allocation of PD on ADDRESS_RESOLVED event diff -r f11e8d81786a -r 22ec3267b2a3 components/open-fabrics/libibverbs/patches/004-libibverbs-explorer-hangs-running-ibis-with-Titan-cards.patch --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/components/open-fabrics/libibverbs/patches/004-libibverbs-explorer-hangs-running-ibis-with-Titan-cards.patch Thu Apr 13 20:30:48 2017 -0700 @@ -0,0 +1,18 @@ +#This patch was developed both in-house and from outside. We plan to submit it +#upstream, but do not yet have a target date for doing so +# +# HG changeset patch +# Parent b116e18142b1d4ec433b67c77f389bf975cc8c42 +22741696 Explorer hangs running ibis on system with Titan cards + +diff -r b116e18142b1 src/init.c +--- a/src/init.c Tue Apr 19 10:06:00 2016 -0700 ++++ b/src/init.c Tue Apr 19 10:17:34 2016 -0700 +@@ -603,6 +603,7 @@ + } + free(sysfs_dev); + } ++ sysfs_dev_list = NULL; + + return num_devices; + } diff -r f11e8d81786a -r 22ec3267b2a3 components/open-fabrics/libibverbs/patches/004-libibverbs-man-changes-for-no-xrc.patch --- a/components/open-fabrics/libibverbs/patches/004-libibverbs-man-changes-for-no-xrc.patch Thu Apr 13 13:20:29 2017 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,231 +0,0 @@ -#This patch was developed both in-house and from outside. 
We plan to submit it -#upstream, but do not yet have a target date for doing so -# -# HG changeset patch -# Parent 3903116dd520bb552df4e8fa55f573d1cb9b9097 -remove xrc man pages - -diff -r 3903116dd520 Makefile.am ---- a/Makefile.am Wed Jan 13 09:20:30 2016 -0800 -+++ b/Makefile.am Wed Jan 13 09:32:13 2016 -0800 -@@ -63,8 +63,7 @@ - man/ibv_query_pkey.3 man/ibv_query_port.3 man/ibv_query_qp.3 \ - man/ibv_query_srq.3 man/ibv_rate_to_mult.3 man/ibv_reg_mr.3 \ - man/ibv_req_notify_cq.3 man/ibv_resize_cq.3 man/ibv_rate_to_mbps.3 \ -- man/ibv_create_qp_ex.3 man/ibv_create_srq_ex.3 man/ibv_open_xrcd.3 \ -- man/ibv_get_srq_num.3 man/ibv_open_qp.3 -+ man/ibv_open_xrcd.3 man/ibv_open_qp.3 - - DEBIAN = debian/changelog debian/compat debian/control debian/copyright \ - debian/ibverbs-utils.install debian/libibverbs1.install \ -diff -r 3903116dd520 Makefile.in ---- a/Makefile.in Wed Jan 13 09:20:30 2016 -0800 -+++ b/Makefile.in Wed Jan 13 09:32:13 2016 -0800 -@@ -494,8 +494,7 @@ - man/ibv_query_pkey.3 man/ibv_query_port.3 man/ibv_query_qp.3 \ - man/ibv_query_srq.3 man/ibv_rate_to_mult.3 man/ibv_reg_mr.3 \ - man/ibv_req_notify_cq.3 man/ibv_resize_cq.3 man/ibv_rate_to_mbps.3 \ -- man/ibv_create_qp_ex.3 man/ibv_create_srq_ex.3 man/ibv_open_xrcd.3 \ -- man/ibv_get_srq_num.3 man/ibv_open_qp.3 -+ man/ibv_open_xrcd.3 man/ibv_open_qp.3 - - DEBIAN = debian/changelog debian/compat debian/control debian/copyright \ - debian/ibverbs-utils.install debian/libibverbs1.install \ -diff -r 3903116dd520 man/ibv_create_qp_ex.3 ---- a/man/ibv_create_qp_ex.3 Wed Jan 13 09:20:30 2016 -0800 -+++ /dev/null Thu Jan 01 00:00:00 1970 +0000 -@@ -1,83 +0,0 @@ --.\" -*- nroff -*- --.\" --.TH IBV_CREATE_QP_EX 3 2013-06-26 libibverbs "Libibverbs Programmer's Manual" --.SH "NAME" --ibv_create_qp_ex, ibv_destroy_qp \- create or destroy a queue pair (QP) --.SH "SYNOPSIS" --.nf --.B #include --.sp --.BI "struct ibv_qp *ibv_create_qp_ex(struct ibv_context " "*context" , --.BI " struct ibv_qp_init_attr_ex " 
"*qp_init_attr" ); --.sp --.BI "int ibv_destroy_qp(struct ibv_qp " "*qp" ); --.fi --.SH "DESCRIPTION" --.B ibv_create_qp_ex() --creates a queue pair (QP) associated with the protection domain --.I pd\fR. --The argument --.I qp_init_attr_ex --is an ibv_qp_init_attr_ex struct, as defined in . --.PP --.nf --struct ibv_qp_init_attr_ex { --.in +8 --void *qp_context; /* Associated context of the QP */ --struct ibv_cq *send_cq; /* CQ to be associated with the Send Queue (SQ) */ --struct ibv_cq *recv_cq; /* CQ to be associated with the Receive Queue (RQ) */ --struct ibv_srq *srq; /* SRQ handle if QP is to be associated with an SRQ, otherwise NULL */ --struct ibv_qp_cap cap; /* QP capabilities */ --enum ibv_qp_type qp_type; /* QP Transport Service Type: IBV_QPT_RC, IBV_QPT_UC, IBV_QPT_UD or IBV_QPT_RAW_PACKET */ --int sq_sig_all; /* If set, each Work Request (WR) submitted to the SQ generates a completion entry */ --uint32_t comp_mask; /* Identifies valid fields */ --struct ibv_pd *pd; /* PD to be associated with the QP */ --struct ibv_xrcd *xrcd; /* XRC domain to be associated with the target QP */ --enum ibv_qp_create_flags create_flags; /* Creation flags for this QP */ --.in -8 --}; --.sp --.nf --struct ibv_qp_cap { --.in +8 --uint32_t max_send_wr; /* Requested max number of outstanding WRs in the SQ */ --uint32_t max_recv_wr; /* Requested max number of outstanding WRs in the RQ */ --uint32_t max_send_sge; /* Requested max number of scatter/gather (s/g) elements in a WR in the SQ */ --uint32_t max_recv_sge; /* Requested max number of s/g elements in a WR in the SQ */ --uint32_t max_inline_data;/* Requested max number of data (bytes) that can be posted inline to the SQ, otherwise 0 */ --.in -8 --}; --.fi --.PP --The function --.B ibv_create_qp_ex() --will update the --.I qp_init_attr_ex\fB\fR->cap --struct with the actual \s-1QP\s0 values of the QP that was created; --the values will be greater than or equal to the values requested. 
--.PP --.B ibv_destroy_qp() --destroys the QP --.I qp\fR. --.SH "RETURN VALUE" --.B ibv_create_qp_ex() --returns a pointer to the created QP, or NULL if the request fails. --Check the QP number (\fBqp_num\fR) in the returned QP. --.PP --.B ibv_destroy_qp() --returns 0 on success, or the value of errno on failure (which indicates the failure reason). --.SH "NOTES" --.PP --The attributes max_recv_wr and max_recv_sge are ignored by --.B ibv_create_qp_ex() --if the QP is to be associated with an SRQ. --.PP --.B ibv_destroy_qp() --fails if the QP is attached to a multicast group. --.SH "SEE ALSO" --.BR ibv_alloc_pd (3), --.BR ibv_modify_qp (3), --.BR ibv_query_qp (3) --.SH "AUTHORS" --.TP --Yishai Hadas -diff -r 3903116dd520 man/ibv_create_srq_ex.3 ---- a/man/ibv_create_srq_ex.3 Wed Jan 13 09:20:30 2016 -0800 -+++ /dev/null Thu Jan 01 00:00:00 1970 +0000 -@@ -1,71 +0,0 @@ --.\" -*- nroff -*- --.\" --.TH IBV_CREATE_SRQ_EX 3 2013-06-26 libibverbs "Libibverbs Programmer's Manual" --.SH "NAME" --ibv_create_srq_ex, ibv_destroy_srq \- create or destroy a shared receive queue (SRQ) --.SH "SYNOPSIS" --.nf --.B #include --.sp --.BI "struct ibv_srq *ibv_create_srq_ex(struct ibv_context " "*context" ", struct " --.BI " ibv_srq_init_attr_ex " "*srq_init_attr_ex" ); --.sp --.BI "int ibv_destroy_srq(struct ibv_srq " "*srq" ); --.fi --.SH "DESCRIPTION" --.B ibv_create_srq_ex() --creates a shared receive queue (SRQ) supporting both basic and xrc modes. --The argument --.I srq_init_attr_ex --is an ibv_srq_init_attr_ex struct, as defined in . 
--.PP --.nf --struct ibv_srq_init_attr_ex { --.in +8 --void *srq_context; /* Associated context of the SRQ */ --struct ibv_srq_attr attr; /* SRQ attributes */ --uint32_t comp_mask; /* Identifies valid fields */ --enum ibv_srq_type srq_type; /* Basic / XRC */ --struct ibv_pd *pd; /* PD associated with the SRQ */ --struct ibv_xrcd *xrcd; /* XRC domain to associate with the SRQ */ --struct ibv_cq *cq; /* CQ to associate with the SRQ for XRC mode */ --.in -8 --}; --.sp --.nf --struct ibv_srq_attr { --.in +8 --uint32_t max_wr; /* Requested max number of outstanding work requests (WRs) in the SRQ */ --uint32_t max_sge; /* Requested max number of scatter elements per WR */ --uint32_t srq_limit; /* The limit value of the SRQ */ --.in -8 --}; --.fi --.PP --The function --.B ibv_create_srq_ex() --will update the --.I srq_init_attr_ex --struct with the original values of the SRQ that was created; the --values of max_wr and max_sge will be greater than or equal to the --values requested. --.PP --.B ibv_destroy_srq() --destroys the SRQ --.I srq\fR. --.SH "RETURN VALUE" --.B ibv_create_srq_ex() --returns a pointer to the created SRQ, or NULL if the request fails. --.PP --.B ibv_destroy_srq() --returns 0 on success, or the value of errno on failure (which indicates the failure reason). --.SH "NOTES" --.B ibv_destroy_srq() --fails if any queue pair is still associated with this SRQ. 
--.SH "SEE ALSO" --.BR ibv_alloc_pd (3), --.BR ibv_modify_srq (3), --.BR ibv_query_srq (3) --.SH "AUTHORS" --.TP --Yishai Hadas -diff -r 3903116dd520 man/ibv_get_srq_num.3 ---- a/man/ibv_get_srq_num.3 Wed Jan 13 09:20:30 2016 -0800 -+++ /dev/null Thu Jan 01 00:00:00 1970 +0000 -@@ -1,32 +0,0 @@ --.\" -*- nroff -*- --.\" --.TH IBV_GET_SRQ_NUM 3 2013-06-26 libibverbs "Libibverbs Programmer's Manual" --.SH "NAME" --ibv_get_srq_num \- return srq number associated with the given shared receive queue (SRQ) --.SH "SYNOPSIS" --.nf --.B #include --.sp --.BI "int ibv_get_srq_num(struct ibv_srq " "*srq" , --.BI " uint32_t " "*srq_num" ); --.fi --.SH "DESCRIPTION" --.B ibv_get_srq_num() --return srq number associated with the given shared receive queue --The argument --.I srq --is an ibv_srq struct, as defined in . --.I srq_num --is an output parameter that holds the returned srq number. --.PP --.nf --.SH "RETURN VALUE" --.B ibv_get_srq_num() --returns 0 on success, or the value of errno on failure (which indicates the failure reason). --.SH "SEE ALSO" --.BR ibv_alloc_pd (3), --.BR ibv_modify_srq (3), --.BR ibv_create_srq_ex (3) --.SH "AUTHORS" --.TP --Yishai Hadas diff -r f11e8d81786a -r 22ec3267b2a3 components/open-fabrics/libibverbs/patches/005-libibverbs-explorer-hangs-running-ibis-with-Titan-cards.patch --- a/components/open-fabrics/libibverbs/patches/005-libibverbs-explorer-hangs-running-ibis-with-Titan-cards.patch Thu Apr 13 13:20:29 2017 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,18 +0,0 @@ -#This patch was developed both in-house and from outside. 
We plan to submit it -#upstream, but do not yet have a target date for doing so -# -# HG changeset patch -# Parent b116e18142b1d4ec433b67c77f389bf975cc8c42 -22741696 Explorer hangs running ibis on system with Titan cards - -diff -r b116e18142b1 src/init.c ---- a/src/init.c Tue Apr 19 10:06:00 2016 -0700 -+++ b/src/init.c Tue Apr 19 10:17:34 2016 -0700 -@@ -603,6 +603,7 @@ - } - free(sysfs_dev); - } -+ sysfs_dev_list = NULL; - - return num_devices; - } diff -r f11e8d81786a -r 22ec3267b2a3 components/open-fabrics/libibverbs/patches/005-libibverbs-xrc.patch --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/components/open-fabrics/libibverbs/patches/005-libibverbs-xrc.patch Thu Apr 13 20:30:48 2017 -0700 @@ -0,0 +1,832 @@ +#This patch was developed both in-house and from outside. We plan to submit it +#upstream, but do not yet have a target date for doing so +# +# HG changeset patch +# Parent f8684a1d3f02b9cc10a686daa8659805384ba51a +25759055 OFUV (Userland) support for XRC APIs + +diff -r f8684a1d3f02 Makefile.am +--- a/Makefile.am Mon Nov 21 11:48:20 2016 -0800 ++++ b/Makefile.am Mon Mar 20 14:32:42 2017 -0700 +@@ -45,7 +45,8 @@ + + libibverbsinclude_HEADERS = include/infiniband/arch.h include/infiniband/driver.h \ + include/infiniband/kern-abi.h include/infiniband/opcode.h include/infiniband/verbs.h \ +- include/infiniband/sa-kern-abi.h include/infiniband/sa.h include/infiniband/marshall.h ++ include/infiniband/sa-kern-abi.h include/infiniband/sa.h include/infiniband/marshall.h \ ++ include/infiniband/ofa_verbs.h + + man_MANS = man/ibv_asyncwatch.1 man/ibv_devices.1 man/ibv_devinfo.1 \ + man/ibv_shpd_pingpong.1 \ +@@ -64,7 +65,8 @@ + man/ibv_query_srq.3 man/ibv_rate_to_mult.3 man/ibv_reg_mr.3 \ + man/ibv_req_notify_cq.3 man/ibv_resize_cq.3 man/ibv_rate_to_mbps.3 \ + man/ibv_create_qp_ex.3 man/ibv_create_srq_ex.3 man/ibv_open_xrcd.3 \ +- man/ibv_get_srq_num.3 man/ibv_open_qp.3 ++ man/ibv_get_srq_num.3 man/ibv_open_qp.3 man/ibv_create_xsrq.3 \ ++ 
man/ibv_xsrq_pingpong.1 + + DEBIAN = debian/changelog debian/compat debian/control debian/copyright \ + debian/ibverbs-utils.install debian/libibverbs1.install \ +diff -r f8684a1d3f02 Makefile.in +--- a/Makefile.in Mon Nov 21 11:48:20 2016 -0800 ++++ b/Makefile.in Mon Mar 20 14:32:42 2017 -0700 +@@ -476,7 +476,8 @@ + libibverbsincludedir = $(includedir)/infiniband + libibverbsinclude_HEADERS = include/infiniband/arch.h include/infiniband/driver.h \ + include/infiniband/kern-abi.h include/infiniband/opcode.h include/infiniband/verbs.h \ +- include/infiniband/sa-kern-abi.h include/infiniband/sa.h include/infiniband/marshall.h include/infiniband/ofa_solaris.h ++ include/infiniband/sa-kern-abi.h include/infiniband/sa.h include/infiniband/marshall.h \ ++ include/infiniband/ofa_solaris.h include/infiniband/ofa_verbs.h + + man_MANS = man/ibv_asyncwatch.1 man/ibv_devices.1 man/ibv_devinfo.1 \ + man/ibv_shpd_pingpong.1 \ +@@ -495,7 +496,8 @@ + man/ibv_query_srq.3 man/ibv_rate_to_mult.3 man/ibv_reg_mr.3 \ + man/ibv_req_notify_cq.3 man/ibv_resize_cq.3 man/ibv_rate_to_mbps.3 \ + man/ibv_create_qp_ex.3 man/ibv_create_srq_ex.3 man/ibv_open_xrcd.3 \ +- man/ibv_get_srq_num.3 man/ibv_open_qp.3 ++ man/ibv_get_srq_num.3 man/ibv_open_qp.3 man/ibv_create_xsrq.3 \ ++ man/ibv_xsrq_pingpong.1 + + DEBIAN = debian/changelog debian/compat debian/control debian/copyright \ + debian/ibverbs-utils.install debian/libibverbs1.install \ +diff -r f8684a1d3f02 include/infiniband/ofa_verbs.h +--- /dev/null Thu Jan 01 00:00:00 1970 +0000 ++++ b/include/infiniband/ofa_verbs.h Mon Mar 20 14:32:42 2017 -0700 +@@ -0,0 +1,140 @@ ++/* ++ * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. ++ * Copyright (c) 2004, 2011-2012 Intel Corporation. All rights reserved. ++ * Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved. ++ * Copyright (c) 2005 PathScale, Inc. All rights reserved. ++ * ++ * This software is available to you under a choice of one of two ++ * licenses. 
You may choose to be licensed under the terms of the GNU ++ * General Public License (GPL) Version 2, available from the file ++ * COPYING in the main directory of this source tree, or the ++ * OpenIB.org BSD license below: ++ * ++ * Redistribution and use in source and binary forms, with or ++ * without modification, are permitted provided that the following ++ * conditions are met: ++ * ++ * - Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * - Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials ++ * provided with the distribution. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS ++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN ++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE ++ * SOFTWARE. 
++ */ ++ ++#ifndef INFINIBAND_OFA_VERBS_H ++#define INFINIBAND_OFA_VERBS_H ++ ++struct ibv_srq_init_attr; ++struct ibv_cq; ++struct ibv_pd; ++struct ibv_qp_init_attr; ++struct ibv_qp_attr; ++ ++ ++#ifdef __GNUC__ ++#define DEPRECATED __attribute__((deprecated)) ++#else ++#define DEPRECATED ++#endif ++ ++/* XRC compatibility layer */ ++#define LEGACY_XRC_SRQ_HANDLE 0xffffffff ++ ++struct ibv_xrc_domain { ++ struct ibv_context *context; ++ uint32_t handle; ++}; ++ ++struct ibv_srq_legacy { ++ struct ibv_context *context; ++ void *srq_context; ++ struct ibv_pd *pd; ++ uint32_t handle; ++ ++ uint32_t events_completed; ++ ++ uint32_t xrc_srq_num_bin_compat; ++ struct ibv_xrc_domain *xrc_domain_bin_compat; ++ struct ibv_cq *xrc_cq_bin_compat; ++ ++ pthread_mutex_t mutex; ++ pthread_cond_t cond; ++ ++ void *ibv_srq; ++ /* ++ * Below fields are for legacy source compatibility. They reside ++ * on the same offset as of those fields in struct ibv_srq. ++ */ ++ uint32_t xrc_srq_num; ++ struct ibv_xrc_domain *xrc_domain; ++ struct ibv_cq *xrc_cq; ++}; ++ ++/** ++ * ibv_open_xrc_domain - open an XRC domain ++ * Returns a reference to an XRC domain. ++ * ++ * @context: Device context ++ * @fd: descriptor for inode associated with the domain ++ * If fd == -1, no inode is associated with the domain; in this case, ++ * the only legal value for oflag is O_CREAT ++ * ++ * @oflag: oflag values are constructed by OR-ing flags from the following list ++ * ++ * O_CREAT ++ * If a domain belonging to device named by context is already associated ++ * with the inode, this flag has no effect, except as noted under O_EXCL ++ * below. Otherwise, a new XRC domain is created and is associated with ++ * inode specified by fd. ++ * ++ * O_EXCL ++ * If O_EXCL and O_CREAT are set, open will fail if a domain associated with ++ * the inode exists. 
The check for the existence of the domain and creation ++ * of the domain if it does not exist is atomic with respect to other ++ * processes executing open with fd naming the same inode. ++ */ ++struct ibv_xrc_domain *ibv_open_xrc_domain(struct ibv_context *context, ++ int fd, int oflag) DEPRECATED; ++ ++/** ++ * ibv_create_xrc_srq - Creates a SRQ associated with the specified protection ++ * domain and xrc domain. ++ * @pd: The protection domain associated with the SRQ. ++ * @xrc_domain: The XRC domain associated with the SRQ. ++ * @xrc_cq: CQ to report completions for XRC packets on. ++ * ++ * @srq_init_attr: A list of initial attributes required to create the SRQ. ++ * ++ * srq_attr->max_wr and srq_attr->max_sge are read to determine the ++ * requested size of the SRQ, and set to the actual values allocated ++ * on return. If ibv_create_xrc_srq() succeeds, then max_wr and max_sge ++ * will always be at least as large as the requested values. ++ */ ++struct ibv_srq *ibv_create_xrc_srq(struct ibv_pd *pd, ++ struct ibv_xrc_domain *xrc_domain, ++ struct ibv_cq *xrc_cq, ++ struct ibv_srq_init_attr *srq_init_attr) DEPRECATED; ++ ++/** ++ * ibv_close_xrc_domain - close an XRC domain ++ * If this is the last reference, destroys the domain. ++ * ++ * @d: reference to XRC domain to close ++ * ++ * close is implicitly performed at process exit. 
++ */ ++int ibv_close_xrc_domain(struct ibv_xrc_domain *d) DEPRECATED; ++ ++#endif +diff -r f8684a1d3f02 include/infiniband/verbs.h +--- a/include/infiniband/verbs.h Mon Nov 21 11:48:20 2016 -0800 ++++ b/include/infiniband/verbs.h Mon Mar 20 14:32:42 2017 -0700 +@@ -42,6 +42,7 @@ + #include + #if defined(__SVR4) && defined(__sun) + #include ++#include + #endif + + #ifdef __cplusplus +@@ -252,6 +253,8 @@ + struct ibv_srq *srq; + int port_num; + union ibv_gid gid; ++ /* For source compatibility with legacy API */ ++ uint32_t xrc_qp_num; + } element; + enum ibv_event_type event_type; + }; +@@ -507,6 +510,7 @@ + IBV_QPT_RC = 2, + IBV_QPT_UC, + IBV_QPT_UD, ++ IBV_QPT_XRC, /* XRC legacy compatible type */ + IBV_QPT_RAW_PACKET = 8, + IBV_QPT_XRC_SEND = 9, + IBV_QPT_XRC_RECV +@@ -536,6 +540,8 @@ + struct ibv_qp_cap cap; + enum ibv_qp_type qp_type; + int sq_sig_all; ++ /* Below is needed for legacy compatibility */ ++ struct ibv_xrc_domain *xrc_domain; + }; + + enum ibv_qp_init_attr_mask { +@@ -692,10 +698,14 @@ + } ud; + } wr; + union { +- struct { +- uint32_t remote_srqn; +- } xrc; +- } qp_type; ++ union { ++ struct { ++ uint32_t remote_srqn; ++ } xrc; ++ } qp_type; ++ ++ uint32_t xrc_remote_srq_num; ++ }; + }; + + struct ibv_recv_wr { +@@ -723,6 +733,25 @@ + pthread_mutex_t mutex; + pthread_cond_t cond; + uint32_t events_completed; ++ ++ /* ++ * Below is for source compatibility with legacy XRC APIs. ++ * Padding is based on ibv_srq_legacy. 
++ */ ++ uint32_t xrc_srq_num_bin_compat_padding; ++ struct ibv_xrc_domain *xrc_domain_bin_compat_padding; ++ struct ibv_cq *xrc_cq_bin_compat_padding; ++ void *ibv_srq_padding; ++ ++ /* legacy fields */ ++ uint32_t xrc_srq_num; ++ struct ibv_xrc_domain *xrc_domain; ++ struct ibv_cq *xrc_cq; ++}; ++ ++/* XRC source compat layer */ ++enum ibv_event_flags { ++ IBV_XRC_QP_EVENT_FLAG = 0x80000000, + }; + + struct ibv_qp { +@@ -996,6 +1025,8 @@ + + struct verbs_context { + /* "grows up" - new fields go here */ ++ void * (*drv_get_legacy_xrc) (struct ibv_srq *ibv_srq); ++ void (*drv_set_legacy_xrc) (struct ibv_srq *ibv_srq, void *legacy_xrc); + int (*drv_ibv_destroy_flow) (struct ibv_flow *flow); + int (*lib_ibv_destroy_flow) (struct ibv_flow *flow); + struct ibv_flow * (*drv_ibv_create_flow) (struct ibv_qp *qp, +diff -r f8684a1d3f02 man/ibv_create_qp_ex.3 +--- a/man/ibv_create_qp_ex.3 Mon Nov 21 11:48:20 2016 -0800 ++++ b/man/ibv_create_qp_ex.3 Mon Mar 20 14:32:42 2017 -0700 +@@ -28,7 +28,7 @@ + struct ibv_cq *recv_cq; /* CQ to be associated with the Receive Queue (RQ) */ + struct ibv_srq *srq; /* SRQ handle if QP is to be associated with an SRQ, otherwise NULL */ + struct ibv_qp_cap cap; /* QP capabilities */ +-enum ibv_qp_type qp_type; /* QP Transport Service Type: IBV_QPT_RC, IBV_QPT_UC, IBV_QPT_UD or IBV_QPT_RAW_PACKET */ ++enum ibv_qp_type qp_type; /* QP Transport Service Type: IBV_QPT_RC, IBV_QPT_XRC_SEND, IBV_QPT_XRC_RECV, IBV_QPT_UC, IBV_QPT_UD or IBV_QPT_RAW_PACKET */ + int sq_sig_all; /* If set, each Work Request (WR) submitted to the SQ generates a completion entry */ + uint32_t comp_mask; /* Identifies valid fields */ + struct ibv_pd *pd; /* PD to be associated with the QP */ +diff -r f8684a1d3f02 man/ibv_create_xsrq.3 +--- /dev/null Thu Jan 01 00:00:00 1970 +0000 ++++ b/man/ibv_create_xsrq.3 Mon Mar 20 14:32:42 2017 -0700 +@@ -0,0 +1,87 @@ ++.\" -*- nroff -*- ++.\" ++.TH IBV_CREATE_XSRQ 3 2011-06-17 libibverbs "Libibverbs Programmer's Manual" ++.SH "NAME" 
++ibv_create_xsrq, ibv_destroy_srq \- create or destroy a shared receive queue (SRQ) ++.SH "SYNOPSIS" ++.nf ++.B #include ++.sp ++.BI "struct ibv_srq *ibv_create_xsrq(struct ibv_pd " "*pd" ", struct " ++.BI " ibv_srq_init_attr " "*srq_init_attr" ++); ++.sp ++.BI "int ibv_destroy_srq(struct ibv_srq " "*srq" ); ++.fi ++.SH "DESCRIPTION" ++.B ibv_create_xsrq() ++creates a shared receive queue (SRQ) associated with the protection domain ++.I pd\fR. ++The argument ++.I srq_init_attr ++is an ibv_srq_init_attr struct, as defined in . ++.PP ++.nf ++struct ibv_srq_init_attr { ++.in +8 ++void *srq_context; /* Associated context of the SRQ ++*/ ++struct ibv_srq_attr attr; /* SRQ attributes */ ++enum ibv_srq_type srq_type; /* Specifies type of SRQ to create ++*/ ++union { ++.in +8 ++struct { ++.in +8 ++struct ibv_xrcd *xrcd; /* XRC domain associated with an XRC SRQ */ ++struct ibv_cq *cq; /* completion queue for an XRC SRQ*/ ++.in -8 ++} xrc; /* Extended attributes for IBV_SRQT_XRC type SRQs */ ++.in -8 ++} ext; ++.in -8 ++}; ++.sp ++.nf ++struct ibv_srq_attr { ++.in +8 ++uint32_t max_wr; /* Requested max number of ++outstanding work requests (WRs) in the SRQ */ ++uint32_t max_sge; /* Requested max number of scatter ++elements per WR */ ++uint32_t srq_limit; /* The limit value of the SRQ ++(ignored for ibv_create_srq) */ ++.in -8 ++}; ++.fi ++.PP ++The function ++.B ibv_create_xsrq() ++will update the ++.I srq_init_attr ++struct with the original values of the SRQ that was created; the ++values of max_wr and max_sge will be greater than or equal to the ++values requested. ++.PP ++.B ibv_destroy_srq() ++destroys the SRQ ++.I srq\fR. ++.SH "RETURN VALUE" ++.B ibv_create_xsrq() ++returns a pointer to the created SRQ, or NULL if the request fails. ++.PP ++.B ibv_destroy_srq() ++returns 0 on success, or the value of errno on failure (which indicates ++the failure reason). ++.SH "NOTES" ++.B ibv_destroy_srq() ++fails if any queue pair is still associated with this SRQ. 
++.SH "SEE ALSO" ++.BR ibv_alloc_pd (3), ++.BR ibv_create_cq (3), ++.BR ibv_open_xrcd (3), ++.BR ibv_modify_srq (3), ++.BR ibv_query_srq (3) ++.SH "AUTHORS" ++.TP ++Sean Hefty +diff -r f8684a1d3f02 man/ibv_xsrq_pingpong.1 +--- /dev/null Thu Jan 01 00:00:00 1970 +0000 ++++ b/man/ibv_xsrq_pingpong.1 Mon Mar 20 14:32:42 2017 -0700 +@@ -0,0 +1,71 @@ ++.TH IBV_XSRQ_PINGPONG 1 "May 24, 2016" "libibverbs" "USER COMMANDS" ++ ++.SH NAME ++ibv_xsrq_pingpong \- simple InfiniBand shared receive queue test ++ ++.SH SYNOPSIS ++.B ibv_xsrq_pingpong ++[\-p port] [\-d device] [\-i ib port] [\-s size] [\-m mtu] [\-c clients] ++[\-n num_tests] [\-l sl] [\-e] \fBHOSTNAME\fR ++ ++.B ibv_xsrq_pingpong ++[\-p port] [\-d device] [\-i ib port] [\-s size] [\-m mtu] [\-c clients] ++[\-n num_tests] [\-l sl] [\-e] ++ ++.SH DESCRIPTION ++.PP ++Run a simple ping-pong test over InfiniBand via the extended reliable ++connected (XRC) transport service, using a shared receive queue (SRQ). ++ ++.SH OPTIONS ++ ++.PP ++.TP ++\fB\-p\fR, \fB\-\-port\fR=\fIPORT\fR ++use TCP port \fIPORT\fR for initial synchronization (default 18515) ++.TP ++\fB\-d\fR, \fB\-\-ib\-dev\fR=\fIDEVICE\fR ++use IB device \fIDEVICE\fR (default first device found) ++.TP ++\fB\-i\fR, \fB\-\-ib\-port\fR=\fIPORT\fR ++use IB port \fIPORT\fR (default port 1) ++.TP ++\fB\-s\fR, \fB\-\-size\fR=\fISIZE\fR ++ping-pong messages of size \fISIZE\fR (default 4096) ++.TP ++\fB\-m\fR, \fB\-\-mtu\fR=\fIMTU\fR ++use path mtu of size \fIMTU\fR (default 2048) ++.TP ++\fB\-c\fR, \fB\-\-clients\fR=\fICLIENTS\fR ++number of clients \fICLIENTS\fR (on server only, default 1) ++.TP ++\fB\-n\fR, \fB\-\-num\-tests\fR=\fINUM_TESTS\fR ++perform \fINUM_TESTS\fR tests per client (default 5) ++.TP ++\fB\-l\fR, \fB\-\-sl\fR=\fISL\fR ++use \fISL\fR as the service level value (default 0) ++.TP ++\fB\-e\fR, \fB\-\-events\fR ++sleep while waiting for work completion events (default is to poll for ++completions) ++ ++.SH SEE ALSO ++.BR ibv_rc_pingpong (1), ++.BR 
ibv_uc_pingpong (1), ++.BR ibv_ud_pingpong (1), ++.BR ibv_srq_pingpong (1) ++ ++.SH AUTHORS ++.TP ++Roland Dreier ++.RI < roland@purestorage.com > ++.TP ++Jarod Wilson ++.RI < jarod@redhat.com > ++ ++.SH BUGS ++The network synchronization between client and server instances is ++weak, and does not prevent incompatible options from being used on the ++two instances. The method used for retrieving work completions is not ++strictly correct, and race conditions may cause failures on some ++systems. +diff -r f8684a1d3f02 src/cmd.c +--- a/src/cmd.c Mon Nov 21 11:48:20 2016 -0800 ++++ b/src/cmd.c Mon Mar 20 14:32:42 2017 -0700 +@@ -815,6 +815,7 @@ + cmd->user_handle = (uintptr_t) qp; + + if (attr_ex->comp_mask & IBV_QP_INIT_ATTR_XRCD) { ++ /* XRC receive side */ + vxrcd = container_of(attr_ex->xrcd, struct verbs_xrcd, xrcd); + cmd->pd_handle = vxrcd->handle; + } else { +@@ -824,7 +825,9 @@ + cmd->pd_handle = attr_ex->pd->handle; + cmd->send_cq_handle = attr_ex->send_cq->handle; + +- if (attr_ex->qp_type != IBV_QPT_XRC_SEND) { ++ /* XRC sender doesn't have a receive cq */ ++ if (attr_ex->qp_type != IBV_QPT_XRC_SEND && ++ attr_ex->qp_type != IBV_QPT_XRC) { + cmd->recv_cq_handle = attr_ex->recv_cq->handle; + cmd->srq_handle = attr_ex->srq ? attr_ex->srq->handle : 0; + } +@@ -847,7 +850,8 @@ + #else + cmd->sq_sig_all = attr_ex->sq_sig_all; + #endif +- cmd->qp_type = attr_ex->qp_type; ++ cmd->qp_type = (attr_ex->qp_type == IBV_QPT_XRC) ? 
++ IBV_QPT_XRC_SEND : attr_ex->qp_type; + cmd->is_srq = !!attr_ex->srq; + cmd->reserved = 0; + +@@ -1215,6 +1219,9 @@ + tmp->wr.ud.remote_qpn = i->wr.ud.remote_qpn; + tmp->wr.ud.remote_qkey = i->wr.ud.remote_qkey; + } else { ++ if (ibqp->qp_type == IBV_QPT_XRC_SEND) ++ tmp->qp_type.xrc.remote_srqn = ++ i->qp_type.xrc.remote_srqn; + switch (i->opcode) { + case IBV_WR_RDMA_WRITE: + case IBV_WR_RDMA_WRITE_WITH_IMM: +diff -r f8684a1d3f02 src/device.c +--- a/src/device.c Mon Nov 21 11:48:20 2016 -0800 ++++ b/src/device.c Mon Mar 20 14:32:42 2017 -0700 +@@ -261,6 +261,9 @@ + struct ibv_async_event *event) + { + struct ibv_kern_async_event ev; ++ struct verbs_context *vctx; ++ struct ibv_srq_legacy *ibv_srq_legacy = NULL; ++ struct ibv_qp *qp; + + if (read(context->async_fd, &ev, sizeof ev) != sizeof ev) + return -1; +@@ -281,11 +284,24 @@ + case IBV_EVENT_PATH_MIG_ERR: + case IBV_EVENT_QP_LAST_WQE_REACHED: + event->element.qp = (void *) (uintptr_t) ev.element; ++ qp = ibv_find_xrc_qp(event->element.qp->qp_num); ++ if (qp) { ++ /* This is an XRC receive QP created by the legacy API */ ++ event->event_type |= IBV_XRC_QP_EVENT_FLAG; ++ event->element.qp = NULL; ++ event->element.xrc_qp_num = qp->qp_num; ++ } + break; + + case IBV_EVENT_SRQ_ERR: + case IBV_EVENT_SRQ_LIMIT_REACHED: +- event->element.srq = (void *) (uintptr_t) ev.element; ++ vctx = verbs_get_ctx_op(context, drv_get_legacy_xrc); ++ if (vctx) ++ ibv_srq_legacy = ++ vctx->drv_get_legacy_xrc((void *) (uintptr_t) ev.element); ++ ++ event->element.srq = (ibv_srq_legacy) ? 
(void *)ibv_srq_legacy : ++ (void *) (uintptr_t) ev.element; + break; + case IBV_EVENT_GID_AVAIL: + case IBV_EVENT_GID_UNAVAIL: +@@ -310,6 +326,12 @@ + + void __ibv_ack_async_event(struct ibv_async_event *event) + { ++ int is_legacy_xrc = 0; ++ if (event->event_type & IBV_XRC_QP_EVENT_FLAG) { ++ event->event_type ^= IBV_XRC_QP_EVENT_FLAG; ++ is_legacy_xrc = 1; ++ } ++ + switch (event->event_type) { + case IBV_EVENT_CQ_ERR: + { +@@ -334,6 +356,16 @@ + { + struct ibv_qp *qp = event->element.qp; + ++ if (is_legacy_xrc) { ++ qp = ibv_find_xrc_qp(event->element.xrc_qp_num); ++ if (!qp || qp->qp_num != event->element.xrc_qp_num) { ++ fprintf(stderr, PFX "Warning: ibv_ack_async_event, " ++ "XRC qpn=%u wasn't found\n", ++ event->element.xrc_qp_num); ++ return; ++ } ++ } ++ + pthread_mutex_lock(&qp->mutex); + ++qp->events_completed; + pthread_cond_signal(&qp->cond); +@@ -347,6 +379,12 @@ + { + struct ibv_srq *srq = event->element.srq; + ++ if (srq->handle == LEGACY_XRC_SRQ_HANDLE) { ++ struct ibv_srq_legacy *ibv_srq_legacy = ++ (struct ibv_srq_legacy *) srq; ++ srq = ibv_srq_legacy->ibv_srq; ++ } ++ + pthread_mutex_lock(&srq->mutex); + ++srq->events_completed; + pthread_cond_signal(&srq->cond); +diff -r f8684a1d3f02 src/ibverbs.h +--- a/src/ibverbs.h Mon Nov 21 11:48:20 2016 -0800 ++++ b/src/ibverbs.h Mon Mar 20 14:32:42 2017 -0700 +@@ -85,6 +85,7 @@ + extern HIDDEN int abi_ver; + + HIDDEN int ibverbs_init(struct ibv_device ***list); ++HIDDEN struct ibv_qp *ibv_find_xrc_qp(uint32_t qpn); + + #define IBV_INIT_CMD(cmd, size, opcode) \ + do { \ +diff -r f8684a1d3f02 src/libibverbs.map +--- a/src/libibverbs.map Mon Nov 21 11:48:20 2016 -0800 ++++ b/src/libibverbs.map Mon Mar 20 14:32:42 2017 -0700 +@@ -124,4 +124,7 @@ + ibv_cmd_create_qp_ex; + ibv_cmd_open_qp; + ++ ibv_open_xrc_domain; ++ ibv_create_xrc_srq; ++ ibv_close_xrc_domain; + } IBVERBS_1.0; +diff -r f8684a1d3f02 src/verbs.c +--- a/src/verbs.c Mon Nov 21 11:48:20 2016 -0800 ++++ b/src/verbs.c Mon Mar 20 14:32:42 2017 
-0700 +@@ -41,6 +41,7 @@ + #include + #include + #include ++#include + #if defined(__SVR4) && defined(__sun) + #include + #include +@@ -854,3 +855,208 @@ + return qp->context->ops.detach_mcast(qp, gid, lid); + } + default_symver(__ibv_detach_mcast, ibv_detach_mcast); ++ ++ ++/* XRC compatibility layer */ ++struct ibv_xrc_domain *ibv_open_xrc_domain(struct ibv_context *context, ++ int fd, int oflag) ++{ ++ ++ struct ibv_xrcd *ibv_xrcd; ++ struct ibv_xrcd_init_attr xrcd_init_attr; ++ ++ memset(&xrcd_init_attr, 0, sizeof(xrcd_init_attr)); ++ ++ xrcd_init_attr.fd = fd; ++ xrcd_init_attr.oflags = oflag; ++ ++ xrcd_init_attr.comp_mask = IBV_XRCD_INIT_ATTR_FD | ++ IBV_XRCD_INIT_ATTR_OFLAGS; ++ ++ ibv_xrcd = ibv_open_xrcd(context, &xrcd_init_attr); ++ if (!ibv_xrcd) ++ return NULL; ++ ++ return (struct ibv_xrc_domain *)ibv_xrcd; ++ ++} ++ ++ ++struct ibv_srq *ibv_create_xrc_srq(struct ibv_pd *pd, ++ struct ibv_xrc_domain *xrc_domain, ++ struct ibv_cq *xrc_cq, ++ struct ibv_srq_init_attr *srq_init_attr) ++{ ++ ++ struct ibv_srq_init_attr_ex ibv_srq_init_attr_ex; ++ struct ibv_srq_legacy *ibv_srq_legacy; ++ struct ibv_srq *ibv_srq; ++ uint32_t xrc_srq_num; ++ struct verbs_context *vctx; ++ ++ vctx = verbs_get_ctx_op(pd->context, drv_set_legacy_xrc); ++ if (!vctx) { ++ errno = ENOSYS; ++ return NULL; ++ } ++ memset(&ibv_srq_init_attr_ex, 0, sizeof ibv_srq_init_attr_ex); ++ ++ ibv_srq_init_attr_ex.xrcd = (struct ibv_xrcd *)xrc_domain; ++ ibv_srq_init_attr_ex.comp_mask = IBV_SRQ_INIT_ATTR_XRCD | ++ IBV_SRQ_INIT_ATTR_TYPE | ++ IBV_SRQ_INIT_ATTR_CQ | IBV_SRQ_INIT_ATTR_PD; ++ ++ ibv_srq_init_attr_ex.cq = xrc_cq; ++ ibv_srq_init_attr_ex.pd = pd; ++ ibv_srq_init_attr_ex.srq_type = IBV_SRQT_XRC; ++ ++ ibv_srq_init_attr_ex.attr.max_sge = srq_init_attr->attr.max_sge; ++ ibv_srq_init_attr_ex.attr.max_wr = srq_init_attr->attr.max_wr; ++ ibv_srq_init_attr_ex.attr.srq_limit = srq_init_attr->attr.srq_limit; ++ ibv_srq_init_attr_ex.srq_context = srq_init_attr->srq_context; ++ ++ ibv_srq = 
ibv_create_srq_ex(pd->context, &ibv_srq_init_attr_ex); ++ if (!ibv_srq) ++ return NULL; ++ ++ if (ibv_srq->handle == LEGACY_XRC_SRQ_HANDLE) { ++ ++ struct ibv_srq *ibv_srq_tmp = ibv_srq; ++ int ret; ++ ++ ibv_srq = ibv_create_srq_ex(pd->context, &ibv_srq_init_attr_ex); ++ ++ ret = ibv_destroy_srq(ibv_srq_tmp); ++ if (ret) { ++ fprintf(stderr, PFX "ibv_create_xrc_srq, " ++ "fail to destroy intermediate srq\n"); ++ return NULL; ++ } ++ ++ if (!ibv_srq) ++ return NULL; ++ ++ if (ibv_srq->handle == LEGACY_XRC_SRQ_HANDLE) { ++ ret = ibv_destroy_srq(ibv_srq); ++ if (ret) ++ fprintf(stderr, PFX "ibv_create_xrc_srq, " ++ "fail to destroy intermediate srq\n"); ++ errno = EAGAIN; ++ return NULL; ++ } ++ } ++ ++ ibv_srq_legacy = calloc(1, sizeof(*ibv_srq_legacy)); ++ if (!ibv_srq_legacy) { ++ errno = ENOMEM; ++ goto err; ++ } ++ ++ if (ibv_get_srq_num(ibv_srq, &xrc_srq_num)) ++ goto err_free; ++ ++ ibv_srq_legacy->ibv_srq = ibv_srq; ++ ibv_srq_legacy->xrc_srq_num = xrc_srq_num; ++ ++ /* Setting the bin compat fields */ ++ ibv_srq_legacy->xrc_srq_num_bin_compat = xrc_srq_num; ++ ibv_srq_legacy->xrc_domain_bin_compat = xrc_domain; ++ ibv_srq_legacy->xrc_cq_bin_compat = xrc_cq; ++ ibv_srq_legacy->context = pd->context; ++ ibv_srq_legacy->srq_context = srq_init_attr->srq_context; ++ ibv_srq_legacy->pd = pd; ++ /* ++ * Set an indication that this is a legacy structure. For legacy structures, ++ * we should use the internal ibv_srq. 
++ */ ++ ibv_srq_legacy->handle = LEGACY_XRC_SRQ_HANDLE; ++ ibv_srq_legacy->xrc_domain = xrc_domain; ++ ibv_srq_legacy->xrc_cq = xrc_cq; ++ ibv_srq_legacy->events_completed = 0; ++ ++ srq_init_attr->attr.max_wr = ibv_srq_init_attr_ex.attr.max_wr; ++ srq_init_attr->attr.max_sge = ibv_srq_init_attr_ex.attr.max_sge; ++ ++ vctx->drv_set_legacy_xrc(ibv_srq, ibv_srq_legacy); ++ return (struct ibv_srq *)(ibv_srq_legacy); ++ ++err_free: ++ free(ibv_srq_legacy); ++err: ++ ibv_destroy_srq(ibv_srq); ++ return NULL; ++ ++} ++ ++static pthread_mutex_t xrc_tree_mutex = PTHREAD_MUTEX_INITIALIZER; ++static void *ibv_xrc_qp_tree; ++ ++static int xrc_qp_compare(const void *a, const void *b) ++{ ++ ++ if ((*(uint32_t *) a) < (*(uint32_t *) b)) ++ return -1; ++ else if ((*(uint32_t *) a) > (*(uint32_t *) b)) ++ return 1; ++ else ++ return 0; ++ ++} ++ ++struct ibv_qp *ibv_find_xrc_qp(uint32_t qpn) ++{ ++ uint32_t **qpn_ptr; ++ struct ibv_qp *ibv_qp = NULL; ++ ++ pthread_mutex_lock(&xrc_tree_mutex); ++ qpn_ptr = tfind(&qpn, &ibv_xrc_qp_tree, xrc_qp_compare); ++ if (!qpn_ptr) ++ goto end; ++ ++ ibv_qp = container_of(*qpn_ptr, struct ibv_qp, qp_num); ++ ++end: ++ pthread_mutex_unlock(&xrc_tree_mutex); ++ return ibv_qp; ++} ++ ++static int ibv_clear_xrc_qp(uint32_t qpn) ++{ ++ uint32_t **qpn_ptr; ++ int ret = 0; ++ ++ pthread_mutex_lock(&xrc_tree_mutex); ++ qpn_ptr = tdelete(&qpn, &ibv_xrc_qp_tree, xrc_qp_compare); ++ if (!qpn_ptr) ++ ret = EINVAL; ++ ++ pthread_mutex_unlock(&xrc_tree_mutex); ++ return ret; ++} ++ ++static int ibv_store_xrc_qp(struct ibv_qp *qp) ++{ ++ uint32_t **qpn_ptr; ++ int ret = 0; ++ ++ if (ibv_find_xrc_qp(qp->qp_num)) { ++ fprintf(stderr, PFX "ibv_store_xrc_qp failed, qpn=%u is already stored\n", ++ qp->qp_num); ++ return EEXIST; ++ } ++ ++ pthread_mutex_lock(&xrc_tree_mutex); ++ qpn_ptr = tsearch(&qp->qp_num, &ibv_xrc_qp_tree, xrc_qp_compare); ++ if (!qpn_ptr) ++ ret = EINVAL; ++ ++ pthread_mutex_unlock(&xrc_tree_mutex); ++ return ret; ++ ++} ++ ++int 
ibv_close_xrc_domain(struct ibv_xrc_domain *d) ++{ ++ struct ibv_xrcd *ibv_xrcd = (struct ibv_xrcd *)d; ++ return ibv_close_xrcd(ibv_xrcd); ++} diff -r f11e8d81786a -r 22ec3267b2a3 components/open-fabrics/libmlx4/patches/004-libmlx4-blueflame.patch --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/components/open-fabrics/libmlx4/patches/004-libmlx4-blueflame.patch Thu Apr 13 20:30:48 2017 -0700 @@ -0,0 +1,20 @@ +#This patch was developed both in-house and from outside. We plan to submit it +#upstream, but do not yet have a target date for doing so +# +# HG changeset patch +# Parent 710c2a999a0a5469c1dd4ea1c21b279debb26a3b +24617163 Missing code in libmlx4 when upgrading from ofed-1.5.3 to ofed-3.18 + +diff -r 710c2a999a0a src/qp.c +--- a/src/qp.c Fri Sep 16 10:24:06 2016 -0700 ++++ b/src/qp.c Tue Sep 20 08:12:11 2016 -0700 +@@ -405,7 +405,8 @@ + out: + ctx = to_mctx(ibqp->context); + +- if (nreq == 1 && inl && size > 1 && size <= ctx->bf_buf_size / 16) { ++ if (ctx->ibv_ctx.device->blueflame_enabled && nreq == 1 && inl && ++ size > 1 && size <= ctx->bf_buf_size / 16) { + ctrl->owner_opcode |= htonl((qp->sq.head & 0xffff) << 8); + *(uint32_t *) ctrl->reserved |= qp->doorbell_qpn; + /* diff -r f11e8d81786a -r 22ec3267b2a3 components/open-fabrics/libmlx4/patches/004-libmlx4-remove-xrc.patch --- a/components/open-fabrics/libmlx4/patches/004-libmlx4-remove-xrc.patch Thu Apr 13 13:20:29 2017 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,115 +0,0 @@ -#This patch was developed both in-house and from outside. 
We plan to submit it -#upstream, but do not yet have a target date for doing so -# -# HG changeset patch -# Parent 56ebd417e6ed24cfa11c23bd564417ec7d2e5812 -Disable xrc routines in libmlx4 - -diff -r 56ebd417e6ed src/srq.c ---- a/src/srq.c Wed Dec 02 15:27:52 2015 -0800 -+++ b/src/srq.c Wed Dec 02 15:47:02 2015 -0800 -@@ -286,6 +286,13 @@ - struct ibv_srq *mlx4_create_xrc_srq(struct ibv_context *context, - struct ibv_srq_init_attr_ex *attr_ex) - { -+#if defined(__SVR4) && defined(__sun) -+ /* -+ * Not supported by Solaris kernel driver. When/if supported -+ * this routine will need to be ported. -+ */ -+ return NULL; -+#else - struct mlx4_create_xsrq cmd; - struct mlx4_create_srq_resp resp; - struct mlx4_srq *srq; -@@ -362,10 +369,18 @@ - err: - free(srq); - return NULL; -+#endif - } - - int mlx4_destroy_xrc_srq(struct ibv_srq *srq) - { -+#if defined(__SVR4) && defined(__sun) -+ /* -+ * Not supported by Solaris kernel driver. When/if supported -+ * this routine will need to be ported. -+ */ -+ return NULL; -+#else - struct mlx4_context *mctx = to_mctx(srq->context); - struct mlx4_srq *msrq = to_msrq(srq); - struct mlx4_cq *mcq; -@@ -391,4 +406,5 @@ - free(msrq); - - return 0; -+#endif - } -diff -r 56ebd417e6ed src/verbs.c ---- a/src/verbs.c Wed Dec 02 15:27:52 2015 -0800 -+++ b/src/verbs.c Wed Dec 02 15:47:02 2015 -0800 -@@ -151,6 +151,13 @@ - struct ibv_xrcd *mlx4_open_xrcd(struct ibv_context *context, - struct ibv_xrcd_init_attr *attr) - { -+#if defined(__SVR4) && defined(__sun) -+ /* -+ * Not supported by Solaris kernel driver. When/if supported -+ * this routine will need to be ported. -+ */ -+ return NULL; -+#else - struct ibv_open_xrcd cmd; - struct ibv_open_xrcd_resp resp; - struct verbs_xrcd *xrcd; -@@ -170,10 +177,18 @@ - err: - free(xrcd); - return NULL; -+#endif - } - - int mlx4_close_xrcd(struct ibv_xrcd *ib_xrcd) - { -+#if defined(__SVR4) && defined(__sun) -+ /* -+ * Not supported by Solaris kernel driver. 
When/if supported -+ * this routine will need to be ported. -+ */ -+ return NULL; -+#else - struct verbs_xrcd *xrcd = container_of(ib_xrcd, struct verbs_xrcd, xrcd); - int ret; - -@@ -182,6 +197,7 @@ - free(xrcd); - - return ret; -+#endif - } - - struct ibv_mr *mlx4_reg_mr(struct ibv_pd *pd, void *addr, size_t length, -@@ -1031,6 +1047,13 @@ - - struct ibv_qp *mlx4_open_qp(struct ibv_context *context, struct ibv_qp_open_attr *attr) - { -+#if defined(__SVR4) && defined(__sun) -+ /* -+ * Not supported by Solaris kernel driver. When/if supported -+ * this routine will need to be ported. -+ */ -+ return NULL; -+#else - struct ibv_open_qp cmd; - struct ibv_create_qp_resp resp; - struct mlx4_qp *qp; -@@ -1050,6 +1073,7 @@ - err: - free(qp); - return NULL; -+#endif - } - - int mlx4_query_qp(struct ibv_qp *ibqp, struct ibv_qp_attr *attr, diff -r f11e8d81786a -r 22ec3267b2a3 components/open-fabrics/libmlx4/patches/005-libmlx4-blueflame.patch --- a/components/open-fabrics/libmlx4/patches/005-libmlx4-blueflame.patch Thu Apr 13 13:20:29 2017 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,20 +0,0 @@ -#This patch was developed both in-house and from outside. 
We plan to submit it -#upstream, but do not yet have a target date for doing so -# -# HG changeset patch -# Parent 710c2a999a0a5469c1dd4ea1c21b279debb26a3b -24617163 Missing code in libmlx4 when upgrading from ofed-1.5.3 to ofed-3.18 - -diff -r 710c2a999a0a src/qp.c ---- a/src/qp.c Fri Sep 16 10:24:06 2016 -0700 -+++ b/src/qp.c Tue Sep 20 08:12:11 2016 -0700 -@@ -405,7 +405,8 @@ - out: - ctx = to_mctx(ibqp->context); - -- if (nreq == 1 && inl && size > 1 && size <= ctx->bf_buf_size / 16) { -+ if (ctx->ibv_ctx.device->blueflame_enabled && nreq == 1 && inl && -+ size > 1 && size <= ctx->bf_buf_size / 16) { - ctrl->owner_opcode |= htonl((qp->sq.head & 0xffff) << 8); - *(uint32_t *) ctrl->reserved |= qp->doorbell_qpn; - /* diff -r f11e8d81786a -r 22ec3267b2a3 components/open-fabrics/libmlx4/patches/005-libmlx4-xrc.patch --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/components/open-fabrics/libmlx4/patches/005-libmlx4-xrc.patch Thu Apr 13 20:30:48 2017 -0700 @@ -0,0 +1,423 @@ +#This patch was developed both in-house and from outside. 
We plan to submit it +#upstream, but do not yet have a target date for doing so +# +# HG changeset patch +# Parent 90d898abcac39d3fc4a631a678f0bb7bbe28d877 +25759055 OFUV (Userland) support for XRC APIs + +diff -r 90d898abcac3 src/mlx4.c +--- a/src/mlx4.c Mon Nov 21 11:48:10 2016 -0800 ++++ b/src/mlx4.c Mon Mar 20 14:22:58 2017 -0700 +@@ -274,6 +274,8 @@ + verbs_set_ctx_op(verbs_ctx, open_qp, mlx4_open_qp); + verbs_set_ctx_op(verbs_ctx, drv_ibv_create_flow, ibv_cmd_create_flow); + verbs_set_ctx_op(verbs_ctx, drv_ibv_destroy_flow, ibv_cmd_destroy_flow); ++ verbs_set_ctx_op(verbs_ctx, drv_set_legacy_xrc, mlx4_set_legacy_xrc); ++ verbs_set_ctx_op(verbs_ctx, drv_get_legacy_xrc, mlx4_get_legacy_xrc); + + return 0; + +diff -r 90d898abcac3 src/mlx4.h +--- a/src/mlx4.h Mon Nov 21 11:48:10 2016 -0800 ++++ b/src/mlx4.h Mon Mar 20 14:22:58 2017 -0700 +@@ -233,6 +233,7 @@ + uint32_t *db; + uint16_t counter; + uint8_t ext_srq; ++ struct ibv_srq_legacy *ibv_srq_legacy; + }; + + struct mlx4_wq { +@@ -464,4 +465,7 @@ + struct mlx4_ah *ah); + void mlx4_free_av(struct mlx4_ah *ah); + ++void *mlx4_get_legacy_xrc(struct ibv_srq *srq); ++void mlx4_set_legacy_xrc(struct ibv_srq *srq, void *legacy_xrc_srq); ++ + #endif /* MLX4_H */ +diff -r 90d898abcac3 src/qp.c +--- a/src/qp.c Mon Nov 21 11:48:10 2016 -0800 ++++ b/src/qp.c Mon Mar 20 14:22:58 2017 -0700 +@@ -247,6 +247,7 @@ + + switch (ibqp->qp_type) { + case IBV_QPT_XRC_SEND: ++ case IBV_QPT_XRC: + ctrl->srcrb_flags |= MLX4_REMOTE_SRQN_FLAGS(wr); + /* fall through */ + case IBV_QPT_RC: +@@ -559,6 +560,7 @@ + break; + + case IBV_QPT_XRC_SEND: ++ case IBV_QPT_XRC: + case IBV_QPT_RC: + size += sizeof (struct mlx4_wqe_raddr_seg); + /* +@@ -596,9 +598,11 @@ + qp->buf.buf = qpbuf; + qp->buf.length = buflen; + +- qp->sq.wrid = malloc(qp->sq.wqe_cnt * sizeof (uint64_t)); +- if (!qp->sq.wrid) +- return -1; ++ if (qp->sq.wqe_cnt) { ++ qp->sq.wrid = malloc(qp->sq.wqe_cnt * sizeof (uint64_t)); ++ if (!qp->sq.wrid) ++ return -1; ++ } + + if 
(qp->rq.wqe_cnt) { + qp->rq.wrid = malloc(qp->rq.wqe_cnt * sizeof (uint64_t)); +@@ -628,16 +632,20 @@ + qp->sq.offset = 0; + } + +- if ((long int)qp->buf.length < (long int)qp->buf_size) { +- fprintf(stderr, PFX "QP kernel buffer size %lu < user buf " +- "size %d\n", (unsigned long)qp->buf.length, qp->buf_size); +- } +- if ((!rq_off && qp->rq.offset) || (!sq_off && qp->sq.offset)) { +- fprintf(stderr, PFX "QP kernel and user out of sync on " +- "buffer order\n"); +- } ++ if (qp->buf_size) { ++ if ((long int)qp->buf.length < (long int)qp->buf_size) { ++ fprintf(stderr, PFX "QP kernel buffer size %lu < user " ++ "buf size %d\n", (unsigned long)qp->buf.length, ++ qp->buf_size); ++ } ++ if ((!rq_off && qp->rq.offset) || (!sq_off && qp->sq.offset)) { ++ fprintf(stderr, PFX "QP kernel and user out of sync on " ++ "buffer order\n"); ++ } + +- memset(qp->buf.buf, 0, qp->buf_size); ++ memset(qp->buf.buf, 0, qp->buf_size); ++ } else ++ qp->buf.buf = NULL; + return 0; + } + #endif +@@ -705,6 +713,7 @@ + break; + + case IBV_QPT_XRC_SEND: ++ case IBV_QPT_XRC: + case IBV_QPT_UC: + case IBV_QPT_RC: + wqe_size -= sizeof (struct mlx4_wqe_raddr_seg); +diff -r 90d898abcac3 src/srq.c +--- a/src/srq.c Mon Nov 21 11:48:10 2016 -0800 ++++ b/src/srq.c Mon Mar 20 14:22:58 2017 -0700 +@@ -66,13 +66,17 @@ + struct ibv_recv_wr *wr, + struct ibv_recv_wr **bad_wr) + { +- struct mlx4_srq *srq = to_msrq(ibsrq); ++ struct mlx4_srq *srq; + struct mlx4_wqe_srq_next_seg *next; + struct mlx4_wqe_data_seg *scat; + int err = 0; + int nreq; + int i; + ++ if (ibsrq->handle == LEGACY_XRC_SRQ_HANDLE) ++ ibsrq = (struct ibv_srq *)(((struct ibv_srq_legacy *) ibsrq)->ibv_srq); ++ ++ srq = to_msrq(ibsrq); + pthread_spin_lock(&srq->lock); + + for (nreq = 0; wr; ++nreq, wr = wr->next) { +@@ -290,6 +294,9 @@ + struct mlx4_create_srq_resp resp; + struct mlx4_srq *srq; + int ret; ++#if defined(__SVR4) && defined(__sun) ++ void *srqbuf; ++#endif + + /* Sanity check SRQ size before proceeding */ + if 
(attr_ex->attr.max_wr > 1 << 16 || attr_ex->attr.max_sge > 64) +@@ -342,9 +349,67 @@ + attr_ex, + &cmd.ibv_cmd, sizeof cmd, + &resp.ibv_resp, sizeof resp); ++ ++#if defined(__SVR4) && defined(__sun) ++ if (ret) { ++ goto err; ++ } ++ ++ /* ++ * The kernel driver passes back mmap information for mapping the ++ * SRQ work queue memory it allocated and the doorbell ++ * for posting. ++ */ ++ if (resp.mdd.msrq_rev < 1) { ++ fprintf(stderr, PFX "libmlx4_create_xrc_srq libmlx4/hermon umap " ++ "rev mismatch (kernel rev=%d)\n", resp.mdd.msrq_rev); ++ goto err_destroy; ++ } ++ ++ srqbuf = mmap64((void *)0, resp.mdd.msrq_maplen, (PROT_READ | PROT_WRITE), ++ MAP_SHARED, attr_ex->pd->context->mmap_fd, resp.mdd.msrq_mapoffset); ++ ++ if (srqbuf == MAP_FAILED) { ++ goto err_destroy; ++ } ++ ++ srq->buf.buf = srqbuf; ++ srq->buf.length = resp.mdd.msrq_maplen; ++ srq->max = resp.ibv_resp.max_wr; ++ srq->max_gs = resp.ibv_resp.max_sge; ++ srq->verbs_srq.srq_num = srq->srqn = resp.mdd.msrq_srqnum; ++ srq->counter = 0; ++ ++ srq->db = mlx4_alloc_db(to_mctx(attr_ex->pd->context), ++ resp.mdd.msrq_rdbr_mapoffset, ++ resp.mdd.msrq_rdbr_maplen, ++ resp.mdd.msrq_rdbr_offset); ++ if (srq->db == NULL) { ++ goto err_unmap; ++ } ++ ++ /* ++ * The following call only initializes memory and control structures, ++ * it utilizes the memory allocated by the kernel. ++ * It also allocates the srq->wrid memory. ++ */ ++ if (mlx4_set_srq_buf(attr_ex->pd, srq, resp.mdd.msrq_wqesz, ++ resp.mdd.msrq_numwqe)) { ++ goto err_db; ++ } ++ ++ /* ++ * The returned max wr will have been rounded up to the nearest ++ * power of 2, subtracting 1 from that and reporting that value ++ * as the max will give us the required free WR in the queue, as ++ * in OFED. 
++ */ ++ attr_ex->attr.max_wr -= 1; ++#else + if (ret) + goto err_db; + ++#endif + ret = mlx4_store_xsrq(&to_mctx(context)->xsrq_table, + srq->verbs_srq.srq_num, srq); + if (ret) +@@ -352,13 +417,35 @@ + + return &srq->verbs_srq.srq; + +-err_destroy: +- ibv_cmd_destroy_srq(&srq->verbs_srq.srq); + err_db: + mlx4_free_db(to_mctx(context), MLX4_DB_TYPE_RQ, srq->db); ++ ++#if defined(__SVR4) && defined(__sun) ++ if (srq->wrid) ++ free(srq->wrid); ++err_unmap: ++ mlx4_free_buf(&srq->buf); ++ ++err_destroy: ++ /* ++ * Calling ibv_cmd_destroy_srq() will try and take the ibv_srq ++ * mutex that is initialised by the ibv_create_srq() entry point ++ * that called us AFTER we return, so its not initialised yet. ++ * So initialised it here so the destroy call doesn't hang. ++ */ ++ pthread_mutex_init(&(srq->verbs_srq.srq.mutex), NULL); ++ pthread_cond_init(&(srq->verbs_srq.srq.cond), NULL); ++ srq->verbs_srq.srq.events_completed = 0; ++ ++ ibv_cmd_destroy_srq(&srq->verbs_srq.srq); ++#else ++err_destroy: ++ ibv_cmd_destroy_srq(&srq->verbs_srq.srq); + err_free: + free(srq->wrid); + mlx4_free_buf(&srq->buf); ++#endif ++ + err: + free(srq); + return NULL; +diff -r 90d898abcac3 src/verbs.c +--- a/src/verbs.c Mon Nov 21 11:48:10 2016 -0800 ++++ b/src/verbs.c Mon Mar 20 14:22:58 2017 -0700 +@@ -549,6 +549,21 @@ + return 0; + } + ++void *mlx4_get_legacy_xrc(struct ibv_srq *srq) ++{ ++ struct mlx4_srq *msrq = to_msrq(srq); ++ ++ return msrq->ibv_srq_legacy; ++} ++ ++void mlx4_set_legacy_xrc(struct ibv_srq *srq, void *legacy_xrc_srq) ++{ ++ struct mlx4_srq *msrq = to_msrq(srq); ++ ++ msrq->ibv_srq_legacy = legacy_xrc_srq; ++ return; ++} ++ + struct ibv_srq *mlx4_create_srq(struct ibv_pd *pd, + struct ibv_srq_init_attr *attr) + { +@@ -564,7 +579,7 @@ + if (attr->attr.max_wr > 1 << 16 || attr->attr.max_sge > 64) + return NULL; + +- srq = malloc(sizeof *srq); ++ srq = calloc(1, sizeof *srq); + if (!srq) + return NULL; + +@@ -724,6 +739,9 @@ + { + struct ibv_modify_srq cmd; + ++ if 
(srq->handle == LEGACY_XRC_SRQ_HANDLE) ++ srq = (struct ibv_srq *)(((struct ibv_srq_legacy *) srq)->ibv_srq); ++ + #if !(defined(__SVR4) && defined(__sun)) + return ibv_cmd_modify_srq(srq, attr, attr_mask, &cmd, sizeof cmd); + #else +@@ -741,6 +759,9 @@ + { + struct ibv_query_srq cmd; + ++ if (srq->handle == LEGACY_XRC_SRQ_HANDLE) ++ srq = (struct ibv_srq *)(((struct ibv_srq_legacy *) srq)->ibv_srq); ++ + #if !(defined(__SVR4) && defined(__sun)) + return ibv_cmd_query_srq(srq, attr, &cmd, sizeof cmd); + #else +@@ -757,9 +778,23 @@ + int mlx4_destroy_srq(struct ibv_srq *srq) + { + int ret; ++ struct ibv_srq *legacy_srq = NULL; + +- if (to_msrq(srq)->ext_srq) +- return mlx4_destroy_xrc_srq(srq); ++ if (srq->handle == LEGACY_XRC_SRQ_HANDLE) { ++ legacy_srq = srq; ++ srq = (struct ibv_srq *)(((struct ibv_srq_legacy *) srq)->ibv_srq); ++ } ++ ++ if (to_msrq(srq)->ext_srq) { ++ ret = mlx4_destroy_xrc_srq(srq); ++ if (ret) ++ return ret; ++ ++ if (legacy_srq) ++ free(legacy_srq); ++ ++ return 0; ++ } + + ret = ibv_cmd_destroy_srq(srq); + if (ret) +@@ -783,7 +818,7 @@ + struct ibv_create_qp_resp resp; + #else + struct mlx4_create_qp_resp resp; +- void *qpbuf; ++ void *qpbuf = NULL; + #endif + + /* Sanity check QP size before proceeding */ +@@ -813,7 +848,8 @@ + } + + if (attr->srq || attr->qp_type == IBV_QPT_XRC_SEND || +- attr->qp_type == IBV_QPT_XRC_RECV) { ++ attr->qp_type == IBV_QPT_XRC_RECV || ++ attr->qp_type == IBV_QPT_XRC) { + attr->cap.max_recv_wr = qp->rq.wqe_cnt = attr->cap.max_recv_sge = 0; + } else { + #if !(defined(__SVR4) && defined(__sun)) +@@ -900,18 +936,22 @@ + "rev mismatch (kernel rev=%d)\n", resp.mdd.mqp_rev); + goto err_destroy; + } +- qpbuf = mmap64((void *)0, resp.mdd.mqp_maplen, (PROT_READ | PROT_WRITE), +- MAP_SHARED, context->mmap_fd, resp.mdd.mqp_mapoffset); +- +- if (qpbuf == MAP_FAILED) +- goto err_destroy; + +- /* +- * Need to set qp->buf here in case alloc_db fails then +- * we'll call mlx4_free_buf() to umap. 
+- */ +- qp->buf.buf = qpbuf; +- qp->buf.length = resp.mdd.mqp_maplen; ++ if (resp.mdd.mqp_maplen != 0) { ++ qpbuf = mmap64((void *)0, resp.mdd.mqp_maplen, ++ (PROT_READ | PROT_WRITE), MAP_SHARED, context->mmap_fd, ++ resp.mdd.mqp_mapoffset); ++ ++ if (qpbuf == MAP_FAILED) ++ goto err_destroy; ++ ++ /* ++ * Need to set qp->buf here in case alloc_db fails then ++ * we'll call mlx4_free_buf() to umap. ++ */ ++ qp->buf.buf = qpbuf; ++ qp->buf.length = resp.mdd.mqp_maplen; ++ } + + if (attr->cap.max_recv_sge) { + qp->db = mlx4_alloc_db(to_mctx(context), +@@ -934,10 +974,12 @@ + qp->sq_spare_wqes = resp.mdd.mqp_sq_headroomwqes; + qp->sq.wqe_cnt = resp.mdd.mqp_sq_numwqe; + +- if (attr->srq) +- qp->rq.wqe_cnt = 0; ++ if (attr->srq || attr->qp_type == IBV_QPT_XRC || ++ attr->qp_type == IBV_QPT_XRC_SEND || ++ attr->qp_type == IBV_QPT_XRC_RECV) ++ qp->rq.wqe_cnt = 0; + else +- qp->rq.wqe_cnt = resp.mdd.mqp_rq_numwqe; ++ qp->rq.wqe_cnt = resp.mdd.mqp_rq_numwqe; + + if (mlx4_set_qp_buf(attr->pd, qp, qpbuf, resp.mdd.mqp_maplen, + resp.mdd.mqp_rq_wqesz, resp.mdd.mqp_rq_off, +@@ -1020,12 +1062,23 @@ + struct ibv_qp_init_attr_ex attr_ex; + struct ibv_qp *qp; + +- memcpy(&attr_ex, attr, sizeof *attr); ++ /* We should copy below only the shared fields excluding the xrc_domain field. ++ * Otherwise we may have an ABI issue with applications that were compiled ++ * without the xrc_domain field. The xrc_domain anyway has no effect on ++ * the sender side, no need to copy in/out. 
++ */ ++ int init_attr_base_size = offsetof(struct ibv_qp_init_attr, ++ xrc_domain); ++ ++ memset(&attr_ex, 0, sizeof(attr_ex)); /* pre-set all fields to zero */ ++ /* copying only shared fields */ ++ memcpy(&attr_ex, attr, init_attr_base_size); + attr_ex.comp_mask = IBV_QP_INIT_ATTR_PD; + attr_ex.pd = pd; ++ + qp = mlx4_create_qp_ex(pd->context, &attr_ex); + if (qp) +- memcpy(attr, &attr_ex, sizeof *attr); ++ memcpy(attr, &attr_ex, init_attr_base_size); + return qp; + } + diff -r f11e8d81786a -r 22ec3267b2a3 components/open-fabrics/librdmacm/patches/004-librdmacm-fix-core-dump-in-rping-client-without-running-server.patch --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/components/open-fabrics/librdmacm/patches/004-librdmacm-fix-core-dump-in-rping-client-without-running-server.patch Thu Apr 13 20:30:48 2017 -0700 @@ -0,0 +1,63 @@ +#This patch was developed both in-house and from outside. We plan to submit it +#upstream, but do not yet have a target date for doing so +# HG changeset patch +# Parent 68a7383fdd511ce1ea9a0dfc24404b3d74e67055 + +diff -r 68a7383fdd51 examples/rping.c +--- a/examples/rping.c Tue Mar 15 19:57:38 2016 -0700 ++++ b/examples/rping.c Thu Mar 17 00:08:03 2016 -0700 +@@ -88,6 +88,7 @@ + RDMA_READ_COMPLETE, + RDMA_WRITE_ADV, + RDMA_WRITE_COMPLETE, ++ CALLING_DISCONNECT, + DISCONNECTED, + ERROR + }; +@@ -290,6 +291,20 @@ + + if (wc.status) { + if (wc.status == IBV_WC_WR_FLUSH_ERR) { ++ /* ++ * FLUSH Error can be polled before RDMA-CM ++ * DISCONNECT is notified. Ensure that cb_state ++ * is set appropriately in such a case. ++ * sleep for sometime if Disconnect has not ++ * been called. The FLUSH WR can be because ++ * the remote end initiated the disconnect. 
++ */ ++ if (!(cb->state == CALLING_DISCONNECT || cb->state == DISCONNECTED)) ++ sleep(2); ++ ++ if (cb->state == DISCONNECTED) ++ return (0); ++ + flushed = 1; + continue; + +@@ -824,7 +839,9 @@ + } + + rping_test_server(cb); ++ cb->state = CALLING_DISCONNECT; + rdma_disconnect(cb->child_cm_id); ++ pthread_cancel(cb->cqthread); + pthread_join(cb->cqthread, NULL); + rping_free_buffers(cb); + rping_free_qp(cb); +@@ -943,6 +960,7 @@ + + ret = 0; + err3: ++ cb->state = CALLING_DISCONNECT; + rdma_disconnect(cb->child_cm_id); + pthread_join(cb->cqthread, NULL); + rdma_destroy_id(cb->child_cm_id); +@@ -1122,6 +1140,7 @@ + + ret = 0; + err4: ++ cb->state = CALLING_DISCONNECT; + rdma_disconnect(cb->cm_id); + err3: + pthread_join(cb->cqthread, NULL); diff -r f11e8d81786a -r 22ec3267b2a3 components/open-fabrics/librdmacm/patches/004-librdmacm-remove-comments-for-XRC-support-from-man-pages.patch --- a/components/open-fabrics/librdmacm/patches/004-librdmacm-remove-comments-for-XRC-support-from-man-pages.patch Thu Apr 13 13:20:29 2017 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,36 +0,0 @@ -#This patch was developed both in-house and from outside. We plan to submit it -#upstream, but do not yet have a target date for doing so -# -# HG changeset patch -# Parent 9b6cc8c68b03b28d0b674dbf0fec4e6e143bd185 -Added comments for XRC support not available in the man pages - -diff -r 9b6cc8c68b03 man/rdma_create_id.3 ---- a/man/rdma_create_id.3 Thu Nov 19 11:19:08 2015 -0800 -+++ b/man/rdma_create_id.3 Fri Dec 04 17:46:54 2015 -0800 -@@ -48,7 +48,7 @@ - Provides unreliable, connectionless QP communication. Supports both datagram - and multicast communication. - .IP RDMA_PS_IB --Provides for any IB services (UD, UC, RC, XRC, etc.). -+Provides for any IB services (UD, UC, RC, etc.). Currently, it does not support XRC. 
- .SH "SEE ALSO" - rdma_cm(7), rdma_create_event_channel(3), rdma_destroy_id(3), rdma_get_devices(3), - rdma_bind_addr(3), rdma_resolve_addr(3), rdma_connect(3), rdma_listen(3), -diff -r 9b6cc8c68b03 man/rdma_create_srq.3 ---- a/man/rdma_create_srq.3 Thu Nov 19 11:19:08 2015 -0800 -+++ b/man/rdma_create_srq.3 Fri Dec 04 17:46:54 2015 -0800 -@@ -35,10 +35,12 @@ - allocated by the rdma_cm for the SRQ, along with corresponding completion - channels. Completion channels and CQ data created by the rdma_cm are - exposed to the user through the rdma_cm_id structure. -+Currently, the creation of XRC SRQs are not supported by this function in -+the Solaris specific implementation. - .P - The actual capabilities and properties of the created SRQ will be - returned to the user through the attr parameter. An rdma_cm_id --may only be associated with a single SRQ. -+may only be associated with a single SRQ. - .SH "SEE ALSO" - rdma_bind_addr(3), rdma_resolve_addr(3), rdma_create_ep(3), - rdma_destroy_srq(3), ibv_create_srq(3), ibv_create_xsrq(3) diff -r f11e8d81786a -r 22ec3267b2a3 components/open-fabrics/librdmacm/patches/005-librdmacm-fix-core-dump-in-rping-client-without-running-server.patch --- a/components/open-fabrics/librdmacm/patches/005-librdmacm-fix-core-dump-in-rping-client-without-running-server.patch Thu Apr 13 13:20:29 2017 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,63 +0,0 @@ -#This patch was developed both in-house and from outside. 
We plan to submit it -#upstream, but do not yet have a target date for doing so -# HG changeset patch -# Parent 68a7383fdd511ce1ea9a0dfc24404b3d74e67055 - -diff -r 68a7383fdd51 examples/rping.c ---- a/examples/rping.c Tue Mar 15 19:57:38 2016 -0700 -+++ b/examples/rping.c Thu Mar 17 00:08:03 2016 -0700 -@@ -88,6 +88,7 @@ - RDMA_READ_COMPLETE, - RDMA_WRITE_ADV, - RDMA_WRITE_COMPLETE, -+ CALLING_DISCONNECT, - DISCONNECTED, - ERROR - }; -@@ -290,6 +291,20 @@ - - if (wc.status) { - if (wc.status == IBV_WC_WR_FLUSH_ERR) { -+ /* -+ * FLUSH Error can be polled before RDMA-CM -+ * DISCONNECT is notified. Ensure that cb_state -+ * is set appropriately in such a case. -+ * sleep for sometime if Disconnect has not -+ * been called. The FLUSH WR can be because -+ * the remote end initiated the disconnect. -+ */ -+ if (!(cb->state == CALLING_DISCONNECT || cb->state == DISCONNECTED)) -+ sleep(2); -+ -+ if (cb->state == DISCONNECTED) -+ return (0); -+ - flushed = 1; - continue; - -@@ -824,7 +839,9 @@ - } - - rping_test_server(cb); -+ cb->state = CALLING_DISCONNECT; - rdma_disconnect(cb->child_cm_id); -+ pthread_cancel(cb->cqthread); - pthread_join(cb->cqthread, NULL); - rping_free_buffers(cb); - rping_free_qp(cb); -@@ -943,6 +960,7 @@ - - ret = 0; - err3: -+ cb->state = CALLING_DISCONNECT; - rdma_disconnect(cb->child_cm_id); - pthread_join(cb->cqthread, NULL); - rdma_destroy_id(cb->child_cm_id); -@@ -1122,6 +1140,7 @@ - - ret = 0; - err4: -+ cb->state = CALLING_DISCONNECT; - rdma_disconnect(cb->cm_id); - err3: - pthread_join(cb->cqthread, NULL); diff -r f11e8d81786a -r 22ec3267b2a3 components/open-fabrics/librdmacm/patches/005-librdmacm-xrc-and-22595881.patch --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/components/open-fabrics/librdmacm/patches/005-librdmacm-xrc-and-22595881.patch Thu Apr 13 20:30:48 2017 -0700 @@ -0,0 +1,202 @@ +#This patch was developed both in-house and from outside. 
We plan to submit it +#upstream, but do not yet have a target date for doing so +# +# HG changeset patch +# Parent 217eb28f33861f64f8a7c4b78d8209e7465bbd83 +25759055 OFUV (Userland) support for XRC APIs +22595881 defer librdmacm allocation of PD on ADDRESS_RESOLVED event + +diff -r 217eb28f3386 examples/rdma_xclient.c +--- a/examples/rdma_xclient.c Mon Nov 21 11:48:29 2016 -0800 ++++ b/examples/rdma_xclient.c Mon Mar 20 14:24:32 2017 -0700 +@@ -148,7 +148,11 @@ + case 'r': + break; + case 'x': +- hints.ai_port_space = RDMA_PS_IB; ++#if defined(__SVR4) && defined(__sun) ++ hints.ai_port_space = RDMA_PS_TCP; ++#else ++ hints.ai_port_space = RDMA_PS_IB; ++#endif + hints.ai_qp_type = IBV_QPT_XRC_SEND; + break; + default: +@@ -167,7 +171,7 @@ + + err: + printf("usage: %s\n", argv[0]); +- printf("\t[-s server]\n"); ++ printf("\t[-s server_address]\n"); + printf("\t[-p port_number]\n"); + printf("\t[-c communication type]\n"); + printf("\t r - RC: reliable-connected (default)\n"); +diff -r 217eb28f3386 examples/rdma_xserver.c +--- a/examples/rdma_xserver.c Mon Nov 21 11:48:29 2016 -0800 ++++ b/examples/rdma_xserver.c Mon Mar 20 14:24:32 2017 -0700 +@@ -162,7 +162,11 @@ + case 'r': + break; + case 'x': ++#if defined(__SVR4) && defined(__sun) ++ hints.ai_port_space = RDMA_PS_TCP; ++#else + hints.ai_port_space = RDMA_PS_IB; ++#endif + hints.ai_qp_type = IBV_QPT_XRC_RECV; + break; + default: +diff -r 217eb28f3386 man/rdma_set_option.3 +--- a/man/rdma_set_option.3 Mon Nov 21 11:48:29 2016 -0800 ++++ b/man/rdma_set_option.3 Mon Mar 20 14:24:32 2017 -0700 +@@ -16,9 +16,9 @@ + .IP "level" 12 + Protocol level of the option to set. Currently level RDMA_OPTION_ID is supported. + .IP "optname" 12 +-Name of the option, relative to the level, to set. The only supported option isRDMA_OPTION_ID_REUSEADDR for level RDMA_OPTION_ID. ++Name of the option, relative to the level, to set. The supported options are RDMA_OPTION_ID_REUSEADDR and RDMA_OPTION_ID_TOS for level RDMA_OPTION_ID. 
+ .IP "optval" 12 +-Reference to the option data. The data is dependent on the level and optname. For the option RDMA_OPTION_ID_REUSEADDR, an integer is passed. ++Reference to the option data. The data is dependent on the level and optname. For the options RDMA_OPTION_ID_REUSEADDR and RDMA_OPTION_ID_TOS, an integer is passed. + .IP "optlen" 12 + The size of the %optval buffer. + .SH "DESCRIPTION" +@@ -33,6 +33,9 @@ + using rdma_listen(3), is not supported for CMIDs set with + this option. This option enables multiple connections to share + the same source IP Port on the active side of the connection. ++The RDMA_OPTION_ID_TOS option can be used to set the Type of Service ++level. A value of 0 disables the option and a non-zero value ++enables the option. + .sp + .SH "RETURN VALUE" + Returns 0 on success, or -1 on error. If an error occurs, errno will be +diff -r 217eb28f3386 src/cma.c +--- a/src/cma.c Mon Nov 21 11:48:29 2016 -0800 ++++ b/src/cma.c Mon Mar 20 14:24:32 2017 -0700 +@@ -456,17 +456,8 @@ + if ((ret = ucma_init_device(cma_dev))) + goto out; + +- if (!cma_dev->refcnt++) { +- cma_dev->pd = ibv_alloc_pd(cma_dev->verbs); +- if (!cma_dev->pd) { +- cma_dev->refcnt--; +- ret = ERR(ENOMEM); +- goto out; +- } +- } + id_priv->cma_dev = cma_dev; + id_priv->id.verbs = cma_dev->verbs; +- id_priv->id.pd = cma_dev->pd; + out: + pthread_mutex_unlock(&mut); + return ret; +@@ -475,11 +466,12 @@ + static void ucma_put_device(struct cma_device *cma_dev) + { + pthread_mutex_lock(&mut); +- if (!--cma_dev->refcnt) { +- ibv_dealloc_pd(cma_dev->pd); +- if (cma_dev->xrcd) +- ibv_close_xrcd(cma_dev->xrcd); ++ if (cma_dev->pd && !--cma_dev->refcnt) { ++ ibv_dealloc_pd(cma_dev->pd); ++ cma_dev->pd = NULL; + } ++ if (cma_dev->xrcd) ++ ibv_close_xrcd(cma_dev->xrcd); + pthread_mutex_unlock(&mut); + } + +@@ -613,7 +605,7 @@ + enum ibv_qp_type qp_type; + + qp_type = (ps == RDMA_PS_IPOIB || ps == RDMA_PS_UDP) ? 
+- IBV_QPT_UD : IBV_QPT_RC; ++ IBV_QPT_UD : IBV_QPT_RC; + return rdma_create_id2(channel, id, context, ps, qp_type); + } + +@@ -1391,9 +1383,26 @@ + return ERR(EINVAL); + + id_priv = container_of(id, struct cma_id_private, id); +- if (!(attr->comp_mask & IBV_QP_INIT_ATTR_PD) || !attr->pd) { ++ if (!attr->pd || !(attr->comp_mask & IBV_QP_INIT_ATTR_PD)) { ++ struct cma_device *cma_dev; ++ pthread_mutex_lock(&id_priv->mut); ++ cma_dev = id_priv->cma_dev; ++ if (!cma_dev->pd && !cma_dev->refcnt++) { ++ pthread_mutex_unlock(&id_priv->mut); ++ cma_dev->pd = ibv_alloc_pd(cma_dev->verbs); ++ if (!cma_dev->pd) { ++ pthread_mutex_lock(&id_priv->mut); ++ cma_dev->refcnt--; ++ pthread_mutex_unlock(&id_priv->mut); ++ return ERR(ENOMEM); ++ } ++ pthread_mutex_lock(&id_priv->mut); ++ id_priv->id.pd = cma_dev->pd; ++ } ++ pthread_mutex_unlock(&id_priv->mut); ++ + attr->comp_mask |= IBV_QP_INIT_ATTR_PD; +- attr->pd = id->pd; ++ attr->pd = (id->pd) ? id->pd : cma_dev->pd; + } else if (id->verbs != attr->pd->context) + return ERR(EINVAL); + +@@ -1457,12 +1466,49 @@ + { + struct ibv_qp_init_attr_ex attr_ex; + int ret; +- +- memcpy(&attr_ex, qp_init_attr, sizeof *qp_init_attr); ++ int init_attr_base_size; ++ ++ /* ++ * XRC binary compatibility patches to libibverbs add 'xrc_domain' ++ * field to the end of "struct ibv_qp_init_attr" in libibverbs, ++ * so it is not completely isomorphic to initial fields in ++ * "struct ibv_qp_init_attr_ex". ++ * ++ * We should copy below only the shared fields excluding the ++ * xrc_domain field from "struct ibv_qp_init_attr" into the ++ * "struct ibv_qp_init_attr_ex", otherwise it clobbers the field ++ * immediately following the isomorphic initial fields. ++ * ++ * (The xrc_domain has no effect on the sender side anyway, so ++ * there is no need to copy it!) 
++ */ ++ init_attr_base_size = offsetof(struct ibv_qp_init_attr, xrc_domain); ++ ++ memset(&attr_ex, 0, sizeof(attr_ex)); /* pre-set all fields to zero */ ++ /* copy only common fields */ ++ memcpy(&attr_ex, qp_init_attr, init_attr_base_size); + attr_ex.comp_mask = IBV_QP_INIT_ATTR_PD; + attr_ex.pd = pd ? pd : id->pd; ++ ++ if (qp_init_attr->qp_type == IBV_QPT_XRC) { ++ /* ++ * another private handshake to indicate ++ * XRC send or receive side endpoint ++ */ ++ if (qp_init_attr->cap.max_send_wr == 0) { ++ attr_ex.qp_type = IBV_QPT_XRC_RECV; ++ if (qp_init_attr->xrc_domain) { ++ attr_ex.comp_mask |= IBV_QP_INIT_ATTR_XRCD; ++ attr_ex.xrcd = (struct ibv_xrcd *) ++ qp_init_attr->xrc_domain; ++ } ++ } else { ++ attr_ex.qp_type = IBV_QPT_XRC_SEND; ++ } ++ } + ret = rdma_create_qp_ex(id, &attr_ex); +- memcpy(qp_init_attr, &attr_ex, sizeof *qp_init_attr); ++ /* copy only common fields */ ++ memcpy(qp_init_attr, &attr_ex, init_attr_base_size); + return ret; + } + diff -r f11e8d81786a -r 22ec3267b2a3 components/open-fabrics/open-fabrics.p5m --- a/components/open-fabrics/open-fabrics.p5m Thu Apr 13 13:20:29 2017 -0700 +++ b/components/open-fabrics/open-fabrics.p5m Thu Apr 13 20:30:48 2017 -0700 @@ -83,12 +83,15 @@ file path=usr/bin/ibv_rc_pingpong file path=usr/bin/ibv_srq_pingpong file path=usr/bin/ibv_ud_pingpong +file path=usr/bin/ibv_xsrq_pingpong file path=usr/bin/mckey file path=usr/bin/qperf file path=usr/bin/rdma_bw file path=usr/bin/rdma_client file path=usr/bin/rdma_lat file path=usr/bin/rdma_server +file path=usr/bin/rdma_xclient +file path=usr/bin/rdma_xserver file path=usr/bin/rds-info file path=usr/bin/rds-ping file path=usr/bin/rds-stress @@ -101,6 +104,7 @@ file path=usr/include/infiniband/mad.h file path=usr/include/infiniband/mad_osd.h file path=usr/include/infiniband/ofa_solaris.h +file path=usr/include/infiniband/ofa_verbs.h file path=usr/include/infiniband/sa.h file path=usr/include/infiniband/umad.h file path=usr/include/infiniband/verbs.h @@ -241,12 
+245,15 @@ file path=usr/share/man/man1/ibv_rc_pingpong.1 file path=usr/share/man/man1/ibv_srq_pingpong.1 file path=usr/share/man/man1/ibv_ud_pingpong.1 +file path=usr/share/man/man1/ibv_xsrq_pingpong.1 file path=usr/share/man/man1/mckey.1 file path=usr/share/man/man1/qperf.1 file path=usr/share/man/man1/rdma_bw.1 file path=usr/share/man/man1/rdma_client.1 link path=usr/share/man/man1/rdma_lat.1 target=rdma_bw.1 file path=usr/share/man/man1/rdma_server.1 +file path=usr/share/man/man1/rdma_xclient.1 +file path=usr/share/man/man1/rdma_xserver.1 file path=usr/share/man/man1/rds-info.1 file path=usr/share/man/man1/rds-ping.1 file path=usr/share/man/man1/rds-stress.1 @@ -264,7 +271,10 @@ file path=usr/share/man/man3/ibv_create_comp_channel.3 file path=usr/share/man/man3/ibv_create_cq.3 file path=usr/share/man/man3/ibv_create_qp.3 +file path=usr/share/man/man3/ibv_create_qp_ex.3 file path=usr/share/man/man3/ibv_create_srq.3 +file path=usr/share/man/man3/ibv_create_srq_ex.3 +file path=usr/share/man/man3/ibv_create_xsrq.3 link path=usr/share/man/man3/ibv_dealloc_pd.3 target=ibv_alloc_pd.3 link path=usr/share/man/man3/ibv_dereg_mr.3 target=ibv_reg_mr.3 link path=usr/share/man/man3/ibv_destroy_ah.3 target=ibv_create_ah.3 @@ -282,6 +292,7 @@ file path=usr/share/man/man3/ibv_get_device_guid.3 file path=usr/share/man/man3/ibv_get_device_list.3 file path=usr/share/man/man3/ibv_get_device_name.3 +file path=usr/share/man/man3/ibv_get_srq_num.3 file path=usr/share/man/man3/ibv_gid_reachable.3 link path=usr/share/man/man3/ibv_init_ah_from_wc.3 \ target=ibv_create_ah_from_wc.3 @@ -289,6 +300,7 @@ file path=usr/share/man/man3/ibv_modify_srq.3 file path=usr/share/man/man3/ibv_node_type_str.3 file path=usr/share/man/man3/ibv_open_device.3 +file path=usr/share/man/man3/ibv_open_xrcd.3 file path=usr/share/man/man3/ibv_poll_cq.3 file path=usr/share/man/man3/ibv_port_state_str.3 file path=usr/share/man/man3/ibv_post_recv.3