components/open-fabrics/libmlx4/patches/base.patch
author boris.chiu@oracle.com
Thu, 22 Oct 2015 08:27:04 -0700
branch s11u3-sru
changeset 4996 739983ef315c
parent 2550 02b339f70efd
child 4835 d5abd56e3fcf
permissions -rw-r--r--
PSARC 2015/310 IBTF and OFUV updates 20889536 remove some compiler warnings in libibverbs 21351609 Some sol_uverbs <-> libibverbs cmd/resp API structs not 64 bit aligned 20888202 remove Mellanox specific code from libibverbs 20735273 libibverbs must support AH interfaces for PSIF 20549008 open-fabrics should move from gcc3 to gcc4 21055860 ib_write_bw/ib_send_bw not sending inline data for non-hermon hcas 21218536 qperf randomly cores when compiled with gcc 4.8.2 20449637 Open Fabrics tools/utilities should be LP64 only 21499287 open-fabrics ibutils-1.5.7 fails to build in 64 only environment 21747062 update libibverbs to handle EDR/FDR speeds 21834282 saquery should use default SA timeout of 1sec from upstream 21863393 qperf attempts to free the PD before freeing the MR associated with it 21863652 qperf cannot handle device name with IB port number as argument

# This patch was developed in-house. We plan to submit it upstream, but do
# not yet have a target date for doing so.
#
diff -r -u /tmp/839450/libmlx4-1.0.1/Makefile.am libmlx4-1.0.1/Makefile.am
--- /tmp/839450/libmlx4-1.0.1/Makefile.am	Tue Sep  8 06:40:35 2009
+++ libmlx4-1.0.1/Makefile.am	Tue Mar 15 06:49:47 2011
@@ -10,7 +10,7 @@
     src_libmlx4_la_SOURCES = $(MLX4_SOURCES)
     src_libmlx4_la_LDFLAGS = -avoid-version -release @IBV_DEVICE_LIBRARY_EXTENSION@ \
         $(mlx4_version_script)
-    mlx4confdir = $(sysconfdir)/libibverbs.d
+    mlx4confdir = $(datadir)/libibverbs.d
     mlx4conf_DATA = mlx4.driver
 else
     mlx4libdir = $(libdir)/infiniband
diff -r -u /tmp/839450/libmlx4-1.0.1/src/mlx4-abi.h libmlx4-1.0.1/src/mlx4-abi.h
--- /tmp/839450/libmlx4-1.0.1/src/mlx4-abi.h	Thu Mar 10 04:48:34 2011
+++ libmlx4-1.0.1/src/mlx4-abi.h	Fri Feb 11 03:49:51 2011
@@ -35,6 +35,14 @@
 
 #include <infiniband/kern-abi.h>
 
+#if defined(__SVR4) && defined(__sun)
+/* Restore once build systems are in sync
+   See 21170572 - libmlx4 should be built with the system mlnx_umap.h
+#include <sys/ib/adapters/mlnx_umap.h>   / * Opaque CI data out definitions * /
+*/
+#include "mlnx_umap.h"   /* Opaque CI data out definitions */
+#endif
+
 #define MLX4_UVERBS_MIN_ABI_VERSION	2
 #define MLX4_UVERBS_MAX_ABI_VERSION	3
 
@@ -43,6 +51,10 @@
 	__u32				qp_tab_size;
 	__u16				bf_reg_size;
 	__u16				bf_regs_per_page;
+#if defined(__SVR4) && defined(__sun)
+	uint32_t			muc_rev;
+	uint32_t			muc_reserved;
+#endif
 };
 
 struct mlx4_alloc_pd_resp {
@@ -51,23 +63,45 @@
 	__u32				reserved;
 };
 
+struct mlx4_share_pd_resp {
+	struct ibv_share_pd_resp	ibv_resp;
+	__u32				pdn;
+	__u32				reserved;
+};
+
 struct mlx4_create_cq {
 	struct ibv_create_cq		ibv_cmd;
+#if !(defined(__SVR4) && defined(__sun))
 	__u64				buf_addr;
 	__u64				db_addr;
+#endif
 };
 
 struct mlx4_create_cq_resp {
 	struct ibv_create_cq_resp	ibv_resp;
+#if !(defined(__SVR4) && defined(__sun))
 	__u32				cqn;
 	__u32				reserved;
+#else
+	mlnx_umap_cq_data_out_t		mdd;
+#endif
 };
 
 struct mlx4_resize_cq {
 	struct ibv_resize_cq		ibv_cmd;
+#if !(defined(__SVR4) && defined(__sun))
 	__u64				buf_addr;
+#endif
 };
 
+#if defined(__SVR4) && defined(__sun)
+struct mlx4_resize_cq_resp {
+	struct ibv_resize_cq_resp	ibv_resp;
+	mlnx_umap_cq_data_out_t		mdd;
+};
+#endif
+
+
 #ifdef HAVE_IBV_XRC_OPS
 struct mlx4_create_xrc_srq {
 	struct ibv_create_xrc_srq	ibv_cmd;
@@ -78,18 +112,25 @@
 
 struct mlx4_create_srq {
 	struct ibv_create_srq		ibv_cmd;
+#if !(defined(__SVR4) && defined(__sun))
 	__u64				buf_addr;
 	__u64				db_addr;
+#endif
 };
 
 struct mlx4_create_srq_resp {
 	struct ibv_create_srq_resp	ibv_resp;
+#if !(defined(__SVR4) && defined(__sun))
 	__u32				srqn;
 	__u32				reserved;
+#else
+	mlnx_umap_srq_data_out_t	mdd;
+#endif
 };
 
 struct mlx4_create_qp {
 	struct ibv_create_qp		ibv_cmd;
+#if !(defined(__SVR4) && defined(__sun))
 	__u64				buf_addr;
 	__u64				db_addr;
 	__u8				log_sq_bb_count;
@@ -96,8 +137,16 @@
 	__u8				log_sq_stride;
 	__u8				sq_no_prefetch;	/* was reserved in ABI 2 */
 	__u8				reserved[5];
+#endif
 };
 
+#if defined(__SVR4) && defined(__sun)
+struct mlx4_create_qp_resp {
+	struct ibv_create_qp_resp	ibv_resp;
+	mlnx_umap_qp_data_out_t		mdd;
+};
+#endif
+
 #ifdef HAVE_IBV_XRC_OPS
 struct mlx4_open_xrc_domain_resp {
 	struct ibv_open_xrc_domain_resp	ibv_resp;
diff -r -u /tmp/839450/libmlx4-1.0.1/src/verbs.c libmlx4-1.0.1/src/verbs.c
--- /tmp/839450/libmlx4-1.0.1/src/verbs.c	Thu Mar 10 04:48:34 2011
+++ libmlx4-1.0.1/src/verbs.c	Fri Mar 11 14:40:18 2011
@@ -56,6 +56,14 @@
 	if (ret)
 		return ret;
 
+#if defined(__SVR4) && defined(__sun)
+	/*
+	 * To be consistent with OFED and so the queue operations in srq.c work
+	 * we need to report the max as actual max  less 1. In OFED this is
+	 * done in the HCA driver.
+	 */
+	attr->max_srq_wr -=1;
+#endif
 	major     = (raw_fw_ver >> 32) & 0xffff;
 	minor     = (raw_fw_ver >> 16) & 0xffff;
 	sub_minor = raw_fw_ver & 0xffff;
@@ -95,6 +103,39 @@
 	return &pd->ibv_pd;
 }
 
+struct ibv_shpd *mlx4_alloc_shpd(struct ibv_pd *pd, uint64_t share_key, struct ibv_shpd *shpd)
+{
+	struct ibv_alloc_shpd cmd;
+	struct ibv_alloc_shpd_resp resp;
+
+	if (ibv_cmd_alloc_shpd(pd->context, pd, share_key, shpd, &cmd, sizeof cmd,
+			     &resp, sizeof resp)) {
+		return NULL;
+	}
+
+	return shpd;
+}
+
+
+struct ibv_pd *mlx4_share_pd(struct ibv_context *context, struct ibv_shpd *shpd, uint64_t share_key)
+{
+	struct ibv_share_pd       cmd;
+	struct mlx4_share_pd_resp resp;
+	struct mlx4_pd		 *pd;
+
+	pd = malloc(sizeof *pd);
+	if (!pd)
+		return NULL;
+
+	if (ibv_cmd_share_pd(context, shpd, share_key, &pd->ibv_pd, &cmd, sizeof cmd,
+			     &resp.ibv_resp, sizeof resp)) {
+		free(pd);
+		return NULL;
+	}
+	pd->pdn = resp.pdn;
+	return &pd->ibv_pd;
+}
+
 int mlx4_free_pd(struct ibv_pd *pd)
 {
 	int ret;
@@ -138,6 +179,37 @@
 	return mr;
 }
 
+struct ibv_mr *mlx4_reg_mr_relaxed(struct ibv_pd *pd, void *addr, size_t length,
+			   int access)
+{
+	struct ibv_mr *mr;
+	struct ibv_reg_mr cmd;
+	int ret;
+
+	mr = malloc(sizeof *mr);
+	if (!mr)
+		return NULL;
+
+#ifdef IBV_CMD_REG_MR_RELAXED_HAS_RESP_PARAMS
+	{
+		struct ibv_reg_mr_resp resp;
+
+		ret = ibv_cmd_reg_mr_relaxed(pd, addr, length, (uintptr_t) addr,
+				     access, mr, &cmd, sizeof cmd,
+				     &resp, sizeof resp);
+	}
+#else
+	ret = ibv_cmd_reg_mr_relaxed(pd, addr, length, (uintptr_t) addr, access, mr,
+			     &cmd, sizeof cmd);
+#endif
+	if (ret) {
+		free(mr);
+		return NULL;
+	}
+
+	return mr;
+}
+
 int mlx4_dereg_mr(struct ibv_mr *mr)
 {
 	int ret;
@@ -150,6 +222,29 @@
 	return 0;
 }
 
+int mlx4_dereg_mr_relaxed(struct ibv_mr *mr)
+{
+	int ret;
+
+	ret = ibv_cmd_dereg_mr_relaxed(mr);
+	if (ret)
+		return ret;
+
+	free(mr);
+	return 0;
+}
+
+int mlx4_flush_relaxed_mr(struct ibv_pd *pd)
+{
+	int ret;
+
+	ret = ibv_cmd_flush_relaxed_mr(pd);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
 static int align_queue_size(int req)
 {
 	int nent;
@@ -168,6 +263,9 @@
 	struct mlx4_create_cq_resp resp;
 	struct mlx4_cq		  *cq;
 	int			   ret;
+#if defined(__SVR4) && defined(__sun)
+	void                      *cqbuf;
+#endif
 
 	/* Sanity check CQ size before proceeding */
 	if (cqe > 0x3fffff)
@@ -184,7 +282,8 @@
 
 	cqe = align_queue_size(cqe + 1);
 
-	if (mlx4_alloc_cq_buf(to_mdev(context->device), &cq->buf, cqe))
+#if !(defined(__SVR4) && defined(__sun))
+	if (mlx4_alloc_cq_buf(to_mdev(context->device), &cq->buf, cqe))
 		goto err;
 
 	cq->set_ci_db  = mlx4_alloc_db(to_mctx(context), MLX4_DB_TYPE_CQ);
@@ -198,15 +297,73 @@
 
 	cmd.buf_addr = (uintptr_t) cq->buf.buf;
 	cmd.db_addr  = (uintptr_t) cq->set_ci_db;
+#else
+	cq->buf.buf    = NULL;
+	cq->buf.length = 0;
+	cq->arm_db     = NULL;
+	cq->set_ci_db  = NULL;
+#endif
 
 	ret = ibv_cmd_create_cq(context, cqe - 1, channel, comp_vector,
 				&cq->ibv_cq, &cmd.ibv_cmd, sizeof cmd,
 				&resp.ibv_resp, sizeof resp);
 	if (ret)
+#if !(defined(__SVR4) && defined(__sun))
 		goto err_db;
-
 	cq->cqn = resp.cqn;
+#else
+		goto err;
 
+        /*
+         * For Solaris the kernel driver passes back mmap information for
+	 *  mapping the CQ memory it allocated.
+         */
+	if (resp.mdd.mcq_rev < MLNX_UMAP_IF_VERSION) {
+		fprintf(stderr, PFX "libmlx4_create_cq: libmlx4/hermon umap "
+		    "rev mismatch (kernel rev=%d)\n", resp.mdd.mcq_rev);
+		goto err_destroy;
+	}
+
+        cqbuf = mmap64((void *)0, resp.mdd.mcq_maplen, (PROT_READ | PROT_WRITE),
+	    MAP_SHARED, context->mmap_fd, resp.mdd.mcq_mapoffset);
+
+        if (cqbuf == MAP_FAILED)
+                goto err_destroy;
+
+        /*
+         * Extract hardware driver values for the number of CQEs and the
+	 * hardware CQ number to use (needed for user space doorbells).
+         */
+	cqe            = resp.mdd.mcq_numcqe;
+	cq->cqn        = resp.mdd.mcq_cqnum;
+	cq->buf.buf    = cqbuf;
+	cq->buf.length = resp.mdd.mcq_maplen;
+	cq->ibv_cq.cqe = cqe-1;
+
+	/*
+	 * We map both poll and arm as separate doorbells (OFED assumes 1 word
+	 * offset and just bumps the address) since Solaris provides a
+	 * separate offset. This will amount to the same thing (a second
+	 * reference to the first doorbell is added) but is more flexible.
+	 */
+	cq->set_ci_db = mlx4_alloc_db(to_mctx(context),
+	    resp.mdd.mcq_polldbr_mapoffset, resp.mdd.mcq_polldbr_maplen,
+	    resp.mdd.mcq_polldbr_offset);
+
+        if (cq->set_ci_db == NULL)
+                goto err_buf;
+
+	cq->arm_db = mlx4_alloc_db(to_mctx(context),
+	    resp.mdd.mcq_armdbr_mapoffset, resp.mdd.mcq_armdbr_maplen,
+	    resp.mdd.mcq_armdbr_offset);
+
+        if (cq->arm_db == NULL)
+                goto err_db;
+
+	*cq->arm_db    = 0;
+	cq->arm_sn     = 1;
+	*cq->set_ci_db = 0;
+#endif
 	return &cq->ibv_cq;
 
 err_db:
@@ -215,6 +372,21 @@
 err_buf:
 	mlx4_free_buf(&cq->buf);
 
+#if defined(__SVR4) && defined(__sun)
+err_destroy:
+	/*
+	 * Calling ibv_cmd_destroy_cq() will try to take the ibv_cq
+	 * mutex that is initialised by the ibv_create_cq() entry point
+	 * that called us AFTER we return, so it's not initialised yet.
+	 * So initialise it here so the destroy call doesn't hang.
+	 */
+	pthread_mutex_init(&(cq->ibv_cq.mutex), NULL);
+	pthread_cond_init(&(cq->ibv_cq.cond), NULL);
+	cq->ibv_cq.comp_events_completed = 0;
+	cq->ibv_cq.async_events_completed = 0;
+
+	ibv_cmd_destroy_cq(&cq->ibv_cq);
+#endif
 err:
 	free(cq);
 
@@ -227,10 +399,15 @@
 	struct mlx4_resize_cq cmd;
 	struct mlx4_buf buf;
 	int old_cqe, outst_cqe, ret;
-
+#if !(defined(__SVR4) && defined(__sun))
+	struct ibv_resize_cq_resp resp;
+#else
+	struct mlx4_resize_cq_resp	resp;
+	void				*cqbuf;
+#endif
 	/* Sanity check CQ size before proceeding */
 	if (cqe > 0x3fffff)
-		return EINVAL;
+ 		return EINVAL;
 
 	pthread_spin_lock(&cq->lock);
 
@@ -247,32 +424,84 @@
 		goto out;
 	}
 
+#if !(defined(__SVR4) && defined(__sun))
 	ret = mlx4_alloc_cq_buf(to_mdev(ibcq->context->device), &buf, cqe);
 	if (ret)
 		goto out;
-
-	old_cqe = ibcq->cqe;
 	cmd.buf_addr = (uintptr_t) buf.buf;
+#endif
+        old_cqe = ibcq->cqe;
 
 #ifdef IBV_CMD_RESIZE_CQ_HAS_RESP_PARAMS
-	{
-		struct ibv_resize_cq_resp resp;
-		ret = ibv_cmd_resize_cq(ibcq, cqe - 1, &cmd.ibv_cmd, sizeof cmd,
+	ret = ibv_cmd_resize_cq(ibcq, cqe - 1, &cmd.ibv_cmd, sizeof cmd,
+#if !(defined(__SVR4) && defined(__sun))
 					&resp, sizeof resp);
-	}
 #else
+					&resp.ibv_resp, sizeof resp);
+#endif
+#else
 	ret = ibv_cmd_resize_cq(ibcq, cqe - 1, &cmd.ibv_cmd, sizeof cmd);
 #endif
-	if (ret) {
-		mlx4_free_buf(&buf);
+
+        if (ret) {
+#if !(defined(__SVR4) && defined(__sun))
+                mlx4_free_buf(&buf);
 		goto out;
 	}
 
-	mlx4_cq_resize_copy_cqes(cq, buf.buf, old_cqe);
+        mlx4_cq_resize_copy_cqes(cq, buf.buf, old_cqe);
+        mlx4_free_buf(&cq->buf);
+        cq->buf = buf;
+#else
+		goto out;
+	}
+	/* No saved CQE copy yet; keeps free(buf.buf) below well defined. */
+	buf.buf    = NULL;
+	buf.length = 0;
+        if (cq->buf.buf != NULL) {
+        	buf.buf = malloc(cq->buf.length);
+        	if (!buf.buf) {
+                	ret = ENOMEM;
+                	goto out;
+        	}
 
-	mlx4_free_buf(&cq->buf);
-	cq->buf = buf;
+        	memcpy(buf.buf, cq->buf.buf, cq->buf.length);
+        	buf.length =  cq->buf.length;
+                ret = munmap((char *)cq->buf.buf, cq->buf.length);
+                if (ret) {
+                        free(buf.buf);
+                        goto out;
+                }
+        }
+	/*
+	 * For Solaris the kernel driver passes back mmap information for
+	 * mapping the CQ memory it allocated.
+	 */
+	if (resp.mdd.mcq_rev < MLNX_UMAP_IF_VERSION) {
+		fprintf(stderr, PFX "libmlx4_resize_cq: libmlx4/hermon umap "
+		    "rev mismatch (kernel rev=%d)\n", resp.mdd.mcq_rev);
+		free(buf.buf);	/* drop the saved CQE copy */
+		ret = EINVAL;
+		goto out;
+	}
 
+	cqbuf = mmap64((void *)0, resp.mdd.mcq_maplen, (PROT_READ | PROT_WRITE),
+	     MAP_SHARED, ibcq->context->mmap_fd, resp.mdd.mcq_mapoffset);
+
+	if (cqbuf == MAP_FAILED) {
+		free(buf.buf);	/* drop the saved CQE copy */
+		ret = EINVAL;
+		goto out;
+	}
+	cq->buf.buf    = buf.buf;
+	cq->buf.length = buf.length;
+	mlx4_cq_resize_copy_cqes(cq, cqbuf, old_cqe);
+	cq->buf.buf    = cqbuf;
+	cq->buf.length = resp.mdd.mcq_maplen;
+	free(buf.buf);
+	cq->ibv_cq.cqe =  resp.mdd.mcq_numcqe - 1;
+	cq->cqn        = resp.mdd.mcq_cqnum;
+#endif
 out:
 	pthread_spin_unlock(&cq->lock);
 	return ret;
@@ -287,6 +511,9 @@
 		return ret;
 
 	mlx4_free_db(to_mctx(cq->context), MLX4_DB_TYPE_CQ, to_mcq(cq)->set_ci_db);
+#if defined(__SVR4) && defined(__sun)
+	mlx4_free_db(to_mctx(cq->context), MLX4_DB_TYPE_CQ, to_mcq(cq)->arm_db);
+#endif
 	mlx4_free_buf(&to_mcq(cq)->buf);
 	free(to_mcq(cq));
 
@@ -300,6 +527,9 @@
 	struct mlx4_create_srq_resp resp;
 	struct mlx4_srq		   *srq;
 	int			    ret;
+#if defined(__SVR4) && defined(__sun)
+	void                       *srqbuf;
+#endif
 
 	/* Sanity check SRQ size before proceeding */
 	if (attr->attr.max_wr > 1 << 16 || attr->attr.max_sge > 64)
@@ -312,6 +542,7 @@
 	if (pthread_spin_init(&srq->lock, PTHREAD_PROCESS_PRIVATE))
 		goto err;
 
+#if !(defined(__SVR4) && defined(__sun))
 	srq->max     = align_queue_size(attr->attr.max_wr + 1);
 	srq->max_gs  = attr->attr.max_sge;
 	srq->counter = 0;
@@ -327,23 +558,118 @@
 
 	cmd.buf_addr = (uintptr_t) srq->buf.buf;
 	cmd.db_addr  = (uintptr_t) srq->db;
+#else
+	/*
+	 * Solaris SRQ WQE memory is supplied by the kernel; we'll update
+	 * these after the creation.
+	 */
+	srq->buf.buf      = NULL;
+	srq->buf.length   = 0;
+	srq->db           = NULL;
 
+	/*
+	 * Need solaris to allocate space for the spare WR in
+	 * the list that makes the queue work. The Solaris driver
+	 * will round up to the nearest power of 2 as align_queue_size()
+	 * does for OFED.
+	 */
+	attr->attr.max_wr += 1;
+#endif
+
 	ret = ibv_cmd_create_srq(pd, &srq->ibv_srq, attr,
 				 &cmd.ibv_cmd, sizeof cmd,
 				 &resp.ibv_resp, sizeof resp);
+#if defined(__SVR4) && defined(__sun)
+	if (ret) {
+		goto err;
+	}
+
+        /*
+         * The kernel driver passes back mmap information for mapping the
+         * SRQ work queue memory it allocated and the doorbell for
+	 * posting.
+         */
+	if (resp.mdd.msrq_rev < 1) {
+		fprintf(stderr, PFX "libmlx4_create_srq libmlx4/hermon umap "
+			"rev mismatch (kernel rev=%d)\n", resp.mdd.msrq_rev);
+		goto err_destroy;
+	}
+
+        srqbuf = mmap64((void *)0, resp.mdd.msrq_maplen,
+	    (PROT_READ | PROT_WRITE), MAP_SHARED, pd->context->mmap_fd,
+	    resp.mdd.msrq_mapoffset);
+
+        if (srqbuf == MAP_FAILED) {
+                goto err_destroy;
+        }
+
+	srq->buf.buf    = srqbuf;
+	srq->buf.length = resp.mdd.msrq_maplen;
+	srq->max	= resp.ibv_resp.max_wr;
+	srq->max_gs	= resp.ibv_resp.max_sge;
+	srq->srqn       = resp.mdd.msrq_srqnum;
+	srq->counter 	= 0;
+
+	srq->db = mlx4_alloc_db(to_mctx(pd->context),
+	    resp.mdd.msrq_rdbr_mapoffset, resp.mdd.msrq_rdbr_maplen,
+	    resp.mdd.msrq_rdbr_offset);
+	if (srq->db == NULL) {
+                goto err_unmap;
+	}
+
+	/*
+	 * The following call only initializes memory and control structures,
+	 * it utilizes the memory allocated by the kernel.
+	 * It also allocates the srq->wrid memory.
+	 */
+	if (mlx4_set_srq_buf(pd, srq, resp.mdd.msrq_wqesz,
+	    resp.mdd.msrq_numwqe)) {
+		goto err_db;
+	}
+
+	/*
+	 * The returned max wr will have been rounded up to the nearest
+	 * power of 2; subtracting 1 from that and reporting that value
+	 * as the max will give us the required free WR in the queue, as
+	 * in OFED.
+	 */
+	attr->attr.max_wr -= 1;
+#else
 	if (ret)
 		goto err_db;
 
 	srq->srqn = resp.srqn;
 
+#endif
 	return &srq->ibv_srq;
 
 err_db:
 	mlx4_free_db(to_mctx(pd->context), MLX4_DB_TYPE_RQ, srq->db);
 
+#if defined(__SVR4) && defined(__sun)
+	if (srq->wrid)
+		free(srq->wrid);
+err_unmap:
+	mlx4_free_buf(&srq->buf);
+
+err_destroy:
+	/*
+	 * Calling ibv_cmd_destroy_srq() will try to take the ibv_srq
+	 * mutex that is initialised by the ibv_create_srq() entry point
+	 * that called us AFTER we return, so it's not initialised yet.
+	 * So initialise it here so the destroy call doesn't hang.
+	 */
+	pthread_mutex_init(&(srq->ibv_srq.mutex), NULL);
+	pthread_cond_init(&(srq->ibv_srq.cond), NULL);
+	srq->ibv_srq.events_completed = 0;
+
+	ibv_cmd_destroy_srq(&srq->ibv_srq);
+
+#else
 err_free:
 	free(srq->wrid);
 	mlx4_free_buf(&srq->buf);
+#endif
 
 err:
 	free(srq);
@@ -357,7 +683,16 @@
 {
 	struct ibv_modify_srq cmd;
 
+#if !(defined(__SVR4) && defined(__sun))
 	return ibv_cmd_modify_srq(srq, attr, attr_mask, &cmd, sizeof cmd);
+#else
+	int	ret;
+
+	attr->max_wr += 1;		/* See create_srq */
+	ret = ibv_cmd_modify_srq(srq, attr, attr_mask, &cmd, sizeof cmd);
+	attr->max_wr -= 1;
+	return (ret);
+#endif
 }
 
 int mlx4_query_srq(struct ibv_srq *srq,
@@ -365,7 +700,17 @@
 {
 	struct ibv_query_srq cmd;
 
+#if !(defined(__SVR4) && defined(__sun))
 	return ibv_cmd_query_srq(srq, attr, &cmd, sizeof cmd);
+#else
+	int	ret;
+
+	ret = ibv_cmd_query_srq(srq, attr, &cmd, sizeof cmd);
+	/* Hide the spare WR (see create_srq), but only in a valid reply. */
+	if (ret == 0)
+		attr->max_wr -= 1;
+	return (ret);
+#endif
 }
 
 int mlx4_destroy_srq(struct ibv_srq *ibsrq)
@@ -443,12 +788,16 @@
 struct ibv_qp *mlx4_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr)
 {
 	struct mlx4_create_qp     cmd;
-	struct ibv_create_qp_resp resp;
 	struct mlx4_qp		 *qp;
 	int			  ret;
 	struct mlx4_context	 *context = to_mctx(pd->context);
+#if !(defined(__SVR4) && defined(__sun))
+	struct ibv_create_qp_resp resp;
+#else
+	struct mlx4_create_qp_resp	resp;
+	void				*qpbuf;
+#endif
 
-
 	/* Sanity check QP size before proceeding */
 	if (verify_sizes(attr, context))
 		return NULL;
@@ -457,6 +806,7 @@
 	if (!qp)
 		return NULL;
 
+#if !(defined(__SVR4) && defined(__sun))
 	mlx4_calc_sq_wqe_size(&attr->cap, attr->qp_type, qp);
 
 	/*
@@ -466,6 +816,7 @@
 	qp->sq_spare_wqes = (2048 >> qp->sq.wqe_shift) + 1;
 	qp->sq.wqe_cnt = align_queue_size(attr->cap.max_send_wr + qp->sq_spare_wqes);
 	qp->rq.wqe_cnt = align_queue_size(attr->cap.max_recv_wr);
+#endif
 
 	if (attr->srq || attr->qp_type == IBV_QPT_XRC)
 		attr->cap.max_recv_wr = qp->rq.wqe_cnt = 0;
@@ -476,6 +827,22 @@
 			attr->cap.max_recv_wr = 1;
 	}
 
+#if defined(__SVR4) && defined(__sun)
+	if (pthread_spin_init(&qp->sq.lock, PTHREAD_PROCESS_PRIVATE) ||
+	    pthread_spin_init(&qp->rq.lock, PTHREAD_PROCESS_PRIVATE))
+		goto err;
+
+	/*
+	 * Solaris QP work queue memory is supplied by the kernel, so
+	 * we will update this after creation.
+	 */
+	qp->buf.buf	= NULL;
+	qp->sq.wrid	= NULL;
+	qp->rq.wrid	= NULL;
+	qp->buf.length	= 0;
+	qp->db		= NULL;
+	memset(&cmd, 0, sizeof(cmd));
+#else
 	if (mlx4_alloc_qp_buf(pd, &attr->cap, attr->qp_type, qp))
 		goto err;
 
@@ -505,10 +872,12 @@
 		; /* nothing */
 	cmd.sq_no_prefetch = 0;	/* OK for ABI 2: just a reserved field */
 	memset(cmd.reserved, 0, sizeof cmd.reserved);
+#endif
 
 	pthread_mutex_lock(&to_mctx(pd->context)->qp_table_mutex);
 
 	ret = ibv_cmd_create_qp(pd, &qp->ibv_qp, attr, &cmd.ibv_cmd, sizeof cmd,
+#if !(defined(__SVR4) && defined(__sun))
 				&resp, sizeof resp);
 	if (ret)
 		goto err_rq_db;
@@ -516,6 +885,70 @@
 	ret = mlx4_store_qp(to_mctx(pd->context), qp->ibv_qp.qp_num, qp);
 	if (ret)
 		goto err_destroy;
+#else
+				&resp.ibv_resp, sizeof resp);
+	if (ret)
+		goto err_free;
+
+        /*
+         * The kernel driver passes back mmap information for mapping the
+         * QP work queue memory it allocated back into user space.
+         */
+	if (resp.mdd.mqp_rev < 2) {
+		fprintf(stderr, PFX "libmlx4_create_qp: libmlx4/hermon umap "
+		    "rev mismatch (kernel rev=%d)\n", resp.mdd.mqp_rev);
+		goto err_destroy;
+	}
+	qpbuf = mmap64((void *)0, resp.mdd.mqp_maplen, (PROT_READ | PROT_WRITE),
+	    MAP_SHARED, pd->context->mmap_fd, resp.mdd.mqp_mapoffset);
+
+	if (qpbuf == MAP_FAILED)
+		goto err_destroy;
+
+	/*
+	 * Need to set qp->buf here in case alloc_db fails then
+	 * we'll call mlx4_free_buf() to unmap.
+	 */
+	qp->buf.buf	= qpbuf;
+	qp->buf.length	= resp.mdd.mqp_maplen;
+
+	if (!attr->srq && attr->qp_type != IBV_QPT_XRC) {
+		qp->db = mlx4_alloc_db(to_mctx(pd->context),
+		    resp.mdd.mqp_rdbr_mapoffset, resp.mdd.mqp_rdbr_maplen,
+		    resp.mdd.mqp_rdbr_offset);
+		if (qp->db == NULL)
+			goto err_buf;
+
+		*qp->db = 0;
+	}
+
+	/*
+	 * Retrieve sendqueue actual size, and the number of headroom WQEs
+	 * that were required based on kernel setup of prefetch or not for
+	 * send queue.
+	 * 	Note: mqp_sq_numwqe includes the head room wqes. The private
+	 *	      wqe.cnt also includes headroom wqes, the verbs count
+	 *	      should reflect the wqe count that is usable.
+	 */
+	qp->sq_spare_wqes = resp.mdd.mqp_sq_headroomwqes;
+	qp->sq.wqe_cnt    = resp.mdd.mqp_sq_numwqe;
+
+	if (attr->srq)
+		qp->rq.wqe_cnt  = 0;
+	else
+		qp->rq.wqe_cnt  = resp.mdd.mqp_rq_numwqe;
+
+	if (mlx4_set_qp_buf(pd, qp, qpbuf, resp.mdd.mqp_maplen,
+	    resp.mdd.mqp_rq_wqesz, resp.mdd.mqp_rq_off,
+	    resp.mdd.mqp_sq_wqesz, resp.mdd.mqp_sq_off))
+		goto err_rq_db;
+
+	mlx4_init_qp_indices(qp);
+
+	ret = mlx4_store_qp(to_mctx(pd->context), qp->ibv_qp.qp_num, qp);
+	if (ret)
+		goto err_rq_db;
+#endif
 	pthread_mutex_unlock(&to_mctx(pd->context)->qp_table_mutex);
 
 	qp->rq.wqe_cnt = attr->cap.max_recv_wr;
@@ -536,9 +969,38 @@
 
 	return &qp->ibv_qp;
 
+#if defined(__SVR4) && defined(__sun)
+err_rq_db:
+	if (!attr->srq && attr->qp_type != IBV_QPT_XRC)
+		mlx4_free_db(to_mctx(pd->context), MLX4_DB_TYPE_RQ, qp->db);
+err_buf:
+	mlx4_free_buf(&qp->buf);
+
 err_destroy:
+	/*
+	 * Calling ibv_cmd_destroy_qp() will try to take the ibv_qp
+	 * mutex that is initialised by the ibv_create_qp() entry point
+	 * that called us AFTER we return, so it's not initialised yet.
+	 * So initialise it here so the destroy call doesn't hang.
+	 */
+	pthread_mutex_init(&(qp->ibv_qp.mutex), NULL);
+	pthread_cond_init(&(qp->ibv_qp.cond), NULL);
+	qp->ibv_qp.events_completed = 0;
 	ibv_cmd_destroy_qp(&qp->ibv_qp);
+err_free:
+	pthread_mutex_unlock(&to_mctx(pd->context)->qp_table_mutex);
 
+	if (qp->sq.wrid)
+		free(qp->sq.wrid);
+
+	if (qp->rq.wrid)
+		free(qp->rq.wrid);
+err:
+	free(qp);
+#else
+err_destroy:
+	ibv_cmd_destroy_qp(&qp->ibv_qp);
+
 err_rq_db:
 	pthread_mutex_unlock(&to_mctx(pd->context)->qp_table_mutex);
 	if (!attr->srq && attr->qp_type != IBV_QPT_XRC)
@@ -552,6 +1014,7 @@
 
 err:
 	free(qp);
+#endif
 
 	return NULL;
 }
@@ -745,6 +1208,13 @@
 				    struct ibv_cq *xrc_cq,
 				    struct ibv_srq_init_attr *attr)
 {
+#if defined(__SVR4) && defined(__sun)
+	/*
+	 * Not supported by Solaris kernel driver.  When/if supported
+	 * this routine will need to be ported.
+	 */
+	return NULL;
+#else
 	struct mlx4_create_xrc_srq  cmd;
 	struct mlx4_create_srq_resp resp;
 	struct mlx4_srq		   *srq;
@@ -807,6 +1277,7 @@
 	free(srq);
 
 	return NULL;
+#endif
 }
 
 struct ibv_xrc_domain *mlx4_open_xrc_domain(struct ibv_context *context,
diff -r -u /tmp/839450/libmlx4-1.0.1/src/qp.c libmlx4-1.0.1/src/qp.c
--- /tmp/839450/libmlx4-1.0.1/src/qp.c	Thu Mar 10 04:48:34 2011
+++ libmlx4-1.0.1/src/qp.c	Tue Mar 15 07:09:43 2011
@@ -407,7 +407,8 @@
 out:
 	ctx = to_mctx(ibqp->context);
 
-	if (nreq == 1 && inl && size > 1 && size < ctx->bf_buf_size / 16) {
+	if (ctx->ibv_ctx.device->blueflame_enabled && nreq == 1 && inl &&
+	    size > 1 && size < ctx->bf_buf_size / 16) {
 		ctrl->owner_opcode |= htonl((qp->sq.head & 0xffff) << 8);
 		*(uint32_t *) (&ctrl->vlan_tag) |= qp->doorbell_qpn;
 		/*
@@ -589,6 +590,72 @@
 		; /* nothing */
 }
 
+#if defined(__SVR4) && defined(__sun)
+/*
+ * Initialise the user-space view of a QP whose work queue memory
+ * (qpbuf, buflen bytes) is handed in by the caller: allocate the wrid
+ * arrays, derive the WQE shifts from rq_wqesz/sq_wqesz, lay out the
+ * queue offsets and zero the first buf_size bytes of the buffer.
+ * qp->sq.wqe_cnt and qp->rq.wqe_cnt must be set before the call.
+ *
+ * Returns 0 on success, -1 if a wrid array cannot be allocated.  In
+ * that case qp->sq.wrid is reset to NULL, because mlx4_create_qp()
+ * frees any non-NULL wrid pointer on its error path and would
+ * otherwise free the same block a second time.
+ */
+int mlx4_set_qp_buf(struct ibv_pd *pd, struct mlx4_qp *qp, void *qpbuf,
+    uint64_t buflen, uint32_t rq_wqesz, uint32_t rq_off,
+    uint32_t sq_wqesz, uint32_t sq_off)
+{
+	qp->buf.buf      = qpbuf;
+	qp->buf.length   = buflen;
+
+	qp->sq.wrid = malloc(qp->sq.wqe_cnt * sizeof (uint64_t));
+	if (!qp->sq.wrid)
+		return -1;
+
+	if (qp->rq.wqe_cnt) {
+		qp->rq.wrid = malloc(qp->rq.wqe_cnt * sizeof (uint64_t));
+		if (!qp->rq.wrid) {
+			free(qp->sq.wrid);
+			qp->sq.wrid = NULL;
+			return -1;
+		}
+	}
+
+	for (qp->rq.wqe_shift = 4; 1 << qp->rq.wqe_shift < rq_wqesz;
+	     qp->rq.wqe_shift++) {
+		; /* nothing */
+	}
+
+	for (qp->sq.wqe_shift = 6; 1 << qp->sq.wqe_shift < sq_wqesz;
+	     qp->sq.wqe_shift++) {
+		; /* nothing */
+	}
+
+	qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
+		(qp->sq.wqe_cnt << qp->sq.wqe_shift);
+	if (qp->rq.wqe_shift > qp->sq.wqe_shift) {
+		qp->rq.offset = 0;
+		qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
+	} else {
+		qp->rq.offset = qp->sq.wqe_cnt << qp->sq.wqe_shift;
+		qp->sq.offset = 0;
+	}
+
+	if ((long int)qp->buf.length < (long int)qp->buf_size) {
+		fprintf(stderr, PFX "QP kernel buffer size %lu < user buf "
+		    "size %d\n", (unsigned long)qp->buf.length, qp->buf_size);
+	}
+	if ((!rq_off && qp->rq.offset) || (!sq_off && qp->sq.offset)) {
+		fprintf(stderr, PFX "QP kernel and user out of sync on "
+		    "buffer order\n");
+	}
+
+	memset(qp->buf.buf, 0, qp->buf_size);
+	return 0;
+}
+#endif
 int mlx4_alloc_qp_buf(struct ibv_pd *pd, struct ibv_qp_cap *cap,
 		       enum ibv_qp_type type, struct mlx4_qp *qp)
 {
diff -r -u /tmp/839450/libmlx4-1.0.1/src/mlx4.h libmlx4-1.0.1/src/mlx4.h
--- /tmp/839450/libmlx4-1.0.1/src/mlx4.h	Thu Mar 10 04:48:34 2011
+++ libmlx4-1.0.1/src/mlx4.h	Fri Feb 11 04:05:04 2011
@@ -202,7 +202,11 @@
 	int				xrc_srq_table_shift;
 	int				xrc_srq_table_mask;
 
+#if defined(__SVR4) && defined(__sun)
+	struct mlx4_db_page	       *db_page_list;
+#else
 	struct mlx4_db_page	       *db_list[MLX4_NUM_DB_TYPE];
+#endif
 	pthread_mutex_t			db_list_mutex;
 };
 
@@ -351,7 +355,12 @@
 int mlx4_alloc_buf(struct mlx4_buf *buf, size_t size, int page_size);
 void mlx4_free_buf(struct mlx4_buf *buf);
 
+#if defined(__SVR4) && defined(__sun)
+uint32_t *mlx4_alloc_db(struct mlx4_context *context, uint64_t mapoffset,
+                        uint64_t maplen, uint32_t offset);
+#else
 uint32_t *mlx4_alloc_db(struct mlx4_context *context, enum mlx4_db_type type);
+#endif
 void mlx4_free_db(struct mlx4_context *context, enum mlx4_db_type type, uint32_t *db);
 
 int mlx4_query_device(struct ibv_context *context,
@@ -360,11 +369,17 @@
 		     struct ibv_port_attr *attr);
 
 struct ibv_pd *mlx4_alloc_pd(struct ibv_context *context);
+struct ibv_shpd *mlx4_alloc_shpd(struct ibv_pd *pd, uint64_t share_key, struct ibv_shpd *shpd);
+struct ibv_pd *mlx4_share_pd(struct ibv_context *context, struct ibv_shpd *shpd, uint64_t share_key);
 int mlx4_free_pd(struct ibv_pd *pd);
 
 struct ibv_mr *mlx4_reg_mr(struct ibv_pd *pd, void *addr,
 			    size_t length, int access);
+struct ibv_mr *mlx4_reg_mr_relaxed(struct ibv_pd *pd, void *addr,
+			    size_t length, int access);
 int mlx4_dereg_mr(struct ibv_mr *mr);
+int mlx4_dereg_mr_relaxed(struct ibv_mr *mr);
+int mlx4_flush_relaxed_mr(struct ibv_pd *pd);
 
 struct ibv_cq *mlx4_create_cq(struct ibv_context *context, int cqe,
 			       struct ibv_comp_channel *channel,
@@ -388,8 +403,13 @@
 int mlx4_query_srq(struct ibv_srq *srq,
 			   struct ibv_srq_attr *attr);
 int mlx4_destroy_srq(struct ibv_srq *srq);
+#if defined(__SVR4) && defined(__sun)
+int mlx4_set_srq_buf(struct ibv_pd *pd, struct mlx4_srq *srq,
+			uint32_t srq_wqesz, uint32_t srq_numwqe);
+#else
 int mlx4_alloc_srq_buf(struct ibv_pd *pd, struct ibv_srq_attr *attr,
 			struct mlx4_srq *srq);
+#endif
 void mlx4_free_srq_wqe(struct mlx4_srq *srq, int ind);
 int mlx4_post_srq_recv(struct ibv_srq *ibsrq,
 		       struct ibv_recv_wr *wr,
@@ -415,8 +435,14 @@
 void mlx4_calc_sq_wqe_size(struct ibv_qp_cap *cap, enum ibv_qp_type type,
 			   struct mlx4_qp *qp);
 int num_inline_segs(int data, enum ibv_qp_type type);
+#if defined(__SVR4) && defined(__sun)
+int mlx4_set_qp_buf(struct ibv_pd *pd, struct mlx4_qp *qp,
+                    void *qpbuf, uint64_t buflen, uint32_t rq_wqesz,
+                     uint32_t rq_off, uint32_t sq_wqesz, uint32_t sq_off);
+#else
 int mlx4_alloc_qp_buf(struct ibv_pd *pd, struct ibv_qp_cap *cap,
 		       enum ibv_qp_type type, struct mlx4_qp *qp);
+#endif
 void mlx4_set_sq_sizes(struct mlx4_qp *qp, struct ibv_qp_cap *cap,
 		       enum ibv_qp_type type);
 struct mlx4_qp *mlx4_find_qp(struct mlx4_context *ctx, uint32_t qpn);
diff -r -u /tmp/839450/libmlx4-1.0.1/src/srq.c libmlx4-1.0.1/src/srq.c
--- /tmp/839450/libmlx4-1.0.1/src/srq.c	Thu Mar 10 04:48:34 2011
+++ libmlx4-1.0.1/src/srq.c	Fri Feb 11 04:05:05 2011
@@ -124,6 +124,54 @@
 	return err;
 }
 
+#if defined(__SVR4) && defined(__sun)
+/*
+ * The Solaris kernel allocates the SRQ WQE buffer, this routine
+ * initializes the control structures and buffer contents for the
+ * SRQ memory.  That memory is mapped into the caller's address
+ * space prior to this call.
+ */
+int mlx4_set_srq_buf(struct ibv_pd *pd, struct mlx4_srq *srq,
+				uint32_t srq_wqesz, uint32_t srq_numwqe)
+{
+	struct mlx4_wqe_srq_next_seg *next;
+	struct mlx4_wqe_data_seg *scatter;
+	int i;
+
+	srq->max         = srq_numwqe;
+
+	srq->wrid = malloc(srq->max * sizeof (uint64_t));
+	if (!srq->wrid) {
+		return -1;
+	}
+
+	for (srq->wqe_shift = 5; 1 << srq->wqe_shift < srq_wqesz; ++srq->wqe_shift) {
+		; /* nothing */
+	}
+
+	memset(srq->buf.buf, 0, srq->buf.length);
+
+	/*
+	 * Now initialize the SRQ buffer so that all of the WQEs are
+	 * linked into the list of free WQEs.
+	 */
+	for (i = 0; i < srq->max; ++i) {
+		next = get_wqe(srq, i);
+		next->next_wqe_index = htons((i + 1) & (srq->max - 1));
+
+		for (scatter = (void *) (next + 1);
+		    (void *) scatter < (void *) next + (1 << srq->wqe_shift);
+		    ++scatter)
+			scatter->lkey = htonl(MLX4_INVALID_LKEY);
+	}
+
+	srq->head = 0;
+	srq->tail = srq->max - 1;
+
+	return 0;
+}
+#endif
+
 int mlx4_alloc_srq_buf(struct ibv_pd *pd, struct ibv_srq_attr *attr,
 		       struct mlx4_srq *srq)
 {
diff -r -u /tmp/839450/libmlx4-1.0.1/src/dbrec.c libmlx4-1.0.1/src/dbrec.c
--- /tmp/839450/libmlx4-1.0.1/src/dbrec.c	Tue Sep  8 06:40:36 2009
+++ libmlx4-1.0.1/src/dbrec.c	Fri Feb 11 04:05:04 2011
@@ -41,7 +41,157 @@
 
 #include "mlx4.h"
 
+#if defined(__SVR4) && defined(__sun)
+/*
+ * On Solaris the doorbell UAR is set up by the kernel; user space only
+ * mmaps the offset specified for the doorbell into its own address
+ * space.  Each user context keeps a reference-counted list of the
+ * doorbell pages it has mapped; this structure is one list entry.
+ */
 struct mlx4_db_page {
+	struct mlx4_db_page	   *prev, *next;	/* links in context->db_page_list */
+	uint32_t                   *dbp_page_addr;	/* start of the mmap'd region */
+	uint32_t                   *dbp_page_addr_end;	/* dbp_page_addr + dbp_map_len */
+	uint64_t                   dbp_map_offset;	/* mmap offset of the region */
+	uint64_t                   dbp_map_len;	/* mapped length in bytes */
+	int                        fd;	/* mmap_fd the region was mapped from */
+	int                        dbp_use_cnt;	/* doorbells referencing this page */
+};
+
+/*
+ * These are not required in Solaris, but we keep them to pass
+ * as dummy arguments so that the doorbell function calls can
+ * keep the same API.
+ */
+static const int db_size[] = {
+	[MLX4_DB_TYPE_CQ] = 8,
+	[MLX4_DB_TYPE_RQ] = 4,
+};
+
+/*
+ * Return a pointer to the doorbell at byte 'offset' within the region of
+ * context->ibv_ctx.mmap_fd described by 'mapoffset'/'maplen'.  The region
+ * is mmap'd on first use and shared by later callers, each of which takes
+ * a reference.  Returns NULL if the region cannot be mapped, memory is
+ * exhausted, or 'maplen' differs from an existing mapping of 'mapoffset'.
+ * Every successful call must be paired with mlx4_free_db() so the page
+ * reference is dropped when the doorbell is no longer required.
+ */
+uint32_t *mlx4_alloc_db(struct mlx4_context *context,
+                        uint64_t mapoffset,
+                        uint64_t maplen,
+                        uint32_t offset)
+{
+	struct mlx4_db_page *page;
+	uint32_t *db = NULL;
+
+	pthread_mutex_lock(&context->db_list_mutex);
+
+	for (page = context->db_page_list; page; page = page->next) {
+		if (page->dbp_map_offset == mapoffset &&
+		    page->fd == context->ibv_ctx.mmap_fd) {
+			if (page->dbp_map_len == maplen) {
+				goto found;
+			}
+			fprintf(stderr,
+			        PFX "mlx4_alloc_db: Bad maplen (%" PRIu64 ")\n",
+			        maplen);
+			goto out;
+		}
+	}
+
+	page = malloc(sizeof(struct mlx4_db_page));
+	if (!page) {
+		fprintf(stderr, PFX "DB alloc memory allocate failure\n");
+		goto out;
+	}
+
+	page->dbp_page_addr = (uint32_t *) mmap64((void *)0, maplen,
+	                                          (PROT_READ | PROT_WRITE),
+	                                          MAP_SHARED,
+	                                          context->ibv_ctx.mmap_fd,
+	                                          mapoffset);
+	if (page->dbp_page_addr == MAP_FAILED) {
+		fprintf(stderr, PFX
+		        "Unable to map doorbell entry: maplen:%" PRIu64 ", offset:%" PRIu64
+		        "\n", maplen, mapoffset);
+		free(page);
+		goto out;
+	}
+
+	page->dbp_page_addr_end = (uint32_t *)((uint8_t *) page->dbp_page_addr + maplen);
+
+	page->dbp_map_offset  = mapoffset;
+	page->dbp_map_len     = maplen;
+	page->dbp_use_cnt     = 0;
+	page->fd	      = context->ibv_ctx.mmap_fd;
+	page->next            = context->db_page_list;
+
+	if (context->db_page_list)
+		context->db_page_list->prev = page;
+
+	page->prev            = NULL;
+	context->db_page_list = page;
+
+found:
+	page->dbp_use_cnt++;
+
+	/* 'offset' is a byte offset from the start of the mapped region. */
+	db = (uint32_t *) ((uint8_t *)page->dbp_page_addr + offset);
+out:
+	pthread_mutex_unlock(&context->db_list_mutex);
+
+	return db;
+}
+
+/*
+ * Drop one reference on the doorbell page mapping that contains 'db'.
+ * When the last reference goes away the region is unmapped and its list
+ * entry is unlinked and freed.  The 'dummy' argument is not used.
+ */
+void mlx4_free_db(struct mlx4_context *context,
+                  enum mlx4_db_type dummy,
+                  uint32_t *db)
+{
+	struct mlx4_db_page *page;
+
+	pthread_mutex_lock(&context->db_list_mutex);
+
+	for (page = context->db_page_list; page; page = page->next) {
+		if (db >= page->dbp_page_addr && db < page->dbp_page_addr_end) {
+			break;
+		}
+	}
+
+	if (page == NULL) {
+		fprintf(stderr, PFX "mlx4_free_db: Doorbell not mapped\n");
+		goto out;
+	}
+
+	page->dbp_use_cnt--;
+	if (page->dbp_use_cnt > 0) {	/* other doorbells still use this page */
+		goto out;
+	}
+
+	munmap((void *)page->dbp_page_addr, page->dbp_map_len);
+	if (page->next) {
+		page->next->prev = page->prev;
+	}
+	if (page->prev) {
+		page->prev->next = page->next;
+	} else {	/* page was the list head */
+		context->db_page_list = page->next;
+	}
+
+	free(page);
+
+out:
+	pthread_mutex_unlock(&context->db_list_mutex);
+}
+
+#else
+
+struct mlx4_db_page {
 	struct mlx4_db_page	       *prev, *next;
 	struct mlx4_buf			buf;
 	int				num_db;
@@ -152,3 +302,4 @@
 out:
 	pthread_mutex_unlock(&context->db_list_mutex);
 }
+#endif
diff -r -u /tmp/839450/libmlx4-1.0.1/src/buf.c libmlx4-1.0.1/src/buf.c
--- /tmp/839450/libmlx4-1.0.1/src/buf.c	Tue Sep  8 06:41:26 2009
+++ libmlx4-1.0.1/src/buf.c	Fri Feb 11 04:05:03 2011
@@ -78,6 +78,8 @@
 
 void mlx4_free_buf(struct mlx4_buf *buf)
 {
+#if !(defined(__SVR4) && defined(__sun))	/* compiled out on Solaris */
 	ibv_dofork_range(buf->buf, buf->length);
+#endif	/* !(__SVR4 && __sun) */
 	munmap(buf->buf, buf->length);
 }
diff -r -u /tmp/839450/libmlx4-1.0.1/src/mlx4.c libmlx4-1.0.1/src/mlx4.c
--- /tmp/839450/libmlx4-1.0.1/src/mlx4.c	Thu Mar 10 04:48:34 2011
+++ libmlx4-1.0.1/src/mlx4.c	Fri Mar 11 14:05:26 2011
@@ -112,8 +112,13 @@
 	.query_port    = mlx4_query_port,
 	.alloc_pd      = mlx4_alloc_pd,
 	.dealloc_pd    = mlx4_free_pd,
+	.alloc_shpd    = mlx4_alloc_shpd,
+	.share_pd      = mlx4_share_pd,
 	.reg_mr	       = mlx4_reg_mr,
+	.reg_mr_relaxed	       = mlx4_reg_mr_relaxed,
 	.dereg_mr      = mlx4_dereg_mr,
+	.dereg_mr_relaxed      = mlx4_dereg_mr_relaxed,
+	.flush_relaxed_mr      = mlx4_flush_relaxed_mr,
 	.create_cq     = mlx4_create_cq,
 	.poll_cq       = mlx4_poll_cq,
 	.req_notify_cq = mlx4_arm_cq,
@@ -144,6 +149,11 @@
 	struct mlx4_alloc_ucontext_resp resp;
 	int				i;
 	struct ibv_device_attr		dev_attrs;
+#if defined(__SVR4) && defined(__sun)
+	pid_t                           cur_pid;
+	off64_t                         uarpg_offset;
+	uint32_t                        temp_qp_num;
+#endif
 
 	context = calloc(1, sizeof *context);
 	if (!context)
@@ -150,11 +160,29 @@
 		return NULL;
 
 	context->ibv_ctx.cmd_fd = cmd_fd;
-
+#if defined(__SVR4) && defined(__sun)
+	context->ibv_ctx.device = ibdev;
+#endif
 	if (ibv_cmd_get_context(&context->ibv_ctx, &cmd, sizeof cmd,
 				&resp.ibv_resp, sizeof resp))
 		goto err_free;
 
+#if defined(__SVR4) && defined(__sun)
+	/*
+	 * OFED expects the QP table size to be a power of two; round it up
+	 * here so that the user table is large enough.
+	 */
+	for (temp_qp_num = 1; temp_qp_num < resp.qp_tab_size; temp_qp_num <<= 1)
+		;
+	resp.qp_tab_size = temp_qp_num;
+
+	/*
+	 * NOTE: In Solaris this value is not returned in the channel interface
+	 * opaque data and is assumed to be 2*256 by the dapl code.  We have
+	 * made the same assumption here.
+	 */
+	resp.bf_reg_size = 512;
+#endif
 	context->num_qps	= resp.qp_tab_size;
 	context->qp_table_shift = ffs(context->num_qps) - 1 - MLX4_QP_TABLE_BITS;
 	context->qp_table_mask	= (1 << context->qp_table_shift) - 1;
@@ -172,20 +200,45 @@
 	for (i = 0; i < MLX4_XRC_SRQ_TABLE_SIZE; ++i)
 		context->xrc_srq_table[i].refcnt = 0;
 
+#if defined(__SVR4) && defined(__sun)
+	context->db_page_list = NULL;
+#else
 	for (i = 0; i < MLX4_NUM_DB_TYPE; ++i)
 		context->db_list[i] = NULL;
+#endif
 
 	pthread_mutex_init(&context->db_list_mutex, NULL);
 
+#if defined(__SVR4) && defined(__sun)
+	cur_pid = getpid();
+	uarpg_offset = (((off64_t) cur_pid << MLNX_UMAP_RSRC_TYPE_SHIFT) |
+	                MLNX_UMAP_UARPG_RSRC) * to_mdev(ibdev)->page_size;
+	context->uar = mmap64((void *)0, to_mdev(ibdev)->page_size, PROT_WRITE,
+			    MAP_SHARED, context->ibv_ctx.mmap_fd, uarpg_offset);
+#else
 	context->uar = mmap(NULL, to_mdev(ibdev)->page_size, PROT_WRITE,
 			    MAP_SHARED, cmd_fd, 0);
+#endif
 	if (context->uar == MAP_FAILED)
 		goto err_free;
 
 	if (resp.bf_reg_size) {
+#if defined(__SVR4) && defined(__sun)
+		/*
+		 * If the kernel driver supports the BlueFlame feature, map
+		 * the BlueFlame user access region as well.
+		 */
+		uarpg_offset = (((off64_t) cur_pid << MLNX_UMAP_RSRC_TYPE_SHIFT)
+		    | MLNX_UMAP_BLUEFLAMEPG_RSRC) * to_mdev(ibdev)->page_size;
+
+		context->bf_page = mmap64((void *)0, to_mdev(ibdev)->page_size,
+		    PROT_WRITE, MAP_SHARED, context->ibv_ctx.mmap_fd,
+		    uarpg_offset);
+#else
 		context->bf_page = mmap(NULL, to_mdev(ibdev)->page_size,
 					PROT_WRITE, MAP_SHARED, cmd_fd,
 					to_mdev(ibdev)->page_size);
+#endif
 		if (context->bf_page == MAP_FAILED) {
 			fprintf(stderr, PFX "Warning: BlueFlame available, "
 				"but failed to mmap() BlueFlame page.\n");
@@ -214,6 +267,7 @@
 	context->max_qp_wr = dev_attrs.max_qp_wr;
 	context->max_sge = dev_attrs.max_sge;
 	context->max_cqe = dev_attrs.max_cqe;
+#ifdef HAVE_IBV_XRC_OPS
 	if (!(dev_attrs.device_cap_flags & IBV_DEVICE_XRC)) {
 		fprintf(stderr, PFX "There is a mismatch between "
 		        "the kernel and the userspace libraries: "
@@ -220,6 +274,7 @@
 			"Kernel does not support XRC. Exiting.\n");
 		goto query_free;
 	}
+#endif
 
 	return &context->ibv_ctx;
 
@@ -240,6 +295,7 @@
 	munmap(context->uar, to_mdev(ibctx->device)->page_size);
 	if (context->bf_page)
 		munmap(context->bf_page, to_mdev(ibctx->device)->page_size);
+
 	free(context);
 }
 
diff -r -u /tmp/839450/libmlx4-1.0.1/configure.in libmlx4-1.0.1/configure.in
--- /tmp/839450/libmlx4-1.0.1/configure.in	Thu Mar 10 04:48:34 2011
+++ libmlx4-1.0.1/configure.in	Tue Mar 15 07:41:46 2011
@@ -43,11 +43,11 @@
 AC_C_CONST
 AC_CHECK_SIZEOF(long)
 AC_CHECK_MEMBER(struct ibv_context.more_ops,
-    [AC_DEFINE([HAVE_IBV_MORE_OPS], 1, [Define to 1 if more_ops is a member of ibv_context])],,
+    [AC_DEFINE([HAVE_IBV_MORE_OPS], 0, [Define to 1 if more_ops is a member of ibv_context])],,
     [#include <infiniband/verbs.h>])
-AC_CHECK_MEMBER(struct ibv_more_ops.create_xrc_srq,
-    [AC_DEFINE([HAVE_IBV_XRC_OPS], 1, [Define to 1 if have xrc ops])],,
-    [#include <infiniband/verbs.h>])
+#AC_CHECK_MEMBER(struct ibv_more_ops.create_xrc_srq,
+#    [AC_DEFINE([HAVE_IBV_XRC_OPS], 1, [Define to 1 if have xrc ops])],,
+#    [#include <infiniband/verbs.h>])
 
 dnl Checks for library functions
 AC_CHECK_FUNC(ibv_read_sysfs_file, [],
@@ -80,6 +80,5 @@
 fi
 AC_SUBST(MLX4_VERSION_SCRIPT)
 
-SHAVE_INIT([], [enable])
 AC_CONFIG_FILES([Makefile libmlx4.spec shave shave-libtool])
 AC_OUTPUT
diff -r -u /tmp/839450/libmlx4-1.0.1/libmlx4.spec.in libmlx4-1.0.1/libmlx4.spec.in
--- /tmp/839450/libmlx4-1.0.1/libmlx4.spec.in	Thu Mar 10 00:23:34 2011
+++ libmlx4-1.0.1/libmlx4.spec.in	Tue Mar 15 07:43:54 2011
@@ -6,7 +6,7 @@
 Group: System Environment/Libraries
 License: GPLv2 or BSD
 Url: http://openfabrics.org/
-Source: http://openfabrics.org/downloads/libmlx4/libmlx4-1.0.1.tar.gz
+Source: http://openfabrics.org/downloads/libmlx4-1.0.1.tar.gz
 BuildRoot: %(mktemp -ud %{_tmppath}/%{name}-%{version}-%{release}-XXXXXX)
 
 BuildRequires: libibverbs-devel >= 1.1-0.1.rc2
diff -r -u /tmp/839450/libmlx4-1.0.1/configure libmlx4-1.0.1/configure
--- /tmp/839450/libmlx4-1.0.1/configure	Thu Mar 10 04:48:41 2011
+++ libmlx4-1.0.1/configure	Tue Mar 15 07:35:49 2011
@@ -3899,13 +3899,13 @@
   CFLAGS=$ac_save_CFLAGS
 elif test $ac_cv_prog_cc_g = yes; then
   if test "$GCC" = yes; then
-    CFLAGS="-g -O2"
+    CFLAGS="-g -O3"
   else
     CFLAGS="-g"
   fi
 else
   if test "$GCC" = yes; then
-    CFLAGS="-O2"
+    CFLAGS="-O3"
   else
     CFLAGS=
   fi
@@ -8890,6 +8890,7 @@
 	;;
       esac
       link_all_deplibs=yes
+	hardcode_libdir_flag_spec=
       ;;
 
     sunos4*)
@@ -11113,13 +11114,13 @@
   CFLAGS=$ac_save_CFLAGS
 elif test $ac_cv_prog_cc_g = yes; then
   if test "$GCC" = yes; then
-    CFLAGS="-g -O2"
+    CFLAGS="-g -O3"
   else
     CFLAGS="-g"
   fi
 else
   if test "$GCC" = yes; then
-    CFLAGS="-O2"
+    CFLAGS="-O3"
   else
     CFLAGS=
   fi
@@ -11654,11 +11655,11 @@
 
 ac_fn_c_check_member "$LINENO" "struct ibv_more_ops" "create_xrc_srq" "ac_cv_member_struct_ibv_more_ops_create_xrc_srq" "#include <infiniband/verbs.h>
 "
-if test "x$ac_cv_member_struct_ibv_more_ops_create_xrc_srq" = x""yes; then :
+#if test "x$ac_cv_member_struct_ibv_more_ops_create_xrc_srq" = x""yes; then :
 
-$as_echo "#define HAVE_IBV_XRC_OPS 1" >>confdefs.h
+#$as_echo "#define HAVE_IBV_XRC_OPS 1" >>confdefs.h
 
-fi
+#fi
 
 
 ac_fn_c_check_func "$LINENO" "ibv_read_sysfs_file" "ac_cv_func_ibv_read_sysfs_file"
diff -r -u /tmp/839450/libmlx4-1.0.1/Makefile.in libmlx4-1.0.1/Makefile.in
--- /tmp/839450/libmlx4-1.0.1/Makefile.in	Thu Mar 10 04:48:40 2011
+++ libmlx4-1.0.1/Makefile.in	Tue Mar 15 07:48:16 2011
@@ -264,7 +264,7 @@
 @HAVE_IBV_DEVICE_LIBRARY_EXTENSION_TRUE@src_libmlx4_la_LDFLAGS = -avoid-version -release @IBV_DEVICE_LIBRARY_EXTENSION@ \
 @HAVE_IBV_DEVICE_LIBRARY_EXTENSION_TRUE@        $(mlx4_version_script)
 
-@HAVE_IBV_DEVICE_LIBRARY_EXTENSION_TRUE@mlx4confdir = $(sysconfdir)/libibverbs.d
+@HAVE_IBV_DEVICE_LIBRARY_EXTENSION_TRUE@mlx4confdir = $(datadir)/libibverbs.d
 @HAVE_IBV_DEVICE_LIBRARY_EXTENSION_TRUE@mlx4conf_DATA = mlx4.driver
 @HAVE_IBV_DEVICE_LIBRARY_EXTENSION_FALSE@mlx4libdir = $(libdir)/infiniband
 @HAVE_IBV_DEVICE_LIBRARY_EXTENSION_FALSE@mlx4lib_LTLIBRARIES = src/mlx4.la
@@ -345,8 +345,8 @@
 	  else :; fi; \
 	done; \
 	test -z "$$list2" || { \
-	  echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(libdir)'"; \
-	  $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(libdir)"; \
+	  echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) -m 755 $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(libdir)'"; \
+	  $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) -m 755 $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(libdir)"; \
 	}
 
 uninstall-libLTLIBRARIES:
@@ -376,8 +376,8 @@
 	  else :; fi; \
 	done; \
 	test -z "$$list2" || { \
-	  echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(mlx4libdir)'"; \
-	  $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(mlx4libdir)"; \
+	  echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) -m755 $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(mlx4libdir)'"; \
+	  $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) -m755 $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(mlx4libdir)"; \
 	}
 
 uninstall-mlx4libLTLIBRARIES: