components/open-fabrics/libmlx4/patches/base.patch
author boris.chiu@oracle.com
Wed, 02 Sep 2015 11:01:59 -0700
changeset 4835 d5abd56e3fcf
parent 4661 353f8a998732
permissions -rw-r--r--
21170572 libmlx4 should be built with the system mlnx_umap.h
21481241 mlnx_umap.h needs removal and unwanted comment in libibverbs needs deletion
21747062 update libibverbs to handle EDR/FDR speeds

# This patch was developed in-house. We plan to submit it upstream, but do
# not yet have a target date for doing so.
#
diff -r -u /tmp/839450/libmlx4-1.0.1/Makefile.am libmlx4-1.0.1/Makefile.am
--- /tmp/839450/libmlx4-1.0.1/Makefile.am	Tue Sep  8 06:40:35 2009
+++ libmlx4-1.0.1/Makefile.am	Tue Mar 15 06:49:47 2011
@@ -10,7 +10,7 @@
     src_libmlx4_la_SOURCES = $(MLX4_SOURCES)
     src_libmlx4_la_LDFLAGS = -avoid-version -release @IBV_DEVICE_LIBRARY_EXTENSION@ \
         $(mlx4_version_script)
-    mlx4confdir = $(sysconfdir)/libibverbs.d
+    mlx4confdir = $(datadir)/libibverbs.d
     mlx4conf_DATA = mlx4.driver
 else
     mlx4libdir = $(libdir)/infiniband
diff -r -u /tmp/839450/libmlx4-1.0.1/src/mlx4-abi.h libmlx4-1.0.1/src/mlx4-abi.h
--- /tmp/839450/libmlx4-1.0.1/src/mlx4-abi.h	Thu Mar 10 04:48:34 2011
+++ libmlx4-1.0.1/src/mlx4-abi.h	Fri Feb 11 03:49:51 2011
@@ -35,6 +35,10 @@
 
 #include <infiniband/kern-abi.h>
 
+#if defined(__SVR4) && defined(__sun)
+#include <sys/ib/adapters/mlnx_umap.h>   /* Opaque CI data out definitions */
+#endif
+
 #define MLX4_UVERBS_MIN_ABI_VERSION	2
 #define MLX4_UVERBS_MAX_ABI_VERSION	3
 
@@ -43,6 +47,10 @@
 	__u32				qp_tab_size;
 	__u16				bf_reg_size;
 	__u16				bf_regs_per_page;
+#if defined(__SVR4) && defined(__sun)
+	uint32_t			muc_rev;
+	uint32_t			muc_reserved;
+#endif
 };
 
 struct mlx4_alloc_pd_resp {
@@ -51,23 +59,45 @@
 	__u32				reserved;
 };
 
+struct mlx4_share_pd_resp {
+	struct ibv_share_pd_resp	ibv_resp;
+	__u32				pdn;
+	__u32				reserved;
+};
+
 struct mlx4_create_cq {
 	struct ibv_create_cq		ibv_cmd;
+#if !(defined(__SVR4) && defined(__sun))
 	__u64				buf_addr;
 	__u64				db_addr;
+#endif
 };
 
 struct mlx4_create_cq_resp {
 	struct ibv_create_cq_resp	ibv_resp;
+#if !(defined(__SVR4) && defined(__sun))
 	__u32				cqn;
 	__u32				reserved;
+#else
+	mlnx_umap_cq_data_out_t		mdd;
+#endif
 };
 
 struct mlx4_resize_cq {
 	struct ibv_resize_cq		ibv_cmd;
+#if !(defined(__SVR4) && defined(__sun))
 	__u64				buf_addr;
+#endif
 };
 
+#if defined(__SVR4) && defined(__sun)
+struct mlx4_resize_cq_resp {
+	struct ibv_resize_cq_resp	ibv_resp;
+	mlnx_umap_cq_data_out_t		mdd;
+};
+#endif
+
+
 #ifdef HAVE_IBV_XRC_OPS
 struct mlx4_create_xrc_srq {
 	struct ibv_create_xrc_srq	ibv_cmd;
@@ -78,18 +108,25 @@
 
 struct mlx4_create_srq {
 	struct ibv_create_srq		ibv_cmd;
+#if !(defined(__SVR4) && defined(__sun))
 	__u64				buf_addr;
 	__u64				db_addr;
+#endif
 };
 
 struct mlx4_create_srq_resp {
 	struct ibv_create_srq_resp	ibv_resp;
+#if !(defined(__SVR4) && defined(__sun))
 	__u32				srqn;
 	__u32				reserved;
+#else
+	mlnx_umap_srq_data_out_t	mdd;
+#endif
 };
 
 struct mlx4_create_qp {
 	struct ibv_create_qp		ibv_cmd;
+#if !(defined(__SVR4) && defined(__sun))
 	__u64				buf_addr;
 	__u64				db_addr;
 	__u8				log_sq_bb_count;
@@ -96,8 +133,16 @@
 	__u8				log_sq_stride;
 	__u8				sq_no_prefetch;	/* was reserved in ABI 2 */
 	__u8				reserved[5];
+#endif
 };
 
+#if defined(__SVR4) && defined(__sun)
+struct mlx4_create_qp_resp {
+	struct ibv_create_qp_resp	ibv_resp;
+	mlnx_umap_qp_data_out_t		mdd;
+};
+#endif
+
 #ifdef HAVE_IBV_XRC_OPS
 struct mlx4_open_xrc_domain_resp {
 	struct ibv_open_xrc_domain_resp	ibv_resp;
diff -r -u /tmp/839450/libmlx4-1.0.1/src/verbs.c libmlx4-1.0.1/src/verbs.c
--- /tmp/839450/libmlx4-1.0.1/src/verbs.c	Thu Mar 10 04:48:34 2011
+++ libmlx4-1.0.1/src/verbs.c	Fri Mar 11 14:40:18 2011
@@ -56,6 +56,14 @@
 	if (ret)
 		return ret;
 
+#if defined(__SVR4) && defined(__sun)
+	/*
+	 * To be consistent with OFED, and so that the queue operations in
+	 * srq.c work, we need to report the max as the actual max less 1.
+	 * In OFED this is done in the HCA driver.
+	 */
+	attr->max_srq_wr -= 1;
+#endif
 	major     = (raw_fw_ver >> 32) & 0xffff;
 	minor     = (raw_fw_ver >> 16) & 0xffff;
 	sub_minor = raw_fw_ver & 0xffff;
@@ -95,6 +103,39 @@
 	return &pd->ibv_pd;
 }
 
+struct ibv_shpd *mlx4_alloc_shpd(struct ibv_pd *pd, uint64_t share_key, struct ibv_shpd *shpd)
+{
+	struct ibv_alloc_shpd cmd;
+	struct ibv_alloc_shpd_resp resp;
+
+	if (ibv_cmd_alloc_shpd(pd->context, pd, share_key, shpd, &cmd, sizeof cmd,
+			     &resp, sizeof resp)) {
+		return NULL;
+	}
+
+	return shpd;
+}
+
+
+struct ibv_pd *mlx4_share_pd(struct ibv_context *context, struct ibv_shpd *shpd, uint64_t share_key)
+{
+	struct ibv_share_pd       cmd;
+	struct mlx4_share_pd_resp resp;
+	struct mlx4_pd		 *pd;
+
+	pd = malloc(sizeof *pd);
+	if (!pd)
+		return NULL;
+
+	if (ibv_cmd_share_pd(context, shpd, share_key, &pd->ibv_pd, &cmd, sizeof cmd,
+			     &resp.ibv_resp, sizeof resp)) {
+		free(pd);
+		return NULL;
+	}
+	pd->pdn = resp.pdn;
+	return &pd->ibv_pd;
+}
+
 int mlx4_free_pd(struct ibv_pd *pd)
 {
 	int ret;
@@ -138,6 +179,37 @@
 	return mr;
 }
 
+struct ibv_mr *mlx4_reg_mr_relaxed(struct ibv_pd *pd, void *addr, size_t length,
+			   int access)
+{
+	struct ibv_mr *mr;
+	struct ibv_reg_mr cmd;
+	int ret;
+
+	mr = malloc(sizeof *mr);
+	if (!mr)
+		return NULL;
+
+#ifdef IBV_CMD_REG_MR_RELAXED_HAS_RESP_PARAMS
+	{
+		struct ibv_reg_mr_resp resp;
+
+		ret = ibv_cmd_reg_mr_relaxed(pd, addr, length, (uintptr_t) addr,
+				     access, mr, &cmd, sizeof cmd,
+				     &resp, sizeof resp);
+	}
+#else
+	ret = ibv_cmd_reg_mr_relaxed(pd, addr, length, (uintptr_t) addr, access, mr,
+			     &cmd, sizeof cmd);
+#endif
+	if (ret) {
+		free(mr);
+		return NULL;
+	}
+
+	return mr;
+}
+
 int mlx4_dereg_mr(struct ibv_mr *mr)
 {
 	int ret;
@@ -150,6 +222,29 @@
 	return 0;
 }
 
+int mlx4_dereg_mr_relaxed(struct ibv_mr *mr)
+{
+	int ret;
+
+	ret = ibv_cmd_dereg_mr_relaxed(mr);
+	if (ret)
+		return ret;
+
+	free(mr);
+	return 0;
+}
+
+int mlx4_flush_relaxed_mr(struct ibv_pd *pd)
+{
+	int ret;
+
+	ret = ibv_cmd_flush_relaxed_mr(pd);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
 static int align_queue_size(int req)
 {
 	int nent;
@@ -168,6 +263,9 @@
 	struct mlx4_create_cq_resp resp;
 	struct mlx4_cq		  *cq;
 	int			   ret;
+#if defined(__SVR4) && defined(__sun)
+	void                      *cqbuf;
+#endif
 
 	/* Sanity check CQ size before proceeding */
 	if (cqe > 0x3fffff)
@@ -184,7 +282,8 @@
 
 	cqe = align_queue_size(cqe + 1);
 
-	if (mlx4_alloc_cq_buf(to_mdev(context->device), &cq->buf, cqe))
+#if !(defined(__SVR4) && defined(__sun))
+	if (mlx4_alloc_cq_buf(to_mdev(context->device), &cq->buf, cqe))
 		goto err;
 
 	cq->set_ci_db  = mlx4_alloc_db(to_mctx(context), MLX4_DB_TYPE_CQ);
@@ -198,15 +297,73 @@
 
 	cmd.buf_addr = (uintptr_t) cq->buf.buf;
 	cmd.db_addr  = (uintptr_t) cq->set_ci_db;
+#else
+	cq->buf.buf    = NULL;
+	cq->buf.length = 0;
+	cq->arm_db     = NULL;
+	cq->set_ci_db  = NULL;
+#endif
 
 	ret = ibv_cmd_create_cq(context, cqe - 1, channel, comp_vector,
 				&cq->ibv_cq, &cmd.ibv_cmd, sizeof cmd,
 				&resp.ibv_resp, sizeof resp);
 	if (ret)
+#if !(defined(__SVR4) && defined(__sun))
 		goto err_db;
-
 	cq->cqn = resp.cqn;
+#else
+		goto err;
 
+	/*
+	 * For Solaris the kernel driver passes back mmap information for
+	 * mapping the CQ memory it allocated.
+	 */
+	if (resp.mdd.mcq_rev < MLNX_UMAP_IF_VERSION) {
+		fprintf(stderr, PFX "libmlx4_create_cq: libmlx4/hermon umap "
+		    "rev mismatch (kernel rev=%d)\n", resp.mdd.mcq_rev);
+		goto err_destroy;
+	}
+
+	cqbuf = mmap64((void *)0, resp.mdd.mcq_maplen, (PROT_READ | PROT_WRITE),
+	    MAP_SHARED, context->mmap_fd, resp.mdd.mcq_mapoffset);
+
+	if (cqbuf == MAP_FAILED)
+		goto err_destroy;
+
+	/*
+	 * Extract hardware driver values for the number of CQEs and the
+	 * hardware CQ number to use (needed for user space doorbells).
+	 */
+	cqe            = resp.mdd.mcq_numcqe;
+	cq->cqn        = resp.mdd.mcq_cqnum;
+	cq->buf.buf    = cqbuf;
+	cq->buf.length = resp.mdd.mcq_maplen;
+	cq->ibv_cq.cqe = cqe - 1;
+
+	/*
+	 * We map both poll and arm as separate doorbells (OFED assumes a 1-word
+	 * offset and just bumps the address) since Solaris provides a
+	 * separate offset. This will amount to the same thing (a second
+	 * reference to the first doorbell is added) but is more flexible.
+	 */
+	cq->set_ci_db = mlx4_alloc_db(to_mctx(context),
+	    resp.mdd.mcq_polldbr_mapoffset, resp.mdd.mcq_polldbr_maplen,
+	    resp.mdd.mcq_polldbr_offset);
+
+	if (cq->set_ci_db == NULL)
+		goto err_buf;
+
+	cq->arm_db = mlx4_alloc_db(to_mctx(context),
+	    resp.mdd.mcq_armdbr_mapoffset, resp.mdd.mcq_armdbr_maplen,
+	    resp.mdd.mcq_armdbr_offset);
+
+	if (cq->arm_db == NULL)
+		goto err_db;
+
+	*cq->arm_db    = 0;
+	cq->arm_sn     = 1;
+	*cq->set_ci_db = 0;
+#endif
 	return &cq->ibv_cq;
 
 err_db:
@@ -215,6 +372,21 @@
 err_buf:
 	mlx4_free_buf(&cq->buf);
 
+#if defined(__SVR4) && defined(__sun)
+err_destroy:
+	/*
+	 * Calling ibv_cmd_destroy_cq() will try to take the ibv_cq
+	 * mutex that is initialised by the ibv_create_cq() entry point
+	 * that called us AFTER we return, so it is not initialised yet.
+	 * Initialise it here so the destroy call doesn't hang.
+	 */
+	pthread_mutex_init(&(cq->ibv_cq.mutex), NULL);
+	pthread_cond_init(&(cq->ibv_cq.cond), NULL);
+	cq->ibv_cq.comp_events_completed = 0;
+	cq->ibv_cq.async_events_completed = 0;
+
+	ibv_cmd_destroy_cq(&cq->ibv_cq);
+#endif
 err:
 	free(cq);
 
@@ -227,10 +399,15 @@
 	struct mlx4_resize_cq cmd;
 	struct mlx4_buf buf;
 	int old_cqe, outst_cqe, ret;
-
+#if !(defined(__SVR4) && defined(__sun))
+	struct ibv_resize_cq_resp resp;
+#else
+	struct mlx4_resize_cq_resp	resp;
+	void				*cqbuf;
+#endif
 	/* Sanity check CQ size before proceeding */
 	if (cqe > 0x3fffff)
-		return EINVAL;
+ 		return EINVAL;
 
 	pthread_spin_lock(&cq->lock);
 
@@ -247,32 +424,79 @@
 		goto out;
 	}
 
+#if !(defined(__SVR4) && defined(__sun))
 	ret = mlx4_alloc_cq_buf(to_mdev(ibcq->context->device), &buf, cqe);
 	if (ret)
 		goto out;
-
-	old_cqe = ibcq->cqe;
 	cmd.buf_addr = (uintptr_t) buf.buf;
+#endif
+	old_cqe = ibcq->cqe;
 
 #ifdef IBV_CMD_RESIZE_CQ_HAS_RESP_PARAMS
-	{
-		struct ibv_resize_cq_resp resp;
-		ret = ibv_cmd_resize_cq(ibcq, cqe - 1, &cmd.ibv_cmd, sizeof cmd,
+	ret = ibv_cmd_resize_cq(ibcq, cqe - 1, &cmd.ibv_cmd, sizeof cmd,
+#if !(defined(__SVR4) && defined(__sun))
 					&resp, sizeof resp);
-	}
 #else
+					&resp.ibv_resp, sizeof resp);
+#endif
+#else
 	ret = ibv_cmd_resize_cq(ibcq, cqe - 1, &cmd.ibv_cmd, sizeof cmd);
 #endif
-	if (ret) {
-		mlx4_free_buf(&buf);
+
+	if (ret) {
+#if !(defined(__SVR4) && defined(__sun))
+		mlx4_free_buf(&buf);
 		goto out;
 	}
 
-	mlx4_cq_resize_copy_cqes(cq, buf.buf, old_cqe);
+	mlx4_cq_resize_copy_cqes(cq, buf.buf, old_cqe);
+	mlx4_free_buf(&cq->buf);
+	cq->buf = buf;
+#else
+		goto out;
+	}
+	if (cq->buf.buf != NULL) {
+		buf.buf = malloc(cq->buf.length);
+		if (!buf.buf) {
+			ret = ENOMEM;
+			goto out;
+		}
 
-	mlx4_free_buf(&cq->buf);
-	cq->buf = buf;
+		memcpy(buf.buf, cq->buf.buf, cq->buf.length);
+		buf.length = cq->buf.length;
+		ret = munmap((char *)cq->buf.buf, cq->buf.length);
+		if (ret) {
+			free(buf.buf);
+			goto out;
+		}
+	}
+	/*
+	 * For Solaris the kernel driver passes back mmap information for
+	 * mapping the CQ memory it allocated.
+	 */
+	if (resp.mdd.mcq_rev < MLNX_UMAP_IF_VERSION) {
+		fprintf(stderr, PFX "libmlx4_resize_cq: libmlx4/hermon umap "
+		    "rev mismatch (kernel rev=%d)\n", resp.mdd.mcq_rev);
+		ret = EINVAL;
+		goto out;
+	}
 
+	cqbuf = mmap64((void *)0, resp.mdd.mcq_maplen, (PROT_READ | PROT_WRITE),
+	     MAP_SHARED, ibcq->context->mmap_fd, resp.mdd.mcq_mapoffset);
+
+	if (cqbuf == MAP_FAILED) {
+		ret = EINVAL;
+		goto out;
+	}
+	cq->buf.buf    = buf.buf;
+	cq->buf.length = buf.length;
+	mlx4_cq_resize_copy_cqes(cq, cqbuf, old_cqe);
+	cq->buf.buf    = cqbuf;
+	cq->buf.length = resp.mdd.mcq_maplen;
+	free(buf.buf);
+	cq->ibv_cq.cqe = resp.mdd.mcq_numcqe - 1;
+	cq->cqn        = resp.mdd.mcq_cqnum;
+#endif
 out:
 	pthread_spin_unlock(&cq->lock);
 	return ret;
@@ -287,6 +511,9 @@
 		return ret;
 
 	mlx4_free_db(to_mctx(cq->context), MLX4_DB_TYPE_CQ, to_mcq(cq)->set_ci_db);
+#if defined(__SVR4) && defined(__sun)
+	mlx4_free_db(to_mctx(cq->context), MLX4_DB_TYPE_CQ, to_mcq(cq)->arm_db);
+#endif
 	mlx4_free_buf(&to_mcq(cq)->buf);
 	free(to_mcq(cq));
 
@@ -300,6 +527,9 @@
 	struct mlx4_create_srq_resp resp;
 	struct mlx4_srq		   *srq;
 	int			    ret;
+#if defined(__SVR4) && defined(__sun)
+	void                       *srqbuf;
+#endif
 
 	/* Sanity check SRQ size before proceeding */
 	if (attr->attr.max_wr > 1 << 16 || attr->attr.max_sge > 64)
@@ -312,6 +542,7 @@
 	if (pthread_spin_init(&srq->lock, PTHREAD_PROCESS_PRIVATE))
 		goto err;
 
+#if !(defined(__SVR4) && defined(__sun))
 	srq->max     = align_queue_size(attr->attr.max_wr + 1);
 	srq->max_gs  = attr->attr.max_sge;
 	srq->counter = 0;
@@ -327,23 +558,118 @@
 
 	cmd.buf_addr = (uintptr_t) srq->buf.buf;
 	cmd.db_addr  = (uintptr_t) srq->db;
+#else
+	/*
+	 * Solaris SRQ WQE memory is supplied by the kernel; we'll update
+	 * these fields after creation.
+	 */
+	srq->buf.buf      = NULL;
+	srq->buf.length   = 0;
+	srq->db           = NULL;
 
+	/*
+	 * We need Solaris to allocate space for the spare WR in
+	 * the list that makes the queue work. The Solaris driver
+	 * will round up to the nearest power of 2, as align_queue_size()
+	 * does for OFED.
+	 */
+	attr->attr.max_wr += 1;
+#endif
+
 	ret = ibv_cmd_create_srq(pd, &srq->ibv_srq, attr,
 				 &cmd.ibv_cmd, sizeof cmd,
 				 &resp.ibv_resp, sizeof resp);
+#if defined(__SVR4) && defined(__sun)
+	if (ret) {
+		goto err;
+	}
+
+	/*
+	 * The kernel driver passes back mmap information for mapping the
+	 * SRQ work queue memory it allocated and the doorbell for
+	 * posting.
+	 */
+	if (resp.mdd.msrq_rev < 1) {
+		fprintf(stderr, PFX "libmlx4_create_srq: libmlx4/hermon umap "
+			"rev mismatch (kernel rev=%d)\n", resp.mdd.msrq_rev);
+		goto err_destroy;
+	}
+
+	srqbuf = mmap64((void *)0, resp.mdd.msrq_maplen,
+	    (PROT_READ | PROT_WRITE), MAP_SHARED, pd->context->mmap_fd,
+	    resp.mdd.msrq_mapoffset);
+
+	if (srqbuf == MAP_FAILED) {
+		goto err_destroy;
+	}
+
+	srq->buf.buf    = srqbuf;
+	srq->buf.length = resp.mdd.msrq_maplen;
+	srq->max	= resp.ibv_resp.max_wr;
+	srq->max_gs	= resp.ibv_resp.max_sge;
+	srq->srqn       = resp.mdd.msrq_srqnum;
+	srq->counter 	= 0;
+
+	srq->db = mlx4_alloc_db(to_mctx(pd->context),
+	    resp.mdd.msrq_rdbr_mapoffset, resp.mdd.msrq_rdbr_maplen,
+	    resp.mdd.msrq_rdbr_offset);
+	if (srq->db == NULL) {
+		goto err_unmap;
+	}
+
+	/*
+	 * The following call only initializes memory and control structures;
+	 * it uses the memory allocated by the kernel.
+	 * It also allocates the srq->wrid memory.
+	 */
+	if (mlx4_set_srq_buf(pd, srq, resp.mdd.msrq_wqesz,
+	    resp.mdd.msrq_numwqe)) {
+		goto err_db;
+	}
+
+	/*
+	 * The returned max wr will have been rounded up to the nearest
+	 * power of 2; subtracting 1 from that and reporting that value
+	 * as the max will give us the required free WR in the queue, as
+	 * in OFED.
+	 */
+	attr->attr.max_wr -= 1;
+#else
 	if (ret)
 		goto err_db;
 
 	srq->srqn = resp.srqn;
 
+#endif
 	return &srq->ibv_srq;
 
 err_db:
 	mlx4_free_db(to_mctx(pd->context), MLX4_DB_TYPE_RQ, srq->db);
 
+#if defined(__SVR4) && defined(__sun)
+	if (srq->wrid)
+		free(srq->wrid);
+err_unmap:
+	mlx4_free_buf(&srq->buf);
+
+err_destroy:
+	/*
+	 * Calling ibv_cmd_destroy_srq() will try to take the ibv_srq
+	 * mutex that is initialised by the ibv_create_srq() entry point
+	 * that called us AFTER we return, so it is not initialised yet.
+	 * Initialise it here so the destroy call doesn't hang.
+	 */
+	pthread_mutex_init(&(srq->ibv_srq.mutex), NULL);
+	pthread_cond_init(&(srq->ibv_srq.cond), NULL);
+	srq->ibv_srq.events_completed = 0;
+
+	ibv_cmd_destroy_srq(&srq->ibv_srq);
+
+#else
 err_free:
 	free(srq->wrid);
 	mlx4_free_buf(&srq->buf);
+#endif
 
 err:
 	free(srq);
@@ -357,7 +683,16 @@
 {
 	struct ibv_modify_srq cmd;
 
+#if !(defined(__SVR4) && defined(__sun))
 	return ibv_cmd_modify_srq(srq, attr, attr_mask, &cmd, sizeof cmd);
+#else
+	int	ret;
+
+	attr->max_wr += 1;		/* See create_srq */
+	ret = ibv_cmd_modify_srq(srq, attr, attr_mask, &cmd, sizeof cmd);
+	attr->max_wr -= 1;
+	return (ret);
+#endif
 }
 
 int mlx4_query_srq(struct ibv_srq *srq,
@@ -365,7 +700,17 @@
 {
 	struct ibv_query_srq cmd;
 
+#if !(defined(__SVR4) && defined(__sun))
 	return ibv_cmd_query_srq(srq, attr, &cmd, sizeof cmd);
+#else
+
+	int	ret;
+
+	ret = ibv_cmd_query_srq(srq, attr, &cmd, sizeof cmd);
+	attr->max_wr -= 1;		/* See create_srq */
+
+	return (ret);
+#endif
 }
 
 int mlx4_destroy_srq(struct ibv_srq *ibsrq)
@@ -443,12 +788,16 @@
 struct ibv_qp *mlx4_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr)
 {
 	struct mlx4_create_qp     cmd;
-	struct ibv_create_qp_resp resp;
 	struct mlx4_qp		 *qp;
 	int			  ret;
 	struct mlx4_context	 *context = to_mctx(pd->context);
+#if !(defined(__SVR4) && defined(__sun))
+	struct ibv_create_qp_resp resp;
+#else
+	struct mlx4_create_qp_resp	resp;
+	void				*qpbuf;
+#endif
 
-
 	/* Sanity check QP size before proceeding */
 	if (verify_sizes(attr, context))
 		return NULL;
@@ -457,6 +806,7 @@
 	if (!qp)
 		return NULL;
 
+#if !(defined(__SVR4) && defined(__sun))
 	mlx4_calc_sq_wqe_size(&attr->cap, attr->qp_type, qp);
 
 	/*
@@ -466,6 +816,7 @@
 	qp->sq_spare_wqes = (2048 >> qp->sq.wqe_shift) + 1;
 	qp->sq.wqe_cnt = align_queue_size(attr->cap.max_send_wr + qp->sq_spare_wqes);
 	qp->rq.wqe_cnt = align_queue_size(attr->cap.max_recv_wr);
+#endif
 
 	if (attr->srq || attr->qp_type == IBV_QPT_XRC)
 		attr->cap.max_recv_wr = qp->rq.wqe_cnt = 0;
@@ -476,6 +827,22 @@
 			attr->cap.max_recv_wr = 1;
 	}
 
+#if defined(__SVR4) && defined(__sun)
+	if (pthread_spin_init(&qp->sq.lock, PTHREAD_PROCESS_PRIVATE) ||
+	    pthread_spin_init(&qp->rq.lock, PTHREAD_PROCESS_PRIVATE))
+		goto err;
+
+	/*
+	 * Solaris QP work queue memory is supplied by the kernel, so
+	 * we will update these fields after creation.
+	 */
+	qp->buf.buf	= NULL;
+	qp->sq.wrid	= NULL;
+	qp->rq.wrid	= NULL;
+	qp->buf.length	= 0;
+	qp->db		= NULL;
+	memset(&cmd, 0, sizeof(cmd));
+#else
 	if (mlx4_alloc_qp_buf(pd, &attr->cap, attr->qp_type, qp))
 		goto err;
 
@@ -505,10 +872,12 @@
 		; /* nothing */
 	cmd.sq_no_prefetch = 0;	/* OK for ABI 2: just a reserved field */
 	memset(cmd.reserved, 0, sizeof cmd.reserved);
+#endif
 
 	pthread_mutex_lock(&to_mctx(pd->context)->qp_table_mutex);
 
 	ret = ibv_cmd_create_qp(pd, &qp->ibv_qp, attr, &cmd.ibv_cmd, sizeof cmd,
+#if !(defined(__SVR4) && defined(__sun))
 				&resp, sizeof resp);
 	if (ret)
 		goto err_rq_db;
@@ -516,6 +885,70 @@
 	ret = mlx4_store_qp(to_mctx(pd->context), qp->ibv_qp.qp_num, qp);
 	if (ret)
 		goto err_destroy;
+#else
+				&resp.ibv_resp, sizeof resp);
+	if (ret)
+		goto err_free;
+
+	/*
+	 * The kernel driver passes back mmap information for mapping the
+	 * QP work queue memory it allocated into user space.
+	 */
+	if (resp.mdd.mqp_rev < 2) {
+		fprintf(stderr, PFX "libmlx4_create_qp: libmlx4/hermon umap "
+		    "rev mismatch (kernel rev=%d)\n", resp.mdd.mqp_rev);
+		goto err_destroy;
+	}
+	qpbuf = mmap64((void *)0, resp.mdd.mqp_maplen, (PROT_READ | PROT_WRITE),
+	    MAP_SHARED, pd->context->mmap_fd, resp.mdd.mqp_mapoffset);
+
+	if (qpbuf == MAP_FAILED)
+		goto err_destroy;
+
+	/*
+	 * We need to set qp->buf here because, if alloc_db fails,
+	 * we'll call mlx4_free_buf() to unmap it.
+	 */
+	qp->buf.buf	= qpbuf;
+	qp->buf.length	= resp.mdd.mqp_maplen;
+
+	if (!attr->srq && attr->qp_type != IBV_QPT_XRC) {
+		qp->db = mlx4_alloc_db(to_mctx(pd->context),
+		    resp.mdd.mqp_rdbr_mapoffset, resp.mdd.mqp_rdbr_maplen,
+		    resp.mdd.mqp_rdbr_offset);
+		if (qp->db == NULL)
+			goto err_buf;
+
+		*qp->db = 0;
+	}
+
+	/*
+	 * Retrieve the actual send queue size and the number of headroom
+	 * WQEs that were required, based on whether the kernel set up
+	 * prefetch for the send queue.
+	 * 	Note: mqp_sq_numwqe includes the headroom WQEs. The private
+	 *	      wqe_cnt also includes headroom WQEs; the verbs count
+	 *	      should reflect the WQE count that is usable.
+	 */
+	qp->sq_spare_wqes = resp.mdd.mqp_sq_headroomwqes;
+	qp->sq.wqe_cnt    = resp.mdd.mqp_sq_numwqe;
+
+	if (attr->srq)
+		qp->rq.wqe_cnt  = 0;
+	else
+		qp->rq.wqe_cnt  = resp.mdd.mqp_rq_numwqe;
+
+	if (mlx4_set_qp_buf(pd, qp, qpbuf, resp.mdd.mqp_maplen,
+	    resp.mdd.mqp_rq_wqesz, resp.mdd.mqp_rq_off,
+	    resp.mdd.mqp_sq_wqesz, resp.mdd.mqp_sq_off))
+		goto err_rq_db;
+
+	mlx4_init_qp_indices(qp);
+
+	ret = mlx4_store_qp(to_mctx(pd->context), qp->ibv_qp.qp_num, qp);
+	if (ret)
+		goto err_rq_db;
+#endif
 	pthread_mutex_unlock(&to_mctx(pd->context)->qp_table_mutex);
 
 	qp->rq.wqe_cnt = attr->cap.max_recv_wr;
@@ -536,9 +969,38 @@
 
 	return &qp->ibv_qp;
 
+#if defined(__SVR4) && defined(__sun)
+err_rq_db:
+	if (!attr->srq && attr->qp_type != IBV_QPT_XRC)
+		mlx4_free_db(to_mctx(pd->context), MLX4_DB_TYPE_RQ, qp->db);
+err_buf:
+	mlx4_free_buf(&qp->buf);
+
 err_destroy:
+	/*
+	 * Calling ibv_cmd_destroy_qp() will try to take the ibv_qp
+	 * mutex that is initialised by the ibv_create_qp() entry point
+	 * that called us AFTER we return, so it is not initialised yet.
+	 * Initialise it here so the destroy call doesn't hang.
+	 */
+	pthread_mutex_init(&(qp->ibv_qp.mutex), NULL);
+	pthread_cond_init(&(qp->ibv_qp.cond), NULL);
+	qp->ibv_qp.events_completed = 0;
 	ibv_cmd_destroy_qp(&qp->ibv_qp);
+err_free:
+	pthread_mutex_unlock(&to_mctx(pd->context)->qp_table_mutex);
 
+	if (qp->sq.wrid)
+		free(qp->sq.wrid);
+
+	if (qp->rq.wrid)
+		free(qp->rq.wrid);
+err:
+	free(qp);
+#else
+err_destroy:
+	ibv_cmd_destroy_qp(&qp->ibv_qp);
+
 err_rq_db:
 	pthread_mutex_unlock(&to_mctx(pd->context)->qp_table_mutex);
 	if (!attr->srq && attr->qp_type != IBV_QPT_XRC)
@@ -552,6 +1014,7 @@
 
 err:
 	free(qp);
+#endif
 
 	return NULL;
 }
@@ -745,6 +1208,13 @@
 				    struct ibv_cq *xrc_cq,
 				    struct ibv_srq_init_attr *attr)
 {
+#if defined(__SVR4) && defined(__sun)
+	/*
+	 * Not supported by the Solaris kernel driver.  When/if supported,
+	 * this routine will need to be ported.
+	 */
+	return NULL;
+#else
 	struct mlx4_create_xrc_srq  cmd;
 	struct mlx4_create_srq_resp resp;
 	struct mlx4_srq		   *srq;
@@ -807,6 +1277,7 @@
 	free(srq);
 
 	return NULL;
+#endif
 }
 
 struct ibv_xrc_domain *mlx4_open_xrc_domain(struct ibv_context *context,
diff -r -u /tmp/839450/libmlx4-1.0.1/src/qp.c libmlx4-1.0.1/src/qp.c
--- /tmp/839450/libmlx4-1.0.1/src/qp.c	Thu Mar 10 04:48:34 2011
+++ libmlx4-1.0.1/src/qp.c	Tue Mar 15 07:09:43 2011
@@ -407,7 +407,8 @@
 out:
 	ctx = to_mctx(ibqp->context);
 
-	if (nreq == 1 && inl && size > 1 && size < ctx->bf_buf_size / 16) {
+	if (ctx->ibv_ctx.device->blueflame_enabled && nreq == 1 && inl &&
+	    size > 1 && size < ctx->bf_buf_size / 16) {
 		ctrl->owner_opcode |= htonl((qp->sq.head & 0xffff) << 8);
 		*(uint32_t *) (&ctrl->vlan_tag) |= qp->doorbell_qpn;
 		/*
@@ -589,6 +590,59 @@
 		; /* nothing */
 }
 
+#if defined(__SVR4) && defined(__sun)
+int mlx4_set_qp_buf(struct ibv_pd *pd, struct mlx4_qp *qp, void *qpbuf,
+    uint64_t buflen, uint32_t rq_wqesz, uint32_t rq_off,
+    uint32_t sq_wqesz, uint32_t sq_off)
+{
+	qp->buf.buf      = qpbuf;
+	qp->buf.length   = buflen;
+
+	qp->sq.wrid = malloc(qp->sq.wqe_cnt * sizeof (uint64_t));
+	if (!qp->sq.wrid)
+		return -1;
+
+	if (qp->rq.wqe_cnt) {
+		qp->rq.wrid = malloc(qp->rq.wqe_cnt * sizeof (uint64_t));
+		if (!qp->rq.wrid) {
+			free(qp->sq.wrid);
+			return -1;
+		}
+	}
+
+	for (qp->rq.wqe_shift = 4; 1 << qp->rq.wqe_shift < rq_wqesz;
+	     qp->rq.wqe_shift++) {
+		; /* nothing */
+	}
+
+	for (qp->sq.wqe_shift = 6; 1 << qp->sq.wqe_shift < sq_wqesz;
+	     qp->sq.wqe_shift++) {
+		; /* nothing */
+	}
+
+	qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
+		(qp->sq.wqe_cnt << qp->sq.wqe_shift);
+	if (qp->rq.wqe_shift > qp->sq.wqe_shift) {
+		qp->rq.offset = 0;
+		qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
+	} else {
+		qp->rq.offset = qp->sq.wqe_cnt << qp->sq.wqe_shift;
+		qp->sq.offset = 0;
+	}
+
+	if ((long int)qp->buf.length < (long int)qp->buf_size) {
+		fprintf(stderr, PFX "QP kernel buffer size %lu < user buf "
+		    "size %d\n", (unsigned long)qp->buf.length, qp->buf_size);
+	}
+	if ((!rq_off && qp->rq.offset) || (!sq_off && qp->sq.offset)) {
+		fprintf(stderr, PFX "QP kernel and user out of sync on "
+		    "buffer order\n");
+	}
+
+	memset(qp->buf.buf, 0, qp->buf_size);
+	return 0;
+}
+#endif
 int mlx4_alloc_qp_buf(struct ibv_pd *pd, struct ibv_qp_cap *cap,
 		       enum ibv_qp_type type, struct mlx4_qp *qp)
 {
diff -r -u /tmp/839450/libmlx4-1.0.1/src/mlx4.h libmlx4-1.0.1/src/mlx4.h
--- /tmp/839450/libmlx4-1.0.1/src/mlx4.h	Thu Mar 10 04:48:34 2011
+++ libmlx4-1.0.1/src/mlx4.h	Fri Feb 11 04:05:04 2011
@@ -202,7 +202,11 @@
 	int				xrc_srq_table_shift;
 	int				xrc_srq_table_mask;
 
+#if defined(__SVR4) && defined(__sun)
+	struct mlx4_db_page	       *db_page_list;
+#else
 	struct mlx4_db_page	       *db_list[MLX4_NUM_DB_TYPE];
+#endif
 	pthread_mutex_t			db_list_mutex;
 };
 
@@ -351,7 +355,12 @@
 int mlx4_alloc_buf(struct mlx4_buf *buf, size_t size, int page_size);
 void mlx4_free_buf(struct mlx4_buf *buf);
 
+#if defined(__SVR4) && defined(__sun)
+uint32_t *mlx4_alloc_db(struct mlx4_context *context, uint64_t mapoffset,
+                        uint64_t maplen, uint32_t offset);
+#else
 uint32_t *mlx4_alloc_db(struct mlx4_context *context, enum mlx4_db_type type);
+#endif
 void mlx4_free_db(struct mlx4_context *context, enum mlx4_db_type type, uint32_t *db);
 
 int mlx4_query_device(struct ibv_context *context,
@@ -360,11 +369,17 @@
 		     struct ibv_port_attr *attr);
 
 struct ibv_pd *mlx4_alloc_pd(struct ibv_context *context);
+struct ibv_shpd *mlx4_alloc_shpd(struct ibv_pd *pd, uint64_t share_key, struct ibv_shpd *shpd);
+struct ibv_pd *mlx4_share_pd(struct ibv_context *context, struct ibv_shpd *shpd, uint64_t share_key);
 int mlx4_free_pd(struct ibv_pd *pd);
 
 struct ibv_mr *mlx4_reg_mr(struct ibv_pd *pd, void *addr,
 			    size_t length, int access);
+struct ibv_mr *mlx4_reg_mr_relaxed(struct ibv_pd *pd, void *addr,
+			    size_t length, int access);
 int mlx4_dereg_mr(struct ibv_mr *mr);
+int mlx4_dereg_mr_relaxed(struct ibv_mr *mr);
+int mlx4_flush_relaxed_mr(struct ibv_pd *pd);
 
 struct ibv_cq *mlx4_create_cq(struct ibv_context *context, int cqe,
 			       struct ibv_comp_channel *channel,
@@ -388,8 +403,13 @@
 int mlx4_query_srq(struct ibv_srq *srq,
 			   struct ibv_srq_attr *attr);
 int mlx4_destroy_srq(struct ibv_srq *srq);
+#if defined(__SVR4) && defined(__sun)
+int mlx4_set_srq_buf(struct ibv_pd *pd, struct mlx4_srq *srq,
+			uint32_t srq_wqesz, uint32_t srq_numwqe);
+#else
 int mlx4_alloc_srq_buf(struct ibv_pd *pd, struct ibv_srq_attr *attr,
 			struct mlx4_srq *srq);
+#endif
 void mlx4_free_srq_wqe(struct mlx4_srq *srq, int ind);
 int mlx4_post_srq_recv(struct ibv_srq *ibsrq,
 		       struct ibv_recv_wr *wr,
@@ -415,8 +435,14 @@
 void mlx4_calc_sq_wqe_size(struct ibv_qp_cap *cap, enum ibv_qp_type type,
 			   struct mlx4_qp *qp);
 int num_inline_segs(int data, enum ibv_qp_type type);
+#if defined(__SVR4) && defined(__sun)
+int mlx4_set_qp_buf(struct ibv_pd *pd, struct mlx4_qp *qp,
+                    void *qpbuf, uint64_t buflen, uint32_t rq_wqesz,
+                    uint32_t rq_off, uint32_t sq_wqesz, uint32_t sq_off);
+#else
 int mlx4_alloc_qp_buf(struct ibv_pd *pd, struct ibv_qp_cap *cap,
 		       enum ibv_qp_type type, struct mlx4_qp *qp);
+#endif
 void mlx4_set_sq_sizes(struct mlx4_qp *qp, struct ibv_qp_cap *cap,
 		       enum ibv_qp_type type);
 struct mlx4_qp *mlx4_find_qp(struct mlx4_context *ctx, uint32_t qpn);
diff -r -u /tmp/839450/libmlx4-1.0.1/src/srq.c libmlx4-1.0.1/src/srq.c
--- /tmp/839450/libmlx4-1.0.1/src/srq.c	Thu Mar 10 04:48:34 2011
+++ libmlx4-1.0.1/src/srq.c	Fri Feb 11 04:05:05 2011
@@ -124,6 +124,54 @@
 	return err;
 }
 
+#if defined(__SVR4) && defined(__sun)
+/*
+ * The Solaris kernel allocates the SRQ WQE buffer; this routine
+ * initializes the control structures and buffer contents for the
+ * SRQ memory.  That memory is mapped into the caller's address
+ * space prior to this call.
+ */
+int mlx4_set_srq_buf(struct ibv_pd *pd, struct mlx4_srq *srq,
+				uint32_t srq_wqesz, uint32_t srq_numwqe)
+{
+	struct mlx4_wqe_srq_next_seg *next;
+	struct mlx4_wqe_data_seg *scatter;
+	int i;
+
+	srq->max         = srq_numwqe;
+
+	srq->wrid = malloc(srq->max * sizeof (uint64_t));
+	if (!srq->wrid) {
+		return -1;
+	}
+
+	for (srq->wqe_shift = 5; 1 << srq->wqe_shift < srq_wqesz; ++srq->wqe_shift) {
+		; /* nothing */
+	}
+
+	memset(srq->buf.buf, 0, srq->buf.length);
+
+	/*
+	 * Now initialize the SRQ buffer so that all of the WQEs are
+	 * linked into the list of free WQEs.
+	 */
+	for (i = 0; i < srq->max; ++i) {
+		next = get_wqe(srq, i);
+		next->next_wqe_index = htons((i + 1) & (srq->max - 1));
+
+		for (scatter = (void *) (next + 1);
+		    (void *) scatter < (void *) next + (1 << srq->wqe_shift);
+		    ++scatter)
+			scatter->lkey = htonl(MLX4_INVALID_LKEY);
+	}
+
+	srq->head = 0;
+	srq->tail = srq->max - 1;
+
+	return 0;
+}
+#endif
+
 int mlx4_alloc_srq_buf(struct ibv_pd *pd, struct ibv_srq_attr *attr,
 		       struct mlx4_srq *srq)
 {
diff -r -u /tmp/839450/libmlx4-1.0.1/src/dbrec.c libmlx4-1.0.1/src/dbrec.c
--- /tmp/839450/libmlx4-1.0.1/src/dbrec.c	Tue Sep  8 06:40:36 2009
+++ libmlx4-1.0.1/src/dbrec.c	Fri Feb 11 04:05:04 2011
@@ -41,7 +41,157 @@
 
 #include "mlx4.h"
 
+#if defined(__SVR4) && defined(__sun)
+/*
+ * In Solaris the doorbell UAR is set up by the kernel; we only
+ * mmap the offset specified for the doorbell into the user
+ * address space.  A reference counted page list is maintained
+ * per user context of doorbell pages that have been mapped.
+ */
 struct mlx4_db_page {
+	struct mlx4_db_page	   *prev, *next;
+	uint32_t                   *dbp_page_addr;
+	uint32_t                   *dbp_page_addr_end;
+	uint64_t                   dbp_map_offset;
+	uint64_t                   dbp_map_len;
+	int                        fd;
+	int                        dbp_use_cnt;
+};
+
+/*
+ * These are not required in Solaris, but we keep them to pass
+ * as dummy arguments so that the doorbell function calls can
+ * keep the same API.
+ */
+static const int db_size[] = {
+	[MLX4_DB_TYPE_CQ] = 8,
+	[MLX4_DB_TYPE_RQ] = 4,
+};
+
+/*
+ * Return a doorbell pointer for the specified map offset.  If this
+ * offset has not been previously mapped it will be mmap'd and the
+ * appropriate doorbell returned; otherwise, the reference count
+ * will be updated and the appropriate doorbell will be returned.
+ * Each successful call to mlx4_alloc_db() must be paired with a call to
+ * mlx4_free_db() to release the reference to the doorbell page when the
+ * doorbell is no longer required.
+ */
+uint32_t *mlx4_alloc_db(struct mlx4_context *context,
+                        uint64_t mapoffset,
+                        uint64_t maplen,
+                        uint32_t offset)
+{
+	struct mlx4_db_page *page;
+	uint32_t *db = NULL;
+
+	pthread_mutex_lock(&context->db_list_mutex);
+
+	for (page = context->db_page_list; page; page = page->next) {
+		if (page->dbp_map_offset == mapoffset &&
+		    page->fd == context->ibv_ctx.mmap_fd) {
+			if (page->dbp_map_len == maplen) {
+				goto found;
+			}
+			fprintf(stderr,
+			        PFX "mlx4_alloc_db: Bad maplen (%" PRId64 ")\n",
+			        maplen);
+			goto out;
+		}
+	}
+
+	page = malloc(sizeof(struct mlx4_db_page));
+	if (!page) {
+		fprintf(stderr, PFX "DB alloc memory allocate failure\n");
+		goto out;
+	}
+
+	page->dbp_page_addr = (uint32_t *) mmap64((void *)0, maplen,
+	                                          (PROT_READ | PROT_WRITE),
+	                                          MAP_SHARED,
+	                                          context->ibv_ctx.mmap_fd,
+	                                          mapoffset);
+	if (page->dbp_page_addr == MAP_FAILED) {
+		fprintf(stderr, PFX
+		        "Unable to map doorbell entry: maplen:%" PRId64 ", offset:%" PRId64
+		        "\n", maplen, mapoffset);
+		free(page);
+		goto out;
+	}
+
+	page->dbp_page_addr_end = (uint32_t *)((uint8_t *) page->dbp_page_addr + maplen);
+
+	page->dbp_map_offset  = mapoffset;
+	page->dbp_map_len     = maplen;
+	page->dbp_use_cnt     = 0;
+	page->fd	      = context->ibv_ctx.mmap_fd;
+	page->next            = context->db_page_list;
+
+	if (context->db_page_list)
+		context->db_page_list->prev = page;
+
+	page->prev            = NULL;
+	context->db_page_list = page;
+
+found:
+	page->dbp_use_cnt++;
+
+
+	db = (uint32_t *) ((uint8_t *)page->dbp_page_addr + offset);
+out:
+	pthread_mutex_unlock(&context->db_list_mutex);
+
+	return db;
+}
+
+/*
+ * Dereference the doorbell page mapping associated with the specified doorbell.
+ * If this is the last doorbell that references a mapped region, then that
+ * region will be unmapped.
+ */
+void mlx4_free_db(struct mlx4_context *context,
+                  enum mlx4_db_type dummy,
+                  uint32_t *db)
+{
+	struct mlx4_db_page *page;
+
+	pthread_mutex_lock(&context->db_list_mutex);
+
+	for (page = context->db_page_list; page; page = page->next) {
+		if (db >= page->dbp_page_addr && db < page->dbp_page_addr_end) {
+			break;
+		}
+	}
+
+	if (page == NULL) {
+		fprintf(stderr, PFX "mlx4_free_db: Doorbell not mapped\n");
+		goto out;
+	}
+
+	page->dbp_use_cnt--;
+	if (page->dbp_use_cnt > 0) {
+		goto out;
+	}
+
+	munmap((void *)page->dbp_page_addr, page->dbp_map_len);
+	if (page->next) {
+		page->next->prev = page->prev;
+	}
+	if (page->prev) {
+		page->prev->next = page->next;
+	} else {
+		context->db_page_list = page->next;
+	}
+
+	free(page);
+
+out:
+	pthread_mutex_unlock(&context->db_list_mutex);
+}
+
+#else
+
+struct mlx4_db_page {
 	struct mlx4_db_page	       *prev, *next;
 	struct mlx4_buf			buf;
 	int				num_db;
@@ -152,3 +302,4 @@
 out:
 	pthread_mutex_unlock(&context->db_list_mutex);
 }
+#endif
diff -r -u /tmp/839450/libmlx4-1.0.1/src/buf.c libmlx4-1.0.1/src/buf.c
--- /tmp/839450/libmlx4-1.0.1/src/buf.c	Tue Sep  8 06:41:26 2009
+++ libmlx4-1.0.1/src/buf.c	Fri Feb 11 04:05:03 2011
@@ -78,6 +78,8 @@
 
 void mlx4_free_buf(struct mlx4_buf *buf)
 {
+#if !(defined(__SVR4) && defined(__sun))
 	ibv_dofork_range(buf->buf, buf->length);
+#endif
 	munmap(buf->buf, buf->length);
 }
diff -r -u /tmp/839450/libmlx4-1.0.1/src/mlx4.c libmlx4-1.0.1/src/mlx4.c
--- /tmp/839450/libmlx4-1.0.1/src/mlx4.c	Thu Mar 10 04:48:34 2011
+++ libmlx4-1.0.1/src/mlx4.c	Fri Mar 11 14:05:26 2011
@@ -112,8 +112,13 @@
 	.query_port    = mlx4_query_port,
 	.alloc_pd      = mlx4_alloc_pd,
 	.dealloc_pd    = mlx4_free_pd,
+	.alloc_shpd    = mlx4_alloc_shpd,
+	.share_pd      = mlx4_share_pd,
 	.reg_mr	       = mlx4_reg_mr,
+	.reg_mr_relaxed	       = mlx4_reg_mr_relaxed,
 	.dereg_mr      = mlx4_dereg_mr,
+	.dereg_mr_relaxed      = mlx4_dereg_mr_relaxed,
+	.flush_relaxed_mr      = mlx4_flush_relaxed_mr,
 	.create_cq     = mlx4_create_cq,
 	.poll_cq       = mlx4_poll_cq,
 	.req_notify_cq = mlx4_arm_cq,
@@ -144,6 +149,11 @@
 	struct mlx4_alloc_ucontext_resp resp;
 	int				i;
 	struct ibv_device_attr		dev_attrs;
+#if defined(__SVR4) && defined(__sun)
+	pid_t                           cur_pid;
+	off64_t                         uarpg_offset;
+	uint32_t                        temp_qp_num;
+#endif
 
 	context = calloc(1, sizeof *context);
 	if (!context)
@@ -150,11 +160,29 @@
 		return NULL;
 
 	context->ibv_ctx.cmd_fd = cmd_fd;
-
+#if defined(__SVR4) && defined(__sun)
+	context->ibv_ctx.device = ibdev;
+#endif
 	if (ibv_cmd_get_context(&context->ibv_ctx, &cmd, sizeof cmd,
 				&resp.ibv_resp, sizeof resp))
 		goto err_free;
 
+#if defined(__SVR4) && defined(__sun)
+	/*
+	 * OFED expects a power of two; round up here to make the user
+	 * table large enough.
+	 */
+	for (temp_qp_num = 1; temp_qp_num < resp.qp_tab_size; temp_qp_num <<= 1)
+		;
+	resp.qp_tab_size = temp_qp_num;
+
+	/*
+	 * NOTE: In Solaris this value is not returned in the channel interface
+	 * opaque data and is assumed to be 2*256 by the dapl code.  We have
+	 * made the same assumption here.
+	 */
+	resp.bf_reg_size = 512;
+#endif
 	context->num_qps	= resp.qp_tab_size;
 	context->qp_table_shift = ffs(context->num_qps) - 1 - MLX4_QP_TABLE_BITS;
 	context->qp_table_mask	= (1 << context->qp_table_shift) - 1;
@@ -172,20 +200,45 @@
 	for (i = 0; i < MLX4_XRC_SRQ_TABLE_SIZE; ++i)
 		context->xrc_srq_table[i].refcnt = 0;
 
+#if defined(__SVR4) && defined(__sun)
+	context->db_page_list = NULL;
+#else
 	for (i = 0; i < MLX4_NUM_DB_TYPE; ++i)
 		context->db_list[i] = NULL;
+#endif
 
 	pthread_mutex_init(&context->db_list_mutex, NULL);
 
+#if defined(__SVR4) && defined(__sun)
+	cur_pid = getpid();
+	uarpg_offset = (((off64_t) cur_pid << MLNX_UMAP_RSRC_TYPE_SHIFT) |
+	                MLNX_UMAP_UARPG_RSRC) * to_mdev(ibdev)->page_size;
+	context->uar = mmap64((void *)0, to_mdev(ibdev)->page_size, PROT_WRITE,
+			    MAP_SHARED, context->ibv_ctx.mmap_fd, uarpg_offset);
+#else
 	context->uar = mmap(NULL, to_mdev(ibdev)->page_size, PROT_WRITE,
 			    MAP_SHARED, cmd_fd, 0);
+#endif
 	if (context->uar == MAP_FAILED)
 		goto err_free;
 
 	if (resp.bf_reg_size) {
+#if defined(__SVR4) && defined(__sun)
+		/*
+		 * If the kernel driver supports the BlueFlame feature, map
+		 * the BlueFlame user access region as well.
+		 */
+		uarpg_offset = (((off64_t) cur_pid << MLNX_UMAP_RSRC_TYPE_SHIFT)
+		    | MLNX_UMAP_BLUEFLAMEPG_RSRC) * to_mdev(ibdev)->page_size;
+
+		context->bf_page = mmap64((void *)0, to_mdev(ibdev)->page_size,
+		    PROT_WRITE, MAP_SHARED, context->ibv_ctx.mmap_fd,
+		    uarpg_offset);
+#else
 		context->bf_page = mmap(NULL, to_mdev(ibdev)->page_size,
 					PROT_WRITE, MAP_SHARED, cmd_fd,
 					to_mdev(ibdev)->page_size);
+#endif
 		if (context->bf_page == MAP_FAILED) {
 			fprintf(stderr, PFX "Warning: BlueFlame available, "
 				"but failed to mmap() BlueFlame page.\n");
@@ -214,6 +267,7 @@
 	context->max_qp_wr = dev_attrs.max_qp_wr;
 	context->max_sge = dev_attrs.max_sge;
 	context->max_cqe = dev_attrs.max_cqe;
+#ifdef HAVE_IBV_XRC_OPS
 	if (!(dev_attrs.device_cap_flags & IBV_DEVICE_XRC)) {
 		fprintf(stderr, PFX "There is a mismatch between "
 		        "the kernel and the userspace libraries: "
@@ -220,6 +274,7 @@
 			"Kernel does not support XRC. Exiting.\n");
 		goto query_free;
 	}
+#endif
 
 	return &context->ibv_ctx;
 
@@ -240,6 +295,7 @@
 	munmap(context->uar, to_mdev(ibctx->device)->page_size);
 	if (context->bf_page)
 		munmap(context->bf_page, to_mdev(ibctx->device)->page_size);
+
 	free(context);
 }
 
diff -r -u /tmp/839450/libmlx4-1.0.1/configure.in libmlx4-1.0.1/configure.in
--- /tmp/839450/libmlx4-1.0.1/configure.in	Thu Mar 10 04:48:34 2011
+++ libmlx4-1.0.1/configure.in	Tue Mar 15 07:41:46 2011
@@ -43,11 +43,11 @@
 AC_C_CONST
 AC_CHECK_SIZEOF(long)
 AC_CHECK_MEMBER(struct ibv_context.more_ops,
-    [AC_DEFINE([HAVE_IBV_MORE_OPS], 1, [Define to 1 if more_ops is a member of ibv_context])],,
+    [AC_DEFINE([HAVE_IBV_MORE_OPS], 0, [Define to 1 if more_ops is a member of ibv_context])],,
     [#include <infiniband/verbs.h>])
-AC_CHECK_MEMBER(struct ibv_more_ops.create_xrc_srq,
-    [AC_DEFINE([HAVE_IBV_XRC_OPS], 1, [Define to 1 if have xrc ops])],,
-    [#include <infiniband/verbs.h>])
+#AC_CHECK_MEMBER(struct ibv_more_ops.create_xrc_srq,
+#    [AC_DEFINE([HAVE_IBV_XRC_OPS], 1, [Define to 1 if have xrc ops])],,
+#    [#include <infiniband/verbs.h>])
 
 dnl Checks for library functions
 AC_CHECK_FUNC(ibv_read_sysfs_file, [],
@@ -80,6 +80,5 @@
 fi
 AC_SUBST(MLX4_VERSION_SCRIPT)
 
-SHAVE_INIT([], [enable])
 AC_CONFIG_FILES([Makefile libmlx4.spec shave shave-libtool])
 AC_OUTPUT
diff -r -u /tmp/839450/libmlx4-1.0.1/libmlx4.spec.in libmlx4-1.0.1/libmlx4.spec.in
--- /tmp/839450/libmlx4-1.0.1/libmlx4.spec.in	Thu Mar 10 00:23:34 2011
+++ libmlx4-1.0.1/libmlx4.spec.in	Tue Mar 15 07:43:54 2011
@@ -6,7 +6,7 @@
 Group: System Environment/Libraries
 License: GPLv2 or BSD
 Url: http://openfabrics.org/
-Source: http://openfabrics.org/downloads/libmlx4/libmlx4-1.0.1.tar.gz
+Source: http://openfabrics.org/downloads/libmlx4-1.0.1.tar.gz
 BuildRoot: %(mktemp -ud %{_tmppath}/%{name}-%{version}-%{release}-XXXXXX)
 
 BuildRequires: libibverbs-devel >= 1.1-0.1.rc2
diff -r -u /tmp/839450/libmlx4-1.0.1/configure libmlx4-1.0.1/configure
--- /tmp/839450/libmlx4-1.0.1/configure	Thu Mar 10 04:48:41 2011
+++ libmlx4-1.0.1/configure	Tue Mar 15 07:35:49 2011
@@ -3899,13 +3899,13 @@
   CFLAGS=$ac_save_CFLAGS
 elif test $ac_cv_prog_cc_g = yes; then
   if test "$GCC" = yes; then
-    CFLAGS="-g -O2"
+    CFLAGS="-g -O3"
   else
     CFLAGS="-g"
   fi
 else
   if test "$GCC" = yes; then
-    CFLAGS="-O2"
+    CFLAGS="-O3"
   else
     CFLAGS=
   fi
@@ -8890,6 +8890,7 @@
 	;;
       esac
       link_all_deplibs=yes
+	hardcode_libdir_flag_spec=
       ;;
 
     sunos4*)
@@ -11113,13 +11114,13 @@
   CFLAGS=$ac_save_CFLAGS
 elif test $ac_cv_prog_cc_g = yes; then
   if test "$GCC" = yes; then
-    CFLAGS="-g -O2"
+    CFLAGS="-g -O3"
   else
     CFLAGS="-g"
   fi
 else
   if test "$GCC" = yes; then
-    CFLAGS="-O2"
+    CFLAGS="-O3"
   else
     CFLAGS=
   fi
@@ -11654,11 +11655,11 @@
 
 ac_fn_c_check_member "$LINENO" "struct ibv_more_ops" "create_xrc_srq" "ac_cv_member_struct_ibv_more_ops_create_xrc_srq" "#include <infiniband/verbs.h>
 "
-if test "x$ac_cv_member_struct_ibv_more_ops_create_xrc_srq" = x""yes; then :
+#if test "x$ac_cv_member_struct_ibv_more_ops_create_xrc_srq" = x""yes; then :
 
-$as_echo "#define HAVE_IBV_XRC_OPS 1" >>confdefs.h
+#$as_echo "#define HAVE_IBV_XRC_OPS 1" >>confdefs.h
 
-fi
+#fi
 
 
 ac_fn_c_check_func "$LINENO" "ibv_read_sysfs_file" "ac_cv_func_ibv_read_sysfs_file"
diff -r -u /tmp/839450/libmlx4-1.0.1/Makefile.in libmlx4-1.0.1/Makefile.in
--- /tmp/839450/libmlx4-1.0.1/Makefile.in	Thu Mar 10 04:48:40 2011
+++ libmlx4-1.0.1/Makefile.in	Tue Mar 15 07:48:16 2011
@@ -264,7 +264,7 @@
 @HAVE_IBV_DEVICE_LIBRARY_EXTENSION_TRUE@src_libmlx4_la_LDFLAGS = -avoid-version -release @IBV_DEVICE_LIBRARY_EXTENSION@ \
 @HAVE_IBV_DEVICE_LIBRARY_EXTENSION_TRUE@        $(mlx4_version_script)
 
-@HAVE_IBV_DEVICE_LIBRARY_EXTENSION_TRUE@mlx4confdir = $(sysconfdir)/libibverbs.d
+@HAVE_IBV_DEVICE_LIBRARY_EXTENSION_TRUE@mlx4confdir = $(datadir)/libibverbs.d
 @HAVE_IBV_DEVICE_LIBRARY_EXTENSION_TRUE@mlx4conf_DATA = mlx4.driver
 @HAVE_IBV_DEVICE_LIBRARY_EXTENSION_FALSE@mlx4libdir = $(libdir)/infiniband
 @HAVE_IBV_DEVICE_LIBRARY_EXTENSION_FALSE@mlx4lib_LTLIBRARIES = src/mlx4.la
@@ -345,8 +345,8 @@
 	  else :; fi; \
 	done; \
 	test -z "$$list2" || { \
-	  echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(libdir)'"; \
-	  $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(libdir)"; \
+	  echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) -m 755 $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(libdir)'"; \
+	  $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) -m 755 $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(libdir)"; \
 	}
 
 uninstall-libLTLIBRARIES:
@@ -376,8 +376,8 @@
 	  else :; fi; \
 	done; \
 	test -z "$$list2" || { \
-	  echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(mlx4libdir)'"; \
-	  $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(mlx4libdir)"; \
+	  echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) -m755 $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(mlx4libdir)'"; \
+	  $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) -m755 $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(mlx4libdir)"; \
 	}
 
 uninstall-mlx4libLTLIBRARIES: