6773181 panic due to assertion failure from ibmf_saa_impl_hca_detach()
author Rajkumar Sivaprakasam <Rajkumar.Sivaprakasam@Sun.COM>
Thu, 05 Feb 2009 10:03:55 -0800
changeset 8695 115e6d42744b
parent 8694 78d64822f8bb
child 8696 c76030436981
6773181 panic due to assertion failure from ibmf_saa_impl_hca_detach() 6800017 HCA DR requires RCM script for SDP 6773886 Panic in ibnex_name_child() function 6784821 typo from 'cfgadm configure' 6763923 Calling 'cfgadm -yx update_pkey_tbls' with a invalid ap_id doesn't return error 6751194 rpcib: unable to unconfigure InfiniBand HCA cards 6794326 ibcm:ibcm_hca_detach fails due to transient connections 6794307 ibnex_ioc_list not protected properly 6778827 rpcib: Panic due to invalid mutex reference
usr/src/cmd/rcm_daemon/Makefile.com
usr/src/cmd/rcm_daemon/common/SUNW,ibsdpu.sh
usr/src/lib/cfgadm_plugins/ib/common/cfga_ib.c
usr/src/pkgdefs/SUNWibsdpu/prototype_com
usr/src/uts/common/fs/nfs/nfs_server.c
usr/src/uts/common/io/ib/ibnex/ibnex.c
usr/src/uts/common/io/ib/mgt/ibcm/ibcm_impl.c
usr/src/uts/common/io/ib/mgt/ibmf/ibmf_saa_impl.c
usr/src/uts/common/rpc/clnt_rdma.c
usr/src/uts/common/rpc/ib.h
usr/src/uts/common/rpc/rdma_subr.c
usr/src/uts/common/rpc/rpc_rdma.h
usr/src/uts/common/rpc/rpcib.c
usr/src/uts/common/rpc/rpcmod.c
usr/src/uts/common/rpc/svc.c
usr/src/uts/common/rpc/svc.h
usr/src/uts/common/rpc/svc_rdma.c
usr/src/uts/common/sys/ib/ibnex/ibnex.h
--- a/usr/src/cmd/rcm_daemon/Makefile.com	Thu Feb 05 11:59:59 2009 -0500
+++ b/usr/src/cmd/rcm_daemon/Makefile.com	Thu Feb 05 10:03:55 2009 -0800
@@ -64,6 +64,8 @@
 
 sparc_PERL_SCRIPT_SRC = SUNW,vdevices.pl
 
+COMMON_SHELL_SCRIPT_SRC = SUNW,ibsdpu.sh
+
 COMMON_MOD_OBJ = \
 	filesys_rcm.o \
 	dump_rcm.o \
@@ -135,7 +137,8 @@
 POFILE = prcm_daemon.po
 
 PERL_SCRIPTS = $(COMMON_PERL_SRC) $($(MACH)_PERL_SCRIPT_SRC)
-RCM_SCRIPTS = $(PERL_SCRIPTS)
+SHELL_SCRIPTS = $(COMMON_SHELL_SCRIPT_SRC)
+RCM_SCRIPTS = $(PERL_SCRIPTS) $(SHELL_SCRIPTS)
 
 # install specifics
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/cmd/rcm_daemon/common/SUNW,ibsdpu.sh	Thu Feb 05 10:03:55 2009 -0800
@@ -0,0 +1,109 @@
+#!/sbin/sh
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# RCM script to inform the need to run 'sdpadm disable' before removing the
+# last IB HCA, when SDP is enabled in the system.
+#
+
+rcm_script_version=1
+rcm_script_func_info="SDP (un)configuration rcm script"
+rcm_cmd_timeout=10
+rcm_resource_name=/devices/ib/sdpib@0:sdpib
+
+do_scriptinfo()		# scriptinfo: print script metadata as name=value
+{
+	printf "rcm_script_version=%d\n" "$rcm_script_version";
+	printf "rcm_script_func_info=%s\n" "$rcm_script_func_info";
+	printf "rcm_cmd_timeout=%d\n" "$rcm_cmd_timeout";
+	exit 0;
+}
+
+do_register()		# register: print the one resource name this script uses
+{
+	printf "rcm_resource_name=%s\n" "$rcm_resource_name";
+	exit 0;
+}
+
+do_resourceinfo()	# resourceinfo <name>: describe usage of our resource
+{
+	if [ x"$1" = x"$rcm_resource_name" ]
+	then
+		printf "rcm_resource_usage_info=SDP IB device 0\n";
+		exit 0;
+	else
+		printf "rcm_failure_reason=Unknown SDP device\n";
+		exit 1;		# plain error; 3 is for (query|pre)remove refusals
+	fi
+}
+
+do_queryremove()
+{
+	sdp_state=`sdpadm status`
+	rc=$?
+	# 'sdpadm status' itself failed: report the failure and exit 1.
+	if [ $rc -ne 0 ]; then
+		printf "rcm_log_warn='sdpadm status' command failed. Could not find the "
+		printf "status of SDP\n";
+		printf "rcm_failure_reason='sdpadm status' command failed.\n";
+		exit 1;
+	fi
+	# Any output other than the disabled banner is treated as SDP enabled.
+	if [ "$sdp_state" != "SDP is Disabled" ]; then
+		printf "rcm_log_warn=SDP is enabled. Please run 'sdpadm disable' command "
+		printf "before un-configuring IB HCA/SDP\n";
+		printf "rcm_failure_reason=SDP is enabled on this system\n";
+		exit 3;
+	fi
+	exit 0;
+}
+
+do_preremove()
+{
+	exit 0;		# no work for this command; always report success
+}
+
+do_undoremove()
+{
+	exit 0;		# no work for this command; always report success
+}
+
+do_postremove()
+{
+	exit 0;		# no work for this command; always report success
+}
+
+case "$1" in
+	scriptinfo) do_scriptinfo;;
+	register) do_register;;
+	resourceinfo) do_resourceinfo $2;;
+	queryremove) do_queryremove $2;;
+	preremove) do_preremove $2;;
+	undoremove) do_undoremove $2;;
+	postremove) do_postremove $2;;
+	*) echo Unknown option $1; exit 2;;	# 2 == command not supported
+esac
--- a/usr/src/lib/cfgadm_plugins/ib/common/cfga_ib.c	Thu Feb 05 11:59:59 2009 -0500
+++ b/usr/src/lib/cfgadm_plugins/ib/common/cfga_ib.c	Thu Feb 05 10:03:55 2009 -0800
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -919,11 +919,6 @@
 
 		rv = CFGA_IB_OK;	/* Other status don't matter */
 
-		if (!ib_confirm(confp, IB_CONFIRM1)) {
-			ib_cleanup_after_devctl_cmd(hdl, nvl);
-			return (CFGA_NACK);
-		}
-
 		if (devctl_ap_configure(hdl, nvl) != 0) {
 			DPRINTF("cfga_change_state: devctl_ap_configure "
 			    "failed. errno: %d\n", errno);
@@ -1284,7 +1279,7 @@
 		}
 
 		/* CHECK: Only supported on fabric ap_ids */
-		if (fab_apid == NULL) {
+		if (fab_apid == NULL || strcmp(fab_apid, IBNEX_FABRIC) != 0) {
 			DPRINTF("cfga_private_func: fabric apid needed\n");
 			return (ib_err_msg(errstring, CFGA_IB_INVALID_OP_ERR,
 			    ap_id, errno));
--- a/usr/src/pkgdefs/SUNWibsdpu/prototype_com	Thu Feb 05 11:59:59 2009 -0500
+++ b/usr/src/pkgdefs/SUNWibsdpu/prototype_com	Thu Feb 05 10:03:55 2009 -0800
@@ -19,10 +19,9 @@
 # CDDL HEADER END
 #
 #
-# Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 # Use is subject to license terms.
 #
-# ident	"%Z%%M%	%I%	%E% SMI"
 #
 # This required package information file contains a list of package contents.
 # The 'pkgmk' command uses this file to identify the contents of a package
@@ -45,4 +44,8 @@
 #
 d none usr 0755 root sys
 d none usr/sbin 0755 root bin
+d none usr/lib 0755 root bin
+d none usr/lib/rcm 0755 root bin
+d none usr/lib/rcm/scripts 0755 root bin
 f none usr/sbin/sdpadm 0555 root bin
+f none usr/lib/rcm/scripts/SUNW,ibsdpu.sh 0555 root bin
--- a/usr/src/uts/common/fs/nfs/nfs_server.c	Thu Feb 05 11:59:59 2009 -0500
+++ b/usr/src/uts/common/fs/nfs/nfs_server.c	Thu Feb 05 10:03:55 2009 -0800
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -70,6 +70,7 @@
 #include <rpc/auth_des.h>
 #include <rpc/svc.h>
 #include <rpc/xdr.h>
+#include <rpc/rpc_rdma.h>
 
 #include <nfs/nfs.h>
 #include <nfs/export.h>
@@ -552,6 +553,8 @@
 {
 	int error;
 	rdma_xprt_group_t started_rdma_xprts;
+	rdma_stat stat;
+	int svc_state = 0;
 
 	/* Double check the vers min/max ranges */
 	if ((rsa->nfs_versmin > rsa->nfs_versmax) ||
@@ -580,15 +583,52 @@
 	started_rdma_xprts.rtg_count = 0;
 	started_rdma_xprts.rtg_listhead = NULL;
 	started_rdma_xprts.rtg_poolid = rsa->poolid;
+
+restart:
 	error = svc_rdma_kcreate(rsa->netid, &nfs_sct_rdma, rsa->poolid,
 	    &started_rdma_xprts);
 
-	if (error == 0) {
-		mutex_enter(&rdma_wait_mutex);
-		if (!cv_wait_sig(&rdma_wait_cv, &rdma_wait_mutex)) {
-			rdma_stop(started_rdma_xprts);
+	svc_state = !error;
+
+	while (!error) {
+
+		/*
+		 * wait till either interrupted by a signal on
+		 * nfs service stop/restart or signalled by a
+		 * rdma plugin attach/detach.
+		 */
+
+		stat = rdma_kwait();
+
+		/*
+		 * stop services if running -- either on a HCA detach event
+		 * or if the nfs service is stopped/restarted.
+		 */
+
+		if ((stat == RDMA_HCA_DETACH || stat == RDMA_INTR) &&
+		    svc_state) {
+			rdma_stop(&started_rdma_xprts);
+			svc_state = 0;
 		}
-		mutex_exit(&rdma_wait_mutex);
+
+		/*
+		 * nfs service stop/restart, break out of the
+		 * wait loop and return;
+		 */
+		if (stat == RDMA_INTR)
+			return (0);
+
+		/*
+		 * restart stopped services on a HCA attach event
+		 * (if not already running)
+		 */
+
+		if ((stat == RDMA_HCA_ATTACH) && (svc_state == 0))
+			goto restart;
+
+		/*
+		 * loop until a nfs service stop/restart
+		 */
 	}
 
 	return (error);
--- a/usr/src/uts/common/io/ib/ibnex/ibnex.c	Thu Feb 05 11:59:59 2009 -0500
+++ b/usr/src/uts/common/io/ib/ibnex/ibnex.c	Thu Feb 05 10:03:55 2009 -0800
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -407,10 +407,12 @@
 	IBTF_DPRINTF_L4("ibnex", "\t_init");
 	mutex_init(&ibnex.ibnex_mutex, NULL, MUTEX_DRIVER, NULL);
 	cv_init(&ibnex.ibnex_reprobe_cv, NULL, CV_DRIVER, NULL);
+	cv_init(&ibnex.ibnex_ioc_list_cv, NULL, CV_DRIVER, NULL);
 	if ((error = mod_install(&modlinkage)) != 0) {
 		IBTF_DPRINTF_L2("ibnex", "\t_init: mod_install failed");
 		mutex_destroy(&ibnex.ibnex_mutex);
 		cv_destroy(&ibnex.ibnex_reprobe_cv);
+		cv_destroy(&ibnex.ibnex_ioc_list_cv);
 	} else {
 		ibdm_ibnex_register_callback(ibnex_dm_callback);
 		ibtl_ibnex_register_callback(ibnex_ibtl_callback);
@@ -436,6 +438,7 @@
 	ibtl_ibnex_unregister_callback();
 	mutex_destroy(&ibnex.ibnex_mutex);
 	cv_destroy(&ibnex.ibnex_reprobe_cv);
+	cv_destroy(&ibnex.ibnex_ioc_list_cv);
 	return (0);
 }
 
@@ -1301,34 +1304,43 @@
 		 * ibdm and configure all children.
 		 */
 		if (parent == ibnex.ibnex_dip) {
-			ibdm_ioc_info_t	*ioc_list;
-
+			ibdm_ioc_info_t	*ioc_list, *new_ioc_list;
+
+			mutex_enter(&ibnex.ibnex_mutex);
+			while (ibnex.ibnex_ioc_list_state !=
+			    IBNEX_IOC_LIST_READY) {
+				cv_wait(&ibnex.ibnex_ioc_list_cv,
+				    &ibnex.ibnex_mutex);
+			}
+			ibnex.ibnex_ioc_list_state = IBNEX_IOC_LIST_RENEW;
+			mutex_exit(&ibnex.ibnex_mutex);
+			/* Enumerate all the IOC's */
+			ibdm_ibnex_port_settle_wait(0,
+			    ibnex_port_settling_time);
+
+			new_ioc_list = ibdm_ibnex_get_ioc_list(
+			    IBDM_IBNEX_NORMAL_PROBE);
+			IBTF_DPRINTF_L4("ibnex",
+			    "\tbus_config: alloc ioc_list %p", new_ioc_list);
 			/*
 			 * Optimize the calls for each BUS_CONFIG_ALL request
 			 * to the IB Nexus dip. This is currently done for
 			 * each PDIP.
 			 */
-			if (ibnex.ibnex_ioc_list) {
+			mutex_enter(&ibnex.ibnex_mutex);
+			ioc_list = ibnex.ibnex_ioc_list;
+			ibnex.ibnex_ioc_list = new_ioc_list;
+			ibnex.ibnex_ioc_list_state = IBNEX_IOC_LIST_READY;
+			cv_broadcast(&ibnex.ibnex_ioc_list_cv);
+			mutex_exit(&ibnex.ibnex_mutex);
+
+			if (ioc_list) {
 				IBTF_DPRINTF_L4("ibnex",
 				    "\tbus_config: freeing ioc_list %p",
-				    ibnex.ibnex_ioc_list);
-				ibdm_ibnex_free_ioc_list(ibnex.ibnex_ioc_list);
-				mutex_enter(&ibnex.ibnex_mutex);
-				ibnex.ibnex_ioc_list = NULL;
-				mutex_exit(&ibnex.ibnex_mutex);
+				    ioc_list);
+				ibdm_ibnex_free_ioc_list(ioc_list);
 			}
 
-			/* Enumerate all the IOC's */
-			ibdm_ibnex_port_settle_wait(0,
-			    ibnex_port_settling_time);
-
-			ioc_list = ibdm_ibnex_get_ioc_list(
-			    IBDM_IBNEX_NORMAL_PROBE);
-			IBTF_DPRINTF_L4("ibnex",
-			    "\tbus_config: alloc ioc_list %p", ioc_list);
-			mutex_enter(&ibnex.ibnex_mutex);
-			ibnex.ibnex_ioc_list = ioc_list;
-			mutex_exit(&ibnex.ibnex_mutex);
 
 			ret = mdi_vhci_bus_config(parent,
 			    flag, op, devname, child, NULL);
@@ -1524,14 +1536,18 @@
 
 	ibnex_pseudo_initnodes();
 
+	mutex_enter(&ibnex.ibnex_mutex);
+	while (ibnex.ibnex_ioc_list_state != IBNEX_IOC_LIST_READY) {
+		cv_wait(&ibnex.ibnex_ioc_list_cv, &ibnex.ibnex_mutex);
+	}
+	ibnex.ibnex_ioc_list_state = IBNEX_IOC_LIST_ACCESS;
 	ioc_list = ibnex.ibnex_ioc_list;
-
-	mutex_enter(&ibnex.ibnex_mutex);
-
 	while (ioc_list) {
 		(void) ibnex_ioc_config_from_pdip(ioc_list, parent, 0);
 		ioc_list = ioc_list->ioc_next;
 	}
+	ibnex.ibnex_ioc_list_state = IBNEX_IOC_LIST_READY;
+	cv_broadcast(&ibnex.ibnex_ioc_list_cv);
 
 	/* Config IBTF Pseudo clients */
 	ibnex_config_pseudo_all(parent);
@@ -3188,6 +3204,7 @@
 	ibnex_node_data_t	*node_data;
 	ibnex_port_node_t	*port_node;
 	char devname[MAXNAMELEN];
+	int 			cdip_allocated = 0;
 
 	ASSERT(MUTEX_HELD(&ibnex.ibnex_mutex));
 
@@ -3265,9 +3282,7 @@
 			IBTF_DPRINTF_L2("ibnex", "\tcommsvc_initnode:"
 			    "\tInvalid Node type");
 			*rval = IBNEX_FAILURE;
-			mutex_exit(&ibnex.ibnex_mutex);
 			ibnex_delete_port_node_data(node_data);
-			mutex_enter(&ibnex.ibnex_mutex);
 			return (NULL);
 	}
 
@@ -3277,11 +3292,14 @@
 			node_data->node_data.port_node.port_pdip = parent;
 			node_data->node_state = IBNEX_CFGADM_CONFIGURED;
 			ddi_set_parent_data(cdip, node_data);
+			IBTF_DPRINTF_L4("ibnex", "\tcommsvc_initnode: found "
+			    "attached cdip 0x%p for devname %s", cdip, devname);
 			return (cdip);
 		}
 	} else {
 		ndi_devi_alloc_sleep(parent,
 		    IBNEX_IBPORT_CNAME, (pnode_t)DEVI_SID_NODEID, &cdip);
+		cdip_allocated = 1;
 	}
 
 	node_data->node_dip	= cdip;
@@ -3301,11 +3319,16 @@
 			node_data->node_data.port_node.port_pdip = parent;
 			return (cdip);
 		}
+		IBTF_DPRINTF_L4("ibnex", "\tcommsvc_initnode: BIND/ONLINE "
+		    "of cdip 0x%p for devname %s and flag %d failed", cdip,
+		    devname, flag);
 	}
 
 	*rval = IBNEX_FAILURE;
-	ibnex_delete_port_node_data(node_data);
-	(void) ndi_devi_free(cdip);
+	node_data->node_dip = NULL;
+	ddi_set_parent_data(cdip, NULL);
+	if (cdip_allocated)
+		(void) ndi_devi_free(cdip);
 	mutex_enter(&ibnex.ibnex_mutex);
 	IBTF_DPRINTF_L4("ibnex", "\tcommsvc_initnode: failure exit");
 	return (NULL);
@@ -3562,7 +3585,6 @@
 static void
 ibnex_delete_port_node_data(ibnex_node_data_t *node)
 {
-	mutex_enter(&ibnex.ibnex_mutex);
 	if ((node->node_next == NULL) && (node->node_prev == NULL))
 		ibnex.ibnex_port_node_head = NULL;
 	else if (node->node_next == NULL)
@@ -3574,7 +3596,6 @@
 		node->node_prev->node_next = node->node_next;
 		node->node_next->node_prev = node->node_prev;
 	}
-	mutex_exit(&ibnex.ibnex_mutex);
 	kmem_free(node, sizeof (ibnex_node_data_t));
 }
 
--- a/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_impl.c	Thu Feb 05 11:59:59 2009 -0500
+++ b/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_impl.c	Thu Feb 05 10:03:55 2009 -0800
@@ -261,8 +261,8 @@
 ib_time_t	ibcm_max_sidr_pktlife_time = IBCM_MAX_SIDR_PKT_LIFE_TIME;
 
 ib_time_t	ibcm_max_sidr_rep_store_time = 18;
-uint32_t	ibcm_wait_for_acc_cnt_timeout = 500000;	/* 500 ms */
-uint32_t	ibcm_wait_for_res_cnt_timeout = 500000;	/* 500 ms */
+uint32_t	ibcm_wait_for_acc_cnt_timeout = 2000000;	/* 2 sec */
+uint32_t	ibcm_wait_for_res_cnt_timeout = 2000000;	/* 2 sec */
 
 ib_time_t	ibcm_max_ib_pkt_lt = IBCM_MAX_IB_PKT_LT;
 ib_time_t	ibcm_max_ib_mad_pkt_lt = IBCM_MAX_IB_MAD_PKT_LT;
@@ -933,7 +933,7 @@
 	 */
 	hcap->hca_state = IBCM_HCA_NOT_ACTIVE;
 
-	/* wait on response CV to 500mS */
+	/* wait on response CV */
 	absolute_time = ddi_get_lbolt() +
 	    drv_usectohz(ibcm_wait_for_acc_cnt_timeout);
 
@@ -944,15 +944,10 @@
 
 	if (hcap->hca_acc_cnt != 0) {
 		/* We got a timeout */
-#ifdef DEBUG
-		if (ibcm_test_mode > 0)
-			IBTF_DPRINTF_L1(cmlog, "ibcm_hca_detach: Unexpected "
-			    "abort due to timeout on acc_cnt %u",
-			    hcap->hca_acc_cnt);
-		else
-#endif
-			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach: Aborting due"
-			    " to timeout on acc_cnt %u", hcap->hca_acc_cnt);
+		IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach: Aborting due"
+		    " to timeout on hca_acc_cnt %u, \n Some CM Clients are "
+		    "still active, looks like we need to wait some more time "
+		    "(ibcm_wait_for_acc_cnt_timeout).", hcap->hca_acc_cnt);
 		hcap->hca_state = IBCM_HCA_ACTIVE;
 		return (IBCM_FAILURE);
 	}
@@ -989,10 +984,10 @@
 	 * All these stateps must be short lived ones, waiting to be cleaned
 	 * up after some timeout value, based on the current state.
 	 */
-	IBTF_DPRINTF_L5(cmlog, "ibcm_hca_detach:hca_guid = 0x%llX res_cnt = %d",
+	IBTF_DPRINTF_L3(cmlog, "ibcm_hca_detach:hca_guid = 0x%llX res_cnt = %d",
 	    hcap->hca_guid, hcap->hca_res_cnt);
 
-	/* wait on response CV to 500mS */
+	/* wait on response CV */
 	absolute_time = ddi_get_lbolt() +
 	    drv_usectohz(ibcm_wait_for_res_cnt_timeout);
 
@@ -1003,15 +998,11 @@
 
 	if (hcap->hca_res_cnt != 0) {
 		/* We got a timeout waiting for hca_res_cnt to become 0 */
-#ifdef DEBUG
-		if (ibcm_test_mode > 0)
-			IBTF_DPRINTF_L1(cmlog, "ibcm_hca_detach: Unexpected "
-			    "abort due to timeout on res_cnt %d",
-			    hcap->hca_res_cnt);
-		else
-#endif
-			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach: Aborting due"
-			    " to timeout on res_cnt %d", hcap->hca_res_cnt);
+		IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach: Aborting due"
+		    " to timeout on res_cnt %d, \n Some CM connections are "
+		    "still in transient state, looks like we need to wait "
+		    "some more time (ibcm_wait_for_res_cnt_timeout).",
+		    hcap->hca_res_cnt);
 		hcap->hca_state = IBCM_HCA_ACTIVE;
 		return (IBCM_FAILURE);
 	}
--- a/usr/src/uts/common/io/ib/mgt/ibmf/ibmf_saa_impl.c	Thu Feb 05 11:59:59 2009 -0500
+++ b/usr/src/uts/common/io/ib/mgt/ibmf/ibmf_saa_impl.c	Thu Feb 05 10:03:55 2009 -0800
@@ -19,12 +19,10 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #include <sys/ib/mgt/ibmf/ibmf_saa_impl.h>
 #include <sys/ib/mgt/ibmf/ibmf_saa_utils.h>
 
@@ -72,7 +70,7 @@
     ibt_hca_portinfo_t *portinfop);
 static void ibmf_saa_impl_update_sa_address_info(saa_port_t *saa_portp,
     ibmf_msg_t *msgp);
-static void ibmf_saa_impl_ibmf_unreg(saa_port_t *saa_portp);
+static int ibmf_saa_impl_ibmf_unreg(saa_port_t *saa_portp);
 
 int	ibmf_saa_max_wait_time = IBMF_SAA_MAX_WAIT_TIME_IN_SECS;
 int	ibmf_saa_trans_wait_time = IBMF_SAA_TRANS_WAIT_TIME_IN_SECS;
@@ -164,6 +162,7 @@
 {
 	int		ret = 0;
 	saa_port_t	*saa_portp;
+	saa_port_t	*next;
 
 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_saa_impl_fini_start,
 	    IBMF_TNF_TRACE, "", "ibmf_saa_impl_fini() enter\n");
@@ -231,8 +230,10 @@
 	 * no more clients nor pending transaction:
 	 * unregister ibmf and destroy port entries
 	 */
-	saa_portp = saa_statep->saa_port_list;
-	while (saa_portp != NULL) {
+	while (saa_statep->saa_port_list != NULL) {
+
+		saa_portp = saa_statep->saa_port_list;
+		next = saa_portp->next;
 
 		IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L3,
 		    ibmf_saa_impl_fini, IBMF_TNF_TRACE, "",
@@ -240,8 +241,6 @@
 		    tnf_string, msg, "deinitializing port",
 		    tnf_opaque, port_guid, saa_portp->saa_pt_port_guid);
 
-		saa_statep->saa_port_list = saa_portp->next;
-
 		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*saa_portp))
 
 		mutex_enter(&saa_portp->saa_pt_mutex);
@@ -251,13 +250,17 @@
 
 			mutex_exit(&saa_portp->saa_pt_mutex);
 
-			ibmf_saa_impl_ibmf_unreg(saa_portp);
+			if (ibmf_saa_impl_ibmf_unreg(saa_portp)
+			    != IBMF_SUCCESS) {
+				ret = EBUSY;
+				goto bail;
+			}
 		} else
-
-		mutex_exit(&saa_portp->saa_pt_mutex);
+			mutex_exit(&saa_portp->saa_pt_mutex);
 
 		ibmf_saa_impl_destroy_port(saa_portp);
-		saa_portp = saa_statep->saa_port_list;
+
+		saa_statep->saa_port_list = next;
 	}
 
 	taskq_destroy(saa_statep->saa_event_taskq);
@@ -515,14 +518,12 @@
 
 	if (status != IBMF_SUCCESS) {
 
-		mutex_enter(
-			&saa_portp->saa_pt_kstat_mutex);
+		mutex_enter(&saa_portp->saa_pt_kstat_mutex);
 
 		IBMF_SAA_ADD32_KSTATS(saa_portp,
 		    clients_reg_failed, 1);
 
-		mutex_exit(
-			&saa_portp->saa_pt_kstat_mutex);
+		mutex_exit(&saa_portp->saa_pt_kstat_mutex);
 
 		/* decrementing refcount is last thing we do on entry */
 
@@ -828,6 +829,98 @@
 	    IBMF_TNF_TRACE, "", "ibmf_saa_impl_register_failed() exit\n");
 }
 
+static int
+ibmf_saa_impl_setup_qp_async_cb(saa_port_t *saa_portp, int setup_async_cb_only)
+{
+	int		status;
+	int		unreg_status;
+	ib_pkey_t	p_key;
+	ib_qkey_t	q_key;
+	uint8_t		portnum;
+	boolean_t	qp_alloced = B_FALSE;
+	/* alloc+query the alt qp unless async-cb-only, then set the async cb */
+	if (setup_async_cb_only == 0) {
+
+		/* allocate an alternate qp (IBMF_ALT_QP_MAD_RMPP) through ibmf */
+		status = ibmf_alloc_qp(saa_portp->saa_pt_ibmf_handle,
+		    IB_PKEY_DEFAULT_LIMITED, IB_GSI_QKEY,
+		    IBMF_ALT_QP_MAD_RMPP, &saa_portp->saa_pt_qp_handle);
+
+		if (status != IBMF_SUCCESS) {
+
+			IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
+			    ibmf_saa_impl_setup_qp_async_cb, IBMF_TNF_ERROR, "",
+			    "ibmf_saa_impl_setup_qp_async_cb: %s, "
+			    "ibmf_status = %d\n",
+			    tnf_string, msg, "Cannot alloc qp with ibmf",
+			    tnf_int, status, status);
+
+			return (status);
+		}
+		/* from here on, a failure must free the qp (see bail) */
+		qp_alloced = B_TRUE;
+
+		/*
+		 * query the queue pair number; we will need it to unsubscribe
+		 * from notice reports
+		 */
+		status = ibmf_query_qp(saa_portp->saa_pt_ibmf_handle,
+		    saa_portp->saa_pt_qp_handle, &saa_portp->saa_pt_qpn,
+		    &p_key, &q_key, &portnum, 0);
+
+		if (status != IBMF_SUCCESS) {
+
+			IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
+			    ibmf_saa_impl_setup_qp_async_cb, IBMF_TNF_ERROR, "",
+			    "ibmf_saa_impl_setup_qp_async_cb: %s, "
+			    "ibmf_status = %d\n",
+			    tnf_string, msg,
+			    "Cannot query alt qp to get qp num",
+			    tnf_int, status, status);
+
+			goto bail;
+		}
+	}
+
+	/*
+	 * core ibmf is taking advantage of the fact that saa_portp is our
+	 * callback arg. If this changes, the code in ibmf_recv would need to
+	 * change as well
+	 */
+	status = ibmf_setup_async_cb(saa_portp->saa_pt_ibmf_handle,
+	    saa_portp->saa_pt_qp_handle, ibmf_saa_report_cb, saa_portp, 0);
+	if (status != IBMF_SUCCESS) {
+
+		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
+		    ibmf_saa_impl_setup_qp_async_cb, IBMF_TNF_ERROR, "",
+		    "ibmf_saa_impl_setup_qp_async_cb: %s, ibmf_status = %d\n",
+		    tnf_string, msg, "Cannot register async cb with ibmf",
+		    tnf_int, status, status);
+
+		goto bail;
+	}
+
+	return (IBMF_SUCCESS);
+
+bail:
+	if (qp_alloced == B_TRUE) {
+		/* free the alternate qp only if this call allocated it */
+		unreg_status = ibmf_free_qp(saa_portp->saa_pt_ibmf_handle,
+		    &saa_portp->saa_pt_qp_handle, 0);
+		if (unreg_status != IBMF_SUCCESS) {
+
+			IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
+			    ibmf_saa_impl_setup_qp_async_cb, IBMF_TNF_ERROR, "",
+			    "ibmf_saa_impl_setup_qp_async_cb: %s, ibmf_status ="
+			    " %d\n", tnf_string, msg,
+			    "Cannot free alternate queue pair with ibmf",
+			    tnf_int, unreg_status, unreg_status);
+		}
+	}
+	/* return the original failure status, not the cleanup status */
+	return (status);
+}
+
 /*
  * ibmf_saa_impl_register_port:
  */
@@ -843,12 +936,8 @@
 	ibt_hca_portinfo_t *port_info_list = NULL;
 	uint_t		port_count	= 0;
 	uint_t		port_size	= 0;
-	ib_pkey_t	p_key;
-	ib_qkey_t	q_key;
-	uint8_t		portnum;
 	int		ihca, iport;
 	ib_guid_t	port_guid;
-	boolean_t	qp_alloced = B_FALSE;
 	boolean_t	ibmf_reg = B_FALSE;
 
 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
@@ -990,79 +1079,10 @@
 
 	ibmf_reg = B_TRUE;
 
-	/* allocate a qp through ibmf */
-	status = ibmf_alloc_qp(saa_portp->saa_pt_ibmf_handle,
-	    IB_PKEY_DEFAULT_LIMITED, IB_GSI_QKEY, IBMF_ALT_QP_MAD_RMPP,
-	    &saa_portp->saa_pt_qp_handle);
-
-	if (status != IBMF_SUCCESS) {
-
-		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
-		    ibmf_saa_impl_register_port, IBMF_TNF_ERROR, "",
-		    "ibmf_saa_impl_register_port: %s, ibmf_status = %d\n",
-		    tnf_string, msg, "Cannot alloc qp with ibmf",
-		    tnf_int, status, status);
-
-		goto bail;
-	}
-
-	qp_alloced = B_TRUE;
-
-	/*
-	 * query the queue pair number; we will need it to unsubscribe from
-	 * notice reports
-	 */
-	status = ibmf_query_qp(saa_portp->saa_pt_ibmf_handle,
-	    saa_portp->saa_pt_qp_handle, &saa_portp->saa_pt_qpn, &p_key, &q_key,
-	    &portnum, 0);
-	if (status != IBMF_SUCCESS) {
-
-		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
-		    ibmf_saa_impl_register_port, IBMF_TNF_ERROR, "",
-		    "ibmf_saa_impl_register_port: %s, ibmf_status = %d\n",
-		    tnf_string, msg, "Cannot query alt qp to get qp num",
-		    tnf_int, status, status);
-
-		goto bail;
-	}
-
-	/*
-	 * core ibmf is taking advantage of the fact that saa_portp is our
-	 * callback arg. If this changes, the code in ibmf_recv would need to
-	 * change as well
-	 */
-	status = ibmf_setup_async_cb(saa_portp->saa_pt_ibmf_handle,
-	    saa_portp->saa_pt_qp_handle, ibmf_saa_report_cb, saa_portp, 0);
-	if (status != IBMF_SUCCESS) {
-
-		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
-		    ibmf_saa_impl_register_port, IBMF_TNF_ERROR, "",
-		    "ibmf_saa_impl_register_port: %s, ibmf_status = %d\n",
-		    tnf_string, msg, "Cannot register async cb with ibmf",
-		    tnf_int, status, status);
-
-		goto bail;
-	}
-
-	return (IBMF_SUCCESS);
+	if (ibmf_saa_impl_setup_qp_async_cb(saa_portp, 0) == IBMF_SUCCESS)
+		return (IBMF_SUCCESS);
 
 bail:
-	if (qp_alloced == B_TRUE) {
-
-		/* free alternate qp */
-		unreg_status = ibmf_free_qp(saa_portp->saa_pt_ibmf_handle,
-		    &saa_portp->saa_pt_qp_handle, 0);
-		if (unreg_status != IBMF_SUCCESS) {
-
-			IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
-			    ibmf_saa_impl_register_port, IBMF_TNF_ERROR, "",
-			    "ibmf_saa_impl_register_port: %s, ibmf_status ="
-			    " %d\n", tnf_string, msg,
-			    "Cannot free alternate queue pair with ibmf",
-			    tnf_int, unreg_status, unreg_status);
-		}
-	}
-
 	if (ibmf_reg == B_TRUE) {
 		/* unregister from ibmf */
 		unreg_status = ibmf_unregister(
@@ -1458,12 +1478,12 @@
 		    (sleep_flag == B_TRUE)) {
 			if (sa_is_redirected == B_TRUE) {
 				ibmf_status = ibmf_saa_impl_revert_to_qp1(
-					saa_portp, msgp, ibmf_callback,
-					    ibmf_callback_arg, transport_flags);
+				    saa_portp, msgp, ibmf_callback,
+				    ibmf_callback_arg, transport_flags);
 			} else {
 				ibmf_status = ibmf_saa_impl_new_smlid_retry(
-					saa_portp, msgp, ibmf_callback,
-					    ibmf_callback_arg, transport_flags);
+				    saa_portp, msgp, ibmf_callback,
+				    ibmf_callback_arg, transport_flags);
 			}
 		}
 
@@ -1524,7 +1544,7 @@
 		 */
 		if (mad_status == MAD_STATUS_BUSY)
 			delay(drv_usectohz(
-				IBMF_SAA_BUSY_RETRY_SLEEP_SECS * 1000000));
+			    IBMF_SAA_BUSY_RETRY_SLEEP_SECS * 1000000));
 	}
 
 	if (ibmf_status != IBMF_SUCCESS) {
@@ -2530,8 +2550,23 @@
 
 	mutex_exit(&saa_portp->saa_pt_mutex);
 
-	if (must_unreg == B_TRUE)
-		ibmf_saa_impl_ibmf_unreg(saa_portp);
+	if (must_unreg == B_TRUE) {
+		if (ibmf_saa_impl_ibmf_unreg(saa_portp) != IBMF_SUCCESS) {
+			mutex_enter(&saa_portp->saa_pt_mutex);
+			mutex_enter(&saa_portp->saa_pt_kstat_mutex);
+			(void) ibmf_saa_impl_init_kstats(saa_portp);
+			mutex_exit(&saa_portp->saa_pt_kstat_mutex);
+			saa_portp->saa_pt_state = IBMF_SAA_PORT_STATE_READY;
+			if (must_unsub == B_TRUE)
+				saa_portp->saa_pt_reference_count++;
+			mutex_exit(&saa_portp->saa_pt_mutex);
+
+			if (must_unsub == B_TRUE) {
+				ibmf_saa_subscribe_events(saa_portp, B_TRUE,
+				    B_FALSE);
+			}
+		}
+	}
 bail:
 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L3, ibmf_saa_impl_hca_detach_end,
 	    IBMF_TNF_TRACE, "", "ibmf_saa_impl_hca_detach() exit\n");
@@ -3545,9 +3580,9 @@
 		return;
 	}
 	rv = ibmf_saa_utils_unpack_payload(
-		msgp->im_msgbufs_recv.im_bufs_cl_data,
-		    msgp->im_msgbufs_recv.im_bufs_cl_data_len, attr_id, &result,
-		    &length, sa_hdr->AttributeOffset, B_TRUE, KM_NOSLEEP);
+	    msgp->im_msgbufs_recv.im_bufs_cl_data,
+	    msgp->im_msgbufs_recv.im_bufs_cl_data_len, attr_id, &result,
+	    &length, sa_hdr->AttributeOffset, B_TRUE, KM_NOSLEEP);
 	if (rv != IBMF_SUCCESS) {
 
 		IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L1,
@@ -3579,8 +3614,8 @@
 
 		mutex_exit(&saa_portp->saa_pt_mutex);
 		ibt_status = ibt_query_hca_ports_byguid(
-			saa_portp->saa_pt_node_guid, saa_portp->saa_pt_port_num,
-			    &ibt_pinfo, &nports, &size);
+		    saa_portp->saa_pt_node_guid, saa_portp->saa_pt_port_num,
+		    &ibt_pinfo, &nports, &size);
 		if (ibt_status != IBT_SUCCESS) {
 
 			IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L1,
@@ -3645,7 +3680,7 @@
 /*
  * ibmf_saa_impl_ibmf_unreg:
  */
-static void
+static int
 ibmf_saa_impl_ibmf_unreg(saa_port_t *saa_portp)
 {
 	int	ibmf_status;
@@ -3658,12 +3693,13 @@
 	    saa_portp->saa_pt_qp_handle, 0);
 	if (ibmf_status != IBMF_SUCCESS) {
 
-		/* continue anyway even though unreg will probably fail */
 		IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L1,
 		    ibmf_saa_impl_ibmf_unreg, IBMF_TNF_TRACE, "",
 		    "ibmf_saa_impl_ibmf_unreg: %s, ibmf_status = %d\n",
 		    tnf_string, msg, "Could not tear down async cb",
 		    tnf_int, ibmf_status, ibmf_status);
+
+		goto bail;
 	}
 
 	/* free qp */
@@ -3672,12 +3708,14 @@
 
 	if (ibmf_status != IBMF_SUCCESS) {
 
-		/* continue anyway even though unreg will probably fail */
 		IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L1,
 		    ibmf_saa_impl_ibmf_unreg, IBMF_TNF_TRACE, "",
 		    "ibmf_saa_impl_ibmf_unreg: %s, ibmf_status = %d\n",
 		    tnf_string, msg, "Could not free queue pair",
 		    tnf_int, ibmf_status, ibmf_status);
+
+		(void) ibmf_saa_impl_setup_qp_async_cb(saa_portp, 1);
+		goto bail;
 	}
 
 	ibmf_status = ibmf_unregister(&saa_portp->saa_pt_ibmf_handle, 0);
@@ -3689,8 +3727,13 @@
 		    "ibmf_saa_impl_ibmf_unreg: %s, ibmf_status = %d\n",
 		    tnf_string, msg, "ibmf_unregister() failed",
 		    tnf_int, ibmf_status, ibmf_status);
+
+		(void) ibmf_saa_impl_setup_qp_async_cb(saa_portp, 0);
 	}
 
+bail:
 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_saa_impl_ibmf_unreg_end,
 	    IBMF_TNF_TRACE, "", "ibmf_saa_impl_ibmf_unreg() exit\n");
+
+	return (ibmf_status);
 }
--- a/usr/src/uts/common/rpc/clnt_rdma.c	Thu Feb 05 11:59:59 2009 -0500
+++ b/usr/src/uts/common/rpc/clnt_rdma.c	Thu Feb 05 10:03:55 2009 -0800
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
@@ -530,6 +530,7 @@
 	XDR 	*rdmahdr_o_xdrs, *rdmahdr_i_xdrs;
 
 	struct rpc_msg 	reply_msg;
+	rdma_registry_t	*m;
 
 	struct clist *cl_sendlist;
 	struct clist *cl_recvlist;
@@ -578,6 +579,27 @@
 	rcil.rcil_len_alt = 0;
 	long_reply_len = 0;
 
+	rw_enter(&rdma_lock, RW_READER);
+	m = (rdma_registry_t *)p->cku_rd_handle;
+	if (m->r_mod_state == RDMA_MOD_INACTIVE) {
+		/*
+		 * If the RDMA module behind this handle is marked inactive
+		 * in the registry then there is no transport.
+		 */
+		rw_exit(&rdma_lock);
+		p->cku_err.re_status = RPC_CANTSEND;
+		p->cku_err.re_errno = EIO;
+		ticks = clnt_rdma_min_delay * drv_usectohz(1000000);
+		if (h->cl_nosignal == TRUE) {
+			delay(ticks);
+		} else {
+			if (delay_sig(ticks) == EINTR) {
+				p->cku_err.re_status = RPC_INTR;
+				p->cku_err.re_errno = EINTR;
+			}
+		}
+		return (RPC_CANTSEND);
+	}
 	/*
 	 * Get unique xid
 	 */
@@ -586,6 +608,7 @@
 
 	status = RDMA_GET_CONN(p->cku_rd_mod->rdma_ops, &p->cku_addr,
 	    p->cku_addrfmly, p->cku_rd_handle, &conn);
+	rw_exit(&rdma_lock);
 
 	/*
 	 * If there is a problem with the connection reflect the issue
@@ -1266,6 +1289,10 @@
 	rw_enter(&rdma_lock, RW_READER);
 	rp = rdma_mod_head;
 	while (rp != NULL) {
+		if (rp->r_mod_state == RDMA_MOD_INACTIVE) {
+			rp = rp->r_next;
+			continue;
+		}
 		status = RDMA_REACHABLE(rp->r_mod->rdma_ops, addr_type, addr,
 		    &handle);
 		if (status == RDMA_SUCCESS) {
@@ -1285,7 +1312,7 @@
 
 			knc->knc_protofmly = pf;
 			knc->knc_proto = p;
-			knc->knc_rdev = (dev_t)handle;
+			knc->knc_rdev = (dev_t)rp;
 			*knconf = knc;
 			rw_exit(&rdma_lock);
 			return (0);
--- a/usr/src/uts/common/rpc/ib.h	Thu Feb 05 11:59:59 2009 -0500
+++ b/usr/src/uts/common/rpc/ib.h	Thu Feb 05 10:03:55 2009 -0800
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 /*
@@ -205,8 +205,8 @@
 } rib_conn_list_t;
 
 enum hca_state {
+	HCA_DETACHED,		/* hca in detached state */
 	HCA_INITED,		/* hca in up and running state */
-	HCA_DETACHED		/* hca in detached state */
 };
 
 /*
--- a/usr/src/uts/common/rpc/rdma_subr.c	Thu Feb 05 11:59:59 2009 -0500
+++ b/usr/src/uts/common/rpc/rdma_subr.c	Thu Feb 05 10:03:55 2009 -0800
@@ -19,12 +19,12 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
 /*
- * Copyright (c) 2007, The Ohio State University. All rights reserved.
+ * Copyright (c) 2008, The Ohio State University. All rights reserved.
  *
  * Portions of this source code is developed by the team members of
  * The Ohio State University's Network-Based Computing Laboratory (NBCL),
@@ -53,6 +53,9 @@
 int rdma_modloaded = 0;		/* flag to load RDMA plugin modules */
 int rdma_dev_available = 0;	/* if any RDMA device is loaded */
 kmutex_t rdma_modload_lock;	/* protects rdma_modloaded flag */
+
+rdma_svc_wait_t rdma_wait;
+
 rdma_registry_t	*rdma_mod_head = NULL;	/* head for RDMA modules */
 krwlock_t	rdma_lock;		/* protects rdma_mod_head list */
 ldi_ident_t rpcmod_li = NULL;	/* identifies us with ldi_ framework */
@@ -62,7 +65,7 @@
 /*
  * Statics
  */
-static ldi_handle_t rpcib_handle = NULL;
+ldi_handle_t rpcib_handle = NULL;
 
 /*
  * Externs
@@ -96,6 +99,12 @@
 	while (*mp != NULL) {
 		if (strncmp((*mp)->r_mod->rdma_api, mod->rdma_api,
 		    KNC_STRSIZE) == 0) {
+			if ((*mp)->r_mod_state == RDMA_MOD_INACTIVE) {
+				(*mp)->r_mod_state = RDMA_MOD_ACTIVE;
+				(*mp)->r_mod->rdma_ops = mod->rdma_ops;
+				(*mp)->r_mod->rdma_count = mod->rdma_count;
+				goto announce_hca;
+			}
 			rw_exit(&rdma_lock);
 			return (RDMA_REG_EXIST);
 		}
@@ -112,8 +121,20 @@
 	m->r_mod->rdma_api = kmem_zalloc(KNC_STRSIZE, KM_SLEEP);
 	(void) strncpy(m->r_mod->rdma_api, mod->rdma_api, KNC_STRSIZE);
 	m->r_mod->rdma_api[KNC_STRSIZE - 1] = '\0';
+	m->r_mod_state = RDMA_MOD_ACTIVE;
 	*mp = m;
+
+announce_hca:
 	rw_exit(&rdma_lock);
+	/*
+	 * Start the nfs service on the rdma xprts.
+	 * (this notification mechanism will need to change when we support
+	 * multiple hcas and have support for multiple rdma plugins).
+	 */
+	mutex_enter(&rdma_wait.svc_lock);
+	rdma_wait.svc_stat = RDMA_HCA_ATTACH;
+	cv_signal(&rdma_wait.svc_cv);
+	mutex_exit(&rdma_wait.svc_lock);
 
 	return (RDMA_SUCCESS);
 }
@@ -140,27 +161,37 @@
 		/*
 		 * Check if any device attached, if so return error
 		 */
-		if ((*m)->r_mod->rdma_count != 0) {
+		if (mod->rdma_count != 0) {
 			rw_exit(&rdma_lock);
 			return (RDMA_FAILED);
 		}
 		/*
-		 * Found entry. Now remove it.
+		 * Found entry. Mark it inactive.
 		 */
 		mmod = *m;
-		*m = (*m)->r_next;
-		kmem_free(mmod->r_mod->rdma_api, KNC_STRSIZE);
-		kmem_free(mmod->r_mod, sizeof (rdma_mod_t));
-		kmem_free(mmod, sizeof (rdma_registry_t));
-		rw_exit(&rdma_lock);
-		return (RDMA_SUCCESS);
+		mmod->r_mod->rdma_count = 0;
+		mmod->r_mod_state = RDMA_MOD_INACTIVE;
+		break;
 	}
 
+	rdma_modloaded = 0;
+	rdma_dev_available = 0;
+	rw_exit(&rdma_lock);
+
+	/*
+	 * Stop the nfs service running on the rdma xprts.
+	 * (this notification mechanism will need to change when we support
+	 * multiple hcas and have support for multiple rdma plugins).
+	 */
+	mutex_enter(&rdma_wait.svc_lock);
+	rdma_wait.svc_stat = RDMA_HCA_DETACH;
+	cv_signal(&rdma_wait.svc_cv);
+	mutex_exit(&rdma_wait.svc_lock);
+
 	/*
 	 * Not found.
 	 */
-	rw_exit(&rdma_lock);
-	return (RDMA_FAILED);
+	return (RDMA_SUCCESS);
 }
 
 struct clist *
@@ -423,15 +454,23 @@
 	status = ldi_open_by_name("/devices/ib/rpcib@0:rpcib",
 	    FREAD | FWRITE, kcred,
 	    &rpcib_handle, rpcmod_li);
+
 	if (status != 0)
 		return (EPROTONOSUPPORT);
 
-	/* success */
-	rdma_kstat_init();
 
-	clist_cache = kmem_cache_create("rdma_clist",
-	    sizeof (struct clist), _POINTER_ALIGNMENT, NULL,
-	    NULL, NULL, NULL, 0, 0);
+	/*
+	 * We will need to reload the plugin module after it was unregistered
+	 * but the resources below need to be allocated only the first time.
+	 */
+	if (!clist_cache) {
+		clist_cache = kmem_cache_create("rdma_clist",
+		    sizeof (struct clist), _POINTER_ALIGNMENT, NULL,
+		    NULL, NULL, NULL, 0, 0);
+		rdma_kstat_init();
+	}
+
+	(void) ldi_close(rpcib_handle, FREAD|FWRITE, kcred);
 
 	return (0);
 }
@@ -462,3 +501,28 @@
 		kstat_install(ksp);
 	}
 }
+
+rdma_stat
+rdma_kwait(void)
+{
+	int ret;
+	rdma_stat stat;
+
+	mutex_enter(&rdma_wait.svc_lock);
+
+	ret = cv_wait_sig(&rdma_wait.svc_cv, &rdma_wait.svc_lock);
+
+	/*
+	 * cv_wait_sig() returns 0 if a signal interrupted the wait; else
+	 * pass back the hca attach/detach stat posted in svc_stat.
+	 */
+
+	if (ret)
+		stat =  rdma_wait.svc_stat;
+	else
+		stat = RDMA_INTR;
+
+	mutex_exit(&rdma_wait.svc_lock);
+
+	return (stat);
+}
--- a/usr/src/uts/common/rpc/rpc_rdma.h	Thu Feb 05 11:59:59 2009 -0500
+++ b/usr/src/uts/common/rpc/rpc_rdma.h	Thu Feb 05 10:03:55 2009 -0800
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -169,11 +169,13 @@
 	 */
 	RDMA_BADVERS = 16,	/* mismatch RDMATF versions */
 	RDMA_REG_EXIST = 17,	/* RDMATF registration already exists */
+	RDMA_HCA_ATTACH = 18,
+	RDMA_HCA_DETACH = 19,
 
 	/*
 	 * fallback error
 	 */
-	RDMA_FAILED = 18	/* generic error */
+	RDMA_FAILED = 20	/* generic error */
 } rdma_stat;
 
 /*
@@ -302,10 +304,17 @@
  */
 typedef struct rdma_registry {
 	rdma_mod_t	*r_mod;		/* plugin mod info */
+	uint32_t	r_mod_state;
 	struct rdma_registry *r_next;	/* next registered RDMA plugin */
 } rdma_registry_t;
 
 /*
+ * RDMA MODULE state flags (r_mod_state).
+ */
+#define	RDMA_MOD_ACTIVE		1
+#define	RDMA_MOD_INACTIVE	0
+
+/*
  * RDMA transport information
  */
 typedef struct rdma_info {
@@ -421,6 +430,14 @@
 	rdma_stat	(*rdma_getinfo)(rdma_info_t *info);
 } rdmaops_t;
 
+typedef struct rdma_svc_wait {
+	kmutex_t svc_lock;	/* protects svc_stat */
+	kcondvar_t svc_cv;	/* signalled on hca attach/detach */
+	rdma_stat svc_stat;	/* last posted RDMA_HCA_ATTACH/DETACH */
+} rdma_svc_wait_t;
+
+extern rdma_svc_wait_t rdma_wait;
+
 /*
  * RDMA operations.
  */
@@ -513,6 +530,7 @@
 extern void rdma_buf_free(CONN *, rdma_buf_t *);
 extern int rdma_modload();
 extern bool_t   rdma_get_wchunk(struct svc_req *, iovec_t *, struct clist *);
+extern rdma_stat rdma_kwait(void);
 
 /*
  * RDMA XDR
--- a/usr/src/uts/common/rpc/rpcib.c	Thu Feb 05 11:59:59 2009 -0500
+++ b/usr/src/uts/common/rpc/rpcib.c	Thu Feb 05 10:03:55 2009 -0800
@@ -421,15 +421,10 @@
 {
 	int status;
 
-	if ((status = rdma_unregister_mod(&rib_mod)) != RDMA_SUCCESS) {
-		return (EBUSY);
-	}
-
 	/*
 	 * Remove module
 	 */
 	if ((status = mod_remove(&rib_modlinkage)) != 0) {
-		(void) rdma_register_mod(&rib_mod);
 		return (status);
 	}
 	mutex_destroy(&plugin_state_lock);
@@ -534,33 +529,40 @@
 
 	mutex_enter(&rib_stat->open_hca_lock);
 	if (open_hcas(rib_stat) != RDMA_SUCCESS) {
-		ibt_free_hca_list(rib_stat->hca_guids, rib_stat->hca_count);
-		(void) ibt_detach(rib_stat->ibt_clnt_hdl);
 		mutex_exit(&rib_stat->open_hca_lock);
-		mutex_destroy(&rib_stat->open_hca_lock);
-		kmem_free(rib_stat, sizeof (*rib_stat));
-		rib_stat = NULL;
-		return (DDI_FAILURE);
+		goto open_fail;
 	}
 	mutex_exit(&rib_stat->open_hca_lock);
 
+	if (ddi_prop_update_int(DDI_DEV_T_NONE, dip, DDI_NO_AUTODETACH, 1) !=
+	    DDI_PROP_SUCCESS) {
+		cmn_err(CE_WARN, "rpcib_attach: ddi-no-autodetach prop update "
+		    "failed.");
+		goto register_fail;
+	}
+
 	/*
 	 * Register with rdmatf
 	 */
-	rib_mod.rdma_count = rib_stat->hca_count;
+	rib_mod.rdma_count = rib_stat->nhca_inited;
 	r_status = rdma_register_mod(&rib_mod);
 	if (r_status != RDMA_SUCCESS && r_status != RDMA_REG_EXIST) {
-		rib_detach_hca(rib_stat->hca);
-		ibt_free_hca_list(rib_stat->hca_guids, rib_stat->hca_count);
-		(void) ibt_detach(rib_stat->ibt_clnt_hdl);
-		mutex_destroy(&rib_stat->open_hca_lock);
-		kmem_free(rib_stat, sizeof (*rib_stat));
-		rib_stat = NULL;
-		return (DDI_FAILURE);
+		cmn_err(CE_WARN, "rpcib_attach:rdma_register_mod failed, "
+		    "status = %d", r_status);
+		goto register_fail;
 	}
 
-
 	return (DDI_SUCCESS);
+
+register_fail:
+	rib_detach_hca(rib_stat->hca);
+open_fail:
+	ibt_free_hca_list(rib_stat->hca_guids, rib_stat->hca_count);
+	(void) ibt_detach(rib_stat->ibt_clnt_hdl);
+	mutex_destroy(&rib_stat->open_hca_lock);
+	kmem_free(rib_stat, sizeof (*rib_stat));
+	rib_stat = NULL;
+	return (DDI_FAILURE);
 }
 
 /*ARGSUSED*/
@@ -586,11 +588,18 @@
 	rib_detach_hca(rib_stat->hca);
 	ibt_free_hca_list(rib_stat->hca_guids, rib_stat->hca_count);
 	(void) ibt_detach(rib_stat->ibt_clnt_hdl);
+	mutex_destroy(&rib_stat->open_hca_lock);
+	if (rib_stat->hcas) {
+		kmem_free(rib_stat->hcas, rib_stat->hca_count *
+		    sizeof (rib_hca_t));
+		rib_stat->hcas = NULL;
+	}
+	kmem_free(rib_stat, sizeof (*rib_stat));
+	rib_stat = NULL;
 
 	mutex_enter(&rpcib.rpcib_mutex);
 	rpcib.rpcib_dip = NULL;
 	mutex_exit(&rpcib.rpcib_mutex);
-
 	mutex_destroy(&rpcib.rpcib_mutex);
 	return (DDI_SUCCESS);
 }
@@ -3905,17 +3914,28 @@
  * connection), the connection should be destroyed. A connection transitions
  * into this state when it is being destroyed.
  */
+/* ARGSUSED */
 static rdma_stat
 rib_conn_get(struct netbuf *svcaddr, int addr_type, void *handle, CONN **conn)
 {
 	CONN *cn;
 	int status = RDMA_SUCCESS;
-	rib_hca_t *hca = (rib_hca_t *)handle;
+	rib_hca_t *hca = rib_stat->hca;
 	rib_qp_t *qp;
 	clock_t cv_stat, timout;
 	ibt_path_info_t path;
 	ibt_ip_addr_t s_ip, d_ip;
 
+	if (hca == NULL)
+		return (RDMA_FAILED);
+
+	rw_enter(&rib_stat->hca->state_lock, RW_READER);
+	if (hca->state == HCA_DETACHED) {
+		rw_exit(&rib_stat->hca->state_lock);
+		return (RDMA_FAILED);
+	}
+	rw_exit(&rib_stat->hca->state_lock);
+
 again:
 	rw_enter(&hca->cl_conn_list.conn_lock, RW_READER);
 	cn = hca->cl_conn_list.conn_hd;
@@ -4269,19 +4289,31 @@
 	rib_stop_services(hca);
 	rib_close_channels(&hca->cl_conn_list);
 	rib_close_channels(&hca->srv_conn_list);
+
+	rib_mod.rdma_count--;
+
 	rw_exit(&hca->state_lock);
 
-	rib_purge_connlist(&hca->cl_conn_list);
-	rib_purge_connlist(&hca->srv_conn_list);
-
+	/*
+	 * purge will free all data structures used by CQ handlers. We don't
+	 * want to receive completions after purge, so we'll free the CQs now.
+	 */
 	(void) ibt_free_cq(hca->clnt_rcq->rib_cq_hdl);
 	(void) ibt_free_cq(hca->clnt_scq->rib_cq_hdl);
 	(void) ibt_free_cq(hca->svc_rcq->rib_cq_hdl);
 	(void) ibt_free_cq(hca->svc_scq->rib_cq_hdl);
+
+	rib_purge_connlist(&hca->cl_conn_list);
+	rib_purge_connlist(&hca->srv_conn_list);
+
 	kmem_free(hca->clnt_rcq, sizeof (rib_cq_t));
 	kmem_free(hca->clnt_scq, sizeof (rib_cq_t));
 	kmem_free(hca->svc_rcq, sizeof (rib_cq_t));
 	kmem_free(hca->svc_scq, sizeof (rib_cq_t));
+	if (stats_enabled) {
+		kstat_delete_byname_zone("unix", 0, "rpcib_cache",
+		    GLOBAL_ZONEID);
+	}
 
 	rw_enter(&hca->srv_conn_list.conn_lock, RW_READER);
 	rw_enter(&hca->cl_conn_list.conn_lock, RW_READER);
@@ -4294,6 +4326,7 @@
 		rib_rbufpool_destroy(hca, RECV_BUFFER);
 		rib_rbufpool_destroy(hca, SEND_BUFFER);
 		rib_destroy_cache(hca);
+		rdma_unregister_mod(&rib_mod);
 		(void) ibt_free_pd(hca->hca_hdl, hca->pd_hdl);
 		(void) ibt_close_hca(hca->hca_hdl);
 		hca->hca_hdl = NULL;
@@ -4306,12 +4339,16 @@
 		while (hca->inuse)
 			cv_wait(&hca->cb_cv, &hca->inuse_lock);
 		mutex_exit(&hca->inuse_lock);
+
+		rdma_unregister_mod(&rib_mod);
+
 		/*
 		 * conn_lists are now NULL, so destroy
 		 * buffers, close hca and be done.
 		 */
 		rib_rbufpool_destroy(hca, RECV_BUFFER);
 		rib_rbufpool_destroy(hca, SEND_BUFFER);
+		rib_destroy_cache(hca);
 		(void) ibt_free_pd(hca->hca_hdl, hca->pd_hdl);
 		(void) ibt_close_hca(hca->hca_hdl);
 		hca->hca_hdl = NULL;
@@ -4385,7 +4422,9 @@
 			kmem_free(rb, sizeof (rib_lrc_entry_t));
 		}
 		mutex_destroy(&rcas->node_lock);
-		kmem_cache_free(hca->server_side_cache, rcas);
+		if (hca->server_side_cache) {
+			kmem_cache_free(hca->server_side_cache, rcas);
+		}
 		if ((cache_allocation) < cache_limit) {
 			rw_exit(&hca->avl_rw_lock);
 			return;
@@ -4417,8 +4456,12 @@
 		ddi_taskq_destroy(hca->reg_cache_clean_up);
 		hca->reg_cache_clean_up = NULL;
 	}
-	if (!hca->avl_init) {
-		kmem_cache_destroy(hca->server_side_cache);
+	if (hca->avl_init) {
+		rib_server_side_cache_reclaim((void *)hca);
+		if (hca->server_side_cache) {
+			kmem_cache_destroy(hca->server_side_cache);
+			hca->server_side_cache = NULL;
+		}
 		avl_destroy(&hca->avl_tree);
 		mutex_destroy(&hca->cache_allocation);
 		rw_destroy(&hca->avl_rw_lock);
--- a/usr/src/uts/common/rpc/rpcmod.c	Thu Feb 05 11:59:59 2009 -0500
+++ b/usr/src/uts/common/rpc/rpcmod.c	Thu Feb 05 10:03:55 2009 -0800
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 /* Copyright (c) 1990 Mentat Inc. */
@@ -159,6 +159,10 @@
 	 */
 	rw_init(&rdma_lock, NULL, RW_DEFAULT, NULL);
 	mutex_init(&rdma_modload_lock, NULL, MUTEX_DEFAULT, NULL);
+
+	cv_init(&rdma_wait.svc_cv, NULL, CV_DEFAULT, NULL);
+	mutex_init(&rdma_wait.svc_lock, NULL, MUTEX_DEFAULT, NULL);
+
 	mt_kstat_init();
 
 	/*
--- a/usr/src/uts/common/rpc/svc.c	Thu Feb 05 11:59:59 2009 -0500
+++ b/usr/src/uts/common/rpc/svc.c	Thu Feb 05 10:03:55 2009 -0800
@@ -20,7 +20,7 @@
  */
 
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -2585,21 +2585,24 @@
  * to cleanup the pool and destroy the xprt in svc_queueclose()
  */
 void
-rdma_stop(rdma_xprt_group_t rdma_xprts)
+rdma_stop(rdma_xprt_group_t *rdma_xprts)
 {
 	SVCMASTERXPRT *xprt;
 	rdma_xprt_record_t *curr_rec;
 	queue_t *q;
 	mblk_t *mp;
-	int i;
+	int i, rtg_count;
 	SVCPOOL *pool;
 
-	if (rdma_xprts.rtg_count == 0)
+	if (rdma_xprts->rtg_count == 0)
 		return;
 
-	for (i = 0; i < rdma_xprts.rtg_count; i++) {
-		curr_rec = rdma_xprts.rtg_listhead;
-		rdma_xprts.rtg_listhead = curr_rec->rtr_next;
+	rtg_count = rdma_xprts->rtg_count;
+
+	for (i = 0; i < rtg_count; i++) {
+		curr_rec = rdma_xprts->rtg_listhead;
+		rdma_xprts->rtg_listhead = curr_rec->rtr_next;
+		rdma_xprts->rtg_count--;
 		curr_rec->rtr_next = NULL;
 		xprt = curr_rec->rtr_xprt_ptr;
 		q = xprt->xp_wq;
@@ -2617,8 +2620,13 @@
 			mp->b_next = (mblk_t *)0;
 			pool->p_reqs--;
 			mutex_exit(&pool->p_req_lock);
-			if (mp)
+			if (mp) {
+				rdma_recv_data_t *rdp = (rdma_recv_data_t *)
+				    mp->b_rptr;
+				RDMA_BUF_FREE(rdp->conn, &rdp->rpcmsg);
+				RDMA_REL_CONN(rdp->conn);
 				freemsg(mp);
+			}
 		}
 		mutex_exit(&xprt->xp_req_lock);
 		svc_queueclose(q);
@@ -2631,7 +2639,7 @@
 		 * based master transport handle.
 		 */
 		kmem_free(curr_rec, sizeof (rdma_xprt_record_t));
-		if (!rdma_xprts.rtg_listhead)
+		if (!rdma_xprts->rtg_listhead)
 			break;
 	}
 }
--- a/usr/src/uts/common/rpc/svc.h	Thu Feb 05 11:59:59 2009 -0500
+++ b/usr/src/uts/common/rpc/svc.h	Thu Feb 05 10:03:55 2009 -0800
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
@@ -804,7 +804,7 @@
 			rdma_xprt_group_t *);
 extern void	svc_rdma_kstop(SVCMASTERXPRT *);
 extern void	svc_rdma_kdestroy(SVCMASTERXPRT *);
-extern void	rdma_stop(rdma_xprt_group_t);
+extern void	rdma_stop(rdma_xprt_group_t *);
 
 /*
  * GSS cleanup method.
--- a/usr/src/uts/common/rpc/svc_rdma.c	Thu Feb 05 11:59:59 2009 -0500
+++ b/usr/src/uts/common/rpc/svc_rdma.c	Thu Feb 05 10:03:55 2009 -0800
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
@@ -360,7 +360,10 @@
 	 * Create a listener for  module at this port
 	 */
 
-	(*rmod->rdma_ops->rdma_svc_listen)(svcdata);
+	if (rmod->rdma_count != 0)
+		(*rmod->rdma_ops->rdma_svc_listen)(svcdata);
+	else
+		svcdata->err_code = RDMA_FAILED;
 }
 
 void
@@ -373,9 +376,13 @@
 	rmod = ((struct rdma_data *)xprt->xp_p2)->r_mod;
 
 	/*
-	 * Call the stop listener routine for each plugin.
+	 * Call the stop listener routine for each plugin. If rdma_count is
+	 * already zero set active to zero.
 	 */
-	(*rmod->rdma_ops->rdma_svc_stop)(svcdata);
+	if (rmod->rdma_count != 0)
+		(*rmod->rdma_ops->rdma_svc_stop)(svcdata);
+	else
+		svcdata->active = 0;
 	if (svcdata->active)
 		DTRACE_PROBE(krpc__e__svcrdma__kstop);
 }
--- a/usr/src/uts/common/sys/ib/ibnex/ibnex.h	Thu Feb 05 11:59:59 2009 -0500
+++ b/usr/src/uts/common/sys/ib/ibnex/ibnex.h	Thu Feb 05 10:03:55 2009 -0800
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -217,10 +217,22 @@
 
 	/* Pseudo nodes inited from ibnex_get_snapshot? */
 	int			ibnex_pseudo_inited;
+	/*
+	 * IOC list used by all HCAs.
+	 */
+	kcondvar_t		ibnex_ioc_list_cv;
+	uint32_t		ibnex_ioc_list_state;
 	ibdm_ioc_info_t		*ibnex_ioc_list;
 } ibnex_t;
 
 /*
+ * States for ibnex_ioc_list_state
+ */
+#define	IBNEX_IOC_LIST_READY	0x0
+#define	IBNEX_IOC_LIST_RENEW	0x1
+#define	IBNEX_IOC_LIST_ACCESS	0x2
+
+/*
  * States for ibnex_reprobe_state
  *	0 to REPROBE_ALL_PROGRESS
  *		Reprobe all when no reprobes pending