PSARC 2010/085 IPoIB Administration Enhancement
author Ramaswamy Tummala <Ramaswamy.Tummala@Sun.COM>
Wed, 14 Apr 2010 10:26:18 -0700
changeset 12163 f0cf0084e756
parent 12162 5cbbd1bec773
child 12164 0eb8d6741e37
PSARC 2010/085 IPoIB Administration Enhancement 6837574 IPoIB Administration Enhancement - PSARC 2010/085 6864899 IB datalink names need to be consistent between cluster controllers 6855737 cfgadm status for ibd attachment points gets to inconsistent state 6883212 ibd: add Brussels framework support 6927048 IBD driver should be hardened to handle late HCA port initialization issue 6827237 Fix warlock errors in ibnex
usr/src/cmd/Makefile
usr/src/cmd/Makefile.check
usr/src/cmd/datadm/Makefile
usr/src/cmd/datadm/datadm.c
usr/src/cmd/dladm/dladm.c
usr/src/cmd/ibd_upgrade/Makefile
usr/src/cmd/ibd_upgrade/ibd-post-upgrade
usr/src/cmd/ibd_upgrade/ibd-post-upgrade.xml
usr/src/cmd/ibd_upgrade/ibd_delete_link.c
usr/src/cmd/ibd_upgrade/ibd_upgrade.sh
usr/src/cmd/rcm_daemon/Makefile.com
usr/src/cmd/rcm_daemon/common/ibpart_rcm.c
usr/src/cmd/svc/milestone/net-nwam
usr/src/cmd/svc/milestone/net-physical
usr/src/lib/libdladm/Makefile
usr/src/lib/libdladm/Makefile.com
usr/src/lib/libdladm/common/libdladm.c
usr/src/lib/libdladm/common/libdladm.h
usr/src/lib/libdladm/common/libdladm_impl.h
usr/src/lib/libdladm/common/libdlib.c
usr/src/lib/libdladm/common/libdlib.h
usr/src/lib/libdladm/common/libdllink.c
usr/src/lib/libdladm/common/linkprop.c
usr/src/lib/libdladm/common/llib-ldladm
usr/src/lib/libdladm/common/mapfile-vers
usr/src/lib/udapl/udapl_tavor/Makefile.com
usr/src/lib/udapl/udapl_tavor/common/dapl_name_service.c
usr/src/lib/udapl/udapl_tavor/include/dapl.h
usr/src/lib/udapl/udapl_tavor/tavor/dapl_tavor_hca.c
usr/src/pkg/manifests/SUNWcs.mf
usr/src/pkg/manifests/driver-network-ib.mf
usr/src/pkg/manifests/driver-network-ibd.mf
usr/src/pkg/manifests/driver-network-ibp.mf
usr/src/pkg/manifests/system-header.mf
usr/src/tools/scripts/bfu.sh
usr/src/uts/common/Makefile.files
usr/src/uts/common/io/dld/dld_drv.c
usr/src/uts/common/io/ib/clients/ibd/ibd.c
usr/src/uts/common/io/ib/clients/ibd/ibd.conf
usr/src/uts/common/io/ib/clients/ibd/ibd_cm.c
usr/src/uts/common/io/ib/clients/ibd/ibp.conf
usr/src/uts/common/io/ib/ibnex/ib.conf
usr/src/uts/common/io/ib/ibnex/ibnex.c
usr/src/uts/common/io/ib/ibnex/ibnex_hca.c
usr/src/uts/common/io/ib/ibnex/ibnex_ioctl.c
usr/src/uts/common/io/ib/ibtl/ibtl_impl.c
usr/src/uts/common/io/ib/ibtl/ibtl_misc.c
usr/src/uts/common/io/ib/inc.flg
usr/src/uts/common/io/ib/mgt/ibcm/ibcm_arp.c
usr/src/uts/common/io/ib/mgt/ibcm/ibcm_ti.c
usr/src/uts/common/io/warlock/ib.wlcmd
usr/src/uts/common/os/swapgeneric.c
usr/src/uts/common/rpc/ib.h
usr/src/uts/common/sys/Makefile
usr/src/uts/common/sys/dld_ioc.h
usr/src/uts/common/sys/dls_mgmt.h
usr/src/uts/common/sys/ib/clients/ibd/ibd.h
usr/src/uts/common/sys/ib/ibnex/ibnex.h
usr/src/uts/common/sys/ib/ibtl/ibti_common.h
usr/src/uts/common/sys/ib/ibtl/ibtl_status.h
usr/src/uts/common/sys/ib/mgt/ibcm/ibcm_arp.h
usr/src/uts/common/sys/ibpart.h
usr/src/uts/common/sys/mac.h
usr/src/uts/intel/Makefile.intel.shared
usr/src/uts/intel/ibd/Makefile
usr/src/uts/intel/ibp/Makefile
usr/src/uts/sparc/Makefile.sparc.shared
usr/src/uts/sparc/ibd/Makefile
usr/src/uts/sparc/ibp/Makefile
--- a/usr/src/cmd/Makefile	Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/cmd/Makefile	Wed Apr 14 10:26:18 2010 -0700
@@ -195,6 +195,7 @@
 	hotplug		\
 	hotplugd	\
 	hwdata		\
+	ibd_upgrade	\
 	id		\
 	idmap		\
 	infocmp		\
--- a/usr/src/cmd/Makefile.check	Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/cmd/Makefile.check	Wed Apr 14 10:26:18 2010 -0700
@@ -19,8 +19,7 @@
 # CDDL HEADER END
 #
 #
-# Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
-# Use is subject to license terms.
+# Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
 #
 
 include ../Makefile.master
@@ -43,6 +42,7 @@
 	fcinfo				\
 	fcoesvc				\
 	fm				\
+	ibd_upgrade			\
 	intrd				\
 	iscsid				\
 	iscsitsvc			\
--- a/usr/src/cmd/datadm/Makefile	Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/cmd/datadm/Makefile	Wed Apr 14 10:26:18 2010 -0700
@@ -2,9 +2,8 @@
 # CDDL HEADER START
 #
 # The contents of this file are subject to the terms of the
-# Common Development and Distribution License, Version 1.0 only
-# (the "License").  You may not use this file except in compliance
-# with the License.
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
 #
 # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 # or http://www.opensolaris.org/os/licensing.
@@ -20,10 +19,7 @@
 # CDDL HEADER END
 #
 #
-# Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
-# Use is subject to license terms.
-#
-#ident	"%Z%%M%	%I%	%E% SMI"
+# Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
 #
 # cmd/datadm/Makefile
 #
@@ -38,7 +34,7 @@
 SRCS = $(OBJS:%.o=%.c)
 
 CFLAGS += $(CCVERBOSE)
-LDLIBS += -ldevinfo -lsocket
+LDLIBS += -ldlpi -ldladm -lsocket
 
 .KEEP_STATE:
 
--- a/usr/src/cmd/datadm/datadm.c	Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/cmd/datadm/datadm.c	Wed Apr 14 10:26:18 2010 -0700
@@ -19,14 +19,14 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #include <unistd.h>
 #include <sys/types.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
+#include <sys/stat.h>
 #include <netinet/in.h>
 #include <arpa/inet.h>
 #include <net/if.h>
@@ -37,7 +37,12 @@
 #include <errno.h>
 #include <libintl.h>
 #include <locale.h>
-#include <libdevinfo.h>
+#include <fcntl.h>
+#include <libdlpi.h>
+#include <libdladm.h>
+#include <libdlib.h>
+#include <libdllink.h>
+#include <sys/ib/ibnex/ibnex_devctl.h>
 
 #define	DATADM_OP_VIEW		0x0000
 #define	DATADM_OP_UPDATE	0x0001
@@ -48,7 +53,6 @@
 #define	DATADM_LINESZ		1024
 #define	DATADM_NUM_SP_TOKENS	7
 #define	DATADM_NUM_DAT_TOKENS	8
-#define	DATADM_IA_NAME		"ibd"
 #define	DATADM_DRV_NAME		"driver_name"
 #define	DATADM_MAX_TOKENS	16
 
@@ -106,7 +110,7 @@
  * are added when sp entry processing occurs. duplicate
  * sp entries are not added to this list. the ia_list may
  * be built statically using the information in dat.conf or
- * dynamically using libdevinfo. similar to the sp_list,
+ * dynamically. similar to the sp_list,
  * the ia_list contains only unique entries.
  */
 typedef struct datadm_hca_entry {
@@ -119,12 +123,12 @@
 /*
  * an ia_entry is created when a new ia name is encountered
  * during sp_entry processing or when a new ia name is
- * discovered by datadm_fill_ia_list. ia_entry holds the ia
+ * discovered by datadm_build_ia_lists. ia_entry holds the ia
  * device's instance number.
  */
 typedef struct datadm_ia_entry {
 	datadm_entry_t		iae_header;
-	int			iae_devnum;
+	char			iae_name[MAXLINKNAMELEN];
 } datadm_ia_entry_t;
 
 /*
@@ -138,6 +142,11 @@
 	char			*cmnt_line;
 } datadm_cmnt_entry_t;
 
+typedef struct datadm_hca_find_by_name {
+	char			*hf_name;	/* name to match he_name */
+	datadm_hca_entry_t	*hf_hca_entry;	/* set to the match */
+} datadm_hca_find_by_name_t;
+
 /*
  * 2nd argument to datadm_hca_entry_find.
  * hf_hca_entry is filled in if an hca_entry with
@@ -151,20 +160,20 @@
 /*
  * 2nd argument to datadm_ia_entry_find.
  * if_ia_entry is filled in if an ia_entry with
- * a matching ia_devnum is found.
+ * a matching ia_name is found.
  */
 typedef struct datadm_ia_find {
-	int			if_ia_devnum;
+	char			*if_ia_name;
 	datadm_ia_entry_t	*if_ia_entry;
 } datadm_ia_find_t;
 
 /*
- * this gets passed to datadm_fill_ia_list.
- * we do this to avoid regenerating the device
- * tree for each hca_entry we process.
+ * this gets passed to datadm_add_plink.
  */
 typedef struct datadm_fill_ia_list {
-	di_node_t		ia_root_node;
+	datadm_list_t		*ia_hca_list;
+	dladm_handle_t		ia_dlh;
+	int			ia_ibnex_fd;
 	int			ia_sock_fd_v4;
 	int			ia_sock_fd_v6;
 } datadm_fill_ia_list_t;
@@ -183,10 +192,8 @@
 static datadm_list_t		datadm_conf_header;
 static char			*datadm_conf_header_default =
 	"#\n"
-	"# Copyright 2004 Sun Microsystems, Inc.  All rights reserved.\n"
-	"# Use is subject to license terms.\n"
-	"#\n"
-	"# ident \"@(#)dat.conf   1.1     03/08/26 SMI\"\n"
+	"# Copyright (c) 2003, 2010, Oracle and/or its affiliates. "
+	"All rights reserved.\n"
 	"#\n"
 	"# DAT configuration file.\n"
 	"#\n"
@@ -215,7 +222,7 @@
 static int datadm_parse_libpath(char *, datadm_sp_entry_t *);
 static int datadm_parse_sp_version(char *, datadm_sp_entry_t *);
 static int datadm_parse_sp_data(char *, datadm_sp_entry_t *);
-static int datadm_parse_ia_name(char *, int *);
+static int datadm_parse_ia_name(char *, char *);
 
 /*
  * utility functions
@@ -245,13 +252,13 @@
  */
 static int datadm_parse_sp_conf(datadm_list_t *);
 static int datadm_parse_dat_conf(datadm_list_t *);
-static int datadm_process_sp_entry(datadm_list_t *, datadm_sp_entry_t *, int);
+static int datadm_process_sp_entry(datadm_list_t *, datadm_sp_entry_t *,
+    char *);
 
 /*
  * ia devices discovery
  */
 static int datadm_build_ia_lists(datadm_list_t *);
-static int datadm_fill_ia_list(datadm_hca_entry_t *, datadm_fill_ia_list_t *);
 
 /*
  * helper function for OP_REMOVE
@@ -429,27 +436,11 @@
  * parses the ia_name field in dat.conf
  */
 static int
-datadm_parse_ia_name(char *str, int *ia_devnum)
+datadm_parse_ia_name(char *str, char *ia_name)
 {
-	int	len;
-	int	i, start;
-
-	len = strlen(DATADM_IA_NAME);
-	if (strncmp(str, DATADM_IA_NAME, len) != 0) {
+	if (strlen(str) >= MAXLINKNAMELEN)
 		return (-1);
-	}
-	start = i = len;
-	len = strlen(str);
-	if (str[i] == '\0') {
-		return (-1);
-	}
-	for (; i < len; i++) {
-		if (!isdigit(str[i])) break;
-	}
-	if (i != len) {
-		return (-1);
-	}
-	*ia_devnum = atoi(str + start);
+	(void) strlcpy(ia_name, str, MAXLINKNAMELEN);
 	return (0);
 }
 
@@ -803,7 +794,7 @@
 static int
 datadm_ia_entry_find(datadm_ia_entry_t *i1, datadm_ia_find_t *iaf)
 {
-	if (i1->iae_devnum == iaf->if_ia_devnum) {
+	if (strcmp(i1->iae_name, iaf->if_ia_name) == 0) {
 		iaf->if_ia_entry = i1;
 		return (1);
 	}
@@ -930,7 +921,7 @@
  */
 static int
 datadm_process_sp_entry(datadm_list_t *hca_list, datadm_sp_entry_t *sp_entry,
-	int ia_devnum)
+	char *ia_name)
 {
 	datadm_hca_find_t	hca_find;
 	datadm_ia_find_t	ia_find;
@@ -963,10 +954,10 @@
 	} else {
 		hca_entry = hca_find.hf_hca_entry;
 	}
-	if (ia_devnum == -1) {
+	if (ia_name == NULL) {
 		goto put_sp_entry;
 	}
-	ia_find.if_ia_devnum = ia_devnum;
+	ia_find.if_ia_name = ia_name;
 	ia_find.if_ia_entry = NULL;
 	(void) datadm_walk_list(&hca_entry->he_ia_list,
 	    (int (*)(datadm_entry_t *, void *))datadm_ia_entry_find, &ia_find);
@@ -982,7 +973,7 @@
 		if (ia_entry == NULL) {
 			return (-1);
 		}
-		ia_entry->iae_devnum = ia_devnum;
+		(void) strlcpy(ia_entry->iae_name, ia_name, MAXLINKNAMELEN);
 		datadm_enqueue_entry(&hca_entry->he_ia_list,
 		    (datadm_entry_t *)ia_entry);
 	}
@@ -1069,7 +1060,7 @@
 			}
 
 			retval = datadm_process_sp_entry(hca_list,
-			    sp_entry, -1);
+			    sp_entry, NULL);
 			if (retval != 0) {
 				datadm_free_sp_entry(sp_entry);
 				if (retval == 1) {
@@ -1168,7 +1159,7 @@
 		}
 		if (token_count == DATADM_NUM_DAT_TOKENS) {
 			int i = 0;
-			int ia_devnum = -1;
+			char ia_name[MAXLINKNAMELEN];
 
 			/*
 			 * we stop saving comment lines once
@@ -1195,7 +1186,7 @@
 					 * does not belong to an
 					 * sp_entry
 					 */
-					arg = (void *)&ia_devnum;
+					arg = (void *)ia_name;
 				} else {
 					arg = (void *)sp_entry;
 				}
@@ -1217,10 +1208,12 @@
 			 * doing update
 			 */
 			if (datadm_args.da_op_type == DATADM_OP_UPDATE) {
-				ia_devnum = -1;
+				retval = datadm_process_sp_entry(hca_list,
+				    sp_entry, NULL);
+			} else {
+				retval = datadm_process_sp_entry(hca_list,
+				    sp_entry, ia_name);
 			}
-			retval = datadm_process_sp_entry(hca_list, sp_entry,
-			    ia_devnum);
 			if (retval != 0) {
 				datadm_free_sp_entry(sp_entry);
 				if (retval == 1) {
@@ -1249,96 +1242,6 @@
 }
 
 /*
- * discovers all ibd devices under a particular hca
- */
-static int
-datadm_fill_ia_list(datadm_hca_entry_t *hca, datadm_fill_ia_list_t *args)
-{
-	di_node_t	root_node;
-	di_node_t	hca_node;
-	int		retval = 0;
-	int		sv4, sv6;
-
-	root_node = args->ia_root_node;
-	sv4 = args->ia_sock_fd_v4;
-	sv6 = args->ia_sock_fd_v6;
-
-	hca_node = di_drv_first_node(hca->he_name, root_node);
-	if (hca_node == DI_NODE_NIL) {
-		return (0);
-	}
-	while (hca_node != DI_NODE_NIL) {
-		di_node_t	ibd_node;
-
-		ibd_node = di_drv_first_node(DATADM_IA_NAME, hca_node);
-		while (ibd_node != DI_NODE_NIL) {
-			datadm_ia_find_t	ia_find;
-			datadm_ia_entry_t	*ia_entry;
-			struct lifreq		req;
-			int			devnum, rval;
-
-			if (hca_node != di_parent_node(ibd_node)) {
-				ibd_node = di_drv_next_node(ibd_node);
-				continue;
-			}
-			devnum = di_instance(ibd_node);
-			if (devnum == -1) {
-				ibd_node = di_drv_next_node(ibd_node);
-				continue;
-			}
-
-			(void) snprintf(req.lifr_name, sizeof (req.lifr_name),
-			    "%s%d", DATADM_IA_NAME, devnum);
-			/*
-			 * we don't really need to know the ip address.
-			 * we just want to check if the device is plumbed
-			 * or not.
-			 */
-			rval = ioctl(sv4, SIOCGLIFADDR, (caddr_t)&req);
-			if (rval != 0) {
-				/*
-				 * we try v6 if the v4 address isn't found.
-				 */
-				rval = ioctl(sv6, SIOCGLIFADDR, (caddr_t)&req);
-				if (rval != 0) {
-					ibd_node = di_drv_next_node(ibd_node);
-					continue;
-				}
-			}
-			ia_find.if_ia_devnum = devnum;
-			ia_find.if_ia_entry = NULL;
-			(void) datadm_walk_list(&hca->he_ia_list,
-			    (int (*)(datadm_entry_t *, void *))
-			    datadm_ia_entry_find, &ia_find);
-
-			if (ia_find.if_ia_entry == NULL) {
-				/*
-				 * we insert an ia entry only if
-				 * it is unique.
-				 */
-				ia_entry = datadm_alloc_ia_entry();
-				if (ia_entry == NULL) {
-					retval = -1;
-					break;
-				}
-				ia_entry->iae_devnum = devnum;
-				datadm_enqueue_entry(&hca->he_ia_list,
-				    (datadm_entry_t *)ia_entry);
-			} else {
-				ia_entry = ia_find.if_ia_entry;
-			}
-			ibd_node = di_drv_next_node(ibd_node);
-		}
-		hca_node = di_drv_next_node(hca_node);
-	}
-	if (retval != 0) {
-		datadm_free_list(&hca->he_ia_list,
-		    (void (*)(datadm_entry_t *))datadm_free_ia_entry);
-	}
-	return (0);
-}
-
-/*
  * used by OP_REMOVE to invalidate common sp entries between hl1 and hl2.
  * invalid sp entries will be ignored by datadm_generate_dat_conf.
  */
@@ -1382,45 +1285,146 @@
 	}
 }
 
+/* walker callback: on a name match, store h1 in hf and return 1 */
+static int
+datadm_hca_entry_find_by_name(datadm_hca_entry_t *h1,
+    datadm_hca_find_by_name_t *hf)
+{
+	if (!datadm_str_match(h1->he_name, hf->hf_name))
+		return (0);
+	hf->hf_hca_entry = h1;
+	return (1);
+}
+
+static datadm_hca_entry_t *	/* entry named hca_driver_name or NULL */
+datadm_hca_lookup_by_name(datadm_list_t *hca_list, char *hca_driver_name)
+{
+	datadm_hca_find_by_name_t	hf;	/* walker in/out argument */
+
+	hf.hf_name = hca_driver_name;
+	hf.hf_hca_entry = NULL;		/* stays NULL unless a name matches */
+	(void) datadm_walk_list(hca_list,
+	    (int (*)(datadm_entry_t *, void *))datadm_hca_entry_find_by_name,
+	    &hf);
+	return (hf.hf_hca_entry);
+}
+
+static boolean_t	/* dlpi_walk() callback; B_FALSE on every path */
+datadm_add_plink(char *linkname, datadm_fill_ia_list_t *ia_args)
+{
+	datalink_class_t	class;
+	datalink_id_t		linkid;
+	dladm_ib_attr_t		ib_attr;
+	ibnex_ctl_query_hca_t	query_hca;
+	datadm_hca_entry_t	*hca;
+	struct lifreq		req;
+	datadm_ia_find_t	ia_find;
+	datadm_ia_entry_t	*ia_entry;
+
+	if ((dladm_name2info(ia_args->ia_dlh, linkname, &linkid, NULL, &class,
+	    NULL) != DLADM_STATUS_OK) ||
+	    (class != DATALINK_CLASS_PART) ||
+	    (dladm_part_info(ia_args->ia_dlh, linkid, &ib_attr,
+	    DLADM_OPT_ACTIVE) != DLADM_STATUS_OK)) {
+		return (B_FALSE);	/* not an active IB partition link */
+	}
+
+	(void) strlcpy(req.lifr_name, linkname, sizeof (req.lifr_name));
+	/*
+	 * we don't really need to know the ip address.
+	 * we just want to check if the link is plumbed
+	 * or not; unplumbed links are not recorded.
+	 */
+	if (ioctl(ia_args->ia_sock_fd_v4, SIOCGLIFADDR, (caddr_t)&req) != 0) {
+		/*
+		 * we try v6 if the v4 address isn't found.
+		 */
+		if (ioctl(ia_args->ia_sock_fd_v6, SIOCGLIFADDR,
+		    (caddr_t)&req) != 0)
+			return (B_FALSE);
+	}
+
+	bzero(&query_hca, sizeof (query_hca));
+	query_hca.hca_guid = ib_attr.dia_hca_guid;
+	if (ioctl(ia_args->ia_ibnex_fd, IBNEX_CTL_QUERY_HCA, &query_hca) == -1)
+		return (B_FALSE);	/* HCA query failed */
+
+	if ((hca = datadm_hca_lookup_by_name(ia_args->ia_hca_list,
+	    query_hca.hca_info.hca_driver_name)) == NULL)
+		return (B_FALSE);	/* HCA driver not in ia_hca_list */
+
+	ia_find.if_ia_name = linkname;
+	ia_find.if_ia_entry = NULL;
+	(void) datadm_walk_list(&hca->he_ia_list,
+	    (int (*)(datadm_entry_t *, void *))
+	    datadm_ia_entry_find, &ia_find);
+
+	if (ia_find.if_ia_entry == NULL) {
+		/*
+		 * we insert an ia entry only if it is unique; an
+		 * allocation failure just leaves this link out.
+		 */
+		ia_entry = datadm_alloc_ia_entry();
+		if (ia_entry != NULL) {
+			(void) strlcpy(ia_entry->iae_name, linkname,
+			    MAXLINKNAMELEN);
+			datadm_enqueue_entry(&hca->he_ia_list,
+			    (datadm_entry_t *)ia_entry);
+		}
+	}
+
+	return (B_FALSE);	/* presumably keeps dlpi_walk() going */
+}
+
 /*
- * applies datadm_fill_ia_list on each hca_list element
+ * build ia lists for each hca_list element
  */
 static int
 datadm_build_ia_lists(datadm_list_t *hca_list)
 {
+	dladm_handle_t		dlh;
 	datadm_fill_ia_list_t	ia_args;
-	di_node_t		root_node;
-	int			retval = 0;
-	int			sv4, sv6;
+	int			rv = -1;
+	int			fd = -1;
+	int			sv4 = -1;
+	int			sv6 = -1;
 
-	root_node = di_init("/", DINFOCPYALL);
-	if (root_node == DI_NODE_NIL) {
-		perror("datadm: di_init");
+	if (dladm_open(&dlh) != DLADM_STATUS_OK)
 		return (-1);
+
+	if ((fd = open(IBNEX_DEVCTL_DEV, O_RDONLY)) < 0)
+		goto out;
+
+	if ((sv4 = socket(AF_INET, SOCK_DGRAM, 0)) < 0) {
+		perror("datadm: socket");
+		goto out;
 	}
-	sv4 = socket(AF_INET, SOCK_DGRAM, 0);
-	if (sv4 < 0) {
+
+	if ((sv6 = socket(AF_INET6, SOCK_DGRAM, 0)) < 0) {
 		perror("datadm: socket");
-		di_fini(root_node);
-		return (-1);
+		goto out;
 	}
-	sv6 = socket(AF_INET6, SOCK_DGRAM, 0);
-	if (sv6 < 0) {
-		perror("datadm: socket");
-		di_fini(root_node);
-		return (-1);
-	}
-	ia_args.ia_root_node = root_node;
+
+	ia_args.ia_hca_list = hca_list;
+	ia_args.ia_dlh = dlh;
+	ia_args.ia_ibnex_fd = fd;
 	ia_args.ia_sock_fd_v4 = sv4;
 	ia_args.ia_sock_fd_v6 = sv6;
 
-	retval = datadm_walk_list(hca_list,
-	    (int (*)(datadm_entry_t *, void *))datadm_fill_ia_list, &ia_args);
+	dlpi_walk((boolean_t (*) (const char *, void *))datadm_add_plink,
+	    &ia_args, 0);
+	rv = 0;
 
-	(void) close(sv4);
-	(void) close(sv6);
-	di_fini(root_node);
-	return (retval);
+out:
+	if (sv4 != -1)
+		(void) close(sv4);
+	if (sv6 != -1)
+		(void) close(sv6);
+	if (fd != -1)
+		(void) close(fd);
+
+	dladm_close(dlh);
+	return (rv);
 }
 
 static int
@@ -1430,8 +1434,8 @@
 	int	retval;
 
 	retval = fprintf(outfile,
-	    "%s%d  %s%d.%d  %s  %s  %s  %s%d.%d  \"%s\"  \"%s%s%s\"\n",
-	    DATADM_IA_NAME, ia_entry->iae_devnum,
+	    "%s  %s%d.%d  %s  %s  %s  %s%d.%d  \"%s\"  \"%s%s%s\"\n",
+	    ia_entry->iae_name,
 	    (sp_entry->spe_api_version.dv_name ?
 	    sp_entry->spe_api_version.dv_name : ""),
 	    sp_entry->spe_api_version.dv_major,
--- a/usr/src/cmd/dladm/dladm.c	Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/cmd/dladm/dladm.c	Wed Apr 14 10:26:18 2010 -0700
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #include <stdio.h>
@@ -55,6 +54,7 @@
 #include <libdlwlan.h>
 #include <libdlvlan.h>
 #include <libdlvnic.h>
+#include <libdlib.h>
 #include <libdlether.h>
 #include <libdliptun.h>
 #include <libdlsim.h>
@@ -66,6 +66,7 @@
 #include <libdlvnic.h>
 #include <sys/types.h>
 #include <sys/socket.h>
+#include <sys/ib/ib_types.h>
 #include <sys/processor.h>
 #include <netinet/in.h>
 #include <arpa/inet.h>
@@ -155,6 +156,25 @@
 	ofmt_handle_t	vs_ofmt;
 } show_vnic_state_t;
 
+typedef struct show_part_state {
+	datalink_id_t	ps_over_id;	/* -l link ID, or DATALINK_ALL_LINKID */
+	char		ps_part[MAXLINKNAMELEN];	/* <part-link> name */
+	boolean_t	ps_parsable;	/* -p given */
+	boolean_t	ps_found;	/* a partition passed the -l filter */
+	dladm_status_t	ps_status;	/* result of the last print_part() */
+	uint32_t	ps_flags;	/* DLADM_OPT_* for dladm_part_info() */
+	ofmt_handle_t	ps_ofmt;
+} show_part_state_t;
+
+typedef struct show_ib_state {
+	datalink_id_t	is_link_id;
+	char		is_link[MAXLINKNAMELEN];
+	boolean_t	is_parsable;
+	dladm_status_t	is_status;
+	uint32_t	is_flags;
+	ofmt_handle_t	is_ofmt;
+} show_ib_state_t;
+
 typedef struct show_usage_state_s {
 	boolean_t	us_plot;
 	boolean_t	us_parsable;
@@ -189,6 +209,8 @@
 static cmdfunc_t do_show_ether;
 static cmdfunc_t do_create_vnic, do_delete_vnic, do_show_vnic;
 static cmdfunc_t do_up_vnic;
+static cmdfunc_t do_create_part, do_delete_part, do_show_part, do_show_ib;
+static cmdfunc_t do_up_part;
 static cmdfunc_t do_create_etherstub, do_delete_etherstub, do_show_etherstub;
 static cmdfunc_t do_create_simnet, do_modify_simnet;
 static cmdfunc_t do_delete_simnet, do_show_simnet, do_up_simnet;
@@ -200,6 +222,8 @@
 
 static void 	do_up_vnic_common(int, char **, const char *, boolean_t);
 
+static int show_part(dladm_handle_t, datalink_id_t, void *);
+
 static void	altroot_cmd(char *, int, char **);
 static int	show_linkprop_onelink(dladm_handle_t, datalink_id_t, void *);
 
@@ -327,6 +351,17 @@
 	    "    show-vnic        [-pP] [-l <link>] [-s [-i <interval>]] "
 	    "[<link>]\n"						},
 	{ "up-vnic",		do_up_vnic,		NULL		},
+	{ "create-part",	do_create_part,
+	    "    create-part      [-t] [-f] -l <link> [-P <pkey>]\n"
+	    "\t\t     [-R <root-dir>] <part-link>"			},
+	{ "delete-part",	do_delete_part,
+	    "    delete-part      [-t] [-R <root-dir>] <part-link>"},
+	{ "show-part",		do_show_part,
+	    "    show-part        [-pP] [-o <field>,...][-l <linkover>]\n"
+	    "\t\t     [<part-link>]"		},
+	{ "show-ib",		do_show_ib,
+	    "    show-ib          [-p] [-o <field>,...] [<link>]\n"	},
+	{ "up-part",		do_up_part,		NULL		},
 	{ "create-etherstub",	do_create_etherstub,
 	    "    create-etherstub [-t] <link>"				},
 	{ "delete-etherstub",	do_delete_etherstub,
@@ -483,6 +518,25 @@
 	{ 0, 0, 0, 0 }
 };
 
+static const struct option part_lopts[] = {
+	{"temporary",	no_argument,		0, 't'  },
+	{"pkey",	required_argument,	0, 'P'  },
+	{"link",	required_argument,	0, 'l'  },
+	{"force",	no_argument,		0, 'f'  },
+	{"root-dir",	required_argument,	0, 'R'  },
+	{"prop",	required_argument,	0, 'p'  },
+	{ 0, 0, 0, 0 }
+};
+
+static const struct option show_part_lopts[] = {
+	{"parsable",	no_argument,		0, 'p'  },
+	{"parseable",	no_argument,		0, 'p'  },
+	{"link",	required_argument,	0, 'l'  },
+	{"persistent",	no_argument,		0, 'P'  },
+	{"output",	required_argument,	0, 'o'  },
+	{ 0, 0, 0, 0 }
+};
+
 static const struct option etherstub_lopts[] = {
 	{"temporary",	no_argument,		0, 't'	},
 	{"root-dir",	required_argument,	0, 'R'	},
@@ -975,6 +1029,59 @@
 ;
 
 /*
+ * structures for 'dladm show-ib'
+ */
+typedef struct ib_fields_buf_s
+{
+	char ib_link[DLPI_LINKNAME_MAX];
+	char ib_hcaguid[17];		/* 16 hex digits ("%llX") + NUL */
+	char ib_portguid[17];		/* 16 hex digits ("%llX") + NUL */
+	char ib_portnum[4];
+	char ib_state[8];		/* link state; sized like part_state */
+	char ib_pkeys[MAXPKEYSTRSZ];	/* comma separated P_Key list */
+} ib_fields_buf_t;
+
+static const ofmt_field_t ib_fields[] = {
+{ "LINK",		13,
+	offsetof(ib_fields_buf_t, ib_link),	print_default_cb},
+{ "HCAGUID",		IBGUIDSTRLEN,
+	offsetof(ib_fields_buf_t, ib_hcaguid),	print_default_cb},
+{ "PORTGUID",		IBGUIDSTRLEN,
+	offsetof(ib_fields_buf_t, ib_portguid),	print_default_cb},
+{ "PORT",		IBPORTSTRLEN,
+	offsetof(ib_fields_buf_t, ib_portnum), print_default_cb},
+{ "STATE",		7,
+	offsetof(ib_fields_buf_t, ib_state), print_default_cb},
+{ "PKEYS",	18,
+	offsetof(ib_fields_buf_t, ib_pkeys), print_default_cb},
+{ NULL,			0, 0, NULL}};
+
+/*
+ * structures for 'dladm show-part'
+ */
+typedef struct part_fields_buf_s
+{
+	char part_link[DLPI_LINKNAME_MAX];
+	char part_pkey[5];		/* P_Key printed with "%X" */
+	char part_over[DLPI_LINKNAME_MAX];	/* underlying link */
+	char part_state[8];		/* filled by get_linkstate() */
+	char part_flags[5];		/* "%c----" truncated to 4 chars */
+} part_fields_buf_t;
+
+static const ofmt_field_t part_fields[] = {
+{ "LINK",		13,
+	offsetof(part_fields_buf_t, part_link),	print_default_cb},
+{ "PKEY",		MAXPKEYLEN,
+	offsetof(part_fields_buf_t, part_pkey),	print_default_cb},
+{ "OVER",		13,
+	offsetof(part_fields_buf_t, part_over), print_default_cb},
+{ "STATE",		9,
+	offsetof(part_fields_buf_t, part_state), print_default_cb},
+{ "FLAGS",	5,
+	offsetof(part_fields_buf_t, part_flags), print_default_cb},
+{ NULL,			0, 0, NULL}};
+
+/*
  * structures for 'dladm show-simnet'
  */
 typedef struct simnet_fields_buf_s
@@ -2597,6 +2704,22 @@
 			(void) strcpy(lbuf->link_over, "?");
 		break;
 	}
+
+	case DATALINK_CLASS_PART: {
+		dladm_part_attr_t	pinfo;
+
+		if (dladm_part_info(handle, linkid, &pinfo, flags) !=
+		    DLADM_STATUS_OK) {
+			(void) strcpy(lbuf->link_over, "?");
+			break;
+		}
+		if (dladm_datalink_id2info(handle, pinfo.dia_physlinkid, NULL,
+		    NULL, NULL, lbuf->link_over, sizeof (lbuf->link_over)) !=
+		    DLADM_STATUS_OK)
+			(void) strcpy(lbuf->link_over, "?");
+		break;
+	}
+
 	case DATALINK_CLASS_BRIDGE: {
 		datalink_id_t *dlp;
 		uint_t i, nports;
@@ -8951,3 +9074,611 @@
 		warn(buf);
 	}
 }
+
+/*
+ * Called through show_part() for each IB partition (one walked link or the
+ * one named on the command line) to print that partition's show-part row.
+ */
+static dladm_status_t
+print_part(show_part_state_t *state, datalink_id_t linkid)
+{
+	dladm_part_attr_t	attr;
+	dladm_status_t		status;
+	dladm_conf_t		conf;
+	char			part_over[MAXLINKNAMELEN];
+	char			part_name[MAXLINKNAMELEN];
+	part_fields_buf_t	pbuf;
+	boolean_t		force_in_conf = B_FALSE;
+
+	/*
+	 * Get the information about the IB partition from the partition
+	 * datalink ID 'linkid'.
+	 */
+	if ((status = dladm_part_info(handle, linkid, &attr, state->ps_flags))
+	    != DLADM_STATUS_OK)
+		return (status);
+
+	/*
+	 * If an IB Phys link name was provided on the command line we have
+	 * the Phys link's datalink ID in the ps_over_id field of the state
+	 * structure. Proceed only if the IB partition represented by 'linkid'
+	 * was created over Phys link denoted by ps_over_id. The
+	 * 'dia_physlinkid' field of dladm_part_attr_t represents the IB Phys
+	 * link over which the partition was created.
+	 */
+	if (state->ps_over_id != DATALINK_ALL_LINKID)
+		if (state->ps_over_id != attr.dia_physlinkid)
+			return (DLADM_STATUS_OK);
+
+	/*
+	 * The linkid argument passed to this function is the datalink ID
+	 * of the IB Partition. Get the partition's name from this linkid.
+	 */
+	if (dladm_datalink_id2info(handle, linkid, NULL, NULL,
+	    NULL, part_name, sizeof (part_name)) != DLADM_STATUS_OK)
+		return (DLADM_STATUS_BADARG);
+
+	bzero(part_over, sizeof (part_over));
+
+	/*
+	 * The 'dia_physlinkid' field contains the datalink ID of the IB Phys
+	 * link over which the partition was created. Use this linkid to get the
+	 * linkover field; "?" is shown when the name cannot be resolved.
+	 */
+	if (dladm_datalink_id2info(handle, attr.dia_physlinkid, NULL, NULL,
+	    NULL, part_over, sizeof (part_over)) != DLADM_STATUS_OK)
+		(void) sprintf(part_over, "?");
+	state->ps_found = B_TRUE;
+
+	/*
+	 * Read the FFORCE field from this datalink's persistent configuration
+	 * database line to determine if this datalink was created forcibly.
+	 * If this datalink is a temporary datalink, then it will not have an
+	 * entry in the persistent configuration, so check if force create flag
+	 * is set in the partition attributes.
+	 *
+	 * We need this two level check since persistent partitions brought up
+	 * by up-part during boot will have force create flag always set, since
+	 * we want up-part to always succeed even if the port is currently down
+	 * or P_Key is not yet available in the subnet.
+	 */
+	if ((status = dladm_read_conf(handle, linkid, &conf)) ==
+	    DLADM_STATUS_OK) {
+		(void) dladm_get_conf_field(handle, conf, FFORCE,
+		    &force_in_conf, sizeof (boolean_t));
+		dladm_destroy_conf(handle, conf);
+	} else if (status == DLADM_STATUS_NOTFOUND) {
+		/*
+		 * for a temp link the force create flag will determine
+		 * whether it was created with force flag.
+		 */
+		force_in_conf = ((attr.dia_flags & DLADM_IBPART_FORCE_CREATE)
+		    != 0);
+	}
+
+	(void) snprintf(pbuf.part_link, sizeof (pbuf.part_link),
+	    "%s", part_name);
+
+	(void) snprintf(pbuf.part_over, sizeof (pbuf.part_over),
+	    "%s", part_over);
+
+	(void) snprintf(pbuf.part_pkey, sizeof (pbuf.part_pkey),
+	    "%X", attr.dia_pkey);
+
+	(void) get_linkstate(pbuf.part_link, B_TRUE, pbuf.part_state);
+
+	(void) snprintf(pbuf.part_flags, sizeof (pbuf.part_flags),
+	    "%c----", force_in_conf ? 'f' : '-');
+
+	ofmt_print(state->ps_ofmt, &pbuf);
+
+	return (DLADM_STATUS_OK);
+}
+
+/* ARGSUSED */
+static int	/* walker callback; arg is a show_part_state_t */
+show_part(dladm_handle_t dh, datalink_id_t linkid, void *arg)
+{
+	((show_part_state_t *)arg)->ps_status = print_part(arg, linkid);
+	return (DLADM_WALK_CONTINUE);	/* a failed link does not end the walk */
+}
+
+/*
+ * dladm show-part [-pP] [-o <field>,...] [-l <linkover>] [<part-link>]
+ */
+static void
+do_show_part(int argc, char *argv[], const char *use)
+{
+	int			option;
+	boolean_t		l_arg = B_FALSE;
+	uint32_t		flags = DLADM_OPT_ACTIVE;
+	datalink_id_t		linkid = DATALINK_ALL_LINKID;
+	datalink_id_t		over_linkid = DATALINK_ALL_LINKID;
+	char			over_link[MAXLINKNAMELEN];
+	show_part_state_t	state;
+	dladm_status_t		status;
+	boolean_t		o_arg = B_FALSE;
+	char			*fields_str = NULL;
+	ofmt_handle_t		ofmt;
+	ofmt_status_t		oferr;
+	uint_t			ofmtflags = 0;
+
+	bzero(&state, sizeof (state));
+	opterr = 0;
+	while ((option = getopt_long(argc, argv, ":pPl:o:", show_part_lopts,
+	    NULL)) != -1) {
+		switch (option) {
+		case 'p':
+			state.ps_parsable = B_TRUE;
+			break;
+		case 'P':
+			flags = DLADM_OPT_PERSIST;
+			break;
+		case 'l':
+			/*
+			 * The name of the IB Phys link. When this
+			 * argument is provided we list only the partition
+			 * objects created over this IB Phys link.
+			 */
+			if (strlcpy(over_link, optarg, MAXLINKNAMELEN) >=
+			    MAXLINKNAMELEN)
+				die("link name too long");
+
+			l_arg = B_TRUE;
+			break;
+		case 'o':
+			o_arg = B_TRUE;
+			fields_str = optarg;
+			break;
+		default:
+			die_opterr(optopt, option, use);
+		}
+	}
+
+	/*
+	 * Get the datalink ID of the partition (optional last argument).
+	 */
+	if (optind == (argc - 1)) {
+		status = dladm_name2info(handle, argv[optind], &linkid, NULL,
+		    NULL, NULL);
+		if (status != DLADM_STATUS_OK) {
+			die_dlerr(status, "invalid partition link name '%s'",
+			    argv[optind]);
+		}
+		(void) strlcpy(state.ps_part, argv[optind], MAXLINKNAMELEN);
+	} else if (optind != argc) {
+		usage();
+	}
+
+	if (state.ps_parsable && !o_arg)
+		die("-p requires -o");
+
+	/*
+	 * If an IB Phys link name was provided as an argument, then get its
+	 * datalink ID.
+	 */
+	if (l_arg) {
+		status = dladm_name2info(handle, over_link, &over_linkid, NULL,
+		    NULL, NULL);
+		if (status != DLADM_STATUS_OK) {
+			die_dlerr(status, "invalid link name '%s'", over_link);
+		}
+	}
+
+	state.ps_over_id = over_linkid; /* IB Phys link ID */
+	state.ps_found = B_FALSE;
+	state.ps_flags = flags;
+
+	if (state.ps_parsable)
+		ofmtflags |= OFMT_PARSABLE;
+	oferr = ofmt_open(fields_str, part_fields, ofmtflags, 0, &ofmt);
+	dladm_ofmt_check(oferr, state.ps_parsable, ofmt);
+	state.ps_ofmt = ofmt;
+
+	/*
+	 * If a specific IB partition name was not provided as an argument,
+	 * walk all the datalinks and display the information for all
+	 * IB partitions. If IB Phys link was provided limit it to only
+	 * IB partitions created over that IB Phys link.
+	 */
+	if (linkid == DATALINK_ALL_LINKID) {
+		(void) dladm_walk_datalink_id(show_part, handle, &state,
+		    DATALINK_CLASS_PART, DATALINK_ANY_MEDIATYPE, flags);
+	} else {
+		(void) show_part(handle, linkid, &state);
+		if (state.ps_status != DLADM_STATUS_OK) {
+			ofmt_close(ofmt);
+			die_dlerr(state.ps_status, "failed to show IB partition"
+			    " '%s'", state.ps_part);
+		}
+	}
+	ofmt_close(ofmt);
+}
+
+
+/*
+ * Called from the walker dladm_walk_datalink_id() for each IB Phys link to
+ * display IB specific information for these Phys links.
+ *
+ * state	show-ib state; supplies the link filter (is_link_id), the
+ *		query flags (is_flags) and the output handle (is_ofmt).
+ * phys_linkid	datalink ID of the IB Phys link to display.
+ *
+ * Returns DLADM_STATUS_OK when the link was printed, or when it was skipped
+ * because it is not the link selected by state->is_link_id. Otherwise the
+ * status of the libdladm call that failed is returned and nothing is
+ * printed.
+ */
+static dladm_status_t
+print_ib(show_ib_state_t *state, datalink_id_t phys_linkid)
+{
+	dladm_ib_attr_t		attr;
+	dladm_status_t		status;
+	char			linkname[MAXLINKNAMELEN];
+	char			pkeystr[MAXPKEYLEN];
+	int			i;
+	ib_fields_buf_t		ibuf;
+
+	bzero(&attr, sizeof (attr));
+
+	/*
+	 * Get the attributes of the IB Phys link from active/Persistent config
+	 * based on the flag passed.
+	 */
+	if ((status = dladm_ib_info(handle, phys_linkid, &attr,
+	    state->is_flags)) != DLADM_STATUS_OK)
+		return (status);
+
+	if ((state->is_link_id != DATALINK_ALL_LINKID) && (state->is_link_id
+	    != attr.dia_physlinkid)) {
+		dladm_free_ib_info(&attr);
+		return (DLADM_STATUS_OK);
+	}
+
+	/*
+	 * Get the data link name for the phys_linkid. If we are doing show-ib
+	 * for all IB Phys links, we have only the datalink IDs not the
+	 * datalink name.
+	 *
+	 * On failure report the real error (not the stale OK status from
+	 * dladm_ib_info()) and release the attributes obtained above.
+	 */
+	status = dladm_datalink_id2info(handle, phys_linkid, NULL, NULL, NULL,
+	    linkname, MAXLINKNAMELEN);
+	if (status != DLADM_STATUS_OK) {
+		dladm_free_ib_info(&attr);
+		return (status);
+	}
+
+	(void) snprintf(ibuf.ib_link, sizeof (ibuf.ib_link),
+	    "%s", linkname);
+
+	(void) snprintf(ibuf.ib_portnum, sizeof (ibuf.ib_portnum),
+	    "%d", attr.dia_portnum);
+
+	(void) snprintf(ibuf.ib_hcaguid, sizeof (ibuf.ib_hcaguid),
+	    "%llX", attr.dia_hca_guid);
+
+	(void) snprintf(ibuf.ib_portguid, sizeof (ibuf.ib_portguid),
+	    "%llX", attr.dia_port_guid);
+
+	(void) get_linkstate(linkname, B_TRUE, ibuf.ib_state);
+
+	/*
+	 * Create a comma separated list of pkeys from the pkey table returned
+	 * by the IP over IB driver instance.
+	 *
+	 * The list must start out as an empty string whatever the size of
+	 * the pkey table is, since strlcat() appends to it and it is printed
+	 * even when no valid pkey is found.
+	 */
+	ibuf.ib_pkeys[0] = '\0';
+	for (i = 0; i < attr.dia_port_pkey_tbl_sz; i++) {
+		if (attr.dia_port_pkeys[i] == IB_PKEY_INVALID_FULL ||
+		    attr.dia_port_pkeys[i] == IB_PKEY_INVALID_LIMITED)
+			continue;
+
+		/*
+		 * Separate entries based on what has been emitted so far,
+		 * not on the table index: slot 0 may hold an invalid pkey.
+		 */
+		(void) snprintf(pkeystr, MAXPKEYLEN, "%s%X",
+		    (ibuf.ib_pkeys[0] == '\0') ? "" : ",",
+		    attr.dia_port_pkeys[i]);
+		(void) strlcat(ibuf.ib_pkeys, pkeystr, MAXPKEYSTRSZ);
+	}
+
+	dladm_free_ib_info(&attr);
+
+	ofmt_print(state->is_ofmt, &ibuf);
+
+	return (DLADM_STATUS_OK);
+}
+
+/*
+ * Datalink walker callback for show-ib: print one IB Phys link and remember
+ * the outcome in the caller's show_ib_state_t. The walk is never cut short
+ * by a failure on an individual link.
+ */
+/* ARGSUSED */
+static int
+show_ib(dladm_handle_t dh, datalink_id_t linkid, void *arg)
+{
+	show_ib_state_t	*ibstate = arg;
+
+	ibstate->is_status = print_ib(ibstate, linkid);
+	return (DLADM_WALK_CONTINUE);
+}
+
+/*
+ * show-ib sub-command: display the IB specific properties (HCA GUID, port
+ * GUID, port number, link state and P_Keys) of one IB Phys link, or of all
+ * IB Phys links when no link name is given. Unlike show-phys, only IB
+ * specific information is shown.
+ *
+ * Options: -p selects parsable output (and requires -o), -o <fields>
+ * selects the fields to print. The optional last operand is the name of the
+ * IB Phys link.
+ */
+static void
+do_show_ib(int argc, char *argv[], const char *use)
+{
+	show_ib_state_t		state;
+	datalink_id_t		linkid = DATALINK_ALL_LINKID;
+	uint32_t		flags = DLADM_OPT_ACTIVE;
+	char			*fields_str = NULL;
+	boolean_t		o_arg = B_FALSE;
+	uint_t			ofmtflags = 0;
+	ofmt_handle_t		ofmt;
+	ofmt_status_t		oferr;
+	dladm_status_t		status;
+	int			option;
+
+	bzero(&state, sizeof (state));
+	opterr = 0;
+	for (;;) {
+		option = getopt_long(argc, argv, ":po:", show_lopts, NULL);
+		if (option == -1)
+			break;
+
+		if (option == 'p') {
+			state.is_parsable = B_TRUE;
+		} else if (option == 'o') {
+			o_arg = B_TRUE;
+			fields_str = optarg;
+		} else {
+			die_opterr(optopt, option, use);
+		}
+	}
+
+	/* at most one operand is accepted: the IB Phys link name */
+	switch (argc - optind) {
+	case 0:
+		break;
+	case 1:
+		status = dladm_name2info(handle, argv[optind], &linkid, NULL,
+		    NULL, NULL);
+		if (status != DLADM_STATUS_OK) {
+			die_dlerr(status, "invalid IB port name '%s'",
+			    argv[optind]);
+		}
+		(void) strlcpy(state.is_link, argv[optind], MAXLINKNAMELEN);
+		break;
+	default:
+		usage();
+	}
+
+	if (state.is_parsable && !o_arg)
+		die("-p requires -o");
+
+	/*
+	 * Without an operand linkid is still DATALINK_ALL_LINKID, which
+	 * print_ib() treats as "do not filter".
+	 */
+	state.is_link_id = linkid;
+	state.is_flags = flags;
+
+	if (state.is_parsable)
+		ofmtflags |= OFMT_PARSABLE;
+	oferr = ofmt_open(fields_str, ib_fields, ofmtflags, 0, &ofmt);
+	dladm_ofmt_check(oferr, state.is_parsable, ofmt);
+	state.is_ofmt = ofmt;
+
+	if (linkid != DATALINK_ALL_LINKID) {
+		/*
+		 * A single IB Phys link was requested: show it directly and
+		 * report any failure to the user.
+		 */
+		(void) show_ib(handle, linkid, &state);
+		if (state.is_status != DLADM_STATUS_OK) {
+			ofmt_close(ofmt);
+			die_dlerr(state.is_status, "failed to show IB Phys link"
+			    " '%s'", state.is_link);
+		}
+	} else {
+		/*
+		 * No link was named: walk every datalink of class Phys and
+		 * media type IB.
+		 */
+		(void) dladm_walk_datalink_id(show_ib, handle, &state,
+		    DATALINK_CLASS_PHYS, DL_IB, flags);
+	}
+	ofmt_close(ofmt);
+}
+
+/*
+ * Create an IP over Infiniband partition object over an IB Phys link. The IB
+ * Phys link is associated with an Infiniband HCA port. The IB partition object
+ * is created over a port, pkey combination. This partition object represents
+ * an instance of IP over IB interface.
+ *
+ * Options:
+ *	-l <link>	IB Phys link to create the partition over (required)
+ *	-P <pkey>	partition key, hexadecimal, 1 .. USHRT_MAX (required)
+ *	-t		temporary; do not record in the persistent database
+ *	-f		force creation
+ *	-p <props>	link properties; may be repeated
+ *	-R <root>	alternate root, handed to altroot_cmd()
+ *
+ * The single operand is the name of the partition link to create.
+ */
+/* ARGSUSED */
+static void
+do_create_part(int argc, char *argv[], const char *use)
+{
+	dladm_status_t	status;
+	int		option;
+	int		flags = DLADM_OPT_ACTIVE | DLADM_OPT_PERSIST;
+	char		*pname;
+	char		*l_arg = NULL;
+	char		*altroot = NULL;
+	datalink_id_t	physlinkid = 0;
+	datalink_id_t	partlinkid = 0;
+	ib_pkey_t	pkey = 0;
+	unsigned long	pkeyval;
+	char		*endp = NULL;
+	char		propstr[DLADM_STRSIZE];
+	dladm_arg_list_t	*proplist = NULL;
+
+	propstr[0] = '\0';
+	while ((option = getopt_long(argc, argv, ":tfl:P:R:p:",
+	    part_lopts, NULL)) != -1) {
+		switch (option) {
+		case 't':
+			/*
+			 * Create a temporary IB partition object. This
+			 * instance is not entered into the persistent database
+			 * so it will not be recreated automatically on a
+			 * reboot.
+			 */
+			flags &= ~DLADM_OPT_PERSIST;
+			break;
+		case 'l':
+			/*
+			 * The IB phys link over which the partition object will
+			 * be created.
+			 */
+			l_arg = optarg;
+			break;
+		case 'R':
+			altroot = optarg;
+			break;
+		case 'p':
+			/* accumulate repeated -p values as "a,b,c," */
+			(void) strlcat(propstr, optarg, DLADM_STRSIZE);
+			if (strlcat(propstr, ",", DLADM_STRSIZE) >=
+			    DLADM_STRSIZE)
+				die("property list too long '%s'", propstr);
+			break;
+		case 'P':
+			/*
+			 * The P_Key for the port, pkey tuple of the partition
+			 * object. This P_Key should exist in the IB subnet.
+			 * The partition creation for a non-existent P_Key will
+			 * fail unless the -f option is used.
+			 *
+			 * The P_Key is expected to be a hexadecimal number.
+			 *
+			 * Parse into an unsigned long and range check it
+			 * before narrowing to ib_pkey_t; assigning the
+			 * strtoul() result directly would truncate an out of
+			 * range value before it could be rejected. errno is
+			 * cleared first because strtoul() only sets it on
+			 * failure.
+			 */
+			errno = 0;
+			pkeyval = strtoul(optarg, &endp, 16);
+			if (errno != 0 || endp == optarg || *endp != '\0' ||
+			    pkeyval > USHRT_MAX)
+				die("Invalid pkey");
+			pkey = (ib_pkey_t)pkeyval;
+			break;
+		case 'f':
+			flags |= DLADM_OPT_FORCE;
+			break;
+		default:
+			die_opterr(optopt, option, use);
+			break;
+		}
+	}
+
+	/* check required options */
+	if (!l_arg)
+		usage();
+
+	/* the partition name is a required operand */
+	if (optind != (argc - 1))
+		usage();
+
+	pname = argv[argc - 1];
+
+	/*
+	 * Verify that the partition object's name is in the valid link name
+	 * format.
+	 */
+	if (!dladm_valid_linkname(pname))
+		die("Invalid link name '%s'", pname);
+
+	/* pkey is a mandatory argument; 0 means -P was absent or was 0 */
+	if (pkey == 0)
+		usage();
+
+	if (altroot != NULL)
+		altroot_cmd(altroot, argc, argv);
+
+	/*
+	 * Get the data link id of the IB Phys link over which we will be
+	 * creating partition object.
+	 */
+	if (dladm_name2info(handle, l_arg,
+	    &physlinkid, NULL, NULL, NULL) != DLADM_STATUS_OK)
+		die("invalid link name '%s'", l_arg);
+
+	/*
+	 * parse the property list provided with -p option.
+	 */
+	if (dladm_parse_link_props(propstr, &proplist, B_FALSE)
+	    != DLADM_STATUS_OK)
+		die("invalid IB partition property");
+
+	/*
+	 * Call the library routine to create the partition object.
+	 */
+	status = dladm_part_create(handle, physlinkid, pkey, flags, pname,
+	    &partlinkid, proplist);
+	if (status != DLADM_STATUS_OK)
+		die_dlerr(status,
+		    "partition %x creation over %s failed", pkey, l_arg);
+}
+
+/*
+ * Delete an IP over Infiniband partition object. The partition object should
+ * be unplumbed before attempting the delete.
+ *
+ * Options:
+ *	-t		delete only the active instance; keep the persistent
+ *			configuration
+ *	-R <root>	alternate root, handed to altroot_cmd()
+ *
+ * The single operand is the name of the partition link to delete.
+ */
+static void
+do_delete_part(int argc, char *argv[], const char *use)
+{
+	int option, flags = DLADM_OPT_ACTIVE | DLADM_OPT_PERSIST;
+	int status;
+	char *altroot = NULL;
+	datalink_id_t	partid;
+
+	opterr = 0;
+	/*
+	 * The leading ':' makes getopt return ':' (rather than '?') when the
+	 * argument of -R is missing, as the other sub-commands do, so that
+	 * die_opterr() is told which of the two errors occurred.
+	 */
+	while ((option = getopt_long(argc, argv, ":R:t", part_lopts,
+	    NULL)) != -1) {
+		switch (option) {
+		case 't':
+			flags &= ~DLADM_OPT_PERSIST;
+			break;
+		case 'R':
+			altroot = optarg;
+			break;
+		default:
+			die_opterr(optopt, option, use);
+		}
+	}
+
+	/* get partition name (required last argument) */
+	if (optind != (argc - 1))
+		usage();
+
+	if (altroot != NULL)
+		altroot_cmd(altroot, argc, argv);
+
+	/*
+	 * Get the data link id of the partition object given the partition
+	 * name.
+	 */
+	status = dladm_name2info(handle, argv[optind], &partid, NULL, NULL,
+	    NULL);
+	if (status != DLADM_STATUS_OK)
+		die("invalid link name '%s'", argv[optind]);
+
+	/*
+	 * Call the library routine to delete the IB partition. This will
+	 * result in the IB partition object and all its resources getting
+	 * deleted.
+	 */
+	status = dladm_part_delete(handle, partid, flags);
+	if (status != DLADM_STATUS_OK)
+		die_dlerr(status, "%s: partition deletion failed",
+		    argv[optind]);
+}
+
+/*
+ * up-part sub-command: activate IB partitions that exist in the persistent
+ * database but are not active yet. With one operand only that partition is
+ * brought up; with none, all of them are.
+ *
+ * This is what runs at boot to re-create the persistent IB partitions. It
+ * behaves like create-part, except that the partitions are created even when
+ * the HCA port is down or the P_Key is missing from the IB subnet. That
+ * resembles create-part's 'force' option, but the 'f' flag is only set in
+ * the flags field when the original create-part was issued with '-f'.
+ *
+ * Failures are deliberately silent: an unknown partition name simply
+ * returns, and the result of dladm_part_up() is ignored.
+ */
+/* ARGSUSED */
+static void
+do_up_part(int argc, char *argv[], const char *use)
+{
+	datalink_id_t	partid = DATALINK_ALL_LINKID;
+
+	if (argc > 2) {
+		usage();
+	} else if (argc == 2) {
+		/* a partition name was given; translate it to a link id */
+		if (dladm_name2info(handle, argv[1], &partid, NULL, NULL,
+		    NULL) != DLADM_STATUS_OK)
+			return;
+	}
+
+	(void) dladm_part_up(handle, partid, 0);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/cmd/ibd_upgrade/Makefile	Wed Apr 14 10:26:18 2010 -0700
@@ -0,0 +1,61 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+#
+
+# SMF manifest and service method script found in this directory. They are
+# set before ../Makefile.cmd is included (presumably so that it can derive
+# $(ROOTMANIFEST) and $(ROOTSVCMETHOD), used by "install" below - confirm).
+MANIFEST = ibd-post-upgrade.xml
+SVCMETHOD = ibd-post-upgrade
+
+include ../Makefile.cmd
+
+# Two things are built here: the ibd_upgrade script (SHFILES) and the
+# ibd_delete_link C program, whose single source file is derived from OBJS.
+SHFILES = ibd_upgrade
+IBD_DELETE_LINK = ibd_delete_link
+OBJS = $(IBD_DELETE_LINK).o
+SRCS = $(OBJS:%.o=%.c)
+
+# Everything this makefile can produce is removed by clobber.
+CLOBBERFILES = $(SHFILES) $(IBD_DELETE_LINK) $(OBJS)
+
+# The manifest goes into the network manifest directory; ibd_delete_link
+# needs libdladm.
+ROOTMANIFESTDIR = $(ROOTSVCNETWORK)
+LDLIBS += -ldladm
+
+.KEEP_STATE:
+
+all: $(IBD_DELETE_LINK) $(SHFILES)
+
+# Both executables are installed under $(ROOTSBIN), next to the SMF manifest
+# and method.
+install: all					\
+	$(ROOTSBIN)/$(IBD_DELETE_LINK)		\
+	$(ROOTMANIFEST)				\
+	$(ROOTSVCMETHOD)			\
+	$(ROOTSBIN)/ibd_upgrade
+
+# Validate the manifest and run cstyle over the C source.
+check:	$(CHKMANIFEST)
+	$(CSTYLE) -pP $(SRCS)
+
+# Intentionally empty: all build products are listed in CLOBBERFILES.
+clean:
+
+lint:	lint_SRCS
+
+include ../Makefile.targ
+
+# Install any locally built file into $(ROOTSBIN).
+$(ROOTSBIN)/%: %
+	$(INS.file)
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/cmd/ibd_upgrade/ibd-post-upgrade	Wed Apr 14 10:26:18 2010 -0700
@@ -0,0 +1,47 @@
+#!/sbin/sh
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+#
+
+. /lib/svc/share/smf_include.sh
+
+#
+# Actual work of ibd upgrade is done in network/physical service.
+# Here we merely set the property to indicate that the ibd upgrade has been
+# done. Setting of the property can not be done in network/physical service
+# because the file system is read-only at that point.
+#
+if smf_is_globalzone; then
+	physical_fmri=svc:/network/physical:default
+	upgraded_prop=ibd/ibd_upgraded
+
+	# Record the upgrade unless the property already reads "true". The
+	# "ibd" property group may exist already, so addpg errors are dropped.
+	case `/bin/svcprop -c -p $upgraded_prop $physical_fmri 2> /dev/null` in
+	true)
+		;;
+	*)
+		/usr/sbin/svccfg -s $physical_fmri addpg ibd system 2> /dev/null
+		/usr/sbin/svccfg -s $physical_fmri \
+		    setprop $upgraded_prop = boolean: true
+		;;
+	esac
+fi
+
+# One-shot service: disable ourselves once the property has been handled.
+/usr/sbin/svcadm disable $SMF_FMRI
+exit $SMF_EXIT_OK
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/cmd/ibd_upgrade/ibd-post-upgrade.xml	Wed Apr 14 10:26:18 2010 -0700
@@ -0,0 +1,92 @@
+<?xml version="1.0"?>
+<!DOCTYPE service_bundle SYSTEM "/usr/share/lib/xml/dtd/service_bundle.dtd.1">
+<!--
+ Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+
+ CDDL HEADER START
+
+ The contents of this file are subject to the terms of the
+ Common Development and Distribution License (the "License").
+ You may not use this file except in compliance with the License.
+
+ You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ or http://www.opensolaris.org/os/licensing.
+ See the License for the specific language governing permissions
+ and limitations under the License.
+
+ When distributing Covered Code, include this CDDL HEADER in each
+ file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ If applicable, add the following below this CDDL HEADER, with the
+ fields enclosed by brackets "[]" replaced with your own identifying
+ information: Portions Copyright [yyyy] [name of copyright owner]
+
+ CDDL HEADER END
+
+	NOTE:  This service manifest is not editable; its contents will
+	be overwritten by package or patch operations, including
+	operating system upgrade.  Make customizations in a different
+	file.
+-->
+
+<service_bundle type='manifest' name='SUNWipoib:ibd-post-upgrade'>
+
+<!--
+	network/ibd-post-upgrade: runs /lib/svc/method/ibd-post-upgrade as
+	its start method. A single default instance is created, enabled.
+-->
+<service
+	name='network/ibd-post-upgrade'
+	type='service'
+	version='1'>
+
+	<create_default_instance enabled='true' />
+
+	<single_instance />
+
+	<!-- network/physical must be online before the start method runs -->
+	<dependency
+		name='network-physical'
+		type='service'
+		grouping='require_all'
+		restart_on='none'>
+		<service_fmri value='svc:/network/physical' />
+	</dependency>
+
+	<!-- system/filesystem/minimal must be online as well -->
+	<dependency
+		name='filesystem-minimal'
+		type='service'
+		grouping='require_all'
+		restart_on='none'>
+		<service_fmri value='svc:/system/filesystem/minimal' />
+	</dependency>
+
+	<!--
+		NOTE(review): timeout_seconds='0' presumably means "no
+		timeout" for both methods; confirm against smf_method(5).
+	-->
+	<exec_method
+		type='method'
+		name='start'
+		exec='/lib/svc/method/ibd-post-upgrade'
+		timeout_seconds='0' />
+
+	<!-- nothing to undo on stop -->
+	<exec_method
+		type='method'
+		name='stop'
+		exec=':true'
+		timeout_seconds='0' />
+
+	<property_group name='startd' type='framework'>
+		<propval name='duration' type='astring' value='transient' />
+	</property_group>
+
+	<stability value='Unstable' />
+
+	<template>
+		<common_name>
+			<loctext xml:lang='C'>
+				ibd upgrade
+			</loctext>
+		</common_name>
+
+		<documentation>
+			<manpage
+				title='ibp'
+				section='7D'
+				manpath='/usr/share/man' />
+		</documentation>
+	</template>
+</service>
+
+</service_bundle>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/cmd/ibd_upgrade/ibd_delete_link.c	Wed Apr 14 10:26:18 2010 -0700
@@ -0,0 +1,151 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#include <stdio.h>
+#include <door.h>
+#include <errno.h>
+#include <strings.h>
+#include <sys/mman.h>
+#include <libdladm.h>
+#include <libdlib.h>
+#include <libdllink.h>
+
+extern dladm_status_t	dladm_door_fd(dladm_handle_t, int *);
+
+/*
+ * Make one door call on the door descriptor obtained from dladm_door_fd().
+ *
+ * arg/asize	request buffer and its size
+ * rbuf/rsize	caller supplied reply buffer and the exact reply size expected
+ *
+ * Returns DLADM_STATUS_OK only if the call succeeded, the reply was placed
+ * in rbuf with exactly rsize bytes, and the reply's lr_err field is 0.
+ */
+static dladm_status_t
+ibd_dladm_door_call(dladm_handle_t handle, void *arg, size_t asize, void *rbuf,
+    size_t rsize)
+{
+	door_arg_t	door_args;
+	dladm_status_t	rv;
+	int		fd;
+
+	/* The door descriptor is opened if it isn't already */
+	rv = dladm_door_fd(handle, &fd);
+	if (rv != DLADM_STATUS_OK)
+		return (rv);
+
+	door_args.data_ptr = arg;
+	door_args.data_size = asize;
+	door_args.desc_ptr = NULL;
+	door_args.desc_num = 0;
+	door_args.rbuf = rbuf;
+	door_args.rsize = rsize;
+
+	if (door_call(fd, &door_args) == -1)
+		return (DLADM_STATUS_FAILED);
+
+	if (door_args.rbuf != rbuf) {
+		/*
+		 * The reply did not fit into the buffer we supplied, so a
+		 * different buffer was handed back instead. Release that
+		 * buffer and treat the call as failed.
+		 */
+		(void) munmap(door_args.rbuf, door_args.rsize);
+		return (DLADM_STATUS_TOOSMALL);
+	}
+
+	/* a reply of unexpected size cannot be interpreted */
+	if (door_args.rsize != rsize)
+		return (DLADM_STATUS_FAILED);
+
+	return ((((dlmgmt_retval_t *)rbuf)->lr_err == 0) ?
+	    DLADM_STATUS_OK : DLADM_STATUS_FAILED);
+}
+
+/*
+ * Remove the physical datalink named 'link': look up its link ID and class
+ * through the DLMGMT_CMD_GETLINKID door call, then remove its persistent
+ * configuration and destroy its datalink ID (active and persistent).
+ *
+ * Returns DLADM_STATUS_OK on success. Any failure, including a link that is
+ * not of class DATALINK_CLASS_PHYS or a name too long to be a link name, is
+ * reported on stderr and returned as a status other than DLADM_STATUS_OK.
+ */
+static int
+ibd_delete_link(dladm_handle_t dlh, char *link)
+{
+	dlmgmt_door_getlinkid_t		getlinkid;
+	dlmgmt_getlinkid_retval_t	retval;
+	datalink_id_t			linkid;
+	dladm_status_t			status;
+	char				errmsg[DLADM_STRSIZE];
+
+	getlinkid.ld_cmd = DLMGMT_CMD_GETLINKID;
+
+	/*
+	 * Never look up a truncated name: it could match a different link,
+	 * which would then be deleted.
+	 */
+	if (strlcpy(getlinkid.ld_link, link, MAXLINKNAMELEN) >=
+	    MAXLINKNAMELEN) {
+		(void) fprintf(stderr,
+		    "Link name too long: linkname = %s\n", link);
+		return (DLADM_STATUS_LINKINVAL);
+	}
+
+	if ((status = ibd_dladm_door_call(dlh, &getlinkid, sizeof (getlinkid),
+	    &retval, sizeof (retval))) != DLADM_STATUS_OK) {
+		(void) fprintf(stderr,
+		    "dladm_door_call failed: %s; linkname = %s\n",
+		    dladm_status2str(status, errmsg), link);
+		return (status);
+	}
+
+	if (retval.lr_class != DATALINK_CLASS_PHYS) {
+		(void) fprintf(stderr,
+		    "Not a physical link: linkname = %s, class = 0x%x\n",
+		    link, (uint_t)retval.lr_class);
+		/*
+		 * status still holds DLADM_STATUS_OK from the door call;
+		 * return a real error so the caller does not report success
+		 * for a link that was not deleted.
+		 */
+		return (DLADM_STATUS_LINKINVAL);
+	}
+
+	linkid = retval.lr_linkid;
+
+	if ((status = dladm_remove_conf(dlh, linkid)) != DLADM_STATUS_OK) {
+		(void) fprintf(stderr, "dladm_remove_conf failed: %s\n",
+		    dladm_status2str(status, errmsg));
+		return (status);
+	}
+
+	if ((status = dladm_destroy_datalink_id(dlh, linkid,
+	    DLADM_OPT_ACTIVE | DLADM_OPT_PERSIST)) != DLADM_STATUS_OK) {
+		(void) fprintf(stderr, "dladm_destroy_datalink_id failed: %s\n",
+		    dladm_status2str(status, errmsg));
+	}
+
+	return (status);
+}
+
+/*
+ * ibd_delete_link linkname ...
+ *
+ * Delete each named physical link in turn, stopping at the first failure.
+ * Exit status: 0 if every link was deleted, 1 if the dladm handle could not
+ * be opened or a deletion failed, 2 on a usage error.
+ */
+int
+main(int argc, char *argv[])
+{
+	char		errmsg[DLADM_STRSIZE];
+	dladm_handle_t	dlh;
+	dladm_status_t	status;
+	int		rv = 0;
+	int		i;
+
+	if (argc < 2) {
+		(void) fprintf(stderr,
+		    "Usage: ibd_delete_link linkname ...\n");
+		return (2);
+	}
+
+	status = dladm_open(&dlh);
+	if (status != DLADM_STATUS_OK) {
+		(void) fprintf(stderr, "Failed to open dladm handle: %s\n",
+		    dladm_status2str(status, errmsg));
+		return (1);
+	}
+
+	/* the remaining links are not attempted once one of them fails */
+	for (i = 1; i < argc && rv == 0; i++) {
+		if (ibd_delete_link(dlh, argv[i]) != DLADM_STATUS_OK)
+			rv = 1;
+	}
+
+	dladm_close(dlh);
+	return (rv);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/cmd/ibd_upgrade/ibd_upgrade.sh	Wed Apr 14 10:26:18 2010 -0700
@@ -0,0 +1,159 @@
+#!/sbin/sh
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+#
+#
+
+PATH=/sbin:/bin
+ORIGIFS="${IFS}"
+USAGE="Usage: ibd_upgrade [-v]"
+DRVCONF=/kernel/drv/ibp.conf.old
+
+#
+# split device path into path components
+#
+# Input:  $device_path (global), whose last component has the form
+#         <node_name>@<port>,<pkey>,<service>:<partition_name>
+# Output: hca_path, node_name, port, pkey (prefixed with "0x"), service and
+#         partition_name (all global). Fields missing from the input are
+#         left empty; pkey is then just "0x".
+#
+# The positional parameters are overwritten, and IFS is changed while
+# splitting and restored to ${ORIGIFS} before returning.
+#
+split_path_components()
+{
+	hca_path=
+	node_name=
+	port=
+	pkey=
+	service=
+	partition_name=
+
+	# directory part of the path, taken relative to /dev
+	hca_path="/dev/`dirname $device_path`"
+	bname=`basename $device_path`
+	# <node>@<addr> : <partition_name>
+	IFS=":"
+	set -- $bname
+	node_at_addr=$1
+	partition_name=$2
+	# <node_name> @ <addr>
+	IFS="@"
+	set -- $node_at_addr
+	node_name=$1
+	# <addr> is <port>,<pkey>,<service>; $2 here is the <addr> from above
+	IFS=","
+	set -- $2
+	port=$1
+	pkey=0x$2
+	service=$3
+
+	IFS="${ORIGIFS}"
+}
+
+do_cmd()
+{
+	if [ $verbose -eq 1 ]; then
+		echo "$1"
+	fi
+	$1
+}
+
+process_rc_mode()
+{
+	device=$1
+
+	#
+	# Get the instance number of ibd
+	# Device name format would be ibd#, 
+	#
+	IFS="d"
+	set -- ${device}
+	IFS="${ORIGIFS}"
+
+	if [ "$1" != "ib" ]; then
+		return
+	fi
+
+	inst=$2
+
+	IFS=","
+	set -- ${enable_rc}
+	IFS="${ORIGIFS}"
+
+	if [ ${inst} -lt $# ]; then
+		(( inst = $inst + 1 ))
+		eval "linkmode=\$${inst}"
+	else
+		linkmode=0
+	fi
+
+	if [ "$linkmode" = "0" ]; then
+		do_cmd "dladm set-linkprop -p linkmode=ud ${device}"
+	fi
+}
+
+verbose=0
+while getopts v c
+do
+	case $c in
+	v)	verbose=1;;
+	\?)	echo "$USAGE" 1>&2
+		exit 2;;
+	esac
+done
+
+enable_rc=
+if [ -f ${DRVCONF} ]; then
+	enable_rc=`egrep "^[ 	]*enable_rc[ 	]*=" ${DRVCONF} | sed -e "s/[ 	]*//g" -e "s/enable_rc=//" -e "s/;$//" 2>/dev/null`
+fi
+
+#
+# Loop through all ibd devices based on the old model (i.e., one ibd instance
+# per partition; consequently device names have non zero pkey)
+# and create data links with the same names as in the old model under the
+# new model.
+#
+# Only the 11th field of each "ls -l" line is used; it is expected to be the
+# path the /dev/ibd* entry points to. The loop body runs on the receiving
+# end of a pipe, so the "cd" below does not need to be undone.
+#
+ls -l /dev/ibd* 2> /dev/null \
+    | while read x1 x2 x3 x4 x5 x6 x7 x8 x9 x10 device_path
+do
+	split_path_components
+
+	# skip anything that is not an IPoIB port node with a non zero pkey
+	if [ "$node_name" != "ibport" -o "$service" != "ipib" \
+	    -o "$pkey" = "0x0" -o "$pkey" = "0x" ]; then
+		continue
+	fi
+
+	# verify that the hca path exists
+	cd $hca_path 2> /dev/null
+	if [ $? -ne 0 ]; then
+		continue
+	fi
+
+	#
+	# Look in the hca directory for the pkey 0 node of the same port,
+	# named ibport@<port>,0,ipib:ibp<N>. The test below only succeeds if
+	# the pattern matched exactly one character device.
+	#
+	fn=`echo ibport@${port},0,ipib:ibp*[0-9]`
+	if [ -c "$fn" ]; then
+		# after the split on ":", $2 is the ibp<N> link name
+		IFS=":"
+		set -- $fn
+		IFS="${ORIGIFS}"
+
+		#
+		# Remove the old style link, falling back to ibd_delete_link
+		# if dladm cannot do it, then re-create it under the same
+		# name as a partition over the ibp link.
+		#
+		do_cmd "dladm delete-phys $partition_name" 2>/dev/null
+		if [ $? -ne 0 ]; then
+			do_cmd "ibd_delete_link $partition_name"
+		fi
+		do_cmd "dladm create-part -f -l $2 -P $pkey $partition_name"
+
+		# carry over the old enable_rc setting, if there was one
+		if [ "$enable_rc" != "" ]; then
+			process_rc_mode $partition_name
+		fi
+	fi
+done 
+
+exit 0
--- a/usr/src/cmd/rcm_daemon/Makefile.com	Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/cmd/rcm_daemon/Makefile.com	Wed Apr 14 10:26:18 2010 -0700
@@ -19,8 +19,7 @@
 # CDDL HEADER END
 #
 #
-# Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
-# Use is subject to license terms.
+# Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
 #
 
 include ../../Makefile.cmd
@@ -50,6 +49,7 @@
 	$(COMMON)/network_rcm.c \
 	$(COMMON)/vlan_rcm.c \
 	$(COMMON)/vnic_rcm.c \
+	$(COMMON)/ibpart_rcm.c \
 	$(COMMON)/aggr_rcm.c \
 	$(COMMON)/ip_rcm.c \
 	$(COMMON)/cluster_rcm.c \
@@ -74,6 +74,7 @@
 	network_rcm.o \
 	vlan_rcm.o \
 	vnic_rcm.o \
+	ibpart_rcm.o \
 	aggr_rcm.o \
 	ip_rcm.o \
 	cluster_rcm.o \
@@ -94,6 +95,7 @@
 	SUNW_network_rcm.so \
 	SUNW_vlan_rcm.so \
 	SUNW_vnic_rcm.so \
+	SUNW_ibpart_rcm.so \
 	SUNW_aggr_rcm.so \
 	SUNW_ip_rcm.so \
 	SUNW_cluster_rcm.so \
@@ -128,6 +130,7 @@
 SUNW_network_rcm.so := LDLIBS_MODULES += -L$(ROOT)/lib -ldladm
 SUNW_vlan_rcm.so := LDLIBS_MODULES += -L$(ROOT)/lib -ldladm
 SUNW_vnic_rcm.so := LDLIBS_MODULES += -L$(ROOT)/lib -ldladm
+SUNW_ibpart_rcm.so := LDLIBS_MODULES += -L$(ROOT)/lib -ldladm
 SUNW_aggr_rcm.so := LDLIBS_MODULES += -L$(ROOT)/lib -ldladm
 SUNW_ip_rcm.so := LDLIBS_MODULES += -L$(ROOT)/lib -linetutil -ldladm -lipmp -lipadm
 SUNW_ip_anon_rcm.so := LDLIBS_MODULES += -L$(ROOT)/lib -linetutil
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/cmd/rcm_daemon/common/ibpart_rcm.c	Wed Apr 14 10:26:18 2010 -0700
@@ -0,0 +1,1368 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+/*
+ * This RCM module adds support to the RCM framework for IBPART links
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <synch.h>
+#include <assert.h>
+#include <strings.h>
+#include "rcm_module.h"
+#include <libintl.h>
+#include <libdllink.h>
+#include <libdlib.h>
+#include <libdlpi.h>
+
+/*
+ * Definitions
+ */
+#ifndef lint
+#define	_(x)	gettext(x)
+#else
+#define	_(x)	x
+#endif
+
+/* Some generic well-knowns and defaults used in this module */
+#define	RCM_LINK_PREFIX		"SUNW_datalink"	/* RCM datalink name prefix */
+#define	RCM_LINK_RESOURCE_MAX	(13 + LINKID_STR_WIDTH)
+
+/*
+ * IBPART link flags, kept as a bit mask in dl_ibpart_t.dlib_flags:
+ *
+ *  IBPART_OFFLINED		set by ibpart_offline_ibpart() after the
+ *				partition link was deleted; cleared by
+ *				ibpart_online_ibpart() once it is back up.
+ *  IBPART_CONSUMER_OFFLINED	set by ibpart_consumer_offline() after the
+ *				consumers agreed to go offline; cleared by
+ *				ibpart_consumer_online().
+ *  IBPART_STALE		set on every entry at the start of
+ *				cache_update(); cleared by ibpart_update()
+ *				when the partition is seen again.
+ */
+typedef enum {
+	IBPART_OFFLINED			= 0x1,
+	IBPART_CONSUMER_OFFLINED	= 0x2,
+	IBPART_STALE			= 0x4
+} ibpart_flag_t;
+
+/*
+ * Representation of one IBPART link. Entries hang off link_cache_t.pc_ibpart
+ * as a doubly linked, NULL-terminated list; new entries are pushed on the
+ * front (see ibpart_update()).
+ */
+typedef struct dl_ibpart {
+	struct dl_ibpart	*dlib_next;	/* next IBPART on this link */
+	struct dl_ibpart	*dlib_prev;	/* prev IBPART on this link */
+	datalink_id_t	dlib_ibpart_id;
+	ibpart_flag_t	dlib_flags;		/* IBPART link flags */
+} dl_ibpart_t;
+
+/*
+ * IBPART Cache state flags, kept as a bit mask in link_cache_t.pc_state.
+ * CACHE_NODE_NEW stays set until cache_update() has successfully registered
+ * interest in the node's resource.
+ */
+typedef enum {
+	CACHE_NODE_STALE	= 0x1,		/* stale cached data */
+	CACHE_NODE_NEW		= 0x2,		/* new cached nodes */
+	CACHE_NODE_OFFLINED	= 0x4		/* nodes offlined */
+} cache_node_state_t;
+
+/* Network Cache lookup options */
+#define	CACHE_NO_REFRESH	0x1		/* cache refresh not needed */
+#define	CACHE_REFRESH		0x2		/* refresh cache */
+
+/*
+ * Cache element: one entry per underlying link (keyed by the
+ * "SUNW_datalink/<linkid>" resource name) together with the list of IBPARTs
+ * created over it. pc_resource is heap-allocated and released by node_free().
+ */
+typedef struct link_cache {
+	struct link_cache	*pc_next;	/* next cached resource */
+	struct link_cache	*pc_prev;	/* prev cached resource */
+	char			*pc_resource;	/* resource name */
+	datalink_id_t		pc_linkid;	/* linkid */
+	dl_ibpart_t		*pc_ibpart;	/* IBPART list on this link */
+	cache_node_state_t	pc_state;	/* cache state flags */
+} link_cache_t;
+
+/*
+ * Global cache for network IBPARTs
+ *
+ * cache_head and cache_tail are sentinel nodes linked together by
+ * rcm_mod_init(); real entries live between them. cache_lock protects the
+ * list. events_registered records whether RCM_RESOURCE_LINK_NEW has been
+ * registered (see ibpart_register()/ibpart_unregister()).
+ */
+static link_cache_t	cache_head;
+static link_cache_t	cache_tail;
+static mutex_t		cache_lock;
+static int		events_registered = 0;
+
+/* libdladm handle: opened in rcm_mod_init(), closed in rcm_mod_fini() */
+static dladm_handle_t	dld_handle = NULL;
+
+/*
+ * RCM module interface prototypes
+ */
+static int		ibpart_register(rcm_handle_t *);
+static int		ibpart_unregister(rcm_handle_t *);
+static int		ibpart_get_info(rcm_handle_t *, char *, id_t, uint_t,
+			    char **, char **, nvlist_t *, rcm_info_t **);
+static int		ibpart_suspend(rcm_handle_t *, char *, id_t,
+			    timespec_t *, uint_t, char **, rcm_info_t **);
+static int		ibpart_resume(rcm_handle_t *, char *, id_t, uint_t,
+			    char **, rcm_info_t **);
+static int		ibpart_offline(rcm_handle_t *, char *, id_t, uint_t,
+			    char **, rcm_info_t **);
+static int		ibpart_undo_offline(rcm_handle_t *, char *, id_t,
+			    uint_t, char **, rcm_info_t **);
+static int		ibpart_remove(rcm_handle_t *, char *, id_t, uint_t,
+			    char **, rcm_info_t **);
+static int		ibpart_notify_event(rcm_handle_t *, char *, id_t,
+			    uint_t, char **, nvlist_t *, rcm_info_t **);
+static int		ibpart_configure(rcm_handle_t *, datalink_id_t);
+
+/*
+ * Module private routines
+ *
+ * cache_free() is declared with an explicit (void) so that it is a real
+ * prototype and calls with stray arguments are rejected by the compiler.
+ */
+static void		cache_free(void);
+static int		cache_update(rcm_handle_t *);
+static void		cache_remove(link_cache_t *);
+static void		node_free(link_cache_t *);
+static void		cache_insert(link_cache_t *);
+static link_cache_t	*cache_lookup(rcm_handle_t *, char *, char);
+static int		ibpart_consumer_offline(rcm_handle_t *, link_cache_t *,
+			    char **, uint_t, rcm_info_t **);
+static void		ibpart_consumer_online(rcm_handle_t *, link_cache_t *,
+			    char **, uint_t, rcm_info_t **);
+static int		ibpart_offline_ibpart(link_cache_t *, uint32_t,
+			    cache_node_state_t);
+static void		ibpart_online_ibpart(link_cache_t *);
+static char		*ibpart_usage(link_cache_t *);
+static void		ibpart_log_err(datalink_id_t, char **, char *);
+static int		ibpart_consumer_notify(rcm_handle_t *, datalink_id_t,
+			    char **, uint_t, rcm_info_t **);
+
+/*
+ * Module-Private data: the entry-point vector that rcm_mod_init() returns
+ * to its caller. The initializer is positional, so the order of the members
+ * must match struct rcm_mod_ops.
+ * NOTE(review): the two NULL slots are presumably optional entry points this
+ * module does not implement -- confirm against rcm_module.h.
+ */
+static struct rcm_mod_ops ibpart_ops =
+{
+	RCM_MOD_OPS_VERSION,
+	ibpart_register,
+	ibpart_unregister,
+	ibpart_get_info,
+	ibpart_suspend,
+	ibpart_resume,
+	ibpart_offline,
+	ibpart_undo_offline,
+	ibpart_remove,
+	NULL,
+	NULL,
+	ibpart_notify_event
+};
+
+/*
+ * rcm_mod_init() - Set up the empty IBPART cache and its lock, open the
+ *		    libdladm handle and hand back the ops vector. Returns
+ *		    NULL when the datalink handle cannot be opened.
+ */
+struct rcm_mod_ops *
+rcm_mod_init(void)
+{
+	dladm_status_t rc;
+	char rcstr[DLADM_STRSIZE];
+
+	rcm_log_message(RCM_TRACE1, "IBPART: mod_init\n");
+
+	/* An empty cache is just the two sentinels pointing at each other */
+	cache_head.pc_prev = NULL;
+	cache_head.pc_next = &cache_tail;
+	cache_tail.pc_next = NULL;
+	cache_tail.pc_prev = &cache_head;
+	(void) mutex_init(&cache_lock, 0, NULL);
+
+	rc = dladm_open(&dld_handle);
+	if (rc == DLADM_STATUS_OK)
+		return (&ibpart_ops);
+
+	rcm_log_message(RCM_WARNING,
+	    "IBPART: mod_init failed: cannot open datalink "
+	    "handle: %s\n", dladm_status2str(rc, rcstr));
+	return (NULL);
+}
+
+/*
+ * rcm_mod_info() - Return a string describing this module. The string is a
+ *		    constant literal; the caller must not modify or free it.
+ */
+const char *
+rcm_mod_info(void)
+{
+	rcm_log_message(RCM_TRACE1, "IBPART: mod_info\n");
+
+	return ("IBPART module");
+}
+
+/*
+ * rcm_mod_fini() - Destroy the network IBPART cache, its lock and the
+ *		    libdladm handle. Always returns RCM_SUCCESS.
+ */
+int
+rcm_mod_fini(void)
+{
+	rcm_log_message(RCM_TRACE1, "IBPART: mod_fini\n");
+
+	/*
+	 * Note that ibpart_unregister() does not seem to be called anywhere,
+	 * therefore we free the cache nodes here. In theory we should call
+	 * rcm_unregister_interest() for each node before we free it, but the
+	 * framework does not provide the rcm_handle to allow us to do so.
+	 */
+	cache_free();
+	(void) mutex_destroy(&cache_lock);
+
+	dladm_close(dld_handle);
+	return (RCM_SUCCESS);
+}
+
+/*
+ * ibpart_register() - Bring the cache in sync with the system (which also
+ *		 registers interest in newly found links) and, if not done
+ *		 yet, subscribe to RCM_RESOURCE_LINK_NEW events.
+ */
+static int
+ibpart_register(rcm_handle_t *hd)
+{
+	rcm_log_message(RCM_TRACE1, "IBPART: register\n");
+
+	if (cache_update(hd) < 0)
+		return (RCM_FAILURE);
+
+	/* The attach-event subscription only has to be made once */
+	if (events_registered)
+		return (RCM_SUCCESS);
+
+	/*
+	 * Ask to be told about every new resource getting attached, so that
+	 * we receive the attach event notifications.
+	 */
+	if (rcm_register_event(hd, RCM_RESOURCE_LINK_NEW, 0, NULL) !=
+	    RCM_SUCCESS) {
+		rcm_log_message(RCM_ERROR,
+		    _("IBPART: failed to register %s\n"),
+		    RCM_RESOURCE_LINK_NEW);
+		return (RCM_FAILURE);
+	}
+
+	rcm_log_message(RCM_DEBUG, "IBPART: registered %s\n",
+	    RCM_RESOURCE_LINK_NEW);
+	events_registered++;
+	return (RCM_SUCCESS);
+}
+
+/*
+ * ibpart_unregister() - Unregister interest in every cached link, freeing
+ *		 each cache entry as it goes, then drop the
+ *		 RCM_RESOURCE_LINK_NEW subscription. Stops at the first
+ *		 failure and returns RCM_FAILURE.
+ */
+static int
+ibpart_unregister(rcm_handle_t *hd)
+{
+	link_cache_t *entry;
+
+	rcm_log_message(RCM_TRACE1, "IBPART: unregister\n");
+
+	/* Drain the cache from the front, one entry per iteration */
+	(void) mutex_lock(&cache_lock);
+	for (entry = cache_head.pc_next; entry != &cache_tail;
+	    entry = cache_head.pc_next) {
+		int rc = rcm_unregister_interest(hd, entry->pc_resource, 0);
+
+		if (rc != RCM_SUCCESS) {
+			rcm_log_message(RCM_ERROR,
+			    _("IBPART: failed to unregister %s\n"),
+			    entry->pc_resource);
+			(void) mutex_unlock(&cache_lock);
+			return (RCM_FAILURE);
+		}
+		cache_remove(entry);
+		node_free(entry);
+	}
+	(void) mutex_unlock(&cache_lock);
+
+	/* Nothing more to do unless the new-resource event was registered */
+	if (!events_registered)
+		return (RCM_SUCCESS);
+
+	if (rcm_unregister_event(hd, RCM_RESOURCE_LINK_NEW, 0) !=
+	    RCM_SUCCESS) {
+		rcm_log_message(RCM_ERROR,
+		    _("IBPART: failed to unregister %s\n"),
+		    RCM_RESOURCE_LINK_NEW);
+		return (RCM_FAILURE);
+	}
+
+	rcm_log_message(RCM_DEBUG, "IBPART: unregistered %s\n",
+	    RCM_RESOURCE_LINK_NEW);
+	events_registered--;
+	return (RCM_SUCCESS);
+}
+
+/*
+ * ibpart_offline() - Offline IBPARTs on a specific node.
+ *
+ * The consumers of every IBPART on the link are asked to go offline first.
+ * For an RCM_QUERY request nothing else is done; otherwise the IBPARTs
+ * themselves are deleted, and brought back up if any deletion fails.
+ *
+ * Returns RCM_SUCCESS on success (and for an unrecognized resource), or
+ * RCM_FAILURE with *errorp set when the consumers or the IBPARTs could not
+ * be offlined.
+ */
+/*ARGSUSED*/
+static int
+ibpart_offline(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags,
+    char **errorp, rcm_info_t **info)
+{
+	link_cache_t *node;
+
+	rcm_log_message(RCM_TRACE1, "IBPART: offline(%s)\n", rsrc);
+
+	/* Lock the cache and lookup the resource */
+	(void) mutex_lock(&cache_lock);
+	node = cache_lookup(hd, rsrc, CACHE_REFRESH);
+	if (node == NULL) {
+		/*
+		 * Should not happen because the resource is registered.
+		 * There is no node to take a linkid from here, so report
+		 * the error without one instead of dereferencing NULL.
+		 */
+		ibpart_log_err(DATALINK_INVALID_LINKID, errorp,
+		    "unrecognized resource");
+		(void) mutex_unlock(&cache_lock);
+		return (RCM_SUCCESS);
+	}
+
+	/*
+	 * Inform consumers (IP interfaces) of associated IBPARTs to be offlined
+	 */
+	if (ibpart_consumer_offline(hd, node, errorp, flags, info) ==
+	    RCM_SUCCESS) {
+		rcm_log_message(RCM_DEBUG,
+		    "IBPART: consumers agreed on offline\n");
+	} else {
+		ibpart_log_err(node->pc_linkid, errorp,
+		    "consumers failed to offline");
+		(void) mutex_unlock(&cache_lock);
+		return (RCM_FAILURE);
+	}
+
+	/* Check if it's a query */
+	if (flags & RCM_QUERY) {
+		rcm_log_message(RCM_TRACE1,
+		    "IBPART: offline query succeeded(%s)\n", rsrc);
+		(void) mutex_unlock(&cache_lock);
+		return (RCM_SUCCESS);
+	}
+
+	if (ibpart_offline_ibpart(node, IBPART_OFFLINED, CACHE_NODE_OFFLINED) !=
+	    RCM_SUCCESS) {
+		/* Undo the partial offline before reporting the failure */
+		ibpart_online_ibpart(node);
+		ibpart_log_err(node->pc_linkid, errorp, "offline failed");
+		(void) mutex_unlock(&cache_lock);
+		return (RCM_FAILURE);
+	}
+
+	rcm_log_message(RCM_TRACE1, "IBPART: Offline succeeded(%s)\n", rsrc);
+	(void) mutex_unlock(&cache_lock);
+	return (RCM_SUCCESS);
+}
+
+/*
+ * ibpart_undo_offline() - Bring a previously offlined link back: re-enable
+ *		its IBPARTs, notify their consumers and clear the offlined
+ *		state. A link that was never offlined is reported through
+ *		*errorp but still yields RCM_SUCCESS (errno is ENOTSUP).
+ */
+/*ARGSUSED*/
+static int
+ibpart_undo_offline(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags,
+    char **errorp, rcm_info_t **info)
+{
+	link_cache_t *entry;
+
+	rcm_log_message(RCM_TRACE1, "IBPART: online(%s)\n", rsrc);
+
+	(void) mutex_lock(&cache_lock);
+	entry = cache_lookup(hd, rsrc, CACHE_NO_REFRESH);
+	if (entry == NULL) {
+		ibpart_log_err(DATALINK_INVALID_LINKID, errorp, "no such link");
+		(void) mutex_unlock(&cache_lock);
+		errno = ENOENT;
+		return (RCM_FAILURE);
+	}
+
+	/* A link that is not marked offlined must not be onlined here */
+	if ((entry->pc_state & CACHE_NODE_OFFLINED) == 0) {
+		ibpart_log_err(entry->pc_linkid, errorp, "link not offlined");
+		(void) mutex_unlock(&cache_lock);
+		errno = ENOTSUP;
+		return (RCM_SUCCESS);
+	}
+
+	ibpart_online_ibpart(entry);
+
+	/* Let the IP interfaces on the associated IBPARTs come back too */
+	ibpart_consumer_online(hd, entry, errorp, flags, info);
+
+	entry->pc_state &= ~CACHE_NODE_OFFLINED;
+	rcm_log_message(RCM_TRACE1, "IBPART: online succeeded(%s)\n", rsrc);
+	(void) mutex_unlock(&cache_lock);
+	return (RCM_SUCCESS);
+}
+
+/*
+ * ibpart_online_ibpart() - Bring up every IBPART on the node that is marked
+ *		IBPART_OFFLINED. A failure is logged and does not stop the
+ *		walk; the flag is cleared only for IBPARTs that came up.
+ */
+static void
+ibpart_online_ibpart(link_cache_t *node)
+{
+	dl_ibpart_t *part;
+	dladm_status_t rc;
+	char rcstr[DLADM_STRSIZE];
+
+	for (part = node->pc_ibpart; part != NULL; part = part->dlib_next) {
+		if ((part->dlib_flags & IBPART_OFFLINED) == 0)
+			continue;
+
+		rcm_log_message(RCM_TRACE1, "IBPART: online DLID %d\n",
+		    part->dlib_ibpart_id);
+
+		rc = dladm_part_up(dld_handle, part->dlib_ibpart_id, 0);
+		if (rc == DLADM_STATUS_OK) {
+			part->dlib_flags &= ~IBPART_OFFLINED;
+			continue;
+		}
+
+		/* Warn, then keep going with the remaining IBPARTs */
+		rcm_log_message(RCM_WARNING,
+		    _("IBPART: IBPART online failed (%u): %s\n"),
+		    part->dlib_ibpart_id, dladm_status2str(rc, rcstr));
+	}
+}
+
+/*
+ * ibpart_offline_ibpart() - Delete the active instance of every IBPART on
+ *		the node, OR-ing 'flags' into each one that was deleted. The
+ *		first failure aborts the walk with RCM_FAILURE; only when all
+ *		deletions succeed is 'state' OR-ed into the node state.
+ */
+static int
+ibpart_offline_ibpart(link_cache_t *node, uint32_t flags,
+    cache_node_state_t state)
+{
+	dl_ibpart_t *part;
+	dladm_status_t rc;
+	char rcstr[DLADM_STRSIZE];
+
+	rcm_log_message(RCM_TRACE2, "IBPART: ibpart_offline_ibpart "
+	    "(%s %u %u)\n", node->pc_resource, flags, state);
+
+	part = node->pc_ibpart;
+	while (part != NULL) {
+		rcm_log_message(RCM_TRACE1, "IBPART: offline DLID %d\n",
+		    part->dlib_ibpart_id);
+
+		rc = dladm_part_delete(dld_handle, part->dlib_ibpart_id,
+		    DLADM_OPT_ACTIVE);
+		if (rc != DLADM_STATUS_OK) {
+			rcm_log_message(RCM_WARNING,
+			    _("IBPART: IBPART offline failed (%u): %s\n"),
+			    part->dlib_ibpart_id,
+			    dladm_status2str(rc, rcstr));
+			return (RCM_FAILURE);
+		}
+
+		rcm_log_message(RCM_TRACE1,
+		    "IBPART: IBPART offline succeeded(%u)\n",
+		    part->dlib_ibpart_id);
+		part->dlib_flags |= flags;
+		part = part->dlib_next;
+	}
+
+	node->pc_state |= state;
+	return (RCM_SUCCESS);
+}
+
+/*
+ * ibpart_get_info() - Gather usage information for this resource.
+ *
+ * On success *usagep receives a heap-allocated usage string built by
+ * ibpart_usage() and the client name is added to 'props'. Returns
+ * RCM_FAILURE with errno set to ENOENT when the resource is not in the
+ * cache, or to ENOMEM when the usage string could not be built.
+ *
+ * Declared static to agree with the prototype near the top of the file.
+ */
+/*ARGSUSED*/
+static int
+ibpart_get_info(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags,
+    char **usagep, char **errorp, nvlist_t *props, rcm_info_t **info)
+{
+	link_cache_t *node;
+
+	rcm_log_message(RCM_TRACE1, "IBPART: get_info(%s)\n", rsrc);
+
+	(void) mutex_lock(&cache_lock);
+	node = cache_lookup(hd, rsrc, CACHE_REFRESH);
+	if (node == NULL) {
+		rcm_log_message(RCM_INFO,
+		    _("IBPART: get_info(%s) unrecognized resource\n"), rsrc);
+		(void) mutex_unlock(&cache_lock);
+		errno = ENOENT;
+		return (RCM_FAILURE);
+	}
+
+	*usagep = ibpart_usage(node);
+	(void) mutex_unlock(&cache_lock);
+	if (*usagep == NULL) {
+		/*
+		 * Most likely malloc failure. cache_lock was already dropped
+		 * above, so it must not be unlocked a second time here.
+		 */
+		rcm_log_message(RCM_ERROR,
+		    _("IBPART: get_info(%s) malloc failure\n"), rsrc);
+		errno = ENOMEM;
+		return (RCM_FAILURE);
+	}
+
+	/* Set client/role properties */
+	(void) nvlist_add_string(props, RCM_CLIENT_NAME, "IBPART");
+
+	rcm_log_message(RCM_TRACE1, "IBPART: get_info(%s) info = %s\n",
+	    rsrc, *usagep);
+	return (RCM_SUCCESS);
+}
+
+/*
+ * ibpart_suspend() - Nothing to do, always okay. Only traces the request
+ *		and returns RCM_SUCCESS.
+ */
+/*ARGSUSED*/
+static int
+ibpart_suspend(rcm_handle_t *hd, char *rsrc, id_t id, timespec_t *interval,
+    uint_t flags, char **errorp, rcm_info_t **info)
+{
+	rcm_log_message(RCM_TRACE1, "IBPART: suspend(%s)\n", rsrc);
+	return (RCM_SUCCESS);
+}
+
+/*
+ * ibpart_resume() - Nothing to do, always okay. Only traces the request
+ *		and returns RCM_SUCCESS.
+ */
+/*ARGSUSED*/
+static int
+ibpart_resume(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags,
+    char **errorp, rcm_info_t **info)
+{
+	rcm_log_message(RCM_TRACE1, "IBPART: resume(%s)\n", rsrc);
+	return (RCM_SUCCESS);
+}
+
+/*
+ * ibpart_consumer_remove()
+ *
+ *	Send a remove notification for every IBPART on the node so that the
+ *	IBPART consumers can drop their cached state. Stops at the first
+ *	notification that fails and returns its status.
+ */
+static int
+ibpart_consumer_remove(rcm_handle_t *hd, link_cache_t *node, uint_t flags,
+    rcm_info_t **info)
+{
+	dl_ibpart_t *part;
+	char name[RCM_LINK_RESOURCE_MAX];
+	int rc = RCM_SUCCESS;
+
+	rcm_log_message(RCM_TRACE2, "IBPART: ibpart_consumer_remove (%s)\n",
+	    node->pc_resource);
+
+	part = node->pc_ibpart;
+	while (part != NULL) {
+		/*
+		 * We only get here after a successful offline, hence the
+		 * consumers of each IBPART are expected to be offlined.
+		 */
+		assert(part->dlib_flags & IBPART_CONSUMER_OFFLINED);
+
+		(void) snprintf(name, sizeof (name), "%s/%u",
+		    RCM_LINK_PREFIX, part->dlib_ibpart_id);
+
+		rc = rcm_notify_remove(hd, name, flags, info);
+		if (rc != RCM_SUCCESS) {
+			rcm_log_message(RCM_WARNING,
+			    _("IBPART: notify remove failed (%s)\n"), name);
+			break;
+		}
+		part = part->dlib_next;
+	}
+
+	rcm_log_message(RCM_TRACE2, "IBPART: ibpart_consumer_remove done\n");
+	return (rc);
+}
+
+/*
+ * ibpart_remove() - Take a resource out of the cache, notify the consumers
+ *		of its IBPARTs and free the cache entry. Fails with ENOENT
+ *		when the resource is not cached.
+ */
+/*ARGSUSED*/
+static int
+ibpart_remove(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags,
+    char **errorp, rcm_info_t **info)
+{
+	link_cache_t *entry;
+	int status;
+
+	rcm_log_message(RCM_TRACE1, "IBPART: remove(%s)\n", rsrc);
+
+	(void) mutex_lock(&cache_lock);
+	if ((entry = cache_lookup(hd, rsrc, CACHE_NO_REFRESH)) == NULL) {
+		rcm_log_message(RCM_INFO,
+		    _("IBPART: remove(%s) unrecognized resource\n"), rsrc);
+		(void) mutex_unlock(&cache_lock);
+		errno = ENOENT;
+		return (RCM_FAILURE);
+	}
+
+	/* Unlink the entry first; it is private to us once the lock drops */
+	cache_remove(entry);
+	(void) mutex_unlock(&cache_lock);
+
+	status = ibpart_consumer_remove(hd, entry, flags, info);
+	node_free(entry);
+	return (status);
+}
+
+/*
+ * ibpart_notify_event - Project private handler for new resource events.
+ *		   Only RCM_RESOURCE_LINK_NEW is accepted. The cache is
+ *		   refreshed, then every RCM_NV_LINKID entry in the nvlist
+ *		   is configured and its consumers are notified. A failure
+ *		   on one link is recorded but does not stop the others.
+ */
+/*ARGSUSED*/
+static int
+ibpart_notify_event(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags,
+    char **errorp, nvlist_t *nvl, rcm_info_t **info)
+{
+	nvpair_t	*pair;
+	datalink_id_t	linkid;
+	uint64_t	rawid;
+	int		result = RCM_SUCCESS;
+
+	rcm_log_message(RCM_TRACE1, "IBPART: notify_event(%s)\n", rsrc);
+
+	if (strcmp(rsrc, RCM_RESOURCE_LINK_NEW) != 0) {
+		ibpart_log_err(DATALINK_INVALID_LINKID, errorp,
+		    "unrecognized event");
+		errno = EINVAL;
+		return (RCM_FAILURE);
+	}
+
+	/* Bring the cache up to date with the latest IBPARTs */
+	if (cache_update(hd) < 0) {
+		ibpart_log_err(DATALINK_INVALID_LINKID, errorp,
+		    "private Cache update failed");
+		return (RCM_FAILURE);
+	}
+
+	/* Best effort: keep going through the list after a failure */
+	rcm_log_message(RCM_DEBUG, "IBPART: process_nvlist\n");
+	for (pair = nvlist_next_nvpair(nvl, NULL); pair != NULL;
+	    pair = nvlist_next_nvpair(nvl, pair)) {
+		if (strcmp(nvpair_name(pair), RCM_NV_LINKID) != 0)
+			continue;
+
+		if (nvpair_value_uint64(pair, &rawid) != 0) {
+			ibpart_log_err(DATALINK_INVALID_LINKID, errorp,
+			    "cannot get linkid");
+			result = RCM_FAILURE;
+			continue;
+		}
+		linkid = (datalink_id_t)rawid;
+
+		if (ibpart_configure(hd, linkid) != 0) {
+			ibpart_log_err(linkid, errorp, "configuring failed");
+			result = RCM_FAILURE;
+		} else if (ibpart_consumer_notify(hd, linkid, errorp, flags,
+		    info) != 0) {
+			/* Configured, but the IBPART consumers were not told */
+			ibpart_log_err(linkid, errorp,
+			    "consumer notify failed");
+			result = RCM_FAILURE;
+		}
+	}
+
+	rcm_log_message(RCM_TRACE1,
+	    "IBPART: notify_event: link configuration complete\n");
+	return (result);
+}
+
+/*
+ * ibpart_usage - Build the usage string for a link: "<link> offlined" for
+ *	    an offlined node, otherwise "<link> IBPART: " followed by the
+ *	    names of its IBPARTs. Must be called with cache_lock held.
+ *	    Returns NULL on failure; otherwise the buffer belongs to the
+ *	    caller, who must free it.
+ */
+static char *
+ibpart_usage(link_cache_t *node)
+{
+	dl_ibpart_t *part;
+	dladm_status_t rc;
+	char rcstr[DLADM_STRSIZE];
+	char linkname[MAXLINKNAMELEN];
+	const char *fmt;
+	char *sep;
+	char *out;
+	size_t outsz;
+	int count = 0;
+	boolean_t offlined;
+
+	rcm_log_message(RCM_TRACE2, "IBPART: usage(%s)\n", node->pc_resource);
+
+	assert(MUTEX_HELD(&cache_lock));
+	rc = dladm_datalink_id2info(dld_handle, node->pc_linkid, NULL,
+	    NULL, NULL, linkname, sizeof (linkname));
+	if (rc != DLADM_STATUS_OK) {
+		rcm_log_message(RCM_ERROR,
+		    _("IBPART: usage(%s) get link name failure(%s)\n"),
+		    node->pc_resource, dladm_status2str(rc, rcstr));
+		return (NULL);
+	}
+
+	offlined = (node->pc_state & CACHE_NODE_OFFLINED) ? B_TRUE : B_FALSE;
+	if (offlined)
+		fmt = _("%1$s offlined");
+	else
+		fmt = _("%1$s IBPART: ");
+
+	/* TRANSLATION_NOTE: separator used between IBPART linkids */
+	sep = _(", ");
+
+	for (part = node->pc_ibpart; part != NULL; part = part->dlib_next)
+		count++;
+
+	/* room for the message, plus one name and separator per IBPART */
+	outsz = count * (MAXLINKNAMELEN + strlen(sep)) +
+	    strlen(fmt) + MAXLINKNAMELEN + 1;
+	out = malloc(outsz);
+	if (out == NULL) {
+		rcm_log_message(RCM_ERROR,
+		    _("IBPART: usage(%s) malloc failure(%s)\n"),
+		    node->pc_resource, strerror(errno));
+		return (NULL);
+	}
+	(void) snprintf(out, outsz, fmt, linkname);
+
+	/* An offlined node reports no IBPART names */
+	part = offlined ? NULL : node->pc_ibpart;
+	for (; part != NULL; part = part->dlib_next) {
+		rcm_log_message(RCM_DEBUG, "IBPART:= %u\n",
+		    part->dlib_ibpart_id);
+
+		rc = dladm_datalink_id2info(dld_handle, part->dlib_ibpart_id,
+		    NULL, NULL, NULL, linkname, sizeof (linkname));
+		if (rc != DLADM_STATUS_OK) {
+			rcm_log_message(RCM_ERROR,
+			    _("IBPART: usage(%s) get ibpart %u name "
+			    "failure(%s)\n"), node->pc_resource,
+			    part->dlib_ibpart_id,
+			    dladm_status2str(rc, rcstr));
+			free(out);
+			return (NULL);
+		}
+
+		(void) strlcat(out, linkname, outsz);
+		if (part->dlib_next != NULL)
+			(void) strlcat(out, sep, outsz);
+	}
+
+	rcm_log_message(RCM_TRACE2, "IBPART: usage (%s) info = %s\n",
+	    node->pc_resource, out);
+
+	return (out);
+}
+
+/*
+ * Cache management routines. Unless noted otherwise, cache management
+ * functions should be called with cache_lock held; cache_update() and
+ * cache_free() acquire the lock themselves.
+ */
+
+/*
+ * cache_lookup() - Find the cache node for a resource name.
+ *		  Call with cache lock held.
+ *
+ * With CACHE_REFRESH the cache is first brought in line with the system
+ * state; the lock is released for the duration of that update. Returns the
+ * matching cache element, or NULL when the resource is not cached.
+ */
+static link_cache_t *
+cache_lookup(rcm_handle_t *hd, char *rsrc, char options)
+{
+	link_cache_t *cur;
+
+	rcm_log_message(RCM_TRACE2, "IBPART: cache lookup(%s)\n", rsrc);
+
+	assert(MUTEX_HELD(&cache_lock));
+	if ((options & CACHE_REFRESH) != 0) {
+		/* cache_update() takes the lock itself, so let go of it */
+		(void) mutex_unlock(&cache_lock);
+		(void) cache_update(hd);
+		(void) mutex_lock(&cache_lock);
+	}
+
+	for (cur = cache_head.pc_next; cur != &cache_tail;
+	    cur = cur->pc_next) {
+		if (strcmp(rsrc, cur->pc_resource) != 0)
+			continue;
+
+		rcm_log_message(RCM_TRACE2,
+		    "IBPART: cache lookup succeeded(%s)\n", rsrc);
+		return (cur);
+	}
+	return (NULL);
+}
+
+/*
+ * node_free - Release a cache node: its resource name, every entry on its
+ *	       IBPART list, and the node itself. A NULL node is ignored.
+ */
+static void
+node_free(link_cache_t *node)
+{
+	dl_ibpart_t *part, *following;
+
+	if (node == NULL)
+		return;
+
+	free(node->pc_resource);
+
+	/* walk the IBPART list, saving the link before each free */
+	part = node->pc_ibpart;
+	while (part != NULL) {
+		following = part->dlib_next;
+		free(part);
+		part = following;
+	}
+	free(node);
+}
+
+/*
+ * cache_insert - Link a resource node into the cache, directly after the
+ *		  head sentinel. Call with cache lock held.
+ */
+static void
+cache_insert(link_cache_t *node)
+{
+	link_cache_t *first;
+
+	assert(MUTEX_HELD(&cache_lock));
+
+	/* head insertion: no list walk needed */
+	first = cache_head.pc_next;
+
+	node->pc_prev = &cache_head;
+	node->pc_next = first;
+
+	first->pc_prev = node;
+	cache_head.pc_next = node;
+}
+
+/*
+ * cache_remove() - Unlink a resource node from the cache and clear its list
+ *		    pointers. The node is not freed. Call with cache lock
+ *		    held.
+ */
+static void
+cache_remove(link_cache_t *node)
+{
+	link_cache_t *before, *after;
+
+	assert(MUTEX_HELD(&cache_lock));
+
+	before = node->pc_prev;
+	after = node->pc_next;
+
+	after->pc_prev = before;
+	before->pc_next = after;
+
+	node->pc_next = NULL;
+	node->pc_prev = NULL;
+}
+
+/*
+ * Argument block passed through dladm_walk_datalink_id() to ibpart_update():
+ * 'hd' is the RCM handle supplied by the caller of ibpart_update_all();
+ * 'retval' is written by ibpart_update() (0 on success, -1 on failure) and
+ * returned by ibpart_update_all().
+ */
+typedef struct ibpart_update_arg_s {
+	rcm_handle_t	*hd;
+	int		retval;
+} ibpart_update_arg_t;
+
+/*
+ * ibpart_update() - Walker callback used by ibpart_update_all(): record one
+ *		IBPART link in the cache.
+ *
+ * The cache node for the IBPART's underlying physical link is looked up, or
+ * created and marked CACHE_NODE_NEW. The IBPART is then added to that node's
+ * list, or has its IBPART_STALE flag cleared if it is already there, and
+ * the node's CACHE_NODE_STALE flag is cleared. Must be called with
+ * cache_lock held.
+ *
+ * Links whose information cannot be read, and links with no physical link
+ * id, are skipped without touching arg->retval. Otherwise arg->retval is set
+ * to 0 on success or -1 on allocation failure, and a failure ends the walk.
+ */
+static int
+ibpart_update(dladm_handle_t handle, datalink_id_t ibpartid, void *arg)
+{
+	ibpart_update_arg_t *ibpart_update_argp = arg;
+	rcm_handle_t *hd = ibpart_update_argp->hd;
+	link_cache_t *node;
+	dl_ibpart_t *ibpart;
+	char *rsrc;
+	dladm_ib_attr_t ibpart_attr;
+	dladm_status_t status;
+	char errmsg[DLADM_STRSIZE];
+	boolean_t newnode = B_FALSE;
+	int ret = -1;
+
+	rcm_log_message(RCM_TRACE2, "IBPART: ibpart_update(%u)\n", ibpartid);
+
+	assert(MUTEX_HELD(&cache_lock));
+	status = dladm_part_info(handle, ibpartid, &ibpart_attr,
+	    DLADM_OPT_ACTIVE);
+	if (status != DLADM_STATUS_OK) {
+		rcm_log_message(RCM_TRACE1,
+		    "IBPART: ibpart_update() cannot get ibpart information for "
+		    "%u(%s)\n", ibpartid, dladm_status2str(status, errmsg));
+		return (DLADM_WALK_CONTINUE);
+	}
+
+	if (ibpart_attr.dia_physlinkid == DATALINK_INVALID_LINKID) {
+		/*
+		 * Skip the IB port nodes.
+		 */
+		rcm_log_message(RCM_TRACE1,
+		    "IBPART: ibpart_update(): skip the PORT nodes %u\n",
+		    ibpartid);
+		return (DLADM_WALK_CONTINUE);
+	}
+
+	/* The cache is keyed by the resource name of the physical link */
+	rsrc = malloc(RCM_LINK_RESOURCE_MAX);
+	if (rsrc == NULL) {
+		rcm_log_message(RCM_ERROR, _("IBPART: malloc error(%s): %u\n"),
+		    strerror(errno), ibpartid);
+		goto done;
+	}
+
+	(void) snprintf(rsrc, RCM_LINK_RESOURCE_MAX, "%s/%u",
+	    RCM_LINK_PREFIX, ibpart_attr.dia_physlinkid);
+
+	node = cache_lookup(hd, rsrc, CACHE_NO_REFRESH);
+	if (node != NULL) {
+		rcm_log_message(RCM_DEBUG,
+		    "IBPART: %s already registered (ibpartid:%d)\n",
+		    rsrc, ibpart_attr.dia_partlinkid);
+		/* the existing node already owns a copy of the name */
+		free(rsrc);
+	} else {
+		rcm_log_message(RCM_DEBUG,
+		    "IBPART: %s is a new resource (ibpartid:%d)\n",
+		    rsrc, ibpart_attr.dia_partlinkid);
+		if ((node = calloc(1, sizeof (link_cache_t))) == NULL) {
+			free(rsrc);
+			rcm_log_message(RCM_ERROR, _("IBPART: calloc: %s\n"),
+			    strerror(errno));
+			goto done;
+		}
+
+		/* from here on rsrc is owned by the new node */
+		node->pc_resource = rsrc;
+		node->pc_ibpart = NULL;
+		node->pc_linkid = ibpart_attr.dia_physlinkid;
+		node->pc_state |= CACHE_NODE_NEW;
+		newnode = B_TRUE;
+	}
+
+	/* An IBPART that is already cached is simply marked as still there */
+	for (ibpart = node->pc_ibpart; ibpart != NULL;
+	    ibpart = ibpart->dlib_next) {
+		if (ibpart->dlib_ibpart_id == ibpartid) {
+			ibpart->dlib_flags &= ~IBPART_STALE;
+			break;
+		}
+	}
+
+	if (ibpart == NULL) {
+		if ((ibpart = calloc(1, sizeof (dl_ibpart_t))) == NULL) {
+			rcm_log_message(RCM_ERROR, _("IBPART: malloc: %s\n"),
+			    strerror(errno));
+			/* a new node is not in the cache yet: discard it */
+			if (newnode) {
+				free(rsrc);
+				free(node);
+			}
+			goto done;
+		}
+		/* push the new IBPART on the front of the node's list */
+		ibpart->dlib_ibpart_id = ibpartid;
+		ibpart->dlib_next = node->pc_ibpart;
+		ibpart->dlib_prev = NULL;
+		if (node->pc_ibpart != NULL)
+			node->pc_ibpart->dlib_prev = ibpart;
+		node->pc_ibpart = ibpart;
+	}
+
+	node->pc_state &= ~CACHE_NODE_STALE;
+
+	if (newnode)
+		cache_insert(node);
+
+	rcm_log_message(RCM_TRACE3, "IBPART: ibpart_update: succeeded(%u)\n",
+	    ibpartid);
+	ret = 0;
+done:
+	ibpart_update_argp->retval = ret;
+	return (ret == 0 ? DLADM_WALK_CONTINUE : DLADM_WALK_TERMINATE);
+}
+
+/*
+ * ibpart_update_all() - Run ibpart_update() over every active datalink of
+ *		class DATALINK_CLASS_PART and return the status it left
+ *		behind (0 when nothing failed). Call with cache lock held.
+ */
+static int
+ibpart_update_all(rcm_handle_t *hd)
+{
+	ibpart_update_arg_t walkarg;
+
+	rcm_log_message(RCM_TRACE2, "IBPART: ibpart_update_all\n");
+
+	assert(MUTEX_HELD(&cache_lock));
+
+	walkarg.hd = hd;
+	walkarg.retval = 0;
+	(void) dladm_walk_datalink_id(ibpart_update, dld_handle, &walkarg,
+	    DATALINK_CLASS_PART, DATALINK_ANY_MEDIATYPE, DLADM_OPT_ACTIVE);
+
+	return (walkarg.retval);
+}
+
+/*
+ * cache_update() - Update cache with latest interface info
+ *
+ * Works as a mark-and-sweep: every node and IBPART is marked stale, the
+ * walk in ibpart_update_all() clears the mark on everything still present,
+ * and whatever remains stale is dropped. Nodes still flagged CACHE_NODE_NEW
+ * get their resource registered with RCM.
+ *
+ * Takes and releases cache_lock itself, so the caller must not hold it.
+ * Returns the status of ibpart_update_all(), or -1 if registering a new
+ * resource failed.
+ */
+static int
+cache_update(rcm_handle_t *hd)
+{
+	link_cache_t *node, *nnode;
+	dl_ibpart_t *ibpart;
+	int rv;
+
+	rcm_log_message(RCM_TRACE2, "IBPART: cache_update\n");
+
+	(void) mutex_lock(&cache_lock);
+
+	/* first we walk the entire cache, marking each entry stale */
+	node = cache_head.pc_next;
+	for (; node != &cache_tail; node = node->pc_next) {
+		node->pc_state |= CACHE_NODE_STALE;
+		for (ibpart = node->pc_ibpart; ibpart != NULL;
+		    ibpart = ibpart->dlib_next)
+			ibpart->dlib_flags |= IBPART_STALE;
+	}
+
+	rv = ibpart_update_all(hd);
+
+	/*
+	 * Continue to delete all stale nodes from the cache even
+	 * ibpart_update_all() failed. Unregister link that are not offlined
+	 * and still in cache
+	 * NOTE(review): the code below does not test CACHE_NODE_OFFLINED
+	 * before unregistering a stale node -- confirm this is intended.
+	 */
+	for (node = cache_head.pc_next; node != &cache_tail; node = nnode) {
+		/* NOTE: these locals shadow the function-level 'ibpart' */
+		dl_ibpart_t *ibpart, *next;
+
+		for (ibpart = node->pc_ibpart; ibpart != NULL; ibpart = next) {
+			/* save the link first: ibpart may be freed below */
+			next = ibpart->dlib_next;
+
+			/* clear stale IBPARTs */
+			if (ibpart->dlib_flags & IBPART_STALE) {
+				if (ibpart->dlib_prev != NULL)
+					ibpart->dlib_prev->dlib_next = next;
+				else
+					node->pc_ibpart = next;
+
+				if (next != NULL)
+					next->dlib_prev = ibpart->dlib_prev;
+				free(ibpart);
+			}
+		}
+
+		/* save the successor: node may be removed and freed below */
+		nnode = node->pc_next;
+		if (node->pc_state & CACHE_NODE_STALE) {
+			(void) rcm_unregister_interest(hd, node->pc_resource,
+			    0);
+			rcm_log_message(RCM_DEBUG, "IBPART: unregistered %s\n",
+			    node->pc_resource);
+			/* a stale node had every one of its IBPARTs swept */
+			assert(node->pc_ibpart == NULL);
+			cache_remove(node);
+			node_free(node);
+			continue;
+		}
+
+		if (!(node->pc_state & CACHE_NODE_NEW))
+			continue;
+
+		/* CACHE_NODE_NEW stays set on failure, so a later update */
+		/* will attempt the registration again */
+		if (rcm_register_interest(hd, node->pc_resource, 0, NULL) !=
+		    RCM_SUCCESS) {
+			rcm_log_message(RCM_ERROR,
+			    _("IBPART: failed to register %s\n"),
+			    node->pc_resource);
+			rv = -1;
+		} else {
+			rcm_log_message(RCM_DEBUG, "IBPART: registered %s\n",
+			    node->pc_resource);
+			node->pc_state &= ~CACHE_NODE_NEW;
+		}
+	}
+
+	(void) mutex_unlock(&cache_lock);
+	return (rv);
+}
+
+/*
+ * cache_free() - Remove and free every node in the cache. Takes and
+ *		  releases cache_lock itself.
+ */
+static void
+cache_free()
+{
+	link_cache_t *entry;
+
+	rcm_log_message(RCM_TRACE2, "IBPART: cache_free\n");
+
+	(void) mutex_lock(&cache_lock);
+	/* keep taking the first node until only the sentinels remain */
+	for (entry = cache_head.pc_next; entry != &cache_tail;
+	    entry = cache_head.pc_next) {
+		cache_remove(entry);
+		node_free(entry);
+	}
+	(void) mutex_unlock(&cache_lock);
+}
+
+/*
+ * ibpart_log_err() - RCM error log wrapper
+ *
+ * Logs 'errmsg' (together with the link's resource name when 'linkid' is
+ * valid) and, if 'errorp' is not NULL, stores in *errorp a newly allocated
+ * message that also names the link when its name can be resolved. *errorp
+ * is set to NULL if that allocation fails. Any previous value of *errorp is
+ * overwritten, not freed.
+ */
+static void
+ibpart_log_err(datalink_id_t linkid, char **errorp, char *errmsg)
+{
+	char link[MAXLINKNAMELEN];
+	char errstr[DLADM_STRSIZE];
+	dladm_status_t status;
+	size_t len;
+	const char *errfmt;
+	char *error;
+
+	link[0] = '\0';
+	if (linkid != DATALINK_INVALID_LINKID) {
+		char rsrc[RCM_LINK_RESOURCE_MAX];
+
+		(void) snprintf(rsrc, sizeof (rsrc), "%s/%u",
+		    RCM_LINK_PREFIX, linkid);
+
+		rcm_log_message(RCM_ERROR, _("IBPART: %s(%s)\n"), errmsg, rsrc);
+		if ((status = dladm_datalink_id2info(dld_handle, linkid, NULL,
+		    NULL, NULL, link, sizeof (link))) != DLADM_STATUS_OK) {
+			rcm_log_message(RCM_WARNING,
+			    _("IBPART: cannot get link name for (%s) %s\n"),
+			    rsrc, dladm_status2str(status, errstr));
+		}
+	} else {
+		rcm_log_message(RCM_ERROR, _("IBPART: %s\n"), errmsg);
+	}
+
+	/*
+	 * With nowhere to return the message there is no point in building
+	 * it; allocating it anyway would leak the buffer.
+	 */
+	if (errorp == NULL)
+		return;
+
+	errfmt = strlen(link) > 0 ? _("IBPART: %s(%s)") : _("IBPART: %s");
+	len = strlen(errfmt) + strlen(errmsg) + MAXLINKNAMELEN + 1;
+	if ((error = malloc(len)) != NULL) {
+		if (strlen(link) > 0)
+			(void) snprintf(error, len, errfmt, errmsg, link);
+		else
+			(void) snprintf(error, len, errfmt, errmsg);
+	}
+
+	*errorp = error;
+}
+
+/*
+ * ibpart_consumer_online()
+ *
+ *	Send an online notification for each IBPART on the node whose
+ *	consumers are marked offlined, clearing the mark when the
+ *	notification succeeds.
+ */
+/* ARGSUSED */
+static void
+ibpart_consumer_online(rcm_handle_t *hd, link_cache_t *node, char **errorp,
+    uint_t flags, rcm_info_t **info)
+{
+	dl_ibpart_t *part;
+	char name[RCM_LINK_RESOURCE_MAX];
+
+	rcm_log_message(RCM_TRACE2, "IBPART: ibpart_consumer_online (%s)\n",
+	    node->pc_resource);
+
+	for (part = node->pc_ibpart; part != NULL; part = part->dlib_next) {
+		if ((part->dlib_flags & IBPART_CONSUMER_OFFLINED) == 0)
+			continue;
+
+		(void) snprintf(name, sizeof (name), "%s/%u",
+		    RCM_LINK_PREFIX, part->dlib_ibpart_id);
+
+		if (rcm_notify_online(hd, name, flags, info) != RCM_SUCCESS)
+			continue;
+
+		part->dlib_flags &= ~IBPART_CONSUMER_OFFLINED;
+	}
+
+	rcm_log_message(RCM_TRACE2, "IBPART: ibpart_consumer_online done\n");
+}
+
+/*
+ * ibpart_consumer_offline()
+ *
+ *	Request offline for each IBPART on the node, marking those whose
+ *	consumers agreed. If one request fails, the consumers offlined so
+ *	far are brought back online and the failing status is returned.
+ */
+static int
+ibpart_consumer_offline(rcm_handle_t *hd, link_cache_t *node, char **errorp,
+    uint_t flags, rcm_info_t **info)
+{
+	dl_ibpart_t *part;
+	char name[RCM_LINK_RESOURCE_MAX];
+	int rc = RCM_SUCCESS;
+
+	rcm_log_message(RCM_TRACE2, "IBPART: ibpart_consumer_offline (%s)\n",
+	    node->pc_resource);
+
+	part = node->pc_ibpart;
+	while (part != NULL) {
+		(void) snprintf(name, sizeof (name), "%s/%u",
+		    RCM_LINK_PREFIX, part->dlib_ibpart_id);
+
+		rc = rcm_request_offline(hd, name, flags, info);
+		if (rc != RCM_SUCCESS)
+			break;
+
+		part->dlib_flags |= IBPART_CONSUMER_OFFLINED;
+		part = part->dlib_next;
+	}
+
+	/* stopped before the end of the list: roll back */
+	if (part != NULL)
+		ibpart_consumer_online(hd, node, errorp, flags, info);
+
+	rcm_log_message(RCM_TRACE2, "IBPART: ibpart_consumer_offline done\n");
+	return (rc);
+}
+
+/*
+ * Send RCM_RESOURCE_LINK_NEW events to other modules about new IBPARTs.
+ * Return 0 on success (or if rsrc is not in the cache), -1 on failure.
+ */
+static int
+ibpart_notify_new_ibpart(rcm_handle_t *hd, char *rsrc)
+{
+	link_cache_t *node;
+	dl_ibpart_t *part;
+	nvlist_t *nvl = NULL;
+	uint64_t id;
+	int ret = -1;
+
+	rcm_log_message(RCM_TRACE2, "IBPART: ibpart_notify_new_ibpart (%s)\n",
+	    rsrc);
+
+	(void) mutex_lock(&cache_lock);
+	if ((node = cache_lookup(hd, rsrc, CACHE_REFRESH)) == NULL) {
+		(void) mutex_unlock(&cache_lock);
+		return (0);
+	}
+
+	if (nvlist_alloc(&nvl, 0, 0) != 0) {
+		(void) mutex_unlock(&cache_lock);
+		rcm_log_message(RCM_WARNING,
+		    _("IBPART: failed to allocate nvlist\n"));
+		goto done;
+	}
+
+	for (part = node->pc_ibpart; part != NULL; part = part->dlib_next) {
+		rcm_log_message(RCM_TRACE2, "IBPART: ibpart_notify_new_ibpart "
+		    "add (%u)\n", part->dlib_ibpart_id);
+
+		id = part->dlib_ibpart_id;
+		if (nvlist_add_uint64(nvl, RCM_NV_LINKID, id) != 0) {
+			rcm_log_message(RCM_ERROR,
+			    _("IBPART: failed to construct nvlist\n"));
+			(void) mutex_unlock(&cache_lock);
+			goto done;
+		}
+	}
+	(void) mutex_unlock(&cache_lock);
+
+	/* node must not be touched from here on: cache_lock is dropped. */
+	if (rcm_notify_event(hd, RCM_RESOURCE_LINK_NEW, 0, nvl, NULL) !=
+	    RCM_SUCCESS) {
+		rcm_log_message(RCM_ERROR,
+		    _("IBPART: failed to notify %s event for %s\n"),
+		    RCM_RESOURCE_LINK_NEW, rsrc);
+		goto done;
+	}
+
+	ret = 0;
+done:
+	if (nvl != NULL)
+		nvlist_free(nvl);
+	return (ret);
+}
+
+/*
+ * ibpart_consumer_notify() - Tell consumers about the IBPARTs on a link.
+ */
+static int
+ibpart_consumer_notify(rcm_handle_t *hd, datalink_id_t linkid, char **errorp,
+    uint_t flags, rcm_info_t **info)
+{
+	link_cache_t *stale;
+	char rsrc[RCM_LINK_RESOURCE_MAX];
+
+	/* Build the resource name that identifies this link in the cache */
+	(void) snprintf(rsrc, sizeof (rsrc), "%s/%u", RCM_LINK_PREFIX,
+	    linkid);
+
+	rcm_log_message(RCM_TRACE2, "IBPART: ibpart_consumer_notify(%s)\n",
+	    rsrc);
+
+	/* Inform IP consumers of the new link. */
+	if (ibpart_notify_new_ibpart(hd, rsrc) == 0) {
+		rcm_log_message(RCM_TRACE2, "IBPART: ibpart_consumer_notify "
+		    "succeeded\n");
+		return (0);
+	}
+
+	/* Notification failed: offline the cached node with the STALE flags. */
+	(void) mutex_lock(&cache_lock);
+	stale = cache_lookup(hd, rsrc, CACHE_NO_REFRESH);
+	if (stale != NULL) {
+		(void) ibpart_offline_ibpart(stale, IBPART_STALE,
+		    CACHE_NODE_STALE);
+	}
+	(void) mutex_unlock(&cache_lock);
+	rcm_log_message(RCM_TRACE2,
+	    "IBPART: ibpart_notify_new_ibpart failed(%s)\n", rsrc);
+	return (-1);
+}
+
+typedef struct ibpart_up_arg_s {
+	datalink_id_t	linkid;		/* physical link being configured */
+	int		retval;		/* -1 once any IBPART fails to come up */
+} ibpart_up_arg_t;
+
+/*
+ * ibpart_up() - dladm_walk_datalink_id() callback used by ibpart_configure().
+ *
+ * Brings up the IBPART 'ibpartid' if its persistent configuration places it
+ * over the physical link named in 'arg' (an ibpart_up_arg_t). A failed
+ * dladm_part_up() is recorded in arg->retval; the walk always continues.
+ */
+static int
+ibpart_up(dladm_handle_t handle, datalink_id_t ibpartid, void *arg)
+{
+	char errbuf[DLADM_STRSIZE];
+	dladm_ib_attr_t attr;
+	dladm_status_t rc;
+	ibpart_up_arg_t *upargp = arg;
+
+	rc = dladm_part_info(handle, ibpartid, &attr, DLADM_OPT_PERSIST);
+	if (rc != DLADM_STATUS_OK) {
+		rcm_log_message(RCM_TRACE1,
+		    "IBPART: ibpart_up(): cannot get information for IBPART %u "
+		    "(%s)\n", ibpartid, dladm_status2str(rc, errbuf));
+		return (DLADM_WALK_CONTINUE);
+	}
+
+	/* Only IBPARTs created over the requested physical link matter. */
+	if (attr.dia_physlinkid != upargp->linkid)
+		return (DLADM_WALK_CONTINUE);
+
+	rcm_log_message(RCM_TRACE3, "IBPART: ibpart_up(%u)\n", ibpartid);
+	rc = dladm_part_up(handle, ibpartid, 0);
+	if (rc != DLADM_STATUS_OK) {
+		/* Log a warning and carry on bringing up other IBPARTs. */
+		rcm_log_message(RCM_WARNING,
+		    _("IBPART: IBPART up failed (%u): %s\n"),
+		    ibpartid, dladm_status2str(rc, errbuf));
+		upargp->retval = -1;
+	}
+
+	return (DLADM_WALK_CONTINUE);
+}
+
+/*
+ * ibpart_configure() - Configure IBPARTs over a physical link after it attaches
+ *
+ * Returns 0 if the link is already configured or no IBPART failed to come
+ * up, -1 if bringing up at least one IBPART failed.
+ */
+static int
+ibpart_configure(rcm_handle_t *hd, datalink_id_t linkid)
+{
+	ibpart_up_arg_t uparg = {DATALINK_INVALID_LINKID, 0};
+	link_cache_t *ent;
+	char rsrc[RCM_LINK_RESOURCE_MAX];
+
+	/* Check for the IBPARTs in the cache */
+	(void) snprintf(rsrc, sizeof (rsrc), "%s/%u", RCM_LINK_PREFIX, linkid);
+
+	rcm_log_message(RCM_TRACE2, "IBPART: ibpart_configure(%s)\n", rsrc);
+
+	/*
+	 * Nothing to do unless the link is new (not cached) or was
+	 * previously offlined.
+	 */
+	(void) mutex_lock(&cache_lock);
+	ent = cache_lookup(hd, rsrc, CACHE_REFRESH);
+	if (ent != NULL && (ent->pc_state & CACHE_NODE_OFFLINED) == 0) {
+		rcm_log_message(RCM_TRACE2,
+		    "IBPART: Skipping configured interface(%s)\n", rsrc);
+		(void) mutex_unlock(&cache_lock);
+		return (0);
+	}
+	(void) mutex_unlock(&cache_lock);
+
+	/* Walk the persistent IBPARTs; ibpart_up() filters on this link. */
+	uparg.linkid = linkid;
+	(void) dladm_walk_datalink_id(ibpart_up, dld_handle, &uparg,
+	    DATALINK_CLASS_PART, DATALINK_ANY_MEDIATYPE, DLADM_OPT_PERSIST);
+
+	if (uparg.retval != 0)
+		return (uparg.retval);
+
+	rcm_log_message(RCM_TRACE2,
+	    "IBPART: ibpart_configure succeeded(%s)\n", rsrc);
+	return (0);
+}
--- a/usr/src/cmd/svc/milestone/net-nwam	Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/cmd/svc/milestone/net-nwam	Wed Apr 14 10:26:18 2010 -0700
@@ -20,8 +20,7 @@
 # CDDL HEADER END
 #
 #
-# Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
-# Use is subject to license terms.
+# Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
 #
 
 . /lib/svc/share/smf_include.sh
@@ -525,6 +524,19 @@
 			. "${upgrade_script}"
 		fi
 
+		#
+		# Upgrade handling for ibd:
+		# Once the upgrade handling is done we cannot set the
+		# ibd/ibd_upgraded property to "true" here, because the file
+		# system is still read-only at this point. That is done later
+		# by the ibd-post-upgrade service.
+		#
+		ibd_upgraded=`/bin/svcprop -c -p ibd/ibd_upgraded \
+		    svc:/network/physical:default 2> /dev/null`
+		if [ "$ibd_upgraded" != "true" ]; then
+			/sbin/ibd_upgrade -v
+		fi
+
 		# Bring up simnet instances
 		/sbin/dladm up-simnet
 		# Initialize security objects.
@@ -536,6 +548,7 @@
 		#
 		/sbin/dladm up-vnic
 		/sbin/dladm up-vlan
+		/sbin/dladm up-part
 		/sbin/dladm up-aggr
 		/sbin/flowadm init-flow
 	fi
--- a/usr/src/cmd/svc/milestone/net-physical	Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/cmd/svc/milestone/net-physical	Wed Apr 14 10:26:18 2010 -0700
@@ -20,8 +20,7 @@
 # CDDL HEADER END
 #
 #
-# Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
-# Use is subject to license terms.
+# Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
 #
 # Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T.
 # All rights reserved.
@@ -62,6 +61,19 @@
 	fi
 
 	#
+	# Upgrade handling for ibd:
+	# Once the upgrade handling is done we cannot set the
+	# ibd/ibd_upgraded property to "true" here, because the file
+	# system is still read-only at this point. That is done later
+	# by the ibd-post-upgrade service.
+	#
+	ibd_upgraded=`/bin/svcprop -c -p ibd/ibd_upgraded \
+	    $SMF_FMRI 2> /dev/null`
+	if [ "$ibd_upgraded" != "true" ]; then
+		/sbin/ibd_upgrade -v
+	fi
+
+	#
 	# Bring up simnets, link aggregations and initialize security objects.
 	# Note that link property initialization is deferred until after
 	# IP interfaces are plumbed to ensure that the links will not
@@ -72,6 +84,7 @@
 	/sbin/dladm up-simnet
 	/sbin/dladm up-aggr
 	/sbin/dladm up-vlan
+	/sbin/dladm up-part
 	/sbin/dladm init-secobj
 	#
 	# Bring up VNICs
--- a/usr/src/lib/libdladm/Makefile	Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/lib/libdladm/Makefile	Wed Apr 14 10:26:18 2010 -0700
@@ -19,8 +19,7 @@
 # CDDL HEADER END
 #
 #
-# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
-# Use is subject to license terms.
+# Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 #
 #
 
@@ -29,7 +28,8 @@
 HDRS =		libdladm.h libdladm_impl.h libdllink.h libdlaggr.h	\
 		libdlwlan.h libdlwlan_impl.h libdlvnic.h libdlvlan.h	\
 		libdlmgmt.h libdlflow.h libdlflow_impl.h libdlstat.h	\
-		libdlether.h libdlsim.h libdlbridge.h libdliptun.h
+		libdlether.h libdlsim.h libdlbridge.h libdliptun.h	\
+		libdlib.h
 
 HDRDIR =	common
 
@@ -45,7 +45,8 @@
 		common/propfuncs.c common/libdlflow.c	\
 		common/libdlstat.c common/flowattr.c	\
 		common/libdlether.c common/libdlsim.c	\
-		common/libdlbridge.c common/libdliptun.c
+		common/libdlbridge.c common/libdliptun.c\
+		common/libdlib.c
 
 XGETFLAGS =     -a -x libdladm.xcl
 
--- a/usr/src/lib/libdladm/Makefile.com	Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/lib/libdladm/Makefile.com	Wed Apr 14 10:26:18 2010 -0700
@@ -19,14 +19,13 @@
 # CDDL HEADER END
 #
 #
-# Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
-# Use is subject to license terms.
+# Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 #
 
 LIBRARY = libdladm.a
 VERS    = .1
 OBJECTS = libdladm.o secobj.o linkprop.o libdllink.o libdlaggr.o \
-	libdlwlan.o libdlvnic.o libdlmgmt.o libdlvlan.o \
+	libdlwlan.o libdlvnic.o libdlmgmt.o libdlvlan.o	libdlib.o\
 	flowattr.o flowprop.o propfuncs.o libdlflow.o libdlstat.o \
 	usage.o libdlether.o libdlsim.o libdlbridge.o libdliptun.o
 
--- a/usr/src/lib/libdladm/common/libdladm.c	Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/lib/libdladm/common/libdladm.c	Wed Apr 14 10:26:18 2010 -0700
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #include <unistd.h>
@@ -385,6 +384,27 @@
 	case DLADM_STATUS_POOLCPU:
 		s = "pool and cpus property are mutually exclusive";
 		break;
+	case DLADM_STATUS_INVALID_PORT_INSTANCE:
+		s = "invalid IB phys link";
+		break;
+	case DLADM_STATUS_PORT_IS_DOWN:
+		s = "port is down";
+		break;
+	case DLADM_STATUS_PARTITION_EXISTS:
+		s = "partition already exists";
+		break;
+	case DLADM_STATUS_PKEY_NOT_PRESENT:
+		s = "PKEY is not present on the port";
+		break;
+	case DLADM_STATUS_INVALID_PKEY:
+		s = "invalid PKEY";
+		break;
+	case DLADM_STATUS_NO_IB_HW_RESOURCE:
+		s = "IB internal resource not available";
+		break;
+	case DLADM_STATUS_INVALID_PKEY_TBL_SIZE:
+		s = "invalid PKEY table size";
+		break;
 	default:
 		s = "<unknown error>";
 		break;
@@ -618,6 +638,9 @@
 	case DATALINK_CLASS_BRIDGE:
 		s = "bridge";
 		break;
+	case DATALINK_CLASS_PART:
+		s = "part";
+		break;
 	default:
 		s = "unknown";
 		break;
--- a/usr/src/lib/libdladm/common/libdladm.h	Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/lib/libdladm/common/libdladm.h	Wed Apr 14 10:26:18 2010 -0700
@@ -161,7 +161,14 @@
 	DLADM_STATUS_NO_HWRINGS,
 	DLADM_STATUS_PERMONLY,
 	DLADM_STATUS_OPTMISSING,
-	DLADM_STATUS_POOLCPU
+	DLADM_STATUS_POOLCPU,
+	DLADM_STATUS_INVALID_PORT_INSTANCE,
+	DLADM_STATUS_PORT_IS_DOWN,
+	DLADM_STATUS_PKEY_NOT_PRESENT,
+	DLADM_STATUS_PARTITION_EXISTS,
+	DLADM_STATUS_INVALID_PKEY,
+	DLADM_STATUS_NO_IB_HW_RESOURCE,
+	DLADM_STATUS_INVALID_PKEY_TBL_SIZE
 } dladm_status_t;
 
 typedef enum {
--- a/usr/src/lib/libdladm/common/libdladm_impl.h	Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/lib/libdladm/common/libdladm_impl.h	Wed Apr 14 10:26:18 2010 -0700
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #ifndef _LIBDLADM_IMPL_H
@@ -103,6 +102,11 @@
 #define	FSIMNETPEER	"simnetpeer"	/* uint64_t */
 
 /*
+ * Set for IB partitions only
+ */
+#define	FPORTPKEY	"pkey"		/* uint64_t */
+
+/*
  * Common fields
  */
 #define	FMACADDR	"macaddr"	/* string */
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/lib/libdladm/common/libdlib.c	Wed Apr 14 10:26:18 2010 -0700
@@ -0,0 +1,732 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#include <stdio.h>
+#include <libdevinfo.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stropts.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <strings.h>
+#include <libintl.h>
+#include <net/if_types.h>
+#include <net/if_dl.h>
+#include <sys/dld.h>
+#include <sys/ib/ib_types.h>
+#include <sys/ibpart.h>
+#include <libdllink.h>
+#include <libdladm.h>
+#include <libdlib.h>
+#include <libdladm_impl.h>
+
+/*
+ * IP over IB administration API; see PSARC/2010/085
+ */
+
+/*
+ * Function prototypes
+ */
+dladm_status_t dladm_part_create(dladm_handle_t, datalink_id_t, ib_pkey_t,
+    uint32_t, char *, datalink_id_t *, dladm_arg_list_t *);
+static int	dladm_ibd_get_instance(char *);
+static dladm_status_t	i_dladm_part_create(dladm_handle_t,
+    dladm_part_attr_t *);
+dladm_status_t	dladm_part_persist_conf(dladm_handle_t, dladm_part_attr_t *);
+static dladm_status_t i_dladm_part_delete(dladm_handle_t, datalink_id_t);
+dladm_status_t	dladm_part_delete(dladm_handle_t, datalink_id_t, int);
+static int	i_dladm_part_up(dladm_handle_t, datalink_id_t, void *);
+dladm_status_t	dladm_part_up(dladm_handle_t, datalink_id_t, uint32_t);
+
+/*
+ * Convert an error status returned by the IP over IB kernel driver to a
+ * valid dladm status. Codes without a dedicated dladm status map to
+ * DLADM_STATUS_FAILED.
+ */
+static dladm_status_t
+dladm_ib_ioctl_err2status(int err)
+{
+	if (err == 0)
+		return (DLADM_STATUS_OK);
+	if (err == IBD_INVALID_PORT_INST)
+		return (DLADM_STATUS_INVALID_PORT_INSTANCE);
+	if (err == IBD_PORT_IS_DOWN)
+		return (DLADM_STATUS_PORT_IS_DOWN);
+	if (err == IBD_PKEY_NOT_PRESENT)
+		return (DLADM_STATUS_PKEY_NOT_PRESENT);
+	if (err == IBD_PARTITION_EXISTS)
+		return (DLADM_STATUS_PARTITION_EXISTS);
+	if (err == IBD_INVALID_PKEY)
+		return (DLADM_STATUS_INVALID_PKEY);
+	if (err == IBD_NO_HW_RESOURCE)
+		return (DLADM_STATUS_NO_IB_HW_RESOURCE);
+	if (err == IBD_INVALID_PKEY_TBL_SIZE)
+		return (DLADM_STATUS_INVALID_PKEY_TBL_SIZE);
+
+	return (DLADM_STATUS_FAILED);
+}
+
+/*
+ * Issue the IP over IB ioctl 'ioccmd' on the handle's dld file descriptor.
+ * When the ioctl fails, the driver-specific ioc_status is preferred if it
+ * was set; otherwise errno is translated.
+ */
+static dladm_status_t
+i_dladm_ib_ioctl(dladm_handle_t handle, int ioccmd, ibd_ioctl_t *iocp)
+{
+	if (ioctl(dladm_dld_fd(handle), ioccmd, iocp) != 0) {
+		if (iocp->ioc_status != 0)
+			return (dladm_ib_ioctl_err2status(iocp->ioc_status));
+		return (dladm_errno2status(errno));
+	}
+
+	return (DLADM_STATUS_OK);
+}
+
+/*
+ * Query the IP over IB driver for the active configuration of the
+ * partition whose datalink ID is 'linkid' and store it in '*attrp'.
+ * '*attrp' is zeroed first, so it stays zeroed if the ioctl fails.
+ */
+static dladm_status_t
+i_dladm_part_info_active(dladm_handle_t handle, datalink_id_t linkid,
+    dladm_part_attr_t *attrp)
+{
+	dladm_status_t rc;
+	ibpart_ioctl_t req;
+
+	bzero(attrp, sizeof (*attrp));
+	bzero(&req, sizeof (req));
+
+	/* On input, ioc_linkid carries the IB partition's datalink ID. */
+	req.ibdioc.ioc_info_cmd = IBD_INFO_CMD_IBPART;
+	req.ibdioc.ioc_linkid = linkid;
+
+	rc = i_dladm_ib_ioctl(handle, IBD_INFO_IBPART, (ibd_ioctl_t *)&req);
+	if (rc != DLADM_STATUS_OK)
+		return (rc);
+
+	/* On return, ioc_linkid has been replaced by the IB port's linkid. */
+	attrp->dia_physlinkid = req.ibdioc.ioc_linkid;
+	attrp->dia_partlinkid = req.ioc_partid;
+	attrp->dia_pkey = req.ioc_pkey;
+	attrp->dia_portnum = req.ibdioc.ioc_portnum;
+	attrp->dia_hca_guid = req.ibdioc.ioc_hcaguid;
+	attrp->dia_port_guid = req.ibdioc.ioc_portguid;
+	attrp->dia_instance = req.ibdioc.ioc_port_inst;
+
+	/* Reflect a forced creation, as reported by the driver. */
+	if (req.ioc_force_create)
+		attrp->dia_flags |= DLADM_IBPART_FORCE_CREATE;
+
+	return (rc);
+}
+
+/*
+ * Get the configuration information about the IB partition 'linkid' from the
+ * persistent configuration.
+ *
+ * Returns DLADM_STATUS_BADARG if 'linkid' is not a DATALINK_CLASS_PART link,
+ * otherwise the status of the first lookup that failed, or DLADM_STATUS_OK.
+ */
+static dladm_status_t
+i_dladm_part_info_persist(dladm_handle_t handle, datalink_id_t linkid,
+    dladm_part_attr_t *attrp)
+{
+	dladm_conf_t conf;
+	dladm_status_t status;
+	char linkover[MAXLINKNAMELEN];
+	datalink_class_t class;
+	boolean_t force = B_FALSE;
+	uint64_t u64;
+
+	/*
+	 * Get the class of the datalink. The configuration handle has not
+	 * been opened yet, so return directly instead of going through
+	 * 'done', which would destroy an uninitialized 'conf'.
+	 */
+	if ((status = dladm_datalink_id2info(handle, linkid, NULL, &class,
+	    NULL, NULL, 0)) != DLADM_STATUS_OK)
+		return (status);
+
+	bzero(attrp, sizeof (*attrp));
+	attrp->dia_partlinkid = linkid;
+	if ((status = dladm_read_conf(handle, linkid, &conf)) !=
+	    DLADM_STATUS_OK)
+		return (status);
+
+	/*
+	 * Get the name of the IB Phys link over which IB partition was
+	 * created.
+	 */
+	status = dladm_get_conf_field(handle, conf, FLINKOVER, linkover,
+	    sizeof (linkover));
+	if (status != DLADM_STATUS_OK) {
+		attrp->dia_physlinkid = DATALINK_INVALID_LINKID;
+		goto done;
+	}
+
+	/* Get the IB Phys link's datalink ID */
+	if ((status = dladm_name2info(handle, linkover,
+	    &attrp->dia_physlinkid, NULL, NULL, NULL)) != DLADM_STATUS_OK)
+		goto done;
+
+	/*
+	 * Get the IB partition's P_Key. The field is stored as a uint64_t
+	 * (see dladm_part_persist_conf()), so read it into a uint64_t and
+	 * narrow it afterwards; reading sizeof (uint64_t) bytes straight into
+	 * the ib_pkey_t member would overwrite the fields that follow it.
+	 */
+	status = dladm_get_conf_field(handle, conf, FPORTPKEY, &u64,
+	    sizeof (u64));
+	if (status != DLADM_STATUS_OK)
+		goto done;
+	attrp->dia_pkey = (ib_pkey_t)u64;
+
+	if (class != DATALINK_CLASS_PART) {
+		status = DLADM_STATUS_BADARG;
+		goto done;
+	}
+
+	/*
+	 * If the FFORCE field is set in the persistent configuration database
+	 * set the force create flag in the partition attributes. A missing
+	 * FFORCE field simply means the partition was not force-created.
+	 */
+	status = dladm_get_conf_field(handle, conf, FFORCE, &force,
+	    sizeof (boolean_t));
+	if (status != DLADM_STATUS_OK) {
+		if (status != DLADM_STATUS_NOTFOUND)
+			goto done;
+	} else if (force == B_TRUE) {
+		attrp->dia_flags |= DLADM_IBPART_FORCE_CREATE;
+	}
+
+	status = DLADM_STATUS_OK;
+done:
+	dladm_destroy_conf(handle, conf);
+	return (status);
+}
+
+/*
+ * Get the configuration information for the IB partition given by the datalink
+ * ID 'linkid'. 'flags' must be exactly DLADM_OPT_ACTIVE (query the running
+ * system) or exactly DLADM_OPT_PERSIST (query the persistent configuration
+ * database); any other value yields DLADM_STATUS_BADARG.
+ */
+dladm_status_t
+dladm_part_info(dladm_handle_t handle, datalink_id_t linkid,
+    dladm_part_attr_t *attrp, uint32_t flags)
+{
+	switch (flags) {
+	case DLADM_OPT_ACTIVE:
+		return (i_dladm_part_info_active(handle, linkid, attrp));
+	case DLADM_OPT_PERSIST:
+		return (i_dladm_part_info_persist(handle, linkid, attrp));
+	default:
+		return (DLADM_STATUS_BADARG);
+	}
+}
+
+/*
+ * Get the configuration information for the IB Phys link given by the datalink
+ * ID 'linkid'.
+ *
+ * On success attrp->dia_port_pkeys points to the buffer allocated here for
+ * the port's P_Key table; the caller releases it with dladm_free_ib_info().
+ * On failure '*attrp' is left zeroed, so calling dladm_free_ib_info() on it
+ * is still safe. 'flags' is unused.
+ */
+/* ARGSUSED */
+dladm_status_t
+dladm_ib_info(dladm_handle_t handle, datalink_id_t linkid,
+    dladm_ib_attr_t *attrp, uint32_t flags)
+{
+	int instance;
+	ibport_ioctl_t ioc;
+	dladm_phys_attr_t	dpa;
+	dladm_status_t status = DLADM_STATUS_OK;
+
+	/*
+	 * Start from a clean slate so that the fields this function does not
+	 * fill in, and dia_port_pkeys on every failure path, are well defined.
+	 */
+	bzero(attrp, sizeof (*attrp));
+
+	/*
+	 * We need to get the device name of the IB Phys link to get the
+	 * correct instance number of the IP over IB driver instance.
+	 */
+	if (dladm_phys_info(handle, linkid, &dpa, DLADM_OPT_ACTIVE)
+	    != DLADM_STATUS_OK)
+		return (DLADM_STATUS_BADARG);
+
+	/*
+	 * Get the instance number of the IP over IB driver instance which
+	 * represents this IB Phys link.
+	 */
+	instance = dladm_ibd_get_instance(dpa.dp_dev);
+	if (instance == -1)
+		return (DLADM_STATUS_FAILED);
+
+	bzero(&ioc, sizeof (ioc));
+	/*
+	 * The ioc_linkid here carries the IB port's linkid. We make the
+	 * first ioctl call to get the P_Key table size for this HCA port.
+	 */
+	ioc.ibdioc.ioc_linkid = linkid;
+	ioc.ibdioc.ioc_info_cmd = IBD_INFO_CMD_PKEYTBLSZ;
+	ioc.ioc_pkey_tbl_sz = 0;
+	ioc.ibdioc.ioc_port_inst = instance;
+
+	status = i_dladm_ib_ioctl(handle, IBD_INFO_IBPART, (ibd_ioctl_t *)&ioc);
+	if (status != DLADM_STATUS_OK)
+		return (status);
+
+	/*
+	 * Now allocate the memory for the P_Key table based on the table size
+	 * returned by the ioctl.
+	 */
+	ioc.ioc_pkeys = calloc(ioc.ioc_pkey_tbl_sz, sizeof (ib_pkey_t));
+	if (ioc.ioc_pkeys == NULL) {
+		status = dladm_errno2status(errno);
+		goto bail;
+	}
+
+	/*
+	 * Call the ioctl again to get the P_Key table and other IB Phys link
+	 * attributes.
+	 */
+	ioc.ibdioc.ioc_linkid = linkid;
+	ioc.ibdioc.ioc_port_inst = instance;
+	ioc.ibdioc.ioc_info_cmd = IBD_INFO_CMD_IBPORT;
+
+	status = i_dladm_ib_ioctl(handle, IBD_INFO_IBPART, (ibd_ioctl_t *)&ioc);
+	if (status != DLADM_STATUS_OK)
+		goto bail;
+
+	attrp->dia_physlinkid = ioc.ibdioc.ioc_linkid;
+	attrp->dia_portnum = ioc.ibdioc.ioc_portnum;
+	attrp->dia_port_pkey_tbl_sz = ioc.ioc_pkey_tbl_sz;
+	/* Ownership of the P_Key buffer passes to the caller here. */
+	attrp->dia_port_pkeys = ioc.ioc_pkeys;
+	attrp->dia_hca_guid = ioc.ibdioc.ioc_hcaguid;
+	attrp->dia_port_guid = ioc.ibdioc.ioc_portguid;
+	attrp->dia_instance = ioc.ibdioc.ioc_port_inst;
+	return (status);
+bail:
+	/* ioc_pkeys is NULL here if the allocation itself failed. */
+	free(ioc.ioc_pkeys);
+	return (status);
+}
+
+/*
+ * Free the memory allocated for the IB HCA port's P_Key table by
+ * dladm_ib_info library call. A NULL 'attr' is ignored. The pointer in
+ * 'attr' is cleared afterwards so that a repeated call is harmless.
+ */
+void
+dladm_free_ib_info(dladm_ib_attr_t *attr)
+{
+	if (attr == NULL)
+		return;
+
+	/* free(NULL) is a no-op, so no separate check is needed. */
+	free(attr->dia_port_pkeys);
+	attr->dia_port_pkeys = NULL;
+}
+
+/*
+ * Ask the IP over IB driver to create the partition object described by
+ * 'pattr'. Here pattr->dia_flags carries DLADM_OPT_* values; the presence
+ * of DLADM_OPT_FORCE is passed to the driver as the force-create request.
+ */
+static dladm_status_t
+i_dladm_part_create(dladm_handle_t handle, dladm_part_attr_t *pattr)
+{
+	ibpart_ioctl_t	req;
+
+	bzero(&req, sizeof (req));
+
+	/* Datalink IDs: the IB Physical link first, then the IB Partition */
+	req.ibdioc.ioc_linkid = pattr->dia_physlinkid;
+	req.ioc_partid = pattr->dia_partlinkid;
+
+	req.ibdioc.ioc_port_inst = pattr->dia_instance;
+	req.ioc_pkey = pattr->dia_pkey;
+	req.ioc_force_create = ((pattr->dia_flags & DLADM_OPT_FORCE) != 0);
+
+	return (i_dladm_ib_ioctl(handle, IBD_CREATE_IBPART,
+	    (ibd_ioctl_t *)&req));
+}
+
+/*
+ * Create an entry in the dladm persistent configuration database for the
+ * partition specified by pattr.
+ *
+ * The entry records the name of the underlying IB Phys link (FLINKOVER),
+ * the P_Key (FPORTPKEY) and, when DLADM_OPT_FORCE is set in dia_flags, the
+ * force-create flag (FFORCE). The configuration handle is released on every
+ * path once it has been created.
+ */
+dladm_status_t
+dladm_part_persist_conf(dladm_handle_t handle, dladm_part_attr_t *pattr)
+{
+	dladm_conf_t	conf;
+	dladm_status_t	status;
+	char		linkover[MAXLINKNAMELEN];
+	uint64_t	u64;
+
+	status = dladm_create_conf(handle, pattr->dia_pname,
+	    pattr->dia_partlinkid, DATALINK_CLASS_PART, DL_IB, &conf);
+	if (status != DLADM_STATUS_OK)
+		return (status);
+
+	/*
+	 * Get the name of the IB Phys link over which this partition was
+	 * created.
+	 */
+	status = dladm_datalink_id2info(handle, pattr->dia_physlinkid,
+	    NULL, NULL, NULL, linkover, sizeof (linkover));
+	if (status != DLADM_STATUS_OK)
+		goto done;
+
+	/* Store IB Phys link name (linkover) */
+	status = dladm_set_conf_field(handle, conf, FLINKOVER, DLADM_TYPE_STR,
+	    linkover);
+	if (status != DLADM_STATUS_OK)
+		goto done;
+
+	/* Store the IB Partition's P_Key, widened to the stored type */
+	u64 = pattr->dia_pkey;
+	status = dladm_set_conf_field(handle, conf, FPORTPKEY,
+	    DLADM_TYPE_UINT64, &u64);
+	if (status != DLADM_STATUS_OK)
+		goto done;
+
+	if (pattr->dia_flags & DLADM_OPT_FORCE) {
+		boolean_t force = B_TRUE;
+		/* Store the force create flag. */
+		status = dladm_set_conf_field(handle, conf, FFORCE,
+		    DLADM_TYPE_BOOLEAN, &force);
+		if (status != DLADM_STATUS_OK)
+			goto done;
+	}
+
+	status = dladm_write_conf(handle, conf);
+
+done:
+	/* Release the handle whether or not the entry was written. */
+	dladm_destroy_conf(handle, conf);
+	return (status);
+}
+
+/*
+ * Create a new IB Partition datalink of name 'pname' over the IB Physical link
+ * given in 'physlinkid' with the P_key 'pkey' and return the datalink ID in
+ * 'partlinkid'. If the 'force' option is set in the 'flags' argument, the
+ * partition will be created even if the P_Key 'pkey' does not exist or if the
+ * HCA port represented by the IB Phys link is down. If the 'temporary' flag is
+ * set, then the configuration information is not added to the persistent
+ * database.
+ *
+ * If 'proplist' is not NULL, each property in it is set on the new datalink.
+ * If any step after the datalink ID has been allocated fails, the steps
+ * already completed are undone before returning.
+ */
+dladm_status_t
+dladm_part_create(dladm_handle_t handle, datalink_id_t physlinkid,
+    ib_pkey_t pkey, uint32_t flags, char *pname, datalink_id_t *partlinkid,
+    dladm_arg_list_t *proplist)
+{
+	int			i;
+	dladm_status_t		status;
+	uint_t			media;
+	boolean_t		part_created = B_FALSE;
+	boolean_t		conf_set = B_FALSE;
+	dladm_phys_attr_t	dpa;
+	dladm_part_attr_t	pattr;
+
+	pattr.dia_pkey = pkey;
+	pattr.dia_physlinkid = physlinkid; /* IB Phys link's datalink id */
+	pattr.dia_flags = flags;
+	pattr.dia_pname = pname;
+
+	/*
+	 * From here on 'flags' (without DLADM_OPT_FORCE) is what is passed to
+	 * the datalink ID calls, while pattr.dia_flags keeps the caller's
+	 * original flags, including DLADM_OPT_FORCE.
+	 */
+	flags &= ~DLADM_OPT_FORCE;
+
+	/*
+	 * Check whether the PKEY is valid. If not, return immediately
+	 * Only full members are allowed as per the IPoIB specification
+	 */
+	if (pattr.dia_pkey <= IB_PKEY_INVALID_FULL)
+		return (DLADM_STATUS_INVALID_PKEY);
+
+	/*
+	 * Get the media type of the Phys link datalink ID provided and
+	 * make sure that it is InfiniBand media (DL_IB).
+	 */
+	if ((status = dladm_datalink_id2info(handle, pattr.dia_physlinkid, NULL,
+	    NULL, &media, NULL, 0)) != DLADM_STATUS_OK)
+		return (status);
+
+	if (media != DL_IB)
+		return (dladm_errno2status(ENOTSUP));
+
+	/*
+	 * Get the instance number of the IP over IB driver instance for the
+	 * IB Phys link 'physlinkid' over which we will be creating our IB
+	 * partition.
+	 */
+	if ((status = dladm_phys_info(handle, pattr.dia_physlinkid, &dpa,
+	    DLADM_OPT_ACTIVE)) != DLADM_STATUS_OK)
+		return (status);
+
+	pattr.dia_instance = dladm_ibd_get_instance(dpa.dp_dev);
+	if (pattr.dia_instance == -1)
+		return (DLADM_STATUS_FAILED);
+
+
+	/* Allocate the datalink ID for the new partition link. */
+	if ((status = dladm_create_datalink_id(handle, pattr.dia_pname,
+	    DATALINK_CLASS_PART, DL_IB, flags, &pattr.dia_partlinkid)) !=
+	    DLADM_STATUS_OK)
+		return (status);
+
+	/*
+	 * Create the IB partition object.
+	 */
+	status = i_dladm_part_create(handle, &pattr);
+	if (status != DLADM_STATUS_OK)
+		goto done;
+
+	part_created = B_TRUE;
+
+	/*
+	 * If the persist flag is set then write this partition information
+	 * to the persistent configuration.
+	 */
+	if (pattr.dia_flags & DLADM_OPT_PERSIST) {
+		status = dladm_part_persist_conf(handle, &pattr);
+		if (status != DLADM_STATUS_OK)
+			goto done;
+		conf_set = B_TRUE;
+	}
+
+	/*
+	 * If the name-value pair list of properties were provided set those
+	 * properties over the datalink. The first property that fails stops
+	 * the loop and triggers the rollback below.
+	 */
+	if (proplist != NULL) {
+		for (i = 0; i < proplist->al_count; i++) {
+			dladm_arg_info_t *aip = &proplist->al_info[i];
+
+			status = dladm_set_linkprop(handle,
+			    pattr.dia_partlinkid, aip->ai_name, aip->ai_val,
+			    aip->ai_count, pattr.dia_flags);
+			if (status != DLADM_STATUS_OK)
+				break;
+		}
+	}
+done:
+	/*
+	 * On failure undo, in reverse order, whatever was set up: the
+	 * persistent entry, the driver's partition object and finally the
+	 * datalink ID itself.
+	 */
+	if (status != DLADM_STATUS_OK) {
+		if (conf_set)
+			(void) dladm_remove_conf(handle, pattr.dia_partlinkid);
+		if (part_created)
+			(void) i_dladm_part_delete(handle,
+			    pattr.dia_partlinkid);
+		(void) dladm_destroy_datalink_id(handle, pattr.dia_partlinkid,
+		    flags);
+	}
+
+	/*
+	 * NOTE(review): the ID is reported to the caller even when the
+	 * rollback above has just destroyed it; callers should only use
+	 * '*partlinkid' when DLADM_STATUS_OK is returned.
+	 */
+	if (partlinkid != NULL)
+		*partlinkid = pattr.dia_partlinkid;
+
+	return (status);
+}
+
+/*
+ * Ask the IP over IB driver to delete the IB partition whose datalink ID is
+ * 'partid', freeing all the resources allocated for it.
+ */
+static dladm_status_t
+i_dladm_part_delete(dladm_handle_t handle, datalink_id_t partid)
+{
+	ibpart_ioctl_t req;
+
+	/* Only the partition's datalink ID is filled in for this ioctl. */
+	bzero(&req, sizeof (req));
+	req.ioc_partid = partid;
+
+	return (i_dladm_ib_ioctl(handle, IBD_DELETE_IBPART,
+	    (ibd_ioctl_t *)&req));
+}
+
+/*
+ * Delete an IB partition if 'flags' contains the active flag. Update the
+ * persistent configuration if 'flags' contains the persist flag.
+ *
+ * Returns DLADM_STATUS_BADARG if 'flags' is 0 or 'partid' is not an IB
+ * partition datalink. Otherwise the result of the active delete is returned,
+ * except that when the active partition was not found the result of removing
+ * the persistent entry is returned instead.
+ */
+dladm_status_t
+dladm_part_delete(dladm_handle_t handle, datalink_id_t partid, int flags)
+{
+	dladm_status_t	status = DLADM_STATUS_OK;
+	datalink_class_t class;
+
+	if (flags == 0)
+		return (DLADM_STATUS_BADARG);
+
+	/*
+	 * Make sure that the datalinkid provided is an IB partition class
+	 * datalink ID.
+	 */
+	if ((dladm_datalink_id2info(handle, partid, NULL, &class, NULL, NULL, 0)
+	    != DLADM_STATUS_OK))
+		return (DLADM_STATUS_BADARG);
+
+	if (class != DATALINK_CLASS_PART)
+		return (DLADM_STATUS_BADARG);
+
+	if ((flags & DLADM_OPT_ACTIVE) != 0) {
+		status = i_dladm_part_delete(handle, partid);
+		if (status == DLADM_STATUS_OK) {
+			/* Reset the active link properties, then drop the ID */
+			(void) dladm_set_linkprop(handle, partid, NULL, NULL, 0,
+			    DLADM_OPT_ACTIVE);
+			(void) dladm_destroy_datalink_id(handle, partid,
+			    DLADM_OPT_ACTIVE);
+		} else if (status != DLADM_STATUS_NOTFOUND ||
+		    !(flags & DLADM_OPT_PERSIST)) {
+			/*
+			 * A missing active partition is tolerated only when
+			 * the persistent entry is going to be removed too.
+			 */
+			return (status);
+		}
+	}
+
+	if ((flags & DLADM_OPT_PERSIST) != 0) {
+		dladm_status_t db_status;
+		db_status = dladm_remove_conf(handle, partid);
+
+		/*
+		 * A partition could have been temporarily deleted in which
+		 * case the delete of the active partition above would have
+		 * failed. In that case, we update the status to be returned
+		 * to that of the status returned for deleting the persistent
+		 * database entry.
+		 *
+		 * NOTE(review): in every other case db_status is discarded,
+		 * so a failure to remove the persistent entry goes
+		 * unreported -- confirm this is intended.
+		 */
+		if (status == DLADM_STATUS_NOTFOUND)
+			status = db_status;
+
+		(void) dladm_destroy_datalink_id(handle, partid,
+		    DLADM_OPT_PERSIST);
+	}
+
+	return (status);
+}
+
+/*
+ * Call into the IP over IB driver to create the active instances of one or all
+ * IB partitions present in the persistent configuration.
+ *
+ * This is used both directly and as a dladm_walk_datalink_id() callback. It
+ * always returns DLADM_WALK_CONTINUE, so a failure for one partition does not
+ * stop the others from being brought up. 'arg' is currently unused.
+ */
+/* ARGSUSED */
+static int
+i_dladm_part_up(dladm_handle_t handle, datalink_id_t plinkid, void *arg)
+{
+	dladm_conf_t	conf;
+	datalink_id_t	linkid;
+	ib_pkey_t	pkey;
+	uint64_t	u64;
+	char linkover[MAXLINKNAMELEN];
+	dladm_status_t	status;
+	dladm_phys_attr_t dpa;
+	dladm_part_attr_t pattr;
+
+	/*
+	 * plinkid is the IB partition datalink's ID. Get a handle to the
+	 * persistent configuration entry for this datalink ID. If this datalink
+	 * ID is not present in the persistent configuration return. The return
+	 * value is a walk code, not a dladm status.
+	 */
+	if ((status = dladm_read_conf(handle, plinkid, &conf)) !=
+	    DLADM_STATUS_OK)
+		return (DLADM_WALK_CONTINUE);
+
+	/*
+	 * Get the name of the IB Phys link over which this partition was
+	 * created.
+	 */
+	status = dladm_get_conf_field(handle, conf, FLINKOVER, linkover,
+	    sizeof (linkover));
+	if (status != DLADM_STATUS_OK)
+		goto done;
+
+	if ((status = dladm_name2info(handle, linkover, &linkid, NULL, NULL,
+	    NULL)) != DLADM_STATUS_OK)
+		goto done;
+
+	/*
+	 * Get the phys attribute of the IB Phys link to get the device name
+	 * associated with the phys link. We need this to get the IP over IB
+	 * driver instance number.
+	 */
+	if (dladm_phys_info(handle, linkid, &dpa, DLADM_OPT_ACTIVE)
+	    != DLADM_STATUS_OK)
+		goto done;
+
+	/* Get the IB partition's P_key */
+	status = dladm_get_conf_field(handle, conf, FPORTPKEY, &u64,
+	    sizeof (u64));
+	if (status != DLADM_STATUS_OK)
+		goto done;
+
+	pkey = (ib_pkey_t)u64;
+
+	/*
+	 * We always set the force flag during dladm_part_up because we want
+	 * the partition creation to succeed even if the IB HCA port over which
+	 * the partition is being created is still down. Since dladm_part_up
+	 * is usually invoked during early boot sequence, it is possible under
+	 * some IB subnet configurations for dladm_part_up to be called before
+	 * the IB link negotiation is completed and port state is set to active
+	 * and P_Key table is updated.
+	 */
+	pattr.dia_flags = DLADM_OPT_FORCE | DLADM_OPT_ACTIVE |
+	    DLADM_OPT_PERSIST;
+	/* IB Phys link's datalink ID. */
+	pattr.dia_physlinkid = linkid;
+	/* IB Partition's datalink ID. */
+	pattr.dia_partlinkid = plinkid;
+	pattr.dia_pkey = pkey;
+	pattr.dia_instance = dladm_ibd_get_instance(dpa.dp_dev);
+	/* Leave through 'done' so that the configuration handle is freed. */
+	if (pattr.dia_instance == -1)
+		goto done;
+
+	/*
+	 * Create the active IB Partition object. If the datalink cannot then
+	 * be brought up, delete the partition object again; it is identified
+	 * by the partition's datalink ID (plinkid), not the Phys link's.
+	 */
+	if (i_dladm_part_create(handle, &pattr) == DLADM_STATUS_OK &&
+	    dladm_up_datalink_id(handle, plinkid) != DLADM_STATUS_OK)
+		(void) i_dladm_part_delete(handle, plinkid);
+
+done:
+	dladm_destroy_conf(handle, conf);
+	return (DLADM_WALK_CONTINUE);
+}
+
+/*
+ * Bring up one or all IB partition(s) present in the persistent configuration
+ * database. To bring up a single IB Partition pass its datalink ID in
+ * 'linkid'; pass DATALINK_ALL_LINKID to walk all of them. 'flags' is unused.
+ *
+ * NOTE(review): i_dladm_part_up() never writes through its 'arg', so the
+ * single-link path currently always returns DLADM_STATUS_OK -- confirm.
+ */
+/* ARGSUSED */
+dladm_status_t
+dladm_part_up(dladm_handle_t handle, datalink_id_t linkid, uint32_t flags)
+{
+	dladm_status_t status = DLADM_STATUS_OK;
+
+	if (linkid != DATALINK_ALL_LINKID) {
+		(void) i_dladm_part_up(handle, linkid, &status);
+		return (status);
+	}
+
+	(void) dladm_walk_datalink_id(i_dladm_part_up, handle, &status,
+	    DATALINK_CLASS_PART, DATALINK_ANY_MEDIATYPE, DLADM_OPT_PERSIST);
+	return (DLADM_STATUS_OK);
+}
+
+/*
+ * Extract the driver instance number from the device name 'devname', which
+ * is expected to be a three-character driver name (such as "ibp") followed
+ * directly by the decimal instance number.
+ *
+ * Returns the instance number, or -1 if 'devname' is NULL, is too short to
+ * hold an instance number, or has no digit right after the driver name.
+ */
+static int
+dladm_ibd_get_instance(char *devname)
+{
+	size_t drvlen = strlen("ibp");
+	long instance;
+
+	/* Never look past the end of a short name. */
+	if (devname == NULL || strlen(devname) <= drvlen)
+		return (-1);
+
+	/*
+	 * Require a digit so that a malformed name is reported as -1 rather
+	 * than being silently mistaken for instance 0.
+	 */
+	if (devname[drvlen] < '0' || devname[drvlen] > '9')
+		return (-1);
+
+	errno = 0;
+	instance = strtol(devname + drvlen, NULL, 10);
+	if (errno != 0 || instance != (long)(int)instance)
+		return (-1);
+
+	return ((int)instance);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/lib/libdladm/common/libdlib.h	Wed Apr 14 10:26:18 2010 -0700
@@ -0,0 +1,78 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#ifndef _LIBDLIB_H
+#define	_LIBDLIB_H
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+#include <sys/types.h>
+#include <sys/ib/ib_types.h>
+
+#define	MAXPKEYSTRSZ	968
+#define	MAXPKEYLEN 6
+#define	IBGUIDSTRLEN 16
+#define	IBPORTSTRLEN 5
+
+#define	DLADM_IBPART_FORCE_CREATE 0x1
+
+typedef struct dladm_ib_attr_s {
+	datalink_id_t	dia_physlinkid;	/* IB Phys link datalink ID */
+	datalink_id_t	dia_partlinkid;	/* IB Partition datalink ID */
+	ib_pkey_t	dia_pkey;	/* IB partition's P_Key */
+	uint32_t	dia_flags;	/* DLADM_OPT_* flags */
+	char		*dia_devname;	/* IB Phys link's device name */
+	char		*dia_pname;	/* IB partition's name */
+	uint_t		dia_portnum;	/* IB Phys link's HCA port number */
+	int		dia_instance;	/* IP over IB driver instance number */
+	ib_guid_t	dia_hca_guid;	/* IB HCA GUID */
+	ib_guid_t	dia_port_guid;	/* IB HCA Port GUID */
+	uint_t		dia_port_pkey_tbl_sz;	/* P_Key tbl size (entries? confirm) */
+	ib_pkey_t	*dia_port_pkeys;	/* Ptr to the P_Key table */
+} dladm_ib_attr_t;
+
+typedef struct dladm_ib_attr_s dladm_part_attr_t;
+
+typedef enum {
+	DLADM_IBPART_UD_MODE = 0,	/* "ud" value of linkmode property */
+	DLADM_IBPART_CM_MODE		/* "cm" value of linkmode property */
+} dladm_ibpart_linkmode_t;
+
+extern dladm_status_t dladm_part_create(dladm_handle_t, datalink_id_t,
+    ib_pkey_t, uint32_t, char *, datalink_id_t *, dladm_arg_list_t *);
+extern dladm_status_t dladm_part_delete(dladm_handle_t, datalink_id_t, int);
+extern dladm_status_t dladm_part_up(dladm_handle_t, datalink_id_t, uint32_t);
+extern dladm_status_t dladm_part_info(dladm_handle_t, datalink_id_t,
+    dladm_part_attr_t *, uint32_t);
+extern dladm_status_t dladm_ib_info(dladm_handle_t, datalink_id_t,
+    dladm_ib_attr_t *, uint32_t);
+extern void dladm_free_ib_info(dladm_ib_attr_t *);
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif /* _LIBDLIB_H */
--- a/usr/src/lib/libdladm/common/libdllink.c	Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/lib/libdladm/common/libdllink.c	Wed Apr 14 10:26:18 2010 -0700
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #include <sys/types.h>
@@ -40,6 +39,7 @@
 #include <libdlaggr.h>
 #include <libdlvlan.h>
 #include <libdlvnic.h>
+#include <libdlib.h>
 #include <libdllink.h>
 #include <libdlmgmt.h>
 #include <libdladm_impl.h>
@@ -691,6 +691,22 @@
 }
 
 static int
+i_dladm_part_link_del(dladm_handle_t handle, datalink_id_t partid, void *arg)
+{
+	consumer_del_phys_arg_t	*phys = arg;
+	dladm_part_attr_t	attr;
+
+	/*
+	 * Walker callback: delete the persistent partition 'partid' when its
+	 * dia_physlinkid matches arg's linkid.  Always continues the walk.
+	 */
+	if (dladm_part_info(handle, partid, &attr, DLADM_OPT_PERSIST) ==
+	    DLADM_STATUS_OK && attr.dia_physlinkid == phys->linkid)
+		(void) dladm_part_delete(handle, partid, DLADM_OPT_PERSIST);
+	return (DLADM_WALK_CONTINUE);
+}
+
+static int
 i_dladm_aggr_link_del(dladm_handle_t handle, datalink_id_t aggrid, void *arg)
 {
 	consumer_del_phys_arg_t		*del_arg = arg;
@@ -766,6 +782,10 @@
 		(void) dladm_walk_datalink_id(i_dladm_vlan_link_del, handle,
 		    &del_arg, DATALINK_CLASS_VLAN, DATALINK_ANY_MEDIATYPE,
 		    DLADM_OPT_PERSIST);
+	} else if (media == DL_IB) {
+		del_arg.linkid = linkid;
+		(void) dladm_walk_datalink_id(i_dladm_part_link_del, handle,
+		    &del_arg, DATALINK_CLASS_PART, DL_IB, DLADM_OPT_PERSIST);
 	}
 
 	(void) dladm_remove_conf(handle, linkid);
--- a/usr/src/lib/libdladm/common/linkprop.c	Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/lib/libdladm/common/linkprop.c	Wed Apr 14 10:26:18 2010 -0700
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #include <stdlib.h>
@@ -42,6 +41,7 @@
 #include <libdlwlan.h>
 #include <libdlvlan.h>
 #include <libdlvnic.h>
+#include <libdlib.h>
 #include <libintl.h>
 #include <dlfcn.h>
 #include <link.h>
@@ -150,7 +150,7 @@
 			get_bridge_pvid, get_protection, get_rxrings,
 			get_txrings, get_cntavail,
 			get_allowedips, get_allowedcids, get_pool,
-			get_rings_range;
+			get_rings_range, get_linkmode_prop;
 
 static pd_setf_t	set_zone, set_rate, set_powermode, set_radio,
 			set_public_prop, set_resource, set_stp_prop,
@@ -360,6 +360,8 @@
 
 	{ MAC_PROP_MAX_TXHWCLNT_AVAIL,	sizeof (uint_t), "txhwclnt-available"},
 
+	{ MAC_PROP_IB_LINKMODE,	sizeof (uint32_t),	"linkmode"},
+
 	{ MAC_PROP_PRIVATE,	0,			"driver-private"}
 };
 
@@ -431,6 +433,11 @@
 	{ "auto",	P2P_AUTO		}
 };
 
+static  val_desc_t	dladm_ibpart_linkmode_vals[] = {
+	{ "cm",		DLADM_IBPART_CM_MODE	},
+	{ "ud",		DLADM_IBPART_UD_MODE	},
+};
+
 #define	VALCNT(vals)    (sizeof ((vals)) / sizeof (val_desc_t))
 #define	RESET_VAL	((uintptr_t)-1)
 #define	UNSPEC_VAL	((uintptr_t)-2)
@@ -453,6 +460,11 @@
 	    get_radio, NULL, 0,
 	    DATALINK_CLASS_PHYS, DL_WIFI },
 
+	{ "linkmode",	{ "cm", DLADM_IBPART_CM_MODE },
+	    dladm_ibpart_linkmode_vals, VALCNT(dladm_ibpart_linkmode_vals),
+	    set_public_prop, NULL, get_linkmode_prop, NULL, 0,
+	    DATALINK_CLASS_PART, DL_IB },
+
 	{ "speed",	{ "", 0 }, NULL, 0,
 	    set_rate, get_rate_mod,
 	    get_rate, check_rate, 0,
@@ -4436,3 +4448,35 @@
 		free(buf);
 	return (status);
 }
+
+/*
+ * Property getter for IB partition links: obtains the value of the public
+ * property named by pdp->pd_name through i_dladm_get_public_prop() and
+ * renders it into prop_val[0] as "cm" or "ud".  Any other value is
+ * rendered as an empty string.  On success *val_cnt is set to 1; a
+ * failure status from the lookup is returned unchanged.
+ */
+/* ARGSUSED */
+static dladm_status_t
+get_linkmode_prop(dladm_handle_t handle, prop_desc_t *pdp,
+    datalink_id_t linkid, char **prop_val, uint_t *val_cnt,
+    datalink_media_t media, uint_t flags, uint_t *perm_flags)
+{
+	uint32_t		mode;
+	const char		*name = "";
+	dladm_status_t		err;
+
+	err = i_dladm_get_public_prop(handle, linkid, pdp->pd_name, flags,
+	    perm_flags, &mode, sizeof (mode));
+	if (err != DLADM_STATUS_OK)
+		return (err);
+
+	if (mode == DLADM_IBPART_CM_MODE)
+		name = "cm";
+	else if (mode == DLADM_IBPART_UD_MODE)
+		name = "ud";
+
+	(void) snprintf(prop_val[0], DLADM_STRSIZE, "%s", name);
+	*val_cnt = 1;
+	return (DLADM_STATUS_OK);
+}
--- a/usr/src/lib/libdladm/common/llib-ldladm	Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/lib/libdladm/common/llib-ldladm	Wed Apr 14 10:26:18 2010 -0700
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 /*LINTLIBRARY*/
@@ -31,6 +30,7 @@
 #include <libdlwlan.h>
 #include <libdlvnic.h>
 #include <libdlvlan.h>
+#include <libdlib.h>
 #include <libdliptun.h>
 #include <libdlmgmt.h>
 #include <libdlflow.h>
--- a/usr/src/lib/libdladm/common/mapfile-vers	Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/lib/libdladm/common/mapfile-vers	Wed Apr 14 10:26:18 2010 -0700
@@ -19,8 +19,7 @@
 # CDDL HEADER END
 #
 #
-# Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
-# Use is subject to license terms.
+# Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
 #
 
 #
@@ -256,6 +255,13 @@
 	dladm_flow_stat_free;
 	dladm_flow_stat_query_all;
 	dladm_flow_stat_query_all_free;
+
+	dladm_part_create;
+	dladm_part_delete;
+	dladm_part_info;
+	dladm_part_up;
+	dladm_ib_info;
+	dladm_free_ib_info;
     local:
 	*;
 };
--- a/usr/src/lib/udapl/udapl_tavor/Makefile.com	Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/lib/udapl/udapl_tavor/Makefile.com	Wed Apr 14 10:26:18 2010 -0700
@@ -20,8 +20,7 @@
 #
 
 #
-# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
-# Use is subject to license terms.
+# Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
 #
 
 LIBRARY=       	udapl_tavor.a 
@@ -133,7 +132,7 @@
 include $(SRC)/lib/Makefile.lib
 
 LIBS =		$(DYNLIB)
-LDLIBS +=	-ldevinfo -lsocket -lnsl -ldat -lc
+LDLIBS +=	-ldevinfo -lsocket -lnsl -ldat -lc -ldladm
 
 SRCDIR =	../common
 TAVORSRCDIR =	../tavor
--- a/usr/src/lib/udapl/udapl_tavor/common/dapl_name_service.c	Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/lib/udapl/udapl_tavor/common/dapl_name_service.c	Wed Apr 14 10:26:18 2010 -0700
@@ -24,8 +24,7 @@
  */
 
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 /*
@@ -299,7 +298,6 @@
  *	DAT_INTERNAL_ERROR
  */
 
-#define	IBD_NAME	"ibd"
 #define	NS_MAX_RETRIES	60
 
 DAT_RETURN
@@ -464,9 +462,7 @@
 	}
 	(void) dapl_os_memzero(&lifr, sizeof (lifr));
 	(void) dapl_os_memcpy(&lifr.lifr_nd.lnr_addr, addr, sizeof (*addr));
-	(void) dapl_os_strcpy(lifr.lifr_name, IBD_NAME);
-	(void) sprintf(&lifr.lifr_name[dapl_os_strlen(IBD_NAME)], "%d",
-	    ia_ptr->hca_ptr->hca_ibd_inst);
+	(void) dapl_os_strcpy(lifr.lifr_name, ia_ptr->hca_ptr->name);
 
 again:;
 	if (ioctl(s, SIOCLIFGETND, (caddr_t)&lifr) < 0)  {
@@ -550,9 +546,7 @@
 	int			retval;
 	uint32_t		netmask, netaddr, netaddr_dest;
 
-	(void) dapl_os_strcpy(lifreq.lifr_name, IBD_NAME);
-	(void) sprintf(&lifreq.lifr_name[dapl_os_strlen(IBD_NAME)], "%d",
-	    ia_ptr->hca_ptr->hca_ibd_inst);
+	(void) dapl_os_strcpy(lifreq.lifr_name, ia_ptr->hca_ptr->name);
 
 	retval = ioctl(s, SIOCGLIFNETMASK, (caddr_t)&lifreq);
 	if (retval < 0) {
@@ -597,9 +591,7 @@
 	uchar_t			*netmask, *local_addr, *dest_addr;
 	int			i, retval;
 
-	(void) dapl_os_strcpy(lifreq.lifr_name, IBD_NAME);
-	(void) sprintf(&lifreq.lifr_name[dapl_os_strlen(IBD_NAME)], "%d",
-	    ia_ptr->hca_ptr->hca_ibd_inst);
+	(void) dapl_os_strcpy(lifreq.lifr_name, ia_ptr->hca_ptr->name);
 
 	retval = ioctl(s, SIOCGLIFNETMASK, (caddr_t)&lifreq);
 	if (retval < 0) {
--- a/usr/src/lib/udapl/udapl_tavor/include/dapl.h	Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/lib/udapl/udapl_tavor/include/dapl.h	Wed Apr 14 10:26:18 2010 -0700
@@ -24,8 +24,7 @@
  */
 
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 /*
@@ -327,7 +326,6 @@
 	ib_uint32_t partition_max;
 	ib_uint32_t partition_key;
 	ib_uint32_t tavor_idx;
-	ib_uint32_t hca_ibd_inst;
 	ib_guid_t node_GUID;
 	ib_lid_t lid;
 	int max_inline_send;
--- a/usr/src/lib/udapl/udapl_tavor/tavor/dapl_tavor_hca.c	Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/lib/udapl/udapl_tavor/tavor/dapl_tavor_hca.c	Wed Apr 14 10:26:18 2010 -0700
@@ -20,8 +20,7 @@
  */
 
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #include <sys/types.h>
@@ -36,14 +35,18 @@
 #include <stdlib.h>
 #include <string.h>
 #include <errno.h>
-#include <libdevinfo.h>
+#include <strings.h>
+#include <fcntl.h>
+#include <libdladm.h>
+#include <libdlib.h>
+#include <libdllink.h>
+#include <sys/ib/ibnex/ibnex_devctl.h>
 
 #include "dapl.h"
 #include "dapl_adapter_util.h"
 #include "dapl_tavor_ibtf_impl.h"
 #include "dapl_hca_util.h"
 #include "dapl_name_service.h"
-#define	IF_NAME			"ibd"
 #define	MAX_HCAS		64
 #define	PROP_HCA_GUID		"hca-guid"
 #define	PROP_PORT_NUM		"port-number"
@@ -52,9 +55,9 @@
 #define	DEVDAPLT		"/dev/daplt"
 
 /* function prototypes */
-static DAT_RETURN dapli_process_tavor_node(di_node_t node, int *hca_idx,
+static DAT_RETURN dapli_process_tavor_node(char *dev_path, int *hca_idx,
     int try_blueflame);
-static DAT_RETURN dapli_process_ibd_node(di_node_t node, DAPL_HCA *hca_ptr,
+static DAT_RETURN dapli_process_ia(dladm_ib_attr_t *ib_attr, DAPL_HCA *hca_ptr,
     int hca_idx);
 
 #if defined(IBHOSTS_NAMING)
@@ -70,63 +73,75 @@
 dapli_init_hca(
 	IN   DAPL_HCA			*hca_ptr)
 {
-	di_node_t	root_node;
-	di_node_t	hca_node;
-	di_node_t	ibd_node;
-	DAT_RETURN	dat_status = DAT_SUCCESS;
-	int		hca_idx = 0;
-	int		ia_instance;
-	int		check_for_bf = 0;
+	DAT_RETURN		dat_status = DAT_SUCCESS;
+	int			hca_idx = 0;
+	int			check_for_bf = 0;
+	datalink_class_t	class;
+	datalink_id_t		linkid;
+	dladm_ib_attr_t		ib_attr;
+	ibnex_ctl_query_hca_t	query_hca;
+	int			ibnex_fd = -1;
+	dladm_handle_t		dlh;
+	char			hca_device_path[MAXPATHLEN];
 
-	ia_instance = (int)dapl_os_strtol(hca_ptr->name + strlen(IF_NAME),
-	    NULL, 0);
-
-	root_node = di_init("/", DINFOCPYALL);
-	if (root_node == DI_NODE_NIL) {
+	if (dladm_open(&dlh) != DLADM_STATUS_OK) {
 		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
-		    "init_hca: di_init failed %s\n", strerror(errno));
+		    "init_hca: dladm_open failed\n");
 		return (DAT_INTERNAL_ERROR);
 	}
 
-	ibd_node = di_drv_first_node(IF_NAME, root_node);
-	while (ibd_node != DI_NODE_NIL) {
-		/* find the ibd node matching our ianame */
-		if (di_instance(ibd_node) == ia_instance) {
-			break;
-		}
-		ibd_node = di_drv_next_node(ibd_node);
-	}
-
-	if (ibd_node == DI_NODE_NIL) {
+	if ((ibnex_fd = open(IBNEX_DEVCTL_DEV, O_RDONLY)) < 0) {
 		dat_status = DAT_ERROR(DAT_NAME_NOT_FOUND, 0);
-		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
-		    "init_hcas: ibd%d di_node not found\n", ia_instance);
+		dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+		    "init_hca: could not open ib nexus (%s)\n",
+		    strerror(errno));
 		goto bail;
 	}
 
-	hca_node = di_parent_node(ibd_node);
-	if ((hca_node != DI_NODE_NIL) && (strncmp(di_driver_name(hca_node),
-	    "tavor", strlen("tavor")) == 0))
+	if ((dladm_name2info(dlh, hca_ptr->name, &linkid, NULL, &class,
+	    NULL) != DLADM_STATUS_OK) ||
+	    (class != DATALINK_CLASS_PART) ||
+	    (dladm_part_info(dlh, linkid, &ib_attr,
+	    DLADM_OPT_ACTIVE) != DLADM_STATUS_OK)) {
+		dat_status = DAT_ERROR(DAT_NAME_NOT_FOUND, 0);
+		dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+		    "init_hca: %s not found - couldn't get partition info\n",
+		    hca_ptr->name);
+		goto bail;
+	}
+
+	bzero(&query_hca, sizeof (query_hca));
+	query_hca.hca_guid = ib_attr.dia_hca_guid;
+	query_hca.hca_device_path = hca_device_path;
+	query_hca.hca_device_path_alloc_sz = sizeof (hca_device_path);
+	if (ioctl(ibnex_fd, IBNEX_CTL_QUERY_HCA, &query_hca) == -1) {
+		dat_status = DAT_ERROR(DAT_NAME_NOT_FOUND, 0);
+		dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+		    "init_hca: %s not found; query_hca failed\n",
+		    hca_ptr->name);
+		goto bail;
+	}
+
+	if (strcmp(query_hca.hca_info.hca_driver_name, "tavor") == 0)
 		dapls_init_funcs_tavor(hca_ptr);
-	else if ((hca_node != DI_NODE_NIL) && (strncmp(di_driver_name
-	    (hca_node), "arbel", strlen("arbel")) == 0))
+	else if (strcmp(query_hca.hca_info.hca_driver_name, "arbel") == 0)
 		dapls_init_funcs_arbel(hca_ptr);
-	else if ((hca_node != DI_NODE_NIL) && (strncmp(di_driver_name
-	    (hca_node), "hermon", strlen("hermon")) == 0)) {
+	else if (strcmp(query_hca.hca_info.hca_driver_name, "hermon") == 0) {
 		dapls_init_funcs_hermon(hca_ptr);
 		check_for_bf = 1;
 	} else {
 		dat_status = DAT_ERROR(DAT_NAME_NOT_FOUND, 0);
 		dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-		    "init_hcas: ibd%d hca_node not found\n", ia_instance);
+		    "init_hca: %s not found\n", hca_ptr->name);
 		goto bail;
 	}
 
-	dat_status = dapli_process_tavor_node(hca_node, &hca_idx, check_for_bf);
+	dat_status = dapli_process_tavor_node(hca_device_path, &hca_idx,
+	    check_for_bf);
 	if (dat_status != DAT_SUCCESS) {
 		dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-		    "init_hcas: ibd%d process_tavor_node failed(0x%x)\n",
-		    ia_instance, dat_status);
+		    "init_hcas: %s process_tavor_node failed(0x%x)\n",
+		    hca_ptr->name, dat_status);
 		goto bail;
 	}
 
@@ -136,27 +151,28 @@
 		dat_status = DAT_ERROR(DAT_NAME_NOT_FOUND, 0);
 	}
 #else
-	dat_status = dapli_process_ibd_node(ibd_node, hca_ptr, hca_idx);
+	dat_status = dapli_process_ia(&ib_attr, hca_ptr, hca_idx);
 #endif
 	if (dat_status != DAT_SUCCESS) {
 		dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-		    "init_hcas: ibd%d process_ibd_node failed(0x%x)\n",
-		    ia_instance, dat_status);
+		    "init_hcas: %s process_ia failed(0x%x)\n",
+		    hca_ptr->name, dat_status);
 		goto bail;
 	}
 
 	dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-	    "init_hcas: done ibd%d\n", ia_instance);
+	    "init_hcas: done %s\n", hca_ptr->name);
 
 bail:
-	di_fini(root_node);
+	if (ibnex_fd != -1)
+		(void) close(ibnex_fd);
+	dladm_close(dlh);
 	return (dat_status);
 }
 
 static DAT_RETURN
-dapli_process_tavor_node(di_node_t node, int *hca_idx, int try_blueflame)
+dapli_process_tavor_node(char *dev_path, int *hca_idx, int try_blueflame)
 {
-	char		*dev_path;
 	char		path_buf[MAXPATHLEN];
 	int		i, idx, fd;
 #ifndef _LP64
@@ -183,7 +199,6 @@
 		dapl_os_unlock(&g_tavor_state_lock);
 		return (DAT_ERROR(DAT_INSUFFICIENT_RESOURCES, 0));
 	}
-	dev_path = di_devfs_path(node);
 
 	for (i = 0; i < idx; i++) {
 		if (strcmp(dev_path, g_tavor_state[i].hca_path) == 0) {
@@ -199,7 +214,6 @@
 		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
 		    "process_tavor: devfs path %s is too long\n",
 		    dev_path);
-		di_devfs_path_free(dev_path);
 		dapl_os_unlock(&g_tavor_state_lock);
 		return (DAT_ERROR(DAT_INTERNAL_ERROR, 0));
 	}
@@ -207,7 +221,6 @@
 	(void) dapl_os_strcat(path_buf, dev_path);
 	(void) dapl_os_strcat(path_buf, ":devctl");
 	(void) dapl_os_strcpy(g_tavor_state[idx].hca_path, dev_path);
-	di_devfs_path_free(dev_path);
 
 	pagesize = (size_t)sysconf(_SC_PAGESIZE);
 	if (pagesize == 0) {
@@ -294,49 +307,18 @@
 }
 
 static DAT_RETURN
-dapli_process_ibd_node(di_node_t node, DAPL_HCA *hca_ptr, int hca_idx)
+dapli_process_ia(dladm_ib_attr_t *ib_attr, DAPL_HCA *hca_ptr, int hca_idx)
 {
-	di_prop_t	prop;
-	ib_guid_t	hca_guid = 0;
 	struct lifreq	lifreq;
-	uint32_t	port_num = 0;
-	uint32_t	partition_key = 0;
-	int		instance, sfd, retval, af;
-	int		tmp;
-	int		digits;
-	char		*drv_name;
+	int		sfd, retval, af;
 	char		addr_buf[64];
 
-	prop = di_prop_next(node, DI_PROP_NIL);
-	while (prop != DI_PROP_NIL) {
-		char		*prop_name;
-		uchar_t		*bytep;
-		int		*intp, count;
-
-		prop_name = di_prop_name(prop);
-		count = 0;
-
-		if (strcmp(prop_name, PROP_HCA_GUID) == 0) {
-			count = di_prop_bytes(prop, &bytep);
-			dapl_os_assert(count == sizeof (ib_guid_t));
-			(void) dapl_os_memcpy((void *)&hca_guid, (void *)bytep,
-			    sizeof (ib_guid_t));
-		} else if (strcmp(prop_name, PROP_PORT_NUM) == 0) {
-			count = di_prop_ints(prop, &intp);
-			dapl_os_assert(count == 1);
-			port_num = (uint32_t)intp[0];
-		} else if (strcmp(prop_name, PROP_PORT_PKEY) == 0) {
-			count = di_prop_ints(prop, &intp);
-			dapl_os_assert(count == 1);
-			partition_key = (uint32_t)intp[0];
-		}
-		prop = di_prop_next(node, prop);
-	}
-	if (hca_guid == 0 || port_num == 0 || partition_key == 0) {
+	if (ib_attr->dia_hca_guid == 0 || ib_attr->dia_portnum == 0 ||
+	    ib_attr->dia_pkey == 0) {
 		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
-		    "process_ibd: invalid properties: guid 0x%016llx, "
-		    "port %d, pkey 0x%08x\n", hca_guid, port_num,
-		    partition_key);
+		    "process_ia: invalid properties: guid 0x%016llx, "
+		    "port %d, pkey 0x%08x\n", ib_attr->dia_hca_guid,
+		    ib_attr->dia_portnum, (uint_t)ib_attr->dia_pkey);
 		return (DAT_ERROR(DAT_INVALID_PARAMETER, 0));
 	}
 
@@ -349,31 +331,20 @@
 	sfd = socket(af, SOCK_DGRAM, 0);
 	if (sfd < 0) {
 		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
-		    "process_ibd: socket failed: %s\n", strerror(errno));
+		    "process_ia: socket failed: %s\n", strerror(errno));
 		return (DAT_ERROR(DAT_INSUFFICIENT_RESOURCES, 0));
 	}
-	instance = di_instance(node);
-	drv_name = di_driver_name(node);
 
-	/* calculate the number of digits in instance */
-	tmp = instance;
-	digits = 0;
-	do {
-		tmp = tmp / 10;
-		digits++;
-	} while (tmp > 0);
 	/* check if name will fit in lifr_name */
-	if (dapl_os_strlen(drv_name) + digits +  1 > LIFNAMSIZ) {
+	if (dapl_os_strlen(hca_ptr->name) >= LIFNAMSIZ) {
 		(void) close(sfd);
 		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
-		    "process_ibd: if name overflow %s:%d\n",
-		    drv_name, instance);
+		    "process_ia: if name overflow %s\n",
+		    hca_ptr->name);
 		return (DAT_ERROR(DAT_INVALID_PARAMETER, 0));
 	}
 
-	(void) dapl_os_strcpy(lifreq.lifr_name, drv_name);
-	(void) sprintf(&lifreq.lifr_name[dapl_os_strlen(drv_name)], "%d",
-	    instance);
+	(void) dapl_os_strcpy(lifreq.lifr_name, hca_ptr->name);
 	retval = ioctl(sfd, SIOCGLIFADDR, (caddr_t)&lifreq);
 	if (retval < 0) {
 		(void) close(sfd);
@@ -382,7 +353,7 @@
 			 * the interface is not plumbed.
 			 */
 			dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-			    "process_ibd: %s: ip address not found\n",
+			    "process_ia: %s: ip address not found\n",
 			    lifreq.lifr_name);
 			return (DAT_ERROR(DAT_INSUFFICIENT_RESOURCES, 0));
 		} else {
@@ -396,19 +367,18 @@
 	}
 	(void) close(sfd);
 
-	hca_ptr->hca_ibd_inst = instance;
 	hca_ptr->tavor_idx = hca_idx;
-	hca_ptr->node_GUID = hca_guid;
-	hca_ptr->port_num = port_num;
-	hca_ptr->partition_key = partition_key;
+	hca_ptr->node_GUID = ib_attr->dia_hca_guid;
+	hca_ptr->port_num = ib_attr->dia_portnum;
+	hca_ptr->partition_key = ib_attr->dia_pkey;
 	(void) dapl_os_memcpy((void *)&hca_ptr->hca_address,
 	    (void *)&lifreq.lifr_addr, sizeof (hca_ptr->hca_address));
 	hca_ptr->max_inline_send = dapls_tavor_max_inline();
 
 	dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-	    "process_ibd: interface %s, hca guid 0x%016llx, port %d, "
-	    "pkey 0x%08x, ip addr %s\n", lifreq.lifr_name, hca_guid,
-	    port_num, partition_key, dapls_inet_ntop(
+	    "process_ia: interface %s, hca guid 0x%016llx, port %d, "
+	    "pkey 0x%08x, ip addr %s\n", lifreq.lifr_name, hca_ptr->node_GUID,
+	    hca_ptr->port_num, hca_ptr->partition_key, dapls_inet_ntop(
 	    (struct sockaddr *)&hca_ptr->hca_address, addr_buf, 64));
 	return (DAT_SUCCESS);
 }
@@ -674,7 +644,6 @@
 		(void) sprintf(line_buf, "%s-ib%d", localhost, count + 1);
 		if (strncmp(line_buf, host_buf, strlen(line_buf)) == 0) {
 			guid &= 0xfffffffffffffff0;
-			hca_ptr->hca_ibd_inst = count + 1;
 			hca_ptr->tavor_idx = hca_idx;
 			hca_ptr->node_GUID = guid;
 			hca_ptr->port_num = count + 1;
--- a/usr/src/pkg/manifests/SUNWcs.mf	Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/pkg/manifests/SUNWcs.mf	Wed Apr 14 10:26:18 2010 -0700
@@ -1171,6 +1171,7 @@
 $(sparc_ONLY)file path=usr/lib/rcm/modules/SUNW_ttymux_rcm.so mode=0555
 file path=usr/lib/rcm/modules/SUNW_vlan_rcm.so mode=0555
 file path=usr/lib/rcm/modules/SUNW_vnic_rcm.so mode=0555
+file path=usr/lib/rcm/modules/SUNW_ibpart_rcm.so mode=0555
 file path=usr/lib/rcm/rcm_daemon mode=0555
 file path=usr/lib/reparse/reparsed group=sys mode=0555
 file path=usr/lib/saf/listen group=sys mode=0755
--- a/usr/src/pkg/manifests/driver-network-ib.mf	Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/pkg/manifests/driver-network-ib.mf	Wed Apr 14 10:26:18 2010 -0700
@@ -20,8 +20,7 @@
 #
 
 #
-# Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
-# Use is subject to license terms.
+# Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
 #
 
 #
@@ -45,7 +44,7 @@
 file path=kernel/drv/$(ARCH64)/ib group=sys
 $(i386_ONLY)file path=kernel/drv/ib group=sys
 file path=kernel/drv/ib.conf group=sys original_name=SUNWib:kernel/drv/ib.conf \
-    preserve=true reboot-needed=false
+    preserve=renameold reboot-needed=false
 file path=kernel/misc/$(ARCH64)/ibcm group=sys mode=0755 reboot-needed=true
 file path=kernel/misc/$(ARCH64)/ibdm group=sys mode=0755 reboot-needed=true
 file path=kernel/misc/$(ARCH64)/ibmf group=sys mode=0755 reboot-needed=true
--- a/usr/src/pkg/manifests/driver-network-ibd.mf	Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/pkg/manifests/driver-network-ibd.mf	Wed Apr 14 10:26:18 2010 -0700
@@ -20,8 +20,7 @@
 #
 
 #
-# Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
-# Use is subject to license terms.
+# Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
 #
 
 #
@@ -30,24 +29,8 @@
 # will only be installed into the global zone.
 #
 <include hollow_zone_pkg>
-set name=pkg.fmri value=pkg:/driver/network/ibd@$(PKGVERS)
-set name=pkg.description value="Sun IP over InfiniBand"
-set name=pkg.summary value="Sun IP over InfiniBand"
-set name=info.classification value=org.opensolaris.category.2008:System/Hardware
+set name=pkg.fmri value=pkg:/driver/network/ibd@0.5.11,5.11-0.139
+set name=pkg.renamed value=true
 set name=variant.arch value=$(ARCH)
 set name=variant.opensolaris.zone value=global value=nonglobal
-dir path=kernel group=sys
-dir path=kernel/drv group=sys
-dir path=kernel/drv/$(ARCH64) group=sys
-driver name=ibd alias=ib.ipib clone_perms="ibd 0666 root sys" \
-    perms="* 0666 root sys"
-file path=kernel/drv/$(ARCH64)/ibd group=sys
-$(i386_ONLY)file path=kernel/drv/ibd group=sys
-file path=kernel/drv/ibd.conf group=sys \
-    original_name=SUNWipoib:kernel/drv/ibd.conf preserve=renamenew
-legacy pkg=SUNWipoib arch=$(ARCH) category=system desc="Sun IP over InfiniBand" \
-    hotline="Please contact your local service provider" \
-    name="Sun IP over InfiniBand" vendor="Sun Microsystems, Inc." \
-    version=11.11,REV=2009.11.11
-license cr_Sun license=cr_Sun
-license lic_CDDL license=lic_CDDL
+depend fmri=pkg:/driver/network/ibp@0.5.11,5.11-0.139 type=require
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/pkg/manifests/driver-network-ibp.mf	Wed Apr 14 10:26:18 2010 -0700
@@ -0,0 +1,66 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+#
+
+#
+# This package will install successfully into any zone, global or
+# non-global.  The files, directories, links, and hardlinks, however,
+# will only be installed into the global zone.
+#
+<include hollow_zone_pkg>
+set name=pkg.fmri value=pkg:/driver/network/ibp@$(PKGVERS)
+set name=pkg.description value="Sun IP over InfiniBand"
+set name=pkg.summary value="Sun IP over InfiniBand"
+set name=info.classification value=org.opensolaris.category.2008:System/Hardware
+set name=variant.arch value=$(ARCH)
+set name=variant.opensolaris.zone value=global value=nonglobal
+
+dir path=kernel group=sys
+dir path=kernel/drv group=sys
+dir path=kernel/drv/$(ARCH64) group=sys
+driver name=ibp alias=ib.ipib clone_perms="ibp 0666 root sys" \
+    perms="* 0666 root sys"
+file path=kernel/drv/$(ARCH64)/ibp group=sys
+$(i386_ONLY)file path=kernel/drv/ibp group=sys
+file path=kernel/drv/ibp.conf group=sys \
+    original_name=SUNWipoib:kernel/drv/ibd.conf preserve=renameold
+
+dir path=lib
+dir path=lib/svc
+dir path=lib/svc/method
+file path=lib/svc/method/ibd-post-upgrade mode=0555
+dir path=lib/svc/manifest group=sys
+dir path=lib/svc/manifest/network group=sys
+file path=lib/svc/manifest/network/ibd-post-upgrade.xml group=sys mode=0444
+
+dir path=sbin group=sys
+file path=sbin/ibd_upgrade mode=0555
+file path=sbin/ibd_delete_link mode=0555
+
+legacy pkg=SUNWipoib arch=$(ARCH) category=system desc="Sun IP over InfiniBand" \
+    hotline="Please contact your local service provider" \
+    name="Sun IP over InfiniBand" vendor="Sun Microsystems, Inc." \
+    version=11.11,REV=2009.11.11
+license cr_Sun license=cr_Sun
+license lic_CDDL license=lic_CDDL
--- a/usr/src/pkg/manifests/system-header.mf	Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/pkg/manifests/system-header.mf	Wed Apr 14 10:26:18 2010 -0700
@@ -474,6 +474,7 @@
 file path=usr/include/libdllink.h
 file path=usr/include/libdlpi.h
 file path=usr/include/libdlvlan.h
+file path=usr/include/libdlib.h
 file path=usr/include/libelf.h
 $(i386_ONLY)file path=usr/include/libfdisk.h
 file path=usr/include/libfstyp.h
@@ -1071,6 +1072,7 @@
 $(i386_ONLY)file path=usr/include/sys/i8272A.h
 file path=usr/include/sys/ia.h
 file path=usr/include/sys/iapriocntl.h
+file path=usr/include/sys/ibpart.h
 file path=usr/include/sys/ib/adapters/hermon/hermon_ioctl.h
 file path=usr/include/sys/ib/adapters/tavor/tavor_ioctl.h
 file path=usr/include/sys/ib/clients/ibd/ibd.h
--- a/usr/src/tools/scripts/bfu.sh	Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/tools/scripts/bfu.sh	Wed Apr 14 10:26:18 2010 -0700
@@ -21,8 +21,7 @@
 #
 
 #
-# Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
-# Use is subject to license terms.
+# Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
 #
 # Upgrade a machine from a cpio archive area in about 5 minutes.
 # By Roger Faulkner and Jeff Bonwick, April 1993.
@@ -7824,6 +7823,16 @@
 	rm -f $usr/include/sys/pcmcia/pcelx.h
 
 	#
+	# Remove the old ibd driver. It is replaced by ibp driver.
+	#
+	rm -f $root/kernel/drv/ibd
+	rm -f $root/kernel/drv/amd64/ibd
+	rm -f $root/kernel/drv/sparcv9/ibd
+	if [ -f $root/kernel/drv/ibd.conf ]; then
+		mv $root/kernel/drv/ibd.conf $root/kernel/drv/ibp.conf.old
+	fi
+
+	#
 	# Remove bpp, esp, and dma
 	#
 	rm -f $root/kernel/drv/sparcv9/bpp
--- a/usr/src/uts/common/Makefile.files	Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/uts/common/Makefile.files	Wed Apr 14 10:26:18 2010 -0700
@@ -663,7 +663,7 @@
 
 SIMNET_OBJS +=	simnet.o
 
-IB_OBJS +=	ibnex.o ibnex_ioctl.o
+IB_OBJS +=	ibnex.o ibnex_ioctl.o ibnex_hca.o
 
 IBCM_OBJS +=	ibcm_impl.o ibcm_sm.o ibcm_ti.o ibcm_utils.o ibcm_path.o \
 		ibcm_arp.o ibcm_arp_link.o
@@ -679,7 +679,7 @@
 
 IBTL_OBJS +=	ibtl_impl.o ibtl_util.o ibtl_mem.o ibtl_handlers.o ibtl_qp.o \
 		ibtl_cq.o ibtl_wr.o ibtl_hca.o ibtl_chan.o ibtl_cm.o \
-		ibtl_mcg.o ibtl_ibnex.o ibtl_srq.o
+		ibtl_mcg.o ibtl_ibnex.o ibtl_srq.o ibtl_misc.o
 
 TAVOR_OBJS +=	tavor.o tavor_agents.o tavor_cfg.o tavor_ci.o tavor_cmd.o \
 		tavor_cq.o tavor_event.o tavor_ioctl.o tavor_misc.o \
--- a/usr/src/uts/common/io/dld/dld_drv.c	Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/uts/common/io/dld/dld_drv.c	Wed Apr 14 10:26:18 2010 -0700
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 /*
@@ -1373,7 +1372,8 @@
 	{VNIC_IOC,	"vnic",	0, NULL, 0},
 	{SIMNET_IOC,	"simnet", 0, NULL, 0},
 	{BRIDGE_IOC,	"bridge", 0, NULL, 0},
-	{IPTUN_IOC,	"iptun", 0, NULL, 0}
+	{IPTUN_IOC,	"iptun", 0, NULL, 0},
+	{IBPART_IOC,	"ibp", -1, NULL, 0}
 };
 #define	DLDIOC_CNT	\
 	(sizeof (dld_ioc_modtable) / sizeof (dld_ioc_modentry_t))
--- a/usr/src/uts/common/io/ib/clients/ibd/ibd.c	Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/uts/common/io/ib/clients/ibd/ibd.c	Wed Apr 14 10:26:18 2010 -0700
@@ -20,8 +20,7 @@
  */
 
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 /*
@@ -63,44 +62,110 @@
 
 #include <sys/ib/mgt/ibmf/ibmf.h>	/* for ibd_get_portspeed */
 
-/*
- * Per-interface tunables (for developers)
+#include <sys/priv_names.h>
+#include <sys/dls.h>
+#include <sys/dld_ioc.h>
+#include <sys/policy.h>
+#include <sys/ibpart.h>
+#include <sys/file.h>
+
+/*
+ * The write-up below includes details on the following:
+ * 1. The dladm administrative model.
+ * 2. Late HCA initialization feature.
+ * 3. Brussels support and its implications to the current architecture.
+ *
+ * 1. The dladm administrative model.
+ * ------------------------------------------
+ * With the dladm model, ibnex will create one ibd instance per port. These
+ * instances will be created independent of the port state.
+ *
+ * The ibd driver is two faceted: One side of it working as the port driver and
+ * the other as the partition object driver.
  *
- * ibd_tx_copy_thresh
- *     This sets the threshold at which ibd will attempt to do a bcopy of the
- *     outgoing data into a pre-mapped buffer. The IPoIB driver's send behavior
- *     is restricted by various parameters, so setting of this value must be
- *     made after careful considerations only.  For instance, IB HCAs currently
- *     impose a relatively small limit (when compared to ethernet NICs) on the
- *     length of the SGL for transmit. On the other hand, the ip stack could
- *     send down mp chains that are quite long when LSO is enabled.
+ * The port instance is a child of the HCA, and will have an entry in the devfs.
+ * A DDI attach only happens for the port driver, and its attach is
+ * handled in ibd_port_attach(). Similarly, a DDI detach for the port driver is
+ * handled in ibd_port_unattach().
+ *
+ * The partition object is only a registrant to the mac layer via mac_register()
+ * and does not have an entry in the device tree. There is no DDI softstate
+ * managed by the DDI framework for the partition objects. However, the state is
+ * managed inside the ibd driver, and every partition object hangs off the
+ * "ibd_objlist_head".
+ *
+ * The partition object first comes into existence when a user runs the
+ * 'create-part' subcommand of dladm. This is like invoking the attach entry
+ * point of the partition object. The partition object goes away with the
+ * 'delete-part' subcommand of dladm. This is like invoking the detach entry
+ * point of the partition object.
+ *
+ * The create-part and delete-part subcommands result in dld ioctls that end up
+ * calling ibd_create_partition() and ibd_delete_partition() respectively.
+ * These ioctls are registered with the dld layer in _init() via a call to
+ * dld_ioc_register().
+ *
+ * The port instance by itself cannot be plumbed. It is only the partition
+ * objects that can be plumbed and they alone participate in I/O and not the
+ * port driver.
+ *
+ * There are some info ioctls supported in ibd which are used by dladm(1M) to
+ * display useful information. The info entry point for ibd is
+ * ibd_get_partition_info().
  *
- * ibd_num_swqe
- *     Number of "send WQE" elements that will be allocated and used by ibd.
- *     When tuning this parameter, the size of pre-allocated, pre-mapped copy
- *     buffer in each of these send wqes must be taken into account. This
- *     copy buffer size is determined by the value of IBD_TX_BUF_SZ (this is
- *     currently set to the same value of ibd_tx_copy_thresh, but may be
- *     changed independently if needed).
+ * 2. Late HCA initialization feature.
+ * ------------------------------------
+ * As mentioned in section 1, the user creates the partition objects via
+ * dladm(1M). It is possible that:
+ * a) The physical port itself is down and the SM cannot be reached.
+ * b) The PKEY specified by the user has not been created in the SM yet.
+ * c) An IPoIB broadcast group for the specified PKEY is not present.
  *
- * ibd_num_rwqe
- *     Number of "receive WQE" elements that will be allocated and used by
- *     ibd. This parameter is limited by the maximum channel size of the HCA.
- *     Each buffer in the receive wqe will be of MTU size.
+ * In all of the above cases, complete initialization of the partition object is
+ * not possible. However, the new model allows the creation of partition
+ * objects even in such cases but will defer the initialization for later.
+ * When such a partition object is plumbed, the link state will be displayed as
+ * "down".
+ * The driver, at this point, is listening to events that herald the
+ * availability of resources -
+ * i)   LINK_UP when the link becomes available
+ * ii)  PORT_CHANGE when the PKEY has been created
+ * iii) MCG_CREATED when the IPoIB broadcast group for the given pkey has been
+ * created
+ * via ibd_async_handler() for events i) and ii), and via
+ * ibd_snet_notices_handler() for iii.
+ * The driver handles these events (as and when they arrive) and completes the
+ * initialization of the partition object and transitions it to a usable state.
  *
- * ibd_num_lso_bufs
- *     Number of "larger-than-MTU" copy buffers to use for cases when the
- *     outgoing mblk chain is too fragmented to be used with ibt_map_mem_iov()
- *     and too large to be used with regular MTU-sized copy buffers. It is
- *     not recommended to tune this variable without understanding the
- *     application environment and/or memory resources. The size of each of
- *     these lso buffers is determined by the value of IBD_LSO_BUFSZ.
+ * 3. Brussels support and its implications to the current architecture.
+ * ---------------------------------------------------------------------
+ * The brussels support introduces two new interfaces to the ibd driver -
+ * ibd_m_getprop() and ibd_m_setprop().
+ * These interfaces allow setting and retrieval of certain properties.
+ * Some of them are public properties while most others are private properties
+ * meant to be used by developers. Tuning the latter kind can cause
+ * performance issues and should not be used without understanding the
+ * implications. All properties are specific to an instance of either the
+ * partition object or the port driver.
+ *
+ * The public properties are : mtu and linkmode.
+ * mtu is a read-only property.
+ * linkmode can take two values - UD and CM.
  *
- * ibd_num_ah
- *     Number of AH cache entries to allocate
- *
- * ibd_hash_size
- *     Hash table size for the active AH list
+ * Changing the linkmode requires some bookkeeping in the driver. The
+ * capabilities need to be re-reported to the mac layer. This is done by
+ * calling mac_capab_update().  The maxsdu is updated by calling
+ * mac_maxsdu_update().
+ * The private properties retain their values across the change of linkmode.
+ * NOTE:
+ * - The port driver does not support any property apart from mtu.
+ * - All other properties are only meant for the partition object.
+ * - The properties cannot be set when an instance is plumbed. The
+ * instance has to be unplumbed to effect any setting.
+ */
+
+/*
+ * Driver wide tunables
  *
  * ibd_tx_softintr
  * ibd_rx_softintr
@@ -113,49 +178,17 @@
  *     allocated and logging is enabled only when IBD_LOGGING is defined.
  *
  */
-uint_t ibd_tx_copy_thresh = 0x1000;
-uint_t ibd_num_swqe = 4000;
-uint_t ibd_num_rwqe = 4000;
-uint_t ibd_num_lso_bufs = 0x400;
-uint_t ibd_num_ah = 256;
-uint_t ibd_hash_size = 32;
 uint_t ibd_rx_softintr = 1;
 uint_t ibd_tx_softintr = 1;
-uint_t ibd_create_broadcast_group = 1;
+
 #ifdef IBD_LOGGING
 uint_t ibd_log_sz = 0x20000;
 #endif
 
-#define	IBD_TX_COPY_THRESH		ibd_tx_copy_thresh
-#define	IBD_TX_BUF_SZ			ibd_tx_copy_thresh
-#define	IBD_NUM_SWQE			ibd_num_swqe
-#define	IBD_NUM_RWQE			ibd_num_rwqe
-#define	IBD_NUM_LSO_BUFS		ibd_num_lso_bufs
-#define	IBD_NUM_AH			ibd_num_ah
-#define	IBD_HASH_SIZE			ibd_hash_size
 #ifdef IBD_LOGGING
 #define	IBD_LOG_SZ			ibd_log_sz
 #endif
 
-/*
- * ibd_rc_tx_copy_thresh
- *     This sets the threshold upto which ibd will attempt to do a bcopy of the
- *     outgoing data into a pre-mapped buffer.
- */
-uint_t ibd_rc_tx_copy_thresh = 0x1000;
-
-/*
- * Receive CQ moderation parameters: tunable (for developers)
- */
-uint_t ibd_rxcomp_count = 4;
-uint_t ibd_rxcomp_usec = 10;
-
-/*
- * Send CQ moderation parameters: tunable (for developers)
- */
-uint_t ibd_txcomp_count = 16;
-uint_t ibd_txcomp_usec = 300;
-
 /* Post IBD_RX_POST_CNT receive work requests at a time. */
 #define	IBD_RX_POST_CNT			8
 
@@ -170,7 +203,6 @@
  */
 #define	IBD_LSO_MAXLEN			65536
 #define	IBD_LSO_BUFSZ			8192
-#define	IBD_PROP_LSO_POLICY		"lso-policy"
 
 /*
  * Async operation states
@@ -184,32 +216,35 @@
 /*
  * State of IBD driver initialization during attach/m_start
  */
-#define	IBD_DRV_STATE_INITIALIZED	0x00001
-#define	IBD_DRV_RXINTR_ADDED		0x00002
-#define	IBD_DRV_TXINTR_ADDED		0x00004
-#define	IBD_DRV_IBTL_ATTACH_DONE	0x00008
-#define	IBD_DRV_HCA_OPENED		0x00010
-#define	IBD_DRV_PD_ALLOCD		0x00020
-#define	IBD_DRV_MAC_REGISTERED		0x00040
-#define	IBD_DRV_PORT_DETAILS_OBTAINED	0x00080
-#define	IBD_DRV_BCAST_GROUP_FOUND	0x00100
-#define	IBD_DRV_ACACHE_INITIALIZED	0x00200
-#define	IBD_DRV_CQS_ALLOCD		0x00400
-#define	IBD_DRV_UD_CHANNEL_SETUP	0x00800
-#define	IBD_DRV_TXLIST_ALLOCD		0x01000
-#define	IBD_DRV_SCQ_NOTIFY_ENABLED	0x02000
-#define	IBD_DRV_RXLIST_ALLOCD		0x04000
-#define	IBD_DRV_BCAST_GROUP_JOINED	0x08000
-#define	IBD_DRV_ASYNC_THR_CREATED	0x10000
-#define	IBD_DRV_RCQ_NOTIFY_ENABLED	0x20000
-#define	IBD_DRV_SM_NOTICES_REGISTERED	0x40000
-#define	IBD_DRV_STARTED			0x80000
+#define	IBD_DRV_STATE_INITIALIZED	0x000001
+#define	IBD_DRV_RXINTR_ADDED		0x000002
+#define	IBD_DRV_TXINTR_ADDED		0x000004
+#define	IBD_DRV_IBTL_ATTACH_DONE	0x000008
+#define	IBD_DRV_HCA_OPENED		0x000010
+#define	IBD_DRV_PD_ALLOCD		0x000020
+#define	IBD_DRV_MAC_REGISTERED		0x000040
+#define	IBD_DRV_PORT_DETAILS_OBTAINED	0x000080
+#define	IBD_DRV_BCAST_GROUP_FOUND	0x000100
+#define	IBD_DRV_ACACHE_INITIALIZED	0x000200
+#define	IBD_DRV_CQS_ALLOCD		0x000400
+#define	IBD_DRV_UD_CHANNEL_SETUP	0x000800
+#define	IBD_DRV_TXLIST_ALLOCD		0x001000
+#define	IBD_DRV_SCQ_NOTIFY_ENABLED	0x002000
+#define	IBD_DRV_RXLIST_ALLOCD		0x004000
+#define	IBD_DRV_BCAST_GROUP_JOINED	0x008000
+#define	IBD_DRV_ASYNC_THR_CREATED	0x010000
+#define	IBD_DRV_RCQ_NOTIFY_ENABLED	0x020000
+#define	IBD_DRV_SM_NOTICES_REGISTERED	0x040000
+#define	IBD_DRV_STARTED			0x080000
 #define	IBD_DRV_RC_SRQ_ALLOCD		0x100000
 #define	IBD_DRV_RC_LARGEBUF_ALLOCD	0x200000
 #define	IBD_DRV_RC_LISTEN		0x400000
 #ifdef DEBUG
 #define	IBD_DRV_RC_PRIVATE_STATE	0x800000
 #endif
+#define	IBD_DRV_IN_DELETION		0x1000000
+#define	IBD_DRV_IN_LATE_HCA_INIT 	0x2000000
+#define	IBD_DRV_REQ_LIST_INITED 	0x4000000
 
 /*
  * Start/stop in-progress flags; note that restart must always remain
@@ -218,12 +253,16 @@
 #define	IBD_DRV_START_IN_PROGRESS	0x10000000
 #define	IBD_DRV_STOP_IN_PROGRESS	0x20000000
 #define	IBD_DRV_RESTART_IN_PROGRESS	0x30000000
+#define	IBD_DRV_DELETE_IN_PROGRESS	IBD_DRV_RESTART_IN_PROGRESS
 
 /*
  * Miscellaneous constants
  */
 #define	IB_MGID_IPV4_LOWGRP_MASK	0xFFFFFFFF
 #define	IBD_DEF_MAX_SDU			2044
+#define	IBD_DEF_MAX_MTU			(IBD_DEF_MAX_SDU + IPOIB_HDRSIZE)
+#define	IBD_DEF_RC_MAX_SDU		65520
+#define	IBD_DEF_RC_MAX_MTU		(IBD_DEF_RC_MAX_SDU + IPOIB_HDRSIZE)
 #define	IBD_DEFAULT_QKEY		0xB1B
 #ifdef IBD_LOGGING
 #define	IBD_DMAX_LINE			100
@@ -249,6 +288,12 @@
 ibd_global_state_t ibd_gstate;
 
 /*
+ * Partition object list
+ */
+ibd_state_t	*ibd_objlist_head = NULL;
+kmutex_t	ibd_objlist_lock;
+
+/*
  * Logging
  */
 #ifdef IBD_LOGGING
@@ -275,6 +320,15 @@
 static mblk_t *ibd_m_tx(void *, mblk_t *);
 static boolean_t ibd_m_getcapab(void *, mac_capab_t, void *);
 
+static int ibd_m_setprop(void *, const char *, mac_prop_id_t, uint_t,
+    const void *);
+static int ibd_m_getprop(void *, const char *, mac_prop_id_t, uint_t, void *);
+static void ibd_m_propinfo(void *, const char *, mac_prop_id_t,
+    mac_prop_info_handle_t);
+static int ibd_set_priv_prop(ibd_state_t *, const char *, uint_t,
+    const void *);
+static int ibd_get_priv_prop(ibd_state_t *, const char *, uint_t, void *);
+
 /*
  * Private driver entry points for GLDv3
  */
@@ -339,6 +393,8 @@
 static void ibd_freemsg_cb(char *);
 static void ibd_async_handler(void *, ibt_hca_hdl_t, ibt_async_code_t,
     ibt_async_event_t *);
+static void ibdpd_async_handler(void *, ibt_hca_hdl_t, ibt_async_code_t,
+    ibt_async_event_t *);
 static void ibd_snet_notices_handler(void *, ib_gid_t,
     ibt_subnet_event_code_t, ibt_subnet_event_t *);
 
@@ -393,8 +449,7 @@
  * Helpers for attach/start routines
  */
 static int ibd_register_mac(ibd_state_t *, dev_info_t *);
-static int ibd_record_capab(ibd_state_t *, dev_info_t *);
-static int ibd_unattach(ibd_state_t *, dev_info_t *);
+static int ibd_record_capab(ibd_state_t *);
 static int ibd_get_port_details(ibd_state_t *);
 static int ibd_alloc_cqs(ibd_state_t *);
 static int ibd_setup_ud_channel(ibd_state_t *);
@@ -402,6 +457,11 @@
 static int ibd_undo_start(ibd_state_t *, link_state_t);
 static void ibd_set_mac_progress(ibd_state_t *, uint_t);
 static void ibd_clr_mac_progress(ibd_state_t *, uint_t);
+static int ibd_part_attach(ibd_state_t *state, dev_info_t *dip);
+static int ibd_part_unattach(ibd_state_t *state);
+static int ibd_port_attach(dev_info_t *);
+static int ibd_port_unattach(ibd_state_t *state, dev_info_t *dip);
+static int ibd_get_port_state(ibd_state_t *, link_state_t *);
 
 
 /*
@@ -414,6 +474,10 @@
 static void *list_get_head(list_t *);
 static int ibd_hash_key_cmp(mod_hash_key_t, mod_hash_key_t);
 static uint_t ibd_hash_by_id(void *, mod_hash_key_t);
+
+ibt_status_t ibd_get_part_attr(datalink_id_t, ibt_part_attr_t *);
+ibt_status_t ibd_get_all_part_attr(ibt_part_attr_t **, int *);
+
 #ifdef IBD_LOGGING
 static void ibd_log(const char *, ...);
 #endif
@@ -441,13 +505,23 @@
 	IBT_NETWORK,
 	ibd_async_handler,
 	NULL,
+	"IBPART"
+};
+
+static struct ibt_clnt_modinfo_s ibdpd_clnt_modinfo = {
+	IBTI_V_CURR,
+	IBT_NETWORK,
+	ibdpd_async_handler,
+	NULL,
 	"IPIB"
 };
 
 /*
  * GLDv3 entry points
  */
-#define	IBD_M_CALLBACK_FLAGS	(MC_GETCAPAB)
+#define	IBD_M_CALLBACK_FLAGS	\
+	(MC_GETCAPAB | MC_SETPROP | MC_GETPROP | MC_PROPINFO)
+
 static mac_callbacks_t ibd_m_callbacks = {
 	IBD_M_CALLBACK_FLAGS,
 	ibd_m_stat,
@@ -459,7 +533,55 @@
 	ibd_m_tx,
 	NULL,
 	NULL,
-	ibd_m_getcapab
+	ibd_m_getcapab,
+	NULL,
+	NULL,
+	ibd_m_setprop,
+	ibd_m_getprop,
+	ibd_m_propinfo
+};
+
+/* Private properties */
+char *ibd_priv_props[] = {
+	"_ibd_broadcast_group",
+	"_ibd_coalesce_completions",
+	"_ibd_create_broadcast_group",
+	"_ibd_hash_size",
+	"_ibd_lso_enable",
+	"_ibd_num_ah",
+	"_ibd_num_lso_bufs",
+	"_ibd_rc_enable_srq",
+	"_ibd_rc_num_rwqe",
+	"_ibd_rc_num_srq",
+	"_ibd_rc_num_swqe",
+	"_ibd_rc_rx_comp_count",
+	"_ibd_rc_rx_comp_usec",
+	"_ibd_rc_rx_copy_thresh",
+	"_ibd_rc_rx_rwqe_thresh",
+	"_ibd_rc_tx_comp_count",
+	"_ibd_rc_tx_comp_usec",
+	"_ibd_rc_tx_copy_thresh",
+	"_ibd_ud_num_rwqe",
+	"_ibd_ud_num_swqe",
+	"_ibd_ud_rx_comp_count",
+	"_ibd_ud_rx_comp_usec",
+	"_ibd_ud_tx_comp_count",
+	"_ibd_ud_tx_comp_usec",
+	"_ibd_ud_tx_copy_thresh",
+	NULL
+};
+
+static int ibd_create_partition(void *, intptr_t, int, cred_t *, int *);
+static int ibd_delete_partition(void *, intptr_t, int, cred_t *, int *);
+static int ibd_get_partition_info(void *, intptr_t, int, cred_t *, int *);
+
+static dld_ioc_info_t ibd_dld_ioctl_list[] = {
+	{IBD_CREATE_IBPART, DLDCOPYINOUT, sizeof (ibpart_ioctl_t),
+	    ibd_create_partition, secpolicy_dl_config},
+	{IBD_DELETE_IBPART, DLDCOPYIN, sizeof (ibpart_ioctl_t),
+	    ibd_delete_partition, secpolicy_dl_config},
+	{IBD_INFO_IBPART, DLDCOPYIN, sizeof (ibd_ioctl_t),
+	    ibd_get_partition_info, NULL}
 };
 
 /*
@@ -701,8 +823,8 @@
     ibd_state_s::id_bgroup_created
     ibd_state_s::id_mac_state
     ibd_state_s::id_mtu
-    ibd_state_s::id_num_rwqe
-    ibd_state_s::id_num_swqe
+    ibd_state_s::id_ud_num_rwqe
+    ibd_state_s::id_ud_num_swqe
     ibd_state_s::id_qpnum
     ibd_state_s::id_rcq_hdl
     ibd_state_s::id_rx_buf_sz
@@ -885,7 +1007,9 @@
 		return (status);
 	}
 
-	mac_init_ops(&ibd_dev_ops, "ibd");
+	mutex_init(&ibd_objlist_lock, NULL, MUTEX_DRIVER, NULL);
+
+	mac_init_ops(&ibd_dev_ops, "ibp");
 	status = mod_install(&ibd_modlinkage);
 	if (status != 0) {
 		DPRINT(10, "_init:failed in mod_install()");
@@ -901,6 +1025,13 @@
 	ibd_gstate.ig_service_list = NULL;
 	mutex_exit(&ibd_gstate.ig_mutex);
 
+	if (dld_ioc_register(IBPART_IOC, ibd_dld_ioctl_list,
+	    DLDIOCCNT(ibd_dld_ioctl_list)) != 0) {
+		return (EIO);
+	}
+
+	ibt_register_part_attr_cb(ibd_get_part_attr, ibd_get_all_part_attr);
+
 #ifdef IBD_LOGGING
 	ibd_log_init();
 #endif
@@ -922,7 +1053,10 @@
 	if (status != 0)
 		return (status);
 
+	ibt_unregister_part_attr_cb();
+
 	mac_fini_ops(&ibd_dev_ops);
+	mutex_destroy(&ibd_objlist_lock);
 	ddi_soft_state_fini(&ibd_list);
 	mutex_destroy(&ibd_gstate.ig_mutex);
 #ifdef IBD_LOGGING
@@ -1091,6 +1225,17 @@
 			mutex_exit(&state->id_acache_req_lock);
 
 			/*
+			 * If we are in late hca initialization mode, do not
+			 * process any async request other than TRAP. TRAP
+			 * is used for indicating creation of a broadcast group;
+			 * in which case, we need to join/create the group.
+			 */
+			if ((state->id_mac_state & IBD_DRV_IN_LATE_HCA_INIT) &&
+			    (ptr->rq_op != IBD_ASYNC_TRAP)) {
+				goto free_req_and_continue;
+			}
+
+			/*
 			 * Once we have done the operation, there is no
 			 * guarantee the request slot is going to be valid,
 			 * it might be freed up (as in IBD_ASYNC_LEAVE, REAP,
@@ -1152,6 +1297,7 @@
 					ibd_async_rc_recycle_ace(state, ptr);
 					break;
 			}
+free_req_and_continue:
 			if (ptr != NULL)
 				kmem_cache_free(state->id_req_kmc, ptr);
 
@@ -1252,9 +1398,6 @@
 	ibd_ace_t *ce;
 	int i;
 
-	mutex_init(&state->id_acache_req_lock, NULL, MUTEX_DRIVER, NULL);
-	cv_init(&state->id_acache_req_cv, NULL, CV_DEFAULT, NULL);
-
 	mutex_init(&state->id_ac_mutex, NULL, MUTEX_DRIVER, NULL);
 	mutex_init(&state->id_mc_mutex, NULL, MUTEX_DRIVER, NULL);
 	mutex_enter(&state->id_ac_mutex);
@@ -1263,19 +1406,17 @@
 	list_create(&state->id_ah_active, sizeof (ibd_ace_t),
 	    offsetof(ibd_ace_t, ac_list));
 	state->id_ah_active_hash = mod_hash_create_extended("IBD AH hash",
-	    IBD_HASH_SIZE, mod_hash_null_keydtor, mod_hash_null_valdtor,
+	    state->id_hash_size, mod_hash_null_keydtor, mod_hash_null_valdtor,
 	    ibd_hash_by_id, NULL, ibd_hash_key_cmp, KM_SLEEP);
 	list_create(&state->id_mc_full, sizeof (ibd_mce_t),
 	    offsetof(ibd_mce_t, mc_list));
 	list_create(&state->id_mc_non, sizeof (ibd_mce_t),
 	    offsetof(ibd_mce_t, mc_list));
-	list_create(&state->id_req_list, sizeof (ibd_req_t),
-	    offsetof(ibd_req_t, rq_list));
 	state->id_ac_hot_ace = NULL;
 
 	state->id_ac_list = ce = (ibd_ace_t *)kmem_zalloc(sizeof (ibd_ace_t) *
-	    IBD_NUM_AH, KM_SLEEP);
-	for (i = 0; i < IBD_NUM_AH; i++, ce++) {
+	    state->id_num_ah, KM_SLEEP);
+	for (i = 0; i < state->id_num_ah; i++, ce++) {
 		if (ibt_alloc_ud_dest(state->id_hca_hdl, IBT_UD_DEST_NO_FLAGS,
 		    state->id_pd_hdl, &ce->ac_dest) != IBT_SUCCESS) {
 			mutex_exit(&state->id_ac_mutex);
@@ -1316,13 +1457,10 @@
 	list_destroy(&state->id_ah_active);
 	list_destroy(&state->id_mc_full);
 	list_destroy(&state->id_mc_non);
-	list_destroy(&state->id_req_list);
-	kmem_free(state->id_ac_list, sizeof (ibd_ace_t) * IBD_NUM_AH);
+	kmem_free(state->id_ac_list, sizeof (ibd_ace_t) * state->id_num_ah);
 	mutex_exit(&state->id_ac_mutex);
 	mutex_destroy(&state->id_ac_mutex);
 	mutex_destroy(&state->id_mc_mutex);
-	mutex_destroy(&state->id_acache_req_lock);
-	cv_destroy(&state->id_acache_req_cv);
 }
 
 /*
@@ -1960,6 +2098,12 @@
 }
 
 /*
+ * Late HCA Initialization:
+ * If plumb had succeeded without the availability of an active port or the
+ * pkey, and either of their availability is now being indicated via PORT_UP
+ * or PORT_CHANGE respectively, try a start of the interface.
+ *
+ * Normal Operation:
  * When the link is notified up, we need to do a few things, based
  * on the port's current p_init_type_reply claiming a reinit has been
  * done or not. The reinit steps are:
@@ -1995,8 +2139,8 @@
 	mutex_enter(&state->id_link_mutex);
 
 	/*
-	 * If the init code in ibd_m_start hasn't yet set up the
-	 * pkey/gid, nothing to do; that code will set the link state.
+	 * If the link state is unknown, a plumb has not yet been attempted
+	 * on the interface. Nothing to do.
 	 */
 	if (state->id_link_state == LINK_STATE_UNKNOWN) {
 		mutex_exit(&state->id_link_mutex);
@@ -2004,6 +2148,17 @@
 	}
 
 	/*
+	 * If link state is down because of plumb failure, and we are not in
+	 * late HCA init, and we were not successfully plumbed, nothing to do.
+	 */
+	if ((state->id_link_state == LINK_STATE_DOWN) &&
+	    ((state->id_mac_state & IBD_DRV_IN_LATE_HCA_INIT) == 0) &&
+	    ((state->id_mac_state & IBD_DRV_STARTED) == 0)) {
+		mutex_exit(&state->id_link_mutex);
+		goto link_mod_return;
+	}
+
+	/*
 	 * If this routine was called in response to a port down event,
 	 * we just need to see if this should be informed.
 	 */
@@ -2028,6 +2183,21 @@
 	}
 
 	/*
+	 * If in the previous attempt, the pkey was not found either due to the
+	 * port state being down, or due to its absence in the pkey table,
+	 * look for it now and try to start the interface.
+	 */
+	if (state->id_mac_state & IBD_DRV_IN_LATE_HCA_INIT) {
+		mutex_exit(&state->id_link_mutex);
+		if ((ret = ibd_start(state)) != 0) {
+			DPRINT(10, "ibd_linkmod: cannot start from late HCA "
+			    "init, ret=%d", ret);
+		}
+		ibt_free_portinfo(port_infop, port_infosz);
+		goto link_mod_return;
+	}
+
+	/*
 	 * Check the SM InitTypeReply flags. If both NoLoadReply and
 	 * PreserveContentReply are 0, we don't know anything about the
 	 * data loaded into the port attributes, so we need to verify
@@ -2234,11 +2404,14 @@
 	macp->m_src_addr = (uint8_t *)&state->id_macaddr;
 	macp->m_callbacks = &ibd_m_callbacks;
 	macp->m_min_sdu = 0;
-	if (state->id_enable_rc) {
+	if (state->id_type == IBD_PORT_DRIVER) {
+		macp->m_max_sdu = IBD_DEF_RC_MAX_SDU;
+	} else if (state->id_enable_rc) {
 		macp->m_max_sdu = state->rc_mtu - IPOIB_HDRSIZE;
 	} else {
 		macp->m_max_sdu = IBD_DEF_MAX_SDU;
 	}
+	macp->m_priv_props = ibd_priv_props;
 
 	/*
 	 *  Register ourselves with the GLDv3 interface
@@ -2255,7 +2428,7 @@
 }
 
 static int
-ibd_record_capab(ibd_state_t *state, dev_info_t *dip)
+ibd_record_capab(ibd_state_t *state)
 {
 	ibt_hca_attr_t hca_attrs;
 	ibt_status_t ibt_status;
@@ -2285,17 +2458,9 @@
 	 * 2. Set LSO policy, capability and maximum length
 	 */
 	if (state->id_enable_rc) {
-		state->id_lso_policy = B_FALSE;
 		state->id_lso_capable = B_FALSE;
 		state->id_lso_maxlen = 0;
 	} else {
-		if (ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS
-		    |DDI_PROP_NOTPROM, IBD_PROP_LSO_POLICY, 1)) {
-			state->id_lso_policy = B_TRUE;
-		} else {
-			state->id_lso_policy = B_FALSE;
-		}
-
 		if (hca_attrs.hca_max_lso_size > 0) {
 			state->id_lso_capable = B_TRUE;
 			if (hca_attrs.hca_max_lso_size > IBD_LSO_MAXLEN)
@@ -2356,28 +2521,30 @@
 
 	/*
 	 * 5. Set number of recv and send wqes after checking hca maximum
-	 *    channel size
-	 */
-	if (hca_attrs.hca_max_chan_sz < IBD_NUM_RWQE) {
-		state->id_num_rwqe = hca_attrs.hca_max_chan_sz;
-	} else {
-		state->id_num_rwqe = IBD_NUM_RWQE;
-	}
-	state->id_rx_bufs_outstanding_limit = state->id_num_rwqe - IBD_RWQE_MIN;
-	if (hca_attrs.hca_max_chan_sz < IBD_NUM_SWQE) {
-		state->id_num_swqe = hca_attrs.hca_max_chan_sz;
-	} else {
-		state->id_num_swqe = IBD_NUM_SWQE;
-	}
+	 *    channel size. Store the max channel size in the state so that it
+	 *    can be referred to when the swqe/rwqe change is requested via
+	 *    dladm.
+	 */
+
+	state->id_hca_max_chan_sz = hca_attrs.hca_max_chan_sz;
+
+	if (hca_attrs.hca_max_chan_sz < state->id_ud_num_rwqe)
+		state->id_ud_num_rwqe = hca_attrs.hca_max_chan_sz;
+
+	state->id_rx_bufs_outstanding_limit = state->id_ud_num_rwqe -
+	    IBD_RWQE_MIN;
+
+	if (hca_attrs.hca_max_chan_sz < state->id_ud_num_swqe)
+		state->id_ud_num_swqe = hca_attrs.hca_max_chan_sz;
+
 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*state))
 
 	return (DDI_SUCCESS);
 }
 
 static int
-ibd_unattach(ibd_state_t *state, dev_info_t *dip)
-{
-	int instance;
+ibd_part_unattach(ibd_state_t *state)
+{
 	uint32_t progress = state->id_mac_state;
 	ibt_status_t ret;
 
@@ -2405,6 +2572,33 @@
 		state->id_mac_state &= (~IBD_DRV_MAC_REGISTERED);
 	}
 
+	if (progress & IBD_DRV_ASYNC_THR_CREATED) {
+		/*
+		 * No new async requests will be posted since the device
+		 * link state has been marked as unknown; completion handlers
+		 * have been turned off, so Tx handler will not cause any
+		 * more IBD_ASYNC_REAP requests.
+		 *
+		 * Queue a request for the async thread to exit, which will
+		 * be serviced after any pending ones. This can take a while,
+		 * specially if the SM is unreachable, since IBMF will slowly
+		 * timeout each SM request issued by the async thread.  Reap
+		 * the thread before continuing on, we do not want it to be
+		 * lingering in modunloaded code.
+		 */
+		ibd_queue_work_slot(state, &state->id_ah_req, IBD_ASYNC_EXIT);
+		thread_join(state->id_async_thrid);
+
+		state->id_mac_state &= (~IBD_DRV_ASYNC_THR_CREATED);
+	}
+
+	if (progress & IBD_DRV_REQ_LIST_INITED) {
+		list_destroy(&state->id_req_list);
+		mutex_destroy(&state->id_acache_req_lock);
+		cv_destroy(&state->id_acache_req_cv);
+		state->id_mac_state &= ~IBD_DRV_REQ_LIST_INITED;
+	}
+
 	if (progress & IBD_DRV_PD_ALLOCD) {
 		if ((ret = ibt_free_pd(state->id_hca_hdl,
 		    state->id_pd_hdl)) != IBT_SUCCESS) {
@@ -2471,44 +2665,22 @@
 		state->id_mac_state &= (~IBD_DRV_STATE_INITIALIZED);
 	}
 
-	instance = ddi_get_instance(dip);
-	ddi_soft_state_free(ibd_list, instance);
-
 	return (DDI_SUCCESS);
 }
 
-/*
- * Attach device to the IO framework.
- */
-static int
-ibd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
-{
-	ibd_state_t *state = NULL;
-	ib_guid_t hca_guid;
-	int instance;
+int
+ibd_part_attach(ibd_state_t *state, dev_info_t *dip)
+{
 	ibt_status_t ret;
 	int rv;
-
-	/*
-	 * IBD doesn't support suspend/resume
-	 */
-	if (cmd != DDI_ATTACH)
-		return (DDI_FAILURE);
-
-	/*
-	 * Allocate softstate structure
-	 */
-	instance = ddi_get_instance(dip);
-	if (ddi_soft_state_zalloc(ibd_list, instance) == DDI_FAILURE)
-		return (DDI_FAILURE);
-	state = ddi_get_soft_state(ibd_list, instance);
+	kthread_t *kht;
 
 	/*
 	 * Initialize mutexes and condition variables
 	 */
 	if (ibd_state_init(state, dip) != DDI_SUCCESS) {
 		DPRINT(10, "ibd_attach: failed in ibd_state_init()");
-		goto attach_fail;
+		return (DDI_FAILURE);
 	}
 	state->id_mac_state |= IBD_DRV_STATE_INITIALIZED;
 
@@ -2520,7 +2692,7 @@
 		    NULL, NULL, ibd_intr, (caddr_t)state)) != DDI_SUCCESS) {
 			DPRINT(10, "ibd_attach: failed in "
 			    "ddi_add_softintr(id_rx),  ret=%d", rv);
-			goto attach_fail;
+			return (DDI_FAILURE);
 		}
 		state->id_mac_state |= IBD_DRV_RXINTR_ADDED;
 	}
@@ -2530,37 +2702,12 @@
 		    (caddr_t)state)) != DDI_SUCCESS) {
 			DPRINT(10, "ibd_attach: failed in "
 			    "ddi_add_softintr(id_tx), ret=%d", rv);
-			goto attach_fail;
+			return (DDI_FAILURE);
 		}
 		state->id_mac_state |= IBD_DRV_TXINTR_ADDED;
 	}
 
 	/*
-	 * Obtain IBA P_Key, port number and HCA guid and validate
-	 * them (for P_Key, only full members are allowed as per
-	 * IPoIB specification; neither port number nor HCA guid
-	 * can be zero)
-	 */
-	if ((state->id_pkey = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
-	    "port-pkey", IB_PKEY_INVALID_LIMITED)) <= IB_PKEY_INVALID_FULL) {
-		DPRINT(10, "ibd_attach: port device has wrong partition (0x%x)",
-		    state->id_pkey);
-		goto attach_fail;
-	}
-	if ((state->id_port = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
-	    "port-number", 0)) == 0) {
-		DPRINT(10, "ibd_attach: invalid port number (%d)",
-		    state->id_port);
-		goto attach_fail;
-	}
-	if ((hca_guid = ddi_prop_get_int64(DDI_DEV_T_ANY, dip, 0,
-	    "hca-guid", 0)) == 0) {
-		DPRINT(10, "ibd_attach: port hca has invalid guid (0x%llx)",
-		    hca_guid);
-		goto attach_fail;
-	}
-
-	/*
 	 * Attach to IBTL
 	 */
 	mutex_enter(&ibd_gstate.ig_mutex);
@@ -2570,15 +2717,14 @@
 			DPRINT(10, "ibd_attach: global: failed in "
 			    "ibt_attach(), ret=%d", ret);
 			mutex_exit(&ibd_gstate.ig_mutex);
-			goto attach_fail;
+			return (DDI_FAILURE);
 		}
 	}
 	if ((ret = ibt_attach(&ibd_clnt_modinfo, dip, state,
 	    &state->id_ibt_hdl)) != IBT_SUCCESS) {
-		DPRINT(10, "ibd_attach: failed in ibt_attach(), ret=%d",
-		    ret);
+		DPRINT(10, "ibd_attach: failed in ibt_attach(), ret=%d", ret);
 		mutex_exit(&ibd_gstate.ig_mutex);
-		goto attach_fail;
+		return (DDI_FAILURE);
 	}
 	ibd_gstate.ig_ibt_hdl_ref_cnt++;
 	mutex_exit(&ibd_gstate.ig_mutex);
@@ -2587,22 +2733,19 @@
 	/*
 	 * Open the HCA
 	 */
-	if ((ret = ibt_open_hca(state->id_ibt_hdl, hca_guid,
+	if ((ret = ibt_open_hca(state->id_ibt_hdl, state->id_hca_guid,
 	    &state->id_hca_hdl)) != IBT_SUCCESS) {
 		DPRINT(10, "ibd_attach: ibt_open_hca() failed, ret=%d", ret);
-		goto attach_fail;
+		return (DDI_FAILURE);
 	}
 	state->id_mac_state |= IBD_DRV_HCA_OPENED;
 
-	/* Get RC config before ibd_record_capab */
-	ibd_rc_get_conf(state);
-
 #ifdef DEBUG
 	/* Initialize Driver Counters for Reliable Connected Mode */
 	if (state->id_enable_rc) {
 		if (ibd_rc_init_stats(state) != DDI_SUCCESS) {
 			DPRINT(10, "ibd_attach: failed in ibd_rc_init_stats");
-			goto attach_fail;
+			return (DDI_FAILURE);
 		}
 		state->id_mac_state |= IBD_DRV_RC_PRIVATE_STATE;
 	}
@@ -2611,7 +2754,7 @@
 	/*
 	 * Record capabilities
 	 */
-	(void) ibd_record_capab(state, dip);
+	(void) ibd_record_capab(state);
 
 	/*
 	 * Allocate a protection domain on the HCA
@@ -2619,32 +2762,49 @@
 	if ((ret = ibt_alloc_pd(state->id_hca_hdl, IBT_PD_NO_FLAGS,
 	    &state->id_pd_hdl)) != IBT_SUCCESS) {
 		DPRINT(10, "ibd_attach: ibt_alloc_pd() failed, ret=%d", ret);
-		goto attach_fail;
+		return (DDI_FAILURE);
 	}
 	state->id_mac_state |= IBD_DRV_PD_ALLOCD;
 
 
 	/*
-	 * Register ibd interfaces with the Nemo framework
-	 */
-	if (ibd_register_mac(state, dip) != IBT_SUCCESS) {
-		DPRINT(10, "ibd_attach: failed in ibd_register_mac()");
-		goto attach_fail;
-	}
-	state->id_mac_state |= IBD_DRV_MAC_REGISTERED;
-
-	/*
-	 * We're done with everything we could to make the attach
-	 * succeed.  All the buffer allocations and IPoIB broadcast
-	 * group joins are deferred to when the interface instance
-	 * is actually plumbed to avoid wasting memory.
-	 */
+	 * We need to initialise the req_list that is required for the
+	 * operation of the async_thread.
+	 */
+	mutex_init(&state->id_acache_req_lock, NULL, MUTEX_DRIVER, NULL);
+	cv_init(&state->id_acache_req_cv, NULL, CV_DEFAULT, NULL);
+	list_create(&state->id_req_list, sizeof (ibd_req_t),
+	    offsetof(ibd_req_t, rq_list));
+	state->id_mac_state |= IBD_DRV_REQ_LIST_INITED;
+
+	/*
+	 * Create the async thread; thread_create never fails.
+	 */
+	kht = thread_create(NULL, 0, ibd_async_work, state, 0, &p0,
+	    TS_RUN, minclsyspri);
+	state->id_async_thrid = kht->t_did;
+	state->id_mac_state |= IBD_DRV_ASYNC_THR_CREATED;
+
 	return (DDI_SUCCESS);
-
-attach_fail:
-	(void) ibd_unattach(state, dip);
-	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*state))
-	return (DDI_FAILURE);
+}
+
+/*
+ * Attach device to the IO framework.
+ */
+static int
+ibd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
+{
+	int ret;
+
+	switch (cmd) {
+		case DDI_ATTACH:
+			ret = ibd_port_attach(dip);
+			break;
+		default:
+			ret = DDI_FAILURE;
+			break;
+	}
+	return (ret);
 }
 
 /*
@@ -2673,7 +2833,7 @@
 	 * done ibd_attach(), ibd_m_start() and ibd_m_stop() correctly
 	 * so far, we should find all the flags we need in id_mac_state.
 	 */
-	return (ibd_unattach(state, dip));
+	return (ibd_port_unattach(state, dip));
 }
 
 /*
@@ -2708,13 +2868,11 @@
 	state->id_rx_list.dl_cnt = 0;
 	mutex_init(&state->id_rx_list.dl_mutex, NULL, MUTEX_DRIVER, NULL);
 	mutex_init(&state->id_rx_free_list.dl_mutex, NULL, MUTEX_DRIVER, NULL);
-	(void) sprintf(buf, "ibd_req%d", ddi_get_instance(dip));
+	(void) sprintf(buf, "ibd_req%d_%x", ddi_get_instance(dip),
+	    state->id_pkey);
 	state->id_req_kmc = kmem_cache_create(buf, sizeof (ibd_req_t),
 	    0, NULL, NULL, NULL, NULL, NULL, 0);
 
-	mutex_init(&state->id_macst_lock, NULL, MUTEX_DRIVER, NULL);
-	cv_init(&state->id_macst_cv, NULL, CV_DEFAULT, NULL);
-
 	/* For Reliable Connected Mode */
 	mutex_init(&state->rc_rx_lock, NULL, MUTEX_DRIVER, NULL);
 	mutex_init(&state->rc_tx_large_bufs_lock, NULL, MUTEX_DRIVER, NULL);
@@ -2723,6 +2881,41 @@
 	mutex_init(&state->rc_pass_chan_list.chan_list_mutex, NULL,
 	    MUTEX_DRIVER, NULL);
 
+	/*
+	 * Make the default link mode as RC. If this fails during connection
+	 * setup, the link mode is automatically transitioned to UD.
+	 * Also set the RC MTU.
+	 */
+	state->id_enable_rc = IBD_DEF_LINK_MODE;
+	state->rc_mtu = IBD_DEF_RC_MAX_MTU;
+	state->id_mtu = IBD_DEF_MAX_MTU;
+
+	/* Initialize all tunables to their default values */
+	state->id_lso_policy = IBD_DEF_LSO_POLICY;
+	state->id_num_lso_bufs = IBD_DEF_NUM_LSO_BUFS;
+	state->id_num_ah = IBD_DEF_NUM_AH;
+	state->id_hash_size = IBD_DEF_HASH_SIZE;
+	state->id_create_broadcast_group = IBD_DEF_CREATE_BCAST_GROUP;
+	state->id_allow_coalesce_comp_tuning = IBD_DEF_COALESCE_COMPLETIONS;
+	state->id_ud_rx_comp_count = IBD_DEF_UD_RX_COMP_COUNT;
+	state->id_ud_rx_comp_usec = IBD_DEF_UD_RX_COMP_USEC;
+	state->id_ud_tx_comp_count = IBD_DEF_UD_TX_COMP_COUNT;
+	state->id_ud_tx_comp_usec = IBD_DEF_UD_TX_COMP_USEC;
+	state->id_rc_rx_comp_count = IBD_DEF_RC_RX_COMP_COUNT;
+	state->id_rc_rx_comp_usec = IBD_DEF_RC_RX_COMP_USEC;
+	state->id_rc_tx_comp_count = IBD_DEF_RC_TX_COMP_COUNT;
+	state->id_rc_tx_comp_usec = IBD_DEF_RC_TX_COMP_USEC;
+	state->id_ud_tx_copy_thresh = IBD_DEF_UD_TX_COPY_THRESH;
+	state->id_rc_rx_copy_thresh = IBD_DEF_RC_RX_COPY_THRESH;
+	state->id_rc_tx_copy_thresh = IBD_DEF_RC_TX_COPY_THRESH;
+	state->id_ud_num_rwqe = IBD_DEF_UD_NUM_RWQE;
+	state->id_ud_num_swqe = IBD_DEF_UD_NUM_SWQE;
+	state->id_rc_num_rwqe = IBD_DEF_RC_NUM_RWQE;
+	state->id_rc_num_swqe = IBD_DEF_RC_NUM_SWQE;
+	state->rc_enable_srq = IBD_DEF_RC_ENABLE_SRQ;
+	state->id_rc_num_srq = IBD_DEF_RC_NUM_SRQ;
+	state->id_rc_rx_rwqe_thresh = IBD_DEF_RC_RX_RWQE_THRESH;
+
 	return (DDI_SUCCESS);
 }
 
@@ -2732,9 +2925,6 @@
 static void
 ibd_state_fini(ibd_state_t *state)
 {
-	cv_destroy(&state->id_macst_cv);
-	mutex_destroy(&state->id_macst_lock);
-
 	kmem_cache_destroy(state->id_req_kmc);
 
 	mutex_destroy(&state->id_rx_list.dl_mutex);
@@ -3213,6 +3403,7 @@
 	ibt_mcg_info_t mcg_info;
 
 	state->id_bgroup_created = B_FALSE;
+	state->id_bgroup_present = B_FALSE;
 
 query_bcast_grp:
 	bzero(&mcg_attr, sizeof (ibt_mcg_attr_t));
@@ -3242,7 +3433,7 @@
 	}
 
 	if (!found) {
-		if (ibd_create_broadcast_group) {
+		if (state->id_create_broadcast_group) {
 			/*
 			 * If we created the broadcast group, but failed to
 			 * find it, we can't do anything except leave the
@@ -3300,6 +3491,7 @@
 		goto find_bgroup_fail;
 	}
 	state->id_mtu = mcgmtu;
+	state->id_bgroup_present = B_TRUE;
 
 	return (IBT_SUCCESS);
 
@@ -3323,30 +3515,30 @@
 	 */
 	state->id_tx_buf_sz = state->id_mtu;
 	if (state->id_lso_policy && state->id_lso_capable &&
-	    (IBD_TX_BUF_SZ > state->id_mtu)) {
-		state->id_tx_buf_sz = IBD_TX_BUF_SZ;
-	}
-
-	state->id_tx_bufs = kmem_zalloc(state->id_num_swqe *
+	    (state->id_ud_tx_copy_thresh > state->id_mtu)) {
+		state->id_tx_buf_sz = state->id_ud_tx_copy_thresh;
+	}
+
+	state->id_tx_bufs = kmem_zalloc(state->id_ud_num_swqe *
 	    state->id_tx_buf_sz, KM_SLEEP);
 
-	state->id_tx_wqes = kmem_zalloc(state->id_num_swqe *
+	state->id_tx_wqes = kmem_zalloc(state->id_ud_num_swqe *
 	    sizeof (ibd_swqe_t), KM_SLEEP);
 
 	/*
 	 * Do one memory registration on the entire txbuf area
 	 */
 	mem_attr.mr_vaddr = (uint64_t)(uintptr_t)state->id_tx_bufs;
-	mem_attr.mr_len = state->id_num_swqe * state->id_tx_buf_sz;
+	mem_attr.mr_len = state->id_ud_num_swqe * state->id_tx_buf_sz;
 	mem_attr.mr_as = NULL;
 	mem_attr.mr_flags = IBT_MR_SLEEP;
 	if (ibt_register_mr(state->id_hca_hdl, state->id_pd_hdl, &mem_attr,
 	    &state->id_tx_mr_hdl, &state->id_tx_mr_desc) != IBT_SUCCESS) {
 		DPRINT(10, "ibd_alloc_tx_copybufs: ibt_register_mr failed");
 		kmem_free(state->id_tx_wqes,
-		    state->id_num_swqe * sizeof (ibd_swqe_t));
+		    state->id_ud_num_swqe * sizeof (ibd_swqe_t));
 		kmem_free(state->id_tx_bufs,
-		    state->id_num_swqe * state->id_tx_buf_sz);
+		    state->id_ud_num_swqe * state->id_tx_buf_sz);
 		state->id_tx_bufs = NULL;
 		return (DDI_FAILURE);
 	}
@@ -3375,7 +3567,7 @@
 	/*
 	 * Allocate the entire lso memory and register it
 	 */
-	memsz = IBD_NUM_LSO_BUFS * IBD_LSO_BUFSZ;
+	memsz = state->id_num_lso_bufs * IBD_LSO_BUFSZ;
 	membase = kmem_zalloc(memsz, KM_SLEEP);
 
 	mem_attr.mr_vaddr = (uint64_t)(uintptr_t)membase;
@@ -3398,7 +3590,7 @@
 	 * can always derive the address of a buflist entry from the address of
 	 * an lso buffer.
 	 */
-	buflist = kmem_zalloc(IBD_NUM_LSO_BUFS * sizeof (ibd_lsobuf_t),
+	buflist = kmem_zalloc(state->id_num_lso_bufs * sizeof (ibd_lsobuf_t),
 	    KM_SLEEP);
 
 	/*
@@ -3406,7 +3598,7 @@
 	 */
 	memp = membase;
 	lbufp = buflist;
-	for (i = 0; i < IBD_NUM_LSO_BUFS; i++) {
+	for (i = 0; i < state->id_num_lso_bufs; i++) {
 		lbufp->lb_isfree = 1;
 		lbufp->lb_buf = memp;
 		lbufp->lb_next = lbufp + 1;
@@ -3424,7 +3616,7 @@
 	bktp->bkt_bufl = buflist;
 	bktp->bkt_free_head = buflist;
 	bktp->bkt_mem = membase;
-	bktp->bkt_nelem = IBD_NUM_LSO_BUFS;
+	bktp->bkt_nelem = state->id_num_lso_bufs;
 	bktp->bkt_nfree = bktp->bkt_nelem;
 
 	state->id_lso = bktp;
@@ -3450,7 +3642,7 @@
 
 	if (state->id_lso_policy && state->id_lso_capable) {
 		if (ibd_alloc_tx_lsobufs(state) != DDI_SUCCESS)
-			state->id_lso_policy = B_FALSE;
+			state->id_lso_capable = B_FALSE;
 	}
 
 	mutex_enter(&state->id_tx_list.dl_mutex);
@@ -3472,7 +3664,7 @@
 	len = state->id_tx_buf_sz;
 	swqe = state->id_tx_wqes;
 	mutex_enter(&state->id_tx_list.dl_mutex);
-	for (i = 0; i < state->id_num_swqe; i++, swqe++, bufaddr += len) {
+	for (i = 0; i < state->id_ud_num_swqe; i++, swqe++, bufaddr += len) {
 		swqe->swqe_next = NULL;
 		swqe->swqe_im_mblk = NULL;
 
@@ -3633,8 +3825,10 @@
 	/*
 	 * Free txbuf memory
 	 */
-	kmem_free(state->id_tx_wqes, state->id_num_swqe * sizeof (ibd_swqe_t));
-	kmem_free(state->id_tx_bufs, state->id_num_swqe * state->id_tx_buf_sz);
+	kmem_free(state->id_tx_wqes, state->id_ud_num_swqe *
+	    sizeof (ibd_swqe_t));
+	kmem_free(state->id_tx_bufs, state->id_ud_num_swqe *
+	    state->id_tx_buf_sz);
 	state->id_tx_wqes = NULL;
 	state->id_tx_bufs = NULL;
 }
@@ -3828,10 +4022,10 @@
 	 */
 	state->id_rx_buf_sz = state->id_mtu + IPOIB_GRH_SIZE;
 
-	state->id_rx_bufs = kmem_zalloc(state->id_num_rwqe *
+	state->id_rx_bufs = kmem_zalloc(state->id_ud_num_rwqe *
 	    state->id_rx_buf_sz, KM_SLEEP);
 
-	state->id_rx_wqes = kmem_zalloc(state->id_num_rwqe *
+	state->id_rx_wqes = kmem_zalloc(state->id_ud_num_rwqe *
 	    sizeof (ibd_rwqe_t), KM_SLEEP);
 
 	state->id_rx_nqueues = 1 << IBD_LOG_RX_POST;
@@ -3846,16 +4040,16 @@
 	 * Do one memory registration on the entire rxbuf area
 	 */
 	mem_attr.mr_vaddr = (uint64_t)(uintptr_t)state->id_rx_bufs;
-	mem_attr.mr_len = state->id_num_rwqe * state->id_rx_buf_sz;
+	mem_attr.mr_len = state->id_ud_num_rwqe * state->id_rx_buf_sz;
 	mem_attr.mr_as = NULL;
 	mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE;
 	if (ibt_register_mr(state->id_hca_hdl, state->id_pd_hdl, &mem_attr,
 	    &state->id_rx_mr_hdl, &state->id_rx_mr_desc) != IBT_SUCCESS) {
 		DPRINT(10, "ibd_alloc_rx_copybufs: ibt_register_mr failed");
 		kmem_free(state->id_rx_wqes,
-		    state->id_num_rwqe * sizeof (ibd_rwqe_t));
+		    state->id_ud_num_rwqe * sizeof (ibd_rwqe_t));
 		kmem_free(state->id_rx_bufs,
-		    state->id_num_rwqe * state->id_rx_buf_sz);
+		    state->id_ud_num_rwqe * state->id_rx_buf_sz);
 		state->id_rx_bufs = NULL;
 		state->id_rx_wqes = NULL;
 		return (DDI_FAILURE);
@@ -3928,7 +4122,7 @@
 	rwqe = state->id_rx_wqes;
 	bufaddr = state->id_rx_bufs;
 	list = NULL;
-	for (i = 0; i < state->id_num_rwqe; i++, rwqe++, bufaddr += len) {
+	for (i = 0; i < state->id_ud_num_rwqe; i++, rwqe++, bufaddr += len) {
 		rwqe->w_state = state;
 		rwqe->w_freemsg_cb.free_func = ibd_freemsg_cb;
 		rwqe->w_freemsg_cb.free_arg = (char *)rwqe;
@@ -4001,8 +4195,10 @@
 	}
 	kmem_free(state->id_rx_queues, state->id_rx_nqueues *
 	    sizeof (ibd_rx_queue_t));
-	kmem_free(state->id_rx_wqes, state->id_num_rwqe * sizeof (ibd_rwqe_t));
-	kmem_free(state->id_rx_bufs, state->id_num_rwqe * state->id_rx_buf_sz);
+	kmem_free(state->id_rx_wqes, state->id_ud_num_rwqe *
+	    sizeof (ibd_rwqe_t));
+	kmem_free(state->id_rx_bufs, state->id_ud_num_rwqe *
+	    state->id_rx_buf_sz);
 	state->id_rx_queues = NULL;
 	state->id_rx_wqes = NULL;
 	state->id_rx_bufs = NULL;
@@ -4017,7 +4213,7 @@
 		mutex_exit(&state->id_rx_free_list.dl_mutex);
 		return;
 	}
-	ASSERT(state->id_rx_free_list.dl_cnt == state->id_num_rwqe);
+	ASSERT(state->id_rx_free_list.dl_cnt == state->id_ud_num_rwqe);
 	ibd_free_rx_copybufs(state);
 	state->id_rx_free_list.dl_cnt = 0;
 	state->id_rx_free_list.dl_head = NULL;
@@ -4182,6 +4378,15 @@
 		case IBT_SM_EVENT_MCG_CREATED:
 		case IBT_SM_EVENT_MCG_DELETED:
 			/*
+			 * If it is a "deleted" event and we are in late hca
+			 * init, nothing to do.
+			 */
+			if (((state->id_mac_state & IBD_DRV_IN_LATE_HCA_INIT) ==
+			    IBD_DRV_IN_LATE_HCA_INIT) && (code ==
+			    IBT_SM_EVENT_MCG_DELETED)) {
+				break;
+			}
+			/*
 			 * Common processing of creation/deletion traps.
 			 * First check if the instance is being
 			 * [de]initialized; back off then, without doing
@@ -4206,10 +4411,50 @@
 {
 	ib_gid_t mgid = req->rq_gid;
 	ibt_subnet_event_code_t code = (ibt_subnet_event_code_t)req->rq_ptr;
+	int ret;
+	ib_pkey_t pkey = (mgid.gid_prefix >> 16) & 0xffff;
 
 	DPRINT(10, "ibd_async_trap : %d\n", code);
 
 	/*
+	 * Check if we have already joined the IPoIB broadcast group for our
+	 * PKEY. If joined, perform the rest of the operation.
+	 * Else, the interface is not initialised. Do the initialisation here
+	 * by calling ibd_start() and return.
+	 */
+
+	if (((state->id_mac_state & IBD_DRV_IN_LATE_HCA_INIT) ==
+	    IBD_DRV_IN_LATE_HCA_INIT) && (state->id_bgroup_present == 0) &&
+	    (code == IBT_SM_EVENT_MCG_CREATED)) {
+		/*
+		 * If we are in late HCA init and a notification for the
+		 * creation of a MCG came in, check if it is the IPoIB MCG for
+		 * this pkey. If not, return.
+		 */
+		if ((mgid.gid_guid != IB_MGID_IPV4_LOWGRP_MASK) || (pkey !=
+		    state->id_pkey)) {
+			ibd_async_done(state);
+			return;
+		}
+		ibd_set_mac_progress(state, IBD_DRV_RESTART_IN_PROGRESS);
+		/*
+		 * Check if there is still a necessity to start the interface.
+		 * It is possible that the user attempted unplumb at just about
+		 * the same time, and if unplumb succeeded, we have nothing to
+		 * do.
+		 */
+		if (((state->id_mac_state & IBD_DRV_IN_LATE_HCA_INIT) ==
+		    IBD_DRV_IN_LATE_HCA_INIT) &&
+		    ((ret = ibd_start(state)) != 0)) {
+			DPRINT(10, "ibd_async_trap: cannot start from late HCA "
+			    "init, ret=%d", ret);
+		}
+		ibd_clr_mac_progress(state, IBD_DRV_RESTART_IN_PROGRESS);
+		ibd_async_done(state);
+		return;
+	}
+
+	/*
 	 * Atomically search the nonmember and sendonlymember lists and
 	 * delete.
 	 */
@@ -4250,6 +4495,9 @@
 {
 	ibd_state_t *state = arg;
 
+	if (state->id_type == IBD_PORT_DRIVER)
+		return (B_FALSE);
+
 	switch (cap) {
 	case MAC_CAPAB_HCKSUM: {
 		uint32_t *txflags = cap_data;
@@ -4299,6 +4547,759 @@
 	return (B_TRUE);
 }
 
+/*
+ * Callback function for setting properties.
+ *
+ * arg is the ibd_state_t of the instance, pr_name/pr_num identify the
+ * property, and pr_val/pr_valsize describe the caller's value buffer.
+ *
+ * Returns:
+ *	ENOTSUP	the instance is a port driver, or pr_num is not handled here
+ *	EBUSY	the link mode was requested while IBD_DRV_STARTED is set
+ *	EINVAL	the value is missing, too small to hold a uint32_t, or is
+ *		neither IBD_LINK_MODE_UD nor IBD_LINK_MODE_RC
+ *	0	the link mode already had the requested value
+ * otherwise the result of mac_maxsdu_update() (link mode) or of
+ * ibd_set_priv_prop() (private properties).
+ */
+static int
+ibd_m_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
+    uint_t pr_valsize, const void *pr_val)
+{
+	ibd_state_t *state = arg;
+	int err = 0;
+	uint32_t link_mode;
+
+	/* Cannot set properties on a port driver */
+	if (state->id_type == IBD_PORT_DRIVER) {
+		return (ENOTSUP);
+	}
+
+	switch (pr_num) {
+		case MAC_PROP_IB_LINKMODE:
+			if (state->id_mac_state & IBD_DRV_STARTED) {
+				err = EBUSY;
+				break;
+			}
+			/*
+			 * sizeof (link_mode) bytes are copied out of pr_val
+			 * below, so refuse a missing or undersized buffer
+			 * instead of reading past its end.
+			 */
+			if (pr_val == NULL ||
+			    pr_valsize < sizeof (link_mode)) {
+				err = EINVAL;
+				break;
+			}
+			bcopy(pr_val, &link_mode, sizeof (link_mode));
+			if (link_mode != IBD_LINK_MODE_UD &&
+			    link_mode != IBD_LINK_MODE_RC) {
+				err = EINVAL;
+			} else {
+				if (link_mode == IBD_LINK_MODE_RC) {
+					/* Already in RC mode: nothing to do */
+					if (state->id_enable_rc) {
+						return (0);
+					}
+					state->id_enable_rc = 1;
+					/* inform MAC framework of new MTU */
+					err = mac_maxsdu_update(state->id_mh,
+					    state->rc_mtu - IPOIB_HDRSIZE);
+				} else {
+					/* Already in UD mode: nothing to do */
+					if (!state->id_enable_rc) {
+						return (0);
+					}
+					state->id_enable_rc = 0;
+					err = mac_maxsdu_update(state->id_mh,
+					    state->id_mtu - IPOIB_HDRSIZE);
+				}
+				/*
+				 * The capabilities are re-recorded and
+				 * re-advertised even when the SDU update
+				 * above reported an error.
+				 */
+				(void) ibd_record_capab(state);
+				mac_capab_update(state->id_mh);
+			}
+			break;
+		case MAC_PROP_PRIVATE:
+			err = ibd_set_priv_prop(state, pr_name,
+			    pr_valsize, pr_val);
+			break;
+		default:
+			err = ENOTSUP;
+			break;
+	}
+	return (err);
+}
+
+/*
+ * Callback function for reading properties.
+ *
+ * arg is the ibd_state_t of the instance, pr_name/pr_num identify the
+ * property, and pr_val/pr_valsize describe the buffer the value is
+ * written to.
+ *
+ * MAC_PROP_IB_LINKMODE yields id_enable_rc as a uint_t; MAC_PROP_PRIVATE is
+ * delegated to ibd_get_priv_prop().  On a port driver instance every
+ * property except MAC_PROP_MTU is refused up front with ENOTSUP;
+ * MAC_PROP_MTU itself is not handled by the switch below and so also ends
+ * up as ENOTSUP.  EINVAL is returned when the link mode buffer is missing
+ * or smaller than a uint_t.
+ */
+static int
+ibd_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
+    uint_t pr_valsize, void *pr_val)
+{
+	ibd_state_t *state = arg;
+	uint_t link_mode;
+	int err = 0;
+
+	switch (pr_num) {
+		case MAC_PROP_MTU:
+			break;
+		default:
+			if (state->id_type == IBD_PORT_DRIVER) {
+				return (ENOTSUP);
+			}
+			break;
+	}
+
+	switch (pr_num) {
+		case MAC_PROP_IB_LINKMODE:
+			/*
+			 * Never write more than the caller's buffer can
+			 * hold, and copy rather than store through a cast
+			 * so that no alignment of pr_val is assumed.
+			 */
+			if (pr_val == NULL ||
+			    pr_valsize < sizeof (link_mode)) {
+				err = EINVAL;
+				break;
+			}
+			link_mode = state->id_enable_rc;
+			bcopy(&link_mode, pr_val, sizeof (link_mode));
+			break;
+		case MAC_PROP_PRIVATE:
+			err = ibd_get_priv_prop(state, pr_name, pr_valsize,
+			    pr_val);
+			break;
+		default:
+			err = ENOTSUP;
+			break;
+	}
+	return (err);
+}
+
+/*
+ * Callback function describing a property to the MAC framework.
+ *
+ * MAC_PROP_IB_LINKMODE reports IBD_DEF_LINK_MODE as its default.
+ * MAC_PROP_MTU is marked read-only and given a range: 1500 to
+ * IBD_DEF_RC_MAX_SDU on a port driver instance, exactly IBD_DEF_RC_MAX_SDU
+ * when id_enable_rc is set, and exactly id_mtu - IPOIB_HDRSIZE otherwise.
+ * For MAC_PROP_PRIVATE, "_ibd_broadcast_group" is marked read-only and the
+ * other known tunables report their IBD_DEF_* default as a decimal string;
+ * unknown names, and any other pr_num, are left untouched.
+ */
+static void
+ibd_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num,
+    mac_prop_info_handle_t prh)
+{
+	/* Private tunables that report a default, paired with that default */
+	static const struct {
+		const char	*pd_name;
+		int		pd_default;
+	} priv_defaults[] = {
+		{ "_ibd_coalesce_completions", IBD_DEF_COALESCE_COMPLETIONS },
+		{ "_ibd_create_broadcast_group", IBD_DEF_CREATE_BCAST_GROUP },
+		{ "_ibd_hash_size", IBD_DEF_HASH_SIZE },
+		{ "_ibd_lso_enable", IBD_DEF_LSO_POLICY },
+		{ "_ibd_num_ah", IBD_DEF_NUM_AH },
+		{ "_ibd_num_lso_bufs", IBD_DEF_NUM_LSO_BUFS },
+		{ "_ibd_rc_enable_srq", IBD_DEF_RC_ENABLE_SRQ },
+		{ "_ibd_rc_num_rwqe", IBD_DEF_RC_NUM_RWQE },
+		{ "_ibd_rc_num_srq", IBD_DEF_RC_NUM_SRQ },
+		{ "_ibd_rc_num_swqe", IBD_DEF_RC_NUM_SWQE },
+		{ "_ibd_rc_rx_comp_count", IBD_DEF_RC_RX_COMP_COUNT },
+		{ "_ibd_rc_rx_comp_usec", IBD_DEF_RC_RX_COMP_USEC },
+		{ "_ibd_rc_rx_copy_thresh", IBD_DEF_RC_RX_COPY_THRESH },
+		{ "_ibd_rc_rx_rwqe_thresh", IBD_DEF_RC_RX_RWQE_THRESH },
+		{ "_ibd_rc_tx_comp_count", IBD_DEF_RC_TX_COMP_COUNT },
+		{ "_ibd_rc_tx_comp_usec", IBD_DEF_RC_TX_COMP_USEC },
+		{ "_ibd_rc_tx_copy_thresh", IBD_DEF_RC_TX_COPY_THRESH },
+		{ "_ibd_ud_num_rwqe", IBD_DEF_UD_NUM_RWQE },
+		{ "_ibd_ud_num_swqe", IBD_DEF_UD_NUM_SWQE },
+		{ "_ibd_ud_rx_comp_count", IBD_DEF_UD_RX_COMP_COUNT },
+		{ "_ibd_ud_rx_comp_usec", IBD_DEF_UD_RX_COMP_USEC },
+		{ "_ibd_ud_tx_comp_count", IBD_DEF_UD_TX_COMP_COUNT },
+		{ "_ibd_ud_tx_comp_usec", IBD_DEF_UD_TX_COMP_USEC },
+		{ "_ibd_ud_tx_copy_thresh", IBD_DEF_UD_TX_COPY_THRESH }
+	};
+	uint_t ndefaults = sizeof (priv_defaults) / sizeof (priv_defaults[0]);
+	ibd_state_t *state = arg;
+	uint32_t sdu_min, sdu_max;
+	char defstr[64];
+	uint_t idx;
+
+	if (pr_num == MAC_PROP_IB_LINKMODE) {
+		mac_prop_info_set_default_uint32(prh, IBD_DEF_LINK_MODE);
+		return;
+	}
+
+	if (pr_num == MAC_PROP_MTU) {
+		if (state->id_type == IBD_PORT_DRIVER) {
+			sdu_min = 1500;
+			sdu_max = IBD_DEF_RC_MAX_SDU;
+		} else if (state->id_enable_rc) {
+			sdu_min = sdu_max = IBD_DEF_RC_MAX_SDU;
+		} else {
+			sdu_min = sdu_max = state->id_mtu - IPOIB_HDRSIZE;
+		}
+		mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
+		mac_prop_info_set_range_uint32(prh, sdu_min, sdu_max);
+		return;
+	}
+
+	if (pr_num != MAC_PROP_PRIVATE)
+		return;
+
+	/* The one private property that is read-only and has no default */
+	if (strcmp(pr_name, "_ibd_broadcast_group") == 0) {
+		mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
+		return;
+	}
+
+	for (idx = 0; idx < ndefaults; idx++) {
+		if (strcmp(pr_name, priv_defaults[idx].pd_name) != 0)
+			continue;
+		(void) snprintf(defstr, sizeof (defstr), "%d",
+		    priv_defaults[idx].pd_default);
+		mac_prop_info_set_default_str(prh, defstr);
+		return;
+	}
+}
+
+/*
+ * Convert the string form of a private property value into a long.
+ *
+ * Returns EINVAL when no value was supplied or when ddi_strtol() cannot
+ * convert the string (in which case *resultp must not be used), and 0 with
+ * *resultp set otherwise.
+ */
+static int
+ibd_priv_prop_to_long(const void *pr_val, long *resultp)
+{
+	if (pr_val == NULL)
+		return (EINVAL);
+	if (ddi_strtol(pr_val, (char **)NULL, 0, resultp) != 0)
+		return (EINVAL);
+	return (0);
+}
+
+/*
+ * Set the private ("_ibd_*") tunable named pr_name from the string pr_val.
+ *
+ * Returns:
+ *	0	the tunable was updated
+ *	EBUSY	the tunable may not change while IBD_DRV_STARTED is set
+ *	EINVAL	the value is missing, is not a number, is out of range, or
+ *		depends on a feature that is currently off (LSO for
+ *		"_ibd_num_lso_bufs", SRQ for "_ibd_rc_num_srq")
+ *	ENOTSUP	pr_name is unknown, or a completion count/usec tunable was
+ *		given while id_allow_coalesce_comp_tuning is off
+ *
+ * Changing a queue size also pulls the matching completion count (and, for
+ * "_ibd_rc_num_rwqe", the SRQ count and rwqe threshold) back into range.
+ */
+/* ARGSUSED2 */
+static int
+ibd_set_priv_prop(ibd_state_t *state, const char *pr_name,
+    uint_t pr_valsize, const void *pr_val)
+{
+	int err;
+	long result;
+
+	if (strcmp(pr_name, "_ibd_coalesce_completions") == 0) {
+		if ((err = ibd_priv_prop_to_long(pr_val, &result)) != 0)
+			return (err);
+		if (result < 0 || result > 1)
+			return (EINVAL);
+		state->id_allow_coalesce_comp_tuning = (result == 1) ?
+		    B_TRUE : B_FALSE;
+		return (0);
+	}
+	if (strcmp(pr_name, "_ibd_create_broadcast_group") == 0) {
+		if (state->id_mac_state & IBD_DRV_STARTED)
+			return (EBUSY);
+		if ((err = ibd_priv_prop_to_long(pr_val, &result)) != 0)
+			return (err);
+		if (result < 0 || result > 1)
+			return (EINVAL);
+		state->id_create_broadcast_group = (result == 1) ?
+		    B_TRUE : B_FALSE;
+		return (0);
+	}
+	if (strcmp(pr_name, "_ibd_hash_size") == 0) {
+		if (state->id_mac_state & IBD_DRV_STARTED)
+			return (EBUSY);
+		if ((err = ibd_priv_prop_to_long(pr_val, &result)) != 0)
+			return (err);
+		if (result < IBD_MIN_HASH_SIZE || result > IBD_MAX_HASH_SIZE)
+			return (EINVAL);
+		state->id_hash_size = (uint32_t)result;
+		return (0);
+	}
+	if (strcmp(pr_name, "_ibd_lso_enable") == 0) {
+		if (state->id_mac_state & IBD_DRV_STARTED)
+			return (EBUSY);
+		if ((err = ibd_priv_prop_to_long(pr_val, &result)) != 0)
+			return (err);
+		if (result < 0 || result > 1) {
+			err = EINVAL;
+		} else {
+			state->id_lso_policy = (result == 1) ?
+			    B_TRUE : B_FALSE;
+		}
+		/* The framework is notified even for an out-of-range value */
+		mac_capab_update(state->id_mh);
+		return (err);
+	}
+	if (strcmp(pr_name, "_ibd_num_ah") == 0) {
+		if (state->id_mac_state & IBD_DRV_STARTED)
+			return (EBUSY);
+		if ((err = ibd_priv_prop_to_long(pr_val, &result)) != 0)
+			return (err);
+		if (result < IBD_MIN_NUM_AH || result > IBD_MAX_NUM_AH)
+			return (EINVAL);
+		state->id_num_ah = (uint32_t)result;
+		return (0);
+	}
+	if (strcmp(pr_name, "_ibd_num_lso_bufs") == 0) {
+		if (state->id_mac_state & IBD_DRV_STARTED)
+			return (EBUSY);
+		/* Only meaningful while LSO is both enabled and supported */
+		if (!state->id_lso_policy || !state->id_lso_capable)
+			return (EINVAL);
+		if ((err = ibd_priv_prop_to_long(pr_val, &result)) != 0)
+			return (err);
+		if (result < IBD_MIN_NUM_LSO_BUFS ||
+		    result > IBD_MAX_NUM_LSO_BUFS)
+			return (EINVAL);
+		state->id_num_lso_bufs = (uint32_t)result;
+		return (0);
+	}
+	if (strcmp(pr_name, "_ibd_rc_enable_srq") == 0) {
+		if (state->id_mac_state & IBD_DRV_STARTED)
+			return (EBUSY);
+		if ((err = ibd_priv_prop_to_long(pr_val, &result)) != 0)
+			return (err);
+		if (result < 0 || result > 1) {
+			err = EINVAL;
+		} else {
+			state->rc_enable_srq = (result == 1) ?
+			    B_TRUE : B_FALSE;
+		}
+		/* With SRQ off (before or after this call) no SRQs are kept */
+		if (!state->rc_enable_srq)
+			state->id_rc_num_srq = 0;
+		return (err);
+	}
+	if (strcmp(pr_name, "_ibd_rc_num_rwqe") == 0) {
+		if (state->id_mac_state & IBD_DRV_STARTED)
+			return (EBUSY);
+		if ((err = ibd_priv_prop_to_long(pr_val, &result)) != 0)
+			return (err);
+		if (result < IBD_MIN_RC_NUM_RWQE ||
+		    result > IBD_MAX_RC_NUM_RWQE)
+			return (EINVAL);
+		state->id_rc_num_rwqe = (uint32_t)result;
+		if (state->id_allow_coalesce_comp_tuning &&
+		    state->id_rc_rx_comp_count > state->id_rc_num_rwqe)
+			state->id_rc_rx_comp_count = state->id_rc_num_rwqe;
+		/*
+		 * NOTE(review): "_ibd_rc_num_srq" refuses values that are
+		 * >= id_rc_num_rwqe, yet an SRQ count equal to the new rwqe
+		 * count is left alone here -- confirm whether ">=" was
+		 * intended.
+		 */
+		if (state->id_rc_num_srq > state->id_rc_num_rwqe)
+			state->id_rc_num_srq = state->id_rc_num_rwqe - 1;
+		/*
+		 * If rx_rwqe_threshold is greater than the number of
+		 * rwqes, pull it back to 25% of number of rwqes.
+		 * NOTE(review): "_ibd_rc_rx_rwqe_thresh" likewise refuses
+		 * values >= id_rc_num_rwqe -- confirm the equal case.
+		 */
+		if (state->id_rc_rx_rwqe_thresh > state->id_rc_num_rwqe)
+			state->id_rc_rx_rwqe_thresh =
+			    (state->id_rc_num_rwqe >> 2);
+		return (0);
+	}
+	if (strcmp(pr_name, "_ibd_rc_num_srq") == 0) {
+		if (state->id_mac_state & IBD_DRV_STARTED)
+			return (EBUSY);
+		if (!state->rc_enable_srq)
+			return (EINVAL);
+		if ((err = ibd_priv_prop_to_long(pr_val, &result)) != 0)
+			return (err);
+		if (result < IBD_MIN_RC_NUM_SRQ ||
+		    result >= state->id_rc_num_rwqe)
+			return (EINVAL);
+		state->id_rc_num_srq = (uint32_t)result;
+		return (0);
+	}
+	if (strcmp(pr_name, "_ibd_rc_num_swqe") == 0) {
+		if (state->id_mac_state & IBD_DRV_STARTED)
+			return (EBUSY);
+		if ((err = ibd_priv_prop_to_long(pr_val, &result)) != 0)
+			return (err);
+		if (result < IBD_MIN_RC_NUM_SWQE ||
+		    result > IBD_MAX_RC_NUM_SWQE)
+			return (EINVAL);
+		state->id_rc_num_swqe = (uint32_t)result;
+		if (state->id_allow_coalesce_comp_tuning &&
+		    state->id_rc_tx_comp_count > state->id_rc_num_swqe)
+			state->id_rc_tx_comp_count = state->id_rc_num_swqe;
+		return (0);
+	}
+	if (strcmp(pr_name, "_ibd_rc_rx_comp_count") == 0) {
+		if (!state->id_allow_coalesce_comp_tuning)
+			return (ENOTSUP);
+		if ((err = ibd_priv_prop_to_long(pr_val, &result)) != 0)
+			return (err);
+		if (result < 1 || result > state->id_rc_num_rwqe)
+			return (EINVAL);
+		state->id_rc_rx_comp_count = (uint32_t)result;
+		return (0);
+	}
+	if (strcmp(pr_name, "_ibd_rc_rx_comp_usec") == 0) {
+		if (!state->id_allow_coalesce_comp_tuning)
+			return (ENOTSUP);
+		if ((err = ibd_priv_prop_to_long(pr_val, &result)) != 0)
+			return (err);
+		if (result < 1)
+			return (EINVAL);
+		state->id_rc_rx_comp_usec = (uint32_t)result;
+		return (0);
+	}
+	if (strcmp(pr_name, "_ibd_rc_rx_copy_thresh") == 0) {
+		if (state->id_mac_state & IBD_DRV_STARTED)
+			return (EBUSY);
+		if ((err = ibd_priv_prop_to_long(pr_val, &result)) != 0)
+			return (err);
+		if (result < IBD_MIN_RC_RX_COPY_THRESH ||
+		    result > state->rc_mtu)
+			return (EINVAL);
+		state->id_rc_rx_copy_thresh = (uint32_t)result;
+		return (0);
+	}
+	if (strcmp(pr_name, "_ibd_rc_rx_rwqe_thresh") == 0) {
+		if (state->id_mac_state & IBD_DRV_STARTED)
+			return (EBUSY);
+		if ((err = ibd_priv_prop_to_long(pr_val, &result)) != 0)
+			return (err);
+		if (result < IBD_MIN_RC_RX_RWQE_THRESH ||
+		    result >= state->id_rc_num_rwqe)
+			return (EINVAL);
+		state->id_rc_rx_rwqe_thresh = (uint32_t)result;
+		return (0);
+	}
+	if (strcmp(pr_name, "_ibd_rc_tx_comp_count") == 0) {
+		if (!state->id_allow_coalesce_comp_tuning)
+			return (ENOTSUP);
+		if ((err = ibd_priv_prop_to_long(pr_val, &result)) != 0)
+			return (err);
+		if (result < 1 || result > state->id_rc_num_swqe)
+			return (EINVAL);
+		state->id_rc_tx_comp_count = (uint32_t)result;
+		return (0);
+	}
+	if (strcmp(pr_name, "_ibd_rc_tx_comp_usec") == 0) {
+		if (!state->id_allow_coalesce_comp_tuning)
+			return (ENOTSUP);
+		if ((err = ibd_priv_prop_to_long(pr_val, &result)) != 0)
+			return (err);
+		if (result < 1)
+			return (EINVAL);
+		state->id_rc_tx_comp_usec = (uint32_t)result;
+		return (0);
+	}
+	if (strcmp(pr_name, "_ibd_rc_tx_copy_thresh") == 0) {
+		if (state->id_mac_state & IBD_DRV_STARTED)
+			return (EBUSY);
+		if ((err = ibd_priv_prop_to_long(pr_val, &result)) != 0)
+			return (err);
+		if (result < IBD_MIN_RC_TX_COPY_THRESH ||
+		    result > state->rc_mtu)
+			return (EINVAL);
+		state->id_rc_tx_copy_thresh = (uint32_t)result;
+		return (0);
+	}
+	if (strcmp(pr_name, "_ibd_ud_num_rwqe") == 0) {
+		if (state->id_mac_state & IBD_DRV_STARTED)
+			return (EBUSY);
+		if ((err = ibd_priv_prop_to_long(pr_val, &result)) != 0)
+			return (err);
+		if (result < IBD_MIN_UD_NUM_RWQE ||
+		    result > IBD_MAX_UD_NUM_RWQE)
+			return (EINVAL);
+		/* A request above id_hca_max_chan_sz is clamped to it */
+		if (result > state->id_hca_max_chan_sz)
+			state->id_ud_num_rwqe = state->id_hca_max_chan_sz;
+		else
+			state->id_ud_num_rwqe = (uint32_t)result;
+		if (state->id_allow_coalesce_comp_tuning &&
+		    state->id_ud_rx_comp_count > state->id_ud_num_rwqe)
+			state->id_ud_rx_comp_count = state->id_ud_num_rwqe;
+		return (0);
+	}
+	if (strcmp(pr_name, "_ibd_ud_num_swqe") == 0) {
+		if (state->id_mac_state & IBD_DRV_STARTED)
+			return (EBUSY);
+		if ((err = ibd_priv_prop_to_long(pr_val, &result)) != 0)
+			return (err);
+		if (result < IBD_MIN_UD_NUM_SWQE ||
+		    result > IBD_MAX_UD_NUM_SWQE)
+			return (EINVAL);
+		/* A request above id_hca_max_chan_sz is clamped to it */
+		if (result > state->id_hca_max_chan_sz)
+			state->id_ud_num_swqe = state->id_hca_max_chan_sz;
+		else
+			state->id_ud_num_swqe = (uint32_t)result;
+		if (state->id_allow_coalesce_comp_tuning &&
+		    state->id_ud_tx_comp_count > state->id_ud_num_swqe)
+			state->id_ud_tx_comp_count = state->id_ud_num_swqe;
+		return (0);
+	}
+	if (strcmp(pr_name, "_ibd_ud_rx_comp_count") == 0) {
+		if (!state->id_allow_coalesce_comp_tuning)
+			return (ENOTSUP);
+		if ((err = ibd_priv_prop_to_long(pr_val, &result)) != 0)
+			return (err);
+		if (result < 1 || result > state->id_ud_num_rwqe)
+			return (EINVAL);
+		state->id_ud_rx_comp_count = (uint32_t)result;
+		return (0);
+	}
+	if (strcmp(pr_name, "_ibd_ud_rx_comp_usec") == 0) {
+		if (!state->id_allow_coalesce_comp_tuning)
+			return (ENOTSUP);
+		if ((err = ibd_priv_prop_to_long(pr_val, &result)) != 0)
+			return (err);
+		if (result < 1)
+			return (EINVAL);
+		state->id_ud_rx_comp_usec = (uint32_t)result;
+		return (0);
+	}
+	if (strcmp(pr_name, "_ibd_ud_tx_comp_count") == 0) {
+		if (!state->id_allow_coalesce_comp_tuning)
+			return (ENOTSUP);
+		if ((err = ibd_priv_prop_to_long(pr_val, &result)) != 0)
+			return (err);
+		if (result < 1 || result > state->id_ud_num_swqe)
+			return (EINVAL);
+		state->id_ud_tx_comp_count = (uint32_t)result;
+		return (0);
+	}
+	if (strcmp(pr_name, "_ibd_ud_tx_comp_usec") == 0) {
+		if (!state->id_allow_coalesce_comp_tuning)
+			return (ENOTSUP);
+		if ((err = ibd_priv_prop_to_long(pr_val, &result)) != 0)
+			return (err);
+		if (result < 1)
+			return (EINVAL);
+		state->id_ud_tx_comp_usec = (uint32_t)result;
+		return (0);
+	}
+	if (strcmp(pr_name, "_ibd_ud_tx_copy_thresh") == 0) {
+		if (state->id_mac_state & IBD_DRV_STARTED)
+			return (EBUSY);
+		if ((err = ibd_priv_prop_to_long(pr_val, &result)) != 0)
+			return (err);
+		if (result < IBD_MIN_UD_TX_COPY_THRESH ||
+		    result > IBD_MAX_UD_TX_COPY_THRESH)
+			return (EINVAL);
+		state->id_ud_tx_copy_thresh = (uint32_t)result;
+		return (0);
+	}
+	return (ENOTSUP);
+}
+
+/*
+ * Read the private ("_ibd_*") property named pr_name.
+ *
+ * The current value is taken from the matching field of state and written
+ * to pr_val (at most pr_valsize bytes) as a decimal string.  Returns 0 on
+ * success and ENOTSUP, leaving pr_val untouched, for an unknown name.
+ */
+static int
+ibd_get_priv_prop(ibd_state_t *state, const char *pr_name, uint_t pr_valsize,
+    void *pr_val)
+{
+	int cur;
+
+	if (strcmp(pr_name, "_ibd_broadcast_group") == 0)
+		cur = state->id_bgroup_present;
+	else if (strcmp(pr_name, "_ibd_coalesce_completions") == 0)
+		cur = state->id_allow_coalesce_comp_tuning;
+	else if (strcmp(pr_name, "_ibd_create_broadcast_group") == 0)
+		cur = state->id_create_broadcast_group;
+	else if (strcmp(pr_name, "_ibd_hash_size") == 0)
+		cur = state->id_hash_size;
+	else if (strcmp(pr_name, "_ibd_lso_enable") == 0)
+		cur = state->id_lso_policy;
+	else if (strcmp(pr_name, "_ibd_num_ah") == 0)
+		cur = state->id_num_ah;
+	else if (strcmp(pr_name, "_ibd_num_lso_bufs") == 0)
+		cur = state->id_num_lso_bufs;
+	else if (strcmp(pr_name, "_ibd_rc_enable_srq") == 0)
+		cur = state->rc_enable_srq;
+	else if (strcmp(pr_name, "_ibd_rc_num_rwqe") == 0)
+		cur = state->id_rc_num_rwqe;
+	else if (strcmp(pr_name, "_ibd_rc_num_srq") == 0)
+		cur = state->id_rc_num_srq;
+	else if (strcmp(pr_name, "_ibd_rc_num_swqe") == 0)
+		cur = state->id_rc_num_swqe;
+	else if (strcmp(pr_name, "_ibd_rc_rx_comp_count") == 0)
+		cur = state->id_rc_rx_comp_count;
+	else if (strcmp(pr_name, "_ibd_rc_rx_comp_usec") == 0)
+		cur = state->id_rc_rx_comp_usec;
+	else if (strcmp(pr_name, "_ibd_rc_rx_copy_thresh") == 0)
+		cur = state->id_rc_rx_copy_thresh;
+	else if (strcmp(pr_name, "_ibd_rc_rx_rwqe_thresh") == 0)
+		cur = state->id_rc_rx_rwqe_thresh;
+	else if (strcmp(pr_name, "_ibd_rc_tx_comp_count") == 0)
+		cur = state->id_rc_tx_comp_count;
+	else if (strcmp(pr_name, "_ibd_rc_tx_comp_usec") == 0)
+		cur = state->id_rc_tx_comp_usec;
+	else if (strcmp(pr_name, "_ibd_rc_tx_copy_thresh") == 0)
+		cur = state->id_rc_tx_copy_thresh;
+	else if (strcmp(pr_name, "_ibd_ud_num_rwqe") == 0)
+		cur = state->id_ud_num_rwqe;
+	else if (strcmp(pr_name, "_ibd_ud_num_swqe") == 0)
+		cur = state->id_ud_num_swqe;
+	else if (strcmp(pr_name, "_ibd_ud_rx_comp_count") == 0)
+		cur = state->id_ud_rx_comp_count;
+	else if (strcmp(pr_name, "_ibd_ud_rx_comp_usec") == 0)
+		cur = state->id_ud_rx_comp_usec;
+	else if (strcmp(pr_name, "_ibd_ud_tx_comp_count") == 0)
+		cur = state->id_ud_tx_comp_count;
+	else if (strcmp(pr_name, "_ibd_ud_tx_comp_usec") == 0)
+		cur = state->id_ud_tx_comp_usec;
+	else if (strcmp(pr_name, "_ibd_ud_tx_copy_thresh") == 0)
+		cur = state->id_ud_tx_copy_thresh;
+	else
+		return (ENOTSUP);
+
+	(void) snprintf(pr_val, pr_valsize, "%d", cur);
+	return (0);
+}
+
 static int
 ibd_get_port_details(ibd_state_t *state)
 {
@@ -4321,42 +5322,32 @@
 	}
 
 	/*
-	 * If the link already went down by the time we get here,
-	 * give up
-	 */
-	if (port_infop->p_linkstate != IBT_PORT_ACTIVE) {
-		mutex_exit(&state->id_link_mutex);
-		ibt_free_portinfo(port_infop, port_infosz);
-		DPRINT(10, "ibd_get_port_details: port is not active");
-		return (ENETDOWN);
-	}
-
-	/*
 	 * If the link is active, verify the pkey
 	 */
-	if ((ret = ibt_pkey2index(state->id_hca_hdl, state->id_port,
-	    state->id_pkey, &state->id_pkix)) != IBT_SUCCESS) {
-		mutex_exit(&state->id_link_mutex);
-		ibt_free_portinfo(port_infop, port_infosz);
-		DPRINT(10, "ibd_get_port_details: ibt_pkey2index "
-		    "failed, ret=%d", ret);
-		return (ENONET);
-	}
-
-	state->id_mtu = (128 << port_infop->p_mtu);
-	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(state->id_sgid))
-	state->id_sgid = *port_infop->p_sgid_tbl;
-	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(state->id_sgid))
-	state->id_link_state = LINK_STATE_UP;
-
+	if (port_infop->p_linkstate == IBT_PORT_ACTIVE) {
+		if ((ret = ibt_pkey2index(state->id_hca_hdl, state->id_port,
+		    state->id_pkey, &state->id_pkix)) != IBT_SUCCESS) {
+			state->id_link_state = LINK_STATE_DOWN;
+		} else {
+			state->id_link_state = LINK_STATE_UP;
+		}
+		state->id_mtu = (128 << port_infop->p_mtu);
+		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(state->id_sgid))
+		state->id_sgid = *port_infop->p_sgid_tbl;
+		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(state->id_sgid))
+		/*
+		 * Now that the port is active, record the port speed
+		 */
+		state->id_link_speed = ibd_get_portspeed(state);
+	} else {
+		/* Make sure that these are handled in PORT_UP/CHANGE */
+		state->id_mtu = 0;
+		state->id_link_state = LINK_STATE_DOWN;
+		state->id_link_speed = 0;
+	}
 	mutex_exit(&state->id_link_mutex);
 	ibt_free_portinfo(port_infop, port_infosz);
 
-	/*
-	 * Now that the port is active, record the port speed
-	 */
-	state->id_link_speed = ibd_get_portspeed(state);
-
 	return (0);
 }
 
@@ -4367,6 +5358,8 @@
 	ibt_cq_attr_t cq_attr;
 	ibt_status_t ret;
 	uint32_t real_size;
+	uint_t num_rwqe_change = 0;
+	uint_t num_swqe_change = 0;
 
 	ret = ibt_query_hca(state->id_hca_hdl, &hca_attrs);
 	ASSERT(ret == IBT_SUCCESS);
@@ -4385,11 +5378,12 @@
 	/*
 	 * Allocate Receive CQ.
 	 */
-	if (hca_attrs.hca_max_cq_sz >= (state->id_num_rwqe + 1)) {
-		cq_attr.cq_size = state->id_num_rwqe + 1;
+	if (hca_attrs.hca_max_cq_sz >= (state->id_ud_num_rwqe + 1)) {
+		cq_attr.cq_size = state->id_ud_num_rwqe + 1;
 	} else {
 		cq_attr.cq_size = hca_attrs.hca_max_cq_sz;
-		state->id_num_rwqe = cq_attr.cq_size - 1;
+		num_rwqe_change = state->id_ud_num_rwqe;
+		state->id_ud_num_rwqe = cq_attr.cq_size - 1;
 	}
 
 	if ((ret = ibt_alloc_cq(state->id_hca_hdl, &cq_attr,
@@ -4399,8 +5393,8 @@
 		return (DDI_FAILURE);
 	}
 
-	if ((ret = ibt_modify_cq(state->id_rcq_hdl,
-	    ibd_rxcomp_count, ibd_rxcomp_usec, 0)) != IBT_SUCCESS) {
+	if ((ret = ibt_modify_cq(state->id_rcq_hdl, state->id_ud_rx_comp_count,
+	    state->id_ud_rx_comp_usec, 0)) != IBT_SUCCESS) {
 		DPRINT(10, "ibd_alloc_cqs: Receive CQ interrupt "
 		    "moderation failed, ret=%d\n", ret);
 	}
@@ -4413,11 +5407,12 @@
 	/*
 	 * Allocate Send CQ.
 	 */
-	if (hca_attrs.hca_max_cq_sz >= (state->id_num_swqe + 1)) {
-		cq_attr.cq_size = state->id_num_swqe + 1;
+	if (hca_attrs.hca_max_cq_sz >= (state->id_ud_num_swqe + 1)) {
+		cq_attr.cq_size = state->id_ud_num_swqe + 1;
 	} else {
 		cq_attr.cq_size = hca_attrs.hca_max_cq_sz;
-		state->id_num_swqe = cq_attr.cq_size - 1;
+		num_swqe_change = state->id_ud_num_swqe;
+		state->id_ud_num_swqe = cq_attr.cq_size - 1;
 	}
 
 	if ((ret = ibt_alloc_cq(state->id_hca_hdl, &cq_attr,
@@ -4429,8 +5424,8 @@
 		(void) ibt_free_cq(state->id_rcq_hdl);
 		return (DDI_FAILURE);
 	}
-	if ((ret = ibt_modify_cq(state->id_scq_hdl,
-	    ibd_txcomp_count, ibd_txcomp_usec, 0)) != IBT_SUCCESS) {
+	if ((ret = ibt_modify_cq(state->id_scq_hdl, state->id_ud_tx_comp_count,
+	    state->id_ud_tx_comp_usec, 0)) != IBT_SUCCESS) {
 		DPRINT(10, "ibd_alloc_cqs: Send CQ interrupt "
 		    "moderation failed, ret=%d\n", ret);
 	}
@@ -4443,13 +5438,13 @@
 	 * Print message in case we could not allocate as many wqe's
 	 * as was requested.
 	 */
-	if (state->id_num_rwqe != IBD_NUM_RWQE) {
+	if (num_rwqe_change) {
 		ibd_print_warn(state, "Setting #rwqe = %d instead of default "
-		    "%d", state->id_num_rwqe, IBD_NUM_RWQE);
-	}
-	if (state->id_num_swqe != IBD_NUM_SWQE) {
+		    "%d", state->id_ud_num_rwqe, num_rwqe_change);
+	}
+	if (num_swqe_change) {
 		ibd_print_warn(state, "Setting #swqe = %d instead of default "
-		    "%d", state->id_num_swqe, IBD_NUM_SWQE);
+		    "%d", state->id_ud_num_swqe, num_swqe_change);
 	}
 
 	return (DDI_SUCCESS);
@@ -4471,8 +5466,8 @@
 	ud_alloc_attr.ud_hca_port_num	= state->id_port;
 	ud_alloc_attr.ud_sizes.cs_sq_sgl = state->id_max_sqseg;
 	ud_alloc_attr.ud_sizes.cs_rq_sgl = IBD_MAX_RQSEG;
-	ud_alloc_attr.ud_sizes.cs_sq    = state->id_num_swqe;
-	ud_alloc_attr.ud_sizes.cs_rq    = state->id_num_rwqe;
+	ud_alloc_attr.ud_sizes.cs_sq    = state->id_ud_num_swqe;
+	ud_alloc_attr.ud_sizes.cs_rq    = state->id_ud_num_rwqe;
 	ud_alloc_attr.ud_qkey		= state->id_mcinfo->mc_qkey;
 	ud_alloc_attr.ud_scq		= state->id_scq_hdl;
 	ud_alloc_attr.ud_rcq		= state->id_rcq_hdl;
@@ -4528,6 +5523,7 @@
 		state->id_link_state = LINK_STATE_UNKNOWN;
 	}
 	mutex_exit(&state->id_link_mutex);
+	bzero(&state->id_macaddr, sizeof (ipoib_mac_t));
 	mac_link_update(state->id_mh, state->id_link_state);
 
 	state->id_mac_state &= (~IBD_DRV_PORT_DETAILS_OBTAINED);
@@ -4535,6 +5531,10 @@
 		state->id_mac_state &= (~IBD_DRV_STARTED);
 	}
 
+	if (progress & IBD_DRV_IN_LATE_HCA_INIT) {
+		state->id_mac_state &= (~IBD_DRV_IN_LATE_HCA_INIT);
+	}
+
 	/* Stop listen under Reliable Connected Mode */
 	if (progress & IBD_DRV_RC_LISTEN) {
 		ASSERT(state->id_enable_rc);
@@ -4626,7 +5626,7 @@
 		mutex_enter(&state->id_tx_rel_list.dl_mutex);
 		attempts = 10;
 		while (state->id_tx_list.dl_cnt + state->id_tx_rel_list.dl_cnt
-		    != state->id_num_swqe) {
+		    != state->id_ud_num_swqe) {
 			if (--attempts == 0)
 				break;
 			mutex_exit(&state->id_tx_rel_list.dl_mutex);
@@ -4637,7 +5637,7 @@
 		}
 		ibt_set_cq_handler(state->id_scq_hdl, 0, 0);
 		if (state->id_tx_list.dl_cnt + state->id_tx_rel_list.dl_cnt !=
-		    state->id_num_swqe) {
+		    state->id_ud_num_swqe) {
 			cmn_err(CE_WARN, "tx resources not freed\n");
 		}
 		mutex_exit(&state->id_tx_rel_list.dl_mutex);
@@ -4657,28 +5657,6 @@
 		state->id_mac_state &= (~IBD_DRV_SCQ_NOTIFY_ENABLED);
 	}
 
-	if (progress & IBD_DRV_ASYNC_THR_CREATED) {
-		/*
-		 * No new async requests will be posted since the device
-		 * link state has been marked as unknown; completion handlers
-		 * have been turned off, so Tx handler will not cause any
-		 * more IBD_ASYNC_REAP requests.
-		 *
-		 * Queue a request for the async thread to exit, which will
-		 * be serviced after any pending ones. This can take a while,
-		 * specially if the SM is unreachable, since IBMF will slowly
-		 * timeout each SM request issued by the async thread.  Reap
-		 * the thread before continuing on, we do not want it to be
-		 * lingering in modunloaded code (or we could move the reap
-		 * to ibd_detach(), provided we keep track of the current
-		 * id_async_thrid somewhere safe).
-		 */
-		ibd_queue_work_slot(state, &state->id_ah_req, IBD_ASYNC_EXIT);
-		thread_join(state->id_async_thrid);
-
-		state->id_mac_state &= (~IBD_DRV_ASYNC_THR_CREATED);
-	}
-
 	if (progress & IBD_DRV_BCAST_GROUP_JOINED) {
 		/*
 		 * Drop all residual full/non membership. This includes full
@@ -4811,26 +5789,37 @@
 	ibd_state_t *state = arg;
 	int	ret;
 
+	if (state->id_type == IBD_PORT_DRIVER)
+		return (EINVAL);
+
 	ibd_set_mac_progress(state, IBD_DRV_START_IN_PROGRESS);
+	if (state->id_mac_state & IBD_DRV_IN_DELETION) {
+		ibd_clr_mac_progress(state, IBD_DRV_START_IN_PROGRESS);
+		return (EIO);
+	}
 
 	ret = ibd_start(state);
-
 	ibd_clr_mac_progress(state, IBD_DRV_START_IN_PROGRESS);
-
 	return (ret);
 }
 
 static int
 ibd_start(ibd_state_t *state)
 {
-	kthread_t *kht;
 	int err;
 	ibt_status_t ret;
+	int late_hca_init = 0;
 
 	if (state->id_mac_state & IBD_DRV_STARTED)
 		return (DDI_SUCCESS);
 
-	if (atomic_inc_32_nv(&state->id_running) != 1) {
+	/*
+	 * We do not increment the running flag when calling ibd_start() as
+	 * a result of some event which moves the state away from late HCA
+	 * initialization viz. MCG_CREATED, PORT_CHANGE or link availability.
+	 */
+	if (!(state->id_mac_state & IBD_DRV_IN_LATE_HCA_INIT) &&
+	    (atomic_inc_32_nv(&state->id_running) != 1)) {
 		DPRINT(10, "ibd_start: id_running is non-zero");
 		cmn_err(CE_WARN, "ibd_start: id_running was not 0\n");
 		atomic_dec_32(&state->id_running);
@@ -4838,22 +5827,31 @@
 	}
 
 	/*
-	 * Get port details; if we fail here, very likely the port
-	 * state is inactive or the pkey can't be verified.
+	 * Get port details; if we fail here, something bad happened.
+	 * Fail plumb.
 	 */
 	if ((err = ibd_get_port_details(state)) != 0) {
 		DPRINT(10, "ibd_start: ibd_get_port_details() failed");
 		goto start_fail;
 	}
+	/*
+	 * If state->id_link_state is DOWN, it indicates that either the port
+	 * is down, or the pkey is not available. In both cases, resort to late
+	 * initialization. Register for subnet notices, and return success.
+	 */
 	state->id_mac_state |= IBD_DRV_PORT_DETAILS_OBTAINED;
+	if (state->id_link_state == LINK_STATE_DOWN) {
+		late_hca_init = 1;
+		goto late_hca_init_return;
+	}
 
 	/*
 	 * Find the IPoIB broadcast group
 	 */
 	if (ibd_find_bgroup(state) != IBT_SUCCESS) {
-		DPRINT(10, "ibd_start: ibd_find_bgroup() failed");
-		err = ENOTACTIVE;
-		goto start_fail;
+		/* Resort to late initialization */
+		late_hca_init = 1;
+		goto reg_snet_notices;
 	}
 	state->id_mac_state |= IBD_DRV_BCAST_GROUP_FOUND;
 
@@ -4932,16 +5930,6 @@
 	state->id_mac_state |= IBD_DRV_BCAST_GROUP_JOINED;
 
 	/*
-	 * Create the async thread; thread_create never fails.
-	 */
-	kht = thread_create(NULL, 0, ibd_async_work, state, 0, &p0,
-	    TS_RUN, minclsyspri);
-	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(state->id_async_thrid))
-	state->id_async_thrid = kht->t_did;
-	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(state->id_async_thrid))
-	state->id_mac_state |= IBD_DRV_ASYNC_THR_CREATED;
-
-	/*
 	 * When we did mac_register() in ibd_attach(), we didn't register
 	 * the real macaddr and we didn't have the true port mtu. Now that
 	 * we're almost ready, set the local mac address and broadcast
@@ -4980,7 +5968,9 @@
 	}
 	state->id_mac_state |= IBD_DRV_RCQ_NOTIFY_ENABLED;
 
-	/*
+reg_snet_notices:
+	/*
+	 * In case of normal initialization sequence,
 	 * Setup the subnet notices handler after we've initialized the acache/
 	 * mcache and started the async thread, both of which are required for
 	 * the trap handler to function properly.
@@ -4989,13 +5979,37 @@
 	 * a mac_register() during attach so mac_tx_update() can be called
 	 * if necessary without any problem), we can enable the trap handler
 	 * to queue requests to the async thread.
-	 */
-	ibt_register_subnet_notices(state->id_ibt_hdl,
-	    ibd_snet_notices_handler, state);
-	mutex_enter(&state->id_trap_lock);
-	state->id_trap_stop = B_FALSE;
-	mutex_exit(&state->id_trap_lock);
-	state->id_mac_state |= IBD_DRV_SM_NOTICES_REGISTERED;
+	 *
+	 * In case of late hca initialization, the subnet notices handler will
+	 * only handle MCG created/deleted event. The action performed as part
+	 * of handling these events is to start the interface. So, the
+	 * acache/mcache initialization is not a necessity in such cases for
+	 * registering the subnet notices handler. Also, if we are in
+	 * ibd_start() as a result of, say, some event handling after entering
+	 * late hca initialization phase no need to register again.
+	 */
+	if ((state->id_mac_state & IBD_DRV_SM_NOTICES_REGISTERED) == 0) {
+		ibt_register_subnet_notices(state->id_ibt_hdl,
+		    ibd_snet_notices_handler, state);
+		mutex_enter(&state->id_trap_lock);
+		state->id_trap_stop = B_FALSE;
+		mutex_exit(&state->id_trap_lock);
+		state->id_mac_state |= IBD_DRV_SM_NOTICES_REGISTERED;
+	}
+
+late_hca_init_return:
+	if (late_hca_init == 1) {
+		state->id_mac_state |= IBD_DRV_IN_LATE_HCA_INIT;
+		/*
+		 * In case of late initialization, mark the link state as down,
+		 * immaterial of the actual link state as reported in the
+		 * port_info.
+		 */
+		state->id_link_state = LINK_STATE_DOWN;
+		mac_unicst_update(state->id_mh, (uint8_t *)&state->id_macaddr);
+		mac_link_update(state->id_mh, state->id_link_state);
+		return (DDI_SUCCESS);
+	}
 
 	if (state->id_enable_rc) {
 		if (state->rc_enable_srq) {
@@ -5043,8 +6057,9 @@
 	 * notifications to GLDv3 till we reach here in the initialization
 	 * sequence.
 	 */
+	mac_link_update(state->id_mh, state->id_link_state);
+	state->id_mac_state &= ~IBD_DRV_IN_LATE_HCA_INIT;
 	state->id_mac_state |= IBD_DRV_STARTED;
-	mac_link_update(state->id_mh, state->id_link_state);
 
 	return (DDI_SUCCESS);
 
@@ -5068,6 +6083,9 @@
 {
 	ibd_state_t *state = (ibd_state_t *)arg;
 
+	if (state->id_type == IBD_PORT_DRIVER)
+		return;
+
 	ibd_set_mac_progress(state, IBD_DRV_STOP_IN_PROGRESS);
 
 	(void) ibd_undo_start(state, state->id_link_state);
@@ -5084,6 +6102,9 @@
 {
 	ibd_state_t *state = arg;
 
+	if (state->id_type == IBD_PORT_DRIVER)
+		return (EINVAL);
+
 	/*
 	 * Don't bother even comparing the macaddr if we haven't
 	 * completed ibd_m_start().
@@ -5134,6 +6155,9 @@
 	ib_gid_t mgid;
 	ibd_req_t *req;
 
+	if (state->id_type == IBD_PORT_DRIVER)
+		return (EINVAL);
+
 	/*
 	 * If we haven't completed ibd_m_start(), async thread wouldn't
 	 * have been started and id_bcaddr wouldn't be set, so there's
@@ -5289,6 +6313,9 @@
 	ibd_state_t *state = (ibd_state_t *)arg;
 	ibd_req_t *req;
 
+	if (state->id_type == IBD_PORT_DRIVER)
+		return (EINVAL);
+
 	/*
 	 * Async thread wouldn't have been started if we haven't
 	 * passed ibd_m_start()
@@ -5700,7 +6727,7 @@
 	 * the maximum acceptable.
 	 */
 	if ((state->id_hca_res_lkey_capab) &&
-	    (pktsize > IBD_TX_COPY_THRESH) &&
+	    (pktsize > state->id_ud_tx_copy_thresh) &&
 	    (nmblks < state->id_max_sqseg_hiwm)) {
 		ibt_iov_t iov_arr[IBD_MAX_SQSEG];
 		ibt_iov_attr_t iov_attr;
@@ -6073,7 +7100,7 @@
 		 * the "copy-threshold", and if the number of mp
 		 * fragments is less than the maximum acceptable.
 		 */
-		if (pktsize <= ibd_rc_tx_copy_thresh) {
+		if (pktsize <= state->id_rc_tx_copy_thresh) {
 			atomic_inc_64(&state->rc_xmt_small_pkt);
 			/*
 			 * Only process unicast packet in Reliable Connected
@@ -6358,7 +7385,13 @@
 	ibd_state_t *state = (ibd_state_t *)arg;
 	mblk_t *next;
 
-	if (state->id_link_state != LINK_STATE_UP) {
+	if (state->id_type == IBD_PORT_DRIVER) {
+		freemsgchain(mp);
+		return (NULL);
+	}
+
+	if ((state->id_link_state != LINK_STATE_UP) ||
+	    !(state->id_mac_state & IBD_DRV_STARTED)) {
 		freemsgchain(mp);
 		mp = NULL;
 	}
@@ -6502,7 +7535,7 @@
 		 * Post more here, if less than one fourth full.
 		 */
 		if (atomic_add_32_nv(&state->id_rx_list.dl_cnt, -num_polled) <
-		    (state->id_num_rwqe / 4))
+		    (state->id_ud_num_rwqe / 4))
 			ibd_post_recv_intr(state);
 	}
 }
@@ -7054,3 +8087,765 @@
 	bcopy(tmpbuf, ibd_lbuf+off, msglen);	/* no lock needed for this */
 }
 #endif
+
+/*
+ * Ioctl handler: create an IPoIB partition object on top of an existing
+ * port driver instance.
+ *
+ * karg is treated as an ibd_create_ioctl_t naming the port instance
+ * (ibdioc.ioc_port_inst), the pkey (ioc_pkey), the datalink ids
+ * (ioc_partid, ibdioc.ioc_linkid) and the ioc_force_create flag.
+ *
+ * Steps performed:
+ *   1. Validate the port instance and reject pkeys that are not above
+ *      IB_PKEY_INVALID_FULL.
+ *   2. Unless ioc_force_create is set, require the port to be active and
+ *      the pkey to be present in the port's pkey table.
+ *   3. Reject a (port instance, pkey) pair that is already on the
+ *      ibd_objlist list.
+ *   4. Allocate and initialize a new ibd_state_t, attach it, register it
+ *      with the MAC layer, create the devnet and link it onto ibd_objlist.
+ *
+ * Returns 0 on success or an errno value on failure; for several failures
+ * a more specific code is also stored in cmd->ibdioc.ioc_status.
+ *
+ * NOTE(review): ibd_objlist_lock is dropped between the duplicate check
+ * and the final list insertion, so two concurrent creates for the same
+ * (port, pkey) do not appear to be serialized here -- confirm whether the
+ * caller provides that serialization.
+ */
+/* ARGSUSED */
+static int
+ibd_create_partition(void *karg, intptr_t arg, int mode, cred_t *credp,
+    int *rvalp)
+{
+	ibd_create_ioctl_t	*cmd = karg;
+	ibd_state_t		*state, *port_state, *p;
+	int			i, err, rval = 0;
+	mac_register_t		*macp;
+	ibt_hca_portinfo_t	*pinfop = NULL;
+	ibt_status_t		ibt_status;
+	uint_t			psize, pinfosz;
+	boolean_t		force_create = B_FALSE;
+
+	cmd->ibdioc.ioc_status = 0;
+
+	if (cmd->ibdioc.ioc_port_inst < 0) {
+		cmd->ibdioc.ioc_status = IBD_INVALID_PORT_INST;
+		return (EINVAL);
+	}
+	port_state = ddi_get_soft_state(ibd_list, cmd->ibdioc.ioc_port_inst);
+	if (port_state == NULL) {
+		DPRINT(10, "ibd_create_partition: failed to get state %d",
+		    cmd->ibdioc.ioc_port_inst);
+		cmd->ibdioc.ioc_status = IBD_INVALID_PORT_INST;
+		return (EINVAL);
+	}
+
+	/* Limited PKeys not supported */
+	if (cmd->ioc_pkey <= IB_PKEY_INVALID_FULL) {
+		rval = EINVAL;
+		goto part_create_return;
+	}
+
+	if (cmd->ioc_force_create == 0) {
+		/*
+		 * Check if the port pkey table contains the pkey for which
+		 * this partition is being created.
+		 */
+		ibt_status = ibt_query_hca_ports(port_state->id_hca_hdl,
+		    port_state->id_port, &pinfop, &psize, &pinfosz);
+
+		if ((ibt_status != IBT_SUCCESS) || (psize != 1)) {
+			rval = EINVAL;
+			goto part_create_return;
+		}
+
+		if (pinfop->p_linkstate != IBT_PORT_ACTIVE) {
+			rval = ENETDOWN;
+			cmd->ibdioc.ioc_status = IBD_PORT_IS_DOWN;
+			goto part_create_return;
+		}
+
+		for (i = 0; i < pinfop->p_pkey_tbl_sz; i++) {
+			if (pinfop->p_pkey_tbl[i] == cmd->ioc_pkey) {
+				break;
+			}
+		}
+		if (i == pinfop->p_pkey_tbl_sz) {
+			rval = EINVAL;
+			cmd->ibdioc.ioc_status = IBD_PKEY_NOT_PRESENT;
+			goto part_create_return;
+		}
+	} else {
+		force_create = B_TRUE;
+	}
+
+	/* Refuse to create a second object for the same port and pkey */
+	mutex_enter(&ibd_objlist_lock);
+	for (p = ibd_objlist_head; p; p = p->id_next) {
+		if ((p->id_port_inst == cmd->ibdioc.ioc_port_inst) &&
+		    (p->id_pkey == cmd->ioc_pkey)) {
+			mutex_exit(&ibd_objlist_lock);
+			rval = EEXIST;
+			cmd->ibdioc.ioc_status = IBD_PARTITION_EXISTS;
+			goto part_create_return;
+		}
+	}
+	mutex_exit(&ibd_objlist_lock);
+
+	state = kmem_zalloc(sizeof (ibd_state_t), KM_SLEEP);
+
+	state->id_type		= IBD_PARTITION_OBJ;
+
+	state->id_plinkid	= cmd->ioc_partid;
+	state->id_dlinkid	= cmd->ibdioc.ioc_linkid;
+	state->id_port_inst	= cmd->ibdioc.ioc_port_inst;
+
+	/* The partition inherits its device and port identity from the port */
+	state->id_dip		= port_state->id_dip;
+	state->id_port		= port_state->id_port;
+	state->id_pkey		= cmd->ioc_pkey;
+	state->id_hca_guid	= port_state->id_hca_guid;
+	state->id_port_guid	= port_state->id_port_guid;
+	state->id_force_create	= force_create;
+
+	mutex_init(&state->id_macst_lock, NULL, MUTEX_DRIVER, NULL);
+	cv_init(&state->id_macst_cv, NULL, CV_DEFAULT, NULL);
+
+	if (ibd_part_attach(state, state->id_dip) != DDI_SUCCESS) {
+		rval = EIO;
+		cmd->ibdioc.ioc_status = IBD_NO_HW_RESOURCE;
+		goto fail;
+	}
+
+	if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
+		rval = EAGAIN;
+		goto fail;
+	}
+
+	macp->m_type_ident	= MAC_PLUGIN_IDENT_IB;
+	macp->m_dip		= port_state->id_dip;
+	macp->m_instance	= (uint_t)-1;
+	macp->m_driver		= state;
+	macp->m_src_addr	= (uint8_t *)&state->id_macaddr;
+	macp->m_callbacks	= &ibd_m_callbacks;
+	macp->m_min_sdu		= 0;
+	if (state->id_enable_rc) {
+		macp->m_max_sdu		= IBD_DEF_RC_MAX_SDU;
+	} else {
+		macp->m_max_sdu		= IBD_DEF_MAX_SDU;
+	}
+	macp->m_priv_props = ibd_priv_props;
+
+	err = mac_register(macp, &state->id_mh);
+	mac_free(macp);
+
+	if (err != 0) {
+		DPRINT(10, "ibd_create_partition: mac_register() failed %d",
+		    err);
+		rval = err;
+		goto fail;
+	}
+
+	err = dls_devnet_create(state->id_mh,
+	    cmd->ioc_partid, crgetzoneid(credp));
+	if (err != 0) {
+		DPRINT(10, "ibd_create_partition: dls_devnet_create() failed "
+		    "%d", err);
+		rval = err;
+		(void) mac_unregister(state->id_mh);
+		goto fail;
+	}
+
+	/*
+	 * Add the new partition state structure to the list
+	 */
+	mutex_enter(&ibd_objlist_lock);
+	if (ibd_objlist_head)
+		state->id_next = ibd_objlist_head;
+
+	ibd_objlist_head = state;
+	mutex_exit(&ibd_objlist_lock);
+
+part_create_return:
+	if (pinfop) {
+		ibt_free_portinfo(pinfop, pinfosz);
+	}
+	return (rval);
+
+fail:
+	if (pinfop) {
+		ibt_free_portinfo(pinfop, pinfosz);
+	}
+	(void) ibd_part_unattach(state);
+
+	/*
+	 * Destroy the synchronization objects initialized above before the
+	 * state structure is freed, mirroring what ibd_delete_partition()
+	 * does after its own ibd_part_unattach() call.
+	 */
+	cv_destroy(&state->id_macst_cv);
+	mutex_destroy(&state->id_macst_lock);
+
+	kmem_free(state, sizeof (ibd_state_t));
+	return (rval);
+}
+
+/*
+ * Ioctl handler: delete the partition object whose partition link id
+ * matches cmd->ioc_partid.
+ *
+ * The object is looked up on ibd_objlist under ibd_objlist_lock. Its
+ * devnet is destroyed, the MAC is disabled, the partition is unattached,
+ * the object is unlinked from the list and finally unregistered from the
+ * MAC layer and freed.
+ *
+ * Returns 0 on success, ENOENT if no such partition exists, EBUSY if the
+ * instance is started, is in late HCA initialization, or cannot be
+ * unattached, or the error returned by dls_devnet_destroy() /
+ * mac_disable().
+ */
+/* ARGSUSED */
+static int
+ibd_delete_partition(void *karg, intptr_t arg, int mode, cred_t *credp,
+    int *rvalp)
+{
+	int err;
+	datalink_id_t tmpid;
+	ibd_state_t *node, *prev;
+	ibd_delete_ioctl_t *cmd = karg;
+
+	prev = NULL;
+
+	mutex_enter(&ibd_objlist_lock);
+	node = ibd_objlist_head;
+
+	/* Find the ibd state structure corresponding to the partition */
+	while (node != NULL) {
+		if (node->id_plinkid == cmd->ioc_partid)
+			break;
+		prev = node;
+		node = node->id_next;
+	}
+
+	if (node == NULL) {
+		mutex_exit(&ibd_objlist_lock);
+		return (ENOENT);
+	}
+
+	if ((err = dls_devnet_destroy(node->id_mh, &tmpid, B_TRUE)) != 0) {
+		DPRINT(10, "ibd_delete_partition: dls_devnet_destroy() failed "
+		    "%d", err);
+		mutex_exit(&ibd_objlist_lock);
+		return (err);
+	}
+
+	/*
+	 * Make sure that the instance has not been started yet and is also
+	 * not in late hca init mode before tearing anything else down. If it
+	 * is busy, put the devnet back so that the link is left exactly as
+	 * it was found instead of half deleted.
+	 */
+	ibd_set_mac_progress(node, IBD_DRV_DELETE_IN_PROGRESS);
+	if ((node->id_mac_state & IBD_DRV_STARTED) ||
+	    (node->id_mac_state & IBD_DRV_IN_LATE_HCA_INIT)) {
+		(void) dls_devnet_create(node->id_mh, cmd->ioc_partid,
+		    crgetzoneid(credp));
+		ibd_clr_mac_progress(node, IBD_DRV_DELETE_IN_PROGRESS);
+		mutex_exit(&ibd_objlist_lock);
+		return (EBUSY);
+	}
+
+	if ((err = mac_disable(node->id_mh)) != 0) {
+		(void) dls_devnet_create(node->id_mh, cmd->ioc_partid,
+		    crgetzoneid(credp));
+		ibd_clr_mac_progress(node, IBD_DRV_DELETE_IN_PROGRESS);
+		mutex_exit(&ibd_objlist_lock);
+		return (err);
+	}
+
+	/*
+	 * NOTE(review): if ibd_part_unattach() fails here the MAC has
+	 * already been disabled and the devnet destroyed, and nothing in
+	 * this function undoes that -- confirm whether a rollback is needed.
+	 */
+	if (ibd_part_unattach(node) != DDI_SUCCESS) {
+		ibd_clr_mac_progress(node, IBD_DRV_DELETE_IN_PROGRESS);
+		mutex_exit(&ibd_objlist_lock);
+		return (EBUSY);
+	}
+	/* ibd_m_start() checks this flag and fails with EIO once it is set */
+	node->id_mac_state |= IBD_DRV_IN_DELETION;
+	ibd_clr_mac_progress(node, IBD_DRV_DELETE_IN_PROGRESS);
+
+	/* Remove the partition state structure from the linked list */
+	if (prev == NULL)
+		ibd_objlist_head = node->id_next;
+	else
+		prev->id_next = node->id_next;
+	mutex_exit(&ibd_objlist_lock);
+
+	if ((err = mac_unregister(node->id_mh)) != 0) {
+		DPRINT(10, "ibd_delete_partition: mac_unregister() failed %d",
+		    err);
+	}
+
+	cv_destroy(&node->id_macst_cv);
+	mutex_destroy(&node->id_macst_lock);
+
+	kmem_free(node, sizeof (ibd_state_t));
+
+	return (0);
+}
+
+/*
+ * Ioctl handler: return information about a partition object or about a
+ * port, depending on the ioc_info_cmd field of the ibd_ioctl_t header
+ * copied in from the user address arg:
+ *
+ *   IBD_INFO_CMD_IBPART	copy out the attributes of the partition
+ *				whose partition link id equals ioc_linkid.
+ *   IBD_INFO_CMD_IBPORT	copy out the port number, the HCA/port GUIDs
+ *				and the port's pkey table. The caller must
+ *				pass an ioc_pkey_tbl_sz equal to the real
+ *				table size, otherwise EINVAL is returned
+ *				with ioc_status IBD_INVALID_PKEY_TBL_SIZE.
+ *   IBD_INFO_CMD_PKEYTBLSZ	copy out the port number, the HCA/port GUIDs
+ *				and the size of the port's pkey table.
+ *
+ * For the two port commands the user structure is interpreted according
+ * to the caller's data model when _MULTI_DATAMODEL is defined.
+ *
+ * Returns 0 on success or an errno value (EFAULT, EINVAL, ENOENT).
+ */
+/* ARGSUSED */
+static int
+ibd_get_partition_info(void *karg, intptr_t arg, int mode, cred_t *cred,
+    int *rvalp)
+{
+	ibd_ioctl_t		cmd;
+	ibpart_ioctl_t		partioc;
+	ibport_ioctl_t		portioc;
+#ifdef _MULTI_DATAMODEL
+	ibport_ioctl32_t	portioc32;
+#endif
+	ibd_state_t		*state, *port_state;
+	int			size;
+	ibt_hca_portinfo_t	*pinfop = NULL;
+	ibt_status_t		ibt_status;
+	uint_t			psize, pinfosz;
+	int			rval = 0;
+
+	/* Read the common header first to learn which command this is */
+	size = sizeof (ibd_ioctl_t);
+	if (ddi_copyin((void *)arg, &cmd, size, mode)) {
+		return (EFAULT);
+	}
+	cmd.ioc_status = 0;
+	switch (cmd.ioc_info_cmd) {
+	case IBD_INFO_CMD_IBPART:
+		size = sizeof (ibpart_ioctl_t);
+		if (ddi_copyin((void *)arg, &partioc, size, mode)) {
+			return (EFAULT);
+		}
+
+		mutex_enter(&ibd_objlist_lock);
+		/* Find the ibd state structure corresponding to the partition */
+		for (state = ibd_objlist_head; state; state = state->id_next) {
+			if (state->id_plinkid == cmd.ioc_linkid) {
+				break;
+			}
+		}
+
+		if (state == NULL) {
+			mutex_exit(&ibd_objlist_lock);
+			return (ENOENT);
+		}
+
+		partioc.ibdioc.ioc_linkid = state->id_dlinkid;
+		partioc.ibdioc.ioc_port_inst = state->id_port_inst;
+		partioc.ibdioc.ioc_portnum = state->id_port;
+		partioc.ibdioc.ioc_hcaguid = state->id_hca_guid;
+		partioc.ibdioc.ioc_portguid = state->id_port_guid;
+		partioc.ibdioc.ioc_status = 0;
+		partioc.ioc_partid = state->id_plinkid;
+		partioc.ioc_pkey = state->id_pkey;
+		partioc.ioc_force_create = state->id_force_create;
+		if (ddi_copyout((void *)&partioc, (void *)arg, size, mode)) {
+			mutex_exit(&ibd_objlist_lock);
+			return (EFAULT);
+		}
+		mutex_exit(&ibd_objlist_lock);
+
+		break;
+
+	case IBD_INFO_CMD_IBPORT:
+		if ((cmd.ioc_port_inst < 0) || ((port_state =
+		    ddi_get_soft_state(ibd_list, cmd.ioc_port_inst)) == NULL)) {
+			DPRINT(10, "ibd_get_partition_info: failed to get"
+			    " state %d", cmd.ioc_port_inst);
+			size = sizeof (ibd_ioctl_t);
+			cmd.ioc_status = IBD_INVALID_PORT_INST;
+			if (ddi_copyout((void *)&cmd, (void *)arg, size,
+			    mode)) {
+				return (EFAULT);
+			}
+			return (EINVAL);
+		}
+		ibt_status = ibt_query_hca_ports(port_state->id_hca_hdl,
+		    port_state->id_port, &pinfop, &psize, &pinfosz);
+		if ((ibt_status != IBT_SUCCESS) || (psize != 1)) {
+			/*
+			 * Leave through the common exit so that any port
+			 * info that was returned gets released.
+			 */
+			rval = EINVAL;
+			goto fail;
+		}
+#ifdef _MULTI_DATAMODEL
+		switch (ddi_model_convert_from(mode & FMODELS)) {
+		case DDI_MODEL_ILP32: {
+			size = sizeof (ibport_ioctl32_t);
+			if (ddi_copyin((void *)arg, &portioc32, size, mode)) {
+				rval = EFAULT;
+				goto fail;
+			}
+			portioc32.ibdioc.ioc_status = 0;
+			portioc32.ibdioc.ioc_portnum = port_state->id_port;
+			portioc32.ibdioc.ioc_hcaguid =
+			    port_state->id_hca_guid;
+			portioc32.ibdioc.ioc_portguid =
+			    port_state->id_port_guid;
+			if (portioc32.ioc_pkey_tbl_sz !=
+			    pinfop->p_pkey_tbl_sz) {
+				/* Report the mismatch via the header only */
+				rval = EINVAL;
+				size = sizeof (ibd_ioctl_t);
+				portioc32.ibdioc.ioc_status =
+				    IBD_INVALID_PKEY_TBL_SIZE;
+				if (ddi_copyout((void *)&portioc32.ibdioc,
+				    (void *)arg, size, mode)) {
+					rval = EFAULT;
+					goto fail;
+				}
+				goto fail;
+			}
+			size = pinfop->p_pkey_tbl_sz * sizeof (ib_pkey_t);
+			if (ddi_copyout((void *)pinfop->p_pkey_tbl,
+			    (void *)(uintptr_t)portioc32.ioc_pkeys, size,
+			    mode)) {
+				rval = EFAULT;
+				goto fail;
+			}
+			size = sizeof (ibport_ioctl32_t);
+			if (ddi_copyout((void *)&portioc32, (void *)arg, size,
+			    mode)) {
+				rval = EFAULT;
+				goto fail;
+			}
+			break;
+		}
+		case DDI_MODEL_NONE:
+			size = sizeof (ibport_ioctl_t);
+			if (ddi_copyin((void *)arg, &portioc, size, mode)) {
+				rval = EFAULT;
+				goto fail;
+			}
+			portioc.ibdioc.ioc_status = 0;
+			portioc.ibdioc.ioc_portnum = port_state->id_port;
+			portioc.ibdioc.ioc_hcaguid = port_state->id_hca_guid;
+			portioc.ibdioc.ioc_portguid = port_state->id_port_guid;
+			if (portioc.ioc_pkey_tbl_sz != pinfop->p_pkey_tbl_sz) {
+				/* Report the mismatch via the header only */
+				rval = EINVAL;
+				size = sizeof (ibd_ioctl_t);
+				portioc.ibdioc.ioc_status =
+				    IBD_INVALID_PKEY_TBL_SIZE;
+				if (ddi_copyout((void *)&portioc.ibdioc,
+				    (void *)arg, size, mode)) {
+					rval = EFAULT;
+					goto fail;
+				}
+				goto fail;
+			}
+			size = pinfop->p_pkey_tbl_sz * sizeof (ib_pkey_t);
+			if (ddi_copyout((void *)pinfop->p_pkey_tbl,
+			    (void *)(portioc.ioc_pkeys), size, mode)) {
+				rval = EFAULT;
+				goto fail;
+			}
+			size = sizeof (ibport_ioctl_t);
+			if (ddi_copyout((void *)&portioc, (void *)arg, size,
+			    mode)) {
+				rval = EFAULT;
+				goto fail;
+			}
+			break;
+		}
+#else /* ! _MULTI_DATAMODEL */
+		size = sizeof (ibport_ioctl_t);
+		if (ddi_copyin((void *)arg, &portioc, size, mode)) {
+			rval = EFAULT;
+			goto fail;
+		}
+		portioc.ibdioc.ioc_status = 0;
+		portioc.ibdioc.ioc_portnum = port_state->id_port;
+		portioc.ibdioc.ioc_hcaguid = port_state->id_hca_guid;
+		portioc.ibdioc.ioc_portguid = port_state->id_port_guid;
+		if (portioc.ioc_pkey_tbl_sz != pinfop->p_pkey_tbl_sz) {
+			/* Report the mismatch via the header only */
+			rval = EINVAL;
+			size = sizeof (ibd_ioctl_t);
+			portioc.ibdioc.ioc_status = IBD_INVALID_PKEY_TBL_SIZE;
+			if (ddi_copyout((void *)&portioc.ibdioc, (void *)arg,
+			    size, mode)) {
+				rval = EFAULT;
+				goto fail;
+			}
+			goto fail;
+		}
+		size = pinfop->p_pkey_tbl_sz * sizeof (ib_pkey_t);
+		if (ddi_copyout((void *)pinfop->p_pkey_tbl,
+		    (void *)(portioc.ioc_pkeys), size, mode)) {
+			rval = EFAULT;
+			goto fail;
+		}
+		size = sizeof (ibport_ioctl_t);
+		if (ddi_copyout((void *)&portioc, (void *)arg, size,
+		    mode)) {
+			rval = EFAULT;
+			goto fail;
+		}
+#endif /* _MULTI_DATAMODEL */
+
+		break;
+
+	case IBD_INFO_CMD_PKEYTBLSZ:
+		if ((cmd.ioc_port_inst < 0) || ((port_state =
+		    ddi_get_soft_state(ibd_list, cmd.ioc_port_inst)) == NULL)) {
+			DPRINT(10, "ibd_get_partition_info: failed to get"
+			    " state %d", cmd.ioc_port_inst);
+			size = sizeof (ibd_ioctl_t);
+			cmd.ioc_status = IBD_INVALID_PORT_INST;
+			if (ddi_copyout((void *)&cmd, (void *)arg, size,
+			    mode)) {
+				return (EFAULT);
+			}
+			return (EINVAL);
+		}
+		ibt_status = ibt_query_hca_ports(port_state->id_hca_hdl,
+		    port_state->id_port, &pinfop, &psize, &pinfosz);
+		if ((ibt_status != IBT_SUCCESS) || (psize != 1)) {
+			/*
+			 * Leave through the common exit so that any port
+			 * info that was returned gets released.
+			 */
+			rval = EINVAL;
+			goto fail;
+		}
+#ifdef _MULTI_DATAMODEL
+		switch (ddi_model_convert_from(mode & FMODELS)) {
+		case DDI_MODEL_ILP32: {
+			size = sizeof (ibport_ioctl32_t);
+			if (ddi_copyin((void *)arg, &portioc32, size, mode)) {
+				rval = EFAULT;
+				goto fail;
+			}
+			portioc32.ibdioc.ioc_status = 0;
+			portioc32.ibdioc.ioc_portnum = port_state->id_port;
+			portioc32.ibdioc.ioc_hcaguid =
+			    port_state->id_hca_guid;
+			portioc32.ibdioc.ioc_portguid =
+			    port_state->id_port_guid;
+			portioc32.ioc_pkey_tbl_sz = pinfop->p_pkey_tbl_sz;
+			if (ddi_copyout((void *)&portioc32, (void *)arg, size,
+			    mode)) {
+				rval = EFAULT;
+				goto fail;
+			}
+			break;
+		}
+		case DDI_MODEL_NONE:
+			size = sizeof (ibport_ioctl_t);
+			if (ddi_copyin((void *)arg, &portioc, size, mode)) {
+				rval = EFAULT;
+				goto fail;
+			}
+			portioc.ibdioc.ioc_status = 0;
+			portioc.ibdioc.ioc_portnum = port_state->id_port;
+			portioc.ibdioc.ioc_hcaguid = port_state->id_hca_guid;
+			portioc.ibdioc.ioc_portguid = port_state->id_port_guid;
+			portioc.ioc_pkey_tbl_sz = pinfop->p_pkey_tbl_sz;
+			if (ddi_copyout((void *)&portioc, (void *)arg, size,
+			    mode)) {
+				rval = EFAULT;
+				goto fail;
+			}
+			break;
+		}
+#else /* ! _MULTI_DATAMODEL */
+		size = sizeof (ibport_ioctl_t);
+		if (ddi_copyin((void *)arg, &portioc, size, mode)) {
+			rval = EFAULT;
+			goto fail;
+		}
+		portioc.ibdioc.ioc_status = 0;
+		portioc.ibdioc.ioc_portnum = port_state->id_port;
+		portioc.ibdioc.ioc_hcaguid = port_state->id_hca_guid;
+		portioc.ibdioc.ioc_portguid = port_state->id_port_guid;
+		portioc.ioc_pkey_tbl_sz = pinfop->p_pkey_tbl_sz;
+		if (ddi_copyout((void *)&portioc, (void *)arg, size,
+		    mode)) {
+			rval = EFAULT;
+			goto fail;
+		}
+#endif /* _MULTI_DATAMODEL */
+		break;
+
+	default:
+		return (EINVAL);
+
+	} /* switch (cmd.ioc_info_cmd) */
+fail:
+	/* Common exit: release the port info if one was obtained */
+	if (pinfop) {
+		ibt_free_portinfo(pinfop, pinfosz);
+	}
+	return (rval);
+}
+
+/*
+ * Async event handler for the port driver instance. Only port up and
+ * port down events are acted upon: the current port state is re-read and,
+ * when it differs from the cached link state, the cache is updated and
+ * the MAC layer is notified.
+ */
+/* ARGSUSED */
+static void
+ibdpd_async_handler(void *arg, ibt_hca_hdl_t hca_hdl,
+    ibt_async_code_t code, ibt_async_event_t *event)
+{
+	ibd_state_t	*port_state = (ibd_state_t *)arg;
+	link_state_t	new_lstate;
+
+	/* Every other async code is ignored */
+	if ((code != IBT_EVENT_PORT_UP) && (code != IBT_ERROR_PORT_DOWN))
+		return;
+
+	/* Nothing to report if the port state cannot be obtained */
+	if (ibd_get_port_state(port_state, &new_lstate) != 0)
+		return;
+
+	/* No transition, no notification */
+	if (port_state->id_link_state == new_lstate)
+		return;
+
+	port_state->id_link_state = new_lstate;
+	mac_link_update(port_state->id_mh, new_lstate);
+}
+
+/*
+ * Query the HCA port backing this instance, refresh the cached source
+ * GID and link speed in the state structure, and report through lstate
+ * whether the port is active (LINK_STATE_UP) or not (LINK_STATE_DOWN).
+ *
+ * Returns 0 on success, or -1 when the port query fails or does not
+ * return exactly one port.
+ */
+static int
+ibd_get_port_state(ibd_state_t *state, link_state_t *lstate)
+{
+	ibt_hca_portinfo_t	*pinfop;
+	uint_t			nports, pinfosz;
+	ibt_status_t		status;
+
+	status = ibt_query_hca_ports(state->id_hca_hdl, state->id_port,
+	    &pinfop, &nports, &pinfosz);
+	if (status != IBT_SUCCESS)
+		return (-1);
+	if (nports != 1)
+		return (-1);
+
+	/* The first entry of the SGID table is used as the source GID */
+	state->id_sgid = pinfop->p_sgid_tbl[0];
+	state->id_link_speed = ibd_get_portspeed(state);
+
+	*lstate = (pinfop->p_linkstate == IBT_PORT_ACTIVE) ?
+	    LINK_STATE_UP : LINK_STATE_DOWN;
+
+	ibt_free_portinfo(pinfop, pinfosz);
+	return (0);
+}
+
+/*
+ * Attach a port driver instance for the given dev_info node.
+ *
+ * Allocates the soft state, reads the "port-number", "hca-guid" and
+ * "port-guid" properties, attaches to IBTL, opens the HCA, records the
+ * current link state and registers with the MAC layer.
+ *
+ * Returns DDI_SUCCESS, or DDI_FAILURE after undoing whatever had been set
+ * up (including freeing the soft state) via ibd_port_unattach().
+ */
+static int
+ibd_port_attach(dev_info_t *dip)
+{
+	ibd_state_t		*state;
+	link_state_t		lstate;
+	int			instance;
+	ibt_status_t		ret;
+
+	/*
+	 * Allocate softstate structure
+	 */
+	instance = ddi_get_instance(dip);
+	if (ddi_soft_state_zalloc(ibd_list, instance) == DDI_FAILURE) {
+		DPRINT(10, "ibd_attach: ddi_soft_state_zalloc() failed");
+		return (DDI_FAILURE);
+	}
+
+	state = ddi_get_soft_state(ibd_list, instance);
+
+	state->id_dip = dip;
+	state->id_type = IBD_PORT_DRIVER;
+
+	/*
+	 * From here on every failure must leave through "done" so that the
+	 * soft state allocated above is released again.
+	 */
+	if ((state->id_port = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
+	    "port-number", 0)) == 0) {
+		DPRINT(10, "ibd_attach: invalid port number (%d)",
+		    state->id_port);
+		goto done;
+	}
+	if ((state->id_hca_guid = ddi_prop_get_int64(DDI_DEV_T_ANY, dip, 0,
+	    "hca-guid", 0)) == 0) {
+		DPRINT(10, "ibd_attach: hca has invalid guid (0x%llx)",
+		    state->id_hca_guid);
+		goto done;
+	}
+	if ((state->id_port_guid = ddi_prop_get_int64(DDI_DEV_T_ANY, dip, 0,
+	    "port-guid", 0)) == 0) {
+		DPRINT(10, "ibd_attach: port has invalid guid (0x%llx)",
+		    state->id_port_guid);
+		goto done;
+	}
+
+	/*
+	 * Attach to IBTL
+	 */
+	if ((ret = ibt_attach(&ibdpd_clnt_modinfo, dip, state,
+	    &state->id_ibt_hdl)) != IBT_SUCCESS) {
+		DPRINT(10, "ibd_attach: failed in ibt_attach(), ret=%d", ret);
+		goto done;
+	}
+
+	state->id_mac_state |= IBD_DRV_IBTL_ATTACH_DONE;
+
+	if ((ret = ibt_open_hca(state->id_ibt_hdl, state->id_hca_guid,
+	    &state->id_hca_hdl)) != IBT_SUCCESS) {
+		DPRINT(10, "ibd_attach: ibt_open_hca() failed, ret=%d", ret);
+		goto done;
+	}
+	state->id_mac_state |= IBD_DRV_HCA_OPENED;
+
+	/* Update link status */
+
+	if (ibd_get_port_state(state, &lstate) != 0) {
+		DPRINT(10, "ibd_attach: ibd_get_port_state() failed");
+		goto done;
+	}
+	state->id_link_state = lstate;
+	/*
+	 * Register ibd interfaces with the Nemo framework
+	 */
+	if (ibd_register_mac(state, dip) != IBT_SUCCESS) {
+		DPRINT(10, "ibd_attach: failed in ibd_register_mac()");
+		goto done;
+	}
+	state->id_mac_state |= IBD_DRV_MAC_REGISTERED;
+
+	mac_link_update(state->id_mh, lstate);
+
+	return (DDI_SUCCESS);
+done:
+	/*
+	 * ibd_port_unattach() undoes only the steps recorded in
+	 * id_mac_state and then frees the soft state.
+	 */
+	(void) ibd_port_unattach(state, dip);
+	return (DDI_FAILURE);
+}
+
+/*
+ * Undo ibd_port_attach(): based on the progress bits recorded in
+ * id_mac_state, unregister from the MAC layer, close the HCA and detach
+ * from IBTL, then free the instance's soft state. Failures to close the
+ * HCA or to detach are only warned about. Always returns DDI_SUCCESS.
+ */
+static int
+ibd_port_unattach(ibd_state_t *state, dev_info_t *dip)
+{
+	/* Snapshot of the attach steps that were completed */
+	uint32_t	done_steps = state->id_mac_state;
+	ibt_status_t	status;
+
+	if (done_steps & IBD_DRV_MAC_REGISTERED) {
+		(void) mac_unregister(state->id_mh);
+		state->id_mac_state &= (~IBD_DRV_MAC_REGISTERED);
+	}
+
+	if (done_steps & IBD_DRV_HCA_OPENED) {
+		status = ibt_close_hca(state->id_hca_hdl);
+		if (status != IBT_SUCCESS) {
+			ibd_print_warn(state, "failed to close "
+			    "HCA device, ret=%d", status);
+		}
+		state->id_hca_hdl = NULL;
+		state->id_mac_state &= (~IBD_DRV_HCA_OPENED);
+	}
+
+	if (done_steps & IBD_DRV_IBTL_ATTACH_DONE) {
+		status = ibt_detach(state->id_ibt_hdl);
+		if (status != IBT_SUCCESS) {
+			ibd_print_warn(state,
+			    "ibt_detach() failed, ret=%d", status);
+		}
+		state->id_ibt_hdl = NULL;
+		state->id_mac_state &= (~IBD_DRV_IBTL_ATTACH_DONE);
+	}
+
+	/* The state structure must not be touched after this point */
+	ddi_soft_state_free(ibd_list, ddi_get_instance(dip));
+
+	return (DDI_SUCCESS);
+}
+
+/*
+ * Fill in attr with the attributes of the partition object whose
+ * partition link id equals linkid. The lookup and the copy are done
+ * under ibd_objlist_lock.
+ *
+ * Returns IBT_SUCCESS, or IBT_NO_SUCH_OBJECT if no such partition is on
+ * the list (attr is left untouched in that case).
+ */
+ibt_status_t
+ibd_get_part_attr(datalink_id_t linkid, ibt_part_attr_t *attr)
+{
+	ibd_state_t	*part;
+
+	mutex_enter(&ibd_objlist_lock);
+
+	/* Walk the list until the matching partition or the end is reached */
+	part = ibd_objlist_head;
+	while ((part != NULL) && (part->id_plinkid != linkid))
+		part = part->id_next;
+
+	if (part != NULL) {
+		attr->pa_dlinkid = part->id_dlinkid;
+		attr->pa_plinkid = part->id_plinkid;
+		attr->pa_port = part->id_port;
+		attr->pa_hca_guid = part->id_hca_guid;
+		attr->pa_port_guid = part->id_port_guid;
+		attr->pa_pkey = part->id_pkey;
+
+		mutex_exit(&ibd_objlist_lock);
+		return (IBT_SUCCESS);
+	}
+
+	mutex_exit(&ibd_objlist_lock);
+	return (IBT_NO_SUCH_OBJECT);
+}
+
+/*
+ * Return the attributes of every partition object on ibd_objlist.
+ *
+ * On return *nparts holds the number of partitions and *attr_list points
+ * to a kmem_alloc()ed array with that many entries, or is NULL when
+ * there are none. Nothing in this function frees the array, so releasing
+ * it is left to the caller. Always returns IBT_SUCCESS.
+ */
+ibt_status_t
+ibd_get_all_part_attr(ibt_part_attr_t **attr_list, int *nparts)
+{
+	ibd_state_t	*part;
+	ibt_part_attr_t	*cur;
+	int		count = 0;
+
+	mutex_enter(&ibd_objlist_lock);
+
+	/* First pass: count the entries so the array can be sized */
+	part = ibd_objlist_head;
+	while (part != NULL) {
+		count++;
+		part = part->id_next;
+	}
+
+	*nparts = count;
+	if (count == 0) {
+		*attr_list = NULL;
+		mutex_exit(&ibd_objlist_lock);
+		return (IBT_SUCCESS);
+	}
+
+	cur = kmem_alloc(sizeof (ibt_part_attr_t) * count, KM_SLEEP);
+	*attr_list = cur;
+
+	/* Second pass: fill one array slot per list entry */
+	for (part = ibd_objlist_head; part != NULL;
+	    part = part->id_next, cur++) {
+#ifdef DEBUG
+		ASSERT(count > 0);
+		count--;
+#endif
+		cur->pa_dlinkid = part->id_dlinkid;
+		cur->pa_plinkid = part->id_plinkid;
+		cur->pa_port = part->id_port;
+		cur->pa_hca_guid = part->id_hca_guid;
+		cur->pa_port_guid = part->id_port_guid;
+		cur->pa_pkey = part->id_pkey;
+	}
+
+	mutex_exit(&ibd_objlist_lock);
+	return (IBT_SUCCESS);
+}
--- a/usr/src/uts/common/io/ib/clients/ibd/ibd.conf	Wed Apr 14 10:17:23 2010 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,40 +0,0 @@
-#
-# CDDL HEADER START
-#
-# The contents of this file are subject to the terms of the
-# Common Development and Distribution License (the "License").
-# You may not use this file except in compliance with the License.
-#
-# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
-# or http://www.opensolaris.org/os/licensing.
-# See the License for the specific language governing permissions
-# and limitations under the License.
-#
-# When distributing Covered Code, include this CDDL HEADER in each
-# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
-# If applicable, add the following below this CDDL HEADER, with the
-# fields enclosed by brackets "[]" replaced with your own identifying
-# information: Portions Copyright [yyyy] [name of copyright owner]
-#
-# CDDL HEADER END
-#
-#########################################################################
-#
-# Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
-# Use is subject to license terms.
-#
-# Configuration file for the ibd driver.
-#
-
-#
-# Reliable Connected mode (RC) can be enabled or disabled using
-# enable_rc property.
-#
-# 1: unicast packets will be sent over Reliable Connected Mode
-# 0: unicast packets will be sent over Unreliable Datagram Mode
-#
-# Each element in the list below maps to the corresponding ibd
-# instance; the first element is for ibd instance 0, the second
-# element is for instance 1 and so on.
-#
-enable_rc=1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1;
--- a/usr/src/uts/common/io/ib/clients/ibd/ibd_cm.c	Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/uts/common/io/ib/clients/ibd/ibd_cm.c	Wed Apr 14 10:26:18 2010 -0700
@@ -20,8 +20,7 @@
  */
 
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
  */
 /* Copyright (c) 1990 Mentat Inc. */
 
@@ -55,72 +54,7 @@
 #include <sys/ib/clients/ibd/ibd.h>
 
 extern ibd_global_state_t ibd_gstate;
-
-/* Per-interface tunables (for developers) */
-extern uint_t ibd_rc_tx_copy_thresh;
-/*
- * ibd_rc_rx_copy_thresh
- *     If (the size of incoming buffer <= ibd_rc_rx_copy_thresh), ibd will
- * attempt to allocate a buffer and do a bcopy of the incoming data into
- * the alocated buffer.
- *
- * ibd_rc_rx_rwqe_thresh
- *     If (the number of available rwqe < ibd_rc_rx_rwqe_thresh), ibd will
- * attempt to allocate a buffer and do a bcopy of the incoming data into
- * the allocated buffer.
- */
-uint_t ibd_rc_rx_copy_thresh = 0x1000;
-uint_t ibd_rc_rx_rwqe_thresh = 0x200;	/* old is 32; */
-
-/*
- * ibd_rc_num_swqe
- *	1) Send CQ size = ibd_rc_num_swqe
- *	2) The send queue size = ibd_rc_num_swqe -1
- *	3) Number of pre-allocated Tx buffers for ibt_post_send() =
- * ibd_rc_num_swqe - 1.
- */
-uint_t ibd_rc_num_swqe = 0x1ff;
-
-/*
- * ibd_rc_num_rwqe
- *	1) For non-SRQ, we pre-post ibd_rc_num_rwqe number of WRs
- * via ibt_post_receive() for receive queue of each RC channel.
- *	2) For SRQ and non-SRQ, receive CQ size = ibd_rc_num_rwqe
- */
-uint_t ibd_rc_num_rwqe = 0x7ff;
-
-/*
- * For SRQ
- *	If using SRQ, we allocate ibd_rc_num_srq number of buffers (the size of
- * each buffer is equal to RC mtu). And post them by ibt_post_srq().
- *
- *	ibd_rc_num_srq should not be larger than ibd_rc_num_rwqe, otherwise
- * it will cause a bug with the following warnings:
- * NOTICE: hermon0: Device Error: EQE cq overrun or protection error
- * NOTICE: hermon0: Device Error: EQE local work queue catastrophic error
- * NOTICE: ibd0: HCA GUID 0003ba0001008984 port 1 PKEY ffff catastrophic
- * channel error
- * NOTICE: ibd0: HCA GUID 0003ba0001008984 port 1 PKEY ffff completion queue
- * error
- */
-uint_t ibd_rc_num_srq = 0x7fe;
-
-boolean_t ibd_rc_enable_cq_moderation = B_TRUE;
-
-/*
- * Send CQ moderation parameters
- */
-uint_t ibd_rc_txcomp_count = 10;
-uint_t ibd_rc_txcomp_usec = 300;
-
-/*
- * Receive CQ moderation parameters
- */
-uint_t ibd_rc_rxcomp_count = 4;
-uint_t ibd_rc_rxcomp_usec = 10;
-
 uint_t ibd_rc_tx_softintr = 1;
-
 /*
  * If the number of WRs in receive queue of each RC connection less than
  * IBD_RC_RX_WR_THRESHOLD, we will post more receive WRs into it.
@@ -362,47 +296,6 @@
 	mutex_exit(&ace->tx_too_big_mutex);
 }
 
-void
-ibd_rc_get_conf(ibd_state_t *state)
-{
-	int *props;
-	uint_t num_props;
-	int instance;
-
-	instance = ddi_get_instance(state->id_dip);
-
-	/*
-	 * Get the array of "enable_rc" properties from "ibd.conf" file
-	 */
-	if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, state->id_dip,
-	    DDI_PROP_DONTPASS, "enable_rc", &props, &num_props)
-	    == DDI_PROP_SUCCESS) {
-		if (instance < num_props) {
-			if (props[instance] == 1) {
-				state->id_enable_rc = B_TRUE;
-			} else {
-				state->id_enable_rc = B_FALSE;
-			}
-		} else {
-			/* not enough properties configured */
-			state->id_enable_rc = B_FALSE;
-			DPRINT(40, "ibd_rc_get_conf: Not enough "
-			    "enable_rc values in ibd.conf,"
-			    " disable RC mode, instance=%d", instance);
-		}
-
-		/* free memory allocated for properties */
-		ddi_prop_free(props);
-	} else {
-		state->id_enable_rc = B_FALSE;
-		DPRINT(30, "ibd_rc_get_conf: fail to find "
-		    "enable_rc in ibd.conf, disable RC mode");
-	}
-
-	state->rc_mtu = 65524;
-	state->rc_enable_srq = B_TRUE;
-}
-
 #ifdef DEBUG
 /*
  * ibd_rc_update_stats - update driver private kstat counters
@@ -479,12 +372,15 @@
 {
 	kstat_t *ksp;
 	ibd_rc_stat_t *ibd_rc_ksp;
+	char stat_name[32];
+	int inst;
 
 	/*
 	 * Create and init kstat
 	 */
-	ksp = kstat_create("ibd", ddi_get_instance(state->id_dip),
-	    "statistics", "net", KSTAT_TYPE_NAMED,
+	inst = ddi_get_instance(state->id_dip);
+	(void) snprintf(stat_name, 31, "statistics%d_%x", inst, state->id_pkey);
+	ksp = kstat_create("ibd", 0, stat_name, "net", KSTAT_TYPE_NAMED,
 	    sizeof (ibd_rc_stat_t) / sizeof (kstat_named_t), 0);
 
 	if (ksp == NULL) {
@@ -611,11 +507,11 @@
 
 	/* Allocate IB structures for a new RC channel. */
 	if (is_tx_chan) {
-		chan->scq_size = ibd_rc_num_swqe;
+		chan->scq_size = state->id_rc_num_swqe;
 		chan->rcq_size = IBD_RC_MIN_CQ_SIZE;
 	} else {
 		chan->scq_size = IBD_RC_MIN_CQ_SIZE;
-		chan->rcq_size = ibd_rc_num_rwqe;
+		chan->rcq_size = state->id_rc_num_rwqe;
 	}
 	cq_atts.cq_size = chan->scq_size;
 	cq_atts.cq_sched = NULL;
@@ -629,12 +525,10 @@
 		goto alloc_scq_err;
 	}	/* if failure to alloc cq */
 
-	if (ibd_rc_enable_cq_moderation) {
-		if (ibt_modify_cq(chan->scq_hdl, ibd_rc_txcomp_count,
-		    ibd_rc_txcomp_usec, 0) != IBT_SUCCESS) {
-			ibd_print_warn(state, "ibd_rc_alloc_chan: Send CQ "
-			    "interrupt moderation failed");
-		}
+	if (ibt_modify_cq(chan->scq_hdl, state->id_rc_tx_comp_count,
+	    state->id_rc_tx_comp_usec, 0) != IBT_SUCCESS) {
+		ibd_print_warn(state, "ibd_rc_alloc_chan: Send CQ "
+		    "interrupt moderation failed");
 	}
 
 	ibt_set_cq_private(chan->scq_hdl, (void *) (uintptr_t)chan);
@@ -652,13 +546,12 @@
 		goto alloc_rcq_err;
 	}	/* if failure to alloc cq */
 
-	if (ibd_rc_enable_cq_moderation) {
-		if (ibt_modify_cq(chan->rcq_hdl, ibd_rc_rxcomp_count,
-		    ibd_rc_rxcomp_usec, 0) != IBT_SUCCESS) {
-			ibd_print_warn(state, "ibd_rc_alloc_chan: Receive CQ "
-			    "interrupt moderation failed");
-		}
+	if (ibt_modify_cq(chan->rcq_hdl, state->id_rc_rx_comp_count,
+	    state->id_rc_rx_comp_usec, 0) != IBT_SUCCESS) {
+		ibd_print_warn(state, "ibd_rc_alloc_chan: Receive CQ "
+		    "interrupt moderation failed");
 	}
+
 	ibt_set_cq_private(chan->rcq_hdl, (void *) (uintptr_t)chan);
 	ibt_set_cq_handler(chan->rcq_hdl, ibd_rc_rcq_handler,
 	    (void *)(uintptr_t)chan);
@@ -978,7 +871,7 @@
 	ibt_status_t ret;
 
 	srq_sizes.srq_sgl_sz = 1;
-	srq_sizes.srq_wr_sz = ibd_rc_num_srq;
+	srq_sizes.srq_wr_sz = state->id_rc_num_srq;
 	ret = ibt_alloc_srq(state->id_hca_hdl, IBT_SRQ_NO_FLAGS,
 	    state->id_pd_hdl, &srq_sizes, &state->rc_srq_hdl, &srq_real_sizes);
 	if (ret != IBT_SUCCESS) {
@@ -1443,7 +1336,7 @@
 
 
 #ifdef DEBUG
-	if (rxcnt < ibd_rc_rx_rwqe_thresh) {
+	if (rxcnt < state->id_rc_rx_rwqe_thresh) {
 		state->rc_rwqe_short++;
 	}
 #endif
@@ -1451,8 +1344,8 @@
 	/*
 	 * Possibly replenish the Rx pool if needed.
 	 */
-	if ((rxcnt >= ibd_rc_rx_rwqe_thresh) &&
-	    (wc->wc_bytes_xfer > ibd_rc_rx_copy_thresh)) {
+	if ((rxcnt >= state->id_rc_rx_rwqe_thresh) &&
+	    (wc->wc_bytes_xfer > state->id_rc_rx_copy_thresh)) {
 		atomic_add_64(&state->rc_rcv_trans_byte, wc->wc_bytes_xfer);
 		atomic_inc_64(&state->rc_rcv_trans_pkt);
 
@@ -1758,7 +1651,7 @@
 	size_t  mem_size;
 	int i;
 
-	num_swqe = ibd_rc_num_swqe - 1;
+	num_swqe = state->id_rc_num_swqe - 1;
 
 	/*
 	 * Allocate one big chunk for all Tx large copy bufs
@@ -1814,7 +1707,7 @@
 {
 	uint32_t num_swqe;
 
-	num_swqe = ibd_rc_num_swqe - 1;
+	num_swqe = state->id_rc_num_swqe - 1;
 
 	if (ibt_deregister_mr(state->id_hca_hdl,
 	    state->rc_tx_mr_hdl) != IBT_SUCCESS) {
@@ -1843,7 +1736,7 @@
 	/*
 	 * Allocate one big chunk for all regular tx copy bufs
 	 */
-	mem_attr.mr_len = chan->scq_size * ibd_rc_tx_copy_thresh;
+	mem_attr.mr_len = chan->scq_size * state->id_rc_tx_copy_thresh;
 
 	chan->tx_mr_bufs = kmem_zalloc(mem_attr.mr_len, KM_SLEEP);
 
@@ -1857,7 +1750,7 @@
 	    &chan->tx_mr_hdl, &chan->tx_mr_desc) != IBT_SUCCESS) {
 		DPRINT(40, "ibd_rc_alloc_tx_copybufs: ibt_register_mr failed");
 		ASSERT(mem_attr.mr_len ==
-		    chan->scq_size * ibd_rc_tx_copy_thresh);
+		    chan->scq_size * state->id_rc_tx_copy_thresh);
 		kmem_free(chan->tx_mr_bufs, mem_attr.mr_len);
 		chan->tx_mr_bufs = NULL;
 		return (DDI_FAILURE);
@@ -1875,6 +1768,7 @@
 	ibd_swqe_t *swqe;
 	int i;
 	ibt_lkey_t lkey;
+	ibd_state_t *state = chan->state;
 
 	if (ibd_rc_alloc_tx_copybufs(chan) != DDI_SUCCESS)
 		return (DDI_FAILURE);
@@ -1896,7 +1790,7 @@
 		swqe->w_swr.wr_id = (ibt_wrid_t)(uintptr_t)swqe;
 		swqe->w_swr.wr_flags = IBT_WR_SEND_SIGNAL;
 		swqe->swqe_copybuf.ic_sgl.ds_va = (ib_vaddr_t)(uintptr_t)
-		    (chan->tx_mr_bufs + i * ibd_rc_tx_copy_thresh);
+		    (chan->tx_mr_bufs + i * state->id_rc_tx_copy_thresh);
 		swqe->w_swr.wr_trans = IBT_RC_SRV;
 
 		/* Add to list */
@@ -1916,6 +1810,7 @@
 static void
 ibd_rc_fini_txlist(ibd_rc_chan_t *chan)
 {
+	ibd_state_t *state = chan->state;
 	if (chan->tx_mr_hdl != NULL) {
 		if (ibt_deregister_mr(chan->state->id_hca_hdl,
 		    chan->tx_mr_hdl) != IBT_SUCCESS) {
@@ -1927,7 +1822,7 @@
 
 	if (chan->tx_mr_bufs != NULL) {
 		kmem_free(chan->tx_mr_bufs, chan->scq_size *
-		    ibd_rc_tx_copy_thresh);
+		    state->id_rc_tx_copy_thresh);
 		chan->tx_mr_bufs = NULL;
 	}
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/io/ib/clients/ibd/ibp.conf	Wed Apr 14 10:26:18 2010 -0700
@@ -0,0 +1,25 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+#
+# Configuration file for the ibp driver.
+#
--- a/usr/src/uts/common/io/ib/ibnex/ib.conf	Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/uts/common/io/ib/ibnex/ib.conf	Wed Apr 14 10:26:18 2010 -0700
@@ -2,9 +2,8 @@
 # CDDL HEADER START
 #
 # The contents of this file are subject to the terms of the
-# Common Development and Distribution License, Version 1.0 only
-# (the "License").  You may not use this file except in compliance
-# with the License.
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
 #
 # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 # or http://www.opensolaris.org/os/licensing.
@@ -20,10 +19,8 @@
 # CDDL HEADER END
 #
 #
-# Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
-# Use is subject to license terms.
+# Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
 #
-#ident	"%Z%%M%	%I%	%E% SMI"
 #
 # Configuration file for the IB nexus driver
 #
@@ -75,6 +72,6 @@
 # hca-svc-list="nfs1", "nfs2";
 #
 #
-port-svc-list="";
-vppa-svc-list="ipib";
+port-svc-list="ipib";
+vppa-svc-list="";
 hca-svc-list="";
--- a/usr/src/uts/common/io/ib/ibnex/ibnex.c	Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/uts/common/io/ib/ibnex/ibnex.c	Wed Apr 14 10:26:18 2010 -0700
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 /*
@@ -61,9 +60,9 @@
 static int		ibnex_getinfo(dev_info_t *, ddi_info_cmd_t,
 			    void *, void **);
 static int		ibnex_detach(dev_info_t *, ddi_detach_cmd_t);
-static int		ibnex_busctl(dev_info_t *,
+int			ibnex_busctl(dev_info_t *,
 			    dev_info_t *, ddi_ctl_enum_t, void *, void *);
-static int		ibnex_map_fault(dev_info_t *,
+int			ibnex_map_fault(dev_info_t *,
 			    dev_info_t *, struct hat *, struct seg *,
 			    caddr_t, struct devpage *, pfn_t, uint_t, uint_t);
 static int		ibnex_init_child(dev_info_t *);
@@ -75,6 +74,8 @@
 static void		ibnex_delete_port_node_data(ibnex_node_data_t *);
 int			ibnex_get_dip_from_guid(ib_guid_t, int,
 			    ib_pkey_t, dev_info_t **);
+int 			ibnex_get_node_and_dip_from_guid(ib_guid_t, int,
+			    ib_pkey_t, ibnex_node_data_t **, dev_info_t **);
 static ibnex_node_data_t *ibnex_is_node_data_present(ibnex_node_type_t,
 			    void *, int, ib_pkey_t);
 static ibnex_node_data_t *ibnex_init_child_nodedata(ibnex_node_type_t, void *,
@@ -100,17 +101,15 @@
 			    ddi_bus_config_op_t, void *, dev_info_t **);
 static int		ibnex_bus_unconfig(dev_info_t *,
 			    uint_t, ddi_bus_config_op_t, void *);
-static dev_info_t	*ibnex_config_port_node(dev_info_t *, char *);
-static dev_info_t	*ibnex_config_obp_args(dev_info_t *, char *);
-static int		ibnex_get_pkey_commsvc_index_portnum(
+dev_info_t	*ibnex_config_port_node(dev_info_t *, char *);
+int		ibnex_get_pkey_commsvc_index_portnum(
 			    char *, int *, ib_pkey_t *, uint8_t *);
-static void		ibnex_config_all_children(dev_info_t *);
+void		ibnex_config_all_children(dev_info_t *);
 static int		ibnex_devname_to_portnum(char *, uint8_t *);
-static void		ibnex_create_vppa_nodes(
+void		ibnex_create_vppa_nodes(dev_info_t *, ibdm_port_attr_t *);
+void		ibnex_create_port_nodes(
 			    dev_info_t *, ibdm_port_attr_t *);
-static void		ibnex_create_port_nodes(
-			    dev_info_t *, ibdm_port_attr_t *);
-static void		ibnex_create_hcasvc_nodes(
+void		ibnex_create_hcasvc_nodes(
 			    dev_info_t *, ibdm_port_attr_t *);
 static int		ibnex_config_root_iocnode(dev_info_t *, char *);
 static int		ibnex_devname2port(char *, int *);
@@ -127,14 +126,12 @@
 static int		ibnex_create_ioc_compatible_prop(
 			    dev_info_t *, ib_dm_ioc_ctrl_profile_t *);
 uint64_t		ibnex_str2hex(char *, int, int *);
-static int		ibnex_str2int(char *, int, int *);
+int		ibnex_str2int(char *, int, int *);
 static int		ibnex_create_ioc_portgid_prop(
 			    dev_info_t *, ibdm_ioc_info_t *);
 static void		ibnex_wakeup_reprobe_ioc(ibnex_node_data_t *, int);
 static void		ibnex_wakeup_reprobe_all();
 ibt_status_t		ibnex_ibtl_callback(ibtl_ibnex_cb_args_t *);
-static int		ibnex_prom_devname_to_pkey_n_portnum(
-			    char *, ib_pkey_t *, uint8_t *);
 void			ibnex_pseudo_initnodes(void);
 static char		*ibnex_lookup_named_prop(ddi_prop_t *, char *);
 static void		ibnex_pseudo_node_cleanup(void);
@@ -161,12 +158,12 @@
 int			ibnex_pseudo_create_all_pi(ibnex_node_data_t *);
 static int		ibnex_pseudo_create_pi_pdip(ibnex_node_data_t *,
 			    dev_info_t *);
-static int		ibnex_pseudo_config_one(
+int		ibnex_pseudo_config_one(
 			    ibnex_node_data_t *, char *, dev_info_t *);
-static int		ibnex_pseudo_mdi_config_one(int, void *, dev_info_t **,
+int		ibnex_pseudo_mdi_config_one(int, void *, dev_info_t **,
 			    char *, char *);
 static void		ibnex_config_pseudo_all(dev_info_t *);
-static int		ibnex_ioc_bus_config_one(dev_info_t **, uint_t,
+int		ibnex_ioc_bus_config_one(dev_info_t **, uint_t,
 			    ddi_bus_config_op_t, void *, dev_info_t **, int *);
 static int		ibnex_is_merge_node(dev_info_t *);
 static void		ibnex_hw_in_dev_tree(char *);
@@ -176,35 +173,9 @@
 static int		ibnex_ioc_pi_reachable(ibdm_ioc_info_t *,
     dev_info_t *);
 
-/*
- * The bus_ops structure defines the capabilities of HCA nexus driver.
- */
-struct bus_ops ibnex_ci_busops = {
-	BUSO_REV,
-	nullbusmap,		/* bus_map */
-	NULL,			/* bus_get_intrspec */
-	NULL,			/* bus_add_intrspec */
-	NULL,			/* bus_remove_intrspec */
-	ibnex_map_fault,	/* Map Fault */
-	ddi_no_dma_map,		/* DMA related entry points */
-	NULL,
-	NULL,
-	NULL,
-	NULL,
-	NULL,
-	NULL,
-	NULL,
-	ibnex_busctl,		/* bus_ctl */
-	ddi_bus_prop_op,	/* bus_prop_op */
-	NULL,			/* bus_get_eventcookie	*/
-	NULL,			/* bus_add_eventcall	*/
-	NULL,			/* bus_remove_eventcall	*/
-	NULL,			/* bus_post_event	*/
-	NULL,
-	ibnex_bus_config,	/* bus config */
-	ibnex_bus_unconfig	/* bus unconfig */
-};
-
+extern void ibnex_handle_hca_attach(void *);
+
+extern struct bus_ops ibnex_ci_busops;
 /*
  * Prototype declarations for the VHCI options
  */
@@ -491,12 +462,27 @@
 	}
 	mutex_exit(&ibnex.ibnex_mutex);
 
+	/*
+	 * Create an IB nexus taskq
+	 */
+
+	ibnex.ibnex_taskq_id = ddi_taskq_create(dip,
+	    "ibnex-enum-taskq", 1, TASKQ_DEFAULTPRI, 0);
+	if (ibnex.ibnex_taskq_id == NULL) {
+		IBTF_DPRINTF_L2("ibnex",
+		    "\tattach: ddi_taskq_create() failed");
+		return (DDI_FAILURE);
+
+	}
+
 	/* Register with MPxIO framework */
 
 	if (mdi_vhci_register(MDI_HCI_CLASS_IB, dip, &ibnex_vhci_ops, 0)
 	    != MDI_SUCCESS) {
 		IBTF_DPRINTF_L2("ibnex",
 		    "\tattach: mdi_vhci_register() failed");
+		(void) ddi_taskq_destroy(ibnex.ibnex_taskq_id);
+		ibnex.ibnex_taskq_id = NULL;
 		return (DDI_FAILURE);
 	}
 
@@ -510,6 +496,8 @@
 	    DDI_NT_IB_ATTACHMENT_POINT, 0) != DDI_SUCCESS) {
 		IBTF_DPRINTF_L2("ibnex",
 		    "\tattach: failed to create fabric minornode");
+		(void) ddi_taskq_destroy(ibnex.ibnex_taskq_id);
+		ibnex.ibnex_taskq_id = NULL;
 		(void) mdi_vhci_unregister(dip, 0);
 		return (DDI_FAILURE);
 	}
@@ -523,6 +511,8 @@
 		IBTF_DPRINTF_L2("ibnex",
 		    "\tattach: failed to create devctl minornode");
 		(void) ddi_remove_minor_node(dip, NULL);
+		(void) ddi_taskq_destroy(ibnex.ibnex_taskq_id);
+		ibnex.ibnex_taskq_id = NULL;
 		(void) mdi_vhci_unregister(dip, 0);
 		return (DDI_FAILURE);
 	}
@@ -536,6 +526,8 @@
 		IBTF_DPRINTF_L2("ibnex",
 		    "_attach: create pm-want-child-notification failed");
 		(void) ddi_remove_minor_node(dip, NULL);
+		(void) ddi_taskq_destroy(ibnex.ibnex_taskq_id);
+		ibnex.ibnex_taskq_id = NULL;
 		(void) mdi_vhci_unregister(dip, 0);
 		return (DDI_FAILURE);
 	}
@@ -549,9 +541,11 @@
 	 */
 	if (ndi_event_alloc_hdl(dip, 0, &ibnex.ibnex_ndi_event_hdl,
 	    NDI_SLEEP) != NDI_SUCCESS) {
-		(void) ddi_remove_minor_node(dip, NULL);
 		IBTF_DPRINTF_L2("ibnex",
 		    "_attach: ndi_event_alloc_hdl failed");
+		(void) ddi_remove_minor_node(dip, NULL);
+		(void) ddi_taskq_destroy(ibnex.ibnex_taskq_id);
+		ibnex.ibnex_taskq_id = NULL;
 		(void) mdi_vhci_unregister(dip, 0);
 		return (DDI_FAILURE);
 	}
@@ -561,6 +555,8 @@
 		(void) ndi_event_free_hdl(ibnex.ibnex_ndi_event_hdl);
 		IBTF_DPRINTF_L2("ibnex",
 		    "_attach: ndi_event_bind_set failed");
+		(void) ddi_taskq_destroy(ibnex.ibnex_taskq_id);
+		ibnex.ibnex_taskq_id = NULL;
 		(void) mdi_vhci_unregister(dip, 0);
 		return (DDI_FAILURE);
 	}
@@ -577,6 +573,8 @@
 			ibnex.ibnex_ndi_event_hdl = NULL;
 			IBTF_DPRINTF_L2("ibnex", "_attach: ibnex_comm_svc_init"
 			    " failed %s", ibnex_properties[i].name);
+			(void) ddi_taskq_destroy(ibnex.ibnex_taskq_id);
+			ibnex.ibnex_taskq_id = NULL;
 			(void) mdi_vhci_unregister(dip, 0);
 			return (DDI_FAILURE);
 		}
@@ -676,6 +674,12 @@
 	ibnex.ibnex_dip = NULL;
 	mutex_exit(&ibnex.ibnex_mutex);
 	(void) mdi_vhci_unregister(dip, 0);
+
+	if (ibnex.ibnex_taskq_id != NULL) {
+		ddi_taskq_destroy(ibnex.ibnex_taskq_id);
+		ibnex.ibnex_taskq_id = NULL;
+	}
+
 	return (DDI_SUCCESS);
 }
 
@@ -884,7 +888,7 @@
  *	such calls.
  */
 /*ARGSUSED*/
-static int
+int
 ibnex_map_fault(dev_info_t *dip, dev_info_t *rdip, struct hat *hat,
     struct seg *seg, caddr_t addr, struct devpage *dp, pfn_t pfn,
     uint_t prot, uint_t lock)
@@ -898,7 +902,7 @@
  * 	bus_ctl bus_ops entry point
  */
 /*ARGSUSED*/
-static int
+int
 ibnex_busctl(dev_info_t *dip, dev_info_t *rdip,
     ddi_ctl_enum_t ctlop, void *arg, void *result)
 {
@@ -1120,175 +1124,68 @@
  * BUS_CONFIG_DRIVER:
  *	Enumerate all the instances of a particular driver.
  */
+
 static int
 ibnex_bus_config(dev_info_t *parent, uint_t flag,
     ddi_bus_config_op_t op, void *devname, dev_info_t **child)
 {
 	int			ret = IBNEX_SUCCESS, len, circ, need_bus_config;
 	char 			*device_name, *cname = NULL, *caddr = NULL;
-	char			*device_name1;
-	char			*srvname, nameaddr[MAXNAMELEN];
-	dev_info_t		*cdip, *pdip = NULL;
+	dev_info_t		*cdip;
 	ibnex_node_data_t	*node_data;
-	ibnex_port_node_t	*port_node;
-	int			use_mdi_devi_locking = 0;
-
-	if (parent != ibnex.ibnex_dip) {
-		/*
-		 * This must be an HCA.In a normal case HCA is setup as a phci.
-		 * If an HCA is in maintenance mode, its phci is not set up
-		 * but the driver is attached to update the firmware. In this
-		 * case, do not configure the MPxIO clients.
-		 */
-		if (mdi_component_is_phci(parent, NULL) == MDI_FAILURE) {
-			if (op == BUS_CONFIG_ALL || op == BUS_CONFIG_DRIVER)
-				return (NDI_SUCCESS);
-			else
-				return (NDI_FAILURE);
-		}
-
-		/* Set use_mdi_devi_locking appropriately */
-		if ((op != BUS_CONFIG_ONE) || (op == BUS_CONFIG_ONE &&
-		    strncmp((char *)devname, IBNEX_IBPORT_CNAME, 6) != 0)) {
-			IBTF_DPRINTF_L4("ibnex",
-			    "\tbus_config: using mdi_devi_enter");
-			use_mdi_devi_locking = 1;
-		}
-	}
-
-	if (use_mdi_devi_locking)
-		mdi_devi_enter(parent, &circ);
-	else
-		ndi_devi_enter(parent, &circ);
 
 	switch (op) {
 	case BUS_CONFIG_ONE:
 		IBTF_DPRINTF_L4("ibnex", "\tbus_config: CONFIG_ONE, "
 		    "parent %p", parent);
 
+		ndi_devi_enter(parent, &circ);
+
 		len = strlen((char *)devname) + 1;
 		device_name = i_ddi_strdup(devname, KM_SLEEP);
 		i_ddi_parse_name(device_name, &cname, &caddr, NULL);
 
 		if (caddr == NULL || (strlen(caddr) == 0)) {
 			kmem_free(device_name, len);
-			if (use_mdi_devi_locking)
-				mdi_devi_exit(parent, circ);
-			else
-				ndi_devi_exit(parent, circ);
+			ndi_devi_exit(parent, circ);
 			return (NDI_FAILURE);
 		}
 
-		/*
-		 * i_ddi_parse_name() strips of the address portion
-		 * of the device name. Recreate device name for
-		 * ndi_devi_findchild
-		 */
-		device_name1 = i_ddi_strdup(devname, KM_SLEEP);
-
-		IBTF_DPRINTF_L4("ibnex",
-		    "\tbus_config: cname %s addr %s", cname, caddr);
-
-		cdip = ndi_devi_findchild(parent, device_name1);
+		cdip = ndi_devi_findchild(parent, devname);
 		if (cdip)
 			node_data = ddi_get_parent_data(cdip);
-		kmem_free(device_name1, len);
+
+		ndi_devi_exit(parent, circ);
+
 		if (cdip == NULL || (node_data != NULL &&
 		    node_data->node_dip == NULL)) {
 			/* Node is not present */
 			if (strncmp(cname, IBNEX_IOC_CNAME, 3) == 0) {
-				if (use_mdi_devi_locking)
-					mdi_devi_exit(parent, circ);
-				else
-					ndi_devi_exit(parent, circ);
-
 				ret = ibnex_ioc_bus_config_one(&parent, flag,
 				    op, devname, child, &need_bus_config);
 				if (!need_bus_config) {
 					kmem_free(device_name, len);
 					return (ret);
 				}
-
-				if (use_mdi_devi_locking)
-					mdi_devi_enter(parent, &circ);
-				else
-					ndi_devi_enter(parent, &circ);
-			} else if ((strncmp(cname,
-			    IBNEX_IBPORT_CNAME, 6) == 0) &&
-			    (parent != ibnex.ibnex_dip)) { /* parent is HCA */
-				cdip = ibnex_config_port_node(parent, devname);
-				if (cdip)
-					ret = IBNEX_SUCCESS;
-				else
-					ret = IBNEX_FAILURE;
 			} else {
 				/*
-				 * if not IOC or PORT device then always
-				 * assume a Pseudo child
-				 *
-				 * if IB Nexus is the parent, call MDI.
-				 * else if HCA is the parent, enumerate
-				 * the Pseudo node.
+				 * if IB Nexus is the parent, call MDI. Bus
+				 * config with HCA as the parent would have
+				 * enumerated the Pseudo node.
 				 */
 				ret = IBNEX_SUCCESS;
 				ibnex_pseudo_initnodes();
-				if (parent == ibnex.ibnex_dip) {
-					if (use_mdi_devi_locking)
-						mdi_devi_exit(parent, circ);
-					else
-						ndi_devi_exit(parent, circ);
-
-					mutex_enter(&ibnex.ibnex_mutex);
-					ret = ibnex_pseudo_mdi_config_one(
-					    flag, devname, child, cname,
-					    caddr);
-					mutex_exit(&ibnex.ibnex_mutex);
-					kmem_free(device_name, len);
-					return (ret);
-				}
 				mutex_enter(&ibnex.ibnex_mutex);
-				ret = ibnex_pseudo_config_one(NULL,
-				    caddr, parent);
+				ret = ibnex_pseudo_mdi_config_one(flag, devname,
+				    child, cname, caddr);
 				mutex_exit(&ibnex.ibnex_mutex);
+				kmem_free(device_name, len);
+				return (ret);
 			}
 		}
-
-		if (strncmp(cname, IBNEX_IBPORT_CNAME, 6) == 0) {
-			/* Allows enumeration under PHCI */
-			flag |= NDI_MDI_FALLBACK;
-		}
 		kmem_free(device_name, len);
 		break;
 
-	case BUS_CONFIG_OBP_ARGS:
-		cdip = ibnex_config_obp_args(parent, devname);
-		if (cdip) {
-			/*
-			 * Boot case.
-			 * Special handling because the "devname"
-			 * format for the enumerated device is
-			 * different.
-			 */
-			node_data = ddi_get_parent_data(cdip);
-			port_node = &node_data->node_data.port_node;
-			if (node_data->node_type ==
-			    IBNEX_VPPA_COMMSVC_NODE) {
-				srvname =
-				    ibnex.ibnex_vppa_comm_svc_names[
-				    port_node->port_commsvc_idx];
-				(void) snprintf(nameaddr, MAXNAMELEN,
-				    "ibport@%x,%x,%s",
-				    port_node->port_num,
-				    port_node->port_pkey, srvname);
-			}
-			devname = (void *)nameaddr;
-		} else {
-			IBTF_DPRINTF_L2("ibnex",
-			    "\tbus_config: CONFIG_OBP_ARGS : invalid state!!");
-
-			ret = IBNEX_FAILURE;
-		}
-		break;
 	case BUS_CONFIG_ALL:
 		/*FALLTHRU*/
 	case BUS_CONFIG_DRIVER:
@@ -1300,15 +1197,6 @@
 			    ", parent %p", parent);
 
 		/*
-		 * No locks to be held while calling mdi_vhci_bus_config()
-		 * ibnex_config_all_children() holds appropriate locks.
-		 */
-		if (use_mdi_devi_locking)
-			mdi_devi_exit(parent, circ);
-		else
-			ndi_devi_exit(parent, circ);
-
-		/*
 		 * Drive CONFIG requests for IB Nexus parent through
 		 * MDI. This is needed to load the HCA drivers in x86 SRP
 		 * boot case.
@@ -1316,77 +1204,55 @@
 		 * CONFIG Requests with HCA parent will probe devices using
 		 * ibdm and configure all children.
 		 */
-		if (parent == ibnex.ibnex_dip) {
-			ibdm_ioc_info_t	*ioc_list, *new_ioc_list;
-
-			mutex_enter(&ibnex.ibnex_mutex);
-			while (ibnex.ibnex_ioc_list_state !=
-			    IBNEX_IOC_LIST_READY) {
-				cv_wait(&ibnex.ibnex_ioc_list_cv,
-				    &ibnex.ibnex_mutex);
-			}
-			ibnex.ibnex_ioc_list_state = IBNEX_IOC_LIST_RENEW;
-			mutex_exit(&ibnex.ibnex_mutex);
-			/* Enumerate all the IOC's */
-			ibdm_ibnex_port_settle_wait(0,
-			    ibnex_port_settling_time);
-
-			new_ioc_list = ibdm_ibnex_get_ioc_list(
-			    IBDM_IBNEX_NORMAL_PROBE);
+		ibdm_ioc_info_t	*ioc_list, *new_ioc_list;
+
+		mutex_enter(&ibnex.ibnex_mutex);
+		while (ibnex.ibnex_ioc_list_state !=
+		    IBNEX_IOC_LIST_READY) {
+			cv_wait(&ibnex.ibnex_ioc_list_cv,
+			    &ibnex.ibnex_mutex);
+		}
+		ibnex.ibnex_ioc_list_state = IBNEX_IOC_LIST_RENEW;
+		mutex_exit(&ibnex.ibnex_mutex);
+		/* Enumerate all the IOC's */
+		ibdm_ibnex_port_settle_wait(0, ibnex_port_settling_time);
+
+		new_ioc_list = ibdm_ibnex_get_ioc_list(
+		    IBDM_IBNEX_NORMAL_PROBE);
+		IBTF_DPRINTF_L4("ibnex",
+		    "\tbus_config: alloc ioc_list %p", new_ioc_list);
+		/*
+		 * Optimize the calls for each BUS_CONFIG_ALL request
+		 * to the IB Nexus dip. This is currently done for
+		 * each PDIP.
+		 */
+		mutex_enter(&ibnex.ibnex_mutex);
+		ioc_list = ibnex.ibnex_ioc_list;
+		ibnex.ibnex_ioc_list = new_ioc_list;
+		ibnex.ibnex_ioc_list_state = IBNEX_IOC_LIST_READY;
+		cv_broadcast(&ibnex.ibnex_ioc_list_cv);
+		mutex_exit(&ibnex.ibnex_mutex);
+
+		if (ioc_list) {
 			IBTF_DPRINTF_L4("ibnex",
-			    "\tbus_config: alloc ioc_list %p", new_ioc_list);
-			/*
-			 * Optimize the calls for each BUS_CONFIG_ALL request
-			 * to the IB Nexus dip. This is currently done for
-			 * each PDIP.
-			 */
-			mutex_enter(&ibnex.ibnex_mutex);
-			ioc_list = ibnex.ibnex_ioc_list;
-			ibnex.ibnex_ioc_list = new_ioc_list;
-			ibnex.ibnex_ioc_list_state = IBNEX_IOC_LIST_READY;
-			cv_broadcast(&ibnex.ibnex_ioc_list_cv);
-			mutex_exit(&ibnex.ibnex_mutex);
-
-			if (ioc_list) {
-				IBTF_DPRINTF_L4("ibnex",
-				    "\tbus_config: freeing ioc_list %p",
-				    ioc_list);
-				ibdm_ibnex_free_ioc_list(ioc_list);
-			}
-
-
-			ret = mdi_vhci_bus_config(parent,
-			    flag, op, devname, child, NULL);
-			return (ret);
-		} else {
-			ibnex_config_all_children(parent);
-
-			if (use_mdi_devi_locking)
-				mdi_devi_enter(parent, &circ);
-			else
-				ndi_devi_enter(parent, &circ);
+			    "\tbus_config: freeing ioc_list %p",
+			    ioc_list);
+			ibdm_ibnex_free_ioc_list(ioc_list);
 		}
-		break;
+
+
+		ret = mdi_vhci_bus_config(parent,
+		    flag, op, devname, child, NULL);
+		return (ret);
 	default:
 		IBTF_DPRINTF_L4("ibnex", "\tbus_config: error");
 		ret = IBNEX_FAILURE;
 		break;
 	}
 
-	if (use_mdi_devi_locking)
-		mdi_devi_exit(parent, circ);
-	else
-		ndi_devi_exit(parent, circ);
-
 	if (ret == IBNEX_SUCCESS) {
-		if (op == BUS_CONFIG_OBP_ARGS)
-			op = BUS_CONFIG_ONE;
-
-		if (pdip == NULL)
-			pdip = parent;
-
 		ret = ndi_busop_bus_config(
-		    pdip, flag, op, devname, child, 0);
+		    parent, flag, op, devname, child, 0);
 		IBTF_DPRINTF_L4("ibnex", "\tbus_config:"
 		    "ndi_busop_bus_config : retval %d", ret);
 		return (ret);
@@ -1506,7 +1372,7 @@
  *	Bind drivers for all the newly created device nodes
  *	Support Pseudo nodes enumerated using their .conf file
  */
-static void
+void
 ibnex_config_all_children(dev_info_t *parent)
 {
 	int			ii;
@@ -1517,6 +1383,7 @@
 
 	IBTF_DPRINTF_L4("ibnex", "\tconfig_all_children: Begin");
 
+
 	/*
 	 * Enumerate children of this HCA, port nodes,
 	 * VPPA & HCA_SVC nodes. Use ndi_devi_enter() for
@@ -1535,8 +1402,7 @@
 	for (ii = 0; ii < hca_list->hl_nports; ii++) {
 		ibnex_create_port_nodes(
 		    parent, &hca_list->hl_port_attr[ii]);
-		ibnex_create_vppa_nodes(
-		    parent, &hca_list->hl_port_attr[ii]);
+		ibnex_create_vppa_nodes(parent, &hca_list->hl_port_attr[ii]);
 	}
 	ibdm_ibnex_free_hca_list(hca_list);
 	ndi_devi_exit(parent, circ);
@@ -1577,7 +1443,7 @@
  *	Creates a device node per each communication service defined
  *	in the "port-commsvc-list" property per HCA port
  */
-static void
+void
 ibnex_create_port_nodes(dev_info_t *parent, ibdm_port_attr_t *port_attr)
 {
 	int		idx;
@@ -1588,7 +1454,7 @@
 	for (idx = 0; idx < ibnex.ibnex_num_comm_svcs; idx++) {
 		rval = ibnex_get_dip_from_guid(port_attr->pa_port_guid,
 		    idx, 0, &dip);
-		if (rval != IBNEX_SUCCESS) {
+		if (rval != IBNEX_SUCCESS || dip == NULL) {
 			(void) ibnex_commsvc_initnode(parent, port_attr, idx,
 			    IBNEX_PORT_COMMSVC_NODE, 0, &rval,
 			    IBNEX_DEVFS_ENUMERATE);
@@ -1604,8 +1470,9 @@
  *	in the "vppa-commsvc-list" property and per each PKEY that
  *	this particular port supports and per HCA port
  */
-static void
-ibnex_create_vppa_nodes(dev_info_t *parent, ibdm_port_attr_t *port_attr)
+void
+ibnex_create_vppa_nodes(
+    dev_info_t *parent, ibdm_port_attr_t *port_attr)
 {
 	int 		idx, ii;
 	int		rval;
@@ -1622,6 +1489,10 @@
 		return;
 	}
 	for (idx = 0; idx < ibnex.ibnex_nvppa_comm_svcs; idx++) {
+		if (strcmp("ipib", ibnex.ibnex_vppa_comm_svc_names[idx]) == 0) {
+			IBTF_DPRINTF_L2("ibnex", "Skipping IBD devices...");
+			continue;
+		}
 		for (ii = 0; ii < port_attr->pa_npkeys; ii++) {
 			pkey = port_attr->pa_pkey_tbl[ii].pt_pkey;
 
@@ -1646,7 +1517,7 @@
  *	Creates a device node per each communication service defined
  *	in the "port-commsvc-list" property per HCA port
  */
-static void
+void
 ibnex_create_hcasvc_nodes(dev_info_t *parent, ibdm_port_attr_t *port_attr)
 {
 	int		idx;
@@ -1657,7 +1528,7 @@
 	for (idx = 0; idx < ibnex.ibnex_nhcasvc_comm_svcs; idx++) {
 		rval = ibnex_get_dip_from_guid(port_attr->pa_port_guid,
 		    idx, 0, &dip);
-		if (rval != IBNEX_SUCCESS) {
+		if (rval != IBNEX_SUCCESS || dip == NULL) {
 			(void) ibnex_commsvc_initnode(parent, port_attr, idx,
 			    IBNEX_HCASVC_COMMSVC_NODE, 0, &rval,
 			    IBNEX_DEVFS_ENUMERATE);
@@ -1687,74 +1558,27 @@
 	    DDI_SUCCESS)
 		return (DDI_FAILURE);
 
-	/*
-	 * We can come into this routine with dip as ibnexus dip or hca dip.
-	 * When the dip is that of ib nexus we need to clean up the IOC and
-	 * pseudo nodes. When the dip is that of an HCA (not IB nexus dip)
-	 * cleanup the port nodes.
-	 */
 	if ((op == BUS_UNCONFIG_ALL || op == BUS_UNCONFIG_DRIVER) &&
 	    (flag & (NDI_UNCONFIG | NDI_DETACH_DRIVER))) {
 		mutex_enter(&ibnex.ibnex_mutex);
-		if (parent != ibnex.ibnex_dip) {
-			if (major == -1) {
-				/*
-				 * HCA dip. When major number is -1 HCA is
-				 * going away cleanup all the port nodes.
-				 */
-				for (ndp = ibnex.ibnex_port_node_head;
-				    ndp; ndp = ndp->node_next) {
-					ibnex_port_node_t	*port_node;
-
-					port_node = &ndp->node_data.port_node;
-					if (port_node->port_pdip == parent) {
-						port_node->port_pdip = NULL;
-						ndp->node_dip = NULL;
-						ndp->node_state =
-						    IBNEX_CFGADM_UNCONFIGURED;
-					}
-				}
-			} else {
-				/*
-				 * HCA dip. Cleanup only the port nodes that
-				 * match the major number.
-				 */
-				for (ndp = ibnex.ibnex_port_node_head;
-				    ndp; ndp = ndp->node_next) {
-					ibnex_port_node_t	*port_node;
-
-					port_node = &ndp->node_data.port_node;
-					dip = ndp->node_dip;
-					if (dip && (ddi_driver_major(dip) ==
-					    major) && port_node->port_pdip ==
-					    parent) {
-						port_node->port_pdip = NULL;
-						ndp->node_dip = NULL;
-						ndp->node_state =
-						    IBNEX_CFGADM_UNCONFIGURED;
-					}
-				}
+		/*
+		 * IB dip. here we handle IOC and pseudo nodes which
+		 * are the children of IB nexus. Cleanup only the nodes
+		 * with matching major number. We also need to cleanup
+		 * the PathInfo links to the PHCI here.
+		 */
+		for (ndp = ibnex.ibnex_ioc_node_head;
+		    ndp; ndp = ndp->node_next) {
+			dip = ndp->node_dip;
+			if (dip && (ddi_driver_major(dip) == major)) {
+				(void) ibnex_offline_childdip(dip);
 			}
-		} else {
-			/*
-			 * IB dip. here we handle IOC and pseudo nodes which
-			 * are the children of IB nexus. Cleanup only the nodes
-			 * with matching major number. We also need to cleanup
-			 * the PathInfo links to the PHCI here.
-			 */
-			for (ndp = ibnex.ibnex_ioc_node_head;
-			    ndp; ndp = ndp->node_next) {
-				dip = ndp->node_dip;
-				if (dip && (ddi_driver_major(dip) == major)) {
-					(void) ibnex_offline_childdip(dip);
-				}
-			}
-			for (ndp = ibnex.ibnex_pseudo_node_head;
-			    ndp; ndp = ndp->node_next) {
-				dip = ndp->node_dip;
-				if (dip && (ddi_driver_major(dip) == major)) {
-					(void) ibnex_offline_childdip(dip);
-				}
+		}
+		for (ndp = ibnex.ibnex_pseudo_node_head;
+		    ndp; ndp = ndp->node_next) {
+			dip = ndp->node_dip;
+			if (dip && (ddi_driver_major(dip) == major)) {
+				(void) ibnex_offline_childdip(dip);
 			}
 		}
 		mutex_exit(&ibnex.ibnex_mutex);
@@ -1775,7 +1599,7 @@
  *	Returns "dev_info_t" of the "child" node just created
  *	NULL when failed to enumerate the child node
  */
-static dev_info_t *
+dev_info_t *
 ibnex_config_port_node(dev_info_t *parent, char *devname)
 {
 	int			ii, index;
@@ -1826,8 +1650,8 @@
 
 	port_guid = port_attr->pa_port_guid;
 	mutex_enter(&ibnex.ibnex_mutex);
-	if ((rval = ibnex_get_dip_from_guid(port_guid, index, pkey,
-	    &cdip)) == IBNEX_SUCCESS) {
+	rval = ibnex_get_dip_from_guid(port_guid, index, pkey, &cdip);
+	if ((rval == IBNEX_SUCCESS) && cdip != NULL) {
 		IBTF_DPRINTF_L4("ibnex", "\tconfig_port_node: Node exists");
 		mutex_exit(&ibnex.ibnex_mutex);
 		if (port_num != 0)
@@ -1878,152 +1702,12 @@
 
 
 /*
- * ibnex_config_obp_args()
- *	Configures a particular port node for a IP over IB communication
- *	service.
- *	The format of the input string "devname" is
- *		port=x,pkey=y,protocol=ip,<wanboot options>
- *	Thr format of the node name created here is
- *		ibport@<Port#>,<pkey>,<service name>
- *	where pkey = 0 for port communication service nodes
- *	Returns "dev_info_t" of the "child" node just created
- *	NULL when failed to enumerate the child node
- *
- */
-static dev_info_t *
-ibnex_config_obp_args(dev_info_t *parent, char *devname)
-{
-	int			ii, index;
-	int			rval, iter = 0;
-	char			*temp;
-	uint8_t			port_num;
-	ib_guid_t		hca_guid, port_guid;
-	ib_pkey_t		pkey;
-	dev_info_t		*cdip;
-	boolean_t		displayed = B_FALSE;
-	ibdm_port_attr_t	*port_attr;
-
-	IBTF_DPRINTF_L4("ibnex", "\tconfig_obp_args: %s", devname);
-
-	/* Is this OBP node for IPoIB ? */
-	temp = devname;
-	do {
-		temp = strstr(temp, ",protocol=ip");
-		if (temp == NULL)
-			break;
-
-		if (strlen(devname) > (int)((temp - devname) + 12)) {
-			if (temp[12] == ',')
-				break;
-		} else {
-			break;
-		}
-		temp++;
-	} while (temp);
-
-	if (temp == NULL)
-		return (NULL);
-	if (ibnex_prom_devname_to_pkey_n_portnum(
-	    devname, &pkey, &port_num) != IBNEX_SUCCESS) {
-		return (NULL);
-	}
-	for (index = 0; index < ibnex.ibnex_nvppa_comm_svcs; index++) {
-		if (strcmp(ibnex.ibnex_vppa_comm_svc_names[index],
-		    "ipib") == 0) {
-			break;
-		}
-	}
-
-	hca_guid = ibtl_ibnex_hcadip2guid(parent);
-	if ((port_attr = ibdm_ibnex_probe_hcaport(
-	    hca_guid, port_num)) == NULL) {
-		IBTF_DPRINTF_L2("ibnex",
-		    "\tconfig_port_node: Port does not exist");
-		return (NULL);
-	}
-
-	/* Wait until "port is up" */
-	while (port_attr->pa_state != IBT_PORT_ACTIVE) {
-		ibdm_ibnex_free_port_attr(port_attr);
-		delay(drv_usectohz(10000));
-		if ((port_attr = ibdm_ibnex_probe_hcaport(
-		    hca_guid, port_num)) == NULL) {
-			return (NULL);
-		}
-		if (iter++ == 400) {
-			if (displayed == B_FALSE) {
-				cmn_err(CE_NOTE, "\tWaiting for Port %d "
-				    "initialization", port_attr->pa_port_num);
-				displayed = B_TRUE;
-			}
-		}
-	}
-	IBTF_DPRINTF_L4("ibnex", "\tPort is initialized");
-
-	mutex_enter(&ibnex.ibnex_mutex);
-	port_guid = port_attr->pa_port_guid;
-	if ((rval = ibnex_get_dip_from_guid(port_guid, index, pkey,
-	    &cdip)) == IBNEX_SUCCESS) {
-		IBTF_DPRINTF_L4("ibnex", "\tconfig_port_node: Node exists");
-		mutex_exit(&ibnex.ibnex_mutex);
-		ibdm_ibnex_free_port_attr(port_attr);
-		return (cdip);
-	}
-	for (ii = 0; ii < port_attr->pa_npkeys; ii++) {
-		if (pkey == port_attr->pa_pkey_tbl[ii].pt_pkey) {
-			cdip = ibnex_commsvc_initnode(parent, port_attr,
-			    index, IBNEX_VPPA_COMMSVC_NODE, pkey, &rval,
-			    IBNEX_CFGADM_ENUMERATE);
-			IBTF_DPRINTF_L5("ibnex",
-			    "\t ibnex_commsvc_initnode rval %x", rval);
-			break;
-		}
-	}
-	mutex_exit(&ibnex.ibnex_mutex);
-
-	ibdm_ibnex_free_port_attr(port_attr);
-	return (cdip);
-}
-
-
-/*
- * ibnex_prom_devname_to_pkey_n_portnum()
- *	Parses the device node name and extracts "PKEY" and "port#"
- *	Returns IBNEX_SUCCESS/IBNEX_FAILURE
- */
-static int
-ibnex_prom_devname_to_pkey_n_portnum(
-    char *devname, ib_pkey_t *pkey, uint8_t *port)
-{
-	int	ret = IBNEX_SUCCESS;
-	char	*tmp, *tmp1;
-
-	if ((tmp = strstr(devname, "port=")) != NULL) {
-		if ((tmp = strchr(++tmp, '=')) != NULL)
-			if ((tmp1 = strchr(++tmp, ',')) != NULL)
-				*port = ibnex_str2int(tmp, (tmp1 - tmp), &ret);
-	} else
-		ret = IBNEX_FAILURE;
-
-	if ((ret == IBNEX_SUCCESS) &&
-	    (tmp = strstr(devname, "pkey=")) != NULL) {
-		if ((tmp = strchr(++tmp, '=')) != NULL)
-			if ((tmp1 = strchr(++tmp, ',')) != NULL)
-				*pkey = ibnex_str2hex(tmp, (tmp1 - tmp), &ret);
-	} else
-		ret = IBNEX_FAILURE;
-
-	return (ret);
-}
-
-
-/*
  * ibnex_get_pkey_commsvc_index_portnum()
  *	Parses the device node name and extracts PKEY, communication
  *	service index & Port #.
  *	Returns IBNEX_SUCCESS/IBNEX_FAILURE
  */
-static int
+int
 ibnex_get_pkey_commsvc_index_portnum(char *device_name, int *index,
     ib_pkey_t *pkey, uint8_t *port_num)
 {
@@ -2238,7 +1922,7 @@
 /*
  * ibnex_pseudo_config_one()
  */
-static int
+int
 ibnex_pseudo_config_one(ibnex_node_data_t *node_data, char *caddr,
     dev_info_t *pdip)
 {
@@ -2345,7 +2029,7 @@
  * node_state, node_dip, etc. These checks and initializations
  * are done when BUS_CONFIG is called with PHCI as the parent.
  */
-static int
+int
 ibnex_pseudo_mdi_config_one(int flag, void *devname, dev_info_t **child,
     char *cname, char *caddr)
 {
@@ -2948,6 +2632,7 @@
 	ibdm_ioc_info_t	*ioc_list, *ioc;
 	ibnex_node_data_t	*node_data;
 	dev_info_t		*phci;
+	ib_guid_t		*guid;
 
 	IBTF_DPRINTF_L4("ibnex", "\tdm_callback: attr %p event %x", arg, flag);
 
@@ -2963,6 +2648,15 @@
 			    "create minor node for port w/ guid %s", hca_guid);
 		}
 
+		guid = kmem_alloc(sizeof (ib_guid_t), KM_SLEEP);
+		*guid = *(ib_guid_t *)arg;
+		if (ddi_taskq_dispatch(ibnex.ibnex_taskq_id,
+		    ibnex_handle_hca_attach, guid, DDI_NOSLEEP)
+		    != DDI_SUCCESS) {
+			IBTF_DPRINTF_L4("ibnex", "\tdm_callback: failed to "
+			    "dispatch HCA add event for guid %s", hca_guid);
+		}
+
 		break;
 
 	case IBDM_EVENT_HCA_REMOVED:
@@ -3002,6 +2696,56 @@
 
 
 /*
+ * ibnex_get_node_and_dip_from_guid()
+ *
+ *	Searches the linked list of the port nodes for the Port / Node guid,
+ *	commsvc index and pkey requested; sets *nodep and *dip (may be NULL).
+ *	Returns IBNEX_SUCCESS if a node matches, IBNEX_FAILURE otherwise.
+ */
+int
+ibnex_get_node_and_dip_from_guid(ib_guid_t guid, int index, ib_pkey_t pkey,
+    ibnex_node_data_t **nodep, dev_info_t **dip)
+{
+	ibnex_node_data_t	*ndp;
+	ibnex_port_node_t	*port;
+
+	IBTF_DPRINTF_L4("ibnex",
+	    "\tget_node_and_dip_from_guid: guid = %llX", guid);
+
+	/* Caller must already hold ibnex_mutex while the list is walked */
+	ASSERT(MUTEX_HELD(&ibnex.ibnex_mutex));
+
+	/*
+	 * Walk the port node list looking for the entry whose guid,
+	 * communication service index and pkey all match the request.
+	 */
+	for (ndp = ibnex.ibnex_port_node_head; ndp != NULL;
+	    ndp = ndp->node_next) {
+		port = &ndp->node_data.port_node;
+		if (port->port_guid != guid ||
+		    port->port_commsvc_idx != index ||
+		    port->port_pkey != pkey)
+			continue;
+
+		/*
+		 * Matching node found. Its dip is handed back as is, so
+		 * *dip is NULL when the node currently has no dip.
+		 */
+		*nodep = ndp;
+		*dip = ndp->node_dip;
+		return (IBNEX_SUCCESS);
+	}
+
+	/* no match found */
+	*nodep = NULL;
+	*dip = NULL;
+	return (IBNEX_FAILURE);
+}
+
+/*
  * ibnex_get_dip_from_guid()
  *
  *	Searches the linked list of the port nodes and returns the dip for
@@ -3490,7 +3234,7 @@
  *	integer.
  *	Returns IBNEX_SUCCESS/IBNEX_FAILURE
  */
-static int
+int
 ibnex_str2int(char *c, int len, int *ret)
 {
 	int intval = 0, ii;
@@ -4209,23 +3953,13 @@
 	 * the client would have been detached by mdi_devi_offline.
 	 */
 	if (clnt_num_pi == 1) {
-		for (node_data = ibnex.ibnex_ioc_node_head;
-		    node_data; node_data = node_data->node_next) {
-			if (node_data->node_dip == cdip) {
-				node_data->node_dip = NULL;
-				node_data->node_state =
-				    IBNEX_CFGADM_UNCONFIGURED;
-				return (MDI_SUCCESS);
-			}
-		}
-		for (node_data = ibnex.ibnex_pseudo_node_head;
-		    node_data; node_data = node_data->node_next) {
-			if (node_data->node_dip == cdip) {
-				node_data->node_dip = NULL;
-				node_data->node_state =
-				    IBNEX_CFGADM_UNCONFIGURED;
-				return (MDI_SUCCESS);
-			}
+		node_data = ddi_get_parent_data(cdip);
+		if (node_data == NULL)
+			return (MDI_SUCCESS);
+		if (node_data->node_dip == cdip) {
+			node_data->node_dip = NULL;
+			node_data->node_state = IBNEX_CFGADM_UNCONFIGURED;
+			return (MDI_SUCCESS);
 		}
 	}
 	return (MDI_SUCCESS);
@@ -4298,7 +4032,7 @@
  *	1. ibdm to probe IOC
  *	2. Create a pathinfo only if the IOC is reachable from the parent dip.
  */
-static int
+int
 ibnex_ioc_bus_config_one(dev_info_t **pdipp, uint_t flag,
     ddi_bus_config_op_t op, void *devname, dev_info_t **child,
     int *need_bus_config)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/io/ib/ibnex/ibnex_hca.c	Wed Apr 14 10:26:18 2010 -0700
@@ -0,0 +1,608 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#include <sys/conf.h>
+#include <sys/stat.h>
+#include <sys/modctl.h>
+#include <sys/taskq.h>
+#include <sys/mdi_impldefs.h>
+#include <sys/sunmdi.h>
+#include <sys/sunpm.h>
+#include <sys/ib/mgt/ibdm/ibdm_impl.h>
+#include <sys/ib/ibnex/ibnex.h>
+#include <sys/ib/ibnex/ibnex_devctl.h>
+#include <sys/ib/ibtl/ibti.h>
+#include <sys/ib/ibtl/impl/ibtl_ibnex.h>
+#include <sys/file.h>
+#include <sys/hwconf.h>
+#include <sys/fs/dv_node.h>
+
+void ibnex_handle_hca_attach(void *);
+static int ibnex_hca_bus_config_one(dev_info_t *, void *,
+		ddi_bus_config_op_t, uint_t *, dev_info_t **);
+
+static ibnex_node_data_t *ibnex_get_cdip_info(dev_info_t *, char *,
+		dev_info_t **, ibnex_node_type_t *);
+static int ibnex_prom_devname_to_pkey_n_portnum(
+		char *, ib_pkey_t *, uint8_t *);
+static dev_info_t *ibnex_config_obp_args(dev_info_t *, char *);
+
+extern int	ibnex_busctl(dev_info_t *,
+		    dev_info_t *, ddi_ctl_enum_t, void *, void *);
+extern int	ibnex_map_fault(dev_info_t *,
+		    dev_info_t *, struct hat *, struct seg *,
+			caddr_t, struct devpage *, pfn_t, uint_t, uint_t);
+static int	ibnex_hca_bus_config(dev_info_t *, uint_t,
+		    ddi_bus_config_op_t, void *, dev_info_t **);
+static int	ibnex_hca_bus_unconfig(dev_info_t *,
+		    uint_t, ddi_bus_config_op_t, void *);
+extern dev_info_t	*ibnex_config_port_node(dev_info_t *, char *);
+extern dev_info_t	*ibnex_config_obp_args(dev_info_t *, char *);
+extern int		ibnex_ioc_bus_config_one(dev_info_t **, uint_t,
+			    ddi_bus_config_op_t, void *, dev_info_t **, int *);
+extern int		ibnex_pseudo_config_one(
+		    ibnex_node_data_t *, char *, dev_info_t *);
+extern void		ibnex_config_all_children(dev_info_t *);
+extern void			ibnex_pseudo_initnodes(void);
+
+extern int		ibnex_pseudo_mdi_config_one(int, void *, dev_info_t **,
+			    char *, char *);
+extern int			ibnex_get_dip_from_guid(ib_guid_t, int,
+			    ib_pkey_t, dev_info_t **);
+extern dev_info_t	*ibnex_commsvc_initnode(dev_info_t *,
+			    ibdm_port_attr_t *, int, int, ib_pkey_t, int *,
+			    int);
+extern uint64_t		ibnex_str2hex(char *, int, int *);
+extern int		ibnex_str2int(char *, int, int *);
+extern void		ibnex_create_hcasvc_nodes(
+			    dev_info_t *, ibdm_port_attr_t *);
+extern void		ibnex_create_port_nodes(
+			    dev_info_t *, ibdm_port_attr_t *);
+extern void		ibnex_create_vppa_nodes(
+			    dev_info_t *, ibdm_port_attr_t *);
+extern int		ibnex_get_pkey_commsvc_index_portnum(
+			    char *, int *, ib_pkey_t *, uint8_t *);
+
+extern ibnex_t	ibnex;
+extern int	ibnex_port_settling_time;
+
+/*
+ * The bus_ops structure defines the capabilities of HCA nexus driver.
+ *
+ * The initializer is positional. Only map fault, bus_ctl, bus_prop_op,
+ * bus config and bus unconfig are given ibnex/DDI handlers here.
+ * NOTE(review): the slot names added on the previously unlabeled NULL
+ * entries are presumed from the member order of struct bus_ops in
+ * <sys/devops.h> - confirm against the header.
+ */
+struct bus_ops ibnex_ci_busops = {
+	BUSO_REV,		/* busops_rev (presumed) */
+	nullbusmap,		/* bus_map */
+	NULL,			/* bus_get_intrspec */
+	NULL,			/* bus_add_intrspec */
+	NULL,			/* bus_remove_intrspec */
+	ibnex_map_fault,	/* Map Fault */
+	ddi_no_dma_map,		/* DMA related entry points */
+	NULL,			/* bus_dma_allochdl (presumed) */
+	NULL,			/* bus_dma_freehdl (presumed) */
+	NULL,			/* bus_dma_bindhdl (presumed) */
+	NULL,			/* bus_dma_unbindhdl (presumed) */
+	NULL,			/* bus_dma_flush (presumed) */
+	NULL,			/* bus_dma_win (presumed) */
+	NULL,			/* bus_dma_ctl (presumed) */
+	ibnex_busctl,		/* bus_ctl */
+	ddi_bus_prop_op,	/* bus_prop_op */
+	NULL,			/* bus_get_eventcookie	*/
+	NULL,			/* bus_add_eventcall	*/
+	NULL,			/* bus_remove_eventcall	*/
+	NULL,			/* bus_post_event	*/
+	NULL,			/* bus_intr_ctl (presumed) */
+	ibnex_hca_bus_config,	/* bus config */
+	ibnex_hca_bus_unconfig	/* bus unconfig */
+};
+
+/*
+ * ibnex_hca_bus_config()
+ *
+ *	bus_config entry point installed in ibnex_ci_busops for HCA nodes.
+ *
+ * BUS_CONFIG_ONE:
+ *	Enumerate the exact instance of the driver. Use the device node name
+ *	to locate the exact instance.
+ *	Query IBDM to find whether the hardware exists for the instance of the
+ *	driver. If exists, create a device node and return NDI_SUCCESS.
+ *
+ * BUS_CONFIG_OBP_ARGS:
+ *	Boot case. The OBP style "devname" is turned into a child node by
+ *	ibnex_config_obp_args() and is then rewritten into the
+ *	"ibport@<port>,<pkey>,<service>" form before it is handed on as a
+ *	BUS_CONFIG_ONE request.
+ *
+ * BUS_CONFIG_ALL:
+ *	Enumerate all the instances of all the possible children (seen before
+ *	and never seen before).
+ *
+ * BUS_CONFIG_DRIVER:
+ *	Enumerate all the instances of a particular driver.
+ *
+ *	Returns the value of ndi_busop_bus_config() when the enumeration
+ *	step succeeded, NDI_FAILURE otherwise. When "parent" is not a pHCI
+ *	nothing is enumerated: NDI_SUCCESS is returned for BUS_CONFIG_ALL
+ *	and BUS_CONFIG_DRIVER, NDI_FAILURE for every other operation.
+ */
+static int
+ibnex_hca_bus_config(dev_info_t *parent, uint_t flag,
+    ddi_bus_config_op_t op, void *devname, dev_info_t **child)
+{
+	int			ret = IBNEX_SUCCESS, circ;
+	char			*srvname, nameaddr[MAXNAMELEN];
+	dev_info_t		*cdip;
+	ibnex_node_data_t	*node_data;
+	ibnex_port_node_t	*port_node;
+
+	/*
+	 * In a normal case HCA is setup as a phci.
+	 * If an HCA is in maintenance mode, its phci is not set up
+	 * but the driver is attached to update the firmware. In this
+	 * case, do not configure the MPxIO clients.
+	 */
+	if (mdi_component_is_phci(parent, NULL) == MDI_FAILURE) {
+		if (op == BUS_CONFIG_ALL || op == BUS_CONFIG_DRIVER)
+			return (NDI_SUCCESS);
+		else
+			return (NDI_FAILURE);
+	}
+
+	switch (op) {
+	case BUS_CONFIG_ONE:
+		IBTF_DPRINTF_L4("ibnex", "\thca_bus_config: CONFIG_ONE, "
+		    "parent %p", parent);
+		/* may OR NDI_MDI_FALLBACK into flag for ibport children */
+		ret = ibnex_hca_bus_config_one(
+		    parent, devname, op, &flag, child);
+		break;
+
+	case BUS_CONFIG_OBP_ARGS:
+		mdi_devi_enter(parent, &circ);
+		cdip = ibnex_config_obp_args(parent, devname);
+		node_data = (cdip != NULL) ? ddi_get_parent_data(cdip) : NULL;
+		/*
+		 * nameaddr is only meaningful for a VPPA communication
+		 * service node. Anything else (no child, no parent data,
+		 * or another node type) is treated as a failure so that an
+		 * uninitialized name is never passed on as "devname".
+		 */
+		if (node_data != NULL &&
+		    node_data->node_type == IBNEX_VPPA_COMMSVC_NODE) {
+			/*
+			 * Boot case.
+			 * Special handling because the "devname"
+			 * format for the enumerated device is
+			 * different.
+			 */
+			port_node = &node_data->node_data.port_node;
+			srvname = ibnex.ibnex_vppa_comm_svc_names[
+			    port_node->port_commsvc_idx];
+			(void) snprintf(nameaddr, MAXNAMELEN,
+			    "ibport@%x,%x,%s",
+			    port_node->port_num,
+			    port_node->port_pkey, srvname);
+			devname = (void *)nameaddr;
+		} else {
+			IBTF_DPRINTF_L2("ibnex", "\thca_bus_config: "
+			    "CONFIG_OBP_ARGS : invalid state!!");
+
+			ret = IBNEX_FAILURE;
+		}
+		mdi_devi_exit(parent, circ);
+		break;
+
+	case BUS_CONFIG_ALL:
+		IBTF_DPRINTF_L4("ibnex",
+		    "\thca_bus_config: CONFIG_ALL parent %p", parent);
+		ibnex_config_all_children(parent);
+		break;
+
+	case BUS_CONFIG_DRIVER:
+		IBTF_DPRINTF_L4("ibnex", "\thca_bus_config: "
+		    "CONFIG_DRIVER parent %p", parent);
+		ibnex_config_all_children(parent);
+		break;
+
+	default:
+		IBTF_DPRINTF_L4("ibnex", "\thca_bus_config: error");
+		ret = IBNEX_FAILURE;
+		break;
+	}
+
+
+	if (ret == IBNEX_SUCCESS) {
+		/* devname now has the ibport@ form, so configure it as ONE */
+		if (op == BUS_CONFIG_OBP_ARGS)
+			op = BUS_CONFIG_ONE;
+
+		ret = ndi_busop_bus_config(
+		    parent, flag, op, devname, child, 0);
+		IBTF_DPRINTF_L4("ibnex", "\thca_bus_config:"
+		    "ndi_busop_bus_config : retval %d", ret);
+		return (ret);
+	}
+
+	return (NDI_FAILURE);
+}
+
+/*
+ * ibnex_hca_bus_unconfig()
+ *
+ *	bus_unconfig entry point installed in ibnex_ci_busops for HCA nodes.
+ *	The request is first passed to ndi_busop_bus_unconfig(); if that
+ *	fails DDI_FAILURE is returned and nothing else is touched.
+ *
+ *	For BUS_UNCONFIG_ALL / BUS_UNCONFIG_DRIVER with NDI_UNCONFIG set,
+ *	the port nodes recorded under this HCA ("parent") are then marked
+ *	unconfigured in the ibnex port node list:
+ *	- major == -1: the HCA is going away, every port node of the HCA
+ *	- otherwise: only the port nodes bound to the given driver major
+ *
+ *	Returns DDI_SUCCESS / DDI_FAILURE
+ */
+static int
+ibnex_hca_bus_unconfig(dev_info_t *parent,
+    uint_t flag, ddi_bus_config_op_t op, void *device_name)
+{
+	ibnex_node_data_t	*ndp;
+	ibnex_port_node_t	*port_node;
+	major_t			major;
+
+	if (ndi_busop_bus_unconfig(parent, flag, op, device_name) !=
+	    DDI_SUCCESS)
+		return (DDI_FAILURE);
+
+	/* the list cleanup only applies to ALL / DRIVER unconfigure */
+	if ((op != BUS_UNCONFIG_ALL && op != BUS_UNCONFIG_DRIVER) ||
+	    !(flag & NDI_UNCONFIG))
+		return (DDI_SUCCESS);
+
+	major = (major_t)(uintptr_t)device_name;
+
+	mutex_enter(&ibnex.ibnex_mutex);
+	for (ndp = ibnex.ibnex_port_node_head; ndp != NULL;
+	    ndp = ndp->node_next) {
+		port_node = &ndp->node_data.port_node;
+
+		/* only port nodes hanging off this HCA are of interest */
+		if (port_node->port_pdip != parent)
+			continue;
+
+		/*
+		 * When a specific major number was given, leave alone the
+		 * nodes that have no dip or belong to another driver.
+		 */
+		if (!(major == -1) && (ndp->node_dip == NULL ||
+		    ddi_driver_major(ndp->node_dip) != major))
+			continue;
+
+		port_node->port_pdip = NULL;
+		ndp->node_dip = NULL;
+		ndp->node_state = IBNEX_CFGADM_UNCONFIGURED;
+	}
+	mutex_exit(&ibnex.ibnex_mutex);
+
+	return (DDI_SUCCESS);
+}
+
+/*
+ * ibnex_config_obp_args()
+ *	Configures a particular port node for a IP over IB communication
+ *	service.
+ *	The format of the input string "devname" is
+ *		port=x,pkey=y,protocol=ip,<wanboot options>
+ *	The format of the node name created here is
+ *		ibport@<Port#>,<pkey>,<service name>
+ *	where pkey = 0 for port communication service nodes
+ *	Returns "dev_info_t" of the "child" node just created
+ *	NULL when failed to enumerate the child node
+ *
+ *	NULL is also returned when "devname" is not an IPoIB boot string,
+ *	when port / pkey cannot be parsed from it, when no "ipib" entry
+ *	exists in the VPPA communication service list, or when the port
+ *	cannot be probed. The routine waits until the port is active.
+ */
+static dev_info_t *
+ibnex_config_obp_args(dev_info_t *parent, char *devname)
+{
+	int			ii, index;
+	int			rval, iter = 0;
+	char			*temp;
+	uint8_t			port_num;
+	ib_guid_t		hca_guid, port_guid;
+	ib_pkey_t		pkey;
+	dev_info_t		*cdip = NULL;
+	boolean_t		displayed = B_FALSE;
+	ibdm_port_attr_t	*port_attr;
+
+	IBTF_DPRINTF_L4("ibnex", "\tconfig_obp_args: %s", devname);
+
+	/*
+	 * Is this OBP node for IPoIB ?
+	 * Look for a ",protocol=ip" token that is complete, i.e. one that
+	 * is followed by ',' or ends the string (",protocol=ip" is 12
+	 * characters long, so temp[12] is the character right after it).
+	 */
+	temp = devname;
+	while ((temp = strstr(temp, ",protocol=ip")) != NULL) {
+		if (temp[12] == '\0' || temp[12] == ',')
+			break;
+		temp++;
+	}
+
+	if (temp == NULL)
+		return (NULL);
+	if (ibnex_prom_devname_to_pkey_n_portnum(
+	    devname, &pkey, &port_num) != IBNEX_SUCCESS) {
+		return (NULL);
+	}
+	for (index = 0; index < ibnex.ibnex_nvppa_comm_svcs; index++) {
+		if (strcmp(ibnex.ibnex_vppa_comm_svc_names[index],
+		    "ipib") == 0) {
+			break;
+		}
+	}
+	/*
+	 * Without an "ipib" entry the index is one past the end of the
+	 * service name table and must not be used any further.
+	 */
+	if (index == ibnex.ibnex_nvppa_comm_svcs) {
+		IBTF_DPRINTF_L2("ibnex",
+		    "\tconfig_obp_args: ipib service is not configured");
+		return (NULL);
+	}
+
+	hca_guid = ibtl_ibnex_hcadip2guid(parent);
+	if ((port_attr = ibdm_ibnex_probe_hcaport(
+	    hca_guid, port_num)) == NULL) {
+		IBTF_DPRINTF_L2("ibnex",
+		    "\tconfig_port_node: Port does not exist");
+		return (NULL);
+	}
+
+	/*
+	 * Wait until "port is up": re-probe the port after each delay and
+	 * print a single notice once 400 attempts have gone by.
+	 */
+	while (port_attr->pa_state != IBT_PORT_ACTIVE) {
+		ibdm_ibnex_free_port_attr(port_attr);
+		delay(drv_usectohz(10000));
+		if ((port_attr = ibdm_ibnex_probe_hcaport(
+		    hca_guid, port_num)) == NULL) {
+			return (NULL);
+		}
+		if (iter++ == 400) {
+			if (displayed == B_FALSE) {
+				cmn_err(CE_NOTE, "\tWaiting for Port %d "
+				    "initialization", port_attr->pa_port_num);
+				displayed = B_TRUE;
+			}
+		}
+	}
+	IBTF_DPRINTF_L4("ibnex", "\tPort is initialized");
+
+	mutex_enter(&ibnex.ibnex_mutex);
+	port_guid = port_attr->pa_port_guid;
+	/* reuse the node if it is already known and still has a dip */
+	rval = ibnex_get_dip_from_guid(port_guid, index, pkey, &cdip);
+	if (rval == IBNEX_SUCCESS && cdip != NULL) {
+		IBTF_DPRINTF_L4("ibnex", "\tconfig_port_node: Node exists");
+		mutex_exit(&ibnex.ibnex_mutex);
+		ibdm_ibnex_free_port_attr(port_attr);
+		return (cdip);
+	}
+	/* create the node only if the port really carries this pkey */
+	for (ii = 0; ii < port_attr->pa_npkeys; ii++) {
+		if (pkey == port_attr->pa_pkey_tbl[ii].pt_pkey) {
+			cdip = ibnex_commsvc_initnode(parent, port_attr,
+			    index, IBNEX_VPPA_COMMSVC_NODE, pkey, &rval,
+			    IBNEX_CFGADM_ENUMERATE);
+			IBTF_DPRINTF_L5("ibnex",
+			    "\t ibnex_commsvc_initnode rval %x", rval);
+			break;
+		}
+	}
+	mutex_exit(&ibnex.ibnex_mutex);
+
+	ibdm_ibnex_free_port_attr(port_attr);
+	return (cdip);
+}
+
+
+/*
+ * ibnex_prom_devname_to_pkey_n_portnum()
+ *	Parses the device node name and extracts "PKEY" and "port#"
+ *	Both values are expected as "port=<value>," and "pkey=<value>,",
+ *	i.e. each value must be terminated by a ','.
+ *	Returns IBNEX_SUCCESS/IBNEX_FAILURE
+ *	IBNEX_FAILURE is returned, rather than success with an output
+ *	left unset, when either keyword or its terminating ',' is missing.
+ */
+static int
+ibnex_prom_devname_to_pkey_n_portnum(
+    char *devname, ib_pkey_t *pkey, uint8_t *port)
+{
+	int	ret = IBNEX_SUCCESS;
+	char	*val, *end;
+
+	/* port number: the text between "port=" and the next ',' */
+	if ((val = strstr(devname, "port=")) == NULL)
+		return (IBNEX_FAILURE);
+	val += sizeof ("port=") - 1;
+	if ((end = strchr(val, ',')) == NULL)
+		return (IBNEX_FAILURE);
+	*port = ibnex_str2int(val, (end - val), &ret);
+	if (ret != IBNEX_SUCCESS)
+		return (IBNEX_FAILURE);
+
+	/* pkey: the text between "pkey=" and the next ',' */
+	if ((val = strstr(devname, "pkey=")) == NULL)
+		return (IBNEX_FAILURE);
+	val += sizeof ("pkey=") - 1;
+	if ((end = strchr(val, ',')) == NULL)
+		return (IBNEX_FAILURE);
+	*pkey = ibnex_str2hex(val, (end - val), &ret);
+
+	/* ret carries the conversion status reported by ibnex_str2hex() */
+	return (ret);
+}
+
+/*
+ * ibnex_get_cdip_info()
+ *	Classifies the child named by "devname" and looks it up under
+ *	"parent".
+ *	*type is set from the first three characters of the node name:
+ *	IBNEX_IOC_NODE, IBNEX_HCA_CHILD_NODE or, for anything else,
+ *	IBNEX_PSEUDO_NODE.
+ *	*cdip is set to the result of ndi_devi_findchild() (NULL when the
+ *	child does not exist).
+ *	Returns the parent data of the child, NULL when there is no child.
+ */
+static ibnex_node_data_t *
+ibnex_get_cdip_info(dev_info_t *parent,
+    char *devname, dev_info_t **cdip, ibnex_node_type_t *type)
+{
+	char			*name_copy;
+	char			*nodename = NULL, *unitaddr = NULL;
+	int			copy_len = strlen((char *)devname) + 1;
+	ibnex_node_data_t	*found;
+
+	/*
+	 * The name is split on a private copy (presumably because
+	 * i_ddi_parse_name() edits its input - confirm); "devname" itself
+	 * is what gets passed to ndi_devi_findchild() below.
+	 */
+	name_copy = i_ddi_strdup(devname, KM_SLEEP);
+	i_ddi_parse_name(name_copy, &nodename, &unitaddr, NULL);
+
+	IBTF_DPRINTF_L4("ibnex",
+	    "\tfind_child_dip: cname %s addr %s", nodename, unitaddr);
+
+	if (strncmp(nodename, IBNEX_IOC_CNAME, 3) == 0) {
+		*type = IBNEX_IOC_NODE;
+	} else {
+		*type = (strncmp(nodename, IBNEX_IBPORT_CNAME, 3) == 0) ?
+		    IBNEX_HCA_CHILD_NODE : IBNEX_PSEUDO_NODE;
+	}
+
+	*cdip = ndi_devi_findchild(parent, devname);
+
+	IBTF_DPRINTF_L4("ibnex",
+	    "\tfind_child_dip: cdip %p type %x", *cdip, *type);
+
+	found = (*cdip != NULL) ? ddi_get_parent_data(*cdip) : NULL;
+	kmem_free(name_copy, copy_len);
+
+	return (found);
+}
+
+/*
+ * ibnex_hca_bus_config_one()
+ *	BUS_CONFIG_ONE handling for ibnex_hca_bus_config().
+ *	If the child named by "devname" already exists under "parent"
+ *	nothing is created. Otherwise the child is enumerated according
+ *	to its node type:
+ *	- IOC node: ibnex_ioc_bus_config_one()
+ *	- pseudo node: ibnex_pseudo_config_one()
+ *	- anything else (ibport): ibnex_config_port_node(), except that
+ *	  "ipib" VPPA nodes (pkey and port both non zero) are skipped
+ *	  without creating a node
+ *	For ibport children NDI_MDI_FALLBACK is ORed into *flag.
+ *	Returns IBNEX_SUCCESS/IBNEX_FAILURE, or the value returned by
+ *	ibnex_ioc_bus_config_one() when that routine reports that no
+ *	further bus config is needed.
+ */
+static int
+ibnex_hca_bus_config_one(dev_info_t *parent, void *devname,
+    ddi_bus_config_op_t op, uint_t *flag, dev_info_t **child)
+{
+	int			ret = IBNEX_SUCCESS, len, circ, need_bus_config;
+	char 			*device_name, *caddr, *cname;
+	dev_info_t		*cdip;
+	ibnex_node_data_t	*node_data;
+	ibnex_node_type_t	node_type;
+	int			index;
+	uint8_t			port_num;
+	ib_pkey_t		pkey;
+
+	/*
+	 * device_name is a private copy that must be released with
+	 * kmem_free(device_name, len) on every way out of this routine.
+	 */
+	len = strlen((char *)devname) + 1;
+	device_name = i_ddi_strdup(devname, KM_SLEEP);
+	i_ddi_parse_name(device_name, &cname, &caddr, NULL);
+
+	if (caddr == NULL || (strlen(caddr) == 0)) {
+		IBTF_DPRINTF_L2("ibnex",
+		    "\thca_bus_config: Invalid device node address");
+		kmem_free(device_name, len);
+		return (IBNEX_FAILURE);
+	}
+
+	ndi_devi_enter(parent, &circ);
+	node_data = ibnex_get_cdip_info(
+	    parent, devname, &cdip, &node_type);
+	ndi_devi_exit(parent, circ);
+
+	/*
+	 * The child exists but the port node lost track of its dip:
+	 * record the dip and the parent again.
+	 */
+	if (cdip) {
+		if ((node_data) && (node_data->node_type ==
+		    IBNEX_PORT_COMMSVC_NODE)) {
+			if (node_data->node_dip == NULL) {
+				node_data->node_dip = cdip;
+				node_data->node_data.port_node.port_pdip =
+				    parent;
+			}
+		}
+	}
+
+	/*
+	 * If child dip is present, just return
+	 * from here.
+	 */
+	if (cdip != NULL || (node_data != NULL &&
+	    node_data->node_dip != NULL)) {
+		goto end;
+	}
+
+	switch (node_type) {
+
+	case IBNEX_IOC_NODE:
+		ret = ibnex_ioc_bus_config_one(&parent, *flag,
+		    op, devname, child, &need_bus_config);
+		if (!need_bus_config) {
+			kmem_free(device_name, len);
+			return (ret);
+		}
+		break;
+
+	case IBNEX_PSEUDO_NODE:
+		ret = IBNEX_SUCCESS;
+		mdi_devi_enter(parent, &circ);
+		ibnex_pseudo_initnodes();
+		mutex_enter(&ibnex.ibnex_mutex);
+		ret = ibnex_pseudo_config_one(NULL,
+		    caddr, parent);
+		mutex_exit(&ibnex.ibnex_mutex);
+		mdi_devi_exit(parent, circ);
+		break;
+
+	default:
+		if (ibnex_get_pkey_commsvc_index_portnum(devname,
+		    &index, &pkey, &port_num) != IBNEX_SUCCESS) {
+			IBTF_DPRINTF_L2("ibnex",
+			    "\tconfig_port_node: Invalid Service Name");
+			/* do not leak the name copy on this early return */
+			kmem_free(device_name, len);
+			return (IBNEX_FAILURE);
+		}
+
+		/* pkey and port both set: a VPPA node; ipib ones are skipped */
+		if ((pkey != 0) && (port_num != 0)) {
+			if (strcmp("ipib",
+			    ibnex.ibnex_vppa_comm_svc_names[index]) == 0) {
+				IBTF_DPRINTF_L2("ibnex",
+				    "Skipping IBD devices... ");
+				break;
+			}
+		}
+
+		ndi_devi_enter(parent, &circ);
+		cdip = ibnex_config_port_node(parent, devname);
+		if (cdip)
+			ret = IBNEX_SUCCESS;
+		else
+			ret = IBNEX_FAILURE;
+		ndi_devi_exit(parent, circ);
+		break;
+	}
+end:
+	if (node_type == IBNEX_HCA_CHILD_NODE) {
+		/* Allows enumeration under PHCI */
+		*flag |= NDI_MDI_FALLBACK;
+	}
+	kmem_free(device_name, len);
+	return (ret);
+}
+
+/*
+ * ibnex_handle_hca_attach()
+ *	Task queue callback dispatched from the IBDM callback when an HCA
+ *	is added. "cb_arg" points to a kmem_alloc()ed ib_guid_t holding the
+ *	HCA guid; this routine owns that buffer and frees it.
+ *	Creates the HCA_SVC nodes of the HCA and the VPPA nodes of each of
+ *	its ports. Nothing is done when the HCA dip or the HCA information
+ *	cannot be obtained.
+ */
+void
+ibnex_handle_hca_attach(void *cb_arg)
+{
+	ib_guid_t		hca_guid = *((ib_guid_t *)cb_arg);
+	dev_info_t		*phci;
+	int			ii, circ;
+	ibdm_hca_list_t		*hca_list;
+
+	IBTF_DPRINTF_L4("ibnex", "handle_hca_attach(%llx)", hca_guid);
+
+	/*
+	 * The guid has been copied out above. Release the buffer that was
+	 * allocated for this dispatch right away, so that it is not leaked
+	 * on any of the return paths below.
+	 */
+	kmem_free(cb_arg, sizeof (ib_guid_t));
+
+	phci = ibtl_ibnex_hcaguid2dip(hca_guid);
+	if (phci == NULL) {
+		/* no dip to enumerate under; ndi_devi_enter() needs one */
+		IBTF_DPRINTF_L2("ibnex",
+		    "\thandle_hca_attach: no dip for HCA guid %llx", hca_guid);
+		return;
+	}
+
+	/*
+	 * Enumerate children of this HCA, port nodes,
+	 * VPPA & HCA_SVC nodes. Use ndi_devi_enter() for
+	 * locking. IB Nexus is enumerating the children
+	 * of HCA, not MPXIO clients.
+	 */
+	ndi_devi_enter(phci, &circ);
+	ibdm_ibnex_port_settle_wait(hca_guid, ibnex_port_settling_time);
+	hca_list = ibdm_ibnex_get_hca_info_by_guid(hca_guid);
+	if (hca_list == NULL) {
+		ndi_devi_exit(phci, circ);
+		return;
+	}
+	ibnex_create_hcasvc_nodes(phci, hca_list->hl_hca_port_attr);
+	for (ii = 0; ii < hca_list->hl_nports; ii++) {
+		ibnex_create_vppa_nodes(
+		    phci, &hca_list->hl_port_attr[ii]);
+	}
+	ibdm_ibnex_free_hca_list(hca_list);
+	ndi_devi_exit(phci, circ);
+}
--- a/usr/src/uts/common/io/ib/ibnex/ibnex_ioctl.c	Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/uts/common/io/ib/ibnex/ibnex_ioctl.c	Wed Apr 14 10:26:18 2010 -0700
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 /*
@@ -58,12 +57,14 @@
 static int		ibnex_fill_ioc_tmp(nvlist_t **, ibdm_ioc_info_t *);
 static int		ibnex_fill_nodeinfo(nvlist_t **, ibnex_node_data_t *,
 			    void *);
-static void		ibnex_figure_ap_devstate(dev_info_t *,
+static void		ibnex_figure_ap_devstate(ibnex_node_data_t *,
 			    devctl_ap_state_t *);
 static void		ibnex_figure_ib_apid_devstate(devctl_ap_state_t *);
 static	char 		*ibnex_get_apid(struct devctl_iocdata *);
 static int		ibnex_get_dip_from_apid(char *, dev_info_t **,
 			    ibnex_node_data_t **);
+extern int		ibnex_get_node_and_dip_from_guid(ib_guid_t, int,
+			    ib_pkey_t, ibnex_node_data_t **, dev_info_t **);
 static ibnex_rval_t	ibnex_handle_pseudo_configure(char *);
 static ibnex_rval_t	ibnex_handle_ioc_configure(char *);
 static ibnex_rval_t	ibnex_handle_commsvcnode_configure(char *);
@@ -212,7 +213,8 @@
 	ibnex_rval_t		ret_val;
 	ib_service_type_t	svc_type = IB_NONE;
 	devctl_ap_state_t	ap_state;
-	ibnex_node_data_t	*nodep, *scanp;
+	ibnex_node_data_t	*nodep = NULL;
+	ibnex_node_data_t	*scanp;
 	struct devctl_iocdata	*dcp = NULL;
 
 	IBTF_DPRINTF_L4("ibnex", "\tdevctl: cmd=%x, arg=%p, mode=%x, cred=%p, "
@@ -257,7 +259,7 @@
 			/* rv could be something undesirable, so reset it */
 			rv = 0;
 
-			ibnex_figure_ap_devstate(apid_dip, &ap_state);
+			ibnex_figure_ap_devstate(nodep, &ap_state);
 		}
 
 		/* copy the return-AP-state information to the user space */
@@ -1390,9 +1392,9 @@
 	    "node_type = %x", hca_guid, port_guid, svc_index, p_key, node_type);
 
 	/* check if this node was seen before? */
-	rval = ibnex_get_dip_from_guid(port_guid, svc_index, p_key, &dip);
-	if (rval == IBNEX_SUCCESS && dip) {
-		nodep = ddi_get_parent_data(dip);
+	rval = ibnex_get_node_and_dip_from_guid(port_guid, svc_index, p_key,
+	    &nodep, &dip);
+	if (rval == IBNEX_SUCCESS && nodep != NULL) {
 
 		if (ibnex_fill_nodeinfo(nvlpp, nodep, NULL) != 0) {
 			IBTF_DPRINTF_L2("ibnex",
@@ -1646,7 +1648,7 @@
 	    IBNEX_NODE_TYPE_NVL, node_datap->node_type);
 
 	/* figure out "ostate", "rstate" and "condition" */
-	ibnex_figure_ap_devstate(node_datap->node_dip, &state);
+	ibnex_figure_ap_devstate(node_datap, &state);
 
 	if (nvlist_add_int32(*nvlpp, IBNEX_NODE_RSTATE_NVL, state.ap_rstate)) {
 		IBTF_DPRINTF_L2("ibnex", "ibnex_fill_nodeinfo: "
@@ -1684,16 +1686,19 @@
  *	"last_change" value.
  */
 static void
-ibnex_figure_ap_devstate(dev_info_t *dip, devctl_ap_state_t *ap_state)
+ibnex_figure_ap_devstate(ibnex_node_data_t *nodep, devctl_ap_state_t *ap_state)
 {
-	IBTF_DPRINTF_L5("ibnex", "ibnex_figure_ap_devstate: dip = %p", dip);
+	IBTF_DPRINTF_L5("ibnex", "ibnex_figure_ap_devstate: nodep = %p", nodep);
 
 	ap_state->ap_rstate = AP_RSTATE_CONNECTED;
-	if (dip == NULL) {	/* for nodes not seen by IBNEX yet */
+	if (nodep == NULL) {	/* for nodes not seen by IBNEX yet */
 		ap_state->ap_ostate = AP_OSTATE_UNCONFIGURED;
 		ap_state->ap_condition = AP_COND_UNKNOWN;
 	} else {
-		if (i_ddi_node_state(dip) < DS_BOUND) {
+		/*
+		 * IBNEX_NODE_AP_UNCONFIGURED & IBNEX_NODE_AP_CONFIGURING.
+		 */
+		if (nodep->node_ap_state >= IBNEX_NODE_AP_UNCONFIGURED) {
 			ap_state->ap_ostate = AP_OSTATE_UNCONFIGURED;
 			ap_state->ap_condition = AP_COND_UNKNOWN;
 		} else {
--- a/usr/src/uts/common/io/ib/ibtl/ibtl_impl.c	Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/uts/common/io/ib/ibtl/ibtl_impl.c	Wed Apr 14 10:26:18 2010 -0700
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 /*
@@ -110,6 +109,8 @@
 static void ibtl_kstat_stats_create(ibtl_hca_devinfo_t *, uint_t);
 static void ibtl_kstat_pkeys_create(ibtl_hca_devinfo_t *, uint_t);
 
+extern kmutex_t ibtl_part_attr_mutex;
+
 /*
  * IBTF Loadable Module Routines.
  */
@@ -147,6 +148,8 @@
 	mutex_init(&ibtl_qp_mutex, NULL, MUTEX_DEFAULT, NULL);
 	cv_init(&ibtl_qp_cv, NULL, CV_DEFAULT, NULL);
 
+	mutex_init(&ibtl_part_attr_mutex, NULL, MUTEX_DEFAULT, NULL);
+
 	ibtl_thread_init();
 
 	return (rval);
@@ -173,6 +176,7 @@
 	cv_destroy(&ibtl_close_hca_cv);
 	mutex_destroy(&ibtl_qp_mutex);
 	cv_destroy(&ibtl_qp_cv);
+	mutex_destroy(&ibtl_part_attr_mutex);
 
 	/*
 	 * Stop Logging
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/io/ib/ibtl/ibtl_misc.c	Wed Apr 14 10:26:18 2010 -0700
@@ -0,0 +1,93 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#include <sys/types.h>
+#include <sys/kmem.h>
+#include <sys/ksynch.h>
+#include <sys/ib/ibtl/ibti_common.h>
+
+kmutex_t	ibtl_part_attr_mutex;
+ibt_status_t	(*ibtl_get_part_attr_cb)(datalink_id_t, ibt_part_attr_t *);
+ibt_status_t	(*ibtl_get_all_part_attr_cb)(ibt_part_attr_t **, int *);
+
+void
+ibt_register_part_attr_cb(
+    ibt_status_t (*get_part_attr)(datalink_id_t, ibt_part_attr_t *),
+    ibt_status_t (*get_all_part_attr)(ibt_part_attr_t **, int *))
+{
+	mutex_enter(&ibtl_part_attr_mutex);
+	ibtl_get_part_attr_cb = get_part_attr;
+	ibtl_get_all_part_attr_cb = get_all_part_attr;
+	mutex_exit(&ibtl_part_attr_mutex);
+}
+
+void
+ibt_unregister_part_attr_cb(void)
+{
+	mutex_enter(&ibtl_part_attr_mutex);
+	ibtl_get_part_attr_cb = NULL;
+	ibtl_get_all_part_attr_cb = NULL;
+	mutex_exit(&ibtl_part_attr_mutex);
+}
+
+ibt_status_t
+ibt_get_part_attr(datalink_id_t linkid, ibt_part_attr_t *attr)
+{
+	ibt_status_t	status;
+
+	mutex_enter(&ibtl_part_attr_mutex);
+	if (ibtl_get_part_attr_cb != NULL)
+		status = (*ibtl_get_part_attr_cb) (linkid, attr);
+	else
+		status = IBT_NO_SUCH_OBJECT;
+	mutex_exit(&ibtl_part_attr_mutex);
+
+	return (status);
+}
+
+ibt_status_t
+ibt_get_all_part_attr(ibt_part_attr_t **attr, int *nparts)
+{
+	ibt_status_t	status;
+
+	mutex_enter(&ibtl_part_attr_mutex);
+	if (ibtl_get_all_part_attr_cb != NULL)
+		status = (*ibtl_get_all_part_attr_cb) (attr, nparts);
+	else {
+		*attr = NULL;
+		*nparts = 0;
+		status = IBT_SUCCESS;
+	}
+	mutex_exit(&ibtl_part_attr_mutex);
+
+	return (status);
+}
+
+ibt_status_t
+ibt_free_part_attr(ibt_part_attr_t *attr, int nparts)
+{
+	if (nparts > 0)
+		kmem_free(attr, sizeof (ibt_part_attr_t) * nparts);
+	return (IBT_SUCCESS);
+}
--- a/usr/src/uts/common/io/ib/inc.flg	Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/uts/common/io/ib/inc.flg	Wed Apr 14 10:26:18 2010 -0700
@@ -21,8 +21,7 @@
 #
 
 #
-# Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
-# Use is subject to license terms.
+# Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
 #
 
 #
@@ -62,14 +61,14 @@
 	usr/src/uts/sparc/ibdm		\
 	usr/src/uts/sparc/ibmf		\
 	usr/src/uts/sparc/ibtl		\
-	usr/src/uts/sparc/ibd		\
+	usr/src/uts/sparc/ibp		\
 	usr/src/uts/sparc/rpcib		\
 	usr/src/uts/intel/ib		\
 	usr/src/uts/intel/ibcm		\
 	usr/src/uts/intel/ibdm		\
 	usr/src/uts/intel/ibmf		\
 	usr/src/uts/intel/ibtl		\
-	usr/src/uts/intel/ibd		\
+	usr/src/uts/intel/ibp		\
 	usr/src/uts/intel/rpcib	
 
 # packaging files
--- a/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_arp.c	Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_arp.c	Wed Apr 14 10:26:18 2010 -0700
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #include <sys/types.h>
@@ -38,6 +37,8 @@
 #include <sys/kstr.h>
 #include <sys/t_kuser.h>
 
+#include <sys/dls.h>
+
 extern char cmlog[];
 
 extern int ibcm_resolver_pr_lookup(ibcm_arp_streams_t *ib_s,
@@ -202,64 +203,55 @@
 		return (IBT_SUCCESS);
 }
 
-
-static int
-ibcm_arp_get_ibd_insts_cb(dev_info_t *dip, void *arg)
+void
+ibcm_arp_free_ibds(ibcm_arp_ibd_insts_t *ibds)
 {
-	ibcm_arp_ibd_insts_t *ibds = (ibcm_arp_ibd_insts_t *)arg;
-	ibcm_arp_ip_t	*ipp;
-	ib_pkey_t	pkey;
-	uint8_t		port;
-	ib_guid_t	hca_guid;
-	ib_gid_t	port_gid;
-
-	if (i_ddi_devi_attached(dip) &&
-	    (strcmp(ddi_node_name(dip), "ibport") == 0) &&
-	    (strstr(ddi_get_name_addr(dip), "ipib") != NULL)) {
-
-		if (ibds->ibcm_arp_ibd_cnt >= ibds->ibcm_arp_ibd_alloc) {
-			ibcm_arp_ip_t	*tmp = NULL;
-			uint8_t		new_count;
-
-			new_count = ibds->ibcm_arp_ibd_alloc +
-			    IBCM_ARP_IBD_INSTANCES;
-
-			tmp = (ibcm_arp_ip_t *)kmem_zalloc(
-			    new_count * sizeof (ibcm_arp_ip_t), KM_SLEEP);
-			bcopy(ibds->ibcm_arp_ip, tmp,
-			    ibds->ibcm_arp_ibd_alloc * sizeof (ibcm_arp_ip_t));
-			kmem_free(ibds->ibcm_arp_ip,
-			    ibds->ibcm_arp_ibd_alloc * sizeof (ibcm_arp_ip_t));
-			ibds->ibcm_arp_ibd_alloc = new_count;
-			ibds->ibcm_arp_ip = tmp;
-		}
-
-		if (((hca_guid = ddi_prop_get_int64(DDI_DEV_T_ANY, dip, 0,
-		    "hca-guid", 0)) == 0) ||
-		    ((port = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
-		    "port-number", 0)) == 0) ||
-		    (ibt_get_port_state_byguid(hca_guid, port, &port_gid,
-		    NULL) != IBT_SUCCESS) ||
-		    ((pkey = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
-		    "port-pkey", IB_PKEY_INVALID_LIMITED)) <=
-		    IB_PKEY_INVALID_FULL)) {
-			return (DDI_WALK_CONTINUE);
-		}
-
-		ipp = &ibds->ibcm_arp_ip[ibds->ibcm_arp_ibd_cnt];
-		ipp->ip_inst = ddi_get_instance(dip);
-		ipp->ip_pkey = pkey;
-		ipp->ip_hca_guid = hca_guid;
-		ipp->ip_port_gid = port_gid;
-		ibds->ibcm_arp_ibd_cnt++;
+	if (ibds->ibcm_arp_ip) {
+		kmem_free(ibds->ibcm_arp_ip, ibds->ibcm_arp_ibd_alloc *
+		    sizeof (ibcm_arp_ip_t));
+		ibds->ibcm_arp_ibd_alloc = 0;
+		ibds->ibcm_arp_ibd_cnt = 0;
+		ibds->ibcm_arp_ip = NULL;
 	}
-	return (DDI_WALK_CONTINUE);
 }
 
 static void
 ibcm_arp_get_ibd_insts(ibcm_arp_ibd_insts_t *ibds)
 {
-	ddi_walk_devs(ddi_root_node(), ibcm_arp_get_ibd_insts_cb, ibds);
+	ibcm_arp_ip_t	*ipp;
+	ib_gid_t	port_gid;
+	ibt_part_attr_t	*attr_list, *attr;
+	int		nparts;
+
+	if ((ibt_get_all_part_attr(&attr_list, &nparts) != IBT_SUCCESS) ||
+	    (nparts == 0)) {
+		ibds->ibcm_arp_ibd_alloc = 0;
+		ibds->ibcm_arp_ibd_cnt = 0;
+		ibds->ibcm_arp_ip = NULL;
+		return;
+	}
+
+	ibds->ibcm_arp_ibd_alloc = nparts;
+	ibds->ibcm_arp_ibd_cnt = 0;
+	ibds->ibcm_arp_ip = (ibcm_arp_ip_t *)kmem_zalloc(
+	    nparts * sizeof (ibcm_arp_ip_t), KM_SLEEP);
+
+	attr = attr_list;
+	while (nparts--) {
+		if (ibt_get_port_state_byguid(attr->pa_hca_guid,
+		    attr->pa_port, &port_gid, NULL) == IBT_SUCCESS) {
+
+			ipp = &ibds->ibcm_arp_ip[ibds->ibcm_arp_ibd_cnt];
+			ipp->ip_linkid = attr->pa_plinkid;
+			ipp->ip_pkey = attr->pa_pkey;
+			ipp->ip_hca_guid = attr->pa_hca_guid;
+			ipp->ip_port_gid = port_gid;
+			ibds->ibcm_arp_ibd_cnt++;
+		}
+		attr++;
+	}
+
+	(void) ibt_free_part_attr(attr_list, ibds->ibcm_arp_ibd_alloc);
 }
 
 /*
@@ -331,6 +323,37 @@
 	return (0);
 }
 
+static ibcm_arp_ip_t *
+ibcm_arp_lookup(ibcm_arp_ibd_insts_t *ibds, char *linkname)
+{
+	datalink_id_t	linkid;
+	int		i;
+
+	IBTF_DPRINTF_L4(cmlog, "ibcm_arp_lookup: linkname =  %s\n", linkname);
+
+	/*
+	 * If at first we don't succeed, try again, just in case it is in
+	 * hiding. The first call requires the datalink management daemon
+	 * (the authoritative source of information about name to id mapping)
+	 * to be present and answering upcalls, the second does not.
+	 */
+	if (dls_mgmt_get_linkid(linkname, &linkid) != 0) {
+		if (dls_devnet_macname2linkid(linkname, &linkid) != 0) {
+			IBTF_DPRINTF_L4(cmlog, "ibcm_arp_lookup: could not "
+			    "get linkid from linkname\n");
+			return (NULL);
+		}
+	}
+
+	for (i = 0; i < ibds->ibcm_arp_ibd_cnt; i++) {
+		if (ibds->ibcm_arp_ip[i].ip_linkid == linkid)
+			return (&ibds->ibcm_arp_ip[i]);
+	}
+
+	IBTF_DPRINTF_L4(cmlog, "ibcm_arp_lookup: returning NULL\n");
+	return (NULL);
+}
+
 /*
  * Fill in `ibds' with IP addresses tied to IFT_IB IP interfaces.  Returns
  * B_TRUE if at least one address was filled in.
@@ -352,12 +375,13 @@
 	IBTF_DPRINTF_L4(cmlog, "ibcm_arp_get_ibd_ipaddr: Family %d, nifs %d",
 	    family_loc, nifs);
 
-	for (lifrp = lifc.lifc_req, i = 0;
-	    i < nifs && naddr < ibds->ibcm_arp_ibd_cnt; i++, lifrp++) {
+	for (lifrp = lifc.lifc_req, i = 0; i < nifs; i++, lifrp++) {
 		if (lifrp->lifr_type != IFT_IB)
 			continue;
 
-		ipp = &ibds->ibcm_arp_ip[naddr];
+		if ((ipp = ibcm_arp_lookup(ibds, lifrp->lifr_name)) == NULL)
+			continue;
+
 		switch (lifrp->lifr_addr.ss_family) {
 		case AF_INET:
 			ipp->ip_inet_family = AF_INET;
@@ -399,6 +423,7 @@
 	if (!ibcm_arp_get_ibd_ipaddr(ibdp, family_loc)) {
 		IBTF_DPRINTF_L2(cmlog, "ibcm_arp_get_ibds: failed to get "
 		    "ibd instance: IBT_SRC_IP_NOT_FOUND");
+		ibcm_arp_free_ibds(ibdp);
 		return (IBT_SRC_IP_NOT_FOUND);
 	}
 
@@ -407,9 +432,9 @@
 		char    my_buf[INET6_ADDRSTRLEN];
 		ibcm_arp_ip_t	*aip = &ibdp->ibcm_arp_ip[i];
 
-		IBTF_DPRINTF_L4(cmlog, "ibcm_arp_get_ibds: ibd[%d]: Family %d "
-		    "Instance %d PKey 0x%lX \n HCAGUID 0x%llX SGID %llX:%llX",
-		    i, aip->ip_inet_family, aip->ip_inst, aip->ip_pkey,
+		IBTF_DPRINTF_L4(cmlog, "ibcm_arp_get_ibds: Linkid %d Family %d "
+		    "PKey 0x%lX \n HCAGUID 0x%llX SGID %llX:%llX",
+		    aip->ip_linkid, aip->ip_inet_family, aip->ip_pkey,
 		    aip->ip_hca_guid, aip->ip_port_gid.gid_prefix,
 		    aip->ip_port_gid.gid_guid);
 		if (aip->ip_inet_family == AF_INET) {
--- a/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_ti.c	Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_ti.c	Wed Apr 14 10:26:18 2010 -0700
@@ -6357,12 +6357,6 @@
 		return (IBT_INVALID_PARAM);
 	}
 
-	bzero(&ibds, sizeof (ibcm_arp_ibd_insts_t));
-	ibds.ibcm_arp_ibd_alloc = IBCM_ARP_IBD_INSTANCES;
-	ibds.ibcm_arp_ibd_cnt = 0;
-	ibds.ibcm_arp_ip = (ibcm_arp_ip_t *)kmem_zalloc(
-	    ibds.ibcm_arp_ibd_alloc * sizeof (ibcm_arp_ip_t), KM_SLEEP);
-
 	retval = ibcm_arp_get_ibds(&ibds, AF_UNSPEC);
 	if (retval != IBT_SUCCESS) {
 		IBTF_DPRINTF_L2(cmlog, "ibt_get_src_ip: ibcm_arp_get_ibds "
@@ -6403,10 +6397,7 @@
 	}
 
 get_src_ip_end:
-	if (ibds.ibcm_arp_ip)
-		kmem_free(ibds.ibcm_arp_ip,
-		    ibds.ibcm_arp_ibd_alloc * sizeof (ibcm_arp_ip_t));
-
+	ibcm_arp_free_ibds(&ibds);
 	return (retval);
 }
 
--- a/usr/src/uts/common/io/warlock/ib.wlcmd	Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/uts/common/io/warlock/ib.wlcmd	Wed Apr 14 10:26:18 2010 -0700
@@ -18,10 +18,8 @@
 #
 # CDDL HEADER END
 #
-# Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
-# Use is subject to license terms.
+# Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
 #
-#ident	"%Z%%M%	%I%	%E% SMI"
 
 # ibnexus Callback handlers for IBDM
 add	ibdm.ibdm_ibnex_callback targets	ibnex_dm_callback
@@ -46,6 +44,7 @@
 
 # ibnex reprobe function, called from taskq
 root	ibnex_handle_reprobe_dev
+root	ibnex_handle_hca_attach
 
 # ibnex other functions
 root	ibnex_name_child
--- a/usr/src/uts/common/os/swapgeneric.c	Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/uts/common/os/swapgeneric.c	Wed Apr 14 10:26:18 2010 -0700
@@ -20,8 +20,7 @@
  */
 /* ONC_PLUS EXTRACT START */
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 1982, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 /* ONC_PLUS EXTRACT END */
 
@@ -1000,8 +999,8 @@
 	 * InfiniBand.
 	 */
 	if (netboot_over_ib(bootpath) &&
-	    modloadonly("drv", "ibd") == -1) {
-		cmn_err(CE_CONT, "ibd: cannot load platform driver\n");
+	    modloadonly("drv", "ibp") == -1) {
+		cmn_err(CE_CONT, "ibp: cannot load platform driver\n");
 		kmem_free(pathcopy, pathcopy_len);
 		return (NULL);
 	}
--- a/usr/src/uts/common/rpc/ib.h	Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/uts/common/rpc/ib.h	Wed Apr 14 10:26:18 2010 -0700
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 /*
  * Copyright (c) 2007, The Ohio State University. All rights reserved.
@@ -140,8 +139,6 @@
  * ATS relsted defines and structures.
  */
 #define	ATS_AR_DATA_LEN	16
-#define	IBD_NAME	"ibd"
-#define	N_IBD_INSTANCES	4
 
 
 /*
--- a/usr/src/uts/common/sys/Makefile	Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/uts/common/sys/Makefile	Wed Apr 14 10:26:18 2010 -0700
@@ -261,6 +261,7 @@
 	hwconf.h		\
 	ia.h			\
 	iapriocntl.h		\
+	ibpart.h		\
 	id32.h			\
 	idmap.h 		\
 	ieeefp.h		\
--- a/usr/src/uts/common/sys/dld_ioc.h	Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/uts/common/sys/dld_ioc.h	Wed Apr 14 10:26:18 2010 -0700
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #ifndef	_SYS_DLD_IOC_H
@@ -59,6 +58,7 @@
 #define	SIMNET_IOC	0x5132
 #define	IPTUN_IOC	0x454A
 #define	BRIDGE_IOC	0xB81D
+#define	IBPART_IOC	0x6171
 
 /* GLDv3 modules use these macros to generate unique ioctl commands */
 #define	DLDIOC(cmdid)		DLD_IOC_CMD(DLD_IOC, (cmdid))
@@ -67,6 +67,7 @@
 #define	SIMNETIOC(cmdid)	DLD_IOC_CMD(SIMNET_IOC, (cmdid))
 #define	IPTUNIOC(cmdid)		DLD_IOC_CMD(IPTUN_IOC, (cmdid))
 #define	BRIDGEIOC(cmdid)	DLD_IOC_CMD(BRIDGE_IOC, (cmdid))
+#define	IBPARTIOC(cmdid)	DLD_IOC_CMD(IBPART_IOC, (cmdid))
 
 #ifdef _KERNEL
 
--- a/usr/src/uts/common/sys/dls_mgmt.h	Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/uts/common/sys/dls_mgmt.h	Wed Apr 14 10:26:18 2010 -0700
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #ifndef	_DLS_MGMT_H
@@ -46,13 +45,14 @@
 	DATALINK_CLASS_ETHERSTUB	= 0x10,
 	DATALINK_CLASS_SIMNET		= 0x20,
 	DATALINK_CLASS_BRIDGE		= 0x40,
-	DATALINK_CLASS_IPTUN		= 0x60
+	DATALINK_CLASS_IPTUN		= 0x60,
+	DATALINK_CLASS_PART		= 0x100
 } datalink_class_t;
 
 #define	DATALINK_CLASS_ALL	(DATALINK_CLASS_PHYS |	\
 	DATALINK_CLASS_VLAN | DATALINK_CLASS_AGGR | DATALINK_CLASS_VNIC | \
 	DATALINK_CLASS_ETHERSTUB | DATALINK_CLASS_SIMNET | \
-	DATALINK_CLASS_BRIDGE | DATALINK_CLASS_IPTUN)
+	DATALINK_CLASS_BRIDGE | DATALINK_CLASS_IPTUN | DATALINK_CLASS_PART)
 
 /*
  * A combination of flags and media.
--- a/usr/src/uts/common/sys/ib/clients/ibd/ibd.h	Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/uts/common/sys/ib/clients/ibd/ibd.h	Wed Apr 14 10:26:18 2010 -0700
@@ -20,8 +20,7 @@
  */
 
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #ifndef _SYS_IB_CLIENTS_IBD_H
@@ -82,6 +81,61 @@
 #define	IBD_SEND			0
 #define	IBD_RECV			1
 
+/* Tunables defaults and limits */
+#define	IBD_LINK_MODE_UD		0
+#define	IBD_LINK_MODE_RC		1
+
+#define	IBD_DEF_LINK_MODE		IBD_LINK_MODE_RC
+#define	IBD_DEF_LSO_POLICY		B_TRUE
+#define	IBD_DEF_NUM_LSO_BUFS		1024
+#define	IBD_DEF_CREATE_BCAST_GROUP	B_TRUE
+#define	IBD_DEF_COALESCE_COMPLETIONS	B_TRUE
+#define	IBD_DEF_UD_RX_COMP_COUNT	4
+#define	IBD_DEF_UD_RX_COMP_USEC		10
+#define	IBD_DEF_UD_TX_COMP_COUNT	16
+#define	IBD_DEF_UD_TX_COMP_USEC		300
+#define	IBD_DEF_RC_RX_COMP_COUNT	4
+#define	IBD_DEF_RC_RX_COMP_USEC		10
+#define	IBD_DEF_RC_TX_COMP_COUNT	10
+#define	IBD_DEF_RC_TX_COMP_USEC		300
+#define	IBD_DEF_UD_TX_COPY_THRESH	4096
+#define	IBD_DEF_RC_RX_COPY_THRESH	4096
+#define	IBD_DEF_RC_TX_COPY_THRESH	4096
+#define	IBD_DEF_UD_NUM_RWQE		4000
+#define	IBD_DEF_UD_NUM_SWQE		4000
+#define	IBD_DEF_RC_ENABLE_SRQ		B_TRUE
+#define	IBD_DEF_RC_NUM_RWQE		2047
+#define	IBD_DEF_RC_NUM_SWQE		511
+#define	IBD_DEF_NUM_AH			256
+#define	IBD_DEF_HASH_SIZE		32
+#define	IBD_DEF_RC_NUM_SRQ		(IBD_DEF_RC_NUM_RWQE - 1)
+#define	IBD_DEF_RC_RX_RWQE_THRESH	(IBD_DEF_RC_NUM_RWQE >> 2)
+
+/* Tunable limits */
+#define	IBD_MIN_NUM_LSO_BUFS		512
+#define	IBD_MAX_NUM_LSO_BUFS		4096
+#define	IBD_MIN_UD_TX_COPY_THRESH	2048
+#define	IBD_MAX_UD_TX_COPY_THRESH	65536
+#define	IBD_MIN_UD_NUM_SWQE		512
+#define	IBD_MAX_UD_NUM_SWQE		8000
+#define	IBD_MIN_UD_NUM_RWQE		512
+#define	IBD_MAX_UD_NUM_RWQE		8000
+#define	IBD_MIN_NUM_AH			32
+#define	IBD_MAX_NUM_AH			8192
+#define	IBD_MIN_HASH_SIZE		32
+#define	IBD_MAX_HASH_SIZE		1024
+
+#define	IBD_MIN_RC_NUM_SWQE		511
+#define	IBD_MAX_RC_NUM_SWQE		8000
+#define	IBD_MIN_RC_NUM_RWQE		511
+#define	IBD_MAX_RC_NUM_RWQE		8000
+#define	IBD_MIN_RC_RX_COPY_THRESH	1500
+#define	IBD_MAX_RC_RX_COPY_THRESH	65520
+#define	IBD_MIN_RC_TX_COPY_THRESH	1500
+#define	IBD_MAX_RC_TX_COPY_THRESH	65520
+#define	IBD_MIN_RC_NUM_SRQ		(IBD_MIN_RC_NUM_RWQE - 1)
+#define	IBD_MIN_RC_RX_RWQE_THRESH	(IBD_MIN_RC_NUM_RWQE >> 2)
+
 /*
  * Thresholds
  *
@@ -511,7 +565,7 @@
 	kstat_named_t		rc_rwqe_short;	/* short rwqe */
 
 	kstat_named_t		rc_xmt_bytes;
-	/* pkt size <= ibd_rc_tx_copy_thresh */
+	/* pkt size <= state->id_rc_tx_copy_thresh */
 	kstat_named_t		rc_xmt_small_pkt;
 	kstat_named_t		rc_xmt_fragmented_pkt;
 	/* fail in ibt_map_mem_iov() */
@@ -652,6 +706,9 @@
 	uint_t		bkt_nfree;
 } ibd_lsobkt_t;
 
+#define	IBD_PORT_DRIVER		0x1
+#define	IBD_PARTITION_OBJ	0x2
+
 /*
  * Posting to a single software rx post queue is contentious,
  * so break it out to (multiple) an array of queues.
@@ -673,6 +730,7 @@
  * (per network interface).
  */
 typedef struct ibd_state_s {
+	uint_t			id_type;
 	dev_info_t		*id_dip;
 	ibt_clnt_hdl_t		id_ibt_hdl;
 	ibt_hca_hdl_t		id_hca_hdl;
@@ -720,7 +778,13 @@
 	ibt_mr_hdl_t		id_rx_mr_hdl;
 	ibt_mr_desc_t		id_rx_mr_desc;
 	uint_t			id_rx_buf_sz;
-	uint32_t		id_num_rwqe;
+	/*
+	 * id_ud_num_rwqe
+	 * Number of "receive WQE" elements that will be allocated and used
+	 * by ibd. This parameter is limited by the maximum channel size of
+	 * the HCA. Each buffer in the receive wqe will be of MTU size.
+	 */
+	uint32_t		id_ud_num_rwqe;
 	ibd_list_t		id_rx_list;
 	ddi_softintr_t		id_rx;
 	uint32_t		id_rx_bufs_outstanding_limit;
@@ -789,7 +853,16 @@
 
 	uint64_t		id_num_intrs;
 	uint64_t		id_tx_short;
-	uint32_t		id_num_swqe;
+	/*
+	 * id_ud_num_swqe
+	 * Number of "send WQE" elements that will be allocated and used by
+	 * ibd. When tuning this parameter, the size of pre-allocated, pre-
+	 * mapped copy buffer in each of these send wqes must be taken into
+	 * account. This copy buffer size is determined by the value of
+	 * IBD_TX_BUF_SZ (this is currently set to the same value of
+	 * ibd_tx_copy_thresh, but may be changed independently if needed).
+	 */
+	uint32_t		id_ud_num_swqe;
 
 	uint64_t		id_xmt_bytes;
 	uint64_t		id_rcv_bytes;
@@ -953,6 +1026,112 @@
 #ifdef DEBUG
 	kstat_t 		*rc_ksp;
 #endif
+	ib_guid_t		id_hca_guid;
+	ib_guid_t		id_port_guid;
+	datalink_id_t		id_dlinkid;
+	datalink_id_t		id_plinkid;
+	int			id_port_inst;
+	struct ibd_state_s	*id_next;
+	boolean_t		id_force_create;
+	boolean_t		id_bgroup_present;
+	uint_t			id_hca_max_chan_sz;
+
+	/*
+	 * UD Mode Tunables
+	 *
+	 * id_ud_tx_copy_thresh
+	 * This sets the threshold at which ibd will attempt to do a bcopy
+	 * of the outgoing data into a pre-mapped buffer. IPoIB driver's
+	 * send behavior is restricted by various parameters, so setting of
+	 * this value must be made after careful considerations only. For
+	 * instance, IB HCAs currently impose a relatively small limit
+	 * (when compared to ethernet NICs) on the length of the SGL for
+	 * transmit. On the other hand, the ip stack could send down mp
+	 * chains that are quite long when LSO is enabled.
+	 *
+	 * id_num_lso_bufs
+	 * Number of "larger-than-MTU" copy buffers to use for cases when the
+	 * outgoing mblk chain is too fragmented to be used with
+	 * ibt_map_mem_iov() and too large to be used with regular MTU-sized
+	 * copy buffers. It is not recommended to tune this variable without
+	 * understanding the application environment and/or memory resources.
+	 * The size of each of these lso buffers is determined by the value of
+	 * IBD_LSO_BUFSZ.
+	 *
+	 * id_num_ah
+	 * Number of AH cache entries to allocate
+	 *
+	 * id_hash_size
+	 * Hash table size for the active AH list
+	 *
+	 */
+	uint_t id_ud_tx_copy_thresh;
+	uint_t id_num_lso_bufs;
+	uint_t id_num_ah;
+	uint_t id_hash_size;
+
+	boolean_t id_create_broadcast_group;
+
+	boolean_t id_allow_coalesce_comp_tuning;
+	uint_t id_ud_rx_comp_count;
+	uint_t id_ud_rx_comp_usec;
+	uint_t id_ud_tx_comp_count;
+	uint_t id_ud_tx_comp_usec;
+
+	/* RC Mode Tunables */
+
+	uint_t id_rc_rx_comp_count;
+	uint_t id_rc_rx_comp_usec;
+	uint_t id_rc_tx_comp_count;
+	uint_t id_rc_tx_comp_usec;
+	/*
+	 * id_rc_tx_copy_thresh
+	 * This sets the threshold at which ibd will attempt to do a bcopy
+	 * of the outgoing data into a pre-mapped buffer.
+	 *
+	 * id_rc_rx_copy_thresh
+	 * If (the size of incoming buffer <= id_rc_rx_copy_thresh), ibd
+	 * will attempt to allocate a buffer and do a bcopy of the incoming
+	 * data into the allocated buffer.
+	 *
+	 * id_rc_rx_rwqe_thresh
+	 * If (the number of available rwqe < id_rc_rx_rwqe_thresh), ibd
+	 * will attempt to allocate a buffer and do a bcopy of the incoming
+	 * data into the allocated buffer.
+	 *
+	 * id_rc_num_swqe
+	 * 1) Send CQ size = id_rc_num_swqe
+	 * 2) The send queue size = id_rc_num_swqe - 1
+	 * 3) Number of pre-allocated Tx buffers for ibt_post_send() =
+	 * id_rc_num_swqe - 1.
+	 *
+	 * id_rc_num_rwqe
+	 * 1) For non-SRQ, we pre-post id_rc_num_rwqe number of WRs
+	 * via ibt_post_receive() for receive queue of each RC channel.
+	 * 2) For SRQ and non-SRQ, receive CQ size = id_rc_num_rwqe
+	 *
+	 * For SRQ
+	 * If using SRQ, we allocate id_rc_num_srq buffers (the size of
+	 * each buffer is equal to the RC MTU) and post them via
+	 * ibt_post_srq().
+	 *
+	 * id_rc_num_srq
+	 * id_rc_num_srq should not be larger than id_rc_num_rwqe,
+	 * otherwise it will cause a bug with the following warnings:
+	 * NOTICE: hermon0: Device Error: EQE cq overrun or protection error
+	 * NOTICE: hermon0: Device Error: EQE local work queue catastrophic
+	 * error
+	 * NOTICE: ibd0: HCA GUID 0003ba0001008984 port 1 PKEY ffff
+	 * catastrophic channel error
+	 * NOTICE: ibd0: HCA GUID 0003ba0001008984 port 1 PKEY ffff
+	 * completion queue error
+	 */
+	uint_t id_rc_tx_copy_thresh;
+	uint_t id_rc_rx_copy_thresh;
+	uint_t id_rc_rx_rwqe_thresh;
+	uint_t id_rc_num_swqe;
+	uint_t id_rc_num_rwqe;
+	uint_t id_rc_num_srq;
 } ibd_state_t;
 
 /*
--- a/usr/src/uts/common/sys/ib/ibnex/ibnex.h	Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/uts/common/sys/ib/ibnex/ibnex.h	Wed Apr 14 10:26:18 2010 -0700
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #ifndef _SYS_IB_IBNEX_IBNEX_H
@@ -86,13 +85,16 @@
  * Any changes to these need to be reflected in that file as well.
  */
 typedef enum {
-	IBNEX_PORT_COMMSVC_NODE,
-	IBNEX_VPPA_COMMSVC_NODE,
-	IBNEX_HCASVC_COMMSVC_NODE,
-	IBNEX_IOC_NODE,
-	IBNEX_PSEUDO_NODE
+	IBNEX_PORT_COMMSVC_NODE		= 0,
+	IBNEX_VPPA_COMMSVC_NODE		= 1,
+	IBNEX_HCASVC_COMMSVC_NODE	= 2,
+	IBNEX_IOC_NODE			= 4,
+	IBNEX_PSEUDO_NODE		= 8
 } ibnex_node_type_t;
 
+#define	IBNEX_HCA_CHILD_NODE (IBNEX_PORT_COMMSVC_NODE |	\
+	    IBNEX_VPPA_COMMSVC_NODE | IBNEX_HCASVC_COMMSVC_NODE)
+
 
 /*
  * Defines for Child device node state:
@@ -223,6 +225,8 @@
 	kcondvar_t		ibnex_ioc_list_cv;
 	uint32_t		ibnex_ioc_list_state;
 	ibdm_ioc_info_t		*ibnex_ioc_list;
+
+	ddi_taskq_t		*ibnex_taskq_id;
 } ibnex_t;
 
 /*
@@ -284,6 +288,10 @@
 #define	IBNEX_HW_NOT_IN_DEVTREE		0
 #define	IBNEX_HW_IN_DEVTREE		1
 
+/*
+ * Function prototype declarations
+ */
+
 #ifdef __cplusplus
 }
 #endif
--- a/usr/src/uts/common/sys/ib/ibtl/ibti_common.h	Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/uts/common/sys/ib/ibtl/ibti_common.h	Wed Apr 14 10:26:18 2010 -0700
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #ifndef	_SYS_IB_IBTL_IBTI_COMMON_H
@@ -32,6 +31,9 @@
  * This file contains the shared/common transport data types and function
  * prototypes.
  */
+#include <sys/types.h>
+#include <sys/ib/ib_types.h>
+#include <sys/ib/ibtl/ibtl_status.h>
 #include <sys/ib/ibtl/ibtl_types.h>
 #include <sys/ib/ibtl/ibti_cm.h>
 #include <sys/isa_defs.h>
@@ -1910,6 +1912,28 @@
 
 ibt_status_t ibt_free_io_mem(ibt_hca_hdl_t, ibt_mem_alloc_hdl_t);
 
+/*
+ * Interfaces to get IB partition information.
+ */
+
+typedef struct ibt_part_attr_s {
+	datalink_id_t	pa_dlinkid;
+	datalink_id_t	pa_plinkid;
+	uint8_t		pa_port;
+	ib_guid_t	pa_hca_guid;
+	ib_guid_t	pa_port_guid;
+	ib_pkey_t	pa_pkey;
+} ibt_part_attr_t;
+
+void ibt_register_part_attr_cb(
+    ibt_status_t (*)(datalink_id_t, ibt_part_attr_t *),
+    ibt_status_t (*)(ibt_part_attr_t **, int *));
+void ibt_unregister_part_attr_cb(void);
+
+ibt_status_t ibt_get_part_attr(datalink_id_t, ibt_part_attr_t *);
+ibt_status_t ibt_get_all_part_attr(ibt_part_attr_t **, int *);
+ibt_status_t ibt_free_part_attr(ibt_part_attr_t *, int);
+
 #ifdef __cplusplus
 }
 #endif
--- a/usr/src/uts/common/sys/ib/ibtl/ibtl_status.h	Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/uts/common/sys/ib/ibtl/ibtl_status.h	Wed Apr 14 10:26:18 2010 -0700
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #ifndef	_SYS_IB_IBTL_IBTL_STATUS_H
@@ -124,6 +123,7 @@
 						/* records was returned. */
 	IBT_DEST_IP_GID_NOT_FOUND	= 25,	/* No IP to GID Mapping */
 	IBT_SRC_IP_NOT_FOUND		= 26,	/* SRC IP Endpoint not found */
+	IBT_NO_SUCH_OBJECT		= 27,	/* No such object */
 
 	/*
 	 * Resource Errors
--- a/usr/src/uts/common/sys/ib/mgt/ibcm/ibcm_arp.h	Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/uts/common/sys/ib/mgt/ibcm/ibcm_arp.h	Wed Apr 14 10:26:18 2010 -0700
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #ifndef _SYS_IB_MGT_IBCM_IBCM_ARP_H
@@ -79,10 +78,8 @@
 	ibcm_arp_prwqn_t	*wqnp;
 } ibcm_arp_streams_t;
 
-#define	IBCM_ARP_IBD_INSTANCES		4
-
 typedef struct ibcm_arp_ip_s {
-	uint8_t		ip_inst;
+	datalink_id_t	ip_linkid;
 	ib_pkey_t	ip_pkey;
 	ib_guid_t	ip_hca_guid;
 	ib_gid_t	ip_port_gid;
@@ -105,6 +102,7 @@
     ibt_ip_addr_t destip, ib_gid_t *sgid, ib_gid_t *dgid,
     ibt_ip_addr_t *saddr_p);
 ibt_status_t ibcm_arp_get_ibds(ibcm_arp_ibd_insts_t *ibdp, sa_family_t fam);
+void ibcm_arp_free_ibds(ibcm_arp_ibd_insts_t *ibds);
 
 #ifdef	__cplusplus
 }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/sys/ibpart.h	Wed Apr 14 10:26:18 2010 -0700
@@ -0,0 +1,127 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#ifndef	_SYS_IBPART_H
+#define	_SYS_IBPART_H
+
+#include <sys/types.h>
+#include <sys/ib/ib_types.h>
+#include <sys/dld_ioc.h>
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+/*
+ * ioctl command codes for IB partition links, built with IBPARTIOC().
+ * IBPARTIOC() is not defined in this header; it is presumably supplied
+ * by <sys/dld_ioc.h>, included above.
+ */
+#define	IBD_CREATE_IBPART	IBPARTIOC(1)
+#define	IBD_DELETE_IBPART	IBPARTIOC(2)
+#define	IBD_INFO_IBPART		IBPARTIOC(3)
+
+/*
+ * Sub-command values.  Presumably stored in ioc_info_cmd of ibd_ioctl_t to
+ * select what an IBD_INFO_IBPART request reports; confirm in the driver.
+ */
+#define	IBD_INFO_CMD_IBPART	1
+#define	IBD_INFO_CMD_IBPORT	2
+#define	IBD_INFO_CMD_PKEYTBLSZ	3
+
+/*
+ * Partition error codes, numbered from 1.  Presumably returned through
+ * ioc_status of ibd_ioctl_t; confirm in the driver.
+ */
+typedef enum ibd_part_err_e {
+	IBD_INVALID_PORT_INST = 1,
+	IBD_PORT_IS_DOWN,
+	IBD_PKEY_NOT_PRESENT,
+	IBD_INVALID_PKEY,
+	IBD_PARTITION_EXISTS,
+	IBD_NO_HW_RESOURCE,
+	IBD_INVALID_PKEY_TBL_SIZE
+} ibd_part_err_t;
+
+/*
+ * Common argument header, embedded as the first member (ibdioc) of the
+ * partition and port ioctl structures below.
+ *
+ * NOTE: If you change this structure make sure that alignments are correct
+ * for the proper operation of the ioctl in both the 32 and 64 bit modes.
+ */
+typedef struct ibd_ioctl_s {
+	int		ioc_info_cmd;
+	datalink_id_t	ioc_linkid;
+	int		ioc_port_inst;
+	uint_t		ioc_portnum;
+	ib_guid_t	ioc_hcaguid;
+	ib_guid_t	ioc_portguid;
+	int		ioc_status;
+	uint32_t	align1;
+} ibd_ioctl_t;
+
+/*
+ * Partition ioctl argument.  The typedefs that follow this structure also
+ * expose it as ibd_create_ioctl_t and ibd_delete_ioctl_t.
+ *
+ * NOTE: If you change this structure make sure that alignments are correct
+ * for the proper operation of the ioctl in both the 32 and 64 bit modes.
+ */
+typedef struct ibpart_ioctl_s {
+	ibd_ioctl_t	ibdioc;
+	datalink_id_t	ioc_partid;
+	boolean_t	ioc_force_create;
+	ib_pkey_t	ioc_pkey;
+	uint16_t	align1;
+	uint32_t	align2;
+} ibpart_ioctl_t;
+
+typedef struct ibpart_ioctl_s ibd_create_ioctl_t;
+typedef struct ibpart_ioctl_s ibd_delete_ioctl_t;
+
+/*
+ * Port ioctl argument.  ioc_pkeys is a native pointer, so the layout of
+ * this structure follows the caller's data model; ibport_ioctl32_t below
+ * is the _SYSCALL32 form with that pointer declared as caddr32_t.
+ */
+typedef struct ibport_ioctl_s {
+	ibd_ioctl_t	ibdioc;
+	uint_t		ioc_pkey_tbl_sz;
+	ib_pkey_t	*ioc_pkeys;
+} ibport_ioctl_t;
+
+#ifdef _SYSCALL32
+typedef struct ibport_ioctl32_s {
+	ibd_ioctl_t	ibdioc;
+	uint_t		ioc_pkey_tbl_sz;
+	caddr32_t	ioc_pkeys;
+} ibport_ioctl32_t;
+#endif
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif /* _SYS_IBPART_H */
--- a/usr/src/uts/common/sys/mac.h	Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/uts/common/sys/mac.h	Wed Apr 14 10:26:18 2010 -0700
@@ -20,8 +20,7 @@
  */
 
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #ifndef	_SYS_MAC_H
@@ -205,6 +204,7 @@
 	MAC_PROP_MAX_RX_RINGS_AVAIL,
 	MAC_PROP_MAX_RXHWCLNT_AVAIL,
 	MAC_PROP_MAX_TXHWCLNT_AVAIL,
+	MAC_PROP_IB_LINKMODE,
 	MAC_PROP_PRIVATE = -1
 } mac_prop_id_t;
 
--- a/usr/src/uts/intel/Makefile.intel.shared	Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/uts/intel/Makefile.intel.shared	Wed Apr 14 10:26:18 2010 -0700
@@ -496,7 +496,7 @@
 #
 #	InfiniBand pseudo drivers
 #
-DRV_KMODS	+= ib ibd rdsib sdp iser daplt hermon tavor sol_ucma sol_uverbs
+DRV_KMODS	+= ib ibp rdsib sdp iser daplt hermon tavor sol_ucma sol_uverbs
 
 #
 #	LVM modules
--- a/usr/src/uts/intel/ibd/Makefile	Wed Apr 14 10:17:23 2010 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,112 +0,0 @@
-#
-# CDDL HEADER START
-#
-# The contents of this file are subject to the terms of the
-# Common Development and Distribution License (the "License").
-# You may not use this file except in compliance with the License.
-#
-# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
-# or http://www.opensolaris.org/os/licensing.
-# See the License for the specific language governing permissions
-# and limitations under the License.
-#
-# When distributing Covered Code, include this CDDL HEADER in each
-# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
-# If applicable, add the following below this CDDL HEADER, with the
-# fields enclosed by brackets "[]" replaced with your own identifying
-# information: Portions Copyright [yyyy] [name of copyright owner]
-#
-# CDDL HEADER END
-#
-#
-# Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
-# Use is subject to license terms.
-#
-#
-
-#
-#	Path to the base of the uts directory tree (usually /usr/src/uts).
-#
-UTSBASE	= ../..
-
-#
-#	Define the module and object file sets.
-#
-MODULE		= ibd
-OBJECTS		= $(IBD_OBJS:%=$(OBJS_DIR)/%)
-LINTS		= $(IBD_OBJS:%.o=$(LINTS_DIR)/%.ln)
-ROOTMODULE	= $(ROOT_DRV_DIR)/$(MODULE)
-CONF_SRCDIR	= $(UTSBASE)/common/io/ib/clients/ibd
-LDFLAGS		+= -dy -Nmisc/mac -Nmisc/ibtl -Nmisc/ibcm -Nmisc/ibmf -Ndrv/ip
-WARLOCK_OUT	= $(IBD_OBJS:%.o=%.ll)
-WARLOCK_OK	= $(MODULE).ok
-WLCMD_DIR	= $(UTSBASE)/common/io/warlock
-#
-#	Include common rules.
-#
-include $(UTSBASE)/intel/Makefile.intel
-
-#
-#	Define targets
-#
-ALL_TARGET	= $(BINARY) $(CONFMOD)
-LINT_TARGET	= $(MODULE).lint
-INSTALL_TARGET	= $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
-
-#
-# For now, disable these lint checks; maintainers should endeavor
-# to investigate and remove these for maximum lint coverage.
-# Please do not carry these forward to new Makefiles.
-#
-LINTTAGS	+= -erroff=E_BAD_PTR_CAST_ALIGN
-LINTTAGS	+= -erroff=E_PTRDIFF_OVERFLOW
-
-#
-#	Default build targets.
-#
-.KEEP_STATE:
-
-def:		$(DEF_DEPS)
-
-all:		$(ALL_DEPS)
-
-clean:		$(CLEAN_DEPS)
-		$(RM) $(WARLOCK_OUT) $(WARLOCK_OK)
-
-clobber:	$(CLOBBER_DEPS)
-		$(RM) $(WARLOCK_OUT) $(WARLOCK_OK)
-
-lint:		$(LINT_DEPS)
-
-modlintlib:	$(MODLINTLIB_DEPS)
-
-clean.lint:	$(CLEAN_LINT_DEPS)
-
-install:	$(INSTALL_DEPS)
-
-#
-#	Include common targets.
-#
-include $(UTSBASE)/intel/Makefile.targ
-
-#
-#       Defines for local commands.
-#
-WARLOCK         = warlock
-WLCC            = wlcc
-TOUCH           = touch
-TEST            = test
-
-warlock: $(WARLOCK_OK)
-
-$(WARLOCK_OK): $(WARLOCK_OUT) $(WLCMD_DIR)/ibd.wlcmd warlock_ddi.files
-	$(WARLOCK) -c $(WLCMD_DIR)/ibd.wlcmd $(WARLOCK_OUT) \
-		-l ../warlock/ddi_dki_impl.ll
-	$(TOUCH) $@
-
-%.ll: $(UTSBASE)/common/io/ib/clients/ibd/%.c \
-	$(UTSBASE)/common/sys/ib/clients/ibd/ibd.h
-	$(WLCC)  $(CPPFLAGS) -DDEBUG -o $@ $<
-
-warlock_ddi.files:
-	@cd ../warlock; pwd; $(MAKE) warlock
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/intel/ibp/Makefile	Wed Apr 14 10:26:18 2010 -0700
@@ -0,0 +1,124 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+#
+#
+
+#
+#	Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE	= ../..
+
+#
+#	Define the module and object file sets.
+#	The ibp module is built from $(IBD_OBJS), and CONF_SRCDIR and the
+#	warlock command file (ibd.wlcmd) still use the ibd names.
+#
+MODULE		= ibp
+OBJECTS		= $(IBD_OBJS:%=$(OBJS_DIR)/%)
+LINTS		= $(IBD_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE	= $(ROOT_DRV_DIR)/$(MODULE)
+CONF_SRCDIR	= $(UTSBASE)/common/io/ib/clients/ibd
+LDFLAGS		+= -dy -Nmisc/mac -Nmisc/ibtl -Nmisc/ibcm -Nmisc/ibmf -Ndrv/ip \
+		-Nmisc/dls -Nmisc/dld
+WARLOCK_OUT	= $(IBD_OBJS:%.o=%.ll)
+WARLOCK_OK	= $(MODULE).ok
+WLCMD_DIR	= $(UTSBASE)/common/io/warlock
+#
+#	Include common rules.
+#
+include $(UTSBASE)/intel/Makefile.intel
+
+#
+#	Define targets
+#
+ALL_TARGET	= $(BINARY) $(CONFMOD)
+LINT_TARGET	= $(MODULE).lint
+INSTALL_TARGET	= $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
+
+#
+# For now, disable these lint checks; maintainers should endeavor
+# to investigate and remove these for maximum lint coverage.
+# Please do not carry these forward to new Makefiles.
+#
+LINTTAGS	+= -erroff=E_BAD_PTR_CAST_ALIGN
+LINTTAGS	+= -erroff=E_PTRDIFF_OVERFLOW
+
+#
+#	Default build targets.
+#
+.KEEP_STATE:
+
+def:		$(DEF_DEPS)
+
+all:		$(ALL_DEPS)
+
+clean:		$(CLEAN_DEPS)
+		$(RM) $(WARLOCK_OUT) $(WARLOCK_OK)
+
+clobber:	$(CLOBBER_DEPS)
+		$(RM) $(WARLOCK_OUT) $(WARLOCK_OK)
+
+lint:		$(LINT_DEPS)
+
+modlintlib:	$(MODLINTLIB_DEPS)
+
+clean.lint:	$(CLEAN_LINT_DEPS)
+
+install:	$(INSTALL_DEPS)
+
+#
+#	Include common targets.
+#
+include $(UTSBASE)/intel/Makefile.targ
+
+#
+#       Defines for local commands.
+#
+WARLOCK         = warlock
+WLCC            = wlcc
+TOUCH           = touch
+TEST            = test
+
+#
+#	warlock is satisfied by the $(WARLOCK_OK) stamp file, which is
+#	touched after $(WARLOCK) has processed the .ll files that $(WLCC)
+#	generates from the ibd client sources.
+#
+warlock: $(WARLOCK_OK)
+
+$(WARLOCK_OK): $(WARLOCK_OUT) $(WLCMD_DIR)/ibd.wlcmd warlock_ddi.files
+	$(WARLOCK) -c $(WLCMD_DIR)/ibd.wlcmd $(WARLOCK_OUT) \
+		-l ../warlock/ddi_dki_impl.ll
+	$(TOUCH) $@
+
+%.ll: $(UTSBASE)/common/io/ib/clients/ibd/%.c \
+	$(UTSBASE)/common/sys/ib/clients/ibd/ibd.h
+	$(WLCC)  $(CPPFLAGS) -DDEBUG -o $@ $<
+
+#
+#	Run the warlock target in ../warlock.  The commands are chained
+#	with && so that a failing cd aborts the recipe instead of running
+#	$(MAKE) warlock again in this directory.
+#
+warlock_ddi.files:
+	@cd ../warlock && pwd && $(MAKE) warlock
--- a/usr/src/uts/sparc/Makefile.sparc.shared	Wed Apr 14 10:17:23 2010 -0700
+++ b/usr/src/uts/sparc/Makefile.sparc.shared	Wed Apr 14 10:26:18 2010 -0700
@@ -281,7 +281,7 @@
 DRV_KMODS	+= usbecm
 DRV_KMODS	+= hci1394 av1394 scsa1394 dcam1394
 DRV_KMODS	+= sbp2
-DRV_KMODS	+= ib ibd rdsib sdp iser daplt hermon tavor sol_ucma sol_uverbs
+DRV_KMODS	+= ib ibp rdsib sdp iser daplt hermon tavor sol_ucma sol_uverbs
 DRV_KMODS	+= pci_pci pcieb pcieb_bcm
 DRV_KMODS	+= i8042 kb8042 mouse8042
 DRV_KMODS	+= fcode
--- a/usr/src/uts/sparc/ibd/Makefile	Wed Apr 14 10:17:23 2010 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,124 +0,0 @@
-#
-# CDDL HEADER START
-#
-# The contents of this file are subject to the terms of the
-# Common Development and Distribution License (the "License").
-# You may not use this file except in compliance with the License.
-#
-# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
-# or http://www.opensolaris.org/os/licensing.
-# See the License for the specific language governing permissions
-# and limitations under the License.
-#
-# When distributing Covered Code, include this CDDL HEADER in each
-# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
-# If applicable, add the following below this CDDL HEADER, with the
-# fields enclosed by brackets "[]" replaced with your own identifying
-# information: Portions Copyright [yyyy] [name of copyright owner]
-#
-# CDDL HEADER END
-#
-#
-# Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
-# Use is subject to license terms.
-#
-#
-
-#
-#	Path to the base of the uts directory tree (usually /usr/src/uts).
-#
-UTSBASE	= ../..
-
-#
-#	Define the module and object file sets.
-#
-MODULE		= ibd
-OBJECTS		= $(IBD_OBJS:%=$(OBJS_DIR)/%)
-LINTS		= $(IBD_OBJS:%.o=$(LINTS_DIR)/%.ln)
-ROOTMODULE	= $(ROOT_DRV_DIR)/$(MODULE)
-CONF_SRCDIR	= $(UTSBASE)/common/io/ib/clients/ibd
-LDFLAGS		+= -dy -Nmisc/mac -Nmisc/ibtl -Nmisc/ibcm -Nmisc/ibmf -Ndrv/ip
-WARLOCK_OUT     = $(IBD_OBJS:%.o=%.ll)
-WARLOCK_OK	= $(MODULE).ok
-WLCMD_DIR	= $(UTSBASE)/common/io/warlock
-#
-#	Include common rules.
-#
-include $(UTSBASE)/sparc/Makefile.sparc
-
-#
-#	Define targets
-#
-ALL_TARGET	= $(BINARY) $(SRC_CONFFILE)
-LINT_TARGET	= $(MODULE).lint
-INSTALL_TARGET	= $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
-
-#
-#	Overrides
-#
-ALL_BUILDS	= $(ALL_BUILDSONLY64)
-DEF_BUILDS	= $(DEF_BUILDSONLY64)
-CLEANLINTFILES	+= $(LINT32_FILES)
-
-# 
-# lint pass one enforcement 
-# 
-CFLAGS += $(CCVERBOSE)
-
-#
-# For now, disable these lint checks; maintainers should endeavor
-# to investigate and remove these for maximum lint coverage.
-# Please do not carry these forward to new Makefiles.
-#
-LINTTAGS	+= -erroff=E_BAD_PTR_CAST_ALIGN
-LINTTAGS	+= -erroff=E_PTRDIFF_OVERFLOW
-
-#
-#	Default build targets.
-#
-.KEEP_STATE:
-
-def:		$(DEF_DEPS)
-
-all:		$(ALL_DEPS)
-
-clean:		$(CLEAN_DEPS)
-		$(RM) $(WARLOCK_OUT) $(WARLOCK_OK)
-
-clobber:	$(CLOBBER_DEPS)
-		$(RM) $(WARLOCK_OUT) $(WARLOCK_OK)
-
-lint:		$(LINT_DEPS)
-
-modlintlib:	$(MODLINTLIB_DEPS) lint32
-
-clean.lint:	$(CLEAN_LINT_DEPS)
-
-install:	$(INSTALL_DEPS)
-
-#
-#	Include common targets.
-#
-include $(UTSBASE)/sparc/Makefile.targ
-
-#
-#       Defines for local commands.
-#
-WARLOCK         = warlock
-WLCC            = wlcc
-TOUCH           = touch
-TEST            = test
-
-warlock: $(WARLOCK_OK) $(WARLOCK_OUT)
-
-$(WARLOCK_OK): $(WARLOCK_OUT) $(WLCMD_DIR)/ibd.wlcmd warlock_ddi.files
-	$(WARLOCK) -c $(WLCMD_DIR)/ibd.wlcmd $(WARLOCK_OUT) \
-		-l ../warlock/ddi_dki_impl.ll
-	$(TOUCH) $@
-
-%.ll: $(UTSBASE)/common/io/ib/clients/ibd/%.c \
-	$(UTSBASE)/common/sys/ib/clients/ibd/ibd.h
-	$(WLCC)  $(CPPFLAGS) -DDEBUG -o $@ $<
-
-warlock_ddi.files:
-	@cd ../warlock; pwd; $(MAKE) warlock
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/sparc/ibp/Makefile	Wed Apr 14 10:26:18 2010 -0700
@@ -0,0 +1,136 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+#
+#
+
+#
+#	Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE	= ../..
+
+#
+#	Define the module and object file sets.
+#	The ibp module is built from $(IBD_OBJS), and CONF_SRCDIR and the
+#	warlock command file (ibd.wlcmd) still use the ibd names.
+#
+MODULE		= ibp
+OBJECTS		= $(IBD_OBJS:%=$(OBJS_DIR)/%)
+LINTS		= $(IBD_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE	= $(ROOT_DRV_DIR)/$(MODULE)
+CONF_SRCDIR	= $(UTSBASE)/common/io/ib/clients/ibd
+LDFLAGS		+= -dy -Nmisc/mac -Nmisc/ibtl -Nmisc/ibcm -Nmisc/ibmf -Ndrv/ip \
+		-Nmisc/dls -Nmisc/dld
+WARLOCK_OUT     = $(IBD_OBJS:%.o=%.ll)
+WARLOCK_OK	= $(MODULE).ok
+WLCMD_DIR	= $(UTSBASE)/common/io/warlock
+#
+#	Include common rules.
+#
+include $(UTSBASE)/sparc/Makefile.sparc
+
+#
+#	Define targets
+#
+ALL_TARGET	= $(BINARY) $(SRC_CONFFILE)
+LINT_TARGET	= $(MODULE).lint
+INSTALL_TARGET	= $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
+
+#
+#	Overrides
+#
+ALL_BUILDS	= $(ALL_BUILDSONLY64)
+DEF_BUILDS	= $(DEF_BUILDSONLY64)
+CLEANLINTFILES	+= $(LINT32_FILES)
+
+#
+# lint pass one enforcement
+#
+CFLAGS += $(CCVERBOSE)
+
+#
+# For now, disable these lint checks; maintainers should endeavor
+# to investigate and remove these for maximum lint coverage.
+# Please do not carry these forward to new Makefiles.
+#
+LINTTAGS	+= -erroff=E_BAD_PTR_CAST_ALIGN
+LINTTAGS	+= -erroff=E_PTRDIFF_OVERFLOW
+
+#
+#	Default build targets.
+#
+.KEEP_STATE:
+
+def:		$(DEF_DEPS)
+
+all:		$(ALL_DEPS)
+
+clean:		$(CLEAN_DEPS)
+		$(RM) $(WARLOCK_OUT) $(WARLOCK_OK)
+
+clobber:	$(CLOBBER_DEPS)
+		$(RM) $(WARLOCK_OUT) $(WARLOCK_OK)
+
+lint:		$(LINT_DEPS)
+
+modlintlib:	$(MODLINTLIB_DEPS) lint32
+
+clean.lint:	$(CLEAN_LINT_DEPS)
+
+install:	$(INSTALL_DEPS)
+
+#
+#	Include common targets.
+#
+include $(UTSBASE)/sparc/Makefile.targ
+
+#
+#       Defines for local commands.
+#
+WARLOCK         = warlock
+WLCC            = wlcc
+TOUCH           = touch
+TEST            = test
+
+#
+#	warlock depends on the $(WARLOCK_OK) stamp file, which is touched
+#	after $(WARLOCK) has processed the .ll files that $(WLCC) generates
+#	from the ibd client sources, and on those .ll files themselves.
+#
+warlock: $(WARLOCK_OK) $(WARLOCK_OUT)
+
+$(WARLOCK_OK): $(WARLOCK_OUT) $(WLCMD_DIR)/ibd.wlcmd warlock_ddi.files
+	$(WARLOCK) -c $(WLCMD_DIR)/ibd.wlcmd $(WARLOCK_OUT) \
+		-l ../warlock/ddi_dki_impl.ll
+	$(TOUCH) $@
+
+%.ll: $(UTSBASE)/common/io/ib/clients/ibd/%.c \
+	$(UTSBASE)/common/sys/ib/clients/ibd/ibd.h
+	$(WLCC)  $(CPPFLAGS) -DDEBUG -o $@ $<
+
+#
+#	Run the warlock target in ../warlock.  The commands are chained
+#	with && so that a failing cd aborts the recipe instead of running
+#	$(MAKE) warlock again in this directory.
+#
+warlock_ddi.files:
+	@cd ../warlock && pwd && $(MAKE) warlock