PSARC 2005/583 VHCI Driven Device Enumeration
author ramat
Thu, 10 Nov 2005 07:14:29 -0800
changeset 878 964ddd439490
parent 877 dacbad95c80a
child 879 9f64559fd01d
PSARC 2005/583 VHCI Driven Device Enumeration
4938301 scsi_vhci does not enumerate devices on-demand (BUS_CONFIG_ONE)
usr/src/cmd/boot/bootadm/filelist.ramdisk
usr/src/cmd/devfsadm/devfsadm.c
usr/src/cmd/truss/print.c
usr/src/uts/common/io/devinfo.c
usr/src/uts/common/os/devctl.c
usr/src/uts/common/os/modctl.c
usr/src/uts/common/os/sunmdi.c
usr/src/uts/common/sys/ddi_implfuncs.h
usr/src/uts/common/sys/devinfo_impl.h
usr/src/uts/common/sys/mdi_impldefs.h
usr/src/uts/common/sys/modctl.h
usr/src/uts/common/sys/sunmdi.h
--- a/usr/src/cmd/boot/bootadm/filelist.ramdisk	Thu Nov 10 01:39:35 2005 -0800
+++ b/usr/src/cmd/boot/bootadm/filelist.ramdisk	Thu Nov 10 07:14:29 2005 -0800
@@ -8,6 +8,8 @@
 etc/path_to_inst
 etc/mach
 etc/devices/devid_cache
+etc/devices/mdi_scsi_vhci_cache
+etc/devices/mdi_ib_cache
 kernel
 platform/i86pc/biosint
 platform/i86pc/kernel
--- a/usr/src/cmd/devfsadm/devfsadm.c	Thu Nov 10 01:39:35 2005 -0800
+++ b/usr/src/cmd/devfsadm/devfsadm.c	Thu Nov 10 07:14:29 2005 -0800
@@ -992,6 +992,13 @@
 		} else {
 			vprint(CHATTY_MID, "%sattaching all drivers\n", fcn);
 			flags |= DINFOFORCE;
+			if (cleanup) {
+				/*
+				 * remove dangling entries from /etc/devices
+				 * files.
+				 */
+				flags |= DINFOCLEANUP;
+			}
 		}
 	}
 
@@ -1012,13 +1019,6 @@
 	/* handle post-cleanup operations desired by the modules. */
 	pre_and_post_cleanup(RM_POST);
 
-	/*
-	 * Remove dangling entries from /etc/devices/devid_cache
-	 * if we forceloaded the entire device tree.
-	 */
-	if (cleanup && (flags & DINFOFORCE) == 0)
-		(void) modctl(MODCLEANUP, NULL, 0, NULL);
-
 	unlock_dev(SYNC_STATE);
 }
 
--- a/usr/src/cmd/truss/print.c	Thu Nov 10 01:39:35 2005 -0800
+++ b/usr/src/cmd/truss/print.c	Thu Nov 10 07:14:29 2005 -0800
@@ -1093,7 +1093,6 @@
 		case MODALLOCPRIV:	s = "MODALLOCPRIV";	break;
 		case MODGETDEVPOLICYBYNAME:
 					s = "MODGETDEVPOLICYBYNAME"; break;
-		case MODCLEANUP:	s = "MODCLEANUP"; break;
 		case MODLOADMINORPERM:	s = "MODLOADMINORPERM"; break;
 		case MODADDMINORPERM:	s = "MODADDMINORPERM"; break;
 		case MODREMMINORPERM:	s = "MODREMMINORPERM"; break;
--- a/usr/src/uts/common/io/devinfo.c	Thu Nov 10 01:39:35 2005 -0800
+++ b/usr/src/uts/common/io/devinfo.c	Thu Nov 10 07:14:29 2005 -0800
@@ -259,6 +259,14 @@
 #define	DI_CACHE_UNLOCK(c)	(mutex_exit(&(c).cache_lock))
 #define	DI_CACHE_LOCKED(c)	(mutex_owned(&(c).cache_lock))
 
+/*
+ * Check that the whole device tree is being configured as a pre-condition for
+ * cleaning up /etc/devices files.
+ */
+#define	DEVICES_FILES_CLEANABLE(st)	\
+	(((st)->command & DINFOSUBTREE) && ((st)->command & DINFOFORCE) && \
+	strcmp(DI_ALL_PTR(st)->root_path, "/") == 0)
+
 #define	CACHE_DEBUG(args)	\
 	{ if (di_cache_debug != DI_QUIET) di_cache_print args; }
 
@@ -270,7 +278,7 @@
 static int di_detach(dev_info_t *, ddi_detach_cmd_t);
 
 static di_off_t di_copyformat(di_off_t, struct di_state *, intptr_t, int);
-static di_off_t di_snapshot(struct di_state *);
+static di_off_t di_snapshot_and_clean(struct di_state *);
 static di_off_t di_copydevnm(di_off_t *, struct di_state *);
 static di_off_t di_copytree(struct dev_info *, di_off_t *, struct di_state *);
 static di_off_t di_copynode(struct di_stack *, struct di_state *);
@@ -726,6 +734,12 @@
 		return (EFAULT);
 	}
 
+	if ((st->command & DINFOCLEANUP) && !DEVICES_FILES_CLEANABLE(st)) {
+		di_freemem(st);
+		(void) di_setstate(st, IOC_IDLE);
+		return (EINVAL);
+	}
+
 	error = 0;
 	if ((st->command & DINFOCACHE) && !cache_args_valid(st, &error)) {
 		di_freemem(st);
@@ -792,11 +806,8 @@
 		DI_CACHE_LOCK(di_cache);
 		*rvalp = di_cache_update(st);
 		DI_CACHE_UNLOCK(di_cache);
-	} else {
-		modunload_disable();
-		*rvalp = di_snapshot(st);
-		modunload_enable();
-	}
+	} else
+		*rvalp = di_snapshot_and_clean(st);
 
 	if (*rvalp) {
 		DI_ALL_PTR(st)->map_size = *rvalp;
@@ -1351,6 +1362,32 @@
 }
 
 /*
+ * Take a snapshot and clean /etc/devices files if DINFOCLEANUP is set
+ */
+static di_off_t
+di_snapshot_and_clean(struct di_state *st)
+{
+	di_off_t off;
+
+	modunload_disable();
+	off = di_snapshot(st);
+	if (off != 0 && (st->command & DINFOCLEANUP)) {
+		ASSERT(DEVICES_FILES_CLEANABLE(st));
+		/*
+		 * Cleanup /etc/devices files:
+		 * In order to accurately account for the system configuration
+		 * in /etc/devices files, the appropriate drivers must be
+		 * fully configured before the cleanup starts.
+		 * So enable modunload only after the cleanup.
+		 */
+		i_ddi_clean_devices_files();
+	}
+	modunload_enable();
+
+	return (off);
+}
+
+/*
  * Assumes all devinfo nodes in device tree have been snapshotted
  */
 static void
@@ -3649,7 +3686,8 @@
 	ASSERT(st->mem_size > 0);
 	ASSERT(st->memlist != NULL);
 
-	if (st->command != (DI_CACHE_SNAPSHOT_FLAGS & DIIOC_MASK)) {
+	if ((st->command & DI_CACHE_SNAPSHOT_FLAGS) !=
+	    (DI_CACHE_SNAPSHOT_FLAGS & DIIOC_MASK)) {
 		CACHE_DEBUG((DI_INFO,
 		    "not cacheable: incompatible flags: 0x%x",
 		    st->command));
@@ -3774,9 +3812,7 @@
 	 */
 	atomic_or_32(&di_cache.cache_valid, 1);
 
-	modunload_disable();
-	rval = di_snapshot(st);
-	modunload_enable();
+	rval = di_snapshot_and_clean(st);
 
 	if (rval == 0) {
 		CACHE_DEBUG((DI_ERR, "can't update cache: bad snapshot"));
--- a/usr/src/uts/common/os/devctl.c	Thu Nov 10 01:39:35 2005 -0800
+++ b/usr/src/uts/common/os/devctl.c	Thu Nov 10 07:14:29 2005 -0800
@@ -90,6 +90,9 @@
 extern int modrootloaded;
 extern struct bootops *bootops;
 
+extern void mdi_read_devices_files(void);
+extern void mdi_clean_vhcache(void);
+
 #ifdef	DEBUG
 int nvp_devid_debug = 0;
 int nvpdaemon_debug = 0;
@@ -140,7 +143,7 @@
 	return (cksum);
 }
 
-static int
+int
 fread_nvlist(char *filename, nvlist_t **ret_nvlist)
 {
 	struct _buf	*file;
@@ -435,26 +438,25 @@
 	return (rval);
 }
 
-static int
-fwrite_nvlist(nvfd_t *nvfd, nvlist_t *nvl)
+int
+fwrite_nvlist(char *filename, nvlist_t *nvl)
 {
 	char	*buf;
 	char	*nvbuf;
 	kfile_t	*fp;
 	char	*newname;
-	int	len, err;
-	int	rval;
+	int	len, err, err1;
 	size_t	buflen;
 	ssize_t	n;
 
 	ASSERT(modrootloaded);
 
 	nvbuf = NULL;
-	rval = nvlist_pack(nvl, &nvbuf, &buflen, NV_ENCODE_NATIVE, 0);
-	if (rval != 0) {
+	err = nvlist_pack(nvl, &nvbuf, &buflen, NV_ENCODE_NATIVE, 0);
+	if (err != 0) {
 		KFIOERR((CE_CONT, "%s: error %d packing nvlist\n",
-			nvfd->nvf_name, rval));
-		return (DDI_FAILURE);
+			filename, err));
+		return (err);
 	}
 
 	buf = kmem_alloc(sizeof (nvpf_hdr_t) + buflen, KM_SLEEP);
@@ -471,12 +473,12 @@
 	kmem_free(nvbuf, buflen);
 	buflen += sizeof (nvpf_hdr_t);
 
-	len = strlen(nvfd->nvf_name) + MAX_SUFFIX_LEN + 2;
+	len = strlen(filename) + MAX_SUFFIX_LEN + 2;
 	newname = kmem_alloc(len, KM_SLEEP);
 
 
 	(void) sprintf(newname, "%s.%s",
-		nvfd->nvf_name, NEW_FILENAME_SUFFIX);
+		filename, NEW_FILENAME_SUFFIX);
 
 	/*
 	 * To make it unlikely we suffer data loss, write
@@ -485,15 +487,11 @@
 	 * to replace the previous.
 	 */
 
-	rval = DDI_SUCCESS;
 	if ((err = kfcreate(newname, &fp)) == 0) {
 		err = kfwrite(fp, buf, buflen, &n);
 		if (err) {
 			KFIOERR((CE_CONT, "%s: write error - %d\n",
 				newname, err));
-			if (err == EROFS)
-				NVF_MARK_READONLY(nvfd);
-			rval = DDI_FAILURE;
 		} else {
 			if (n != buflen) {
 				KFIOERR((CE_CONT,
@@ -501,14 +499,15 @@
 				    newname, n, buflen));
 				KFIOERR((CE_CONT,
 				    "%s: filesystem may be full?\n", newname));
-				rval = DDI_FAILURE;
+				err = EIO;
 			}
 		}
-		if ((err = kfclose(fp)) != 0) {
+		if ((err1 = kfclose(fp)) != 0) {
 			KFIOERR((CE_CONT, "%s: close error\n", newname));
-			rval = DDI_FAILURE;
+			if (err == 0)
+				err = err1;
 		}
-		if (rval != DDI_SUCCESS) {
+		if (err != 0) {
 			if (kfremove(newname) != 0) {
 				KFIOERR((CE_CONT, "%s: remove failed\n",
 				    newname));
@@ -516,26 +515,35 @@
 		}
 	} else {
 		KFIOERR((CE_CONT, "%s: create failed - %d\n",
-			nvfd->nvf_name, err));
-		if (err == EROFS)
-			NVF_MARK_READONLY(nvfd);
-		rval = DDI_FAILURE;
+			filename, err));
 	}
 
-	if (rval == DDI_SUCCESS) {
-		if (kfrename(newname, nvfd->nvf_name) != 0) {
+	if (err == 0) {
+		if ((err = kfrename(newname, filename)) != 0) {
 			KFIOERR((CE_CONT, "%s: rename from %s failed\n",
-				newname, nvfd->nvf_name));
-			rval = DDI_FAILURE;
+				newname, filename));
 		}
 	}
 
 	kmem_free(newname, len);
 	kmem_free(buf, buflen);
 
-	return (rval);
+	return (err);
 }
 
+static int
+e_fwrite_nvlist(nvfd_t *nvfd, nvlist_t *nvl)
+{
+	int err;
+
+	if ((err = fwrite_nvlist(nvfd->nvf_name, nvl)) == 0)
+		return (DDI_SUCCESS);
+	else {
+		if (err == EROFS)
+			NVF_MARK_READONLY(nvfd);
+		return (DDI_FAILURE);
+	}
+}
 
 static void
 nvp_free(nvp_list_t *np)
@@ -834,6 +842,8 @@
 	nvfd_t nvfd;
 	int rval;
 
+	mdi_read_devices_files();
+
 	if (devid_cache_read_disable)
 		return;
 
@@ -1603,7 +1613,7 @@
 	nvfd->nvf_flags |= NVF_FLUSHING;
 	rw_exit(&nvfd->nvf_lock);
 
-	rval = fwrite_nvlist(nvfd, nvl);
+	rval = e_fwrite_nvlist(nvfd, nvl);
 	nvlist_free(nvl);
 
 	rw_enter(&nvfd->nvf_lock, RW_WRITER);
@@ -1705,6 +1715,13 @@
 	}
 }
 
+void
+i_ddi_clean_devices_files(void)
+{
+	e_devid_cache_cleanup();
+	mdi_clean_vhcache();
+}
+
 #ifdef	DEBUG
 static void
 devid_log(char *fmt, ddi_devid_t devid, char *path)
--- a/usr/src/uts/common/os/modctl.c	Thu Nov 10 01:39:35 2005 -0800
+++ b/usr/src/uts/common/os/modctl.c	Thu Nov 10 07:14:29 2005 -0800
@@ -1830,11 +1830,6 @@
 		    (devplcysys_t *)a2, (char *)a3);
 		break;
 
-	case MODCLEANUP:
-		e_devid_cache_cleanup();
-		error = 0;
-		break;
-
 	case MODLOADMINORPERM:
 	case MODADDMINORPERM:
 	case MODREMMINORPERM:
--- a/usr/src/uts/common/os/sunmdi.c	Thu Nov 10 01:39:35 2005 -0800
+++ b/usr/src/uts/common/os/sunmdi.c	Thu Nov 10 07:14:29 2005 -0800
@@ -64,6 +64,7 @@
 #include <sys/taskq.h>
 #include <sys/epm.h>
 #include <sys/sunpm.h>
+#include <sys/modhash.h>
 
 #ifdef	DEBUG
 #include <sys/debug.h>
@@ -107,13 +108,27 @@
 taskq_t				*mdi_taskq;
 static uint_t			mdi_taskq_n_threads = MDI_TASKQ_N_THREADS;
 
-static int		mdi_max_bus_config_threads = 100;
-/*
- * To reduce unnecessary BUS_CONFIG_ALLs, do not BUS_CONFIG_ALL phcis in the
- * context of a BUS_CONFIG_ONE if a BUS_CONFIG_ALL has already been performed
- * in the last mdi_bus_config_timeout seconds.
- */
-static int		mdi_bus_config_timeout = 60;	/* in seconds */
+#define	TICKS_PER_SECOND	(drv_usectohz(1000000))
+
+/*
+ * The data should be "quiet" for this interval (in seconds) before the
+ * vhci cached data is flushed to the disk.
+ */
+static int mdi_vhcache_flush_delay = 10;
+
+/* number of seconds the vhcache flush daemon will sleep idle before exiting */
+static int mdi_vhcache_flush_daemon_idle_time = 60;
+
+/*
+ * number of seconds the asynchronous configuration thread will sleep idle
+ * before exiting.
+ */
+static int mdi_async_config_idle_time = 600;
+
+static int mdi_bus_config_cache_hash_size = 256;
+
+/* turns off multithreaded configuration for certain operations */
+static int mdi_mtc_off = 0;
 
 /*
  * MDI component property name/value string definitions
@@ -181,8 +196,7 @@
 static void		i_mdi_phci_get_client_lock(mdi_phci_t *,
 			    mdi_client_t *);
 static void		i_mdi_phci_unlock(mdi_phci_t *);
-static mdi_pathinfo_t	*i_mdi_pi_alloc(mdi_phci_t *, char *,
-			    mdi_client_t *, int);
+static mdi_pathinfo_t	*i_mdi_pi_alloc(mdi_phci_t *, char *, mdi_client_t *);
 static void		i_mdi_phci_add_path(mdi_phci_t *, mdi_pathinfo_t *);
 static void		i_mdi_client_add_path(mdi_client_t *, mdi_pathinfo_t *);
 static void		i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *,
@@ -195,14 +209,14 @@
 			    mdi_pathinfo_state_t, int);
 static int		i_mdi_pi_offline(mdi_pathinfo_t *, int);
 static dev_info_t	*i_mdi_devinfo_create(mdi_vhci_t *, char *, char *,
-			    char **, int, int);
+			    char **, int);
 static dev_info_t	*i_mdi_devinfo_find(mdi_vhci_t *, char *, char *);
 static int		i_mdi_devinfo_remove(dev_info_t *, dev_info_t *, int);
 static int		i_mdi_is_child_present(dev_info_t *, dev_info_t *);
-static mdi_client_t	*i_mdi_client_alloc(mdi_vhci_t *, char *, char *, int);
+static mdi_client_t	*i_mdi_client_alloc(mdi_vhci_t *, char *, char *);
 static void		i_mdi_client_enlist_table(mdi_vhci_t *, mdi_client_t *);
 static void		i_mdi_client_delist_table(mdi_vhci_t *, mdi_client_t *);
-static mdi_client_t	*i_mdi_client_find(mdi_vhci_t *, char *);
+static mdi_client_t	*i_mdi_client_find(mdi_vhci_t *, char *, char *);
 static void		i_mdi_client_update_state(mdi_client_t *);
 static int		i_mdi_client_compute_state(mdi_client_t *,
 			    mdi_phci_t *);
@@ -225,6 +239,33 @@
 static void		i_mdi_report_path_state(mdi_client_t *,
 			    mdi_pathinfo_t *);
 
+static void		setup_vhci_cache(mdi_vhci_t *);
+static int		destroy_vhci_cache(mdi_vhci_t *);
+static void		setup_phci_driver_list(mdi_vhci_t *);
+static void		free_phci_driver_list(mdi_vhci_config_t *);
+static int		stop_vhcache_async_threads(mdi_vhci_config_t *);
+static boolean_t	stop_vhcache_flush_thread(void *, int);
+static void		free_string_array(char **, int);
+static void		free_vhcache_phci(mdi_vhcache_phci_t *);
+static void		free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *);
+static void		free_vhcache_client(mdi_vhcache_client_t *);
+static int		mainnvl_to_vhcache(mdi_vhci_cache_t *, nvlist_t *);
+static nvlist_t		*vhcache_to_mainnvl(mdi_vhci_cache_t *);
+static void		vhcache_phci_add(mdi_vhci_config_t *, mdi_phci_t *);
+static void		vhcache_phci_remove(mdi_vhci_config_t *, mdi_phci_t *);
+static void		vhcache_pi_add(mdi_vhci_config_t *,
+			    struct mdi_pathinfo *);
+static void		vhcache_pi_remove(mdi_vhci_config_t *,
+			    struct mdi_pathinfo *);
+static void		free_phclient_path_list(mdi_phys_path_t *);
+static void		sort_vhcache_paths(mdi_vhcache_client_t *);
+static int		flush_vhcache(mdi_vhci_config_t *, int);
+static void		vhcache_dirty(mdi_vhci_config_t *);
+static void		free_async_client_config(mdi_async_client_config_t *);
+static nvlist_t		*read_on_disk_vhci_cache(char *);
+extern int		fread_nvlist(char *, nvlist_t **);
+extern int		fwrite_nvlist(char *, nvlist_t *);
+
 /* called once when first vhci registers with mdi */
 static void
 i_mdi_init()
@@ -343,11 +384,7 @@
 		vh->vh_dip = vdip;
 		vh->vh_ops = vops;
 
-		/*
-		 * other members of vh_bus_config are initialized by
-		 * the above kmem_zalloc of the vhci structure.
-		 */
-		cv_init(&vh->vh_bus_config.vhc_cv, NULL, CV_DRIVER, NULL);
+		setup_vhci_cache(vh);
 
 		if (mdi_vhci_head == NULL) {
 			mdi_vhci_head = vh;
@@ -387,7 +424,6 @@
 mdi_vhci_unregister(dev_info_t *vdip, int flags)
 {
 	mdi_vhci_t	*found, *vh, *prev = NULL;
-	mdi_phci_config_t *phc, *next_phc;
 
 	/*
 	 * Check for invalid VHCI
@@ -416,9 +452,7 @@
 	 * should have been unregistered, before a vHCI can be
 	 * unregistered.
 	 */
-	if (vh->vh_phci_count || vh->vh_client_count) {
-		MDI_DEBUG(1, (CE_NOTE, NULL,
-		    "!mdi_vhci_unregister: pHCI in registered state.\n"));
+	if (vh->vh_phci_count || vh->vh_client_count || vh->vh_refcnt) {
 		mutex_exit(&mdi_mutex);
 		return (MDI_FAILURE);
 	}
@@ -435,31 +469,28 @@
 		mdi_vhci_tail = prev;
 	}
 
-	vh->vh_ops = NULL;
 	mdi_vhci_count--;
 	mutex_exit(&mdi_mutex);
+
+	if (destroy_vhci_cache(vh) != MDI_SUCCESS) {
+		/* add vhci to the global list */
+		mutex_enter(&mdi_mutex);
+		if (mdi_vhci_head == NULL)
+			mdi_vhci_head = vh;
+		else
+			mdi_vhci_tail->vh_next = vh;
+		mdi_vhci_tail = vh;
+		mdi_vhci_count++;
+		mutex_exit(&mdi_mutex);
+		return (MDI_FAILURE);
+	}
+
+	vh->vh_ops = NULL;
 	DEVI(vdip)->devi_mdi_component &= ~MDI_COMPONENT_VHCI;
 	DEVI(vdip)->devi_mdi_xhci = NULL;
 	kmem_free(vh->vh_class, strlen(vh->vh_class)+1);
 	kmem_free(vh->vh_client_table,
 	    mdi_client_table_size * sizeof (struct client_hash));
-
-	/*
-	 * there must be no more tasks on the bus config taskq as the vhci
-	 * driver can not be detached while bus config is in progress.
-	 */
-	ASSERT(vh->vh_bus_config.vhc_start_time == 0);
-
-	if (vh->vh_bus_config.vhc_taskq != NULL)
-		taskq_destroy(vh->vh_bus_config.vhc_taskq);
-
-	for (phc = vh->vh_bus_config.vhc_phc; phc != NULL; phc = next_phc) {
-		next_phc = phc->phc_next;
-		kmem_free(phc, sizeof (*phc));
-	}
-
-	cv_destroy(&vh->vh_bus_config.vhc_cv);
-
 	kmem_free(vh, sizeof (mdi_vhci_t));
 	return (MDI_SUCCESS);
 }
@@ -573,6 +604,8 @@
 	DEVI(pdip)->devi_mdi_component |= MDI_COMPONENT_PHCI;
 	DEVI(pdip)->devi_mdi_xhci = (caddr_t)ph;
 
+	vhcache_phci_add(vh->vh_config, ph);
+
 	mutex_enter(&mdi_mutex);
 	if (vh->vh_phci_head == NULL) {
 		vh->vh_phci_head = ph;
@@ -582,8 +615,6 @@
 	}
 	vh->vh_phci_tail = ph;
 	vh->vh_phci_count++;
-	/* to force discovery of all phci children during busconfig */
-	vh->vh_bus_config.vhc_cutoff_time = -1;
 	mutex_exit(&mdi_mutex);
 	return (MDI_SUCCESS);
 }
@@ -645,27 +676,9 @@
 
 	vh->vh_phci_count--;
 
-	/*
-	 * If no busconfig is in progress, release the phci busconfig resources.
-	 * We only need vh->vh_phci_count of busconfig resources.
-	 */
-	if (vh->vh_bus_config.vhc_start_time == 0 &&
-	    vh->vh_bus_config.vhc_phc_cnt > vh->vh_phci_count) {
-		int count;
-
-		count = vh->vh_bus_config.vhc_phc_cnt - vh->vh_phci_count;
-		while (count--) {
-			mdi_phci_config_t *phc;
-
-			phc = vh->vh_bus_config.vhc_phc;
-			vh->vh_bus_config.vhc_phc = phc->phc_next;
-			kmem_free(phc, sizeof (*phc));
-		}
-		vh->vh_bus_config.vhc_phc_cnt = vh->vh_phci_count;
-	}
-
 	mutex_exit(&mdi_mutex);
 
+	vhcache_phci_remove(vh->vh_config, ph);
 	cv_destroy(&ph->ph_unstable_cv);
 	cv_destroy(&ph->ph_powerchange_cv);
 	mutex_destroy(&ph->ph_mutex);
@@ -836,7 +849,7 @@
  */
 static dev_info_t *
 i_mdi_devinfo_create(mdi_vhci_t *vh, char *name, char *guid,
-	char **compatible, int ncompatible, int flags)
+	char **compatible, int ncompatible)
 {
 	dev_info_t *cdip = NULL;
 
@@ -851,13 +864,7 @@
 			(void *)cdip);
 	}
 
-	if (flags == DDI_SLEEP) {
-		ndi_devi_alloc_sleep(vh->vh_dip, name,
-		    DEVI_SID_NODEID, &cdip);
-	} else {
-		(void) ndi_devi_alloc(vh->vh_dip, name,
-		    DEVI_SID_NODEID, &cdip);
-	}
+	ndi_devi_alloc_sleep(vh->vh_dip, name, DEVI_SID_NODEID, &cdip);
 	if (cdip == NULL)
 		goto fail;
 
@@ -1058,38 +1065,24 @@
  */
 /*ARGSUSED*/
 static mdi_client_t *
-i_mdi_client_alloc(mdi_vhci_t *vh, char *name, char *lguid, int flags)
+i_mdi_client_alloc(mdi_vhci_t *vh, char *name, char *lguid)
 {
 	mdi_client_t	*ct;
-	char		*drvname = NULL;
-	char		*guid = NULL;
-	client_lb_args_t 	*lb_args = NULL;
 
 	ASSERT(MUTEX_HELD(&mdi_mutex));
 
 	/*
 	 * Allocate and initialize a component structure.
 	 */
-	ct = kmem_zalloc(sizeof (*ct),
-	    (flags == DDI_SLEEP) ? KM_SLEEP : KM_NOSLEEP);
-	if (ct == NULL)
-		goto fail;
+	ct = kmem_zalloc(sizeof (*ct), KM_SLEEP);
 	mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL);
 	ct->ct_hnext = NULL;
 	ct->ct_hprev = NULL;
 	ct->ct_dip = NULL;
 	ct->ct_vhci = vh;
-	drvname = kmem_alloc(strlen(name) + 1,
-	    (flags == DDI_SLEEP) ? KM_SLEEP : KM_NOSLEEP);
-	if (drvname == NULL)
-		goto fail;
-	ct->ct_drvname = drvname;
+	ct->ct_drvname = kmem_alloc(strlen(name) + 1, KM_SLEEP);
 	(void) strcpy(ct->ct_drvname, name);
-	guid = kmem_alloc(strlen(lguid) + 1,
-	    (flags == DDI_SLEEP) ? KM_SLEEP : KM_NOSLEEP);
-	if (guid == NULL)
-		goto fail;
-	ct->ct_guid = guid;
+	ct->ct_guid = kmem_alloc(strlen(lguid) + 1, KM_SLEEP);
 	(void) strcpy(ct->ct_guid, lguid);
 	ct->ct_cprivate = NULL;
 	ct->ct_vprivate = NULL;
@@ -1105,33 +1098,18 @@
 	cv_init(&ct->ct_unstable_cv, NULL, CV_DRIVER, NULL);
 	cv_init(&ct->ct_powerchange_cv, NULL, CV_DRIVER, NULL);
 	ct->ct_lb = vh->vh_lb;
-	lb_args =  kmem_zalloc(sizeof (client_lb_args_t),
-		(flags == DDI_SLEEP) ? KM_SLEEP : KM_NOSLEEP);
-	if (lb_args == NULL)
-		goto fail;
-	ct->ct_lb_args = lb_args;
+	ct->ct_lb_args =  kmem_zalloc(sizeof (client_lb_args_t), KM_SLEEP);
 	ct->ct_lb_args->region_size = LOAD_BALANCE_DEFAULT_REGION_SIZE;
 	ct->ct_path_count = 0;
 	ct->ct_path_head = NULL;
 	ct->ct_path_tail = NULL;
 	ct->ct_path_last = NULL;
 
-
 	/*
 	 * Add this client component to our client hash queue
 	 */
 	i_mdi_client_enlist_table(vh, ct);
 	return (ct);
-
-fail:
-	if (guid)
-		kmem_free(guid, strlen(lguid) + 1);
-	if (drvname)
-		kmem_free(drvname, strlen(name) + 1);
-	if (lb_args)
-		kmem_free(lb_args, sizeof (client_lb_args_t));
-	kmem_free(ct, sizeof (*ct));
-	return (NULL);
 }
 
 /*
@@ -1254,7 +1232,7 @@
  *		Caller should hold the mdi_mutex
  */
 static mdi_client_t *
-i_mdi_client_find(mdi_vhci_t *vh, char *guid)
+i_mdi_client_find(mdi_vhci_t *vh, char *cname, char *guid)
 {
 	int			index;
 	struct client_hash	*head;
@@ -1266,7 +1244,8 @@
 
 	ct = head->ct_hash_head;
 	while (ct != NULL) {
-		if (strcmp(ct->ct_guid, guid) == 0) {
+		if (strcmp(ct->ct_guid, guid) == 0 &&
+		    (cname == NULL || strcmp(ct->ct_drvname, cname) == 0)) {
 			break;
 		}
 		ct = ct->ct_hnext;
@@ -2373,10 +2352,23 @@
 	}
 
 	/*
+	 * XXX - Is the rest of the code in this function really necessary?
+	 * The consumers of mdi_pi_find() can search for the desired pathinfo
+	 * node by calling mdi_pi_find(pdip, NULL, paddr). Irrespective of
+	 * whether the search is based on the pathinfo nodes attached to
+	 * the pHCI or the client node, the result will be the same.
+	 */
+
+	/*
 	 * Find the client device corresponding to 'caddr'
 	 */
 	mutex_enter(&mdi_mutex);
-	ct = i_mdi_client_find(vh, caddr);
+
+	/*
+	 * XXX - Passing NULL to the following function works as long as
+	 * the client addresses (caddr) are unique on a per-vhci basis.
+	 */
+	ct = i_mdi_client_find(vh, NULL, caddr);
 	if (ct == NULL) {
 		/*
 		 * Client not found, Obviously mdi_pathinfo node has not been
@@ -2444,6 +2436,7 @@
 	mdi_pathinfo_t	*pip = NULL;
 	dev_info_t	*cdip;
 	int		rv = MDI_NOMEM;
+	int		path_allocated = 0;
 
 	if (pdip == NULL || cname == NULL || caddr == NULL || paddr == NULL ||
 	    ret_pip == NULL) {
@@ -2484,16 +2477,12 @@
 	MDI_PHCI_UNSTABLE(ph);
 	MDI_PHCI_UNLOCK(ph);
 
-	/*
-	 * Look for a client device with matching guid identified by caddr,
-	 * If not found create one
-	 */
+	/* look for a matching client, create one if not found */
 	mutex_enter(&mdi_mutex);
-	ct = i_mdi_client_find(vh, caddr);
+	ct = i_mdi_client_find(vh, cname, caddr);
 	if (ct == NULL) {
-		ct = i_mdi_client_alloc(vh, cname, caddr, flags);
-		if (ct == NULL)
-			goto fail;
+		ct = i_mdi_client_alloc(vh, cname, caddr);
+		ASSERT(ct != NULL);
 	}
 
 	if (ct->ct_dip == NULL) {
@@ -2501,7 +2490,7 @@
 		 * Allocate a devinfo node
 		 */
 		ct->ct_dip = i_mdi_devinfo_create(vh, cname, caddr,
-		    compatible, ncompatible, flags);
+		    compatible, ncompatible);
 		if (ct->ct_dip == NULL) {
 			(void) i_mdi_client_free(vh, ct);
 			goto fail;
@@ -2529,11 +2518,9 @@
 		 * This is a new path for this client device.  Allocate and
 		 * initialize a new pathinfo node
 		 */
-		pip = i_mdi_pi_alloc(ph, paddr, ct, flags);
-		if (pip == NULL) {
-			(void) i_mdi_client_free(vh, ct);
-			goto fail;
-		}
+		pip = i_mdi_pi_alloc(ph, paddr, ct);
+		ASSERT(pip != NULL);
+		path_allocated = 1;
 	}
 	rv = MDI_SUCCESS;
 
@@ -2550,6 +2537,10 @@
 	MDI_PHCI_STABLE(ph);
 	MDI_PHCI_UNLOCK(ph);
 	*ret_pip = pip;
+
+	if (path_allocated)
+		vhcache_pi_add(vh->vh_config, MDI_PI(pip));
+
 	return (rv);
 }
 
@@ -2571,19 +2562,13 @@
 
 /*ARGSUSED*/
 static mdi_pathinfo_t *
-i_mdi_pi_alloc(mdi_phci_t *ph, char *paddr, mdi_client_t *ct, int flags)
-{
-	mdi_pathinfo_t	*pip = NULL;
-	char		*pi_addr = NULL;
-	nvlist_t	*pi_prop = NULL;
-
+i_mdi_pi_alloc(mdi_phci_t *ph, char *paddr, mdi_client_t *ct)
+{
+	mdi_pathinfo_t	*pip;
 	int		ct_circular;
 	int		ph_circular;
 
-	pip = kmem_zalloc(sizeof (struct mdi_pathinfo),
-	    (flags == DDI_SLEEP) ? KM_SLEEP : KM_NOSLEEP);
-	if (pip == NULL)
-		goto fail;
+	pip = kmem_zalloc(sizeof (struct mdi_pathinfo), KM_SLEEP);
 	mutex_init(&MDI_PI(pip)->pi_mutex, NULL, MUTEX_DEFAULT, NULL);
 	MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT |
 	    MDI_PATHINFO_STATE_TRANSIENT;
@@ -2601,17 +2586,10 @@
 	cv_init(&MDI_PI(pip)->pi_state_cv, NULL, CV_DEFAULT, NULL);
 	MDI_PI(pip)->pi_client = ct;
 	MDI_PI(pip)->pi_phci = ph;
-	pi_addr =
-	    MDI_PI(pip)->pi_addr = kmem_alloc(strlen(paddr) + 1,
-	    (flags == DDI_SLEEP) ? KM_SLEEP : KM_NOSLEEP);
-	if (pi_addr == NULL)
-		goto fail;
+	MDI_PI(pip)->pi_addr = kmem_alloc(strlen(paddr) + 1, KM_SLEEP);
 	(void) strcpy(MDI_PI(pip)->pi_addr, paddr);
-	(void) nvlist_alloc(&pi_prop, NV_UNIQUE_NAME,
-	    (flags == DDI_SLEEP) ? KM_SLEEP : KM_NOSLEEP);
-	if (pi_prop == NULL)
-		goto fail;
-	MDI_PI(pip)->pi_prop = pi_prop;
+	(void) nvlist_alloc(&MDI_PI(pip)->pi_prop, NV_UNIQUE_NAME, KM_SLEEP);
+	ASSERT(MDI_PI(pip)->pi_prop != NULL);
 	MDI_PI(pip)->pi_pprivate = NULL;
 	MDI_PI(pip)->pi_cprivate = NULL;
 	MDI_PI(pip)->pi_vprivate = NULL;
@@ -2635,14 +2613,6 @@
 	ndi_devi_exit(ct->ct_dip, ct_circular);
 
 	return (pip);
-
-fail:
-	if (pi_prop)
-		(void) nvlist_free(pi_prop);
-	if (pi_addr)
-		kmem_free(pi_addr, strlen(paddr) + 1);
-	kmem_free(pip, sizeof (struct mdi_pathinfo));
-	return (NULL);
 }
 
 /*
@@ -2788,6 +2758,8 @@
 	}
 	MDI_PI_UNLOCK(pip);
 
+	vhcache_pi_remove(vh->vh_config, MDI_PI(pip));
+
 	MDI_CLIENT_LOCK(ct);
 
 	/* Prevent further failovers till mdi_mutex is held */
@@ -2833,6 +2805,10 @@
 	}
 	MDI_CLIENT_UNLOCK(ct);
 	mutex_exit(&mdi_mutex);
+
+	if (rv == MDI_FAILURE)
+		vhcache_pi_add(vh->vh_config, MDI_PI(pip));
+
 	return (rv);
 }
 
@@ -6289,390 +6265,6 @@
 	return (MDI_SUCCESS);
 }
 
-/*
- * XXX This list should include all phci drivers needed during boot time
- * though it currently contains "fp" only.
- * Hopefully, the mechanism provided here will be replaced with a better
- * mechanism by vhci driven enumeration project.
- */
-static char *phci_driver_list[] = { "fp" };
-#define	N_PHCI_DRIVERS	(sizeof (phci_driver_list) / sizeof (char *))
-
-static void
-i_mdi_attach_phci_drivers()
-{
-	int  i;
-	major_t m;
-
-	for (i = 0; i < N_PHCI_DRIVERS; i++) {
-		m = ddi_name_to_major(phci_driver_list[i]);
-		if (m != (major_t)-1) {
-			if (ddi_hold_installed_driver(m) != NULL)
-				ddi_rele_driver(m);
-		}
-	}
-}
-
-/* bus config the specified phci */
-static void
-i_mdi_phci_bus_config(void *arg)
-{
-	mdi_phci_config_t *phc = (mdi_phci_config_t *)arg;
-	mdi_vhci_config_t *vhc;
-	dev_info_t	*ph_dip;
-	int		rv;
-
-	ASSERT(phc);
-	vhc = phc->phc_vhc;
-	ASSERT(vhc->vhc_op == BUS_CONFIG_ALL ||
-	    vhc->vhc_op == BUS_CONFIG_DRIVER);
-
-	/*
-	 * Must have already held the phci parent in
-	 * i_mdi_bus_config_all_phcis().
-	 * First configure the phci itself.
-	 */
-	rv = ndi_devi_config_one(phc->phc_parent_dip, phc->phc_devnm + 1,
-	    &ph_dip, vhc->vhc_flags);
-
-	/* release the hold that i_mdi_bus_config_all_phcis() placed */
-	ndi_rele_devi(phc->phc_parent_dip);
-
-	if (rv == NDI_SUCCESS) {
-		/* now bus config the phci */
-		if (vhc->vhc_op == BUS_CONFIG_DRIVER) {
-			(void) ndi_devi_config_driver(ph_dip, vhc->vhc_flags,
-				vhc->vhc_major);
-		} else
-			(void) ndi_devi_config(ph_dip, vhc->vhc_flags);
-
-		/* release the hold that ndi_devi_config_one() placed */
-		ndi_rele_devi(ph_dip);
-	}
-}
-
-/*
- * Bus config all registered phcis associated with the vhci in parallel.
- * This process guarantees that the child nodes are enumerated under the vhci,
- * but not necessarily attached.
- * op must be BUS_CONFIG_DRIVER or BUS_CONFIG_ALL.
- */
-static int
-i_mdi_bus_config_all_phcis(dev_info_t *vdip, uint_t flags,
-    ddi_bus_config_op_t op, major_t maj, int optimize)
-{
-	mdi_vhci_t		*vh;
-	mdi_phci_t		*ph;
-	mdi_phci_config_t	*phc;
-	int64_t			req_time;
-	int			phci_count, rv;
-	static int		first_time = 1;
-
-	ASSERT(op == BUS_CONFIG_ALL || op == BUS_CONFIG_DRIVER);
-	ASSERT(!DEVI_BUSY_OWNED(vdip));
-
-	MDI_DEBUG(2, (CE_NOTE, vdip,
-	    "!MDI: %s on all phcis: major = %d, flags = 0x%x, optimize = %d\n",
-	    (op == BUS_CONFIG_DRIVER) ? "BUS_CONFIG_DRIVER" : "BUS_CONFIG_ALL",
-	    (int)maj, flags, optimize));
-
-	vh = i_devi_get_vhci(vdip);
-	ASSERT(vh);
-
-	mutex_enter(&mdi_mutex);
-
-	req_time = lbolt64;
-
-	/*
-	 * Reduce unnecessary BUS_CONFIG_ALLs when opening stale
-	 * /dev/[r]dsk links.
-	 */
-	if (optimize && (req_time < vh->vh_bus_config.vhc_cutoff_time)) {
-		mutex_exit(&mdi_mutex);
-		return (MDI_SUCCESS);
-	}
-
-	/*
-	 * To initiate bus configs on all phcis in parallel, create a taskq
-	 * with multiple threads. Since creation of a taskq is a heavy weight
-	 * operation, taskq is created once per vhci and destroyed only when
-	 * vhci unregisters with mdi.
-	 *
-	 * If multiple bus config requests arrive at a time, bus configs on
-	 * phcis are initiated on behalf of one of the requests. Other requests
-	 * wait until the bus configs on phcis is done.
-	 *
-	 * When a BUS_CONFIG_ALL on phcis completes, the following is done
-	 * to avoid more of unnecessary bus configs.
-	 *
-	 *	o all BUS_CONFIG_ALL requests currently waiting with optimize
-	 *	flag set are returned, i.e., no new BUS_CONFIG_ALL is initiated
-	 *	on phcis on behalf of these requests.
-	 *
-	 *	o all BUS_CONFIG_ALL or BUS_CONFIG_DRIVER requests currently
-	 *	waiting but have arrived prior to initiating BUS_CONFIG_ALL on
-	 *	phcis are also returned.
-	 *
-	 * In other cases a new BUS_CONFIG_ALL or BUS_CONFIG_DRIVER is
-	 * initiated on phcis on behalf of a new request.
-	 */
-
-	/* check if a bus config on phcis is in progress */
-	while (vh->vh_bus_config.vhc_start_time != 0) {
-		ddi_bus_config_op_t current_op;
-		int64_t start_time;
-
-		current_op = vh->vh_bus_config.vhc_op;
-		start_time = vh->vh_bus_config.vhc_start_time;
-
-		/* wait until the current bus configs on phcis are done */
-		while (vh->vh_bus_config.vhc_start_time == start_time)
-			cv_wait(&vh->vh_bus_config.vhc_cv, &mdi_mutex);
-
-		if (current_op == BUS_CONFIG_ALL &&
-		    vh->vh_bus_config.vhc_cutoff_time > 0 && (optimize ||
-		    req_time < start_time)) {
-			mutex_exit(&mdi_mutex);
-			return (MDI_SUCCESS);
-		}
-	}
-
-	/*
-	 * At this point we are single threaded until vh_bus_config.start_time
-	 * is reset to 0 at the end of this function.
-	 */
-
-	vh->vh_bus_config.vhc_op = op;
-	vh->vh_bus_config.vhc_major = maj;
-	vh->vh_bus_config.vhc_flags = flags;
-	vh->vh_bus_config.vhc_start_time = lbolt64;
-
-	if (first_time && strcmp(vh->vh_class, MDI_HCI_CLASS_SCSI) == 0) {
-		mutex_exit(&mdi_mutex);
-		i_mdi_attach_phci_drivers();
-		mutex_enter(&mdi_mutex);
-		first_time = 0;
-	}
-
-	ASSERT(vh->vh_phci_count >= 0);
-	if (vh->vh_phci_count == 0) {
-		rv = MDI_SUCCESS;
-		goto out1;
-	}
-
-	/*
-	 * Create a taskq to initiate bus configs in parallel on phcis.
-	 * Taskq allocation can be done in mdi_vhci_register() routine
-	 * instead of here. For most systems, doing it here on demand saves
-	 * resources as this code path is never called most of the times.
-	 */
-	if (vh->vh_bus_config.vhc_taskq == NULL) {
-		/*
-		 * it is ok even if vh->vh_phci_count changes after we release
-		 * the mdi_mutex as phci_count is used just as an
-		 * advisory number to taskq_create.
-		 */
-		phci_count = vh->vh_phci_count;
-		mutex_exit(&mdi_mutex);
-
-		/*
-		 * As we are single threaded, it is ok to access the
-		 * vh_bus_config.taskq member of vh outside of mdi_mutex
-		 */
-		if ((vh->vh_bus_config.vhc_taskq = taskq_create(
-		    "mdi_bus_config_taskq", mdi_max_bus_config_threads,
-		    MDI_TASKQ_PRI, phci_count, INT_MAX,
-		    TASKQ_PREPOPULATE | TASKQ_DYNAMIC)) == NULL) {
-			rv = MDI_FAILURE;
-			goto out;
-		}
-
-		mutex_enter(&mdi_mutex);
-	}
-
-	/* allocate at least vh->vh_phci_count phci bus config structures */
-	while (vh->vh_bus_config.vhc_phc_cnt < vh->vh_phci_count) {
-		int count;
-
-		count = vh->vh_phci_count - vh->vh_bus_config.vhc_phc_cnt;
-		mutex_exit(&mdi_mutex);
-		while (count--) {
-			phc = kmem_alloc(sizeof (*phc), KM_SLEEP);
-			phc->phc_vhc = &vh->vh_bus_config;
-			/*
-			 * there is no need to hold a lock here as we
-			 * are single threaded and no one else manipulates
-			 * the list while we are here.
-			 */
-			phc->phc_next = vh->vh_bus_config.vhc_phc;
-			vh->vh_bus_config.vhc_phc = phc;
-			vh->vh_bus_config.vhc_phc_cnt++;
-		}
-		mutex_enter(&mdi_mutex);
-		/*
-		 * as new phcis could register with mdi after we dropped
-		 * the mdi_mutex, we need to recheck the vh->vh_phci_count.
-		 * Hence the while loop.
-		 */
-	}
-
-	for (ph = vh->vh_phci_head, phc = vh->vh_bus_config.vhc_phc;
-	    ph != NULL; ph = ph->ph_next, phc = phc->phc_next) {
-
-		ASSERT(phc != NULL);
-
-		/* build a phci config handle to be passed to a taskq thread */
-		MDI_PHCI_LOCK(ph);
-		ASSERT(ph->ph_dip);
-
-		/*
-		 * We need to hold the phci dip before bus configuring the phci.
-		 * But placing a hold on the phci dip is not safe here due to
-		 * the race with phci detach. To get around this race,
-		 * we place a hold on the phci dip's parent and note down
-		 * the phci's name@addr. Later, in i_mdi_phci_bus_config(),
-		 * we'll first configure the phci itself before bus
-		 * configuring the phci.
-		 */
-		phc->phc_parent_dip = ddi_get_parent(ph->ph_dip);
-		ndi_hold_devi(phc->phc_parent_dip);
-		(void) ddi_deviname(ph->ph_dip, phc->phc_devnm);
-		MDI_PHCI_UNLOCK(ph);
-	}
-
-	phci_count = vh->vh_phci_count;
-	if (vh->vh_bus_config.vhc_cutoff_time == -1)
-		vh->vh_bus_config.vhc_cutoff_time = 0;
-	mutex_exit(&mdi_mutex);
-
-	MDI_DEBUG(2, (CE_NOTE, vdip,
-	    "!MDI: initiating %s on all phcis, major = %d, flags = 0x%x\n",
-	    (op == BUS_CONFIG_DRIVER) ? "BUS_CONFIG_DRIVER" : "BUS_CONFIG_ALL",
-	    (int)maj, flags));
-
-	/*
-	 * again, no need to hold a lock here as we are single threaded and
-	 * no one else manipulates the list while we are here.
-	 */
-	for (phc = vh->vh_bus_config.vhc_phc; phci_count--;
-	    phc = phc->phc_next) {
-		(void) taskq_dispatch(vh->vh_bus_config.vhc_taskq,
-		    i_mdi_phci_bus_config, phc, TQ_SLEEP);
-	}
-
-	/* wait until all phci bus configs are done */
-	taskq_wait(vh->vh_bus_config.vhc_taskq);
-	rv = MDI_SUCCESS;
-
-out:
-	mutex_enter(&mdi_mutex);
-out1:
-	vh->vh_bus_config.vhc_start_time = 0;
-	if (op == BUS_CONFIG_ALL && vh->vh_bus_config.vhc_cutoff_time != -1) {
-		vh->vh_bus_config.vhc_cutoff_time = lbolt64 +
-		    (int64_t)drv_usectohz(mdi_bus_config_timeout * 1000000);
-	}
-	cv_broadcast(&vh->vh_bus_config.vhc_cv);
-	mutex_exit(&mdi_mutex);
-
-	MDI_DEBUG(2, (CE_NOTE, vdip, "!MDI: %s on all phcis %s\n",
-	    (op == BUS_CONFIG_DRIVER) ? "BUS_CONFIG_DRIVER" : "BUS_CONFIG_ALL",
-	    (rv == MDI_SUCCESS) ? "successful" : "failed"));
-
-	return (rv);
-}
-
-/*
- * A simple bus config implementation for vhcis with the assumption that all
- * phcis are always registered with MDI.
- *
- * BUS_CONFIG_ALL
- *
- * 	Do BUS_CONFIG_ALL on all phcis associated with the vhci.
- *
- * BUS_CONFIG_DRIVER
- *
- * 	Do BUS_CONFIG_DRIVER on all phcis associated with the vhci.
- *
- * BUS_CONFIG_ONE
- *
- *	If the requested child has already been enumerated under the vhci
- *	configure the child and return. Otherwise do BUS_CONFIG_ALL on all
- *	phcis associated with the vhci.
- */
-int
-mdi_vhci_bus_config(dev_info_t *vdip, uint_t flags, ddi_bus_config_op_t op,
-    void *arg, dev_info_t **child)
-{
-	int rv = MDI_SUCCESS;
-
-	/*
-	 * While bus configuring phcis, the phci driver interactions with MDI
-	 * cause child nodes to be enumerated under the vhci node for which
-	 * they need to ndi_devi_enter the vhci node.
-	 *
-	 * Unfortunately, to avoid the deadlock, we ourself can not wait for
-	 * for the bus config operations on phcis to finish while holding the
-	 * ndi_devi_enter lock. To avoid this deadlock, skip bus configs on
-	 * phcis and call the default framework provided bus config function
-	 * if we are called with ndi_devi_enter lock held.
-	 */
-	if (DEVI_BUSY_OWNED(vdip)) {
-		MDI_DEBUG(2, (CE_NOTE, vdip,
-		    "!MDI: vhci bus config: vhci dip is busy owned\n"));
-		goto default_bus_config;
-	}
-
-	switch (op) {
-	case BUS_CONFIG_ONE:
-		/*
-		 * First try to directly configure the requested child.
-		 * This will work only if the requested child has already
-		 * been enumerated under vhci, which is usually the most common
-		 * case.
-		 */
-		if (ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
-		    NDI_SUCCESS) {
-			return (MDI_SUCCESS);
-		}
-
-		MDI_DEBUG(2, (CE_NOTE, vdip, "!MDI: BUS_CONFIG_ONE on %s: "
-		    "will do BUS_CONFIG_ALL on all phcis\n", (char *)arg));
-
-		/* now do BUS_CONFIG_ALL on all phcis */
-		rv = i_mdi_bus_config_all_phcis(vdip, flags,
-		    BUS_CONFIG_ALL, -1, 1);
-		break;
-
-	case BUS_CONFIG_DRIVER:
-		rv = i_mdi_bus_config_all_phcis(vdip, flags, op,
-		    (major_t)(uintptr_t)arg, 0);
-		break;
-
-	case BUS_CONFIG_ALL:
-		rv = i_mdi_bus_config_all_phcis(vdip, flags, op, -1, 0);
-		break;
-
-	default:
-		break;
-	}
-
-default_bus_config:
-	/*
-	 * i_mdi_bus_config_all_phcis() guarantees that child nodes are
-	 * enumerated under the vhci, but not necessarily attached.
-	 * Now configure the appropriate child nodes.
-	 */
-	if (rv == MDI_SUCCESS &&
-	    ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
-	    NDI_SUCCESS) {
-		return (MDI_SUCCESS);
-	}
-
-	return (MDI_FAILURE);
-}
-
-
 void *
 mdi_client_get_vhci_private(dev_info_t *dip)
 {
@@ -6753,3 +6345,2020 @@
 		ph->ph_vprivate = priv;
 	}
 }
+
+/*
+ * List of vhci class names:
+ * A vhci class name must be in this list only if the corresponding vhci
+ * driver intends to use the mdi provided bus config implementation
+ * (i.e., mdi_vhci_bus_config()).
+ */
+static char *vhci_class_list[] = { MDI_HCI_CLASS_SCSI, MDI_HCI_CLASS_IB };
+#define	N_VHCI_CLASSES	(sizeof (vhci_class_list) / sizeof (char *))
+
+/*
+ * Built-in list of phci drivers for every vhci class.
+ * Each entry is { driver name, root device support flag }.
+ * All phci drivers except iscsi have root device support.
+ */
+static mdi_phci_driver_info_t scsi_phci_driver_list[] = {
+	{ "fp", 1 },
+	{ "iscsi", 0 },
+	{ "ibsrp", 1 }
+};
+
+static mdi_phci_driver_info_t ib_phci_driver_list[] = { { "tavor", 1 } };
+
+/*
+ * During boot time, the on-disk vhci cache for every vhci class is read
+ * in the form of an nvlist and stored here.
+ * The slots are indexed in the same order as vhci_class_list[].
+ */
+static nvlist_t *vhcache_nvl[N_VHCI_CLASSES];
+
+/* nvpair names in vhci cache nvlist */
+#define	MDI_VHCI_CACHE_VERSION	1
+#define	MDI_NVPNAME_VERSION	"version"
+#define	MDI_NVPNAME_PHCIS	"phcis"
+#define	MDI_NVPNAME_CTADDRMAP	"clientaddrmap"
+
+typedef enum {
+	VHCACHE_NOT_REBUILT,
+	VHCACHE_PARTIALLY_BUILT,
+	VHCACHE_FULLY_BUILT
+} vhcache_build_status_t;
+
+/*
+ * Return the full pathname of the on-disk vhci cache file for the given
+ * vhci class name. The string is allocated here with kmem_alloc() and is
+ * exactly strlen() + 1 bytes long.
+ */
+static char *
+vhclass2vhcache_filename(char *vhclass)
+{
+	static char *fmt = "/etc/devices/mdi_%s_cache";
+	char *path;
+	int size;
+
+	/*
+	 * fmt is the on-disk vhci cache file name format;
+	 * for scsi_vhci the filename is "/etc/devices/mdi_scsi_vhci_cache".
+	 *
+	 * Expanding "%s" removes two characters from the format and the
+	 * terminating NUL adds one back, hence the -1.
+	 */
+	size = strlen(fmt) + strlen(vhclass) - 1;
+	path = kmem_alloc(size, KM_SLEEP);
+	(void) snprintf(path, size, fmt, vhclass);
+	ASSERT(size == (strlen(path) + 1));
+	return (path);
+}
+
+/*
+ * initialize the vhci cache related data structures and read the on-disk
+ * vhci cached data into memory.
+ *
+ * Allocates the mdi_vhci_config_t for vh and stores it in vh->vh_config,
+ * initializes its locks, client hash and phci driver list, loads the cache
+ * contents from an nvlist when one is available, and registers
+ * stop_vhcache_flush_thread() as a CB_CL_UADMIN_PRE_VFS callback.
+ */
+static void
+setup_vhci_cache(mdi_vhci_t *vh)
+{
+	mdi_vhci_config_t *vhc;
+	mdi_vhci_cache_t *vhcache;
+	int i;
+	nvlist_t *nvl = NULL;
+
+	vhc = kmem_zalloc(sizeof (mdi_vhci_config_t), KM_SLEEP);
+	vh->vh_config = vhc;	/* read by setup_phci_driver_list() below */
+	vhcache = &vhc->vhc_vhcache;
+
+	vhc->vhc_vhcache_filename = vhclass2vhcache_filename(vh->vh_class);
+
+	mutex_init(&vhc->vhc_lock, NULL, MUTEX_DEFAULT, NULL);
+	cv_init(&vhc->vhc_cv, NULL, CV_DRIVER, NULL);
+
+	rw_init(&vhcache->vhcache_lock, NULL, RW_DRIVER, NULL);
+
+	/*
+	 * Create string hash; same as mod_hash_create_strhash() except that
+	 * we use NULL key destructor.
+	 */
+	vhcache->vhcache_client_hash = mod_hash_create_extended(vh->vh_class,
+	    mdi_bus_config_cache_hash_size,
+	    mod_hash_null_keydtor, mod_hash_null_valdtor,
+	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
+
+	setup_phci_driver_list(vh);
+
+	/*
+	 * The on-disk vhci cache is read during booting prior to the
+	 * lights-out period by mdi_read_devices_files().
+	 * Take over the nvlist saved for this vhci class, if there is one;
+	 * the slot is cleared so that the nvlist is consumed only once.
+	 */
+	for (i = 0; i < N_VHCI_CLASSES; i++) {
+		if (strcmp(vhci_class_list[i], vh->vh_class) == 0) {
+			nvl = vhcache_nvl[i];
+			vhcache_nvl[i] = NULL;
+			break;
+		}
+	}
+
+	/*
+	 * this is to cover the case of someone manually causing unloading
+	 * (or detaching) and reloading (or attaching) of a vhci driver.
+	 */
+	if (nvl == NULL && modrootloaded)
+		nvl = read_on_disk_vhci_cache(vh->vh_class);
+
+	if (nvl != NULL) {
+		rw_enter(&vhcache->vhcache_lock, RW_WRITER);
+		if (mainnvl_to_vhcache(vhcache, nvl) == MDI_SUCCESS)
+			vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
+		else  {
+			cmn_err(CE_WARN,
+			    "%s: data file corrupted, will recreate\n",
+			    vhc->vhc_vhcache_filename);
+		}
+		rw_exit(&vhcache->vhcache_lock);
+		nvlist_free(nvl);	/* freed on success and on failure */
+	}
+
+	vhc->vhc_cbid = callb_add(stop_vhcache_flush_thread, vhc,
+	    CB_CL_UADMIN_PRE_VFS, "mdi_vhcache_flush");
+}
+
+/*
+ * free all vhci cache related resources
+ *
+ * If stop_vhcache_async_threads() fails, MDI_FAILURE is returned and none
+ * of the teardown below is performed. Otherwise everything hanging off
+ * vh->vh_config is released, including the config structure itself, and
+ * MDI_SUCCESS is returned.
+ * NOTE: vh->vh_config is not cleared here; it is left pointing at the
+ * freed structure.
+ */
+static int
+destroy_vhci_cache(mdi_vhci_t *vh)
+{
+	mdi_vhci_config_t *vhc = vh->vh_config;
+	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
+	mdi_vhcache_phci_t *cphci, *cphci_next;
+	mdi_vhcache_client_t *cct, *cct_next;
+	mdi_vhcache_pathinfo_t *cpi, *cpi_next;
+
+	if (stop_vhcache_async_threads(vhc) != MDI_SUCCESS)
+		return (MDI_FAILURE);
+
+	kmem_free(vhc->vhc_vhcache_filename,
+	    strlen(vhc->vhc_vhcache_filename) + 1);
+
+	if (vhc->vhc_phci_driver_list)
+		free_phci_driver_list(vhc);
+
+	mod_hash_destroy_strhash(vhcache->vhcache_client_hash);
+
+	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
+	    cphci = cphci_next) {
+		cphci_next = cphci->cphci_next;	/* save before freeing */
+		free_vhcache_phci(cphci);
+	}
+
+	for (cct = vhcache->vhcache_client_head; cct != NULL; cct = cct_next) {
+		cct_next = cct->cct_next;	/* save before freeing */
+		for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi_next) {
+			cpi_next = cpi->cpi_next;
+			free_vhcache_pathinfo(cpi);
+		}
+		free_vhcache_client(cct);
+	}
+
+	rw_destroy(&vhcache->vhcache_lock);
+
+	mutex_destroy(&vhc->vhc_lock);
+	cv_destroy(&vhc->vhc_cv);
+	kmem_free(vhc, sizeof (mdi_vhci_config_t));
+	return (MDI_SUCCESS);
+}
+
+/*
+ * Build vhc_phci_driver_list for the specified vhci: the built-in phci
+ * drivers of its vhci class, followed by any drivers named in the vhci's
+ * "phci-drivers" property.
+ * MDI uses this information to rebuild the bus config cache in case the
+ * cache is not available or is corrupted.
+ */
+static void
+setup_phci_driver_list(mdi_vhci_t *vh)
+{
+	mdi_vhci_config_t *vhc = vh->vh_config;
+	mdi_phci_driver_info_t *builtin = NULL;
+	mdi_phci_driver_info_t *dst;
+	char **extra;
+	uint_t nbuiltin = 0, nextra;
+	int i, j;
+
+	if (strcmp(vh->vh_class, MDI_HCI_CLASS_SCSI) == 0) {
+		builtin = scsi_phci_driver_list;
+		nbuiltin = sizeof (scsi_phci_driver_list) /
+		    sizeof (mdi_phci_driver_info_t);
+	} else if (strcmp(vh->vh_class, MDI_HCI_CLASS_IB) == 0) {
+		builtin = ib_phci_driver_list;
+		nbuiltin = sizeof (ib_phci_driver_list) /
+		    sizeof (mdi_phci_driver_info_t);
+	}
+
+	/*
+	 * The driver.conf file of a vhci driver can name additional phci
+	 * drivers through the project private "phci-drivers" property.
+	 */
+	if (ddi_prop_lookup_string_array(DDI_DEV_T_ANY, vh->vh_dip,
+	    DDI_PROP_DONTPASS, "phci-drivers", &extra,
+	    &nextra) != DDI_PROP_SUCCESS)
+		nextra = 0;
+
+	vhc->vhc_nphci_drivers = nbuiltin + nextra;
+	if (vhc->vhc_nphci_drivers == 0)
+		return;
+
+	vhc->vhc_phci_driver_list = kmem_alloc(
+	    sizeof (mdi_phci_driver_info_t) * vhc->vhc_nphci_drivers, KM_SLEEP);
+	dst = vhc->vhc_phci_driver_list;
+
+	/* built-in drivers first; names are duplicated */
+	for (i = 0; i < nbuiltin; i++, dst++) {
+		dst->phdriver_name =
+		    i_ddi_strdup(builtin[i].phdriver_name, KM_SLEEP);
+		dst->phdriver_root_support =
+		    builtin[i].phdriver_root_support;
+	}
+
+	/* drivers from the property are marked as having root support */
+	for (j = 0; j < nextra; j++, dst++) {
+		dst->phdriver_name = i_ddi_strdup(extra[j], KM_SLEEP);
+		dst->phdriver_root_support = 1;
+	}
+
+	if (nextra != 0)
+		ddi_prop_free(extra);
+}
+
+/*
+ * Release the phci driver list hanging off vhc: every driver name string
+ * and then the array itself. Does nothing if no list was allocated.
+ */
+static void
+free_phci_driver_list(mdi_vhci_config_t *vhc)
+{
+	mdi_phci_driver_info_t *list = vhc->vhc_phci_driver_list;
+	int n;
+
+	if (list == NULL)
+		return;
+
+	for (n = 0; n < vhc->vhc_nphci_drivers; n++) {
+		char *name = list[n].phdriver_name;
+
+		kmem_free(name, strlen(name) + 1);
+	}
+
+	kmem_free(list,
+	    sizeof (mdi_phci_driver_info_t) * vhc->vhc_nphci_drivers);
+}
+
+/*
+ * Stop all vhci cache related async threads and free their resources.
+ *
+ * Sets MDI_VHC_EXIT, wakes up waiters on vhc_cv and then polls until the
+ * MDI_VHC_VHCACHE_FLUSH_THREAD and MDI_VHC_BUILD_VHCI_CACHE_THREAD flags
+ * are clear and vhc_acc_thrcount is zero. After that the queued async
+ * client config requests are freed, the cache is flushed if it is dirty,
+ * and the callback registered in vhc_cbid is deleted.
+ * Returns MDI_FAILURE if the flush or callb_delete() fails, MDI_SUCCESS
+ * otherwise.
+ */
+static int
+stop_vhcache_async_threads(mdi_vhci_config_t *vhc)
+{
+	mdi_async_client_config_t *acc, *acc_next;
+
+	mutex_enter(&vhc->vhc_lock);
+	vhc->vhc_flags |= MDI_VHC_EXIT;
+	ASSERT(vhc->vhc_acc_thrcount >= 0);
+	cv_broadcast(&vhc->vhc_cv);
+
+	while ((vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) ||
+	    (vhc->vhc_flags & MDI_VHC_BUILD_VHCI_CACHE_THREAD) ||
+	    vhc->vhc_acc_thrcount != 0) {
+		mutex_exit(&vhc->vhc_lock);
+		delay(1);	/* poll with vhc_lock dropped */
+		mutex_enter(&vhc->vhc_lock);
+	}
+
+	vhc->vhc_flags &= ~MDI_VHC_EXIT;
+
+	for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc_next) {
+		acc_next = acc->acc_next;	/* save before freeing */
+		free_async_client_config(acc);
+	}
+	vhc->vhc_acc_list_head = NULL;
+	vhc->vhc_acc_list_tail = NULL;
+	vhc->vhc_acc_count = 0;
+
+	if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
+		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
+		mutex_exit(&vhc->vhc_lock);
+		if (flush_vhcache(vhc, 0) != MDI_SUCCESS) {
+			vhcache_dirty(vhc);	/* flush failed; callback stays */
+			return (MDI_FAILURE);
+		}
+	} else
+		mutex_exit(&vhc->vhc_lock);
+
+	if (callb_delete(vhc->vhc_cbid) != 0)
+		return (MDI_FAILURE);
+
+	return (MDI_SUCCESS);
+}
+
+/*
+ * Stop the vhci cache flush thread: set MDI_VHC_EXIT, wait for the
+ * MDI_VHC_VHCACHE_FLUSH_THREAD flag to clear and then, if the cache is
+ * still dirty, flush it with the force flag set. Always returns B_TRUE.
+ */
+/* ARGSUSED */
+static boolean_t
+stop_vhcache_flush_thread(void *arg, int code)
+{
+	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
+	int was_dirty;
+
+	mutex_enter(&vhc->vhc_lock);
+	vhc->vhc_flags |= MDI_VHC_EXIT;
+	cv_broadcast(&vhc->vhc_cv);
+
+	/* poll, with the lock dropped, until the flush thread flag clears */
+	while ((vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) != 0) {
+		mutex_exit(&vhc->vhc_lock);
+		delay(1);
+		mutex_enter(&vhc->vhc_lock);
+	}
+
+	was_dirty = ((vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) != 0);
+	if (was_dirty)
+		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
+	mutex_exit(&vhc->vhc_lock);
+
+	if (was_dirty)
+		(void) flush_vhcache(vhc, 1);
+
+	return (B_TRUE);
+}
+
+/*
+ * Append the vhcache phci (cphci) to the end of the vhcache's phci list.
+ */
+static void
+enqueue_vhcache_phci(mdi_vhci_cache_t *vhcache, mdi_vhcache_phci_t *cphci)
+{
+	mdi_vhcache_phci_t *old_tail = vhcache->vhcache_phci_tail;
+
+	cphci->cphci_next = NULL;
+	if (vhcache->vhcache_phci_head != NULL)
+		old_tail->cphci_next = cphci;
+	else
+		vhcache->vhcache_phci_head = cphci;
+	vhcache->vhcache_phci_tail = cphci;
+}
+
+/*
+ * Append the vhcache pathinfo (cpi) to the end of the client's path list.
+ */
+static void
+enqueue_tail_vhcache_pathinfo(mdi_vhcache_client_t *cct,
+    mdi_vhcache_pathinfo_t *cpi)
+{
+	mdi_vhcache_pathinfo_t *old_tail = cct->cct_cpi_tail;
+
+	cpi->cpi_next = NULL;
+	if (cct->cct_cpi_head != NULL)
+		old_tail->cpi_next = cpi;
+	else
+		cct->cct_cpi_head = cpi;
+	cct->cct_cpi_tail = cpi;
+}
+
+/*
+ * Insert the vhcache pathinfo (newcpi) into the client's ordered path
+ * list. The list keeps every cpi without the
+ * MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag ahead of every cpi that has it:
+ * a flagged newcpi goes to the tail, an unflagged one goes immediately
+ * before the first flagged entry (or to the tail if there is none).
+ */
+static void
+enqueue_vhcache_pathinfo(mdi_vhcache_client_t *cct,
+    mdi_vhcache_pathinfo_t *newcpi)
+{
+	mdi_vhcache_pathinfo_t **linkp;
+
+	if (cct->cct_cpi_head == NULL ||
+	    (newcpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) {
+		enqueue_tail_vhcache_pathinfo(cct, newcpi);
+		return;
+	}
+
+	/* find the link that points at the first flagged cpi, if any */
+	linkp = &cct->cct_cpi_head;
+	while (*linkp != NULL &&
+	    !((*linkp)->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))
+		linkp = &(*linkp)->cpi_next;
+
+	newcpi->cpi_next = *linkp;
+	if (newcpi->cpi_next == NULL)
+		cct->cct_cpi_tail = newcpi;
+	*linkp = newcpi;
+}
+
+/*
+ * Append the vhcache client (cct) to the end of the vhcache's client list.
+ */
+static void
+enqueue_vhcache_client(mdi_vhci_cache_t *vhcache,
+    mdi_vhcache_client_t *cct)
+{
+	mdi_vhcache_client_t *old_tail = vhcache->vhcache_client_tail;
+
+	cct->cct_next = NULL;
+	if (vhcache->vhcache_client_head != NULL)
+		old_tail->cct_next = cct;
+	else
+		vhcache->vhcache_client_head = cct;
+	vhcache->vhcache_client_tail = cct;
+}
+
+/*
+ * Free an array of nelem strings together with the array itself.
+ * Each string is freed as strlen() + 1 bytes. A NULL array and NULL
+ * elements are tolerated.
+ */
+static void
+free_string_array(char **str, int nelem)
+{
+	int idx;
+
+	if (str == NULL)
+		return;
+
+	for (idx = 0; idx < nelem; idx++) {
+		if (str[idx] != NULL)
+			kmem_free(str[idx], strlen(str[idx]) + 1);
+	}
+	kmem_free(str, sizeof (char *) * nelem);
+}
+
+/*
+ * Free a vhcache phci structure along with its path string.
+ */
+static void
+free_vhcache_phci(mdi_vhcache_phci_t *cphci)
+{
+	char *path = cphci->cphci_path;
+
+	kmem_free(path, strlen(path) + 1);
+	kmem_free(cphci, sizeof (mdi_vhcache_phci_t));
+}
+
+/*
+ * Free a vhcache pathinfo structure along with its address string.
+ */
+static void
+free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *cpi)
+{
+	char *addr = cpi->cpi_addr;
+
+	kmem_free(addr, strlen(addr) + 1);
+	kmem_free(cpi, sizeof (mdi_vhcache_pathinfo_t));
+}
+
+/*
+ * Free a vhcache client structure along with its "name@addr" string.
+ */
+static void
+free_vhcache_client(mdi_vhcache_client_t *cct)
+{
+	char *name_addr = cct->cct_name_addr;
+
+	kmem_free(name_addr, strlen(name_addr) + 1);
+	kmem_free(cct, sizeof (mdi_vhcache_client_t));
+}
+
+/*
+ * Build the "<ct_name>@<ct_addr>" string for a client. The string is
+ * allocated with kmem_alloc(); when ret_len is not NULL the allocation
+ * size (string length + 1) is stored in *ret_len.
+ */
+static char *
+vhcache_mknameaddr(char *ct_name, char *ct_addr, int *ret_len)
+{
+	/* the +2 covers the '@' separator and the terminating NUL */
+	int size = strlen(ct_name) + strlen(ct_addr) + 2;
+	char *buf = kmem_alloc(size, KM_SLEEP);
+
+	(void) snprintf(buf, size, "%s@%s", ct_name, ct_addr);
+	if (ret_len != NULL)
+		*ret_len = size;
+
+	return (buf);
+}
+
+/*
+ * Copy the contents of paddrnvl to vhci cache.
+ * paddrnvl nvlist contains path information for a vhci client; one
+ * vhcache pathinfo is created per nvpair and appended to cct's path list.
+ * See the comment in mainnvl_to_vhcache() for the format of this nvlist.
+ *
+ * NOTE: the nvpair type and element count are verified by ASSERT only,
+ * and the phci-id read from the nvpair indexes cphci_list[] without a
+ * range check in this function.
+ */
+static void
+paddrnvl_to_vhcache(nvlist_t *nvl, mdi_vhcache_phci_t *cphci_list[],
+    mdi_vhcache_client_t *cct)
+{
+	nvpair_t *pair;
+	mdi_vhcache_pathinfo_t *path;
+	uint32_t *data;
+	uint_t ndata;
+
+	for (pair = nvlist_next_nvpair(nvl, NULL); pair != NULL;
+	    pair = nvlist_next_nvpair(nvl, pair)) {
+		ASSERT(nvpair_type(pair) == DATA_TYPE_UINT32_ARRAY);
+
+		path = kmem_zalloc(sizeof (*path), KM_SLEEP);
+		path->cpi_addr = i_ddi_strdup(nvpair_name(pair), KM_SLEEP);
+
+		/* data[0] is the phci-id, data[1] the cpi_flags */
+		(void) nvpair_value_uint32_array(pair, &data, &ndata);
+		ASSERT(ndata == 2);
+		path->cpi_cphci = cphci_list[data[0]];
+		path->cpi_flags = data[1];
+
+		enqueue_tail_vhcache_pathinfo(cct, path);
+	}
+}
+
+/*
+ * Copy the contents of caddrmapnvl to vhci cache.
+ * caddrmapnvl nvlist contains vhci client address to phci client address
+ * mappings; one vhcache client is created per nvpair, filled in from the
+ * nested path nvlist, appended to the client list and inserted into the
+ * client hash keyed by its own cct_name_addr string.
+ * See the comment in mainnvl_to_vhcache() for the format of this nvlist.
+ */
+static void
+caddrmapnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl,
+    mdi_vhcache_phci_t *cphci_list[])
+{
+	nvpair_t *pair;
+	nvlist_t *paths;
+	mdi_vhcache_client_t *client;
+
+	for (pair = nvlist_next_nvpair(nvl, NULL); pair != NULL;
+	    pair = nvlist_next_nvpair(nvl, pair)) {
+		ASSERT(nvpair_type(pair) == DATA_TYPE_NVLIST);
+
+		client = kmem_zalloc(sizeof (*client), KM_SLEEP);
+		client->cct_name_addr =
+		    i_ddi_strdup(nvpair_name(pair), KM_SLEEP);
+
+		(void) nvpair_value_nvlist(pair, &paths);
+		paddrnvl_to_vhcache(paths, cphci_list, client);
+
+		/* the client must contain at least one path */
+		ASSERT(client->cct_cpi_head != NULL);
+
+		enqueue_vhcache_client(vhcache, client);
+		(void) mod_hash_insert(vhcache->vhcache_client_hash,
+		    (mod_hash_key_t)client->cct_name_addr,
+		    (mod_hash_val_t)client);
+	}
+}
+
+/*
+ * Copy the contents of the main nvlist to vhci cache.
+ * The caller must hold vhcache_lock as writer.
+ * Returns MDI_FAILURE if the version nvpair is missing or does not match
+ * MDI_VHCI_CACHE_VERSION, MDI_SUCCESS otherwise.
+ *
+ * VHCI busconfig cached data is stored in the form of a nvlist on the disk.
+ * The nvlist contains the mappings between the vhci client addresses and
+ * their corresponding phci client addresses.
+ *
+ * The structure of the nvlist is as follows:
+ *
+ * Main nvlist:
+ *	NAME		TYPE		DATA
+ *	version		int32		version number
+ *	phcis		string array	array of phci paths
+ *	clientaddrmap	nvlist_t	c2paddrs_nvl (see below)
+ *
+ * structure of c2paddrs_nvl:
+ *	NAME		TYPE		DATA
+ *	caddr1		nvlist_t	paddrs_nvl1
+ *	caddr2		nvlist_t	paddrs_nvl2
+ *	...
+ * where caddr1, caddr2, ... are vhci client name and addresses in the
+ * form of "<clientname>@<clientaddress>".
+ * (for example: "ssd@2000002037cd9f72");
+ * paddrs_nvl1, paddrs_nvl2, .. are nvlists that contain path information.
+ *
+ * structure of paddrs_nvl:
+ *	NAME		TYPE		DATA
+ *	pi_addr1	uint32_array	(phci-id, cpi_flags)
+ *	pi_addr2	uint32_array	(phci-id, cpi_flags)
+ *	...
+ * where pi_addr1, pi_addr2, ... are bus specific addresses of pathinfo nodes
+ * (so called pi_addrs, for example: "w2100002037cd9f72,0");
+ * phci-ids are integers that identify the PHCIs to which the bus specific
+ * addresses belong. These integers are used as an index into the phcis
+ * string array in the main nvlist to get the PHCI path.
+ */
+static int
+mainnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl)
+{
+	char **phcis;
+	uint_t nphcis;
+	mdi_vhcache_phci_t **cphci_list;
+	nvlist_t *caddrmapnvl;
+	int32_t ver;
+	int i;
+	size_t cphci_list_size;
+
+	ASSERT(RW_WRITE_HELD(&vhcache->vhcache_lock));
+
+	if (nvlist_lookup_int32(nvl, MDI_NVPNAME_VERSION, &ver) != 0)
+		return (MDI_FAILURE);
+	if (ver != MDI_VHCI_CACHE_VERSION)
+		return (MDI_FAILURE);
+
+	/* an nvlist without a phci array is accepted; nothing to copy */
+	if (nvlist_lookup_string_array(nvl, MDI_NVPNAME_PHCIS, &phcis,
+	    &nphcis) != 0)
+		return (MDI_SUCCESS);
+
+	ASSERT(nphcis > 0);
+
+	/*
+	 * cphci_list[] maps a phci-id (index into the phcis array) to the
+	 * vhcache phci created for it; it is only needed while the client
+	 * address map is being converted.
+	 */
+	cphci_list_size = sizeof (mdi_vhcache_phci_t *) * nphcis;
+	cphci_list = kmem_alloc(cphci_list_size, KM_SLEEP);
+	for (i = 0; i < nphcis; i++) {
+		cphci_list[i] = kmem_zalloc(sizeof (mdi_vhcache_phci_t),
+		    KM_SLEEP);
+		cphci_list[i]->cphci_path = i_ddi_strdup(phcis[i], KM_SLEEP);
+		enqueue_vhcache_phci(vhcache, cphci_list[i]);
+	}
+
+	ASSERT(vhcache->vhcache_phci_head != NULL);
+
+	if (nvlist_lookup_nvlist(nvl, MDI_NVPNAME_CTADDRMAP, &caddrmapnvl) == 0)
+		caddrmapnvl_to_vhcache(vhcache, caddrmapnvl, cphci_list);
+
+	kmem_free(cphci_list, cphci_list_size);
+	return (MDI_SUCCESS);
+}
+
+/*
+ * Build paddrnvl for the specified client using the information in the
+ * vhci cache and add it to the caddrmapnvl under the client's name@addr.
+ * The caller must hold vhcache_lock.
+ * Returns 0 on success, errno on failure.
+ */
+static int
+vhcache_to_paddrnvl(mdi_vhci_cache_t *vhcache, mdi_vhcache_client_t *cct,
+    nvlist_t *caddrmapnvl)
+{
+	mdi_vhcache_pathinfo_t *path;
+	nvlist_t *paddrnvl;
+	uint32_t pair[2];
+	int rc;
+
+	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
+
+	rc = nvlist_alloc(&paddrnvl, 0, KM_SLEEP);
+	if (rc != 0)
+		return (rc);
+
+	/* one (phci-id, cpi_flags) pair per path, named by the path address */
+	for (path = cct->cct_cpi_head; path != NULL; path = path->cpi_next) {
+		pair[0] = path->cpi_cphci->cphci_id;
+		pair[1] = path->cpi_flags;
+		rc = nvlist_add_uint32_array(paddrnvl, path->cpi_addr, pair, 2);
+		if (rc != 0)
+			break;
+	}
+
+	if (rc == 0)
+		rc = nvlist_add_nvlist(caddrmapnvl, cct->cct_name_addr,
+		    paddrnvl);
+
+	/* the local nvlist is freed on success and on failure */
+	nvlist_free(paddrnvl);
+	return (rc);
+}
+
+/*
+ * Build caddrmapnvl using the information in the vhci cache
+ * and add it to the mainnvl under the MDI_NVPNAME_CTADDRMAP name.
+ * The caller must hold vhcache_lock.
+ * Returns 0 on success, errno on failure.
+ */
+static int
+vhcache_to_caddrmapnvl(mdi_vhci_cache_t *vhcache, nvlist_t *mainnvl)
+{
+	mdi_vhcache_client_t *client;
+	nvlist_t *caddrmapnvl;
+	int rc;
+
+	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
+
+	rc = nvlist_alloc(&caddrmapnvl, NV_UNIQUE_NAME, KM_SLEEP);
+	if (rc != 0)
+		return (rc);
+
+	for (client = vhcache->vhcache_client_head; client != NULL;
+	    client = client->cct_next) {
+		rc = vhcache_to_paddrnvl(vhcache, client, caddrmapnvl);
+		if (rc != 0)
+			break;
+	}
+
+	if (rc == 0)
+		rc = nvlist_add_nvlist(mainnvl, MDI_NVPNAME_CTADDRMAP,
+		    caddrmapnvl);
+
+	/* the local nvlist is freed on success and on failure */
+	nvlist_free(caddrmapnvl);
+	return (rc);
+}
+
+/*
+ * Build nvlist using the information in the vhci cache.
+ * See the comment in mainnvl_to_vhcache() for the format of the nvlist.
+ * Returns nvl on success, NULL on failure.
+ *
+ * When the cache has no phcis the returned nvlist holds only the version.
+ * NOTE: cphci_id of every cached phci is (re)assigned here while
+ * vhcache_lock is held as reader only.
+ */
+static nvlist_t *
+vhcache_to_mainnvl(mdi_vhci_cache_t *vhcache)
+{
+	mdi_vhcache_phci_t *cphci;
+	uint_t nphcis;
+	char **paths;
+	nvlist_t *mainnvl;
+	int rc = 0;
+	int i;
+
+	if (nvlist_alloc(&mainnvl, NV_UNIQUE_NAME, KM_SLEEP) != 0)
+		return (NULL);
+
+	if (nvlist_add_int32(mainnvl, MDI_NVPNAME_VERSION,
+	    MDI_VHCI_CACHE_VERSION) != 0) {
+		nvlist_free(mainnvl);
+		return (NULL);
+	}
+
+	rw_enter(&vhcache->vhcache_lock, RW_READER);
+	if (vhcache->vhcache_phci_head != NULL) {
+		/* number the phcis; the id is the index into the phcis array */
+		nphcis = 0;
+		for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
+		    cphci = cphci->cphci_next)
+			cphci->cphci_id = nphcis++;
+
+		/* build phci pathname list */
+		paths = kmem_alloc(sizeof (char *) * nphcis, KM_SLEEP);
+		i = 0;
+		for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
+		    cphci = cphci->cphci_next)
+			paths[i++] = i_ddi_strdup(cphci->cphci_path, KM_SLEEP);
+
+		rc = nvlist_add_string_array(mainnvl, MDI_NVPNAME_PHCIS,
+		    paths, nphcis);
+		free_string_array(paths, nphcis);
+
+		if (rc == 0)
+			rc = vhcache_to_caddrmapnvl(vhcache, mainnvl);
+	}
+	rw_exit(&vhcache->vhcache_lock);
+
+	if (rc != 0) {
+		nvlist_free(mainnvl);
+		mainnvl = NULL;
+	}
+	return (mainnvl);
+}
+
+/*
+ * Find the vhcache phci structure whose path equals phci_path.
+ * The caller must hold vhcache_lock. Returns NULL if there is no match.
+ */
+static mdi_vhcache_phci_t *
+lookup_vhcache_phci_by_name(mdi_vhci_cache_t *vhcache, char *phci_path)
+{
+	mdi_vhcache_phci_t *cur;
+
+	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
+
+	cur = vhcache->vhcache_phci_head;
+	while (cur != NULL && strcmp(cur->cphci_path, phci_path) != 0)
+		cur = cur->cphci_next;
+
+	return (cur);
+}
+
+/*
+ * Find the vhcache phci structure that currently refers to the phci ph.
+ * The caller must hold vhcache_lock. Returns NULL if there is no match.
+ */
+static mdi_vhcache_phci_t *
+lookup_vhcache_phci_by_addr(mdi_vhci_cache_t *vhcache, mdi_phci_t *ph)
+{
+	mdi_vhcache_phci_t *cur;
+
+	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
+
+	cur = vhcache->vhcache_phci_head;
+	while (cur != NULL && cur->cphci_phci != ph)
+		cur = cur->cphci_next;
+
+	return (cur);
+}
+
+/*
+ * Add the specified phci to the vhci cache if not already present.
+ * The phci is looked up by its device pathname; an existing entry is
+ * simply pointed at ph, while a newly created entry also causes
+ * vhcache_dirty() to be called.
+ */
+static void
+vhcache_phci_add(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
+{
+	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
+	mdi_vhcache_phci_t *cphci;
+	char *path;
+	int is_new = 0;
+
+	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
+
+	path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+	(void) ddi_pathname(ph->ph_dip, path);
+
+	cphci = lookup_vhcache_phci_by_name(vhcache, path);
+	if (cphci == NULL) {
+		cphci = kmem_zalloc(sizeof (*cphci), KM_SLEEP);
+		cphci->cphci_path = i_ddi_strdup(path, KM_SLEEP);
+		enqueue_vhcache_phci(vhcache, cphci);
+		is_new = 1;
+	}
+	cphci->cphci_phci = ph;
+
+	rw_exit(&vhcache->vhcache_lock);
+	kmem_free(path, MAXPATHLEN);
+
+	if (is_new)
+		vhcache_dirty(vhc);
+}
+
+/*
+ * Remove the reference to the specified phci from the vhci cache.
+ * Only the cphci_phci back pointer is cleared; the mdi_vhcache_phci
+ * structure itself stays in the cache.
+ */
+static void
+vhcache_phci_remove(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
+{
+	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
+	mdi_vhcache_phci_t *cached;
+
+	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
+
+	cached = lookup_vhcache_phci_by_addr(vhcache, ph);
+	if (cached != NULL)
+		cached->cphci_phci = NULL;
+
+	rw_exit(&vhcache->vhcache_lock);
+}
+
+/*
+ * Initialize the lookup token dst: copy the cached client and lookup time
+ * from src, or reset both fields if src is NULL.
+ */
+static void
+init_vhcache_lookup_token(mdi_vhcache_lookup_token_t *dst,
+    mdi_vhcache_lookup_token_t *src)
+{
+	if (src != NULL) {
+		dst->lt_cct = src->lt_cct;
+		dst->lt_cct_lookup_time = src->lt_cct_lookup_time;
+		return;
+	}
+
+	dst->lt_cct = NULL;
+	dst->lt_cct_lookup_time = 0;
+}
+
+/*
+ * Look up vhcache client for the specified client.
+ * The caller must hold vhcache_lock. Returns NULL if the client is not in
+ * the cache. When a token is supplied it is used as a one-entry lookup
+ * cache and is updated with the outcome of a real lookup.
+ */
+static mdi_vhcache_client_t *
+lookup_vhcache_client(mdi_vhci_cache_t *vhcache, char *ct_name, char *ct_addr,
+    mdi_vhcache_lookup_token_t *token)
+{
+	mod_hash_val_t val;
+	char *key;
+	int keylen;
+	int found;
+
+	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
+
+	/*
+	 * If no vhcache clean occurred since the last lookup, we can
+	 * simply return the cct from the last lookup operation.
+	 * It works because ccts are never freed except during the vhcache
+	 * cleanup operation.
+	 */
+	if (token != NULL &&
+	    vhcache->vhcache_clean_time < token->lt_cct_lookup_time)
+		return (token->lt_cct);
+
+	key = vhcache_mknameaddr(ct_name, ct_addr, &keylen);
+	found = (mod_hash_find(vhcache->vhcache_client_hash,
+	    (mod_hash_key_t)key, &val) == 0);
+	if (!found)
+		val = NULL;
+
+	/* remember the result; a failed lookup resets the token */
+	if (token != NULL) {
+		token->lt_cct = (mdi_vhcache_client_t *)val;
+		token->lt_cct_lookup_time = found ? lbolt64 : 0;
+	}
+
+	kmem_free(key, keylen);
+	return ((mdi_vhcache_client_t *)val);
+}
+
+/*
+ * Add the specified path to the vhci cache if not already present.
+ * Also add the vhcache client for the client corresponding to this path
+ * if it doesn't already exist.
+ *
+ * Takes vhcache_lock as writer. A cached path with the same phci and the
+ * same pi_addr is pointed at pip and has its
+ * MDI_CPI_HINT_PATH_DOES_NOT_EXIST hint cleared; otherwise a new cached
+ * path is created. vhcache_dirty() is called if a client or path was
+ * added or a hint was cleared.
+ */
+static void
+vhcache_pi_add(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
+{
+	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
+	mdi_vhcache_client_t *cct;
+	mdi_vhcache_pathinfo_t *cpi;
+	mdi_phci_t *ph = pip->pi_phci;
+	mdi_client_t *ct = pip->pi_client;
+	int cache_updated = 0;
+
+	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
+
+	/* if vhcache client for this pip doesn't already exist, add it */
+	if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
+	    NULL)) == NULL) {
+		cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
+		cct->cct_name_addr = vhcache_mknameaddr(ct->ct_drvname,
+		    ct->ct_guid, NULL);
+		enqueue_vhcache_client(vhcache, cct);
+		(void) mod_hash_insert(vhcache->vhcache_client_hash,
+		    (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
+		cache_updated = 1;
+	}
+
+	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
+		if (cpi->cpi_cphci->cphci_phci == ph &&
+		    strcmp(cpi->cpi_addr, pip->pi_addr) == 0) {
+			cpi->cpi_pip = pip;
+			if (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST) {
+				cpi->cpi_flags &=
+				    ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
+				sort_vhcache_paths(cct);
+				cache_updated = 1;
+			}
+			break;
+		}
+	}
+
+	if (cpi == NULL) {	/* no cached entry matched this path */
+		cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
+		cpi->cpi_addr = i_ddi_strdup(pip->pi_addr, KM_SLEEP);
+		cpi->cpi_cphci = lookup_vhcache_phci_by_addr(vhcache, ph);
+		ASSERT(cpi->cpi_cphci != NULL);
+		cpi->cpi_pip = pip;
+		enqueue_vhcache_pathinfo(cct, cpi);
+		cache_updated = 1;
+	}
+
+	rw_exit(&vhcache->vhcache_lock);
+
+	if (cache_updated)
+		vhcache_dirty(vhc);
+}
+
+/*
+ * Remove the reference to the specified path from the vhci cache.
+ * Only the cpi_pip back pointer of the matching cached path is cleared;
+ * the cached path itself stays in the cache.
+ */
+static void
+vhcache_pi_remove(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
+{
+	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
+	mdi_client_t *ct = pip->pi_client;
+	mdi_vhcache_client_t *cct;
+	mdi_vhcache_pathinfo_t *cpi = NULL;
+
+	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
+
+	cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
+	    NULL);
+	if (cct != NULL)
+		cpi = cct->cct_cpi_head;
+
+	/* walk the client's paths until the one referring to pip is found */
+	while (cpi != NULL && cpi->cpi_pip != pip)
+		cpi = cpi->cpi_next;
+	if (cpi != NULL)
+		cpi->cpi_pip = NULL;
+
+	rw_exit(&vhcache->vhcache_lock);
+}
+
+/*
+ * Flush the vhci cache to disk.
+ * Returns MDI_SUCCESS on success, MDI_FAILURE on failure.
+ *
+ * Unless force_flag is set, nothing is written and MDI_FAILURE is
+ * returned while i_ddi_io_initialized() is false.
+ * An EROFS error is not reported as a failure: MDI_VHC_READONLY_FS is
+ * set, the dirty and flush error flags are cleared and MDI_SUCCESS is
+ * returned. Any other error is logged only when the flush error flag is
+ * not already set; the flag is cleared again by the next successful flush.
+ */
+static int
+flush_vhcache(mdi_vhci_config_t *vhc, int force_flag)
+{
+	nvlist_t *nvl;
+	int err;
+	int rv;
+
+	/*
+	 * It is possible that the system may shutdown before
+	 * i_ddi_io_initialized (during stmsboot for example). To allow for
+	 * flushing the cache in this case do not check for
+	 * i_ddi_io_initialized when force flag is set.
+	 */
+	if (force_flag == 0 && !i_ddi_io_initialized())
+		return (MDI_FAILURE);
+
+	if ((nvl = vhcache_to_mainnvl(&vhc->vhc_vhcache)) != NULL) {
+		err = fwrite_nvlist(vhc->vhc_vhcache_filename, nvl);
+		nvlist_free(nvl);
+	} else
+		err = EFAULT;	/* the nvlist could not be built */
+
+	rv = MDI_SUCCESS;
+	mutex_enter(&vhc->vhc_lock);
+	if (err != 0) {
+		if (err == EROFS) {
+			vhc->vhc_flags |= MDI_VHC_READONLY_FS;
+			vhc->vhc_flags &= ~(MDI_VHC_VHCACHE_FLUSH_ERROR |
+			    MDI_VHC_VHCACHE_DIRTY);
+		} else {
+			if (!(vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR)) {
+				cmn_err(CE_CONT, "%s: update failed\n",
+				    vhc->vhc_vhcache_filename);
+				vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_ERROR;
+			}
+			rv = MDI_FAILURE;
+		}
+	} else if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR) {
+		cmn_err(CE_CONT,
+		    "%s: update now ok\n", vhc->vhc_vhcache_filename);
+		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_ERROR;
+	}
+	mutex_exit(&vhc->vhc_lock);
+
+	return (rv);
+}
+
+/*
+ * Call flush_vhcache() to flush the vhci cache at the scheduled time.
+ * Exits itself if left idle for the idle timeout period.
+ *
+ * arg is the mdi_vhci_config_t whose cache is flushed. vhc_lock is held
+ * throughout, except while flush_vhcache() itself runs. The dirty flag is
+ * cleared before the flush is started; if the flush fails vhcache_dirty()
+ * is called to schedule another attempt.
+ *
+ * The thread exits when MDI_VHC_EXIT is set, or when the cache has stayed
+ * clean for mdi_vhcache_flush_daemon_idle_time seconds. On exit it clears
+ * MDI_VHC_VHCACHE_FLUSH_THREAD, so that vhcache_dirty() creates a new
+ * thread the next time the cache is made dirty.
+ */
+static void
+vhcache_flush_thread(void *arg)
+{
+	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
+	clock_t idle_time, quit_at_ticks;
+	callb_cpr_t cprinfo;
+
+	/* number of seconds to sleep idle before exiting */
+	idle_time = mdi_vhcache_flush_daemon_idle_time * TICKS_PER_SECOND;
+
+	CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
+	    "mdi_vhcache_flush");
+	mutex_enter(&vhc->vhc_lock);
+	for (; ; ) {
+		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
+		    (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) {
+			if (ddi_get_lbolt() < vhc->vhc_flush_at_ticks) {
+				CALLB_CPR_SAFE_BEGIN(&cprinfo);
+				(void) cv_timedwait(&vhc->vhc_cv,
+				    &vhc->vhc_lock, vhc->vhc_flush_at_ticks);
+				CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
+			} else {	/* the scheduled flush time is reached */
+				vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
+				mutex_exit(&vhc->vhc_lock);
+
+				if (flush_vhcache(vhc, 0) != MDI_SUCCESS)
+					vhcache_dirty(vhc);
+
+				mutex_enter(&vhc->vhc_lock);
+			}
+		}
+
+		quit_at_ticks = ddi_get_lbolt() + idle_time;
+
+		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
+		    !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) &&
+		    ddi_get_lbolt() < quit_at_ticks) {
+			CALLB_CPR_SAFE_BEGIN(&cprinfo);
+			(void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
+			    quit_at_ticks);
+			CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
+		}
+
+		if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
+		    !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY))
+			goto out;
+	}
+
+out:
+	vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_THREAD;
+	/* CALLB_CPR_EXIT releases the vhc->vhc_lock */
+	CALLB_CPR_EXIT(&cprinfo);
+}
+
+/*
+ * Make vhci cache dirty and schedule flushing by vhcache flush thread.
+ *
+ * Does nothing if the cache setup is not yet done
+ * (MDI_VHCI_CACHE_SETUP_DONE is clear) or if MDI_VHC_READONLY_FS is set,
+ * which flush_vhcache() does when a flush fails with EROFS. Otherwise the
+ * flush is (re)scheduled mdi_vhcache_flush_delay seconds from now, and the
+ * flush thread is woken up, or created if it is not currently running.
+ */
+static void
+vhcache_dirty(mdi_vhci_config_t *vhc)
+{
+	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
+	int create_thread;
+
+	rw_enter(&vhcache->vhcache_lock, RW_READER);
+	/* do not flush cache until the cache is fully built */
+	if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) {
+		rw_exit(&vhcache->vhcache_lock);
+		return;
+	}
+	rw_exit(&vhcache->vhcache_lock);
+
+	mutex_enter(&vhc->vhc_lock);
+	if (vhc->vhc_flags & MDI_VHC_READONLY_FS) {
+		mutex_exit(&vhc->vhc_lock);
+		return;
+	}
+
+	vhc->vhc_flags |= MDI_VHC_VHCACHE_DIRTY;
+	vhc->vhc_flush_at_ticks = ddi_get_lbolt() +
+	    mdi_vhcache_flush_delay * TICKS_PER_SECOND;
+	if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
+		cv_broadcast(&vhc->vhc_cv);
+		create_thread = 0;
+	} else {	/* claim the thread slot while vhc_lock is held */
+		vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_THREAD;
+		create_thread = 1;
+	}
+	mutex_exit(&vhc->vhc_lock);
+
+	if (create_thread)
+		(void) thread_create(NULL, 0, vhcache_flush_thread, vhc,
+		    0, &p0, TS_RUN, minclsyspri);
+}
+
+/*
+ * phci bus config structure - one for each phci bus config operation that
+ * we initiate on behalf of a vhci. Allocated by bus_config_all_phcis() and
+ * freed, together with the path string, by bus_config_phci().
+ */
+typedef struct mdi_phci_bus_config_s {
+	char *phbc_phci_path;	/* private copy of the phci path */
+	struct mdi_vhci_bus_config_s *phbc_vhbusconfig;	/* vhci bus config */
+	struct mdi_phci_bus_config_s *phbc_next;	/* next on the list */
+} mdi_phci_bus_config_t;
+
+/* vhci bus config structure - one for each vhci bus config operation */
+typedef struct mdi_vhci_bus_config_s {
+	ddi_bus_config_op_t vhbc_op;	/* bus config op */
+	major_t vhbc_op_major;		/* bus config op major */
+	uint_t vhbc_op_flags;		/* bus config op flags */
+	kmutex_t vhbc_lock;		/* held to wait on/signal vhbc_cv */
+	kcondvar_t vhbc_cv;		/* broadcast when count reaches 0 */
+	int vhbc_thr_count;		/* phci bus configs not yet done */
+} mdi_vhci_bus_config_t;
+
+/*
+ * bus config the specified phci
+ *
+ * arg is the mdi_phci_bus_config_t for the phci. It is freed here, together
+ * with its path string, so the caller must not touch it afterwards. When the
+ * work is done, vhbc_thr_count of the owning vhci bus config is decremented
+ * and, if it has dropped to zero, vhbc_cv is broadcast.
+ */
+static void
+bus_config_phci(void *arg)
+{
+	mdi_phci_bus_config_t *phbc = (mdi_phci_bus_config_t *)arg;
+	mdi_vhci_bus_config_t *vhbc = phbc->phbc_vhbusconfig;
+	dev_info_t *ph_dip;
+
+	/*
+	 * first configure all path components up to phci and then configure
+	 * the phci children.
+	 */
+	if ((ph_dip = e_ddi_hold_devi_by_path(phbc->phbc_phci_path, 0))
+	    != NULL) {
+		if (vhbc->vhbc_op == BUS_CONFIG_DRIVER ||
+		    vhbc->vhbc_op == BUS_UNCONFIG_DRIVER) {
+			(void) ndi_devi_config_driver(ph_dip,
+			    vhbc->vhbc_op_flags,
+			    vhbc->vhbc_op_major);
+		} else
+			(void) ndi_devi_config(ph_dip,
+			    vhbc->vhbc_op_flags);
+
+		/* release the hold that e_ddi_hold_devi_by_path() placed */
+		ndi_rele_devi(ph_dip);
+	}
+
+	kmem_free(phbc->phbc_phci_path, strlen(phbc->phbc_phci_path) + 1);
+	kmem_free(phbc, sizeof (*phbc));
+
+	mutex_enter(&vhbc->vhbc_lock);
+	vhbc->vhbc_thr_count--;
+	if (vhbc->vhbc_thr_count == 0)
+		cv_broadcast(&vhbc->vhbc_cv);
+	mutex_exit(&vhbc->vhbc_lock);
+}
+
+/*
+ * Bus config all phcis associated with the vhci in parallel.
+ * op must be BUS_CONFIG_DRIVER or BUS_CONFIG_ALL.
+ *
+ * The phci paths are copied out of the cache under the reader lock, and
+ * the lock is dropped before any bus config is started. One thread is
+ * created per phci (or, if mdi_mtc_off is set, the phcis are configured one
+ * after the other in the calling thread), and this function returns only
+ * after all of them have finished. Each list element is handed to
+ * bus_config_phci(), which frees it.
+ *
+ * Of the caller's flags only NDI_CONFIG_REPROBE and NDI_DRV_CONF_REPROBE
+ * are passed on; NDI_NO_EVENT is always set.
+ */
+static void
+bus_config_all_phcis(mdi_vhci_cache_t *vhcache, uint_t flags,
+    ddi_bus_config_op_t op, major_t maj)
+{
+	mdi_phci_bus_config_t *phbc_head = NULL, *phbc, *phbc_next;
+	mdi_vhci_bus_config_t *vhbc;
+	mdi_vhcache_phci_t *cphci;
+
+	rw_enter(&vhcache->vhcache_lock, RW_READER);
+	if (vhcache->vhcache_phci_head == NULL) {
+		rw_exit(&vhcache->vhcache_lock);
+		return;
+	}
+
+	vhbc = kmem_zalloc(sizeof (*vhbc), KM_SLEEP);
+
+	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
+	    cphci = cphci->cphci_next) {
+		phbc = kmem_zalloc(sizeof (*phbc), KM_SLEEP);
+		phbc->phbc_phci_path = i_ddi_strdup(cphci->cphci_path,
+		    KM_SLEEP);
+		phbc->phbc_vhbusconfig = vhbc;
+		phbc->phbc_next = phbc_head;
+		phbc_head = phbc;
+		vhbc->vhbc_thr_count++;
+	}
+	rw_exit(&vhcache->vhcache_lock);
+
+	vhbc->vhbc_op = op;
+	vhbc->vhbc_op_major = maj;
+	vhbc->vhbc_op_flags = NDI_NO_EVENT |
+	    (flags & (NDI_CONFIG_REPROBE | NDI_DRV_CONF_REPROBE));
+	mutex_init(&vhbc->vhbc_lock, NULL, MUTEX_DEFAULT, NULL);
+	cv_init(&vhbc->vhbc_cv, NULL, CV_DRIVER, NULL);
+
+	/* now create threads to initiate bus config on all phcis in parallel */
+	for (phbc = phbc_head; phbc != NULL; phbc = phbc_next) {
+		phbc_next = phbc->phbc_next;
+		if (mdi_mtc_off)
+			bus_config_phci((void *)phbc);
+		else
+			(void) thread_create(NULL, 0, bus_config_phci, phbc,
+			    0, &p0, TS_RUN, minclsyspri);
+	}
+
+	mutex_enter(&vhbc->vhbc_lock);
+	/* wait until all threads exit */
+	while (vhbc->vhbc_thr_count > 0)
+		cv_wait(&vhbc->vhbc_cv, &vhbc->vhbc_lock);
+	mutex_exit(&vhbc->vhbc_lock);
+
+	mutex_destroy(&vhbc->vhbc_lock);
+	cv_destroy(&vhbc->vhbc_cv);
+	kmem_free(vhbc, sizeof (*vhbc));
+}
+
+/*
+ * Perform BUS_CONFIG_ONE on the specified child of the phci.
+ * The path includes the child component in addition to the phci path.
+ *
+ * path is split in place at its last '/' while the phci is being
+ * configured and is restored before returning, so it must be writable.
+ *
+ * Returns MDI_SUCCESS if the child was configured. Returns MDI_FAILURE
+ * otherwise, including when path is NULL or contains no '/'.
+ */
+static int
+bus_config_one_phci_child(char *path)
+{
+	dev_info_t *ph_dip, *child;
+	char *devnm;
+	int rv = MDI_FAILURE;
+
+	/*
+	 * extract the child component of the phci; a path without a '/'
+	 * has no child component and can not be configured.
+	 */
+	if (path == NULL || (devnm = strrchr(path, '/')) == NULL)
+		return (MDI_FAILURE);
+	*devnm++ = '\0';
+
+	/*
+	 * first configure all path components up to phci and then
+	 * configure the phci child.
+	 */
+	if ((ph_dip = e_ddi_hold_devi_by_path(path, 0)) != NULL) {
+		if (ndi_devi_config_one(ph_dip, devnm, &child, NDI_NO_EVENT) ==
+		    NDI_SUCCESS) {
+			/*
+			 * release the hold that ndi_devi_config_one() placed
+			 */
+			ndi_rele_devi(child);
+			rv = MDI_SUCCESS;
+		}
+
+		/* release the hold that e_ddi_hold_devi_by_path() placed */
+		ndi_rele_devi(ph_dip);
+	}
+
+	/* put back the '/' that was overwritten above */
+	devnm--;
+	*devnm = '/';
+	return (rv);
+}
+
+/*
+ * Collect the phci client paths of a vhci client that still need to be
+ * configured.
+ *
+ * A path qualifies if its cache entry has no pathinfo node, or if the
+ * pathinfo node is in the init state. For every such path a list element
+ * holding "<phci path>/<ct_name>@<path address>" is appended, so the
+ * result is in cache order. Returns NULL if no path qualifies.
+ */
+static mdi_phys_path_t *
+build_phclient_path_list(mdi_vhcache_client_t *cct, char *ct_name)
+{
+	mdi_vhcache_pathinfo_t *cpi;
+	mdi_phys_path_t *list = NULL, **linkp = &list, *node;
+	int unconfigured, pathlen;
+
+	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
+		if (cpi->cpi_pip != NULL) {
+			MDI_PI_LOCK(cpi->cpi_pip);
+			unconfigured = MDI_PI_IS_INIT(cpi->cpi_pip) ? 1 : 0;
+			MDI_PI_UNLOCK(cpi->cpi_pip);
+		} else
+			unconfigured = 1;
+
+		/* skip the paths that are configured already */
+		if (!unconfigured)
+			continue;
+
+		/* the 3 extra bytes are for '/', '@' and the terminator */
+		pathlen = strlen(cpi->cpi_cphci->cphci_path) +
+		    strlen(ct_name) + strlen(cpi->cpi_addr) + 3;
+
+		node = kmem_alloc(sizeof (*node), KM_SLEEP);
+		node->phys_path = kmem_alloc(pathlen, KM_SLEEP);
+		(void) snprintf(node->phys_path, pathlen, "%s/%s@%s",
+		    cpi->cpi_cphci->cphci_path, ct_name,
+		    cpi->cpi_addr);
+		node->phys_path_next = NULL;
+
+		/* append through the link of the current last element */
+		*linkp = node;
+		linkp = &node->phys_path_next;
+	}
+
+	return (list);
+}
+
+/*
+ * Release a phci client path list: every element and the path string it
+ * holds. An empty (NULL) list is accepted.
+ */
+static void
+free_phclient_path_list(mdi_phys_path_t *pp_head)
+{
+	mdi_phys_path_t *doomed;
+
+	while (pp_head != NULL) {
+		/* step past the element before it is freed */
+		doomed = pp_head;
+		pp_head = doomed->phys_path_next;
+
+		kmem_free(doomed->phys_path, strlen(doomed->phys_path) + 1);
+		kmem_free(doomed, sizeof (*doomed));
+	}
+}
+
+/*
+ * Create an async client config request.
+ *
+ * The client name and address are duplicated. The path list pp_head is
+ * stored as is, so it is released along with the request by
+ * free_async_client_config(). The lookup token of the request is set up
+ * from tok by init_vhcache_lookup_token().
+ */
+static mdi_async_client_config_t *
+alloc_async_client_config(char *ct_name, char *ct_addr,
+    mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
+{
+	mdi_async_client_config_t *req = kmem_alloc(sizeof (*req), KM_SLEEP);
+
+	req->acc_next = NULL;
+	req->acc_phclient_path_list_head = pp_head;
+	req->acc_ct_name = i_ddi_strdup(ct_name, KM_SLEEP);
+	req->acc_ct_addr = i_ddi_strdup(ct_addr, KM_SLEEP);
+	init_vhcache_lookup_token(&req->acc_token, tok);
+
+	return (req);
+}
+
+/*
+ * Release an async client config request together with everything it
+ * holds: the phci client path list, if any, and the copies of the client
+ * name and address.
+ */
+static void
+free_async_client_config(mdi_async_client_config_t *acc)
+{
+	mdi_phys_path_t *paths = acc->acc_phclient_path_list_head;
+	char *name = acc->acc_ct_name;
+	char *addr = acc->acc_ct_addr;
+
+	if (paths != NULL)
+		free_phclient_path_list(paths);
+	kmem_free(name, strlen(name) + 1);
+	kmem_free(addr, strlen(addr) + 1);
+	kmem_free(acc, sizeof (*acc));
+}
+
+/*
+ * Sort vhcache pathinfos (cpis) of the specified client, so that the cpis
+ * without the MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag come first and the
+ * cpis with the flag come last.
+ *
+ * The list is emptied and every cpi is put back, in its old order, with
+ * enqueue_vhcache_pathinfo(); the placement is left to that function.
+ */
+static void
+sort_vhcache_paths(mdi_vhcache_client_t *cct)
+{
+	mdi_vhcache_pathinfo_t *pending = cct->cct_cpi_head;
+	mdi_vhcache_pathinfo_t *cur;
+
+	cct->cct_cpi_head = cct->cct_cpi_tail = NULL;
+
+	while (pending != NULL) {
+		/* unhook before the enqueue, which may relink cur */
+		cur = pending;
+		pending = cur->cpi_next;
+		enqueue_vhcache_pathinfo(cct, cur);
+	}
+}
+
+/*
+ * Verify whether MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag setting is correct for
+ * every vhcache pathinfo of the specified client. If not adjust the flag
+ * setting appropriately.
+ *
+ * After the adjustment a path has the flag set if and only if it has no
+ * pathinfo node (cpi_pip == NULL); the paths are then sorted again. The
+ * check is first made under the reader lock. The lock is upgraded to a
+ * writer lock only if some flag has to change; if the upgrade fails the
+ * lock is dropped and taken again as writer, and the client is looked up
+ * once more.
+ *
+ * Note that MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag is persisted in the
+ * on-disk vhci cache. So every time this flag is updated the cache must be
+ * flushed.
+ */
+static void
+adjust_sort_vhcache_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
+    mdi_vhcache_lookup_token_t *tok)
+{
+	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
+	mdi_vhcache_client_t *cct;
+	mdi_vhcache_pathinfo_t *cpi;
+
+	rw_enter(&vhcache->vhcache_lock, RW_READER);
+	if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, tok))
+	    == NULL) {
+		rw_exit(&vhcache->vhcache_lock);
+		return;
+	}
+
+	/*
+	 * to avoid unnecessary on-disk cache updates, first check if an
+	 * update is really needed. If no update is needed simply return.
+	 */
+	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
+		if ((cpi->cpi_pip != NULL &&
+		    (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) ||
+		    (cpi->cpi_pip == NULL &&
+		    !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))) {
+			break;
+		}
+	}
+	if (cpi == NULL) {
+		rw_exit(&vhcache->vhcache_lock);
+		return;
+	}
+
+	if (rw_tryupgrade(&vhcache->vhcache_lock) == 0) {
+		rw_exit(&vhcache->vhcache_lock);
+		rw_enter(&vhcache->vhcache_lock, RW_WRITER);
+		if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr,
+		    tok)) == NULL) {
+			rw_exit(&vhcache->vhcache_lock);
+			return;
+		}
+	}
+
+	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
+		if (cpi->cpi_pip != NULL)
+			cpi->cpi_flags &= ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
+		else
+			cpi->cpi_flags |= MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
+	}
+	sort_vhcache_paths(cct);
+
+	rw_exit(&vhcache->vhcache_lock);
+	vhcache_dirty(vhc);
+}
+
+/*
+ * Bus configure, one after the other, every phci client path on the list
+ * pp_head, then bring the path hint flags and path order of the client's
+ * cache entry up to date. Failures to configure a path are ignored. The
+ * list is not freed.
+ */
+static void
+config_client_paths_sync(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
+    mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
+{
+	mdi_phys_path_t *cur = pp_head;
+
+	while (cur != NULL) {
+		(void) bus_config_one_phci_child(cur->phys_path);
+		cur = cur->phys_path_next;
+	}
+
+	adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, tok);
+}
+
+/*
+ * Dequeue elements from vhci async client config list and bus configure
+ * their corresponding phci clients.
+ *
+ * arg is the mdi_vhci_config_t. vhc_lock is dropped while the paths of a
+ * dequeued element are being configured; the element is freed afterwards.
+ * The thread exits when MDI_VHC_EXIT is set, or when the list has stayed
+ * empty for mdi_async_config_idle_time seconds; on exit it decrements
+ * vhc_acc_thrcount.
+ */
+static void
+config_client_paths_thread(void *arg)
+{
+	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
+	mdi_async_client_config_t *acc;
+	clock_t quit_at_ticks;
+	clock_t idle_time = mdi_async_config_idle_time * TICKS_PER_SECOND;
+	callb_cpr_t cprinfo;
+
+	CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
+	    "mdi_config_client_paths");
+
+	for (; ; ) {
+		quit_at_ticks = ddi_get_lbolt() + idle_time;
+
+		mutex_enter(&vhc->vhc_lock);
+		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
+		    vhc->vhc_acc_list_head == NULL &&
+		    ddi_get_lbolt() < quit_at_ticks) {
+			CALLB_CPR_SAFE_BEGIN(&cprinfo);
+			(void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
+			    quit_at_ticks);
+			CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
+		}
+
+		if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
+		    vhc->vhc_acc_list_head == NULL)
+			goto out;	/* vhc_lock is still held */
+
+		acc = vhc->vhc_acc_list_head;
+		vhc->vhc_acc_list_head = acc->acc_next;
+		if (vhc->vhc_acc_list_head == NULL)
+			vhc->vhc_acc_list_tail = NULL;
+		vhc->vhc_acc_count--;
+		mutex_exit(&vhc->vhc_lock);
+
+		config_client_paths_sync(vhc, acc->acc_ct_name,
+		    acc->acc_ct_addr, acc->acc_phclient_path_list_head,
+		    &acc->acc_token);
+
+		free_async_client_config(acc);
+	}
+
+out:
+	vhc->vhc_acc_thrcount--;
+	/* CALLB_CPR_EXIT releases the vhc->vhc_lock */
+	CALLB_CPR_EXIT(&cprinfo);
+}
+
+/*
+ * Arrange for all the phci client paths (pp_head) for the specified client
+ * to be bus configured asynchronously by a thread.
+ *
+ * Ownership of the pp_head list passes to this function: the list is freed
+ * either here or, together with the queued request, by the thread that
+ * processes it. If mdi_mtc_off is set the paths are configured
+ * synchronously in the calling thread instead. A request is dropped if one
+ * for the same client name and address is already queued. A new thread is
+ * created only when the number of queued requests exceeds the number of
+ * threads; otherwise the existing threads are woken up.
+ */
+static void
+config_client_paths_async(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
+    mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
+{
+	mdi_async_client_config_t *acc, *newacc;
+	int create_thread;
+
+	if (pp_head == NULL)
+		return;
+
+	if (mdi_mtc_off) {
+		config_client_paths_sync(vhc, ct_name, ct_addr, pp_head, tok);
+		free_phclient_path_list(pp_head);
+		return;
+	}
+
+	newacc = alloc_async_client_config(ct_name, ct_addr, pp_head, tok);
+	ASSERT(newacc);
+
+	mutex_enter(&vhc->vhc_lock);
+	for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc->acc_next) {
+		if (strcmp(ct_name, acc->acc_ct_name) == 0 &&
+		    strcmp(ct_addr, acc->acc_ct_addr) == 0) {
+			free_async_client_config(newacc);
+			mutex_exit(&vhc->vhc_lock);
+			return;
+		}
+	}
+
+	if (vhc->vhc_acc_list_head == NULL)
+		vhc->vhc_acc_list_head = newacc;
+	else
+		vhc->vhc_acc_list_tail->acc_next = newacc;
+	vhc->vhc_acc_list_tail = newacc;
+	vhc->vhc_acc_count++;
+	if (vhc->vhc_acc_count <= vhc->vhc_acc_thrcount) {
+		cv_broadcast(&vhc->vhc_cv);
+		create_thread = 0;
+	} else {	/* account for the new thread under vhc_lock */
+		vhc->vhc_acc_thrcount++;
+		create_thread = 1;
+	}
+	mutex_exit(&vhc->vhc_lock);
+
+	if (create_thread)
+		(void) thread_create(NULL, 0, config_client_paths_thread, vhc,
+		    0, &p0, TS_RUN, minclsyspri);
+}
+
+/*
+ * Count the paths of the specified client whose pathinfo node exists and
+ * is in the MDI_PATHINFO_STATE_ONLINE state. The state of each pathinfo
+ * node is read with the node locked.
+ */
+static int
+nonline_paths(mdi_vhcache_client_t *cct)
+{
+	mdi_vhcache_pathinfo_t *entry;
+	int nonline = 0;
+
+	for (entry = cct->cct_cpi_head; entry != NULL;
+	    entry = entry->cpi_next) {
+		/* a path without a pathinfo node can not be online */
+		if (entry->cpi_pip == NULL)
+			continue;
+
+		MDI_PI_LOCK(entry->cpi_pip);
+		if (entry->cpi_pip->pi_state == MDI_PATHINFO_STATE_ONLINE)
+			nonline++;
+		MDI_PI_UNLOCK(entry->cpi_pip);
+	}
+
+	return (nonline);
+}
+
+/*
+ * Bus configure all paths for the specified vhci client.
+ * If at least one path for the client is already online, the remaining paths
+ * will be configured asynchronously. Otherwise, it synchronously configures
+ * the paths until at least one path is online and then rest of the paths
+ * will be configured asynchronously.
+ *
+ * The caller must hold vhcache_lock; it is released on every return path.
+ * Nothing is configured if ct_name or ct_addr is NULL, if the client is not
+ * in the cache, or if the client has no unconfigured paths.
+ *
+ * The part of the path list that is handed to config_client_paths_async()
+ * is released by that function; the rest of the list is freed here.
+ */
+static void
+config_client_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr)
+{
+	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
+	mdi_phys_path_t *pp_head, *pp;
+	mdi_vhcache_client_t *cct;
+	mdi_vhcache_lookup_token_t tok;
+
+	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
+
+	init_vhcache_lookup_token(&tok, NULL);
+
+	if (ct_name == NULL || ct_addr == NULL ||
+	    (cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, &tok))
+	    == NULL ||
+	    (pp_head = build_phclient_path_list(cct, ct_name)) == NULL) {
+		rw_exit(&vhcache->vhcache_lock);
+		return;
+	}
+
+	/* if at least one path is online, configure the rest asynchronously */
+	if (nonline_paths(cct) > 0) {
+		rw_exit(&vhcache->vhcache_lock);
+		config_client_paths_async(vhc, ct_name, ct_addr, pp_head, &tok);
+		return;
+	}
+
+	rw_exit(&vhcache->vhcache_lock);
+
+	for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) {
+		if (bus_config_one_phci_child(pp->phys_path) == MDI_SUCCESS) {
+			rw_enter(&vhcache->vhcache_lock, RW_READER);
+
+			if ((cct = lookup_vhcache_client(vhcache, ct_name,
+			    ct_addr, &tok)) == NULL) {
+				rw_exit(&vhcache->vhcache_lock);
+				goto out;
+			}
+
+			if (nonline_paths(cct) > 0 &&
+			    pp->phys_path_next != NULL) {
+				rw_exit(&vhcache->vhcache_lock);
+				config_client_paths_async(vhc, ct_name, ct_addr,
+				    pp->phys_path_next, &tok);
+				pp->phys_path_next = NULL; /* tail handed off */
+				goto out;
+			}
+
+			rw_exit(&vhcache->vhcache_lock);
+		}
+	}
+
+	adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, &tok);
+out:
+	free_phclient_path_list(pp_head);
+}
+
+/*
+ * Begin a single threaded vhci config section: wait until no other thread
+ * has MDI_VHC_SINGLE_THREADED set, then set it. The section is ended with
+ * single_threaded_vhconfig_exit().
+ */
+static void
+single_threaded_vhconfig_enter(mdi_vhci_config_t *vhc)
+{
+	mutex_enter(&vhc->vhc_lock);
+	for (; ; ) {
+		if (!(vhc->vhc_flags & MDI_VHC_SINGLE_THREADED))
+			break;
+		cv_wait(&vhc->vhc_cv, &vhc->vhc_lock);
+	}
+	vhc->vhc_flags |= MDI_VHC_SINGLE_THREADED;
+	mutex_exit(&vhc->vhc_lock);
+}
+
+/*
+ * End a single threaded vhci config section: clear
+ * MDI_VHC_SINGLE_THREADED and wake up all the threads waiting on vhc_cv,
+ * among them those blocked in single_threaded_vhconfig_enter().
+ */
+static void
+single_threaded_vhconfig_exit(mdi_vhci_config_t *vhc)
+{
+	mutex_enter(&vhc->vhc_lock);
+	vhc->vhc_flags = vhc->vhc_flags & ~MDI_VHC_SINGLE_THREADED;
+	cv_broadcast(&vhc->vhc_cv);
+	mutex_exit(&vhc->vhc_lock);
+}
+
+/*
+ * Attach the phci driver instances associated with the vhci.
+ *
+ * With the root mounted (root_mounted nonzero) every phci driver on the
+ * vhci's driver list is attached; otherwise only the drivers that have
+ * root support are. A driver is attached by holding it installed, and the
+ * hold is released again right away. Drivers without a major number are
+ * skipped.
+ */
+static void
+attach_phci_drivers(mdi_vhci_config_t *vhc, int root_mounted)
+{
+	int idx;
+	major_t maj;
+
+	for (idx = 0; idx < vhc->vhc_nphci_drivers; idx++) {
+		if (root_mounted != 0 ||
+		    vhc->vhc_phci_driver_list[idx].phdriver_root_support
+		    != 0) {
+			maj = ddi_name_to_major(
+			    vhc->vhc_phci_driver_list[idx].phdriver_name);
+			if (maj == (major_t)-1)
+				continue;
+
+			if (ddi_hold_installed_driver(maj) != NULL)
+				ddi_rele_driver(maj);
+		}
+	}
+}
+
+/*
+ * Build vhci cache:
+ *
+ * The phci driver instances are attached and BUS_CONFIG_ALL is then driven
+ * on them. The cache gets built during this process.
+ *
+ * With the root mounted (root_mounted nonzero) the cache is built fully:
+ * it is marked MDI_VHCI_CACHE_SETUP_DONE and made dirty, and
+ * VHCACHE_FULLY_BUILT is returned.
+ *
+ * Without the root mounted the phci drivers that have no root support are
+ * not attached, so the cache reflects only the phci drivers with root
+ * support; VHCACHE_PARTIALLY_BUILT is returned.
+ *
+ * If the cache setup is done already nothing is built and
+ * VHCACHE_NOT_REBUILT is returned.
+ */
+static vhcache_build_status_t
+build_vhci_cache(mdi_vhci_config_t *vhc, int root_mounted)
+{
+	mdi_vhci_cache_t *cache = &vhc->vhc_vhcache;
+	int setup_done;
+
+	rw_enter(&cache->vhcache_lock, RW_READER);
+	setup_done = (cache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE) != 0;
+	rw_exit(&cache->vhcache_lock);
+
+	if (setup_done)
+		return (VHCACHE_NOT_REBUILT);
+
+	attach_phci_drivers(vhc, root_mounted);
+	bus_config_all_phcis(cache, NDI_DRV_CONF_REPROBE | NDI_NO_EVENT,
+	    BUS_CONFIG_ALL, (major_t)-1);
+
+	if (!root_mounted)
+		return (VHCACHE_PARTIALLY_BUILT);
+
+	rw_enter(&cache->vhcache_lock, RW_WRITER);
+	cache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
+	rw_exit(&cache->vhcache_lock);
+	vhcache_dirty(vhc);
+
+	return (VHCACHE_FULLY_BUILT);
+}
+
+/*
+ * Wait until the root is mounted and then build the vhci cache.
+ *
+ * arg is the mdi_vhci_config_t. modrootloaded is checked again at least
+ * every 10 seconds, and whenever vhc_cv is signalled. The thread exits
+ * without building the cache if MDI_VHC_EXIT gets set.
+ *
+ * On exit MDI_VHC_BUILD_VHCI_CACHE_THREAD is cleared and vhc_cv is
+ * broadcast: e_build_vhci_cache() waits on vhc_cv, without a timeout, for
+ * this flag to be cleared.
+ */
+static void
+build_vhci_cache_thread(void *arg)
+{
+	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
+
+	mutex_enter(&vhc->vhc_lock);
+	while (!modrootloaded && !(vhc->vhc_flags & MDI_VHC_EXIT)) {
+		(void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
+		    ddi_get_lbolt() + 10 * TICKS_PER_SECOND);
+	}
+	if (vhc->vhc_flags & MDI_VHC_EXIT)
+		goto out;
+
+	mutex_exit(&vhc->vhc_lock);
+
+	/*
+	 * The root is mounted now, so build_vhci_cache() will build
+	 * the full cache.
+	 */
+	(void) build_vhci_cache(vhc, 1);
+
+	mutex_enter(&vhc->vhc_lock);
+out:
+	vhc->vhc_flags &= ~MDI_VHC_BUILD_VHCI_CACHE_THREAD;
+	/*
+	 * Wake up e_build_vhci_cache(), which may be blocked in cv_wait()
+	 * until the flag is cleared. All the other waiters on vhc_cv
+	 * recheck their own conditions in a loop, so the extra wakeup is
+	 * harmless to them.
+	 */
+	cv_broadcast(&vhc->vhc_cv);
+	mutex_exit(&vhc->vhc_lock);
+}
+
+/*
+ * Build vhci cache - a wrapper for build_vhci_cache().
+ *
+ * In a normal case on-disk vhci cache is read and setup during booting.
+ * But if the on-disk vhci cache is not there or deleted or corrupted then
+ * this function sets up the vhci cache.
+ *
+ * The cache is built fully if the root is mounted.
+ *
+ * If the root is not mounted, initially the cache is built reflecting only
+ * those driver entries that have the root support. A separate thread is
+ * created to handle the creation of full cache. This thread will wait
+ * until the root is mounted and then rebuilds the cache.
+ *
+ * Callers are serialized with single_threaded_vhconfig_enter()/exit().
+ * The value returned is a vhcache_build_status_t.
+ */
+static int
+e_build_vhci_cache(mdi_vhci_config_t *vhc)
+{
+	vhcache_build_status_t rv;
+
+	single_threaded_vhconfig_enter(vhc);
+
+	mutex_enter(&vhc->vhc_lock);
+	if (vhc->vhc_flags & MDI_VHC_BUILD_VHCI_CACHE_THREAD) {
+		if (modrootloaded) {
+			cv_broadcast(&vhc->vhc_cv);
+			/* wait until build vhci cache thread exits */
+			while (vhc->vhc_flags & MDI_VHC_BUILD_VHCI_CACHE_THREAD)
+				cv_wait(&vhc->vhc_cv, &vhc->vhc_lock);
+			rv = VHCACHE_FULLY_BUILT;
+		} else {
+			/*
+			 * The presence of MDI_VHC_BUILD_VHCI_CACHE_THREAD
+			 * flag indicates that the cache has already been
+			 * partially built.
+			 */
+			rv = VHCACHE_PARTIALLY_BUILT;
+		}
+
+		mutex_exit(&vhc->vhc_lock);
+		single_threaded_vhconfig_exit(vhc);
+		return (rv);
+	}
+	mutex_exit(&vhc->vhc_lock);
+
+	rv = build_vhci_cache(vhc, modrootloaded);
+
+	if (rv == VHCACHE_PARTIALLY_BUILT) {
+		/*
+		 * create a thread; this thread will wait until the root is
+		 * mounted and then fully rebuilds the cache.
+		 */
+		mutex_enter(&vhc->vhc_lock);
+		vhc->vhc_flags |= MDI_VHC_BUILD_VHCI_CACHE_THREAD;
+		mutex_exit(&vhc->vhc_lock);
+		(void) thread_create(NULL, 0, build_vhci_cache_thread,
+		    vhc, 0, &p0, TS_RUN, minclsyspri);
+	}
+
+	single_threaded_vhconfig_exit(vhc);
+	return (rv);
+}
+
+/*
+ * Generic vhci bus config implementation:
+ *
+ * Parameters
+ *	vdip	vhci dip
+ *	flags	bus config flags
+ *	op	bus config operation
+ *	The remaining parameters are bus config operation specific
+ *
+ * for BUS_CONFIG_ONE
+ *	arg	pointer to name@addr; the '@' is overwritten with a
+ *		terminator while the client paths are being configured and
+ *		is restored afterwards, so the string must be writable.
+ *	child	upon successful return from this function, *child will be
+ *		set to the configured and held devinfo child node of vdip.
+ *	ct_addr	pointer to client address (i.e. GUID)
+ *
+ * for BUS_CONFIG_DRIVER
+ *	arg	major number of the driver
+ *	child and ct_addr parameters are ignored
+ *
+ * for BUS_CONFIG_ALL
+ *	arg, child, and ct_addr parameters are ignored
+ *
+ * Note that for the rest of the bus config operations, this function simply
+ * calls the framework provided default bus config routine.
+ *
+ * Returns MDI_SUCCESS if the framework provided bus config routine
+ * succeeds, and MDI_FAILURE otherwise.
+ */
+int
+mdi_vhci_bus_config(dev_info_t *vdip, uint_t flags, ddi_bus_config_op_t op,
+    void *arg, dev_info_t **child, char *ct_addr)
+{
+	mdi_vhci_t *vh = i_devi_get_vhci(vdip);
+	mdi_vhci_config_t *vhc = vh->vh_config;
+	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
+	vhcache_build_status_t rv = VHCACHE_NOT_REBUILT;
+	char *cp;
+
+	/*
+	 * While bus configuring phcis, the phci driver interactions with MDI
+	 * cause child nodes to be enumerated under the vhci node for which
+	 * they need to ndi_devi_enter the vhci node.
+	 *
+	 * Unfortunately, we ourselves can not wait for the bus config
+	 * operations on phcis to finish while holding the ndi_devi_enter
+	 * lock, since that would deadlock. To avoid this deadlock, skip bus
+	 * configs on phcis and call the default framework provided bus
+	 * config function if we are called with ndi_devi_enter lock held.
+	 */
+	if (DEVI_BUSY_OWNED(vdip)) {
+		MDI_DEBUG(2, (CE_NOTE, vdip,
+		    "!MDI: vhci bus config: vhci dip is busy owned\n"));
+		goto default_bus_config;
+	}
+
+	rw_enter(&vhcache->vhcache_lock, RW_READER);
+	if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) {
+		rw_exit(&vhcache->vhcache_lock);
+		rv = e_build_vhci_cache(vhc);
+		rw_enter(&vhcache->vhcache_lock, RW_READER);
+	}
+
+	switch (op) {
+	case BUS_CONFIG_ONE:
+		/* extract node name */
+		cp = (char *)arg;
+		while (*cp != '\0' && *cp != '@')
+			cp++;
+		if (*cp == '@') {
+			*cp = '\0';
+			config_client_paths(vhc, (char *)arg, ct_addr);
+			/* config_client_paths() releases the cache_lock */
+			*cp = '@';
+		} else
+			rw_exit(&vhcache->vhcache_lock);
+		break;
+
+	case BUS_CONFIG_DRIVER:
+		rw_exit(&vhcache->vhcache_lock);
+		if (rv == VHCACHE_NOT_REBUILT)
+			bus_config_all_phcis(vhcache, flags, op,
+			    (major_t)(uintptr_t)arg);
+		break;
+
+	case BUS_CONFIG_ALL:
+		rw_exit(&vhcache->vhcache_lock);
+		if (rv == VHCACHE_NOT_REBUILT)
+			bus_config_all_phcis(vhcache, flags, op, -1);
+		break;
+
+	default:
+		rw_exit(&vhcache->vhcache_lock);
+		break;
+	}
+
+
+default_bus_config:
+	/*
+	 * All requested child nodes are enumerated under the vhci.
+	 * Now configure them.
+	 */
+	if (ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
+	    NDI_SUCCESS) {
+		return (MDI_SUCCESS);
+	}
+
+	return (MDI_FAILURE);
+}
+
+/*
+ * Load the on-disk vhci cache of the specified vhci class.
+ *
+ * Returns the nvlist read from the cache file, or NULL if the file could
+ * not be read. A warning is logged for an I/O error and for a corrupted
+ * file; any other failure is silent.
+ */
+static nvlist_t *
+read_on_disk_vhci_cache(char *vhci_class)
+{
+	nvlist_t *nvl, *result = NULL;
+	char *cachefile = vhclass2vhcache_filename(vhci_class);
+
+	switch (fread_nvlist(cachefile, &nvl)) {
+	case 0:
+		result = nvl;
+		break;
+	case EIO:
+		cmn_err(CE_WARN, "%s: I/O error, will recreate\n", cachefile);
+		break;
+	case EINVAL:
+		cmn_err(CE_WARN,
+		    "%s: data file corrupted, will recreate\n", cachefile);
+		break;
+	default:
+		break;
+	}
+
+	/* the file name is no longer needed, whatever the outcome */
+	kmem_free(cachefile, strlen(cachefile) + 1);
+	return (result);
+}
+
+/*
+ * Load the on-disk vhci cache of every vhci class into vhcache_nvl[]. The
+ * slot of a class whose cache file could not be read is set to NULL.
+ * Called during booting by i_ddi_read_devices_files().
+ */
+void
+mdi_read_devices_files(void)
+{
+	int idx = 0;
+
+	while (idx < N_VHCI_CLASSES) {
+		vhcache_nvl[idx] =
+		    read_on_disk_vhci_cache(vhci_class_list[idx]);
+		idx++;
+	}
+}
+
+/*
+ * Remove all stale entries from vhci cache.
+ *
+ * A pathinfo entry is stale if it has no pathinfo node (cpi_pip == NULL),
+ * a client entry is stale if none of its paths is kept, and a phci entry
+ * is stale if it has no phci (cphci_phci == NULL). Each list is detached
+ * and rebuilt from the entries that are kept; stale entries are freed and
+ * stale clients are also removed from the client hash. The time of the
+ * cleanup is recorded and the cache is made dirty.
+ */
+static void
+clean_vhcache(mdi_vhci_config_t *vhc)
+{
+	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
+	mdi_vhcache_phci_t *cphci, *cphci_head, *cphci_next;
+	mdi_vhcache_client_t *cct, *cct_head, *cct_next;
+	mdi_vhcache_pathinfo_t *cpi, *cpi_head, *cpi_next;
+
+	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
+
+	cct_head = vhcache->vhcache_client_head;
+	vhcache->vhcache_client_head = vhcache->vhcache_client_tail = NULL;
+	for (cct = cct_head; cct != NULL; cct = cct_next) {
+		cct_next = cct->cct_next;
+
+		cpi_head = cct->cct_cpi_head;
+		cct->cct_cpi_head = cct->cct_cpi_tail = NULL;
+		for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) {
+			cpi_next = cpi->cpi_next;
+			if (cpi->cpi_pip != NULL) {
+				ASSERT(cpi->cpi_cphci->cphci_phci != NULL);
+				enqueue_tail_vhcache_pathinfo(cct, cpi);
+			} else
+				free_vhcache_pathinfo(cpi);
+		}
+
+		if (cct->cct_cpi_head != NULL)
+			enqueue_vhcache_client(vhcache, cct);
+		else {	/* no path is left for this client */
+			(void) mod_hash_destroy(vhcache->vhcache_client_hash,
+			    (mod_hash_key_t)cct->cct_name_addr);
+			free_vhcache_client(cct);
+		}
+	}
+
+	cphci_head = vhcache->vhcache_phci_head;
+	vhcache->vhcache_phci_head = vhcache->vhcache_phci_tail = NULL;
+	for (cphci = cphci_head; cphci != NULL; cphci = cphci_next) {
+		cphci_next = cphci->cphci_next;
+		if (cphci->cphci_phci != NULL)
+			enqueue_vhcache_phci(vhcache, cphci);
+		else
+			free_vhcache_phci(cphci);
+	}
+
+	vhcache->vhcache_clean_time = lbolt64;
+	rw_exit(&vhcache->vhcache_lock);
+	vhcache_dirty(vhc);
+}
+
+/*
+ * Remove the stale entries from the vhci cache of every vhci.
+ * Called by i_ddi_clean_devices_files() during the execution of devfsadm -C
+ *
+ * mdi_mutex is dropped while the cache of a vhci is being cleaned; the
+ * reference count of the vhci is raised for that time.
+ */
+void
+mdi_clean_vhcache(void)
+{
+	mdi_vhci_t *cur;
+
+	mutex_enter(&mdi_mutex);
+	cur = mdi_vhci_head;
+	while (cur != NULL) {
+		cur->vh_refcnt++;
+		mutex_exit(&mdi_mutex);
+
+		clean_vhcache(cur->vh_config);
+
+		mutex_enter(&mdi_mutex);
+		cur->vh_refcnt--;
+		/* step to the next vhci only with mdi_mutex held again */
+		cur = cur->vh_next;
+	}
+	mutex_exit(&mdi_mutex);
+}
--- a/usr/src/uts/common/sys/ddi_implfuncs.h	Thu Nov 10 01:39:35 2005 -0800
+++ b/usr/src/uts/common/sys/ddi_implfuncs.h	Thu Nov 10 07:14:29 2005 -0800
@@ -255,6 +255,7 @@
  */
 void i_ddi_devices_init(void);
 void i_ddi_read_devices_files(void);
+void i_ddi_clean_devices_files(void);
 
 int i_ddi_devi_get_devid(dev_t, dev_info_t *, ddi_devid_t *);
 
@@ -262,7 +263,6 @@
 
 int e_devid_cache_register(dev_info_t *, ddi_devid_t);
 void e_devid_cache_unregister(dev_info_t *);
-void e_devid_cache_cleanup(void);
 
 int e_devid_cache_to_devt_list(ddi_devid_t, char *, int *, dev_t **);
 void e_devid_cache_free_devt_list(int, dev_t *);
--- a/usr/src/uts/common/sys/devinfo_impl.h	Thu Nov 10 01:39:35 2005 -0800
+++ b/usr/src/uts/common/sys/devinfo_impl.h	Thu Nov 10 07:14:29 2005 -0800
@@ -20,7 +20,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -60,6 +60,7 @@
 #define	DINFOPRIVDATA	(DIIOC | 0x10)	/* include private data */
 #define	DINFOFORCE	(DIIOC | 0x20)	/* force load all drivers */
 #define	DINFOCACHE	(DIIOC | 0x100000) /* use cached data  */
+#define	DINFOCLEANUP	(DIIOC | 0x200000) /* cleanup /etc/devices files */
 
 /* new public flag for the layered drivers framework */
 #define	DINFOLYR	(DIIOC | 0x40)	/* get device layering information */
--- a/usr/src/uts/common/sys/mdi_impldefs.h	Thu Nov 10 01:39:35 2005 -0800
+++ b/usr/src/uts/common/sys/mdi_impldefs.h	Thu Nov 10 07:14:29 2005 -0800
@@ -30,7 +30,10 @@
 #pragma ident	"%Z%%M%	%I%	%E% SMI"
 
 #include <sys/note.h>
+#include <sys/types.h>
 #include <sys/sunmdi.h>
+#include <sys/modhash.h>
+#include <sys/callb.h>
 
 #ifdef	__cplusplus
 extern "C" {
@@ -217,30 +220,6 @@
 } mdi_vhci_ops_t;
 
 /*
- * phci bus config structure - one for for each phci bus config operation that
- * we initiate on behalf of a vhci.
- */
-typedef struct mdi_phci_config {
-	struct mdi_vhci_config	*phc_vhc;	/* vhci bus config */
-	struct mdi_phci_config	*phc_next;	/* next one on this list */
-	dev_info_t	*phc_parent_dip;	/* parent of the phci */
-	char		phc_devnm[MAXNAMELEN];	/* /name@addr of the phci */
-} mdi_phci_config_t;
-
-/* vhci bus config structure - one for vhci instance */
-typedef struct mdi_vhci_config {
-	volatile ddi_bus_config_op_t vhc_op;	/* bus config - op type */
-	major_t			vhc_major;	/* bus config - major */
-	int			vhc_flags;	/* bus config - flags */
-	volatile int64_t	vhc_start_time;	/* bus config start time */
-	int64_t			vhc_cutoff_time; /* end time + some timeout */
-	taskq_t			*vhc_taskq;
-	kcondvar_t		vhc_cv;		/* mutex is mdi_mutex */
-	mdi_phci_config_t	*vhc_phc;	/* phci bus config list */
-	int			vhc_phc_cnt;	/* # of phcs on vhc_phc list */
-} mdi_vhci_config_t;
-
-/*
  * An mdi_vhci structure is created and bound to the devinfo node of every
  * registered vHCI class driver; this happens when a vHCI registers itself from
  * attach(9e).  This structure is unbound and freed when the vHCI unregisters
@@ -267,7 +246,8 @@
 	struct mdi_phci		*vh_phci_tail;	/* pHCI list tail	*/
 	int			vh_client_count;	/* Client count	*/
 	struct client_hash	*vh_client_table;	/* Client hash	*/
-	mdi_vhci_config_t	vh_bus_config;
+	int			vh_refcnt;	/* reference count */
+	struct mdi_vhci_config	*vh_config;	/* vhci config */
 } mdi_vhci_t;
 
 /*
@@ -901,6 +881,126 @@
 	    ((MDI_PI(pip))->pi_phci->ph_flags & MDI_PHCI_FLAGS_SUSPEND)
 
 /*
+ * mdi_vhcache_client, mdi_vhcache_pathinfo, and mdi_vhcache_phci structures
+ * hold the vhci to phci client mappings of the on-disk vhci busconfig cache.
+ */
+
+/* phci structure of vhci cache */
+typedef struct mdi_vhcache_phci {
+	char			*cphci_path;	/* phci path name */
+	uint32_t		cphci_id;	/* used when building nvlist */
+	mdi_phci_t		*cphci_phci;	/* pointer to actual phci */
+	struct mdi_vhcache_phci	*cphci_next;	/* next in vhci phci list */
+} mdi_vhcache_phci_t;
+
+/* pathinfo structure of vhci cache */
+typedef struct mdi_vhcache_pathinfo {
+	char			*cpi_addr;	/* path address */
+	mdi_vhcache_phci_t	*cpi_cphci;	/* phci the path belongs to */
+	struct mdi_pathinfo	*cpi_pip;	/* ptr to actual pathinfo */
+	uint32_t		cpi_flags;	/* see below */
+	struct mdi_vhcache_pathinfo *cpi_next;	/* next path for the client */
+} mdi_vhcache_pathinfo_t;
+
+/*
+ * cpi_flags
+ *
+ * MDI_CPI_HINT_PATH_DOES_NOT_EXIST - set when configuration of the path has
+ * failed.
+ */
+#define	MDI_CPI_HINT_PATH_DOES_NOT_EXIST	0x0001
+
+/* client structure of vhci cache */
+typedef struct mdi_vhcache_client {
+	char			*cct_name_addr;	/* client address */
+	mdi_vhcache_pathinfo_t	*cct_cpi_head;	/* client's path list head */
+	mdi_vhcache_pathinfo_t	*cct_cpi_tail;	/* client's path list tail */
+	struct mdi_vhcache_client *cct_next;	/* next in vhci client list */
+} mdi_vhcache_client_t;
+
+/* vhci cache structure - one per vhci instance */
+typedef struct mdi_vhci_cache {
+	mdi_vhcache_phci_t	*vhcache_phci_head;	/* phci list head */
+	mdi_vhcache_phci_t	*vhcache_phci_tail;	/* phci list tail */
+	mdi_vhcache_client_t	*vhcache_client_head;	/* client list head */
+	mdi_vhcache_client_t	*vhcache_client_tail;	/* client list tail */
+	mod_hash_t		*vhcache_client_hash;	/* client hash */
+	int			vhcache_flags;		/* see below */
+	int64_t			vhcache_clean_time;	/* last clean time */
+	krwlock_t		vhcache_lock;		/* cache lock */
+} mdi_vhci_cache_t;
+
+/* vhcache_flags */
+#define	MDI_VHCI_CACHE_SETUP_DONE	0x0001	/* cache setup completed */
+
+typedef struct mdi_phci_driver_info {
+	char			*phdriver_name;	/* name of the phci driver */
+
+	/* set to non zero if the phci driver supports root device */
+	int			phdriver_root_support;
+} mdi_phci_driver_info_t;
+
+/* vhci bus config structure - one per vhci instance */
+typedef struct mdi_vhci_config {
+	char			*vhc_vhcache_filename;	/* on-disk file name */
+	mdi_vhci_cache_t	vhc_vhcache;		/* vhci cache */
+	mdi_phci_driver_info_t	*vhc_phci_driver_list;	/* ph drv info array */
+	int			vhc_nphci_drivers;	/* # of phci drivers */
+	kmutex_t		vhc_lock;		/* vhci config lock */
+	kcondvar_t		vhc_cv;
+	int			vhc_flags;		/* see below */
+
+	/* flush vhci cache when lbolt reaches vhc_flush_at_ticks */
+	clock_t			vhc_flush_at_ticks;
+
+	/*
+	 * Head and tail of the client list whose paths are being configured
+	 * asynchronously. vhc_acc_count is the number of clients on this list.
+	 * vhc_acc_thrcount is the number of threads running to configure
+	 * the paths for these clients.
+	 */
+	struct mdi_async_client_config *vhc_acc_list_head;
+	struct mdi_async_client_config *vhc_acc_list_tail;
+	int			vhc_acc_count;
+	int			vhc_acc_thrcount;
+
+	/* callback id - for flushing the cache during system shutdown */
+	callb_id_t		vhc_cbid;
+} mdi_vhci_config_t;
+
+/* vhc_flags */
+#define	MDI_VHC_SINGLE_THREADED		0x0001	/* config single threaded */
+#define	MDI_VHC_EXIT			0x0002	/* exit all config activity */
+#define	MDI_VHC_VHCACHE_DIRTY		0x0004	/* cache dirty */
+#define	MDI_VHC_VHCACHE_FLUSH_THREAD	0x0008	/* cache flush thread running */
+#define	MDI_VHC_VHCACHE_FLUSH_ERROR	0x0010	/* failed to flush cache */
+#define	MDI_VHC_READONLY_FS		0x0020	/* filesys is readonly */
+#define	MDI_VHC_BUILD_VHCI_CACHE_THREAD	0x0040	/* cachebuild thread running */
+
+typedef struct mdi_phys_path {
+	char			*phys_path;
+	struct mdi_phys_path	*phys_path_next;
+} mdi_phys_path_t;
+
+/*
+ * Lookup tokens are used to cache the result of the vhci cache client lookup
+ * operations (to reduce the number of real lookup operations).
+ */
+typedef struct mdi_vhcache_lookup_token {
+	mdi_vhcache_client_t	*lt_cct;		/* vhcache client */
+	int64_t			lt_cct_lookup_time;	/* last lookup time */
+} mdi_vhcache_lookup_token_t;
+
+/* asynchronous configuration of client paths */
+typedef struct mdi_async_client_config {
+	char			*acc_ct_name;	/* client name */
+	char			*acc_ct_addr;	/* client address */
+	mdi_phys_path_t		*acc_phclient_path_list_head;	/* path head */
+	mdi_vhcache_lookup_token_t acc_token;	/* lookup token */
+	struct mdi_async_client_config *acc_next; /* next in vhci acc list */
+} mdi_async_client_config_t;
+
+/*
  * vHCI driver instance registration/unregistration
  *
  * mdi_vhci_register() is called by a vHCI driver to register itself as the
--- a/usr/src/uts/common/sys/modctl.h	Thu Nov 10 01:39:35 2005 -0800
+++ b/usr/src/uts/common/sys/modctl.h	Thu Nov 10 07:14:29 2005 -0800
@@ -220,7 +220,6 @@
 #define	MODGETDEVPOLICY		27
 #define	MODALLOCPRIV		28
 #define	MODGETDEVPOLICYBYNAME	29
-#define	MODCLEANUP		30
 #define	MODLOADMINORPERM	31
 #define	MODADDMINORPERM		32
 #define	MODREMMINORPERM		33
--- a/usr/src/uts/common/sys/sunmdi.h	Thu Nov 10 01:39:35 2005 -0800
+++ b/usr/src/uts/common/sys/sunmdi.h	Thu Nov 10 07:14:29 2005 -0800
@@ -239,7 +239,7 @@
 void mdi_pi_set_phci_private(mdi_pathinfo_t *, caddr_t);
 
 int mdi_vhci_bus_config(dev_info_t *, uint_t, ddi_bus_config_op_t, void *,
-    dev_info_t **);
+    dev_info_t **, char *);
 
 #endif /* _KERNEL */