6574286 removing a slog doesn't work
author George Wilson <George.Wilson@Sun.COM>
Mon, 21 Sep 2009 10:38:24 -0700
changeset 10594 986cb68d2347
parent 10593 f28e850593b5
child 10595 1df9a0df7a0b
6574286 removing a slog doesn't work
6856566 zpool import -F can cause panic
6863456 system panic by load_nvlist(spa, spa->spa_config_object, &nv) == 0 while running zfs test suite
6882947 dump_nvlist() should live in libnvpair
usr/src/cmd/fm/modules/common/zfs-retire/zfs_retire.c
usr/src/cmd/fstyp/fstyp.c
usr/src/cmd/power/handlers.c
usr/src/cmd/zdb/zdb.c
usr/src/cmd/zinject/zinject.c
usr/src/cmd/zpool/zpool_main.c
usr/src/cmd/zpool/zpool_util.c
usr/src/cmd/zpool/zpool_util.h
usr/src/cmd/ztest/ztest.c
usr/src/grub/capability
usr/src/grub/grub-0.97/stage2/zfs-include/zfs.h
usr/src/lib/libnvpair/libnvpair.c
usr/src/lib/libnvpair/libnvpair.h
usr/src/lib/libnvpair/mapfile-vers
usr/src/lib/libzfs/common/libzfs.h
usr/src/lib/libzfs/common/libzfs_import.c
usr/src/lib/libzfs/common/libzfs_pool.c
usr/src/uts/common/fs/zfs/metaslab.c
usr/src/uts/common/fs/zfs/spa.c
usr/src/uts/common/fs/zfs/spa_config.c
usr/src/uts/common/fs/zfs/spa_misc.c
usr/src/uts/common/fs/zfs/sys/metaslab.h
usr/src/uts/common/fs/zfs/sys/metaslab_impl.h
usr/src/uts/common/fs/zfs/sys/spa.h
usr/src/uts/common/fs/zfs/sys/vdev.h
usr/src/uts/common/fs/zfs/sys/vdev_impl.h
usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h
usr/src/uts/common/fs/zfs/sys/zio.h
usr/src/uts/common/fs/zfs/vdev.c
usr/src/uts/common/fs/zfs/vdev_label.c
usr/src/uts/common/fs/zfs/vdev_missing.c
usr/src/uts/common/fs/zfs/zio_inject.c
usr/src/uts/common/sys/fs/zfs.h
--- a/usr/src/cmd/fm/modules/common/zfs-retire/zfs_retire.c	Mon Sep 21 11:25:30 2009 -0600
+++ b/usr/src/cmd/fm/modules/common/zfs-retire/zfs_retire.c	Mon Sep 21 10:38:24 2009 -0700
@@ -166,7 +166,7 @@
 		return;
 	}
 
-	dev_name = zpool_vdev_name(NULL, zhp, vdev);
+	dev_name = zpool_vdev_name(NULL, zhp, vdev, B_FALSE);
 
 	/*
 	 * Try to replace each spare, ending when we successfully
--- a/usr/src/cmd/fstyp/fstyp.c	Mon Sep 21 11:25:30 2009 -0600
+++ b/usr/src/cmd/fstyp/fstyp.c	Mon Sep 21 10:38:24 2009 -0700
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -50,7 +50,6 @@
 
 static const char *getmodfsname();
 static char *getexecpathname();
-static void dump_nvlist(nvlist_t *list, int indent);
 static boolean_t dos_to_dev(char *path, char **devpath, int *num);
 static boolean_t find_dos_drive(int fd, int num, off_t *offset);
 static void run_legacy_cmds(int fd, char *device, int vflag);
@@ -177,151 +176,6 @@
 
 }
 
-#define	NVP(elem, type, vtype, ptype, format) { \
-	vtype	value; \
-\
-	(void) nvpair_value_##type(elem, &value); \
-	(void) printf("%*s%s: " format "\n", indent, "", \
-	    nvpair_name(elem), (ptype)value); \
-}
-
-#define	NVPA(elem, type, vtype, ptype, format) { \
-	uint_t	i, count; \
-	vtype	*value;  \
-\
-	(void) nvpair_value_##type(elem, &value, &count); \
-	for (i = 0; i < count; i++) { \
-		(void) printf("%*s%s[%d]: " format "\n", indent, "", \
-		    nvpair_name(elem), i, (ptype)value[i]); \
-	} \
-}
-
-static void
-dump_nvlist(nvlist_t *list, int indent)
-{
-	nvpair_t	*elem = NULL;
-	boolean_t	bool_value;
-	nvlist_t	*nvlist_value;
-	nvlist_t	**nvlist_array_value;
-	uint_t		i, count;
-
-	if (list == NULL) {
-		return;
-	}
-
-	while ((elem = nvlist_next_nvpair(list, elem)) != NULL) {
-		switch (nvpair_type(elem)) {
-		case DATA_TYPE_BOOLEAN_VALUE:
-			(void) nvpair_value_boolean_value(elem, &bool_value);
-			(void) printf("%*s%s: %s\n", indent, "",
-			    nvpair_name(elem), bool_value ? "true" : "false");
-			break;
-
-		case DATA_TYPE_BYTE:
-			NVP(elem, byte, uchar_t, int, "%u");
-			break;
-
-		case DATA_TYPE_INT8:
-			NVP(elem, int8, int8_t, int, "%d");
-			break;
-
-		case DATA_TYPE_UINT8:
-			NVP(elem, uint8, uint8_t, int, "%u");
-			break;
-
-		case DATA_TYPE_INT16:
-			NVP(elem, int16, int16_t, int, "%d");
-			break;
-
-		case DATA_TYPE_UINT16:
-			NVP(elem, uint16, uint16_t, int, "%u");
-			break;
-
-		case DATA_TYPE_INT32:
-			NVP(elem, int32, int32_t, long, "%ld");
-			break;
-
-		case DATA_TYPE_UINT32:
-			NVP(elem, uint32, uint32_t, ulong_t, "%lu");
-			break;
-
-		case DATA_TYPE_INT64:
-			NVP(elem, int64, int64_t, longlong_t, "%lld");
-			break;
-
-		case DATA_TYPE_UINT64:
-			NVP(elem, uint64, uint64_t, u_longlong_t, "%llu");
-			break;
-
-		case DATA_TYPE_STRING:
-			NVP(elem, string, char *, char *, "'%s'");
-			break;
-
-		case DATA_TYPE_BYTE_ARRAY:
-			NVPA(elem, byte_array, uchar_t, int, "%u");
-			break;
-
-		case DATA_TYPE_INT8_ARRAY:
-			NVPA(elem, int8_array, int8_t, int, "%d");
-			break;
-
-		case DATA_TYPE_UINT8_ARRAY:
-			NVPA(elem, uint8_array, uint8_t, int, "%u");
-			break;
-
-		case DATA_TYPE_INT16_ARRAY:
-			NVPA(elem, int16_array, int16_t, int, "%d");
-			break;
-
-		case DATA_TYPE_UINT16_ARRAY:
-			NVPA(elem, uint16_array, uint16_t, int, "%u");
-			break;
-
-		case DATA_TYPE_INT32_ARRAY:
-			NVPA(elem, int32_array, int32_t, long, "%ld");
-			break;
-
-		case DATA_TYPE_UINT32_ARRAY:
-			NVPA(elem, uint32_array, uint32_t, ulong_t, "%lu");
-			break;
-
-		case DATA_TYPE_INT64_ARRAY:
-			NVPA(elem, int64_array, int64_t, longlong_t, "%lld");
-			break;
-
-		case DATA_TYPE_UINT64_ARRAY:
-			NVPA(elem, uint64_array, uint64_t, u_longlong_t,
-			    "%llu");
-			break;
-
-		case DATA_TYPE_STRING_ARRAY:
-			NVPA(elem, string_array, char *, char *, "'%s'");
-			break;
-
-		case DATA_TYPE_NVLIST:
-			(void) nvpair_value_nvlist(elem, &nvlist_value);
-			(void) printf("%*s%s:\n", indent, "",
-			    nvpair_name(elem));
-			dump_nvlist(nvlist_value, indent + 4);
-			break;
-
-		case DATA_TYPE_NVLIST_ARRAY:
-			(void) nvpair_value_nvlist_array(elem,
-			    &nvlist_array_value, &count);
-			for (i = 0; i < count; i++) {
-				(void) printf("%*s%s[%u]:\n", indent, "",
-				    nvpair_name(elem), i);
-				dump_nvlist(nvlist_array_value[i], indent + 4);
-			}
-			break;
-
-		default:
-			(void) printf(gettext("bad config type %d for %s\n"),
-			    nvpair_type(elem), nvpair_name(elem));
-		}
-	}
-}
-
 /*
  * If the executable is a fs-specific hardlink, /usr/lib/fs/<fsname>/fstyp,
  * return that fsname; otherwise return NULL.
--- a/usr/src/cmd/power/handlers.c	Mon Sep 21 11:25:30 2009 -0600
+++ b/usr/src/cmd/power/handlers.c	Mon Sep 21 10:38:24 2009 -0700
@@ -1043,7 +1043,7 @@
 		libzfs_fini(lzfs);
 		return (-1);
 	}
-	vname = zpool_vdev_name(lzfs, zpool_handle, child[0]);
+	vname = zpool_vdev_name(lzfs, zpool_handle, child[0], B_FALSE);
 	if (vname == NULL) {
 		mesg(MERR, "couldn't determine vdev name\n");
 		zpool_close(zpool_handle);
--- a/usr/src/cmd/zdb/zdb.c	Mon Sep 21 11:25:30 2009 -0600
+++ b/usr/src/cmd/zdb/zdb.c	Mon Sep 21 10:38:24 2009 -0700
@@ -146,68 +146,6 @@
 	exit(1);
 }
 
-static void
-dump_nvlist(nvlist_t *list, int indent)
-{
-	nvpair_t *elem = NULL;
-
-	while ((elem = nvlist_next_nvpair(list, elem)) != NULL) {
-		switch (nvpair_type(elem)) {
-		case DATA_TYPE_STRING:
-			{
-				char *value;
-
-				VERIFY(nvpair_value_string(elem, &value) == 0);
-				(void) printf("%*s%s='%s'\n", indent, "",
-				    nvpair_name(elem), value);
-			}
-			break;
-
-		case DATA_TYPE_UINT64:
-			{
-				uint64_t value;
-
-				VERIFY(nvpair_value_uint64(elem, &value) == 0);
-				(void) printf("%*s%s=%llu\n", indent, "",
-				    nvpair_name(elem), (u_longlong_t)value);
-			}
-			break;
-
-		case DATA_TYPE_NVLIST:
-			{
-				nvlist_t *value;
-
-				VERIFY(nvpair_value_nvlist(elem, &value) == 0);
-				(void) printf("%*s%s\n", indent, "",
-				    nvpair_name(elem));
-				dump_nvlist(value, indent + 4);
-			}
-			break;
-
-		case DATA_TYPE_NVLIST_ARRAY:
-			{
-				nvlist_t **value;
-				uint_t c, count;
-
-				VERIFY(nvpair_value_nvlist_array(elem, &value,
-				    &count) == 0);
-
-				for (c = 0; c < count; c++) {
-					(void) printf("%*s%s[%u]\n", indent, "",
-					    nvpair_name(elem), c);
-					dump_nvlist(value[c], indent + 8);
-				}
-			}
-			break;
-
-		default:
-
-			(void) printf("bad config type %d for %s\n",
-			    nvpair_type(elem), nvpair_name(elem));
-		}
-	}
-}
-
 /* ARGSUSED */
 static void
 dump_packed_nvlist(objset_t *os, uint64_t object, void *data, size_t size)
--- a/usr/src/cmd/zinject/zinject.c	Mon Sep 21 11:25:30 2009 -0600
+++ b/usr/src/cmd/zinject/zinject.c	Mon Sep 21 10:38:24 2009 -0700
@@ -222,6 +222,11 @@
 	    "\t\tClear the particular record (if given a numeric ID), or\n"
 	    "\t\tall records if 'all' is specificed.\n"
 	    "\n"
+	    "\tzinject -p <function name> pool\n"
+	    "\t\tInject a panic fault at the specified function. Only \n"
+	    "\t\tfunctions which call spa_vdev_config_exit(), or \n"
+	    "\t\tspa_vdev_exit() will trigger a panic.\n"
+	    "\n"
 	    "\tzinject -d device [-e errno] [-L <nvlist|uber>] [-F] pool\n"
 	    "\t\tInject a fault into a particular device or the device's\n"
 	    "\t\tlabel.  Label injection can either be 'nvlist' or 'uber'.\n"
@@ -295,7 +300,7 @@
 {
 	int *count = data;
 
-	if (record->zi_guid != 0)
+	if (record->zi_guid != 0 || record->zi_func[0] != '\0')
 		return (0);
 
 	if (*count == 0) {
@@ -327,7 +332,7 @@
 {
 	int *count = data;
 
-	if (record->zi_guid == 0)
+	if (record->zi_guid == 0 || record->zi_func[0] != '\0')
 		return (0);
 
 	if (*count == 0) {
@@ -343,6 +348,27 @@
 	return (0);
 }
 
+/* iter_handlers() callback: print one table row per panic-injection record. */
+static int
+print_panic_handler(int id, const char *pool, zinject_record_t *record,
+    void *data)
+{
+	int *shown = data;
+	const char *func = record->zi_func;
+
+	/* Records with an empty function name are not panic handlers. */
+	if (*func == '\0')
+		return (0);
+
+	/* The column header goes out once, ahead of the first row. */
+	if ((*shown)++ == 0) {
+		(void) printf("%3s  %-15s  %s\n", "ID", "POOL", "FUNCTION");
+		(void) printf("---  ---------------  ----------------\n");
+	}
+	(void) printf("%3d  %-15s  %s\n", id, pool, func);
+	return (0);
+}
+
 /*
  * Print all registered error handlers.  Returns the number of handlers
  * registered.
@@ -356,6 +382,9 @@
 	(void) printf("\n");
 	count = 0;
 	(void) iter_handlers(print_data_handler, &count);
+	(void) printf("\n");
+	count = 0;
+	(void) iter_handlers(print_panic_handler, &count);
 
 	return (count);
 }
@@ -443,6 +472,9 @@
 		if (record->zi_guid) {
 			(void) printf("  vdev: %llx\n",
 			    (u_longlong_t)record->zi_guid);
+		} else if (record->zi_func[0] != '\0') {
+			(void) printf("  panic function: %s\n",
+			    record->zi_func);
 		} else {
 			(void) printf("objset: %llu\n",
 			    (u_longlong_t)record->zi_objset);
@@ -514,7 +546,7 @@
 		return (0);
 	}
 
-	while ((c = getopt(argc, argv, ":ab:d:f:Fqhc:t:l:mr:e:uL:")) != -1) {
+	while ((c = getopt(argc, argv, ":ab:d:f:Fqhc:t:l:mr:e:uL:p:")) != -1) {
 		switch (c) {
 		case 'a':
 			flags |= ZINJECT_FLUSH_ARC;
@@ -569,6 +601,10 @@
 		case 'm':
 			domount = 1;
 			break;
+		case 'p':
+			(void) strlcpy(record.zi_func, optarg,
+			    sizeof (record.zi_func));
+			break;
 		case 'q':
 			quiet = 1;
 			break;
@@ -617,7 +653,7 @@
 		 * '-c' is invalid with any other options.
 		 */
 		if (raw != NULL || range != NULL || type != TYPE_INVAL ||
-		    level != 0) {
+		    level != 0 || record.zi_func[0] != '\0') {
 			(void) fprintf(stderr, "cancel (-c) incompatible with "
 			    "any other options\n");
 			usage();
@@ -649,7 +685,7 @@
 		 * for doing injection, so handle it separately here.
 		 */
 		if (raw != NULL || range != NULL || type != TYPE_INVAL ||
-		    level != 0) {
+		    level != 0 || record.zi_func[0] != '\0') {
 			(void) fprintf(stderr, "device (-d) incompatible with "
 			    "data error injection\n");
 			usage();
@@ -677,7 +713,8 @@
 		if (!error)
 			error = ENXIO;
 	} else if (raw != NULL) {
-		if (range != NULL || type != TYPE_INVAL || level != 0) {
+		if (range != NULL || type != TYPE_INVAL || level != 0 ||
+		    record.zi_func[0] != '\0') {
 			(void) fprintf(stderr, "raw (-b) format with "
 			    "any other options\n");
 			usage();
@@ -704,10 +741,28 @@
 			return (1);
 		if (!error)
 			error = EIO;
+	} else if (record.zi_func[0] != '\0') {
+		if (raw != NULL || range != NULL || type != TYPE_INVAL ||
+		    level != 0 || device != NULL) {
+			(void) fprintf(stderr, "panic (-p) incompatible with "
+			    "other options\n");
+			usage();
+			return (2);
+		}
+
+		if (argc != 1) {
+			(void) fprintf(stderr, "panic (-p) injection requires "
+			    "a single pool name\n");
+			usage();
+			return (2);
+		}
+
+		(void) strcpy(pool, argv[0]);
+		dataset[0] = '\0';
 	} else if (type == TYPE_INVAL) {
 		if (flags == 0) {
 			(void) fprintf(stderr, "at least one of '-b', '-d', "
-			    "'-t', '-a', or '-u' must be specified\n");
+			    "'-t', '-a', '-p', or '-u' must be specified\n");
 			usage();
 			return (2);
 		}
--- a/usr/src/cmd/zpool/zpool_main.c	Mon Sep 21 11:25:30 2009 -0600
+++ b/usr/src/cmd/zpool/zpool_main.c	Mon Sep 21 10:38:24 2009 -0700
@@ -343,7 +343,7 @@
 		if ((is_log && !print_logs) || (!is_log && print_logs))
 			continue;
 
-		vname = zpool_vdev_name(g_zfs, zhp, child[c]);
+		vname = zpool_vdev_name(g_zfs, zhp, child[c], B_FALSE);
 		print_vdev_tree(zhp, vname, child[c], indent + 2,
 		    B_FALSE);
 		free(vname);
@@ -944,7 +944,7 @@
 static int
 max_width(zpool_handle_t *zhp, nvlist_t *nv, int depth, int max)
 {
-	char *name = zpool_vdev_name(g_zfs, zhp, nv);
+	char *name = zpool_vdev_name(g_zfs, zhp, nv, B_TRUE);
 	nvlist_t **child;
 	uint_t c, children;
 	int ret;
@@ -1144,14 +1144,16 @@
 	(void) printf("\n");
 
 	for (c = 0; c < children; c++) {
-		uint64_t is_log = B_FALSE;
-
-		/* Don't print logs here */
+		uint64_t islog = B_FALSE, ishole = B_FALSE;
+
+		/* Don't print logs or holes here */
 		(void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
-		    &is_log);
-		if (is_log)
+		    &islog);
+		(void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_HOLE,
+		    &ishole);
+		if (islog || ishole)
 			continue;
-		vname = zpool_vdev_name(g_zfs, zhp, child[c]);
+		vname = zpool_vdev_name(g_zfs, zhp, child[c], B_TRUE);
 		print_status_config(zhp, vname, child[c],
 		    namewidth, depth + 2, isspare);
 		free(vname);
@@ -1172,7 +1174,8 @@
 	char *type, *vname;
 
 	verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0);
-	if (strcmp(type, VDEV_TYPE_MISSING) == 0)
+	if (strcmp(type, VDEV_TYPE_MISSING) == 0 ||
+	    strcmp(type, VDEV_TYPE_HOLE) == 0)
 		return;
 
 	verify(nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_STATS,
@@ -1224,7 +1227,7 @@
 		if (is_log)
 			continue;
 
-		vname = zpool_vdev_name(g_zfs, NULL, child[c]);
+		vname = zpool_vdev_name(g_zfs, NULL, child[c], B_TRUE);
 		print_import_config(vname, child[c], namewidth, depth + 2);
 		free(vname);
 	}
@@ -1233,7 +1236,7 @@
 	    &child, &children) == 0) {
 		(void) printf(gettext("\tcache\n"));
 		for (c = 0; c < children; c++) {
-			vname = zpool_vdev_name(g_zfs, NULL, child[c]);
+			vname = zpool_vdev_name(g_zfs, NULL, child[c], B_FALSE);
 			(void) printf("\t  %s\n", vname);
 			free(vname);
 		}
@@ -1243,7 +1246,7 @@
 	    &child, &children) == 0) {
 		(void) printf(gettext("\tspares\n"));
 		for (c = 0; c < children; c++) {
-			vname = zpool_vdev_name(g_zfs, NULL, child[c]);
+			vname = zpool_vdev_name(g_zfs, NULL, child[c], B_FALSE);
 			(void) printf("\t  %s\n", vname);
 			free(vname);
 		}
@@ -1278,7 +1281,7 @@
 		    &is_log);
 		if (!is_log)
 			continue;
-		name = zpool_vdev_name(g_zfs, zhp, child[c]);
+		name = zpool_vdev_name(g_zfs, zhp, child[c], B_TRUE);
 		if (verbose)
 			print_status_config(zhp, name, child[c], namewidth,
 			    2, B_FALSE);
@@ -1964,7 +1967,7 @@
 		return;
 
 	for (c = 0; c < children; c++) {
-		vname = zpool_vdev_name(g_zfs, zhp, newchild[c]);
+		vname = zpool_vdev_name(g_zfs, zhp, newchild[c], B_FALSE);
 		print_vdev_stats(zhp, vname, oldnv ? oldchild[c] : NULL,
 		    newchild[c], cb, depth + 2);
 		free(vname);
@@ -1985,7 +1988,8 @@
 		(void) printf("%-*s      -      -      -      -      -      "
 		    "-\n", cb->cb_namewidth, "cache");
 		for (c = 0; c < children; c++) {
-			vname = zpool_vdev_name(g_zfs, zhp, newchild[c]);
+			vname = zpool_vdev_name(g_zfs, zhp, newchild[c],
+			    B_FALSE);
 			print_vdev_stats(zhp, vname, oldnv ? oldchild[c] : NULL,
 			    newchild[c], cb, depth + 2);
 			free(vname);
@@ -2996,7 +3000,7 @@
 	(void) printf(gettext("\tspares\n"));
 
 	for (i = 0; i < nspares; i++) {
-		name = zpool_vdev_name(g_zfs, zhp, spares[i]);
+		name = zpool_vdev_name(g_zfs, zhp, spares[i], B_FALSE);
 		print_status_config(zhp, name, spares[i],
 		    namewidth, 2, B_TRUE);
 		free(name);
@@ -3016,7 +3020,7 @@
 	(void) printf(gettext("\tcache\n"));
 
 	for (i = 0; i < nl2cache; i++) {
-		name = zpool_vdev_name(g_zfs, zhp, l2cache[i]);
+		name = zpool_vdev_name(g_zfs, zhp, l2cache[i], B_FALSE);
 		print_status_config(zhp, name, l2cache[i],
 		    namewidth, 2, B_FALSE);
 		free(name);
@@ -3573,6 +3577,7 @@
 		(void) printf(gettext(" 16  stmf property support\n"));
 		(void) printf(gettext(" 17  Triple-parity RAID-Z\n"));
 		(void) printf(gettext(" 18  snapshot user holds\n"));
+		(void) printf(gettext(" 19  Log device removal\n"));
 		(void) printf(gettext("For more information on a particular "
 		    "version, including supported releases, see:\n\n"));
 		(void) printf("http://www.opensolaris.org/os/community/zfs/"
--- a/usr/src/cmd/zpool/zpool_util.c	Mon Sep 21 11:25:30 2009 -0600
+++ b/usr/src/cmd/zpool/zpool_util.c	Mon Sep 21 10:38:24 2009 -0700
@@ -49,22 +49,6 @@
 }
 
 /*
- * Same as above, but for strdup()
- */
-char *
-zpool_safe_strdup(const char *str)
-{
-	char *ret;
-
-	if ((ret = strdup(str)) == NULL) {
-		(void) fprintf(stderr, "internal error: out of memory\n");
-		exit(1);
-	}
-
-	return (ret);
-}
-
-/*
  * Display an out of memory error message and abort the current program.
  */
 void
--- a/usr/src/cmd/zpool/zpool_util.h	Mon Sep 21 11:25:30 2009 -0600
+++ b/usr/src/cmd/zpool/zpool_util.h	Mon Sep 21 10:38:24 2009 -0700
@@ -37,7 +37,6 @@
  * Basic utility functions
  */
 void *safe_malloc(size_t);
-char *zpool_safe_strdup(const char *);
 void zpool_no_memory(void);
 uint_t num_logs(nvlist_t *nv);
 
--- a/usr/src/cmd/ztest/ztest.c	Mon Sep 21 11:25:30 2009 -0600
+++ b/usr/src/cmd/ztest/ztest.c	Mon Sep 21 10:38:24 2009 -0700
@@ -92,6 +92,7 @@
 #include <sys/vdev_impl.h>
 #include <sys/vdev_file.h>
 #include <sys/spa_impl.h>
+#include <sys/metaslab_impl.h>
 #include <sys/dsl_prop.h>
 #include <sys/dsl_dataset.h>
 #include <sys/refcount.h>
@@ -231,7 +232,7 @@
 typedef struct ztest_shared {
 	mutex_t		zs_vdev_lock;
 	rwlock_t	zs_name_lock;
-	uint64_t	zs_vdev_primaries;
+	uint64_t	zs_vdev_next_leaf;
 	uint64_t	zs_vdev_aux;
 	uint64_t	zs_enospc_count;
 	hrtime_t	zs_start_time;
@@ -558,7 +559,7 @@
 			(void) sprintf(path, ztest_aux_template,
 			    zopt_dir, zopt_pool, aux, vdev);
 		} else {
-			vdev = ztest_shared->zs_vdev_primaries++;
+			vdev = ztest_shared->zs_vdev_next_leaf++;
 			(void) sprintf(path, ztest_dev_template,
 			    zopt_dir, zopt_pool, vdev);
 		}
@@ -850,6 +851,26 @@
 }
 
 /*
+ * Return the index of the first child of the root vdev that is flagged
+ * vdev_ishole, or the root's vdev_children when no child is a hole.
+ */
+int
+find_vdev_hole(spa_t *spa)
+{
+	vdev_t *root = spa->spa_root_vdev;
+	int idx = 0;
+
+	ASSERT(spa_config_held(spa, SCL_VDEV, RW_READER) == SCL_VDEV);
+
+	/* Step past every child that is not a hole. */
+	while (idx < root->vdev_children &&
+	    !root->vdev_child[idx]->vdev_ishole)
+		idx++;
+
+	return (idx);
+}
+
+/*
  * Verify that vdev_add() works as expected.
  */
 void
@@ -857,6 +878,7 @@
 {
 	spa_t *spa = za->za_spa;
 	uint64_t leaves = MAX(zopt_mirrors, 1) * zopt_raidz;
+	uint64_t guid;
 	nvlist_t *nvroot;
 	int error;
 
@@ -864,26 +886,52 @@
 
 	spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
 
-	ztest_shared->zs_vdev_primaries =
-	    spa->spa_root_vdev->vdev_children * leaves;
-
-	spa_config_exit(spa, SCL_VDEV, FTAG);
+	ztest_shared->zs_vdev_next_leaf = find_vdev_hole(spa) * leaves;
 
 	/*
-	 * Make 1/4 of the devices be log devices.
+	 * If we have slogs then remove them 1/4 of the time.
 	 */
-	nvroot = make_vdev_root(NULL, NULL, zopt_vdev_size, 0,
-	    ztest_random(4) == 0, zopt_raidz, zopt_mirrors, 1);
-
-	error = spa_vdev_add(spa, nvroot);
-	nvlist_free(nvroot);
+	if (spa_has_slogs(spa) && ztest_random(4) == 0) {
+		/*
+		 * Grab the guid from the head of the log class rotor.
+		 */
+		guid = spa->spa_log_class->mc_rotor->mg_vd->vdev_guid;
+
+		spa_config_exit(spa, SCL_VDEV, FTAG);
+
+		/*
+		 * We have to grab the zs_name_lock as writer to
+		 * prevent a race between removing a slog (dmu_objset_find)
+		 * and destroying a dataset. Removing the slog will
+		 * grab a reference on the dataset which may cause
+		 * dmu_objset_destroy() to fail with EBUSY thus
+		 * leaving the dataset in an inconsistent state.
+		 */
+		(void) rw_wrlock(&ztest_shared->zs_name_lock);
+		error = spa_vdev_remove(spa, guid, B_FALSE);
+		(void) rw_unlock(&ztest_shared->zs_name_lock);
+
+		if (error && error != EEXIST)
+			fatal(0, "spa_vdev_remove() = %d", error);
+	} else {
+		spa_config_exit(spa, SCL_VDEV, FTAG);
+
+		/*
+		 * Make 1/4 of the devices be log devices.
+		 */
+		nvroot = make_vdev_root(NULL, NULL, zopt_vdev_size, 0,
+		    ztest_random(4) == 0, zopt_raidz, zopt_mirrors, 1);
+
+		error = spa_vdev_add(spa, nvroot);
+		nvlist_free(nvroot);
+
+		if (error == ENOSPC)
+			ztest_record_enospc("spa_vdev_add");
+		else if (error != 0)
+			fatal(0, "spa_vdev_add() = %d", error);
+	}
 
 	(void) mutex_unlock(&ztest_shared->zs_vdev_lock);
-
-	if (error == ENOSPC)
-		ztest_record_enospc("spa_vdev_add");
-	else if (error != 0)
-		fatal(0, "spa_vdev_add() = %d", error);
 }
 
 /*
@@ -4004,7 +4052,7 @@
 	 * Create the storage pool.
 	 */
 	(void) spa_destroy(pool);
-	ztest_shared->zs_vdev_primaries = 0;
+	ztest_shared->zs_vdev_next_leaf = 0;
 	nvroot = make_vdev_root(NULL, NULL, zopt_vdev_size, 0,
 	    0, zopt_raidz, zopt_mirrors, 1);
 	error = spa_create(pool, nvroot, NULL, NULL, NULL);
--- a/usr/src/grub/capability	Mon Sep 21 11:25:30 2009 -0600
+++ b/usr/src/grub/capability	Mon Sep 21 10:38:24 2009 -0700
@@ -40,7 +40,7 @@
 # This file and the associated version are Solaris specific and are
 # not a part of the open source distribution of GRUB.
 #
-VERSION=11
+VERSION=12
 dboot
 xVM
 zfs
--- a/usr/src/grub/grub-0.97/stage2/zfs-include/zfs.h	Mon Sep 21 11:25:30 2009 -0600
+++ b/usr/src/grub/grub-0.97/stage2/zfs-include/zfs.h	Mon Sep 21 10:38:24 2009 -0700
@@ -27,7 +27,7 @@
 /*
  * On-disk version number.
  */
-#define	SPA_VERSION			18ULL
+#define	SPA_VERSION			19ULL
 
 /*
  * The following are configuration names used in the nvlist describing a pool's
@@ -61,6 +61,9 @@
 #define	ZPOOL_CONFIG_NPARITY		"nparity"
 #define	ZPOOL_CONFIG_PHYS_PATH		"phys_path"
 #define	ZPOOL_CONFIG_L2CACHE		"l2cache"
+#define	ZPOOL_CONFIG_HOLE_ARRAY		"hole_array"
+#define	ZPOOL_CONFIG_VDEV_CHILDREN	"vdev_children"
+#define	ZPOOL_CONFIG_IS_HOLE		"is_hole"
 /*
  * The persistent vdev state is stored as separate values rather than a single
  * 'vdev_state' entry.  This is because a device can be in multiple states, such
@@ -78,6 +81,7 @@
 #define	VDEV_TYPE_DISK			"disk"
 #define	VDEV_TYPE_FILE			"file"
 #define	VDEV_TYPE_MISSING		"missing"
+#define	VDEV_TYPE_HOLE			"hole"
 #define	VDEV_TYPE_SPARE			"spare"
 #define	VDEV_TYPE_L2CACHE		"l2cache"
 
--- a/usr/src/lib/libnvpair/libnvpair.c	Mon Sep 21 11:25:30 2009 -0600
+++ b/usr/src/lib/libnvpair/libnvpair.c	Mon Sep 21 10:38:24 2009 -0700
@@ -19,14 +19,13 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #include <unistd.h>
 #include <strings.h>
+#include <libintl.h>
 #include <sys/types.h>
 #include <sys/inttypes.h>
 #include "libnvpair.h"
@@ -272,6 +271,156 @@
 	nvlist_print_with_indent(fp, nvl, 0);
 }
 
+/* dump_nvlist() helpers; both use the caller's 'indent' variable. */
+#define	NVP(elem, type, vtype, ptype, format) { \
+	vtype	value; \
+\
+	(void) nvpair_value_##type(elem, &value); \
+	(void) printf("%*s%s: " format "\n", indent, "", \
+	    nvpair_name(elem), (ptype)value); \
+}
+
+#define	NVPA(elem, type, vtype, ptype, format) { \
+	uint_t	i, count; \
+	vtype	*value;  \
+\
+	(void) nvpair_value_##type(elem, &value, &count); \
+	for (i = 0; i < count; i++) { \
+		(void) printf("%*s%s[%u]: " format "\n", indent, "", \
+		    nvpair_name(elem), i, (ptype)value[i]); \
+	} \
+}
+
+/*
+ * Similar to nvlist_print() but handles arrays slightly differently.
+ * Prints each pair of 'list' to stdout, indented by 'indent' columns.
+ */
+void
+dump_nvlist(nvlist_t *list, int indent)
+{
+	nvpair_t	*elem = NULL;
+	boolean_t	bool_value;
+	nvlist_t	*nvlist_value;
+	nvlist_t	**nvlist_array_value;
+	uint_t		i, count;
+
+	if (list == NULL) {
+		return;
+	}
+	while ((elem = nvlist_next_nvpair(list, elem)) != NULL) {
+		switch (nvpair_type(elem)) {
+		case DATA_TYPE_BOOLEAN_VALUE:
+			(void) nvpair_value_boolean_value(elem, &bool_value);
+			(void) printf("%*s%s: %s\n", indent, "",
+			    nvpair_name(elem), bool_value ? "true" : "false");
+			break;
+
+		case DATA_TYPE_BYTE:
+			NVP(elem, byte, uchar_t, int, "%u");
+			break;
+
+		case DATA_TYPE_INT8:
+			NVP(elem, int8, int8_t, int, "%d");
+			break;
+
+		case DATA_TYPE_UINT8:
+			NVP(elem, uint8, uint8_t, int, "%u");
+			break;
+
+		case DATA_TYPE_INT16:
+			NVP(elem, int16, int16_t, int, "%d");
+			break;
+
+		case DATA_TYPE_UINT16:
+			NVP(elem, uint16, uint16_t, int, "%u");
+			break;
+
+		case DATA_TYPE_INT32:
+			NVP(elem, int32, int32_t, long, "%ld");
+			break;
+
+		case DATA_TYPE_UINT32:
+			NVP(elem, uint32, uint32_t, ulong_t, "%lu");
+			break;
+
+		case DATA_TYPE_INT64:
+			NVP(elem, int64, int64_t, longlong_t, "%lld");
+			break;
+
+		case DATA_TYPE_UINT64:
+			NVP(elem, uint64, uint64_t, u_longlong_t, "%llu");
+			break;
+
+		case DATA_TYPE_STRING:
+			NVP(elem, string, char *, char *, "'%s'");
+			break;
+
+		case DATA_TYPE_BYTE_ARRAY:
+			NVPA(elem, byte_array, uchar_t, int, "%u");
+			break;
+
+		case DATA_TYPE_INT8_ARRAY:
+			NVPA(elem, int8_array, int8_t, int, "%d");
+			break;
+
+		case DATA_TYPE_UINT8_ARRAY:
+			NVPA(elem, uint8_array, uint8_t, int, "%u");
+			break;
+
+		case DATA_TYPE_INT16_ARRAY:
+			NVPA(elem, int16_array, int16_t, int, "%d");
+			break;
+
+		case DATA_TYPE_UINT16_ARRAY:
+			NVPA(elem, uint16_array, uint16_t, int, "%u");
+			break;
+
+		case DATA_TYPE_INT32_ARRAY:
+			NVPA(elem, int32_array, int32_t, long, "%ld");
+			break;
+
+		case DATA_TYPE_UINT32_ARRAY:
+			NVPA(elem, uint32_array, uint32_t, ulong_t, "%lu");
+			break;
+
+		case DATA_TYPE_INT64_ARRAY:
+			NVPA(elem, int64_array, int64_t, longlong_t, "%lld");
+			break;
+
+		case DATA_TYPE_UINT64_ARRAY:
+			NVPA(elem, uint64_array, uint64_t, u_longlong_t,
+			    "%llu");
+			break;
+
+		case DATA_TYPE_STRING_ARRAY:
+			NVPA(elem, string_array, char *, char *, "'%s'");
+			break;
+		/* Nested nvlists recurse with four more columns of indent. */
+		case DATA_TYPE_NVLIST:
+			(void) nvpair_value_nvlist(elem, &nvlist_value);
+			(void) printf("%*s%s:\n", indent, "",
+			    nvpair_name(elem));
+			dump_nvlist(nvlist_value, indent + 4);
+			break;
+
+		case DATA_TYPE_NVLIST_ARRAY:
+			(void) nvpair_value_nvlist_array(elem,
+			    &nvlist_array_value, &count);
+			for (i = 0; i < count; i++) {
+				(void) printf("%*s%s[%u]:\n", indent, "",
+				    nvpair_name(elem), i);
+				dump_nvlist(nvlist_array_value[i], indent + 4);
+			}
+			break;
+		/* Data types without a case above are reported by number. */
+		default:
+			(void) printf(dgettext(TEXT_DOMAIN, "bad config type "
+			    "%d for %s\n"), nvpair_type(elem),
+			    nvpair_name(elem));
+		}
+	}
+}
+
 /*
  * Determine if string 'value' matches 'nvp' value.  The 'value' string is
  * converted, depending on the type of 'nvp', prior to match.  For numeric
--- a/usr/src/lib/libnvpair/libnvpair.h	Mon Sep 21 11:25:30 2009 -0600
+++ b/usr/src/lib/libnvpair/libnvpair.h	Mon Sep 21 10:38:24 2009 -0700
@@ -19,15 +19,13 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
 #ifndef	_LIBNVPAIR_H
 #define	_LIBNVPAIR_H
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #include <sys/nvpair.h>
 #include <stdlib.h>
 #include <stdio.h>
@@ -40,6 +38,7 @@
 void nvlist_print(FILE *, nvlist_t *);
 int nvpair_value_match(nvpair_t *, int, char *, char **);
 int nvpair_value_match_regex(nvpair_t *, int, char *, regex_t *, char **);
+void dump_nvlist(nvlist_t *, int);
 
 #ifdef	__cplusplus
 }
--- a/usr/src/lib/libnvpair/mapfile-vers	Mon Sep 21 11:25:30 2009 -0600
+++ b/usr/src/lib/libnvpair/mapfile-vers	Mon Sep 21 10:38:24 2009 -0700
@@ -166,6 +166,7 @@
 
 SUNWprivate_1.1 {
     global:
+	dump_nvlist;
 	nvlist_add_hrtime;
 	nvlist_lookup_hrtime;
 	nvlist_print;
--- a/usr/src/lib/libzfs/common/libzfs.h	Mon Sep 21 11:25:30 2009 -0600
+++ b/usr/src/lib/libzfs/common/libzfs.h	Mon Sep 21 10:38:24 2009 -0700
@@ -332,7 +332,8 @@
  */
 struct zfs_cmd;
 
-extern char *zpool_vdev_name(libzfs_handle_t *, zpool_handle_t *, nvlist_t *);
+extern char *zpool_vdev_name(libzfs_handle_t *, zpool_handle_t *, nvlist_t *,
+    boolean_t verbose);
 extern int zpool_upgrade(zpool_handle_t *, uint64_t);
 extern int zpool_get_history(zpool_handle_t *, nvlist_t **);
 extern void zpool_set_history_str(const char *subcommand, int argc,
--- a/usr/src/lib/libzfs/common/libzfs_import.c	Mon Sep 21 11:25:30 2009 -0600
+++ b/usr/src/lib/libzfs/common/libzfs_import.c	Mon Sep 21 10:38:24 2009 -0700
@@ -19,12 +19,10 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 /*
  * Pool import support functions.
  *
@@ -388,8 +386,6 @@
 	}
 
 	if (err) {
-		(void) zpool_standard_error(hdl, errno,
-		    dgettext(TEXT_DOMAIN, "cannot discover pools"));
 		zcmd_free_nvlists(&zc);
 		return (NULL);
 	}
@@ -404,6 +400,21 @@
 }
 
 /*
+ * Determine if the vdev id is a hole in the namespace: B_TRUE when 'id'
+ * equals one of the 'holes' entries in 'hole_array', otherwise B_FALSE.
+ */
+boolean_t
+vdev_is_hole(uint64_t *hole_array, uint_t holes, uint_t id)
+{
+	for (uint_t c = 0; c < holes; c++) {
+		/* Top-level is a hole */
+		if (hole_array[c] == id)
+			return (B_TRUE);
+	}
+	return (B_FALSE);
+}
+
+/*
  * Convert our list of pools into the definitive set of configurations.  We
  * start by picking the best config for each toplevel vdev.  Once that's done,
  * we assemble the toplevel vdevs into a full config for the pool.  We make a
@@ -425,17 +436,20 @@
 	uint64_t version, guid;
 	uint_t children = 0;
 	nvlist_t **child = NULL;
+	uint_t holes;
+	uint64_t *hole_array, max_id;
 	uint_t c;
 	boolean_t isactive;
 	uint64_t hostid;
 	nvlist_t *nvl;
 	boolean_t found_one = B_FALSE;
+	boolean_t valid_top_config = B_FALSE;
 
 	if (nvlist_alloc(&ret, 0, 0) != 0)
 		goto nomem;
 
 	for (pe = pl->pools; pe != NULL; pe = pe->pe_next) {
-		uint64_t id;
+		uint64_t id, max_txg = 0;
 
 		if (nvlist_alloc(&config, NV_UNIQUE_NAME, 0) != 0)
 			goto nomem;
@@ -463,6 +477,42 @@
 				}
 			}
 
+			/*
+			 * We rely on the fact that the max txg for the
+			 * pool will contain the most up-to-date information
+			 * about the valid top-levels in the vdev namespace.
+			 */
+			if (best_txg > max_txg) {
+				(void) nvlist_remove(config,
+				    ZPOOL_CONFIG_VDEV_CHILDREN,
+				    DATA_TYPE_UINT64);
+				(void) nvlist_remove(config,
+				    ZPOOL_CONFIG_HOLE_ARRAY,
+				    DATA_TYPE_UINT64_ARRAY);
+
+				max_txg = best_txg;
+				hole_array = NULL;
+				holes = 0;
+				max_id = 0;
+				valid_top_config = B_FALSE;
+
+				if (nvlist_lookup_uint64(tmp,
+				    ZPOOL_CONFIG_VDEV_CHILDREN, &max_id) == 0) {
+					verify(nvlist_add_uint64(config,
+					    ZPOOL_CONFIG_VDEV_CHILDREN,
+					    max_id) == 0);
+					valid_top_config = B_TRUE;
+				}
+
+				if (nvlist_lookup_uint64_array(tmp,
+				    ZPOOL_CONFIG_HOLE_ARRAY, &hole_array,
+				    &holes) == 0) {
+					verify(nvlist_add_uint64_array(config,
+					    ZPOOL_CONFIG_HOLE_ARRAY,
+					    hole_array, holes) == 0);
+				}
+			}
+
 			if (!config_seen) {
 				/*
 				 * Copy the relevant pieces of data to the pool
@@ -522,6 +572,7 @@
 			    ZPOOL_CONFIG_VDEV_TREE, &nvtop) == 0);
 			verify(nvlist_lookup_uint64(nvtop, ZPOOL_CONFIG_ID,
 			    &id) == 0);
+
 			if (id >= children) {
 				nvlist_t **newchild;
 
@@ -542,17 +593,82 @@
 
 		}
 
+		/*
+		 * If we have information about all the top-levels then
+		 * clean up the nvlist which we've constructed. This
+		 * means removing any extraneous devices that are
+		 * beyond the valid range or adding devices to the end
+		 * of our array which appear to be missing.
+		 */
+		if (valid_top_config) {
+			if (max_id < children) {
+				for (c = max_id; c < children; c++)
+					nvlist_free(child[c]);
+				children = max_id;
+			} else if (max_id > children) {
+				nvlist_t **newchild;
+
+				newchild = zfs_alloc(hdl, (max_id) *
+				    sizeof (nvlist_t *));
+				if (newchild == NULL)
+					goto nomem;
+
+				for (c = 0; c < children; c++)
+					newchild[c] = child[c];
+
+				free(child);
+				child = newchild;
+				children = max_id;
+			}
+		}
+
 		verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
 		    &guid) == 0);
 
 		/*
+		 * The vdev namespace may contain holes as a result of
+		 * device removal. We must add them back into the vdev
+		 * tree before we process any missing devices.
+		 */
+		if (holes > 0) {
+			ASSERT(valid_top_config);
+
+			for (c = 0; c < children; c++) {
+				nvlist_t *holey;
+
+				if (child[c] != NULL ||
+				    !vdev_is_hole(hole_array, holes, c))
+					continue;
+
+				if (nvlist_alloc(&holey, NV_UNIQUE_NAME,
+				    0) != 0)
+					goto nomem;
+
+				/*
+				 * Holes in the namespace are treated as
+				 * "hole" top-level vdevs and have a
+				 * special flag set on them.
+				 */
+				if (nvlist_add_string(holey,
+				    ZPOOL_CONFIG_TYPE,
+				    VDEV_TYPE_HOLE) != 0 ||
+				    nvlist_add_uint64(holey,
+				    ZPOOL_CONFIG_ID, c) != 0 ||
+				    nvlist_add_uint64(holey,
+				    ZPOOL_CONFIG_GUID, 0ULL) != 0)
+					goto nomem;
+				child[c] = holey;
+			}
+		}
+
+		/*
 		 * Look for any missing top-level vdevs.  If this is the case,
 		 * create a faked up 'missing' vdev as a placeholder.  We cannot
 		 * simply compress the child array, because the kernel performs
 		 * certain checks to make sure the vdev IDs match their location
 		 * in the configuration.
 		 */
-		for (c = 0; c < children; c++)
+		for (c = 0; c < children; c++) {
 			if (child[c] == NULL) {
 				nvlist_t *missing;
 				if (nvlist_alloc(&missing, NV_UNIQUE_NAME,
@@ -570,6 +686,7 @@
 				}
 				child[c] = missing;
 			}
+		}
 
 		/*
 		 * Put all of this pool's top-level vdevs into a root vdev.
@@ -636,8 +753,11 @@
 			continue;
 		}
 
-		if ((nvl = refresh_config(hdl, config)) == NULL)
-			goto error;
+		if ((nvl = refresh_config(hdl, config)) == NULL) {
+			nvlist_free(config);
+			config = NULL;
+			continue;
+		}
 
 		nvlist_free(config);
 		config = nvl;
--- a/usr/src/lib/libzfs/common/libzfs_pool.c	Mon Sep 21 11:25:30 2009 -0600
+++ b/usr/src/lib/libzfs/common/libzfs_pool.c	Mon Sep 21 10:38:24 2009 -0700
@@ -1063,7 +1063,8 @@
 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 				    "device '%s' contains an EFI label and "
 				    "cannot be used on root pools."),
-				    zpool_vdev_name(hdl, NULL, spares[s]));
+				    zpool_vdev_name(hdl, NULL, spares[s],
+				    B_FALSE));
 				return (zfs_error(hdl, EZFS_POOL_NOTSUP, msg));
 			}
 		}
@@ -1419,8 +1420,9 @@
 
 		/*
 		 * Search for the requested value. We special case the search
-		 * for ZPOOL_CONFIG_PATH when it's a wholedisk. Otherwise,
-		 * all other searches are simple string compares.
+		 * for ZPOOL_CONFIG_PATH when it's a wholedisk and when
+		 * looking for a top-level vdev name (i.e. ZPOOL_CONFIG_TYPE).
+		 * Otherwise, all other searches are simple string compares.
 		 */
 		if (strcmp(srchkey, ZPOOL_CONFIG_PATH) == 0 && val) {
 			uint64_t wholedisk = 0;
@@ -1437,6 +1439,52 @@
 					return (nv);
 				break;
 			}
+		} else if (strcmp(srchkey, ZPOOL_CONFIG_TYPE) == 0 && val) {
+			char *type, *idx, *end, *p;
+			uint64_t id, vdev_id;
+
+			/*
+			 * Determine our vdev type, keeping in mind
+			 * that the srchval is composed of a type and
+			 * vdev id pair (e.g. mirror-4).
+			 */
+			if ((type = strdup(srchval)) == NULL)
+				return (NULL);
+
+			if ((p = strrchr(type, '-')) == NULL) {
+				free(type);
+				break;
+			}
+			idx = p + 1;
+			*p = '\0';
+
+			/*
+			 * If the types don't match then keep looking.
+			 */
+			if (strncmp(val, type, strlen(val)) != 0) {
+				free(type);
+				break;
+			}
+
+			verify(strncmp(type, VDEV_TYPE_RAIDZ,
+			    strlen(VDEV_TYPE_RAIDZ)) == 0 ||
+			    strncmp(type, VDEV_TYPE_MIRROR,
+			    strlen(VDEV_TYPE_MIRROR)) == 0);
+			verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID,
+			    &id) == 0);
+
+			errno = 0;
+			vdev_id = strtoull(idx, &end, 10);
+
+			free(type);
+			if (errno != 0)
+				return (NULL);
+
+			/*
+			 * Now verify that we have the correct vdev id.
+			 */
+			if (vdev_id == id)
+				return (nv);
 		}
 
 		/*
@@ -1522,6 +1570,18 @@
 	return (ret);
 }
 
+/*
+ * B_TRUE if 'name' starts with the raidz or mirror vdev type string.
+ */
+boolean_t
+zpool_vdev_is_interior(const char *name)
+{
+	size_t rlen = strlen(VDEV_TYPE_RAIDZ), mlen = strlen(VDEV_TYPE_MIRROR);
+
+	return ((strncmp(name, VDEV_TYPE_RAIDZ, rlen) == 0 ||
+	    strncmp(name, VDEV_TYPE_MIRROR, mlen) == 0) ? B_TRUE : B_FALSE);
+}
+
 nvlist_t *
 zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare,
     boolean_t *l2cache, boolean_t *log)
@@ -1536,6 +1596,8 @@
 	guid = strtoull(path, &end, 10);
 	if (guid != 0 && *end == '\0') {
 		verify(nvlist_add_uint64(search, ZPOOL_CONFIG_GUID, guid) == 0);
+	} else if (zpool_vdev_is_interior(path)) {
+		verify(nvlist_add_string(search, ZPOOL_CONFIG_TYPE, path) == 0);
 	} else if (path[0] != '/') {
 		(void) snprintf(buf, sizeof (buf), "%s%s", "/dev/dsk/", path);
 		verify(nvlist_add_string(search, ZPOOL_CONFIG_PATH, buf) == 0);
@@ -2038,7 +2100,7 @@
 	verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
 	    ZPOOL_CONFIG_VDEV_TREE, &config_root) == 0);
 
-	if ((newname = zpool_vdev_name(NULL, NULL, child[0])) == NULL)
+	if ((newname = zpool_vdev_name(NULL, NULL, child[0], B_FALSE)) == NULL)
 		return (-1);
 
 	/*
@@ -2235,24 +2297,34 @@
 	zfs_cmd_t zc = { 0 };
 	char msg[1024];
 	nvlist_t *tgt;
-	boolean_t avail_spare, l2cache;
+	boolean_t avail_spare, l2cache, islog;
 	libzfs_handle_t *hdl = zhp->zpool_hdl;
+	uint64_t version;
 
 	(void) snprintf(msg, sizeof (msg),
 	    dgettext(TEXT_DOMAIN, "cannot remove %s"), path);
 
 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
 	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
-	    NULL)) == 0)
+	    &islog)) == 0)
 		return (zfs_error(hdl, EZFS_NODEVICE, msg));
-
-	if (!avail_spare && !l2cache) {
+	/*
+	 * XXX - this should just go away.
+	 */
+	if (!avail_spare && !l2cache && !islog) {
 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-		    "only inactive hot spares or cache devices "
-		    "can be removed"));
+		    "only inactive hot spares, cache, top-level, "
+		    "or log devices can be removed"));
 		return (zfs_error(hdl, EZFS_NODEVICE, msg));
 	}
 
+	version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL);
+	if (islog && version < SPA_VERSION_HOLES) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "pool must be upgrade to support log removal"));
+		return (zfs_error(hdl, EZFS_BADVERSION, msg));
+	}
+
 	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
 
 	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_REMOVE, &zc) == 0)
@@ -2420,7 +2492,8 @@
  * of these checks.
  */
 char *
-zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv)
+zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv,
+    boolean_t verbose)
 {
 	char *path, *devid;
 	uint64_t value;
@@ -2499,6 +2572,20 @@
 			    (u_longlong_t)value);
 			path = buf;
 		}
+
+		/*
+		 * We identify each top-level vdev by using a <type-id>
+		 * naming convention.
+		 */
+		if (verbose) {
+			uint64_t id;
+
+			verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID,
+			    &id) == 0);
+			(void) snprintf(buf, sizeof (buf), "%s-%llu", path,
+			    (u_longlong_t)id);
+			path = buf;
+		}
 	}
 
 	return (zfs_strdup(hdl, path));
@@ -3036,6 +3123,7 @@
 	if (strcmp(type, VDEV_TYPE_RAIDZ) == 0 ||
 	    strcmp(type, VDEV_TYPE_FILE) == 0 ||
 	    strcmp(type, VDEV_TYPE_LOG) == 0 ||
+	    strcmp(type, VDEV_TYPE_HOLE) == 0 ||
 	    strcmp(type, VDEV_TYPE_MISSING) == 0) {
 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 		    "vdev type '%s' is not supported"), type);
--- a/usr/src/uts/common/fs/zfs/metaslab.c	Mon Sep 21 11:25:30 2009 -0600
+++ b/usr/src/uts/common/fs/zfs/metaslab.c	Mon Sep 21 10:38:24 2009 -0700
@@ -57,12 +57,13 @@
  * ==========================================================================
  */
 metaslab_class_t *
-metaslab_class_create(space_map_ops_t *ops)
+metaslab_class_create(spa_t *spa, space_map_ops_t *ops)
 {
 	metaslab_class_t *mc;
 
 	mc = kmem_zalloc(sizeof (metaslab_class_t), KM_SLEEP);
 
+	mc->mc_spa = spa;
 	mc->mc_rotor = NULL;
 	mc->mc_ops = ops;
 
@@ -126,6 +127,32 @@
 	mg->mg_class = NULL;
 }
 
+int
+metaslab_class_validate(metaslab_class_t *mc)
+{
+	metaslab_group_t *cur = mc->mc_rotor;
+	vdev_t *tvd;
+
+	/* Caller must hold at least one spa_config lock (reader or writer). */
+	ASSERT(spa_config_held(mc->mc_spa, SCL_ALL, RW_READER) ||
+	    spa_config_held(mc->mc_spa, SCL_ALL, RW_WRITER));
+
+	if (cur == NULL)
+		return (0);
+
+	for (;;) {
+		tvd = cur->mg_vd;
+		ASSERT(tvd->vdev_mg != NULL);
+		ASSERT3P(tvd->vdev_top, ==, tvd);
+		ASSERT3P(cur->mg_class, ==, mc);
+		ASSERT3P(tvd->vdev_ops, !=, &vdev_hole_ops);
+		if ((cur = cur->mg_next) == mc->mc_rotor)
+			break;
+	}
+
+	return (0);
+}
+
 /*
  * ==========================================================================
  * Metaslab groups
@@ -634,6 +661,8 @@
 	dmu_tx_t *tx;
 	int t;
 
+	ASSERT(!vd->vdev_ishole);
+
 	tx = dmu_tx_create_assigned(spa_get_dsl(spa), txg);
 
 	/*
@@ -721,6 +750,8 @@
 	vdev_t *vd = mg->mg_vd;
 	int t;
 
+	ASSERT(!vd->vdev_ishole);
+
 	mutex_enter(&msp->ms_lock);
 
 	/*
@@ -932,10 +963,21 @@
 	 */
 	if (hintdva) {
 		vd = vdev_lookup_top(spa, DVA_GET_VDEV(&hintdva[d]));
-		if (flags & METASLAB_HINTBP_AVOID)
-			mg = vd->vdev_mg->mg_next;
-		else
+
+		/*
+		 * It's possible the vdev we're using as the hint no
+		 * longer exists (i.e. removed). Consult the rotor when
+		 * all else fails.
+		 */
+		if (vd != NULL && vd->vdev_mg != NULL) {
 			mg = vd->vdev_mg;
+
+			if (flags & METASLAB_HINTBP_AVOID &&
+			    mg->mg_next != NULL)
+				mg = mg->mg_next;
+		} else {
+			mg = mc->mc_rotor;
+		}
 	} else if (d != 0) {
 		vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[d - 1]));
 		mg = vd->vdev_mg->mg_next;
--- a/usr/src/uts/common/fs/zfs/spa.c	Mon Sep 21 11:25:30 2009 -0600
+++ b/usr/src/uts/common/fs/zfs/spa.c	Mon Sep 21 10:38:24 2009 -0700
@@ -42,6 +42,7 @@
 #include <sys/zil.h>
 #include <sys/vdev_impl.h>
 #include <sys/metaslab.h>
+#include <sys/metaslab_impl.h>
 #include <sys/uberblock_impl.h>
 #include <sys/txg.h>
 #include <sys/avl.h>
@@ -578,8 +579,8 @@
 	spa->spa_state = POOL_STATE_ACTIVE;
 	spa->spa_mode = mode;
 
-	spa->spa_normal_class = metaslab_class_create(zfs_metaslab_ops);
-	spa->spa_log_class = metaslab_class_create(zfs_metaslab_ops);
+	spa->spa_normal_class = metaslab_class_create(spa, zfs_metaslab_ops);
+	spa->spa_log_class = metaslab_class_create(spa, zfs_metaslab_ops);
 
 	for (int t = 0; t < ZIO_TYPES; t++) {
 		const zio_taskq_info_t *ztip = &zio_taskqs[t];
@@ -1101,26 +1102,23 @@
  * that the label does not contain the most up-to-date information.
  */
 void
-spa_load_log_state(spa_t *spa)
+spa_load_log_state(spa_t *spa, nvlist_t *nv)
 {
-	nvlist_t *nv, *nvroot, **child;
-	uint64_t is_log;
-	uint_t children;
-	vdev_t *rvd = spa->spa_root_vdev;
-
-	VERIFY(load_nvlist(spa, spa->spa_config_object, &nv) == 0);
-	VERIFY(nvlist_lookup_nvlist(nv, ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
-	VERIFY(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
-	    &child, &children) == 0);
-
-	for (int c = 0; c < children; c++) {
-		vdev_t *tvd = rvd->vdev_child[c];
-
-		if (nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
-		    &is_log) == 0 && is_log)
-			vdev_load_log_state(tvd, child[c]);
+	vdev_t *ovd, *rvd = spa->spa_root_vdev;
+
+	/*
+	 * Load the original root vdev tree from the passed config.
+	 */
+	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
+	VERIFY(spa_config_parse(spa, &ovd, nv, NULL, 0, VDEV_ALLOC_LOAD) == 0);
+
+	for (int c = 0; c < rvd->vdev_children; c++) {
+		vdev_t *cvd = rvd->vdev_child[c];
+		if (cvd->vdev_islog)
+			vdev_load_log_state(cvd, ovd->vdev_child[c]);
 	}
-	nvlist_free(nv);
+	vdev_free(ovd);
+	spa_config_exit(spa, SCL_ALL, FTAG);
 }
 
 /*
@@ -1151,7 +1149,7 @@
 spa_load(spa_t *spa, nvlist_t *config, spa_load_state_t state, int mosconfig)
 {
 	int error = 0;
-	nvlist_t *nvroot = NULL;
+	nvlist_t *nvconfig, *nvroot = NULL;
 	vdev_t *rvd;
 	uberblock_t *ub = &spa->spa_uberblock;
 	uint64_t config_cache_txg = spa->spa_config_txg;
@@ -1306,23 +1304,22 @@
 		goto out;
 	}
 
+	if (load_nvlist(spa, spa->spa_config_object, &nvconfig) != 0) {
+		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
+		    VDEV_AUX_CORRUPT_DATA);
+		error = EIO;
+		goto out;
+	}
+
 	if (!mosconfig) {
-		nvlist_t *newconfig;
 		uint64_t hostid;
 
-		if (load_nvlist(spa, spa->spa_config_object, &newconfig) != 0) {
-			vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
-			    VDEV_AUX_CORRUPT_DATA);
-			error = EIO;
-			goto out;
-		}
-
-		if (!spa_is_root(spa) && nvlist_lookup_uint64(newconfig,
+		if (!spa_is_root(spa) && nvlist_lookup_uint64(nvconfig,
 		    ZPOOL_CONFIG_HOSTID, &hostid) == 0) {
 			char *hostname;
 			unsigned long myhostid = 0;
 
-			VERIFY(nvlist_lookup_string(newconfig,
+			VERIFY(nvlist_lookup_string(nvconfig,
 			    ZPOOL_CONFIG_HOSTNAME, &hostname) == 0);
 
 #ifdef	_KERNEL
@@ -1347,12 +1344,12 @@
 			}
 		}
 
-		spa_config_set(spa, newconfig);
+		spa_config_set(spa, nvconfig);
 		spa_unload(spa);
 		spa_deactivate(spa);
 		spa_activate(spa, orig_mode);
 
-		return (spa_load(spa, newconfig, state, B_TRUE));
+		return (spa_load(spa, nvconfig, state, B_TRUE));
 	}
 
 	if (zap_lookup(spa->spa_meta_objset,
@@ -1471,7 +1468,10 @@
 		spa_config_exit(spa, SCL_ALL, FTAG);
 	}
 
-	spa_load_log_state(spa);
+	VERIFY(nvlist_lookup_nvlist(nvconfig, ZPOOL_CONFIG_VDEV_TREE,
+	    &nvroot) == 0);
+	spa_load_log_state(spa, nvroot);
+	nvlist_free(nvconfig);
 
 	if (spa_check_logs(spa)) {
 		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
@@ -2910,7 +2910,7 @@
 int
 spa_vdev_add(spa_t *spa, nvlist_t *nvroot)
 {
-	uint64_t txg;
+	uint64_t txg, id;
 	int error;
 	vdev_t *rvd = spa->spa_root_vdev;
 	vdev_t *vd, *tvd;
@@ -2951,9 +2951,19 @@
 	 * Transfer each new top-level vdev from vd to rvd.
 	 */
 	for (int c = 0; c < vd->vdev_children; c++) {
+
+		/*
+		 * Set the vdev id to the first hole, if one exists.
+		 */
+		for (id = 0; id < rvd->vdev_children; id++) {
+			if (rvd->vdev_child[id]->vdev_ishole) {
+				vdev_free(rvd->vdev_child[id]);
+				break;
+			}
+		}
 		tvd = vd->vdev_child[c];
 		vdev_remove_child(vd, tvd);
-		tvd->vdev_id = rvd->vdev_children;
+		tvd->vdev_id = id;
 		vdev_add_child(rvd, tvd);
 		vdev_config_dirty(tvd);
 	}
@@ -3136,6 +3146,7 @@
 	 */
 	vdev_remove_child(newrootvd, newvd);
 	newvd->vdev_id = pvd->vdev_children;
+	newvd->vdev_crtxg = oldvd->vdev_crtxg;
 	vdev_add_child(pvd, newvd);
 
 	tvd = newvd->vdev_top;
@@ -3444,16 +3455,127 @@
 }
 
 /*
+ * Removing a device from the vdev namespace requires several steps
+ * and can take a significant amount of time.  As a result we use
+ * the spa_vdev_config_[enter/exit] functions which allow us to
+ * grab and release the spa_config_lock while still holding the namespace
+ * lock.  During each step the configuration is synced out.
+ */
+
+/*
+ * First step of removing a device: take it out of its allocation class.
+ */
+void
+spa_vdev_remove_start(spa_t *spa, vdev_t *vd)
+{
+	ASSERT(MUTEX_HELD(&spa_namespace_lock));
+	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);
+
+	/*
+	 * A vdev without a metaslab group has nothing to detach.
+	 */
+	if (vd->vdev_mg == NULL)
+		return;
+
+	metaslab_class_remove(vd->vdev_mg->mg_class, vd->vdev_mg);
+}
+
+/*
+ * Evacuate the device.
+ */
+int
+spa_vdev_remove_evacuate(spa_t *spa, vdev_t *vd)
+{
+	uint64_t txg;
+	int error;
+
+	ASSERT(MUTEX_HELD(&spa_namespace_lock));
+	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0);
+
+	/*
+	 * Evacuate the device.  We don't hold the config lock as writer
+	 * since we need to do I/O but we do keep the
+	 * spa_namespace_lock held.  Once this completes the device
+	 * should no longer have any blocks allocated on it.
+	 */
+	if (vd->vdev_islog) {
+		error = dmu_objset_find(spa_name(spa), zil_vdev_offline,
+		    NULL, DS_FIND_CHILDREN);
+		if (error != 0) {
+			/*
+			 * Put the group back in the log class.  Note that
+			 * spa_vdev_exit() also drops spa_namespace_lock.
+			 */
+			txg = spa_vdev_config_enter(spa);
+			metaslab_class_add(spa->spa_log_class,
+			    vd->vdev_mg);
+			return (spa_vdev_exit(spa, NULL, txg, error));
+		}
+		txg_wait_synced(spa_get_dsl(spa), 0);
+	}
+
+	/*
+	 * Remove any remaining MOS metadata associated with the device.
+	 */
+	txg = spa_vdev_config_enter(spa);
+	vd->vdev_removing = B_TRUE;
+	vdev_dirty(vd, 0, NULL, txg);
+	vdev_config_dirty(vd);
+	spa_vdev_config_exit(spa, NULL, txg, 0, FTAG);
+
+	return (0);
+}
+
+/*
+ * Complete the removal by cleaning up the namespace.
+ */
+void
+spa_vdev_remove_done(spa_t *spa, vdev_t *vd)
+{
+	vdev_t *rvd = spa->spa_root_vdev;
+	metaslab_group_t *mg = vd->vdev_mg;
+	uint64_t id = vd->vdev_id;
+	boolean_t last_vdev = (id == (rvd->vdev_children - 1));
+
+	ASSERT(MUTEX_HELD(&spa_namespace_lock));
+	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);
+
+	(void) vdev_label_init(vd, 0, VDEV_LABEL_REMOVE);
+	vdev_free(vd);
+
+	/*
+	 * It's possible that another thread is trying to do a spa_vdev_add()
+	 * at the same time we're trying to remove it. As a result the
+	 * added vdev may not have initialized its metaslabs yet.
+	 */
+	if (mg != NULL)
+		metaslab_group_destroy(mg);
+
+	if (last_vdev) {
+		vdev_compact_children(rvd);
+	} else {
+		vd = vdev_alloc_common(spa, id, 0, &vdev_hole_ops);
+		vdev_add_child(rvd, vd);
+	}
+	vdev_config_dirty(rvd);
+
+	/*
+	 * Reassess the health of our root vdev.
+	 */
+	vdev_reopen(rvd);
+}
+
+/*
  * Remove a device from the pool.  Currently, this supports removing only hot
- * spares and level 2 ARC devices.
+ * spares, slogs, and level 2 ARC devices.
  */
 int
 spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare)
 {
 	vdev_t *vd;
 	nvlist_t **spares, **l2cache, *nv;
+	uint64_t txg = 0;
 	uint_t nspares, nl2cache;
-	uint64_t txg = 0;
 	int error = 0;
 	boolean_t locked = MUTEX_HELD(&spa_namespace_lock);
 
@@ -3489,6 +3611,29 @@
 		    ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache, nv);
 		spa_load_l2cache(spa);
 		spa->spa_l2cache.sav_sync = B_TRUE;
+	} else if (vd != NULL && vd->vdev_islog) {
+		ASSERT(!locked);
+
+		/*
+		 * XXX - Once we have bp-rewrite this should
+		 * become the common case.
+		 */
+
+		/*
+		 * 1. Stop allocations
+		 * 2. Evacuate the device (i.e. kill off stubby and
+		 *    metadata) and wait for it to complete (i.e. sync).
+		 * 3. Cleanup the vdev namespace.
+		 */
+		spa_vdev_remove_start(spa, vd);
+
+		spa_vdev_config_exit(spa, NULL, txg, 0, FTAG);
+		if ((error = spa_vdev_remove_evacuate(spa, vd)) != 0)
+			return (error);
+		txg = spa_vdev_config_enter(spa);
+
+		spa_vdev_remove_done(spa, vd);
+
 	} else if (vd != NULL) {
 		/*
 		 * Normal vdevs cannot be removed (yet).
--- a/usr/src/uts/common/fs/zfs/spa_config.c	Mon Sep 21 11:25:30 2009 -0600
+++ b/usr/src/uts/common/fs/zfs/spa_config.c	Mon Sep 21 10:38:24 2009 -0700
@@ -383,6 +383,13 @@
 		vd = vd->vdev_top;		/* label contains top config */
 	}
 
+	/*
+	 * Add the top-level config.  We even add this on pools which
+	 * don't support holes in the namespace as older pools will
+	 * just ignore it.
+	 */
+	vdev_top_config_generate(spa, config);
+
 	nvroot = vdev_config_generate(spa, vd, getstats, B_FALSE, B_FALSE);
 	VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0);
 	nvlist_free(nvroot);
--- a/usr/src/uts/common/fs/zfs/spa_misc.c	Mon Sep 21 11:25:30 2009 -0600
+++ b/usr/src/uts/common/fs/zfs/spa_misc.c	Mon Sep 21 10:38:24 2009 -0700
@@ -836,6 +836,18 @@
 spa_vdev_enter(spa_t *spa)
 {
 	mutex_enter(&spa_namespace_lock);
+	return (spa_vdev_config_enter(spa));
+}
+
+/*
+ * Internal implementation for spa_vdev_enter().  Used when a vdev
+ * operation requires multiple syncs (e.g. removing a device) while
+ * keeping the spa_namespace_lock held.
+ */
+uint64_t
+spa_vdev_config_enter(spa_t *spa)
+{
+	ASSERT(MUTEX_HELD(&spa_namespace_lock));
 
 	spa_config_enter(spa, SCL_ALL, spa, RW_WRITER);
 
@@ -843,14 +855,14 @@
 }
 
 /*
- * Unlock the spa_t after adding or removing a vdev.  Besides undoing the
- * locking of spa_vdev_enter(), we also want make sure the transactions have
- * synced to disk, and then update the global configuration cache with the new
- * information.
+ * Used in combination with spa_vdev_config_enter() to allow the syncing
+ * of multiple transactions without releasing the spa_namespace_lock.
  */
-int
-spa_vdev_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error)
+void
+spa_vdev_config_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error, char *tag)
 {
+	ASSERT(MUTEX_HELD(&spa_namespace_lock));
+
 	int config_changed = B_FALSE;
 
 	ASSERT(txg > spa_last_synced_txg(spa));
@@ -870,9 +882,23 @@
 		config_changed = B_TRUE;
 	}
 
+	/*
+	 * Verify the metaslab classes.
+	 */
+	ASSERT(metaslab_class_validate(spa->spa_normal_class) == 0);
+	ASSERT(metaslab_class_validate(spa->spa_log_class) == 0);
+
 	spa_config_exit(spa, SCL_ALL, spa);
 
 	/*
+	 * Panic the system if the specified tag requires it.  This
+	 * is useful for ensuring that configurations are updated
+	 * transactionally.
+	 */
+	if (zio_injection_enabled)
+		zio_handle_panic_injection(spa, tag);
+
+	/*
 	 * Note: this txg_wait_synced() is important because it ensures
 	 * that there won't be more than one config change per txg.
 	 * This allows us to use the txg as the generation number.
@@ -892,7 +918,18 @@
 	 */
 	if (config_changed)
 		spa_config_sync(spa, B_FALSE, B_TRUE);
+}
 
+/*
+ * Unlock the spa_t after adding or removing a vdev.  Besides undoing the
+ * locking of spa_vdev_enter(), we also want to make sure the transactions have
+ * synced to disk, and then update the global configuration cache with the new
+ * information.
+ */
+int
+spa_vdev_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error)
+{
+	spa_vdev_config_exit(spa, vd, txg, error, FTAG);
 	mutex_exit(&spa_namespace_lock);
 
 	return (error);
--- a/usr/src/uts/common/fs/zfs/sys/metaslab.h	Mon Sep 21 11:25:30 2009 -0600
+++ b/usr/src/uts/common/fs/zfs/sys/metaslab.h	Mon Sep 21 10:38:24 2009 -0700
@@ -57,10 +57,12 @@
     boolean_t now);
 extern int metaslab_claim(spa_t *spa, const blkptr_t *bp, uint64_t txg);
 
-extern metaslab_class_t *metaslab_class_create(space_map_ops_t *ops);
+extern metaslab_class_t *metaslab_class_create(spa_t *spa,
+    space_map_ops_t *ops);
 extern void metaslab_class_destroy(metaslab_class_t *mc);
 extern void metaslab_class_add(metaslab_class_t *mc, metaslab_group_t *mg);
 extern void metaslab_class_remove(metaslab_class_t *mc, metaslab_group_t *mg);
+extern int metaslab_class_validate(metaslab_class_t *mc);
 
 extern metaslab_group_t *metaslab_group_create(metaslab_class_t *mc,
     vdev_t *vd);
--- a/usr/src/uts/common/fs/zfs/sys/metaslab_impl.h	Mon Sep 21 11:25:30 2009 -0600
+++ b/usr/src/uts/common/fs/zfs/sys/metaslab_impl.h	Mon Sep 21 10:38:24 2009 -0700
@@ -37,6 +37,7 @@
 #endif
 
 struct metaslab_class {
+	spa_t			*mc_spa;
 	metaslab_group_t	*mc_rotor;
 	uint64_t		mc_allocated;
 	space_map_ops_t		*mc_ops;
--- a/usr/src/uts/common/fs/zfs/sys/spa.h	Mon Sep 21 11:25:30 2009 -0600
+++ b/usr/src/uts/common/fs/zfs/sys/spa.h	Mon Sep 21 10:38:24 2009 -0700
@@ -430,6 +430,9 @@
 
 /* Pool vdev add/remove lock */
 extern uint64_t spa_vdev_enter(spa_t *spa);
+extern uint64_t spa_vdev_config_enter(spa_t *spa);
+extern void spa_vdev_config_exit(spa_t *spa, vdev_t *vd, uint64_t txg,
+    int error, char *tag);
 extern int spa_vdev_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error);
 
 /* Pool vdev state change lock */
--- a/usr/src/uts/common/fs/zfs/sys/vdev.h	Mon Sep 21 11:25:30 2009 -0600
+++ b/usr/src/uts/common/fs/zfs/sys/vdev.h	Mon Sep 21 10:38:24 2009 -0700
@@ -122,6 +122,7 @@
 extern void vdev_state_dirty(vdev_t *vd);
 extern void vdev_state_clean(vdev_t *vd);
 
+extern void vdev_top_config_generate(spa_t *spa, nvlist_t *config);
 extern nvlist_t *vdev_config_generate(spa_t *spa, vdev_t *vd,
     boolean_t getstats, boolean_t isspare, boolean_t isl2cache);
 
--- a/usr/src/uts/common/fs/zfs/sys/vdev_impl.h	Mon Sep 21 11:25:30 2009 -0600
+++ b/usr/src/uts/common/fs/zfs/sys/vdev_impl.h	Mon Sep 21 10:38:24 2009 -0700
@@ -129,6 +129,7 @@
 	boolean_t	vdev_expanding;	/* expand the vdev?		*/
 	int		vdev_open_error; /* error on last open		*/
 	kthread_t	*vdev_open_thread; /* thread opening children	*/
+	uint64_t	vdev_crtxg;	/* txg when top-level was added */
 
 	/*
 	 * Top-level vdev state.
@@ -143,10 +144,12 @@
 	txg_node_t	vdev_txg_node;	/* per-txg dirty vdev linkage	*/
 	boolean_t	vdev_remove_wanted; /* async remove wanted?	*/
 	boolean_t	vdev_probe_wanted; /* async probe wanted?	*/
+	boolean_t	vdev_removing;	/* device is being removed?	*/
 	list_node_t	vdev_config_dirty_node; /* config dirty list	*/
 	list_node_t	vdev_state_dirty_node; /* state dirty list	*/
 	uint64_t	vdev_deflate_ratio; /* deflation ratio (x512)	*/
 	uint64_t	vdev_islog;	/* is an intent log device	*/
+	uint64_t	vdev_ishole;	/* is a hole in the namespace 	*/
 
 	/*
 	 * Leaf vdev state.
@@ -248,6 +251,8 @@
 /*
  * Allocate or free a vdev
  */
+extern vdev_t *vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid,
+    vdev_ops_t *ops);
 extern int vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *config,
     vdev_t *parent, uint_t id, int alloctype);
 extern void vdev_free(vdev_t *vd);
@@ -264,7 +269,7 @@
 /*
  * vdev sync load and sync
  */
-extern void vdev_load_log_state(vdev_t *vd, nvlist_t *nv);
+extern void vdev_load_log_state(vdev_t *nvd, vdev_t *ovd);
 extern void vdev_load(vdev_t *vd);
 extern void vdev_sync(vdev_t *vd, uint64_t txg);
 extern void vdev_sync_done(vdev_t *vd, uint64_t txg);
@@ -280,6 +285,7 @@
 extern vdev_ops_t vdev_disk_ops;
 extern vdev_ops_t vdev_file_ops;
 extern vdev_ops_t vdev_missing_ops;
+extern vdev_ops_t vdev_hole_ops;
 extern vdev_ops_t vdev_spare_ops;
 
 /*
--- a/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h	Mon Sep 21 11:25:30 2009 -0600
+++ b/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h	Mon Sep 21 10:38:24 2009 -0700
@@ -117,6 +117,7 @@
 	uint64_t	zi_type;
 	uint32_t	zi_freq;
 	uint32_t	zi_failfast;
+	char		zi_func[MAXNAMELEN];
 } zinject_record_t;
 
 #define	ZINJECT_NULL		0x1
--- a/usr/src/uts/common/fs/zfs/sys/zio.h	Mon Sep 21 11:25:30 2009 -0600
+++ b/usr/src/uts/common/fs/zfs/sys/zio.h	Mon Sep 21 10:38:24 2009 -0700
@@ -442,6 +442,7 @@
 extern int zio_inject_list_next(int *id, char *name, size_t buflen,
     struct zinject_record *record);
 extern int zio_clear_fault(int id);
+extern void zio_handle_panic_injection(spa_t *spa, char *tag);
 extern int zio_handle_fault_injection(zio_t *zio, int error);
 extern int zio_handle_device_injection(vdev_t *vd, zio_t *zio, int error);
 extern int zio_handle_label_injection(zio_t *zio, int error);
--- a/usr/src/uts/common/fs/zfs/vdev.c	Mon Sep 21 11:25:30 2009 -0600
+++ b/usr/src/uts/common/fs/zfs/vdev.c	Mon Sep 21 10:38:24 2009 -0700
@@ -54,6 +54,7 @@
 	&vdev_disk_ops,
 	&vdev_file_ops,
 	&vdev_missing_ops,
+	&vdev_hole_ops,
 	NULL
 };
 
@@ -281,7 +282,7 @@
 /*
  * Allocate and minimally initialize a vdev_t.
  */
-static vdev_t *
+vdev_t *
 vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops)
 {
 	vdev_t *vd;
@@ -293,7 +294,7 @@
 		spa->spa_root_vdev = vd;
 	}
 
-	if (guid == 0) {
+	if (guid == 0 && ops != &vdev_hole_ops) {
 		if (spa->spa_root_vdev == vd) {
 			/*
 			 * The root vdev's guid will also be the pool guid,
@@ -318,6 +319,7 @@
 	vd->vdev_guid_sum = guid;
 	vd->vdev_ops = ops;
 	vd->vdev_state = VDEV_STATE_CLOSED;
+	vd->vdev_ishole = (ops == &vdev_hole_ops);
 
 	mutex_init(&vd->vdev_dtl_lock, NULL, MUTEX_DEFAULT, NULL);
 	mutex_init(&vd->vdev_stat_lock, NULL, MUTEX_DEFAULT, NULL);
@@ -397,6 +399,9 @@
 	if (islog && spa_version(spa) < SPA_VERSION_SLOGS)
 		return (ENOTSUP);
 
+	if (ops == &vdev_hole_ops && spa_version(spa) < SPA_VERSION_HOLES)
+		return (ENOTSUP);
+
 	/*
 	 * Set the nparity property for RAID-Z vdevs.
 	 */
@@ -472,6 +477,12 @@
 	(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ASHIFT, &vd->vdev_ashift);
 
 	/*
+	 * Retrieve the vdev creation time.
+	 */
+	(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_CREATE_TXG,
+	    &vd->vdev_crtxg);
+
+	/*
 	 * If we're a top-level vdev, try to load the allocation parameters.
 	 */
 	if (parent && !parent->vdev_parent && alloctype == VDEV_ALLOC_LOAD) {
@@ -705,6 +716,7 @@
 	mvd->vdev_min_asize = cvd->vdev_min_asize;
 	mvd->vdev_ashift = cvd->vdev_ashift;
 	mvd->vdev_state = cvd->vdev_state;
+	mvd->vdev_crtxg = cvd->vdev_crtxg;
 
 	vdev_remove_child(pvd, cvd);
 	vdev_add_child(pvd, mvd);
@@ -772,9 +784,14 @@
 	metaslab_t **mspp;
 	int error;
 
-	if (vd->vdev_ms_shift == 0)	/* not being allocated from yet */
+	/*
+	 * This vdev is not being allocated from yet or is a hole.
+	 */
+	if (vd->vdev_ms_shift == 0)
 		return (0);
 
+	ASSERT(!vd->vdev_ishole);
+
 	/*
 	 * Compute the raidz-deflation ratio.  Note, we hard-code
 	 * in 128k (1 << 17) because it is the current "typical" blocksize.
@@ -1105,6 +1122,12 @@
 		vd->vdev_state = VDEV_STATE_HEALTHY;
 	}
 
+	/*
+	 * For hole or missing vdevs we just return success.
+	 */
+	if (vd->vdev_ishole || vd->vdev_ops == &vdev_missing_ops)
+		return (0);
+
 	for (int c = 0; c < vd->vdev_children; c++) {
 		if (vd->vdev_child[c]->vdev_state != VDEV_STATE_HEALTHY) {
 			vdev_set_state(vd, B_TRUE, VDEV_STATE_DEGRADED,
@@ -1393,6 +1416,7 @@
 vdev_dirty(vdev_t *vd, int flags, void *arg, uint64_t txg)
 {
 	ASSERT(vd == vd->vdev_top);
+	ASSERT(!vd->vdev_ishole);
 	ASSERT(ISP2(flags));
 
 	if (flags & VDD_METASLAB)
@@ -1502,7 +1526,7 @@
 		vdev_dtl_reassess(vd->vdev_child[c], txg,
 		    scrub_txg, scrub_done);
 
-	if (vd == spa->spa_root_vdev)
+	if (vd == spa->spa_root_vdev || vd->vdev_ishole)
 		return;
 
 	if (vd->vdev_ops->vdev_op_leaf) {
@@ -1592,6 +1616,8 @@
 	if (smo->smo_object == 0)
 		return (0);
 
+	ASSERT(!vd->vdev_ishole);
+
 	if ((error = dmu_bonus_hold(mos, smo->smo_object, FTAG, &db)) != 0)
 		return (error);
 
@@ -1619,6 +1645,8 @@
 	dmu_buf_t *db;
 	dmu_tx_t *tx;
 
+	ASSERT(!vd->vdev_ishole);
+
 	tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg);
 
 	if (vd->vdev_detached) {
@@ -1755,7 +1783,7 @@
 	/*
 	 * If this is a top-level vdev, initialize its metaslabs.
 	 */
-	if (vd == vd->vdev_top &&
+	if (vd == vd->vdev_top && !vd->vdev_ishole &&
 	    (vd->vdev_ashift == 0 || vd->vdev_asize == 0 ||
 	    vdev_metaslab_init(vd, 0) != 0))
 		vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN,
@@ -1812,10 +1840,48 @@
 }
 
 void
+vdev_remove(vdev_t *vd, uint64_t txg)
+{
+	spa_t *spa = vd->vdev_spa;
+	objset_t *mos = spa->spa_meta_objset;
+	dmu_tx_t *tx;
+
+	tx = dmu_tx_create_assigned(spa_get_dsl(spa), txg);
+
+	if (vd->vdev_dtl_smo.smo_object) {
+		ASSERT3U(vd->vdev_dtl_smo.smo_alloc, ==, 0);
+		(void) dmu_object_free(mos, vd->vdev_dtl_smo.smo_object, tx);
+		vd->vdev_dtl_smo.smo_object = 0;
+	}
+
+	if (vd->vdev_ms != NULL) {
+		for (int m = 0; m < vd->vdev_ms_count; m++) {
+			metaslab_t *msp = vd->vdev_ms[m];
+
+			if (msp == NULL || msp->ms_smo.smo_object == 0)
+				continue;
+
+			ASSERT3U(msp->ms_smo.smo_alloc, ==, 0);
+			(void) dmu_object_free(mos, msp->ms_smo.smo_object, tx);
+			msp->ms_smo.smo_object = 0;
+		}
+	}
+
+	if (vd->vdev_ms_array) {
+		(void) dmu_object_free(mos, vd->vdev_ms_array, tx);
+		vd->vdev_ms_array = 0;
+		vd->vdev_ms_shift = 0;
+	}
+	dmu_tx_commit(tx);
+}
+
+void
 vdev_sync_done(vdev_t *vd, uint64_t txg)
 {
 	metaslab_t *msp;
 
+	ASSERT(!vd->vdev_ishole);
+
 	while (msp = txg_list_remove(&vd->vdev_ms_list, TXG_CLEAN(txg)))
 		metaslab_sync_done(msp, txg);
 }
@@ -1828,6 +1894,8 @@
 	metaslab_t *msp;
 	dmu_tx_t *tx;
 
+	ASSERT(!vd->vdev_ishole);
+
 	if (vd->vdev_ms_array == 0 && vd->vdev_ms_shift != 0) {
 		ASSERT(vd == vd->vdev_top);
 		tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg);
@@ -1838,6 +1906,9 @@
 		dmu_tx_commit(tx);
 	}
 
+	if (vd->vdev_removing)
+		vdev_remove(vd, txg);
+
 	while ((msp = txg_list_remove(&vd->vdev_ms_list, txg)) != NULL) {
 		metaslab_sync(msp, txg);
 		(void) txg_list_add(&vd->vdev_ms_list, msp, TXG_CLEAN(txg));
@@ -2110,7 +2181,15 @@
 boolean_t
 vdev_is_dead(vdev_t *vd)
 {
-	return (vd->vdev_state < VDEV_STATE_DEGRADED);
+	/*
+	 * Holes and missing devices are always considered "dead".
+	 * This simplifies the code since we don't have to check for
+	 * these types of devices in the various code paths.
+	 * Instead we rely on the fact that we skip over dead devices
+	 * before issuing I/O to them.
+	 */
+	return (vd->vdev_state < VDEV_STATE_DEGRADED || vd->vdev_ishole ||
+	    vd->vdev_ops == &vdev_missing_ops);
 }
 
 boolean_t
@@ -2139,7 +2218,7 @@
 	 * we're asking two separate questions about it.
 	 */
 	return (!(state < VDEV_STATE_DEGRADED && state != VDEV_STATE_CLOSED) &&
-	    !vd->vdev_cant_write);
+	    !vd->vdev_cant_write && !vd->vdev_ishole && !vd->vdev_removing);
 }
 
 boolean_t
@@ -2391,7 +2470,7 @@
 		 * Don't count non-normal (e.g. intent log) space as part of
 		 * the pool's capacity.
 		 */
-		if (vd->vdev_mg->mg_class != spa->spa_normal_class)
+		if (vd->vdev_islog)
 			return;
 
 		mutex_enter(&rvd->vdev_stat_lock);
@@ -2472,7 +2551,8 @@
 	} else {
 		ASSERT(vd == vd->vdev_top);
 
-		if (!list_link_active(&vd->vdev_config_dirty_node))
+		if (!list_link_active(&vd->vdev_config_dirty_node) &&
+		    !vd->vdev_ishole)
 			list_insert_head(&spa->spa_config_dirty_list, vd);
 	}
 }
@@ -2546,6 +2626,12 @@
 		for (int c = 0; c < vd->vdev_children; c++) {
 			child = vd->vdev_child[c];
 
+			/*
+			 * Don't factor holes into the decision.
+			 */
+			if (child->vdev_ishole)
+				continue;
+
 			if (!vdev_readable(child) ||
 			    (!vdev_writeable(child) && spa_writeable(spa))) {
 				/*
@@ -2739,32 +2825,31 @@
 	return (B_TRUE);
 }
 
+/*
+ * Load the state from the original vdev tree (ovd) which
+ * we've retrieved from the MOS config object. If the original
+ * vdev was offline then we transfer that state to the device
+ * in the current vdev tree (nvd).
+ */
 void
-vdev_load_log_state(vdev_t *vd, nvlist_t *nv)
+vdev_load_log_state(vdev_t *nvd, vdev_t *ovd)
 {
-	uint_t children;
-	nvlist_t **child;
-	uint64_t val;
-	spa_t *spa = vd->vdev_spa;
-
-	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
-	    &child, &children) == 0) {
-		for (int c = 0; c < children; c++)
-			vdev_load_log_state(vd->vdev_child[c], child[c]);
-	}
-
-	if (vd->vdev_ops->vdev_op_leaf && nvlist_lookup_uint64(nv,
-	    ZPOOL_CONFIG_OFFLINE, &val) == 0 && val) {
-
+	spa_t *spa = nvd->vdev_spa;
+
+	ASSERT(spa_config_held(spa, SCL_STATE_ALL, RW_WRITER) == SCL_STATE_ALL);
+	ASSERT3U(nvd->vdev_guid, ==, ovd->vdev_guid);
+
+	for (int c = 0; c < nvd->vdev_children; c++)
+		vdev_load_log_state(nvd->vdev_child[c], ovd->vdev_child[c]);
+
+	if (nvd->vdev_ops->vdev_op_leaf && ovd->vdev_offline) {
 		/*
 		 * It would be nice to call vdev_offline()
 		 * directly but the pool isn't fully loaded and
 		 * the txg threads have not been started yet.
 		 */
-		spa_config_enter(spa, SCL_STATE_ALL, FTAG, RW_WRITER);
-		vd->vdev_offline = val;
-		vdev_reopen(vd->vdev_top);
-		spa_config_exit(spa, SCL_STATE_ALL, FTAG);
+		nvd->vdev_offline = ovd->vdev_offline;
+		vdev_reopen(nvd->vdev_top);
 	}
 }
 
--- a/usr/src/uts/common/fs/zfs/vdev_label.c	Mon Sep 21 11:25:30 2009 -0600
+++ b/usr/src/uts/common/fs/zfs/vdev_label.c	Mon Sep 21 10:38:24 2009 -0700
@@ -287,6 +287,10 @@
 		VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_DTL,
 		    vd->vdev_dtl_smo.smo_object) == 0);
 
+	if (vd->vdev_crtxg)
+		VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_CREATE_TXG,
+		    vd->vdev_crtxg) == 0);
+
 	if (getstats) {
 		vdev_stat_t vs;
 		vdev_get_stats(vd, &vs);
@@ -298,6 +302,8 @@
 		nvlist_t **child;
 		int c;
 
+		ASSERT(!vd->vdev_ishole);
+
 		child = kmem_alloc(vd->vdev_children * sizeof (nvlist_t *),
 		    KM_SLEEP);
 
@@ -329,11 +335,45 @@
 		if (vd->vdev_unspare)
 			VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_UNSPARE,
 			    B_TRUE) == 0);
+		if (vd->vdev_ishole)
+			VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_IS_HOLE,
+			    B_TRUE) == 0);
 	}
 
 	return (nv);
 }
 
+/*
+ * Describe the top-level vdevs in 'config'.  The indices of any holes among
+ * the root vdev's children are stored as ZPOOL_CONFIG_HOLE_ARRAY (the array
+ * is added even when it is empty), and the root's total child count, holes
+ * included, is stored as ZPOOL_CONFIG_VDEV_CHILDREN.
+ */
+void
+vdev_top_config_generate(spa_t *spa, nvlist_t *config)
+{
+	vdev_t *rvd = spa->spa_root_vdev;
+	uint64_t *array;
+	uint_t idx;
+
+	array = kmem_alloc(rvd->vdev_children * sizeof (uint64_t), KM_SLEEP);
+
+	idx = 0;
+	for (int c = 0; c < rvd->vdev_children; c++) {
+		vdev_t *tvd = rvd->vdev_child[c];
+
+		if (tvd->vdev_ishole)
+			array[idx++] = c;
+	}
+
+	VERIFY(nvlist_add_uint64_array(config, ZPOOL_CONFIG_HOLE_ARRAY,
+	    array, idx) == 0);
+	VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_VDEV_CHILDREN,
+	    rvd->vdev_children) == 0);
+
+	kmem_free(array, rvd->vdev_children * sizeof (uint64_t));
+}
+
 nvlist_t *
 vdev_label_read_config(vdev_t *vd)
 {
@@ -516,6 +556,9 @@
 		    crtxg, reason)) != 0)
 			return (error);
 
+	/* Track the txg in which this vdev was created */
+	vd->vdev_crtxg = crtxg;
+
 	if (!vd->vdev_ops->vdev_op_leaf)
 		return (0);
 
@@ -976,6 +1019,9 @@
 	for (vd = list_head(dl); vd != NULL; vd = list_next(dl, vd)) {
 		uint64_t *good_writes = kmem_zalloc(sizeof (uint64_t),
 		    KM_SLEEP);
+
+		ASSERT(!vd->vdev_ishole);
+
 		zio_t *vio = zio_null(zio, spa, NULL,
 		    (vd->vdev_islog || vd->vdev_aux != NULL) ?
 		    vdev_label_sync_ignore_done : vdev_label_sync_top_done,
--- a/usr/src/uts/common/fs/zfs/vdev_missing.c	Mon Sep 21 11:25:30 2009 -0600
+++ b/usr/src/uts/common/fs/zfs/vdev_missing.c	Mon Sep 21 10:38:24 2009 -0700
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -48,8 +48,8 @@
 	 * VDEV_AUX_BAD_GUID_SUM.  So we pretend to succeed, knowing that we
 	 * will fail the GUID sum check before ever trying to open the pool.
 	 */
-	*psize = SPA_MINDEVSIZE;
-	*ashift = SPA_MINBLOCKSHIFT;
+	*psize = 0;
+	*ashift = 0;
 	return (0);
 }
 
@@ -83,3 +83,14 @@
 	VDEV_TYPE_MISSING,	/* name of this vdev type */
 	B_TRUE			/* leaf vdev */
 };
+
+vdev_ops_t vdev_hole_ops = {
+	vdev_missing_open,	/* open: reuses vdev_missing_open */
+	vdev_missing_close,	/* close: reuses vdev_missing_close */
+	vdev_default_asize,	/* asize: default implementation */
+	vdev_missing_io_start,	/* I/O start: reuses the missing-vdev one */
+	vdev_missing_io_done,	/* I/O done: reuses the missing-vdev one */
+	NULL,			/* NOTE(review): unused slot? confirm */
+	VDEV_TYPE_HOLE,		/* name of this vdev type */
+	B_TRUE			/* leaf vdev */
+};
--- a/usr/src/uts/common/fs/zfs/zio_inject.c	Mon Sep 21 11:25:30 2009 -0600
+++ b/usr/src/uts/common/fs/zfs/zio_inject.c	Mon Sep 21 10:38:24 2009 -0700
@@ -96,6 +96,30 @@
 }
 
 /*
+ * Panic the system when a config change happens in the function
+ * specified by tag.
+ */
+void
+zio_handle_panic_injection(spa_t *spa, char *tag)
+{
+	inject_handler_t *ih;
+
+	rw_enter(&inject_lock, RW_READER);
+	/*
+	 * A handler fires only when it belongs to this spa and its
+	 * zi_func string is identical to the caller's tag.
+	 */
+	for (ih = list_head(&inject_handlers); ih != NULL;
+	    ih = list_next(&inject_handlers, ih)) {
+		if (ih->zi_spa == spa &&
+		    strcmp(tag, ih->zi_record.zi_func) == 0)
+			panic("Panic requested in function %s\n", tag);
+	}
+
+	rw_exit(&inject_lock);
+}
+
+/*
  * Determine if the I/O in question should return failure.  Returns the errno
  * to be returned to the caller.
  */
@@ -126,8 +150,9 @@
 		if (zio->io_spa != handler->zi_spa)
 			continue;
 
-		/* Ignore device errors */
-		if (handler->zi_record.zi_guid != 0)
+		/* Ignore device errors and panic injection */
+		if (handler->zi_record.zi_guid != 0 ||
+		    handler->zi_record.zi_func[0] != '\0')
 			continue;
 
 		/* If this handler matches, return EIO */
@@ -170,8 +195,9 @@
 		uint64_t start = handler->zi_record.zi_start;
 		uint64_t end = handler->zi_record.zi_end;
 
-		/* Ignore device only faults */
-		if (handler->zi_record.zi_start == 0)
+		/* Ignore device only faults or panic injection */
+		if (handler->zi_record.zi_start == 0 ||
+		    handler->zi_record.zi_func[0] != '\0')
 			continue;
 
 		/*
@@ -205,8 +231,9 @@
 	for (handler = list_head(&inject_handlers); handler != NULL;
 	    handler = list_next(&inject_handlers, handler)) {
 
-		/* Ignore label specific faults */
-		if (handler->zi_record.zi_start != 0)
+		/* Ignore label specific faults or panic injection */
+		if (handler->zi_record.zi_start != 0 ||
+		    handler->zi_record.zi_func[0] != '\0')
 			continue;
 
 		if (vd->vdev_guid == handler->zi_record.zi_guid) {
--- a/usr/src/uts/common/sys/fs/zfs.h	Mon Sep 21 11:25:30 2009 -0600
+++ b/usr/src/uts/common/sys/fs/zfs.h	Mon Sep 21 10:38:24 2009 -0700
@@ -295,14 +295,15 @@
 #define	SPA_VERSION_16			16ULL
 #define	SPA_VERSION_17			17ULL
 #define	SPA_VERSION_18			18ULL
+#define	SPA_VERSION_19			19ULL
 /*
  * When bumping up SPA_VERSION, make sure GRUB ZFS understands the on-disk
  * format change. Go to usr/src/grub/grub-0.97/stage2/{zfs-include/, fsys_zfs*},
  * and do the appropriate changes.  Also bump the version number in
  * usr/src/grub/capability.
  */
-#define	SPA_VERSION			SPA_VERSION_18
-#define	SPA_VERSION_STRING		"18"
+#define	SPA_VERSION			SPA_VERSION_19
+#define	SPA_VERSION_STRING		"19"
 
 /*
  * Symbolic names for the changes that caused a SPA_VERSION switch.
@@ -342,6 +343,7 @@
 #define	SPA_VERSION_STMF_PROP		SPA_VERSION_16
 #define	SPA_VERSION_RAIDZ3		SPA_VERSION_17
 #define	SPA_VERSION_USERREFS		SPA_VERSION_18
+#define	SPA_VERSION_HOLES		SPA_VERSION_19
 
 /*
  * ZPL version - rev'd whenever an incompatible on-disk format change
@@ -401,6 +403,9 @@
 #define	ZPOOL_CONFIG_PHYS_PATH		"phys_path"
 #define	ZPOOL_CONFIG_IS_LOG		"is_log"
 #define	ZPOOL_CONFIG_L2CACHE		"l2cache"
+#define	ZPOOL_CONFIG_HOLE_ARRAY		"hole_array"
+#define	ZPOOL_CONFIG_VDEV_CHILDREN	"vdev_children"
+#define	ZPOOL_CONFIG_IS_HOLE		"is_hole"
 #define	ZPOOL_CONFIG_SUSPENDED		"suspended"	/* not stored on disk */
 #define	ZPOOL_CONFIG_TIMESTAMP		"timestamp"	/* not stored on disk */
 #define	ZPOOL_CONFIG_BOOTFS		"bootfs"	/* not stored on disk */
@@ -422,6 +427,7 @@
 #define	VDEV_TYPE_DISK			"disk"
 #define	VDEV_TYPE_FILE			"file"
 #define	VDEV_TYPE_MISSING		"missing"
+#define	VDEV_TYPE_HOLE			"hole"
 #define	VDEV_TYPE_SPARE			"spare"
 #define	VDEV_TYPE_LOG			"log"
 #define	VDEV_TYPE_L2CACHE		"l2cache"