2619 asynchronous destruction of ZFS file systems
authorChristopher Siden <chris.siden@delphix.com>
Mon, 21 May 2012 12:11:39 -0700
changeset 13700 2889e2596bd6
parent 13699 733714f4dc24
child 13701 1949b688d5fb
2619 asynchronous destruction of ZFS file systems 2747 SPA versioning with zfs feature flags Reviewed by: Matt Ahrens <[email protected]> Reviewed by: George Wilson <[email protected]> Reviewed by: Richard Lowe <[email protected]> Reviewed by: Dan Kruchinin <[email protected]> Approved by: Eric Schrock <[email protected]>
usr/src/Makefile.lint
usr/src/cmd/Makefile
usr/src/cmd/mdb/common/modules/zfs/zfs.c
usr/src/cmd/zdb/zdb.c
usr/src/cmd/zpool/zpool_main.c
usr/src/cmd/ztest/Makefile.com
usr/src/cmd/ztest/ztest.c
usr/src/common/zfs/zpool_prop.c
usr/src/grub/capability
usr/src/grub/grub-0.97/stage2/fsys_zfs.c
usr/src/grub/grub-0.97/stage2/fsys_zfs.h
usr/src/grub/grub-0.97/stage2/zfs-include/dmu.h
usr/src/grub/grub-0.97/stage2/zfs-include/spa.h
usr/src/grub/grub-0.97/stage2/zfs-include/zfs.h
usr/src/lib/libc/port/mapfile-vers
usr/src/lib/libc/port/threads/assfail.c
usr/src/lib/libnvpair/Makefile.com
usr/src/lib/libnvpair/libnvpair.c
usr/src/lib/libnvpair/mapfile-vers
usr/src/lib/libzfs/Makefile.com
usr/src/lib/libzfs/common/libzfs.h
usr/src/lib/libzfs/common/libzfs_config.c
usr/src/lib/libzfs/common/libzfs_pool.c
usr/src/lib/libzfs/common/libzfs_status.c
usr/src/lib/libzfs/common/libzfs_util.c
usr/src/lib/libzfs/common/llib-lzfs
usr/src/lib/libzfs/common/mapfile-vers
usr/src/lib/libzpool/common/kernel.c
usr/src/lib/libzpool/common/llib-lzpool
usr/src/man/man1m/zpool.1m
usr/src/man/man5/Makefile
usr/src/pkg/manifests/system-file-system-zfs-tests.mf
usr/src/pkg/manifests/system-file-system-zfs.mf
usr/src/uts/common/Makefile.files
usr/src/uts/common/fs/zfs/arc.c
usr/src/uts/common/fs/zfs/dbuf.c
usr/src/uts/common/fs/zfs/ddt.c
usr/src/uts/common/fs/zfs/dmu.c
usr/src/uts/common/fs/zfs/dmu_send.c
usr/src/uts/common/fs/zfs/dmu_traverse.c
usr/src/uts/common/fs/zfs/dmu_tx.c
usr/src/uts/common/fs/zfs/dnode.c
usr/src/uts/common/fs/zfs/dnode_sync.c
usr/src/uts/common/fs/zfs/dsl_dataset.c
usr/src/uts/common/fs/zfs/dsl_deleg.c
usr/src/uts/common/fs/zfs/dsl_pool.c
usr/src/uts/common/fs/zfs/dsl_scan.c
usr/src/uts/common/fs/zfs/sa.c
usr/src/uts/common/fs/zfs/spa.c
usr/src/uts/common/fs/zfs/spa_config.c
usr/src/uts/common/fs/zfs/spa_misc.c
usr/src/uts/common/fs/zfs/sys/dmu.h
usr/src/uts/common/fs/zfs/sys/dmu_traverse.h
usr/src/uts/common/fs/zfs/sys/dsl_dataset.h
usr/src/uts/common/fs/zfs/sys/dsl_pool.h
usr/src/uts/common/fs/zfs/sys/dsl_scan.h
usr/src/uts/common/fs/zfs/sys/spa.h
usr/src/uts/common/fs/zfs/sys/spa_impl.h
usr/src/uts/common/fs/zfs/sys/vdev.h
usr/src/uts/common/fs/zfs/sys/vdev_impl.h
usr/src/uts/common/fs/zfs/sys/zap.h
usr/src/uts/common/fs/zfs/sys/zio.h
usr/src/uts/common/fs/zfs/vdev.c
usr/src/uts/common/fs/zfs/vdev_label.c
usr/src/uts/common/fs/zfs/zap.c
usr/src/uts/common/fs/zfs/zap_micro.c
usr/src/uts/common/fs/zfs/zfs_ioctl.c
usr/src/uts/common/fs/zfs/zio.c
usr/src/uts/common/sys/fs/zfs.h
usr/src/uts/common/sys/nvpair.h
--- a/usr/src/Makefile.lint	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/Makefile.lint	Mon May 21 12:11:39 2012 -0700
@@ -20,6 +20,7 @@
 #
 
 # Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2012 by Delphix. All rights reserved.
 
 # include global definitions
 include Makefile.master
@@ -324,6 +325,7 @@
 	cmd/zdb \
 	cmd/zdump \
 	cmd/zfs \
+	cmd/zhack \
 	cmd/zinject \
 	cmd/zlogin \
 	cmd/zoneadm \
--- a/usr/src/cmd/Makefile	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/cmd/Makefile	Mon May 21 12:11:39 2012 -0700
@@ -22,6 +22,7 @@
 # Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
 # Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
 # Copyright 2011 Joyent, Inc.  All rights reserved.
+# Copyright (c) 2012 by Delphix. All rights reserved.
 
 include ../Makefile.master
 
@@ -454,6 +455,7 @@
 	zdb		\
 	zdump		\
 	zfs		\
+	zhack		\
 	zic		\
 	zinject		\
 	zlogin		\
--- a/usr/src/cmd/mdb/common/modules/zfs/zfs.c	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/cmd/mdb/common/modules/zfs/zfs.c	Mon May 21 12:11:39 2012 -0700
@@ -21,6 +21,7 @@
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
  */
 
 /* Portions Copyright 2010 Robert Milkowski */
@@ -1237,6 +1238,9 @@
 		case VDEV_AUX_VERSION_OLDER:
 			aux = "VERS_OLDER";
 			break;
+		case VDEV_AUX_UNSUP_FEAT:
+			aux = "UNSUP_FEAT";
+			break;
 		case VDEV_AUX_SPARED:
 			aux = "SPARED";
 			break;
@@ -2182,7 +2186,7 @@
 		return (DCMD_ERR);
 	}
 
-	tzb = &stats.zab_type[DN_MAX_LEVELS][DMU_OT_NUMTYPES];
+	tzb = &stats.zab_type[DN_MAX_LEVELS][DMU_OT_TOTAL];
 	if (tzb->zb_gangs != 0) {
 		mdb_printf("Ganged blocks: %llu\n",
 		    (longlong_t)tzb->zb_gangs);
@@ -2198,7 +2202,7 @@
 	mdb_printf("\nBlocks\tLSIZE\tPSIZE\tASIZE"
 	    "\t  avg\t comp\t%%Total\tType\n");
 
-	for (t = 0; t <= DMU_OT_NUMTYPES; t++) {
+	for (t = 0; t <= DMU_OT_TOTAL; t++) {
 		char csize[NICENUM_BUFLEN], lsize[NICENUM_BUFLEN];
 		char psize[NICENUM_BUFLEN], asize[NICENUM_BUFLEN];
 		char avg[NICENUM_BUFLEN];
@@ -2209,6 +2213,8 @@
 
 		if (t == DMU_OT_DEFERRED)
 			strcpy(typename, "deferred free");
+		else if (t == DMU_OT_OTHER)
+			strcpy(typename, "other");
 		else if (t == DMU_OT_TOTAL)
 			strcpy(typename, "Total");
 		else if (mdb_readstr(typename, sizeof (typename),
--- a/usr/src/cmd/zdb/zdb.c	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/cmd/zdb/zdb.c	Mon May 21 12:11:39 2012 -0700
@@ -18,8 +18,10 @@
  *
  * CDDL HEADER END
  */
+
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
  */
 
 #include <stdio.h>
@@ -54,6 +56,7 @@
 #include <sys/zfs_fuid.h>
 #include <sys/arc.h>
 #include <sys/ddt.h>
+#include <sys/zfeature.h>
 #undef ZFS_MAXNAMELEN
 #undef verify
 #include <libzfs.h>
@@ -63,7 +66,8 @@
 #define	ZDB_CHECKSUM_NAME(idx) ((idx) < ZIO_CHECKSUM_FUNCTIONS ? \
     zio_checksum_table[(idx)].ci_name : "UNKNOWN")
 #define	ZDB_OT_NAME(idx) ((idx) < DMU_OT_NUMTYPES ? \
-    dmu_ot[(idx)].ot_name : "UNKNOWN")
+    dmu_ot[(idx)].ot_name : DMU_OT_IS_VALID(idx) ? \
+    dmu_ot_byteswap[DMU_OT_BYTESWAP(idx)].ob_name : "UNKNOWN")
 #define	ZDB_OT_TYPE(idx) ((idx) < DMU_OT_NUMTYPES ? (idx) : DMU_OT_NUMTYPES)
 
 #ifndef lint
@@ -1088,7 +1092,7 @@
 
 	ASSERT(size == sizeof (*ds));
 	crtime = ds->ds_creation_time;
-	zdb_nicenum(ds->ds_used_bytes, used);
+	zdb_nicenum(ds->ds_referenced_bytes, used);
 	zdb_nicenum(ds->ds_compressed_bytes, compressed);
 	zdb_nicenum(ds->ds_uncompressed_bytes, uncompressed);
 	zdb_nicenum(ds->ds_unique_bytes, unique);
@@ -1132,6 +1136,44 @@
 
 /* ARGSUSED */
 static int
+dump_bptree_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
+{
+	char blkbuf[BP_SPRINTF_LEN];
+
+	if (bp->blk_birth != 0) {
+		sprintf_blkptr(blkbuf, bp);
+		(void) printf("\t%s\n", blkbuf);
+	}
+	return (0);
+}
+
+static void
+dump_bptree(objset_t *os, uint64_t obj, char *name)
+{
+	char bytes[32];
+	bptree_phys_t *bt;
+	dmu_buf_t *db;
+
+	if (dump_opt['d'] < 3)
+		return;
+
+	VERIFY3U(0, ==, dmu_bonus_hold(os, obj, FTAG, &db));
+	bt = db->db_data;
+	zdb_nicenum(bt->bt_bytes, bytes);
+	(void) printf("\n    %s: %llu datasets, %s\n",
+	    name, (unsigned long long)(bt->bt_end - bt->bt_begin), bytes);
+	dmu_buf_rele(db, FTAG);
+
+	if (dump_opt['d'] < 5)
+		return;
+
+	(void) printf("\n");
+
+	(void) bptree_iterate(os, obj, B_FALSE, dump_bptree_cb, NULL, NULL);
+}
+
+/* ARGSUSED */
+static int
 dump_bpobj_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
 {
 	char blkbuf[BP_SPRINTF_LEN];
@@ -1883,11 +1925,13 @@
  */
 #define	ZDB_OT_DEFERRED	(DMU_OT_NUMTYPES + 0)
 #define	ZDB_OT_DITTO	(DMU_OT_NUMTYPES + 1)
-#define	ZDB_OT_TOTAL	(DMU_OT_NUMTYPES + 2)
+#define	ZDB_OT_OTHER	(DMU_OT_NUMTYPES + 2)
+#define	ZDB_OT_TOTAL	(DMU_OT_NUMTYPES + 3)
 
 static char *zdb_ot_extname[] = {
 	"deferred free",
 	"dedup ditto",
+	"other",
 	"Total",
 };
 
@@ -1968,9 +2012,10 @@
 
 	type = BP_GET_TYPE(bp);
 
-	zdb_count_block(zcb, zilog, bp, type);
-
-	is_metadata = (BP_GET_LEVEL(bp) != 0 || dmu_ot[type].ot_metadata);
+	zdb_count_block(zcb, zilog, bp,
+	    (type & DMU_OT_NEWTYPE) ? ZDB_OT_OTHER : type);
+
+	is_metadata = (BP_GET_LEVEL(bp) != 0 || DMU_OT_IS_METADATA(type));
 
 	if (dump_opt['c'] > 1 || (dump_opt['c'] && is_metadata)) {
 		int ioerr;
@@ -2195,6 +2240,12 @@
 	    count_block_cb, &zcb, NULL);
 	(void) bpobj_iterate_nofree(&spa->spa_dsl_pool->dp_free_bpobj,
 	    count_block_cb, &zcb, NULL);
+	if (spa_feature_is_active(spa,
+	    &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) {
+		VERIFY3U(0, ==, bptree_iterate(spa->spa_meta_objset,
+		    spa->spa_dsl_pool->dp_bptree_obj, B_FALSE, count_block_cb,
+		    &zcb, NULL));
+	}
 
 	if (dump_opt['c'] > 1)
 		flags |= TRAVERSE_PREFETCH_DATA;
@@ -2371,7 +2422,7 @@
 	}
 
 	if (BP_IS_HOLE(bp) || BP_GET_CHECKSUM(bp) == ZIO_CHECKSUM_OFF ||
-	    BP_GET_LEVEL(bp) > 0 || dmu_ot[BP_GET_TYPE(bp)].ot_metadata)
+	    BP_GET_LEVEL(bp) > 0 || DMU_OT_IS_METADATA(BP_GET_TYPE(bp)))
 		return (0);
 
 	ddt_key_fill(&zdde_search.zdde_key, bp);
@@ -2476,7 +2527,14 @@
 			dump_bpobj(&spa->spa_deferred_bpobj, "Deferred frees");
 			if (spa_version(spa) >= SPA_VERSION_DEADLISTS) {
 				dump_bpobj(&spa->spa_dsl_pool->dp_free_bpobj,
-				    "Pool frees");
+				    "Pool snapshot frees");
+			}
+
+			if (spa_feature_is_active(spa,
+			    &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) {
+				dump_bptree(spa->spa_meta_objset,
+				    spa->spa_dsl_pool->dp_bptree_obj,
+				    "Pool dataset frees");
 			}
 			dump_dtl(spa->spa_root_vdev, 0);
 		}
--- a/usr/src/cmd/zpool/zpool_main.c	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/cmd/zpool/zpool_main.c	Mon May 21 12:11:39 2012 -0700
@@ -51,6 +51,7 @@
 
 #include "zpool_util.h"
 #include "zfs_comutil.h"
+#include "zfeature_common.h"
 
 #include "statcommon.h"
 
@@ -200,7 +201,7 @@
 	case HELP_CLEAR:
 		return (gettext("\tclear [-nF] <pool> [device]\n"));
 	case HELP_CREATE:
-		return (gettext("\tcreate [-fn] [-o property=value] ... \n"
+		return (gettext("\tcreate [-fnd] [-o property=value] ... \n"
 		    "\t    [-O file-system-property=value] ... \n"
 		    "\t    [-m mountpoint] [-R root] <pool> <vdev> ...\n"));
 	case HELP_DESTROY:
@@ -331,6 +332,12 @@
 		/* Iterate over all properties */
 		(void) zprop_iter(print_prop_cb, fp, B_FALSE, B_TRUE,
 		    ZFS_TYPE_POOL);
+
+		(void) fprintf(fp, "\t%-15s   ", "feature@...");
+		(void) fprintf(fp, "YES   disabled | enabled | active\n");
+
+		(void) fprintf(fp, gettext("\nThe feature@ properties must be "
+		    "appended with a feature name.\nSee zpool-features(5).\n"));
 	}
 
 	/*
@@ -397,12 +404,16 @@
 	proplist = *props;
 
 	if (poolprop) {
-		if ((prop = zpool_name_to_prop(propname)) == ZPROP_INVAL) {
+		if ((prop = zpool_name_to_prop(propname)) == ZPROP_INVAL &&
+		    !zpool_prop_feature(propname)) {
 			(void) fprintf(stderr, gettext("property '%s' is "
 			    "not a valid pool property\n"), propname);
 			return (2);
 		}
-		normnm = zpool_prop_to_name(prop);
+		if (zpool_prop_feature(propname))
+			normnm = propname;
+		else
+			normnm = zpool_prop_to_name(prop);
 	} else {
 		if ((fprop = zfs_name_to_prop(propname)) != ZPROP_INVAL) {
 			normnm = zfs_prop_to_name(fprop);
@@ -574,7 +585,7 @@
 }
 
 /*
- * zpool create [-fn] [-o property=value] ...
+ * zpool create [-fnd] [-o property=value] ...
  *		[-O file-system-property=value] ...
  *		[-R root] [-m mountpoint] <pool> <dev> ...
  *
@@ -583,8 +594,10 @@
  *		were to be created.
  *      -R	Create a pool under an alternate root
  *      -m	Set default mountpoint for the root dataset.  By default it's
- *      	'/<pool>'
+ *		'/<pool>'
  *	-o	Set property=value.
+ *	-d	Don't automatically enable all supported pool features
+ *		(individual features can be enabled with -o).
  *	-O	Set fsproperty=value in the pool's root file system
  *
  * Creates the named pool according to the given vdev specification.  The
@@ -597,6 +610,7 @@
 {
 	boolean_t force = B_FALSE;
 	boolean_t dryrun = B_FALSE;
+	boolean_t enable_all_pool_feat = B_TRUE;
 	int c;
 	nvlist_t *nvroot = NULL;
 	char *poolname;
@@ -608,7 +622,7 @@
 	char *propval;
 
 	/* check options */
-	while ((c = getopt(argc, argv, ":fnR:m:o:O:")) != -1) {
+	while ((c = getopt(argc, argv, ":fndR:m:o:O:")) != -1) {
 		switch (c) {
 		case 'f':
 			force = B_TRUE;
@@ -616,6 +630,9 @@
 		case 'n':
 			dryrun = B_TRUE;
 			break;
+		case 'd':
+			enable_all_pool_feat = B_FALSE;
+			break;
 		case 'R':
 			altroot = optarg;
 			if (add_prop_list(zpool_prop_to_name(
@@ -643,6 +660,21 @@
 
 			if (add_prop_list(optarg, propval, &props, B_TRUE))
 				goto errout;
+
+			/*
+			 * If the user is creating a pool that doesn't support
+			 * feature flags, don't enable any features.
+			 */
+			if (zpool_name_to_prop(optarg) == ZPOOL_PROP_VERSION) {
+				char *end;
+				u_longlong_t ver;
+
+				ver = strtoull(propval, &end, 10);
+				if (*end == '\0' &&
+				    ver < SPA_VERSION_FEATURES) {
+					enable_all_pool_feat = B_FALSE;
+				}
+			}
 			break;
 		case 'O':
 			if ((propval = strchr(optarg, '=')) == NULL) {
@@ -708,7 +740,6 @@
 		goto errout;
 	}
 
-
 	if (altroot != NULL && altroot[0] != '/') {
 		(void) fprintf(stderr, gettext("invalid alternate root '%s': "
 		    "must be an absolute path\n"), altroot);
@@ -790,6 +821,27 @@
 		/*
 		 * Hand off to libzfs.
 		 */
+		if (enable_all_pool_feat) {
+			int i;
+			for (i = 0; i < SPA_FEATURES; i++) {
+				char propname[MAXPATHLEN];
+				zfeature_info_t *feat = &spa_feature_table[i];
+
+				(void) snprintf(propname, sizeof (propname),
+				    "feature@%s", feat->fi_uname);
+
+				/*
+				 * Skip feature if user specified it manually
+				 * on the command line.
+				 */
+				if (nvlist_exists(props, propname))
+					continue;
+
+				if (add_prop_list(propname, ZFS_FEATURE_ENABLED,
+				    &props, B_TRUE) != 0)
+					goto errout;
+			}
+		}
 		if (zpool_create(g_zfs, poolname,
 		    nvroot, props, fsprops) == 0) {
 			zfs_handle_t *pool = zfs_open(g_zfs, poolname,
@@ -1121,6 +1173,10 @@
 			(void) printf(gettext("newer version"));
 			break;
 
+		case VDEV_AUX_UNSUP_FEAT:
+			(void) printf(gettext("unsupported feature(s)"));
+			break;
+
 		case VDEV_AUX_SPARED:
 			verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
 			    &cb.cb_guid) == 0);
@@ -1238,6 +1294,10 @@
 			(void) printf(gettext("newer version"));
 			break;
 
+		case VDEV_AUX_UNSUP_FEAT:
+			(void) printf(gettext("unsupported feature(s)"));
+			break;
+
 		case VDEV_AUX_ERR_EXCEEDED:
 			(void) printf(gettext("too many errors"));
 			break;
@@ -1404,6 +1464,20 @@
 		    "incompatible version.\n"));
 		break;
 
+	case ZPOOL_STATUS_UNSUP_FEAT_READ:
+		(void) printf(gettext("status: The pool uses the following "
+		    "feature(s) not supported on this sytem:\n"));
+		zpool_print_unsup_feat(config);
+		break;
+
+	case ZPOOL_STATUS_UNSUP_FEAT_WRITE:
+		(void) printf(gettext("status: The pool can only be accessed "
+		    "in read-only mode on this system. It\n\tcannot be "
+		    "accessed in read-write mode because it uses the "
+		    "following\n\tfeature(s) not supported on this system:\n"));
+		zpool_print_unsup_feat(config);
+		break;
+
 	case ZPOOL_STATUS_HOSTID_MISMATCH:
 		(void) printf(gettext(" status: The pool was last accessed by "
 		    "another system.\n"));
@@ -1461,6 +1535,20 @@
 			    "newer\n\tsoftware, or recreate the pool from "
 			    "backup.\n"));
 			break;
+		case ZPOOL_STATUS_UNSUP_FEAT_READ:
+			(void) printf(gettext("action: The pool cannot be "
+			    "imported. Access the pool on a system that "
+			    "supports\n\tthe required feature(s), or recreate "
+			    "the pool from backup.\n"));
+			break;
+		case ZPOOL_STATUS_UNSUP_FEAT_WRITE:
+			(void) printf(gettext("action: The pool cannot be "
+			    "imported in read-write mode. Import the pool "
+			    "with\n"
+			    "\t\"-o readonly=on\", access the pool on a system "
+			    "that supports the\n\trequired feature(s), or "
+			    "recreate the pool from backup.\n"));
+			break;
 		case ZPOOL_STATUS_MISSING_DEV_R:
 		case ZPOOL_STATUS_MISSING_DEV_NR:
 		case ZPOOL_STATUS_BAD_GUID_SUM:
@@ -1536,9 +1624,9 @@
 	    ZPOOL_CONFIG_POOL_STATE, &state) == 0);
 	verify(nvlist_lookup_uint64(config,
 	    ZPOOL_CONFIG_VERSION, &version) == 0);
-	if (version > SPA_VERSION) {
+	if (!SPA_VERSION_IS_SUPPORTED(version)) {
 		(void) fprintf(stderr, gettext("cannot import '%s': pool "
-		    "is formatted using a newer ZFS version\n"), name);
+		    "is formatted using an unsupported ZFS version\n"), name);
 		return (1);
 	} else if (state != POOL_STATE_EXPORTED &&
 	    !(flags & ZFS_IMPORT_ANY_HOST)) {
@@ -2473,15 +2561,13 @@
 print_header(list_cbdata_t *cb)
 {
 	zprop_list_t *pl = cb->cb_proplist;
+	char headerbuf[ZPOOL_MAXPROPLEN];
 	const char *header;
 	boolean_t first = B_TRUE;
 	boolean_t right_justify;
 	size_t width = 0;
 
 	for (; pl != NULL; pl = pl->pl_next) {
-		if (pl->pl_prop == ZPROP_INVAL)
-			continue;
-
 		width = pl->pl_width;
 		if (first && cb->cb_verbose) {
 			/*
@@ -2496,8 +2582,18 @@
 		else
 			first = B_FALSE;
 
-		header = zpool_prop_column_name(pl->pl_prop);
-		right_justify = zpool_prop_align_right(pl->pl_prop);
+		right_justify = B_FALSE;
+		if (pl->pl_prop != ZPROP_INVAL) {
+			header = zpool_prop_column_name(pl->pl_prop);
+			right_justify = zpool_prop_align_right(pl->pl_prop);
+		} else {
+			int i;
+
+			for (i = 0; pl->pl_user_prop[i] != '\0'; i++)
+				headerbuf[i] = toupper(pl->pl_user_prop[i]);
+			headerbuf[i] = '\0';
+			header = headerbuf;
+		}
 
 		if (pl->pl_next == NULL && !right_justify)
 			(void) printf("%s", header);
@@ -2557,6 +2653,11 @@
 				propstr = property;
 
 			right_justify = zpool_prop_align_right(pl->pl_prop);
+		} else if ((zpool_prop_feature(pl->pl_user_prop) ||
+		    zpool_prop_unsupported(pl->pl_user_prop)) &&
+		    zpool_prop_get_feature(zhp, pl->pl_user_prop, property,
+		    sizeof (property)) == 0) {
+			propstr = property;
 		} else {
 			propstr = "-";
 		}
@@ -3896,6 +3997,31 @@
 		    "backup.\n"));
 		break;
 
+	case ZPOOL_STATUS_UNSUP_FEAT_READ:
+		(void) printf(gettext("status: The pool cannot be accessed on "
+		    "this system because it uses the\n\tfollowing feature(s) "
+		    "not supported on this system:\n"));
+		zpool_print_unsup_feat(config);
+		(void) printf("\n");
+		(void) printf(gettext("action: Access the pool from a system "
+		    "that supports the required feature(s),\n\tor restore the "
+		    "pool from backup.\n"));
+		break;
+
+	case ZPOOL_STATUS_UNSUP_FEAT_WRITE:
+		(void) printf(gettext("status: The pool can only be accessed "
+		    "in read-only mode on this system. It\n\tcannot be "
+		    "accessed in read-write mode because it uses the "
+		    "following\n\tfeature(s) not supported on this system:\n"));
+		zpool_print_unsup_feat(config);
+		(void) printf("\n");
+		(void) printf(gettext("action: The pool cannot be accessed in "
+		    "read-write mode. Import the pool with\n"
+		    "\t\"-o readonly=on\", access the pool from a system that "
+		    "supports the\n\trequired feature(s), or restore the "
+		    "pool from backup.\n"));
+		break;
+
 	case ZPOOL_STATUS_FAULTED_DEV_R:
 		(void) printf(gettext("status: One or more devices are "
 		    "faulted in response to persistent errors.\n\tSufficient "
@@ -4120,7 +4246,8 @@
 	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION,
 	    &version) == 0);
 
-	if (!cbp->cb_newer && version < SPA_VERSION) {
+	if (!cbp->cb_newer && SPA_VERSION_IS_SUPPORTED(version) &&
+	    version != SPA_VERSION) {
 		if (!cbp->cb_all) {
 			if (cbp->cb_first) {
 				(void) printf(gettext("The following pools are "
@@ -4143,13 +4270,14 @@
 				    "'%s'\n\n"), zpool_get_name(zhp));
 			}
 		}
-	} else if (cbp->cb_newer && version > SPA_VERSION) {
+	} else if (cbp->cb_newer && !SPA_VERSION_IS_SUPPORTED(version)) {
 		assert(!cbp->cb_all);
 
 		if (cbp->cb_first) {
 			(void) printf(gettext("The following pools are "
-			    "formatted using a newer software version and\n"
-			    "cannot be accessed on the current system.\n\n"));
+			    "formatted using an unsupported software version "
+			    "and\ncannot be accessed on the current "
+			    "system.\n\n"));
 			(void) printf(gettext("VER  POOL\n"));
 			(void) printf(gettext("---  ------------\n"));
 			cbp->cb_first = B_FALSE;
@@ -4233,8 +4361,8 @@
 			break;
 		case 'V':
 			cb.cb_version = strtoll(optarg, &end, 10);
-			if (*end != '\0' || cb.cb_version > SPA_VERSION ||
-			    cb.cb_version < SPA_VERSION_1) {
+			if (*end != '\0' ||
+			    !SPA_VERSION_IS_SUPPORTED(cb.cb_version)) {
 				(void) fprintf(stderr,
 				    gettext("invalid version '%s'\n"), optarg);
 				usage(B_FALSE);
@@ -4279,8 +4407,8 @@
 		}
 	}
 
-	(void) printf(gettext("This system is currently running "
-	    "ZFS pool version %llu.\n\n"), SPA_VERSION);
+	(void) printf(gettext("This system supports ZFS pool feature "
+	    "flags.\n\n"));
 	cb.cb_first = B_TRUE;
 	if (showversions) {
 		(void) printf(gettext("The following versions are "
@@ -4531,13 +4659,26 @@
 		    pl == cbp->cb_proplist)
 			continue;
 
-		if (zpool_get_prop(zhp, pl->pl_prop,
-		    value, sizeof (value), &srctype) != 0)
-			continue;
-
-		zprop_print_one_property(zpool_get_name(zhp), cbp,
-		    zpool_prop_to_name(pl->pl_prop), value, srctype, NULL,
-		    NULL);
+		if (pl->pl_prop == ZPROP_INVAL &&
+		    (zpool_prop_feature(pl->pl_user_prop) ||
+		    zpool_prop_unsupported(pl->pl_user_prop))) {
+			srctype = ZPROP_SRC_LOCAL;
+
+			if (zpool_prop_get_feature(zhp, pl->pl_user_prop,
+			    value, sizeof (value)) == 0) {
+				zprop_print_one_property(zpool_get_name(zhp),
+				    cbp, pl->pl_user_prop, value, srctype,
+				    NULL, NULL);
+			}
+		} else {
+			if (zpool_get_prop(zhp, pl->pl_prop, value,
+			    sizeof (value), &srctype) != 0)
+				continue;
+
+			zprop_print_one_property(zpool_get_name(zhp), cbp,
+			    zpool_prop_to_name(pl->pl_prop), value, srctype,
+			    NULL, NULL);
+		}
 	}
 	return (0);
 }
@@ -4549,8 +4690,11 @@
 	zprop_list_t fake_name = { 0 };
 	int ret;
 
-	if (argc < 3)
+	if (argc < 2) {
+		(void) fprintf(stderr, gettext("missing property "
+		    "argument\n"));
 		usage(B_FALSE);
+	}
 
 	cb.cb_first = B_TRUE;
 	cb.cb_sources = ZPROP_SRC_ALL;
@@ -4560,7 +4704,7 @@
 	cb.cb_columns[3] = GET_COL_SOURCE;
 	cb.cb_type = ZFS_TYPE_POOL;
 
-	if (zprop_get_list(g_zfs, argv[1],  &cb.cb_proplist,
+	if (zprop_get_list(g_zfs, argv[1], &cb.cb_proplist,
 	    ZFS_TYPE_POOL) != 0)
 		usage(B_FALSE);
 
--- a/usr/src/cmd/ztest/Makefile.com	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/cmd/ztest/Makefile.com	Mon May 21 12:11:39 2012 -0700
@@ -20,6 +20,7 @@
 #
 #
 # Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2012 by Delphix. All rights reserved.
 #
 
 PROG= ztest
@@ -29,8 +30,9 @@
 include ../../Makefile.cmd
 include ../../Makefile.ctf
 
-INCS += -I../../../lib/libzpool/common 
-INCS += -I../../../uts/common/fs/zfs 
+INCS += -I../../../lib/libzpool/common
+INCS += -I../../../uts/common/fs/zfs
+INCS += -I../../../common/zfs
 
 LDLIBS += -lumem -lzpool -lcmdutils -lm -lnvpair
 
--- a/usr/src/cmd/ztest/ztest.c	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/cmd/ztest/ztest.c	Mon May 21 12:11:39 2012 -0700
@@ -106,6 +106,7 @@
 #include <sys/dsl_scan.h>
 #include <sys/zio_checksum.h>
 #include <sys/refcount.h>
+#include <sys/zfeature.h>
 #include <stdio.h>
 #include <stdio_ext.h>
 #include <stdlib.h>
@@ -5572,10 +5573,9 @@
 {
 	nvlist_t *props;
 
+	VERIFY(nvlist_alloc(&props, NV_UNIQUE_NAME, 0) == 0);
 	if (ztest_random(2) == 0)
-		return (NULL);
-
-	VERIFY(nvlist_alloc(&props, NV_UNIQUE_NAME, 0) == 0);
+		return (props);
 	VERIFY(nvlist_add_uint64(props, "autoreplace", 1) == 0);
 
 	return (props);
@@ -5606,6 +5606,12 @@
 	nvroot = make_vdev_root(NULL, NULL, ztest_opts.zo_vdev_size, 0,
 	    0, ztest_opts.zo_raidz, zs->zs_mirrors, 1);
 	props = make_random_props();
+	for (int i = 0; i < SPA_FEATURES; i++) {
+		char buf[1024];
+		(void) snprintf(buf, sizeof (buf), "feature@%s",
+		    spa_feature_table[i].fi_uname);
+		VERIFY3U(0, ==, nvlist_add_uint64(props, buf, 0));
+	}
 	VERIFY3U(0, ==, spa_create(ztest_opts.zo_pool, nvroot, props,
 	    NULL, NULL));
 	nvlist_free(nvroot);
@@ -5613,6 +5619,7 @@
 	VERIFY3U(0, ==, spa_open(ztest_opts.zo_pool, &spa, FTAG));
 	zs->zs_metaslab_sz =
 	    1ULL << spa->spa_root_vdev->vdev_child[0]->vdev_ms_shift;
+
 	spa_close(spa, FTAG);
 
 	kernel_fini();
@@ -5642,15 +5649,32 @@
 	ASSERT3U(fd, ==, ZTEST_FD_RAND);
 }
 
+static int
+shared_data_size(ztest_shared_hdr_t *hdr)
+{
+	int size;
+
+	size = hdr->zh_hdr_size;
+	size += hdr->zh_opts_size;
+	size += hdr->zh_size;
+	size += hdr->zh_stats_size * hdr->zh_stats_count;
+	size += hdr->zh_ds_size * hdr->zh_ds_count;
+
+	return (size);
+}
+
 static void
 setup_hdr(void)
 {
+	int size;
 	ztest_shared_hdr_t *hdr;
 
 	hdr = (void *)mmap(0, P2ROUNDUP(sizeof (*hdr), getpagesize()),
 	    PROT_READ | PROT_WRITE, MAP_SHARED, ZTEST_FD_DATA, 0);
 	ASSERT(hdr != MAP_FAILED);
 
+	VERIFY3U(0, ==, ftruncate(ZTEST_FD_DATA, sizeof (ztest_shared_hdr_t)));
+
 	hdr->zh_hdr_size = sizeof (ztest_shared_hdr_t);
 	hdr->zh_opts_size = sizeof (ztest_shared_opts_t);
 	hdr->zh_size = sizeof (ztest_shared_t);
@@ -5659,6 +5683,9 @@
 	hdr->zh_ds_size = sizeof (ztest_shared_ds_t);
 	hdr->zh_ds_count = ztest_opts.zo_datasets;
 
+	size = shared_data_size(hdr);
+	VERIFY3U(0, ==, ftruncate(ZTEST_FD_DATA, size));
+
 	(void) munmap((caddr_t)hdr, P2ROUNDUP(sizeof (*hdr), getpagesize()));
 }
 
@@ -5673,11 +5700,7 @@
 	    PROT_READ, MAP_SHARED, ZTEST_FD_DATA, 0);
 	ASSERT(hdr != MAP_FAILED);
 
-	size = hdr->zh_hdr_size;
-	size += hdr->zh_opts_size;
-	size += hdr->zh_size;
-	size += hdr->zh_stats_size * hdr->zh_stats_count;
-	size += hdr->zh_ds_size * hdr->zh_ds_count;
+	size = shared_data_size(hdr);
 
 	(void) munmap((caddr_t)hdr, P2ROUNDUP(sizeof (*hdr), getpagesize()));
 	hdr = ztest_shared_hdr = (void *)mmap(0, P2ROUNDUP(size, getpagesize()),
--- a/usr/src/common/zfs/zpool_prop.c	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/common/zfs/zpool_prop.c	Mon May 21 12:11:39 2012 -0700
@@ -79,6 +79,8 @@
 	    ZFS_TYPE_POOL, "<size>", "SIZE");
 	zprop_register_number(ZPOOL_PROP_FREE, "free", 0, PROP_READONLY,
 	    ZFS_TYPE_POOL, "<size>", "FREE");
+	zprop_register_number(ZPOOL_PROP_FREEING, "freeing", 0, PROP_READONLY,
+	    ZFS_TYPE_POOL, "<size>", "FREEING");
 	zprop_register_number(ZPOOL_PROP_ALLOCATED, "allocated", 0,
 	    PROP_READONLY, ZFS_TYPE_POOL, "<size>", "ALLOC");
 	zprop_register_number(ZPOOL_PROP_EXPANDSZ, "expandsize", 0,
@@ -166,6 +168,26 @@
 	return (zpool_prop_table[prop].pd_numdefault);
 }
 
+/*
+ * Returns true if this is a valid feature@ property.
+ */
+boolean_t
+zpool_prop_feature(const char *name)
+{
+	static const char *prefix = "feature@";
+	return (strncmp(name, prefix, strlen(prefix)) == 0);
+}
+
+/*
+ * Returns true if this is a valid unsupported@ property.
+ */
+boolean_t
+zpool_prop_unsupported(const char *name)
+{
+	static const char *prefix = "unsupported@";
+	return (strncmp(name, prefix, strlen(prefix)) == 0);
+}
+
 int
 zpool_prop_string_to_index(zpool_prop_t prop, const char *string,
     uint64_t *index)
--- a/usr/src/grub/capability	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/grub/capability	Mon May 21 12:11:39 2012 -0700
@@ -19,7 +19,7 @@
 # CDDL HEADER END
 #
 # Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
-#
+# Copyright (c) 2012 by Delphix. All rights reserved.
 #
 # This file defines the current capabilities of GRUB over and above that
 # supported by the standard distribution
@@ -39,7 +39,7 @@
 # This file and the associated version are Solaris specific and are
 # not a part of the open source distribution of GRUB.
 #
-VERSION=20
+VERSION=21
 dboot
 xVM
 zfs
--- a/usr/src/grub/grub-0.97/stage2/fsys_zfs.c	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/grub/grub-0.97/stage2/fsys_zfs.c	Mon May 21 12:11:39 2012 -0700
@@ -16,12 +16,17 @@
  *  along with this program; if not, write to the Free Software
  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
+
 /*
  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
 /*
+ * Copyright (c) 2012 by Delphix. All rights reserved.
+ */
+
+/*
  * The zfs plug-in routines for GRUB are:
  *
  * zfs_mount() - locates a valid uberblock of the root pool and reads
@@ -118,16 +123,16 @@
 
 /* Checksum Table and Values */
 zio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS] = {
-	NULL,			NULL,			0, 0,	"inherit",
-	NULL,			NULL,			0, 0,	"on",
-	zio_checksum_off,	zio_checksum_off,	0, 0,	"off",
-	zio_checksum_SHA256,	zio_checksum_SHA256,	1, 1,	"label",
-	zio_checksum_SHA256,	zio_checksum_SHA256,	1, 1,	"gang_header",
-	NULL,			NULL,			0, 0,	"zilog",
-	fletcher_2_native,	fletcher_2_byteswap,	0, 0,	"fletcher2",
-	fletcher_4_native,	fletcher_4_byteswap,	1, 0,	"fletcher4",
-	zio_checksum_SHA256,	zio_checksum_SHA256,	1, 0,	"SHA256",
-	NULL,			NULL,			0, 0,	"zilog2",
+	{{NULL,			NULL},			0, 0,	"inherit"},
+	{{NULL,			NULL},			0, 0,	"on"},
+	{{zio_checksum_off,	zio_checksum_off},	0, 0,	"off"},
+	{{zio_checksum_SHA256,	zio_checksum_SHA256},	1, 1,	"label"},
+	{{zio_checksum_SHA256,	zio_checksum_SHA256},	1, 1,	"gang_header"},
+	{{NULL,			NULL},			0, 0,	"zilog"},
+	{{fletcher_2_native,	fletcher_2_byteswap},	0, 0,	"fletcher2"},
+	{{fletcher_4_native,	fletcher_4_byteswap},	1, 0,	"fletcher4"},
+	{{zio_checksum_SHA256,	zio_checksum_SHA256},	1, 0,	"SHA256"},
+	{{NULL,			NULL},			0, 0,	"zilog2"},
 };
 
 /*
@@ -217,9 +222,6 @@
  * Three pieces of information are needed to verify an uberblock: the magic
  * number, the version number, and the checksum.
  *
- * Currently Implemented: version number, magic number
- * Need to Implement: checksum
- *
  * Return:
  *     0 - Success
  *    -1 - Failure
@@ -238,7 +240,7 @@
 		return (-1);
 
 	if (uber->ub_magic == UBERBLOCK_MAGIC &&
-	    uber->ub_version > 0 && uber->ub_version <= SPA_VERSION)
+	    SPA_VERSION_IS_SUPPORTED(uber->ub_version))
 		return (0);
 
 	return (-1);
@@ -296,7 +298,7 @@
 	zio_gb = (zio_gbh_phys_t *)stack;
 	stack += SPA_GANGBLOCKSIZE;
 	offset = DVA_GET_OFFSET(dva);
-	sector =  DVA_OFFSET_TO_PHYS_SECTOR(offset);
+	sector = DVA_OFFSET_TO_PHYS_SECTOR(offset);
 
 	/* read in the gang block header */
 	if (devread(sector, 0, SPA_GANGBLOCKSIZE, (char *)zio_gb) == 0) {
@@ -355,8 +357,8 @@
 		} else {
 			/* read in a data block */
 			offset = DVA_GET_OFFSET(&bp->blk_dva[i]);
-			sector =  DVA_OFFSET_TO_PHYS_SECTOR(offset);
-			if (devread(sector, 0, psize, buf))
+			sector = DVA_OFFSET_TO_PHYS_SECTOR(offset);
+			if (devread(sector, 0, psize, buf) != 0)
 				return (0);
 		}
 	}
@@ -400,7 +402,7 @@
 		stack += psize;
 	}
 
-	if (zio_read_data(bp, buf, stack)) {
+	if (zio_read_data(bp, buf, stack) != 0) {
 		grub_printf("zio_read_data failed\n");
 		return (ERR_FSYS_CORRUPT);
 	}
@@ -466,13 +468,13 @@
  *	errnum - failure
  */
 static int
-mzap_lookup(mzap_phys_t *zapobj, int objsize, char *name,
+mzap_lookup(mzap_phys_t *zapobj, int objsize, const char *name,
 	uint64_t *value)
 {
 	int i, chunks;
 	mzap_ent_phys_t *mzap_ent = zapobj->mz_chunk;
 
-	chunks = objsize/MZAP_ENT_LEN - 1;
+	chunks = objsize / MZAP_ENT_LEN - 1;
 	for (i = 0; i < chunks; i++) {
 		if (grub_strcmp(mzap_ent[i].mze_name, name) == 0) {
 			*value = mzap_ent[i].mze_value;
@@ -512,8 +514,8 @@
 	/*
 	 * Only use 28 bits, since we need 4 bits in the cookie for the
 	 * collision differentiator.  We MUST use the high bits, since
-	 * those are the onces that we first pay attention to when
-	 * chosing the bucket.
+	 * those are the ones that we first pay attention to when
+	 * choosing the bucket.
 	 */
 	crc &= ~((1ULL << (64 - 28)) - 1);
 
@@ -618,7 +620,7 @@
  */
 static int
 fzap_lookup(dnode_phys_t *zap_dnode, zap_phys_t *zap,
-    char *name, uint64_t *value, char *stack)
+    const char *name, uint64_t *value, char *stack)
 {
 	zap_leaf_phys_t *l;
 	uint64_t hash, idx, blkid;
@@ -661,7 +663,8 @@
  *	errnum - failure
  */
 static int
-zap_lookup(dnode_phys_t *zap_dnode, char *name, uint64_t *val, char *stack)
+zap_lookup(dnode_phys_t *zap_dnode, const char *name, uint64_t *val,
+    char *stack)
 {
 	uint64_t block_type;
 	int size;
@@ -672,7 +675,7 @@
 	size = zap_dnode->dn_datablkszsec << SPA_MINBLOCKSHIFT;
 	stack += size;
 
-	if (errnum = dmu_read(zap_dnode, 0, zapbuf, stack))
+	if ((errnum = dmu_read(zap_dnode, 0, zapbuf, stack)) != 0)
 		return (errnum);
 
 	block_type = *((uint64_t *)zapbuf);
@@ -688,6 +691,56 @@
 	return (ERR_FSYS_CORRUPT);
 }
 
+typedef struct zap_attribute {
+	int za_integer_length;
+	uint64_t za_num_integers;
+	uint64_t za_first_integer;
+	char *za_name;
+} zap_attribute_t;
+
+typedef int (zap_cb_t)(zap_attribute_t *za, void *arg, char *stack);
+
+static int
+zap_iterate(dnode_phys_t *zap_dnode, zap_cb_t *cb, void *arg, char *stack)
+{
+	uint32_t size = zap_dnode->dn_datablkszsec << SPA_MINBLOCKSHIFT;
+	zap_attribute_t za;
+	int i;
+	mzap_phys_t *mzp = (mzap_phys_t *)stack;
+	stack += size;
+
+	if ((errnum = dmu_read(zap_dnode, 0, mzp, stack)) != 0)
+		return (errnum);
+
+	/*
+	 * Iteration over fatzap objects has not yet been implemented.
+	 * If we encounter a pool in which there are more features for
+	 * read than can fit inside a microzap (i.e., more than 2048
+	 * features for read), we can add support for fatzap iteration.
+	 * For now, fail.
+	 */
+	if (mzp->mz_block_type != ZBT_MICRO) {
+		grub_printf("feature information stored in fatzap, pool "
+		    "version not supported\n");
+		return (1);
+	}
+
+	za.za_integer_length = 8;
+	za.za_num_integers = 1;
+	for (i = 0; i < size / MZAP_ENT_LEN - 1; i++) {
+		mzap_ent_phys_t *mzep = &mzp->mz_chunk[i];
+		int err;
+
+		za.za_first_integer = mzep->mze_value;
+		za.za_name = mzep->mze_name;
+		err = cb(&za, arg, stack);
+		if (err != 0)
+			return (err);
+	}
+
+	return (0);
+}
+
 /*
  * Get the dnode of an object number from the metadnode of an object set.
  *
@@ -767,6 +820,24 @@
 	return (0);
 }
 
+static int
+check_feature(zap_attribute_t *za, void *arg, char *stack)
+{
+	const char **names = arg;
+	int i;
+
+	if (za->za_first_integer == 0)
+		return (0);
+
+	for (i = 0; names[i] != NULL; i++) {
+		if (grub_strcmp(za->za_name, names[i]) == 0) {
+			return (0);
+		}
+	}
+	grub_printf("missing feature for read '%s'\n", za->za_name);
+	return (ERR_NEWER_VERSION);
+}
+
 /*
  * Get the file dnode for a given file name where mdn is the meta dnode
  * for this ZFS object set. When found, place the file dnode in dn.
@@ -803,11 +874,11 @@
 	while (*path == '/')
 		path++;
 
-	while (*path && !isspace(*path)) {
+	while (*path && !grub_isspace(*path)) {
 
 		/* get the next component name */
 		cname = path;
-		while (*path && !isspace(*path) && *path != '/')
+		while (*path && !grub_isspace(*path) && *path != '/')
 			path++;
 		ch = *path;
 		*path = 0;   /* ensure null termination */
@@ -869,6 +940,53 @@
 }
 
 /*
+ * List of pool features that the grub implementation of ZFS supports for
+ * read. Note that features that are only required for write do not need
+ * to be listed here since grub opens pools in read-only mode.
+ */
+static const char *spa_feature_names[] = {
+	NULL
+};
+
+/*
+ * Checks whether the MOS features that are active are supported by this
+ * (GRUB's) implementation of ZFS.
+ *
+ * Return:
+ *	0: Success.
+ *	errnum: Failure.
+ */
+static int
+check_mos_features(dnode_phys_t *mosmdn, char *stack)
+{
+	uint64_t objnum;
+	dnode_phys_t *dn;
+	uint8_t error = 0;
+
+	dn = (dnode_phys_t *)stack;
+	stack += DNODE_SIZE;
+
+	if ((errnum = dnode_get(mosmdn, DMU_POOL_DIRECTORY_OBJECT,
+	    DMU_OT_OBJECT_DIRECTORY, dn, stack)) != 0)
+		return (errnum);
+
+	/*
+	 * Find the object number for 'features_for_read' and retrieve its
+	 * corresponding dnode. Note that we don't check features_for_write
+	 * because GRUB is not opening the pool for write.
+	 */
+	if ((errnum = zap_lookup(dn, DMU_POOL_FEATURES_FOR_READ, &objnum,
+	    stack)) != 0)
+		return (errnum);
+
+	if ((errnum = dnode_get(mosmdn, objnum, DMU_OTN_ZAP_METADATA,
+	    dn, stack)) != 0)
+		return (errnum);
+
+	return (zap_iterate(dn, check_feature, spa_feature_names, stack));
+}
+
+/*
  * Given a MOS metadnode, get the metadnode of a given filesystem name (fsname),
  * e.g. pool/rootfs, or a given object number (obj), e.g. the object number
  * of pool/rootfs.
@@ -915,23 +1033,24 @@
 	}
 
 	/* take out the pool name */
-	while (*fsname && !isspace(*fsname) && *fsname != '/')
+	while (*fsname && !grub_isspace(*fsname) && *fsname != '/')
 		fsname++;
 
-	while (*fsname && !isspace(*fsname)) {
+	while (*fsname && !grub_isspace(*fsname)) {
 		uint64_t childobj;
 
 		while (*fsname == '/')
 			fsname++;
 
 		cname = fsname;
-		while (*fsname && !isspace(*fsname) && *fsname != '/')
+		while (*fsname && !grub_isspace(*fsname) && *fsname != '/')
 			fsname++;
 		ch = *fsname;
 		*fsname = 0;
 
 		snapname = cname;
-		while (*snapname && !isspace(*snapname) && *snapname != '@')
+		while (*snapname && !grub_isspace(*snapname) && *snapname !=
+		    '@')
 			snapname++;
 		if (*snapname == '@') {
 			issnapshot = 1;
@@ -1020,8 +1139,7 @@
 	if (nvlist[0] != NV_ENCODE_XDR || nvlist[1] != HOST_ENDIAN)
 		return (1);
 
-	nvlist += 4;
-	*out = nvlist;
+	*out = nvlist + 4;
 	return (0);
 }
 
@@ -1043,69 +1161,159 @@
 	return (nvlist);
 }
 
+/*
+ * The nvlist_next_nvpair() function returns a handle to the next nvpair in the
+ * list following nvpair. If nvpair is NULL, the first pair is returned. If
+ * nvpair is the last pair in the nvlist, NULL is returned.
+ */
+static char *
+nvlist_next_nvpair(char *nvl, char *nvpair)
+{
+	char *cur, *prev;
+	int encode_size;
+
+	if (nvl == NULL)
+		return (NULL);
+
+	if (nvpair == NULL) {
+		/* skip over nvl_version and nvl_nvflag */
+		nvpair = nvl + 4 * 2;
+	} else {
+		/* skip to the next nvpair */
+		encode_size = BSWAP_32(*(uint32_t *)nvpair);
+		nvpair += encode_size;
+	}
+
+	/* 8 bytes of 0 marks the end of the list */
+	if (*(uint64_t *)nvpair == 0)
+		return (NULL);
+
+	return (nvpair);
+}
+
+/*
+ * This function returns 0 on success and 1 on failure. On success, a string
+ * containing the name of nvpair is saved in buf.
+ */
+static int
+nvpair_name(char *nvp, char *buf, int buflen)
+{
+	int len;
+
+	/* skip over encode/decode size */
+	nvp += 4 * 2;
+
+	len = BSWAP_32(*(uint32_t *)nvp);
+	if (buflen < len + 1)
+		return (1);
+
+	grub_memmove(buf, nvp + 4, len);
+	buf[len] = '\0';
+
+	return (0);
+}
+
+/*
+ * This function retrieves the value of the nvpair in the form of enumerated
+ * type data_type_t. This is used to determine the appropriate type to pass to
+ * nvpair_value().
+ */
+static int
+nvpair_type(char *nvp)
+{
+	int name_len, type;
+
+	/* skip over encode/decode size */
+	nvp += 4 * 2;
+
+	/* skip over name_len */
+	name_len = BSWAP_32(*(uint32_t *)nvp);
+	nvp += 4;
+
+	/* skip over name */
+	nvp = nvp + ((name_len + 3) & ~3); /* align */
+
+	type = BSWAP_32(*(uint32_t *)nvp);
+
+	return (type);
+}
+
+static int
+nvpair_value(char *nvp, void *val, int valtype, int *nelmp)
+{
+	int name_len, type, slen;
+	char *strval = val;
+	uint64_t *intval = val;
+
+	/* skip over encode/decode size */
+	nvp += 4 * 2;
+
+	/* skip over name_len */
+	name_len = BSWAP_32(*(uint32_t *)nvp);
+	nvp += 4;
+
+	/* skip over name */
+	nvp = nvp + ((name_len + 3) & ~3); /* align */
+
+	/* skip over type */
+	type = BSWAP_32(*(uint32_t *)nvp);
+	nvp += 4;
+
+	if (type == valtype) {
+		int nelm;
+
+		nelm = BSWAP_32(*(uint32_t *)nvp);
+		if (valtype != DATA_TYPE_BOOLEAN && nelm < 1)
+			return (1);
+		nvp += 4;
+
+		switch (valtype) {
+		case DATA_TYPE_BOOLEAN:
+			return (0);
+
+		case DATA_TYPE_STRING:
+			slen = BSWAP_32(*(uint32_t *)nvp);
+			nvp += 4;
+			grub_memmove(strval, nvp, slen);
+			strval[slen] = '\0';
+			return (0);
+
+		case DATA_TYPE_UINT64:
+			*intval = BSWAP_64(*(uint64_t *)nvp);
+			return (0);
+
+		case DATA_TYPE_NVLIST:
+			*(void **)val = (void *)nvp;
+			return (0);
+
+		case DATA_TYPE_NVLIST_ARRAY:
+			*(void **)val = (void *)nvp;
+			if (nelmp)
+				*nelmp = nelm;
+			return (0);
+		}
+	}
+
+	return (1);
+}
+
 static int
 nvlist_lookup_value(char *nvlist, char *name, void *val, int valtype,
     int *nelmp)
 {
-	int name_len, type, slen, encode_size;
-	char *nvpair, *nvp_name, *strval = val;
-	uint64_t *intval = val;
-
-	/* skip the header, nvl_version, and nvl_nvflag */
-	nvlist = nvlist + 4 * 2;
+	char *nvpair;
 
-	/*
-	 * Loop thru the nvpair list
-	 * The XDR representation of an integer is in big-endian byte order.
-	 */
-	while (encode_size = BSWAP_32(*(uint32_t *)nvlist))  {
-
-		nvpair = nvlist + 4 * 2; /* skip the encode/decode size */
-
-		name_len = BSWAP_32(*(uint32_t *)nvpair);
-		nvpair += 4;
-
-		nvp_name = nvpair;
-		nvpair = nvpair + ((name_len + 3) & ~3); /* align */
-
-		type = BSWAP_32(*(uint32_t *)nvpair);
-		nvpair += 4;
+	for (nvpair = nvlist_next_nvpair(nvlist, NULL);
+	    nvpair != NULL;
+	    nvpair = nvlist_next_nvpair(nvlist, nvpair)) {
+		int name_len = BSWAP_32(*(uint32_t *)(nvpair + 4 * 2));
+		char *nvp_name = nvpair + 4 * 3;
 
 		if ((grub_strncmp(nvp_name, name, name_len) == 0) &&
-		    type == valtype) {
-			int nelm;
-
-			if ((nelm = BSWAP_32(*(uint32_t *)nvpair)) < 1)
-				return (1);
-			nvpair += 4;
-
-			switch (valtype) {
-			case DATA_TYPE_STRING:
-				slen = BSWAP_32(*(uint32_t *)nvpair);
-				nvpair += 4;
-				grub_memmove(strval, nvpair, slen);
-				strval[slen] = '\0';
-				return (0);
-
-			case DATA_TYPE_UINT64:
-				*intval = BSWAP_64(*(uint64_t *)nvpair);
-				return (0);
-
-			case DATA_TYPE_NVLIST:
-				*(void **)val = (void *)nvpair;
-				return (0);
-
-			case DATA_TYPE_NVLIST_ARRAY:
-				*(void **)val = (void *)nvpair;
-				if (nelmp)
-					*nelmp = nelm;
-				return (0);
-			}
+		    nvpair_type(nvpair) == valtype) {
+			return (nvpair_value(nvpair, val, valtype, nelmp));
 		}
-
-		nvlist += encode_size; /* goto the next nvpair */
 	}
-
 	return (1);
 }
 
@@ -1142,7 +1350,7 @@
 	    NULL))
 		return (ERR_FSYS_CORRUPT);
 
-	if (strcmp(type, VDEV_TYPE_DISK) == 0) {
+	if (grub_strcmp(type, VDEV_TYPE_DISK) == 0) {
 		uint64_t guid;
 
 		if (vdev_validate(nv) != 0)
@@ -1172,15 +1380,15 @@
 		    devid, DATA_TYPE_STRING, NULL) != 0)
 			devid[0] = '\0';
 
-		if (strlen(bootpath) >= MAXPATHLEN ||
-		    strlen(devid) >= MAXPATHLEN)
+		if (grub_strlen(bootpath) >= MAXPATHLEN ||
+		    grub_strlen(devid) >= MAXPATHLEN)
 			return (ERR_WONT_FIT);
 
 		return (0);
 
-	} else if (strcmp(type, VDEV_TYPE_MIRROR) == 0 ||
-	    strcmp(type, VDEV_TYPE_REPLACING) == 0 ||
-	    (is_spare = (strcmp(type, VDEV_TYPE_SPARE) == 0))) {
+	} else if (grub_strcmp(type, VDEV_TYPE_MIRROR) == 0 ||
+	    grub_strcmp(type, VDEV_TYPE_REPLACING) == 0 ||
+	    (is_spare = (grub_strcmp(type, VDEV_TYPE_SPARE) == 0))) {
 		int nelm, i;
 		char *child;
 
@@ -1208,15 +1416,14 @@
  *	0 - success
  *	ERR_* - failure
  */
-int
+static int
 check_pool_label(uint64_t sector, char *stack, char *outdevid,
-    char *outpath, uint64_t *outguid, uint64_t *outashift)
+    char *outpath, uint64_t *outguid, uint64_t *outashift, uint64_t *outversion)
 {
 	vdev_phys_t *vdev;
 	uint64_t pool_state, txg = 0;
-	char *nvlist, *nv;
+	char *nvlist, *nv, *features;
 	uint64_t diskguid;
-	uint64_t version;
 
 	sector += (VDEV_SKIP_SIZE >> SPA_MINBLOCKSHIFT);
 
@@ -1249,10 +1456,10 @@
 	if (txg == 0)
 		return (ERR_NO_BOOTPATH);
 
-	if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_VERSION, &version,
+	if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_VERSION, outversion,
 	    DATA_TYPE_UINT64, NULL))
 		return (ERR_FSYS_CORRUPT);
-	if (version > SPA_VERSION)
+	if (!SPA_VERSION_IS_SUPPORTED(*outversion))
 		return (ERR_NEWER_VERSION);
 	if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_VDEV_TREE, &nv,
 	    DATA_TYPE_NVLIST, NULL))
@@ -1268,6 +1475,30 @@
 	if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_POOL_GUID, outguid,
 	    DATA_TYPE_UINT64, NULL))
 		return (ERR_FSYS_CORRUPT);
+
+	if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_FEATURES_FOR_READ,
+	    &features, DATA_TYPE_NVLIST, NULL) == 0) {
+		char *nvp;
+		char *name = stack;
+		stack += MAXNAMELEN;
+
+		for (nvp = nvlist_next_nvpair(features, NULL);
+		    nvp != NULL;
+		    nvp = nvlist_next_nvpair(features, nvp)) {
+			zap_attribute_t za;
+
+			if (nvpair_name(nvp, name, MAXNAMELEN) != 0)
+				return (ERR_FSYS_CORRUPT);
+
+			za.za_integer_length = 8;
+			za.za_num_integers = 1;
+			za.za_first_integer = 1;
+			za.za_name = name;
+			if (check_feature(&za, spa_feature_names, stack) != 0)
+				return (ERR_NEWER_VERSION);
+		}
+	}
+
 	return (0);
 }
 
@@ -1288,7 +1519,7 @@
 	objset_phys_t *osp;
 	char tmp_bootpath[MAXNAMELEN];
 	char tmp_devid[MAXNAMELEN];
-	uint64_t tmp_guid, ashift;
+	uint64_t tmp_guid, ashift, version;
 	uint64_t adjpl = (uint64_t)part_length << SPA_MINBLOCKSHIFT;
 	int err = errnum; /* preserve previous errnum state */
 
@@ -1331,7 +1562,7 @@
 			continue;
 
 		if (check_pool_label(sector, stack, tmp_devid,
-		    tmp_bootpath, &tmp_guid, &ashift))
+		    tmp_bootpath, &tmp_guid, &ashift, &version))
 			continue;
 
 		if (pool_guid == 0)
@@ -1343,6 +1574,10 @@
 
 		VERIFY_OS_TYPE(osp, DMU_OST_META);
 
+		if (version >= SPA_VERSION_FEATURES &&
+		    check_mos_features(&osp->os_meta_dnode, stack) != 0)
+			continue;
+
 		if (find_best_root && ((pool_guid != tmp_guid) ||
 		    vdev_uberblock_compare(ubbest, &(current_uberblock)) <= 0))
 			continue;
@@ -1479,7 +1714,6 @@
 zfs_read(char *buf, int len)
 {
 	char *stack;
-	char *tmpbuf;
 	int blksz, length, movesize;
 
 	if (file_buf == NULL) {
--- a/usr/src/grub/grub-0.97/stage2/fsys_zfs.h	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/grub/grub-0.97/stage2/fsys_zfs.h	Mon May 21 12:11:39 2012 -0700
@@ -16,10 +16,16 @@
  *  along with this program; if not, write to the Free Software
  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
+
 /*
  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
+
+/*
+ * Copyright (c) 2012 by Delphix. All rights reserved.
+ */
+
 #ifndef _FSYS_ZFS_H
 #define	_FSYS_ZFS_H
 
@@ -119,10 +125,36 @@
 #define	NV_ENCODE_NATIVE	0
 #define	NV_ENCODE_XDR		1
 #define	HOST_ENDIAN		1	/* for x86 machine */
-#define	DATA_TYPE_UINT64	8
-#define	DATA_TYPE_STRING	9
-#define	DATA_TYPE_NVLIST	19
-#define	DATA_TYPE_NVLIST_ARRAY	20
+typedef enum {
+	DATA_TYPE_UNKNOWN = 0,
+	DATA_TYPE_BOOLEAN,
+	DATA_TYPE_BYTE,
+	DATA_TYPE_INT16,
+	DATA_TYPE_UINT16,
+	DATA_TYPE_INT32,
+	DATA_TYPE_UINT32,
+	DATA_TYPE_INT64,
+	DATA_TYPE_UINT64,
+	DATA_TYPE_STRING,
+	DATA_TYPE_BYTE_ARRAY,
+	DATA_TYPE_INT16_ARRAY,
+	DATA_TYPE_UINT16_ARRAY,
+	DATA_TYPE_INT32_ARRAY,
+	DATA_TYPE_UINT32_ARRAY,
+	DATA_TYPE_INT64_ARRAY,
+	DATA_TYPE_UINT64_ARRAY,
+	DATA_TYPE_STRING_ARRAY,
+	DATA_TYPE_HRTIME,
+	DATA_TYPE_NVLIST,
+	DATA_TYPE_NVLIST_ARRAY,
+	DATA_TYPE_BOOLEAN_VALUE,
+	DATA_TYPE_INT8,
+	DATA_TYPE_UINT8,
+	DATA_TYPE_BOOLEAN_ARRAY,
+	DATA_TYPE_INT8_ARRAY,
+	DATA_TYPE_UINT8_ARRAY,
+	DATA_TYPE_DOUBLE
+} data_type_t;
 
 /*
  * Decompression Entry - lzjb
--- a/usr/src/grub/grub-0.97/stage2/zfs-include/dmu.h	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/grub/grub-0.97/stage2/zfs-include/dmu.h	Mon May 21 12:11:39 2012 -0700
@@ -16,11 +16,16 @@
  *  along with this program; if not, write to the Free Software
  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
+
 /*
  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
+/*
+ * Copyright (c) 2012 by Delphix. All rights reserved.
+ */
+
 #ifndef	_SYS_DMU_H
 #define	_SYS_DMU_H
 
@@ -31,6 +36,41 @@
  * The DMU also interacts with the SPA.  That interface is described in
  * dmu_spa.h.
  */
+
+#define	B_FALSE	0
+#define	B_TRUE	1
+
+#define	DMU_OT_NEWTYPE 0x80
+#define	DMU_OT_METADATA 0x40
+#define	DMU_OT_BYTESWAP_MASK 0x3f
+
+#define	DMU_OT(byteswap, metadata) \
+	(DMU_OT_NEWTYPE | \
+	((metadata) ? DMU_OT_METADATA : 0) | \
+	((byteswap) & DMU_OT_BYTESWAP_MASK))
+
+#define	DMU_OT_IS_VALID(ot) (((ot) & DMU_OT_NEWTYPE) ? \
+	((ot) & DMU_OT_BYTESWAP_MASK) < DMU_BSWAP_NUMFUNCS : \
+	(ot) < DMU_OT_NUMTYPES)
+
+#define	DMU_OT_IS_METADATA(ot) (((ot) & DMU_OT_NEWTYPE) ? \
+	((ot) & DMU_OT_METADATA) : \
+	dmu_ot[(ot)].ot_metadata)
+
+typedef enum dmu_object_byteswap {
+	DMU_BSWAP_UINT8,
+	DMU_BSWAP_UINT16,
+	DMU_BSWAP_UINT32,
+	DMU_BSWAP_UINT64,
+	DMU_BSWAP_ZAP,
+	DMU_BSWAP_DNODE,
+	DMU_BSWAP_OBJSET,
+	DMU_BSWAP_ZNODE,
+	DMU_BSWAP_OLDACL,
+	DMU_BSWAP_ACL,
+	DMU_BSWAP_NUMFUNCS
+} dmu_object_byteswap_t;
+
 typedef enum dmu_object_type {
 	DMU_OT_NONE,
 	/* general: */
@@ -38,8 +78,8 @@
 	DMU_OT_OBJECT_ARRAY,		/* UINT64 */
 	DMU_OT_PACKED_NVLIST,		/* UINT8 (XDR by nvlist_pack/unpack) */
 	DMU_OT_PACKED_NVLIST_SIZE,	/* UINT64 */
-	DMU_OT_BPLIST,			/* UINT64 */
-	DMU_OT_BPLIST_HDR,		/* UINT64 */
+	DMU_OT_BPOBJ,			/* UINT64 */
+	DMU_OT_BPOBJ_HDR,		/* UINT64 */
 	/* spa: */
 	DMU_OT_SPACE_MAP_HEADER,	/* UINT64 */
 	DMU_OT_SPACE_MAP,		/* UINT64 */
@@ -56,7 +96,7 @@
 	DMU_OT_DSL_DATASET,		/* UINT64 */
 	/* zpl: */
 	DMU_OT_ZNODE,			/* ZNODE */
-	DMU_OT_OLDACL,			/* OLD ACL */
+	DMU_OT_OLDACL,			/* Old ACL */
 	DMU_OT_PLAIN_FILE_CONTENTS,	/* UINT8 */
 	DMU_OT_DIRECTORY_CONTENTS,	/* ZAP */
 	DMU_OT_MASTER_NODE,		/* ZAP */
@@ -79,7 +119,7 @@
 	DMU_OT_FUID,			/* FUID table (Packed NVLIST UINT8) */
 	DMU_OT_FUID_SIZE,		/* FUID table size UINT64 */
 	DMU_OT_NEXT_CLONES,		/* ZAP */
-	DMU_OT_SCRUB_QUEUE,		/* ZAP */
+	DMU_OT_SCAN_QUEUE,		/* ZAP */
 	DMU_OT_USERGROUP_USED,		/* ZAP */
 	DMU_OT_USERGROUP_QUOTA,		/* ZAP */
 	DMU_OT_USERREFS,		/* ZAP */
@@ -89,7 +129,24 @@
 	DMU_OT_SA_MASTER_NODE,		/* ZAP */
 	DMU_OT_SA_ATTR_REGISTRATION,	/* ZAP */
 	DMU_OT_SA_ATTR_LAYOUTS,		/* ZAP */
-	DMU_OT_NUMTYPES
+	DMU_OT_SCAN_XLATE,		/* ZAP */
+	DMU_OT_DEDUP,			/* fake dedup BP from ddt_bp_create() */
+	DMU_OT_DEADLIST,		/* ZAP */
+	DMU_OT_DEADLIST_HDR,		/* UINT64 */
+	DMU_OT_DSL_CLONES,		/* ZAP */
+	DMU_OT_BPOBJ_SUBOBJ,		/* UINT64 */
+	DMU_OT_NUMTYPES,
+
+	DMU_OTN_UINT8_DATA = DMU_OT(DMU_BSWAP_UINT8, B_FALSE),
+	DMU_OTN_UINT8_METADATA = DMU_OT(DMU_BSWAP_UINT8, B_TRUE),
+	DMU_OTN_UINT16_DATA = DMU_OT(DMU_BSWAP_UINT16, B_FALSE),
+	DMU_OTN_UINT16_METADATA = DMU_OT(DMU_BSWAP_UINT16, B_TRUE),
+	DMU_OTN_UINT32_DATA = DMU_OT(DMU_BSWAP_UINT32, B_FALSE),
+	DMU_OTN_UINT32_METADATA = DMU_OT(DMU_BSWAP_UINT32, B_TRUE),
+	DMU_OTN_UINT64_DATA = DMU_OT(DMU_BSWAP_UINT64, B_FALSE),
+	DMU_OTN_UINT64_METADATA = DMU_OT(DMU_BSWAP_UINT64, B_TRUE),
+	DMU_OTN_ZAP_DATA = DMU_OT(DMU_BSWAP_ZAP, B_FALSE),
+	DMU_OTN_ZAP_METADATA = DMU_OT(DMU_BSWAP_ZAP, B_TRUE),
 } dmu_object_type_t;
 
 typedef enum dmu_objset_type {
@@ -107,6 +164,9 @@
  */
 #define	DMU_POOL_DIRECTORY_OBJECT	1
 #define	DMU_POOL_CONFIG			"config"
+#define	DMU_POOL_FEATURES_FOR_READ	"features_for_read"
+#define	DMU_POOL_FEATURES_FOR_WRITE	"features_for_write"
+#define	DMU_POOL_FEATURE_DESCRIPTIONS	"feature_descriptions"
 #define	DMU_POOL_ROOT_DATASET		"root_dataset"
 #define	DMU_POOL_SYNC_BPLIST		"sync_bplist"
 #define	DMU_POOL_ERRLOG_SCRUB		"errlog_scrub"
--- a/usr/src/grub/grub-0.97/stage2/zfs-include/spa.h	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/grub/grub-0.97/stage2/zfs-include/spa.h	Mon May 21 12:11:39 2012 -0700
@@ -16,11 +16,16 @@
  *  along with this program; if not, write to the Free Software
  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
+
 /*
  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
+/*
+ * Copyright (c) 2012 by Delphix. All rights reserved.
+ */
+
 #ifndef _SYS_SPA_H
 #define	_SYS_SPA_H
 
@@ -65,7 +70,7 @@
 /*
  * Size of block to hold the configuration data (a packed nvlist)
  */
-#define	SPA_CONFIG_BLOCKSIZE	(1 << 14)
+#define	SPA_CONFIG_BLOCKSIZE	(1ULL << 14)
 
 /*
  * The DVA size encodings for LSIZE and PSIZE support blocks up to 32MB.
--- a/usr/src/grub/grub-0.97/stage2/zfs-include/zfs.h	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/grub/grub-0.97/stage2/zfs-include/zfs.h	Mon May 21 12:11:39 2012 -0700
@@ -16,8 +16,10 @@
  *  along with this program; if not, write to the Free Software
  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
+
 /*
  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
  */
 
 #ifndef	_SYS_FS_ZFS_H
@@ -26,7 +28,14 @@
 /*
  * On-disk version number.
  */
-#define	SPA_VERSION			28ULL
+#define	SPA_VERSION_INITIAL		1ULL
+#define	SPA_VERSION_BEFORE_FEATURES	28ULL
+#define	SPA_VERSION			5000ULL
+#define	SPA_VERSION_FEATURES		5000ULL
+
+#define	SPA_VERSION_IS_SUPPORTED(v) \
+	(((v) >= SPA_VERSION_INITIAL && (v) <= SPA_VERSION_BEFORE_FEATURES) || \
+	((v) >= SPA_VERSION_FEATURES && (v) <= SPA_VERSION))
 
 /*
  * The following are configuration names used in the nvlist describing a pool's
@@ -66,6 +75,7 @@
 #define	ZPOOL_CONFIG_DDT_HISTOGRAM	"ddt_histogram"
 #define	ZPOOL_CONFIG_DDT_OBJ_STATS	"ddt_object_stats"
 #define	ZPOOL_CONFIG_DDT_STATS		"ddt_stats"
+#define	ZPOOL_CONFIG_FEATURES_FOR_READ	"features_for_read"
 /*
  * The persistent vdev state is stored as separate values rather than a single
  * 'vdev_state' entry.  This is because a device can be in multiple states, such
--- a/usr/src/lib/libc/port/mapfile-vers	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/lib/libc/port/mapfile-vers	Mon May 21 12:11:39 2012 -0700
@@ -24,7 +24,7 @@
 # Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
 # Use is subject to license terms.
 #
-# Copyright (c) 2011 by Delphix. All rights reserved.
+# Copyright (c) 2012 by Delphix. All rights reserved.
 #
 
 #
@@ -90,6 +90,11 @@
 $add amd64
 $endif
 
+SYMBOL_VERSION ILLUMOS_0.3 {	# Illumos additions
+    protected:
+        assfail3;
+} ILLUMOS_0.2;
+
 SYMBOL_VERSION ILLUMOS_0.2 {	# Illumos additions
     protected:
         posix_spawn_pipe_np;
--- a/usr/src/lib/libc/port/threads/assfail.c	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/lib/libc/port/threads/assfail.c	Mon May 21 12:11:39 2012 -0700
@@ -23,6 +23,9 @@
  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
+/*
+ * Copyright (c) 2012 by Delphix. All rights reserved.
+ */
 
 #include "lint.h"
 #include "thr_uberdata.h"
@@ -442,3 +445,19 @@
 {
 	__assfail(assertion, filename, line_num);
 }
+
+void
+assfail3(const char *assertion, uintmax_t lv, const char *op, uintmax_t rv,
+    const char *filename, int line_num)
+{
+	char buf[1000];
+	(void) strcpy(buf, assertion);
+	(void) strcat(buf, " (0x");
+	ultos((uint64_t)lv, 16, buf + strlen(buf));
+	(void) strcat(buf, " ");
+	(void) strcat(buf, op);
+	(void) strcat(buf, " 0x");
+	ultos((uint64_t)rv, 16, buf + strlen(buf));
+	(void) strcat(buf, ")");
+	__assfail(buf, filename, line_num);
+}
--- a/usr/src/lib/libnvpair/Makefile.com	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/lib/libnvpair/Makefile.com	Mon May 21 12:11:39 2012 -0700
@@ -22,7 +22,7 @@
 # Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 # Use is subject to license terms.
 #
-# ident	"%Z%%M%	%I%	%E% SMI"
+# Copyright (c) 2012 by Delphix. All rights reserved.
 #
 
 LIBRARY=	libnvpair.a
@@ -31,7 +31,8 @@
 OBJECTS=	libnvpair.o \
 		nvpair_alloc_system.o \
 		nvpair_alloc_fixed.o \
-		nvpair.o
+		nvpair.o \
+		fnvpair.o
 
 include ../../Makefile.lib
 include ../../Makefile.rootfs
@@ -39,7 +40,8 @@
 SRCS=		../libnvpair.c \
 		../nvpair_alloc_system.c \
 		$(SRC)/common/nvpair/nvpair_alloc_fixed.c \
-		$(SRC)/common/nvpair/nvpair.c
+		$(SRC)/common/nvpair/nvpair.c \
+		$(SRC)/common/nvpair/fnvpair.c
 
 #
 # Libraries added to the next line must be present in miniroot
--- a/usr/src/lib/libnvpair/libnvpair.c	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/lib/libnvpair/libnvpair.c	Mon May 21 12:11:39 2012 -0700
@@ -20,6 +20,7 @@
  */
 /*
  * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
  */
 
 #include <unistd.h>
@@ -803,6 +804,10 @@
 
 	while ((elem = nvlist_next_nvpair(list, elem)) != NULL) {
 		switch (nvpair_type(elem)) {
+		case DATA_TYPE_BOOLEAN:
+			(void) printf("%*s%s\n", indent, "", nvpair_name(elem));
+			break;
+
 		case DATA_TYPE_BOOLEAN_VALUE:
 			(void) nvpair_value_boolean_value(elem, &bool_value);
 			(void) printf("%*s%s: %s\n", indent, "",
--- a/usr/src/lib/libnvpair/mapfile-vers	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/lib/libnvpair/mapfile-vers	Mon May 21 12:11:39 2012 -0700
@@ -20,6 +20,7 @@
 #
 #
 # Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2012 by Delphix. All rights reserved.
 #
 
 #
@@ -38,6 +39,73 @@
 
 $mapfile_version 2
 
+SYMBOL_VERSION ILLUMOS_0.1 {	# Illumos additions
+    global:
+    fnvlist_alloc;
+    fnvlist_free;
+    fnvlist_size;
+    fnvlist_pack;
+    fnvlist_pack_free;
+    fnvlist_unpack;
+    fnvlist_dup;
+    fnvlist_merge;
+    fnvlist_add_boolean;
+    fnvlist_add_boolean_value;
+    fnvlist_add_byte;
+    fnvlist_add_int8;
+    fnvlist_add_uint8;
+    fnvlist_add_int16;
+    fnvlist_add_uint16;
+    fnvlist_add_int32;
+    fnvlist_add_uint32;
+    fnvlist_add_int64;
+    fnvlist_add_uint64;
+    fnvlist_add_string;
+    fnvlist_add_nvlist;
+    fnvlist_add_nvpair;
+    fnvlist_add_boolean_array;
+    fnvlist_add_byte_array;
+    fnvlist_add_int8_array;
+    fnvlist_add_uint8_array;
+    fnvlist_add_int16_array;
+    fnvlist_add_uint16_array;
+    fnvlist_add_int32_array;
+    fnvlist_add_uint32_array;
+    fnvlist_add_int64_array;
+    fnvlist_add_uint64_array;
+    fnvlist_add_string_array;
+    fnvlist_add_nvlist_array;
+    fnvlist_remove;
+    fnvlist_remove_nvpair;
+    fnvlist_lookup_nvpair;
+    fnvlist_lookup_boolean;
+    fnvlist_lookup_boolean_value;
+    fnvlist_lookup_byte;
+    fnvlist_lookup_int8;
+    fnvlist_lookup_int16;
+    fnvlist_lookup_int32;
+    fnvlist_lookup_int64;
+    fnvlist_lookup_uint8_t;
+    fnvlist_lookup_uint16;
+    fnvlist_lookup_uint32;
+    fnvlist_lookup_uint64;
+    fnvlist_lookup_string;
+    fnvlist_lookup_nvlist;
+    fnvpair_value_boolean_value;
+    fnvpair_value_byte;
+    fnvpair_value_int8;
+    fnvpair_value_int16;
+    fnvpair_value_int32;
+    fnvpair_value_int64;
+    fnvpair_value_uint8_t;
+    fnvpair_value_uint16;
+    fnvpair_value_uint32;
+    fnvpair_value_uint64;
+    fnvpair_value_string;
+    fnvpair_value_nvlist;
+} SUNW_1.3;
+
+
 SYMBOL_VERSION SUNW_1.3 {
     global:
 	nvlist_add_double;
--- a/usr/src/lib/libzfs/Makefile.com	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/lib/libzfs/Makefile.com	Mon May 21 12:11:39 2012 -0700
@@ -20,13 +20,14 @@
 #
 #
 # Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
-# Copyright (c) 2011 by Delphix. All rights reserved.
+# Copyright (c) 2012 by Delphix. All rights reserved.
 #
 
 LIBRARY= libzfs.a
 VERS= .1
 
 OBJS_SHARED=			\
+	zfeature_common.o	\
 	zfs_comutil.o		\
 	zfs_deleg.o		\
 	zfs_fletcher.o		\
--- a/usr/src/lib/libzfs/common/libzfs.h	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/lib/libzfs/common/libzfs.h	Mon May 21 12:11:39 2012 -0700
@@ -291,6 +291,15 @@
 	ZPOOL_STATUS_BAD_LOG,		/* cannot read log chain(s) */
 
 	/*
+	 * If the pool has unsupported features but can still be opened in
+	 * read-only mode, its status is ZPOOL_STATUS_UNSUP_FEAT_WRITE. If the
+	 * pool has unsupported features but cannot be opened at all, its
+	 * status is ZPOOL_STATUS_UNSUP_FEAT_READ.
+	 */
+	ZPOOL_STATUS_UNSUP_FEAT_READ,	/* unsupported features for read */
+	ZPOOL_STATUS_UNSUP_FEAT_WRITE,	/* unsupported features for write */
+
+	/*
 	 * These faults have no corresponding message ID.  At the time we are
 	 * checking the status, the original reason for the FMA fault (I/O or
 	 * checksum errors) has been lost.
@@ -322,6 +331,7 @@
  * Statistics and configuration functions.
  */
 extern nvlist_t *zpool_get_config(zpool_handle_t *, nvlist_t **);
+extern nvlist_t *zpool_get_features(zpool_handle_t *);
 extern int zpool_refresh_stats(zpool_handle_t *, boolean_t *);
 extern int zpool_get_errlog(zpool_handle_t *, nvlist_t **);
 
@@ -334,6 +344,7 @@
     char *altroot);
 extern int zpool_import_props(libzfs_handle_t *, nvlist_t *, const char *,
     nvlist_t *, int);
+extern void zpool_print_unsup_feat(nvlist_t *config);
 
 /*
  * Search for pools to import
@@ -423,6 +434,8 @@
     uint64_t *propvalue);
 extern int zfs_prop_get_written(zfs_handle_t *zhp, const char *propname,
     char *propbuf, int proplen, boolean_t literal);
+extern int zfs_prop_get_feature(zfs_handle_t *zhp, const char *propname,
+    char *buf, size_t len);
 extern int zfs_get_snapused_int(zfs_handle_t *firstsnap, zfs_handle_t *lastsnap,
     uint64_t *usedp);
 extern uint64_t zfs_prop_get_int(zfs_handle_t *, zfs_prop_t);
@@ -450,10 +463,19 @@
 #define	ZFS_MOUNTPOINT_NONE	"none"
 #define	ZFS_MOUNTPOINT_LEGACY	"legacy"
 
+#define	ZFS_FEATURE_DISABLED	"disabled"
+#define	ZFS_FEATURE_ENABLED	"enabled"
+#define	ZFS_FEATURE_ACTIVE	"active"
+
+#define	ZFS_UNSUPPORTED_INACTIVE	"inactive"
+#define	ZFS_UNSUPPORTED_READONLY	"readonly"
+
 /*
  * zpool property management
  */
 extern int zpool_expand_proplist(zpool_handle_t *, zprop_list_t **);
+extern int zpool_prop_get_feature(zpool_handle_t *, const char *, char *,
+    size_t);
 extern const char *zpool_prop_default_string(zpool_prop_t);
 extern uint64_t zpool_prop_default_numeric(zpool_prop_t);
 extern const char *zpool_prop_column_name(zpool_prop_t);
--- a/usr/src/lib/libzfs/common/libzfs_config.c	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/lib/libzfs/common/libzfs_config.c	Mon May 21 12:11:39 2012 -0700
@@ -18,12 +18,17 @@
  *
  * CDDL HEADER END
  */
+
 /*
  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
 /*
+ * Copyright (c) 2012 by Delphix. All rights reserved.
+ */
+
+/*
  * The pool configuration repository is stored in /etc/zfs/zpool.cache as a
  * single packed nvlist.  While it would be nice to just read in this
  * file from userland, this wouldn't work from a local zone.  So we have to have
@@ -218,6 +223,36 @@
 }
 
 /*
+ * Retrieves a list of enabled features and their refcounts and caches it in
+ * the pool handle.
+ */
+nvlist_t *
+zpool_get_features(zpool_handle_t *zhp)
+{
+	nvlist_t *config, *features;
+
+	config = zpool_get_config(zhp, NULL);
+
+	if (config == NULL || !nvlist_exists(config,
+	    ZPOOL_CONFIG_FEATURE_STATS)) {
+		int error;
+		boolean_t missing = B_FALSE;
+
+		error = zpool_refresh_stats(zhp, &missing);
+
+		if (error != 0 || missing)
+			return (NULL);
+
+		config = zpool_get_config(zhp, NULL);
+	}
+
+	verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_FEATURE_STATS,
+	    &features) == 0);
+
+	return (features);
+}
+
+/*
  * Refresh the vdev statistics associated with the given pool.  This is used in
  * iostat to show configuration changes and determine the delta from the last
  * time the function was called.  This function can fail, in case the pool has
--- a/usr/src/lib/libzfs/common/libzfs_pool.c	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/lib/libzfs/common/libzfs_pool.c	Mon May 21 12:11:39 2012 -0700
@@ -43,6 +43,7 @@
 #include "zfs_prop.h"
 #include "libzfs_impl.h"
 #include "zfs_comutil.h"
+#include "zfeature_common.h"
 
 static int read_efi_label(nvlist_t *config, diskaddr_t *sb);
 
@@ -273,6 +274,7 @@
 		case ZPOOL_PROP_SIZE:
 		case ZPOOL_PROP_ALLOCATED:
 		case ZPOOL_PROP_FREE:
+		case ZPOOL_PROP_FREEING:
 		case ZPOOL_PROP_EXPANDSZ:
 			(void) zfs_nicenum(intval, buf, len);
 			break;
@@ -298,6 +300,12 @@
 			(void) strlcpy(buf, zpool_state_to_name(intval,
 			    vs->vs_aux), len);
 			break;
+		case ZPOOL_PROP_VERSION:
+			if (intval >= SPA_VERSION_FEATURES) {
+				(void) snprintf(buf, len, "-");
+				break;
+			}
+			/* FALLTHROUGH */
 		default:
 			(void) snprintf(buf, len, "%llu", intval);
 		}
@@ -400,10 +408,48 @@
 	while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
 		const char *propname = nvpair_name(elem);
 
+		prop = zpool_name_to_prop(propname);
+		if (prop == ZPROP_INVAL && zpool_prop_feature(propname)) {
+			int err;
+			zfeature_info_t *feature;
+			char *fname = strchr(propname, '@') + 1;
+
+			err = zfeature_lookup_name(fname, &feature);
+			if (err != 0) {
+				ASSERT3U(err, ==, ENOENT);
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "invalid feature '%s'"), fname);
+				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
+				goto error;
+			}
+
+			if (nvpair_type(elem) != DATA_TYPE_STRING) {
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "'%s' must be a string"), propname);
+				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
+				goto error;
+			}
+
+			(void) nvpair_value_string(elem, &strval);
+			if (strcmp(strval, ZFS_FEATURE_ENABLED) != 0) {
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "property '%s' can only be set to "
+				    "'enabled'"), propname);
+				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
+				goto error;
+			}
+
+			if (nvlist_add_uint64(retprops, propname, 0) != 0) {
+				(void) no_memory(hdl);
+				goto error;
+			}
+			continue;
+		}
+
 		/*
 		 * Make sure this property is valid and applies to this type.
 		 */
-		if ((prop = zpool_name_to_prop(propname)) == ZPROP_INVAL) {
+		if (prop == ZPROP_INVAL) {
 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 			    "invalid property '%s'"), propname);
 			(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
@@ -426,7 +472,8 @@
 		 */
 		switch (prop) {
 		case ZPOOL_PROP_VERSION:
-			if (intval < version || intval > SPA_VERSION) {
+			if (intval < version ||
+			    !SPA_VERSION_IS_SUPPORTED(intval)) {
 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 				    "property '%s' number %d is invalid."),
 				    propname, intval);
@@ -648,10 +695,77 @@
 	libzfs_handle_t *hdl = zhp->zpool_hdl;
 	zprop_list_t *entry;
 	char buf[ZFS_MAXPROPLEN];
+	nvlist_t *features = NULL;
+	zprop_list_t **last;
+	boolean_t firstexpand = (NULL == *plp);
 
 	if (zprop_expand_list(hdl, plp, ZFS_TYPE_POOL) != 0)
 		return (-1);
 
+	last = plp;
+	while (*last != NULL)
+		last = &(*last)->pl_next;
+
+	if ((*plp)->pl_all)
+		features = zpool_get_features(zhp);
+
+	if ((*plp)->pl_all && firstexpand) {
+		for (int i = 0; i < SPA_FEATURES; i++) {
+			zprop_list_t *entry = zfs_alloc(hdl,
+			    sizeof (zprop_list_t));
+			entry->pl_prop = ZPROP_INVAL;
+			entry->pl_user_prop = zfs_asprintf(hdl, "feature@%s",
+			    spa_feature_table[i].fi_uname);
+			entry->pl_width = strlen(entry->pl_user_prop);
+			entry->pl_all = B_TRUE;
+
+			*last = entry;
+			last = &entry->pl_next;
+		}
+	}
+
+	/* add any unsupported features */
+	for (nvpair_t *nvp = nvlist_next_nvpair(features, NULL);
+	    nvp != NULL; nvp = nvlist_next_nvpair(features, nvp)) {
+		char *propname;
+		boolean_t found;
+		zprop_list_t *entry;
+
+		if (zfeature_is_supported(nvpair_name(nvp)))
+			continue;
+
+		propname = zfs_asprintf(hdl, "unsupported@%s",
+		    nvpair_name(nvp));
+
+		/*
+		 * Before adding the property to the list make sure that no
+		 * other pool already added the same property.
+		 */
+		found = B_FALSE;
+		entry = *plp;
+		while (entry != NULL) {
+			if (entry->pl_user_prop != NULL &&
+			    strcmp(propname, entry->pl_user_prop) == 0) {
+				found = B_TRUE;
+				break;
+			}
+			entry = entry->pl_next;
+		}
+		if (found) {
+			free(propname);
+			continue;
+		}
+
+		entry = zfs_alloc(hdl, sizeof (zprop_list_t));
+		entry->pl_prop = ZPROP_INVAL;
+		entry->pl_user_prop = propname;
+		entry->pl_width = strlen(entry->pl_user_prop);
+		entry->pl_all = B_TRUE;
+
+		*last = entry;
+		last = &entry->pl_next;
+	}
+
 	for (entry = *plp; entry != NULL; entry = entry->pl_next) {
 
 		if (entry->pl_fixed)
@@ -668,6 +782,66 @@
 	return (0);
 }
 
+/*
+ * Get the state for the given feature on the given ZFS pool.
+ */
+int
+zpool_prop_get_feature(zpool_handle_t *zhp, const char *propname, char *buf,
+    size_t len)
+{
+	uint64_t refcount;
+	boolean_t found = B_FALSE;
+	nvlist_t *features = zpool_get_features(zhp);
+	boolean_t supported;
+	const char *feature = strchr(propname, '@') + 1;
+
+	supported = zpool_prop_feature(propname);
+	ASSERT(supported || zfs_prop_unsupported(propname));
+
+	/*
+	 * Convert from feature name to feature guid. This conversion is
+	 * unecessary for unsupported@... properties because they already
+	 * use guids.
+	 */
+	if (supported) {
+		int ret;
+		zfeature_info_t *fi;
+
+		ret = zfeature_lookup_name(feature, &fi);
+		if (ret != 0) {
+			(void) strlcpy(buf, "-", len);
+			return (ENOTSUP);
+		}
+		feature = fi->fi_guid;
+	}
+
+	if (nvlist_lookup_uint64(features, feature, &refcount) == 0)
+		found = B_TRUE;
+
+	if (supported) {
+		if (!found) {
+			(void) strlcpy(buf, ZFS_FEATURE_DISABLED, len);
+		} else  {
+			if (refcount == 0)
+				(void) strlcpy(buf, ZFS_FEATURE_ENABLED, len);
+			else
+				(void) strlcpy(buf, ZFS_FEATURE_ACTIVE, len);
+		}
+	} else {
+		if (found) {
+			if (refcount == 0) {
+				(void) strcpy(buf, ZFS_UNSUPPORTED_INACTIVE);
+			} else {
+				(void) strcpy(buf, ZFS_UNSUPPORTED_READONLY);
+			}
+		} else {
+			(void) strlcpy(buf, "-", len);
+			return (ENOTSUP);
+		}
+	}
+
+	return (0);
+}
 
 /*
  * Don't start the slice at the default block of 34; many storage
@@ -1254,8 +1428,10 @@
 	if (!hdl->libzfs_printerr || config == NULL)
 		return;
 
-	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nv) != 0)
+	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nv) != 0 ||
+	    nvlist_lookup_nvlist(nv, ZPOOL_CONFIG_REWIND_INFO, &nv) != 0) {
 		return;
+	}
 
 	if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_TIME, &rewindto) != 0)
 		return;
@@ -1311,6 +1487,7 @@
 
 	/* All attempted rewinds failed if ZPOOL_CONFIG_LOAD_TIME missing */
 	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nv) != 0 ||
+	    nvlist_lookup_nvlist(nv, ZPOOL_CONFIG_REWIND_INFO, &nv) != 0 ||
 	    nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_TIME, &rewindto) != 0)
 		goto no_info;
 
@@ -1433,6 +1610,30 @@
 	}
 }
 
+void
+zpool_print_unsup_feat(nvlist_t *config)
+{
+	nvlist_t *nvinfo, *unsup_feat;
+
+	verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nvinfo) ==
+	    0);
+	verify(nvlist_lookup_nvlist(nvinfo, ZPOOL_CONFIG_UNSUP_FEAT,
+	    &unsup_feat) == 0);
+
+	for (nvpair_t *nvp = nvlist_next_nvpair(unsup_feat, NULL); nvp != NULL;
+	    nvp = nvlist_next_nvpair(unsup_feat, nvp)) {
+		char *desc;
+
+		verify(nvpair_type(nvp) == DATA_TYPE_STRING);
+		verify(nvpair_value_string(nvp, &desc) == 0);
+
+		if (strlen(desc) > 0)
+			(void) printf("\t%s (%s)\n", nvpair_name(nvp), desc);
+		else
+			(void) printf("\t%s\n", nvpair_name(nvp));
+	}
+}
+
 /*
  * Import the given pool using the known configuration and a list of
  * properties to be set. The configuration should have come from
@@ -1539,6 +1740,22 @@
 
 		switch (error) {
 		case ENOTSUP:
+			if (nv != NULL && nvlist_lookup_nvlist(nv,
+			    ZPOOL_CONFIG_LOAD_INFO, &nvinfo) == 0 &&
+			    nvlist_exists(nvinfo, ZPOOL_CONFIG_UNSUP_FEAT)) {
+				(void) printf(dgettext(TEXT_DOMAIN, "This "
+				    "pool uses the following feature(s) not "
+				    "supported by this system:\n"));
+				zpool_print_unsup_feat(nv);
+				if (nvlist_exists(nvinfo,
+				    ZPOOL_CONFIG_CAN_RDONLY)) {
+					(void) printf(dgettext(TEXT_DOMAIN,
+					    "All unsupported features are only "
+					    "required for writing to the pool."
+					    "\nThe pool can be imported using "
+					    "'-o readonly=on'.\n"));
+				}
+			}
 			/*
 			 * Unsupported version.
 			 */
--- a/usr/src/lib/libzfs/common/libzfs_status.c	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/lib/libzfs/common/libzfs_status.c	Mon May 21 12:11:39 2012 -0700
@@ -18,8 +18,10 @@
  *
  * CDDL HEADER END
  */
+
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
  */
 
 /*
@@ -214,6 +216,20 @@
 		return (ZPOOL_STATUS_VERSION_NEWER);
 
 	/*
+	 * Unsupported feature(s).
+	 */
+	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
+	    vs->vs_aux == VDEV_AUX_UNSUP_FEAT) {
+		nvlist_t *nvinfo;
+
+		verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO,
+		    &nvinfo) == 0);
+		if (nvlist_exists(nvinfo, ZPOOL_CONFIG_CAN_RDONLY))
+			return (ZPOOL_STATUS_UNSUP_FEAT_WRITE);
+		return (ZPOOL_STATUS_UNSUP_FEAT_READ);
+	}
+
+	/*
 	 * Check that the config is complete.
 	 */
 	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
@@ -300,7 +316,7 @@
 	/*
 	 * Outdated, but usable, version
 	 */
-	if (version < SPA_VERSION)
+	if (SPA_VERSION_IS_SUPPORTED(version) && version != SPA_VERSION)
 		return (ZPOOL_STATUS_VERSION_OLDER);
 
 	return (ZPOOL_STATUS_OK);
--- a/usr/src/lib/libzfs/common/libzfs_util.c	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/lib/libzfs/common/libzfs_util.c	Mon May 21 12:11:39 2012 -0700
@@ -18,9 +18,10 @@
  *
  * CDDL HEADER END
  */
+
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011 by Delphix. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
  */
 
 /*
@@ -45,6 +46,7 @@
 
 #include "libzfs_impl.h"
 #include "zfs_prop.h"
+#include "zfeature_common.h"
 
 int
 libzfs_errno(libzfs_handle_t *hdl)
@@ -112,7 +114,8 @@
 	case EZFS_RESILVERING:
 		return (dgettext(TEXT_DOMAIN, "currently resilvering"));
 	case EZFS_BADVERSION:
-		return (dgettext(TEXT_DOMAIN, "unsupported version"));
+		return (dgettext(TEXT_DOMAIN, "unsupported version or "
+		    "feature"));
 	case EZFS_POOLUNAVAIL:
 		return (dgettext(TEXT_DOMAIN, "pool is unavailable"));
 	case EZFS_DEVOVERFLOW:
@@ -629,6 +632,7 @@
 
 	zfs_prop_init();
 	zpool_prop_init();
+	zpool_feature_init();
 	libzfs_mnttab_init(hdl);
 
 	return (hdl);
@@ -1282,9 +1286,11 @@
 	 * this is a pool property or if this isn't a user-defined
 	 * dataset property,
 	 */
-	if (prop == ZPROP_INVAL && (type == ZFS_TYPE_POOL ||
-	    (!zfs_prop_user(propname) && !zfs_prop_userquota(propname) &&
-	    !zfs_prop_written(propname)))) {
+	if (prop == ZPROP_INVAL && ((type == ZFS_TYPE_POOL &&
+	    !zpool_prop_feature(propname) &&
+	    !zpool_prop_unsupported(propname)) ||
+	    (type == ZFS_TYPE_DATASET && !zfs_prop_user(propname) &&
+	    !zfs_prop_userquota(propname) && !zfs_prop_written(propname)))) {
 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 		    "invalid property '%s'"), propname);
 		return (zfs_error(hdl, EZFS_BADPROP,
@@ -1296,7 +1302,8 @@
 
 	entry->pl_prop = prop;
 	if (prop == ZPROP_INVAL) {
-		if ((entry->pl_user_prop = zfs_strdup(hdl, propname)) == NULL) {
+		if ((entry->pl_user_prop = zfs_strdup(hdl, propname)) ==
+		    NULL) {
 			free(entry);
 			return (-1);
 		}
--- a/usr/src/lib/libzfs/common/llib-lzfs	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/lib/libzfs/common/llib-lzfs	Mon May 21 12:11:39 2012 -0700
@@ -24,6 +24,10 @@
  * Use is subject to license terms.
  */
 
+ /*
+  * Copyright (c) 2012 by Delphix. All rights reserved.
+  */
+
 /*LINTLIBRARY*/
 /*PROTOLIB1*/
 
@@ -31,3 +35,4 @@
 #include "../../../common/zfs/zfs_comutil.h"
 #include "../../../common/zfs/zfs_fletcher.h"
 #include "../../../common/zfs/zfs_prop.h"
+#include "../../../common/zfs/zfeature_common.h"
--- a/usr/src/lib/libzfs/common/mapfile-vers	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/lib/libzfs/common/mapfile-vers	Mon May 21 12:11:39 2012 -0700
@@ -59,6 +59,7 @@
 	libzfs_init;
 	libzfs_mnttab_cache;
 	libzfs_print_on_error;
+	spa_feature_table;
 	zfs_allocatable_devs;
 	zfs_asprintf;
 	zfs_clone;
@@ -201,10 +202,14 @@
 	zpool_obj_to_path;
 	zpool_open;
 	zpool_open_canfail;
+	zpool_print_unsup_feat;
 	zpool_prop_align_right;
 	zpool_prop_column_name;
+	zpool_prop_feature;
+	zpool_prop_get_feature;
 	zpool_prop_readonly;
 	zpool_prop_to_name;
+	zpool_prop_unsupported;
 	zpool_prop_values;
 	zpool_read_label;
 	zpool_refresh_stats;
--- a/usr/src/lib/libzpool/common/kernel.c	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/lib/libzpool/common/kernel.c	Mon May 21 12:11:39 2012 -0700
@@ -426,7 +426,9 @@
 		 * To simulate partial disk writes, we split writes into two
 		 * system calls so that the process can be killed in between.
 		 */
-		split = (len > 0 ? rand() % len : 0);
+		int sectors = len >> SPA_MINBLOCKSHIFT;
+		split = (sectors > 0 ? rand() % sectors : 0) <<
+		    SPA_MINBLOCKSHIFT;
 		iolen = pwrite64(vp->v_fd, addr, split, offset);
 		iolen += pwrite64(vp->v_fd, (char *)addr + split,
 		    len - split, offset + split);
--- a/usr/src/lib/libzpool/common/llib-lzpool	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/lib/libzpool/common/llib-lzpool	Mon May 21 12:11:39 2012 -0700
@@ -23,6 +23,10 @@
  * Use is subject to license terms.
  */
 
+/*
+ * Copyright (c) 2012 by Delphix. All rights reserved.
+ */
+
 /* LINTLIBRARY */
 /* PROTOLIB1 */
 
@@ -52,6 +56,8 @@
 #include <sys/ddt.h>
 #include <sys/sa.h>
 #include <sys/zfs_sa.h>
+#include <sys/zfeature.h>
 
 extern uint64_t metaslab_gang_bang;
 extern uint64_t metaslab_df_alloc_threshold;
+extern boolean_t zfeature_checks_disable;
--- a/usr/src/man/man1m/zpool.1m	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/man/man1m/zpool.1m	Mon May 21 12:11:39 2012 -0700
@@ -1,11 +1,20 @@
 '\" te
 .\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved.
 .\" Copyright 2011, Nexenta Systems, Inc. All Rights Reserved.
-.\" Copyright (c) 2012 by Delphix. All Rights Reserved.
-.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing.
-.\" See the License for the specific language governing permissions and limitations under the License. When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the
-.\" fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
-.TH ZPOOL 1M "Nov 14, 2011"
+.\" Copyright (c) 2012 by Delphix. All rights reserved.
+.\" The contents of this file are subject to the terms of the Common Development
+.\" and Distribution License (the "License"). You may not use this file except
+.\" in compliance with the License. You can obtain a copy of the license at
+.\" usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing.
+.\"
+.\" See the License for the specific language governing permissions and
+.\" limitations under the License. When distributing Covered Code, include this
+.\" CDDL HEADER in each file and include the License file at
+.\" usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this
+.\" CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your
+.\" own identifying information:
+.\" Portions Copyright [yyyy] [name of copyright owner]
+.TH ZPOOL 1M "Mar 16, 2012"
 .SH NAME
 zpool \- configures ZFS storage pools
 .SH SYNOPSIS
@@ -31,7 +40,7 @@
 
 .LP
 .nf
-\fBzpool create\fR [\fB-fn\fR] [\fB-o\fR \fIproperty=value\fR] ... [\fB-O\fR \fIfile-system-property=value\fR]
+\fBzpool create\fR [\fB-fnd\fR] [\fB-o\fR \fIproperty=value\fR] ... [\fB-O\fR \fIfile-system-property=value\fR]
      ... [\fB-m\fR \fImountpoint\fR] [\fB-R\fR \fIroot\fR] \fIpool\fR \fIvdev\fR ...
 .fi
 
@@ -547,24 +556,34 @@
 .sp
 .ne 2
 .na
-\fB\fBcomment\fR\fR
+\fB\fBexpandsize\fR\fR
 .ad
 .RS 20n
-A text string consisting of printable ASCII characters that will be stored
-such that it is available even if the pool becomes faulted.  An administrator
-can provide additional information about a pool using this property.
+Amount of uninitialized space within the pool or device that can be used to
+increase the total capacity of the pool.  Uninitialized space consists of
+any space on an EFI labeled vdev which has not been brought online
+(i.e. zpool online -e).  This space occurs when a LUN is dynamically expanded.
 .RE
 
 .sp
 .ne 2
 .na
-\fB\fBexpandsize\fR\fR
+\fB\fBfree\fR\fR
 .ad
 .RS 20n
-Amount of uninitialized space within the pool or device that can be used to
-increase the total capacity of the pool.  Uninitialized space consists of 
-any space on an EFI labeled vdev which has not been brought online 
-(i.e. zpool online -e).  This space occurs when a LUN is dynamically expanded.
+The amount of free space available in the pool.
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fBfreeing\fR\fR
+.ad
+.RS 20n
+After a file system or snapshot is destroyed, the space it was using is
+returned to the pool asynchronously. \fB\fBfreeing\fR\fR is the amount of
+space remaining to be reclaimed. Over time \fB\fBfreeing\fR\fR will decrease
+while \fB\fBfree\fR\fR increases.
 .RE
 
 .sp
@@ -598,6 +617,16 @@
 .sp
 .ne 2
 .na
+\fB\fBunsupported@\fR\fIfeature_guid\fR\fR
+.ad
+.RS 20n
+Information about unsupported features that are enabled on the pool. See
+\fBzpool-features\fR(5) for details.
+.RE
+
+.sp
+.ne 2
+.na
 \fB\fBused\fR\fR
 .ad
 .RS 20n
@@ -606,7 +635,7 @@
 
 .sp
 .LP
-These space usage properties report actual physical space available to the
+The space usage properties report actual physical space available to the
 storage pool. The physical space can be different from the total amount of
 space that any contained datasets can actually use. The amount of space used in
 a \fBraidz\fR configuration depends on the characteristics of the data being
@@ -705,6 +734,17 @@
 .sp
 .ne 2
 .na
+\fB\fBcomment\fR=\fB\fItext\fR\fR
+.ad
+.RS 4n
+A text string consisting of printable ASCII characters that will be stored
+such that it is available even if the pool becomes faulted.  An administrator
+can provide additional information about a pool using this property.
+.RE
+
+.sp
+.ne 2
+.na
 \fB\fBdelegation\fR=\fBon\fR | \fBoff\fR\fR
 .ad
 .sp .6
@@ -760,6 +800,18 @@
 .sp
 .ne 2
 .na
+\fB\fBfeature@\fR\fIfeature_name\fR=\fBenabled\fR\fR
+.ad
+.RS 4n
+The value of this property is the current state of \fIfeature_name\fR. The
+only valid value when setting this property is \fBenabled\fR which moves
+\fIfeature_name\fR to the enabled state. See \fBzpool-features\fR(5) for
+details on feature states.
+.RE
+
+.sp
+.ne 2
+.na
 \fB\fBlistsnaps\fR=on | off\fR
 .ad
 .sp .6
@@ -779,8 +831,8 @@
 The current on-disk version of the pool. This can be increased, but never
 decreased. The preferred method of updating pools is with the "\fBzpool
 upgrade\fR" command, though this property can be used when a specific version
-is needed for backwards compatibility. This property can be any number between
-1 and the current version reported by "\fBzpool upgrade -v\fR".
+is needed for backwards compatibility. Once feature flags is enabled on a
+pool this property will no longer have a value.
 .RE
 
 .SS "Subcommands"
@@ -881,7 +933,7 @@
 .sp
 .ne 2
 .na
-\fB\fBzpool create\fR [\fB-fn\fR] [\fB-o\fR \fIproperty=value\fR] ... [\fB-O\fR
+\fB\fBzpool create\fR [\fB-fnd\fR] [\fB-o\fR \fIproperty=value\fR] ... [\fB-O\fR
 \fIfile-system-property=value\fR] ... [\fB-m\fR \fImountpoint\fR] [\fB-R\fR
 \fIroot\fR] \fIpool\fR \fIvdev\fR ...\fR
 .ad
@@ -911,6 +963,9 @@
 root dataset cannot be mounted. This can be overridden with the \fB-m\fR
 option.
 .sp
+By default all supported features are enabled on the new pool unless the
+\fB-d\fR option is specified.
+.sp
 .ne 2
 .na
 \fB\fB-f\fR\fR
@@ -936,6 +991,18 @@
 .sp
 .ne 2
 .na
+\fB\fB-d\fR\fR
+.ad
+.sp .6
+.RS 4n
+Do not enable any features on the new pool. Individual features can be enabled
+by setting their corresponding properties to \fBenabled\fR with the \fB-o\fR
+option. See \fBzpool-features\fR(5) for details about feature properties.
+.RE
+
+.sp
+.ne 2
+.na
 \fB\fB-o\fR \fIproperty=value\fR [\fB-o\fR \fIproperty=value\fR] ...\fR
 .ad
 .sp .6
@@ -2059,4 +2126,4 @@
 .SH SEE ALSO
 .sp
 .LP
-\fBzfs\fR(1M), \fBattributes\fR(5)
+\fBzfs\fR(1M), \fBzpool-features\fR(5), \fBattributes\fR(5)
--- a/usr/src/man/man5/Makefile	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/man/man5/Makefile	Mon May 21 12:11:39 2012 -0700
@@ -9,109 +9,113 @@
 # at http://www.illumos.org/license/CDDL.
 #
 
+#
 # Copyright 2011, Richard Lowe
+# Copyright (c) 2012 by Delphix. All rights reserved.
+#
 
 include ../../Makefile.master
 
-MANSECT = 	5
+MANSECT =	5
 
-MANFILES = 	Intro.5			\
-	 	acl.5			\
-	 	ad.5			\
-	 	ascii.5			\
-	 	attributes.5		\
-	 	audit_binfile.5		\
-	 	audit_remote.5		\
-	 	audit_syslog.5		\
-	 	brands.5		\
-	 	cancellation.5		\
-	 	charmap.5		\
-	 	condition.5		\
-	 	crypt_bsdbf.5		\
-	 	crypt_bsdmd5.5		\
-	 	crypt_sha256.5		\
-	 	crypt_sha512.5		\
-	 	crypt_sunmd5.5		\
-	 	crypt_unix.5		\
-	 	device_clean.5		\
-	 	dhcp.5			\
-	 	dhcp_modules.5		\
-	 	environ.5		\
-	 	eqnchar.5		\
-	 	extendedFILE.5		\
-	 	filesystem.5		\
-	 	fnmatch.5		\
-	 	formats.5		\
-	 	fsattr.5		\
-	 	grub.5			\
-	 	gss_auth_rules.5	\
-	 	hal.5			\
-	 	iconv.5			\
-	 	iconv_unicode.5		\
-	 	ieee802.11.5		\
-	 	ipfilter.5		\
-	 	isalist.5		\
-	 	kerberos.5		\
-	 	krb5_auth_rules.5	\
-	 	krb5envvar.5		\
-	 	largefile.5		\
-	 	lf64.5			\
-	 	lfcompile.5		\
-	 	lfcompile64.5		\
-	 	locale.5		\
-	 	man.5			\
-	 	mansun.5		\
-	 	me.5			\
-	 	mech_spnego.5		\
-	 	mm.5			\
-	 	ms.5			\
-	 	mutex.5			\
-	 	nfssec.5		\
-	 	pam_allow.5		\
-	 	pam_authtok_check.5	\
-	 	pam_authtok_get.5	\
-	 	pam_authtok_store.5	\
-	 	pam_deny.5		\
-	 	pam_dhkeys.5		\
-	 	pam_dial_auth.5		\
-	 	pam_krb5.5		\
-	 	pam_krb5_migrate.5	\
-	 	pam_ldap.5		\
-	 	pam_list.5		\
-	 	pam_passwd_auth.5	\
-	 	pam_rhosts_auth.5	\
-	 	pam_roles.5		\
-	 	pam_sample.5		\
-	 	pam_smb_passwd.5	\
-	 	pam_smbfs_login.5	\
-	 	pam_tsol_account.5	\
-	 	pam_unix_account.5	\
-	 	pam_unix_auth.5		\
-	 	pam_unix_cred.5		\
-	 	pam_unix_session.5	\
-	 	pkcs11_kernel.5		\
-	 	pkcs11_softtoken.5	\
-	 	pkcs11_tpm.5		\
-	 	privileges.5		\
-	 	prof.5			\
-	 	rbac.5			\
-	 	regex.5			\
-	 	regexp.5		\
-	 	resource_controls.5	\
-	 	smf.5			\
-	 	smf_bootstrap.5		\
-	 	smf_method.5		\
-	 	smf_restarter.5		\
-	 	smf_security.5		\
-	 	smf_template.5		\
-	 	standards.5		\
-	 	sticky.5		\
-	 	tecla.5			\
-	 	term.5			\
-	 	threads.5		\
-	 	trusted_extensions.5	\
-	 	vgrindefs.5		\
-	 	zones.5
+MANFILES =	Intro.5			\
+		acl.5			\
+		ad.5			\
+		ascii.5			\
+		attributes.5		\
+		audit_binfile.5		\
+		audit_remote.5		\
+		audit_syslog.5		\
+		brands.5		\
+		cancellation.5		\
+		charmap.5		\
+		condition.5		\
+		crypt_bsdbf.5		\
+		crypt_bsdmd5.5		\
+		crypt_sha256.5		\
+		crypt_sha512.5		\
+		crypt_sunmd5.5		\
+		crypt_unix.5		\
+		device_clean.5		\
+		dhcp.5			\
+		dhcp_modules.5		\
+		environ.5		\
+		eqnchar.5		\
+		extendedFILE.5		\
+		filesystem.5		\
+		fnmatch.5		\
+		formats.5		\
+		fsattr.5		\
+		grub.5			\
+		gss_auth_rules.5	\
+		hal.5			\
+		iconv.5			\
+		iconv_unicode.5		\
+		ieee802.11.5		\
+		ipfilter.5		\
+		isalist.5		\
+		kerberos.5		\
+		krb5_auth_rules.5	\
+		krb5envvar.5		\
+		largefile.5		\
+		lf64.5			\
+		lfcompile.5		\
+		lfcompile64.5		\
+		locale.5		\
+		man.5			\
+		mansun.5		\
+		me.5			\
+		mech_spnego.5		\
+		mm.5			\
+		ms.5			\
+		mutex.5			\
+		nfssec.5		\
+		pam_allow.5		\
+		pam_authtok_check.5	\
+		pam_authtok_get.5	\
+		pam_authtok_store.5	\
+		pam_deny.5		\
+		pam_dhkeys.5		\
+		pam_dial_auth.5		\
+		pam_krb5.5		\
+		pam_krb5_migrate.5	\
+		pam_ldap.5		\
+		pam_list.5		\
+		pam_passwd_auth.5	\
+		pam_rhosts_auth.5	\
+		pam_roles.5		\
+		pam_sample.5		\
+		pam_smb_passwd.5	\
+		pam_smbfs_login.5	\
+		pam_tsol_account.5	\
+		pam_unix_account.5	\
+		pam_unix_auth.5		\
+		pam_unix_cred.5		\
+		pam_unix_session.5	\
+		pkcs11_kernel.5		\
+		pkcs11_softtoken.5	\
+		pkcs11_tpm.5		\
+		privileges.5		\
+		prof.5			\
+		rbac.5			\
+		regex.5			\
+		regexp.5		\
+		resource_controls.5	\
+		smf.5			\
+		smf_bootstrap.5		\
+		smf_method.5		\
+		smf_restarter.5		\
+		smf_security.5		\
+		smf_template.5		\
+		standards.5		\
+		sticky.5		\
+		tecla.5			\
+		term.5			\
+		threads.5		\
+		trusted_extensions.5	\
+		vgrindefs.5		\
+		zones.5			\
+		zpool-features.5
 
 MANSOFILES =	ANSI.5		\
 		C++.5		\
--- a/usr/src/pkg/manifests/system-file-system-zfs-tests.mf	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/pkg/manifests/system-file-system-zfs-tests.mf	Mon May 21 12:11:39 2012 -0700
@@ -21,6 +21,7 @@
 
 #
 # Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2012 by Delphix. All rights reserved.
 #
 
 set name=pkg.fmri value=pkg:/system/file-system/zfs/tests@$(PKGVERS)
@@ -55,10 +56,13 @@
 file path=usr/bin/$(ARCH64)/ztest mode=0555
 file path=usr/include/sys/fs/zut.h
 file path=usr/lib/devfsadm/linkmod/SUNW_zut_link.so group=sys
+$(i386_ONLY)file path=usr/sbin/$(ARCH32)/zhack mode=0555
 $(i386_ONLY)file path=usr/sbin/$(ARCH32)/zinject mode=0555
+file path=usr/sbin/$(ARCH64)/zhack mode=0555
 file path=usr/sbin/$(ARCH64)/zinject mode=0555
 hardlink path=usr/bin/zlook target=../../usr/lib/isaexec
 hardlink path=usr/bin/ztest target=../../usr/lib/isaexec
+hardlink path=usr/sbin/zhack target=../../usr/lib/isaexec
 hardlink path=usr/sbin/zinject target=../../usr/lib/isaexec
 legacy pkg=SUNWonzfs desc="ZFS test commands" name="ZFS unbundled utilities"
 legacy pkg=SUNWonzfsr desc="ZFS test driver" name="ZFS unbundled driver"
--- a/usr/src/pkg/manifests/system-file-system-zfs.mf	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/pkg/manifests/system-file-system-zfs.mf	Mon May 21 12:11:39 2012 -0700
@@ -21,6 +21,7 @@
 
 #
 # Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2012 by Delphix. All rights reserved.
 #
 
 set name=pkg.fmri value=pkg:/system/file-system/zfs@$(PKGVERS)
@@ -131,6 +132,7 @@
 file path=usr/share/man/man1m/zfs.1m
 file path=usr/share/man/man1m/zpool.1m
 file path=usr/share/man/man1m/zstreamdump.1m
+file path=usr/share/man/man5/zpool-features.5
 hardlink path=kernel/fs/$(ARCH64)/zfs target=../../../kernel/drv/$(ARCH64)/zfs
 $(i386_ONLY)hardlink path=kernel/fs/zfs target=../../kernel/drv/zfs
 hardlink path=usr/lib/fs/zfs/fstyp target=../../../sbin/fstyp
--- a/usr/src/uts/common/Makefile.files	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/uts/common/Makefile.files	Mon May 21 12:11:39 2012 -0700
@@ -21,10 +21,8 @@
 
 #
 # Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
-#
-
-#
 # Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
+# Copyright (c) 2012 by Delphix. All rights reserved.
 #
 
 #
@@ -245,6 +243,7 @@
 		nvpair.o	\
 		nvpair_alloc_system.o	\
 		nvpair_alloc_fixed.o	\
+		fnvpair.o	\
 		octet.o		\
 		open.o		\
 		p_online.o	\
@@ -1088,7 +1087,7 @@
 	    drm_auth.o drm_bufs.o drm_context.o drm_dma.o \
 	    drm_drawable.o drm_drv.o drm_fops.o drm_ioctl.o drm_irq.o \
 	    drm_lock.o drm_memory.o drm_msg.o drm_pci.o drm_scatter.o \
-	    drm_cache.o drm_gem.o drm_mm.o ati_pcigart.o 
+	    drm_cache.o drm_gem.o drm_mm.o ati_pcigart.o
 
 FM_OBJS += devfm.o devfm_machdep.o
 
@@ -1330,6 +1329,7 @@
 	arc.o			\
 	bplist.o		\
 	bpobj.o			\
+	bptree.o		\
 	dbuf.o			\
 	ddt.o			\
 	ddt_zap.o		\
@@ -1351,6 +1351,7 @@
 	dsl_deleg.o		\
 	dsl_prop.o		\
 	dsl_scan.o		\
+	zfeature.o		\
 	gzip.o			\
 	lzjb.o			\
 	metaslab.o		\
@@ -1393,11 +1394,12 @@
 	zrlock.o
 
 ZFS_SHARED_OBJS +=		\
+	zfeature_common.o	\
+	zfs_comutil.o		\
+	zfs_deleg.o		\
+	zfs_fletcher.o		\
 	zfs_namecheck.o		\
-	zfs_deleg.o		\
 	zfs_prop.o		\
-	zfs_comutil.o		\
-	zfs_fletcher.o		\
 	zpool_prop.o		\
 	zprop_common.o
 
@@ -1555,7 +1557,7 @@
 #
 #			kernel SSL
 #
-KSSL_OBJS +=	kssl.o ksslioctl.o 
+KSSL_OBJS +=	kssl.o ksslioctl.o
 
 KSSL_SOCKFIL_MOD_OBJS += ksslfilter.o ksslapi.o ksslrec.o
 
--- a/usr/src/uts/common/fs/zfs/arc.c	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/uts/common/fs/zfs/arc.c	Mon May 21 12:11:39 2012 -0700
@@ -2547,9 +2547,11 @@
 	callback_list = hdr->b_acb;
 	ASSERT(callback_list != NULL);
 	if (BP_SHOULD_BYTESWAP(zio->io_bp) && zio->io_error == 0) {
+		dmu_object_byteswap_t bswap =
+		    DMU_OT_BYTESWAP(BP_GET_TYPE(zio->io_bp));
 		arc_byteswap_func_t *func = BP_GET_LEVEL(zio->io_bp) > 0 ?
 		    byteswap_uint64_array :
-		    dmu_ot[BP_GET_TYPE(zio->io_bp)].ot_byteswap;
+		    dmu_ot_byteswap[bswap].ob_func;
 		func(buf->b_data, hdr->b_size);
 	}
 
--- a/usr/src/uts/common/fs/zfs/dbuf.c	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/uts/common/fs/zfs/dbuf.c	Mon May 21 12:11:39 2012 -0700
@@ -21,6 +21,7 @@
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
  */
 
 #include <sys/zfs_context.h>
@@ -227,7 +228,7 @@
 		boolean_t is_metadata;
 
 		DB_DNODE_ENTER(db);
-		is_metadata = dmu_ot[DB_DNODE(db)->dn_type].ot_metadata;
+		is_metadata = DMU_OT_IS_METADATA(DB_DNODE(db)->dn_type);
 		DB_DNODE_EXIT(db);
 
 		return (is_metadata);
--- a/usr/src/uts/common/fs/zfs/ddt.c	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/uts/common/fs/zfs/ddt.c	Mon May 21 12:11:39 2012 -0700
@@ -21,6 +21,7 @@
 
 /*
  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
  */
 
 #include <sys/zfs_context.h>
@@ -1061,11 +1062,9 @@
 	ASSERT(spa->spa_uberblock.ub_version >= SPA_VERSION_DEDUP);
 
 	if (spa->spa_ddt_stat_object == 0) {
-		spa->spa_ddt_stat_object = zap_create(ddt->ddt_os,
-		    DMU_OT_DDT_STATS, DMU_OT_NONE, 0, tx);
-		VERIFY(zap_add(ddt->ddt_os, DMU_POOL_DIRECTORY_OBJECT,
-		    DMU_POOL_DDT_STATS, sizeof (uint64_t), 1,
-		    &spa->spa_ddt_stat_object, tx) == 0);
+		spa->spa_ddt_stat_object = zap_create_link(ddt->ddt_os,
+		    DMU_OT_DDT_STATS, DMU_POOL_DIRECTORY_OBJECT,
+		    DMU_POOL_DDT_STATS, tx);
 	}
 
 	while ((dde = avl_destroy_nodes(&ddt->ddt_tree, &cookie)) != NULL) {
--- a/usr/src/uts/common/fs/zfs/dmu.c	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/uts/common/fs/zfs/dmu.c	Mon May 21 12:11:39 2012 -0700
@@ -20,6 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
  */
 
 #include <sys/dmu.h>
@@ -46,60 +47,73 @@
 #endif
 
 const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES] = {
-	{	byteswap_uint8_array,	TRUE,	"unallocated"		},
-	{	zap_byteswap,		TRUE,	"object directory"	},
-	{	byteswap_uint64_array,	TRUE,	"object array"		},
-	{	byteswap_uint8_array,	TRUE,	"packed nvlist"		},
-	{	byteswap_uint64_array,	TRUE,	"packed nvlist size"	},
-	{	byteswap_uint64_array,	TRUE,	"bpobj"			},
-	{	byteswap_uint64_array,	TRUE,	"bpobj header"		},
-	{	byteswap_uint64_array,	TRUE,	"SPA space map header"	},
-	{	byteswap_uint64_array,	TRUE,	"SPA space map"		},
-	{	byteswap_uint64_array,	TRUE,	"ZIL intent log"	},
-	{	dnode_buf_byteswap,	TRUE,	"DMU dnode"		},
-	{	dmu_objset_byteswap,	TRUE,	"DMU objset"		},
-	{	byteswap_uint64_array,	TRUE,	"DSL directory"		},
-	{	zap_byteswap,		TRUE,	"DSL directory child map"},
-	{	zap_byteswap,		TRUE,	"DSL dataset snap map"	},
-	{	zap_byteswap,		TRUE,	"DSL props"		},
-	{	byteswap_uint64_array,	TRUE,	"DSL dataset"		},
-	{	zfs_znode_byteswap,	TRUE,	"ZFS znode"		},
-	{	zfs_oldacl_byteswap,	TRUE,	"ZFS V0 ACL"		},
-	{	byteswap_uint8_array,	FALSE,	"ZFS plain file"	},
-	{	zap_byteswap,		TRUE,	"ZFS directory"		},
-	{	zap_byteswap,		TRUE,	"ZFS master node"	},
-	{	zap_byteswap,		TRUE,	"ZFS delete queue"	},
-	{	byteswap_uint8_array,	FALSE,	"zvol object"		},
-	{	zap_byteswap,		TRUE,	"zvol prop"		},
-	{	byteswap_uint8_array,	FALSE,	"other uint8[]"		},
-	{	byteswap_uint64_array,	FALSE,	"other uint64[]"	},
-	{	zap_byteswap,		TRUE,	"other ZAP"		},
-	{	zap_byteswap,		TRUE,	"persistent error log"	},
-	{	byteswap_uint8_array,	TRUE,	"SPA history"		},
-	{	byteswap_uint64_array,	TRUE,	"SPA history offsets"	},
-	{	zap_byteswap,		TRUE,	"Pool properties"	},
-	{	zap_byteswap,		TRUE,	"DSL permissions"	},
-	{	zfs_acl_byteswap,	TRUE,	"ZFS ACL"		},
-	{	byteswap_uint8_array,	TRUE,	"ZFS SYSACL"		},
-	{	byteswap_uint8_array,	TRUE,	"FUID table"		},
-	{	byteswap_uint64_array,	TRUE,	"FUID table size"	},
-	{	zap_byteswap,		TRUE,	"DSL dataset next clones"},
-	{	zap_byteswap,		TRUE,	"scan work queue"	},
-	{	zap_byteswap,		TRUE,	"ZFS user/group used"	},
-	{	zap_byteswap,		TRUE,	"ZFS user/group quota"	},
-	{	zap_byteswap,		TRUE,	"snapshot refcount tags"},
-	{	zap_byteswap,		TRUE,	"DDT ZAP algorithm"	},
-	{	zap_byteswap,		TRUE,	"DDT statistics"	},
-	{	byteswap_uint8_array,	TRUE,	"System attributes"	},
-	{	zap_byteswap,		TRUE,	"SA master node"	},
-	{	zap_byteswap,		TRUE,	"SA attr registration"	},
-	{	zap_byteswap,		TRUE,	"SA attr layouts"	},
-	{	zap_byteswap,		TRUE,	"scan translations"	},
-	{	byteswap_uint8_array,	FALSE,	"deduplicated block"	},
-	{	zap_byteswap,		TRUE,	"DSL deadlist map"	},
-	{	byteswap_uint64_array,	TRUE,	"DSL deadlist map hdr"	},
-	{	zap_byteswap,		TRUE,	"DSL dir clones"	},
-	{	byteswap_uint64_array,	TRUE,	"bpobj subobj"		},
+	{	DMU_BSWAP_UINT8,	TRUE,	"unallocated"		},
+	{	DMU_BSWAP_ZAP,		TRUE,	"object directory"	},
+	{	DMU_BSWAP_UINT64,	TRUE,	"object array"		},
+	{	DMU_BSWAP_UINT8,	TRUE,	"packed nvlist"		},
+	{	DMU_BSWAP_UINT64,	TRUE,	"packed nvlist size"	},
+	{	DMU_BSWAP_UINT64,	TRUE,	"bpobj"			},
+	{	DMU_BSWAP_UINT64,	TRUE,	"bpobj header"		},
+	{	DMU_BSWAP_UINT64,	TRUE,	"SPA space map header"	},
+	{	DMU_BSWAP_UINT64,	TRUE,	"SPA space map"		},
+	{	DMU_BSWAP_UINT64,	TRUE,	"ZIL intent log"	},
+	{	DMU_BSWAP_DNODE,	TRUE,	"DMU dnode"		},
+	{	DMU_BSWAP_OBJSET,	TRUE,	"DMU objset"		},
+	{	DMU_BSWAP_UINT64,	TRUE,	"DSL directory"		},
+	{	DMU_BSWAP_ZAP,		TRUE,	"DSL directory child map"},
+	{	DMU_BSWAP_ZAP,		TRUE,	"DSL dataset snap map"	},
+	{	DMU_BSWAP_ZAP,		TRUE,	"DSL props"		},
+	{	DMU_BSWAP_UINT64,	TRUE,	"DSL dataset"		},
+	{	DMU_BSWAP_ZNODE,	TRUE,	"ZFS znode"		},
+	{	DMU_BSWAP_OLDACL,	TRUE,	"ZFS V0 ACL"		},
+	{	DMU_BSWAP_UINT8,	FALSE,	"ZFS plain file"	},
+	{	DMU_BSWAP_ZAP,		TRUE,	"ZFS directory"		},
+	{	DMU_BSWAP_ZAP,		TRUE,	"ZFS master node"	},
+	{	DMU_BSWAP_ZAP,		TRUE,	"ZFS delete queue"	},
+	{	DMU_BSWAP_UINT8,	FALSE,	"zvol object"		},
+	{	DMU_BSWAP_ZAP,		TRUE,	"zvol prop"		},
+	{	DMU_BSWAP_UINT8,	FALSE,	"other uint8[]"		},
+	{	DMU_BSWAP_UINT64,	FALSE,	"other uint64[]"	},
+	{	DMU_BSWAP_ZAP,		TRUE,	"other ZAP"		},
+	{	DMU_BSWAP_ZAP,		TRUE,	"persistent error log"	},
+	{	DMU_BSWAP_UINT8,	TRUE,	"SPA history"		},
+	{	DMU_BSWAP_UINT64,	TRUE,	"SPA history offsets"	},
+	{	DMU_BSWAP_ZAP,		TRUE,	"Pool properties"	},
+	{	DMU_BSWAP_ZAP,		TRUE,	"DSL permissions"	},
+	{	DMU_BSWAP_ACL,		TRUE,	"ZFS ACL"		},
+	{	DMU_BSWAP_UINT8,	TRUE,	"ZFS SYSACL"		},
+	{	DMU_BSWAP_UINT8,	TRUE,	"FUID table"		},
+	{	DMU_BSWAP_UINT64,	TRUE,	"FUID table size"	},
+	{	DMU_BSWAP_ZAP,		TRUE,	"DSL dataset next clones"},
+	{	DMU_BSWAP_ZAP,		TRUE,	"scan work queue"	},
+	{	DMU_BSWAP_ZAP,		TRUE,	"ZFS user/group used"	},
+	{	DMU_BSWAP_ZAP,		TRUE,	"ZFS user/group quota"	},
+	{	DMU_BSWAP_ZAP,		TRUE,	"snapshot refcount tags"},
+	{	DMU_BSWAP_ZAP,		TRUE,	"DDT ZAP algorithm"	},
+	{	DMU_BSWAP_ZAP,		TRUE,	"DDT statistics"	},
+	{	DMU_BSWAP_UINT8,	TRUE,	"System attributes"	},
+	{	DMU_BSWAP_ZAP,		TRUE,	"SA master node"	},
+	{	DMU_BSWAP_ZAP,		TRUE,	"SA attr registration"	},
+	{	DMU_BSWAP_ZAP,		TRUE,	"SA attr layouts"	},
+	{	DMU_BSWAP_ZAP,		TRUE,	"scan translations"	},
+	{	DMU_BSWAP_UINT8,	FALSE,	"deduplicated block"	},
+	{	DMU_BSWAP_ZAP,		TRUE,	"DSL deadlist map"	},
+	{	DMU_BSWAP_UINT64,	TRUE,	"DSL deadlist map hdr"	},
+	{	DMU_BSWAP_ZAP,		TRUE,	"DSL dir clones"	},
+	{	DMU_BSWAP_UINT64,	TRUE,	"bpobj subobj"		}
+};
+
+const dmu_object_byteswap_info_t dmu_ot_byteswap[DMU_BSWAP_NUMFUNCS] = {
+	{	byteswap_uint8_array,	"uint8"		},
+	{	byteswap_uint16_array,	"uint16"	},
+	{	byteswap_uint32_array,	"uint32"	},
+	{	byteswap_uint64_array,	"uint64"	},
+	{	zap_byteswap,		"zap"		},
+	{	dnode_buf_byteswap,	"dnode"		},
+	{	dmu_objset_byteswap,	"objset"	},
+	{	zfs_znode_byteswap,	"znode"		},
+	{	zfs_oldacl_byteswap,	"oldacl"	},
+	{	zfs_acl_byteswap,	"acl"		}
 };
 
 int
@@ -176,7 +190,7 @@
 	DB_DNODE_ENTER(db);
 	dn = DB_DNODE(db);
 
-	if (type > DMU_OT_NUMTYPES) {
+	if (!DMU_OT_IS_VALID(type)) {
 		error = EINVAL;
 	} else if (dn->dn_bonus != db) {
 		error = EINVAL;
@@ -1503,7 +1517,7 @@
 dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp)
 {
 	dmu_object_type_t type = dn ? dn->dn_type : DMU_OT_OBJSET;
-	boolean_t ismd = (level > 0 || dmu_ot[type].ot_metadata ||
+	boolean_t ismd = (level > 0 || DMU_OT_IS_METADATA(type) ||
 	    (wp & WP_SPILL));
 	enum zio_checksum checksum = os->os_checksum;
 	enum zio_compress compress = os->os_compress;
--- a/usr/src/uts/common/fs/zfs/dmu_send.c	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/uts/common/fs/zfs/dmu_send.c	Mon May 21 12:11:39 2012 -0700
@@ -20,11 +20,8 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011 by Delphix. All rights reserved.
- */
-/*
  * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
- * Copyright (c) 2011 by Delphix. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
  * Copyright (c) 2012, Joyent, Inc. All rights reserved.
  */
 
@@ -1075,8 +1072,8 @@
 	void *data = NULL;
 
 	if (drro->drr_type == DMU_OT_NONE ||
-	    drro->drr_type >= DMU_OT_NUMTYPES ||
-	    drro->drr_bonustype >= DMU_OT_NUMTYPES ||
+	    !DMU_OT_IS_VALID(drro->drr_type) ||
+	    !DMU_OT_IS_VALID(drro->drr_bonustype) ||
 	    drro->drr_checksumtype >= ZIO_CHECKSUM_FUNCTIONS ||
 	    drro->drr_compress >= ZIO_COMPRESS_FUNCTIONS ||
 	    P2PHASE(drro->drr_blksz, SPA_MINBLOCKSIZE) ||
@@ -1141,7 +1138,9 @@
 		ASSERT3U(db->db_size, >=, drro->drr_bonuslen);
 		bcopy(data, db->db_data, drro->drr_bonuslen);
 		if (ra->byteswap) {
-			dmu_ot[drro->drr_bonustype].ot_byteswap(db->db_data,
+			dmu_object_byteswap_t byteswap =
+			    DMU_OT_BYTESWAP(drro->drr_bonustype);
+			dmu_ot_byteswap[byteswap].ob_func(db->db_data,
 			    drro->drr_bonuslen);
 		}
 		dmu_buf_rele(db, FTAG);
@@ -1184,7 +1183,7 @@
 	int err;
 
 	if (drrw->drr_offset + drrw->drr_length < drrw->drr_offset ||
-	    drrw->drr_type >= DMU_OT_NUMTYPES)
+	    !DMU_OT_IS_VALID(drrw->drr_type))
 		return (EINVAL);
 
 	data = restore_read(ra, drrw->drr_length);
@@ -1203,8 +1202,11 @@
 		dmu_tx_abort(tx);
 		return (err);
 	}
-	if (ra->byteswap)
-		dmu_ot[drrw->drr_type].ot_byteswap(data, drrw->drr_length);
+	if (ra->byteswap) {
+		dmu_object_byteswap_t byteswap =
+		    DMU_OT_BYTESWAP(drrw->drr_type);
+		dmu_ot_byteswap[byteswap].ob_func(data, drrw->drr_length);
+	}
 	dmu_write(os, drrw->drr_object,
 	    drrw->drr_offset, drrw->drr_length, data, tx);
 	dmu_tx_commit(tx);
--- a/usr/src/uts/common/fs/zfs/dmu_traverse.c	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/uts/common/fs/zfs/dmu_traverse.c	Mon May 21 12:11:39 2012 -0700
@@ -20,6 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
  */
 
 #include <sys/zfs_context.h>
@@ -53,6 +54,7 @@
 	uint64_t td_objset;
 	blkptr_t *td_rootbp;
 	uint64_t td_min_txg;
+	zbookmark_t *td_resume;
 	int td_flags;
 	prefetch_data_t *td_pfd;
 	blkptr_cb_t *td_func;
@@ -128,6 +130,54 @@
 	zil_free(zilog);
 }
 
+typedef enum resume_skip {
+	RESUME_SKIP_ALL,
+	RESUME_SKIP_NONE,
+	RESUME_SKIP_CHILDREN
+} resume_skip_t;
+
+/*
+ * Returns RESUME_SKIP_ALL if td indicates that we are resuming a traversal and
+ * the block indicated by zb does not need to be visited at all. Returns
+ * RESUME_SKIP_CHILDREN if we are resuming a post traversal and we reach the
+ * resume point. This indicates that this block should be visited but not its
+ * children (since they must have been visited in a previous traversal).
+ * Otherwise returns RESUME_SKIP_NONE.
+ */
+static resume_skip_t
+resume_skip_check(traverse_data_t *td, const dnode_phys_t *dnp,
+    const zbookmark_t *zb)
+{
+	if (td->td_resume != NULL && !ZB_IS_ZERO(td->td_resume)) {
+		/*
+		 * If we already visited this bp & everything below,
+		 * don't bother doing it again.
+		 */
+		if (zbookmark_is_before(dnp, zb, td->td_resume))
+			return (RESUME_SKIP_ALL);
+
+		/*
+		 * If we found the block we're trying to resume from, zero
+		 * the bookmark out to indicate that we have resumed.
+		 */
+		ASSERT3U(zb->zb_object, <=, td->td_resume->zb_object);
+		if (bcmp(zb, td->td_resume, sizeof (*zb)) == 0) {
+			bzero(td->td_resume, sizeof (*zb));
+			if (td->td_flags & TRAVERSE_POST)
+				return (RESUME_SKIP_CHILDREN);
+		}
+	}
+	return (RESUME_SKIP_NONE);
+}
+
+static void
+traverse_pause(traverse_data_t *td, const zbookmark_t *zb)
+{
+	ASSERT(td->td_resume != NULL);
+	ASSERT3U(zb->zb_level, ==, 0);
+	bcopy(zb, td->td_resume, sizeof (*td->td_resume));
+}
+
 static int
 traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
     arc_buf_t *pbuf, blkptr_t *bp, const zbookmark_t *zb)
@@ -137,8 +187,20 @@
 	arc_buf_t *buf = NULL;
 	prefetch_data_t *pd = td->td_pfd;
 	boolean_t hard = td->td_flags & TRAVERSE_HARD;
+	boolean_t pause = B_FALSE;
 
-	if (bp->blk_birth == 0) {
+	switch (resume_skip_check(td, dnp, zb)) {
+	case RESUME_SKIP_ALL:
+		return (0);
+	case RESUME_SKIP_CHILDREN:
+		goto post;
+	case RESUME_SKIP_NONE:
+		break;
+	default:
+		ASSERT(0);
+	}
+
+	if (BP_IS_HOLE(bp)) {
 		err = td->td_func(td->td_spa, NULL, NULL, pbuf, zb, dnp,
 		    td->td_arg);
 		return (err);
@@ -164,8 +226,10 @@
 		    td->td_arg);
 		if (err == TRAVERSE_VISIT_NO_CHILDREN)
 			return (0);
-		if (err)
-			return (err);
+		if (err == ERESTART)
+			pause = B_TRUE; /* handle pausing at a common point */
+		if (err != 0)
+			goto post;
 	}
 
 	if (BP_GET_LEVEL(bp) > 0) {
@@ -253,9 +317,18 @@
 	if (buf)
 		(void) arc_buf_remove_ref(buf, &buf);
 
+post:
 	if (err == 0 && lasterr == 0 && (td->td_flags & TRAVERSE_POST)) {
 		err = td->td_func(td->td_spa, NULL, bp, pbuf, zb, dnp,
 		    td->td_arg);
+		if (err == ERESTART)
+			pause = B_TRUE;
+	}
+
+	if (pause && td->td_resume != NULL) {
+		ASSERT3U(err, ==, ERESTART);
+		ASSERT(!hard);
+		traverse_pause(td, zb);
 	}
 
 	return (err != 0 ? err : lasterr);
@@ -353,18 +426,23 @@
  * in syncing context).
  */
 static int
-traverse_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *rootbp,
-    uint64_t txg_start, int flags, blkptr_cb_t func, void *arg)
+traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp,
+    uint64_t txg_start, zbookmark_t *resume, int flags,
+    blkptr_cb_t func, void *arg)
 {
 	traverse_data_t td;
 	prefetch_data_t pd = { 0 };
 	zbookmark_t czb;
 	int err;
 
+	ASSERT(ds == NULL || objset == ds->ds_object);
+	ASSERT(!(flags & TRAVERSE_PRE) || !(flags & TRAVERSE_POST));
+
 	td.td_spa = spa;
-	td.td_objset = ds ? ds->ds_object : 0;
+	td.td_objset = objset;
 	td.td_rootbp = rootbp;
 	td.td_min_txg = txg_start;
+	td.td_resume = resume;
 	td.td_func = func;
 	td.td_arg = arg;
 	td.td_pfd = &pd;
@@ -416,8 +494,17 @@
 traverse_dataset(dsl_dataset_t *ds, uint64_t txg_start, int flags,
     blkptr_cb_t func, void *arg)
 {
-	return (traverse_impl(ds->ds_dir->dd_pool->dp_spa, ds,
-	    &ds->ds_phys->ds_bp, txg_start, flags, func, arg));
+	return (traverse_impl(ds->ds_dir->dd_pool->dp_spa, ds, ds->ds_object,
+	    &ds->ds_phys->ds_bp, txg_start, NULL, flags, func, arg));
+}
+
+int
+traverse_dataset_destroyed(spa_t *spa, blkptr_t *blkptr,
+    uint64_t txg_start, zbookmark_t *resume, int flags,
+    blkptr_cb_t func, void *arg)
+{
+	return (traverse_impl(spa, NULL, ZB_DESTROYED_OBJSET,
+	    blkptr, txg_start, resume, flags, func, arg));
 }
 
 /*
@@ -434,8 +521,8 @@
 	boolean_t hard = (flags & TRAVERSE_HARD);
 
 	/* visit the MOS */
-	err = traverse_impl(spa, NULL, spa_get_rootblkptr(spa),
-	    txg_start, flags, func, arg);
+	err = traverse_impl(spa, NULL, 0, spa_get_rootblkptr(spa),
+	    txg_start, NULL, flags, func, arg);
 	if (err)
 		return (err);
 
--- a/usr/src/uts/common/fs/zfs/dmu_tx.c	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/uts/common/fs/zfs/dmu_tx.c	Mon May 21 12:11:39 2012 -0700
@@ -20,9 +20,8 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- */
-/*
  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
  */
 
 #include <sys/dmu.h>
@@ -676,7 +675,7 @@
 		return;
 	}
 
-	ASSERT3P(dmu_ot[dn->dn_type].ot_byteswap, ==, zap_byteswap);
+	ASSERT3P(DMU_OT_BYTESWAP(dn->dn_type), ==, DMU_BSWAP_ZAP);
 
 	if (dn->dn_maxblkid == 0 && !add) {
 		blkptr_t *bp;
--- a/usr/src/uts/common/fs/zfs/dnode.c	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/uts/common/fs/zfs/dnode.c	Mon May 21 12:11:39 2012 -0700
@@ -20,6 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
  */
 
 #include <sys/zfs_context.h>
@@ -193,7 +194,7 @@
 	ASSERT(dn->dn_objset);
 	ASSERT(dn->dn_handle->dnh_dnode == dn);
 
-	ASSERT(dn->dn_phys->dn_type < DMU_OT_NUMTYPES);
+	ASSERT(DMU_OT_IS_VALID(dn->dn_phys->dn_type));
 
 	if (!(zfs_flags & ZFS_DEBUG_DNODE_VERIFY))
 		return;
@@ -212,7 +213,7 @@
 			ASSERT3U(1<<dn->dn_datablkshift, ==, dn->dn_datablksz);
 		}
 		ASSERT3U(dn->dn_nlevels, <=, 30);
-		ASSERT3U(dn->dn_type, <=, DMU_OT_NUMTYPES);
+		ASSERT(DMU_OT_IS_VALID(dn->dn_type));
 		ASSERT3U(dn->dn_nblkptr, >=, 1);
 		ASSERT3U(dn->dn_nblkptr, <=, DN_MAX_NBLKPTR);
 		ASSERT3U(dn->dn_bonuslen, <=, DN_MAX_BONUSLEN);
@@ -278,8 +279,10 @@
 		 */
 		int off = (dnp->dn_nblkptr-1) * sizeof (blkptr_t);
 		size_t len = DN_MAX_BONUSLEN - off;
-		ASSERT3U(dnp->dn_bonustype, <, DMU_OT_NUMTYPES);
-		dmu_ot[dnp->dn_bonustype].ot_byteswap(dnp->dn_bonus + off, len);
+		ASSERT(DMU_OT_IS_VALID(dnp->dn_bonustype));
+		dmu_object_byteswap_t byteswap =
+		    DMU_OT_BYTESWAP(dnp->dn_bonustype);
+		dmu_ot_byteswap[byteswap].ob_func(dnp->dn_bonus + off, len);
 	}
 
 	/* Swap SPILL block if we have one */
@@ -407,7 +410,7 @@
 
 	dmu_zfetch_init(&dn->dn_zfetch, dn);
 
-	ASSERT(dn->dn_phys->dn_type < DMU_OT_NUMTYPES);
+	ASSERT(DMU_OT_IS_VALID(dn->dn_phys->dn_type));
 
 	mutex_enter(&os->os_lock);
 	list_insert_head(&os->os_dnodes, dn);
@@ -496,11 +499,11 @@
 	ASSERT(bcmp(dn->dn_phys, &dnode_phys_zero, sizeof (dnode_phys_t)) == 0);
 	ASSERT(dn->dn_phys->dn_type == DMU_OT_NONE);
 	ASSERT(ot != DMU_OT_NONE);
-	ASSERT3U(ot, <, DMU_OT_NUMTYPES);
+	ASSERT(DMU_OT_IS_VALID(ot));
 	ASSERT((bonustype == DMU_OT_NONE && bonuslen == 0) ||
 	    (bonustype == DMU_OT_SA && bonuslen == 0) ||
 	    (bonustype != DMU_OT_NONE && bonuslen != 0));
-	ASSERT3U(bonustype, <, DMU_OT_NUMTYPES);
+	ASSERT(DMU_OT_IS_VALID(bonustype));
 	ASSERT3U(bonuslen, <=, DN_MAX_BONUSLEN);
 	ASSERT(dn->dn_type == DMU_OT_NONE);
 	ASSERT3U(dn->dn_maxblkid, ==, 0);
@@ -568,7 +571,7 @@
 	ASSERT((bonustype == DMU_OT_NONE && bonuslen == 0) ||
 	    (bonustype != DMU_OT_NONE && bonuslen != 0) ||
 	    (bonustype == DMU_OT_SA && bonuslen == 0));
-	ASSERT3U(bonustype, <, DMU_OT_NUMTYPES);
+	ASSERT(DMU_OT_IS_VALID(bonustype));
 	ASSERT3U(bonuslen, <=, DN_MAX_BONUSLEN);
 
 	/* clean up any unreferenced dbufs */
--- a/usr/src/uts/common/fs/zfs/dnode_sync.c	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/uts/common/fs/zfs/dnode_sync.c	Mon May 21 12:11:39 2012 -0700
@@ -18,8 +18,10 @@
  *
  * CDDL HEADER END
  */
+
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
  */
 
 #include <sys/zfs_context.h>
@@ -594,7 +596,7 @@
 	}
 
 	if (dn->dn_next_bonustype[txgoff]) {
-		ASSERT(dn->dn_next_bonustype[txgoff] < DMU_OT_NUMTYPES);
+		ASSERT(DMU_OT_IS_VALID(dn->dn_next_bonustype[txgoff]));
 		dnp->dn_bonustype = dn->dn_next_bonustype[txgoff];
 		dn->dn_next_bonustype[txgoff] = 0;
 	}
--- a/usr/src/uts/common/fs/zfs/dsl_dataset.c	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/uts/common/fs/zfs/dsl_dataset.c	Mon May 21 12:11:39 2012 -0700
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011 by Delphix. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
  * Copyright (c) 2012, Joyent, Inc. All rights reserved.
  */
 
@@ -35,6 +35,7 @@
 #include <sys/arc.h>
 #include <sys/zio.h>
 #include <sys/zap.h>
+#include <sys/zfeature.h>
 #include <sys/unique.h>
 #include <sys/zfs_context.h>
 #include <sys/zfs_ioctl.h>
@@ -100,7 +101,7 @@
 	if (BP_IS_HOLE(bp))
 		return;
 	ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE);
-	ASSERT3U(BP_GET_TYPE(bp), <, DMU_OT_NUMTYPES);
+	ASSERT(DMU_OT_IS_VALID(BP_GET_TYPE(bp)));
 	if (ds == NULL) {
 		/*
 		 * Account for the meta-objset space in its placeholder
@@ -117,7 +118,7 @@
 	mutex_enter(&ds->ds_dir->dd_lock);
 	mutex_enter(&ds->ds_lock);
 	delta = parent_delta(ds, used);
-	ds->ds_phys->ds_used_bytes += used;
+	ds->ds_phys->ds_referenced_bytes += used;
 	ds->ds_phys->ds_compressed_bytes += compressed;
 	ds->ds_phys->ds_uncompressed_bytes += uncompressed;
 	ds->ds_phys->ds_unique_bytes += used;
@@ -211,8 +212,8 @@
 		}
 	}
 	mutex_enter(&ds->ds_lock);
-	ASSERT3U(ds->ds_phys->ds_used_bytes, >=, used);
-	ds->ds_phys->ds_used_bytes -= used;
+	ASSERT3U(ds->ds_phys->ds_referenced_bytes, >=, used);
+	ds->ds_phys->ds_referenced_bytes -= used;
 	ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed);
 	ds->ds_phys->ds_compressed_bytes -= compressed;
 	ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed);
@@ -818,8 +819,8 @@
 		dsphys->ds_prev_snap_obj = origin->ds_object;
 		dsphys->ds_prev_snap_txg =
 		    origin->ds_phys->ds_creation_txg;
-		dsphys->ds_used_bytes =
-		    origin->ds_phys->ds_used_bytes;
+		dsphys->ds_referenced_bytes =
+		    origin->ds_phys->ds_referenced_bytes;
 		dsphys->ds_compressed_bytes =
 		    origin->ds_phys->ds_compressed_bytes;
 		dsphys->ds_uncompressed_bytes =
@@ -933,7 +934,6 @@
 	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
 	    pair = nvlist_next_nvpair(snaps, pair)) {
 		dsl_dataset_t *ds;
-		int err;
 
 		err = dsl_dataset_own(nvpair_name(pair), B_TRUE, dstg, &ds);
 		if (err == 0) {
@@ -1082,19 +1082,23 @@
 		goto out;
 
 	/*
-	 * remove the objects in open context, so that we won't
-	 * have too much to do in syncing context.
+	 * If async destruction is not enabled try to remove all objects
+	 * while in the open context so that there is less work to do in
+	 * the syncing context.
 	 */
-	for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE,
-	    ds->ds_phys->ds_prev_snap_txg)) {
-		/*
-		 * Ignore errors, if there is not enough disk space
-		 * we will deal with it in dsl_dataset_destroy_sync().
-		 */
-		(void) dmu_free_object(os, obj);
+	if (!spa_feature_is_enabled(dsl_dataset_get_spa(ds),
+	    &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) {
+		for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE,
+		    ds->ds_phys->ds_prev_snap_txg)) {
+			/*
+			 * Ignore errors, if there is not enough disk space
+			 * we will deal with it in dsl_dataset_destroy_sync().
+			 */
+			(void) dmu_free_object(os, obj);
+		}
+		if (err != ESRCH)
+			goto out;
 	}
-	if (err != ESRCH)
-		goto out;
 
 	/*
 	 * Only the ZIL knows how to free log blocks.
@@ -1240,7 +1244,7 @@
 	ASSERT(!dsl_dataset_is_snapshot(ds));
 
 	if (ds->ds_phys->ds_prev_snap_obj != 0)
-		mrs_used = ds->ds_prev->ds_phys->ds_used_bytes;
+		mrs_used = ds->ds_prev->ds_phys->ds_referenced_bytes;
 	else
 		mrs_used = 0;
 
@@ -1248,7 +1252,7 @@
 
 	ASSERT3U(dlused, <=, mrs_used);
 	ds->ds_phys->ds_unique_bytes =
-	    ds->ds_phys->ds_used_bytes - (mrs_used - dlused);
+	    ds->ds_phys->ds_referenced_bytes - (mrs_used - dlused);
 
 	if (spa_version(ds->ds_dir->dd_pool->dp_spa) >=
 	    SPA_VERSION_UNIQUE_ACCURATE)
@@ -1606,6 +1610,30 @@
 	    ds_next->ds_phys->ds_deadlist_obj);
 }
 
+static int
+old_synchronous_dataset_destroy(dsl_dataset_t *ds, dmu_tx_t *tx)
+{
+	int err;
+	struct killarg ka;
+
+	/*
+	 * Free everything that we point to (that's born after
+	 * the previous snapshot, if we are a clone)
+	 *
+	 * NB: this should be very quick, because we already
+	 * freed all the objects in open context.
+	 */
+	ka.ds = ds;
+	ka.tx = tx;
+	err = traverse_dataset(ds,
+	    ds->ds_phys->ds_prev_snap_txg, TRAVERSE_POST,
+	    kill_blkptr, &ka);
+	ASSERT3U(err, ==, 0);
+	ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) || ds->ds_phys->ds_unique_bytes == 0);
+
+	return (err);
+}
+
 void
 dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
 {
@@ -1752,7 +1780,6 @@
 			    tx);
 			dsl_dir_diduse_space(tx->tx_pool->dp_free_dir,
 			    DD_USED_HEAD, used, comp, uncomp, tx);
-			dsl_dir_dirty(tx->tx_pool->dp_free_dir, tx);
 
 			/* Merge our deadlist into next's and free it. */
 			dsl_deadlist_merge(&ds_next->ds_deadlist,
@@ -1828,32 +1855,54 @@
 		}
 		dsl_dataset_rele(ds_next, FTAG);
 	} else {
+		zfeature_info_t *async_destroy =
+		    &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY];
+
 		/*
 		 * There's no next snapshot, so this is a head dataset.
 		 * Destroy the deadlist.  Unless it's a clone, the
 		 * deadlist should be empty.  (If it's a clone, it's
 		 * safe to ignore the deadlist contents.)
 		 */
-		struct killarg ka;
-
 		dsl_deadlist_close(&ds->ds_deadlist);
 		dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx);
 		ds->ds_phys->ds_deadlist_obj = 0;
 
-		/*
-		 * Free everything that we point to (that's born after
-		 * the previous snapshot, if we are a clone)
-		 *
-		 * NB: this should be very quick, because we already
-		 * freed all the objects in open context.
-		 */
-		ka.ds = ds;
-		ka.tx = tx;
-		err = traverse_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
-		    TRAVERSE_POST, kill_blkptr, &ka);
-		ASSERT3U(err, ==, 0);
-		ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) ||
-		    ds->ds_phys->ds_unique_bytes == 0);
+		if (!spa_feature_is_enabled(dp->dp_spa, async_destroy)) {
+			err = old_synchronous_dataset_destroy(ds, tx);
+		} else {
+			/*
+			 * Move the bptree into the pool's list of trees to
+			 * clean up and update space accounting information.
+			 */
+			uint64_t used, comp, uncomp;
+
+			ASSERT(err == 0 || err == EBUSY);
+			if (!spa_feature_is_active(dp->dp_spa, async_destroy)) {
+				spa_feature_incr(dp->dp_spa, async_destroy, tx);
+				dp->dp_bptree_obj = bptree_alloc(
+				    dp->dp_meta_objset, tx);
+				VERIFY(zap_add(dp->dp_meta_objset,
+				    DMU_POOL_DIRECTORY_OBJECT,
+				    DMU_POOL_BPTREE_OBJ, sizeof (uint64_t), 1,
+				    &dp->dp_bptree_obj, tx) == 0);
+			}
+
+			used = ds->ds_dir->dd_phys->dd_used_bytes;
+			comp = ds->ds_dir->dd_phys->dd_compressed_bytes;
+			uncomp = ds->ds_dir->dd_phys->dd_uncompressed_bytes;
+
+			ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) ||
+			    ds->ds_phys->ds_unique_bytes == used);
+
+			bptree_add(dp->dp_meta_objset, dp->dp_bptree_obj,
+			    &ds->ds_phys->ds_bp, ds->ds_phys->ds_prev_snap_txg,
+			    used, comp, uncomp, tx);
+			dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD,
+			    -used, -comp, -uncomp, tx);
+			dsl_dir_diduse_space(dp->dp_free_dir, DD_USED_HEAD,
+			    used, comp, uncomp, tx);
+		}
 
 		if (ds->ds_prev != NULL) {
 			if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) {
@@ -2044,7 +2093,7 @@
 	dsphys->ds_creation_time = gethrestime_sec();
 	dsphys->ds_creation_txg = crtxg;
 	dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj;
-	dsphys->ds_used_bytes = ds->ds_phys->ds_used_bytes;
+	dsphys->ds_referenced_bytes = ds->ds_phys->ds_referenced_bytes;
 	dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes;
 	dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes;
 	dsphys->ds_flags = ds->ds_phys->ds_flags;
@@ -2168,10 +2217,22 @@
 	    zap_cursor_advance(&zc)) {
 		dsl_dataset_t *clone;
 		char buf[ZFS_MAXNAMELEN];
+		/*
+		 * Even though we hold the dp_config_rwlock, the dataset
+		 * may fail to open, returning ENOENT.  If there is a
+		 * thread concurrently attempting to destroy this
+		 * dataset, it will have the ds_rwlock held for
+		 * RW_WRITER.  Our call to dsl_dataset_hold_obj() ->
+		 * dsl_dataset_hold_ref() will fail its
+		 * rw_tryenter(&ds->ds_rwlock, RW_READER), drop the
+		 * dp_config_rwlock, and wait for the destroy progress
+		 * and signal ds_exclusive_cv.  If the destroy was
+		 * successful, we will see that
+		 * DSL_DATASET_IS_DESTROYED(), and return ENOENT.
+		 */
 		if (dsl_dataset_hold_obj(ds->ds_dir->dd_pool,
-		    za.za_first_integer, FTAG, &clone) != 0) {
-			goto fail;
-		}
+		    za.za_first_integer, FTAG, &clone) != 0)
+			continue;
 		dsl_dir_name(clone->ds_dir, buf);
 		VERIFY(nvlist_add_boolean(val, buf) == 0);
 		dsl_dataset_rele(clone, FTAG);
@@ -2294,7 +2355,7 @@
     uint64_t *refdbytesp, uint64_t *availbytesp,
     uint64_t *usedobjsp, uint64_t *availobjsp)
 {
-	*refdbytesp = ds->ds_phys->ds_used_bytes;
+	*refdbytesp = ds->ds_phys->ds_referenced_bytes;
 	*availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE);
 	if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes)
 		*availbytesp += ds->ds_reserved - ds->ds_phys->ds_unique_bytes;
@@ -2631,7 +2692,7 @@
 	 * Note however, if we stop before we reach the ORIGIN we get:
 	 * uN + kN + kN-1 + ... + kM - uM-1
 	 */
-	pa->used = origin_ds->ds_phys->ds_used_bytes;
+	pa->used = origin_ds->ds_phys->ds_referenced_bytes;
 	pa->comp = origin_ds->ds_phys->ds_compressed_bytes;
 	pa->uncomp = origin_ds->ds_phys->ds_uncompressed_bytes;
 	for (snap = list_head(&pa->shared_snaps); snap;
@@ -2665,7 +2726,7 @@
 	 * so we need to subtract out the clone origin's used space.
 	 */
 	if (pa->origin_origin) {
-		pa->used -= pa->origin_origin->ds_phys->ds_used_bytes;
+		pa->used -= pa->origin_origin->ds_phys->ds_referenced_bytes;
 		pa->comp -= pa->origin_origin->ds_phys->ds_compressed_bytes;
 		pa->uncomp -= pa->origin_origin->ds_phys->ds_uncompressed_bytes;
 	}
@@ -3181,8 +3242,8 @@
 		dsl_deadlist_space(&csa->ohds->ds_deadlist,
 		    &odl_used, &odl_comp, &odl_uncomp);
 
-		dused = csa->cds->ds_phys->ds_used_bytes + cdl_used -
-		    (csa->ohds->ds_phys->ds_used_bytes + odl_used);
+		dused = csa->cds->ds_phys->ds_referenced_bytes + cdl_used -
+		    (csa->ohds->ds_phys->ds_referenced_bytes + odl_used);
 		dcomp = csa->cds->ds_phys->ds_compressed_bytes + cdl_comp -
 		    (csa->ohds->ds_phys->ds_compressed_bytes + odl_comp);
 		duncomp = csa->cds->ds_phys->ds_uncompressed_bytes +
@@ -3211,8 +3272,8 @@
 	}
 
 	/* swap ds_*_bytes */
-	SWITCH64(csa->ohds->ds_phys->ds_used_bytes,
-	    csa->cds->ds_phys->ds_used_bytes);
+	SWITCH64(csa->ohds->ds_phys->ds_referenced_bytes,
+	    csa->cds->ds_phys->ds_referenced_bytes);
 	SWITCH64(csa->ohds->ds_phys->ds_compressed_bytes,
 	    csa->cds->ds_phys->ds_compressed_bytes);
 	SWITCH64(csa->ohds->ds_phys->ds_uncompressed_bytes,
@@ -3341,8 +3402,9 @@
 	 * on-disk is over quota and there are no pending changes (which
 	 * may free up space for us).
 	 */
-	if (ds->ds_phys->ds_used_bytes + inflight >= ds->ds_quota) {
-		if (inflight > 0 || ds->ds_phys->ds_used_bytes < ds->ds_quota)
+	if (ds->ds_phys->ds_referenced_bytes + inflight >= ds->ds_quota) {
+		if (inflight > 0 ||
+		    ds->ds_phys->ds_referenced_bytes < ds->ds_quota)
 			error = ERESTART;
 		else
 			error = EDQUOT;
@@ -3369,7 +3431,7 @@
 	if (psa->psa_effective_value == 0)
 		return (0);
 
-	if (psa->psa_effective_value < ds->ds_phys->ds_used_bytes ||
+	if (psa->psa_effective_value < ds->ds_phys->ds_referenced_bytes ||
 	    psa->psa_effective_value < ds->ds_reserved)
 		return (ENOSPC);
 
@@ -4123,8 +4185,8 @@
 	dsl_pool_t *dp = new->ds_dir->dd_pool;
 
 	*usedp = 0;
-	*usedp += new->ds_phys->ds_used_bytes;
-	*usedp -= oldsnap->ds_phys->ds_used_bytes;
+	*usedp += new->ds_phys->ds_referenced_bytes;
+	*usedp -= oldsnap->ds_phys->ds_referenced_bytes;
 
 	*compp = 0;
 	*compp += new->ds_phys->ds_compressed_bytes;
@@ -4140,9 +4202,13 @@
 		dsl_dataset_t *snap;
 		uint64_t used, comp, uncomp;
 
-		err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &snap);
-		if (err != 0)
-			break;
+		if (snapobj == new->ds_object) {
+			snap = new;
+		} else {
+			err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &snap);
+			if (err != 0)
+				break;
+		}
 
 		if (snap->ds_phys->ds_prev_snap_txg ==
 		    oldsnap->ds_phys->ds_creation_txg) {
@@ -4171,7 +4237,8 @@
 		 * was not a snapshot of/before new.
 		 */
 		snapobj = snap->ds_phys->ds_prev_snap_obj;
-		dsl_dataset_rele(snap, FTAG);
+		if (snap != new)
+			dsl_dataset_rele(snap, FTAG);
 		if (snapobj == 0) {
 			err = EINVAL;
 			break;
--- a/usr/src/uts/common/fs/zfs/dsl_deleg.c	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/uts/common/fs/zfs/dsl_deleg.c	Mon May 21 12:11:39 2012 -0700
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011 by Delphix. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
  */
 
 /*
@@ -171,10 +171,8 @@
 		VERIFY(nvpair_value_nvlist(whopair, &perms) == 0);
 
 		if (zap_lookup(mos, zapobj, whokey, 8, 1, &jumpobj) != 0) {
-			jumpobj = zap_create(mos, DMU_OT_DSL_PERMS,
-			    DMU_OT_NONE, 0, tx);
-			VERIFY(zap_update(mos, zapobj,
-			    whokey, 8, 1, &jumpobj, tx) == 0);
+			jumpobj = zap_create_link(mos, DMU_OT_DSL_PERMS,
+			    zapobj, whokey, tx);
 		}
 
 		while (permpair = nvlist_next_nvpair(perms, permpair)) {
--- a/usr/src/uts/common/fs/zfs/dsl_pool.c	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/uts/common/fs/zfs/dsl_pool.c	Mon May 21 12:11:39 2012 -0700
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011 by Delphix. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
  */
 
 #include <sys/dsl_pool.h>
@@ -40,6 +40,8 @@
 #include <sys/zfs_znode.h>
 #include <sys/spa_impl.h>
 #include <sys/dsl_deadlist.h>
+#include <sys/bptree.h>
+#include <sys/zfeature.h>
 
 int zfs_no_write_throttle = 0;
 int zfs_write_limit_shift = 3;			/* 1/8th of physical memory */
@@ -100,20 +102,32 @@
 }
 
 int
-dsl_pool_open(spa_t *spa, uint64_t txg, dsl_pool_t **dpp)
+dsl_pool_init(spa_t *spa, uint64_t txg, dsl_pool_t **dpp)
 {
 	int err;
 	dsl_pool_t *dp = dsl_pool_open_impl(spa, txg);
+
+	err = dmu_objset_open_impl(spa, NULL, &dp->dp_meta_rootbp,
+	    &dp->dp_meta_objset);
+	if (err != 0)
+		dsl_pool_close(dp);
+	else
+		*dpp = dp;
+
+	return (err);
+}
+
+int
+dsl_pool_open(dsl_pool_t *dp)
+{
+	int err;
 	dsl_dir_t *dd;
 	dsl_dataset_t *ds;
 	uint64_t obj;
 
+	ASSERT(!dmu_objset_is_dirty_anywhere(dp->dp_meta_objset));
+
 	rw_enter(&dp->dp_config_rwlock, RW_WRITER);
-	err = dmu_objset_open_impl(spa, NULL, &dp->dp_meta_rootbp,
-	    &dp->dp_meta_objset);
-	if (err)
-		goto out;
-
 	err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
 	    DMU_POOL_ROOT_DATASET, sizeof (uint64_t), 1,
 	    &dp->dp_root_dir_obj);
@@ -129,7 +143,7 @@
 	if (err)
 		goto out;
 
-	if (spa_version(spa) >= SPA_VERSION_ORIGIN) {
+	if (spa_version(dp->dp_spa) >= SPA_VERSION_ORIGIN) {
 		err = dsl_pool_open_special_dir(dp, ORIGIN_DIR_NAME, &dd);
 		if (err)
 			goto out;
@@ -146,7 +160,7 @@
 			goto out;
 	}
 
-	if (spa_version(spa) >= SPA_VERSION_DEADLISTS) {
+	if (spa_version(dp->dp_spa) >= SPA_VERSION_DEADLISTS) {
 		err = dsl_pool_open_special_dir(dp, FREE_DIR_NAME,
 		    &dp->dp_free_dir);
 		if (err)
@@ -160,6 +174,15 @@
 		    dp->dp_meta_objset, obj));
 	}
 
+	if (spa_feature_is_active(dp->dp_spa,
+	    &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) {
+		err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
+		    DMU_POOL_BPTREE_OBJ, sizeof (uint64_t), 1,
+		    &dp->dp_bptree_obj);
+		if (err != 0)
+			goto out;
+	}
+
 	err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
 	    DMU_POOL_TMP_USERREFS, sizeof (uint64_t), 1,
 	    &dp->dp_tmp_userrefs_obj);
@@ -168,15 +191,10 @@
 	if (err)
 		goto out;
 
-	err = dsl_scan_init(dp, txg);
+	err = dsl_scan_init(dp, dp->dp_tx.tx_open_txg);
 
 out:
 	rw_exit(&dp->dp_config_rwlock);
-	if (err)
-		dsl_pool_close(dp);
-	else
-		*dpp = dp;
-
 	return (err);
 }
 
@@ -470,7 +488,7 @@
 dsl_pool_sync_context(dsl_pool_t *dp)
 {
 	return (curthread == dp->dp_tx.tx_sync_thread ||
-	    spa_get_dsl(dp->dp_spa) == NULL);
+	    spa_is_initializing(dp->dp_spa));
 }
 
 uint64_t
@@ -788,11 +806,8 @@
 	ASSERT(dp->dp_tmp_userrefs_obj == 0);
 	ASSERT(dmu_tx_is_syncing(tx));
 
-	dp->dp_tmp_userrefs_obj = zap_create(mos, DMU_OT_USERREFS,
-	    DMU_OT_NONE, 0, tx);
-
-	VERIFY(zap_add(mos, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_TMP_USERREFS,
-	    sizeof (uint64_t), 1, &dp->dp_tmp_userrefs_obj, tx) == 0);
+	dp->dp_tmp_userrefs_obj = zap_create_link(mos, DMU_OT_USERREFS,
+	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_TMP_USERREFS, tx);
 }
 
 static int
--- a/usr/src/uts/common/fs/zfs/dsl_scan.c	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/uts/common/fs/zfs/dsl_scan.c	Mon May 21 12:11:39 2012 -0700
@@ -20,6 +20,7 @@
  */
 /*
  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
  */
 
 #include <sys/dsl_scan.h>
@@ -44,6 +45,7 @@
 #include <sys/ddt.h>
 #include <sys/sa.h>
 #include <sys/sa_impl.h>
+#include <sys/zfeature.h>
 #ifdef _KERNEL
 #include <sys/zfs_vfsops.h>
 #endif
@@ -382,55 +384,6 @@
 	    priority, zio_flags, arc_flags, zb));
 }
 
-static boolean_t
-bookmark_is_zero(const zbookmark_t *zb)
-{
-	return (zb->zb_objset == 0 && zb->zb_object == 0 &&
-	    zb->zb_level == 0 && zb->zb_blkid == 0);
-}
-
-/* dnp is the dnode for zb1->zb_object */
-static boolean_t
-bookmark_is_before(const dnode_phys_t *dnp, const zbookmark_t *zb1,
-    const zbookmark_t *zb2)
-{
-	uint64_t zb1nextL0, zb2thisobj;
-
-	ASSERT(zb1->zb_objset == zb2->zb_objset);
-	ASSERT(zb2->zb_level == 0);
-
-	/*
-	 * A bookmark in the deadlist is considered to be after
-	 * everything else.
-	 */
-	if (zb2->zb_object == DMU_DEADLIST_OBJECT)
-		return (B_TRUE);
-
-	/* The objset_phys_t isn't before anything. */
-	if (dnp == NULL)
-		return (B_FALSE);
-
-	zb1nextL0 = (zb1->zb_blkid + 1) <<
-	    ((zb1->zb_level) * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT));
-
-	zb2thisobj = zb2->zb_object ? zb2->zb_object :
-	    zb2->zb_blkid << (DNODE_BLOCK_SHIFT - DNODE_SHIFT);
-
-	if (zb1->zb_object == DMU_META_DNODE_OBJECT) {
-		uint64_t nextobj = zb1nextL0 *
-		    (dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT) >> DNODE_SHIFT;
-		return (nextobj <= zb2thisobj);
-	}
-
-	if (zb1->zb_object < zb2thisobj)
-		return (B_TRUE);
-	if (zb1->zb_object > zb2thisobj)
-		return (B_FALSE);
-	if (zb2->zb_object == DMU_META_DNODE_OBJECT)
-		return (B_FALSE);
-	return (zb1nextL0 <= zb2->zb_blkid);
-}
-
 static uint64_t
 dsl_scan_ds_maxtxg(dsl_dataset_t *ds)
 {
@@ -462,7 +415,7 @@
 	if (scn->scn_pausing)
 		return (B_TRUE); /* we're already pausing */
 
-	if (!bookmark_is_zero(&scn->scn_phys.scn_bookmark))
+	if (!ZB_IS_ZERO(&scn->scn_phys.scn_bookmark))
 		return (B_FALSE); /* we're resuming */
 
 	/* We only know how to resume from level-0 blocks. */
@@ -617,13 +570,13 @@
 	/*
 	 * We never skip over user/group accounting objects (obj<0)
 	 */
-	if (!bookmark_is_zero(&scn->scn_phys.scn_bookmark) &&
+	if (!ZB_IS_ZERO(&scn->scn_phys.scn_bookmark) &&
 	    (int64_t)zb->zb_object >= 0) {
 		/*
 		 * If we already visited this bp & everything below (in
 		 * a prior txg sync), don't bother doing it again.
 		 */
-		if (bookmark_is_before(dnp, zb, &scn->scn_phys.scn_bookmark))
+		if (zbookmark_is_before(dnp, zb, &scn->scn_phys.scn_bookmark))
 			return (B_TRUE);
 
 		/*
@@ -816,22 +769,6 @@
 	if (bp->blk_birth <= scn->scn_phys.scn_cur_min_txg)
 		return;
 
-	if (BP_GET_TYPE(bp) != DMU_OT_USERGROUP_USED) {
-		/*
-		 * For non-user-accounting blocks, we need to read the
-		 * new bp (from a deleted snapshot, found in
-		 * check_existing_xlation).  If we used the old bp,
-		 * pointers inside this block from before we resumed
-		 * would be untranslated.
-		 *
-		 * For user-accounting blocks, we need to read the old
-		 * bp, because we will apply the entire space delta to
-		 * it (original untranslated -> translations from
-		 * deleted snap -> now).
-		 */
-		bp_toread = *bp;
-	}
-
 	if (dsl_scan_recurse(scn, ds, ostype, dnp, &bp_toread, zb, tx,
 	    &buf) != 0)
 		return;
@@ -1396,19 +1333,28 @@
 	zap_cursor_fini(&zc);
 }
 
-static int
-dsl_scan_free_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
+static boolean_t
+dsl_scan_free_should_pause(dsl_scan_t *scn)
 {
-	dsl_scan_t *scn = arg;
 	uint64_t elapsed_nanosecs;
 
 	elapsed_nanosecs = gethrtime() - scn->scn_sync_start_time;
-
-	if (elapsed_nanosecs / NANOSEC > zfs_txg_timeout ||
+	return (elapsed_nanosecs / NANOSEC > zfs_txg_timeout ||
 	    (elapsed_nanosecs / MICROSEC > zfs_free_min_time_ms &&
 	    txg_sync_waiting(scn->scn_dp)) ||
-	    spa_shutting_down(scn->scn_dp->dp_spa))
-		return (ERESTART);
+	    spa_shutting_down(scn->scn_dp->dp_spa));
+}
+
+static int
+dsl_scan_free_block_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
+{
+	dsl_scan_t *scn = arg;
+
+	if (!scn->scn_is_bptree ||
+	    (BP_GET_LEVEL(bp) == 0 && BP_GET_TYPE(bp) != DMU_OT_OBJSET)) {
+		if (dsl_scan_free_should_pause(scn))
+			return (ERESTART);
+	}
 
 	zio_nowait(zio_free_sync(scn->scn_zio_root, scn->scn_dp->dp_spa,
 	    dmu_tx_get_txg(tx), bp, 0));
@@ -1433,6 +1379,10 @@
 	if (scn->scn_phys.scn_state == DSS_SCANNING)
 		return (B_TRUE);
 
+	if (spa_feature_is_active(spa,
+	    &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) {
+		return (B_TRUE);
+	}
 	if (spa_version(scn->scn_dp->dp_spa) >= SPA_VERSION_DEADLISTS) {
 		(void) bpobj_space(&scn->scn_dp->dp_free_bpobj,
 		    &used, &comp, &uncomp);
@@ -1479,14 +1429,40 @@
 	 * traversing it.
 	 */
 	if (spa_version(dp->dp_spa) >= SPA_VERSION_DEADLISTS) {
+		scn->scn_is_bptree = B_FALSE;
 		scn->scn_zio_root = zio_root(dp->dp_spa, NULL,
 		    NULL, ZIO_FLAG_MUSTSUCCEED);
 		err = bpobj_iterate(&dp->dp_free_bpobj,
-		    dsl_scan_free_cb, scn, tx);
+		    dsl_scan_free_block_cb, scn, tx);
 		VERIFY3U(0, ==, zio_wait(scn->scn_zio_root));
+
+		if (err == 0 && spa_feature_is_active(spa,
+		    &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) {
+			scn->scn_is_bptree = B_TRUE;
+			scn->scn_zio_root = zio_root(dp->dp_spa, NULL,
+			    NULL, ZIO_FLAG_MUSTSUCCEED);
+			err = bptree_iterate(dp->dp_meta_objset,
+			    dp->dp_bptree_obj, B_TRUE, dsl_scan_free_block_cb,
+			    scn, tx);
+			VERIFY3U(0, ==, zio_wait(scn->scn_zio_root));
+			if (err != 0)
+				return;
+
+			/* disable async destroy feature */
+			spa_feature_decr(spa,
+			    &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY], tx);
+			ASSERT(!spa_feature_is_active(spa,
+			    &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY]));
+			VERIFY3U(0, ==, zap_remove(dp->dp_meta_objset,
+			    DMU_POOL_DIRECTORY_OBJECT,
+			    DMU_POOL_BPTREE_OBJ, tx));
+			VERIFY3U(0, ==, bptree_free(dp->dp_meta_objset,
+			    dp->dp_bptree_obj, tx));
+			dp->dp_bptree_obj = 0;
+		}
 		if (scn->scn_visited_this_txg) {
 			zfs_dbgmsg("freed %llu blocks in %llums from "
-			    "free_bpobj txg %llu",
+			    "free_bpobj/bptree txg %llu",
 			    (longlong_t)scn->scn_visited_this_txg,
 			    (longlong_t)
 			    (gethrtime() - scn->scn_sync_start_time) / MICROSEC,
@@ -1601,6 +1577,8 @@
 	for (i = 0; i < 4; i++) {
 		int l = (i < 2) ? BP_GET_LEVEL(bp) : DN_MAX_LEVELS;
 		int t = (i & 1) ? BP_GET_TYPE(bp) : DMU_OT_TOTAL;
+		if (t & DMU_OT_NEWTYPE)
+			t = DMU_OT_OTHER;
 		zfs_blkstat_t *zb = &zab->zab_type[l][t];
 		int equal;
 
--- a/usr/src/uts/common/fs/zfs/sa.c	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/uts/common/fs/zfs/sa.c	Mon May 21 12:11:39 2012 -0700
@@ -18,9 +18,11 @@
  *
  * CDDL HEADER END
  */
+
 /*
  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
  * Portions Copyright 2011 iXsystems, Inc
+ * Copyright (c) 2012 by Delphix. All rights reserved.
  */
 
 #include <sys/zfs_context.h>
@@ -427,10 +429,9 @@
 		char attr_name[8];
 
 		if (sa->sa_layout_attr_obj == 0) {
-			sa->sa_layout_attr_obj = zap_create(os,
-			    DMU_OT_SA_ATTR_LAYOUTS, DMU_OT_NONE, 0, tx);
-			VERIFY(zap_add(os, sa->sa_master_obj, SA_LAYOUTS, 8, 1,
-			    &sa->sa_layout_attr_obj, tx) == 0);
+			sa->sa_layout_attr_obj = zap_create_link(os,
+			    DMU_OT_SA_ATTR_LAYOUTS,
+			    sa->sa_master_obj, SA_LAYOUTS, tx);
 		}
 
 		(void) snprintf(attr_name, sizeof (attr_name),
@@ -1552,10 +1553,9 @@
 	}
 
 	if (sa->sa_reg_attr_obj == NULL) {
-		sa->sa_reg_attr_obj = zap_create(hdl->sa_os,
-		    DMU_OT_SA_ATTR_REGISTRATION, DMU_OT_NONE, 0, tx);
-		VERIFY(zap_add(hdl->sa_os, sa->sa_master_obj,
-		    SA_REGISTRY, 8, 1, &sa->sa_reg_attr_obj, tx) == 0);
+		sa->sa_reg_attr_obj = zap_create_link(hdl->sa_os,
+		    DMU_OT_SA_ATTR_REGISTRATION,
+		    sa->sa_master_obj, SA_REGISTRY, tx);
 	}
 	for (i = 0; i != sa->sa_num_attrs; i++) {
 		if (sa->sa_attr_table[i].sa_registered)
--- a/usr/src/uts/common/fs/zfs/spa.c	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/uts/common/fs/zfs/spa.c	Mon May 21 12:11:39 2012 -0700
@@ -62,6 +62,7 @@
 #include <sys/spa_boot.h>
 #include <sys/zfs_ioctl.h>
 #include <sys/dsl_scan.h>
+#include <sys/zfeature.h>
 
 #ifdef	_KERNEL
 #include <sys/bootprops.h>
@@ -113,6 +114,7 @@
 	{ ZTI_ONE,	ZTI_NULL,	ZTI_ONE,	ZTI_NULL },
 };
 
+static dsl_syncfunc_t spa_sync_version;
 static dsl_syncfunc_t spa_sync_props;
 static boolean_t spa_has_active_shared_spare(spa_t *spa);
 static int spa_load_impl(spa_t *spa, uint64_t, nvlist_t *config,
@@ -168,6 +170,7 @@
 spa_prop_get_config(spa_t *spa, nvlist_t **nvp)
 {
 	vdev_t *rvd = spa->spa_root_vdev;
+	dsl_pool_t *pool = spa->spa_dsl_pool;
 	uint64_t size;
 	uint64_t alloc;
 	uint64_t space;
@@ -214,6 +217,22 @@
 		spa_prop_add_list(*nvp, ZPOOL_PROP_VERSION, NULL, version, src);
 	}
 
+	if (pool != NULL) {
+		dsl_dir_t *freedir = pool->dp_free_dir;
+
+		/*
+		 * The $FREE directory was introduced in SPA_VERSION_DEADLISTS,
+		 * when opening pools before this version freedir will be NULL.
+		 */
+		if (freedir != NULL) {
+			spa_prop_add_list(*nvp, ZPOOL_PROP_FREEING, NULL,
+			    freedir->dd_phys->dd_used_bytes, src);
+		} else {
+			spa_prop_add_list(*nvp, ZPOOL_PROP_FREEING,
+			    NULL, 0, src);
+		}
+	}
+
 	spa_prop_add_list(*nvp, ZPOOL_PROP_GUID, NULL, spa_guid(spa), src);
 
 	if (spa->spa_comment != NULL) {
@@ -353,25 +372,55 @@
 	nvpair_t *elem;
 	int error = 0, reset_bootfs = 0;
 	uint64_t objnum;
+	boolean_t has_feature = B_FALSE;
 
 	elem = NULL;
 	while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
-		zpool_prop_t prop;
-		char *propname, *strval;
 		uint64_t intval;
-		objset_t *os;
-		char *slash, *check;
-
-		propname = nvpair_name(elem);
-
-		if ((prop = zpool_name_to_prop(propname)) == ZPROP_INVAL)
-			return (EINVAL);
+		char *strval, *slash, *check, *fname;
+		const char *propname = nvpair_name(elem);
+		zpool_prop_t prop = zpool_name_to_prop(propname);
 
 		switch (prop) {
+		case ZPROP_INVAL:
+			if (!zpool_prop_feature(propname)) {
+				error = EINVAL;
+				break;
+			}
+
+			/*
+			 * Sanitize the input.
+			 */
+			if (nvpair_type(elem) != DATA_TYPE_UINT64) {
+				error = EINVAL;
+				break;
+			}
+
+			if (nvpair_value_uint64(elem, &intval) != 0) {
+				error = EINVAL;
+				break;
+			}
+
+			if (intval != 0) {
+				error = EINVAL;
+				break;
+			}
+
+			fname = strchr(propname, '@') + 1;
+			if (zfeature_lookup_name(fname, NULL) != 0) {
+				error = EINVAL;
+				break;
+			}
+
+			has_feature = B_TRUE;
+			break;
+
 		case ZPOOL_PROP_VERSION:
 			error = nvpair_value_uint64(elem, &intval);
 			if (!error &&
-			    (intval < spa_version(spa) || intval > SPA_VERSION))
+			    (intval < spa_version(spa) ||
+			    intval > SPA_VERSION_BEFORE_FEATURES ||
+			    has_feature))
 				error = EINVAL;
 			break;
 
@@ -408,6 +457,7 @@
 			error = nvpair_value_string(elem, &strval);
 
 			if (!error) {
+				objset_t *os;
 				uint64_t compress;
 
 				if (strval == NULL || strval[0] == '\0') {
@@ -557,33 +607,58 @@
 spa_prop_set(spa_t *spa, nvlist_t *nvp)
 {
 	int error;
-	nvpair_t *elem;
+	nvpair_t *elem = NULL;
 	boolean_t need_sync = B_FALSE;
-	zpool_prop_t prop;
 
 	if ((error = spa_prop_validate(spa, nvp)) != 0)
 		return (error);
 
-	elem = NULL;
 	while ((elem = nvlist_next_nvpair(nvp, elem)) != NULL) {
-		if ((prop = zpool_name_to_prop(
-		    nvpair_name(elem))) == ZPROP_INVAL)
-			return (EINVAL);
+		zpool_prop_t prop = zpool_name_to_prop(nvpair_name(elem));
 
 		if (prop == ZPOOL_PROP_CACHEFILE ||
 		    prop == ZPOOL_PROP_ALTROOT ||
 		    prop == ZPOOL_PROP_READONLY)
 			continue;
 
+		if (prop == ZPOOL_PROP_VERSION || prop == ZPROP_INVAL) {
+			uint64_t ver;
+
+			if (prop == ZPOOL_PROP_VERSION) {
+				VERIFY(nvpair_value_uint64(elem, &ver) == 0);
+			} else {
+				ASSERT(zpool_prop_feature(nvpair_name(elem)));
+				ver = SPA_VERSION_FEATURES;
+				need_sync = B_TRUE;
+			}
+
+			/* Save time if the version is already set. */
+			if (ver == spa_version(spa))
+				continue;
+
+			/*
+			 * In addition to the pool directory object, we might
+			 * create the pool properties object, the features for
+			 * read object, the features for write object, or the
+			 * feature descriptions object.
+			 */
+			error = dsl_sync_task_do(spa_get_dsl(spa), NULL,
+			    spa_sync_version, spa, &ver, 6);
+			if (error)
+				return (error);
+			continue;
+		}
+
 		need_sync = B_TRUE;
 		break;
 	}
 
-	if (need_sync)
+	if (need_sync) {
 		return (dsl_sync_task_do(spa_get_dsl(spa), NULL, spa_sync_props,
-		    spa, nvp, 3));
-	else
-		return (0);
+		    spa, nvp, 6));
+	}
+
+	return (0);
 }
 
 /*
@@ -1607,7 +1682,7 @@
 	int error = zio->io_error;
 
 	if (error) {
-		if ((BP_GET_LEVEL(bp) != 0 || dmu_ot[type].ot_metadata) &&
+		if ((BP_GET_LEVEL(bp) != 0 || DMU_OT_IS_METADATA(type)) &&
 		    type != DMU_OT_INTENT_LOG)
 			atomic_add_64(&sle->sle_meta_count, 1);
 		else
@@ -1837,6 +1912,9 @@
 			    KM_SLEEP) == 0);
 		}
 
+		nvlist_free(spa->spa_load_info);
+		spa->spa_load_info = fnvlist_alloc();
+
 		gethrestime(&spa->spa_loaded_ts);
 		error = spa_load_impl(spa, pool_guid, config, state, type,
 		    mosconfig, &ereport);
@@ -1869,12 +1947,14 @@
 {
 	int error = 0;
 	nvlist_t *nvroot = NULL;
+	nvlist_t *label;
 	vdev_t *rvd;
 	uberblock_t *ub = &spa->spa_uberblock;
 	uint64_t children, config_cache_txg = spa->spa_config_txg;
 	int orig_mode = spa->spa_mode;
 	int parse;
 	uint64_t obj;
+	boolean_t missing_feat_write = B_FALSE;
 
 	/*
 	 * If this is an untrusted config, access the pool in read-only mode.
@@ -1954,19 +2034,78 @@
 	/*
 	 * Find the best uberblock.
 	 */
-	vdev_uberblock_load(NULL, rvd, ub);
+	vdev_uberblock_load(rvd, ub, &label);
 
 	/*
 	 * If we weren't able to find a single valid uberblock, return failure.
 	 */
-	if (ub->ub_txg == 0)
+	if (ub->ub_txg == 0) {
+		nvlist_free(label);
 		return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, ENXIO));
+	}
 
 	/*
-	 * If the pool is newer than the code, we can't open it.
+	 * If the pool has an unsupported version we can't open it.
 	 */
-	if (ub->ub_version > SPA_VERSION)
+	if (!SPA_VERSION_IS_SUPPORTED(ub->ub_version)) {
+		nvlist_free(label);
 		return (spa_vdev_err(rvd, VDEV_AUX_VERSION_NEWER, ENOTSUP));
+	}
+
+	if (ub->ub_version >= SPA_VERSION_FEATURES) {
+		nvlist_t *features;
+
+		/*
+		 * If we weren't able to find what's necessary for reading the
+		 * MOS in the label, return failure.
+		 */
+		if (label == NULL || nvlist_lookup_nvlist(label,
+		    ZPOOL_CONFIG_FEATURES_FOR_READ, &features) != 0) {
+			nvlist_free(label);
+			return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA,
+			    ENXIO));
+		}
+
+		/*
+		 * Update our in-core representation with the definitive values
+		 * from the label.
+		 */
+		nvlist_free(spa->spa_label_features);
+		VERIFY(nvlist_dup(features, &spa->spa_label_features, 0) == 0);
+	}
+
+	nvlist_free(label);
+
+	/*
+	 * Look through entries in the label nvlist's features_for_read. If
+	 * there is a feature listed there which we don't understand then we
+	 * cannot open a pool.
+	 */
+	if (ub->ub_version >= SPA_VERSION_FEATURES) {
+		nvlist_t *unsup_feat;
+
+		VERIFY(nvlist_alloc(&unsup_feat, NV_UNIQUE_NAME, KM_SLEEP) ==
+		    0);
+
+		for (nvpair_t *nvp = nvlist_next_nvpair(spa->spa_label_features,
+		    NULL); nvp != NULL;
+		    nvp = nvlist_next_nvpair(spa->spa_label_features, nvp)) {
+			if (!zfeature_is_supported(nvpair_name(nvp))) {
+				VERIFY(nvlist_add_string(unsup_feat,
+				    nvpair_name(nvp), "") == 0);
+			}
+		}
+
+		if (!nvlist_empty(unsup_feat)) {
+			VERIFY(nvlist_add_nvlist(spa->spa_load_info,
+			    ZPOOL_CONFIG_UNSUP_FEAT, unsup_feat) == 0);
+			nvlist_free(unsup_feat);
+			return (spa_vdev_err(rvd, VDEV_AUX_UNSUP_FEAT,
+			    ENOTSUP));
+		}
+
+		nvlist_free(unsup_feat);
+	}
 
 	/*
 	 * If the vdev guid sum doesn't match the uberblock, we have an
@@ -2000,7 +2139,7 @@
 	spa->spa_claim_max_txg = spa->spa_first_txg;
 	spa->spa_prev_software_version = ub->ub_software_version;
 
-	error = dsl_pool_open(spa, spa->spa_first_txg, &spa->spa_dsl_pool);
+	error = dsl_pool_init(spa, spa->spa_first_txg, &spa->spa_dsl_pool);
 	if (error)
 		return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
 	spa->spa_meta_objset = spa->spa_dsl_pool->dp_meta_objset;
@@ -2008,6 +2147,84 @@
 	if (spa_dir_prop(spa, DMU_POOL_CONFIG, &spa->spa_config_object) != 0)
 		return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
 
+	if (spa_version(spa) >= SPA_VERSION_FEATURES) {
+		boolean_t missing_feat_read = B_FALSE;
+		nvlist_t *unsup_feat;
+
+		if (spa_dir_prop(spa, DMU_POOL_FEATURES_FOR_READ,
+		    &spa->spa_feat_for_read_obj) != 0) {
+			return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
+		}
+
+		if (spa_dir_prop(spa, DMU_POOL_FEATURES_FOR_WRITE,
+		    &spa->spa_feat_for_write_obj) != 0) {
+			return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
+		}
+
+		if (spa_dir_prop(spa, DMU_POOL_FEATURE_DESCRIPTIONS,
+		    &spa->spa_feat_desc_obj) != 0) {
+			return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
+		}
+
+		VERIFY(nvlist_alloc(&unsup_feat, NV_UNIQUE_NAME, KM_SLEEP) ==
+		    0);
+
+		if (!feature_is_supported(spa->spa_meta_objset,
+		    spa->spa_feat_for_read_obj, spa->spa_feat_desc_obj,
+		    unsup_feat))
+			missing_feat_read = B_TRUE;
+
+		if (spa_writeable(spa) || state == SPA_LOAD_TRYIMPORT) {
+			if (!feature_is_supported(spa->spa_meta_objset,
+			    spa->spa_feat_for_write_obj, spa->spa_feat_desc_obj,
+			    unsup_feat))
+				missing_feat_write = B_TRUE;
+		}
+
+		if (!nvlist_empty(unsup_feat)) {
+			VERIFY(nvlist_add_nvlist(spa->spa_load_info,
+			    ZPOOL_CONFIG_UNSUP_FEAT, unsup_feat) == 0);
+		}
+
+		nvlist_free(unsup_feat);
+
+		if (!missing_feat_read) {
+			fnvlist_add_boolean(spa->spa_load_info,
+			    ZPOOL_CONFIG_CAN_RDONLY);
+		}
+
+		/*
+		 * If the state is SPA_LOAD_TRYIMPORT, our objective is
+		 * twofold: to determine whether the pool is available for
+		 * import in read-write mode and (if it is not) whether the
+		 * pool is available for import in read-only mode. If the pool
+		 * is available for import in read-write mode, it is displayed
+		 * as available in userland; if it is not available for import
+		 * in read-only mode, it is displayed as unavailable in
+		 * userland. If the pool is available for import in read-only
+		 * mode but not read-write mode, it is displayed as unavailable
+		 * in userland with a special note that the pool is actually
+		 * available for open in read-only mode.
+		 *
+		 * As a result, if the state is SPA_LOAD_TRYIMPORT and we are
+		 * missing a feature for write, we must first determine whether
+		 * the pool can be opened read-only before returning to
+		 * userland in order to know whether to display the
+		 * abovementioned note.
+		 */
+		if (missing_feat_read || (missing_feat_write &&
+		    spa_writeable(spa))) {
+			return (spa_vdev_err(rvd, VDEV_AUX_UNSUP_FEAT,
+			    ENOTSUP));
+		}
+	}
+
+	spa->spa_is_initializing = B_TRUE;
+	error = dsl_pool_open(spa->spa_dsl_pool);
+	spa->spa_is_initializing = B_FALSE;
+	if (error != 0)
+		return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
+
 	if (!mosconfig) {
 		uint64_t hostid;
 		nvlist_t *policy = NULL, *nvconfig;
@@ -2225,7 +2442,7 @@
 		nvlist_free(nvconfig);
 
 		/*
-		 * Now that we've validate the config, check the state of the
+		 * Now that we've validated the config, check the state of the
 		 * root vdev.  If it can't be opened, it indicates one or
 		 * more toplevel vdevs are faulted.
 		 */
@@ -2238,6 +2455,17 @@
 		}
 	}
 
+	if (missing_feat_write) {
+		ASSERT(state == SPA_LOAD_TRYIMPORT);
+
+		/*
+		 * At this point, we know that we can open the pool in
+		 * read-only mode but not read-write mode. We now have enough
+		 * information and can return to userland.
+		 */
+		return (spa_vdev_err(rvd, VDEV_AUX_UNSUP_FEAT, ENOTSUP));
+	}
+
 	/*
 	 * We've successfully opened the pool, verify that we're ready
 	 * to start pushing transactions.
@@ -2347,10 +2575,18 @@
 	return (spa_load(spa, state, SPA_IMPORT_EXISTING, mosconfig));
 }
 
+/*
+ * If spa_load() fails this function will try loading prior txg's. If
+ * 'state' is SPA_LOAD_RECOVER and one of these loads succeeds the pool
+ * will be rewound to that txg. If 'state' is not SPA_LOAD_RECOVER this
+ * function will not rewind the pool and will return the same error as
+ * spa_load().
+ */
 static int
 spa_load_best(spa_t *spa, spa_load_state_t state, int mosconfig,
     uint64_t max_request, int rewind_flags)
 {
+	nvlist_t *loadinfo = NULL;
 	nvlist_t *config = NULL;
 	int load_error, rewind_error;
 	uint64_t safe_rewind_txg;
@@ -2379,9 +2615,18 @@
 		return (load_error);
 	}
 
-	/* Price of rolling back is discarding txgs, including log */
-	if (state == SPA_LOAD_RECOVER)
+	if (state == SPA_LOAD_RECOVER) {
+		/* Price of rolling back is discarding txgs, including log */
 		spa_set_log_state(spa, SPA_LOG_CLEAR);
+	} else {
+		/*
+		 * If we aren't rolling back save the load info from our first
+		 * import attempt so that we can restore it after attempting
+		 * to rewind.
+		 */
+		loadinfo = spa->spa_load_info;
+		spa->spa_load_info = fnvlist_alloc();
+	}
 
 	spa->spa_load_max_txg = spa->spa_last_ubsync_txg;
 	safe_rewind_txg = spa->spa_last_ubsync_txg - TXG_DEFER_SIZE;
@@ -2405,7 +2650,20 @@
 	if (config && (rewind_error || state != SPA_LOAD_RECOVER))
 		spa_config_set(spa, config);
 
-	return (state == SPA_LOAD_RECOVER ? rewind_error : load_error);
+	if (state == SPA_LOAD_RECOVER) {
+		ASSERT3P(loadinfo, ==, NULL);
+		return (rewind_error);
+	} else {
+		/* Store the rewind info as part of the initial load info */
+		fnvlist_add_nvlist(loadinfo, ZPOOL_CONFIG_REWIND_INFO,
+		    spa->spa_load_info);
+
+		/* Restore the initial load info */
+		fnvlist_free(spa->spa_load_info);
+		spa->spa_load_info = loadinfo;
+
+		return (load_error);
+	}
 }
 
 /*
@@ -2675,8 +2933,50 @@
 	}
 }
 
+static void
+spa_add_feature_stats(spa_t *spa, nvlist_t *config)
+{
+	nvlist_t *features;
+	zap_cursor_t zc;
+	zap_attribute_t za;
+
+	ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER));
+	VERIFY(nvlist_alloc(&features, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+
+	if (spa->spa_feat_for_read_obj != 0) {
+		for (zap_cursor_init(&zc, spa->spa_meta_objset,
+		    spa->spa_feat_for_read_obj);
+		    zap_cursor_retrieve(&zc, &za) == 0;
+		    zap_cursor_advance(&zc)) {
+			ASSERT(za.za_integer_length == sizeof (uint64_t) &&
+			    za.za_num_integers == 1);
+			VERIFY3U(0, ==, nvlist_add_uint64(features, za.za_name,
+			    za.za_first_integer));
+		}
+		zap_cursor_fini(&zc);
+	}
+
+	if (spa->spa_feat_for_write_obj != 0) {
+		for (zap_cursor_init(&zc, spa->spa_meta_objset,
+		    spa->spa_feat_for_write_obj);
+		    zap_cursor_retrieve(&zc, &za) == 0;
+		    zap_cursor_advance(&zc)) {
+			ASSERT(za.za_integer_length == sizeof (uint64_t) &&
+			    za.za_num_integers == 1);
+			VERIFY3U(0, ==, nvlist_add_uint64(features, za.za_name,
+			    za.za_first_integer));
+		}
+		zap_cursor_fini(&zc);
+	}
+
+	VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_FEATURE_STATS,
+	    features) == 0);
+	nvlist_free(features);
+}
+
 int
-spa_get_stats(const char *name, nvlist_t **config, char *altroot, size_t buflen)
+spa_get_stats(const char *name, nvlist_t **config,
+    char *altroot, size_t buflen)
 {
 	int error;
 	spa_t *spa;
@@ -2711,6 +3011,7 @@
 
 			spa_add_spares(spa, *config);
 			spa_add_l2cache(spa, *config);
+			spa_add_feature_stats(spa, *config);
 		}
 	}
 
@@ -2931,6 +3232,7 @@
 	nvlist_t **spares, **l2cache;
 	uint_t nspares, nl2cache;
 	uint64_t version, obj;
+	boolean_t has_features;
 
 	/*
 	 * If this pool already exists, return failure.
@@ -2956,10 +3258,18 @@
 		return (error);
 	}
 
-	if (nvlist_lookup_uint64(props, zpool_prop_to_name(ZPOOL_PROP_VERSION),
-	    &version) != 0)
+	has_features = B_FALSE;
+	for (nvpair_t *elem = nvlist_next_nvpair(props, NULL);
+	    elem != NULL; elem = nvlist_next_nvpair(props, elem)) {
+		if (zpool_prop_feature(nvpair_name(elem)))
+			has_features = B_TRUE;
+	}
+
+	if (has_features || nvlist_lookup_uint64(props,
+	    zpool_prop_to_name(ZPOOL_PROP_VERSION), &version) != 0) {
 		version = SPA_VERSION;
-	ASSERT(version <= SPA_VERSION);
+	}
+	ASSERT(SPA_VERSION_IS_SUPPORTED(version));
 
 	spa->spa_first_txg = txg;
 	spa->spa_uberblock.ub_txg = txg - 1;
@@ -3035,8 +3345,10 @@
 		spa->spa_l2cache.sav_sync = B_TRUE;
 	}
 
+	spa->spa_is_initializing = B_TRUE;
 	spa->spa_dsl_pool = dp = dsl_pool_create(spa, zplprops, txg);
 	spa->spa_meta_objset = dp->dp_meta_objset;
+	spa->spa_is_initializing = B_FALSE;
 
 	/*
 	 * Create DDTs (dedup tables).
@@ -3060,6 +3372,9 @@
 		cmn_err(CE_PANIC, "failed to add pool config");
 	}
 
+	if (spa_version(spa) >= SPA_VERSION_FEATURES)
+		spa_feature_create_zap_objects(spa, tx);
+
 	if (zap_add(spa->spa_meta_objset,
 	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CREATION_VERSION,
 	    sizeof (uint64_t), 1, &version, tx) != 0) {
@@ -3250,7 +3565,7 @@
 	}
 #endif
 	if (config == NULL) {
-		cmn_err(CE_NOTE, "Can not read the pool label from '%s'",
+		cmn_err(CE_NOTE, "Cannot read the pool label from '%s'",
 		    devpath);
 		return (EIO);
 	}
@@ -3564,6 +3879,8 @@
 		    state) == 0);
 		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TIMESTAMP,
 		    spa->spa_uberblock.ub_timestamp) == 0);
+		VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_LOAD_INFO,
+		    spa->spa_load_info) == 0);
 
 		/*
 		 * If the bootfs property exists on this pool then we
@@ -5281,7 +5598,7 @@
 	 * information.  This avoids the dbuf_will_dirty() path and
 	 * saves us a pre-read to get data we don't actually care about.
 	 */
-	bufsize = P2ROUNDUP(nvsize, SPA_CONFIG_BLOCKSIZE);
+	bufsize = P2ROUNDUP((uint64_t)nvsize, SPA_CONFIG_BLOCKSIZE);
 	packed = kmem_alloc(bufsize, KM_SLEEP);
 
 	VERIFY(nvlist_pack(nv, &packed, &nvsize, NV_ENCODE_XDR,
@@ -5366,6 +5683,24 @@
 	spa_sync_nvlist(spa, spa->spa_config_object, config, tx);
 }
 
+static void
+spa_sync_version(void *arg1, void *arg2, dmu_tx_t *tx)
+{
+	spa_t *spa = arg1;
+	uint64_t version = *(uint64_t *)arg2;
+
+	/*
+	 * Setting the version is special cased when first creating the pool.
+	 */
+	ASSERT(tx->tx_txg != TXG_INITIAL);
+
+	ASSERT(version <= SPA_VERSION);
+	ASSERT(version >= spa_version(spa));
+
+	spa->spa_uberblock.ub_version = version;
+	vdev_config_dirty(spa->spa_root_vdev);
+}
+
 /*
  * Set zpool properties.
  */
@@ -5375,32 +5710,38 @@
 	spa_t *spa = arg1;
 	objset_t *mos = spa->spa_meta_objset;
 	nvlist_t *nvp = arg2;
-	nvpair_t *elem;
-	uint64_t intval;
-	char *strval;
-	zpool_prop_t prop;
-	const char *propname;
-	zprop_type_t proptype;
+	nvpair_t *elem = NULL;
 
 	mutex_enter(&spa->spa_props_lock);
 
-	elem = NULL;
 	while ((elem = nvlist_next_nvpair(nvp, elem))) {
+		uint64_t intval;
+		char *strval, *fname;
+		zpool_prop_t prop;
+		const char *propname;
+		zprop_type_t proptype;
+		zfeature_info_t *feature;
+
 		switch (prop = zpool_name_to_prop(nvpair_name(elem))) {
-		case ZPOOL_PROP_VERSION:
+		case ZPROP_INVAL:
 			/*
-			 * Only set version for non-zpool-creation cases
-			 * (set/import). spa_create() needs special care
-			 * for version setting.
+			 * We checked this earlier in spa_prop_validate().
 			 */
-			if (tx->tx_txg != TXG_INITIAL) {
-				VERIFY(nvpair_value_uint64(elem,
-				    &intval) == 0);
-				ASSERT(intval <= SPA_VERSION);
-				ASSERT(intval >= spa_version(spa));
-				spa->spa_uberblock.ub_version = intval;
-				vdev_config_dirty(spa->spa_root_vdev);
-			}
+			ASSERT(zpool_prop_feature(nvpair_name(elem)));
+
+			fname = strchr(nvpair_name(elem), '@') + 1;
+			VERIFY3U(0, ==, zfeature_lookup_name(fname, &feature));
+
+			spa_feature_enable(spa, feature, tx);
+			break;
+
+		case ZPOOL_PROP_VERSION:
+			VERIFY(nvpair_value_uint64(elem, &intval) == 0);
+			/*
+			 * The version is synced seperatly before other
+			 * properties and should be correct by now.
+			 */
+			ASSERT3U(spa_version(spa), >=, intval);
 			break;
 
 		case ZPOOL_PROP_ALTROOT:
@@ -5437,14 +5778,10 @@
 			 * Set pool property values in the poolprops mos object.
 			 */
 			if (spa->spa_pool_props_object == 0) {
-				VERIFY((spa->spa_pool_props_object =
-				    zap_create(mos, DMU_OT_POOL_PROPS,
-				    DMU_OT_NONE, 0, tx)) > 0);
-
-				VERIFY(zap_update(mos,
+				spa->spa_pool_props_object =
+				    zap_create_link(mos, DMU_OT_POOL_PROPS,
 				    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_PROPS,
-				    8, 1, &spa->spa_pool_props_object, tx)
-				    == 0);
+				    tx);
 			}
 
 			/* normalize the property name */
@@ -5543,6 +5880,11 @@
 		/* Keeping the freedir open increases spa_minref */
 		spa->spa_minref += 3;
 	}
+
+	if (spa->spa_ubsync.ub_version < SPA_VERSION_FEATURES &&
+	    spa->spa_uberblock.ub_version >= SPA_VERSION_FEATURES) {
+		spa_feature_create_zap_objects(spa, tx);
+	}
 }
 
 /*
--- a/usr/src/uts/common/fs/zfs/spa_config.c	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/uts/common/fs/zfs/spa_config.c	Mon May 21 12:11:39 2012 -0700
@@ -22,7 +22,7 @@
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
- * Copyright (c) 2011 by Delphix. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
  */
 
 #include <sys/spa.h>
@@ -35,6 +35,7 @@
 #include <sys/utsname.h>
 #include <sys/systeminfo.h>
 #include <sys/sunddi.h>
+#include <sys/zfeature.h>
 #ifdef _KERNEL
 #include <sys/kobj.h>
 #include <sys/zone.h>
@@ -409,6 +410,12 @@
 	VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0);
 	nvlist_free(nvroot);
 
+	/*
+	 * Store what's necessary for reading the MOS in the label.
+	 */
+	VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_FEATURES_FOR_READ,
+	    spa->spa_label_features) == 0);
+
 	if (getstats && spa_load_state(spa) == SPA_LOAD_NONE) {
 		ddt_histogram_t *ddh;
 		ddt_stat_t *dds;
--- a/usr/src/uts/common/fs/zfs/spa_misc.c	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/uts/common/fs/zfs/spa_misc.c	Mon May 21 12:11:39 2012 -0700
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011 by Delphix. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  */
 
@@ -48,6 +48,7 @@
 #include <sys/arc.h>
 #include <sys/ddt.h>
 #include "zfs_prop.h"
+#include "zfeature_common.h"
 
 /*
  * SPA locking
@@ -216,7 +217,7 @@
  * Like spa_vdev_enter/exit, these are convenience wrappers -- the actual
  * locking is, always, based on spa_namespace_lock and spa_config_lock[].
  *
- * spa_rename() is also implemented within this file since is requires
+ * spa_rename() is also implemented within this file since it requires
  * manipulation of the namespace.
  */
 
@@ -483,8 +484,22 @@
 	VERIFY(nvlist_alloc(&spa->spa_load_info, NV_UNIQUE_NAME,
 	    KM_SLEEP) == 0);
 
-	if (config != NULL)
+	if (config != NULL) {
+		nvlist_t *features;
+
+		if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_FEATURES_FOR_READ,
+		    &features) == 0) {
+			VERIFY(nvlist_dup(features, &spa->spa_label_features,
+			    0) == 0);
+		}
+
 		VERIFY(nvlist_dup(config, &spa->spa_config, 0) == 0);
+	}
+
+	if (spa->spa_label_features == NULL) {
+		VERIFY(nvlist_alloc(&spa->spa_label_features, NV_UNIQUE_NAME,
+		    KM_SLEEP) == 0);
+	}
 
 	return (spa);
 }
@@ -521,6 +536,7 @@
 
 	list_destroy(&spa->spa_config_list);
 
+	nvlist_free(spa->spa_label_features);
 	nvlist_free(spa->spa_load_info);
 	spa_config_set(spa, NULL);
 
@@ -1029,6 +1045,20 @@
  * ==========================================================================
  */
 
+void
+spa_activate_mos_feature(spa_t *spa, const char *feature)
+{
+	(void) nvlist_add_boolean(spa->spa_label_features, feature);
+	vdev_config_dirty(spa->spa_root_vdev);
+}
+
+void
+spa_deactivate_mos_feature(spa_t *spa, const char *feature)
+{
+	(void) nvlist_remove_all(spa->spa_label_features, feature);
+	vdev_config_dirty(spa->spa_root_vdev);
+}
+
 /*
  * Rename a spa_t.
  */
@@ -1179,12 +1209,22 @@
 void
 sprintf_blkptr(char *buf, const blkptr_t *bp)
 {
-	char *type = NULL;
+	char type[256];
 	char *checksum = NULL;
 	char *compress = NULL;
 
 	if (bp != NULL) {
-		type = dmu_ot[BP_GET_TYPE(bp)].ot_name;
+		if (BP_GET_TYPE(bp) & DMU_OT_NEWTYPE) {
+			dmu_object_byteswap_t bswap =
+			    DMU_OT_BYTESWAP(BP_GET_TYPE(bp));
+			(void) snprintf(type, sizeof (type), "bswap %s %s",
+			    DMU_OT_IS_METADATA(BP_GET_TYPE(bp)) ?
+			    "metadata" : "data",
+			    dmu_ot_byteswap[bswap].ob_name);
+		} else {
+			(void) strlcpy(type, dmu_ot[BP_GET_TYPE(bp)].ot_name,
+			    sizeof (type));
+		}
 		checksum = zio_checksum_table[BP_GET_CHECKSUM(bp)].ci_name;
 		compress = zio_compress_table[BP_GET_COMPRESS(bp)].ci_name;
 	}
@@ -1266,6 +1306,12 @@
 	return (spa->spa_dsl_pool);
 }
 
+boolean_t
+spa_is_initializing(spa_t *spa)
+{
+	return (spa->spa_is_initializing);
+}
+
 blkptr_t *
 spa_get_rootblkptr(spa_t *spa)
 {
@@ -1549,6 +1595,7 @@
 	vdev_cache_stat_init();
 	zfs_prop_init();
 	zpool_prop_init();
+	zpool_feature_init();
 	spa_config_load();
 	l2arc_start();
 }
--- a/usr/src/uts/common/fs/zfs/sys/dmu.h	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/uts/common/fs/zfs/sys/dmu.h	Mon May 21 12:11:39 2012 -0700
@@ -18,11 +18,10 @@
  *
  * CDDL HEADER END
  */
+
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011 by Delphix. All rights reserved.
- */
-/*
+ * Copyright (c) 2012 by Delphix. All rights reserved.
  * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
  * Copyright (c) 2012, Joyent, Inc. All rights reserved.
  */
@@ -75,6 +74,53 @@
 typedef struct dmu_tx dmu_tx_t;
 typedef struct dsl_dir dsl_dir_t;
 
+typedef enum dmu_object_byteswap {
+	DMU_BSWAP_UINT8,
+	DMU_BSWAP_UINT16,
+	DMU_BSWAP_UINT32,
+	DMU_BSWAP_UINT64,
+	DMU_BSWAP_ZAP,
+	DMU_BSWAP_DNODE,
+	DMU_BSWAP_OBJSET,
+	DMU_BSWAP_ZNODE,
+	DMU_BSWAP_OLDACL,
+	DMU_BSWAP_ACL,
+	/*
+	 * Allocating a new byteswap type number makes the on-disk format
+	 * incompatible with any other format that uses the same number.
+	 *
+	 * Data can usually be structured to work with one of the
+	 * DMU_BSWAP_UINT* or DMU_BSWAP_ZAP types.
+	 */
+	DMU_BSWAP_NUMFUNCS
+} dmu_object_byteswap_t;
+
+#define	DMU_OT_NEWTYPE 0x80
+#define	DMU_OT_METADATA 0x40
+#define	DMU_OT_BYTESWAP_MASK 0x3f
+
+/*
+ * Defines a uint8_t object type. Object types specify if the data
+ * in the object is metadata (boolean) and how to byteswap the data
+ * (dmu_object_byteswap_t).
+ */
+#define	DMU_OT(byteswap, metadata) \
+	(DMU_OT_NEWTYPE | \
+	((metadata) ? DMU_OT_METADATA : 0) | \
+	((byteswap) & DMU_OT_BYTESWAP_MASK))
+
+#define	DMU_OT_IS_VALID(ot) (((ot) & DMU_OT_NEWTYPE) ? \
+	((ot) & DMU_OT_BYTESWAP_MASK) < DMU_BSWAP_NUMFUNCS : \
+	(ot) < DMU_OT_NUMTYPES)
+
+#define	DMU_OT_IS_METADATA(ot) (((ot) & DMU_OT_NEWTYPE) ? \
+	((ot) & DMU_OT_METADATA) : \
+	dmu_ot[(ot)].ot_metadata)
+
+#define	DMU_OT_BYTESWAP(ot) (((ot) & DMU_OT_NEWTYPE) ? \
+	((ot) & DMU_OT_BYTESWAP_MASK) : \
+	dmu_ot[(ot)].ot_byteswap)
+
 typedef enum dmu_object_type {
 	DMU_OT_NONE,
 	/* general: */
@@ -139,7 +185,35 @@
 	DMU_OT_DEADLIST_HDR,		/* UINT64 */
 	DMU_OT_DSL_CLONES,		/* ZAP */
 	DMU_OT_BPOBJ_SUBOBJ,		/* UINT64 */
-	DMU_OT_NUMTYPES
+	/*
+	 * Do not allocate new object types here. Doing so makes the on-disk
+	 * format incompatible with any other format that uses the same object
+	 * type number.
+	 *
+	 * When creating an object which does not have one of the above types
+	 * use the DMU_OTN_* type with the correct byteswap and metadata
+	 * values.
+	 *
+	 * The DMU_OTN_* types do not have entries in the dmu_ot table,
+	 * use the DMU_OT_IS_METDATA() and DMU_OT_BYTESWAP() macros instead
+	 * of indexing into dmu_ot directly (this works for both DMU_OT_* types
+	 * and DMU_OTN_* types).
+	 */
+	DMU_OT_NUMTYPES,
+
+	/*
+	 * Names for valid types declared with DMU_OT().
+	 */
+	DMU_OTN_UINT8_DATA = DMU_OT(DMU_BSWAP_UINT8, B_FALSE),
+	DMU_OTN_UINT8_METADATA = DMU_OT(DMU_BSWAP_UINT8, B_TRUE),
+	DMU_OTN_UINT16_DATA = DMU_OT(DMU_BSWAP_UINT16, B_FALSE),
+	DMU_OTN_UINT16_METADATA = DMU_OT(DMU_BSWAP_UINT16, B_TRUE),
+	DMU_OTN_UINT32_DATA = DMU_OT(DMU_BSWAP_UINT32, B_FALSE),
+	DMU_OTN_UINT32_METADATA = DMU_OT(DMU_BSWAP_UINT32, B_TRUE),
+	DMU_OTN_UINT64_DATA = DMU_OT(DMU_BSWAP_UINT64, B_FALSE),
+	DMU_OTN_UINT64_METADATA = DMU_OT(DMU_BSWAP_UINT64, B_TRUE),
+	DMU_OTN_ZAP_DATA = DMU_OT(DMU_BSWAP_ZAP, B_FALSE),
+	DMU_OTN_ZAP_METADATA = DMU_OT(DMU_BSWAP_ZAP, B_TRUE),
 } dmu_object_type_t;
 
 typedef enum dmu_objset_type {
@@ -219,6 +293,9 @@
  */
 #define	DMU_POOL_DIRECTORY_OBJECT	1
 #define	DMU_POOL_CONFIG			"config"
+#define	DMU_POOL_FEATURES_FOR_WRITE	"features_for_write"
+#define	DMU_POOL_FEATURES_FOR_READ	"features_for_read"
+#define	DMU_POOL_FEATURE_DESCRIPTIONS	"feature_descriptions"
 #define	DMU_POOL_ROOT_DATASET		"root_dataset"
 #define	DMU_POOL_SYNC_BPOBJ		"sync_bplist"
 #define	DMU_POOL_ERRLOG_SCRUB		"errlog_scrub"
@@ -234,6 +311,7 @@
 #define	DMU_POOL_CREATION_VERSION	"creation_version"
 #define	DMU_POOL_SCAN			"scan"
 #define	DMU_POOL_FREE_BPOBJ		"free_bpobj"
+#define	DMU_POOL_BPTREE_OBJ		"bptree_obj"
 
 /*
  * Allocate an object from this objset.  The range of object numbers
@@ -494,7 +572,7 @@
 
 /*
  * Free up the data blocks for a defined range of a file.  If size is
- * zero, the range from offset to end-of-file is freed.
+ * -1, the range from offset to end-of-file is freed.
  */
 int dmu_free_range(objset_t *os, uint64_t object, uint64_t offset,
 	uint64_t size, dmu_tx_t *tx);
@@ -564,12 +642,18 @@
 typedef void arc_byteswap_func_t(void *buf, size_t size);
 
 typedef struct dmu_object_type_info {
-	arc_byteswap_func_t	*ot_byteswap;
+	dmu_object_byteswap_t	ot_byteswap;
 	boolean_t		ot_metadata;
 	char			*ot_name;
 } dmu_object_type_info_t;
 
+typedef struct dmu_object_byteswap_info {
+	arc_byteswap_func_t	*ob_func;
+	char			*ob_name;
+} dmu_object_byteswap_info_t;
+
 extern const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES];
+extern const dmu_object_byteswap_info_t dmu_ot_byteswap[DMU_BSWAP_NUMFUNCS];
 
 /*
  * Get information on a DMU object.
--- a/usr/src/uts/common/fs/zfs/sys/dmu_traverse.h	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/uts/common/fs/zfs/sys/dmu_traverse.h	Mon May 21 12:11:39 2012 -0700
@@ -20,6 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
  */
 
 #ifndef	_SYS_DMU_TRAVERSE_H
@@ -54,6 +55,9 @@
 
 int traverse_dataset(struct dsl_dataset *ds,
     uint64_t txg_start, int flags, blkptr_cb_t func, void *arg);
+int traverse_dataset_destroyed(spa_t *spa, blkptr_t *blkptr,
+    uint64_t txg_start, zbookmark_t *resume, int flags,
+    blkptr_cb_t func, void *arg);
 int traverse_pool(spa_t *spa,
     uint64_t txg_start, int flags, blkptr_cb_t func, void *arg);
 
--- a/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h	Mon May 21 12:11:39 2012 -0700
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011 by Delphix. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
  * Copyright (c) 2012, Joyent, Inc. All rights reserved.
  */
 
@@ -86,7 +86,12 @@
 	uint64_t ds_creation_time;	/* seconds since 1970 */
 	uint64_t ds_creation_txg;
 	uint64_t ds_deadlist_obj;	/* DMU_OT_DEADLIST */
-	uint64_t ds_used_bytes;
+	/*
+	 * ds_referenced_bytes, ds_compressed_bytes, and ds_uncompressed_bytes
+	 * include all blocks referenced by this dataset, including those
+	 * shared with any other datasets.
+	 */
+	uint64_t ds_referenced_bytes;
 	uint64_t ds_compressed_bytes;
 	uint64_t ds_uncompressed_bytes;
 	uint64_t ds_unique_bytes;	/* only relevant to snapshots */
--- a/usr/src/uts/common/fs/zfs/sys/dsl_pool.h	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/uts/common/fs/zfs/sys/dsl_pool.h	Mon May 21 12:11:39 2012 -0700
@@ -20,6 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
  */
 
 #ifndef	_SYS_DSL_POOL_H
@@ -34,6 +35,7 @@
 #include <sys/ddt.h>
 #include <sys/arc.h>
 #include <sys/bpobj.h>
+#include <sys/bptree.h>
 
 #ifdef	__cplusplus
 extern "C" {
@@ -48,7 +50,8 @@
 
 /* These macros are for indexing into the zfs_all_blkstats_t. */
 #define	DMU_OT_DEFERRED	DMU_OT_NONE
-#define	DMU_OT_TOTAL	DMU_OT_NUMTYPES
+#define	DMU_OT_OTHER	DMU_OT_NUMTYPES /* place holder for DMU_OT() types */
+#define	DMU_OT_TOTAL	(DMU_OT_NUMTYPES + 1)
 
 typedef struct zfs_blkstat {
 	uint64_t	zb_count;
@@ -85,6 +88,7 @@
 	uint64_t dp_write_limit;
 	uint64_t dp_tmp_userrefs_obj;
 	bpobj_t dp_free_bpobj;
+	uint64_t dp_bptree_obj;
 
 	struct dsl_scan *dp_scan;
 
@@ -110,7 +114,8 @@
 	zfs_all_blkstats_t *dp_blkstats;
 } dsl_pool_t;
 
-int dsl_pool_open(spa_t *spa, uint64_t txg, dsl_pool_t **dpp);
+int dsl_pool_init(spa_t *spa, uint64_t txg, dsl_pool_t **dpp);
+int dsl_pool_open(dsl_pool_t *dp);
 void dsl_pool_close(dsl_pool_t *dp);
 dsl_pool_t *dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg);
 void dsl_pool_sync(dsl_pool_t *dp, uint64_t txg);
--- a/usr/src/uts/common/fs/zfs/sys/dsl_scan.h	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/uts/common/fs/zfs/sys/dsl_scan.h	Mon May 21 12:11:39 2012 -0700
@@ -20,6 +20,7 @@
  */
 /*
  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
  */
 
 #ifndef	_SYS_DSL_SCAN_H
@@ -79,6 +80,9 @@
 	uint64_t scn_sync_start_time;
 	zio_t *scn_zio_root;
 
+	/* for freeing blocks */
+	boolean_t scn_is_bptree;
+
 	/* for debugging / information */
 	uint64_t scn_visited_this_txg;
 
--- a/usr/src/uts/common/fs/zfs/sys/spa.h	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/uts/common/fs/zfs/sys/spa.h	Mon May 21 12:11:39 2012 -0700
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011 by Delphix. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  */
 
@@ -94,7 +94,7 @@
 /*
  * Size of block to hold the configuration data (a packed nvlist)
  */
-#define	SPA_CONFIG_BLOCKSIZE	(1 << 14)
+#define	SPA_CONFIG_BLOCKSIZE	(1ULL << 14)
 
 /*
  * The DVA size encodings for LSIZE and PSIZE support blocks up to 32MB.
@@ -262,7 +262,7 @@
 		DVA_GET_ASIZE(&(bp)->blk_dva[2]))
 
 #define	BP_GET_UCSIZE(bp) \
-	((BP_GET_LEVEL(bp) > 0 || dmu_ot[BP_GET_TYPE(bp)].ot_metadata) ? \
+	((BP_GET_LEVEL(bp) > 0 || DMU_OT_IS_METADATA(BP_GET_TYPE(bp))) ? \
 	BP_GET_PSIZE(bp) : BP_GET_LSIZE(bp))
 
 #define	BP_GET_NDVAS(bp)	\
@@ -403,8 +403,8 @@
 #include <sys/dmu.h>
 
 #define	BP_GET_BUFC_TYPE(bp)						\
-	(((BP_GET_LEVEL(bp) > 0) || (dmu_ot[BP_GET_TYPE(bp)].ot_metadata)) ? \
-	ARC_BUFC_METADATA : ARC_BUFC_DATA);
+	(((BP_GET_LEVEL(bp) > 0) || (DMU_OT_IS_METADATA(BP_GET_TYPE(bp)))) ? \
+	ARC_BUFC_METADATA : ARC_BUFC_DATA)
 
 typedef enum spa_import_type {
 	SPA_IMPORT_EXISTING,
@@ -415,8 +415,8 @@
 extern int spa_open(const char *pool, spa_t **, void *tag);
 extern int spa_open_rewind(const char *pool, spa_t **, void *tag,
     nvlist_t *policy, nvlist_t **config);
-extern int spa_get_stats(const char *pool, nvlist_t **config,
-    char *altroot, size_t buflen);
+extern int spa_get_stats(const char *pool, nvlist_t **config, char *altroot,
+    size_t buflen);
 extern int spa_create(const char *pool, nvlist_t *config, nvlist_t *props,
     const char *history_str, nvlist_t *zplprops);
 extern int spa_import_rootpool(char *devpath, char *devid);
@@ -573,6 +573,7 @@
 /* Accessor functions */
 extern boolean_t spa_shutting_down(spa_t *spa);
 extern struct dsl_pool *spa_get_dsl(spa_t *spa);
+extern boolean_t spa_is_initializing(spa_t *spa);
 extern blkptr_t *spa_get_rootblkptr(spa_t *spa);
 extern void spa_set_rootblkptr(spa_t *spa, const blkptr_t *bp);
 extern void spa_altroot(spa_t *, char *, size_t);
@@ -604,6 +605,8 @@
 extern objset_t *spa_meta_objset(spa_t *spa);
 
 /* Miscellaneous support routines */
+extern void spa_activate_mos_feature(spa_t *spa, const char *feature);
+extern void spa_deactivate_mos_feature(spa_t *spa, const char *feature);
 extern int spa_rename(const char *oldname, const char *newname);
 extern spa_t *spa_by_guid(uint64_t pool_guid, uint64_t device_guid);
 extern boolean_t spa_guid_exists(uint64_t pool_guid, uint64_t device_guid);
--- a/usr/src/uts/common/fs/zfs/sys/spa_impl.h	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/uts/common/fs/zfs/sys/spa_impl.h	Mon May 21 12:11:39 2012 -0700
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011 by Delphix. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  */
 
@@ -127,6 +127,7 @@
 	uint64_t	spa_import_flags;	/* import specific flags */
 	taskq_t		*spa_zio_taskq[ZIO_TYPES][ZIO_TASKQ_TYPES];
 	dsl_pool_t	*spa_dsl_pool;
+	boolean_t	spa_is_initializing;	/* true while opening pool */
 	metaslab_class_t *spa_normal_class;	/* normal data class */
 	metaslab_class_t *spa_log_class;	/* intent log data class */
 	uint64_t	spa_first_txg;		/* first txg after spa_open() */
@@ -144,6 +145,7 @@
 	list_t		spa_state_dirty_list;	/* vdevs with dirty state */
 	spa_aux_vdev_t	spa_spares;		/* hot spares */
 	spa_aux_vdev_t	spa_l2cache;		/* L2ARC cache devices */
+	nvlist_t	*spa_label_features;	/* Features for reading MOS */
 	uint64_t	spa_config_object;	/* MOS object for pool config */
 	uint64_t	spa_config_generation;	/* config generation number */
 	uint64_t	spa_syncing_txg;	/* txg currently syncing */
@@ -220,7 +222,10 @@
 	boolean_t	spa_autoreplace;	/* autoreplace set in open */
 	int		spa_vdev_locks;		/* locks grabbed */
 	uint64_t	spa_creation_version;	/* version at pool creation */
-	uint64_t	spa_prev_software_version;
+	uint64_t	spa_prev_software_version; /* See ub_software_version */
+	uint64_t	spa_feat_for_write_obj;	/* required to write to pool */
+	uint64_t	spa_feat_for_read_obj;	/* required to read from pool */
+	uint64_t	spa_feat_desc_obj;	/* Feature descriptions */
 	/*
 	 * spa_refcnt & spa_config_lock must be the last elements
 	 * because refcount_t changes size based on compilation options.
--- a/usr/src/uts/common/fs/zfs/sys/vdev.h	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/uts/common/fs/zfs/sys/vdev.h	Mon May 21 12:11:39 2012 -0700
@@ -18,6 +18,7 @@
  *
  * CDDL HEADER END
  */
+
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2012 by Delphix. All rights reserved.
@@ -141,8 +142,8 @@
 struct uberblock;
 extern uint64_t vdev_label_offset(uint64_t psize, int l, uint64_t offset);
 extern int vdev_label_number(uint64_t psise, uint64_t offset);
-extern nvlist_t *vdev_label_read_config(vdev_t *vd);
-extern void vdev_uberblock_load(zio_t *zio, vdev_t *vd, struct uberblock *ub);
+extern nvlist_t *vdev_label_read_config(vdev_t *vd, int label);
+extern void vdev_uberblock_load(vdev_t *, struct uberblock *, nvlist_t **);
 
 typedef enum {
 	VDEV_LABEL_CREATE,	/* create/add a new device */
--- a/usr/src/uts/common/fs/zfs/sys/vdev_impl.h	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/uts/common/fs/zfs/sys/vdev_impl.h	Mon May 21 12:11:39 2012 -0700
@@ -202,7 +202,7 @@
 	 * For DTrace to work in userland (libzpool) context, these fields must
 	 * remain at the end of the structure.  DTrace will use the kernel's
 	 * CTF definition for 'struct vdev', and since the size of a kmutex_t is
-	 * larger in userland, the offsets for the rest fields would be
+	 * larger in userland, the offsets for the rest of the fields would be
 	 * incorrect.
 	 */
 	kmutex_t	vdev_dtl_lock;	/* vdev_dtl_{map,resilver}	*/
@@ -257,6 +257,7 @@
 #define	VDEV_LABEL_START_SIZE	(2 * sizeof (vdev_label_t) + VDEV_BOOT_SIZE)
 #define	VDEV_LABEL_END_SIZE	(2 * sizeof (vdev_label_t))
 #define	VDEV_LABELS		4
+#define	VDEV_BEST_LABEL		VDEV_LABELS
 
 #define	VDEV_ALLOC_LOAD		0
 #define	VDEV_ALLOC_ADD		1
--- a/usr/src/uts/common/fs/zfs/sys/zap.h	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/uts/common/fs/zfs/sys/zap.h	Mon May 21 12:11:39 2012 -0700
@@ -20,6 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
  */
 
 #ifndef	_SYS_ZAP_H
@@ -132,6 +133,8 @@
 uint64_t zap_create_flags(objset_t *os, int normflags, zap_flags_t flags,
     dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift,
     dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
+uint64_t zap_create_link(objset_t *os, dmu_object_type_t ot,
+    uint64_t parent_obj, const char *name, dmu_tx_t *tx);
 
 /*
  * Create a new zapobj with no attributes from the given (unallocated)
@@ -300,12 +303,6 @@
 int zap_lookup_int_key(objset_t *os, uint64_t obj,
     uint64_t key, uint64_t *valuep);
 
-/*
- * They name is a stringified version of key; increment its value by
- * delta.  Zero values will be zap_remove()-ed.
- */
-int zap_increment_int(objset_t *os, uint64_t obj, uint64_t key, int64_t delta,
-    dmu_tx_t *tx);
 int zap_increment(objset_t *os, uint64_t obj, const char *name, int64_t delta,
     dmu_tx_t *tx);
 
--- a/usr/src/uts/common/fs/zfs/sys/zio.h	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/uts/common/fs/zfs/sys/zio.h	Mon May 21 12:11:39 2012 -0700
@@ -24,6 +24,7 @@
  */
 /*
  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
  */
 
 #ifndef _ZIO_H
@@ -272,6 +273,14 @@
 #define	ZB_ZIL_OBJECT		(0ULL)
 #define	ZB_ZIL_LEVEL		(-2LL)
 
+#define	ZB_IS_ZERO(zb)						\
+	((zb)->zb_objset == 0 && (zb)->zb_object == 0 &&	\
+	(zb)->zb_level == 0 && (zb)->zb_blkid == 0)
+#define	ZB_IS_ROOT(zb)				\
+	((zb)->zb_object == ZB_ROOT_OBJECT &&	\
+	(zb)->zb_level == ZB_ROOT_LEVEL &&	\
+	(zb)->zb_blkid == ZB_ROOT_BLKID)
+
 typedef struct zio_prop {
 	enum zio_checksum	zp_checksum;
 	enum zio_compress	zp_compress;
@@ -289,6 +298,7 @@
 typedef void zio_cksum_free_f(void *cbdata, size_t size);
 
 struct zio_bad_cksum;				/* defined in zio_checksum.h */
+struct dnode_phys;
 
 struct zio_cksum_report {
 	struct zio_cksum_report *zcr_next;
@@ -558,6 +568,10 @@
 /* Called from spa_sync(), but primarily an injection handler */
 extern void spa_handle_ignored_writes(spa_t *spa);
 
+/* zbookmark functions */
+boolean_t zbookmark_is_before(const struct dnode_phys *dnp,
+    const zbookmark_t *zb1, const zbookmark_t *zb2);
+
 #ifdef	__cplusplus
 }
 #endif
--- a/usr/src/uts/common/fs/zfs/vdev.c	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/uts/common/fs/zfs/vdev.c	Mon May 21 12:11:39 2012 -0700
@@ -1328,7 +1328,8 @@
 		uint64_t aux_guid = 0;
 		nvlist_t *nvl;
 
-		if ((label = vdev_label_read_config(vd)) == NULL) {
+		if ((label = vdev_label_read_config(vd, VDEV_BEST_LABEL)) ==
+		    NULL) {
 			vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
 			    VDEV_AUX_BAD_LABEL);
 			return (0);
@@ -1969,14 +1970,14 @@
 	if (!vdev_readable(vd))
 		return (0);
 
-	if ((label = vdev_label_read_config(vd)) == NULL) {
+	if ((label = vdev_label_read_config(vd, VDEV_BEST_LABEL)) == NULL) {
 		vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
 		    VDEV_AUX_CORRUPT_DATA);
 		return (-1);
 	}
 
 	if (nvlist_lookup_uint64(label, ZPOOL_CONFIG_VERSION, &version) != 0 ||
-	    version > SPA_VERSION ||
+	    !SPA_VERSION_IS_SUPPORTED(version) ||
 	    nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID, &guid) != 0 ||
 	    guid != vd->vdev_guid ||
 	    nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_STATE, &state) != 0) {
--- a/usr/src/uts/common/fs/zfs/vdev_label.c	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/uts/common/fs/zfs/vdev_label.c	Mon May 21 12:11:39 2012 -0700
@@ -18,8 +18,10 @@
  *
  * CDDL HEADER END
  */
+
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
  */
 
 /*
@@ -121,6 +123,8 @@
  * 	txg		Transaction group in which this label was written
  * 	pool_guid	Unique identifier for this pool
  * 	vdev_tree	An nvlist describing vdev tree.
+ *	features_for_read
+ *			An nvlist of the features necessary for reading the MOS.
  *
  * Each leaf device label also contains the following:
  *
@@ -428,8 +432,13 @@
 	kmem_free(array, rvd->vdev_children * sizeof (uint64_t));
 }
 
+/*
+ * Returns the configuration from the label of the given vdev. If 'label' is
+ * VDEV_BEST_LABEL, each label of the vdev will be read until a valid
+ * configuration is found; otherwise, only the specified label will be read.
+ */
 nvlist_t *
-vdev_label_read_config(vdev_t *vd)
+vdev_label_read_config(vdev_t *vd, int label)
 {
 	spa_t *spa = vd->vdev_spa;
 	nvlist_t *config = NULL;
@@ -447,6 +456,8 @@
 
 retry:
 	for (int l = 0; l < VDEV_LABELS; l++) {
+		if (label >= 0 && label < VDEV_LABELS && label != l)
+			continue;
 
 		zio = zio_root(spa, NULL, NULL, flags);
 
@@ -496,7 +507,7 @@
 	/*
 	 * Read the label, if any, and perform some basic sanity checks.
 	 */
-	if ((label = vdev_label_read_config(vd)) == NULL)
+	if ((label = vdev_label_read_config(vd, VDEV_BEST_LABEL)) == NULL)
 		return (B_FALSE);
 
 	(void) nvlist_lookup_uint64(label, ZPOOL_CONFIG_CREATE_TXG,
@@ -833,7 +844,7 @@
  * come back up, we fail to see the uberblock for txg + 1 because, say,
  * it was on a mirrored device and the replica to which we wrote txg + 1
  * is now offline.  If we then make some changes and sync txg + 1, and then
- * the missing replica comes back, then for a new seconds we'll have two
+ * the missing replica comes back, then for a few seconds we'll have two
  * conflicting uberblocks on disk with the same txg.  The solution is simple:
  * among uberblocks with equal txg, choose the one with the latest timestamp.
  */
@@ -853,46 +864,50 @@
 	return (0);
 }
 
+struct ubl_cbdata {
+	uberblock_t	*ubl_ubbest;	/* Best uberblock */
+	vdev_t		*ubl_vd;	/* vdev associated with the above */
+	int		ubl_label;	/* Label associated with the above */
+};
+
 static void
 vdev_uberblock_load_done(zio_t *zio)
 {
+	vdev_t *vd = zio->io_vd;
 	spa_t *spa = zio->io_spa;
 	zio_t *rio = zio->io_private;
 	uberblock_t *ub = zio->io_data;
-	uberblock_t *ubbest = rio->io_private;
+	struct ubl_cbdata *cbp = rio->io_private;
 
-	ASSERT3U(zio->io_size, ==, VDEV_UBERBLOCK_SIZE(zio->io_vd));
+	ASSERT3U(zio->io_size, ==, VDEV_UBERBLOCK_SIZE(vd));
 
 	if (zio->io_error == 0 && uberblock_verify(ub) == 0) {
 		mutex_enter(&rio->io_lock);
 		if (ub->ub_txg <= spa->spa_load_max_txg &&
-		    vdev_uberblock_compare(ub, ubbest) > 0)
-			*ubbest = *ub;
+		    vdev_uberblock_compare(ub, cbp->ubl_ubbest) > 0) {
+			/*
+			 * Keep track of the vdev and label in which this
+			 * uberblock was found. We will use this information
+			 * later to obtain the config nvlist associated with
+			 * this uberblock.
+			 */
+			*cbp->ubl_ubbest = *ub;
+			cbp->ubl_vd = vd;
+			cbp->ubl_label = vdev_label_number(vd->vdev_psize,
+			    zio->io_offset);
+		}
 		mutex_exit(&rio->io_lock);
 	}
 
 	zio_buf_free(zio->io_data, zio->io_size);
 }
 
-void
-vdev_uberblock_load(zio_t *zio, vdev_t *vd, uberblock_t *ubbest)
+static void
+vdev_uberblock_load_impl(zio_t *zio, vdev_t *vd, int flags,
+    struct ubl_cbdata *cbp)
 {
-	spa_t *spa = vd->vdev_spa;
-	vdev_t *rvd = spa->spa_root_vdev;
-	int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL |
-	    ZIO_FLAG_SPECULATIVE | ZIO_FLAG_TRYHARD;
-
-	if (vd == rvd) {
-		ASSERT(zio == NULL);
-		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
-		zio = zio_root(spa, NULL, ubbest, flags);
-		bzero(ubbest, sizeof (uberblock_t));
-	}
-
-	ASSERT(zio != NULL);
-
 	for (int c = 0; c < vd->vdev_children; c++)
-		vdev_uberblock_load(zio, vd->vdev_child[c], ubbest);
+		vdev_uberblock_load_impl(zio, vd->vdev_child[c], flags, cbp);
 
 	if (vd->vdev_ops->vdev_op_leaf && vdev_readable(vd)) {
 		for (int l = 0; l < VDEV_LABELS; l++) {
@@ -905,11 +920,45 @@
 			}
 		}
 	}
+}
 
-	if (vd == rvd) {
-		(void) zio_wait(zio);
-		spa_config_exit(spa, SCL_ALL, FTAG);
+/*
+ * Reads the 'best' uberblock from disk along with its associated
+ * configuration. First, we read the uberblock array of each label of each
+ * vdev, keeping track of the uberblock with the highest txg in each array.
+ * Then, we read the configuration from the same label as the best uberblock.
+ */
+void
+vdev_uberblock_load(vdev_t *rvd, uberblock_t *ub, nvlist_t **config)
+{
+	int i;
+	zio_t *zio;
+	spa_t *spa = rvd->vdev_spa;
+	struct ubl_cbdata cb;
+	int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL |
+	    ZIO_FLAG_SPECULATIVE | ZIO_FLAG_TRYHARD;
+
+	ASSERT(ub);
+	ASSERT(config);
+
+	bzero(ub, sizeof (uberblock_t));
+	*config = NULL;
+
+	cb.ubl_ubbest = ub;
+	cb.ubl_vd = NULL;
+
+	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
+	zio = zio_root(spa, NULL, &cb, flags);
+	vdev_uberblock_load_impl(zio, rvd, flags, &cb);
+	(void) zio_wait(zio);
+	if (cb.ubl_vd != NULL) {
+		for (i = cb.ubl_label % 2; i < VDEV_LABELS; i += 2) {
+			*config = vdev_label_read_config(cb.ubl_vd, i);
+			if (*config != NULL)
+				break;
+		}
 	}
+	spa_config_exit(spa, SCL_ALL, FTAG);
 }
 
 /*
--- a/usr/src/uts/common/fs/zfs/zap.c	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/uts/common/fs/zfs/zap.c	Mon May 21 12:11:39 2012 -0700
@@ -20,6 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
  */
 
 /*
@@ -946,6 +947,19 @@
  * Helper functions for consumers.
  */
 
+uint64_t
+zap_create_link(objset_t *os, dmu_object_type_t ot, uint64_t parent_obj,
+    const char *name, dmu_tx_t *tx)
+{
+	uint64_t new_obj;
+
+	VERIFY((new_obj = zap_create(os, ot, DMU_OT_NONE, 0, tx)) > 0);
+	VERIFY(zap_add(os, parent_obj, name, sizeof (uint64_t), 1, &new_obj,
+	    tx) == 0);
+
+	return (new_obj);
+}
+
 int
 zap_value_search(objset_t *os, uint64_t zapobj, uint64_t value, uint64_t mask,
     char *name)
--- a/usr/src/uts/common/fs/zfs/zap_micro.c	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/uts/common/fs/zfs/zap_micro.c	Mon May 21 12:11:39 2012 -0700
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011 by Delphix. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
  */
 
 #include <sys/zio.h>
@@ -461,7 +461,7 @@
 	{
 		dmu_object_info_t doi;
 		dmu_object_info_from_db(db, &doi);
-		ASSERT(dmu_ot[doi.doi_type].ot_byteswap == zap_byteswap);
+		ASSERT3U(DMU_OT_BYTESWAP(doi.doi_type), ==, DMU_BSWAP_ZAP);
 	}
 #endif
 
@@ -585,7 +585,7 @@
 	{
 		dmu_object_info_t doi;
 		dmu_object_info_from_db(db, &doi);
-		ASSERT(dmu_ot[doi.doi_type].ot_byteswap == zap_byteswap);
+		ASSERT3U(DMU_OT_BYTESWAP(doi.doi_type), ==, DMU_BSWAP_ZAP);
 	}
 #endif
 
--- a/usr/src/uts/common/fs/zfs/zfs_ioctl.c	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/uts/common/fs/zfs/zfs_ioctl.c	Mon May 21 12:11:39 2012 -0700
@@ -18,6 +18,7 @@
  *
  * CDDL HEADER END
  */
+
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Portions Copyright 2011 Martin Matuska
@@ -1134,6 +1135,8 @@
 /*
  * Find a zfsvfs_t for a mounted filesystem, or create our own, in which
  * case its z_vfs will be NULL, and it will be opened as the owner.
+ * If 'writer' is set, the z_teardown_lock will be held for RW_WRITER,
+ * which prevents all vnode ops from running.
  */
 static int
 zfsvfs_hold(const char *name, void *tag, zfsvfs_t **zfvp, boolean_t writer)
@@ -1197,7 +1200,7 @@
 
 		(void) nvlist_lookup_uint64(props,
 		    zpool_prop_to_name(ZPOOL_PROP_VERSION), &version);
-		if (version < SPA_VERSION_INITIAL || version > SPA_VERSION) {
+		if (!SPA_VERSION_IS_SUPPORTED(version)) {
 			error = EINVAL;
 			goto pool_props_bad;
 		}
@@ -1321,6 +1324,15 @@
 	return (error);
 }
 
+/*
+ * inputs:
+ * zc_name		name of the pool
+ *
+ * outputs:
+ * zc_cookie		real errno
+ * zc_nvlist_dst	config nvlist
+ * zc_nvlist_dst_size	size of config nvlist
+ */
 static int
 zfs_ioc_pool_stats(zfs_cmd_t *zc)
 {
@@ -1422,7 +1434,8 @@
 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
 		return (error);
 
-	if (zc->zc_cookie < spa_version(spa) || zc->zc_cookie > SPA_VERSION) {
+	if (zc->zc_cookie < spa_version(spa) ||
+	    !SPA_VERSION_IS_SUPPORTED(zc->zc_cookie)) {
 		spa_close(spa, FTAG);
 		return (EINVAL);
 	}
--- a/usr/src/uts/common/fs/zfs/zio.c	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/uts/common/fs/zfs/zio.c	Mon May 21 12:11:39 2012 -0700
@@ -619,7 +619,7 @@
 	    zp->zp_checksum < ZIO_CHECKSUM_FUNCTIONS &&
 	    zp->zp_compress >= ZIO_COMPRESS_OFF &&
 	    zp->zp_compress < ZIO_COMPRESS_FUNCTIONS &&
-	    zp->zp_type < DMU_OT_NUMTYPES &&
+	    DMU_OT_IS_VALID(zp->zp_type) &&
 	    zp->zp_level < 32 &&
 	    zp->zp_copies > 0 &&
 	    zp->zp_copies <= spa_max_replication(spa) &&
@@ -903,7 +903,7 @@
 		zio_push_transform(zio, cbuf, psize, psize, zio_decompress);
 	}
 
-	if (!dmu_ot[BP_GET_TYPE(bp)].ot_metadata && BP_GET_LEVEL(bp) == 0)
+	if (!DMU_OT_IS_METADATA(BP_GET_TYPE(bp)) && BP_GET_LEVEL(bp) == 0)
 		zio->io_flags |= ZIO_FLAG_DONT_CACHE;
 
 	if (BP_GET_TYPE(bp) == DMU_OT_DDT_ZAP)
@@ -2989,3 +2989,45 @@
 	zio_checksum_verify,
 	zio_done
 };
+
+/* dnp is the dnode for zb1->zb_object */
+boolean_t
+zbookmark_is_before(const dnode_phys_t *dnp, const zbookmark_t *zb1,
+    const zbookmark_t *zb2)
+{
+	uint64_t zb1nextL0, zb2thisobj;
+
+	ASSERT(zb1->zb_objset == zb2->zb_objset);
+	ASSERT(zb2->zb_level == 0);
+
+	/*
+	 * A bookmark in the deadlist is considered to be after
+	 * everything else.
+	 */
+	if (zb2->zb_object == DMU_DEADLIST_OBJECT)
+		return (B_TRUE);
+
+	/* The objset_phys_t isn't before anything. */
+	if (dnp == NULL)
+		return (B_FALSE);
+
+	zb1nextL0 = (zb1->zb_blkid + 1) <<
+	    ((zb1->zb_level) * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT));
+
+	zb2thisobj = zb2->zb_object ? zb2->zb_object :
+	    zb2->zb_blkid << (DNODE_BLOCK_SHIFT - DNODE_SHIFT);
+
+	if (zb1->zb_object == DMU_META_DNODE_OBJECT) {
+		uint64_t nextobj = zb1nextL0 *
+		    (dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT) >> DNODE_SHIFT;
+		return (nextobj <= zb2thisobj);
+	}
+
+	if (zb1->zb_object < zb2thisobj)
+		return (B_TRUE);
+	if (zb1->zb_object > zb2thisobj)
+		return (B_FALSE);
+	if (zb2->zb_object == DMU_META_DNODE_OBJECT)
+		return (B_FALSE);
+	return (zb1nextL0 <= zb2->zb_blkid);
+}
--- a/usr/src/uts/common/sys/fs/zfs.h	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/uts/common/sys/fs/zfs.h	Mon May 21 12:11:39 2012 -0700
@@ -169,6 +169,7 @@
 	ZPOOL_PROP_READONLY,
 	ZPOOL_PROP_COMMENT,
 	ZPOOL_PROP_EXPANDSZ,
+	ZPOOL_PROP_FREEING,
 	ZPOOL_NUM_PROPS
 } zpool_prop_t;
 
@@ -243,6 +244,8 @@
 const char *zpool_prop_default_string(zpool_prop_t);
 uint64_t zpool_prop_default_numeric(zpool_prop_t);
 boolean_t zpool_prop_readonly(zpool_prop_t);
+boolean_t zpool_prop_feature(const char *);
+boolean_t zpool_prop_unsupported(const char *name);
 int zpool_prop_index_to_string(zpool_prop_t, uint64_t, const char **);
 int zpool_prop_string_to_index(zpool_prop_t, const char *, uint64_t *);
 uint64_t zpool_prop_random_value(zpool_prop_t, uint64_t seed);
@@ -350,6 +353,7 @@
 #define	SPA_VERSION_26			26ULL
 #define	SPA_VERSION_27			27ULL
 #define	SPA_VERSION_28			28ULL
+#define	SPA_VERSION_5000		5000ULL
 
 /*
  * When bumping up SPA_VERSION, make sure GRUB ZFS understands the on-disk
@@ -357,8 +361,8 @@
  * and do the appropriate changes.  Also bump the version number in
  * usr/src/grub/capability.
  */
-#define	SPA_VERSION			SPA_VERSION_28
-#define	SPA_VERSION_STRING		"28"
+#define	SPA_VERSION			SPA_VERSION_5000
+#define	SPA_VERSION_STRING		"5000"
 
 /*
  * Symbolic names for the changes that caused a SPA_VERSION switch.
@@ -409,6 +413,12 @@
 #define	SPA_VERSION_DEADLISTS		SPA_VERSION_26
 #define	SPA_VERSION_FAST_SNAP		SPA_VERSION_27
 #define	SPA_VERSION_MULTI_REPLACE	SPA_VERSION_28
+#define	SPA_VERSION_BEFORE_FEATURES	SPA_VERSION_28
+#define	SPA_VERSION_FEATURES		SPA_VERSION_5000
+
+#define	SPA_VERSION_IS_SUPPORTED(v) \
+	(((v) >= SPA_VERSION_INITIAL && (v) <= SPA_VERSION_BEFORE_FEATURES) || \
+	((v) >= SPA_VERSION_FEATURES && (v) <= SPA_VERSION))
 
 /*
  * ZPL version - rev'd whenever an incompatible on-disk format change
@@ -506,6 +516,11 @@
 #define	ZPOOL_CONFIG_BOOTFS		"bootfs"	/* not stored on disk */
 #define	ZPOOL_CONFIG_MISSING_DEVICES	"missing_vdevs"	/* not stored on disk */
 #define	ZPOOL_CONFIG_LOAD_INFO		"load_info"	/* not stored on disk */
+#define	ZPOOL_CONFIG_REWIND_INFO	"rewind_info"	/* not stored on disk */
+#define	ZPOOL_CONFIG_UNSUP_FEAT		"unsup_feat"	/* not stored on disk */
+#define	ZPOOL_CONFIG_CAN_RDONLY		"can_rdonly"	/* not stored on disk */
+#define	ZPOOL_CONFIG_FEATURES_FOR_READ	"features_for_read"
+#define	ZPOOL_CONFIG_FEATURE_STATS	"feature_stats"	/* not stored on disk */
 /*
  * The persistent vdev state is stored as separate values rather than a single
  * 'vdev_state' entry.  This is because a device can be in multiple states, such
@@ -584,6 +599,7 @@
 	VDEV_AUX_BAD_LABEL,	/* the label is OK but invalid		*/
 	VDEV_AUX_VERSION_NEWER,	/* on-disk version is too new		*/
 	VDEV_AUX_VERSION_OLDER,	/* on-disk version is too old		*/
+	VDEV_AUX_UNSUP_FEAT,	/* unsupported features			*/
 	VDEV_AUX_SPARED,	/* hot spare used in another pool	*/
 	VDEV_AUX_ERR_EXCEEDED,	/* too many errors			*/
 	VDEV_AUX_IO_FAILURE,	/* experienced I/O failure		*/
--- a/usr/src/uts/common/sys/nvpair.h	Sun May 20 02:46:39 2012 -0500
+++ b/usr/src/uts/common/sys/nvpair.h	Mon May 21 12:11:39 2012 -0700
@@ -20,12 +20,14 @@
  */
 /*
  * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
  */
 
 #ifndef	_SYS_NVPAIR_H
 #define	_SYS_NVPAIR_H
 
 #include <sys/types.h>
+#include <sys/time.h>
 #include <sys/errno.h>
 #include <sys/va_list.h>
 
@@ -274,6 +276,73 @@
 int nvpair_value_double(nvpair_t *, double *);
 #endif
 
+nvlist_t *fnvlist_alloc(void);
+void fnvlist_free(nvlist_t *);
+size_t fnvlist_size(nvlist_t *);
+char *fnvlist_pack(nvlist_t *, size_t *);
+void fnvlist_pack_free(char *, size_t);
+nvlist_t *fnvlist_unpack(char *, size_t);
+nvlist_t *fnvlist_dup(nvlist_t *);
+void fnvlist_merge(nvlist_t *, nvlist_t *);
+
+void fnvlist_add_boolean(nvlist_t *, const char *);
+void fnvlist_add_boolean_value(nvlist_t *, const char *, boolean_t);
+void fnvlist_add_byte(nvlist_t *, const char *, uchar_t);
+void fnvlist_add_int8(nvlist_t *, const char *, int8_t);
+void fnvlist_add_uint8(nvlist_t *, const char *, uint8_t);
+void fnvlist_add_int16(nvlist_t *, const char *, int16_t);
+void fnvlist_add_uint16(nvlist_t *, const char *, uint16_t);
+void fnvlist_add_int32(nvlist_t *, const char *, int32_t);
+void fnvlist_add_uint32(nvlist_t *, const char *, uint32_t);
+void fnvlist_add_int64(nvlist_t *, const char *, int64_t);
+void fnvlist_add_uint64(nvlist_t *, const char *, uint64_t);
+void fnvlist_add_string(nvlist_t *, const char *, const char *);
+void fnvlist_add_nvlist(nvlist_t *, const char *, nvlist_t *);
+void fnvlist_add_nvpair(nvlist_t *, nvpair_t *);
+void fnvlist_add_boolean_array(nvlist_t *, const char *, boolean_t *, uint_t);
+void fnvlist_add_byte_array(nvlist_t *, const char *, uchar_t *, uint_t);
+void fnvlist_add_int8_array(nvlist_t *, const char *, int8_t *, uint_t);
+void fnvlist_add_uint8_array(nvlist_t *, const char *, uint8_t *, uint_t);
+void fnvlist_add_int16_array(nvlist_t *, const char *, int16_t *, uint_t);
+void fnvlist_add_uint16_array(nvlist_t *, const char *, uint16_t *, uint_t);
+void fnvlist_add_int32_array(nvlist_t *, const char *, int32_t *, uint_t);
+void fnvlist_add_uint32_array(nvlist_t *, const char *, uint32_t *, uint_t);
+void fnvlist_add_int64_array(nvlist_t *, const char *, int64_t *, uint_t);
+void fnvlist_add_uint64_array(nvlist_t *, const char *, uint64_t *, uint_t);
+void fnvlist_add_string_array(nvlist_t *, const char *, char * const *, uint_t);
+void fnvlist_add_nvlist_array(nvlist_t *, const char *, nvlist_t **, uint_t);
+
+void fnvlist_remove(nvlist_t *, const char *);
+void fnvlist_remove_nvpair(nvlist_t *, nvpair_t *);
+
+nvpair_t *fnvlist_lookup_nvpair(nvlist_t *nvl, const char *name);
+boolean_t fnvlist_lookup_boolean(nvlist_t *nvl, const char *name);
+boolean_t fnvlist_lookup_boolean_value(nvlist_t *nvl, const char *name);
+uchar_t fnvlist_lookup_byte(nvlist_t *nvl, const char *name);
+int8_t fnvlist_lookup_int8(nvlist_t *nvl, const char *name);
+int16_t fnvlist_lookup_int16(nvlist_t *nvl, const char *name);
+int32_t fnvlist_lookup_int32(nvlist_t *nvl, const char *name);
+int64_t fnvlist_lookup_int64(nvlist_t *nvl, const char *name);
+uint8_t fnvlist_lookup_uint8_t(nvlist_t *nvl, const char *name);
+uint16_t fnvlist_lookup_uint16(nvlist_t *nvl, const char *name);
+uint32_t fnvlist_lookup_uint32(nvlist_t *nvl, const char *name);
+uint64_t fnvlist_lookup_uint64(nvlist_t *nvl, const char *name);
+char *fnvlist_lookup_string(nvlist_t *nvl, const char *name);
+nvlist_t *fnvlist_lookup_nvlist(nvlist_t *nvl, const char *name);
+
+boolean_t fnvpair_value_boolean_value(nvpair_t *nvp);
+uchar_t fnvpair_value_byte(nvpair_t *nvp);
+int8_t fnvpair_value_int8(nvpair_t *nvp);
+int16_t fnvpair_value_int16(nvpair_t *nvp);
+int32_t fnvpair_value_int32(nvpair_t *nvp);
+int64_t fnvpair_value_int64(nvpair_t *nvp);
+uint8_t fnvpair_value_uint8_t(nvpair_t *nvp);
+uint16_t fnvpair_value_uint16(nvpair_t *nvp);
+uint32_t fnvpair_value_uint32(nvpair_t *nvp);
+uint64_t fnvpair_value_uint64(nvpair_t *nvp);
+char *fnvpair_value_string(nvpair_t *nvp);
+nvlist_t *fnvpair_value_nvlist(nvpair_t *nvp);
+
 #ifdef	__cplusplus
 }
 #endif