--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/cmd/zhack/Makefile Mon May 21 13:37:21 2012 -0700
@@ -0,0 +1,57 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# Copyright (c) 2012 by Delphix. All rights reserved.
+#
+
+PROG= zhack
+
+include ../Makefile.cmd
+
+#
+# Per-ISA build directories.  $(INTEL_BLD) and $(BUILD64) are not defined in
+# this file; they presumably come from the included master makefiles and
+# appear to enable each assignment only for the matching kind of build --
+# confirm there.
+#
+$(INTEL_BLD)SUBDIRS = $(MACH)
+$(BUILD64)SUBDIRS += $(MACH64)
+
+# Record which top-level target is being built so it can be forwarded below.
+all := TARGET = all
+install := TARGET = install
+clean := TARGET = clean
+clobber := TARGET = clobber
+lint := TARGET = lint
+
+.KEEP_STATE:
+
+all clean clobber lint: $(SUBDIRS)
+
+# After the per-ISA installs, replace the /usr/sbin entry with a link to
+# $(ISAEXEC).  Both commands are '-' prefixed, so their failures are ignored.
+install: $(SUBDIRS)
+	-$(RM) $(ROOTUSRSBINPROG)
+	-$(LN) $(ISAEXEC) $(ROOTUSRSBINPROG)
+
+# Run the forwarded target inside each ISA directory.  FRC has neither
+# prerequisites nor a recipe, so this rule is never considered up to date.
+$(SUBDIRS): FRC
+	@cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
+
+include ../Makefile.targ
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/cmd/zhack/Makefile.com Mon May 21 13:37:21 2012 -0700
@@ -0,0 +1,67 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# Copyright (c) 2012 by Delphix. All rights reserved.
+#
+
+PROG= zhack
+SRCS= ../$(PROG).c
+OBJS= $(PROG).o
+
+include ../../Makefile.cmd
+include ../../Makefile.ctf
+
+# zhack.c includes headers from libzpool, the kernel ZFS code and common/zfs.
+INCS += -I../../../lib/libzpool/common
+INCS += -I../../../uts/common/fs/zfs
+INCS += -I../../../common/zfs
+
+LDLIBS += -lzpool -lumem -lnvpair -lzfs
+
+# zhack.c uses C99 constructs (mid-block and for-loop declarations).
+C99MODE= -xc99=%all
+C99LMODE= -Xc99=%all
+
+CFLAGS += $(CCVERBOSE)
+CFLAGS64 += $(CCVERBOSE)
+CPPFLAGS += -D_LARGEFILE64_SOURCE=1 -D_REENTRANT $(INCS)
+
+.KEEP_STATE:
+
+all: $(PROG)
+
+$(PROG): $(OBJS)
+	$(LINK.c) -o $(PROG) $(OBJS) $(LDLIBS)
+	$(POST_PROCESS)
+
+#
+# Remove the objects produced by the pattern rule at the end of this file.
+# The previous empty recipe left $(OBJS) behind in the ISA directory.
+# NOTE(review): assumes Makefile.targ's clobber only removes $(PROG) on top
+# of this target -- confirm against ../../Makefile.targ.
+#
+clean:
+	$(RM) $(OBJS)
+
+lint:	lint_SRCS
+
+include ../../Makefile.targ
+
+# Objects are built in the ISA directory from the source one level up.
+%.o: ../%.c
+	$(COMPILE.c) $<
+	$(POST_PROCESS_O)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/cmd/zhack/amd64/Makefile Mon May 21 13:37:21 2012 -0700
@@ -0,0 +1,30 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+# Shared zhack rules first, then the 64-bit command overrides (Makefile.cmd.64
+# is presumably what switches the flags to 64-bit -- not visible here).
+include ../Makefile.com
+include ../../Makefile.cmd.64
+
+# Installing builds the program and delivers $(ROOTUSRSBINPROG64).
+install: all $(ROOTUSRSBINPROG64)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/cmd/zhack/i386/Makefile Mon May 21 13:37:21 2012 -0700
@@ -0,0 +1,29 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+# 32-bit build: only the shared zhack rules are needed.
+include ../Makefile.com
+
+# Installing builds the program and delivers $(ROOTUSRSBINPROG32).
+install: all $(ROOTUSRSBINPROG32)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/cmd/zhack/sparcv9/Makefile Mon May 21 13:37:21 2012 -0700
@@ -0,0 +1,30 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+# Shared zhack rules first, then the 64-bit command overrides (Makefile.cmd.64
+# is presumably what switches the flags to 64-bit -- not visible here).
+include ../Makefile.com
+include ../../Makefile.cmd.64
+
+# Installing builds the program and delivers $(ROOTUSRSBINPROG64).
+install: all $(ROOTUSRSBINPROG64)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/cmd/zhack/zhack.c Mon May 21 13:37:21 2012 -0700
@@ -0,0 +1,533 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2012 by Delphix. All rights reserved.
+ */
+
+/*
+ * zhack is a debugging tool that can write changes to a ZFS pool using
+ * libzpool, for testing purposes. Altering pools with zhack is unsupported
+ * and may result in corrupted pools.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <sys/zfs_context.h>
+#include <sys/spa.h>
+#include <sys/spa_impl.h>
+#include <sys/dmu.h>
+#include <sys/zap.h>
+#include <sys/zfs_znode.h>
+#include <sys/dsl_synctask.h>
+#include <sys/vdev.h>
+#include <sys/fs/zfs.h>
+#include <sys/dmu_objset.h>
+#include <sys/dsl_pool.h>
+#include <sys/zio_checksum.h>
+#include <sys/zio_compress.h>
+#include <sys/zfeature.h>
+#undef ZFS_MAXNAMELEN
+#undef verify
+#include <libzfs.h>
+
+extern boolean_t zfeature_checks_disable;
+
+const char cmdname[] = "zhack";
+libzfs_handle_t *g_zfs;
+static importargs_t g_importargs;
+static char *g_pool;
+static boolean_t g_readonly;
+
+/*
+ * Print the command synopsis to stderr and exit with status 1.
+ * This function does not return.
+ */
+static void
+usage(void)
+{
+	/* Only the first part is formatted; the subcommand list is fixed text. */
+	(void) fprintf(stderr,
+	    "Usage: %s [-c cachefile] [-d dir] <subcommand> <args> ...\n"
+	    "where <subcommand> <args> is one of the following:\n"
+	    "\n", cmdname);
+
+	(void) fputs(
+	    " feature stat <pool>\n"
+	    " print information about enabled features\n"
+	    " feature enable [-d desc] <pool> <feature>\n"
+	    " add a new enabled feature to the pool\n"
+	    " -d <desc> sets the feature's description\n"
+	    " feature ref [-md] <pool> <feature>\n"
+	    " change the refcount on the given feature\n"
+	    " -d decrease instead of increase the refcount\n"
+	    " -m add the feature to the label if increasing refcount\n"
+	    "\n"
+	    " <feature> : should be a feature guid\n", stderr);
+	exit(1);
+}
+
+
+/*
+ * Report an unrecoverable error and exit with status 1.  The output on
+ * stderr is "<cmdname>: ", the printf-style message built from fmt and its
+ * arguments, and then a newline (so fmt does not need to supply one).
+ * This function does not return.
+ */
+static void
+fatal(const char *fmt, ...)
+{
+	va_list args;
+
+	(void) fprintf(stderr, "%s: ", cmdname);
+
+	va_start(args, fmt);
+	(void) vfprintf(stderr, fmt, args);
+	va_end(args);
+
+	(void) fputc('\n', stderr);
+	exit(1);
+}
+
+/*
+ * Callback registered for DMU_OST_ZFS objsets by import_pool().  Returns
+ * ENOENT for bonus types other than DMU_OT_ZNODE and DMU_OT_SA; for those
+ * two types it prints a message and aborts the process.
+ */
+/* ARGSUSED */
+static int
+space_delta_cb(dmu_object_type_t bonustype, void *data,
+    uint64_t *userp, uint64_t *groupp)
+{
+	/* Only znode and SA bonus buffers mark an object that is tracked. */
+	if (bonustype == DMU_OT_ZNODE || bonustype == DMU_OT_SA) {
+		(void) fprintf(stderr,
+		    "modifying object that needs user accounting");
+		abort();
+		/* NOTREACHED */
+	}
+
+	return (ENOENT);
+}
+
+/*
+ * Initialize the userland ZFS environment and import the pool that contains
+ * target, so that the pool can subsequently be opened with spa_open().
+ *
+ * target is the dataset whose pool we want to open; everything from the
+ * first '/' or '@' onward is ignored when searching for the pool.  readonly
+ * requests a read-only import, in which case a pool that may be active is
+ * acceptable.
+ *
+ * Sets g_zfs and g_readonly, and g_pool when a search is required.  Every
+ * failure is reported through fatal(), which exits.
+ */
+static void
+import_pool(const char *target, boolean_t readonly)
+{
+	nvlist_t *config;
+	nvlist_t *pools;
+	int error;
+	char *sepp;
+	spa_t *spa;
+	nvpair_t *elem;
+	nvlist_t *props;
+	const char *name;
+
+	kernel_init(readonly ? FREAD : (FREAD | FWRITE));
+	g_zfs = libzfs_init();
+	/* Checked explicitly: an ASSERT() may be compiled out of the build. */
+	if (g_zfs == NULL)
+		fatal("internal error: failed to initialize ZFS library");
+
+	dmu_objset_register_type(DMU_OST_ZFS, space_delta_cb);
+
+	g_readonly = readonly;
+
+	/*
+	 * If we only want readonly access, it's OK if we find
+	 * a potentially-active (ie, imported into the kernel) pool from the
+	 * default cachefile.
+	 */
+	if (readonly && spa_open(target, &spa, FTAG) == 0) {
+		spa_close(spa, FTAG);
+		return;
+	}
+
+	g_importargs.unique = B_TRUE;
+	g_importargs.can_be_active = readonly;
+	g_pool = strdup(target);
+	if (g_pool == NULL)
+		fatal("out of memory");
+	/* Reduce a dataset or snapshot name to its pool component. */
+	if ((sepp = strpbrk(g_pool, "/@")) != NULL)
+		*sepp = '\0';
+	g_importargs.poolname = g_pool;
+	pools = zpool_search_import(g_zfs, &g_importargs);
+
+	if (pools == NULL || nvlist_next_nvpair(pools, NULL) == NULL) {
+		/*
+		 * Nothing importable was found.  If active pools were
+		 * excluded, look again with them allowed so that an in-use
+		 * pool gets a more specific message.
+		 */
+		if (!g_importargs.can_be_active) {
+			g_importargs.can_be_active = B_TRUE;
+			if (zpool_search_import(g_zfs, &g_importargs) != NULL ||
+			    spa_open(target, &spa, FTAG) == 0) {
+				fatal("cannot import '%s': pool is active; run "
+				    "\"zpool export %s\" first",
+				    g_pool, g_pool);
+			}
+		}
+
+		/* fatal() appends the newline itself. */
+		fatal("cannot import '%s': no such pool available", g_pool);
+	}
+
+	/* The search was unique, so use the first (only) candidate. */
+	elem = nvlist_next_nvpair(pools, NULL);
+	name = nvpair_name(elem);
+	verify(nvpair_value_nvlist(elem, &config) == 0);
+
+	props = NULL;
+	if (readonly) {
+		verify(nvlist_alloc(&props, NV_UNIQUE_NAME, 0) == 0);
+		verify(nvlist_add_uint64(props,
+		    zpool_prop_to_name(ZPOOL_PROP_READONLY), 1) == 0);
+	}
+
+	/* Feature checks are suppressed only for the duration of the import. */
+	zfeature_checks_disable = B_TRUE;
+	error = spa_import(name, config, props, ZFS_IMPORT_NORMAL);
+	zfeature_checks_disable = B_FALSE;
+	/* EEXIST is treated as success (presumably: already imported). */
+	if (error == EEXIST)
+		error = 0;
+
+	if (error)
+		fatal("can't import '%s': %s", name, strerror(error));
+}
+
+/*
+ * Import the pool for target via import_pool() and open it, storing the
+ * handle in *spa and holding it with tag.  Exits through fatal() if the
+ * open fails or if the pool's version is older than SPA_VERSION_FEATURES.
+ */
+static void
+zhack_spa_open(const char *target, boolean_t readonly, void *tag, spa_t **spa)
+{
+	int error;
+
+	import_pool(target, readonly);
+
+	/* Feature checks are switched off only around the open itself. */
+	zfeature_checks_disable = B_TRUE;
+	error = spa_open(target, spa, tag);
+	zfeature_checks_disable = B_FALSE;
+	if (error != 0)
+		fatal("cannot open '%s': %s", target, strerror(error));
+
+	if (spa_version(*spa) >= SPA_VERSION_FEATURES)
+		return;
+
+	fatal("'%s' has version %d, features not enabled", target,
+	    (int)spa_version(*spa));
+}
+
+/*
+ * Print every entry of ZAP object obj in objset os, preceded by a
+ * "<name>_obj:" heading.  Entries made of 8-byte integers are printed as a
+ * single unsigned number; all other entries are expected to be byte arrays
+ * and are looked up again and printed as strings.
+ */
+static void
+dump_obj(objset_t *os, uint64_t obj, const char *name)
+{
+	zap_cursor_t cursor;
+	zap_attribute_t attr;
+	char val[1024];
+
+	(void) printf("%s_obj:\n", name);
+
+	zap_cursor_init(&cursor, os, obj);
+	while (zap_cursor_retrieve(&cursor, &attr) == 0) {
+		if (attr.za_integer_length != 8) {
+			/* String value: fetch it in full by name. */
+			ASSERT(attr.za_integer_length == 1);
+			VERIFY(zap_lookup(os, obj, attr.za_name,
+			    1, sizeof (val), val) == 0);
+			(void) printf("\t%s = %s\n", attr.za_name, val);
+		} else {
+			ASSERT(attr.za_num_integers == 1);
+			(void) printf("\t%s = %llu\n", attr.za_name,
+			    (u_longlong_t)attr.za_first_integer);
+		}
+		zap_cursor_advance(&cursor);
+	}
+	zap_cursor_fini(&cursor);
+}
+
+/*
+ * Print the name of every pair in spa->spa_label_features, one per line,
+ * under a "label config:" heading.
+ */
+static void
+dump_mos(spa_t *spa)
+{
+	nvlist_t *features = spa->spa_label_features;
+	nvpair_t *pair = NULL;
+
+	(void) printf("label config:\n");
+	while ((pair = nvlist_next_nvpair(features, pair)) != NULL)
+		(void) printf("\t%s\n", nvpair_name(pair));
+}
+
+/*
+ * "feature stat <pool>": open the pool read-only and print its for_read,
+ * for_write and descriptions feature objects followed by the label
+ * features.  On entry argv[0] is the "stat" keyword.
+ */
+static void
+zhack_do_feature_stat(int argc, char **argv)
+{
+	spa_t *spa;
+	objset_t *mos;
+
+	/* The pool name is the word after "stat". */
+	if (argc < 2) {
+		(void) fprintf(stderr, "error: missing pool name\n");
+		usage();
+	}
+
+	zhack_spa_open(argv[1], B_TRUE, FTAG, &spa);
+	mos = spa->spa_meta_objset;
+
+	dump_obj(mos, spa->spa_feat_for_read_obj, "for_read");
+	dump_obj(mos, spa->spa_feat_for_write_obj, "for_write");
+	dump_obj(mos, spa->spa_feat_desc_obj, "descriptions");
+	dump_mos(spa);
+
+	spa_close(spa, FTAG);
+}
+
+/*
+ * Sync task for "feature enable": arg1 is the spa_t and arg2 the
+ * zfeature_info_t that is passed to spa_feature_enable().
+ */
+static void
+feature_enable_sync(void *arg1, void *arg2, dmu_tx_t *tx)
+{
+	spa_feature_enable((spa_t *)arg1, (zfeature_info_t *)arg2, tx);
+}
+
+/*
+ * "feature enable [-r] [-d desc] <pool> <feature>": add a made-up feature,
+ * identified by guid, to the pool as enabled.
+ *
+ * -r sets fi_can_readonly on the feature; -d supplies the description,
+ * which otherwise defaults to "zhack injected".  Guids that are malformed,
+ * belong to a real feature, or are already enabled are rejected through
+ * fatal().  On entry argv[0] is the "enable" keyword.
+ */
+static void
+zhack_do_feature_enable(int argc, char **argv)
+{
+	int c;		/* int, not char: getopt() signals the end with -1 */
+	char *desc, *target;
+	spa_t *spa;
+	objset_t *mos;
+	zfeature_info_t feature;
+	zfeature_info_t *nodeps[] = { NULL };
+
+	/*
+	 * Features are not added to the pool's label until their refcounts
+	 * are incremented, so fi_mos can just be left as false for now.
+	 */
+	desc = NULL;
+	feature.fi_uname = "zhack";
+	feature.fi_mos = B_FALSE;
+	feature.fi_can_readonly = B_FALSE;
+	feature.fi_depends = nodeps;
+
+	/*
+	 * NOTE(review): 'm' is accepted by the option string but has no case
+	 * below, so it ends up in the default branch and prints the usage.
+	 */
+	optind = 1;
+	while ((c = getopt(argc, argv, "rmd:")) != -1) {
+		switch (c) {
+		case 'r':
+			feature.fi_can_readonly = B_TRUE;
+			break;
+		case 'd':
+			desc = strdup(optarg);
+			break;
+		default:
+			usage();
+			break;
+		}
+	}
+
+	if (desc == NULL)
+		desc = strdup("zhack injected");
+	feature.fi_desc = desc;
+
+	argc -= optind;
+	argv += optind;
+
+	if (argc < 2) {
+		(void) fprintf(stderr, "error: missing feature or pool name\n");
+		usage();
+	}
+	target = argv[0];
+	feature.fi_guid = argv[1];
+
+	if (!zfeature_is_valid_guid(feature.fi_guid))
+		fatal("invalid feature guid: %s", feature.fi_guid);
+
+	zhack_spa_open(target, B_FALSE, FTAG, &spa);
+	mos = spa->spa_meta_objset;
+
+	/* The format has a %s conversion, so the guid must be passed along. */
+	if (0 == zfeature_lookup_guid(feature.fi_guid, NULL)) {
+		fatal("'%s' is a real feature, will not enable",
+		    feature.fi_guid);
+	}
+	if (0 == zap_contains(mos, spa->spa_feat_desc_obj, feature.fi_guid))
+		fatal("feature already enabled: %s", feature.fi_guid);
+
+	VERIFY3U(0, ==, dsl_sync_task_do(spa->spa_dsl_pool, NULL,
+	    feature_enable_sync, spa, &feature, 5));
+
+	spa_close(spa, FTAG);
+
+	free(desc);
+}
+
+/*
+ * Sync task for "feature ref": arg1 is the spa_t and arg2 the
+ * zfeature_info_t that is passed to spa_feature_incr().
+ */
+static void
+feature_incr_sync(void *arg1, void *arg2, dmu_tx_t *tx)
+{
+	spa_feature_incr((spa_t *)arg1, (zfeature_info_t *)arg2, tx);
+}
+
+/*
+ * Sync task for "feature ref -d": arg1 is the spa_t and arg2 the
+ * zfeature_info_t that is passed to spa_feature_decr().
+ */
+static void
+feature_decr_sync(void *arg1, void *arg2, dmu_tx_t *tx)
+{
+	spa_feature_decr((spa_t *)arg1, (zfeature_info_t *)arg2, tx);
+}
+
+/*
+ * "feature ref [-md] <pool> <feature>": change the refcount of a feature
+ * that is already enabled on the pool.
+ *
+ * -d decrements instead of incrementing; -m sets fi_mos on the feature.
+ * Guids that are malformed, belong to a real feature, or are not enabled
+ * are rejected through fatal(), as is a decrement of a feature that is not
+ * active.  On entry argv[0] is the "ref" keyword.
+ */
+static void
+zhack_do_feature_ref(int argc, char **argv)
+{
+	int c;		/* int, not char: getopt() signals the end with -1 */
+	char *target;
+	boolean_t decr = B_FALSE;
+	spa_t *spa;
+	objset_t *mos;
+	zfeature_info_t feature;
+	zfeature_info_t *nodeps[] = { NULL };
+
+	/*
+	 * fi_desc does not matter here because it was written to disk
+	 * when the feature was enabled, but we need to properly set the
+	 * feature for read or write based on the information we read off
+	 * disk later.
+	 */
+	feature.fi_uname = "zhack";
+	feature.fi_mos = B_FALSE;
+	feature.fi_desc = NULL;
+	feature.fi_depends = nodeps;
+
+	optind = 1;
+	while ((c = getopt(argc, argv, "md")) != -1) {
+		switch (c) {
+		case 'm':
+			feature.fi_mos = B_TRUE;
+			break;
+		case 'd':
+			decr = B_TRUE;
+			break;
+		default:
+			usage();
+			break;
+		}
+	}
+	argc -= optind;
+	argv += optind;
+
+	if (argc < 2) {
+		(void) fprintf(stderr, "error: missing feature or pool name\n");
+		usage();
+	}
+	target = argv[0];
+	feature.fi_guid = argv[1];
+
+	if (!zfeature_is_valid_guid(feature.fi_guid))
+		fatal("invalid feature guid: %s", feature.fi_guid);
+
+	zhack_spa_open(target, B_FALSE, FTAG, &spa);
+	mos = spa->spa_meta_objset;
+
+	/* The format has a %s conversion, so the guid must be passed along. */
+	if (0 == zfeature_lookup_guid(feature.fi_guid, NULL)) {
+		fatal("'%s' is a real feature, will not change refcount",
+		    feature.fi_guid);
+	}
+
+	/* Which on-disk object lists the guid decides fi_can_readonly. */
+	if (0 == zap_contains(mos, spa->spa_feat_for_read_obj,
+	    feature.fi_guid)) {
+		feature.fi_can_readonly = B_FALSE;
+	} else if (0 == zap_contains(mos, spa->spa_feat_for_write_obj,
+	    feature.fi_guid)) {
+		feature.fi_can_readonly = B_TRUE;
+	} else {
+		fatal("feature is not enabled: %s", feature.fi_guid);
+	}
+
+	if (decr && !spa_feature_is_active(spa, &feature))
+		fatal("feature refcount already 0: %s", feature.fi_guid);
+
+	VERIFY3U(0, ==, dsl_sync_task_do(spa->spa_dsl_pool, NULL,
+	    decr ? feature_decr_sync : feature_incr_sync, spa, &feature, 5));
+
+	spa_close(spa, FTAG);
+}
+
+/*
+ * Dispatch "feature <operation> ..." to the handler for stat, enable or
+ * ref.  A missing or unknown operation prints an error followed by the
+ * usage, which exits.  Always returns 0 otherwise.  On entry argv[0] is
+ * the "feature" keyword.
+ */
+static int
+zhack_do_feature(int argc, char **argv)
+{
+	char *op;
+
+	/* Step past "feature" so that argv[0] names the operation. */
+	argc--;
+	argv++;
+	if (argc == 0) {
+		(void) fprintf(stderr,
+		    "error: no feature operation specified\n");
+		usage();
+	}
+
+	op = argv[0];
+	if (strcmp(op, "stat") == 0) {
+		zhack_do_feature_stat(argc, argv);
+		return (0);
+	}
+	if (strcmp(op, "enable") == 0) {
+		zhack_do_feature_enable(argc, argv);
+		return (0);
+	}
+	if (strcmp(op, "ref") == 0) {
+		zhack_do_feature_ref(argc, argv);
+		return (0);
+	}
+
+	(void) fprintf(stderr, "error: unknown subcommand: %s\n", op);
+	usage();
+
+	return (0);
+}
+
+/* Upper bound on the number of -d search directories accepted by main(). */
+#define	MAX_NUM_PATHS	1024
+
+/*
+ * Entry point: parse the global options (-c cachefile, -d dir), run the
+ * requested subcommand, and export the pool again unless it was opened
+ * read-only.  Returns the subcommand's result; usage and fatal errors exit
+ * with status 1.
+ */
+int
+main(int argc, char **argv)
+{
+	extern void zfs_prop_init(void);
+
+	char *path[MAX_NUM_PATHS];
+	const char *subcommand;
+	int rv = 0;
+	int c;		/* int, not char: getopt() signals the end with -1 */
+
+	g_importargs.path = path;
+
+	dprintf_setup(&argc, argv);
+	zfs_prop_init();
+
+	while ((c = getopt(argc, argv, "c:d:")) != -1) {
+		switch (c) {
+		case 'c':
+			g_importargs.cachefile = optarg;
+			break;
+		case 'd':
+			/*
+			 * Enforced at run time: assert() may be compiled
+			 * out, and path[] must not be overrun.
+			 */
+			if (g_importargs.paths >= MAX_NUM_PATHS) {
+				fatal("too many -d options (maximum %d)",
+				    MAX_NUM_PATHS);
+			}
+			g_importargs.path[g_importargs.paths++] = optarg;
+			break;
+		default:
+			usage();
+			break;
+		}
+	}
+
+	/* Leave argv[0] as the subcommand and reset getopt for its options. */
+	argc -= optind;
+	argv += optind;
+	optind = 1;
+
+	if (argc == 0) {
+		(void) fprintf(stderr, "error: no command specified\n");
+		usage();
+	}
+
+	subcommand = argv[0];
+
+	if (strcmp(subcommand, "feature") == 0) {
+		rv = zhack_do_feature(argc, argv);
+	} else {
+		(void) fprintf(stderr, "error: unknown subcommand: %s\n",
+		    subcommand);
+		usage();
+	}
+
+	/* fatal() appends the newline itself. */
+	if (!g_readonly && spa_export(g_pool, NULL, B_TRUE, B_TRUE) != 0) {
+		fatal("pool export failed; "
+		    "changes may not be committed to disk");
+	}
+
+	libzfs_fini(g_zfs);
+	kernel_fini();
+
+	return (rv);
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/common/nvpair/fnvpair.c Mon May 21 13:37:21 2012 -0700
@@ -0,0 +1,496 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2012 by Delphix. All rights reserved.
+ */
+
+#include <sys/nvpair.h>
+#include <sys/kmem.h>
+#include <sys/debug.h>
+#ifndef _KERNEL
+#include <stdlib.h>
+#endif
+
+/*
+ * "Force" nvlist wrapper.
+ *
+ * These functions wrap the nvlist_* functions with assertions that assume
+ * the operation is successful. This allows the caller's code to be much
+ * more readable, especially for the fnvlist_lookup_* and fnvpair_value_*
+ * functions, which can return the requested value (rather than filling in
+ * a pointer).
+ *
+ * These functions use NV_UNIQUE_NAME, encoding NV_ENCODE_NATIVE, and allocate
+ * with KM_SLEEP.
+ *
+ * More wrappers should be added as needed -- for example
+ * nvlist_lookup_*_array and nvpair_value_*_array.
+ */
+
+/*
+ * Allocate and return an empty NV_UNIQUE_NAME nvlist using KM_SLEEP.
+ * The allocation is VERIFY'd to succeed.
+ */
+nvlist_t *
+fnvlist_alloc(void)
+{
+	nvlist_t *list;
+
+	VERIFY3U(nvlist_alloc(&list, NV_UNIQUE_NAME, KM_SLEEP), ==, 0);
+	return (list);
+}
+
+/* Free nvl; a plain pass-through to nvlist_free(). */
+void
+fnvlist_free(nvlist_t *nvl)
+{
+	nvlist_free(nvl);
+}
+
+/*
+ * Return the size reported by nvlist_size() for nvl with NV_ENCODE_NATIVE
+ * encoding.  The call is VERIFY'd to succeed.
+ */
+size_t
+fnvlist_size(nvlist_t *nvl)
+{
+	size_t nbytes;
+
+	VERIFY3U(nvlist_size(nvl, &nbytes, NV_ENCODE_NATIVE), ==, 0);
+	return (nbytes);
+}
+
+/*
+ * Pack nvl with NV_ENCODE_NATIVE encoding into a newly allocated buffer.
+ * The buffer is returned and its size is stored in *sizep; the caller must
+ * release it with fnvlist_pack_free().  The pack is VERIFY'd to succeed.
+ */
+char *
+fnvlist_pack(nvlist_t *nvl, size_t *sizep)
+{
+	/* Starts out NULL; nvlist_pack() fills in the buffer address. */
+	char *buf = NULL;
+
+	VERIFY3U(nvlist_pack(nvl, &buf, sizep, NV_ENCODE_NATIVE,
+	    KM_SLEEP), ==, 0);
+	return (buf);
+}
+
+/*
+ * Release a buffer obtained from fnvlist_pack().  size is only used by the
+ * kernel build, where the buffer goes back through kmem_free(); userland
+ * uses free().
+ */
+/*ARGSUSED*/
+void
+fnvlist_pack_free(char *pack, size_t size)
+{
+#ifndef _KERNEL
+	free(pack);
+#else
+	kmem_free(pack, size);
+#endif
+}
+
+/*
+ * Unpack the buflen bytes at buf into a new nvlist and return it.
+ * The unpack is VERIFY'd to succeed.
+ */
+nvlist_t *
+fnvlist_unpack(char *buf, size_t buflen)
+{
+	nvlist_t *list;
+
+	VERIFY3U(nvlist_unpack(buf, buflen, &list, KM_SLEEP), ==, 0);
+	return (list);
+}
+
+/* Return a duplicate of nvl; the duplication is VERIFY'd to succeed. */
+nvlist_t *
+fnvlist_dup(nvlist_t *nvl)
+{
+	nvlist_t *copy;
+
+	VERIFY3U(nvlist_dup(nvl, &copy, KM_SLEEP), ==, 0);
+	return (copy);
+}
+
+/* Merge src into dst with nvlist_merge(); VERIFY'd to succeed. */
+void
+fnvlist_merge(nvlist_t *dst, nvlist_t *src)
+{
+	VERIFY3U(nvlist_merge(dst, src, KM_SLEEP), ==, 0);
+}
+
+/*
+ * fnvlist_add_<type>(): add a single value of the given type to nvl under
+ * name.  Each function forwards to the nvlist_add_<type>() routine of the
+ * same type and VERIFYs that it returned 0, so none of them reports an
+ * error to the caller.
+ */
+
+/* Adds a boolean entry that carries no value of its own. */
+void
+fnvlist_add_boolean(nvlist_t *nvl, const char *name)
+{
+	VERIFY3U(nvlist_add_boolean(nvl, name), ==, 0);
+}
+
+void
+fnvlist_add_boolean_value(nvlist_t *nvl, const char *name, boolean_t val)
+{
+	VERIFY3U(nvlist_add_boolean_value(nvl, name, val), ==, 0);
+}
+
+void
+fnvlist_add_byte(nvlist_t *nvl, const char *name, uchar_t val)
+{
+	VERIFY3U(nvlist_add_byte(nvl, name, val), ==, 0);
+}
+
+void
+fnvlist_add_int8(nvlist_t *nvl, const char *name, int8_t val)
+{
+	VERIFY3U(nvlist_add_int8(nvl, name, val), ==, 0);
+}
+
+void
+fnvlist_add_uint8(nvlist_t *nvl, const char *name, uint8_t val)
+{
+	VERIFY3U(nvlist_add_uint8(nvl, name, val), ==, 0);
+}
+
+void
+fnvlist_add_int16(nvlist_t *nvl, const char *name, int16_t val)
+{
+	VERIFY3U(nvlist_add_int16(nvl, name, val), ==, 0);
+}
+
+void
+fnvlist_add_uint16(nvlist_t *nvl, const char *name, uint16_t val)
+{
+	VERIFY3U(nvlist_add_uint16(nvl, name, val), ==, 0);
+}
+
+void
+fnvlist_add_int32(nvlist_t *nvl, const char *name, int32_t val)
+{
+	VERIFY3U(nvlist_add_int32(nvl, name, val), ==, 0);
+}
+
+void
+fnvlist_add_uint32(nvlist_t *nvl, const char *name, uint32_t val)
+{
+	VERIFY3U(nvlist_add_uint32(nvl, name, val), ==, 0);
+}
+
+void
+fnvlist_add_int64(nvlist_t *nvl, const char *name, int64_t val)
+{
+	VERIFY3U(nvlist_add_int64(nvl, name, val), ==, 0);
+}
+
+void
+fnvlist_add_uint64(nvlist_t *nvl, const char *name, uint64_t val)
+{
+	VERIFY3U(nvlist_add_uint64(nvl, name, val), ==, 0);
+}
+
+void
+fnvlist_add_string(nvlist_t *nvl, const char *name, const char *val)
+{
+	VERIFY3U(nvlist_add_string(nvl, name, val), ==, 0);
+}
+
+void
+fnvlist_add_nvlist(nvlist_t *nvl, const char *name, nvlist_t *val)
+{
+	VERIFY3U(nvlist_add_nvlist(nvl, name, val), ==, 0);
+}
+
+/* Adds an existing pair; the name comes from the pair itself. */
+void
+fnvlist_add_nvpair(nvlist_t *nvl, nvpair_t *pair)
+{
+	VERIFY3U(nvlist_add_nvpair(nvl, pair), ==, 0);
+}
+
+/*
+ * fnvlist_add_<type>_array(): add the n-element array val of the given
+ * type to nvl under name.  Each function forwards to the matching
+ * nvlist_add_<type>_array() routine and VERIFYs that it returned 0.
+ */
+
+void
+fnvlist_add_boolean_array(nvlist_t *nvl, const char *name,
+    boolean_t *val, uint_t n)
+{
+	VERIFY3U(nvlist_add_boolean_array(nvl, name, val, n), ==, 0);
+}
+
+void
+fnvlist_add_byte_array(nvlist_t *nvl, const char *name, uchar_t *val, uint_t n)
+{
+	VERIFY3U(nvlist_add_byte_array(nvl, name, val, n), ==, 0);
+}
+
+void
+fnvlist_add_int8_array(nvlist_t *nvl, const char *name, int8_t *val, uint_t n)
+{
+	VERIFY3U(nvlist_add_int8_array(nvl, name, val, n), ==, 0);
+}
+
+void
+fnvlist_add_uint8_array(nvlist_t *nvl, const char *name, uint8_t *val, uint_t n)
+{
+	VERIFY3U(nvlist_add_uint8_array(nvl, name, val, n), ==, 0);
+}
+
+void
+fnvlist_add_int16_array(nvlist_t *nvl, const char *name, int16_t *val, uint_t n)
+{
+	VERIFY3U(nvlist_add_int16_array(nvl, name, val, n), ==, 0);
+}
+
+void
+fnvlist_add_uint16_array(nvlist_t *nvl, const char *name,
+    uint16_t *val, uint_t n)
+{
+	VERIFY3U(nvlist_add_uint16_array(nvl, name, val, n), ==, 0);
+}
+
+void
+fnvlist_add_int32_array(nvlist_t *nvl, const char *name, int32_t *val, uint_t n)
+{
+	VERIFY3U(nvlist_add_int32_array(nvl, name, val, n), ==, 0);
+}
+
+void
+fnvlist_add_uint32_array(nvlist_t *nvl, const char *name,
+    uint32_t *val, uint_t n)
+{
+	VERIFY3U(nvlist_add_uint32_array(nvl, name, val, n), ==, 0);
+}
+
+void
+fnvlist_add_int64_array(nvlist_t *nvl, const char *name, int64_t *val, uint_t n)
+{
+	VERIFY3U(nvlist_add_int64_array(nvl, name, val, n), ==, 0);
+}
+
+void
+fnvlist_add_uint64_array(nvlist_t *nvl, const char *name,
+    uint64_t *val, uint_t n)
+{
+	VERIFY3U(nvlist_add_uint64_array(nvl, name, val, n), ==, 0);
+}
+
+void
+fnvlist_add_string_array(nvlist_t *nvl, const char *name,
+    char * const *val, uint_t n)
+{
+	VERIFY3U(nvlist_add_string_array(nvl, name, val, n), ==, 0);
+}
+
+void
+fnvlist_add_nvlist_array(nvlist_t *nvl, const char *name,
+    nvlist_t **val, uint_t n)
+{
+	VERIFY3U(nvlist_add_nvlist_array(nvl, name, val, n), ==, 0);
+}
+
+/*
+ * Remove name from nvl using nvlist_remove_all().  The call is VERIFY'd to
+ * return 0, so it must not fail.
+ */
+void
+fnvlist_remove(nvlist_t *nvl, const char *name)
+{
+	VERIFY3U(nvlist_remove_all(nvl, name), ==, 0);
+}
+
+/* Remove the given pair from nvl; the removal is VERIFY'd to succeed. */
+void
+fnvlist_remove_nvpair(nvlist_t *nvl, nvpair_t *pair)
+{
+	VERIFY3U(nvlist_remove_nvpair(nvl, pair), ==, 0);
+}
+
+/* Return the pair called name in nvl; the lookup is VERIFY'd to succeed. */
+nvpair_t *
+fnvlist_lookup_nvpair(nvlist_t *nvl, const char *name)
+{
+	nvpair_t *pair;
+
+	VERIFY3U(nvlist_lookup_nvpair(nvl, name, &pair), ==, 0);
+	return (pair);
+}
+
+/*
+ * Presence test: returns B_TRUE if the boolean entry called name exists in
+ * nvl.  Unlike the other lookup wrappers, a failed lookup is not fatal; it
+ * simply yields a false result.
+ */
+boolean_t
+fnvlist_lookup_boolean(nvlist_t *nvl, const char *name)
+{
+	int err = nvlist_lookup_boolean(nvl, name);
+
+	return (err == 0);
+}
+
+/*
+ * fnvlist_lookup_<type>(): return the value of the given type stored under
+ * name in nvl.  Each function forwards to the matching nvlist_lookup_*()
+ * routine and VERIFYs that it returned 0, so the entry must exist with
+ * that type.  The pointer-returning variants hand back whatever pointer
+ * the underlying lookup produced.
+ */
+
+boolean_t
+fnvlist_lookup_boolean_value(nvlist_t *nvl, const char *name)
+{
+	boolean_t value;
+
+	VERIFY3U(nvlist_lookup_boolean_value(nvl, name, &value), ==, 0);
+	return (value);
+}
+
+uchar_t
+fnvlist_lookup_byte(nvlist_t *nvl, const char *name)
+{
+	uchar_t value;
+
+	VERIFY3U(nvlist_lookup_byte(nvl, name, &value), ==, 0);
+	return (value);
+}
+
+int8_t
+fnvlist_lookup_int8(nvlist_t *nvl, const char *name)
+{
+	int8_t value;
+
+	VERIFY3U(nvlist_lookup_int8(nvl, name, &value), ==, 0);
+	return (value);
+}
+
+int16_t
+fnvlist_lookup_int16(nvlist_t *nvl, const char *name)
+{
+	int16_t value;
+
+	VERIFY3U(nvlist_lookup_int16(nvl, name, &value), ==, 0);
+	return (value);
+}
+
+int32_t
+fnvlist_lookup_int32(nvlist_t *nvl, const char *name)
+{
+	int32_t value;
+
+	VERIFY3U(nvlist_lookup_int32(nvl, name, &value), ==, 0);
+	return (value);
+}
+
+int64_t
+fnvlist_lookup_int64(nvlist_t *nvl, const char *name)
+{
+	int64_t value;
+
+	VERIFY3U(nvlist_lookup_int64(nvl, name, &value), ==, 0);
+	return (value);
+}
+
+/*
+ * NOTE(review): unlike its siblings this name ends in "_t"; it is left
+ * unchanged here because it is the function's external name.
+ */
+uint8_t
+fnvlist_lookup_uint8_t(nvlist_t *nvl, const char *name)
+{
+	uint8_t value;
+
+	VERIFY3U(nvlist_lookup_uint8(nvl, name, &value), ==, 0);
+	return (value);
+}
+
+uint16_t
+fnvlist_lookup_uint16(nvlist_t *nvl, const char *name)
+{
+	uint16_t value;
+
+	VERIFY3U(nvlist_lookup_uint16(nvl, name, &value), ==, 0);
+	return (value);
+}
+
+uint32_t
+fnvlist_lookup_uint32(nvlist_t *nvl, const char *name)
+{
+	uint32_t value;
+
+	VERIFY3U(nvlist_lookup_uint32(nvl, name, &value), ==, 0);
+	return (value);
+}
+
+uint64_t
+fnvlist_lookup_uint64(nvlist_t *nvl, const char *name)
+{
+	uint64_t value;
+
+	VERIFY3U(nvlist_lookup_uint64(nvl, name, &value), ==, 0);
+	return (value);
+}
+
+char *
+fnvlist_lookup_string(nvlist_t *nvl, const char *name)
+{
+	char *str;
+
+	VERIFY3U(nvlist_lookup_string(nvl, name, &str), ==, 0);
+	return (str);
+}
+
+nvlist_t *
+fnvlist_lookup_nvlist(nvlist_t *nvl, const char *name)
+{
+	nvlist_t *list;
+
+	VERIFY3U(nvlist_lookup_nvlist(nvl, name, &list), ==, 0);
+	return (list);
+}
+
+/*
+ * fnvpair_value_<type>(): return the value held by nvp as the given type.
+ * Each function forwards to the matching nvpair_value_*() routine and
+ * VERIFYs that it returned 0.  The pointer-returning variants hand back
+ * whatever pointer the underlying call produced.
+ */
+
+boolean_t
+fnvpair_value_boolean_value(nvpair_t *nvp)
+{
+	boolean_t value;
+
+	VERIFY3U(nvpair_value_boolean_value(nvp, &value), ==, 0);
+	return (value);
+}
+
+uchar_t
+fnvpair_value_byte(nvpair_t *nvp)
+{
+	uchar_t value;
+
+	VERIFY3U(nvpair_value_byte(nvp, &value), ==, 0);
+	return (value);
+}
+
+int8_t
+fnvpair_value_int8(nvpair_t *nvp)
+{
+	int8_t value;
+
+	VERIFY3U(nvpair_value_int8(nvp, &value), ==, 0);
+	return (value);
+}
+
+int16_t
+fnvpair_value_int16(nvpair_t *nvp)
+{
+	int16_t value;
+
+	VERIFY3U(nvpair_value_int16(nvp, &value), ==, 0);
+	return (value);
+}
+
+int32_t
+fnvpair_value_int32(nvpair_t *nvp)
+{
+	int32_t value;
+
+	VERIFY3U(nvpair_value_int32(nvp, &value), ==, 0);
+	return (value);
+}
+
+int64_t
+fnvpair_value_int64(nvpair_t *nvp)
+{
+	int64_t value;
+
+	VERIFY3U(nvpair_value_int64(nvp, &value), ==, 0);
+	return (value);
+}
+
+/*
+ * NOTE(review): unlike its siblings this name ends in "_t"; it is left
+ * unchanged here because it is the function's external name.
+ */
+uint8_t
+fnvpair_value_uint8_t(nvpair_t *nvp)
+{
+	uint8_t value;
+
+	VERIFY3U(nvpair_value_uint8(nvp, &value), ==, 0);
+	return (value);
+}
+
+uint16_t
+fnvpair_value_uint16(nvpair_t *nvp)
+{
+	uint16_t value;
+
+	VERIFY3U(nvpair_value_uint16(nvp, &value), ==, 0);
+	return (value);
+}
+
+uint32_t
+fnvpair_value_uint32(nvpair_t *nvp)
+{
+	uint32_t value;
+
+	VERIFY3U(nvpair_value_uint32(nvp, &value), ==, 0);
+	return (value);
+}
+
+uint64_t
+fnvpair_value_uint64(nvpair_t *nvp)
+{
+	uint64_t value;
+
+	VERIFY3U(nvpair_value_uint64(nvp, &value), ==, 0);
+	return (value);
+}
+
+char *
+fnvpair_value_string(nvpair_t *nvp)
+{
+	char *str;
+
+	VERIFY3U(nvpair_value_string(nvp, &str), ==, 0);
+	return (str);
+}
+
+nvlist_t *
+fnvpair_value_nvlist(nvpair_t *nvp)
+{
+	nvlist_t *list;
+
+	VERIFY3U(nvpair_value_nvlist(nvp, &list), ==, 0);
+	return (list);
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/common/zfs/zfeature_common.c Mon May 21 13:37:21 2012 -0700
@@ -0,0 +1,156 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2012 by Delphix. All rights reserved.
+ */
+
+#ifdef _KERNEL
+#include <sys/systm.h>
+#else
+#include <errno.h>
+#include <string.h>
+#endif
+#include <sys/debug.h>
+#include <sys/fs/zfs.h>
+#include <sys/inttypes.h>
+#include <sys/types.h>
+#include "zfeature_common.h"
+
+/*
+ * When set, zfeature_is_supported() reports every guid as supported, so pools
+ * with unsupported features can be opened. Set for testing only.
+ */
+boolean_t zfeature_checks_disable = B_FALSE;
+
+zfeature_info_t spa_feature_table[SPA_FEATURES];	/* indexed by feature id */
+
+/*
+ * Returns nonzero if c may appear in a feature guid. Lowercase letters and
+ * digits are always accepted; '.' is accepted only in the reverse dns portion
+ * (before the colon) and '_' only in the short name (after it). The set is
+ * mainly aesthetic and could be expanded in the future.
+ */
+static int
+valid_char(char c, boolean_t after_colon)
+{
+	char punct = after_colon ? '_' : '.';
+
+	return (c == punct || (c >= '0' && c <= '9') || (c >= 'a' && c <= 'z'));
+}
+
+/*
+ * Checks the form of a feature guid such as "com.company:feature_name": it
+ * must contain exactly one colon, separating a reverse dns organization name
+ * from the feature's "short" name, and only valid_char() characters otherwise.
+ */
+boolean_t
+zfeature_is_valid_guid(const char *name)
+{
+	boolean_t seen_colon = B_FALSE;
+
+	for (const char *p = name; *p != '\0'; p++) {
+		if (*p != ':') {
+			/* which punctuation is legal depends on the side */
+			if (!valid_char(*p, seen_colon))
+				return (B_FALSE);
+		} else if (seen_colon) {
+			/* a second colon is never allowed */
+			return (B_FALSE);
+		} else {
+			seen_colon = B_TRUE;
+		}
+	}
+
+	/* a guid with no colon at all is invalid */
+	return (seen_colon);
+}
+
+boolean_t
+zfeature_is_supported(const char *guid)
+{
+	/* Supported if checks are disabled or guid is in spa_feature_table. */
+	if (!zfeature_checks_disable && zfeature_lookup_guid(guid, NULL) != 0)
+		return (B_FALSE);
+	return (B_TRUE);
+}
+
+int
+zfeature_lookup_guid(const char *guid, zfeature_info_t **res)
+{
+	/* Scan spa_feature_table; res may be NULL to just test existence. */
+	for (zfeature_info_t *fi = spa_feature_table;
+	    fi < &spa_feature_table[SPA_FEATURES]; fi++) {
+		if (strcmp(guid, fi->fi_guid) != 0)
+			continue;
+		if (res != NULL)
+			*res = fi;
+		return (0);
+	}
+	return (ENOENT);	/* no table entry has this guid */
+}
+
+int
+zfeature_lookup_name(const char *name, zfeature_info_t **res)
+{
+	/* Scan spa_feature_table by fi_uname; res may be NULL. */
+	for (zfeature_info_t *fi = spa_feature_table;
+	    fi < &spa_feature_table[SPA_FEATURES]; fi++) {
+		if (strcmp(name, fi->fi_uname) != 0)
+			continue;
+		if (res != NULL)
+			*res = fi;
+		return (0);
+	}
+	return (ENOENT);	/* no table entry has this name */
+}
+
+static void
+zfeature_register(int fid, const char *guid, const char *name, const char *desc,
+    boolean_t readonly, boolean_t mos, zfeature_info_t **deps)
+{
+	/* Shared, empty, NULL-terminated list for features with no deps. */
+	static zfeature_info_t *nodeps[] = { NULL };
+	zfeature_info_t *fi;
+
+	ASSERT(name != NULL);
+	ASSERT(desc != NULL);
+	ASSERT(!readonly || !mos);
+	ASSERT3U(fid, <, SPA_FEATURES);
+	ASSERT(zfeature_is_valid_guid(guid));
+
+	/* Fill in slot fid of spa_feature_table from the arguments. */
+	fi = &spa_feature_table[fid];
+	fi->fi_uname = name;
+	fi->fi_guid = guid;
+	fi->fi_desc = desc;
+	fi->fi_can_readonly = readonly;
+	fi->fi_mos = mos;
+	fi->fi_depends = (deps != NULL) ? deps : nodeps;
+}
+
+void
+zpool_feature_init(void)
+{	/* Register async_destroy: readonly=B_TRUE, mos=B_FALSE, no deps. */
+ zfeature_register(SPA_FEATURE_ASYNC_DESTROY,
+ "com.delphix:async_destroy", "async_destroy",
+ "Destroy filesystems asynchronously.", B_TRUE, B_FALSE, NULL);
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/common/zfs/zfeature_common.h Mon May 21 13:37:21 2012 -0700
@@ -0,0 +1,71 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2012 by Delphix. All rights reserved.
+ */
+
+#ifndef _ZFEATURE_COMMON_H
+#define _ZFEATURE_COMMON_H
+
+#include <sys/fs/zfs.h>
+#include <sys/inttypes.h>
+#include <sys/types.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct zfeature_info;
+/* Describes one pool feature; one entry of spa_feature_table per feature. */
+typedef struct zfeature_info {
+ const char *fi_uname; /* User-facing feature name */
+ const char *fi_guid; /* On-disk feature identifier */
+ const char *fi_desc; /* Feature description */
+ boolean_t fi_can_readonly; /* Can open pool readonly w/o support? */
+ boolean_t fi_mos; /* Is the feature necessary to read the MOS? */
+ struct zfeature_info **fi_depends; /* dependency array; null terminated */
+} zfeature_info_t;
+
+typedef int (zfeature_func_t)(zfeature_info_t *fi, void *arg);
+/* NOTE(review): no user of ZFS_FEATURE_DEBUG is visible here; confirm. */
+#define ZFS_FEATURE_DEBUG
+
+typedef enum spa_feature {	/* index into spa_feature_table[] */
+	SPA_FEATURE_ASYNC_DESTROY,	/* com.delphix:async_destroy */
+	SPA_FEATURES			/* count of features; sizes the table */
+} spa_feature_t;
+
+extern zfeature_info_t spa_feature_table[SPA_FEATURES];
+/* B_TRUE iff the string has exactly one ':' and only permitted characters. */
+extern boolean_t zfeature_is_valid_guid(const char *);
+/* Table queries; the lookups return 0 (setting *res if non-NULL) or ENOENT. */
+extern boolean_t zfeature_is_supported(const char *);
+extern int zfeature_lookup_guid(const char *, zfeature_info_t **res);
+extern int zfeature_lookup_name(const char *, zfeature_info_t **res);
+/* Registers the known features in spa_feature_table. */
+extern void zpool_feature_init(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ZFEATURE_COMMON_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/man/man5/zpool-features.5 Mon May 21 13:37:21 2012 -0700
@@ -0,0 +1,172 @@
+'\" te
+.\" Copyright (c) 2012 by Delphix. All rights reserved.
+.\" The contents of this file are subject to the terms of the Common Development
+.\" and Distribution License (the "License"). You may not use this file except
+.\" in compliance with the License. You can obtain a copy of the license at
+.\" usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing.
+.\"
+.\" See the License for the specific language governing permissions and
+.\" limitations under the License. When distributing Covered Code, include this
+.\" CDDL HEADER in each file and include the License file at
+.\" usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this
+.\" CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your
+.\" own identifying information:
+.\" Portions Copyright [yyyy] [name of copyright owner]
+.TH ZPOOL-FEATURES 5 "Mar 16, 2012"
+.SH NAME
+zpool\-features \- ZFS pool feature descriptions
+.SH DESCRIPTION
+.sp
+.LP
+ZFS pool on\-disk format versions are specified via "features" which replace
+the old on\-disk format numbers (the last supported on\-disk format number is
+28). To enable a feature on a pool use the \fBzpool\fR(1M) command to set
+the \fBfeature@\fR\fIfeature_name\fR property to \fBenabled\fR.
+.sp
+.LP
+The pool format does not affect file system version compatibility or the ability
+to send file systems between pools.
+.sp
+.LP
+Since most features can be enabled independently of each other, the on\-disk
+format of the pool is specified by the set of all features marked as
+\fBactive\fR on the pool. If the pool was created by another software version
+this set may include unsupported features.
+.SS "Identifying features"
+.sp
+.LP
+Every feature has a guid of the form \fIcom.example:feature_name\fR. The reverse
+DNS name ensures that the feature's guid is unique across all ZFS
+implementations. When unsupported features are encountered on a pool they will
+be identified by their guids. Refer to the documentation for the ZFS
+implementation that created the pool for information about those features.
+.sp
+.LP
+Each supported feature also has a short name. By convention a feature's short
+name is the portion of its guid which follows the ':' (e.g.
+\fIcom.example:feature_name\fR would have the short name \fIfeature_name\fR),
+however a feature's short name may differ across ZFS implementations if
+following the convention would result in name conflicts.
+.SS "Feature states"
+.sp
+.LP
+Features can be in one of three states:
+.sp
+.ne 2
+.na
+\fB\fBactive\fR\fR
+.ad
+.RS 12n
+This feature's on\-disk format changes are in effect on the pool. Support for
+this feature is required to import the pool in read\-write mode. If this
+feature is not read\-only compatible, support is also required to import the pool
+in read\-only mode (see "Read\-only compatibility").
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fBenabled\fR\fR
+.ad
+.RS 12n
+An administrator has marked this feature as enabled on the pool, but the
+feature's on\-disk format changes have not been made yet. The pool can still be
+imported by software that does not support this feature, but changes may be made
+to the on\-disk format at any time which will move the feature to the
+\fBactive\fR state. Some features may support returning to the \fBenabled\fR
+state after becoming \fBactive\fR. See feature\-specific documentation for
+details.
+.RE
+
+.sp
+.ne 2
+.na
+\fBdisabled\fR
+.ad
+.RS 12n
+This feature's on\-disk format changes have not been made and will not be made
+unless an administrator moves the feature to the \fBenabled\fR state. Features
+cannot be disabled once they have been enabled.
+.RE
+
+.sp
+.LP
+The state of supported features is exposed through pool properties of the form
+\fIfeature@short_name\fR.
+.SS "Read\-only compatibility"
+.sp
+.LP
+Some features may make on\-disk format changes that do not interfere with other
+software's ability to read from the pool. These features are referred to as
+"read\-only compatible". If all unsupported features on a pool are read\-only
+compatible, the pool can be imported in read\-only mode by setting the
+\fBreadonly\fR property during import (see \fBzpool\fR(1M) for details on
+importing pools).
+.SS "Unsupported features"
+.sp
+.LP
+For each unsupported feature enabled on an imported pool, a pool property
+named \fIunsupported@feature_guid\fR will indicate why the import was allowed
+despite the unsupported feature. Possible values for this property are:
+
+.sp
+.ne 2
+.na
+\fB\fBinactive\fR\fR
+.ad
+.RS 12n
+The feature is in the \fBenabled\fR state and therefore the pool's on\-disk
+format is still compatible with software that does not support this feature.
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fBreadonly\fR\fR
+.ad
+.RS 12n
+The feature is read\-only compatible and the pool has been imported in
+read\-only mode.
+.RE
+
+.SS "Feature dependencies"
+.sp
+.LP
+Some features depend on other features being enabled in order to function
+properly. Enabling a feature will automatically enable any features it
+depends on.
+.SH FEATURES
+.sp
+.LP
+The following features are supported on this system:
+.sp
+.ne 2
+.na
+\fB\fBasync_destroy\fR\fR
+.ad
+.RS 4n
+.TS
+l l .
+GUID com.delphix:async_destroy
+READ\-ONLY COMPATIBLE yes
+DEPENDENCIES none
+.TE
+
+Destroying a file system requires traversing all of its data in order to
+return its used space to the pool. Without \fBasync_destroy\fR the file system
+is not fully removed until all space has been reclaimed. If the destroy
+operation is interrupted by a reboot or power outage the next attempt to open
+the pool will need to complete the destroy operation synchronously.
+
+When \fBasync_destroy\fR is enabled the file system's data will be reclaimed
+by a background process, allowing the destroy operation to complete without
+traversing the entire file system. The background process is able to resume
+interrupted destroys after the pool has been opened, eliminating the need
+to finish interrupted destroys as part of the open operation. The amount
+of space remaining to be reclaimed by the background process is available
+through the \fBfreeing\fR property.
+
+This feature is only \fBactive\fR while \fBfreeing\fR is non\-zero.
+.RE
+.SH "SEE ALSO"
+\fBzpool\fR(1M)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/fs/zfs/bptree.c Mon May 21 13:37:21 2012 -0700
@@ -0,0 +1,224 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2012 by Delphix. All rights reserved.
+ */
+
+#include <sys/arc.h>
+#include <sys/bptree.h>
+#include <sys/dmu.h>
+#include <sys/dmu_objset.h>
+#include <sys/dmu_tx.h>
+#include <sys/dmu_traverse.h>
+#include <sys/dsl_dataset.h>
+#include <sys/dsl_dir.h>
+#include <sys/dsl_pool.h>
+#include <sys/dnode.h>
+#include <sys/refcount.h>
+#include <sys/spa.h>
+
+/*
+ * A bptree is a queue of root block pointers from destroyed datasets. When a
+ * dataset is destroyed its root block pointer is put on the end of the pool's
+ * bptree queue so the dataset's blocks can be freed asynchronously by
+ * dsl_scan_sync. This allows the delete operation to finish without traversing
+ * all the dataset's blocks.
+ *
+ * Note that while bt_begin and bt_end are only ever incremented in this code
+ * they are effectively reset to 0 every time the entire bptree is freed because
+ * the bptree's object is destroyed and re-created.
+ */
+
+typedef struct bptree_args {	/* state passed to bptree_visit_cb() */
+	bptree_phys_t *ba_phys;	/* data in bonus buffer, dirtied if freeing */
+	boolean_t ba_free;	/* true if freeing during traversal */
+
+	bptree_itor_t *ba_func;	/* function to call for each blockpointer */
+	void *ba_arg;		/* caller supplied argument to ba_func */
+	dmu_tx_t *ba_tx;	/* caller supplied tx, NULL if not freeing */
+} bptree_args_t;
+
+uint64_t
+bptree_alloc(objset_t *os, dmu_tx_t *tx)
+{	/* Create an empty bptree object in os and return its object number. */
+ uint64_t obj;	/* object number returned to the caller */
+ dmu_buf_t *db;	/* hold on the new object's bonus buffer */
+ bptree_phys_t *bt;	/* bptree header, viewed through db->db_data */
+
+ obj = dmu_object_alloc(os, DMU_OTN_UINT64_METADATA,
+ SPA_MAXBLOCKSIZE, DMU_OTN_UINT64_METADATA,
+ sizeof (bptree_phys_t), tx);
+
+ /*
+ * Bonus buffer contents are already initialized to 0, but for
+ * readability we make it explicit.
+ */
+ VERIFY3U(0, ==, dmu_bonus_hold(os, obj, FTAG, &db));
+ dmu_buf_will_dirty(db, tx);	/* mark dirty before writing the header */
+ bt = db->db_data;
+ bt->bt_begin = 0;	/* bt_begin == bt_end: the queue starts empty */
+ bt->bt_end = 0;
+ bt->bt_bytes = 0;
+ bt->bt_comp = 0;
+ bt->bt_uncomp = 0;
+ dmu_buf_rele(db, FTAG);	/* drop the hold taken by dmu_bonus_hold() */
+
+ return (obj);
+}
+
+int
+bptree_free(objset_t *os, uint64_t obj, dmu_tx_t *tx)
+{	/* Free bptree object obj; returns dmu_object_free()'s result. */
+ dmu_buf_t *db;
+ bptree_phys_t *bt;
+
+ VERIFY3U(0, ==, dmu_bonus_hold(os, obj, FTAG, &db));
+ bt = db->db_data;
+ ASSERT3U(bt->bt_begin, ==, bt->bt_end);	/* no entries left to visit */
+ ASSERT3U(bt->bt_bytes, ==, 0);	/* and the space counters are drained */
+ ASSERT3U(bt->bt_comp, ==, 0);
+ ASSERT3U(bt->bt_uncomp, ==, 0);
+ dmu_buf_rele(db, FTAG);	/* release the hold before freeing the object */
+
+ return (dmu_object_free(os, obj, tx));
+}
+
+void
+bptree_add(objset_t *os, uint64_t obj, blkptr_t *bp, uint64_t birth_txg,
+ uint64_t bytes, uint64_t comp, uint64_t uncomp, dmu_tx_t *tx)
+{	/* Append bp to the end of bptree obj and add its sizes to the totals. */
+ dmu_buf_t *db;
+ bptree_phys_t *bt;
+ bptree_entry_phys_t bte;	/* entry image written to the object's data */
+
+ /*
+ * bptree objects are in the pool mos, therefore they can only be
+ * modified in syncing context. Furthermore, this is only modified
+ * by the sync thread, so no locking is necessary.
+ */
+ ASSERT(dmu_tx_is_syncing(tx));
+
+ VERIFY3U(0, ==, dmu_bonus_hold(os, obj, FTAG, &db));
+ bt = db->db_data;
+
+ bte.be_birth_txg = birth_txg;
+ bte.be_bp = *bp;
+ bzero(&bte.be_zb, sizeof (bte.be_zb));	/* bookmark starts zeroed */
+ dmu_write(os, obj, bt->bt_end * sizeof (bte), sizeof (bte), &bte, tx);
+
+ dmu_buf_will_dirty(db, tx);	/* header fields are updated below */
+ bt->bt_end++;	/* the entry was written at the old bt_end slot */
+ bt->bt_bytes += bytes;
+ bt->bt_comp += comp;
+ bt->bt_uncomp += uncomp;
+ dmu_buf_rele(db, FTAG);
+}
+
+/* ARGSUSED */
+static int
+bptree_visit_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf,
+    const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
+{
+	struct bptree_args *args = arg;
+	int rc;
+
+	if (bp == NULL)		/* skip callbacks that carry no bp */
+		return (0);
+	rc = args->ba_func(args->ba_arg, bp, args->ba_tx);
+	if (rc != 0 || !args->ba_free)
+		return (rc);
+	/* Callback succeeded while freeing: deduct this bp from the totals. */
+	args->ba_phys->bt_bytes -= bp_get_dsize_sync(spa, bp);
+	args->ba_phys->bt_comp -= BP_GET_PSIZE(bp);
+	args->ba_phys->bt_uncomp -= BP_GET_UCSIZE(bp);
+	return (0);
+}
+
+int
+bptree_iterate(objset_t *os, uint64_t obj, boolean_t free, bptree_itor_t func,
+    void *arg, dmu_tx_t *tx)
+{
+	int err;
+	uint64_t i;
+	dmu_buf_t *db;		/* hold on the bptree's bonus buffer */
+	struct bptree_args ba;	/* state handed to bptree_visit_cb() */
+
+	ASSERT(!free || dmu_tx_is_syncing(tx));
+
+	err = dmu_bonus_hold(os, obj, FTAG, &db);
+	if (err != 0)
+		return (err);
+
+	if (free)
+		dmu_buf_will_dirty(db, tx);
+
+	ba.ba_phys = db->db_data;
+	ba.ba_free = free;
+	ba.ba_func = func;
+	ba.ba_arg = arg;
+	ba.ba_tx = tx;
+
+	/* Traverse each queued entry; if freeing, retire it from the head. */
+	for (i = ba.ba_phys->bt_begin; i < ba.ba_phys->bt_end; i++) {
+		bptree_entry_phys_t bte;
+
+		ASSERT(!free || i == ba.ba_phys->bt_begin);
+
+		err = dmu_read(os, obj, i * sizeof (bte), sizeof (bte),
+		    &bte, DMU_READ_NO_PREFETCH);
+		if (err != 0)
+			break;
+
+		err = traverse_dataset_destroyed(os->os_spa, &bte.be_bp,
+		    bte.be_birth_txg, &bte.be_zb, TRAVERSE_POST,
+		    bptree_visit_cb, &ba);
+		if (free) {
+			ASSERT(err == 0 || err == ERESTART);
+			if (err != 0) {
+				/* save bookmark for future resume */
+				ASSERT3U(bte.be_zb.zb_objset, ==,
+				    ZB_DESTROYED_OBJSET);
+				ASSERT3U(bte.be_zb.zb_level, ==, 0);
+				dmu_write(os, obj, i * sizeof (bte),
+				    sizeof (bte), &bte, tx);
+				break;
+			}
+			ba.ba_phys->bt_begin++;
+			(void) dmu_free_range(os, obj,
+			    i * sizeof (bte), sizeof (bte), tx);
+		} else if (err != 0) {
+			break;	/* keep err; next dmu_read() would clobber it */
+		}
+	}
+
+	ASSERT(!free || err != 0 || ba.ba_phys->bt_begin == ba.ba_phys->bt_end);
+
+	/* if all blocks are free there should be no used space */
+	if (ba.ba_phys->bt_begin == ba.ba_phys->bt_end) {
+		ASSERT3U(ba.ba_phys->bt_bytes, ==, 0);
+		ASSERT3U(ba.ba_phys->bt_comp, ==, 0);
+		ASSERT3U(ba.ba_phys->bt_uncomp, ==, 0);
+	}
+
+	dmu_buf_rele(db, FTAG);
+	return (err);	/* 0, or the first error encountered */
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/fs/zfs/sys/bptree.h Mon May 21 13:37:21 2012 -0700
@@ -0,0 +1,64 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2012 by Delphix. All rights reserved.
+ */
+
+#ifndef _SYS_BPTREE_H
+#define _SYS_BPTREE_H
+
+#include <sys/spa.h>
+#include <sys/zio.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct bptree_phys {
+ uint64_t bt_begin;	/* index of the first entry still to be visited */
+ uint64_t bt_end;	/* one past the last entry; next bptree_add() slot */
+ uint64_t bt_bytes;	/* bptree_add() bytes not yet subtracted by freeing */
+ uint64_t bt_comp;	/* likewise for the comp argument */
+ uint64_t bt_uncomp;	/* likewise for the uncomp argument */
+} bptree_phys_t;
+
+typedef struct bptree_entry_phys {
+ blkptr_t be_bp;	/* block pointer handed to bptree_add() */
+ uint64_t be_birth_txg; /* only delete blocks born after this txg */
+ zbookmark_t be_zb; /* holds traversal resume point if needed */
+} bptree_entry_phys_t;
+
+typedef int bptree_itor_t(void *arg, const blkptr_t *bp, dmu_tx_t *tx);
+/* Create an empty bptree object / free one that has been fully drained. */
+uint64_t bptree_alloc(objset_t *os, dmu_tx_t *tx);
+int bptree_free(objset_t *os, uint64_t obj, dmu_tx_t *tx);
+/* Append one entry; bytes/comp/uncomp are added to the bptree's totals. */
+void bptree_add(objset_t *os, uint64_t obj, blkptr_t *bp, uint64_t birth_txg,
+ uint64_t bytes, uint64_t comp, uint64_t uncomp, dmu_tx_t *tx);
+/* Call func for each block pointer visited; free also retires entries. */
+int bptree_iterate(objset_t *os, uint64_t obj, boolean_t free,
+ bptree_itor_t func, void *arg, dmu_tx_t *tx);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_BPTREE_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/fs/zfs/sys/zfeature.h Mon May 21 13:37:21 2012 -0700
@@ -0,0 +1,52 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2012 by Delphix. All rights reserved.
+ */
+
+#ifndef _SYS_ZFEATURE_H
+#define _SYS_ZFEATURE_H
+
+#include <sys/dmu.h>
+#include <sys/nvpair.h>
+#include "zfeature_common.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern boolean_t feature_is_supported(objset_t *os, uint64_t obj,
+ uint64_t desc_obj, nvlist_t *unsup_feat);
+/* Forward declaration; the prototypes below only take pointers to it. */
+struct spa;
+extern void spa_feature_create_zap_objects(struct spa *, dmu_tx_t *);
+extern void spa_feature_enable(struct spa *, zfeature_info_t *, dmu_tx_t *);
+extern void spa_feature_incr(struct spa *, zfeature_info_t *, dmu_tx_t *);
+extern void spa_feature_decr(struct spa *, zfeature_info_t *, dmu_tx_t *);
+extern boolean_t spa_feature_is_enabled(struct spa *, zfeature_info_t *);
+extern boolean_t spa_feature_is_active(struct spa *, zfeature_info_t *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_ZFEATURE_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/fs/zfs/zfeature.c Mon May 21 13:37:21 2012 -0700
@@ -0,0 +1,414 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2012 by Delphix. All rights reserved.
+ */
+
+#include <sys/zfs_context.h>
+#include <sys/zfeature.h>
+#include <sys/dmu.h>
+#include <sys/nvpair.h>
+#include <sys/zap.h>
+#include <sys/dmu_tx.h>
+#include "zfeature_common.h"
+#include <sys/spa_impl.h>
+
+/*
+ * ZFS Feature Flags
+ * -----------------
+ *
+ * ZFS feature flags are used to provide fine-grained versioning to the ZFS
+ * on-disk format. Once enabled on a pool, feature flags replace the old
+ * spa_version() number.
+ *
+ * Each new on-disk format change will be given a uniquely identifying string
+ * guid rather than a version number. This avoids the problem of different
+ * organizations creating new on-disk formats with the same version number. To
+ * keep feature guids unique they should consist of the reverse dns name of the
+ * organization which implemented the feature and a short name for the feature,
+ * separated by a colon (e.g. com.delphix:async_destroy).
+ *
+ * Reference Counts
+ * ----------------
+ *
+ * Within each pool features can be in one of three states: disabled, enabled,
+ * or active. These states are differentiated by a reference count stored on
+ * disk for each feature:
+ *
+ * 1) If there is no reference count stored on disk the feature is disabled.
+ * 2) If the reference count is 0 a system administrator has enabled the
+ * feature, but the feature has not been used yet, so no on-disk
+ * format changes have been made.
+ * 3) If the reference count is greater than 0 the feature is active.
+ * The format changes required by the feature are currently on disk.
+ * Note that if the feature's format changes are reversed the feature
+ * may choose to set its reference count back to 0.
+ *
+ * Feature flags make no differentiation between non-zero reference counts
+ * for an active feature (e.g. a reference count of 1 means the same thing as a
+ * reference count of 27834721), but feature implementations may choose to use
+ * the reference count to store meaningful information. For example, a new RAID
+ * implementation might set the reference count to the number of vdevs using
+ * it. If all those disks are removed from the pool the feature goes back to
+ * having a reference count of 0.
+ *
+ * It is the responsibility of the individual features to maintain a non-zero
+ * reference count as long as the feature's format changes are present on disk.
+ *
+ * Dependencies
+ * ------------
+ *
+ * Each feature may depend on other features. The only effect of this
+ * relationship is that when a feature is enabled all of its dependencies are
+ * automatically enabled as well. Any future work to support disabling of
+ * features would need to ensure that features cannot be disabled if other
+ * enabled features depend on them.
+ *
+ * On-disk Format
+ * --------------
+ *
+ * When feature flags are enabled spa_version() is set to SPA_VERSION_FEATURES
+ * (5000). In order for this to work the pool is automatically upgraded to
+ * SPA_VERSION_BEFORE_FEATURES (28) first, so all pre-feature-flags on-disk
+ * format changes will be in use.
+ *
+ * Information about features is stored in 3 ZAP objects in the pool's MOS.
+ * These objects are linked to by the following names in the pool directory
+ * object:
+ *
+ * 1) features_for_read: feature guid -> reference count
+ * Features needed to open the pool for reading.
+ * 2) features_for_write: feature guid -> reference count
+ * Features needed to open the pool for writing.
+ * 3) feature_descriptions: feature guid -> descriptive string
+ * A human readable string.
+ *
+ * All enabled features appear in either features_for_read or
+ * features_for_write, but not both.
+ *
+ * To open a pool in read-only mode only the features listed in
+ * features_for_read need to be supported.
+ *
+ * To open the pool in read-write mode features in both features_for_read and
+ * features_for_write need to be supported.
+ *
+ * Some features may be required to read the ZAP objects containing feature
+ * information. To allow software to check for compatibility with these features
+ * before the pool is opened their names must be stored in the label in a
+ * new "features_for_read" entry (note that features that are only required
+ * to write to a pool never need to be stored in the label since the
+ * features_for_write ZAP object can be read before the pool is written to).
+ * To save space in the label features must be explicitly marked as needing to
+ * be written to the label. Also, reference counts are not stored in the label,
+ * instead any feature whose reference count drops to 0 is removed from the
+ * label.
+ *
+ * Adding New Features
+ * -------------------
+ *
+ * Features must be registered in the zpool_feature_init() function in
+ * zfeature_common.c using the zfeature_register() function. This function
+ * has arguments to specify if the feature should be stored in the
+ * features_for_read or features_for_write ZAP object and if it needs to be
+ * written to the label when active.
+ *
+ * Once a feature is registered it will appear as a "feature@<feature name>"
+ * property which can be set by an administrator. Feature implementors should
+ * use the spa_feature_is_enabled() and spa_feature_is_active() functions to
+ * query the state of a feature and the spa_feature_incr() and
+ * spa_feature_decr() functions to change an enabled feature's reference count.
+ * Reference counts may only be updated in the syncing context.
+ *
+ * Features may not perform enable-time initialization. Instead, any such
+ * initialization should occur when the feature is first used. This design
+ * enforces that on-disk changes be made only when features are used. Code
+ * should only check if a feature is enabled using spa_feature_is_enabled(),
+ * not by relying on any feature specific metadata existing. If a feature is
+ * enabled, but the feature's metadata is not on disk yet then it should be
+ * created as needed.
+ *
+ * As an example, consider the com.delphix:async_destroy feature. This feature
+ * relies on the existence of a bptree in the MOS that stores blocks for
+ * asynchronous freeing. This bptree is not created when async_destroy is
+ * enabled. Instead, when a dataset is destroyed spa_feature_is_enabled() is
+ * called to check if async_destroy is enabled. If it is and the bptree object
+ * does not exist yet, the bptree object is created as part of the dataset
+ * destroy and async_destroy's reference count is incremented to indicate it
+ * has made an on-disk format change. Later, after the destroyed dataset's
+ * blocks have all been asynchronously freed there is no longer any use for the
+ * bptree object, so it is destroyed and async_destroy's reference count is
+ * decremented back to 0 to indicate that it has undone its on-disk format
+ * changes.
+ */
+
+typedef enum {
+ FEATURE_ACTION_ENABLE,	/* add the feature with a refcount of 0 */
+ FEATURE_ACTION_INCR,	/* increment an enabled feature's refcount */
+ FEATURE_ACTION_DECR,	/* decrement an enabled feature's refcount */
+} feature_action_t;
+
+/*
+ * Checks that every feature active (non-zero refcount) in ZAP object `obj'
+ * is supported by this software. If unsup_feat is non-NULL, each unsupported
+ * feature is added to it as guid -> description ("" if none is found).
+ */
+boolean_t
+feature_is_supported(objset_t *os, uint64_t obj, uint64_t desc_obj,
+    nvlist_t *unsup_feat)
+{
+	boolean_t all_ok = B_TRUE;
+	zap_cursor_t zc;
+	zap_attribute_t za;
+
+	for (zap_cursor_init(&zc, os, obj);
+	    zap_cursor_retrieve(&zc, &za) == 0;
+	    zap_cursor_advance(&zc)) {
+		char buf[MAXPATHLEN];
+		char *desc;
+
+		ASSERT(za.za_integer_length == sizeof (uint64_t) &&
+		    za.za_num_integers == 1);
+
+		/* A zero refcount or a recognized feature is no obstacle. */
+		if (za.za_first_integer == 0 ||
+		    zfeature_is_supported(za.za_name))
+			continue;
+
+		all_ok = B_FALSE;
+		if (unsup_feat == NULL)
+			continue;
+
+		desc = buf;
+		if (zap_lookup(os, desc_obj, za.za_name,
+		    1, sizeof (buf), buf) != 0)
+			desc = "";
+		VERIFY(nvlist_add_string(unsup_feat, za.za_name, desc) == 0);
+	}
+	zap_cursor_fini(&zc);
+
+	return (all_ok);
+}
+
+/* Fetches a feature's on-disk refcount; ENOTSUP means it is not enabled. */
+static int
+feature_get_refcount(objset_t *os, uint64_t read_obj, uint64_t write_obj,
+    zfeature_info_t *feature, uint64_t *res)
+{
+	uint64_t zapobj = feature->fi_can_readonly ? write_obj : read_obj;
+	uint64_t count;
+	int rc;
+
+	ASSERT(0 != zapobj);
+
+	rc = zap_lookup(os, zapobj, feature->fi_guid, sizeof (uint64_t), 1,
+	    &count);
+	if (rc == ENOENT)
+		return (ENOTSUP);
+	if (rc != 0)
+		return (rc);
+
+	*res = count;
+	return (0);
+}
+
+/*
+ * Applies `action' to `feature' in the feature ZAP objects as part of tx.
+ * ENABLE first enables every fi_depends entry, then adds the feature with a
+ * refcount of 0 plus its description; if already enabled it is a no-op.
+ * INCR/DECR adjust an enabled feature's refcount. Returns 0, ENOTSUP (INCR
+ * or DECR of a feature that is not enabled), EOVERFLOW (refcount would
+ * wrap), EINVAL (unknown action) or the error of a failed ZAP operation.
+ */
+static int
+feature_do_action(objset_t *os, uint64_t read_obj, uint64_t write_obj,
+    uint64_t desc_obj, zfeature_info_t *feature, feature_action_t action,
+    dmu_tx_t *tx)
+{
+	int error, i;
+	uint64_t refcount;
+	uint64_t zapobj = feature->fi_can_readonly ? write_obj : read_obj;
+
+	ASSERT(0 != zapobj);
+	ASSERT(zfeature_is_valid_guid(feature->fi_guid));
+
+	/* ENOENT only means "not enabled"; other errors are I/O failures. */
+	error = zap_lookup(os, zapobj, feature->fi_guid,
+	    sizeof (uint64_t), 1, &refcount);
+	if (error != 0 && error != ENOENT)
+		return (error);
+
+	switch (action) {
+	case FEATURE_ACTION_ENABLE:
+		/* If the feature is already enabled, ignore the request. */
+		if (error == 0)
+			return (0);
+		refcount = 0;
+		break;
+	case FEATURE_ACTION_INCR:
+		if (error == ENOENT)
+			return (ENOTSUP);
+		if (refcount == UINT64_MAX)
+			return (EOVERFLOW);
+		refcount++;
+		break;
+	case FEATURE_ACTION_DECR:
+		if (error == ENOENT)
+			return (ENOTSUP);
+		if (refcount == 0)
+			return (EOVERFLOW);
+		refcount--;
+		break;
+	default:
+		/* Don't write an indeterminate refcount on non-DEBUG builds. */
+		ASSERT(0);
+		return (EINVAL);
+	}
+
+	if (action == FEATURE_ACTION_ENABLE) {
+		/* Dependencies are enabled before the feature itself. */
+		for (i = 0; feature->fi_depends[i] != NULL; i++) {
+			error = feature_do_action(os, read_obj, write_obj,
+			    desc_obj, feature->fi_depends[i],
+			    FEATURE_ACTION_ENABLE, tx);
+			if (error != 0)
+				return (error);
+		}
+	}
+
+	error = zap_update(os, zapobj, feature->fi_guid,
+	    sizeof (uint64_t), 1, &refcount, tx);
+	if (error != 0)
+		return (error);
+
+	if (action == FEATURE_ACTION_ENABLE) {
+		error = zap_update(os, desc_obj, feature->fi_guid, 1,
+		    strlen(feature->fi_desc) + 1, feature->fi_desc, tx);
+		if (error != 0)
+			return (error);
+	}
+
+	if (action == FEATURE_ACTION_INCR && refcount == 1 && feature->fi_mos) {
+		spa_activate_mos_feature(dmu_objset_spa(os), feature->fi_guid);
+	}
+
+	if (action == FEATURE_ACTION_DECR && refcount == 0) {
+		spa_deactivate_mos_feature(dmu_objset_spa(os),
+		    feature->fi_guid);
+	}
+
+	return (0);
+}
+
+/*
+ * Creates the three feature flags ZAP objects and links each one into the
+ * pool directory. This happens in exactly two situations: pool creation
+ * and pool upgrade.
+ */
+void
+spa_feature_create_zap_objects(spa_t *spa, dmu_tx_t *tx)
+{
+	objset_t *mos = spa->spa_meta_objset;
+
+	ASSERT(dsl_pool_sync_context(spa_get_dsl(spa)) || (!spa->spa_sync_on &&
+	    tx->tx_txg == TXG_INITIAL));
+
+	spa->spa_feat_for_read_obj = zap_create_link(mos, DMU_OTN_ZAP_METADATA,
+	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_FEATURES_FOR_READ, tx);
+	spa->spa_feat_for_write_obj = zap_create_link(mos, DMU_OTN_ZAP_METADATA,
+	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_FEATURES_FOR_WRITE, tx);
+	spa->spa_feat_desc_obj = zap_create_link(mos, DMU_OTN_ZAP_METADATA,
+	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_FEATURE_DESCRIPTIONS, tx);
+}
+
+/*
+ * Enable any required dependencies, then the feature; errors are fatal.
+ */
+void
+spa_feature_enable(spa_t *spa, zfeature_info_t *feature, dmu_tx_t *tx)
+{
+ ASSERT3U(spa_version(spa), >=, SPA_VERSION_FEATURES);
+ VERIFY3U(0, ==, feature_do_action(spa->spa_meta_objset,
+ spa->spa_feat_for_read_obj, spa->spa_feat_for_write_obj,
+ spa->spa_feat_desc_obj, feature, FEATURE_ACTION_ENABLE, tx));
+}
+
+/*
+ * Increments the feature's refcount. The feature must already be enabled and
+ * its refcount must be below UINT64_MAX: feature_do_action() reports ENOTSUP
+ * or EOVERFLOW otherwise, and any non-zero result fails the VERIFY3U below
+ * rather than being returned. Must be called from syncing context.
+ */
+void
+spa_feature_incr(spa_t *spa, zfeature_info_t *feature, dmu_tx_t *tx)
+{
+ ASSERT3U(spa_version(spa), >=, SPA_VERSION_FEATURES);
+ VERIFY3U(0, ==, feature_do_action(spa->spa_meta_objset,
+ spa->spa_feat_for_read_obj, spa->spa_feat_for_write_obj,
+ spa->spa_feat_desc_obj, feature, FEATURE_ACTION_INCR, tx));
+}
+
+/*
+ * Decrements the feature's refcount. The feature must already be enabled and
+ * its refcount must be non-zero: feature_do_action() reports ENOTSUP or
+ * EOVERFLOW otherwise, and any non-zero result fails the VERIFY3U below
+ * rather than being returned. Must be called from syncing context.
+ */
+void
+spa_feature_decr(spa_t *spa, zfeature_info_t *feature, dmu_tx_t *tx)
+{
+ ASSERT3U(spa_version(spa), >=, SPA_VERSION_FEATURES);
+ VERIFY3U(0, ==, feature_do_action(spa->spa_meta_objset,
+ spa->spa_feat_for_read_obj, spa->spa_feat_for_write_obj,
+ spa->spa_feat_desc_obj, feature, FEATURE_ACTION_DECR, tx));
+}
+
+boolean_t
+spa_feature_is_enabled(spa_t *spa, zfeature_info_t *feature)
+{
+ int err;
+ uint64_t refcount;
+
+ if (spa_version(spa) < SPA_VERSION_FEATURES)
+ return (B_FALSE);
+ /* A refcount entry (err == 0) means enabled; ENOTSUP means disabled. */
+ err = feature_get_refcount(spa->spa_meta_objset,
+ spa->spa_feat_for_read_obj, spa->spa_feat_for_write_obj,
+ feature, &refcount);
+ ASSERT(err == 0 || err == ENOTSUP);
+ return (err == 0);
+}
+
+boolean_t
+spa_feature_is_active(spa_t *spa, zfeature_info_t *feature)
+{
+ int err;
+ uint64_t refcount;
+
+ if (spa_version(spa) < SPA_VERSION_FEATURES)
+ return (B_FALSE);
+ /* Active means the feature is enabled and its refcount is non-zero. */
+ err = feature_get_refcount(spa->spa_meta_objset,
+ spa->spa_feat_for_read_obj, spa->spa_feat_for_write_obj,
+ feature, &refcount);
+ ASSERT(err == 0 || err == ENOTSUP);
+ return (err == 0 && refcount > 0);
+}