PSARC/2009/297 zfs snapshot holds
authorChris Kirby <chris.kirby@sun.com>
Sat, 01 Aug 2009 15:09:50 -0600
changeset 10242 c40d075fbca6
parent 10241 356a64b58ebc
child 10243 051184299af5
PSARC/2009/297 zfs snapshot holds 6803121 want user-settable refcounts on snapshots 6851824 zfs_ioc_rename() can be called with a NULL zc_name
usr/src/cmd/ndmpd/tlm/tlm_lib.c
usr/src/cmd/truss/codes.c
usr/src/cmd/zdb/zdb.c
usr/src/cmd/zfs/zfs_iter.c
usr/src/cmd/zfs/zfs_main.c
usr/src/cmd/zoneadm/zfs.c
usr/src/cmd/zpool/zpool_main.c
usr/src/cmd/ztest/ztest.c
usr/src/common/zfs/zfs_deleg.c
usr/src/common/zfs/zfs_deleg.h
usr/src/common/zfs/zfs_namecheck.c
usr/src/common/zfs/zfs_prop.c
usr/src/grub/capability
usr/src/grub/grub-0.97/stage2/zfs-include/zfs.h
usr/src/lib/fm/topo/libtopo/common/zfs.c
usr/src/lib/libzfs/common/libzfs.h
usr/src/lib/libzfs/common/libzfs_dataset.c
usr/src/lib/libzfs/common/libzfs_sendrecv.c
usr/src/lib/libzfs/common/libzfs_util.c
usr/src/lib/libzfs/common/mapfile-vers
usr/src/lib/libzfs_jni/common/libzfs_jni_property.c
usr/src/lib/pyzfs/Makefile.com
usr/src/lib/pyzfs/common/allow.py
usr/src/lib/pyzfs/common/dataset.py
usr/src/lib/pyzfs/common/holds.py
usr/src/lib/pyzfs/common/ioctl.c
usr/src/lib/pyzfs/common/table.py
usr/src/lib/pyzfs/common/userspace.py
usr/src/pkgdefs/SUNWzfsu/prototype_com
usr/src/uts/common/fs/zfs/dmu.c
usr/src/uts/common/fs/zfs/dmu_objset.c
usr/src/uts/common/fs/zfs/dmu_send.c
usr/src/uts/common/fs/zfs/dsl_dataset.c
usr/src/uts/common/fs/zfs/dsl_prop.c
usr/src/uts/common/fs/zfs/sys/dmu.h
usr/src/uts/common/fs/zfs/sys/dmu_impl.h
usr/src/uts/common/fs/zfs/sys/dmu_objset.h
usr/src/uts/common/fs/zfs/sys/dsl_dataset.h
usr/src/uts/common/fs/zfs/sys/dsl_deleg.h
usr/src/uts/common/fs/zfs/sys/dsl_prop.h
usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h
usr/src/uts/common/fs/zfs/zfs_ctldir.c
usr/src/uts/common/fs/zfs/zfs_ioctl.c
usr/src/uts/common/sys/fs/zfs.h
--- a/usr/src/cmd/ndmpd/tlm/tlm_lib.c	Sat Aug 01 11:59:47 2009 -0700
+++ b/usr/src/cmd/ndmpd/tlm/tlm_lib.c	Sat Aug 01 15:09:50 2009 -0600
@@ -1,5 +1,5 @@
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -1237,7 +1237,7 @@
 		(void) mutex_unlock(&zlib_mtx);
 		return (-1);
 	}
-	(void) zfs_destroy(zhp);
+	(void) zfs_destroy(zhp, B_FALSE);
 	zfs_close(zhp);
 	(void) mutex_unlock(&zlib_mtx);
 
--- a/usr/src/cmd/truss/codes.c	Sat Aug 01 11:59:47 2009 -0700
+++ b/usr/src/cmd/truss/codes.c	Sat Aug 01 15:09:50 2009 -0600
@@ -1103,6 +1103,12 @@
 		"zfs_cmd_t" },
 	{ (uint_t)ZFS_IOC_USERSPACE_UPGRADE,	"ZFS_IOC_USERSPACE_UPGRADE",
 		"zfs_cmd_t" },
+	{ (uint_t)ZFS_IOC_HOLD,			"ZFS_IOC_HOLD",
+		"zfs_cmd_t" },
+	{ (uint_t)ZFS_IOC_RELEASE,		"ZFS_IOC_RELEASE",
+		"zfs_cmd_t" },
+	{ (uint_t)ZFS_IOC_GET_HOLDS,		"ZFS_IOC_GET_HOLDS",
+		"zfs_cmd_t" },
 
 	/* kssl ioctls */
 	{ (uint_t)KSSL_ADD_ENTRY,		"KSSL_ADD_ENTRY",
--- a/usr/src/cmd/zdb/zdb.c	Sat Aug 01 11:59:47 2009 -0700
+++ b/usr/src/cmd/zdb/zdb.c	Sat Aug 01 15:09:50 2009 -0600
@@ -818,6 +818,8 @@
 	    (u_longlong_t)ds->ds_snapnames_zapobj);
 	(void) printf("\t\tnum_children = %llu\n",
 	    (u_longlong_t)ds->ds_num_children);
+	(void) printf("\t\tuserrefs_obj = %llu\n",
+	    (u_longlong_t)ds->ds_userrefs_obj);
 	(void) printf("\t\tcreation_time = %s", ctime(&crtime));
 	(void) printf("\t\tcreation_txg = %llu\n",
 	    (u_longlong_t)ds->ds_creation_txg);
@@ -1049,6 +1051,7 @@
 	dump_zap,		/* DSL scrub queue		*/
 	dump_zap,		/* ZFS user/group used		*/
 	dump_zap,		/* ZFS user/group quota		*/
+	dump_zap,		/* snapshot refcount tags	*/
 };
 
 static void
--- a/usr/src/cmd/zfs/zfs_iter.c	Sat Aug 01 11:59:47 2009 -0700
+++ b/usr/src/cmd/zfs/zfs_iter.c	Sat Aug 01 15:09:50 2009 -0600
@@ -362,7 +362,7 @@
 	cb.cb_types = types;
 	cb.cb_depth_limit = limit;
 	/*
-	 * If cb_proplist is provided then in the zfs_handles created  we
+	 * If cb_proplist is provided then in the zfs_handles created we
 	 * retain only those properties listed in cb_proplist and sortcol.
 	 * The rest are pruned. So, the caller should make sure that no other
 	 * properties other than those listed in cb_proplist/sortcol are
--- a/usr/src/cmd/zfs/zfs_main.c	Sat Aug 01 11:59:47 2009 -0700
+++ b/usr/src/cmd/zfs/zfs_main.c	Sat Aug 01 15:09:50 2009 -0600
@@ -80,6 +80,8 @@
 static int zfs_do_promote(int argc, char **argv);
 static int zfs_do_userspace(int argc, char **argv);
 static int zfs_do_python(int argc, char **argv);
+static int zfs_do_hold(int argc, char **argv);
+static int zfs_do_release(int argc, char **argv);
 
 /*
  * Enable a reasonable set of defaults for libumem debugging on DEBUG builds.
@@ -121,7 +123,10 @@
 	HELP_ALLOW,
 	HELP_UNALLOW,
 	HELP_USERSPACE,
-	HELP_GROUPSPACE
+	HELP_GROUPSPACE,
+	HELP_HOLD,
+	HELP_HOLDS,
+	HELP_RELEASE
 } zfs_help_t;
 
 typedef struct zfs_command {
@@ -169,6 +174,10 @@
 	{ "allow",	zfs_do_python,		HELP_ALLOW		},
 	{ NULL },
 	{ "unallow",	zfs_do_python,		HELP_UNALLOW		},
+	{ NULL },
+	{ "hold",	zfs_do_hold,		HELP_HOLD		},
+	{ "holds",	zfs_do_python,		HELP_HOLDS		},
+	{ "release",	zfs_do_release,		HELP_RELEASE		},
 };
 
 #define	NCOMMAND	(sizeof (command_table) / sizeof (command_table[0]))
@@ -189,7 +198,8 @@
 		    "-V <size> <volume>\n"));
 	case HELP_DESTROY:
 		return (gettext("\tdestroy [-rRf] "
-		    "<filesystem|volume|snapshot>\n"));
+		    "<filesystem|volume|snapshot>\n"
+		    "\tdestroy -d [-r] <filesystem|volume|snapshot>\n"));
 	case HELP_GET:
 		return (gettext("\tget [-rHp] [-d max] "
 		    "[-o field[,...]] [-s source[,...]]\n"
@@ -266,6 +276,12 @@
 		return (gettext("\tgroupspace [-hniHpU] [-o field[,...]] "
 		    "[-sS field] ... [-t type[,...]]\n"
 		    "\t    <filesystem|snapshot>\n"));
+	case HELP_HOLD:
+		return (gettext("\thold [-r] <tag> <snapshot> ...\n"));
+	case HELP_HOLDS:
+		return (gettext("\tholds [-r] <snapshot> ...\n"));
+	case HELP_RELEASE:
+		return (gettext("\trelease [-r] <tag> <snapshot> ...\n"));
 	}
 
 	abort();
@@ -769,11 +785,13 @@
 }
 
 /*
- * zfs destroy [-rf] <fs, snap, vol>
+ * zfs destroy [-rRf] <fs, snap, vol>
+ * zfs destroy -d [-r] <fs, snap, vol>
  *
  * 	-r	Recursively destroy all children
  * 	-R	Recursively destroy all dependents, including clones
  * 	-f	Force unmounting of any dependents
+ *	-d	If we can't destroy now, mark for deferred destruction
  *
  * Destroys the given dataset.  By default, it will unmount any filesystems,
  * and refuse to destroy a dataset that has any dependents.  A dependent can
@@ -789,6 +807,7 @@
 	boolean_t	cb_closezhp;
 	zfs_handle_t	*cb_target;
 	char		*cb_snapname;
+	boolean_t	cb_defer_destroy;
 } destroy_cbdata_t;
 
 /*
@@ -869,7 +888,7 @@
 	 * Bail out on the first error.
 	 */
 	if (zfs_unmount(zhp, NULL, cbp->cb_force ? MS_FORCE : 0) != 0 ||
-	    zfs_destroy(zhp) != 0) {
+	    zfs_destroy(zhp, cbp->cb_defer_destroy) != 0) {
 		zfs_close(zhp);
 		return (-1);
 	}
@@ -923,8 +942,11 @@
 	char *cp;
 
 	/* check options */
-	while ((c = getopt(argc, argv, "frR")) != -1) {
+	while ((c = getopt(argc, argv, "dfrR")) != -1) {
 		switch (c) {
+		case 'd':
+			cb.cb_defer_destroy = B_TRUE;
+			break;
 		case 'f':
 			cb.cb_force = 1;
 			break;
@@ -956,6 +978,9 @@
 		usage(B_FALSE);
 	}
 
+	if (cb.cb_defer_destroy && cb.cb_doclones)
+		usage(B_FALSE);
+
 	/*
 	 * If we are doing recursive destroy of a snapshot, then the
 	 * named snapshot may not exist.  Go straight to libzfs.
@@ -977,7 +1002,7 @@
 			}
 		}
 
-		ret = zfs_destroy_snaps(zhp, cp);
+		ret = zfs_destroy_snaps(zhp, cp, cb.cb_defer_destroy);
 		zfs_close(zhp);
 		if (ret) {
 			(void) fprintf(stderr,
@@ -986,7 +1011,6 @@
 		return (ret != 0);
 	}
 
-
 	/* Open the given dataset */
 	if ((zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_DATASET)) == NULL)
 		return (1);
@@ -1014,15 +1038,15 @@
 	 * Check for any dependents and/or clones.
 	 */
 	cb.cb_first = B_TRUE;
-	if (!cb.cb_doclones &&
+	if (!cb.cb_doclones && !cb.cb_defer_destroy &&
 	    zfs_iter_dependents(zhp, B_TRUE, destroy_check_dependent,
 	    &cb) != 0) {
 		zfs_close(zhp);
 		return (1);
 	}
 
-	if (cb.cb_error ||
-	    zfs_iter_dependents(zhp, B_FALSE, destroy_callback, &cb) != 0) {
+	if (cb.cb_error || (!cb.cb_defer_destroy &&
+	    (zfs_iter_dependents(zhp, B_FALSE, destroy_callback, &cb) != 0))) {
 		zfs_close(zhp);
 		return (1);
 	}
@@ -1035,7 +1059,6 @@
 	if (destroy_callback(zhp, &cb) != 0)
 		return (1);
 
-
 	return (0);
 }
 
@@ -2651,6 +2674,108 @@
 	return (err != 0);
 }
 
+static int
+zfs_do_hold_rele_impl(int argc, char **argv, boolean_t holding)
+{
+	int errors = 0;
+	int i;
+	const char *tag;
+	boolean_t recursive = B_FALSE;
+	int c;
+	int (*func)(zfs_handle_t *, const char *, const char *, boolean_t);
+
+	/* check options */
+	while ((c = getopt(argc, argv, "r")) != -1) {
+		switch (c) {
+		case 'r':
+			recursive = B_TRUE;
+			break;
+		case '?':
+			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
+			    optopt);
+			usage(B_FALSE);
+		}
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	/* check number of arguments */
+	if (argc < 2)
+		usage(B_FALSE);
+
+	tag = argv[0];
+	--argc;
+	++argv;
+
+	if (holding) {
+		if (tag[0] == '.') {
+			/* tags starting with '.' are reserved for libzfs */
+			(void) fprintf(stderr,
+			    gettext("tag may not start with '.'\n"));
+			usage(B_FALSE);
+		}
+		func = zfs_hold;
+	} else {
+		func = zfs_release;
+	}
+
+	for (i = 0; i < argc; ++i) {
+		zfs_handle_t *zhp;
+		char parent[ZFS_MAXNAMELEN];
+		const char *delim;
+		char *path = argv[i];
+
+		delim = strchr(path, '@');
+		if (delim == NULL) {
+			(void) fprintf(stderr,
+			    gettext("'%s' is not a snapshot\n"), path);
+			++errors;
+			continue;
+		}
+		(void) strncpy(parent, path, delim - path);
+		parent[delim - path] = '\0';
+
+		zhp = zfs_open(g_zfs, parent,
+		    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
+		if (zhp == NULL) {
+			++errors;
+			continue;
+		}
+		if (func(zhp, delim+1, tag, recursive) != 0)
+			++errors;
+		zfs_close(zhp);
+	}
+
+	return (errors != 0);
+}
+
+/*
+ * zfs hold [-r] <tag> <snap> ...
+ *
+ * 	-r	Recursively hold
+ *
+ * Apply a user-hold with the given tag to the list of snapshots.
+ */
+static int
+zfs_do_hold(int argc, char **argv)
+{
+	return (zfs_do_hold_rele_impl(argc, argv, B_TRUE));
+}
+
+/*
+ * zfs release [-r] <tag> <snap> ...
+ *
+ * 	-r	Recursively release
+ *
+ * Release a user-hold with the given tag from the list of snapshots.
+ */
+static int
+zfs_do_release(int argc, char **argv)
+{
+	return (zfs_do_hold_rele_impl(argc, argv, B_FALSE));
+}
+
 typedef struct get_all_cbdata {
 	zfs_handle_t	**cb_handles;
 	size_t		cb_alloc;
--- a/usr/src/cmd/zoneadm/zfs.c	Sat Aug 01 11:59:47 2009 -0700
+++ b/usr/src/cmd/zoneadm/zfs.c	Sat Aug 01 15:09:50 2009 -0600
@@ -671,7 +671,7 @@
 	}
 
 	if (zfs_unmount(zhp, NULL, 0) == 0) {
-		(void) zfs_destroy(zhp);
+		(void) zfs_destroy(zhp, B_FALSE);
 	}
 
 	zfs_close(zhp);
@@ -961,7 +961,7 @@
 		if ((zhp = zfs_open(g_zfs, snap_name, ZFS_TYPE_SNAPSHOT))
 		    != NULL) {
 			if (zfs_unmount(zhp, NULL, 0) == 0)
-				(void) zfs_destroy(zhp);
+				(void) zfs_destroy(zhp, B_FALSE);
 			zfs_close(zhp);
 		}
 
@@ -1018,7 +1018,7 @@
 	if (zfs_mount(zhp, NULL, 0) != 0) {
 		(void) fprintf(stderr, gettext("cannot mount ZFS dataset %s: "
 		    "%s\n"), zfs_name, libzfs_error_description(g_zfs));
-		(void) zfs_destroy(zhp);
+		(void) zfs_destroy(zhp, B_FALSE);
 	} else {
 		if (chmod(zonepath, S_IRWXU) != 0) {
 			(void) fprintf(stderr, gettext("file system %s "
@@ -1085,7 +1085,7 @@
 		return (Z_ERR);
 	}
 
-	if (zfs_destroy(zhp) != 0) {
+	if (zfs_destroy(zhp, B_FALSE) != 0) {
 		/*
 		 * If the destroy fails for some reason, try to remount
 		 * the file system so that we can use "rm -rf" to clean up
@@ -1118,7 +1118,7 @@
 		    ZFS_TYPE_SNAPSHOT)) != NULL) {
 			if (zfs_iter_dependents(ohp, B_TRUE, has_dependent,
 			    NULL) == 0 && zfs_unmount(ohp, NULL, 0) == 0)
-				(void) zfs_destroy(ohp);
+				(void) zfs_destroy(ohp, B_FALSE);
 			zfs_close(ohp);
 		}
 	}
--- a/usr/src/cmd/zpool/zpool_main.c	Sat Aug 01 11:59:47 2009 -0700
+++ b/usr/src/cmd/zpool/zpool_main.c	Sat Aug 01 15:09:50 2009 -0600
@@ -3552,6 +3552,7 @@
 		(void) printf(gettext(" 15  user/group space accounting\n"));
 		(void) printf(gettext(" 16  stmf property support\n"));
 		(void) printf(gettext(" 17  Triple-parity RAID-Z\n"));
+		(void) printf(gettext(" 18  snapshot user holds\n"));
 		(void) printf(gettext("For more information on a particular "
 		    "version, including supported releases, see:\n\n"));
 		(void) printf("http://www.opensolaris.org/os/community/zfs/"
@@ -3637,6 +3638,8 @@
 	"refquota set",
 	"refreservation set",
 	"pool scrub done",
+	"user hold",
+	"user release",
 };
 
 /*
--- a/usr/src/cmd/ztest/ztest.c	Sat Aug 01 11:59:47 2009 -0700
+++ b/usr/src/cmd/ztest/ztest.c	Sat Aug 01 15:09:50 2009 -0600
@@ -1387,7 +1387,7 @@
 	/*
 	 * Destroy the dataset.
 	 */
-	error = dmu_objset_destroy(name);
+	error = dmu_objset_destroy(name, B_FALSE);
 	if (error) {
 		(void) dmu_objset_open(name, DMU_OST_OTHER,
 		    DS_MODE_USER | DS_MODE_READONLY, &os);
@@ -1560,7 +1560,7 @@
 	zil_close(zilog);
 	dmu_objset_close(os);
 
-	error = dmu_objset_destroy(name);
+	error = dmu_objset_destroy(name, B_FALSE);
 	if (error)
 		fatal(0, "dmu_objset_destroy(%s) = %d", name, error);
 
@@ -1583,7 +1583,7 @@
 	(void) snprintf(snapname, 100, "%s@%llu", osname,
 	    (u_longlong_t)za->za_instance);
 
-	error = dmu_objset_destroy(snapname);
+	error = dmu_objset_destroy(snapname, B_FALSE);
 	if (error != 0 && error != ENOENT)
 		fatal(0, "dmu_objset_destroy() = %d", error);
 	error = dmu_objset_snapshot(osname, strchr(snapname, '@')+1,
@@ -1614,19 +1614,19 @@
 	(void) snprintf(clone2name, 100, "%s/c2_%llu", osname, curval);
 	(void) snprintf(snap3name, 100, "%s@s3_%llu", clone1name, curval);
 
-	error = dmu_objset_destroy(clone2name);
+	error = dmu_objset_destroy(clone2name, B_FALSE);
 	if (error && error != ENOENT)
 		fatal(0, "dmu_objset_destroy(%s) = %d", clone2name, error);
-	error = dmu_objset_destroy(snap3name);
+	error = dmu_objset_destroy(snap3name, B_FALSE);
 	if (error && error != ENOENT)
 		fatal(0, "dmu_objset_destroy(%s) = %d", snap3name, error);
-	error = dmu_objset_destroy(snap2name);
+	error = dmu_objset_destroy(snap2name, B_FALSE);
 	if (error && error != ENOENT)
 		fatal(0, "dmu_objset_destroy(%s) = %d", snap2name, error);
-	error = dmu_objset_destroy(clone1name);
+	error = dmu_objset_destroy(clone1name, B_FALSE);
 	if (error && error != ENOENT)
 		fatal(0, "dmu_objset_destroy(%s) = %d", clone1name, error);
-	error = dmu_objset_destroy(snap1name);
+	error = dmu_objset_destroy(snap1name, B_FALSE);
 	if (error && error != ENOENT)
 		fatal(0, "dmu_objset_destroy(%s) = %d", snap1name, error);
 }
--- a/usr/src/common/zfs/zfs_deleg.c	Sat Aug 01 11:59:47 2009 -0700
+++ b/usr/src/common/zfs/zfs_deleg.c	Sat Aug 01 15:09:50 2009 -0600
@@ -67,6 +67,8 @@
 	{ZFS_DELEG_PERM_GROUPQUOTA, ZFS_DELEG_NOTE_GROUPQUOTA },
 	{ZFS_DELEG_PERM_USERUSED, ZFS_DELEG_NOTE_USERUSED },
 	{ZFS_DELEG_PERM_GROUPUSED, ZFS_DELEG_NOTE_GROUPUSED },
+	{ZFS_DELEG_PERM_HOLD, ZFS_DELEG_NOTE_HOLD },
+	{ZFS_DELEG_PERM_RELEASE, ZFS_DELEG_NOTE_RELEASE },
 	{NULL, ZFS_DELEG_NOTE_NONE }
 };
 
--- a/usr/src/common/zfs/zfs_deleg.h	Sat Aug 01 11:59:47 2009 -0700
+++ b/usr/src/common/zfs/zfs_deleg.h	Sat Aug 01 15:09:50 2009 -0600
@@ -61,6 +61,8 @@
 	ZFS_DELEG_NOTE_GROUPQUOTA,
 	ZFS_DELEG_NOTE_USERUSED,
 	ZFS_DELEG_NOTE_GROUPUSED,
+	ZFS_DELEG_NOTE_HOLD,
+	ZFS_DELEG_NOTE_RELEASE,
 	ZFS_DELEG_NOTE_NONE
 } zfs_deleg_note_t;
 
--- a/usr/src/common/zfs/zfs_namecheck.c	Sat Aug 01 11:59:47 2009 -0700
+++ b/usr/src/common/zfs/zfs_namecheck.c	Sat Aug 01 15:09:50 2009 -0600
@@ -59,7 +59,7 @@
  * Snapshot names must be made up of alphanumeric characters plus the following
  * characters:
  *
- * 	[-_.:]
+ * 	[-_.: ]
  */
 int
 snapshot_namecheck(const char *path, namecheck_err_t *why, char *what)
--- a/usr/src/common/zfs/zfs_prop.c	Sat Aug 01 11:59:47 2009 -0700
+++ b/usr/src/common/zfs/zfs_prop.c	Sat Aug 01 15:09:50 2009 -0600
@@ -235,6 +235,9 @@
 	/* readonly index (boolean) properties */
 	register_index(ZFS_PROP_MOUNTED, "mounted", 0, PROP_READONLY,
 	    ZFS_TYPE_FILESYSTEM, "yes | no", "MOUNTED", boolean_table);
+	register_index(ZFS_PROP_DEFER_DESTROY, "defer_destroy", 0,
+	    PROP_READONLY, ZFS_TYPE_SNAPSHOT, "yes | no", "DEFER_DESTROY",
+	    boolean_table);
 
 	/* set once index properties */
 	register_index(ZFS_PROP_NORMALIZE, "normalization", 0,
@@ -286,6 +289,8 @@
 	register_number(ZFS_PROP_USEDREFRESERV, "usedbyrefreservation", 0,
 	    PROP_READONLY,
 	    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<size>", "USEDREFRESERV");
+	register_number(ZFS_PROP_USERREFS, "userrefs", 0, PROP_READONLY,
+	    ZFS_TYPE_SNAPSHOT, "<count>", "USERREFS");
 
 	/* default number properties */
 	register_number(ZFS_PROP_QUOTA, "quota", 0, PROP_DEFAULT,
--- a/usr/src/grub/capability	Sat Aug 01 11:59:47 2009 -0700
+++ b/usr/src/grub/capability	Sat Aug 01 15:09:50 2009 -0600
@@ -40,7 +40,7 @@
 # This file and the associated version are Solaris specific and are
 # not a part of the open source distribution of GRUB.
 #
-VERSION=10
+VERSION=11
 dboot
 xVM
 zfs
--- a/usr/src/grub/grub-0.97/stage2/zfs-include/zfs.h	Sat Aug 01 11:59:47 2009 -0700
+++ b/usr/src/grub/grub-0.97/stage2/zfs-include/zfs.h	Sat Aug 01 15:09:50 2009 -0600
@@ -27,7 +27,7 @@
 /*
  * On-disk version number.
  */
-#define	SPA_VERSION			17ULL
+#define	SPA_VERSION			18ULL
 
 /*
  * The following are configuration names used in the nvlist describing a pool's
--- a/usr/src/lib/fm/topo/libtopo/common/zfs.c	Sat Aug 01 11:59:47 2009 -0700
+++ b/usr/src/lib/fm/topo/libtopo/common/zfs.c	Sat Aug 01 15:09:50 2009 -0600
@@ -45,7 +45,7 @@
 
 static int zfs_enum(topo_mod_t *, tnode_t *, const char *, topo_instance_t,
     topo_instance_t, void *, void *);
-static void zfs_release(topo_mod_t *, tnode_t *);
+static void zfs_rele(topo_mod_t *, tnode_t *);
 static int zfs_fmri_nvl2str(topo_mod_t *, tnode_t *, topo_version_t,
     nvlist_t *, nvlist_t **);
 
@@ -56,7 +56,7 @@
 };
 
 static const topo_modops_t zfs_ops =
-	{ zfs_enum, zfs_release };
+	{ zfs_enum, zfs_rele };
 static const topo_modinfo_t zfs_info =
 	{ ZFS, FM_FMRI_SCHEME_ZFS, ZFS_VERSION, &zfs_ops };
 
@@ -109,7 +109,7 @@
 
 /*ARGSUSED*/
 static void
-zfs_release(topo_mod_t *mp, tnode_t *node)
+zfs_rele(topo_mod_t *mp, tnode_t *node)
 {
 	topo_method_unregister_all(mp, node);
 }
--- a/usr/src/lib/libzfs/common/libzfs.h	Sat Aug 01 11:59:47 2009 -0700
+++ b/usr/src/lib/libzfs/common/libzfs.h	Sat Aug 01 15:09:50 2009 -0600
@@ -117,6 +117,8 @@
 	EZFS_NOTSUP,		/* ops not supported on this dataset */
 	EZFS_ACTIVE_SPARE,	/* pool has active shared spare devices */
 	EZFS_UNPLAYED_LOGS,	/* log device has unplayed logs */
+	EZFS_REFTAG_RELE,	/* snapshot release: tag not found */
+	EZFS_REFTAG_HOLD,	/* snapshot hold: tag already exists */
 	EZFS_UNKNOWN
 };
 
@@ -455,8 +457,8 @@
 extern int zfs_create(libzfs_handle_t *, const char *, zfs_type_t,
     nvlist_t *);
 extern int zfs_create_ancestors(libzfs_handle_t *, const char *);
-extern int zfs_destroy(zfs_handle_t *);
-extern int zfs_destroy_snaps(zfs_handle_t *, char *);
+extern int zfs_destroy(zfs_handle_t *, boolean_t);
+extern int zfs_destroy_snaps(zfs_handle_t *, char *, boolean_t);
 extern int zfs_clone(zfs_handle_t *, const char *, nvlist_t *);
 extern int zfs_snapshot(libzfs_handle_t *, const char *, boolean_t, nvlist_t *);
 extern int zfs_rollback(zfs_handle_t *, zfs_handle_t *, boolean_t);
@@ -464,6 +466,8 @@
 extern int zfs_send(zfs_handle_t *, const char *, const char *,
     boolean_t, boolean_t, boolean_t, boolean_t, int);
 extern int zfs_promote(zfs_handle_t *);
+extern int zfs_hold(zfs_handle_t *, const char *, const char *, boolean_t);
+extern int zfs_release(zfs_handle_t *, const char *, const char *, boolean_t);
 
 typedef int (*zfs_userspace_cb_t)(void *arg, const char *domain,
     uid_t rid, uint64_t space);
--- a/usr/src/lib/libzfs/common/libzfs_dataset.c	Sat Aug 01 11:59:47 2009 -0700
+++ b/usr/src/lib/libzfs/common/libzfs_dataset.c	Sat Aug 01 15:09:50 2009 -0600
@@ -2766,7 +2766,7 @@
  * isn't mounted, and that there are no active dependents.
  */
 int
-zfs_destroy(zfs_handle_t *zhp)
+zfs_destroy(zfs_handle_t *zhp, boolean_t defer)
 {
 	zfs_cmd_t zc = { 0 };
 
@@ -2790,6 +2790,7 @@
 		zc.zc_objset_type = DMU_OST_ZFS;
 	}
 
+	zc.zc_defer_destroy = defer;
 	if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_DESTROY, &zc) != 0) {
 		return (zfs_standard_error_fmt(zhp->zfs_hdl, errno,
 		    dgettext(TEXT_DOMAIN, "cannot destroy '%s'"),
@@ -2846,7 +2847,7 @@
  * Destroys all snapshots with the given name in zhp & descendants.
  */
 int
-zfs_destroy_snaps(zfs_handle_t *zhp, char *snapname)
+zfs_destroy_snaps(zfs_handle_t *zhp, char *snapname, boolean_t defer)
 {
 	zfs_cmd_t zc = { 0 };
 	int ret;
@@ -2863,6 +2864,7 @@
 
 	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
 	(void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value));
+	zc.zc_defer_destroy = defer;
 
 	ret = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_DESTROY_SNAPS, &zc);
 	if (ret != 0) {
@@ -3278,7 +3280,7 @@
 
 			logstr = zhp->zfs_hdl->libzfs_log_str;
 			zhp->zfs_hdl->libzfs_log_str = NULL;
-			cbp->cb_error |= zfs_destroy(zhp);
+			cbp->cb_error |= zfs_destroy(zhp, B_FALSE);
 			zhp->zfs_hdl->libzfs_log_str = logstr;
 		}
 	} else {
@@ -3292,7 +3294,7 @@
 			zfs_close(zhp);
 			return (0);
 		}
-		if (zfs_destroy(zhp) != 0)
+		if (zfs_destroy(zhp, B_FALSE) != 0)
 			cbp->cb_error = B_TRUE;
 		else
 			changelist_remove(clp, zhp->zfs_name);
@@ -4092,3 +4094,79 @@
 
 	return (error);
 }
+
+int
+zfs_hold(zfs_handle_t *zhp, const char *snapname, const char *tag,
+    boolean_t recursive)
+{
+	zfs_cmd_t zc = { 0 };
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+
+	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
+	(void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value));
+	(void) strlcpy(zc.zc_string, tag, sizeof (zc.zc_string));
+	zc.zc_cookie = recursive;
+
+	if (zfs_ioctl(hdl, ZFS_IOC_HOLD, &zc) != 0) {
+		char errbuf[ZFS_MAXNAMELEN+32];
+
+		/*
+		 * if it was recursive, the one that actually failed will be in
+		 * zc.zc_name.
+		 */
+		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+		    "cannot hold '%s@%s'"), zc.zc_name, snapname);
+		switch (errno) {
+		case ENOTSUP:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "pool must be upgraded"));
+			return (zfs_error(hdl, EZFS_BADVERSION, errbuf));
+		case EINVAL:
+			return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
+		case EEXIST:
+			return (zfs_error(hdl, EZFS_REFTAG_HOLD, errbuf));
+		default:
+			return (zfs_standard_error_fmt(hdl, errno, errbuf));
+		}
+	}
+
+	return (0);
+}
+
+int
+zfs_release(zfs_handle_t *zhp, const char *snapname, const char *tag,
+    boolean_t recursive)
+{
+	zfs_cmd_t zc = { 0 };
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+
+	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
+	(void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value));
+	(void) strlcpy(zc.zc_string, tag, sizeof (zc.zc_string));
+	zc.zc_cookie = recursive;
+
+	if (zfs_ioctl(hdl, ZFS_IOC_RELEASE, &zc) != 0) {
+		char errbuf[ZFS_MAXNAMELEN+32];
+
+		/*
+		 * if it was recursive, the one that actually failed will be in
+		 * zc.zc_name.
+		 */
+		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+		    "cannot release '%s@%s'"), zc.zc_name, snapname);
+		switch (errno) {
+		case ESRCH:
+			return (zfs_error(hdl, EZFS_REFTAG_RELE, errbuf));
+		case ENOTSUP:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "pool must be upgraded"));
+			return (zfs_error(hdl, EZFS_BADVERSION, errbuf));
+		case EINVAL:
+			return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
+		default:
+			return (zfs_standard_error_fmt(hdl, errno, errbuf));
+		}
+	}
+
+	return (0);
+}
--- a/usr/src/lib/libzfs/common/libzfs_sendrecv.c	Sat Aug 01 11:59:47 2009 -0700
+++ b/usr/src/lib/libzfs/common/libzfs_sendrecv.c	Sat Aug 01 15:09:50 2009 -0600
@@ -921,11 +921,12 @@
 	if (err)
 		return (err);
 
+	zc.zc_objset_type = DMU_OST_ZFS;
+	(void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
+
 	if (tryname) {
 		(void) strcpy(newname, tryname);
 
-		zc.zc_objset_type = DMU_OST_ZFS;
-		(void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
 		(void) strlcpy(zc.zc_value, tryname, sizeof (zc.zc_value));
 
 		if (flags.verbose) {
@@ -980,12 +981,18 @@
 	int err = 0;
 	prop_changelist_t *clp;
 	zfs_handle_t *zhp;
+	boolean_t defer = B_FALSE;
+	int spa_version;
 
 	zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
 	if (zhp == NULL)
 		return (-1);
 	clp = changelist_gather(zhp, ZFS_PROP_NAME, 0,
 	    flags.force ? MS_FORCE : 0);
+	if (zfs_get_type(zhp) == ZFS_TYPE_SNAPSHOT &&
+	    zfs_spa_version(zhp, &spa_version) == 0 &&
+	    spa_version >= SPA_VERSION_USERREFS)
+		defer = B_TRUE;
 	zfs_close(zhp);
 	if (clp == NULL)
 		return (-1);
@@ -994,12 +1001,12 @@
 		return (err);
 
 	zc.zc_objset_type = DMU_OST_ZFS;
+	zc.zc_defer_destroy = defer;
 	(void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
 
 	if (flags.verbose)
 		(void) printf("attempting destroy %s\n", zc.zc_name);
 	err = ioctl(hdl->libzfs_fd, ZFS_IOC_DESTROY, &zc);
-
 	if (err == 0) {
 		if (flags.verbose)
 			(void) printf("success\n");
@@ -1009,7 +1016,12 @@
 	(void) changelist_postfix(clp);
 	changelist_free(clp);
 
-	if (err != 0)
+	/*
+	 * Deferred destroy should always succeed. Since we can't tell
+	 * if it destroyed the dataset or just marked it for deferred
+	 * destroy, always do the rename just in case.
+	 */
+	if (err != 0 || defer)
 		err = recv_rename(hdl, name, NULL, baselen, newname, flags);
 
 	return (err);
--- a/usr/src/lib/libzfs/common/libzfs_util.c	Sat Aug 01 11:59:47 2009 -0700
+++ b/usr/src/lib/libzfs/common/libzfs_util.c	Sat Aug 01 15:09:50 2009 -0600
@@ -213,6 +213,11 @@
 	case EZFS_UNPLAYED_LOGS:
 		return (dgettext(TEXT_DOMAIN, "log device has unplayed intent "
 		    "logs"));
+	case EZFS_REFTAG_RELE:
+		return (dgettext(TEXT_DOMAIN, "no such tag on this dataset"));
+	case EZFS_REFTAG_HOLD:
+		return (dgettext(TEXT_DOMAIN, "tag already exists on this "
+		    "dataset"));
 	case EZFS_UNKNOWN:
 		return (dgettext(TEXT_DOMAIN, "unknown error"));
 	default:
--- a/usr/src/lib/libzfs/common/mapfile-vers	Sat Aug 01 11:59:47 2009 -0700
+++ b/usr/src/lib/libzfs/common/mapfile-vers	Sat Aug 01 15:09:50 2009 -0600
@@ -61,6 +61,7 @@
 	zfs_get_pool_handle;
 	zfs_get_user_props;
 	zfs_get_type;
+	zfs_hold;
 	zfs_iscsi_perm_check;
 	zfs_is_mounted;
 	zfs_is_shared;
@@ -105,6 +106,7 @@
 	zfs_prune_proplist;
 	zfs_receive;
 	zfs_refresh_properties;
+	zfs_release;
 	zfs_rename;
 	zfs_rollback;
 	zfs_send;
--- a/usr/src/lib/libzfs_jni/common/libzfs_jni_property.c	Sat Aug 01 11:59:47 2009 -0700
+++ b/usr/src/lib/libzfs_jni/common/libzfs_jni_property.c	Sat Aug 01 15:09:50 2009 -0600
@@ -19,12 +19,10 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #include "libzfs_jni_property.h"
 #include "libzfs_jni_util.h"
 #include <strings.h>
@@ -89,6 +87,7 @@
 	ZFS_PROP_READONLY,
 	ZFS_PROP_SETUID,
 	ZFS_PROP_ZONED,
+	ZFS_PROP_DEFER_DESTROY,
 	ZPROP_INVAL
 };
 
@@ -102,6 +101,7 @@
 	ZFS_PROP_VOLSIZE,
 	ZFS_PROP_REFQUOTA,
 	ZFS_PROP_REFRESERVATION,
+	ZFS_PROP_USERREFS,
 	ZPROP_INVAL
 };
 
--- a/usr/src/lib/pyzfs/Makefile.com	Sat Aug 01 11:59:47 2009 -0700
+++ b/usr/src/lib/pyzfs/Makefile.com	Sat Aug 01 15:09:50 2009 -0600
@@ -29,7 +29,7 @@
 
 PYSRCS=		__init__.py util.py dataset.py \
 	allow.py unallow.py \
-	userspace.py groupspace.py
+	userspace.py groupspace.py holds.py table.py
 
 
 include ../../Makefile.lib
--- a/usr/src/lib/pyzfs/common/allow.py	Sat Aug 01 11:59:47 2009 -0700
+++ b/usr/src/lib/pyzfs/common/allow.py	Sat Aug 01 15:09:50 2009 -0600
@@ -217,6 +217,8 @@
     mount=_("Allows mount/umount of ZFS datasets"),
     share=_("Allows sharing file systems over NFS or SMB\n\t\t\t\tprotocols"),
     send="",
+    hold=_("Allows adding a user hold to a snapshot"),
+    release=_("Allows releasing a user hold which\n\t\t\t\tmight destroy the snapshot"),
 )
 
 perms_other = dict(
@@ -265,7 +267,7 @@
 			print(fmt % (name, _("property"), ""))
 
 def do_allow():
-	"""Implementes the "zfs allow" and "zfs unallow" subcommands."""
+	"""Implements the "zfs allow" and "zfs unallow" subcommands."""
 	un = (sys.argv[1] == "unallow")
 
 	def usage(msg=None):
--- a/usr/src/lib/pyzfs/common/dataset.py	Sat Aug 01 11:59:47 2009 -0700
+++ b/usr/src/lib/pyzfs/common/dataset.py	Sat Aug 01 15:09:50 2009 -0600
@@ -109,7 +109,7 @@
 
 		types is an iterable of strings specifying which types
 		of datasets are permitted.  Accepted strings are
-		"filesystem" and "volume".  Defaults to acceptying all
+		"filesystem" and "volume".  Defaults to accepting all
 		types.
 
 		snaps is a boolean specifying if snapshots are acceptable.
@@ -203,3 +203,29 @@
 		Return a dict("whostr": { "perm" -> None })."""
 
 		return zfs.ioctl.get_fsacl(self.name)
+
+	def get_holds(self):
+		"""Get the user holds on this Dataset.
+
+		Return a dict("tag": timestamp)."""
+
+		return zfs.ioctl.get_holds(self.name)
+
+def snapshots_fromcmdline(dsnames, recursive):
+	for dsname in dsnames:
+		ds = Dataset(dsname)
+		if not "@" in dsname:
+			raise zfs.util.ZFSError(errno.EINVAL,
+			    _("cannot open %s") % dsname,
+			    _("operation only applies to snapshots"))
+		yield ds
+		if recursive:
+			(base, snapname) = dsname.split('@')
+			parent = Dataset(base)
+			for child in parent.descendents():
+				try:
+					yield Dataset(child.name + "@" +
+					    snapname)
+				except zfs.util.ZFSError, e:
+					if e.errno != errno.ENOENT:
+						raise
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/lib/pyzfs/common/holds.py	Sat Aug 01 15:09:50 2009 -0600
@@ -0,0 +1,72 @@
+#! /usr/bin/python2.4
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+"""This module implements the "zfs holds" subcommand.
+The only public interface is the zfs.holds.do_holds() function."""
+
+import optparse
+import sys
+import errno
+import time
+import zfs.util
+import zfs.dataset
+import zfs.table
+
+_ = zfs.util._
+
+def do_holds():
+	"""Implements the "zfs holds" subcommand."""
+	def usage(msg=None):
+		parser.print_help()
+		if msg:
+			print
+			parser.exit("zfs: error: " + msg)
+		else:
+			parser.exit()
+
+	u = _("""holds [-r] <snapshot> ...""")
+
+	parser = optparse.OptionParser(usage=u, prog="zfs")
+
+	parser.add_option("-r", action="store_true", dest="recursive",
+	    help=_("list holds recursively"))
+
+	(options, args) = parser.parse_args(sys.argv[2:])
+
+	if len(args) < 1:
+		usage(_("missing snapshot argument"))
+
+	fields = ("name", "tag", "timestamp")
+	rjustfields = ()
+	printing = False
+	t = zfs.table.Table(fields, rjustfields)
+	for ds in zfs.dataset.snapshots_fromcmdline(args, options.recursive):
+		for tag, tm in ds.get_holds().iteritems():
+			val = {"name": ds.name, "tag": tag,
+			    "timestamp": time.ctime(tm)}
+			t.addline(ds.name, val)
+			printing = True
+	if printing:
+		t.printme()
--- a/usr/src/lib/pyzfs/common/ioctl.c	Sat Aug 01 11:59:47 2009 -0700
+++ b/usr/src/lib/pyzfs/common/ioctl.c	Sat Aug 01 15:09:50 2009 -0600
@@ -350,6 +350,25 @@
 }
 
 static PyObject *
+py_get_holds(PyObject *self, PyObject *args)
+{
+	zfs_cmd_t zc = { 0 };
+	char *name;
+	PyObject *nvl;
+
+	if (!PyArg_ParseTuple(args, "s", &name))
+		return (NULL);
+
+	(void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
+
+	nvl = ioctl_with_dstnv(ZFS_IOC_GET_HOLDS, &zc);
+	if (nvl == NULL)
+		seterr(_("cannot get holds for %s"), name);
+
+	return (nvl);
+}
+
+static PyObject *
 py_userspace_many(PyObject *self, PyObject *args)
 {
 	zfs_cmd_t zc = { 0 };
@@ -582,6 +601,7 @@
 	    "Map SID to name@domain."},
 	{"isglobalzone", py_isglobalzone, METH_NOARGS,
 	    "Determine if this is the global zone."},
+	{"get_holds", py_get_holds, METH_VARARGS, "Get user holds."},
 	{NULL, NULL, 0, NULL}
 };
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/lib/pyzfs/common/table.py	Sat Aug 01 15:09:50 2009 -0600
@@ -0,0 +1,71 @@
+#! /usr/bin/python2.4
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+import zfs.util
+
+class Table:
+	__slots__ = "fields", "rjustfields", "maxfieldlen", "lines"
+	__repr__ = zfs.util.default_repr
+
+	def __init__(self, fields, rjustfields=()):
+		# XXX maybe have a defaults, too?
+		self.fields = fields
+		self.rjustfields = rjustfields
+		self.maxfieldlen = dict.fromkeys(fields, 0)
+		self.lines = list()
+	
+	def __updatemax(self, k, v):
+		self.maxfieldlen[k] = max(self.maxfieldlen.get(k, None), v)
+
+	def addline(self, sortkey, values):
+		"""values is a dict from field name to value"""
+
+		va = list()
+		for f in self.fields:
+			v = str(values[f])
+			va.append(v)
+			self.__updatemax(f, len(v))
+		self.lines.append((sortkey, va))
+
+	def printme(self, headers=True):
+		if headers:
+			d = dict([(f, f.upper()) for f in self.fields])
+			self.addline(None, d)
+
+		self.lines.sort()
+		for (k, va) in self.lines:
+			line = str()
+			for i in range(len(self.fields)):
+				if not headers:
+					line += va[i]
+					line += "\t"
+				else:
+					if self.fields[i] in self.rjustfields:
+						fmt = "%*s  "
+					else:
+						fmt = "%-*s  "
+					mfl = self.maxfieldlen[self.fields[i]]
+					line += fmt % (mfl, va[i])
+			print(line)
--- a/usr/src/lib/pyzfs/common/userspace.py	Sat Aug 01 11:59:47 2009 -0700
+++ b/usr/src/lib/pyzfs/common/userspace.py	Sat Aug 01 15:09:50 2009 -0600
@@ -26,14 +26,15 @@
 """This module implements the "zfs userspace" and "zfs groupspace" subcommands.
 The only public interface is the zfs.userspace.do_userspace() function."""
 
-import zfs.util
-import zfs.ioctl
-import zfs.dataset
 import optparse
 import sys
 import pwd
 import grp
 import errno
+import zfs.util
+import zfs.ioctl
+import zfs.dataset
+import zfs.table
 
 _ = zfs.util._
 
@@ -58,9 +59,6 @@
 		return True
 	return False
 
-def updatemax(d, k, v):
-	d[k] = max(d.get(k, None), v)
-
 def new_entry(options, isgroup, domain, rid):
 	"""Return a dict("field": value) for this domain (string) + rid (int)"""
 
@@ -102,8 +100,8 @@
 	v["quota.sort"] = 0
 	return v
 
-def process_one_raw(acct, maxfieldlen, options, prop, elem):
-	"""Update the acct and maxfieldlen dicts to incorporate the
+def process_one_raw(acct, options, prop, elem):
+	"""Update the acct dict to incorporate the
 	information from this elem from Dataset.userspace(prop)."""
 
 	(domain, rid, value) = elem
@@ -134,10 +132,6 @@
 		v[field] = str(value)
 	else:
 		v[field] = zfs.util.nicenum(value)
-	for k in v.keys():
-		# some of the .sort fields are integers, so have no len()
-		if isinstance(v[k], str):
-			updatemax(maxfieldlen, k, len(v[k]))
 
 def do_userspace():
 	"""Implements the "zfs userspace" and "zfs groupspace" subcommands."""
@@ -156,7 +150,7 @@
 		defaulttypes = "posixgroup,smbgroup"
 
 	fields = ("type", "name", "used", "quota")
-	ljustfields = ("type", "name")
+	rjustfields = ("used", "quota")
 	types = ("all", "posixuser", "smbuser", "posixgroup", "smbgroup")
 
 	u = _("%s [-niHp] [-o field[,...]] [-sS field] ... \n") % sys.argv[1]
@@ -216,31 +210,16 @@
 		print(_("Initializing accounting information on old filesystem, please wait..."))
 		ds.userspace_upgrade()
 
+	# gather and process accounting information
+	# Due to -i, we need to keep a dict, so we can potentially add
+	# together the posix ID and SID's usage.  Grr.
 	acct = dict()
-	maxfieldlen = dict()
-
-	# gather and process accounting information
 	for prop in props.keys():
 		if skiptype(options, prop):
 			continue;
 		for elem in ds.userspace(prop):
-			process_one_raw(acct, maxfieldlen, options, prop, elem)
-
-	# print out headers
-	if not options.noheaders:
-		line = str()
-		for field in options.fields:
-			# make sure the field header will fit
-			updatemax(maxfieldlen, field, len(field))
+			process_one_raw(acct, options, prop, elem)
 
-			if field in ljustfields:
-				fmt = "%-*s  "
-			else:
-				fmt = "%*s  "
-			line += fmt % (maxfieldlen[field], field.upper())
-		print(line)
-
-	# custom sorting func
 	def cmpkey(val):
 		l = list()
 		for (opt, field) in options.sortfields:
@@ -261,17 +240,7 @@
 			l.append(n)
 		return l
 
-	# print out data lines
-	for val in sorted(acct.itervalues(), key=cmpkey):
-		line = str()
-		for field in options.fields:
-			if options.noheaders:
-				line += val[field]
-				line += "\t"
-			else:
-				if field in ljustfields:
-					fmt = "%-*s  "
-				else:
-					fmt = "%*s  "
-				line += fmt % (maxfieldlen[field], val[field])
-		print(line)
+	t = zfs.table.Table(options.fields, rjustfields)
+	for val in acct.itervalues():
+		t.addline(cmpkey(val), val)
+	t.printme(not options.noheaders)
--- a/usr/src/pkgdefs/SUNWzfsu/prototype_com	Sat Aug 01 11:59:47 2009 -0700
+++ b/usr/src/pkgdefs/SUNWzfsu/prototype_com	Sat Aug 01 15:09:50 2009 -0600
@@ -69,7 +69,11 @@
 f none usr/lib/python2.4/vendor-packages/zfs/dataset.pyc 644 root bin
 f none usr/lib/python2.4/vendor-packages/zfs/groupspace.py 644 root bin
 f none usr/lib/python2.4/vendor-packages/zfs/groupspace.pyc 644 root bin
+f none usr/lib/python2.4/vendor-packages/zfs/holds.py 644 root bin
+f none usr/lib/python2.4/vendor-packages/zfs/holds.pyc 644 root bin
 f none usr/lib/python2.4/vendor-packages/zfs/ioctl.so 755 root bin
+f none usr/lib/python2.4/vendor-packages/zfs/table.py 644 root bin
+f none usr/lib/python2.4/vendor-packages/zfs/table.pyc 644 root bin
 f none usr/lib/python2.4/vendor-packages/zfs/unallow.py 644 root bin
 f none usr/lib/python2.4/vendor-packages/zfs/unallow.pyc 644 root bin
 f none usr/lib/python2.4/vendor-packages/zfs/userspace.py 644 root bin
--- a/usr/src/uts/common/fs/zfs/dmu.c	Sat Aug 01 11:59:47 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/dmu.c	Sat Aug 01 15:09:50 2009 -0600
@@ -87,6 +87,7 @@
 	{	zap_byteswap,		TRUE,	"scrub work queue"	},
 	{	zap_byteswap,		TRUE,	"ZFS user/group used"	},
 	{	zap_byteswap,		TRUE,	"ZFS user/group quota"	},
+	{	zap_byteswap,		TRUE,	"snapshot refcount tags"},
 };
 
 int
--- a/usr/src/uts/common/fs/zfs/dmu_objset.c	Sat Aug 01 11:59:47 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/dmu_objset.c	Sat Aug 01 15:09:50 2009 -0600
@@ -679,7 +679,7 @@
 }
 
 int
-dmu_objset_destroy(const char *name)
+dmu_objset_destroy(const char *name, boolean_t defer)
 {
 	objset_t *os;
 	int error;
@@ -696,7 +696,7 @@
 		dsl_dataset_t *ds = os->os->os_dsl_dataset;
 		zil_destroy(dmu_objset_zil(os), B_FALSE);
 
-		error = dsl_dataset_destroy(ds, os);
+		error = dsl_dataset_destroy(ds, os, defer);
 		/*
 		 * dsl_dataset_destroy() closes the ds.
 		 */
--- a/usr/src/uts/common/fs/zfs/dmu_send.c	Sat Aug 01 11:59:47 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/dmu_send.c	Sat Aug 01 15:09:50 2009 -0600
@@ -393,6 +393,7 @@
 	dsl_dataset_t *ds = arg1;
 	struct recvbeginsyncarg *rbsa = arg2;
 	int err;
+	struct dsl_ds_destroyarg dsda = {0};
 
 	/* must be a head ds */
 	if (ds->ds_phys->ds_next_snap_obj != 0)
@@ -402,7 +403,8 @@
 	if (dsl_dir_is_clone(ds->ds_dir))
 		return (EINVAL);
 
-	err = dsl_dataset_destroy_check(ds, rbsa->tag, tx);
+	dsda.ds = ds;
+	err = dsl_dataset_destroy_check(&dsda, rbsa->tag, tx);
 	if (err)
 		return (err);
 
@@ -427,13 +429,16 @@
 	dsl_dir_t *dd = ds->ds_dir;
 	uint64_t flags = DS_FLAG_INCONSISTENT | rbsa->dsflags;
 	uint64_t dsobj;
+	struct dsl_ds_destroyarg dsda = {0};
 
 	/*
 	 * NB: caller must provide an extra hold on the dsl_dir_t, so it
 	 * won't go away when dsl_dataset_destroy_sync() closes the
 	 * dataset.
 	 */
-	dsl_dataset_destroy_sync(ds, rbsa->tag, cr, tx);
+	dsda.ds = ds;
+	dsl_dataset_destroy_sync(&dsda, rbsa->tag, cr, tx);
+	ASSERT3P(dsda.rm_origin, ==, NULL);
 
 	dsobj = dsl_dataset_create_sync_dd(dd, rbsa->origin, flags, tx);
 
@@ -1028,7 +1033,8 @@
 		 */
 		txg_wait_synced(drc->drc_real_ds->ds_dir->dd_pool, 0);
 
-		(void) dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag);
+		(void) dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag,
+		    B_FALSE);
 		if (drc->drc_real_ds != drc->drc_logical_ds) {
 			mutex_exit(&drc->drc_logical_ds->ds_recvlock);
 			dsl_dataset_rele(drc->drc_logical_ds, dmu_recv_tag);
@@ -1099,7 +1105,8 @@
 			dsl_dataset_rele(ds, dmu_recv_tag);
 		}
 		/* dsl_dataset_destroy() will disown the ds */
-		(void) dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag);
+		(void) dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag,
+		    B_FALSE);
 		mutex_exit(&drc->drc_logical_ds->ds_recvlock);
 		if (err)
 			return (err);
@@ -1114,7 +1121,8 @@
 	if (err) {
 		if (drc->drc_newfs) {
 			ASSERT(ds == drc->drc_real_ds);
-			(void) dsl_dataset_destroy(ds, dmu_recv_tag);
+			(void) dsl_dataset_destroy(ds, dmu_recv_tag,
+			    B_FALSE);
 			return (err);
 		} else {
 			(void) dsl_dataset_rollback(ds, DMU_OST_NONE);
--- a/usr/src/uts/common/fs/zfs/dsl_dataset.c	Sat Aug 01 11:59:47 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/dsl_dataset.c	Sat Aug 01 15:09:50 2009 -0600
@@ -39,6 +39,7 @@
 #include <sys/spa.h>
 #include <sys/zfs_znode.h>
 #include <sys/sunddi.h>
+#include <sys/zvol.h>
 
 static char *dsl_reaper = "the grim reaper";
 
@@ -409,8 +410,15 @@
 					dsl_dataset_rele(origin, FTAG);
 				}
 			}
-		} else if (zfs_flags & ZFS_DEBUG_SNAPNAMES) {
-			err = dsl_dataset_get_snapname(ds);
+		} else {
+			if (zfs_flags & ZFS_DEBUG_SNAPNAMES)
+				err = dsl_dataset_get_snapname(ds);
+			if (err == 0 && ds->ds_phys->ds_userrefs_obj != 0) {
+				err = zap_count(
+				    ds->ds_dir->dd_pool->dp_meta_objset,
+				    ds->ds_phys->ds_userrefs_obj,
+				    &ds->ds_userrefs);
+			}
 		}
 
 		if (err == 0 && !dsl_dataset_is_snapshot(ds)) {
@@ -849,6 +857,7 @@
 	dsl_sync_task_group_t *dstg;
 	char *snapname;
 	char *failed;
+	boolean_t defer;
 };
 
 static int
@@ -856,23 +865,30 @@
 {
 	struct destroyarg *da = arg;
 	dsl_dataset_t *ds;
-	char *cp;
 	int err;
-
-	(void) strcat(name, "@");
-	(void) strcat(name, da->snapname);
-	err = dsl_dataset_own(name, DS_MODE_READONLY | DS_MODE_INCONSISTENT,
+	char *dsname;
+	size_t buflen;
+
+	/* alloc a buffer to hold name@snapname, plus the terminating NUL */
+	buflen = strlen(name) + strlen(da->snapname) + 2;
+	dsname = kmem_alloc(buflen, KM_SLEEP);
+	(void) snprintf(dsname, buflen, "%s@%s", name, da->snapname);
+	err = dsl_dataset_own(dsname, DS_MODE_READONLY | DS_MODE_INCONSISTENT,
 	    da->dstg, &ds);
-	cp = strchr(name, '@');
-	*cp = '\0';
+	kmem_free(dsname, buflen);
 	if (err == 0) {
+		struct dsl_ds_destroyarg *dsda;
+
 		dsl_dataset_make_exclusive(ds, da->dstg);
 		if (ds->ds_user_ptr) {
 			ds->ds_user_evict_func(ds, ds->ds_user_ptr);
 			ds->ds_user_ptr = NULL;
 		}
+		dsda = kmem_zalloc(sizeof (struct dsl_ds_destroyarg), KM_SLEEP);
+		dsda->ds = ds;
+		dsda->defer = da->defer;
 		dsl_sync_task_create(da->dstg, dsl_dataset_destroy_check,
-		    dsl_dataset_destroy_sync, ds, da->dstg, 0);
+		    dsl_dataset_destroy_sync, dsda, da->dstg, 0);
 	} else if (err == ENOENT) {
 		err = 0;
 	} else {
@@ -886,7 +902,7 @@
  */
 #pragma weak dmu_snapshots_destroy = dsl_snapshots_destroy
 int
-dsl_snapshots_destroy(char *fsname, char *snapname)
+dsl_snapshots_destroy(char *fsname, char *snapname, boolean_t defer)
 {
 	int err;
 	struct destroyarg da;
@@ -899,6 +915,7 @@
 	da.dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
 	da.snapname = snapname;
 	da.failed = fsname;
+	da.defer = defer;
 
 	err = dmu_objset_find(fsname,
 	    dsl_snapshot_destroy_one, &da, DS_FIND_CHILDREN);
@@ -908,7 +925,9 @@
 
 	for (dst = list_head(&da.dstg->dstg_tasks); dst;
 	    dst = list_next(&da.dstg->dstg_tasks, dst)) {
-		dsl_dataset_t *ds = dst->dst_arg1;
+		struct dsl_ds_destroyarg *dsda = dst->dst_arg1;
+		dsl_dataset_t *ds = dsda->ds;
+
 		/*
 		 * Return the file system name that triggered the error
 		 */
@@ -916,7 +935,9 @@
 			dsl_dataset_name(ds, fsname);
 			*strchr(fsname, '@') = '\0';
 		}
+		ASSERT3P(dsda->rm_origin, ==, NULL);
 		dsl_dataset_disown(ds, da.dstg);
+		kmem_free(dsda, sizeof (struct dsl_ds_destroyarg));
 	}
 
 	dsl_sync_task_group_destroy(da.dstg);
@@ -924,18 +945,100 @@
 	return (err);
 }
 
+static boolean_t
+dsl_dataset_might_destroy_origin(dsl_dataset_t *ds)
+{
+	boolean_t might_destroy = B_FALSE;
+
+	mutex_enter(&ds->ds_lock);
+	if (ds->ds_phys->ds_num_children == 2 && ds->ds_userrefs == 0 &&
+	    DS_IS_DEFER_DESTROY(ds))
+		might_destroy = B_TRUE;
+	mutex_exit(&ds->ds_lock);
+
+	return (might_destroy);
+}
+
+#ifdef _KERNEL
+static int
+dsl_dataset_zvol_cleanup(dsl_dataset_t *ds, const char *name)
+{
+	int error;
+	objset_t *os;
+
+	error = dmu_objset_open_ds(ds, DMU_OST_ANY, &os);
+	if (error)
+		return (error);
+
+	if (dmu_objset_type(os) == DMU_OST_ZVOL)
+		error = zvol_remove_minor(name);
+	dmu_objset_close(os);
+
+	return (error);
+}
+#endif
+
+/*
+ * If we're removing a clone, and these three conditions are true:
+ *	1) the clone's origin has no other children
+ *	2) the clone's origin has no user references
+ *	3) the clone's origin has been marked for deferred destruction
+ * Then, prepare to remove the origin as part of this sync task group.
+ */
+static int
+dsl_dataset_origin_rm_prep(struct dsl_ds_destroyarg *dsda, void *tag)
+{
+	dsl_dataset_t *ds = dsda->ds;
+	dsl_dataset_t *origin = ds->ds_prev;
+
+	if (dsl_dataset_might_destroy_origin(origin)) {
+		char *name;
+		int namelen;
+		int error;
+
+		namelen = dsl_dataset_namelen(origin) + 1;
+		name = kmem_alloc(namelen, KM_SLEEP);
+		dsl_dataset_name(origin, name);
+#ifdef _KERNEL
+		error = zfs_unmount_snap(name, NULL);
+		if (error) {
+			kmem_free(name, namelen);
+			return (error);
+		}
+		error = dsl_dataset_zvol_cleanup(origin, name);
+		if (error) {
+			kmem_free(name, namelen);
+			return (error);
+		}
+#endif
+		error = dsl_dataset_own(name,
+		    DS_MODE_READONLY | DS_MODE_INCONSISTENT,
+		    tag, &origin);
+		kmem_free(name, namelen);
+		if (error)
+			return (error);
+		dsda->rm_origin = origin;
+		dsl_dataset_make_exclusive(origin, tag);
+	}
+
+	return (0);
+}
+
 /*
  * ds must be opened as OWNER.  On return (whether successful or not),
  * ds will be closed and caller can no longer dereference it.
  */
 int
-dsl_dataset_destroy(dsl_dataset_t *ds, void *tag)
+dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer)
 {
 	int err;
 	dsl_sync_task_group_t *dstg;
 	objset_t *os;
 	dsl_dir_t *dd;
 	uint64_t obj;
+	struct dsl_ds_destroyarg dsda = {0};
+
+	dsda.ds = ds;
 
 	if (dsl_dataset_is_snapshot(ds)) {
 		/* Destroying a snapshot is simpler */
@@ -945,9 +1048,12 @@
 			ds->ds_user_evict_func(ds, ds->ds_user_ptr);
 			ds->ds_user_ptr = NULL;
 		}
+		/* NOTE: defer is always B_FALSE for non-snapshots */
+		dsda.defer = defer;
 		err = dsl_sync_task_do(ds->ds_dir->dd_pool,
 		    dsl_dataset_destroy_check, dsl_dataset_destroy_sync,
-		    ds, tag, 0);
+		    &dsda, tag, 0);
+		ASSERT3P(dsda.rm_origin, ==, NULL);
 		goto out;
 	}
 
@@ -1028,13 +1134,45 @@
 		ds->ds_user_evict_func(ds, ds->ds_user_ptr);
 		ds->ds_user_ptr = NULL;
 	}
-	dstg = dsl_sync_task_group_create(ds->ds_dir->dd_pool);
-	dsl_sync_task_create(dstg, dsl_dataset_destroy_check,
-	    dsl_dataset_destroy_sync, ds, tag, 0);
-	dsl_sync_task_create(dstg, dsl_dir_destroy_check,
-	    dsl_dir_destroy_sync, dd, FTAG, 0);
-	err = dsl_sync_task_group_wait(dstg);
-	dsl_sync_task_group_destroy(dstg);
+
+	/*
+	 * If we're removing a clone, we might also need to remove its
+	 * origin.
+	 */
+	do {
+		dsda.need_prep = B_FALSE;
+		if (dsl_dir_is_clone(dd)) {
+			err = dsl_dataset_origin_rm_prep(&dsda, tag);
+			if (err) {
+				dsl_dir_close(dd, FTAG);
+				goto out;
+			}
+		}
+
+		dstg = dsl_sync_task_group_create(ds->ds_dir->dd_pool);
+		dsl_sync_task_create(dstg, dsl_dataset_destroy_check,
+		    dsl_dataset_destroy_sync, &dsda, tag, 0);
+		dsl_sync_task_create(dstg, dsl_dir_destroy_check,
+		    dsl_dir_destroy_sync, dd, FTAG, 0);
+		err = dsl_sync_task_group_wait(dstg);
+		dsl_sync_task_group_destroy(dstg);
+
+		/*
+		 * We could be racing against 'zfs release' or 'zfs destroy -d'
+		 * on the origin snap, in which case we can get EBUSY if we
+		 * needed to destroy the origin snap but were not ready to
+		 * do so.
+		 */
+		if (dsda.need_prep) {
+			ASSERT(err == EBUSY);
+			ASSERT(dsl_dir_is_clone(dd));
+			ASSERT(dsda.rm_origin == NULL);
+		}
+	} while (dsda.need_prep);
+
+	if (dsda.rm_origin != NULL)
+		dsl_dataset_disown(dsda.rm_origin, tag);
+
 	/* if it is successful, dsl_dir_destroy_sync will close the dd */
 	if (err)
 		dsl_dir_close(dd, FTAG);
@@ -1391,18 +1529,63 @@
 	    cr, "dataset = %llu", ds->ds_object);
 }
 
+static int
+dsl_dataset_origin_check(struct dsl_ds_destroyarg *dsda, void *tag,
+    dmu_tx_t *tx)
+{
+	dsl_dataset_t *ds = dsda->ds;
+	dsl_dataset_t *ds_prev = ds->ds_prev;
+
+	if (dsl_dataset_might_destroy_origin(ds_prev)) {
+		struct dsl_ds_destroyarg ndsda = {0};
+
+		/*
+		 * If we're not prepared to remove the origin, don't remove
+		 * the clone either.
+		 */
+		if (dsda->rm_origin == NULL) {
+			dsda->need_prep = B_TRUE;
+			return (EBUSY);
+		}
+
+		ndsda.ds = ds_prev;
+		ndsda.is_origin_rm = B_TRUE;
+		return (dsl_dataset_destroy_check(&ndsda, tag, tx));
+	}
+
+	/*
+	 * If we're not going to remove the origin after all,
+	 * undo the open context setup.
+	 */
+	if (dsda->rm_origin != NULL) {
+		dsl_dataset_disown(dsda->rm_origin, tag);
+		dsda->rm_origin = NULL;
+	}
+
+	return (0);
+}
+
 /* ARGSUSED */
 int
 dsl_dataset_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx)
 {
-	dsl_dataset_t *ds = arg1;
+	struct dsl_ds_destroyarg *dsda = arg1;
+	dsl_dataset_t *ds = dsda->ds;
 
 	/* we have an owner hold, so noone else can destroy us */
 	ASSERT(!DSL_DATASET_IS_DESTROYED(ds));
 
-	/* Can't delete a branch point. */
-	if (ds->ds_phys->ds_num_children > 1)
-		return (EEXIST);
+	/*
+	 * Only allow deferred destroy on pools that support it.
+	 * NOTE: deferred destroy is only supported on snapshots.
+	 */
+	if (dsda->defer) {
+		if (spa_version(ds->ds_dir->dd_pool->dp_spa) <
+		    SPA_VERSION_USERREFS)
+			return (ENOTSUP);
+		ASSERT(dsl_dataset_is_snapshot(ds));
+		return (0);
+	}
 
 	/*
 	 * Can't delete a head dataset if there are snapshots of it.
@@ -1420,6 +1603,31 @@
 	if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg)
 		return (EAGAIN);
 
+	if (dsl_dataset_is_snapshot(ds)) {
+		/*
+		 * If this snapshot has an elevated user reference count,
+		 * we can't destroy it yet.
+		 */
+		if (ds->ds_userrefs > 0 && !dsda->releasing)
+			return (EBUSY);
+
+		mutex_enter(&ds->ds_lock);
+		/*
+		 * Can't delete a branch point. However, if we're destroying
+		 * a clone and removing its origin due to it having a user
+		 * hold count of 0 and having been marked for deferred destroy,
+		 * it's OK for the origin to have a single clone.
+		 */
+		if (ds->ds_phys->ds_num_children >
+		    (dsda->is_origin_rm ? 2 : 1)) {
+			mutex_exit(&ds->ds_lock);
+			return (EEXIST);
+		}
+		mutex_exit(&ds->ds_lock);
+	} else if (dsl_dir_is_clone(ds->ds_dir)) {
+		return (dsl_dataset_origin_check(dsda, arg2, tx));
+	}
+
 	/* XXX we should do some i/o error checking... */
 	return (0);
 }
@@ -1467,7 +1675,8 @@
 void
 dsl_dataset_destroy_sync(void *arg1, void *tag, cred_t *cr, dmu_tx_t *tx)
 {
-	dsl_dataset_t *ds = arg1;
+	struct dsl_ds_destroyarg *dsda = arg1;
+	dsl_dataset_t *ds = dsda->ds;
 	zio_t *zio;
 	int err;
 	int after_branch_point = FALSE;
@@ -1477,11 +1686,20 @@
 	uint64_t obj;
 
 	ASSERT(ds->ds_owner);
-	ASSERT3U(ds->ds_phys->ds_num_children, <=, 1);
+	ASSERT(dsda->defer || ds->ds_phys->ds_num_children <= 1);
 	ASSERT(ds->ds_prev == NULL ||
 	    ds->ds_prev->ds_phys->ds_next_snap_obj != ds->ds_object);
 	ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg);
 
+	if (dsda->defer) {
+		ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS);
+		if (ds->ds_userrefs > 0 || ds->ds_phys->ds_num_children > 1) {
+			dmu_buf_will_dirty(ds->ds_dbuf, tx);
+			ds->ds_phys->ds_flags |= DS_FLAG_DEFER_DESTROY;
+			return;
+		}
+	}
+
 	/* signal any waiters that this dataset is going away */
 	mutex_enter(&ds->ds_lock);
 	ds->ds_owner = dsl_reaper;
@@ -1527,6 +1745,20 @@
 			/* This clone is toast. */
 			ASSERT(ds_prev->ds_phys->ds_num_children > 1);
 			ds_prev->ds_phys->ds_num_children--;
+
+			/*
+			 * If the clone's origin has no other clones, no
+			 * user holds, and has been marked for deferred
+			 * deletion, then we should have done the necessary
+			 * destroy setup for it.
+			 */
+			if (ds_prev->ds_phys->ds_num_children == 1 &&
+			    ds_prev->ds_userrefs == 0 &&
+			    DS_IS_DEFER_DESTROY(ds_prev)) {
+				ASSERT3P(dsda->rm_origin, !=, NULL);
+			} else {
+				ASSERT3P(dsda->rm_origin, ==, NULL);
+			}
 		} else if (!after_branch_point) {
 			ds_prev->ds_phys->ds_next_snap_obj =
 			    ds->ds_phys->ds_next_snap_obj;
@@ -1739,10 +1971,32 @@
 	}
 	if (ds->ds_phys->ds_props_obj != 0)
 		VERIFY(0 == zap_destroy(mos, ds->ds_phys->ds_props_obj, tx));
+	if (ds->ds_phys->ds_userrefs_obj != 0)
+		VERIFY(0 == zap_destroy(mos, ds->ds_phys->ds_userrefs_obj, tx));
 	dsl_dir_close(ds->ds_dir, ds);
 	ds->ds_dir = NULL;
 	dsl_dataset_drain_refs(ds, tag);
 	VERIFY(0 == dmu_object_free(mos, obj, tx));
+
+	if (dsda->rm_origin) {
+		/*
+		 * Remove the origin of the clone we just destroyed.
+		 */
+		dsl_dataset_t *origin = ds->ds_prev;
+		struct dsl_ds_destroyarg ndsda = {0};
+
+		ASSERT3P(origin, ==, dsda->rm_origin);
+		if (origin->ds_user_ptr) {
+			origin->ds_user_evict_func(origin, origin->ds_user_ptr);
+			origin->ds_user_ptr = NULL;
+		}
+
+		dsl_dataset_rele(origin, tag);
+		ds->ds_prev = NULL;
+
+		ndsda.ds = origin;
+		dsl_dataset_destroy_sync(&ndsda, tag, cr, tx);
+	}
 }
 
 static int
@@ -1957,6 +2211,9 @@
 	    ds->ds_reserved);
 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_GUID,
 	    ds->ds_phys->ds_guid);
+	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USERREFS, ds->ds_userrefs);
+	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_DEFER_DESTROY,
+	    DS_IS_DEFER_DESTROY(ds) ? 1 : 0);
 
 	if (ds->ds_phys->ds_next_snap_obj) {
 		/*
@@ -3025,7 +3282,7 @@
 
 	ds->ds_quota = new_quota;
 
-	dsl_prop_set_uint64_sync(ds->ds_dir, "refquota", new_quota, cr, tx);
+	dsl_dir_prop_set_uint64_sync(ds->ds_dir, "refquota", new_quota, cr, tx);
 
 	spa_history_internal_log(LOG_DS_REFQUOTA, ds->ds_dir->dd_pool->dp_spa,
 	    tx, cr, "%lld dataset = %llu ",
@@ -3120,7 +3377,7 @@
 
 	dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, delta, 0, 0, tx);
 	mutex_exit(&ds->ds_dir->dd_lock);
-	dsl_prop_set_uint64_sync(ds->ds_dir, "refreservation",
+	dsl_dir_prop_set_uint64_sync(ds->ds_dir, "refreservation",
 	    new_reservation, cr, tx);
 
 	spa_history_internal_log(LOG_DS_REFRESERV,
@@ -3144,3 +3401,421 @@
 	dsl_dataset_rele(ds, FTAG);
 	return (err);
 }
+
+static int
+dsl_dataset_user_hold_check(void *arg1, void *arg2, dmu_tx_t *tx)
+{
+	dsl_dataset_t *ds = arg1;
+	char *htag = arg2;
+	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
+	int error = 0;
+
+	if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_USERREFS)
+		return (ENOTSUP);
+
+	if (!dsl_dataset_is_snapshot(ds))
+		return (EINVAL);
+
+	if (strlen(htag) >= ZAP_MAXNAMELEN)
+		return (ENAMETOOLONG);
+
+	/* tags must be unique */
+	mutex_enter(&ds->ds_lock);
+	if (ds->ds_phys->ds_userrefs_obj) {
+		error = zap_lookup(mos, ds->ds_phys->ds_userrefs_obj, htag,
+		    8, 1, tx);
+		if (error == 0)
+			error = EEXIST;
+		else if (error == ENOENT)
+			error = 0;
+	}
+	mutex_exit(&ds->ds_lock);
+
+	return (error);
+}
+
+static void
+dsl_dataset_user_hold_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
+{
+	dsl_dataset_t *ds = arg1;
+	char *htag = arg2;
+	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
+	time_t now = gethrestime_sec();
+	uint64_t zapobj;
+
+	mutex_enter(&ds->ds_lock);
+	if (ds->ds_phys->ds_userrefs_obj == 0) {
+		/*
+		 * This is the first user hold for this dataset.  Create
+		 * the userrefs zap object.
+		 */
+		dmu_buf_will_dirty(ds->ds_dbuf, tx);
+		zapobj = ds->ds_phys->ds_userrefs_obj =
+		    zap_create(mos, DMU_OT_USERREFS, DMU_OT_NONE, 0, tx);
+	} else {
+		zapobj = ds->ds_phys->ds_userrefs_obj;
+	}
+	ds->ds_userrefs++;
+	mutex_exit(&ds->ds_lock);
+
+	VERIFY(0 == zap_add(mos, zapobj, htag, 8, 1, &now, tx));
+
+	spa_history_internal_log(LOG_DS_USER_HOLD,
+	    ds->ds_dir->dd_pool->dp_spa, tx, cr, "<%s> dataset = %llu",
+	    htag, ds->ds_object);
+}
+
+struct dsl_ds_holdarg {
+	dsl_sync_task_group_t *dstg;
+	char *htag;
+	char *snapname;
+	boolean_t recursive;
+	char failed[MAXPATHLEN];
+};
+
+static int
+dsl_dataset_user_hold_one(char *dsname, void *arg)
+{
+	struct dsl_ds_holdarg *ha = arg;
+	dsl_dataset_t *ds;
+	int error;
+	char *name;
+	size_t buflen;
+
+	/* alloc a buffer to hold dsname@snapname, plus the terminating NUL */
+	buflen = strlen(dsname) + strlen(ha->snapname) + 2;
+	name = kmem_alloc(buflen, KM_SLEEP);
+	(void) snprintf(name, buflen, "%s@%s", dsname, ha->snapname);
+	error = dsl_dataset_hold(name, ha->dstg, &ds);
+	kmem_free(name, buflen);
+	if (error == 0) {
+		dsl_sync_task_create(ha->dstg, dsl_dataset_user_hold_check,
+		    dsl_dataset_user_hold_sync, ds, ha->htag, 0);
+	} else if (error == ENOENT && ha->recursive) {
+		error = 0;
+	} else {
+		(void) strcpy(ha->failed, dsname);
+	}
+	return (error);
+}
+
+int
+dsl_dataset_user_hold(char *dsname, char *snapname, char *htag,
+    boolean_t recursive)
+{
+	struct dsl_ds_holdarg *ha;
+	dsl_sync_task_t *dst;
+	spa_t *spa;
+	int error;
+
+	ha = kmem_zalloc(sizeof (struct dsl_ds_holdarg), KM_SLEEP);
+
+	(void) strlcpy(ha->failed, dsname, sizeof (ha->failed));
+
+	error = spa_open(dsname, &spa, FTAG);
+	if (error) {
+		kmem_free(ha, sizeof (struct dsl_ds_holdarg));
+		return (error);
+	}
+
+	ha->dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
+	ha->htag = htag;
+	ha->snapname = snapname;
+	ha->recursive = recursive;
+	if (recursive) {
+		error = dmu_objset_find(dsname, dsl_dataset_user_hold_one,
+		    ha, DS_FIND_CHILDREN);
+	} else {
+		error = dsl_dataset_user_hold_one(dsname, ha);
+	}
+	if (error == 0)
+		error = dsl_sync_task_group_wait(ha->dstg);
+
+	for (dst = list_head(&ha->dstg->dstg_tasks); dst;
+	    dst = list_next(&ha->dstg->dstg_tasks, dst)) {
+		dsl_dataset_t *ds = dst->dst_arg1;
+
+		if (dst->dst_err) {
+			dsl_dataset_name(ds, ha->failed);
+			*strchr(ha->failed, '@') = '\0';
+		}
+		dsl_dataset_rele(ds, ha->dstg);
+	}
+
+	if (error)
+		(void) strcpy(dsname, ha->failed);
+
+	dsl_sync_task_group_destroy(ha->dstg);
+	kmem_free(ha, sizeof (struct dsl_ds_holdarg));
+	spa_close(spa, FTAG);
+	return (error);
+}
+
+struct dsl_ds_releasearg {
+	dsl_dataset_t *ds;
+	const char *htag;
+	boolean_t own;		/* do we own or just hold ds? */
+};
+
+static int
+dsl_dataset_release_might_destroy(dsl_dataset_t *ds, const char *htag,
+    boolean_t *might_destroy)
+{
+	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
+	uint64_t zapobj;
+	uint64_t tmp;
+	int error;
+
+	*might_destroy = B_FALSE;
+
+	mutex_enter(&ds->ds_lock);
+	zapobj = ds->ds_phys->ds_userrefs_obj;
+	if (zapobj == 0) {
+		/* The tag can't possibly exist */
+		mutex_exit(&ds->ds_lock);
+		return (ESRCH);
+	}
+
+	/* Make sure the tag exists */
+	error = zap_lookup(mos, zapobj, htag, 8, 1, &tmp);
+	if (error) {
+		mutex_exit(&ds->ds_lock);
+		if (error == ENOENT)
+			error = ESRCH;
+		return (error);
+	}
+
+	if (ds->ds_userrefs == 1 && ds->ds_phys->ds_num_children == 1 &&
+	    DS_IS_DEFER_DESTROY(ds))
+		*might_destroy = B_TRUE;
+
+	mutex_exit(&ds->ds_lock);
+	return (0);
+}
+
+static int
+dsl_dataset_user_release_check(void *arg1, void *tag, dmu_tx_t *tx)
+{
+	struct dsl_ds_releasearg *ra = arg1;
+	dsl_dataset_t *ds = ra->ds;
+	boolean_t might_destroy;
+	int error;
+
+	if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_USERREFS)
+		return (ENOTSUP);
+
+	error = dsl_dataset_release_might_destroy(ds, ra->htag, &might_destroy);
+	if (error)
+		return (error);
+
+	if (might_destroy) {
+		struct dsl_ds_destroyarg dsda = {0};
+
+		if (dmu_tx_is_syncing(tx)) {
+			/*
+			 * If we're not prepared to remove the snapshot,
+			 * we can't allow the release to happen right now.
+			 */
+			if (!ra->own)
+				return (EBUSY);
+			if (ds->ds_user_ptr) {
+				ds->ds_user_evict_func(ds, ds->ds_user_ptr);
+				ds->ds_user_ptr = NULL;
+			}
+		}
+		dsda.ds = ds;
+		dsda.releasing = B_TRUE;
+		return (dsl_dataset_destroy_check(&dsda, tag, tx));
+	}
+
+	return (0);
+}
+
+static void
+dsl_dataset_user_release_sync(void *arg1, void *tag, cred_t *cr, dmu_tx_t *tx)
+{
+	struct dsl_ds_releasearg *ra = arg1;
+	dsl_dataset_t *ds = ra->ds;
+	spa_t *spa = ds->ds_dir->dd_pool->dp_spa;
+	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
+	uint64_t zapobj;
+	uint64_t dsobj = ds->ds_object;
+	uint64_t refs;
+
+	mutex_enter(&ds->ds_lock);
+	ds->ds_userrefs--;
+	refs = ds->ds_userrefs;
+	mutex_exit(&ds->ds_lock);
+	zapobj = ds->ds_phys->ds_userrefs_obj;
+	VERIFY(0 == zap_remove(mos, zapobj, ra->htag, tx));
+	if (ds->ds_userrefs == 0 && ds->ds_phys->ds_num_children == 1 &&
+	    DS_IS_DEFER_DESTROY(ds)) {
+		struct dsl_ds_destroyarg dsda = {0};
+
+		ASSERT(ra->own);
+		dsda.ds = ds;
+		dsda.releasing = B_TRUE;
+		/* We already did the destroy_check */
+		dsl_dataset_destroy_sync(&dsda, tag, cr, tx);
+	}
+
+	spa_history_internal_log(LOG_DS_USER_RELEASE,
+	    spa, tx, cr, "<%s> %lld dataset = %llu",
+	    ra->htag, (longlong_t)refs, dsobj);
+}
+
+static int
+dsl_dataset_user_release_one(char *dsname, void *arg)
+{
+	struct dsl_ds_holdarg *ha = arg;
+	struct dsl_ds_releasearg *ra;
+	dsl_dataset_t *ds;
+	int error;
+	void *dtag = ha->dstg;
+	char *name;
+	size_t buflen;
+	boolean_t own = B_FALSE;
+	boolean_t might_destroy;
+
+	if (strlen(ha->htag) >= ZAP_MAXNAMELEN)
+		return (ENAMETOOLONG);
+
+	/* alloc a buffer to hold dsname@snapname, plus the terminating NUL */
+	buflen = strlen(dsname) + strlen(ha->snapname) + 2;
+	name = kmem_alloc(buflen, KM_SLEEP);
+	(void) snprintf(name, buflen, "%s@%s", dsname, ha->snapname);
+	error = dsl_dataset_hold(name, dtag, &ds);
+	kmem_free(name, buflen);
+	if (error == ENOENT && ha->recursive)
+		return (0);
+	(void) strcpy(ha->failed, dsname);
+	if (error)
+		return (error);
+
+	ASSERT(dsl_dataset_is_snapshot(ds));
+
+	error = dsl_dataset_release_might_destroy(ds, ha->htag, &might_destroy);
+	if (error) {
+		dsl_dataset_rele(ds, dtag);
+		return (error);
+	}
+
+	if (might_destroy) {
+#ifdef _KERNEL
+		error = zfs_unmount_snap(name, NULL);
+		if (error) {
+			dsl_dataset_rele(ds, dtag);
+			return (error);
+		}
+		error = dsl_dataset_zvol_cleanup(ds, name);
+		if (error) {
+			dsl_dataset_rele(ds, dtag);
+			return (error);
+		}
+#endif
+		if (!dsl_dataset_tryown(ds,
+		    DS_MODE_READONLY | DS_MODE_INCONSISTENT, dtag)) {
+			dsl_dataset_rele(ds, dtag);
+			return (EBUSY);
+		} else {
+			own = B_TRUE;
+			dsl_dataset_make_exclusive(ds, dtag);
+		}
+	}
+
+	ra = kmem_alloc(sizeof (struct dsl_ds_releasearg), KM_SLEEP);
+	ra->ds = ds;
+	ra->htag = ha->htag;
+	ra->own = own;
+	dsl_sync_task_create(ha->dstg, dsl_dataset_user_release_check,
+	    dsl_dataset_user_release_sync, ra, dtag, 0);
+
+	return (0);
+}
+
+int
+dsl_dataset_user_release(char *dsname, char *snapname, char *htag,
+    boolean_t recursive)
+{
+	struct dsl_ds_holdarg *ha;
+	dsl_sync_task_t *dst;
+	spa_t *spa;
+	int error;
+
+	ha = kmem_zalloc(sizeof (struct dsl_ds_holdarg), KM_SLEEP);
+
+	(void) strlcpy(ha->failed, dsname, sizeof (ha->failed));
+
+	error = spa_open(dsname, &spa, FTAG);
+	if (error) {
+		kmem_free(ha, sizeof (struct dsl_ds_holdarg));
+		return (error);
+	}
+
+	ha->dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
+	ha->htag = htag;
+	ha->snapname = snapname;
+	ha->recursive = recursive;
+	if (recursive) {
+		error = dmu_objset_find(dsname, dsl_dataset_user_release_one,
+		    ha, DS_FIND_CHILDREN);
+	} else {
+		error = dsl_dataset_user_release_one(dsname, ha);
+	}
+	if (error == 0)
+		error = dsl_sync_task_group_wait(ha->dstg);
+
+	for (dst = list_head(&ha->dstg->dstg_tasks); dst;
+	    dst = list_next(&ha->dstg->dstg_tasks, dst)) {
+		struct dsl_ds_releasearg *ra = dst->dst_arg1;
+		dsl_dataset_t *ds = ra->ds;
+
+		if (dst->dst_err)
+			dsl_dataset_name(ds, ha->failed);
+
+		if (ra->own)
+			dsl_dataset_disown(ds, ha->dstg);
+		else
+			dsl_dataset_rele(ds, ha->dstg);
+
+		kmem_free(ra, sizeof (struct dsl_ds_releasearg));
+	}
+
+	if (error)
+		(void) strcpy(dsname, ha->failed);
+
+	dsl_sync_task_group_destroy(ha->dstg);
+	kmem_free(ha, sizeof (struct dsl_ds_holdarg));
+	spa_close(spa, FTAG);
+	return (error);
+}
+
+int
+dsl_dataset_get_holds(const char *dsname, nvlist_t **nvp)
+{
+	dsl_dataset_t *ds;
+	int err;
+
+	err = dsl_dataset_hold(dsname, FTAG, &ds);
+	if (err)
+		return (err);
+
+	VERIFY(0 == nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP));
+	if (ds->ds_phys->ds_userrefs_obj != 0) {
+		zap_attribute_t *za;
+		zap_cursor_t zc;
+
+		za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);
+		for (zap_cursor_init(&zc, ds->ds_dir->dd_pool->dp_meta_objset,
+		    ds->ds_phys->ds_userrefs_obj);
+		    zap_cursor_retrieve(&zc, za) == 0;
+		    zap_cursor_advance(&zc)) {
+			VERIFY(0 == nvlist_add_uint64(*nvp, za->za_name,
+			    za->za_first_integer));
+		}
+		zap_cursor_fini(&zc);
+		kmem_free(za, sizeof (zap_attribute_t));
+	}
+	dsl_dataset_rele(ds, FTAG);
+	return (0);
+}
--- a/usr/src/uts/common/fs/zfs/dsl_prop.c	Sat Aug 01 11:59:47 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/dsl_prop.c	Sat Aug 01 15:09:50 2009 -0600
@@ -442,7 +442,7 @@
 }
 
 void
-dsl_prop_set_uint64_sync(dsl_dir_t *dd, const char *name, uint64_t val,
+dsl_dir_prop_set_uint64_sync(dsl_dir_t *dd, const char *name, uint64_t val,
     cred_t *cr, dmu_tx_t *tx)
 {
 	objset_t *mos = dd->dd_pool->dp_meta_objset;
--- a/usr/src/uts/common/fs/zfs/sys/dmu.h	Sat Aug 01 11:59:47 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/dmu.h	Sat Aug 01 15:09:50 2009 -0600
@@ -117,6 +117,7 @@
 	DMU_OT_SCRUB_QUEUE,		/* ZAP */
 	DMU_OT_USERGROUP_USED,		/* ZAP */
 	DMU_OT_USERGROUP_QUOTA,		/* ZAP */
+	DMU_OT_USERREFS,		/* ZAP */
 	DMU_OT_NUMTYPES
 } dmu_object_type_t;
 
@@ -174,8 +175,8 @@
 int dmu_objset_create(const char *name, dmu_objset_type_t type,
     objset_t *clone_parent, uint64_t flags,
     void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg);
-int dmu_objset_destroy(const char *name);
-int dmu_snapshots_destroy(char *fsname, char *snapname);
+int dmu_objset_destroy(const char *name, boolean_t defer);
+int dmu_snapshots_destroy(char *fsname, char *snapname, boolean_t defer);
 int dmu_objset_rollback(objset_t *os);
 int dmu_objset_snapshot(char *fsname, char *snapname, struct nvlist *props,
     boolean_t recursive);
--- a/usr/src/uts/common/fs/zfs/sys/dmu_impl.h	Sat Aug 01 11:59:47 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/dmu_impl.h	Sat Aug 01 15:09:50 2009 -0600
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -211,10 +211,11 @@
  * ds_lock
  *    protects:
  *    	ds_user_ptr
- *    	ds_user_evice_func
+ *    	ds_user_evict_func
  *    	ds_open_refcount
  *    	ds_snapname
  *    	ds_phys accounting
+ *	ds_phys userrefs zapobj
  *	ds_reserved
  *    held from:
  *    	dsl_dataset_*
--- a/usr/src/uts/common/fs/zfs/sys/dmu_objset.h	Sat Aug 01 11:59:47 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/dmu_objset.h	Sat Aug 01 15:09:50 2009 -0600
@@ -117,7 +117,7 @@
 int dmu_objset_create(const char *name, dmu_objset_type_t type,
     objset_t *clone_parent, uint64_t flags,
     void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg);
-int dmu_objset_destroy(const char *name);
+int dmu_objset_destroy(const char *name, boolean_t defer);
 int dmu_objset_rollback(objset_t *os);
 int dmu_objset_snapshot(char *fsname, char *snapname, nvlist_t *props,
     boolean_t recursive);
--- a/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h	Sat Aug 01 11:59:47 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h	Sat Aug 01 15:09:50 2009 -0600
@@ -63,6 +63,14 @@
 #define	DS_FLAG_UNIQUE_ACCURATE	(1ULL<<2)
 
 /*
+ * DS_FLAG_DEFER_DESTROY is set after 'zfs destroy -d' has been called
+ * on a dataset. This allows the dataset to be destroyed using 'zfs release'.
+ */
+#define	DS_FLAG_DEFER_DESTROY	(1ULL<<3)
+#define	DS_IS_DEFER_DESTROY(ds)	\
+	((ds)->ds_phys->ds_flags & DS_FLAG_DEFER_DESTROY)
+
+/*
  * DS_FLAG_CI_DATASET is set if the dataset contains a file system whose
  * name lookups should be performed case-insensitively.
  */
@@ -93,7 +101,8 @@
 	blkptr_t ds_bp;
 	uint64_t ds_next_clones_obj;	/* DMU_OT_DSL_CLONES */
 	uint64_t ds_props_obj;		/* DMU_OT_DSL_PROPS for snaps */
-	uint64_t ds_pad[6]; /* pad out to 320 bytes for good measure */
+	uint64_t ds_userrefs_obj;	/* DMU_OT_USERREFS */
+	uint64_t ds_pad[5]; /* pad out to 320 bytes for good measure */
 } dsl_dataset_phys_t;
 
 typedef struct dsl_dataset {
@@ -125,6 +134,7 @@
 	kmutex_t ds_lock;
 	void *ds_user_ptr;
 	dsl_dataset_evict_func_t *ds_user_evict_func;
+	uint64_t ds_userrefs;
 
 	/*
 	 * ds_owner is protected by the ds_rwlock and the ds_lock
@@ -146,6 +156,15 @@
 	char ds_snapname[MAXNAMELEN];
 } dsl_dataset_t;
 
+struct dsl_ds_destroyarg {
+	dsl_dataset_t *ds;		/* ds to destroy */
+	dsl_dataset_t *rm_origin;	/* also remove our origin? */
+	boolean_t is_origin_rm;		/* set if removing origin snap */
+	boolean_t defer;		/* destroy -d requested? */
+	boolean_t releasing;		/* destroying due to release? */
+	boolean_t need_prep;		/* do we need to retry due to EBUSY? */
+};
+
 #define	dsl_dataset_is_snapshot(ds)	\
 	((ds)->ds_phys->ds_num_children != 0)
 
@@ -170,8 +189,8 @@
     dsl_dataset_t *origin, uint64_t flags, cred_t *, dmu_tx_t *);
 uint64_t dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
     uint64_t flags, dmu_tx_t *tx);
-int dsl_dataset_destroy(dsl_dataset_t *ds, void *tag);
-int dsl_snapshots_destroy(char *fsname, char *snapname);
+int dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer);
+int dsl_snapshots_destroy(char *fsname, char *snapname, boolean_t defer);
 dsl_checkfunc_t dsl_dataset_destroy_check;
 dsl_syncfunc_t dsl_dataset_destroy_sync;
 dsl_checkfunc_t dsl_dataset_snapshot_check;
@@ -181,6 +200,11 @@
 int dsl_dataset_promote(const char *name);
 int dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head,
     boolean_t force);
+int dsl_dataset_user_hold(char *dsname, char *snapname, char *htag,
+    boolean_t recursive);
+int dsl_dataset_user_release(char *dsname, char *snapname, char *htag,
+    boolean_t recursive);
+int dsl_dataset_get_holds(const char *dsname, nvlist_t **nvp);
 
 void *dsl_dataset_set_user_ptr(dsl_dataset_t *ds,
     void *p, dsl_dataset_evict_func_t func);
--- a/usr/src/uts/common/fs/zfs/sys/dsl_deleg.h	Sat Aug 01 11:59:47 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/dsl_deleg.h	Sat Aug 01 15:09:50 2009 -0600
@@ -53,6 +53,8 @@
 #define	ZFS_DELEG_PERM_GROUPQUOTA	"groupquota"
 #define	ZFS_DELEG_PERM_USERUSED		"userused"
 #define	ZFS_DELEG_PERM_GROUPUSED	"groupused"
+#define	ZFS_DELEG_PERM_HOLD		"hold"
+#define	ZFS_DELEG_PERM_RELEASE		"release"
 
 /*
  * Note: the names of properties that are marked delegatable are also
--- a/usr/src/uts/common/fs/zfs/sys/dsl_prop.h	Sat Aug 01 11:59:47 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/dsl_prop.h	Sat Aug 01 15:09:50 2009 -0600
@@ -69,7 +69,7 @@
 int dsl_prop_set(const char *ddname, const char *propname,
     int intsz, int numints, const void *buf);
 int dsl_props_set(const char *dsname, nvlist_t *nvl);
-void dsl_prop_set_uint64_sync(dsl_dir_t *dd, const char *name, uint64_t val,
+void dsl_dir_prop_set_uint64_sync(dsl_dir_t *dd, const char *name, uint64_t val,
     cred_t *cr, dmu_tx_t *tx);
 
 void dsl_prop_nvlist_add_uint64(nvlist_t *nv, zfs_prop_t prop, uint64_t value);
--- a/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h	Sat Aug 01 11:59:47 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h	Sat Aug 01 15:09:50 2009 -0600
@@ -165,6 +165,7 @@
 	dmu_objset_stats_t zc_objset_stats;
 	struct drr_begin zc_begin_record;
 	zinject_record_t zc_inject_record;
+	boolean_t	zc_defer_destroy;
 } zfs_cmd_t;
 
 typedef struct zfs_useracct {
--- a/usr/src/uts/common/fs/zfs/zfs_ctldir.c	Sat Aug 01 11:59:47 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/zfs_ctldir.c	Sat Aug 01 15:09:50 2009 -0600
@@ -700,7 +700,7 @@
 		if (err)
 			avl_add(&sdp->sd_snaps, sep);
 		else
-			err = dmu_objset_destroy(snapname);
+			err = dmu_objset_destroy(snapname, B_FALSE);
 	} else {
 		err = ENOENT;
 	}
--- a/usr/src/uts/common/fs/zfs/zfs_ioctl.c	Sat Aug 01 11:59:47 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/zfs_ioctl.c	Sat Aug 01 15:09:50 2009 -0600
@@ -761,6 +761,20 @@
 	return (zfs_secpolicy_setprop(zc->zc_name, ZFS_PROP_VERSION, cr));
 }
 
+static int
+zfs_secpolicy_hold(zfs_cmd_t *zc, cred_t *cr)
+{
+	return (zfs_secpolicy_write_perms(zc->zc_name,
+	    ZFS_DELEG_PERM_HOLD, cr));
+}
+
+static int
+zfs_secpolicy_release(zfs_cmd_t *zc, cred_t *cr)
+{
+	return (zfs_secpolicy_write_perms(zc->zc_name,
+	    ZFS_DELEG_PERM_RELEASE, cr));
+}
+
 /*
  * Returns the nvlist as specified by the user in the zfs_cmd_t.
  */
@@ -2466,7 +2480,7 @@
 	 */
 	if (error == 0) {
 		if ((error = zfs_set_prop_nvlist(zc->zc_name, nvprops)) != 0)
-			(void) dmu_objset_destroy(zc->zc_name);
+			(void) dmu_objset_destroy(zc->zc_name, B_FALSE);
 	}
 	nvlist_free(nvprops);
 	return (error);
@@ -2553,8 +2567,9 @@
 
 /*
  * inputs:
- * zc_name	name of filesystem
- * zc_value	short name of snapshot
+ * zc_name		name of filesystem
+ * zc_value		short name of snapshot
+ * zc_defer_destroy	mark for deferred destroy
  *
  * outputs:	none
  */
@@ -2569,13 +2584,15 @@
 	    zfs_unmount_snap, zc->zc_value, DS_FIND_CHILDREN);
 	if (err)
 		return (err);
-	return (dmu_snapshots_destroy(zc->zc_name, zc->zc_value));
+	return (dmu_snapshots_destroy(zc->zc_name, zc->zc_value,
+	    zc->zc_defer_destroy));
 }
 
 /*
  * inputs:
  * zc_name		name of dataset to destroy
  * zc_objset_type	type of objset
+ * zc_defer_destroy	mark for deferred destroy
  *
  * outputs:		none
  */
@@ -2588,7 +2605,7 @@
 			return (err);
 	}
 
-	return (dmu_objset_destroy(zc->zc_name));
+	return (dmu_objset_destroy(zc->zc_name, zc->zc_defer_destroy));
 }
 
 /*
@@ -3422,6 +3439,69 @@
 }
 
 /*
+ * inputs:
+ * zc_name	name of filesystem
+ * zc_value	short name of snap
+ * zc_string	user-supplied tag for this reference
+ * zc_cookie	recursive flag
+ *
+ * outputs:		none
+ */
+static int
+zfs_ioc_hold(zfs_cmd_t *zc)
+{
+	boolean_t recursive = zc->zc_cookie;
+
+	if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
+		return (EINVAL);
+
+	return (dsl_dataset_user_hold(zc->zc_name, zc->zc_value,
+	    zc->zc_string, recursive));
+}
+
+/*
+ * inputs:
+ * zc_name	name of dataset from which we're releasing a user reference
+ * zc_value	short name of snap
+ * zc_string	user-supplied tag for this reference
+ * zc_cookie	recursive flag
+ *
+ * outputs:		none
+ */
+static int
+zfs_ioc_release(zfs_cmd_t *zc)
+{
+	boolean_t recursive = zc->zc_cookie;
+
+	if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
+		return (EINVAL);
+
+	return (dsl_dataset_user_release(zc->zc_name, zc->zc_value,
+	    zc->zc_string, recursive));
+}
+
+/*
+ * inputs:
+ * zc_name		name of filesystem
+ *
+ * outputs:
+ * zc_nvlist_dst{_size}	nvlist of snapshot holds
+ */
+static int
+zfs_ioc_get_holds(zfs_cmd_t *zc)
+{
+	nvlist_t *nvp;
+	int error;
+
+	if ((error = dsl_dataset_get_holds(zc->zc_name, &nvp)) == 0) {
+		error = put_nvlist(zc, nvp);
+		nvlist_free(nvp);
+	}
+
+	return (error);
+}
+
+/*
  * pool create, destroy, and export don't log the history as part of
  * zfsdev_ioctl, but rather zfs_ioc_pool_create, and zfs_ioc_pool_export
  * do the logging of those commands.
@@ -3524,6 +3604,11 @@
 	    DATASET_NAME, B_FALSE, B_FALSE },
 	{ zfs_ioc_userspace_upgrade, zfs_secpolicy_userspace_upgrade,
 	    DATASET_NAME, B_FALSE, B_TRUE },
+	{ zfs_ioc_hold, zfs_secpolicy_hold, DATASET_NAME, B_TRUE, B_TRUE },
+	{ zfs_ioc_release, zfs_secpolicy_release, DATASET_NAME, B_TRUE,
+	    B_TRUE },
+	{ zfs_ioc_get_holds, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
+	    B_TRUE }
 };
 
 int
--- a/usr/src/uts/common/sys/fs/zfs.h	Sat Aug 01 11:59:47 2009 -0700
+++ b/usr/src/uts/common/sys/fs/zfs.h	Sat Aug 01 15:09:50 2009 -0600
@@ -27,6 +27,8 @@
 #ifndef	_SYS_FS_ZFS_H
 #define	_SYS_FS_ZFS_H
 
+#include <sys/time.h>
+
 #ifdef	__cplusplus
 extern "C" {
 #endif
@@ -112,6 +114,8 @@
 	ZFS_PROP_USEDREFRESERV,
 	ZFS_PROP_USERACCOUNTING,	/* not exposed to the user */
 	ZFS_PROP_STMF_SHAREINFO,	/* not exposed to the user */
+	ZFS_PROP_DEFER_DESTROY,
+	ZFS_PROP_USERREFS,
 	ZFS_NUM_PROPS
 } zfs_prop_t;
 
@@ -282,14 +286,15 @@
 #define	SPA_VERSION_15			15ULL
 #define	SPA_VERSION_16			16ULL
 #define	SPA_VERSION_17			17ULL
+#define	SPA_VERSION_18			18ULL
 /*
  * When bumping up SPA_VERSION, make sure GRUB ZFS understands the on-disk
  * format change. Go to usr/src/grub/grub-0.97/stage2/{zfs-include/, fsys_zfs*},
  * and do the appropriate changes.  Also bump the version number in
  * usr/src/grub/capability.
  */
-#define	SPA_VERSION			SPA_VERSION_17
-#define	SPA_VERSION_STRING		"17"
+#define	SPA_VERSION			SPA_VERSION_18
+#define	SPA_VERSION_STRING		"18"
 
 /*
  * Symbolic names for the changes that caused a SPA_VERSION switch.
@@ -328,6 +333,7 @@
 #define	SPA_VERSION_USERSPACE		SPA_VERSION_15
 #define	SPA_VERSION_STMF_PROP		SPA_VERSION_16
 #define	SPA_VERSION_RAIDZ3		SPA_VERSION_17
+#define	SPA_VERSION_USERREFS		SPA_VERSION_18
 
 /*
  * ZPL version - rev'd whenever an incompatible on-disk format change
@@ -604,7 +610,10 @@
 	ZFS_IOC_SMB_ACL,
 	ZFS_IOC_USERSPACE_ONE,
 	ZFS_IOC_USERSPACE_MANY,
-	ZFS_IOC_USERSPACE_UPGRADE
+	ZFS_IOC_USERSPACE_UPGRADE,
+	ZFS_IOC_HOLD,
+	ZFS_IOC_RELEASE,
+	ZFS_IOC_GET_HOLDS
 } zfs_ioc_t;
 
 /*
@@ -718,6 +727,8 @@
 	LOG_DS_REFQUOTA,
 	LOG_DS_REFRESERV,
 	LOG_POOL_SCRUB_DONE,
+	LOG_DS_USER_HOLD,
+	LOG_DS_USER_RELEASE,
 	LOG_END
 } history_internal_events_t;