3740 Poor ZFS send / receive performance due to snapshot hold / release processing
authorSteven Hartland <smh@freebsd.org>
Tue, 11 Jun 2013 22:01:53 -0800
changeset 14128 1a272fe1337b
parent 14127 849852750426
child 14129 31867e9f3bb9
child 14130 c8bc47846d40
3740 Poor ZFS send / receive performance due to snapshot hold / release processing Reviewed by: Matthew Ahrens <[email protected]> Approved by: Christopher Siden <[email protected]>
usr/src/cmd/ndmpd/ndmp/ndmpd_chkpnt.c
usr/src/cmd/zfs/zfs_main.c
usr/src/cmd/zhack/zhack.c
usr/src/cmd/ztest/ztest.c
usr/src/lib/libzfs/common/libzfs.h
usr/src/lib/libzfs/common/libzfs_dataset.c
usr/src/lib/libzfs/common/libzfs_sendrecv.c
usr/src/lib/libzfs_core/common/libzfs_core.c
usr/src/uts/common/fs/zfs/dsl_destroy.c
usr/src/uts/common/fs/zfs/dsl_pool.c
usr/src/uts/common/fs/zfs/dsl_userhold.c
usr/src/uts/common/fs/zfs/sys/dsl_dataset.h
usr/src/uts/common/fs/zfs/sys/dsl_userhold.h
usr/src/uts/common/fs/zfs/zfs_ioctl.c
--- a/usr/src/cmd/ndmpd/ndmp/ndmpd_chkpnt.c	Tue Jun 11 09:13:55 2013 -0800
+++ b/usr/src/cmd/ndmpd/ndmp/ndmpd_chkpnt.c	Tue Jun 11 22:01:53 2013 -0800
@@ -1,6 +1,7 @@
 /*
  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2013 Steven Hartland. All rights reserved.
  */
 
 /*
@@ -203,7 +204,7 @@
 	}
 
 	p = strchr(snapname, '@') + 1;
-	if (zfs_hold(zhp, p, jname, recursive, B_FALSE, cleanup_fd) != 0) {
+	if (zfs_hold(zhp, p, jname, recursive, cleanup_fd) != 0) {
 		NDMP_LOG(LOG_ERR, "Cannot hold snapshot %s", p);
 		zfs_close(zhp);
 		return (-1);
--- a/usr/src/cmd/zfs/zfs_main.c	Tue Jun 11 09:13:55 2013 -0800
+++ b/usr/src/cmd/zfs/zfs_main.c	Tue Jun 11 22:01:53 2013 -0800
@@ -25,6 +25,7 @@
  * Copyright (c) 2012 by Delphix. All rights reserved.
  * Copyright 2012 Milan Jurik. All rights reserved.
  * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2013 Steven Hartland.  All rights reserved.
  */
 
 #include <assert.h>
@@ -5164,8 +5165,7 @@
 			continue;
 		}
 		if (holding) {
-			if (zfs_hold(zhp, delim+1, tag, recursive,
-			    B_FALSE, -1) != 0)
+			if (zfs_hold(zhp, delim+1, tag, recursive, -1) != 0)
 				++errors;
 		} else {
 			if (zfs_release(zhp, delim+1, tag, recursive) != 0)
--- a/usr/src/cmd/zhack/zhack.c	Tue Jun 11 09:13:55 2013 -0800
+++ b/usr/src/cmd/zhack/zhack.c	Tue Jun 11 22:01:53 2013 -0800
@@ -21,6 +21,7 @@
 
 /*
  * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2013 Steven Hartland. All rights reserved.
  */
 
 /*
@@ -153,7 +154,7 @@
 	g_importargs.poolname = g_pool;
 	pools = zpool_search_import(g_zfs, &g_importargs);
 
-	if (pools == NULL || nvlist_next_nvpair(pools, NULL) == NULL) {
+	if (nvlist_empty(pools)) {
 		if (!g_importargs.can_be_active) {
 			g_importargs.can_be_active = B_TRUE;
 			if (zpool_search_import(g_zfs, &g_importargs) != NULL ||
--- a/usr/src/cmd/ztest/ztest.c	Tue Jun 11 09:13:55 2013 -0800
+++ b/usr/src/cmd/ztest/ztest.c	Tue Jun 11 22:01:53 2013 -0800
@@ -22,6 +22,7 @@
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2012 by Delphix. All rights reserved.
  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
+ * Copyright (c) 2013 Steven Hartland. All rights reserved.
  */
 
 /*
@@ -4711,7 +4712,7 @@
 
 	error = user_release_one(fullname, tag);
 	if (error)
-		fatal(0, "user_release_one(%s)", fullname, tag);
+		fatal(0, "user_release_one(%s, %s) = %d", fullname, tag, error);
 
 	VERIFY3U(dmu_objset_hold(fullname, FTAG, &origin), ==, ENOENT);
 
--- a/usr/src/lib/libzfs/common/libzfs.h	Tue Jun 11 09:13:55 2013 -0800
+++ b/usr/src/lib/libzfs/common/libzfs.h	Tue Jun 11 22:01:53 2013 -0800
@@ -24,6 +24,7 @@
  * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
  * Copyright (c) 2012 by Delphix. All rights reserved.
  * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2013 Steven Hartland. All rights reserved.
  */
 
 #ifndef	_LIBZFS_H
@@ -594,7 +595,8 @@
 
 extern int zfs_promote(zfs_handle_t *);
 extern int zfs_hold(zfs_handle_t *, const char *, const char *,
-    boolean_t, boolean_t, int);
+    boolean_t, int);
+extern int zfs_hold_nvl(zfs_handle_t *, int, nvlist_t *);
 extern int zfs_release(zfs_handle_t *, const char *, const char *, boolean_t);
 extern int zfs_get_holds(zfs_handle_t *, nvlist_t **);
 extern uint64_t zvol_volsize_to_reservation(uint64_t, nvlist_t *);
--- a/usr/src/lib/libzfs/common/libzfs_dataset.c	Tue Jun 11 09:13:55 2013 -0800
+++ b/usr/src/lib/libzfs/common/libzfs_dataset.c	Tue Jun 11 22:01:53 2013 -0800
@@ -25,6 +25,7 @@
  * Copyright (c) 2012 DEY Storage Systems, Inc.  All rights reserved.
  * Copyright 2012 Nexenta Systems, Inc. All rights reserved.
  * Copyright (c) 2013 Martin Matuska. All rights reserved.
+ * Copyright (c) 2013 Steven Hartland. All rights reserved.
  */
 
 #include <ctype.h>
@@ -3104,18 +3105,14 @@
 zfs_check_snap_cb(zfs_handle_t *zhp, void *arg)
 {
 	struct destroydata *dd = arg;
-	zfs_handle_t *szhp;
 	char name[ZFS_MAXNAMELEN];
 	int rv = 0;
 
 	(void) snprintf(name, sizeof (name),
 	    "%s@%s", zhp->zfs_name, dd->snapname);
 
-	szhp = make_dataset_handle(zhp->zfs_hdl, name);
-	if (szhp) {
+	if (lzc_exists(name))
 		verify(nvlist_add_boolean(dd->nvl, name) == 0);
-		zfs_close(szhp);
-	}
 
 	rv = zfs_iter_filesystems(zhp, zfs_check_snap_cb, dd);
 	zfs_close(zhp);
@@ -3135,7 +3132,7 @@
 	verify(nvlist_alloc(&dd.nvl, NV_UNIQUE_NAME, 0) == 0);
 	(void) zfs_check_snap_cb(zfs_handle_dup(zhp), &dd);
 
-	if (nvlist_next_nvpair(dd.nvl, NULL) == NULL) {
+	if (nvlist_empty(dd.nvl)) {
 		ret = zfs_standard_error_fmt(zhp->zfs_hdl, ENOENT,
 		    dgettext(TEXT_DOMAIN, "cannot destroy '%s@%s'"),
 		    zhp->zfs_name, snapname);
@@ -3160,7 +3157,7 @@
 	if (ret == 0)
 		return (0);
 
-	if (nvlist_next_nvpair(errlist, NULL) == NULL) {
+	if (nvlist_empty(errlist)) {
 		char errbuf[1024];
 		(void) snprintf(errbuf, sizeof (errbuf),
 		    dgettext(TEXT_DOMAIN, "cannot destroy snapshots"));
@@ -4082,18 +4079,14 @@
 zfs_hold_one(zfs_handle_t *zhp, void *arg)
 {
 	struct holdarg *ha = arg;
-	zfs_handle_t *szhp;
 	char name[ZFS_MAXNAMELEN];
 	int rv = 0;
 
 	(void) snprintf(name, sizeof (name),
 	    "%s@%s", zhp->zfs_name, ha->snapname);
 
-	szhp = make_dataset_handle(zhp->zfs_hdl, name);
-	if (szhp) {
+	if (lzc_exists(name))
 		fnvlist_add_string(ha->nvl, name, ha->tag);
-		zfs_close(szhp);
-	}
 
 	if (ha->recursive)
 		rv = zfs_iter_filesystems(zhp, zfs_hold_one, ha);
@@ -4103,14 +4096,10 @@
 
 int
 zfs_hold(zfs_handle_t *zhp, const char *snapname, const char *tag,
-    boolean_t recursive, boolean_t enoent_ok, int cleanup_fd)
+    boolean_t recursive, int cleanup_fd)
 {
 	int ret;
 	struct holdarg ha;
-	nvlist_t *errors;
-	libzfs_handle_t *hdl = zhp->zfs_hdl;
-	char errbuf[1024];
-	nvpair_t *elem;
 
 	ha.nvl = fnvlist_alloc();
 	ha.snapname = snapname;
@@ -4118,26 +4107,44 @@
 	ha.recursive = recursive;
 	(void) zfs_hold_one(zfs_handle_dup(zhp), &ha);
 
-	if (nvlist_next_nvpair(ha.nvl, NULL) == NULL) {
+	if (nvlist_empty(ha.nvl)) {
+		char errbuf[1024];
+
 		fnvlist_free(ha.nvl);
 		ret = ENOENT;
-		if (!enoent_ok) {
-			(void) snprintf(errbuf, sizeof (errbuf),
-			    dgettext(TEXT_DOMAIN,
-			    "cannot hold snapshot '%s@%s'"),
-			    zhp->zfs_name, snapname);
-			(void) zfs_standard_error(hdl, ret, errbuf);
-		}
+		(void) snprintf(errbuf, sizeof (errbuf),
+		    dgettext(TEXT_DOMAIN,
+		    "cannot hold snapshot '%s@%s'"),
+		    zhp->zfs_name, snapname);
+		(void) zfs_standard_error(zhp->zfs_hdl, ret, errbuf);
 		return (ret);
 	}
 
-	ret = lzc_hold(ha.nvl, cleanup_fd, &errors);
+	ret = zfs_hold_nvl(zhp, cleanup_fd, ha.nvl);
 	fnvlist_free(ha.nvl);
 
-	if (ret == 0)
+	return (ret);
+}
+
+int
+zfs_hold_nvl(zfs_handle_t *zhp, int cleanup_fd, nvlist_t *holds)
+{
+	int ret;
+	nvlist_t *errors;
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+	char errbuf[1024];
+	nvpair_t *elem;
+
+	errors = NULL;
+	ret = lzc_hold(holds, cleanup_fd, &errors);
+
+	if (ret == 0) {
+		/* There may be errors even in the success case. */
+		fnvlist_free(errors);
 		return (0);
-
-	if (nvlist_next_nvpair(errors, NULL) == NULL) {
+	}
+
+	if (nvlist_empty(errors)) {
 		/* no hold-specific errors */
 		(void) snprintf(errbuf, sizeof (errbuf),
 		    dgettext(TEXT_DOMAIN, "cannot hold"));
@@ -4177,10 +4184,6 @@
 		case EEXIST:
 			(void) zfs_error(hdl, EZFS_REFTAG_HOLD, errbuf);
 			break;
-		case ENOENT:
-			if (enoent_ok)
-				return (ENOENT);
-			/* FALLTHROUGH */
 		default:
 			(void) zfs_standard_error(hdl,
 			    fnvpair_value_int32(elem), errbuf);
@@ -4191,30 +4194,21 @@
 	return (ret);
 }
 
-struct releasearg {
-	nvlist_t *nvl;
-	const char *snapname;
-	const char *tag;
-	boolean_t recursive;
-};
-
 static int
 zfs_release_one(zfs_handle_t *zhp, void *arg)
 {
 	struct holdarg *ha = arg;
-	zfs_handle_t *szhp;
 	char name[ZFS_MAXNAMELEN];
 	int rv = 0;
 
 	(void) snprintf(name, sizeof (name),
 	    "%s@%s", zhp->zfs_name, ha->snapname);
 
-	szhp = make_dataset_handle(zhp->zfs_hdl, name);
-	if (szhp) {
+	if (lzc_exists(name)) {
 		nvlist_t *holds = fnvlist_alloc();
 		fnvlist_add_boolean(holds, ha->tag);
 		fnvlist_add_nvlist(ha->nvl, name, holds);
-		zfs_close(szhp);
+		fnvlist_free(holds);
 	}
 
 	if (ha->recursive)
@@ -4229,7 +4223,7 @@
 {
 	int ret;
 	struct holdarg ha;
-	nvlist_t *errors;
+	nvlist_t *errors = NULL;
 	nvpair_t *elem;
 	libzfs_handle_t *hdl = zhp->zfs_hdl;
 	char errbuf[1024];
@@ -4240,7 +4234,7 @@
 	ha.recursive = recursive;
 	(void) zfs_release_one(zfs_handle_dup(zhp), &ha);
 
-	if (nvlist_next_nvpair(ha.nvl, NULL) == NULL) {
+	if (nvlist_empty(ha.nvl)) {
 		fnvlist_free(ha.nvl);
 		ret = ENOENT;
 		(void) snprintf(errbuf, sizeof (errbuf),
@@ -4254,10 +4248,13 @@
 	ret = lzc_release(ha.nvl, &errors);
 	fnvlist_free(ha.nvl);
 
-	if (ret == 0)
+	if (ret == 0) {
+		/* There may be errors even in the success case. */
+		fnvlist_free(errors);
 		return (0);
-
-	if (nvlist_next_nvpair(errors, NULL) == NULL) {
+	}
+
+	if (nvlist_empty(errors)) {
 		/* no hold-specific errors */
 		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
 		    "cannot release"));
--- a/usr/src/lib/libzfs/common/libzfs_sendrecv.c	Tue Jun 11 09:13:55 2013 -0800
+++ b/usr/src/lib/libzfs/common/libzfs_sendrecv.c	Tue Jun 11 22:01:53 2013 -0800
@@ -23,6 +23,7 @@
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2012 by Delphix. All rights reserved.
  * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2013 Steven Hartland. All rights reserved.
  */
 
 #include <assert.h>
@@ -793,6 +794,7 @@
 	int outfd;
 	boolean_t err;
 	nvlist_t *fss;
+	nvlist_t *snapholds;
 	avl_tree_t *fsavl;
 	snapfilter_cb_t *filter_cb;
 	void *filter_cb_arg;
@@ -942,41 +944,19 @@
 	return (0);
 }
 
-static int
-hold_for_send(zfs_handle_t *zhp, send_dump_data_t *sdd)
+static void
+gather_holds(zfs_handle_t *zhp, send_dump_data_t *sdd)
 {
-	zfs_handle_t *pzhp;
-	int error = 0;
-	char *thissnap;
-
 	assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
 
-	if (sdd->dryrun)
-		return (0);
-
 	/*
-	 * zfs_send() only opens a cleanup_fd for sends that need it,
+	 * zfs_send() only sets snapholds for sends that need them,
 	 * e.g. replication and doall.
 	 */
-	if (sdd->cleanup_fd == -1)
-		return (0);
-
-	thissnap = strchr(zhp->zfs_name, '@') + 1;
-	*(thissnap - 1) = '\0';
-	pzhp = zfs_open(zhp->zfs_hdl, zhp->zfs_name, ZFS_TYPE_DATASET);
-	*(thissnap - 1) = '@';
-
-	/*
-	 * It's OK if the parent no longer exists.  The send code will
-	 * handle that error.
-	 */
-	if (pzhp) {
-		error = zfs_hold(pzhp, thissnap, sdd->holdtag,
-		    B_FALSE, B_TRUE, sdd->cleanup_fd);
-		zfs_close(pzhp);
-	}
-
-	return (error);
+	if (sdd->snapholds == NULL)
+		return;
+
+	fnvlist_add_string(sdd->snapholds, zhp->zfs_name, sdd->holdtag);
 }
 
 static void *
@@ -1032,28 +1012,23 @@
 	send_dump_data_t *sdd = arg;
 	progress_arg_t pa = { 0 };
 	pthread_t tid;
-
 	char *thissnap;
 	int err;
 	boolean_t isfromsnap, istosnap, fromorigin;
 	boolean_t exclude = B_FALSE;
 
+	err = 0;
 	thissnap = strchr(zhp->zfs_name, '@') + 1;
 	isfromsnap = (sdd->fromsnap != NULL &&
 	    strcmp(sdd->fromsnap, thissnap) == 0);
 
 	if (!sdd->seenfrom && isfromsnap) {
-		err = hold_for_send(zhp, sdd);
-		if (err == 0) {
-			sdd->seenfrom = B_TRUE;
-			(void) strcpy(sdd->prevsnap, thissnap);
-			sdd->prevsnap_obj = zfs_prop_get_int(zhp,
-			    ZFS_PROP_OBJSETID);
-		} else if (err == ENOENT) {
-			err = 0;
-		}
+		gather_holds(zhp, sdd);
+		sdd->seenfrom = B_TRUE;
+		(void) strcpy(sdd->prevsnap, thissnap);
+		sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
 		zfs_close(zhp);
-		return (err);
+		return (0);
 	}
 
 	if (sdd->seento || !sdd->seenfrom) {
@@ -1104,14 +1079,7 @@
 		return (0);
 	}
 
-	err = hold_for_send(zhp, sdd);
-	if (err) {
-		if (err == ENOENT)
-			err = 0;
-		zfs_close(zhp);
-		return (err);
-	}
-
+	gather_holds(zhp, sdd);
 	fromorigin = sdd->prevsnap[0] == '\0' &&
 	    (sdd->fromorigin || sdd->replicate);
 
@@ -1379,7 +1347,7 @@
 	avl_tree_t *fsavl = NULL;
 	static uint64_t holdseq;
 	int spa_version;
-	pthread_t tid;
+	pthread_t tid = 0;
 	int pipefd[2];
 	dedup_arg_t dda = { 0 };
 	int featureflags = 0;
@@ -1452,11 +1420,8 @@
 				*debugnvp = hdrnv;
 			else
 				nvlist_free(hdrnv);
-			if (err) {
-				fsavl_destroy(fsavl);
-				nvlist_free(fss);
+			if (err)
 				goto stderr_out;
-			}
 		}
 
 		if (!flags->dryrun) {
@@ -1480,8 +1445,6 @@
 			}
 			free(packbuf);
 			if (err == -1) {
-				fsavl_destroy(fsavl);
-				nvlist_free(fss);
 				err = errno;
 				goto stderr_out;
 			}
@@ -1492,8 +1455,6 @@
 			drr.drr_u.drr_end.drr_checksum = zc;
 			err = write(outfd, &drr, sizeof (drr));
 			if (err == -1) {
-				fsavl_destroy(fsavl);
-				nvlist_free(fss);
 				err = errno;
 				goto stderr_out;
 			}
@@ -1505,7 +1466,7 @@
 	/* dump each stream */
 	sdd.fromsnap = fromsnap;
 	sdd.tosnap = tosnap;
-	if (flags->dedup)
+	if (tid != 0)
 		sdd.outfd = pipefd[0];
 	else
 		sdd.outfd = outfd;
@@ -1542,36 +1503,71 @@
 			err = errno;
 			goto stderr_out;
 		}
+		sdd.snapholds = fnvlist_alloc();
 	} else {
 		sdd.cleanup_fd = -1;
+		sdd.snapholds = NULL;
 	}
-	if (flags->verbose) {
+	if (flags->verbose || sdd.snapholds != NULL) {
 		/*
 		 * Do a verbose no-op dry run to get all the verbose output
-		 * before generating any data.  Then do a non-verbose real
-		 * run to generate the streams.
+		 * or to gather snapshot hold's before generating any data,
+		 * then do a non-verbose real run to generate the streams.
 		 */
 		sdd.dryrun = B_TRUE;
 		err = dump_filesystems(zhp, &sdd);
-		sdd.dryrun = flags->dryrun;
+
+		if (err != 0)
+			goto stderr_out;
+
+		if (flags->verbose) {
+			if (flags->parsable) {
+				(void) fprintf(stderr, "size\t%llu\n",
+				    (longlong_t)sdd.size);
+			} else {
+				char buf[16];
+				zfs_nicenum(sdd.size, buf, sizeof (buf));
+				(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+				    "total estimated size is %s\n"), buf);
+			}
+		}
+
+		/* Ensure no snaps found is treated as an error. */
+		if (!sdd.seento) {
+			err = ENOENT;
+			goto err_out;
+		}
+
+		/* Skip the second run if dryrun was requested. */
+		if (flags->dryrun)
+			goto err_out;
+
+		if (sdd.snapholds != NULL) {
+			err = zfs_hold_nvl(zhp, sdd.cleanup_fd, sdd.snapholds);
+			if (err != 0)
+				goto stderr_out;
+
+			fnvlist_free(sdd.snapholds);
+			sdd.snapholds = NULL;
+		}
+
+		sdd.dryrun = B_FALSE;
 		sdd.verbose = B_FALSE;
-		if (flags->parsable) {
-			(void) fprintf(stderr, "size\t%llu\n",
-			    (longlong_t)sdd.size);
-		} else {
-			char buf[16];
-			zfs_nicenum(sdd.size, buf, sizeof (buf));
-			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
-			    "total estimated size is %s\n"), buf);
-		}
 	}
+
 	err = dump_filesystems(zhp, &sdd);
 	fsavl_destroy(fsavl);
 	nvlist_free(fss);
 
-	if (flags->dedup) {
+	/* Ensure no snaps found is treated as an error. */
+	if (err == 0 && !sdd.seento)
+		err = ENOENT;
+
+	if (tid != 0) {
+		if (err != 0)
+			(void) pthread_cancel(tid);
+		(void) pthread_join(tid, NULL);
 		(void) close(pipefd[0]);
-		(void) pthread_join(tid, NULL);
 	}
 
 	if (sdd.cleanup_fd != -1) {
@@ -1599,9 +1595,13 @@
 stderr_out:
 	err = zfs_standard_error(zhp->zfs_hdl, err, errbuf);
 err_out:
+	fsavl_destroy(fsavl);
+	nvlist_free(fss);
+	fnvlist_free(sdd.snapholds);
+
 	if (sdd.cleanup_fd != -1)
 		VERIFY(0 == close(sdd.cleanup_fd));
-	if (flags->dedup) {
+	if (tid != 0) {
 		(void) pthread_cancel(tid);
 		(void) pthread_join(tid, NULL);
 		(void) close(pipefd[0]);
--- a/usr/src/lib/libzfs_core/common/libzfs_core.c	Tue Jun 11 09:13:55 2013 -0800
+++ b/usr/src/lib/libzfs_core/common/libzfs_core.c	Tue Jun 11 22:01:53 2013 -0800
@@ -21,6 +21,7 @@
 
 /*
  * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2013 Steven Hartland. All rights reserved.
  */
 
 /*
@@ -254,8 +255,11 @@
  * marked for deferred destruction, and will be destroyed when the last hold
  * or clone is removed/destroyed.
  *
+ * The return value will be ENOENT if none of the snapshots existed.
+ *
  * The return value will be 0 if all snapshots were destroyed (or marked for
- * later destruction if 'defer' is set) or didn't exist to begin with.
+ * later destruction if 'defer' is set) or didn't exist to begin with and
+ * at least one snapshot was destroyed.
  *
  * Otherwise the return value will be the errno of a (unspecified) snapshot
  * that failed, no snapshots will be destroyed, and the errlist will have an
@@ -286,7 +290,6 @@
 	nvlist_free(args);
 
 	return (error);
-
 }
 
 int
@@ -346,11 +349,22 @@
  * uncleanly, the holds will be released when the pool is next opened
  * or imported.
  *
- * The return value will be 0 if all holds were created. Otherwise the return
- * value will be the errno of a (unspecified) hold that failed, no holds will
- * be created, and the errlist will have an entry for each hold that
- * failed (name = snapshot).  The value in the errlist will be the error
- * code (int32).
+ * Holds for snapshots which don't exist will be skipped and have an entry
+ * added to errlist, but will not cause an overall failure, except in the
+ * case that all holds where skipped.
+ *
+ * The return value will be ENOENT if none of the snapshots for the requested
+ * holds existed.
+ *
+ * The return value will be 0 if the nvl holds was empty or all holds, for
+ * snapshots that existed, were succesfully created and at least one hold
+ * was created.
+ *
+ * Otherwise the return value will be the errno of a (unspecified) hold that
+ * failed and no holds will be created.
+ *
+ * In all cases the errlist will have an entry for each hold that failed
+ * (name = snapshot), with its value being the error code (int32).
  */
 int
 lzc_hold(nvlist_t *holds, int cleanup_fd, nvlist_t **errlist)
@@ -387,11 +401,20 @@
  * The snapshots must all be in the same pool.
  * The value is a nvlist whose keys are the holds to remove.
  *
- * The return value will be 0 if all holds were removed.
- * Otherwise the return value will be the errno of a (unspecified) release
- * that failed, no holds will be released, and the errlist will have an
- * entry for each snapshot that has failed releases (name = snapshot).
- * The value in the errlist will be the error code (int32) of a failed release.
+ * Holds which failed to release because they didn't exist will have an entry
+ * added to errlist, but will not cause an overall failure, except in the
+ * case that all releases where skipped.
+ *
+ * The return value will be ENOENT if none of the specified holds existed.
+ *
+ * The return value will be 0 if the nvl holds was empty or all holds that
+ * existed, were successfully removed and at least one hold was removed.
+ *
+ * Otherwise the return value will be the errno of a (unspecified) hold that
+ * failed to release and no holds will be released.
+ *
+ * In all cases the errlist will have an entry for each hold that failed to
+ * to release.
  */
 int
 lzc_release(nvlist_t *holds, nvlist_t **errlist)
--- a/usr/src/uts/common/fs/zfs/dsl_destroy.c	Tue Jun 11 09:13:55 2013 -0800
+++ b/usr/src/uts/common/fs/zfs/dsl_destroy.c	Tue Jun 11 22:01:53 2013 -0800
@@ -21,6 +21,7 @@
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2013 Steven Hartland. All rights reserved.
  */
 
 #include <sys/zfs_context.h>
@@ -127,6 +128,10 @@
 	pair = nvlist_next_nvpair(dsda->dsda_errlist, NULL);
 	if (pair != NULL)
 		return (fnvpair_value_int32(pair));
+
+	if (nvlist_empty(dsda->dsda_successful_snaps))
+		return (SET_ERROR(ENOENT));
+
 	return (0);
 }
 
--- a/usr/src/uts/common/fs/zfs/dsl_pool.c	Tue Jun 11 09:13:55 2013 -0800
+++ b/usr/src/uts/common/fs/zfs/dsl_pool.c	Tue Jun 11 22:01:53 2013 -0800
@@ -21,6 +21,7 @@
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2013 Steven Hartland. All rights reserved.
  */
 
 #include <sys/dsl_pool.h>
@@ -836,23 +837,34 @@
 	zap_cursor_t zc;
 	objset_t *mos = dp->dp_meta_objset;
 	uint64_t zapobj = dp->dp_tmp_userrefs_obj;
+	nvlist_t *holds;
 
 	if (zapobj == 0)
 		return;
 	ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS);
 
+	holds = fnvlist_alloc();
+
 	for (zap_cursor_init(&zc, mos, zapobj);
 	    zap_cursor_retrieve(&zc, &za) == 0;
 	    zap_cursor_advance(&zc)) {
 		char *htag;
-		uint64_t dsobj;
+		nvlist_t *tags;
 
 		htag = strchr(za.za_name, '-');
 		*htag = '\0';
 		++htag;
-		dsobj = strtonum(za.za_name, NULL);
-		dsl_dataset_user_release_tmp(dp, dsobj, htag);
+		if (nvlist_lookup_nvlist(holds, za.za_name, &tags) != 0) {
+			tags = fnvlist_alloc();
+			fnvlist_add_boolean(tags, htag);
+			fnvlist_add_nvlist(holds, za.za_name, tags);
+			fnvlist_free(tags);
+		} else {
+			fnvlist_add_boolean(tags, htag);
+		}
 	}
+	dsl_dataset_user_release_tmp(dp, holds);
+	fnvlist_free(holds);
 	zap_cursor_fini(&zc);
 }
 
--- a/usr/src/uts/common/fs/zfs/dsl_userhold.c	Tue Jun 11 09:13:55 2013 -0800
+++ b/usr/src/uts/common/fs/zfs/dsl_userhold.c	Tue Jun 11 22:01:53 2013 -0800
@@ -21,6 +21,7 @@
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2013 Steven Hartland. All rights reserved.
  */
 
 #include <sys/zfs_context.h>
@@ -37,6 +38,7 @@
 
 typedef struct dsl_dataset_user_hold_arg {
 	nvlist_t *dduha_holds;
+	nvlist_t *dduha_chkholds;
 	nvlist_t *dduha_errlist;
 	minor_t dduha_minor;
 } dsl_dataset_user_hold_arg_t;
@@ -53,25 +55,24 @@
 	objset_t *mos = dp->dp_meta_objset;
 	int error = 0;
 
+	ASSERT(dsl_pool_config_held(dp));
+
 	if (strlen(htag) > MAXNAMELEN)
-		return (E2BIG);
+		return (SET_ERROR(E2BIG));
 	/* Tempholds have a more restricted length */
 	if (temphold && strlen(htag) + MAX_TAG_PREFIX_LEN >= MAXNAMELEN)
-		return (E2BIG);
+		return (SET_ERROR(E2BIG));
 
 	/* tags must be unique (if ds already exists) */
-	if (ds != NULL) {
-		mutex_enter(&ds->ds_lock);
-		if (ds->ds_phys->ds_userrefs_obj != 0) {
-			uint64_t value;
-			error = zap_lookup(mos, ds->ds_phys->ds_userrefs_obj,
-			    htag, 8, 1, &value);
-			if (error == 0)
-				error = SET_ERROR(EEXIST);
-			else if (error == ENOENT)
-				error = 0;
-		}
-		mutex_exit(&ds->ds_lock);
+	if (ds != NULL && ds->ds_phys->ds_userrefs_obj != 0) {
+		uint64_t value;
+
+		error = zap_lookup(mos, ds->ds_phys->ds_userrefs_obj,
+		    htag, 8, 1, &value);
+		if (error == 0)
+			error = SET_ERROR(EEXIST);
+		else if (error == ENOENT)
+			error = 0;
 	}
 
 	return (error);
@@ -82,52 +83,67 @@
 {
 	dsl_dataset_user_hold_arg_t *dduha = arg;
 	dsl_pool_t *dp = dmu_tx_pool(tx);
-	nvpair_t *pair;
-	int rv = 0;
 
 	if (spa_version(dp->dp_spa) < SPA_VERSION_USERREFS)
 		return (SET_ERROR(ENOTSUP));
 
-	for (pair = nvlist_next_nvpair(dduha->dduha_holds, NULL); pair != NULL;
-	    pair = nvlist_next_nvpair(dduha->dduha_holds, pair)) {
+	if (!dmu_tx_is_syncing(tx))
+		return (0);
+
+	for (nvpair_t *pair = nvlist_next_nvpair(dduha->dduha_holds, NULL);
+	    pair != NULL; pair = nvlist_next_nvpair(dduha->dduha_holds, pair)) {
+		dsl_dataset_t *ds;
 		int error = 0;
-		dsl_dataset_t *ds;
-		char *htag;
+		char *htag, *name;
 
 		/* must be a snapshot */
-		if (strchr(nvpair_name(pair), '@') == NULL)
+		name = nvpair_name(pair);
+		if (strchr(name, '@') == NULL)
 			error = SET_ERROR(EINVAL);
 
 		if (error == 0)
 			error = nvpair_value_string(pair, &htag);
-		if (error == 0) {
-			error = dsl_dataset_hold(dp,
-			    nvpair_name(pair), FTAG, &ds);
-		}
+
+		if (error == 0)
+			error = dsl_dataset_hold(dp, name, FTAG, &ds);
+
 		if (error == 0) {
 			error = dsl_dataset_user_hold_check_one(ds, htag,
 			    dduha->dduha_minor != 0, tx);
 			dsl_dataset_rele(ds, FTAG);
 		}
 
-		if (error != 0) {
-			rv = error;
-			fnvlist_add_int32(dduha->dduha_errlist,
-			    nvpair_name(pair), error);
+		if (error == 0) {
+			fnvlist_add_string(dduha->dduha_chkholds, name, htag);
+		} else {
+			/*
+			 * We register ENOENT errors so they can be correctly
+			 * reported if needed, such as when all holds fail.
+			 */
+			fnvlist_add_int32(dduha->dduha_errlist, name, error);
+			if (error != ENOENT)
+				return (error);
 		}
 	}
-	return (rv);
+
+	/* Return ENOENT if no holds would be created. */
+	if (nvlist_empty(dduha->dduha_chkholds))
+		return (SET_ERROR(ENOENT));
+
+	return (0);
 }
 
-void
-dsl_dataset_user_hold_sync_one(dsl_dataset_t *ds, const char *htag,
-    minor_t minor, uint64_t now, dmu_tx_t *tx)
+
+static void
+dsl_dataset_user_hold_sync_one_impl(nvlist_t *tmpholds, dsl_dataset_t *ds,
+    const char *htag, minor_t minor, uint64_t now, dmu_tx_t *tx)
 {
 	dsl_pool_t *dp = ds->ds_dir->dd_pool;
 	objset_t *mos = dp->dp_meta_objset;
 	uint64_t zapobj;
 
-	mutex_enter(&ds->ds_lock);
+	ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
+
 	if (ds->ds_phys->ds_userrefs_obj == 0) {
 		/*
 		 * This is the first user hold for this dataset.  Create
@@ -140,14 +156,26 @@
 		zapobj = ds->ds_phys->ds_userrefs_obj;
 	}
 	ds->ds_userrefs++;
-	mutex_exit(&ds->ds_lock);
 
 	VERIFY0(zap_add(mos, zapobj, htag, 8, 1, &now, tx));
 
 	if (minor != 0) {
+		char name[MAXNAMELEN];
+		nvlist_t *tags;
+
 		VERIFY0(dsl_pool_user_hold(dp, ds->ds_object,
 		    htag, now, tx));
-		dsl_register_onexit_hold_cleanup(ds, htag, minor);
+		(void) snprintf(name, sizeof (name), "%llx",
+		    (u_longlong_t)ds->ds_object);
+
+		if (nvlist_lookup_nvlist(tmpholds, name, &tags) != 0) {
+			tags = fnvlist_alloc();
+			fnvlist_add_boolean(tags, htag);
+			fnvlist_add_nvlist(tmpholds, name, tags);
+			fnvlist_free(tags);
+		} else {
+			fnvlist_add_boolean(tags, htag);
+		}
 	}
 
 	spa_history_log_internal_ds(ds, "hold", tx,
@@ -155,305 +183,10 @@
 	    htag, minor != 0, ds->ds_userrefs);
 }
 
-static void
-dsl_dataset_user_hold_sync(void *arg, dmu_tx_t *tx)
-{
-	dsl_dataset_user_hold_arg_t *dduha = arg;
-	dsl_pool_t *dp = dmu_tx_pool(tx);
-	nvpair_t *pair;
-	uint64_t now = gethrestime_sec();
-
-	for (pair = nvlist_next_nvpair(dduha->dduha_holds, NULL); pair != NULL;
-	    pair = nvlist_next_nvpair(dduha->dduha_holds, pair)) {
-		dsl_dataset_t *ds;
-		VERIFY0(dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds));
-		dsl_dataset_user_hold_sync_one(ds, fnvpair_value_string(pair),
-		    dduha->dduha_minor, now, tx);
-		dsl_dataset_rele(ds, FTAG);
-	}
-}
-
-/*
- * holds is nvl of snapname -> holdname
- * errlist will be filled in with snapname -> error
- * if cleanup_minor is not 0, the holds will be temporary, cleaned up
- * when the process exits.
- *
- * if any fails, all will fail.
- */
-int
-dsl_dataset_user_hold(nvlist_t *holds, minor_t cleanup_minor, nvlist_t *errlist)
-{
-	dsl_dataset_user_hold_arg_t dduha;
-	nvpair_t *pair;
-
-	pair = nvlist_next_nvpair(holds, NULL);
-	if (pair == NULL)
-		return (0);
-
-	dduha.dduha_holds = holds;
-	dduha.dduha_errlist = errlist;
-	dduha.dduha_minor = cleanup_minor;
-
-	return (dsl_sync_task(nvpair_name(pair), dsl_dataset_user_hold_check,
-	    dsl_dataset_user_hold_sync, &dduha, fnvlist_num_pairs(holds)));
-}
-
-typedef struct dsl_dataset_user_release_arg {
-	nvlist_t *ddura_holds;
-	nvlist_t *ddura_todelete;
-	nvlist_t *ddura_errlist;
-} dsl_dataset_user_release_arg_t;
-
-static int
-dsl_dataset_user_release_check_one(dsl_dataset_t *ds,
-    nvlist_t *holds, boolean_t *todelete)
-{
-	uint64_t zapobj;
-	nvpair_t *pair;
-	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
-	int error;
-	int numholds = 0;
-
-	*todelete = B_FALSE;
-
-	if (!dsl_dataset_is_snapshot(ds))
-		return (SET_ERROR(EINVAL));
-
-	zapobj = ds->ds_phys->ds_userrefs_obj;
-	if (zapobj == 0)
-		return (SET_ERROR(ESRCH));
-
-	for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
-	    pair = nvlist_next_nvpair(holds, pair)) {
-		/* Make sure the hold exists */
-		uint64_t tmp;
-		error = zap_lookup(mos, zapobj, nvpair_name(pair), 8, 1, &tmp);
-		if (error == ENOENT)
-			error = SET_ERROR(ESRCH);
-		if (error != 0)
-			return (error);
-		numholds++;
-	}
-
-	if (DS_IS_DEFER_DESTROY(ds) && ds->ds_phys->ds_num_children == 1 &&
-	    ds->ds_userrefs == numholds) {
-		/* we need to destroy the snapshot as well */
-
-		if (dsl_dataset_long_held(ds))
-			return (SET_ERROR(EBUSY));
-		*todelete = B_TRUE;
-	}
-	return (0);
-}
-
-static int
-dsl_dataset_user_release_check(void *arg, dmu_tx_t *tx)
-{
-	dsl_dataset_user_release_arg_t *ddura = arg;
-	dsl_pool_t *dp = dmu_tx_pool(tx);
-	nvpair_t *pair;
-	int rv = 0;
-
-	if (!dmu_tx_is_syncing(tx))
-		return (0);
-
-	for (pair = nvlist_next_nvpair(ddura->ddura_holds, NULL); pair != NULL;
-	    pair = nvlist_next_nvpair(ddura->ddura_holds, pair)) {
-		const char *name = nvpair_name(pair);
-		int error;
-		dsl_dataset_t *ds;
-		nvlist_t *holds;
-
-		error = nvpair_value_nvlist(pair, &holds);
-		if (error != 0)
-			return (SET_ERROR(EINVAL));
-
-		error = dsl_dataset_hold(dp, name, FTAG, &ds);
-		if (error == 0) {
-			boolean_t deleteme;
-			error = dsl_dataset_user_release_check_one(ds,
-			    holds, &deleteme);
-			if (error == 0 && deleteme) {
-				fnvlist_add_boolean(ddura->ddura_todelete,
-				    name);
-			}
-			dsl_dataset_rele(ds, FTAG);
-		}
-		if (error != 0) {
-			if (ddura->ddura_errlist != NULL) {
-				fnvlist_add_int32(ddura->ddura_errlist,
-				    name, error);
-			}
-			rv = error;
-		}
-	}
-	return (rv);
-}
-
-static void
-dsl_dataset_user_release_sync_one(dsl_dataset_t *ds, nvlist_t *holds,
-    dmu_tx_t *tx)
-{
-	dsl_pool_t *dp = ds->ds_dir->dd_pool;
-	objset_t *mos = dp->dp_meta_objset;
-	uint64_t zapobj;
-	int error;
-	nvpair_t *pair;
-
-	for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
-	    pair = nvlist_next_nvpair(holds, pair)) {
-		ds->ds_userrefs--;
-		error = dsl_pool_user_release(dp, ds->ds_object,
-		    nvpair_name(pair), tx);
-		VERIFY(error == 0 || error == ENOENT);
-		zapobj = ds->ds_phys->ds_userrefs_obj;
-		VERIFY0(zap_remove(mos, zapobj, nvpair_name(pair), tx));
-
-		spa_history_log_internal_ds(ds, "release", tx,
-		    "tag=%s refs=%lld", nvpair_name(pair),
-		    (longlong_t)ds->ds_userrefs);
-	}
-}
-
-static void
-dsl_dataset_user_release_sync(void *arg, dmu_tx_t *tx)
-{
-	dsl_dataset_user_release_arg_t *ddura = arg;
-	dsl_pool_t *dp = dmu_tx_pool(tx);
-	nvpair_t *pair;
-
-	for (pair = nvlist_next_nvpair(ddura->ddura_holds, NULL); pair != NULL;
-	    pair = nvlist_next_nvpair(ddura->ddura_holds, pair)) {
-		dsl_dataset_t *ds;
-
-		VERIFY0(dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds));
-		dsl_dataset_user_release_sync_one(ds,
-		    fnvpair_value_nvlist(pair), tx);
-		if (nvlist_exists(ddura->ddura_todelete,
-		    nvpair_name(pair))) {
-			ASSERT(ds->ds_userrefs == 0 &&
-			    ds->ds_phys->ds_num_children == 1 &&
-			    DS_IS_DEFER_DESTROY(ds));
-			dsl_destroy_snapshot_sync_impl(ds, B_FALSE, tx);
-		}
-		dsl_dataset_rele(ds, FTAG);
-	}
-}
-
-/*
- * holds is nvl of snapname -> { holdname, ... }
- * errlist will be filled in with snapname -> error
- *
- * if any fails, all will fail.
- */
-int
-dsl_dataset_user_release(nvlist_t *holds, nvlist_t *errlist)
-{
-	dsl_dataset_user_release_arg_t ddura;
-	nvpair_t *pair;
-	int error;
-
-	pair = nvlist_next_nvpair(holds, NULL);
-	if (pair == NULL)
-		return (0);
-
-	ddura.ddura_holds = holds;
-	ddura.ddura_errlist = errlist;
-	ddura.ddura_todelete = fnvlist_alloc();
-
-	error = dsl_sync_task(nvpair_name(pair), dsl_dataset_user_release_check,
-	    dsl_dataset_user_release_sync, &ddura, fnvlist_num_pairs(holds));
-	fnvlist_free(ddura.ddura_todelete);
-	return (error);
-}
-
-typedef struct dsl_dataset_user_release_tmp_arg {
-	uint64_t ddurta_dsobj;
-	nvlist_t *ddurta_holds;
-	boolean_t ddurta_deleteme;
-} dsl_dataset_user_release_tmp_arg_t;
-
-static int
-dsl_dataset_user_release_tmp_check(void *arg, dmu_tx_t *tx)
-{
-	dsl_dataset_user_release_tmp_arg_t *ddurta = arg;
-	dsl_pool_t *dp = dmu_tx_pool(tx);
-	dsl_dataset_t *ds;
-	int error;
-
-	if (!dmu_tx_is_syncing(tx))
-		return (0);
-
-	error = dsl_dataset_hold_obj(dp, ddurta->ddurta_dsobj, FTAG, &ds);
-	if (error)
-		return (error);
-
-	error = dsl_dataset_user_release_check_one(ds,
-	    ddurta->ddurta_holds, &ddurta->ddurta_deleteme);
-	dsl_dataset_rele(ds, FTAG);
-	return (error);
-}
-
-static void
-dsl_dataset_user_release_tmp_sync(void *arg, dmu_tx_t *tx)
-{
-	dsl_dataset_user_release_tmp_arg_t *ddurta = arg;
-	dsl_pool_t *dp = dmu_tx_pool(tx);
-	dsl_dataset_t *ds;
-
-	VERIFY0(dsl_dataset_hold_obj(dp, ddurta->ddurta_dsobj, FTAG, &ds));
-	dsl_dataset_user_release_sync_one(ds, ddurta->ddurta_holds, tx);
-	if (ddurta->ddurta_deleteme) {
-		ASSERT(ds->ds_userrefs == 0 &&
-		    ds->ds_phys->ds_num_children == 1 &&
-		    DS_IS_DEFER_DESTROY(ds));
-		dsl_destroy_snapshot_sync_impl(ds, B_FALSE, tx);
-	}
-	dsl_dataset_rele(ds, FTAG);
-}
-
-/*
- * Called at spa_load time to release a stale temporary user hold.
- * Also called by the onexit code.
- */
-void
-dsl_dataset_user_release_tmp(dsl_pool_t *dp, uint64_t dsobj, const char *htag)
-{
-	dsl_dataset_user_release_tmp_arg_t ddurta;
-	dsl_dataset_t *ds;
-	int error;
-
-#ifdef _KERNEL
-	/* Make sure it is not mounted. */
-	dsl_pool_config_enter(dp, FTAG);
-	error = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds);
-	if (error == 0) {
-		char name[MAXNAMELEN];
-		dsl_dataset_name(ds, name);
-		dsl_dataset_rele(ds, FTAG);
-		dsl_pool_config_exit(dp, FTAG);
-		(void) zfs_unmount_snap(name);
-	} else {
-		dsl_pool_config_exit(dp, FTAG);
-	}
-#endif
-
-	ddurta.ddurta_dsobj = dsobj;
-	ddurta.ddurta_holds = fnvlist_alloc();
-	fnvlist_add_boolean(ddurta.ddurta_holds, htag);
-
-	(void) dsl_sync_task(spa_name(dp->dp_spa),
-	    dsl_dataset_user_release_tmp_check,
-	    dsl_dataset_user_release_tmp_sync, &ddurta, 1);
-	fnvlist_free(ddurta.ddurta_holds);
-}
-
 typedef struct zfs_hold_cleanup_arg {
 	char zhca_spaname[MAXNAMELEN];
 	uint64_t zhca_spa_load_guid;
-	uint64_t zhca_dsobj;
-	char zhca_htag[MAXNAMELEN];
+	nvlist_t *zhca_holds;
 } zfs_hold_cleanup_arg_t;
 
 static void
@@ -465,40 +198,442 @@
 
 	error = spa_open(ca->zhca_spaname, &spa, FTAG);
 	if (error != 0) {
-		zfs_dbgmsg("couldn't release hold on pool=%s ds=%llu tag=%s "
+		zfs_dbgmsg("couldn't release holds on pool=%s "
 		    "because pool is no longer loaded",
-		    ca->zhca_spaname, ca->zhca_dsobj, ca->zhca_htag);
+		    ca->zhca_spaname);
 		return;
 	}
 	if (spa_load_guid(spa) != ca->zhca_spa_load_guid) {
-		zfs_dbgmsg("couldn't release hold on pool=%s ds=%llu tag=%s "
+		zfs_dbgmsg("couldn't release holds on pool=%s "
 		    "because pool is no longer loaded (guid doesn't match)",
-		    ca->zhca_spaname, ca->zhca_dsobj, ca->zhca_htag);
+		    ca->zhca_spaname);
 		spa_close(spa, FTAG);
 		return;
 	}
 
-	dsl_dataset_user_release_tmp(spa_get_dsl(spa),
-	    ca->zhca_dsobj, ca->zhca_htag);
+	(void) dsl_dataset_user_release_tmp(spa_get_dsl(spa), ca->zhca_holds);
+	fnvlist_free(ca->zhca_holds);
 	kmem_free(ca, sizeof (zfs_hold_cleanup_arg_t));
 	spa_close(spa, FTAG);
 }
 
-void
-dsl_register_onexit_hold_cleanup(dsl_dataset_t *ds, const char *htag,
-    minor_t minor)
+static void
+dsl_onexit_hold_cleanup(spa_t *spa, nvlist_t *holds, minor_t minor)
 {
-	zfs_hold_cleanup_arg_t *ca = kmem_alloc(sizeof (*ca), KM_SLEEP);
-	spa_t *spa = dsl_dataset_get_spa(ds);
+	zfs_hold_cleanup_arg_t *ca;
+
+	if (minor == 0 || nvlist_empty(holds)) {
+		fnvlist_free(holds);
+		return;
+	}
+
+	ASSERT(spa != NULL);
+	ca = kmem_alloc(sizeof (*ca), KM_SLEEP);
+
 	(void) strlcpy(ca->zhca_spaname, spa_name(spa),
 	    sizeof (ca->zhca_spaname));
 	ca->zhca_spa_load_guid = spa_load_guid(spa);
-	ca->zhca_dsobj = ds->ds_object;
-	(void) strlcpy(ca->zhca_htag, htag, sizeof (ca->zhca_htag));
+	ca->zhca_holds = holds;
 	VERIFY0(zfs_onexit_add_cb(minor,
 	    dsl_dataset_user_release_onexit, ca, NULL));
 }
 
+void
+dsl_dataset_user_hold_sync_one(dsl_dataset_t *ds, const char *htag,
+    minor_t minor, uint64_t now, dmu_tx_t *tx)
+{
+	nvlist_t *tmpholds;
+
+	if (minor != 0)
+		tmpholds = fnvlist_alloc();
+	else
+		tmpholds = NULL;
+	dsl_dataset_user_hold_sync_one_impl(tmpholds, ds, htag, minor, now, tx);
+	dsl_onexit_hold_cleanup(dsl_dataset_get_spa(ds), tmpholds, minor);
+}
+
+static void
+dsl_dataset_user_hold_sync(void *arg, dmu_tx_t *tx)
+{
+	dsl_dataset_user_hold_arg_t *dduha = arg;
+	dsl_pool_t *dp = dmu_tx_pool(tx);
+	nvlist_t *tmpholds;
+	uint64_t now = gethrestime_sec();
+
+	if (dduha->dduha_minor != 0)
+		tmpholds = fnvlist_alloc();
+	else
+		tmpholds = NULL;
+	for (nvpair_t *pair = nvlist_next_nvpair(dduha->dduha_chkholds, NULL);
+	    pair != NULL;
+	    pair = nvlist_next_nvpair(dduha->dduha_chkholds, pair)) {
+		dsl_dataset_t *ds;
+
+		VERIFY0(dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds));
+		dsl_dataset_user_hold_sync_one_impl(tmpholds, ds,
+		    fnvpair_value_string(pair), dduha->dduha_minor, now, tx);
+		dsl_dataset_rele(ds, FTAG);
+	}
+	dsl_onexit_hold_cleanup(dp->dp_spa, tmpholds, dduha->dduha_minor);
+}
+
+/*
+ * The full semantics of this function are described in the comment above
+ * lzc_hold().
+ *
+ * To summarize:
+ * holds is nvl of snapname -> holdname
+ * errlist will be filled in with snapname -> error
+ *
+ * The snaphosts must all be in the same pool.
+ *
+ * Holds for snapshots that don't exist will be skipped.
+ *
+ * If none of the snapshots for requested holds exist then ENOENT will be
+ * returned.
+ *
+ * If cleanup_minor is not 0, the holds will be temporary, which will be cleaned
+ * up when the process exits.
+ *
+ * On success all the holds, for snapshots that existed, will be created and 0
+ * will be returned.
+ *
+ * On failure no holds will be created, the errlist will be filled in,
+ * and an errno will returned.
+ *
+ * In all cases the errlist will contain entries for holds where the snapshot
+ * didn't exist.
+ */
+int
+dsl_dataset_user_hold(nvlist_t *holds, minor_t cleanup_minor, nvlist_t *errlist)
+{
+	dsl_dataset_user_hold_arg_t dduha;
+	nvpair_t *pair;
+	int ret;
+
+	pair = nvlist_next_nvpair(holds, NULL);
+	if (pair == NULL)
+		return (0);
+
+	dduha.dduha_holds = holds;
+	dduha.dduha_chkholds = fnvlist_alloc();
+	dduha.dduha_errlist = errlist;
+	dduha.dduha_minor = cleanup_minor;
+
+	ret = dsl_sync_task(nvpair_name(pair), dsl_dataset_user_hold_check,
+	    dsl_dataset_user_hold_sync, &dduha, fnvlist_num_pairs(holds));
+	fnvlist_free(dduha.dduha_chkholds);
+
+	return (ret);
+}
+
+typedef int (dsl_holdfunc_t)(dsl_pool_t *dp, const char *name, void *tag,
+    dsl_dataset_t **dsp);
+
+typedef struct dsl_dataset_user_release_arg {
+	dsl_holdfunc_t *ddura_holdfunc;
+	nvlist_t *ddura_holds;
+	nvlist_t *ddura_todelete;
+	nvlist_t *ddura_errlist;
+	nvlist_t *ddura_chkholds;
+} dsl_dataset_user_release_arg_t;
+
+/* Place a dataset hold on the snapshot identified by passed dsobj string */
+static int
+dsl_dataset_hold_obj_string(dsl_pool_t *dp, const char *dsobj, void *tag,
+    dsl_dataset_t **dsp)
+{
+	return (dsl_dataset_hold_obj(dp, strtonum(dsobj, NULL), tag, dsp));
+}
+
+static int
+dsl_dataset_user_release_check_one(dsl_dataset_user_release_arg_t *ddura,
+    dsl_dataset_t *ds, nvlist_t *holds, const char *snapname)
+{
+	uint64_t zapobj;
+	nvlist_t *holds_found;
+	objset_t *mos;
+	int numholds;
+
+	if (!dsl_dataset_is_snapshot(ds))
+		return (SET_ERROR(EINVAL));
+
+	if (nvlist_empty(holds))
+		return (0);
+
+	numholds = 0;
+	mos = ds->ds_dir->dd_pool->dp_meta_objset;
+	zapobj = ds->ds_phys->ds_userrefs_obj;
+	holds_found = fnvlist_alloc();
+
+	for (nvpair_t *pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
+	    pair = nvlist_next_nvpair(holds, pair)) {
+		uint64_t tmp;
+		int error;
+		const char *holdname = nvpair_name(pair);
+
+		if (zapobj != 0)
+			error = zap_lookup(mos, zapobj, holdname, 8, 1, &tmp);
+		else
+			error = SET_ERROR(ENOENT);
+
+		/*
+		 * Non-existent holds are put on the errlist, but don't
+		 * cause an overall failure.
+		 */
+		if (error == ENOENT) {
+			if (ddura->ddura_errlist != NULL) {
+				char *errtag = kmem_asprintf("%s#%s",
+				    snapname, holdname);
+				fnvlist_add_int32(ddura->ddura_errlist, errtag,
+				    ENOENT);
+				strfree(errtag);
+			}
+			continue;
+		}
+
+		if (error != 0) {
+			fnvlist_free(holds_found);
+			return (error);
+		}
+
+		fnvlist_add_boolean(holds_found, holdname);
+		numholds++;
+	}
+
+	if (DS_IS_DEFER_DESTROY(ds) && ds->ds_phys->ds_num_children == 1 &&
+	    ds->ds_userrefs == numholds) {
+		/* we need to destroy the snapshot as well */
+		if (dsl_dataset_long_held(ds)) {
+			fnvlist_free(holds_found);
+			return (SET_ERROR(EBUSY));
+		}
+		fnvlist_add_boolean(ddura->ddura_todelete, snapname);
+	}
+
+	if (numholds != 0) {
+		fnvlist_add_nvlist(ddura->ddura_chkholds, snapname,
+		    holds_found);
+	}
+	fnvlist_free(holds_found);
+
+	return (0);
+}
+
+static int
+dsl_dataset_user_release_check(void *arg, dmu_tx_t *tx)
+{
+	dsl_dataset_user_release_arg_t *ddura;
+	dsl_holdfunc_t *holdfunc;
+	dsl_pool_t *dp;
+
+	if (!dmu_tx_is_syncing(tx))
+		return (0);
+
+	dp = dmu_tx_pool(tx);
+
+	ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
+
+	ddura = arg;
+	holdfunc = ddura->ddura_holdfunc;
+
+	for (nvpair_t *pair = nvlist_next_nvpair(ddura->ddura_holds, NULL);
+	    pair != NULL; pair = nvlist_next_nvpair(ddura->ddura_holds, pair)) {
+		int error;
+		dsl_dataset_t *ds;
+		nvlist_t *holds;
+		const char *snapname = nvpair_name(pair);
+
+		error = nvpair_value_nvlist(pair, &holds);
+		if (error != 0)
+			error = (SET_ERROR(EINVAL));
+		else
+			error = holdfunc(dp, snapname, FTAG, &ds);
+		if (error == 0) {
+			error = dsl_dataset_user_release_check_one(ddura, ds,
+			    holds, snapname);
+			dsl_dataset_rele(ds, FTAG);
+		}
+		if (error != 0) {
+			if (ddura->ddura_errlist != NULL) {
+				fnvlist_add_int32(ddura->ddura_errlist,
+				    snapname, error);
+			}
+			/*
+			 * Non-existent snapshots are put on the errlist,
+			 * but don't cause an overall failure.
+			 */
+			if (error != ENOENT)
+				return (error);
+		}
+	}
+
+	/* Return ENOENT if none of the holds existed. */
+	if (nvlist_empty(ddura->ddura_chkholds))
+		return (SET_ERROR(ENOENT));
+
+	return (0);
+}
+
+static void
+dsl_dataset_user_release_sync_one(dsl_dataset_t *ds, nvlist_t *holds,
+    dmu_tx_t *tx)
+{
+	dsl_pool_t *dp = ds->ds_dir->dd_pool;
+	objset_t *mos = dp->dp_meta_objset;
+
+	for (nvpair_t *pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
+	    pair = nvlist_next_nvpair(holds, pair)) {
+		int error;
+		const char *holdname = nvpair_name(pair);
+
+		/* Remove temporary hold if one exists. */
+		error = dsl_pool_user_release(dp, ds->ds_object, holdname, tx);
+		VERIFY(error == 0 || error == ENOENT);
+
+		VERIFY0(zap_remove(mos, ds->ds_phys->ds_userrefs_obj, holdname,
+		    tx));
+		ds->ds_userrefs--;
+
+		spa_history_log_internal_ds(ds, "release", tx,
+		    "tag=%s refs=%lld", holdname, (longlong_t)ds->ds_userrefs);
+	}
+}
+
+static void
+dsl_dataset_user_release_sync(void *arg, dmu_tx_t *tx)
+{
+	dsl_dataset_user_release_arg_t *ddura = arg;
+	dsl_holdfunc_t *holdfunc = ddura->ddura_holdfunc;
+	dsl_pool_t *dp = dmu_tx_pool(tx);
+
+	ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
+
+	for (nvpair_t *pair = nvlist_next_nvpair(ddura->ddura_chkholds, NULL);
+	    pair != NULL; pair = nvlist_next_nvpair(ddura->ddura_chkholds,
+	    pair)) {
+		dsl_dataset_t *ds;
+		const char *name = nvpair_name(pair);
+
+		VERIFY0(holdfunc(dp, name, FTAG, &ds));
+
+		dsl_dataset_user_release_sync_one(ds,
+		    fnvpair_value_nvlist(pair), tx);
+		if (nvlist_exists(ddura->ddura_todelete, name)) {
+			ASSERT(ds->ds_userrefs == 0 &&
+			    ds->ds_phys->ds_num_children == 1 &&
+			    DS_IS_DEFER_DESTROY(ds));
+			dsl_destroy_snapshot_sync_impl(ds, B_FALSE, tx);
+		}
+		dsl_dataset_rele(ds, FTAG);
+	}
+}
+
+/*
+ * The full semantics of this function are described in the comment above
+ * lzc_release().
+ *
+ * To summarize:
+ * Releases holds specified in the nvl holds.
+ *
+ * holds is nvl of snapname -> { holdname, ... }
+ * errlist will be filled in with snapname -> error
+ *
+ * If tmpdp is not NULL the names for holds should be the dsobj's of snapshots,
+ * otherwise they should be the names of shapshots.
+ *
+ * As a release may cause snapshots to be destroyed this trys to ensure they
+ * aren't mounted.
+ *
+ * The release of non-existent holds are skipped.
+ *
+ * At least one hold must have been released for the this function to succeed
+ * and return 0.
+ */
+static int
+dsl_dataset_user_release_impl(nvlist_t *holds, nvlist_t *errlist,
+    dsl_pool_t *tmpdp)
+{
+	dsl_dataset_user_release_arg_t ddura;
+	nvpair_t *pair;
+	char *pool;
+	int error;
+
+	pair = nvlist_next_nvpair(holds, NULL);
+	if (pair == NULL)
+		return (0);
+
+	/*
+	 * The release may cause snapshots to be destroyed; make sure they
+	 * are not mounted.
+	 */
+	if (tmpdp != NULL) {
+		/* Temporary holds are specified by dsobj string. */
+		ddura.ddura_holdfunc = dsl_dataset_hold_obj_string;
+		pool = spa_name(tmpdp->dp_spa);
+#ifdef _KERNEL
+		dsl_pool_config_enter(tmpdp, FTAG);
+		for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
+		    pair = nvlist_next_nvpair(holds, pair)) {
+			dsl_dataset_t *ds;
+
+			error = dsl_dataset_hold_obj_string(tmpdp,
+			    nvpair_name(pair), FTAG, &ds);
+			if (error == 0) {
+				char name[MAXNAMELEN];
+				dsl_dataset_name(ds, name);
+				dsl_dataset_rele(ds, FTAG);
+				(void) zfs_unmount_snap(name);
+			}
+		}
+		dsl_pool_config_exit(tmpdp, FTAG);
+#endif
+	} else {
+		/* Non-temporary holds are specified by name. */
+		ddura.ddura_holdfunc = dsl_dataset_hold;
+		pool = nvpair_name(pair);
+#ifdef _KERNEL
+		for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
+		    pair = nvlist_next_nvpair(holds, pair)) {
+			(void) zfs_unmount_snap(nvpair_name(pair));
+		}
+#endif
+	}
+
+	ddura.ddura_holds = holds;
+	ddura.ddura_errlist = errlist;
+	ddura.ddura_todelete = fnvlist_alloc();
+	ddura.ddura_chkholds = fnvlist_alloc();
+
+	error = dsl_sync_task(pool, dsl_dataset_user_release_check,
+	    dsl_dataset_user_release_sync, &ddura,
+	    fnvlist_num_pairs(holds));
+	fnvlist_free(ddura.ddura_todelete);
+	fnvlist_free(ddura.ddura_chkholds);
+
+	return (error);
+}
+
+/*
+ * holds is nvl of snapname -> { holdname, ... }
+ * errlist will be filled in with snapname -> error
+ */
+int
+dsl_dataset_user_release(nvlist_t *holds, nvlist_t *errlist)
+{
+	return (dsl_dataset_user_release_impl(holds, errlist, NULL));
+}
+
+/*
+ * holds is nvl of snapdsobj -> { holdname, ... }
+ */
+void
+dsl_dataset_user_release_tmp(struct dsl_pool *dp, nvlist_t *holds)
+{
+	ASSERT(dp != NULL);
+	(void) dsl_dataset_user_release_impl(holds, NULL, dp);
+}
+
 int
 dsl_dataset_get_holds(const char *dsname, nvlist_t *nvl)
 {
--- a/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h	Tue Jun 11 09:13:55 2013 -0800
+++ b/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h	Tue Jun 11 22:01:53 2013 -0800
@@ -22,6 +22,7 @@
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2012 by Delphix. All rights reserved.
  * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2013 Steven Hartland. All rights reserved.
  */
 
 #ifndef	_SYS_DSL_DATASET_H
@@ -187,8 +188,6 @@
 void dsl_dataset_disown(dsl_dataset_t *ds, void *tag);
 void dsl_dataset_name(dsl_dataset_t *ds, char *name);
 boolean_t dsl_dataset_tryown(dsl_dataset_t *ds, void *tag);
-void dsl_register_onexit_hold_cleanup(dsl_dataset_t *ds, const char *htag,
-    minor_t minor);
 uint64_t dsl_dataset_create_sync(dsl_dir_t *pds, const char *lastname,
     dsl_dataset_t *origin, uint64_t flags, cred_t *, dmu_tx_t *);
 uint64_t dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
--- a/usr/src/uts/common/fs/zfs/sys/dsl_userhold.h	Tue Jun 11 09:13:55 2013 -0800
+++ b/usr/src/uts/common/fs/zfs/sys/dsl_userhold.h	Tue Jun 11 22:01:53 2013 -0800
@@ -23,6 +23,7 @@
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2012 by Delphix. All rights reserved.
  * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2013 Steven Hartland. All rights reserved.
  */
 
 #ifndef	_SYS_DSL_USERHOLD_H
@@ -43,8 +44,7 @@
     nvlist_t *errlist);
 int dsl_dataset_user_release(nvlist_t *holds, nvlist_t *errlist);
 int dsl_dataset_get_holds(const char *dsname, nvlist_t *nvl);
-void dsl_dataset_user_release_tmp(struct dsl_pool *dp, uint64_t dsobj,
-    const char *htag);
+void dsl_dataset_user_release_tmp(struct dsl_pool *dp, nvlist_t *holds);
 int dsl_dataset_user_hold_check_one(struct dsl_dataset *ds, const char *htag,
     boolean_t temphold, struct dmu_tx *tx);
 void dsl_dataset_user_hold_sync_one(struct dsl_dataset *ds, const char *htag,
--- a/usr/src/uts/common/fs/zfs/zfs_ioctl.c	Tue Jun 11 09:13:55 2013 -0800
+++ b/usr/src/uts/common/fs/zfs/zfs_ioctl.c	Tue Jun 11 22:01:53 2013 -0800
@@ -26,6 +26,7 @@
  * Copyright (c) 2012, Joyent, Inc. All rights reserved.
  * Copyright (c) 2013 by Delphix. All rights reserved.
  * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
+ * Copyright (c) 2013 Steven Hartland. All rights reserved.
  */
 
 /*
@@ -4984,20 +4985,6 @@
 static int
 zfs_ioc_release(const char *pool, nvlist_t *holds, nvlist_t *errlist)
 {
-	nvpair_t *pair;
-	int err;
-
-	/*
-	 * The release may cause the snapshot to be destroyed; make sure it
-	 * is not mounted.
-	 */
-	for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
-	    pair = nvlist_next_nvpair(holds, pair)) {
-		err = zfs_unmount_snap(nvpair_name(pair));
-		if (err != 0)
-			return (err);
-	}
-
 	return (dsl_dataset_user_release(holds, errlist));
 }