6884007 zfs_send() can leave temporary holds around
author Chris Kirby <Chris.Kirby@oracle.com>
Tue, 01 Jun 2010 17:04:42 -0600
changeset 12527 693dd2cad55f
parent 12526 6f48102ad665
child 12528 13ec26f2b29c
6884007 zfs_send() can leave temporary holds around 6954429 ZFS_IOC_VDEV_SPLIT missing from truss/codes.c
usr/src/cmd/truss/codes.c
usr/src/cmd/zfs/zfs_main.c
usr/src/cmd/ztest/ztest.c
usr/src/lib/libzfs/common/libzfs.h
usr/src/lib/libzfs/common/libzfs_dataset.c
usr/src/lib/libzfs/common/libzfs_sendrecv.c
usr/src/lib/libzpool/common/kernel.c
usr/src/uts/common/Makefile.files
usr/src/uts/common/fs/zfs/dmu_send.c
usr/src/uts/common/fs/zfs/dsl_dataset.c
usr/src/uts/common/fs/zfs/sys/dmu.h
usr/src/uts/common/fs/zfs/sys/dsl_dataset.h
usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h
usr/src/uts/common/fs/zfs/sys/zfs_onexit.h
usr/src/uts/common/fs/zfs/zfs_ioctl.c
usr/src/uts/common/fs/zfs/zfs_onexit.c
usr/src/uts/common/fs/zfs/zvol.c
--- a/usr/src/cmd/truss/codes.c	Tue Jun 01 17:33:59 2010 -0400
+++ b/usr/src/cmd/truss/codes.c	Tue Jun 01 17:04:42 2010 -0600
@@ -1237,6 +1237,8 @@
 		"zfs_cmd_t" },
 	{ (uint_t)ZFS_IOC_OBJSET_RECVD_PROPS,	"ZFS_IOC_OBJSET_RECVD_PROPS",
 		"zfs_cmd_t" },
+	{ (uint_t)ZFS_IOC_VDEV_SPLIT,		"ZFS_IOC_VDEV_SPLIT",
+		"zfs_cmd_t" },
 
 	/* kssl ioctls */
 	{ (uint_t)KSSL_ADD_ENTRY,		"KSSL_ADD_ENTRY",
--- a/usr/src/cmd/zfs/zfs_main.c	Tue Jun 01 17:33:59 2010 -0400
+++ b/usr/src/cmd/zfs/zfs_main.c	Tue Jun 01 17:04:42 2010 -0600
@@ -2888,7 +2888,7 @@
 		}
 		if (holding) {
 			if (zfs_hold(zhp, delim+1, tag, recursive,
-			    temphold, B_FALSE) != 0)
+			    temphold, B_FALSE, -1) != 0)
 				++errors;
 		} else {
 			if (zfs_release(zhp, delim+1, tag, recursive) != 0)
--- a/usr/src/cmd/ztest/ztest.c	Tue Jun 01 17:33:59 2010 -0400
+++ b/usr/src/cmd/ztest/ztest.c	Tue Jun 01 17:04:42 2010 -0600
@@ -4355,7 +4355,8 @@
 		fatal(0, "dmu_objset_snapshot(%s) = %d", fullname, error);
 	}
 
-	error = dsl_dataset_user_hold(osname, snapname, tag, B_FALSE, B_TRUE);
+	error = dsl_dataset_user_hold(osname, snapname, tag, B_FALSE,
+	    B_TRUE, -1);
 	if (error)
 		fatal(0, "dsl_dataset_user_hold(%s)", fullname, tag);
 
--- a/usr/src/lib/libzfs/common/libzfs.h	Tue Jun 01 17:33:59 2010 -0400
+++ b/usr/src/lib/libzfs/common/libzfs.h	Tue Jun 01 17:04:42 2010 -0600
@@ -533,9 +533,9 @@
 
 extern int zfs_promote(zfs_handle_t *);
 extern int zfs_hold(zfs_handle_t *, const char *, const char *, boolean_t,
-    boolean_t, boolean_t);
+    boolean_t, boolean_t, int);
 extern int zfs_hold_range(zfs_handle_t *, const char *, const char *,
-    const char *, boolean_t, boolean_t, snapfilter_cb_t, void *);
+    const char *, boolean_t, boolean_t, snapfilter_cb_t, void *, int);
 extern int zfs_release(zfs_handle_t *, const char *, const char *, boolean_t);
 extern int zfs_release_range(zfs_handle_t *, const char *, const char *,
     const char *, boolean_t);
--- a/usr/src/lib/libzfs/common/libzfs_dataset.c	Tue Jun 01 17:33:59 2010 -0400
+++ b/usr/src/lib/libzfs/common/libzfs_dataset.c	Tue Jun 01 17:04:42 2010 -0600
@@ -3926,7 +3926,8 @@
 
 int
 zfs_hold(zfs_handle_t *zhp, const char *snapname, const char *tag,
-    boolean_t recursive, boolean_t temphold, boolean_t enoent_ok)
+    boolean_t recursive, boolean_t temphold, boolean_t enoent_ok,
+    int cleanup_fd)
 {
 	zfs_cmd_t zc = { 0 };
 	libzfs_handle_t *hdl = zhp->zfs_hdl;
@@ -3938,6 +3939,7 @@
 		return (zfs_error(hdl, EZFS_TAGTOOLONG, tag));
 	zc.zc_cookie = recursive;
 	zc.zc_temphold = temphold;
+	zc.zc_cleanup_fd = cleanup_fd;
 
 	if (zfs_ioctl(hdl, ZFS_IOC_HOLD, &zc) != 0) {
 		char errbuf[ZFS_MAXNAMELEN+32];
@@ -3990,6 +3992,7 @@
 	boolean_t	recursive;
 	snapfilter_cb_t	*filter_cb;
 	void		*filter_cb_arg;
+	int		cleanup_fd;
 };
 
 static int
@@ -4023,7 +4026,7 @@
 	if (hra->holding) {
 		/* We could be racing with destroy, so ignore ENOENT. */
 		error = zfs_hold(hra->origin, thissnap, hra->tag,
-		    hra->recursive, hra->temphold, B_TRUE);
+		    hra->recursive, hra->temphold, B_TRUE, hra->cleanup_fd);
 		if (error == 0) {
 			(void) strlcpy(hra->lastsnapheld, zfs_get_name(zhp),
 			    sizeof (hra->lastsnapheld));
@@ -4045,7 +4048,7 @@
 int
 zfs_hold_range(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
     const char *tag, boolean_t recursive, boolean_t temphold,
-    snapfilter_cb_t filter_cb, void *cbarg)
+    snapfilter_cb_t filter_cb, void *cbarg, int cleanup_fd)
 {
 	struct hold_range_arg arg = { 0 };
 	int error;
@@ -4060,13 +4063,17 @@
 	arg.seenfrom = (fromsnap == NULL);
 	arg.filter_cb = filter_cb;
 	arg.filter_cb_arg = cbarg;
+	arg.cleanup_fd = cleanup_fd;
 
 	error = zfs_iter_snapshots_sorted(zhp, zfs_hold_range_one, &arg);
 
 	/*
-	 * Make sure we either hold the entire range or none.
+	 * Make sure we either hold the entire range or none. If we're
+	 * using cleanup-on-exit, we'll let the closing of the cleanup_fd
+	 * do the work for us.
 	 */
-	if (error && arg.lastsnapheld[0] != '\0') {
+	if (error && arg.lastsnapheld[0] != '\0' &&
+	    (cleanup_fd == -1 || !temphold)) {
 		(void) zfs_release_range(zhp, fromsnap,
 		    (const char *)arg.lastsnapheld, tag, recursive);
 	}
@@ -4130,6 +4137,7 @@
 	arg.tag = tag;
 	arg.recursive = recursive;
 	arg.seenfrom = (fromsnap == NULL);
+	arg.cleanup_fd = -1;
 
 	return (zfs_iter_snapshots_sorted(zhp, zfs_hold_range_one, &arg));
 }
--- a/usr/src/lib/libzfs/common/libzfs_sendrecv.c	Tue Jun 01 17:33:59 2010 -0400
+++ b/usr/src/lib/libzfs/common/libzfs_sendrecv.c	Tue Jun 01 17:04:42 2010 -0600
@@ -51,7 +51,7 @@
 extern void zfs_setprop_error(libzfs_handle_t *, zfs_prop_t, int, char *);
 
 static int zfs_receive_impl(libzfs_handle_t *, const char *, recvflags_t,
-    int, const char *, nvlist_t *, avl_tree_t *, char **);
+    int, const char *, nvlist_t *, avl_tree_t *, char **, int, uint64_t *);
 
 static const zio_cksum_t zero_cksum = { 0 };
 
@@ -1210,6 +1210,7 @@
 	int pipefd[2];
 	dedup_arg_t dda = { 0 };
 	int featureflags = 0;
+	int cleanup_fd = -1;
 
 	if (zhp->zfs_type == ZFS_TYPE_FILESYSTEM) {
 		uint64_t version;
@@ -1259,12 +1260,17 @@
 		zio_cksum_t zc = { 0 };
 
 		if (holdsnaps) {
+			++holdseq;
 			(void) snprintf(holdtag, sizeof (holdtag),
 			    ".send-%d-%llu", getpid(), (u_longlong_t)holdseq);
-			++holdseq;
+			cleanup_fd = open(ZFS_DEV, O_RDWR|O_EXCL);
+			if (cleanup_fd < 0) {
+				err = errno;
+				goto stderr_out;
+			}
 			err = zfs_hold_range(zhp, fromsnap, tosnap,
 			    holdtag, flags.replicate, B_TRUE, filter_func,
-			    cb_arg);
+			    cb_arg, cleanup_fd);
 			if (err)
 				goto err_out;
 		}
@@ -1285,13 +1291,8 @@
 
 			err = gather_nvlist(zhp->zfs_hdl, zhp->zfs_name,
 			    fromsnap, tosnap, flags.replicate, &fss, &fsavl);
-			if (err) {
-				if (holdsnaps) {
-					(void) zfs_release_range(zhp, fromsnap,
-					    tosnap, holdtag, flags.replicate);
-				}
+			if (err)
 				goto err_out;
-			}
 			VERIFY(0 == nvlist_add_nvlist(hdrnv, "fss", fss));
 			err = nvlist_pack(hdrnv, &packbuf, &buflen,
 			    NV_ENCODE_XDR, 0);
@@ -1302,10 +1303,6 @@
 			if (err) {
 				fsavl_destroy(fsavl);
 				nvlist_free(fss);
-				if (holdsnaps) {
-					(void) zfs_release_range(zhp, fromsnap,
-					    tosnap, holdtag, flags.replicate);
-				}
 				goto stderr_out;
 			}
 		}
@@ -1331,10 +1328,6 @@
 		if (err == -1) {
 			fsavl_destroy(fsavl);
 			nvlist_free(fss);
-			if (holdsnaps) {
-				(void) zfs_release_range(zhp, fromsnap, tosnap,
-				    holdtag, flags.replicate);
-			}
 			err = errno;
 			goto stderr_out;
 		}
@@ -1349,10 +1342,6 @@
 				fsavl_destroy(fsavl);
 				nvlist_free(fss);
 				err = errno;
-				if (holdsnaps) {
-					(void) zfs_release_range(zhp, fromsnap,
-					    tosnap, holdtag, flags.replicate);
-				}
 				goto stderr_out;
 			}
 		}
@@ -1384,6 +1373,11 @@
 		(void) pthread_join(tid, NULL);
 	}
 
+	if (cleanup_fd != -1) {
+		VERIFY(0 == close(cleanup_fd));
+		cleanup_fd = -1;
+	}
+
 	if (flags.replicate || flags.doall || flags.props) {
 		/*
 		 * write final end record.  NB: want to do this even if
@@ -1392,10 +1386,6 @@
 		 */
 		dmu_replay_record_t drr = { 0 };
 		drr.drr_type = DRR_END;
-		if (holdsnaps) {
-			(void) zfs_release_range(zhp, fromsnap, tosnap,
-			    holdtag, flags.replicate);
-		}
 		if (write(outfd, &drr, sizeof (drr)) == -1) {
 			return (zfs_standard_error(zhp->zfs_hdl,
 			    errno, errbuf));
@@ -1407,6 +1397,8 @@
 stderr_out:
 	err = zfs_standard_error(zhp->zfs_hdl, err, errbuf);
 err_out:
+	if (cleanup_fd != -1)
+		VERIFY(0 == close(cleanup_fd));
 	if (flags.dedup) {
 		(void) pthread_cancel(tid);
 		(void) pthread_join(tid, NULL);
@@ -1992,7 +1984,7 @@
 static int
 zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname,
     recvflags_t flags, dmu_replay_record_t *drr, zio_cksum_t *zc,
-    char **top_zfs)
+    char **top_zfs, int cleanup_fd, uint64_t *action_handlep)
 {
 	nvlist_t *stream_nv = NULL;
 	avl_tree_t *stream_avl = NULL;
@@ -2158,7 +2150,8 @@
 		 * recv_skip() and return 0).
 		 */
 		error = zfs_receive_impl(hdl, destname, flags, fd,
-		    sendfs, stream_nv, stream_avl, top_zfs);
+		    sendfs, stream_nv, stream_avl, top_zfs, cleanup_fd,
+		    action_handlep);
 		if (error == ENODATA) {
 			error = 0;
 			break;
@@ -2281,7 +2274,8 @@
 zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
     recvflags_t flags, dmu_replay_record_t *drr,
     dmu_replay_record_t *drr_noswap, const char *sendfs,
-    nvlist_t *stream_nv, avl_tree_t *stream_avl, char **top_zfs)
+    nvlist_t *stream_nv, avl_tree_t *stream_avl, char **top_zfs, int cleanup_fd,
+    uint64_t *action_handlep)
 {
 	zfs_cmd_t zc = { 0 };
 	time_t begin_time;
@@ -2609,6 +2603,8 @@
 
 	zc.zc_nvlist_dst = (uint64_t)(uintptr_t)prop_errbuf;
 	zc.zc_nvlist_dst_size = sizeof (prop_errbuf);
+	zc.zc_cleanup_fd = cleanup_fd;
+	zc.zc_action_handle = *action_handlep;
 
 	err = ioctl_err = zfs_ioctl(hdl, ZFS_IOC_RECV, &zc);
 	ioctl_errno = errno;
@@ -2796,6 +2792,8 @@
 	if (err || ioctl_err)
 		return (-1);
 
+	*action_handlep = zc.zc_action_handle;
+
 	if (flags.verbose) {
 		char buf1[64];
 		char buf2[64];
@@ -2816,7 +2814,7 @@
 static int
 zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap, recvflags_t flags,
     int infd, const char *sendfs, nvlist_t *stream_nv, avl_tree_t *stream_avl,
-    char **top_zfs)
+    char **top_zfs, int cleanup_fd, uint64_t *action_handlep)
 {
 	int err;
 	dmu_replay_record_t drr, drr_noswap;
@@ -2909,12 +2907,12 @@
 		}
 		return (zfs_receive_one(hdl, infd, tosnap, flags,
 		    &drr, &drr_noswap, sendfs, stream_nv, stream_avl,
-		    top_zfs));
+		    top_zfs, cleanup_fd, action_handlep));
 	} else {
 		assert(DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) ==
 		    DMU_COMPOUNDSTREAM);
 		return (zfs_receive_package(hdl, infd, tosnap, flags,
-		    &drr, &zcksum, top_zfs));
+		    &drr, &zcksum, top_zfs, cleanup_fd, action_handlep));
 	}
 }
 
@@ -2930,9 +2928,16 @@
 {
 	char *top_zfs = NULL;
 	int err;
+	int cleanup_fd;
+	uint64_t action_handle = 0;
+
+	cleanup_fd = open(ZFS_DEV, O_RDWR|O_EXCL);
+	VERIFY(cleanup_fd >= 0);
 
 	err = zfs_receive_impl(hdl, tosnap, flags, infd, NULL, NULL,
-	    stream_avl, &top_zfs);
+	    stream_avl, &top_zfs, cleanup_fd, &action_handle);
+
+	VERIFY(0 == close(cleanup_fd));
 
 	if (err == 0 && !flags.nomount && top_zfs) {
 		zfs_handle_t *zhp;
--- a/usr/src/lib/libzpool/common/kernel.c	Tue Jun 01 17:33:59 2010 -0400
+++ b/usr/src/lib/libzpool/common/kernel.c	Tue Jun 01 17:04:42 2010 -0600
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #include <assert.h>
@@ -944,3 +943,25 @@
 
 	return (buf);
 }
+
+/*
+ * libzpool versions of the cleanup-on-exit interfaces.  Each of the three
+ * routines below ignores all of its arguments and simply reports success.
+ * Note that none of them writes through its output pointer, so
+ * *action_handle and *data are left exactly as the caller passed them.
+ */
+
+/* ARGSUSED */
+int
+zfs_onexit_add_cb(int fd, void (*func)(void *), void *data,
+    uint64_t *action_handle)
+{
+	return (0);
+}
+
+/* ARGSUSED */
+int
+zfs_onexit_del_cb(int fd, uint64_t action_handle, boolean_t fire)
+{
+	return (0);
+}
+
+/* ARGSUSED */
+int
+zfs_onexit_cb_data(int fd, uint64_t action_handle, void **data)
+{
+	return (0);
+}
--- a/usr/src/uts/common/Makefile.files	Tue Jun 01 17:33:59 2010 -0400
+++ b/usr/src/uts/common/Makefile.files	Tue Jun 01 17:04:42 2010 -0600
@@ -1404,6 +1404,7 @@
 	zfs_dir.o		\
 	zfs_ioctl.o		\
 	zfs_log.o		\
+	zfs_onexit.o		\
 	zfs_replay.o		\
 	zfs_rlock.o		\
 	rrwlock.o		\
--- a/usr/src/uts/common/fs/zfs/dmu_send.c	Tue Jun 01 17:33:59 2010 -0400
+++ b/usr/src/uts/common/fs/zfs/dmu_send.c	Tue Jun 01 17:04:42 2010 -0600
@@ -42,6 +42,7 @@
 #include <zfs_fletcher.h>
 #include <sys/avl.h>
 #include <sys/ddt.h>
+#include <sys/zfs_onexit.h>
 
 static char *dmu_recv_tag = "dmu_recv_tag";
 
@@ -810,7 +811,7 @@
 	uint64_t voff;
 	int bufsize; /* amount of memory allocated for buf */
 	zio_cksum_t cksum;
-	avl_tree_t guid_to_ds_map;
+	avl_tree_t *guid_to_ds_map;
 };
 
 typedef struct guid_map_entry {
@@ -887,6 +888,21 @@
 	return (0);
 }
 
+/*
+ * Cleanup-on-exit callback for the guid-to-dataset map that
+ * dmu_recv_stream() registers with zfs_onexit_add_cb().  Every entry has
+ * its dataset reference dropped and is freed, then the (heap-allocated)
+ * AVL tree itself is destroyed and freed.
+ *
+ * 'arg' is the avl_tree_t pointer that was registered as callback data.
+ */
+static void
+free_guid_map_onexit(void *arg)
+{
+	avl_tree_t *map = arg;
+	guid_map_entry_t *entry;
+	void *walk = NULL;
+
+	for (entry = avl_destroy_nodes(map, &walk); entry != NULL;
+	    entry = avl_destroy_nodes(map, &walk)) {
+		/*
+		 * The tree pointer is passed as the second argument to
+		 * dsl_dataset_rele(); presumably it is the tag the hold was
+		 * taken with -- confirm against find_ds_by_guid().
+		 */
+		dsl_dataset_rele(entry->gme_ds, map);
+		kmem_free(entry, sizeof (*entry));
+	}
+	avl_destroy(map);
+	kmem_free(map, sizeof (*map));
+}
+
 static void *
 restore_read(struct restorearg *ra, int len)
 {
@@ -1173,7 +1189,7 @@
 	 */
 	if (drrwbr->drr_toguid != drrwbr->drr_refguid) {
 		gmesrch.guid = drrwbr->drr_refguid;
-		if ((gmep = avl_find(&ra->guid_to_ds_map, &gmesrch,
+		if ((gmep = avl_find(ra->guid_to_ds_map, &gmesrch,
 		    &where)) == NULL) {
 			return (EINVAL);
 		}
@@ -1276,13 +1292,13 @@
  * NB: callers *must* call dmu_recv_end() if this succeeds.
  */
 int
-dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp)
+dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp,
+    int cleanup_fd, uint64_t *action_handlep)
 {
 	struct restorearg ra = { 0 };
 	dmu_replay_record_t *drr;
 	objset_t *os;
 	zio_cksum_t pcksum;
-	guid_map_entry_t *gmep;
 	int featureflags;
 
 	if (drc->drc_drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC))
@@ -1336,12 +1352,30 @@
 
 	/* if this stream is dedup'ed, set up the avl tree for guid mapping */
 	if (featureflags & DMU_BACKUP_FEATURE_DEDUP) {
-		avl_create(&ra.guid_to_ds_map, guid_compare,
-		    sizeof (guid_map_entry_t),
-		    offsetof(guid_map_entry_t, avlnode));
-		(void) dmu_objset_find(drc->drc_top_ds, find_ds_by_guid,
-		    (void *)&ra.guid_to_ds_map,
-		    DS_FIND_CHILDREN);
+		if (cleanup_fd == -1) {
+			ra.err = EBADF;
+			goto out;
+		}
+		if (*action_handlep == 0) {
+			ra.guid_to_ds_map =
+			    kmem_alloc(sizeof (avl_tree_t), KM_SLEEP);
+			avl_create(ra.guid_to_ds_map, guid_compare,
+			    sizeof (guid_map_entry_t),
+			    offsetof(guid_map_entry_t, avlnode));
+			(void) dmu_objset_find(drc->drc_top_ds, find_ds_by_guid,
+			    (void *)ra.guid_to_ds_map,
+			    DS_FIND_CHILDREN);
+			ra.err = zfs_onexit_add_cb(cleanup_fd,
+			    free_guid_map_onexit, ra.guid_to_ds_map,
+			    action_handlep);
+			if (ra.err)
+				goto out;
+		} else {
+			ra.err = zfs_onexit_cb_data(cleanup_fd, *action_handlep,
+			    (void **)&ra.guid_to_ds_map);
+			if (ra.err)
+				goto out;
+		}
 	}
 
 	/*
@@ -1438,16 +1472,6 @@
 		}
 	}
 
-	if (featureflags & DMU_BACKUP_FEATURE_DEDUP) {
-		void *cookie = NULL;
-
-		while (gmep = avl_destroy_nodes(&ra.guid_to_ds_map, &cookie)) {
-			dsl_dataset_rele(gmep->gme_ds, &ra.guid_to_ds_map);
-			kmem_free(gmep, sizeof (guid_map_entry_t));
-		}
-		avl_destroy(&ra.guid_to_ds_map);
-	}
-
 	kmem_free(ra.buf, ra.bufsize);
 	*voffp = ra.voff;
 	return (ra.err);
--- a/usr/src/uts/common/fs/zfs/dsl_dataset.c	Tue Jun 01 17:33:59 2010 -0400
+++ b/usr/src/uts/common/fs/zfs/dsl_dataset.c	Tue Jun 01 17:04:42 2010 -0600
@@ -37,6 +37,7 @@
 #include <sys/zfs_ioctl.h>
 #include <sys/spa.h>
 #include <sys/zfs_znode.h>
+#include <sys/zfs_onexit.h>
 #include <sys/zvol.h>
 #include <sys/dsl_scan.h>
 #include <sys/dsl_deadlist.h>
@@ -3421,6 +3422,23 @@
 	char failed[MAXPATHLEN];
 };
 
+/*
+ * Argument block for dsl_dataset_user_release_onexit().  It carries private
+ * copies of the four arguments that dsl_dataset_user_release() takes, so
+ * the release can be issued later without the original buffers.
+ */
+typedef struct zfs_hold_cleanup_arg {
+	char dsname[MAXNAMELEN];	/* copy of the dsname argument */
+	char snapname[MAXNAMELEN];	/* copy of the snapname argument */
+	char htag[MAXNAMELEN];		/* copy of the hold tag */
+	boolean_t recursive;		/* copy of the recursive flag */
+} zfs_hold_cleanup_arg_t;
+
+/*
+ * Cleanup-on-exit callback: issue the dsl_dataset_user_release() described
+ * by 'arg' (a zfs_hold_cleanup_arg_t) and then free the argument block.
+ * The result of the release is deliberately discarded.
+ */
+static void
+dsl_dataset_user_release_onexit(void *arg)
+{
+	zfs_hold_cleanup_arg_t *hold = arg;
+
+	(void) dsl_dataset_user_release(hold->dsname, hold->snapname,
+	    hold->htag, hold->recursive);
+	kmem_free(hold, sizeof (*hold));
+}
+
 /*
  * The max length of a temporary tag prefix is the number of hex digits
  * required to express UINT64_MAX plus one for the hyphen.
@@ -3525,7 +3543,7 @@
 
 int
 dsl_dataset_user_hold(char *dsname, char *snapname, char *htag,
-    boolean_t recursive, boolean_t temphold)
+    boolean_t recursive, boolean_t temphold, int cleanup_fd)
 {
 	struct dsl_ds_holdarg *ha;
 	dsl_sync_task_t *dst;
@@ -3547,6 +3565,7 @@
 	ha->snapname = snapname;
 	ha->recursive = recursive;
 	ha->temphold = temphold;
+
 	if (recursive) {
 		error = dmu_objset_find(dsname, dsl_dataset_user_hold_one,
 		    ha, DS_FIND_CHILDREN);
@@ -3574,6 +3593,24 @@
 		(void) strlcpy(dsname, ha->failed, sizeof (ha->failed));
 
 	dsl_sync_task_group_destroy(ha->dstg);
+
+	/*
+	 * If this set of temporary holds is to be removed upon process exit,
+	 * register that action now.
+	 *
+	 * The registration has to happen in every build, so it must not be
+	 * wrapped in an ASSERT: assertions are compiled out of non-DEBUG
+	 * kernels, which would silently skip the call and leak 'ca'.
+	 * cleanup_fd is supplied by the caller (it arrives via the hold
+	 * ioctl), so a registration failure is handled rather than asserted:
+	 * the holds just taken are dropped again and the error is returned.
+	 */
+	if (error == 0 && cleanup_fd != -1 && temphold) {
+		zfs_hold_cleanup_arg_t *ca;
+		uint64_t action_handle;
+
+		ca = kmem_alloc(sizeof (zfs_hold_cleanup_arg_t), KM_SLEEP);
+		(void) strlcpy(ca->dsname, dsname, sizeof (ca->dsname));
+		(void) strlcpy(ca->snapname, snapname, sizeof (ca->snapname));
+		(void) strlcpy(ca->htag, htag, sizeof (ca->htag));
+		ca->recursive = recursive;
+		error = zfs_onexit_add_cb(cleanup_fd,
+		    dsl_dataset_user_release_onexit, ca, &action_handle);
+		if (error != 0) {
+			/*
+			 * Nothing will ever fire the callback, so undo by
+			 * hand: free the argument block and make a best
+			 * effort to release the holds taken above.
+			 */
+			kmem_free(ca, sizeof (zfs_hold_cleanup_arg_t));
+			(void) dsl_dataset_user_release(dsname, snapname,
+			    htag, recursive);
+		}
+	}
+
 	kmem_free(ha, sizeof (struct dsl_ds_holdarg));
 	spa_close(spa, FTAG);
 	return (error);
--- a/usr/src/uts/common/fs/zfs/sys/dmu.h	Tue Jun 01 17:33:59 2010 -0400
+++ b/usr/src/uts/common/fs/zfs/sys/dmu.h	Tue Jun 01 17:04:42 2010 -0600
@@ -721,7 +721,8 @@
 
 int dmu_recv_begin(char *tofs, char *tosnap, char *topds, struct drr_begin *,
     boolean_t force, objset_t *origin, dmu_recv_cookie_t *);
-int dmu_recv_stream(dmu_recv_cookie_t *drc, struct vnode *vp, offset_t *voffp);
+int dmu_recv_stream(dmu_recv_cookie_t *drc, struct vnode *vp, offset_t *voffp,
+    int cleanup_fd, uint64_t *action_handlep);
 int dmu_recv_end(dmu_recv_cookie_t *drc);
 
 /* CRC64 table */
--- a/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h	Tue Jun 01 17:33:59 2010 -0400
+++ b/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h	Tue Jun 01 17:04:42 2010 -0600
@@ -197,7 +197,7 @@
 int dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head,
     boolean_t force);
 int dsl_dataset_user_hold(char *dsname, char *snapname, char *htag,
-    boolean_t recursive, boolean_t temphold);
+    boolean_t recursive, boolean_t temphold, int cleanup_fd);
 int dsl_dataset_user_release(char *dsname, char *snapname, char *htag,
     boolean_t recursive);
 int dsl_dataset_user_release_tmp(struct dsl_pool *dp, uint64_t dsobj,
--- a/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h	Tue Jun 01 17:33:59 2010 -0400
+++ b/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h	Tue Jun 01 17:04:42 2010 -0600
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #ifndef	_SYS_ZFS_IOCTL_H
@@ -265,6 +264,9 @@
 	zinject_record_t zc_inject_record;
 	boolean_t	zc_defer_destroy;
 	boolean_t	zc_temphold;
+	uint64_t	zc_action_handle;
+	int		zc_cleanup_fd;
+	uint8_t		zc_pad[4];
 } zfs_cmd_t;
 
 typedef struct zfs_useracct {
@@ -274,8 +276,8 @@
 	uint64_t zu_space;
 } zfs_useracct_t;
 
-#define	ZVOL_MAX_MINOR	(1 << 16)
-#define	ZFS_MIN_MINOR	(ZVOL_MAX_MINOR + 1)
+#define	ZFSDEV_MAX_MINOR	(1 << 16)
+#define	ZFS_MIN_MINOR	(ZFSDEV_MAX_MINOR + 1)
 
 #define	ZPOOL_EXPORT_AFTER_SPLIT 0x1
 
@@ -295,6 +297,28 @@
 extern int zfs_busy(void);
 extern int zfs_unmount_snap(const char *, void *);
 
+/*
+ * ZFS minor numbers can refer to either a control device instance or
+ * a zvol. Depending on the value of zss_type, zss_data points to either
+ * a zvol_state_t or a zfs_onexit_t.
+ */
+enum zfs_soft_state_type {
+	ZSST_ZVOL,	/* zss_data is a zvol_state_t */
+	ZSST_CTLDEV	/* zss_data is a zfs_onexit_t */
+};
+
+/*
+ * Per-minor soft state entry; fetch the payload through
+ * zfsdev_get_soft_state(), which checks zss_type before returning zss_data.
+ */
+typedef struct zfs_soft_state {
+	enum zfs_soft_state_type zss_type;	/* selects meaning of zss_data */
+	void *zss_data;				/* type-specific payload */
+} zfs_soft_state_t;
+
+extern void *zfsdev_get_soft_state(minor_t minor,
+    enum zfs_soft_state_type which);
+extern minor_t zfsdev_minor_alloc(void);
+
+extern void *zfsdev_state;
+extern kmutex_t zfsdev_state_lock;
+
 #endif	/* _KERNEL */
 
 #ifdef	__cplusplus
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/fs/zfs/sys/zfs_onexit.h	Tue Jun 01 17:04:42 2010 -0600
@@ -0,0 +1,62 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#ifndef	_SYS_ZFS_ONEXIT_H
+#define	_SYS_ZFS_ONEXIT_H
+
+#include <sys/zfs_context.h>
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+#ifdef _KERNEL
+
+/*
+ * Per-open cleanup state: a list of pending actions and the lock that
+ * guards that list.  Created by zfs_onexit_init() and torn down by
+ * zfs_onexit_destroy().
+ */
+typedef struct zfs_onexit {
+	kmutex_t	zo_lock;	/* protects zo_actions */
+	list_t		zo_actions;	/* zfs_onexit_action_node_t entries */
+} zfs_onexit_t;
+
+/*
+ * One registered cleanup action: a function and the single argument it
+ * is to be called with.
+ */
+typedef struct zfs_onexit_action_node {
+	list_node_t	za_link;		/* linkage on zo_actions */
+	void		(*za_func)(void *);	/* callback to invoke */
+	void		*za_data;		/* argument for za_func */
+} zfs_onexit_action_node_t;
+
+extern void zfs_onexit_init(zfs_onexit_t **zo);
+extern void zfs_onexit_destroy(zfs_onexit_t *zo);
+
+#endif
+
+extern int zfs_onexit_add_cb(int fd, void (*func)(void *), void *data,
+    uint64_t *action_handle);
+extern int zfs_onexit_del_cb(int fd, uint64_t action_handle, boolean_t fire);
+extern int zfs_onexit_cb_data(int fd, uint64_t action_handle, void **data);
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _SYS_ZFS_ONEXIT_H */
--- a/usr/src/uts/common/fs/zfs/zfs_ioctl.c	Tue Jun 01 17:33:59 2010 -0400
+++ b/usr/src/uts/common/fs/zfs/zfs_ioctl.c	Tue Jun 01 17:04:42 2010 -0600
@@ -60,6 +60,7 @@
 #include <sys/fs/zfs.h>
 #include <sys/zfs_ctldir.h>
 #include <sys/zfs_dir.h>
+#include <sys/zfs_onexit.h>
 #include <sys/zvol.h>
 #include <sys/dsl_scan.h>
 #include <sharefs/share.h>
@@ -3342,11 +3343,14 @@
  * zc_cookie		file descriptor to recv from
  * zc_begin_record	the BEGIN record of the stream (not byteswapped)
  * zc_guid		force flag
+ * zc_cleanup_fd	cleanup-on-exit file descriptor
+ * zc_action_handle	handle for this guid/ds mapping (or zero on first call)
  *
  * outputs:
  * zc_cookie		number of bytes read
  * zc_nvlist_dst{_size} error for each unapplied received property
  * zc_obj		zprop_errflags_t
+ * zc_action_handle	handle for this guid/ds mapping
  */
 static int
 zfs_ioc_recv(zfs_cmd_t *zc)
@@ -3475,7 +3479,8 @@
 	}
 
 	off = fp->f_offset;
-	error = dmu_recv_stream(&drc, fp->f_vnode, &off);
+	error = dmu_recv_stream(&drc, fp->f_vnode, &off, zc->zc_cleanup_fd,
+	    &zc->zc_action_handle);
 
 	if (error == 0) {
 		zfsvfs_t *zfsvfs = NULL;
@@ -4182,11 +4187,12 @@
 
 /*
  * inputs:
- * zc_name	name of filesystem
- * zc_value	short name of snap
- * zc_string	user-supplied tag for this reference
- * zc_cookie	recursive flag
- * zc_temphold	set if hold is temporary
+ * zc_name		name of filesystem
+ * zc_value		short name of snap
+ * zc_string		user-supplied tag for this hold
+ * zc_cookie		recursive flag
+ * zc_temphold		set if hold is temporary
+ * zc_cleanup_fd	cleanup-on-exit file descriptor for calling process
  *
  * outputs:		none
  */
@@ -4199,17 +4205,17 @@
 		return (EINVAL);
 
 	return (dsl_dataset_user_hold(zc->zc_name, zc->zc_value,
-	    zc->zc_string, recursive, zc->zc_temphold));
+	    zc->zc_string, recursive, zc->zc_temphold, zc->zc_cleanup_fd));
 }
 
 /*
  * inputs:
- * zc_name	name of dataset from which we're releasing a user reference
+ * zc_name	name of dataset from which we're releasing a user hold
  * zc_value	short name of snap
- * zc_string	user-supplied tag for this reference
+ * zc_string	user-supplied tag for this hold
  * zc_cookie	recursive flag
  *
- * outputs:		none
+ * outputs:	none
  */
 static int
 zfs_ioc_release(zfs_cmd_t *zc)
@@ -4369,14 +4375,124 @@
 	return (error);
 }
 
+/*
+ * Find a free minor number.
+ *
+ * Minors 1 .. ZFSDEV_MAX_MINOR are probed round-robin, starting just after
+ * the minor handed out by the previous successful call, and the first one
+ * with no soft state is returned.  Returns 0 if every minor is in use.
+ *
+ * The scan is bounded by a probe count rather than by "until we are back
+ * at last_minor": that way last_minor itself is re-tested (it may have been
+ * freed since it was handed out) and the loop terminates even while
+ * last_minor still holds its initial value of 0.
+ *
+ * The caller must hold zfsdev_state_lock.
+ */
+minor_t
+zfsdev_minor_alloc(void)
+{
+	static minor_t last_minor;
+	minor_t m = last_minor;
+	uint_t probes;
+
+	ASSERT(MUTEX_HELD(&zfsdev_state_lock));
+
+	for (probes = 0; probes < ZFSDEV_MAX_MINOR; probes++) {
+		/* Advance, wrapping from the top of the range back to 1. */
+		m = (m >= ZFSDEV_MAX_MINOR) ? 1 : m + 1;
+		if (ddi_get_soft_state(zfsdev_state, m) == NULL) {
+			last_minor = m;
+			return (m);
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Set up the state for a clone open of the control device (minor 0):
+ * allocate a free minor, create its soft state entry, mark it as
+ * ZSST_CTLDEV, attach a fresh zfs_onexit_t, and rewrite *devp to name the
+ * new minor.
+ *
+ * Returns 0 on success, ENXIO if no minor is available, or EAGAIN if the
+ * soft state entry cannot be allocated.  The caller must hold
+ * zfsdev_state_lock.
+ */
+static int
+zfs_ctldev_init(dev_t *devp)
+{
+	zfs_soft_state_t *state;
+	minor_t new_minor;
+
+	ASSERT(MUTEX_HELD(&zfsdev_state_lock));
+	ASSERT(getminor(*devp) == 0);
+
+	if ((new_minor = zfsdev_minor_alloc()) == 0)
+		return (ENXIO);
+
+	if (ddi_soft_state_zalloc(zfsdev_state, new_minor) != DDI_SUCCESS)
+		return (EAGAIN);
+
+	state = ddi_get_soft_state(zfsdev_state, new_minor);
+	state->zss_type = ZSST_CTLDEV;
+	zfs_onexit_init((zfs_onexit_t **)&state->zss_data);
+
+	/* Same major as the device that was opened, but the new minor. */
+	*devp = makedevice(getemajor(*devp), new_minor);
+
+	return (0);
+}
+
+/*
+ * Tear down a control device instance: destroy its cleanup state 'zo'
+ * (zfs_onexit_destroy() invokes any callbacks still registered) and then
+ * free the soft state entry for 'minor'.  The caller must hold
+ * zfsdev_state_lock.
+ */
+static void
+zfs_ctldev_destroy(zfs_onexit_t *zo, minor_t minor)
+{
+	ASSERT(MUTEX_HELD(&zfsdev_state_lock));
+
+	zfs_onexit_destroy(zo);
+	ddi_soft_state_free(zfsdev_state, minor);
+}
+
+/*
+ * Return the payload (zss_data) of the soft state entry for 'minor', but
+ * only if an entry exists and its type is 'which'; otherwise return NULL.
+ */
+void *
+zfsdev_get_soft_state(minor_t minor, enum zfs_soft_state_type which)
+{
+	zfs_soft_state_t *state = ddi_get_soft_state(zfsdev_state, minor);
+
+	if (state == NULL)
+		return (NULL);
+	if (state->zss_type != which)
+		return (NULL);
+
+	return (state->zss_data);
+}
+
+/*
+ * Driver open entry point.  A non-zero minor is passed straight to
+ * zvol_open().  Minor 0 is the control device: a plain open succeeds
+ * without further work, while an exclusive (FEXCL) open gets its own
+ * minor and cleanup state from zfs_ctldev_init(), whose result is returned.
+ */
+static int
+zfsdev_open(dev_t *devp, int flag, int otyp, cred_t *cr)
+{
+	int error;
+
+	if (getminor(*devp) != 0)
+		return (zvol_open(devp, flag, otyp, cr));
+
+	/* This is the control device. Allocate a new minor if requested. */
+	if (!(flag & FEXCL))
+		return (0);
+
+	mutex_enter(&zfsdev_state_lock);
+	error = zfs_ctldev_init(devp);
+	mutex_exit(&zfsdev_state_lock);
+
+	return (error);
+}
+
+/*
+ * Driver close entry point.  Closing minor 0 needs no work.  For any other
+ * minor, a ZSST_CTLDEV instance is torn down here (which runs its
+ * registered cleanup callbacks); anything else is handed to zvol_close()
+ * after zfsdev_state_lock has been dropped.
+ */
+static int
+zfsdev_close(dev_t dev, int flag, int otyp, cred_t *cr)
+{
+	minor_t minor = getminor(dev);
+	zfs_onexit_t *state;
+
+	if (minor == 0)
+		return (0);
+
+	mutex_enter(&zfsdev_state_lock);
+	state = zfsdev_get_soft_state(minor, ZSST_CTLDEV);
+	if (state != NULL) {
+		zfs_ctldev_destroy(state, minor);
+		mutex_exit(&zfsdev_state_lock);
+		return (0);
+	}
+	mutex_exit(&zfsdev_state_lock);
+
+	return (zvol_close(dev, flag, otyp, cr));
+}
+
 static int
 zfsdev_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp)
 {
 	zfs_cmd_t *zc;
 	uint_t vec;
 	int error, rc;
-
-	if (getminor(dev) != 0)
+	minor_t minor = getminor(dev);
+
+	if (minor != 0 &&
+	    zfsdev_get_soft_state(minor, ZSST_CTLDEV) == NULL)
 		return (zvol_ioctl(dev, cmd, arg, flag, cr, rvalp));
 
 	vec = cmd - ZFS_IOC;
@@ -4499,8 +4615,8 @@
  * so most of the standard driver entry points are in zvol.c.
  */
 static struct cb_ops zfs_cb_ops = {
-	zvol_open,	/* open */
-	zvol_close,	/* close */
+	zfsdev_open,	/* open */
+	zfsdev_close,	/* close */
 	zvol_strategy,	/* strategy */
 	nodev,		/* print */
 	zvol_dump,	/* dump */
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/fs/zfs/zfs_onexit.c	Tue Jun 01 17:04:42 2010 -0600
@@ -0,0 +1,225 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/errno.h>
+#include <sys/open.h>
+#include <sys/kmem.h>
+#include <sys/conf.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/zfs_ioctl.h>
+#include <sys/mkdev.h>
+#include <sys/zfs_onexit.h>
+#include <sys/zvol.h>
+
+/*
+ * ZFS kernel routines may add/delete callback routines to be invoked
+ * upon process exit (triggered via the close operation from the /dev/zfs
+ * driver).
+ *
+ * These cleanup callbacks are intended to allow for the accumulation
+ * of kernel state across multiple ioctls.  User processes participate
+ * by opening ZFS_DEV with O_EXCL. This causes the ZFS driver to do a
+ * clone-open, generating a unique minor number. The process then passes
+ * along that file descriptor to each ioctl that might have a cleanup operation.
+ *
+ * A simple example is zfs_ioc_recv(), where we might create an AVL tree
+ * with dataset/GUID mappings and then reuse that tree on subsequent
+ * zfs_ioc_recv() calls.
+ *
+ * On the first zfs_ioc_recv() call, dmu_recv_stream() will kmem_alloc()
+ * the AVL tree and pass it along with a callback function to
+ * zfs_onexit_add_cb(). The zfs_onexit_add_cb() routine will register the
+ * callback and return an action handle.
+ *
+ * The action handle is then passed from user space to subsequent
+ * zfs_ioc_recv() calls, so that dmu_recv_stream() can fetch its AVL tree
+ * by calling zfs_onexit_cb_data() with the cleanup fd and action handle.
+ *
+ * If the user process exits abnormally, the callback is invoked implicitly
+ * as part of the driver close operation.  Once the user space process is
+ * finished with the accumulated kernel state, it can also just call close(2)
+ * on the cleanup fd to trigger the cleanup callback.
+ */
+
+/*
+ * Allocate and initialize an empty cleanup state (lock plus action list)
+ * and store it in *zop.
+ */
+void
+zfs_onexit_init(zfs_onexit_t **zop)
+{
+	zfs_onexit_t *state = kmem_zalloc(sizeof (*state), KM_SLEEP);
+
+	mutex_init(&state->zo_lock, NULL, MUTEX_DEFAULT, NULL);
+	list_create(&state->zo_actions, sizeof (zfs_onexit_action_node_t),
+	    offsetof(zfs_onexit_action_node_t, za_link));
+
+	*zop = state;
+}
+
+/*
+ * Run every action still registered on 'zo', in list order, and then free
+ * the whole structure.  Each node is unlinked under zo_lock, but the lock
+ * is dropped while its callback runs and retaken before the next list
+ * lookup.
+ */
+void
+zfs_onexit_destroy(zfs_onexit_t *zo)
+{
+	zfs_onexit_action_node_t *ap;
+
+	mutex_enter(&zo->zo_lock);
+	while ((ap = list_head(&zo->zo_actions)) != NULL) {
+		list_remove(&zo->zo_actions, ap);
+		/* Do not hold zo_lock across the callback. */
+		mutex_exit(&zo->zo_lock);
+		ap->za_func(ap->za_data);
+		kmem_free(ap, sizeof (zfs_onexit_action_node_t));
+		mutex_enter(&zo->zo_lock);
+	}
+	mutex_exit(&zo->zo_lock);
+
+	list_destroy(&zo->zo_actions);
+	mutex_destroy(&zo->zo_lock);
+	kmem_free(zo, sizeof (zfs_onexit_t));
+}
+
+/*
+ * Translate a file descriptor into the cleanup state of the control device
+ * instance it refers to.
+ *
+ * On success returns 0 with *zo set and the getf() hold on 'fd' still in
+ * place; the caller must releasef(fd) when finished.  Returns EBADF (with
+ * no hold outstanding) if 'fd' is not open or its minor has no ZSST_CTLDEV
+ * state.
+ *
+ * NOTE(review): only the minor number of the vnode's v_rdev is consulted
+ * here; confirm that an fd for some other device cannot alias a control
+ * device minor.
+ */
+static int
+zfs_onexit_fd_to_state(int fd, zfs_onexit_t **zo)
+{
+	file_t *fp = getf(fd);
+
+	if (fp == NULL)
+		return (EBADF);
+
+	*zo = zfsdev_get_soft_state(getminor(fp->f_vnode->v_rdev),
+	    ZSST_CTLDEV);
+	if (*zo != NULL)
+		return (0);
+
+	releasef(fd);
+	return (EBADF);
+}
+
+/*
+ * Add a callback to be invoked when the calling process exits.
+ *
+ * 'func' will be called with 'data' when the cleanup state behind 'fd' is
+ * destroyed, or earlier via zfs_onexit_del_cb().  On success, returns 0 and
+ * stores in *action_handle a handle (the node's address) that identifies
+ * this registration.  Fails with the error from zfs_onexit_fd_to_state()
+ * if 'fd' cannot be resolved.
+ */
+int
+zfs_onexit_add_cb(int fd, void (*func)(void *), void *data,
+    uint64_t *action_handle)
+{
+	zfs_onexit_action_node_t *node;
+	zfs_onexit_t *state;
+	int error;
+
+	if ((error = zfs_onexit_fd_to_state(fd, &state)) != 0)
+		return (error);
+
+	node = kmem_alloc(sizeof (*node), KM_SLEEP);
+	list_link_init(&node->za_link);
+	node->za_func = func;
+	node->za_data = data;
+
+	mutex_enter(&state->zo_lock);
+	list_insert_tail(&state->zo_actions, node);
+	mutex_exit(&state->zo_lock);
+
+	*action_handle = (uint64_t)(uintptr_t)node;
+	releasef(fd);
+
+	return (0);
+}
+
+/*
+ * Look up the action node identified by 'action_handle' on zo's list.
+ * The handle is only ever compared against nodes actually on the list, so
+ * a bogus handle yields NULL rather than being dereferenced.  The caller
+ * must hold zo_lock.
+ */
+static zfs_onexit_action_node_t *
+zfs_onexit_find_cb(zfs_onexit_t *zo, uint64_t action_handle)
+{
+	zfs_onexit_action_node_t *wanted =
+	    (zfs_onexit_action_node_t *)(uintptr_t)action_handle;
+	zfs_onexit_action_node_t *cur;
+
+	ASSERT(MUTEX_HELD(&zo->zo_lock));
+
+	for (cur = list_head(&zo->zo_actions); cur != NULL;
+	    cur = list_next(&zo->zo_actions, cur)) {
+		if (cur == wanted)
+			return (cur);
+	}
+
+	return (NULL);
+}
+
+/*
+ * Delete the callback, triggering it first if 'fire' is set.
+ *
+ * Returns 0 on success, ENOENT if 'action_handle' does not name an action
+ * registered on this fd's cleanup state, or the error from
+ * zfs_onexit_fd_to_state() if 'fd' cannot be resolved.
+ */
+int
+zfs_onexit_del_cb(int fd, uint64_t action_handle, boolean_t fire)
+{
+	zfs_onexit_t *zo;
+	zfs_onexit_action_node_t *ap;
+	int error;
+
+	error = zfs_onexit_fd_to_state(fd, &zo);
+	if (error)
+		return (error);
+
+	mutex_enter(&zo->zo_lock);
+	ap = zfs_onexit_find_cb(zo, action_handle);
+	if (ap != NULL) {
+		/* Unlink under the lock, but run the callback without it. */
+		list_remove(&zo->zo_actions, ap);
+		mutex_exit(&zo->zo_lock);
+		if (fire)
+			ap->za_func(ap->za_data);
+		kmem_free(ap, sizeof (zfs_onexit_action_node_t));
+	} else {
+		mutex_exit(&zo->zo_lock);
+		error = ENOENT;
+	}
+	/* Drop the hold taken by zfs_onexit_fd_to_state(). */
+	releasef(fd);
+
+	return (error);
+}
+
+/*
+ * Return the data associated with this callback.  This allows consumers
+ * of the cleanup-on-exit interfaces to stash kernel data across system
+ * calls, knowing that it will be cleaned up if the calling process exits.
+ *
+ * *data is set to NULL up front and is only overwritten on success.
+ * Returns 0 on success, ENOENT if 'action_handle' is not registered on
+ * this fd's cleanup state, or the error from zfs_onexit_fd_to_state().
+ */
+int
+zfs_onexit_cb_data(int fd, uint64_t action_handle, void **data)
+{
+	zfs_onexit_action_node_t *node;
+	zfs_onexit_t *state;
+	int error;
+
+	*data = NULL;
+
+	if ((error = zfs_onexit_fd_to_state(fd, &state)) != 0)
+		return (error);
+
+	mutex_enter(&state->zo_lock);
+	node = zfs_onexit_find_cb(state, action_handle);
+	if (node == NULL)
+		error = ENOENT;
+	else
+		*data = node->za_data;
+	mutex_exit(&state->zo_lock);
+	releasef(fd);
+
+	return (error);
+}
--- a/usr/src/uts/common/fs/zfs/zvol.c	Tue Jun 01 17:33:59 2010 -0400
+++ b/usr/src/uts/common/fs/zfs/zvol.c	Tue Jun 01 17:04:42 2010 -0600
@@ -80,18 +80,18 @@
 
 #include "zfs_namecheck.h"
 
-static void *zvol_state;
+void *zfsdev_state;
 static char *zvol_tag = "zvol_tag";
 
 #define	ZVOL_DUMPSIZE		"dumpsize"
 
 /*
- * This lock protects the zvol_state structure from being modified
+ * This lock protects the zfsdev_state structure from being modified
  * while it's being used, e.g. an open that comes in before a create
  * finishes.  It also protects temporary opens of the dataset so that,
  * e.g., an open doesn't get a spurious EBUSY.
  */
-static kmutex_t zvol_state_lock;
+kmutex_t zfsdev_state_lock;
 static uint32_t zvol_minors;
 
 typedef struct zvol_extent {
@@ -205,33 +205,16 @@
 	return (error);
 }
 
-/*
- * Find a free minor number.
- */
-static minor_t
-zvol_minor_alloc(void)
-{
-	minor_t minor;
-
-	ASSERT(MUTEX_HELD(&zvol_state_lock));
-
-	for (minor = 1; minor <= ZVOL_MAX_MINOR; minor++)
-		if (ddi_get_soft_state(zvol_state, minor) == NULL)
-			return (minor);
-
-	return (0);
-}
-
 static zvol_state_t *
 zvol_minor_lookup(const char *name)
 {
 	minor_t minor;
 	zvol_state_t *zv;
 
-	ASSERT(MUTEX_HELD(&zvol_state_lock));
+	ASSERT(MUTEX_HELD(&zfsdev_state_lock));
 
-	for (minor = 1; minor <= ZVOL_MAX_MINOR; minor++) {
-		zv = ddi_get_soft_state(zvol_state, minor);
+	for (minor = 1; minor <= ZFSDEV_MAX_MINOR; minor++) {
+		zv = zfsdev_get_soft_state(minor, ZSST_ZVOL);
 		if (zv == NULL)
 			continue;
 		if (strcmp(zv->zv_name, name) == 0)
@@ -438,11 +421,11 @@
 {
 	zvol_state_t *zv;
 
-	mutex_enter(&zvol_state_lock);
+	mutex_enter(&zfsdev_state_lock);
 	zv = zvol_minor_lookup(name);
 	if (minor && zv)
 		*minor = zv->zv_minor;
-	mutex_exit(&zvol_state_lock);
+	mutex_exit(&zfsdev_state_lock);
 	return (zv ? 0 : -1);
 }
 
@@ -452,6 +435,7 @@
 int
 zvol_create_minor(const char *name)
 {
+	zfs_soft_state_t *zs;
 	zvol_state_t *zv;
 	objset_t *os;
 	dmu_object_info_t doi;
@@ -459,10 +443,10 @@
 	char chrbuf[30], blkbuf[30];
 	int error;
 
-	mutex_enter(&zvol_state_lock);
+	mutex_enter(&zfsdev_state_lock);
 
 	if (zvol_minor_lookup(name) != NULL) {
-		mutex_exit(&zvol_state_lock);
+		mutex_exit(&zfsdev_state_lock);
 		return (EEXIST);
 	}
 
@@ -470,19 +454,19 @@
 	error = dmu_objset_own(name, DMU_OST_ZVOL, B_TRUE, zvol_tag, &os);
 
 	if (error) {
-		mutex_exit(&zvol_state_lock);
+		mutex_exit(&zfsdev_state_lock);
 		return (error);
 	}
 
-	if ((minor = zvol_minor_alloc()) == 0) {
+	if ((minor = zfsdev_minor_alloc()) == 0) {
 		dmu_objset_disown(os, zvol_tag);
-		mutex_exit(&zvol_state_lock);
+		mutex_exit(&zfsdev_state_lock);
 		return (ENXIO);
 	}
 
-	if (ddi_soft_state_zalloc(zvol_state, minor) != DDI_SUCCESS) {
+	if (ddi_soft_state_zalloc(zfsdev_state, minor) != DDI_SUCCESS) {
 		dmu_objset_disown(os, zvol_tag);
-		mutex_exit(&zvol_state_lock);
+		mutex_exit(&zfsdev_state_lock);
 		return (EAGAIN);
 	}
 	(void) ddi_prop_update_string(minor, zfs_dip, ZVOL_PROP_NAME,
@@ -492,9 +476,9 @@
 
 	if (ddi_create_minor_node(zfs_dip, chrbuf, S_IFCHR,
 	    minor, DDI_PSEUDO, 0) == DDI_FAILURE) {
-		ddi_soft_state_free(zvol_state, minor);
+		ddi_soft_state_free(zfsdev_state, minor);
 		dmu_objset_disown(os, zvol_tag);
-		mutex_exit(&zvol_state_lock);
+		mutex_exit(&zfsdev_state_lock);
 		return (EAGAIN);
 	}
 
@@ -503,14 +487,15 @@
 	if (ddi_create_minor_node(zfs_dip, blkbuf, S_IFBLK,
 	    minor, DDI_PSEUDO, 0) == DDI_FAILURE) {
 		ddi_remove_minor_node(zfs_dip, chrbuf);
-		ddi_soft_state_free(zvol_state, minor);
+		ddi_soft_state_free(zfsdev_state, minor);
 		dmu_objset_disown(os, zvol_tag);
-		mutex_exit(&zvol_state_lock);
+		mutex_exit(&zfsdev_state_lock);
 		return (EAGAIN);
 	}
 
-	zv = ddi_get_soft_state(zvol_state, minor);
-
+	zs = ddi_get_soft_state(zfsdev_state, minor);
+	zs->zss_type = ZSST_ZVOL;
+	zv = zs->zss_data = kmem_zalloc(sizeof (zvol_state_t), KM_SLEEP);
 	(void) strlcpy(zv->zv_name, name, MAXPATHLEN);
 	zv->zv_min_bs = DEV_BSHIFT;
 	zv->zv_minor = minor;
@@ -536,7 +521,7 @@
 
 	zvol_minors++;
 
-	mutex_exit(&zvol_state_lock);
+	mutex_exit(&zfsdev_state_lock);
 
 	return (0);
 }
@@ -548,21 +533,24 @@
 zvol_remove_zv(zvol_state_t *zv)
 {
 	char nmbuf[20];
+	minor_t minor = zv->zv_minor;
 
-	ASSERT(MUTEX_HELD(&zvol_state_lock));
+	ASSERT(MUTEX_HELD(&zfsdev_state_lock));
 	if (zv->zv_total_opens != 0)
 		return (EBUSY);
 
-	(void) snprintf(nmbuf, sizeof (nmbuf), "%u,raw", zv->zv_minor);
+	(void) snprintf(nmbuf, sizeof (nmbuf), "%u,raw", minor);
 	ddi_remove_minor_node(zfs_dip, nmbuf);
 
-	(void) snprintf(nmbuf, sizeof (nmbuf), "%u", zv->zv_minor);
+	(void) snprintf(nmbuf, sizeof (nmbuf), "%u", minor);
 	ddi_remove_minor_node(zfs_dip, nmbuf);
 
 	avl_destroy(&zv->zv_znode.z_range_avl);
 	mutex_destroy(&zv->zv_znode.z_range_lock);
 
-	ddi_soft_state_free(zvol_state, zv->zv_minor);
+	kmem_free(zv, sizeof (zvol_state_t));
+
+	ddi_soft_state_free(zfsdev_state, minor);
 
 	zvol_minors--;
 	return (0);
@@ -574,13 +562,13 @@
 	zvol_state_t *zv;
 	int rc;
 
-	mutex_enter(&zvol_state_lock);
+	mutex_enter(&zfsdev_state_lock);
 	if ((zv = zvol_minor_lookup(name)) == NULL) {
-		mutex_exit(&zvol_state_lock);
+		mutex_exit(&zfsdev_state_lock);
 		return (ENXIO);
 	}
 	rc = zvol_remove_zv(zv);
-	mutex_exit(&zvol_state_lock);
+	mutex_exit(&zfsdev_state_lock);
 	return (rc);
 }
 
@@ -680,7 +668,7 @@
 	dmu_tx_t *tx;
 	int error;
 
-	ASSERT(MUTEX_HELD(&zvol_state_lock));
+	ASSERT(MUTEX_HELD(&zfsdev_state_lock));
 
 	tx = dmu_tx_create(os);
 	dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL);
@@ -710,10 +698,10 @@
 	namebuf = kmem_zalloc(strlen(name) + 2, KM_SLEEP);
 	(void) strncpy(namebuf, name, strlen(name));
 	(void) strcat(namebuf, "/");
-	mutex_enter(&zvol_state_lock);
-	for (minor = 1; minor <= ZVOL_MAX_MINOR; minor++) {
+	mutex_enter(&zfsdev_state_lock);
+	for (minor = 1; minor <= ZFSDEV_MAX_MINOR; minor++) {
 
-		zv = ddi_get_soft_state(zvol_state, minor);
+		zv = zfsdev_get_soft_state(minor, ZSST_ZVOL);
 		if (zv == NULL)
 			continue;
 		if (strncmp(namebuf, zv->zv_name, strlen(namebuf)) == 0)
@@ -721,7 +709,7 @@
 	}
 	kmem_free(namebuf, strlen(name) + 2);
 
-	mutex_exit(&zvol_state_lock);
+	mutex_exit(&zfsdev_state_lock);
 }
 
 int
@@ -734,10 +722,10 @@
 	uint64_t old_volsize = 0ULL;
 	uint64_t readonly;
 
-	mutex_enter(&zvol_state_lock);
+	mutex_enter(&zfsdev_state_lock);
 	zv = zvol_minor_lookup(name);
 	if ((error = dmu_objset_hold(name, FTAG, &os)) != 0) {
-		mutex_exit(&zvol_state_lock);
+		mutex_exit(&zfsdev_state_lock);
 		return (error);
 	}
 
@@ -800,7 +788,7 @@
 out:
 	dmu_objset_rele(os, FTAG);
 
-	mutex_exit(&zvol_state_lock);
+	mutex_exit(&zfsdev_state_lock);
 
 	return (error);
 }
@@ -809,25 +797,21 @@
 int
 zvol_open(dev_t *devp, int flag, int otyp, cred_t *cr)
 {
-	minor_t minor = getminor(*devp);
 	zvol_state_t *zv;
 	int err = 0;
 
-	if (minor == 0)			/* This is the control device */
-		return (0);
+	mutex_enter(&zfsdev_state_lock);
 
-	mutex_enter(&zvol_state_lock);
-
-	zv = ddi_get_soft_state(zvol_state, minor);
+	zv = zfsdev_get_soft_state(getminor(*devp), ZSST_ZVOL);
 	if (zv == NULL) {
-		mutex_exit(&zvol_state_lock);
+		mutex_exit(&zfsdev_state_lock);
 		return (ENXIO);
 	}
 
 	if (zv->zv_total_opens == 0)
 		err = zvol_first_open(zv);
 	if (err) {
-		mutex_exit(&zvol_state_lock);
+		mutex_exit(&zfsdev_state_lock);
 		return (err);
 	}
 	if ((flag & FWRITE) && (zv->zv_flags & ZVOL_RDONLY)) {
@@ -850,13 +834,13 @@
 		zv->zv_open_count[otyp]++;
 		zv->zv_total_opens++;
 	}
-	mutex_exit(&zvol_state_lock);
+	mutex_exit(&zfsdev_state_lock);
 
 	return (err);
 out:
 	if (zv->zv_total_opens == 0)
 		zvol_last_close(zv);
-	mutex_exit(&zvol_state_lock);
+	mutex_exit(&zfsdev_state_lock);
 	return (err);
 }
 
@@ -868,14 +852,11 @@
 	zvol_state_t *zv;
 	int error = 0;
 
-	if (minor == 0)		/* This is the control device */
-		return (0);
+	mutex_enter(&zfsdev_state_lock);
 
-	mutex_enter(&zvol_state_lock);
-
-	zv = ddi_get_soft_state(zvol_state, minor);
+	zv = zfsdev_get_soft_state(minor, ZSST_ZVOL);
 	if (zv == NULL) {
-		mutex_exit(&zvol_state_lock);
+		mutex_exit(&zfsdev_state_lock);
 		return (ENXIO);
 	}
 
@@ -900,7 +881,7 @@
 	if (zv->zv_total_opens == 0)
 		zvol_last_close(zv);
 
-	mutex_exit(&zvol_state_lock);
+	mutex_exit(&zfsdev_state_lock);
 	return (error);
 }
 
@@ -1138,7 +1119,8 @@
 int
 zvol_strategy(buf_t *bp)
 {
-	zvol_state_t *zv = ddi_get_soft_state(zvol_state, getminor(bp->b_edev));
+	zfs_soft_state_t *zs = NULL;
+	zvol_state_t *zv;
 	uint64_t off, volsize;
 	size_t resid;
 	char *addr;
@@ -1149,17 +1131,23 @@
 	boolean_t is_dump;
 	boolean_t sync;
 
-	if (zv == NULL) {
-		bioerror(bp, ENXIO);
+	if (getminor(bp->b_edev) == 0) {
+		error = EINVAL;
+	} else {
+		zs = ddi_get_soft_state(zfsdev_state, getminor(bp->b_edev));
+		if (zs == NULL)
+			error = ENXIO;
+		else if (zs->zss_type != ZSST_ZVOL)
+			error = EINVAL;
+	}
+
+	if (error) {
+		bioerror(bp, error);
 		biodone(bp);
 		return (0);
 	}
 
-	if (getminor(bp->b_edev) == 0) {
-		bioerror(bp, EINVAL);
-		biodone(bp);
-		return (0);
-	}
+	zv = zs->zss_data;
 
 	if (!(bp->b_flags & B_READ) && (zv->zv_flags & ZVOL_RDONLY)) {
 		bioerror(bp, EROFS);
@@ -1264,10 +1252,7 @@
 	uint64_t boff;
 	uint64_t resid;
 
-	if (minor == 0)			/* This is the control device */
-		return (ENXIO);
-
-	zv = ddi_get_soft_state(zvol_state, minor);
+	zv = zfsdev_get_soft_state(minor, ZSST_ZVOL);
 	if (zv == NULL)
 		return (ENXIO);
 
@@ -1299,10 +1284,7 @@
 	rl_t *rl;
 	int error = 0;
 
-	if (minor == 0)			/* This is the control device */
-		return (ENXIO);
-
-	zv = ddi_get_soft_state(zvol_state, minor);
+	zv = zfsdev_get_soft_state(minor, ZSST_ZVOL);
 	if (zv == NULL)
 		return (ENXIO);
 
@@ -1349,10 +1331,7 @@
 	int error = 0;
 	boolean_t sync;
 
-	if (minor == 0)			/* This is the control device */
-		return (ENXIO);
-
-	zv = ddi_get_soft_state(zvol_state, minor);
+	zv = zfsdev_get_soft_state(minor, ZSST_ZVOL);
 	if (zv == NULL)
 		return (ENXIO);
 
@@ -1471,9 +1450,8 @@
 {
 	zvol_state_t *zv;
 
-	if (minor == 0)
-		return (ENXIO);
-	if ((zv = ddi_get_soft_state(zvol_state, minor)) == NULL)
+	zv = zfsdev_get_soft_state(minor, ZSST_ZVOL);
+	if (zv == NULL)
 		return (ENXIO);
 	if (zv->zv_flags & ZVOL_DUMPIFIED)
 		return (ENXIO);
@@ -1544,12 +1522,12 @@
 	int error = 0;
 	rl_t *rl;
 
-	mutex_enter(&zvol_state_lock);
+	mutex_enter(&zfsdev_state_lock);
 
-	zv = ddi_get_soft_state(zvol_state, getminor(dev));
+	zv = zfsdev_get_soft_state(getminor(dev), ZSST_ZVOL);
 
 	if (zv == NULL) {
-		mutex_exit(&zvol_state_lock);
+		mutex_exit(&zfsdev_state_lock);
 		return (ENXIO);
 	}
 	ASSERT(zv->zv_total_opens > 0);
@@ -1563,7 +1541,7 @@
 		dki.dki_ctype = DKC_UNKNOWN;
 		dki.dki_unit = getminor(dev);
 		dki.dki_maxtransfer = 1 << (SPA_MAXBLOCKSHIFT - zv->zv_min_bs);
-		mutex_exit(&zvol_state_lock);
+		mutex_exit(&zfsdev_state_lock);
 		if (ddi_copyout(&dki, (void *)arg, sizeof (dki), flag))
 			error = EFAULT;
 		return (error);
@@ -1573,7 +1551,7 @@
 		dkm.dki_lbsize = 1U << zv->zv_min_bs;
 		dkm.dki_capacity = zv->zv_volsize >> zv->zv_min_bs;
 		dkm.dki_media_type = DK_UNKNOWN;
-		mutex_exit(&zvol_state_lock);
+		mutex_exit(&zfsdev_state_lock);
 		if (ddi_copyout(&dkm, (void *)arg, sizeof (dkm), flag))
 			error = EFAULT;
 		return (error);
@@ -1583,14 +1561,14 @@
 			uint64_t vs = zv->zv_volsize;
 			uint8_t bs = zv->zv_min_bs;
 
-			mutex_exit(&zvol_state_lock);
+			mutex_exit(&zfsdev_state_lock);
 			error = zvol_getefi((void *)arg, flag, vs, bs);
 			return (error);
 		}
 
 	case DKIOCFLUSHWRITECACHE:
 		dkc = (struct dk_callback *)arg;
-		mutex_exit(&zvol_state_lock);
+		mutex_exit(&zfsdev_state_lock);
 		zil_commit(zv->zv_zilog, UINT64_MAX, ZVOL_OBJ);
 		if ((flag & FKIOCTL) && dkc != NULL && dkc->dkc_callback) {
 			(*dkc->dkc_callback)(dkc->dkc_cookie, error);
@@ -1616,10 +1594,10 @@
 			}
 			if (wce) {
 				zv->zv_flags |= ZVOL_WCE;
-				mutex_exit(&zvol_state_lock);
+				mutex_exit(&zfsdev_state_lock);
 			} else {
 				zv->zv_flags &= ~ZVOL_WCE;
-				mutex_exit(&zvol_state_lock);
+				mutex_exit(&zfsdev_state_lock);
 				zil_commit(zv->zv_zilog, UINT64_MAX, ZVOL_OBJ);
 			}
 			return (0);
@@ -1655,7 +1633,7 @@
 		break;
 
 	}
-	mutex_exit(&zvol_state_lock);
+	mutex_exit(&zfsdev_state_lock);
 	return (error);
 }
 
@@ -1668,15 +1646,16 @@
 void
 zvol_init(void)
 {
-	VERIFY(ddi_soft_state_init(&zvol_state, sizeof (zvol_state_t), 1) == 0);
-	mutex_init(&zvol_state_lock, NULL, MUTEX_DEFAULT, NULL);
+	VERIFY(ddi_soft_state_init(&zfsdev_state, sizeof (zfs_soft_state_t),
+	    1) == 0);
+	mutex_init(&zfsdev_state_lock, NULL, MUTEX_DEFAULT, NULL);
 }
 
 void
 zvol_fini(void)
 {
-	mutex_destroy(&zvol_state_lock);
-	ddi_soft_state_fini(&zvol_state);
+	mutex_destroy(&zfsdev_state_lock);
+	ddi_soft_state_fini(&zfsdev_state);
 }
 
 static int
@@ -1688,7 +1667,7 @@
 	nvlist_t *nv = NULL;
 	uint64_t version = spa_version(dmu_objset_spa(zv->zv_objset));
 
-	ASSERT(MUTEX_HELD(&zvol_state_lock));
+	ASSERT(MUTEX_HELD(&zfsdev_state_lock));
 	error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, 0,
 	    DMU_OBJECT_END);
 	/* wait for dmu_free_long_range to actually free the blocks */