6860996 %temporary clones are not automatically destroyed on error
authorMatthew Ahrens <Matthew.Ahrens@Sun.COM>
Wed, 12 Aug 2009 22:11:31 -0700
changeset 10298 a0d52501437c
parent 10297 614c29db4bc4
child 10299 80845694147f
6860996 %temporary clones are not automatically destroyed on error 6863088 zfs panic in dnode_hold_impl from readdir()
usr/src/cmd/mdb/common/modules/zfs/zfs.c
usr/src/cmd/zdb/zdb.c
usr/src/cmd/zinject/translate.c
usr/src/cmd/ztest/ztest.c
usr/src/lib/libzfs/common/libzfs_dataset.c
usr/src/uts/common/fs/zfs/dbuf.c
usr/src/uts/common/fs/zfs/dmu.c
usr/src/uts/common/fs/zfs/dmu_object.c
usr/src/uts/common/fs/zfs/dmu_objset.c
usr/src/uts/common/fs/zfs/dmu_send.c
usr/src/uts/common/fs/zfs/dmu_tx.c
usr/src/uts/common/fs/zfs/dnode.c
usr/src/uts/common/fs/zfs/dsl_dataset.c
usr/src/uts/common/fs/zfs/dsl_deleg.c
usr/src/uts/common/fs/zfs/dsl_dir.c
usr/src/uts/common/fs/zfs/dsl_pool.c
usr/src/uts/common/fs/zfs/dsl_prop.c
usr/src/uts/common/fs/zfs/spa.c
usr/src/uts/common/fs/zfs/sys/dbuf.h
usr/src/uts/common/fs/zfs/sys/dmu.h
usr/src/uts/common/fs/zfs/sys/dmu_impl.h
usr/src/uts/common/fs/zfs/sys/dmu_objset.h
usr/src/uts/common/fs/zfs/sys/dnode.h
usr/src/uts/common/fs/zfs/sys/dsl_dataset.h
usr/src/uts/common/fs/zfs/sys/zfs_vfsops.h
usr/src/uts/common/fs/zfs/zfs_ctldir.c
usr/src/uts/common/fs/zfs/zfs_ioctl.c
usr/src/uts/common/fs/zfs/zfs_vfsops.c
usr/src/uts/common/fs/zfs/zil.c
usr/src/uts/common/fs/zfs/zvol.c
--- a/usr/src/cmd/mdb/common/modules/zfs/zfs.c	Wed Aug 12 21:16:19 2009 -0700
+++ b/usr/src/cmd/mdb/common/modules/zfs/zfs.c	Wed Aug 12 22:11:31 2009 -0700
@@ -241,7 +241,7 @@
 objset_name(uintptr_t addr, char *buf)
 {
 	static int gotid;
-	static mdb_ctf_id_t osi_id, ds_id;
+	static mdb_ctf_id_t os_id, ds_id;
 	uintptr_t os_dsl_dataset;
 	char ds_snapname[MAXNAMELEN];
 	uintptr_t ds_dir;
@@ -249,9 +249,9 @@
 	buf[0] = '\0';
 
 	if (!gotid) {
-		if (mdb_ctf_lookup_by_name("struct objset_impl",
-		    &osi_id) == -1) {
-			mdb_warn("couldn't find struct objset_impl");
+		if (mdb_ctf_lookup_by_name("struct objset",
+		    &os_id) == -1) {
+			mdb_warn("couldn't find struct objset");
 			return (DCMD_ERR);
 		}
 		if (mdb_ctf_lookup_by_name("struct dsl_dataset",
@@ -263,7 +263,7 @@
 		gotid = TRUE;
 	}
 
-	if (GETMEMBID(addr, &osi_id, os_dsl_dataset, os_dsl_dataset))
+	if (GETMEMBID(addr, &os_id, os_dsl_dataset, os_dsl_dataset))
 		return (DCMD_ERR);
 
 	if (os_dsl_dataset == 0) {
@@ -2158,7 +2158,7 @@
 	{ "dbuf", ":", "print dmu_buf_impl_t", dbuf },
 	{ "dbuf_stats", ":", "dbuf stats", dbuf_stats },
 	{ "dbufs",
-	    "\t[-O objset_impl_t*] [-n objset_name | \"mos\"] "
+	    "\t[-O objset_t*] [-n objset_name | \"mos\"] "
 	    "[-o object | \"mdn\"] \n"
 	    "\t[-l level] [-b blkid | \"bonus\"]",
 	    "find dmu_buf_impl_t's that match specified criteria", dbufs },
--- a/usr/src/cmd/zdb/zdb.c	Wed Aug 12 21:16:19 2009 -0700
+++ b/usr/src/cmd/zdb/zdb.c	Wed Aug 12 22:11:31 2009 -0700
@@ -724,11 +724,11 @@
 
 	(void) printf("Indirect blocks:\n");
 
-	SET_BOOKMARK(&czb, dmu_objset_id(&dn->dn_objset->os),
+	SET_BOOKMARK(&czb, dmu_objset_id(dn->dn_objset),
 	    dn->dn_object, dnp->dn_nlevels - 1, 0);
 	for (j = 0; j < dnp->dn_nblkptr; j++) {
 		czb.zb_blkid = j;
-		(void) visit_indirect(dmu_objset_spa(&dn->dn_objset->os), dnp,
+		(void) visit_indirect(dmu_objset_spa(dn->dn_objset), dnp,
 		    &dnp->dn_blkptr[j], &czb);
 	}
 
@@ -1073,7 +1073,7 @@
 	}
 
 	if (object == 0) {
-		dn = os->os->os_meta_dnode;
+		dn = os->os_meta_dnode;
 	} else {
 		error = dmu_bonus_hold(os, object, FTAG, &db);
 		if (error)
@@ -1191,21 +1191,21 @@
 
 	if (dds.dds_type == DMU_OST_META) {
 		dds.dds_creation_txg = TXG_INITIAL;
-		usedobjs = os->os->os_rootbp->blk_fill;
-		refdbytes = os->os->os_spa->spa_dsl_pool->
+		usedobjs = os->os_rootbp->blk_fill;
+		refdbytes = os->os_spa->spa_dsl_pool->
 		    dp_mos_dir->dd_phys->dd_used_bytes;
 	} else {
 		dmu_objset_space(os, &refdbytes, &scratch, &usedobjs, &scratch);
 	}
 
-	ASSERT3U(usedobjs, ==, os->os->os_rootbp->blk_fill);
+	ASSERT3U(usedobjs, ==, os->os_rootbp->blk_fill);
 
 	nicenum(refdbytes, numbuf);
 
 	if (verbosity >= 4) {
 		(void) sprintf(blkbuf + strlen(blkbuf), ", rootbp ");
 		(void) sprintf_blkptr(blkbuf + strlen(blkbuf),
-		    BP_SPRINTF_LEN - strlen(blkbuf), os->os->os_rootbp);
+		    BP_SPRINTF_LEN - strlen(blkbuf), os->os_rootbp);
 	} else {
 		blkbuf[0] = '\0';
 	}
@@ -1227,7 +1227,7 @@
 	if (verbosity < 2)
 		return;
 
-	if (os->os->os_rootbp->blk_birth == 0)
+	if (os->os_rootbp->blk_birth == 0)
 		return;
 
 	if (zopt_objects != 0) {
@@ -1240,8 +1240,8 @@
 
 	dump_object(os, 0, verbosity, &print_header);
 	object_count = 0;
-	if (os->os->os_userused_dnode &&
-	    os->os->os_userused_dnode->dn_type != 0) {
+	if (os->os_userused_dnode &&
+	    os->os_userused_dnode->dn_type != 0) {
 		dump_object(os, DMU_USERUSED_OBJECT, verbosity, &print_header);
 		dump_object(os, DMU_GROUPUSED_OBJECT, verbosity, &print_header);
 	}
@@ -1398,14 +1398,13 @@
 	int error;
 	objset_t *os;
 
-	error = dmu_objset_open(dsname, DMU_OST_ANY,
-	    DS_MODE_USER | DS_MODE_READONLY, &os);
+	error = dmu_objset_own(dsname, DMU_OST_ANY, B_TRUE, FTAG, &os);
 	if (error) {
 		(void) printf("Could not open %s\n", dsname);
 		return (0);
 	}
 	dump_dir(os);
-	dmu_objset_close(os);
+	dmu_objset_disown(os, FTAG);
 	fuid_table_destroy();
 	return (0);
 }
@@ -2474,8 +2473,8 @@
 
 	if (error == 0) {
 		if (strchr(argv[0], '/') != NULL) {
-			error = dmu_objset_open(argv[0], DMU_OST_ANY,
-			    DS_MODE_USER | DS_MODE_READONLY, &os);
+			error = dmu_objset_own(argv[0], DMU_OST_ANY,
+			    B_TRUE, FTAG, &os);
 		} else {
 			error = spa_open(argv[0], &spa, FTAG);
 		}
@@ -2499,7 +2498,7 @@
 
 	if (os != NULL) {
 		dump_dir(os);
-		dmu_objset_close(os);
+		dmu_objset_disown(os, FTAG);
 	} else {
 		dump_zpool(spa);
 		spa_close(spa, FTAG);
--- a/usr/src/cmd/zinject/translate.c	Wed Aug 12 21:16:19 2009 -0700
+++ b/usr/src/cmd/zinject/translate.c	Wed Aug 12 22:11:31 2009 -0700
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -162,8 +162,8 @@
 	 */
 	sync();
 
-	if ((err = dmu_objset_open(dataset, DMU_OST_ZFS,
-	    DS_MODE_USER | DS_MODE_READONLY, &os)) != 0) {
+	err = dmu_objset_own(dataset, DMU_OST_ZFS, B_TRUE, FTAG, &os);
+	if (err != 0) {
 		(void) fprintf(stderr, "cannot open dataset '%s': %s\n",
 		    dataset, strerror(err));
 		return (-1);
@@ -172,7 +172,7 @@
 	record->zi_objset = dmu_objset_id(os);
 	record->zi_object = statbuf->st_ino;
 
-	dmu_objset_close(os);
+	dmu_objset_disown(os, FTAG);
 
 	return (0);
 }
@@ -247,17 +247,17 @@
 	 * Get the dnode associated with object, so we can calculate the block
 	 * size.
 	 */
-	if ((err = dmu_objset_open(dataset, DMU_OST_ANY,
-	    DS_MODE_USER | DS_MODE_READONLY, &os)) != 0) {
+	if ((err = dmu_objset_own(dataset, DMU_OST_ANY,
+	    B_TRUE, FTAG, &os)) != 0) {
 		(void) fprintf(stderr, "cannot open dataset '%s': %s\n",
 		    dataset, strerror(err));
 		goto out;
 	}
 
 	if (record->zi_object == 0) {
-		dn = os->os->os_meta_dnode;
+		dn = os->os_meta_dnode;
 	} else {
-		err = dnode_hold(os->os, record->zi_object, FTAG, &dn);
+		err = dnode_hold(os, record->zi_object, FTAG, &dn);
 		if (err != 0) {
 			(void) fprintf(stderr, "failed to hold dnode "
 			    "for object %llu\n",
@@ -306,11 +306,11 @@
 	ret = 0;
 out:
 	if (dn) {
-		if (dn != os->os->os_meta_dnode)
+		if (dn != os->os_meta_dnode)
 			dnode_rele(dn, FTAG);
 	}
 	if (os)
-		dmu_objset_close(os);
+		dmu_objset_disown(os, FTAG);
 
 	return (ret);
 }
--- a/usr/src/cmd/ztest/ztest.c	Wed Aug 12 21:16:19 2009 -0700
+++ b/usr/src/cmd/ztest/ztest.c	Wed Aug 12 22:11:31 2009 -0700
@@ -1372,8 +1372,7 @@
 	/*
 	 * Verify that the dataset contains a directory object.
 	 */
-	error = dmu_objset_open(name, DMU_OST_OTHER,
-	    DS_MODE_USER | DS_MODE_READONLY, &os);
+	error = dmu_objset_hold(name, FTAG, &os);
 	ASSERT3U(error, ==, 0);
 	error = dmu_object_info(os, ZTEST_DIROBJ, doi);
 	if (error != ENOENT) {
@@ -1382,16 +1381,15 @@
 		ASSERT3U(doi->doi_type, ==, DMU_OT_UINT64_OTHER);
 		ASSERT3S(doi->doi_physical_blks, >=, 0);
 	}
-	dmu_objset_close(os);
+	dmu_objset_rele(os, FTAG);
 
 	/*
 	 * Destroy the dataset.
 	 */
 	error = dmu_objset_destroy(name, B_FALSE);
 	if (error) {
-		(void) dmu_objset_open(name, DMU_OST_OTHER,
-		    DS_MODE_USER | DS_MODE_READONLY, &os);
-		fatal(0, "dmu_objset_destroy(os=%p) = %d\n", &os, error);
+		(void) dmu_objset_hold(name, FTAG, &os);
+		fatal(0, "dmu_objset_destroy(os=%p) = %d\n", os, error);
 	}
 	return (0);
 }
@@ -1434,7 +1432,6 @@
 	int error;
 	objset_t *os, *os2;
 	char name[100];
-	int basemode, expected_error;
 	zilog_t *zilog;
 	uint64_t seq;
 	uint64_t objects;
@@ -1443,19 +1440,15 @@
 	(void) snprintf(name, 100, "%s/%s_temp_%llu", za->za_pool, za->za_pool,
 	    (u_longlong_t)za->za_instance);
 
-	basemode = DS_MODE_TYPE(za->za_instance);
-	if (basemode != DS_MODE_USER && basemode != DS_MODE_OWNER)
-		basemode = DS_MODE_USER;
-
 	/*
 	 * If this dataset exists from a previous run, process its replay log
 	 * half of the time.  If we don't replay it, then dmu_objset_destroy()
 	 * (invoked from ztest_destroy_cb() below) should just throw it away.
 	 */
 	if (ztest_random(2) == 0 &&
-	    dmu_objset_open(name, DMU_OST_OTHER, DS_MODE_OWNER, &os) == 0) {
+	    dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os) == 0) {
 		zil_replay(os, os, ztest_replay_vector);
-		dmu_objset_close(os);
+		dmu_objset_disown(os, FTAG);
 	}
 
 	/*
@@ -1469,7 +1462,7 @@
 	/*
 	 * Verify that the destroyed dataset is no longer in the namespace.
 	 */
-	error = dmu_objset_open(name, DMU_OST_OTHER, basemode, &os);
+	error = dmu_objset_hold(name, FTAG, &os);
 	if (error != ENOENT)
 		fatal(1, "dmu_objset_open(%s) found destroyed dataset %p",
 		    name, os);
@@ -1488,7 +1481,7 @@
 		fatal(0, "dmu_objset_create(%s) = %d", name, error);
 	}
 
-	error = dmu_objset_open(name, DMU_OST_OTHER, basemode, &os);
+	error = dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os);
 	if (error) {
 		fatal(0, "dmu_objset_open(%s) = %d", name, error);
 	}
@@ -1538,27 +1531,23 @@
 		fatal(0, "created existing dataset, error = %d", error);
 
 	/*
-	 * Verify that multiple dataset holds are allowed, but only when
-	 * the new access mode is compatible with the base mode.
+	 * Verify that we can hold an objset that is also owned.
 	 */
-	if (basemode == DS_MODE_OWNER) {
-		error = dmu_objset_open(name, DMU_OST_OTHER, DS_MODE_USER,
-		    &os2);
-		if (error)
-			fatal(0, "dmu_objset_open('%s') = %d", name, error);
-		else
-			dmu_objset_close(os2);
-	}
-	error = dmu_objset_open(name, DMU_OST_OTHER, DS_MODE_OWNER, &os2);
-	expected_error = (basemode == DS_MODE_OWNER) ? EBUSY : 0;
-	if (error != expected_error)
-		fatal(0, "dmu_objset_open('%s') = %d, expected %d",
-		    name, error, expected_error);
-	if (error == 0)
-		dmu_objset_close(os2);
+	error = dmu_objset_hold(name, FTAG, &os2);
+	if (error)
+		fatal(0, "dmu_objset_open('%s') = %d", name, error);
+	dmu_objset_rele(os2, FTAG);
+
+	/*
+	 * Verify that we can not own an objset that is already owned.
+	 */
+	error = dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os2);
+	if (error != EBUSY)
+		fatal(0, "dmu_objset_open('%s') = %d, expected EBUSY",
+		    name, error);
 
 	zil_close(zilog);
-	dmu_objset_close(os);
+	dmu_objset_disown(os, FTAG);
 
 	error = dmu_objset_destroy(name, B_FALSE);
 	if (error)
@@ -1670,13 +1659,12 @@
 		fatal(0, "dmu_take_snapshot(%s) = %d", snap1name, error);
 	}
 
-	error = dmu_objset_open(snap1name, DMU_OST_OTHER,
-	    DS_MODE_USER | DS_MODE_READONLY, &clone);
+	error = dmu_objset_hold(snap1name, FTAG, &clone);
 	if (error)
 		fatal(0, "dmu_open_snapshot(%s) = %d", snap1name, error);
 
 	error = dmu_objset_clone(clone1name, dmu_objset_ds(clone), 0);
-	dmu_objset_close(clone);
+	dmu_objset_rele(clone, FTAG);
 	if (error) {
 		if (error == ENOSPC) {
 			ztest_record_enospc("dmu_objset_create");
@@ -1705,13 +1693,12 @@
 		fatal(0, "dmu_open_snapshot(%s) = %d", snap3name, error);
 	}
 
-	error = dmu_objset_open(snap3name, DMU_OST_OTHER,
-	    DS_MODE_USER | DS_MODE_READONLY, &clone);
+	error = dmu_objset_hold(snap3name, FTAG, &clone);
 	if (error)
 		fatal(0, "dmu_open_snapshot(%s) = %d", snap3name, error);
 
 	error = dmu_objset_clone(clone2name, dmu_objset_ds(clone), 0);
-	dmu_objset_close(clone);
+	dmu_objset_rele(clone, FTAG);
 	if (error) {
 		if (error == ENOSPC) {
 			ztest_record_enospc("dmu_objset_create");
@@ -1720,7 +1707,7 @@
 		fatal(0, "dmu_objset_create(%s) = %d", clone2name, error);
 	}
 
-	error = dsl_dataset_own(snap1name, DS_MODE_READONLY, FTAG, &ds);
+	error = dsl_dataset_own(snap1name, B_FALSE, FTAG, &ds);
 	if (error)
 		fatal(0, "dsl_dataset_own(%s) = %d", snap1name, error);
 	error = dsl_dataset_promote(clone2name);
@@ -3806,8 +3793,7 @@
 				fatal(0, "dmu_objset_create(%s) = %d",
 				    name, error);
 			}
-			error = dmu_objset_open(name, DMU_OST_OTHER,
-			    DS_MODE_USER, &za[d].za_os);
+			error = dmu_objset_hold(name, FTAG, &za[d].za_os);
 			if (error)
 				fatal(0, "dmu_objset_open('%s') = %d",
 				    name, error);
@@ -3827,7 +3813,7 @@
 		VERIFY(thr_join(za[t].za_thread, NULL, NULL) == 0);
 		if (t < zopt_datasets) {
 			zil_close(za[t].za_zilog);
-			dmu_objset_close(za[t].za_os);
+			dmu_objset_rele(za[t].za_os, FTAG);
 		}
 	}
 
--- a/usr/src/lib/libzfs/common/libzfs_dataset.c	Wed Aug 12 21:16:19 2009 -0700
+++ b/usr/src/lib/libzfs/common/libzfs_dataset.c	Wed Aug 12 22:11:31 2009 -0700
@@ -401,70 +401,8 @@
 static int
 make_dataset_handle_common(zfs_handle_t *zhp, zfs_cmd_t *zc)
 {
-	char *logstr;
-	libzfs_handle_t *hdl = zhp->zfs_hdl;
-
-	/*
-	 * Preserve history log string.
-	 * any changes performed here will be
-	 * logged as an internal event.
-	 */
-	logstr = zhp->zfs_hdl->libzfs_log_str;
-	zhp->zfs_hdl->libzfs_log_str = NULL;
-
-top:
-	if (put_stats_zhdl(zhp, zc) != 0) {
-		zhp->zfs_hdl->libzfs_log_str = logstr;
+	if (put_stats_zhdl(zhp, zc) != 0)
 		return (-1);
-	}
-
-
-	if (zhp->zfs_dmustats.dds_inconsistent) {
-		zfs_cmd_t zc2 = { 0 };
-
-		/*
-		 * If it is dds_inconsistent, then we've caught it in
-		 * the middle of a 'zfs receive' or 'zfs destroy', and
-		 * it is inconsistent from the ZPL's point of view, so
-		 * can't be mounted.  However, it could also be that we
-		 * have crashed in the middle of one of those
-		 * operations, in which case we need to get rid of the
-		 * inconsistent state.  We do that by either rolling
-		 * back to the previous snapshot (which will fail if
-		 * there is none), or destroying the filesystem.  Note
-		 * that if we are still in the middle of an active
-		 * 'receive' or 'destroy', then the rollback and destroy
-		 * will fail with EBUSY and we will drive on as usual.
-		 */
-
-		(void) strlcpy(zc2.zc_name, zhp->zfs_name,
-		    sizeof (zc2.zc_name));
-
-		if (zhp->zfs_dmustats.dds_type == DMU_OST_ZVOL) {
-			(void) zvol_remove_link(hdl, zhp->zfs_name);
-			zc2.zc_objset_type = DMU_OST_ZVOL;
-		} else {
-			zc2.zc_objset_type = DMU_OST_ZFS;
-		}
-
-		/*
-		 * If we can successfully destroy it, pretend that it
-		 * never existed.
-		 */
-		if (ioctl(hdl->libzfs_fd, ZFS_IOC_DESTROY, &zc2) == 0) {
-			zhp->zfs_hdl->libzfs_log_str = logstr;
-			errno = ENOENT;
-			return (-1);
-		}
-		/* If we can successfully roll it back, reset the stats */
-		if (ioctl(hdl->libzfs_fd, ZFS_IOC_ROLLBACK, &zc2) == 0) {
-			if (get_stats_ioctl(zhp, zc) != 0) {
-				zhp->zfs_hdl->libzfs_log_str = logstr;
-				return (-1);
-			}
-			goto top;
-		}
-	}
 
 	/*
 	 * We've managed to open the dataset and gather statistics.  Determine
@@ -486,7 +424,6 @@
 	else
 		abort();	/* we should never see any other types */
 
-	zhp->zfs_hdl->libzfs_log_str = logstr;
 	zhp->zpool_hdl = zpool_handle(zhp);
 	return (0);
 }
--- a/usr/src/uts/common/fs/zfs/dbuf.c	Wed Aug 12 21:16:19 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/dbuf.c	Wed Aug 12 22:11:31 2009 -0700
@@ -109,7 +109,7 @@
 dbuf_find(dnode_t *dn, uint8_t level, uint64_t blkid)
 {
 	dbuf_hash_table_t *h = &dbuf_hash_table;
-	objset_impl_t *os = dn->dn_objset;
+	objset_t *os = dn->dn_objset;
 	uint64_t obj = dn->dn_object;
 	uint64_t hv = DBUF_HASH(os, obj, level, blkid);
 	uint64_t idx = hv & h->hash_table_mask;
@@ -140,7 +140,7 @@
 dbuf_hash_insert(dmu_buf_impl_t *db)
 {
 	dbuf_hash_table_t *h = &dbuf_hash_table;
-	objset_impl_t *os = db->db_objset;
+	objset_t *os = db->db_objset;
 	uint64_t obj = db->db.db_object;
 	int level = db->db_level;
 	uint64_t blkid = db->db_blkid;
@@ -894,7 +894,7 @@
 dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
 {
 	dnode_t *dn = db->db_dnode;
-	objset_impl_t *os = dn->dn_objset;
+	objset_t *os = dn->dn_objset;
 	dbuf_dirty_record_t **drp, *dr;
 	int drop_struct_lock = FALSE;
 	boolean_t do_free_accounting = B_FALSE;
@@ -1488,7 +1488,7 @@
 dbuf_create(dnode_t *dn, uint8_t level, uint64_t blkid,
     dmu_buf_impl_t *parent, blkptr_t *blkptr)
 {
-	objset_impl_t *os = dn->dn_objset;
+	objset_t *os = dn->dn_objset;
 	dmu_buf_impl_t *db, *odb;
 
 	ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock));
@@ -2011,7 +2011,7 @@
 	arc_buf_t **datap = &dr->dt.dl.dr_data;
 	dmu_buf_impl_t *db = dr->dr_dbuf;
 	dnode_t *dn = db->db_dnode;
-	objset_impl_t *os = dn->dn_objset;
+	objset_t *os = dn->dn_objset;
 	uint64_t txg = tx->tx_txg;
 
 	ASSERT(dmu_tx_is_syncing(tx));
@@ -2182,7 +2182,7 @@
 {
 	dmu_buf_impl_t *db = dr->dr_dbuf;
 	dnode_t *dn = db->db_dnode;
-	objset_impl_t *os = dn->dn_objset;
+	objset_t *os = dn->dn_objset;
 	dmu_buf_impl_t *parent = db->db_parent;
 	uint64_t txg = tx->tx_txg;
 	zbookmark_t zb;
@@ -2279,7 +2279,7 @@
 {
 	dmu_buf_impl_t *db = vdb;
 	dnode_t *dn = db->db_dnode;
-	objset_impl_t *os = dn->dn_objset;
+	objset_t *os = dn->dn_objset;
 	blkptr_t *bp = zio->io_bp;
 	blkptr_t *bp_orig = &zio->io_bp_orig;
 	uint64_t fill = 0;
--- a/usr/src/uts/common/fs/zfs/dmu.c	Wed Aug 12 21:16:19 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/dmu.c	Wed Aug 12 22:11:31 2009 -0700
@@ -99,7 +99,7 @@
 	dmu_buf_impl_t *db;
 	int err;
 
-	err = dnode_hold(os->os, object, FTAG, &dn);
+	err = dnode_hold(os, object, FTAG, &dn);
 	if (err)
 		return (err);
 	blkid = dbuf_whichblock(dn, offset);
@@ -150,7 +150,7 @@
 	dmu_buf_impl_t *db;
 	int error;
 
-	error = dnode_hold(os->os, object, FTAG, &dn);
+	error = dnode_hold(os, object, FTAG, &dn);
 	if (error)
 		return (error);
 
@@ -282,7 +282,7 @@
 	dnode_t *dn;
 	int err;
 
-	err = dnode_hold(os->os, object, FTAG, &dn);
+	err = dnode_hold(os, object, FTAG, &dn);
 	if (err)
 		return (err);
 
@@ -335,7 +335,7 @@
 		return;
 
 	if (len == 0) {  /* they're interested in the bonus buffer */
-		dn = os->os->os_meta_dnode;
+		dn = os->os_meta_dnode;
 
 		if (object == 0 || object >= DN_MAX_OBJECT)
 			return;
@@ -352,7 +352,7 @@
 	 * already cached, we will do a *synchronous* read in the
 	 * dnode_hold() call.  The same is true for any indirects.
 	 */
-	err = dnode_hold(os->os, object, FTAG, &dn);
+	err = dnode_hold(os, object, FTAG, &dn);
 	if (err != 0)
 		return;
 
@@ -484,7 +484,7 @@
 	dnode_t *dn;
 	int err;
 
-	err = dnode_hold(os->os, object, FTAG, &dn);
+	err = dnode_hold(os, object, FTAG, &dn);
 	if (err != 0)
 		return (err);
 	err = dmu_free_long_range_impl(os, dn, offset, length, FALSE);
@@ -499,7 +499,7 @@
 	dmu_tx_t *tx;
 	int err;
 
-	err = dnode_hold_impl(os->os, object, DNODE_MUST_BE_ALLOCATED,
+	err = dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED,
 	    FTAG, &dn);
 	if (err != 0)
 		return (err);
@@ -527,7 +527,7 @@
     uint64_t size, dmu_tx_t *tx)
 {
 	dnode_t *dn;
-	int err = dnode_hold(os->os, object, FTAG, &dn);
+	int err = dnode_hold(os, object, FTAG, &dn);
 	if (err)
 		return (err);
 	ASSERT(offset < UINT64_MAX);
@@ -545,7 +545,7 @@
 	dmu_buf_t **dbp;
 	int numbufs, err;
 
-	err = dnode_hold(os->os, object, FTAG, &dn);
+	err = dnode_hold(os, object, FTAG, &dn);
 	if (err)
 		return (err);
 
@@ -852,8 +852,7 @@
 		dbuf_rele(db, FTAG);
 	} else {
 		dbuf_rele(db, FTAG);
-		ASSERT(dn->dn_objset->os.os == dn->dn_objset);
-		dmu_write(&dn->dn_objset->os, dn->dn_object, offset, blksz,
+		dmu_write(dn->dn_objset, dn->dn_object, offset, blksz,
 		    buf->b_data, tx);
 		dmu_return_arcbuf(buf);
 	}
@@ -930,7 +929,7 @@
     blkptr_t *bp, uint64_t txg, dmu_sync_cb_t *done, void *arg)
 {
 	dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
-	objset_impl_t *os = db->db_objset;
+	objset_t *os = db->db_objset;
 	dsl_pool_t *dp = os->os_dsl_dataset->ds_dir->dd_pool;
 	tx_state_t *tx = &dp->dp_tx;
 	dbuf_dirty_record_t *dr;
@@ -1078,7 +1077,7 @@
 	dnode_t *dn;
 	int err;
 
-	err = dnode_hold(os->os, object, FTAG, &dn);
+	err = dnode_hold(os, object, FTAG, &dn);
 	if (err)
 		return (err);
 	err = dnode_set_blksz(dn, size, ibs, tx);
@@ -1093,7 +1092,7 @@
 	dnode_t *dn;
 
 	/* XXX assumes dnode_hold will not get an i/o error */
-	(void) dnode_hold(os->os, object, FTAG, &dn);
+	(void) dnode_hold(os, object, FTAG, &dn);
 	ASSERT(checksum < ZIO_CHECKSUM_FUNCTIONS);
 	dn->dn_checksum = checksum;
 	dnode_setdirty(dn, tx);
@@ -1107,7 +1106,7 @@
 	dnode_t *dn;
 
 	/* XXX assumes dnode_hold will not get an i/o error */
-	(void) dnode_hold(os->os, object, FTAG, &dn);
+	(void) dnode_hold(os, object, FTAG, &dn);
 	ASSERT(compress < ZIO_COMPRESS_FUNCTIONS);
 	dn->dn_compress = compress;
 	dnode_setdirty(dn, tx);
@@ -1120,7 +1119,7 @@
 	dnode_t *dn;
 	int i, err;
 
-	err = dnode_hold(os->os, object, FTAG, &dn);
+	err = dnode_hold(os, object, FTAG, &dn);
 	if (err)
 		return (err);
 	/*
@@ -1134,7 +1133,7 @@
 	if (i != TXG_SIZE) {
 		dnode_rele(dn, FTAG);
 		txg_wait_synced(dmu_objset_pool(os), 0);
-		err = dnode_hold(os->os, object, FTAG, &dn);
+		err = dnode_hold(os, object, FTAG, &dn);
 		if (err)
 			return (err);
 	}
@@ -1176,7 +1175,7 @@
 dmu_object_info(objset_t *os, uint64_t object, dmu_object_info_t *doi)
 {
 	dnode_t *dn;
-	int err = dnode_hold(os->os, object, FTAG, &dn);
+	int err = dnode_hold(os, object, FTAG, &dn);
 
 	if (err)
 		return (err);
--- a/usr/src/uts/common/fs/zfs/dmu_object.c	Wed Aug 12 21:16:19 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/dmu_object.c	Wed Aug 12 22:11:31 2009 -0700
@@ -32,16 +32,15 @@
 dmu_object_alloc(objset_t *os, dmu_object_type_t ot, int blocksize,
     dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
 {
-	objset_impl_t *osi = os->os;
 	uint64_t object;
 	uint64_t L2_dnode_count = DNODES_PER_BLOCK <<
-	    (osi->os_meta_dnode->dn_indblkshift - SPA_BLKPTRSHIFT);
+	    (os->os_meta_dnode->dn_indblkshift - SPA_BLKPTRSHIFT);
 	dnode_t *dn = NULL;
 	int restarted = B_FALSE;
 
-	mutex_enter(&osi->os_obj_lock);
+	mutex_enter(&os->os_obj_lock);
 	for (;;) {
-		object = osi->os_obj_next;
+		object = os->os_obj_next;
 		/*
 		 * Each time we polish off an L2 bp worth of dnodes
 		 * (2^13 objects), move to another L2 bp that's still
@@ -51,14 +50,14 @@
 		 */
 		if (P2PHASE(object, L2_dnode_count) == 0) {
 			uint64_t offset = restarted ? object << DNODE_SHIFT : 0;
-			int error = dnode_next_offset(osi->os_meta_dnode,
+			int error = dnode_next_offset(os->os_meta_dnode,
 			    DNODE_FIND_HOLE,
 			    &offset, 2, DNODES_PER_BLOCK >> 2, 0);
 			restarted = B_TRUE;
 			if (error == 0)
 				object = offset >> DNODE_SHIFT;
 		}
-		osi->os_obj_next = ++object;
+		os->os_obj_next = ++object;
 
 		/*
 		 * XXX We should check for an i/o error here and return
@@ -66,19 +65,19 @@
 		 * dmu_tx_assign(), but there is currently no mechanism
 		 * to do so.
 		 */
-		(void) dnode_hold_impl(os->os, object, DNODE_MUST_BE_FREE,
+		(void) dnode_hold_impl(os, object, DNODE_MUST_BE_FREE,
 		    FTAG, &dn);
 		if (dn)
 			break;
 
 		if (dmu_object_next(os, &object, B_TRUE, 0) == 0)
-			osi->os_obj_next = object - 1;
+			os->os_obj_next = object - 1;
 	}
 
 	dnode_allocate(dn, ot, blocksize, 0, bonustype, bonuslen, tx);
 	dnode_rele(dn, FTAG);
 
-	mutex_exit(&osi->os_obj_lock);
+	mutex_exit(&os->os_obj_lock);
 
 	dmu_tx_add_new_object(tx, os, object);
 	return (object);
@@ -94,7 +93,7 @@
 	if (object == DMU_META_DNODE_OBJECT && !dmu_tx_private_ok(tx))
 		return (EBADF);
 
-	err = dnode_hold_impl(os->os, object, DNODE_MUST_BE_FREE, FTAG, &dn);
+	err = dnode_hold_impl(os, object, DNODE_MUST_BE_FREE, FTAG, &dn);
 	if (err)
 		return (err);
 	dnode_allocate(dn, ot, blocksize, 0, bonustype, bonuslen, tx);
@@ -116,7 +115,7 @@
 	if (object == DMU_META_DNODE_OBJECT)
 		return (EBADF);
 
-	err = dnode_hold_impl(os->os, object, DNODE_MUST_BE_ALLOCATED,
+	err = dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED,
 	    FTAG, &dn);
 	if (err)
 		return (err);
@@ -166,7 +165,7 @@
 
 	ASSERT(object != DMU_META_DNODE_OBJECT || dmu_tx_private_ok(tx));
 
-	err = dnode_hold_impl(os->os, object, DNODE_MUST_BE_ALLOCATED,
+	err = dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED,
 	    FTAG, &dn);
 	if (err)
 		return (err);
@@ -185,7 +184,7 @@
 	uint64_t offset = (*objectp + 1) << DNODE_SHIFT;
 	int error;
 
-	error = dnode_next_offset(os->os->os_meta_dnode,
+	error = dnode_next_offset(os->os_meta_dnode,
 	    (hole ? DNODE_FIND_HOLE : 0), &offset, 0, DNODES_PER_BLOCK, txg);
 
 	*objectp = offset >> DNODE_SHIFT;
--- a/usr/src/uts/common/fs/zfs/dmu_objset.c	Wed Aug 12 21:16:19 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/dmu_objset.c	Wed Aug 12 22:11:31 2009 -0700
@@ -45,13 +45,13 @@
 spa_t *
 dmu_objset_spa(objset_t *os)
 {
-	return (os->os->os_spa);
+	return (os->os_spa);
 }
 
 zilog_t *
 dmu_objset_zil(objset_t *os)
 {
-	return (os->os->os_zil);
+	return (os->os_zil);
 }
 
 dsl_pool_t *
@@ -59,34 +59,34 @@
 {
 	dsl_dataset_t *ds;
 
-	if ((ds = os->os->os_dsl_dataset) != NULL && ds->ds_dir)
+	if ((ds = os->os_dsl_dataset) != NULL && ds->ds_dir)
 		return (ds->ds_dir->dd_pool);
 	else
-		return (spa_get_dsl(os->os->os_spa));
+		return (spa_get_dsl(os->os_spa));
 }
 
 dsl_dataset_t *
 dmu_objset_ds(objset_t *os)
 {
-	return (os->os->os_dsl_dataset);
+	return (os->os_dsl_dataset);
 }
 
 dmu_objset_type_t
 dmu_objset_type(objset_t *os)
 {
-	return (os->os->os_phys->os_type);
+	return (os->os_phys->os_type);
 }
 
 void
 dmu_objset_name(objset_t *os, char *buf)
 {
-	dsl_dataset_name(os->os->os_dsl_dataset, buf);
+	dsl_dataset_name(os->os_dsl_dataset, buf);
 }
 
 uint64_t
 dmu_objset_id(objset_t *os)
 {
-	dsl_dataset_t *ds = os->os->os_dsl_dataset;
+	dsl_dataset_t *ds = os->os_dsl_dataset;
 
 	return (ds ? ds->ds_object : 0);
 }
@@ -94,47 +94,47 @@
 static void
 checksum_changed_cb(void *arg, uint64_t newval)
 {
-	objset_impl_t *osi = arg;
+	objset_t *os = arg;
 
 	/*
 	 * Inheritance should have been done by now.
 	 */
 	ASSERT(newval != ZIO_CHECKSUM_INHERIT);
 
-	osi->os_checksum = zio_checksum_select(newval, ZIO_CHECKSUM_ON_VALUE);
+	os->os_checksum = zio_checksum_select(newval, ZIO_CHECKSUM_ON_VALUE);
 }
 
 static void
 compression_changed_cb(void *arg, uint64_t newval)
 {
-	objset_impl_t *osi = arg;
+	objset_t *os = arg;
 
 	/*
 	 * Inheritance and range checking should have been done by now.
 	 */
 	ASSERT(newval != ZIO_COMPRESS_INHERIT);
 
-	osi->os_compress = zio_compress_select(newval, ZIO_COMPRESS_ON_VALUE);
+	os->os_compress = zio_compress_select(newval, ZIO_COMPRESS_ON_VALUE);
 }
 
 static void
 copies_changed_cb(void *arg, uint64_t newval)
 {
-	objset_impl_t *osi = arg;
+	objset_t *os = arg;
 
 	/*
 	 * Inheritance and range checking should have been done by now.
 	 */
 	ASSERT(newval > 0);
-	ASSERT(newval <= spa_max_replication(osi->os_spa));
+	ASSERT(newval <= spa_max_replication(os->os_spa));
 
-	osi->os_copies = newval;
+	os->os_copies = newval;
 }
 
 static void
 primary_cache_changed_cb(void *arg, uint64_t newval)
 {
-	objset_impl_t *osi = arg;
+	objset_t *os = arg;
 
 	/*
 	 * Inheritance and range checking should have been done by now.
@@ -142,13 +142,13 @@
 	ASSERT(newval == ZFS_CACHE_ALL || newval == ZFS_CACHE_NONE ||
 	    newval == ZFS_CACHE_METADATA);
 
-	osi->os_primary_cache = newval;
+	os->os_primary_cache = newval;
 }
 
 static void
 secondary_cache_changed_cb(void *arg, uint64_t newval)
 {
-	objset_impl_t *osi = arg;
+	objset_t *os = arg;
 
 	/*
 	 * Inheritance and range checking should have been done by now.
@@ -156,7 +156,7 @@
 	ASSERT(newval == ZFS_CACHE_ALL || newval == ZFS_CACHE_NONE ||
 	    newval == ZFS_CACHE_METADATA);
 
-	osi->os_secondary_cache = newval;
+	os->os_secondary_cache = newval;
 }
 
 void
@@ -177,39 +177,38 @@
 
 int
 dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
-    objset_impl_t **osip)
+    objset_t **osp)
 {
-	objset_impl_t *osi;
+	objset_t *os;
 	int i, err;
 
 	ASSERT(ds == NULL || MUTEX_HELD(&ds->ds_opening_lock));
 
-	osi = kmem_zalloc(sizeof (objset_impl_t), KM_SLEEP);
-	osi->os.os = osi;
-	osi->os_dsl_dataset = ds;
-	osi->os_spa = spa;
-	osi->os_rootbp = bp;
-	if (!BP_IS_HOLE(osi->os_rootbp)) {
+	os = kmem_zalloc(sizeof (objset_t), KM_SLEEP);
+	os->os_dsl_dataset = ds;
+	os->os_spa = spa;
+	os->os_rootbp = bp;
+	if (!BP_IS_HOLE(os->os_rootbp)) {
 		uint32_t aflags = ARC_WAIT;
 		zbookmark_t zb;
 		zb.zb_objset = ds ? ds->ds_object : 0;
 		zb.zb_object = 0;
 		zb.zb_level = -1;
 		zb.zb_blkid = 0;
-		if (DMU_OS_IS_L2CACHEABLE(osi))
+		if (DMU_OS_IS_L2CACHEABLE(os))
 			aflags |= ARC_L2CACHE;
 
-		dprintf_bp(osi->os_rootbp, "reading %s", "");
+		dprintf_bp(os->os_rootbp, "reading %s", "");
 		/*
 		 * NB: when bprewrite scrub can change the bp,
 		 * and this is called from dmu_objset_open_ds_os, the bp
 		 * could change, and we'll need a lock.
 		 */
-		err = arc_read_nolock(NULL, spa, osi->os_rootbp,
-		    arc_getbuf_func, &osi->os_phys_buf,
+		err = arc_read_nolock(NULL, spa, os->os_rootbp,
+		    arc_getbuf_func, &os->os_phys_buf,
 		    ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &aflags, &zb);
 		if (err) {
-			kmem_free(osi, sizeof (objset_impl_t));
+			kmem_free(os, sizeof (objset_t));
 			/* convert checksum errors into IO errors */
 			if (err == ECKSUM)
 				err = EIO;
@@ -218,27 +217,27 @@
 
 		/* Increase the blocksize if we are permitted. */
 		if (spa_version(spa) >= SPA_VERSION_USERSPACE &&
-		    arc_buf_size(osi->os_phys_buf) < sizeof (objset_phys_t)) {
+		    arc_buf_size(os->os_phys_buf) < sizeof (objset_phys_t)) {
 			arc_buf_t *buf = arc_buf_alloc(spa,
-			    sizeof (objset_phys_t), &osi->os_phys_buf,
+			    sizeof (objset_phys_t), &os->os_phys_buf,
 			    ARC_BUFC_METADATA);
 			bzero(buf->b_data, sizeof (objset_phys_t));
-			bcopy(osi->os_phys_buf->b_data, buf->b_data,
-			    arc_buf_size(osi->os_phys_buf));
-			(void) arc_buf_remove_ref(osi->os_phys_buf,
-			    &osi->os_phys_buf);
-			osi->os_phys_buf = buf;
+			bcopy(os->os_phys_buf->b_data, buf->b_data,
+			    arc_buf_size(os->os_phys_buf));
+			(void) arc_buf_remove_ref(os->os_phys_buf,
+			    &os->os_phys_buf);
+			os->os_phys_buf = buf;
 		}
 
-		osi->os_phys = osi->os_phys_buf->b_data;
-		osi->os_flags = osi->os_phys->os_flags;
+		os->os_phys = os->os_phys_buf->b_data;
+		os->os_flags = os->os_phys->os_flags;
 	} else {
 		int size = spa_version(spa) >= SPA_VERSION_USERSPACE ?
 		    sizeof (objset_phys_t) : OBJSET_OLD_PHYS_SIZE;
-		osi->os_phys_buf = arc_buf_alloc(spa, size,
-		    &osi->os_phys_buf, ARC_BUFC_METADATA);
-		osi->os_phys = osi->os_phys_buf->b_data;
-		bzero(osi->os_phys, size);
+		os->os_phys_buf = arc_buf_alloc(spa, size,
+		    &os->os_phys_buf, ARC_BUFC_METADATA);
+		os->os_phys = os->os_phys_buf->b_data;
+		bzero(os->os_phys, size);
 	}
 
 	/*
@@ -249,61 +248,61 @@
 	 */
 	if (ds) {
 		err = dsl_prop_register(ds, "primarycache",
-		    primary_cache_changed_cb, osi);
+		    primary_cache_changed_cb, os);
 		if (err == 0)
 			err = dsl_prop_register(ds, "secondarycache",
-			    secondary_cache_changed_cb, osi);
+			    secondary_cache_changed_cb, os);
 		if (!dsl_dataset_is_snapshot(ds)) {
 			if (err == 0)
 				err = dsl_prop_register(ds, "checksum",
-				    checksum_changed_cb, osi);
+				    checksum_changed_cb, os);
 			if (err == 0)
 				err = dsl_prop_register(ds, "compression",
-				    compression_changed_cb, osi);
+				    compression_changed_cb, os);
 			if (err == 0)
 				err = dsl_prop_register(ds, "copies",
-				    copies_changed_cb, osi);
+				    copies_changed_cb, os);
 		}
 		if (err) {
-			VERIFY(arc_buf_remove_ref(osi->os_phys_buf,
-			    &osi->os_phys_buf) == 1);
-			kmem_free(osi, sizeof (objset_impl_t));
+			VERIFY(arc_buf_remove_ref(os->os_phys_buf,
+			    &os->os_phys_buf) == 1);
+			kmem_free(os, sizeof (objset_t));
 			return (err);
 		}
 	} else if (ds == NULL) {
 		/* It's the meta-objset. */
-		osi->os_checksum = ZIO_CHECKSUM_FLETCHER_4;
-		osi->os_compress = ZIO_COMPRESS_LZJB;
-		osi->os_copies = spa_max_replication(spa);
-		osi->os_primary_cache = ZFS_CACHE_ALL;
-		osi->os_secondary_cache = ZFS_CACHE_ALL;
+		os->os_checksum = ZIO_CHECKSUM_FLETCHER_4;
+		os->os_compress = ZIO_COMPRESS_LZJB;
+		os->os_copies = spa_max_replication(spa);
+		os->os_primary_cache = ZFS_CACHE_ALL;
+		os->os_secondary_cache = ZFS_CACHE_ALL;
 	}
 
-	osi->os_zil_header = osi->os_phys->os_zil_header;
-	osi->os_zil = zil_alloc(&osi->os, &osi->os_zil_header);
+	os->os_zil_header = os->os_phys->os_zil_header;
+	os->os_zil = zil_alloc(os, &os->os_zil_header);
 
 	for (i = 0; i < TXG_SIZE; i++) {
-		list_create(&osi->os_dirty_dnodes[i], sizeof (dnode_t),
+		list_create(&os->os_dirty_dnodes[i], sizeof (dnode_t),
 		    offsetof(dnode_t, dn_dirty_link[i]));
-		list_create(&osi->os_free_dnodes[i], sizeof (dnode_t),
+		list_create(&os->os_free_dnodes[i], sizeof (dnode_t),
 		    offsetof(dnode_t, dn_dirty_link[i]));
 	}
-	list_create(&osi->os_dnodes, sizeof (dnode_t),
+	list_create(&os->os_dnodes, sizeof (dnode_t),
 	    offsetof(dnode_t, dn_link));
-	list_create(&osi->os_downgraded_dbufs, sizeof (dmu_buf_impl_t),
+	list_create(&os->os_downgraded_dbufs, sizeof (dmu_buf_impl_t),
 	    offsetof(dmu_buf_impl_t, db_link));
 
-	mutex_init(&osi->os_lock, NULL, MUTEX_DEFAULT, NULL);
-	mutex_init(&osi->os_obj_lock, NULL, MUTEX_DEFAULT, NULL);
-	mutex_init(&osi->os_user_ptr_lock, NULL, MUTEX_DEFAULT, NULL);
+	mutex_init(&os->os_lock, NULL, MUTEX_DEFAULT, NULL);
+	mutex_init(&os->os_obj_lock, NULL, MUTEX_DEFAULT, NULL);
+	mutex_init(&os->os_user_ptr_lock, NULL, MUTEX_DEFAULT, NULL);
 
-	osi->os_meta_dnode = dnode_special_open(osi,
-	    &osi->os_phys->os_meta_dnode, DMU_META_DNODE_OBJECT);
-	if (arc_buf_size(osi->os_phys_buf) >= sizeof (objset_phys_t)) {
-		osi->os_userused_dnode = dnode_special_open(osi,
-		    &osi->os_phys->os_userused_dnode, DMU_USERUSED_OBJECT);
-		osi->os_groupused_dnode = dnode_special_open(osi,
-		    &osi->os_phys->os_groupused_dnode, DMU_GROUPUSED_OBJECT);
+	os->os_meta_dnode = dnode_special_open(os,
+	    &os->os_phys->os_meta_dnode, DMU_META_DNODE_OBJECT);
+	if (arc_buf_size(os->os_phys_buf) >= sizeof (objset_phys_t)) {
+		os->os_userused_dnode = dnode_special_open(os,
+		    &os->os_phys->os_userused_dnode, DMU_USERUSED_OBJECT);
+		os->os_groupused_dnode = dnode_special_open(os,
+		    &os->os_phys->os_groupused_dnode, DMU_GROUPUSED_OBJECT);
 	}
 
 	/*
@@ -311,117 +310,95 @@
 	 * have ds_opening_lock
 	 */
 	if (ds) {
-		VERIFY(NULL == dsl_dataset_set_user_ptr(ds, osi,
-		    dmu_objset_evict));
+		mutex_enter(&ds->ds_lock);
+		ASSERT(ds->ds_objset == NULL);
+		ds->ds_objset = os;
+		mutex_exit(&ds->ds_lock);
 	}
 
-	*osip = osi;
-	return (0);
-}
-
-static int
-dmu_objset_open_ds_os(dsl_dataset_t *ds, objset_t *os, dmu_objset_type_t type)
-{
-	objset_impl_t *osi;
-
-	mutex_enter(&ds->ds_opening_lock);
-	osi = dsl_dataset_get_user_ptr(ds);
-	if (osi == NULL) {
-		int err;
-
-		err = dmu_objset_open_impl(dsl_dataset_get_spa(ds),
-		    ds, &ds->ds_phys->ds_bp, &osi);
-		if (err) {
-			mutex_exit(&ds->ds_opening_lock);
-			return (err);
-		}
-	}
-	mutex_exit(&ds->ds_opening_lock);
-
-	os->os = osi;
-	os->os_mode = DS_MODE_NOHOLD;
-
-	if (type != DMU_OST_ANY && type != os->os->os_phys->os_type)
-		return (EINVAL);
+	*osp = os;
 	return (0);
 }
 
 int
-dmu_objset_open_ds(dsl_dataset_t *ds, dmu_objset_type_t type, objset_t **osp)
+dmu_objset_from_ds(dsl_dataset_t *ds, objset_t **osp)
 {
-	objset_t *os;
+	int err = 0;
+
+	mutex_enter(&ds->ds_opening_lock);
+	*osp = ds->ds_objset;
+	if (*osp == NULL) {
+		err = dmu_objset_open_impl(dsl_dataset_get_spa(ds),
+		    ds, &ds->ds_phys->ds_bp, osp);
+	}
+	mutex_exit(&ds->ds_opening_lock);
+	return (err);
+}
+
+/* called from zpl */
+int
+dmu_objset_hold(const char *name, void *tag, objset_t **osp)
+{
+	dsl_dataset_t *ds;
 	int err;
 
-	os = kmem_alloc(sizeof (objset_t), KM_SLEEP);
-	err = dmu_objset_open_ds_os(ds, os, type);
+	err = dsl_dataset_hold(name, tag, &ds);
 	if (err)
-		kmem_free(os, sizeof (objset_t));
-	else
-		*osp = os;
+		return (err);
+
+	err = dmu_objset_from_ds(ds, osp);
+	if (err)
+		dsl_dataset_rele(ds, tag);
+
 	return (err);
 }
 
 /* called from zpl */
 int
-dmu_objset_open(const char *name, dmu_objset_type_t type, int mode,
-    objset_t **osp)
+dmu_objset_own(const char *name, dmu_objset_type_t type,
+    boolean_t readonly, void *tag, objset_t **osp)
 {
-	objset_t *os;
 	dsl_dataset_t *ds;
 	int err;
 
-	ASSERT(DS_MODE_TYPE(mode) == DS_MODE_USER ||
-	    DS_MODE_TYPE(mode) == DS_MODE_OWNER);
+	err = dsl_dataset_own(name, B_FALSE, tag, &ds);
+	if (err)
+		return (err);
 
-	os = kmem_alloc(sizeof (objset_t), KM_SLEEP);
-	if (DS_MODE_TYPE(mode) == DS_MODE_USER)
-		err = dsl_dataset_hold(name, os, &ds);
-	else
-		err = dsl_dataset_own(name, mode, os, &ds);
+	err = dmu_objset_from_ds(ds, osp);
 	if (err) {
-		kmem_free(os, sizeof (objset_t));
-		return (err);
+		dsl_dataset_disown(ds, tag);
+	} else if ((type != DMU_OST_ANY && type != (*osp)->os_phys->os_type) ||
+	    (!readonly && dsl_dataset_is_snapshot(ds))) {
+		dmu_objset_disown(*osp, tag);
+		return (EINVAL);
 	}
 
-	err = dmu_objset_open_ds_os(ds, os, type);
-	if (err) {
-		if (DS_MODE_TYPE(mode) == DS_MODE_USER)
-			dsl_dataset_rele(ds, os);
-		else
-			dsl_dataset_disown(ds, os);
-		kmem_free(os, sizeof (objset_t));
-	} else {
-		os->os_mode = mode;
-		*osp = os;
-	}
 	return (err);
 }
 
 void
-dmu_objset_close(objset_t *os)
+dmu_objset_rele(objset_t *os, void *tag)
 {
-	ASSERT(DS_MODE_TYPE(os->os_mode) == DS_MODE_USER ||
-	    DS_MODE_TYPE(os->os_mode) == DS_MODE_OWNER ||
-	    DS_MODE_TYPE(os->os_mode) == DS_MODE_NOHOLD);
+	dsl_dataset_rele(os->os_dsl_dataset, tag);
+}
 
-	if (DS_MODE_TYPE(os->os_mode) == DS_MODE_USER)
-		dsl_dataset_rele(os->os->os_dsl_dataset, os);
-	else if (DS_MODE_TYPE(os->os_mode) == DS_MODE_OWNER)
-		dsl_dataset_disown(os->os->os_dsl_dataset, os);
-	kmem_free(os, sizeof (objset_t));
+void
+dmu_objset_disown(objset_t *os, void *tag)
+{
+	dsl_dataset_disown(os->os_dsl_dataset, tag);
 }
 
 int
 dmu_objset_evict_dbufs(objset_t *os)
 {
-	objset_impl_t *osi = os->os;
 	dnode_t *dn;
 
-	mutex_enter(&osi->os_lock);
+	mutex_enter(&os->os_lock);
 
 	/* process the mdn last, since the other dnodes have holds on it */
-	list_remove(&osi->os_dnodes, osi->os_meta_dnode);
-	list_insert_tail(&osi->os_dnodes, osi->os_meta_dnode);
+	list_remove(&os->os_dnodes, os->os_meta_dnode);
+	list_insert_tail(&os->os_dnodes, os->os_meta_dnode);
 
 	/*
 	 * Find the first dnode with holds.  We have to do this dance
@@ -429,93 +406,91 @@
 	 * hold.  If there are no holds then it has no dbufs so OK to
 	 * skip.
 	 */
-	for (dn = list_head(&osi->os_dnodes);
+	for (dn = list_head(&os->os_dnodes);
 	    dn && !dnode_add_ref(dn, FTAG);
-	    dn = list_next(&osi->os_dnodes, dn))
+	    dn = list_next(&os->os_dnodes, dn))
 		continue;
 
 	while (dn) {
 		dnode_t *next_dn = dn;
 
 		do {
-			next_dn = list_next(&osi->os_dnodes, next_dn);
+			next_dn = list_next(&os->os_dnodes, next_dn);
 		} while (next_dn && !dnode_add_ref(next_dn, FTAG));
 
-		mutex_exit(&osi->os_lock);
+		mutex_exit(&os->os_lock);
 		dnode_evict_dbufs(dn);
 		dnode_rele(dn, FTAG);
-		mutex_enter(&osi->os_lock);
+		mutex_enter(&os->os_lock);
 		dn = next_dn;
 	}
-	mutex_exit(&osi->os_lock);
-	return (list_head(&osi->os_dnodes) != osi->os_meta_dnode);
+	mutex_exit(&os->os_lock);
+	return (list_head(&os->os_dnodes) != os->os_meta_dnode);
 }
 
 void
-dmu_objset_evict(dsl_dataset_t *ds, void *arg)
+dmu_objset_evict(objset_t *os)
 {
-	objset_impl_t *osi = arg;
-	objset_t os;
+	dsl_dataset_t *ds = os->os_dsl_dataset;
 	int i;
 
 	for (i = 0; i < TXG_SIZE; i++) {
-		ASSERT(list_head(&osi->os_dirty_dnodes[i]) == NULL);
-		ASSERT(list_head(&osi->os_free_dnodes[i]) == NULL);
+		ASSERT(list_head(&os->os_dirty_dnodes[i]) == NULL);
+		ASSERT(list_head(&os->os_free_dnodes[i]) == NULL);
 	}
 
 	if (ds) {
 		if (!dsl_dataset_is_snapshot(ds)) {
 			VERIFY(0 == dsl_prop_unregister(ds, "checksum",
-			    checksum_changed_cb, osi));
+			    checksum_changed_cb, os));
 			VERIFY(0 == dsl_prop_unregister(ds, "compression",
-			    compression_changed_cb, osi));
+			    compression_changed_cb, os));
 			VERIFY(0 == dsl_prop_unregister(ds, "copies",
-			    copies_changed_cb, osi));
+			    copies_changed_cb, os));
 		}
 		VERIFY(0 == dsl_prop_unregister(ds, "primarycache",
-		    primary_cache_changed_cb, osi));
+		    primary_cache_changed_cb, os));
 		VERIFY(0 == dsl_prop_unregister(ds, "secondarycache",
-		    secondary_cache_changed_cb, osi));
+		    secondary_cache_changed_cb, os));
 	}
 
 	/*
 	 * We should need only a single pass over the dnode list, since
 	 * nothing can be added to the list at this point.
 	 */
-	os.os = osi;
-	(void) dmu_objset_evict_dbufs(&os);
+	(void) dmu_objset_evict_dbufs(os);
 
-	dnode_special_close(osi->os_meta_dnode);
-	if (osi->os_userused_dnode) {
-		dnode_special_close(osi->os_userused_dnode);
-		dnode_special_close(osi->os_groupused_dnode);
+	dnode_special_close(os->os_meta_dnode);
+	if (os->os_userused_dnode) {
+		dnode_special_close(os->os_userused_dnode);
+		dnode_special_close(os->os_groupused_dnode);
 	}
-	zil_free(osi->os_zil);
+	zil_free(os->os_zil);
 
-	ASSERT3P(list_head(&osi->os_dnodes), ==, NULL);
+	ASSERT3P(list_head(&os->os_dnodes), ==, NULL);
 
-	VERIFY(arc_buf_remove_ref(osi->os_phys_buf, &osi->os_phys_buf) == 1);
-	mutex_destroy(&osi->os_lock);
-	mutex_destroy(&osi->os_obj_lock);
-	mutex_destroy(&osi->os_user_ptr_lock);
-	kmem_free(osi, sizeof (objset_impl_t));
+	VERIFY(arc_buf_remove_ref(os->os_phys_buf, &os->os_phys_buf) == 1);
+	mutex_destroy(&os->os_lock);
+	mutex_destroy(&os->os_obj_lock);
+	mutex_destroy(&os->os_user_ptr_lock);
+	kmem_free(os, sizeof (objset_t));
 }
 
 /* called from dsl for meta-objset */
-objset_impl_t *
+objset_t *
 dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
     dmu_objset_type_t type, dmu_tx_t *tx)
 {
-	objset_impl_t *osi;
+	objset_t *os;
 	dnode_t *mdn;
 
 	ASSERT(dmu_tx_is_syncing(tx));
 	if (ds)
 		mutex_enter(&ds->ds_opening_lock);
-	VERIFY(0 == dmu_objset_open_impl(spa, ds, bp, &osi));
+	VERIFY(0 == dmu_objset_open_impl(spa, ds, bp, &os));
 	if (ds)
 		mutex_exit(&ds->ds_opening_lock);
-	mdn = osi->os_meta_dnode;
+	mdn = os->os_meta_dnode;
 
 	dnode_allocate(mdn, DMU_OT_DNODE, 1 << DNODE_BLOCK_SHIFT,
 	    DN_MAX_INDBLKSHIFT, DMU_OT_NONE, 0, tx);
@@ -550,15 +525,15 @@
 	ASSERT(type != DMU_OST_NONE);
 	ASSERT(type != DMU_OST_ANY);
 	ASSERT(type < DMU_OST_NUMTYPES);
-	osi->os_phys->os_type = type;
-	if (dmu_objset_userused_enabled(osi)) {
-		osi->os_phys->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE;
-		osi->os_flags = osi->os_phys->os_flags;
+	os->os_phys->os_type = type;
+	if (dmu_objset_userused_enabled(os)) {
+		os->os_phys->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE;
+		os->os_flags = os->os_phys->os_flags;
 	}
 
 	dsl_dataset_dirty(ds, tx);
 
-	return (osi);
+	return (os);
 }
 
 struct oscarg {
@@ -613,18 +588,18 @@
 	if (oa->clone_origin == NULL) {
 		dsl_dataset_t *ds;
 		blkptr_t *bp;
-		objset_impl_t *osi;
+		objset_t *os;
 
 		VERIFY(0 == dsl_dataset_hold_obj(dd->dd_pool, dsobj,
 		    FTAG, &ds));
 		bp = dsl_dataset_get_blkptr(ds);
 		ASSERT(BP_IS_HOLE(bp));
 
-		osi = dmu_objset_create_impl(dsl_dataset_get_spa(ds),
+		os = dmu_objset_create_impl(dsl_dataset_get_spa(ds),
 		    ds, bp, oa->type, tx);
 
 		if (oa->userfunc)
-			oa->userfunc(&osi->os, oa->userarg, cr, tx);
+			oa->userfunc(os, oa->userarg, cr, tx);
 		dsl_dataset_rele(ds, FTAG);
 	}
 
@@ -692,7 +667,7 @@
 int
 dmu_objset_destroy(const char *name, boolean_t defer)
 {
-	objset_t *os;
+	dsl_dataset_t *ds;
 	int error;
 
 	/*
@@ -702,15 +677,13 @@
 	 * structure.  Only the ZIL knows how to free them, so we have
 	 * to call into it here.
 	 */
-	error = dmu_objset_open(name, DMU_OST_ANY,
-	    DS_MODE_OWNER|DS_MODE_READONLY|DS_MODE_INCONSISTENT, &os);
+	error = dsl_dataset_own(name, B_TRUE, FTAG, &ds);
 	if (error == 0) {
-		dsl_dataset_t *ds = os->os->os_dsl_dataset;
-		zil_destroy(dmu_objset_zil(os), B_FALSE);
-
-		error = dsl_dataset_destroy(ds, os, defer);
+		objset_t *os;
+		if (dmu_objset_from_ds(ds, &os) == 0)
+			zil_destroy(dmu_objset_zil(os), B_FALSE);
+		error = dsl_dataset_destroy(ds, FTAG, defer);
 		/* dsl_dataset_destroy() closes the ds. */
-		kmem_free(os, sizeof (objset_t));
 	}
 
 	return (error);
@@ -732,7 +705,7 @@
 
 	/* The props have already been checked by zfs_check_userprops(). */
 
-	return (dsl_dataset_snapshot_check(os->os->os_dsl_dataset,
+	return (dsl_dataset_snapshot_check(os->os_dsl_dataset,
 	    sn->snapname, tx));
 }
 
@@ -740,7 +713,7 @@
 snapshot_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
 {
 	objset_t *os = arg1;
-	dsl_dataset_t *ds = os->os->os_dsl_dataset;
+	dsl_dataset_t *ds = os->os_dsl_dataset;
 	struct snaparg *sn = arg2;
 
 	dsl_dataset_snapshot_sync(ds, sn->snapname, cr, tx);
@@ -767,13 +740,13 @@
 	    (err = zfs_secpolicy_snapshot_perms(name, CRED())))
 		return (err);
 
-	err = dmu_objset_open(name, DMU_OST_ANY, DS_MODE_USER, &os);
+	err = dmu_objset_hold(name, sn, &os);
 	if (err != 0)
 		return (err);
 
 	/* If the objset is in an inconsistent state, return busy */
-	if (os->os->os_dsl_dataset->ds_phys->ds_flags & DS_FLAG_INCONSISTENT) {
-		dmu_objset_close(os);
+	if (os->os_dsl_dataset->ds_phys->ds_flags & DS_FLAG_INCONSISTENT) {
+		dmu_objset_rele(os, sn);
 		return (EBUSY);
 	}
 
@@ -787,7 +760,7 @@
 		dsl_sync_task_create(sn->dstg, snapshot_check,
 		    snapshot_sync, os, sn, 3);
 	} else {
-		dmu_objset_close(os);
+		dmu_objset_rele(os, sn);
 	}
 
 	return (err);
@@ -827,11 +800,11 @@
 	for (dst = list_head(&sn.dstg->dstg_tasks); dst;
 	    dst = list_next(&sn.dstg->dstg_tasks, dst)) {
 		objset_t *os = dst->dst_arg1;
-		dsl_dataset_t *ds = os->os->os_dsl_dataset;
+		dsl_dataset_t *ds = os->os_dsl_dataset;
 		if (dst->dst_err)
 			dsl_dataset_name(ds, sn.failed);
 		zil_resume(dmu_objset_zil(os));
-		dmu_objset_close(os);
+		dmu_objset_rele(os, &sn);
 	}
 
 	if (err)
@@ -874,7 +847,7 @@
 {
 	blkptr_t *bp = zio->io_bp;
 	blkptr_t *bp_orig = &zio->io_bp_orig;
-	objset_impl_t *os = arg;
+	objset_t *os = arg;
 	dnode_phys_t *dnp = &os->os_phys->os_meta_dnode;
 
 	ASSERT(bp == os->os_rootbp);
@@ -903,7 +876,7 @@
 
 /* called from dsl */
 void
-dmu_objset_sync(objset_impl_t *os, zio_t *pio, dmu_tx_t *tx)
+dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx)
 {
 	int txgoff;
 	zbookmark_t zb;
@@ -1008,7 +981,7 @@
 }
 
 boolean_t
-dmu_objset_userused_enabled(objset_impl_t *os)
+dmu_objset_userused_enabled(objset_t *os)
 {
 	return (spa_version(os->os_spa) >= SPA_VERSION_USERSPACE &&
 	    used_cbs[os->os_phys->os_type] &&
@@ -1016,7 +989,7 @@
 }
 
 void
-dmu_objset_do_userquota_callbacks(objset_impl_t *os, dmu_tx_t *tx)
+dmu_objset_do_userquota_callbacks(objset_t *os, dmu_tx_t *tx)
 {
 	dnode_t *dn;
 	list_t *list = &os->os_synced_dnodes;
@@ -1035,10 +1008,10 @@
 
 		/* Allocate the user/groupused objects if necessary. */
 		if (os->os_userused_dnode->dn_type == DMU_OT_NONE) {
-			VERIFY(0 == zap_create_claim(&os->os,
+			VERIFY(0 == zap_create_claim(os,
 			    DMU_USERUSED_OBJECT,
 			    DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx));
-			VERIFY(0 == zap_create_claim(&os->os,
+			VERIFY(0 == zap_create_claim(os,
 			    DMU_GROUPUSED_OBJECT,
 			    DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx));
 		}
@@ -1065,7 +1038,7 @@
 		    (bcmp(DN_BONUS(dn->dn_oldphys), zerobuf,
 		    DN_MAX_BONUSLEN) == 0 &&
 		    DN_USED_BYTES(dn->dn_oldphys) == 0));
-		used_cbs[os->os_phys->os_type](&os->os, bonustype,
+		used_cbs[os->os_phys->os_type](os, bonustype,
 		    DN_BONUS(dn->dn_oldphys), DN_BONUS(dn->dn_phys),
 		    DN_USED_BYTES(dn->dn_oldphys),
 		    DN_USED_BYTES(dn->dn_phys), tx);
@@ -1086,7 +1059,7 @@
 boolean_t
 dmu_objset_userspace_present(objset_t *os)
 {
-	return (os->os->os_phys->os_flags &
+	return (os->os_phys->os_flags &
 	    OBJSET_FLAG_USERACCOUNTING_COMPLETE);
 }
 
@@ -1098,7 +1071,7 @@
 
 	if (dmu_objset_userspace_present(os))
 		return (0);
-	if (!dmu_objset_userused_enabled(os->os))
+	if (!dmu_objset_userused_enabled(os))
 		return (ENOTSUP);
 	if (dmu_objset_is_snapshot(os))
 		return (EINVAL);
@@ -1134,7 +1107,7 @@
 		dmu_tx_commit(tx);
 	}
 
-	os->os->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE;
+	os->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE;
 	txg_wait_synced(dmu_objset_pool(os), 0);
 	return (0);
 }
@@ -1143,35 +1116,35 @@
 dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp,
     uint64_t *usedobjsp, uint64_t *availobjsp)
 {
-	dsl_dataset_space(os->os->os_dsl_dataset, refdbytesp, availbytesp,
+	dsl_dataset_space(os->os_dsl_dataset, refdbytesp, availbytesp,
 	    usedobjsp, availobjsp);
 }
 
 uint64_t
 dmu_objset_fsid_guid(objset_t *os)
 {
-	return (dsl_dataset_fsid_guid(os->os->os_dsl_dataset));
+	return (dsl_dataset_fsid_guid(os->os_dsl_dataset));
 }
 
 void
 dmu_objset_fast_stat(objset_t *os, dmu_objset_stats_t *stat)
 {
-	stat->dds_type = os->os->os_phys->os_type;
-	if (os->os->os_dsl_dataset)
-		dsl_dataset_fast_stat(os->os->os_dsl_dataset, stat);
+	stat->dds_type = os->os_phys->os_type;
+	if (os->os_dsl_dataset)
+		dsl_dataset_fast_stat(os->os_dsl_dataset, stat);
 }
 
 void
 dmu_objset_stats(objset_t *os, nvlist_t *nv)
 {
-	ASSERT(os->os->os_dsl_dataset ||
-	    os->os->os_phys->os_type == DMU_OST_META);
+	ASSERT(os->os_dsl_dataset ||
+	    os->os_phys->os_type == DMU_OST_META);
 
-	if (os->os->os_dsl_dataset != NULL)
-		dsl_dataset_stats(os->os->os_dsl_dataset, nv);
+	if (os->os_dsl_dataset != NULL)
+		dsl_dataset_stats(os->os_dsl_dataset, nv);
 
 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_TYPE,
-	    os->os->os_phys->os_type);
+	    os->os_phys->os_type);
 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USERACCOUNTING,
 	    dmu_objset_userspace_present(os));
 }
@@ -1179,8 +1152,8 @@
 int
 dmu_objset_is_snapshot(objset_t *os)
 {
-	if (os->os->os_dsl_dataset != NULL)
-		return (dsl_dataset_is_snapshot(os->os->os_dsl_dataset));
+	if (os->os_dsl_dataset != NULL)
+		return (dsl_dataset_is_snapshot(os->os_dsl_dataset));
 	else
 		return (B_FALSE);
 }
@@ -1189,7 +1162,7 @@
 dmu_snapshot_realname(objset_t *os, char *name, char *real, int maxlen,
     boolean_t *conflict)
 {
-	dsl_dataset_t *ds = os->os->os_dsl_dataset;
+	dsl_dataset_t *ds = os->os_dsl_dataset;
 	uint64_t ignored;
 
 	if (ds->ds_phys->ds_snapnames_zapobj == 0)
@@ -1204,7 +1177,7 @@
 dmu_snapshot_list_next(objset_t *os, int namelen, char *name,
     uint64_t *idp, uint64_t *offp, boolean_t *case_conflict)
 {
-	dsl_dataset_t *ds = os->os->os_dsl_dataset;
+	dsl_dataset_t *ds = os->os_dsl_dataset;
 	zap_cursor_t cursor;
 	zap_attribute_t attr;
 
@@ -1241,12 +1214,12 @@
 dmu_dir_list_next(objset_t *os, int namelen, char *name,
     uint64_t *idp, uint64_t *offp)
 {
-	dsl_dir_t *dd = os->os->os_dsl_dataset->ds_dir;
+	dsl_dir_t *dd = os->os_dsl_dataset->ds_dir;
 	zap_cursor_t cursor;
 	zap_attribute_t attr;
 
 	/* there is no next dir on a snapshot! */
-	if (os->os->os_dsl_dataset->ds_object !=
+	if (os->os_dsl_dataset->ds_object !=
 	    dd->dd_phys->dd_head_dataset_obj)
 		return (ENOENT);
 
@@ -1420,7 +1393,7 @@
 
 	if (!BP_IS_HOLE(&ds->ds_phys->ds_bp)) {
 		mutex_enter(&ds->ds_opening_lock);
-		if (!dsl_dataset_get_user_ptr(ds)) {
+		if (ds->ds_objset == NULL) {
 			uint32_t aflags = ARC_NOWAIT | ARC_PREFETCH;
 			zbookmark_t zb;
 
@@ -1445,13 +1418,13 @@
 void
 dmu_objset_set_user(objset_t *os, void *user_ptr)
 {
-	ASSERT(MUTEX_HELD(&os->os->os_user_ptr_lock));
-	os->os->os_user_ptr = user_ptr;
+	ASSERT(MUTEX_HELD(&os->os_user_ptr_lock));
+	os->os_user_ptr = user_ptr;
 }
 
 void *
 dmu_objset_get_user(objset_t *os)
 {
-	ASSERT(MUTEX_HELD(&os->os->os_user_ptr_lock));
-	return (os->os->os_user_ptr);
+	ASSERT(MUTEX_HELD(&os->os_user_ptr_lock));
+	return (os->os_user_ptr);
 }
--- a/usr/src/uts/common/fs/zfs/dmu_send.c	Wed Aug 12 21:16:19 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/dmu_send.c	Wed Aug 12 22:11:31 2009 -0700
@@ -216,8 +216,8 @@
 dmu_sendbackup(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin,
     vnode_t *vp, offset_t *off)
 {
-	dsl_dataset_t *ds = tosnap->os->os_dsl_dataset;
-	dsl_dataset_t *fromds = fromsnap ? fromsnap->os->os_dsl_dataset : NULL;
+	dsl_dataset_t *ds = tosnap->os_dsl_dataset;
+	dsl_dataset_t *fromds = fromsnap ? fromsnap->os_dsl_dataset : NULL;
 	dmu_replay_record_t *drr;
 	struct backuparg ba;
 	int err;
@@ -257,7 +257,7 @@
 	drr->drr_u.drr_begin.drr_version = DMU_BACKUP_STREAM_VERSION;
 	drr->drr_u.drr_begin.drr_creation_time =
 	    ds->ds_phys->ds_creation_time;
-	drr->drr_u.drr_begin.drr_type = tosnap->os->os_phys->os_type;
+	drr->drr_u.drr_begin.drr_type = tosnap->os_phys->os_type;
 	if (fromorigin)
 		drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CLONE;
 	drr->drr_u.drr_begin.drr_toguid = ds->ds_phys->ds_guid;
@@ -362,7 +362,7 @@
 	dsobj = dsl_dataset_create_sync(dd, strrchr(rbsa->tofs, '/') + 1,
 	    rbsa->origin, flags, cr, tx);
 	VERIFY(0 == dsl_dataset_own_obj(dd->dd_pool, dsobj,
-	    DS_MODE_INCONSISTENT, dmu_recv_tag, &rbsa->ds));
+	    B_TRUE, dmu_recv_tag, &rbsa->ds));
 
 	if (rbsa->origin == NULL) {
 		(void) dmu_objset_create_impl(dd->dd_pool->dp_spa,
@@ -431,8 +431,7 @@
 	/* create and open the temporary clone */
 	dsobj = dsl_dataset_create_sync(ohds->ds_dir, rbsa->clonelastname,
 	    ohds->ds_prev, flags, cr, tx);
-	VERIFY(0 == dsl_dataset_own_obj(dp, dsobj,
-	    DS_MODE_INCONSISTENT, dmu_recv_tag, &cds));
+	VERIFY(0 == dsl_dataset_own_obj(dp, dsobj, B_TRUE, dmu_recv_tag, &cds));
 
 	/*
 	 * If we actually created a non-clone, we need to create the
@@ -477,7 +476,7 @@
 
 	rbsa.tofs = tofs;
 	rbsa.tosnap = tosnap;
-	rbsa.origin = origin ? origin->os->os_dsl_dataset : NULL;
+	rbsa.origin = origin ? origin->os_dsl_dataset : NULL;
 	rbsa.fromguid = drrb->drr_fromguid;
 	rbsa.type = drrb->drr_type;
 	rbsa.tag = FTAG;
@@ -880,7 +879,7 @@
 	/*
 	 * Open the objset we are modifying.
 	 */
-	VERIFY(dmu_objset_open_ds(drc->drc_real_ds, DMU_OST_ANY, &os) == 0);
+	VERIFY(dmu_objset_from_ds(drc->drc_real_ds, &os) == 0);
 
 	ASSERT(drc->drc_real_ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT);
 
@@ -950,8 +949,6 @@
 	ASSERT(ra.err != 0);
 
 out:
-	dmu_objset_close(os);
-
 	if (ra.err != 0) {
 		/*
 		 * destroy what we created, so we don't leave it in the
--- a/usr/src/uts/common/fs/zfs/dmu_tx.c	Wed Aug 12 21:16:19 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/dmu_tx.c	Wed Aug 12 22:11:31 2009 -0700
@@ -58,9 +58,9 @@
 dmu_tx_t *
 dmu_tx_create(objset_t *os)
 {
-	dmu_tx_t *tx = dmu_tx_create_dd(os->os->os_dsl_dataset->ds_dir);
+	dmu_tx_t *tx = dmu_tx_create_dd(os->os_dsl_dataset->ds_dir);
 	tx->tx_objset = os;
-	tx->tx_lastsnap_txg = dsl_dataset_prev_snap_txg(os->os->os_dsl_dataset);
+	tx->tx_lastsnap_txg = dsl_dataset_prev_snap_txg(os->os_dsl_dataset);
 	return (tx);
 }
 
@@ -98,7 +98,7 @@
 	int err;
 
 	if (object != DMU_NEW_OBJECT) {
-		err = dnode_hold(os->os, object, tx, &dn);
+		err = dnode_hold(os, object, tx, &dn);
 		if (err) {
 			tx->tx_err = err;
 			return (NULL);
@@ -376,7 +376,7 @@
 dmu_tx_count_dnode(dmu_tx_hold_t *txh)
 {
 	dnode_t *dn = txh->txh_dnode;
-	dnode_t *mdn = txh->txh_tx->tx_objset->os->os_meta_dnode;
+	dnode_t *mdn = txh->txh_tx->tx_objset->os_meta_dnode;
 	uint64_t space = mdn->dn_datablksz +
 	    ((mdn->dn_nlevels-1) << mdn->dn_indblkshift);
 
@@ -688,7 +688,7 @@
 		 * access the name in this fat-zap so that we'll check
 		 * for i/o errors to the leaf blocks, etc.
 		 */
-		err = zap_lookup(&dn->dn_objset->os, dn->dn_object, name,
+		err = zap_lookup(dn->dn_objset, dn->dn_object, name,
 		    8, 0, NULL);
 		if (err == EIO) {
 			tx->tx_err = err;
@@ -696,7 +696,7 @@
 		}
 	}
 
-	err = zap_count_write(&dn->dn_objset->os, dn->dn_object, name, add,
+	err = zap_count_write(dn->dn_objset, dn->dn_object, name, add,
 	    &txh->txh_space_towrite, &txh->txh_space_tooverwrite);
 
 	/*
@@ -771,7 +771,7 @@
 	dnode_t *dn = db->db_dnode;
 
 	ASSERT(tx->tx_txg != 0);
-	ASSERT(tx->tx_objset == NULL || dn->dn_objset == tx->tx_objset->os);
+	ASSERT(tx->tx_objset == NULL || dn->dn_objset == tx->tx_objset);
 	ASSERT3U(dn->dn_object, ==, db->db.db_object);
 
 	if (tx->tx_anyobj)
@@ -931,7 +931,7 @@
 	 * assume that we won't be able to free or overwrite anything.
 	 */
 	if (tx->tx_objset &&
-	    dsl_dataset_prev_snap_txg(tx->tx_objset->os->os_dsl_dataset) >
+	    dsl_dataset_prev_snap_txg(tx->tx_objset->os_dsl_dataset) >
 	    tx->tx_lastsnap_txg) {
 		towrite += tooverwrite;
 		tooverwrite = tofree = 0;
--- a/usr/src/uts/common/fs/zfs/dnode.c	Wed Aug 12 21:16:19 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/dnode.c	Wed Aug 12 22:11:31 2009 -0700
@@ -272,7 +272,7 @@
 }
 
 static dnode_t *
-dnode_create(objset_impl_t *os, dnode_phys_t *dnp, dmu_buf_impl_t *db,
+dnode_create(objset_t *os, dnode_phys_t *dnp, dmu_buf_impl_t *db,
     uint64_t object)
 {
 	dnode_t *dn = kmem_cache_alloc(dnode_cache, KM_SLEEP);
@@ -309,7 +309,7 @@
 static void
 dnode_destroy(dnode_t *dn)
 {
-	objset_impl_t *os = dn->dn_objset;
+	objset_t *os = dn->dn_objset;
 
 #ifdef ZFS_DEBUG
 	int i;
@@ -488,7 +488,7 @@
 }
 
 dnode_t *
-dnode_special_open(objset_impl_t *os, dnode_phys_t *dnp, uint64_t object)
+dnode_special_open(objset_t *os, dnode_phys_t *dnp, uint64_t object)
 {
 	dnode_t *dn = dnode_create(os, dnp, NULL, object);
 	DNODE_VERIFY(dn);
@@ -535,7 +535,7 @@
  * succeeds even for free dnodes.
  */
 int
-dnode_hold_impl(objset_impl_t *os, uint64_t object, int flag,
+dnode_hold_impl(objset_t *os, uint64_t object, int flag,
     void *tag, dnode_t **dnp)
 {
 	int epb, idx, err;
@@ -650,7 +650,7 @@
  * Return held dnode if the object is allocated, NULL if not.
  */
 int
-dnode_hold(objset_impl_t *os, uint64_t object, void *tag, dnode_t **dnp)
+dnode_hold(objset_t *os, uint64_t object, void *tag, dnode_t **dnp)
 {
 	return (dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED, tag, dnp));
 }
@@ -689,7 +689,7 @@
 void
 dnode_setdirty(dnode_t *dn, dmu_tx_t *tx)
 {
-	objset_impl_t *os = dn->dn_objset;
+	objset_t *os = dn->dn_objset;
 	uint64_t txg = tx->tx_txg;
 
 	if (DMU_OBJECT_IS_SPECIAL(dn->dn_object)) {
@@ -1248,7 +1248,7 @@
 void
 dnode_willuse_space(dnode_t *dn, int64_t space, dmu_tx_t *tx)
 {
-	objset_impl_t *os = dn->dn_objset;
+	objset_t *os = dn->dn_objset;
 	dsl_dataset_t *ds = os->os_dsl_dataset;
 
 	if (space > 0)
--- a/usr/src/uts/common/fs/zfs/dsl_dataset.c	Wed Aug 12 21:16:19 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/dsl_dataset.c	Wed Aug 12 22:11:31 2009 -0700
@@ -244,8 +244,8 @@
 
 	unique_remove(ds->ds_fsid_guid);
 
-	if (ds->ds_user_ptr != NULL)
-		ds->ds_user_evict_func(ds, ds->ds_user_ptr);
+	if (ds->ds_objset != NULL)
+		dmu_objset_evict(ds->ds_objset);
 
 	if (ds->ds_prev) {
 		dsl_dataset_drop_ref(ds->ds_prev, ds);
@@ -545,17 +545,14 @@
 }
 
 int
-dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj, int flags, void *owner,
-    dsl_dataset_t **dsp)
+dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj, boolean_t inconsistentok,
+    void *tag, dsl_dataset_t **dsp)
 {
-	int err = dsl_dataset_hold_obj(dp, dsobj, owner, dsp);
-
-	ASSERT(DS_MODE_TYPE(flags) != DS_MODE_USER);
-
+	int err = dsl_dataset_hold_obj(dp, dsobj, tag, dsp);
 	if (err)
 		return (err);
-	if (!dsl_dataset_tryown(*dsp, DS_MODE_IS_INCONSISTENT(flags), owner)) {
-		dsl_dataset_rele(*dsp, owner);
+	if (!dsl_dataset_tryown(*dsp, inconsistentok, tag)) {
+		dsl_dataset_rele(*dsp, tag);
 		*dsp = NULL;
 		return (EBUSY);
 	}
@@ -622,18 +619,14 @@
 }
 
 int
-dsl_dataset_own(const char *name, int flags, void *owner, dsl_dataset_t **dsp)
+dsl_dataset_own(const char *name, boolean_t inconsistentok,
+    void *tag, dsl_dataset_t **dsp)
 {
-	int err = dsl_dataset_hold(name, owner, dsp);
+	int err = dsl_dataset_hold(name, tag, dsp);
 	if (err)
 		return (err);
-	if ((*dsp)->ds_phys->ds_num_children > 0 &&
-	    !DS_MODE_IS_READONLY(flags)) {
-		dsl_dataset_rele(*dsp, owner);
-		return (EROFS);
-	}
-	if (!dsl_dataset_tryown(*dsp, DS_MODE_IS_INCONSISTENT(flags), owner)) {
-		dsl_dataset_rele(*dsp, owner);
+	if (!dsl_dataset_tryown(*dsp, inconsistentok, tag)) {
+		dsl_dataset_rele(*dsp, tag);
 		return (EBUSY);
 	}
 	return (0);
@@ -705,9 +698,9 @@
 }
 
 void
-dsl_dataset_disown(dsl_dataset_t *ds, void *owner)
+dsl_dataset_disown(dsl_dataset_t *ds, void *tag)
 {
-	ASSERT((ds->ds_owner == owner && ds->ds_dbuf) ||
+	ASSERT((ds->ds_owner == tag && ds->ds_dbuf) ||
 	    (DSL_DATASET_IS_DESTROYED(ds) && ds->ds_dbuf == NULL));
 
 	mutex_enter(&ds->ds_lock);
@@ -718,20 +711,20 @@
 	}
 	mutex_exit(&ds->ds_lock);
 	if (ds->ds_dbuf)
-		dsl_dataset_drop_ref(ds, owner);
+		dsl_dataset_drop_ref(ds, tag);
 	else
 		dsl_dataset_evict(ds->ds_dbuf, ds);
 }
 
 boolean_t
-dsl_dataset_tryown(dsl_dataset_t *ds, boolean_t inconsistentok, void *owner)
+dsl_dataset_tryown(dsl_dataset_t *ds, boolean_t inconsistentok, void *tag)
 {
 	boolean_t gotit = FALSE;
 
 	mutex_enter(&ds->ds_lock);
 	if (ds->ds_owner == NULL &&
 	    (!DS_IS_INCONSISTENT(ds) || inconsistentok)) {
-		ds->ds_owner = owner;
+		ds->ds_owner = tag;
 		if (!dsl_pool_sync_context(ds->ds_dir->dd_pool))
 			rw_exit(&ds->ds_rwlock);
 		gotit = TRUE;
@@ -865,16 +858,15 @@
 	char *dsname;
 
 	dsname = kmem_asprintf("%s@%s", name, da->snapname);
-	err = dsl_dataset_own(dsname, DS_MODE_READONLY | DS_MODE_INCONSISTENT,
-	    da->dstg, &ds);
+	err = dsl_dataset_own(dsname, B_TRUE, da->dstg, &ds);
 	strfree(dsname);
 	if (err == 0) {
 		struct dsl_ds_destroyarg *dsda;
 
 		dsl_dataset_make_exclusive(ds, da->dstg);
-		if (ds->ds_user_ptr) {
-			ds->ds_user_evict_func(ds, ds->ds_user_ptr);
-			ds->ds_user_ptr = NULL;
+		if (ds->ds_objset != NULL) {
+			dmu_objset_evict(ds->ds_objset);
+			ds->ds_objset = NULL;
 		}
 		dsda = kmem_zalloc(sizeof (struct dsl_ds_destroyarg), KM_SLEEP);
 		dsda->ds = ds;
@@ -958,13 +950,12 @@
 	int error;
 	objset_t *os;
 
-	error = dmu_objset_open_ds(ds, DMU_OST_ANY, &os);
+	error = dmu_objset_from_ds(ds, &os);
 	if (error)
 		return (error);
 
 	if (dmu_objset_type(os) == DMU_OST_ZVOL)
 		error = zvol_remove_minor(name);
-	dmu_objset_close(os);
 
 	return (error);
 }
@@ -1003,9 +994,7 @@
 			return (error);
 		}
 #endif
-		error = dsl_dataset_own(name,
-		    DS_MODE_READONLY | DS_MODE_INCONSISTENT,
-		    tag, &origin);
+		error = dsl_dataset_own(name, B_TRUE, tag, &origin);
 		kmem_free(name, namelen);
 		if (error)
 			return (error);
@@ -1036,9 +1025,9 @@
 		/* Destroying a snapshot is simpler */
 		dsl_dataset_make_exclusive(ds, tag);
 
-		if (ds->ds_user_ptr) {
-			ds->ds_user_evict_func(ds, ds->ds_user_ptr);
-			ds->ds_user_ptr = NULL;
+		if (ds->ds_objset != NULL) {
+			dmu_objset_evict(ds->ds_objset);
+			ds->ds_objset = NULL;
 		}
 		/* NOTE: defer is always B_FALSE for non-snapshots */
 		dsda.defer = defer;
@@ -1060,7 +1049,7 @@
 	if (err)
 		goto out;
 
-	err = dmu_objset_open_ds(ds, DMU_OST_ANY, &os);
+	err = dmu_objset_from_ds(ds, &os);
 	if (err)
 		goto out;
 
@@ -1089,7 +1078,7 @@
 	 * context, the user space accounting should be zero.
 	 */
 	if (ds->ds_phys->ds_bp.blk_fill == 0 &&
-	    dmu_objset_userused_enabled(os->os)) {
+	    dmu_objset_userused_enabled(os)) {
 		uint64_t count;
 
 		ASSERT(zap_count(os, DMU_USERUSED_OBJECT, &count) != 0 ||
@@ -1098,7 +1087,6 @@
 		    count == 0);
 	}
 
-	dmu_objset_close(os);
 	if (err != ESRCH)
 		goto out;
 
@@ -1109,7 +1097,7 @@
 	if (err)
 		goto out;
 
-	if (ds->ds_user_ptr) {
+	if (ds->ds_objset) {
 		/*
 		 * We need to sync out all in-flight IO before we try
 		 * to evict (the dataset evict func is trying to clear
@@ -1122,9 +1110,9 @@
 	 * Blow away the dsl_dir + head dataset.
 	 */
 	dsl_dataset_make_exclusive(ds, tag);
-	if (ds->ds_user_ptr) {
-		ds->ds_user_evict_func(ds, ds->ds_user_ptr);
-		ds->ds_user_ptr = NULL;
+	if (ds->ds_objset) {
+		dmu_objset_evict(ds->ds_objset);
+		ds->ds_objset = NULL;
 	}
 
 	/*
@@ -1173,28 +1161,6 @@
 	return (err);
 }
 
-void *
-dsl_dataset_set_user_ptr(dsl_dataset_t *ds,
-    void *p, dsl_dataset_evict_func_t func)
-{
-	void *old;
-
-	mutex_enter(&ds->ds_lock);
-	old = ds->ds_user_ptr;
-	if (old == NULL) {
-		ds->ds_user_ptr = p;
-		ds->ds_user_evict_func = func;
-	}
-	mutex_exit(&ds->ds_lock);
-	return (old);
-}
-
-void *
-dsl_dataset_get_user_ptr(dsl_dataset_t *ds)
-{
-	return (ds->ds_user_ptr);
-}
-
 blkptr_t *
 dsl_dataset_get_blkptr(dsl_dataset_t *ds)
 {
@@ -1228,7 +1194,7 @@
 	if (ds == NULL) /* this is the meta-objset */
 		return;
 
-	ASSERT(ds->ds_user_ptr != NULL);
+	ASSERT(ds->ds_objset != NULL);
 
 	if (ds->ds_phys->ds_next_snap_obj != 0)
 		panic("dirtying snapshot!");
@@ -1820,12 +1786,12 @@
 		struct dsl_ds_destroyarg ndsda = {0};
 
 		ASSERT3P(origin, ==, dsda->rm_origin);
-		if (origin->ds_user_ptr) {
-			origin->ds_user_evict_func(origin, origin->ds_user_ptr);
-			origin->ds_user_ptr = NULL;
+		if (origin->ds_objset) {
+			dmu_objset_evict(origin->ds_objset);
+			origin->ds_objset = NULL;
 		}
 
-		dsl_dataset_rele(origin, ds);
+		dsl_dataset_rele(ds->ds_prev, ds);
 		ds->ds_prev = NULL;
 
 		ndsda.ds = origin;
@@ -2010,7 +1976,7 @@
 dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx)
 {
 	ASSERT(dmu_tx_is_syncing(tx));
-	ASSERT(ds->ds_user_ptr != NULL);
+	ASSERT(ds->ds_objset != NULL);
 	ASSERT(ds->ds_phys->ds_next_snap_obj == 0);
 
 	/*
@@ -2021,7 +1987,7 @@
 	ds->ds_phys->ds_fsid_guid = ds->ds_fsid_guid;
 
 	dsl_dir_dirty(ds->ds_dir, tx);
-	dmu_objset_sync(ds->ds_user_ptr, zio, tx);
+	dmu_objset_sync(ds->ds_objset, zio, tx);
 }
 
 void
@@ -2577,9 +2543,9 @@
 		dsl_dataset_t *ds = snap->ds;
 
 		/* unregister props as dsl_dir is changing */
-		if (ds->ds_user_ptr) {
-			ds->ds_user_evict_func(ds, ds->ds_user_ptr);
-			ds->ds_user_ptr = NULL;
+		if (ds->ds_objset) {
+			dmu_objset_evict(ds->ds_objset);
+			ds->ds_objset = NULL;
 		}
 		/* move snap name entry */
 		VERIFY(0 == dsl_dataset_get_snapname(ds));
@@ -2879,15 +2845,14 @@
 	dmu_buf_will_dirty(csa->cds->ds_dbuf, tx);
 	dmu_buf_will_dirty(csa->ohds->ds_dbuf, tx);
 
-	if (csa->cds->ds_user_ptr != NULL) {
-		csa->cds->ds_user_evict_func(csa->cds, csa->cds->ds_user_ptr);
-		csa->cds->ds_user_ptr = NULL;
+	if (csa->cds->ds_objset != NULL) {
+		dmu_objset_evict(csa->cds->ds_objset);
+		csa->cds->ds_objset = NULL;
 	}
 
-	if (csa->ohds->ds_user_ptr != NULL) {
-		csa->ohds->ds_user_evict_func(csa->ohds,
-		    csa->ohds->ds_user_ptr);
-		csa->ohds->ds_user_ptr = NULL;
+	if (csa->ohds->ds_objset != NULL) {
+		dmu_objset_evict(csa->ohds->ds_objset);
+		csa->ohds->ds_objset = NULL;
 	}
 
 	/*
@@ -3463,9 +3428,9 @@
 			 */
 			if (!ra->own)
 				return (EBUSY);
-			if (ds->ds_user_ptr) {
-				ds->ds_user_evict_func(ds, ds->ds_user_ptr);
-				ds->ds_user_ptr = NULL;
+			if (ds->ds_objset) {
+				dmu_objset_evict(ds->ds_objset);
+				ds->ds_objset = NULL;
 			}
 		}
 		dsda.ds = ds;
@@ -3557,8 +3522,7 @@
 			return (error);
 		}
 #endif
-		if (!dsl_dataset_tryown(ds,
-		    DS_MODE_READONLY | DS_MODE_INCONSISTENT, dtag)) {
+		if (!dsl_dataset_tryown(ds, B_TRUE, dtag)) {
 			dsl_dataset_rele(ds, dtag);
 			return (EBUSY);
 		} else {
@@ -3666,3 +3630,24 @@
 	dsl_dataset_rele(ds, FTAG);
 	return (0);
 }
+
+/*
+ * Note, this fuction is used as the callback for dmu_objset_find().  We
+ * always return 0 so that we will continue to find and process
+ * inconsistent datasets, even if we encounter an error trying to
+ * process one of them.
+ */
+/* ARGSUSED */
+int
+dsl_destroy_inconsistent(char *dsname, void *arg)
+{
+	dsl_dataset_t *ds;
+
+	if (dsl_dataset_own(dsname, B_TRUE, FTAG, &ds) == 0) {
+		if (DS_IS_INCONSISTENT(ds))
+			(void) dsl_dataset_destroy(ds, FTAG, B_FALSE);
+		else
+			dsl_dataset_disown(ds, FTAG);
+	}
+	return (0);
+}
--- a/usr/src/uts/common/fs/zfs/dsl_deleg.c	Wed Aug 12 21:16:19 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/dsl_deleg.c	Wed Aug 12 22:11:31 2009 -0700
@@ -739,5 +739,5 @@
 boolean_t
 dsl_delegation_on(objset_t *os)
 {
-	return (os->os->os_spa->spa_delegation);
+	return (os->os_spa->spa_delegation);
 }
--- a/usr/src/uts/common/fs/zfs/dsl_dir.c	Wed Aug 12 21:16:19 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/dsl_dir.c	Wed Aug 12 22:11:31 2009 -0700
@@ -703,7 +703,7 @@
 	 */
 	if (first && tx->tx_objset) {
 		int error;
-		dsl_dataset_t *ds = tx->tx_objset->os->os_dsl_dataset;
+		dsl_dataset_t *ds = tx->tx_objset->os_dsl_dataset;
 
 		error = dsl_dataset_check_quota(ds, checkrefquota,
 		    asize, est_inflight, &used_on_disk, &ref_rsrv);
--- a/usr/src/uts/common/fs/zfs/dsl_pool.c	Wed Aug 12 21:16:19 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/dsl_pool.c	Wed Aug 12 22:11:31 2009 -0700
@@ -103,13 +103,12 @@
 	dsl_pool_t *dp = dsl_pool_open_impl(spa, txg);
 	dsl_dir_t *dd;
 	dsl_dataset_t *ds;
-	objset_impl_t *osi;
 
 	rw_enter(&dp->dp_config_rwlock, RW_WRITER);
-	err = dmu_objset_open_impl(spa, NULL, &dp->dp_meta_rootbp, &osi);
+	err = dmu_objset_open_impl(spa, NULL, &dp->dp_meta_rootbp,
+	    &dp->dp_meta_objset);
 	if (err)
 		goto out;
-	dp->dp_meta_objset = &osi->os;
 
 	err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
 	    DMU_POOL_ROOT_DATASET, sizeof (uint64_t), 1,
@@ -219,7 +218,7 @@
 
 	/* undo the dmu_objset_open_impl(mos) from dsl_pool_open() */
 	if (dp->dp_meta_objset)
-		dmu_objset_evict(NULL, dp->dp_meta_objset->os);
+		dmu_objset_evict(dp->dp_meta_objset);
 
 	txg_list_destroy(&dp->dp_dirty_datasets);
 	txg_list_destroy(&dp->dp_dirty_dirs);
@@ -242,13 +241,13 @@
 	int err;
 	dsl_pool_t *dp = dsl_pool_open_impl(spa, txg);
 	dmu_tx_t *tx = dmu_tx_create_assigned(dp, txg);
-	objset_impl_t *osip;
+	objset_t *os;
 	dsl_dataset_t *ds;
 	uint64_t dsobj;
 
 	/* create and open the MOS (meta-objset) */
-	dp->dp_meta_objset = &dmu_objset_create_impl(spa,
-	    NULL, &dp->dp_meta_rootbp, DMU_OST_META, tx)->os;
+	dp->dp_meta_objset = dmu_objset_create_impl(spa,
+	    NULL, &dp->dp_meta_rootbp, DMU_OST_META, tx);
 
 	/* create the pool directory */
 	err = zap_create_claim(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
@@ -273,10 +272,10 @@
 
 	/* create the root objset */
 	VERIFY(0 == dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds));
-	osip = dmu_objset_create_impl(dp->dp_spa, ds,
+	os = dmu_objset_create_impl(dp->dp_spa, ds,
 	    dsl_dataset_get_blkptr(ds), DMU_OST_ZFS, tx);
 #ifdef _KERNEL
-	zfs_create_fs(&osip->os, kcred, zplprops, tx);
+	zfs_create_fs(os, kcred, zplprops, tx);
 #endif
 	dsl_dataset_rele(ds, FTAG);
 
@@ -293,7 +292,7 @@
 	dsl_dir_t *dd;
 	dsl_dataset_t *ds;
 	dsl_sync_task_group_t *dstg;
-	objset_impl_t *mosi = dp->dp_meta_objset->os;
+	objset_t *mos = dp->dp_meta_objset;
 	hrtime_t start, write_time;
 	uint64_t data_written;
 	int err;
@@ -323,7 +322,7 @@
 
 	for (ds = list_head(&dp->dp_synced_datasets); ds;
 	    ds = list_next(&dp->dp_synced_datasets, ds))
-		dmu_objset_do_userquota_callbacks(ds->ds_user_ptr, tx);
+		dmu_objset_do_userquota_callbacks(ds->ds_objset, tx);
 
 	/*
 	 * Sync the datasets again to push out the changes due to
@@ -358,10 +357,10 @@
 		dsl_pool_scrub_sync(dp, tx);
 
 	start = gethrtime();
-	if (list_head(&mosi->os_dirty_dnodes[txg & TXG_MASK]) != NULL ||
-	    list_head(&mosi->os_free_dnodes[txg & TXG_MASK]) != NULL) {
+	if (list_head(&mos->os_dirty_dnodes[txg & TXG_MASK]) != NULL ||
+	    list_head(&mos->os_free_dnodes[txg & TXG_MASK]) != NULL) {
 		zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
-		dmu_objset_sync(mosi, zio, tx);
+		dmu_objset_sync(mos, zio, tx);
 		err = zio_wait(zio);
 		ASSERT(err == 0);
 		dprintf_bp(&dp->dp_meta_rootbp, "meta objset rootbp is %s", "");
@@ -421,8 +420,8 @@
 
 	while (ds = list_head(&dp->dp_synced_datasets)) {
 		list_remove(&dp->dp_synced_datasets, ds);
-		ASSERT(ds->ds_user_ptr != NULL);
-		zil_clean(((objset_impl_t *)ds->ds_user_ptr)->os_zil);
+		ASSERT(ds->ds_objset != NULL);
+		zil_clean(ds->ds_objset->os_zil);
 		dmu_buf_rele(ds->ds_dbuf, ds);
 	}
 }
--- a/usr/src/uts/common/fs/zfs/dsl_prop.c	Wed Aug 12 21:16:19 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/dsl_prop.c	Wed Aug 12 22:11:31 2009 -0700
@@ -552,7 +552,7 @@
 int
 dsl_prop_get_all(objset_t *os, nvlist_t **nvp, boolean_t local)
 {
-	dsl_dataset_t *ds = os->os->os_dsl_dataset;
+	dsl_dataset_t *ds = os->os_dsl_dataset;
 	dsl_dir_t *dd = ds->ds_dir;
 	boolean_t snapshot = dsl_dataset_is_snapshot(ds);
 	int err = 0;
--- a/usr/src/uts/common/fs/zfs/spa.c	Wed Aug 12 21:16:19 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/spa.c	Wed Aug 12 22:11:31 2009 -0700
@@ -371,12 +371,14 @@
 					break;
 				}
 
-				if (error = dmu_objset_open(strval, DMU_OST_ZFS,
-				    DS_MODE_USER | DS_MODE_READONLY, &os))
+				if (error = dmu_objset_hold(strval, FTAG, &os))
 					break;
 
-				/* We don't support gzip bootable datasets */
-				if ((error = dsl_prop_get_integer(strval,
+				/* Must be ZPL and not gzip compressed. */
+
+				if (dmu_objset_type(os) != DMU_OST_ZFS) {
+					error = ENOTSUP;
+				} else if ((error = dsl_prop_get_integer(strval,
 				    zfs_prop_to_name(ZFS_PROP_COMPRESSION),
 				    &compress, NULL)) == 0 &&
 				    !BOOTFS_COMPRESS_VALID(compress)) {
@@ -384,7 +386,7 @@
 				} else {
 					objnum = dmu_objset_id(os);
 				}
-				dmu_objset_close(os);
+				dmu_objset_rele(os, FTAG);
 			}
 			break;
 
@@ -1598,6 +1600,12 @@
 		 */
 		if (vdev_resilver_needed(rvd, NULL, NULL))
 			spa_async_request(spa, SPA_ASYNC_RESILVER);
+
+		/*
+		 * Delete any inconsistent datasets.
+		 */
+		(void) dmu_objset_find(spa_name(spa),
+		    dsl_destroy_inconsistent, NULL, DS_FIND_CHILDREN);
 	}
 
 	error = 0;
--- a/usr/src/uts/common/fs/zfs/sys/dbuf.h	Wed Aug 12 21:16:19 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/dbuf.h	Wed Aug 12 22:11:31 2009 -0700
@@ -75,7 +75,6 @@
 	DB_EVICTING
 } dbuf_states_t;
 
-struct objset_impl;
 struct dnode;
 struct dmu_tx;
 
@@ -148,7 +147,7 @@
 	dmu_buf_t db;
 
 	/* the objset we belong to */
-	struct objset_impl *db_objset;
+	struct objset *db_objset;
 
 	/*
 	 * the dnode we belong to (NULL when evicted)
--- a/usr/src/uts/common/fs/zfs/sys/dmu.h	Wed Aug 12 21:16:19 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/dmu.h	Wed Aug 12 22:11:31 2009 -0700
@@ -59,7 +59,6 @@
 struct zbookmark;
 struct spa;
 struct nvlist;
-struct objset_impl;
 struct arc_buf;
 
 typedef struct objset objset_t;
@@ -140,16 +139,6 @@
 void zfs_acl_byteswap(void *buf, size_t size);
 void zfs_znode_byteswap(void *buf, size_t size);
 
-#define	DS_MODE_NOHOLD		0	/* internal use only */
-#define	DS_MODE_USER		1	/* simple access, no special needs */
-#define	DS_MODE_OWNER		2	/* the "main" access, e.g. a mount */
-#define	DS_MODE_TYPE_MASK	0x3
-#define	DS_MODE_TYPE(x)		((x) & DS_MODE_TYPE_MASK)
-#define	DS_MODE_READONLY	0x8
-#define	DS_MODE_IS_READONLY(x)	((x) & DS_MODE_READONLY)
-#define	DS_MODE_INCONSISTENT	0x10
-#define	DS_MODE_IS_INCONSISTENT(x)	((x) & DS_MODE_INCONSISTENT)
-
 #define	DS_FIND_SNAPSHOTS	(1<<0)
 #define	DS_FIND_CHILDREN	(1<<1)
 
@@ -166,11 +155,13 @@
 /*
  * Public routines to create, destroy, open, and close objsets.
  */
-int dmu_objset_open(const char *name, dmu_objset_type_t type, int mode,
-    objset_t **osp);
-int dmu_objset_open_ds(struct dsl_dataset *ds, dmu_objset_type_t type,
-    objset_t **osp);
-void dmu_objset_close(objset_t *os);
+int dmu_objset_hold(const char *name, void *tag, objset_t **osp);
+int dmu_objset_own(const char *name, dmu_objset_type_t type,
+    boolean_t readonly, void *tag, objset_t **osp);
+void dmu_objset_rele(objset_t *os, void *tag);
+void dmu_objset_disown(objset_t *os, void *tag);
+int dmu_objset_open_ds(struct dsl_dataset *ds, objset_t **osp);
+
 int dmu_objset_evict_dbufs(objset_t *os);
 int dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags,
     void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg);
@@ -309,7 +300,7 @@
  * Decide how many copies of a given block we should make.  Can be from
  * 1 to SPA_DVAS_PER_BP.
  */
-int dmu_get_replication_level(struct objset_impl *, struct zbookmark *zb,
+int dmu_get_replication_level(objset_t *os, struct zbookmark *zb,
     dmu_object_type_t ot);
 /*
  * The bonus data is accessed more or less like a regular buffer.
--- a/usr/src/uts/common/fs/zfs/sys/dmu_impl.h	Wed Aug 12 21:16:19 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/dmu_impl.h	Wed Aug 12 22:11:31 2009 -0700
@@ -210,8 +210,7 @@
  *
  * ds_lock
  *    protects:
- *    	ds_user_ptr
- *    	ds_user_evict_func
+ *    	ds_objset
  *    	ds_open_refcount
  *    	ds_snapname
  *    	ds_phys accounting
--- a/usr/src/uts/common/fs/zfs/sys/dmu_objset.h	Wed Aug 12 21:16:19 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/dmu_objset.h	Wed Aug 12 22:11:31 2009 -0700
@@ -40,7 +40,6 @@
 
 struct dsl_dataset;
 struct dmu_tx;
-struct objset_impl;
 
 #define	OBJSET_PHYS_SIZE 2048
 #define	OBJSET_OLD_PHYS_SIZE 1024
@@ -59,11 +58,6 @@
 } objset_phys_t;
 
 struct objset {
-	struct objset_impl *os;
-	int os_mode;
-};
-
-typedef struct objset_impl {
 	/* Immutable: */
 	struct dsl_dataset *os_dsl_dataset;
 	spa_t *os_spa;
@@ -73,7 +67,6 @@
 	dnode_t *os_userused_dnode;
 	dnode_t *os_groupused_dnode;
 	zilog_t *os_zil;
-	objset_t os;
 	uint8_t os_checksum;	/* can change, under dsl_dir's locks */
 	uint8_t os_compress;	/* can change, under dsl_dir's locks */
 	uint8_t os_copies;	/* can change, under dsl_dir's locks */
@@ -101,7 +94,7 @@
 	/* stuff we store for the user */
 	kmutex_t os_user_ptr_lock;
 	void *os_user_ptr;
-} objset_impl_t;
+};
 
 #define	DMU_META_DNODE_OBJECT	0
 #define	DMU_OBJECT_IS_SPECIAL(obj) ((int64_t)(obj) <= 0)
@@ -111,9 +104,13 @@
 	(os)->os_secondary_cache == ZFS_CACHE_METADATA)
 
 /* called from zpl */
-int dmu_objset_open(const char *name, dmu_objset_type_t type, int mode,
-    objset_t **osp);
-void dmu_objset_close(objset_t *os);
+int dmu_objset_hold(const char *name, void *tag, objset_t **osp);
+int dmu_objset_own(const char *name, dmu_objset_type_t type,
+    boolean_t readonly, void *tag, objset_t **osp);
+void dmu_objset_rele(objset_t *os, void *tag);
+void dmu_objset_disown(objset_t *os, void *tag);
+int dmu_objset_from_ds(struct dsl_dataset *ds, objset_t **osp);
+
 int dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags,
     void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg);
 int dmu_objset_clone(const char *name, struct dsl_dataset *clone_origin,
@@ -135,14 +132,14 @@
 int dmu_objset_evict_dbufs(objset_t *os);
 
 /* called from dsl */
-void dmu_objset_sync(objset_impl_t *os, zio_t *zio, dmu_tx_t *tx);
-objset_impl_t *dmu_objset_create_impl(spa_t *spa, struct dsl_dataset *ds,
+void dmu_objset_sync(objset_t *os, zio_t *zio, dmu_tx_t *tx);
+objset_t *dmu_objset_create_impl(spa_t *spa, struct dsl_dataset *ds,
     blkptr_t *bp, dmu_objset_type_t type, dmu_tx_t *tx);
 int dmu_objset_open_impl(spa_t *spa, struct dsl_dataset *ds, blkptr_t *bp,
-    objset_impl_t **osip);
-void dmu_objset_evict(struct dsl_dataset *ds, void *arg);
-void dmu_objset_do_userquota_callbacks(objset_impl_t *os, dmu_tx_t *tx);
-boolean_t dmu_objset_userused_enabled(objset_impl_t *os);
+    objset_t **osp);
+void dmu_objset_evict(objset_t *os);
+void dmu_objset_do_userquota_callbacks(objset_t *os, dmu_tx_t *tx);
+boolean_t dmu_objset_userused_enabled(objset_t *os);
 int dmu_objset_userspace_upgrade(objset_t *os);
 boolean_t dmu_objset_userspace_present(objset_t *os);
 
--- a/usr/src/uts/common/fs/zfs/sys/dnode.h	Wed Aug 12 21:16:19 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/dnode.h	Wed Aug 12 22:11:31 2009 -0700
@@ -88,7 +88,7 @@
 #define	EPB(blkshift, typeshift)	(1 << (blkshift - typeshift))
 
 struct dmu_buf_impl;
-struct objset_impl;
+struct objset;
 struct zio;
 
 enum dnode_dirtycontext {
@@ -136,7 +136,7 @@
 	list_node_t dn_link;
 
 	/* immutable: */
-	struct objset_impl *dn_objset;
+	struct objset *dn_objset;
 	uint64_t dn_object;
 	struct dmu_buf_impl *dn_dbuf;
 	dnode_phys_t *dn_phys; /* pointer into dn->dn_dbuf->db.db_data */
@@ -202,14 +202,14 @@
 	uint64_t fr_nblks;
 } free_range_t;
 
-dnode_t *dnode_special_open(struct objset_impl *dd, dnode_phys_t *dnp,
+dnode_t *dnode_special_open(struct objset *dd, dnode_phys_t *dnp,
     uint64_t object);
 void dnode_special_close(dnode_t *dn);
 
 void dnode_setbonuslen(dnode_t *dn, int newsize, dmu_tx_t *tx);
-int dnode_hold(struct objset_impl *dd, uint64_t object,
+int dnode_hold(struct objset *dd, uint64_t object,
     void *ref, dnode_t **dnp);
-int dnode_hold_impl(struct objset_impl *dd, uint64_t object, int flag,
+int dnode_hold_impl(struct objset *dd, uint64_t object, int flag,
     void *ref, dnode_t **dnp);
 boolean_t dnode_add_ref(dnode_t *dn, void *ref);
 void dnode_rele(dnode_t *dn, void *ref);
--- a/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h	Wed Aug 12 21:16:19 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h	Wed Aug 12 22:11:31 2009 -0700
@@ -42,8 +42,6 @@
 struct dsl_dir;
 struct dsl_pool;
 
-typedef void dsl_dataset_evict_func_t(struct dsl_dataset *, void *);
-
 #define	DS_FLAG_INCONSISTENT	(1ULL<<0)
 #define	DS_IS_INCONSISTENT(ds)	\
 	((ds)->ds_phys->ds_flags & DS_FLAG_INCONSISTENT)
@@ -132,8 +130,7 @@
 	 * Protected by ds_lock:
 	 */
 	kmutex_t ds_lock;
-	void *ds_user_ptr;
-	dsl_dataset_evict_func_t *ds_user_evict_func;
+	objset_t *ds_objset;
 	uint64_t ds_userrefs;
 
 	/*
@@ -174,17 +171,17 @@
 int dsl_dataset_hold(const char *name, void *tag, dsl_dataset_t **dsp);
 int dsl_dataset_hold_obj(struct dsl_pool *dp, uint64_t dsobj,
     void *tag, dsl_dataset_t **);
-int dsl_dataset_own(const char *name, int flags, void *owner,
-    dsl_dataset_t **dsp);
+int dsl_dataset_own(const char *name, boolean_t inconsistentok,
+    void *tag, dsl_dataset_t **dsp);
 int dsl_dataset_own_obj(struct dsl_pool *dp, uint64_t dsobj,
-    int flags, void *owner, dsl_dataset_t **);
+    boolean_t inconsistentok, void *tag, dsl_dataset_t **dsp);
 void dsl_dataset_name(dsl_dataset_t *ds, char *name);
 void dsl_dataset_rele(dsl_dataset_t *ds, void *tag);
-void dsl_dataset_disown(dsl_dataset_t *ds, void *owner);
+void dsl_dataset_disown(dsl_dataset_t *ds, void *tag);
 void dsl_dataset_drop_ref(dsl_dataset_t *ds, void *tag);
 boolean_t dsl_dataset_tryown(dsl_dataset_t *ds, boolean_t inconsistentok,
-    void *owner);
-void dsl_dataset_make_exclusive(dsl_dataset_t *ds, void *owner);
+    void *tag);
+void dsl_dataset_make_exclusive(dsl_dataset_t *ds, void *tag);
 uint64_t dsl_dataset_create_sync(dsl_dir_t *pds, const char *lastname,
     dsl_dataset_t *origin, uint64_t flags, cred_t *, dmu_tx_t *);
 uint64_t dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
@@ -205,10 +202,6 @@
     boolean_t recursive);
 int dsl_dataset_get_holds(const char *dsname, nvlist_t **nvp);
 
-void *dsl_dataset_set_user_ptr(dsl_dataset_t *ds,
-    void *p, dsl_dataset_evict_func_t func);
-void *dsl_dataset_get_user_ptr(dsl_dataset_t *ds);
-
 blkptr_t *dsl_dataset_get_blkptr(dsl_dataset_t *ds);
 void dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx);
 
@@ -245,6 +238,8 @@
 int64_t dsl_dataset_new_refreservation(dsl_dataset_t *ds, uint64_t reservation,
     dmu_tx_t *tx);
 
+int dsl_destroy_inconsistent(char *dsname, void *arg);
+
 #ifdef ZFS_DEBUG
 #define	dprintf_ds(ds, fmt, ...) do { \
 	if (zfs_flags & ZFS_DEBUG_DPRINTF) { \
--- a/usr/src/uts/common/fs/zfs/sys/zfs_vfsops.h	Wed Aug 12 21:16:19 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/zfs_vfsops.h	Wed Aug 12 22:11:31 2009 -0700
@@ -132,8 +132,8 @@
 
 extern uint_t zfs_fsyncer_key;
 
-extern int zfs_suspend_fs(zfsvfs_t *zfsvfs, char *osname, int *mode);
-extern int zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname, int mode);
+extern int zfs_suspend_fs(zfsvfs_t *zfsvfs);
+extern int zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname);
 extern int zfs_userspace_one(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
     const char *domain, uint64_t rid, uint64_t *valuep);
 extern int zfs_userspace_many(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
@@ -143,7 +143,7 @@
 extern boolean_t zfs_usergroup_overquota(zfsvfs_t *zfsvfs,
     boolean_t isgroup, uint64_t fuid);
 extern int zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers);
-extern int zfsvfs_create(const char *name, int mode, zfsvfs_t **zvp);
+extern int zfsvfs_create(const char *name, zfsvfs_t **zvp);
 extern void zfsvfs_free(zfsvfs_t *zfsvfs);
 
 #ifdef	__cplusplus
--- a/usr/src/uts/common/fs/zfs/zfs_ctldir.c	Wed Aug 12 21:16:19 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/zfs_ctldir.c	Wed Aug 12 22:11:31 2009 -0700
@@ -853,8 +853,7 @@
 		 */
 		return (err == EILSEQ ? ENOENT : err);
 	}
-	if (dmu_objset_open(snapname, DMU_OST_ZFS,
-	    DS_MODE_USER | DS_MODE_READONLY, &snap) != 0) {
+	if (dmu_objset_hold(snapname, FTAG, &snap) != 0) {
 		mutex_exit(&sdp->sd_lock);
 		ZFS_EXIT(zfsvfs);
 		return (ENOENT);
@@ -866,7 +865,7 @@
 	*vpp = sep->se_root = zfsctl_snapshot_mknode(dvp, dmu_objset_id(snap));
 	avl_insert(&sdp->sd_snaps, sep, where);
 
-	dmu_objset_close(snap);
+	dmu_objset_rele(snap, FTAG);
 domount:
 	mountpoint_len = strlen(refstr_value(dvp->v_vfsp->vfs_mntpt)) +
 	    strlen("/.zfs/snapshot/") + strlen(nm) + 1;
--- a/usr/src/uts/common/fs/zfs/zfs_ioctl.c	Wed Aug 12 21:16:19 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/zfs_ioctl.c	Wed Aug 12 22:11:31 2009 -0700
@@ -175,22 +175,15 @@
 static boolean_t
 zfs_is_bootfs(const char *name)
 {
-	spa_t *spa;
-	boolean_t ret = B_FALSE;
-
-	if (spa_open(name, &spa, FTAG) == 0) {
-		if (spa->spa_bootfs) {
-			objset_t *os;
-
-			if (dmu_objset_open(name, DMU_OST_ZFS,
-			    DS_MODE_USER | DS_MODE_READONLY, &os) == 0) {
-				ret = (dmu_objset_id(os) == spa->spa_bootfs);
-				dmu_objset_close(os);
-			}
-		}
-		spa_close(spa, FTAG);
+	objset_t *os;
+
+	if (dmu_objset_hold(name, FTAG, &os) == 0) {
+		boolean_t ret;
+		ret = (dmu_objset_id(os) == dmu_objset_spa(os)->spa_bootfs);
+		dmu_objset_rele(os, FTAG);
+		return (ret);
 	}
-	return (ret);
+	return (B_FALSE);
 }
 
 /*
@@ -224,13 +217,17 @@
 	objset_t *os;
 	boolean_t rc = B_TRUE;
 
-	if (dmu_objset_open(name, DMU_OST_ANY,
-	    DS_MODE_USER | DS_MODE_READONLY, &os) == 0) {
+	if (dmu_objset_hold(name, FTAG, &os) == 0) {
 		uint64_t zplversion;
 
+		if (dmu_objset_type(os) != DMU_OST_ZFS) {
+			dmu_objset_rele(os, FTAG);
+			return (B_TRUE);
+		}
+		/* XXX reading from non-owned objset */
 		if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &zplversion) == 0)
 			rc = zplversion < version;
-		dmu_objset_close(os);
+		dmu_objset_rele(os, FTAG);
 	}
 	return (rc);
 }
@@ -548,20 +545,19 @@
 	if (error)
 		return (error);
 
-	error = dmu_objset_open(zc->zc_name, DMU_OST_ANY,
-	    DS_MODE_USER | DS_MODE_READONLY, &clone);
+	error = dmu_objset_hold(zc->zc_name, FTAG, &clone);
 
 	if (error == 0) {
 		dsl_dataset_t *pclone = NULL;
 		dsl_dir_t *dd;
-		dd = clone->os->os_dsl_dataset->ds_dir;
+		dd = clone->os_dsl_dataset->ds_dir;
 
 		rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
 		error = dsl_dataset_hold_obj(dd->dd_pool,
 		    dd->dd_phys->dd_origin_obj, FTAG, &pclone);
 		rw_exit(&dd->dd_pool->dp_config_rwlock);
 		if (error) {
-			dmu_objset_close(clone);
+			dmu_objset_rele(clone, FTAG);
 			return (error);
 		}
 
@@ -569,7 +565,7 @@
 		    ZFS_DELEG_PERM_MOUNT, cr);
 
 		dsl_dataset_name(pclone, parentname);
-		dmu_objset_close(clone);
+		dmu_objset_rele(clone, FTAG);
 		dsl_dataset_rele(pclone, FTAG);
 		if (error == 0)
 			error = zfs_secpolicy_write_perms(parentname,
@@ -840,20 +836,23 @@
 	objset_t *os;
 	int error;
 
-	error = dmu_objset_open(dsname, DMU_OST_ZFS,
-	    DS_MODE_USER | DS_MODE_READONLY, &os);
+	error = dmu_objset_hold(dsname, FTAG, &os);
 	if (error)
 		return (error);
-
-	mutex_enter(&os->os->os_user_ptr_lock);
+	if (dmu_objset_type(os) != DMU_OST_ZFS) {
+		dmu_objset_rele(os, FTAG);
+		return (EINVAL);
+	}
+
+	mutex_enter(&os->os_user_ptr_lock);
 	*zvp = dmu_objset_get_user(os);
 	if (*zvp) {
 		VFS_HOLD((*zvp)->z_vfs);
 	} else {
 		error = ESRCH;
 	}
-	mutex_exit(&os->os->os_user_ptr_lock);
-	dmu_objset_close(os);
+	mutex_exit(&os->os_user_ptr_lock);
+	dmu_objset_rele(os, FTAG);
 	return (error);
 }
 
@@ -862,13 +861,12 @@
  * case its z_vfs will be NULL, and it will be opened as the owner.
  */
 static int
-zfsvfs_hold(const char *name, boolean_t readonly, void *tag, zfsvfs_t **zvp)
+zfsvfs_hold(const char *name, void *tag, zfsvfs_t **zvp)
 {
 	int error = 0;
-	int mode = DS_MODE_OWNER | (readonly ? DS_MODE_READONLY : 0);
 
 	if (getzfsvfs(name, zvp) != 0)
-		error = zfsvfs_create(name, mode, zvp);
+		error = zfsvfs_create(name, zvp);
 	if (error == 0) {
 		rrw_enter(&(*zvp)->z_teardown_lock, RW_READER, tag);
 		if ((*zvp)->z_unmounted) {
@@ -892,7 +890,7 @@
 	if (zfsvfs->z_vfs) {
 		VFS_RELE(zfsvfs->z_vfs);
 	} else {
-		dmu_objset_close(zfsvfs->z_os);
+		dmu_objset_disown(zfsvfs->z_os, zfsvfs);
 		zfsvfs_free(zfsvfs);
 	}
 }
@@ -1186,18 +1184,30 @@
 	return (0);
 }
 
+/*
+ * inputs:
+ * zc_name		name of filesystem
+ * zc_obj		object to find
+ *
+ * outputs:
+ * zc_value		name of object
+ */
 static int
 zfs_ioc_obj_to_path(zfs_cmd_t *zc)
 {
-	objset_t *osp;
+	objset_t *os;
 	int error;
 
-	if ((error = dmu_objset_open(zc->zc_name, DMU_OST_ZFS,
-	    DS_MODE_USER | DS_MODE_READONLY, &osp)) != 0)
+	/* XXX reading from objset not owned */
+	if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0)
 		return (error);
-	error = zfs_obj_to_path(osp, zc->zc_obj, zc->zc_value,
+	if (dmu_objset_type(os) != DMU_OST_ZFS) {
+		dmu_objset_rele(os, FTAG);
+		return (EINVAL);
+	}
+	error = zfs_obj_to_path(os, zc->zc_obj, zc->zc_value,
 	    sizeof (zc->zc_value));
-	dmu_objset_close(osp);
+	dmu_objset_rele(os, FTAG);
 
 	return (error);
 }
@@ -1380,8 +1390,7 @@
 	int error;
 	nvlist_t *nv;
 
-	if (error = dmu_objset_open(zc->zc_name,
-	    DMU_OST_ANY, DS_MODE_USER | DS_MODE_READONLY, &os))
+	if (error = dmu_objset_hold(zc->zc_name, FTAG, &os))
 		return (error);
 
 	dmu_objset_fast_stat(os, &zc->zc_objset_stats);
@@ -1394,6 +1403,7 @@
 		 * which we aren't supposed to do with a
 		 * DS_MODE_USER hold, because it could be
 		 * inconsistent.  So this is a bit of a workaround...
+		 * XXX reading with out owning
 		 */
 		if (!zc->zc_objset_stats.dds_inconsistent) {
 			if (dmu_objset_type(os) == DMU_OST_ZVOL)
@@ -1403,7 +1413,7 @@
 		nvlist_free(nv);
 	}
 
-	dmu_objset_close(os);
+	dmu_objset_rele(os, FTAG);
 	return (error);
 }
 
@@ -1438,8 +1448,8 @@
 	objset_t *os;
 	int err;
 
-	if (err = dmu_objset_open(zc->zc_name,
-	    DMU_OST_ANY, DS_MODE_USER | DS_MODE_READONLY, &os))
+	/* XXX reading without owning */
+	if (err = dmu_objset_hold(zc->zc_name, FTAG, &os))
 		return (err);
 
 	dmu_objset_fast_stat(os, &zc->zc_objset_stats);
@@ -1464,7 +1474,7 @@
 	} else {
 		err = ENOENT;
 	}
-	dmu_objset_close(os);
+	dmu_objset_rele(os, FTAG);
 	return (err);
 }
 
@@ -1505,8 +1515,7 @@
 	int error;
 	char *p;
 
-	if (error = dmu_objset_open(zc->zc_name,
-	    DMU_OST_ANY, DS_MODE_USER | DS_MODE_READONLY, &os)) {
+	if (error = dmu_objset_hold(zc->zc_name, FTAG, &os)) {
 		if (error == ENOENT)
 			error = ESRCH;
 		return (error);
@@ -1536,7 +1545,7 @@
 		if (error == ENOENT)
 			error = ESRCH;
 	} while (error == 0 && dataset_name_hidden(zc->zc_name));
-	dmu_objset_close(os);
+	dmu_objset_rele(os, FTAG);
 
 	if (error == 0)
 		error = zfs_ioc_objset_stats(zc); /* fill in the stats */
@@ -1562,8 +1571,7 @@
 	objset_t *os;
 	int error;
 
-	error = dmu_objset_open(zc->zc_name,
-	    DMU_OST_ANY, DS_MODE_USER | DS_MODE_READONLY, &os);
+	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
 	if (error)
 		return (error == ENOENT ? ESRCH : error);
 
@@ -1576,14 +1584,14 @@
 	 * so exit immediately.
 	 */
 	if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >= MAXNAMELEN) {
-		dmu_objset_close(os);
+		dmu_objset_rele(os, FTAG);
 		return (ESRCH);
 	}
 
 	error = dmu_snapshot_list_next(os,
 	    sizeof (zc->zc_name) - strlen(zc->zc_name),
 	    zc->zc_name + strlen(zc->zc_name), NULL, &zc->zc_cookie, NULL);
-	dmu_objset_close(os);
+	dmu_objset_rele(os, FTAG);
 	if (error == 0)
 		error = zfs_ioc_objset_stats(zc); /* fill in the stats */
 	else if (error == ENOENT)
@@ -1726,8 +1734,7 @@
 				domain = propname +
 				    strlen(zfs_userquota_prop_prefixes[type]);
 
-				error = zfsvfs_hold(name, B_FALSE, FTAG,
-				    &zfsvfs);
+				error = zfsvfs_hold(name, FTAG, &zfsvfs);
 				if (error == 0) {
 					error = zfs_set_userquota(zfsvfs,
 					    type, domain, rid, quota);
@@ -1794,8 +1801,7 @@
 
 			if ((error = nvpair_value_uint64(elem, &intval)) != 0)
 				goto out;
-			if ((error = zfsvfs_hold(name, B_FALSE, FTAG,
-			    &zfsvfs)) != 0)
+			if ((error = zfsvfs_hold(name, FTAG, &zfsvfs)) != 0)
 				goto out;
 			error = zfs_set_version(zfsvfs, intval);
 			zfsvfs_rele(zfsvfs, FTAG);
@@ -1911,13 +1917,12 @@
 		nvlist_t *origprops;
 		objset_t *os;
 
-		if (dmu_objset_open(zc->zc_name, DMU_OST_ANY,
-		    DS_MODE_USER | DS_MODE_READONLY, &os) == 0) {
+		if (dmu_objset_hold(zc->zc_name, FTAG, &os) == 0) {
 			if (dsl_prop_get_all(os, &origprops, TRUE) == 0) {
 				clear_props(zc->zc_name, origprops, nvl);
 				nvlist_free(origprops);
 			}
-			dmu_objset_close(os);
+			dmu_objset_rele(os, FTAG);
 		}
 
 	}
@@ -2316,13 +2321,12 @@
 	/*
 	 * Open parent object set so we can inherit zplprop values.
 	 */
-	if ((error = dmu_objset_open(parentname, DMU_OST_ANY,
-	    DS_MODE_USER | DS_MODE_READONLY, &os)) != 0)
+	if ((error = dmu_objset_hold(parentname, FTAG, &os)) != 0)
 		return (error);
 
 	error = zfs_fill_zplprops_impl(os, zplver, fuids_ok, createprops,
 	    zplprops, is_ci);
-	dmu_objset_close(os);
+	dmu_objset_rele(os, FTAG);
 	return (error);
 }
 
@@ -2399,15 +2403,14 @@
 			return (EINVAL);
 		}
 
-		error = dmu_objset_open(zc->zc_value, type,
-		    DS_MODE_USER | DS_MODE_READONLY, &clone);
+		error = dmu_objset_hold(zc->zc_value, FTAG, &clone);
 		if (error) {
 			nvlist_free(nvprops);
 			return (error);
 		}
 
 		error = dmu_objset_clone(zc->zc_name, dmu_objset_ds(clone), 0);
-		dmu_objset_close(clone);
+		dmu_objset_rele(clone, FTAG);
 		if (error) {
 			nvlist_free(nvprops);
 			return (error);
@@ -2644,7 +2647,7 @@
 	if (error)
 		goto out;
 
-	error = dsl_dataset_own(clone_name, DS_MODE_INCONSISTENT, FTAG, &clone);
+	error = dsl_dataset_own(clone_name, B_TRUE, FTAG, &clone);
 	if (error)
 		goto out;
 
@@ -2652,9 +2655,7 @@
 	 * Do clone swap.
 	 */
 	if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) {
-		int mode;
-
-		error = zfs_suspend_fs(zfsvfs, NULL, &mode);
+		error = zfs_suspend_fs(zfsvfs);
 		if (error == 0) {
 			int resume_err;
 
@@ -2666,7 +2667,7 @@
 			} else {
 				error = EBUSY;
 			}
-			resume_err = zfs_resume_fs(zfsvfs, zc->zc_name, mode);
+			resume_err = zfs_resume_fs(zfsvfs, zc->zc_name);
 			error = error ? error : resume_err;
 		}
 		VFS_RELE(zfsvfs->z_vfs);
@@ -2796,20 +2797,18 @@
 		return (EBADF);
 	}
 
-	if (props && dmu_objset_open(tofs, DMU_OST_ANY,
-	    DS_MODE_USER | DS_MODE_READONLY, &os) == 0) {
+	if (props && dmu_objset_hold(tofs, FTAG, &os) == 0) {
 		/*
 		 * If new properties are supplied, they are to completely
 		 * replace the existing ones, so stash away the existing ones.
 		 */
-		(void) dsl_prop_get_all(os, &origprops, TRUE);
-
-		dmu_objset_close(os);
+		(void) dsl_prop_get_all(os, &origprops, B_TRUE);
+
+		dmu_objset_rele(os, FTAG);
 	}
 
 	if (zc->zc_string[0]) {
-		error = dmu_objset_open(zc->zc_string, DMU_OST_ANY,
-		    DS_MODE_USER | DS_MODE_READONLY, &origin);
+		error = dmu_objset_hold(zc->zc_string, FTAG, &origin);
 		if (error)
 			goto out;
 	}
@@ -2817,7 +2816,7 @@
 	error = dmu_recv_begin(tofs, tosnap, &zc->zc_begin_record,
 	    force, origin, &drc);
 	if (origin)
-		dmu_objset_close(origin);
+		dmu_objset_rele(origin, FTAG);
 	if (error)
 		goto out;
 
@@ -2842,11 +2841,8 @@
 		if (getzfsvfs(tofs, &zfsvfs) == 0) {
 			/* online recv */
 			int end_err;
-			char *osname;
-			int mode;
-
-			osname = kmem_alloc(MAXNAMELEN, KM_SLEEP);
-			error = zfs_suspend_fs(zfsvfs, osname, &mode);
+
+			error = zfs_suspend_fs(zfsvfs);
 			/*
 			 * If the suspend fails, then the recv_end will
 			 * likely also fail, and clean up after itself.
@@ -2854,12 +2850,11 @@
 			end_err = dmu_recv_end(&drc);
 			if (error == 0) {
 				int resume_err =
-				    zfs_resume_fs(zfsvfs, osname, mode);
+				    zfs_resume_fs(zfsvfs, tofs);
 				error = error ? error : resume_err;
 			}
 			error = error ? error : end_err;
 			VFS_RELE(zfsvfs->z_vfs);
-			kmem_free(osname, MAXNAMELEN);
 		} else {
 			error = dmu_recv_end(&drc);
 		}
@@ -2901,8 +2896,7 @@
 	int error;
 	offset_t off;
 
-	error = dmu_objset_open(zc->zc_name, DMU_OST_ANY,
-	    DS_MODE_USER | DS_MODE_READONLY, &tosnap);
+	error = dmu_objset_hold(zc->zc_name, FTAG, &tosnap);
 	if (error)
 		return (error);
 
@@ -2916,20 +2910,19 @@
 		if (cp)
 			*(cp+1) = 0;
 		(void) strncat(buf, zc->zc_value, MAXPATHLEN);
-		error = dmu_objset_open(buf, DMU_OST_ANY,
-		    DS_MODE_USER | DS_MODE_READONLY, &fromsnap);
+		error = dmu_objset_hold(buf, FTAG, &fromsnap);
 		kmem_free(buf, MAXPATHLEN);
 		if (error) {
-			dmu_objset_close(tosnap);
+			dmu_objset_rele(tosnap, FTAG);
 			return (error);
 		}
 	}
 
 	fp = getf(zc->zc_cookie);
 	if (fp == NULL) {
-		dmu_objset_close(tosnap);
+		dmu_objset_rele(tosnap, FTAG);
 		if (fromsnap)
-			dmu_objset_close(fromsnap);
+			dmu_objset_rele(fromsnap, FTAG);
 		return (EBADF);
 	}
 
@@ -2940,8 +2933,8 @@
 		fp->f_offset = off;
 	releasef(zc->zc_cookie);
 	if (fromsnap)
-		dmu_objset_close(fromsnap);
-	dmu_objset_close(tosnap);
+		dmu_objset_rele(fromsnap, FTAG);
+	dmu_objset_rele(tosnap, FTAG);
 	return (error);
 }
 
@@ -3099,7 +3092,7 @@
 	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
 		return (EINVAL);
 
-	error = zfsvfs_hold(zc->zc_name, B_TRUE, FTAG, &zfsvfs);
+	error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs);
 	if (error)
 		return (error);
 
@@ -3127,7 +3120,7 @@
 	zfsvfs_t *zfsvfs;
 	int error;
 
-	error = zfsvfs_hold(zc->zc_name, B_TRUE, FTAG, &zfsvfs);
+	error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs);
 	if (error)
 		return (error);
 
@@ -3163,30 +3156,27 @@
 	zfsvfs_t *zfsvfs;
 
 	if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) {
-		if (!dmu_objset_userused_enabled(zfsvfs->z_os->os)) {
+		if (!dmu_objset_userused_enabled(zfsvfs->z_os)) {
 			/*
 			 * If userused is not enabled, it may be because the
 			 * objset needs to be closed & reopened (to grow the
 			 * objset_phys_t).  Suspend/resume the fs will do that.
 			 */
-			int mode;
-			error = zfs_suspend_fs(zfsvfs, NULL, &mode);
-			if (error == 0) {
-				error = zfs_resume_fs(zfsvfs,
-				    zc->zc_name, mode);
-			}
+			error = zfs_suspend_fs(zfsvfs);
+			if (error == 0)
+				error = zfs_resume_fs(zfsvfs, zc->zc_name);
 		}
 		if (error == 0)
 			error = dmu_objset_userspace_upgrade(zfsvfs->z_os);
 		VFS_RELE(zfsvfs->z_vfs);
 	} else {
-		error = dmu_objset_open(zc->zc_name, DMU_OST_ANY,
-		    DS_MODE_USER, &os);
+		/* XXX kind of reading contents without owning */
+		error = dmu_objset_hold(zc->zc_name, FTAG, &os);
 		if (error)
 			return (error);
 
 		error = dmu_objset_userspace_upgrade(os);
-		dmu_objset_close(os);
+		dmu_objset_rele(os, FTAG);
 	}
 
 	return (error);
--- a/usr/src/uts/common/fs/zfs/zfs_vfsops.c	Wed Aug 12 21:16:19 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/zfs_vfsops.c	Wed Aug 12 22:11:31 2009 -0700
@@ -835,32 +835,30 @@
 }
 
 int
-zfsvfs_create(const char *osname, int mode, zfsvfs_t **zvp)
+zfsvfs_create(const char *osname, zfsvfs_t **zvp)
 {
 	objset_t *os;
 	zfsvfs_t *zfsvfs;
 	uint64_t zval;
 	int i, error;
 
-	if (error = dsl_prop_get_integer(osname, "readonly", &zval, NULL))
-		return (error);
-	if (zval)
-		mode |= DS_MODE_READONLY;
+	zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP);
 
-	error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &os);
-	if (error == EROFS) {
-		mode |= DS_MODE_READONLY;
-		error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &os);
+	/*
+	 * We claim to always be readonly so we can open snapshots;
+	 * other ZPL code will prevent us from writing to snapshots.
+	 */
+	error = dmu_objset_own(osname, DMU_OST_ZFS, B_TRUE, zfsvfs, &os);
+	if (error) {
+		kmem_free(zfsvfs, sizeof (zfsvfs_t));
+		return (error);
 	}
-	if (error)
-		return (error);
 
 	/*
 	 * Initialize the zfs-specific filesystem structure.
 	 * Should probably make this a kmem cache, shuffle fields,
 	 * and just bzero up to z_hold_mtx[].
 	 */
-	zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP);
 	zfsvfs->z_vfs = NULL;
 	zfsvfs->z_parent = zfsvfs;
 	zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE;
@@ -948,7 +946,7 @@
 	return (0);
 
 out:
-	dmu_objset_close(os);
+	dmu_objset_disown(os, zfsvfs);
 	*zvp = NULL;
 	kmem_free(zfsvfs, sizeof (zfsvfs_t));
 	return (error);
@@ -966,9 +964,9 @@
 	/*
 	 * Set the objset user_ptr to track its zfsvfs.
 	 */
-	mutex_enter(&zfsvfs->z_os->os->os_user_ptr_lock);
+	mutex_enter(&zfsvfs->z_os->os_user_ptr_lock);
 	dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
-	mutex_exit(&zfsvfs->z_os->os->os_user_ptr_lock);
+	mutex_exit(&zfsvfs->z_os->os_user_ptr_lock);
 
 	zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data);
 	if (zil_disable) {
@@ -1084,7 +1082,7 @@
 	ASSERT(vfsp);
 	ASSERT(osname);
 
-	error = zfsvfs_create(osname, DS_MODE_OWNER, &zfsvfs);
+	error = zfsvfs_create(osname, &zfsvfs);
 	if (error)
 		return (error);
 	zfsvfs->z_vfs = vfsp;
@@ -1146,9 +1144,9 @@
 		xattr_changed_cb(zfsvfs, pval);
 		zfsvfs->z_issnap = B_TRUE;
 
-		mutex_enter(&zfsvfs->z_os->os->os_user_ptr_lock);
+		mutex_enter(&zfsvfs->z_os->os_user_ptr_lock);
 		dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
-		mutex_exit(&zfsvfs->z_os->os->os_user_ptr_lock);
+		mutex_exit(&zfsvfs->z_os->os_user_ptr_lock);
 	} else {
 		error = zfsvfs_setup(zfsvfs, B_TRUE);
 	}
@@ -1157,7 +1155,7 @@
 		zfsctl_create(zfsvfs);
 out:
 	if (error) {
-		dmu_objset_close(zfsvfs->z_os);
+		dmu_objset_disown(zfsvfs->z_os, zfsvfs);
 		zfsvfs_free(zfsvfs);
 	} else {
 		atomic_add_32(&zfs_active_fs_count, 1);
@@ -1725,14 +1723,14 @@
 		/*
 		 * Unset the objset user_ptr.
 		 */
-		mutex_enter(&os->os->os_user_ptr_lock);
+		mutex_enter(&os->os_user_ptr_lock);
 		dmu_objset_set_user(os, NULL);
-		mutex_exit(&os->os->os_user_ptr_lock);
+		mutex_exit(&os->os_user_ptr_lock);
 
 		/*
 		 * Finally release the objset
 		 */
-		dmu_objset_close(os);
+		dmu_objset_disown(os, zfsvfs);
 	}
 
 	/*
@@ -1835,17 +1833,13 @@
  * 'z_teardown_inactive_lock' write held.
  */
 int
-zfs_suspend_fs(zfsvfs_t *zfsvfs, char *name, int *modep)
+zfs_suspend_fs(zfsvfs_t *zfsvfs)
 {
 	int error;
 
 	if ((error = zfsvfs_teardown(zfsvfs, B_FALSE)) != 0)
 		return (error);
-
-	*modep = zfsvfs->z_os->os_mode;
-	if (name)
-		dmu_objset_name(zfsvfs->z_os, name);
-	dmu_objset_close(zfsvfs->z_os);
+	dmu_objset_disown(zfsvfs->z_os, zfsvfs);
 
 	return (0);
 }
@@ -1854,14 +1848,15 @@
  * Reopen zfsvfs_t::z_os and release VOPs.
  */
 int
-zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname, int mode)
+zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname)
 {
 	int err;
 
 	ASSERT(RRW_WRITE_HELD(&zfsvfs->z_teardown_lock));
 	ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock));
 
-	err = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os);
+	err = dmu_objset_own(osname, DMU_OST_ZFS, B_FALSE, zfsvfs,
+	    &zfsvfs->z_os);
 	if (err) {
 		zfsvfs->z_os = NULL;
 	} else {
--- a/usr/src/uts/common/fs/zfs/zil.c	Wed Aug 12 21:16:19 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/zil.c	Wed Aug 12 22:11:31 2009 -0700
@@ -500,7 +500,7 @@
 	objset_t *os;
 	int error;
 
-	error = dmu_objset_open(osname, DMU_OST_ANY, DS_MODE_USER, &os);
+	error = dmu_objset_hold(osname, FTAG, &os);
 	if (error) {
 		cmn_err(CE_WARN, "can't open objset for %s", osname);
 		return (0);
@@ -549,7 +549,7 @@
 	}
 
 	ASSERT3U(first_txg, ==, (spa_last_synced_txg(zilog->zl_spa) + 1));
-	dmu_objset_close(os);
+	dmu_objset_rele(os, FTAG);
 	return (0);
 }
 
@@ -571,7 +571,7 @@
 	zil_trailer_t *ztp;
 	int error;
 
-	error = dmu_objset_open(osname, DMU_OST_ANY, DS_MODE_USER, &os);
+	error = dmu_objset_hold(osname, FTAG, &os);
 	if (error) {
 		cmn_err(CE_WARN, "can't open objset for %s", osname);
 		return (0);
@@ -581,7 +581,7 @@
 	zh = zil_header_in_syncing_context(zilog);
 	blk = zh->zh_log;
 	if (BP_IS_HOLE(&blk)) {
-		dmu_objset_close(os);
+		dmu_objset_rele(os, FTAG);
 		return (0); /* no chain */
 	}
 
@@ -594,7 +594,7 @@
 		blk = ztp->zit_next_blk;
 		VERIFY(arc_buf_remove_ref(abuf, &abuf) == 1);
 	}
-	dmu_objset_close(os);
+	dmu_objset_rele(os, FTAG);
 	if (error == ECKSUM)
 		return (0); /* normal end of chain */
 	return (error);
@@ -1658,7 +1658,7 @@
 	zilog_t *zilog;
 	int error;
 
-	error = dmu_objset_open(osname, DMU_OST_ANY, DS_MODE_USER, &os);
+	error = dmu_objset_hold(osname, FTAG, &os);
 	if (error)
 		return (error);
 
@@ -1667,6 +1667,6 @@
 		error = EEXIST;
 	else
 		zil_resume(zilog);
-	dmu_objset_close(os);
+	dmu_objset_rele(os, FTAG);
 	return (error);
 }
--- a/usr/src/uts/common/fs/zfs/zvol.c	Wed Aug 12 21:16:19 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/zvol.c	Wed Aug 12 22:11:31 2009 -0700
@@ -80,6 +80,7 @@
 #include "zfs_namecheck.h"
 
 static void *zvol_state;
+static char *zvol_tag = "zvol_tag";
 
 #define	ZVOL_DUMPSIZE		"dumpsize"
 
@@ -109,7 +110,7 @@
 	uint8_t		zv_min_bs;	/* minimum addressable block shift */
 	uint8_t		zv_flags;	/* readonly, dumpified, etc. */
 	objset_t	*zv_objset;	/* objset handle */
-	uint32_t	zv_mode;	/* DS_MODE_* flags at open time */
+	boolean_t 	zv_issnap;	/* is a snapshot (read-only) */
 	uint32_t	zv_open_count[OTYPCNT];	/* open counts */
 	uint32_t	zv_total_opens;	/* total open count */
 	zilog_t		*zv_zilog;	/* ZIL handle */
@@ -432,7 +433,6 @@
 	uint64_t volsize;
 	minor_t minor = 0;
 	struct pathname linkpath;
-	int ds_mode = DS_MODE_OWNER;
 	vnode_t *vp = NULL;
 	char *devpath;
 	char chrbuf[30], blkbuf[30];
@@ -445,10 +445,8 @@
 		return (EEXIST);
 	}
 
-	if (strchr(name, '@') != 0)
-		ds_mode |= DS_MODE_READONLY;
-
-	error = dmu_objset_open(name, DMU_OST_ZVOL, ds_mode, &os);
+	/* lie and say we're read-only */
+	error = dmu_objset_own(name, DMU_OST_ZVOL, B_TRUE, zvol_tag, &os);
 
 	if (error) {
 		mutex_exit(&zvol_state_lock);
@@ -458,7 +456,7 @@
 	error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize);
 
 	if (error) {
-		dmu_objset_close(os);
+		dmu_objset_disown(os, zvol_tag);
 		mutex_exit(&zvol_state_lock);
 		return (error);
 	}
@@ -500,13 +498,13 @@
 		minor = zvol_minor_alloc();
 
 	if (minor == 0) {
-		dmu_objset_close(os);
+		dmu_objset_disown(os, zvol_tag);
 		mutex_exit(&zvol_state_lock);
 		return (ENXIO);
 	}
 
 	if (ddi_soft_state_zalloc(zvol_state, minor) != DDI_SUCCESS) {
-		dmu_objset_close(os);
+		dmu_objset_disown(os, zvol_tag);
 		mutex_exit(&zvol_state_lock);
 		return (EAGAIN);
 	}
@@ -519,7 +517,7 @@
 	if (ddi_create_minor_node(zfs_dip, chrbuf, S_IFCHR,
 	    minor, DDI_PSEUDO, 0) == DDI_FAILURE) {
 		ddi_soft_state_free(zvol_state, minor);
-		dmu_objset_close(os);
+		dmu_objset_disown(os, zvol_tag);
 		mutex_exit(&zvol_state_lock);
 		return (EAGAIN);
 	}
@@ -530,7 +528,7 @@
 	    minor, DDI_PSEUDO, 0) == DDI_FAILURE) {
 		ddi_remove_minor_node(zfs_dip, chrbuf);
 		ddi_soft_state_free(zvol_state, minor);
-		dmu_objset_close(os);
+		dmu_objset_disown(os, zvol_tag);
 		mutex_exit(&zvol_state_lock);
 		return (EAGAIN);
 	}
@@ -542,7 +540,7 @@
 	zv->zv_minor = minor;
 	zv->zv_volsize = volsize;
 	zv->zv_objset = os;
-	zv->zv_mode = ds_mode;
+	zv->zv_issnap = dmu_objset_is_snapshot(os);
 	zv->zv_zilog = zil_open(os, zvol_get_data);
 	mutex_init(&zv->zv_znode.z_range_lock, NULL, MUTEX_DEFAULT, NULL);
 	avl_create(&zv->zv_znode.z_range_avl, zfs_range_compare,
@@ -600,7 +598,7 @@
 
 	zil_close(zv->zv_zilog);
 	zv->zv_zilog = NULL;
-	dmu_objset_close(zv->zv_objset);
+	dmu_objset_disown(zv->zv_objset, zvol_tag);
 	zv->zv_objset = NULL;
 	avl_destroy(&zv->zv_znode.z_range_avl);
 	mutex_destroy(&zv->zv_znode.z_range_lock);
@@ -704,7 +702,7 @@
 		 * If we are doing a "zfs clone -o volsize=", then the
 		 * minor node won't exist yet.
 		 */
-		error = dmu_objset_open(name, DMU_OST_ZVOL, DS_MODE_OWNER,
+		error = dmu_objset_own(name, DMU_OST_ZVOL, B_FALSE, FTAG,
 		    &state.zv_objset);
 		if (error != 0)
 			goto out;
@@ -717,7 +715,7 @@
 	    doi.doi_data_block_size)) != 0)
 		goto out;
 
-	if (zv->zv_flags & ZVOL_RDONLY || (zv->zv_mode & DS_MODE_READONLY)) {
+	if (zv->zv_flags & ZVOL_RDONLY || zv->zv_issnap) {
 		error = EROFS;
 		goto out;
 	}
@@ -760,7 +758,7 @@
 
 out:
 	if (state.zv_objset)
-		dmu_objset_close(state.zv_objset);
+		dmu_objset_disown(state.zv_objset, FTAG);
 
 	mutex_exit(&zvol_state_lock);
 
@@ -788,7 +786,7 @@
 			mutex_exit(&zvol_state_lock);
 		return (ENXIO);
 	}
-	if (zv->zv_flags & ZVOL_RDONLY || (zv->zv_mode & DS_MODE_READONLY)) {
+	if (zv->zv_flags & ZVOL_RDONLY || zv->zv_issnap) {
 		if (needlock)
 			mutex_exit(&zvol_state_lock);
 		return (EROFS);
@@ -836,7 +834,7 @@
 	ASSERT(zv->zv_objset != NULL);
 
 	if ((flag & FWRITE) &&
-	    (zv->zv_flags & ZVOL_RDONLY || (zv->zv_mode & DS_MODE_READONLY))) {
+	    (zv->zv_flags & ZVOL_RDONLY || zv->zv_issnap)) {
 		mutex_exit(&zvol_state_lock);
 		return (EROFS);
 	}
@@ -1160,8 +1158,7 @@
 	}
 
 	if (!(bp->b_flags & B_READ) &&
-	    (zv->zv_flags & ZVOL_RDONLY ||
-	    zv->zv_mode & DS_MODE_READONLY)) {
+	    (zv->zv_flags & ZVOL_RDONLY || zv->zv_issnap)) {
 		bioerror(bp, EROFS);
 		biodone(bp);
 		return (0);
@@ -1721,7 +1718,7 @@
 	dmu_tx_t *tx;
 	objset_t *os = zv->zv_objset;
 
-	if (zv->zv_flags & ZVOL_RDONLY || (zv->zv_mode & DS_MODE_READONLY))
+	if (zv->zv_flags & ZVOL_RDONLY || zv->zv_issnap)
 		return (EROFS);
 
 	/*