6421210 zfs rollback without unmounting
authorahrens
Fri, 09 Nov 2007 15:28:25 -0800
changeset 5446 51fbc14b301d
parent 5445 7cb529554cc9
child 5447 19783d5e8d69
6421210 zfs rollback without unmounting 6597358 assertion failed in dsl_dir_stats() 6623982 stack overflow due to zfs_ioc_recv() 6626110 recvflags_t could use a comment 6626117 z_dbuf_held is redundant (use z_dbuf != NULL)
usr/src/cmd/zfs/zfs_main.c
usr/src/lib/libzfs/common/libzfs.h
usr/src/lib/libzfs/common/libzfs_dataset.c
usr/src/lib/libzfs/common/libzfs_sendrecv.c
usr/src/uts/common/fs/zfs/dmu_objset.c
usr/src/uts/common/fs/zfs/dsl_dataset.c
usr/src/uts/common/fs/zfs/dsl_dir.c
usr/src/uts/common/fs/zfs/sys/dmu.h
usr/src/uts/common/fs/zfs/sys/dmu_objset.h
usr/src/uts/common/fs/zfs/sys/zfs_dir.h
usr/src/uts/common/fs/zfs/sys/zfs_znode.h
usr/src/uts/common/fs/zfs/zfs_dir.c
usr/src/uts/common/fs/zfs/zfs_ioctl.c
usr/src/uts/common/fs/zfs/zfs_vfsops.c
usr/src/uts/common/fs/zfs/zfs_vnops.c
usr/src/uts/common/fs/zfs/zfs_znode.c
--- a/usr/src/cmd/zfs/zfs_main.c	Fri Nov 09 13:00:18 2007 -0800
+++ b/usr/src/cmd/zfs/zfs_main.c	Fri Nov 09 15:28:25 2007 -0800
@@ -214,7 +214,7 @@
 		    "\trename -p <filesystem|volume> <filesystem|volume>\n"
 		    "\trename -r <snapshot> <snapshot>"));
 	case HELP_ROLLBACK:
-		return (gettext("\trollback [-rRf] <snapshot>\n"));
+		return (gettext("\trollback [-rR] <snapshot>\n"));
 	case HELP_SEND:
 		return (gettext("\tsend [-R] [-[iI] snapshot] <snapshot>\n"));
 	case HELP_SET:
@@ -1903,11 +1903,10 @@
 }
 
 /*
- * zfs rollback [-rfR] <snapshot>
+ * zfs rollback [-rR] <snapshot>
  *
  * 	-r	Delete any intervening snapshots before doing rollback
  * 	-R	Delete any snapshots and their clones
- * 	-f	Force unmount filesystems, even if they are in use.
  *
  * Given a filesystem, rollback to a specific snapshot, discarding any changes
  * since then and making it the active dataset.  If more recent snapshots exist,
@@ -1998,14 +1997,10 @@
 	zfs_handle_t *zhp, *snap;
 	char parentname[ZFS_MAXNAMELEN];
 	char *delim;
-	int force = 0;
 
 	/* check options */
-	while ((c = getopt(argc, argv, "rfR")) != -1) {
+	while ((c = getopt(argc, argv, "rR")) != -1) {
 		switch (c) {
-		case 'f':
-			force = 1;
-			break;
 		case 'r':
 			cb.cb_recurse = 1;
 			break;
@@ -2063,7 +2058,7 @@
 	/*
 	 * Rollback parent to the given snapshot.
 	 */
-	ret = zfs_rollback(zhp, snap, force);
+	ret = zfs_rollback(zhp, snap);
 
 out:
 	zfs_close(snap);
--- a/usr/src/lib/libzfs/common/libzfs.h	Fri Nov 09 13:00:18 2007 -0800
+++ b/usr/src/lib/libzfs/common/libzfs.h	Fri Nov 09 15:28:25 2007 -0800
@@ -415,18 +415,29 @@
 extern int zfs_destroy_snaps(zfs_handle_t *, char *);
 extern int zfs_clone(zfs_handle_t *, const char *, nvlist_t *);
 extern int zfs_snapshot(libzfs_handle_t *, const char *, boolean_t);
-extern int zfs_rollback(zfs_handle_t *, zfs_handle_t *, int);
+extern int zfs_rollback(zfs_handle_t *, zfs_handle_t *);
 extern int zfs_rename(zfs_handle_t *, const char *, boolean_t);
 extern int zfs_send(zfs_handle_t *, const char *, const char *,
     boolean_t, boolean_t, boolean_t, boolean_t, int);
 extern int zfs_promote(zfs_handle_t *);
 
 typedef struct recvflags {
+	/* print informational messages (ie, -v was specified) */
 	boolean_t verbose : 1;
+
+	/* the destination is a prefix, not the exact fs (ie, -d) */
 	boolean_t isprefix : 1;
+
+	/* do not actually do the recv, just check if it would work (ie, -n) */
 	boolean_t dryrun : 1;
+
+	/* rollback/destroy filesystems as necessary (eg, -F) */
 	boolean_t force : 1;
+
+	/* set "canmount=off" on all modified filesystems */
 	boolean_t canmountoff : 1;
+
+	/* byteswap flag is used internally; callers need not specify */
 	boolean_t byteswap : 1;
 } recvflags_t;
 
--- a/usr/src/lib/libzfs/common/libzfs_dataset.c	Fri Nov 09 13:00:18 2007 -0800
+++ b/usr/src/lib/libzfs/common/libzfs_dataset.c	Fri Nov 09 15:28:25 2007 -0800
@@ -3274,7 +3274,6 @@
 typedef struct rollback_data {
 	const char	*cb_target;		/* the snapshot */
 	uint64_t	cb_create;		/* creation time reference */
-	prop_changelist_t *cb_clp;		/* changelist pointer */
 	int		cb_error;
 	boolean_t	cb_dependent;
 } rollback_data_t;
@@ -3292,24 +3291,17 @@
 			char *logstr;
 
 			cbp->cb_dependent = B_TRUE;
-			if (zfs_iter_dependents(zhp, B_FALSE, rollback_destroy,
-			    cbp) != 0)
-				cbp->cb_error = 1;
+			cbp->cb_error |= zfs_iter_dependents(zhp, B_FALSE,
+			    rollback_destroy, cbp);
 			cbp->cb_dependent = B_FALSE;
 
 			logstr = zhp->zfs_hdl->libzfs_log_str;
 			zhp->zfs_hdl->libzfs_log_str = NULL;
-			if (zfs_destroy(zhp) != 0)
-				cbp->cb_error = 1;
-			else
-				changelist_remove(cbp->cb_clp, zhp->zfs_name);
+			cbp->cb_error |= zfs_destroy(zhp);
 			zhp->zfs_hdl->libzfs_log_str = logstr;
 		}
 	} else {
-		if (zfs_destroy(zhp) != 0)
-			cbp->cb_error = 1;
-		else
-			changelist_remove(cbp->cb_clp, zhp->zfs_name);
+		cbp->cb_error |= zfs_destroy(zhp);
 	}
 
 	zfs_close(zhp);
@@ -3317,17 +3309,37 @@
 }
 
 /*
- * Rollback the dataset to its latest snapshot.
+ * Given a dataset, rollback to a specific snapshot, discarding any
+ * data changes since then and making it the active dataset.
+ *
+ * Any snapshots more recent than the target are destroyed, along with
+ * their dependents.
  */
-static int
-do_rollback(zfs_handle_t *zhp)
+int
+zfs_rollback(zfs_handle_t *zhp, zfs_handle_t *snap)
 {
-	int ret;
+	rollback_data_t cb = { 0 };
+	int err;
 	zfs_cmd_t zc = { 0 };
 
 	assert(zhp->zfs_type == ZFS_TYPE_FILESYSTEM ||
 	    zhp->zfs_type == ZFS_TYPE_VOLUME);
 
+	/*
+	 * Destroy all more recent snapshots and their dependents.
+	 */
+	cb.cb_target = snap->zfs_name;
+	cb.cb_create = zfs_prop_get_int(snap, ZFS_PROP_CREATETXG);
+	(void) zfs_iter_children(zhp, rollback_destroy, &cb);
+
+	if (cb.cb_error != 0)
+		return (cb.cb_error);
+
+	/*
+	 * Now that we have verified that the snapshot is the latest,
+	 * rollback to the given snapshot.
+	 */
+
 	if (zhp->zfs_type == ZFS_TYPE_VOLUME &&
 	    zvol_remove_link(zhp->zfs_hdl, zhp->zfs_name) != 0)
 		return (-1);
@@ -3340,81 +3352,21 @@
 		zc.zc_objset_type = DMU_OST_ZFS;
 
 	/*
-	 * We rely on the consumer to verify that there are no newer snapshots
-	 * for the given dataset.  Given these constraints, we can simply pass
-	 * the name on to the ioctl() call.  There is still an unlikely race
-	 * condition where the user has taken a snapshot since we verified that
-	 * this was the most recent.
+	 * We rely on zfs_iter_children() to verify that there are no
+	 * newer snapshots for the given dataset.  Therefore, we can
+	 * simply pass the name on to the ioctl() call.  There is still
+	 * an unlikely race condition where the user has taken a
+	 * snapshot since we verified that this was the most recent.
 	 */
-	if ((ret = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_ROLLBACK, &zc)) != 0) {
+	if ((err = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_ROLLBACK, &zc)) != 0) {
 		(void) zfs_standard_error_fmt(zhp->zfs_hdl, errno,
 		    dgettext(TEXT_DOMAIN, "cannot rollback '%s'"),
 		    zhp->zfs_name);
 	} else if (zhp->zfs_type == ZFS_TYPE_VOLUME) {
-		ret = zvol_create_link(zhp->zfs_hdl, zhp->zfs_name);
+		err = zvol_create_link(zhp->zfs_hdl, zhp->zfs_name);
 	}
 
-	return (ret);
-}
-
-/*
- * Given a dataset, rollback to a specific snapshot, discarding any
- * data changes since then and making it the active dataset.
- *
- * Any snapshots more recent than the target are destroyed, along with
- * their dependents.
- */
-int
-zfs_rollback(zfs_handle_t *zhp, zfs_handle_t *snap, int flag)
-{
-	int ret;
-	rollback_data_t cb = { 0 };
-	prop_changelist_t *clp;
-
-	/*
-	 * Unmount all dependendents of the dataset and the dataset itself.
-	 * The list we need to gather is the same as for doing rename
-	 */
-	clp = changelist_gather(zhp, ZFS_PROP_NAME, flag ? MS_FORCE: 0);
-	if (clp == NULL)
-		return (-1);
-
-	if ((ret = changelist_prefix(clp)) != 0)
-		goto out;
-
-	/*
-	 * Destroy all recent snapshots and its dependends.
-	 */
-	cb.cb_target = snap->zfs_name;
-	cb.cb_create = zfs_prop_get_int(snap, ZFS_PROP_CREATETXG);
-	cb.cb_clp = clp;
-	(void) zfs_iter_children(zhp, rollback_destroy, &cb);
-
-	if ((ret = cb.cb_error) != 0) {
-		(void) changelist_postfix(clp);
-		goto out;
-	}
-
-	/*
-	 * Now that we have verified that the snapshot is the latest,
-	 * rollback to the given snapshot.
-	 */
-	ret = do_rollback(zhp);
-
-	if (ret != 0) {
-		(void) changelist_postfix(clp);
-		goto out;
-	}
-
-	/*
-	 * We only want to re-mount the filesystem if it was mounted in the
-	 * first place.
-	 */
-	ret = changelist_postfix(clp);
-
-out:
-	changelist_free(clp);
-	return (ret);
+	return (err);
 }
 
 /*
--- a/usr/src/lib/libzfs/common/libzfs_sendrecv.c	Fri Nov 09 13:00:18 2007 -0800
+++ b/usr/src/lib/libzfs/common/libzfs_sendrecv.c	Fri Nov 09 15:28:25 2007 -0800
@@ -1896,6 +1896,7 @@
 	/* the kernel needs the non-byteswapped begin record */
 	drr_noswap = drr;
 
+	flags.byteswap = B_FALSE;
 	if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) {
 		/*
 		 * We computed the checksum in the wrong byteorder in
--- a/usr/src/uts/common/fs/zfs/dmu_objset.c	Fri Nov 09 13:00:18 2007 -0800
+++ b/usr/src/uts/common/fs/zfs/dmu_objset.c	Fri Nov 09 15:28:25 2007 -0800
@@ -620,19 +620,22 @@
 	return (error);
 }
 
+/*
+ * This will close the objset.
+ */
 int
-dmu_objset_rollback(const char *name)
+dmu_objset_rollback(objset_t *os)
 {
 	int err;
-	objset_t *os;
 	dsl_dataset_t *ds;
 
-	err = dmu_objset_open(name, DMU_OST_ANY,
-	    DS_MODE_EXCLUSIVE | DS_MODE_INCONSISTENT, &os);
-	if (err)
-		return (err);
+	ds = os->os->os_dsl_dataset;
 
-	ds = os->os->os_dsl_dataset;
+	if (!dsl_dataset_tryupgrade(ds, DS_MODE_STANDARD, DS_MODE_EXCLUSIVE)) {
+		dmu_objset_close(os);
+		return (EBUSY);
+	}
+
 	err = dsl_dataset_rollback(ds, os->os->os_phys->os_type);
 
 	/*
--- a/usr/src/uts/common/fs/zfs/dsl_dataset.c	Fri Nov 09 13:00:18 2007 -0800
+++ b/usr/src/uts/common/fs/zfs/dsl_dataset.c	Fri Nov 09 15:28:25 2007 -0800
@@ -1757,17 +1757,17 @@
 	}
 
 	/* clone origin is really a dsl_dir thing... */
+	rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER);
 	if (ds->ds_dir->dd_phys->dd_origin_obj) {
 		dsl_dataset_t *ods;
 
-		rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER);
 		VERIFY(0 == dsl_dataset_open_obj(ds->ds_dir->dd_pool,
 		    ds->ds_dir->dd_phys->dd_origin_obj,
 		    NULL, DS_MODE_NONE, FTAG, &ods));
 		dsl_dataset_name(ods, stat->dds_origin);
 		dsl_dataset_close(ods, DS_MODE_NONE, FTAG);
-		rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
 	}
+	rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
 }
 
 uint64_t
--- a/usr/src/uts/common/fs/zfs/dsl_dir.c	Fri Nov 09 13:00:18 2007 -0800
+++ b/usr/src/uts/common/fs/zfs/dsl_dir.c	Fri Nov 09 15:28:25 2007 -0800
@@ -530,20 +530,19 @@
 	    dd->dd_phys->dd_compressed_bytes));
 	mutex_exit(&dd->dd_lock);
 
+	rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
 	if (dd->dd_phys->dd_origin_obj) {
 		dsl_dataset_t *ds;
 		char buf[MAXNAMELEN];
 
-		rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
 		VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool,
 		    dd->dd_phys->dd_origin_obj,
 		    NULL, DS_MODE_NONE, FTAG, &ds));
 		dsl_dataset_name(ds, buf);
 		dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
-		rw_exit(&dd->dd_pool->dp_config_rwlock);
-
 		dsl_prop_nvlist_add_string(nv, ZFS_PROP_ORIGIN, buf);
 	}
+	rw_exit(&dd->dd_pool->dp_config_rwlock);
 }
 
 void
--- a/usr/src/uts/common/fs/zfs/sys/dmu.h	Fri Nov 09 13:00:18 2007 -0800
+++ b/usr/src/uts/common/fs/zfs/sys/dmu.h	Fri Nov 09 15:28:25 2007 -0800
@@ -170,7 +170,7 @@
     void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg);
 int dmu_objset_destroy(const char *name);
 int dmu_snapshots_destroy(char *fsname, char *snapname);
-int dmu_objset_rollback(const char *name);
+int dmu_objset_rollback(objset_t *os);
 int dmu_objset_snapshot(char *fsname, char *snapname, boolean_t recursive);
 int dmu_objset_rename(const char *name, const char *newname,
     boolean_t recursive);
--- a/usr/src/uts/common/fs/zfs/sys/dmu_objset.h	Fri Nov 09 13:00:18 2007 -0800
+++ b/usr/src/uts/common/fs/zfs/sys/dmu_objset.h	Fri Nov 09 15:28:25 2007 -0800
@@ -102,7 +102,7 @@
     objset_t *clone_parent,
     void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg);
 int dmu_objset_destroy(const char *name);
-int dmu_objset_rollback(const char *name);
+int dmu_objset_rollback(objset_t *os);
 int dmu_objset_snapshot(char *fsname, char *snapname, boolean_t recursive);
 void dmu_objset_stats(objset_t *os, nvlist_t *nv);
 void dmu_objset_fast_stat(objset_t *os, dmu_objset_stats_t *stat);
--- a/usr/src/uts/common/fs/zfs/sys/zfs_dir.h	Fri Nov 09 13:00:18 2007 -0800
+++ b/usr/src/uts/common/fs/zfs/sys/zfs_dir.h	Fri Nov 09 15:28:25 2007 -0800
@@ -58,9 +58,8 @@
     boolean_t *);
 extern int zfs_dirlook(znode_t *, char *, vnode_t **, int, int *,
     pathname_t *);
-extern void zfs_mknode(znode_t *, vattr_t *, uint64_t *,
-    dmu_tx_t *, cred_t *, uint_t, znode_t **, int,
-    zfs_acl_t *, zfs_fuid_info_t **);
+extern void zfs_mknode(znode_t *, vattr_t *, dmu_tx_t *, cred_t *,
+    uint_t, znode_t **, int, zfs_acl_t *, zfs_fuid_info_t **);
 extern void zfs_rmnode(znode_t *);
 extern void zfs_dl_name_switch(zfs_dirlock_t *dl, char *new, char **old);
 extern boolean_t zfs_dirempty(znode_t *);
--- a/usr/src/uts/common/fs/zfs/sys/zfs_znode.h	Fri Nov 09 13:00:18 2007 -0800
+++ b/usr/src/uts/common/fs/zfs/sys/zfs_znode.h	Fri Nov 09 15:28:25 2007 -0800
@@ -193,7 +193,6 @@
 	avl_tree_t	z_range_avl;	/* avl tree of file range locks */
 	uint8_t		z_unlinked;	/* file has been unlinked */
 	uint8_t		z_atime_dirty;	/* atime needs to be synced */
-	uint8_t		z_dbuf_held;	/* Is z_dbuf already held? */
 	uint8_t		z_zn_prefetch;	/* Prefetch znodes? */
 	uint_t		z_blksz;	/* block size in bytes */
 	uint_t		z_seq;		/* modification sequence number */
@@ -250,7 +249,7 @@
 #define	ZFS_EXIT(zfsvfs) rrw_exit(&(zfsvfs)->z_teardown_lock, FTAG)
 
 #define	ZFS_VERIFY_ZP(zp) \
-	if (!(zp)->z_dbuf_held) { \
+	if ((zp)->z_dbuf == NULL) { \
 		ZFS_EXIT((zp)->z_zfsvfs); \
 		return (EIO); \
 	} \
@@ -258,28 +257,27 @@
 /*
  * Macros for dealing with dmu_buf_hold
  */
-#define	ZFS_OBJ_HASH(obj_num)	(obj_num & (ZFS_OBJ_MTX_SZ - 1))
+#define	ZFS_OBJ_HASH(obj_num)	((obj_num) & (ZFS_OBJ_MTX_SZ - 1))
 #define	ZFS_OBJ_MUTEX(zp)	\
-	(&zp->z_zfsvfs->z_hold_mtx[ZFS_OBJ_HASH(zp->z_id)])
+	(&(zp)->z_zfsvfs->z_hold_mtx[ZFS_OBJ_HASH((zp)->z_id)])
 #define	ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num) \
-	mutex_enter(&zfsvfs->z_hold_mtx[ZFS_OBJ_HASH(obj_num)]);
-
+	mutex_enter(&(zfsvfs)->z_hold_mtx[ZFS_OBJ_HASH(obj_num)]);
 #define	ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num) \
-	mutex_exit(&zfsvfs->z_hold_mtx[ZFS_OBJ_HASH(obj_num)])
+	mutex_exit(&(zfsvfs)->z_hold_mtx[ZFS_OBJ_HASH(obj_num)])
 
 /*
  * Macros to encode/decode ZFS stored time values from/to struct timespec
  */
 #define	ZFS_TIME_ENCODE(tp, stmp)		\
 {						\
-	stmp[0] = (uint64_t)(tp)->tv_sec; 	\
-	stmp[1] = (uint64_t)(tp)->tv_nsec;	\
+	(stmp)[0] = (uint64_t)(tp)->tv_sec; 	\
+	(stmp)[1] = (uint64_t)(tp)->tv_nsec;	\
 }
 
 #define	ZFS_TIME_DECODE(tp, stmp)		\
 {						\
-	(tp)->tv_sec = (time_t)stmp[0];		\
-	(tp)->tv_nsec = (long)stmp[1];		\
+	(tp)->tv_sec = (time_t)(stmp)[0];		\
+	(tp)->tv_nsec = (long)(stmp)[1];		\
 }
 
 /*
@@ -319,7 +317,7 @@
 extern void zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
     znode_t *dzp, znode_t *zp, char *name, vsecattr_t *, zfs_fuid_info_t *,
     vattr_t *vap);
-extern int	zfs_log_create_txtype(zil_create_t, vsecattr_t *vsecp,
+extern int zfs_log_create_txtype(zil_create_t, vsecattr_t *vsecp,
     vattr_t *vap);
 extern void zfs_log_remove(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
     znode_t *dzp, char *name);
--- a/usr/src/uts/common/fs/zfs/zfs_dir.c	Fri Nov 09 13:00:18 2007 -0800
+++ b/usr/src/uts/common/fs/zfs/zfs_dir.c	Fri Nov 09 15:28:25 2007 -0800
@@ -801,7 +801,6 @@
 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
 	znode_t *xzp;
 	dmu_tx_t *tx;
-	uint64_t xoid;
 	int error;
 	zfs_fuid_info_t *fuidp = NULL;
 
@@ -828,11 +827,10 @@
 		dmu_tx_abort(tx);
 		return (error);
 	}
-	zfs_mknode(zp, vap, &xoid, tx, cr, IS_XATTR, &xzp, 0, NULL, &fuidp);
-	ASSERT(xzp->z_id == xoid);
+	zfs_mknode(zp, vap, tx, cr, IS_XATTR, &xzp, 0, NULL, &fuidp);
 	ASSERT(xzp->z_phys->zp_parent == zp->z_id);
 	dmu_buf_will_dirty(zp->z_dbuf, tx);
-	zp->z_phys->zp_xattr = xoid;
+	zp->z_phys->zp_xattr = xzp->z_id;
 
 	(void) zfs_log_create(zfsvfs->z_log, tx, TX_MKXATTR, zp,
 	    xzp, "", NULL, fuidp, vap);
--- a/usr/src/uts/common/fs/zfs/zfs_ioctl.c	Fri Nov 09 13:00:18 2007 -0800
+++ b/usr/src/uts/common/fs/zfs/zfs_ioctl.c	Fri Nov 09 15:28:25 2007 -0800
@@ -2193,14 +2193,51 @@
 
 /*
  * inputs:
- * zc_name	name of snapshot to roll back to
+ * zc_name	name of dataset to rollback (to most recent snapshot)
  *
  * outputs:	none
  */
 static int
 zfs_ioc_rollback(zfs_cmd_t *zc)
 {
-	return (dmu_objset_rollback(zc->zc_name));
+	objset_t *os;
+	int error;
+	zfsvfs_t *zfsvfs = NULL;
+
+	/*
+	 * Get the zfsvfs for the objset being rolled back. There
+	 * won't be one if we're operating on a zvol or if the
+	 * filesystem is not mounted.
+	 */
+	error = dmu_objset_open(zc->zc_name, DMU_OST_ANY,
+	    DS_MODE_STANDARD, &os);
+	if (error)
+		return (error);
+
+	if (dmu_objset_type(os) == DMU_OST_ZFS) {
+		mutex_enter(&os->os->os_user_ptr_lock);
+		zfsvfs = dmu_objset_get_user(os);
+		if (zfsvfs != NULL)
+			VFS_HOLD(zfsvfs->z_vfs);
+		mutex_exit(&os->os->os_user_ptr_lock);
+	}
+
+	if (zfsvfs != NULL) {
+		char osname[MAXNAMELEN];
+		int mode;
+
+		VERIFY3U(0, ==, zfs_suspend_fs(zfsvfs, osname, &mode));
+		ASSERT(strcmp(osname, zc->zc_name) == 0);
+		error = dmu_objset_rollback(os);
+		VERIFY3U(0, ==, zfs_resume_fs(zfsvfs, osname, mode));
+
+		VFS_RELE(zfsvfs->z_vfs);
+	} else {
+		error = dmu_objset_rollback(os);
+	}
+	/* Note that dmu_objset_rollback() closes the objset for us. */
+
+	return (error);
 }
 
 /*
@@ -2292,16 +2329,14 @@
 	 * objset doesn't exist yet, or is not mounted.
 	 */
 
-	error = dmu_objset_open(tofs, DMU_OST_ANY,
+	error = dmu_objset_open(tofs, DMU_OST_ZFS,
 	    DS_MODE_STANDARD | DS_MODE_READONLY, &os);
 	if (!error) {
-		if (dmu_objset_type(os) == DMU_OST_ZFS) {
-			mutex_enter(&os->os->os_user_ptr_lock);
-			zfsvfs = dmu_objset_get_user(os);
-			if (zfsvfs != NULL)
-				VFS_HOLD(zfsvfs->z_vfs);
-			mutex_exit(&os->os->os_user_ptr_lock);
-		}
+		mutex_enter(&os->os->os_user_ptr_lock);
+		zfsvfs = dmu_objset_get_user(os);
+		if (zfsvfs != NULL)
+			VFS_HOLD(zfsvfs->z_vfs);
+		mutex_exit(&os->os->os_user_ptr_lock);
 		dmu_objset_close(os);
 	}
 
@@ -2346,15 +2381,17 @@
 		}
 		if (error == 0) {
 			nvpair_t *elem;
-			zfs_cmd_t zc2 = { 0 };
-
-			(void) strcpy(zc2.zc_name, tofs);
+			zfs_cmd_t *zc2;
+			zc2 = kmem_alloc(sizeof (zfs_cmd_t), KM_SLEEP);
+
+			(void) strcpy(zc2->zc_name, tofs);
 			for (elem = nvlist_next_nvpair(nv, NULL); elem;
 			    elem = nvlist_next_nvpair(nv, elem)) {
-				(void) strcpy(zc2.zc_value, nvpair_name(elem));
-				if (zfs_secpolicy_inherit(&zc2, CRED()) == 0)
-					(void) zfs_ioc_inherit_prop(&zc2);
+				(void) strcpy(zc2->zc_value, nvpair_name(elem));
+				if (zfs_secpolicy_inherit(zc2, CRED()) == 0)
+					(void) zfs_ioc_inherit_prop(zc2);
 			}
+			kmem_free(zc2, sizeof (zfs_cmd_t));
 		}
 		if (nv)
 			nvlist_free(nv);
--- a/usr/src/uts/common/fs/zfs/zfs_vfsops.c	Fri Nov 09 13:00:18 2007 -0800
+++ b/usr/src/uts/common/fs/zfs/zfs_vfsops.c	Fri Nov 09 15:28:25 2007 -0800
@@ -1259,13 +1259,13 @@
 	for (zp = list_head(&zfsvfs->z_all_znodes); zp != &markerzp;
 	    zp = nextzp) {
 		nextzp = list_next(&zfsvfs->z_all_znodes, zp);
-		if (zp->z_dbuf_held) {
+		if (zp->z_dbuf) {
 			/* dbufs should only be held when force unmounting */
-			zp->z_dbuf_held = 0;
 			mutex_exit(&zfsvfs->z_znodes_lock);
 			dmu_buf_rele(zp->z_dbuf, NULL);
+			zp->z_dbuf = NULL;
+			mutex_enter(&zfsvfs->z_znodes_lock);
 			/* Start again */
-			mutex_enter(&zfsvfs->z_znodes_lock);
 			nextzp = list_head(&zfsvfs->z_all_znodes);
 		} else {
 			list_remove(&zfsvfs->z_all_znodes, zp);
@@ -1529,7 +1529,6 @@
 		mutex_enter(&zfsvfs->z_znodes_lock);
 		for (zp = list_head(&zfsvfs->z_all_znodes); zp;
 		    zp = list_next(&zfsvfs->z_all_znodes, zp)) {
-			ASSERT(!zp->z_dbuf_held);
 			(void) zfs_rezget(zp);
 		}
 		mutex_exit(&zfsvfs->z_znodes_lock);
--- a/usr/src/uts/common/fs/zfs/zfs_vnops.c	Fri Nov 09 13:00:18 2007 -0800
+++ b/usr/src/uts/common/fs/zfs/zfs_vnops.c	Fri Nov 09 15:28:25 2007 -0800
@@ -1121,7 +1121,6 @@
 	zfs_dirlock_t	*dl;
 	dmu_tx_t	*tx;
 	int		error;
-	uint64_t	zoid;
 	zfs_acl_t	*aclp = NULL;
 	zfs_fuid_info_t *fuidp = NULL;
 
@@ -1194,7 +1193,6 @@
 			return (error);
 		}
 	}
-	zoid = zp ? zp->z_id : -1ULL;
 
 	if (zp == NULL) {
 		uint64_t txtype;
@@ -1250,8 +1248,7 @@
 				zfs_acl_free(aclp);
 			return (error);
 		}
-		zfs_mknode(dzp, vap, &zoid, tx, cr, 0, &zp, 0, aclp, &fuidp);
-		ASSERT(zp->z_id == zoid);
+		zfs_mknode(dzp, vap, tx, cr, 0, &zp, 0, aclp, &fuidp);
 		(void) zfs_link_create(dl, zp, tx, ZNEW);
 		txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap);
 		if (flag & FIGNORECASE)
@@ -1564,7 +1561,6 @@
 	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
 	zilog_t		*zilog;
 	zfs_dirlock_t	*dl;
-	uint64_t	zoid = 0;
 	uint64_t	txtype;
 	dmu_tx_t	*tx;
 	int		error;
@@ -1671,7 +1667,7 @@
 	/*
 	 * Create new node.
 	 */
-	zfs_mknode(dzp, vap, &zoid, tx, cr, 0, &zp, 0, aclp, &fuidp);
+	zfs_mknode(dzp, vap, tx, cr, 0, &zp, 0, aclp, &fuidp);
 
 	if (aclp)
 		zfs_acl_free(aclp);
@@ -3123,7 +3119,6 @@
 	dmu_tx_t	*tx;
 	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
 	zilog_t		*zilog;
-	uint64_t	zoid;
 	int		len = strlen(link);
 	int		error;
 	int		zflg = ZNEW;
@@ -3198,22 +3193,22 @@
 	 * Put the link content into bonus buffer if it will fit;
 	 * otherwise, store it just like any other file data.
 	 */
-	zoid = 0;
 	if (sizeof (znode_phys_t) + len <= dmu_bonus_max()) {
-		zfs_mknode(dzp, vap, &zoid, tx, cr, 0, &zp, len, NULL, &fuidp);
+		zfs_mknode(dzp, vap, tx, cr, 0, &zp, len, NULL, &fuidp);
 		if (len != 0)
 			bcopy(link, zp->z_phys + 1, len);
 	} else {
 		dmu_buf_t *dbp;
 
-		zfs_mknode(dzp, vap, &zoid, tx, cr, 0, &zp, 0, NULL, &fuidp);
+		zfs_mknode(dzp, vap, tx, cr, 0, &zp, 0, NULL, &fuidp);
 		/*
 		 * Nothing can access the znode yet so no locking needed
 		 * for growing the znode's blocksize.
 		 */
 		zfs_grow_blocksize(zp, len, tx);
 
-		VERIFY(0 == dmu_buf_hold(zfsvfs->z_os, zoid, 0, FTAG, &dbp));
+		VERIFY(0 == dmu_buf_hold(zfsvfs->z_os,
+		    zp->z_id, 0, FTAG, &dbp));
 		dmu_buf_will_dirty(dbp, tx);
 
 		ASSERT3U(len, <=, dbp->db_size);
@@ -3653,7 +3648,11 @@
 	int error;
 
 	rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER);
-	if (zp->z_dbuf_held == 0) {
+	if (zp->z_dbuf == NULL) {
+		/*
+		 * This fs has been unmounted, or we did
+		 * zfs_suspend/resume and this file no longer exists.
+		 */
 		if (vn_has_cached_data(vp)) {
 			(void) pvn_vplist_dirty(vp, 0, zfs_null_putapage,
 			    B_INVAL, cr);
@@ -3661,12 +3660,8 @@
 
 		mutex_enter(&zp->z_lock);
 		vp->v_count = 0; /* count arrives as 1 */
-		if (zp->z_dbuf == NULL) {
-			mutex_exit(&zp->z_lock);
-			zfs_znode_free(zp);
-		} else {
-			mutex_exit(&zp->z_lock);
-		}
+		mutex_exit(&zp->z_lock);
+		zfs_znode_free(zp);
 		rw_exit(&zfsvfs->z_teardown_inactive_lock);
 		VFS_RELE(zfsvfs->z_vfs);
 		return;
--- a/usr/src/uts/common/fs/zfs/zfs_znode.c	Fri Nov 09 13:00:18 2007 -0800
+++ b/usr/src/uts/common/fs/zfs/zfs_znode.c	Fri Nov 09 15:28:25 2007 -0800
@@ -79,13 +79,12 @@
 	vnode_t *vp = ZTOV(zp);
 
 	mutex_enter(&zp->z_lock);
+	zp->z_dbuf = NULL;
 	if (vp->v_count == 0) {
 		mutex_exit(&zp->z_lock);
 		vn_invalid(vp);
 		zfs_znode_free(zp);
 	} else {
-		/* signal force unmount that this znode can be freed */
-		zp->z_dbuf = NULL;
 		mutex_exit(&zp->z_lock);
 	}
 }
@@ -108,7 +107,7 @@
 	avl_create(&zp->z_range_avl, zfs_range_compare,
 	    sizeof (rl_t), offsetof(rl_t, r_node));
 
-	zp->z_dbuf_held = 0;
+	zp->z_dbuf = NULL;
 	zp->z_dirlocks = 0;
 	return (0);
 }
@@ -128,7 +127,7 @@
 	avl_destroy(&zp->z_range_avl);
 	mutex_destroy(&zp->z_range_lock);
 
-	ASSERT(zp->z_dbuf_held == 0);
+	ASSERT(zp->z_dbuf == NULL);
 	ASSERT(ZTOV(zp)->v_count == 0);
 	vn_free(ZTOV(zp));
 }
@@ -400,6 +399,47 @@
 #endif
 }
 
+static void
+zfs_znode_dmu_init(znode_t *zp, dmu_buf_t *db)
+{
+	znode_t		*nzp;
+	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
+
+	ASSERT(MUTEX_HELD(ZFS_OBJ_MUTEX(zp)));
+
+	mutex_enter(&zp->z_lock);
+
+	ASSERT(zp->z_dbuf == NULL);
+	zp->z_dbuf = db;
+	nzp = dmu_buf_set_user_ie(db, zp, &zp->z_phys, znode_pageout_func);
+
+	/*
+	 * There should be no concurrent zgets on this object,
+	 * so the dbuf must not already have a znode attached.
+	 */
+	if (nzp != NULL)
+		panic("existing znode %p for dbuf %p", nzp, db);
+
+	/*
+	 * Slap on VROOT if we are the root znode
+	 */
+	if (zp->z_id == zfsvfs->z_root)
+		ZTOV(zp)->v_flag |= VROOT;
+
+	mutex_exit(&zp->z_lock);
+	vn_exists(ZTOV(zp));
+}
+
+static void
+zfs_znode_dmu_fini(znode_t *zp)
+{
+	dmu_buf_t *db = zp->z_dbuf;
+	ASSERT(MUTEX_HELD(ZFS_OBJ_MUTEX(zp)));
+	ASSERT(zp->z_dbuf != NULL);
+	zp->z_dbuf = NULL;
+	dmu_buf_rele(db, NULL);
+}
+
 /*
  * Construct a new znode/vnode and intialize.
  *
@@ -408,7 +448,7 @@
  * return the znode
  */
 static znode_t *
-zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, uint64_t obj_num, int blksz)
+zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz)
 {
 	znode_t	*zp;
 	vnode_t *vp;
@@ -416,28 +456,30 @@
 	zp = kmem_cache_alloc(znode_cache, KM_SLEEP);
 
 	ASSERT(zp->z_dirlocks == NULL);
+	ASSERT(zp->z_dbuf == NULL);
 
-	zp->z_phys = db->db_data;
+	zp->z_phys = NULL;
 	zp->z_zfsvfs = zfsvfs;
 	zp->z_unlinked = 0;
 	zp->z_atime_dirty = 0;
-	zp->z_dbuf_held = 0;
 	zp->z_mapcnt = 0;
 	zp->z_last_itx = 0;
-	zp->z_dbuf = db;
-	zp->z_id = obj_num;
+	zp->z_id = db->db_object;
 	zp->z_blksz = blksz;
 	zp->z_seq = 0x7A4653;
 	zp->z_sync_cnt = 0;
+
+	vp = ZTOV(zp);
+	vn_reinit(vp);
+
+	zfs_znode_dmu_init(zp, db);
+
 	zp->z_gen = zp->z_phys->zp_gen;
 
 	mutex_enter(&zfsvfs->z_znodes_lock);
 	list_insert_tail(&zfsvfs->z_all_znodes, zp);
 	mutex_exit(&zfsvfs->z_znodes_lock);
 
-	vp = ZTOV(zp);
-	vn_reinit(vp);
-
 	vp->v_vfsp = zfsvfs->z_parent->z_vfs;
 	vp->v_type = IFTOVT((mode_t)zp->z_phys->zp_mode);
 
@@ -446,8 +488,9 @@
 		if (zp->z_phys->zp_flags & ZFS_XATTR) {
 			vn_setops(vp, zfs_xdvnodeops);
 			vp->v_flag |= V_XATTRDIR;
-		} else
+		} else {
 			vn_setops(vp, zfs_dvnodeops);
+		}
 		zp->z_zn_prefetch = B_TRUE; /* z_prefetch default is enabled */
 		break;
 	case VBLK:
@@ -471,40 +514,12 @@
 		break;
 	}
 
+	/* z_vfs can be NULL when coming from zfs_create_fs() */
+	if (zfsvfs->z_vfs)
+		VFS_HOLD(zfsvfs->z_vfs);
 	return (zp);
 }
 
-static void
-zfs_znode_dmu_init(znode_t *zp)
-{
-	znode_t		*nzp;
-	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
-	dmu_buf_t	*db = zp->z_dbuf;
-
-	mutex_enter(&zp->z_lock);
-
-	nzp = dmu_buf_set_user_ie(db, zp, &zp->z_phys, znode_pageout_func);
-
-	/*
-	 * there should be no
-	 * concurrent zgets on this object.
-	 */
-	ASSERT3P(nzp, ==, NULL);
-
-	/*
-	 * Slap on VROOT if we are the root znode
-	 */
-	if (zp->z_id == zfsvfs->z_root) {
-		ZTOV(zp)->v_flag |= VROOT;
-	}
-
-	ASSERT(zp->z_dbuf_held == 0);
-	zp->z_dbuf_held = 1;
-	VFS_HOLD(zfsvfs->z_vfs);
-	mutex_exit(&zp->z_lock);
-	vn_exists(ZTOV(zp));
-}
-
 /*
  * Create a new DMU object to hold a zfs znode.
  *
@@ -520,32 +535,30 @@
  *		setaclp  - File/Dir initial ACL
  *		fuidp	 - Tracks fuid allocation.
  *
- *	OUT:	oid	- ID of created object
- *		zpp	- allocated znode
+ *	OUT:	zpp	- allocated znode
  *
  */
 void
-zfs_mknode(znode_t *dzp, vattr_t *vap, uint64_t *oid, dmu_tx_t *tx, cred_t *cr,
+zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
     uint_t flag, znode_t **zpp, int bonuslen, zfs_acl_t *setaclp,
     zfs_fuid_info_t **fuidp)
 {
-	dmu_buf_t	*dbp;
+	dmu_buf_t	*db;
 	znode_phys_t	*pzp;
-	znode_t		*zp;
 	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
 	timestruc_t	now;
-	uint64_t	gen;
+	uint64_t	gen, obj;
 	int		err;
 
 	ASSERT(vap && (vap->va_mask & (AT_TYPE|AT_MODE)) == (AT_TYPE|AT_MODE));
 
 	if (zfsvfs->z_assign >= TXG_INITIAL) {		/* ZIL replay */
-		*oid = vap->va_nodeid;
+		obj = vap->va_nodeid;
 		flag |= IS_REPLAY;
 		now = vap->va_ctime;		/* see zfs_replay_create() */
 		gen = vap->va_nblocks;		/* ditto */
 	} else {
-		*oid = 0;
+		obj = 0;
 		gethrestime(&now);
 		gen = dmu_tx_get_txg(tx);
 	}
@@ -561,36 +574,36 @@
 	 */
 	if (vap->va_type == VDIR) {
 		if (flag & IS_REPLAY) {
-			err = zap_create_claim_norm(zfsvfs->z_os, *oid,
+			err = zap_create_claim_norm(zfsvfs->z_os, obj,
 			    zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS,
 			    DMU_OT_ZNODE, sizeof (znode_phys_t) + bonuslen, tx);
 			ASSERT3U(err, ==, 0);
 		} else {
-			*oid = zap_create_norm(zfsvfs->z_os,
+			obj = zap_create_norm(zfsvfs->z_os,
 			    zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS,
 			    DMU_OT_ZNODE, sizeof (znode_phys_t) + bonuslen, tx);
 		}
 	} else {
 		if (flag & IS_REPLAY) {
-			err = dmu_object_claim(zfsvfs->z_os, *oid,
+			err = dmu_object_claim(zfsvfs->z_os, obj,
 			    DMU_OT_PLAIN_FILE_CONTENTS, 0,
 			    DMU_OT_ZNODE, sizeof (znode_phys_t) + bonuslen, tx);
 			ASSERT3U(err, ==, 0);
 		} else {
-			*oid = dmu_object_alloc(zfsvfs->z_os,
+			obj = dmu_object_alloc(zfsvfs->z_os,
 			    DMU_OT_PLAIN_FILE_CONTENTS, 0,
 			    DMU_OT_ZNODE, sizeof (znode_phys_t) + bonuslen, tx);
 		}
 	}
-	VERIFY(0 == dmu_bonus_hold(zfsvfs->z_os, *oid, NULL, &dbp));
-	dmu_buf_will_dirty(dbp, tx);
+	VERIFY(0 == dmu_bonus_hold(zfsvfs->z_os, obj, NULL, &db));
+	dmu_buf_will_dirty(db, tx);
 
 	/*
 	 * Initialize the znode physical data to zero.
 	 */
-	ASSERT(dbp->db_size >= sizeof (znode_phys_t));
-	bzero(dbp->db_data, dbp->db_size);
-	pzp = dbp->db_data;
+	ASSERT(db->db_size >= sizeof (znode_phys_t));
+	bzero(db->db_data, db->db_size);
+	pzp = db->db_data;
 
 	/*
 	 * If this is the root, fix up the half-initialized parent pointer
@@ -598,7 +611,7 @@
 	 */
 	if (flag & IS_ROOT_NODE) {
 		dzp->z_phys = pzp;
-		dzp->z_id = *oid;
+		dzp->z_id = obj;
 	}
 
 	/*
@@ -641,23 +654,10 @@
 	}
 
 	pzp->zp_mode = MAKEIMODE(vap->va_type, vap->va_mode);
-	zp = zfs_znode_alloc(zfsvfs, dbp, *oid, 0);
-
-	zfs_perm_init(zp, dzp, flag, vap, tx, cr, setaclp, fuidp);
-
-	if (zpp) {
-		kmutex_t *hash_mtx = ZFS_OBJ_MUTEX(zp);
-
-		mutex_enter(hash_mtx);
-		zfs_znode_dmu_init(zp);
-		mutex_exit(hash_mtx);
-
-		*zpp = zp;
-	} else {
-		ZTOV(zp)->v_count = 0;
-		dmu_buf_rele(dbp, NULL);
-		zfs_znode_free(zp);
-	}
+	ZFS_OBJ_HOLD_ENTER(zfsvfs, obj)
+	*zpp = zfs_znode_alloc(zfsvfs, db, 0);
+	ZFS_OBJ_HOLD_EXIT(zfsvfs, obj);
+	zfs_perm_init(*zpp, dzp, flag, vap, tx, cr, setaclp, fuidp);
 }
 
 void
@@ -751,42 +751,34 @@
 		return (EINVAL);
 	}
 
-	ASSERT(db->db_object == obj_num);
-	ASSERT(db->db_offset == -1);
-	ASSERT(db->db_data != NULL);
-
 	zp = dmu_buf_get_user(db);
-
 	if (zp != NULL) {
 		mutex_enter(&zp->z_lock);
 
+		/*
+		 * Since we do immediate eviction of the z_dbuf, we
+		 * should never find a dbuf with a znode that doesn't
+		 * know about the dbuf.
+		 */
+		ASSERT3P(zp->z_dbuf, ==, db);
 		ASSERT3U(zp->z_id, ==, obj_num);
 		if (zp->z_unlinked) {
-			dmu_buf_rele(db, NULL);
-			mutex_exit(&zp->z_lock);
-			ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
-			return (ENOENT);
-		} else if (zp->z_dbuf_held) {
-			dmu_buf_rele(db, NULL);
+			err = ENOENT;
 		} else {
-			zp->z_dbuf_held = 1;
-			VFS_HOLD(zfsvfs->z_vfs);
+			VN_HOLD(ZTOV(zp));
+			*zpp = zp;
+			err = 0;
 		}
-
-
-		VN_HOLD(ZTOV(zp));
+		dmu_buf_rele(db, NULL);
 		mutex_exit(&zp->z_lock);
 		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
-		*zpp = zp;
-		return (0);
+		return (err);
 	}
 
 	/*
 	 * Not found create new znode/vnode
 	 */
-	zp = zfs_znode_alloc(zfsvfs, db, obj_num, doi.doi_data_block_size);
-	ASSERT3U(zp->z_id, ==, obj_num);
-	zfs_znode_dmu_init(zp);
+	zp = zfs_znode_alloc(zfsvfs, db, doi.doi_data_block_size);
 	ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
 	*zpp = zp;
 	return (0);
@@ -817,23 +809,15 @@
 		return (EINVAL);
 	}
 
-	ASSERT(db->db_object == obj_num);
-	ASSERT(db->db_offset == -1);
-	ASSERT(db->db_data != NULL);
-
 	if (((znode_phys_t *)db->db_data)->zp_gen != zp->z_gen) {
 		dmu_buf_rele(db, NULL);
 		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
 		return (EIO);
 	}
 
-	zp->z_dbuf = db;
-	zp->z_phys = db->db_data;
-	zfs_znode_dmu_init(zp);
+	zfs_znode_dmu_init(zp, db);
 	zp->z_unlinked = (zp->z_phys->zp_links == 0);
 
-	/* release the hold from zfs_znode_dmu_init() */
-	VFS_RELE(zfsvfs->z_vfs);
 	ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
 
 	return (0);
@@ -843,19 +827,16 @@
 zfs_znode_delete(znode_t *zp, dmu_tx_t *tx)
 {
 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
-	int error;
+	uint64_t obj = zp->z_id;
 
-	ZFS_OBJ_HOLD_ENTER(zfsvfs, zp->z_id);
+	ZFS_OBJ_HOLD_ENTER(zfsvfs, obj);
 	if (zp->z_phys->zp_acl.z_acl_extern_obj) {
-		error = dmu_object_free(zfsvfs->z_os,
-		    zp->z_phys->zp_acl.z_acl_extern_obj, tx);
-		ASSERT3U(error, ==, 0);
+		VERIFY(0 == dmu_object_free(zfsvfs->z_os,
+		    zp->z_phys->zp_acl.z_acl_extern_obj, tx));
 	}
-	error = dmu_object_free(zfsvfs->z_os, zp->z_id, tx);
-	ASSERT3U(error, ==, 0);
-	zp->z_dbuf_held = 0;
-	ZFS_OBJ_HOLD_EXIT(zfsvfs, zp->z_id);
-	dmu_buf_rele(zp->z_dbuf, NULL);
+	VERIFY(0 == dmu_object_free(zfsvfs->z_os, obj, tx));
+	zfs_znode_dmu_fini(zp);
+	ZFS_OBJ_HOLD_EXIT(zfsvfs, obj);
 }
 
 void
@@ -865,7 +846,7 @@
 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
 	uint64_t z_id = zp->z_id;
 
-	ASSERT(zp->z_dbuf_held && zp->z_phys);
+	ASSERT(zp->z_dbuf && zp->z_phys);
 
 	/*
 	 * Don't allow a zfs_zget() while were trying to release this znode
@@ -905,14 +886,12 @@
 		VFS_RELE(zfsvfs->z_vfs);
 		return;
 	}
-	ASSERT(zp->z_phys);
-	ASSERT(zp->z_dbuf_held);
-
-	zp->z_dbuf_held = 0;
 	mutex_exit(&zp->z_lock);
-	dmu_buf_rele(zp->z_dbuf, NULL);
+	zfs_znode_dmu_fini(zp);
 	ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id);
-	VFS_RELE(zfsvfs->z_vfs);
+	/* z_vfs can be NULL when coming from zfs_create_fs() */
+	if (zfsvfs->z_vfs)
+		VFS_RELE(zfsvfs->z_vfs);
 }
 
 void
@@ -1195,11 +1174,12 @@
     int norm, dmu_tx_t *tx)
 {
 	zfsvfs_t	zfsvfs;
-	uint64_t	moid, doid, roid = 0;
+	uint64_t	moid, doid;
 	int		error;
 	znode_t		*rootzp = NULL;
 	vnode_t		*vp;
 	vattr_t		vattr;
+	znode_t		*zp;
 
 	/*
 	 * First attempt to create master node.
@@ -1242,7 +1222,6 @@
 	rootzp->z_zfsvfs = &zfsvfs;
 	rootzp->z_unlinked = 0;
 	rootzp->z_atime_dirty = 0;
-	rootzp->z_dbuf_held = 0;
 
 	vp = ZTOV(rootzp);
 	vn_reinit(vp);
@@ -1261,10 +1240,10 @@
 	list_create(&zfsvfs.z_all_znodes, sizeof (znode_t),
 	    offsetof(znode_t, z_link_node));
 
-	zfs_mknode(rootzp, &vattr, &roid, tx, cr, IS_ROOT_NODE,
-	    NULL, 0, NULL, NULL);
-	ASSERT3U(rootzp->z_id, ==, roid);
-	error = zap_add(os, moid, ZFS_ROOT_OBJ, 8, 1, &roid, tx);
+	zfs_mknode(rootzp, &vattr, tx, cr, IS_ROOT_NODE,
+	    &zp, 0, NULL, NULL);
+	VN_RELE(ZTOV(zp));
+	error = zap_add(os, moid, ZFS_ROOT_OBJ, 8, 1, &rootzp->z_id, tx);
 	ASSERT(error == 0);
 
 	ZTOV(rootzp)->v_count = 0;