6460059 zfs destroy <snapshot> leaves behind kruft
author ahrens
Mon, 09 Oct 2006 10:56:01 -0700
changeset 2885 c0259887ebbc
parent 2884 0a7f3f349684
child 2886 be6558533f55
6460059 zfs destroy <snapshot> leaves behind kruft 6463788 'zfs recv -d' fails if some ancestors already exist 6464897 assertion failed: "BP_GET_COMPRESS(bp) == compress" zio.c, line:897 6472843 panic when write to zvol snapshot 6475506 panic in dmu_recvbackup due to NULL pointer dereference 6475942 need more assertions in dnode_destroy() 6477102 recvbackup ioctl does not advance file offset 6477103 read-only properties should be passed as nvlist 6477900 want more /etc/system tunables for ZFS performance analysis 6479497 ::abuf_find is broken
usr/src/cmd/mdb/common/modules/zfs/zfs.c
usr/src/cmd/zdb/zdb.c
usr/src/cmd/zfs/zfs_main.c
usr/src/common/zfs/zfs_prop.c
usr/src/lib/libzfs/common/libzfs.h
usr/src/lib/libzfs/common/libzfs_dataset.c
usr/src/lib/libzfs/common/libzfs_impl.h
usr/src/lib/libzfs/common/libzfs_util.c
usr/src/uts/common/fs/zfs/arc.c
usr/src/uts/common/fs/zfs/dmu.c
usr/src/uts/common/fs/zfs/dmu_objset.c
usr/src/uts/common/fs/zfs/dmu_send.c
usr/src/uts/common/fs/zfs/dmu_zfetch.c
usr/src/uts/common/fs/zfs/dnode.c
usr/src/uts/common/fs/zfs/dsl_dataset.c
usr/src/uts/common/fs/zfs/dsl_dir.c
usr/src/uts/common/fs/zfs/dsl_prop.c
usr/src/uts/common/fs/zfs/sys/dmu.h
usr/src/uts/common/fs/zfs/sys/dmu_objset.h
usr/src/uts/common/fs/zfs/sys/dnode.h
usr/src/uts/common/fs/zfs/sys/dsl_dataset.h
usr/src/uts/common/fs/zfs/sys/dsl_dir.h
usr/src/uts/common/fs/zfs/sys/dsl_prop.h
usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h
usr/src/uts/common/fs/zfs/sys/zvol.h
usr/src/uts/common/fs/zfs/vdev.c
usr/src/uts/common/fs/zfs/vdev_disk.c
usr/src/uts/common/fs/zfs/zfs_ioctl.c
usr/src/uts/common/fs/zfs/zfs_vfsops.c
usr/src/uts/common/fs/zfs/zfs_znode.c
usr/src/uts/common/fs/zfs/zio.c
usr/src/uts/common/fs/zfs/zvol.c
--- a/usr/src/cmd/mdb/common/modules/zfs/zfs.c	Sun Oct 08 21:26:45 2006 -0700
+++ b/usr/src/cmd/mdb/common/modules/zfs/zfs.c	Mon Oct 09 10:56:01 2006 -0700
@@ -659,10 +659,10 @@
 	GElf_Sym sym;
 	int i;
 	const char *syms[] = {
-		"ARC_mru_top",
-		"ARC_mru_bot",
-		"ARC_mfu_top",
-		"ARC_mfu_bot",
+		"ARC_mru",
+		"ARC_mru_ghost",
+		"ARC_mfu",
+		"ARC_mfu_ghost",
 	};
 
 	if (argc != 2)
--- a/usr/src/cmd/zdb/zdb.c	Sun Oct 08 21:26:45 2006 -0700
+++ b/usr/src/cmd/zdb/zdb.c	Mon Oct 09 10:56:01 2006 -0700
@@ -1047,6 +1047,7 @@
 {
 	dmu_objset_stats_t dds;
 	uint64_t object, object_count;
+	uint64_t refdbytes, usedobjs, scratch;
 	char numbuf[8];
 	char blkbuf[BP_SPRINTF_LEN];
 	char osname[MAXNAMELEN];
@@ -1055,22 +1056,23 @@
 	int print_header = 1;
 	int i, error;
 
-	dmu_objset_stats(os, &dds);
+	dmu_objset_fast_stat(os, &dds);
 
 	if (dds.dds_type < DMU_OST_NUMTYPES)
 		type = objset_types[dds.dds_type];
 
 	if (dds.dds_type == DMU_OST_META) {
 		dds.dds_creation_txg = TXG_INITIAL;
-		dds.dds_last_txg = os->os->os_rootbp.blk_birth;
-		dds.dds_objects_used = os->os->os_rootbp.blk_fill;
-		dds.dds_space_refd =
+		usedobjs = os->os->os_rootbp.blk_fill;
+		refdbytes =
 		    os->os->os_spa->spa_dsl_pool->dp_mos_dir->dd_used_bytes;
+	} else {
+		dmu_objset_space(os, &refdbytes, &scratch, &usedobjs, &scratch);
 	}
 
-	ASSERT3U(dds.dds_objects_used, ==, os->os->os_rootbp.blk_fill);
+	ASSERT3U(usedobjs, ==, os->os->os_rootbp.blk_fill);
 
-	nicenum(dds.dds_space_refd, numbuf);
+	nicenum(refdbytes, numbuf);
 
 	if (verbosity >= 4) {
 		(void) strcpy(blkbuf, ", rootbp ");
@@ -1082,14 +1084,11 @@
 
 	dmu_objset_name(os, osname);
 
-	(void) printf("Dataset %s [%s], ID %llu, cr_txg %llu, last_txg %llu, "
+	(void) printf("Dataset %s [%s], ID %llu, cr_txg %llu, "
 	    "%s, %llu objects%s\n",
 	    osname, type, (u_longlong_t)dmu_objset_id(os),
 	    (u_longlong_t)dds.dds_creation_txg,
-	    (u_longlong_t)dds.dds_last_txg,
-	    numbuf,
-	    (u_longlong_t)dds.dds_objects_used,
-	    blkbuf);
+	    numbuf, (u_longlong_t)usedobjs, blkbuf);
 
 	dump_intent_log(dmu_objset_zil(os));
 
@@ -1117,7 +1116,7 @@
 		object_count++;
 	}
 
-	ASSERT3U(object_count, ==, dds.dds_objects_used);
+	ASSERT3U(object_count, ==, usedobjs);
 
 	(void) printf("\n");
 
--- a/usr/src/cmd/zfs/zfs_main.c	Sun Oct 08 21:26:45 2006 -0700
+++ b/usr/src/cmd/zfs/zfs_main.c	Mon Oct 09 10:56:01 2006 -0700
@@ -1977,7 +1977,7 @@
 }
 
 /*
- * zfs send [-i <fs@snap>] <fs@snap>
+ * zfs send [-i <@snap>] <fs@snap>
  *
  * Send a backup stream to stdout.
  */
@@ -1985,14 +1985,16 @@
 zfs_do_send(int argc, char **argv)
 {
 	char *fromname = NULL;
-	zfs_handle_t *zhp_from = NULL, *zhp_to;
+	char *cp;
+	zfs_handle_t *zhp;
 	int c, err;
-	char fullname[MAXPATHLEN];
 
 	/* check options */
 	while ((c = getopt(argc, argv, ":i:")) != -1) {
 		switch (c) {
 		case 'i':
+			if (fromname)
+				usage(B_FALSE);
 			fromname = optarg;
 			break;
 		case ':':
@@ -2022,44 +2024,36 @@
 
 	if (isatty(STDOUT_FILENO)) {
 		(void) fprintf(stderr,
-		    gettext("Error: Stream can not be written "
-			    "to a terminal.\n"
+		    gettext("Error: Stream can not be written to a terminal.\n"
 			    "You must redirect standard output.\n"));
 		return (1);
 	}
 
-	if ((zhp_to = zfs_open(g_zfs, argv[0], ZFS_TYPE_SNAPSHOT)) == NULL)
+	if ((zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_SNAPSHOT)) == NULL)
 		return (1);
 
-	if (fromname) {
-
-		/*
-		 * If fromname is an abbreviated snapshot name,
-		 * then reconstruct the name of the parent dataset
-		 */
-		if ((strchr(fromname, '@') == NULL) ||
-		    *fromname == '@') {
-			char *cp;
-			cp = strchr(argv[0], '@');
-			if (strchr(fromname, '@') == NULL)
-				*(++cp) = '\0';
-			else
-				*cp = '\0';
-			(void) strncpy(fullname, argv[0], sizeof (fullname));
-			(void) strlcat(fullname, fromname, sizeof (fullname));
-			fromname = fullname;
+	/*
+	 * If they specified the full path to the snapshot, chop off
+	 * everything except the short name of the snapshot.
+	 */
+	if (fromname && (cp = strchr(fromname, '@')) != NULL) {
+		if (cp != fromname &&
+		    strncmp(argv[0], fromname, cp - fromname + 1)) {
+			(void) fprintf(stderr,
+			    gettext("incremental source must be "
+			    "in same filesystem\n"));
+			usage(B_FALSE);
 		}
-
-		if ((zhp_from = zfs_open(g_zfs, fromname,
-		    ZFS_TYPE_SNAPSHOT)) == NULL)
-			return (1);
+		fromname = cp + 1;
+		if (strchr(fromname, '@') || strchr(fromname, '/')) {
+			(void) fprintf(stderr,
+			    gettext("invalid incremental source\n"));
+			usage(B_FALSE);
+		}
 	}
 
-	err = zfs_send(zhp_to, zhp_from);
-
-	if (zhp_from)
-		zfs_close(zhp_from);
-	zfs_close(zhp_to);
+	err = zfs_send(zhp, fromname);
+	zfs_close(zhp);
 
 	return (err != 0);
 }
--- a/usr/src/common/zfs/zfs_prop.c	Sun Oct 08 21:26:45 2006 -0700
+++ b/usr/src/common/zfs/zfs_prop.c	Mon Oct 09 10:56:01 2006 -0700
@@ -156,10 +156,9 @@
 	    ZFS_TYPE_FILESYSTEM,
 	    "on | off", "CANMOUNT", B_TRUE },
 	{ "createtxg",	prop_type_number,	0,	NULL,	prop_readonly,
-	    ZFS_TYPE_ANY, NULL, NULL, NULL},
+	    ZFS_TYPE_ANY, NULL, NULL, B_FALSE},
 	{ "name",	prop_type_string,	0,	NULL,	prop_readonly,
-	    ZFS_TYPE_ANY,
-	    NULL, "NAME", B_FALSE },
+	    ZFS_TYPE_ANY, NULL, "NAME", B_FALSE },
 };
 
 zfs_proptype_t
--- a/usr/src/lib/libzfs/common/libzfs.h	Sun Oct 08 21:26:45 2006 -0700
+++ b/usr/src/lib/libzfs/common/libzfs.h	Mon Oct 09 10:56:01 2006 -0700
@@ -297,7 +297,7 @@
 extern int zfs_snapshot(libzfs_handle_t *, const char *, boolean_t);
 extern int zfs_rollback(zfs_handle_t *, zfs_handle_t *, int);
 extern int zfs_rename(zfs_handle_t *, const char *);
-extern int zfs_send(zfs_handle_t *, zfs_handle_t *);
+extern int zfs_send(zfs_handle_t *, const char *);
 extern int zfs_receive(libzfs_handle_t *, const char *, int, int, int,
     boolean_t);
 extern int zfs_promote(zfs_handle_t *);
--- a/usr/src/lib/libzfs/common/libzfs_dataset.c	Sun Oct 08 21:26:45 2006 -0700
+++ b/usr/src/lib/libzfs/common/libzfs_dataset.c	Mon Oct 09 10:56:01 2006 -0700
@@ -257,8 +257,7 @@
 		}
 	}
 
-	bcopy(&zc.zc_objset_stats, &zhp->zfs_dmustats,
-	    sizeof (zc.zc_objset_stats));
+	zhp->zfs_dmustats = zc.zc_objset_stats; /* structure assignment */
 
 	(void) strlcpy(zhp->zfs_root, zc.zc_value, sizeof (zhp->zfs_root));
 
@@ -274,8 +273,6 @@
 
 	zcmd_free_nvlists(&zc);
 
-	zhp->zfs_volstats = zc.zc_vol_stats;
-
 	if (process_user_props(zhp) != 0)
 		return (-1);
 
@@ -333,7 +330,7 @@
 
 		(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
 
-		if (zhp->zfs_type == ZFS_TYPE_VOLUME) {
+		if (zhp->zfs_dmustats.dds_type == DMU_OST_ZVOL) {
 			(void) zvol_remove_link(hdl, zhp->zfs_name);
 			zc.zc_objset_type = DMU_OST_ZVOL;
 		} else {
@@ -358,6 +355,13 @@
 	 * We've managed to open the dataset and gather statistics.  Determine
 	 * the high-level type.
 	 */
+	if (zhp->zfs_dmustats.dds_type == DMU_OST_ZVOL)
+		zhp->zfs_head_type = ZFS_TYPE_VOLUME;
+	else if (zhp->zfs_dmustats.dds_type == DMU_OST_ZFS)
+		zhp->zfs_head_type = ZFS_TYPE_FILESYSTEM;
+	else
+		abort();
+
 	if (zhp->zfs_dmustats.dds_is_snapshot)
 		zhp->zfs_type = ZFS_TYPE_SNAPSHOT;
 	else if (zhp->zfs_dmustats.dds_type == DMU_OST_ZVOL)
@@ -1235,10 +1239,11 @@
 	nvlist_t *nv;
 	uint64_t value;
 
+	*source = NULL;
 	if (nvlist_lookup_nvlist(zhp->zfs_props,
 	    zfs_prop_to_name(prop), &nv) == 0) {
 		verify(nvlist_lookup_uint64(nv, ZFS_PROP_VALUE, &value) == 0);
-		verify(nvlist_lookup_string(nv, ZFS_PROP_SOURCE, source) == 0);
+		(void) nvlist_lookup_string(nv, ZFS_PROP_SOURCE, source);
 	} else {
 		value = zfs_prop_default_numeric(prop);
 		*source = "";
@@ -1253,10 +1258,11 @@
 	nvlist_t *nv;
 	char *value;
 
+	*source = NULL;
 	if (nvlist_lookup_nvlist(zhp->zfs_props,
 	    zfs_prop_to_name(prop), &nv) == 0) {
 		verify(nvlist_lookup_string(nv, ZFS_PROP_VALUE, &value) == 0);
-		verify(nvlist_lookup_string(nv, ZFS_PROP_SOURCE, source) == 0);
+		(void) nvlist_lookup_string(nv, ZFS_PROP_SOURCE, source);
 	} else {
 		if ((value = (char *)zfs_prop_default_string(prop)) == NULL)
 			value = "";
@@ -1330,10 +1336,6 @@
 		}
 		break;
 
-	case ZFS_PROP_AVAILABLE:
-		*val = zhp->zfs_dmustats.dds_available;
-		break;
-
 	case ZFS_PROP_DEVICES:
 		*val = getprop_uint64(zhp, prop, source);
 
@@ -1365,6 +1367,14 @@
 	case ZFS_PROP_RECORDSIZE:
 	case ZFS_PROP_COMPRESSION:
 	case ZFS_PROP_ZONED:
+	case ZFS_PROP_CREATION:
+	case ZFS_PROP_COMPRESSRATIO:
+	case ZFS_PROP_REFERENCED:
+	case ZFS_PROP_USED:
+	case ZFS_PROP_CREATETXG:
+	case ZFS_PROP_AVAILABLE:
+	case ZFS_PROP_VOLSIZE:
+	case ZFS_PROP_VOLBLOCKSIZE:
 		*val = getprop_uint64(zhp, prop, source);
 		break;
 
@@ -1382,46 +1392,13 @@
 		}
 		break;
 
-	case ZFS_PROP_CREATION:
-		*val = zhp->zfs_dmustats.dds_creation_time;
-		break;
-
 	case ZFS_PROP_QUOTA:
-		if (zhp->zfs_dmustats.dds_quota == 0)
-			*source = "";	/* default */
-		else
-			*source = zhp->zfs_name;
-		*val = zhp->zfs_dmustats.dds_quota;
-		break;
-
 	case ZFS_PROP_RESERVATION:
-		if (zhp->zfs_dmustats.dds_reserved == 0)
+		*val = getprop_uint64(zhp, prop, source);
+		if (*val == 0)
 			*source = "";	/* default */
 		else
 			*source = zhp->zfs_name;
-		*val = zhp->zfs_dmustats.dds_reserved;
-		break;
-
-	case ZFS_PROP_COMPRESSRATIO:
-		/*
-		 * Using physical space and logical space, calculate the
-		 * compression ratio.  We return the number as a multiple of
-		 * 100, so '2.5x' would be returned as 250.
-		 */
-		if (zhp->zfs_dmustats.dds_compressed_bytes == 0)
-			*val = 100ULL;
-		else
-			*val =
-			    (zhp->zfs_dmustats.dds_uncompressed_bytes * 100 /
-			    zhp->zfs_dmustats.dds_compressed_bytes);
-		break;
-
-	case ZFS_PROP_REFERENCED:
-		/*
-		 * 'referenced' refers to the amount of physical space
-		 * referenced (possibly shared) by this object.
-		 */
-		*val = zhp->zfs_dmustats.dds_space_refd;
 		break;
 
 	case ZFS_PROP_SETUID:
@@ -1438,22 +1415,6 @@
 		}
 		break;
 
-	case ZFS_PROP_VOLSIZE:
-		*val = zhp->zfs_volstats.zv_volsize;
-		break;
-
-	case ZFS_PROP_VOLBLOCKSIZE:
-		*val = zhp->zfs_volstats.zv_volblocksize;
-		break;
-
-	case ZFS_PROP_USED:
-		*val = zhp->zfs_dmustats.dds_space_used;
-		break;
-
-	case ZFS_PROP_CREATETXG:
-		*val = zhp->zfs_dmustats.dds_creation_txg;
-		break;
-
 	case ZFS_PROP_MOUNTED:
 		*val = (zhp->zfs_mntopts != NULL);
 		break;
@@ -1577,17 +1538,15 @@
 		 * this into a string unless 'literal' is specified.
 		 */
 		{
-			time_t time = (time_t)
-			    zhp->zfs_dmustats.dds_creation_time;
+			val = getprop_uint64(zhp, prop, &source);
+			time_t time = (time_t)val;
 			struct tm t;
 
 			if (literal ||
 			    localtime_r(&time, &t) == NULL ||
 			    strftime(propbuf, proplen, "%a %b %e %k:%M %Y",
 			    &t) == 0)
-				(void) snprintf(propbuf, proplen, "%llu",
-				    (u_longlong_t)
-				    zhp->zfs_dmustats.dds_creation_time);
+				(void) snprintf(propbuf, proplen, "%llu", val);
 		}
 		break;
 
@@ -1637,7 +1596,7 @@
 		break;
 
 	case ZFS_PROP_ORIGIN:
-		(void) strlcpy(propbuf, zhp->zfs_dmustats.dds_clone_of,
+		(void) strlcpy(propbuf, getprop_string(zhp, prop, &source),
 		    proplen);
 		/*
 		 * If there is no parent at all, return failure to indicate that
@@ -2561,27 +2520,23 @@
  * Dumps a backup of tosnap, incremental from fromsnap if it isn't NULL.
  */
 int
-zfs_send(zfs_handle_t *zhp_to, zfs_handle_t *zhp_from)
+zfs_send(zfs_handle_t *zhp, const char *fromsnap)
 {
 	zfs_cmd_t zc = { 0 };
 	int ret;
 	char errbuf[1024];
-	libzfs_handle_t *hdl = zhp_to->zfs_hdl;
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
 
 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
-	    "cannot send '%s'"), zhp_to->zfs_name);
+	    "cannot send '%s'"), zhp->zfs_name);
 
 	/* do the ioctl() */
-	(void) strlcpy(zc.zc_name, zhp_to->zfs_name, sizeof (zc.zc_name));
-	if (zhp_from) {
-		(void) strlcpy(zc.zc_value, zhp_from->zfs_name,
-		    sizeof (zc.zc_name));
-	} else {
-		zc.zc_value[0] = '\0';
-	}
+	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
+	if (fromsnap)
+		(void) strlcpy(zc.zc_value, fromsnap, sizeof (zc.zc_name));
 	zc.zc_cookie = STDOUT_FILENO;
 
-	ret = ioctl(zhp_to->zfs_hdl->libzfs_fd, ZFS_IOC_SENDBACKUP, &zc);
+	ret = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_SENDBACKUP, &zc);
 	if (ret != 0) {
 		switch (errno) {
 
@@ -2613,6 +2568,73 @@
 }
 
 /*
+ * Create ancestors of 'target', but not target itself, and not
+ * ancestors whose names are shorter than prefixlen.  Die if
+ * prefixlen-ancestor does not exist.
+ */
+static int
+create_parents(libzfs_handle_t *hdl, char *target, int prefixlen)
+{
+	zfs_handle_t *h;
+	char *cp, saved;
+
+	/* prefix ends at the first '/' at or after prefixlen, if any */
+	cp = strchr(target + prefixlen, '/');
+	if (cp == NULL)
+		cp = target + prefixlen;
+	saved = *cp;
+	*cp = '\0';
+	h = zfs_open(hdl, target, ZFS_TYPE_FILESYSTEM);
+	*cp = saved;
+	if (h == NULL)
+		return (-1);
+	zfs_close(h);
+	if (saved != '/')
+		return (0);	/* nothing lies between prefix and target */
+
+	/*
+	 * Attempt to create, mount, and share any ancestor filesystems,
+	 * up to the prefixlen-long one.
+	 */
+	for (cp = target + prefixlen + 1;
+	    (cp = strchr(cp, '/')) != NULL; *cp = '/', cp++) {
+		const char *opname;
+
+		*cp = '\0';
+		h = make_dataset_handle(hdl, target);
+		if (h != NULL) {
+			/* it already exists, nothing to do here */
+			zfs_close(h);
+			continue;
+		}
+
+		opname = dgettext(TEXT_DOMAIN, "create");
+		if (zfs_create(hdl, target, ZFS_TYPE_FILESYSTEM, NULL) != 0)
+			goto ancestorerr;
+		opname = dgettext(TEXT_DOMAIN, "open");
+		h = zfs_open(hdl, target, ZFS_TYPE_FILESYSTEM);
+		if (h == NULL)
+			goto ancestorerr;
+		opname = dgettext(TEXT_DOMAIN, "mount");
+		if (zfs_mount(h, NULL, 0) != 0)
+			goto ancestorerr;
+		opname = dgettext(TEXT_DOMAIN, "share");
+		if (zfs_share(h) != 0)
+			goto ancestorerr;
+		zfs_close(h);
+		continue;
+ancestorerr:
+		if (h != NULL)
+			zfs_close(h);	/* don't leak a handle we opened */
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "failed to %s ancestor '%s'"), opname, target);
+		return (-1);
+	}
+
+	return (0);
+}
+
+/*
  * Restores a backup of tosnap from stdin.
  */
 int
@@ -2621,24 +2643,19 @@
 {
 	zfs_cmd_t zc = { 0 };
 	time_t begin_time;
-	int ioctl_err, err, bytes, size;
+	int ioctl_err, err, bytes, size, choplen;
 	char *cp;
 	dmu_replay_record_t drr;
 	struct drr_begin *drrb = &zc.zc_begin_record;
 	char errbuf[1024];
 	prop_changelist_t *clp;
+	char chopprefix[ZFS_MAXNAMELEN];
 
 	begin_time = time(NULL);
 
 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
 	    "cannot receive"));
 
-	/* trim off snapname, if any */
-	(void) strlcpy(zc.zc_name, tosnap, sizeof (zc.zc_name));
-	cp = strchr(zc.zc_name, '@');
-	if (cp)
-		*cp = '\0';
-
 	/* read in the BEGIN record */
 	cp = (char *)&drr;
 	bytes = 0;
@@ -2671,44 +2688,59 @@
 		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
 	}
 
+	if (strchr(drr.drr_u.drr_begin.drr_toname, '@') == NULL) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
+			    "stream (bad snapshot name)"));
+		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
+	}
 	/*
-	 * Determine name of destination snapshot.
+	 * Determine how much of the snapshot name stored in the stream
+	 * we are going to tack on to the name they specified on the
+	 * command line, and how much we are going to chop off.
+	 *
+	 * If they specified a snapshot, chop the entire name stored in
+	 * the stream.
 	 */
-	(void) strlcpy(zc.zc_value, tosnap, sizeof (zc.zc_value));
+	(void) strcpy(chopprefix, drr.drr_u.drr_begin.drr_toname);
 	if (isprefix) {
-		if (strchr(tosnap, '@') != NULL) {
-			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-			    "destination must be a filesystem"));
-			return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
+		/*
+		 * They specified a fs with -d, we want to tack on
+		 * everything but the pool name stored in the stream
+		 */
+		if (strchr(tosnap, '@')) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
+			    "argument - snapshot not allowed with -d"));
+			return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
 		}
-
-		cp = strchr(drr.drr_u.drr_begin.drr_toname, '/');
+		cp = strchr(chopprefix, '/');
 		if (cp == NULL)
-			cp = drr.drr_u.drr_begin.drr_toname;
-		else
-			cp++;
-
-		(void) strcat(zc.zc_value, "/");
-		(void) strcat(zc.zc_value, cp);
+			cp = strchr(chopprefix, '@');
+		*cp = '\0';
 	} else if (strchr(tosnap, '@') == NULL) {
 		/*
-		 * they specified just a filesystem; tack on the
-		 * snapname from the backup.
+		 * If they specified a filesystem without -d, we want to
+		 * tack on everything after the fs specified in the
+		 * first name from the stream.
 		 */
-		cp = strchr(drr.drr_u.drr_begin.drr_toname, '@');
-		if (cp == NULL || strlen(tosnap) + strlen(cp) >= MAXNAMELEN)
-			return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
-		(void) strcat(zc.zc_value, cp);
+		cp = strchr(chopprefix, '@');
+		*cp = '\0';
 	}
-
+	choplen = strlen(chopprefix);
+
+	/*
+	 * Determine name of destination snapshot, store in zc_value.
+	 */
+	(void) strcpy(zc.zc_value, tosnap);
+	(void) strncat(zc.zc_value, drr.drr_u.drr_begin.drr_toname+choplen,
+	    sizeof (zc.zc_value));
+
+	(void) strcpy(zc.zc_name, zc.zc_value);
 	if (drrb->drr_fromguid) {
-		zfs_handle_t *h;
 		/* incremental backup stream */
-
-		/* do the ioctl to the containing fs */
-		(void) strlcpy(zc.zc_name, zc.zc_value, sizeof (zc.zc_name));
-		cp = strchr(zc.zc_name, '@');
-		*cp = '\0';
+		zfs_handle_t *h;
+
+		/* do the recvbackup ioctl to the containing fs */
+		*strchr(zc.zc_name, '@') = '\0';
 
 		/* make sure destination fs exists */
 		h = zfs_open(hdl, zc.zc_name,
@@ -2737,87 +2769,32 @@
 	} else {
 		/* full backup stream */
 
-		(void) strlcpy(zc.zc_name, zc.zc_value, sizeof (zc.zc_name));
-
-		/* make sure they aren't trying to receive into the root */
-		if (strchr(zc.zc_name, '/') == NULL) {
-			cp = strchr(zc.zc_name, '@');
-			if (cp)
-				*cp = '\0';
-			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-			    "destination '%s' already exists"), zc.zc_name);
-			return (zfs_error(hdl, EZFS_EXISTS, errbuf));
-		}
-
-		if (isprefix) {
-			zfs_handle_t *h;
-
-			/* make sure prefix exists */
-			h = zfs_open(hdl, tosnap, ZFS_TYPE_FILESYSTEM);
-			if (h == NULL)
-				return (-1);
-			zfs_close(h);
-
-			/* create any necessary ancestors up to prefix */
-			zc.zc_objset_type = DMU_OST_ZFS;
-
-			/*
-			 * zc.zc_name is now the full name of the snap
-			 * we're restoring into.  Attempt to create,
-			 * mount, and share any ancestor filesystems, up
-			 * to the one that was named.
-			 */
-			for (cp = zc.zc_name + strlen(tosnap) + 1;
-			    cp = strchr(cp, '/'); *cp = '/', cp++) {
-				const char *opname;
-				*cp = '\0';
-
-				opname = dgettext(TEXT_DOMAIN, "create");
-				if (zfs_create(hdl, zc.zc_name,
-				    ZFS_TYPE_FILESYSTEM, NULL) != 0) {
-					if (errno == EEXIST)
-						continue;
-					goto ancestorerr;
-				}
-
-				opname = dgettext(TEXT_DOMAIN, "open");
-				h = zfs_open(hdl, zc.zc_name,
-				    ZFS_TYPE_FILESYSTEM);
-				if (h == NULL)
-					goto ancestorerr;
-
-				opname = dgettext(TEXT_DOMAIN, "mount");
-				if (zfs_mount(h, NULL, 0) != 0)
-					goto ancestorerr;
-
-				opname = dgettext(TEXT_DOMAIN, "share");
-				if (zfs_share(h) != 0)
-					goto ancestorerr;
-
-				zfs_close(h);
-
-				continue;
-ancestorerr:
-				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-				    "failed to %s ancestor '%s'"), opname,
-				    zc.zc_name);
-				return (zfs_error(hdl, EZFS_BADRESTORE,
-				    errbuf));
-			}
-		}
-
 		/* Make sure destination fs does not exist */
-		cp = strchr(zc.zc_name, '@');
-		*cp = '\0';
+		*strchr(zc.zc_name, '@') = '\0';
 		if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) == 0) {
 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 			    "destination '%s' exists"), zc.zc_name);
 			return (zfs_error(hdl, EZFS_EXISTS, errbuf));
 		}
 
+		if (strchr(zc.zc_name, '/') == NULL) {
+			/*
+			 * they're trying to do a recv into a
+			 * nonexistent topmost filesystem.
+			 */
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "destination does not exist"), zc.zc_name);
+			return (zfs_error(hdl, EZFS_EXISTS, errbuf));
+		}
+
 		/* Do the recvbackup ioctl to the fs's parent. */
-		cp = strrchr(zc.zc_name, '/');
-		*cp = '\0';
+		*strrchr(zc.zc_name, '/') = '\0';
+
+		if (isprefix && (err = create_parents(hdl,
+		    zc.zc_value, strlen(tosnap))) != 0) {
+			return (zfs_error(hdl, EZFS_BADRESTORE, errbuf));
+		}
+
 	}
 
 	zc.zc_cookie = STDIN_FILENO;
--- a/usr/src/lib/libzfs/common/libzfs_impl.h	Sun Oct 08 21:26:45 2006 -0700
+++ b/usr/src/lib/libzfs/common/libzfs_impl.h	Mon Oct 09 10:56:01 2006 -0700
@@ -58,9 +58,9 @@
 struct zfs_handle {
 	libzfs_handle_t *zfs_hdl;
 	char zfs_name[ZFS_MAXNAMELEN];
-	zfs_type_t zfs_type;
+	zfs_type_t zfs_type; /* type including snapshot */
+	zfs_type_t zfs_head_type; /* type excluding snapshot */
 	dmu_objset_stats_t zfs_dmustats;
-	zvol_stats_t zfs_volstats;
 	nvlist_t *zfs_props;
 	nvlist_t *zfs_user_props;
 	boolean_t zfs_mntcheck;
@@ -72,7 +72,7 @@
  * This is different from checking zfs_type, because it will also catch
  * snapshots of volumes.
  */
-#define	ZFS_IS_VOLUME(zhp) ((zhp)->zfs_volstats.zv_volblocksize != 0)
+#define	ZFS_IS_VOLUME(zhp) ((zhp)->zfs_head_type == ZFS_TYPE_VOLUME)
 
 struct zpool_handle {
 	libzfs_handle_t *zpool_hdl;
--- a/usr/src/lib/libzfs/common/libzfs_util.c	Sun Oct 08 21:26:45 2006 -0700
+++ b/usr/src/lib/libzfs/common/libzfs_util.c	Mon Oct 09 10:56:01 2006 -0700
@@ -502,7 +502,7 @@
 zcmd_alloc_dst_nvlist(libzfs_handle_t *hdl, zfs_cmd_t *zc, size_t len)
 {
 	if (len == 0)
-		len = 1024;
+		len = 2048;
 	zc->zc_nvlist_dst_size = len;
 	if ((zc->zc_nvlist_dst = (uint64_t)(uintptr_t)
 	    zfs_alloc(hdl, zc->zc_nvlist_dst_size)) == NULL)
@@ -529,10 +529,7 @@
 }
 
 /*
- * Called to free the destination nvlist stored in the command structure.  This
- * is only needed if the caller must abort abnormally.  The various other
- * zcmd_*() routines will free it on failure (or on success, for
- * zcmd_read_nvlist).
+ * Called to free the src and dst nvlists stored in the command structure.
  */
 void
 zcmd_free_nvlists(zfs_cmd_t *zc)
--- a/usr/src/uts/common/fs/zfs/arc.c	Sun Oct 08 21:26:45 2006 -0700
+++ b/usr/src/uts/common/fs/zfs/arc.c	Mon Oct 09 10:56:01 2006 -0700
@@ -149,6 +149,12 @@
 static int arc_dead;
 
 /*
+ * These tunables are for performance analysis.
+ */
+uint64_t zfs_arc_max;
+uint64_t zfs_arc_min;
+
+/*
  * Note that buffers can be on one of 5 states:
  *	ARC_anon	- anonymous (discussed below)
  *	ARC_mru		- recently used, currently cached
@@ -2429,6 +2435,16 @@
 	else
 		arc.c_max = arc.c_min;
 	arc.c_max = MAX(arc.c * 6, arc.c_max);
+
+	/*
+	 * Allow the tunables to override our calculations if they are
+	 * reasonable (i.e. over 64MB)
+	 */
+	if (zfs_arc_max > 64<<20 && zfs_arc_max < physmem * PAGESIZE)
+		arc.c_max = zfs_arc_max;
+	if (zfs_arc_min > 64<<20 && zfs_arc_min <= arc.c_max)
+		arc.c_min = zfs_arc_min;
+
 	arc.c = arc.c_max;
 	arc.p = (arc.c >> 1);
 
--- a/usr/src/uts/common/fs/zfs/dmu.c	Sun Oct 08 21:26:45 2006 -0700
+++ b/usr/src/uts/common/fs/zfs/dmu.c	Mon Oct 09 10:56:01 2006 -0700
@@ -37,6 +37,7 @@
 #include <sys/dsl_dir.h>
 #include <sys/dsl_pool.h>
 #include <sys/dsl_synctask.h>
+#include <sys/dsl_prop.h>
 #include <sys/dmu_zfetch.h>
 #include <sys/zfs_ioctl.h>
 #include <sys/zap.h>
@@ -233,7 +234,7 @@
 	return (0);
 }
 
-int
+static int
 dmu_buf_hold_array(objset_t *os, uint64_t object, uint64_t offset,
     uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp)
 {
@@ -367,7 +368,6 @@
 		bzero((char *)buf + newsz, size - newsz);
 		size = newsz;
 	}
-	dnode_rele(dn, FTAG);
 
 	while (size > 0) {
 		uint64_t mylen = MIN(size, DMU_MAX_ACCESS / 2);
@@ -377,7 +377,7 @@
 		 * NB: we could do this block-at-a-time, but it's nice
 		 * to be reading in parallel.
 		 */
-		err = dmu_buf_hold_array(os, object, offset, mylen,
+		err = dmu_buf_hold_array_by_dnode(dn, offset, mylen,
 		    TRUE, FTAG, &numbufs, &dbp);
 		if (err)
 			return (err);
@@ -400,6 +400,7 @@
 		}
 		dmu_buf_rele_array(dbp, numbufs, FTAG);
 	}
+	dnode_rele(dn, FTAG);
 	return (0);
 }
 
@@ -761,18 +762,6 @@
 	return (arc_flag == ARC_NOWAIT ? EINPROGRESS : 0);
 }
 
-uint64_t
-dmu_object_max_nonzero_offset(objset_t *os, uint64_t object)
-{
-	dnode_t *dn;
-
-	/* XXX assumes dnode_hold will not get an i/o error */
-	(void) dnode_hold(os->os, object, FTAG, &dn);
-	uint64_t rv = dnode_max_nonzero_offset(dn);
-	dnode_rele(dn, FTAG);
-	return (rv);
-}
-
 int
 dmu_object_set_blocksize(objset_t *os, uint64_t object, uint64_t size, int ibs,
 	dmu_tx_t *tx)
--- a/usr/src/uts/common/fs/zfs/dmu_objset.c	Sun Oct 08 21:26:45 2006 -0700
+++ b/usr/src/uts/common/fs/zfs/dmu_objset.c	Mon Oct 09 10:56:01 2006 -0700
@@ -34,6 +34,7 @@
 #include <sys/dsl_synctask.h>
 #include <sys/dnode.h>
 #include <sys/dbuf.h>
+#include <sys/zvol.h>
 #include <sys/dmu_tx.h>
 #include <sys/zio_checksum.h>
 #include <sys/zap.h>
@@ -721,7 +722,6 @@
 	}
 }
 
-
 /* called from dsl */
 void
 dmu_objset_sync(objset_impl_t *os, dmu_tx_t *tx)
@@ -783,15 +783,38 @@
 }
 
 void
-dmu_objset_stats(objset_t *os, dmu_objset_stats_t *dds)
+dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp,
+    uint64_t *usedobjsp, uint64_t *availobjsp)
+{
+	dsl_dataset_space(os->os->os_dsl_dataset, refdbytesp, availbytesp,
+	    usedobjsp, availobjsp);
+}
+
+uint64_t
+dmu_objset_fsid_guid(objset_t *os)
+{
+	return (dsl_dataset_fsid_guid(os->os->os_dsl_dataset));
+}
+
+void
+dmu_objset_fast_stat(objset_t *os, dmu_objset_stats_t *stat)
 {
-	if (os->os->os_dsl_dataset != NULL) {
-		dsl_dataset_stats(os->os->os_dsl_dataset, dds);
-	} else {
-		ASSERT(os->os->os_phys->os_type == DMU_OST_META);
-		bzero(dds, sizeof (*dds));
-	}
-	dds->dds_type = os->os->os_phys->os_type;
+	stat->dds_type = os->os->os_phys->os_type;
+	if (os->os->os_dsl_dataset)
+		dsl_dataset_fast_stat(os->os->os_dsl_dataset, stat);
+}
+
+void
+dmu_objset_stats(objset_t *os, nvlist_t *nv)
+{
+	ASSERT(os->os->os_dsl_dataset ||
+	    os->os->os_phys->os_type == DMU_OST_META);
+
+	if (os->os->os_dsl_dataset != NULL)
+		dsl_dataset_stats(os->os->os_dsl_dataset, nv);
+
+	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_TYPE,
+	    os->os->os_phys->os_type);
 }
 
 int
--- a/usr/src/uts/common/fs/zfs/dmu_send.c	Sun Oct 08 21:26:45 2006 -0700
+++ b/usr/src/uts/common/fs/zfs/dmu_send.c	Mon Oct 09 10:56:01 2006 -0700
@@ -789,7 +789,8 @@
 		 * matches the incremental source
 		 */
 		if (force) {
-			if (ds->ds_prev->ds_phys->ds_guid !=
+			if (ds->ds_prev == NULL ||
+			    ds->ds_prev->ds_phys->ds_guid !=
 			    drrb->drr_fromguid) {
 				dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
 				return (ENODEV);
--- a/usr/src/uts/common/fs/zfs/dmu_zfetch.c	Sun Oct 08 21:26:45 2006 -0700
+++ b/usr/src/uts/common/fs/zfs/dmu_zfetch.c	Mon Oct 09 10:56:01 2006 -0700
@@ -37,6 +37,8 @@
  * until we can get this working the way we want it to.
  */
 
+int zfs_prefetch_disable;
+
 /* max # of streams per zfetch */
 uint32_t	zfetch_max_streams = 8;
 /* min time before stream reclaim */
@@ -578,10 +580,12 @@
 	unsigned int	blkshft;
 	uint64_t	blksz;
 
+	if (zfs_prefetch_disable)
+		return;
+
 	/* files that aren't ln2 blocksz are only one block -- nothing to do */
-	if (!zf->zf_dnode->dn_datablkshift) {
+	if (!zf->zf_dnode->dn_datablkshift)
 		return;
-	}
 
 	/* convert offset and size, into blockid and nblocks */
 	blkshft = zf->zf_dnode->dn_datablkshift;
--- a/usr/src/uts/common/fs/zfs/dnode.c	Sun Oct 08 21:26:45 2006 -0700
+++ b/usr/src/uts/common/fs/zfs/dnode.c	Mon Oct 09 10:56:01 2006 -0700
@@ -291,6 +291,17 @@
 {
 	objset_impl_t *os = dn->dn_objset;
 
+#ifdef ZFS_DEBUG
+	int i;
+
+	for (i = 0; i < TXG_SIZE; i++) {
+		ASSERT(!list_link_active(&dn->dn_dirty_link[i]));
+		ASSERT(NULL == list_head(&dn->dn_dirty_dbufs[i]));
+		ASSERT(0 == avl_numnodes(&dn->dn_ranges[i]));
+	}
+	ASSERT(NULL == list_head(&dn->dn_dbufs));
+#endif
+
 	mutex_enter(&os->os_lock);
 	list_remove(&os->os_dnodes, dn);
 	mutex_exit(&os->os_lock);
@@ -797,16 +808,6 @@
 	return (ENOTSUP);
 }
 
-uint64_t
-dnode_max_nonzero_offset(dnode_t *dn)
-{
-	if (dn->dn_phys->dn_maxblkid == 0 &&
-	    BP_IS_HOLE(&dn->dn_phys->dn_blkptr[0]))
-		return (0);
-	else
-		return ((dn->dn_phys->dn_maxblkid+1) * dn->dn_datablksz);
-}
-
 void
 dnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx)
 {
--- a/usr/src/uts/common/fs/zfs/dsl_dataset.c	Sun Oct 08 21:26:45 2006 -0700
+++ b/usr/src/uts/common/fs/zfs/dsl_dataset.c	Mon Oct 09 10:56:01 2006 -0700
@@ -181,6 +181,8 @@
 uint64_t
 dsl_dataset_prev_snap_txg(dsl_dataset_t *ds)
 {
+	uint64_t trysnap = 0;
+
 	if (ds == NULL)
 		return (0);
 	/*
@@ -193,7 +195,10 @@
 	 * snapshot, because we could set the sync task in the quiescing
 	 * phase.  So this should only be used as a guess.
 	 */
-	return (MAX(ds->ds_phys->ds_prev_snap_txg, ds->ds_trysnap_txg));
+	if (ds->ds_trysnap_txg >
+	    spa_last_synced_txg(ds->ds_dir->dd_pool->dp_spa))
+		trysnap = ds->ds_trysnap_txg;
+	return (MAX(ds->ds_phys->ds_prev_snap_txg, trysnap));
 }
 
 int
@@ -858,7 +863,9 @@
 		return;
 
 	ASSERT(ds->ds_user_ptr != NULL);
-	ASSERT(ds->ds_phys->ds_next_snap_obj == 0);
+
+	if (ds->ds_phys->ds_next_snap_obj != 0)
+		panic("dirtying snapshot!");
 
 	dp = ds->ds_dir->dd_pool;
 
@@ -1410,45 +1417,78 @@
 }
 
 void
-dsl_dataset_stats(dsl_dataset_t *ds, dmu_objset_stats_t *dds)
+dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
 {
-	/* fill in properties crap */
-	dsl_dir_stats(ds->ds_dir, dds);
-
-	if (ds->ds_phys->ds_num_children != 0) {
-		dds->dds_is_snapshot = TRUE;
-		dds->dds_num_clones = ds->ds_phys->ds_num_children - 1;
-	}
+	dsl_dir_stats(ds->ds_dir, nv);
 
-	dds->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT;
-	dds->dds_last_txg = ds->ds_phys->ds_bp.blk_birth;
-
-	dds->dds_objects_used = ds->ds_phys->ds_bp.blk_fill;
-	dds->dds_objects_avail = DN_MAX_OBJECT - dds->dds_objects_used;
-
-	/* We override the dataset's creation time... they should be the same */
-	dds->dds_creation_time = ds->ds_phys->ds_creation_time;
-	dds->dds_creation_txg = ds->ds_phys->ds_creation_txg;
-	dds->dds_space_refd = ds->ds_phys->ds_used_bytes;
-	dds->dds_fsid_guid = ds->ds_phys->ds_fsid_guid;
+	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATION,
+	    ds->ds_phys->ds_creation_time);
+	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATETXG,
+	    ds->ds_phys->ds_creation_txg);
+	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED,
+	    ds->ds_phys->ds_used_bytes);
 
 	if (ds->ds_phys->ds_next_snap_obj) {
 		/*
 		 * This is a snapshot; override the dd's space used with
-		 * our unique space
+		 * our unique space and compression ratio.
 		 */
-		dds->dds_space_used = ds->ds_phys->ds_unique_bytes;
-		dds->dds_compressed_bytes =
-		    ds->ds_phys->ds_compressed_bytes;
-		dds->dds_uncompressed_bytes =
-		    ds->ds_phys->ds_uncompressed_bytes;
+		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED,
+		    ds->ds_phys->ds_unique_bytes);
+		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO,
+		    ds->ds_phys->ds_compressed_bytes == 0 ? 100 :
+		    (ds->ds_phys->ds_uncompressed_bytes * 100 /
+		    ds->ds_phys->ds_compressed_bytes));
 	}
 }
 
-dsl_pool_t *
-dsl_dataset_pool(dsl_dataset_t *ds)
+/*
+ * Fill in the fixed-size stats.  *stat is not assumed to be zeroed: every
+ * field except dds_type (not touched here) is written on each call.
+ */
+void
+dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat)
 {
-	return (ds->ds_dir->dd_pool);
+	stat->dds_creation_txg = ds->ds_phys->ds_creation_txg;
+	stat->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT;
+	if (ds->ds_phys->ds_next_snap_obj) {
+		stat->dds_is_snapshot = B_TRUE;
+		stat->dds_num_clones = ds->ds_phys->ds_num_children - 1;
+	} else {
+		stat->dds_is_snapshot = B_FALSE;
+		stat->dds_num_clones = 0;
+	}
+
+	/* clone origin is really a dsl_dir thing... */
+	stat->dds_clone_of[0] = '\0';
+	if (ds->ds_dir->dd_phys->dd_clone_parent_obj) {
+		dsl_dataset_t *ods;
+
+		rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER);
+		VERIFY(0 == dsl_dataset_open_obj(ds->ds_dir->dd_pool,
+		    ds->ds_dir->dd_phys->dd_clone_parent_obj,
+		    NULL, DS_MODE_NONE, FTAG, &ods));
+		dsl_dataset_name(ods, stat->dds_clone_of);
+		dsl_dataset_close(ods, DS_MODE_NONE, FTAG);
+		rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
+	}
+}
+
+uint64_t
+dsl_dataset_fsid_guid(dsl_dataset_t *ds)
+{
+	return (ds->ds_phys->ds_fsid_guid);
+}
+
+void
+dsl_dataset_space(dsl_dataset_t *ds,
+    uint64_t *refdbytesp, uint64_t *availbytesp,
+    uint64_t *usedobjsp, uint64_t *availobjsp)
+{
+	*refdbytesp = ds->ds_phys->ds_used_bytes;
+	*availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE);
+	*usedobjsp = ds->ds_phys->ds_bp.blk_fill;
+	*availobjsp = DN_MAX_OBJECT - *usedobjsp;
 }
 
 /* ARGSUSED */
--- a/usr/src/uts/common/fs/zfs/dsl_dir.c	Sun Oct 08 21:26:45 2006 -0700
+++ b/usr/src/uts/common/fs/zfs/dsl_dir.c	Mon Oct 09 10:56:01 2006 -0700
@@ -38,8 +38,6 @@
 #include "zfs_namecheck.h"
 
 static uint64_t dsl_dir_estimated_space(dsl_dir_t *dd);
-static uint64_t dsl_dir_space_available(dsl_dir_t *dd,
-    dsl_dir_t *ancestor, int64_t delta, int ondiskonly);
 static void dsl_dir_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx);
 
 
@@ -493,32 +491,36 @@
 }
 
 void
-dsl_dir_stats(dsl_dir_t *dd, dmu_objset_stats_t *dds)
+dsl_dir_stats(dsl_dir_t *dd, nvlist_t *nv)
 {
-	bzero(dds, sizeof (dmu_objset_stats_t));
-
-	dds->dds_available = dsl_dir_space_available(dd, NULL, 0, TRUE);
+	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_AVAILABLE,
+	    dsl_dir_space_available(dd, NULL, 0, TRUE));
 
 	mutex_enter(&dd->dd_lock);
-	dds->dds_space_used = dd->dd_used_bytes;
-	dds->dds_compressed_bytes = dd->dd_phys->dd_compressed_bytes;
-	dds->dds_uncompressed_bytes = dd->dd_phys->dd_uncompressed_bytes;
-	dds->dds_quota = dd->dd_phys->dd_quota;
-	dds->dds_reserved = dd->dd_phys->dd_reserved;
+	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED, dd->dd_used_bytes);
+	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_QUOTA,
+	    dd->dd_phys->dd_quota);
+	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_RESERVATION,
+	    dd->dd_phys->dd_reserved);
+	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO,
+	    dd->dd_phys->dd_compressed_bytes == 0 ? 100 :
+	    (dd->dd_phys->dd_uncompressed_bytes * 100 /
+	    dd->dd_phys->dd_compressed_bytes));
 	mutex_exit(&dd->dd_lock);
 
-	dds->dds_creation_time = dd->dd_phys->dd_creation_time;
-
 	if (dd->dd_phys->dd_clone_parent_obj) {
 		dsl_dataset_t *ds;
+		char buf[MAXNAMELEN];
 
 		rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
 		VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool,
 		    dd->dd_phys->dd_clone_parent_obj,
 		    NULL, DS_MODE_NONE, FTAG, &ds));
-		dsl_dataset_name(ds, dds->dds_clone_of);
+		dsl_dataset_name(ds, buf);
 		dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
 		rw_exit(&dd->dd_pool->dp_config_rwlock);
+
+		dsl_prop_nvlist_add_string(nv, ZFS_PROP_ORIGIN, buf);
 	}
 }
 
@@ -584,7 +586,7 @@
  * to it?  If ondiskonly is set, we're only interested in what's
  * on-disk, not estimated pending changes.
  */
-static uint64_t
+uint64_t
 dsl_dir_space_available(dsl_dir_t *dd,
     dsl_dir_t *ancestor, int64_t delta, int ondiskonly)
 {
--- a/usr/src/uts/common/fs/zfs/dsl_prop.c	Sun Oct 08 21:26:45 2006 -0700
+++ b/usr/src/uts/common/fs/zfs/dsl_prop.c	Mon Oct 09 10:56:01 2006 -0700
@@ -185,17 +185,6 @@
 }
 
 /*
- * Return 0 on success, ENOENT if ddname is invalid, EOVERFLOW if
- * valuelen not big enough.
- */
-int
-dsl_prop_get_string(const char *ddname, const char *propname,
-    char *value, int valuelen, char *setpoint)
-{
-	return (dsl_prop_get(ddname, propname, 1, valuelen, value, setpoint));
-}
-
-/*
  * Get the current property value.  It may have changed by the time this
  * function returns, so it is NOT safe to follow up with
  * dsl_prop_register() and assume that the value has not changed in
@@ -210,13 +199,6 @@
 	return (dsl_prop_get(ddname, propname, 8, 1, valuep, setpoint));
 }
 
-int
-dsl_prop_get_ds_integer(dsl_dir_t *dd, const char *propname,
-    uint64_t *valuep, char *setpoint)
-{
-	return (dsl_prop_get_ds(dd, propname, 8, 1, valuep, setpoint));
-}
-
 /*
  * Unregister this callback.  Return 0 on success, ENOENT if ddname is
  * invalid, ENOMSG if no matching callback registered.
@@ -366,12 +348,26 @@
 }
 
 int
+dsl_prop_set_dd(dsl_dir_t *dd, const char *propname,
+    int intsz, int numints, const void *buf)
+{
+	struct prop_set_arg psa;
+
+	psa.name = propname;
+	psa.intsz = intsz;
+	psa.numints = numints;
+	psa.buf = buf;
+
+	return (dsl_sync_task_do(dd->dd_pool,
+	    NULL, dsl_prop_set_sync, dd, &psa, 2));
+}
+
+int
 dsl_prop_set(const char *ddname, const char *propname,
     int intsz, int numints, const void *buf)
 {
 	dsl_dir_t *dd;
 	int err;
-	struct prop_set_arg psa;
 
 	/*
 	 * We must do these checks before we get to the syncfunc, since
@@ -385,16 +381,8 @@
 	err = dsl_dir_open(ddname, FTAG, &dd, NULL);
 	if (err)
 		return (err);
-
-	psa.name = propname;
-	psa.intsz = intsz;
-	psa.numints = numints;
-	psa.buf = buf;
-	err = dsl_sync_task_do(dd->dd_pool,
-	    NULL, dsl_prop_set_sync, dd, &psa, 2);
-
+	err = dsl_prop_set_dd(dd, propname, intsz, numints, buf);
 	dsl_dir_close(dd, FTAG);
-
 	return (err);
 }
 
@@ -409,12 +397,6 @@
 	int err = 0;
 	dsl_pool_t *dp;
 	objset_t *mos;
-	zap_cursor_t zc;
-	zap_attribute_t za;
-	char setpoint[MAXNAMELEN];
-	char *tmp;
-	nvlist_t *propval;
-	zfs_prop_t prop;
 
 	if (dsl_dataset_is_snapshot(ds)) {
 		VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0);
@@ -428,11 +410,17 @@
 
 	rw_enter(&dp->dp_config_rwlock, RW_READER);
 	for (; dd != NULL; dd = dd->dd_parent) {
+		char setpoint[MAXNAMELEN];
+		zap_cursor_t zc;
+		zap_attribute_t za;
+
 		dsl_dir_name(dd, setpoint);
 
 		for (zap_cursor_init(&zc, mos, dd->dd_phys->dd_props_zapobj);
 		    (err = zap_cursor_retrieve(&zc, &za)) == 0;
 		    zap_cursor_advance(&zc)) {
+			nvlist_t *propval;
+			zfs_prop_t prop;
 			/*
 			 * Skip non-inheritable properties.
 			 */
@@ -451,7 +439,8 @@
 				/*
 				 * String property
 				 */
-				tmp = kmem_alloc(za.za_num_integers, KM_SLEEP);
+				char *tmp = kmem_alloc(za.za_num_integers,
+				    KM_SLEEP);
 				err = zap_lookup(mos,
 				    dd->dd_phys->dd_props_zapobj,
 				    za.za_name, 1, za.za_num_integers,
@@ -488,3 +477,25 @@
 
 	return (err);
 }
+
+void
+dsl_prop_nvlist_add_uint64(nvlist_t *nv, zfs_prop_t prop, uint64_t value)
+{
+	nvlist_t *propval;
+
+	VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+	VERIFY(nvlist_add_uint64(propval, ZFS_PROP_VALUE, value) == 0);
+	VERIFY(nvlist_add_nvlist(nv, zfs_prop_to_name(prop), propval) == 0);
+	nvlist_free(propval);
+}
+
+void
+dsl_prop_nvlist_add_string(nvlist_t *nv, zfs_prop_t prop, const char *value)
+{
+	nvlist_t *propval;
+
+	VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+	VERIFY(nvlist_add_string(propval, ZFS_PROP_VALUE, value) == 0);
+	VERIFY(nvlist_add_nvlist(nv, zfs_prop_to_name(prop), propval) == 0);
+	nvlist_free(propval);
+}
--- a/usr/src/uts/common/fs/zfs/sys/dmu.h	Sun Oct 08 21:26:45 2006 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/dmu.h	Mon Oct 09 10:56:01 2006 -0700
@@ -59,6 +59,7 @@
 struct drr_end;
 struct zbookmark;
 struct spa;
+struct nvlist;
 
 typedef struct objset objset_t;
 typedef struct dmu_tx dmu_tx_t;
@@ -160,9 +161,6 @@
 int dmu_objset_rollback(const char *name);
 int dmu_objset_snapshot(char *fsname, char *snapname, boolean_t recursive);
 int dmu_objset_rename(const char *name, const char *newname);
-void dmu_objset_set_quota(objset_t *os, uint64_t quota);
-uint64_t dmu_objset_get_quota(objset_t *os);
-int dmu_objset_request_reservation(objset_t *os, uint64_t reservation);
 int dmu_objset_find(char *name, int func(char *, void *), void *arg,
     int flags);
 void dmu_objset_byteswap(void *buf, size_t size);
@@ -322,8 +320,6 @@
  * with dmu_buf_rele_array.  You can NOT release the hold on each buffer
  * individually with dmu_buf_rele.
  */
-int dmu_buf_hold_array(objset_t *os, uint64_t object, uint64_t offset,
-    uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp);
 int dmu_buf_hold_array_by_bonus(dmu_buf_t *db, uint64_t offset,
     uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp);
 void dmu_buf_rele_array(dmu_buf_t **, int numbufs, void *tag);
@@ -360,9 +356,6 @@
     dmu_buf_evict_func_t *pageout_func);
 void dmu_evict_user(objset_t *os, dmu_buf_evict_func_t *func);
 
-void dmu_buf_hold_data(dmu_buf_t *db);
-void dmu_buf_rele_data(dmu_buf_t *db);
-
 /*
  * Returns the user_ptr set with dmu_buf_set_user(), or NULL if not set.
  */
@@ -474,88 +467,47 @@
 void dmu_object_size_from_db(dmu_buf_t *db, uint32_t *blksize,
     u_longlong_t *nblk512);
 
-/*
- * Get the maximum nonzero offset in the object (ie. this offset and all
- * offsets following are zero).
- *
- * XXX Perhaps integrate this with dmu_object_info(), although that
- * would then have to bring in the indirect blocks.
- */
-uint64_t dmu_object_max_nonzero_offset(objset_t *os, uint64_t object);
-
 typedef struct dmu_objset_stats {
+	uint64_t dds_num_clones; /* number of clones of this */
+	uint64_t dds_creation_txg;
 	dmu_objset_type_t dds_type;
 	uint8_t dds_is_snapshot;
 	uint8_t dds_inconsistent;
-	uint8_t dds_pad[2];
-
-	uint64_t dds_creation_time;
-	uint64_t dds_creation_txg;
-
 	char dds_clone_of[MAXNAMELEN];
-
-	/* How much data is there in this objset? */
-
-	/*
-	 * Space referenced, taking into account pending writes and
-	 * frees.  Only relavent to filesystems and snapshots (not
-	 * collections).
-	 */
-	uint64_t dds_space_refd;
-
-	/*
-	 * Space "used", taking into account pending writes and frees, and
-	 * children's reservations (in bytes).  This is the amount of
-	 * space that will be freed if this and all dependent items are
-	 * destroyed (eg. child datasets, objsets, and snapshots).  So
-	 * for snapshots, this is the amount of space unique to this
-	 * snapshot.
-	 */
-	uint64_t dds_space_used;
-
-	/*
-	 * Compressed and uncompressed bytes consumed.  Does not take
-	 * into account reservations.  Used for computing compression
-	 * ratio.
-	 */
-	uint64_t dds_compressed_bytes;
-	uint64_t dds_uncompressed_bytes;
-
-	/*
-	 * The ds_fsid_guid is a 56-bit ID that can change to avoid
-	 * collisions.  The ds_guid is a 64-bit ID that will never
-	 * change, so there is a small probability that it will collide.
-	 */
-	uint64_t dds_fsid_guid;
-
-	uint64_t dds_objects_used;	/* number of objects used */
-	uint64_t dds_objects_avail;	/* number of objects available */
-
-	uint64_t dds_num_clones; /* number of clones of this */
-
-	/* The dataset's administratively-set quota, in bytes. */
-	uint64_t dds_quota;
-
-	/* The dataset's administratively-set reservation, in bytes */
-	uint64_t dds_reserved;
-
-	/*
-	 * The amount of additional space that this dataset can consume.
-	 * Takes into account quotas & reservations.
-	 * (Assuming that no other datasets consume it first.)
-	 */
-	uint64_t dds_available;
-
-	/*
-	 * Used for debugging purposes
-	 */
-	uint64_t dds_last_txg;
 } dmu_objset_stats_t;
 
 /*
  * Get stats on a dataset.
  */
-void dmu_objset_stats(objset_t *os, dmu_objset_stats_t *dds);
+void dmu_objset_fast_stat(objset_t *os, dmu_objset_stats_t *stat);
+
+/*
+ * Add entries to the nvlist for all the objset's properties.  See
+ * zfs_prop_table[] and zfs(1m) for details on the properties.
+ */
+void dmu_objset_stats(objset_t *os, struct nvlist *nv);
+
+/*
+ * Get the space usage statistics for statvfs().
+ *
+ * refdbytes is the amount of space "referenced" by this objset.
+ * availbytes is the amount of space available to this objset, taking
+ * into account quotas & reservations, assuming that no other objsets
+ * use the space first.  These values correspond to the 'referenced' and
+ * 'available' properties, described in the zfs(1m) manpage.
+ *
+ * usedobjs and availobjs are the number of objects currently allocated,
+ * and available.
+ */
+void dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp,
+    uint64_t *usedobjsp, uint64_t *availobjsp);
+
+/*
+ * The fsid_guid is a 56-bit ID that can change to avoid collisions.
+ * (Contrast with the ds_guid which is a 64-bit ID that will never
+ * change, so there is a small probability that it will collide.)
+ */
+uint64_t dmu_objset_fsid_guid(objset_t *os);
 
 int dmu_objset_is_snapshot(objset_t *os);
 
--- a/usr/src/uts/common/fs/zfs/sys/dmu_objset.h	Sun Oct 08 21:26:45 2006 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/dmu_objset.h	Mon Oct 09 10:56:01 2006 -0700
@@ -97,7 +97,11 @@
 int dmu_objset_destroy(const char *name);
 int dmu_objset_rollback(const char *name);
 int dmu_objset_snapshot(char *fsname, char *snapname, boolean_t recursive);
-void dmu_objset_stats(objset_t *os, dmu_objset_stats_t *dds);
+void dmu_objset_stats(objset_t *os, nvlist_t *nv);
+void dmu_objset_fast_stat(objset_t *os, dmu_objset_stats_t *stat);
+void dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp,
+    uint64_t *usedobjsp, uint64_t *availobjsp);
+uint64_t dmu_objset_fsid_guid(objset_t *os);
 int dmu_objset_find(char *name, int func(char *, void *), void *arg,
     int flags);
 void dmu_objset_byteswap(void *buf, size_t size);
--- a/usr/src/uts/common/fs/zfs/sys/dnode.h	Sun Oct 08 21:26:45 2006 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/dnode.h	Mon Oct 09 10:56:01 2006 -0700
@@ -211,7 +211,6 @@
 void dnode_verify(dnode_t *dn);
 int dnode_set_blksz(dnode_t *dn, uint64_t size, int ibs, dmu_tx_t *tx);
 uint64_t dnode_current_max_length(dnode_t *dn);
-uint64_t dnode_max_nonzero_offset(dnode_t *dn);
 void dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx);
 void dnode_clear_range(dnode_t *dn, uint64_t blkid,
     uint64_t nblks, dmu_tx_t *tx);
--- a/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h	Sun Oct 08 21:26:45 2006 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h	Mon Oct 09 10:56:01 2006 -0700
@@ -151,8 +151,12 @@
 uint64_t dsl_dataset_prev_snap_txg(dsl_dataset_t *ds);
 
 void dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx);
-void dsl_dataset_stats(dsl_dataset_t *os, dmu_objset_stats_t *dds);
-struct dsl_pool *dsl_dataset_pool(dsl_dataset_t *ds);
+void dsl_dataset_stats(dsl_dataset_t *os, nvlist_t *nv);
+void dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat);
+void dsl_dataset_space(dsl_dataset_t *ds,
+    uint64_t *refdbytesp, uint64_t *availbytesp,
+    uint64_t *usedobjsp, uint64_t *availobjsp);
+uint64_t dsl_dataset_fsid_guid(dsl_dataset_t *ds);
 
 void dsl_dataset_create_root(struct dsl_pool *dp, uint64_t *ddobjp,
     dmu_tx_t *tx);
--- a/usr/src/uts/common/fs/zfs/sys/dsl_dir.h	Sun Oct 08 21:26:45 2006 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/dsl_dir.h	Mon Oct 09 10:56:01 2006 -0700
@@ -41,7 +41,7 @@
 struct dsl_dataset;
 
 typedef struct dsl_dir_phys {
-	uint64_t dd_creation_time;
+	uint64_t dd_creation_time; /* not actually used */
 	uint64_t dd_head_dataset_obj;
 	uint64_t dd_parent_obj;
 	uint64_t dd_clone_parent_obj;
@@ -102,7 +102,9 @@
 void dsl_dir_create_root(objset_t *mos, uint64_t *ddobjp, dmu_tx_t *tx);
 dsl_checkfunc_t dsl_dir_destroy_check;
 dsl_syncfunc_t dsl_dir_destroy_sync;
-void dsl_dir_stats(dsl_dir_t *dd, dmu_objset_stats_t *dds);
+void dsl_dir_stats(dsl_dir_t *dd, nvlist_t *nv);
+uint64_t dsl_dir_space_available(dsl_dir_t *dd,
+    dsl_dir_t *ancestor, int64_t delta, int ondiskonly);
 void dsl_dir_dirty(dsl_dir_t *dd, dmu_tx_t *tx);
 void dsl_dir_sync(dsl_dir_t *dd, dmu_tx_t *tx);
 int dsl_dir_tempreserve_space(dsl_dir_t *dd, uint64_t mem,
--- a/usr/src/uts/common/fs/zfs/sys/dsl_prop.h	Sun Oct 08 21:26:45 2006 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/dsl_prop.h	Mon Oct 09 10:56:01 2006 -0700
@@ -57,16 +57,18 @@
 
 int dsl_prop_get(const char *ddname, const char *propname,
     int intsz, int numints, void *buf, char *setpoint);
-int dsl_prop_get_string(const char *ddname, const char *propname,
-    char *value, int valuelen, char *setpoint);
 int dsl_prop_get_integer(const char *ddname, const char *propname,
     uint64_t *valuep, char *setpoint);
-int dsl_prop_get_ds_integer(dsl_dir_t *dd, const char *propname,
-    uint64_t *valuep, char *setpoint);
 int dsl_prop_get_all(objset_t *os, nvlist_t **nvp);
 
 int dsl_prop_set(const char *ddname, const char *propname,
     int intsz, int numints, const void *buf);
+int dsl_prop_set_dd(dsl_dir_t *dd, const char *propname,
+    int intsz, int numints, const void *buf);
+
+void dsl_prop_nvlist_add_uint64(nvlist_t *nv, zfs_prop_t prop, uint64_t value);
+void dsl_prop_nvlist_add_string(nvlist_t *nv,
+    zfs_prop_t prop, const char *value);
 
 #ifdef	__cplusplus
 }
--- a/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h	Sun Oct 08 21:26:45 2006 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h	Mon Oct 09 10:56:01 2006 -0700
@@ -31,7 +31,6 @@
 #include <sys/cred.h>
 #include <sys/dmu.h>
 #include <sys/zio.h>
-#include <sys/zvol.h>
 
 #ifdef	__cplusplus
 extern "C" {
@@ -78,6 +77,7 @@
 			uint8_t drr_checksum;
 			uint8_t drr_compress;
 			uint8_t drr_pad[6];
+			/* bonus content follows */
 		} drr_object;
 		struct drr_freeobjects {
 			uint64_t drr_firstobj;
@@ -89,6 +89,7 @@
 			uint32_t drr_pad;
 			uint64_t drr_offset;
 			uint64_t drr_length;
+			/* content follows */
 		} drr_write;
 		struct drr_free {
 			uint64_t drr_object;
@@ -127,7 +128,6 @@
 	uint64_t	zc_dev;
 	uint64_t	zc_objset_type;
 	dmu_objset_stats_t zc_objset_stats;
-	zvol_stats_t	zc_vol_stats;
 	struct drr_begin zc_begin_record;
 	zinject_record_t zc_inject_record;
 	zbookmark_t	zc_bookmark;
--- a/usr/src/uts/common/fs/zfs/sys/zvol.h	Sun Oct 08 21:26:45 2006 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/zvol.h	Mon Oct 09 10:56:01 2006 -0700
@@ -35,15 +35,10 @@
 extern "C" {
 #endif
 
-typedef struct zvol_stats {
-	uint64_t	zv_volsize;
-	uint64_t	zv_volblocksize;
-} zvol_stats_t;
-
 #ifdef _KERNEL
 extern int zvol_check_volsize(uint64_t volsize, uint64_t blocksize);
 extern int zvol_check_volblocksize(uint64_t volblocksize);
-extern int zvol_get_stats(objset_t *os, zvol_stats_t *zvs);
+extern int zvol_get_stats(objset_t *os, nvlist_t *nv);
 extern void zvol_create_cb(objset_t *os, void *arg, dmu_tx_t *tx);
 extern int zvol_create_minor(const char *, dev_t);
 extern int zvol_remove_minor(const char *);
--- a/usr/src/uts/common/fs/zfs/vdev.c	Sun Oct 08 21:26:45 2006 -0700
+++ b/usr/src/uts/common/fs/zfs/vdev.c	Mon Oct 09 10:56:01 2006 -0700
@@ -45,6 +45,15 @@
  * Virtual device management.
  */
 
+/*
+ * These tunables are for performance analysis, and override the
+ * (not-easily-turnable) vdev "knobs".
+ */
+int zfs_vdev_cache_max;
+int zfs_vdev_max_pending;
+int zfs_vdev_min_pending;
+int zfs_vdev_time_shift;
+
 static vdev_ops_t *vdev_ops_table[] = {
 	&vdev_root_ops,
 	&vdev_raidz_ops,
@@ -790,6 +799,15 @@
 		*valp = MIN(*valp, vk->vk_max);
 	}
 
+	if (zfs_vdev_cache_max)
+		vd->vdev_cache.vc_max = zfs_vdev_cache_max;
+	if (zfs_vdev_max_pending)
+		vd->vdev_queue.vq_max_pending = zfs_vdev_max_pending;
+	if (zfs_vdev_min_pending)
+		vd->vdev_queue.vq_min_pending = zfs_vdev_min_pending;
+	if (zfs_vdev_time_shift)
+		vd->vdev_queue.vq_time_shift = zfs_vdev_time_shift;
+
 	if (vd->vdev_ops->vdev_op_leaf) {
 		vdev_cache_init(vd);
 		vdev_queue_init(vd);
--- a/usr/src/uts/common/fs/zfs/vdev_disk.c	Sun Oct 08 21:26:45 2006 -0700
+++ b/usr/src/uts/common/fs/zfs/vdev_disk.c	Mon Oct 09 10:56:01 2006 -0700
@@ -37,6 +37,13 @@
  * Virtual device vector for disks.
  */
 
+/*
+ * Tunable parameter for debugging or performance analysis.  Setting
+ * zfs_nocacheflush will cause corruption on power loss if a volatile
+ * out-of-order write cache is enabled.
+ */
+boolean_t zfs_nocacheflush = B_FALSE;
+
 extern ldi_ident_t zfs_li;
 
 typedef struct vdev_disk_buf {
@@ -255,6 +262,9 @@
 
 		case DKIOCFLUSHWRITECACHE:
 
+			if (zfs_nocacheflush)
+				break;
+
 			if (vd->vdev_nowritecache) {
 				zio->io_error = ENOTSUP;
 				break;
--- a/usr/src/uts/common/fs/zfs/zfs_ioctl.c	Sun Oct 08 21:26:45 2006 -0700
+++ b/usr/src/uts/common/fs/zfs/zfs_ioctl.c	Mon Oct 09 10:56:01 2006 -0700
@@ -56,6 +56,7 @@
 #include <sys/sdt.h>
 #include <sys/fs/zfs.h>
 #include <sys/zfs_ctldir.h>
+#include <sys/zvol.h>
 
 #include "zfs_namecheck.h"
 #include "zfs_prop.h"
@@ -612,17 +613,17 @@
 		return (error);
 	}
 
-	dmu_objset_stats(os, &zc->zc_objset_stats);
+	dmu_objset_fast_stat(os, &zc->zc_objset_stats);
 
 	if (zc->zc_nvlist_dst != 0 &&
 	    (error = dsl_prop_get_all(os, &nv)) == 0) {
+		dmu_objset_stats(os, nv);
+		if (dmu_objset_type(os) == DMU_OST_ZVOL)
+			VERIFY(zvol_get_stats(os, nv) == 0);
 		error = put_nvlist(zc, nv);
 		nvlist_free(nv);
 	}
 
-	if (!error && zc->zc_objset_stats.dds_type == DMU_OST_ZVOL)
-		error = zvol_get_stats(os, &zc->zc_vol_stats);
-
 	spa_altroot(dmu_objset_spa(os), zc->zc_value, sizeof (zc->zc_value));
 
 	dmu_objset_close(os);
@@ -738,7 +739,6 @@
 	zfs_prop_t prop;
 	uint64_t intval;
 	char *strval;
-	const char *unused;
 
 	elem = NULL;
 	while ((elem = nvlist_next_nvpair(nvl, elem)) != NULL) {
@@ -839,6 +839,8 @@
 				    strval)) != 0)
 					return (error);
 			} else if (nvpair_type(elem) == DATA_TYPE_UINT64) {
+				const char *unused;
+
 				VERIFY(nvpair_value_uint64(elem, &intval) == 0);
 
 				switch (zfs_prop_get_type(prop)) {
@@ -1156,6 +1158,7 @@
 {
 	file_t *fp;
 	int error, fd;
+	offset_t new_off;
 
 	fd = zc->zc_cookie;
 	fp = getf(fd);
@@ -1164,6 +1167,11 @@
 	error = dmu_recvbackup(zc->zc_value, &zc->zc_begin_record,
 	    &zc->zc_cookie, (boolean_t)zc->zc_guid, fp->f_vnode,
 	    fp->f_offset);
+
+	new_off = fp->f_offset + zc->zc_cookie;
+	if (VOP_SEEK(fp->f_vnode, fp->f_offset, &new_off) == 0)
+		fp->f_offset = new_off;
+
 	releasef(fd);
 	return (error);
 }
@@ -1182,7 +1190,20 @@
 		return (error);
 
 	if (zc->zc_value[0] != '\0') {
-		error = dmu_objset_open(zc->zc_value, DMU_OST_ANY,
+		char buf[MAXPATHLEN];
+		char *cp;
+
+		/*
+		 * Build the "from" name: zc_name up to and including its
+		 * '@', followed by zc_value.  The strl* calls bound every
+		 * write by sizeof (buf) and always NUL-terminate.
+		 */
+		(void) strlcpy(buf, zc->zc_name, sizeof (buf));
+		cp = strchr(buf, '@');
+		if (cp)
+			*(cp+1) = 0;
+		(void) strlcat(buf, zc->zc_value, sizeof (buf));
+		error = dmu_objset_open(buf, DMU_OST_ANY,
 		    DS_MODE_STANDARD | DS_MODE_READONLY, &fromsnap);
 		if (error) {
 			dmu_objset_close(tosnap);
--- a/usr/src/uts/common/fs/zfs/zfs_vfsops.c	Sun Oct 08 21:26:45 2006 -0700
+++ b/usr/src/uts/common/fs/zfs/zfs_vfsops.c	Mon Oct 09 10:56:01 2006 -0700
@@ -807,12 +807,13 @@
 zfs_statvfs(vfs_t *vfsp, struct statvfs64 *statp)
 {
 	zfsvfs_t *zfsvfs = vfsp->vfs_data;
-	dmu_objset_stats_t dstats;
 	dev32_t d32;
+	uint64_t refdbytes, availbytes, usedobjs, availobjs;
 
 	ZFS_ENTER(zfsvfs);
 
-	dmu_objset_stats(zfsvfs->z_os, &dstats);
+	dmu_objset_space(zfsvfs->z_os,
+	    &refdbytes, &availbytes, &usedobjs, &availobjs);
 
 	/*
 	 * The underlying storage pool actually uses multiple block sizes.
@@ -828,9 +829,8 @@
 	 * "fragment" size.
 	 */
 
-	statp->f_blocks =
-	    (dstats.dds_space_refd + dstats.dds_available) >> SPA_MINBLOCKSHIFT;
-	statp->f_bfree = dstats.dds_available >> SPA_MINBLOCKSHIFT;
+	statp->f_blocks = (refdbytes + availbytes) >> SPA_MINBLOCKSHIFT;
+	statp->f_bfree = availbytes >> SPA_MINBLOCKSHIFT;
 	statp->f_bavail = statp->f_bfree; /* no root reservation */
 
 	/*
@@ -841,9 +841,9 @@
 	 * For f_ffree, report the smaller of the number of object available
 	 * and the number of blocks (each object will take at least a block).
 	 */
-	statp->f_ffree = MIN(dstats.dds_objects_avail, statp->f_bfree);
+	statp->f_ffree = MIN(availobjs, statp->f_bfree);
 	statp->f_favail = statp->f_ffree;	/* no "root reservation" */
-	statp->f_files = statp->f_ffree + dstats.dds_objects_used;
+	statp->f_files = statp->f_ffree + usedobjs;
 
 	(void) cmpldev(&d32, vfsp->vfs_dev);
 	statp->f_fsid = d32;
--- a/usr/src/uts/common/fs/zfs/zfs_znode.c	Sun Oct 08 21:26:45 2006 -0700
+++ b/usr/src/uts/common/fs/zfs/zfs_znode.c	Mon Oct 09 10:56:01 2006 -0700
@@ -239,7 +239,7 @@
 	uint64_t	version = ZPL_VERSION;
 	int		i, error;
 	dmu_object_info_t doi;
-	dmu_objset_stats_t *stats;
+	uint64_t fsid_guid;
 
 	*zpp = NULL;
 
@@ -279,14 +279,11 @@
 	 * The 8-bit fs type must be put in the low bits of fsid[1]
 	 * because that's where other Solaris filesystems put it.
 	 */
-	stats = kmem_alloc(sizeof (dmu_objset_stats_t), KM_SLEEP);
-	dmu_objset_stats(os, stats);
-	ASSERT((stats->dds_fsid_guid & ~((1ULL<<56)-1)) == 0);
-	zfsvfs->z_vfs->vfs_fsid.val[0] = stats->dds_fsid_guid;
-	zfsvfs->z_vfs->vfs_fsid.val[1] = ((stats->dds_fsid_guid>>32) << 8) |
+	fsid_guid = dmu_objset_fsid_guid(os);
+	ASSERT((fsid_guid & ~((1ULL<<56)-1)) == 0);
+	zfsvfs->z_vfs->vfs_fsid.val[0] = fsid_guid;
+	zfsvfs->z_vfs->vfs_fsid.val[1] = ((fsid_guid>>32) << 8) |
 	    zfsfstype & 0xFF;
-	kmem_free(stats, sizeof (dmu_objset_stats_t));
-	stats = NULL;
 
 	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1, &zoid);
 	if (error)
--- a/usr/src/uts/common/fs/zfs/zio.c	Sun Oct 08 21:26:45 2006 -0700
+++ b/usr/src/uts/common/fs/zfs/zio.c	Mon Oct 09 10:56:01 2006 -0700
@@ -895,9 +895,8 @@
 	if (bp->blk_birth == zio->io_txg && BP_GET_PSIZE(bp) == csize &&
 	    pass > zio_sync_pass.zp_rewrite) {
 		ASSERT(csize != 0);
-		ASSERT3U(BP_GET_COMPRESS(bp), ==, compress);
-		ASSERT3U(BP_GET_LSIZE(bp), ==, lsize);
-
+		BP_SET_LSIZE(bp, lsize);
+		BP_SET_COMPRESS(bp, compress);
 		zio->io_pipeline = ZIO_REWRITE_PIPELINE;
 	} else {
 		if (bp->blk_birth == zio->io_txg) {
--- a/usr/src/uts/common/fs/zfs/zvol.c	Sun Oct 08 21:26:45 2006 -0700
+++ b/usr/src/uts/common/fs/zfs/zvol.c	Mon Oct 09 10:56:01 2006 -0700
@@ -151,20 +151,25 @@
 }
 
 int
-zvol_get_stats(objset_t *os, zvol_stats_t *zvs)
+zvol_get_stats(objset_t *os, nvlist_t *nv)
 {
 	int error;
 	dmu_object_info_t doi;
+	uint64_t val;
 
-	error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &zvs->zv_volsize);
 
+	error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &val);
 	if (error)
 		return (error);
 
+	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_VOLSIZE, val);
+
 	error = dmu_object_info(os, ZVOL_OBJ, &doi);
 
-	if (error == 0)
-		zvs->zv_volblocksize = doi.doi_data_block_size;
+	if (error == 0) {
+		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_VOLBLOCKSIZE,
+		    doi.doi_data_block_size);
+	}
 
 	return (error);
 }
@@ -786,7 +791,8 @@
 		return (0);
 	}
 
-	if (zv->zv_readonly && !(bp->b_flags & B_READ)) {
+	if ((zv->zv_readonly || (zv->zv_mode & DS_MODE_READONLY)) &&
+	    !(bp->b_flags & B_READ)) {
 		bioerror(bp, EROFS);
 		biodone(bp);
 		return (0);