upstream/illumos/illumos-gate: changeset 12296:7cf402a7f374

--- a/usr/src/cmd/availdevs/availdevs.c	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/cmd/availdevs/availdevs.c	Mon May 03 14:54:08 2010 -0700
@@ -20,8 +20,7 @@
  */
 
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #include "availdevs.h"
@@ -134,7 +133,7 @@
 	    nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE, &state) ||
 	    nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &devices) ||
 	    nvlist_lookup_uint64_array(
-	    devices, ZPOOL_CONFIG_STATS, (uint64_t **)&vs, &n)) {
+	    devices, ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &n)) {
 		return (-1);
 	}

--- a/usr/src/cmd/fm/schemes/zfs/scheme.c	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/cmd/fm/schemes/zfs/scheme.c	Mon May 03 14:54:08 2010 -0700
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #include <fm/fmd_fmri.h>
@@ -214,7 +213,7 @@
 		vdev_stat_t *vs;
 		uint_t c;
 
-		(void) nvlist_lookup_uint64_array(vd, ZPOOL_CONFIG_STATS,
+		(void) nvlist_lookup_uint64_array(vd, ZPOOL_CONFIG_VDEV_STATS,
 		    (uint64_t **)&vs, &c);
 
 		ret = (vs->vs_state < VDEV_STATE_DEGRADED);

--- a/usr/src/cmd/mdb/common/modules/zfs/zfs.c	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/cmd/mdb/common/modules/zfs/zfs.c	Mon May 03 14:54:08 2010 -0700
@@ -129,6 +129,16 @@
 	return (GETMEMBID(addr + off, &rc_id, rc_count, *rc));
 }
 
+static boolean_t
+strisprint(const char *cp)
+{
+	for (; *cp; cp++) {
+		if (!isprint(*cp))
+			return (B_FALSE);
+	}
+	return (B_TRUE);
+}
+
 static int verbose;
 
 static int
@@ -624,8 +634,10 @@
 			    zlc->l_entry.le_hash);
 			break;
 		case ZAP_CHUNK_ARRAY:
-			mdb_printf("    %u: array \"%s\"\n",
-			    i, zlc->l_array.la_array);
+			mdb_printf("    %u: array", i);
+			if (strisprint((char *)zlc->l_array.la_array))
+				mdb_printf(" \"%s\"", zlc->l_array.la_array);
+			mdb_printf("\n");
 			if (verbose) {
 				int j;
 				mdb_printf("        ");
@@ -811,6 +823,77 @@
 	return (DCMD_OK);
 }
 
+/* ARGSUSED */
+static int
+dbgmsg_cb(uintptr_t addr, const void *unknown, void *arg)
+{
+	static mdb_ctf_id_t id;
+	static boolean_t gotid;
+	static ulong_t off;
+
+	int *verbosep = arg;
+	time_t timestamp;
+	char buf[1024];
+
+	if (!gotid) {
+		if (mdb_ctf_lookup_by_name("struct zfs_dbgmsg", &id) == -1) {
+			mdb_warn("couldn't find struct zfs_dbgmsg");
+			return (WALK_ERR);
+		}
+		gotid = TRUE;
+		if (mdb_ctf_offsetof(id, "zdm_msg", &off) == -1) {
+			mdb_warn("couldn't find zdm_msg");
+			return (WALK_ERR);
+		}
+		off /= 8;
+	}
+
+
+	if (GETMEMBID(addr, &id, zdm_timestamp, timestamp)) {
+		return (WALK_ERR);
+	}
+
+	if (mdb_readstr(buf, sizeof (buf), addr + off) == -1) {
+		mdb_warn("failed to read zdm_msg at %p\n", addr + off);
+		return (DCMD_ERR);
+	}
+
+	if (*verbosep)
+		mdb_printf("%Y ", timestamp);
+
+	mdb_printf("%s\n", buf);
+
+	if (*verbosep)
+		(void) mdb_call_dcmd("whatis", addr, DCMD_ADDRSPEC, 0, NULL);
+
+	return (WALK_NEXT);
+}
+
+/* ARGSUSED */
+static int
+dbgmsg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+{
+	GElf_Sym sym;
+	int verbose = FALSE;
+
+	if (mdb_getopts(argc, argv,
+	    'v', MDB_OPT_SETBITS, TRUE, &verbose,
+	    NULL) != argc)
+		return (DCMD_USAGE);
+
+	if (mdb_lookup_by_name("zfs_dbgmsgs", &sym)) {
+		mdb_warn("can't find zfs_dbgmsgs");
+		return (DCMD_ERR);
+	}
+
+	if (mdb_pwalk("list", dbgmsg_cb, &verbose, sym.st_value) != 0) {
+		mdb_warn("can't walk zfs_dbgmsgs");
+		return (DCMD_ERR);
+	}
+
+	return (DCMD_OK);
+}
+
 /*ARGSUSED*/
 static int
 arc_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
@@ -2195,7 +2278,7 @@
 	uintptr_t ref_holder;
 	uintptr_t ref_removed;
 	uint64_t ref_number;
-	boolean_t holder_is_str;
+	boolean_t holder_is_str = B_FALSE;
 	char holder_str[128];
 	boolean_t removed = (boolean_t)arg;
 
@@ -2212,18 +2295,8 @@
 	    GETMEMBID(addr, &ref_id, ref_number, ref_number))
 		return (WALK_ERR);
 
-	if (mdb_readstr(holder_str, sizeof (holder_str), ref_holder) != -1) {
-		char *cp;
-		holder_is_str = B_TRUE;
-		for (cp = holder_str; *cp; cp++) {
-			if (!isprint(*cp)) {
-				holder_is_str = B_FALSE;
-				break;
-			}
-		}
-	} else {
-		holder_is_str = B_FALSE;
-	}
+	if (mdb_readstr(holder_str, sizeof (holder_str), ref_holder) != -1)
+		holder_is_str = strisprint(holder_str);
 
 	if (removed)
 		mdb_printf("removed ");
@@ -2940,6 +3013,8 @@
 	    sa_attr_table},
 	{ "sa_attr", ": attr_id",
 	    "print SA attribute address when given sa_handle_t", sa_attr_print},
+	{ "zfs_dbgmsg", ":[-v]",
+	    "print zfs debug log", dbgmsg},
 	{ NULL }
 };

--- a/usr/src/cmd/ndmpd/ndmp/ndmpd_zfs.c	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/cmd/ndmpd/ndmp/ndmpd_zfs.c	Mon May 03 14:54:08 2010 -0700
@@ -610,7 +610,7 @@
 	}
 
 	err = zfs_send(zhp, fromsnap, ndmpd_zfs_args->nz_snapname, flags,
-	    ndmpd_zfs_args->nz_pipe_fd[PIPE_ZFS], NULL, NULL);
+	    ndmpd_zfs_args->nz_pipe_fd[PIPE_ZFS], NULL, NULL, NULL);
 
 	if (err && !session->ns_data.dd_abort)
 		NDMPD_ZFS_LOG_ZERR(ndmpd_zfs_args, "zfs_send: %d", err);

--- a/usr/src/cmd/truss/codes.c	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/cmd/truss/codes.c	Mon May 03 14:54:08 2010 -0700
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
@@ -1146,7 +1145,7 @@
 		"zfs_cmd_t" },
 	{ (uint_t)ZFS_IOC_POOL_TRYIMPORT,	"ZFS_IOC_POOL_TRYIMPORT",
 		"zfs_cmd_t" },
-	{ (uint_t)ZFS_IOC_POOL_SCRUB,		"ZFS_IOC_POOL_SCRUB",
+	{ (uint_t)ZFS_IOC_POOL_SCAN,		"ZFS_IOC_POOL_SCAN",
 		"zfs_cmd_t" },
 	{ (uint_t)ZFS_IOC_POOL_FREEZE,		"ZFS_IOC_POOL_FREEZE",
 		"zfs_cmd_t" },

--- a/usr/src/cmd/zdb/zdb.c	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/cmd/zdb/zdb.c	Mon May 03 14:54:08 2010 -0700
@@ -150,6 +150,7 @@
 	    "has altroot/not in a cachefile\n");
 	(void) fprintf(stderr, "        -p <path> -- use one or more with "
 	    "-e to specify path to vdev dir\n");
+	(void) fprintf(stderr, "	-P print numbers parsable\n");
 	(void) fprintf(stderr, "        -t <txg> -- highest txg to use when "
 	    "searching for uberblocks\n");
 	(void) fprintf(stderr, "Specify an option more than once (e.g. -bb) "
@@ -196,6 +197,15 @@
 	nvlist_free(nv);
 }
 
+static void
+zdb_nicenum(uint64_t num, char *buf)
+{
+	if (dump_opt['P'])
+		(void) sprintf(buf, "%llu", (longlong_t)num);
+	else
+		nicenum(num, buf);
+}
+
 const char dump_zap_stars[] = "****************************************";
 const int dump_zap_width = sizeof (dump_zap_stars) - 1;
 
@@ -490,7 +500,7 @@
 	 */
 	alloc = 0;
 	for (offset = 0; offset < smo->smo_objsize; offset += sizeof (entry)) {
-		VERIFY(0 == dmu_read(os, smo->smo_object, offset,
+		VERIFY3U(0, ==, dmu_read(os, smo->smo_object, offset,
 		    sizeof (entry), &entry, DMU_READ_PREFETCH));
 		if (SM_DEBUG_DECODE(entry)) {
 			(void) printf("\t    [%6llu] %s: txg %llu, pass %llu\n",
@@ -525,12 +535,12 @@
 static void
 dump_metaslab_stats(metaslab_t *msp)
 {
-	char maxbuf[5];
+	char maxbuf[32];
 	space_map_t *sm = &msp->ms_map;
 	avl_tree_t *t = sm->sm_pp_root;
 	int free_pct = sm->sm_space * 100 / sm->sm_size;
 
-	nicenum(space_map_maxsize(sm), maxbuf);
+	zdb_nicenum(space_map_maxsize(sm), maxbuf);
 
 	(void) printf("\t %25s %10lu   %7s  %6s   %4s %4d%%\n",
 	    "segments", avl_numnodes(t), "maxsize", maxbuf,
@@ -544,9 +554,9 @@
 	spa_t *spa = vd->vdev_spa;
 	space_map_t *sm = &msp->ms_map;
 	space_map_obj_t *smo = &msp->ms_smo;
-	char freebuf[5];
-
-	nicenum(sm->sm_size - smo->smo_alloc, freebuf);
+	char freebuf[32];
+
+	zdb_nicenum(sm->sm_size - smo->smo_alloc, freebuf);
 
 	(void) printf(
 	    "\tmetaslab %6llu   offset %12llx   spacemap %6llu   free    %5s\n",
@@ -855,7 +865,7 @@
 			(void) snprintf(internalstr,
 			    sizeof (internalstr),
 			    "[internal %s txg:%lld] %s",
-			    hist_event_table[ievent], txg,
+			    zfs_history_event_names[ievent], txg,
 			    intstr);
 			cmd = internalstr;
 		}
@@ -966,6 +976,7 @@
 		    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
 		if (err)
 			return (err);
+		ASSERT(buf->b_data);
 
 		/* recursively visit blocks below this */
 		cbp = buf->b_data;
@@ -1015,7 +1026,7 @@
 {
 	dsl_dir_phys_t *dd = data;
 	time_t crtime;
-	char nice[6];
+	char nice[32];
 
 	if (dd == NULL)
 		return;
@@ -1032,15 +1043,15 @@
 	    (u_longlong_t)dd->dd_origin_obj);
 	(void) printf("\t\tchild_dir_zapobj = %llu\n",
 	    (u_longlong_t)dd->dd_child_dir_zapobj);
-	nicenum(dd->dd_used_bytes, nice);
+	zdb_nicenum(dd->dd_used_bytes, nice);
 	(void) printf("\t\tused_bytes = %s\n", nice);
-	nicenum(dd->dd_compressed_bytes, nice);
+	zdb_nicenum(dd->dd_compressed_bytes, nice);
 	(void) printf("\t\tcompressed_bytes = %s\n", nice);
-	nicenum(dd->dd_uncompressed_bytes, nice);
+	zdb_nicenum(dd->dd_uncompressed_bytes, nice);
 	(void) printf("\t\tuncompressed_bytes = %s\n", nice);
-	nicenum(dd->dd_quota, nice);
+	zdb_nicenum(dd->dd_quota, nice);
 	(void) printf("\t\tquota = %s\n", nice);
-	nicenum(dd->dd_reserved, nice);
+	zdb_nicenum(dd->dd_reserved, nice);
 	(void) printf("\t\treserved = %s\n", nice);
 	(void) printf("\t\tprops_zapobj = %llu\n",
 	    (u_longlong_t)dd->dd_props_zapobj);
@@ -1050,7 +1061,7 @@
 	    (u_longlong_t)dd->dd_flags);
 
 #define	DO(which) \
-	nicenum(dd->dd_used_breakdown[DD_USED_ ## which], nice); \
+	zdb_nicenum(dd->dd_used_breakdown[DD_USED_ ## which], nice); \
 	(void) printf("\t\tused_breakdown[" #which "] = %s\n", nice)
 	DO(HEAD);
 	DO(SNAP);
@@ -1066,7 +1077,7 @@
 {
 	dsl_dataset_phys_t *ds = data;
 	time_t crtime;
-	char used[6], compressed[6], uncompressed[6], unique[6];
+	char used[32], compressed[32], uncompressed[32], unique[32];
 	char blkbuf[BP_SPRINTF_LEN];
 
 	if (ds == NULL)
@@ -1074,10 +1085,10 @@
 
 	ASSERT(size == sizeof (*ds));
 	crtime = ds->ds_creation_time;
-	nicenum(ds->ds_used_bytes, used);
-	nicenum(ds->ds_compressed_bytes, compressed);
-	nicenum(ds->ds_uncompressed_bytes, uncompressed);
-	nicenum(ds->ds_unique_bytes, unique);
+	zdb_nicenum(ds->ds_used_bytes, used);
+	zdb_nicenum(ds->ds_compressed_bytes, compressed);
+	zdb_nicenum(ds->ds_uncompressed_bytes, uncompressed);
+	zdb_nicenum(ds->ds_unique_bytes, unique);
 	sprintf_blkptr(blkbuf, &ds->ds_bp);
 
 	(void) printf("\t\tdir_obj = %llu\n",
@@ -1122,9 +1133,9 @@
 	bplist_t bpl = { 0 };
 	blkptr_t blk, *bp = &blk;
 	uint64_t itor = 0;
-	char bytes[6];
-	char comp[6];
-	char uncomp[6];
+	char bytes[32];
+	char comp[32];
+	char uncomp[32];
 
 	if (dump_opt['d'] < 3)
 		return;
@@ -1137,10 +1148,10 @@
 		return;
 	}
 
-	nicenum(bpl.bpl_phys->bpl_bytes, bytes);
+	zdb_nicenum(bpl.bpl_phys->bpl_bytes, bytes);
 	if (bpl.bpl_dbuf->db_size == sizeof (bplist_phys_t)) {
-		nicenum(bpl.bpl_phys->bpl_comp, comp);
-		nicenum(bpl.bpl_phys->bpl_uncomp, uncomp);
+		zdb_nicenum(bpl.bpl_phys->bpl_comp, comp);
+		zdb_nicenum(bpl.bpl_phys->bpl_uncomp, uncomp);
 		(void) printf("\n    %s: %llu entries, %s (%s/%s comp)\n",
 		    name, (u_longlong_t)bpl.bpl_phys->bpl_entries,
 		    bytes, comp, uncomp);
@@ -1391,6 +1402,8 @@
 	dump_zap,		/* SA Master Node		*/
 	dump_sa_attrs,		/* SA attribute registration	*/
 	dump_sa_layouts,	/* SA attribute layouts		*/
+	dump_zap,		/* DSL scrub translations	*/
+	dump_none,		/* fake dedup BP		*/
 	dump_unknown,		/* Unknown type, must be last	*/
 };
 
@@ -1402,7 +1415,8 @@
 	dnode_t *dn;
 	void *bonus = NULL;
 	size_t bsize = 0;
-	char iblk[6], dblk[6], lsize[6], asize[6], bonus_size[6], fill[7];
+	char iblk[32], dblk[32], lsize[32], asize[32], fill[32];
+	char bonus_size[32];
 	char aux[50];
 	int error;
 
@@ -1426,11 +1440,11 @@
 	}
 	dmu_object_info_from_dnode(dn, &doi);
 
-	nicenum(doi.doi_metadata_block_size, iblk);
-	nicenum(doi.doi_data_block_size, dblk);
-	nicenum(doi.doi_max_offset, lsize);
-	nicenum(doi.doi_physical_blocks_512 << 9, asize);
-	nicenum(doi.doi_bonus_size, bonus_size);
+	zdb_nicenum(doi.doi_metadata_block_size, iblk);
+	zdb_nicenum(doi.doi_data_block_size, dblk);
+	zdb_nicenum(doi.doi_max_offset, lsize);
+	zdb_nicenum(doi.doi_physical_blocks_512 << 9, asize);
+	zdb_nicenum(doi.doi_bonus_size, bonus_size);
 	(void) sprintf(fill, "%6.2f", 100.0 * doi.doi_fill_count *
 	    doi.doi_data_block_size / (object == 0 ? DNODES_PER_BLOCK : 1) /
 	    doi.doi_max_offset);
@@ -1492,7 +1506,7 @@
 		}
 
 		for (;;) {
-			char segsize[6];
+			char segsize[32];
 			error = dnode_next_offset(dn,
 			    0, &start, minlvl, blkfill, 0);
 			if (error)
@@ -1500,7 +1514,7 @@
 			end = start;
 			error = dnode_next_offset(dn,
 			    DNODE_FIND_HOLE, &end, minlvl, blkfill, 0);
-			nicenum(end - start, segsize);
+			zdb_nicenum(end - start, segsize);
 			(void) printf("\t\tsegment [%016llx, %016llx)"
 			    " size %5s\n", (u_longlong_t)start,
 			    (u_longlong_t)end, segsize);
@@ -1523,7 +1537,7 @@
 	dmu_objset_stats_t dds;
 	uint64_t object, object_count;
 	uint64_t refdbytes, usedobjs, scratch;
-	char numbuf[8];
+	char numbuf[32];
 	char blkbuf[BP_SPRINTF_LEN + 20];
 	char osname[MAXNAMELEN];
 	char *type = "UNKNOWN";
@@ -1547,7 +1561,7 @@
 
 	ASSERT3U(usedobjs, ==, os->os_rootbp->blk_fill);
 
-	nicenum(refdbytes, numbuf);
+	zdb_nicenum(refdbytes, numbuf);
 
 	if (verbosity >= 4) {
 		(void) sprintf(blkbuf, ", rootbp ");
@@ -1905,8 +1919,9 @@
 	    bp, NULL, NULL, ZIO_FLAG_CANFAIL)), ==, 0);
 }
 
+/* ARGSUSED */
 static int
-zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
+zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf,
     const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
 {
 	zdb_cb_t *zcb = arg;
@@ -2222,7 +2237,8 @@
 		    "\t  avg\t comp\t%%Total\tType\n");
 
 		for (t = 0; t <= ZDB_OT_TOTAL; t++) {
-			char csize[6], lsize[6], psize[6], asize[6], avg[6];
+			char csize[32], lsize[32], psize[32], asize[32];
+			char avg[32];
 			char *typename;
 
 			if (t < DMU_OT_NUMTYPES)
@@ -2258,11 +2274,11 @@
 				    zcb.zcb_type[ZB_TOTAL][t].zb_asize)
 					continue;
 
-				nicenum(zb->zb_count, csize);
-				nicenum(zb->zb_lsize, lsize);
-				nicenum(zb->zb_psize, psize);
-				nicenum(zb->zb_asize, asize);
-				nicenum(zb->zb_asize / zb->zb_count, avg);
+				zdb_nicenum(zb->zb_count, csize);
+				zdb_nicenum(zb->zb_lsize, lsize);
+				zdb_nicenum(zb->zb_psize, psize);
+				zdb_nicenum(zb->zb_asize, asize);
+				zdb_nicenum(zb->zb_asize / zb->zb_count, avg);
 
 				(void) printf("%6s\t%5s\t%5s\t%5s\t%5s"
 				    "\t%5.2f\t%6.2f\t",
@@ -2302,7 +2318,7 @@
 /* ARGSUSED */
 static int
 zdb_ddt_add_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
-    const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
+    arc_buf_t *pbuf, const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
 {
 	avl_tree_t *t = arg;
 	avl_index_t where;
@@ -2897,7 +2913,7 @@
 
 	dprintf_setup(&argc, argv);
 
-	while ((c = getopt(argc, argv, "bcdhilmsuCDRSAFLXevp:t:U:")) != -1) {
+	while ((c = getopt(argc, argv, "bcdhilmsuCDRSAFLXevp:t:U:P")) != -1) {
 		switch (c) {
 		case 'b':
 		case 'c':
@@ -2920,6 +2936,7 @@
 		case 'L':
 		case 'X':
 		case 'e':
+		case 'P':
 			dump_opt[c]++;
 			break;
 		case 'v':
@@ -2970,7 +2987,7 @@
 		verbose = MAX(verbose, 1);
 
 	for (c = 0; c < 256; c++) {
-		if (dump_all && !strchr("elAFLRSX", c))
+		if (dump_all && !strchr("elAFLRSXP", c))
 			dump_opt[c] = 1;
 		if (dump_opt[c])
 			dump_opt[c] += verbose;

--- a/usr/src/cmd/zfs/zfs_main.c	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/cmd/zfs/zfs_main.c	Mon May 03 14:54:08 2010 -0700
@@ -20,8 +20,7 @@
  */
 
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #include <assert.h>
@@ -2573,6 +2572,8 @@
 	zfs_handle_t *zhp;
 	sendflags_t flags = { 0 };
 	int c, err;
+	nvlist_t *dbgnv;
+	boolean_t extraverbose = B_FALSE;
 
 	/* check options */
 	while ((c = getopt(argc, argv, ":i:I:RDpv")) != -1) {
@@ -2595,6 +2596,8 @@
 			flags.props = B_TRUE;
 			break;
 		case 'v':
+			if (flags.verbose)
+				extraverbose = B_TRUE;
 			flags.verbose = B_TRUE;
 			break;
 		case 'D':
@@ -2679,7 +2682,19 @@
 	if (flags.replicate && fromname == NULL)
 		flags.doall = B_TRUE;
 
-	err = zfs_send(zhp, fromname, toname, flags, STDOUT_FILENO, NULL, 0);
+	err = zfs_send(zhp, fromname, toname, flags, STDOUT_FILENO, NULL, 0,
+	    extraverbose ? &dbgnv : NULL);
+
+	if (extraverbose) {
+		/*
+		 * dump_nvlist prints to stdout, but that's been
+		 * redirected to a file.  Make it print to stderr
+		 * instead.
+		 */
+		(void) dup2(STDERR_FILENO, STDOUT_FILENO);
+		dump_nvlist(dbgnv, 0);
+		nvlist_free(dbgnv);
+	}
 	zfs_close(zhp);
 
 	return (err != 0);

--- a/usr/src/cmd/zpool/Makefile	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/cmd/zpool/Makefile	Mon May 03 14:54:08 2010 -0700
@@ -19,8 +19,7 @@
 # CDDL HEADER END
 #
 #
-# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
-# Use is subject to license terms.
+# Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 #
 
 PROG= zpool

--- a/usr/src/cmd/zpool/zpool_main.c	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/cmd/zpool/zpool_main.c	Mon May 03 14:54:08 2010 -0700
@@ -20,8 +20,7 @@
  */
 
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #include <assert.h>
@@ -42,7 +41,6 @@
 #include <pwd.h>
 #include <zone.h>
 #include <sys/fs/zfs.h>
-
 #include <sys/stat.h>
 
 #include <libzfs.h>
@@ -215,7 +213,7 @@
 		    "[count]]\n"));
 	case HELP_LIST:
 		return (gettext("\tlist [-H] [-o property[,...]] "
-		    "[pool] ...\n"));
+		    "[-T d|u] [pool] ... [interval [count]]\n"));
 	case HELP_OFFLINE:
 		return (gettext("\toffline [-t] <pool> <device> ...\n"));
 	case HELP_ONLINE:
@@ -228,7 +226,8 @@
 	case HELP_SCRUB:
 		return (gettext("\tscrub [-s] <pool> ...\n"));
 	case HELP_STATUS:
-		return (gettext("\tstatus [-vx] [pool] ...\n"));
+		return (gettext("\tstatus [-vx] [-T d|u] [pool] ... [interval "
+		    "[count]]\n"));
 	case HELP_UPGRADE:
 		return (gettext("\tupgrade\n"
 		    "\tupgrade -v\n"
@@ -519,11 +518,10 @@
 }
 
 /*
- * zpool remove <pool> <vdev> ...
+ * zpool remove  <pool> <vdev> ...
  *
- * Removes the given vdev from the pool.  Currently, this only supports removing
- * spares and cache devices from the pool.  Eventually, we'll want to support
- * removing leaf vdevs (as an alias for 'detach') as well as toplevel vdevs.
+ * Removes the given vdev from the pool.  Currently, this supports removing
+ * spares, cache, and log devices from the pool.
  */
 int
 zpool_do_remove(int argc, char **argv)
@@ -1044,20 +1042,21 @@
 {
 	nvlist_t **child;
 	uint_t c, children;
+	pool_scan_stat_t *ps = NULL;
 	vdev_stat_t *vs;
-	char rbuf[6], wbuf[6], cbuf[6], repaired[7];
+	char rbuf[6], wbuf[6], cbuf[6];
 	char *vname;
 	uint64_t notpresent;
 	spare_cbdata_t cb;
 	char *state;
 
-	verify(nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_STATS,
-	    (uint64_t **)&vs, &c) == 0);
-
 	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
 	    &child, &children) != 0)
 		children = 0;
 
+	verify(nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS,
+	    (uint64_t **)&vs, &c) == 0);
+
 	state = zpool_state_to_name(vs->vs_state, vs->vs_aux);
 	if (isspare) {
 		/*
@@ -1147,14 +1146,16 @@
 			(void) printf(gettext("corrupted data"));
 			break;
 		}
-	} else if (vs->vs_scrub_repaired != 0 && children == 0) {
-		/*
-		 * Report bytes resilvered/repaired on leaf devices.
-		 */
-		zfs_nicenum(vs->vs_scrub_repaired, repaired, sizeof (repaired));
-		(void) printf(gettext("  %s %s"), repaired,
-		    (vs->vs_scrub_type == POOL_SCRUB_RESILVER) ?
-		    "resilvered" : "repaired");
+	}
+
+	(void) nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_SCAN_STATS,
+	    (uint64_t **)&ps, &c);
+
+	if (ps && ps->pss_state == DSS_SCANNING &&
+	    vs->vs_scan_processed != 0 && children == 0) {
+		(void) printf(gettext("  (%s)"),
+		    (ps->pss_func == POOL_SCAN_RESILVER) ?
+		    "resilvering" : "repairing");
 	}
 
 	(void) printf("\n");
@@ -1194,7 +1195,7 @@
 	    strcmp(type, VDEV_TYPE_HOLE) == 0)
 		return;
 
-	verify(nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_STATS,
+	verify(nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS,
 	    (uint64_t **)&vs, &c) == 0);
 
 	(void) printf("\t%*s%-*s", depth, "", namewidth - depth, name);
@@ -1333,7 +1334,7 @@
 	verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
 	    &nvroot) == 0);
 
-	verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS,
+	verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS,
 	    (uint64_t **)&vs, &vsc) == 0);
 	health = zpool_state_to_name(vs->vs_state, vs->vs_aux);
 
@@ -1400,6 +1401,11 @@
 		    "read.\n"));
 		break;
 
+	case ZPOOL_STATUS_RESILVERING:
+		(void) printf(gettext("status: One or more devices were being "
+		    "resilvered.\n"));
+		break;
+
 	default:
 		/*
 		 * No other status can be seen when importing pools.
@@ -1990,13 +1996,13 @@
 	char *vname;
 
 	if (oldnv != NULL) {
-		verify(nvlist_lookup_uint64_array(oldnv, ZPOOL_CONFIG_STATS,
-		    (uint64_t **)&oldvs, &c) == 0);
+		verify(nvlist_lookup_uint64_array(oldnv,
+		    ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&oldvs, &c) == 0);
 	} else {
 		oldvs = &zerovs;
 	}
 
-	verify(nvlist_lookup_uint64_array(newnv, ZPOOL_CONFIG_STATS,
+	verify(nvlist_lookup_uint64_array(newnv, ZPOOL_CONFIG_VDEV_STATS,
 	    (uint64_t **)&newvs, &c) == 0);
 
 	if (strlen(name) + depth > cb->cb_namewidth)
@@ -2046,6 +2052,12 @@
 		return;
 
 	for (c = 0; c < children; c++) {
+		uint64_t ishole = B_FALSE;
+
+		if (nvlist_lookup_uint64(newchild[c],
+		    ZPOOL_CONFIG_IS_HOLE, &ishole) == 0 && ishole)
+			continue;
+
 		vname = zpool_vdev_name(g_zfs, zhp, newchild[c], B_FALSE);
 		print_vdev_stats(zhp, vname, oldnv ? oldchild[c] : NULL,
 		    newchild[c], cb, depth + 2);
@@ -2157,55 +2169,14 @@
 }
 
 /*
- * zpool iostat [-T d|u] [-v] [pool] ... [interval [count]]
- *
- *	-T	Display a timestamp in date(1) or Unix format
- *	-v	Display statistics for individual vdevs
- *
- * This command can be tricky because we want to be able to deal with pool
- * creation/destruction as well as vdev configuration changes.  The bulk of this
- * processing is handled by the pool_list_* routines in zpool_iter.c.  We rely
- * on pool_list_update() to detect the addition of new pools.  Configuration
- * changes are all handled within libzfs.
+ * Parse the input string, get the 'interval' and 'count' value if there is one.
  */
-int
-zpool_do_iostat(int argc, char **argv)
+static void
+get_interval_count(int *argcp, char **argv, unsigned long *iv,
+    unsigned long *cnt)
 {
-	int c;
-	int ret;
-	int npools;
 	unsigned long interval = 0, count = 0;
-	zpool_list_t *list;
-	boolean_t verbose = B_FALSE;
-	iostat_cbdata_t cb;
-
-	/* check options */
-	while ((c = getopt(argc, argv, "T:v")) != -1) {
-		switch (c) {
-		case 'T':
-			if (optarg) {
-				if (*optarg == 'u')
-					timestamp_fmt = UDATE;
-				else if (*optarg == 'd')
-					timestamp_fmt = DDATE;
-				else
-					usage(B_FALSE);
-			} else {
-				usage(B_FALSE);
-			}
-			break;
-		case 'v':
-			verbose = B_TRUE;
-			break;
-		case '?':
-			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
-			    optopt);
-			usage(B_FALSE);
-		}
-	}
-
-	argc -= optind;
-	argv += optind;
+	int argc = *argcp, errno;
 
 	/*
 	 * Determine if the last argument is an integer or a pool name
@@ -2222,7 +2193,6 @@
 				    "cannot be zero\n"));
 				usage(B_FALSE);
 			}
-
 			/*
 			 * Ignore the last parameter
 			 */
@@ -2239,7 +2209,7 @@
 
 	/*
 	 * If the last argument is also an integer, then we have both a count
-	 * and an integer.
+	 * and an interval.
 	 */
 	if (argc > 0 && isdigit(argv[argc - 1][0])) {
 		char *end;
@@ -2264,6 +2234,66 @@
 		}
 	}
 
+	*iv = interval;
+	*cnt = count;
+	*argcp = argc;
+}
+
+static void
+get_timestamp_arg(char c)
+{
+	if (c == 'u')
+		timestamp_fmt = UDATE;
+	else if (c == 'd')
+		timestamp_fmt = DDATE;
+	else
+		usage(B_FALSE);
+}
+
+/*
+ * zpool iostat [-v] [-T d|u] [pool] ... [interval [count]]
+ *
+ *	-v	Display statistics for individual vdevs
+ *	-T	Display a timestamp in date(1) or Unix format
+ *
+ * This command can be tricky because we want to be able to deal with pool
+ * creation/destruction as well as vdev configuration changes.  The bulk of this
+ * processing is handled by the pool_list_* routines in zpool_iter.c.  We rely
+ * on pool_list_update() to detect the addition of new pools.  Configuration
+ * changes are all handled within libzfs.
+ */
+int
+zpool_do_iostat(int argc, char **argv)
+{
+	int c;
+	int ret;
+	int npools;
+	unsigned long interval = 0, count = 0;
+	zpool_list_t *list;
+	boolean_t verbose = B_FALSE;
+	iostat_cbdata_t cb;
+
+	/* check options */
+	while ((c = getopt(argc, argv, "T:v")) != -1) {
+		switch (c) {
+		case 'T':
+			get_timestamp_arg(*optarg);
+			break;
+		case 'v':
+			verbose = B_TRUE;
+			break;
+		case '?':
+			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
+			    optopt);
+			usage(B_FALSE);
+		}
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	get_interval_count(&argc, argv, &interval, &count);
+
 	/*
 	 * Construct the list of all interesting pools.
 	 */
@@ -2464,12 +2494,13 @@
 }
 
 /*
- * zpool list [-H] [-o prop[,prop]*] [pool] ...
+ * zpool list [-H] [-o prop[,prop]*] [-T d|u] [pool] ... [interval [count]]
  *
  *	-H	Scripted mode.  Don't display headers, and separate properties
  *		by a single tab.
  *	-o	List of properties to display.  Defaults to
  *		"name,size,allocated,free,capacity,health,altroot"
+ *	-T	Display a timestamp in date(1) or Unix format
  *
  * List all pools in the system, whether or not they're healthy.  Output space
  * statistics for each one, as well as health status summary.
@@ -2483,9 +2514,10 @@
 	static char default_props[] =
 	    "name,size,allocated,free,capacity,dedupratio,health,altroot";
 	char *props = default_props;
+	unsigned long interval = 0, count = 0;
 
 	/* check options */
-	while ((c = getopt(argc, argv, ":Ho:")) != -1) {
+	while ((c = getopt(argc, argv, ":Ho:T:")) != -1) {
 		switch (c) {
 		case 'H':
 			cb.cb_scripted = B_TRUE;
@@ -2493,6 +2525,9 @@
 		case 'o':
 			props = optarg;
 			break;
+		case 'T':
+			get_timestamp_arg(*optarg);
+			break;
 		case ':':
 			(void) fprintf(stderr, gettext("missing argument for "
 			    "'%c' option\n"), optopt);
@@ -2508,21 +2543,37 @@
 	argc -= optind;
 	argv += optind;
 
+	get_interval_count(&argc, argv, &interval, &count);
+
 	if (zprop_get_list(g_zfs, props, &cb.cb_proplist, ZFS_TYPE_POOL) != 0)
 		usage(B_FALSE);
 
 	cb.cb_first = B_TRUE;
 
-	ret = for_each_pool(argc, argv, B_TRUE, &cb.cb_proplist,
-	    list_callback, &cb);
+	for (;;) {
+
+		if (timestamp_fmt != NODATE)
+			print_timestamp(timestamp_fmt);
+
+		ret = for_each_pool(argc, argv, B_TRUE, &cb.cb_proplist,
+		    list_callback, &cb);
+
+		if (argc == 0 && cb.cb_first && !cb.cb_scripted) {
+			(void) printf(gettext("no pools available\n"));
+			zprop_free_list(cb.cb_proplist);
+			return (0);
+		}
+
+		if (interval == 0)
+			break;
+
+		if (count != 0 && --count == 0)
+			break;
+
+		(void) sleep(interval);
+	}
 
 	zprop_free_list(cb.cb_proplist);
-
-	if (argc == 0 && cb.cb_first && !cb.cb_scripted) {
-		(void) printf(gettext("no pools available\n"));
-		return (0);
-	}
-
 	return (ret);
 }
 
@@ -3106,7 +3157,7 @@
 		return (1);
 	}
 
-	err = zpool_scrub(zhp, cb->cb_type);
+	err = zpool_scan(zhp, cb->cb_type);
 
 	return (err != 0);
 }
@@ -3122,13 +3173,13 @@
 	int c;
 	scrub_cbdata_t cb;
 
-	cb.cb_type = POOL_SCRUB_EVERYTHING;
+	cb.cb_type = POOL_SCAN_SCRUB;
 
 	/* check options */
 	while ((c = getopt(argc, argv, "s")) != -1) {
 		switch (c) {
 		case 's':
-			cb.cb_type = POOL_SCRUB_NONE;
+			cb.cb_type = POOL_SCAN_NONE;
 			break;
 		case '?':
 			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
@@ -3163,62 +3214,103 @@
  * Print out detailed scrub status.
  */
 void
-print_scrub_status(nvlist_t *nvroot)
+print_scan_status(pool_scan_stat_t *ps)
 {
-	vdev_stat_t *vs;
-	uint_t vsc;
-	time_t start, end, now;
+	time_t start, end;
+	uint64_t elapsed, mins_left;
+	uint64_t pass_exam, examined, total;
+	uint_t rate;
 	double fraction_done;
-	uint64_t examined, total, minutes_left, minutes_taken;
-	char *scrub_type;
-
-	verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS,
-	    (uint64_t **)&vs, &vsc) == 0);
-
-	/*
-	 * If there's never been a scrub, there's not much to say.
-	 */
-	if (vs->vs_scrub_end == 0 && vs->vs_scrub_type == POOL_SCRUB_NONE) {
+	char processed_buf[7], examined_buf[7], total_buf[7], rate_buf[7];
+
+	(void) printf(gettext(" scan: "));
+
+	/* If there's never been a scan, there's not much to say. */
+	if (ps == NULL || ps->pss_func == POOL_SCAN_NONE ||
+	    ps->pss_func >= POOL_SCAN_FUNCS) {
 		(void) printf(gettext("none requested\n"));
 		return;
 	}
 
-	scrub_type = (vs->vs_scrub_type == POOL_SCRUB_RESILVER) ?
-	    "resilver" : "scrub";
-
-	start = vs->vs_scrub_start;
-	end = vs->vs_scrub_end;
-	now = time(NULL);
-	examined = vs->vs_scrub_examined;
-	total = vs->vs_alloc;
-
-	if (end != 0) {
-		minutes_taken = (uint64_t)((end - start) / 60);
-
-		(void) printf(gettext("%s %s after %lluh%um with %llu errors "
-		    "on %s"),
-		    scrub_type, vs->vs_scrub_complete ? "completed" : "stopped",
+	start = ps->pss_start_time;
+	end = ps->pss_end_time;
+	zfs_nicenum(ps->pss_processed, processed_buf, sizeof (processed_buf));
+
+	assert(ps->pss_func == POOL_SCAN_SCRUB ||
+	    ps->pss_func == POOL_SCAN_RESILVER);
+	/*
+	 * Scan is finished or canceled.
+	 */
+	if (ps->pss_state == DSS_FINISHED) {
+		uint64_t minutes_taken = (end - start) / 60;
+		char *fmt;
+
+		if (ps->pss_func == POOL_SCAN_SCRUB) {
+			fmt = gettext("scrub repaired %s in %lluh%um with "
+			    "%llu errors on %s");
+		} else if (ps->pss_func == POOL_SCAN_RESILVER) {
+			fmt = gettext("resilvered %s in %lluh%um with "
+			    "%llu errors on %s");
+		}
+		/* LINTED */
+		(void) printf(fmt, processed_buf,
 		    (u_longlong_t)(minutes_taken / 60),
 		    (uint_t)(minutes_taken % 60),
-		    (u_longlong_t)vs->vs_scrub_errors, ctime(&end));
+		    (u_longlong_t)ps->pss_errors,
+		    ctime((time_t *)&end));
+		return;
+	} else if (ps->pss_state == DSS_CANCELED) {
+		if (ps->pss_func == POOL_SCAN_SCRUB) {
+			(void) printf(gettext("scrub canceled on %s"),
+			    ctime(&end));
+		} else if (ps->pss_func == POOL_SCAN_RESILVER) {
+			(void) printf(gettext("resilver canceled on %s"),
+			    ctime(&end));
+		}
 		return;
 	}
 
-	if (examined == 0)
-		examined = 1;
-	if (examined > total)
-		total = examined;
-
+	assert(ps->pss_state == DSS_SCANNING);
+
+	/*
+	 * Scan is in progress.
+	 */
+	if (ps->pss_func == POOL_SCAN_SCRUB) {
+		(void) printf(gettext("scrub in progress since %s"),
+		    ctime(&start));
+	} else if (ps->pss_func == POOL_SCAN_RESILVER) {
+		(void) printf(gettext("resilver in progress since %s"),
+		    ctime(&start));
+	}
+
+	examined = ps->pss_examined ? ps->pss_examined : 1;
+	total = ps->pss_to_examine;
 	fraction_done = (double)examined / total;
-	minutes_left = (uint64_t)((now - start) *
-	    (1 - fraction_done) / fraction_done / 60);
-	minutes_taken = (uint64_t)((now - start) / 60);
-
-	(void) printf(gettext("%s in progress for %lluh%um, %.2f%% done, "
-	    "%lluh%um to go\n"),
-	    scrub_type, (u_longlong_t)(minutes_taken / 60),
-	    (uint_t)(minutes_taken % 60), 100 * fraction_done,
-	    (u_longlong_t)(minutes_left / 60), (uint_t)(minutes_left % 60));
+
+	/* elapsed time for this pass */
+	elapsed = time(NULL) - ps->pss_pass_start;
+	elapsed = elapsed ? elapsed : 1;
+	pass_exam = ps->pss_pass_exam ? ps->pss_pass_exam : 1;
+	rate = pass_exam / elapsed;
+	rate = rate ? rate : 1;
+	mins_left = ((total - examined) / rate) / 60;
+
+	zfs_nicenum(examined, examined_buf, sizeof (examined_buf));
+	zfs_nicenum(total, total_buf, sizeof (total_buf));
+	zfs_nicenum(rate, rate_buf, sizeof (rate_buf));
+
+	(void) printf(gettext("    %s scanned out of %s at "
+	    "%s/s, %lluh%um to go\n"), examined_buf, total_buf, rate_buf,
+	    (u_longlong_t)(mins_left / 60),
+	    (uint_t)(mins_left % 60));
+
+	if (ps->pss_func == POOL_SCAN_RESILVER) {
+		(void) printf(gettext("    %s resilvered, %.2f%% done\n"),
+		    processed_buf, 100 * fraction_done);
+	} else if (ps->pss_func == POOL_SCAN_SCRUB) {
+		(void) printf(gettext("    %s repaired, %.2f%% done\n"),
+		    processed_buf, 100 * fraction_done);
+	}
 }
 
 static void
@@ -3378,7 +3470,7 @@
 
 	verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
 	    &nvroot) == 0);
-	verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS,
+	verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS,
 	    (uint64_t **)&vs, &c) == 0);
 	health = zpool_state_to_name(vs->vs_state, vs->vs_aux);
 
@@ -3451,7 +3543,6 @@
 		    "replace'.\n"));
 		break;
 
-
 	case ZPOOL_STATUS_RESILVERING:
 		(void) printf(gettext("status: One or more devices is "
 		    "currently being resilvered.  The pool will\n\tcontinue "
@@ -3549,10 +3640,11 @@
 		uint64_t nerr;
 		nvlist_t **spares, **l2cache;
 		uint_t nspares, nl2cache;
-
-
-		(void) printf(gettext(" scrub: "));
-		print_scrub_status(nvroot);
+		pool_scan_stat_t *ps = NULL;
+
+		(void) nvlist_lookup_uint64_array(nvroot,
+		    ZPOOL_CONFIG_SCAN_STATS, (uint64_t **)&ps, &c);
+		print_scan_status(ps);
 
 		namewidth = max_width(zhp, nvroot, 0, 0);
 		if (namewidth < 10)
@@ -3620,11 +3712,12 @@
 }
 
 /*
- * zpool status [-vx] [pool] ...
+ * zpool status [-vx] [-T d|u] [pool] ... [interval [count]]
  *
  *	-v	Display complete error logs
  *	-x	Display only pools with potential problems
  *	-D	Display dedup status (undocumented)
+ *	-T	Display a timestamp in date(1) or Unix format
  *
  * Describes the health status of all pools or some subset.
  */
@@ -3633,10 +3726,11 @@
 {
 	int c;
 	int ret;
+	unsigned long interval = 0, count = 0;
 	status_cbdata_t cb = { 0 };
 
 	/* check options */
-	while ((c = getopt(argc, argv, "vxD")) != -1) {
+	while ((c = getopt(argc, argv, "vxDT:")) != -1) {
 		switch (c) {
 		case 'v':
 			cb.cb_verbose = B_TRUE;
@@ -3647,6 +3741,9 @@
 		case 'D':
 			cb.cb_dedup_stats = B_TRUE;
 			break;
+		case 'T':
+			get_timestamp_arg(*optarg);
+			break;
 		case '?':
 			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
 			    optopt);
@@ -3657,19 +3754,38 @@
 	argc -= optind;
 	argv += optind;
 
-	cb.cb_first = B_TRUE;
+	get_interval_count(&argc, argv, &interval, &count);
 
 	if (argc == 0)
 		cb.cb_allpools = B_TRUE;
 
-	ret = for_each_pool(argc, argv, B_TRUE, NULL, status_callback, &cb);
-
-	if (argc == 0 && cb.cb_count == 0)
-		(void) printf(gettext("no pools available\n"));
-	else if (cb.cb_explain && cb.cb_first && cb.cb_allpools)
-		(void) printf(gettext("all pools are healthy\n"));
-
-	return (ret);
+	cb.cb_first = B_TRUE;
+
+	for (;;) {
+		if (timestamp_fmt != NODATE)
+			print_timestamp(timestamp_fmt);
+
+		ret = for_each_pool(argc, argv, B_TRUE, NULL,
+		    status_callback, &cb);
+
+		if (argc == 0 && cb.cb_count == 0)
+			(void) printf(gettext("no pools available\n"));
+		else if (cb.cb_explain && cb.cb_first && cb.cb_allpools)
+			(void) printf(gettext("all pools are healthy\n"));
+
+		if (ret != 0)
+			return (ret);
+
+		if (interval == 0)
+			break;
+
+		if (count != 0 && --count == 0)
+			break;
+
+		(void) sleep(interval);
+	}
+
+	return (0);
 }
 
 typedef struct upgrade_cbdata {
@@ -3890,6 +4006,7 @@
 		(void) printf(gettext(" 22  Received properties\n"));
 		(void) printf(gettext(" 23  Slim ZIL\n"));
 		(void) printf(gettext(" 24  System attributes\n"));
+		(void) printf(gettext(" 25  Improved scrub stats\n"));
 		(void) printf(gettext("\nFor more information on a particular "
 		    "version, including supported releases, see:\n\n"));
 		(void) printf("http://www.opensolaris.org/os/community/zfs/"
@@ -3993,7 +4110,7 @@
 			(void) snprintf(internalstr,
 			    sizeof (internalstr),
 			    "[internal %s txg:%lld] %s",
-			    hist_event_table[ievent], txg,
+			    zfs_history_event_names[ievent], txg,
 			    pathstr);
 			cmdstr = internalstr;
 		}

--- a/usr/src/cmd/zpool/zpool_util.h	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/cmd/zpool/zpool_util.h	Mon May 03 14:54:08 2010 -0700
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #ifndef	ZPOOL_UTIL_H
@@ -45,7 +44,7 @@
  */
 
 nvlist_t *make_root_vdev(zpool_handle_t *zhp, int force, int check_rep,
-    boolean_t isreplace, boolean_t dryrun, int argc, char **argv);
+    boolean_t replacing, boolean_t dryrun, int argc, char **argv);
 nvlist_t *split_mirror_vdev(zpool_handle_t *zhp, char *newname,
     nvlist_t *props, splitflags_t flags, int argc, char **argv);

--- a/usr/src/cmd/zpool/zpool_vdev.c	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/cmd/zpool/zpool_vdev.c	Mon May 03 14:54:08 2010 -0700
@@ -20,8 +20,7 @@
  */
 
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 /*
@@ -1004,8 +1003,8 @@
 		return (B_FALSE);
 	}
 	free(name);
+	(void) close(fd);
 
-	(void) close(fd);
 	verify(nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID, &guid) == 0);
 	nvlist_free(label);
 
@@ -1029,8 +1028,8 @@
  * the majority of this task.
  */
 static int
-check_in_use(nvlist_t *config, nvlist_t *nv, int force, int isreplacing,
-    int isspare)
+check_in_use(nvlist_t *config, nvlist_t *nv, boolean_t force,
+    boolean_t replacing, boolean_t isspare)
 {
 	nvlist_t **child;
 	uint_t c, children;
@@ -1051,13 +1050,14 @@
 		 * hot spare within the same pool.  If so, we allow it
 		 * regardless of what libdiskmgt or zpool_in_use() says.
 		 */
-		if (isreplacing) {
+		if (replacing) {
 			if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
 			    &wholedisk) == 0 && wholedisk)
 				(void) snprintf(buf, sizeof (buf), "%ss0",
 				    path);
 			else
 				(void) strlcpy(buf, path, sizeof (buf));
+
 			if (is_spare(config, buf))
 				return (0);
 		}
@@ -1073,21 +1073,21 @@
 
 	for (c = 0; c < children; c++)
 		if ((ret = check_in_use(config, child[c], force,
-		    isreplacing, B_FALSE)) != 0)
+		    replacing, B_FALSE)) != 0)
 			return (ret);
 
 	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
 	    &child, &children) == 0)
 		for (c = 0; c < children; c++)
 			if ((ret = check_in_use(config, child[c], force,
-			    isreplacing, B_TRUE)) != 0)
+			    replacing, B_TRUE)) != 0)
 				return (ret);
 
 	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
 	    &child, &children) == 0)
 		for (c = 0; c < children; c++)
 			if ((ret = check_in_use(config, child[c], force,
-			    isreplacing, B_FALSE)) != 0)
+			    replacing, B_FALSE)) != 0)
 				return (ret);
 
 	return (0);
@@ -1419,7 +1419,7 @@
  */
 nvlist_t *
 make_root_vdev(zpool_handle_t *zhp, int force, int check_rep,
-    boolean_t isreplacing, boolean_t dryrun, int argc, char **argv)
+    boolean_t replacing, boolean_t dryrun, int argc, char **argv)
 {
 	nvlist_t *newroot;
 	nvlist_t *poolconfig = NULL;
@@ -1442,8 +1442,7 @@
 	 * uses (such as a dedicated dump device) that even '-f' cannot
 	 * override.
 	 */
-	if (check_in_use(poolconfig, newroot, force, isreplacing,
-	    B_FALSE) != 0) {
+	if (check_in_use(poolconfig, newroot, force, replacing, B_FALSE) != 0) {
 		nvlist_free(newroot);
 		return (NULL);
 	}

--- a/usr/src/cmd/ztest/ztest.c	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/cmd/ztest/ztest.c	Mon May 03 14:54:08 2010 -0700
@@ -93,6 +93,7 @@
 #include <sys/metaslab_impl.h>
 #include <sys/dsl_prop.h>
 #include <sys/dsl_dataset.h>
+#include <sys/dsl_scan.h>
 #include <sys/refcount.h>
 #include <stdio.h>
 #include <stdio_ext.h>
@@ -284,9 +285,9 @@
 	{ ztest_spa_rename,			1,	&zopt_rarely	},
 	{ ztest_scrub,				1,	&zopt_rarely	},
 	{ ztest_dsl_dataset_promote_busy,	1,	&zopt_rarely	},
-	{ ztest_vdev_attach_detach,		1,	&zopt_rarely	},
+	{ ztest_vdev_attach_detach,		1,	&zopt_rarely },
 	{ ztest_vdev_LUN_growth,		1,	&zopt_rarely	},
-	{ ztest_vdev_add_remove,		1,	&zopt_vdevtime	},
+	{ ztest_vdev_add_remove,		1,	&zopt_vdevtime },
 	{ ztest_vdev_aux_add_remove,		1,	&zopt_vdevtime	},
 };
 
@@ -4662,9 +4663,9 @@
 	ztest_shared_t *zs = ztest_shared;
 	spa_t *spa = zs->zs_spa;
 
-	(void) spa_scrub(spa, POOL_SCRUB_EVERYTHING);
+	(void) spa_scan(spa, POOL_SCAN_SCRUB);
 	(void) poll(NULL, 0, 100); /* wait a moment, then force a restart */
-	(void) spa_scrub(spa, POOL_SCRUB_EVERYTHING);
+	(void) spa_scan(spa, POOL_SCAN_SCRUB);
 }
 
 /*
@@ -4817,7 +4818,7 @@
 	 * Kick off a scrub to tickle scrub/export races.
 	 */
 	if (ztest_random(2) == 0)
-		(void) spa_scrub(spa, POOL_SCRUB_EVERYTHING);
+		(void) spa_scan(spa, POOL_SCAN_SCRUB);
 
 	pool_guid = spa_guid(spa);
 	spa_close(spa, FTAG);

--- a/usr/src/common/zfs/zfs_comutil.c	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/common/zfs/zfs_comutil.c	Mon May 03 14:54:08 2010 -0700
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 /*
@@ -157,3 +156,47 @@
 
 	return (version);
 }
+
+const char *zfs_history_event_names[LOG_END] = {
+	"invalid event",
+	"pool create",
+	"vdev add",
+	"pool remove",
+	"pool destroy",
+	"pool export",
+	"pool import",
+	"vdev attach",
+	"vdev replace",
+	"vdev detach",
+	"vdev online",
+	"vdev offline",
+	"vdev upgrade",
+	"pool clear",
+	"pool scrub",
+	"pool property set",
+	"create",
+	"clone",
+	"destroy",
+	"destroy_begin_sync",
+	"inherit",
+	"property set",
+	"quota set",
+	"permission update",
+	"permission remove",
+	"permission who remove",
+	"promote",
+	"receive",
+	"rename",
+	"reservation set",
+	"replay_inc_sync",
+	"replay_full_sync",
+	"rollback",
+	"snapshot",
+	"filesystem version upgrade",
+	"refquota set",
+	"refreservation set",
+	"pool scrub done",
+	"user hold",
+	"user release",
+	"pool split",
+};

--- a/usr/src/common/zfs/zfs_comutil.h	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/common/zfs/zfs_comutil.h	Mon May 03 14:54:08 2010 -0700
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #ifndef	_ZFS_COMUTIL_H
@@ -38,6 +37,7 @@
 
 extern int zfs_zpl_version_map(int spa_version);
 extern int zfs_spa_version_map(int zpl_version);
+extern const char *zfs_history_event_names[LOG_END];
 
 #ifdef	__cplusplus
 }

--- a/usr/src/grub/grub-0.97/stage2/zfs-include/zfs.h	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/grub/grub-0.97/stage2/zfs-include/zfs.h	Mon May 03 14:54:08 2010 -0700
@@ -17,8 +17,7 @@
  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #ifndef	_SYS_FS_ZFS_H
@@ -27,7 +26,7 @@
 /*
  * On-disk version number.
  */
-#define	SPA_VERSION			24ULL
+#define	SPA_VERSION			25ULL
 
 /*
  * The following are configuration names used in the nvlist describing a pool's

--- a/usr/src/lib/libzfs/common/libzfs.h	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/lib/libzfs/common/libzfs.h	Mon May 03 14:54:08 2010 -0700
@@ -20,8 +20,7 @@
  */
 
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #ifndef	_LIBZFS_H
@@ -119,6 +118,8 @@
 	EZFS_PIPEFAILED,	/* pipe create failed */
 	EZFS_THREADCREATEFAILED, /* thread create failed */
 	EZFS_POSTSPLIT_ONLINE,	/* onlining a disk after splitting it */
+	EZFS_SCRUBBING,		/* currently scrubbing */
+	EZFS_NO_SCRUB,		/* no active scrub */
 	EZFS_UNKNOWN
 };
 
@@ -224,7 +225,7 @@
 /*
  * Functions to manipulate pool and vdev state
  */
-extern int zpool_scrub(zpool_handle_t *, pool_scrub_type_t);
+extern int zpool_scan(zpool_handle_t *, pool_scan_func_t);
 extern int zpool_clear(zpool_handle_t *, const char *, nvlist_t *);
 
 extern int zpool_vdev_online(zpool_handle_t *, const char *, int,
@@ -354,7 +355,7 @@
  */
 struct zfs_cmd;
 
-extern const char *hist_event_table[LOG_END];
+extern const char *zfs_history_event_names[LOG_END];
 
 extern char *zpool_vdev_name(libzfs_handle_t *, zpool_handle_t *, nvlist_t *,
     boolean_t verbose);
@@ -526,8 +527,9 @@
 
 typedef boolean_t (snapfilter_cb_t)(zfs_handle_t *, void *);
 
-extern int zfs_send(zfs_handle_t *, const char *, const char *,
-    sendflags_t, int, snapfilter_cb_t, void *);
+extern int zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
+    sendflags_t flags, int outfd, snapfilter_cb_t filter_func,
+    void *cb_arg, nvlist_t **debugnvp);
 
 extern int zfs_promote(zfs_handle_t *);
 extern int zfs_hold(zfs_handle_t *, const char *, const char *, boolean_t,

--- a/usr/src/lib/libzfs/common/libzfs_pool.c	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/lib/libzfs/common/libzfs_pool.c	Mon May 03 14:54:08 2010 -0700
@@ -20,8 +20,7 @@
  */
 
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #include <ctype.h>
@@ -43,50 +42,6 @@
 #include "libzfs_impl.h"
 #include "zfs_comutil.h"
 
-const char *hist_event_table[LOG_END] = {
-	"invalid event",
-	"pool create",
-	"vdev add",
-	"pool remove",
-	"pool destroy",
-	"pool export",
-	"pool import",
-	"vdev attach",
-	"vdev replace",
-	"vdev detach",
-	"vdev online",
-	"vdev offline",
-	"vdev upgrade",
-	"pool clear",
-	"pool scrub",
-	"pool property set",
-	"create",
-	"clone",
-	"destroy",
-	"destroy_begin_sync",
-	"inherit",
-	"property set",
-	"quota set",
-	"permission update",
-	"permission remove",
-	"permission who remove",
-	"promote",
-	"receive",
-	"rename",
-	"reservation set",
-	"replay_inc_sync",
-	"replay_full_sync",
-	"rollback",
-	"snapshot",
-	"filesystem version upgrade",
-	"refquota set",
-	"refreservation set",
-	"pool scrub done",
-	"user hold",
-	"user release",
-	"pool split",
-};
-
 static int read_efi_label(nvlist_t *config, diskaddr_t *sb);
 
 #if defined(__i386) || defined(__amd64)
@@ -334,7 +289,8 @@
 			verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
 			    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
 			verify(nvlist_lookup_uint64_array(nvroot,
-			    ZPOOL_CONFIG_STATS, (uint64_t **)&vs, &vsc) == 0);
+			    ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc)
+			    == 0);
 
 			(void) strlcpy(buf, zpool_state_to_name(intval,
 			    vs->vs_aux), len);
@@ -1558,28 +1514,51 @@
 }
 
 /*
- * Scrub the pool.
+ * Scan the pool.
  */
 int
-zpool_scrub(zpool_handle_t *zhp, pool_scrub_type_t type)
+zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func)
 {
 	zfs_cmd_t zc = { 0 };
 	char msg[1024];
 	libzfs_handle_t *hdl = zhp->zpool_hdl;
 
 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
-	zc.zc_cookie = type;
-
-	if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_SCRUB, &zc) == 0)
+	zc.zc_cookie = func;
+
+	if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_SCAN, &zc) == 0 ||
+	    (errno == ENOENT && func != POOL_SCAN_NONE))
 		return (0);
 
-	(void) snprintf(msg, sizeof (msg),
-	    dgettext(TEXT_DOMAIN, "cannot scrub %s"), zc.zc_name);
-
-	if (errno == EBUSY)
-		return (zfs_error(hdl, EZFS_RESILVERING, msg));
-	else
+	if (func == POOL_SCAN_SCRUB) {
+		(void) snprintf(msg, sizeof (msg),
+		    dgettext(TEXT_DOMAIN, "cannot scrub %s"), zc.zc_name);
+	} else if (func == POOL_SCAN_NONE) {
+		(void) snprintf(msg, sizeof (msg),
+		    dgettext(TEXT_DOMAIN, "cannot cancel scrubbing %s"),
+		    zc.zc_name);
+	} else {
+		assert(!"unexpected result");
+	}
+
+	if (errno == EBUSY) {
+		nvlist_t *nvroot;
+		pool_scan_stat_t *ps = NULL;
+		uint_t psc;
+
+		verify(nvlist_lookup_nvlist(zhp->zpool_config,
+		    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
+		(void) nvlist_lookup_uint64_array(nvroot,
+		    ZPOOL_CONFIG_SCAN_STATS, (uint64_t **)&ps, &psc);
+		if (ps && ps->pss_func == POOL_SCAN_SCRUB)
+			return (zfs_error(hdl, EZFS_SCRUBBING, msg));
+		else
+			return (zfs_error(hdl, EZFS_RESILVERING, msg));
+	} else if (errno == ENOENT) {
+		return (zfs_error(hdl, EZFS_NO_SCRUB, msg));
+	} else {
 		return (zpool_standard_error(hdl, errno, msg));
+	}
 }
 
 /*
@@ -2987,7 +2966,7 @@
 		 * open a misbehaving device, which can have undesirable
 		 * effects.
 		 */
-		if ((nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_STATS,
+		if ((nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS,
 		    (uint64_t **)&vs, &vsc) != 0 ||
 		    vs->vs_state >= VDEV_STATE_DEGRADED) &&
 		    zhp != NULL &&

--- a/usr/src/lib/libzfs/common/libzfs_sendrecv.c	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/lib/libzfs/common/libzfs_sendrecv.c	Mon May 03 14:54:08 2010 -0700
@@ -852,6 +852,7 @@
 	avl_tree_t *fsavl;
 	snapfilter_cb_t *filter_cb;
 	void *filter_cb_arg;
+	nvlist_t *debugnv;
 } send_dump_data_t;
 
 /*
@@ -860,10 +861,11 @@
  */
 static int
 dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, boolean_t fromorigin,
-    int outfd, boolean_t enoent_ok, boolean_t *got_enoent)
+    int outfd, boolean_t enoent_ok, boolean_t *got_enoent, nvlist_t *debugnv)
 {
 	zfs_cmd_t zc = { 0 };
 	libzfs_handle_t *hdl = zhp->zfs_hdl;
+	nvlist_t *thisdbg;
 
 	assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
 	assert(fromsnap == NULL || fromsnap[0] == '\0' || !fromorigin);
@@ -876,11 +878,24 @@
 
 	*got_enoent = B_FALSE;
 
+	VERIFY(0 == nvlist_alloc(&thisdbg, NV_UNIQUE_NAME, 0));
+	if (fromsnap && fromsnap[0] != '\0') {
+		VERIFY(0 == nvlist_add_string(thisdbg,
+		    "fromsnap", fromsnap));
+	}
+
 	if (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_SEND, &zc) != 0) {
 		char errbuf[1024];
 		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
 		    "warning: cannot send '%s'"), zhp->zfs_name);
 
+		VERIFY(0 == nvlist_add_uint64(thisdbg, "error", errno));
+		if (debugnv) {
+			VERIFY(0 == nvlist_add_nvlist(debugnv,
+			    zhp->zfs_name, thisdbg));
+		}
+		nvlist_free(thisdbg);
+
 		switch (errno) {
 
 		case EXDEV:
@@ -920,6 +935,10 @@
 		}
 	}
 
+	if (debugnv)
+		VERIFY(0 == nvlist_add_nvlist(debugnv, zhp->zfs_name, thisdbg));
+	nvlist_free(thisdbg);
+
 	return (0);
 }
 
@@ -1000,7 +1019,7 @@
 
 	err = dump_ioctl(zhp, sdd->prevsnap,
 	    sdd->prevsnap[0] == '\0' && (sdd->fromorigin || sdd->replicate),
-	    sdd->outfd, B_TRUE, &got_enoent);
+	    sdd->outfd, B_TRUE, &got_enoent, sdd->debugnv);
 
 	if (got_enoent)
 		err = 0;
@@ -1176,7 +1195,7 @@
 int
 zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
     sendflags_t flags, int outfd, snapfilter_cb_t filter_func,
-    void *cb_arg)
+    void *cb_arg, nvlist_t **debugnvp)
 {
 	char errbuf[1024];
 	send_dump_data_t sdd = { 0 };
@@ -1276,7 +1295,10 @@
 			VERIFY(0 == nvlist_add_nvlist(hdrnv, "fss", fss));
 			err = nvlist_pack(hdrnv, &packbuf, &buflen,
 			    NV_ENCODE_XDR, 0);
-			nvlist_free(hdrnv);
+			if (debugnvp)
+				*debugnvp = hdrnv;
+			else
+				nvlist_free(hdrnv);
 			if (err) {
 				fsavl_destroy(fsavl);
 				nvlist_free(fss);
@@ -1351,6 +1373,8 @@
 	sdd.verbose = flags.verbose;
 	sdd.filter_cb = filter_func;
 	sdd.filter_cb_arg = cb_arg;
+	if (debugnvp)
+		sdd.debugnv = *debugnvp;
 	err = dump_filesystems(zhp, &sdd);
 	fsavl_destroy(fsavl);
 	nvlist_free(fss);

--- a/usr/src/lib/libzfs/common/libzfs_status.c	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/lib/libzfs/common/libzfs_status.c	Mon May 03 14:54:08 2010 -0700
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 /*
@@ -138,7 +137,7 @@
 			if (find_vdev_problem(child[c], func))
 				return (B_TRUE);
 	} else {
-		verify(nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_STATS,
+		verify(nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_VDEV_STATS,
 		    (uint64_t **)&vs, &c) == 0);
 
 		if (func(vs->vs_state, vs->vs_aux,
@@ -173,7 +172,8 @@
 {
 	nvlist_t *nvroot;
 	vdev_stat_t *vs;
-	uint_t vsc;
+	pool_scan_stat_t *ps = NULL;
+	uint_t vsc, psc;
 	uint64_t nerr;
 	uint64_t version;
 	uint64_t stateval;
@@ -184,15 +184,24 @@
 	    &version) == 0);
 	verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
 	    &nvroot) == 0);
-	verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS,
+	verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS,
 	    (uint64_t **)&vs, &vsc) == 0);
 	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
 	    &stateval) == 0);
-	(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, &hostid);
+
+	/*
+	 * Currently resilvering a vdev
+	 */
+	(void) nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_SCAN_STATS,
+	    (uint64_t **)&ps, &psc);
+	if (ps && ps->pss_func == POOL_SCAN_RESILVER &&
+	    ps->pss_state == DSS_SCANNING)
+		return (ZPOOL_STATUS_RESILVERING);
 
 	/*
 	 * Pool last accessed by another system.
 	 */
+	(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, &hostid);
 	if (hostid != 0 && (unsigned long)hostid != gethostid() &&
 	    stateval == POOL_STATE_ACTIVE)
 		return (ZPOOL_STATUS_HOSTID_MISMATCH);
@@ -289,12 +298,6 @@
 		return (ZPOOL_STATUS_REMOVED_DEV);
 
 	/*
-	 * Currently resilvering
-	 */
-	if (!vs->vs_scrub_complete && vs->vs_scrub_type == POOL_SCRUB_RESILVER)
-		return (ZPOOL_STATUS_RESILVERING);
-
-	/*
 	 * Outdated, but usable, version
 	 */
 	if (version < SPA_VERSION)

--- a/usr/src/lib/libzfs/common/libzfs_util.c	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/lib/libzfs/common/libzfs_util.c	Mon May 03 14:54:08 2010 -0700
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 /*
@@ -215,6 +214,11 @@
 	case EZFS_POSTSPLIT_ONLINE:
 		return (dgettext(TEXT_DOMAIN, "disk was split from this pool "
 		    "into a new one"));
+	case EZFS_SCRUBBING:
+		return (dgettext(TEXT_DOMAIN, "currently scrubbing; "
+		    "use 'zpool scrub -s' to cancel current scrub"));
+	case EZFS_NO_SCRUB:
+		return (dgettext(TEXT_DOMAIN, "there is no active scrub"));
 	case EZFS_UNKNOWN:
 		return (dgettext(TEXT_DOMAIN, "unknown error"));
 	default:

--- a/usr/src/lib/libzfs/common/mapfile-vers	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/lib/libzfs/common/mapfile-vers	Mon May 03 14:54:08 2010 -0700
@@ -19,8 +19,7 @@
 # CDDL HEADER END
 #
 #
-# Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
-# Use is subject to license terms.
+# Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
 #
 
 #
@@ -45,7 +44,6 @@
 	fletcher_4_byteswap;
 	fletcher_4_incremental_native;
 	fletcher_4_incremental_byteswap;
-	hist_event_table;
 	libzfs_errno;
 	libzfs_error_action;
 	libzfs_error_description;
@@ -73,6 +71,7 @@
 	zfs_get_pool_handle;
 	zfs_get_user_props;
 	zfs_get_type;
+	zfs_history_event_names;
 	zfs_hold;
 	zfs_hold_range;
 	zfs_is_mounted;
@@ -195,7 +194,7 @@
 	zpool_prop_values;
 	zpool_read_label;
 	zpool_refresh_stats;
-	zpool_scrub;
+	zpool_scan;
 	zpool_search_import;
 	zpool_set_history_str;
 	zpool_set_prop;

--- a/usr/src/lib/libzfs_jni/common/libzfs_jni_pool.c	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/lib/libzfs_jni/common/libzfs_jni_pool.c	Mon May 03 14:54:08 2010 -0700
@@ -20,8 +20,7 @@
  */
 
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #include "libzfs_jni_util.h"
@@ -1055,7 +1054,7 @@
 	vdev_stat_t *vs;
 
 	int result = nvlist_lookup_uint64_array(
-	    vdev, ZPOOL_CONFIG_STATS, (uint64_t **)&vs, &c);
+	    vdev, ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &c);
 	if (result != 0) {
 		zjni_throw_exception(env,
 		    "could not retrieve virtual device statistics");

--- a/usr/src/lib/libzpool/common/util.c	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/lib/libzpool/common/util.c	Mon May 03 14:54:08 2010 -0700
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #include <assert.h>
@@ -90,7 +89,7 @@
 		if (is_log)
 			prefix = "log ";
 
-		if (nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_STATS,
+		if (nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS,
 		    (uint64_t **)&vs, &c) != 0)
 			vs = &v0;

--- a/usr/src/uts/common/Makefile.files	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/uts/common/Makefile.files	Mon May 03 14:54:08 2010 -0700
@@ -1343,7 +1343,7 @@
 	dmu_zfetch.o		\
 	dsl_deleg.o		\
 	dsl_prop.o		\
-	dsl_scrub.o		\
+	dsl_scan.o		\
 	gzip.o			\
 	lzjb.o			\
 	metaslab.o		\
@@ -1372,6 +1372,7 @@
 	zap_leaf.o		\
 	zap_micro.o		\
 	zfs_byteswap.o		\
+	zfs_debug.o		\
 	zfs_fm.o		\
 	zfs_fuid.o		\
 	zfs_sa.o		\

--- a/usr/src/uts/common/fs/zfs/arc.c	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/arc.c	Mon May 03 14:54:08 2010 -0700
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 /*
@@ -437,6 +436,7 @@
 
 	kmutex_t		b_freeze_lock;
 	zio_cksum_t		*b_freeze_cksum;
+	void			*b_thawed;
 
 	arc_buf_hdr_t		*b_hash_next;
 	arc_buf_t		*b_buf;
@@ -545,8 +545,8 @@
 	(buf_hash(spa, dva, birth) & buf_hash_table.ht_mask)
 #define	BUF_HASH_LOCK_NTRY(idx) (buf_hash_table.ht_locks[idx & (BUF_LOCKS-1)])
 #define	BUF_HASH_LOCK(idx)	(&(BUF_HASH_LOCK_NTRY(idx).ht_lock))
-#define	HDR_LOCK(buf) \
-	(BUF_HASH_LOCK(BUF_HASH_INDEX(buf->b_spa, &buf->b_dva, buf->b_birth)))
+#define	HDR_LOCK(hdr) \
+	(BUF_HASH_LOCK(BUF_HASH_INDEX(hdr->b_spa, &hdr->b_dva, hdr->b_birth)))
 
 uint64_t zfs_crc64_table[256];
 
@@ -664,6 +664,15 @@
 	((buf)->b_dva.dva_word[1] == (dva)->dva_word[1]) &&	\
 	((buf)->b_birth == birth) && ((buf)->b_spa == spa)
 
+static void
+buf_discard_identity(arc_buf_hdr_t *hdr)
+{
+	hdr->b_dva.dva_word[0] = 0;
+	hdr->b_dva.dva_word[1] = 0;
+	hdr->b_birth = 0;
+	hdr->b_cksum0 = 0;
+}
+
 static arc_buf_hdr_t *
 buf_hash_find(uint64_t spa, const dva_t *dva, uint64_t birth, kmutex_t **lockp)
 {
@@ -797,7 +806,8 @@
 	arc_buf_t *buf = vbuf;
 
 	bzero(buf, sizeof (arc_buf_t));
-	rw_init(&buf->b_lock, NULL, RW_DEFAULT, NULL);
+	mutex_init(&buf->b_evict_lock, NULL, MUTEX_DEFAULT, NULL);
+	rw_init(&buf->b_data_lock, NULL, RW_DEFAULT, NULL);
 	arc_space_consume(sizeof (arc_buf_t), ARC_SPACE_HDRS);
 
 	return (0);
@@ -826,7 +836,8 @@
 {
 	arc_buf_t *buf = vbuf;
 
-	rw_destroy(&buf->b_lock);
+	mutex_destroy(&buf->b_evict_lock);
+	rw_destroy(&buf->b_data_lock);
 	arc_space_return(sizeof (arc_buf_t), ARC_SPACE_HDRS);
 }
 
@@ -941,6 +952,11 @@
 void
 arc_buf_thaw(arc_buf_t *buf)
 {
+	kmutex_t *hash_lock;
+
+	hash_lock = HDR_LOCK(buf->b_hdr);
+	mutex_enter(hash_lock);
+
 	if (zfs_flags & ZFS_DEBUG_MODIFY) {
 		if (buf->b_hdr->b_state != arc_anon)
 			panic("modifying non-anon buffer!");
@@ -954,18 +970,32 @@
 		kmem_free(buf->b_hdr->b_freeze_cksum, sizeof (zio_cksum_t));
 		buf->b_hdr->b_freeze_cksum = NULL;
 	}
+
+	if (zfs_flags & ZFS_DEBUG_MODIFY) {
+		if (buf->b_hdr->b_thawed)
+			kmem_free(buf->b_hdr->b_thawed, 1);
+		buf->b_hdr->b_thawed = kmem_alloc(1, KM_SLEEP);
+	}
+
 	mutex_exit(&buf->b_hdr->b_freeze_lock);
+	mutex_exit(hash_lock);
 }
 
 void
 arc_buf_freeze(arc_buf_t *buf)
 {
+	kmutex_t *hash_lock;
+
 	if (!(zfs_flags & ZFS_DEBUG_MODIFY))
 		return;
 
+	hash_lock = HDR_LOCK(buf->b_hdr);
+	mutex_enter(hash_lock);
+
 	ASSERT(buf->b_hdr->b_freeze_cksum != NULL ||
 	    buf->b_hdr->b_state == arc_anon);
 	arc_cksum_compute(buf, B_FALSE);
+	mutex_exit(hash_lock);
 }
 
 static void
@@ -1037,7 +1067,6 @@
 	ASSERT(new_state != old_state);
 	ASSERT(refcnt == 0 || ab->b_datacnt > 0);
 	ASSERT(ab->b_datacnt == 0 || !GHOST_STATE(new_state));
-	ASSERT(ab->b_datacnt <= 1 || new_state != arc_anon);
 	ASSERT(ab->b_datacnt <= 1 || old_state != arc_anon);
 
 	from_delta = to_delta = ab->b_datacnt * ab->b_size;
@@ -1059,7 +1088,7 @@
 
 			/*
 			 * If prefetching out of the ghost cache,
-			 * we will have a non-null datacnt.
+			 * we will have a non-zero datacnt.
 			 */
 			if (GHOST_STATE(old_state) && ab->b_datacnt == 0) {
 				/* ghost elements have a ghost size */
@@ -1095,9 +1124,8 @@
 	}
 
 	ASSERT(!BUF_EMPTY(ab));
-	if (new_state == arc_anon) {
+	if (new_state == arc_anon && HDR_IN_HASH_TABLE(ab))
 		buf_hash_remove(ab);
-	}
 
 	/* adjust state sizes */
 	if (to_delta)
@@ -1254,7 +1282,6 @@
 {
 	arc_buf_hdr_t *hdr;
 
-	rw_enter(&buf->b_lock, RW_WRITER);
 	ASSERT(buf->b_data != NULL);
 	hdr = buf->b_hdr;
 	(void) refcount_add(&hdr->b_refcnt, arc_onloan_tag);
@@ -1263,7 +1290,6 @@
 	buf->b_private = NULL;
 
 	atomic_add_64(&arc_loaned_bytes, hdr->b_size);
-	rw_exit(&buf->b_lock);
 }
 
 static arc_buf_t *
@@ -1299,16 +1325,16 @@
 	 * must verify b_data != NULL to know if the add_ref
 	 * was successful.
 	 */
-	rw_enter(&buf->b_lock, RW_READER);
+	mutex_enter(&buf->b_evict_lock);
 	if (buf->b_data == NULL) {
-		rw_exit(&buf->b_lock);
+		mutex_exit(&buf->b_evict_lock);
 		return;
 	}
+	hash_lock = HDR_LOCK(buf->b_hdr);
+	mutex_enter(hash_lock);
 	hdr = buf->b_hdr;
-	ASSERT(hdr != NULL);
-	hash_lock = HDR_LOCK(hdr);
-	mutex_enter(hash_lock);
-	rw_exit(&buf->b_lock);
+	ASSERT3P(hash_lock, ==, HDR_LOCK(hdr));
+	mutex_exit(&buf->b_evict_lock);
 
 	ASSERT(hdr->b_state == arc_mru || hdr->b_state == arc_mfu);
 	add_reference(hdr, hash_lock, tag);
@@ -1394,6 +1420,7 @@
 	for (bufp = &buf->b_hdr->b_buf; *bufp != buf; bufp = &(*bufp)->b_next)
 		continue;
 	*bufp = buf->b_next;
+	buf->b_next = NULL;
 
 	ASSERT(buf->b_efunc == NULL);
 
@@ -1442,23 +1469,21 @@
 
 	if (!BUF_EMPTY(hdr)) {
 		ASSERT(!HDR_IN_HASH_TABLE(hdr));
-		bzero(&hdr->b_dva, sizeof (dva_t));
-		hdr->b_birth = 0;
-		hdr->b_cksum0 = 0;
+		buf_discard_identity(hdr);
 	}
 	while (hdr->b_buf) {
 		arc_buf_t *buf = hdr->b_buf;
 
 		if (buf->b_efunc) {
 			mutex_enter(&arc_eviction_mtx);
-			rw_enter(&buf->b_lock, RW_WRITER);
+			mutex_enter(&buf->b_evict_lock);
 			ASSERT(buf->b_hdr != NULL);
 			arc_buf_destroy(hdr->b_buf, FALSE, FALSE);
 			hdr->b_buf = buf->b_next;
 			buf->b_hdr = &arc_eviction_hdr;
 			buf->b_next = arc_eviction_list;
 			arc_eviction_list = buf;
-			rw_exit(&buf->b_lock);
+			mutex_exit(&buf->b_evict_lock);
 			mutex_exit(&arc_eviction_mtx);
 		} else {
 			arc_buf_destroy(hdr->b_buf, FALSE, TRUE);
@@ -1468,6 +1493,10 @@
 		kmem_free(hdr->b_freeze_cksum, sizeof (zio_cksum_t));
 		hdr->b_freeze_cksum = NULL;
 	}
+	if (hdr->b_thawed) {
+		kmem_free(hdr->b_thawed, 1);
+		hdr->b_thawed = NULL;
+	}
 
 	ASSERT(!list_link_active(&hdr->b_arc_node));
 	ASSERT3P(hdr->b_hash_next, ==, NULL);
@@ -1488,6 +1517,9 @@
 		kmutex_t *hash_lock = HDR_LOCK(hdr);
 
 		mutex_enter(hash_lock);
+		hdr = buf->b_hdr;
+		ASSERT3P(hash_lock, ==, HDR_LOCK(hdr));
+
 		(void) remove_reference(hdr, hash_lock, tag);
 		if (hdr->b_datacnt > 1) {
 			arc_buf_destroy(buf, FALSE, TRUE);
@@ -1512,12 +1544,10 @@
 		if (destroy_hdr)
 			arc_hdr_destroy(hdr);
 	} else {
-		if (remove_reference(hdr, NULL, tag) > 0) {
-			ASSERT(HDR_IO_ERROR(hdr));
+		if (remove_reference(hdr, NULL, tag) > 0)
 			arc_buf_destroy(buf, FALSE, TRUE);
-		} else {
+		else
 			arc_hdr_destroy(hdr);
-		}
 	}
 }
 
@@ -1535,6 +1565,8 @@
 	}
 
 	mutex_enter(hash_lock);
+	hdr = buf->b_hdr;
+	ASSERT3P(hash_lock, ==, HDR_LOCK(hdr));
 	ASSERT(hdr->b_state != arc_anon);
 	ASSERT(buf->b_data != NULL);
 
@@ -1613,7 +1645,7 @@
 			ASSERT(ab->b_datacnt > 0);
 			while (ab->b_buf) {
 				arc_buf_t *buf = ab->b_buf;
-				if (!rw_tryenter(&buf->b_lock, RW_WRITER)) {
+				if (!mutex_tryenter(&buf->b_evict_lock)) {
 					missed += 1;
 					break;
 				}
@@ -1635,9 +1667,9 @@
 					buf->b_next = arc_eviction_list;
 					arc_eviction_list = buf;
 					mutex_exit(&arc_eviction_mtx);
-					rw_exit(&buf->b_lock);
+					mutex_exit(&buf->b_evict_lock);
 				} else {
-					rw_exit(&buf->b_lock);
+					mutex_exit(&buf->b_evict_lock);
 					arc_buf_destroy(buf,
 					    buf->b_data == stolen, TRUE);
 				}
@@ -1854,9 +1886,9 @@
 	while (arc_eviction_list != NULL) {
 		arc_buf_t *buf = arc_eviction_list;
 		arc_eviction_list = buf->b_next;
-		rw_enter(&buf->b_lock, RW_WRITER);
+		mutex_enter(&buf->b_evict_lock);
 		buf->b_hdr = NULL;
-		rw_exit(&buf->b_lock);
+		mutex_exit(&buf->b_evict_lock);
 		mutex_exit(&arc_eviction_mtx);
 
 		if (buf->b_efunc != NULL)
@@ -2438,7 +2470,8 @@
 void
 arc_bcopy_func(zio_t *zio, arc_buf_t *buf, void *arg)
 {
-	bcopy(buf->b_data, arg, buf->b_hdr->b_size);
+	if (zio == NULL || zio->io_error == 0)
+		bcopy(buf->b_data, arg, buf->b_hdr->b_size);
 	VERIFY(arc_buf_remove_ref(buf, arg) == 1);
 }
 
@@ -2452,6 +2485,7 @@
 		*bufp = NULL;
 	} else {
 		*bufp = buf;
+		ASSERT(buf->b_data);
 	}
 }
 
@@ -2606,13 +2640,22 @@
 {
 	int err;
 
+	if (pbuf == NULL) {
+		/*
+		 * XXX This happens from traverse callback funcs, for
+		 * the objset_phys_t block.
+		 */
+		return (arc_read_nolock(pio, spa, bp, done, private, priority,
+		    zio_flags, arc_flags, zb));
+	}
+
 	ASSERT(!refcount_is_zero(&pbuf->b_hdr->b_refcnt));
 	ASSERT3U((char *)bp - (char *)pbuf->b_data, <, pbuf->b_hdr->b_size);
-	rw_enter(&pbuf->b_lock, RW_READER);
+	rw_enter(&pbuf->b_data_lock, RW_READER);
 
 	err = arc_read_nolock(pio, spa, bp, done, private, priority,
 	    zio_flags, arc_flags, zb);
-	rw_exit(&pbuf->b_lock);
+	rw_exit(&pbuf->b_data_lock);
 
 	return (err);
 }
@@ -2721,9 +2764,7 @@
 			if (exists) {
 				/* somebody beat us to the hash insert */
 				mutex_exit(hash_lock);
-				bzero(&hdr->b_dva, sizeof (dva_t));
-				hdr->b_birth = 0;
-				hdr->b_cksum0 = 0;
+				buf_discard_identity(hdr);
 				(void) arc_buf_remove_ref(buf, private);
 				goto top; /* restart the IO request */
 			}
@@ -2901,14 +2942,14 @@
 	kmutex_t *hash_lock;
 	arc_buf_t **bufp;
 
-	rw_enter(&buf->b_lock, RW_WRITER);
+	mutex_enter(&buf->b_evict_lock);
 	hdr = buf->b_hdr;
 	if (hdr == NULL) {
 		/*
 		 * We are in arc_do_user_evicts().
 		 */
 		ASSERT(buf->b_data == NULL);
-		rw_exit(&buf->b_lock);
+		mutex_exit(&buf->b_evict_lock);
 		return (0);
 	} else if (buf->b_data == NULL) {
 		arc_buf_t copy = *buf; /* structure assignment */
@@ -2917,14 +2958,15 @@
 		 * but let arc_do_user_evicts() do the reaping.
 		 */
 		buf->b_efunc = NULL;
-		rw_exit(&buf->b_lock);
+		mutex_exit(&buf->b_evict_lock);
 		VERIFY(copy.b_efunc(&copy) == 0);
 		return (1);
 	}
 	hash_lock = HDR_LOCK(hdr);
 	mutex_enter(hash_lock);
-
-	ASSERT(buf->b_hdr == hdr);
+	hdr = buf->b_hdr;
+	ASSERT3P(hash_lock, ==, HDR_LOCK(hdr));
+
 	ASSERT3U(refcount_count(&hdr->b_refcnt), <, hdr->b_datacnt);
 	ASSERT(hdr->b_state == arc_mru || hdr->b_state == arc_mfu);
 
@@ -2943,6 +2985,7 @@
 		arc_state_t *old_state = hdr->b_state;
 		arc_state_t *evicted_state;
 
+		ASSERT(hdr->b_buf == NULL);
 		ASSERT(refcount_is_zero(&hdr->b_refcnt));
 
 		evicted_state =
@@ -2960,12 +3003,13 @@
 		mutex_exit(&old_state->arcs_mtx);
 	}
 	mutex_exit(hash_lock);
-	rw_exit(&buf->b_lock);
+	mutex_exit(&buf->b_evict_lock);
 
 	VERIFY(buf->b_efunc(buf) == 0);
 	buf->b_efunc = NULL;
 	buf->b_private = NULL;
 	buf->b_hdr = NULL;
+	buf->b_next = NULL;
 	kmem_cache_free(buf_cache, buf);
 	return (1);
 }
@@ -2980,12 +3024,17 @@
 arc_release(arc_buf_t *buf, void *tag)
 {
 	arc_buf_hdr_t *hdr;
-	kmutex_t *hash_lock;
+	kmutex_t *hash_lock = NULL;
 	l2arc_buf_hdr_t *l2hdr;
 	uint64_t buf_size;
-	boolean_t released = B_FALSE;
-
-	rw_enter(&buf->b_lock, RW_WRITER);
+
+	/*
+	 * It would be nice to assert that if it's DMU metadata (level >
+	 * 0 || it's the dnode file), then it must be syncing context.
+	 * But we don't know that information at this level.
+	 */
+
+	mutex_enter(&buf->b_evict_lock);
 	hdr = buf->b_hdr;
 
 	/* this buffer is not on any list */
@@ -2993,15 +3042,12 @@
 
 	if (hdr->b_state == arc_anon) {
 		/* this buffer is already released */
-		ASSERT3U(refcount_count(&hdr->b_refcnt), ==, 1);
-		ASSERT(BUF_EMPTY(hdr));
 		ASSERT(buf->b_efunc == NULL);
-		arc_buf_thaw(buf);
-		rw_exit(&buf->b_lock);
-		released = B_TRUE;
 	} else {
 		hash_lock = HDR_LOCK(hdr);
 		mutex_enter(hash_lock);
+		hdr = buf->b_hdr;
+		ASSERT3P(hash_lock, ==, HDR_LOCK(hdr));
 	}
 
 	l2hdr = hdr->b_l2hdr;
@@ -3011,9 +3057,6 @@
 		buf_size = hdr->b_size;
 	}
 
-	if (released)
-		goto out;
-
 	/*
 	 * Do we have more than one buf?
 	 */
@@ -3027,14 +3070,14 @@
 
 		ASSERT(hdr->b_buf != buf || buf->b_next != NULL);
 		/*
-		 * Pull the data off of this buf and attach it to
-		 * a new anonymous buf.
+		 * Pull the data off of this hdr and attach it to
+		 * a new anonymous hdr.
 		 */
 		(void) remove_reference(hdr, hash_lock, tag);
 		bufp = &hdr->b_buf;
 		while (*bufp != buf)
 			bufp = &(*bufp)->b_next;
-		*bufp = (*bufp)->b_next;
+		*bufp = buf->b_next;
 		buf->b_next = NULL;
 
 		ASSERT3U(hdr->b_state->arcs_size, >=, hdr->b_size);
@@ -3062,26 +3105,25 @@
 		nhdr->b_freeze_cksum = NULL;
 		(void) refcount_add(&nhdr->b_refcnt, tag);
 		buf->b_hdr = nhdr;
-		rw_exit(&buf->b_lock);
+		mutex_exit(&buf->b_evict_lock);
 		atomic_add_64(&arc_anon->arcs_size, blksz);
 	} else {
-		rw_exit(&buf->b_lock);
+		mutex_exit(&buf->b_evict_lock);
 		ASSERT(refcount_count(&hdr->b_refcnt) == 1);
 		ASSERT(!list_link_active(&hdr->b_arc_node));
 		ASSERT(!HDR_IO_IN_PROGRESS(hdr));
-		arc_change_state(arc_anon, hdr, hash_lock);
+		if (hdr->b_state != arc_anon)
+			arc_change_state(arc_anon, hdr, hash_lock);
 		hdr->b_arc_access = 0;
-		mutex_exit(hash_lock);
-
-		bzero(&hdr->b_dva, sizeof (dva_t));
-		hdr->b_birth = 0;
-		hdr->b_cksum0 = 0;
+		if (hash_lock)
+			mutex_exit(hash_lock);
+
+		buf_discard_identity(hdr);
 		arc_buf_thaw(buf);
 	}
 	buf->b_efunc = NULL;
 	buf->b_private = NULL;
 
-out:
 	if (l2hdr) {
 		list_remove(l2hdr->b_dev->l2ad_buflist, hdr);
 		kmem_free(l2hdr, sizeof (l2arc_buf_hdr_t));
@@ -3090,14 +3132,27 @@
 	}
 }
 
+/*
+ * Release this buffer.  If it does not match the provided BP, fill it
+ * with that block's contents.
+ */
+/* ARGSUSED */
+int
+arc_release_bp(arc_buf_t *buf, void *tag, blkptr_t *bp, spa_t *spa,
+    zbookmark_t *zb)
+{
+	arc_release(buf, tag);
+	return (0);
+}
+
 int
 arc_released(arc_buf_t *buf)
 {
 	int released;
 
-	rw_enter(&buf->b_lock, RW_READER);
+	mutex_enter(&buf->b_evict_lock);
 	released = (buf->b_data != NULL && buf->b_hdr->b_state == arc_anon);
-	rw_exit(&buf->b_lock);
+	mutex_exit(&buf->b_evict_lock);
 	return (released);
 }
 
@@ -3106,9 +3161,9 @@
 {
 	int callback;
 
-	rw_enter(&buf->b_lock, RW_READER);
+	mutex_enter(&buf->b_evict_lock);
 	callback = (buf->b_efunc != NULL);
-	rw_exit(&buf->b_lock);
+	mutex_exit(&buf->b_evict_lock);
 	return (callback);
 }
 
@@ -3118,9 +3173,9 @@
 {
 	int referenced;
 
-	rw_enter(&buf->b_lock, RW_READER);
+	mutex_enter(&buf->b_evict_lock);
 	referenced = (refcount_count(&buf->b_hdr->b_refcnt));
-	rw_exit(&buf->b_lock);
+	mutex_exit(&buf->b_evict_lock);
 	return (referenced);
 }
 #endif
@@ -3173,8 +3228,8 @@
 	/*
 	 * If the block to be written was all-zero, we may have
 	 * compressed it away.  In this case no write was performed
-	 * so there will be no dva/birth-date/checksum.  The buffer
-	 * must therefor remain anonymous (and uncached).
+	 * so there will be no dva/birth/checksum.  The buffer must
+	 * therefore remain anonymous (and uncached).
 	 */
 	if (!BUF_EMPTY(hdr)) {
 		arc_buf_hdr_t *exists;
@@ -3278,9 +3333,7 @@
 			if (HDR_IN_HASH_TABLE(ab))
 				buf_hash_remove(ab);
 			ab->b_arc_access = 0;
-			bzero(&ab->b_dva, sizeof (dva_t));
-			ab->b_birth = 0;
-			ab->b_cksum0 = 0;
+			buf_discard_identity(ab);
 			ab->b_buf->b_efunc = NULL;
 			ab->b_buf->b_private = NULL;
 			mutex_exit(hash_lock);
@@ -3974,11 +4027,11 @@
 	ASSERT(cb != NULL);
 	buf = cb->l2rcb_buf;
 	ASSERT(buf != NULL);
+
+	hash_lock = HDR_LOCK(buf->b_hdr);
+	mutex_enter(hash_lock);
 	hdr = buf->b_hdr;
-	ASSERT(hdr != NULL);
-
-	hash_lock = HDR_LOCK(hdr);
-	mutex_enter(hash_lock);
+	ASSERT3P(hash_lock, ==, HDR_LOCK(hdr));
 
 	/*
 	 * Check this survived the L2ARC journey.

--- a/usr/src/uts/common/fs/zfs/bplist.c	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/bplist.c	Mon May 03 14:54:08 2010 -0700
@@ -175,23 +175,26 @@
 		return (err);
 	}
 
-	if (*itorp >= bpl->bpl_phys->bpl_entries) {
-		mutex_exit(&bpl->bpl_lock);
-		return (ENOENT);
-	}
+	do {
+		if (*itorp >= bpl->bpl_phys->bpl_entries) {
+			mutex_exit(&bpl->bpl_lock);
+			return (ENOENT);
+		}
 
-	blk = *itorp >> bpl->bpl_bpshift;
-	off = P2PHASE(*itorp, 1ULL << bpl->bpl_bpshift);
+		blk = *itorp >> bpl->bpl_bpshift;
+		off = P2PHASE(*itorp, 1ULL << bpl->bpl_bpshift);
 
-	err = bplist_cache(bpl, blk);
-	if (err) {
-		mutex_exit(&bpl->bpl_lock);
-		return (err);
-	}
+		err = bplist_cache(bpl, blk);
+		if (err) {
+			mutex_exit(&bpl->bpl_lock);
+			return (err);
+		}
 
-	bparray = bpl->bpl_cached_dbuf->db_data;
-	*bp = bparray[off];
-	(*itorp)++;
+		bparray = bpl->bpl_cached_dbuf->db_data;
+		*bp = bparray[off];
+		(*itorp)++;
+	} while (bp->blk_birth == 0);
+
 	mutex_exit(&bpl->bpl_lock);
 	return (0);
 }
@@ -206,8 +209,10 @@
 	ASSERT(!BP_IS_HOLE(bp));
 	mutex_enter(&bpl->bpl_lock);
 	err = bplist_hold(bpl);
-	if (err)
+	if (err) {
+		mutex_exit(&bpl->bpl_lock);
 		return (err);
+	}
 
 	blk = bpl->bpl_phys->bpl_entries >> bpl->bpl_bpshift;
 	off = P2PHASE(bpl->bpl_phys->bpl_entries, 1ULL << bpl->bpl_bpshift);

--- a/usr/src/uts/common/fs/zfs/dbuf.c	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/dbuf.c	Mon May 03 14:54:08 2010 -0700
@@ -363,6 +363,7 @@
 		}
 	}
 	if ((db->db_blkptr == NULL || BP_IS_HOLE(db->db_blkptr)) &&
+	    (db->db_buf == NULL || db->db_buf->b_data) &&
 	    db->db.db_data && db->db_blkid != DMU_BONUS_BLKID &&
 	    db->db_state != DB_FILL && !dn->dn_free_txg) {
 		/*
@@ -477,8 +478,7 @@
 		db->db_state = DB_UNCACHED;
 	}
 	cv_broadcast(&db->db_changed);
-	mutex_exit(&db->db_mtx);
-	dbuf_rele(db, NULL);
+	dbuf_rele_and_unlock(db, NULL);
 }
 
 static void
@@ -549,7 +549,7 @@
 	else
 		pbuf = db->db_objset->os_phys_buf;
 
-	(void) arc_read(zio, dn->dn_objset->os_spa, db->db_blkptr, pbuf,
+	(void) dsl_read(zio, dn->dn_objset->os_spa, db->db_blkptr, pbuf,
 	    dbuf_read_done, db, ZIO_PRIORITY_SYNC_READ,
 	    (*flags & DB_RF_CANFAIL) ? ZIO_FLAG_CANFAIL : ZIO_FLAG_MUSTSUCCEED,
 	    &aflags, &zb);
@@ -727,7 +727,7 @@
 
 	/* free this block */
 	if (!BP_IS_HOLE(bp))
-		dsl_free(spa_get_dsl(db->db_dnode->dn_objset->os_spa), txg, bp);
+		zio_free(db->db_dnode->dn_objset->os_spa, txg, bp);
 
 	dr->dt.dl.dr_override_state = DR_NOT_OVERRIDDEN;
 	/*
@@ -921,6 +921,26 @@
 	dnode_willuse_space(db->db_dnode, size-osize, tx);
 }
 
+void
+dbuf_release_bp(dmu_buf_impl_t *db)
+{
+	objset_t *os = db->db_dnode->dn_objset;
+	zbookmark_t zb;
+
+	ASSERT(dsl_pool_sync_context(dmu_objset_pool(os)));
+	ASSERT(arc_released(os->os_phys_buf) ||
+	    list_link_active(&os->os_dsl_dataset->ds_synced_link));
+	ASSERT(db->db_parent == NULL || arc_released(db->db_parent->db_buf));
+
+	zb.zb_objset = os->os_dsl_dataset ?
+	    os->os_dsl_dataset->ds_object : 0;
+	zb.zb_object = db->db.db_object;
+	zb.zb_level = db->db_level;
+	zb.zb_blkid = db->db_blkid;
+	(void) arc_release_bp(db->db_buf, db,
+	    db->db_blkptr, os->os_spa, &zb);
+}
+
 dbuf_dirty_record_t *
 dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
 {
@@ -1717,7 +1737,7 @@
 			else
 				pbuf = dn->dn_objset->os_phys_buf;
 
-			(void) arc_read(NULL, dn->dn_objset->os_spa,
+			(void) dsl_read(NULL, dn->dn_objset->os_spa,
 			    bp, pbuf, NULL, NULL, ZIO_PRIORITY_ASYNC_READ,
 			    ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE,
 			    &aflags, &zb);
@@ -2463,7 +2483,7 @@
 			if (BP_IS_HOLE(db->db_blkptr)) {
 				arc_buf_thaw(data);
 			} else {
-				arc_release(data, db);
+				dbuf_release_bp(db);
 			}
 		}
 	}

--- a/usr/src/uts/common/fs/zfs/ddt.c	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/ddt.c	Mon May 03 14:54:08 2010 -0700
@@ -20,8 +20,7 @@
  */
 
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #include <sys/zfs_context.h>
@@ -35,6 +34,7 @@
 #include <sys/dsl_pool.h>
 #include <sys/zio_checksum.h>
 #include <sys/zio_compress.h>
+#include <sys/dsl_scan.h>
 
 static const ddt_ops_t *ddt_ops[DDT_TYPES] = {
 	&ddt_zap_ops,
@@ -160,7 +160,7 @@
 	    ddt->ddt_object[type][class], dde));
 }
 
-static int
+int
 ddt_object_update(ddt_t *ddt, enum ddt_type type, enum ddt_class class,
     ddt_entry_t *dde, dmu_tx_t *tx)
 {
@@ -245,12 +245,13 @@
 		ddt_bp_fill(ddp, bp, ddp->ddp_phys_birth);
 
 	bp->blk_cksum = ddk->ddk_cksum;
+	bp->blk_fill = 1;
 
 	BP_SET_LSIZE(bp, DDK_GET_LSIZE(ddk));
 	BP_SET_PSIZE(bp, DDK_GET_PSIZE(ddk));
 	BP_SET_COMPRESS(bp, DDK_GET_COMPRESS(ddk));
 	BP_SET_CHECKSUM(bp, checksum);
-	BP_SET_TYPE(bp, DMU_OT_NONE);
+	BP_SET_TYPE(bp, DMU_OT_DEDUP);
 	BP_SET_LEVEL(bp, 0);
 	BP_SET_DEDUP(bp, 0);
 	BP_SET_BYTEORDER(bp, ZFS_HOST_BYTEORDER);
@@ -996,10 +997,17 @@
 			ddt_object_create(ddt, ntype, nclass, tx);
 		VERIFY(ddt_object_update(ddt, ntype, nclass, dde, tx) == 0);
 
-		if (dp->dp_scrub_func != SCRUB_FUNC_NONE &&
-		    oclass > nclass &&
-		    nclass <= dp->dp_scrub_ddt_class_max)
-			dsl_pool_scrub_ddt_entry(dp, ddt->ddt_checksum, dde);
+		/*
+		 * If the class changes, the order that we scan this bp
+		 * changes.  If it decreases, we could miss it, so
+		 * scan it right now.  (This covers both class changing
+		 * while we are doing ddt_walk(), and when we are
+		 * traversing.)
+		 */
+		if (nclass < oclass) {
+			dsl_scan_ddt_entry(dp->dp_scan,
+			    ddt->ddt_checksum, dde, tx);
+		}
 	}
 }
 
@@ -1013,7 +1021,6 @@
 	if (avl_numnodes(&ddt->ddt_tree) == 0)
 		return;
 
-	ASSERT(spa_sync_pass(spa) == 1);
 	ASSERT(spa->spa_uberblock.ub_version >= SPA_VERSION_DEDUP);
 
 	if (spa->spa_ddt_stat_object == 0) {
@@ -1081,6 +1088,8 @@
 					    ddb->ddb_type, ddb->ddb_class,
 					    &ddb->ddb_cursor, dde);
 				}
+				dde->dde_type = ddb->ddb_type;
+				dde->dde_class = ddb->ddb_class;
 				if (error == 0)
 					return (0);
 				if (error != ENOENT)

--- a/usr/src/uts/common/fs/zfs/dmu.c	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/dmu.c	Mon May 03 14:54:08 2010 -0700
@@ -84,7 +84,7 @@
 	{	byteswap_uint8_array,	TRUE,	"FUID table"		},
 	{	byteswap_uint64_array,	TRUE,	"FUID table size"	},
 	{	zap_byteswap,		TRUE,	"DSL dataset next clones"},
-	{	zap_byteswap,		TRUE,	"scrub work queue"	},
+	{	zap_byteswap,		TRUE,	"scan work queue"	},
 	{	zap_byteswap,		TRUE,	"ZFS user/group used"	},
 	{	zap_byteswap,		TRUE,	"ZFS user/group quota"	},
 	{	zap_byteswap,		TRUE,	"snapshot refcount tags"},
@@ -93,7 +93,10 @@
 	{	byteswap_uint8_array,	TRUE,	"System attributes"	},
 	{	zap_byteswap,		TRUE,	"SA master node"	},
 	{	zap_byteswap,		TRUE,	"SA attr registration"	},
-	{	zap_byteswap,		TRUE,	"SA attr layouts"	}, };
+	{	zap_byteswap,		TRUE,	"SA attr layouts"	},
+	{	zap_byteswap,		TRUE,	"scan translations"	},
+	{	byteswap_uint8_array,	FALSE,	"deduplicated block"	},
+};
 
 int
 dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset,
@@ -1630,6 +1633,7 @@
 void
 dmu_init(void)
 {
+	zfs_dbgmsg_init();
 	dbuf_init();
 	dnode_init();
 	zfetch_init();
@@ -1649,4 +1653,5 @@
 	l2arc_fini();
 	xuio_stat_fini();
 	sa_cache_fini();
+	zfs_dbgmsg_fini();
 }

--- a/usr/src/uts/common/fs/zfs/dmu_objset.c	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/dmu_objset.c	Mon May 03 14:54:08 2010 -0700
@@ -259,11 +259,11 @@
 
 		dprintf_bp(os->os_rootbp, "reading %s", "");
 		/*
-		 * NB: when bprewrite scrub can change the bp,
+		 * XXX when bprewrite scrub can change the bp,
 		 * and this is called from dmu_objset_open_ds_os, the bp
 		 * could change, and we'll need a lock.
 		 */
-		err = arc_read_nolock(NULL, spa, os->os_rootbp,
+		err = dsl_read_nolock(NULL, spa, os->os_rootbp,
 		    arc_getbuf_func, &os->os_phys_buf,
 		    ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &aflags, &zb);
 		if (err) {
@@ -628,6 +628,7 @@
 	const char *lastname;
 	dmu_objset_type_t type;
 	uint64_t flags;
+	cred_t *cr;
 };
 
 /*ARGSUSED*/
@@ -659,7 +660,7 @@
 }
 
 static void
-dmu_objset_create_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
+dmu_objset_create_sync(void *arg1, void *arg2, dmu_tx_t *tx)
 {
 	dsl_dir_t *dd = arg1;
 	struct oscarg *oa = arg2;
@@ -668,7 +669,7 @@
 	ASSERT(dmu_tx_is_syncing(tx));
 
 	dsobj = dsl_dataset_create_sync(dd, oa->lastname,
-	    oa->clone_origin, oa->flags, cr, tx);
+	    oa->clone_origin, oa->flags, oa->cr, tx);
 
 	if (oa->clone_origin == NULL) {
 		dsl_dataset_t *ds;
@@ -684,12 +685,12 @@
 		    ds, bp, oa->type, tx);
 
 		if (oa->userfunc)
-			oa->userfunc(os, oa->userarg, cr, tx);
+			oa->userfunc(os, oa->userarg, oa->cr, tx);
 		dsl_dataset_rele(ds, FTAG);
 	}
 
-	spa_history_internal_log(LOG_DS_CREATE, dd->dd_pool->dp_spa,
-	    tx, cr, "dataset = %llu", dsobj);
+	spa_history_log_internal(LOG_DS_CREATE, dd->dd_pool->dp_spa,
+	    tx, "dataset = %llu", dsobj);
 }
 
 int
@@ -715,6 +716,7 @@
 	oa.lastname = tail;
 	oa.type = type;
 	oa.flags = flags;
+	oa.cr = CRED();
 
 	err = dsl_sync_task_do(pdd->dd_pool, dmu_objset_create_check,
 	    dmu_objset_create_sync, pdd, &oa, 5);
@@ -742,6 +744,7 @@
 	oa.lastname = tail;
 	oa.clone_origin = clone_origin;
 	oa.flags = flags;
+	oa.cr = CRED();
 
 	err = dsl_sync_task_do(pdd->dd_pool, dmu_objset_create_check,
 	    dmu_objset_create_sync, pdd, &oa, 5);
@@ -795,19 +798,19 @@
 }
 
 static void
-snapshot_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
+snapshot_sync(void *arg1, void *arg2, dmu_tx_t *tx)
 {
 	objset_t *os = arg1;
 	dsl_dataset_t *ds = os->os_dsl_dataset;
 	struct snaparg *sn = arg2;
 
-	dsl_dataset_snapshot_sync(ds, sn->snapname, cr, tx);
+	dsl_dataset_snapshot_sync(ds, sn->snapname, tx);
 
 	if (sn->props) {
 		dsl_props_arg_t pa;
 		pa.pa_props = sn->props;
 		pa.pa_source = ZPROP_SRC_LOCAL;
-		dsl_props_set_sync(ds->ds_prev, &pa, cr, tx);
+		dsl_props_set_sync(ds->ds_prev, &pa, tx);
 	}
 }
 
@@ -1016,11 +1019,11 @@
 	/*
 	 * Create the root block IO
 	 */
-	arc_release(os->os_phys_buf, &os->os_phys_buf);
-
 	SET_BOOKMARK(&zb, os->os_dsl_dataset ?
 	    os->os_dsl_dataset->ds_object : DMU_META_OBJSET,
 	    ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
+	VERIFY3U(0, ==, arc_release_bp(os->os_phys_buf, &os->os_phys_buf,
+	    os->os_rootbp, os->os_spa, &zb));
 
 	dmu_write_policy(os, NULL, 0, 0, &zp);
 
@@ -1082,7 +1085,7 @@
 	    !list_is_empty(&os->os_free_dnodes[txg & TXG_MASK]));
 }
 
-static objset_used_cb_t *used_cbs[DMU_OST_NUMTYPES];
+objset_used_cb_t *used_cbs[DMU_OST_NUMTYPES];
 
 void
 dmu_objset_register_type(dmu_objset_type_t ost, objset_used_cb_t *cb)
@@ -1649,7 +1652,7 @@
 			SET_BOOKMARK(&zb, ds->ds_object, ZB_ROOT_OBJECT,
 			    ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
 
-			(void) arc_read_nolock(NULL, dsl_dataset_get_spa(ds),
+			(void) dsl_read_nolock(NULL, dsl_dataset_get_spa(ds),
 			    &ds->ds_phys->ds_bp, NULL, NULL,
 			    ZIO_PRIORITY_ASYNC_READ,
 			    ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE,

--- a/usr/src/uts/common/fs/zfs/dmu_send.c	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/dmu_send.c	Mon May 03 14:54:08 2010 -0700
@@ -301,7 +301,7 @@
 
 /* ARGSUSED */
 static int
-backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
+backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf,
     const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
 {
 	struct backuparg *ba = arg;
@@ -330,7 +330,7 @@
 		uint32_t aflags = ARC_WAIT;
 		arc_buf_t *abuf;
 
-		if (arc_read_nolock(NULL, spa, bp,
+		if (dsl_read(NULL, spa, bp, pbuf,
 		    arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ,
 		    ZIO_FLAG_CANFAIL, &aflags, zb) != 0)
 			return (EIO);
@@ -361,7 +361,7 @@
 		arc_buf_t *abuf;
 		int blksz = BP_GET_LSIZE(bp);
 
-		if (arc_read_nolock(NULL, spa, bp,
+		if (dsl_read(NULL, spa, bp, pbuf,
 		    arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ,
 		    ZIO_FLAG_CANFAIL, &aflags, zb) != 0)
 			return (EIO);
@@ -504,6 +504,7 @@
 	uint64_t dsflags;
 	char clonelastname[MAXNAMELEN];
 	dsl_dataset_t *ds; /* the ds to recv into; returned from the syncfunc */
+	cred_t *cr;
 };
 
 /* ARGSUSED */
@@ -536,7 +537,7 @@
 }
 
 static void
-recv_new_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
+recv_new_sync(void *arg1, void *arg2, dmu_tx_t *tx)
 {
 	dsl_dir_t *dd = arg1;
 	struct recvbeginsyncarg *rbsa = arg2;
@@ -545,7 +546,7 @@
 
 	/* Create and open new dataset. */
 	dsobj = dsl_dataset_create_sync(dd, strrchr(rbsa->tofs, '/') + 1,
-	    rbsa->origin, flags, cr, tx);
+	    rbsa->origin, flags, rbsa->cr, tx);
 	VERIFY(0 == dsl_dataset_own_obj(dd->dd_pool, dsobj,
 	    B_TRUE, dmu_recv_tag, &rbsa->ds));
 
@@ -554,8 +555,8 @@
 		    rbsa->ds, &rbsa->ds->ds_phys->ds_bp, rbsa->type, tx);
 	}
 
-	spa_history_internal_log(LOG_DS_REPLAY_FULL_SYNC,
-	    dd->dd_pool->dp_spa, tx, cr, "dataset = %lld", dsobj);
+	spa_history_log_internal(LOG_DS_REPLAY_FULL_SYNC,
+	    dd->dd_pool->dp_spa, tx, "dataset = %lld", dsobj);
 }
 
 /* ARGSUSED */
@@ -630,7 +631,7 @@
 
 /* ARGSUSED */
 static void
-recv_existing_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
+recv_existing_sync(void *arg1, void *arg2, dmu_tx_t *tx)
 {
 	dsl_dataset_t *ohds = arg1;
 	struct recvbeginsyncarg *rbsa = arg2;
@@ -641,7 +642,7 @@
 
 	/* create and open the temporary clone */
 	dsobj = dsl_dataset_create_sync(ohds->ds_dir, rbsa->clonelastname,
-	    ohds->ds_prev, flags, cr, tx);
+	    ohds->ds_prev, flags, rbsa->cr, tx);
 	VERIFY(0 == dsl_dataset_own_obj(dp, dsobj, B_TRUE, dmu_recv_tag, &cds));
 
 	/*
@@ -655,8 +656,8 @@
 
 	rbsa->ds = cds;
 
-	spa_history_internal_log(LOG_DS_REPLAY_INC_SYNC,
-	    dp->dp_spa, tx, cr, "dataset = %lld", dsobj);
+	spa_history_log_internal(LOG_DS_REPLAY_INC_SYNC,
+	    dp->dp_spa, tx, "dataset = %lld", dsobj);
 }
 
 
@@ -701,6 +702,7 @@
 	rbsa.type = drrb->drr_type;
 	rbsa.tag = FTAG;
 	rbsa.dsflags = 0;
+	rbsa.cr = CRED();
 	versioninfo = drrb->drr_versioninfo;
 	flags = drrb->drr_flags;
 
@@ -1466,12 +1468,12 @@
 }
 
 static void
-recv_end_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
+recv_end_sync(void *arg1, void *arg2, dmu_tx_t *tx)
 {
 	dsl_dataset_t *ds = arg1;
 	struct recvendsyncarg *resa = arg2;
 
-	dsl_dataset_snapshot_sync(ds, resa->tosnap, cr, tx);
+	dsl_dataset_snapshot_sync(ds, resa->tosnap, tx);
 
 	/* set snapshot's creation time and guid */
 	dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);

--- a/usr/src/uts/common/fs/zfs/dmu_traverse.c	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/dmu_traverse.c	Mon May 03 14:54:08 2010 -0700
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #include <sys/zfs_context.h>
@@ -77,7 +76,7 @@
 	SET_BOOKMARK(&zb, td->td_objset, ZB_ZIL_OBJECT, ZB_ZIL_LEVEL,
 	    bp->blk_cksum.zc_word[ZIL_ZC_SEQ]);
 
-	(void) td->td_func(td->td_spa, zilog, bp, &zb, NULL, td->td_arg);
+	(void) td->td_func(td->td_spa, zilog, bp, NULL, &zb, NULL, td->td_arg);
 
 	return (0);
 }
@@ -102,7 +101,7 @@
 		SET_BOOKMARK(&zb, td->td_objset, lr->lr_foid, ZB_ZIL_LEVEL,
 		    lr->lr_offset / BP_GET_LSIZE(bp));
 
-		(void) td->td_func(td->td_spa, zilog, bp, &zb, NULL,
+		(void) td->td_func(td->td_spa, zilog, bp, NULL, &zb, NULL,
 		    td->td_arg);
 	}
 	return (0);
@@ -140,7 +139,8 @@
 	boolean_t hard = td->td_flags & TRAVERSE_HARD;
 
 	if (bp->blk_birth == 0) {
-		err = td->td_func(td->td_spa, NULL, NULL, zb, dnp, td->td_arg);
+		err = td->td_func(td->td_spa, NULL, NULL, pbuf, zb, dnp,
+		    td->td_arg);
 		return (err);
 	}
 
@@ -160,7 +160,8 @@
 	}
 
 	if (td->td_flags & TRAVERSE_PRE) {
-		err = td->td_func(td->td_spa, NULL, bp, zb, dnp, td->td_arg);
+		err = td->td_func(td->td_spa, NULL, bp, pbuf, zb, dnp,
+		    td->td_arg);
 		if (err)
 			return (err);
 	}
@@ -171,7 +172,7 @@
 		blkptr_t *cbp;
 		int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT;
 
-		err = arc_read(NULL, td->td_spa, bp, pbuf,
+		err = dsl_read(NULL, td->td_spa, bp, pbuf,
 		    arc_getbuf_func, &buf,
 		    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
 		if (err)
@@ -195,7 +196,7 @@
 		int i;
 		int epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT;
 
-		err = arc_read(NULL, td->td_spa, bp, pbuf,
+		err = dsl_read(NULL, td->td_spa, bp, pbuf,
 		    arc_getbuf_func, &buf,
 		    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
 		if (err)
@@ -217,7 +218,7 @@
 		objset_phys_t *osp;
 		dnode_phys_t *dnp;
 
-		err = arc_read_nolock(NULL, td->td_spa, bp,
+		err = dsl_read_nolock(NULL, td->td_spa, bp,
 		    arc_getbuf_func, &buf,
 		    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
 		if (err)
@@ -252,8 +253,10 @@
 	if (buf)
 		(void) arc_buf_remove_ref(buf, &buf);
 
-	if (err == 0 && lasterr == 0 && (td->td_flags & TRAVERSE_POST))
-		err = td->td_func(td->td_spa, NULL, bp, zb, dnp, td->td_arg);
+	if (err == 0 && lasterr == 0 && (td->td_flags & TRAVERSE_POST)) {
+		err = td->td_func(td->td_spa, NULL, bp, pbuf, zb, dnp,
+		    td->td_arg);
+	}
 
 	return (err != 0 ? err : lasterr);
 }
@@ -275,16 +278,17 @@
 				break;
 			lasterr = err;
 		}
-		if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) {
-			SET_BOOKMARK(&czb, objset,
-			    object, 0, DMU_SPILL_BLKID);
-			err = traverse_visitbp(td, dnp, buf,
-			    (blkptr_t *)&dnp->dn_spill, &czb);
-			if (err) {
-				if (!hard)
-					break;
-				lasterr = err;
-			}
+	}
+
+	if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) {
+		SET_BOOKMARK(&czb, objset,
+		    object, 0, DMU_SPILL_BLKID);
+		err = traverse_visitbp(td, dnp, buf,
+		    (blkptr_t *)&dnp->dn_spill, &czb);
+		if (err) {
+			if (!hard)
+				return (err);
+			lasterr = err;
 		}
 	}
 	return (err != 0 ? err : lasterr);
@@ -293,7 +297,8 @@
 /* ARGSUSED */
 static int
 traverse_prefetcher(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
-    const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
+    arc_buf_t *pbuf, const zbookmark_t *zb, const dnode_phys_t *dnp,
+    void *arg)
 {
 	struct prefetch_data *pfd = arg;
 	uint32_t aflags = ARC_NOWAIT | ARC_PREFETCH;
@@ -314,7 +319,7 @@
 	cv_broadcast(&pfd->pd_cv);
 	mutex_exit(&pfd->pd_mtx);
 
-	(void) arc_read_nolock(NULL, spa, bp, NULL, NULL,
+	(void) dsl_read(NULL, spa, bp, pbuf, NULL, NULL,
 	    ZIO_PRIORITY_ASYNC_READ,
 	    ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE,
 	    &aflags, zb);

--- a/usr/src/uts/common/fs/zfs/dnode_sync.c	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/dnode_sync.c	Mon May 03 14:54:08 2010 -0700
@@ -227,7 +227,7 @@
 	if (db->db_state != DB_CACHED)
 		(void) dbuf_read(db, NULL, DB_RF_MUST_SUCCEED);
 
-	arc_release(db->db_buf, db);
+	dbuf_release_bp(db);
 	bp = (blkptr_t *)db->db.db_data;
 
 	epbs = db->db_dnode->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT;

--- a/usr/src/uts/common/fs/zfs/dsl_dataset.c	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/dsl_dataset.c	Mon May 03 14:54:08 2010 -0700
@@ -38,6 +38,7 @@
 #include <sys/spa.h>
 #include <sys/zfs_znode.h>
 #include <sys/zvol.h>
+#include <sys/dsl_scan.h>
 
 static char *dsl_reaper = "the grim reaper";
 
@@ -80,7 +81,7 @@
 	int uncompressed = BP_GET_UCSIZE(bp);
 	int64_t delta;
 
-	dprintf_bp(bp, "born, ds=%p\n", ds);
+	dprintf_bp(bp, "ds=%p", ds);
 
 	ASSERT(dmu_tx_is_syncing(tx));
 	/* It could have been compressed away to nothing */
@@ -100,6 +101,7 @@
 		return;
 	}
 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
+
 	mutex_enter(&ds->ds_dir->dd_lock);
 	mutex_enter(&ds->ds_lock);
 	delta = parent_delta(ds, used);
@@ -150,7 +152,7 @@
 	if (bp->blk_birth > ds->ds_phys->ds_prev_snap_txg) {
 		int64_t delta;
 
-		dprintf_bp(bp, "freeing: %s", "");
+		dprintf_bp(bp, "freeing ds=%llu", ds->ds_object);
 		dsl_free(tx->tx_pool, tx->tx_txg, bp);
 
 		mutex_enter(&ds->ds_dir->dd_lock);
@@ -191,7 +193,7 @@
 			ds->ds_prev->ds_phys->ds_unique_bytes += used;
 			mutex_exit(&ds->ds_prev->ds_lock);
 		}
-		if (bp->blk_birth > ds->ds_origin_txg) {
+		if (bp->blk_birth > ds->ds_dir->dd_origin_txg) {
 			dsl_dir_transfer_space(ds->ds_dir, used,
 			    DD_USED_HEAD, DD_USED_SNAP, tx);
 		}
@@ -397,19 +399,6 @@
 				    ds->ds_phys->ds_prev_snap_obj,
 				    ds, &ds->ds_prev);
 			}
-
-			if (err == 0 && dsl_dir_is_clone(ds->ds_dir)) {
-				dsl_dataset_t *origin;
-
-				err = dsl_dataset_hold_obj(dp,
-				    ds->ds_dir->dd_phys->dd_origin_obj,
-				    FTAG, &origin);
-				if (err == 0) {
-					ds->ds_origin_txg =
-					    origin->ds_phys->ds_creation_txg;
-					dsl_dataset_rele(origin, FTAG);
-				}
-			}
 		} else {
 			if (zfs_flags & ZFS_DEBUG_SNAPNAMES)
 				err = dsl_dataset_get_snapname(ds);
@@ -876,10 +865,6 @@
 		struct dsl_ds_destroyarg *dsda;
 
 		dsl_dataset_make_exclusive(ds, da->dstg);
-		if (ds->ds_objset != NULL) {
-			dmu_objset_evict(ds->ds_objset);
-			ds->ds_objset = NULL;
-		}
 		dsda = kmem_zalloc(sizeof (struct dsl_ds_destroyarg), KM_SLEEP);
 		dsda->ds = ds;
 		dsda->defer = da->defer;
@@ -989,11 +974,6 @@
 			return (error);
 		dsda->rm_origin = origin;
 		dsl_dataset_make_exclusive(origin, tag);
-
-		if (origin->ds_objset != NULL) {
-			dmu_objset_evict(origin->ds_objset);
-			origin->ds_objset = NULL;
-		}
 	}
 
 	return (0);
@@ -1020,10 +1000,6 @@
 		/* Destroying a snapshot is simpler */
 		dsl_dataset_make_exclusive(ds, tag);
 
-		if (ds->ds_objset != NULL) {
-			dmu_objset_evict(ds->ds_objset);
-			ds->ds_objset = NULL;
-		}
 		dsda.defer = defer;
 		err = dsl_sync_task_do(ds->ds_dir->dd_pool,
 		    dsl_dataset_destroy_check, dsl_dataset_destroy_sync,
@@ -1096,24 +1072,10 @@
 	if (err)
 		goto out;
 
-	if (ds->ds_objset) {
-		/*
-		 * We need to sync out all in-flight IO before we try
-		 * to evict (the dataset evict func is trying to clear
-		 * the cached entries for this dataset in the ARC).
-		 */
-		txg_wait_synced(dd->dd_pool, 0);
-	}
-
 	/*
 	 * Blow away the dsl_dir + head dataset.
 	 */
 	dsl_dataset_make_exclusive(ds, tag);
-	if (ds->ds_objset) {
-		dmu_objset_evict(ds->ds_objset);
-		ds->ds_objset = NULL;
-	}
-
 	/*
 	 * If we're removing a clone, we might also need to remove its
 	 * origin.
@@ -1220,7 +1182,7 @@
 	uint64_t mrs_used;
 	uint64_t dlused, dlcomp, dluncomp;
 
-	ASSERT(ds->ds_object == ds->ds_dir->dd_phys->dd_head_dataset_obj);
+	ASSERT(!dsl_dataset_is_snapshot(ds));
 
 	if (ds->ds_phys->ds_prev_snap_obj != 0)
 		mrs_used = ds->ds_prev->ds_phys->ds_used_bytes;
@@ -1234,21 +1196,11 @@
 	ds->ds_phys->ds_unique_bytes =
 	    ds->ds_phys->ds_used_bytes - (mrs_used - dlused);
 
-	if (!DS_UNIQUE_IS_ACCURATE(ds) &&
-	    spa_version(ds->ds_dir->dd_pool->dp_spa) >=
+	if (spa_version(ds->ds_dir->dd_pool->dp_spa) >=
 	    SPA_VERSION_UNIQUE_ACCURATE)
 		ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
 }
 
-static uint64_t
-dsl_dataset_unique(dsl_dataset_t *ds)
-{
-	if (!DS_UNIQUE_IS_ACCURATE(ds) && !dsl_dataset_is_snapshot(ds))
-		dsl_dataset_recalc_head_uniq(ds);
-
-	return (ds->ds_phys->ds_unique_bytes);
-}
-
 struct killarg {
 	dsl_dataset_t *ds;
 	dmu_tx_t *tx;
@@ -1256,7 +1208,7 @@
 
 /* ARGSUSED */
 static int
-kill_blkptr(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
+kill_blkptr(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf,
     const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
 {
 	struct killarg *ka = arg;
@@ -1315,7 +1267,7 @@
 
 /* ARGSUSED */
 static void
-dsl_dataset_destroy_begin_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
+dsl_dataset_destroy_begin_sync(void *arg1, void *arg2, dmu_tx_t *tx)
 {
 	dsl_dataset_t *ds = arg1;
 	dsl_pool_t *dp = ds->ds_dir->dd_pool;
@@ -1324,8 +1276,8 @@
 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
 	ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT;
 
-	spa_history_internal_log(LOG_DS_DESTROY_BEGIN, dp->dp_spa, tx,
-	    cr, "dataset = %llu", ds->ds_object);
+	spa_history_log_internal(LOG_DS_DESTROY_BEGIN, dp->dp_spa, tx,
+	    "dataset = %llu", ds->ds_object);
 }
 
 static int
@@ -1499,7 +1451,7 @@
 }
 
 void
-dsl_dataset_destroy_sync(void *arg1, void *tag, cred_t *cr, dmu_tx_t *tx)
+dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
 {
 	struct dsl_ds_destroyarg *dsda = arg1;
 	dsl_dataset_t *ds = dsda->ds;
@@ -1531,6 +1483,11 @@
 	cv_broadcast(&ds->ds_exclusive_cv);
 	mutex_exit(&ds->ds_lock);
 
+	if (ds->ds_objset) {
+		dmu_objset_evict(ds->ds_objset);
+		ds->ds_objset = NULL;
+	}
+
 	/* Remove our reservation */
 	if (ds->ds_reserved != 0) {
 		dsl_prop_setarg_t psa;
@@ -1541,13 +1498,13 @@
 		    &value);
 		psa.psa_effective_value = 0;	/* predict default value */
 
-		dsl_dataset_set_reservation_sync(ds, &psa, cr, tx);
+		dsl_dataset_set_reservation_sync(ds, &psa, tx);
 		ASSERT3U(ds->ds_reserved, ==, 0);
 	}
 
 	ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));
 
-	dsl_pool_ds_destroyed(ds, tx);
+	dsl_scan_ds_destroyed(ds, tx);
 
 	obj = ds->ds_object;
 
@@ -1596,7 +1553,7 @@
 		}
 	}
 
-	if (ds->ds_phys->ds_next_snap_obj != 0) {
+	if (dsl_dataset_is_snapshot(ds)) {
 		blkptr_t bp;
 		zio_t *pio;
 		dsl_dataset_t *ds_next;
@@ -1608,7 +1565,7 @@
 		    ds->ds_phys->ds_next_snap_obj, FTAG, &ds_next));
 		ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj);
 
-		old_unique = dsl_dataset_unique(ds_next);
+		old_unique = ds_next->ds_phys->ds_unique_bytes;
 
 		dmu_buf_will_dirty(ds_next->ds_dbuf, tx);
 		ds_next->ds_phys->ds_prev_snap_obj =
@@ -1664,7 +1621,7 @@
 		    ds_next->ds_phys->ds_deadlist_obj));
 		ds->ds_phys->ds_deadlist_obj = 0;
 
-		if (ds_next->ds_phys->ds_next_snap_obj != 0) {
+		if (dsl_dataset_is_snapshot(ds_next)) {
 			/*
 			 * Update next's unique to include blocks which
 			 * were previously shared by only this snapshot
@@ -1790,8 +1747,8 @@
 		dsl_dataset_rele(ds_prev, FTAG);
 
 	spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx);
-	spa_history_internal_log(LOG_DS_DESTROY, dp->dp_spa, tx,
-	    cr, "dataset = %llu", ds->ds_object);
+	spa_history_log_internal(LOG_DS_DESTROY, dp->dp_spa, tx,
+	    "dataset = %llu", ds->ds_object);
 
 	if (ds->ds_phys->ds_next_clones_obj != 0) {
 		uint64_t count;
@@ -1816,7 +1773,7 @@
 		struct dsl_ds_destroyarg ndsda = {0};
 
 		ndsda.ds = dsda->rm_origin;
-		dsl_dataset_destroy_sync(&ndsda, tag, cr, tx);
+		dsl_dataset_destroy_sync(&ndsda, tag, tx);
 	}
 }
 
@@ -1833,7 +1790,8 @@
 	 * owned by the snapshot dataset must be accommodated by space
 	 * outside of the reservation.
 	 */
-	asize = MIN(dsl_dataset_unique(ds), ds->ds_reserved);
+	ASSERT(ds->ds_reserved == 0 || DS_UNIQUE_IS_ACCURATE(ds));
+	asize = MIN(ds->ds_phys->ds_unique_bytes, ds->ds_reserved);
 	if (asize > dsl_dir_space_available(ds->ds_dir, NULL, 0, FALSE))
 		return (ENOSPC);
 
@@ -1847,7 +1805,6 @@
 	return (0);
 }
 
-/* ARGSUSED */
 int
 dsl_dataset_snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx)
 {
@@ -1888,7 +1845,7 @@
 }
 
 void
-dsl_dataset_snapshot_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
+dsl_dataset_snapshot_sync(void *arg1, void *arg2, dmu_tx_t *tx)
 {
 	dsl_dataset_t *ds = arg1;
 	const char *snapname = arg2;
@@ -1959,9 +1916,11 @@
 	 * since our unique space is going to zero.
 	 */
 	if (ds->ds_reserved) {
-		int64_t add = MIN(dsl_dataset_unique(ds), ds->ds_reserved);
+		int64_t delta;
+		ASSERT(DS_UNIQUE_IS_ACCURATE(ds));
+		delta = MIN(ds->ds_phys->ds_unique_bytes, ds->ds_reserved);
 		dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV,
-		    add, 0, 0, tx);
+		    delta, 0, 0, tx);
 	}
 
 	bplist_close(&ds->ds_deadlist);
@@ -1987,11 +1946,11 @@
 	VERIFY(0 == dsl_dataset_get_ref(dp,
 	    ds->ds_phys->ds_prev_snap_obj, ds, &ds->ds_prev));
 
-	dsl_pool_ds_snapshotted(ds, tx);
+	dsl_scan_ds_snapshotted(ds, tx);
 
 	dsl_dir_snap_cmtime_update(ds->ds_dir);
 
-	spa_history_internal_log(LOG_DS_SNAPSHOT, dp->dp_spa, tx, cr,
+	spa_history_log_internal(LOG_DS_SNAPSHOT, dp->dp_spa, tx,
 	    "dataset = %llu", dsobj);
 }
 
@@ -2035,7 +1994,7 @@
 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_GUID,
 	    ds->ds_phys->ds_guid);
 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_UNIQUE,
-	    dsl_dataset_unique(ds));
+	    ds->ds_phys->ds_unique_bytes);
 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_OBJSETID,
 	    ds->ds_object);
 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USERREFS,
@@ -2163,8 +2122,7 @@
 }
 
 static void
-dsl_dataset_snapshot_rename_sync(void *arg1, void *arg2,
-    cred_t *cr, dmu_tx_t *tx)
+dsl_dataset_snapshot_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx)
 {
 	dsl_dataset_t *ds = arg1;
 	const char *newsnapname = arg2;
@@ -2188,8 +2146,8 @@
 	    ds->ds_snapname, 8, 1, &ds->ds_object, tx);
 	ASSERT3U(err, ==, 0);
 
-	spa_history_internal_log(LOG_DS_RENAME, dd->dd_pool->dp_spa, tx,
-	    cr, "dataset = %llu", ds->ds_object);
+	spa_history_log_internal(LOG_DS_RENAME, dd->dd_pool->dp_spa, tx,
+	    "dataset = %llu", ds->ds_object);
 	dsl_dataset_rele(hds, FTAG);
 }
 
@@ -2371,14 +2329,14 @@
 
 struct promotearg {
 	list_t shared_snaps, origin_snaps, clone_snaps;
-	dsl_dataset_t *origin_origin, *origin_head;
+	dsl_dataset_t *origin_origin;
 	uint64_t used, comp, uncomp, unique, cloneusedsnap, originusedsnap;
 	char *err_ds;
 };
 
 static int snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep);
-
-/* ARGSUSED */
+static boolean_t snaplist_unstable(list_t *l);
+
 static int
 dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx)
 {
@@ -2479,19 +2437,19 @@
 
 		/*
 		 * Note, typically this will not be a clone of a clone,
-		 * so snap->ds->ds_origin_txg will be < TXG_INITIAL, so
+		 * so dd_origin_txg will be < TXG_INITIAL, so
 		 * these snaplist_space() -> bplist_space_birthrange()
 		 * calls will be fast because they do not have to
 		 * iterate over all bps.
 		 */
 		snap = list_head(&pa->origin_snaps);
 		err = snaplist_space(&pa->shared_snaps,
-		    snap->ds->ds_origin_txg, &pa->cloneusedsnap);
+		    snap->ds->ds_dir->dd_origin_txg, &pa->cloneusedsnap);
 		if (err)
 			return (err);
 
 		err = snaplist_space(&pa->clone_snaps,
-		    snap->ds->ds_origin_txg, &space);
+		    snap->ds->ds_dir->dd_origin_txg, &space);
 		if (err)
 			return (err);
 		pa->cloneusedsnap += space;
@@ -2510,7 +2468,7 @@
 }
 
 static void
-dsl_dataset_promote_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
+dsl_dataset_promote_sync(void *arg1, void *arg2, dmu_tx_t *tx)
 {
 	dsl_dataset_t *hds = arg1;
 	struct promotearg *pa = arg2;
@@ -2554,10 +2512,11 @@
 	dmu_buf_will_dirty(dd->dd_dbuf, tx);
 	ASSERT3U(dd->dd_phys->dd_origin_obj, ==, origin_ds->ds_object);
 	dd->dd_phys->dd_origin_obj = odd->dd_phys->dd_origin_obj;
-	hds->ds_origin_txg = origin_head->ds_origin_txg;
+	dd->dd_origin_txg = origin_head->ds_dir->dd_origin_txg;
 	dmu_buf_will_dirty(odd->dd_dbuf, tx);
 	odd->dd_phys->dd_origin_obj = origin_ds->ds_object;
-	origin_head->ds_origin_txg = origin_ds->ds_phys->ds_creation_txg;
+	origin_head->ds_dir->dd_origin_txg =
+	    origin_ds->ds_phys->ds_creation_txg;
 
 	/* move snapshots to this dir */
 	for (snap = list_head(&pa->shared_snaps); snap;
@@ -2614,8 +2573,8 @@
 	origin_ds->ds_phys->ds_unique_bytes = pa->unique;
 
 	/* log history record */
-	spa_history_internal_log(LOG_DS_PROMOTE, dd->dd_pool->dp_spa, tx,
-	    cr, "dataset = %llu", hds->ds_object);
+	spa_history_log_internal(LOG_DS_PROMOTE, dd->dd_pool->dp_spa, tx,
+	    "dataset = %llu", hds->ds_object);
 
 	dsl_dir_close(odd, FTAG);
 }
@@ -2862,7 +2821,7 @@
 
 /* ARGSUSED */
 static void
-dsl_dataset_clone_swap_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
+dsl_dataset_clone_swap_sync(void *arg1, void *arg2, dmu_tx_t *tx)
 {
 	struct cloneswaparg *csa = arg1;
 	dsl_pool_t *dp = csa->cds->ds_dir->dd_pool;
@@ -2937,9 +2896,9 @@
 		 * changing that affects the snapused).
 		 */
 		VERIFY(0 == bplist_space_birthrange(&csa->cds->ds_deadlist,
-		    csa->ohds->ds_origin_txg, UINT64_MAX, &cdl_used));
+		    csa->ohds->ds_dir->dd_origin_txg, UINT64_MAX, &cdl_used));
 		VERIFY(0 == bplist_space_birthrange(&csa->ohds->ds_deadlist,
-		    csa->ohds->ds_origin_txg, UINT64_MAX, &odl_used));
+		    csa->ohds->ds_dir->dd_origin_txg, UINT64_MAX, &odl_used));
 		dsl_dir_transfer_space(csa->ohds->ds_dir, cdl_used - odl_used,
 		    DD_USED_HEAD, DD_USED_SNAP, tx);
 	}
@@ -2975,7 +2934,7 @@
 	VERIFY(0 == bplist_open(&csa->ohds->ds_deadlist, dp->dp_meta_objset,
 	    csa->ohds->ds_phys->ds_deadlist_obj));
 
-	dsl_pool_ds_clone_swapped(csa->ohds, csa->cds, tx);
+	dsl_scan_ds_clone_swapped(csa->ohds, csa->cds, tx);
 }
 
 /*
@@ -3110,24 +3069,24 @@
 	return (0);
 }
 
-extern void dsl_prop_set_sync(void *, void *, cred_t *, dmu_tx_t *);
+extern void dsl_prop_set_sync(void *, void *, dmu_tx_t *);
 
 void
-dsl_dataset_set_quota_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
+dsl_dataset_set_quota_sync(void *arg1, void *arg2, dmu_tx_t *tx)
 {
 	dsl_dataset_t *ds = arg1;
 	dsl_prop_setarg_t *psa = arg2;
 	uint64_t effective_value = psa->psa_effective_value;
 
-	dsl_prop_set_sync(ds, psa, cr, tx);
+	dsl_prop_set_sync(ds, psa, tx);
 	DSL_PROP_CHECK_PREDICTION(ds->ds_dir, psa);
 
 	if (ds->ds_quota != effective_value) {
 		dmu_buf_will_dirty(ds->ds_dbuf, tx);
 		ds->ds_quota = effective_value;
 
-		spa_history_internal_log(LOG_DS_REFQUOTA,
-		    ds->ds_dir->dd_pool->dp_spa, tx, cr, "%lld dataset = %llu ",
+		spa_history_log_internal(LOG_DS_REFQUOTA,
+		    ds->ds_dir->dd_pool->dp_spa, tx, "%lld dataset = %llu ",
 		    (longlong_t)ds->ds_quota, ds->ds_object);
 	}
 }
@@ -3188,7 +3147,9 @@
 		return (0);
 
 	mutex_enter(&ds->ds_lock);
-	unique = dsl_dataset_unique(ds);
+	if (!DS_UNIQUE_IS_ACCURATE(ds))
+		dsl_dataset_recalc_head_uniq(ds);
+	unique = ds->ds_phys->ds_unique_bytes;
 	mutex_exit(&ds->ds_lock);
 
 	if (MAX(unique, effective_value) > MAX(unique, ds->ds_reserved)) {
@@ -3205,10 +3166,8 @@
 	return (0);
 }
 
-/* ARGSUSED */
 static void
-dsl_dataset_set_reservation_sync(void *arg1, void *arg2, cred_t *cr,
-    dmu_tx_t *tx)
+dsl_dataset_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx)
 {
 	dsl_dataset_t *ds = arg1;
 	dsl_prop_setarg_t *psa = arg2;
@@ -3216,14 +3175,15 @@
 	uint64_t unique;
 	int64_t delta;
 
-	dsl_prop_set_sync(ds, psa, cr, tx);
+	dsl_prop_set_sync(ds, psa, tx);
 	DSL_PROP_CHECK_PREDICTION(ds->ds_dir, psa);
 
 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
 
 	mutex_enter(&ds->ds_dir->dd_lock);
 	mutex_enter(&ds->ds_lock);
-	unique = dsl_dataset_unique(ds);
+	ASSERT(DS_UNIQUE_IS_ACCURATE(ds));
+	unique = ds->ds_phys->ds_unique_bytes;
 	delta = MAX(0, (int64_t)(effective_value - unique)) -
 	    MAX(0, (int64_t)(ds->ds_reserved - unique));
 	ds->ds_reserved = effective_value;
@@ -3232,8 +3192,8 @@
 	dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, delta, 0, 0, tx);
 	mutex_exit(&ds->ds_dir->dd_lock);
 
-	spa_history_internal_log(LOG_DS_REFRESERV,
-	    ds->ds_dir->dd_pool->dp_spa, tx, cr, "%lld dataset = %llu",
+	spa_history_log_internal(LOG_DS_REFRESERV,
+	    ds->ds_dir->dd_pool->dp_spa, tx, "%lld dataset = %llu",
 	    (longlong_t)effective_value, ds->ds_object);
 }
 
@@ -3311,7 +3271,7 @@
 }
 
 static void
-dsl_dataset_user_hold_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
+dsl_dataset_user_hold_sync(void *arg1, void *arg2, dmu_tx_t *tx)
 {
 	dsl_dataset_t *ds = arg1;
 	struct dsl_ds_holdarg *ha = arg2;
@@ -3343,8 +3303,8 @@
 		    htag, &now, tx));
 	}
 
-	spa_history_internal_log(LOG_DS_USER_HOLD,
-	    dp->dp_spa, tx, cr, "<%s> temp = %d dataset = %llu", htag,
+	spa_history_log_internal(LOG_DS_USER_HOLD,
+	    dp->dp_spa, tx, "<%s> temp = %d dataset = %llu", htag,
 	    (int)ha->temphold, ds->ds_object);
 }
 
@@ -3495,10 +3455,6 @@
 			 */
 			if (!ra->own)
 				return (EBUSY);
-			if (ds->ds_objset) {
-				dmu_objset_evict(ds->ds_objset);
-				ds->ds_objset = NULL;
-			}
 		}
 		dsda.ds = ds;
 		dsda.releasing = B_TRUE;
@@ -3509,7 +3465,7 @@
 }
 
 static void
-dsl_dataset_user_release_sync(void *arg1, void *tag, cred_t *cr, dmu_tx_t *tx)
+dsl_dataset_user_release_sync(void *arg1, void *tag, dmu_tx_t *tx)
 {
 	struct dsl_ds_releasearg *ra = arg1;
 	dsl_dataset_t *ds = ra->ds;
@@ -3520,6 +3476,11 @@
 	uint64_t refs;
 	int error;
 
+	if (ds->ds_objset) {
+		dmu_objset_evict(ds->ds_objset);
+		ds->ds_objset = NULL;
+	}
+
 	mutex_enter(&ds->ds_lock);
 	ds->ds_userrefs--;
 	refs = ds->ds_userrefs;
@@ -3536,11 +3497,11 @@
 		dsda.ds = ds;
 		dsda.releasing = B_TRUE;
 		/* We already did the destroy_check */
-		dsl_dataset_destroy_sync(&dsda, tag, cr, tx);
+		dsl_dataset_destroy_sync(&dsda, tag, tx);
 	}
 
-	spa_history_internal_log(LOG_DS_USER_RELEASE,
-	    dp->dp_spa, tx, cr, "<%s> %lld dataset = %llu",
+	spa_history_log_internal(LOG_DS_USER_RELEASE,
+	    dp->dp_spa, tx, "<%s> %lld dataset = %llu",
 	    ra->htag, (longlong_t)refs, dsobj);
 }

--- a/usr/src/uts/common/fs/zfs/dsl_deleg.c	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/dsl_deleg.c	Mon May 03 14:54:08 2010 -0700
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 /*
@@ -148,7 +147,7 @@
 }
 
 static void
-dsl_deleg_set_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
+dsl_deleg_set_sync(void *arg1, void *arg2, dmu_tx_t *tx)
 {
 	dsl_dir_t *dd = arg1;
 	nvlist_t *nvp = arg2;
@@ -183,8 +182,8 @@
 
 			VERIFY(zap_update(mos, jumpobj,
 			    perm, 8, 1, &n, tx) == 0);
-			spa_history_internal_log(LOG_DS_PERM_UPDATE,
-			    dd->dd_pool->dp_spa, tx, cr,
+			spa_history_log_internal(LOG_DS_PERM_UPDATE,
+			    dd->dd_pool->dp_spa, tx,
 			    "%s %s dataset = %llu", whokey, perm,
 			    dd->dd_phys->dd_head_dataset_obj);
 		}
@@ -192,7 +191,7 @@
 }
 
 static void
-dsl_deleg_unset_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
+dsl_deleg_unset_sync(void *arg1, void *arg2, dmu_tx_t *tx)
 {
 	dsl_dir_t *dd = arg1;
 	nvlist_t *nvp = arg2;
@@ -215,8 +214,8 @@
 				(void) zap_remove(mos, zapobj, whokey, tx);
 				VERIFY(0 == zap_destroy(mos, jumpobj, tx));
 			}
-			spa_history_internal_log(LOG_DS_PERM_WHO_REMOVE,
-			    dd->dd_pool->dp_spa, tx, cr,
+			spa_history_log_internal(LOG_DS_PERM_WHO_REMOVE,
+			    dd->dd_pool->dp_spa, tx,
 			    "%s dataset = %llu", whokey,
 			    dd->dd_phys->dd_head_dataset_obj);
 			continue;
@@ -236,8 +235,8 @@
 				VERIFY(0 == zap_destroy(mos,
 				    jumpobj, tx));
 			}
-			spa_history_internal_log(LOG_DS_PERM_REMOVE,
-			    dd->dd_pool->dp_spa, tx, cr,
+			spa_history_log_internal(LOG_DS_PERM_REMOVE,
+			    dd->dd_pool->dp_spa, tx,
 			    "%s %s dataset = %llu", whokey, perm,
 			    dd->dd_phys->dd_head_dataset_obj);
 		}

--- a/usr/src/uts/common/fs/zfs/dsl_dir.c	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/dsl_dir.c	Mon May 03 14:54:08 2010 -0700
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #include <sys/dmu.h>
@@ -40,8 +39,7 @@
 #include "zfs_namecheck.h"
 
 static uint64_t dsl_dir_space_towrite(dsl_dir_t *dd);
-static void dsl_dir_set_reservation_sync(void *arg1, void *arg2,
-    cred_t *cr, dmu_tx_t *tx);
+static void dsl_dir_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx);
 
 
 /* ARGSUSED */
@@ -64,8 +62,8 @@
 	spa_close(dd->dd_pool->dp_spa, dd);
 
 	/*
-	 * The props callback list should be empty since they hold the
-	 * dir open.
+	 * The props callback list should have been cleaned up by
+	 * objset_evict().
 	 */
 	list_destroy(&dd->dd_prop_cbs);
 	mutex_destroy(&dd->dd_lock);
@@ -136,6 +134,25 @@
 			(void) strcpy(dd->dd_myname, spa_name(dp->dp_spa));
 		}
 
+		if (dsl_dir_is_clone(dd)) {
+			dmu_buf_t *origin_bonus;
+			dsl_dataset_phys_t *origin_phys;
+
+			/*
+			 * We can't open the origin dataset, because
+			 * that would require opening this dsl_dir.
+			 * Just look at its phys directly instead.
+			 */
+			err = dmu_bonus_hold(dp->dp_meta_objset,
+			    dd->dd_phys->dd_origin_obj, FTAG, &origin_bonus);
+			if (err)
+				goto errout;
+			origin_phys = origin_bonus->db_data;
+			dd->dd_origin_txg =
+			    origin_phys->ds_creation_txg;
+			dmu_buf_rele(origin_bonus, FTAG);
+		}
+
 		winner = dmu_buf_set_user_ie(dbuf, dd, &dd->dd_phys,
 		    dsl_dir_evict);
 		if (winner) {
@@ -458,7 +475,7 @@
 }
 
 void
-dsl_dir_destroy_sync(void *arg1, void *tag, cred_t *cr, dmu_tx_t *tx)
+dsl_dir_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
 {
 	dsl_dataset_t *ds = arg1;
 	dsl_dir_t *dd = ds->ds_dir;
@@ -477,7 +494,7 @@
 	    &value);
 	psa.psa_effective_value = 0;	/* predict default value */
 
-	dsl_dir_set_reservation_sync(ds, &psa, cr, tx);
+	dsl_dir_set_reservation_sync(ds, &psa, tx);
 
 	ASSERT3U(dd->dd_phys->dd_used_bytes, ==, 0);
 	ASSERT3U(dd->dd_phys->dd_reserved, ==, 0);
@@ -652,15 +669,6 @@
 	if (used > quota) {
 		/* over quota */
 		myspace = 0;
-
-		/*
-		 * While it's OK to be a little over quota, if
-		 * we think we are using more space than there
-		 * is in the pool (which is already 1.6% more than
-		 * dsl_pool_adjustedsize()), something is very
-		 * wrong.
-		 */
-		ASSERT3U(used, <=, spa_get_dspace(dd->dd_pool->dp_spa));
 	} else {
 		/*
 		 * the lesser of the space provided by our parent and
@@ -1033,18 +1041,17 @@
 	return (err);
 }
 
-extern void dsl_prop_set_sync(void *, void *, cred_t *, dmu_tx_t *);
+extern dsl_syncfunc_t dsl_prop_set_sync;
 
-/* ARGSUSED */
 static void
-dsl_dir_set_quota_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
+dsl_dir_set_quota_sync(void *arg1, void *arg2, dmu_tx_t *tx)
 {
 	dsl_dataset_t *ds = arg1;
 	dsl_dir_t *dd = ds->ds_dir;
 	dsl_prop_setarg_t *psa = arg2;
 	uint64_t effective_value = psa->psa_effective_value;
 
-	dsl_prop_set_sync(ds, psa, cr, tx);
+	dsl_prop_set_sync(ds, psa, tx);
 	DSL_PROP_CHECK_PREDICTION(dd, psa);
 
 	dmu_buf_will_dirty(dd->dd_dbuf, tx);
@@ -1053,8 +1060,8 @@
 	dd->dd_phys->dd_quota = effective_value;
 	mutex_exit(&dd->dd_lock);
 
-	spa_history_internal_log(LOG_DS_QUOTA, dd->dd_pool->dp_spa,
-	    tx, cr, "%lld dataset = %llu ",
+	spa_history_log_internal(LOG_DS_QUOTA, dd->dd_pool->dp_spa,
+	    tx, "%lld dataset = %llu ",
 	    (longlong_t)effective_value, dd->dd_phys->dd_head_dataset_obj);
 }
 
@@ -1141,9 +1148,8 @@
 	return (0);
 }
 
-/* ARGSUSED */
 static void
-dsl_dir_set_reservation_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
+dsl_dir_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx)
 {
 	dsl_dataset_t *ds = arg1;
 	dsl_dir_t *dd = ds->ds_dir;
@@ -1152,7 +1158,7 @@
 	uint64_t used;
 	int64_t delta;
 
-	dsl_prop_set_sync(ds, psa, cr, tx);
+	dsl_prop_set_sync(ds, psa, tx);
 	DSL_PROP_CHECK_PREDICTION(dd, psa);
 
 	dmu_buf_will_dirty(dd->dd_dbuf, tx);
@@ -1170,8 +1176,8 @@
 	}
 	mutex_exit(&dd->dd_lock);
 
-	spa_history_internal_log(LOG_DS_RESERVATION, dd->dd_pool->dp_spa,
-	    tx, cr, "%lld dataset = %llu",
+	spa_history_log_internal(LOG_DS_RESERVATION, dd->dd_pool->dp_spa,
+	    tx, "%lld dataset = %llu",
 	    (longlong_t)effective_value, dd->dd_phys->dd_head_dataset_obj);
 }
 
@@ -1240,7 +1246,6 @@
 	const char *mynewname;
 };
 
-/*ARGSUSED*/
 static int
 dsl_dir_rename_check(void *arg1, void *arg2, dmu_tx_t *tx)
 {
@@ -1287,7 +1292,7 @@
 }
 
 static void
-dsl_dir_rename_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
+dsl_dir_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx)
 {
 	dsl_dir_t *dd = arg1;
 	struct renamearg *ra = arg2;
@@ -1336,8 +1341,8 @@
 	    dd->dd_myname, 8, 1, &dd->dd_object, tx);
 	ASSERT3U(err, ==, 0);
 
-	spa_history_internal_log(LOG_DS_RENAME, dd->dd_pool->dp_spa,
-	    tx, cr, "dataset = %llu", dd->dd_phys->dd_head_dataset_obj);
+	spa_history_log_internal(LOG_DS_RENAME, dd->dd_pool->dp_spa,
+	    tx, "dataset = %llu", dd->dd_phys->dd_head_dataset_obj);
 }
 
 int

--- a/usr/src/uts/common/fs/zfs/dsl_pool.c	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/dsl_pool.c	Mon May 03 14:54:08 2010 -0700
@@ -19,14 +19,16 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #include <sys/dsl_pool.h>
 #include <sys/dsl_dataset.h>
+#include <sys/dsl_prop.h>
 #include <sys/dsl_dir.h>
 #include <sys/dsl_synctask.h>
+#include <sys/dsl_scan.h>
+#include <sys/dnode.h>
 #include <sys/dmu_tx.h>
 #include <sys/dmu_objset.h>
 #include <sys/arc.h>
@@ -50,7 +52,7 @@
 
 static pgcnt_t old_physmem = 0;
 
-static int
+int
 dsl_pool_open_special_dir(dsl_pool_t *dp, const char *name, dsl_dir_t **ddp)
 {
 	uint64_t obj;
@@ -88,7 +90,6 @@
 	    offsetof(dsl_dataset_t, ds_synced_link));
 
 	mutex_init(&dp->dp_lock, NULL, MUTEX_DEFAULT, NULL);
-	mutex_init(&dp->dp_scrub_cancel_lock, NULL, MUTEX_DEFAULT, NULL);
 
 	dp->dp_vnrele_taskq = taskq_create("zfs_vn_rele_taskq", 1, minclsyspri,
 	    1, 4, 0);
@@ -150,64 +151,7 @@
 	if (err)
 		goto out;
 
-	/* get scrub status */
-	err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
-	    DMU_POOL_SCRUB_FUNC, sizeof (uint32_t), 1,
-	    &dp->dp_scrub_func);
-	if (err == 0) {
-		err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
-		    DMU_POOL_SCRUB_QUEUE, sizeof (uint64_t), 1,
-		    &dp->dp_scrub_queue_obj);
-		if (err)
-			goto out;
-		err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
-		    DMU_POOL_SCRUB_MIN_TXG, sizeof (uint64_t), 1,
-		    &dp->dp_scrub_min_txg);
-		if (err)
-			goto out;
-		err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
-		    DMU_POOL_SCRUB_MAX_TXG, sizeof (uint64_t), 1,
-		    &dp->dp_scrub_max_txg);
-		if (err)
-			goto out;
-		err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
-		    DMU_POOL_SCRUB_BOOKMARK, sizeof (uint64_t),
-		    sizeof (dp->dp_scrub_bookmark) / sizeof (uint64_t),
-		    &dp->dp_scrub_bookmark);
-		if (err)
-			goto out;
-		err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
-		    DMU_POOL_SCRUB_DDT_BOOKMARK, sizeof (uint64_t),
-		    sizeof (dp->dp_scrub_ddt_bookmark) / sizeof (uint64_t),
-		    &dp->dp_scrub_ddt_bookmark);
-		if (err && err != ENOENT)
-			goto out;
-		err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
-		    DMU_POOL_SCRUB_DDT_CLASS_MAX, sizeof (uint64_t), 1,
-		    &dp->dp_scrub_ddt_class_max);
-		if (err && err != ENOENT)
-			goto out;
-		err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
-		    DMU_POOL_SCRUB_ERRORS, sizeof (uint64_t), 1,
-		    &spa->spa_scrub_errors);
-		if (err)
-			goto out;
-		if (spa_version(spa) < SPA_VERSION_DSL_SCRUB) {
-			/*
-			 * A new-type scrub was in progress on an old
-			 * pool.  Restart from the beginning, since the
-			 * old software may have changed the pool in the
-			 * meantime.
-			 */
-			dsl_pool_scrub_restart(dp);
-		}
-	} else {
-		/*
-		 * It's OK if there is no scrub in progress (and if
-		 * there was an I/O error, ignore it).
-		 */
-		err = 0;
-	}
+	err = dsl_scan_init(dp, txg);
 
 out:
 	rw_exit(&dp->dp_config_rwlock);
@@ -247,9 +191,9 @@
 
 	arc_flush(dp->dp_spa);
 	txg_fini(dp);
+	dsl_scan_fini(dp);
 	rw_destroy(&dp->dp_config_rwlock);
 	mutex_destroy(&dp->dp_lock);
-	mutex_destroy(&dp->dp_scrub_cancel_lock);
 	taskq_destroy(dp->dp_vnrele_taskq);
 	if (dp->dp_blkstats)
 		kmem_free(dp->dp_blkstats, sizeof (zfs_all_blkstats_t));
@@ -275,6 +219,9 @@
 	    DMU_OT_OBJECT_DIRECTORY, DMU_OT_NONE, 0, tx);
 	ASSERT3U(err, ==, 0);
 
+	/* Initialize scan structures */
+	VERIFY3U(0, ==, dsl_scan_init(dp, txg));
+
 	/* create and open the root dir */
 	dp->dp_root_dir_obj = dsl_dir_create_sync(dp, NULL, NULL, tx);
 	VERIFY(0 == dsl_dir_open_obj(dp, dp->dp_root_dir_obj,
@@ -318,6 +265,14 @@
 	uint64_t data_written;
 	int err;
 
+	/*
+	 * We need to copy dp_space_towrite() before doing
+	 * dsl_sync_task_group_sync(), because
+	 * dsl_dataset_snapshot_reserve_space() will increase
+	 * dp_space_towrite but not actually write anything.
+	 */
+	data_written = dp->dp_space_towrite[txg & TXG_MASK];
+
 	tx = dmu_tx_create_assigned(dp, txg);
 
 	dp->dp_read_overhead = 0;
@@ -347,7 +302,7 @@
 
 	/*
 	 * Sync the datasets again to push out the changes due to
-	 * userquota updates.  This must be done before we process the
+	 * userspace updates.  This must be done before we process the
 	 * sync tasks, because that could cause a snapshot of a dataset
 	 * whose ds_bp will be rewritten when we do this 2nd sync.
 	 */
@@ -383,13 +338,6 @@
 		dsl_dir_sync(dd, tx);
 	write_time += gethrtime() - start;
 
-	if (spa_sync_pass(dp->dp_spa) == 1) {
-		dp->dp_scrub_prefetch_zio_root = zio_root(dp->dp_spa, NULL,
-		    NULL, ZIO_FLAG_CANFAIL);
-		dsl_pool_scrub_sync(dp, tx);
-		(void) zio_wait(dp->dp_scrub_prefetch_zio_root);
-	}
-
 	start = gethrtime();
 	if (list_head(&mos->os_dirty_dnodes[txg & TXG_MASK]) != NULL ||
 	    list_head(&mos->os_free_dnodes[txg & TXG_MASK]) != NULL) {
@@ -407,7 +355,6 @@
 
 	dmu_tx_commit(tx);
 
-	data_written = dp->dp_space_towrite[txg & TXG_MASK];
 	dp->dp_space_towrite[txg & TXG_MASK] = 0;
 	ASSERT(dp->dp_tempreserved[txg & TXG_MASK] == 0);
 
@@ -679,7 +626,7 @@
 	dsobj = dsl_dataset_create_sync(dp->dp_root_dir, ORIGIN_DIR_NAME,
 	    NULL, 0, kcred, tx);
 	VERIFY(0 == dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds));
-	dsl_dataset_snapshot_sync(ds, ORIGIN_DIR_NAME, kcred, tx);
+	dsl_dataset_snapshot_sync(ds, ORIGIN_DIR_NAME, tx);
 	VERIFY(0 == dsl_dataset_hold_obj(dp, ds->ds_phys->ds_prev_snap_obj,
 	    dp, &dp->dp_origin_snap));
 	dsl_dataset_rele(ds, FTAG);

--- a/usr/src/uts/common/fs/zfs/dsl_prop.c	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/dsl_prop.c	Mon May 03 14:54:08 2010 -0700
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #include <sys/zfs_context.h>
@@ -260,11 +259,8 @@
 
 	cbr->cbr_func(cbr->cbr_arg, value);
 
-	VERIFY(0 == dsl_dir_open_obj(dp, dd->dd_object,
-	    NULL, cbr, &dd));
 	if (need_rwlock)
 		rw_exit(&dp->dp_config_rwlock);
-	/* Leave dir open until this callback is unregistered */
 	return (0);
 }
 
@@ -464,8 +460,6 @@
 	kmem_free((void*)cbr->cbr_propname, strlen(cbr->cbr_propname)+1);
 	kmem_free(cbr, sizeof (dsl_prop_cb_record_t));
 
-	/* Clean up from dsl_prop_register */
-	dsl_dir_close(dd, cbr);
 	return (0);
 }
 
@@ -552,7 +546,7 @@
 }
 
 void
-dsl_prop_set_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
+dsl_prop_set_sync(void *arg1, void *arg2, dmu_tx_t *tx)
 {
 	dsl_dataset_t *ds = arg1;
 	dsl_prop_setarg_t *psa = arg2;
@@ -707,9 +701,9 @@
 		}
 	}
 
-	spa_history_internal_log((source == ZPROP_SRC_NONE ||
+	spa_history_log_internal((source == ZPROP_SRC_NONE ||
 	    source == ZPROP_SRC_INHERITED) ? LOG_DS_INHERIT :
-	    LOG_DS_PROPSET, ds->ds_dir->dd_pool->dp_spa, tx, cr,
+	    LOG_DS_PROPSET, ds->ds_dir->dd_pool->dp_spa, tx,
 	    "%s=%s dataset = %llu", propname,
 	    (valstr == NULL ? "" : valstr), ds->ds_object);
 
@@ -718,7 +712,7 @@
 }
 
 void
-dsl_props_set_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
+dsl_props_set_sync(void *arg1, void *arg2, dmu_tx_t *tx)
 {
 	dsl_dataset_t *ds = arg1;
 	dsl_props_arg_t *pa = arg2;
@@ -756,13 +750,13 @@
 			psa.psa_numints = 1;
 			psa.psa_value = &intval;
 		}
-		dsl_prop_set_sync(ds, &psa, cr, tx);
+		dsl_prop_set_sync(ds, &psa, tx);
 	}
 }
 
 void
 dsl_dir_prop_set_uint64_sync(dsl_dir_t *dd, const char *name, uint64_t val,
-    cred_t *cr, dmu_tx_t *tx)
+    dmu_tx_t *tx)
 {
 	objset_t *mos = dd->dd_pool->dp_meta_objset;
 	uint64_t zapobj = dd->dd_phys->dd_props_zapobj;
@@ -773,7 +767,7 @@
 
 	dsl_prop_changed_notify(dd->dd_pool, dd->dd_object, name, val, TRUE);
 
-	spa_history_internal_log(LOG_DS_PROPSET, dd->dd_pool->dp_spa, tx, cr,
+	spa_history_log_internal(LOG_DS_PROPSET, dd->dd_pool->dp_spa, tx,
 	    "%s=%llu dataset = %llu", name, (u_longlong_t)val,
 	    dd->dd_phys->dd_head_dataset_obj);
 }

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/fs/zfs/dsl_scan.c	Mon May 03 14:54:08 2010 -0700
@@ -0,0 +1,1660 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#include <sys/dsl_scan.h>
+#include <sys/dsl_pool.h>
+#include <sys/dsl_dataset.h>
+#include <sys/dsl_prop.h>
+#include <sys/dsl_dir.h>
+#include <sys/dsl_synctask.h>
+#include <sys/dnode.h>
+#include <sys/dmu_tx.h>
+#include <sys/dmu_objset.h>
+#include <sys/arc.h>
+#include <sys/zap.h>
+#include <sys/zio.h>
+#include <sys/zfs_context.h>
+#include <sys/fs/zfs.h>
+#include <sys/zfs_znode.h>
+#include <sys/spa_impl.h>
+#include <sys/vdev_impl.h>
+#include <sys/zil_impl.h>
+#include <sys/zio_checksum.h>
+#include <sys/ddt.h>
+#include <sys/sa.h>
+#include <sys/sa_impl.h>
+#ifdef _KERNEL
+#include <sys/zfs_vfsops.h>
+#endif
+
+typedef int (scan_cb_t)(dsl_pool_t *, const blkptr_t *, const zbookmark_t *);
+
+static scan_cb_t dsl_scan_defrag_cb;
+static scan_cb_t dsl_scan_scrub_cb;
+static scan_cb_t dsl_scan_remove_cb;
+static dsl_syncfunc_t dsl_scan_cancel_sync;
+static void dsl_scan_sync_state(dsl_scan_t *, dmu_tx_t *tx);
+
+int zfs_scan_min_time_ms = 1000; /* min millisecs to scrub per txg */
+int zfs_resilver_min_time_ms = 3000; /* min millisecs to resilver per txg */
+boolean_t zfs_no_scrub_io = B_FALSE; /* set to disable scrub i/o */
+boolean_t zfs_no_scrub_prefetch = B_FALSE; /* set to disable srub prefetching */
+enum ddt_class zfs_scrub_ddt_class_max = DDT_CLASS_DUPLICATE;
+int dsl_scan_delay_completion = B_FALSE; /* set to delay scan completion */
+
+#define	DSL_SCAN_IS_SCRUB_RESILVER(scn) \
+	((scn)->scn_phys.scn_func == POOL_SCAN_SCRUB || \
+	(scn)->scn_phys.scn_func == POOL_SCAN_RESILVER)
+
+extern int zfs_txg_timeout;
+
+/* the order has to match pool_scan_type */
+static scan_cb_t *scan_funcs[POOL_SCAN_FUNCS] = {
+	NULL,
+	dsl_scan_scrub_cb,	/* POOL_SCAN_SCRUB */
+	dsl_scan_scrub_cb,	/* POOL_SCAN_RESILVER */
+};
+
+int
+dsl_scan_init(dsl_pool_t *dp, uint64_t txg)
+{
+	int err;
+	dsl_scan_t *scn;
+	spa_t *spa = dp->dp_spa;
+	uint64_t f;
+
+	scn = dp->dp_scan = kmem_zalloc(sizeof (dsl_scan_t), KM_SLEEP);
+	scn->scn_dp = dp;
+
+	err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
+	    "scrub_func", sizeof (uint64_t), 1, &f);
+	if (err == 0) {
+		/*
+		 * There was an old-style scrub in progress.  Restart a
+		 * new-style scrub from the beginning.
+		 */
+		scn->scn_restart_txg = txg;
+		zfs_dbgmsg("old-style scrub was in progress; "
+		    "restarting new-style scrub in txg %llu",
+		    scn->scn_restart_txg);
+
+		/*
+		 * Load the queue obj from the old location so that it
+		 * can be freed by dsl_scan_done().
+		 */
+		(void) zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
+		    "scrub_queue", sizeof (uint64_t), 1,
+		    &scn->scn_phys.scn_queue_obj);
+	} else {
+		err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
+		    DMU_POOL_SCAN, sizeof (uint64_t), SCAN_PHYS_NUMINTS,
+		    &scn->scn_phys);
+		if (err == ENOENT)
+			return (0);
+		else if (err)
+			return (err);
+
+		if (scn->scn_phys.scn_state == DSS_SCANNING &&
+		    spa_prev_software_version(dp->dp_spa) < SPA_VERSION_SCAN) {
+			/*
+			 * A new-type scrub was in progress on an old
+			 * pool, and the pool was accessed by old
+			 * software.  Restart from the beginning, since
+			 * the old software may have changed the pool in
+			 * the meantime.
+			 */
+			scn->scn_restart_txg = txg;
+			zfs_dbgmsg("new-style scrub was modified "
+			    "by old software; restarting in txg %llu",
+			    scn->scn_restart_txg);
+		}
+	}
+
+	spa_scan_stat_init(spa);
+	return (0);
+}
+
+void
+dsl_scan_fini(dsl_pool_t *dp)
+{
+	if (dp->dp_scan) {
+		kmem_free(dp->dp_scan, sizeof (dsl_scan_t));
+		dp->dp_scan = NULL;
+	}
+}
+
+/* ARGSUSED */
+static int
+dsl_scan_setup_check(void *arg1, void *arg2, dmu_tx_t *tx)
+{
+	dsl_scan_t *scn = arg1;
+
+	if (scn->scn_phys.scn_state == DSS_SCANNING)
+		return (EBUSY);
+
+	return (0);
+}
+
+/* ARGSUSED */
+static void
+dsl_scan_setup_sync(void *arg1, void *arg2, dmu_tx_t *tx)
+{
+	dsl_scan_t *scn = arg1;
+	pool_scan_func_t *funcp = arg2;
+	dmu_object_type_t ot = 0;
+	dsl_pool_t *dp = scn->scn_dp;
+	spa_t *spa = dp->dp_spa;
+
+	ASSERT(scn->scn_phys.scn_state != DSS_SCANNING);
+	ASSERT(*funcp > POOL_SCAN_NONE && *funcp < POOL_SCAN_FUNCS);
+	bzero(&scn->scn_phys, sizeof (scn->scn_phys));
+	scn->scn_phys.scn_func = *funcp;
+	scn->scn_phys.scn_state = DSS_SCANNING;
+	scn->scn_phys.scn_min_txg = 0;
+	scn->scn_phys.scn_max_txg = tx->tx_txg;
+	scn->scn_phys.scn_ddt_class_max = DDT_CLASSES - 1; /* the entire DDT */
+	scn->scn_phys.scn_start_time = gethrestime_sec();
+	scn->scn_phys.scn_errors = 0;
+	scn->scn_phys.scn_to_examine = spa->spa_root_vdev->vdev_stat.vs_alloc;
+	scn->scn_restart_txg = 0;
+	spa_scan_stat_init(spa);
+
+	if (DSL_SCAN_IS_SCRUB_RESILVER(scn)) {
+		scn->scn_phys.scn_ddt_class_max = zfs_scrub_ddt_class_max;
+
+		/* rewrite all disk labels */
+		vdev_config_dirty(spa->spa_root_vdev);
+
+		if (vdev_resilver_needed(spa->spa_root_vdev,
+		    &scn->scn_phys.scn_min_txg, &scn->scn_phys.scn_max_txg)) {
+			spa_event_notify(spa, NULL, ESC_ZFS_RESILVER_START);
+		} else {
+			spa_event_notify(spa, NULL, ESC_ZFS_SCRUB_START);
+		}
+
+		spa->spa_scrub_started = B_TRUE;
+		/*
+		 * If this is an incremental scrub, limit the DDT scrub phase
+		 * to just the auto-ditto class (for correctness); the rest
+		 * of the scrub should go faster using top-down pruning.
+		 */
+		if (scn->scn_phys.scn_min_txg > TXG_INITIAL)
+			scn->scn_phys.scn_ddt_class_max = DDT_CLASS_DITTO;
+
+	}
+
+	/* back to the generic stuff */
+
+	if (dp->dp_blkstats == NULL) {
+		dp->dp_blkstats =
+		    kmem_alloc(sizeof (zfs_all_blkstats_t), KM_SLEEP);
+	}
+	bzero(dp->dp_blkstats, sizeof (zfs_all_blkstats_t));
+
+	if (spa_version(spa) < SPA_VERSION_DSL_SCRUB)
+		ot = DMU_OT_ZAP_OTHER;
+
+	scn->scn_phys.scn_queue_obj = zap_create(dp->dp_meta_objset,
+	    ot ? ot : DMU_OT_SCAN_QUEUE, DMU_OT_NONE, 0, tx);
+
+	dsl_scan_sync_state(scn, tx);
+
+	spa_history_log_internal(LOG_POOL_SCAN, spa, tx,
+	    "func=%u mintxg=%llu maxtxg=%llu",
+	    *funcp, scn->scn_phys.scn_min_txg, scn->scn_phys.scn_max_txg);
+}
+
+/* ARGSUSED */
+static void
+dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx)
+{
+	static const char *old_names[] = {
+		"scrub_bookmark",
+		"scrub_ddt_bookmark",
+		"scrub_ddt_class_max",
+		"scrub_queue",
+		"scrub_min_txg",
+		"scrub_max_txg",
+		"scrub_func",
+		"scrub_errors",
+		NULL
+	};
+
+	dsl_pool_t *dp = scn->scn_dp;
+	spa_t *spa = dp->dp_spa;
+	int i;
+
+	/* Remove any remnants of an old-style scrub. */
+	for (i = 0; old_names[i]; i++) {
+		(void) zap_remove(dp->dp_meta_objset,
+		    DMU_POOL_DIRECTORY_OBJECT, old_names[i], tx);
+	}
+
+	if (scn->scn_phys.scn_queue_obj != 0) {
+		VERIFY(0 == dmu_object_free(dp->dp_meta_objset,
+		    scn->scn_phys.scn_queue_obj, tx));
+		scn->scn_phys.scn_queue_obj = 0;
+	}
+
+	/*
+	 * If we were "restarted" from a stopped state, don't bother
+	 * with anything else.
+	 */
+	if (scn->scn_phys.scn_state != DSS_SCANNING)
+		return;
+
+	if (complete)
+		scn->scn_phys.scn_state = DSS_FINISHED;
+	else
+		scn->scn_phys.scn_state = DSS_CANCELED;
+
+	spa_history_log_internal(LOG_POOL_SCAN_DONE, spa, tx,
+	    "complete=%u", complete);
+
+	if (DSL_SCAN_IS_SCRUB_RESILVER(scn)) {
+		mutex_enter(&spa->spa_scrub_lock);
+		while (spa->spa_scrub_inflight > 0) {
+			cv_wait(&spa->spa_scrub_io_cv,
+			    &spa->spa_scrub_lock);
+		}
+		mutex_exit(&spa->spa_scrub_lock);
+		spa->spa_scrub_started = B_FALSE;
+		spa->spa_scrub_active = B_FALSE;
+
+		/*
+		 * If the scrub/resilver completed, update all DTLs to
+		 * reflect this.  Whether it succeeded or not, vacate
+		 * all temporary scrub DTLs.
+		 */
+		vdev_dtl_reassess(spa->spa_root_vdev, tx->tx_txg,
+		    complete ? scn->scn_phys.scn_max_txg : 0, B_TRUE);
+		if (complete) {
+			spa_event_notify(spa, NULL, scn->scn_phys.scn_min_txg ?
+			    ESC_ZFS_RESILVER_FINISH : ESC_ZFS_SCRUB_FINISH);
+		}
+		spa_errlog_rotate(spa);
+
+		/*
+		 * We may have finished replacing a device.
+		 * Let the async thread assess this and handle the detach.
+		 */
+		spa_async_request(spa, SPA_ASYNC_RESILVER_DONE);
+	}
+
+	scn->scn_phys.scn_end_time = gethrestime_sec();
+}
+
+/* ARGSUSED */
+static int
+dsl_scan_cancel_check(void *arg1, void *arg2, dmu_tx_t *tx)
+{
+	dsl_scan_t *scn = arg1;
+
+	if (scn->scn_phys.scn_state != DSS_SCANNING)
+		return (ENOENT);
+	return (0);
+}
+
+/* ARGSUSED */
+static void
+dsl_scan_cancel_sync(void *arg1, void *arg2, dmu_tx_t *tx)
+{
+	dsl_scan_t *scn = arg1;
+
+	dsl_scan_done(scn, B_FALSE, tx);
+	dsl_scan_sync_state(scn, tx);
+}
+
+int
+dsl_scan_cancel(dsl_pool_t *dp)
+{
+	boolean_t complete = B_FALSE;
+	int err;
+
+	err = dsl_sync_task_do(dp, dsl_scan_cancel_check,
+	    dsl_scan_cancel_sync, dp->dp_scan, &complete, 3);
+	return (err);
+}
+
+static void dsl_scan_visitbp(blkptr_t *bp,
+    const zbookmark_t *zb, dnode_phys_t *dnp, arc_buf_t *pbuf,
+    dsl_dataset_t *ds, dsl_scan_t *scn, dmu_objset_type_t ostype,
+    dmu_tx_t *tx);
+static void dsl_scan_visitdnode(dsl_scan_t *, dsl_dataset_t *ds,
+    dmu_objset_type_t ostype,
+    dnode_phys_t *dnp, arc_buf_t *buf, uint64_t object, dmu_tx_t *tx);
+
+void
+dsl_free(dsl_pool_t *dp, uint64_t txg, const blkptr_t *bp)
+{
+	zio_free(dp->dp_spa, txg, bp);
+}
+
+void
+dsl_free_sync(zio_t *pio, dsl_pool_t *dp, uint64_t txg, const blkptr_t *bpp)
+{
+	ASSERT(dsl_pool_sync_context(dp));
+	zio_nowait(zio_free_sync(pio, dp->dp_spa, txg, bpp, pio->io_flags));
+}
+
+int
+dsl_read(zio_t *pio, spa_t *spa, const blkptr_t *bpp, arc_buf_t *pbuf,
+    arc_done_func_t *done, void *private, int priority, int zio_flags,
+    uint32_t *arc_flags, const zbookmark_t *zb)
+{
+	return (arc_read(pio, spa, bpp, pbuf, done, private,
+	    priority, zio_flags, arc_flags, zb));
+}
+
+int
+dsl_read_nolock(zio_t *pio, spa_t *spa, const blkptr_t *bpp,
+    arc_done_func_t *done, void *private, int priority, int zio_flags,
+    uint32_t *arc_flags, const zbookmark_t *zb)
+{
+	return (arc_read_nolock(pio, spa, bpp, done, private,
+	    priority, zio_flags, arc_flags, zb));
+}
+
+static boolean_t
+bookmark_is_zero(const zbookmark_t *zb)
+{
+	return (zb->zb_objset == 0 && zb->zb_object == 0 &&
+	    zb->zb_level == 0 && zb->zb_blkid == 0);
+}
+
+/* dnp is the dnode for zb1->zb_object */
+static boolean_t
+bookmark_is_before(const dnode_phys_t *dnp, const zbookmark_t *zb1,
+    const zbookmark_t *zb2)
+{
+	uint64_t zb1nextL0, zb2thisobj;
+
+	ASSERT(zb1->zb_objset == zb2->zb_objset);
+	ASSERT(zb2->zb_level == 0);
+
+	/*
+	 * A bookmark in the deadlist is considered to be after
+	 * everything else.
+	 */
+	if (zb2->zb_object == DMU_DEADLIST_OBJECT)
+		return (B_TRUE);
+
+	/* The objset_phys_t isn't before anything. */
+	if (dnp == NULL)
+		return (B_FALSE);
+
+	zb1nextL0 = (zb1->zb_blkid + 1) <<
+	    ((zb1->zb_level) * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT));
+
+	zb2thisobj = zb2->zb_object ? zb2->zb_object :
+	    zb2->zb_blkid << (DNODE_BLOCK_SHIFT - DNODE_SHIFT);
+
+	if (zb1->zb_object == DMU_META_DNODE_OBJECT) {
+		uint64_t nextobj = zb1nextL0 *
+		    (dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT) >> DNODE_SHIFT;
+		return (nextobj <= zb2thisobj);
+	}
+
+	if (zb1->zb_object < zb2thisobj)
+		return (B_TRUE);
+	if (zb1->zb_object > zb2thisobj)
+		return (B_FALSE);
+	if (zb2->zb_object == DMU_META_DNODE_OBJECT)
+		return (B_FALSE);
+	return (zb1nextL0 <= zb2->zb_blkid);
+}
+
+static uint64_t
+dsl_scan_ds_maxtxg(dsl_dataset_t *ds)
+{
+	uint64_t smt = ds->ds_dir->dd_pool->dp_scan->scn_phys.scn_max_txg;
+	if (dsl_dataset_is_snapshot(ds))
+		return (MIN(smt, ds->ds_phys->ds_creation_txg));
+	return (smt);
+}
+
+static void
+dsl_scan_sync_state(dsl_scan_t *scn, dmu_tx_t *tx)
+{
+	VERIFY(0 == zap_update(scn->scn_dp->dp_meta_objset,
+	    DMU_POOL_DIRECTORY_OBJECT,
+	    DMU_POOL_SCAN, sizeof (uint64_t), SCAN_PHYS_NUMINTS,
+	    &scn->scn_phys, tx));
+}
+
+static boolean_t
+dsl_scan_check_pause(dsl_scan_t *scn, const zbookmark_t *zb)
+{
+	uint64_t elapsed_nanosecs;
+	int mintime;
+
+	/* we never skip user/group accounting objects */
+	if (zb && (int64_t)zb->zb_object < 0)
+		return (B_FALSE);
+
+	if (scn->scn_pausing)
+		return (B_TRUE); /* we're already pausing */
+
+	if (!bookmark_is_zero(&scn->scn_phys.scn_bookmark))
+		return (B_FALSE); /* we're resuming */
+
+	/* We only know how to resume from level-0 blocks. */
+	if (zb && zb->zb_level != 0)
+		return (B_FALSE);
+
+	mintime = (scn->scn_phys.scn_func == POOL_SCAN_RESILVER) ?
+	    zfs_resilver_min_time_ms : zfs_scan_min_time_ms;
+	elapsed_nanosecs = gethrtime() - scn->scn_sync_start_time;
+	if (elapsed_nanosecs / NANOSEC > zfs_txg_timeout ||
+	    (elapsed_nanosecs / MICROSEC > mintime &&
+	    txg_sync_waiting(scn->scn_dp)) ||
+	    spa_shutting_down(scn->scn_dp->dp_spa)) {
+		if (zb) {
+			dprintf("pausing at bookmark %llx/%llx/%llx/%llx\n",
+			    (longlong_t)zb->zb_objset,
+			    (longlong_t)zb->zb_object,
+			    (longlong_t)zb->zb_level,
+			    (longlong_t)zb->zb_blkid);
+			scn->scn_phys.scn_bookmark = *zb;
+		}
+		dprintf("pausing at DDT bookmark %llx/%llx/%llx/%llx\n",
+		    (longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_class,
+		    (longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_type,
+		    (longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_checksum,
+		    (longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_cursor);
+		scn->scn_pausing = B_TRUE;
+		return (B_TRUE);
+	}
+	return (B_FALSE);
+}
+
+typedef struct zil_scan_arg {
+	dsl_pool_t	*zsa_dp;
+	zil_header_t	*zsa_zh;
+} zil_scan_arg_t;
+
+/* ARGSUSED */
+static int
+dsl_scan_zil_block(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t claim_txg)
+{
+	zil_scan_arg_t *zsa = arg;
+	dsl_pool_t *dp = zsa->zsa_dp;
+	dsl_scan_t *scn = dp->dp_scan;
+	zil_header_t *zh = zsa->zsa_zh;
+	zbookmark_t zb;
+
+	if (bp->blk_birth <= scn->scn_phys.scn_cur_min_txg)
+		return (0);
+
+	/*
+	 * One block ("stubby") can be allocated a long time ago; we
+	 * want to visit that one because it has been allocated
+	 * (on-disk) even if it hasn't been claimed (even though for
+	 * scrub there's nothing to do to it).
+	 */
+	if (claim_txg == 0 && bp->blk_birth >= spa_first_txg(dp->dp_spa))
+		return (0);
+
+	SET_BOOKMARK(&zb, zh->zh_log.blk_cksum.zc_word[ZIL_ZC_OBJSET],
+	    ZB_ZIL_OBJECT, ZB_ZIL_LEVEL, bp->blk_cksum.zc_word[ZIL_ZC_SEQ]);
+
+	VERIFY(0 == scan_funcs[scn->scn_phys.scn_func](dp, bp, &zb));
+	return (0);
+}
+
+/* ARGSUSED */
+static int
+dsl_scan_zil_record(zilog_t *zilog, lr_t *lrc, void *arg, uint64_t claim_txg)
+{
+	if (lrc->lrc_txtype == TX_WRITE) {
+		zil_scan_arg_t *zsa = arg;
+		dsl_pool_t *dp = zsa->zsa_dp;
+		dsl_scan_t *scn = dp->dp_scan;
+		zil_header_t *zh = zsa->zsa_zh;
+		lr_write_t *lr = (lr_write_t *)lrc;
+		blkptr_t *bp = &lr->lr_blkptr;
+		zbookmark_t zb;
+
+		if (bp->blk_birth <= scn->scn_phys.scn_cur_min_txg)
+			return (0);
+
+		/*
+		 * birth can be < claim_txg if this record's txg is
+		 * already txg sync'ed (but this log block contains
+		 * other records that are not synced)
+		 */
+		if (claim_txg == 0 || bp->blk_birth < claim_txg)
+			return (0);
+
+		SET_BOOKMARK(&zb, zh->zh_log.blk_cksum.zc_word[ZIL_ZC_OBJSET],
+		    lr->lr_foid, ZB_ZIL_LEVEL,
+		    lr->lr_offset / BP_GET_LSIZE(bp));
+
+		VERIFY(0 == scan_funcs[scn->scn_phys.scn_func](dp, bp, &zb));
+	}
+	return (0);
+}
+
+static void
+dsl_scan_zil(dsl_pool_t *dp, zil_header_t *zh)
+{
+	uint64_t claim_txg = zh->zh_claim_txg;
+	zil_scan_arg_t zsa = { dp, zh };
+	zilog_t *zilog;
+
+	/*
+	 * We only want to visit blocks that have been claimed but not yet
+	 * replayed (or, in read-only mode, blocks that *would* be claimed).
+	 */
+	if (claim_txg == 0 && spa_writeable(dp->dp_spa))
+		return;
+
+	zilog = zil_alloc(dp->dp_meta_objset, zh);
+
+	(void) zil_parse(zilog, dsl_scan_zil_block, dsl_scan_zil_record, &zsa,
+	    claim_txg);
+
+	zil_free(zilog);
+}
+
+/* ARGSUSED */
+static void
+dsl_scan_prefetch(dsl_scan_t *scn, arc_buf_t *buf, blkptr_t *bp,
+    uint64_t objset, uint64_t object, uint64_t blkid)
+{
+	zbookmark_t czb;
+	uint32_t flags = ARC_NOWAIT | ARC_PREFETCH;
+
+	if (zfs_no_scrub_prefetch)
+		return;
+
+	if (BP_IS_HOLE(bp) || bp->blk_birth <= scn->scn_phys.scn_min_txg ||
+	    (BP_GET_LEVEL(bp) == 0 && BP_GET_TYPE(bp) != DMU_OT_DNODE))
+		return;
+
+	SET_BOOKMARK(&czb, objset, object, BP_GET_LEVEL(bp), blkid);
+
+	/*
+	 * XXX need to make sure all of these arc_read() prefetches are
+	 * done before setting xlateall (similar to dsl_read())
+	 */
+	(void) arc_read(scn->scn_prefetch_zio_root, scn->scn_dp->dp_spa, bp,
+	    buf, NULL, NULL, ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL,
+	    &flags, &czb);
+}
+
+static boolean_t
+dsl_scan_check_resume(dsl_scan_t *scn, const dnode_phys_t *dnp,
+    const zbookmark_t *zb)
+{
+	/*
+	 * We never skip over user/group accounting objects (obj<0)
+	 */
+	if (!bookmark_is_zero(&scn->scn_phys.scn_bookmark) &&
+	    (int64_t)zb->zb_object >= 0) {
+		/*
+		 * If we already visited this bp & everything below (in
+		 * a prior txg sync), don't bother doing it again.
+		 */
+		if (bookmark_is_before(dnp, zb, &scn->scn_phys.scn_bookmark))
+			return (B_TRUE);
+
+		/*
+		 * If we found the block we're trying to resume from, or
+		 * we went past it to a different object, zero it out to
+		 * indicate that it's OK to start checking for pausing
+		 * again.
+		 */
+		if (bcmp(zb, &scn->scn_phys.scn_bookmark, sizeof (*zb)) == 0 ||
+		    zb->zb_object > scn->scn_phys.scn_bookmark.zb_object) {
+			dprintf("resuming at %llx/%llx/%llx/%llx\n",
+			    (longlong_t)zb->zb_objset,
+			    (longlong_t)zb->zb_object,
+			    (longlong_t)zb->zb_level,
+			    (longlong_t)zb->zb_blkid);
+			bzero(&scn->scn_phys.scn_bookmark, sizeof (*zb));
+		}
+	}
+	return (B_FALSE);
+}
+
+/*
+ * Return nonzero on i/o error.
+ * Return new buf to write out in *bufp.
+ */
+static int
+dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype,
+    dnode_phys_t *dnp, const blkptr_t *bp,
+    const zbookmark_t *zb, dmu_tx_t *tx, arc_buf_t **bufp)
+{
+	dsl_pool_t *dp = scn->scn_dp;
+	int err;
+
+	if (BP_GET_LEVEL(bp) > 0) {
+		uint32_t flags = ARC_WAIT;
+		int i;
+		blkptr_t *cbp;
+		int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT;
+
+		err = arc_read_nolock(NULL, dp->dp_spa, bp,
+		    arc_getbuf_func, bufp,
+		    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
+		if (err) {
+			scn->scn_phys.scn_errors++;
+			return (err);
+		}
+		for (i = 0, cbp = (*bufp)->b_data; i < epb; i++, cbp++) {
+			dsl_scan_prefetch(scn, *bufp, cbp, zb->zb_objset,
+			    zb->zb_object, zb->zb_blkid * epb + i);
+		}
+		for (i = 0, cbp = (*bufp)->b_data; i < epb; i++, cbp++) {
+			zbookmark_t czb;
+
+			SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object,
+			    zb->zb_level - 1,
+			    zb->zb_blkid * epb + i);
+			dsl_scan_visitbp(cbp, &czb, dnp,
+			    *bufp, ds, scn, ostype, tx);
+		}
+	} else if (BP_GET_TYPE(bp) == DMU_OT_USERGROUP_USED) {
+		uint32_t flags = ARC_WAIT;
+
+		err = arc_read_nolock(NULL, dp->dp_spa, bp,
+		    arc_getbuf_func, bufp,
+		    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
+		if (err) {
+			scn->scn_phys.scn_errors++;
+			return (err);
+		}
+	} else if (BP_GET_TYPE(bp) == DMU_OT_DNODE) {
+		uint32_t flags = ARC_WAIT;
+		dnode_phys_t *cdnp;
+		int i, j;
+		int epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT;
+
+		err = arc_read_nolock(NULL, dp->dp_spa, bp,
+		    arc_getbuf_func, bufp,
+		    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
+		if (err) {
+			scn->scn_phys.scn_errors++;
+			return (err);
+		}
+		for (i = 0, cdnp = (*bufp)->b_data; i < epb; i++, cdnp++) {
+			for (j = 0; j < cdnp->dn_nblkptr; j++) {
+				blkptr_t *cbp = &cdnp->dn_blkptr[j];
+				dsl_scan_prefetch(scn, *bufp, cbp,
+				    zb->zb_objset, zb->zb_blkid * epb + i, j);
+			}
+		}
+		for (i = 0, cdnp = (*bufp)->b_data; i < epb; i++, cdnp++) {
+			dsl_scan_visitdnode(scn, ds, ostype,
+			    cdnp, *bufp, zb->zb_blkid * epb + i, tx);
+		}
+
+	} else if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) {
+		uint32_t flags = ARC_WAIT;
+		objset_phys_t *osp;
+
+		err = arc_read_nolock(NULL, dp->dp_spa, bp,
+		    arc_getbuf_func, bufp,
+		    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
+		if (err) {
+			scn->scn_phys.scn_errors++;
+			return (err);
+		}
+
+		osp = (*bufp)->b_data;
+
+		if (DSL_SCAN_IS_SCRUB_RESILVER(scn))
+			dsl_scan_zil(dp, &osp->os_zil_header);
+
+		dsl_scan_visitdnode(scn, ds, osp->os_type,
+		    &osp->os_meta_dnode, *bufp, DMU_META_DNODE_OBJECT, tx);
+
+		if (OBJSET_BUF_HAS_USERUSED(*bufp)) {
+			/*
+			 * We also always visit user/group accounting
+			 * objects, and never skip them, even if we are
+			 * pausing.  This is necessary so that the space
+			 * deltas from this txg get integrated.
+			 */
+			dsl_scan_visitdnode(scn, ds, osp->os_type,
+			    &osp->os_groupused_dnode, *bufp,
+			    DMU_GROUPUSED_OBJECT, tx);
+			dsl_scan_visitdnode(scn, ds, osp->os_type,
+			    &osp->os_userused_dnode, *bufp,
+			    DMU_USERUSED_OBJECT, tx);
+		}
+	}
+
+	return (0);
+}
+
+static void
+dsl_scan_visitdnode(dsl_scan_t *scn, dsl_dataset_t *ds,
+    dmu_objset_type_t ostype, dnode_phys_t *dnp, arc_buf_t *buf,
+    uint64_t object, dmu_tx_t *tx)
+{
+	int j;
+
+	for (j = 0; j < dnp->dn_nblkptr; j++) {
+		zbookmark_t czb;
+
+		SET_BOOKMARK(&czb, ds ? ds->ds_object : 0, object,
+		    dnp->dn_nlevels - 1, j);
+		dsl_scan_visitbp(&dnp->dn_blkptr[j],
+		    &czb, dnp, buf, ds, scn, ostype, tx);
+	}
+
+	if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) {
+		zbookmark_t czb;
+		SET_BOOKMARK(&czb, ds ? ds->ds_object : 0, object,
+		    0, DMU_SPILL_BLKID);
+		dsl_scan_visitbp(&dnp->dn_spill,
+		    &czb, dnp, buf, ds, scn, ostype, tx);
+	}
+}
+
+/*
+ * The arguments are in this order because mdb can only print the
+ * first 5; we want them to be useful.
+ */
+static void
+dsl_scan_visitbp(blkptr_t *bp, const zbookmark_t *zb,
+    dnode_phys_t *dnp, arc_buf_t *pbuf,
+    dsl_dataset_t *ds, dsl_scan_t *scn, dmu_objset_type_t ostype,
+    dmu_tx_t *tx)
+{
+	dsl_pool_t *dp = scn->scn_dp;
+	arc_buf_t *buf = NULL;
+	blkptr_t bp_toread = *bp;
+
+	/* ASSERT(pbuf == NULL || arc_released(pbuf)); */
+
+	if (dsl_scan_check_pause(scn, zb))
+		return;
+
+	if (dsl_scan_check_resume(scn, dnp, zb))
+		return;
+
+	if (bp->blk_birth == 0)
+		return;
+
+	scn->scn_visited_this_txg++;
+
+	dprintf_bp(bp,
+	    "visiting ds=%p/%llu zb=%llx/%llx/%llx/%llx buf=%p bp=%p",
+	    ds, ds ? ds->ds_object : 0,
+	    zb->zb_objset, zb->zb_object, zb->zb_level, zb->zb_blkid,
+	    pbuf, bp);
+
+	if (bp->blk_birth <= scn->scn_phys.scn_cur_min_txg)
+		return;
+
+	if (BP_GET_TYPE(bp) != DMU_OT_USERGROUP_USED) {
+		/*
+		 * For non-user-accounting blocks, we need to read the
+		 * new bp (from a deleted snapshot, found in
+		 * check_existing_xlation).  If we used the old bp,
+		 * pointers inside this block from before we resumed
+		 * would be untranslated.
+		 *
+		 * For user-accounting blocks, we need to read the old
+		 * bp, because we will apply the entire space delta to
+		 * it (original untranslated -> translations from
+		 * deleted snap -> now).
+		 */
+		bp_toread = *bp;
+	}
+
+	if (dsl_scan_recurse(scn, ds, ostype, dnp, &bp_toread, zb, tx,
+	    &buf) != 0)
+		return;
+
+	/*
+	 * If dsl_scan_ddt() has aready visited this block, it will have
+	 * already done any translations or scrubbing, so don't call the
+	 * callback again.
+	 */
+	if (ddt_class_contains(dp->dp_spa,
+	    scn->scn_phys.scn_ddt_class_max, bp)) {
+		ASSERT(buf == NULL);
+		return;
+	}
+
+	/*
+	 * If this block is from the future (after cur_max_txg), then we
+	 * are doing this on behalf of a deleted snapshot, and we will
+	 * revisit the future block on the next pass of this dataset.
+	 * Don't scan it now unless we need to because something
+	 * under it was modified.
+	 */
+	if (bp->blk_birth <= scn->scn_phys.scn_cur_max_txg) {
+		scan_funcs[scn->scn_phys.scn_func](dp, bp, zb);
+	}
+	if (buf)
+		(void) arc_buf_remove_ref(buf, &buf);
+}
+
+static void
+dsl_scan_visit_rootbp(dsl_scan_t *scn, dsl_dataset_t *ds, blkptr_t *bp,
+    dmu_tx_t *tx)
+{
+	zbookmark_t zb;
+
+	SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET,
+	    ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
+	dsl_scan_visitbp(bp, &zb, NULL, NULL,
+	    ds, scn, DMU_OST_NONE, tx);
+
+	dprintf_ds(ds, "finished scan%s", "");
+}
+
+void
+dsl_scan_ds_destroyed(dsl_dataset_t *ds, dmu_tx_t *tx)
+{
+	dsl_pool_t *dp = ds->ds_dir->dd_pool;
+	dsl_scan_t *scn = dp->dp_scan;
+	uint64_t mintxg;
+
+	if (scn->scn_phys.scn_state != DSS_SCANNING)
+		return;
+
+	if (scn->scn_phys.scn_bookmark.zb_objset == ds->ds_object) {
+		if (dsl_dataset_is_snapshot(ds)) {
+			/* Note, scn_cur_{min,max}_txg stays the same. */
+			scn->scn_phys.scn_bookmark.zb_objset =
+			    ds->ds_phys->ds_next_snap_obj;
+			zfs_dbgmsg("destroying ds %llu; currently traversing; "
+			    "reset zb_objset to %llu",
+			    (u_longlong_t)ds->ds_object,
+			    (u_longlong_t)ds->ds_phys->ds_next_snap_obj);
+			scn->scn_phys.scn_flags |= DSF_VISIT_DS_AGAIN;
+		} else {
+			SET_BOOKMARK(&scn->scn_phys.scn_bookmark,
+			    ZB_DESTROYED_OBJSET, 0, 0, 0);
+			zfs_dbgmsg("destroying ds %llu; currently traversing; "
+			    "reset bookmark to -1,0,0,0",
+			    (u_longlong_t)ds->ds_object);
+		}
+	} else if (zap_lookup_int_key(dp->dp_meta_objset,
+	    scn->scn_phys.scn_queue_obj, ds->ds_object, &mintxg) == 0) {
+		ASSERT3U(ds->ds_phys->ds_num_children, <=, 1);
+		VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset,
+		    scn->scn_phys.scn_queue_obj, ds->ds_object, tx));
+		if (dsl_dataset_is_snapshot(ds)) {
+			/*
+			 * We keep the same mintxg; it could be >
+			 * ds_creation_txg if the previous snapshot was
+			 * deleted too.
+			 */
+			VERIFY(zap_add_int_key(dp->dp_meta_objset,
+			    scn->scn_phys.scn_queue_obj,
+			    ds->ds_phys->ds_next_snap_obj, mintxg, tx) == 0);
+			zfs_dbgmsg("destroying ds %llu; in queue; "
+			    "replacing with %llu",
+			    (u_longlong_t)ds->ds_object,
+			    (u_longlong_t)ds->ds_phys->ds_next_snap_obj);
+		} else {
+			zfs_dbgmsg("destroying ds %llu; in queue; removing",
+			    (u_longlong_t)ds->ds_object);
+		}
+	} else {
+		zfs_dbgmsg("destroying ds %llu; ignoring",
+		    (u_longlong_t)ds->ds_object);
+	}
+
+	/*
+	 * dsl_scan_sync() should be called after this, and should sync
+	 * out our changed state, but just to be safe, do it here.
+	 */
+	dsl_scan_sync_state(scn, tx);
+}
+
+void
+dsl_scan_ds_snapshotted(dsl_dataset_t *ds, dmu_tx_t *tx)
+{
+	dsl_pool_t *dp = ds->ds_dir->dd_pool;
+	dsl_scan_t *scn = dp->dp_scan;
+	uint64_t mintxg;
+
+	if (scn->scn_phys.scn_state != DSS_SCANNING)
+		return;
+
+	ASSERT(ds->ds_phys->ds_prev_snap_obj != 0);
+
+	if (scn->scn_phys.scn_bookmark.zb_objset == ds->ds_object) {
+		scn->scn_phys.scn_bookmark.zb_objset =
+		    ds->ds_phys->ds_prev_snap_obj;
+		zfs_dbgmsg("snapshotting ds %llu; currently traversing; "
+		    "reset zb_objset to %llu",
+		    (u_longlong_t)ds->ds_object,
+		    (u_longlong_t)ds->ds_phys->ds_prev_snap_obj);
+	} else if (zap_lookup_int_key(dp->dp_meta_objset,
+	    scn->scn_phys.scn_queue_obj, ds->ds_object, &mintxg) == 0) {
+		VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset,
+		    scn->scn_phys.scn_queue_obj, ds->ds_object, tx));
+		VERIFY(zap_add_int_key(dp->dp_meta_objset,
+		    scn->scn_phys.scn_queue_obj,
+		    ds->ds_phys->ds_prev_snap_obj, mintxg, tx) == 0);
+		zfs_dbgmsg("snapshotting ds %llu; in queue; "
+		    "replacing with %llu",
+		    (u_longlong_t)ds->ds_object,
+		    (u_longlong_t)ds->ds_phys->ds_prev_snap_obj);
+	}
+	dsl_scan_sync_state(scn, tx);
+}
+
+void
+dsl_scan_ds_clone_swapped(dsl_dataset_t *ds1, dsl_dataset_t *ds2, dmu_tx_t *tx)
+{
+	dsl_pool_t *dp = ds1->ds_dir->dd_pool;
+	dsl_scan_t *scn = dp->dp_scan;
+	uint64_t mintxg;
+
+	if (scn->scn_phys.scn_state != DSS_SCANNING)
+		return;
+
+	if (scn->scn_phys.scn_bookmark.zb_objset == ds1->ds_object) {
+		scn->scn_phys.scn_bookmark.zb_objset = ds2->ds_object;
+		zfs_dbgmsg("clone_swap ds %llu; currently traversing; "
+		    "reset zb_objset to %llu",
+		    (u_longlong_t)ds1->ds_object,
+		    (u_longlong_t)ds2->ds_object);
+	} else if (scn->scn_phys.scn_bookmark.zb_objset == ds2->ds_object) {
+		scn->scn_phys.scn_bookmark.zb_objset = ds1->ds_object;
+		zfs_dbgmsg("clone_swap ds %llu; currently traversing; "
+		    "reset zb_objset to %llu",
+		    (u_longlong_t)ds2->ds_object,
+		    (u_longlong_t)ds1->ds_object);
+	}
+
+	if (zap_lookup_int_key(dp->dp_meta_objset, scn->scn_phys.scn_queue_obj,
+	    ds1->ds_object, &mintxg) == 0) {
+		int err;
+
+		ASSERT3U(mintxg, ==, ds1->ds_phys->ds_prev_snap_txg);
+		ASSERT3U(mintxg, ==, ds2->ds_phys->ds_prev_snap_txg);
+		VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset,
+		    scn->scn_phys.scn_queue_obj, ds1->ds_object, tx));
+		err = zap_add_int_key(dp->dp_meta_objset,
+		    scn->scn_phys.scn_queue_obj, ds2->ds_object, mintxg, tx);
+		VERIFY(err == 0 || err == EEXIST);
+		if (err == EEXIST) {
+			/* Both were there to begin with */
+			VERIFY(0 == zap_add_int_key(dp->dp_meta_objset,
+			    scn->scn_phys.scn_queue_obj,
+			    ds1->ds_object, mintxg, tx));
+		}
+		zfs_dbgmsg("clone_swap ds %llu; in queue; "
+		    "replacing with %llu",
+		    (u_longlong_t)ds1->ds_object,
+		    (u_longlong_t)ds2->ds_object);
+	} else if (zap_lookup_int_key(dp->dp_meta_objset,
+	    scn->scn_phys.scn_queue_obj, ds2->ds_object, &mintxg) == 0) {
+		ASSERT3U(mintxg, ==, ds1->ds_phys->ds_prev_snap_txg);
+		ASSERT3U(mintxg, ==, ds2->ds_phys->ds_prev_snap_txg);
+		VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset,
+		    scn->scn_phys.scn_queue_obj, ds2->ds_object, tx));
+		VERIFY(0 == zap_add_int_key(dp->dp_meta_objset,
+		    scn->scn_phys.scn_queue_obj, ds1->ds_object, mintxg, tx));
+		zfs_dbgmsg("clone_swap ds %llu; in queue; "
+		    "replacing with %llu",
+		    (u_longlong_t)ds2->ds_object,
+		    (u_longlong_t)ds1->ds_object);
+	}
+
+	dsl_scan_sync_state(scn, tx);
+}
+
+struct enqueue_clones_arg {
+	dmu_tx_t *tx;
+	uint64_t originobj;
+};
+
+/* ARGSUSED */
+static int
+enqueue_clones_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg)
+{
+	struct enqueue_clones_arg *eca = arg;
+	dsl_dataset_t *ds;
+	int err;
+	dsl_pool_t *dp = spa->spa_dsl_pool;
+	dsl_scan_t *scn = dp->dp_scan;
+
+	err = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds);
+	if (err)
+		return (err);
+
+	if (ds->ds_dir->dd_phys->dd_origin_obj == eca->originobj) {
+		while (ds->ds_phys->ds_prev_snap_obj != eca->originobj) {
+			dsl_dataset_t *prev;
+			err = dsl_dataset_hold_obj(dp,
+			    ds->ds_phys->ds_prev_snap_obj, FTAG, &prev);
+
+			dsl_dataset_rele(ds, FTAG);
+			if (err)
+				return (err);
+			ds = prev;
+		}
+		VERIFY(zap_add_int_key(dp->dp_meta_objset,
+		    scn->scn_phys.scn_queue_obj, ds->ds_object,
+		    ds->ds_phys->ds_prev_snap_txg, eca->tx) == 0);
+	}
+	dsl_dataset_rele(ds, FTAG);
+	return (0);
+}
+
+static void
+dsl_scan_visitds(dsl_scan_t *scn, uint64_t dsobj, dmu_tx_t *tx)
+{
+	dsl_pool_t *dp = scn->scn_dp;
+	dsl_dataset_t *ds;
+
+	VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds));
+
+	/*
+	 * Iterate over the bps in this ds.
+	 */
+	dmu_buf_will_dirty(ds->ds_dbuf, tx);
+	dsl_scan_visit_rootbp(scn, ds, &ds->ds_phys->ds_bp, tx);
+
+	char *dsname = kmem_alloc(ZFS_MAXNAMELEN, KM_SLEEP);
+	dsl_dataset_name(ds, dsname);
+	zfs_dbgmsg("scanned dataset %llu (%s) with min=%llu max=%llu; "
+	    "pausing=%u",
+	    (longlong_t)dsobj, dsname,
+	    (longlong_t)scn->scn_phys.scn_cur_min_txg,
+	    (longlong_t)scn->scn_phys.scn_cur_max_txg,
+	    (int)scn->scn_pausing);
+	kmem_free(dsname, ZFS_MAXNAMELEN);
+
+	if (scn->scn_pausing)
+		goto out;
+
+	/*
+	 * We've finished this pass over this dataset.
+	 */
+
+	/*
+	 * If we did not completely visit this dataset, do another pass.
+	 */
+	if (scn->scn_phys.scn_flags & DSF_VISIT_DS_AGAIN) {
+		zfs_dbgmsg("incomplete pass; visiting again");
+		scn->scn_phys.scn_flags &= ~DSF_VISIT_DS_AGAIN;
+		VERIFY(zap_add_int_key(dp->dp_meta_objset,
+		    scn->scn_phys.scn_queue_obj, ds->ds_object,
+		    scn->scn_phys.scn_cur_max_txg, tx) == 0);
+		goto out;
+	}
+
+	/*
+	 * Add descendent datasets to work queue.
+	 */
+	if (ds->ds_phys->ds_next_snap_obj != 0) {
+		VERIFY(zap_add_int_key(dp->dp_meta_objset,
+		    scn->scn_phys.scn_queue_obj, ds->ds_phys->ds_next_snap_obj,
+		    ds->ds_phys->ds_creation_txg, tx) == 0);
+	}
+	if (ds->ds_phys->ds_num_children > 1) {
+		boolean_t usenext = B_FALSE;
+		if (ds->ds_phys->ds_next_clones_obj != 0) {
+			uint64_t count;
+			/*
+			 * A bug in a previous version of the code could
+			 * cause upgrade_clones_cb() to not set
+			 * ds_next_snap_obj when it should, leading to a
+			 * missing entry.  Therefore we can only use the
+			 * next_clones_obj when its count is correct.
+			 */
+			int err = zap_count(dp->dp_meta_objset,
+			    ds->ds_phys->ds_next_clones_obj, &count);
+			if (err == 0 &&
+			    count == ds->ds_phys->ds_num_children - 1)
+				usenext = B_TRUE;
+		}
+
+		if (usenext) {
+			VERIFY(zap_join_key(dp->dp_meta_objset,
+			    ds->ds_phys->ds_next_clones_obj,
+			    scn->scn_phys.scn_queue_obj,
+			    ds->ds_phys->ds_creation_txg, tx) == 0);
+		} else {
+			struct enqueue_clones_arg eca;
+			eca.tx = tx;
+			eca.originobj = ds->ds_object;
+
+			(void) dmu_objset_find_spa(ds->ds_dir->dd_pool->dp_spa,
+			    NULL, enqueue_clones_cb, &eca, DS_FIND_CHILDREN);
+		}
+	}
+
+out:
+	dsl_dataset_rele(ds, FTAG);
+}
+
+/* ARGSUSED */
+static int
+enqueue_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg)
+{
+	dmu_tx_t *tx = arg;
+	dsl_dataset_t *ds;
+	int err;
+	dsl_pool_t *dp = spa->spa_dsl_pool;
+	dsl_scan_t *scn = dp->dp_scan;
+
+	err = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds);
+	if (err)
+		return (err);
+
+	while (ds->ds_phys->ds_prev_snap_obj != 0) {
+		dsl_dataset_t *prev;
+		err = dsl_dataset_hold_obj(dp, ds->ds_phys->ds_prev_snap_obj,
+		    FTAG, &prev);
+		if (err) {
+			dsl_dataset_rele(ds, FTAG);
+			return (err);
+		}
+
+		/*
+		 * If this is a clone, we don't need to worry about it for now.
+		 */
+		if (prev->ds_phys->ds_next_snap_obj != ds->ds_object) {
+			dsl_dataset_rele(ds, FTAG);
+			dsl_dataset_rele(prev, FTAG);
+			return (0);
+		}
+		dsl_dataset_rele(ds, FTAG);
+		ds = prev;
+	}
+
+	VERIFY(zap_add_int_key(dp->dp_meta_objset, scn->scn_phys.scn_queue_obj,
+	    ds->ds_object, ds->ds_phys->ds_prev_snap_txg, tx) == 0);
+	dsl_dataset_rele(ds, FTAG);
+	return (0);
+}
+
+/*
+ * Scrub/dedup interaction.
+ *
+ * If there are N references to a deduped block, we don't want to scrub it
+ * N times -- ideally, we should scrub it exactly once.
+ *
+ * We leverage the fact that the dde's replication class (enum ddt_class)
+ * is ordered from highest replication class (DDT_CLASS_DITTO) to lowest
+ * (DDT_CLASS_UNIQUE) so that we may walk the DDT in that order.
+ *
+ * To prevent excess scrubbing, the scrub begins by walking the DDT
+ * to find all blocks with refcnt > 1, and scrubs each of these once.
+ * Since there are two replication classes which contain blocks with
+ * refcnt > 1, we scrub the highest replication class (DDT_CLASS_DITTO) first.
+ * Finally the top-down scrub begins, only visiting blocks with refcnt == 1.
+ *
+ * There would be nothing more to say if a block's refcnt couldn't change
+ * during a scrub, but of course it can so we must account for changes
+ * in a block's replication class.
+ *
+ * Here's an example of what can occur:
+ *
+ * If a block has refcnt > 1 during the DDT scrub phase, but has refcnt == 1
+ * when visited during the top-down scrub phase, it will be scrubbed twice.
+ * This negates our scrub optimization, but is otherwise harmless.
+ *
+ * If a block has refcnt == 1 during the DDT scrub phase, but has refcnt > 1
+ * on each visit during the top-down scrub phase, it will never be scrubbed.
+ * To catch this, ddt_sync_entry() notifies the scrub code whenever a block's
+ * reference class transitions to a higher level (i.e DDT_CLASS_UNIQUE to
+ * DDT_CLASS_DUPLICATE); if it transitions from refcnt == 1 to refcnt > 1
+ * while a scrub is in progress, it scrubs the block right then.
+ */
+static void
+dsl_scan_ddt(dsl_scan_t *scn, dmu_tx_t *tx)
+{
+	ddt_bookmark_t *ddb = &scn->scn_phys.scn_ddt_bookmark;
+	ddt_entry_t dde = { 0 };
+	int error;
+	uint64_t n = 0;
+
+	while ((error = ddt_walk(scn->scn_dp->dp_spa, ddb, &dde)) == 0) {
+		ddt_t *ddt;
+
+		if (ddb->ddb_class > scn->scn_phys.scn_ddt_class_max)
+			break;
+		dprintf("visiting ddb=%llu/%llu/%llu/%llx\n",
+		    (longlong_t)ddb->ddb_class,
+		    (longlong_t)ddb->ddb_type,
+		    (longlong_t)ddb->ddb_checksum,
+		    (longlong_t)ddb->ddb_cursor);
+
+		/* There should be no pending changes to the dedup table */
+		ddt = scn->scn_dp->dp_spa->spa_ddt[ddb->ddb_checksum];
+		ASSERT(avl_first(&ddt->ddt_tree) == NULL);
+
+		dsl_scan_ddt_entry(scn, ddb->ddb_checksum, &dde, tx);
+		n++;
+
+		if (dsl_scan_check_pause(scn, NULL))
+			break;
+	}
+
+	zfs_dbgmsg("scanned %llu ddt entries with class_max = %u; pausing=%u",
+	    (longlong_t)n, (int)scn->scn_phys.scn_ddt_class_max,
+	    (int)scn->scn_pausing);
+
+	ASSERT(error == 0 || error == ENOENT);
+	ASSERT(error != ENOENT ||
+	    ddb->ddb_class > scn->scn_phys.scn_ddt_class_max);
+}
+
+/* ARGSUSED */
+void
+dsl_scan_ddt_entry(dsl_scan_t *scn, enum zio_checksum checksum,
+    ddt_entry_t *dde, dmu_tx_t *tx)
+{
+	const ddt_key_t *ddk = &dde->dde_key;
+	ddt_phys_t *ddp = dde->dde_phys;
+	blkptr_t bp;
+	zbookmark_t zb = { 0 };
+
+	if (scn->scn_phys.scn_state != DSS_SCANNING)
+		return;
+
+	for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
+		if (ddp->ddp_phys_birth == 0 ||
+		    ddp->ddp_phys_birth > scn->scn_phys.scn_cur_max_txg)
+			continue;
+		ddt_bp_create(checksum, ddk, ddp, &bp);
+
+		scn->scn_visited_this_txg++;
+		scan_funcs[scn->scn_phys.scn_func](scn->scn_dp, &bp, &zb);
+	}
+}
+
+static void
+dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t *tx)
+{
+	dsl_pool_t *dp = scn->scn_dp;
+	zap_cursor_t zc;
+	zap_attribute_t za;
+
+	if (scn->scn_phys.scn_ddt_bookmark.ddb_class <=
+	    scn->scn_phys.scn_ddt_class_max) {
+		scn->scn_phys.scn_cur_min_txg = scn->scn_phys.scn_min_txg;
+		scn->scn_phys.scn_cur_max_txg = scn->scn_phys.scn_max_txg;
+		dsl_scan_ddt(scn, tx);
+		if (scn->scn_pausing)
+			return;
+	}
+
+	if (scn->scn_phys.scn_bookmark.zb_objset == DMU_META_OBJSET) {
+		/* First do the MOS & ORIGIN */
+
+		scn->scn_phys.scn_cur_min_txg = scn->scn_phys.scn_min_txg;
+		scn->scn_phys.scn_cur_max_txg = scn->scn_phys.scn_max_txg;
+		dsl_scan_visit_rootbp(scn, NULL,
+		    &dp->dp_meta_rootbp, tx);
+		spa_set_rootblkptr(dp->dp_spa, &dp->dp_meta_rootbp);
+		if (scn->scn_pausing)
+			return;
+
+		if (spa_version(dp->dp_spa) < SPA_VERSION_DSL_SCRUB) {
+			VERIFY(0 == dmu_objset_find_spa(dp->dp_spa,
+			    NULL, enqueue_cb, tx, DS_FIND_CHILDREN));
+		} else {
+			dsl_scan_visitds(scn,
+			    dp->dp_origin_snap->ds_object, tx);
+		}
+		ASSERT(!scn->scn_pausing);
+	} else if (scn->scn_phys.scn_bookmark.zb_objset !=
+	    ZB_DESTROYED_OBJSET) {
+		/*
+		 * If we were paused, continue from here.  Note if the
+		 * ds we were paused on was deleted, the zb_objset may
+		 * be -1, so we will skip this and find a new objset
+		 * below.
+		 */
+		dsl_scan_visitds(scn, scn->scn_phys.scn_bookmark.zb_objset, tx);
+		if (scn->scn_pausing)
+			return;
+	}
+
+	/*
+	 * In case we were paused right at the end of the ds, zero the
+	 * bookmark so we don't think that we're still trying to resume.
+	 */
+	bzero(&scn->scn_phys.scn_bookmark, sizeof (zbookmark_t));
+
+	/* keep pulling things out of the zap-object-as-queue */
+	while (zap_cursor_init(&zc, dp->dp_meta_objset,
+	    scn->scn_phys.scn_queue_obj),
+	    zap_cursor_retrieve(&zc, &za) == 0) {
+		dsl_dataset_t *ds;
+		uint64_t dsobj;
+
+		dsobj = strtonum(za.za_name, NULL);
+		VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset,
+		    scn->scn_phys.scn_queue_obj, dsobj, tx));
+
+		/* Set up min/max txg */
+		VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds));
+		if (za.za_first_integer != 0) {
+			scn->scn_phys.scn_cur_min_txg =
+			    MAX(scn->scn_phys.scn_min_txg,
+			    za.za_first_integer);
+		} else {
+			scn->scn_phys.scn_cur_min_txg =
+			    MAX(scn->scn_phys.scn_min_txg,
+			    ds->ds_phys->ds_prev_snap_txg);
+		}
+		scn->scn_phys.scn_cur_max_txg = dsl_scan_ds_maxtxg(ds);
+		dsl_dataset_rele(ds, FTAG);
+
+		dsl_scan_visitds(scn, dsobj, tx);
+		zap_cursor_fini(&zc);
+		if (scn->scn_pausing)
+			return;
+	}
+	zap_cursor_fini(&zc);
+}
+
+void
+dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
+{
+	dsl_scan_t *scn = dp->dp_scan;
+	spa_t *spa = dp->dp_spa;
+
+	/*
+	 * Check for scn_restart_txg before checking spa_load_state, so
+	 * that we can restart an old-style scan while the pool is being
+	 * imported (see dsl_scan_init).
+	 */
+	if (scn->scn_restart_txg != 0 &&
+	    scn->scn_restart_txg <= tx->tx_txg) {
+		pool_scan_func_t func = POOL_SCAN_SCRUB;
+		dsl_scan_done(scn, B_FALSE, tx);
+		if (vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL))
+			func = POOL_SCAN_RESILVER;
+		zfs_dbgmsg("restarting scan func=%u txg=%llu",
+		    func, tx->tx_txg);
+		dsl_scan_setup_sync(scn, &func, tx);
+	}
+
+	if (dp->dp_spa->spa_load_state != SPA_LOAD_NONE ||
+	    spa_shutting_down(spa) ||
+	    spa_sync_pass(dp->dp_spa) > 1 ||
+	    scn->scn_phys.scn_state != DSS_SCANNING)
+		return;
+
+	scn->scn_visited_this_txg = 0;
+	scn->scn_pausing = B_FALSE;
+	scn->scn_sync_start_time = gethrtime();
+	spa->spa_scrub_active = B_TRUE;
+
+	if (scn->scn_phys.scn_ddt_bookmark.ddb_class <=
+	    scn->scn_phys.scn_ddt_class_max) {
+		zfs_dbgmsg("doing scan sync txg %llu; "
+		    "ddt bm=%llu/%llu/%llu/%llx",
+		    (longlong_t)tx->tx_txg,
+		    (longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_class,
+		    (longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_type,
+		    (longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_checksum,
+		    (longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_cursor);
+		ASSERT(scn->scn_phys.scn_bookmark.zb_objset == 0);
+		ASSERT(scn->scn_phys.scn_bookmark.zb_object == 0);
+		ASSERT(scn->scn_phys.scn_bookmark.zb_level == 0);
+		ASSERT(scn->scn_phys.scn_bookmark.zb_blkid == 0);
+	} else {
+		zfs_dbgmsg("doing scan sync txg %llu; bm=%llu/%llu/%llu/%llu",
+		    (longlong_t)tx->tx_txg,
+		    (longlong_t)scn->scn_phys.scn_bookmark.zb_objset,
+		    (longlong_t)scn->scn_phys.scn_bookmark.zb_object,
+		    (longlong_t)scn->scn_phys.scn_bookmark.zb_level,
+		    (longlong_t)scn->scn_phys.scn_bookmark.zb_blkid);
+	}
+
+	scn->scn_prefetch_zio_root = zio_root(dp->dp_spa, NULL,
+	    NULL, ZIO_FLAG_CANFAIL);
+	dsl_scan_visit(scn, tx);
+	(void) zio_wait(scn->scn_prefetch_zio_root);
+	scn->scn_prefetch_zio_root = NULL;
+
+	zfs_dbgmsg("visited %llu blocks in %llums",
+	    (longlong_t)scn->scn_visited_this_txg,
+	    (longlong_t)(gethrtime() - scn->scn_sync_start_time) / MICROSEC);
+
+	if (!scn->scn_pausing) {
+		/* finished with scan. */
+		zfs_dbgmsg("finished scan txg %llu", (longlong_t)tx->tx_txg);
+		dsl_scan_done(scn, B_TRUE, tx);
+	}
+
+	if (DSL_SCAN_IS_SCRUB_RESILVER(scn)) {
+		mutex_enter(&spa->spa_scrub_lock);
+		while (spa->spa_scrub_inflight > 0) {
+			cv_wait(&spa->spa_scrub_io_cv,
+			    &spa->spa_scrub_lock);
+		}
+		mutex_exit(&spa->spa_scrub_lock);
+	}
+
+	dsl_scan_sync_state(scn, tx);
+}
+
+/*
+ * This will start a new scan, or restart an existing one.
+ */
+void
+dsl_resilver_restart(dsl_pool_t *dp, uint64_t txg)
+{
+	if (txg == 0) {
+		dmu_tx_t *tx;
+		tx = dmu_tx_create_dd(dp->dp_mos_dir);
+		VERIFY(0 == dmu_tx_assign(tx, TXG_WAIT));
+
+		txg = dmu_tx_get_txg(tx);
+		dp->dp_scan->scn_restart_txg = txg;
+		dmu_tx_commit(tx);
+	} else {
+		dp->dp_scan->scn_restart_txg = txg;
+	}
+	zfs_dbgmsg("restarting resilver txg=%llu", txg);
+}
+
+boolean_t
+dsl_scan_resilvering(dsl_pool_t *dp)
+{
+	return (dp->dp_scan->scn_phys.scn_state == DSS_SCANNING &&
+	    dp->dp_scan->scn_phys.scn_func == POOL_SCAN_RESILVER);
+}
+
+/*
+ * scrub consumers
+ */
+
+static void
+count_block(zfs_all_blkstats_t *zab, const blkptr_t *bp)
+{
+	int i;
+
+	/*
+	 * If we resume after a reboot, zab will be NULL; don't record
+	 * incomplete stats in that case.
+	 */
+	if (zab == NULL)
+		return;
+
+	for (i = 0; i < 4; i++) {
+		int l = (i < 2) ? BP_GET_LEVEL(bp) : DN_MAX_LEVELS;
+		int t = (i & 1) ? BP_GET_TYPE(bp) : DMU_OT_TOTAL;
+		zfs_blkstat_t *zb = &zab->zab_type[l][t];
+		int equal;
+
+		zb->zb_count++;
+		zb->zb_asize += BP_GET_ASIZE(bp);
+		zb->zb_lsize += BP_GET_LSIZE(bp);
+		zb->zb_psize += BP_GET_PSIZE(bp);
+		zb->zb_gangs += BP_COUNT_GANG(bp);
+
+		switch (BP_GET_NDVAS(bp)) {
+		case 2:
+			if (DVA_GET_VDEV(&bp->blk_dva[0]) ==
+			    DVA_GET_VDEV(&bp->blk_dva[1]))
+				zb->zb_ditto_2_of_2_samevdev++;
+			break;
+		case 3:
+			equal = (DVA_GET_VDEV(&bp->blk_dva[0]) ==
+			    DVA_GET_VDEV(&bp->blk_dva[1])) +
+			    (DVA_GET_VDEV(&bp->blk_dva[0]) ==
+			    DVA_GET_VDEV(&bp->blk_dva[2])) +
+			    (DVA_GET_VDEV(&bp->blk_dva[1]) ==
+			    DVA_GET_VDEV(&bp->blk_dva[2]));
+			if (equal == 1)
+				zb->zb_ditto_2_of_3_samevdev++;
+			else if (equal == 3)
+				zb->zb_ditto_3_of_3_samevdev++;
+			break;
+		}
+	}
+}
+
+static void
+dsl_scan_scrub_done(zio_t *zio)
+{
+	spa_t *spa = zio->io_spa;
+
+	zio_data_buf_free(zio->io_data, zio->io_size);
+
+	mutex_enter(&spa->spa_scrub_lock);
+	spa->spa_scrub_inflight--;
+	cv_broadcast(&spa->spa_scrub_io_cv);
+
+	if (zio->io_error && (zio->io_error != ECKSUM ||
+	    !(zio->io_flags & ZIO_FLAG_SPECULATIVE))) {
+		spa->spa_dsl_pool->dp_scan->scn_phys.scn_errors++;
+	}
+	mutex_exit(&spa->spa_scrub_lock);
+}
+
+static int
+dsl_scan_scrub_cb(dsl_pool_t *dp,
+    const blkptr_t *bp, const zbookmark_t *zb)
+{
+	dsl_scan_t *scn = dp->dp_scan;
+	size_t size = BP_GET_PSIZE(bp);
+	spa_t *spa = dp->dp_spa;
+	uint64_t phys_birth = BP_PHYSICAL_BIRTH(bp);
+	boolean_t needs_io;
+	int zio_flags = ZIO_FLAG_SCRUB_THREAD | ZIO_FLAG_RAW | ZIO_FLAG_CANFAIL;
+	int zio_priority;
+
+	if (phys_birth <= scn->scn_phys.scn_min_txg ||
+	    phys_birth >= scn->scn_phys.scn_max_txg)
+		return (0);
+
+	count_block(dp->dp_blkstats, bp);
+
+	ASSERT(DSL_SCAN_IS_SCRUB_RESILVER(scn));
+	if (scn->scn_phys.scn_func == POOL_SCAN_SCRUB) {
+		zio_flags |= ZIO_FLAG_SCRUB;
+		zio_priority = ZIO_PRIORITY_SCRUB;
+		needs_io = B_TRUE;
+	} else if (scn->scn_phys.scn_func == POOL_SCAN_RESILVER) {
+		zio_flags |= ZIO_FLAG_RESILVER;
+		zio_priority = ZIO_PRIORITY_RESILVER;
+		needs_io = B_FALSE;
+	}
+
+	/* If it's an intent log block, failure is expected. */
+	if (zb->zb_level == ZB_ZIL_LEVEL)
+		zio_flags |= ZIO_FLAG_SPECULATIVE;
+
+	for (int d = 0; d < BP_GET_NDVAS(bp); d++) {
+		vdev_t *vd = vdev_lookup_top(spa,
+		    DVA_GET_VDEV(&bp->blk_dva[d]));
+
+		/*
+		 * Keep track of how much data we've examined so that
+		 * zpool(1M) status can make useful progress reports.
+		 */
+		scn->scn_phys.scn_examined += DVA_GET_ASIZE(&bp->blk_dva[d]);
+		spa->spa_scan_pass_exam += DVA_GET_ASIZE(&bp->blk_dva[d]);
+
+		/* if it's a resilver, this may not be in the target range */
+		if (!needs_io) {
+			if (DVA_GET_GANG(&bp->blk_dva[d])) {
+				/*
+				 * Gang members may be spread across multiple
+				 * vdevs, so the best estimate we have is the
+				 * scrub range, which has already been checked.
+				 * XXX -- it would be better to change our
+				 * allocation policy to ensure that all
+				 * gang members reside on the same vdev.
+				 */
+				needs_io = B_TRUE;
+			} else {
+				needs_io = vdev_dtl_contains(vd, DTL_PARTIAL,
+				    phys_birth, 1);
+			}
+		}
+	}
+
+	if (needs_io && !zfs_no_scrub_io) {
+		void *data = zio_data_buf_alloc(size);
+
+		mutex_enter(&spa->spa_scrub_lock);
+		while (spa->spa_scrub_inflight >= spa->spa_scrub_maxinflight)
+			cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock);
+		spa->spa_scrub_inflight++;
+		mutex_exit(&spa->spa_scrub_lock);
+
+		zio_nowait(zio_read(NULL, spa, bp, data, size,
+		    dsl_scan_scrub_done, NULL, zio_priority,
+		    zio_flags, zb));
+	}
+
+	/* do not relocate this block */
+	return (0);
+}
+
+int
+dsl_scan(dsl_pool_t *dp, pool_scan_func_t func)
+{
+	spa_t *spa = dp->dp_spa;
+
+	/*
+	 * Purge all vdev caches and probe all devices.  We do this here
+	 * rather than in sync context because this requires a writer lock
+	 * on the spa_config lock, which we can't do from sync context.  The
+	 * spa_scrub_reopen flag indicates that vdev_open() should not
+	 * attempt to start another scrub.
+	 */
+	spa_vdev_state_enter(spa, SCL_NONE);
+	spa->spa_scrub_reopen = B_TRUE;
+	vdev_reopen(spa->spa_root_vdev);
+	spa->spa_scrub_reopen = B_FALSE;
+	(void) spa_vdev_state_exit(spa, NULL, 0);
+
+	return (dsl_sync_task_do(dp, dsl_scan_setup_check,
+	    dsl_scan_setup_sync, dp->dp_scan, &func, 0));
+}

--- a/usr/src/uts/common/fs/zfs/dsl_scrub.c	Mon May 03 11:16:14 2010 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1214 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- */
-
-#include <sys/dsl_pool.h>
-#include <sys/dsl_dataset.h>
-#include <sys/dsl_prop.h>
-#include <sys/dsl_dir.h>
-#include <sys/dsl_synctask.h>
-#include <sys/dnode.h>
-#include <sys/dmu_tx.h>
-#include <sys/dmu_objset.h>
-#include <sys/arc.h>
-#include <sys/zap.h>
-#include <sys/zio.h>
-#include <sys/zfs_context.h>
-#include <sys/fs/zfs.h>
-#include <sys/zfs_znode.h>
-#include <sys/spa_impl.h>
-#include <sys/vdev_impl.h>
-#include <sys/zil_impl.h>
-#include <sys/zio_checksum.h>
-#include <sys/ddt.h>
-#include <sys/sa.h>
-#include <sys/sa_impl.h>
-
-typedef int (scrub_cb_t)(dsl_pool_t *, const blkptr_t *, const zbookmark_t *);
-
-static scrub_cb_t dsl_pool_scrub_clean_cb;
-static dsl_syncfunc_t dsl_pool_scrub_cancel_sync;
-static void scrub_visitdnode(dsl_pool_t *dp, dnode_phys_t *dnp, arc_buf_t *buf,
-    uint64_t objset, uint64_t object);
-
-int zfs_scrub_min_time_ms = 1000; /* min millisecs to scrub per txg */
-int zfs_resilver_min_time_ms = 3000; /* min millisecs to resilver per txg */
-boolean_t zfs_no_scrub_io = B_FALSE; /* set to disable scrub i/o */
-boolean_t zfs_no_scrub_prefetch = B_FALSE; /* set to disable srub prefetching */
-enum ddt_class zfs_scrub_ddt_class_max = DDT_CLASS_DUPLICATE;
-
-extern int zfs_txg_timeout;
-
-static scrub_cb_t *scrub_funcs[SCRUB_FUNC_NUMFUNCS] = {
-	NULL,
-	dsl_pool_scrub_clean_cb
-};
-
-/* ARGSUSED */
-static void
-dsl_pool_scrub_setup_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
-{
-	dsl_pool_t *dp = arg1;
-	enum scrub_func *funcp = arg2;
-	dmu_object_type_t ot = 0;
-	boolean_t complete = B_FALSE;
-
-	dsl_pool_scrub_cancel_sync(dp, &complete, cr, tx);
-
-	ASSERT(dp->dp_scrub_func == SCRUB_FUNC_NONE);
-	ASSERT(*funcp > SCRUB_FUNC_NONE);
-	ASSERT(*funcp < SCRUB_FUNC_NUMFUNCS);
-
-	dp->dp_scrub_min_txg = 0;
-	dp->dp_scrub_max_txg = tx->tx_txg;
-	dp->dp_scrub_ddt_class_max = zfs_scrub_ddt_class_max;
-
-	if (*funcp == SCRUB_FUNC_CLEAN) {
-		vdev_t *rvd = dp->dp_spa->spa_root_vdev;
-
-		/* rewrite all disk labels */
-		vdev_config_dirty(rvd);
-
-		if (vdev_resilver_needed(rvd,
-		    &dp->dp_scrub_min_txg, &dp->dp_scrub_max_txg)) {
-			spa_event_notify(dp->dp_spa, NULL,
-			    ESC_ZFS_RESILVER_START);
-			dp->dp_scrub_max_txg = MIN(dp->dp_scrub_max_txg,
-			    tx->tx_txg);
-		} else {
-			spa_event_notify(dp->dp_spa, NULL,
-			    ESC_ZFS_SCRUB_START);
-		}
-
-		/* zero out the scrub stats in all vdev_stat_t's */
-		vdev_scrub_stat_update(rvd,
-		    dp->dp_scrub_min_txg ? POOL_SCRUB_RESILVER :
-		    POOL_SCRUB_EVERYTHING, B_FALSE);
-
-		/*
-		 * If this is an incremental scrub, limit the DDT scrub phase
-		 * to just the auto-ditto class (for correctness); the rest
-		 * of the scrub should go faster using top-down pruning.
-		 */
-		if (dp->dp_scrub_min_txg > TXG_INITIAL)
-			dp->dp_scrub_ddt_class_max = DDT_CLASS_DITTO;
-
-		dp->dp_spa->spa_scrub_started = B_TRUE;
-	}
-
-	/* back to the generic stuff */
-
-	if (dp->dp_blkstats == NULL) {
-		dp->dp_blkstats =
-		    kmem_alloc(sizeof (zfs_all_blkstats_t), KM_SLEEP);
-	}
-	bzero(dp->dp_blkstats, sizeof (zfs_all_blkstats_t));
-
-	if (spa_version(dp->dp_spa) < SPA_VERSION_DSL_SCRUB)
-		ot = DMU_OT_ZAP_OTHER;
-
-	dp->dp_scrub_func = *funcp;
-	dp->dp_scrub_queue_obj = zap_create(dp->dp_meta_objset,
-	    ot ? ot : DMU_OT_SCRUB_QUEUE, DMU_OT_NONE, 0, tx);
-	bzero(&dp->dp_scrub_bookmark, sizeof (zbookmark_t));
-	bzero(&dp->dp_scrub_ddt_bookmark, sizeof (ddt_bookmark_t));
-	dp->dp_scrub_restart = B_FALSE;
-	dp->dp_spa->spa_scrub_errors = 0;
-
-	VERIFY(0 == zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
-	    DMU_POOL_SCRUB_FUNC, sizeof (uint32_t), 1,
-	    &dp->dp_scrub_func, tx));
-	VERIFY(0 == zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
-	    DMU_POOL_SCRUB_QUEUE, sizeof (uint64_t), 1,
-	    &dp->dp_scrub_queue_obj, tx));
-	VERIFY(0 == zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
-	    DMU_POOL_SCRUB_MIN_TXG, sizeof (uint64_t), 1,
-	    &dp->dp_scrub_min_txg, tx));
-	VERIFY(0 == zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
-	    DMU_POOL_SCRUB_MAX_TXG, sizeof (uint64_t), 1,
-	    &dp->dp_scrub_max_txg, tx));
-	VERIFY(0 == zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
-	    DMU_POOL_SCRUB_BOOKMARK, sizeof (uint64_t),
-	    sizeof (dp->dp_scrub_bookmark) / sizeof (uint64_t),
-	    &dp->dp_scrub_bookmark, tx));
-	VERIFY(0 == zap_update(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
-	    DMU_POOL_SCRUB_DDT_BOOKMARK, sizeof (uint64_t),
-	    sizeof (dp->dp_scrub_ddt_bookmark) / sizeof (uint64_t),
-	    &dp->dp_scrub_ddt_bookmark, tx));
-	VERIFY(0 == zap_update(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
-	    DMU_POOL_SCRUB_DDT_CLASS_MAX, sizeof (uint64_t), 1,
-	    &dp->dp_scrub_ddt_class_max, tx));
-	VERIFY(0 == zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
-	    DMU_POOL_SCRUB_ERRORS, sizeof (uint64_t), 1,
-	    &dp->dp_spa->spa_scrub_errors, tx));
-
-	spa_history_internal_log(LOG_POOL_SCRUB, dp->dp_spa, tx, cr,
-	    "func=%u mintxg=%llu maxtxg=%llu",
-	    *funcp, dp->dp_scrub_min_txg, dp->dp_scrub_max_txg);
-}
-
-int
-dsl_pool_scrub_setup(dsl_pool_t *dp, enum scrub_func func)
-{
-	return (dsl_sync_task_do(dp, NULL,
-	    dsl_pool_scrub_setup_sync, dp, &func, 0));
-}
-
-/* ARGSUSED */
-static void
-dsl_pool_scrub_cancel_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
-{
-	dsl_pool_t *dp = arg1;
-	boolean_t *completep = arg2;
-
-	if (dp->dp_scrub_func == SCRUB_FUNC_NONE)
-		return;
-
-	mutex_enter(&dp->dp_scrub_cancel_lock);
-
-	if (dp->dp_scrub_restart) {
-		dp->dp_scrub_restart = B_FALSE;
-		*completep = B_FALSE;
-	}
-
-	/* XXX this is scrub-clean specific */
-	mutex_enter(&dp->dp_spa->spa_scrub_lock);
-	while (dp->dp_spa->spa_scrub_inflight > 0) {
-		cv_wait(&dp->dp_spa->spa_scrub_io_cv,
-		    &dp->dp_spa->spa_scrub_lock);
-	}
-	mutex_exit(&dp->dp_spa->spa_scrub_lock);
-	dp->dp_spa->spa_scrub_active = B_FALSE;
-
-	dp->dp_scrub_func = SCRUB_FUNC_NONE;
-	VERIFY(0 == dmu_object_free(dp->dp_meta_objset,
-	    dp->dp_scrub_queue_obj, tx));
-	dp->dp_scrub_queue_obj = 0;
-	bzero(&dp->dp_scrub_bookmark, sizeof (zbookmark_t));
-	bzero(&dp->dp_scrub_ddt_bookmark, sizeof (ddt_bookmark_t));
-
-	VERIFY(0 == zap_remove(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
-	    DMU_POOL_SCRUB_QUEUE, tx));
-	VERIFY(0 == zap_remove(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
-	    DMU_POOL_SCRUB_MIN_TXG, tx));
-	VERIFY(0 == zap_remove(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
-	    DMU_POOL_SCRUB_MAX_TXG, tx));
-	VERIFY(0 == zap_remove(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
-	    DMU_POOL_SCRUB_BOOKMARK, tx));
-	VERIFY(0 == zap_remove(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
-	    DMU_POOL_SCRUB_FUNC, tx));
-	VERIFY(0 == zap_remove(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
-	    DMU_POOL_SCRUB_ERRORS, tx));
-
-	(void) zap_remove(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
-	    DMU_POOL_SCRUB_DDT_BOOKMARK, tx);
-	(void) zap_remove(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
-	    DMU_POOL_SCRUB_DDT_CLASS_MAX, tx);
-
-	spa_history_internal_log(LOG_POOL_SCRUB_DONE, dp->dp_spa, tx, cr,
-	    "complete=%u", *completep);
-
-	/* below is scrub-clean specific */
-	vdev_scrub_stat_update(dp->dp_spa->spa_root_vdev, POOL_SCRUB_NONE,
-	    *completep);
-	/*
-	 * If the scrub/resilver completed, update all DTLs to reflect this.
-	 * Whether it succeeded or not, vacate all temporary scrub DTLs.
-	 */
-	vdev_dtl_reassess(dp->dp_spa->spa_root_vdev, tx->tx_txg,
-	    *completep ? dp->dp_scrub_max_txg : 0, B_TRUE);
-	dp->dp_spa->spa_scrub_started = B_FALSE;
-	if (*completep)
-		spa_event_notify(dp->dp_spa, NULL, dp->dp_scrub_min_txg ?
-		    ESC_ZFS_RESILVER_FINISH : ESC_ZFS_SCRUB_FINISH);
-	spa_errlog_rotate(dp->dp_spa);
-
-	/*
-	 * We may have finished replacing a device.
-	 * Let the async thread assess this and handle the detach.
-	 */
-	spa_async_request(dp->dp_spa, SPA_ASYNC_RESILVER_DONE);
-
-	dp->dp_scrub_min_txg = dp->dp_scrub_max_txg = 0;
-	mutex_exit(&dp->dp_scrub_cancel_lock);
-}
-
-int
-dsl_pool_scrub_cancel(dsl_pool_t *dp)
-{
-	boolean_t complete = B_FALSE;
-
-	return (dsl_sync_task_do(dp, NULL,
-	    dsl_pool_scrub_cancel_sync, dp, &complete, 3));
-}
-
-void
-dsl_free(dsl_pool_t *dp, uint64_t txg, const blkptr_t *bpp)
-{
-	/*
-	 * This function will be used by bp-rewrite wad to intercept frees.
-	 */
-	zio_free(dp->dp_spa, txg, bpp);
-}
-
-void
-dsl_free_sync(zio_t *pio, dsl_pool_t *dp, uint64_t txg, const blkptr_t *bpp)
-{
-	ASSERT(dsl_pool_sync_context(dp));
-	zio_nowait(zio_free_sync(pio, dp->dp_spa, txg, bpp, pio->io_flags));
-}
-
-static boolean_t
-bookmark_is_zero(const zbookmark_t *zb)
-{
-	return (zb->zb_objset == 0 && zb->zb_object == 0 &&
-	    zb->zb_level == 0 && zb->zb_blkid == 0);
-}
-
-/* dnp is the dnode for zb1->zb_object */
-static boolean_t
-bookmark_is_before(dnode_phys_t *dnp, const zbookmark_t *zb1,
-    const zbookmark_t *zb2)
-{
-	uint64_t zb1nextL0, zb2thisobj;
-
-	ASSERT(zb1->zb_objset == zb2->zb_objset);
-	ASSERT(zb1->zb_object != DMU_DEADLIST_OBJECT);
-	ASSERT(zb2->zb_level == 0);
-
-	/*
-	 * A bookmark in the deadlist is considered to be after
-	 * everything else.
-	 */
-	if (zb2->zb_object == DMU_DEADLIST_OBJECT)
-		return (B_TRUE);
-
-	/* The objset_phys_t isn't before anything. */
-	if (dnp == NULL)
-		return (B_FALSE);
-
-	zb1nextL0 = (zb1->zb_blkid + 1) <<
-	    ((zb1->zb_level) * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT));
-
-	zb2thisobj = zb2->zb_object ? zb2->zb_object :
-	    zb2->zb_blkid << (DNODE_BLOCK_SHIFT - DNODE_SHIFT);
-
-	if (zb1->zb_object == DMU_META_DNODE_OBJECT) {
-		uint64_t nextobj = zb1nextL0 *
-		    (dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT) >> DNODE_SHIFT;
-		return (nextobj <= zb2thisobj);
-	}
-
-	if (zb1->zb_object < zb2thisobj)
-		return (B_TRUE);
-	if (zb1->zb_object > zb2thisobj)
-		return (B_FALSE);
-	if (zb2->zb_object == DMU_META_DNODE_OBJECT)
-		return (B_FALSE);
-	return (zb1nextL0 <= zb2->zb_blkid);
-}
-
-static boolean_t
-scrub_pause(dsl_pool_t *dp, const zbookmark_t *zb, const ddt_bookmark_t *ddb)
-{
-	uint64_t elapsed_nanosecs;
-	int mintime;
-
-	if (dp->dp_scrub_pausing)
-		return (B_TRUE); /* we're already pausing */
-
-	if (!bookmark_is_zero(&dp->dp_scrub_bookmark))
-		return (B_FALSE); /* we're resuming */
-
-	/* We only know how to resume from level-0 blocks. */
-	if (zb != NULL && zb->zb_level != 0)
-		return (B_FALSE);
-
-	mintime = dp->dp_scrub_isresilver ? zfs_resilver_min_time_ms :
-	    zfs_scrub_min_time_ms;
-	elapsed_nanosecs = gethrtime() - dp->dp_scrub_start_time;
-	if (elapsed_nanosecs / NANOSEC > zfs_txg_timeout ||
-	    (elapsed_nanosecs / MICROSEC > mintime && txg_sync_waiting(dp))) {
-		if (zb) {
-			dprintf("pausing at bookmark %llx/%llx/%llx/%llx\n",
-			    (longlong_t)zb->zb_objset,
-			    (longlong_t)zb->zb_object,
-			    (longlong_t)zb->zb_level,
-			    (longlong_t)zb->zb_blkid);
-			dp->dp_scrub_bookmark = *zb;
-		}
-		if (ddb) {
-			dprintf("pausing at DDT bookmark %llx/%llx/%llx/%llx\n",
-			    (longlong_t)ddb->ddb_class,
-			    (longlong_t)ddb->ddb_type,
-			    (longlong_t)ddb->ddb_checksum,
-			    (longlong_t)ddb->ddb_cursor);
-			ASSERT(&dp->dp_scrub_ddt_bookmark == ddb);
-		}
-		dp->dp_scrub_pausing = B_TRUE;
-		return (B_TRUE);
-	}
-	return (B_FALSE);
-}
-
-typedef struct zil_traverse_arg {
-	dsl_pool_t	*zta_dp;
-	zil_header_t	*zta_zh;
-} zil_traverse_arg_t;
-
-/* ARGSUSED */
-static int
-traverse_zil_block(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t claim_txg)
-{
-	zil_traverse_arg_t *zta = arg;
-	dsl_pool_t *dp = zta->zta_dp;
-	zil_header_t *zh = zta->zta_zh;
-	zbookmark_t zb;
-
-	if (bp->blk_birth <= dp->dp_scrub_min_txg)
-		return (0);
-
-	/*
-	 * One block ("stubby") can be allocated a long time ago; we
-	 * want to visit that one because it has been allocated
-	 * (on-disk) even if it hasn't been claimed (even though for
-	 * plain scrub there's nothing to do to it).
-	 */
-	if (claim_txg == 0 && bp->blk_birth >= spa_first_txg(dp->dp_spa))
-		return (0);
-
-	SET_BOOKMARK(&zb, zh->zh_log.blk_cksum.zc_word[ZIL_ZC_OBJSET],
-	    ZB_ZIL_OBJECT, ZB_ZIL_LEVEL, bp->blk_cksum.zc_word[ZIL_ZC_SEQ]);
-
-	VERIFY(0 == scrub_funcs[dp->dp_scrub_func](dp, bp, &zb));
-	return (0);
-}
-
-/* ARGSUSED */
-static int
-traverse_zil_record(zilog_t *zilog, lr_t *lrc, void *arg, uint64_t claim_txg)
-{
-	if (lrc->lrc_txtype == TX_WRITE) {
-		zil_traverse_arg_t *zta = arg;
-		dsl_pool_t *dp = zta->zta_dp;
-		zil_header_t *zh = zta->zta_zh;
-		lr_write_t *lr = (lr_write_t *)lrc;
-		blkptr_t *bp = &lr->lr_blkptr;
-		zbookmark_t zb;
-
-		if (bp->blk_birth <= dp->dp_scrub_min_txg)
-			return (0);
-
-		/*
-		 * birth can be < claim_txg if this record's txg is
-		 * already txg sync'ed (but this log block contains
-		 * other records that are not synced)
-		 */
-		if (claim_txg == 0 || bp->blk_birth < claim_txg)
-			return (0);
-
-		SET_BOOKMARK(&zb, zh->zh_log.blk_cksum.zc_word[ZIL_ZC_OBJSET],
-		    lr->lr_foid, ZB_ZIL_LEVEL,
-		    lr->lr_offset / BP_GET_LSIZE(bp));
-
-		VERIFY(0 == scrub_funcs[dp->dp_scrub_func](dp, bp, &zb));
-	}
-	return (0);
-}
-
-static void
-traverse_zil(dsl_pool_t *dp, zil_header_t *zh)
-{
-	uint64_t claim_txg = zh->zh_claim_txg;
-	zil_traverse_arg_t zta = { dp, zh };
-	zilog_t *zilog;
-
-	/*
-	 * We only want to visit blocks that have been claimed but not yet
-	 * replayed (or, in read-only mode, blocks that *would* be claimed).
-	 */
-	if (claim_txg == 0 && spa_writeable(dp->dp_spa))
-		return;
-
-	zilog = zil_alloc(dp->dp_meta_objset, zh);
-
-	(void) zil_parse(zilog, traverse_zil_block, traverse_zil_record, &zta,
-	    claim_txg);
-
-	zil_free(zilog);
-}
-
-static void
-scrub_prefetch(dsl_pool_t *dp, arc_buf_t *buf, blkptr_t *bp, uint64_t objset,
-    uint64_t object, uint64_t blkid)
-{
-	zbookmark_t czb;
-	uint32_t flags = ARC_NOWAIT | ARC_PREFETCH;
-
-	if (zfs_no_scrub_prefetch)
-		return;
-
-	if (BP_IS_HOLE(bp) || bp->blk_birth <= dp->dp_scrub_min_txg ||
-	    (BP_GET_LEVEL(bp) == 0 && BP_GET_TYPE(bp) != DMU_OT_DNODE))
-		return;
-
-	SET_BOOKMARK(&czb, objset, object, BP_GET_LEVEL(bp), blkid);
-
-	(void) arc_read(dp->dp_scrub_prefetch_zio_root, dp->dp_spa, bp,
-	    buf, NULL, NULL, ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL,
-	    &flags, &czb);
-}
-
-static void
-scrub_visitbp(dsl_pool_t *dp, dnode_phys_t *dnp,
-    arc_buf_t *pbuf, blkptr_t *bp, const zbookmark_t *zb)
-{
-	int err;
-	arc_buf_t *buf = NULL;
-
-	if (bp->blk_birth <= dp->dp_scrub_min_txg)
-		return;
-
-	if (scrub_pause(dp, zb, NULL))
-		return;
-
-	if (!bookmark_is_zero(&dp->dp_scrub_bookmark)) {
-		/*
-		 * If we already visited this bp & everything below (in
-		 * a prior txg), don't bother doing it again.
-		 */
-		if (bookmark_is_before(dnp, zb, &dp->dp_scrub_bookmark))
-			return;
-
-		/*
-		 * If we found the block we're trying to resume from, or
-		 * we went past it to a different object, zero it out to
-		 * indicate that it's OK to start checking for pausing
-		 * again.
-		 */
-		if (bcmp(zb, &dp->dp_scrub_bookmark, sizeof (*zb)) == 0 ||
-		    zb->zb_object > dp->dp_scrub_bookmark.zb_object) {
-			dprintf("resuming at %llx/%llx/%llx/%llx\n",
-			    (longlong_t)zb->zb_objset,
-			    (longlong_t)zb->zb_object,
-			    (longlong_t)zb->zb_level,
-			    (longlong_t)zb->zb_blkid);
-			bzero(&dp->dp_scrub_bookmark, sizeof (*zb));
-		}
-	}
-
-	/*
-	 * If dsl_pool_scrub_ddt() has aready scrubbed this block,
-	 * don't scrub it again.
-	 */
-	if (!ddt_class_contains(dp->dp_spa, dp->dp_scrub_ddt_class_max, bp))
-		(void) scrub_funcs[dp->dp_scrub_func](dp, bp, zb);
-
-	if (BP_GET_LEVEL(bp) > 0) {
-		uint32_t flags = ARC_WAIT;
-		int i;
-		blkptr_t *cbp;
-		int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT;
-
-		err = arc_read(NULL, dp->dp_spa, bp, pbuf,
-		    arc_getbuf_func, &buf,
-		    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
-		if (err) {
-			mutex_enter(&dp->dp_spa->spa_scrub_lock);
-			dp->dp_spa->spa_scrub_errors++;
-			mutex_exit(&dp->dp_spa->spa_scrub_lock);
-			return;
-		}
-		for (i = 0, cbp = buf->b_data; i < epb; i++, cbp++) {
-			scrub_prefetch(dp, buf, cbp, zb->zb_objset,
-			    zb->zb_object, zb->zb_blkid * epb + i);
-		}
-		for (i = 0, cbp = buf->b_data; i < epb; i++, cbp++) {
-			zbookmark_t czb;
-
-			SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object,
-			    zb->zb_level - 1,
-			    zb->zb_blkid * epb + i);
-			scrub_visitbp(dp, dnp, buf, cbp, &czb);
-		}
-	} else if (BP_GET_TYPE(bp) == DMU_OT_DNODE) {
-		uint32_t flags = ARC_WAIT;
-		dnode_phys_t *cdnp;
-		int i, j;
-		int epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT;
-
-		err = arc_read(NULL, dp->dp_spa, bp, pbuf,
-		    arc_getbuf_func, &buf,
-		    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
-		if (err) {
-			mutex_enter(&dp->dp_spa->spa_scrub_lock);
-			dp->dp_spa->spa_scrub_errors++;
-			mutex_exit(&dp->dp_spa->spa_scrub_lock);
-			return;
-		}
-		for (i = 0, cdnp = buf->b_data; i < epb; i++, cdnp++) {
-			for (j = 0; j < cdnp->dn_nblkptr; j++) {
-				blkptr_t *cbp = &cdnp->dn_blkptr[j];
-				scrub_prefetch(dp, buf, cbp, zb->zb_objset,
-				    zb->zb_blkid * epb + i, j);
-			}
-		}
-		for (i = 0, cdnp = buf->b_data; i < epb; i++, cdnp++) {
-			scrub_visitdnode(dp, cdnp, buf, zb->zb_objset,
-			    zb->zb_blkid * epb + i);
-		}
-	} else if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) {
-		uint32_t flags = ARC_WAIT;
-		objset_phys_t *osp;
-
-		err = arc_read_nolock(NULL, dp->dp_spa, bp,
-		    arc_getbuf_func, &buf,
-		    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
-		if (err) {
-			mutex_enter(&dp->dp_spa->spa_scrub_lock);
-			dp->dp_spa->spa_scrub_errors++;
-			mutex_exit(&dp->dp_spa->spa_scrub_lock);
-			return;
-		}
-
-		osp = buf->b_data;
-
-		traverse_zil(dp, &osp->os_zil_header);
-
-		scrub_visitdnode(dp, &osp->os_meta_dnode,
-		    buf, zb->zb_objset, DMU_META_DNODE_OBJECT);
-		if (arc_buf_size(buf) >= sizeof (objset_phys_t)) {
-			scrub_visitdnode(dp, &osp->os_userused_dnode,
-			    buf, zb->zb_objset, DMU_USERUSED_OBJECT);
-			scrub_visitdnode(dp, &osp->os_groupused_dnode,
-			    buf, zb->zb_objset, DMU_GROUPUSED_OBJECT);
-		}
-	}
-
-	if (buf)
-		(void) arc_buf_remove_ref(buf, &buf);
-}
-
-static void
-scrub_visitdnode(dsl_pool_t *dp, dnode_phys_t *dnp, arc_buf_t *buf,
-    uint64_t objset, uint64_t object)
-{
-	int j;
-
-	for (j = 0; j < dnp->dn_nblkptr; j++) {
-		zbookmark_t czb;
-
-		SET_BOOKMARK(&czb, objset, object, dnp->dn_nlevels - 1, j);
-		scrub_visitbp(dp, dnp, buf, &dnp->dn_blkptr[j], &czb);
-
-		if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) {
-			zbookmark_t czb;
-			SET_BOOKMARK(&czb, objset, object, 0, DMU_SPILL_BLKID);
-			scrub_visitbp(dp, dnp, buf, &dnp->dn_spill, &czb);
-		}
-	}
-}
-
-static void
-scrub_visit_rootbp(dsl_pool_t *dp, dsl_dataset_t *ds, blkptr_t *bp)
-{
-	zbookmark_t zb;
-
-	SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET,
-	    ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
-	scrub_visitbp(dp, NULL, NULL, bp, &zb);
-}
-
-void
-dsl_pool_ds_destroyed(dsl_dataset_t *ds, dmu_tx_t *tx)
-{
-	dsl_pool_t *dp = ds->ds_dir->dd_pool;
-
-	if (dp->dp_scrub_func == SCRUB_FUNC_NONE)
-		return;
-
-	if (dp->dp_scrub_bookmark.zb_objset == ds->ds_object) {
-		SET_BOOKMARK(&dp->dp_scrub_bookmark,
-		    ZB_DESTROYED_OBJSET, 0, 0, 0);
-	} else if (zap_remove_int(dp->dp_meta_objset, dp->dp_scrub_queue_obj,
-	    ds->ds_object, tx) != 0) {
-		return;
-	}
-
-	if (ds->ds_phys->ds_next_snap_obj != 0) {
-		VERIFY(zap_add_int(dp->dp_meta_objset, dp->dp_scrub_queue_obj,
-		    ds->ds_phys->ds_next_snap_obj, tx) == 0);
-	}
-	ASSERT3U(ds->ds_phys->ds_num_children, <=, 1);
-}
-
-void
-dsl_pool_ds_snapshotted(dsl_dataset_t *ds, dmu_tx_t *tx)
-{
-	dsl_pool_t *dp = ds->ds_dir->dd_pool;
-
-	if (dp->dp_scrub_func == SCRUB_FUNC_NONE)
-		return;
-
-	ASSERT(ds->ds_phys->ds_prev_snap_obj != 0);
-
-	if (dp->dp_scrub_bookmark.zb_objset == ds->ds_object) {
-		dp->dp_scrub_bookmark.zb_objset =
-		    ds->ds_phys->ds_prev_snap_obj;
-	} else if (zap_remove_int(dp->dp_meta_objset, dp->dp_scrub_queue_obj,
-	    ds->ds_object, tx) == 0) {
-		VERIFY(zap_add_int(dp->dp_meta_objset, dp->dp_scrub_queue_obj,
-		    ds->ds_phys->ds_prev_snap_obj, tx) == 0);
-	}
-}
-
-void
-dsl_pool_ds_clone_swapped(dsl_dataset_t *ds1, dsl_dataset_t *ds2, dmu_tx_t *tx)
-{
-	dsl_pool_t *dp = ds1->ds_dir->dd_pool;
-
-	if (dp->dp_scrub_func == SCRUB_FUNC_NONE)
-		return;
-
-	if (dp->dp_scrub_bookmark.zb_objset == ds1->ds_object) {
-		dp->dp_scrub_bookmark.zb_objset = ds2->ds_object;
-	} else if (dp->dp_scrub_bookmark.zb_objset == ds2->ds_object) {
-		dp->dp_scrub_bookmark.zb_objset = ds1->ds_object;
-	}
-
-	if (zap_remove_int(dp->dp_meta_objset, dp->dp_scrub_queue_obj,
-	    ds1->ds_object, tx) == 0) {
-		int err = zap_add_int(dp->dp_meta_objset,
-		    dp->dp_scrub_queue_obj, ds2->ds_object, tx);
-		VERIFY(err == 0 || err == EEXIST);
-		if (err == EEXIST) {
-			/* Both were there to begin with */
-			VERIFY(0 == zap_add_int(dp->dp_meta_objset,
-			    dp->dp_scrub_queue_obj, ds1->ds_object, tx));
-		}
-	} else if (zap_remove_int(dp->dp_meta_objset, dp->dp_scrub_queue_obj,
-	    ds2->ds_object, tx) == 0) {
-		VERIFY(0 == zap_add_int(dp->dp_meta_objset,
-		    dp->dp_scrub_queue_obj, ds1->ds_object, tx));
-	}
-}
-
-struct enqueue_clones_arg {
-	dmu_tx_t *tx;
-	uint64_t originobj;
-};
-
-/* ARGSUSED */
-static int
-enqueue_clones_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg)
-{
-	struct enqueue_clones_arg *eca = arg;
-	dsl_dataset_t *ds;
-	int err;
-	dsl_pool_t *dp;
-
-	err = dsl_dataset_hold_obj(spa->spa_dsl_pool, dsobj, FTAG, &ds);
-	if (err)
-		return (err);
-	dp = ds->ds_dir->dd_pool;
-
-	if (ds->ds_dir->dd_phys->dd_origin_obj == eca->originobj) {
-		while (ds->ds_phys->ds_prev_snap_obj != eca->originobj) {
-			dsl_dataset_t *prev;
-			err = dsl_dataset_hold_obj(dp,
-			    ds->ds_phys->ds_prev_snap_obj, FTAG, &prev);
-
-			dsl_dataset_rele(ds, FTAG);
-			if (err)
-				return (err);
-			ds = prev;
-		}
-		VERIFY(zap_add_int(dp->dp_meta_objset, dp->dp_scrub_queue_obj,
-		    ds->ds_object, eca->tx) == 0);
-	}
-	dsl_dataset_rele(ds, FTAG);
-	return (0);
-}
-
-static void
-scrub_visitds(dsl_pool_t *dp, uint64_t dsobj, dmu_tx_t *tx)
-{
-	dsl_dataset_t *ds;
-	uint64_t min_txg_save;
-
-	VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds));
-
-	/*
-	 * Iterate over the bps in this ds.
-	 */
-	min_txg_save = dp->dp_scrub_min_txg;
-	dp->dp_scrub_min_txg =
-	    MAX(dp->dp_scrub_min_txg, ds->ds_phys->ds_prev_snap_txg);
-	scrub_visit_rootbp(dp, ds, &ds->ds_phys->ds_bp);
-	dp->dp_scrub_min_txg = min_txg_save;
-
-	if (dp->dp_scrub_pausing)
-		goto out;
-
-	/*
-	 * Add descendent datasets to work queue.
-	 */
-	if (ds->ds_phys->ds_next_snap_obj != 0) {
-		VERIFY(zap_add_int(dp->dp_meta_objset, dp->dp_scrub_queue_obj,
-		    ds->ds_phys->ds_next_snap_obj, tx) == 0);
-	}
-	if (ds->ds_phys->ds_num_children > 1) {
-		boolean_t usenext = B_FALSE;
-		if (ds->ds_phys->ds_next_clones_obj != 0) {
-			uint64_t count;
-			/*
-			 * A bug in a previous version of the code could
-			 * cause upgrade_clones_cb() to not set
-			 * ds_next_snap_obj when it should, leading to a
-			 * missing entry.  Therefore we can only use the
-			 * next_clones_obj when its count is correct.
-			 */
-			int err = zap_count(dp->dp_meta_objset,
-			    ds->ds_phys->ds_next_clones_obj, &count);
-			if (err == 0 &&
-			    count == ds->ds_phys->ds_num_children - 1)
-				usenext = B_TRUE;
-		}
-
-		if (usenext) {
-			VERIFY(zap_join(dp->dp_meta_objset,
-			    ds->ds_phys->ds_next_clones_obj,
-			    dp->dp_scrub_queue_obj, tx) == 0);
-		} else {
-			struct enqueue_clones_arg eca;
-			eca.tx = tx;
-			eca.originobj = ds->ds_object;
-
-			(void) dmu_objset_find_spa(ds->ds_dir->dd_pool->dp_spa,
-			    NULL, enqueue_clones_cb, &eca, DS_FIND_CHILDREN);
-		}
-	}
-
-out:
-	dsl_dataset_rele(ds, FTAG);
-}
-
-/* ARGSUSED */
-static int
-enqueue_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg)
-{
-	dmu_tx_t *tx = arg;
-	dsl_dataset_t *ds;
-	int err;
-	dsl_pool_t *dp;
-
-	err = dsl_dataset_hold_obj(spa->spa_dsl_pool, dsobj, FTAG, &ds);
-	if (err)
-		return (err);
-
-	dp = ds->ds_dir->dd_pool;
-
-	while (ds->ds_phys->ds_prev_snap_obj != 0) {
-		dsl_dataset_t *prev;
-		err = dsl_dataset_hold_obj(dp, ds->ds_phys->ds_prev_snap_obj,
-		    FTAG, &prev);
-		if (err) {
-			dsl_dataset_rele(ds, FTAG);
-			return (err);
-		}
-
-		/*
-		 * If this is a clone, we don't need to worry about it for now.
-		 */
-		if (prev->ds_phys->ds_next_snap_obj != ds->ds_object) {
-			dsl_dataset_rele(ds, FTAG);
-			dsl_dataset_rele(prev, FTAG);
-			return (0);
-		}
-		dsl_dataset_rele(ds, FTAG);
-		ds = prev;
-	}
-
-	VERIFY(zap_add_int(dp->dp_meta_objset, dp->dp_scrub_queue_obj,
-	    ds->ds_object, tx) == 0);
-	dsl_dataset_rele(ds, FTAG);
-	return (0);
-}
-
-/*
- * Scrub/dedup interaction.
- *
- * If there are N references to a deduped block, we don't want to scrub it
- * N times -- ideally, we should scrub it exactly once.
- *
- * We leverage the fact that the dde's replication class (enum ddt_class)
- * is ordered from highest replication class (DDT_CLASS_DITTO) to lowest
- * (DDT_CLASS_UNIQUE) so that we may walk the DDT in that order.
- *
- * To prevent excess scrubbing, the scrub begins by walking the DDT
- * to find all blocks with refcnt > 1, and scrubs each of these once.
- * Since there are two replication classes which contain blocks with
- * refcnt > 1, we scrub the highest replication class (DDT_CLASS_DITTO) first.
- * Finally the top-down scrub begins, only visiting blocks with refcnt == 1.
- *
- * There would be nothing more to say if a block's refcnt couldn't change
- * during a scrub, but of course it can so we must account for changes
- * in a block's replication class.
- *
- * Here's an example of what can occur:
- *
- * If a block has refcnt > 1 during the DDT scrub phase, but has refcnt == 1
- * when visited during the top-down scrub phase, it will be scrubbed twice.
- * This negates our scrub optimization, but is otherwise harmless.
- *
- * If a block has refcnt == 1 during the DDT scrub phase, but has refcnt > 1
- * on each visit during the top-down scrub phase, it will never be scrubbed.
- * To catch this, ddt_sync_entry() notifies the scrub code whenever a block's
- * reference class transitions to a higher level (i.e DDT_CLASS_UNIQUE to
- * DDT_CLASS_DUPLICATE); if it transitions from refcnt == 1 to refcnt > 1
- * while a scrub is in progress, it scrubs the block right then.
- */
-static void
-dsl_pool_scrub_ddt(dsl_pool_t *dp)
-{
-	ddt_bookmark_t *ddb = &dp->dp_scrub_ddt_bookmark;
-	ddt_entry_t dde;
-	int error;
-
-	while ((error = ddt_walk(dp->dp_spa, ddb, &dde)) == 0) {
-		if (ddb->ddb_class > dp->dp_scrub_ddt_class_max)
-			return;
-		dsl_pool_scrub_ddt_entry(dp, ddb->ddb_checksum, &dde);
-		if (scrub_pause(dp, NULL, ddb))
-			return;
-	}
-	ASSERT(error == ENOENT);
-	ASSERT(ddb->ddb_class > dp->dp_scrub_ddt_class_max);
-}
-
-void
-dsl_pool_scrub_ddt_entry(dsl_pool_t *dp, enum zio_checksum checksum,
-    const ddt_entry_t *dde)
-{
-	const ddt_key_t *ddk = &dde->dde_key;
-	const ddt_phys_t *ddp = dde->dde_phys;
-	blkptr_t blk;
-	zbookmark_t zb = { 0 };
-
-	for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
-		if (ddp->ddp_phys_birth == 0)
-			continue;
-		ddt_bp_create(checksum, ddk, ddp, &blk);
-		scrub_funcs[dp->dp_scrub_func](dp, &blk, &zb);
-	}
-}
-
-void
-dsl_pool_scrub_sync(dsl_pool_t *dp, dmu_tx_t *tx)
-{
-	spa_t *spa = dp->dp_spa;
-	zap_cursor_t zc;
-	zap_attribute_t za;
-	boolean_t complete = B_TRUE;
-
-	if (dp->dp_scrub_func == SCRUB_FUNC_NONE)
-		return;
-
-	/*
-	 * If the pool is not loaded, or is trying to unload, leave it alone.
-	 */
-	if (spa_load_state(spa) != SPA_LOAD_NONE || spa_shutting_down(spa))
-		return;
-
-	if (dp->dp_scrub_restart) {
-		enum scrub_func func = dp->dp_scrub_func;
-		dp->dp_scrub_restart = B_FALSE;
-		dsl_pool_scrub_setup_sync(dp, &func, kcred, tx);
-	}
-
-	if (spa->spa_root_vdev->vdev_stat.vs_scrub_type == 0) {
-		/*
-		 * We must have resumed after rebooting; reset the vdev
-		 * stats to know that we're doing a scrub (although it
-		 * will think we're just starting now).
-		 */
-		vdev_scrub_stat_update(spa->spa_root_vdev,
-		    dp->dp_scrub_min_txg ? POOL_SCRUB_RESILVER :
-		    POOL_SCRUB_EVERYTHING, B_FALSE);
-	}
-
-	dp->dp_scrub_pausing = B_FALSE;
-	dp->dp_scrub_start_time = gethrtime();
-	dp->dp_scrub_isresilver = (dp->dp_scrub_min_txg != 0);
-	spa->spa_scrub_active = B_TRUE;
-
-	if (dp->dp_scrub_ddt_bookmark.ddb_class <= dp->dp_scrub_ddt_class_max) {
-		dsl_pool_scrub_ddt(dp);
-		if (dp->dp_scrub_pausing)
-			goto out;
-	}
-
-	if (dp->dp_scrub_bookmark.zb_objset == DMU_META_OBJSET) {
-		/* First do the MOS & ORIGIN */
-		scrub_visit_rootbp(dp, NULL, &dp->dp_meta_rootbp);
-		if (dp->dp_scrub_pausing)
-			goto out;
-
-		if (spa_version(spa) < SPA_VERSION_DSL_SCRUB) {
-			VERIFY(0 == dmu_objset_find_spa(spa,
-			    NULL, enqueue_cb, tx, DS_FIND_CHILDREN));
-		} else {
-			scrub_visitds(dp, dp->dp_origin_snap->ds_object, tx);
-		}
-		ASSERT(!dp->dp_scrub_pausing);
-	} else if (dp->dp_scrub_bookmark.zb_objset != ZB_DESTROYED_OBJSET) {
-		/*
-		 * If we were paused, continue from here.  Note if the ds
-		 * we were paused on was destroyed, the zb_objset will be
-		 * ZB_DESTROYED_OBJSET, so we will skip this and find a new
-		 * objset below.
-		 */
-		scrub_visitds(dp, dp->dp_scrub_bookmark.zb_objset, tx);
-		if (dp->dp_scrub_pausing)
-			goto out;
-	}
-
-	/*
-	 * In case we were paused right at the end of the ds, zero the
-	 * bookmark so we don't think that we're still trying to resume.
-	 */
-	bzero(&dp->dp_scrub_bookmark, sizeof (zbookmark_t));
-
-	/* keep pulling things out of the zap-object-as-queue */
-	while (zap_cursor_init(&zc, dp->dp_meta_objset, dp->dp_scrub_queue_obj),
-	    zap_cursor_retrieve(&zc, &za) == 0) {
-		VERIFY(0 == zap_remove(dp->dp_meta_objset,
-		    dp->dp_scrub_queue_obj, za.za_name, tx));
-		scrub_visitds(dp, za.za_first_integer, tx);
-		if (dp->dp_scrub_pausing)
-			break;
-		zap_cursor_fini(&zc);
-	}
-	zap_cursor_fini(&zc);
-	if (dp->dp_scrub_pausing)
-		goto out;
-
-	/* done. */
-
-	dsl_pool_scrub_cancel_sync(dp, &complete, kcred, tx);
-	return;
-out:
-	VERIFY(0 == zap_update(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
-	    DMU_POOL_SCRUB_BOOKMARK, sizeof (uint64_t),
-	    sizeof (dp->dp_scrub_bookmark) / sizeof (uint64_t),
-	    &dp->dp_scrub_bookmark, tx));
-	VERIFY(0 == zap_update(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
-	    DMU_POOL_SCRUB_DDT_BOOKMARK, sizeof (uint64_t),
-	    sizeof (dp->dp_scrub_ddt_bookmark) / sizeof (uint64_t),
-	    &dp->dp_scrub_ddt_bookmark, tx));
-	VERIFY(0 == zap_update(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
-	    DMU_POOL_SCRUB_DDT_CLASS_MAX, sizeof (uint64_t), 1,
-	    &dp->dp_scrub_ddt_class_max, tx));
-	VERIFY(0 == zap_update(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
-	    DMU_POOL_SCRUB_ERRORS, sizeof (uint64_t), 1,
-	    &spa->spa_scrub_errors, tx));
-}
-
-void
-dsl_pool_scrub_restart(dsl_pool_t *dp)
-{
-	mutex_enter(&dp->dp_scrub_cancel_lock);
-	dp->dp_scrub_restart = B_TRUE;
-	mutex_exit(&dp->dp_scrub_cancel_lock);
-}
-
-/*
- * scrub consumers
- */
-
-static void
-count_block(zfs_all_blkstats_t *zab, const blkptr_t *bp)
-{
-	int i;
-
-	/*
-	 * If we resume after a reboot, zab will be NULL; don't record
-	 * incomplete stats in that case.
-	 */
-	if (zab == NULL)
-		return;
-
-	for (i = 0; i < 4; i++) {
-		int l = (i < 2) ? BP_GET_LEVEL(bp) : DN_MAX_LEVELS;
-		int t = (i & 1) ? BP_GET_TYPE(bp) : DMU_OT_TOTAL;
-		zfs_blkstat_t *zb = &zab->zab_type[l][t];
-		int equal;
-
-		zb->zb_count++;
-		zb->zb_asize += BP_GET_ASIZE(bp);
-		zb->zb_lsize += BP_GET_LSIZE(bp);
-		zb->zb_psize += BP_GET_PSIZE(bp);
-		zb->zb_gangs += BP_COUNT_GANG(bp);
-
-		switch (BP_GET_NDVAS(bp)) {
-		case 2:
-			if (DVA_GET_VDEV(&bp->blk_dva[0]) ==
-			    DVA_GET_VDEV(&bp->blk_dva[1]))
-				zb->zb_ditto_2_of_2_samevdev++;
-			break;
-		case 3:
-			equal = (DVA_GET_VDEV(&bp->blk_dva[0]) ==
-			    DVA_GET_VDEV(&bp->blk_dva[1])) +
-			    (DVA_GET_VDEV(&bp->blk_dva[0]) ==
-			    DVA_GET_VDEV(&bp->blk_dva[2])) +
-			    (DVA_GET_VDEV(&bp->blk_dva[1]) ==
-			    DVA_GET_VDEV(&bp->blk_dva[2]));
-			if (equal == 1)
-				zb->zb_ditto_2_of_3_samevdev++;
-			else if (equal == 3)
-				zb->zb_ditto_3_of_3_samevdev++;
-			break;
-		}
-	}
-}
-
-static void
-dsl_pool_scrub_clean_done(zio_t *zio)
-{
-	spa_t *spa = zio->io_spa;
-
-	zio_data_buf_free(zio->io_data, zio->io_size);
-
-	mutex_enter(&spa->spa_scrub_lock);
-	spa->spa_scrub_inflight--;
-	cv_broadcast(&spa->spa_scrub_io_cv);
-
-	if (zio->io_error && (zio->io_error != ECKSUM ||
-	    !(zio->io_flags & ZIO_FLAG_SPECULATIVE)))
-		spa->spa_scrub_errors++;
-	mutex_exit(&spa->spa_scrub_lock);
-}
-
-static int
-dsl_pool_scrub_clean_cb(dsl_pool_t *dp,
-    const blkptr_t *bp, const zbookmark_t *zb)
-{
-	size_t size = BP_GET_PSIZE(bp);
-	spa_t *spa = dp->dp_spa;
-	uint64_t phys_birth = BP_PHYSICAL_BIRTH(bp);
-	boolean_t needs_io;
-	int zio_flags = ZIO_FLAG_SCRUB_THREAD | ZIO_FLAG_RAW | ZIO_FLAG_CANFAIL;
-	int zio_priority;
-
-	if (phys_birth <= dp->dp_scrub_min_txg ||
-	    phys_birth >= dp->dp_scrub_max_txg)
-		return (0);
-
-	count_block(dp->dp_blkstats, bp);
-
-	if (dp->dp_scrub_isresilver == 0) {
-		/* It's a scrub */
-		zio_flags |= ZIO_FLAG_SCRUB;
-		zio_priority = ZIO_PRIORITY_SCRUB;
-		needs_io = B_TRUE;
-	} else {
-		/* It's a resilver */
-		zio_flags |= ZIO_FLAG_RESILVER;
-		zio_priority = ZIO_PRIORITY_RESILVER;
-		needs_io = B_FALSE;
-	}
-
-	/* If it's an intent log block, failure is expected. */
-	if (zb->zb_level == ZB_ZIL_LEVEL)
-		zio_flags |= ZIO_FLAG_SPECULATIVE;
-
-	for (int d = 0; d < BP_GET_NDVAS(bp); d++) {
-		vdev_t *vd = vdev_lookup_top(spa,
-		    DVA_GET_VDEV(&bp->blk_dva[d]));
-
-		/*
-		 * Keep track of how much data we've examined so that
-		 * zpool(1M) status can make useful progress reports.
-		 */
-		mutex_enter(&vd->vdev_stat_lock);
-		vd->vdev_stat.vs_scrub_examined +=
-		    DVA_GET_ASIZE(&bp->blk_dva[d]);
-		mutex_exit(&vd->vdev_stat_lock);
-
-		/* if it's a resilver, this may not be in the target range */
-		if (!needs_io) {
-			if (DVA_GET_GANG(&bp->blk_dva[d])) {
-				/*
-				 * Gang members may be spread across multiple
-				 * vdevs, so the best estimate we have is the
-				 * scrub range, which has already been checked.
-				 * XXX -- it would be better to change our
-				 * allocation policy to ensure that all
-				 * gang members reside on the same vdev.
-				 */
-				needs_io = B_TRUE;
-			} else {
-				needs_io = vdev_dtl_contains(vd, DTL_PARTIAL,
-				    phys_birth, 1);
-			}
-		}
-	}
-
-	if (needs_io && !zfs_no_scrub_io) {
-		void *data = zio_data_buf_alloc(size);
-
-		mutex_enter(&spa->spa_scrub_lock);
-		while (spa->spa_scrub_inflight >= spa->spa_scrub_maxinflight)
-			cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock);
-		spa->spa_scrub_inflight++;
-		mutex_exit(&spa->spa_scrub_lock);
-
-		zio_nowait(zio_read(NULL, spa, bp, data, size,
-		    dsl_pool_scrub_clean_done, NULL, zio_priority,
-		    zio_flags, zb));
-	}
-
-	/* do not relocate this block */
-	return (0);
-}
-
-int
-dsl_pool_scrub_clean(dsl_pool_t *dp)
-{
-	spa_t *spa = dp->dp_spa;
-
-	/*
-	 * Purge all vdev caches and probe all devices.  We do this here
-	 * rather than in sync context because this requires a writer lock
-	 * on the spa_config lock, which we can't do from sync context.  The
-	 * spa_scrub_reopen flag indicates that vdev_open() should not
-	 * attempt to start another scrub.
-	 */
-	spa_vdev_state_enter(spa, SCL_NONE);
-	spa->spa_scrub_reopen = B_TRUE;
-	vdev_reopen(spa->spa_root_vdev);
-	spa->spa_scrub_reopen = B_FALSE;
-	(void) spa_vdev_state_exit(spa, NULL, 0);
-
-	return (dsl_pool_scrub_setup(dp, SCRUB_FUNC_CLEAN));
-}

--- a/usr/src/uts/common/fs/zfs/dsl_synctask.c	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/dsl_synctask.c	Mon May 03 14:54:08 2010 -0700
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #include <sys/dmu.h>
@@ -29,7 +28,6 @@
 #include <sys/dsl_dir.h>
 #include <sys/dsl_synctask.h>
 #include <sys/metaslab.h>
-#include <sys/cred.h>
 
 #define	DST_AVG_BLKSHIFT 14
 
@@ -49,7 +47,6 @@
 	list_create(&dstg->dstg_tasks, sizeof (dsl_sync_task_t),
 	    offsetof(dsl_sync_task_t, dst_node));
 	dstg->dstg_pool = dp;
-	dstg->dstg_cr = CRED();
 
 	return (dstg);
 }
@@ -136,7 +133,6 @@
 	uint64_t txg;
 
 	dstg->dstg_nowaiter = B_TRUE;
-	dstg->dstg_cr = NULL; /* it won't be valid by the time we sync */
 	txg = dmu_tx_get_txg(tx);
 	/*
 	 * We don't generally have many sync tasks, so pay the price of
@@ -200,8 +196,7 @@
 		 */
 		for (dst = list_head(&dstg->dstg_tasks); dst;
 		    dst = list_next(&dstg->dstg_tasks, dst)) {
-			dst->dst_syncfunc(dst->dst_arg1, dst->dst_arg2,
-			    dstg->dstg_cr, tx);
+			dst->dst_syncfunc(dst->dst_arg1, dst->dst_arg2, tx);
 		}
 	}
 	rw_exit(&dp->dp_config_rwlock);

--- a/usr/src/uts/common/fs/zfs/refcount.c	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/refcount.c	Mon May 03 14:54:08 2010 -0700
@@ -19,12 +19,9 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #include <sys/zfs_context.h>
 #include <sys/refcount.h>

--- a/usr/src/uts/common/fs/zfs/spa.c	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/spa.c	Mon May 03 14:54:08 2010 -0700
@@ -59,6 +59,7 @@
 #include <sys/systeminfo.h>
 #include <sys/spa_boot.h>
 #include <sys/zfs_ioctl.h>
+#include <sys/dsl_scan.h>
 
 #ifdef	_KERNEL
 #include <sys/bootprops.h>
@@ -110,7 +111,7 @@
 	{ ZTI_ONE,	ZTI_NULL,	ZTI_ONE,	ZTI_NULL },
 };
 
-static void spa_sync_props(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx);
+static dsl_syncfunc_t spa_sync_props;
 static boolean_t spa_has_active_shared_spare(spa_t *spa);
 static int spa_load_impl(spa_t *spa, uint64_t, nvlist_t *config,
     spa_load_state_t state, spa_import_type_t type, boolean_t mosconfig,
@@ -1105,7 +1106,7 @@
 	    KM_SLEEP);
 	for (i = 0; i < spa->spa_spares.sav_count; i++)
 		spares[i] = vdev_config_generate(spa,
-		    spa->spa_spares.sav_vdevs[i], B_TRUE, B_TRUE, B_FALSE);
+		    spa->spa_spares.sav_vdevs[i], B_TRUE, VDEV_CONFIG_SPARE);
 	VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config,
 	    ZPOOL_CONFIG_SPARES, spares, spa->spa_spares.sav_count) == 0);
 	for (i = 0; i < spa->spa_spares.sav_count; i++)
@@ -1231,7 +1232,7 @@
 	l2cache = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP);
 	for (i = 0; i < sav->sav_count; i++)
 		l2cache[i] = vdev_config_generate(spa,
-		    sav->sav_vdevs[i], B_TRUE, B_FALSE, B_TRUE);
+		    sav->sav_vdevs[i], B_TRUE, VDEV_CONFIG_L2CACHE);
 	VERIFY(nvlist_add_nvlist_array(sav->sav_config,
 	    ZPOOL_CONFIG_L2CACHE, l2cache, sav->sav_count) == 0);
 out:
@@ -1429,7 +1430,7 @@
 /*ARGSUSED*/
 static int
 spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
-    const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
+    arc_buf_t *pbuf, const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
 {
 	if (bp != NULL) {
 		zio_t *rio = arg;
@@ -1780,6 +1781,7 @@
 	spa->spa_first_txg = spa->spa_last_ubsync_txg ?
 	    spa->spa_last_ubsync_txg : spa_last_synced_txg(spa) + 1;
 	spa->spa_claim_max_txg = spa->spa_first_txg;
+	spa->spa_prev_software_version = ub->ub_software_version;
 
 	error = dsl_pool_open(spa, spa->spa_first_txg, &spa->spa_dsl_pool);
 	if (error)
@@ -1851,6 +1853,11 @@
 	if (error != 0 && error != ENOENT)
 		return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
 
+	error = spa_dir_prop(spa, DMU_POOL_CREATION_VERSION,
+	    &spa->spa_creation_version);
+	if (error != 0 && error != ENOENT)
+		return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
+
 	/*
 	 * Load the persistent error log.  If we have an older pool, this will
 	 * not be present.
@@ -2076,7 +2083,8 @@
 		/*
 		 * Check all DTLs to see if anything needs resilvering.
 		 */
-		if (vdev_resilver_needed(rvd, NULL, NULL))
+		if (!dsl_scan_resilvering(spa->spa_dsl_pool) &&
+		    vdev_resilver_needed(rvd, NULL, NULL))
 			spa_async_request(spa, SPA_ASYNC_RESILVER);
 
 		/*
@@ -2375,7 +2383,7 @@
 			if (spa_spare_exists(guid, &pool, NULL) &&
 			    pool != 0ULL) {
 				VERIFY(nvlist_lookup_uint64_array(
-				    spares[i], ZPOOL_CONFIG_STATS,
+				    spares[i], ZPOOL_CONFIG_VDEV_STATS,
 				    (uint64_t **)&vs, &vsc) == 0);
 				vs->vs_state = VDEV_STATE_CANT_OPEN;
 				vs->vs_aux = VDEV_AUX_SPARED;
@@ -2432,7 +2440,8 @@
 			ASSERT(vd != NULL);
 
 			VERIFY(nvlist_lookup_uint64_array(l2cache[i],
-			    ZPOOL_CONFIG_STATS, (uint64_t **)&vs, &vsc) == 0);
+			    ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc)
+			    == 0);
 			vdev_get_stats(vd, vs);
 		}
 	}
@@ -2819,6 +2828,12 @@
 		cmn_err(CE_PANIC, "failed to add pool config");
 	}
 
+	if (zap_add(spa->spa_meta_objset,
+	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CREATION_VERSION,
+	    sizeof (uint64_t), 1, &version, tx) != 0) {
+		cmn_err(CE_PANIC, "failed to add pool version");
+	}
+
 	/* Newly created pools with the right version are always deflated. */
 	if (version >= SPA_VERSION_RAIDZ_DEFLATE) {
 		spa->spa_deflate = TRUE;
@@ -2861,7 +2876,7 @@
 
 	if (props != NULL) {
 		spa_configfile_set(spa, props, B_FALSE);
-		spa_sync_props(spa, props, CRED(), tx);
+		spa_sync_props(spa, props, tx);
 	}
 
 	dmu_tx_commit(tx);
@@ -3219,8 +3234,6 @@
 		return (error);
 	}
 
-	spa_async_resume(spa);
-
 	/*
 	 * Override any spares and level 2 cache devices as specified by
 	 * the user, as these may have correct device names/devids, etc.
@@ -3271,6 +3284,8 @@
 		spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);
 	}
 
+	spa_async_resume(spa);
+
 	/*
 	 * It's possible that the pool was expanded while it was exported.
 	 * We kick off an async task to handle this for us.
@@ -3455,7 +3470,8 @@
 		if (new_state != POOL_STATE_UNINITIALIZED && !hardforce) {
 			spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
 			spa->spa_state = new_state;
-			spa->spa_final_txg = spa_last_synced_txg(spa) + 1;
+			spa->spa_final_txg = spa_last_synced_txg(spa) +
+			    TXG_DEFER_SIZE + 1;
 			vdev_config_dirty(spa->spa_root_vdev);
 			spa_config_exit(spa, SCL_ALL, FTAG);
 		}
@@ -3635,7 +3651,7 @@
 int
 spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
 {
-	uint64_t txg, open_txg;
+	uint64_t txg, dtl_max_txg;
 	vdev_t *rvd = spa->spa_root_vdev;
 	vdev_t *oldvd, *newvd, *newrootvd, *pvd, *tvd;
 	vdev_ops_t *pvops;
@@ -3771,13 +3787,14 @@
 	vdev_config_dirty(tvd);
 
 	/*
-	 * Set newvd's DTL to [TXG_INITIAL, open_txg].  It will propagate
-	 * upward when spa_vdev_exit() calls vdev_dtl_reassess().
+	 * Set newvd's DTL to [TXG_INITIAL, dtl_max_txg) so that we account
+	 * for any dmu_sync-ed blocks.  It will propagate upward when
+	 * spa_vdev_exit() calls vdev_dtl_reassess().
 	 */
-	open_txg = txg + TXG_CONCURRENT_STATES - 1;
-
-	vdev_dtl_dirty(newvd, DTL_MISSING,
-	    TXG_INITIAL, open_txg - TXG_INITIAL + 1);
+	dtl_max_txg = txg + TXG_CONCURRENT_STATES;
+
+	vdev_dtl_dirty(newvd, DTL_MISSING, TXG_INITIAL,
+	    dtl_max_txg - TXG_INITIAL);
 
 	if (newvd->vdev_isspare) {
 		spa_spare_activate(newvd);
@@ -3793,10 +3810,18 @@
 	 */
 	vdev_dirty(tvd, VDD_DTL, newvd, txg);
 
-	(void) spa_vdev_exit(spa, newrootvd, open_txg, 0);
-
-	spa_history_internal_log(LOG_POOL_VDEV_ATTACH, spa, NULL,
-	    CRED(),  "%s vdev=%s %s vdev=%s",
+	/*
+	 * Restart the resilver
+	 */
+	dsl_resilver_restart(spa->spa_dsl_pool, dtl_max_txg);
+
+	/*
+	 * Commit the config
+	 */
+	(void) spa_vdev_exit(spa, newrootvd, dtl_max_txg, 0);
+
+	spa_history_log_internal(LOG_POOL_VDEV_ATTACH, spa, NULL,
+	    "%s vdev=%s %s vdev=%s",
 	    replacing && newvd_isspare ? "spare in" :
 	    replacing ? "replace" : "attach", newvdpath,
 	    replacing ? "for" : "to", oldvdpath);
@@ -3804,11 +3829,6 @@
 	spa_strfree(oldvdpath);
 	spa_strfree(newvdpath);
 
-	/*
-	 * Kick off a resilver to update newvd.
-	 */
-	VERIFY3U(spa_scrub(spa, POOL_SCRUB_RESILVER), ==, 0);
-
 	return (0);
 }
 
@@ -4004,7 +4024,7 @@
 
 	error = spa_vdev_exit(spa, vd, txg, 0);
 
-	spa_history_internal_log(LOG_POOL_VDEV_DETACH, spa, NULL, CRED(),
+	spa_history_log_internal(LOG_POOL_VDEV_DETACH, spa, NULL,
 	    "vdev=%s", vdpath);
 	spa_strfree(vdpath);
 
@@ -4024,7 +4044,8 @@
 				continue;
 			spa_open_ref(spa, FTAG);
 			mutex_exit(&spa_namespace_lock);
-			(void) spa_vdev_remove(spa, unspare_guid, B_TRUE);
+			(void) spa_vdev_remove(spa, unspare_guid,
+			    B_TRUE);
 			mutex_enter(&spa_namespace_lock);
 			spa_close(spa, FTAG);
 		}
@@ -4266,8 +4287,8 @@
 		if (vml[c] != NULL) {
 			vdev_split(vml[c]);
 			if (error == 0)
-				spa_history_internal_log(LOG_POOL_VDEV_DETACH,
-				    spa, tx, CRED(), "vdev=%s",
+				spa_history_log_internal(LOG_POOL_VDEV_DETACH,
+				    spa, tx, "vdev=%s",
 				    vml[c]->vdev_path);
 			vdev_free(vml[c]);
 		}
@@ -4283,7 +4304,7 @@
 		zio_handle_panic_injection(spa, FTAG, 3);
 
 	/* split is complete; log a history record */
-	spa_history_internal_log(LOG_POOL_SPLIT, newspa, NULL, CRED(),
+	spa_history_log_internal(LOG_POOL_SPLIT, newspa, NULL,
 	    "split new pool %s from pool %s", newname, spa_name(spa));
 
 	kmem_free(vml, children * sizeof (vdev_t *));
@@ -4359,21 +4380,13 @@
 }
 
 /*
- * Removing a device from the vdev namespace requires several steps
- * and can take a significant amount of time.  As a result we use
- * the spa_vdev_config_[enter/exit] functions which allow us to
- * grab and release the spa_config_lock while still holding the namespace
- * lock.  During each step the configuration is synced out.
- */
-
-/*
  * Evacuate the device.
  */
-int
+static int
 spa_vdev_remove_evacuate(spa_t *spa, vdev_t *vd)
 {
+	uint64_t txg;
 	int error = 0;
-	uint64_t txg;
 
 	ASSERT(MUTEX_HELD(&spa_namespace_lock));
 	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0);
@@ -4386,14 +4399,12 @@
 	 * should no longer have any blocks allocated on it.
 	 */
 	if (vd->vdev_islog) {
-		error = dmu_objset_find(spa_name(spa), zil_vdev_offline,
-		    NULL, DS_FIND_CHILDREN);
+		if (vd->vdev_stat.vs_alloc != 0)
+			error = spa_offline_log(spa);
 	} else {
-		error = ENOTSUP;	/* until we have bp rewrite */
+		error = ENOTSUP;
 	}
 
-	txg_wait_synced(spa_get_dsl(spa), 0);
-
 	if (error)
 		return (error);
 
@@ -4401,6 +4412,7 @@
 	 * The evacuation succeeded.  Remove any remaining MOS metadata
 	 * associated with this vdev, and wait for these changes to sync.
 	 */
+	ASSERT3U(vd->vdev_stat.vs_alloc, ==, 0);
 	txg = spa_vdev_config_enter(spa);
 	vd->vdev_removing = B_TRUE;
 	vdev_dirty(vd, 0, NULL, txg);
@@ -4413,7 +4425,7 @@
 /*
  * Complete the removal by cleaning up the namespace.
  */
-void
+static void
 spa_vdev_remove_from_namespace(spa_t *spa, vdev_t *vd)
 {
 	vdev_t *rvd = spa->spa_root_vdev;
@@ -4424,6 +4436,12 @@
 	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);
 	ASSERT(vd == vd->vdev_top);
 
+	/*
+	 * Only remove any devices which are empty.
+	 */
+	if (vd->vdev_stat.vs_alloc != 0)
+		return;
+
 	(void) vdev_label_init(vd, 0, VDEV_LABEL_REMOVE);
 
 	if (list_link_active(&vd->vdev_state_dirty_node))
@@ -4439,15 +4457,19 @@
 		vd = vdev_alloc_common(spa, id, 0, &vdev_hole_ops);
 		vdev_add_child(rvd, vd);
 	}
-	vdev_config_dirty(rvd);
-
-	/*
-	 * Reassess the health of our root vdev.
-	 */
-	vdev_reopen(rvd);
 }
 
 /*
+ * Remove a device from the pool -
+ *
+ * Removing a device from the vdev namespace requires several steps
+ * and can take a significant amount of time.  As a result we use
+ * the spa_vdev_config_[enter/exit] functions which allow us to
+ * grab and release the spa_config_lock while still holding the namespace
+ * lock.  During each step the configuration is synced out.
+ */
+
+/*
  * Remove a device from the pool.  Currently, this supports removing only hot
  * spares, slogs, and level 2 ARC devices.
  */
@@ -4690,40 +4712,38 @@
 
 /*
  * ==========================================================================
- * SPA Scrubbing
+ * SPA Scanning
  * ==========================================================================
  */
 
 int
-spa_scrub(spa_t *spa, pool_scrub_type_t type)
+spa_scan_stop(spa_t *spa)
 {
 	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0);
-
-	if ((uint_t)type >= POOL_SCRUB_TYPES)
+	if (dsl_scan_resilvering(spa->spa_dsl_pool))
+		return (EBUSY);
+	return (dsl_scan_cancel(spa->spa_dsl_pool));
+}
+
+int
+spa_scan(spa_t *spa, pool_scan_func_t func)
+{
+	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0);
+
+	if (func >= POOL_SCAN_FUNCS || func == POOL_SCAN_NONE)
 		return (ENOTSUP);
 
 	/*
 	 * If a resilver was requested, but there is no DTL on a
 	 * writeable leaf device, we have nothing to do.
 	 */
-	if (type == POOL_SCRUB_RESILVER &&
+	if (func == POOL_SCAN_RESILVER &&
 	    !vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL)) {
 		spa_async_request(spa, SPA_ASYNC_RESILVER_DONE);
 		return (0);
 	}
 
-	if (type == POOL_SCRUB_EVERYTHING &&
-	    spa->spa_dsl_pool->dp_scrub_func != SCRUB_FUNC_NONE &&
-	    spa->spa_dsl_pool->dp_scrub_isresilver)
-		return (EBUSY);
-
-	if (type == POOL_SCRUB_EVERYTHING || type == POOL_SCRUB_RESILVER) {
-		return (dsl_pool_scrub_clean(spa->spa_dsl_pool));
-	} else if (type == POOL_SCRUB_NONE) {
-		return (dsl_pool_scrub_cancel(spa->spa_dsl_pool));
-	} else {
-		return (EINVAL);
-	}
+	return (dsl_scan(spa->spa_dsl_pool, func));
 }
 
 /*
@@ -4829,8 +4849,8 @@
 		 * then log an internal history event.
 		 */
 		if (new_space != old_space) {
-			spa_history_internal_log(LOG_POOL_VDEV_ONLINE,
-			    spa, NULL, CRED(),
+			spa_history_log_internal(LOG_POOL_VDEV_ONLINE,
+			    spa, NULL,
 			    "pool '%s' size: %llu(+%llu)",
 			    spa_name(spa), new_space, new_space - old_space);
 		}
@@ -4874,7 +4894,7 @@
 	 * Kick off a resilver.
 	 */
 	if (tasks & SPA_ASYNC_RESILVER)
-		VERIFY(spa_scrub(spa, POOL_SCRUB_RESILVER) == 0);
+		dsl_resilver_restart(spa->spa_dsl_pool, 0);
 
 	/*
 	 * Let the world know that we're done.
@@ -4920,6 +4940,7 @@
 void
 spa_async_request(spa_t *spa, int task)
 {
+	zfs_dbgmsg("spa=%s async request task=%u", spa->spa_name, task);
 	mutex_enter(&spa->spa_async_lock);
 	spa->spa_async_tasks |= task;
 	mutex_exit(&spa->spa_async_lock);
@@ -5024,7 +5045,7 @@
 		list = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP);
 		for (i = 0; i < sav->sav_count; i++)
 			list[i] = vdev_config_generate(spa, sav->sav_vdevs[i],
-			    B_FALSE, B_FALSE, B_TRUE);
+			    B_FALSE, VDEV_CONFIG_L2CACHE);
 		VERIFY(nvlist_add_nvlist_array(nvroot, config, list,
 		    sav->sav_count) == 0);
 		for (i = 0; i < sav->sav_count; i++)
@@ -5064,7 +5085,7 @@
  * Set zpool properties.
  */
 static void
-spa_sync_props(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
+spa_sync_props(void *arg1, void *arg2, dmu_tx_t *tx)
 {
 	spa_t *spa = arg1;
 	objset_t *mos = spa->spa_meta_objset;
@@ -5176,8 +5197,8 @@
 		/* log internal history if this is not a zpool create */
 		if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY &&
 		    tx->tx_txg != TXG_INITIAL) {
-			spa_history_internal_log(LOG_POOL_PROPSET,
-			    spa, tx, cr, "%s %lld %s",
+			spa_history_log_internal(LOG_POOL_PROPSET,
+			    spa, tx, "%s %lld %s",
 			    nvpair_name(elem), intval, spa_name(spa));
 		}
 	}
@@ -5272,13 +5293,17 @@
 	}
 
 	/*
-	 * If anything has changed in this txg, push the deferred frees
-	 * from the previous txg.  If not, leave them alone so that we
-	 * don't generate work on an otherwise idle system.
+	 * If anything has changed in this txg, or if someone is waiting
+	 * for this txg to sync (eg, spa_vdev_remove()), push the
+	 * deferred frees from the previous txg.  If not, leave them
+	 * alone so that we don't generate work on an otherwise idle
+	 * system.
 	 */
 	if (!txg_list_empty(&dp->dp_dirty_datasets, txg) ||
 	    !txg_list_empty(&dp->dp_dirty_dirs, txg) ||
-	    !txg_list_empty(&dp->dp_sync_tasks, txg))
+	    !txg_list_empty(&dp->dp_sync_tasks, txg) ||
+	    ((dp->dp_scan->scn_phys.scn_state == DSS_SCANNING ||
+	    txg_sync_waiting(dp)) && !spa_shutting_down(spa)))
 		spa_sync_deferred_bplist(spa, defer_bpl, tx, txg);
 
 	/*
@@ -5304,11 +5329,7 @@
 		}
 
 		ddt_sync(spa, txg);
-
-		mutex_enter(&spa->spa_scrub_lock);
-		while (spa->spa_scrub_inflight > 0)
-			cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock);
-		mutex_exit(&spa->spa_scrub_lock);
+		dsl_scan_sync(dp, tx);
 
 		while (vd = txg_list_remove(&spa->spa_vdev_txg_list, txg))
 			vdev_sync(vd, txg);

--- a/usr/src/uts/common/fs/zfs/spa_config.c	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/spa_config.c	Mon May 03 14:54:08 2010 -0700
@@ -20,8 +20,7 @@
  */
 
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #include <sys/spa.h>
@@ -419,7 +418,7 @@
 		    split_guid) == 0);
 	}
 
-	nvroot = vdev_config_generate(spa, vd, getstats, B_FALSE, B_FALSE);
+	nvroot = vdev_config_generate(spa, vd, getstats, 0);
 	VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0);
 	nvlist_free(nvroot);

--- a/usr/src/uts/common/fs/zfs/spa_errlog.c	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/spa_errlog.c	Mon May 03 14:54:08 2010 -0700
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 /*
@@ -54,36 +53,6 @@
 #include <sys/zap.h>
 #include <sys/zio.h>
 
-/*
- * This is a stripped-down version of strtoull, suitable only for converting
- * lowercase hexidecimal numbers that don't overflow.
- */
-uint64_t
-strtonum(const char *str, char **nptr)
-{
-	uint64_t val = 0;
-	char c;
-	int digit;
-
-	while ((c = *str) != '\0') {
-		if (c >= '0' && c <= '9')
-			digit = c - '0';
-		else if (c >= 'a' && c <= 'f')
-			digit = 10 + c - 'a';
-		else
-			break;
-
-		val *= 16;
-		val += digit;
-
-		str++;
-	}
-
-	if (nptr)
-		*nptr = (char *)str;
-
-	return (val);
-}
 
 /*
  * Convert a bookmark to a string.

--- a/usr/src/uts/common/fs/zfs/spa_history.c	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/spa_history.c	Mon May 03 14:54:08 2010 -0700
@@ -20,8 +20,7 @@
  */
 
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #include <sys/spa.h>
@@ -33,6 +32,7 @@
 #include <sys/utsname.h>
 #include <sys/cmn_err.h>
 #include <sys/sunddi.h>
+#include "zfs_comutil.h"
 #ifdef _KERNEL
 #include <sys/zone.h>
 #endif
@@ -189,7 +189,7 @@
  */
 /*ARGSUSED*/
 static void
-spa_history_log_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
+spa_history_log_sync(void *arg1, void *arg2, dmu_tx_t *tx)
 {
 	spa_t		*spa = arg1;
 	history_arg_t	*hap = arg2;
@@ -244,6 +244,8 @@
 	    hap->ha_log_type == LOG_CMD_NORMAL) {
 		VERIFY(nvlist_add_string(nvrecord, ZPOOL_HIST_CMD,
 		    history_str) == 0);
+
+		zfs_dbgmsg("command: %s", history_str);
 	} else {
 		VERIFY(nvlist_add_uint64(nvrecord, ZPOOL_HIST_INT_EVENT,
 		    hap->ha_event) == 0);
@@ -251,6 +253,11 @@
 		    tx->tx_txg) == 0);
 		VERIFY(nvlist_add_string(nvrecord, ZPOOL_HIST_INT_STR,
 		    history_str) == 0);
+
+		zfs_dbgmsg("internal %s pool:%s txg:%llu %s",
+		    zfs_history_event_names[hap->ha_event], spa_name(spa),
+		    (longlong_t)tx->tx_txg, history_str);
+
 	}
 
 	VERIFY(nvlist_size(nvrecord, &reclen, NV_ENCODE_XDR) == 0);
@@ -418,7 +425,7 @@
 
 static void
 log_internal(history_internal_events_t event, spa_t *spa,
-    dmu_tx_t *tx, cred_t *cr, const char *fmt, va_list adx)
+    dmu_tx_t *tx, const char *fmt, va_list adx)
 {
 	history_arg_t *ha;
 
@@ -441,7 +448,7 @@
 	ha->ha_uid = 0;
 
 	if (dmu_tx_is_syncing(tx)) {
-		spa_history_log_sync(spa, ha, cr, tx);
+		spa_history_log_sync(spa, ha, tx);
 	} else {
 		dsl_sync_task_do_nowait(spa_get_dsl(spa), NULL,
 		    spa_history_log_sync, spa, ha, 0, tx);
@@ -450,8 +457,8 @@
 }
 
 void
-spa_history_internal_log(history_internal_events_t event, spa_t *spa,
-    dmu_tx_t *tx, cred_t *cr, const char *fmt, ...)
+spa_history_log_internal(history_internal_events_t event, spa_t *spa,
+    dmu_tx_t *tx, const char *fmt, ...)
 {
 	dmu_tx_t *htx = tx;
 	va_list adx;
@@ -466,7 +473,7 @@
 	}
 
 	va_start(adx, fmt);
-	log_internal(event, spa, htx, cr, fmt, adx);
+	log_internal(event, spa, htx, fmt, adx);
 	va_end(adx);
 
 	/* if we didn't get a tx from the caller, commit the one we made */
@@ -481,7 +488,7 @@
 	uint64_t current_vers = spa_version(spa);
 
 	if (current_vers >= SPA_VERSION_ZPOOL_HISTORY) {
-		spa_history_internal_log(event, spa, NULL, CRED(),
+		spa_history_log_internal(event, spa, NULL,
 		    "pool spa %llu; zfs spa %llu; zpl %d; uts %s %s %s %s",
 		    (u_longlong_t)current_vers, SPA_VERSION, ZPL_VERSION,
 		    utsname.nodename, utsname.release, utsname.version,

--- a/usr/src/uts/common/fs/zfs/spa_misc.c	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/spa_misc.c	Mon May 03 14:54:08 2010 -0700
@@ -40,6 +40,7 @@
 #include <sys/dsl_pool.h>
 #include <sys/dsl_dir.h>
 #include <sys/dsl_prop.h>
+#include <sys/dsl_scan.h>
 #include <sys/fs/zfs.h>
 #include <sys/metaslab_impl.h>
 #include <sys/arc.h>
@@ -888,10 +889,10 @@
 	vdev_dtl_reassess(spa->spa_root_vdev, 0, 0, B_FALSE);
 
 	/*
-	 * If the config changed, notify the scrub thread that it must restart.
+	 * If the config changed, notify the scrub that it must restart.
+	 * This will initiate a resilver if needed.
 	 */
 	if (error == 0 && !list_is_empty(&spa->spa_config_dirty_list)) {
-		dsl_pool_scrub_restart(spa->spa_dsl_pool);
 		config_changed = B_TRUE;
 		spa->spa_config_generation++;
 	}
@@ -1078,7 +1079,6 @@
 	return (0);
 }
 
-
 /*
  * Determine whether a pool with given pool_guid exists.  If device_guid is
  * non-zero, determine whether the pool exists *and* contains a device with the
@@ -1209,6 +1209,37 @@
 }
 
 /*
+ * This is a stripped-down version of strtoull, suitable only for converting
+ * lowercase hexidecimal numbers that don't overflow.
+ */
+uint64_t
+strtonum(const char *str, char **nptr)
+{
+	uint64_t val = 0;
+	char c;
+	int digit;
+
+	while ((c = *str) != '\0') {
+		if (c >= '0' && c <= '9')
+			digit = c - '0';
+		else if (c >= 'a' && c <= 'f')
+			digit = 10 + c - 'a';
+		else
+			break;
+
+		val *= 16;
+		val += digit;
+
+		str++;
+	}
+
+	if (nptr)
+		*nptr = (char *)str;
+
+	return (val);
+}
+
+/*
  * ==========================================================================
  * Accessor functions
  * ==========================================================================
@@ -1390,6 +1421,12 @@
 	return (MIN(SPA_DVAS_PER_BP, spa_max_replication_override));
 }
 
+int
+spa_prev_software_version(spa_t *spa)
+{
+	return (spa->spa_prev_software_version);
+}
+
 uint64_t
 dva_get_dsize_sync(spa_t *spa, const dva_t *dva)
 {
@@ -1584,3 +1621,45 @@
 {
 	return (spa->spa_dedup_checksum);
 }
+
+/*
+ * Reset pool scan stat per scan pass (or reboot).
+ */
+void
+spa_scan_stat_init(spa_t *spa)
+{
+	/* data not stored on disk */
+	spa->spa_scan_pass_start = gethrestime_sec();
+	spa->spa_scan_pass_exam = 0;
+	vdev_scan_stat_init(spa->spa_root_vdev);
+}
+
+/*
+ * Get scan stats for zpool status reports
+ */
+int
+spa_scan_get_stats(spa_t *spa, pool_scan_stat_t *ps)
+{
+	dsl_scan_t *scn = spa->spa_dsl_pool ? spa->spa_dsl_pool->dp_scan : NULL;
+
+	if (scn == NULL || scn->scn_phys.scn_func == POOL_SCAN_NONE)
+		return (ENOENT);
+	bzero(ps, sizeof (pool_scan_stat_t));
+
+	/* data stored on disk */
+	ps->pss_func = scn->scn_phys.scn_func;
+	ps->pss_start_time = scn->scn_phys.scn_start_time;
+	ps->pss_end_time = scn->scn_phys.scn_end_time;
+	ps->pss_to_examine = scn->scn_phys.scn_to_examine;
+	ps->pss_examined = scn->scn_phys.scn_examined;
+	ps->pss_to_process = scn->scn_phys.scn_to_process;
+	ps->pss_processed = scn->scn_phys.scn_processed;
+	ps->pss_errors = scn->scn_phys.scn_errors;
+	ps->pss_state = scn->scn_phys.scn_state;
+
+	/* data not stored on disk */
+	ps->pss_pass_start = spa->spa_scan_pass_start;
+	ps->pss_pass_exam = spa->spa_scan_pass_exam;
+
+	return (0);
+}

--- a/usr/src/uts/common/fs/zfs/sys/arc.h	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/arc.h	Mon May 03 14:54:08 2010 -0700
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #ifndef	_SYS_ARC_H
@@ -48,7 +47,8 @@
 struct arc_buf {
 	arc_buf_hdr_t		*b_hdr;
 	arc_buf_t		*b_next;
-	krwlock_t		b_lock;
+	kmutex_t		b_evict_lock;
+	krwlock_t		b_data_lock;
 	void			*b_data;
 	arc_evict_func_t	*b_efunc;
 	void			*b_private;
@@ -92,6 +92,8 @@
 int arc_buf_remove_ref(arc_buf_t *buf, void *tag);
 int arc_buf_size(arc_buf_t *buf);
 void arc_release(arc_buf_t *buf, void *tag);
+int arc_release_bp(arc_buf_t *buf, void *tag, blkptr_t *bp, spa_t *spa,
+    zbookmark_t *zb);
 int arc_released(arc_buf_t *buf);
 int arc_has_callback(arc_buf_t *buf);
 void arc_buf_freeze(arc_buf_t *buf);

--- a/usr/src/uts/common/fs/zfs/sys/dbuf.h	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/dbuf.h	Mon May 03 14:54:08 2010 -0700
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #ifndef	_SYS_DBUF_H
@@ -278,6 +277,7 @@
 void dbuf_setdirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
 void dbuf_unoverride(dbuf_dirty_record_t *dr);
 void dbuf_sync_list(list_t *list, dmu_tx_t *tx);
+void dbuf_release_bp(dmu_buf_impl_t *db);
 
 void dbuf_free_range(struct dnode *dn, uint64_t start, uint64_t end,
     struct dmu_tx *);

--- a/usr/src/uts/common/fs/zfs/sys/ddt.h	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/ddt.h	Mon May 03 14:54:08 2010 -0700
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #ifndef _SYS_DDT_H
@@ -232,6 +231,8 @@
 extern void ddt_unload(spa_t *spa);
 extern void ddt_sync(spa_t *spa, uint64_t txg);
 extern int ddt_walk(spa_t *spa, ddt_bookmark_t *ddb, ddt_entry_t *dde);
+extern int ddt_object_update(ddt_t *ddt, enum ddt_type type,
+    enum ddt_class class, ddt_entry_t *dde, dmu_tx_t *tx);
 
 extern const ddt_ops_t ddt_zap_ops;

--- a/usr/src/uts/common/fs/zfs/sys/dmu.h	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/dmu.h	Mon May 03 14:54:08 2010 -0700
@@ -118,7 +118,7 @@
 	DMU_OT_FUID,			/* FUID table (Packed NVLIST UINT8) */
 	DMU_OT_FUID_SIZE,		/* FUID table size UINT64 */
 	DMU_OT_NEXT_CLONES,		/* ZAP */
-	DMU_OT_SCRUB_QUEUE,		/* ZAP */
+	DMU_OT_SCAN_QUEUE,		/* ZAP */
 	DMU_OT_USERGROUP_USED,		/* ZAP */
 	DMU_OT_USERGROUP_QUOTA,		/* ZAP */
 	DMU_OT_USERREFS,		/* ZAP */
@@ -128,6 +128,8 @@
 	DMU_OT_SA_MASTER_NODE,		/* ZAP */
 	DMU_OT_SA_ATTR_REGISTRATION,	/* ZAP */
 	DMU_OT_SA_ATTR_LAYOUTS,		/* ZAP */
+	DMU_OT_SCAN_XLATE,		/* ZAP */
+	DMU_OT_DEDUP,			/* fake dedup BP from ddt_bp_create() */
 	DMU_OT_NUMTYPES
 } dmu_object_type_t;
 
@@ -220,23 +222,8 @@
 #define	DMU_POOL_TMP_USERREFS		"tmp_userrefs"
 #define	DMU_POOL_DDT			"DDT-%s-%s-%s"
 #define	DMU_POOL_DDT_STATS		"DDT-statistics"
-
-/* 4x8 zbookmark_t */
-#define	DMU_POOL_SCRUB_BOOKMARK		"scrub_bookmark"
-/* 4x8 ddt_bookmark_t */
-#define	DMU_POOL_SCRUB_DDT_BOOKMARK	"scrub_ddt_bookmark"
-/* 1x8 max_class */
-#define	DMU_POOL_SCRUB_DDT_CLASS_MAX	"scrub_ddt_class_max"
-/* 1x8 zap obj DMU_OT_SCRUB_QUEUE */
-#define	DMU_POOL_SCRUB_QUEUE		"scrub_queue"
-/* 1x8 txg */
-#define	DMU_POOL_SCRUB_MIN_TXG		"scrub_min_txg"
-/* 1x8 txg */
-#define	DMU_POOL_SCRUB_MAX_TXG		"scrub_max_txg"
-/* 1x4 enum scrub_func */
-#define	DMU_POOL_SCRUB_FUNC		"scrub_func"
-/* 1x8 count */
-#define	DMU_POOL_SCRUB_ERRORS		"scrub_errors"
+#define	DMU_POOL_CREATION_VERSION	"creation_version"
+#define	DMU_POOL_SCAN			"scan"
 
 /*
  * Allocate an object from this objset.  The range of object numbers

--- a/usr/src/uts/common/fs/zfs/sys/dmu_objset.h	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/dmu_objset.h	Mon May 03 14:54:08 2010 -0700
@@ -46,6 +46,9 @@
 #define	OBJSET_PHYS_SIZE 2048
 #define	OBJSET_OLD_PHYS_SIZE 1024
 
+#define	OBJSET_BUF_HAS_USERUSED(buf) \
+	(arc_buf_size(buf) > OBJSET_OLD_PHYS_SIZE)
+
 #define	OBJSET_FLAG_USERACCOUNTING_COMPLETE	(1ULL<<0)
 
 typedef struct objset_phys {

--- a/usr/src/uts/common/fs/zfs/sys/dmu_traverse.h	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/dmu_traverse.h	Mon May 03 14:54:08 2010 -0700
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #ifndef	_SYS_DMU_TRAVERSE_H
@@ -37,9 +36,11 @@
 struct dnode_phys;
 struct dsl_dataset;
 struct zilog;
+struct arc_buf;
 
 typedef int (blkptr_cb_t)(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
-    const zbookmark_t *zb, const struct dnode_phys *dnp, void *arg);
+    struct arc_buf *pbuf, const zbookmark_t *zb, const struct dnode_phys *dnp,
+    void *arg);
 
 #define	TRAVERSE_PRE			(1<<0)
 #define	TRAVERSE_POST			(1<<1)

--- a/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h	Mon May 03 14:54:08 2010 -0700
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #ifndef	_SYS_DSL_DATASET_H
@@ -113,7 +112,6 @@
 
 	/* only used in syncing context, only valid for non-snapshots: */
 	struct dsl_dataset *ds_prev;
-	uint64_t ds_origin_txg;
 
 	/* has internal locking: */
 	bplist_t ds_deadlist;
@@ -235,8 +233,7 @@
     uint64_t *ref_rsrv);
 int dsl_dataset_set_quota(const char *dsname, zprop_source_t source,
     uint64_t quota);
-void dsl_dataset_set_quota_sync(void *arg1, void *arg2, cred_t *cr,
-    dmu_tx_t *tx);
+dsl_syncfunc_t dsl_dataset_set_quota_sync;
 int dsl_dataset_set_reservation(const char *dsname, zprop_source_t source,
     uint64_t reservation);

--- a/usr/src/uts/common/fs/zfs/sys/dsl_dir.h	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/dsl_dir.h	Mon May 03 14:54:08 2010 -0700
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #ifndef	_SYS_DSL_DIR_H
@@ -90,6 +89,7 @@
 	kmutex_t dd_lock;
 	list_t dd_prop_cbs; /* list of dsl_prop_cb_record_t's */
 	timestruc_t dd_snap_cmtime; /* last time snapshot namespace changed */
+	uint64_t dd_origin_txg;
 
 	/* gross estimate of space used by in-flight tx's */
 	uint64_t dd_tempreserved[TXG_SIZE];
@@ -142,6 +142,7 @@
 /* internal reserved dir name */
 #define	MOS_DIR_NAME "$MOS"
 #define	ORIGIN_DIR_NAME "$ORIGIN"
+#define	XLATION_DIR_NAME "$XLATION"
 
 #ifdef ZFS_DEBUG
 #define	dprintf_dd(dd, fmt, ...) do { \

--- a/usr/src/uts/common/fs/zfs/sys/dsl_pool.h	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/dsl_pool.h	Mon May 03 14:54:08 2010 -0700
@@ -32,6 +32,7 @@
 #include <sys/zio.h>
 #include <sys/dnode.h>
 #include <sys/ddt.h>
+#include <sys/arc.h>
 
 #ifdef	__cplusplus
 extern "C" {
@@ -42,12 +43,7 @@
 struct dsl_dataset;
 struct dsl_pool;
 struct dmu_tx;
-
-enum scrub_func {
-	SCRUB_FUNC_NONE,
-	SCRUB_FUNC_CLEAN,
-	SCRUB_FUNC_NUMFUNCS
-};
+struct dsl_scan;
 
 /* These macros are for indexing into the zfs_all_blkstats_t. */
 #define	DMU_OT_DEFERRED	DMU_OT_NONE
@@ -87,25 +83,13 @@
 	uint64_t dp_write_limit;
 	uint64_t dp_tmp_userrefs_obj;
 
+	struct dsl_scan *dp_scan;
+
 	/* Uses dp_lock */
 	kmutex_t dp_lock;
 	uint64_t dp_space_towrite[TXG_SIZE];
 	uint64_t dp_tempreserved[TXG_SIZE];
 
-	enum scrub_func dp_scrub_func;
-	uint64_t dp_scrub_queue_obj;
-	uint64_t dp_scrub_min_txg;
-	uint64_t dp_scrub_max_txg;
-	uint64_t dp_scrub_start_time;
-	uint64_t dp_scrub_ddt_class_max;
-	zbookmark_t dp_scrub_bookmark;
-	ddt_bookmark_t dp_scrub_ddt_bookmark;
-	boolean_t dp_scrub_pausing;
-	boolean_t dp_scrub_isresilver;
-	boolean_t dp_scrub_restart;
-	kmutex_t dp_scrub_cancel_lock; /* protects dp_scrub_restart */
-	zio_t *dp_scrub_prefetch_zio_root;
-
 	/* Has its own locking */
 	tx_state_t dp_tx;
 	txg_list_t dp_dirty_datasets;
@@ -138,20 +122,15 @@
 void dsl_free(dsl_pool_t *dp, uint64_t txg, const blkptr_t *bpp);
 void dsl_free_sync(zio_t *pio, dsl_pool_t *dp, uint64_t txg,
     const blkptr_t *bpp);
-void dsl_pool_ds_destroyed(struct dsl_dataset *ds, struct dmu_tx *tx);
-void dsl_pool_ds_snapshotted(struct dsl_dataset *ds, struct dmu_tx *tx);
-void dsl_pool_ds_clone_swapped(struct dsl_dataset *ds1, struct dsl_dataset *ds2,
-    struct dmu_tx *tx);
+int dsl_read(zio_t *pio, spa_t *spa, const blkptr_t *bpp, arc_buf_t *pbuf,
+    arc_done_func_t *done, void *private, int priority, int zio_flags,
+    uint32_t *arc_flags, const zbookmark_t *zb);
+int dsl_read_nolock(zio_t *pio, spa_t *spa, const blkptr_t *bpp,
+    arc_done_func_t *done, void *private, int priority, int zio_flags,
+    uint32_t *arc_flags, const zbookmark_t *zb);
 void dsl_pool_create_origin(dsl_pool_t *dp, dmu_tx_t *tx);
 void dsl_pool_upgrade_clones(dsl_pool_t *dp, dmu_tx_t *tx);
 
-int dsl_pool_scrub_cancel(dsl_pool_t *dp);
-int dsl_pool_scrub_clean(dsl_pool_t *dp);
-void dsl_pool_scrub_sync(dsl_pool_t *dp, dmu_tx_t *tx);
-void dsl_pool_scrub_restart(dsl_pool_t *dp);
-void dsl_pool_scrub_ddt_entry(dsl_pool_t *dp, enum zio_checksum checksum,
-    const ddt_entry_t *dde);
-
 taskq_t *dsl_pool_vnrele_taskq(dsl_pool_t *dp);
 
 extern int dsl_pool_user_hold(dsl_pool_t *dp, uint64_t dsobj,
@@ -159,6 +138,7 @@
 extern int dsl_pool_user_release(dsl_pool_t *dp, uint64_t dsobj,
     const char *tag, dmu_tx_t *tx);
 extern void dsl_pool_clean_tmp_userrefs(dsl_pool_t *dp);
+int dsl_pool_open_special_dir(dsl_pool_t *dp, const char *name, dsl_dir_t **);
 
 #ifdef	__cplusplus
 }

--- a/usr/src/uts/common/fs/zfs/sys/dsl_prop.h	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/dsl_prop.h	Mon May 03 14:54:08 2010 -0700
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #ifndef	_SYS_DSL_PROP_H
@@ -91,7 +90,7 @@
     zprop_source_t source, int intsz, int numints, const void *buf);
 int dsl_props_set(const char *dsname, zprop_source_t source, nvlist_t *nvl);
 void dsl_dir_prop_set_uint64_sync(dsl_dir_t *dd, const char *name, uint64_t val,
-    cred_t *cr, dmu_tx_t *tx);
+    dmu_tx_t *tx);
 
 void dsl_prop_setarg_init_uint64(dsl_prop_setarg_t *psa, const char *propname,
     zprop_source_t source, uint64_t *value);

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/fs/zfs/sys/dsl_scan.h	Mon May 03 14:54:08 2010 -0700
@@ -0,0 +1,107 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#ifndef	_SYS_DSL_SCAN_H
+#define	_SYS_DSL_SCAN_H
+
+#include <sys/zfs_context.h>
+#include <sys/zio.h>
+#include <sys/ddt.h>
+#include <sys/bplist.h>
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+struct objset;
+struct dsl_dir;
+struct dsl_dataset;
+struct dsl_pool;
+struct dmu_tx;
+
+/*
+ * All members of this structure must be uint64_t, for byteswap
+ * purposes.
+ */
+typedef struct dsl_scan_phys {
+	uint64_t scn_func; /* pool_scan_func_t */
+	uint64_t scn_state; /* dsl_scan_state_t */
+	uint64_t scn_queue_obj;
+	uint64_t scn_min_txg;
+	uint64_t scn_max_txg;
+	uint64_t scn_cur_min_txg;
+	uint64_t scn_cur_max_txg;
+	uint64_t scn_start_time;
+	uint64_t scn_end_time;
+	uint64_t scn_to_examine; /* total bytes to be scanned */
+	uint64_t scn_examined; /* bytes scanned so far */
+	uint64_t scn_to_process;
+	uint64_t scn_processed;
+	uint64_t scn_errors;	/* scan I/O error count */
+	uint64_t scn_ddt_class_max;
+	ddt_bookmark_t scn_ddt_bookmark;
+	zbookmark_t scn_bookmark;
+	uint64_t scn_flags; /* dsl_scan_flags_t */
+} dsl_scan_phys_t;
+
+#define	SCAN_PHYS_NUMINTS (sizeof (dsl_scan_phys_t) / sizeof (uint64_t))
+
+typedef enum dsl_scan_flags {
+	DSF_VISIT_DS_AGAIN = 1<<0,
+} dsl_scan_flags_t;
+
+typedef struct dsl_scan {
+	struct dsl_pool *scn_dp;
+
+	boolean_t scn_pausing;
+	uint64_t scn_restart_txg;
+	uint64_t scn_sync_start_time;
+	zio_t *scn_prefetch_zio_root;
+
+	/* for debugging / information */
+	uint64_t scn_visited_this_txg;
+
+	dsl_scan_phys_t scn_phys;
+} dsl_scan_t;
+
+int dsl_scan_init(struct dsl_pool *dp, uint64_t txg);
+void dsl_scan_fini(struct dsl_pool *dp);
+void dsl_scan_sync(struct dsl_pool *, dmu_tx_t *);
+int dsl_scan_cancel(struct dsl_pool *);
+int dsl_scan(struct dsl_pool *, pool_scan_func_t);
+void dsl_resilver_restart(struct dsl_pool *, uint64_t txg);
+boolean_t dsl_scan_resilvering(struct dsl_pool *dp);
+boolean_t dsl_dataset_unstable(struct dsl_dataset *ds);
+void dsl_scan_ddt_entry(dsl_scan_t *scn, enum zio_checksum checksum,
+    ddt_entry_t *dde, dmu_tx_t *tx);
+void dsl_scan_ds_destroyed(struct dsl_dataset *ds, struct dmu_tx *tx);
+void dsl_scan_ds_snapshotted(struct dsl_dataset *ds, struct dmu_tx *tx);
+void dsl_scan_ds_clone_swapped(struct dsl_dataset *ds1, struct dsl_dataset *ds2,
+    struct dmu_tx *tx);
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif /* _SYS_DSL_SCAN_H */

--- a/usr/src/uts/common/fs/zfs/sys/dsl_synctask.h	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/dsl_synctask.h	Mon May 03 14:54:08 2010 -0700
@@ -19,15 +19,12 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #ifndef	_SYS_DSL_SYNCTASK_H
 #define	_SYS_DSL_SYNCTASK_H
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #include <sys/txg.h>
 #include <sys/zfs_context.h>
 
@@ -38,7 +35,7 @@
 struct dsl_pool;
 
 typedef int (dsl_checkfunc_t)(void *, void *, dmu_tx_t *);
-typedef void (dsl_syncfunc_t)(void *, void *, cred_t *, dmu_tx_t *);
+typedef void (dsl_syncfunc_t)(void *, void *, dmu_tx_t *);
 
 typedef struct dsl_sync_task {
 	list_node_t dst_node;
@@ -53,7 +50,6 @@
 	txg_node_t dstg_node;
 	list_t dstg_tasks;
 	struct dsl_pool *dstg_pool;
-	cred_t *dstg_cr;
 	uint64_t dstg_txg;
 	int dstg_err;
 	int dstg_space;

--- a/usr/src/uts/common/fs/zfs/sys/metaslab.h	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/metaslab.h	Mon May 03 14:54:08 2010 -0700
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #ifndef _SYS_METASLAB_H

--- a/usr/src/uts/common/fs/zfs/sys/refcount.h	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/refcount.h	Mon May 03 14:54:08 2010 -0700
@@ -19,15 +19,12 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #ifndef	_SYS_REFCOUNT_H
 #define	_SYS_REFCOUNT_H
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #include <sys/inttypes.h>
 #include <sys/list.h>
 #include <sys/zfs_context.h>
@@ -91,6 +88,11 @@
 	atomic_add_64_nv(&(rc)->rc_count, number)
 #define	refcount_remove_many(rc, number, holder) \
 	atomic_add_64_nv(&(rc)->rc_count, -number)
+#define	refcount_transfer(dst, src) { \
+	uint64_t __tmp = (src)->rc_count; \
+	atomic_add_64(&(src)->rc_count, -__tmp); \
+	atomic_add_64(&(dst)->rc_count, __tmp); \
+}
 
 #define	refcount_init()
 #define	refcount_fini()

--- a/usr/src/uts/common/fs/zfs/sys/spa.h	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/spa.h	Mon May 03 14:54:08 2010 -0700
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #ifndef _SYS_SPA_H
@@ -262,7 +261,7 @@
 
 #define	BP_GET_UCSIZE(bp) \
 	((BP_GET_LEVEL(bp) > 0 || dmu_ot[BP_GET_TYPE(bp)].ot_metadata) ? \
-	BP_GET_PSIZE(bp) : BP_GET_LSIZE(bp));
+	BP_GET_PSIZE(bp) : BP_GET_LSIZE(bp))
 
 #define	BP_GET_NDVAS(bp)	\
 	(!!DVA_GET_ASIZE(&(bp)->blk_dva[0]) + \
@@ -432,6 +431,8 @@
 extern void spa_async_resume(spa_t *spa);
 extern spa_t *spa_inject_addref(char *pool);
 extern void spa_inject_delref(spa_t *spa);
+extern void spa_scan_stat_init(spa_t *spa);
+extern int spa_scan_get_stats(spa_t *spa, pool_scan_stat_t *ps);
 
 #define	SPA_ASYNC_CONFIG_UPDATE	0x01
 #define	SPA_ASYNC_REMOVE	0x02
@@ -439,6 +440,14 @@
 #define	SPA_ASYNC_RESILVER_DONE	0x08
 #define	SPA_ASYNC_RESILVER	0x10
 #define	SPA_ASYNC_AUTOEXPAND	0x20
+#define	SPA_ASYNC_REMOVE_DONE	0x40
+#define	SPA_ASYNC_REMOVE_STOP	0x80
+
+/*
+ * Controls the behavior of spa_vdev_remove().
+ */
+#define	SPA_REMOVE_UNSPARE	0x01
+#define	SPA_REMOVE_DONE		0x02
 
 /* device manipulation */
 extern int spa_vdev_add(spa_t *spa, nvlist_t *nvroot);
@@ -447,6 +456,7 @@
 extern int spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid,
     int replace_done);
 extern int spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare);
+extern boolean_t spa_vdev_remove_active(spa_t *spa);
 extern int spa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath);
 extern int spa_vdev_setfru(spa_t *spa, uint64_t guid, const char *newfru);
 extern int spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config,
@@ -465,14 +475,19 @@
 extern void spa_l2cache_activate(vdev_t *vd);
 extern void spa_l2cache_drop(spa_t *spa);
 
-/* scrubbing */
-extern int spa_scrub(spa_t *spa, pool_scrub_type_t type);
+/* scanning */
+extern int spa_scan(spa_t *spa, pool_scan_func_t func);
+extern int spa_scan_stop(spa_t *spa);
 
 /* spa syncing */
 extern void spa_sync(spa_t *spa, uint64_t txg); /* only for DMU use */
 extern void spa_sync_allpools(void);
 
-#define	SYNC_PASS_DEFERRED_FREE	1	/* defer frees after this pass */
+/*
+ * DEFERRED_FREE must be large enough that regular blocks are not
+ * deferred.  XXX so can't we change it back to 1?
+ */
+#define	SYNC_PASS_DEFERRED_FREE	2	/* defer frees after this pass */
 #define	SYNC_PASS_DONT_COMPRESS	4	/* don't compress after this pass */
 #define	SYNC_PASS_REWRITE	1	/* rewrite new bps after this pass */
 
@@ -577,6 +592,7 @@
 extern metaslab_class_t *spa_normal_class(spa_t *spa);
 extern metaslab_class_t *spa_log_class(spa_t *spa);
 extern int spa_max_replication(spa_t *spa);
+extern int spa_prev_software_version(spa_t *spa);
 extern int spa_busy(void);
 extern uint8_t spa_get_failmode(spa_t *spa);
 extern boolean_t spa_suspended(spa_t *spa);
@@ -632,8 +648,8 @@
     char *his_buf);
 extern int spa_history_log(spa_t *spa, const char *his_buf,
     history_log_type_t what);
-extern void spa_history_internal_log(history_internal_events_t event,
-    spa_t *spa, dmu_tx_t *tx, cred_t *cr, const char *fmt, ...);
+extern void spa_history_log_internal(history_internal_events_t event,
+    spa_t *spa, dmu_tx_t *tx, const char *fmt, ...);
 extern void spa_history_log_version(spa_t *spa, history_internal_events_t evt);
 
 /* error handling */

--- a/usr/src/uts/common/fs/zfs/sys/spa_impl.h	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/spa_impl.h	Mon May 03 14:54:08 2010 -0700
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #ifndef _SYS_SPA_IMPL_H
@@ -150,13 +149,14 @@
 	kmutex_t	spa_scrub_lock;		/* resilver/scrub lock */
 	uint64_t	spa_scrub_inflight;	/* in-flight scrub I/Os */
 	uint64_t	spa_scrub_maxinflight;	/* max in-flight scrub I/Os */
-	uint64_t	spa_scrub_errors;	/* scrub I/O error count */
 	kcondvar_t	spa_scrub_io_cv;	/* scrub I/O completion */
 	uint8_t		spa_scrub_active;	/* active or suspended? */
 	uint8_t		spa_scrub_type;		/* type of scrub we're doing */
 	uint8_t		spa_scrub_finished;	/* indicator to rotate logs */
 	uint8_t		spa_scrub_started;	/* started since last boot */
 	uint8_t		spa_scrub_reopen;	/* scrub doing vdev_reopen */
+	uint64_t	spa_scan_pass_start;	/* start time per pass/reboot */
+	uint64_t	spa_scan_pass_exam;	/* examined bytes per pass */
 	kmutex_t	spa_async_lock;		/* protect async state */
 	kthread_t	*spa_async_thread;	/* thread doing async task */
 	int		spa_async_suspended;	/* async tasks suspended */
@@ -212,7 +212,8 @@
 	uint64_t	spa_did;		/* if procp != p0, did of t1 */
 	boolean_t	spa_autoreplace;	/* autoreplace set in open */
 	int		spa_vdev_locks;		/* locks grabbed */
-
+	uint64_t	spa_creation_version;	/* version at pool creation */
+	uint64_t	spa_prev_software_version;
 	/*
 	 * spa_refcnt & spa_config_lock must be the last elements
 	 * because refcount_t changes size based on compilation options.

--- a/usr/src/uts/common/fs/zfs/sys/uberblock_impl.h	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/uberblock_impl.h	Mon May 03 14:54:08 2010 -0700
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #ifndef _SYS_UBERBLOCK_IMPL_H
@@ -52,6 +51,9 @@
 	uint64_t	ub_guid_sum;	/* sum of all vdev guids	*/
 	uint64_t	ub_timestamp;	/* UTC time of last sync	*/
 	blkptr_t	ub_rootbp;	/* MOS objset_phys_t		*/
+
+	/* highest SPA_VERSION supported by software that wrote this txg */
+	uint64_t	ub_software_version;
 };
 
 #ifdef	__cplusplus

--- a/usr/src/uts/common/fs/zfs/sys/vdev.h	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/vdev.h	Mon May 03 14:54:08 2010 -0700
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #ifndef _SYS_VDEV_H
@@ -83,8 +82,7 @@
 extern void vdev_get_stats(vdev_t *vd, vdev_stat_t *vs);
 extern void vdev_clear_stats(vdev_t *vd);
 extern void vdev_stat_update(zio_t *zio, uint64_t psize);
-extern void vdev_scrub_stat_update(vdev_t *vd, pool_scrub_type_t type,
-    boolean_t complete);
+extern void vdev_scan_stat_init(vdev_t *vd);
 extern void vdev_propagate_state(vdev_t *vd);
 extern void vdev_set_state(vdev_t *vd, boolean_t isopen, vdev_state_t state,
     vdev_aux_t aux);
@@ -126,9 +124,15 @@
 extern void vdev_state_dirty(vdev_t *vd);
 extern void vdev_state_clean(vdev_t *vd);
 
+typedef enum vdev_config_flag {
+	VDEV_CONFIG_SPARE = 1 << 0,
+	VDEV_CONFIG_L2CACHE = 1 << 1,
+	VDEV_CONFIG_REMOVING = 1 << 2
+} vdev_config_flag_t;
+
 extern void vdev_top_config_generate(spa_t *spa, nvlist_t *config);
 extern nvlist_t *vdev_config_generate(spa_t *spa, vdev_t *vd,
-    boolean_t getstats, boolean_t isspare, boolean_t isl2cache);
+    boolean_t getstats, vdev_config_flag_t flags);
 
 /*
  * Label routines

--- a/usr/src/uts/common/fs/zfs/sys/vdev_impl.h	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/vdev_impl.h	Mon May 03 14:54:08 2010 -0700
@@ -151,7 +151,7 @@
 	txg_node_t	vdev_txg_node;	/* per-txg dirty vdev linkage	*/
 	boolean_t	vdev_remove_wanted; /* async remove wanted?	*/
 	boolean_t	vdev_probe_wanted; /* async probe wanted?	*/
-	boolean_t	vdev_removing;	/* device is being removed?	*/
+	uint64_t	vdev_removing;	/* device is being removed?	*/
 	list_node_t	vdev_config_dirty_node; /* config dirty list	*/
 	list_node_t	vdev_state_dirty_node; /* state dirty list	*/
 	uint64_t	vdev_deflate_ratio; /* deflation ratio (x512)	*/

--- a/usr/src/uts/common/fs/zfs/sys/zap.h	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/zap.h	Mon May 03 14:54:08 2010 -0700
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #ifndef	_SYS_ZAP_H
@@ -259,7 +258,6 @@
  */
 int zap_count(objset_t *ds, uint64_t zapobj, uint64_t *count);
 
-
 /*
  * Returns (in name) the name of the entry whose (value & mask)
  * (za_first_integer) is value, or ENOENT if not found.  The string
@@ -276,6 +274,14 @@
  */
 int zap_join(objset_t *os, uint64_t fromobj, uint64_t intoobj, dmu_tx_t *tx);
 
+/* Same as zap_join, but set the values to 'value'. */
+int zap_join_key(objset_t *os, uint64_t fromobj, uint64_t intoobj,
+    uint64_t value, dmu_tx_t *tx);
+
+/* Same as zap_join, but add together any duplicated entries. */
+int zap_join_increment(objset_t *os, uint64_t fromobj, uint64_t intoobj,
+    dmu_tx_t *tx);
+
 /*
  * Manipulate entries where the name + value are the "same" (the name is
  * a stringified version of the value).
@@ -286,6 +292,21 @@
 int zap_increment_int(objset_t *os, uint64_t obj, uint64_t key, int64_t delta,
     dmu_tx_t *tx);
 
+/* Here the key is an int and the value is a different int. */
+int zap_add_int_key(objset_t *os, uint64_t obj,
+    uint64_t key, uint64_t value, dmu_tx_t *tx);
+int zap_lookup_int_key(objset_t *os, uint64_t obj,
+    uint64_t key, uint64_t *valuep);
+
+/*
+ * They name is a stringified version of key; increment its value by
+ * delta.  Zero values will be zap_remove()-ed.
+ */
+int zap_increment_int(objset_t *os, uint64_t obj, uint64_t key, int64_t delta,
+    dmu_tx_t *tx);
+int zap_increment(objset_t *os, uint64_t obj, const char *name, int64_t delta,
+    dmu_tx_t *tx);
+
 struct zap;
 struct zap_leaf;
 typedef struct zap_cursor {

--- a/usr/src/uts/common/fs/zfs/sys/zap_impl.h	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/zap_impl.h	Mon May 03 14:54:08 2010 -0700
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #ifndef	_SYS_ZAP_IMPL_H
@@ -67,9 +66,12 @@
 	avl_node_t mze_node;
 	int mze_chunkid;
 	uint64_t mze_hash;
-	mzap_ent_phys_t mze_phys;
+	uint32_t mze_cd; /* copy from mze_phys->mze_cd */
 } mzap_ent_t;
 
+#define	MZE_PHYS(zap, mze) \
+	(&(zap)->zap_m.zap_phys->mz_chunk[(mze)->mze_chunkid])
+
 /*
  * The (fat) zap is stored in one object. It is an array of
  * 1<<FZAP_BLOCK_SHIFT byte blocks. The layout looks like one of:

--- a/usr/src/uts/common/fs/zfs/sys/zap_leaf.h	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/zap_leaf.h	Mon May 03 14:54:08 2010 -0700
@@ -19,13 +19,14 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #ifndef	_SYS_ZAP_LEAF_H
 #define	_SYS_ZAP_LEAF_H
 
+#include <sys/zap.h>
+
 #ifdef	__cplusplus
 extern "C" {
 #endif

--- a/usr/src/uts/common/fs/zfs/sys/zfs_debug.h	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/zfs_debug.h	Mon May 03 14:54:08 2010 -0700
@@ -19,15 +19,12 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #ifndef _SYS_ZFS_DEBUG_H
 #define	_SYS_ZFS_DEBUG_H
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #ifdef	__cplusplus
 extern "C" {
 #endif
@@ -68,6 +65,16 @@
 
 extern void zfs_panic_recover(const char *fmt, ...);
 
+typedef struct zfs_dbgmsg {
+	list_node_t zdm_node;
+	time_t zdm_timestamp;
+	char zdm_msg[1]; /* variable length allocation */
+} zfs_dbgmsg_t;
+
+extern void zfs_dbgmsg_init(void);
+extern void zfs_dbgmsg_fini(void);
+extern void zfs_dbgmsg(const char *fmt, ...);
+
 #ifdef	__cplusplus
 }
 #endif

--- a/usr/src/uts/common/fs/zfs/txg.c	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/txg.c	Mon May 03 14:54:08 2010 -0700
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #include <sys/zfs_context.h>
@@ -28,6 +27,7 @@
 #include <sys/dmu_impl.h>
 #include <sys/dmu_tx.h>
 #include <sys/dsl_pool.h>
+#include <sys/dsl_scan.h>
 #include <sys/callb.h>
 
 /*
@@ -136,7 +136,7 @@
 	 * 32-bit x86.  This is due in part to nested pools and
 	 * scrub_visitbp() recursion.
 	 */
-	tx->tx_sync_thread = thread_create(NULL, 12<<10, txg_sync_thread,
+	tx->tx_sync_thread = thread_create(NULL, 32<<10, txg_sync_thread,
 	    dp, 0, &p0, TS_RUN, minclsyspri);
 
 	mutex_exit(&tx->tx_sync_lock);
@@ -366,12 +366,12 @@
 		uint64_t txg;
 
 		/*
-		 * We sync when we're scrubbing, there's someone waiting
+		 * We sync when we're scanning, there's someone waiting
 		 * on us, or the quiesce thread has handed off a txg to
 		 * us, or we have reached our timeout.
 		 */
 		timer = (delta >= timeout ? 0 : timeout - delta);
-		while ((dp->dp_scrub_func == SCRUB_FUNC_NONE ||
+		while ((dp->dp_scan->scn_phys.scn_state != DSS_SCANNING ||
 		    spa_load_state(spa) != SPA_LOAD_NONE ||
 		    spa_shutting_down(spa)) &&
 		    !tx->tx_exiting && timer > 0 &&

--- a/usr/src/uts/common/fs/zfs/uberblock.c	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/uberblock.c	Mon May 03 14:54:08 2010 -0700
@@ -19,12 +19,9 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #include <sys/zfs_context.h>
 #include <sys/uberblock_impl.h>
 #include <sys/vdev_impl.h>
@@ -58,6 +55,7 @@
 	ub->ub_txg = txg;
 	ub->ub_guid_sum = rvd->vdev_guid_sum;
 	ub->ub_timestamp = gethrestime_sec();
+	ub->ub_software_version = SPA_VERSION;
 
 	return (ub->ub_rootbp.blk_birth == txg);
 }

--- a/usr/src/uts/common/fs/zfs/vdev.c	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/vdev.c	Mon May 03 14:54:08 2010 -0700
@@ -39,6 +39,7 @@
 #include <sys/fs/zfs.h>
 #include <sys/arc.h>
 #include <sys/zil.h>
+#include <sys/dsl_scan.h>
 
 /*
  * Virtual device management.
@@ -486,6 +487,8 @@
 		    &vd->vdev_ms_shift);
 		(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ASIZE,
 		    &vd->vdev_asize);
+		(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_REMOVING,
+		    &vd->vdev_removing);
 	}
 
 	if (parent && !parent->vdev_parent) {
@@ -860,7 +863,12 @@
 	if (txg == 0)
 		spa_config_enter(spa, SCL_ALLOC, FTAG, RW_WRITER);
 
-	if (oldc == 0)
+	/*
+	 * If the vdev is being removed we don't activate
+	 * the metaslabs since we want to ensure that no new
+	 * allocations are performed on this device.
+	 */
+	if (oldc == 0 && !vd->vdev_removing)
 		metaslab_group_activate(vd->vdev_mg);
 
 	if (txg == 0)
@@ -1648,9 +1656,12 @@
 		return;
 
 	if (vd->vdev_ops->vdev_op_leaf) {
+		dsl_scan_t *scn = spa->spa_dsl_pool->dp_scan;
+
 		mutex_enter(&vd->vdev_dtl_lock);
 		if (scrub_txg != 0 &&
-		    (spa->spa_scrub_started || spa->spa_scrub_errors == 0)) {
+		    (spa->spa_scrub_started ||
+		    (scn && scn->scn_phys.scn_errors == 0))) {
 			/*
 			 * We completed a scrub up to scrub_txg.  If we
 			 * did it without rebooting, then the scrub dtl
@@ -2029,7 +2040,10 @@
 		dmu_tx_commit(tx);
 	}
 
-	if (vd->vdev_removing)
+	/*
+	 * Remove the metadata associated with this vdev once it's empty.
+	 */
+	if (vd->vdev_stat.vs_alloc == 0 && vd->vdev_removing)
 		vdev_remove(vd, txg);
 
 	while ((msp = txg_list_remove(&vd->vdev_ms_list, txg)) != NULL) {
@@ -2403,7 +2417,7 @@
 	 * we're asking two separate questions about it.
 	 */
 	return (!(state < VDEV_STATE_DEGRADED && state != VDEV_STATE_CLOSED) &&
-	    !vd->vdev_cant_write && !vd->vdev_ishole && !vd->vdev_removing);
+	    !vd->vdev_cant_write && !vd->vdev_ishole);
 }
 
 boolean_t
@@ -2433,7 +2447,6 @@
 
 	mutex_enter(&vd->vdev_stat_lock);
 	bcopy(&vd->vdev_stat, vs, sizeof (*vs));
-	vs->vs_scrub_errors = vd->vdev_spa->spa_scrub_errors;
 	vs->vs_timestamp = gethrtime() - vs->vs_timestamp;
 	vs->vs_state = vd->vdev_state;
 	vs->vs_rsize = vdev_get_min_asize(vd);
@@ -2455,7 +2468,7 @@
 				vs->vs_ops[t] += cvs->vs_ops[t];
 				vs->vs_bytes[t] += cvs->vs_bytes[t];
 			}
-			vs->vs_scrub_examined += cvs->vs_scrub_examined;
+			cvs->vs_scan_removing = cvd->vdev_removing;
 			mutex_exit(&vd->vdev_stat_lock);
 		}
 	}
@@ -2472,6 +2485,19 @@
 }
 
 void
+vdev_scan_stat_init(vdev_t *vd)
+{
+	vdev_stat_t *vs = &vd->vdev_stat;
+
+	for (int c = 0; c < vd->vdev_children; c++)
+		vdev_scan_stat_init(vd->vdev_child[c]);
+
+	mutex_enter(&vd->vdev_stat_lock);
+	vs->vs_scan_processed = 0;
+	mutex_exit(&vd->vdev_stat_lock);
+}
+
+void
 vdev_stat_update(zio_t *zio, uint64_t psize)
 {
 	spa_t *spa = zio->io_spa;
@@ -2515,8 +2541,17 @@
 		mutex_enter(&vd->vdev_stat_lock);
 
 		if (flags & ZIO_FLAG_IO_REPAIR) {
-			if (flags & ZIO_FLAG_SCRUB_THREAD)
-				vs->vs_scrub_repaired += psize;
+			if (flags & ZIO_FLAG_SCRUB_THREAD) {
+				dsl_scan_phys_t *scn_phys =
+				    &spa->spa_dsl_pool->dp_scan->scn_phys;
+				uint64_t *processed = &scn_phys->scn_processed;
+
+				/* XXX cleanup? */
+				if (vd->vdev_ops->vdev_op_leaf)
+					atomic_add_64(processed, psize);
+				vs->vs_scan_processed += psize;
+			}
+
 			if (flags & ZIO_FLAG_SELF_HEAL)
 				vs->vs_self_healed += psize;
 		}
@@ -2602,35 +2637,6 @@
 	}
 }
 
-void
-vdev_scrub_stat_update(vdev_t *vd, pool_scrub_type_t type, boolean_t complete)
-{
-	vdev_stat_t *vs = &vd->vdev_stat;
-
-	for (int c = 0; c < vd->vdev_children; c++)
-		vdev_scrub_stat_update(vd->vdev_child[c], type, complete);
-
-	mutex_enter(&vd->vdev_stat_lock);
-
-	if (type == POOL_SCRUB_NONE) {
-		/*
-		 * Update completion and end time.  Leave everything else alone
-		 * so we can report what happened during the previous scrub.
-		 */
-		vs->vs_scrub_complete = complete;
-		vs->vs_scrub_end = gethrestime_sec();
-	} else {
-		vs->vs_scrub_type = type;
-		vs->vs_scrub_complete = 0;
-		vs->vs_scrub_examined = 0;
-		vs->vs_scrub_repaired = 0;
-		vs->vs_scrub_start = gethrestime_sec();
-		vs->vs_scrub_end = 0;
-	}
-
-	mutex_exit(&vd->vdev_stat_lock);
-}
-
 /*
  * Update the in-core space usage stats for this vdev, its metaslab class,
  * and the root vdev.
@@ -2730,7 +2736,7 @@
 		 * sketchy, but it will work.
 		 */
 		nvlist_free(aux[c]);
-		aux[c] = vdev_config_generate(spa, vd, B_TRUE, B_FALSE, B_TRUE);
+		aux[c] = vdev_config_generate(spa, vd, B_TRUE, 0);
 
 		return;
 	}

--- a/usr/src/uts/common/fs/zfs/vdev_label.c	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/vdev_label.c	Mon May 03 14:54:08 2010 -0700
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 /*
@@ -141,6 +140,7 @@
 #include <sys/uberblock_impl.h>
 #include <sys/metaslab.h>
 #include <sys/zio.h>
+#include <sys/dsl_scan.h>
 #include <sys/fs/zfs.h>
 
 /*
@@ -208,7 +208,7 @@
  */
 nvlist_t *
 vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats,
-    boolean_t isspare, boolean_t isl2cache)
+    vdev_config_flag_t flags)
 {
 	nvlist_t *nv = NULL;
 
@@ -216,7 +216,7 @@
 
 	VERIFY(nvlist_add_string(nv, ZPOOL_CONFIG_TYPE,
 	    vd->vdev_ops->vdev_op_type) == 0);
-	if (!isspare && !isl2cache)
+	if (!(flags & (VDEV_CONFIG_SPARE | VDEV_CONFIG_L2CACHE)))
 		VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_ID, vd->vdev_id)
 		    == 0);
 	VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_GUID, vd->vdev_guid) == 0);
@@ -270,7 +270,8 @@
 	if (vd->vdev_isspare)
 		VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_IS_SPARE, 1) == 0);
 
-	if (!isspare && !isl2cache && vd == vd->vdev_top) {
+	if (!(flags & (VDEV_CONFIG_SPARE | VDEV_CONFIG_L2CACHE)) &&
+	    vd == vd->vdev_top) {
 		VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_METASLAB_ARRAY,
 		    vd->vdev_ms_array) == 0);
 		VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_METASLAB_SHIFT,
@@ -281,6 +282,9 @@
 		    vd->vdev_asize) == 0);
 		VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_IS_LOG,
 		    vd->vdev_islog) == 0);
+		if (vd->vdev_removing)
+			VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_REMOVING,
+			    vd->vdev_removing) == 0);
 	}
 
 	if (vd->vdev_dtl_smo.smo_object != 0)
@@ -293,28 +297,52 @@
 
 	if (getstats) {
 		vdev_stat_t vs;
+		pool_scan_stat_t ps;
+
 		vdev_get_stats(vd, &vs);
-		VERIFY(nvlist_add_uint64_array(nv, ZPOOL_CONFIG_STATS,
+		VERIFY(nvlist_add_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS,
 		    (uint64_t *)&vs, sizeof (vs) / sizeof (uint64_t)) == 0);
+
+		/* provide either current or previous scan information */
+		if (spa_scan_get_stats(spa, &ps) == 0) {
+			VERIFY(nvlist_add_uint64_array(nv,
+			    ZPOOL_CONFIG_SCAN_STATS, (uint64_t *)&ps,
+			    sizeof (pool_scan_stat_t) / sizeof (uint64_t))
+			    == 0);
+		}
 	}
 
 	if (!vd->vdev_ops->vdev_op_leaf) {
 		nvlist_t **child;
-		int c;
+		int c, idx;
 
 		ASSERT(!vd->vdev_ishole);
 
 		child = kmem_alloc(vd->vdev_children * sizeof (nvlist_t *),
 		    KM_SLEEP);
 
-		for (c = 0; c < vd->vdev_children; c++)
-			child[c] = vdev_config_generate(spa, vd->vdev_child[c],
-			    getstats, isspare, isl2cache);
+		for (c = 0, idx = 0; c < vd->vdev_children; c++) {
+			vdev_t *cvd = vd->vdev_child[c];
 
-		VERIFY(nvlist_add_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
-		    child, vd->vdev_children) == 0);
+			/*
+			 * If we're generating an nvlist of removing
+			 * vdevs then skip over any device which is
+			 * not being removed.
+			 */
+			if ((flags & VDEV_CONFIG_REMOVING) &&
+			    !cvd->vdev_removing)
+				continue;
 
-		for (c = 0; c < vd->vdev_children; c++)
+			child[idx++] = vdev_config_generate(spa, cvd,
+			    getstats, flags);
+		}
+
+		if (idx) {
+			VERIFY(nvlist_add_nvlist_array(nv,
+			    ZPOOL_CONFIG_CHILDREN, child, idx) == 0);
+		}
+
+		for (c = 0; c < idx; c++)
 			nvlist_free(child[c]);
 
 		kmem_free(child, vd->vdev_children * sizeof (nvlist_t *));
@@ -375,12 +403,11 @@
 {
 	vdev_t *rvd = spa->spa_root_vdev;
 	uint64_t *array;
-	uint_t idx;
+	uint_t c, idx;
 
 	array = kmem_alloc(rvd->vdev_children * sizeof (uint64_t), KM_SLEEP);
 
-	idx = 0;
-	for (int c = 0; c < rvd->vdev_children; c++) {
+	for (c = 0, idx = 0; c < rvd->vdev_children; c++) {
 		vdev_t *tvd = rvd->vdev_child[c];
 
 		if (tvd->vdev_ishole)

--- a/usr/src/uts/common/fs/zfs/vdev_raidz.c	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/vdev_raidz.c	Mon May 03 14:54:08 2010 -0700
@@ -20,8 +20,7 @@
  */
 
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #include <sys/zfs_context.h>
@@ -1604,6 +1603,7 @@
 	return (ZIO_PIPELINE_CONTINUE);
 }
 
+
 /*
  * Report a checksum error for a child of a RAID-Z device.
  */

--- a/usr/src/uts/common/fs/zfs/zap.c	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/zap.c	Mon May 03 14:54:08 2010 -0700
@@ -978,6 +978,56 @@
 }
 
 int
+zap_join_key(objset_t *os, uint64_t fromobj, uint64_t intoobj,
+    uint64_t value, dmu_tx_t *tx)
+{
+	zap_cursor_t zc;
+	zap_attribute_t za;
+	int err;
+
+	for (zap_cursor_init(&zc, os, fromobj);
+	    zap_cursor_retrieve(&zc, &za) == 0;
+	    (void) zap_cursor_advance(&zc)) {
+		if (za.za_integer_length != 8 || za.za_num_integers != 1)
+			return (EINVAL);
+		err = zap_add(os, intoobj, za.za_name,
+		    8, 1, &value, tx);
+		if (err)
+			return (err);
+	}
+	zap_cursor_fini(&zc);
+	return (0);
+}
+
+int
+zap_join_increment(objset_t *os, uint64_t fromobj, uint64_t intoobj,
+    dmu_tx_t *tx)
+{
+	zap_cursor_t zc;
+	zap_attribute_t za;
+	int err;
+
+	for (zap_cursor_init(&zc, os, fromobj);
+	    zap_cursor_retrieve(&zc, &za) == 0;
+	    (void) zap_cursor_advance(&zc)) {
+		uint64_t delta = 0;
+
+		if (za.za_integer_length != 8 || za.za_num_integers != 1)
+			return (EINVAL);
+
+		err = zap_lookup(os, intoobj, za.za_name, 8, 1, &delta);
+		if (err != 0 && err != ENOENT)
+			return (err);
+		delta += za.za_first_integer;
+		err = zap_update(os, intoobj, za.za_name, 8, 1, &delta, tx);
+		if (err)
+			return (err);
+	}
+	zap_cursor_fini(&zc);
+	return (0);
+}
+
+int
 zap_add_int(objset_t *os, uint64_t obj, uint64_t value, dmu_tx_t *tx)
 {
 	char name[20];
@@ -1005,17 +1055,34 @@
 }
 
 int
-zap_increment_int(objset_t *os, uint64_t obj, uint64_t key, int64_t delta,
+zap_add_int_key(objset_t *os, uint64_t obj,
+    uint64_t key, uint64_t value, dmu_tx_t *tx)
+{
+	char name[20];
+
+	(void) snprintf(name, sizeof (name), "%llx", (longlong_t)key);
+	return (zap_add(os, obj, name, 8, 1, &value, tx));
+}
+
+int
+zap_lookup_int_key(objset_t *os, uint64_t obj, uint64_t key, uint64_t *valuep)
+{
+	char name[20];
+
+	(void) snprintf(name, sizeof (name), "%llx", (longlong_t)key);
+	return (zap_lookup(os, obj, name, 8, 1, valuep));
+}
+
+int
+zap_increment(objset_t *os, uint64_t obj, const char *name, int64_t delta,
     dmu_tx_t *tx)
 {
-	char name[20];
 	uint64_t value = 0;
 	int err;
 
 	if (delta == 0)
 		return (0);
 
-	(void) snprintf(name, sizeof (name), "%llx", (longlong_t)key);
 	err = zap_lookup(os, obj, name, 8, 1, &value);
 	if (err != 0 && err != ENOENT)
 		return (err);
@@ -1027,6 +1094,15 @@
 	return (err);
 }
 
+int
+zap_increment_int(objset_t *os, uint64_t obj, uint64_t key, int64_t delta,
+    dmu_tx_t *tx)
+{
+	char name[20];
+
+	(void) snprintf(name, sizeof (name), "%llx", (longlong_t)key);
+	return (zap_increment(os, obj, name, delta, tx));
+}
 
 /*
  * Routines for iterating over the attributes.
@@ -1101,7 +1177,6 @@
 	return (err);
 }
 
-
 static void
 zap_stats_ptrtbl(zap_t *zap, uint64_t *tbl, int len, zap_stats_t *zs)
 {

--- a/usr/src/uts/common/fs/zfs/zap_leaf.c	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/zap_leaf.c	Mon May 03 14:54:08 2010 -0700
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 /*
@@ -37,6 +36,7 @@
 #include <sys/zap.h>
 #include <sys/zap_impl.h>
 #include <sys/zap_leaf.h>
+#include <sys/arc.h>
 
 static uint16_t *zap_leaf_rehash_entry(zap_leaf_t *l, uint16_t entry);
 
@@ -538,14 +538,6 @@
 	if ((int)l->l_phys->l_hdr.lh_nfree < delta_chunks)
 		return (EAGAIN);
 
-	/*
-	 * We should search other chained leaves (via
-	 * zap_entry_remove,create?) otherwise returning EAGAIN will
-	 * just send us into an infinite loop if we have to chain
-	 * another leaf block, rather than being able to split this
-	 * block.
-	 */
-
 	zap_leaf_array_free(l, &le->le_value_chunk);
 	le->le_value_chunk =
 	    zap_leaf_array_create(l, buf, integer_size, num_integers);

--- a/usr/src/uts/common/fs/zfs/zap_micro.c	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/zap_micro.c	Mon May 03 14:54:08 2010 -0700
@@ -31,6 +31,7 @@
 #include <sys/zap_impl.h>
 #include <sys/zap_leaf.h>
 #include <sys/avl.h>
+#include <sys/arc.h>
 
 #ifdef _KERNEL
 #include <sys/sunddi.h>
@@ -254,26 +255,26 @@
 		return (+1);
 	if (mze1->mze_hash < mze2->mze_hash)
 		return (-1);
-	if (mze1->mze_phys.mze_cd > mze2->mze_phys.mze_cd)
+	if (mze1->mze_cd > mze2->mze_cd)
 		return (+1);
-	if (mze1->mze_phys.mze_cd < mze2->mze_phys.mze_cd)
+	if (mze1->mze_cd < mze2->mze_cd)
 		return (-1);
 	return (0);
 }
 
 static void
-mze_insert(zap_t *zap, int chunkid, uint64_t hash, mzap_ent_phys_t *mzep)
+mze_insert(zap_t *zap, int chunkid, uint64_t hash)
 {
 	mzap_ent_t *mze;
 
 	ASSERT(zap->zap_ismicro);
 	ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
-	ASSERT(mzep->mze_cd < zap_maxcd(zap));
 
 	mze = kmem_alloc(sizeof (mzap_ent_t), KM_SLEEP);
 	mze->mze_chunkid = chunkid;
 	mze->mze_hash = hash;
-	mze->mze_phys = *mzep;
+	mze->mze_cd = MZE_PHYS(zap, mze)->mze_cd;
+	ASSERT(MZE_PHYS(zap, mze)->mze_name[0] != 0);
 	avl_add(&zap->zap_m.zap_avl, mze);
 }
 
@@ -289,14 +290,15 @@
 	ASSERT(RW_LOCK_HELD(&zn->zn_zap->zap_rwlock));
 
 	mze_tofind.mze_hash = zn->zn_hash;
-	mze_tofind.mze_phys.mze_cd = 0;
+	mze_tofind.mze_cd = 0;
 
 again:
 	mze = avl_find(avl, &mze_tofind, &idx);
 	if (mze == NULL)
 		mze = avl_nearest(avl, idx, AVL_AFTER);
 	for (; mze && mze->mze_hash == zn->zn_hash; mze = AVL_NEXT(avl, mze)) {
-		if (zap_match(zn, mze->mze_phys.mze_name))
+		ASSERT3U(mze->mze_cd, ==, MZE_PHYS(zn->zn_zap, mze)->mze_cd);
+		if (zap_match(zn, MZE_PHYS(zn->zn_zap, mze)->mze_name))
 			return (mze);
 	}
 	if (zn->zn_matchtype == MT_BEST) {
@@ -319,12 +321,12 @@
 	ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
 
 	mze_tofind.mze_hash = hash;
-	mze_tofind.mze_phys.mze_cd = 0;
+	mze_tofind.mze_cd = 0;
 
 	cd = 0;
 	for (mze = avl_find(avl, &mze_tofind, &idx);
 	    mze && mze->mze_hash == hash; mze = AVL_NEXT(avl, mze)) {
-		if (mze->mze_phys.mze_cd != cd)
+		if (mze->mze_cd != cd)
 			break;
 		cd++;
 	}
@@ -408,7 +410,7 @@
 				zap->zap_m.zap_num_entries++;
 				zn = zap_name_alloc(zap, mze->mze_name,
 				    MT_EXACT);
-				mze_insert(zap, i, zn->zn_hash, mze);
+				mze_insert(zap, i, zn->zn_hash);
 				zap_name_free(zn);
 			}
 		}
@@ -727,11 +729,11 @@
 	    other = avl_walk(&zap->zap_m.zap_avl, other, direction)) {
 
 		if (zn == NULL) {
-			zn = zap_name_alloc(zap, mze->mze_phys.mze_name,
+			zn = zap_name_alloc(zap, MZE_PHYS(zap, mze)->mze_name,
 			    MT_FIRST);
 			allocdzn = B_TRUE;
 		}
-		if (zap_match(zn, other->mze_phys.mze_name)) {
+		if (zap_match(zn, MZE_PHYS(zap, other)->mze_name)) {
 			if (allocdzn)
 				zap_name_free(zn);
 			return (B_TRUE);
@@ -793,9 +795,10 @@
 			} else if (integer_size != 8) {
 				err = EINVAL;
 			} else {
-				*(uint64_t *)buf = mze->mze_phys.mze_value;
+				*(uint64_t *)buf =
+				    MZE_PHYS(zap, mze)->mze_value;
 				(void) strlcpy(realname,
-				    mze->mze_phys.mze_name, rn_len);
+				    MZE_PHYS(zap, mze)->mze_name, rn_len);
 				if (ncp) {
 					*ncp = mzap_normalization_conflict(zap,
 					    zn, mze);
@@ -932,7 +935,7 @@
 			if (zap->zap_m.zap_alloc_next ==
 			    zap->zap_m.zap_num_chunks)
 				zap->zap_m.zap_alloc_next = 0;
-			mze_insert(zap, i, zn->zn_hash, mze);
+			mze_insert(zap, i, zn->zn_hash);
 			return;
 		}
 	}
@@ -1017,10 +1020,20 @@
 {
 	zap_t *zap;
 	mzap_ent_t *mze;
+	uint64_t oldval;
 	const uint64_t *intval = val;
 	zap_name_t *zn;
 	int err;
 
+#ifdef ZFS_DEBUG
+	/*
+	 * If there is an old value, it shouldn't change across the
+	 * lockdir (eg, due to bprewrite's xlation).
+	 */
+	if (integer_size == 8 && num_integers == 1)
+		(void) zap_lookup(os, zapobj, name, 8, 1, &oldval);
+#endif
+
 	err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, &zap);
 	if (err)
 		return (err);
@@ -1044,9 +1057,8 @@
 	} else {
 		mze = mze_find(zn);
 		if (mze != NULL) {
-			mze->mze_phys.mze_value = *intval;
-			zap->zap_m.zap_phys->mz_chunk
-			    [mze->mze_chunkid].mze_value = *intval;
+			ASSERT3U(MZE_PHYS(zap, mze)->mze_value, ==, oldval);
+			MZE_PHYS(zap, mze)->mze_value = *intval;
 		} else {
 			mzap_addent(zn, *intval);
 		}
@@ -1245,7 +1257,7 @@
 		err = ENOENT;
 
 		mze_tofind.mze_hash = zc->zc_hash;
-		mze_tofind.mze_phys.mze_cd = zc->zc_cd;
+		mze_tofind.mze_cd = zc->zc_cd;
 
 		mze = avl_find(&zc->zc_zap->zap_m.zap_avl, &mze_tofind, &idx);
 		if (mze == NULL) {
@@ -1253,18 +1265,16 @@
 			    idx, AVL_AFTER);
 		}
 		if (mze) {
-			ASSERT(0 == bcmp(&mze->mze_phys,
-			    &zc->zc_zap->zap_m.zap_phys->mz_chunk
-			    [mze->mze_chunkid], sizeof (mze->mze_phys)));
-
+			mzap_ent_phys_t *mzep = MZE_PHYS(zc->zc_zap, mze);
+			ASSERT3U(mze->mze_cd, ==, mzep->mze_cd);
 			za->za_normalization_conflict =
 			    mzap_normalization_conflict(zc->zc_zap, NULL, mze);
 			za->za_integer_length = 8;
 			za->za_num_integers = 1;
-			za->za_first_integer = mze->mze_phys.mze_value;
-			(void) strcpy(za->za_name, mze->mze_phys.mze_name);
+			za->za_first_integer = mzep->mze_value;
+			(void) strcpy(za->za_name, mzep->mze_name);
 			zc->zc_hash = mze->mze_hash;
-			zc->zc_cd = mze->mze_phys.mze_cd;
+			zc->zc_cd = mze->mze_cd;
 			err = 0;
 		} else {
 			zc->zc_hash = -1ULL;
@@ -1313,7 +1323,7 @@
 			goto out;
 		}
 		zc->zc_hash = mze->mze_hash;
-		zc->zc_cd = mze->mze_phys.mze_cd;
+		zc->zc_cd = mze->mze_cd;
 	}
 
 out:

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/fs/zfs/zfs_debug.c	Mon May 03 14:54:08 2010 -0700
@@ -0,0 +1,95 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#include <sys/zfs_context.h>
+
+list_t zfs_dbgmsgs;
+int zfs_dbgmsg_size;
+kmutex_t zfs_dbgmsgs_lock;
+int zfs_dbgmsg_maxsize = 1<<20; /* 1MB */
+
+void
+zfs_dbgmsg_init(void)
+{
+	list_create(&zfs_dbgmsgs, sizeof (zfs_dbgmsg_t),
+	    offsetof(zfs_dbgmsg_t, zdm_node));
+	mutex_init(&zfs_dbgmsgs_lock, NULL, MUTEX_DEFAULT, NULL);
+}
+
+void
+zfs_dbgmsg_fini(void)
+{
+	zfs_dbgmsg_t *zdm;
+
+	while ((zdm = list_remove_head(&zfs_dbgmsgs)) != NULL) {
+		int size = sizeof (zfs_dbgmsg_t) + strlen(zdm->zdm_msg);
+		kmem_free(zdm, size);
+		zfs_dbgmsg_size -= size;
+	}
+	mutex_destroy(&zfs_dbgmsgs_lock);
+	ASSERT3U(zfs_dbgmsg_size, ==, 0);
+}
+
+/*
+ * Print these messages by running:
+ * 	echo ::zfs_dbgmsg | mdb -k
+ *
+ * Monitor these messages by running:
+ * 	dtrace -q -n 'zfs-dbgmsg{printf("%s\n", stringof(arg0))}'
+ */
+void
+zfs_dbgmsg(const char *fmt, ...)
+{
+	int size;
+	va_list adx;
+	zfs_dbgmsg_t *zdm;
+
+	va_start(adx, fmt);
+	size = vsnprintf(NULL, 0, fmt, adx);
+	va_end(adx);
+
+	/*
+	 * There is one byte of string in sizeof (zfs_dbgmsg_t), used
+	 * for the terminating null.
+	 */
+	zdm = kmem_alloc(sizeof (zfs_dbgmsg_t) + size, KM_SLEEP);
+	zdm->zdm_timestamp = gethrestime_sec();
+
+	va_start(adx, fmt);
+	(void) vsnprintf(zdm->zdm_msg, size + 1, fmt, adx);
+	va_end(adx);
+
+	DTRACE_PROBE1(zfs__dbgmsg, char *, zdm->zdm_msg);
+
+	mutex_enter(&zfs_dbgmsgs_lock);
+	list_insert_tail(&zfs_dbgmsgs, zdm);
+	zfs_dbgmsg_size += sizeof (zfs_dbgmsg_t) + size;
+	while (zfs_dbgmsg_size > zfs_dbgmsg_maxsize) {
+		zdm = list_remove_head(&zfs_dbgmsgs);
+		size = sizeof (zfs_dbgmsg_t) + strlen(zdm->zdm_msg);
+		kmem_free(zdm, size);
+		zfs_dbgmsg_size -= size;
+	}
+	mutex_exit(&zfs_dbgmsgs_lock);
+}

--- a/usr/src/uts/common/fs/zfs/zfs_ioctl.c	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/zfs_ioctl.c	Mon May 03 14:54:08 2010 -0700
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #include <sys/types.h>
@@ -62,6 +61,7 @@
 #include <sys/zfs_ctldir.h>
 #include <sys/zfs_dir.h>
 #include <sys/zvol.h>
+#include <sys/dsl_scan.h>
 #include <sharefs/share.h>
 #include <sys/dmu_objset.h>
 
@@ -117,7 +117,7 @@
 __dprintf(const char *file, const char *func, int line, const char *fmt, ...)
 {
 	const char *newfile;
-	char buf[256];
+	char buf[512];
 	va_list adx;
 
 	/*
@@ -1237,8 +1237,13 @@
 	return (error);
 }
 
+/*
+ * inputs:
+ * zc_name              name of the pool
+ * zc_cookie            scan func (pool_scan_func_t)
+ */
 static int
-zfs_ioc_pool_scrub(zfs_cmd_t *zc)
+zfs_ioc_pool_scan(zfs_cmd_t *zc)
 {
 	spa_t *spa;
 	int error;
@@ -1246,7 +1251,10 @@
 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
 		return (error);
 
-	error = spa_scrub(spa, zc->zc_cookie);
+	if (zc->zc_cookie == POOL_SCAN_NONE)
+		error = spa_scan_stop(spa);
+	else
+		error = spa_scan(spa, zc->zc_cookie);
 
 	spa_close(spa, FTAG);
 
@@ -1402,6 +1410,12 @@
 	return (error);
 }
 
+/*
+ * inputs:
+ * zc_name		name of the pool
+ * zc_nvlist_conf	nvlist of devices to remove
+ * zc_cookie		to stop the remove?
+ */
 static int
 zfs_ioc_vdev_remove(zfs_cmd_t *zc)
 {
@@ -4250,7 +4264,7 @@
 	    B_FALSE },
 	{ zfs_ioc_pool_tryimport, zfs_secpolicy_config, NO_NAME, B_FALSE,
 	    B_FALSE },
-	{ zfs_ioc_pool_scrub, zfs_secpolicy_config, POOL_NAME, B_TRUE,
+	{ zfs_ioc_pool_scan, zfs_secpolicy_config, POOL_NAME, B_TRUE,
 	    B_TRUE },
 	{ zfs_ioc_pool_freeze, zfs_secpolicy_config, NO_NAME, B_FALSE,
 	    B_FALSE },

--- a/usr/src/uts/common/fs/zfs/zfs_vfsops.c	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/zfs_vfsops.c	Mon May 03 14:54:08 2010 -0700
@@ -560,34 +560,6 @@
 
 }
 
-static void
-uidacct(objset_t *os, boolean_t isgroup, uint64_t fuid,
-    int64_t delta, dmu_tx_t *tx)
-{
-	uint64_t used = 0;
-	char buf[32];
-	int err;
-	uint64_t obj = isgroup ? DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT;
-
-	if (delta == 0)
-		return;
-
-	(void) snprintf(buf, sizeof (buf), "%llx", (longlong_t)fuid);
-	err = zap_lookup(os, obj, buf, 8, 1, &used);
-
-	ASSERT(err == 0 || err == ENOENT);
-	/* no underflow/overflow */
-	ASSERT(delta > 0 || used >= -delta);
-	ASSERT(delta < 0 || used + delta > used);
-	used += delta;
-	if (used == 0)
-		err = zap_remove(os, obj, buf, tx);
-	else
-		err = zap_update(os, obj, buf, 8, 1, &used, tx);
-	ASSERT(err == 0);
-
-}
-
 static int
 zfs_space_delta_cb(dmu_object_type_t bonustype, void *data,
     uint64_t *userp, uint64_t *groupp)
@@ -2239,9 +2211,8 @@
 		sa_register_update_callback(os, zfs_sa_upgrade);
 	}
 
-	spa_history_internal_log(LOG_DS_UPGRADE,
-	    dmu_objset_spa(os), tx, CRED(),
-	    "oldver=%llu newver=%llu dataset = %llu",
+	spa_history_log_internal(LOG_DS_UPGRADE,
+	    dmu_objset_spa(os), tx, "oldver=%llu newver=%llu dataset = %llu",
 	    zfsvfs->z_version, newvers, dmu_objset_id(os));
 
 	dmu_tx_commit(tx);

--- a/usr/src/uts/common/fs/zfs/zil.c	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/zil.c	Mon May 03 14:54:08 2010 -0700
@@ -36,6 +36,7 @@
 #include <sys/dsl_dataset.h>
 #include <sys/vdev.h>
 #include <sys/dmu_tx.h>
+#include <sys/dsl_pool.h>
 
 /*
  * The zfs intent log (ZIL) saves transaction records of system calls
@@ -179,7 +180,7 @@
 	SET_BOOKMARK(&zb, bp->blk_cksum.zc_word[ZIL_ZC_OBJSET],
 	    ZB_ZIL_OBJECT, ZB_ZIL_LEVEL, bp->blk_cksum.zc_word[ZIL_ZC_SEQ]);
 
-	error = arc_read_nolock(NULL, zilog->zl_spa, bp, arc_getbuf_func, &abuf,
+	error = dsl_read_nolock(NULL, zilog->zl_spa, bp, arc_getbuf_func, &abuf,
 	    ZIO_PRIORITY_SYNC_READ, zio_flags, &aflags, &zb);
 
 	if (error == 0) {

--- a/usr/src/uts/common/fs/zfs/zio.c	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/zio.c	Mon May 03 14:54:08 2010 -0700
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #include <sys/zfs_context.h>
@@ -661,6 +660,9 @@
 {
 	zio_t *zio;
 
+	dprintf_bp(bp, "freeing in txg %llu, pass %u",
+	    (longlong_t)txg, spa->spa_sync_pass);
+
 	ASSERT(!BP_IS_HOLE(bp));
 	ASSERT(spa_syncing_txg(spa) == txg);
 	ASSERT(spa_sync_pass(spa) <= SYNC_PASS_DEFERRED_FREE);
@@ -2073,6 +2075,8 @@
 	return (ZIO_PIPELINE_CONTINUE);
 }
 
+ddt_entry_t *freedde; /* for debugging */
+
 static int
 zio_ddt_free(zio_t *zio)
 {
@@ -2086,7 +2090,7 @@
 	ASSERT(zio->io_child_type == ZIO_CHILD_LOGICAL);
 
 	ddt_enter(ddt);
-	dde = ddt_lookup(ddt, bp, B_TRUE);
+	freedde = dde = ddt_lookup(ddt, bp, B_TRUE);
 	ddp = ddt_phys_select(dde, bp);
 	ddt_phys_decref(ddp);
 	ddt_exit(ddt);

--- a/usr/src/uts/common/fs/zfs/zvol.c	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/zvol.c	Mon May 03 14:54:08 2010 -0700
@@ -249,7 +249,7 @@
 
 /*ARGSUSED*/
 static int
-zvol_map_block(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
+zvol_map_block(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf,
     const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
 {
 	struct maparg *ma = arg;

--- a/usr/src/uts/common/sys/fs/zfs.h	Mon May 03 11:16:14 2010 -0700
+++ b/usr/src/uts/common/sys/fs/zfs.h	Mon May 03 14:54:08 2010 -0700
@@ -333,14 +333,15 @@
 #define	SPA_VERSION_22			22ULL
 #define	SPA_VERSION_23			23ULL
 #define	SPA_VERSION_24			24ULL
+#define	SPA_VERSION_25			25ULL
 /*
  * When bumping up SPA_VERSION, make sure GRUB ZFS understands the on-disk
  * format change. Go to usr/src/grub/grub-0.97/stage2/{zfs-include/, fsys_zfs*},
  * and do the appropriate changes.  Also bump the version number in
  * usr/src/grub/capability.
  */
-#define	SPA_VERSION			SPA_VERSION_24
-#define	SPA_VERSION_STRING		"24"
+#define	SPA_VERSION			SPA_VERSION_25
+#define	SPA_VERSION_STRING		"25"
 
 /*
  * Symbolic names for the changes that caused a SPA_VERSION switch.
@@ -386,6 +387,7 @@
 #define	SPA_VERSION_RECVD_PROPS		SPA_VERSION_22
 #define	SPA_VERSION_SLIM_ZIL		SPA_VERSION_23
 #define	SPA_VERSION_SA			SPA_VERSION_24
+#define	SPA_VERSION_SCAN		SPA_VERSION_25
 
 /*
  * ZPL version - rev'd whenever an incompatible on-disk format change
@@ -450,7 +452,8 @@
 #define	ZPOOL_CONFIG_ASHIFT		"ashift"
 #define	ZPOOL_CONFIG_ASIZE		"asize"
 #define	ZPOOL_CONFIG_DTL		"DTL"
-#define	ZPOOL_CONFIG_STATS		"stats"
+#define	ZPOOL_CONFIG_SCAN_STATS		"scan_stats"	/* not stored on disk */
+#define	ZPOOL_CONFIG_VDEV_STATS		"vdev_stats"	/* not stored on disk */
 #define	ZPOOL_CONFIG_WHOLE_DISK		"whole_disk"
 #define	ZPOOL_CONFIG_ERRCOUNT		"error_count"
 #define	ZPOOL_CONFIG_NOT_PRESENT	"not_present"
@@ -473,6 +476,7 @@
 #define	ZPOOL_CONFIG_ORIG_GUID		"orig_guid"
 #define	ZPOOL_CONFIG_SPLIT_GUID		"split_guid"
 #define	ZPOOL_CONFIG_SPLIT_LIST		"guid_list"
+#define	ZPOOL_CONFIG_REMOVING		"removing"
 #define	ZPOOL_CONFIG_SUSPENDED		"suspended"	/* not stored on disk */
 #define	ZPOOL_CONFIG_TIMESTAMP		"timestamp"	/* not stored on disk */
 #define	ZPOOL_CONFIG_BOOTFS		"bootfs"	/* not stored on disk */
@@ -580,14 +584,14 @@
 } pool_state_t;
 
 /*
- * Scrub types.
+ * Scan Functions.
  */
-typedef enum pool_scrub_type {
-	POOL_SCRUB_NONE,
-	POOL_SCRUB_RESILVER,
-	POOL_SCRUB_EVERYTHING,
-	POOL_SCRUB_TYPES
-} pool_scrub_type_t;
+typedef enum pool_scan_func {
+	POOL_SCAN_NONE,
+	POOL_SCAN_SCRUB,
+	POOL_SCAN_RESILVER,
+	POOL_SCAN_FUNCS
+} pool_scan_func_t;
 
 /*
  * ZIO types.  Needed to interpret vdev statistics below.
@@ -603,6 +607,36 @@
 } zio_type_t;
 
 /*
+ * Pool statistics.  Note: all fields should be 64-bit because this
+ * is passed between kernel and userland as an nvlist uint64 array.
+ */
+typedef struct pool_scan_stat {
+	/* values stored on disk */
+	uint64_t	pss_func;	/* pool_scan_func_t */
+	uint64_t	pss_state;	/* dsl_scan_state_t */
+	uint64_t	pss_start_time;	/* scan start time */
+	uint64_t	pss_end_time;	/* scan end time */
+	uint64_t	pss_to_examine;	/* total bytes to scan */
+	uint64_t	pss_examined;	/* total examined bytes	*/
+	uint64_t	pss_to_process; /* total bytes to process */
+	uint64_t	pss_processed;	/* total processed bytes */
+	uint64_t	pss_errors;	/* scan errors	*/
+
+	/* values not stored on disk */
+	uint64_t	pss_pass_exam;	/* examined bytes per scan pass */
+	uint64_t	pss_pass_start;	/* start time of a scan pass */
+} pool_scan_stat_t;
+
+typedef enum dsl_scan_state {
+	DSS_NONE,
+	DSS_SCANNING,
+	DSS_FINISHED,
+	DSS_CANCELED,
+	DSS_NUM_STATES
+} dsl_scan_state_t;
+
+
+/*
  * Vdev statistics.  Note: all fields should be 64-bit because this
  * is passed between kernel and userland as an nvlist uint64 array.
  */
@@ -620,13 +654,8 @@
 	uint64_t	vs_write_errors;	/* write errors		*/
 	uint64_t	vs_checksum_errors;	/* checksum errors	*/
 	uint64_t	vs_self_healed;		/* self-healed bytes	*/
-	uint64_t	vs_scrub_type;		/* pool_scrub_type_t	*/
-	uint64_t	vs_scrub_complete;	/* completed?		*/
-	uint64_t	vs_scrub_examined;	/* bytes examined; top	*/
-	uint64_t	vs_scrub_repaired;	/* bytes repaired; leaf	*/
-	uint64_t	vs_scrub_errors;	/* errors during scrub	*/
-	uint64_t	vs_scrub_start;		/* UTC scrub start time	*/
-	uint64_t	vs_scrub_end;		/* UTC scrub end time	*/
+	uint64_t	vs_scan_removing;	/* removing?	*/
+	uint64_t	vs_scan_processed;	/* scan processed bytes	*/
 } vdev_stat_t;
 
 /*
@@ -682,7 +711,7 @@
 	ZFS_IOC_POOL_CONFIGS,
 	ZFS_IOC_POOL_STATS,
 	ZFS_IOC_POOL_TRYIMPORT,
-	ZFS_IOC_POOL_SCRUB,
+	ZFS_IOC_POOL_SCAN,
 	ZFS_IOC_POOL_FREEZE,
 	ZFS_IOC_POOL_UPGRADE,
 	ZFS_IOC_POOL_GET_HISTORY,
@@ -820,7 +849,7 @@
 	LOG_POOL_VDEV_OFFLINE,
 	LOG_POOL_UPGRADE,
 	LOG_POOL_CLEAR,
-	LOG_POOL_SCRUB,
+	LOG_POOL_SCAN,
 	LOG_POOL_PROPSET,
 	LOG_DS_CREATE,
 	LOG_DS_CLONE,
@@ -843,7 +872,7 @@
 	LOG_DS_UPGRADE,
 	LOG_DS_REFQUOTA,
 	LOG_DS_REFRESERV,
-	LOG_POOL_SCRUB_DONE,
+	LOG_POOL_SCAN_DONE,
 	LOG_DS_USER_HOLD,
 	LOG_DS_USER_RELEASE,
 	LOG_POOL_SPLIT,

author	Lin Ling <Lin.Ling@Sun.COM>
	Mon, 03 May 2010 14:54:08 -0700
changeset 12296	7cf402a7f374
parent 12295	e16f396f04a1
child 12297	9b5ab8584c21

usr/src/cmd/availdevs/availdevs.c		file \| annotate \| diff \| comparison \| revisions
usr/src/cmd/fm/schemes/zfs/scheme.c		file \| annotate \| diff \| comparison \| revisions
usr/src/cmd/mdb/common/modules/zfs/zfs.c		file \| annotate \| diff \| comparison \| revisions
usr/src/cmd/ndmpd/ndmp/ndmpd_zfs.c		file \| annotate \| diff \| comparison \| revisions
usr/src/cmd/truss/codes.c		file \| annotate \| diff \| comparison \| revisions
usr/src/cmd/zdb/zdb.c		file \| annotate \| diff \| comparison \| revisions
usr/src/cmd/zfs/zfs_main.c		file \| annotate \| diff \| comparison \| revisions
usr/src/cmd/zpool/Makefile		file \| annotate \| diff \| comparison \| revisions
usr/src/cmd/zpool/zpool_main.c		file \| annotate \| diff \| comparison \| revisions
usr/src/cmd/zpool/zpool_util.h		file \| annotate \| diff \| comparison \| revisions
usr/src/cmd/zpool/zpool_vdev.c		file \| annotate \| diff \| comparison \| revisions
usr/src/cmd/ztest/ztest.c		file \| annotate \| diff \| comparison \| revisions
usr/src/common/zfs/zfs_comutil.c		file \| annotate \| diff \| comparison \| revisions
usr/src/common/zfs/zfs_comutil.h		file \| annotate \| diff \| comparison \| revisions
usr/src/grub/grub-0.97/stage2/zfs-include/zfs.h		file \| annotate \| diff \| comparison \| revisions
usr/src/lib/libzfs/common/libzfs.h		file \| annotate \| diff \| comparison \| revisions
usr/src/lib/libzfs/common/libzfs_pool.c		file \| annotate \| diff \| comparison \| revisions
usr/src/lib/libzfs/common/libzfs_sendrecv.c		file \| annotate \| diff \| comparison \| revisions
usr/src/lib/libzfs/common/libzfs_status.c		file \| annotate \| diff \| comparison \| revisions
usr/src/lib/libzfs/common/libzfs_util.c		file \| annotate \| diff \| comparison \| revisions
usr/src/lib/libzfs/common/mapfile-vers		file \| annotate \| diff \| comparison \| revisions
usr/src/lib/libzfs_jni/common/libzfs_jni_pool.c		file \| annotate \| diff \| comparison \| revisions
usr/src/lib/libzpool/common/util.c		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/Makefile.files		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/arc.c		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/bplist.c		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/dbuf.c		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/ddt.c		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/dmu.c		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/dmu_objset.c		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/dmu_send.c		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/dmu_traverse.c		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/dnode_sync.c		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/dsl_dataset.c		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/dsl_deleg.c		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/dsl_dir.c		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/dsl_pool.c		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/dsl_prop.c		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/dsl_scan.c		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/dsl_scrub.c		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/dsl_synctask.c		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/refcount.c		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/spa.c		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/spa_config.c		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/spa_errlog.c		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/spa_history.c		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/spa_misc.c		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/sys/arc.h		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/sys/dbuf.h		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/sys/ddt.h		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/sys/dmu.h		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/sys/dmu_objset.h		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/sys/dmu_traverse.h		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/sys/dsl_dataset.h		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/sys/dsl_dir.h		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/sys/dsl_pool.h		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/sys/dsl_prop.h		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/sys/dsl_scan.h		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/sys/dsl_synctask.h		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/sys/metaslab.h		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/sys/refcount.h		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/sys/spa.h		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/sys/spa_impl.h		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/sys/uberblock_impl.h		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/sys/vdev.h		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/sys/vdev_impl.h		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/sys/zap.h		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/sys/zap_impl.h		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/sys/zap_leaf.h		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/sys/zfs_debug.h		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/txg.c		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/uberblock.c		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/vdev.c		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/vdev_label.c		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/vdev_raidz.c		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/zap.c		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/zap_leaf.c		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/zap_micro.c		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/zfs_debug.c		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/zfs_ioctl.c		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/zfs_vfsops.c		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/zil.c		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/zio.c		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/zvol.c		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/sys/fs/zfs.h		file \| annotate \| diff \| comparison \| revisions