6903731 need userland dedup stats
authorGeorge Wilson <George.Wilson@Sun.COM>
Sun, 22 Nov 2009 11:04:51 -0800
changeset 11149 8bad7424e2c2
parent 11148 68adfb531269
child 11150 3b3a70d343cc
6903731 need userland dedup stats
usr/src/cmd/zdb/zdb.c
usr/src/cmd/zpool/zpool_main.c
usr/src/grub/grub-0.97/stage2/zfs-include/zfs.h
usr/src/lib/libzfs/common/libzfs.h
usr/src/lib/libzfs/common/libzfs_status.c
usr/src/lib/libzfs/common/mapfile-vers
usr/src/uts/common/fs/zfs/ddt.c
usr/src/uts/common/fs/zfs/spa.c
usr/src/uts/common/fs/zfs/spa_config.c
usr/src/uts/common/fs/zfs/sys/ddt.h
usr/src/uts/common/sys/fs/zfs.h
--- a/usr/src/cmd/zdb/zdb.c	Sun Nov 22 01:09:09 2009 -0800
+++ b/usr/src/cmd/zdb/zdb.c	Sun Nov 22 11:04:51 2009 -0800
@@ -585,70 +585,6 @@
 }
 
 static void
-dump_ddt_stat(const ddt_stat_t *dds, int h)
-{
-	char refcnt[6];
-	char blocks[6], lsize[6], psize[6], dsize[6];
-	char ref_blocks[6], ref_lsize[6], ref_psize[6], ref_dsize[6];
-
-	if (dds->dds_blocks == 0)
-		return;
-
-	if (h == -1)
-		(void) strcpy(refcnt, "Total");
-	else
-		nicenum(1ULL << h, refcnt);
-
-	nicenum(dds->dds_blocks, blocks);
-	nicenum(dds->dds_lsize, lsize);
-	nicenum(dds->dds_psize, psize);
-	nicenum(dds->dds_dsize, dsize);
-	nicenum(dds->dds_ref_blocks, ref_blocks);
-	nicenum(dds->dds_ref_lsize, ref_lsize);
-	nicenum(dds->dds_ref_psize, ref_psize);
-	nicenum(dds->dds_ref_dsize, ref_dsize);
-
-	(void) printf("%6s   %6s   %5s   %5s   %5s   %6s   %5s   %5s   %5s\n",
-	    refcnt,
-	    blocks, lsize, psize, dsize,
-	    ref_blocks, ref_lsize, ref_psize, ref_dsize);
-}
-
-static void
-dump_ddt_histogram(const ddt_histogram_t *ddh)
-{
-	ddt_stat_t dds_total = { 0 };
-
-	ddt_histogram_stat(&dds_total, ddh);
-
-	(void) printf("\n");
-
-	(void) printf("bucket   "
-	    "           allocated             "
-	    "          referenced          \n");
-	(void) printf("______   "
-	    "______________________________   "
-	    "______________________________\n");
-
-	(void) printf("%6s   %6s   %5s   %5s   %5s   %6s   %5s   %5s   %5s\n",
-	    "refcnt",
-	    "blocks", "LSIZE", "PSIZE", "DSIZE",
-	    "blocks", "LSIZE", "PSIZE", "DSIZE");
-
-	(void) printf("%6s   %6s   %5s   %5s   %5s   %6s   %5s   %5s   %5s\n",
-	    "------",
-	    "------", "-----", "-----", "-----",
-	    "------", "-----", "-----", "-----");
-
-	for (int h = 0; h < 64; h++)
-		dump_ddt_stat(&ddh->ddh_stat[h], h);
-
-	dump_ddt_stat(&dds_total, -1);
-
-	(void) printf("\n");
-}
-
-static void
 dump_ddt(ddt_t *ddt, enum ddt_type type, enum ddt_class class)
 {
 	char name[DDT_NAMELEN];
@@ -681,7 +617,7 @@
 	if (dump_opt['D'] < 3)
 		return;
 
-	dump_ddt_histogram(&ddt->ddt_histogram[type][class]);
+	zpool_dump_ddt(NULL, &ddt->ddt_histogram[type][class]);
 
 	if (dump_opt['D'] < 4)
 		return;
@@ -710,14 +646,12 @@
 		for (enum ddt_type type = 0; type < DDT_TYPES; type++) {
 			for (enum ddt_class class = 0; class < DDT_CLASSES;
 			    class++) {
-				ddt_histogram_add(&ddh_total,
-				    &ddt->ddt_histogram[type][class]);
 				dump_ddt(ddt, type, class);
 			}
 		}
 	}
 
-	ddt_histogram_stat(&dds_total, &ddh_total);
+	ddt_get_dedup_stats(spa, &dds_total);
 
 	if (dds_total.dds_blocks == 0) {
 		(void) printf("All DDTs are empty\n");
@@ -728,7 +662,8 @@
 
 	if (dump_opt['D'] > 1) {
 		(void) printf("DDT histogram (aggregated over all DDTs):\n");
-		dump_ddt_histogram(&ddh_total);
+		ddt_get_dedup_histogram(spa, &ddh_total);
+		zpool_dump_ddt(&dds_total, &ddh_total);
 	}
 
 	dump_dedup_ratio(&dds_total);
@@ -2245,7 +2180,7 @@
 
 	(void) printf("Simulated DDT histogram:\n");
 
-	dump_ddt_histogram(&ddh_total);
+	zpool_dump_ddt(&dds_total, &ddh_total);
 
 	dump_dedup_ratio(&dds_total);
 }
--- a/usr/src/cmd/zpool/zpool_main.c	Sun Nov 22 01:09:09 2009 -0800
+++ b/usr/src/cmd/zpool/zpool_main.c	Sun Nov 22 11:04:51 2009 -0800
@@ -2984,6 +2984,7 @@
 	boolean_t	cb_verbose;
 	boolean_t	cb_explain;
 	boolean_t	cb_first;
+	boolean_t	cb_dedup_stats;
 } status_cbdata_t;
 
 /*
@@ -3123,6 +3124,36 @@
 	}
 }
 
+static void
+print_dedup_stats(nvlist_t *config)
+{
+	ddt_histogram_t *ddh;
+	ddt_stat_t *dds;
+	ddt_object_t *ddo;
+	uint_t c;
+
+	/*
+	 * If the pool was faulted then we may not have been able to
+	 * obtain the config. Otherwise, if we have anything in the dedup
+	 * table, continue processing the stats.
+	 */
+	if (nvlist_lookup_uint64_array(config, ZPOOL_CONFIG_DDT_OBJ_STATS,
+	    (uint64_t **)&ddo, &c) != 0 || ddo->ddo_count == 0)
+		return;
+
+	(void) printf("\n");
+	(void) printf("DDT entries %llu, size %llu on disk, %llu in core\n",
+	    (u_longlong_t)ddo->ddo_count,
+	    (u_longlong_t)ddo->ddo_dspace,
+	    (u_longlong_t)ddo->ddo_mspace);
+
+	verify(nvlist_lookup_uint64_array(config, ZPOOL_CONFIG_DDT_STATS,
+	    (uint64_t **)&dds, &c) == 0);
+	verify(nvlist_lookup_uint64_array(config, ZPOOL_CONFIG_DDT_HISTOGRAM,
+	    (uint64_t **)&ddh, &c) == 0);
+	zpool_dump_ddt(dds, ddh);
+}
+
 /*
  * Display a summary of pool status.  Displays a summary such as:
  *
@@ -3405,6 +3436,9 @@
 			else
 				print_error_log(zhp);
 		}
+
+		if (cbp->cb_dedup_stats)
+			print_dedup_stats(config);
 	} else {
 		(void) printf(gettext("config: The configuration cannot be "
 		    "determined.\n"));
@@ -3418,6 +3452,7 @@
  *
  *	-v	Display complete error logs
  *	-x	Display only pools with potential problems
+ *	-D	Display dedup status (undocumented)
  *
  * Describes the health status of all pools or some subset.
  */
@@ -3429,7 +3464,7 @@
 	status_cbdata_t cb = { 0 };
 
 	/* check options */
-	while ((c = getopt(argc, argv, "vx")) != -1) {
+	while ((c = getopt(argc, argv, "vxD")) != -1) {
 		switch (c) {
 		case 'v':
 			cb.cb_verbose = B_TRUE;
@@ -3437,6 +3472,9 @@
 		case 'x':
 			cb.cb_explain = B_TRUE;
 			break;
+		case 'D':
+			cb.cb_dedup_stats = B_TRUE;
+			break;
 		case '?':
 			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
 			    optopt);
--- a/usr/src/grub/grub-0.97/stage2/zfs-include/zfs.h	Sun Nov 22 01:09:09 2009 -0800
+++ b/usr/src/grub/grub-0.97/stage2/zfs-include/zfs.h	Sun Nov 22 11:04:51 2009 -0800
@@ -64,6 +64,9 @@
 #define	ZPOOL_CONFIG_HOLE_ARRAY		"hole_array"
 #define	ZPOOL_CONFIG_VDEV_CHILDREN	"vdev_children"
 #define	ZPOOL_CONFIG_IS_HOLE		"is_hole"
+#define	ZPOOL_CONFIG_DDT_HISTOGRAM	"ddt_histogram"
+#define	ZPOOL_CONFIG_DDT_OBJ_STATS	"ddt_object_stats"
+#define	ZPOOL_CONFIG_DDT_STATS		"ddt_stats"
 /*
  * The persistent vdev state is stored as separate values rather than a single
  * 'vdev_state' entry.  This is because a device can be in multiple states, such
--- a/usr/src/lib/libzfs/common/libzfs.h	Sun Nov 22 01:09:09 2009 -0800
+++ b/usr/src/lib/libzfs/common/libzfs.h	Sun Nov 22 11:04:51 2009 -0800
@@ -299,6 +299,7 @@
 
 extern zpool_status_t zpool_get_status(zpool_handle_t *, char **);
 extern zpool_status_t zpool_import_status(nvlist_t *, char **);
+extern void zpool_dump_ddt(const ddt_stat_t *dds, const ddt_histogram_t *ddh);
 
 /*
  * Statistics and configuration functions.
--- a/usr/src/lib/libzfs/common/libzfs_status.c	Sun Nov 22 01:09:09 2009 -0800
+++ b/usr/src/lib/libzfs/common/libzfs_status.c	Sun Nov 22 11:04:51 2009 -0800
@@ -328,3 +328,68 @@
 
 	return (ret);
 }
+
+static void
+dump_ddt_stat(const ddt_stat_t *dds, int h)
+{
+	char refcnt[6];
+	char blocks[6], lsize[6], psize[6], dsize[6];
+	char ref_blocks[6], ref_lsize[6], ref_psize[6], ref_dsize[6];
+
+	if (dds == NULL || dds->dds_blocks == 0)
+		return;
+
+	if (h == -1)
+		(void) strcpy(refcnt, "Total");
+	else
+		zfs_nicenum(1ULL << h, refcnt, sizeof (refcnt));
+
+	zfs_nicenum(dds->dds_blocks, blocks, sizeof (blocks));
+	zfs_nicenum(dds->dds_lsize, lsize, sizeof (lsize));
+	zfs_nicenum(dds->dds_psize, psize, sizeof (psize));
+	zfs_nicenum(dds->dds_dsize, dsize, sizeof (dsize));
+	zfs_nicenum(dds->dds_ref_blocks, ref_blocks, sizeof (ref_blocks));
+	zfs_nicenum(dds->dds_ref_lsize, ref_lsize, sizeof (ref_lsize));
+	zfs_nicenum(dds->dds_ref_psize, ref_psize, sizeof (ref_psize));
+	zfs_nicenum(dds->dds_ref_dsize, ref_dsize, sizeof (ref_dsize));
+
+	(void) printf("%6s   %6s   %5s   %5s   %5s   %6s   %5s   %5s   %5s\n",
+	    refcnt,
+	    blocks, lsize, psize, dsize,
+	    ref_blocks, ref_lsize, ref_psize, ref_dsize);
+}
+
+/*
+ * Print the DDT histogram and the column totals.
+ */
+void
+zpool_dump_ddt(const ddt_stat_t *dds_total, const ddt_histogram_t *ddh)
+{
+	int h;
+
+	(void) printf("\n");
+
+	(void) printf("bucket   "
+	    "           allocated             "
+	    "          referenced          \n");
+	(void) printf("______   "
+	    "______________________________   "
+	    "______________________________\n");
+
+	(void) printf("%6s   %6s   %5s   %5s   %5s   %6s   %5s   %5s   %5s\n",
+	    "refcnt",
+	    "blocks", "LSIZE", "PSIZE", "DSIZE",
+	    "blocks", "LSIZE", "PSIZE", "DSIZE");
+
+	(void) printf("%6s   %6s   %5s   %5s   %5s   %6s   %5s   %5s   %5s\n",
+	    "------",
+	    "------", "-----", "-----", "-----",
+	    "------", "-----", "-----", "-----");
+
+	for (h = 0; h < 64; h++)
+		dump_ddt_stat(&ddh->ddh_stat[h], h);
+
+	dump_ddt_stat(dds_total, -1);
+
+	(void) printf("\n");
+}
--- a/usr/src/lib/libzfs/common/mapfile-vers	Sun Nov 22 01:09:09 2009 -0800
+++ b/usr/src/lib/libzfs/common/mapfile-vers	Sun Nov 22 11:04:51 2009 -0800
@@ -157,6 +157,7 @@
 	zpool_create;
 	zpool_destroy;
 	zpool_disable_datasets;
+	zpool_dump_ddt;
 	zpool_enable_datasets;
 	zpool_expand_proplist;
 	zpool_explain_recover;
--- a/usr/src/uts/common/fs/zfs/ddt.c	Sun Nov 22 01:09:09 2009 -0800
+++ b/usr/src/uts/common/fs/zfs/ddt.c	Sun Nov 22 11:04:51 2009 -0800
@@ -399,23 +399,58 @@
 	return (B_TRUE);
 }
 
-static void
-ddt_get_dedup_stats(spa_t *spa, ddt_stat_t *dds_total)
+void
+ddt_get_dedup_object_stats(spa_t *spa, ddt_object_t *ddo)
 {
-	ddt_histogram_t ddh_total = { 0 };
+	dmu_object_info_t doi;
+	uint64_t count;
+	int error;
 
 	for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
 		ddt_t *ddt = spa->spa_ddt[c];
 		for (enum ddt_type type = 0; type < DDT_TYPES; type++) {
 			for (enum ddt_class class = 0; class < DDT_CLASSES;
 			    class++) {
-				ddt_histogram_add(&ddh_total,
+				error = ddt_object_info(ddt, type, class, &doi);
+				if (error == ENOENT)
+					continue;
+				ASSERT3U(error, ==, 0);
+
+				count = ddt_object_count(ddt, type, class);
+				ddo->ddo_count += count;
+				ddo->ddo_dspace +=
+				    (doi.doi_physical_blocks_512 << 9) / count;
+				ddo->ddo_mspace += doi.doi_fill_count *
+				    doi.doi_data_block_size / count;
+			}
+		}
+	}
+}
+
+void
+ddt_get_dedup_histogram(spa_t *spa, ddt_histogram_t *ddh)
+{
+	for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
+		ddt_t *ddt = spa->spa_ddt[c];
+		for (enum ddt_type type = 0; type < DDT_TYPES; type++) {
+			for (enum ddt_class class = 0; class < DDT_CLASSES;
+			    class++) {
+				ddt_histogram_add(ddh,
 				    &ddt->ddt_histogram[type][class]);
 			}
 		}
 	}
+}
 
-	ddt_histogram_stat(dds_total, &ddh_total);
+void
+ddt_get_dedup_stats(spa_t *spa, ddt_stat_t *dds_total)
+{
+	ddt_histogram_t *ddh_total;
+
+	ddh_total = kmem_zalloc(sizeof (ddt_histogram_t), KM_SLEEP);
+	ddt_get_dedup_histogram(spa, ddh_total);
+	ddt_histogram_stat(dds_total, ddh_total);
+	kmem_free(ddh_total, sizeof (ddt_histogram_t));
 }
 
 uint64_t
--- a/usr/src/uts/common/fs/zfs/spa.c	Sun Nov 22 01:09:09 2009 -0800
+++ b/usr/src/uts/common/fs/zfs/spa.c	Sun Nov 22 11:04:51 2009 -0800
@@ -1803,7 +1803,8 @@
 	spa->spa_minref = refcount_count(&spa->spa_refcount);
 	if (error && error != EBADF)
 		zfs_ereport_post(ereport, spa, NULL, NULL, 0, 0);
-	spa->spa_load_state = SPA_LOAD_NONE;
+
+	spa->spa_load_state = error ? SPA_LOAD_ERROR : SPA_LOAD_NONE;
 	spa->spa_ena = 0;
 
 	return (error);
--- a/usr/src/uts/common/fs/zfs/spa_config.c	Sun Nov 22 01:09:09 2009 -0800
+++ b/usr/src/uts/common/fs/zfs/spa_config.c	Sun Nov 22 11:04:51 2009 -0800
@@ -404,6 +404,33 @@
 	VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0);
 	nvlist_free(nvroot);
 
+	if (getstats && spa_load_state(spa) == SPA_LOAD_NONE) {
+		ddt_histogram_t *ddh;
+		ddt_stat_t *dds;
+		ddt_object_t *ddo;
+
+		ddh = kmem_zalloc(sizeof (ddt_histogram_t), KM_SLEEP);
+		ddt_get_dedup_histogram(spa, ddh);
+		VERIFY(nvlist_add_uint64_array(config,
+		    ZPOOL_CONFIG_DDT_HISTOGRAM,
+		    (uint64_t *)ddh, sizeof (*ddh) / sizeof (uint64_t)) == 0);
+		kmem_free(ddh, sizeof (ddt_histogram_t));
+
+		ddo = kmem_zalloc(sizeof (ddt_object_t), KM_SLEEP);
+		ddt_get_dedup_object_stats(spa, ddo);
+		VERIFY(nvlist_add_uint64_array(config,
+		    ZPOOL_CONFIG_DDT_OBJ_STATS,
+		    (uint64_t *)ddo, sizeof (*ddo) / sizeof (uint64_t)) == 0);
+		kmem_free(ddo, sizeof (ddt_object_t));
+
+		dds = kmem_zalloc(sizeof (ddt_stat_t), KM_SLEEP);
+		ddt_get_dedup_stats(spa, dds);
+		VERIFY(nvlist_add_uint64_array(config,
+		    ZPOOL_CONFIG_DDT_STATS,
+		    (uint64_t *)dds, sizeof (*dds) / sizeof (uint64_t)) == 0);
+		kmem_free(dds, sizeof (ddt_stat_t));
+	}
+
 	spa_rewind_data_to_nvlist(spa, config);
 
 	if (locked)
--- a/usr/src/uts/common/fs/zfs/sys/ddt.h	Sun Nov 22 01:09:09 2009 -0800
+++ b/usr/src/uts/common/fs/zfs/sys/ddt.h	Sun Nov 22 11:04:51 2009 -0800
@@ -60,24 +60,6 @@
 #define	DDT_COMPRESS_FUNCTION_MASK	0x7f
 
 /*
- * DDT statistics.
- */
-typedef struct ddt_stat {
-	uint64_t	dds_blocks;	/* blocks			*/
-	uint64_t	dds_lsize;	/* logical size			*/
-	uint64_t	dds_psize;	/* physical size		*/
-	uint64_t	dds_dsize;	/* deflated allocated size	*/
-	uint64_t	dds_ref_blocks;	/* referenced blocks		*/
-	uint64_t	dds_ref_lsize;	/* referenced lsize * refcnt	*/
-	uint64_t	dds_ref_psize;	/* referenced psize * refcnt	*/
-	uint64_t	dds_ref_dsize;	/* referenced dsize * refcnt	*/
-} ddt_stat_t;
-
-typedef struct ddt_histogram {
-	ddt_stat_t	ddh_stat[64];	/* power-of-two histogram buckets */
-} ddt_histogram_t;
-
-/*
  * On-disk ddt entry:  key (name) and physical storage (value).
  */
 typedef struct ddt_key {
@@ -215,6 +197,9 @@
 extern void ddt_histogram_add(ddt_histogram_t *dst, const ddt_histogram_t *src);
 extern void ddt_histogram_stat(ddt_stat_t *dds, const ddt_histogram_t *ddh);
 extern boolean_t ddt_histogram_empty(const ddt_histogram_t *ddh);
+extern void ddt_get_dedup_object_stats(spa_t *spa, ddt_object_t *ddo);
+extern void ddt_get_dedup_histogram(spa_t *spa, ddt_histogram_t *ddh);
+extern void ddt_get_dedup_stats(spa_t *spa, ddt_stat_t *dds_total);
 
 extern uint64_t ddt_get_dedup_dspace(spa_t *spa);
 extern uint64_t ddt_get_pool_dedup_ratio(spa_t *spa);
--- a/usr/src/uts/common/sys/fs/zfs.h	Sun Nov 22 01:09:09 2009 -0800
+++ b/usr/src/uts/common/sys/fs/zfs.h	Sun Nov 22 11:04:51 2009 -0800
@@ -451,6 +451,9 @@
 #define	ZPOOL_CONFIG_HOLE_ARRAY		"hole_array"
 #define	ZPOOL_CONFIG_VDEV_CHILDREN	"vdev_children"
 #define	ZPOOL_CONFIG_IS_HOLE		"is_hole"
+#define	ZPOOL_CONFIG_DDT_HISTOGRAM	"ddt_histogram"
+#define	ZPOOL_CONFIG_DDT_OBJ_STATS	"ddt_object_stats"
+#define	ZPOOL_CONFIG_DDT_STATS		"ddt_stats"
 #define	ZPOOL_CONFIG_SUSPENDED		"suspended"	/* not stored on disk */
 #define	ZPOOL_CONFIG_TIMESTAMP		"timestamp"	/* not stored on disk */
 #define	ZPOOL_CONFIG_BOOTFS		"bootfs"	/* not stored on disk */
@@ -606,6 +609,31 @@
 	uint64_t	vs_scrub_end;		/* UTC scrub end time	*/
 } vdev_stat_t;
 
+/*
+ * DDT statistics.  Note: all fields should be 64-bit because this
+ * is passed between kernel and userland as an nvlist uint64 array.
+ */
+typedef struct ddt_object {
+	uint64_t	ddo_count;	/* number of elements in ddt	*/
+	uint64_t	ddo_dspace;	/* size of ddt on disk		*/
+	uint64_t	ddo_mspace;	/* size of ddt in-core		*/
+} ddt_object_t;
+
+typedef struct ddt_stat {
+	uint64_t	dds_blocks;	/* blocks			*/
+	uint64_t	dds_lsize;	/* logical size			*/
+	uint64_t	dds_psize;	/* physical size		*/
+	uint64_t	dds_dsize;	/* deflated allocated size	*/
+	uint64_t	dds_ref_blocks;	/* referenced blocks		*/
+	uint64_t	dds_ref_lsize;	/* referenced lsize * refcnt	*/
+	uint64_t	dds_ref_psize;	/* referenced psize * refcnt	*/
+	uint64_t	dds_ref_dsize;	/* referenced dsize * refcnt	*/
+} ddt_stat_t;
+
+typedef struct ddt_histogram {
+	ddt_stat_t	ddh_stat[64];	/* power-of-two histogram buckets */
+} ddt_histogram_t;
+
 #define	ZVOL_DRIVER	"zvol"
 #define	ZFS_DRIVER	"zfs"
 #define	ZFS_DEV		"/dev/zfs"
@@ -686,11 +714,12 @@
  * Internal SPA load state.  Used by FMA diagnosis engine.
  */
 typedef enum {
-	SPA_LOAD_NONE,		/* no load in progress */
-	SPA_LOAD_OPEN,		/* normal open */
-	SPA_LOAD_IMPORT,	/* import in progress */
+	SPA_LOAD_NONE,		/* no load in progress	*/
+	SPA_LOAD_OPEN,		/* normal open		*/
+	SPA_LOAD_IMPORT,	/* import in progress	*/
 	SPA_LOAD_TRYIMPORT,	/* tryimport in progress */
-	SPA_LOAD_RECOVER	/* recovery requested */
+	SPA_LOAD_RECOVER,	/* recovery requested	*/
+	SPA_LOAD_ERROR		/* load failed		*/
 } spa_load_state_t;
 
 /*