6536445 want ::zio to show zio tree
author eschrock
Tue, 17 Apr 2007 10:03:15 -0700
changeset 4055 9b8dd5af941d
parent 4054 44a43a2654b3
child 4056 3af5ac45a538
6536445 want ::zio to show zio tree
6546214 zpool_find_import() leaks directory handle
usr/src/cmd/mdb/common/modules/zfs/zfs.c
usr/src/lib/libzfs/common/libzfs_import.c
usr/src/uts/common/fs/zfs/zio.c
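
The mdb changes add a ::zio dcmd, a ::zio_state dcmd, and three walkers (zio,
zio_child, zio_root); the new zio_cache kmem cache in zio.c is what lets the
global walks find every active zio_t.  Roughly, the intended usage is as
follows (the addresses below are hypothetical, not from a real dump):

	> ::zio_state                    print every zio tree on the system
	> 600007939280::zio_state        only the trees of one spa_t
	> 30007053ba0::zio               one tree, rooted at the given zio_t

The libzfs_import.c change closes the directory handle that
zpool_find_import() previously leaked on both the normal and error paths, and
also closes the label file descriptor when zpool_read_label() fails (6546214).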
--- a/usr/src/cmd/mdb/common/modules/zfs/zfs.c	Tue Apr 17 09:58:17 2007 -0700
+++ b/usr/src/cmd/mdb/common/modules/zfs/zfs.c	Tue Apr 17 10:03:15 2007 -0700
@@ -639,7 +639,7 @@
 
 	if ((data->objset == DBUFS_UNSET || data->objset == objset) &&
 	    (data->osname == NULL || (objset_name(objset, osname) == 0 &&
-		strcmp(data->osname, osname) == 0)) &&
+	    strcmp(data->osname, osname) == 0)) &&
 	    (data->object == DBUFS_UNSET || data->object == db.db_object) &&
 	    (data->level == DBUFS_UNSET || data->level == level) &&
 	    (data->blkid == DBUFS_UNSET || data->blkid == blkid)) {
@@ -1039,8 +1039,8 @@
 vdev_help(void)
 {
 	mdb_printf("[vdev_t*]::vdev [-er]\n"
-		"\t-> -e display vdev stats\n"
-		"\t-> -r recursive (visit all children)\n");
+	    "\t-> -e display vdev stats\n"
+	    "\t-> -r recursive (visit all children)\n");
 }
 
 /*
@@ -1103,23 +1103,23 @@
 
 		switch (vdev.vdev_state) {
 		case VDEV_STATE_CLOSED:
-		    state = "CLOSED";
-		    break;
+			state = "CLOSED";
+			break;
 		case VDEV_STATE_OFFLINE:
-		    state = "OFFLINE";
-		    break;
+			state = "OFFLINE";
+			break;
 		case VDEV_STATE_CANT_OPEN:
-		    state = "CANT_OPEN";
-		    break;
+			state = "CANT_OPEN";
+			break;
 		case VDEV_STATE_DEGRADED:
-		    state = "DEGRADED";
-		    break;
+			state = "DEGRADED";
+			break;
 		case VDEV_STATE_HEALTHY:
-		    state = "HEALTHY";
-		    break;
+			state = "HEALTHY";
+			break;
 		default:
-		    state = "UNKNOWN";
-		    break;
+			state = "UNKNOWN";
+			break;
 		}
 
 		switch (vdev.vdev_stat.vs_aux) {
@@ -1552,6 +1552,113 @@
 	    flags, 1, &v));
 }
 
+/*
+ * ::zio
+ *
+ * Print a summary of zio_t and all its children.  This is intended to display a
+ * zio tree, and hence we only pick the most important pieces of information for
+ * the main summary.  More detailed information can always be found by doing a
+ * '::print zio' on the underlying zio_t.  The columns we display are:
+ *
+ *	ADDRESS		TYPE	STAGE		WAITER
+ *
+ * The 'address' column is indented by one space for each depth level as we
+ * descend down the tree.
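+ *
+ * A hypothetical run against a root zio (addresses, stages, and the waiter
+ * are made up for illustration, and column widths are approximate) might
+ * look roughly like:
+ *
+ *	> 30007053ba0::zio
+ *	ADDRESS           TYPE  STAGE               WAITER
+ *	30007053ba0       NULL  WAIT_CHILDREN_DONE  300016a2600
+ *	 300070535c0      WRITE VDEV_IO_DONE        -
+ *	  30007053300     WRITE VDEV_IO_START       -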
+ */
+static int
+zio_print_cb(uintptr_t addr, const void *data, void *priv)
+{
+	const zio_t *zio = data;
+	uintptr_t depth = (uintptr_t)priv;
+	mdb_ctf_id_t type_enum, stage_enum;
+	const char *type, *stage;
+	int maxdepth;
+
+	/*
+	 * Allow enough space for a pointer and up to a 16-deep tree.
+	 */
+	maxdepth = sizeof (uintptr_t) * 2 + 16;
+	if (depth > 16)
+		depth = 16;
+
+	if (depth == 0)
+		mdb_printf("%<u>%-*s %-5s %-22s %-?s%</u>\n", maxdepth,
+		    "ADDRESS", "TYPE", "STAGE", "WAITER");
+
+	if (mdb_ctf_lookup_by_name("enum zio_type", &type_enum) == -1 ||
+	    mdb_ctf_lookup_by_name("enum zio_stage", &stage_enum) == -1) {
+		mdb_warn("failed to lookup zio enums");
+		return (WALK_ERR);
+	}
+
+	if ((type = mdb_ctf_enum_name(type_enum, zio->io_type)) != NULL)
+		type += sizeof ("ZIO_TYPE_") - 1;
+	else
+		type = "?";
+
+	if ((stage = mdb_ctf_enum_name(stage_enum, zio->io_stage)) != NULL)
+		stage += sizeof ("ZIO_STAGE_") - 1;
+	else
+		stage = "?";
+
+	mdb_printf("%*s%-*p %-5s %-22s ",
+	    depth, "", maxdepth - depth, addr, type, stage);
+	if (zio->io_waiter)
+		mdb_printf("%?p\n", zio->io_waiter);
+	else
+		mdb_printf("-\n");
+
+	if (mdb_pwalk("zio_child", zio_print_cb, (void *)(depth + 1),
+	    addr) != 0) {
+		mdb_warn("failed to walk zio_t children at %p\n", addr);
+		return (WALK_ERR);
+	}
+
+	return (WALK_NEXT);
+}
+
+/*ARGSUSED*/
+static int
+zio_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+{
+	zio_t zio;
+
+	if (!(flags & DCMD_ADDRSPEC))
+		return (DCMD_USAGE);
+
+	if (mdb_vread(&zio, sizeof (zio_t), addr) == -1) {
+		mdb_warn("failed to read zio_t at %p", addr);
+		return (DCMD_ERR);
+	}
+
+	if (zio_print_cb(addr, &zio, NULL) != WALK_NEXT)
+		return (DCMD_ERR);
+
+	return (DCMD_OK);
+}
+
+/*
+ * [addr]::zio_state
+ *
+ * Print a summary of all zio_t structures on the system, or for a particular
+ * pool.  This is equivalent to '::walk zio_root | ::zio'.
+ */
+/*ARGSUSED*/
+static int
+zio_state(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+{
+	/*
+	 * MDB will remember the last address of the pipeline, so if we don't
+	 * zero this we'll end up trying to walk zio structures for a
+	 * non-existent spa_t.
+	 */
+	if (!(flags & DCMD_ADDRSPEC))
+		addr = 0;
+
+	return (mdb_pwalk_dcmd("zio_root", "zio", argc, argv, addr));
+}
+
 typedef struct txg_list_walk_data {
 	uintptr_t lw_head[TXG_SIZE];
 	int	lw_txgoff;
@@ -1693,6 +1800,110 @@
 }
 
 /*
+ * [addr]::walk zio
+ *
+ * Walk all active zio_t structures on the system.  This is simply a layered
+ * walk on top of ::walk zio_cache, with the optional ability to limit the
+ * structures to a particular pool.
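+ *
+ * Hypothetical usage (the spa_t address below is made up):
+ *
+ *	> ::walk zio
+ *	> 600007939280::walk zio | ::print zio_t io_error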
+ */
+static int
+zio_walk_init(mdb_walk_state_t *wsp)
+{
+	wsp->walk_data = (void *)wsp->walk_addr;
+
+	if (mdb_layered_walk("zio_cache", wsp) == -1) {
+		mdb_warn("failed to walk 'zio_cache'\n");
+		return (WALK_ERR);
+	}
+
+	return (WALK_NEXT);
+}
+
+static int
+zio_walk_step(mdb_walk_state_t *wsp)
+{
+	zio_t zio;
+
+	if (mdb_vread(&zio, sizeof (zio), wsp->walk_addr) == -1) {
+		mdb_warn("failed to read zio_t at %p", wsp->walk_addr);
+		return (WALK_ERR);
+	}
+
+	if (wsp->walk_data != NULL && wsp->walk_data != zio.io_spa)
+		return (WALK_NEXT);
+
+	return (wsp->walk_callback(wsp->walk_addr, &zio, wsp->walk_cbdata));
+}
+
+/*
+ * ::walk zio_child
+ *
+ * Walk the children of a zio_t structure.
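+ *
+ * This visits only the immediate children (io_child, then io_sibling_next);
+ * '::zio' recurses by re-invoking this walker at each level.  Hypothetical
+ * usage (the zio_t address is made up):
+ *
+ *	> 30007053ba0::walk zio_child | ::zio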
+ */
+static int
+zio_child_walk_init(mdb_walk_state_t *wsp)
+{
+	zio_t zio;
+
+	if (wsp->walk_addr == 0) {
+		mdb_warn("::walk zio_child doesn't support global walks\n");
+		return (WALK_ERR);
+	}
+
+	if (mdb_vread(&zio, sizeof (zio), wsp->walk_addr) == -1) {
+		mdb_warn("failed to read zio_t at %p", wsp->walk_addr);
+		return (WALK_ERR);
+	}
+
+	wsp->walk_addr = (uintptr_t)zio.io_child;
+	return (WALK_NEXT);
+}
+
+static int
+zio_sibling_walk_step(mdb_walk_state_t *wsp)
+{
+	zio_t zio;
+	int status;
+
+	if (wsp->walk_addr == 0)
+		return (WALK_DONE);
+
+	if (mdb_vread(&zio, sizeof (zio), wsp->walk_addr) == -1) {
+		mdb_warn("failed to read zio_t at %p", wsp->walk_addr);
+		return (WALK_ERR);
+	}
+
+	status = wsp->walk_callback(wsp->walk_addr, &zio, wsp->walk_cbdata);
+
+	wsp->walk_addr = (uintptr_t)zio.io_sibling_next;
+	return (status);
+}
+
+/*
+ * [addr]::walk zio_root
+ *
+ * Walk only root zio_t structures, optionally for a particular spa_t.
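+ *
+ * A root zio is one whose io_root points back at itself.  Hypothetical usage
+ * (the spa_t address is made up):
+ *
+ *	> ::walk zio_root | ::zio
+ *	> 600007939280::walk zio_root | ::zio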
+ */
+static int
+zio_walk_root_step(mdb_walk_state_t *wsp)
+{
+	zio_t zio;
+
+	if (mdb_vread(&zio, sizeof (zio), wsp->walk_addr) == -1) {
+		mdb_warn("failed to read zio_t at %p", wsp->walk_addr);
+		return (WALK_ERR);
+	}
+
+	if (wsp->walk_data != NULL && wsp->walk_data != zio.io_spa)
+		return (WALK_NEXT);
+
+	if ((uintptr_t)zio.io_root != wsp->walk_addr)
+		return (WALK_NEXT);
+
+	return (wsp->walk_callback(wsp->walk_addr, &zio, wsp->walk_cbdata));
+}
+
+/*
  * MDB module linkage information:
  *
  * We declare a list of structures describing our dcmds, and a function
@@ -1717,6 +1928,9 @@
 	{ "spa_space", ":[-b]", "print spa_t on-disk space usage", spa_space },
 	{ "spa_vdevs", ":", "given a spa_t, print vdev summary", spa_vdevs },
 	{ "vdev", ":[-re]", "vdev_t summary", vdev_print },
+	{ "zio", ":", "zio_t summary", zio_print },
+	{ "zio_state", "?", "print out all zio_t structures on system or "
+	    "for a particular pool", zio_state },
 	{ "zio_pipeline", ":", "decode a zio pipeline", zio_pipeline },
 	{ "zfs_params", "", "print zfs tunable parameters", zfs_params },
 	{ NULL }
@@ -1743,6 +1957,13 @@
 		txg_list2_walk_init, txg_list_walk_step, NULL },
 	{ "txg_list3", "given any txg_list_t *, walk all entries in txg 3",
 		txg_list3_walk_init, txg_list_walk_step, NULL },
+	{ "zio", "walk all zio structures, optionally for a particular spa_t",
+		zio_walk_init, zio_walk_step, NULL },
+	{ "zio_child", "walk children of a zio_t structure",
+		zio_child_walk_init, zio_sibling_walk_step, NULL },
+	{ "zio_root", "walk all root zio_t structures, optionally for a "
+	    "particular spa_t",
+		zio_walk_init, zio_walk_root_step, NULL },
 	{ "spa", "walk all spa_t entries in the namespace",
 		spa_walk_init, spa_walk_step, NULL },
 	{ "metaslab", "given a spa_t *, walk all metaslab_t structures",
--- a/usr/src/lib/libzfs/common/libzfs_import.c	Tue Apr 17 09:58:17 2007 -0700
+++ b/usr/src/lib/libzfs/common/libzfs_import.c	Tue Apr 17 10:03:15 2007 -0700
@@ -220,7 +220,7 @@
 	    &state) == 0 && state == POOL_STATE_SPARE &&
 	    nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid) == 0) {
 		if ((ne = zfs_alloc(hdl, sizeof (name_entry_t))) == NULL)
-		    return (-1);
+			return (-1);
 
 		if ((ne->ne_name = zfs_strdup(hdl, path)) == NULL) {
 			free(ne);
@@ -748,7 +748,7 @@
 zpool_find_import(libzfs_handle_t *hdl, int argc, char **argv)
 {
 	int i;
-	DIR *dirp;
+	DIR *dirp = NULL;
 	struct dirent64 *dp;
 	char path[MAXPATHLEN];
 	struct stat64 statbuf;
@@ -816,6 +816,7 @@
 				continue;
 
 			if ((zpool_read_label(fd, &config)) != 0) {
+				(void) close(fd);
 				(void) no_memory(hdl);
 				goto error;
 			}
@@ -826,6 +827,9 @@
 				if (add_config(hdl, &pools, path, config) != 0)
 					goto error;
 		}
+
+		(void) closedir(dirp);
+		dirp = NULL;
 	}
 
 	ret = get_configs(hdl, &pools);
@@ -853,6 +857,8 @@
 		free(ne);
 	}
 
+	if (dirp)
+		(void) closedir(dirp);
 
 	return (ret);
 }
--- a/usr/src/uts/common/fs/zfs/zio.c	Tue Apr 17 09:58:17 2007 -0700
+++ b/usr/src/uts/common/fs/zfs/zio.c	Tue Apr 17 10:03:15 2007 -0700
@@ -84,6 +84,7 @@
  * I/O kmem caches
  * ==========================================================================
  */
+kmem_cache_t *zio_cache;
 kmem_cache_t *zio_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT];
 kmem_cache_t *zio_data_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT];
 
@@ -101,6 +102,9 @@
 	data_alloc_arena = zio_alloc_arena;
 #endif
 
+	zio_cache = kmem_cache_create("zio_cache", sizeof (zio_t), 0,
+	    NULL, NULL, NULL, NULL, NULL, 0);
+
 	/*
 	 * For small buffers, we want a cache for each multiple of
 	 * SPA_MINBLOCKSIZE.  For medium-size buffers, we want a cache
@@ -173,6 +177,8 @@
 		zio_data_buf_cache[c] = NULL;
 	}
 
+	kmem_cache_destroy(zio_cache);
+
 	zio_inject_fini();
 }
 
@@ -303,7 +309,8 @@
 	ASSERT3U(size, <=, SPA_MAXBLOCKSIZE);
 	ASSERT(P2PHASE(size, SPA_MINBLOCKSIZE) == 0);
 
-	zio = kmem_zalloc(sizeof (zio_t), KM_SLEEP);
+	zio = kmem_cache_alloc(zio_cache, KM_SLEEP);
+	bzero(zio, sizeof (zio_t));
 	zio->io_parent = pio;
 	zio->io_spa = spa;
 	zio->io_txg = txg;
@@ -749,7 +756,7 @@
 
 	error = zio->io_error;
 	mutex_destroy(&zio->io_lock);
-	kmem_free(zio, sizeof (zio_t));
+	kmem_cache_free(zio_cache, zio);
 
 	return (error);
 }
@@ -929,9 +936,8 @@
 	}
 
 	/*
-	 * Note: this I/O is now done, and will shortly be
-	 * kmem_free()'d, so there is no need to clear this (or any
-	 * other) flag.
+	 * Note: this I/O is now done, and will shortly be freed, so there is no
+	 * need to clear this (or any other) flag.
 	 */
 	if (zio->io_flags & ZIO_FLAG_CONFIG_GRABBED)
 		spa_config_exit(spa, zio);
@@ -943,7 +949,7 @@
 		cv_broadcast(&zio->io_cv);
 		mutex_exit(&zio->io_lock);
 	} else {
-		kmem_free(zio, sizeof (zio_t));
+		kmem_cache_free(zio_cache, zio);
 	}
 }