PSARC 2007/618 ZFS L2ARC
author brendan
Fri, 09 Nov 2007 21:33:30 -0800
changeset 5450 b25030891c44
parent 5449 12ad36a911ff
child 5451 d106cea3cae1
PSARC 2007/618 ZFS L2ARC 6536054 second tier ("external") ARC
usr/src/cmd/zinject/translate.c
usr/src/cmd/zpool/zpool_main.c
usr/src/cmd/zpool/zpool_vdev.c
usr/src/grub/grub-0.95/stage2/zfs-include/dmu.h
usr/src/grub/grub-0.95/stage2/zfs-include/zfs.h
usr/src/lib/libdiskmgt/common/entry.c
usr/src/lib/libdiskmgt/common/inuse_zpool.c
usr/src/lib/libdiskmgt/common/libdiskmgt.h
usr/src/lib/libzfs/common/libzfs.h
usr/src/lib/libzfs/common/libzfs_import.c
usr/src/lib/libzfs/common/libzfs_pool.c
usr/src/lib/libzfs/common/libzfs_util.c
usr/src/uts/common/fs/zfs/arc.c
usr/src/uts/common/fs/zfs/dmu.c
usr/src/uts/common/fs/zfs/metaslab.c
usr/src/uts/common/fs/zfs/spa.c
usr/src/uts/common/fs/zfs/spa_config.c
usr/src/uts/common/fs/zfs/spa_misc.c
usr/src/uts/common/fs/zfs/sys/arc.h
usr/src/uts/common/fs/zfs/sys/dmu.h
usr/src/uts/common/fs/zfs/sys/spa.h
usr/src/uts/common/fs/zfs/sys/spa_impl.h
usr/src/uts/common/fs/zfs/sys/vdev.h
usr/src/uts/common/fs/zfs/sys/vdev_impl.h
usr/src/uts/common/fs/zfs/sys/zio.h
usr/src/uts/common/fs/zfs/vdev.c
usr/src/uts/common/fs/zfs/vdev_label.c
usr/src/uts/common/fs/zfs/zfs_ioctl.c
usr/src/uts/common/fs/zfs/zio.c
usr/src/uts/common/sys/fs/zfs.h
--- a/usr/src/cmd/zinject/translate.c	Fri Nov 09 18:46:13 2007 -0800
+++ b/usr/src/cmd/zinject/translate.c	Fri Nov 09 21:33:30 2007 -0800
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -437,7 +437,7 @@
 	char *end;
 	zpool_handle_t *zhp;
 	nvlist_t *tgt;
-	boolean_t isspare;
+	boolean_t isspare, iscache;
 
 	/*
 	 * Given a device name or GUID, create an appropriate injection record
@@ -448,7 +448,7 @@
 
 	record->zi_guid = strtoull(device, &end, 16);
 	if (record->zi_guid == 0 || *end != '\0') {
-		tgt = zpool_find_vdev(zhp, device, &isspare);
+		tgt = zpool_find_vdev(zhp, device, &isspare, &iscache);
 
 		if (tgt == NULL) {
 			(void) fprintf(stderr, "cannot find device '%s' in "
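
The hunk above shows the new calling convention: zpool_find_vdev() now reports whether a match is a hot spare or an L2ARC cache device through two out-parameters. A minimal sketch of how a libzfs consumer might use it follows; the pool name, device path, and the classify_device() helper are illustrative only, not part of this change.

#include <stdio.h>
#include <libzfs.h>

/*
 * Illustrative only: classify a named device within a pool using the
 * widened zpool_find_vdev() interface.
 */
static void
classify_device(libzfs_handle_t *hdl, const char *pool, const char *dev)
{
	zpool_handle_t *zhp;
	nvlist_t *tgt;
	boolean_t isspare = B_FALSE, iscache = B_FALSE;

	if ((zhp = zpool_open(hdl, pool)) == NULL)
		return;

	/* Both out-parameters must be supplied after this change. */
	if ((tgt = zpool_find_vdev(zhp, dev, &isspare, &iscache)) == NULL)
		(void) printf("%s: not found in %s\n", dev, pool);
	else if (iscache)
		(void) printf("%s: L2ARC cache device\n", dev);
	else if (isspare)
		(void) printf("%s: hot spare\n", dev);
	else
		(void) printf("%s: ordinary vdev\n", dev);

	zpool_close(zhp);
}
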
--- a/usr/src/cmd/zpool/zpool_main.c	Fri Nov 09 18:46:13 2007 -0800
+++ b/usr/src/cmd/zpool/zpool_main.c	Fri Nov 09 21:33:30 2007 -0800
@@ -213,7 +213,7 @@
 		return (gettext("\treplace [-f] <pool> <device> "
 		    "[new-device]\n"));
 	case HELP_REMOVE:
-		return (gettext("\tremove <pool> <device>\n"));
+		return (gettext("\tremove <pool> <device> ...\n"));
 	case HELP_SCRUB:
 		return (gettext("\tscrub [-s] <pool> ...\n"));
 	case HELP_STATUS:
@@ -493,17 +493,17 @@
 }
 
 /*
- * zpool remove <pool> <vdev>
+ * zpool remove <pool> <vdev> ...
  *
  * Removes the given vdev from the pool.  Currently, this only supports removing
- * spares from the pool.  Eventually, we'll want to support removing leaf vdevs
- * (as an alias for 'detach') as well as toplevel vdevs.
+ * spares and cache devices from the pool.  Eventually, we'll want to support
+ * removing leaf vdevs (as an alias for 'detach') as well as toplevel vdevs.
  */
 int
 zpool_do_remove(int argc, char **argv)
 {
 	char *poolname;
-	int ret;
+	int i, ret = 0;
 	zpool_handle_t *zhp;
 
 	argc--;
@@ -524,7 +524,10 @@
 	if ((zhp = zpool_open(g_zfs, poolname)) == NULL)
 		return (1);
 
-	ret = (zpool_vdev_remove(zhp, argv[1]) != 0);
+	for (i = 1; i < argc; i++) {
+		if (zpool_vdev_remove(zhp, argv[i]) != 0)
+			ret = 1;
+	}
 
 	return (ret);
 }
@@ -910,6 +913,14 @@
 				max = ret;
 	}
 
+	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
+	    &child, &children) == 0) {
+		for (c = 0; c < children; c++)
+			if ((ret = max_width(zhp, child[c], depth + 2,
+			    max)) > max)
+				max = ret;
+	}
+
 	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
 	    &child, &children) == 0) {
 		for (c = 0; c < children; c++)
@@ -995,15 +1006,24 @@
 		free(vname);
 	}
 
+	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
+	    &child, &children) == 0) {
+		(void) printf(gettext("\tcache\n"));
+		for (c = 0; c < children; c++) {
+			vname = zpool_vdev_name(g_zfs, NULL, child[c]);
+			(void) printf("\t  %s\n", vname);
+			free(vname);
+		}
+	}
+
 	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
-	    &child, &children) != 0)
-		return;
-
-	(void) printf(gettext("\tspares\n"));
-	for (c = 0; c < children; c++) {
-		vname = zpool_vdev_name(g_zfs, NULL, child[c]);
-		(void) printf("\t  %s\n", vname);
-		free(vname);
+	    &child, &children) == 0) {
+		(void) printf(gettext("\tspares\n"));
+		for (c = 0; c < children; c++) {
+			vname = zpool_vdev_name(g_zfs, NULL, child[c]);
+			(void) printf("\t  %s\n", vname);
+			free(vname);
+		}
 	}
 }
 
@@ -1655,6 +1675,28 @@
 		    newchild[c], cb, depth + 2);
 		free(vname);
 	}
+
+	/*
+	 * Include level 2 ARC devices in iostat output
+	 */
+	if (nvlist_lookup_nvlist_array(newnv, ZPOOL_CONFIG_L2CACHE,
+	    &newchild, &children) != 0)
+		return;
+
+	if (oldnv && nvlist_lookup_nvlist_array(oldnv, ZPOOL_CONFIG_L2CACHE,
+	    &oldchild, &c) != 0)
+		return;
+
+	if (children > 0) {
+		(void) printf("%-*s      -      -      -      -      -      "
+		    "-\n", cb->cb_namewidth, "cache");
+		for (c = 0; c < children; c++) {
+			vname = zpool_vdev_name(g_zfs, zhp, newchild[c]);
+			print_vdev_stats(zhp, vname, oldnv ? oldchild[c] : NULL,
+			    newchild[c], cb, depth + 2);
+			free(vname);
+		}
+	}
 }
 
 static int
@@ -2805,6 +2847,26 @@
 	}
 }
 
+static void
+print_l2cache(zpool_handle_t *zhp, nvlist_t **l2cache, uint_t nl2cache,
+    int namewidth)
+{
+	uint_t i;
+	char *name;
+
+	if (nl2cache == 0)
+		return;
+
+	(void) printf(gettext("\tcache\n"));
+
+	for (i = 0; i < nl2cache; i++) {
+		name = zpool_vdev_name(g_zfs, zhp, l2cache[i]);
+		print_status_config(zhp, name, l2cache[i],
+		    namewidth, 2, B_FALSE, B_FALSE);
+		free(name);
+	}
+}
+
 /*
  * Display a summary of pool status.  Displays a summary such as:
  *
@@ -2996,8 +3058,8 @@
 	if (config != NULL) {
 		int namewidth;
 		uint64_t nerr;
-		nvlist_t **spares;
-		uint_t nspares;
+		nvlist_t **spares, **l2cache;
+		uint_t nspares, nl2cache;
 
 
 		(void) printf(gettext(" scrub: "));
@@ -3016,6 +3078,10 @@
 			print_status_config(zhp, "logs", nvroot, namewidth, 0,
 			    B_FALSE, B_TRUE);
 
+		if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
+		    &l2cache, &nl2cache) == 0)
+			print_l2cache(zhp, l2cache, nl2cache, namewidth);
+
 		if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
 		    &spares, &nspares) == 0)
 			print_spares(zhp, spares, nspares, namewidth);
@@ -3303,6 +3369,7 @@
 		(void) printf(gettext(" 8   Delegated administration\n"));
 		(void) printf(gettext(" 9   refquota and refreservation "
 		    "properties\n"));
+		(void) printf(gettext(" 10  Cache devices\n"));
 		(void) printf(gettext("For more information on a particular "
 		    "version, including supported releases, see:\n\n"));
 		(void) printf("http://www.opensolaris.org/os/community/zfs/"
--- a/usr/src/cmd/zpool/zpool_vdev.c	Fri Nov 09 18:46:13 2007 -0800
+++ b/usr/src/cmd/zpool/zpool_vdev.c	Fri Nov 09 21:33:30 2007 -0800
@@ -968,6 +968,12 @@
 			if ((ret = make_disks(zhp, child[c])) != 0)
 				return (ret);
 
+	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
+	    &child, &children) == 0)
+		for (c = 0; c < children; c++)
+			if ((ret = make_disks(zhp, child[c])) != 0)
+				return (ret);
+
 	return (0);
 }
 
@@ -1077,6 +1083,14 @@
 			if ((ret = check_in_use(config, child[c], force,
 			    isreplacing, B_TRUE)) != 0)
 				return (ret);
+
+	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
+	    &child, &children) == 0)
+		for (c = 0; c < children; c++)
+			if ((ret = check_in_use(config, child[c], force,
+			    isreplacing, B_FALSE)) != 0)
+				return (ret);
+
 	return (0);
 }
 
@@ -1113,6 +1127,12 @@
 		return (VDEV_TYPE_LOG);
 	}
 
+	if (strcmp(type, "cache") == 0) {
+		if (mindev != NULL)
+			*mindev = 1;
+		return (VDEV_TYPE_L2CACHE);
+	}
+
 	return (NULL);
 }
 
@@ -1125,8 +1145,8 @@
 nvlist_t *
 construct_spec(int argc, char **argv)
 {
-	nvlist_t *nvroot, *nv, **top, **spares;
-	int t, toplevels, mindev, nspares, nlogs;
+	nvlist_t *nvroot, *nv, **top, **spares, **l2cache;
+	int t, toplevels, mindev, nspares, nlogs, nl2cache;
 	const char *type;
 	uint64_t is_log;
 	boolean_t seen_logs;
@@ -1134,8 +1154,10 @@
 	top = NULL;
 	toplevels = 0;
 	spares = NULL;
+	l2cache = NULL;
 	nspares = 0;
 	nlogs = 0;
+	nl2cache = 0;
 	is_log = B_FALSE;
 	seen_logs = B_FALSE;
 
@@ -1180,6 +1202,17 @@
 				continue;
 			}
 
+			if (strcmp(type, VDEV_TYPE_L2CACHE) == 0) {
+				if (l2cache != NULL) {
+					(void) fprintf(stderr,
+					    gettext("invalid vdev "
+					    "specification: 'cache' can be "
+					    "specified only once\n"));
+					return (NULL);
+				}
+				is_log = B_FALSE;
+			}
+
 			if (is_log) {
 				if (strcmp(type, VDEV_TYPE_MIRROR) != 0) {
 					(void) fprintf(stderr,
@@ -1219,6 +1252,10 @@
 				spares = child;
 				nspares = children;
 				continue;
+			} else if (strcmp(type, VDEV_TYPE_L2CACHE) == 0) {
+				l2cache = child;
+				nl2cache = children;
+				continue;
 			} else {
 				verify(nvlist_alloc(&nv, NV_UNIQUE_NAME,
 				    0) == 0);
@@ -1259,7 +1296,7 @@
 		top[toplevels - 1] = nv;
 	}
 
-	if (toplevels == 0 && nspares == 0) {
+	if (toplevels == 0 && nspares == 0 && nl2cache == 0) {
 		(void) fprintf(stderr, gettext("invalid vdev "
 		    "specification: at least one toplevel vdev must be "
 		    "specified\n"));
@@ -1283,13 +1320,20 @@
 	if (nspares != 0)
 		verify(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
 		    spares, nspares) == 0);
+	if (nl2cache != 0)
+		verify(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
+		    l2cache, nl2cache) == 0);
 
 	for (t = 0; t < toplevels; t++)
 		nvlist_free(top[t]);
 	for (t = 0; t < nspares; t++)
 		nvlist_free(spares[t]);
+	for (t = 0; t < nl2cache; t++)
+		nvlist_free(l2cache[t]);
 	if (spares)
 		free(spares);
+	if (l2cache)
+		free(l2cache);
 	free(top);
 
 	return (nvroot);
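
construct_spec() now recognizes a "cache" grouping and stores its children in a separate ZPOOL_CONFIG_L2CACHE array on the root nvlist, parallel to ZPOOL_CONFIG_SPARES. The sketch below approximates the resulting shape for a single cache disk; leaf details such as whole-disk handling are elided, and the device path is hypothetical.

#include <libnvpair.h>
#include <sys/fs/zfs.h>

/* Sketch only: the approximate nvlist shape produced for "cache c1d0". */
static nvlist_t *
make_cache_spec(void)
{
	nvlist_t *nvroot, *cache;

	(void) nvlist_alloc(&cache, NV_UNIQUE_NAME, 0);
	(void) nvlist_add_string(cache, ZPOOL_CONFIG_TYPE, VDEV_TYPE_DISK);
	(void) nvlist_add_string(cache, ZPOOL_CONFIG_PATH, "/dev/dsk/c1d0s0");

	(void) nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0);
	(void) nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT);
	/* Cache devices live in their own array, not under CHILDREN. */
	(void) nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
	    &cache, 1);

	nvlist_free(cache);
	return (nvroot);
}
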
--- a/usr/src/grub/grub-0.95/stage2/zfs-include/dmu.h	Fri Nov 09 18:46:13 2007 -0800
+++ b/usr/src/grub/grub-0.95/stage2/zfs-include/dmu.h	Fri Nov 09 21:33:30 2007 -0800
@@ -102,5 +102,6 @@
 #define	DMU_POOL_DEFLATE		"deflate"
 #define	DMU_POOL_HISTORY		"history"
 #define	DMU_POOL_PROPS			"pool_props"
+#define	DMU_POOL_L2CACHE		"l2cache"
 
 #endif	/* _SYS_DMU_H */
--- a/usr/src/grub/grub-0.95/stage2/zfs-include/zfs.h	Fri Nov 09 18:46:13 2007 -0800
+++ b/usr/src/grub/grub-0.95/stage2/zfs-include/zfs.h	Fri Nov 09 21:33:30 2007 -0800
@@ -38,7 +38,8 @@
 #define	SPA_VERSION_7			7ULL
 #define	SPA_VERSION_8			8ULL
 #define	SPA_VERSION_9			9ULL
-#define	SPA_VERSION			SPA_VERSION_9
+#define	SPA_VERSION_10			10ULL
+#define	SPA_VERSION			SPA_VERSION_10
 
 /*
  * The following are configuration names used in the nvlist describing a pool's
@@ -71,6 +72,7 @@
 #define	ZPOOL_CONFIG_SPARES		"spares"
 #define	ZPOOL_CONFIG_IS_SPARE		"is_spare"
 #define	ZPOOL_CONFIG_NPARITY		"nparity"
+#define	ZPOOL_CONFIG_L2CACHE		"l2cache"
 
 #define	VDEV_TYPE_ROOT			"root"
 #define	VDEV_TYPE_MIRROR		"mirror"
@@ -80,17 +82,20 @@
 #define	VDEV_TYPE_FILE			"file"
 #define	VDEV_TYPE_MISSING		"missing"
 #define	VDEV_TYPE_SPARE			"spare"
+#define	VDEV_TYPE_L2CACHE		"l2cache"
 
 /*
  * pool state.  The following states are written to disk as part of the normal
- * SPA lifecycle: ACTIVE, EXPORTED, DESTROYED, SPARE.  The remaining states are
- * software abstractions used at various levels to communicate pool state.
+ * SPA lifecycle: ACTIVE, EXPORTED, DESTROYED, SPARE, L2CACHE.  The remaining
+ * states are software abstractions used at various levels to communicate pool
+ * state.
  */
 typedef enum pool_state {
 	POOL_STATE_ACTIVE = 0,		/* In active use		*/
 	POOL_STATE_EXPORTED,		/* Explicitly exported		*/
 	POOL_STATE_DESTROYED,		/* Explicitly destroyed		*/
 	POOL_STATE_SPARE,		/* Reserved for hot spare use	*/
+	POOL_STATE_L2CACHE,		/* Level 2 ARC device		*/
 	POOL_STATE_UNINITIALIZED,	/* Internal spa_t state		*/
 	POOL_STATE_UNAVAIL,		/* Internal libzfs state	*/
 	POOL_STATE_POTENTIALLY_ACTIVE	/* Internal libzfs state	*/
--- a/usr/src/lib/libdiskmgt/common/entry.c	Fri Nov 09 18:46:13 2007 -0800
+++ b/usr/src/lib/libdiskmgt/common/entry.c	Fri Nov 09 21:33:30 2007 -0800
@@ -1118,6 +1118,10 @@
 		*usage_string = dgettext(TEXT_DOMAIN,
 		    "%s is reserved as a hot spare for ZFS pool %s.  Please "
 		    "see zpool(1M).\n");
+	} else if (strcmp(what, DM_USE_L2CACHE_ZPOOL) == 0) {
+		*usage_string = dgettext(TEXT_DOMAIN,
+		    "%s is in use as a cache device for ZFS pool %s.  "
+		    "Please see zpool(1M).\n");
 	}
 }
 void
--- a/usr/src/lib/libdiskmgt/common/inuse_zpool.c	Fri Nov 09 18:46:13 2007 -0800
+++ b/usr/src/lib/libdiskmgt/common/inuse_zpool.c	Fri Nov 09 21:33:30 2007 -0800
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -102,6 +102,9 @@
 				} else if (state == POOL_STATE_SPARE) {
 					found = 1;
 					type = DM_USE_SPARE_ZPOOL;
+				} else if (state == POOL_STATE_L2CACHE) {
+					found = 1;
+					type = DM_USE_L2CACHE_ZPOOL;
 				}
 			} else {
 				found = 1;
--- a/usr/src/lib/libdiskmgt/common/libdiskmgt.h	Fri Nov 09 18:46:13 2007 -0800
+++ b/usr/src/lib/libdiskmgt/common/libdiskmgt.h	Fri Nov 09 21:33:30 2007 -0800
@@ -216,6 +216,7 @@
 #define	DM_USE_EXPORTED_ZPOOL	"exported_zpool"
 #define	DM_USE_ACTIVE_ZPOOL	"active_zpool"
 #define	DM_USE_SPARE_ZPOOL	"spare_zpool"
+#define	DM_USE_L2CACHE_ZPOOL	"l2cache_zpool"
 
 /* event */
 #define	DM_EV_NAME		"name"
--- a/usr/src/lib/libzfs/common/libzfs.h	Fri Nov 09 18:46:13 2007 -0800
+++ b/usr/src/lib/libzfs/common/libzfs.h	Fri Nov 09 21:33:30 2007 -0800
@@ -113,6 +113,7 @@
 	EZFS_UNSHARESMBFAILED,	/* failed to unshare over smb */
 	EZFS_SHARESMBFAILED,	/* failed to share over smb */
 	EZFS_BADCACHE,		/* bad cache file */
+	EZFS_ISL2CACHE,		/* device is for the level 2 ARC */
 	EZFS_UNKNOWN
 };
 
@@ -216,7 +217,8 @@
 extern int zpool_vdev_degrade(zpool_handle_t *, uint64_t);
 extern int zpool_vdev_clear(zpool_handle_t *, uint64_t);
 
-extern nvlist_t *zpool_find_vdev(zpool_handle_t *, const char *, boolean_t *);
+extern nvlist_t *zpool_find_vdev(zpool_handle_t *, const char *, boolean_t *,
+    boolean_t *);
 extern int zpool_label_disk(libzfs_handle_t *, zpool_handle_t *, char *);
 
 /*
--- a/usr/src/lib/libzfs/common/libzfs_import.c	Fri Nov 09 18:46:13 2007 -0800
+++ b/usr/src/lib/libzfs/common/libzfs_import.c	Fri Nov 09 21:33:30 2007 -0800
@@ -213,11 +213,13 @@
 	name_entry_t *ne;
 
 	/*
-	 * If this is a hot spare not currently in use, add it to the list of
-	 * names to translate, but don't do anything else.
+	 * If this is a hot spare not currently in use or level 2 cache
+	 * device, add it to the list of names to translate, but don't do
+	 * anything else.
 	 */
 	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
-	    &state) == 0 && state == POOL_STATE_SPARE &&
+	    &state) == 0 &&
+	    (state == POOL_STATE_SPARE || state == POOL_STATE_L2CACHE) &&
 	    nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid) == 0) {
 		if ((ne = zfs_alloc(hdl, sizeof (name_entry_t))) == NULL)
 			return (-1);
@@ -415,8 +417,8 @@
 	vdev_entry_t *ve;
 	config_entry_t *ce;
 	nvlist_t *ret = NULL, *config = NULL, *tmp, *nvtop, *nvroot;
-	nvlist_t **spares;
-	uint_t i, nspares;
+	nvlist_t **spares, **l2cache;
+	uint_t i, nspares, nl2cache;
 	boolean_t config_seen;
 	uint64_t best_txg;
 	char *name, *hostname;
@@ -647,6 +649,17 @@
 		}
 
 		/*
+		 * Update the paths for l2cache devices.
+		 */
+		if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
+		    &l2cache, &nl2cache) == 0) {
+			for (i = 0; i < nl2cache; i++) {
+				if (fix_paths(l2cache[i], pl->names) != 0)
+					goto nomem;
+			}
+		}
+
+		/*
 		 * Restore the original information read from the actual label.
 		 */
 		(void) nvlist_remove(config, ZPOOL_CONFIG_HOSTID,
@@ -728,12 +741,12 @@
 			continue;
 
 		if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE,
-		    &state) != 0 || state > POOL_STATE_SPARE) {
+		    &state) != 0 || state > POOL_STATE_L2CACHE) {
 			nvlist_free(*config);
 			continue;
 		}
 
-		if (state != POOL_STATE_SPARE &&
+		if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE &&
 		    (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG,
 		    &txg) != 0 || txg == 0)) {
 			nvlist_free(*config);
@@ -1001,27 +1014,28 @@
 	return (B_FALSE);
 }
 
-typedef struct spare_cbdata {
+typedef struct aux_cbdata {
+	const char	*cb_type;
 	uint64_t	cb_guid;
 	zpool_handle_t	*cb_zhp;
-} spare_cbdata_t;
+} aux_cbdata_t;
 
 static int
-find_spare(zpool_handle_t *zhp, void *data)
+find_aux(zpool_handle_t *zhp, void *data)
 {
-	spare_cbdata_t *cbp = data;
-	nvlist_t **spares;
-	uint_t i, nspares;
+	aux_cbdata_t *cbp = data;
+	nvlist_t **list;
+	uint_t i, count;
 	uint64_t guid;
 	nvlist_t *nvroot;
 
 	verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
 	    &nvroot) == 0);
 
-	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
-	    &spares, &nspares) == 0) {
-		for (i = 0; i < nspares; i++) {
-			verify(nvlist_lookup_uint64(spares[i],
+	if (nvlist_lookup_nvlist_array(nvroot, cbp->cb_type,
+	    &list, &count) == 0) {
+		for (i = 0; i < count; i++) {
+			verify(nvlist_lookup_uint64(list[i],
 			    ZPOOL_CONFIG_GUID, &guid) == 0);
 			if (guid == cbp->cb_guid) {
 				cbp->cb_zhp = zhp;
@@ -1050,7 +1064,7 @@
 	zpool_handle_t *zhp;
 	nvlist_t *pool_config;
 	uint64_t stateval, isspare;
-	spare_cbdata_t cb = { 0 };
+	aux_cbdata_t cb = { 0 };
 	boolean_t isactive;
 
 	*inuse = B_FALSE;
@@ -1068,7 +1082,7 @@
 	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID,
 	    &vdev_guid) == 0);
 
-	if (stateval != POOL_STATE_SPARE) {
+	if (stateval != POOL_STATE_SPARE && stateval != POOL_STATE_L2CACHE) {
 		verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
 		    &name) == 0);
 		verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
@@ -1147,7 +1161,24 @@
 		 */
 		cb.cb_zhp = NULL;
 		cb.cb_guid = vdev_guid;
-		if (zpool_iter(hdl, find_spare, &cb) == 1) {
+		cb.cb_type = ZPOOL_CONFIG_SPARES;
+		if (zpool_iter(hdl, find_aux, &cb) == 1) {
+			name = (char *)zpool_get_name(cb.cb_zhp);
+			ret = TRUE;
+		} else {
+			ret = FALSE;
+		}
+		break;
+
+	case POOL_STATE_L2CACHE:
+
+		/*
+		 * Check if any pool is currently using this l2cache device.
+		 */
+		cb.cb_zhp = NULL;
+		cb.cb_guid = vdev_guid;
+		cb.cb_type = ZPOOL_CONFIG_L2CACHE;
+		if (zpool_iter(hdl, find_aux, &cb) == 1) {
 			name = (char *)zpool_get_name(cb.cb_zhp);
 			ret = TRUE;
 		} else {
--- a/usr/src/lib/libzfs/common/libzfs_pool.c	Fri Nov 09 18:46:13 2007 -0800
+++ b/usr/src/lib/libzfs/common/libzfs_pool.c	Fri Nov 09 21:33:30 2007 -0800
@@ -815,6 +815,11 @@
 			    "one or more devices is out of space"));
 			return (zfs_error(hdl, EZFS_BADDEV, msg));
 
+		case ENOTBLK:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "cache device must be a disk or disk slice"));
+			return (zfs_error(hdl, EZFS_BADDEV, msg));
+
 		default:
 			return (zpool_standard_error(hdl, errno, msg));
 		}
@@ -898,14 +903,14 @@
 	int ret;
 	libzfs_handle_t *hdl = zhp->zpool_hdl;
 	char msg[1024];
-	nvlist_t **spares;
-	uint_t nspares;
+	nvlist_t **spares, **l2cache;
+	uint_t nspares, nl2cache;
 
 	(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
 	    "cannot add to '%s'"), zhp->zpool_name);
 
-	if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL)
-	    < SPA_VERSION_SPARES &&
+	if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) <
+	    SPA_VERSION_SPARES &&
 	    nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
 	    &spares, &nspares) == 0) {
 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be "
@@ -913,6 +918,15 @@
 		return (zfs_error(hdl, EZFS_BADVERSION, msg));
 	}
 
+	if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) <
+	    SPA_VERSION_L2CACHE &&
+	    nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
+	    &l2cache, &nl2cache) == 0) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be "
+		    "upgraded to add cache devices"));
+		return (zfs_error(hdl, EZFS_BADVERSION, msg));
+	}
+
 	if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
 		return (-1);
 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
@@ -963,6 +977,12 @@
 			(void) zfs_error(hdl, EZFS_POOL_NOTSUP, msg);
 			break;
 
+		case ENOTBLK:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "cache device must be a disk or disk slice"));
+			(void) zfs_error(hdl, EZFS_BADDEV, msg);
+			break;
+
 		default:
 			(void) zpool_standard_error(hdl, errno, msg);
 		}
@@ -1172,7 +1192,7 @@
  */
 static nvlist_t *
 vdev_to_nvlist_iter(nvlist_t *nv, const char *search, uint64_t guid,
-    boolean_t *avail_spare)
+    boolean_t *avail_spare, boolean_t *l2cache)
 {
 	uint_t c, children;
 	nvlist_t **child;
@@ -1214,25 +1234,37 @@
 
 	for (c = 0; c < children; c++)
 		if ((ret = vdev_to_nvlist_iter(child[c], search, guid,
-		    avail_spare)) != NULL)
+		    avail_spare, l2cache)) != NULL)
 			return (ret);
 
 	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
 	    &child, &children) == 0) {
 		for (c = 0; c < children; c++) {
 			if ((ret = vdev_to_nvlist_iter(child[c], search, guid,
-			    avail_spare)) != NULL) {
+			    avail_spare, l2cache)) != NULL) {
 				*avail_spare = B_TRUE;
 				return (ret);
 			}
 		}
 	}
 
+	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
+	    &child, &children) == 0) {
+		for (c = 0; c < children; c++) {
+			if ((ret = vdev_to_nvlist_iter(child[c], search, guid,
+			    avail_spare, l2cache)) != NULL) {
+				*l2cache = B_TRUE;
+				return (ret);
+			}
+		}
+	}
+
 	return (NULL);
 }
 
 nvlist_t *
-zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare)
+zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare,
+    boolean_t *l2cache)
 {
 	char buf[MAXPATHLEN];
 	const char *search;
@@ -1254,29 +1286,32 @@
 	    &nvroot) == 0);
 
 	*avail_spare = B_FALSE;
-	return (vdev_to_nvlist_iter(nvroot, search, guid, avail_spare));
+	*l2cache = B_FALSE;
+	return (vdev_to_nvlist_iter(nvroot, search, guid, avail_spare,
+	    l2cache));
 }
 
 /*
- * Returns TRUE if the given guid corresponds to a spare (INUSE or not).
+ * Returns TRUE if the given guid corresponds to the given type.
+ * This is used to check for hot spares (INUSE or not), and level 2 cache
+ * devices.
  */
 static boolean_t
-is_spare(zpool_handle_t *zhp, uint64_t guid)
+is_guid_type(zpool_handle_t *zhp, uint64_t guid, const char *type)
 {
-	uint64_t spare_guid;
+	uint64_t target_guid;
 	nvlist_t *nvroot;
-	nvlist_t **spares;
-	uint_t nspares;
+	nvlist_t **list;
+	uint_t count;
 	int i;
 
 	verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
 	    &nvroot) == 0);
-	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
-	    &spares, &nspares) == 0) {
-		for (i = 0; i < nspares; i++) {
-			verify(nvlist_lookup_uint64(spares[i],
-			    ZPOOL_CONFIG_GUID, &spare_guid) == 0);
-			if (guid == spare_guid)
+	if (nvlist_lookup_nvlist_array(nvroot, type, &list, &count) == 0) {
+		for (i = 0; i < count; i++) {
+			verify(nvlist_lookup_uint64(list[i], ZPOOL_CONFIG_GUID,
+			    &target_guid) == 0);
+			if (guid == target_guid)
 				return (B_TRUE);
 		}
 	}
@@ -1295,21 +1330,26 @@
 	zfs_cmd_t zc = { 0 };
 	char msg[1024];
 	nvlist_t *tgt;
-	boolean_t avail_spare;
+	boolean_t avail_spare, l2cache;
 	libzfs_handle_t *hdl = zhp->zpool_hdl;
 
 	(void) snprintf(msg, sizeof (msg),
 	    dgettext(TEXT_DOMAIN, "cannot online %s"), path);
 
 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
-	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare)) == NULL)
+	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache)) == NULL)
 		return (zfs_error(hdl, EZFS_NODEVICE, msg));
 
 	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
 
-	if (avail_spare || is_spare(zhp, zc.zc_guid) == B_TRUE)
+	if (avail_spare ||
+	    is_guid_type(zhp, zc.zc_guid, ZPOOL_CONFIG_SPARES) == B_TRUE)
 		return (zfs_error(hdl, EZFS_ISSPARE, msg));
 
+	if (l2cache ||
+	    is_guid_type(zhp, zc.zc_guid, ZPOOL_CONFIG_L2CACHE) == B_TRUE)
+		return (zfs_error(hdl, EZFS_ISL2CACHE, msg));
+
 	zc.zc_cookie = VDEV_STATE_ONLINE;
 	zc.zc_obj = flags;
 
@@ -1330,21 +1370,26 @@
 	zfs_cmd_t zc = { 0 };
 	char msg[1024];
 	nvlist_t *tgt;
-	boolean_t avail_spare;
+	boolean_t avail_spare, l2cache;
 	libzfs_handle_t *hdl = zhp->zpool_hdl;
 
 	(void) snprintf(msg, sizeof (msg),
 	    dgettext(TEXT_DOMAIN, "cannot offline %s"), path);
 
 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
-	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare)) == NULL)
+	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache)) == NULL)
 		return (zfs_error(hdl, EZFS_NODEVICE, msg));
 
 	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
 
-	if (avail_spare || is_spare(zhp, zc.zc_guid) == B_TRUE)
+	if (avail_spare ||
+	    is_guid_type(zhp, zc.zc_guid, ZPOOL_CONFIG_SPARES) == B_TRUE)
 		return (zfs_error(hdl, EZFS_ISSPARE, msg));
 
+	if (l2cache ||
+	    is_guid_type(zhp, zc.zc_guid, ZPOOL_CONFIG_L2CACHE) == B_TRUE)
+		return (zfs_error(hdl, EZFS_ISL2CACHE, msg));
+
 	zc.zc_cookie = VDEV_STATE_OFFLINE;
 	zc.zc_obj = istmp ? ZFS_OFFLINE_TEMPORARY : 0;
 
@@ -1461,7 +1506,7 @@
 	char msg[1024];
 	int ret;
 	nvlist_t *tgt;
-	boolean_t avail_spare;
+	boolean_t avail_spare, l2cache;
 	uint64_t val, is_log;
 	char *path;
 	nvlist_t **child;
@@ -1477,12 +1522,15 @@
 		    "cannot attach %s to %s"), new_disk, old_disk);
 
 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
-	if ((tgt = zpool_find_vdev(zhp, old_disk, &avail_spare)) == 0)
+	if ((tgt = zpool_find_vdev(zhp, old_disk, &avail_spare, &l2cache)) == 0)
 		return (zfs_error(hdl, EZFS_NODEVICE, msg));
 
 	if (avail_spare)
 		return (zfs_error(hdl, EZFS_ISSPARE, msg));
 
+	if (l2cache)
+		return (zfs_error(hdl, EZFS_ISL2CACHE, msg));
+
 	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
 	zc.zc_cookie = replacing;
 
@@ -1503,7 +1551,7 @@
 	if (replacing &&
 	    nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_IS_SPARE, &val) == 0 &&
 	    nvlist_lookup_string(child[0], ZPOOL_CONFIG_PATH, &path) == 0 &&
-	    (zpool_find_vdev(zhp, path, &avail_spare) == NULL ||
+	    (zpool_find_vdev(zhp, path, &avail_spare, &l2cache) == NULL ||
 	    !avail_spare) && is_replacing_spare(config_root, tgt, 1)) {
 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 		    "can only be replaced by another hot spare"));
@@ -1516,8 +1564,8 @@
 	 */
 	if (replacing &&
 	    nvlist_lookup_string(child[0], ZPOOL_CONFIG_PATH, &path) == 0 &&
-	    zpool_find_vdev(zhp, path, &avail_spare) != NULL && avail_spare &&
-	    is_replacing_spare(config_root, tgt, 0)) {
+	    zpool_find_vdev(zhp, path, &avail_spare, &l2cache) != NULL &&
+	    avail_spare && is_replacing_spare(config_root, tgt, 0)) {
 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 		    "device has already been replaced with a spare"));
 		return (zfs_error(hdl, EZFS_BADTARGET, msg));
@@ -1612,19 +1660,22 @@
 	zfs_cmd_t zc = { 0 };
 	char msg[1024];
 	nvlist_t *tgt;
-	boolean_t avail_spare;
+	boolean_t avail_spare, l2cache;
 	libzfs_handle_t *hdl = zhp->zpool_hdl;
 
 	(void) snprintf(msg, sizeof (msg),
 	    dgettext(TEXT_DOMAIN, "cannot detach %s"), path);
 
 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
-	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare)) == 0)
+	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache)) == 0)
 		return (zfs_error(hdl, EZFS_NODEVICE, msg));
 
 	if (avail_spare)
 		return (zfs_error(hdl, EZFS_ISSPARE, msg));
 
+	if (l2cache)
+		return (zfs_error(hdl, EZFS_ISL2CACHE, msg));
+
 	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
 
 	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_DETACH, &zc) == 0)
@@ -1656,7 +1707,8 @@
 }
 
 /*
- * Remove the given device.  Currently, this is supported only for hot spares.
+ * Remove the given device.  Currently, this is supported only for hot spares
+ * and level 2 cache devices.
  */
 int
 zpool_vdev_remove(zpool_handle_t *zhp, const char *path)
@@ -1664,19 +1716,20 @@
 	zfs_cmd_t zc = { 0 };
 	char msg[1024];
 	nvlist_t *tgt;
-	boolean_t avail_spare;
+	boolean_t avail_spare, l2cache;
 	libzfs_handle_t *hdl = zhp->zpool_hdl;
 
 	(void) snprintf(msg, sizeof (msg),
 	    dgettext(TEXT_DOMAIN, "cannot remove %s"), path);
 
 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
-	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare)) == 0)
+	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache)) == 0)
 		return (zfs_error(hdl, EZFS_NODEVICE, msg));
 
-	if (!avail_spare) {
+	if (!avail_spare && !l2cache) {
 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-		    "only inactive hot spares can be removed"));
+		    "only inactive hot spares or cache devices "
+		    "can be removed"));
 		return (zfs_error(hdl, EZFS_NODEVICE, msg));
 	}
 
@@ -1697,7 +1750,7 @@
 	zfs_cmd_t zc = { 0 };
 	char msg[1024];
 	nvlist_t *tgt;
-	boolean_t avail_spare;
+	boolean_t avail_spare, l2cache;
 	libzfs_handle_t *hdl = zhp->zpool_hdl;
 
 	if (path)
@@ -1711,9 +1764,14 @@
 
 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
 	if (path) {
-		if ((tgt = zpool_find_vdev(zhp, path, &avail_spare)) == 0)
+		if ((tgt = zpool_find_vdev(zhp, path, &avail_spare,
+		    &l2cache)) == 0)
 			return (zfs_error(hdl, EZFS_NODEVICE, msg));
 
+		/*
+		 * Don't allow error clearing for hot spares.  Do allow
+		 * error clearing for l2cache devices.
+		 */
 		if (avail_spare)
 			return (zfs_error(hdl, EZFS_ISSPARE, msg));
 
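
With the checks above in place, zpool_vdev_remove() accepts inactive cache devices as well as hot spares. A brief sketch of a libzfs caller removing a cache device, mirroring the loop now used by 'zpool remove'; the pool and device names are hypothetical.

#include <libzfs.h>

/* Sketch only: remove one cache device from a pool via libzfs. */
static int
remove_cache_device(const char *pool, const char *dev)
{
	libzfs_handle_t *hdl;
	zpool_handle_t *zhp;
	int ret = 1;

	if ((hdl = libzfs_init()) == NULL)
		return (1);
	if ((zhp = zpool_open(hdl, pool)) != NULL) {
		/* Fails with EZFS_NODEVICE unless dev is a spare or cache. */
		ret = (zpool_vdev_remove(zhp, dev) != 0);
		zpool_close(zhp);
	}
	libzfs_fini(hdl);
	return (ret);
}
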
--- a/usr/src/lib/libzfs/common/libzfs_util.c	Fri Nov 09 18:46:13 2007 -0800
+++ b/usr/src/lib/libzfs/common/libzfs_util.c	Fri Nov 09 21:33:30 2007 -0800
@@ -201,6 +201,8 @@
 		    " modified"));
 	case EZFS_BADCACHE:
 		return (dgettext(TEXT_DOMAIN, "invalid or missing cache file"));
+	case EZFS_ISL2CACHE:
+		return (dgettext(TEXT_DOMAIN, "device is in use as a cache"));
 	case EZFS_UNKNOWN:
 		return (dgettext(TEXT_DOMAIN, "unknown error"));
 	default:
--- a/usr/src/uts/common/fs/zfs/arc.c	Fri Nov 09 18:46:13 2007 -0800
+++ b/usr/src/uts/common/fs/zfs/arc.c	Fri Nov 09 21:33:30 2007 -0800
@@ -47,13 +47,13 @@
  * There are times when it is not possible to evict the requested
  * space.  In these circumstances we are unable to adjust the cache
  * size.  To prevent the cache growing unbounded at these times we
- * implement a "cache throttle" that slowes the flow of new data
- * into the cache until we can make space avaiable.
+ * implement a "cache throttle" that slows the flow of new data
+ * into the cache until we can make space available.
  *
  * 2. The Megiddo and Modha model assumes a fixed cache size.
  * Pages are evicted when the cache is full and there is a cache
  * miss.  Our model has a variable sized cache.  It grows with
- * high use, but also tries to react to memory preasure from the
+ * high use, but also tries to react to memory pressure from the
  * operating system: decreasing its size when system memory is
  * tight.
  *
@@ -75,7 +75,7 @@
  *
  * A new reference to a cache buffer can be obtained in two
  * ways: 1) via a hash table lookup using the DVA as a key,
- * or 2) via one of the ARC lists.  The arc_read() inerface
+ * or 2) via one of the ARC lists.  The arc_read() interface
  * uses method 1, while the internal arc algorithms for
  * adjusting the cache use method 2.  We therefor provide two
  * types of locks: 1) the hash table lock array, and 2) the
@@ -109,6 +109,14 @@
  *
  * Note that the majority of the performance stats are manipulated
  * with atomic operations.
+ *
+ * The L2ARC uses the l2arc_buflist_mtx global mutex for the following:
+ *
+ *	- L2ARC buflist creation
+ *	- L2ARC buflist eviction
+ *	- L2ARC write completion, which walks L2ARC buflists
+ *	- ARC header destruction, as it removes from L2ARC buflists
+ *	- ARC header release, as it removes from L2ARC buflists
  */
 
 #include <sys/spa.h>
@@ -157,19 +165,20 @@
 uint64_t zfs_arc_meta_limit = 0;
 
 /*
- * Note that buffers can be in one of 5 states:
+ * Note that buffers can be in one of 6 states:
  *	ARC_anon	- anonymous (discussed below)
  *	ARC_mru		- recently used, currently cached
  *	ARC_mru_ghost	- recentely used, no longer in cache
  *	ARC_mfu		- frequently used, currently cached
  *	ARC_mfu_ghost	- frequently used, no longer in cache
+ *	ARC_l2c_only	- exists in L2ARC but not other states
  * When there are no active references to the buffer, they are
  * are linked onto a list in one of these arc states.  These are
  * the only buffers that can be evicted or deleted.  Within each
  * state there are multiple lists, one for meta-data and one for
  * non-meta-data.  Meta-data (indirect blocks, blocks of dnodes,
  * etc.) is tracked separately so that it can be managed more
- * explicitly: favored over data, limited explicitely.
+ * explicitly: favored over data, limited explicitly.
  *
  * Anonymous buffers are buffers that are not associated with
  * a DVA.  These are buffers that hold dirty block copies
@@ -177,6 +186,14 @@
  * they are "ref'd" and are considered part of arc_mru
  * that cannot be freed.  Generally, they will aquire a DVA
  * as they are written and migrate onto the arc_mru list.
+ *
+ * The ARC_l2c_only state is for buffers that are in the second
+ * level ARC but no longer in any of the ARC_m* lists.  The second
+ * level ARC itself may also contain buffers that are in any of
+ * the ARC_m* states - meaning that a buffer can exist in two
+ * places.  The reason for the ARC_l2c_only state is to keep the
+ * buffer header in the hash table, so that reads that hit the
+ * second level ARC benefit from these fast lookups.
  */
 
 typedef struct arc_state {
@@ -186,12 +203,13 @@
 	kmutex_t arcs_mtx;
 } arc_state_t;
 
-/* The 5 states: */
+/* The 6 states: */
 static arc_state_t ARC_anon;
 static arc_state_t ARC_mru;
 static arc_state_t ARC_mru_ghost;
 static arc_state_t ARC_mfu;
 static arc_state_t ARC_mfu_ghost;
+static arc_state_t ARC_l2c_only;
 
 typedef struct arc_stats {
 	kstat_named_t arcstat_hits;
@@ -222,6 +240,23 @@
 	kstat_named_t arcstat_c_min;
 	kstat_named_t arcstat_c_max;
 	kstat_named_t arcstat_size;
+	kstat_named_t arcstat_hdr_size;
+	kstat_named_t arcstat_l2_hits;
+	kstat_named_t arcstat_l2_misses;
+	kstat_named_t arcstat_l2_feeds;
+	kstat_named_t arcstat_l2_rw_clash;
+	kstat_named_t arcstat_l2_writes_sent;
+	kstat_named_t arcstat_l2_writes_done;
+	kstat_named_t arcstat_l2_writes_error;
+	kstat_named_t arcstat_l2_writes_hdr_miss;
+	kstat_named_t arcstat_l2_evict_lock_retry;
+	kstat_named_t arcstat_l2_evict_reading;
+	kstat_named_t arcstat_l2_free_on_write;
+	kstat_named_t arcstat_l2_abort_lowmem;
+	kstat_named_t arcstat_l2_cksum_bad;
+	kstat_named_t arcstat_l2_io_error;
+	kstat_named_t arcstat_l2_size;
+	kstat_named_t arcstat_l2_hdr_size;
 } arc_stats_t;
 
 static arc_stats_t arc_stats = {
@@ -252,7 +287,24 @@
 	{ "c",				KSTAT_DATA_UINT64 },
 	{ "c_min",			KSTAT_DATA_UINT64 },
 	{ "c_max",			KSTAT_DATA_UINT64 },
-	{ "size",			KSTAT_DATA_UINT64 }
+	{ "size",			KSTAT_DATA_UINT64 },
+	{ "hdr_size",			KSTAT_DATA_UINT64 },
+	{ "l2_hits",			KSTAT_DATA_UINT64 },
+	{ "l2_misses",			KSTAT_DATA_UINT64 },
+	{ "l2_feeds",			KSTAT_DATA_UINT64 },
+	{ "l2_rw_clash",		KSTAT_DATA_UINT64 },
+	{ "l2_writes_sent",		KSTAT_DATA_UINT64 },
+	{ "l2_writes_done",		KSTAT_DATA_UINT64 },
+	{ "l2_writes_error",		KSTAT_DATA_UINT64 },
+	{ "l2_writes_hdr_miss",		KSTAT_DATA_UINT64 },
+	{ "l2_evict_lock_retry",	KSTAT_DATA_UINT64 },
+	{ "l2_evict_reading",		KSTAT_DATA_UINT64 },
+	{ "l2_free_on_write",		KSTAT_DATA_UINT64 },
+	{ "l2_abort_lowmem",		KSTAT_DATA_UINT64 },
+	{ "l2_cksum_bad",		KSTAT_DATA_UINT64 },
+	{ "l2_io_error",		KSTAT_DATA_UINT64 },
+	{ "l2_size",			KSTAT_DATA_UINT64 },
+	{ "l2_hdr_size",		KSTAT_DATA_UINT64 }
 };
 
 #define	ARCSTAT(stat)	(arc_stats.stat.value.ui64)
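
The new arcstat_l2_* counters are exported through the existing zfs:0:arcstats kstat, so they can be observed from userland without new tooling. A small sketch using libkstat follows (link with -lkstat); only two of the counters are read, and error handling is minimal.

#include <stdio.h>
#include <kstat.h>

int
main(void)
{
	kstat_ctl_t *kc;
	kstat_t *ksp;
	kstat_named_t *kn;

	if ((kc = kstat_open()) == NULL)
		return (1);

	/* The ARC publishes its counters as zfs:0:arcstats. */
	if ((ksp = kstat_lookup(kc, "zfs", 0, "arcstats")) == NULL ||
	    kstat_read(kc, ksp, NULL) == -1) {
		(void) kstat_close(kc);
		return (1);
	}

	if ((kn = kstat_data_lookup(ksp, "l2_hits")) != NULL)
		(void) printf("l2_hits = %llu\n",
		    (u_longlong_t)kn->value.ui64);
	if ((kn = kstat_data_lookup(ksp, "l2_size")) != NULL)
		(void) printf("l2_size = %llu\n",
		    (u_longlong_t)kn->value.ui64);

	(void) kstat_close(kc);
	return (0);
}
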
@@ -299,6 +351,7 @@
 static arc_state_t	*arc_mru_ghost;
 static arc_state_t	*arc_mfu;
 static arc_state_t	*arc_mfu_ghost;
+static arc_state_t	*arc_l2c_only;
 
 /*
  * There are several ARC variables that are critical to export as kstats --
@@ -320,6 +373,8 @@
 static uint64_t		arc_meta_limit;
 static uint64_t		arc_meta_max = 0;
 
+typedef struct l2arc_buf_hdr l2arc_buf_hdr_t;
+
 typedef struct arc_callback arc_callback_t;
 
 struct arc_callback {
@@ -371,6 +426,9 @@
 
 	/* self protecting */
 	refcount_t		b_refcnt;
+
+	l2arc_buf_hdr_t		*b_l2hdr;
+	list_node_t		b_l2node;
 };
 
 static arc_buf_t *arc_eviction_list;
@@ -382,7 +440,8 @@
 static void arc_evict_ghost(arc_state_t *state, int64_t bytes);
 
 #define	GHOST_STATE(state)	\
-	((state) == arc_mru_ghost || (state) == arc_mfu_ghost)
+	((state) == arc_mru_ghost || (state) == arc_mfu_ghost ||	\
+	(state) == arc_l2c_only)
 
 /*
  * Private ARC flags.  These flags are private ARC only flags that will show up
@@ -398,12 +457,24 @@
 #define	ARC_FREED_IN_READ	(1 << 12)	/* buf freed while in read */
 #define	ARC_BUF_AVAILABLE	(1 << 13)	/* block not in active use */
 #define	ARC_INDIRECT		(1 << 14)	/* this is an indirect block */
+#define	ARC_FREE_IN_PROGRESS	(1 << 15)	/* hdr about to be freed */
+#define	ARC_DONT_L2CACHE	(1 << 16)	/* originated by prefetch */
+#define	ARC_L2_READING		(1 << 17)	/* L2ARC read in progress */
+#define	ARC_L2_WRITING		(1 << 18)	/* L2ARC write in progress */
+#define	ARC_L2_EVICTED		(1 << 19)	/* evicted during I/O */
+#define	ARC_L2_WRITE_HEAD	(1 << 20)	/* head of write list */
 
 #define	HDR_IN_HASH_TABLE(hdr)	((hdr)->b_flags & ARC_IN_HASH_TABLE)
 #define	HDR_IO_IN_PROGRESS(hdr)	((hdr)->b_flags & ARC_IO_IN_PROGRESS)
 #define	HDR_IO_ERROR(hdr)	((hdr)->b_flags & ARC_IO_ERROR)
 #define	HDR_FREED_IN_READ(hdr)	((hdr)->b_flags & ARC_FREED_IN_READ)
 #define	HDR_BUF_AVAILABLE(hdr)	((hdr)->b_flags & ARC_BUF_AVAILABLE)
+#define	HDR_FREE_IN_PROGRESS(hdr)	((hdr)->b_flags & ARC_FREE_IN_PROGRESS)
+#define	HDR_DONT_L2CACHE(hdr)	((hdr)->b_flags & ARC_DONT_L2CACHE)
+#define	HDR_L2_READING(hdr)	((hdr)->b_flags & ARC_L2_READING)
+#define	HDR_L2_WRITING(hdr)	((hdr)->b_flags & ARC_L2_WRITING)
+#define	HDR_L2_EVICTED(hdr)	((hdr)->b_flags & ARC_L2_EVICTED)
+#define	HDR_L2_WRITE_HEAD(hdr)	((hdr)->b_flags & ARC_L2_WRITE_HEAD)
 
 /*
  * Hash table routines
@@ -436,6 +507,87 @@
 
 uint64_t zfs_crc64_table[256];
 
+/*
+ * Level 2 ARC
+ */
+
+#define	L2ARC_WRITE_SIZE	(8 * 1024 * 1024)	/* initial write max */
+#define	L2ARC_HEADROOM		4		/* num of writes */
+#define	L2ARC_FEED_DELAY	180		/* starting grace */
+#define	L2ARC_FEED_SECS		1		/* caching interval */
+
+#define	l2arc_writes_sent	ARCSTAT(arcstat_l2_writes_sent)
+#define	l2arc_writes_done	ARCSTAT(arcstat_l2_writes_done)
+
+/*
+ * L2ARC Performance Tunables
+ */
+uint64_t l2arc_write_max = L2ARC_WRITE_SIZE;	/* default max write size */
+uint64_t l2arc_headroom = L2ARC_HEADROOM;	/* number of dev writes */
+uint64_t l2arc_feed_secs = L2ARC_FEED_SECS;	/* interval seconds */
+boolean_t l2arc_noprefetch = B_TRUE;		/* don't cache prefetch bufs */
+
+/*
+ * L2ARC Internals
+ */
+typedef struct l2arc_dev {
+	vdev_t			*l2ad_vdev;	/* vdev */
+	spa_t			*l2ad_spa;	/* spa */
+	uint64_t		l2ad_hand;	/* next write location */
+	uint64_t		l2ad_write;	/* desired write size, bytes */
+	uint64_t		l2ad_start;	/* first addr on device */
+	uint64_t		l2ad_end;	/* last addr on device */
+	uint64_t		l2ad_evict;	/* last addr eviction reached */
+	boolean_t		l2ad_first;	/* first sweep through */
+	list_t			*l2ad_buflist;	/* buffer list */
+	list_node_t		l2ad_node;	/* device list node */
+} l2arc_dev_t;
+
+static list_t L2ARC_dev_list;			/* device list */
+static list_t *l2arc_dev_list;			/* device list pointer */
+static kmutex_t l2arc_dev_mtx;			/* device list mutex */
+static l2arc_dev_t *l2arc_dev_last;		/* last device used */
+static kmutex_t l2arc_buflist_mtx;		/* mutex for all buflists */
+static list_t L2ARC_free_on_write;		/* free after write buf list */
+static list_t *l2arc_free_on_write;		/* free after write list ptr */
+static kmutex_t l2arc_free_on_write_mtx;	/* mutex for list */
+static uint64_t l2arc_ndev;			/* number of devices */
+
+typedef struct l2arc_read_callback {
+	arc_buf_t	*l2rcb_buf;		/* read buffer */
+	spa_t		*l2rcb_spa;		/* spa */
+	blkptr_t	l2rcb_bp;		/* original blkptr */
+	zbookmark_t	l2rcb_zb;		/* original bookmark */
+	int		l2rcb_flags;		/* original flags */
+} l2arc_read_callback_t;
+
+typedef struct l2arc_write_callback {
+	l2arc_dev_t	*l2wcb_dev;		/* device info */
+	arc_buf_hdr_t	*l2wcb_head;		/* head of write buflist */
+} l2arc_write_callback_t;
+
+struct l2arc_buf_hdr {
+	/* protected by arc_buf_hdr  mutex */
+	l2arc_dev_t	*b_dev;			/* L2ARC device */
+	daddr_t		b_daddr;		/* disk address, offset byte */
+};
+
+typedef struct l2arc_data_free {
+	/* protected by l2arc_free_on_write_mtx */
+	void		*l2df_data;
+	size_t		l2df_size;
+	void		(*l2df_func)(void *, size_t);
+	list_node_t	l2df_list_node;
+} l2arc_data_free_t;
+
+static kmutex_t l2arc_feed_thr_lock;
+static kcondvar_t l2arc_feed_thr_cv;
+static uint8_t l2arc_thread_exit;
+
+static void l2arc_read_done(zio_t *zio);
+static void l2arc_hdr_stat_add(void);
+static void l2arc_hdr_stat_remove(void);
+
 static uint64_t
 buf_hash(spa_t *spa, dva_t *dva, uint64_t birth)
 {
@@ -585,6 +737,8 @@
 	refcount_create(&buf->b_refcnt);
 	cv_init(&buf->b_cv, NULL, CV_DEFAULT, NULL);
 	mutex_init(&buf->b_freeze_lock, NULL, MUTEX_DEFAULT, NULL);
+
+	ARCSTAT_INCR(arcstat_hdr_size, sizeof (arc_buf_hdr_t));
 	return (0);
 }
 
@@ -601,6 +755,8 @@
 	refcount_destroy(&buf->b_refcnt);
 	cv_destroy(&buf->b_cv);
 	mutex_destroy(&buf->b_freeze_lock);
+
+	ARCSTAT_INCR(arcstat_hdr_size, -sizeof (arc_buf_hdr_t));
 }
 
 /*
@@ -680,10 +836,24 @@
 	mutex_exit(&buf->b_hdr->b_freeze_lock);
 }
 
+static int
+arc_cksum_equal(arc_buf_t *buf)
+{
+	zio_cksum_t zc;
+	int equal;
+
+	mutex_enter(&buf->b_hdr->b_freeze_lock);
+	fletcher_2_native(buf->b_data, buf->b_hdr->b_size, &zc);
+	equal = ZIO_CHECKSUM_EQUAL(*buf->b_hdr->b_freeze_cksum, zc);
+	mutex_exit(&buf->b_hdr->b_freeze_lock);
+
+	return (equal);
+}
+
 static void
-arc_cksum_compute(arc_buf_t *buf)
+arc_cksum_compute(arc_buf_t *buf, boolean_t force)
 {
-	if (!(zfs_flags & ZFS_DEBUG_MODIFY))
+	if (!force && !(zfs_flags & ZFS_DEBUG_MODIFY))
 		return;
 
 	mutex_enter(&buf->b_hdr->b_freeze_lock);
@@ -700,14 +870,14 @@
 void
 arc_buf_thaw(arc_buf_t *buf)
 {
-	if (!(zfs_flags & ZFS_DEBUG_MODIFY))
-		return;
-
-	if (buf->b_hdr->b_state != arc_anon)
-		panic("modifying non-anon buffer!");
-	if (buf->b_hdr->b_flags & ARC_IO_IN_PROGRESS)
-		panic("modifying buffer while i/o in progress!");
-	arc_cksum_verify(buf);
+	if (zfs_flags & ZFS_DEBUG_MODIFY) {
+		if (buf->b_hdr->b_state != arc_anon)
+			panic("modifying non-anon buffer!");
+		if (buf->b_hdr->b_flags & ARC_IO_IN_PROGRESS)
+			panic("modifying buffer while i/o in progress!");
+		arc_cksum_verify(buf);
+	}
+
 	mutex_enter(&buf->b_hdr->b_freeze_lock);
 	if (buf->b_hdr->b_freeze_cksum != NULL) {
 		kmem_free(buf->b_hdr->b_freeze_cksum, sizeof (zio_cksum_t));
@@ -724,7 +894,7 @@
 
 	ASSERT(buf->b_hdr->b_freeze_cksum != NULL ||
 	    buf->b_hdr->b_state == arc_anon);
-	arc_cksum_compute(buf);
+	arc_cksum_compute(buf, B_FALSE);
 }
 
 static void
@@ -852,7 +1022,7 @@
 	}
 
 	ASSERT(!BUF_EMPTY(ab));
-	if (new_state == arc_anon && old_state != arc_anon) {
+	if (new_state == arc_anon) {
 		buf_hash_remove(ab);
 	}
 
@@ -864,6 +1034,12 @@
 		atomic_add_64(&old_state->arcs_size, -from_delta);
 	}
 	ab->b_state = new_state;
+
+	/* adjust l2arc hdr stats */
+	if (new_state == arc_l2c_only)
+		l2arc_hdr_stat_add();
+	else if (old_state == arc_l2c_only)
+		l2arc_hdr_stat_remove();
 }
 
 void
@@ -990,6 +1166,29 @@
 	    data, metadata, hits);
 }
 
+/*
+ * Free the arc data buffer.  If it is an l2arc write in progress,
+ * the buffer is placed on l2arc_free_on_write to be freed later.
+ */
+static void
+arc_buf_data_free(arc_buf_hdr_t *hdr, void (*free_func)(void *, size_t),
+    void *data, size_t size)
+{
+	if (HDR_L2_WRITING(hdr)) {
+		l2arc_data_free_t *df;
+		df = kmem_alloc(sizeof (l2arc_data_free_t), KM_SLEEP);
+		df->l2df_data = data;
+		df->l2df_size = size;
+		df->l2df_func = free_func;
+		mutex_enter(&l2arc_free_on_write_mtx);
+		list_insert_head(l2arc_free_on_write, df);
+		mutex_exit(&l2arc_free_on_write_mtx);
+		ARCSTAT_BUMP(arcstat_l2_free_on_write);
+	} else {
+		free_func(data, size);
+	}
+}
+
 static void
 arc_buf_destroy(arc_buf_t *buf, boolean_t recycle, boolean_t all)
 {
@@ -1004,11 +1203,13 @@
 		arc_cksum_verify(buf);
 		if (!recycle) {
 			if (type == ARC_BUFC_METADATA) {
-				zio_buf_free(buf->b_data, size);
+				arc_buf_data_free(buf->b_hdr, zio_buf_free,
+				    buf->b_data, size);
 				arc_space_return(size);
 			} else {
 				ASSERT(type == ARC_BUFC_DATA);
-				zio_data_buf_free(buf->b_data, size);
+				arc_buf_data_free(buf->b_hdr,
+				    zio_data_buf_free, buf->b_data, size);
 				atomic_add_64(&arc_size, -size);
 			}
 		}
@@ -1051,6 +1252,30 @@
 	ASSERT3P(hdr->b_state, ==, arc_anon);
 	ASSERT(!HDR_IO_IN_PROGRESS(hdr));
 
+	if (hdr->b_l2hdr != NULL) {
+		if (!MUTEX_HELD(&l2arc_buflist_mtx)) {
+			/*
+			 * To prevent arc_free() and l2arc_evict() from
+			 * attempting to free the same buffer at the same time,
+			 * a FREE_IN_PROGRESS flag is given to arc_free() to
+			 * give it priority.  l2arc_evict() can't destroy this
+			 * header while we are waiting on l2arc_buflist_mtx.
+			 */
+			mutex_enter(&l2arc_buflist_mtx);
+			ASSERT(hdr->b_l2hdr != NULL);
+
+			list_remove(hdr->b_l2hdr->b_dev->l2ad_buflist, hdr);
+			mutex_exit(&l2arc_buflist_mtx);
+		} else {
+			list_remove(hdr->b_l2hdr->b_dev->l2ad_buflist, hdr);
+		}
+		ARCSTAT_INCR(arcstat_l2_size, -hdr->b_size);
+		kmem_free(hdr->b_l2hdr, sizeof (l2arc_buf_hdr_t));
+		if (hdr->b_state == arc_l2c_only)
+			l2arc_hdr_stat_remove();
+		hdr->b_l2hdr = NULL;
+	}
+
 	if (!BUF_EMPTY(hdr)) {
 		ASSERT(!HDR_IN_HASH_TABLE(hdr));
 		bzero(&hdr->b_dva, sizeof (dva_t));
@@ -1214,7 +1439,8 @@
 				if (buf->b_data) {
 					bytes_evicted += ab->b_size;
 					if (recycle && ab->b_type == type &&
-					    ab->b_size == bytes) {
+					    ab->b_size == bytes &&
+					    !HDR_L2_WRITING(ab)) {
 						stolen = buf->b_data;
 						recycle = FALSE;
 					}
@@ -1236,7 +1462,8 @@
 			ASSERT(ab->b_datacnt == 0);
 			arc_change_state(evicted_state, ab, hash_lock);
 			ASSERT(HDR_IN_HASH_TABLE(ab));
-			ab->b_flags = ARC_IN_HASH_TABLE;
+			ab->b_flags |= ARC_IN_HASH_TABLE;
+			ab->b_flags &= ~ARC_BUF_AVAILABLE;
 			DTRACE_PROBE1(arc__evict, arc_buf_hdr_t *, ab);
 			if (!have_lock)
 				mutex_exit(hash_lock);
@@ -1306,11 +1533,22 @@
 		if (mutex_tryenter(hash_lock)) {
 			ASSERT(!HDR_IO_IN_PROGRESS(ab));
 			ASSERT(ab->b_buf == NULL);
-			arc_change_state(arc_anon, ab, hash_lock);
-			mutex_exit(hash_lock);
 			ARCSTAT_BUMP(arcstat_deleted);
 			bytes_deleted += ab->b_size;
-			arc_hdr_destroy(ab);
+
+			if (ab->b_l2hdr != NULL) {
+				/*
+				 * This buffer is cached on the 2nd Level ARC;
+				 * don't destroy the header.
+				 */
+				arc_change_state(arc_l2c_only, ab, hash_lock);
+				mutex_exit(hash_lock);
+			} else {
+				arc_change_state(arc_anon, ab, hash_lock);
+				mutex_exit(hash_lock);
+				arc_hdr_destroy(ab);
+			}
+
 			DTRACE_PROBE1(arc__delete, arc_buf_hdr_t *, ab);
 			if (bytes >= 0 && bytes_deleted >= bytes)
 				break;
@@ -1506,7 +1744,7 @@
 
 	/*
 	 * check to make sure that swapfs has enough space so that anon
-	 * reservations can still succeeed. anon_resvmem() checks that the
+	 * reservations can still succeed. anon_resvmem() checks that the
 	 * availrmem is greater than swapfs_minfree, and the number of reserved
 	 * swap pages.  We also add a bit of extra here just to prevent
 	 * circumstances from getting really dire.
@@ -1523,7 +1761,7 @@
 	 * can have in the system.  However, this is generally fixed at 25 pages
 	 * which is so low that it's useless.  In this comparison, we seek to
 	 * calculate the total heap-size, and reclaim if more than 3/4ths of the
-	 * heap is allocated.  (Or, in the caclulation, if less than 1/4th is
+	 * heap is allocated.  (Or, in the calculation, if less than 1/4th is
 	 * free)
 	 */
 	if (btop(vmem_size(heap_arena, VMEM_FREE)) <
@@ -1564,7 +1802,7 @@
 #endif
 
 	/*
-	 * An agressive reclamation will shrink the cache size as well as
+	 * An aggressive reclamation will shrink the cache size as well as
 	 * reap free buffers from the arc kmem caches.
 	 */
 	if (strat == ARC_RECLAIM_AGGR)
@@ -1648,6 +1886,9 @@
 {
 	int mult;
 
+	if (state == arc_l2c_only)
+		return;
+
 	ASSERT(bytes > 0);
 	/*
 	 * Adapt the target size of the MRU list:
@@ -1944,6 +2185,14 @@
 		arc_change_state(new_state, buf, hash_lock);
 
 		ARCSTAT_BUMP(arcstat_mfu_ghost_hits);
+	} else if (buf->b_state == arc_l2c_only) {
+		/*
+		 * This buffer is on the 2nd Level ARC.
+		 */
+
+		buf->b_arc_access = lbolt;
+		DTRACE_PROBE1(new_state__mfu, arc_buf_hdr_t *, buf);
+		arc_change_state(arc_mfu, buf, hash_lock);
 	} else {
 		ASSERT(!"invalid arc state");
 	}
@@ -1996,7 +2245,12 @@
 	    &hash_lock);
 
 	ASSERT((found == NULL && HDR_FREED_IN_READ(hdr) && hash_lock == NULL) ||
-	    (found == hdr && DVA_EQUAL(&hdr->b_dva, BP_IDENTITY(zio->io_bp))));
+	    (found == hdr && DVA_EQUAL(&hdr->b_dva, BP_IDENTITY(zio->io_bp))) ||
+	    (found == hdr && HDR_L2_READING(hdr)));
+
+	hdr->b_flags &= ~(ARC_L2_READING|ARC_L2_EVICTED);
+	if (l2arc_noprefetch && (hdr->b_flags & ARC_PREFETCH))
+		hdr->b_flags |= ARC_DONT_L2CACHE;
 
 	/* byteswap if necessary */
 	callback_list = hdr->b_acb;
@@ -2004,7 +2258,7 @@
 	if (BP_SHOULD_BYTESWAP(zio->io_bp) && callback_list->acb_byteswap)
 		callback_list->acb_byteswap(buf->b_data, hdr->b_size);
 
-	arc_cksum_compute(buf);
+	arc_cksum_compute(buf, B_FALSE);
 
 	/* create copies of the data buffer for the callers */
 	abuf = buf;
@@ -2108,7 +2362,7 @@
 	arc_buf_hdr_t *hdr;
 	arc_buf_t *buf;
 	kmutex_t *hash_lock;
-	zio_t	*rzio;
+	zio_t *rzio;
 
 top:
 	hdr = buf_hash_find(spa, BP_IDENTITY(bp), bp->blk_birth, &hash_lock);
@@ -2255,7 +2509,6 @@
 
 		if (GHOST_STATE(hdr->b_state))
 			arc_access(hdr, hash_lock);
-		mutex_exit(hash_lock);
 
 		ASSERT3U(hdr->b_size, ==, size);
 		DTRACE_PROBE3(arc__miss, blkptr_t *, bp, uint64_t, size,
@@ -2265,6 +2518,57 @@
 		    demand, prefetch, hdr->b_type != ARC_BUFC_METADATA,
 		    data, metadata, misses);
 
+		if (l2arc_ndev != 0) {
+			/*
+			 * Read from the L2ARC if the following are true:
+			 * 1. This buffer has L2ARC metadata.
+			 * 2. This buffer isn't currently writing to the L2ARC.
+			 */
+			if (hdr->b_l2hdr != NULL && !HDR_L2_WRITING(hdr)) {
+				vdev_t *vd = hdr->b_l2hdr->b_dev->l2ad_vdev;
+				daddr_t addr = hdr->b_l2hdr->b_daddr;
+				l2arc_read_callback_t *cb;
+
+				DTRACE_PROBE1(l2arc__hit, arc_buf_hdr_t *, hdr);
+				ARCSTAT_BUMP(arcstat_l2_hits);
+
+				hdr->b_flags |= ARC_L2_READING;
+				mutex_exit(hash_lock);
+
+				cb = kmem_zalloc(sizeof (l2arc_read_callback_t),
+				    KM_SLEEP);
+				cb->l2rcb_buf = buf;
+				cb->l2rcb_spa = spa;
+				cb->l2rcb_bp = *bp;
+				cb->l2rcb_zb = *zb;
+				cb->l2rcb_flags = flags;
+
+				/*
+				 * l2arc read.
+				 */
+				rzio = zio_read_phys(pio, vd, addr, size,
+				    buf->b_data, ZIO_CHECKSUM_OFF,
+				    l2arc_read_done, cb, priority,
+				    flags | ZIO_FLAG_DONT_CACHE, B_FALSE);
+				DTRACE_PROBE2(l2arc__read, vdev_t *, vd,
+				    zio_t *, rzio);
+
+				if (*arc_flags & ARC_WAIT)
+					return (zio_wait(rzio));
+
+				ASSERT(*arc_flags & ARC_NOWAIT);
+				zio_nowait(rzio);
+				return (0);
+			} else {
+				DTRACE_PROBE1(l2arc__miss,
+				    arc_buf_hdr_t *, hdr);
+				ARCSTAT_BUMP(arcstat_l2_misses);
+				if (HDR_L2_WRITING(hdr))
+					ARCSTAT_BUMP(arcstat_l2_rw_clash);
+			}
+		}
+		mutex_exit(hash_lock);
+
 		rzio = zio_read(pio, spa, bp, buf->b_data, size,
 		    arc_read_done, buf, priority, flags, zb);
 
@@ -2402,7 +2706,8 @@
 
 		arc_change_state(evicted_state, hdr, hash_lock);
 		ASSERT(HDR_IN_HASH_TABLE(hdr));
-		hdr->b_flags = ARC_IN_HASH_TABLE;
+		hdr->b_flags |= ARC_IN_HASH_TABLE;
+		hdr->b_flags &= ~ARC_BUF_AVAILABLE;
 
 		mutex_exit(&evicted_state->arcs_mtx);
 		mutex_exit(&old_state->arcs_mtx);
@@ -2428,6 +2733,8 @@
 {
 	arc_buf_hdr_t *hdr = buf->b_hdr;
 	kmutex_t *hash_lock = HDR_LOCK(hdr);
+	l2arc_buf_hdr_t *l2hdr = NULL;
+	uint64_t buf_size;
 
 	/* this buffer is not on any list */
 	ASSERT(refcount_count(&hdr->b_refcnt) > 0);
@@ -2452,6 +2759,7 @@
 		uint64_t blksz = hdr->b_size;
 		spa_t *spa = hdr->b_spa;
 		arc_buf_contents_t type = hdr->b_type;
+		uint32_t flags = hdr->b_flags;
 
 		ASSERT(hdr->b_datacnt > 1);
 		/*
@@ -2473,6 +2781,12 @@
 			atomic_add_64(size, -hdr->b_size);
 		}
 		hdr->b_datacnt -= 1;
+		if (hdr->b_l2hdr != NULL) {
+			mutex_enter(&l2arc_buflist_mtx);
+			l2hdr = hdr->b_l2hdr;
+			hdr->b_l2hdr = NULL;
+			buf_size = hdr->b_size;
+		}
 		arc_cksum_verify(buf);
 
 		mutex_exit(hash_lock);
@@ -2484,21 +2798,27 @@
 		nhdr->b_buf = buf;
 		nhdr->b_state = arc_anon;
 		nhdr->b_arc_access = 0;
-		nhdr->b_flags = 0;
+		nhdr->b_flags = flags & ARC_L2_WRITING;
+		nhdr->b_l2hdr = NULL;
 		nhdr->b_datacnt = 1;
 		nhdr->b_freeze_cksum = NULL;
 		(void) refcount_add(&nhdr->b_refcnt, tag);
 		buf->b_hdr = nhdr;
 		atomic_add_64(&arc_anon->arcs_size, blksz);
-
-		hdr = nhdr;
 	} else {
 		ASSERT(refcount_count(&hdr->b_refcnt) == 1);
 		ASSERT(!list_link_active(&hdr->b_arc_node));
 		ASSERT(!HDR_IO_IN_PROGRESS(hdr));
 		arc_change_state(arc_anon, hdr, hash_lock);
 		hdr->b_arc_access = 0;
+		if (hdr->b_l2hdr != NULL) {
+			mutex_enter(&l2arc_buflist_mtx);
+			l2hdr = hdr->b_l2hdr;
+			hdr->b_l2hdr = NULL;
+			buf_size = hdr->b_size;
+		}
 		mutex_exit(hash_lock);
+
 		bzero(&hdr->b_dva, sizeof (dva_t));
 		hdr->b_birth = 0;
 		hdr->b_cksum0 = 0;
@@ -2506,6 +2826,14 @@
 	}
 	buf->b_efunc = NULL;
 	buf->b_private = NULL;
+
+	if (l2hdr) {
+		list_remove(l2hdr->b_dev->l2ad_buflist, hdr);
+		kmem_free(l2hdr, sizeof (l2arc_buf_hdr_t));
+		ARCSTAT_INCR(arcstat_l2_size, -buf_size);
+	}
+	if (MUTEX_HELD(&l2arc_buflist_mtx))
+		mutex_exit(&l2arc_buflist_mtx);
 }
 
 int
@@ -2559,7 +2887,7 @@
 		}
 		mutex_exit(&hdr->b_freeze_lock);
 	}
-	arc_cksum_compute(buf);
+	arc_cksum_compute(buf, B_FALSE);
 	hdr->b_flags |= ARC_IO_IN_PROGRESS;
 }
 
@@ -2704,6 +3032,7 @@
 			ab->b_buf->b_private = NULL;
 			mutex_exit(hash_lock);
 		} else if (refcount_is_zero(&ab->b_refcnt)) {
+			ab->b_flags |= ARC_FREE_IN_PROGRESS;
 			mutex_exit(hash_lock);
 			arc_hdr_destroy(ab);
 			ARCSTAT_BUMP(arcstat_deleted);
@@ -2847,6 +3176,7 @@
 	arc_mru_ghost = &ARC_mru_ghost;
 	arc_mfu = &ARC_mfu;
 	arc_mfu_ghost = &ARC_mfu_ghost;
+	arc_l2c_only = &ARC_l2c_only;
 	arc_size = 0;
 
 	mutex_init(&arc_anon->arcs_mtx, NULL, MUTEX_DEFAULT, NULL);
@@ -2854,6 +3184,7 @@
 	mutex_init(&arc_mru_ghost->arcs_mtx, NULL, MUTEX_DEFAULT, NULL);
 	mutex_init(&arc_mfu->arcs_mtx, NULL, MUTEX_DEFAULT, NULL);
 	mutex_init(&arc_mfu_ghost->arcs_mtx, NULL, MUTEX_DEFAULT, NULL);
+	mutex_init(&arc_l2c_only->arcs_mtx, NULL, MUTEX_DEFAULT, NULL);
 
 	list_create(&arc_mru->arcs_list[ARC_BUFC_METADATA],
 	    sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_arc_node));
@@ -2871,6 +3202,10 @@
 	    sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_arc_node));
 	list_create(&arc_mfu_ghost->arcs_list[ARC_BUFC_DATA],
 	    sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_arc_node));
+	list_create(&arc_l2c_only->arcs_list[ARC_BUFC_METADATA],
+	    sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_arc_node));
+	list_create(&arc_l2c_only->arcs_list[ARC_BUFC_DATA],
+	    sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_arc_node));
 
 	buf_init();
 
@@ -2932,3 +3267,868 @@
 
 	buf_fini();
 }
+
+/*
+ * Level 2 ARC
+ *
+ * The level 2 ARC (L2ARC) is a cache layer in-between main memory and disk.
+ * It uses dedicated storage devices to hold cached data, which are populated
+ * using large infrequent writes.  The main role of this cache is to boost
+ * the performance of random read workloads.  The intended L2ARC devices
+ * include short-stroked disks, solid state disks, and other media with
+ * substantially faster read latency than disk.
+ *
+ *                 +-----------------------+
+ *                 |         ARC           |
+ *                 +-----------------------+
+ *                    |         ^     ^
+ *                    |         |     |
+ *      l2arc_feed_thread()    arc_read()
+ *                    |         |     |
+ *                    |  l2arc read   |
+ *                    V         |     |
+ *               +---------------+    |
+ *               |     L2ARC     |    |
+ *               +---------------+    |
+ *                   |    ^           |
+ *          l2arc_write() |           |
+ *                   |    |           |
+ *                   V    |           |
+ *                 +-------+      +-------+
+ *                 | vdev  |      | vdev  |
+ *                 | cache |      | cache |
+ *                 +-------+      +-------+
+ *                 +=========+     .-----.
+ *                 :  L2ARC  :    |-_____-|
+ *                 : devices :    | Disks |
+ *                 +=========+    `-_____-'
+ *
+ * Read requests are satisfied from the following sources, in order:
+ *
+ *	1) ARC
+ *	2) vdev cache of L2ARC devices
+ *	3) L2ARC devices
+ *	4) vdev cache of disks
+ *	5) disks
+ *
+ * Some L2ARC device types exhibit extremely slow write performance.
+ * To accommodate this, there are some significant differences between
+ * the L2ARC and traditional cache design:
+ *
+ * 1. There is no eviction path from the ARC to the L2ARC.  Evictions from
+ * the ARC behave as usual, freeing buffers and placing headers on ghost
+ * lists.  The ARC does not send buffers to the L2ARC during eviction as
+ * this would add inflated write latencies for all ARC memory pressure.
+ *
+ * 2. The L2ARC attempts to cache data from the ARC before it is evicted.
+ * It does this by periodically scanning buffers from the eviction-end of
+ * the MFU and MRU ARC lists, copying them to the L2ARC devices if they are
+ * not already there.  It scans until a headroom of buffers is satisfied,
+ * which itself serves as a cushion against ARC eviction.  The thread that
+ * does this is l2arc_feed_thread(), illustrated below; example sizes are
+ * included to give a better sense of the ratios involved:
+ *
+ *	       head -->                        tail
+ *	        +---------------------+----------+
+ *	ARC_mfu |:::::#:::::::::::::::|o#o###o###|-->.   # already on L2ARC
+ *	        +---------------------+----------+   |   o L2ARC eligible
+ *	ARC_mru |:#:::::::::::::::::::|#o#ooo####|-->|   : ARC buffer
+ *	        +---------------------+----------+   |
+ *	             15.9 Gbytes      ^ 32 Mbytes    |
+ *	                           headroom          |
+ *	                                      l2arc_feed_thread()
+ *	                                             |
+ *	                 l2arc write hand <--[oooo]--'
+ *	                         |           8 Mbyte
+ *	                         |          write max
+ *	                         V
+ *		  +==============================+
+ *	L2ARC dev |####|#|###|###|    |####| ... |
+ *	          +==============================+
+ *	                     32 Gbytes
+ *
+ * 3. If an ARC buffer is copied to the L2ARC but then hit instead of
+ * evicted, then the L2ARC has cached a buffer much sooner than it probably
+ * needed to, potentially wasting L2ARC device bandwidth and storage.  It is
+ * safe to say that this is an uncommon case, since buffers at the end of
+ * the ARC lists have moved there due to inactivity.
+ *
+ * 4. If the ARC evicts faster than the L2ARC can maintain a headroom,
+ * then the L2ARC simply misses copying some buffers.  This serves as a
+ * pressure valve to prevent heavy read workloads from both stalling the ARC
+ * with waits and clogging the L2ARC with writes.  This also helps prevent
+ * the potential for the L2ARC to churn if it attempts to cache content too
+ * quickly, such as during backups of the entire pool.
+ *
+ * 5. Writes to the L2ARC devices are grouped and sent in-sequence, so that
+ * the vdev queue can aggregate them into larger and fewer writes.  Each
+ * device is written to in a rotor fashion, sweeping writes through
+ * available space then repeating.
+ *
+ * 6. The L2ARC does not store dirty content.  It never needs to flush
+ * write buffers back to disk-based storage.
+ *
+ * 7. If an ARC buffer is written (and dirtied) which also exists in the
+ * L2ARC, the now stale L2ARC buffer is immediately dropped.
+ *
+ * The performance of the L2ARC can be tweaked by a number of tunables, which
+ * may be necessary for different workloads:
+ *
+ *	l2arc_write_max		max write bytes per interval
+ *	l2arc_noprefetch	skip caching prefetched buffers
+ *	l2arc_headroom		number of max device writes to precache
+ *	l2arc_feed_secs		seconds between L2ARC writing
+ *
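+ * For example, on Solaris these module globals can usually be adjusted
+ * from /etc/system (the values below are illustrative only):
+ *
+ *	set zfs:l2arc_write_max = 0x2000000
+ *	set zfs:l2arc_feed_secs = 2
+ *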
+ * Tunables may be removed or added as future performance improvements are
+ * integrated, and also may become zpool properties.
+ */
+
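+/*
+ * L2ARC buffer header accounting: move the header size from the ARC
+ * header kstat to the L2ARC header kstat, and back again in
+ * l2arc_hdr_stat_remove().
+ */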
+static void
+l2arc_hdr_stat_add(void)
+{
+	ARCSTAT_INCR(arcstat_l2_hdr_size, sizeof (arc_buf_hdr_t) +
+	    sizeof (l2arc_buf_hdr_t));
+	ARCSTAT_INCR(arcstat_hdr_size, -sizeof (arc_buf_hdr_t));
+}
+
+static void
+l2arc_hdr_stat_remove(void)
+{
+	ARCSTAT_INCR(arcstat_l2_hdr_size, -sizeof (arc_buf_hdr_t) -
+	    sizeof (l2arc_buf_hdr_t));
+	ARCSTAT_INCR(arcstat_hdr_size, sizeof (arc_buf_hdr_t));
+}
+
+/*
+ * Cycle through L2ARC devices.  This is how L2ARC load balances.
+ * This is called with l2arc_dev_mtx held, which also locks out spa removal.
+ */
+static l2arc_dev_t *
+l2arc_dev_get_next(void)
+{
+	l2arc_dev_t *next;
+
+	if (l2arc_dev_last == NULL) {
+		next = list_head(l2arc_dev_list);
+	} else {
+		next = list_next(l2arc_dev_list, l2arc_dev_last);
+		if (next == NULL)
+			next = list_head(l2arc_dev_list);
+	}
+
+	l2arc_dev_last = next;
+
+	return (next);
+}
+
+/*
+ * A write to a cache device has completed.  Update all headers to allow
+ * reads from these buffers to begin.
+ */
+static void
+l2arc_write_done(zio_t *zio)
+{
+	l2arc_write_callback_t *cb;
+	l2arc_dev_t *dev;
+	list_t *buflist;
+	l2arc_data_free_t *df, *df_prev;
+	arc_buf_hdr_t *head, *ab, *ab_prev;
+	kmutex_t *hash_lock;
+
+	cb = zio->io_private;
+	ASSERT(cb != NULL);
+	dev = cb->l2wcb_dev;
+	ASSERT(dev != NULL);
+	head = cb->l2wcb_head;
+	ASSERT(head != NULL);
+	buflist = dev->l2ad_buflist;
+	ASSERT(buflist != NULL);
+	DTRACE_PROBE2(l2arc__iodone, zio_t *, zio,
+	    l2arc_write_callback_t *, cb);
+
+	if (zio->io_error != 0)
+		ARCSTAT_BUMP(arcstat_l2_writes_error);
+
+	mutex_enter(&l2arc_buflist_mtx);
+
+	/*
+	 * All writes completed, or an error was hit.
+	 */
+	for (ab = list_prev(buflist, head); ab; ab = ab_prev) {
+		ab_prev = list_prev(buflist, ab);
+
+		hash_lock = HDR_LOCK(ab);
+		if (!mutex_tryenter(hash_lock)) {
+			/*
+			 * This buffer misses out.  It may be in the process
+			 * of being evicted.  Its ARC_L2_WRITING flag will be
+			 * left set, denying reads to this buffer.
+			 */
+			ARCSTAT_BUMP(arcstat_l2_writes_hdr_miss);
+			continue;
+		}
+
+		if (zio->io_error != 0) {
+			/*
+			 * Error - invalidate L2ARC entry.
+			 */
+			ab->b_l2hdr = NULL;
+		}
+
+		/*
+		 * Allow ARC to begin reads to this L2ARC entry.
+		 */
+		ab->b_flags &= ~ARC_L2_WRITING;
+
+		mutex_exit(hash_lock);
+	}
+
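+	/*
+	 * All headers have been processed; bump the completion count and
+	 * free the dummy write head inserted by l2arc_write_buffers().
+	 */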
+	atomic_inc_64(&l2arc_writes_done);
+	list_remove(buflist, head);
+	kmem_cache_free(hdr_cache, head);
+	mutex_exit(&l2arc_buflist_mtx);
+
+	/*
+	 * Free buffers that were tagged for destruction.
+	 */
+	mutex_enter(&l2arc_free_on_write_mtx);
+	buflist = l2arc_free_on_write;
+	for (df = list_tail(buflist); df; df = df_prev) {
+		df_prev = list_prev(buflist, df);
+		ASSERT(df->l2df_data != NULL);
+		ASSERT(df->l2df_func != NULL);
+		df->l2df_func(df->l2df_data, df->l2df_size);
+		list_remove(buflist, df);
+		kmem_free(df, sizeof (l2arc_data_free_t));
+	}
+	mutex_exit(&l2arc_free_on_write_mtx);
+
+	kmem_free(cb, sizeof (l2arc_write_callback_t));
+}
+
+/*
+ * A read to a cache device completed.  Validate buffer contents before
+ * handing over to the regular ARC routines.
+ */
+static void
+l2arc_read_done(zio_t *zio)
+{
+	l2arc_read_callback_t *cb;
+	arc_buf_hdr_t *hdr;
+	arc_buf_t *buf;
+	zio_t *rzio;
+	kmutex_t *hash_lock;
+	int equal, err = 0;
+
+	cb = zio->io_private;
+	ASSERT(cb != NULL);
+	buf = cb->l2rcb_buf;
+	ASSERT(buf != NULL);
+	hdr = buf->b_hdr;
+	ASSERT(hdr != NULL);
+
+	hash_lock = HDR_LOCK(hdr);
+	mutex_enter(hash_lock);
+
+	/*
+	 * Check this survived the L2ARC journey.
+	 */
+	equal = arc_cksum_equal(buf);
+	if (equal && zio->io_error == 0 && !HDR_L2_EVICTED(hdr)) {
+		mutex_exit(hash_lock);
+		zio->io_private = buf;
+		arc_read_done(zio);
+	} else {
+		mutex_exit(hash_lock);
+		/*
+		 * Buffer didn't survive caching.  Increment stats and
+		 * reissue to the original storage device.
+		 */
+		if (zio->io_error != 0)
+			ARCSTAT_BUMP(arcstat_l2_io_error);
+		if (!equal)
+			ARCSTAT_BUMP(arcstat_l2_cksum_bad);
+
+		zio->io_flags &= ~ZIO_FLAG_DONT_CACHE;
+		rzio = zio_read(NULL, cb->l2rcb_spa, &cb->l2rcb_bp,
+		    buf->b_data, zio->io_size, arc_read_done, buf,
+		    zio->io_priority, cb->l2rcb_flags, &cb->l2rcb_zb);
+
+		/*
+		 * Since this is a separate thread, we can wait on this
+		 * I/O whether there is an io_waiter or not.
+		 */
+		err = zio_wait(rzio);
+
+		/*
+		 * Let the resent I/O call arc_read_done() instead.
+		 * io_error is set to the reissued I/O error status.
+		 */
+		zio->io_done = NULL;
+		zio->io_waiter = NULL;
+		zio->io_error = err;
+	}
+
+	kmem_free(cb, sizeof (l2arc_read_callback_t));
+}
+
+/*
+ * This is the list priority from which the L2ARC will search for buffers to
+ * cache.  This is used within loops (0..3) to cycle through lists in the
+ * desired order.  This order can have a significant effect on cache
+ * performance.
+ *
+ * Currently the metadata lists are hit first, MFU then MRU, followed by
+ * the data lists.  This function returns a locked list, and also returns
+ * the lock pointer.
+ */
+static list_t *
+l2arc_list_locked(int list_num, kmutex_t **lock)
+{
+	list_t *list;
+
+	ASSERT(list_num >= 0 && list_num <= 3);
+
+	switch (list_num) {
+	case 0:
+		list = &arc_mfu->arcs_list[ARC_BUFC_METADATA];
+		*lock = &arc_mfu->arcs_mtx;
+		break;
+	case 1:
+		list = &arc_mru->arcs_list[ARC_BUFC_METADATA];
+		*lock = &arc_mru->arcs_mtx;
+		break;
+	case 2:
+		list = &arc_mfu->arcs_list[ARC_BUFC_DATA];
+		*lock = &arc_mfu->arcs_mtx;
+		break;
+	case 3:
+		list = &arc_mru->arcs_list[ARC_BUFC_DATA];
+		*lock = &arc_mru->arcs_mtx;
+		break;
+	}
+
+	ASSERT(!(MUTEX_HELD(*lock)));
+	mutex_enter(*lock);
+	return (list);
+}
+
+/*
+ * Evict buffers from the device write hand to the distance specified in
+ * bytes.  This distance may span populated buffers, or it may span nothing.
+ * This clears a region of the L2ARC device, readying it for writing.
+ * If the 'all' boolean is set, every buffer is evicted.
+ */
+static void
+l2arc_evict(l2arc_dev_t *dev, uint64_t distance, boolean_t all)
+{
+	list_t *buflist;
+	l2arc_buf_hdr_t *abl2;
+	arc_buf_hdr_t *ab, *ab_prev;
+	kmutex_t *hash_lock;
+	uint64_t taddr;
+
+	ASSERT(MUTEX_HELD(&l2arc_dev_mtx));
+
+	buflist = dev->l2ad_buflist;
+
+	if (buflist == NULL)
+		return;
+
+	if (!all && dev->l2ad_first) {
+		/*
+		 * This is the first sweep through the device.  There is
+		 * nothing to evict.
+		 */
+		return;
+	}
+
+	if (dev->l2ad_hand >= (dev->l2ad_end - (2 * dev->l2ad_write))) {
+		/*
+		 * When nearing the end of the device, evict to the end
+		 * before the device write hand jumps to the start.
+		 */
+		taddr = dev->l2ad_end;
+	} else {
+		taddr = dev->l2ad_hand + distance;
+	}
+	DTRACE_PROBE4(l2arc__evict, l2arc_dev_t *, dev, list_t *, buflist,
+	    uint64_t, taddr, boolean_t, all);
+
+top:
+	mutex_enter(&l2arc_buflist_mtx);
+	for (ab = list_tail(buflist); ab; ab = ab_prev) {
+		ab_prev = list_prev(buflist, ab);
+
+		hash_lock = HDR_LOCK(ab);
+		if (!mutex_tryenter(hash_lock)) {
+			/*
+			 * Missed the hash lock.  Retry.
+			 */
+			ARCSTAT_BUMP(arcstat_l2_evict_lock_retry);
+			mutex_exit(&l2arc_buflist_mtx);
+			mutex_enter(hash_lock);
+			mutex_exit(hash_lock);
+			goto top;
+		}
+
+		if (HDR_L2_WRITE_HEAD(ab)) {
+			/*
+			 * We hit a write head node.  Leave it for
+			 * l2arc_write_done().
+			 */
+			list_remove(buflist, ab);
+			mutex_exit(hash_lock);
+			continue;
+		}
+
+		if (!all && ab->b_l2hdr != NULL &&
+		    (ab->b_l2hdr->b_daddr > taddr ||
+		    ab->b_l2hdr->b_daddr < dev->l2ad_hand)) {
+			/*
+			 * We've evicted to the target address,
+			 * or the end of the device.
+			 */
+			mutex_exit(hash_lock);
+			break;
+		}
+
+		if (HDR_FREE_IN_PROGRESS(ab)) {
+			/*
+			 * Already on the path to destruction.
+			 */
+			mutex_exit(hash_lock);
+			continue;
+		}
+
+		if (ab->b_state == arc_l2c_only) {
+			ASSERT(!HDR_L2_READING(ab));
+			/*
+			 * This doesn't exist in the ARC.  Destroy.
+			 * arc_hdr_destroy() will call list_remove()
+			 * and decrement arcstat_l2_size.
+			 */
+			arc_change_state(arc_anon, ab, hash_lock);
+			arc_hdr_destroy(ab);
+		} else {
+			/*
+			 * Tell ARC this no longer exists in L2ARC.
+			 */
+			if (ab->b_l2hdr != NULL) {
+				abl2 = ab->b_l2hdr;
+				ab->b_l2hdr = NULL;
+				kmem_free(abl2, sizeof (l2arc_buf_hdr_t));
+				ARCSTAT_INCR(arcstat_l2_size, -ab->b_size);
+			}
+			list_remove(buflist, ab);
+
+			/*
+			 * This may have been left over after a
+			 * failed write.
+			 */
+			ab->b_flags &= ~ARC_L2_WRITING;
+
+			/*
+			 * Invalidate issued or about to be issued
+			 * reads, since we may be about to write
+			 * over this location.
+			 */
+			if (HDR_L2_READING(ab)) {
+				ARCSTAT_BUMP(arcstat_l2_evict_reading);
+				ab->b_flags |= ARC_L2_EVICTED;
+			}
+		}
+		mutex_exit(hash_lock);
+	}
+	mutex_exit(&l2arc_buflist_mtx);
+
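+	/*
+	 * Decrement the allocated-space accounting for the evicted region
+	 * and advance the eviction hand.
+	 */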
+	spa_l2cache_space_update(dev->l2ad_vdev, 0, -(taddr - dev->l2ad_evict));
+	dev->l2ad_evict = taddr;
+}
+
+/*
+ * Find and write ARC buffers to the L2ARC device.
+ *
+ * An ARC_L2_WRITING flag is set so that the L2ARC buffers are not valid
+ * for reading until they have completed writing.
+ */
+static void
+l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev)
+{
+	arc_buf_hdr_t *ab, *ab_prev, *head;
+	l2arc_buf_hdr_t *hdrl2;
+	list_t *list;
+	uint64_t passed_sz, write_sz, buf_sz;
+	uint64_t target_sz = dev->l2ad_write;
+	uint64_t headroom = dev->l2ad_write * l2arc_headroom;
+	void *buf_data;
+	kmutex_t *hash_lock, *list_lock;
+	boolean_t have_lock, full;
+	l2arc_write_callback_t *cb;
+	zio_t *pio, *wzio;
+
+	ASSERT(MUTEX_HELD(&l2arc_dev_mtx));
+	ASSERT(dev->l2ad_vdev != NULL);
+
+	pio = NULL;
+	write_sz = 0;
+	full = B_FALSE;
+	head = kmem_cache_alloc(hdr_cache, KM_SLEEP);
+	head->b_flags |= ARC_L2_WRITE_HEAD;
+
+	/*
+	 * Copy buffers for L2ARC writing.
+	 */
+	mutex_enter(&l2arc_buflist_mtx);
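+	/*
+	 * Walk the ARC lists in l2arc_list_locked() priority order:
+	 * MFU metadata, MRU metadata, MFU data, then MRU data.
+	 */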
+	for (int try = 0; try <= 3; try++) {
+		list = l2arc_list_locked(try, &list_lock);
+		passed_sz = 0;
+
+		for (ab = list_tail(list); ab; ab = ab_prev) {
+			ab_prev = list_prev(list, ab);
+
+			hash_lock = HDR_LOCK(ab);
+			have_lock = MUTEX_HELD(hash_lock);
+			if (!have_lock && !mutex_tryenter(hash_lock)) {
+				/*
+				 * Skip this buffer rather than waiting.
+				 */
+				continue;
+			}
+
+			passed_sz += ab->b_size;
+			if (passed_sz > headroom) {
+				/*
+				 * Searched too far.
+				 */
+				mutex_exit(hash_lock);
+				break;
+			}
+
+			if (ab->b_spa != spa) {
+				mutex_exit(hash_lock);
+				continue;
+			}
+
+			if (ab->b_l2hdr != NULL) {
+				/*
+				 * Already in L2ARC.
+				 */
+				mutex_exit(hash_lock);
+				continue;
+			}
+
+			if (HDR_IO_IN_PROGRESS(ab) || HDR_DONT_L2CACHE(ab)) {
+				mutex_exit(hash_lock);
+				continue;
+			}
+
+			if ((write_sz + ab->b_size) > target_sz) {
+				full = B_TRUE;
+				mutex_exit(hash_lock);
+				break;
+			}
+
+			if (ab->b_buf == NULL) {
+				DTRACE_PROBE1(l2arc__buf__null, void *, ab);
+				mutex_exit(hash_lock);
+				continue;
+			}
+
+			if (pio == NULL) {
+				/*
+				 * Insert a dummy header on the buflist so
+				 * l2arc_write_done() can find where the
+				 * write buffers begin without searching.
+				 */
+				list_insert_head(dev->l2ad_buflist, head);
+
+				cb = kmem_alloc(
+				    sizeof (l2arc_write_callback_t), KM_SLEEP);
+				cb->l2wcb_dev = dev;
+				cb->l2wcb_head = head;
+				pio = zio_root(spa, l2arc_write_done, cb,
+				    ZIO_FLAG_CANFAIL);
+			}
+
+			/*
+			 * Create and add a new L2ARC header.
+			 */
+			hdrl2 = kmem_zalloc(sizeof (l2arc_buf_hdr_t), KM_SLEEP);
+			hdrl2->b_dev = dev;
+			hdrl2->b_daddr = dev->l2ad_hand;
+
+			ab->b_flags |= ARC_L2_WRITING;
+			ab->b_l2hdr = hdrl2;
+			list_insert_head(dev->l2ad_buflist, ab);
+			buf_data = ab->b_buf->b_data;
+			buf_sz = ab->b_size;
+
+			/*
+			 * Compute and store the buffer cksum before
+			 * writing.  On debug the cksum is verified first.
+			 */
+			arc_cksum_verify(ab->b_buf);
+			arc_cksum_compute(ab->b_buf, B_TRUE);
+
+			mutex_exit(hash_lock);
+
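+			/*
+			 * Issue an async physical write of this buffer to
+			 * the cache device.  ZIO checksumming is off; the
+			 * ARC checksum computed above validates L2ARC reads.
+			 */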
+			wzio = zio_write_phys(pio, dev->l2ad_vdev,
+			    dev->l2ad_hand, buf_sz, buf_data, ZIO_CHECKSUM_OFF,
+			    NULL, NULL, ZIO_PRIORITY_ASYNC_WRITE,
+			    ZIO_FLAG_CANFAIL, B_FALSE);
+
+			DTRACE_PROBE2(l2arc__write, vdev_t *, dev->l2ad_vdev,
+			    zio_t *, wzio);
+			(void) zio_nowait(wzio);
+
+			write_sz += buf_sz;
+			dev->l2ad_hand += buf_sz;
+		}
+
+		mutex_exit(list_lock);
+
+		if (full == B_TRUE)
+			break;
+	}
+	mutex_exit(&l2arc_buflist_mtx);
+
+	if (pio == NULL) {
+		ASSERT3U(write_sz, ==, 0);
+		kmem_cache_free(hdr_cache, head);
+		return;
+	}
+
+	ASSERT3U(write_sz, <=, target_sz);
+	ARCSTAT_BUMP(arcstat_l2_writes_sent);
+	ARCSTAT_INCR(arcstat_l2_size, write_sz);
+	spa_l2cache_space_update(dev->l2ad_vdev, 0, write_sz);
+
+	/*
+	 * Bump device hand to the device start if it is approaching the end.
+	 * l2arc_evict() will already have evicted ahead for this case.
+	 */
+	if (dev->l2ad_hand >= (dev->l2ad_end - dev->l2ad_write)) {
+		spa_l2cache_space_update(dev->l2ad_vdev, 0,
+		    dev->l2ad_end - dev->l2ad_hand);
+		dev->l2ad_hand = dev->l2ad_start;
+		dev->l2ad_evict = dev->l2ad_start;
+		dev->l2ad_first = B_FALSE;
+	}
+
+	(void) zio_wait(pio);
+}
+
+/*
+ * This thread feeds the L2ARC at regular intervals.  This is the beating
+ * heart of the L2ARC.
+ */
+static void
+l2arc_feed_thread(void)
+{
+	callb_cpr_t cpr;
+	l2arc_dev_t *dev;
+	spa_t *spa;
+	int interval;
+	boolean_t startup = B_TRUE;
+
+	CALLB_CPR_INIT(&cpr, &l2arc_feed_thr_lock, callb_generic_cpr, FTAG);
+
+	mutex_enter(&l2arc_feed_thr_lock);
+
+	while (l2arc_thread_exit == 0) {
+		/*
+		 * Initially pause for L2ARC_FEED_DELAY seconds as a grace
+		 * interval during boot, then pause for l2arc_feed_secs
+		 * seconds between subsequent feeds.
+		 */
+		CALLB_CPR_SAFE_BEGIN(&cpr);
+		if (startup) {
+			interval = L2ARC_FEED_DELAY;
+			startup = B_FALSE;
+		} else {
+			interval = l2arc_feed_secs;
+		}
+		(void) cv_timedwait(&l2arc_feed_thr_cv, &l2arc_feed_thr_lock,
+		    lbolt + (hz * interval));
+		CALLB_CPR_SAFE_END(&cpr, &l2arc_feed_thr_lock);
+
+		/*
+		 * Do nothing until L2ARC devices exist.
+		 */
+		mutex_enter(&l2arc_dev_mtx);
+		if (l2arc_ndev == 0) {
+			mutex_exit(&l2arc_dev_mtx);
+			continue;
+		}
+
+		/*
+		 * Avoid contributing to memory pressure.
+		 */
+		if (arc_reclaim_needed()) {
+			ARCSTAT_BUMP(arcstat_l2_abort_lowmem);
+			mutex_exit(&l2arc_dev_mtx);
+			continue;
+		}
+
+		/*
+		 * This selects the next l2arc device to write to, and in
+		 * doing so the next spa to feed from: dev->l2ad_spa.
+		 */
+		if ((dev = l2arc_dev_get_next()) == NULL) {
+			mutex_exit(&l2arc_dev_mtx);
+			continue;
+		}
+		spa = dev->l2ad_spa;
+		ASSERT(spa != NULL);
+		ARCSTAT_BUMP(arcstat_l2_feeds);
+
+		/*
+		 * Evict L2ARC buffers that will be overwritten.
+		 */
+		l2arc_evict(dev, dev->l2ad_write, B_FALSE);
+
+		/*
+		 * Write ARC buffers.
+		 */
+		l2arc_write_buffers(spa, dev);
+		mutex_exit(&l2arc_dev_mtx);
+	}
+
+	l2arc_thread_exit = 0;
+	cv_broadcast(&l2arc_feed_thr_cv);
+	CALLB_CPR_EXIT(&cpr);		/* drops l2arc_feed_thr_lock */
+	thread_exit();
+}
+
+/*
+ * Add a vdev for use by the L2ARC.  By this point the spa has already
+ * validated the vdev and opened it.
+ */
+void
+l2arc_add_vdev(spa_t *spa, vdev_t *vd, uint64_t start, uint64_t end)
+{
+	l2arc_dev_t *adddev;
+
+	/*
+	 * Create a new l2arc device entry.
+	 */
+	adddev = kmem_zalloc(sizeof (l2arc_dev_t), KM_SLEEP);
+	adddev->l2ad_spa = spa;
+	adddev->l2ad_vdev = vd;
+	adddev->l2ad_write = l2arc_write_max;
+	adddev->l2ad_start = start;
+	adddev->l2ad_end = end;
+	adddev->l2ad_hand = adddev->l2ad_start;
+	adddev->l2ad_evict = adddev->l2ad_start;
+	adddev->l2ad_first = B_TRUE;
+	ASSERT3U(adddev->l2ad_write, >, 0);
+
+	/*
+	 * This is a list of all ARC buffers that are still valid on the
+	 * device.
+	 */
+	adddev->l2ad_buflist = kmem_zalloc(sizeof (list_t), KM_SLEEP);
+	list_create(adddev->l2ad_buflist, sizeof (arc_buf_hdr_t),
+	    offsetof(arc_buf_hdr_t, b_l2node));
+
+	spa_l2cache_space_update(vd, adddev->l2ad_end - adddev->l2ad_hand, 0);
+
+	/*
+	 * Add device to global list
+	 */
+	mutex_enter(&l2arc_dev_mtx);
+	list_insert_head(l2arc_dev_list, adddev);
+	atomic_inc_64(&l2arc_ndev);
+	mutex_exit(&l2arc_dev_mtx);
+}
+
+/*
+ * Remove a vdev from the L2ARC.
+ */
+void
+l2arc_remove_vdev(vdev_t *vd)
+{
+	l2arc_dev_t *dev, *nextdev, *remdev = NULL;
+
+	/*
+	 * We can only grab the spa config lock when cache device writes
+	 * complete.
+	 */
+	ASSERT3U(l2arc_writes_sent, ==, l2arc_writes_done);
+
+	/*
+	 * Find the device by vdev
+	 */
+	mutex_enter(&l2arc_dev_mtx);
+	for (dev = list_head(l2arc_dev_list); dev; dev = nextdev) {
+		nextdev = list_next(l2arc_dev_list, dev);
+		if (vd == dev->l2ad_vdev) {
+			remdev = dev;
+			break;
+		}
+	}
+	ASSERT(remdev != NULL);
+
+	/*
+	 * Remove device from global list
+	 */
+	list_remove(l2arc_dev_list, remdev);
+	l2arc_dev_last = NULL;		/* may have been invalidated */
+
+	/*
+	 * Clear all buflists and ARC references.  L2ARC device flush.
+	 */
+	l2arc_evict(remdev, 0, B_TRUE);
+	list_destroy(remdev->l2ad_buflist);
+	kmem_free(remdev->l2ad_buflist, sizeof (list_t));
+	kmem_free(remdev, sizeof (l2arc_dev_t));
+
+	atomic_dec_64(&l2arc_ndev);
+	mutex_exit(&l2arc_dev_mtx);
+}
+
+void
+l2arc_init()
+{
+	l2arc_thread_exit = 0;
+	l2arc_ndev = 0;
+	l2arc_writes_sent = 0;
+	l2arc_writes_done = 0;
+
+	mutex_init(&l2arc_feed_thr_lock, NULL, MUTEX_DEFAULT, NULL);
+	cv_init(&l2arc_feed_thr_cv, NULL, CV_DEFAULT, NULL);
+	mutex_init(&l2arc_dev_mtx, NULL, MUTEX_DEFAULT, NULL);
+	mutex_init(&l2arc_buflist_mtx, NULL, MUTEX_DEFAULT, NULL);
+	mutex_init(&l2arc_free_on_write_mtx, NULL, MUTEX_DEFAULT, NULL);
+
+	l2arc_dev_list = &L2ARC_dev_list;
+	l2arc_free_on_write = &L2ARC_free_on_write;
+	list_create(l2arc_dev_list, sizeof (l2arc_dev_t),
+	    offsetof(l2arc_dev_t, l2ad_node));
+	list_create(l2arc_free_on_write, sizeof (l2arc_data_free_t),
+	    offsetof(l2arc_data_free_t, l2df_list_node));
+
+	(void) thread_create(NULL, 0, l2arc_feed_thread, NULL, 0, &p0,
+	    TS_RUN, minclsyspri);
+}
+
+void
+l2arc_fini()
+{
+	mutex_enter(&l2arc_feed_thr_lock);
+	cv_signal(&l2arc_feed_thr_cv);	/* kick thread out of startup */
+	l2arc_thread_exit = 1;
+	while (l2arc_thread_exit != 0)
+		cv_wait(&l2arc_feed_thr_cv, &l2arc_feed_thr_lock);
+	mutex_exit(&l2arc_feed_thr_lock);
+
+	mutex_destroy(&l2arc_feed_thr_lock);
+	cv_destroy(&l2arc_feed_thr_cv);
+	mutex_destroy(&l2arc_dev_mtx);
+	mutex_destroy(&l2arc_buflist_mtx);
+	mutex_destroy(&l2arc_free_on_write_mtx);
+
+	list_destroy(l2arc_dev_list);
+	list_destroy(l2arc_free_on_write);
+}
--- a/usr/src/uts/common/fs/zfs/dmu.c	Fri Nov 09 18:46:13 2007 -0800
+++ b/usr/src/uts/common/fs/zfs/dmu.c	Fri Nov 09 21:33:30 2007 -0800
@@ -1036,6 +1036,7 @@
 	dbuf_init();
 	dnode_init();
 	arc_init();
+	l2arc_init();
 }
 
 void
@@ -1044,4 +1045,5 @@
 	arc_fini();
 	dnode_fini();
 	dbuf_fini();
+	l2arc_fini();
 }
--- a/usr/src/uts/common/fs/zfs/metaslab.c	Fri Nov 09 18:46:13 2007 -0800
+++ b/usr/src/uts/common/fs/zfs/metaslab.c	Fri Nov 09 21:33:30 2007 -0800
@@ -341,7 +341,7 @@
 	int t;
 
 	vdev_space_update(mg->mg_vd, -msp->ms_map.sm_size,
-	    -msp->ms_smo.smo_alloc);
+	    -msp->ms_smo.smo_alloc, B_TRUE);
 
 	metaslab_group_remove(mg, msp);
 
@@ -569,10 +569,10 @@
 			space_map_create(&msp->ms_freemap[t], sm->sm_start,
 			    sm->sm_size, sm->sm_shift, sm->sm_lock);
 		}
-		vdev_space_update(vd, sm->sm_size, 0);
+		vdev_space_update(vd, sm->sm_size, 0, B_TRUE);
 	}
 
-	vdev_space_update(vd, 0, smosync->smo_alloc - smo->smo_alloc);
+	vdev_space_update(vd, 0, smosync->smo_alloc - smo->smo_alloc, B_TRUE);
 
 	ASSERT(msp->ms_allocmap[txg & TXG_MASK].sm_space == 0);
 	ASSERT(msp->ms_freemap[txg & TXG_MASK].sm_space == 0);
--- a/usr/src/uts/common/fs/zfs/spa.c	Fri Nov 09 18:46:13 2007 -0800
+++ b/usr/src/uts/common/fs/zfs/spa.c	Fri Nov 09 21:33:30 2007 -0800
@@ -56,6 +56,7 @@
 #include <sys/dsl_prop.h>
 #include <sys/dsl_synctask.h>
 #include <sys/fs/zfs.h>
+#include <sys/arc.h>
 #include <sys/callb.h>
 #include <sys/systeminfo.h>
 #include <sys/sunddi.h>
@@ -662,6 +663,11 @@
 	spa_config_exit(spa, FTAG);
 
 	/*
+	 * Drop and purge level 2 cache
+	 */
+	spa_l2cache_drop(spa);
+
+	/*
 	 * Close the dsl pool.
 	 */
 	if (spa->spa_dsl_pool) {
@@ -676,15 +682,28 @@
 		vdev_free(spa->spa_root_vdev);
 	ASSERT(spa->spa_root_vdev == NULL);
 
-	for (i = 0; i < spa->spa_nspares; i++)
-		vdev_free(spa->spa_spares[i]);
-	if (spa->spa_spares) {
-		kmem_free(spa->spa_spares, spa->spa_nspares * sizeof (void *));
-		spa->spa_spares = NULL;
+	for (i = 0; i < spa->spa_spares.sav_count; i++)
+		vdev_free(spa->spa_spares.sav_vdevs[i]);
+	if (spa->spa_spares.sav_vdevs) {
+		kmem_free(spa->spa_spares.sav_vdevs,
+		    spa->spa_spares.sav_count * sizeof (void *));
+		spa->spa_spares.sav_vdevs = NULL;
+	}
+	if (spa->spa_spares.sav_config) {
+		nvlist_free(spa->spa_spares.sav_config);
+		spa->spa_spares.sav_config = NULL;
 	}
-	if (spa->spa_sparelist) {
-		nvlist_free(spa->spa_sparelist);
-		spa->spa_sparelist = NULL;
+
+	for (i = 0; i < spa->spa_l2cache.sav_count; i++)
+		vdev_free(spa->spa_l2cache.sav_vdevs[i]);
+	if (spa->spa_l2cache.sav_vdevs) {
+		kmem_free(spa->spa_l2cache.sav_vdevs,
+		    spa->spa_l2cache.sav_count * sizeof (void *));
+		spa->spa_l2cache.sav_vdevs = NULL;
+	}
+	if (spa->spa_l2cache.sav_config) {
+		nvlist_free(spa->spa_l2cache.sav_config);
+		spa->spa_l2cache.sav_config = NULL;
 	}
 
 	spa->spa_async_suspended = 0;
@@ -693,8 +712,8 @@
 /*
  * Load (or re-load) the current list of vdevs describing the active spares for
  * this pool.  When this is called, we have some form of basic information in
- * 'spa_sparelist'.  We parse this into vdevs, try to open them, and then
- * re-generate a more complete list including status information.
+ * 'spa_spares.sav_config'.  We parse this into vdevs, try to open them, and
+ * then re-generate a more complete list including status information.
  */
 static void
 spa_load_spares(spa_t *spa)
@@ -707,8 +726,8 @@
 	/*
 	 * First, close and free any existing spare vdevs.
 	 */
-	for (i = 0; i < spa->spa_nspares; i++) {
-		vd = spa->spa_spares[i];
+	for (i = 0; i < spa->spa_spares.sav_count; i++) {
+		vd = spa->spa_spares.sav_vdevs[i];
 
 		/* Undo the call to spa_activate() below */
 		if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid)) != NULL &&
@@ -718,17 +737,18 @@
 		vdev_free(vd);
 	}
 
-	if (spa->spa_spares)
-		kmem_free(spa->spa_spares, spa->spa_nspares * sizeof (void *));
-
-	if (spa->spa_sparelist == NULL)
+	if (spa->spa_spares.sav_vdevs)
+		kmem_free(spa->spa_spares.sav_vdevs,
+		    spa->spa_spares.sav_count * sizeof (void *));
+
+	if (spa->spa_spares.sav_config == NULL)
 		nspares = 0;
 	else
-		VERIFY(nvlist_lookup_nvlist_array(spa->spa_sparelist,
+		VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config,
 		    ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0);
 
-	spa->spa_nspares = (int)nspares;
-	spa->spa_spares = NULL;
+	spa->spa_spares.sav_count = (int)nspares;
+	spa->spa_spares.sav_vdevs = NULL;
 
 	if (nspares == 0)
 		return;
@@ -742,13 +762,14 @@
 	 * validate each vdev on the spare list.  If the vdev also exists in the
 	 * active configuration, then we also mark this vdev as an active spare.
 	 */
-	spa->spa_spares = kmem_alloc(nspares * sizeof (void *), KM_SLEEP);
-	for (i = 0; i < spa->spa_nspares; i++) {
+	spa->spa_spares.sav_vdevs = kmem_alloc(nspares * sizeof (void *),
+	    KM_SLEEP);
+	for (i = 0; i < spa->spa_spares.sav_count; i++) {
 		VERIFY(spa_config_parse(spa, &vd, spares[i], NULL, 0,
 		    VDEV_ALLOC_SPARE) == 0);
 		ASSERT(vd != NULL);
 
-		spa->spa_spares[i] = vd;
+		spa->spa_spares.sav_vdevs[i] = vd;
 
 		if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid)) != NULL) {
 			if (!tvd->vdev_isspare)
@@ -775,25 +796,160 @@
 			continue;
 
 		vd->vdev_top = vd;
-		(void) vdev_validate_spare(vd);
+		if (vdev_validate_aux(vd) == 0)
+			spa_spare_add(vd);
 	}
 
 	/*
 	 * Recompute the stashed list of spares, with status information
 	 * this time.
 	 */
-	VERIFY(nvlist_remove(spa->spa_sparelist, ZPOOL_CONFIG_SPARES,
+	VERIFY(nvlist_remove(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES,
 	    DATA_TYPE_NVLIST_ARRAY) == 0);
 
-	spares = kmem_alloc(spa->spa_nspares * sizeof (void *), KM_SLEEP);
-	for (i = 0; i < spa->spa_nspares; i++)
-		spares[i] = vdev_config_generate(spa, spa->spa_spares[i],
-		    B_TRUE, B_TRUE);
-	VERIFY(nvlist_add_nvlist_array(spa->spa_sparelist, ZPOOL_CONFIG_SPARES,
-	    spares, spa->spa_nspares) == 0);
-	for (i = 0; i < spa->spa_nspares; i++)
+	spares = kmem_alloc(spa->spa_spares.sav_count * sizeof (void *),
+	    KM_SLEEP);
+	for (i = 0; i < spa->spa_spares.sav_count; i++)
+		spares[i] = vdev_config_generate(spa,
+		    spa->spa_spares.sav_vdevs[i], B_TRUE, B_TRUE, B_FALSE);
+	VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config,
+	    ZPOOL_CONFIG_SPARES, spares, spa->spa_spares.sav_count) == 0);
+	for (i = 0; i < spa->spa_spares.sav_count; i++)
 		nvlist_free(spares[i]);
-	kmem_free(spares, spa->spa_nspares * sizeof (void *));
+	kmem_free(spares, spa->spa_spares.sav_count * sizeof (void *));
+}
+
+/*
+ * Load (or re-load) the current list of vdevs describing the active l2cache
+ * for this pool.  When this is called, we have some form of basic
+ * information in 'spa_l2cache.sav_config'.  We parse this into vdevs, try
+ * to open them, and then re-generate a more complete list including status
+ * information.
+ * Devices which are already active have their details maintained, and are
+ * not re-opened.
+ */
+static void
+spa_load_l2cache(spa_t *spa)
+{
+	nvlist_t **l2cache;
+	uint_t nl2cache;
+	int i, j, oldnvdevs;
+	uint64_t guid;
+	vdev_t *vd, **oldvdevs, **newvdevs;
+	spa_aux_vdev_t *sav = &spa->spa_l2cache;
+
+	if (sav->sav_config != NULL) {
+		VERIFY(nvlist_lookup_nvlist_array(sav->sav_config,
+		    ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0);
+		newvdevs = kmem_alloc(nl2cache * sizeof (void *), KM_SLEEP);
+	} else {
+		nl2cache = 0;
+	}
+
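+	/*
+	 * Save the existing aux vdev array so devices still present in the
+	 * config can be retained below, and the rest purged.
+	 */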
+	oldvdevs = sav->sav_vdevs;
+	oldnvdevs = sav->sav_count;
+	sav->sav_vdevs = NULL;
+	sav->sav_count = 0;
+
+	/*
+	 * Process new nvlist of vdevs.
+	 */
+	for (i = 0; i < nl2cache; i++) {
+		VERIFY(nvlist_lookup_uint64(l2cache[i], ZPOOL_CONFIG_GUID,
+		    &guid) == 0);
+
+		newvdevs[i] = NULL;
+		for (j = 0; j < oldnvdevs; j++) {
+			vd = oldvdevs[j];
+			if (vd != NULL && guid == vd->vdev_guid) {
+				/*
+				 * Retain previous vdev for add/remove ops.
+				 */
+				newvdevs[i] = vd;
+				oldvdevs[j] = NULL;
+				break;
+			}
+		}
+
+		if (newvdevs[i] == NULL) {
+			/*
+			 * Create new vdev
+			 */
+			VERIFY(spa_config_parse(spa, &vd, l2cache[i], NULL, 0,
+			    VDEV_ALLOC_L2CACHE) == 0);
+			ASSERT(vd != NULL);
+			newvdevs[i] = vd;
+
+			/*
+			 * Commit this vdev as an l2cache device,
+			 * even if it fails to open.
+			 */
+			spa_l2cache_add(vd);
+
+			if (vdev_open(vd) != 0)
+				continue;
+
+			vd->vdev_top = vd;
+			(void) vdev_validate_aux(vd);
+
+			if (!vdev_is_dead(vd)) {
+				uint64_t size;
+				size = vdev_get_rsize(vd);
+				ASSERT3U(size, >, 0);
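+				/*
+				 * Make the usable region of the
+				 * device (past the front vdev
+				 * labels) available to the L2ARC.
+				 */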
+				if (spa_mode & FWRITE) {
+					l2arc_add_vdev(spa, vd,
+					    VDEV_LABEL_START_SIZE,
+					    size - VDEV_LABEL_START_SIZE);
+				}
+				spa_l2cache_activate(vd);
+			}
+		}
+	}
+
+	/*
+	 * Purge vdevs that were dropped
+	 */
+	for (i = 0; i < oldnvdevs; i++) {
+		uint64_t pool;
+
+		vd = oldvdevs[i];
+		if (vd != NULL) {
+			if (spa_mode & FWRITE &&
+			    spa_l2cache_exists(vd->vdev_guid, &pool) &&
+			    pool != 0ULL) {
+				l2arc_remove_vdev(vd);
+			}
+			(void) vdev_close(vd);
+			spa_l2cache_remove(vd);
+		}
+	}
+
+	if (oldvdevs)
+		kmem_free(oldvdevs, oldnvdevs * sizeof (void *));
+
+	if (sav->sav_config == NULL)
+		goto out;
+
+	sav->sav_vdevs = newvdevs;
+	sav->sav_count = (int)nl2cache;
+
+	/*
+	 * Recompute the stashed list of l2cache devices, with status
+	 * information this time.
+	 */
+	VERIFY(nvlist_remove(sav->sav_config, ZPOOL_CONFIG_L2CACHE,
+	    DATA_TYPE_NVLIST_ARRAY) == 0);
+
+	l2cache = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP);
+	for (i = 0; i < sav->sav_count; i++)
+		l2cache[i] = vdev_config_generate(spa,
+		    sav->sav_vdevs[i], B_TRUE, B_FALSE, B_TRUE);
+	VERIFY(nvlist_add_nvlist_array(sav->sav_config,
+	    ZPOOL_CONFIG_L2CACHE, l2cache, sav->sav_count) == 0);
+out:
+	for (i = 0; i < sav->sav_count; i++)
+		nvlist_free(l2cache[i]);
+	if (sav->sav_count)
+		kmem_free(l2cache, sav->sav_count * sizeof (void *));
 }
 
 static int
@@ -1090,7 +1246,7 @@
 	 * Load any hot spares for this pool.
 	 */
 	error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
-	    DMU_POOL_SPARES, sizeof (uint64_t), 1, &spa->spa_spares_object);
+	    DMU_POOL_SPARES, sizeof (uint64_t), 1, &spa->spa_spares.sav_object);
 	if (error != 0 && error != ENOENT) {
 		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
 		    VDEV_AUX_CORRUPT_DATA);
@@ -1099,8 +1255,8 @@
 	}
 	if (error == 0) {
 		ASSERT(spa_version(spa) >= SPA_VERSION_SPARES);
-		if (load_nvlist(spa, spa->spa_spares_object,
-		    &spa->spa_sparelist) != 0) {
+		if (load_nvlist(spa, spa->spa_spares.sav_object,
+		    &spa->spa_spares.sav_config) != 0) {
 			vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
 			    VDEV_AUX_CORRUPT_DATA);
 			error = EIO;
@@ -1112,6 +1268,34 @@
 		spa_config_exit(spa, FTAG);
 	}
 
+	/*
+	 * Load any level 2 ARC devices for this pool.
+	 */
+	error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
+	    DMU_POOL_L2CACHE, sizeof (uint64_t), 1,
+	    &spa->spa_l2cache.sav_object);
+	if (error != 0 && error != ENOENT) {
+		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
+		    VDEV_AUX_CORRUPT_DATA);
+		error = EIO;
+		goto out;
+	}
+	if (error == 0) {
+		ASSERT(spa_version(spa) >= SPA_VERSION_L2CACHE);
+		if (load_nvlist(spa, spa->spa_l2cache.sav_object,
+		    &spa->spa_l2cache.sav_config) != 0) {
+			vdev_set_state(rvd, B_TRUE,
+			    VDEV_STATE_CANT_OPEN,
+			    VDEV_AUX_CORRUPT_DATA);
+			error = EIO;
+			goto out;
+		}
+
+		spa_config_enter(spa, RW_WRITER, FTAG);
+		spa_load_l2cache(spa);
+		spa_config_exit(spa, FTAG);
+	}
+
 	spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION);
 
 	error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
@@ -1372,6 +1556,9 @@
 	mutex_exit(&spa_namespace_lock);
 }
 
+/*
+ * Add spares device information to the nvlist.
+ */
 static void
 spa_add_spares(spa_t *spa, nvlist_t *config)
 {
@@ -1383,12 +1570,12 @@
 	uint_t vsc;
 	uint64_t pool;
 
-	if (spa->spa_nspares == 0)
+	if (spa->spa_spares.sav_count == 0)
 		return;
 
 	VERIFY(nvlist_lookup_nvlist(config,
 	    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
-	VERIFY(nvlist_lookup_nvlist_array(spa->spa_sparelist,
+	VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config,
 	    ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0);
 	if (nspares != 0) {
 		VERIFY(nvlist_add_nvlist_array(nvroot,
@@ -1415,6 +1602,62 @@
 	}
 }
 
+/*
+ * Add l2cache device information to the nvlist, including vdev stats.
+ */
+static void
+spa_add_l2cache(spa_t *spa, nvlist_t *config)
+{
+	nvlist_t **l2cache;
+	uint_t i, j, nl2cache;
+	nvlist_t *nvroot;
+	uint64_t guid;
+	vdev_t *vd;
+	vdev_stat_t *vs;
+	uint_t vsc;
+
+	if (spa->spa_l2cache.sav_count == 0)
+		return;
+
+	spa_config_enter(spa, RW_READER, FTAG);
+
+	VERIFY(nvlist_lookup_nvlist(config,
+	    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
+	VERIFY(nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config,
+	    ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0);
+	if (nl2cache != 0) {
+		VERIFY(nvlist_add_nvlist_array(nvroot,
+		    ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0);
+		VERIFY(nvlist_lookup_nvlist_array(nvroot,
+		    ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0);
+
+		/*
+		 * Update level 2 cache device stats.
+		 */
+
+		for (i = 0; i < nl2cache; i++) {
+			VERIFY(nvlist_lookup_uint64(l2cache[i],
+			    ZPOOL_CONFIG_GUID, &guid) == 0);
+
+			vd = NULL;
+			for (j = 0; j < spa->spa_l2cache.sav_count; j++) {
+				if (guid ==
+				    spa->spa_l2cache.sav_vdevs[j]->vdev_guid) {
+					vd = spa->spa_l2cache.sav_vdevs[j];
+					break;
+				}
+			}
+			ASSERT(vd != NULL);
+
+			VERIFY(nvlist_lookup_uint64_array(l2cache[i],
+			    ZPOOL_CONFIG_STATS, (uint64_t **)&vs, &vsc) == 0);
+			vdev_get_stats(vd, vs);
+		}
+	}
+
+	spa_config_exit(spa, FTAG);
+}
+
 int
 spa_get_stats(const char *name, nvlist_t **config, char *altroot, size_t buflen)
 {
@@ -1429,6 +1672,7 @@
 		    spa_get_errlog_size(spa)) == 0);
 
 		spa_add_spares(spa, *config);
+		spa_add_l2cache(spa, *config);
 	}
 
 	/*
@@ -1457,45 +1701,46 @@
 }
 
 /*
- * Validate that the 'spares' array is well formed.  We must have an array of
- * nvlists, each which describes a valid leaf vdev.  If this is an import (mode
- * is VDEV_ALLOC_SPARE), then we allow corrupted spares to be specified, as long
- * as they are well-formed.
+ * Validate that the auxiliary device array is well formed.  We must have an
+ * array of nvlists, each of which describes a valid leaf vdev.  If this is an
+ * import (mode is VDEV_ALLOC_SPARE), then we allow corrupted spares to be
+ * specified, as long as they are well-formed.
  */
 static int
-spa_validate_spares(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode)
+spa_validate_aux_devs(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode,
+    spa_aux_vdev_t *sav, const char *config, uint64_t version,
+    vdev_labeltype_t label)
 {
-	nvlist_t **spares;
-	uint_t i, nspares;
+	nvlist_t **dev;
+	uint_t i, ndev;
 	vdev_t *vd;
 	int error;
 
 	/*
-	 * It's acceptable to have no spares specified.
+	 * It's acceptable to have no devs specified.
 	 */
-	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
-	    &spares, &nspares) != 0)
+	if (nvlist_lookup_nvlist_array(nvroot, config, &dev, &ndev) != 0)
 		return (0);
 
-	if (nspares == 0)
+	if (ndev == 0)
 		return (EINVAL);
 
 	/*
-	 * Make sure the pool is formatted with a version that supports hot
-	 * spares.
+	 * Make sure the pool is formatted with a version that supports this
+	 * device type.
 	 */
-	if (spa_version(spa) < SPA_VERSION_SPARES)
+	if (spa_version(spa) < version)
 		return (ENOTSUP);
 
 	/*
-	 * Set the pending spare list so we correctly handle device in-use
+	 * Set the pending device list so we correctly handle device in-use
 	 * checking.
 	 */
-	spa->spa_pending_spares = spares;
-	spa->spa_pending_nspares = nspares;
-
-	for (i = 0; i < nspares; i++) {
-		if ((error = spa_config_parse(spa, &vd, spares[i], NULL, 0,
+	sav->sav_pending = dev;
+	sav->sav_npending = ndev;
+
+	for (i = 0; i < ndev; i++) {
+		if ((error = spa_config_parse(spa, &vd, dev[i], NULL, 0,
 		    mode)) != 0)
 			goto out;
 
@@ -1505,29 +1750,127 @@
 			goto out;
 		}
 
+		/*
+		 * The L2ARC currently only supports disk devices.
+		 */
+		if ((strcmp(config, ZPOOL_CONFIG_L2CACHE) == 0) &&
+		    strcmp(vd->vdev_ops->vdev_op_type, VDEV_TYPE_DISK) != 0) {
+			error = ENOTBLK;
+			goto out;
+		}
+
 		vd->vdev_top = vd;
 
 		if ((error = vdev_open(vd)) == 0 &&
-		    (error = vdev_label_init(vd, crtxg,
-		    VDEV_LABEL_SPARE)) == 0) {
-			VERIFY(nvlist_add_uint64(spares[i], ZPOOL_CONFIG_GUID,
+		    (error = vdev_label_init(vd, crtxg, label)) == 0) {
+			VERIFY(nvlist_add_uint64(dev[i], ZPOOL_CONFIG_GUID,
 			    vd->vdev_guid) == 0);
 		}
 
 		vdev_free(vd);
 
-		if (error && mode != VDEV_ALLOC_SPARE)
+		if (error &&
+		    (mode != VDEV_ALLOC_SPARE && mode != VDEV_ALLOC_L2CACHE))
 			goto out;
 		else
 			error = 0;
 	}
 
 out:
-	spa->spa_pending_spares = NULL;
-	spa->spa_pending_nspares = 0;
+	sav->sav_pending = NULL;
+	sav->sav_npending = 0;
 	return (error);
 }
 
+static int
+spa_validate_aux(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode)
+{
+	int error;
+
+	if ((error = spa_validate_aux_devs(spa, nvroot, crtxg, mode,
+	    &spa->spa_spares, ZPOOL_CONFIG_SPARES, SPA_VERSION_SPARES,
+	    VDEV_LABEL_SPARE)) != 0) {
+		return (error);
+	}
+
+	return (spa_validate_aux_devs(spa, nvroot, crtxg, mode,
+	    &spa->spa_l2cache, ZPOOL_CONFIG_L2CACHE, SPA_VERSION_L2CACHE,
+	    VDEV_LABEL_L2CACHE));
+}
+
+static void
+spa_set_aux_vdevs(spa_aux_vdev_t *sav, nvlist_t **devs, int ndevs,
+    const char *config)
+{
+	int i;
+
+	if (sav->sav_config != NULL) {
+		nvlist_t **olddevs;
+		uint_t oldndevs;
+		nvlist_t **newdevs;
+
+		/*
+		 * Generate new dev list by concatenating with the
+		 * current dev list.
+		 */
+		VERIFY(nvlist_lookup_nvlist_array(sav->sav_config, config,
+		    &olddevs, &oldndevs) == 0);
+
+		newdevs = kmem_alloc(sizeof (void *) *
+		    (ndevs + oldndevs), KM_SLEEP);
+		for (i = 0; i < oldndevs; i++)
+			VERIFY(nvlist_dup(olddevs[i], &newdevs[i],
+			    KM_SLEEP) == 0);
+		for (i = 0; i < ndevs; i++)
+			VERIFY(nvlist_dup(devs[i], &newdevs[i + oldndevs],
+			    KM_SLEEP) == 0);
+
+		VERIFY(nvlist_remove(sav->sav_config, config,
+		    DATA_TYPE_NVLIST_ARRAY) == 0);
+
+		VERIFY(nvlist_add_nvlist_array(sav->sav_config,
+		    config, newdevs, ndevs + oldndevs) == 0);
+		for (i = 0; i < oldndevs + ndevs; i++)
+			nvlist_free(newdevs[i]);
+		kmem_free(newdevs, (oldndevs + ndevs) * sizeof (void *));
+	} else {
+		/*
+		 * Generate a new dev list.
+		 */
+		VERIFY(nvlist_alloc(&sav->sav_config, NV_UNIQUE_NAME,
+		    KM_SLEEP) == 0);
+		VERIFY(nvlist_add_nvlist_array(sav->sav_config, config,
+		    devs, ndevs) == 0);
+	}
+}
+
+/*
+ * Stop and drop level 2 ARC devices
+ */
+void
+spa_l2cache_drop(spa_t *spa)
+{
+	vdev_t *vd;
+	int i;
+	spa_aux_vdev_t *sav = &spa->spa_l2cache;
+
+	for (i = 0; i < sav->sav_count; i++) {
+		uint64_t pool;
+
+		vd = sav->sav_vdevs[i];
+		ASSERT(vd != NULL);
+
+		if (spa_mode & FWRITE &&
+		    spa_l2cache_exists(vd->vdev_guid, &pool) && pool != 0ULL) {
+			l2arc_remove_vdev(vd);
+		}
+		if (vd->vdev_isl2cache)
+			spa_l2cache_remove(vd);
+		vdev_clear_stats(vd);
+		(void) vdev_close(vd);
+	}
+}
+
 /*
  * Pool Creation
  */
@@ -1542,8 +1885,8 @@
 	dmu_tx_t *tx;
 	int c, error = 0;
 	uint64_t txg = TXG_INITIAL;
-	nvlist_t **spares;
-	uint_t nspares;
+	nvlist_t **spares, **l2cache;
+	uint_t nspares, nl2cache;
 	uint64_t version;
 
 	/*
@@ -1594,7 +1937,7 @@
 
 	if (error == 0 &&
 	    (error = vdev_create(rvd, txg, B_FALSE)) == 0 &&
-	    (error = spa_validate_spares(spa, nvroot, txg,
+	    (error = spa_validate_aux(spa, nvroot, txg,
 	    VDEV_ALLOC_ADD)) == 0) {
 		for (c = 0; c < rvd->vdev_children; c++)
 			vdev_init(rvd->vdev_child[c], txg);
@@ -1616,14 +1959,29 @@
 	 */
 	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
 	    &spares, &nspares) == 0) {
-		VERIFY(nvlist_alloc(&spa->spa_sparelist, NV_UNIQUE_NAME,
+		VERIFY(nvlist_alloc(&spa->spa_spares.sav_config, NV_UNIQUE_NAME,
 		    KM_SLEEP) == 0);
-		VERIFY(nvlist_add_nvlist_array(spa->spa_sparelist,
+		VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config,
 		    ZPOOL_CONFIG_SPARES, spares, nspares) == 0);
 		spa_config_enter(spa, RW_WRITER, FTAG);
 		spa_load_spares(spa);
 		spa_config_exit(spa, FTAG);
-		spa->spa_sync_spares = B_TRUE;
+		spa->spa_spares.sav_sync = B_TRUE;
+	}
+
+	/*
+	 * Get the list of level 2 cache devices, if specified.
+	 */
+	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
+	    &l2cache, &nl2cache) == 0) {
+		VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config,
+		    NV_UNIQUE_NAME, KM_SLEEP) == 0);
+		VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config,
+		    ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0);
+		spa_config_enter(spa, RW_WRITER, FTAG);
+		spa_load_l2cache(spa);
+		spa_config_exit(spa, FTAG);
+		spa->spa_l2cache.sav_sync = B_TRUE;
 	}
 
 	spa->spa_dsl_pool = dp = dsl_pool_create(spa, txg);
@@ -1717,8 +2075,8 @@
 	char *altroot = NULL;
 	int error;
 	nvlist_t *nvroot;
-	nvlist_t **spares;
-	uint_t nspares;
+	nvlist_t **spares, **l2cache;
+	uint_t nspares, nl2cache;
 
 	/*
 	 * If a pool with this name exists, return failure.
@@ -1749,18 +2107,24 @@
 	 * Toss any existing sparelist, as it doesn't have any validity anymore,
 	 * and conflicts with spa_has_spare().
 	 */
-	if (spa->spa_sparelist) {
-		nvlist_free(spa->spa_sparelist);
-		spa->spa_sparelist = NULL;
+	if (spa->spa_spares.sav_config) {
+		nvlist_free(spa->spa_spares.sav_config);
+		spa->spa_spares.sav_config = NULL;
 		spa_load_spares(spa);
 	}
+	if (spa->spa_l2cache.sav_config) {
+		nvlist_free(spa->spa_l2cache.sav_config);
+		spa->spa_l2cache.sav_config = NULL;
+		spa_load_l2cache(spa);
+	}
 
 	VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
 	    &nvroot) == 0);
-	if (error == 0) {
-		error = spa_validate_spares(spa, nvroot, -1ULL,
-		    VDEV_ALLOC_SPARE);
-	}
+	if (error == 0)
+		error = spa_validate_aux(spa, nvroot, -1ULL, VDEV_ALLOC_SPARE);
+	if (error == 0)
+		error = spa_validate_aux(spa, nvroot, -1ULL,
+		    VDEV_ALLOC_L2CACHE);
 	spa_config_exit(spa, FTAG);
 
 	if (error != 0 || (props && (error = spa_prop_set(spa, props)))) {
@@ -1772,23 +2136,38 @@
 	}
 
 	/*
-	 * Override any spares as specified by the user, as these may have
-	 * correct device names/devids, etc.
+	 * Override any spares and level 2 cache devices as specified by
+	 * the user, as these may have correct device names/devids, etc.
 	 */
 	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
 	    &spares, &nspares) == 0) {
-		if (spa->spa_sparelist)
-			VERIFY(nvlist_remove(spa->spa_sparelist,
+		if (spa->spa_spares.sav_config)
+			VERIFY(nvlist_remove(spa->spa_spares.sav_config,
 			    ZPOOL_CONFIG_SPARES, DATA_TYPE_NVLIST_ARRAY) == 0);
 		else
-			VERIFY(nvlist_alloc(&spa->spa_sparelist,
+			VERIFY(nvlist_alloc(&spa->spa_spares.sav_config,
 			    NV_UNIQUE_NAME, KM_SLEEP) == 0);
-		VERIFY(nvlist_add_nvlist_array(spa->spa_sparelist,
+		VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config,
 		    ZPOOL_CONFIG_SPARES, spares, nspares) == 0);
 		spa_config_enter(spa, RW_WRITER, FTAG);
 		spa_load_spares(spa);
 		spa_config_exit(spa, FTAG);
-		spa->spa_sync_spares = B_TRUE;
+		spa->spa_spares.sav_sync = B_TRUE;
+	}
+	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
+	    &l2cache, &nl2cache) == 0) {
+		if (spa->spa_l2cache.sav_config)
+			VERIFY(nvlist_remove(spa->spa_l2cache.sav_config,
+			    ZPOOL_CONFIG_L2CACHE, DATA_TYPE_NVLIST_ARRAY) == 0);
+		else
+			VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config,
+			    NV_UNIQUE_NAME, KM_SLEEP) == 0);
+		VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config,
+		    ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0);
+		spa_config_enter(spa, RW_WRITER, FTAG);
+		spa_load_l2cache(spa);
+		spa_config_exit(spa, FTAG);
+		spa->spa_l2cache.sav_sync = B_TRUE;
 	}
 
 	/*
@@ -1857,9 +2236,10 @@
 		    spa->spa_uberblock.ub_timestamp) == 0);
 
 		/*
-		 * Add the list of hot spares.
+		 * Add the list of hot spares and level 2 cache devices.
 		 */
 		spa_add_spares(spa, config);
+		spa_add_l2cache(spa, config);
 	}
 
 	spa_unload(spa);
@@ -2014,8 +2394,8 @@
 	int c, error;
 	vdev_t *rvd = spa->spa_root_vdev;
 	vdev_t *vd, *tvd;
-	nvlist_t **spares;
-	uint_t i, nspares;
+	nvlist_t **spares, **l2cache;
+	uint_t nspares, nl2cache;
 
 	txg = spa_vdev_enter(spa);
 
@@ -2025,11 +2405,15 @@
 
 	spa->spa_pending_vdev = vd;
 
-	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
-	    &spares, &nspares) != 0)
+	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares,
+	    &nspares) != 0)
 		nspares = 0;
 
-	if (vd->vdev_children == 0 && nspares == 0) {
+	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, &l2cache,
+	    &nl2cache) != 0)
+		nl2cache = 0;
+
+	if (vd->vdev_children == 0 && nspares == 0 && nl2cache == 0) {
 		spa->spa_pending_vdev = NULL;
 		return (spa_vdev_exit(spa, vd, txg, EINVAL));
 	}
@@ -2042,11 +2426,10 @@
 	}
 
 	/*
-	 * We must validate the spares after checking the children.  Otherwise,
-	 * vdev_inuse() will blindly overwrite the spare.
+	 * We must validate the spares and l2cache devices after checking the
+	 * children.  Otherwise, vdev_inuse() will blindly overwrite the spare.
 	 */
-	if ((error = spa_validate_spares(spa, nvroot, txg,
-	    VDEV_ALLOC_ADD)) != 0) {
+	if ((error = spa_validate_aux(spa, nvroot, txg, VDEV_ALLOC_ADD)) != 0) {
 		spa->spa_pending_vdev = NULL;
 		return (spa_vdev_exit(spa, vd, txg, error));
 	}
@@ -2065,43 +2448,17 @@
 	}
 
 	if (nspares != 0) {
-		if (spa->spa_sparelist != NULL) {
-			nvlist_t **oldspares;
-			uint_t oldnspares;
-			nvlist_t **newspares;
-
-			VERIFY(nvlist_lookup_nvlist_array(spa->spa_sparelist,
-			    ZPOOL_CONFIG_SPARES, &oldspares, &oldnspares) == 0);
-
-			newspares = kmem_alloc(sizeof (void *) *
-			    (nspares + oldnspares), KM_SLEEP);
-			for (i = 0; i < oldnspares; i++)
-				VERIFY(nvlist_dup(oldspares[i],
-				    &newspares[i], KM_SLEEP) == 0);
-			for (i = 0; i < nspares; i++)
-				VERIFY(nvlist_dup(spares[i],
-				    &newspares[i + oldnspares],
-				    KM_SLEEP) == 0);
-
-			VERIFY(nvlist_remove(spa->spa_sparelist,
-			    ZPOOL_CONFIG_SPARES, DATA_TYPE_NVLIST_ARRAY) == 0);
-
-			VERIFY(nvlist_add_nvlist_array(spa->spa_sparelist,
-			    ZPOOL_CONFIG_SPARES, newspares,
-			    nspares + oldnspares) == 0);
-			for (i = 0; i < oldnspares + nspares; i++)
-				nvlist_free(newspares[i]);
-			kmem_free(newspares, (oldnspares + nspares) *
-			    sizeof (void *));
-		} else {
-			VERIFY(nvlist_alloc(&spa->spa_sparelist,
-			    NV_UNIQUE_NAME, KM_SLEEP) == 0);
-			VERIFY(nvlist_add_nvlist_array(spa->spa_sparelist,
-			    ZPOOL_CONFIG_SPARES, spares, nspares) == 0);
-		}
-
+		spa_set_aux_vdevs(&spa->spa_spares, spares, nspares,
+		    ZPOOL_CONFIG_SPARES);
 		spa_load_spares(spa);
-		spa->spa_sync_spares = B_TRUE;
+		spa->spa_spares.sav_sync = B_TRUE;
+	}
+
+	if (nl2cache != 0) {
+		spa_set_aux_vdevs(&spa->spa_l2cache, l2cache, nl2cache,
+		    ZPOOL_CONFIG_L2CACHE);
+		spa_load_l2cache(spa);
+		spa->spa_l2cache.sav_sync = B_TRUE;
 	}
 
 	/*
@@ -2511,55 +2868,38 @@
 }
 
 /*
- * Remove a device from the pool.  Currently, this supports removing only hot
- * spares.
+ * Remove a spare vdev from the spares nvlist config.
  */
-int
-spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare)
+static int
+spa_remove_spares(spa_aux_vdev_t *sav, uint64_t guid, boolean_t unspare,
+    nvlist_t **spares, int nspares, vdev_t *vd)
 {
-	vdev_t *vd;
-	nvlist_t **spares, *nv, **newspares;
-	uint_t i, j, nspares;
-	int ret = 0;
-
-	spa_config_enter(spa, RW_WRITER, FTAG);
-
-	vd = spa_lookup_by_guid(spa, guid);
+	nvlist_t *nv, **newspares;
+	int i, j;
 
 	nv = NULL;
-	if (spa->spa_spares != NULL &&
-	    nvlist_lookup_nvlist_array(spa->spa_sparelist, ZPOOL_CONFIG_SPARES,
-	    &spares, &nspares) == 0) {
-		for (i = 0; i < nspares; i++) {
-			uint64_t theguid;
-
-			VERIFY(nvlist_lookup_uint64(spares[i],
-			    ZPOOL_CONFIG_GUID, &theguid) == 0);
-			if (theguid == guid) {
-				nv = spares[i];
-				break;
-			}
+	for (i = 0; i < nspares; i++) {
+		uint64_t theguid;
+
+		VERIFY(nvlist_lookup_uint64(spares[i],
+		    ZPOOL_CONFIG_GUID, &theguid) == 0);
+		if (theguid == guid) {
+			nv = spares[i];
+			break;
 		}
 	}
 
 	/*
-	 * We only support removing a hot spare, and only if it's not currently
-	 * in use in this pool.
+	 * Only remove the hot spare if it's not currently in use in this pool.
 	 */
-	if (nv == NULL && vd == NULL) {
-		ret = ENOENT;
-		goto out;
-	}
-
-	if (nv == NULL && vd != NULL) {
-		ret = ENOTSUP;
-		goto out;
-	}
-
-	if (!unspare && nv != NULL && vd != NULL) {
-		ret = EBUSY;
-		goto out;
-	}
+	if (nv == NULL && vd == NULL)
+		return (ENOENT);
+
+	if (nv == NULL && vd != NULL)
+		return (ENOTSUP);
+
+	if (!unspare && nv != NULL && vd != NULL)
+		return (EBUSY);
 
 	if (nspares == 1) {
 		newspares = NULL;
@@ -2573,20 +2913,119 @@
 		}
 	}
 
-	VERIFY(nvlist_remove(spa->spa_sparelist, ZPOOL_CONFIG_SPARES,
+	VERIFY(nvlist_remove(sav->sav_config, ZPOOL_CONFIG_SPARES,
 	    DATA_TYPE_NVLIST_ARRAY) == 0);
-	VERIFY(nvlist_add_nvlist_array(spa->spa_sparelist, ZPOOL_CONFIG_SPARES,
-	    newspares, nspares - 1) == 0);
+	VERIFY(nvlist_add_nvlist_array(sav->sav_config,
+	    ZPOOL_CONFIG_SPARES, newspares, nspares - 1) == 0);
 	for (i = 0; i < nspares - 1; i++)
 		nvlist_free(newspares[i]);
 	kmem_free(newspares, (nspares - 1) * sizeof (void *));
-	spa_load_spares(spa);
-	spa->spa_sync_spares = B_TRUE;
+
+	return (0);
+}
+
+/*
+ * Remove an l2cache vdev from the nvlist config.
+ */
+static int
+spa_remove_l2cache(spa_aux_vdev_t *sav, uint64_t guid, nvlist_t **l2cache,
+    int nl2cache, vdev_t *vd)
+{
+	nvlist_t *nv, **newl2cache;
+	int i, j;
+
+	nv = NULL;
+	for (i = 0; i < nl2cache; i++) {
+		uint64_t theguid;
+
+		VERIFY(nvlist_lookup_uint64(l2cache[i],
+		    ZPOOL_CONFIG_GUID, &theguid) == 0);
+		if (theguid == guid) {
+			nv = l2cache[i];
+			break;
+		}
+	}
+
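+	/*
+	 * If the caller did not resolve the guid to a vdev, look it up
+	 * in the l2cache aux vdev list.
+	 */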
+	if (vd == NULL) {
+		for (i = 0; i < nl2cache; i++) {
+			if (sav->sav_vdevs[i]->vdev_guid == guid) {
+				vd = sav->sav_vdevs[i];
+				break;
+			}
+		}
+	}
+
+	if (nv == NULL && vd == NULL)
+		return (ENOENT);
+
+	if (nv == NULL && vd != NULL)
+		return (ENOTSUP);
+
+	if (nl2cache == 1) {
+		newl2cache = NULL;
+	} else {
+		newl2cache = kmem_alloc((nl2cache - 1) * sizeof (void *),
+		    KM_SLEEP);
+		for (i = 0, j = 0; i < nl2cache; i++) {
+			if (l2cache[i] != nv)
+				VERIFY(nvlist_dup(l2cache[i],
+				    &newl2cache[j++], KM_SLEEP) == 0);
+		}
+	}
+
+	VERIFY(nvlist_remove(sav->sav_config, ZPOOL_CONFIG_L2CACHE,
+	    DATA_TYPE_NVLIST_ARRAY) == 0);
+	VERIFY(nvlist_add_nvlist_array(sav->sav_config,
+	    ZPOOL_CONFIG_L2CACHE, newl2cache, nl2cache - 1) == 0);
+	for (i = 0; i < nl2cache - 1; i++)
+		nvlist_free(newl2cache[i]);
+	kmem_free(newl2cache, (nl2cache - 1) * sizeof (void *));
+
+	return (0);
+}
+
+/*
+ * Remove a device from the pool.  Currently, this supports removing only hot
+ * spares and level 2 ARC devices.
+ */
+int
+spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare)
+{
+	vdev_t *vd;
+	nvlist_t **spares, **l2cache;
+	uint_t nspares, nl2cache;
+	int error = 0;
+
+	spa_config_enter(spa, RW_WRITER, FTAG);
+
+	vd = spa_lookup_by_guid(spa, guid);
+
+	if (spa->spa_spares.sav_vdevs != NULL &&
+	    spa_spare_exists(guid, NULL) &&
+	    nvlist_lookup_nvlist_array(spa->spa_spares.sav_config,
+	    ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0) {
+		if ((error = spa_remove_spares(&spa->spa_spares, guid, unspare,
+		    spares, nspares, vd)) != 0)
+			goto out;
+		spa_load_spares(spa);
+		spa->spa_spares.sav_sync = B_TRUE;
+		goto out;
+	}
+
+	if (spa->spa_l2cache.sav_vdevs != NULL &&
+	    spa_l2cache_exists(guid, NULL) &&
+	    nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config,
+	    ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0) {
+		if ((error = spa_remove_l2cache(&spa->spa_l2cache, guid,
+		    l2cache, nl2cache, vd)) != 0)
+			goto out;
+		spa_load_l2cache(spa);
+		spa->spa_l2cache.sav_sync = B_TRUE;
+	}
 
 out:
 	spa_config_exit(spa, FTAG);
-
-	return (ret);
+	return (error);
 }
 
 /*
@@ -2693,33 +3132,52 @@
 
 	if ((vd = vdev_lookup_by_guid(rvd, guid)) == NULL) {
 		/*
-		 * Determine if this is a reference to a hot spare.  In that
-		 * case, update the path as stored in the spare list.
+		 * Determine if this is a reference to a hot spare or l2cache
+		 * device.  If it is, update the path as stored in their
+		 * device list.
 		 */
-		nvlist_t **spares;
-		uint_t i, nspares;
-		if (spa->spa_sparelist != NULL) {
-			VERIFY(nvlist_lookup_nvlist_array(spa->spa_sparelist,
-			    ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0);
+		nvlist_t **spares, **l2cache;
+		uint_t i, nspares, nl2cache;
+
+		if (spa->spa_spares.sav_config != NULL) {
+			VERIFY(nvlist_lookup_nvlist_array(
+			    spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES,
+			    &spares, &nspares) == 0);
 			for (i = 0; i < nspares; i++) {
 				uint64_t theguid;
 				VERIFY(nvlist_lookup_uint64(spares[i],
 				    ZPOOL_CONFIG_GUID, &theguid) == 0);
-				if (theguid == guid)
-					break;
+				if (theguid == guid) {
+					VERIFY(nvlist_add_string(spares[i],
+					    ZPOOL_CONFIG_PATH, newpath) == 0);
+					spa_load_spares(spa);
+					spa->spa_spares.sav_sync = B_TRUE;
+					return (spa_vdev_exit(spa, NULL, txg,
+					    0));
+				}
 			}
-
-			if (i == nspares)
-				return (spa_vdev_exit(spa, NULL, txg, ENOENT));
-
-			VERIFY(nvlist_add_string(spares[i],
-			    ZPOOL_CONFIG_PATH, newpath) == 0);
-			spa_load_spares(spa);
-			spa->spa_sync_spares = B_TRUE;
-			return (spa_vdev_exit(spa, NULL, txg, 0));
-		} else {
-			return (spa_vdev_exit(spa, NULL, txg, ENOENT));
 		}
+
+		if (spa->spa_l2cache.sav_config != NULL) {
+			VERIFY(nvlist_lookup_nvlist_array(
+			    spa->spa_l2cache.sav_config, ZPOOL_CONFIG_L2CACHE,
+			    &l2cache, &nl2cache) == 0);
+			for (i = 0; i < nl2cache; i++) {
+				uint64_t theguid;
+				VERIFY(nvlist_lookup_uint64(l2cache[i],
+				    ZPOOL_CONFIG_GUID, &theguid) == 0);
+				if (theguid == guid) {
+					VERIFY(nvlist_add_string(l2cache[i],
+					    ZPOOL_CONFIG_PATH, newpath) == 0);
+					spa_load_l2cache(spa);
+					spa->spa_l2cache.sav_sync = B_TRUE;
+					return (spa_vdev_exit(spa, NULL, txg,
+					    0));
+				}
+			}
+		}
+
+		return (spa_vdev_exit(spa, NULL, txg, ENOENT));
 	}
 
 	if (!vd->vdev_ops->vdev_op_leaf)
@@ -3338,50 +3796,49 @@
 }
 
 static void
-spa_sync_spares(spa_t *spa, dmu_tx_t *tx)
+spa_sync_aux_dev(spa_t *spa, spa_aux_vdev_t *sav, dmu_tx_t *tx,
+    const char *config, const char *entry)
 {
 	nvlist_t *nvroot;
-	nvlist_t **spares;
+	nvlist_t **list;
 	int i;
 
-	if (!spa->spa_sync_spares)
+	if (!sav->sav_sync)
 		return;
 
 	/*
-	 * Update the MOS nvlist describing the list of available spares.
-	 * spa_validate_spares() will have already made sure this nvlist is
+	 * Update the MOS nvlist describing the list of available devices.
+	 * spa_validate_aux() will have already made sure this nvlist is
 	 * valid and the vdevs are labeled appropriately.
 	 */
-	if (spa->spa_spares_object == 0) {
-		spa->spa_spares_object = dmu_object_alloc(spa->spa_meta_objset,
-		    DMU_OT_PACKED_NVLIST, 1 << 14,
-		    DMU_OT_PACKED_NVLIST_SIZE, sizeof (uint64_t), tx);
+	if (sav->sav_object == 0) {
+		sav->sav_object = dmu_object_alloc(spa->spa_meta_objset,
+		    DMU_OT_PACKED_NVLIST, 1 << 14, DMU_OT_PACKED_NVLIST_SIZE,
+		    sizeof (uint64_t), tx);
 		VERIFY(zap_update(spa->spa_meta_objset,
-		    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SPARES,
-		    sizeof (uint64_t), 1, &spa->spa_spares_object, tx) == 0);
+		    DMU_POOL_DIRECTORY_OBJECT, entry, sizeof (uint64_t), 1,
+		    &sav->sav_object, tx) == 0);
 	}
 
 	VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0);
-	if (spa->spa_nspares == 0) {
-		VERIFY(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
-		    NULL, 0) == 0);
+	if (sav->sav_count == 0) {
+		VERIFY(nvlist_add_nvlist_array(nvroot, config, NULL, 0) == 0);
 	} else {
-		spares = kmem_alloc(spa->spa_nspares * sizeof (void *),
-		    KM_SLEEP);
-		for (i = 0; i < spa->spa_nspares; i++)
-			spares[i] = vdev_config_generate(spa,
-			    spa->spa_spares[i], B_FALSE, B_TRUE);
-		VERIFY(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
-		    spares, spa->spa_nspares) == 0);
-		for (i = 0; i < spa->spa_nspares; i++)
-			nvlist_free(spares[i]);
-		kmem_free(spares, spa->spa_nspares * sizeof (void *));
+		list = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP);
+		for (i = 0; i < sav->sav_count; i++)
+			list[i] = vdev_config_generate(spa, sav->sav_vdevs[i],
+			    B_FALSE, B_FALSE, B_TRUE);
+		VERIFY(nvlist_add_nvlist_array(nvroot, config, list,
+		    sav->sav_count) == 0);
+		for (i = 0; i < sav->sav_count; i++)
+			nvlist_free(list[i]);
+		kmem_free(list, sav->sav_count * sizeof (void *));
 	}
 
-	spa_sync_nvlist(spa, spa->spa_spares_object, nvroot, tx);
+	spa_sync_nvlist(spa, sav->sav_object, nvroot, tx);
 	nvlist_free(nvroot);
 
-	spa->spa_sync_spares = B_FALSE;
+	sav->sav_sync = B_FALSE;
 }
 
 static void
@@ -3606,7 +4063,10 @@
 		spa->spa_sync_pass++;
 
 		spa_sync_config_object(spa, tx);
-		spa_sync_spares(spa, tx);
+		spa_sync_aux_dev(spa, &spa->spa_spares, tx,
+		    ZPOOL_CONFIG_SPARES, DMU_POOL_SPARES);
+		spa_sync_aux_dev(spa, &spa->spa_l2cache, tx,
+		    ZPOOL_CONFIG_L2CACHE, DMU_POOL_L2CACHE);
 		spa_errlog_sync(spa, txg);
 		dsl_pool_sync(dp, txg);
 
@@ -3806,15 +4266,15 @@
 {
 	int i;
 	uint64_t spareguid;
-
-	for (i = 0; i < spa->spa_nspares; i++)
-		if (spa->spa_spares[i]->vdev_guid == guid)
+	spa_aux_vdev_t *sav = &spa->spa_spares;
+
+	for (i = 0; i < sav->sav_count; i++)
+		if (sav->sav_vdevs[i]->vdev_guid == guid)
 			return (B_TRUE);
 
-	for (i = 0; i < spa->spa_pending_nspares; i++) {
-		if (nvlist_lookup_uint64(spa->spa_pending_spares[i],
-		    ZPOOL_CONFIG_GUID, &spareguid) == 0 &&
-		    spareguid == guid)
+	for (i = 0; i < sav->sav_npending; i++) {
+		if (nvlist_lookup_uint64(sav->sav_pending[i], ZPOOL_CONFIG_GUID,
+		    &spareguid) == 0 && spareguid == guid)
 			return (B_TRUE);
 	}
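
For readers less familiar with the nvlist idiom used by spa_remove_l2cache() above (duplicate every entry except the one being dropped, then replace the whole array under its key), the following is a minimal userland sketch of the same pattern.  It assumes libnvpair (compile with -lnvpair); the helper name remove_array_entry() and its arguments are illustrative only and are not part of this changeset.

    #include <libnvpair.h>
    #include <stdlib.h>

    /*
     * Illustrative only: rebuild the nvlist array stored under 'key'
     * without entry 'drop', then swap it in place of the old array.
     */
    static int
    remove_array_entry(nvlist_t *config, const char *key, uint_t drop)
    {
            nvlist_t **list, **newlist = NULL;
            uint_t count, i, j;

            if (nvlist_lookup_nvlist_array(config, key, &list, &count) != 0 ||
                drop >= count)
                    return (-1);

            if (count > 1) {
                    newlist = malloc((count - 1) * sizeof (nvlist_t *));
                    for (i = 0, j = 0; i < count; i++) {
                            if (i != drop)
                                    (void) nvlist_dup(list[i], &newlist[j++], 0);
                    }
            }

            /* An empty array is represented as (NULL, 0), as in the kernel code. */
            (void) nvlist_remove(config, key, DATA_TYPE_NVLIST_ARRAY);
            (void) nvlist_add_nvlist_array(config, key, newlist, count - 1);

            for (i = 0; i < count - 1; i++)
                    nvlist_free(newlist[i]);
            free(newlist);
            return (0);
    }
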
 
--- a/usr/src/uts/common/fs/zfs/spa_config.c	Fri Nov 09 18:46:13 2007 -0800
+++ b/usr/src/uts/common/fs/zfs/spa_config.c	Fri Nov 09 21:33:30 2007 -0800
@@ -422,7 +422,7 @@
 		vd = vd->vdev_top;		/* label contains top config */
 	}
 
-	nvroot = vdev_config_generate(spa, vd, getstats, B_FALSE);
+	nvroot = vdev_config_generate(spa, vd, getstats, B_FALSE, B_FALSE);
 	VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0);
 	nvlist_free(nvroot);
 
--- a/usr/src/uts/common/fs/zfs/spa_misc.c	Fri Nov 09 18:46:13 2007 -0800
+++ b/usr/src/uts/common/fs/zfs/spa_misc.c	Fri Nov 09 21:33:30 2007 -0800
@@ -178,6 +178,8 @@
 
 static kmutex_t spa_spare_lock;
 static avl_tree_t spa_spare_avl;
+static kmutex_t spa_l2cache_lock;
+static avl_tree_t spa_l2cache_avl;
 
 kmem_cache_t *spa_buffer_pool;
 int spa_mode;
@@ -406,11 +408,108 @@
 
 /*
  * ==========================================================================
- * SPA spare tracking
+ * SPA spare and l2cache tracking
  * ==========================================================================
  */
 
 /*
+ * Hot spares and cache devices are tracked using the same code below,
+ * as generic 'auxiliary' devices.
+ */
+
+typedef struct spa_aux {
+	uint64_t	aux_guid;
+	uint64_t	aux_pool;
+	avl_node_t	aux_avl;
+	int		aux_count;
+} spa_aux_t;
+
+static int
+spa_aux_compare(const void *a, const void *b)
+{
+	const spa_aux_t *sa = a;
+	const spa_aux_t *sb = b;
+
+	if (sa->aux_guid < sb->aux_guid)
+		return (-1);
+	else if (sa->aux_guid > sb->aux_guid)
+		return (1);
+	else
+		return (0);
+}
+
+void
+spa_aux_add(vdev_t *vd, avl_tree_t *avl)
+{
+	avl_index_t where;
+	spa_aux_t search;
+	spa_aux_t *aux;
+
+	search.aux_guid = vd->vdev_guid;
+	if ((aux = avl_find(avl, &search, &where)) != NULL) {
+		aux->aux_count++;
+	} else {
+		aux = kmem_zalloc(sizeof (spa_aux_t), KM_SLEEP);
+		aux->aux_guid = vd->vdev_guid;
+		aux->aux_count = 1;
+		avl_insert(avl, aux, where);
+	}
+}
+
+void
+spa_aux_remove(vdev_t *vd, avl_tree_t *avl)
+{
+	spa_aux_t search;
+	spa_aux_t *aux;
+	avl_index_t where;
+
+	search.aux_guid = vd->vdev_guid;
+	aux = avl_find(avl, &search, &where);
+
+	ASSERT(aux != NULL);
+
+	if (--aux->aux_count == 0) {
+		avl_remove(avl, aux);
+		kmem_free(aux, sizeof (spa_aux_t));
+	} else if (aux->aux_pool == spa_guid(vd->vdev_spa)) {
+		aux->aux_pool = 0ULL;
+	}
+}
+
+boolean_t
+spa_aux_exists(uint64_t guid, uint64_t *pool, avl_tree_t *avl)
+{
+	spa_aux_t search, *found;
+	avl_index_t where;
+
+	search.aux_guid = guid;
+	found = avl_find(avl, &search, &where);
+
+	if (pool) {
+		if (found)
+			*pool = found->aux_pool;
+		else
+			*pool = 0ULL;
+	}
+
+	return (found != NULL);
+}
+
+void
+spa_aux_activate(vdev_t *vd, avl_tree_t *avl)
+{
+	spa_aux_t search, *found;
+	avl_index_t where;
+
+	search.aux_guid = vd->vdev_guid;
+	found = avl_find(avl, &search, &where);
+	ASSERT(found != NULL);
+	ASSERT(found->aux_pool == 0ULL);
+
+	found->aux_pool = spa_guid(vd->vdev_spa);
+}
+
+/*
  * Spares are tracked globally due to the following constraints:
  *
  * 	- A spare may be part of multiple pools.
@@ -432,73 +531,28 @@
  * be completely consistent with respect to other vdev configuration changes.
  */
 
-typedef struct spa_spare {
-	uint64_t	spare_guid;
-	uint64_t	spare_pool;
-	avl_node_t	spare_avl;
-	int		spare_count;
-} spa_spare_t;
-
 static int
 spa_spare_compare(const void *a, const void *b)
 {
-	const spa_spare_t *sa = a;
-	const spa_spare_t *sb = b;
-
-	if (sa->spare_guid < sb->spare_guid)
-		return (-1);
-	else if (sa->spare_guid > sb->spare_guid)
-		return (1);
-	else
-		return (0);
+	return (spa_aux_compare(a, b));
 }
 
 void
 spa_spare_add(vdev_t *vd)
 {
-	avl_index_t where;
-	spa_spare_t search;
-	spa_spare_t *spare;
-
 	mutex_enter(&spa_spare_lock);
 	ASSERT(!vd->vdev_isspare);
-
-	search.spare_guid = vd->vdev_guid;
-	if ((spare = avl_find(&spa_spare_avl, &search, &where)) != NULL) {
-		spare->spare_count++;
-	} else {
-		spare = kmem_zalloc(sizeof (spa_spare_t), KM_SLEEP);
-		spare->spare_guid = vd->vdev_guid;
-		spare->spare_count = 1;
-		avl_insert(&spa_spare_avl, spare, where);
-	}
+	spa_aux_add(vd, &spa_spare_avl);
 	vd->vdev_isspare = B_TRUE;
-
 	mutex_exit(&spa_spare_lock);
 }
 
 void
 spa_spare_remove(vdev_t *vd)
 {
-	spa_spare_t search;
-	spa_spare_t *spare;
-	avl_index_t where;
-
 	mutex_enter(&spa_spare_lock);
-
-	search.spare_guid = vd->vdev_guid;
-	spare = avl_find(&spa_spare_avl, &search, &where);
-
 	ASSERT(vd->vdev_isspare);
-	ASSERT(spare != NULL);
-
-	if (--spare->spare_count == 0) {
-		avl_remove(&spa_spare_avl, spare);
-		kmem_free(spare, sizeof (spa_spare_t));
-	} else if (spare->spare_pool == spa_guid(vd->vdev_spa)) {
-		spare->spare_pool = 0ULL;
-	}
-
+	spa_aux_remove(vd, &spa_spare_avl);
 	vd->vdev_isspare = B_FALSE;
 	mutex_exit(&spa_spare_lock);
 }
@@ -506,42 +560,81 @@
 boolean_t
 spa_spare_exists(uint64_t guid, uint64_t *pool)
 {
-	spa_spare_t search, *found;
-	avl_index_t where;
+	boolean_t found;
 
 	mutex_enter(&spa_spare_lock);
-
-	search.spare_guid = guid;
-	found = avl_find(&spa_spare_avl, &search, &where);
-
-	if (pool) {
-		if (found)
-			*pool = found->spare_pool;
-		else
-			*pool = 0ULL;
-	}
-
+	found = spa_aux_exists(guid, pool, &spa_spare_avl);
 	mutex_exit(&spa_spare_lock);
 
-	return (found != NULL);
+	return (found);
 }
 
 void
 spa_spare_activate(vdev_t *vd)
 {
-	spa_spare_t search, *found;
-	avl_index_t where;
-
 	mutex_enter(&spa_spare_lock);
 	ASSERT(vd->vdev_isspare);
+	spa_aux_activate(vd, &spa_spare_avl);
+	mutex_exit(&spa_spare_lock);
+}
 
-	search.spare_guid = vd->vdev_guid;
-	found = avl_find(&spa_spare_avl, &search, &where);
-	ASSERT(found != NULL);
-	ASSERT(found->spare_pool == 0ULL);
+/*
+ * Level 2 ARC devices are tracked globally for the same reasons as spares.
+ * Cache devices currently support only one pool per cache device, so for
+ * these devices the aux reference count never exceeds 1.
+ */
+
+static int
+spa_l2cache_compare(const void *a, const void *b)
+{
+	return (spa_aux_compare(a, b));
+}
+
+void
+spa_l2cache_add(vdev_t *vd)
+{
+	mutex_enter(&spa_l2cache_lock);
+	ASSERT(!vd->vdev_isl2cache);
+	spa_aux_add(vd, &spa_l2cache_avl);
+	vd->vdev_isl2cache = B_TRUE;
+	mutex_exit(&spa_l2cache_lock);
+}
 
-	found->spare_pool = spa_guid(vd->vdev_spa);
-	mutex_exit(&spa_spare_lock);
+void
+spa_l2cache_remove(vdev_t *vd)
+{
+	mutex_enter(&spa_l2cache_lock);
+	ASSERT(vd->vdev_isl2cache);
+	spa_aux_remove(vd, &spa_l2cache_avl);
+	vd->vdev_isl2cache = B_FALSE;
+	mutex_exit(&spa_l2cache_lock);
+}
+
+boolean_t
+spa_l2cache_exists(uint64_t guid, uint64_t *pool)
+{
+	boolean_t found;
+
+	mutex_enter(&spa_l2cache_lock);
+	found = spa_aux_exists(guid, pool, &spa_l2cache_avl);
+	mutex_exit(&spa_l2cache_lock);
+
+	return (found);
+}
+
+void
+spa_l2cache_activate(vdev_t *vd)
+{
+	mutex_enter(&spa_l2cache_lock);
+	ASSERT(vd->vdev_isl2cache);
+	spa_aux_activate(vd, &spa_l2cache_avl);
+	mutex_exit(&spa_l2cache_lock);
+}
+
+void
+spa_l2cache_space_update(vdev_t *vd, int64_t space, int64_t alloc)
+{
+	vdev_space_update(vd, space, alloc, B_FALSE);
 }
 
 /*
@@ -1078,13 +1171,17 @@
 {
 	mutex_init(&spa_namespace_lock, NULL, MUTEX_DEFAULT, NULL);
 	mutex_init(&spa_spare_lock, NULL, MUTEX_DEFAULT, NULL);
+	mutex_init(&spa_l2cache_lock, NULL, MUTEX_DEFAULT, NULL);
 	cv_init(&spa_namespace_cv, NULL, CV_DEFAULT, NULL);
 
 	avl_create(&spa_namespace_avl, spa_name_compare, sizeof (spa_t),
 	    offsetof(spa_t, spa_avl));
 
-	avl_create(&spa_spare_avl, spa_spare_compare, sizeof (spa_spare_t),
-	    offsetof(spa_spare_t, spare_avl));
+	avl_create(&spa_spare_avl, spa_spare_compare, sizeof (spa_aux_t),
+	    offsetof(spa_aux_t, aux_avl));
+
+	avl_create(&spa_l2cache_avl, spa_l2cache_compare, sizeof (spa_aux_t),
+	    offsetof(spa_aux_t, aux_avl));
 
 	spa_mode = mode;
 
@@ -1111,10 +1208,12 @@
 
 	avl_destroy(&spa_namespace_avl);
 	avl_destroy(&spa_spare_avl);
+	avl_destroy(&spa_l2cache_avl);
 
 	cv_destroy(&spa_namespace_cv);
 	mutex_destroy(&spa_namespace_lock);
 	mutex_destroy(&spa_spare_lock);
+	mutex_destroy(&spa_l2cache_lock);
 }
 
 /*
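
The spa_aux_* routines above factor the old per-spare bookkeeping into a generic, reference-counted registry keyed by vdev guid and kept in a global AVL tree, which hot spares and L2ARC devices now share.  Below is a small userland illustration of that reference-counting behaviour.  It assumes the illumos/Solaris libavl interfaces (avl_create/avl_find/avl_insert/avl_remove, link with -lavl); the aux_t type and the guid value are made up for the example and are not part of this changeset.

    #include <sys/types.h>
    #include <sys/avl.h>
    #include <stddef.h>
    #include <stdlib.h>
    #include <assert.h>

    typedef struct aux {
            uint64_t        aux_guid;       /* device guid: the search key */
            int             aux_count;      /* how many pools reference it */
            avl_node_t      aux_avl;
    } aux_t;

    static int
    aux_compare(const void *a, const void *b)
    {
            const aux_t *sa = a;
            const aux_t *sb = b;

            if (sa->aux_guid < sb->aux_guid)
                    return (-1);
            if (sa->aux_guid > sb->aux_guid)
                    return (1);
            return (0);
    }

    static void
    aux_add(avl_tree_t *t, uint64_t guid)
    {
            aux_t search, *aux;
            avl_index_t where;

            search.aux_guid = guid;
            if ((aux = avl_find(t, &search, &where)) != NULL) {
                    aux->aux_count++;       /* already tracked: bump the count */
            } else {
                    aux = calloc(1, sizeof (aux_t));
                    aux->aux_guid = guid;
                    aux->aux_count = 1;
                    avl_insert(t, aux, where);
            }
    }

    static void
    aux_remove(avl_tree_t *t, uint64_t guid)
    {
            aux_t search, *aux;
            avl_index_t where;

            search.aux_guid = guid;
            aux = avl_find(t, &search, &where);
            assert(aux != NULL);
            if (--aux->aux_count == 0) {    /* last reference: drop the node */
                    avl_remove(t, aux);
                    free(aux);
            }
    }

    int
    main(void)
    {
            avl_tree_t t;

            avl_create(&t, aux_compare, sizeof (aux_t), offsetof(aux_t, aux_avl));
            aux_add(&t, 0x1234);            /* pool A registers a shared device */
            aux_add(&t, 0x1234);            /* pool B registers the same guid */
            aux_remove(&t, 0x1234);         /* still tracked on behalf of pool B */
            aux_remove(&t, 0x1234);         /* last reference gone: node freed */
            assert(avl_numnodes(&t) == 0);
            avl_destroy(&t);
            return (0);
    }
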
--- a/usr/src/uts/common/fs/zfs/sys/arc.h	Fri Nov 09 18:46:13 2007 -0800
+++ b/usr/src/uts/common/fs/zfs/sys/arc.h	Fri Nov 09 21:33:30 2007 -0800
@@ -36,6 +36,7 @@
 
 #include <sys/zio.h>
 #include <sys/dmu.h>
+#include <sys/spa.h>
 
 typedef struct arc_buf_hdr arc_buf_hdr_t;
 typedef struct arc_buf arc_buf_t;
@@ -106,6 +107,15 @@
 void arc_init(void);
 void arc_fini(void);
 
+/*
+ * Level 2 ARC
+ */
+
+void l2arc_add_vdev(spa_t *spa, vdev_t *vd, uint64_t start, uint64_t end);
+void l2arc_remove_vdev(vdev_t *vd);
+void l2arc_init(void);
+void l2arc_fini(void);
+
 #ifdef	__cplusplus
 }
 #endif
--- a/usr/src/uts/common/fs/zfs/sys/dmu.h	Fri Nov 09 18:46:13 2007 -0800
+++ b/usr/src/uts/common/fs/zfs/sys/dmu.h	Fri Nov 09 21:33:30 2007 -0800
@@ -200,6 +200,7 @@
 #define	DMU_POOL_DEFLATE		"deflate"
 #define	DMU_POOL_HISTORY		"history"
 #define	DMU_POOL_PROPS			"pool_props"
+#define	DMU_POOL_L2CACHE		"l2cache"
 
 /*
  * Allocate an object from this objset.  The range of object numbers
--- a/usr/src/uts/common/fs/zfs/sys/spa.h	Fri Nov 09 18:46:13 2007 -0800
+++ b/usr/src/uts/common/fs/zfs/sys/spa.h	Fri Nov 09 21:33:30 2007 -0800
@@ -47,6 +47,7 @@
 typedef struct metaslab metaslab_t;
 typedef struct zilog zilog_t;
 typedef struct traverse_handle traverse_handle_t;
+typedef struct spa_aux_vdev spa_aux_vdev_t;
 struct dsl_pool;
 
 /*
@@ -356,6 +357,14 @@
 extern boolean_t spa_spare_exists(uint64_t guid, uint64_t *pool);
 extern void spa_spare_activate(vdev_t *vd);
 
+/* L2ARC state (which is global across all pools) */
+extern void spa_l2cache_add(vdev_t *vd);
+extern void spa_l2cache_remove(vdev_t *vd);
+extern boolean_t spa_l2cache_exists(uint64_t guid, uint64_t *pool);
+extern void spa_l2cache_activate(vdev_t *vd);
+extern void spa_l2cache_drop(spa_t *spa);
+extern void spa_l2cache_space_update(vdev_t *vd, int64_t space, int64_t alloc);
+
 /* scrubbing */
 extern int spa_scrub(spa_t *spa, pool_scrub_type_t type, boolean_t force);
 extern void spa_scrub_suspend(spa_t *spa);
--- a/usr/src/uts/common/fs/zfs/sys/spa_impl.h	Fri Nov 09 18:46:13 2007 -0800
+++ b/usr/src/uts/common/fs/zfs/sys/spa_impl.h	Fri Nov 09 21:33:30 2007 -0800
@@ -58,6 +58,16 @@
 	uint64_t sh_records_lost;	/* num of records overwritten */
 } spa_history_phys_t;
 
+struct spa_aux_vdev {
+	uint64_t	sav_object;		/* MOS object for device list */
+	nvlist_t	*sav_config;		/* cached device config */
+	vdev_t		**sav_vdevs;		/* devices */
+	int		sav_count;		/* number of devices */
+	boolean_t	sav_sync;		/* sync the device list */
+	nvlist_t	**sav_pending;		/* pending device additions */
+	uint_t		sav_npending;		/* # pending devices */
+};
+
 struct spa {
 	/*
 	 * Fields protected by spa_namespace_lock.
@@ -87,11 +97,8 @@
 	vdev_t		*spa_root_vdev;		/* top-level vdev container */
 	uint64_t	spa_load_guid;		/* initial guid for spa_load */
 	list_t		spa_dirty_list;		/* vdevs with dirty labels */
-	uint64_t	spa_spares_object;	/* MOS object for spare list */
-	nvlist_t	*spa_sparelist;		/* cached spare config */
-	vdev_t		**spa_spares;		/* available hot spares */
-	int		spa_nspares;		/* number of hot spares */
-	boolean_t	spa_sync_spares;	/* sync the spares list */
+	spa_aux_vdev_t	spa_spares;		/* hot spares */
+	spa_aux_vdev_t	spa_l2cache;		/* L2ARC cache devices */
 	uint64_t	spa_config_object;	/* MOS object for pool config */
 	uint64_t	spa_syncing_txg;	/* txg currently syncing */
 	uint64_t	spa_sync_bplist_obj;	/* object for deferred frees */
@@ -134,8 +141,6 @@
 	uint64_t	spa_history;		/* history object */
 	kmutex_t	spa_history_lock;	/* history lock */
 	vdev_t		*spa_pending_vdev;	/* pending vdev additions */
-	nvlist_t	**spa_pending_spares;	/* pending spare additions */
-	uint_t		spa_pending_nspares;	/* # pending spares */
 	kmutex_t	spa_props_lock;		/* property lock */
 	uint64_t	spa_pool_props_object;	/* object for properties */
 	uint64_t	spa_bootfs;		/* default boot filesystem */
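
To make the later hunks easier to read, the old per-pool spare fields correspond to the new spa_aux_vdev_t members roughly as follows (the spa_l2cache instance uses the same members for cache devices):

    spa_spares_object    ->  spa_spares.sav_object
    spa_sparelist        ->  spa_spares.sav_config
    spa_spares           ->  spa_spares.sav_vdevs
    spa_nspares          ->  spa_spares.sav_count
    spa_sync_spares      ->  spa_spares.sav_sync
    spa_pending_spares   ->  spa_spares.sav_pending
    spa_pending_nspares  ->  spa_spares.sav_npending
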
--- a/usr/src/uts/common/fs/zfs/sys/vdev.h	Fri Nov 09 18:46:13 2007 -0800
+++ b/usr/src/uts/common/fs/zfs/sys/vdev.h	Fri Nov 09 21:33:30 2007 -0800
@@ -53,7 +53,7 @@
 extern int vdev_create(vdev_t *, uint64_t txg, boolean_t isreplace);
 extern void vdev_init(vdev_t *, uint64_t txg);
 extern void vdev_reopen(vdev_t *);
-extern int vdev_validate_spare(vdev_t *);
+extern int vdev_validate_aux(vdev_t *vd);
 extern int vdev_probe(vdev_t *);
 
 extern vdev_t *vdev_lookup_top(spa_t *spa, uint64_t vdev);
@@ -69,6 +69,7 @@
 extern void vdev_metaslab_fini(vdev_t *vd);
 
 extern void vdev_get_stats(vdev_t *vd, vdev_stat_t *vs);
+extern void vdev_clear_stats(vdev_t *vd);
 extern void vdev_stat_update(zio_t *zio);
 extern void vdev_scrub_stat_update(vdev_t *vd, pool_scrub_type_t type,
     boolean_t complete);
@@ -78,7 +79,7 @@
     vdev_aux_t aux);
 
 extern void vdev_space_update(vdev_t *vd, int64_t space_delta,
-    int64_t alloc_delta);
+    int64_t alloc_delta, boolean_t update_root);
 
 extern uint64_t vdev_psize_to_asize(vdev_t *vd, uint64_t psize);
 
@@ -113,7 +114,7 @@
 extern int vdev_config_sync(vdev_t *vd, uint64_t txg);
 
 extern nvlist_t *vdev_config_generate(spa_t *spa, vdev_t *vd,
-    boolean_t getstats, boolean_t isspare);
+    boolean_t getstats, boolean_t isspare, boolean_t isl2cache);
 
 /*
  * Label routines
@@ -127,7 +128,8 @@
 	VDEV_LABEL_CREATE,	/* create/add a new device */
 	VDEV_LABEL_REPLACE,	/* replace an existing device */
 	VDEV_LABEL_SPARE,	/* add a new hot spare */
-	VDEV_LABEL_REMOVE	/* remove an existing device */
+	VDEV_LABEL_REMOVE,	/* remove an existing device */
+	VDEV_LABEL_L2CACHE	/* add an L2ARC cache device */
 } vdev_labeltype_t;
 
 extern int vdev_label_init(vdev_t *vd, uint64_t txg, vdev_labeltype_t reason);
--- a/usr/src/uts/common/fs/zfs/sys/vdev_impl.h	Fri Nov 09 18:46:13 2007 -0800
+++ b/usr/src/uts/common/fs/zfs/sys/vdev_impl.h	Fri Nov 09 21:33:30 2007 -0800
@@ -168,6 +168,7 @@
 	uint8_t		vdev_tmpoffline; /* device taken offline temporarily? */
 	uint8_t		vdev_detached;	/* device detached?		*/
 	uint64_t	vdev_isspare;	/* was a hot spare		*/
+	uint64_t	vdev_isl2cache;	/* was a l2cache device		*/
 	vdev_queue_t	vdev_queue;	/* I/O deadline schedule queue	*/
 	vdev_cache_t	vdev_cache;	/* physical block cache		*/
 	uint64_t	vdev_not_present; /* not present during import	*/
@@ -249,6 +250,7 @@
 #define	VDEV_ALLOC_LOAD		0
 #define	VDEV_ALLOC_ADD		1
 #define	VDEV_ALLOC_SPARE	2
+#define	VDEV_ALLOC_L2CACHE	3
 
 /*
  * Allocate or free a vdev
--- a/usr/src/uts/common/fs/zfs/sys/zio.h	Fri Nov 09 18:46:13 2007 -0800
+++ b/usr/src/uts/common/fs/zfs/sys/zio.h	Fri Nov 09 21:33:30 2007 -0800
@@ -304,11 +304,13 @@
 
 extern zio_t *zio_read_phys(zio_t *pio, vdev_t *vd, uint64_t offset,
     uint64_t size, void *data, int checksum,
-    zio_done_func_t *done, void *private, int priority, int flags);
+    zio_done_func_t *done, void *private, int priority, int flags,
+    boolean_t labels);
 
 extern zio_t *zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset,
     uint64_t size, void *data, int checksum,
-    zio_done_func_t *done, void *private, int priority, int flags);
+    zio_done_func_t *done, void *private, int priority, int flags,
+    boolean_t labels);
 
 extern int zio_alloc_blk(spa_t *spa, uint64_t size, blkptr_t *new_bp,
     blkptr_t *old_bp, uint64_t txg);
--- a/usr/src/uts/common/fs/zfs/vdev.c	Fri Nov 09 18:46:13 2007 -0800
+++ b/usr/src/uts/common/fs/zfs/vdev.c	Fri Nov 09 21:33:30 2007 -0800
@@ -363,6 +363,9 @@
 	} else if (alloctype == VDEV_ALLOC_SPARE) {
 		if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) != 0)
 			return (EINVAL);
+	} else if (alloctype == VDEV_ALLOC_L2CACHE) {
+		if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) != 0)
+			return (EINVAL);
 	}
 
 	/*
@@ -550,6 +553,8 @@
 
 	if (vd->vdev_isspare)
 		spa_spare_remove(vd);
+	if (vd->vdev_isl2cache)
+		spa_l2cache_remove(vd);
 
 	txg_list_destroy(&vd->vdev_ms_list);
 	txg_list_destroy(&vd->vdev_dtl_list);
@@ -1367,14 +1372,14 @@
 }
 
 /*
- * This special case of vdev_spare() is used for hot spares.  It's sole purpose
- * it to set the vdev state for the associated vdev.  To do this, we make sure
- * that we can open the underlying device, then try to read the label, and make
- * sure that the label is sane and that it hasn't been repurposed to another
- * pool.
+ * This special case is used for hot spares and l2cache devices.  Its sole
+ * purpose is to set the vdev state for the associated vdev.  To do this,
+ * we make sure that we can open the underlying device, then try to read the
+ * label, and make sure that the label is sane and that it hasn't been
+ * repurposed to another pool.
  */
 int
-vdev_validate_spare(vdev_t *vd)
+vdev_validate_aux(vdev_t *vd)
 {
 	nvlist_t *label;
 	uint64_t guid, version;
@@ -1397,8 +1402,6 @@
 		return (-1);
 	}
 
-	spa_spare_add(vd);
-
 	/*
 	 * We don't actually check the pool state here.  If it's in fact in
 	 * use by another pool, we update this fact on the fly when requested.
@@ -1855,6 +1858,16 @@
 }
 
 void
+vdev_clear_stats(vdev_t *vd)
+{
+	mutex_enter(&vd->vdev_stat_lock);
+	vd->vdev_stat.vs_space = 0;
+	vd->vdev_stat.vs_dspace = 0;
+	vd->vdev_stat.vs_alloc = 0;
+	mutex_exit(&vd->vdev_stat_lock);
+}
+
+void
 vdev_stat_update(zio_t *zio)
 {
 	vdev_t *vd = zio->io_vd;
@@ -1952,15 +1965,14 @@
  * Update the in-core space usage stats for this vdev and the root vdev.
  */
 void
-vdev_space_update(vdev_t *vd, int64_t space_delta, int64_t alloc_delta)
+vdev_space_update(vdev_t *vd, int64_t space_delta, int64_t alloc_delta,
+    boolean_t update_root)
 {
 	int64_t dspace_delta = space_delta;
 	spa_t *spa = vd->vdev_spa;
 	vdev_t *rvd = spa->spa_root_vdev;
 
 	ASSERT(vd == vd->vdev_top);
-	ASSERT(rvd == vd->vdev_parent);
-	ASSERT(vd->vdev_ms_count != 0);
 
 	/*
 	 * Apply the inverse of the psize-to-asize (ie. RAID-Z) space-expansion
@@ -1978,18 +1990,23 @@
 	vd->vdev_stat.vs_dspace += dspace_delta;
 	mutex_exit(&vd->vdev_stat_lock);
 
-	/*
-	 * Don't count non-normal (e.g. intent log) space as part of
-	 * the pool's capacity.
-	 */
-	if (vd->vdev_mg->mg_class != spa->spa_normal_class)
-		return;
+	if (update_root) {
+		ASSERT(rvd == vd->vdev_parent);
+		ASSERT(vd->vdev_ms_count != 0);
 
-	mutex_enter(&rvd->vdev_stat_lock);
-	rvd->vdev_stat.vs_space += space_delta;
-	rvd->vdev_stat.vs_alloc += alloc_delta;
-	rvd->vdev_stat.vs_dspace += dspace_delta;
-	mutex_exit(&rvd->vdev_stat_lock);
+		/*
+		 * Don't count non-normal (e.g. intent log) space as part of
+		 * the pool's capacity.
+		 */
+		if (vd->vdev_mg->mg_class != spa->spa_normal_class)
+			return;
+
+		mutex_enter(&rvd->vdev_stat_lock);
+		rvd->vdev_stat.vs_space += space_delta;
+		rvd->vdev_stat.vs_alloc += alloc_delta;
+		rvd->vdev_stat.vs_dspace += dspace_delta;
+		mutex_exit(&rvd->vdev_stat_lock);
+	}
 }
 
 /*
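
vdev_space_update() now takes an update_root argument so that auxiliary devices can maintain per-vdev space statistics without inflating the pool-wide totals kept on the root vdev; spa_l2cache_space_update() (in spa_misc.c above) is the wrapper that passes B_FALSE for cache devices.  Schematically, with tvd, cache_vd and the delta variables as placeholders rather than code from this changeset:

    /* top-level data vdev: the deltas also roll up into the root vdev */
    vdev_space_update(tvd, space_delta, alloc_delta, B_TRUE);

    /* L2ARC cache device: per-vdev stats only, pool capacity unchanged */
    vdev_space_update(cache_vd, space_delta, alloc_delta, B_FALSE);
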
--- a/usr/src/uts/common/fs/zfs/vdev_label.c	Fri Nov 09 18:46:13 2007 -0800
+++ b/usr/src/uts/common/fs/zfs/vdev_label.c	Fri Nov 09 21:33:30 2007 -0800
@@ -169,7 +169,8 @@
 	    vdev_label_offset(vd->vdev_psize, l, offset),
 	    size, buf, ZIO_CHECKSUM_LABEL, done, private,
 	    ZIO_PRIORITY_SYNC_READ,
-	    ZIO_FLAG_CONFIG_HELD | ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE));
+	    ZIO_FLAG_CONFIG_HELD | ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE,
+	    B_TRUE));
 }
 
 static void
@@ -181,7 +182,8 @@
 	zio_nowait(zio_write_phys(zio, vd,
 	    vdev_label_offset(vd->vdev_psize, l, offset),
 	    size, buf, ZIO_CHECKSUM_LABEL, done, private,
-	    ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CONFIG_HELD | ZIO_FLAG_CANFAIL));
+	    ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CONFIG_HELD | ZIO_FLAG_CANFAIL,
+	    B_TRUE));
 }
 
 /*
@@ -189,7 +191,7 @@
  */
 nvlist_t *
 vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats,
-    boolean_t isspare)
+    boolean_t isspare, boolean_t isl2cache)
 {
 	nvlist_t *nv = NULL;
 
@@ -197,7 +199,7 @@
 
 	VERIFY(nvlist_add_string(nv, ZPOOL_CONFIG_TYPE,
 	    vd->vdev_ops->vdev_op_type) == 0);
-	if (!isspare)
+	if (!isspare && !isl2cache)
 		VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_ID, vd->vdev_id)
 		    == 0);
 	VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_GUID, vd->vdev_guid) == 0);
@@ -245,7 +247,7 @@
 	if (vd->vdev_isspare)
 		VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_IS_SPARE, 1) == 0);
 
-	if (!isspare && vd == vd->vdev_top) {
+	if (!isspare && !isl2cache && vd == vd->vdev_top) {
 		VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_METASLAB_ARRAY,
 		    vd->vdev_ms_array) == 0);
 		VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_METASLAB_SHIFT,
@@ -278,7 +280,7 @@
 
 		for (c = 0; c < vd->vdev_children; c++)
 			child[c] = vdev_config_generate(spa, vd->vdev_child[c],
-			    getstats, isspare);
+			    getstats, isspare, isl2cache);
 
 		VERIFY(nvlist_add_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
 		    child, vd->vdev_children) == 0);
@@ -357,7 +359,7 @@
  */
 static boolean_t
 vdev_inuse(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason,
-    uint64_t *spare_guid)
+    uint64_t *spare_guid, uint64_t *l2cache_guid)
 {
 	spa_t *spa = vd->vdev_spa;
 	uint64_t state, pool_guid, device_guid, txg, spare_pool;
@@ -366,6 +368,8 @@
 
 	if (spare_guid)
 		*spare_guid = 0ULL;
+	if (l2cache_guid)
+		*l2cache_guid = 0ULL;
 
 	/*
 	 * Read the label, if any, and perform some basic sanity checks.
@@ -384,7 +388,7 @@
 		return (B_FALSE);
 	}
 
-	if (state != POOL_STATE_SPARE &&
+	if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE &&
 	    (nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_GUID,
 	    &pool_guid) != 0 ||
 	    nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_TXG,
@@ -400,9 +404,10 @@
 	 * be a part of.  The only way this is allowed is if the device is a hot
 	 * spare (which we check for later on).
 	 */
-	if (state != POOL_STATE_SPARE &&
+	if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE &&
 	    !spa_guid_exists(pool_guid, device_guid) &&
-	    !spa_spare_exists(device_guid, NULL))
+	    !spa_spare_exists(device_guid, NULL) &&
+	    !spa_l2cache_exists(device_guid, NULL))
 		return (B_FALSE);
 
 	/*
@@ -412,13 +417,14 @@
 	 * user has attempted to add the same vdev multiple times in the same
 	 * transaction.
 	 */
-	if (state != POOL_STATE_SPARE && txg == 0 && vdtxg == crtxg)
+	if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE &&
+	    txg == 0 && vdtxg == crtxg)
 		return (B_TRUE);
 
 	/*
 	 * Check to see if this is a spare device.  We do an explicit check for
 	 * spa_has_spare() here because it may be on our pending list of spares
-	 * to add.
+	 * to add.  We also check if it is an l2cache device.
 	 */
 	if (spa_spare_exists(device_guid, &spare_pool) ||
 	    spa_has_spare(spa, device_guid)) {
@@ -427,6 +433,7 @@
 
 		switch (reason) {
 		case VDEV_LABEL_CREATE:
+		case VDEV_LABEL_L2CACHE:
 			return (B_TRUE);
 
 		case VDEV_LABEL_REPLACE:
@@ -439,6 +446,12 @@
 	}
 
 	/*
+	 * Check to see if this is an l2cache device.
+	 */
+	if (spa_l2cache_exists(device_guid, NULL))
+		return (B_TRUE);
+
+	/*
 	 * If the device is marked ACTIVE, then this device is in use by another
 	 * pool on the system.
 	 */
@@ -466,7 +479,7 @@
 	char *buf;
 	size_t buflen;
 	int error;
-	uint64_t spare_guid;
+	uint64_t spare_guid, l2cache_guid;
 
 	ASSERT(spa_config_held(spa, RW_WRITER));
 
@@ -488,19 +501,20 @@
 	 * Determine if the vdev is in use.
 	 */
 	if (reason != VDEV_LABEL_REMOVE &&
-	    vdev_inuse(vd, crtxg, reason, &spare_guid))
+	    vdev_inuse(vd, crtxg, reason, &spare_guid, &l2cache_guid))
 		return (EBUSY);
 
 	ASSERT(reason != VDEV_LABEL_REMOVE ||
-	    vdev_inuse(vd, crtxg, reason, NULL));
+	    vdev_inuse(vd, crtxg, reason, NULL, NULL));
 
 	/*
-	 * If this is a request to add or replace a spare that is in use
-	 * elsewhere on the system, then we must update the guid (which was
-	 * initialized to a random value) to reflect the actual GUID (which is
-	 * shared between multiple pools).
+	 * If this is a request to add or replace a spare or l2cache device
+	 * that is in use elsewhere on the system, then we must update the
+	 * guid (which was initialized to a random value) to reflect the
+	 * actual GUID (which is shared between multiple pools).
 	 */
-	if (reason != VDEV_LABEL_REMOVE && spare_guid != 0ULL) {
+	if (reason != VDEV_LABEL_REMOVE && reason != VDEV_LABEL_L2CACHE &&
+	    spare_guid != 0ULL) {
 		vdev_t *pvd = vd->vdev_parent;
 
 		for (; pvd != NULL; pvd = pvd->vdev_parent) {
@@ -520,6 +534,27 @@
 		ASSERT(reason == VDEV_LABEL_REPLACE);
 	}
 
+	if (reason != VDEV_LABEL_REMOVE && reason != VDEV_LABEL_SPARE &&
+	    l2cache_guid != 0ULL) {
+		vdev_t *pvd = vd->vdev_parent;
+
+		for (; pvd != NULL; pvd = pvd->vdev_parent) {
+			pvd->vdev_guid_sum -= vd->vdev_guid;
+			pvd->vdev_guid_sum += l2cache_guid;
+		}
+
+		vd->vdev_guid = vd->vdev_guid_sum = l2cache_guid;
+
+		/*
+		 * If this is a replacement, then we want to fall through to the
+		 * rest of the code.  If we're adding an l2cache, then it's
+		 * already labeled appropriately and we can just return.
+		 */
+		if (reason == VDEV_LABEL_L2CACHE)
+			return (0);
+		ASSERT(reason == VDEV_LABEL_REPLACE);
+	}
+
 	/*
 	 * Initialize its label.
 	 */
@@ -549,6 +584,19 @@
 		    POOL_STATE_SPARE) == 0);
 		VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_GUID,
 		    vd->vdev_guid) == 0);
+	} else if (reason == VDEV_LABEL_L2CACHE ||
+	    (reason == VDEV_LABEL_REMOVE && vd->vdev_isl2cache)) {
+		/*
+		 * For level 2 ARC devices, add a special label.
+		 */
+		VERIFY(nvlist_alloc(&label, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+
+		VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_VERSION,
+		    spa_version(spa)) == 0);
+		VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_POOL_STATE,
+		    POOL_STATE_L2CACHE) == 0);
+		VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_GUID,
+		    vd->vdev_guid) == 0);
 	} else {
 		label = spa_config_generate(spa, vd, 0ULL, B_FALSE);
 
@@ -623,13 +671,19 @@
 	/*
 	 * If this vdev hasn't been previously identified as a spare, then we
 	 * mark it as such only if a) we are labeling it as a spare, or b) it
-	 * exists as a spare elsewhere in the system.
+	 * exists as a spare elsewhere in the system.  Do the same for
+	 * level 2 ARC devices.
 	 */
 	if (error == 0 && !vd->vdev_isspare &&
 	    (reason == VDEV_LABEL_SPARE ||
 	    spa_spare_exists(vd->vdev_guid, NULL)))
 		spa_spare_add(vd);
 
+	if (error == 0 && !vd->vdev_isl2cache &&
+	    (reason == VDEV_LABEL_L2CACHE ||
+	    spa_l2cache_exists(vd->vdev_guid, NULL)))
+		spa_l2cache_add(vd);
+
 	return (error);
 }
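
For comparison with the kernel code above, here is a minimal userland sketch that builds the same three-entry label nvlist vdev_label_init() writes for an L2ARC device: the SPA version, a pool state of POOL_STATE_L2CACHE, and the device guid.  It assumes libnvpair and the ZPOOL_CONFIG_*/POOL_STATE_L2CACHE definitions from <sys/fs/zfs.h>; the function name make_l2cache_label() is illustrative only and not part of this changeset.

    #include <libnvpair.h>
    #include <sys/fs/zfs.h>

    static nvlist_t *
    make_l2cache_label(uint64_t version, uint64_t guid)
    {
            nvlist_t *label;

            if (nvlist_alloc(&label, NV_UNIQUE_NAME, 0) != 0)
                    return (NULL);
            if (nvlist_add_uint64(label, ZPOOL_CONFIG_VERSION, version) != 0 ||
                nvlist_add_uint64(label, ZPOOL_CONFIG_POOL_STATE,
                POOL_STATE_L2CACHE) != 0 ||
                nvlist_add_uint64(label, ZPOOL_CONFIG_GUID, guid) != 0) {
                    nvlist_free(label);
                    return (NULL);
            }
            return (label);
    }
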
 
--- a/usr/src/uts/common/fs/zfs/zfs_ioctl.c	Fri Nov 09 18:46:13 2007 -0800
+++ b/usr/src/uts/common/fs/zfs/zfs_ioctl.c	Fri Nov 09 21:33:30 2007 -0800
@@ -963,23 +963,30 @@
 {
 	spa_t *spa;
 	int error;
-	nvlist_t *config;
+	nvlist_t *config, **l2cache;
+	uint_t nl2cache = 0;
 
 	error = spa_open(zc->zc_name, &spa, FTAG);
 	if (error != 0)
 		return (error);
 
+	error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
+	    &config);
+	(void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_L2CACHE,
+	    &l2cache, &nl2cache);
+
 	/*
 	 * A root pool with concatenated devices is not supported.
 	 * Thus, can not add a device to a root pool with one device.
+	 * Allow for l2cache devices to be added.
 	 */
-	if (spa->spa_root_vdev->vdev_children == 1 && spa->spa_bootfs != 0) {
+	if (spa->spa_root_vdev->vdev_children == 1 && spa->spa_bootfs != 0 &&
+	    nl2cache == 0) {
 		spa_close(spa, FTAG);
 		return (EDOM);
 	}
 
-	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
-	    &config)) == 0) {
+	if (error == 0) {
 		error = spa_vdev_add(spa, config);
 		nvlist_free(config);
 	}
@@ -2573,9 +2580,26 @@
 	if (zc->zc_guid == 0) {
 		vd = NULL;
 	} else if ((vd = spa_lookup_by_guid(spa, zc->zc_guid)) == NULL) {
-		(void) spa_vdev_exit(spa, NULL, txg, ENODEV);
-		spa_close(spa, FTAG);
-		return (ENODEV);
+		spa_aux_vdev_t *sav;
+		int i;
+
+		/*
+		 * Check if this is an l2cache device.
+		 */
+		ASSERT(spa != NULL);
+		sav = &spa->spa_l2cache;
+		for (i = 0; i < sav->sav_count; i++) {
+			if (sav->sav_vdevs[i]->vdev_guid == zc->zc_guid) {
+				vd = sav->sav_vdevs[i];
+				break;
+			}
+		}
+
+		if (vd == NULL) {
+			(void) spa_vdev_exit(spa, NULL, txg, ENODEV);
+			spa_close(spa, FTAG);
+			return (ENODEV);
+		}
 	}
 
 	vdev_clear(spa, vd, B_TRUE);
--- a/usr/src/uts/common/fs/zfs/zio.c	Fri Nov 09 18:46:13 2007 -0800
+++ b/usr/src/uts/common/fs/zfs/zio.c	Fri Nov 09 21:33:30 2007 -0800
@@ -675,7 +675,7 @@
 
 static void
 zio_phys_bp_init(vdev_t *vd, blkptr_t *bp, uint64_t offset, uint64_t size,
-    int checksum)
+    int checksum, boolean_t labels)
 {
 	ASSERT(vd->vdev_children == 0);
 
@@ -683,8 +683,12 @@
 	ASSERT(P2PHASE(size, SPA_MINBLOCKSIZE) == 0);
 	ASSERT(P2PHASE(offset, SPA_MINBLOCKSIZE) == 0);
 
-	ASSERT(offset + size <= VDEV_LABEL_START_SIZE ||
-	    offset >= vd->vdev_psize - VDEV_LABEL_END_SIZE);
+#ifdef ZFS_DEBUG
+	if (labels) {
+		ASSERT(offset + size <= VDEV_LABEL_START_SIZE ||
+		    offset >= vd->vdev_psize - VDEV_LABEL_END_SIZE);
+	}
+#endif
 	ASSERT3U(offset + size, <=, vd->vdev_psize);
 
 	BP_ZERO(bp);
@@ -703,14 +707,14 @@
 zio_t *
 zio_read_phys(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size,
     void *data, int checksum, zio_done_func_t *done, void *private,
-    int priority, int flags)
+    int priority, int flags, boolean_t labels)
 {
 	zio_t *zio;
 	blkptr_t blk;
 
 	ZIO_ENTER(vd->vdev_spa);
 
-	zio_phys_bp_init(vd, &blk, offset, size, checksum);
+	zio_phys_bp_init(vd, &blk, offset, size, checksum, labels);
 
 	zio = zio_create(pio, vd->vdev_spa, 0, &blk, data, size, done, private,
 	    ZIO_TYPE_READ, priority, flags | ZIO_FLAG_PHYSICAL,
@@ -730,7 +734,7 @@
 zio_t *
 zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size,
     void *data, int checksum, zio_done_func_t *done, void *private,
-    int priority, int flags)
+    int priority, int flags, boolean_t labels)
 {
 	zio_block_tail_t *zbt;
 	void *wbuf;
@@ -739,7 +743,7 @@
 
 	ZIO_ENTER(vd->vdev_spa);
 
-	zio_phys_bp_init(vd, &blk, offset, size, checksum);
+	zio_phys_bp_init(vd, &blk, offset, size, checksum, labels);
 
 	zio = zio_create(pio, vd->vdev_spa, 0, &blk, data, size, done, private,
 	    ZIO_TYPE_WRITE, priority, flags | ZIO_FLAG_PHYSICAL,
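
The new labels argument to zio_read_phys()/zio_write_phys() only controls the debug assertion that physical I/O falls within the vdev label regions: label I/O (as in vdev_label.c above) passes B_TRUE, while I/O to arbitrary offsets on a cache device would pass B_FALSE.  Schematically, with the offset/size/buf/done/private arguments as placeholders rather than code from this changeset:

    /* label read: assert that the offset lies within the label regions */
    zio_nowait(zio_read_phys(pio, vd, offset, size, buf, ZIO_CHECKSUM_LABEL,
        done, private, ZIO_PRIORITY_SYNC_READ,
        ZIO_FLAG_CONFIG_HELD | ZIO_FLAG_CANFAIL, B_TRUE));

    /* cache-device read: any properly aligned offset on the vdev is valid */
    zio_nowait(zio_read_phys(pio, vd, offset, size, buf, ZIO_CHECKSUM_OFF,
        done, private, ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, B_FALSE));
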
--- a/usr/src/uts/common/sys/fs/zfs.h	Fri Nov 09 18:46:13 2007 -0800
+++ b/usr/src/uts/common/sys/fs/zfs.h	Fri Nov 09 21:33:30 2007 -0800
@@ -219,14 +219,15 @@
 #define	SPA_VERSION_7			7ULL
 #define	SPA_VERSION_8			8ULL
 #define	SPA_VERSION_9			9ULL
+#define	SPA_VERSION_10			10ULL
 
 /*
  * When bumping up SPA_VERSION, make sure GRUB ZFS understand the on-disk
  * format change. Go to usr/src/grub/grub-0.95/stage2/{zfs-include/, fsys_zfs*},
  * and do the appropriate changes.
  */
-#define	SPA_VERSION			SPA_VERSION_9
-#define	SPA_VERSION_STRING		"9"
+#define	SPA_VERSION			SPA_VERSION_10
+#define	SPA_VERSION_STRING		"10"
 
 /*
  * Symbolic names for the changes that caused a SPA_VERSION switch.
@@ -256,6 +257,7 @@
 #define	SPA_VERSION_REFRESERVATION	SPA_VERSION_9
 #define	SPA_VERSION_REFQUOTA		SPA_VERSION_9
 #define	SPA_VERSION_UNIQUE_ACCURATE	SPA_VERSION_9
+#define	SPA_VERSION_L2CACHE		SPA_VERSION_10
 
 /*
  * ZPL version - rev'd whenever an incompatible on-disk format change
@@ -312,6 +314,7 @@
 #define	ZPOOL_CONFIG_UNSPARE		"unspare"
 #define	ZPOOL_CONFIG_PHYS_PATH		"phys_path"
 #define	ZPOOL_CONFIG_IS_LOG		"is_log"
+#define	ZPOOL_CONFIG_L2CACHE		"l2cache"
 /*
  * The persistent vdev state is stored as separate values rather than a single
  * 'vdev_state' entry.  This is because a device can be in multiple states, such
@@ -331,6 +334,7 @@
 #define	VDEV_TYPE_MISSING		"missing"
 #define	VDEV_TYPE_SPARE			"spare"
 #define	VDEV_TYPE_LOG			"log"
+#define	VDEV_TYPE_L2CACHE		"l2cache"
 
 /*
  * This is needed in userland to report the minimum necessary device size.
@@ -384,14 +388,16 @@
 
 /*
  * pool state.  The following states are written to disk as part of the normal
- * SPA lifecycle: ACTIVE, EXPORTED, DESTROYED, SPARE.  The remaining states are
- * software abstractions used at various levels to communicate pool state.
+ * SPA lifecycle: ACTIVE, EXPORTED, DESTROYED, SPARE, L2CACHE.  The remaining
+ * states are software abstractions used at various levels to communicate
+ * pool state.
  */
 typedef enum pool_state {
 	POOL_STATE_ACTIVE = 0,		/* In active use		*/
 	POOL_STATE_EXPORTED,		/* Explicitly exported		*/
 	POOL_STATE_DESTROYED,		/* Explicitly destroyed		*/
 	POOL_STATE_SPARE,		/* Reserved for hot spare use	*/
+	POOL_STATE_L2CACHE,		/* Level 2 ARC device		*/
 	POOL_STATE_UNINITIALIZED,	/* Internal spa_t state		*/
 	POOL_STATE_IO_FAILURE,		/* Internal pool state		*/
 	POOL_STATE_UNAVAIL,		/* Internal libzfs state	*/