PSARC 2008/486 Intent log replay failure handling
author perrin
Mon, 04 Aug 2008 20:36:57 -0700
changeset 7294 c9c31ef4c960
parent 7293 7e3aaccffc51
child 7295 0a40d4215127
PSARC 2008/486 Intent log replay failure handling
6707530 log device failure needs some work
usr/src/cmd/fm/dicts/ZFS.dict
usr/src/cmd/fm/dicts/ZFS.po
usr/src/cmd/fm/modules/common/zfs-diagnosis/zfs_de.c
usr/src/cmd/mdb/common/modules/zfs/zfs.c
usr/src/cmd/zpool/zpool_main.c
usr/src/lib/libzfs/common/libzfs.h
usr/src/lib/libzfs/common/libzfs_pool.c
usr/src/lib/libzfs/common/libzfs_status.c
usr/src/lib/libzfs_jni/common/libzfs_jni_pool.c
usr/src/uts/common/fs/zfs/arc.c
usr/src/uts/common/fs/zfs/dmu_objset.c
usr/src/uts/common/fs/zfs/spa.c
usr/src/uts/common/fs/zfs/sys/spa_impl.h
usr/src/uts/common/fs/zfs/sys/zil.h
usr/src/uts/common/fs/zfs/zfs_acl.c
usr/src/uts/common/fs/zfs/zfs_ioctl.c
usr/src/uts/common/fs/zfs/zfs_vnops.c
usr/src/uts/common/fs/zfs/zil.c
usr/src/uts/common/fs/zfs/zvol.c
usr/src/uts/common/sys/fm/fs/zfs.h
usr/src/uts/common/sys/fs/zfs.h
--- a/usr/src/cmd/fm/dicts/ZFS.dict	Mon Aug 04 19:27:44 2008 -0700
+++ b/usr/src/cmd/fm/dicts/ZFS.dict	Mon Aug 04 20:36:57 2008 -0700
@@ -45,3 +45,4 @@
 fault.fs.zfs.vdev.checksum=15
 fault.fs.zfs.io_failure_wait=16
 fault.fs.zfs.io_failure_continue=17
+fault.fs.zfs.log_replay=18
--- a/usr/src/cmd/fm/dicts/ZFS.po	Mon Aug 04 19:27:44 2008 -0700
+++ b/usr/src/cmd/fm/dicts/ZFS.po	Mon Aug 04 20:36:57 2008 -0700
@@ -297,3 +297,20 @@
 msgstr "Read and write I/Os cannot be serviced."
 msgid "ZFS-8000-JQ.action"
 msgstr "Make sure the affected devices are connected, then run\n	    'zpool clear'."
+#
+# code: ZFS-8000-K4
+# keys: fault.fs.zfs.log_replay
+#
+msgid "ZFS-8000-K4.type"
+msgstr "Error"
+msgid "ZFS-8000-K4.severity"
+msgstr "Major"
+msgid "ZFS-8000-K4.description"
+msgstr "A ZFS intent log device could not be read.  Refer to %s for more information."
+msgid "ZFS-8000-K4.response"
+msgstr "No automated response will be taken."
+msgid "ZFS-8000-K4.impact"
+msgstr "The intent log(s) cannot be replayed."
+msgid "ZFS-8000-K4.action"
+msgstr "Either restore the affected device(s) and run 'zpool online',\n	    or ignore the intent log records by running 'zpool clear'."
+
--- a/usr/src/cmd/fm/modules/common/zfs-diagnosis/zfs_de.c	Mon Aug 04 19:27:44 2008 -0700
+++ b/usr/src/cmd/fm/modules/common/zfs-diagnosis/zfs_de.c	Mon Aug 04 20:36:57 2008 -0700
@@ -563,6 +563,12 @@
 		}
 
 		zfs_case_solve(hdl, zcp, "fault.fs.zfs.pool", B_TRUE);
+	} else if (fmd_nvl_class_match(hdl, nvl,
+	    ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_LOG_REPLAY))) {
+		/*
+		 * Pool level fault for reading the intent logs.
+		 */
+		zfs_case_solve(hdl, zcp, "fault.fs.zfs.log_replay", B_TRUE);
 	} else if (fmd_nvl_class_match(hdl, nvl, "ereport.fs.zfs.vdev.*")) {
 		/*
 		 * Device fault.  If this occurred during pool open, then defer
--- a/usr/src/cmd/mdb/common/modules/zfs/zfs.c	Mon Aug 04 19:27:44 2008 -0700
+++ b/usr/src/cmd/mdb/common/modules/zfs/zfs.c	Mon Aug 04 20:36:57 2008 -0700
@@ -1123,6 +1123,24 @@
 		case VDEV_AUX_BAD_LABEL:
 			aux = "BAD_LABEL";
 			break;
+		case VDEV_AUX_VERSION_NEWER:
+			aux = "VERS_NEWER";
+			break;
+		case VDEV_AUX_VERSION_OLDER:
+			aux = "VERS_OLDER";
+			break;
+		case VDEV_AUX_SPARED:
+			aux = "SPARED";
+			break;
+		case VDEV_AUX_ERR_EXCEEDED:
+			aux = "ERR_EXCEEDED";
+			break;
+		case VDEV_AUX_IO_FAILURE:
+			aux = "IO_FAILURE";
+			break;
+		case VDEV_AUX_BAD_LOG:
+			aux = "BAD_LOG";
+			break;
 		default:
 			aux = "UNKNOWN";
 			break;
--- a/usr/src/cmd/zpool/zpool_main.c	Mon Aug 04 19:27:44 2008 -0700
+++ b/usr/src/cmd/zpool/zpool_main.c	Mon Aug 04 20:36:57 2008 -0700
@@ -1142,16 +1142,23 @@
 		(void) printf(gettext("status: The pool is formatted using an "
 		    "incompatible version.\n"));
 		break;
+
 	case ZPOOL_STATUS_HOSTID_MISMATCH:
 		(void) printf(gettext("status: The pool was last accessed by "
 		    "another system.\n"));
 		break;
+
 	case ZPOOL_STATUS_FAULTED_DEV_R:
 	case ZPOOL_STATUS_FAULTED_DEV_NR:
 		(void) printf(gettext("status: One or more devices are "
 		    "faulted.\n"));
 		break;
 
+	case ZPOOL_STATUS_BAD_LOG:
+		(void) printf(gettext("status: An intent log record cannot be "
+		    "read.\n"));
+		break;
+
 	default:
 		/*
 		 * No other status can be seen when importing pools.
@@ -2553,7 +2560,7 @@
 	pool = argv[1];
 	device = argc == 3 ? argv[2] : NULL;
 
-	if ((zhp = zpool_open(g_zfs, pool)) == NULL)
+	if ((zhp = zpool_open_canfail(g_zfs, pool)) == NULL)
 		return (1);
 
 	if (zpool_clear(zhp, device) != 0)
@@ -2840,6 +2847,10 @@
 			(void) printf(gettext("experienced I/O failures"));
 			break;
 
+		case VDEV_AUX_BAD_LOG:
+			(void) printf(gettext("bad intent log"));
+			break;
+
 		default:
 			(void) printf(gettext("corrupted data"));
 			break;
@@ -3130,6 +3141,17 @@
 		    "are connected, then run 'zpool clear'.\n"));
 		break;
 
+	case ZPOOL_STATUS_BAD_LOG:	/* intent log chain unreadable */
+		(void) printf(gettext("status: An intent log record "
+		    "could not be read.\n"
+		    "\tWaiting for administrator intervention to fix the "
+		    "faulted pool.\n"));
+		(void) printf(gettext("action: Either restore the affected "
+		    "device(s) and run 'zpool online',\n"
+		    "\tor ignore the intent log records by running "
+		    "'zpool clear'.\n"));
+		break;
+
 	default:
 		/*
 		 * The remaining errors can't actually be generated, yet.
--- a/usr/src/lib/libzfs/common/libzfs.h	Mon Aug 04 19:27:44 2008 -0700
+++ b/usr/src/lib/libzfs/common/libzfs.h	Mon Aug 04 20:36:57 2008 -0700
@@ -260,6 +260,7 @@
 	ZPOOL_STATUS_IO_FAILURE_CONTINUE, /* failed I/O, failmode 'continue' */
 	ZPOOL_STATUS_FAULTED_DEV_R,	/* faulted device with replicas */
 	ZPOOL_STATUS_FAULTED_DEV_NR,	/* faulted device with no replicas */
+	ZPOOL_STATUS_BAD_LOG,		/* cannot read log chain(s) */
 
 	/*
 	 * The following are not faults per se, but still an error possibly
--- a/usr/src/lib/libzfs/common/libzfs_pool.c	Mon Aug 04 19:27:44 2008 -0700
+++ b/usr/src/lib/libzfs/common/libzfs_pool.c	Mon Aug 04 20:36:57 2008 -0700
@@ -137,8 +137,21 @@
 	uint64_t value;
 	zprop_source_t source;
 
-	if (zhp->zpool_props == NULL && zpool_get_all_props(zhp))
+	if (zhp->zpool_props == NULL && zpool_get_all_props(zhp)) {
+		/*
+		 * zpool_get_all_props() has most likely failed because
+		 * the pool is faulted, but if all we need is the top level
+		 * vdev's guid then get it from the zhp config nvlist.
+		 */
+		if ((prop == ZPOOL_PROP_GUID) &&
+		    (nvlist_lookup_nvlist(zhp->zpool_config,
+		    ZPOOL_CONFIG_VDEV_TREE, &nv) == 0) &&
+		    (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &value)
+		    == 0)) {
+			return (value);
+		}
 		return (zpool_prop_default_numeric(prop));
+	}
 
 	nvl = zhp->zpool_props;
 	if (nvlist_lookup_nvlist(nvl, zpool_prop_to_name(prop), &nv) == 0) {
@@ -169,7 +182,7 @@
 	case VDEV_STATE_REMOVED:
 		return (gettext("REMOVED"));
 	case VDEV_STATE_CANT_OPEN:
-		if (aux == VDEV_AUX_CORRUPT_DATA)
+		if (aux == VDEV_AUX_CORRUPT_DATA || aux == VDEV_AUX_BAD_LOG)
 			return (gettext("FAULTED"));
 		else
 			return (gettext("UNAVAIL"));
--- a/usr/src/lib/libzfs/common/libzfs_status.c	Mon Aug 04 19:27:44 2008 -0700
+++ b/usr/src/lib/libzfs/common/libzfs_status.c	Mon Aug 04 20:36:57 2008 -0700
@@ -64,7 +64,8 @@
 	"ZFS-8000-A5",
 	"ZFS-8000-EY",
 	"ZFS-8000-HC",
-	"ZFS-8000-JQ"
+	"ZFS-8000-JQ",
+	"ZFS-8000-K4",
 };
 
 #define	NMSGID	(sizeof (zfs_msgid_table) / sizeof (zfs_msgid_table[0]))
@@ -243,6 +244,14 @@
 	}
 
 	/*
+	 * Could not read a log.
+	 */
+	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
+	    vs->vs_aux == VDEV_AUX_BAD_LOG) {
+		return (ZPOOL_STATUS_BAD_LOG);
+	}
+
+	/*
 	 * Bad devices in non-replicated config.
 	 */
 	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
--- a/usr/src/lib/libzfs_jni/common/libzfs_jni_pool.c	Mon Aug 04 19:27:44 2008 -0700
+++ b/usr/src/lib/libzfs_jni/common/libzfs_jni_pool.c	Mon Aug 04 20:36:57 2008 -0700
@@ -133,6 +133,7 @@
 	{ ZPOOL_STATUS_HOSTID_MISMATCH, "ZPOOL_STATUS_HOSTID_MISMATCH" },
 	{ ZPOOL_STATUS_FAULTED_DEV_R, "ZPOOL_STATUS_FAULTED_DEV_R" },
 	{ ZPOOL_STATUS_FAULTED_DEV_NR, "ZPOOL_STATUS_FAULTED_DEV_NR" },
+	{ ZPOOL_STATUS_BAD_LOG, "ZPOOL_STATUS_BAD_LOG" },
 	{ ZPOOL_STATUS_VERSION_OLDER, "ZPOOL_STATUS_VERSION_OLDER" },
 	{ ZPOOL_STATUS_RESILVERING, "ZPOOL_STATUS_RESILVERING" },
 	{ ZPOOL_STATUS_OFFLINE_DEV, "ZPOOL_STATUS_OFFLINE_DEV" },
--- a/usr/src/uts/common/fs/zfs/arc.c	Mon Aug 04 19:27:44 2008 -0700
+++ b/usr/src/uts/common/fs/zfs/arc.c	Mon Aug 04 20:36:57 2008 -0700
@@ -2338,9 +2338,6 @@
 		if (HDR_IN_HASH_TABLE(hdr))
 			buf_hash_remove(hdr);
 		freeable = refcount_is_zero(&hdr->b_refcnt);
-		/* convert checksum errors into IO errors */
-		if (zio->io_error == ECKSUM)
-			zio->io_error = EIO;
 	}
 
 	/*
--- a/usr/src/uts/common/fs/zfs/dmu_objset.c	Mon Aug 04 19:27:44 2008 -0700
+++ b/usr/src/uts/common/fs/zfs/dmu_objset.c	Mon Aug 04 20:36:57 2008 -0700
@@ -207,6 +207,9 @@
 		    ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &aflags, &zb);
 		if (err) {
 			kmem_free(osi, sizeof (objset_impl_t));
+			/* convert checksum errors into IO errors */
+			if (err == ECKSUM)
+				err = EIO;
 			return (err);
 		}
 		osi->os_phys = osi->os_phys_buf->b_data;
--- a/usr/src/uts/common/fs/zfs/spa.c	Mon Aug 04 19:27:44 2008 -0700
+++ b/usr/src/uts/common/fs/zfs/spa.c	Mon Aug 04 20:36:57 2008 -0700
@@ -956,6 +956,32 @@
 }
 
 /*
+ * Check for missing log devices; returns 1 if a log chain check fails, else 0.
+ */
+int
+spa_check_logs(spa_t *spa)
+{
+	switch (spa->spa_log_state) {
+	case SPA_LOG_MISSING:
+		/* FALLTHROUGH: recheck in case the slog has been restored */
+	case SPA_LOG_UNKNOWN:
+		if (dmu_objset_find(spa->spa_name, zil_check_log_chain, NULL,
+		    DS_FIND_CHILDREN)) {
+			spa->spa_log_state = SPA_LOG_MISSING;
+			return (1);
+		}
+		break;
+
+	case SPA_LOG_CLEAR:	/* set on 'zpool clear' */
+		(void) dmu_objset_find(spa->spa_name, zil_clear_log_chain, NULL,
+		    DS_FIND_CHILDREN);
+		break;
+	}
+	spa->spa_log_state = SPA_LOG_GOOD;
+	return (0);
+}
+
+/*
  * Load an existing storage pool, using the pool's builtin spa_config as a
  * source of configuration information.
  */
@@ -971,6 +997,7 @@
 	uint64_t version;
 	zio_t *zio;
 	uint64_t autoreplace = 0;
+	char *ereport = FM_EREPORT_ZFS_POOL;
 
 	spa->spa_load_state = state;
 
@@ -1259,6 +1286,15 @@
 		spa_config_exit(spa, FTAG);
 	}
 
+	if (spa_check_logs(spa)) {
+		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
+		    VDEV_AUX_BAD_LOG);
+		error = ENXIO;
+		ereport = FM_EREPORT_ZFS_LOG_REPLAY;
+		goto out;
+	}
+
+
 	spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION);
 
 	error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
@@ -1368,7 +1404,7 @@
 out:
 	spa->spa_minref = refcount_count(&spa->spa_refcount);
 	if (error && error != EBADF)
-		zfs_ereport_post(FM_EREPORT_ZFS_POOL, spa, NULL, NULL, 0, 0);
+		zfs_ereport_post(ereport, spa, NULL, NULL, 0, 0);
 	spa->spa_load_state = SPA_LOAD_NONE;
 	spa->spa_ena = 0;
 
--- a/usr/src/uts/common/fs/zfs/sys/spa_impl.h	Mon Aug 04 19:27:44 2008 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/spa_impl.h	Mon Aug 04 20:36:57 2008 -0700
@@ -80,6 +80,13 @@
 	char		*scd_path;
 } spa_config_dirent_t;
 
+typedef enum spa_log_state {
+	SPA_LOG_UNKNOWN = 0,	/* log state not yet checked */
+	SPA_LOG_MISSING,	/* log chain check failed */
+	SPA_LOG_CLEAR,		/* clear the log(s) on next check */
+	SPA_LOG_GOOD,		/* log(s) checked or cleared */
+} spa_log_state_t;
+
 struct spa {
 	/*
 	 * Fields protected by spa_namespace_lock.
@@ -159,6 +166,7 @@
 	boolean_t	spa_import_faulted;	/* allow faulted vdevs */
 	boolean_t	spa_is_root;		/* pool is root */
 	int		spa_minref;		/* num refs when first opened */
+	spa_log_state_t spa_log_state;		/* log state */
 	/*
 	 * spa_refcnt & spa_config_lock must be the last elements
 	 * because refcount_t changes size based on compilation options.
--- a/usr/src/uts/common/fs/zfs/sys/zil.h	Mon Aug 04 19:27:44 2008 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/zil.h	Mon Aug 04 20:36:57 2008 -0700
@@ -362,6 +362,8 @@
 extern void	zil_commit(zilog_t *zilog, uint64_t seq, uint64_t oid);
 
 extern int	zil_claim(char *osname, void *txarg);
+extern int	zil_check_log_chain(char *osname, void *txarg);
+extern int	zil_clear_log_chain(char *osname, void *txarg);
 extern void	zil_sync(zilog_t *zilog, dmu_tx_t *tx);
 extern void	zil_clean(zilog_t *zilog);
 extern int	zil_is_committed(zilog_t *zilog);
--- a/usr/src/uts/common/fs/zfs/zfs_acl.c	Mon Aug 04 19:27:44 2008 -0700
+++ b/usr/src/uts/common/fs/zfs/zfs_acl.c	Mon Aug 04 20:36:57 2008 -0700
@@ -972,6 +972,9 @@
 
 	if (error != 0) {
 		zfs_acl_free(aclp);
+		/* convert checksum errors into IO errors */
+		if (error == ECKSUM)
+			error = EIO;
 		return (error);
 	}
 
--- a/usr/src/uts/common/fs/zfs/zfs_ioctl.c	Mon Aug 04 19:27:44 2008 -0700
+++ b/usr/src/uts/common/fs/zfs/zfs_ioctl.c	Mon Aug 04 20:36:57 2008 -0700
@@ -2669,6 +2669,21 @@
 	uint64_t txg;
 	int error;
 
+	/*
+	 * On zpool clear we also fix up missing slogs
+	 */
+	mutex_enter(&spa_namespace_lock);
+	spa = spa_lookup(zc->zc_name);
+	if (spa == NULL) {
+		mutex_exit(&spa_namespace_lock);
+		return (EIO);
+	}
+	if (spa->spa_log_state == SPA_LOG_MISSING) {
+		/* we need to let spa_open/spa_load clear the chains */
+		spa->spa_log_state = SPA_LOG_CLEAR;
+	}
+	mutex_exit(&spa_namespace_lock);
+
 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
 		return (error);
 
--- a/usr/src/uts/common/fs/zfs/zfs_vnops.c	Mon Aug 04 19:27:44 2008 -0700
+++ b/usr/src/uts/common/fs/zfs/zfs_vnops.c	Mon Aug 04 20:36:57 2008 -0700
@@ -501,8 +501,12 @@
 			error = mappedread(vp, nbytes, uio);
 		else
 			error = dmu_read_uio(os, zp->z_id, uio, nbytes);
-		if (error)
+		if (error) {
+			/* convert checksum errors into IO errors */
+			if (error == ECKSUM)
+				error = EIO;
 			break;
+		}
 
 		n -= nbytes;
 	}
@@ -3897,6 +3901,9 @@
 		if (err) {
 			/* On error, toss the entire kluster */
 			pvn_read_done(pp, B_ERROR);
+			/* convert checksum errors into IO errors */
+			if (err == ECKSUM)
+				err = EIO;
 			return (err);
 		}
 		cur_pp = cur_pp->p_next;
--- a/usr/src/uts/common/fs/zfs/zil.c	Mon Aug 04 19:27:44 2008 -0700
+++ b/usr/src/uts/common/fs/zfs/zil.c	Mon Aug 04 20:36:57 2008 -0700
@@ -505,7 +505,7 @@
 
 	error = dmu_objset_open(osname, DMU_OST_ANY, DS_MODE_USER, &os);
 	if (error) {
-		cmn_err(CE_WARN, "can't process intent log for %s", osname);
+		cmn_err(CE_WARN, "can't open objset for %s", osname);
 		return (0);
 	}
 
@@ -532,6 +532,83 @@
 	return (0);
 }
 
+/*
+ * dmu_objset_find() callback: walk the named objset's log chain.
+ * Returns 0 when there is no chain, the objset cannot be opened, or the walk
+ * ends on a checksum error (normal end of chain); else the read error.
+ */
+/* ARGSUSED */
+int
+zil_check_log_chain(char *osname, void *txarg)
+{
+	zilog_t *zilog;
+	zil_header_t *zh;
+	blkptr_t blk;
+	arc_buf_t *abuf;
+	objset_t *os;
+	char *lrbuf;
+	zil_trailer_t *ztp;
+	int error;
+
+	error = dmu_objset_open(osname, DMU_OST_ANY, DS_MODE_USER, &os);
+	if (error) {
+		cmn_err(CE_WARN, "can't open objset for %s", osname);
+		return (0);	/* not reported as a log failure */
+	}
+
+	zilog = dmu_objset_zil(os);
+	zh = zil_header_in_syncing_context(zilog);
+	blk = zh->zh_log;
+	if (BP_IS_HOLE(&blk)) {
+		dmu_objset_close(os);
+		return (0); /* no chain */
+	}
+
+	for (;;) {
+		error = zil_read_log_block(zilog, &blk, &abuf);
+		if (error)
+			break;
+		lrbuf = abuf->b_data;
+		ztp = (zil_trailer_t *)(lrbuf + BP_GET_LSIZE(&blk)) - 1;
+		blk = ztp->zit_next_blk;	/* follow link in block trailer */
+		VERIFY(arc_buf_remove_ref(abuf, &abuf) == 1);
+	}
+	dmu_objset_close(os);
+	if (error == ECKSUM)
+		return (0); /* normal end of chain */
+	return (error);
+}
+
+/*
+ * Clear a log chain by zeroing the header's zh_log block pointer in a tx.
+ */
+/* ARGSUSED */
+int
+zil_clear_log_chain(char *osname, void *txarg)
+{
+	zilog_t *zilog;
+	zil_header_t *zh;
+	objset_t *os;
+	dmu_tx_t *tx;
+	int error;
+
+	error = dmu_objset_open(osname, DMU_OST_ANY, DS_MODE_USER, &os);
+	if (error) {
+		cmn_err(CE_WARN, "can't open objset for %s", osname);
+		return (0);	/* failure is only logged */
+	}
+
+	zilog = dmu_objset_zil(os);
+	tx = dmu_tx_create(zilog->zl_os);
+	(void) dmu_tx_assign(tx, TXG_WAIT);	/* return value discarded */
+	zh = zil_header_in_syncing_context(zilog);
+	BP_ZERO(&zh->zh_log);
+	dsl_dataset_dirty(dmu_objset_ds(os), tx);
+	dmu_tx_commit(tx);
+	dmu_objset_close(os);
+	return (0);
+}
+
 static int
 zil_vdev_compare(const void *x1, const void *x2)
 {
--- a/usr/src/uts/common/fs/zfs/zvol.c	Mon Aug 04 19:27:44 2008 -0700
+++ b/usr/src/uts/common/fs/zfs/zvol.c	Mon Aug 04 20:36:57 2008 -0700
@@ -1284,8 +1284,12 @@
 				dmu_tx_commit(tx);
 			}
 		}
-		if (error)
+		if (error) {
+			/* convert checksum errors into IO errors */
+			if (error == ECKSUM)
+				error = EIO;
 			break;
+		}
 		off += size;
 		addr += size;
 		resid -= size;
@@ -1388,8 +1392,12 @@
 			bytes = volsize - uio->uio_loffset;
 
 		error =  dmu_read_uio(zv->zv_objset, ZVOL_OBJ, uio, bytes);
-		if (error)
+		if (error) {
+			/* convert checksum errors into IO errors */
+			if (error == ECKSUM)
+				error = EIO;
 			break;
+		}
 	}
 	zfs_range_unlock(rl);
 	return (error);
--- a/usr/src/uts/common/sys/fm/fs/zfs.h	Mon Aug 04 19:27:44 2008 -0700
+++ b/usr/src/uts/common/sys/fm/fs/zfs.h	Mon Aug 04 20:36:57 2008 -0700
@@ -47,6 +47,7 @@
 #define	FM_EREPORT_ZFS_DEVICE_BAD_LABEL		"vdev.bad_label"
 #define	FM_EREPORT_ZFS_IO_FAILURE		"io_failure"
 #define	FM_EREPORT_ZFS_PROBE_FAILURE		"probe_failure"
+#define	FM_EREPORT_ZFS_LOG_REPLAY		"log_replay"
 
 #define	FM_EREPORT_PAYLOAD_ZFS_POOL		"pool"
 #define	FM_EREPORT_PAYLOAD_ZFS_POOL_FAILMODE	"pool_failmode"
--- a/usr/src/uts/common/sys/fs/zfs.h	Mon Aug 04 19:27:44 2008 -0700
+++ b/usr/src/uts/common/sys/fs/zfs.h	Mon Aug 04 20:36:57 2008 -0700
@@ -108,7 +108,7 @@
 
 /*
  * Pool properties are identified by these constants and must be added to the
- * end of this list to ensure that external conumsers are not affected
+ * end of this list to ensure that external consumers are not affected
  * by the change. If you make any changes to this list, be sure to update
  * the property table in usr/src/common/zfs/zpool_prop.c.
  */
@@ -409,7 +409,8 @@
 	VDEV_AUX_VERSION_OLDER,	/* on-disk version is too old		*/
 	VDEV_AUX_SPARED,	/* hot spare used in another pool	*/
 	VDEV_AUX_ERR_EXCEEDED,	/* too many errors			*/
-	VDEV_AUX_IO_FAILURE	/* experienced I/O failure		*/
+	VDEV_AUX_IO_FAILURE,	/* experienced I/O failure		*/
+	VDEV_AUX_BAD_LOG	/* cannot read log chain(s)		*/
 } vdev_aux_t;
 
 /*