6616739 panic message ZFS: I/O failure (write on <unknown> is not very helpful
author ek110237
Wed, 30 Apr 2008 12:37:56 -0700
changeset 6523 c1d2a7f04573
parent 6522 26439431f254
child 6524 3609a6f77afc
6616739 panic message ZFS: I/O failure (write on <unknown> is not very helpful 6623234 better FMA integration for 'failmode' property 6680971 blkbuf is leaked in zio_assess() on DEBUG bits for truly failed I/Os 6685865 vdev_disk_probe_io()/vdev_file_probe_io() dvd's setting is wrong
usr/src/cmd/fm/dicts/ZFS.dict
usr/src/cmd/fm/dicts/ZFS.po
usr/src/cmd/fm/modules/common/zfs-diagnosis/zfs_de.c
usr/src/cmd/zpool/zpool_main.c
usr/src/lib/libzfs/common/libzfs.h
usr/src/lib/libzfs/common/libzfs_status.c
usr/src/uts/common/fs/zfs/sys/zio.h
usr/src/uts/common/fs/zfs/vdev.c
usr/src/uts/common/fs/zfs/vdev_disk.c
usr/src/uts/common/fs/zfs/vdev_file.c
usr/src/uts/common/fs/zfs/zfs_fm.c
usr/src/uts/common/fs/zfs/zio.c
usr/src/uts/common/sys/fm/fs/zfs.h
usr/src/uts/common/sys/fs/zfs.h
--- a/usr/src/cmd/fm/dicts/ZFS.dict	Wed Apr 30 11:30:34 2008 -0700
+++ b/usr/src/cmd/fm/dicts/ZFS.dict	Wed Apr 30 12:37:56 2008 -0700
@@ -1,5 +1,5 @@
 #
-# Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 # Use is subject to license terms.
 #
 # CDDL HEADER START
@@ -43,3 +43,5 @@
 ereport.fs.zfs.pool.hostname_mismatch=13
 fault.fs.zfs.vdev.io=14
 fault.fs.zfs.vdev.checksum=15
+fault.fs.zfs.io_failure_wait=16
+fault.fs.zfs.io_failure_continue=17
--- a/usr/src/cmd/fm/dicts/ZFS.po	Wed Apr 30 11:30:34 2008 -0700
+++ b/usr/src/cmd/fm/dicts/ZFS.po	Wed Apr 30 12:37:56 2008 -0700
@@ -1,5 +1,5 @@
 #
-# Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 # Use is subject to license terms.
 #
 # CDDL HEADER START
@@ -265,3 +265,35 @@
 msgstr "Fault tolerance of the pool may be compromised."
 msgid "ZFS-8000-GH.action"
 msgstr "Run 'zpool status -x' and replace the bad device."
+#
+# code: ZFS-8000-HC
+# keys: fault.fs.zfs.io_failure_wait
+#
+msgid "ZFS-8000-HC.type"
+msgstr "Error"
+msgid "ZFS-8000-HC.severity"
+msgstr "Major"
+msgid "ZFS-8000-HC.description"
+msgstr "The ZFS pool has experienced currently unrecoverable I/O failures.  Refer to %s for more information."
+msgid "ZFS-8000-HC.response"
+msgstr "No automated response will be taken."
+msgid "ZFS-8000-HC.impact"
+msgstr "Read and write I/Os cannot be serviced."
+msgid "ZFS-8000-HC.action"
+msgstr "\n\nThe pool has experienced I/O failures. Since the ZFS pool property 'failmode'\nis set to 'wait', all I/Os (reads and writes) are blocked.  See the zpool(1M)\nmanpage for more information on the 'failmode' property.  Manual intervention\nis required for I/Os to be serviced.  You can see which devices are\naffected by running 'zpool status -x':\n\n\n\n# zpool status -x\n  pool: test\n state: FAULTED\nstatus: There are I/O failures.\naction: Make sure the affected devices are connected, then run 'zpool clear'.\n   see: http://www.sun.com/msg/ZFS-8000-HC\n scrub: none requested\nconfig:\n\n        NAME        STATE     READ WRITE CKSUM\n        test        FAULTED      0    13     0  insufficient replicas\n          c0t0d0    FAULTED      0     7     0  experienced I/O failures\n          c0t1d0    ONLINE       0     0     0\n\nerrors: 1 data errors, use '-v' for a list\n\n\n\nAfter you have made sure the affected devices are connected, run 'zpool clear'\nto allow I/O to the pool again:\n\n\n\n# zpool clear test\n\n\n\nIf I/O failures continue to happen, then applications and commands  for the\npool may hang.  At this point, a reboot may be necessary to allow I/O to the\npool again.\n\n\n	"
+#
+# code: ZFS-8000-JQ
+# keys: fault.fs.zfs.io_failure_continue
+#
+msgid "ZFS-8000-JQ.type"
+msgstr "Error"
+msgid "ZFS-8000-JQ.severity"
+msgstr "Major"
+msgid "ZFS-8000-JQ.description"
+msgstr "The ZFS pool has experienced currently unrecoverable I/O failures.  Refer to %s for more information."
+msgid "ZFS-8000-JQ.response"
+msgstr "No automated response will be taken."
+msgid "ZFS-8000-JQ.impact"
+msgstr "Write I/Os cannot be serviced."
+msgid "ZFS-8000-JQ.action"
+msgstr "\n\nThe pool has experienced I/O failures. Since the ZFS pool property 'failmode'\nis set to 'continue', read I/Os will continue to be serviced, but write I/Os\nare blocked.  See the zpool(1M) manpage for more information on the 'failmode'\nproperty.  Manual intervention is required for write I/Os to be serviced.\n  You can see which devices are affected by running 'zpool status -x':\n\n\n\n# zpool status -x\n  pool: test\n state: FAULTED\nstatus: There are I/O failures.\naction: Make sure the affected devices are connected, then run 'zpool clear'.\n   see: http://www.sun.com/msg/ZFS-8000-HC\n scrub: none requested\nconfig:\n\n        NAME        STATE     READ WRITE CKSUM\n        test        FAULTED      0    13     0  insufficient replicas\n          c0t0d0    FAULTED      0     7     0  experienced I/O failures\n          c0t1d0    ONLINE       0     0     0\n\nerrors: 1 data errors, use '-v' for a list\n\n\n\nAfter you have made sure the affected devices are connected, run 'zpool clear'\nto allow write I/O to the pool again:\n\n\n\n# zpool clear test\n\n\n\nIf I/O failures continue to happen, then applications and commands  for the\npool may hang.  At this point, a reboot may be necessary to allow I/O to the\npool again.\n\n\n	"
--- a/usr/src/cmd/fm/modules/common/zfs-diagnosis/zfs_de.c	Wed Apr 30 11:30:34 2008 -0700
+++ b/usr/src/cmd/fm/modules/common/zfs-diagnosis/zfs_de.c	Wed Apr 30 12:37:56 2008 -0700
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -531,6 +531,8 @@
 		 */
 		zfs_case_solve(hdl, zcp, "fault.fs.zfs.device",  B_TRUE);
 	} else {
+		char *failmode = NULL;
+
 		if (pool_state == SPA_LOAD_OPEN) {
 			/*
 			 * Error incurred during a pool open.  Reset the timer
@@ -576,6 +578,20 @@
 				zfs_case_serialize(hdl, zcp);
 			}
 			serd = zcp->zc_data.zc_serd_checksum;
+		} else if (fmd_nvl_class_match(hdl, nvl,
+		    "ereport.fs.zfs.io_failure") && (nvlist_lookup_string(nvl,
+		    FM_EREPORT_PAYLOAD_ZFS_POOL_FAILMODE, &failmode) == 0) &&
+		    failmode != NULL) {
+			if (strncmp(failmode, FM_EREPORT_FAILMODE_CONTINUE,
+			    strlen(FM_EREPORT_FAILMODE_CONTINUE)) == 0) {
+				zfs_case_solve(hdl, zcp,
+				    "fault.fs.zfs.io_failure_continue",
+				    B_FALSE);
+			} else if (strncmp(failmode, FM_EREPORT_FAILMODE_WAIT,
+			    strlen(FM_EREPORT_FAILMODE_WAIT)) == 0) {
+				zfs_case_solve(hdl, zcp,
+				    "fault.fs.zfs.io_failure_wait", B_FALSE);
+			}
 		}
 
 		/*
--- a/usr/src/cmd/zpool/zpool_main.c	Wed Apr 30 11:30:34 2008 -0700
+++ b/usr/src/cmd/zpool/zpool_main.c	Wed Apr 30 12:37:56 2008 -0700
@@ -2776,6 +2776,10 @@
 			(void) printf(gettext("too many errors"));
 			break;
 
+		case VDEV_AUX_IO_FAILURE:
+			(void) printf(gettext("experienced I/O failures"));
+			break;
+
 		default:
 			(void) printf(gettext("corrupted data"));
 			break;
@@ -3058,6 +3062,14 @@
 		    "to be recovered.\n"));
 		break;
 
+	case ZPOOL_STATUS_IO_FAILURE_WAIT:
+	case ZPOOL_STATUS_IO_FAILURE_CONTINUE:
+		(void) printf(gettext("status: One or more devices are "
+		    "faulted in response to IO failures.\n"));
+		(void) printf(gettext("action: Make sure the affected devices "
+		    "are connected, then run 'zpool clear'.\n"));
+		break;
+
 	default:
 		/*
 		 * The remaining errors can't actually be generated, yet.
--- a/usr/src/lib/libzfs/common/libzfs.h	Wed Apr 30 11:30:34 2008 -0700
+++ b/usr/src/lib/libzfs/common/libzfs.h	Wed Apr 30 12:37:56 2008 -0700
@@ -253,6 +253,8 @@
 	ZPOOL_STATUS_FAILING_DEV,	/* device experiencing errors */
 	ZPOOL_STATUS_VERSION_NEWER,	/* newer on-disk version */
 	ZPOOL_STATUS_HOSTID_MISMATCH,	/* last accessed by another system */
+	ZPOOL_STATUS_IO_FAILURE_WAIT,	/* failed I/O, failmode 'wait' */
+	ZPOOL_STATUS_IO_FAILURE_CONTINUE, /* failed I/O, failmode 'continue' */
 	ZPOOL_STATUS_FAULTED_DEV_R,	/* faulted device with replicas */
 	ZPOOL_STATUS_FAULTED_DEV_NR,	/* faulted device with no replicas */
 
--- a/usr/src/lib/libzfs/common/libzfs_status.c	Wed Apr 30 11:30:34 2008 -0700
+++ b/usr/src/lib/libzfs/common/libzfs_status.c	Wed Apr 30 12:37:56 2008 -0700
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -62,7 +62,9 @@
 	"ZFS-8000-8A",
 	"ZFS-8000-9P",
 	"ZFS-8000-A5",
-	"ZFS-8000-EY"
+	"ZFS-8000-EY",
+	"ZFS-8000-HC",
+	"ZFS-8000-JQ"
 };
 
 #define	NMSGID	(sizeof (zfs_msgid_table) / sizeof (zfs_msgid_table[0]))
@@ -161,7 +163,7 @@
  * only picks the most damaging of all the current errors to report.
  */
 static zpool_status_t
-check_status(nvlist_t *config, boolean_t isimport)
+check_status(zpool_handle_t *zhp, nvlist_t *config, boolean_t isimport)
 {
 	nvlist_t *nvroot;
 	vdev_stat_t *vs;
@@ -203,6 +205,44 @@
 		return (ZPOOL_STATUS_BAD_GUID_SUM);
 
 	/*
+	 * Pool has experienced failed I/O.
+	 */
+	if (stateval == POOL_STATE_IO_FAILURE) {
+		zpool_handle_t *tmp_zhp = NULL;
+		libzfs_handle_t *hdl = NULL;
+		char property[ZPOOL_MAXPROPLEN];
+		char *failmode = NULL;
+
+		if (zhp == NULL) {
+			char *poolname;
+
+			verify(nvlist_lookup_string(config,
+			    ZPOOL_CONFIG_POOL_NAME, &poolname) == 0);
+			if ((hdl = libzfs_init()) == NULL)
+				return (ZPOOL_STATUS_IO_FAILURE_WAIT);
+			tmp_zhp = zpool_open_canfail(hdl, poolname);
+			if (tmp_zhp == NULL) {
+				libzfs_fini(hdl);
+				return (ZPOOL_STATUS_IO_FAILURE_WAIT);
+			}
+		}
+		if (zpool_get_prop(zhp ? zhp : tmp_zhp, ZPOOL_PROP_FAILUREMODE,
+		    property, sizeof (property), NULL) == 0)
+			failmode = property;
+		if (tmp_zhp != NULL)
+			zpool_close(tmp_zhp);
+		if (hdl != NULL)
+			libzfs_fini(hdl);
+		if (failmode == NULL)
+			return (ZPOOL_STATUS_IO_FAILURE_WAIT);
+
+		if (strncmp(failmode, "continue", strlen("continue")) == 0)
+			return (ZPOOL_STATUS_IO_FAILURE_CONTINUE);
+		else
+			return (ZPOOL_STATUS_IO_FAILURE_WAIT);
+	}
+
+	/*
 	 * Bad devices in non-replicated config.
 	 */
 	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
@@ -273,7 +313,7 @@
 zpool_status_t
 zpool_get_status(zpool_handle_t *zhp, char **msgid)
 {
-	zpool_status_t ret = check_status(zhp->zpool_config, B_FALSE);
+	zpool_status_t ret = check_status(zhp, zhp->zpool_config, B_FALSE);
 
 	if (ret >= NMSGID)
 		*msgid = NULL;
@@ -286,7 +326,7 @@
 zpool_status_t
 zpool_import_status(nvlist_t *config, char **msgid)
 {
-	zpool_status_t ret = check_status(config, B_TRUE);
+	zpool_status_t ret = check_status(NULL, config, B_TRUE);
 
 	if (ret >= NMSGID)
 		*msgid = NULL;
--- a/usr/src/uts/common/fs/zfs/sys/zio.h	Wed Apr 30 11:30:34 2008 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/zio.h	Wed Apr 30 12:37:56 2008 -0700
@@ -248,6 +248,8 @@
 
 	/* Stuff for the vdev stack */
 	vdev_t		*io_vd;
+	vdev_t		**io_failed_vds;
+	uint64_t	io_failed_vds_count;
 	void		*io_vsd;
 	uint64_t	io_offset;
 	uint64_t	io_deadline;
--- a/usr/src/uts/common/fs/zfs/vdev.c	Wed Apr 30 11:30:34 2008 -0700
+++ b/usr/src/uts/common/fs/zfs/vdev.c	Wed Apr 30 12:37:56 2008 -0700
@@ -20,7 +20,7 @@
  */
 
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -1778,7 +1778,14 @@
 int
 vdev_is_dead(vdev_t *vd)
 {
-	return (vd->vdev_state < VDEV_STATE_DEGRADED);
+	/*
+	 * If the vdev experienced I/O failures, then the vdev is marked
+	 * as faulted (VDEV_STATE_FAULTED) for status output and FMA; however,
+	 * we need to allow access to the vdev for resumed I/Os (see
+	 * zio_vdev_resume_io() ).
+	 */
+	return (vd->vdev_state < VDEV_STATE_DEGRADED &&
+	    vd->vdev_stat.vs_aux != VDEV_AUX_IO_FAILURE);
 }
 
 int
@@ -2044,6 +2051,9 @@
 	list_remove(&spa->spa_dirty_list, vd);
 }
 
+/*
+ * Propagate vdev state up from children to parent.
+ */
 void
 vdev_propagate_state(vdev_t *vd)
 {
@@ -2058,6 +2068,8 @@
 			child = vd->vdev_child[c];
 			if (vdev_is_dead(child) && !vdev_readable(child))
 				faulted++;
+			else if (child->vdev_stat.vs_aux == VDEV_AUX_IO_FAILURE)
+				faulted++;
 			else if (child->vdev_state <= VDEV_STATE_DEGRADED)
 				degraded++;
 
--- a/usr/src/uts/common/fs/zfs/vdev_disk.c	Wed Apr 30 11:30:34 2008 -0700
+++ b/usr/src/uts/common/fs/zfs/vdev_disk.c	Wed Apr 30 12:37:56 2008 -0700
@@ -300,7 +300,7 @@
     int flags)
 {
 	int error = 0;
-	vdev_disk_t *dvd = vd->vdev_tsd;
+	vdev_disk_t *dvd = vd ? vd->vdev_tsd : NULL;
 
 	if (vd == NULL || dvd == NULL || dvd->vd_lh == NULL)
 		return (EINVAL);
--- a/usr/src/uts/common/fs/zfs/vdev_file.c	Wed Apr 30 11:30:34 2008 -0700
+++ b/usr/src/uts/common/fs/zfs/vdev_file.c	Wed Apr 30 12:37:56 2008 -0700
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -133,7 +133,7 @@
 vdev_file_probe_io(vdev_t *vd, caddr_t data, size_t size, uint64_t offset,
     enum uio_rw rw)
 {
-	vdev_file_t *vf = vd->vdev_tsd;
+	vdev_file_t *vf = vd ? vd->vdev_tsd : NULL;
 	ssize_t resid;
 	int error = 0;
 
--- a/usr/src/uts/common/fs/zfs/zfs_fm.c	Wed Apr 30 11:30:34 2008 -0700
+++ b/usr/src/uts/common/fs/zfs/zfs_fm.c	Wed Apr 30 12:37:56 2008 -0700
@@ -48,7 +48,7 @@
  * 	pool			X
  *
  * If we are in a loading state, all errors are chained together by the same
- * SPA-wide ENA.
+ * SPA-wide ENA (Error Numeric Association).
  *
  * For isolated I/O requests, we get the ENA from the zio_t. The propagation
  * gets very complicated due to RAID-Z, gang blocks, and vdev caching.  We want
@@ -197,6 +197,16 @@
 	    FM_EREPORT_PAYLOAD_ZFS_POOL_CONTEXT, DATA_TYPE_INT32,
 	    spa->spa_load_state, NULL);
 
+	if (spa != NULL) {
+		fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_POOL_FAILMODE,
+		    DATA_TYPE_STRING,
+		    spa_get_failmode(spa) == ZIO_FAILURE_MODE_WAIT ?
+		    FM_EREPORT_FAILMODE_WAIT :
+		    spa_get_failmode(spa) == ZIO_FAILURE_MODE_CONTINUE ?
+		    FM_EREPORT_FAILMODE_CONTINUE : FM_EREPORT_FAILMODE_PANIC,
+		    NULL);
+	}
+
 	if (vd != NULL) {
 		vdev_t *pvd = vd->vdev_parent;
 
--- a/usr/src/uts/common/fs/zfs/zio.c	Wed Apr 30 11:30:34 2008 -0700
+++ b/usr/src/uts/common/fs/zfs/zio.c	Wed Apr 30 12:37:56 2008 -0700
@@ -796,6 +796,20 @@
  * Initiate I/O, either sync or async
  * ==========================================================================
  */
+static void
+zio_destroy(zio_t *zio)
+{
+	mutex_destroy(&zio->io_lock);
+	cv_destroy(&zio->io_cv);
+	if (zio->io_failed_vds != NULL) {
+		kmem_free(zio->io_failed_vds,
+		    zio->io_failed_vds_count * sizeof (vdev_t *));
+		zio->io_failed_vds = NULL;
+		zio->io_failed_vds_count = 0;
+	}
+	kmem_cache_free(zio_cache, zio);
+}
+
 int
 zio_wait(zio_t *zio)
 {
@@ -813,9 +827,7 @@
 	mutex_exit(&zio->io_lock);
 
 	error = zio->io_error;
-	mutex_destroy(&zio->io_lock);
-	cv_destroy(&zio->io_cv);
-	kmem_cache_free(zio_cache, zio);
+	zio_destroy(zio);
 
 	return (error);
 }
@@ -864,13 +876,44 @@
 }
 
 static void
+zio_add_failed_vdev(zio_t *pio, zio_t *zio)
+{
+	uint64_t oldcount = pio->io_failed_vds_count;
+	vdev_t **new_vds;
+	int i;
+
+	ASSERT(MUTEX_HELD(&pio->io_lock));
+
+	if (zio->io_vd == NULL)
+		return;
+
+	for (i = 0; i < oldcount; i++) {
+		if (pio->io_failed_vds[i] == zio->io_vd)
+			return;
+	}
+
+	new_vds = kmem_zalloc((oldcount + 1) * sizeof (vdev_t *), KM_SLEEP);
+	if (pio->io_failed_vds != NULL) {
+		bcopy(pio->io_failed_vds, new_vds,
+		    oldcount * sizeof (vdev_t *));
+		kmem_free(pio->io_failed_vds, oldcount * sizeof (vdev_t *));
+	}
+	pio->io_failed_vds = new_vds;
+	pio->io_failed_vds[oldcount] = zio->io_vd;
+	pio->io_failed_vds_count++;
+}
+
+static void
 zio_notify_parent(zio_t *zio, uint32_t stage, uint64_t *countp)
 {
 	zio_t *pio = zio->io_parent;
 
 	mutex_enter(&pio->io_lock);
-	if (pio->io_error == 0 && !(zio->io_flags & ZIO_FLAG_DONT_PROPAGATE))
+	if (pio->io_error == 0 && !(zio->io_flags & ZIO_FLAG_DONT_PROPAGATE)) {
 		pio->io_error = zio->io_error;
+		if (zio->io_error && zio->io_error != ENOTSUP)
+			zio_add_failed_vdev(pio, zio);
+	}
 	ASSERT3U(*countp, >, 0);
 	if (--*countp == 0 && pio->io_stalled == stage) {
 		pio->io_stalled = 0;
@@ -1083,6 +1126,38 @@
 	return (ZIO_PIPELINE_STOP);
 }
 
+static void
+zio_handle_io_failure(zio_t *zio, vdev_t *vd)
+{
+	spa_t *spa = zio->io_spa;
+	blkptr_t *bp = zio->io_bp;
+	char *blkbuf;
+
+#ifdef ZFS_DEBUG
+	blkbuf = kmem_alloc(BP_SPRINTF_LEN, KM_NOSLEEP);
+	if (blkbuf) {
+		sprintf_blkptr(blkbuf, BP_SPRINTF_LEN,
+		    bp ? bp : &zio->io_bp_copy);
+	}
+	cmn_err(CE_WARN, "ZFS: %s (%s on %s off %llx: zio %p %s): error %d",
+	    zio->io_error == ECKSUM ? "bad checksum" : "I/O failure",
+	    zio_type_name[zio->io_type], vdev_description(vd),
+	    (u_longlong_t)zio->io_offset, (void *)zio,
+	    blkbuf ? blkbuf : "", zio->io_error);
+	if (blkbuf)
+		kmem_free(blkbuf, BP_SPRINTF_LEN);
+#endif
+
+	if (spa_get_failmode(spa) == ZIO_FAILURE_MODE_PANIC) {
+		fm_panic("Pool '%s' has encountered an uncorrectable I/O "
+		    "failure and the failure mode property for this pool "
+		    "is set to panic.", spa_name(spa));
+	}
+	zfs_ereport_post(FM_EREPORT_ZFS_IO_FAILURE, spa, NULL, NULL, 0, 0);
+	vdev_set_state(vd, vd == spa->spa_root_vdev ? B_TRUE : B_FALSE,
+	    VDEV_STATE_FAULTED, VDEV_AUX_IO_FAILURE);
+}
+
 static int
 zio_assess(zio_t *zio)
 {
@@ -1164,32 +1239,23 @@
 		 * a result of vdev failures vs. a full pool.
 		 */
 		if (!(zio->io_flags & ZIO_FLAG_CANFAIL)) {
-			char *blkbuf;
+			int i;
 
-#ifdef ZFS_DEBUG
-			blkbuf = kmem_alloc(BP_SPRINTF_LEN, KM_NOSLEEP);
-			if (blkbuf) {
-				sprintf_blkptr(blkbuf, BP_SPRINTF_LEN,
-				    bp ? bp : &zio->io_bp_copy);
+			for (i = 0; i < zio->io_failed_vds_count; i++) {
+				zio_handle_io_failure(zio,
+				    zio->io_failed_vds[i]);
 			}
-			cmn_err(CE_WARN, "ZFS: %s (%s on %s off %llx: zio %p "
-			    "%s): error %d", zio->io_error == ECKSUM ?
-			    "bad checksum" : "I/O failure",
-			    zio_type_name[zio->io_type],
-			    vdev_description(vd),
-			    (u_longlong_t)zio->io_offset,
-			    (void *)zio, blkbuf ? blkbuf : "", zio->io_error);
-#endif
-
-			if (spa_get_failmode(spa) == ZIO_FAILURE_MODE_PANIC) {
-				fm_panic("Pool '%s' has encountered an "
-				    "uncorrectable I/O failure and the "
-				    "failure mode property for this pool "
-				    "is set to panic.", spa_name(spa));
+			if (zio->io_failed_vds_count == 0) {
+				zio_handle_io_failure(zio,
+				    vd ? vd : spa->spa_root_vdev);
 			}
-			cmn_err(CE_WARN, "Pool '%s' has encountered "
-			    "an uncorrectable I/O error. "
-			    "Manual intervention is required.", spa_name(spa));
+			if (zio->io_failed_vds != NULL) {
+				kmem_free(zio->io_failed_vds,
+				    zio->io_failed_vds_count *
+				    sizeof (vdev_t *));
+				zio->io_failed_vds = NULL;
+				zio->io_failed_vds_count = 0;
+			}
 			return (zio_vdev_suspend_io(zio));
 		}
 	}
@@ -1248,9 +1314,7 @@
 		cv_broadcast(&zio->io_cv);
 		mutex_exit(&zio->io_lock);
 	} else {
-		mutex_destroy(&zio->io_lock);
-		cv_destroy(&zio->io_cv);
-		kmem_cache_free(zio_cache, zio);
+		zio_destroy(zio);
 	}
 
 	return (ZIO_PIPELINE_STOP);
--- a/usr/src/uts/common/sys/fm/fs/zfs.h	Wed Apr 30 11:30:34 2008 -0700
+++ b/usr/src/uts/common/sys/fm/fs/zfs.h	Wed Apr 30 12:37:56 2008 -0700
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -45,8 +45,10 @@
 #define	FM_EREPORT_ZFS_DEVICE_BAD_GUID_SUM	"vdev.bad_guid_sum"
 #define	FM_EREPORT_ZFS_DEVICE_TOO_SMALL		"vdev.too_small"
 #define	FM_EREPORT_ZFS_DEVICE_BAD_LABEL		"vdev.bad_label"
+#define	FM_EREPORT_ZFS_IO_FAILURE		"io_failure"
 
 #define	FM_EREPORT_PAYLOAD_ZFS_POOL		"pool"
+#define	FM_EREPORT_PAYLOAD_ZFS_POOL_FAILMODE	"pool_failmode"
 #define	FM_EREPORT_PAYLOAD_ZFS_POOL_GUID	"pool_guid"
 #define	FM_EREPORT_PAYLOAD_ZFS_POOL_CONTEXT	"pool_context"
 #define	FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID	"vdev_guid"
@@ -66,6 +68,10 @@
 #define	FM_EREPORT_PAYLOAD_ZFS_ZIO_SIZE		"zio_size"
 #define	FM_EREPORT_PAYLOAD_ZFS_PREV_STATE	"prev_state"
 
+#define	FM_EREPORT_FAILMODE_WAIT		"wait"
+#define	FM_EREPORT_FAILMODE_CONTINUE		"continue"
+#define	FM_EREPORT_FAILMODE_PANIC		"panic"
+
 #define	FM_RESOURCE_OK				"ok"
 #define	FM_RESOURCE_REMOVED			"removed"
 #define	FM_RESOURCE_AUTOREPLACE			"autoreplace"
--- a/usr/src/uts/common/sys/fs/zfs.h	Wed Apr 30 11:30:34 2008 -0700
+++ b/usr/src/uts/common/sys/fs/zfs.h	Wed Apr 30 12:37:56 2008 -0700
@@ -390,7 +390,8 @@
 	VDEV_AUX_VERSION_NEWER,	/* on-disk version is too new		*/
 	VDEV_AUX_VERSION_OLDER,	/* on-disk version is too old		*/
 	VDEV_AUX_SPARED,	/* hot spare used in another pool	*/
-	VDEV_AUX_ERR_EXCEEDED	/* too many errors			*/
+	VDEV_AUX_ERR_EXCEEDED,	/* too many errors			*/
+	VDEV_AUX_IO_FAILURE	/* experienced I/O failure		*/
 } vdev_aux_t;
 
 /*