6911420 ZFS device removal detection should work with SCSAv3
author George Wilson <George.Wilson@Sun.COM>
Mon, 26 Apr 2010 00:23:10 -0700
changeset 12247 5bcd281629f8
parent 12246 4d28e30f7dd0
child 12248 0f49ba3a14da
6911420 ZFS device removal detection should work with SCSAv3
usr/src/cmd/zinject/translate.c
usr/src/cmd/zinject/zinject.c
usr/src/cmd/zinject/zinject.h
usr/src/uts/common/fs/zfs/spa.c
usr/src/uts/common/fs/zfs/sys/vdev_impl.h
usr/src/uts/common/fs/zfs/vdev.c
usr/src/uts/common/fs/zfs/vdev_disk.c
usr/src/uts/common/fs/zfs/vdev_file.c
usr/src/uts/common/fs/zfs/zio_inject.c
--- a/usr/src/cmd/zinject/translate.c	Mon Apr 26 15:11:57 2010 +0800
+++ b/usr/src/cmd/zinject/translate.c	Mon Apr 26 00:23:10 2010 -0700
@@ -481,6 +481,14 @@
 		record->zi_start = offsetof(vdev_label_t, vl_vdev_phys);
 		record->zi_end = record->zi_start + VDEV_PHYS_SIZE - 1;
 		break;
+	case TYPE_LABEL_PAD1:
+		record->zi_start = offsetof(vdev_label_t, vl_pad1);
+		record->zi_end = record->zi_start + VDEV_PAD_SIZE - 1;
+		break;
+	case TYPE_LABEL_PAD2:
+		record->zi_start = offsetof(vdev_label_t, vl_pad2);
+		record->zi_end = record->zi_start + VDEV_PAD_SIZE - 1;
+		break;
 	}
 	return (0);
 }
--- a/usr/src/cmd/zinject/zinject.c	Mon Apr 26 15:11:57 2010 +0800
+++ b/usr/src/cmd/zinject/zinject.c	Mon Apr 26 00:23:10 2010 -0700
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 /*
@@ -42,12 +41,12 @@
  * any attempt to read from the device will return EIO, but any attempt to
  * reopen the device will also return ENXIO.
  * For label faults, the -L option must be specified. This allows faults
- * to be injected into either the nvlist or uberblock region of all the labels
- * for the specified device.
+ * to be injected into the nvlist, uberblock, pad1, or pad2 region
+ * of all the labels for the specified device.
  *
  * This form of the command looks like:
  *
- * 	zinject -d device [-e errno] [-L <uber | nvlist>] pool
+ * 	zinject -d device [-e errno] [-L <uber | nvlist | pad1 | pad2>] pool
  *
  *
  * DATA FAULTS
@@ -168,7 +167,9 @@
 	"spacemap",
 	"errlog",
 	"uber",
-	"nvlist"
+	"nvlist",
+	"pad1",
+	"pad2"
 };
 
 static err_type_t
@@ -227,10 +228,11 @@
 	    "\t\tfunctions which call spa_vdev_config_exit(), or \n"
 	    "\t\tspa_vdev_exit() will trigger a panic.\n"
 	    "\n"
-	    "\tzinject -d device [-e errno] [-L <nvlist|uber>] [-F]\n"
+	    "\tzinject -d device [-e errno] [-L <nvlist|uber|pad1|pad2>] [-F]\n"
 	    "\t    [-T <read|write|free|claim|all> pool\n"
 	    "\t\tInject a fault into a particular device or the device's\n"
-	    "\t\tlabel.  Label injection can either be 'nvlist' or 'uber'.\n"
+	    "\t\tlabel.  Label injection can either be 'nvlist', 'uber',\n "
+	    "\t\t'pad1', or 'pad2'.\n"
 	    "\t\t'errno' can either be 'nxio' (the default) or 'io'.\n"
 	    "\n"
 	    "\tzinject -d device -A <degrade|fault> pool\n"
--- a/usr/src/cmd/zinject/zinject.h	Mon Apr 26 15:11:57 2010 +0800
+++ b/usr/src/cmd/zinject/zinject.h	Mon Apr 26 00:23:10 2010 -0700
@@ -19,15 +19,12 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #ifndef	_ZINJECT_H
 #define	_ZINJECT_H
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #include <sys/zfs_ioctl.h>
 
 #ifdef	__cplusplus
@@ -46,6 +43,8 @@
 	TYPE_ERRLOG,		/* persistent error log		*/
 	TYPE_LABEL_UBERBLOCK,	/* label specific uberblock	*/
 	TYPE_LABEL_NVLIST,	/* label specific nvlist	*/
+	TYPE_LABEL_PAD1,	/* label specific 8K pad1 area	*/
+	TYPE_LABEL_PAD2,	/* label specific 8K pad2 area	*/
 	TYPE_INVAL
 } err_type_t;
 
--- a/usr/src/uts/common/fs/zfs/spa.c	Mon Apr 26 15:11:57 2010 +0800
+++ b/usr/src/uts/common/fs/zfs/spa.c	Mon Apr 26 00:23:10 2010 -0700
@@ -20,8 +20,7 @@
  */
 
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 /*
@@ -4737,7 +4736,8 @@
 spa_async_remove(spa_t *spa, vdev_t *vd)
 {
 	if (vd->vdev_remove_wanted) {
-		vd->vdev_remove_wanted = 0;
+		vd->vdev_remove_wanted = B_FALSE;
+		vd->vdev_delayed_close = B_FALSE;
 		vdev_set_state(vd, B_FALSE, VDEV_STATE_REMOVED, VDEV_AUX_NONE);
 
 		/*
@@ -4761,7 +4761,7 @@
 spa_async_probe(spa_t *spa, vdev_t *vd)
 {
 	if (vd->vdev_probe_wanted) {
-		vd->vdev_probe_wanted = 0;
+		vd->vdev_probe_wanted = B_FALSE;
 		vdev_reopen(vd);	/* vdev_open() does the actual probe */
 	}
 
--- a/usr/src/uts/common/fs/zfs/sys/vdev_impl.h	Mon Apr 26 15:11:57 2010 +0800
+++ b/usr/src/uts/common/fs/zfs/sys/vdev_impl.h	Mon Apr 26 00:23:10 2010 -0700
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #ifndef _SYS_VDEV_IMPL_H
@@ -182,6 +181,7 @@
 	boolean_t	vdev_checkremove; /* temporary online test	*/
 	boolean_t	vdev_forcefault; /* force online fault		*/
 	boolean_t	vdev_splitting;	/* split or repair in progress  */
+	boolean_t	vdev_delayed_close; /* delayed device close?	*/
 	uint8_t		vdev_tmpoffline; /* device taken offline temporarily? */
 	uint8_t		vdev_detached;	/* device detached?		*/
 	uint8_t		vdev_cant_read;	/* vdev is failing all reads	*/
--- a/usr/src/uts/common/fs/zfs/vdev.c	Mon Apr 26 15:11:57 2010 +0800
+++ b/usr/src/uts/common/fs/zfs/vdev.c	Mon Apr 26 00:23:10 2010 -0700
@@ -20,8 +20,7 @@
  */
 
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #include <sys/zfs_context.h>
@@ -1005,6 +1004,10 @@
 		    vdev_probe_done, vps,
 		    vps->vps_flags | ZIO_FLAG_DONT_PROPAGATE);
 
+		/*
+		 * We can't change the vdev state in this context, so we
+		 * kick off an async task to do it on our behalf.
+		 */
 		if (zio != NULL) {
 			vd->vdev_probe_wanted = B_TRUE;
 			spa_async_request(spa, SPA_ASYNC_PROBE);
@@ -1250,8 +1253,8 @@
 	 */
 	if (vd->vdev_ops->vdev_op_leaf &&
 	    (error = zio_wait(vdev_probe(vd, NULL))) != 0) {
-		vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
-		    VDEV_AUX_IO_FAILURE);
+		vdev_set_state(vd, B_TRUE, VDEV_STATE_FAULTED,
+		    VDEV_AUX_ERR_EXCEEDED);
 		return (error);
 	}
 
@@ -2073,6 +2076,7 @@
 	/*
 	 * Faulted state takes precedence over degraded.
 	 */
+	vd->vdev_delayed_close = B_FALSE;
 	vd->vdev_faulted = 1ULL;
 	vd->vdev_degraded = 0ULL;
 	vdev_set_state(vd, B_FALSE, VDEV_STATE_FAULTED, aux);
@@ -2891,13 +2895,16 @@
 
 	/*
 	 * If we are setting the vdev state to anything but an open state, then
-	 * always close the underlying device.  Otherwise, we keep accessible
-	 * but invalid devices open forever.  We don't call vdev_close() itself,
-	 * because that implies some extra checks (offline, etc) that we don't
-	 * want here.  This is limited to leaf devices, because otherwise
-	 * closing the device will affect other children.
+	 * always close the underlying device unless the device has requested
+	 * a delayed close (i.e. we're about to remove or fault the device).
+	 * Otherwise, we keep accessible but invalid devices open forever.
+	 * We don't call vdev_close() itself, because that implies some extra
+	 * checks (offline, etc) that we don't want here.  This is limited to
+	 * leaf devices, because otherwise closing the device will affect other
+	 * children.
 	 */
-	if (vdev_is_dead(vd) && vd->vdev_ops->vdev_op_leaf)
+	if (!vd->vdev_delayed_close && vdev_is_dead(vd) &&
+	    vd->vdev_ops->vdev_op_leaf)
 		vd->vdev_ops->vdev_op_close(vd);
 
 	/*
@@ -2979,9 +2986,6 @@
 			case VDEV_AUX_BAD_LABEL:
 				class = FM_EREPORT_ZFS_DEVICE_BAD_LABEL;
 				break;
-			case VDEV_AUX_IO_FAILURE:
-				class = FM_EREPORT_ZFS_IO_FAILURE;
-				break;
 			default:
 				class = FM_EREPORT_ZFS_DEVICE_UNKNOWN;
 			}
--- a/usr/src/uts/common/fs/zfs/vdev_disk.c	Mon Apr 26 15:11:57 2010 +0800
+++ b/usr/src/uts/common/fs/zfs/vdev_disk.c	Mon Apr 26 00:23:10 2010 -0700
@@ -319,6 +319,7 @@
 	if (dvd->vd_lh != NULL)
 		(void) ldi_close(dvd->vd_lh, spa_mode(vd->vdev_spa), kcred);
 
+	vd->vdev_delayed_close = B_FALSE;
 	kmem_free(dvd, sizeof (vdev_disk_t));
 	vd->vdev_tsd = NULL;
 }
@@ -510,6 +511,8 @@
 			zfs_post_remove(zio->io_spa, vd);
 			vd->vdev_remove_wanted = B_TRUE;
 			spa_async_request(zio->io_spa, SPA_ASYNC_REMOVE);
+		} else if (!vd->vdev_delayed_close) {
+			vd->vdev_delayed_close = B_TRUE;
 		}
 	}
 }
--- a/usr/src/uts/common/fs/zfs/vdev_file.c	Mon Apr 26 15:11:57 2010 +0800
+++ b/usr/src/uts/common/fs/zfs/vdev_file.c	Mon Apr 26 00:23:10 2010 -0700
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #include <sys/zfs_context.h>
@@ -134,6 +133,7 @@
 		VN_RELE(vf->vf_vnode);
 	}
 
+	vd->vdev_delayed_close = B_FALSE;
 	kmem_free(vf, sizeof (vdev_file_t));
 	vd->vdev_tsd = NULL;
 }
--- a/usr/src/uts/common/fs/zfs/zio_inject.c	Mon Apr 26 15:11:57 2010 +0800
+++ b/usr/src/uts/common/fs/zfs/zio_inject.c	Mon Apr 26 00:23:10 2010 -0700
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 /*
@@ -277,6 +276,16 @@
 				if (error == ENXIO)
 					vd->vdev_stat.vs_aux =
 					    VDEV_AUX_OPEN_FAILED;
+
+				/*
+				 * Treat these errors as if they had been
+				 * retried so that all the appropriate stats
+				 * and FMA events are generated.
+				 */
+				if (!handler->zi_record.zi_failfast &&
+				    zio != NULL)
+					zio->io_flags |= ZIO_FLAG_IO_RETRY;
+
 				ret = error;
 				break;
 			}