6704717 ZFS mirrored root doesn't live up to expectations
author taylor
Mon, 21 Jul 2008 15:39:58 -0700
changeset 7147 1e1d75c88283
parent 7146 827f3a1f7a84
child 7148 3009692eae80
6704717 ZFS mirrored root doesn't live up to expectations
6710937 Boot failed information should be more friendly
usr/src/grub/grub-0.95/stage2/builtins.c
usr/src/grub/grub-0.95/stage2/disk_io.c
usr/src/grub/grub-0.95/stage2/fsys_zfs.c
usr/src/grub/grub-0.95/stage2/shared.h
usr/src/uts/common/fs/zfs/spa.c
usr/src/uts/common/fs/zfs/sys/spa.h
usr/src/uts/common/fs/zfs/sys/spa_boot.h
usr/src/uts/common/fs/zfs/vdev_disk.c
usr/src/uts/common/fs/zfs/zfs_vfsops.c
usr/src/uts/intel/zfs/spa_boot.c
usr/src/uts/sparc/zfs/spa_boot.c
--- a/usr/src/grub/grub-0.95/stage2/builtins.c	Mon Jul 21 15:30:15 2008 -0700
+++ b/usr/src/grub/grub-0.95/stage2/builtins.c	Mon Jul 21 15:39:58 2008 -0700
@@ -122,6 +122,7 @@
   current_bootfs[0] = '\0';
   current_bootpath[0] = '\0';
   current_bootfs_obj = 0;
+  current_devid[0] = '\0';
   is_zfs_mount = 0;
 }
 
@@ -1518,6 +1519,21 @@
 };
 
 
+
+void
+set_root (char *root, unsigned long drive, unsigned long part)
+{
+  int bsd_part = (part >> 8) & 0xFF;
+  int pc_slice = part >> 16;
+
+  if (bsd_part == 0xFF) {
+    grub_sprintf (root, "(hd%d,%d)\n", drive - 0x80, pc_slice);
+  } else {
+    grub_sprintf (root, "(hd%d,%d,%c)\n",
+		 drive - 0x80, pc_slice, bsd_part + 'a');
+  }
+}
+
 static int
 find_common (char *arg, char *root, int for_root, int flags)
 {
@@ -1589,7 +1605,7 @@
       char buf[SECTOR_SIZE];
 
       if (for_root && tmp_argpart) {
-	grub_sprintf(device, "(hd%d%s", drive - 0x80, argpart); 
+	grub_sprintf(device, "(hd%d%s", drive - 0x80, argpart);
 	set_device(device);
         errnum = ERR_NONE;
 	part = current_partition;
@@ -1598,18 +1614,16 @@
 	   saved_partition = current_partition;
            errnum = ERR_NONE;
 	   if (grub_open (filename)) {
-	      int bsd_part = (part >> 8) & 0xFF;
-	      int pc_slice = part >> 16;
 	      grub_close ();
 	      got_file = 1;
-	      if (bsd_part == 0xFF)
-		grub_sprintf (root, "(hd%d,%d)\n", drive - 0x80, pc_slice);
-	      else
-		grub_sprintf (root, "(hd%d,%d,%c)\n",
-			     drive - 0x80, pc_slice, bsd_part + 'a');
-	      goto out;
+	      if (is_zfs_mount == 0) {
+	        set_root(root, current_drive, current_partition);
+	        goto out;
+	      } else {
+		best_drive = current_drive;
+		best_part = current_partition;
+	      }
            }
-
 	}
         errnum = ERR_NONE;
 	continue;
@@ -1630,30 +1644,22 @@
 		  saved_partition = current_partition;
 		  if (grub_open (filename))
 		    {
-		      int bsd_part = (part >> 8) & 0xFF;
-		      int pc_slice = part >> 16;
-		      
+		      char tmproot[32];
+
 		      grub_close ();
 		      got_file = 1;
-		      
-		      if (bsd_part == 0xFF) {
-			if (for_root)  {
-			   grub_sprintf (root, "(hd%d,%d)\n",
-				     drive - 0x80, pc_slice);
-			   goto out;
-			} else
-			   grub_printf (" (hd%d,%d)\n",
-				     drive - 0x80, pc_slice);
+		      set_root(tmproot, drive, part);
+		      if (for_root) {
+		 	grub_memcpy(root, tmproot, sizeof(tmproot));
+			if (is_zfs_mount == 0) {
+			      goto out;
+			} else {
+			      best_drive = current_drive;
+			      best_part = current_partition;
+			}
 		      } else {
-			if (for_root) {
-			   grub_sprintf (root, "(hd%d,%d,%c)\n",
-				     drive - 0x80, pc_slice, bsd_part + 'a');
-			   goto out;
-			} else
-			   grub_printf (" (hd%d,%d,%c)\n",
-				     drive - 0x80, pc_slice, bsd_part + 'a');
+			grub_printf("%s", tmproot);
 		      }
-
 		    }
 		}
 	    }
@@ -1668,8 +1674,13 @@
     }
 
 out:
-  saved_drive = tmp_drive;
-  saved_partition = tmp_partition;
+  if (is_zfs_mount && for_root) {
+        set_root(root, best_drive, best_part);
+	buf_drive = -1;
+  } else {
+	saved_drive = tmp_drive;
+	saved_partition = tmp_partition;
+  }
   if (tmp_argpart)
 	*tmp_argpart = ',';
 
@@ -2752,7 +2763,7 @@
 		token[0] = '\0';	
 		grub_sprintf(tmpout, "%s", in);
 		token[0] = '$';
-		in = token + 11; /* move over $ZFS-BOOTFS */
+		in = token + 11; /* skip over $ZFS-BOOTFS */
 		tmpout = out + strlen(out);
 
 		/* Note: %u only fits 32 bit integer; */ 
@@ -2791,6 +2802,16 @@
 		grub_sprintf(tmpout,
 		    postcomma ? "bootpath=\"%s\"," : ",bootpath=\"%s\"",
 		    current_bootpath);
+		tmpout = out + strlen(out);
+	}
+	if (strlen(current_devid)) {
+		if ((outlen += 13 + strlen(current_devid)) > MAX_CMDLINE) {
+			errnum = ERR_WONT_FIT;
+			return (1);
+		}
+		grub_sprintf(tmpout,
+		    postcomma ? "diskdevid=\"%s\"," : ",diskdevid=\"%s\"",
+		    current_devid);
 	}
 
 	strncat(out, in, MAX_CMDLINE);
@@ -4010,11 +4031,14 @@
 	return 1;
   }
 
+  find_best_root = 1;
+  best_drive = 0;
+  best_part = 0;
   ret = find_common(arg, root, 1, flags);
+  find_best_root = 0;	/* search is over: clear the mode flag on every path */
   if (ret != 0)
 	return (ret);
-
-  is_zfs_mount = 0;
+
   return real_root_func (root, 1);
 }
 
--- a/usr/src/grub/grub-0.95/stage2/disk_io.c	Mon Jul 21 15:30:15 2008 -0700
+++ b/usr/src/grub/grub-0.95/stage2/disk_io.c	Mon Jul 21 15:39:58 2008 -0700
@@ -128,7 +128,11 @@
 char current_rootpool[MAXNAMELEN];
 char current_bootfs[MAXNAMELEN];
 uint64_t current_bootfs_obj;
+char current_devid[MAXNAMELEN];
 int is_zfs_mount;
+unsigned long best_drive;
+unsigned long best_part;
+int find_best_root;
 
 /* disk buffer parameters */
 int buf_drive = -1;
@@ -686,7 +690,7 @@
 	  pcs_start = *start;	/* save the start of pc slice */
 	}
 
-      /* Search next valid BSD partition.  */
+      /* Search next valid Solaris partition.  */
       for (i = sol_part_no + 1; i < SOL_LABEL_NPARTS; i++)
 	{
 	  if (SOL_PART_EXISTS (buf, i))
--- a/usr/src/grub/grub-0.95/stage2/fsys_zfs.c	Mon Jul 21 15:30:15 2008 -0700
+++ b/usr/src/grub/grub-0.95/stage2/fsys_zfs.c	Mon Jul 21 15:39:58 2008 -0700
@@ -60,6 +60,8 @@
 static uint64_t dnode_start = 0;
 static uint64_t dnode_end = 0;
 
+static uberblock_t current_uberblock;
+
 static char *stackbase;
 
 decomp_entry_t decomp_table[ZIO_COMPRESS_FUNCTIONS] =
@@ -327,6 +329,8 @@
 	    comp != ZIO_COMPRESS_OFF && decomp_table[comp].decomp_func == NULL)
 		return (ERR_FSYS_CORRUPT);
 
+	if ((char *)buf < stack && ((char *)buf) + lsize > stack)
+		return (ERR_FSYS_CORRUPT);
 	/* pick a good dva from the block pointer */
 	for (i = 0; i < SPA_DVAS_PER_BP; i++) {
 
@@ -585,6 +589,8 @@
 	/* Get the leaf block */
 	l = (zap_leaf_phys_t *)stack;
 	stack += 1<<blksft;
+	if ((1<<blksft) < sizeof (zap_leaf_phys_t))
+	    return (ERR_FSYS_CORRUPT);
 	if (errnum = dmu_read(zap_dnode, blkid, l, stack))
 		return (errnum);
 
@@ -1033,8 +1039,6 @@
 	    DATA_TYPE_UINT64, NULL) == 0 ||
 	    nvlist_lookup_value(nv, ZPOOL_CONFIG_FAULTED, &ival,
 	    DATA_TYPE_UINT64, NULL) == 0 ||
-	    nvlist_lookup_value(nv, ZPOOL_CONFIG_DEGRADED, &ival,
-	    DATA_TYPE_UINT64, NULL) == 0 ||
 	    nvlist_lookup_value(nv, ZPOOL_CONFIG_REMOVED, &ival,
 	    DATA_TYPE_UINT64, NULL) == 0)
 		return (ERR_DEV_VALUES);
@@ -1046,8 +1050,8 @@
  * Get a list of valid vdev pathname from the boot device.
  * The caller should already allocate MAXNAMELEN memory for bootpath.
  */
-static int
-vdev_get_bootpath(char *nv, char *bootpath)
+int
+vdev_get_bootpath(char *nv, uint64_t inguid, char *devid, char *bootpath)
 {
 	char type[16];
 
@@ -1058,8 +1062,10 @@
 
 	if (strcmp(type, VDEV_TYPE_DISK) == 0) {
 		if (vdev_validate(nv) != 0 ||
-		    nvlist_lookup_value(nv, ZPOOL_CONFIG_PHYS_PATH, bootpath,
-		    DATA_TYPE_STRING, NULL) != 0)
+		    (nvlist_lookup_value(nv, ZPOOL_CONFIG_PHYS_PATH,
+		    bootpath, DATA_TYPE_STRING, NULL) != 0) ||
+		    (nvlist_lookup_value(nv, ZPOOL_CONFIG_DEVID,
+		    devid, DATA_TYPE_STRING, NULL) != 0))
 			return (ERR_NO_BOOTPATH);
 
 	} else if (strcmp(type, VDEV_TYPE_MIRROR) == 0) {
@@ -1072,7 +1078,9 @@
 
 		for (i = 0; i < nelm; i++) {
 			char tmp_path[MAXNAMELEN];
+			char tmp_devid[MAXNAMELEN];
 			char *child_i;
+			uint64_t guid;
 
 			child_i = nvlist_array(child, i);
 			if (vdev_validate(child_i) != 0)
@@ -1085,10 +1093,17 @@
 			if ((strlen(bootpath) + strlen(tmp_path)) > MAXNAMELEN)
 				return (ERR_WONT_FIT);
 
-			if (strlen(bootpath) == 0)
+			if (nvlist_lookup_value(child_i, ZPOOL_CONFIG_GUID,
+			    &guid, DATA_TYPE_UINT64, NULL) != 0)
+				return (ERR_NO_BOOTPATH);
+			if (nvlist_lookup_value(child_i, ZPOOL_CONFIG_DEVID,
+			    tmp_devid, DATA_TYPE_STRING, NULL) != 0)
+				return (ERR_NO_BOOTPATH);
+			if (guid == inguid) {
+				sprintf(devid, "%s", tmp_devid);
 				sprintf(bootpath, "%s", tmp_path);
-			else
-				sprintf(bootpath, "%s %s", bootpath, tmp_path);
+				break;
+			}
 		}
 	}
 
@@ -1102,12 +1117,13 @@
  *	0 - success
  *	ERR_* - failure
  */
-static int
-check_pool_label(int label, char *stack)
+int
+check_pool_label(int label, char *stack, char *outdevid, char *outpath)
 {
 	vdev_phys_t *vdev;
 	uint64_t sector, pool_state, txg = 0;
 	char *nvlist, *nv;
+	uint64_t diskguid;
 
 	sector = (label * sizeof (vdev_label_t) + VDEV_SKIP_SIZE +
 	    VDEV_BOOT_HEADER_SIZE) >> SPA_MINBLOCKSHIFT;
@@ -1117,6 +1133,7 @@
 		return (ERR_READ);
 
 	vdev = (vdev_phys_t *)stack;
+	stack += sizeof(vdev_phys_t);
 
 	if (nvlist_unpack(vdev->vp_nvlist, &nvlist))
 		return (ERR_FSYS_CORRUPT);
@@ -1143,10 +1160,11 @@
 	if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_VDEV_TREE, &nv,
 	    DATA_TYPE_NVLIST, NULL))
 		return (ERR_FSYS_CORRUPT);
-
-	if (vdev_get_bootpath(nv, current_bootpath))
+	if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_GUID, &diskguid,
+	    DATA_TYPE_UINT64, NULL))
+		return (ERR_FSYS_CORRUPT);
+	if (vdev_get_bootpath(nv, diskguid, outdevid, outpath))
 		return (ERR_NO_BOOTPATH);
-
 	return (0);
 }
 
@@ -1166,6 +1184,12 @@
 	uberblock_phys_t *ub_array, *ubbest = NULL;
 	vdev_boot_header_t *bh;
 	objset_phys_t *osp;
+	char tmp_bootpath[MAXNAMELEN];
+	char tmp_devid[MAXNAMELEN];
+
+	/* if it's our first time here, zero the best uberblock out */
+	if (best_drive == 0 && best_part == 0 && find_best_root)
+	    grub_memset(&current_uberblock, 0, sizeof(uberblock_t));
 
 	stackbase = ZFS_SCRATCH;
 	stack = stackbase;
@@ -1203,12 +1227,22 @@
 
 			VERIFY_OS_TYPE(osp, DMU_OST_META);
 
+			if (check_pool_label(label, stack, tmp_devid, tmp_bootpath))
+				return (0);
+
+			if (find_best_root &&
+			    vdev_uberblock_compare(&ubbest->ubp_uberblock,
+			    &(current_uberblock)) <= 0)
+				continue;
 			/* Got the MOS. Save it at the memory addr MOS. */
 			grub_memmove(MOS, &osp->os_meta_dnode, DNODE_SIZE);
-
-			if (check_pool_label(label, stack))
-				return (0);
-
+			grub_memmove(&current_uberblock,
+			    &ubbest->ubp_uberblock,
+			    sizeof(uberblock_t));
+			grub_memmove(current_bootpath, tmp_bootpath,
+			    MAXNAMELEN);
+			grub_memmove(current_devid, tmp_devid,
+			    grub_strlen(tmp_devid) + 1);	/* +1: copy the NUL too */
 			is_zfs_mount = 1;
 			return (1);
 		}
@@ -1264,7 +1298,7 @@
 		} else {
 			if (errnum = get_objset_mdn(MOS, current_bootfs,
 			    &current_bootfs_obj, mdn, stack)) {
-				memset(current_bootfs, 0, MAXNAMELEN);
+				grub_memset(current_bootfs, 0, MAXNAMELEN);
 				return (0);
 			}
 		}
--- a/usr/src/grub/grub-0.95/stage2/shared.h	Mon Jul 21 15:30:15 2008 -0700
+++ b/usr/src/grub/grub-0.95/stage2/shared.h	Mon Jul 21 15:39:58 2008 -0700
@@ -666,7 +666,11 @@
 extern char current_bootfs[MAXNAMELEN];
 extern char current_bootpath[MAXNAMELEN];
 extern unsigned long long current_bootfs_obj;
+extern char current_devid[MAXNAMELEN];
 extern int is_zfs_mount;
+extern unsigned long best_drive;
+extern unsigned long best_part;
+extern int find_best_root;
 
 extern int fsys_type;
 
--- a/usr/src/uts/common/fs/zfs/spa.c	Mon Jul 21 15:30:15 2008 -0700
+++ b/usr/src/uts/common/fs/zfs/spa.c	Mon Jul 21 15:39:58 2008 -0700
@@ -2206,27 +2206,32 @@
  * Get the root pool information from the root disk, then import the root pool
  * during the system boot up time.
  */
-extern nvlist_t *vdev_disk_read_rootlabel(char *);
-
-void
-spa_check_rootconf(char *devpath, char **bestdev, nvlist_t **bestconf,
+extern nvlist_t *vdev_disk_read_rootlabel(char *, char *);
+
+/*
+ * Read the label config of the device named by devpath/devid and report its
+ * pool txg through besttxg.  When bestconf is non-NULL the config nvlist is
+ * handed to the caller, who then owns it; otherwise it is freed here.
+ * Returns 0 on success, -1 if the label could not be read.
+ */
+int
+spa_check_rootconf(char *devpath, char *devid, nvlist_t **bestconf,
     uint64_t *besttxg)
 {
 	nvlist_t *config;
 	uint64_t txg;
 
-	if ((config = vdev_disk_read_rootlabel(devpath)) == NULL)
-		return;
+	if ((config = vdev_disk_read_rootlabel(devpath, devid)) == NULL)
+		return (-1);
 
 	VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, &txg) == 0);
 
-	if (txg > *besttxg) {
-		*besttxg = txg;
-		if (*bestconf != NULL)
-			nvlist_free(*bestconf);
+	if (bestconf != NULL)
 		*bestconf = config;
-		*bestdev = devpath;
-	}
+	else
+		nvlist_free(config);	/* caller only wants the txg */
+	*besttxg = txg;
+	return (0);
 }
 
 boolean_t
@@ -2236,20 +2233,108 @@
 
 	if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_OFFLINE, &ival) == 0 ||
 	    nvlist_lookup_uint64(nv, ZPOOL_CONFIG_FAULTED, &ival) == 0 ||
-	    nvlist_lookup_uint64(nv, ZPOOL_CONFIG_DEGRADED, &ival) == 0 ||
 	    nvlist_lookup_uint64(nv, ZPOOL_CONFIG_REMOVED, &ival) == 0)
 		return (B_FALSE);
 
 	return (B_TRUE);
 }
 
+
+/*
+ * Given the boot device's physical path or devid, check if the device
+ * is in a valid state.  If so, return the configuration from the vdev
+ * label.
+ *
+ * Returns 0 and stores the label config in *bestconf on success.  Returns
+ * EINVAL, with the config freed, if the label cannot be read, the device
+ * is detached or fails validation, or a mirror sibling has a newer txg.
+ */
+int
+spa_get_rootconf(char *devpath, char *devid, nvlist_t **bestconf)
+{
+	nvlist_t *conf = NULL;
+	uint64_t txg = 0;
+	nvlist_t *nvtop, **child;
+	char *type;
+	char *bootpath = NULL;
+	uint_t children, c;
+	char *tmp;
+
+	if (devpath && ((tmp = strchr(devpath, ' ')) != NULL))
+		*tmp = '\0';
+	if (spa_check_rootconf(devpath, devid, &conf, &txg) < 0) {
+		cmn_err(CE_NOTE, "error reading device label");
+		nvlist_free(conf);
+		return (EINVAL);
+	}
+	if (txg == 0) {
+		cmn_err(CE_NOTE, "this device is detached");
+		nvlist_free(conf);
+		return (EINVAL);
+	}
+
+	VERIFY(nvlist_lookup_nvlist(conf, ZPOOL_CONFIG_VDEV_TREE,
+	    &nvtop) == 0);
+	VERIFY(nvlist_lookup_string(nvtop, ZPOOL_CONFIG_TYPE, &type) == 0);
+
+	if (strcmp(type, VDEV_TYPE_DISK) == 0) {
+		if (spa_rootdev_validate(nvtop)) {
+			goto out;
+		} else {
+			nvlist_free(conf);
+			return (EINVAL);
+		}
+	}
+
+	ASSERT(strcmp(type, VDEV_TYPE_MIRROR) == 0);
+
+	VERIFY(nvlist_lookup_nvlist_array(nvtop, ZPOOL_CONFIG_CHILDREN,
+	    &child, &children) == 0);
+
+	/*
+	 * Go thru vdevs in the mirror to see if the given device
+	 * has the most recent txg. Only the device with the most
+	 * recent txg has valid information and should be booted.
+	 */
+	for (c = 0; c < children; c++) {
+		char *cdevid, *cpath;
+		uint64_t tmptxg;
+
+		if (nvlist_lookup_string(child[c], ZPOOL_CONFIG_PHYS_PATH,
+		    &cpath) != 0) {
+			nvlist_free(conf);
+			return (EINVAL);
+		}
+		if (nvlist_lookup_string(child[c], ZPOOL_CONFIG_DEVID,
+		    &cdevid) != 0) {
+			nvlist_free(conf);
+			return (EINVAL);
+		}
+		if ((spa_check_rootconf(cpath, cdevid, NULL,
+		    &tmptxg) == 0) && (tmptxg > txg)) {
+			txg = tmptxg;
+			VERIFY(nvlist_lookup_string(child[c],
+			    ZPOOL_CONFIG_PATH, &bootpath) == 0);
+		}
+	}
+
+	/* Does the best device match the one we've booted from? */
+	if (bootpath) {
+		/* bootpath points into conf, so report it before freeing */
+		cmn_err(CE_NOTE, "try booting from '%s'", bootpath);
+		nvlist_free(conf);
+		return (EINVAL);
+	}
+out:
+	*bestconf = conf;
+	return (0);
+}
+
 /*
  * Import a root pool.
  *
- * For x86. devpath_list will consist the physpath name of the vdev in a single
- * disk root pool or a list of physnames for the vdevs in a mirrored rootpool.
- * e.g.
- *	"/pci@1f,0/ide@d/disk@0,0:a /pci@1f,o/ide@d/disk@2,0:a"
+ * For x86. devpath_list will consist of devid and/or physpath name of
+ * the vdev (e.g. "id1,sd@SSEAGATE..." or "/pci@1f,0/ide@d/disk@0,0:a").
+ * The GRUB "findroot" command will return the vdev we should boot.
  *
  * For Sparc, devpath_list consists the physpath name of the booting device
  * no matter the rootpool is a single device pool or a mirrored pool.
@@ -2257,10 +2333,9 @@
  *	"/pci@1f,0/ide@d/disk@0,0:a"
  */
 int
-spa_import_rootpool(char *devpath_list)
+spa_import_rootpool(char *devpath, char *devid)
 {
 	nvlist_t *conf = NULL;
-	char *dev = NULL;
 	char *pname;
 	int error;
 
@@ -2268,7 +2343,7 @@
 	 * Get the vdev pathname and configuation from the most
 	 * recently updated vdev (highest txg).
 	 */
-	if (error = spa_get_rootconf(devpath_list, &dev, &conf))
+	if (error = spa_get_rootconf(devpath, devid, &conf))
 		goto msg_out;
 
 	/*
@@ -2292,12 +2367,12 @@
 	return (error);
 
 msg_out:
-	cmn_err(CE_NOTE, "\n\n"
+	cmn_err(CE_NOTE, "\n"
 	    "  ***************************************************  \n"
 	    "  *  This device is not bootable!                   *  \n"
 	    "  *  It is either offlined or detached or faulted.  *  \n"
 	    "  *  Please try to boot from a different device.    *  \n"
-	    "  ***************************************************  \n\n");
+	    "  ***************************************************  ");
 
 	return (error);
 }
--- a/usr/src/uts/common/fs/zfs/sys/spa.h	Mon Jul 21 15:30:15 2008 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/spa.h	Mon Jul 21 15:39:58 2008 -0700
@@ -326,10 +326,10 @@
     char *altroot, size_t buflen);
 extern int spa_create(const char *pool, nvlist_t *config, nvlist_t *props,
     const char *history_str);
-extern void spa_check_rootconf(char *devpath, char **the_dev_p,
-    nvlist_t **the_conf_p, uint64_t *the_txg_p);
+extern int spa_check_rootconf(char *devpath, char *devid,
+    nvlist_t **bestconf, uint64_t *besttxg);
 extern boolean_t spa_rootdev_validate(nvlist_t *nv);
-extern int spa_import_rootpool(char *devpath);
+extern int spa_import_rootpool(char *devpath, char *devid);
 extern int spa_import(const char *pool, nvlist_t *config, nvlist_t *props);
 extern int spa_import_faulted(const char *, nvlist_t *, nvlist_t *);
 extern nvlist_t *spa_tryimport(nvlist_t *tryconfig);
--- a/usr/src/uts/common/fs/zfs/sys/spa_boot.h	Mon Jul 21 15:30:15 2008 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/spa_boot.h	Mon Jul 21 15:39:58 2008 -0700
@@ -34,10 +34,9 @@
 extern "C" {
 #endif
 
-extern char *spa_get_bootfs();
-extern void spa_free_bootfs(char *bootfs);
-extern int spa_get_rootconf(char *devpath, char **bestdev_p,
-    nvlist_t **bestconf_p);
+extern char *spa_get_bootprop(char *prop);
+extern void spa_free_bootprop(char *prop);
+extern int spa_get_rootconf(char *devpath, char *devid, nvlist_t **bestconf_p);
 
 #ifdef	__cplusplus
 }
--- a/usr/src/uts/common/fs/zfs/vdev_disk.c	Mon Jul 21 15:30:15 2008 -0700
+++ b/usr/src/uts/common/fs/zfs/vdev_disk.c	Mon Jul 21 15:39:58 2008 -0700
@@ -599,22 +599,34 @@
 };
 
 /*
- * Given the root disk device pathname, read the label from the device,
- * and construct a configuration nvlist.
+ * Given the root disk device devid or pathname, read the label from
+ * the device, and construct a configuration nvlist.
  */
 nvlist_t *
-vdev_disk_read_rootlabel(char *devpath)
+vdev_disk_read_rootlabel(char *devpath, char *devid)
 {
 	nvlist_t *config = NULL;
 	ldi_handle_t vd_lh;
 	vdev_label_t *label;
 	uint64_t s, size;
 	int l;
+	ddi_devid_t tmpdevid;
+	int error = -1;
+	char *minor_name;
 
 	/*
 	 * Read the device label and build the nvlist.
 	 */
-	if (ldi_open_by_name(devpath, FREAD, kcred, &vd_lh, zfs_li))
+	if (devid != NULL && ddi_devid_str_decode(devid, &tmpdevid,
+	    &minor_name) == 0) {
+		error = ldi_open_by_devid(tmpdevid, minor_name,
+		    spa_mode, kcred, &vd_lh, zfs_li);
+		ddi_devid_free(tmpdevid);
+		ddi_devid_str_free(minor_name);
+	}
+
+	if (error && ldi_open_by_name(devpath, FREAD, kcred, &vd_lh,
+	    zfs_li))
 		return (NULL);
 
 	if (ldi_get_size(vd_lh, &s)) {
--- a/usr/src/uts/common/fs/zfs/zfs_vfsops.c	Mon Jul 21 15:30:15 2008 -0700
+++ b/usr/src/uts/common/fs/zfs/zfs_vfsops.c	Mon Jul 21 15:39:58 2008 -0700
@@ -866,6 +866,7 @@
 	znode_t *zp = NULL;
 	vnode_t *vp = NULL;
 	char *zfs_bootfs;
+	char *zfs_devid;
 
 	ASSERT(vfsp);
 
@@ -885,40 +886,42 @@
 		 */
 		clkset(-1);
 
-		if ((zfs_bootfs = spa_get_bootfs()) == NULL) {
-			cmn_err(CE_NOTE, "\nspa_get_bootfs: can not get "
-			    "bootfs name \n");
+		if ((zfs_bootfs = spa_get_bootprop("zfs-bootfs")) == NULL) {
+			cmn_err(CE_NOTE, "spa_get_bootfs: can not get "
+			    "bootfs name");
 			return (EINVAL);
 		}
-
-		if (error = spa_import_rootpool(rootfs.bo_name)) {
-			spa_free_bootfs(zfs_bootfs);
-			cmn_err(CE_NOTE, "\nspa_import_rootpool: error %d\n",
+		zfs_devid = spa_get_bootprop("diskdevid");
+		error = spa_import_rootpool(rootfs.bo_name, zfs_devid);
+		if (zfs_devid)
+			spa_free_bootprop(zfs_devid);
+		if (error) {
+			spa_free_bootprop(zfs_bootfs);
+			cmn_err(CE_NOTE, "spa_import_rootpool: error %d",
+			    error);
+			return (error);
+		}
+		if (error = zfs_parse_bootfs(zfs_bootfs, rootfs.bo_name)) {
+			spa_free_bootprop(zfs_bootfs);
+			cmn_err(CE_NOTE, "zfs_parse_bootfs: error %d",
 			    error);
 			return (error);
 		}
 
-		if (error = zfs_parse_bootfs(zfs_bootfs, rootfs.bo_name)) {
-			spa_free_bootfs(zfs_bootfs);
-			cmn_err(CE_NOTE, "\nzfs_parse_bootfs: error %d\n",
-			    error);
-			return (error);
-		}
-
-		spa_free_bootfs(zfs_bootfs);
+		spa_free_bootprop(zfs_bootfs);
 
 		if (error = vfs_lock(vfsp))
 			return (error);
 
 		if (error = zfs_domount(vfsp, rootfs.bo_name)) {
-			cmn_err(CE_NOTE, "\nzfs_domount: error %d\n", error);
+			cmn_err(CE_NOTE, "zfs_domount: error %d", error);
 			goto out;
 		}
 
 		zfsvfs = (zfsvfs_t *)vfsp->vfs_data;
 		ASSERT(zfsvfs);
 		if (error = zfs_zget(zfsvfs, zfsvfs->z_root, &zp)) {
-			cmn_err(CE_NOTE, "\nzfs_zget: error %d\n", error);
+			cmn_err(CE_NOTE, "zfs_zget: error %d", error);
 			goto out;
 		}
 
--- a/usr/src/uts/intel/zfs/spa_boot.c	Mon Jul 21 15:30:15 2008 -0700
+++ b/usr/src/uts/intel/zfs/spa_boot.c	Mon Jul 21 15:39:58 2008 -0700
@@ -30,169 +30,18 @@
 #include <sys/sunddi.h>
 
 char *
-spa_get_bootfs()
+spa_get_bootprop(char *propname)
 {
-	char *zfs_bp;
+	char *value;
 
 	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, ddi_root_node(),
-	    DDI_PROP_DONTPASS, "zfs-bootfs", &zfs_bp) !=
-	    DDI_SUCCESS)
+	    DDI_PROP_DONTPASS, propname, &value) != DDI_SUCCESS)
 		return (NULL);
-	return (zfs_bp);
+	return (value);
 }
 
 void
-spa_free_bootfs(char *bootfs)
-{
-	ddi_prop_free(bootfs);
-}
-
-/*
- * Calculate how many device pathnames are in devpath_list.
- * The devpath_list could look like this:
- *
- *	"/pci@1f,0/ide@d/disk@0,0:a /pci@1f,o/ide@d/disk@2,0:a"
- */
-static int
-spa_count_devpath(char *devpath_list)
-{
-	int numpath;
-	char *tmp_path, *blank;
-
-	numpath = 0;
-	tmp_path = devpath_list;
-
-	/* skip leading blanks */
-	while (*tmp_path == ' ')
-		tmp_path++;
-
-	while ((blank = strchr(tmp_path, ' ')) != NULL) {
-
-		numpath++;
-		/* skip contiguous blanks */
-		while (*blank == ' ')
-			blank++;
-		tmp_path = blank;
-	}
-
-	if (strlen(tmp_path) > 0)
-		numpath++;
-
-	return (numpath);
-}
-
-/*
- * Only allow booting the device if it has the same vdev information as
- * the most recently updated vdev (highest txg) and is in a valid state.
- *
- * GRUB passes online/active device path names, e.g.
- *	"/pci@1f,0/ide@d/disk@0,0:a /pci@1f,o/ide@d/disk@2,0:a"
- * to the kernel. The best vdev should have the same matching online/active
- * list as what GRUB passes in.
- */
-static int
-spa_check_devstate(char *devpath_list, char *dev, nvlist_t *conf)
+spa_free_bootprop(char *value)
 {
-	nvlist_t *nvtop, **child;
-	uint_t label_path, grub_path, c, children;
-	char *type;
-
-	VERIFY(nvlist_lookup_nvlist(conf, ZPOOL_CONFIG_VDEV_TREE,
-	    &nvtop) == 0);
-	VERIFY(nvlist_lookup_string(nvtop, ZPOOL_CONFIG_TYPE, &type) == 0);
-
-	if (strcmp(type, VDEV_TYPE_DISK) == 0)
-		return (spa_rootdev_validate(nvtop)? 0 : EINVAL);
-
-	ASSERT(strcmp(type, VDEV_TYPE_MIRROR) == 0);
-
-	VERIFY(nvlist_lookup_nvlist_array(nvtop, ZPOOL_CONFIG_CHILDREN,
-	    &child, &children) == 0);
-
-	/*
-	 * Check if the devpath_list is the same as the path list in conf.
-	 * If these two lists are different, then the booting device is not an
-	 * up-to-date device that can be booted.
-	 */
-	label_path = 0;
-	for (c = 0; c < children; c++) {
-		char *physpath;
-
-		if (nvlist_lookup_string(child[c], ZPOOL_CONFIG_PHYS_PATH,
-		    &physpath) != 0)
-			return (EINVAL);
-
-		if (spa_rootdev_validate(child[c])) {
-			if (strstr(devpath_list, physpath) == NULL)
-				return (EINVAL);
-			label_path++;
-		} else {
-			char *blank;
-
-			if (blank = strchr(dev, ' '))
-				*blank = '\0';
-			if (strcmp(physpath, dev) == 0)
-				return (EINVAL);
-			if (blank)
-				*blank = ' ';
-		}
-	}
-
-	grub_path = spa_count_devpath(devpath_list);
-
-	if (label_path != grub_path)
-		return (EINVAL);
-
-	return (0);
+	ddi_prop_free(value);
 }
-
-/*
- * Given a list of vdev physpath names, pick the vdev with the most recent txg,
- * and return the point of the device's physpath in the list and the device's
- * label configuration. The content of the label would be the most recent
- * updated information.
- */
-int
-spa_get_rootconf(char *devpath_list, char **bestdev, nvlist_t **bestconf)
-{
-	nvlist_t *conf = NULL;
-	char *dev = NULL;
-	uint64_t txg = 0;
-	char *devpath, *blank;
-
-	devpath = devpath_list;
-	dev = devpath;
-
-	while (devpath[0] == ' ')
-		devpath++;
-
-	while ((blank = strchr(devpath, ' ')) != NULL) {
-		*blank = '\0';
-		spa_check_rootconf(devpath, &dev, &conf, &txg);
-		*blank = ' ';
-
-		while (*blank == ' ')
-			blank++;
-		devpath = blank;
-	}
-
-	/* for the only or the last devpath in the devpath_list */
-	if (strlen(devpath) > 0)
-		spa_check_rootconf(devpath, &dev, &conf, &txg);
-
-	if (conf == NULL)
-		return (EINVAL);
-
-	/*
-	 * dev/conf is the vdev with the most recent txg.
-	 * Check if the device is in a bootable state.
-	 * dev may have a trailing blank since it points to a string
-	 * in the devpath_list.
-	 */
-	if (spa_check_devstate(devpath_list, dev, conf) != 0)
-		return (EINVAL);
-
-	*bestdev = dev;
-	*bestconf = conf;
-	return (0);
-}
--- a/usr/src/uts/sparc/zfs/spa_boot.c	Mon Jul 21 15:30:15 2008 -0700
+++ b/usr/src/uts/sparc/zfs/spa_boot.c	Mon Jul 21 15:39:58 2008 -0700
@@ -30,94 +30,26 @@
 #include <sys/bootconf.h>
 
 char *
-spa_get_bootfs()
+spa_get_bootprop(char *propname)
 {
 	int proplen;
-	char *zfs_bp;
+	char *value;
 
-	proplen = BOP_GETPROPLEN(bootops, "zfs-bootfs");
-	if (proplen == 0)
+	proplen = BOP_GETPROPLEN(bootops, propname);
+	if (proplen <= 0)
 		return (NULL);
 
-	zfs_bp = kmem_zalloc(proplen, KM_SLEEP);
-	if (BOP_GETPROP(bootops, "zfs-bootfs", zfs_bp) == -1) {
-		kmem_free(zfs_bp, proplen);
+	value = kmem_zalloc(proplen, KM_SLEEP);
+	if (BOP_GETPROP(bootops, propname, value) == -1) {
+		kmem_free(value, proplen);
 		return (NULL);
 	}
 
-	return (zfs_bp);
+	return (value);
 }
 
 void
-spa_free_bootfs(char *bootfs)
-{
-	kmem_free(bootfs, strlen(bootfs) + 1);
-}
-
-/*
- * Given the boot device physpath, check if the device is in a valid state.
- * If so, return the configuration from the vdev label.
- */
-int
-spa_get_rootconf(char *devpath, char **bestdev, nvlist_t **bestconf)
+spa_free_bootprop(char *propname)
 {
-	nvlist_t *conf = NULL;
-	char *dev = NULL;
-	uint64_t txg = 0;
-	nvlist_t *nvtop, **child;
-	char *type;
-	uint_t children, c;
-
-	spa_check_rootconf(devpath, &dev, &conf, &txg);
-	if (txg == 0 || conf == NULL)
-		return (EINVAL);
-
-	VERIFY(nvlist_lookup_nvlist(conf, ZPOOL_CONFIG_VDEV_TREE,
-	    &nvtop) == 0);
-	VERIFY(nvlist_lookup_string(nvtop, ZPOOL_CONFIG_TYPE, &type) == 0);
-
-	if (strcmp(type, VDEV_TYPE_DISK) == 0) {
-		if (spa_rootdev_validate(nvtop))
-			goto out;
-		else
-			return (EINVAL);
-	}
-
-	ASSERT(strcmp(type, VDEV_TYPE_MIRROR) == 0);
-
-	VERIFY(nvlist_lookup_nvlist_array(nvtop, ZPOOL_CONFIG_CHILDREN,
-	    &child, &children) == 0);
-
-	/*
-	 * Go thru vdevs in the mirror to see if the given device (devpath)
-	 * is in a healthy state. Also check if the given device has the most
-	 * recent txg. Only the device with the most recent txg has valid
-	 * information and can be booted.
-	 */
-	for (c = 0; c < children; c++) {
-		char *physpath;
-
-		if (nvlist_lookup_string(child[c], ZPOOL_CONFIG_PHYS_PATH,
-		    &physpath) != 0)
-			return (EINVAL);
-
-		if (strcmp(devpath, physpath) == 0) {
-			if (!spa_rootdev_validate(child[c]))
-				return (EINVAL);
-		} else {
-			/* get dev with the highest txg */
-			if (spa_rootdev_validate(child[c])) {
-				spa_check_rootconf(physpath, &dev,
-				    &conf, &txg);
-			}
-		}
-	}
-
-	/* Does the given device have the most recent txg? */
-	if (strcmp(devpath, dev) != 0)
-		return (EINVAL);
-out:
-	*bestdev = dev;
-	*bestconf = conf;
-	return (0);
+	kmem_free(propname, strlen(propname) + 1);
 }