usr/src/grub/grub-0.97/stage2/fsys_zfs.c
changeset 13700 2889e2596bd6
parent 13421 a4721c685d96
child 13921 9d721847e469
equal deleted inserted replaced
13699:733714f4dc24 13700:2889e2596bd6
    14  *
    14  *
    15  *  You should have received a copy of the GNU General Public License
    15  *  You should have received a copy of the GNU General Public License
    16  *  along with this program; if not, write to the Free Software
    16  *  along with this program; if not, write to the Free Software
    17  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
    17  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
    18  */
    18  */
       
    19 
    19 /*
    20 /*
    20  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
    21  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
    21  * Use is subject to license terms.
    22  * Use is subject to license terms.
       
    23  */
       
    24 
       
    25 /*
       
    26  * Copyright (c) 2012 by Delphix. All rights reserved.
    22  */
    27  */
    23 
    28 
    24 /*
    29 /*
    25  * The zfs plug-in routines for GRUB are:
    30  * The zfs plug-in routines for GRUB are:
    26  *
    31  *
   116 	ZIO_SET_CHECKSUM(zcp, 0, 0, 0, 0);
   121 	ZIO_SET_CHECKSUM(zcp, 0, 0, 0, 0);
   117 }
   122 }
   118 
   123 
   119 /* Checksum Table and Values */
   124 /* Checksum Table and Values */
   120 zio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS] = {
   125 zio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS] = {
   121 	NULL,			NULL,			0, 0,	"inherit",
   126 	{{NULL,			NULL},			0, 0,	"inherit"},
   122 	NULL,			NULL,			0, 0,	"on",
   127 	{{NULL,			NULL},			0, 0,	"on"},
   123 	zio_checksum_off,	zio_checksum_off,	0, 0,	"off",
   128 	{{zio_checksum_off,	zio_checksum_off},	0, 0,	"off"},
   124 	zio_checksum_SHA256,	zio_checksum_SHA256,	1, 1,	"label",
   129 	{{zio_checksum_SHA256,	zio_checksum_SHA256},	1, 1,	"label"},
   125 	zio_checksum_SHA256,	zio_checksum_SHA256,	1, 1,	"gang_header",
   130 	{{zio_checksum_SHA256,	zio_checksum_SHA256},	1, 1,	"gang_header"},
   126 	NULL,			NULL,			0, 0,	"zilog",
   131 	{{NULL,			NULL},			0, 0,	"zilog"},
   127 	fletcher_2_native,	fletcher_2_byteswap,	0, 0,	"fletcher2",
   132 	{{fletcher_2_native,	fletcher_2_byteswap},	0, 0,	"fletcher2"},
   128 	fletcher_4_native,	fletcher_4_byteswap,	1, 0,	"fletcher4",
   133 	{{fletcher_4_native,	fletcher_4_byteswap},	1, 0,	"fletcher4"},
   129 	zio_checksum_SHA256,	zio_checksum_SHA256,	1, 0,	"SHA256",
   134 	{{zio_checksum_SHA256,	zio_checksum_SHA256},	1, 0,	"SHA256"},
   130 	NULL,			NULL,			0, 0,	"zilog2",
   135 	{{NULL,			NULL},			0, 0,	"zilog2"},
   131 };
   136 };
   132 
   137 
   133 /*
   138 /*
   134  * zio_checksum_verify: Provides support for checksum verification.
   139  * zio_checksum_verify: Provides support for checksum verification.
   135  *
   140  *
   215 
   220 
   216 /*
   221 /*
   217  * Three pieces of information are needed to verify an uberblock: the magic
   222  * Three pieces of information are needed to verify an uberblock: the magic
   218  * number, the version number, and the checksum.
   223  * number, the version number, and the checksum.
   219  *
   224  *
   220  * Currently Implemented: version number, magic number
       
   221  * Need to Implement: checksum
       
   222  *
       
   223  * Return:
   225  * Return:
   224  *     0 - Success
   226  *     0 - Success
   225  *    -1 - Failure
   227  *    -1 - Failure
   226  */
   228  */
   227 static int
   229 static int
   236 
   238 
   237 	if (zio_checksum_verify(&bp, (char *)uber, ub_size) != 0)
   239 	if (zio_checksum_verify(&bp, (char *)uber, ub_size) != 0)
   238 		return (-1);
   240 		return (-1);
   239 
   241 
   240 	if (uber->ub_magic == UBERBLOCK_MAGIC &&
   242 	if (uber->ub_magic == UBERBLOCK_MAGIC &&
   241 	    uber->ub_version > 0 && uber->ub_version <= SPA_VERSION)
   243 	    SPA_VERSION_IS_SUPPORTED(uber->ub_version))
   242 		return (0);
   244 		return (0);
   243 
   245 
   244 	return (-1);
   246 	return (-1);
   245 }
   247 }
   246 
   248 
   294 	int i;
   296 	int i;
   295 
   297 
   296 	zio_gb = (zio_gbh_phys_t *)stack;
   298 	zio_gb = (zio_gbh_phys_t *)stack;
   297 	stack += SPA_GANGBLOCKSIZE;
   299 	stack += SPA_GANGBLOCKSIZE;
   298 	offset = DVA_GET_OFFSET(dva);
   300 	offset = DVA_GET_OFFSET(dva);
   299 	sector =  DVA_OFFSET_TO_PHYS_SECTOR(offset);
   301 	sector = DVA_OFFSET_TO_PHYS_SECTOR(offset);
   300 
   302 
   301 	/* read in the gang block header */
   303 	/* read in the gang block header */
   302 	if (devread(sector, 0, SPA_GANGBLOCKSIZE, (char *)zio_gb) == 0) {
   304 	if (devread(sector, 0, SPA_GANGBLOCKSIZE, (char *)zio_gb) == 0) {
   303 		grub_printf("failed to read in a gang block header\n");
   305 		grub_printf("failed to read in a gang block header\n");
   304 		return (1);
   306 		return (1);
   353 			if (zio_read_gang(bp, &bp->blk_dva[i], buf, stack) == 0)
   355 			if (zio_read_gang(bp, &bp->blk_dva[i], buf, stack) == 0)
   354 				return (0);
   356 				return (0);
   355 		} else {
   357 		} else {
   356 			/* read in a data block */
   358 			/* read in a data block */
   357 			offset = DVA_GET_OFFSET(&bp->blk_dva[i]);
   359 			offset = DVA_GET_OFFSET(&bp->blk_dva[i]);
   358 			sector =  DVA_OFFSET_TO_PHYS_SECTOR(offset);
   360 			sector = DVA_OFFSET_TO_PHYS_SECTOR(offset);
   359 			if (devread(sector, 0, psize, buf))
   361 			if (devread(sector, 0, psize, buf) != 0)
   360 				return (0);
   362 				return (0);
   361 		}
   363 		}
   362 	}
   364 	}
   363 
   365 
   364 	return (1);
   366 	return (1);
   398 	if (comp != ZIO_COMPRESS_OFF) {
   400 	if (comp != ZIO_COMPRESS_OFF) {
   399 		buf = stack;
   401 		buf = stack;
   400 		stack += psize;
   402 		stack += psize;
   401 	}
   403 	}
   402 
   404 
   403 	if (zio_read_data(bp, buf, stack)) {
   405 	if (zio_read_data(bp, buf, stack) != 0) {
   404 		grub_printf("zio_read_data failed\n");
   406 		grub_printf("zio_read_data failed\n");
   405 		return (ERR_FSYS_CORRUPT);
   407 		return (ERR_FSYS_CORRUPT);
   406 	}
   408 	}
   407 
   409 
   408 	if (zio_checksum_verify(bp, buf, psize) != 0) {
   410 	if (zio_checksum_verify(bp, buf, psize) != 0) {
   464  * Return:
   466  * Return:
   465  *	0 - success
   467  *	0 - success
   466  *	errnum - failure
   468  *	errnum - failure
   467  */
   469  */
   468 static int
   470 static int
   469 mzap_lookup(mzap_phys_t *zapobj, int objsize, char *name,
   471 mzap_lookup(mzap_phys_t *zapobj, int objsize, const char *name,
   470 	uint64_t *value)
   472 	uint64_t *value)
   471 {
   473 {
   472 	int i, chunks;
   474 	int i, chunks;
   473 	mzap_ent_phys_t *mzap_ent = zapobj->mz_chunk;
   475 	mzap_ent_phys_t *mzap_ent = zapobj->mz_chunk;
   474 
   476 
   475 	chunks = objsize/MZAP_ENT_LEN - 1;
   477 	chunks = objsize / MZAP_ENT_LEN - 1;
   476 	for (i = 0; i < chunks; i++) {
   478 	for (i = 0; i < chunks; i++) {
   477 		if (grub_strcmp(mzap_ent[i].mze_name, name) == 0) {
   479 		if (grub_strcmp(mzap_ent[i].mze_name, name) == 0) {
   478 			*value = mzap_ent[i].mze_value;
   480 			*value = mzap_ent[i].mze_value;
   479 			return (0);
   481 			return (0);
   480 		}
   482 		}
   510 		crc = (crc >> 8) ^ table[(crc ^ c) & 0xFF];
   512 		crc = (crc >> 8) ^ table[(crc ^ c) & 0xFF];
   511 
   513 
   512 	/*
   514 	/*
   513 	 * Only use 28 bits, since we need 4 bits in the cookie for the
   515 	 * Only use 28 bits, since we need 4 bits in the cookie for the
   514 	 * collision differentiator.  We MUST use the high bits, since
   516 	 * collision differentiator.  We MUST use the high bits, since
   515 	 * those are the onces that we first pay attention to when
   517 	 * those are the ones that we first pay attention to when
   516 	 * chosing the bucket.
   518 	 * choosing the bucket.
   517 	 */
   519 	 */
   518 	crc &= ~((1ULL << (64 - 28)) - 1);
   520 	crc &= ~((1ULL << (64 - 28)) - 1);
   519 
   521 
   520 	return (crc);
   522 	return (crc);
   521 }
   523 }
   616  *	0 - success
   618  *	0 - success
   617  *	errnum - failure
   619  *	errnum - failure
   618  */
   620  */
   619 static int
   621 static int
   620 fzap_lookup(dnode_phys_t *zap_dnode, zap_phys_t *zap,
   622 fzap_lookup(dnode_phys_t *zap_dnode, zap_phys_t *zap,
   621     char *name, uint64_t *value, char *stack)
   623     const char *name, uint64_t *value, char *stack)
   622 {
   624 {
   623 	zap_leaf_phys_t *l;
   625 	zap_leaf_phys_t *l;
   624 	uint64_t hash, idx, blkid;
   626 	uint64_t hash, idx, blkid;
   625 	int blksft = zfs_log2(zap_dnode->dn_datablkszsec << DNODE_SHIFT);
   627 	int blksft = zfs_log2(zap_dnode->dn_datablkszsec << DNODE_SHIFT);
   626 
   628 
   659  * Return:
   661  * Return:
   660  *	0 - success
   662  *	0 - success
   661  *	errnum - failure
   663  *	errnum - failure
   662  */
   664  */
   663 static int
   665 static int
   664 zap_lookup(dnode_phys_t *zap_dnode, char *name, uint64_t *val, char *stack)
   666 zap_lookup(dnode_phys_t *zap_dnode, const char *name, uint64_t *val,
       
   667     char *stack)
   665 {
   668 {
   666 	uint64_t block_type;
   669 	uint64_t block_type;
   667 	int size;
   670 	int size;
   668 	void *zapbuf;
   671 	void *zapbuf;
   669 
   672 
   670 	/* Read in the first block of the zap object data. */
   673 	/* Read in the first block of the zap object data. */
   671 	zapbuf = stack;
   674 	zapbuf = stack;
   672 	size = zap_dnode->dn_datablkszsec << SPA_MINBLOCKSHIFT;
   675 	size = zap_dnode->dn_datablkszsec << SPA_MINBLOCKSHIFT;
   673 	stack += size;
   676 	stack += size;
   674 
   677 
   675 	if (errnum = dmu_read(zap_dnode, 0, zapbuf, stack))
   678 	if ((errnum = dmu_read(zap_dnode, 0, zapbuf, stack)) != 0)
   676 		return (errnum);
   679 		return (errnum);
   677 
   680 
   678 	block_type = *((uint64_t *)zapbuf);
   681 	block_type = *((uint64_t *)zapbuf);
   679 
   682 
   680 	if (block_type == ZBT_MICRO) {
   683 	if (block_type == ZBT_MICRO) {
   684 		return (fzap_lookup(zap_dnode, zapbuf, name,
   687 		return (fzap_lookup(zap_dnode, zapbuf, name,
   685 		    val, stack));
   688 		    val, stack));
   686 	}
   689 	}
   687 
   690 
   688 	return (ERR_FSYS_CORRUPT);
   691 	return (ERR_FSYS_CORRUPT);
       
   692 }
       
   693 
       
/*
 * One name/value entry handed to a zap_cb_t callback.
 */
typedef struct zap_attribute {
	int za_integer_length;		/* zap_iterate() sets this to 8 */
	uint64_t za_num_integers;	/* zap_iterate() sets this to 1 */
	uint64_t za_first_integer;	/* the entry's value */
	char *za_name;			/* the entry's name */
} zap_attribute_t;

/*
 * Per-entry callback type used by zap_iterate().  zap_iterate() stops at
 * the first callback that returns non-zero and returns that value.
 */
typedef int (zap_cb_t)(zap_attribute_t *za, void *arg, char *stack);
       
   702 
       
   703 static int
       
   704 zap_iterate(dnode_phys_t *zap_dnode, zap_cb_t *cb, void *arg, char *stack)
       
   705 {
       
   706 	uint32_t size = zap_dnode->dn_datablkszsec << SPA_MINBLOCKSHIFT;
       
   707 	zap_attribute_t za;
       
   708 	int i;
       
   709 	mzap_phys_t *mzp = (mzap_phys_t *)stack;
       
   710 	stack += size;
       
   711 
       
   712 	if ((errnum = dmu_read(zap_dnode, 0, mzp, stack)) != 0)
       
   713 		return (errnum);
       
   714 
       
   715 	/*
       
   716 	 * Iteration over fatzap objects has not yet been implemented.
       
   717 	 * If we encounter a pool in which there are more features for
       
   718 	 * read than can fit inside a microzap (i.e., more than 2048
       
   719 	 * features for read), we can add support for fatzap iteration.
       
   720 	 * For now, fail.
       
   721 	 */
       
   722 	if (mzp->mz_block_type != ZBT_MICRO) {
       
   723 		grub_printf("feature information stored in fatzap, pool "
       
   724 		    "version not supported\n");
       
   725 		return (1);
       
   726 	}
       
   727 
       
   728 	za.za_integer_length = 8;
       
   729 	za.za_num_integers = 1;
       
   730 	for (i = 0; i < size / MZAP_ENT_LEN - 1; i++) {
       
   731 		mzap_ent_phys_t *mzep = &mzp->mz_chunk[i];
       
   732 		int err;
       
   733 
       
   734 		za.za_first_integer = mzep->mze_value;
       
   735 		za.za_name = mzep->mze_name;
       
   736 		err = cb(&za, arg, stack);
       
   737 		if (err != 0)
       
   738 			return (err);
       
   739 	}
       
   740 
       
   741 	return (0);
   689 }
   742 }
   690 
   743 
   691 /*
   744 /*
   692  * Get the dnode of an object number from the metadnode of an object set.
   745  * Get the dnode of an object number from the metadnode of an object set.
   693  *
   746  *
   765 		return (1);
   818 		return (1);
   766 
   819 
   767 	return (0);
   820 	return (0);
   768 }
   821 }
   769 
   822 
       
   823 static int
       
   824 check_feature(zap_attribute_t *za, void *arg, char *stack)
       
   825 {
       
   826 	const char **names = arg;
       
   827 	int i;
       
   828 
       
   829 	if (za->za_first_integer == 0)
       
   830 		return (0);
       
   831 
       
   832 	for (i = 0; names[i] != NULL; i++) {
       
   833 		if (grub_strcmp(za->za_name, names[i]) == 0) {
       
   834 			return (0);
       
   835 		}
       
   836 	}
       
   837 	grub_printf("missing feature for read '%s'\n", za->za_name);
       
   838 	return (ERR_NEWER_VERSION);
       
   839 }
       
   840 
   770 /*
   841 /*
   771  * Get the file dnode for a given file name where mdn is the meta dnode
   842  * Get the file dnode for a given file name where mdn is the meta dnode
   772  * for this ZFS object set. When found, place the file dnode in dn.
   843  * for this ZFS object set. When found, place the file dnode in dn.
   773  * The 'path' argument will be mangled.
   844  * The 'path' argument will be mangled.
   774  *
   845  *
   801 
   872 
   802 	/* skip leading slashes */
   873 	/* skip leading slashes */
   803 	while (*path == '/')
   874 	while (*path == '/')
   804 		path++;
   875 		path++;
   805 
   876 
   806 	while (*path && !isspace(*path)) {
   877 	while (*path && !grub_isspace(*path)) {
   807 
   878 
   808 		/* get the next component name */
   879 		/* get the next component name */
   809 		cname = path;
   880 		cname = path;
   810 		while (*path && !isspace(*path) && *path != '/')
   881 		while (*path && !grub_isspace(*path) && *path != '/')
   811 			path++;
   882 			path++;
   812 		ch = *path;
   883 		ch = *path;
   813 		*path = 0;   /* ensure null termination */
   884 		*path = 0;   /* ensure null termination */
   814 
   885 
   815 		if (errnum = zap_lookup(dn, cname, &objnum, stack))
   886 		if (errnum = zap_lookup(dn, cname, &objnum, stack))
   867 	*obj = objnum;
   938 	*obj = objnum;
   868 	return (0);
   939 	return (0);
   869 }
   940 }
   870 
   941 
   871 /*
   942 /*
       
   943  * List of pool features that the grub implementation of ZFS supports for
       
   944  * read. Note that features that are only required for write do not need
       
   945  * to be listed here since grub opens pools in read-only mode.
       
   946  */
       
static const char *spa_feature_names[] = {
	NULL	/* end-of-list marker; check_feature() stops scanning here */
};
       
   950 
       
   951 /*
       
   952  * Checks whether the MOS features that are active are supported by this
       
   953  * (GRUB's) implementation of ZFS.
       
   954  *
       
   955  * Return:
       
   956  *	0: Success.
       
   957  *	errnum: Failure.
       
   958  */
       
   959 static int
       
   960 check_mos_features(dnode_phys_t *mosmdn, char *stack)
       
   961 {
       
   962 	uint64_t objnum;
       
   963 	dnode_phys_t *dn;
       
   964 	uint8_t error = 0;
       
   965 
       
   966 	dn = (dnode_phys_t *)stack;
       
   967 	stack += DNODE_SIZE;
       
   968 
       
   969 	if ((errnum = dnode_get(mosmdn, DMU_POOL_DIRECTORY_OBJECT,
       
   970 	    DMU_OT_OBJECT_DIRECTORY, dn, stack)) != 0)
       
   971 		return (errnum);
       
   972 
       
   973 	/*
       
   974 	 * Find the object number for 'features_for_read' and retrieve its
       
   975 	 * corresponding dnode. Note that we don't check features_for_write
       
   976 	 * because GRUB is not opening the pool for write.
       
   977 	 */
       
   978 	if ((errnum = zap_lookup(dn, DMU_POOL_FEATURES_FOR_READ, &objnum,
       
   979 	    stack)) != 0)
       
   980 		return (errnum);
       
   981 
       
   982 	if ((errnum = dnode_get(mosmdn, objnum, DMU_OTN_ZAP_METADATA,
       
   983 	    dn, stack)) != 0)
       
   984 		return (errnum);
       
   985 
       
   986 	return (zap_iterate(dn, check_feature, spa_feature_names, stack));
       
   987 }
       
   988 
       
   989 /*
   872  * Given a MOS metadnode, get the metadnode of a given filesystem name (fsname),
   990  * Given a MOS metadnode, get the metadnode of a given filesystem name (fsname),
   873  * e.g. pool/rootfs, or a given object number (obj), e.g. the object number
   991  * e.g. pool/rootfs, or a given object number (obj), e.g. the object number
   874  * of pool/rootfs.
   992  * of pool/rootfs.
   875  *
   993  *
   876  * If no fsname and no obj are given, return the DSL_DIR metadnode.
   994  * If no fsname and no obj are given, return the DSL_DIR metadnode.
   913 		    ((dsl_dir_phys_t *)DN_BONUS(mdn))->dd_head_dataset_obj;
  1031 		    ((dsl_dir_phys_t *)DN_BONUS(mdn))->dd_head_dataset_obj;
   914 		goto skip;
  1032 		goto skip;
   915 	}
  1033 	}
   916 
  1034 
   917 	/* take out the pool name */
  1035 	/* take out the pool name */
   918 	while (*fsname && !isspace(*fsname) && *fsname != '/')
  1036 	while (*fsname && !grub_isspace(*fsname) && *fsname != '/')
   919 		fsname++;
  1037 		fsname++;
   920 
  1038 
   921 	while (*fsname && !isspace(*fsname)) {
  1039 	while (*fsname && !grub_isspace(*fsname)) {
   922 		uint64_t childobj;
  1040 		uint64_t childobj;
   923 
  1041 
   924 		while (*fsname == '/')
  1042 		while (*fsname == '/')
   925 			fsname++;
  1043 			fsname++;
   926 
  1044 
   927 		cname = fsname;
  1045 		cname = fsname;
   928 		while (*fsname && !isspace(*fsname) && *fsname != '/')
  1046 		while (*fsname && !grub_isspace(*fsname) && *fsname != '/')
   929 			fsname++;
  1047 			fsname++;
   930 		ch = *fsname;
  1048 		ch = *fsname;
   931 		*fsname = 0;
  1049 		*fsname = 0;
   932 
  1050 
   933 		snapname = cname;
  1051 		snapname = cname;
   934 		while (*snapname && !isspace(*snapname) && *snapname != '@')
  1052 		while (*snapname && !grub_isspace(*snapname) && *snapname !=
       
  1053 		    '@')
   935 			snapname++;
  1054 			snapname++;
   936 		if (*snapname == '@') {
  1055 		if (*snapname == '@') {
   937 			issnapshot = 1;
  1056 			issnapshot = 1;
   938 			*snapname = 0;
  1057 			*snapname = 0;
   939 		}
  1058 		}
  1018 {
  1137 {
  1019 	/* Verify if the 1st and 2nd byte in the nvlist are valid. */
  1138 	/* Verify if the 1st and 2nd byte in the nvlist are valid. */
  1020 	if (nvlist[0] != NV_ENCODE_XDR || nvlist[1] != HOST_ENDIAN)
  1139 	if (nvlist[0] != NV_ENCODE_XDR || nvlist[1] != HOST_ENDIAN)
  1021 		return (1);
  1140 		return (1);
  1022 
  1141 
  1023 	nvlist += 4;
  1142 	*out = nvlist + 4;
  1024 	*out = nvlist;
       
  1025 	return (0);
  1143 	return (0);
  1026 }
  1144 }
  1027 
  1145 
  1028 static char *
  1146 static char *
  1029 nvlist_array(char *nvlist, int index)
  1147 nvlist_array(char *nvlist, int index)
  1041 	}
  1159 	}
  1042 
  1160 
  1043 	return (nvlist);
  1161 	return (nvlist);
  1044 }
  1162 }
  1045 
  1163 
       
  1164 /*
       
  1165  * The nvlist_next_nvpair() function returns a handle to the next nvpair in the
       
  1166  * list following nvpair. If nvpair is NULL, the first pair is returned. If
       
  1167  * nvpair is the last pair in the nvlist, NULL is returned.
       
  1168  */
       
  1169 static char *
       
  1170 nvlist_next_nvpair(char *nvl, char *nvpair)
       
  1171 {
       
  1172 	char *cur, *prev;
       
  1173 	int encode_size;
       
  1174 
       
  1175 	if (nvl == NULL)
       
  1176 		return (NULL);
       
  1177 
       
  1178 	if (nvpair == NULL) {
       
  1179 		/* skip over nvl_version and nvl_nvflag */
       
  1180 		nvpair = nvl + 4 * 2;
       
  1181 	} else {
       
  1182 		/* skip to the next nvpair */
       
  1183 		encode_size = BSWAP_32(*(uint32_t *)nvpair);
       
  1184 		nvpair += encode_size;
       
  1185 	}
       
  1186 
       
  1187 	/* 8 bytes of 0 marks the end of the list */
       
  1188 	if (*(uint64_t *)nvpair == 0)
       
  1189 		return (NULL);
       
  1190 
       
  1191 	return (nvpair);
       
  1192 }
       
  1193 
       
  1194 /*
       
  1195  * This function returns 0 on success and 1 on failure. On success, a string
       
  1196  * containing the name of nvpair is saved in buf.
       
  1197  */
       
  1198 static int
       
  1199 nvpair_name(char *nvp, char *buf, int buflen)
       
  1200 {
       
  1201 	int len;
       
  1202 
       
  1203 	/* skip over encode/decode size */
       
  1204 	nvp += 4 * 2;
       
  1205 
       
  1206 	len = BSWAP_32(*(uint32_t *)nvp);
       
  1207 	if (buflen < len + 1)
       
  1208 		return (1);
       
  1209 
       
  1210 	grub_memmove(buf, nvp + 4, len);
       
  1211 	buf[len] = '\0';
       
  1212 
       
  1213 	return (0);
       
  1214 }
       
  1215 
       
  1216 /*
       
  1217  * This function retrieves the value of the nvpair in the form of enumerated
       
  1218  * type data_type_t. This is used to determine the appropriate type to pass to
       
  1219  * nvpair_value().
       
  1220  */
       
  1221 static int
       
  1222 nvpair_type(char *nvp)
       
  1223 {
       
  1224 	int name_len, type;
       
  1225 
       
  1226 	/* skip over encode/decode size */
       
  1227 	nvp += 4 * 2;
       
  1228 
       
  1229 	/* skip over name_len */
       
  1230 	name_len = BSWAP_32(*(uint32_t *)nvp);
       
  1231 	nvp += 4;
       
  1232 
       
  1233 	/* skip over name */
       
  1234 	nvp = nvp + ((name_len + 3) & ~3); /* align */
       
  1235 
       
  1236 	type = BSWAP_32(*(uint32_t *)nvp);
       
  1237 
       
  1238 	return (type);
       
  1239 }
       
  1240 
       
  1241 static int
       
  1242 nvpair_value(char *nvp, void *val, int valtype, int *nelmp)
       
  1243 {
       
  1244 	int name_len, type, slen;
       
  1245 	char *strval = val;
       
  1246 	uint64_t *intval = val;
       
  1247 
       
  1248 	/* skip over encode/decode size */
       
  1249 	nvp += 4 * 2;
       
  1250 
       
  1251 	/* skip over name_len */
       
  1252 	name_len = BSWAP_32(*(uint32_t *)nvp);
       
  1253 	nvp += 4;
       
  1254 
       
  1255 	/* skip over name */
       
  1256 	nvp = nvp + ((name_len + 3) & ~3); /* align */
       
  1257 
       
  1258 	/* skip over type */
       
  1259 	type = BSWAP_32(*(uint32_t *)nvp);
       
  1260 	nvp += 4;
       
  1261 
       
  1262 	if (type == valtype) {
       
  1263 		int nelm;
       
  1264 
       
  1265 		nelm = BSWAP_32(*(uint32_t *)nvp);
       
  1266 		if (valtype != DATA_TYPE_BOOLEAN && nelm < 1)
       
  1267 			return (1);
       
  1268 		nvp += 4;
       
  1269 
       
  1270 		switch (valtype) {
       
  1271 		case DATA_TYPE_BOOLEAN:
       
  1272 			return (0);
       
  1273 
       
  1274 		case DATA_TYPE_STRING:
       
  1275 			slen = BSWAP_32(*(uint32_t *)nvp);
       
  1276 			nvp += 4;
       
  1277 			grub_memmove(strval, nvp, slen);
       
  1278 			strval[slen] = '\0';
       
  1279 			return (0);
       
  1280 
       
  1281 		case DATA_TYPE_UINT64:
       
  1282 			*intval = BSWAP_64(*(uint64_t *)nvp);
       
  1283 			return (0);
       
  1284 
       
  1285 		case DATA_TYPE_NVLIST:
       
  1286 			*(void **)val = (void *)nvp;
       
  1287 			return (0);
       
  1288 
       
  1289 		case DATA_TYPE_NVLIST_ARRAY:
       
  1290 			*(void **)val = (void *)nvp;
       
  1291 			if (nelmp)
       
  1292 				*nelmp = nelm;
       
  1293 			return (0);
       
  1294 		}
       
  1295 	}
       
  1296 
       
  1297 	return (1);
       
  1298 }
       
  1299 
  1046 static int
  1300 static int
  1047 nvlist_lookup_value(char *nvlist, char *name, void *val, int valtype,
  1301 nvlist_lookup_value(char *nvlist, char *name, void *val, int valtype,
  1048     int *nelmp)
  1302     int *nelmp)
  1049 {
  1303 {
  1050 	int name_len, type, slen, encode_size;
  1304 	char *nvpair;
  1051 	char *nvpair, *nvp_name, *strval = val;
  1305 
  1052 	uint64_t *intval = val;
  1306 	for (nvpair = nvlist_next_nvpair(nvlist, NULL);
  1053 
  1307 	    nvpair != NULL;
  1054 	/* skip the header, nvl_version, and nvl_nvflag */
  1308 	    nvpair = nvlist_next_nvpair(nvlist, nvpair)) {
  1055 	nvlist = nvlist + 4 * 2;
  1309 		int name_len = BSWAP_32(*(uint32_t *)(nvpair + 4 * 2));
  1056 
  1310 		char *nvp_name = nvpair + 4 * 3;
  1057 	/*
       
  1058 	 * Loop thru the nvpair list
       
  1059 	 * The XDR representation of an integer is in big-endian byte order.
       
  1060 	 */
       
  1061 	while (encode_size = BSWAP_32(*(uint32_t *)nvlist))  {
       
  1062 
       
  1063 		nvpair = nvlist + 4 * 2; /* skip the encode/decode size */
       
  1064 
       
  1065 		name_len = BSWAP_32(*(uint32_t *)nvpair);
       
  1066 		nvpair += 4;
       
  1067 
       
  1068 		nvp_name = nvpair;
       
  1069 		nvpair = nvpair + ((name_len + 3) & ~3); /* align */
       
  1070 
       
  1071 		type = BSWAP_32(*(uint32_t *)nvpair);
       
  1072 		nvpair += 4;
       
  1073 
  1311 
  1074 		if ((grub_strncmp(nvp_name, name, name_len) == 0) &&
  1312 		if ((grub_strncmp(nvp_name, name, name_len) == 0) &&
  1075 		    type == valtype) {
  1313 		    nvpair_type(nvpair) == valtype) {
  1076 			int nelm;
  1314 			return (nvpair_value(nvpair, val, valtype, nelmp));
  1077 
       
  1078 			if ((nelm = BSWAP_32(*(uint32_t *)nvpair)) < 1)
       
  1079 				return (1);
       
  1080 			nvpair += 4;
       
  1081 
       
  1082 			switch (valtype) {
       
  1083 			case DATA_TYPE_STRING:
       
  1084 				slen = BSWAP_32(*(uint32_t *)nvpair);
       
  1085 				nvpair += 4;
       
  1086 				grub_memmove(strval, nvpair, slen);
       
  1087 				strval[slen] = '\0';
       
  1088 				return (0);
       
  1089 
       
  1090 			case DATA_TYPE_UINT64:
       
  1091 				*intval = BSWAP_64(*(uint64_t *)nvpair);
       
  1092 				return (0);
       
  1093 
       
  1094 			case DATA_TYPE_NVLIST:
       
  1095 				*(void **)val = (void *)nvpair;
       
  1096 				return (0);
       
  1097 
       
  1098 			case DATA_TYPE_NVLIST_ARRAY:
       
  1099 				*(void **)val = (void *)nvpair;
       
  1100 				if (nelmp)
       
  1101 					*nelmp = nelm;
       
  1102 				return (0);
       
  1103 			}
       
  1104 		}
  1315 		}
  1105 
  1316 	}
  1106 		nvlist += encode_size; /* goto the next nvpair */
       
  1107 	}
       
  1108 
       
  1109 	return (1);
  1317 	return (1);
  1110 }
  1318 }
  1111 
  1319 
  1112 /*
  1320 /*
  1113  * Check if this vdev is online and is in a good state.
  1321  * Check if this vdev is online and is in a good state.
  1140 
  1348 
  1141 	if (nvlist_lookup_value(nv, ZPOOL_CONFIG_TYPE, &type, DATA_TYPE_STRING,
  1349 	if (nvlist_lookup_value(nv, ZPOOL_CONFIG_TYPE, &type, DATA_TYPE_STRING,
  1142 	    NULL))
  1350 	    NULL))
  1143 		return (ERR_FSYS_CORRUPT);
  1351 		return (ERR_FSYS_CORRUPT);
  1144 
  1352 
  1145 	if (strcmp(type, VDEV_TYPE_DISK) == 0) {
  1353 	if (grub_strcmp(type, VDEV_TYPE_DISK) == 0) {
  1146 		uint64_t guid;
  1354 		uint64_t guid;
  1147 
  1355 
  1148 		if (vdev_validate(nv) != 0)
  1356 		if (vdev_validate(nv) != 0)
  1149 			return (ERR_NO_BOOTPATH);
  1357 			return (ERR_NO_BOOTPATH);
  1150 
  1358 
  1170 
  1378 
  1171 		if (nvlist_lookup_value(nv, ZPOOL_CONFIG_DEVID,
  1379 		if (nvlist_lookup_value(nv, ZPOOL_CONFIG_DEVID,
  1172 		    devid, DATA_TYPE_STRING, NULL) != 0)
  1380 		    devid, DATA_TYPE_STRING, NULL) != 0)
  1173 			devid[0] = '\0';
  1381 			devid[0] = '\0';
  1174 
  1382 
  1175 		if (strlen(bootpath) >= MAXPATHLEN ||
  1383 		if (grub_strlen(bootpath) >= MAXPATHLEN ||
  1176 		    strlen(devid) >= MAXPATHLEN)
  1384 		    grub_strlen(devid) >= MAXPATHLEN)
  1177 			return (ERR_WONT_FIT);
  1385 			return (ERR_WONT_FIT);
  1178 
  1386 
  1179 		return (0);
  1387 		return (0);
  1180 
  1388 
  1181 	} else if (strcmp(type, VDEV_TYPE_MIRROR) == 0 ||
  1389 	} else if (grub_strcmp(type, VDEV_TYPE_MIRROR) == 0 ||
  1182 	    strcmp(type, VDEV_TYPE_REPLACING) == 0 ||
  1390 	    grub_strcmp(type, VDEV_TYPE_REPLACING) == 0 ||
  1183 	    (is_spare = (strcmp(type, VDEV_TYPE_SPARE) == 0))) {
  1391 	    (is_spare = (grub_strcmp(type, VDEV_TYPE_SPARE) == 0))) {
  1184 		int nelm, i;
  1392 		int nelm, i;
  1185 		char *child;
  1393 		char *child;
  1186 
  1394 
  1187 		if (nvlist_lookup_value(nv, ZPOOL_CONFIG_CHILDREN, &child,
  1395 		if (nvlist_lookup_value(nv, ZPOOL_CONFIG_CHILDREN, &child,
  1188 		    DATA_TYPE_NVLIST_ARRAY, &nelm))
  1396 		    DATA_TYPE_NVLIST_ARRAY, &nelm))
  1206  *
  1414  *
  1207  * Return:
  1415  * Return:
  1208  *	0 - success
  1416  *	0 - success
  1209  *	ERR_* - failure
  1417  *	ERR_* - failure
  1210  */
  1418  */
  1211 int
  1419 static int
  1212 check_pool_label(uint64_t sector, char *stack, char *outdevid,
  1420 check_pool_label(uint64_t sector, char *stack, char *outdevid,
  1213     char *outpath, uint64_t *outguid, uint64_t *outashift)
  1421     char *outpath, uint64_t *outguid, uint64_t *outashift, uint64_t *outversion)
  1214 {
  1422 {
  1215 	vdev_phys_t *vdev;
  1423 	vdev_phys_t *vdev;
  1216 	uint64_t pool_state, txg = 0;
  1424 	uint64_t pool_state, txg = 0;
  1217 	char *nvlist, *nv;
  1425 	char *nvlist, *nv, *features;
  1218 	uint64_t diskguid;
  1426 	uint64_t diskguid;
  1219 	uint64_t version;
       
  1220 
  1427 
  1221 	sector += (VDEV_SKIP_SIZE >> SPA_MINBLOCKSHIFT);
  1428 	sector += (VDEV_SKIP_SIZE >> SPA_MINBLOCKSHIFT);
  1222 
  1429 
  1223 	/* Read in the vdev name-value pair list (112K). */
  1430 	/* Read in the vdev name-value pair list (112K). */
  1224 	if (devread(sector, 0, VDEV_PHYS_SIZE, stack) == 0)
  1431 	if (devread(sector, 0, VDEV_PHYS_SIZE, stack) == 0)
  1247 
  1454 
  1248 	/* not an active device */
  1455 	/* not an active device */
  1249 	if (txg == 0)
  1456 	if (txg == 0)
  1250 		return (ERR_NO_BOOTPATH);
  1457 		return (ERR_NO_BOOTPATH);
  1251 
  1458 
  1252 	if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_VERSION, &version,
  1459 	if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_VERSION, outversion,
  1253 	    DATA_TYPE_UINT64, NULL))
  1460 	    DATA_TYPE_UINT64, NULL))
  1254 		return (ERR_FSYS_CORRUPT);
  1461 		return (ERR_FSYS_CORRUPT);
  1255 	if (version > SPA_VERSION)
  1462 	if (!SPA_VERSION_IS_SUPPORTED(*outversion))
  1256 		return (ERR_NEWER_VERSION);
  1463 		return (ERR_NEWER_VERSION);
  1257 	if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_VDEV_TREE, &nv,
  1464 	if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_VDEV_TREE, &nv,
  1258 	    DATA_TYPE_NVLIST, NULL))
  1465 	    DATA_TYPE_NVLIST, NULL))
  1259 		return (ERR_FSYS_CORRUPT);
  1466 		return (ERR_FSYS_CORRUPT);
  1260 	if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_GUID, &diskguid,
  1467 	if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_GUID, &diskguid,
  1266 	if (vdev_get_bootpath(nv, diskguid, outdevid, outpath, 0))
  1473 	if (vdev_get_bootpath(nv, diskguid, outdevid, outpath, 0))
  1267 		return (ERR_NO_BOOTPATH);
  1474 		return (ERR_NO_BOOTPATH);
  1268 	if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_POOL_GUID, outguid,
  1475 	if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_POOL_GUID, outguid,
  1269 	    DATA_TYPE_UINT64, NULL))
  1476 	    DATA_TYPE_UINT64, NULL))
  1270 		return (ERR_FSYS_CORRUPT);
  1477 		return (ERR_FSYS_CORRUPT);
       
  1478 
       
  1479 	if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_FEATURES_FOR_READ,
       
  1480 	    &features, DATA_TYPE_NVLIST, NULL) == 0) {
       
  1481 		char *nvp;
       
  1482 		char *name = stack;
       
  1483 		stack += MAXNAMELEN;
       
  1484 
       
  1485 		for (nvp = nvlist_next_nvpair(features, NULL);
       
  1486 		    nvp != NULL;
       
  1487 		    nvp = nvlist_next_nvpair(features, nvp)) {
       
  1488 			zap_attribute_t za;
       
  1489 
       
  1490 			if (nvpair_name(nvp, name, MAXNAMELEN) != 0)
       
  1491 				return (ERR_FSYS_CORRUPT);
       
  1492 
       
  1493 			za.za_integer_length = 8;
       
  1494 			za.za_num_integers = 1;
       
  1495 			za.za_first_integer = 1;
       
  1496 			za.za_name = name;
       
  1497 			if (check_feature(&za, spa_feature_names, stack) != 0)
       
  1498 				return (ERR_NEWER_VERSION);
       
  1499 		}
       
  1500 	}
       
  1501 
  1271 	return (0);
  1502 	return (0);
  1272 }
  1503 }
  1273 
  1504 
  1274 /*
  1505 /*
  1275  * zfs_mount() locates a valid uberblock of the root pool and reads in its MOS
  1506  * zfs_mount() locates a valid uberblock of the root pool and reads in its MOS
  1286 	int label = 0;
  1517 	int label = 0;
  1287 	uberblock_t *ubbest;
  1518 	uberblock_t *ubbest;
  1288 	objset_phys_t *osp;
  1519 	objset_phys_t *osp;
  1289 	char tmp_bootpath[MAXNAMELEN];
  1520 	char tmp_bootpath[MAXNAMELEN];
  1290 	char tmp_devid[MAXNAMELEN];
  1521 	char tmp_devid[MAXNAMELEN];
  1291 	uint64_t tmp_guid, ashift;
  1522 	uint64_t tmp_guid, ashift, version;
  1292 	uint64_t adjpl = (uint64_t)part_length << SPA_MINBLOCKSHIFT;
  1523 	uint64_t adjpl = (uint64_t)part_length << SPA_MINBLOCKSHIFT;
  1293 	int err = errnum; /* preserve previous errnum state */
  1524 	int err = errnum; /* preserve previous errnum state */
  1294 
  1525 
  1295 	/* if it's our first time here, zero the best uberblock out */
  1526 	/* if it's our first time here, zero the best uberblock out */
  1296 	if (best_drive == 0 && best_part == 0 && find_best_root) {
  1527 	if (best_drive == 0 && best_part == 0 && find_best_root) {
  1329 		    ((VDEV_SKIP_SIZE + VDEV_PHYS_SIZE) >> SPA_MINBLOCKSHIFT),
  1560 		    ((VDEV_SKIP_SIZE + VDEV_PHYS_SIZE) >> SPA_MINBLOCKSHIFT),
  1330 		    0, VDEV_UBERBLOCK_RING, ub_array) == 0)
  1561 		    0, VDEV_UBERBLOCK_RING, ub_array) == 0)
  1331 			continue;
  1562 			continue;
  1332 
  1563 
  1333 		if (check_pool_label(sector, stack, tmp_devid,
  1564 		if (check_pool_label(sector, stack, tmp_devid,
  1334 		    tmp_bootpath, &tmp_guid, &ashift))
  1565 		    tmp_bootpath, &tmp_guid, &ashift, &version))
  1335 			continue;
  1566 			continue;
  1336 
  1567 
  1337 		if (pool_guid == 0)
  1568 		if (pool_guid == 0)
  1338 			pool_guid = tmp_guid;
  1569 			pool_guid = tmp_guid;
  1339 
  1570 
  1340 		if ((ubbest = find_bestub(ub_array, ashift, sector)) == NULL ||
  1571 		if ((ubbest = find_bestub(ub_array, ashift, sector)) == NULL ||
  1341 		    zio_read(&ubbest->ub_rootbp, osp, stack) != 0)
  1572 		    zio_read(&ubbest->ub_rootbp, osp, stack) != 0)
  1342 			continue;
  1573 			continue;
  1343 
  1574 
  1344 		VERIFY_OS_TYPE(osp, DMU_OST_META);
  1575 		VERIFY_OS_TYPE(osp, DMU_OST_META);
       
  1576 
       
  1577 		if (version >= SPA_VERSION_FEATURES &&
       
  1578 		    check_mos_features(&osp->os_meta_dnode, stack) != 0)
       
  1579 			continue;
  1345 
  1580 
  1346 		if (find_best_root && ((pool_guid != tmp_guid) ||
  1581 		if (find_best_root && ((pool_guid != tmp_guid) ||
  1347 		    vdev_uberblock_compare(ubbest, &(current_uberblock)) <= 0))
  1582 		    vdev_uberblock_compare(ubbest, &(current_uberblock)) <= 0))
  1348 			continue;
  1583 			continue;
  1349 
  1584 
  1477  */
  1712  */
  1478 int
  1713 int
  1479 zfs_read(char *buf, int len)
  1714 zfs_read(char *buf, int len)
  1480 {
  1715 {
  1481 	char *stack;
  1716 	char *stack;
  1482 	char *tmpbuf;
       
  1483 	int blksz, length, movesize;
  1717 	int blksz, length, movesize;
  1484 
  1718 
  1485 	if (file_buf == NULL) {
  1719 	if (file_buf == NULL) {
  1486 		file_buf = stackbase;
  1720 		file_buf = stackbase;
  1487 		stackbase += SPA_MAXBLOCKSIZE;
  1721 		stackbase += SPA_MAXBLOCKSIZE;