119 * name Pool name |
121 * name Pool name |
120 * state Pool state |
122 * state Pool state |
121 * txg Transaction group in which this label was written |
123 * txg Transaction group in which this label was written |
122 * pool_guid Unique identifier for this pool |
124 * pool_guid Unique identifier for this pool |
123 * vdev_tree An nvlist describing vdev tree. |
125 * vdev_tree An nvlist describing vdev tree. |
|
126 * features_for_read |
|
127 * An nvlist of the features necessary for reading the MOS. |
124 * |
128 * |
125 * Each leaf device label also contains the following: |
129 * Each leaf device label also contains the following: |
126 * |
130 * |
127 * top_guid Unique ID for top-level vdev in which this is contained |
131 * top_guid Unique ID for top-level vdev in which this is contained |
128 * guid Unique ID for the leaf vdev |
132 * guid Unique ID for the leaf vdev |
426 rvd->vdev_children) == 0); |
430 rvd->vdev_children) == 0); |
427 |
431 |
428 kmem_free(array, rvd->vdev_children * sizeof (uint64_t)); |
432 kmem_free(array, rvd->vdev_children * sizeof (uint64_t)); |
429 } |
433 } |
430 |
434 |
|
435 /* |
|
436 * Returns the configuration from the label of the given vdev. If 'label' is |
|
437 * VDEV_BEST_LABEL, each label of the vdev will be read until a valid |
|
438 * configuration is found; otherwise, only the specified label will be read. |
|
439 */ |
431 nvlist_t * |
440 nvlist_t * |
432 vdev_label_read_config(vdev_t *vd) |
441 vdev_label_read_config(vdev_t *vd, int label) |
433 { |
442 { |
434 spa_t *spa = vd->vdev_spa; |
443 spa_t *spa = vd->vdev_spa; |
435 nvlist_t *config = NULL; |
444 nvlist_t *config = NULL; |
436 vdev_phys_t *vp; |
445 vdev_phys_t *vp; |
437 zio_t *zio; |
446 zio_t *zio; |
445 |
454 |
446 vp = zio_buf_alloc(sizeof (vdev_phys_t)); |
455 vp = zio_buf_alloc(sizeof (vdev_phys_t)); |
447 |
456 |
448 retry: |
457 retry: |
449 for (int l = 0; l < VDEV_LABELS; l++) { |
458 for (int l = 0; l < VDEV_LABELS; l++) { |
|
459 if (label >= 0 && label < VDEV_LABELS && label != l) |
|
460 continue; |
450 |
461 |
451 zio = zio_root(spa, NULL, NULL, flags); |
462 zio = zio_root(spa, NULL, NULL, flags); |
452 |
463 |
453 vdev_label_read(zio, vd, l, vp, |
464 vdev_label_read(zio, vd, l, vp, |
454 offsetof(vdev_label_t, vl_vdev_phys), |
465 offsetof(vdev_label_t, vl_vdev_phys), |
831 * Consider the following situation: txg is safely synced to disk. We've |
842 * Consider the following situation: txg is safely synced to disk. We've |
832 * written the first uberblock for txg + 1, and then we lose power. When we |
843 * written the first uberblock for txg + 1, and then we lose power. When we |
833 * come back up, we fail to see the uberblock for txg + 1 because, say, |
844 * come back up, we fail to see the uberblock for txg + 1 because, say, |
834 * it was on a mirrored device and the replica to which we wrote txg + 1 |
845 * it was on a mirrored device and the replica to which we wrote txg + 1 |
835 * is now offline. If we then make some changes and sync txg + 1, and then |
846 * is now offline. If we then make some changes and sync txg + 1, and then |
836 * the missing replica comes back, then for a few seconds we'll have two |
847 * the missing replica comes back, then for a few seconds we'll have two |
837 * conflicting uberblocks on disk with the same txg. The solution is simple: |
848 * conflicting uberblocks on disk with the same txg. The solution is simple: |
838 * among uberblocks with equal txg, choose the one with the latest timestamp. |
849 * among uberblocks with equal txg, choose the one with the latest timestamp. |
839 */ |
850 */ |
840 static int |
851 static int |
841 vdev_uberblock_compare(uberblock_t *ub1, uberblock_t *ub2) |
852 vdev_uberblock_compare(uberblock_t *ub1, uberblock_t *ub2) |
851 return (1); |
862 return (1); |
852 |
863 |
853 return (0); |
864 return (0); |
854 } |
865 } |
855 |
866 |
|
867 struct ubl_cbdata { |
|
868 uberblock_t *ubl_ubbest; /* Best uberblock */ |
|
869 vdev_t *ubl_vd; /* vdev associated with the above */ |
|
870 int ubl_label; /* Label associated with the above */ |
|
871 }; |
|
872 |
856 static void |
873 static void |
857 vdev_uberblock_load_done(zio_t *zio) |
874 vdev_uberblock_load_done(zio_t *zio) |
858 { |
875 { |
|
876 vdev_t *vd = zio->io_vd; |
859 spa_t *spa = zio->io_spa; |
877 spa_t *spa = zio->io_spa; |
860 zio_t *rio = zio->io_private; |
878 zio_t *rio = zio->io_private; |
861 uberblock_t *ub = zio->io_data; |
879 uberblock_t *ub = zio->io_data; |
862 uberblock_t *ubbest = rio->io_private; |
880 struct ubl_cbdata *cbp = rio->io_private; |
863 |
881 |
864 ASSERT3U(zio->io_size, ==, VDEV_UBERBLOCK_SIZE(zio->io_vd)); |
882 ASSERT3U(zio->io_size, ==, VDEV_UBERBLOCK_SIZE(vd)); |
865 |
883 |
866 if (zio->io_error == 0 && uberblock_verify(ub) == 0) { |
884 if (zio->io_error == 0 && uberblock_verify(ub) == 0) { |
867 mutex_enter(&rio->io_lock); |
885 mutex_enter(&rio->io_lock); |
868 if (ub->ub_txg <= spa->spa_load_max_txg && |
886 if (ub->ub_txg <= spa->spa_load_max_txg && |
869 vdev_uberblock_compare(ub, ubbest) > 0) |
887 vdev_uberblock_compare(ub, cbp->ubl_ubbest) > 0) { |
870 *ubbest = *ub; |
888 /* |
|
889 * Keep track of the vdev and label in which this |
|
890 * uberblock was found. We will use this information |
|
891 * later to obtain the config nvlist associated with |
|
892 * this uberblock. |
|
893 */ |
|
894 *cbp->ubl_ubbest = *ub; |
|
895 cbp->ubl_vd = vd; |
|
896 cbp->ubl_label = vdev_label_number(vd->vdev_psize, |
|
897 zio->io_offset); |
|
898 } |
871 mutex_exit(&rio->io_lock); |
899 mutex_exit(&rio->io_lock); |
872 } |
900 } |
873 |
901 |
874 zio_buf_free(zio->io_data, zio->io_size); |
902 zio_buf_free(zio->io_data, zio->io_size); |
875 } |
903 } |
876 |
904 |
877 void |
905 static void |
878 vdev_uberblock_load(zio_t *zio, vdev_t *vd, uberblock_t *ubbest) |
906 vdev_uberblock_load_impl(zio_t *zio, vdev_t *vd, int flags, |
879 { |
907 struct ubl_cbdata *cbp) |
880 spa_t *spa = vd->vdev_spa; |
908 { |
881 vdev_t *rvd = spa->spa_root_vdev; |
|
882 int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL | |
|
883 ZIO_FLAG_SPECULATIVE | ZIO_FLAG_TRYHARD; |
|
884 |
|
885 if (vd == rvd) { |
|
886 ASSERT(zio == NULL); |
|
887 spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); |
|
888 zio = zio_root(spa, NULL, ubbest, flags); |
|
889 bzero(ubbest, sizeof (uberblock_t)); |
|
890 } |
|
891 |
|
892 ASSERT(zio != NULL); |
|
893 |
|
894 for (int c = 0; c < vd->vdev_children; c++) |
909 for (int c = 0; c < vd->vdev_children; c++) |
895 vdev_uberblock_load(zio, vd->vdev_child[c], ubbest); |
910 vdev_uberblock_load_impl(zio, vd->vdev_child[c], flags, cbp); |
896 |
911 |
897 if (vd->vdev_ops->vdev_op_leaf && vdev_readable(vd)) { |
912 if (vd->vdev_ops->vdev_op_leaf && vdev_readable(vd)) { |
898 for (int l = 0; l < VDEV_LABELS; l++) { |
913 for (int l = 0; l < VDEV_LABELS; l++) { |
899 for (int n = 0; n < VDEV_UBERBLOCK_COUNT(vd); n++) { |
914 for (int n = 0; n < VDEV_UBERBLOCK_COUNT(vd); n++) { |
900 vdev_label_read(zio, vd, l, |
915 vdev_label_read(zio, vd, l, |
903 VDEV_UBERBLOCK_SIZE(vd), |
918 VDEV_UBERBLOCK_SIZE(vd), |
904 vdev_uberblock_load_done, zio, flags); |
919 vdev_uberblock_load_done, zio, flags); |
905 } |
920 } |
906 } |
921 } |
907 } |
922 } |
908 |
923 } |
909 if (vd == rvd) { |
924 |
910 (void) zio_wait(zio); |
925 /* |
911 spa_config_exit(spa, SCL_ALL, FTAG); |
926 * Reads the 'best' uberblock from disk along with its associated |
912 } |
927 * configuration. First, we read the uberblock array of each label of each |
|
928 * vdev, keeping track of the uberblock with the highest txg in each array. |
|
929 * Then, we read the configuration from a label with the same parity as the one holding the best uberblock.
|
930 */ |
|
931 void |
|
932 vdev_uberblock_load(vdev_t *rvd, uberblock_t *ub, nvlist_t **config) |
|
933 { |
|
934 int i; |
|
935 zio_t *zio; |
|
936 spa_t *spa = rvd->vdev_spa; |
|
937 struct ubl_cbdata cb; |
|
938 int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL | |
|
939 ZIO_FLAG_SPECULATIVE | ZIO_FLAG_TRYHARD; |
|
940 |
|
941 ASSERT(ub); |
|
942 ASSERT(config); |
|
943 |
|
944 bzero(ub, sizeof (uberblock_t)); |
|
945 *config = NULL; |
|
946 |
|
947 cb.ubl_ubbest = ub; |
|
948 cb.ubl_vd = NULL; |
|
949 |
|
950 spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); |
|
951 zio = zio_root(spa, NULL, &cb, flags); |
|
952 vdev_uberblock_load_impl(zio, rvd, flags, &cb); |
|
953 (void) zio_wait(zio); |
|
954 if (cb.ubl_vd != NULL) { |
|
955 for (i = cb.ubl_label % 2; i < VDEV_LABELS; i += 2) { |
|
956 *config = vdev_label_read_config(cb.ubl_vd, i); |
|
957 if (*config != NULL) |
|
958 break; |
|
959 } |
|
960 } |
|
961 spa_config_exit(spa, SCL_ALL, FTAG); |
913 } |
962 } |
914 |
963 |
915 /* |
964 /* |
916 * On success, increment root zio's count of good writes. |
965 * On success, increment root zio's count of good writes. |
917 * We only get credit for writes to known-visible vdevs; see spa_vdev_add(). |
966 * We only get credit for writes to known-visible vdevs; see spa_vdev_add(). |