--- a/usr/src/uts/common/fs/zfs/spa.c Mon Jun 08 11:43:53 2009 -0400
+++ b/usr/src/uts/common/fs/zfs/spa.c Mon Jun 08 10:35:50 2009 -0700
@@ -59,6 +59,7 @@
#include <sys/systeminfo.h>
#include <sys/sunddi.h>
#include <sys/spa_boot.h>
+#include <sys/zfs_ioctl.h>
#ifdef _KERNEL
#include <sys/zone.h>
@@ -332,6 +333,7 @@
case ZPOOL_PROP_DELEGATION:
case ZPOOL_PROP_AUTOREPLACE:
case ZPOOL_PROP_LISTSNAPS:
+ case ZPOOL_PROP_AUTOEXPAND:
error = nvpair_value_uint64(elem, &intval);
if (!error && intval > 1)
error = EINVAL;
@@ -690,7 +692,7 @@
uint_t id, int atype)
{
nvlist_t **child;
- uint_t c, children;
+ uint_t children;
int error;
if ((error = vdev_alloc(spa, vdp, nv, parent, id, atype)) != 0)
@@ -711,7 +713,7 @@
return (EINVAL);
}
- for (c = 0; c < children; c++) {
+ for (int c = 0; c < children; c++) {
vdev_t *vd;
if ((error = spa_config_parse(spa, &vd, child[c], *vdp, c,
atype)) != 0) {
@@ -939,7 +941,7 @@
nvlist_t **l2cache;
uint_t nl2cache;
int i, j, oldnvdevs;
- uint64_t guid, size;
+ uint64_t guid;
vdev_t *vd, **oldvdevs, **newvdevs;
spa_aux_vdev_t *sav = &spa->spa_l2cache;
@@ -1003,12 +1005,8 @@
(void) vdev_validate_aux(vd);
- if (!vdev_is_dead(vd)) {
- size = vdev_get_rsize(vd);
- l2arc_add_vdev(spa, vd,
- VDEV_LABEL_START_SIZE,
- size - VDEV_LABEL_START_SIZE);
- }
+ if (!vdev_is_dead(vd))
+ l2arc_add_vdev(spa, vd);
}
}
@@ -1087,9 +1085,7 @@
static void
spa_check_removed(vdev_t *vd)
{
- int c;
-
- for (c = 0; c < vd->vdev_children; c++)
+ for (int c = 0; c < vd->vdev_children; c++)
spa_check_removed(vd->vdev_child[c]);
if (vd->vdev_ops->vdev_op_leaf && vdev_is_dead(vd)) {
@@ -1107,7 +1103,7 @@
{
nvlist_t *nv, *nvroot, **child;
uint64_t is_log;
- uint_t children, c;
+ uint_t children;
vdev_t *rvd = spa->spa_root_vdev;
VERIFY(load_nvlist(spa, spa->spa_config_object, &nv) == 0);
@@ -1115,7 +1111,7 @@
VERIFY(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
&child, &children) == 0);
- for (c = 0; c < children; c++) {
+ for (int c = 0; c < children; c++) {
vdev_t *tvd = rvd->vdev_child[c];
if (nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
@@ -1513,6 +1509,10 @@
spa->spa_pool_props_object,
zpool_prop_to_name(ZPOOL_PROP_FAILUREMODE),
sizeof (uint64_t), 1, &spa->spa_failmode);
+ (void) zap_lookup(spa->spa_meta_objset,
+ spa->spa_pool_props_object,
+ zpool_prop_to_name(ZPOOL_PROP_AUTOEXPAND),
+ sizeof (uint64_t), 1, &spa->spa_autoexpand);
}
/*
@@ -2086,7 +2086,7 @@
vdev_t *rvd;
dsl_pool_t *dp;
dmu_tx_t *tx;
- int c, error = 0;
+ int error = 0;
uint64_t txg = TXG_INITIAL;
nvlist_t **spares, **l2cache;
uint_t nspares, nl2cache;
@@ -2148,9 +2148,10 @@
(error = vdev_create(rvd, txg, B_FALSE)) == 0 &&
(error = spa_validate_aux(spa, nvroot, txg,
VDEV_ALLOC_ADD)) == 0) {
- for (c = 0; c < rvd->vdev_children; c++)
- vdev_init(rvd->vdev_child[c], txg);
- vdev_config_dirty(rvd);
+ for (int c = 0; c < rvd->vdev_children; c++) {
+ vdev_metaslab_set_size(rvd->vdev_child[c]);
+ vdev_expand(rvd->vdev_child[c], txg);
+ }
}
spa_config_exit(spa, SCL_ALL, FTAG);
@@ -2249,6 +2250,7 @@
spa->spa_bootfs = zpool_prop_default_numeric(ZPOOL_PROP_BOOTFS);
spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION);
spa->spa_failmode = zpool_prop_default_numeric(ZPOOL_PROP_FAILUREMODE);
+ spa->spa_autoexpand = zpool_prop_default_numeric(ZPOOL_PROP_AUTOEXPAND);
if (props != NULL) {
spa_configfile_set(spa, props, B_FALSE);
spa_sync_props(spa, props, CRED(), tx);
@@ -2331,9 +2333,7 @@
static void
spa_alt_rootvdev(vdev_t *vd, vdev_t **avd, uint64_t *txg)
{
- int c;
-
- for (c = 0; c < vd->vdev_children; c++)
+ for (int c = 0; c < vd->vdev_children; c++)
spa_alt_rootvdev(vd->vdev_child[c], avd, txg);
if (vd->vdev_ops->vdev_op_leaf) {
@@ -2627,6 +2627,12 @@
spa_config_update_common(spa, SPA_CONFIG_UPDATE_POOL, B_FALSE);
}
+ /*
+ * It's possible that the pool was expanded while it was exported.
+ * We kick off an async task to handle this for us.
+ */
+ spa_async_request(spa, SPA_ASYNC_AUTOEXPAND);
+
mutex_exit(&spa_namespace_lock);
return (0);
@@ -3064,10 +3070,9 @@
}
/*
- * Compare the new device size with the replaceable/attachable
- * device size.
+ * Make sure the new device is big enough.
*/
- if (newvd->vdev_psize < vdev_get_rsize(oldvd))
+ if (newvd->vdev_asize < vdev_get_min_asize(oldvd))
return (spa_vdev_exit(spa, newrootvd, txg, EOVERFLOW));
/*
@@ -3111,12 +3116,6 @@
newvd->vdev_id = pvd->vdev_children;
vdev_add_child(pvd, newvd);
- /*
- * If newvd is smaller than oldvd, but larger than its rsize,
- * the addition of newvd may have decreased our parent's asize.
- */
- pvd->vdev_asize = MIN(pvd->vdev_asize, newvd->vdev_asize);
-
tvd = newvd->vdev_top;
ASSERT(pvd->vdev_top == tvd);
ASSERT(tvd->vdev_parent == rvd);
@@ -3333,12 +3332,16 @@
vdev_propagate_state(cvd);
/*
- * If the device we just detached was smaller than the others, it may be
- * possible to add metaslabs (i.e. grow the pool). vdev_metaslab_init()
- * can't fail because the existing metaslabs are already in core, so
- * there's nothing to read from disk.
+ * If the 'autoexpand' property is set on the pool then automatically
+ * try to expand the size of the pool. For example if the device we
+ * just detached was smaller than the others, it may be possible to
+ * add metaslabs (i.e. grow the pool). We need to reopen the vdev
+ * first so that we can obtain the updated sizes of the leaf vdevs.
*/
- VERIFY(vdev_metaslab_init(tvd, txg) == 0);
+ if (spa->spa_autoexpand) {
+ vdev_reopen(tvd);
+ vdev_expand(tvd, txg);
+ }
vdev_config_dirty(tvd);
@@ -3496,9 +3499,8 @@
spa_vdev_resilver_done_hunt(vdev_t *vd)
{
vdev_t *newvd, *oldvd;
- int c;
-
- for (c = 0; c < vd->vdev_children; c++) {
+
+ for (int c = 0; c < vd->vdev_children; c++) {
oldvd = spa_vdev_resilver_done_hunt(vd->vdev_child[c]);
if (oldvd != NULL)
return (oldvd);
@@ -3686,6 +3688,37 @@
}
static void
+spa_async_autoexpand(spa_t *spa, vdev_t *vd)
+{
+ sysevent_id_t eid;
+ nvlist_t *attr;
+ char *physpath;
+
+ if (!spa->spa_autoexpand)
+ return;
+
+ for (int c = 0; c < vd->vdev_children; c++) {
+ vdev_t *cvd = vd->vdev_child[c];
+ spa_async_autoexpand(spa, cvd);
+ }
+
+ if (!vd->vdev_ops->vdev_op_leaf || vd->vdev_physpath == NULL)
+ return;
+
+ physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
+ (void) snprintf(physpath, MAXPATHLEN, "/devices%s", vd->vdev_physpath);
+
+ VERIFY(nvlist_alloc(&attr, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+ VERIFY(nvlist_add_string(attr, DEV_PHYS_PATH, physpath) == 0);
+
+ (void) ddi_log_sysevent(zfs_dip, SUNW_VENDOR, EC_DEV_STATUS,
+ ESC_DEV_DLE, attr, &eid, DDI_SLEEP);
+
+ nvlist_free(attr);
+ kmem_free(physpath, MAXPATHLEN);
+}
+
+static void
spa_async_thread(spa_t *spa)
{
int tasks;
@@ -3701,9 +3734,33 @@
* See if the config needs to be updated.
*/
if (tasks & SPA_ASYNC_CONFIG_UPDATE) {
+ uint64_t oldsz, space_update;
+
mutex_enter(&spa_namespace_lock);
+ oldsz = spa_get_space(spa);
spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);
+ space_update = spa_get_space(spa) - oldsz;
mutex_exit(&spa_namespace_lock);
+
+ /*
+ * If the pool grew as a result of the config update,
+ * then log an internal history event.
+ */
+ if (space_update) {
+ dmu_tx_t *tx;
+
+ tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir);
+ if (dmu_tx_assign(tx, TXG_WAIT) == 0) {
+ spa_history_internal_log(LOG_POOL_VDEV_ONLINE,
+ spa, tx, CRED(),
+ "pool '%s' size: %llu(+%llu)",
+ spa_name(spa), spa_get_space(spa),
+ space_update);
+ dmu_tx_commit(tx);
+ } else {
+ dmu_tx_abort(tx);
+ }
+ }
}
/*
@@ -3719,6 +3776,12 @@
(void) spa_vdev_state_exit(spa, NULL, 0);
}
+ if ((tasks & SPA_ASYNC_AUTOEXPAND) && !spa_suspended(spa)) {
+ spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
+ spa_async_autoexpand(spa, spa->spa_root_vdev);
+ spa_config_exit(spa, SCL_CONFIG, FTAG);
+ }
+
/*
* See if any devices need to be probed.
*/
@@ -4031,6 +4094,10 @@
case ZPOOL_PROP_FAILUREMODE:
spa->spa_failmode = intval;
break;
+ case ZPOOL_PROP_AUTOEXPAND:
+ spa->spa_autoexpand = intval;
+ spa_async_request(spa, SPA_ASYNC_AUTOEXPAND);
+ break;
default:
break;
}
@@ -4192,9 +4259,8 @@
int svdcount = 0;
int children = rvd->vdev_children;
int c0 = spa_get_random(children);
- int c;
-
- for (c = 0; c < children; c++) {
+
+ for (int c = 0; c < children; c++) {
vd = rvd->vdev_child[(c0 + c) % children];
if (vd->vdev_ms_array == 0 || vd->vdev_islog)
continue;