6678033 resilver code should prefetch
6841580 zfs stack overflow when upgrading to userspace accounting
6859446 scrub doesn't pause correctly
--- a/usr/src/cmd/mdb/common/modules/zfs/zfs.c Sat Nov 21 22:51:29 2009 -0800
+++ b/usr/src/cmd/mdb/common/modules/zfs/zfs.c Sat Nov 21 23:44:56 2009 -0800
@@ -313,6 +313,8 @@
"zfs_vdev_max_pending",
"zfs_vdev_min_pending",
"zfs_scrub_limit",
+ "zfs_no_scrub_io",
+ "zfs_no_scrub_prefetch",
"zfs_vdev_time_shift",
"zfs_vdev_ramp_rate",
"zfs_vdev_aggregation_limit",
--- a/usr/src/uts/common/fs/zfs/dsl_pool.c Sat Nov 21 22:51:29 2009 -0800
+++ b/usr/src/uts/common/fs/zfs/dsl_pool.c Sat Nov 21 23:44:56 2009 -0800
@@ -382,8 +382,12 @@
dsl_dir_sync(dd, tx);
write_time += gethrtime() - start;
- if (spa_sync_pass(dp->dp_spa) == 1)
+ if (spa_sync_pass(dp->dp_spa) == 1) {
+ dp->dp_scrub_prefetch_zio_root = zio_root(dp->dp_spa, NULL,
+ NULL, ZIO_FLAG_CANFAIL);
dsl_pool_scrub_sync(dp, tx);
+ (void) zio_wait(dp->dp_scrub_prefetch_zio_root);
+ }
start = gethrtime();
if (list_head(&mos->os_dirty_dnodes[txg & TXG_MASK]) != NULL ||
--- a/usr/src/uts/common/fs/zfs/dsl_scrub.c Sat Nov 21 22:51:29 2009 -0800
+++ b/usr/src/uts/common/fs/zfs/dsl_scrub.c Sat Nov 21 23:44:56 2009 -0800
@@ -53,6 +53,7 @@
int zfs_scrub_min_time = 1000; /* (millisec) min time to scrub per txg */
int zfs_resilver_min_time = 3000; /* (millisec) min time to resilver per txg */
boolean_t zfs_no_scrub_io = B_FALSE; /* set to disable scrub i/o */
+boolean_t zfs_no_scrub_prefetch = B_FALSE; /* set to disable scrub prefetching */
enum ddt_class zfs_scrub_ddt_class_max = DDT_CLASS_DUPLICATE;
extern int zfs_txg_timeout;
@@ -450,6 +451,27 @@
}
static void
+scrub_prefetch(dsl_pool_t *dp, arc_buf_t *buf, blkptr_t *bp, uint64_t objset,
+ uint64_t object, uint64_t blkid)
+{
+ zbookmark_t czb;
+ uint32_t flags = ARC_NOWAIT | ARC_PREFETCH;
+
+ if (zfs_no_scrub_prefetch)
+ return;
+
+ if (BP_IS_HOLE(bp) || bp->blk_birth <= dp->dp_scrub_min_txg ||
+ (BP_GET_LEVEL(bp) == 0 && BP_GET_TYPE(bp) != DMU_OT_DNODE))
+ return;
+
+ SET_BOOKMARK(&czb, objset, object, BP_GET_LEVEL(bp), blkid);
+
+ (void) arc_read(dp->dp_scrub_prefetch_zio_root, dp->dp_spa, bp,
+ buf, NULL, NULL, ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL,
+ &flags, &czb);
+}
+
+static void
scrub_visitbp(dsl_pool_t *dp, dnode_phys_t *dnp,
arc_buf_t *pbuf, blkptr_t *bp, const zbookmark_t *zb)
{
@@ -487,6 +509,13 @@
}
}
+ /*
+ * If dsl_pool_scrub_ddt() has already scrubbed this block,
+ * don't scrub it again.
+ */
+ if (!ddt_class_contains(dp->dp_spa, dp->dp_scrub_ddt_class_max, bp))
+ (void) scrub_funcs[dp->dp_scrub_func](dp, bp, zb);
+
if (BP_GET_LEVEL(bp) > 0) {
uint32_t flags = ARC_WAIT;
int i;
@@ -502,9 +531,11 @@
mutex_exit(&dp->dp_spa->spa_scrub_lock);
return;
}
- cbp = buf->b_data;
-
- for (i = 0; i < epb; i++, cbp++) {
+ for (i = 0, cbp = buf->b_data; i < epb; i++, cbp++) {
+ scrub_prefetch(dp, buf, cbp, zb->zb_objset,
+ zb->zb_object, zb->zb_blkid * epb + i);
+ }
+ for (i = 0, cbp = buf->b_data; i < epb; i++, cbp++) {
zbookmark_t czb;
SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object,
@@ -514,8 +545,8 @@
}
} else if (BP_GET_TYPE(bp) == DMU_OT_DNODE) {
uint32_t flags = ARC_WAIT;
- dnode_phys_t *child_dnp;
- int i;
+ dnode_phys_t *cdnp;
+ int i, j;
int epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT;
err = arc_read(NULL, dp->dp_spa, bp, pbuf,
@@ -527,10 +558,15 @@
mutex_exit(&dp->dp_spa->spa_scrub_lock);
return;
}
- child_dnp = buf->b_data;
-
- for (i = 0; i < epb; i++, child_dnp++) {
- scrub_visitdnode(dp, child_dnp, buf, zb->zb_objset,
+ for (i = 0, cdnp = buf->b_data; i < epb; i++, cdnp++) {
+ for (j = 0; j < cdnp->dn_nblkptr; j++) {
+ blkptr_t *cbp = &cdnp->dn_blkptr[j];
+ scrub_prefetch(dp, buf, cbp, zb->zb_objset,
+ zb->zb_blkid * epb + i, j);
+ }
+ }
+ for (i = 0, cdnp = buf->b_data; i < epb; i++, cdnp++) {
+ scrub_visitdnode(dp, cdnp, buf, zb->zb_objset,
zb->zb_blkid * epb + i);
}
} else if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) {
@@ -561,13 +597,6 @@
}
}
- /*
- * If dsl_pool_scrub_ddt() has aready scrubbed this block,
- * don't scrub it again.
- */
- if (!ddt_class_contains(dp->dp_spa, dp->dp_scrub_ddt_class_max, bp))
- (void) scrub_funcs[dp->dp_scrub_func](dp, bp, zb);
-
if (buf)
(void) arc_buf_remove_ref(buf, &buf);
}
@@ -887,7 +916,7 @@
/*
* If the pool is not loaded, or is trying to unload, leave it alone.
*/
- if (spa->spa_load_state != SPA_LOAD_NONE || spa_shutting_down(spa))
+ if (spa_load_state(spa) != SPA_LOAD_NONE || spa_shutting_down(spa))
return;
if (dp->dp_scrub_restart) {
--- a/usr/src/uts/common/fs/zfs/spa_errlog.c Sat Nov 21 22:51:29 2009 -0800
+++ b/usr/src/uts/common/fs/zfs/spa_errlog.c Sat Nov 21 23:44:56 2009 -0800
@@ -132,7 +132,7 @@
* If we are trying to import a pool, ignore any errors, as we won't be
* writing to the pool any time soon.
*/
- if (spa->spa_load_state == SPA_LOAD_TRYIMPORT)
+ if (spa_load_state(spa) == SPA_LOAD_TRYIMPORT)
return;
mutex_enter(&spa->spa_errlist_lock);
--- a/usr/src/uts/common/fs/zfs/spa_misc.c Sat Nov 21 22:51:29 2009 -0800
+++ b/usr/src/uts/common/fs/zfs/spa_misc.c Sat Nov 21 23:44:56 2009 -0800
@@ -1236,6 +1236,12 @@
return (spa->spa_state);
}
+spa_load_state_t
+spa_load_state(spa_t *spa)
+{
+ return (spa->spa_load_state);
+}
+
uint64_t
spa_freeze_txg(spa_t *spa)
{
--- a/usr/src/uts/common/fs/zfs/sys/dsl_pool.h Sat Nov 21 22:51:29 2009 -0800
+++ b/usr/src/uts/common/fs/zfs/sys/dsl_pool.h Sat Nov 21 23:44:56 2009 -0800
@@ -105,6 +105,7 @@
boolean_t dp_scrub_isresilver;
boolean_t dp_scrub_restart;
kmutex_t dp_scrub_cancel_lock; /* protects dp_scrub_restart */
+ zio_t *dp_scrub_prefetch_zio_root;
/* Has its own locking */
tx_state_t dp_tx;
--- a/usr/src/uts/common/fs/zfs/sys/spa.h Sat Nov 21 22:51:29 2009 -0800
+++ b/usr/src/uts/common/fs/zfs/sys/spa.h Sat Nov 21 23:44:56 2009 -0800
@@ -555,6 +555,7 @@
extern uint64_t spa_syncing_txg(spa_t *spa);
extern uint64_t spa_version(spa_t *spa);
extern pool_state_t spa_state(spa_t *spa);
+extern spa_load_state_t spa_load_state(spa_t *spa);
extern uint64_t spa_freeze_txg(spa_t *spa);
extern uint64_t spa_get_asize(spa_t *spa, uint64_t lsize);
extern uint64_t spa_get_dspace(spa_t *spa);
--- a/usr/src/uts/common/fs/zfs/txg.c Sat Nov 21 22:51:29 2009 -0800
+++ b/usr/src/uts/common/fs/zfs/txg.c Sat Nov 21 23:44:56 2009 -0800
@@ -353,6 +353,7 @@
static void
txg_sync_thread(dsl_pool_t *dp)
{
+ spa_t *spa = dp->dp_spa;
tx_state_t *tx = &dp->dp_tx;
callb_cpr_t cpr;
uint64_t start, delta;
@@ -371,7 +372,8 @@
*/
timer = (delta >= timeout ? 0 : timeout - delta);
while ((dp->dp_scrub_func == SCRUB_FUNC_NONE ||
- spa_shutting_down(dp->dp_spa)) &&
+ spa_load_state(spa) != SPA_LOAD_NONE ||
+ spa_shutting_down(spa)) &&
!tx->tx_exiting && timer > 0 &&
tx->tx_synced_txg >= tx->tx_sync_txg_waiting &&
tx->tx_quiesced_txg == 0) {
@@ -411,7 +413,7 @@
mutex_exit(&tx->tx_sync_lock);
start = ddi_get_lbolt();
- spa_sync(dp->dp_spa, txg);
+ spa_sync(spa, txg);
delta = ddi_get_lbolt() - start;
mutex_enter(&tx->tx_sync_lock);
--- a/usr/src/uts/common/fs/zfs/vdev.c Sat Nov 21 22:51:29 2009 -0800
+++ b/usr/src/uts/common/fs/zfs/vdev.c Sat Nov 21 23:44:56 2009 -0800
@@ -529,7 +529,7 @@
* valid in the current context. Local vdevs will
* remain in the faulted state.
*/
- if (spa->spa_load_state == SPA_LOAD_OPEN) {
+ if (spa_load_state(spa) == SPA_LOAD_OPEN) {
(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_FAULTED,
&vd->vdev_faulted);
(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_DEGRADED,
@@ -1345,7 +1345,7 @@
* state of the pool.
*/
if (!spa->spa_load_verbatim &&
- spa->spa_load_state == SPA_LOAD_OPEN &&
+ spa_load_state(spa) == SPA_LOAD_OPEN &&
state != POOL_STATE_ACTIVE)
return (EBADF);
@@ -2900,7 +2900,7 @@
* begin with. Failure to open such a device is not considered
* an error.
*/
- if (spa->spa_load_state == SPA_LOAD_IMPORT &&
+ if (spa_load_state(spa) == SPA_LOAD_IMPORT &&
vd->vdev_ops->vdev_op_leaf)
vd->vdev_not_present = 1;
--- a/usr/src/uts/common/fs/zfs/zfs_fm.c Sat Nov 21 22:51:29 2009 -0800
+++ b/usr/src/uts/common/fs/zfs/zfs_fm.c Sat Nov 21 23:44:56 2009 -0800
@@ -112,8 +112,8 @@
* If we are doing a spa_tryimport() or in recovery mode,
* ignore errors.
*/
- if (spa->spa_load_state == SPA_LOAD_TRYIMPORT ||
- spa->spa_load_state == SPA_LOAD_RECOVER)
+ if (spa_load_state(spa) == SPA_LOAD_TRYIMPORT ||
+ spa_load_state(spa) == SPA_LOAD_RECOVER)
return;
/*
@@ -121,7 +121,7 @@
* failed, don't bother logging any new ereports - we're just going to
* get the same diagnosis anyway.
*/
- if (spa->spa_load_state != SPA_LOAD_NONE &&
+ if (spa_load_state(spa) != SPA_LOAD_NONE &&
spa->spa_last_open_failed)
return;
@@ -202,7 +202,7 @@
* state, use a SPA-wide ENA. Otherwise, if we are in an I/O state, use
* a root zio-wide ENA. Otherwise, simply use a unique ENA.
*/
- if (spa->spa_load_state != SPA_LOAD_NONE) {
+ if (spa_load_state(spa) != SPA_LOAD_NONE) {
if (spa->spa_ena == 0)
spa->spa_ena = fm_ena_generate(0, FM_ENA_FMT1);
ena = spa->spa_ena;
@@ -238,7 +238,7 @@
DATA_TYPE_STRING, spa_name(spa), FM_EREPORT_PAYLOAD_ZFS_POOL_GUID,
DATA_TYPE_UINT64, spa_guid(spa),
FM_EREPORT_PAYLOAD_ZFS_POOL_CONTEXT, DATA_TYPE_INT32,
- spa->spa_load_state, NULL);
+ spa_load_state(spa), NULL);
if (spa != NULL) {
fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_POOL_FAILMODE,
@@ -805,7 +805,7 @@
nvlist_t *resource;
char class[64];
- if (spa->spa_load_state == SPA_LOAD_TRYIMPORT)
+ if (spa_load_state(spa) == SPA_LOAD_TRYIMPORT)
return;
if ((resource = fm_nvlist_create(NULL)) == NULL)
--- a/usr/src/uts/common/fs/zfs/zfs_ioctl.c Sat Nov 21 22:51:29 2009 -0800
+++ b/usr/src/uts/common/fs/zfs/zfs_ioctl.c Sat Nov 21 23:44:56 2009 -0800
@@ -1890,9 +1890,12 @@
zfsvfs_rele(zfsvfs, FTAG);
if (err == 0 && intval >= ZPL_VERSION_USERSPACE) {
- zfs_cmd_t zc = { 0 };
- (void) strcpy(zc.zc_name, dsname);
- (void) zfs_ioc_userspace_upgrade(&zc);
+ zfs_cmd_t *zc;
+
+ zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
+ (void) strcpy(zc->zc_name, dsname);
+ (void) zfs_ioc_userspace_upgrade(zc);
+ kmem_free(zc, sizeof (zfs_cmd_t));
}
break;
}
--- a/usr/src/uts/common/fs/zfs/zio.c Sat Nov 21 22:51:29 2009 -0800
+++ b/usr/src/uts/common/fs/zfs/zio.c Sat Nov 21 23:44:56 2009 -0800
@@ -1785,7 +1785,7 @@
ddt_t *ddt = ddt_select(zio->io_spa, bp);
ddt_entry_t *dde = zio->io_vsd;
if (ddt == NULL) {
- ASSERT(zio->io_spa->spa_load_state != SPA_LOAD_NONE);
+ ASSERT(spa_load_state(zio->io_spa) != SPA_LOAD_NONE);
return (ZIO_PIPELINE_CONTINUE);
}
if (dde == NULL) {
@@ -2711,7 +2711,7 @@
if ((zio->io_type == ZIO_TYPE_READ ||
zio->io_type == ZIO_TYPE_FREE) &&
zio->io_error == ENXIO &&
- spa->spa_load_state == SPA_LOAD_NONE &&
+ spa_load_state(spa) == SPA_LOAD_NONE &&
spa_get_failmode(spa) != ZIO_FAILURE_MODE_CONTINUE)
zio->io_reexecute |= ZIO_REEXECUTE_SUSPEND;