--- a/usr/src/uts/common/fs/zfs/dbuf.c Wed May 19 22:33:49 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/dbuf.c Wed May 19 22:59:13 2010 -0700
@@ -868,7 +868,7 @@
/* If we don't exist or are in a snapshot, we can't be freed */
if (birth_txg)
return (ds == NULL ||
- dsl_dataset_block_freeable(ds, birth_txg));
+ dsl_dataset_block_freeable(ds, db->db_blkptr, birth_txg));
else
return (FALSE);
}
@@ -1725,6 +1725,8 @@
if (dbuf_findbp(dn, 0, blkid, TRUE, &db, &bp) == 0) {
if (bp && !BP_IS_HOLE(bp)) {
+ int priority = dn->dn_type == DMU_OT_DDT_ZAP ?
+ ZIO_PRIORITY_DDT_PREFETCH : ZIO_PRIORITY_ASYNC_READ;
arc_buf_t *pbuf;
dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset;
uint32_t aflags = ARC_NOWAIT | ARC_PREFETCH;
@@ -1739,7 +1741,7 @@
pbuf = dn->dn_objset->os_phys_buf;
(void) dsl_read(NULL, dn->dn_objset->os_spa,
- bp, pbuf, NULL, NULL, ZIO_PRIORITY_ASYNC_READ,
+ bp, pbuf, NULL, NULL, priority,
ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE,
&aflags, &zb);
}
@@ -2033,7 +2035,7 @@
if (db->db_blkptr)
res = dsl_dataset_block_freeable(db->db_objset->os_dsl_dataset,
- db->db_blkptr->blk_birth);
+ db->db_blkptr, db->db_blkptr->blk_birth);
return (res);
}
--- a/usr/src/uts/common/fs/zfs/ddt.c Wed May 19 22:33:49 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/ddt.c Wed May 19 22:59:13 2010 -0700
@@ -160,6 +160,17 @@
ddt->ddt_object[type][class], dde));
}
+/* Prefetch dde from the (type, class) DDT object, if that object exists. */
+static void
+ddt_object_prefetch(ddt_t *ddt, enum ddt_type type, enum ddt_class class,
+    ddt_entry_t *dde)
+{
+	if (ddt_object_exists(ddt, type, class)) {
+		ddt_ops[type]->ddt_op_prefetch(ddt->ddt_os,
+		    ddt->ddt_object[type][class], dde);
+	}
+}
+
int
ddt_object_update(ddt_t *ddt, enum ddt_type type, enum ddt_class class,
ddt_entry_t *dde, dmu_tx_t *tx)
@@ -713,6 +724,30 @@
return (dde);
}
+/*
+ * Prefetch bp's DDT entry from each DDT object; non-dedup bps are ignored.
+ */
+void
+ddt_prefetch(spa_t *spa, const blkptr_t *bp)
+{
+	ddt_entry_t dde;
+	ddt_t *ddt;
+
+	if (!BP_GET_DEDUP(bp))
+		return;
+
+	/*
+	 * No locking is needed: the DDT is removed only once it is empty, and
+	 * dedup blocks are prefetched only while the DDT has entries.
+	 */
+	ddt = ddt_select(spa, bp);
+	ddt_key_fill(&dde.dde_key, bp);
+	for (enum ddt_type t = 0; t < DDT_TYPES; t++) {
+		for (enum ddt_class c = 0; c < DDT_CLASSES; c++)
+			ddt_object_prefetch(ddt, t, c, &dde);
+	}
+}
+
int
ddt_entry_compare(const void *x1, const void *x2)
{
--- a/usr/src/uts/common/fs/zfs/ddt_zap.c Wed May 19 22:33:49 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/ddt_zap.c Wed May 19 22:59:13 2010 -0700
@@ -20,8 +20,7 @@
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#include <sys/zfs_context.h>
@@ -81,6 +80,18 @@
return (0);
}
+/*
+ * Best-effort prefetch of dde's key from a DDT ZAP object.  The status
+ * returned by zap_prefetch_uint64() is deliberately discarded.
+ */
+static void
+ddt_zap_prefetch(objset_t *os, uint64_t object, ddt_entry_t *dde)
+{
+	uint64_t *key = (uint64_t *)&dde->dde_key;
+
+	(void) zap_prefetch_uint64(os, object, key, DDT_KEY_WORDS);
+}
+
static int
ddt_zap_update(objset_t *os, uint64_t object, ddt_entry_t *dde, dmu_tx_t *tx)
{
@@ -143,6 +149,7 @@
ddt_zap_create,
ddt_zap_destroy,
ddt_zap_lookup,
+ ddt_zap_prefetch,
ddt_zap_update,
ddt_zap_remove,
ddt_zap_walk,
--- a/usr/src/uts/common/fs/zfs/dmu_tx.c Wed May 19 22:33:49 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/dmu_tx.c Wed May 19 22:59:13 2010 -0700
@@ -195,7 +195,7 @@
}
freeable = (bp && (freeable ||
- dsl_dataset_block_freeable(ds, bp->blk_birth)));
+ dsl_dataset_block_freeable(ds, bp, bp->blk_birth)));
if (freeable)
txh->txh_space_tooverwrite += space;
@@ -390,7 +390,7 @@
if (dn && dn->dn_dbuf->db_blkptr &&
dsl_dataset_block_freeable(dn->dn_objset->os_dsl_dataset,
- dn->dn_dbuf->db_blkptr->blk_birth)) {
+ dn->dn_dbuf->db_blkptr, dn->dn_dbuf->db_blkptr->blk_birth)) {
txh->txh_space_tooverwrite += space;
txh->txh_space_tounref += space;
} else {
@@ -465,7 +465,7 @@
blkptr_t *bp = dn->dn_phys->dn_blkptr;
ASSERT3U(blkid + i, <, dn->dn_nblkptr);
bp += blkid + i;
- if (dsl_dataset_block_freeable(ds, bp->blk_birth)) {
+ if (dsl_dataset_block_freeable(ds, bp, bp->blk_birth)) {
dprintf_bp(bp, "can free old%s", "");
space += bp_get_dsize(spa, bp);
}
@@ -550,7 +550,8 @@
bp += blkoff;
for (i = 0; i < tochk; i++) {
- if (dsl_dataset_block_freeable(ds, bp[i].blk_birth)) {
+ if (dsl_dataset_block_freeable(ds, &bp[i],
+ bp[i].blk_birth)) {
dprintf_bp(&bp[i], "can free old%s", "");
space += bp_get_dsize(spa, &bp[i]);
}
@@ -690,6 +691,7 @@
* the size will change between now and the dbuf dirty call.
*/
if (dsl_dataset_block_freeable(dn->dn_objset->os_dsl_dataset,
+ &dn->dn_phys->dn_blkptr[0],
dn->dn_phys->dn_blkptr[0].blk_birth)) {
txh->txh_space_tooverwrite += SPA_MAXBLOCKSIZE;
} else {
@@ -1279,7 +1281,7 @@
txh->txh_space_tounref = 0;
} else {
if (dsl_dataset_block_freeable(dn->dn_objset->os_dsl_dataset,
- bp->blk_birth))
+ bp, bp->blk_birth))
txh->txh_space_tooverwrite += SPA_MAXBLOCKSIZE;
else
txh->txh_space_towrite += SPA_MAXBLOCKSIZE;
--- a/usr/src/uts/common/fs/zfs/dsl_dataset.c Wed May 19 22:33:49 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/dsl_dataset.c Wed May 19 22:59:13 2010 -0700
@@ -40,6 +40,11 @@
#include <sys/zvol.h>
#include <sys/dsl_scan.h>
+/*
+ * Nonzero enables DDT-entry prefetch for dedup-ed blocks found to be freeable.
+ */
+int zfs_dedup_prefetch = 1;
+
static char *dsl_reaper = "the grim reaper";
static dsl_checkfunc_t dsl_dataset_destroy_begin_check;
@@ -234,9 +239,16 @@
}
boolean_t
-dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth)
+dsl_dataset_block_freeable(dsl_dataset_t *ds, const blkptr_t *bp,
+ uint64_t blk_birth)
{
- return (blk_birth > dsl_dataset_prev_snap_txg(ds));
+ /* Not freeable if born at or before dsl_dataset_prev_snap_txg(ds). */
+ if (blk_birth <= dsl_dataset_prev_snap_txg(ds))
+ return (B_FALSE);
+ /* Prefetch a dedup bp's DDT entry. NOTE(review): needs ds != NULL? */
+ if (zfs_dedup_prefetch && bp && BP_GET_DEDUP(bp))
+ ddt_prefetch(dsl_dataset_get_spa(ds), bp);
+ return (B_TRUE);
}
/* ARGSUSED */
--- a/usr/src/uts/common/fs/zfs/spa.c Wed May 19 22:33:49 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/spa.c Wed May 19 22:59:13 2010 -0700
@@ -106,7 +106,7 @@
{ ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL },
{ ZTI_FIX(8), ZTI_NULL, ZTI_BATCH, ZTI_NULL },
{ ZTI_BATCH, ZTI_FIX(5), ZTI_FIX(8), ZTI_FIX(5) },
- { ZTI_FIX(10), ZTI_NULL, ZTI_FIX(10), ZTI_NULL },
+ { ZTI_FIX(100), ZTI_NULL, ZTI_ONE, ZTI_NULL },
{ ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL },
{ ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL },
};
--- a/usr/src/uts/common/fs/zfs/sys/ddt.h Wed May 19 22:33:49 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/ddt.h Wed May 19 22:59:13 2010 -0700
@@ -155,6 +155,8 @@
boolean_t prehash);
int (*ddt_op_destroy)(objset_t *os, uint64_t object, dmu_tx_t *tx);
int (*ddt_op_lookup)(objset_t *os, uint64_t object, ddt_entry_t *dde);
+ void (*ddt_op_prefetch)(objset_t *os, uint64_t object,
+ ddt_entry_t *dde);
int (*ddt_op_update)(objset_t *os, uint64_t object, ddt_entry_t *dde,
dmu_tx_t *tx);
int (*ddt_op_remove)(objset_t *os, uint64_t object, ddt_entry_t *dde,
@@ -216,6 +218,7 @@
extern void ddt_enter(ddt_t *ddt);
extern void ddt_exit(ddt_t *ddt);
extern ddt_entry_t *ddt_lookup(ddt_t *ddt, const blkptr_t *bp, boolean_t add);
+extern void ddt_prefetch(spa_t *spa, const blkptr_t *bp);
extern void ddt_remove(ddt_t *ddt, ddt_entry_t *dde);
extern boolean_t ddt_class_contains(spa_t *spa, enum ddt_class max_class,
--- a/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h Wed May 19 22:33:49 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h Wed May 19 22:59:13 2010 -0700
@@ -215,7 +215,8 @@
dmu_tx_t *tx);
int dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp,
dmu_tx_t *tx, boolean_t async);
-boolean_t dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth);
+boolean_t dsl_dataset_block_freeable(dsl_dataset_t *ds, const blkptr_t *bp,
+ uint64_t blk_birth);
uint64_t dsl_dataset_prev_snap_txg(dsl_dataset_t *ds);
void dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx);
--- a/usr/src/uts/common/fs/zfs/sys/zap.h Wed May 19 22:33:49 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/zap.h Wed May 19 22:59:13 2010 -0700
@@ -197,6 +197,8 @@
int zap_lookup_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf);
int zap_contains(objset_t *ds, uint64_t zapobj, const char *name);
+int zap_prefetch_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
+ int key_numints);
int zap_count_write(objset_t *os, uint64_t zapobj, const char *name,
int add, uint64_t *towrite, uint64_t *tooverwrite);
--- a/usr/src/uts/common/fs/zfs/sys/zap_impl.h Wed May 19 22:33:49 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/zap_impl.h Wed May 19 22:59:13 2010 -0700
@@ -201,6 +201,7 @@
int fzap_lookup(zap_name_t *zn,
uint64_t integer_size, uint64_t num_integers, void *buf,
char *realname, int rn_len, boolean_t *normalization_conflictp);
+void fzap_prefetch(zap_name_t *zn);
int fzap_count_write(zap_name_t *zn, int add, uint64_t *towrite,
uint64_t *tooverwrite);
int fzap_add(zap_name_t *zn, uint64_t integer_size, uint64_t num_integers,
--- a/usr/src/uts/common/fs/zfs/sys/zio.h Wed May 19 22:33:49 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/zio.h Wed May 19 22:59:13 2010 -0700
@@ -20,8 +20,7 @@
*/
/*
- * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _ZIO_H
@@ -132,7 +131,8 @@
#define ZIO_PRIORITY_ASYNC_READ (zio_priority_table[8])
#define ZIO_PRIORITY_RESILVER (zio_priority_table[9])
#define ZIO_PRIORITY_SCRUB (zio_priority_table[10])
-#define ZIO_PRIORITY_TABLE_SIZE 11
+#define ZIO_PRIORITY_DDT_PREFETCH (zio_priority_table[11])
+#define ZIO_PRIORITY_TABLE_SIZE 12
#define ZIO_PIPELINE_CONTINUE 0x100
#define ZIO_PIPELINE_STOP 0x101
--- a/usr/src/uts/common/fs/zfs/zap.c Wed May 19 22:33:49 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/zap.c Wed May 19 22:59:13 2010 -0700
@@ -927,6 +927,26 @@
return (err);
}
+/*
+ * Issue a prefetch for the block that zap_idx_to_blk() returns for zn's
+ * hash index.  Nothing is prefetched, and no error is reported, if that
+ * lookup fails.
+ */
+void
+fzap_prefetch(zap_name_t *zn)
+{
+	zap_t *zap = zn->zn_zap;
+	uint64_t slot, blkid;
+
+	slot = ZAP_HASH_IDX(zn->zn_hash,
+	    zap->zap_f.zap_phys->zap_ptrtbl.zt_shift);
+	if (zap_idx_to_blk(zap, slot, &blkid) == 0) {
+		int shift = FZAP_BLOCK_SHIFT(zap);
+
+		dmu_prefetch(zap->zap_objset, zap->zap_object,
+		    blkid << shift, 1 << shift);
+	}
+}
+
/*
* Helper functions for consumers.
*/
--- a/usr/src/uts/common/fs/zfs/zap_micro.c Wed May 19 22:33:49 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/zap_micro.c Wed May 19 22:59:13 2010 -0700
@@ -812,6 +812,29 @@
}
+/* Prefetch key's ZAP block.  Returns 0, a zap_lockdir() error, or ENOTSUP. */
int
+zap_prefetch_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
+    int key_numints)
+{
+	zap_name_t *zn;
+	zap_t *zap;
+	int err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap);
+
+	if (err != 0)
+		return (err);
+
+	zn = zap_name_alloc_uint64(zap, key, key_numints);
+	if (zn != NULL) {
+		fzap_prefetch(zn);
+		zap_name_free(zn);
+	} else {
+		err = ENOTSUP;
+	}
+	zap_unlockdir(zap);
+	return (err);
+}
+
+int
zap_lookup_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf)
{
--- a/usr/src/uts/common/fs/zfs/zio.c Wed May 19 22:33:49 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/zio.c Wed May 19 22:59:13 2010 -0700
@@ -52,6 +52,7 @@
6, /* ZIO_PRIORITY_ASYNC_READ */
10, /* ZIO_PRIORITY_RESILVER */
20, /* ZIO_PRIORITY_SCRUB */
+ 2, /* ZIO_PRIORITY_DDT_PREFETCH */
};
/*