6938089 dedup-induced latency causes FC initiator logouts/FC port resets
author George Wilson <George.Wilson@Sun.COM>
Wed, 19 May 2010 22:59:13 -0700
changeset 12450 c77e20e4e046
parent 12449 a87750d92895
child 12451 39b0738596da
6938089 dedup-induced latency causes FC initiator logouts/FC port resets
usr/src/uts/common/fs/zfs/dbuf.c
usr/src/uts/common/fs/zfs/ddt.c
usr/src/uts/common/fs/zfs/ddt_zap.c
usr/src/uts/common/fs/zfs/dmu_tx.c
usr/src/uts/common/fs/zfs/dsl_dataset.c
usr/src/uts/common/fs/zfs/spa.c
usr/src/uts/common/fs/zfs/sys/ddt.h
usr/src/uts/common/fs/zfs/sys/dsl_dataset.h
usr/src/uts/common/fs/zfs/sys/zap.h
usr/src/uts/common/fs/zfs/sys/zap_impl.h
usr/src/uts/common/fs/zfs/sys/zio.h
usr/src/uts/common/fs/zfs/zap.c
usr/src/uts/common/fs/zfs/zap_micro.c
usr/src/uts/common/fs/zfs/zio.c
--- a/usr/src/uts/common/fs/zfs/dbuf.c	Wed May 19 22:33:49 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/dbuf.c	Wed May 19 22:59:13 2010 -0700
@@ -868,7 +868,7 @@
 	/* If we don't exist or are in a snapshot, we can't be freed */
 	if (birth_txg)
 		return (ds == NULL ||
-		    dsl_dataset_block_freeable(ds, birth_txg));
+		    dsl_dataset_block_freeable(ds, db->db_blkptr, birth_txg));
 	else
 		return (FALSE);
 }
@@ -1725,6 +1725,8 @@
 
 	if (dbuf_findbp(dn, 0, blkid, TRUE, &db, &bp) == 0) {
 		if (bp && !BP_IS_HOLE(bp)) {
+			int priority = dn->dn_type == DMU_OT_DDT_ZAP ?
+			    ZIO_PRIORITY_DDT_PREFETCH : ZIO_PRIORITY_ASYNC_READ;
 			arc_buf_t *pbuf;
 			dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset;
 			uint32_t aflags = ARC_NOWAIT | ARC_PREFETCH;
@@ -1739,7 +1741,7 @@
 				pbuf = dn->dn_objset->os_phys_buf;
 
 			(void) dsl_read(NULL, dn->dn_objset->os_spa,
-			    bp, pbuf, NULL, NULL, ZIO_PRIORITY_ASYNC_READ,
+			    bp, pbuf, NULL, NULL, priority,
 			    ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE,
 			    &aflags, &zb);
 		}
@@ -2033,7 +2035,7 @@
 
 	if (db->db_blkptr)
 		res = dsl_dataset_block_freeable(db->db_objset->os_dsl_dataset,
-		    db->db_blkptr->blk_birth);
+		    db->db_blkptr, db->db_blkptr->blk_birth);
 
 	return (res);
 }
--- a/usr/src/uts/common/fs/zfs/ddt.c	Wed May 19 22:33:49 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/ddt.c	Wed May 19 22:59:13 2010 -0700
@@ -160,6 +160,17 @@
 	    ddt->ddt_object[type][class], dde));
 }
 
+static void
+ddt_object_prefetch(ddt_t *ddt, enum ddt_type type, enum ddt_class class,
+    ddt_entry_t *dde)
+{
+	if (!ddt_object_exists(ddt, type, class))
+		return;
+
+	ddt_ops[type]->ddt_op_prefetch(ddt->ddt_os,
+	    ddt->ddt_object[type][class], dde);
+}
+
 int
 ddt_object_update(ddt_t *ddt, enum ddt_type type, enum ddt_class class,
     ddt_entry_t *dde, dmu_tx_t *tx)
@@ -713,6 +724,30 @@
 	return (dde);
 }
 
+void
+ddt_prefetch(spa_t *spa, const blkptr_t *bp)
+{
+	ddt_t *ddt;
+	ddt_entry_t dde;
+
+	if (!BP_GET_DEDUP(bp))
+		return;
+
+	/*
+	 * We remove the DDT once it's empty and only prefetch dedup blocks
+	 * when there are entries in the DDT.  Thus no locking is required
+	 * as the DDT can't disappear on us.
+	 */
+	ddt = ddt_select(spa, bp);
+	ddt_key_fill(&dde.dde_key, bp);
+
+	for (enum ddt_type type = 0; type < DDT_TYPES; type++) {
+		for (enum ddt_class class = 0; class < DDT_CLASSES; class++) {
+			ddt_object_prefetch(ddt, type, class, &dde);
+		}
+	}
+}
+
 int
 ddt_entry_compare(const void *x1, const void *x2)
 {
--- a/usr/src/uts/common/fs/zfs/ddt_zap.c	Wed May 19 22:33:49 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/ddt_zap.c	Wed May 19 22:59:13 2010 -0700
@@ -20,8 +20,7 @@
  */
 
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #include <sys/zfs_context.h>
@@ -81,6 +80,13 @@
 	return (0);
 }
 
+static void
+ddt_zap_prefetch(objset_t *os, uint64_t object, ddt_entry_t *dde)
+{
+	(void) zap_prefetch_uint64(os, object, (uint64_t *)&dde->dde_key,
+	    DDT_KEY_WORDS);
+}
+
 static int
 ddt_zap_update(objset_t *os, uint64_t object, ddt_entry_t *dde, dmu_tx_t *tx)
 {
@@ -143,6 +149,7 @@
 	ddt_zap_create,
 	ddt_zap_destroy,
 	ddt_zap_lookup,
+	ddt_zap_prefetch,
 	ddt_zap_update,
 	ddt_zap_remove,
 	ddt_zap_walk,
--- a/usr/src/uts/common/fs/zfs/dmu_tx.c	Wed May 19 22:33:49 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/dmu_tx.c	Wed May 19 22:59:13 2010 -0700
@@ -195,7 +195,7 @@
 	}
 
 	freeable = (bp && (freeable ||
-	    dsl_dataset_block_freeable(ds, bp->blk_birth)));
+	    dsl_dataset_block_freeable(ds, bp, bp->blk_birth)));
 
 	if (freeable)
 		txh->txh_space_tooverwrite += space;
@@ -390,7 +390,7 @@
 
 	if (dn && dn->dn_dbuf->db_blkptr &&
 	    dsl_dataset_block_freeable(dn->dn_objset->os_dsl_dataset,
-	    dn->dn_dbuf->db_blkptr->blk_birth)) {
+	    dn->dn_dbuf->db_blkptr, dn->dn_dbuf->db_blkptr->blk_birth)) {
 		txh->txh_space_tooverwrite += space;
 		txh->txh_space_tounref += space;
 	} else {
@@ -465,7 +465,7 @@
 			blkptr_t *bp = dn->dn_phys->dn_blkptr;
 			ASSERT3U(blkid + i, <, dn->dn_nblkptr);
 			bp += blkid + i;
-			if (dsl_dataset_block_freeable(ds, bp->blk_birth)) {
+			if (dsl_dataset_block_freeable(ds, bp, bp->blk_birth)) {
 				dprintf_bp(bp, "can free old%s", "");
 				space += bp_get_dsize(spa, bp);
 			}
@@ -550,7 +550,8 @@
 		bp += blkoff;
 
 		for (i = 0; i < tochk; i++) {
-			if (dsl_dataset_block_freeable(ds, bp[i].blk_birth)) {
+			if (dsl_dataset_block_freeable(ds, &bp[i],
+			    bp[i].blk_birth)) {
 				dprintf_bp(&bp[i], "can free old%s", "");
 				space += bp_get_dsize(spa, &bp[i]);
 			}
@@ -690,6 +691,7 @@
 		 * the size will change between now and the dbuf dirty call.
 		 */
 		if (dsl_dataset_block_freeable(dn->dn_objset->os_dsl_dataset,
+		    &dn->dn_phys->dn_blkptr[0],
 		    dn->dn_phys->dn_blkptr[0].blk_birth)) {
 			txh->txh_space_tooverwrite += SPA_MAXBLOCKSIZE;
 		} else {
@@ -1279,7 +1281,7 @@
 		txh->txh_space_tounref = 0;
 	} else {
 		if (dsl_dataset_block_freeable(dn->dn_objset->os_dsl_dataset,
-		    bp->blk_birth))
+		    bp, bp->blk_birth))
 			txh->txh_space_tooverwrite += SPA_MAXBLOCKSIZE;
 		else
 			txh->txh_space_towrite += SPA_MAXBLOCKSIZE;
--- a/usr/src/uts/common/fs/zfs/dsl_dataset.c	Wed May 19 22:33:49 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/dsl_dataset.c	Wed May 19 22:59:13 2010 -0700
@@ -40,6 +40,11 @@
 #include <sys/zvol.h>
 #include <sys/dsl_scan.h>
 
+/*
+ * Enable/disable prefetching of dedup-ed blocks which are going to be freed.
+ */
+int zfs_dedup_prefetch = 1;
+
 static char *dsl_reaper = "the grim reaper";
 
 static dsl_checkfunc_t dsl_dataset_destroy_begin_check;
@@ -234,9 +239,16 @@
 }
 
 boolean_t
-dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth)
+dsl_dataset_block_freeable(dsl_dataset_t *ds, const blkptr_t *bp,
+    uint64_t blk_birth)
 {
-	return (blk_birth > dsl_dataset_prev_snap_txg(ds));
+	if (blk_birth <= dsl_dataset_prev_snap_txg(ds))
+		return (B_FALSE);
+
+	if (zfs_dedup_prefetch && bp && BP_GET_DEDUP(bp))
+		ddt_prefetch(dsl_dataset_get_spa(ds), bp);
+
+	return (B_TRUE);
 }
 
 /* ARGSUSED */
--- a/usr/src/uts/common/fs/zfs/spa.c	Wed May 19 22:33:49 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/spa.c	Wed May 19 22:59:13 2010 -0700
@@ -106,7 +106,7 @@
 	{ ZTI_ONE,	ZTI_NULL,	ZTI_ONE,	ZTI_NULL },
 	{ ZTI_FIX(8),	ZTI_NULL,	ZTI_BATCH,	ZTI_NULL },
 	{ ZTI_BATCH,	ZTI_FIX(5),	ZTI_FIX(8),	ZTI_FIX(5) },
-	{ ZTI_FIX(10),	ZTI_NULL,	ZTI_FIX(10),	ZTI_NULL },
+	{ ZTI_FIX(100),	ZTI_NULL,	ZTI_ONE,	ZTI_NULL },
 	{ ZTI_ONE,	ZTI_NULL,	ZTI_ONE,	ZTI_NULL },
 	{ ZTI_ONE,	ZTI_NULL,	ZTI_ONE,	ZTI_NULL },
 };
--- a/usr/src/uts/common/fs/zfs/sys/ddt.h	Wed May 19 22:33:49 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/ddt.h	Wed May 19 22:59:13 2010 -0700
@@ -155,6 +155,8 @@
 	    boolean_t prehash);
 	int (*ddt_op_destroy)(objset_t *os, uint64_t object, dmu_tx_t *tx);
 	int (*ddt_op_lookup)(objset_t *os, uint64_t object, ddt_entry_t *dde);
+	void (*ddt_op_prefetch)(objset_t *os, uint64_t object,
+	    ddt_entry_t *dde);
 	int (*ddt_op_update)(objset_t *os, uint64_t object, ddt_entry_t *dde,
 	    dmu_tx_t *tx);
 	int (*ddt_op_remove)(objset_t *os, uint64_t object, ddt_entry_t *dde,
@@ -216,6 +218,7 @@
 extern void ddt_enter(ddt_t *ddt);
 extern void ddt_exit(ddt_t *ddt);
 extern ddt_entry_t *ddt_lookup(ddt_t *ddt, const blkptr_t *bp, boolean_t add);
+extern void ddt_prefetch(spa_t *spa, const blkptr_t *bp);
 extern void ddt_remove(ddt_t *ddt, ddt_entry_t *dde);
 
 extern boolean_t ddt_class_contains(spa_t *spa, enum ddt_class max_class,
--- a/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h	Wed May 19 22:33:49 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h	Wed May 19 22:59:13 2010 -0700
@@ -215,7 +215,8 @@
     dmu_tx_t *tx);
 int dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp,
     dmu_tx_t *tx, boolean_t async);
-boolean_t dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth);
+boolean_t dsl_dataset_block_freeable(dsl_dataset_t *ds, const blkptr_t *bp,
+    uint64_t blk_birth);
 uint64_t dsl_dataset_prev_snap_txg(dsl_dataset_t *ds);
 
 void dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx);
--- a/usr/src/uts/common/fs/zfs/sys/zap.h	Wed May 19 22:33:49 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/zap.h	Wed May 19 22:59:13 2010 -0700
@@ -197,6 +197,8 @@
 int zap_lookup_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
     int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf);
 int zap_contains(objset_t *ds, uint64_t zapobj, const char *name);
+int zap_prefetch_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
+    int key_numints);
 
 int zap_count_write(objset_t *os, uint64_t zapobj, const char *name,
     int add, uint64_t *towrite, uint64_t *tooverwrite);
--- a/usr/src/uts/common/fs/zfs/sys/zap_impl.h	Wed May 19 22:33:49 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/zap_impl.h	Wed May 19 22:59:13 2010 -0700
@@ -201,6 +201,7 @@
 int fzap_lookup(zap_name_t *zn,
     uint64_t integer_size, uint64_t num_integers, void *buf,
     char *realname, int rn_len, boolean_t *normalization_conflictp);
+void fzap_prefetch(zap_name_t *zn);
 int fzap_count_write(zap_name_t *zn, int add, uint64_t *towrite,
     uint64_t *tooverwrite);
 int fzap_add(zap_name_t *zn, uint64_t integer_size, uint64_t num_integers,
--- a/usr/src/uts/common/fs/zfs/sys/zio.h	Wed May 19 22:33:49 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/zio.h	Wed May 19 22:59:13 2010 -0700
@@ -20,8 +20,7 @@
  */
 
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #ifndef _ZIO_H
@@ -132,7 +131,8 @@
 #define	ZIO_PRIORITY_ASYNC_READ		(zio_priority_table[8])
 #define	ZIO_PRIORITY_RESILVER		(zio_priority_table[9])
 #define	ZIO_PRIORITY_SCRUB		(zio_priority_table[10])
-#define	ZIO_PRIORITY_TABLE_SIZE		11
+#define	ZIO_PRIORITY_DDT_PREFETCH	(zio_priority_table[11])
+#define	ZIO_PRIORITY_TABLE_SIZE		12
 
 #define	ZIO_PIPELINE_CONTINUE		0x100
 #define	ZIO_PIPELINE_STOP		0x101
--- a/usr/src/uts/common/fs/zfs/zap.c	Wed May 19 22:33:49 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/zap.c	Wed May 19 22:59:13 2010 -0700
@@ -927,6 +927,21 @@
 	return (err);
 }
 
+void
+fzap_prefetch(zap_name_t *zn)
+{
+	uint64_t idx, blk;
+	zap_t *zap = zn->zn_zap;
+	int bs;
+
+	idx = ZAP_HASH_IDX(zn->zn_hash,
+	    zap->zap_f.zap_phys->zap_ptrtbl.zt_shift);
+	if (zap_idx_to_blk(zap, idx, &blk) != 0)
+		return;
+	bs = FZAP_BLOCK_SHIFT(zap);
+	dmu_prefetch(zap->zap_objset, zap->zap_object, blk << bs, 1 << bs);
+}
+
 /*
  * Helper functions for consumers.
  */
--- a/usr/src/uts/common/fs/zfs/zap_micro.c	Wed May 19 22:33:49 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/zap_micro.c	Wed May 19 22:59:13 2010 -0700
@@ -812,6 +812,29 @@
 }
 
 int
+zap_prefetch_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
+    int key_numints)
+{
+	zap_t *zap;
+	int err;
+	zap_name_t *zn;
+
+	err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap);
+	if (err)
+		return (err);
+	zn = zap_name_alloc_uint64(zap, key, key_numints);
+	if (zn == NULL) {
+		zap_unlockdir(zap);
+		return (ENOTSUP);
+	}
+
+	fzap_prefetch(zn);
+	zap_name_free(zn);
+	zap_unlockdir(zap);
+	return (err);
+}
+
+int
 zap_lookup_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
     int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf)
 {
--- a/usr/src/uts/common/fs/zfs/zio.c	Wed May 19 22:33:49 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/zio.c	Wed May 19 22:59:13 2010 -0700
@@ -52,6 +52,7 @@
 	6,	/* ZIO_PRIORITY_ASYNC_READ	*/
 	10,	/* ZIO_PRIORITY_RESILVER	*/
 	20,	/* ZIO_PRIORITY_SCRUB		*/
+	2,	/* ZIO_PRIORITY_DDT_PREFETCH	*/
 };
 
 /*