6944833 Avoid prefetching dbufs in dmu_sync() path
author Jeff Bonwick <Jeff.Bonwick@Sun.COM>
Thu, 29 Apr 2010 13:37:53 -0700
changeset 12285 d736d62dcca2
parent 12284 3d1135425dbe
child 12286 2ba5ef27b57b
6944833 Avoid prefetching dbufs in dmu_sync() path
usr/src/cmd/ztest/ztest.c
usr/src/uts/common/fs/zfs/bplist.c
usr/src/uts/common/fs/zfs/dmu.c
usr/src/uts/common/fs/zfs/dmu_send.c
usr/src/uts/common/fs/zfs/sys/dmu.h
usr/src/uts/common/fs/zfs/zap.c
usr/src/uts/common/fs/zfs/zap_micro.c
usr/src/uts/common/fs/zfs/zfs_sa.c
usr/src/uts/common/fs/zfs/zfs_vnops.c
usr/src/uts/common/fs/zfs/zvol.c
--- a/usr/src/cmd/ztest/ztest.c	Thu Apr 29 15:27:29 2010 -0400
+++ b/usr/src/cmd/ztest/ztest.c	Thu Apr 29 13:37:53 2010 -0700
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 /*
@@ -1674,7 +1673,8 @@
 		zgd->zgd_rl = ztest_range_lock(zd, object, offset, size,
 		    RL_READER);
 
-		error = dmu_buf_hold(os, object, offset, zgd, &db);
+		error = dmu_buf_hold(os, object, offset, zgd, &db,
+		    DMU_READ_NO_PREFETCH);
 
 		if (error == 0) {
 			zgd->zgd_db = db;
@@ -3603,7 +3603,7 @@
 
 			if (i == 1) {
 				VERIFY(dmu_buf_hold(os, bigobj, off,
-				    FTAG, &dbt) == 0);
+				    FTAG, &dbt, DMU_READ_NO_PREFETCH) == 0);
 			}
 			if (i != 5) {
 				dmu_assign_arcbuf(bonus_db, off,
@@ -4600,7 +4600,8 @@
 	 */
 	for (int i = 0; i < copies; i++) {
 		uint64_t offset = i * blocksize;
-		VERIFY(dmu_buf_hold(os, object, offset, FTAG, &db) == 0);
+		VERIFY(dmu_buf_hold(os, object, offset, FTAG, &db,
+		    DMU_READ_NO_PREFETCH) == 0);
 		ASSERT(db->db_offset == offset);
 		ASSERT(db->db_size == blocksize);
 		ASSERT(ztest_pattern_match(db->db_data, db->db_size, pattern) ||
@@ -4616,7 +4617,8 @@
 	/*
 	 * Find out what block we got.
 	 */
-	VERIFY(dmu_buf_hold(os, object, 0, FTAG, &db) == 0);
+	VERIFY(dmu_buf_hold(os, object, 0, FTAG, &db,
+	    DMU_READ_NO_PREFETCH) == 0);
 	blk = *((dmu_buf_impl_t *)db)->db_blkptr;
 	dmu_buf_rele(db, FTAG);
 
--- a/usr/src/uts/common/fs/zfs/bplist.c	Thu Apr 29 15:27:29 2010 -0400
+++ b/usr/src/uts/common/fs/zfs/bplist.c	Thu Apr 29 13:37:53 2010 -0700
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #include <sys/bplist.h>
@@ -154,7 +153,7 @@
 			dmu_buf_rele(bpl->bpl_cached_dbuf, bpl);
 		err = dmu_buf_hold(bpl->bpl_mos,
 		    bpl->bpl_object, blkid << bpl->bpl_blockshift,
-		    bpl, &bpl->bpl_cached_dbuf);
+		    bpl, &bpl->bpl_cached_dbuf, DMU_READ_PREFETCH);
 		ASSERT(err || bpl->bpl_cached_dbuf->db_size ==
 		    1ULL << bpl->bpl_blockshift);
 	}
--- a/usr/src/uts/common/fs/zfs/dmu.c	Thu Apr 29 15:27:29 2010 -0400
+++ b/usr/src/uts/common/fs/zfs/dmu.c	Thu Apr 29 13:37:53 2010 -0700
@@ -97,12 +97,16 @@
 
 int
 dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset,
-    void *tag, dmu_buf_t **dbp)
+    void *tag, dmu_buf_t **dbp, int flags)
 {
 	dnode_t *dn;
 	uint64_t blkid;
 	dmu_buf_impl_t *db;
 	int err;
+	int db_flags = DB_RF_CANFAIL;
+
+	if (flags & DMU_READ_NO_PREFETCH)
+		db_flags |= DB_RF_NOPREFETCH;
 
 	err = dnode_hold(os, object, FTAG, &dn);
 	if (err)
@@ -114,7 +118,7 @@
 	if (db == NULL) {
 		err = EIO;
 	} else {
-		err = dbuf_read(db, NULL, DB_RF_CANFAIL);
+		err = dbuf_read(db, NULL, db_flags);
 		if (err) {
 			dbuf_rele(db, tag);
 			db = NULL;
@@ -205,7 +209,7 @@
 
 	dnode_rele(dn, FTAG);
 
-	VERIFY(0 == dbuf_read(db, NULL, DB_RF_MUST_SUCCEED));
+	VERIFY(0 == dbuf_read(db, NULL, DB_RF_MUST_SUCCEED | DB_RF_NOPREFETCH));
 
 	*dbp = &db->db;
 	return (0);
--- a/usr/src/uts/common/fs/zfs/dmu_send.c	Thu Apr 29 15:27:29 2010 -0400
+++ b/usr/src/uts/common/fs/zfs/dmu_send.c	Thu Apr 29 13:37:53 2010 -0700
@@ -1181,7 +1181,7 @@
 	}
 
 	if (err = dmu_buf_hold(ref_os, drrwbr->drr_refobject,
-	    drrwbr->drr_refoffset, FTAG, &dbp))
+	    drrwbr->drr_refoffset, FTAG, &dbp, DMU_READ_PREFETCH))
 		return (err);
 
 	tx = dmu_tx_create(os);
--- a/usr/src/uts/common/fs/zfs/sys/dmu.h	Thu Apr 29 15:27:29 2010 -0400
+++ b/usr/src/uts/common/fs/zfs/sys/dmu.h	Thu Apr 29 13:37:53 2010 -0700
@@ -367,7 +367,7 @@
  * The object number must be a valid, allocated object number.
  */
 int dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset,
-    void *tag, dmu_buf_t **);
+    void *tag, dmu_buf_t **, int flags);
 void dmu_buf_add_ref(dmu_buf_t *db, void* tag);
 void dmu_buf_rele(dmu_buf_t *db, void *tag);
 uint64_t dmu_buf_refcount(dmu_buf_t *db);
--- a/usr/src/uts/common/fs/zfs/zap.c	Thu Apr 29 15:27:29 2010 -0400
+++ b/usr/src/uts/common/fs/zfs/zap.c	Thu Apr 29 13:37:53 2010 -0700
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 /*
@@ -112,7 +111,7 @@
 	 * set up block 1 - the first leaf
 	 */
 	VERIFY(0 == dmu_buf_hold(zap->zap_objset, zap->zap_object,
-	    1<<FZAP_BLOCK_SHIFT(zap), FTAG, &db));
+	    1<<FZAP_BLOCK_SHIFT(zap), FTAG, &db, DMU_READ_NO_PREFETCH));
 	dmu_buf_will_dirty(db, tx);
 
 	l = kmem_zalloc(sizeof (zap_leaf_t), KM_SLEEP);
@@ -173,20 +172,20 @@
 
 	b = tbl->zt_blks_copied;
 	err = dmu_buf_hold(zap->zap_objset, zap->zap_object,
-	    (tbl->zt_blk + b) << bs, FTAG, &db_old);
+	    (tbl->zt_blk + b) << bs, FTAG, &db_old, DMU_READ_NO_PREFETCH);
 	if (err)
 		return (err);
 
 	/* first half of entries in old[b] go to new[2*b+0] */
 	VERIFY(0 == dmu_buf_hold(zap->zap_objset, zap->zap_object,
-	    (newblk + 2*b+0) << bs, FTAG, &db_new));
+	    (newblk + 2*b+0) << bs, FTAG, &db_new, DMU_READ_NO_PREFETCH));
 	dmu_buf_will_dirty(db_new, tx);
 	transfer_func(db_old->db_data, db_new->db_data, hepb);
 	dmu_buf_rele(db_new, FTAG);
 
 	/* second half of entries in old[b] go to new[2*b+1] */
 	VERIFY(0 == dmu_buf_hold(zap->zap_objset, zap->zap_object,
-	    (newblk + 2*b+1) << bs, FTAG, &db_new));
+	    (newblk + 2*b+1) << bs, FTAG, &db_new, DMU_READ_NO_PREFETCH));
 	dmu_buf_will_dirty(db_new, tx);
 	transfer_func((uint64_t *)db_old->db_data + hepb,
 	    db_new->db_data, hepb);
@@ -234,7 +233,7 @@
 	off = idx & ((1<<(bs-3))-1);
 
 	err = dmu_buf_hold(zap->zap_objset, zap->zap_object,
-	    (tbl->zt_blk + blk) << bs, FTAG, &db);
+	    (tbl->zt_blk + blk) << bs, FTAG, &db, DMU_READ_NO_PREFETCH);
 	if (err)
 		return (err);
 	dmu_buf_will_dirty(db, tx);
@@ -246,7 +245,8 @@
 		dmu_buf_t *db2;
 
 		err = dmu_buf_hold(zap->zap_objset, zap->zap_object,
-		    (tbl->zt_nextblk + blk2) << bs, FTAG, &db2);
+		    (tbl->zt_nextblk + blk2) << bs, FTAG, &db2,
+		    DMU_READ_NO_PREFETCH);
 		if (err) {
 			dmu_buf_rele(db, FTAG);
 			return (err);
@@ -277,7 +277,7 @@
 	off = idx & ((1<<(bs-3))-1);
 
 	err = dmu_buf_hold(zap->zap_objset, zap->zap_object,
-	    (tbl->zt_blk + blk) << bs, FTAG, &db);
+	    (tbl->zt_blk + blk) << bs, FTAG, &db, DMU_READ_NO_PREFETCH);
 	if (err)
 		return (err);
 	*valp = ((uint64_t *)db->db_data)[off];
@@ -292,7 +292,8 @@
 		blk = (idx*2) >> (bs-3);
 
 		err = dmu_buf_hold(zap->zap_objset, zap->zap_object,
-		    (tbl->zt_nextblk + blk) << bs, FTAG, &db);
+		    (tbl->zt_nextblk + blk) << bs, FTAG, &db,
+		    DMU_READ_NO_PREFETCH);
 		dmu_buf_rele(db, FTAG);
 	}
 	return (err);
@@ -341,7 +342,8 @@
 
 		newblk = zap_allocate_blocks(zap, 1);
 		err = dmu_buf_hold(zap->zap_objset, zap->zap_object,
-		    newblk << FZAP_BLOCK_SHIFT(zap), FTAG, &db_new);
+		    newblk << FZAP_BLOCK_SHIFT(zap), FTAG, &db_new,
+		    DMU_READ_NO_PREFETCH);
 		if (err)
 			return (err);
 		dmu_buf_will_dirty(db_new, tx);
@@ -399,7 +401,8 @@
 	l->l_phys = NULL;
 
 	VERIFY(0 == dmu_buf_hold(zap->zap_objset, zap->zap_object,
-	    l->l_blkid << FZAP_BLOCK_SHIFT(zap), NULL, &l->l_dbuf));
+	    l->l_blkid << FZAP_BLOCK_SHIFT(zap), NULL, &l->l_dbuf,
+	    DMU_READ_NO_PREFETCH));
 	winner = dmu_buf_set_user(l->l_dbuf, l, &l->l_phys, zap_leaf_pageout);
 	ASSERT(winner == NULL);
 	dmu_buf_will_dirty(l->l_dbuf, tx);
@@ -502,7 +505,7 @@
 	ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
 
 	err = dmu_buf_hold(zap->zap_objset, zap->zap_object,
-	    blkid << bs, NULL, &db);
+	    blkid << bs, NULL, &db, DMU_READ_NO_PREFETCH);
 	if (err)
 		return (err);
 
@@ -1195,7 +1198,7 @@
 
 			err = dmu_buf_hold(zap->zap_objset, zap->zap_object,
 			    (zap->zap_f.zap_phys->zap_ptrtbl.zt_blk + b) << bs,
-			    FTAG, &db);
+			    FTAG, &db, DMU_READ_NO_PREFETCH);
 			if (err == 0) {
 				zap_stats_ptrtbl(zap, db->db_data,
 				    1<<(bs-3), zs);
--- a/usr/src/uts/common/fs/zfs/zap_micro.c	Thu Apr 29 15:27:29 2010 -0400
+++ b/usr/src/uts/common/fs/zfs/zap_micro.c	Thu Apr 29 13:37:53 2010 -0700
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #include <sys/zio.h>
@@ -451,7 +450,7 @@
 
 	*zapp = NULL;
 
-	err = dmu_buf_hold(os, obj, 0, NULL, &db);
+	err = dmu_buf_hold(os, obj, 0, NULL, &db, DMU_READ_NO_PREFETCH);
 	if (err)
 		return (err);
 
@@ -577,7 +576,7 @@
 	dmu_buf_t *db;
 	mzap_phys_t *zp;
 
-	VERIFY(0 == dmu_buf_hold(os, obj, 0, FTAG, &db));
+	VERIFY(0 == dmu_buf_hold(os, obj, 0, FTAG, &db, DMU_READ_NO_PREFETCH));
 
 #ifdef ZFS_DEBUG
 	{
--- a/usr/src/uts/common/fs/zfs/zfs_sa.c	Thu Apr 29 15:27:29 2010 -0400
+++ b/usr/src/uts/common/fs/zfs/zfs_sa.c	Thu Apr 29 13:37:53 2010 -0700
@@ -83,7 +83,7 @@
 	} else {
 		dmu_buf_t *dbp;
 		if ((error = dmu_buf_hold(zp->z_zfsvfs->z_os, zp->z_id,
-		    0, FTAG, &dbp)) == 0) {
+		    0, FTAG, &dbp, DMU_READ_NO_PREFETCH)) == 0) {
 			error = uiomove(dbp->db_data,
 			    MIN((size_t)bufsz, uio->uio_resid), UIO_READ, uio);
 			dmu_buf_rele(dbp, FTAG);
@@ -109,7 +109,7 @@
 
 		zfs_grow_blocksize(zp, len, tx);
 		VERIFY(0 == dmu_buf_hold(zp->z_zfsvfs->z_os,
-		    zp->z_id, 0, FTAG, &dbp));
+		    zp->z_id, 0, FTAG, &dbp, DMU_READ_NO_PREFETCH));
 
 		dmu_buf_will_dirty(dbp, tx);
 
--- a/usr/src/uts/common/fs/zfs/zfs_vnops.c	Thu Apr 29 15:27:29 2010 -0400
+++ b/usr/src/uts/common/fs/zfs/zfs_vnops.c	Thu Apr 29 13:37:53 2010 -0700
@@ -1032,7 +1032,8 @@
 		}
 #endif
 		if (error == 0)
-			error = dmu_buf_hold(os, object, offset, zgd, &db);
+			error = dmu_buf_hold(os, object, offset, zgd, &db,
+			    DMU_READ_NO_PREFETCH);
 
 		if (error == 0) {
 			zgd->zgd_db = db;
@@ -3655,7 +3656,6 @@
 	 * Create a new object for the symlink.
 	 * for version 4 ZPL datsets the symlink will be an SA attribute
 	 */
-
 	zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
 
 	if (fuid_dirtied)
--- a/usr/src/uts/common/fs/zfs/zvol.c	Thu Apr 29 15:27:29 2010 -0400
+++ b/usr/src/uts/common/fs/zfs/zvol.c	Thu Apr 29 13:37:53 2010 -0700
@@ -949,7 +949,8 @@
 	} else {
 		size = zv->zv_volblocksize;
 		offset = P2ALIGN(offset, size);
-		error = dmu_buf_hold(os, object, offset, zgd, &db);
+		error = dmu_buf_hold(os, object, offset, zgd, &db,
+		    DMU_READ_NO_PREFETCH);
 		if (error == 0) {
 			zgd->zgd_db = db;
 			zgd->zgd_bp = bp;