6572591 meta dnode lookup causes bucket lock contention in dbuf hash
author Tim Haley <Tim.Haley@Sun.COM>
Fri, 09 Apr 2010 22:09:40 -0600
changeset 12123 bb40732a982e
parent 12122 7f066dcce3ad
child 12124 0fc07fabc32e
6572591 meta dnode lookup causes bucket lock contention in dbuf hash
usr/src/uts/common/fs/zfs/dmu.c
usr/src/uts/common/fs/zfs/sys/dmu.h
usr/src/uts/common/fs/zfs/zfs_vnops.c
usr/src/uts/common/fs/zfs/zvol.c
--- a/usr/src/uts/common/fs/zfs/dmu.c	Fri Apr 09 16:58:13 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/dmu.c	Fri Apr 09 22:09:40 2010 -0600
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #include <sys/dmu.h>
@@ -928,19 +927,16 @@
 	return (err);
 }
 
-int
-dmu_write_uio(objset_t *os, uint64_t object, uio_t *uio, uint64_t size,
-    dmu_tx_t *tx)
+static int
+dmu_write_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size, dmu_tx_t *tx)
 {
 	dmu_buf_t **dbp;
-	int numbufs, i;
+	int numbufs;
 	int err = 0;
+	int i;
 
-	if (size == 0)
-		return (0);
-
-	err = dmu_buf_hold_array(os, object, uio->uio_loffset, size,
-	    FALSE, FTAG, &numbufs, &dbp);
+	err = dmu_buf_hold_array_by_dnode(dn, uio->uio_loffset, size,
+	    FALSE, FTAG, &numbufs, &dbp, DMU_READ_PREFETCH);
 	if (err)
 		return (err);
 
@@ -978,11 +974,44 @@
 
 		size -= tocpy;
 	}
+
 	dmu_buf_rele_array(dbp, numbufs, FTAG);
 	return (err);
 }
 
 int
+dmu_write_uio_dbuf(dmu_buf_t *zdb, uio_t *uio, uint64_t size,
+    dmu_tx_t *tx)	/* write uio data via the dnode attached to zdb */
+{
+	if (size == 0)	/* zero-length write: report success, do nothing */
+		return (0);
+
+	return (dmu_write_uio_dnode(((dmu_buf_impl_t *)zdb)->db_dnode,
+	    uio, size, tx));	/* uses zdb's db_dnode; no dnode_hold() here */
+}
+
+int
+dmu_write_uio(objset_t *os, uint64_t object, uio_t *uio, uint64_t size,
+    dmu_tx_t *tx)	/* write uio data to an object named by (os, object) */
+{
+	dnode_t *dn;
+	int err;
+
+	if (size == 0)	/* zero-length write: succeed before any lookup */
+		return (0);
+
+	err = dnode_hold(os, object, FTAG, &dn);	/* hold for this call */
+	if (err)
+		return (err);
+
+	err = dmu_write_uio_dnode(dn, uio, size, tx);
+
+	dnode_rele(dn, FTAG);	/* released whether the write failed or not */
+
+	return (err);
+}
+
+int
 dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
     page_t *pp, dmu_tx_t *tx)
 {
--- a/usr/src/uts/common/fs/zfs/sys/dmu.h	Fri Apr 09 16:58:13 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/dmu.h	Fri Apr 09 22:09:40 2010 -0600
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #ifndef	_SYS_DMU_H
@@ -520,6 +519,8 @@
 int dmu_read_uio(objset_t *os, uint64_t object, struct uio *uio, uint64_t size);
 int dmu_write_uio(objset_t *os, uint64_t object, struct uio *uio, uint64_t size,
     dmu_tx_t *tx);
+int dmu_write_uio_dbuf(dmu_buf_t *zdb, struct uio *uio, uint64_t size,
+    dmu_tx_t *tx);
 int dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset,
     uint64_t size, struct page *pp, dmu_tx_t *tx);
 struct arc_buf *dmu_request_arcbuf(dmu_buf_t *handle, int size);
--- a/usr/src/uts/common/fs/zfs/zfs_vnops.c	Fri Apr 09 16:58:13 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/zfs_vnops.c	Fri Apr 09 22:09:40 2010 -0600
@@ -812,8 +812,8 @@
 
 		if (abuf == NULL) {
 			tx_bytes = uio->uio_resid;
-			error = dmu_write_uio(zfsvfs->z_os, zp->z_id, uio,
-			    nbytes, tx);
+			error = dmu_write_uio_dbuf(sa_get_db(zp->z_sa_hdl),
+			    uio, nbytes, tx);
 			tx_bytes -= uio->uio_resid;
 		} else {
 			tx_bytes = nbytes;
--- a/usr/src/uts/common/fs/zfs/zvol.c	Fri Apr 09 16:58:13 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/zvol.c	Fri Apr 09 22:09:40 2010 -0600
@@ -114,6 +114,7 @@
 	zilog_t		*zv_zilog;	/* ZIL handle */
 	list_t		zv_extents;	/* List of extents for dump */
 	znode_t		zv_znode;	/* for range locking */
+	dmu_buf_t	*zv_dbuf;	/* bonus handle */
 } zvol_state_t;
 
 /*
@@ -599,6 +600,11 @@
 		return (error);
 	}
 	zv->zv_objset = os;
+	error = dmu_bonus_hold(os, ZVOL_OBJ, zvol_tag, &zv->zv_dbuf);
+	if (error) {
+		dmu_objset_disown(os, zvol_tag);
+		return (error);
+	}
 	zv->zv_volsize = volsize;
 	zv->zv_zilog = zil_open(os, zvol_get_data);
 	zvol_size_changed(zv->zv_volsize, ddi_driver_major(zfs_dip),
@@ -618,6 +624,8 @@
 {
 	zil_close(zv->zv_zilog);
 	zv->zv_zilog = NULL;
+	dmu_buf_rele(zv->zv_dbuf, zvol_tag);
+	zv->zv_dbuf = NULL;
 	dmu_objset_disown(zv->zv_objset, zvol_tag);
 	zv->zv_objset = NULL;
 }
@@ -1372,7 +1380,7 @@
 			dmu_tx_abort(tx);
 			break;
 		}
-		error = dmu_write_uio(zv->zv_objset, ZVOL_OBJ, uio, bytes, tx);
+		error = dmu_write_uio_dbuf(zv->zv_dbuf, uio, bytes, tx);
 		if (error == 0)
 			zvol_log_write(zv, tx, off, bytes, sync);
 		dmu_tx_commit(tx);