--- a/usr/src/uts/common/fs/zfs/arc.c Fri Nov 10 20:26:18 2006 -0800
+++ b/usr/src/uts/common/fs/zfs/arc.c Sat Nov 11 09:04:24 2006 -0800
@@ -113,6 +113,7 @@
#include <sys/spa.h>
#include <sys/zio.h>
+#include <sys/zio_checksum.h>
#include <sys/zfs_context.h>
#include <sys/arc.h>
#include <sys/refcount.h>
@@ -239,6 +240,9 @@
uint64_t b_birth;
uint64_t b_cksum0;
+ kmutex_t b_freeze_lock;
+ zio_cksum_t *b_freeze_cksum;
+
arc_buf_hdr_t *b_hash_next;
arc_buf_t *b_buf;
uint32_t b_flags;
@@ -542,6 +546,69 @@
#define ARC_MINTIME (hz>>4) /* 62 ms */
static void
+arc_cksum_verify(arc_buf_t *buf)
+{
+ zio_cksum_t zc;
+
+ if (!zfs_flags & ZFS_DEBUG_MODIFY)
+ return;
+
+ mutex_enter(&buf->b_hdr->b_freeze_lock);
+ if (buf->b_hdr->b_freeze_cksum == NULL) {
+ mutex_exit(&buf->b_hdr->b_freeze_lock);
+ return;
+ }
+ fletcher_2_native(buf->b_data, buf->b_hdr->b_size, &zc);
+ if (!ZIO_CHECKSUM_EQUAL(*buf->b_hdr->b_freeze_cksum, zc))
+ panic("buffer modified while frozen!");
+ mutex_exit(&buf->b_hdr->b_freeze_lock);
+}
+
+static void
+arc_cksum_compute(arc_buf_t *buf)
+{
+ if (!zfs_flags & ZFS_DEBUG_MODIFY)
+ return;
+
+ mutex_enter(&buf->b_hdr->b_freeze_lock);
+ if (buf->b_hdr->b_freeze_cksum != NULL) {
+ mutex_exit(&buf->b_hdr->b_freeze_lock);
+ return;
+ }
+ buf->b_hdr->b_freeze_cksum = kmem_alloc(sizeof (zio_cksum_t), KM_SLEEP);
+ fletcher_2_native(buf->b_data, buf->b_hdr->b_size,
+ buf->b_hdr->b_freeze_cksum);
+ mutex_exit(&buf->b_hdr->b_freeze_lock);
+}
+
+void
+arc_buf_thaw(arc_buf_t *buf)
+{
+ if (!zfs_flags & ZFS_DEBUG_MODIFY)
+ return;
+
+ if (buf->b_hdr->b_state != arc.anon)
+ panic("modifying non-anon buffer!");
+ if (buf->b_hdr->b_flags & ARC_IO_IN_PROGRESS)
+ panic("modifying buffer while i/o in progress!");
+ arc_cksum_verify(buf);
+ mutex_enter(&buf->b_hdr->b_freeze_lock);
+ if (buf->b_hdr->b_freeze_cksum != NULL) {
+ kmem_free(buf->b_hdr->b_freeze_cksum, sizeof (zio_cksum_t));
+ buf->b_hdr->b_freeze_cksum = NULL;
+ }
+ mutex_exit(&buf->b_hdr->b_freeze_lock);
+}
+
+void
+arc_buf_freeze(arc_buf_t *buf)
+{
+ ASSERT(buf->b_hdr->b_freeze_cksum != NULL ||
+ buf->b_hdr->b_state == arc.anon);
+ arc_cksum_compute(buf);
+}
+
+static void
add_reference(arc_buf_hdr_t *ab, kmutex_t *hash_lock, void *tag)
{
ASSERT(MUTEX_HELD(hash_lock));
@@ -772,6 +839,7 @@
arc_state_t *state = buf->b_hdr->b_state;
uint64_t size = buf->b_hdr->b_size;
+ arc_cksum_verify(buf);
if (!recycle) {
zio_buf_free(buf->b_data, size);
atomic_add_64(&arc.size, -size);
@@ -834,6 +902,10 @@
arc_buf_destroy(hdr->b_buf, FALSE, TRUE);
}
}
+ if (hdr->b_freeze_cksum != NULL) {
+ kmem_free(hdr->b_freeze_cksum, sizeof (zio_cksum_t));
+ hdr->b_freeze_cksum = NULL;
+ }
ASSERT(!list_link_active(&hdr->b_arc_node));
ASSERT3P(hdr->b_hash_next, ==, NULL);
@@ -1671,7 +1743,7 @@
* read.
*/
found = buf_hash_find(zio->io_spa, &hdr->b_dva, hdr->b_birth,
- &hash_lock);
+ &hash_lock);
ASSERT((found == NULL && HDR_FREED_IN_READ(hdr) && hash_lock == NULL) ||
(found == hdr && DVA_EQUAL(&hdr->b_dva, BP_IDENTITY(zio->io_bp))));
@@ -1682,6 +1754,8 @@
if (BP_SHOULD_BYTESWAP(zio->io_bp) && callback_list->acb_byteswap)
callback_list->acb_byteswap(buf->b_data, hdr->b_size);
+ arc_cksum_compute(buf);
+
/* create copies of the data buffer for the callers */
abuf = buf;
for (acb = callback_list; acb; acb = acb->acb_next) {
@@ -2106,6 +2180,7 @@
ASSERT3U(refcount_count(&hdr->b_refcnt), ==, 1);
ASSERT(BUF_EMPTY(hdr));
ASSERT(buf->b_efunc == NULL);
+ arc_buf_thaw(buf);
return;
}
@@ -2149,6 +2224,9 @@
nhdr->b_arc_access = 0;
nhdr->b_flags = 0;
nhdr->b_datacnt = 1;
+ nhdr->b_freeze_cksum =
+ kmem_alloc(sizeof (zio_cksum_t), KM_SLEEP);
+ *nhdr->b_freeze_cksum = *hdr->b_freeze_cksum; /* struct copy */
buf->b_hdr = nhdr;
buf->b_next = NULL;
(void) refcount_add(&nhdr->b_refcnt, tag);
@@ -2168,6 +2246,7 @@
}
buf->b_efunc = NULL;
buf->b_private = NULL;
+ arc_buf_thaw(buf);
}
int
@@ -2219,6 +2298,8 @@
arc_buf_hdr_t *exists;
kmutex_t *hash_lock;
+ arc_cksum_verify(buf);
+
exists = buf_hash_insert(hdr, &hash_lock);
if (exists) {
/*
@@ -2287,6 +2368,7 @@
acb->acb_byteswap = (arc_byteswap_func_t *)-1;
hdr->b_acb = acb;
hdr->b_flags |= ARC_IO_IN_PROGRESS;
+ arc_cksum_compute(buf);
rzio = zio_write(pio, spa, checksum, compress, ncopies, txg, bp,
buf->b_data, hdr->b_size, arc_write_done, buf, priority, flags, zb);
--- a/usr/src/uts/common/fs/zfs/dbuf.c Fri Nov 10 20:26:18 2006 -0800
+++ b/usr/src/uts/common/fs/zfs/dbuf.c Sat Nov 11 09:04:24 2006 -0800
@@ -448,6 +448,7 @@
/* we were freed in flight; disregard any error */
arc_release(buf, db);
bzero(buf->b_data, db->db.db_size);
+ arc_buf_freeze(buf);
db->db_d.db_freed_in_flight = FALSE;
dbuf_set_data(db, buf);
db->db_state = DB_CACHED;
@@ -647,8 +648,8 @@
ASSERT(db->db.db_data != NULL);
ASSERT(db->db_blkid != DB_BONUS_BLKID);
- quiescing = (arc_buf_t **)&db->db_d.db_data_old[(txg-1)&TXG_MASK];
- syncing = (arc_buf_t **)&db->db_d.db_data_old[(txg-2)&TXG_MASK];
+ quiescing = &db->db_d.db_data_old[(txg-1)&TXG_MASK];
+ syncing = &db->db_d.db_data_old[(txg-2)&TXG_MASK];
/*
* If this buffer is referenced from the current quiescing
@@ -701,7 +702,7 @@
static void
dbuf_fix_old_bonus_data(dmu_buf_impl_t *db, uint64_t txg)
{
- void **quiescing, **syncing;
+ arc_buf_t **quiescing, **syncing;
ASSERT(MUTEX_HELD(&db->db_mtx));
ASSERT(db->db.db_data != NULL);
@@ -742,7 +743,14 @@
kmem_free(db->db_d.db_overridden_by[txg&TXG_MASK],
sizeof (blkptr_t));
db->db_d.db_overridden_by[txg&TXG_MASK] = NULL;
- /* release the already-written buffer */
+ /*
+ * Release the already-written buffer, so we leave it in
+ * a consistent dirty state. Note that all callers are
+ * modifying the buffer, so they will immediately do
+ * another (redundant) arc_release(). Therefore, leave
+ * the buf thawed to save the effort of freezing &
+ * immediately re-thawing it.
+ */
arc_release(db->db_d.db_data_old[txg&TXG_MASK], db);
}
}
@@ -811,6 +819,7 @@
ASSERT(db->db.db_data != NULL);
arc_release(db->db_buf, db);
bzero(db->db.db_data, db->db.db_size);
+ arc_buf_freeze(db->db_buf);
}
mutex_exit(&db->db_mtx);
@@ -959,6 +968,10 @@
* If this buffer is already dirty, we're done.
*/
if (list_link_active(&db->db_dirty_node[txgoff])) {
+ if (db->db_blkid != DB_BONUS_BLKID && db->db_level == 0 &&
+ db->db.db_object != DMU_META_DNODE_OBJECT)
+ arc_buf_thaw(db->db_buf);
+
mutex_exit(&db->db_mtx);
return;
}
@@ -1647,6 +1660,9 @@
holds = refcount_remove(&db->db_holds, tag);
ASSERT(holds >= 0);
+ if (db->db_buf && holds == db->db_dirtycnt)
+ arc_buf_freeze(db->db_buf);
+
if (holds == db->db_dirtycnt &&
db->db_level == 0 && db->db_d.db_immediate_evict)
dbuf_evict_user(db);
@@ -1662,7 +1678,7 @@
*/
ASSERT3U(db->db_state, ==, DB_UNCACHED);
dbuf_evict(db);
- } else if (arc_released(db->db_buf)) {
+ } else if (arc_released(db->db_buf)) {
arc_buf_t *buf = db->db_buf;
/*
* This dbuf has anonymous data associated with it.
@@ -1777,7 +1793,7 @@
*/
if (db->db_blkid == DB_BONUS_BLKID) {
- void **datap = &db->db_d.db_data_old[txg&TXG_MASK];
+ arc_buf_t **datap = &db->db_d.db_data_old[txg&TXG_MASK];
/*
* Simply copy the bonus data into the dnode. It will
* be written out when the dnode is synced (and it will
@@ -1807,7 +1823,7 @@
}
if (db->db_level == 0) {
- data = (arc_buf_t **)&db->db_d.db_data_old[txg&TXG_MASK];
+ data = &db->db_d.db_data_old[txg&TXG_MASK];
blksz = arc_buf_size(*data);
/*
@@ -1963,7 +1979,7 @@
* We may have read this indirect block after we dirtied it,
* so never released it from the cache.
*/
- arc_release(parent->db_buf, db->db_parent);
+ arc_release(parent->db_buf, parent);
db->db_blkptr = (blkptr_t *)parent->db.db_data +
(db->db_blkid & ((1ULL << epbs) - 1));
@@ -1984,8 +2000,7 @@
if (db->db_level == 0 &&
db->db_d.db_overridden_by[txg&TXG_MASK] != NULL) {
- arc_buf_t **old =
- (arc_buf_t **)&db->db_d.db_data_old[txg&TXG_MASK];
+ arc_buf_t **old = &db->db_d.db_data_old[txg&TXG_MASK];
blkptr_t **bpp = &db->db_d.db_overridden_by[txg&TXG_MASK];
int old_size = bp_get_dasize(os->os_spa, db->db_blkptr);
int new_size = bp_get_dasize(os->os_spa, *bpp);
@@ -2130,8 +2145,7 @@
db->db_dirtied = 0;
if (db->db_level == 0) {
- arc_buf_t **old =
- (arc_buf_t **)&db->db_d.db_data_old[txg&TXG_MASK];
+ arc_buf_t **old = &db->db_d.db_data_old[txg&TXG_MASK];
ASSERT(db->db_blkid != DB_BONUS_BLKID);
--- a/usr/src/uts/common/fs/zfs/dmu_send.c Fri Nov 10 20:26:18 2006 -0800
+++ b/usr/src/uts/common/fs/zfs/dmu_send.c Sat Nov 11 09:04:24 2006 -0800
@@ -896,10 +896,7 @@
* everything before the DRR_END record.
*/
if (drre.drr_checksum.zc_word[0] != 0 &&
- ((drre.drr_checksum.zc_word[0] - pzc.zc_word[0]) |
- (drre.drr_checksum.zc_word[1] - pzc.zc_word[1]) |
- (drre.drr_checksum.zc_word[2] - pzc.zc_word[2]) |
- (drre.drr_checksum.zc_word[3] - pzc.zc_word[3]))) {
+ !ZIO_CHECKSUM_EQUAL(drre.drr_checksum, pzc)) {
ra.err = ECKSUM;
goto out;
}
--- a/usr/src/uts/common/fs/zfs/dnode_sync.c Fri Nov 10 20:26:18 2006 -0800
+++ b/usr/src/uts/common/fs/zfs/dnode_sync.c Sat Nov 11 09:04:24 2006 -0800
@@ -62,6 +62,7 @@
db->db.db_size);
bcopy(dn->dn_phys->dn_blkptr, db->db.db_data,
sizeof (blkptr_t) * dn->dn_phys->dn_nblkptr);
+ arc_buf_freeze(db->db_buf);
}
dn->dn_phys->dn_nlevels += 1;
@@ -164,8 +165,7 @@
/* db_data_old better be zeroed */
if (child->db_d.db_data_old[txg & TXG_MASK]) {
- buf = ((arc_buf_t *)child->db_d.db_data_old
- [txg & TXG_MASK])->b_data;
+ buf = child->db_d.db_data_old[txg & TXG_MASK]->b_data;
for (j = 0; j < child->db.db_size >> 3; j++) {
if (buf[j] != 0) {
panic("freed data not zero: "
@@ -238,6 +238,7 @@
if (db->db_level == 1) {
FREE_VERIFY(db, start, end, tx);
free_blocks(dn, bp, end-start+1, tx);
+ arc_buf_freeze(db->db_buf);
ASSERT(all || list_link_active(&db->db_dirty_node[txgoff]));
return (all);
}
@@ -258,6 +259,7 @@
}
dbuf_rele(subdb, FTAG);
}
+ arc_buf_freeze(db->db_buf);
#ifdef ZFS_DEBUG
bp -= (end-start)+1;
for (i = start; i <= end; i++, bp++) {
--- a/usr/src/uts/common/fs/zfs/sys/arc.h Fri Nov 10 20:26:18 2006 -0800
+++ b/usr/src/uts/common/fs/zfs/sys/arc.h Sat Nov 11 09:04:24 2006 -0800
@@ -69,6 +69,8 @@
void arc_release(arc_buf_t *buf, void *tag);
int arc_released(arc_buf_t *buf);
int arc_has_callback(arc_buf_t *buf);
+void arc_buf_freeze(arc_buf_t *buf);
+void arc_buf_thaw(arc_buf_t *buf);
#ifdef ZFS_DEBUG
int arc_referenced(arc_buf_t *buf);
#endif
--- a/usr/src/uts/common/fs/zfs/sys/dbuf.h Fri Nov 10 20:26:18 2006 -0800
+++ b/usr/src/uts/common/fs/zfs/sys/dbuf.h Sat Nov 11 09:04:24 2006 -0800
@@ -196,7 +196,7 @@
* modify (dirty or clean). db_mtx must be held
* before dn_dirty_mtx.
*/
- void *db_data_old[TXG_SIZE];
+ arc_buf_t *db_data_old[TXG_SIZE];
blkptr_t *db_overridden_by[TXG_SIZE];
} db_d;
} dmu_buf_impl_t;
--- a/usr/src/uts/common/fs/zfs/sys/spa.h Fri Nov 10 20:26:18 2006 -0800
+++ b/usr/src/uts/common/fs/zfs/sys/spa.h Sat Nov 11 09:04:24 2006 -0800
@@ -252,6 +252,13 @@
((dva1)->dva_word[1] == (dva2)->dva_word[1] && \
(dva1)->dva_word[0] == (dva2)->dva_word[0])
+#define ZIO_CHECKSUM_EQUAL(zc1, zc2) \
+ (0 == (((zc1).zc_word[0] - (zc2).zc_word[0]) | \
+ ((zc1).zc_word[1] - (zc2).zc_word[1]) | \
+ ((zc1).zc_word[2] - (zc2).zc_word[2]) | \
+ ((zc1).zc_word[3] - (zc2).zc_word[3])))
+
+
#define DVA_IS_VALID(dva) (DVA_GET_ASIZE(dva) != 0)
#define ZIO_SET_CHECKSUM(zcp, w0, w1, w2, w3) \
--- a/usr/src/uts/common/fs/zfs/sys/zfs_debug.h Fri Nov 10 20:26:18 2006 -0800
+++ b/usr/src/uts/common/fs/zfs/sys/zfs_debug.h Sat Nov 11 09:04:24 2006 -0800
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -55,6 +54,7 @@
#define ZFS_DEBUG_DBUF_VERIFY 0x0002
#define ZFS_DEBUG_DNODE_VERIFY 0x0004
#define ZFS_DEBUG_SNAPNAMES 0x0008
+#define ZFS_DEBUG_MODIFY 0x0010
#ifdef ZFS_DEBUG
extern void __dprintf(const char *file, const char *func,
--- a/usr/src/uts/common/fs/zfs/zio_checksum.c Fri Nov 10 20:26:18 2006 -0800
+++ b/usr/src/uts/common/fs/zfs/zio_checksum.c Sat Nov 11 09:04:24 2006 -0800
@@ -162,10 +162,7 @@
ci->ci_func[byteswap](data, size, &actual_cksum);
}
- if ((actual_cksum.zc_word[0] - zc.zc_word[0]) |
- (actual_cksum.zc_word[1] - zc.zc_word[1]) |
- (actual_cksum.zc_word[2] - zc.zc_word[2]) |
- (actual_cksum.zc_word[3] - zc.zc_word[3]))
+ if (!ZIO_CHECKSUM_EQUAL(actual_cksum, zc))
return (ECKSUM);
if (zio_injection_enabled && !zio->io_error)