6875779 zfs user accounting callbacks can be simplified
6771468 ::blkptr prints incorrectly on 32-bit
6832861 zcmd_alloc_dst_nvlist's default size is too small
6876808 want ::refcount to print refcount_t details
--- a/usr/src/cmd/mdb/common/modules/zfs/zfs.c Fri Aug 28 11:22:11 2009 -0700
+++ b/usr/src/cmd/mdb/common/modules/zfs/zfs.c Fri Aug 28 13:57:58 2009 -0700
@@ -36,6 +36,7 @@
#include <sys/spa_impl.h>
#include <sys/vdev_impl.h>
#include <sys/zio_compress.h>
+#include <ctype.h>
#ifndef _KERNEL
#include "../genunix/list.h"
@@ -450,7 +451,7 @@
DVA_GET_VDEV(dva), DVA_GET_OFFSET(dva));
mdb_printf("DVA[%d]: GANG: %-5s GRID: %04x\t"
"ASIZE: %llx\n", i, DVA_GET_GANG(dva) ? "TRUE" : "FALSE",
- DVA_GET_GRID(dva), DVA_GET_ASIZE(dva));
+ (int)DVA_GET_GRID(dva), DVA_GET_ASIZE(dva));
mdb_printf("DVA[%d]: :%llu:%llx:%llx:%s%s%s%s\n", i,
DVA_GET_VDEV(dva), DVA_GET_OFFSET(dva), BP_GET_PSIZE(&bp),
BP_SHOULD_BYTESWAP(&bp) ? "e" : "",
@@ -464,7 +465,7 @@
BP_GET_BYTEORDER(&bp) ? "LITTLE" : "BIG",
doti[BP_GET_TYPE(&bp)].ot_name);
mdb_printf("BIRTH: %-16llx LEVEL: %-2d\tFILL: %llx\n",
- bp.blk_birth, BP_GET_LEVEL(&bp), bp.blk_fill);
+ bp.blk_birth, (int)BP_GET_LEVEL(&bp), bp.blk_fill);
mdb_printf("CKFUNC: %-16s\t\tCOMP: %s\n",
zci[BP_GET_CHECKSUM(&bp)].ci_name,
zct[BP_GET_COMPRESS(&bp)].ci_name);
@@ -2145,6 +2146,114 @@
return (DCMD_OK);
}
+/*
+ * List-walk callback used by ::refcount: print one "struct reference".
+ *
+ * addr is the target address of the reference; arg carries a boolean_t
+ * packed into the void * by the caller.  When it is true the entry is
+ * reported as a removed reference and the ref_removed address is passed
+ * to ::whatis as well.  The holder tag is always printed as a pointer;
+ * it is additionally printed as a quoted string when mdb_readstr() can
+ * read it and every character read is printable.
+ *
+ * Returns WALK_NEXT on success, or WALK_ERR if the CTF type cannot be
+ * found or one of the members cannot be read.
+ */
+/* ARGSUSED */
+static int
+reference_cb(uintptr_t addr, const void *ignored, void *arg)
+{
+	static int gotid;
+	static mdb_ctf_id_t ref_id;
+	uintptr_t ref_holder;
+	uintptr_t ref_removed;
+	uint64_t ref_number;
+	boolean_t holder_is_str;
+	char holder_str[128];
+	/* Convert via uintptr_t so the pointer is never cast straight to an enum. */
+	boolean_t removed = (boolean_t)(uintptr_t)arg;
+
+	/* The CTF id is looked up once and cached in the statics above. */
+	if (!gotid) {
+		if (mdb_ctf_lookup_by_name("struct reference", &ref_id) == -1) {
+			mdb_warn("couldn't find struct reference");
+			return (WALK_ERR);
+		}
+		gotid = TRUE;
+	}
+
+	if (GETMEMBID(addr, &ref_id, ref_holder, ref_holder) ||
+	    GETMEMBID(addr, &ref_id, ref_removed, ref_removed) ||
+	    GETMEMBID(addr, &ref_id, ref_number, ref_number))
+		return (WALK_ERR);
+
+	if (mdb_readstr(holder_str, sizeof (holder_str), ref_holder) != -1) {
+		char *cp;
+		holder_is_str = B_TRUE;
+		for (cp = holder_str; *cp; cp++) {
+			/*
+			 * The tag may point at arbitrary bytes; <ctype.h>
+			 * functions are only defined for values that fit in
+			 * an unsigned char (or EOF), so never pass a plain,
+			 * possibly negative, char.
+			 */
+			if (!isprint((unsigned char)*cp)) {
+				holder_is_str = B_FALSE;
+				break;
+			}
+		}
+	} else {
+		holder_is_str = B_FALSE;
+	}
+
+	if (removed)
+		mdb_printf("removed ");
+	mdb_printf("reference ");
+	if (ref_number != 1)
+		mdb_printf("with count=%llu ", ref_number);
+	mdb_printf("with tag %p", (void*)ref_holder);
+	if (holder_is_str)
+		mdb_printf(" \"%s\"", holder_str);
+	mdb_printf(", held at:\n");
+
+	(void) mdb_call_dcmd("whatis", addr, DCMD_ADDRSPEC, 0, NULL);
+
+	if (removed) {
+		mdb_printf("removed at:\n");
+		(void) mdb_call_dcmd("whatis", ref_removed,
+		    DCMD_ADDRSPEC, 0, NULL);
+	}
+
+	mdb_printf("\n");
+
+	return (WALK_NEXT);
+}
+
+/*
+ * ::refcount dcmd: given the address of a refcount_t, print its current
+ * and recently released hold counts, then walk the embedded rc_list and
+ * rc_removed lists and print every entry through reference_cb().
+ *
+ * Returns DCMD_USAGE when no address was supplied, DCMD_ERR when the CTF
+ * type, a member, or a member offset cannot be obtained, and DCMD_OK
+ * otherwise.  The results of the list walks themselves are not checked.
+ */
+/* ARGSUSED */
+static int
+refcount(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+{
+	static mdb_ctf_id_t rc_id;
+	static int gotid;
+	uint64_t rc_count, rc_removed_count;
+	ulong_t member_off;
+
+	if ((flags & DCMD_ADDRSPEC) == 0)
+		return (DCMD_USAGE);
+
+	/* Look the CTF type up on first use only; the id is cached. */
+	if (!gotid) {
+		if (mdb_ctf_lookup_by_name("struct refcount", &rc_id) == -1) {
+			mdb_warn("couldn't find struct refcount");
+			return (DCMD_ERR);
+		}
+		gotid = TRUE;
+	}
+
+	if (GETMEMBID(addr, &rc_id, rc_count, rc_count))
+		return (DCMD_ERR);
+	if (GETMEMBID(addr, &rc_id, rc_removed_count, rc_removed_count))
+		return (DCMD_ERR);
+
+	mdb_printf("refcount_t at %p has %llu current holds, %llu "
+	    "recently released holds\n",
+	    addr, (longlong_t)rc_count, (longlong_t)rc_removed_count);
+
+	/* Current holds: heading only when there is something to list. */
+	if (rc_count != 0)
+		mdb_printf("current holds:\n");
+	if (mdb_ctf_offsetof(rc_id, "rc_list", &member_off) == -1)
+		return (DCMD_ERR);
+	/* The reported offset is divided by NBBY to get a byte offset. */
+	mdb_pwalk("list", reference_cb, (void*)B_FALSE,
+	    addr + member_off/NBBY);
+
+	/* Released holds: same treatment for the rc_removed list. */
+	if (rc_removed_count != 0)
+		mdb_printf("released holds:\n");
+	if (mdb_ctf_offsetof(rc_id, "rc_removed", &member_off) == -1)
+		return (DCMD_ERR);
+	mdb_pwalk("list", reference_cb, (void*)B_TRUE,
+	    addr + member_off/NBBY);
+
+	return (DCMD_OK);
+}
+
/*
* MDB module linkage information:
*
@@ -2186,6 +2295,7 @@
"given a spa_t, print block type stats from last scrub",
zfs_blkstats },
{ "zfs_params", "", "print zfs tunable parameters", zfs_params },
+ { "refcount", "", "print refcount_t holders", refcount },
{ NULL }
};
--- a/usr/src/lib/libzfs/common/libzfs_util.c Fri Aug 28 11:22:11 2009 -0700
+++ b/usr/src/lib/libzfs/common/libzfs_util.c Fri Aug 28 13:57:58 2009 -0700
@@ -688,7 +688,7 @@
zcmd_alloc_dst_nvlist(libzfs_handle_t *hdl, zfs_cmd_t *zc, size_t len)
{
if (len == 0)
- len = 2048;
+ len = 4*1024;
zc->zc_nvlist_dst_size = len;
if ((zc->zc_nvlist_dst = (uint64_t)(uintptr_t)
zfs_alloc(hdl, zc->zc_nvlist_dst_size)) == NULL)
--- a/usr/src/uts/common/fs/zfs/arc.c Fri Aug 28 11:22:11 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/arc.c Fri Aug 28 13:57:58 2009 -0700
@@ -2842,41 +2842,6 @@
return (0);
}
-/*
- * arc_read() variant to support pool traversal. If the block is already
- * in the ARC, make a copy of it; otherwise, the caller will do the I/O.
- * The idea is that we don't want pool traversal filling up memory, but
- * if the ARC already has the data anyway, we shouldn't pay for the I/O.
- */
-int
-arc_tryread(spa_t *spa, blkptr_t *bp, void *data)
-{
- arc_buf_hdr_t *hdr;
- kmutex_t *hash_mtx;
- uint64_t guid = spa_guid(spa);
- int rc = 0;
-
- hdr = buf_hash_find(guid, BP_IDENTITY(bp), bp->blk_birth, &hash_mtx);
-
- if (hdr && hdr->b_datacnt > 0 && !HDR_IO_IN_PROGRESS(hdr)) {
- arc_buf_t *buf = hdr->b_buf;
-
- ASSERT(buf);
- while (buf->b_data == NULL) {
- buf = buf->b_next;
- ASSERT(buf);
- }
- bcopy(buf->b_data, data, hdr->b_size);
- } else {
- rc = ENOENT;
- }
-
- if (hash_mtx)
- mutex_exit(hash_mtx);
-
- return (rc);
-}
-
void
arc_set_callback(arc_buf_t *buf, arc_evict_func_t *func, void *private)
{
--- a/usr/src/uts/common/fs/zfs/dmu_objset.c Fri Aug 28 11:22:11 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/dmu_objset.c Fri Aug 28 13:57:58 2009 -0700
@@ -1017,18 +1017,39 @@
os->os_userused_dnode);
}
+/*
+ * Charge (or, when subtract is set, un-charge) the space of one on-disk
+ * dnode to its owning user and group.
+ *
+ * Nothing is done unless the dnode carries DNODE_FLAG_USERUSED_ACCOUNTED
+ * and the objset type's registered callback returns 0 after filling in
+ * the user and group ids from the bonus buffer.  The amount applied is
+ * DNODE_SIZE plus DN_USED_BYTES(dnp), negated when subtracting, and it is
+ * added to both the DMU_USERUSED_OBJECT and DMU_GROUPUSED_OBJECT zap
+ * objects within tx.
+ */
+static void
+do_userquota_callback(objset_t *os, dnode_phys_t *dnp,
+    boolean_t subtract, dmu_tx_t *tx)
+{
+	static const char zerobuf[DN_MAX_BONUSLEN] = {0};
+	uint64_t user, group;
+	int64_t delta;
+
+	/* An unallocated dnode must have an all-zero bonus and no space. */
+	ASSERT(dnp->dn_type != 0 ||
+	    (bcmp(DN_BONUS(dnp), zerobuf, DN_MAX_BONUSLEN) == 0 &&
+	    DN_USED_BYTES(dnp) == 0));
+
+	if (!(dnp->dn_flags & DNODE_FLAG_USERUSED_ACCOUNTED))
+		return;
+
+	/* A non-zero return means there is no owner to charge. */
+	if (used_cbs[os->os_phys->os_type](dnp->dn_bonustype,
+	    DN_BONUS(dnp), &user, &group) != 0)
+		return;
+
+	delta = DNODE_SIZE + DN_USED_BYTES(dnp);
+	if (subtract)
+		delta = -delta;
+
+	VERIFY(0 == zap_increment_int(os, DMU_USERUSED_OBJECT,
+	    user, delta, tx));
+	VERIFY(0 == zap_increment_int(os, DMU_GROUPUSED_OBJECT,
+	    group, delta, tx));
+}
+
void
dmu_objset_do_userquota_callbacks(objset_t *os, dmu_tx_t *tx)
{
dnode_t *dn;
list_t *list = &os->os_synced_dnodes;
- static const char zerobuf[DN_MAX_BONUSLEN] = {0};
ASSERT(list_head(list) == NULL || dmu_objset_userused_enabled(os));
while (dn = list_head(list)) {
- dmu_object_type_t bonustype;
-
ASSERT(!DMU_OBJECT_IS_SPECIAL(dn->dn_object));
ASSERT(dn->dn_oldphys);
ASSERT(dn->dn_phys->dn_type == DMU_OT_NONE ||
@@ -1046,31 +1067,14 @@
}
/*
- * If the object was not previously
- * accounted, pretend that it was free.
- */
- if (!(dn->dn_oldphys->dn_flags &
- DNODE_FLAG_USERUSED_ACCOUNTED)) {
- bzero(dn->dn_oldphys, sizeof (dnode_phys_t));
- }
-
- /*
- * If the object was freed, use the previous bonustype.
+ * We intentionally modify the zap object even if the
+ * net delta (due to phys-oldphys) is zero. Otherwise
+ * the block of the zap obj could be shared between
+ * datasets but need to be different between them after
+ * a bprewrite.
*/
- bonustype = dn->dn_phys->dn_bonustype ?
- dn->dn_phys->dn_bonustype : dn->dn_oldphys->dn_bonustype;
- ASSERT(dn->dn_phys->dn_type != 0 ||
- (bcmp(DN_BONUS(dn->dn_phys), zerobuf,
- DN_MAX_BONUSLEN) == 0 &&
- DN_USED_BYTES(dn->dn_phys) == 0));
- ASSERT(dn->dn_oldphys->dn_type != 0 ||
- (bcmp(DN_BONUS(dn->dn_oldphys), zerobuf,
- DN_MAX_BONUSLEN) == 0 &&
- DN_USED_BYTES(dn->dn_oldphys) == 0));
- used_cbs[os->os_phys->os_type](os, bonustype,
- DN_BONUS(dn->dn_oldphys), DN_BONUS(dn->dn_phys),
- DN_USED_BYTES(dn->dn_oldphys),
- DN_USED_BYTES(dn->dn_phys), tx);
+ do_userquota_callback(os, dn->dn_oldphys, B_TRUE, tx);
+ do_userquota_callback(os, dn->dn_phys, B_FALSE, tx);
/*
* The mutex is needed here for interlock with dnode_allocate.
--- a/usr/src/uts/common/fs/zfs/sys/arc.h Fri Aug 28 11:22:11 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/arc.h Fri Aug 28 13:57:58 2009 -0700
@@ -120,7 +120,6 @@
int zio_flags, const zbookmark_t *zb);
int arc_free(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
zio_done_func_t *done, void *private, uint32_t arc_flags);
-int arc_tryread(spa_t *spa, blkptr_t *bp, void *data);
void arc_set_callback(arc_buf_t *buf, arc_evict_func_t *func, void *private);
int arc_buf_evict(arc_buf_t *buf);
--- a/usr/src/uts/common/fs/zfs/sys/dmu.h Fri Aug 28 11:22:11 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/dmu.h Fri Aug 28 13:57:58 2009 -0700
@@ -581,9 +581,8 @@
extern int dmu_dir_list_next(objset_t *os, int namelen, char *name,
uint64_t *idp, uint64_t *offp);
-typedef void objset_used_cb_t(objset_t *os, dmu_object_type_t bonustype,
- void *oldbonus, void *newbonus, uint64_t oldused, uint64_t newused,
- dmu_tx_t *tx);
+typedef int objset_used_cb_t(dmu_object_type_t bonustype,
+ void *bonus, uint64_t *userp, uint64_t *groupp);
extern void dmu_objset_register_type(dmu_objset_type_t ost,
objset_used_cb_t *cb);
extern void dmu_objset_set_user(objset_t *os, void *user_ptr);
--- a/usr/src/uts/common/fs/zfs/sys/zap.h Fri Aug 28 11:22:11 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/zap.h Fri Aug 28 13:57:58 2009 -0700
@@ -255,6 +255,8 @@
int zap_add_int(objset_t *os, uint64_t obj, uint64_t value, dmu_tx_t *tx);
int zap_remove_int(objset_t *os, uint64_t obj, uint64_t value, dmu_tx_t *tx);
int zap_lookup_int(objset_t *os, uint64_t obj, uint64_t value);
+int zap_increment_int(objset_t *os, uint64_t obj, uint64_t key, int64_t delta,
+ dmu_tx_t *tx);
struct zap;
struct zap_leaf;
--- a/usr/src/uts/common/fs/zfs/zap.c Fri Aug 28 11:22:11 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/zap.c Fri Aug 28 13:57:58 2009 -0700
@@ -978,6 +978,30 @@
return (zap_lookup(os, obj, name, 8, 1, &value));
}
+/*
+ * Add delta to the 64-bit integer stored in zap object obj under the
+ * attribute whose name is key printed in hexadecimal.
+ *
+ * A delta of zero returns 0 without touching the zap.  An attribute that
+ * does not exist yet (zap_lookup() returns ENOENT) is treated as holding
+ * zero.  If the new value is zero the attribute is removed, otherwise it
+ * is written back with zap_update().  Returns 0 on success or the error
+ * from the failing zap call.
+ */
+int
+zap_increment_int(objset_t *os, uint64_t obj, uint64_t key, int64_t delta,
+    dmu_tx_t *tx)
+{
+	char attr[20];
+	uint64_t total = 0;
+	int error;
+
+	if (delta == 0)
+		return (0);
+
+	(void) snprintf(attr, sizeof (attr), "%llx", (longlong_t)key);
+
+	error = zap_lookup(os, obj, attr, 8, 1, &total);
+	if (!(error == 0 || error == ENOENT))
+		return (error);
+
+	total += delta;
+	if (total != 0)
+		return (zap_update(os, obj, attr, 8, 1, &total, tx));
+
+	/* The count dropped to zero: drop the entry entirely. */
+	return (zap_remove(os, obj, attr, tx));
+}
+
+
/*
* Routines for iterating over the attributes.
*/
--- a/usr/src/uts/common/fs/zfs/zfs_vfsops.c Fri Aug 28 11:22:11 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/zfs_vfsops.c Fri Aug 28 13:57:58 2009 -0700
@@ -594,36 +594,18 @@
ASSERT(err == 0);
}
-static void
-zfs_space_delta_cb(objset_t *os, dmu_object_type_t bonustype,
- void *oldbonus, void *newbonus,
- uint64_t oldused, uint64_t newused, dmu_tx_t *tx)
+/*
+ * Report which user and group own the object described by a bonus buffer.
+ *
+ * If bonustype is not DMU_OT_ZNODE the function returns ENOENT and leaves
+ * *userp and *groupp untouched.  Otherwise the bonus buffer is read as a
+ * znode_phys_t, its zp_uid and zp_gid are stored in *userp and *groupp,
+ * and 0 is returned.
+ */
+static int
+zfs_space_delta_cb(dmu_object_type_t bonustype, void *bonus,
+ uint64_t *userp, uint64_t *groupp)
{
- znode_phys_t *oldznp = oldbonus;
- znode_phys_t *newznp = newbonus;
+ znode_phys_t *znp = bonus;
if (bonustype != DMU_OT_ZNODE)
- return;
-
- /* We charge 512 for the dnode (if it's allocated). */
- if (oldznp->zp_gen != 0)
- oldused += DNODE_SIZE;
- if (newznp->zp_gen != 0)
- newused += DNODE_SIZE;
+ return (ENOENT);
- if (oldznp->zp_uid == newznp->zp_uid) {
- uidacct(os, B_FALSE, oldznp->zp_uid, newused-oldused, tx);
- } else {
- uidacct(os, B_FALSE, oldznp->zp_uid, -oldused, tx);
- uidacct(os, B_FALSE, newznp->zp_uid, newused, tx);
- }
-
- if (oldznp->zp_gid == newznp->zp_gid) {
- uidacct(os, B_TRUE, oldznp->zp_gid, newused-oldused, tx);
- } else {
- uidacct(os, B_TRUE, oldznp->zp_gid, -oldused, tx);
- uidacct(os, B_TRUE, newznp->zp_gid, newused, tx);
- }
+ *userp = znp->zp_uid;
+ *groupp = znp->zp_gid;
+ return (0);
}
static void