PSARC 2007/555 zfs fs-only quotas and reservations
6431277 want filesystem-only quotas
6483677 need immediate reservation
--- a/usr/src/cmd/fs.d/df.c Mon Oct 29 22:26:03 2007 -0700
+++ b/usr/src/cmd/fs.d/df.c Mon Oct 29 22:45:33 2007 -0700
@@ -1224,55 +1224,60 @@
adjust_total_blocks(struct df_request *dfrp, fsblkcnt64_t *total,
uint64_t blocksize)
{
- zfs_handle_t *zhp;
char *dataset, *slash;
- uint64_t quota;
+ boolean_t first = TRUE;
+ uint64_t quota = 0;
- if (strcmp(DFR_FSTYPE(dfrp), MNTTYPE_ZFS) != 0 ||
- !load_libzfs())
+ if (strcmp(DFR_FSTYPE(dfrp), MNTTYPE_ZFS) != 0 || !load_libzfs())
return;
/*
* We want to get the total size for this filesystem as bounded by any
* quotas. In order to do this, we start at the current filesystem and
- * work upwards until we find a dataset with a quota. If we reach the
- * pool itself, then the total space is the amount used plus the amount
+ * work upwards looking for the smallest quota. When we reach the
+ * pool itself, the quota is the amount used plus the amount
* available.
*/
if ((dataset = strdup(DFR_SPECIAL(dfrp))) == NULL)
return;
slash = dataset + strlen(dataset);
- do {
+ while (slash != NULL) {
+ zfs_handle_t *zhp;
+ uint64_t this_quota;
+
*slash = '\0';
- if ((zhp = _zfs_open(g_zfs, dataset, ZFS_TYPE_DATASET))
- == NULL) {
- free(dataset);
- return;
+ zhp = _zfs_open(g_zfs, dataset, ZFS_TYPE_DATASET);
+ if (zhp == NULL)
+ break;
+
+ /* true at first iteration of loop */
+ if (first) {
+ quota = _zfs_prop_get_int(zhp, ZFS_PROP_REFQUOTA);
+ if (quota == 0)
+ quota = UINT64_MAX;
+ first = FALSE;
}
- if ((quota = _zfs_prop_get_int(zhp, ZFS_PROP_QUOTA)) != 0) {
- *total = quota / blocksize;
- _zfs_close(zhp);
- free(dataset);
- return;
+ this_quota = _zfs_prop_get_int(zhp, ZFS_PROP_QUOTA);
+ if (this_quota && this_quota < quota)
+ quota = this_quota;
+
+ /* true at last iteration of loop */
+ if ((slash = strrchr(dataset, '/')) == NULL) {
+ uint64_t size;
+
+ size = _zfs_prop_get_int(zhp, ZFS_PROP_USED) +
+ _zfs_prop_get_int(zhp, ZFS_PROP_AVAILABLE);
+ if (size < quota)
+ quota = size;
}
_zfs_close(zhp);
-
- } while ((slash = strrchr(dataset, '/')) != NULL);
-
-
- if ((zhp = _zfs_open(g_zfs, dataset, ZFS_TYPE_DATASET)) == NULL) {
- free(dataset);
- return;
}
- *total = (_zfs_prop_get_int(zhp, ZFS_PROP_USED) +
- _zfs_prop_get_int(zhp, ZFS_PROP_AVAILABLE)) / blocksize;
-
- _zfs_close(zhp);
+ *total = quota / blocksize;
free(dataset);
}
--- a/usr/src/cmd/zfs/zfs_main.c Mon Oct 29 22:26:03 2007 -0700
+++ b/usr/src/cmd/zfs/zfs_main.c Mon Oct 29 22:45:33 2007 -0700
@@ -281,7 +281,7 @@
{
FILE *fp = cb;
- (void) fprintf(fp, "\t%-13s ", zfs_prop_to_name(prop));
+ (void) fprintf(fp, "\t%-14s ", zfs_prop_to_name(prop));
if (prop == ZFS_PROP_CASE)
(void) fprintf(fp, "NO ");
@@ -348,7 +348,7 @@
(void) fprintf(fp,
gettext("\nThe following properties are supported:\n"));
- (void) fprintf(fp, "\n\t%-13s %s %s %s\n\n",
+ (void) fprintf(fp, "\n\t%-14s %s %s %s\n\n",
"PROPERTY", "EDIT", "INHERIT", "VALUES");
/* Iterate over all properties */
@@ -1270,7 +1270,9 @@
(void) fprintf(stderr, gettext("'%s' property cannot "
"be inherited\n"), propname);
if (prop == ZFS_PROP_QUOTA ||
- prop == ZFS_PROP_RESERVATION)
+ prop == ZFS_PROP_RESERVATION ||
+ prop == ZFS_PROP_REFQUOTA ||
+ prop == ZFS_PROP_REFRESERVATION)
(void) fprintf(stderr, gettext("use 'zfs set "
"%s=none' to clear\n"), propname);
return (1);
--- a/usr/src/cmd/zpool/zpool_main.c Mon Oct 29 22:26:03 2007 -0700
+++ b/usr/src/cmd/zpool/zpool_main.c Mon Oct 29 22:45:33 2007 -0700
@@ -3301,8 +3301,8 @@
(void) printf(gettext(" 6 bootfs pool property\n"));
(void) printf(gettext(" 7 Separate intent log devices\n"));
(void) printf(gettext(" 8 Delegated administration\n"));
- (void) printf(gettext(" 9 Case insensitive support and "
- "File system unique identifiers (FUID)\n"));
+ (void) printf(gettext(" 9 refquota and refreservation "
+ "properties\n"));
(void) printf(gettext("For more information on a particular "
"version, including supported releases, see:\n\n"));
(void) printf("http://www.opensolaris.org/os/community/zfs/"
@@ -3385,6 +3385,8 @@
"rollback",
"snapshot",
"filesystem version upgrade",
+ "refquota set",
+ "refreservation set",
};
/*
--- a/usr/src/common/zfs/zfs_prop.c Mon Oct 29 22:26:03 2007 -0700
+++ b/usr/src/common/zfs/zfs_prop.c Mon Oct 29 22:45:33 2007 -0700
@@ -250,6 +250,11 @@
ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<size> | none", "RESERV");
register_number(ZFS_PROP_VOLSIZE, "volsize", 0, PROP_DEFAULT,
ZFS_TYPE_VOLUME, "<size>", "VOLSIZE");
+ register_number(ZFS_PROP_REFQUOTA, "refquota", 0, PROP_DEFAULT,
+ ZFS_TYPE_FILESYSTEM, "<size> | none", "REFQUOTA");
+ register_number(ZFS_PROP_REFRESERVATION, "refreservation", 0,
+ PROP_DEFAULT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
+ "<size> | none", "REFRESERV");
/* inherit number properties */
register_number(ZFS_PROP_RECORDSIZE, "recordsize", SPA_MAXBLOCKSIZE,
--- a/usr/src/lib/libzfs/common/libzfs_dataset.c Mon Oct 29 22:26:03 2007 -0700
+++ b/usr/src/lib/libzfs/common/libzfs_dataset.c Mon Oct 29 22:45:33 2007 -0700
@@ -772,6 +772,7 @@
switch (prop) {
case ZFS_PROP_RESERVATION:
+ case ZFS_PROP_REFRESERVATION:
if (intval > volsize) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"'%s' is greater than current "
@@ -1627,6 +1628,7 @@
*/
switch (prop) {
case ZFS_PROP_QUOTA:
+ case ZFS_PROP_REFQUOTA:
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"size is less than current used or "
"reserved space"));
@@ -1634,6 +1636,7 @@
break;
case ZFS_PROP_RESERVATION:
+ case ZFS_PROP_REFRESERVATION:
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"size is greater than available space"));
(void) zfs_error(hdl, EZFS_PROPSPACE, errbuf);
@@ -1953,7 +1956,9 @@
break;
case ZFS_PROP_QUOTA:
+ case ZFS_PROP_REFQUOTA:
case ZFS_PROP_RESERVATION:
+ case ZFS_PROP_REFRESERVATION:
*val = getprop_uint64(zhp, prop, source);
if (*val == 0)
*source = ""; /* default */
@@ -2122,7 +2127,10 @@
break;
case ZFS_PROP_QUOTA:
+ case ZFS_PROP_REFQUOTA:
case ZFS_PROP_RESERVATION:
+ case ZFS_PROP_REFRESERVATION:
+
if (get_numeric_property(zhp, prop, src, &source, &val) != 0)
return (-1);
--- a/usr/src/lib/libzfs/common/libzfs_util.c Mon Oct 29 22:26:03 2007 -0700
+++ b/usr/src/lib/libzfs/common/libzfs_util.c Mon Oct 29 22:45:33 2007 -0700
@@ -1065,7 +1065,6 @@
const char *propname;
char *value;
boolean_t isnone = B_FALSE;
- boolean_t boolval;
if (type == ZFS_TYPE_POOL) {
proptype = zpool_prop_get_type(prop);
@@ -1116,34 +1115,23 @@
/*
* Quota special: force 'none' and don't allow 0.
*/
- if ((type & ZFS_TYPE_DATASET) && *ivalp == 0 &&
- !isnone && prop == ZFS_PROP_QUOTA) {
+ if ((type & ZFS_TYPE_DATASET) && *ivalp == 0 && !isnone &&
+ (prop == ZFS_PROP_QUOTA || prop == ZFS_PROP_REFQUOTA)) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "use 'none' to disable quota"));
+ "use 'none' to disable quota/refquota"));
goto error;
}
break;
case PROP_TYPE_INDEX:
- switch (datatype) {
- case DATA_TYPE_STRING:
- (void) nvpair_value_string(elem, &value);
- break;
-
- case DATA_TYPE_BOOLEAN_VALUE:
- (void) nvpair_value_boolean_value(elem, &boolval);
- if (boolval)
- value = "on";
- else
- value = "off";
- break;
-
- default:
+ if (datatype != DATA_TYPE_STRING) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"'%s' must be a string"), nvpair_name(elem));
goto error;
}
+ (void) nvpair_value_string(elem, &value);
+
if (zprop_string_to_index(prop, value, ivalp, type) != 0) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"'%s' must be one of '%s'"), propname,
--- a/usr/src/lib/libzfs_jni/common/libzfs_jni_property.c Mon Oct 29 22:26:03 2007 -0700
+++ b/usr/src/lib/libzfs_jni/common/libzfs_jni_property.c Mon Oct 29 22:45:33 2007 -0700
@@ -100,6 +100,8 @@
ZFS_PROP_RESERVATION,
ZFS_PROP_USED,
ZFS_PROP_VOLSIZE,
+ ZFS_PROP_REFQUOTA,
+ ZFS_PROP_REFRESERVATION,
ZPROP_INVAL
};
--- a/usr/src/uts/common/fs/zfs/dmu_send.c Mon Oct 29 22:26:03 2007 -0700
+++ b/usr/src/uts/common/fs/zfs/dmu_send.c Mon Oct 29 22:45:33 2007 -0700
@@ -498,6 +498,10 @@
VERIFY(0 == dsl_dataset_open_obj(dp, dsobj, NULL,
DS_MODE_EXCLUSIVE, dmu_recv_tag, &cds));
+ /* copy the refquota from the target fs to the clone */
+ if (ohds->ds_quota > 0)
+ dsl_dataset_set_quota_sync(cds, &ohds->ds_quota, cr, tx);
+
dmu_buf_will_dirty(cds->ds_dbuf, tx);
cds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT;
@@ -513,6 +517,7 @@
recv_offline_incremental_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
{
dsl_dataset_t *ds = arg1;
+
dmu_buf_will_dirty(ds->ds_dbuf, tx);
ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT;
--- a/usr/src/uts/common/fs/zfs/dmu_tx.c Mon Oct 29 22:26:03 2007 -0700
+++ b/usr/src/uts/common/fs/zfs/dmu_tx.c Mon Oct 29 22:45:33 2007 -0700
@@ -294,6 +294,8 @@
txh->txh_space_tooverwrite += space;
} else {
txh->txh_space_towrite += space;
+ if (dn && dn->dn_dbuf->db_blkptr)
+ txh->txh_space_tounref += space;
}
}
@@ -319,7 +321,7 @@
dmu_tx_count_free(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
{
uint64_t blkid, nblks;
- uint64_t space = 0;
+ uint64_t space = 0, unref = 0;
dnode_t *dn = txh->txh_dnode;
dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset;
spa_t *spa = txh->txh_tx->tx_pool->dp_spa;
@@ -383,6 +385,7 @@
dprintf_bp(bp, "can free old%s", "");
space += bp_get_dasize(spa, bp);
}
+ unref += BP_GET_ASIZE(bp);
}
nblks = 0;
}
@@ -418,6 +421,7 @@
"can free old%s", "");
space += bp_get_dasize(spa, &bp[i]);
}
+ unref += BP_GET_ASIZE(bp);
}
dbuf_rele(dbuf, FTAG);
}
@@ -432,6 +436,7 @@
rw_exit(&dn->dn_struct_rwlock);
txh->txh_space_tofree += space;
+ txh->txh_space_tounref += unref;
}
void
@@ -550,10 +555,13 @@
* the size will change between now and the dbuf dirty call.
*/
if (dsl_dataset_block_freeable(dn->dn_objset->os_dsl_dataset,
- dn->dn_phys->dn_blkptr[0].blk_birth))
+ dn->dn_phys->dn_blkptr[0].blk_birth)) {
txh->txh_space_tooverwrite += SPA_MAXBLOCKSIZE;
- else
+ } else {
txh->txh_space_towrite += SPA_MAXBLOCKSIZE;
+ txh->txh_space_tounref +=
+ BP_GET_ASIZE(dn->dn_phys->dn_blkptr);
+ }
return;
}
@@ -733,8 +741,9 @@
dmu_tx_try_assign(dmu_tx_t *tx, uint64_t txg_how)
{
dmu_tx_hold_t *txh;
- uint64_t lsize, asize, fsize, towrite, tofree, tooverwrite;
spa_t *spa = tx->tx_pool->dp_spa;
+ uint64_t lsize, asize, fsize, usize;
+ uint64_t towrite, tofree, tooverwrite, tounref;
ASSERT3U(tx->tx_txg, ==, 0);
@@ -767,7 +776,7 @@
* dmu_tx_unassign() logic.
*/
- towrite = tofree = tooverwrite = 0;
+ towrite = tofree = tooverwrite = tounref = 0;
for (txh = list_head(&tx->tx_holds); txh;
txh = list_next(&tx->tx_holds, txh)) {
dnode_t *dn = txh->txh_dnode;
@@ -787,6 +796,7 @@
towrite += txh->txh_space_towrite;
tofree += txh->txh_space_tofree;
tooverwrite += txh->txh_space_tooverwrite;
+ tounref += txh->txh_space_tounref;
}
/*
@@ -813,16 +823,18 @@
fsize = spa_get_asize(tx->tx_pool->dp_spa, tooverwrite) + tofree;
lsize = towrite + tooverwrite;
asize = spa_get_asize(tx->tx_pool->dp_spa, lsize);
+ usize = spa_get_asize(tx->tx_pool->dp_spa, tounref);
#ifdef ZFS_DEBUG
tx->tx_space_towrite = asize;
tx->tx_space_tofree = tofree;
tx->tx_space_tooverwrite = tooverwrite;
+ tx->tx_space_tounref = tounref;
#endif
if (tx->tx_dir && asize != 0) {
int err = dsl_dir_tempreserve_space(tx->tx_dir,
- lsize, asize, fsize, &tx->tx_tempreserve_cookie, tx);
+ lsize, asize, fsize, usize, &tx->tx_tempreserve_cookie, tx);
if (err)
return (err);
}
--- a/usr/src/uts/common/fs/zfs/dsl_dataset.c Mon Oct 29 22:26:03 2007 -0700
+++ b/usr/src/uts/common/fs/zfs/dsl_dataset.c Mon Oct 29 22:45:33 2007 -0700
@@ -45,6 +45,7 @@
static dsl_syncfunc_t dsl_dataset_destroy_begin_sync;
static dsl_checkfunc_t dsl_dataset_rollback_check;
static dsl_syncfunc_t dsl_dataset_rollback_sync;
+static dsl_syncfunc_t dsl_dataset_set_reservation_sync;
#define DS_REF_MAX (1ULL << 62)
@@ -67,6 +68,25 @@
DS_REF_MAX /* DS_MODE_EXCLUSIVE - no other opens */
};
+/*
+ * Figure out how much of this delta should be propagated to the dsl_dir
+ * layer. If there's a refreservation, that space has already been
+ * partially accounted for in our ancestors.
+ */
+static int64_t
+parent_delta(dsl_dataset_t *ds, int64_t delta)
+{
+ uint64_t old_bytes, new_bytes;
+
+ if (ds->ds_reserved == 0)
+ return (delta);
+
+ old_bytes = MAX(ds->ds_phys->ds_unique_bytes, ds->ds_reserved);
+ new_bytes = MAX(ds->ds_phys->ds_unique_bytes + delta, ds->ds_reserved);
+
+ ASSERT3U(ABS((int64_t)(new_bytes - old_bytes)), <=, ABS(delta));
+ return (new_bytes - old_bytes);
+}
void
dsl_dataset_block_born(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
@@ -74,6 +94,7 @@
int used = bp_get_dasize(tx->tx_pool->dp_spa, bp);
int compressed = BP_GET_PSIZE(bp);
int uncompressed = BP_GET_UCSIZE(bp);
+ int64_t delta;
dprintf_bp(bp, "born, ds=%p\n", ds);
@@ -96,13 +117,13 @@
}
dmu_buf_will_dirty(ds->ds_dbuf, tx);
mutex_enter(&ds->ds_lock);
+ delta = parent_delta(ds, used);
ds->ds_phys->ds_used_bytes += used;
ds->ds_phys->ds_compressed_bytes += compressed;
ds->ds_phys->ds_uncompressed_bytes += uncompressed;
ds->ds_phys->ds_unique_bytes += used;
mutex_exit(&ds->ds_lock);
- dsl_dir_diduse_space(ds->ds_dir,
- used, compressed, uncompressed, tx);
+ dsl_dir_diduse_space(ds->ds_dir, delta, compressed, uncompressed, tx);
}
void
@@ -140,6 +161,7 @@
if (bp->blk_birth > ds->ds_phys->ds_prev_snap_txg) {
int err;
+ int64_t delta;
dprintf_bp(bp, "freeing: %s", "");
err = arc_free(pio, tx->tx_pool->dp_spa,
@@ -147,12 +169,13 @@
ASSERT(err == 0);
mutex_enter(&ds->ds_lock);
- /* XXX unique_bytes is not accurate for head datasets */
- /* ASSERT3U(ds->ds_phys->ds_unique_bytes, >=, used); */
+ ASSERT(ds->ds_phys->ds_unique_bytes >= used ||
+ !DS_UNIQUE_IS_ACCURATE(ds));
+ delta = parent_delta(ds, -used);
ds->ds_phys->ds_unique_bytes -= used;
mutex_exit(&ds->ds_lock);
dsl_dir_diduse_space(ds->ds_dir,
- -used, -compressed, -uncompressed, tx);
+ delta, -compressed, -uncompressed, tx);
} else {
dprintf_bp(bp, "putting on dead list: %s", "");
VERIFY(0 == bplist_enqueue(&ds->ds_deadlist, bp, tx));
@@ -375,6 +398,24 @@
ds->ds_fsid_guid =
unique_insert(ds->ds_phys->ds_fsid_guid);
}
+
+ if (!dsl_dataset_is_snapshot(ds)) {
+ boolean_t need_lock =
+ !RW_LOCK_HELD(&dp->dp_config_rwlock);
+
+ if (need_lock)
+ rw_enter(&dp->dp_config_rwlock, RW_READER);
+ VERIFY(0 == dsl_prop_get_ds_locked(ds->ds_dir,
+ "refreservation", sizeof (uint64_t), 1,
+ &ds->ds_reserved, NULL));
+ VERIFY(0 == dsl_prop_get_ds_locked(ds->ds_dir,
+ "refquota", sizeof (uint64_t), 1, &ds->ds_quota,
+ NULL));
+ if (need_lock)
+ rw_exit(&dp->dp_config_rwlock);
+ } else {
+ ds->ds_reserved = ds->ds_quota = 0;
+ }
}
ASSERT3P(ds->ds_dbuf, ==, dbuf);
ASSERT3P(ds->ds_phys, ==, dbuf->db_data);
@@ -591,6 +632,8 @@
dsphys->ds_creation_txg = tx->tx_txg;
dsphys->ds_deadlist_obj =
bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
+ if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE)
+ dsphys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
dmu_buf_rele(dbuf, FTAG);
dmu_buf_will_dirty(dd->dd_dbuf, tx);
@@ -633,6 +676,9 @@
dsphys->ds_creation_txg = tx->tx_txg;
dsphys->ds_deadlist_obj =
bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
+ if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE)
+ dsphys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
+
if (origin) {
dsphys->ds_prev_snap_obj = origin->ds_object;
dsphys->ds_prev_snap_txg =
@@ -943,10 +989,53 @@
}
}
+/*
+ * The unique space in the head dataset can be calculated by subtracting
+ * the space used in the most recent snapshot, that is still being used
+ * in this file system, from the space currently in use. To figure out
+ * the space in the most recent snapshot still in use, we need to take
+ * the total space used in the snapshot and subtract out the space that
+ * has been freed up since the snapshot was taken.
+ */
+static void
+dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds)
+{
+ uint64_t mrs_used;
+ uint64_t dlused, dlcomp, dluncomp;
+
+ ASSERT(ds->ds_object == ds->ds_dir->dd_phys->dd_head_dataset_obj);
+
+ if (ds->ds_phys->ds_prev_snap_obj != 0)
+ mrs_used = ds->ds_prev->ds_phys->ds_used_bytes;
+ else
+ mrs_used = 0;
+
+ VERIFY(0 == bplist_space(&ds->ds_deadlist, &dlused, &dlcomp,
+ &dluncomp));
+
+ ASSERT3U(dlused, <=, mrs_used);
+ ds->ds_phys->ds_unique_bytes =
+ ds->ds_phys->ds_used_bytes - (mrs_used - dlused);
+
+ if (!DS_UNIQUE_IS_ACCURATE(ds) &&
+ spa_version(ds->ds_dir->dd_pool->dp_spa) >=
+ SPA_VERSION_UNIQUE_ACCURATE)
+ ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
+}
+
+static uint64_t
+dsl_dataset_unique(dsl_dataset_t *ds)
+{
+ if (!DS_UNIQUE_IS_ACCURATE(ds) && !dsl_dataset_is_snapshot(ds))
+ dsl_dataset_recalc_head_uniq(ds);
+
+ return (ds->ds_phys->ds_unique_bytes);
+}
+
struct killarg {
- uint64_t *usedp;
- uint64_t *compressedp;
- uint64_t *uncompressedp;
+ int64_t *usedp;
+ int64_t *compressedp;
+ int64_t *uncompressedp;
zio_t *zio;
dmu_tx_t *tx;
};
@@ -1042,7 +1131,7 @@
{
/* Free blkptrs that we gave birth to */
zio_t *zio;
- uint64_t used = 0, compressed = 0, uncompressed = 0;
+ int64_t used = 0, compressed = 0, uncompressed = 0;
struct killarg ka;
zio = zio_root(tx->tx_pool->dp_spa, NULL, NULL,
@@ -1175,7 +1264,7 @@
dsl_dataset_destroy_sync(void *arg1, void *tag, cred_t *cr, dmu_tx_t *tx)
{
dsl_dataset_t *ds = arg1;
- uint64_t used = 0, compressed = 0, uncompressed = 0;
+ int64_t used = 0, compressed = 0, uncompressed = 0;
zio_t *zio;
int err;
int after_branch_point = FALSE;
@@ -1190,6 +1279,13 @@
ds->ds_prev->ds_phys->ds_next_snap_obj != ds->ds_object);
ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg);
+ /* Remove our reservation */
+ if (ds->ds_reserved != 0) {
+ uint64_t val = 0;
+ dsl_dataset_set_reservation_sync(ds, &val, cr, tx);
+ ASSERT3U(ds->ds_reserved, ==, 0);
+ }
+
ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));
obj = ds->ds_object;
@@ -1223,6 +1319,7 @@
blkptr_t bp;
dsl_dataset_t *ds_next;
uint64_t itor = 0;
+ uint64_t old_unique;
spa_scrub_restart(dp->dp_spa, tx->tx_txg);
@@ -1231,6 +1328,8 @@
DS_MODE_NONE, FTAG, &ds_next));
ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj);
+ old_unique = dsl_dataset_unique(ds_next);
+
dmu_buf_will_dirty(ds_next->ds_dbuf, tx);
ds_next->ds_phys->ds_prev_snap_obj =
ds->ds_phys->ds_prev_snap_obj;
@@ -1312,13 +1411,6 @@
dsl_dataset_close(ds_after_next, DS_MODE_NONE, FTAG);
ASSERT3P(ds_next->ds_prev, ==, NULL);
} else {
- /*
- * It would be nice to update the head dataset's
- * unique. To do so we would have to traverse
- * it for blocks born after ds_prev, which is
- * pretty expensive just to maintain something
- * for debugging purposes.
- */
ASSERT3P(ds_next->ds_prev, ==, ds);
dsl_dataset_close(ds_next->ds_prev, DS_MODE_NONE,
ds_next);
@@ -1329,13 +1421,32 @@
} else {
ds_next->ds_prev = NULL;
}
+
+ dsl_dataset_recalc_head_uniq(ds_next);
+
+ /*
+ * Reduce the amount of our unconsumed refreservation
+ * being charged to our parent by the amount of
+ * new unique data we have gained.
+ */
+ if (old_unique < ds_next->ds_reserved) {
+ int64_t mrsdelta;
+ uint64_t new_unique =
+ ds_next->ds_phys->ds_unique_bytes;
+
+ ASSERT(old_unique <= new_unique);
+ mrsdelta = MIN(new_unique - old_unique,
+ ds_next->ds_reserved - old_unique);
+ dsl_dir_diduse_space(ds->ds_dir, -mrsdelta,
+ 0, 0, tx);
+ }
}
dsl_dataset_close(ds_next, DS_MODE_NONE, FTAG);
/*
- * NB: unique_bytes is not accurate for head objsets
- * because we don't update it when we delete the most
- * recent snapshot -- see above comment.
+ * NB: unique_bytes might not be accurate for the head objset.
+ * Before SPA_VERSION 9, we didn't update its value when we
+ * deleted the most recent snapshot.
*/
ASSERT3U(used, ==, ds->ds_phys->ds_unique_bytes);
} else {
@@ -1366,6 +1477,9 @@
err = traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
ADVANCE_POST, kill_blkptr, &ka);
ASSERT3U(err, ==, 0);
+ ASSERT(spa_version(dp->dp_spa) <
+ SPA_VERSION_UNIQUE_ACCURATE ||
+ used == ds->ds_phys->ds_unique_bytes);
}
err = zio_wait(zio);
@@ -1421,6 +1535,33 @@
}
+static int
+dsl_dataset_snapshot_reserve_space(dsl_dataset_t *ds, dmu_tx_t *tx)
+{
+ uint64_t asize;
+
+ if (!dmu_tx_is_syncing(tx))
+ return (0);
+
+ /*
+ * If there's an fs-only reservation, any blocks that might become
+ * owned by the snapshot dataset must be accommodated by space
+ * outside of the reservation.
+ */
+ asize = MIN(dsl_dataset_unique(ds), ds->ds_reserved);
+ if (asize > dsl_dir_space_available(ds->ds_dir, NULL, 0, FALSE))
+ return (ENOSPC);
+
+ /*
+ * Propagate any reserved space for this snapshot to other
+ * snapshot checks in this sync group.
+ */
+ if (asize > 0)
+ dsl_dir_willuse_space(ds->ds_dir, asize, tx);
+
+ return (0);
+}
+
/* ARGSUSED */
int
dsl_dataset_snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx)
@@ -1455,6 +1596,10 @@
if (dsl_dataset_namelen(ds) + 1 + strlen(snapname) >= MAXNAMELEN)
return (ENAMETOOLONG);
+ err = dsl_dataset_snapshot_reserve_space(ds, tx);
+ if (err)
+ return (err);
+
ds->ds_trysnap_txg = tx->tx_txg;
return (0);
}
@@ -1510,12 +1655,24 @@
}
}
+ /*
+ * If we have a reference-reservation on this dataset, we will
+ * need to increase the amount of refreservation being charged
+ * since our unique space is going to zero.
+ */
+ if (ds->ds_reserved) {
+ int64_t add = MIN(dsl_dataset_unique(ds), ds->ds_reserved);
+ dsl_dir_diduse_space(ds->ds_dir, add, 0, 0, tx);
+ }
+
bplist_close(&ds->ds_deadlist);
dmu_buf_will_dirty(ds->ds_dbuf, tx);
ASSERT3U(ds->ds_phys->ds_prev_snap_txg, <, dsphys->ds_creation_txg);
ds->ds_phys->ds_prev_snap_obj = dsobj;
ds->ds_phys->ds_prev_snap_txg = dsphys->ds_creation_txg;
ds->ds_phys->ds_unique_bytes = 0;
+ if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE)
+ ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
ds->ds_phys->ds_deadlist_obj =
bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
VERIFY(0 == bplist_open(&ds->ds_deadlist, mos,
@@ -1557,14 +1714,22 @@
void
dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
{
+ uint64_t refd, avail, uobjs, aobjs;
+
dsl_dir_stats(ds->ds_dir, nv);
+ dsl_dataset_space(ds, &refd, &avail, &uobjs, &aobjs);
+ dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_AVAILABLE, avail);
+ dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED, refd);
+
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATION,
ds->ds_phys->ds_creation_time);
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATETXG,
ds->ds_phys->ds_creation_txg);
- dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED,
- ds->ds_phys->ds_used_bytes);
+ dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFQUOTA,
+ ds->ds_quota);
+ dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRESERVATION,
+ ds->ds_reserved);
if (ds->ds_phys->ds_next_snap_obj) {
/*
@@ -1618,6 +1783,18 @@
{
*refdbytesp = ds->ds_phys->ds_used_bytes;
*availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE);
+ if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes)
+ *availbytesp += ds->ds_reserved - ds->ds_phys->ds_unique_bytes;
+ if (ds->ds_quota != 0) {
+ /*
+ * Adjust available bytes according to refquota
+ */
+ if (*refdbytesp < ds->ds_quota)
+ *availbytesp = MIN(*availbytesp,
+ ds->ds_quota - *refdbytesp);
+ else
+ *availbytesp = 0;
+ }
*usedobjsp = ds->ds_phys->ds_bp.blk_fill;
*availobjsp = DN_MAX_OBJECT - *usedobjsp;
}
@@ -2198,6 +2375,9 @@
uint64_t unique = 0;
int err;
+ if (csa->ohds->ds_reserved)
+ panic("refreservation and clone swap are incompatible");
+
dmu_buf_will_dirty(csa->cds->ds_dbuf, tx);
dmu_buf_will_dirty(csa->ohds->ds_dbuf, tx);
dmu_buf_will_dirty(csa->cds->ds_prev->ds_dbuf, tx);
@@ -2221,6 +2401,13 @@
}
VERIFY(err == ENOENT);
+ /* undo any accounting due to a refreservation */
+ if (csa->ohds->ds_reserved > csa->ohds->ds_phys->ds_unique_bytes) {
+ dsl_dir_diduse_space(csa->ohds->ds_dir,
+ csa->ohds->ds_phys->ds_unique_bytes -
+ csa->ohds->ds_reserved, 0, 0, tx);
+ }
+
/* reset origin's unique bytes */
csa->cds->ds_prev->ds_phys->ds_unique_bytes = unique;
@@ -2263,6 +2450,13 @@
(y) = __tmp; \
}
+ /* redo any accounting due to a refreservation */
+ if (csa->ohds->ds_reserved > csa->ohds->ds_phys->ds_unique_bytes) {
+ dsl_dir_diduse_space(csa->ohds->ds_dir,
+ csa->ohds->ds_reserved -
+ csa->ohds->ds_phys->ds_unique_bytes, 0, 0, tx);
+ }
+
/* swap ds_*_bytes */
SWITCH64(csa->ohds->ds_phys->ds_used_bytes,
csa->cds->ds_phys->ds_used_bytes);
@@ -2280,6 +2474,9 @@
csa->cds->ds_phys->ds_deadlist_obj));
VERIFY(0 == bplist_open(&csa->ohds->ds_deadlist, dp->dp_meta_objset,
csa->ohds->ds_phys->ds_deadlist_obj));
+ /* fix up clone's unique */
+ dsl_dataset_recalc_head_uniq(csa->cds);
+
}
/*
@@ -2331,3 +2528,195 @@
return (0);
}
+
+int
+dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota,
+ uint64_t asize, uint64_t inflight, uint64_t *used)
+{
+ int error = 0;
+
+ ASSERT3S(asize, >, 0);
+
+ mutex_enter(&ds->ds_lock);
+ /*
+ * Make a space adjustment for reserved bytes.
+ */
+ if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes) {
+ ASSERT3U(*used, >=,
+ ds->ds_reserved - ds->ds_phys->ds_unique_bytes);
+ *used -= (ds->ds_reserved - ds->ds_phys->ds_unique_bytes);
+ }
+
+ if (!check_quota || ds->ds_quota == 0) {
+ mutex_exit(&ds->ds_lock);
+ return (0);
+ }
+ /*
+ * If they are requesting more space, and our current estimate
+ * is over quota, they get to try again unless the actual
+ * on-disk is over quota and there are no pending changes (which
+ * may free up space for us).
+ */
+ if (ds->ds_phys->ds_used_bytes + inflight >= ds->ds_quota) {
+ if (inflight > 0 || ds->ds_phys->ds_used_bytes < ds->ds_quota)
+ error = ERESTART;
+ else
+ error = EDQUOT;
+ }
+ mutex_exit(&ds->ds_lock);
+
+ return (error);
+}
+
+/* ARGSUSED */
+static int
+dsl_dataset_set_quota_check(void *arg1, void *arg2, dmu_tx_t *tx)
+{
+ dsl_dataset_t *ds = arg1;
+ uint64_t *quotap = arg2;
+ uint64_t new_quota = *quotap;
+
+ if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_REFQUOTA)
+ return (ENOTSUP);
+
+ if (new_quota == 0)
+ return (0);
+
+ if (new_quota < ds->ds_phys->ds_used_bytes ||
+ new_quota < ds->ds_reserved)
+ return (ENOSPC);
+
+ return (0);
+}
+
+/* ARGSUSED */
+void
+dsl_dataset_set_quota_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
+{
+ dsl_dataset_t *ds = arg1;
+ uint64_t *quotap = arg2;
+ uint64_t new_quota = *quotap;
+
+ dmu_buf_will_dirty(ds->ds_dbuf, tx);
+
+ mutex_enter(&ds->ds_lock);
+ ds->ds_quota = new_quota;
+ mutex_exit(&ds->ds_lock);
+
+ dsl_prop_set_uint64_sync(ds->ds_dir, "refquota", new_quota, cr, tx);
+
+ spa_history_internal_log(LOG_DS_REFQUOTA, ds->ds_dir->dd_pool->dp_spa,
+ tx, cr, "%lld dataset = %llu ",
+ (longlong_t)new_quota, ds->ds_dir->dd_phys->dd_head_dataset_obj);
+}
+
+int
+dsl_dataset_set_quota(const char *dsname, uint64_t quota)
+{
+ dsl_dataset_t *ds;
+ int err;
+
+ err = dsl_dataset_open(dsname, DS_MODE_STANDARD, FTAG, &ds);
+ if (err)
+ return (err);
+
+ /*
+ * If someone removes a file, then tries to set the quota, we
+ * want to make sure the file freeing takes effect.
+ */
+ txg_wait_open(ds->ds_dir->dd_pool, 0);
+
+ err = dsl_sync_task_do(ds->ds_dir->dd_pool, dsl_dataset_set_quota_check,
+ dsl_dataset_set_quota_sync, ds, "a, 0);
+ dsl_dataset_close(ds, DS_MODE_STANDARD, FTAG);
+ return (err);
+}
+
+static int
+dsl_dataset_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx)
+{
+ dsl_dataset_t *ds = arg1;
+ uint64_t *reservationp = arg2;
+ uint64_t new_reservation = *reservationp;
+ int64_t delta;
+ uint64_t unique;
+
+ if (new_reservation > INT64_MAX)
+ return (EOVERFLOW);
+
+ if (spa_version(ds->ds_dir->dd_pool->dp_spa) <
+ SPA_VERSION_REFRESERVATION)
+ return (ENOTSUP);
+
+ if (dsl_dataset_is_snapshot(ds))
+ return (EINVAL);
+
+ /*
+ * If we are doing the preliminary check in open context, the
+ * space estimates may be inaccurate.
+ */
+ if (!dmu_tx_is_syncing(tx))
+ return (0);
+
+ mutex_enter(&ds->ds_lock);
+ unique = dsl_dataset_unique(ds);
+ delta = MAX(unique, new_reservation) - MAX(unique, ds->ds_reserved);
+ mutex_exit(&ds->ds_lock);
+
+ if (delta > 0 &&
+ delta > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE))
+ return (ENOSPC);
+ if (delta > 0 && ds->ds_quota > 0 &&
+ new_reservation > ds->ds_quota)
+ return (ENOSPC);
+
+ return (0);
+}
+
+/* ARGSUSED */
+static void
+dsl_dataset_set_reservation_sync(void *arg1, void *arg2, cred_t *cr,
+ dmu_tx_t *tx)
+{
+ dsl_dataset_t *ds = arg1;
+ uint64_t *reservationp = arg2;
+ uint64_t new_reservation = *reservationp;
+ uint64_t unique;
+ int64_t delta;
+
+ dmu_buf_will_dirty(ds->ds_dbuf, tx);
+
+ mutex_enter(&ds->ds_lock);
+ unique = dsl_dataset_unique(ds);
+ delta = MAX(0, (int64_t)(new_reservation - unique)) -
+ MAX(0, (int64_t)(ds->ds_reserved - unique));
+ ds->ds_reserved = new_reservation;
+ mutex_exit(&ds->ds_lock);
+
+ dsl_prop_set_uint64_sync(ds->ds_dir, "refreservation",
+ new_reservation, cr, tx);
+
+ dsl_dir_diduse_space(ds->ds_dir, delta, 0, 0, tx);
+
+ spa_history_internal_log(LOG_DS_REFRESERV,
+ ds->ds_dir->dd_pool->dp_spa, tx, cr, "%lld dataset = %llu",
+ (longlong_t)new_reservation,
+ ds->ds_dir->dd_phys->dd_head_dataset_obj);
+}
+
+int
+dsl_dataset_set_reservation(const char *dsname, uint64_t reservation)
+{
+ dsl_dataset_t *ds;
+ int err;
+
+ err = dsl_dataset_open(dsname, DS_MODE_STANDARD, FTAG, &ds);
+ if (err)
+ return (err);
+
+ err = dsl_sync_task_do(ds->ds_dir->dd_pool,
+ dsl_dataset_set_reservation_check,
+ dsl_dataset_set_reservation_sync, ds, &reservation, 0);
+ dsl_dataset_close(ds, DS_MODE_STANDARD, FTAG);
+ return (err);
+}
--- a/usr/src/uts/common/fs/zfs/dsl_dir.c Mon Oct 29 22:26:03 2007 -0700
+++ b/usr/src/uts/common/fs/zfs/dsl_dir.c Mon Oct 29 22:45:33 2007 -0700
@@ -26,6 +26,7 @@
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/dmu.h>
+#include <sys/dmu_objset.h>
#include <sys/dmu_tx.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_dir.h>
@@ -39,7 +40,7 @@
#include <sys/sunddi.h>
#include "zfs_namecheck.h"
-static uint64_t dsl_dir_estimated_space(dsl_dir_t *dd);
+static uint64_t dsl_dir_space_towrite(dsl_dir_t *dd);
static void dsl_dir_set_reservation_sync(void *arg1, void *arg2,
cred_t *cr, dmu_tx_t *tx);
@@ -518,13 +519,9 @@
void
dsl_dir_stats(dsl_dir_t *dd, nvlist_t *nv)
{
- dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_AVAILABLE,
- dsl_dir_space_available(dd, NULL, 0, TRUE));
-
mutex_enter(&dd->dd_lock);
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED, dd->dd_used_bytes);
- dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_QUOTA,
- dd->dd_phys->dd_quota);
+ dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_QUOTA, dd->dd_phys->dd_quota);
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_RESERVATION,
dd->dd_phys->dd_reserved);
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO,
@@ -590,15 +587,13 @@
}
static uint64_t
-dsl_dir_estimated_space(dsl_dir_t *dd)
+dsl_dir_space_towrite(dsl_dir_t *dd)
{
- int64_t space;
+ uint64_t space = 0;
int i;
ASSERT(MUTEX_HELD(&dd->dd_lock));
- space = dd->dd_phys->dd_used_bytes;
- ASSERT(space >= 0);
for (i = 0; i < TXG_SIZE; i++) {
space += dd->dd_space_towrite[i&TXG_MASK];
ASSERT3U(dd->dd_space_towrite[i&TXG_MASK], >=, 0);
@@ -632,11 +627,9 @@
mutex_enter(&dd->dd_lock);
if (dd->dd_phys->dd_quota != 0)
quota = dd->dd_phys->dd_quota;
- if (ondiskonly) {
- used = dd->dd_used_bytes;
- } else {
- used = dsl_dir_estimated_space(dd);
- }
+ used = dd->dd_used_bytes;
+ if (!ondiskonly)
+ used += dsl_dir_space_towrite(dd);
if (dd == ancestor)
used += delta;
@@ -684,40 +677,50 @@
uint64_t tr_size;
};
-/*
- * Reserve space in this dsl_dir, to be used in this tx's txg.
- * After the space has been dirtied (and thus
- * dsl_dir_willuse_space() has been called), the reservation should
- * be canceled, using dsl_dir_tempreserve_clear().
- */
static int
-dsl_dir_tempreserve_impl(dsl_dir_t *dd, uint64_t asize,
- boolean_t netfree, boolean_t noquota, list_t *tr_list, dmu_tx_t *tx)
+dsl_dir_tempreserve_impl(dsl_dir_t *dd, uint64_t asize, boolean_t netfree,
+ boolean_t ignorequota, boolean_t checkrefquota, list_t *tr_list,
+ dmu_tx_t *tx)
{
uint64_t txg = tx->tx_txg;
- uint64_t est_used, quota, parent_rsrv;
- int edquot = EDQUOT;
+ uint64_t est_inflight, used_on_disk, quota, parent_rsrv;
+ struct tempreserve *tr;
+ int error = EDQUOT;
int txgidx = txg & TXG_MASK;
int i;
- struct tempreserve *tr;
ASSERT3U(txg, !=, 0);
- ASSERT3S(asize, >=, 0);
+ ASSERT3S(asize, >, 0);
mutex_enter(&dd->dd_lock);
+
/*
* Check against the dsl_dir's quota. We don't add in the delta
* when checking for over-quota because they get one free hit.
*/
- est_used = dsl_dir_estimated_space(dd);
+ est_inflight = dsl_dir_space_towrite(dd);
for (i = 0; i < TXG_SIZE; i++)
- est_used += dd->dd_tempreserved[i];
+ est_inflight += dd->dd_tempreserved[i];
+ used_on_disk = dd->dd_used_bytes;
/*
- * If this transaction will result in a net free of space, we want
- * to let it through.
+ * Check dataset reference quota on the initial (non-recursive) call.
*/
- if (netfree || noquota || dd->dd_phys->dd_quota == 0)
+ if (list_head(tr_list) == NULL && tx->tx_objset) {
+ dsl_dataset_t *ds = tx->tx_objset->os->os_dsl_dataset;
+ error = dsl_dataset_check_quota(ds, checkrefquota,
+ asize, est_inflight, &used_on_disk);
+ if (error) {
+ mutex_exit(&dd->dd_lock);
+ return (error);
+ }
+ }
+
+ /*
+ * Do not enforce this dir's quota if the transaction will result in
+ * a net free of space, the quota is to be ignored, or none is set.
+ */
+ if (ignorequota || netfree || dd->dd_phys->dd_quota == 0)
quota = UINT64_MAX;
else
quota = dd->dd_phys->dd_quota;
@@ -735,34 +738,31 @@
uint64_t poolsize = dsl_pool_adjustedsize(dd->dd_pool, netfree);
if (poolsize < quota) {
quota = poolsize;
- edquot = ENOSPC;
+ error = ENOSPC;
}
}
/*
* If they are requesting more space, and our current estimate
- * is over quota. They get to try again unless the actual
+ * is over quota, they get to try again unless the actual
* on-disk is over quota and there are no pending changes (which
* may free up space for us).
*/
- if (asize > 0 && est_used > quota) {
- if (dd->dd_space_towrite[txg & TXG_MASK] != 0 ||
- dd->dd_space_towrite[(txg-1) & TXG_MASK] != 0 ||
- dd->dd_space_towrite[(txg-2) & TXG_MASK] != 0 ||
- dd->dd_used_bytes < quota)
- edquot = ERESTART;
- dprintf_dd(dd, "failing: used=%lluK est_used = %lluK "
+ if (used_on_disk + est_inflight > quota) {
+ if (est_inflight > 0 || used_on_disk < quota)
+ error = ERESTART;
+ dprintf_dd(dd, "failing: used=%lluK inflight = %lluK "
"quota=%lluK tr=%lluK err=%d\n",
- dd->dd_used_bytes>>10, est_used>>10,
- quota>>10, asize>>10, edquot);
+ used_on_disk>>10, est_inflight>>10,
+ quota>>10, asize>>10, error);
mutex_exit(&dd->dd_lock);
- return (edquot);
+ return (error);
}
/* We need to up our estimated delta before dropping dd_lock */
dd->dd_tempreserved[txgidx] += asize;
- parent_rsrv = parent_delta(dd, est_used, asize);
+ parent_rsrv = parent_delta(dd, used_on_disk + est_inflight, asize);
mutex_exit(&dd->dd_lock);
tr = kmem_alloc(sizeof (struct tempreserve), KM_SLEEP);
@@ -775,7 +775,7 @@
boolean_t ismos = (dd->dd_phys->dd_head_dataset_obj == 0);
return (dsl_dir_tempreserve_impl(dd->dd_parent,
- parent_rsrv, netfree, ismos, tr_list, tx));
+ parent_rsrv, netfree, ismos, TRUE, tr_list, tx));
} else {
return (0);
}
@@ -783,25 +783,30 @@
/*
* Reserve space in this dsl_dir, to be used in this tx's txg.
- * After the space has been dirtied (and thus
- * dsl_dir_willuse_space() has been called), the reservation should
- * be canceled, using dsl_dir_tempreserve_clear().
+ * After the space has been dirtied (and dsl_dir_willuse_space()
+ * has been called), the reservation should be canceled, using
+ * dsl_dir_tempreserve_clear().
*/
int
-dsl_dir_tempreserve_space(dsl_dir_t *dd, uint64_t lsize,
- uint64_t asize, uint64_t fsize, void **tr_cookiep, dmu_tx_t *tx)
+dsl_dir_tempreserve_space(dsl_dir_t *dd, uint64_t lsize, uint64_t asize,
+ uint64_t fsize, uint64_t usize, void **tr_cookiep, dmu_tx_t *tx)
{
int err = 0;
list_t *tr_list;
+ if (asize == 0) {
+ *tr_cookiep = NULL;
+ return (0);
+ }
+
tr_list = kmem_alloc(sizeof (list_t), KM_SLEEP);
list_create(tr_list, sizeof (struct tempreserve),
offsetof(struct tempreserve, tr_node));
- ASSERT3S(asize, >=, 0);
+ ASSERT3S(asize, >, 0);
ASSERT3S(fsize, >=, 0);
err = dsl_dir_tempreserve_impl(dd, asize, fsize >= asize, FALSE,
- tr_list, tx);
+ asize > usize, tr_list, tx);
if (err == 0) {
struct tempreserve *tr;
@@ -835,6 +840,9 @@
ASSERT3U(tx->tx_txg, !=, 0);
+ if (tr_cookie == NULL)
+ return;
+
while (tr = list_head(tr_list)) {
if (tr->tr_ds == NULL) {
arc_tempreserve_clear(tr->tr_size);
@@ -867,7 +875,7 @@
if (space > 0)
dd->dd_space_towrite[tx->tx_txg & TXG_MASK] += space;
- est_used = dsl_dir_estimated_space(dd);
+ est_used = dsl_dir_space_towrite(dd) + dd->dd_used_bytes;
parent_space = parent_delta(dd, est_used, space);
mutex_exit(&dd->dd_lock);
@@ -924,14 +932,13 @@
/*
* If we are doing the preliminary check in open context, and
* there are pending changes, then don't fail it, since the
- * pending changes could under-estimat the amount of space to be
+ * pending changes could under-estimate the amount of space to be
* freed up.
*/
- towrite = dd->dd_space_towrite[0] + dd->dd_space_towrite[1] +
- dd->dd_space_towrite[2] + dd->dd_space_towrite[3];
+ towrite = dsl_dir_space_towrite(dd);
if ((dmu_tx_is_syncing(tx) || towrite == 0) &&
(new_quota < dd->dd_phys->dd_reserved ||
- new_quota < dsl_dir_estimated_space(dd))) {
+ new_quota < dd->dd_used_bytes + towrite)) {
err = ENOSPC;
}
mutex_exit(&dd->dd_lock);
@@ -978,7 +985,7 @@
return (err);
}
-static int
+int
dsl_dir_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
dsl_dir_t *dd = arg1;
@@ -1028,15 +1035,15 @@
uint64_t used;
int64_t delta;
+ dmu_buf_will_dirty(dd->dd_dbuf, tx);
+
mutex_enter(&dd->dd_lock);
used = dd->dd_used_bytes;
delta = MAX(used, new_reservation) -
MAX(used, dd->dd_phys->dd_reserved);
+ dd->dd_phys->dd_reserved = new_reservation;
mutex_exit(&dd->dd_lock);
- dmu_buf_will_dirty(dd->dd_dbuf, tx);
- dd->dd_phys->dd_reserved = new_reservation;
-
if (dd->dd_parent != NULL) {
/* Roll up this additional usage into our ancestors */
dsl_dir_diduse_space(dd->dd_parent, delta, 0, 0, tx);
--- a/usr/src/uts/common/fs/zfs/dsl_prop.c Mon Oct 29 22:26:03 2007 -0700
+++ b/usr/src/uts/common/fs/zfs/dsl_prop.c Mon Oct 29 22:45:33 2007 -0700
@@ -375,6 +375,24 @@
dd->dd_phys->dd_head_dataset_obj);
}
+void
+dsl_prop_set_uint64_sync(dsl_dir_t *dd, const char *name, uint64_t val,
+ cred_t *cr, dmu_tx_t *tx)
+{
+ objset_t *mos = dd->dd_pool->dp_meta_objset;
+ uint64_t zapobj = dd->dd_phys->dd_props_zapobj;
+ /* Sync-context setter: tx must be syncing (asserted below). */
+ ASSERT(dmu_tx_is_syncing(tx));
+ /* Store val as one uint64_t under "name" in this dir's props ZAP. */
+ VERIFY(0 == zap_update(mos, zapobj, name, sizeof (val), 1, &val, tx));
+ /* Pass the new value to dsl_prop_changed_notify() for this dir. */
+ dsl_prop_changed_notify(dd->dd_pool, dd->dd_object, name, val, TRUE);
+ /* Record the change as a LOG_DS_PROPSET internal history event. */
+ spa_history_internal_log(LOG_DS_PROPSET, dd->dd_pool->dp_spa, tx, cr,
+ "%s=%llu dataset = %llu", name, (u_longlong_t)val,
+ dd->dd_phys->dd_head_dataset_obj);
+}
+
int
dsl_prop_set_dd(dsl_dir_t *dd, const char *propname,
int intsz, int numints, const void *buf)
--- a/usr/src/uts/common/fs/zfs/dsl_synctask.c Mon Oct 29 22:26:03 2007 -0700
+++ b/usr/src/uts/common/fs/zfs/dsl_synctask.c Mon Oct 29 22:45:33 2007 -0700
@@ -158,7 +158,7 @@
* Check for sufficient space.
*/
dstg->dstg_err = dsl_dir_tempreserve_space(dstg->dstg_pool->dp_mos_dir,
- dstg->dstg_space, dstg->dstg_space * 3, 0, &tr_cookie, tx);
+ dstg->dstg_space, dstg->dstg_space * 3, 0, 0, &tr_cookie, tx);
/* don't bother trying again */
if (dstg->dstg_err == ERESTART)
dstg->dstg_err = EAGAIN;
--- a/usr/src/uts/common/fs/zfs/sys/dmu_tx.h Mon Oct 29 22:26:03 2007 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/dmu_tx.h Mon Oct 29 22:45:33 2007 -0700
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -65,6 +65,7 @@
uint64_t tx_space_towrite;
uint64_t tx_space_tofree;
uint64_t tx_space_tooverwrite;
+ uint64_t tx_space_tounref;
refcount_t tx_space_written;
refcount_t tx_space_freed;
#endif
@@ -87,6 +88,7 @@
uint64_t txh_space_towrite;
uint64_t txh_space_tofree;
uint64_t txh_space_tooverwrite;
+ uint64_t txh_space_tounref;
#ifdef ZFS_DEBUG
enum dmu_tx_hold_type txh_type;
uint64_t txh_arg1;
--- a/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h Mon Oct 29 22:26:03 2007 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h Mon Oct 29 22:45:33 2007 -0700
@@ -55,6 +55,13 @@
*/
#define DS_FLAG_NOPROMOTE (1ULL<<1)
+/*
+ * DS_FLAG_UNIQUE_ACCURATE is set if ds_unique_bytes has been correctly
+ * calculated for head datasets (starting with SPA_VERSION_UNIQUE_ACCURATE,
+ * refquota/refreservations).
+ */
+#define DS_FLAG_UNIQUE_ACCURATE (1ULL<<2)
+
typedef struct dsl_dataset_phys {
uint64_t ds_dir_obj;
uint64_t ds_prev_snap_obj;
@@ -114,6 +121,9 @@
/* for objset_open() */
kmutex_t ds_opening_lock;
+ uint64_t ds_reserved; /* cached refreservation */
+ uint64_t ds_quota; /* cached refquota */
+
/* Protected by ds_lock; keep at end of struct for better locality */
char ds_snapname[MAXNAMELEN];
} dsl_dataset_t;
@@ -121,6 +131,9 @@
#define dsl_dataset_is_snapshot(ds) \
((ds)->ds_phys->ds_num_children != 0)
+#define DS_UNIQUE_IS_ACCURATE(ds) \
+ (((ds)->ds_phys->ds_flags & DS_FLAG_UNIQUE_ACCURATE) != 0)
+
int dsl_dataset_open_spa(spa_t *spa, const char *name, int mode,
void *tag, dsl_dataset_t **dsp);
int dsl_dataset_open(const char *name, int mode, void *tag,
@@ -179,6 +192,13 @@
int dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf);
+int dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota,
+ uint64_t asize, uint64_t inflight, uint64_t *used);
+int dsl_dataset_set_quota(const char *dsname, uint64_t quota);
+void dsl_dataset_set_quota_sync(void *arg1, void *arg2, cred_t *cr,
+ dmu_tx_t *tx);
+int dsl_dataset_set_reservation(const char *dsname, uint64_t reservation);
+
#ifdef ZFS_DEBUG
#define dprintf_ds(ds, fmt, ...) do { \
if (zfs_flags & ZFS_DEBUG_DPRINTF) { \
--- a/usr/src/uts/common/fs/zfs/sys/dsl_dir.h Mon Oct 29 22:26:03 2007 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/dsl_dir.h Mon Oct 29 22:45:33 2007 -0700
@@ -110,7 +110,8 @@
void dsl_dir_dirty(dsl_dir_t *dd, dmu_tx_t *tx);
void dsl_dir_sync(dsl_dir_t *dd, dmu_tx_t *tx);
int dsl_dir_tempreserve_space(dsl_dir_t *dd, uint64_t mem,
- uint64_t asize, uint64_t fsize, void **tr_cookiep, dmu_tx_t *tx);
+ uint64_t asize, uint64_t fsize, uint64_t usize, void **tr_cookiep,
+ dmu_tx_t *tx);
void dsl_dir_tempreserve_clear(void *tr_cookie, dmu_tx_t *tx);
void dsl_dir_willuse_space(dsl_dir_t *dd, int64_t space, dmu_tx_t *tx);
void dsl_dir_diduse_space(dsl_dir_t *dd,
@@ -119,6 +120,7 @@
int dsl_dir_set_reservation(const char *ddname, uint64_t reservation);
int dsl_dir_rename(dsl_dir_t *dd, const char *newname);
int dsl_dir_transfer_possible(dsl_dir_t *sdd, dsl_dir_t *tdd, uint64_t space);
+int dsl_dir_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx);
/* internal reserved dir name */
#define MOS_DIR_NAME "$MOS"
--- a/usr/src/uts/common/fs/zfs/sys/dsl_prop.h Mon Oct 29 22:26:03 2007 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/dsl_prop.h Mon Oct 29 22:45:33 2007 -0700
@@ -67,6 +67,8 @@
int intsz, int numints, const void *buf);
int dsl_prop_set_dd(dsl_dir_t *dd, const char *propname,
int intsz, int numints, const void *buf);
+void dsl_prop_set_uint64_sync(dsl_dir_t *dd, const char *name, uint64_t val,
+ cred_t *cr, dmu_tx_t *tx);
void dsl_prop_nvlist_add_uint64(nvlist_t *nv, zfs_prop_t prop, uint64_t value);
void dsl_prop_nvlist_add_string(nvlist_t *nv,
--- a/usr/src/uts/common/fs/zfs/zfs_ioctl.c Mon Oct 29 22:26:03 2007 -0700
+++ b/usr/src/uts/common/fs/zfs/zfs_ioctl.c Mon Oct 29 22:45:33 2007 -0700
@@ -1411,6 +1411,12 @@
return (error);
break;
+ case ZFS_PROP_REFQUOTA:
+ if ((error = nvpair_value_uint64(elem, &intval)) != 0 ||
+ (error = dsl_dataset_set_quota(name, intval)) != 0)
+ return (error);
+ break;
+
case ZFS_PROP_RESERVATION:
if ((error = nvpair_value_uint64(elem, &intval)) != 0 ||
(error = dsl_dir_set_reservation(name,
@@ -1418,6 +1424,13 @@
return (error);
break;
+ case ZFS_PROP_REFRESERVATION:
+ if ((error = nvpair_value_uint64(elem, &intval)) != 0 ||
+ (error = dsl_dataset_set_reservation(name,
+ intval)) != 0)
+ return (error);
+ break;
+
case ZFS_PROP_VOLSIZE:
if ((error = nvpair_value_uint64(elem, &intval)) != 0 ||
(error = zvol_set_volsize(name,
--- a/usr/src/uts/common/sys/fs/zfs.h Mon Oct 29 22:26:03 2007 -0700
+++ b/usr/src/uts/common/sys/fs/zfs.h Mon Oct 29 22:45:33 2007 -0700
@@ -98,6 +98,8 @@
ZFS_PROP_VSCAN,
ZFS_PROP_NBMAND,
ZFS_PROP_SHARESMB,
+ ZFS_PROP_REFQUOTA,
+ ZFS_PROP_REFRESERVATION,
ZFS_NUM_PROPS
} zfs_prop_t;
@@ -251,6 +253,9 @@
#define SPA_VERSION_DELEGATED_PERMS SPA_VERSION_8
#define SPA_VERSION_FUID SPA_VERSION_9
#define SPA_VERSION_NORMALIZATION SPA_VERSION_9
+#define SPA_VERSION_REFRESERVATION SPA_VERSION_9
+#define SPA_VERSION_REFQUOTA SPA_VERSION_9
+#define SPA_VERSION_UNIQUE_ACCURATE SPA_VERSION_9
/*
* ZPL version - rev'd whenever an incompatible on-disk format change
@@ -619,6 +624,8 @@
LOG_DS_ROLLBACK,
LOG_DS_SNAPSHOT,
LOG_DS_UPGRADE,
+ LOG_DS_REFQUOTA,
+ LOG_DS_REFRESERV,
LOG_END
} history_internal_events_t;