usr/src/uts/common/fs/zfs/bplist.c
author eschrock
Fri, 03 Mar 2006 20:08:16 -0800
changeset 1544 938876158511
parent 789 b348f31ed315
child 2082 76b439ec3ac1
permissions -rw-r--r--
PSARC 2006/077 zpool clear PSARC 2006/139 FMA for ZFS 6284889 arc should replace the znode cache 6333006 DMU & DSL should not panic upon I/O error 6333092 concurrent reads to a file not scaling with number of readers 6338081 ZFS/FMA phase 1 6338386 need persistent error log 6341326 i/o error causes arc buf hash table corruption 6341639 zfs backup/restore should compute/verify checksum of backup stream 6348002 out of space due to changing properties 6354724 inaccurate error message from zfs restore 6354872 dmu_sync() blows predictive accounting 6355416 zpool scrubbing consumes all memory, system hung 6363995 df should only load libzfs when it encounters a ZFS filesystem 6366320 zfs backup/restore doesn't like signals 6368892 mount -m support needed for legacy mounts 6368902 boot archive fstat support needed for ZFS Mountroot 6369424 BFU complains when bfu'ing a ZFS root filesystem 6374062 mountroot support needed for ZFS 6376356 dirtying dbuf obj=43 lvl=0 blkid=0 but not tx_held 6378391 unused members of dmu_objset_stats_t 6378392 clean up zfs_cmd_t structure 6378685 buf_init should allocate its hash table more carefully 6378976 ziltest should be a first class citizen 6381086 zdb segfaults if there is a spa deferred-free bplist 6381203 deadlock due to i/o while assigning (tc_lock held) 6381209 freed space is not immediately available 6381344 'zpool clear' 6381345 FAULTED devices should really be UNAVAIL 6381346 import should mark devices as persistently unavailable 6383272 recursive mutex_enter() during log replay with zfs root 6386326 origin property is not displayed 6386354 libzfs does too much in its _init section, calls exit(1) 6386624 zpool should not complain about non-existent devices from libdiskmgt 6386910 spa needs to be i/o error hardened 6387735 need a mechanism to inject faults into ZFS 6387736 internal ZFS utilities should be placed in an ON-private package 6389928 libzfs should ship a lint library 6390609 malformed vdev config panics on zpool_create() 
6390677 version number checking makes upgrades challenging 6390713 ztest hangs in zil_suspend() 6391873 metadata compression should be turned back on 6392113 ztest sometimes reports leaked blocks because ZIL isn't resilvered 6393004 minor memory leak in unique_insert()

/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/bplist.h>
#include <sys/zfs_context.h>

/*
 * Ensure the bplist's bonus buffer is held and bpl_phys points at its data.
 * The caller must hold bpl_lock.  Once the buffer has been obtained, later
 * calls are no-ops.
 *
 * Returns 0 on success, or the error from dmu_bonus_hold().
 */
static int
bplist_hold(bplist_t *bpl)
{
	int error;

	ASSERT(MUTEX_HELD(&bpl->bpl_lock));

	/* Already held by an earlier call; nothing more to do. */
	if (bpl->bpl_dbuf != NULL)
		return (0);

	error = dmu_bonus_hold(bpl->bpl_mos, bpl->bpl_object, bpl,
	    &bpl->bpl_dbuf);
	if (error != 0)
		return (error);

	bpl->bpl_phys = bpl->bpl_dbuf->db_data;
	return (0);
}

/*
 * Allocate a new DMU_OT_BPLIST object in `mos` with the given data block
 * size and a DMU_OT_BPLIST_HDR bonus area of sizeof (bplist_phys_t) bytes,
 * as part of transaction `tx`.
 *
 * Returns the value handed back by dmu_object_alloc().
 */
uint64_t
bplist_create(objset_t *mos, int blocksize, dmu_tx_t *tx)
{
	return (dmu_object_alloc(mos, DMU_OT_BPLIST, blocksize,
	    DMU_OT_BPLIST_HDR, sizeof (bplist_phys_t), tx));
}

/*
 * Free the object `object` in `mos` as part of transaction `tx`.
 *
 * The call is wrapped in VERIFY, so a nonzero return from
 * dmu_object_free() is treated as fatal rather than reported to the caller.
 */
void
bplist_destroy(objset_t *mos, uint64_t object, dmu_tx_t *tx)
{
	VERIFY(dmu_object_free(mos, object, tx) == 0);
}

/*
 * Bind the in-core bplist `bpl` to the on-disk object `object` in `mos`.
 *
 * The object's data block size is looked up and converted into two shifts:
 * bpl_blockshift (log2 of the block size) and bpl_bpshift (log2 of the
 * number of block pointers per block).  No buffers are held here; that
 * happens lazily in bplist_hold() and bplist_cache().
 *
 * Returns 0 on success, or the error from dmu_object_info().
 */
int
bplist_open(bplist_t *bpl, objset_t *mos, uint64_t object)
{
	dmu_object_info_t doi;
	int blockshift;
	int error;

	error = dmu_object_info(mos, object, &doi);
	if (error != 0)
		return (error);

	/* Needs only the object info, so compute it outside the lock. */
	blockshift = highbit(doi.doi_data_block_size - 1);

	mutex_enter(&bpl->bpl_lock);

	/* The bplist must not already be open or have queued entries. */
	ASSERT(bpl->bpl_dbuf == NULL);
	ASSERT(bpl->bpl_phys == NULL);
	ASSERT(bpl->bpl_cached_dbuf == NULL);
	ASSERT(bpl->bpl_queue == NULL);
	ASSERT(object != 0);

	bpl->bpl_mos = mos;
	bpl->bpl_object = object;
	bpl->bpl_blockshift = blockshift;
	bpl->bpl_bpshift = bpl->bpl_blockshift - SPA_BLKPTRSHIFT;

	mutex_exit(&bpl->bpl_lock);

	return (0);
}

/*
 * Release every buffer hold the bplist has taken and forget the pointers
 * into them.  The deferred queue must already be empty (asserted).
 */
void
bplist_close(bplist_t *bpl)
{
	mutex_enter(&bpl->bpl_lock);

	ASSERT(bpl->bpl_queue == NULL);

	/* Drop the cached data block, if one is held. */
	if (bpl->bpl_cached_dbuf != NULL) {
		dmu_buf_rele(bpl->bpl_cached_dbuf, bpl);
		bpl->bpl_cached_dbuf = NULL;
	}

	/*
	 * Drop the bonus buffer, if one is held.  bpl_phys pointed at that
	 * buffer's data, so it is cleared along with it.
	 */
	if (bpl->bpl_dbuf != NULL) {
		dmu_buf_rele(bpl->bpl_dbuf, bpl);
		bpl->bpl_dbuf = NULL;
		bpl->bpl_phys = NULL;
	}

	mutex_exit(&bpl->bpl_lock);
}

/*
 * Report whether the bplist has no on-disk entries.
 *
 * A bplist with no backing object (bpl_object == 0) counts as empty and is
 * answered without taking the lock.  Otherwise the header is held and its
 * entry count is examined; a failure to hold the header trips the VERIFY.
 */
boolean_t
bplist_empty(bplist_t *bpl)
{
	boolean_t is_empty;

	if (bpl->bpl_object == 0)
		return (B_TRUE);

	mutex_enter(&bpl->bpl_lock);
	VERIFY(0 == bplist_hold(bpl)); /* XXX */
	is_empty = (bpl->bpl_phys->bpl_entries == 0);
	mutex_exit(&bpl->bpl_lock);

	return (is_empty);
}

/*
 * Make bpl_cached_dbuf refer to data block `blkid` of the bplist object.
 *
 * If the block currently cached already starts at the wanted offset it is
 * reused.  Otherwise the old hold is dropped and a new one is taken.
 *
 * Returns 0 on success, or the error from dmu_buf_hold().
 */
static int
bplist_cache(bplist_t *bpl, uint64_t blkid)
{
	int err = 0;

	if (bpl->bpl_cached_dbuf == NULL ||
	    bpl->bpl_cached_dbuf->db_offset != (blkid << bpl->bpl_blockshift)) {
		if (bpl->bpl_cached_dbuf != NULL) {
			dmu_buf_rele(bpl->bpl_cached_dbuf, bpl);
			/*
			 * Forget the pointer as soon as the hold is dropped.
			 * If the hold below fails without storing a new
			 * pointer, the released buffer must not be compared
			 * against on the next call or released a second time
			 * by bplist_close().
			 */
			bpl->bpl_cached_dbuf = NULL;
		}
		err = dmu_buf_hold(bpl->bpl_mos,
		    bpl->bpl_object, blkid << bpl->bpl_blockshift,
		    bpl, &bpl->bpl_cached_dbuf);
		/* A successfully held block is exactly one bplist block. */
		ASSERT(err || bpl->bpl_cached_dbuf->db_size ==
		    1ULL << bpl->bpl_blockshift);
	}
	return (err);
}

/*
 * Copy the entry at index *itorp into *bp and advance *itorp by one.
 *
 * Callers start with *itorp == 0 and call repeatedly.
 *
 * Returns 0 on success, ENOENT once *itorp is at or past the number of
 * entries in the list, or the error from bplist_hold()/bplist_cache().
 * On any nonzero return neither *itorp nor *bp is modified.
 */
int
bplist_iterate(bplist_t *bpl, uint64_t *itorp, blkptr_t *bp)
{
	blkptr_t *entries;
	uint64_t blkid, slot;
	int error;

	mutex_enter(&bpl->bpl_lock);

	error = bplist_hold(bpl);
	if (error != 0)
		goto out;

	if (*itorp >= bpl->bpl_phys->bpl_entries) {
		error = ENOENT;
		goto out;
	}

	/* Split the index into a data block number and a slot within it. */
	blkid = *itorp >> bpl->bpl_bpshift;
	slot = P2PHASE(*itorp, 1ULL << bpl->bpl_bpshift);

	error = bplist_cache(bpl, blkid);
	if (error != 0)
		goto out;

	entries = bpl->bpl_cached_dbuf->db_data;
	*bp = entries[slot];
	(*itorp)++;

out:
	mutex_exit(&bpl->bpl_lock);
	return (error);
}

/*
 * Append a copy of `bp` to the end of the on-disk list as part of `tx`.
 *
 * The stored copy has its fill count and checksum zeroed.  The header's
 * entry count is incremented and its byte count grows by BP_GET_ASIZE(bp).
 * `bp` must not be a hole (asserted).
 *
 * Returns 0 on success, or the error from bplist_hold()/bplist_cache().
 * bpl_lock is released on every return path.
 */
int
bplist_enqueue(bplist_t *bpl, blkptr_t *bp, dmu_tx_t *tx)
{
	uint64_t blk, off;
	blkptr_t *bparray;
	int err;

	ASSERT(!BP_IS_HOLE(bp));
	mutex_enter(&bpl->bpl_lock);
	err = bplist_hold(bpl);
	if (err) {
		/* Drop the lock before bailing out, as the other paths do. */
		mutex_exit(&bpl->bpl_lock);
		return (err);
	}

	/* The next free slot: data block number and index within it. */
	blk = bpl->bpl_phys->bpl_entries >> bpl->bpl_bpshift;
	off = P2PHASE(bpl->bpl_phys->bpl_entries, 1ULL << bpl->bpl_bpshift);

	err = bplist_cache(bpl, blk);
	if (err) {
		mutex_exit(&bpl->bpl_lock);
		return (err);
	}

	dmu_buf_will_dirty(bpl->bpl_cached_dbuf, tx);
	bparray = bpl->bpl_cached_dbuf->db_data;
	bparray[off] = *bp;

	/* We never need the fill count. */
	bparray[off].blk_fill = 0;

	/* The bplist will compress better if we can leave off the checksum */
	bzero(&bparray[off].blk_cksum, sizeof (bparray[off].blk_cksum));

	/* The header counters change too, so dirty the bonus buffer. */
	dmu_buf_will_dirty(bpl->bpl_dbuf, tx);
	bpl->bpl_phys->bpl_entries++;
	bpl->bpl_phys->bpl_bytes += BP_GET_ASIZE(bp);
	mutex_exit(&bpl->bpl_lock);

	return (0);
}

/*
 * Queue a copy of `bp` in memory instead of writing it to the list now.
 * Queued entries are pushed onto the head of bpl_queue and are written out
 * later by bplist_sync().  `bp` must not be a hole (asserted).
 */
void
bplist_enqueue_deferred(bplist_t *bpl, blkptr_t *bp)
{
	bplist_q_t *node;

	ASSERT(!BP_IS_HOLE(bp));

	/* Allocate before taking the lock. */
	node = kmem_alloc(sizeof (*node), KM_SLEEP);

	mutex_enter(&bpl->bpl_lock);
	node->bpq_blk = *bp;
	node->bpq_next = bpl->bpl_queue;
	bpl->bpl_queue = node;
	mutex_exit(&bpl->bpl_lock);
}

/*
 * Drain the deferred queue, writing each queued block pointer to the list
 * with bplist_enqueue() as part of `tx` and freeing its queue node.
 *
 * bpl_lock is dropped around each bplist_enqueue() call, which takes the
 * lock itself.  A failed enqueue trips the VERIFY.
 */
void
bplist_sync(bplist_t *bpl, dmu_tx_t *tx)
{
	bplist_q_t *bpq;

	mutex_enter(&bpl->bpl_lock);
	for (;;) {
		bpq = bpl->bpl_queue;
		if (bpq == NULL)
			break;

		/* Unlink the head while the lock is still held. */
		bpl->bpl_queue = bpq->bpq_next;
		mutex_exit(&bpl->bpl_lock);

		VERIFY(0 == bplist_enqueue(bpl, &bpq->bpq_blk, tx));
		kmem_free(bpq, sizeof (*bpq));

		mutex_enter(&bpl->bpl_lock);
	}
	mutex_exit(&bpl->bpl_lock);
}

/*
 * Empty the bplist as part of `tx`: free the object's data range and reset
 * the header's entry and byte counters to zero.
 *
 * The deferred queue must already be empty (asserted).  A failure to hold
 * the header buffer or to free the range trips a VERIFY rather than being
 * returned to the caller.
 */
void
bplist_vacate(bplist_t *bpl, dmu_tx_t *tx)
{
	mutex_enter(&bpl->bpl_lock);
	ASSERT3P(bpl->bpl_queue, ==, NULL);
	VERIFY(0 == bplist_hold(bpl));
	/* The header counters are rewritten below, so dirty the bonus buffer. */
	dmu_buf_will_dirty(bpl->bpl_dbuf, tx);
	/*
	 * Offset 0 with length -1ULL; presumably "everything from the start
	 * of the object onward" -- confirm against dmu_free_range().
	 */
	VERIFY(0 == dmu_free_range(bpl->bpl_mos,
	    bpl->bpl_object, 0, -1ULL, tx));
	bpl->bpl_phys->bpl_entries = 0;
	bpl->bpl_phys->bpl_bytes = 0;
	mutex_exit(&bpl->bpl_lock);
}