usr/src/uts/common/fs/zfs/dbuf.c
changeset 4944 96d96f8de974
parent 4831 41ec732c6d9f
child 5370 eb153afce98e
equal deleted inserted replaced
4943:f73f303e6a06 4944:96d96f8de974
   305 		ASSERT(db->db_blkid == DB_BONUS_BLKID ||
   305 		ASSERT(db->db_blkid == DB_BONUS_BLKID ||
   306 		    list_head(&dn->dn_dbufs));
   306 		    list_head(&dn->dn_dbufs));
   307 	}
   307 	}
   308 	if (db->db_blkid == DB_BONUS_BLKID) {
   308 	if (db->db_blkid == DB_BONUS_BLKID) {
   309 		ASSERT(dn != NULL);
   309 		ASSERT(dn != NULL);
   310 		ASSERT3U(db->db.db_size, ==, dn->dn_bonuslen);
   310 		ASSERT3U(db->db.db_size, >=, dn->dn_bonuslen);
   311 		ASSERT3U(db->db.db_offset, ==, DB_BONUS_BLKID);
   311 		ASSERT3U(db->db.db_offset, ==, DB_BONUS_BLKID);
   312 	} else {
   312 	} else {
   313 		ASSERT3U(db->db.db_offset, ==, db->db_blkid * db->db.db_size);
   313 		ASSERT3U(db->db.db_offset, ==, db->db_blkid * db->db.db_size);
   314 	}
   314 	}
   315 
   315 
   466 	ASSERT(MUTEX_HELD(&db->db_mtx));
   466 	ASSERT(MUTEX_HELD(&db->db_mtx));
   467 	ASSERT(db->db_state == DB_UNCACHED);
   467 	ASSERT(db->db_state == DB_UNCACHED);
   468 	ASSERT(db->db_buf == NULL);
   468 	ASSERT(db->db_buf == NULL);
   469 
   469 
   470 	if (db->db_blkid == DB_BONUS_BLKID) {
   470 	if (db->db_blkid == DB_BONUS_BLKID) {
   471 		ASSERT3U(db->db_dnode->dn_bonuslen, ==, db->db.db_size);
   471 		int bonuslen = db->db_dnode->dn_bonuslen;
       
   472 
       
   473 		ASSERT3U(bonuslen, <=, db->db.db_size);
   472 		db->db.db_data = zio_buf_alloc(DN_MAX_BONUSLEN);
   474 		db->db.db_data = zio_buf_alloc(DN_MAX_BONUSLEN);
   473 		arc_space_consume(DN_MAX_BONUSLEN);
   475 		arc_space_consume(DN_MAX_BONUSLEN);
   474 		if (db->db.db_size < DN_MAX_BONUSLEN)
   476 		if (bonuslen < DN_MAX_BONUSLEN)
   475 			bzero(db->db.db_data, DN_MAX_BONUSLEN);
   477 			bzero(db->db.db_data, DN_MAX_BONUSLEN);
   476 		bcopy(DN_BONUS(db->db_dnode->dn_phys), db->db.db_data,
   478 		bcopy(DN_BONUS(db->db_dnode->dn_phys), db->db.db_data,
   477 		    db->db.db_size);
   479 		    bonuslen);
   478 		dbuf_update_data(db);
   480 		dbuf_update_data(db);
   479 		db->db_state = DB_CACHED;
   481 		db->db_state = DB_CACHED;
   480 		mutex_exit(&db->db_mtx);
   482 		mutex_exit(&db->db_mtx);
   481 		return;
   483 		return;
   482 	}
   484 	}
   779 	}
   781 	}
   780 	mutex_exit(&dn->dn_dbufs_mtx);
   782 	mutex_exit(&dn->dn_dbufs_mtx);
   781 }
   783 }
   782 
   784 
   783 static int
   785 static int
   784 dbuf_new_block(dmu_buf_impl_t *db)
   786 dbuf_block_freeable(dmu_buf_impl_t *db)
   785 {
   787 {
       	/*
       	 * Reading this hunk: the first line of each pair is the old
       	 * function, dbuf_new_block(); the second is its replacement,
       	 * dbuf_block_freeable().
       	 *
       	 * Both compute birth_txg the same way: db_last_dirty->dr_txg
       	 * when a dirty record exists, otherwise db_blkptr->blk_birth
       	 * when a block pointer exists, otherwise 0.
       	 *
       	 * The result is reported in the opposite sense:
       	 *   old: birth_txg == 0 gives TRUE; otherwise the negation of
       	 *        dsl_dataset_block_freeable(ds, birth_txg).
       	 *   new: birth_txg == 0 gives FALSE; otherwise the value of
       	 *        dsl_dataset_block_freeable(ds, birth_txg).
       	 *
       	 * The ds == NULL case is not a pure inversion.  The old code
       	 * returned FALSE up front for every such buffer; the new code
       	 * returns TRUE when birth_txg is nonzero and FALSE when it is
       	 * zero, without calling dsl_dataset_block_freeable().
       	 *
       	 * Both versions assert that db_mtx is held by the caller.
       	 */
   786 	dsl_dataset_t *ds = db->db_objset->os_dsl_dataset;
   788 	dsl_dataset_t *ds = db->db_objset->os_dsl_dataset;
   787 	uint64_t birth_txg = 0;
   789 	uint64_t birth_txg = 0;
   788 
       
   789 	/* Don't count meta-objects */
       
   790 	if (ds == NULL)
       
   791 		return (FALSE);
       
   792 
   790 
   793 	/*
   791 	/*
   794 	 * We don't need any locking to protect db_blkptr:
   792 	 * We don't need any locking to protect db_blkptr:
   795 	 * If it's syncing, then db_last_dirty will be set
   793 	 * If it's syncing, then db_last_dirty will be set
   796 	 * so we'll ignore db_blkptr.
   794 	 * so we'll ignore db_blkptr.
   797 	 */
   795 	 */
   798 	ASSERT(MUTEX_HELD(&db->db_mtx));
   796 	ASSERT(MUTEX_HELD(&db->db_mtx));
   799 	/* If we have been dirtied since the last snapshot, its not new */
       
   800 	if (db->db_last_dirty)
   797 	if (db->db_last_dirty)
   801 		birth_txg = db->db_last_dirty->dr_txg;
   798 		birth_txg = db->db_last_dirty->dr_txg;
   802 	else if (db->db_blkptr)
   799 	else if (db->db_blkptr)
   803 		birth_txg = db->db_blkptr->blk_birth;
   800 		birth_txg = db->db_blkptr->blk_birth;
   804 
   801 
       
   802 	/* If we don't exist or are in a snapshot, we can't be freed */
   805 	if (birth_txg)
   803 	if (birth_txg)
   806 		return (!dsl_dataset_block_freeable(ds, birth_txg));
   804 		return (ds == NULL ||
       
   805 		    dsl_dataset_block_freeable(ds, birth_txg));
   807 	else
   806 	else
   808 		return (TRUE);
   807 		return (FALSE);
   809 }
   808 }
   810 
   809 
   811 void
   810 void
   812 dbuf_new_size(dmu_buf_impl_t *db, int size, dmu_tx_t *tx)
   811 dbuf_new_size(dmu_buf_impl_t *db, int size, dmu_tx_t *tx)
   813 {
   812 {
   962 	    !BP_IS_HOLE(os->os_rootbp));
   961 	    !BP_IS_HOLE(os->os_rootbp));
   963 	ASSERT(db->db.db_size != 0);
   962 	ASSERT(db->db.db_size != 0);
   964 
   963 
   965 	dprintf_dbuf(db, "size=%llx\n", (u_longlong_t)db->db.db_size);
   964 	dprintf_dbuf(db, "size=%llx\n", (u_longlong_t)db->db.db_size);
   966 
   965 
       
   966 	if (db->db_blkid != DB_BONUS_BLKID) {
       
   967 		/*
       
   968 		 * Update the accounting.
       
   969 		 */
       
   970 		if (dbuf_block_freeable(db)) {
       
   971 			blkptr_t *bp = db->db_blkptr;
       
   972 			int64_t willfree = (bp && !BP_IS_HOLE(bp)) ?
       
   973 			    bp_get_dasize(os->os_spa, bp) : db->db.db_size;
       
   974 			/*
       
   975 			 * This is only a guess -- if the dbuf is dirty
       
   976 			 * in a previous txg, we don't know how much
       
   977 			 * space it will use on disk yet.  We should
       
   978 			 * really have the struct_rwlock to access
       
   979 			 * db_blkptr, but since this is just a guess,
       
   980 			 * it's OK if we get an odd answer.
       
   981 			 */
       
   982 			dnode_willuse_space(dn, -willfree, tx);
       
   983 		}
       
   984 		dnode_willuse_space(dn, db->db.db_size, tx);
       
   985 	}
       
   986 
   967 	/*
   987 	/*
   968 	 * If this buffer is dirty in an old transaction group we need
   988 	 * If this buffer is dirty in an old transaction group we need
   969 	 * to make a copy of it so that the changes we make in this
   989 	 * to make a copy of it so that the changes we make in this
   970 	 * transaction group won't leak out when we sync the older txg.
   990 	 * transaction group won't leak out when we sync the older txg.
   971 	 */
   991 	 */
  1009 	if (db->db_level == 0 && db->db_blkid != DB_BONUS_BLKID) {
  1029 	if (db->db_level == 0 && db->db_blkid != DB_BONUS_BLKID) {
  1010 		mutex_enter(&dn->dn_mtx);
  1030 		mutex_enter(&dn->dn_mtx);
  1011 		dnode_clear_range(dn, db->db_blkid, 1, tx);
  1031 		dnode_clear_range(dn, db->db_blkid, 1, tx);
  1012 		mutex_exit(&dn->dn_mtx);
  1032 		mutex_exit(&dn->dn_mtx);
  1013 		db->db_freed_in_flight = FALSE;
  1033 		db->db_freed_in_flight = FALSE;
  1014 	}
       
  1015 
       
  1016 	if (db->db_blkid != DB_BONUS_BLKID) {
       
  1017 		/*
       
  1018 		 * Update the accounting.
       
  1019 		 */
       
  1020 		if (!dbuf_new_block(db) && db->db_blkptr) {
       
  1021 			/*
       
  1022 			 * This is only a guess -- if the dbuf is dirty
       
  1023 			 * in a previous txg, we don't know how much
       
  1024 			 * space it will use on disk yet.  We should
       
  1025 			 * really have the struct_rwlock to access
       
  1026 			 * db_blkptr, but since this is just a guess,
       
  1027 			 * it's OK if we get an odd answer.
       
  1028 			 */
       
  1029 			dnode_willuse_space(dn,
       
  1030 			    -bp_get_dasize(os->os_spa, db->db_blkptr), tx);
       
  1031 		}
       
  1032 		dnode_willuse_space(dn, db->db.db_size, tx);
       
  1033 	}
  1034 	}
  1034 
  1035 
  1035 	/*
  1036 	/*
  1036 	 * This buffer is now part of this txg
  1037 	 * This buffer is now part of this txg
  1037 	 */
  1038 	 */
  1295 	db->db_blkptr = NULL;
  1296 	db->db_blkptr = NULL;
  1296 
  1297 
  1297 	if (db->db_blkid != DB_BONUS_BLKID && MUTEX_HELD(&dn->dn_dbufs_mtx)) {
  1298 	if (db->db_blkid != DB_BONUS_BLKID && MUTEX_HELD(&dn->dn_dbufs_mtx)) {
  1298 		list_remove(&dn->dn_dbufs, db);
  1299 		list_remove(&dn->dn_dbufs, db);
  1299 		dnode_rele(dn, db);
  1300 		dnode_rele(dn, db);
       
  1301 		db->db_dnode = NULL;
  1300 	}
  1302 	}
  1301 
  1303 
  1302 	if (db->db_buf)
  1304 	if (db->db_buf)
  1303 		dbuf_gone = arc_buf_evict(db->db_buf);
  1305 		dbuf_gone = arc_buf_evict(db->db_buf);
  1304 
  1306 
  1395 	db->db_immediate_evict = 0;
  1397 	db->db_immediate_evict = 0;
  1396 	db->db_freed_in_flight = 0;
  1398 	db->db_freed_in_flight = 0;
  1397 
  1399 
  1398 	if (blkid == DB_BONUS_BLKID) {
  1400 	if (blkid == DB_BONUS_BLKID) {
  1399 		ASSERT3P(parent, ==, dn->dn_dbuf);
  1401 		ASSERT3P(parent, ==, dn->dn_dbuf);
  1400 		db->db.db_size = dn->dn_bonuslen;
  1402 		db->db.db_size = DN_MAX_BONUSLEN -
       
  1403 		    (dn->dn_nblkptr-1) * sizeof (blkptr_t);
       
  1404 		ASSERT3U(db->db.db_size, >=, dn->dn_bonuslen);
  1401 		db->db.db_offset = DB_BONUS_BLKID;
  1405 		db->db.db_offset = DB_BONUS_BLKID;
  1402 		db->db_state = DB_UNCACHED;
  1406 		db->db_state = DB_UNCACHED;
  1403 		/* the bonus dbuf is not placed in the hash table */
  1407 		/* the bonus dbuf is not placed in the hash table */
  1404 		arc_space_consume(sizeof (dmu_buf_impl_t));
  1408 		arc_space_consume(sizeof (dmu_buf_impl_t));
  1405 		return (db);
  1409 		return (db);
  1469 dbuf_destroy(dmu_buf_impl_t *db)
  1473 dbuf_destroy(dmu_buf_impl_t *db)
  1470 {
  1474 {
  1471 	ASSERT(refcount_is_zero(&db->db_holds));
  1475 	ASSERT(refcount_is_zero(&db->db_holds));
  1472 
  1476 
  1473 	if (db->db_blkid != DB_BONUS_BLKID) {
  1477 	if (db->db_blkid != DB_BONUS_BLKID) {
  1474 		dnode_t *dn = db->db_dnode;
       
  1475 		boolean_t need_mutex = !MUTEX_HELD(&dn->dn_dbufs_mtx);
       
  1476 
       
  1477 		if (need_mutex)
       
  1478 			mutex_enter(&dn->dn_dbufs_mtx);
       
  1479 
       
  1480 		/*
  1478 		/*
  1481 		 * If this dbuf is still on the dn_dbufs list,
  1479 		 * If this dbuf is still on the dn_dbufs list,
  1482 		 * remove it from that list.
  1480 		 * remove it from that list.
  1483 		 */
  1481 		 */
  1484 		if (list_link_active(&db->db_link)) {
  1482 		if (db->db_dnode) {
  1485 			ASSERT(need_mutex);
  1483 			dnode_t *dn = db->db_dnode;
       
  1484 
       
  1485 			mutex_enter(&dn->dn_dbufs_mtx);
  1486 			list_remove(&dn->dn_dbufs, db);
  1486 			list_remove(&dn->dn_dbufs, db);
  1487 			mutex_exit(&dn->dn_dbufs_mtx);
  1487 			mutex_exit(&dn->dn_dbufs_mtx);
  1488 
  1488 
  1489 			dnode_rele(dn, db);
  1489 			dnode_rele(dn, db);
  1490 		} else if (need_mutex) {
  1490 			db->db_dnode = NULL;
  1491 			mutex_exit(&dn->dn_dbufs_mtx);
       
  1492 		}
  1491 		}
  1493 		dbuf_hash_remove(db);
  1492 		dbuf_hash_remove(db);
  1494 	}
  1493 	}
  1495 	db->db_parent = NULL;
  1494 	db->db_parent = NULL;
  1496 	db->db_dnode = NULL;
       
  1497 	db->db_buf = NULL;
  1495 	db->db_buf = NULL;
  1498 
  1496 
  1499 	ASSERT(!list_link_active(&db->db_link));
  1497 	ASSERT(!list_link_active(&db->db_link));
  1500 	ASSERT(db->db.db_data == NULL);
  1498 	ASSERT(db->db.db_data == NULL);
  1501 	ASSERT(db->db_hash_next == NULL);
  1499 	ASSERT(db->db_hash_next == NULL);
  1660 	dmu_buf_impl_t *db;
  1658 	dmu_buf_impl_t *db;
  1661 	int err = dbuf_hold_impl(dn, level, blkid, FALSE, tag, &db);
  1659 	int err = dbuf_hold_impl(dn, level, blkid, FALSE, tag, &db);
  1662 	return (err ? NULL : db);
  1660 	return (err ? NULL : db);
  1663 }
  1661 }
  1664 
  1662 
  1665 dmu_buf_impl_t *
  1663 void
  1666 dbuf_create_bonus(dnode_t *dn)
  1664 dbuf_create_bonus(dnode_t *dn)
  1667 {
  1665 {
       	/*
       	 * Reading this hunk: the first line of each pair is the old
       	 * version, the second is the new one.
       	 *
       	 * Old: builds the bonus dbuf with dbuf_create() and returns
       	 * the pointer.  The local db is first initialized from
       	 * dn->dn_bonus and then overwritten by the dbuf_create()
       	 * result; dn->dn_bonus itself is not assigned here.
       	 *
       	 * New: returns void and stores the dbuf_create() result
       	 * directly in dn->dn_bonus.
       	 *
       	 * Both versions assert that dn_struct_rwlock is write-held
       	 * and that dn->dn_bonus is NULL on entry.
       	 *
       	 * NOTE(review): the return type changed, so callers that used
       	 * the returned pointer need updating; they are not visible in
       	 * this view -- confirm.
       	 */
  1668 	dmu_buf_impl_t *db = dn->dn_bonus;
       
  1669 
       
  1670 	ASSERT(RW_WRITE_HELD(&dn->dn_struct_rwlock));
  1666 	ASSERT(RW_WRITE_HELD(&dn->dn_struct_rwlock));
  1671 
  1667 
  1672 	ASSERT(dn->dn_bonus == NULL);
  1668 	ASSERT(dn->dn_bonus == NULL);
  1673 	db = dbuf_create(dn, 0, DB_BONUS_BLKID, dn->dn_dbuf, NULL);
  1669 	dn->dn_bonus = dbuf_create(dn, 0, DB_BONUS_BLKID, dn->dn_dbuf, NULL);
  1674 	return (db);
       
  1675 }
  1670 }
  1676 
  1671 
  1677 #pragma weak dmu_buf_add_ref = dbuf_add_ref
  1672 #pragma weak dmu_buf_add_ref = dbuf_add_ref
  1678 void
  1673 void
  1679 dbuf_add_ref(dmu_buf_impl_t *db, void *tag)
  1674 dbuf_add_ref(dmu_buf_impl_t *db, void *tag)
  1917 	 * will be synced, since it must have been dirty for dbuf_sync to
  1912 	 * will be synced, since it must have been dirty for dbuf_sync to
  1918 	 * be called).
  1913 	 * be called).
  1919 	 */
  1914 	 */
  1920 	if (db->db_blkid == DB_BONUS_BLKID) {
  1915 	if (db->db_blkid == DB_BONUS_BLKID) {
  1921 		dbuf_dirty_record_t **drp;
  1916 		dbuf_dirty_record_t **drp;
  1922 		/*
  1917 
  1923 		 * Use dn_phys->dn_bonuslen since db.db_size is the length
       
  1924 		 * of the bonus buffer in the open transaction rather than
       
  1925 		 * the syncing transaction.
       
  1926 		 */
       
  1927 		ASSERT(*datap != NULL);
  1918 		ASSERT(*datap != NULL);
  1928 		ASSERT3U(db->db_level, ==, 0);
  1919 		ASSERT3U(db->db_level, ==, 0);
  1929 		ASSERT3U(dn->dn_phys->dn_bonuslen, <=, DN_MAX_BONUSLEN);
  1920 		ASSERT3U(dn->dn_phys->dn_bonuslen, <=, DN_MAX_BONUSLEN);
  1930 		bcopy(*datap, DN_BONUS(dn->dn_phys), dn->dn_phys->dn_bonuslen);
  1921 		bcopy(*datap, DN_BONUS(dn->dn_phys), dn->dn_phys->dn_bonuslen);
  1931 		if (*datap != db->db.db_data) {
  1922 		if (*datap != db->db.db_data) {