upstream/illumos/illumos-gate: changeset 11935:538c866aaac6

--- a/usr/src/cmd/mdb/common/modules/zfs/zfs.c	Tue Mar 16 06:44:44 2010 -0700
+++ b/usr/src/cmd/mdb/common/modules/zfs/zfs.c	Tue Mar 16 09:43:38 2010 -0600
@@ -38,6 +38,8 @@
 #include <sys/zap_leaf.h>
 #include <sys/zap_impl.h>
 #include <ctype.h>
+#include <sys/zfs_acl.h>
+#include <sys/sa_impl.h>
 
 #ifndef _KERNEL
 #include "../genunix/list.h"
@@ -217,7 +219,7 @@
 objset_name(uintptr_t addr, char *buf)
 {
 	static int gotid;
-	static mdb_ctf_id_t os_id, ds_id;
+	static mdb_ctf_id_t osi_id, ds_id;
 	uintptr_t os_dsl_dataset;
 	char ds_snapname[MAXNAMELEN];
 	uintptr_t ds_dir;
@@ -225,9 +227,9 @@
 	buf[0] = '\0';
 
 	if (!gotid) {
-		if (mdb_ctf_lookup_by_name("struct objset",
-		    &os_id) == -1) {
-			mdb_warn("couldn't find struct objset");
+		if (mdb_ctf_lookup_by_name("struct objset_impl",
+		    &osi_id) == -1) {
+			mdb_warn("couldn't find struct objset_impl");
 			return (DCMD_ERR);
 		}
 		if (mdb_ctf_lookup_by_name("struct dsl_dataset",
@@ -239,7 +241,7 @@
 		gotid = TRUE;
 	}
 
-	if (GETMEMBID(addr, &os_id, os_dsl_dataset, os_dsl_dataset))
+	if (GETMEMBID(addr, &osi_id, os_dsl_dataset, os_dsl_dataset))
 		return (DCMD_ERR);
 
 	if (os_dsl_dataset == 0) {
@@ -429,7 +431,7 @@
 		(void) mdb_snprintf(objectname, sizeof (objectname), "%llx",
 		    (u_longlong_t)db.db_object);
 
-	if (blkid == DB_BONUS_BLKID)
+	if (blkid == DMU_BONUS_BLKID)
 		(void) strcpy(blkidname, "bonus");
 	else
 		(void) mdb_snprintf(blkidname, sizeof (blkidname), "%llx",
@@ -716,7 +718,7 @@
 
 	if (blkid) {
 		if (strcmp(blkid, "bonus") == 0) {
-			data.blkid = DB_BONUS_BLKID;
+			data.blkid = DMU_BONUS_BLKID;
 		} else {
 			data.blkid = mdb_strtoull(blkid);
 		}
@@ -2291,6 +2293,602 @@
 	return (DCMD_OK);
 }
 
+/* ARGSUSED */
+static int
+sa_attr_table(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+{
+	sa_attr_table_t *table;
+	sa_os_t sa_os;
+	char *name;
+	int i;
+
+	if (mdb_vread(&sa_os, sizeof (sa_os_t), addr) == -1) {
+		mdb_warn("failed to read sa_os at %p", addr);
+		return (DCMD_ERR);
+	}
+
+	table = mdb_alloc(sizeof (sa_attr_table_t) * sa_os.sa_num_attrs,
+	    UM_SLEEP | UM_GC);
+	name = mdb_alloc(MAXPATHLEN, UM_SLEEP | UM_GC);
+
+	if (mdb_vread(table, sizeof (sa_attr_table_t) * sa_os.sa_num_attrs,
+	    (uintptr_t)sa_os.sa_attr_table) == -1) {
+		mdb_warn("failed to read sa_attr_table at %p",
+		    sa_os.sa_attr_table);
+		return (DCMD_ERR);
+	}
+
+	mdb_printf("%<u>%-10s %-10s %-10s %-10s %s%</u>\n",
+	    "ATTR ID", "REGISTERED", "LENGTH", "BSWAP", "NAME");
+	for (i = 0; i != sa_os.sa_num_attrs; i++) {
+		mdb_readstr(name, MAXPATHLEN, (uintptr_t)table[i].sa_name);
+		mdb_printf("%5x   %8x %8x %8x          %-s\n",
+		    (int)table[i].sa_attr, (int)table[i].sa_registered,
+		    (int)table[i].sa_length, table[i].sa_byteswap, name);
+	}
+	return (DCMD_OK);
+}
+
+/* Read the attr_count-entry offset table of the sa_idx_tab at addr. */
+static int
+sa_get_off_table(uintptr_t addr, uint32_t **off_tab, int attr_count)
+{
+	size_t tab_size = attr_count * sizeof (uint32_t);
+	uintptr_t idx_table;
+
+	if (GETMEMB(addr, struct sa_idx_tab, sa_idx_tab, idx_table)) {
+		mdb_printf("can't find offset table in sa_idx_tab\n");
+		return (-1);
+	}
+
+	*off_tab = mdb_alloc(tab_size, UM_SLEEP | UM_GC);
+	if (mdb_vread(*off_tab, tab_size, idx_table) == -1) {
+		mdb_warn("failed to read attribute offset table %p",
+		    idx_table);
+		return (-1);
+	}
+
+	return (0);	/* 0 on success, -1 on failure */
+}
+
+/*ARGSUSED*/
+static int
+sa_attr_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+{
+	uint32_t *offset_tab;
+	int attr_count;
+	uint64_t attr_id;
+	uintptr_t attr_addr;
+	uintptr_t bonus_tab, spill_tab;
+	uintptr_t db_bonus, db_spill;
+	uintptr_t os, os_sa;
+	uintptr_t db_data;
+
+	if (argc != 1 || argv[0].a_type != MDB_TYPE_STRING)
+		return (DCMD_USAGE);
+	attr_id = mdb_strtoull(argv[0].a_un.a_str);
+
+	if (GETMEMB(addr, struct sa_handle, sa_bonus_tab, bonus_tab) ||
+	    GETMEMB(addr, struct sa_handle, sa_spill_tab, spill_tab) ||
+	    GETMEMB(addr, struct sa_handle, sa_os, os) ||
+	    GETMEMB(addr, struct sa_handle, sa_bonus, db_bonus) ||
+	    GETMEMB(addr, struct sa_handle, sa_spill, db_spill)) {
+		mdb_printf("Can't find necessary information in sa_handle\n");
+		return (DCMD_ERR);
+	}
+
+	if (GETMEMB(os, struct objset, os_sa, os_sa)) {
+		mdb_printf("Can't find os_sa in objset\n");
+		return (DCMD_ERR);
+	}
+
+	if (GETMEMB(os_sa, struct sa_os, sa_num_attrs, attr_count)) {
+		mdb_printf("Can't find sa_num_attrs\n");
+		return (DCMD_ERR);
+	}
+
+	/* The offset tables hold attr_count entries: ids 0..attr_count-1. */
+	if (attr_id >= attr_count) {
+		mdb_printf("attribute id number is out of range\n");
+		return (DCMD_ERR);
+	}
+
+	if (bonus_tab) {
+		if (sa_get_off_table(bonus_tab, &offset_tab,
+		    attr_count) == -1)
+			return (DCMD_ERR);
+
+		if (GETMEMB(db_bonus, struct dmu_buf, db_data, db_data)) {
+			mdb_printf("can't find db_data in bonus dbuf\n");
+			return (DCMD_ERR);
+		}
+	}
+
+	/*
+	 * offset_tab and db_data are only set once a table has been loaded,
+	 * so never consult them when there is no bonus table; go straight
+	 * to the spill table (or fail) in that case.
+	 */
+	if ((bonus_tab == NULL || !TOC_ATTR_PRESENT(offset_tab[attr_id])) &&
+	    spill_tab == NULL) {
+		mdb_printf("Attribute does not exist\n");
+		return (DCMD_ERR);
+	} else if (bonus_tab == NULL ||
+	    !TOC_ATTR_PRESENT(offset_tab[attr_id])) {
+		if (sa_get_off_table(spill_tab, &offset_tab,
+		    attr_count) == -1)
+			return (DCMD_ERR);
+		if (GETMEMB(db_spill, struct dmu_buf, db_data, db_data)) {
+			mdb_printf("can't find db_data in spill dbuf\n");
+			return (DCMD_ERR);
+		}
+		if (!TOC_ATTR_PRESENT(offset_tab[attr_id])) {
+			mdb_printf("Attribute does not exist\n");
+			return (DCMD_ERR);
+		}
+	}
+	attr_addr = db_data + TOC_OFF(offset_tab[attr_id]);
+	mdb_printf("%p\n", attr_addr);
+	return (DCMD_OK);
+}
+
+/* ARGSUSED */
+static int
+zfs_ace_print_common(uintptr_t addr, uint_t flags,
+    uint64_t id, uint32_t access_mask, uint16_t ace_flags,
+    uint16_t ace_type, int verbose)
+{
+	if (DCMD_HDRSPEC(flags) && !verbose)
+		mdb_printf("%<u>%-?s %-8s %-8s %-8s %s%</u>\n",
+		    "ADDR", "FLAGS", "MASK", "TYPE", "ID");
+
+	if (!verbose) {
+		mdb_printf("%0?p %-8x %-8x %-8x %-llx\n", addr,
+		    ace_flags, access_mask, ace_type, id);
+		return (DCMD_OK);
+	}
+
+	switch (ace_flags & ACE_TYPE_FLAGS) {
+	case ACE_OWNER:
+		mdb_printf("owner@:");
+		break;
+	case (ACE_IDENTIFIER_GROUP | ACE_GROUP):
+		mdb_printf("group@:");
+		break;
+	case ACE_EVERYONE:
+		mdb_printf("everyone@:");
+		break;
+	case ACE_IDENTIFIER_GROUP:
+		mdb_printf("group:%llx:", (u_longlong_t)id);
+		break;
+	case 0: /* User entry */
+		mdb_printf("user:%llx:", (u_longlong_t)id);
+		break;
+	}
+
+	/* print out permission mask */
+	if (access_mask & ACE_READ_DATA)
+		mdb_printf("r");
+	else
+		mdb_printf("-");
+	if (access_mask & ACE_WRITE_DATA)
+		mdb_printf("w");
+	else
+		mdb_printf("-");
+	if (access_mask & ACE_EXECUTE)
+		mdb_printf("x");
+	else
+		mdb_printf("-");
+	if (access_mask & ACE_APPEND_DATA)
+		mdb_printf("p");
+	else
+		mdb_printf("-");
+	if (access_mask & ACE_DELETE)
+		mdb_printf("d");
+	else
+		mdb_printf("-");
+	if (access_mask & ACE_DELETE_CHILD)
+		mdb_printf("D");
+	else
+		mdb_printf("-");
+	if (access_mask & ACE_READ_ATTRIBUTES)
+		mdb_printf("a");
+	else
+		mdb_printf("-");
+	if (access_mask & ACE_WRITE_ATTRIBUTES)
+		mdb_printf("A");
+	else
+		mdb_printf("-");
+	if (access_mask & ACE_READ_NAMED_ATTRS)
+		mdb_printf("R");
+	else
+		mdb_printf("-");
+	if (access_mask & ACE_WRITE_NAMED_ATTRS)
+		mdb_printf("W");
+	else
+		mdb_printf("-");
+	if (access_mask & ACE_READ_ACL)
+		mdb_printf("c");
+	else
+		mdb_printf("-");
+	if (access_mask & ACE_WRITE_ACL)
+		mdb_printf("C");
+	else
+		mdb_printf("-");
+	if (access_mask & ACE_WRITE_OWNER)
+		mdb_printf("o");
+	else
+		mdb_printf("-");
+	if (access_mask & ACE_SYNCHRONIZE)
+		mdb_printf("s");
+	else
+		mdb_printf("-");
+
+	mdb_printf(":");
+
+	/* Print out inheritance flags */
+	if (ace_flags & ACE_FILE_INHERIT_ACE)
+		mdb_printf("f");
+	else
+		mdb_printf("-");
+	if (ace_flags & ACE_DIRECTORY_INHERIT_ACE)
+		mdb_printf("d");
+	else
+		mdb_printf("-");
+	if (ace_flags & ACE_INHERIT_ONLY_ACE)
+		mdb_printf("i");
+	else
+		mdb_printf("-");
+	if (ace_flags & ACE_NO_PROPAGATE_INHERIT_ACE)
+		mdb_printf("n");
+	else
+		mdb_printf("-");
+	if (ace_flags & ACE_SUCCESSFUL_ACCESS_ACE_FLAG)
+		mdb_printf("S");
+	else
+		mdb_printf("-");
+	if (ace_flags & ACE_FAILED_ACCESS_ACE_FLAG)
+		mdb_printf("F");
+	else
+		mdb_printf("-");
+	if (ace_flags & ACE_INHERITED_ACE)
+		mdb_printf("I");
+	else
+		mdb_printf("-");
+
+	switch (ace_type) {
+	case ACE_ACCESS_ALLOWED_ACE_TYPE:
+		mdb_printf(":allow\n");
+		break;
+	case ACE_ACCESS_DENIED_ACE_TYPE:
+		mdb_printf(":deny\n");
+		break;
+	case ACE_SYSTEM_AUDIT_ACE_TYPE:
+		mdb_printf(":audit\n");
+		break;
+	case ACE_SYSTEM_ALARM_ACE_TYPE:
+		mdb_printf(":alarm\n");
+		break;
+	default:
+		mdb_printf(":?\n");
+	}
+	return (DCMD_OK);
+}
+
+/* ARGSUSED */
+static int
+zfs_ace_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+{
+	zfs_ace_t zace;
+	int verbose = FALSE;
+	uint64_t id;
+
+	if (!(flags & DCMD_ADDRSPEC))
+		return (DCMD_USAGE);
+
+	if (mdb_getopts(argc, argv,
+	    'v', MDB_OPT_SETBITS, TRUE, &verbose, TRUE, NULL) != argc)
+		return (DCMD_USAGE);
+
+	if (mdb_vread(&zace, sizeof (zfs_ace_t), addr) == -1) {
+		mdb_warn("failed to read zfs_ace_t");
+		return (DCMD_ERR);
+	}
+
+	if ((zace.z_hdr.z_flags & ACE_TYPE_FLAGS) == 0 ||
+	    (zace.z_hdr.z_flags & ACE_TYPE_FLAGS) == ACE_IDENTIFIER_GROUP)
+		id = zace.z_fuid;
+	else
+		id = -1;
+
+	return (zfs_ace_print_common(addr, flags, id, zace.z_hdr.z_access_mask,
+	    zace.z_hdr.z_flags, zace.z_hdr.z_type, verbose));
+}
+
+/* ARGSUSED */
+static int
+zfs_ace0_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+{
+	ace_t ace;
+	uint64_t id;
+	int verbose = FALSE;
+
+	if (!(flags & DCMD_ADDRSPEC))
+		return (DCMD_USAGE);
+
+	if (mdb_getopts(argc, argv,
+	    'v', MDB_OPT_SETBITS, TRUE, &verbose, TRUE, NULL) != argc)
+		return (DCMD_USAGE);
+
+	if (mdb_vread(&ace, sizeof (ace_t), addr) == -1) {
+		mdb_warn("failed to read ace_t");
+		return (DCMD_ERR);
+	}
+
+	if ((ace.a_flags & ACE_TYPE_FLAGS) == 0 ||
+	    (ace.a_flags & ACE_TYPE_FLAGS) == ACE_IDENTIFIER_GROUP)
+		id = ace.a_who;
+	else
+		id = -1;
+
+	return (zfs_ace_print_common(addr, flags, id, ace.a_access_mask,
+	    ace.a_flags, ace.a_type, verbose));
+}
+
+typedef struct acl_dump_args {
+	int a_argc;
+	const mdb_arg_t *a_argv;
+	uint16_t a_version;
+	int a_flags;
+} acl_dump_args_t;
+
+/* ARGSUSED */
+static int
+acl_aces_cb(uintptr_t addr, const void *unknown, void *arg)
+{
+	acl_dump_args_t *acl_args = (acl_dump_args_t *)arg;
+
+	if (acl_args->a_version == 1) {
+		if (mdb_call_dcmd("zfs_ace", addr,
+		    DCMD_ADDRSPEC|acl_args->a_flags, acl_args->a_argc,
+		    acl_args->a_argv) != DCMD_OK) {
+			return (WALK_ERR);
+		}
+	} else {
+		if (mdb_call_dcmd("zfs_ace0", addr,
+		    DCMD_ADDRSPEC|acl_args->a_flags, acl_args->a_argc,
+		    acl_args->a_argv) != DCMD_OK) {
+			return (WALK_ERR);
+		}
+	}
+	acl_args->a_flags = DCMD_LOOP;
+	return (WALK_NEXT);
+}
+
+/* ARGSUSED */
+static int
+acl_cb(uintptr_t addr, const void *unknown, void *arg)
+{
+	acl_dump_args_t *acl_args = (acl_dump_args_t *)arg;
+	const char *walker;
+
+	/* Version 1 nodes are walked as zfs_ace_t, all others as ace_t. */
+	walker = (acl_args->a_version == 1) ?
+	    "zfs_acl_node_aces" : "zfs_acl_node_aces0";
+
+	/*
+	 * Walker callbacks must fail with WALK_ERR: DCMD_ERR has the same
+	 * value as WALK_DONE and would quietly end the walk as a success.
+	 */
+	if (mdb_pwalk(walker, acl_aces_cb, arg, addr) != 0) {
+		mdb_warn("can't walk ACEs");
+		return (WALK_ERR);
+	}
+	return (WALK_NEXT);
+}
+
+/* ARGSUSED */
+static int
+zfs_acl_dump(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+{
+	zfs_acl_t zacl;
+	int verbose = FALSE;
+	acl_dump_args_t acl_args;
+
+	if (!(flags & DCMD_ADDRSPEC))
+		return (DCMD_USAGE);
+
+	if (mdb_getopts(argc, argv,
+	    'v', MDB_OPT_SETBITS, TRUE, &verbose, TRUE, NULL) != argc)
+		return (DCMD_USAGE);
+
+	if (mdb_vread(&zacl, sizeof (zfs_acl_t), addr) == -1) {
+		mdb_warn("failed to read zfs_acl_t");
+		return (DCMD_ERR);
+	}
+
+	acl_args.a_argc = argc;
+	acl_args.a_argv = argv;
+	acl_args.a_version = zacl.z_version;
+	acl_args.a_flags = DCMD_LOOPFIRST;
+
+	if (mdb_pwalk("zfs_acl_node", acl_cb, &acl_args, addr) != 0) {
+		mdb_warn("can't walk ACL");
+		return (DCMD_ERR);
+	}
+
+	return (DCMD_OK);
+}
+
+/* ARGSUSED */
+static int
+zfs_acl_node_walk_init(mdb_walk_state_t *wsp)
+{
+	if (wsp->walk_addr == NULL) {
+		mdb_warn("must supply address of zfs_acl_t\n");
+		return (WALK_ERR);
+	}
+
+	/* Layer on the generic list walker, starting at the embedded z_acl. */
+	wsp->walk_addr += OFFSETOF(zfs_acl_t, z_acl);
+	if (mdb_layered_walk("list", wsp) == -1) {
+		mdb_warn("failed to walk 'list'\n");
+		return (WALK_ERR);
+	}
+
+	return (WALK_NEXT);
+}
+
+static int
+zfs_acl_node_walk_step(mdb_walk_state_t *wsp)
+{
+	zfs_acl_node_t	aclnode;
+
+	if (mdb_vread(&aclnode, sizeof (zfs_acl_node_t),
+	    wsp->walk_addr) == -1) {
+		mdb_warn("failed to read zfs_acl_node at %p", wsp->walk_addr);
+		return (WALK_ERR);
+	}
+
+	return (wsp->walk_callback(wsp->walk_addr, &aclnode, wsp->walk_cbdata));
+}
+
+typedef struct ace_walk_data {
+	int		ace_count;
+	int		ace_version;
+} ace_walk_data_t;
+
+static int
+zfs_aces_walk_init_common(mdb_walk_state_t *wsp, int version,
+    int ace_count, uintptr_t ace_data)
+{
+	ace_walk_data_t *ace_walk_data;
+
+	if (wsp->walk_addr == NULL) {
+		mdb_warn("must supply address of zfs_acl_node_t\n");
+		return (WALK_ERR);
+	}
+
+	ace_walk_data = mdb_alloc(sizeof (ace_walk_data_t), UM_SLEEP | UM_GC);
+
+	ace_walk_data->ace_count = ace_count;
+	ace_walk_data->ace_version = version;
+
+	wsp->walk_addr = ace_data;
+	wsp->walk_data = ace_walk_data;
+
+	return (WALK_NEXT);
+}
+
+static int
+zfs_acl_node_aces_walk_init_common(mdb_walk_state_t *wsp, int version)
+{
+	static int gotid;
+	static mdb_ctf_id_t acl_id;
+	int z_ace_count;
+	uintptr_t z_acldata;
+
+	/*
+	 * Fail with WALK_ERR throughout: DCMD_ERR has WALK_DONE's value.
+	 */
+	if (!gotid) {
+		if (mdb_ctf_lookup_by_name("struct zfs_acl_node",
+		    &acl_id) == -1) {
+			mdb_warn("couldn't find struct zfs_acl_node");
+			return (WALK_ERR);
+		}
+		gotid = TRUE;
+	}
+
+	if (GETMEMBID(wsp->walk_addr, &acl_id, z_ace_count, z_ace_count) ||
+	    GETMEMBID(wsp->walk_addr, &acl_id, z_acldata, z_acldata))
+		return (WALK_ERR);
+
+	return (zfs_aces_walk_init_common(wsp, version,
+	    z_ace_count, z_acldata));
+}
+
+/* ARGSUSED */
+static int
+zfs_acl_node_aces_walk_init(mdb_walk_state_t *wsp)
+{
+	return (zfs_acl_node_aces_walk_init_common(wsp, 1));
+}
+
+/* ARGSUSED */
+static int
+zfs_acl_node_aces0_walk_init(mdb_walk_state_t *wsp)
+{
+	return (zfs_acl_node_aces_walk_init_common(wsp, 0));
+}
+
+static int
+zfs_aces_walk_step(mdb_walk_state_t *wsp)
+{
+	ace_walk_data_t *ace_data = wsp->walk_data;
+	zfs_ace_t zace;
+	ace_t *acep;
+	int status;
+	int entry_type;
+	int allow_type;
+	uintptr_t ptr;
+
+	if (ace_data->ace_count == 0)
+		return (WALK_DONE);
+
+	if (mdb_vread(&zace, sizeof (zfs_ace_t), wsp->walk_addr) == -1) {
+		mdb_warn("failed to read zfs_ace_t at %#lx",
+		    wsp->walk_addr);
+		return (WALK_ERR);
+	}
+
+	switch (ace_data->ace_version) {
+	case 0:
+		acep = (ace_t *)&zace;
+		entry_type = acep->a_flags & ACE_TYPE_FLAGS;
+		allow_type = acep->a_type;
+		break;
+	case 1:
+		entry_type = zace.z_hdr.z_flags & ACE_TYPE_FLAGS;
+		allow_type = zace.z_hdr.z_type;
+		break;
+	default:
+		return (WALK_ERR);
+	}
+
+	ptr = (uintptr_t)wsp->walk_addr;
+	switch (entry_type) {
+	case ACE_OWNER:
+	case ACE_EVERYONE:
+	case (ACE_IDENTIFIER_GROUP | ACE_GROUP):
+		ptr += ace_data->ace_version == 0 ?
+		    sizeof (ace_t) : sizeof (zfs_ace_hdr_t);
+		break;
+	case ACE_IDENTIFIER_GROUP:
+	default:
+		switch (allow_type) {
+		case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
+		case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
+		case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
+		case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
+			ptr += ace_data->ace_version == 0 ?
+			    sizeof (ace_t) : sizeof (zfs_object_ace_t);
+			break;
+		default:
+			ptr += ace_data->ace_version == 0 ?
+			    sizeof (ace_t) : sizeof (zfs_ace_t);
+			break;
+		}
+	}
+
+	ace_data->ace_count--;
+	status = wsp->walk_callback(wsp->walk_addr,
+	    (void *)(uintptr_t)&zace, wsp->walk_cbdata);
+
+	wsp->walk_addr = ptr;
+	return (status);
+}
+
 /*
  * MDB module linkage information:
  *
@@ -2304,7 +2902,7 @@
 	{ "dbuf", ":", "print dmu_buf_impl_t", dbuf },
 	{ "dbuf_stats", ":", "dbuf stats", dbuf_stats },
 	{ "dbufs",
-	    "\t[-O objset_t*] [-n objset_name | \"mos\"] "
+	    "\t[-O objset_impl_t*] [-n objset_name | \"mos\"] "
 	    "[-o object | \"mdn\"] \n"
 	    "\t[-l level] [-b blkid | \"bonus\"]",
 	    "find dmu_buf_impl_t's that match specified criteria", dbufs },
@@ -2333,6 +2931,14 @@
 	{ "zfs_params", "", "print zfs tunable parameters", zfs_params },
 	{ "refcount", "", "print refcount_t holders", refcount },
 	{ "zap_leaf", "", "print zap_leaf_phys_t", zap_leaf },
+	{ "zfs_aces", ":[-v]", "print all ACEs from a zfs_acl_t",
+	    zfs_acl_dump },
+	{ "zfs_ace", ":[-v]", "print zfs_ace", zfs_ace_print },
+	{ "zfs_ace0", ":[-v]", "print zfs_ace0", zfs_ace0_print },
+	{ "sa_attr_table", ":", "print SA attribute table from sa_os_t",
+	    sa_attr_table},
+	{ "sa_attr", ": attr_id",
+	    "print SA attribute address when given sa_handle_t", sa_attr_print},
 	{ NULL }
 };
 
@@ -2366,6 +2972,13 @@
 		spa_walk_init, spa_walk_step, NULL },
 	{ "metaslab", "given a spa_t *, walk all metaslab_t structures",
 		metaslab_walk_init, metaslab_walk_step, NULL },
+	{ "zfs_acl_node", "given a zfs_acl_t, walk all zfs_acl_nodes",
+	    zfs_acl_node_walk_init, zfs_acl_node_walk_step, NULL },
+	{ "zfs_acl_node_aces", "given a zfs_acl_node_t, walk all ACEs",
+	    zfs_acl_node_aces_walk_init, zfs_aces_walk_step, NULL },
+	{ "zfs_acl_node_aces0",
+	    "given a zfs_acl_node_t, walk all ACEs as ace_t",
+	    zfs_acl_node_aces0_walk_init, zfs_aces_walk_step, NULL },
 	{ NULL }
 };

--- a/usr/src/cmd/zdb/zdb.c	Tue Mar 16 06:44:44 2010 -0700
+++ b/usr/src/cmd/zdb/zdb.c	Tue Mar 16 09:43:38 2010 -0600
@@ -34,6 +34,9 @@
 #include <sys/zap.h>
 #include <sys/fs/zfs.h>
 #include <sys/zfs_znode.h>
+#include <sys/zfs_sa.h>
+#include <sys/sa.h>
+#include <sys/sa_impl.h>
 #include <sys/vdev.h>
 #include <sys/vdev_impl.h>
 #include <sys/metaslab_impl.h>
@@ -370,6 +373,71 @@
 
 /*ARGSUSED*/
 static void
+dump_sa_attrs(objset_t *os, uint64_t object, void *data, size_t size)
+{
+	zap_cursor_t zc;
+	zap_attribute_t attr;
+
+	dump_zap_stats(os, object);
+	(void) printf("\n");
+
+	for (zap_cursor_init(&zc, os, object);
+	    zap_cursor_retrieve(&zc, &attr) == 0;
+	    zap_cursor_advance(&zc)) {
+		(void) printf("\t\t%s = ", attr.za_name);
+		if (attr.za_num_integers == 0) {
+			(void) printf("\n");
+			continue;
+		}
+		(void) printf(" %llx : [%d:%d:%d]\n",
+		    (u_longlong_t)attr.za_first_integer,
+		    (int)ATTR_LENGTH(attr.za_first_integer),
+		    (int)ATTR_BSWAP(attr.za_first_integer),
+		    (int)ATTR_NUM(attr.za_first_integer));
+	}
+	zap_cursor_fini(&zc);
+}
+
+/*ARGSUSED*/
+static void
+dump_sa_layouts(objset_t *os, uint64_t object, void *data, size_t size)
+{
+	zap_cursor_t zc;
+	zap_attribute_t attr;
+	uint16_t *layout_attrs;
+	int i;
+
+	dump_zap_stats(os, object);
+	(void) printf("\n");
+
+	for (zap_cursor_init(&zc, os, object);
+	    zap_cursor_retrieve(&zc, &attr) == 0;
+	    zap_cursor_advance(&zc)) {
+		(void) printf("\t\t%s = [", attr.za_name);
+		if (attr.za_num_integers == 0) {
+			(void) printf("]\n");	/* still close the '[' */
+			continue;
+		}
+
+		VERIFY(attr.za_integer_length == 2);
+		layout_attrs = umem_zalloc(attr.za_num_integers *
+		    attr.za_integer_length, UMEM_NOFAIL);
+
+		VERIFY(zap_lookup(os, object, attr.za_name,
+		    attr.za_integer_length,
+		    attr.za_num_integers, layout_attrs) == 0);
+
+		for (i = 0; i != attr.za_num_integers; i++)
+			(void) printf(" %d ", (int)layout_attrs[i]);
+		(void) printf("]\n");
+		umem_free(layout_attrs,
+		    attr.za_num_integers * attr.za_integer_length);
+	}
+	zap_cursor_fini(&zc);
+}
+
+/*ARGSUSED*/
+static void
 dump_zpldir(objset_t *os, uint64_t object, void *data, size_t size)
 {
 	zap_cursor_t zc;
@@ -1106,6 +1174,8 @@
 static avl_tree_t idx_tree;
 static avl_tree_t domain_tree;
 static boolean_t fuid_table_loaded;
+static boolean_t sa_loaded;
+sa_attr_type_t *sa_attr_table;
 
 static void
 fuid_table_destroy()
@@ -1138,12 +1208,12 @@
 }
 
 static void
-dump_uidgid(objset_t *os, znode_phys_t *zp)
+dump_uidgid(objset_t *os, uint64_t uid, uint64_t gid)
 {
 	uint32_t uid_idx, gid_idx;
 
-	uid_idx = FUID_INDEX(zp->zp_uid);
-	gid_idx = FUID_INDEX(zp->zp_gid);
+	uid_idx = FUID_INDEX(uid);
+	gid_idx = FUID_INDEX(gid);
 
 	/* Load domain table, if not already loaded */
 	if (!fuid_table_loaded && (uid_idx || gid_idx)) {
@@ -1158,50 +1228,103 @@
 		fuid_table_loaded = B_TRUE;
 	}
 
-	print_idstr(zp->zp_uid, "uid");
-	print_idstr(zp->zp_gid, "gid");
+	print_idstr(uid, "uid");
+	print_idstr(gid, "gid");
 }
 
 /*ARGSUSED*/
 static void
 dump_znode(objset_t *os, uint64_t object, void *data, size_t size)
 {
-	znode_phys_t *zp = data;
+	char path[MAXPATHLEN * 2];	/* allow for xattr and failure prefix */
+	sa_handle_t *hdl;
+	uint64_t xattr, rdev, gen;
+	uint64_t uid, gid, mode, fsize, parent, links;
+	uint64_t acctm[2], modtm[2], chgtm[2], crtm[2];
 	time_t z_crtime, z_atime, z_mtime, z_ctime;
-	char path[MAXPATHLEN * 2];	/* allow for xattr and failure prefix */
+	sa_bulk_attr_t bulk[11];
+	int idx = 0;
 	int error;
 
-	ASSERT(size >= sizeof (znode_phys_t));
+	if (!sa_loaded) {
+		uint64_t sa_attrs = 0;
+		uint64_t version;
+
+		VERIFY(zap_lookup(os, MASTER_NODE_OBJ, ZPL_VERSION_STR,
+		    8, 1, &version) == 0);
+		if (version >= ZPL_VERSION_SA) {
+			VERIFY(zap_lookup(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS,
+			    8, 1, &sa_attrs) == 0);
+		}
+		sa_attr_table = sa_setup(os, sa_attrs,
+		    zfs_attr_table, ZPL_END);
+		sa_loaded = B_TRUE;
+	}
+
+	if (sa_handle_get(os, object, NULL, SA_HDL_PRIVATE, &hdl)) {
+		(void) printf("Failed to get handle for SA znode\n");
+		return;
+	}
+
+	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_UID], NULL, &uid, 8);
+	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_GID], NULL, &gid, 8);
+	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_LINKS], NULL,
+	    &links, 8);
+	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_GEN], NULL, &gen, 8);
+	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_MODE], NULL,
+	    &mode, 8);
+	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_PARENT],
+	    NULL, &parent, 8);
+	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_SIZE], NULL,
+	    &fsize, 8);
+	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_ATIME], NULL,
+	    acctm, 16);
+	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_MTIME], NULL,
+	    modtm, 16);
+	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_CRTIME], NULL,
+	    crtm, 16);
+	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_CTIME], NULL,
+	    chgtm, 16);
+
+	if (sa_bulk_lookup(hdl, bulk, idx)) {
+		(void) sa_handle_destroy(hdl);
+		return;
+	}
 
 	error = zfs_obj_to_path(os, object, path, sizeof (path));
 	if (error != 0) {
 		(void) snprintf(path, sizeof (path), "\?\?\?<object#%llu>",
 		    (u_longlong_t)object);
 	}
-
 	if (dump_opt['d'] < 3) {
 		(void) printf("\t%s\n", path);
+		(void) sa_handle_destroy(hdl);
 		return;
 	}
 
-	z_crtime = (time_t)zp->zp_crtime[0];
-	z_atime = (time_t)zp->zp_atime[0];
-	z_mtime = (time_t)zp->zp_mtime[0];
-	z_ctime = (time_t)zp->zp_ctime[0];
+	z_crtime = (time_t)crtm[0];
+	z_atime = (time_t)acctm[0];
+	z_mtime = (time_t)modtm[0];
+	z_ctime = (time_t)chgtm[0];
 
 	(void) printf("\tpath	%s\n", path);
-	dump_uidgid(os, zp);
+	dump_uidgid(os, uid, gid);
 	(void) printf("\tatime	%s", ctime(&z_atime));
 	(void) printf("\tmtime	%s", ctime(&z_mtime));
 	(void) printf("\tctime	%s", ctime(&z_ctime));
 	(void) printf("\tcrtime	%s", ctime(&z_crtime));
-	(void) printf("\tgen	%llu\n", (u_longlong_t)zp->zp_gen);
-	(void) printf("\tmode	%llo\n", (u_longlong_t)zp->zp_mode);
-	(void) printf("\tsize	%llu\n", (u_longlong_t)zp->zp_size);
-	(void) printf("\tparent	%llu\n", (u_longlong_t)zp->zp_parent);
-	(void) printf("\tlinks	%llu\n", (u_longlong_t)zp->zp_links);
-	(void) printf("\txattr	%llu\n", (u_longlong_t)zp->zp_xattr);
-	(void) printf("\trdev	0x%016llx\n", (u_longlong_t)zp->zp_rdev);
+	(void) printf("\tgen	%llu\n", (u_longlong_t)gen);
+	(void) printf("\tmode	%llo\n", (u_longlong_t)mode);
+	(void) printf("\tsize	%llu\n", (u_longlong_t)fsize);
+	(void) printf("\tparent	%llu\n", (u_longlong_t)parent);
+	(void) printf("\tlinks	%llu\n", (u_longlong_t)links);
+	if (sa_lookup(hdl, sa_attr_table[ZPL_XATTR], &xattr,
+	    sizeof (uint64_t)) == 0)
+		(void) printf("\txattr	%llu\n", (u_longlong_t)xattr);
+	if (sa_lookup(hdl, sa_attr_table[ZPL_RDEV], &rdev,
+	    sizeof (uint64_t)) == 0)
+		(void) printf("\trdev	0x%016llx\n", (u_longlong_t)rdev);
+	sa_handle_destroy(hdl);
 }
 
 /*ARGSUSED*/
@@ -1261,7 +1384,11 @@
 	dump_zap,		/* snapshot refcount tags	*/
 	dump_ddt_zap,		/* DDT ZAP object		*/
 	dump_zap,		/* DDT statistics		*/
-	dump_unknown		/* Unknown type, must be last	*/
+	dump_znode,		/* SA object			*/
+	dump_zap,		/* SA Master Node		*/
+	dump_sa_attrs,		/* SA attribute registration	*/
+	dump_sa_layouts,	/* SA attribute layouts		*/
+	dump_unknown,		/* Unknown type, must be last	*/
 };
 
 static void
@@ -1328,11 +1455,13 @@
 	}
 
 	if (verbosity >= 4) {
-		(void) printf("\tdnode flags: %s%s\n",
+		(void) printf("\tdnode flags: %s%s%s\n",
 		    (dn->dn_phys->dn_flags & DNODE_FLAG_USED_BYTES) ?
 		    "USED_BYTES " : "",
 		    (dn->dn_phys->dn_flags & DNODE_FLAG_USERUSED_ACCOUNTED) ?
-		    "USERUSED_ACCOUNTED " : "");
+		    "USERUSED_ACCOUNTED " : "",
+		    (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR) ?
+		    "SPILL_BLKPTR" : "");
 		(void) printf("\tdnode maxblkid: %llu\n",
 		    (longlong_t)dn->dn_phys->dn_maxblkid);
 
@@ -1685,6 +1814,7 @@
 	dump_dir(os);
 	dmu_objset_disown(os, FTAG);
 	fuid_table_destroy();
+	sa_loaded = B_FALSE;
 	return (0);
 }
 
@@ -2961,6 +3091,7 @@
 	(os != NULL) ? dmu_objset_disown(os, FTAG) : spa_close(spa, FTAG);
 
 	fuid_table_destroy();
+	sa_loaded = B_FALSE;
 
 	libzfs_fini(g_zfs);
 	kernel_fini();

--- a/usr/src/cmd/zfs/Makefile	Tue Mar 16 06:44:44 2010 -0700
+++ b/usr/src/cmd/zfs/Makefile	Tue Mar 16 09:43:38 2010 -0600
@@ -19,7 +19,7 @@
 # CDDL HEADER END
 #
 #
-# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+# Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 # Use is subject to license terms.
 #
 
@@ -39,10 +39,12 @@
 
 LDLIBS += -lzfs -luutil -lumem -lnvpair
 
+INCS += -I../../common/zfs
+
 C99MODE=	-xc99=%all
 C99LMODE=	-Xc99=%all
 
-CPPFLAGS += -D_LARGEFILE64_SOURCE=1 -D_REENTRANT
+CPPFLAGS += -D_LARGEFILE64_SOURCE=1 -D_REENTRANT $(INCS)
 $(NOT_RELEASE_BUILD)CPPFLAGS += -DDEBUG
 
 # lint complains about unused _umem_* functions

--- a/usr/src/cmd/zfs/zfs_main.c	Tue Mar 16 06:44:44 2010 -0700
+++ b/usr/src/cmd/zfs/zfs_main.c	Tue Mar 16 09:43:38 2010 -0600
@@ -53,6 +53,7 @@
 
 #include "zfs_iter.h"
 #include "zfs_util.h"
+#include "zfs_comutil.h"
 
 libzfs_handle_t *g_zfs;
 
@@ -1594,31 +1595,25 @@
 {
 	upgrade_cbdata_t *cb = data;
 	int version = zfs_prop_get_int(zhp, ZFS_PROP_VERSION);
-	int i;
-	static struct { int zplver; int spaver; } table[] = {
-		{ZPL_VERSION_FUID, SPA_VERSION_FUID},
-		{ZPL_VERSION_USERSPACE, SPA_VERSION_USERSPACE},
-		{0, 0}
-	};
-
-
-	for (i = 0; table[i].zplver; i++) {
-		if (cb->cb_version >= table[i].zplver) {
-			int spa_version;
-
-			if (zfs_spa_version(zhp, &spa_version) < 0)
-				return (-1);
-
-			if (spa_version < table[i].spaver) {
-				/* can't upgrade */
-				(void) printf(gettext("%s: can not be "
-				    "upgraded; the pool version needs to first "
-				    "be upgraded\nto version %d\n\n"),
-				    zfs_get_name(zhp), table[i].spaver);
-				cb->cb_numfailed++;
-				return (0);
-			}
-		}
+	int needed_spa_version;
+	int spa_version;
+
+	if (zfs_spa_version(zhp, &spa_version) < 0)
+		return (-1);
+
+	needed_spa_version = zfs_spa_version_map(cb->cb_version);
+
+	if (needed_spa_version < 0)
+		return (-1);
+
+	if (spa_version < needed_spa_version) {
+		/* can't upgrade */
+		(void) printf(gettext("%s: can not be "
+		    "upgraded; the pool version needs to first "
+		    "be upgraded\nto version %d\n\n"),
+		    zfs_get_name(zhp), needed_spa_version);
+		cb->cb_numfailed++;
+		return (0);
 	}
 
 	/* upgrade */
@@ -1720,6 +1715,7 @@
 		    "unique identifier (FUID)\n"));
 		(void) printf(gettext(" 4   userquota, groupquota "
 		    "properties\n"));
+		(void) printf(gettext(" 5   System attributes\n"));
 		(void) printf(gettext("\nFor more information on a particular "
 		    "version, including supported releases, see:\n\n"));
 		(void) printf("http://www.opensolaris.org/os/community/zfs/"

--- a/usr/src/cmd/zpool/zpool_main.c	Tue Mar 16 06:44:44 2010 -0700
+++ b/usr/src/cmd/zpool/zpool_main.c	Tue Mar 16 09:43:38 2010 -0600
@@ -3889,6 +3889,7 @@
 		(void) printf(gettext(" 21  Deduplication\n"));
 		(void) printf(gettext(" 22  Received properties\n"));
 		(void) printf(gettext(" 23  Slim ZIL\n"));
+		(void) printf(gettext(" 24  System attributes\n"));
 		(void) printf(gettext("\nFor more information on a particular "
 		    "version, including supported releases, see:\n\n"));
 		(void) printf("http://www.opensolaris.org/os/community/zfs/"

--- a/usr/src/cmd/zstreamdump/zstreamdump.c	Tue Mar 16 06:44:44 2010 -0700
+++ b/usr/src/cmd/zstreamdump/zstreamdump.c	Tue Mar 16 09:43:38 2010 -0600
@@ -20,7 +20,7 @@
  */
 
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -88,6 +88,7 @@
 	struct drr_write *drrw = &thedrr.drr_u.drr_write;
 	struct drr_write_byref *drrwbr = &thedrr.drr_u.drr_write_byref;
 	struct drr_free *drrf = &thedrr.drr_u.drr_free;
+	struct drr_spill *drrs = &thedrr.drr_u.drr_spill;
 	char c;
 	boolean_t verbose = B_FALSE;
 	boolean_t first = B_TRUE;
@@ -378,6 +379,18 @@
 				    (longlong_t)drrf->drr_length);
 			}
 			break;
+		case DRR_SPILL:
+			if (do_byteswap) {
+				drrs->drr_object = BSWAP_64(drrs->drr_object);
+				drrs->drr_length = BSWAP_64(drrs->drr_length);
+			}
+			if (verbose) {
+				(void) printf("SPILL block for object = %llu "
+				    "length = %llu\n", drrs->drr_object,
+				    drrs->drr_length);
+			}
+			(void) ssread(buf, drrs->drr_length, &zc);
+			break;
 		}
 		pcksum = zc;
 	}
@@ -398,12 +411,15 @@
 	    (u_longlong_t)drr_record_count[DRR_WRITE]);
 	(void) printf("\tTotal DRR_FREE records = %lld\n",
 	    (u_longlong_t)drr_record_count[DRR_FREE]);
+	(void) printf("\tTotal DRR_SPILL records = %lld\n",
+	    (u_longlong_t)drr_record_count[DRR_SPILL]);
 	(void) printf("\tTotal records = %lld\n",
 	    (u_longlong_t)(drr_record_count[DRR_BEGIN] +
 	    drr_record_count[DRR_OBJECT] +
 	    drr_record_count[DRR_FREEOBJECTS] +
 	    drr_record_count[DRR_WRITE] +
 	    drr_record_count[DRR_FREE] +
+	    drr_record_count[DRR_SPILL] +
 	    drr_record_count[DRR_END]));
 	(void) printf("\tTotal write size = %lld (0x%llx)\n",
 	    (u_longlong_t)total_write_size, (u_longlong_t)total_write_size);

--- a/usr/src/common/zfs/zfs_comutil.c	Tue Mar 16 06:44:44 2010 -0700
+++ b/usr/src/common/zfs/zfs_comutil.c	Tue Mar 16 09:43:38 2010 -0600
@@ -39,6 +39,7 @@
 #include <sys/fs/zfs.h>
 #include <sys/int_limits.h>
 #include <sys/nvpair.h>
+#include "zfs_comutil.h"
 
 /*
  * Are there allocatable vdevs?
@@ -103,3 +104,56 @@
 	if (zrpp->zrp_request == 0)
 		zrpp->zrp_request = ZPOOL_NO_REWIND;
 }
+
+typedef struct zfs_version_spa_map {
+	int	version_zpl;	/* ZPL version of this table entry */
+	int	version_spa;	/* SPA version paired with version_zpl */
+} zfs_version_spa_map_t;
+
+/*
+ * Keep this table in monotonically increasing version number order.
+ */
+static zfs_version_spa_map_t zfs_version_table[] = {
+	{ZPL_VERSION_INITIAL, SPA_VERSION_INITIAL},
+	{ZPL_VERSION_DIRENT_TYPE, SPA_VERSION_INITIAL},
+	{ZPL_VERSION_FUID, SPA_VERSION_FUID},
+	{ZPL_VERSION_USERSPACE, SPA_VERSION_USERSPACE},
+	{ZPL_VERSION_SA, SPA_VERSION_SA},
+	{0, 0}	/* terminator: the lookup loops stop at a zero version */
+};
+
+/*
+ * Return the ZPL version of the last table entry whose SPA version does
+ * not exceed spa_version (the newest usable ZPL); -1 if none qualifies.
+ */
+int
+zfs_zpl_version_map(int spa_version)
+{
+	zfs_version_spa_map_t *ent;
+	int zpl = -1;
+
+	for (ent = zfs_version_table; ent->version_spa != 0; ent++) {
+		if (ent->version_spa <= spa_version)
+			zpl = ent->version_zpl;
+	}
+
+	return (zpl);
+}
+
+/*
+ * Return the SPA version of the first table entry whose ZPL version is at
+ * least zpl_version (the oldest sufficient pool); -1 if none qualifies.
+ */
+int
+zfs_spa_version_map(int zpl_version)
+{
+	zfs_version_spa_map_t *ent = zfs_version_table;
+
+	while (ent->version_zpl != 0) {
+		if (zpl_version <= ent->version_zpl)
+			return (ent->version_spa);
+		ent++;
+	}
+
+	return (-1);
+}

--- a/usr/src/common/zfs/zfs_comutil.h	Tue Mar 16 06:44:44 2010 -0700
+++ b/usr/src/common/zfs/zfs_comutil.h	Tue Mar 16 09:43:38 2010 -0600
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -36,6 +36,9 @@
 extern boolean_t zfs_allocatable_devs(nvlist_t *);
 extern void zpool_get_rewind_policy(nvlist_t *, zpool_rewind_policy_t *);
 
+extern int zfs_zpl_version_map(int spa_version);
+extern int zfs_spa_version_map(int zpl_version);
+
 #ifdef	__cplusplus
 }
 #endif

--- a/usr/src/common/zfs/zfs_prop.c	Tue Mar 16 06:44:44 2010 -0700
+++ b/usr/src/common/zfs/zfs_prop.c	Tue Mar 16 09:43:38 2010 -0600
@@ -153,6 +153,7 @@
 		{ "2",		2 },
 		{ "3",		3 },
 		{ "4",		4 },
+		{ "5",		5 },
 		{ "current",	ZPL_VERSION },
 		{ NULL }
 	};

--- a/usr/src/grub/capability	Tue Mar 16 06:44:44 2010 -0700
+++ b/usr/src/grub/capability	Tue Mar 16 09:43:38 2010 -0600
@@ -40,7 +40,7 @@
 # This file and the associated version are Solaris specific and are
 # not a part of the open source distribution of GRUB.
 #
-VERSION=15
+VERSION=16
 dboot
 xVM
 zfs

--- a/usr/src/grub/grub-0.97/stage2/fsys_zfs.c	Tue Mar 16 06:44:44 2010 -0700
+++ b/usr/src/grub/grub-0.97/stage2/fsys_zfs.c	Tue Mar 16 09:43:38 2010 -0600
@@ -670,6 +670,7 @@
 	zapbuf = stack;
 	size = zap_dnode->dn_datablkszsec << SPA_MINBLOCKSHIFT;
 	stack += size;
+
 	if (errnum = dmu_read(zap_dnode, 0, zapbuf, stack))
 		return (errnum);
 
@@ -1425,7 +1426,44 @@
 	}
 
 	/* get the file size and set the file position to 0 */
-	filemax = ((znode_phys_t *)DN_BONUS(DNODE))->zp_size;
+
+	/*
+	 * For DMU_OT_SA we will need to locate the SIZE attribute,
+	 * which could be either in the bonus buffer
+	 * or the "spill" block.
+	 */
+	if (DNODE->dn_bonustype == DMU_OT_SA) {
+		sa_hdr_phys_t *sahdrp;
+		int hdrsize;
+
+		sahdrp = (sa_hdr_phys_t *)DN_BONUS(DNODE);
+		if (DNODE->dn_bonuslen != 0) {
+			sahdrp = (sa_hdr_phys_t *)DN_BONUS(DNODE);
+		} else {
+			if (DNODE->dn_flags & DNODE_FLAG_SPILL_BLKPTR) {
+				blkptr_t *bp = &DNODE->dn_spill;
+				void *buf;
+
+				buf = (void *)stack;
+				stack += BP_GET_LSIZE(bp);
+
+				/* reset errnum to rawread() failure */
+				errnum = 0;
+				if (zio_read(bp, buf, stack) != 0) {
+					return (0);
+				}
+				sahdrp = buf;
+			} else {
+				errnum = ERR_FSYS_CORRUPT;
+				return (0);
+			}
+		}
+		hdrsize = SA_HDR_SIZE(sahdrp);
+		filemax = *(uint64_t *)((char *)sahdrp + hdrsize +
+		    SA_SIZE_OFFSET);
+	} else {
+		filemax = ((znode_phys_t *)DN_BONUS(DNODE))->zp_size;
+	}
 	filepos = 0;
 
 	dnode_buf = NULL;

--- a/usr/src/grub/grub-0.97/stage2/fsys_zfs.h	Tue Mar 16 06:44:44 2010 -0700
+++ b/usr/src/grub/grub-0.97/stage2/fsys_zfs.h	Tue Mar 16 09:43:38 2010 -0600
@@ -53,6 +53,7 @@
 #include <zfs-include/dsl_dataset.h>
 #include <zfs-include/zil.h>
 #include <zfs-include/dmu_objset.h>
+#include <zfs-include/sa_impl.h>
 
 /*
  * Global Memory addresses to store MOS and DNODE data

--- a/usr/src/grub/grub-0.97/stage2/zfs-include/dmu.h	Tue Mar 16 06:44:44 2010 -0700
+++ b/usr/src/grub/grub-0.97/stage2/zfs-include/dmu.h	Tue Mar 16 09:43:38 2010 -0600
@@ -17,15 +17,13 @@
  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
 #ifndef	_SYS_DMU_H
 #define	_SYS_DMU_H
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 /*
  * This file describes the interface that the DMU provides for its
  * consumers.
@@ -75,7 +73,22 @@
 	DMU_OT_SPA_HISTORY,		/* UINT8 */
 	DMU_OT_SPA_HISTORY_OFFSETS,	/* spa_his_phys_t */
 	DMU_OT_POOL_PROPS,		/* ZAP */
-
+	DMU_OT_DSL_PERMS,		/* ZAP */
+	DMU_OT_ACL,			/* ACL */
+	DMU_OT_SYSACL,			/* SYSACL */
+	DMU_OT_FUID,			/* FUID table (Packed NVLIST UINT8) */
+	DMU_OT_FUID_SIZE,		/* FUID table size UINT64 */
+	DMU_OT_NEXT_CLONES,		/* ZAP */
+	DMU_OT_SCRUB_QUEUE,		/* ZAP */
+	DMU_OT_USERGROUP_USED,		/* ZAP */
+	DMU_OT_USERGROUP_QUOTA,		/* ZAP */
+	DMU_OT_USERREFS,		/* ZAP */
+	DMU_OT_DDT_ZAP,			/* ZAP */
+	DMU_OT_DDT_STATS,		/* ZAP */
+	DMU_OT_SA,			/* System attr */
+	DMU_OT_SA_MASTER_NODE,		/* ZAP */
+	DMU_OT_SA_ATTR_REGISTRATION,	/* ZAP */
+	DMU_OT_SA_ATTR_LAYOUTS,		/* ZAP */
 	DMU_OT_NUMTYPES
 } dmu_object_type_t;

--- a/usr/src/grub/grub-0.97/stage2/zfs-include/dnode.h	Tue Mar 16 06:44:44 2010 -0700
+++ b/usr/src/grub/grub-0.97/stage2/zfs-include/dnode.h	Tue Mar 16 09:43:38 2010 -0600
@@ -17,15 +17,13 @@
  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
 #ifndef	_SYS_DNODE_H
 #define	_SYS_DNODE_H
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 /*
  * Fixed constants.
  */
@@ -49,6 +47,8 @@
 #define	DNODES_PER_BLOCK	(1ULL << DNODES_PER_BLOCK_SHIFT)
 #define	DNODES_PER_LEVEL_SHIFT	(DN_MAX_INDBLKSHIFT - SPA_BLKPTRSHIFT)
 
+#define	DNODE_FLAG_SPILL_BLKPTR (1<<2)
+
 #define	DN_BONUS(dnp)	((void*)((dnp)->dn_bonus + \
 	(((dnp)->dn_nblkptr - 1) * sizeof (blkptr_t))))
 
@@ -72,7 +72,8 @@
 	uint64_t dn_pad3[4];
 
 	blkptr_t dn_blkptr[1];
-	uint8_t dn_bonus[DN_MAX_BONUSLEN];
+	uint8_t dn_bonus[DN_MAX_BONUSLEN - sizeof (blkptr_t)];
+	blkptr_t dn_spill;
 } dnode_phys_t;
 
 #endif	/* _SYS_DNODE_H */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/grub/grub-0.97/stage2/zfs-include/sa_impl.h	Tue Mar 16 09:43:38 2010 -0600
@@ -0,0 +1,38 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef	_SYS_SA_IMPL_H
+#define	_SYS_SA_IMPL_H
+
+typedef struct sa_hdr_phys {
+	uint32_t sa_magic;
+	uint16_t sa_layout_info;	/* packed; decoded by SA_HDR_SIZE() */
+	uint16_t sa_lengths[1];
+} sa_hdr_phys_t;
+
+#define	SA_HDR_SIZE(hdr)	BF32_GET_SB((hdr)->sa_layout_info, 10, 16, 3, 0)
+#define	SA_SIZE_OFFSET	0x8	/* size attr offset past the SA header */
+
+#endif	/* _SYS_SA_IMPL_H */

--- a/usr/src/grub/grub-0.97/stage2/zfs-include/zfs.h	Tue Mar 16 06:44:44 2010 -0700
+++ b/usr/src/grub/grub-0.97/stage2/zfs-include/zfs.h	Tue Mar 16 09:43:38 2010 -0600
@@ -27,7 +27,7 @@
 /*
  * On-disk version number.
  */
-#define	SPA_VERSION			23ULL
+#define	SPA_VERSION			24ULL
 
 /*
  * The following are configuration names used in the nvlist describing a pool's

--- a/usr/src/grub/grub-0.97/stage2/zfs-include/zfs_znode.h	Tue Mar 16 06:44:44 2010 -0700
+++ b/usr/src/grub/grub-0.97/stage2/zfs-include/zfs_znode.h	Tue Mar 16 09:43:38 2010 -0600
@@ -17,7 +17,7 @@
  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -27,8 +27,9 @@
 #define	MASTER_NODE_OBJ	1
 #define	ZFS_ROOT_OBJ		"ROOT"
 #define	ZPL_VERSION_STR		"VERSION"
+#define	ZFS_SA_ATTRS		"SA_ATTRS"
 
-#define	ZPL_VERSION		4ULL
+#define	ZPL_VERSION		5ULL
 
 #define	ZFS_DIRENT_OBJ(de) BF64_GET(de, 0, 48)

--- a/usr/src/lib/libzfs/common/libzfs_impl.h	Tue Mar 16 06:44:44 2010 -0700
+++ b/usr/src/lib/libzfs/common/libzfs_impl.h	Tue Mar 16 09:43:38 2010 -0600
@@ -30,7 +30,6 @@
 #include <sys/dmu.h>
 #include <sys/fs/zfs.h>
 #include <sys/zfs_ioctl.h>
-#include <sys/zfs_acl.h>
 #include <sys/spa.h>
 #include <sys/nvpair.h>

--- a/usr/src/lib/libzfs/common/libzfs_sendrecv.c	Tue Mar 16 06:44:44 2010 -0700
+++ b/usr/src/lib/libzfs/common/libzfs_sendrecv.c	Tue Mar 16 09:43:38 2010 -0600
@@ -203,6 +203,7 @@
 	struct drr_end *drre = &thedrr.drr_u.drr_end;
 	struct drr_object *drro = &thedrr.drr_u.drr_object;
 	struct drr_write *drrw = &thedrr.drr_u.drr_write;
+	struct drr_spill *drrs = &thedrr.drr_u.drr_spill;
 	FILE *ofp;
 	int outfd;
 	dmu_replay_record_t wbr_drr = {0};
@@ -302,6 +303,18 @@
 			break;
 		}
 
+		case DRR_SPILL:
+		{
+			if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
+			    &stream_cksum, outfd) == -1)
+				goto out;
+			(void) ssread(buf, drrs->drr_length, ofp);
+			if (cksum_and_write(buf, drrs->drr_length,
+			    &stream_cksum, outfd) == -1)
+				goto out;
+			break;
+		}
+
 		case DRR_FREEOBJECTS:
 		{
 			if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
@@ -1154,6 +1167,14 @@
 	dedup_arg_t dda = { 0 };
 	int featureflags = 0;
 
+	if (zhp->zfs_type == ZFS_TYPE_FILESYSTEM) {
+		uint64_t version;
+		version = zfs_prop_get_int(zhp, ZFS_PROP_VERSION);
+		if (version >= ZPL_VERSION_SA) {
+			featureflags |= DMU_BACKUP_FEATURE_SA_SPILL;
+		}
+	}
+
 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
 	    "cannot send '%s'"), zhp->zfs_name);
 
@@ -2180,7 +2201,14 @@
 			(void) recv_read(hdl, fd, buf,
 			    drr->drr_u.drr_write.drr_length, B_FALSE, NULL);
 			break;
-
+		case DRR_SPILL:
+			/* swap the spill length itself, not drr_write's */
+			if (byteswap)
+				drr->drr_u.drr_spill.drr_length =
+				    BSWAP_64(drr->drr_u.drr_spill.drr_length);
+			(void) recv_read(hdl, fd, buf,
+			    drr->drr_u.drr_spill.drr_length, B_FALSE, NULL);
+			break;
 		case DRR_WRITE_BYREF:
 		case DRR_FREEOBJECTS:
 		case DRR_FREE:

--- a/usr/src/lib/libzfs/common/mapfile-vers	Tue Mar 16 06:44:44 2010 -0700
+++ b/usr/src/lib/libzfs/common/mapfile-vers	Tue Mar 16 09:43:38 2010 -0600
@@ -134,6 +134,7 @@
 	zfs_smb_acl_rename;
 	zfs_snapshot;
 	zfs_spa_version;
+	zfs_spa_version_map;
 	zfs_type_to_name;
 	zfs_unmount;
 	zfs_unmountall;
@@ -146,6 +147,7 @@
 	zfs_unshareall_smb;
 	zfs_userspace;
 	zfs_userquota_prop_prefixes;
+	zfs_zpl_version_map;
 	zpool_add;
 	zpool_clear;
 	zpool_clear_label;

--- a/usr/src/lib/libzpool/common/kernel.c	Tue Mar 16 06:44:44 2010 -0700
+++ b/usr/src/lib/libzpool/common/kernel.c	Tue Mar 16 09:43:38 2010 -0600
@@ -776,6 +776,25 @@
 	return (0);
 }
 
+/*
+ * libzpool's emulation of ddi_strtoull(): parse an unsigned long long
+ * from str in the given base.  The value is stored in *result and, when
+ * nptr is non-NULL, *nptr is set to the first unparsed character.
+ * Returns 0 on success or the errno reported by strtoull() (e.g. ERANGE).
+ */
+int
+ddi_strtoull(const char *str, char **nptr, int base, u_longlong_t *result)
+{
+	char *end;
+
+	/* strtoull() never clears errno, so start from a known state. */
+	errno = 0;
+	*result = strtoull(str, &end, base);
+	if (nptr != NULL)
+		*nptr = end;
+	return (errno);
+}
+
 /*
  * =========================================================================
  * kernel emulation setup & teardown

--- a/usr/src/lib/libzpool/common/llib-lzpool	Tue Mar 16 06:44:44 2010 -0700
+++ b/usr/src/lib/libzpool/common/llib-lzpool	Tue Mar 16 09:43:38 2010 -0600
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -50,6 +50,8 @@
 #include <sys/dbuf.h>
 #include <sys/zio_checksum.h>
 #include <sys/ddt.h>
+#include <sys/sa.h>
+#include <sys/zfs_sa.h>
 
 extern uint64_t metaslab_gang_bang;
 extern uint64_t metaslab_df_alloc_threshold;

--- a/usr/src/lib/libzpool/common/sys/zfs_context.h	Tue Mar 16 06:44:44 2010 -0700
+++ b/usr/src/lib/libzpool/common/sys/zfs_context.h	Tue Mar 16 09:43:38 2010 -0600
@@ -536,6 +536,9 @@
 extern int ddi_strtoul(const char *str, char **nptr, int base,
     unsigned long *result);
 
+extern int ddi_strtoull(const char *str, char **nptr, int base,
+    u_longlong_t *result);
+
 /* ZFS Boot Related stuff. */
 
 struct _buf {

--- a/usr/src/psm/stand/bootblks/zfs/common/zfs.fth	Tue Mar 16 06:44:44 2010 -0700
+++ b/usr/src/psm/stand/bootblks/zfs/common/zfs.fth	Tue Mar 16 09:43:38 2010 -0600
@@ -19,13 +19,13 @@
 \ CDDL HEADER END
 \
 \
-\ Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+\ Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 \ Use is subject to license terms.
 \
 
 
 purpose: ZFS file system support package
-copyright: Copyright 2009 Sun Microsystems, Inc. All Rights Reserved
+copyright: Copyright 2010 Sun Microsystems, Inc. All Rights Reserved
 
 " /packages" get-package  push-package
 
@@ -395,13 +395,18 @@
    \	ZFS dnode (DMU) routines
    \
 
+   d# 44  constant ot-sa#
+
    d# 512 constant /dnode
 
-   : dn_indblkshift   ( dn -- n )  h#  1 +  c@  ;
-   : dn_nlevels       ( dn -- n )  h#  2 +  c@  ;
-   : dn_datablkszsec  ( dn -- n )  h#  8 +  w@  ;
-   : dn_blkptr        ( dn -- p )  h# 40 +      ;
-   : dn_bonus         ( dn -- p )  h# c0 +      ;
+   : dn_indblkshift   ( dn -- n )  h#   1 +  c@  ;
+   : dn_nlevels       ( dn -- n )  h#   2 +  c@  ;
+   : dn_bonustype     ( dn -- n )  h#   4 +  c@  ;
+   : dn_datablkszsec  ( dn -- n )  h#   8 +  w@  ;
+   : dn_bonuslen      ( dn -- n )  h#   a +  w@  ;
+   : dn_blkptr        ( dn -- p )  h#  40 +      ;
+   : dn_bonus         ( dn -- p )  h#  c0 +      ;
+   : dn_spill         ( dn -- p )  h# 180 +      ;
 
    0 instance value dnode
 
@@ -755,7 +760,6 @@
    0 instance value mos-dn
    0 instance value obj-dir
    0 instance value root-dsl
-   0 instance value root-dsl#
    0 instance value fs-dn
 
    \ dn-cache contains dc-dn's contents at dc-blk#
@@ -819,7 +823,6 @@
       obj-dir " root_dataset"  zap-lookup  if
          " no root_dataset"  die
       then                                   ( obj# )
-      dup to root-dsl#
       get-mos-dnode                          (  )
       dnode root-dsl  /dnode  move
    ;
@@ -888,6 +891,20 @@
    \
 
    1       constant master-node#
+
+   0 instance value bootfs-obj#
+   0 instance value root-obj#
+   0 instance value current-obj#
+   0 instance value search-obj#
+
+   instance defer fsize         ( dn -- size )
+   instance defer mode          ( dn -- mode )
+   instance defer parent        ( dn -- obj# )
+   instance defer readlink      ( dst dn -- )
+
+   \
+   \ routines when bonus pool contains a znode
+   \
    d# 264  constant /znode
    d#  56  constant /zn-slink
 
@@ -895,15 +912,77 @@
    : zp_size    ( zn -- n )  h# 50 +  x@  ;
    : zp_parent  ( zn -- n )  h# 58 +  x@  ;
 
-   0 instance value bootfs-obj#
-   0 instance value root-obj#
-   0 instance value current-obj#
-   0 instance value search-obj#
-
    alias  >znode  dn_bonus
 
-   : fsize     ( dn -- n )     >znode zp_size  ;
-   : ftype     ( dn -- n )     >znode zp_mode  h# f000  and  ;
+   : zn-fsize     ( dn -- n )  >znode zp_size    ;
+   : zn-mode      ( dn -- n )  >znode zp_mode    ;
+   : zn-parent    ( dn -- n )  >znode zp_parent  ;
+
+   \ copy symlink target to dst
+   : zn-readlink  ( dst dn -- )
+      dup zn-fsize  tuck /zn-slink  >  if ( dst size dn )
+         \ contents in 1st block
+         temp-space  over dn-bsize        ( dst size dn t-adr bsize )
+         rot  0 lblk#>bp  read-bp         ( dst size )
+         temp-space                       ( dst size src )
+      else                                ( dst size dn )
+         \ contents in dnode
+         >znode  /znode +                 ( dst size src )
+      then                                ( dst size src )
+      -rot  move                          (  )
+   ;
+
+   \
+   \ routines when bonus pool contains sa's
+   \
+
+   \ SA header size when link is in dn_bonus
+   d# 16  constant  /sahdr-link
+
+   : sa_props  ( sa -- n )   h# 4 +  w@  ;
+   \ hdr size is bits 10-15 in 8-byte units; mask off the layout# bits
+   : sa-hdrsz  ( sa -- sz )  sa_props h# 7  >>  h# 1f8  and  ;
+
+   alias  >sa  dn_bonus
+
+   : >sadata    ( dn -- adr )  >sa dup  sa-hdrsz  +  ;
+   : sa-mode    ( dn -- n )    >sadata           x@  ;
+   : sa-fsize   ( dn -- n )    >sadata  h#  8 +  x@  ;
+   : sa-parent  ( dn -- n )    >sadata  h# 28 +  x@  ;
+
+   \ copy symlink target to dst
+   : sa-readlink  ( dst dn -- )
+      dup  >sa sa-hdrsz  /sahdr-link  <>  if
+         \ contents in 1st attr of dn_spill
+         temp-space  over dn_spill           ( dst dn t-adr bp )
+         dup bp-lsize  swap  read-bp         ( dst dn )
+         sa-fsize                            ( dst size )
+         temp-space dup sa-hdrsz  +          ( dst size src )
+      else                                   ( dst dn )
+         \ content in bonus buf
+         dup dn_bonus  over  dn_bonuslen  +  ( dst dn ebonus )
+         swap sa-fsize  tuck  -              ( dst size src )
+      then                                   ( dst size src )
+      -rot  move                             (  )
+   ;
+
+
+   \ setup attr routines for dn
+   : set-attr  ( dn -- )
+      dn_bonustype  ot-sa#  =  if
+         ['] sa-fsize     to  fsize
+         ['] sa-mode      to  mode
+         ['] sa-parent    to  parent
+         ['] sa-readlink  to  readlink
+      else
+         ['] zn-fsize     to  fsize
+         ['] zn-mode      to  mode
+         ['] zn-parent    to  parent
+         ['] zn-readlink  to  readlink
+      then
+   ;
+
+   : ftype     ( dn -- type )  mode   h# f000  and  ;
    : dir?      ( dn -- flag )  ftype  h# 4000  =  ;
    : symlink?  ( dn -- flag )  ftype  h# a000  =  ;
 
@@ -959,7 +1038,7 @@
       then
 
       2dup " .."  $=  if
-         2drop  >znode zp_parent  ( obj# )
+         2drop  parent            ( obj# )
       else                        ( dn file$ )
          \ search dir
          current-obj# to search-obj#
@@ -967,38 +1046,32 @@
             true  exit            ( not-found )
          then                     ( obj# )
       then                        ( obj# )
-      get-fs-dnode  false         ( found )
+      get-fs-dnode
+      dnode  set-attr
+      false                       ( found )
    ;
 
    /buf-len  instance buffer: fpath-buf
-   : clr-fpath-buf  ( -- )  fpath-buf /buf-len  erase  ;
-
-   : fpath-buf$  ( -- path$ )  fpath-buf cscount  ;
+   /buf-len  instance buffer: tpath-buf
 
-   \ copy symlink target to adr
-   : readlink  ( dst dn -- )
-      dup fsize  tuck /zn-slink  >  if    ( dst size dn )
-         \ contents in 1st block
-         temp-space  over dn-bsize        ( dst size dn t-adr bsize )
-         rot  0 lblk#>bp  read-bp         ( dst size )
-         temp-space                       ( dst size src )
-      else                                ( dst size dn )
-         \ contents in dnode
-         >znode  /znode +                 ( dst size src )
-      then                                ( dst size src )
-      -rot  move                          (  )
-   ;
+   : tpath-buf$  ( -- path$ )  tpath-buf cscount  ;
+   : fpath-buf$  ( -- path$ )  fpath-buf cscount  ;
 
    \ modify tail to account for symlink
    : follow-symlink  ( tail$ -- tail$' )
-      clr-fpath-buf                             ( tail$ )
-      fpath-buf dnode  readlink
+      \ read target
+      tpath-buf /buf-len  erase
+      tpath-buf dnode  readlink
 
-      \ append to current path
+      \ append current path
       ?dup  if                                  ( tail$ )
-	 " /" fpath-buf$  $append               ( tail$ )
-	 fpath-buf$  $append                    (  )
+	 " /" tpath-buf$  $append               ( tail$ )
+	 tpath-buf$  $append                    (  )
       else  drop  then                          (  )
+
+      \ copy to fpath
+      fpath-buf  /buf-len  erase
+      tpath-buf$  fpath-buf  swap move
       fpath-buf$                                ( path$ )
 
       \ get directory that starts changed path
@@ -1008,6 +1081,7 @@
          search-obj#                            ( path$ obj# )
       then                                      ( path$ obj# )
       get-fs-dnode                              ( path$ )
+      dnode  set-attr
    ;
 
    \ open dnode at path
@@ -1020,6 +1094,7 @@
          current-obj#                             ( path$ obj# )
       then                                        ( path$ obj# )
       get-fs-dnode                                ( path$ )
+      dnode  set-attr
 
       \ lookup each path component
       begin                                       ( path$ )
@@ -1173,7 +1248,7 @@
 
       \ zero instance buffers
       file-records /file-records  erase
-      bootprop-buf /buf-len  erase 
+      bootprop-buf /buf-len  erase
    ;
 
    : release-buffers  ( -- )

--- a/usr/src/uts/common/Makefile.files	Tue Mar 16 06:44:44 2010 -0700
+++ b/usr/src/uts/common/Makefile.files	Tue Mar 16 09:43:38 2010 -0600
@@ -1338,6 +1338,7 @@
 	lzjb.o			\
 	metaslab.o		\
 	refcount.o		\
+	sa.o			\
 	sha256.o		\
 	spa.o			\
 	spa_config.o		\
@@ -1363,6 +1364,7 @@
 	zfs_byteswap.o		\
 	zfs_fm.o		\
 	zfs_fuid.o		\
+	zfs_sa.o		\
 	zfs_znode.o		\
 	zil.o			\
 	zio.o			\

--- a/usr/src/uts/common/fs/zfs/dbuf.c	Tue Mar 16 06:44:44 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/dbuf.c	Tue Mar 16 09:43:38 2010 -0600
@@ -34,6 +34,8 @@
 #include <sys/spa.h>
 #include <sys/zio.h>
 #include <sys/dmu_zfetch.h>
+#include <sys/sa.h>
+#include <sys/sa_impl.h>
 
 static void dbuf_destroy(dmu_buf_impl_t *db);
 static int dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
@@ -296,13 +298,17 @@
 		ASSERT3U(db->db.db_object, ==, dn->dn_object);
 		ASSERT3P(db->db_objset, ==, dn->dn_objset);
 		ASSERT3U(db->db_level, <, dn->dn_nlevels);
-		ASSERT(db->db_blkid == DB_BONUS_BLKID ||
-		    list_head(&dn->dn_dbufs));
+		ASSERT(db->db_blkid == DMU_BONUS_BLKID || db->db_blkid ==
+		    DMU_SPILL_BLKID || list_head(&dn->dn_dbufs));
 	}
-	if (db->db_blkid == DB_BONUS_BLKID) {
+	if (db->db_blkid == DMU_BONUS_BLKID) {
 		ASSERT(dn != NULL);
 		ASSERT3U(db->db.db_size, >=, dn->dn_bonuslen);
-		ASSERT3U(db->db.db_offset, ==, DB_BONUS_BLKID);
+		ASSERT3U(db->db.db_offset, ==, DMU_BONUS_BLKID);
+	} else if (db->db_blkid == DMU_SPILL_BLKID) {
+		ASSERT(dn != NULL);
+		ASSERT3U(db->db.db_size, >=, dn->dn_bonuslen);
+		ASSERT3U(db->db.db_offset, ==, 0);
 	} else {
 		ASSERT3U(db->db.db_offset, ==, db->db_blkid * db->db.db_size);
 	}
@@ -336,8 +342,9 @@
 				ASSERT(db->db_parent == NULL);
 			else
 				ASSERT(db->db_parent != NULL);
-			ASSERT3P(db->db_blkptr, ==,
-			    &dn->dn_phys->dn_blkptr[db->db_blkid]);
+			if (db->db_blkid != DMU_SPILL_BLKID)
+				ASSERT3P(db->db_blkptr, ==,
+				    &dn->dn_phys->dn_blkptr[db->db_blkid]);
 		} else {
 			/* db is pointed to by an indirect block */
 			int epb = db->db_parent->db.db_size >> SPA_BLKPTRSHIFT;
@@ -357,7 +364,7 @@
 		}
 	}
 	if ((db->db_blkptr == NULL || BP_IS_HOLE(db->db_blkptr)) &&
-	    db->db.db_data && db->db_blkid != DB_BONUS_BLKID &&
+	    db->db.db_data && db->db_blkid != DMU_BONUS_BLKID &&
 	    db->db_state != DB_FILL && !dn->dn_free_txg) {
 		/*
 		 * If the blkptr isn't set but they have nonzero data,
@@ -465,7 +472,7 @@
 		dbuf_set_data(db, buf);
 		db->db_state = DB_CACHED;
 	} else {
-		ASSERT(db->db_blkid != DB_BONUS_BLKID);
+		ASSERT(db->db_blkid != DMU_BONUS_BLKID);
 		ASSERT3P(db->db_buf, ==, NULL);
 		VERIFY(arc_buf_remove_ref(buf, db) == 1);
 		db->db_state = DB_UNCACHED;
@@ -490,7 +497,7 @@
 	ASSERT(db->db_state == DB_UNCACHED);
 	ASSERT(db->db_buf == NULL);
 
-	if (db->db_blkid == DB_BONUS_BLKID) {
+	if (db->db_blkid == DMU_BONUS_BLKID) {
 		int bonuslen = MIN(dn->dn_bonuslen, dn->dn_phys->dn_bonuslen);
 
 		ASSERT3U(bonuslen, <=, db->db.db_size);
@@ -570,7 +577,7 @@
 	if ((flags & DB_RF_HAVESTRUCT) == 0)
 		rw_enter(&db->db_dnode->dn_struct_rwlock, RW_READER);
 
-	prefetch = db->db_level == 0 && db->db_blkid != DB_BONUS_BLKID &&
+	prefetch = db->db_level == 0 && db->db_blkid != DMU_BONUS_BLKID &&
 	    (flags & DB_RF_NOPREFETCH) == 0 && db->db_dnode != NULL &&
 	    DBUF_IS_CACHEABLE(db);
 
@@ -630,7 +637,7 @@
 dbuf_noread(dmu_buf_impl_t *db)
 {
 	ASSERT(!refcount_is_zero(&db->db_holds));
-	ASSERT(db->db_blkid != DB_BONUS_BLKID);
+	ASSERT(db->db_blkid != DMU_BONUS_BLKID);
 	mutex_enter(&db->db_mtx);
 	while (db->db_state == DB_READ || db->db_state == DB_FILL)
 		cv_wait(&db->db_changed, &db->db_mtx);
@@ -675,7 +682,7 @@
 
 	if (dr == NULL ||
 	    (dr->dt.dl.dr_data !=
-	    ((db->db_blkid  == DB_BONUS_BLKID) ? db->db.db_data : db->db_buf)))
+	    ((db->db_blkid  == DMU_BONUS_BLKID) ? db->db.db_data : db->db_buf)))
 		return;
 
 	/*
@@ -686,7 +693,7 @@
 	 *	just null out the current db_data pointer.
 	 */
 	ASSERT(dr->dr_txg >= txg - 2);
-	if (db->db_blkid == DB_BONUS_BLKID) {
+	if (db->db_blkid == DMU_BONUS_BLKID) {
 		/* Note that the data bufs here are zio_bufs */
 		dr->dt.dl.dr_data = zio_buf_alloc(DN_MAX_BONUSLEN);
 		arc_space_consume(DN_MAX_BONUSLEN, ARC_SPACE_OTHER);
@@ -713,7 +720,7 @@
 	ASSERT(dr->dt.dl.dr_override_state != DR_IN_DMU_SYNC);
 	ASSERT(db->db_level == 0);
 
-	if (db->db_blkid == DB_BONUS_BLKID ||
+	if (db->db_blkid == DMU_BONUS_BLKID ||
 	    dr->dt.dl.dr_override_state == DR_NOT_OVERRIDDEN)
 		return;
 
@@ -751,7 +758,7 @@
 	uint64_t first_l1 = start >> epbs;
 	uint64_t last_l1 = end >> epbs;
 
-	if (end > dn->dn_maxblkid) {
+	if (end > dn->dn_maxblkid && (end != DMU_SPILL_BLKID)) {
 		end = dn->dn_maxblkid;
 		last_l1 = end >> epbs;
 	}
@@ -759,7 +766,7 @@
 	mutex_enter(&dn->dn_dbufs_mtx);
 	for (db = list_head(&dn->dn_dbufs); db; db = db_next) {
 		db_next = list_next(&dn->dn_dbufs, db);
-		ASSERT(db->db_blkid != DB_BONUS_BLKID);
+		ASSERT(db->db_blkid != DMU_BONUS_BLKID);
 
 		if (db->db_level == 1 &&
 		    db->db_blkid >= first_l1 && db->db_blkid <= last_l1) {
@@ -873,7 +880,7 @@
 	int osize = db->db.db_size;
 	arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db);
 
-	ASSERT(db->db_blkid != DB_BONUS_BLKID);
+	ASSERT(db->db_blkid != DMU_BONUS_BLKID);
 
 	/* XXX does *this* func really need the lock? */
 	ASSERT(RW_WRITE_HELD(&db->db_dnode->dn_struct_rwlock));
@@ -970,6 +977,9 @@
 	}
 	mutex_exit(&dn->dn_mtx);
 
+	if (db->db_blkid == DMU_SPILL_BLKID)
+		dn->dn_have_spill = B_TRUE;
+
 	/*
 	 * If this buffer is already dirty, we're done.
 	 */
@@ -979,7 +989,7 @@
 	while ((dr = *drp) != NULL && dr->dr_txg > tx->tx_txg)
 		drp = &dr->dr_next;
 	if (dr && dr->dr_txg == tx->tx_txg) {
-		if (db->db_level == 0 && db->db_blkid != DB_BONUS_BLKID) {
+		if (db->db_level == 0 && db->db_blkid != DMU_BONUS_BLKID) {
 			/*
 			 * If this buffer has already been written out,
 			 * we now need to reset its state.
@@ -1020,7 +1030,7 @@
 
 	dprintf_dbuf(db, "size=%llx\n", (u_longlong_t)db->db.db_size);
 
-	if (db->db_blkid != DB_BONUS_BLKID) {
+	if (db->db_blkid != DMU_BONUS_BLKID) {
 		/*
 		 * Update the accounting.
 		 * Note: we delay "free accounting" until after we drop
@@ -1042,7 +1052,7 @@
 		void *data_old = db->db_buf;
 
 		if (db->db_state != DB_NOFILL) {
-			if (db->db_blkid == DB_BONUS_BLKID) {
+			if (db->db_blkid == DMU_BONUS_BLKID) {
 				dbuf_fix_old_data(db, tx->tx_txg);
 				data_old = db->db.db_data;
 			} else if (db->db.db_object != DMU_META_DNODE_OBJECT) {
@@ -1078,7 +1088,8 @@
 	 * and dbuf_dirty.  We win, as though the dbuf_noread() had
 	 * happened after the free.
 	 */
-	if (db->db_level == 0 && db->db_blkid != DB_BONUS_BLKID) {
+	if (db->db_level == 0 && db->db_blkid != DMU_BONUS_BLKID &&
+	    db->db_blkid != DMU_SPILL_BLKID) {
 		mutex_enter(&dn->dn_mtx);
 		dnode_clear_range(dn, db->db_blkid, 1, tx);
 		mutex_exit(&dn->dn_mtx);
@@ -1094,7 +1105,8 @@
 
 	mutex_exit(&db->db_mtx);
 
-	if (db->db_blkid == DB_BONUS_BLKID) {
+	if (db->db_blkid == DMU_BONUS_BLKID ||
+	    db->db_blkid == DMU_SPILL_BLKID) {
 		mutex_enter(&dn->dn_mtx);
 		ASSERT(!list_link_active(&dr->dr_dirty_node));
 		list_insert_tail(&dn->dn_dirty_records[txgoff], dr);
@@ -1182,7 +1194,7 @@
 	dbuf_dirty_record_t *dr, **drp;
 
 	ASSERT(txg != 0);
-	ASSERT(db->db_blkid != DB_BONUS_BLKID);
+	ASSERT(db->db_blkid != DMU_BONUS_BLKID);
 
 	mutex_enter(&db->db_mtx);
 	/*
@@ -1297,7 +1309,7 @@
 {
 	dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
 
-	ASSERT(db->db_blkid != DB_BONUS_BLKID);
+	ASSERT(db->db_blkid != DMU_BONUS_BLKID);
 	ASSERT(tx->tx_txg != 0);
 	ASSERT(db->db_level == 0);
 	ASSERT(!refcount_is_zero(&db->db_holds));
@@ -1319,7 +1331,7 @@
 
 	if (db->db_state == DB_FILL) {
 		if (db->db_level == 0 && db->db_freed_in_flight) {
-			ASSERT(db->db_blkid != DB_BONUS_BLKID);
+			ASSERT(db->db_blkid != DMU_BONUS_BLKID);
 			/* we were freed while filling */
 			/* XXX dbuf_undirty? */
 			bzero(db->db.db_data, db->db.db_size);
@@ -1340,7 +1352,7 @@
 {
 	ASSERT(!refcount_is_zero(&db->db_holds));
 	ASSERT(db->db_dnode->dn_object != DMU_META_DNODE_OBJECT);
-	ASSERT(db->db_blkid != DB_BONUS_BLKID);
+	ASSERT(db->db_blkid != DMU_BONUS_BLKID);
 	ASSERT(db->db_level == 0);
 	ASSERT(DBUF_GET_BUFC_TYPE(db) == ARC_BUFC_DATA);
 	ASSERT(buf != NULL);
@@ -1423,7 +1435,7 @@
 
 	if (db->db_state == DB_CACHED) {
 		ASSERT(db->db.db_data != NULL);
-		if (db->db_blkid == DB_BONUS_BLKID) {
+		if (db->db_blkid == DMU_BONUS_BLKID) {
 			zio_buf_free(db->db.db_data, DN_MAX_BONUSLEN);
 			arc_space_return(DN_MAX_BONUSLEN, ARC_SPACE_OTHER);
 		}
@@ -1437,7 +1449,7 @@
 	db->db_state = DB_EVICTING;
 	db->db_blkptr = NULL;
 
-	if (db->db_blkid != DB_BONUS_BLKID && MUTEX_HELD(&dn->dn_dbufs_mtx)) {
+	if (db->db_blkid != DMU_BONUS_BLKID && MUTEX_HELD(&dn->dn_dbufs_mtx)) {
 		list_remove(&dn->dn_dbufs, db);
 		dnode_rele(dn, db);
 		db->db_dnode = NULL;
@@ -1466,7 +1478,19 @@
 	*parentp = NULL;
 	*bpp = NULL;
 
-	ASSERT(blkid != DB_BONUS_BLKID);
+	ASSERT(blkid != DMU_BONUS_BLKID);
+
+	if (blkid == DMU_SPILL_BLKID) {
+		mutex_enter(&dn->dn_mtx);
+		if (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR)
+			*bpp = &dn->dn_phys->dn_spill;
+		else
+			*bpp = NULL;
+		dbuf_add_ref(dn->dn_dbuf, NULL);
+		*parentp = dn->dn_dbuf;
+		mutex_exit(&dn->dn_mtx);
+		return (0);
+	}
 
 	if (dn->dn_phys->dn_nlevels == 0)
 		nlevels = 1;
@@ -1539,16 +1563,20 @@
 	db->db_immediate_evict = 0;
 	db->db_freed_in_flight = 0;
 
-	if (blkid == DB_BONUS_BLKID) {
+	if (blkid == DMU_BONUS_BLKID) {
 		ASSERT3P(parent, ==, dn->dn_dbuf);
 		db->db.db_size = DN_MAX_BONUSLEN -
 		    (dn->dn_nblkptr-1) * sizeof (blkptr_t);
 		ASSERT3U(db->db.db_size, >=, dn->dn_bonuslen);
-		db->db.db_offset = DB_BONUS_BLKID;
+		db->db.db_offset = DMU_BONUS_BLKID;
 		db->db_state = DB_UNCACHED;
 		/* the bonus dbuf is not placed in the hash table */
 		arc_space_consume(sizeof (dmu_buf_impl_t), ARC_SPACE_OTHER);
 		return (db);
+	} else if (blkid == DMU_SPILL_BLKID) {
+		db->db.db_size = (blkptr != NULL) ?
+		    BP_GET_LSIZE(blkptr) : SPA_MINBLOCKSIZE;
+		db->db.db_offset = 0;
 	} else {
 		int blocksize =
 		    db->db_level ? 1<<dn->dn_indblkshift :  dn->dn_datablksz;
@@ -1616,7 +1644,7 @@
 {
 	ASSERT(refcount_is_zero(&db->db_holds));
 
-	if (db->db_blkid != DB_BONUS_BLKID) {
+	if (db->db_blkid != DMU_BONUS_BLKID) {
 		/*
 		 * If this dbuf is still on the dn_dbufs list,
 		 * remove it from that list.
@@ -1652,7 +1680,7 @@
 	dmu_buf_impl_t *db = NULL;
 	blkptr_t *bp = NULL;
 
-	ASSERT(blkid != DB_BONUS_BLKID);
+	ASSERT(blkid != DMU_BONUS_BLKID);
 	ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock));
 
 	if (dnode_block_freed(dn, blkid))
@@ -1708,7 +1736,7 @@
 {
 	dmu_buf_impl_t *db, *parent = NULL;
 
-	ASSERT(blkid != DB_BONUS_BLKID);
+	ASSERT(blkid != DMU_BONUS_BLKID);
 	ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock));
 	ASSERT3U(dn->dn_nlevels, >, level);
 
@@ -1757,7 +1785,7 @@
 	 * still referencing it from db_data, we need to make a copy
 	 * of it in case we decide we want to dirty it again in this txg.
 	 */
-	if (db->db_level == 0 && db->db_blkid != DB_BONUS_BLKID &&
+	if (db->db_level == 0 && db->db_blkid != DMU_BONUS_BLKID &&
 	    dn->dn_object != DMU_META_DNODE_OBJECT &&
 	    db->db_state == DB_CACHED && db->db_data_pending) {
 		dbuf_dirty_record_t *dr = db->db_data_pending;
@@ -1812,7 +1840,47 @@
 	ASSERT(RW_WRITE_HELD(&dn->dn_struct_rwlock));
 
 	ASSERT(dn->dn_bonus == NULL);
-	dn->dn_bonus = dbuf_create(dn, 0, DB_BONUS_BLKID, dn->dn_dbuf, NULL);
+	dn->dn_bonus = dbuf_create(dn, 0, DMU_BONUS_BLKID, dn->dn_dbuf, NULL);
+}
+
+/*
+ * Resize the spill block dbuf 'db_fake' to 'blksz' bytes in 'tx'.
+ *
+ * Returns ENOTSUP if 'db_fake' is not a spill dbuf and 0 otherwise.
+ * The requested size is normalized first: 0 becomes SPA_MINBLOCKSIZE,
+ * a value above SPA_MAXBLOCKSIZE is clamped to it, and anything else is
+ * rounded up with P2ROUNDUP() to SPA_MINBLOCKSIZE alignment.
+ */
+int
+dbuf_spill_set_blksz(dmu_buf_t *db_fake, uint64_t blksz, dmu_tx_t *tx)
+{
+	dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
+	if (db->db_blkid != DMU_SPILL_BLKID)
+		return (ENOTSUP);
+	if (blksz == 0)
+		blksz = SPA_MINBLOCKSIZE;
+	if (blksz > SPA_MAXBLOCKSIZE)
+		blksz = SPA_MAXBLOCKSIZE;
+	else
+		blksz = P2ROUNDUP(blksz, SPA_MINBLOCKSIZE);
+
+	/* dbuf_new_size() is called with dn_struct_rwlock held as writer. */
+	rw_enter(&db->db_dnode->dn_struct_rwlock, RW_WRITER);
+	dbuf_new_size(db, blksz, tx);
+	rw_exit(&db->db_dnode->dn_struct_rwlock);
+
+	return (0);
+}
+
+/*
+ * Remove the spill block of 'dn' in 'tx': free the spill dbuf range,
+ * then let dnode_rm_spill() record the removal on the dnode.
+ */
+void
+dbuf_rm_spill(dnode_t *dn, dmu_tx_t *tx)
+{
+	dbuf_free_range(dn, DMU_SPILL_BLKID, DMU_SPILL_BLKID, tx);
+	dnode_rm_spill(dn, tx);
 }
 
 #pragma weak dmu_buf_add_ref = dbuf_add_ref
@@ -1858,7 +1913,7 @@
 		dbuf_evict_user(db);
 
 	if (holds == 0) {
-		if (db->db_blkid == DB_BONUS_BLKID) {
+		if (db->db_blkid == DMU_BONUS_BLKID) {
 			mutex_exit(&db->db_mtx);
 			dnode_rele(db->db_dnode, db);
 		} else if (db->db_buf == NULL) {
@@ -1971,6 +2026,11 @@
 	if (db->db_blkptr != NULL)
 		return;
 
+	if (db->db_blkid == DMU_SPILL_BLKID) {
+		db->db_blkptr = &dn->dn_phys->dn_spill;
+		BP_ZERO(db->db_blkptr);
+		return;
+	}
 	if (db->db_level == dn->dn_phys->dn_nlevels-1) {
 		/*
 		 * This buffer was allocated at a time when there was
@@ -2071,13 +2131,19 @@
 	}
 	DBUF_VERIFY(db);
 
+	if (db->db_blkid == DMU_SPILL_BLKID) {
+		mutex_enter(&dn->dn_mtx);
+		dn->dn_phys->dn_flags |= DNODE_FLAG_SPILL_BLKPTR;
+		mutex_exit(&dn->dn_mtx);
+	}
+
 	/*
 	 * If this is a bonus buffer, simply copy the bonus data into the
 	 * dnode.  It will be written out when the dnode is synced (and it
 	 * will be synced, since it must have been dirty for dbuf_sync to
 	 * be called).
 	 */
-	if (db->db_blkid == DB_BONUS_BLKID) {
+	if (db->db_blkid == DMU_BONUS_BLKID) {
 		dbuf_dirty_record_t **drp;
 
 		ASSERT(*datap != NULL);
@@ -2204,14 +2270,27 @@
 		return;
 	}
 
-	ASSERT(BP_GET_TYPE(bp) == dn->dn_type);
+	ASSERT((db->db_blkid != DMU_SPILL_BLKID &&
+	    BP_GET_TYPE(bp) == dn->dn_type) ||
+	    (db->db_blkid == DMU_SPILL_BLKID &&
+	    BP_GET_TYPE(bp) == dn->dn_bonustype));
 	ASSERT(BP_GET_LEVEL(bp) == db->db_level);
 
 	mutex_enter(&db->db_mtx);
 
+#ifdef ZFS_DEBUG
+	if (db->db_blkid == DMU_SPILL_BLKID) {
+		dnode_t *dn = db->db_dnode;
+		ASSERT(dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR);
+		ASSERT(!(BP_IS_HOLE(db->db_blkptr)) &&
+		    db->db_blkptr == &dn->dn_phys->dn_spill);
+	}
+#endif
+
 	if (db->db_level == 0) {
 		mutex_enter(&dn->dn_mtx);
-		if (db->db_blkid > dn->dn_phys->dn_maxblkid)
+		if (db->db_blkid > dn->dn_phys->dn_maxblkid &&
+		    db->db_blkid != DMU_SPILL_BLKID)
 			dn->dn_phys->dn_maxblkid = db->db_blkid;
 		mutex_exit(&dn->dn_mtx);
 
@@ -2278,8 +2357,17 @@
 	ASSERT(dr->dr_next == NULL);
 	*drp = dr->dr_next;
 
+#ifdef ZFS_DEBUG
+	if (db->db_blkid == DMU_SPILL_BLKID) {
+		dnode_t *dn = db->db_dnode;
+		ASSERT(dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR);
+		ASSERT(!(BP_IS_HOLE(db->db_blkptr)) &&
+		    db->db_blkptr == &dn->dn_phys->dn_spill);
+	}
+#endif
+
 	if (db->db_level == 0) {
-		ASSERT(db->db_blkid != DB_BONUS_BLKID);
+		ASSERT(db->db_blkid != DMU_BONUS_BLKID);
 		ASSERT(dr->dt.dl.dr_override_state == DR_NOT_OVERRIDDEN);
 		if (db->db_state != DB_NOFILL) {
 			if (dr->dt.dl.dr_data != db->db_buf)
@@ -2362,6 +2450,7 @@
 	zbookmark_t zb;
 	zio_prop_t zp;
 	zio_t *zio;
+	int wp_flag = 0;
 
 	if (db->db_state != DB_NOFILL) {
 		if (db->db_level > 0 || dn->dn_type == DMU_OT_DNODE) {
@@ -2385,9 +2474,12 @@
 		ASSERT(arc_released(parent->db_buf));
 		zio = parent->db_data_pending->dr_zio;
 	} else {
-		ASSERT(db->db_level == dn->dn_phys->dn_nlevels-1);
-		ASSERT3P(db->db_blkptr, ==,
-		    &dn->dn_phys->dn_blkptr[db->db_blkid]);
+		ASSERT((db->db_level == dn->dn_phys->dn_nlevels-1 &&
+		    db->db_blkid != DMU_SPILL_BLKID) ||
+		    (db->db_blkid == DMU_SPILL_BLKID && db->db_level == 0));
+		if (db->db_blkid != DMU_SPILL_BLKID)
+			ASSERT3P(db->db_blkptr, ==,
+			    &dn->dn_phys->dn_blkptr[db->db_blkid]);
 		zio = dn->dn_zio;
 	}
 
@@ -2399,8 +2491,11 @@
 	    os->os_dsl_dataset->ds_object : DMU_META_OBJSET,
 	    db->db.db_object, db->db_level, db->db_blkid);
 
-	dmu_write_policy(os, dn, db->db_level,
-	    db->db_state == DB_NOFILL ? WP_NOFILL : 0, &zp);
+	if (db->db_blkid == DMU_SPILL_BLKID)
+		wp_flag = WP_SPILL;
+	wp_flag |= (db->db_state == DB_NOFILL) ? WP_NOFILL : 0;
+
+	dmu_write_policy(os, dn, db->db_level, wp_flag, &zp);
 
 	if (db->db_level == 0 && dr->dt.dl.dr_override_state == DR_OVERRIDDEN) {
 		ASSERT(db->db_state != DB_NOFILL);

--- a/usr/src/uts/common/fs/zfs/dmu.c	Tue Mar 16 06:44:44 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/dmu.c	Tue Mar 16 09:43:38 2010 -0600
@@ -40,6 +40,7 @@
 #include <sys/zfs_ioctl.h>
 #include <sys/zap.h>
 #include <sys/zio_checksum.h>
+#include <sys/sa.h>
 #ifdef _KERNEL
 #include <sys/vmsystm.h>
 #include <sys/zfs_znode.h>
@@ -90,7 +91,10 @@
 	{	zap_byteswap,		TRUE,	"snapshot refcount tags"},
 	{	zap_byteswap,		TRUE,	"DDT ZAP algorithm"	},
 	{	zap_byteswap,		TRUE,	"DDT statistics"	},
-};
+	{	byteswap_uint8_array,	TRUE,	"System attributes"	},
+	{	zap_byteswap,		TRUE,	"SA master node"	},
+	{	zap_byteswap,		TRUE,	"SA attr registration"	},
+	{	zap_byteswap,		TRUE,	"SA attr layouts"	}, };
 
 int
 dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset,
@@ -142,6 +146,49 @@
 	return (0);
 }
 
+/*
+ * Change the bonus type of the object whose bonus dbuf is 'db'.
+ *
+ * Returns EINVAL if 'type' is not a valid object type or 'db' is not the
+ * dnode's bonus dbuf; otherwise calls dnode_setbonus_type() and returns 0.
+ */
+int
+dmu_set_bonustype(dmu_buf_t *db, dmu_object_type_t type, dmu_tx_t *tx)
+{
+	dnode_t *dn = ((dmu_buf_impl_t *)db)->db_dnode;
+
+	/* DMU_OT_NUMTYPES is the number of types, so it is itself invalid. */
+	if (type >= DMU_OT_NUMTYPES)
+		return (EINVAL);
+
+	if (dn->dn_bonus != (dmu_buf_impl_t *)db)
+		return (EINVAL);
+
+	dnode_setbonus_type(dn, type, tx);
+	return (0);
+}
+
+/*
+ * Remove the spill block of 'object' in 'tx'.
+ *
+ * Returns the dnode_hold() error if the object cannot be held, else 0.
+ */
+int
+dmu_rm_spill(objset_t *os, uint64_t object, dmu_tx_t *tx)
+{
+	dnode_t *dn;
+	int error;
+
+	/* 'dn' is only valid when the hold succeeded. */
+	error = dnode_hold(os, object, FTAG, &dn);
+	if (error != 0)
+		return (error);
+
+	dbuf_rm_spill(dn, tx);
+	dnode_rele(dn, FTAG);
+	return (0);
+}
+
 /*
  * returns ENOENT, EIO, or 0.
  */
@@ -179,6 +210,78 @@
 }
 
 /*
+ * returns ENOENT, EIO, or 0.
+ *
+ * This interface will allocate a blank spill dbuf when a spill blk
+ * doesn't already exist on the dnode.
+ *
+ * If you only want to find an already existing spill db, then
+ * dmu_spill_hold_existing() should be used.
+ *
+ * dn_struct_rwlock is taken as reader around the hold unless the caller
+ * passes DB_RF_HAVESTRUCT in 'flags'.  *dbp is set to the held spill dbuf
+ * whatever dbuf_read() returns.
+ */
+int
+dmu_spill_hold_by_dnode(dnode_t *dn, uint32_t flags, void *tag, dmu_buf_t **dbp)
+{
+	dmu_buf_impl_t *db = NULL;
+	int err;
+
+	if ((flags & DB_RF_HAVESTRUCT) == 0)
+		rw_enter(&dn->dn_struct_rwlock, RW_READER);
+
+	db = dbuf_hold(dn, DMU_SPILL_BLKID, tag);
+
+	if ((flags & DB_RF_HAVESTRUCT) == 0)
+		rw_exit(&dn->dn_struct_rwlock);
+
+	ASSERT(db != NULL);
+	err = dbuf_read(db, NULL, DB_RF_MUST_SUCCEED | flags);
+	*dbp = &db->db;
+	return (err);
+}
+
+/*
+ * Hold the spill dbuf of the object that owns 'bonus', but only if the
+ * dnode already has a spill block.
+ *
+ * Returns EINVAL if the pool version is older than SPA_VERSION_SA, ENOENT
+ * if dn_have_spill is not set, and otherwise the result of
+ * dmu_spill_hold_by_dnode().
+ */
+int
+dmu_spill_hold_existing(dmu_buf_t *bonus, void *tag, dmu_buf_t **dbp)
+{
+	dnode_t *dn = ((dmu_buf_impl_t *)bonus)->db_dnode;
+	int err;
+
+	if (spa_version(dn->dn_objset->os_spa) < SPA_VERSION_SA)
+		return (EINVAL);
+	rw_enter(&dn->dn_struct_rwlock, RW_READER);
+
+	if (!dn->dn_have_spill) {
+		rw_exit(&dn->dn_struct_rwlock);
+		return (ENOENT);
+	}
+	/* dn_struct_rwlock is held here, hence DB_RF_HAVESTRUCT. */
+	err = dmu_spill_hold_by_dnode(dn, DB_RF_HAVESTRUCT, tag, dbp);
+	rw_exit(&dn->dn_struct_rwlock);
+	return (err);
+}
+
+/*
+ * Hold the spill dbuf of the object that owns 'bonus'.  This simply
+ * calls dmu_spill_hold_by_dnode() on the bonus dbuf's dnode with no flags.
+ */
+int
+dmu_spill_hold_by_bonus(dmu_buf_t *bonus, void *tag, dmu_buf_t **dbp)
+{
+	return (dmu_spill_hold_by_dnode(((dmu_buf_impl_t *)bonus)->db_dnode,
+	    0, tag, dbp));
+}
+
+/*
  * Note: longer-term, we should modify all of the dmu_buf_*() interfaces
  * to take a held dnode rather than <os, object> -- the lookup is wasteful,
  * and can induce severe lock contention when writing to several files
@@ -1349,7 +1435,7 @@
 
 	zp->zp_checksum = checksum;
 	zp->zp_compress = compress;
-	zp->zp_type = type;
+	zp->zp_type = (wp & WP_SPILL) ? dn->dn_bonustype : type;
 	zp->zp_level = level;
 	zp->zp_copies = MIN(copies + ismd, spa_max_replication(os->os_spa));
 	zp->zp_dedup = dedup;
@@ -1514,6 +1600,7 @@
 	arc_init();
 	l2arc_init();
 	xuio_stat_init();
+	sa_cache_init();
 }
 
 void
@@ -1525,4 +1612,5 @@
 	dbuf_fini();
 	l2arc_fini();
 	xuio_stat_fini();
+	sa_cache_fini();
 }

--- a/usr/src/uts/common/fs/zfs/dmu_objset.c	Tue Mar 16 06:44:44 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/dmu_objset.c	Tue Mar 16 09:43:38 2010 -0600
@@ -41,6 +41,7 @@
 #include <sys/dmu_impl.h>
 #include <sys/zfs_ioctl.h>
 #include <sys/sunddi.h>
+#include <sys/sa.h>
 
 spa_t *
 dmu_objset_spa(objset_t *os)
@@ -500,6 +501,9 @@
 		    secondary_cache_changed_cb, os));
 	}
 
+	if (os->os_sa)
+		sa_tear_down(os);
+
 	/*
 	 * We should need only a single pass over the dnode list, since
 	 * nothing can be added to the list at this point.
@@ -1066,20 +1070,11 @@
 }
 
 static void
-do_userquota_callback(objset_t *os, dnode_phys_t *dnp,
-    boolean_t subtract, dmu_tx_t *tx)
+do_userquota_update(objset_t *os, uint64_t used, uint64_t flags,
+    uint64_t user, uint64_t group, boolean_t subtract, dmu_tx_t *tx)
 {
-	static const char zerobuf[DN_MAX_BONUSLEN] = {0};
-	uint64_t user, group;
-
-	ASSERT(dnp->dn_type != 0 ||
-	    (bcmp(DN_BONUS(dnp), zerobuf, DN_MAX_BONUSLEN) == 0 &&
-	    DN_USED_BYTES(dnp) == 0));
-
-	if ((dnp->dn_flags & DNODE_FLAG_USERUSED_ACCOUNTED) &&
-	    0 == used_cbs[os->os_phys->os_type](dnp->dn_bonustype,
-	    DN_BONUS(dnp), &user, &group)) {
-		int64_t delta = DNODE_SIZE + DN_USED_BYTES(dnp);
+	if ((flags & DNODE_FLAG_USERUSED_ACCOUNTED)) {
+		int64_t delta = DNODE_SIZE + used;
 		if (subtract)
 			delta = -delta;
 		VERIFY3U(0, ==, zap_increment_int(os, DMU_USERUSED_OBJECT,
@@ -1090,7 +1085,7 @@
 }
 
 void
-dmu_objset_do_userquota_callbacks(objset_t *os, dmu_tx_t *tx)
+dmu_objset_do_userquota_updates(objset_t *os, dmu_tx_t *tx)
 {
 	dnode_t *dn;
 	list_t *list = &os->os_synced_dnodes;
@@ -1099,7 +1094,6 @@
 
 	while (dn = list_head(list)) {
 		ASSERT(!DMU_OBJECT_IS_SPECIAL(dn->dn_object));
-		ASSERT(dn->dn_oldphys);
 		ASSERT(dn->dn_phys->dn_type == DMU_OT_NONE ||
 		    dn->dn_phys->dn_flags &
 		    DNODE_FLAG_USERUSED_ACCOUNTED);
@@ -1116,20 +1110,39 @@
 
 		/*
 		 * We intentionally modify the zap object even if the
-		 * net delta (due to phys-oldphys) is zero.  Otherwise
+		 * net delta is zero.  Otherwise
 		 * the block of the zap obj could be shared between
 		 * datasets but need to be different between them after
 		 * a bprewrite.
 		 */
-		do_userquota_callback(os, dn->dn_oldphys, B_TRUE, tx);
-		do_userquota_callback(os, dn->dn_phys, B_FALSE, tx);
 
 		/*
 		 * The mutex is needed here for interlock with dnode_allocate.
 		 */
 		mutex_enter(&dn->dn_mtx);
-		zio_buf_free(dn->dn_oldphys, sizeof (dnode_phys_t));
-		dn->dn_oldphys = NULL;
+		ASSERT(dn->dn_id_flags);
+		if (dn->dn_id_flags & DN_ID_OLD_EXIST)  {
+			do_userquota_update(os, dn->dn_oldused, dn->dn_oldflags,
+			    dn->dn_olduid, dn->dn_oldgid, B_TRUE, tx);
+		}
+		if (dn->dn_id_flags & DN_ID_NEW_EXIST) {
+			do_userquota_update(os, DN_USED_BYTES(dn->dn_phys),
+			    dn->dn_phys->dn_flags,  dn->dn_newuid,
+			    dn->dn_newgid, B_FALSE, tx);
+		}
+
+		dn->dn_oldused = 0;
+		dn->dn_oldflags = 0;
+		if (dn->dn_id_flags & DN_ID_NEW_EXIST) {
+			dn->dn_olduid = dn->dn_newuid;
+			dn->dn_oldgid = dn->dn_newgid;
+			dn->dn_id_flags |= DN_ID_OLD_EXIST;
+			if (dn->dn_bonuslen == 0)
+				dn->dn_id_flags |= DN_ID_CHKED_SPILL;
+			else
+				dn->dn_id_flags |= DN_ID_CHKED_BONUS;
+		}
+		dn->dn_id_flags &= ~(DN_ID_NEW_EXIST|DN_ID_SYNC);
 		mutex_exit(&dn->dn_mtx);
 
 		list_remove(list, dn);
@@ -1137,6 +1150,85 @@
 	}
 }
 
+/*
+ * Look up the user and group ids of 'dn' through the objset type's
+ * used_cbs[] callback and cache them on the dnode.
+ *
+ * If 'before' is set the ids go to dn_olduid/dn_oldgid and DN_ID_OLD_EXIST
+ * is set when the callback succeeds; otherwise they go to
+ * dn_newuid/dn_newgid and DN_ID_NEW_EXIST is set.  The callback is given
+ * the bonus data when dn_bonuslen is non-zero, or the spill block data
+ * when dn_bonuslen is zero and the bonus type is DMU_OT_SA.  Nothing is
+ * done when dmu_objset_userused_enabled() is false for the objset.
+ */
+void
+dmu_objset_userquota_get_ids(dnode_t *dn, boolean_t before)
+{
+	objset_t *os = dn->dn_objset;
+	void *data = NULL;
+	dmu_buf_t *spilldb = NULL;
+	uint64_t *user, *group;
+	int flags = dn->dn_id_flags;
+	int error;
+
+	if (!dmu_objset_userused_enabled(dn->dn_objset))
+		return;
+
+	/* Nothing to do if the "before" state was already examined. */
+	if (before && (flags & (DN_ID_CHKED_BONUS|DN_ID_OLD_EXIST|
+	    DN_ID_CHKED_SPILL)))
+		return;
+
+	if (before && dn->dn_bonuslen != 0)
+		data = DN_BONUS(dn->dn_phys);
+	else if (!before && dn->dn_bonuslen != 0)
+		data = dn->dn_bonus != NULL ?
+		    dn->dn_bonus->db.db_data : DN_BONUS(dn->dn_phys);
+	else if (dn->dn_bonuslen == 0 && dn->dn_bonustype == DMU_OT_SA) {
+			int rf = 0;
+
+			if (RW_WRITE_HELD(&dn->dn_struct_rwlock))
+				rf |= DB_RF_HAVESTRUCT;
+			error = dmu_spill_hold_by_dnode(dn, rf, FTAG, &spilldb);
+			ASSERT(error == 0);
+			data = spilldb->db_data;
+	} else {
+		/* No bonus data and not an SA object: nothing to read. */
+		mutex_enter(&dn->dn_mtx);
+		dn->dn_id_flags |= DN_ID_CHKED_BONUS;
+		mutex_exit(&dn->dn_mtx);
+		return;
+	}
+
+	if (before) {
+		user = &dn->dn_olduid;
+		group = &dn->dn_oldgid;
+	} else {
+		user = &dn->dn_newuid;
+		group = &dn->dn_newgid;
+	}
+
+	ASSERT(data);
+	error = used_cbs[os->os_phys->os_type](dn->dn_bonustype, data,
+	    user, group);
+
+	/* Record whether ids were found and which source was examined. */
+	mutex_enter(&dn->dn_mtx);
+	if (error == 0 && before)
+		dn->dn_id_flags |= DN_ID_OLD_EXIST;
+	if (error == 0 && !before)
+		dn->dn_id_flags |= DN_ID_NEW_EXIST;
+
+	if (spilldb) {
+		dn->dn_id_flags |= DN_ID_CHKED_SPILL;
+	} else {
+		dn->dn_id_flags |= DN_ID_CHKED_BONUS;
+	}
+	mutex_exit(&dn->dn_mtx);
+	if (spilldb)
+		dmu_buf_rele(spilldb, FTAG);
+}
+
 boolean_t
 dmu_objset_userspace_present(objset_t *os)
 {

--- a/usr/src/uts/common/fs/zfs/dmu_send.c	Tue Mar 16 06:44:44 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/dmu_send.c	Tue Mar 16 09:43:38 2010 -0600
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -183,6 +183,36 @@
 }
 
+/*
+ * Write a DRR_SPILL record for 'object' to the stream, followed by the
+ * 'blksz' bytes of spill block contents at 'data'.  A pending record, if
+ * any, is written out first.  Returns EINTR if dump_bytes() fails.
+ */
 static int
+dump_spill(struct backuparg *ba, uint64_t object, int blksz, void *data)
+{
+	struct drr_spill *drrs = &(ba->drr->drr_u.drr_spill);
+
+	if (ba->pending_op != PENDING_NONE) {
+		if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0)
+			return (EINTR);
+		ba->pending_op = PENDING_NONE;
+	}
+
+	/* write a SPILL record */
+	bzero(ba->drr, sizeof (dmu_replay_record_t));
+	ba->drr->drr_type = DRR_SPILL;
+	drrs->drr_object = object;
+	drrs->drr_length = blksz;
+	drrs->drr_toguid = ba->toguid;
+
+	if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)))
+		return (EINTR);
+	if (dump_bytes(ba, data, blksz))
+		return (EINTR);
+	return (0);
+}
+
+static int
 dump_freeobjects(struct backuparg *ba, uint64_t firstobj, uint64_t numobjs)
 {
 	struct drr_freeobjects *drrfo = &(ba->drr->drr_u.drr_freeobjects);
@@ -319,6 +344,18 @@
 				break;
 		}
 		(void) arc_buf_remove_ref(abuf, &abuf);
+	} else if (type == DMU_OT_SA) {
+		uint32_t aflags = ARC_WAIT;
+		arc_buf_t *abuf;
+		int blksz = BP_GET_LSIZE(bp);
+
+		if (arc_read_nolock(NULL, spa, bp,
+		    arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ,
+		    ZIO_FLAG_CANFAIL, &aflags, zb) != 0)
+			return (EIO);
+
+		err = dump_spill(ba, zb->zb_object, blksz, abuf->b_data);
+		(void) arc_buf_remove_ref(abuf, &abuf);
 	} else { /* it's a level-0 block of a regular object */
 		uint32_t aflags = ARC_WAIT;
 		arc_buf_t *abuf;
@@ -908,6 +945,11 @@
 		DO64(drr_free.drr_length);
 		DO64(drr_free.drr_toguid);
 		break;
+	case DRR_SPILL:
+		DO64(drr_spill.drr_object);
+		DO64(drr_spill.drr_length);
+		DO64(drr_spill.drr_toguid);
+		break;
 	case DRR_END:
 		DO64(drr_end.drr_checksum.zc_word[0]);
 		DO64(drr_end.drr_checksum.zc_word[1]);
@@ -969,8 +1011,9 @@
 		    drro->drr_type, drro->drr_blksz,
 		    drro->drr_bonustype, drro->drr_bonuslen);
 	}
-	if (err)
+	if (err) {
 		return (EINVAL);
+	}
 
 	tx = dmu_tx_create(os);
 	dmu_tx_hold_bonus(tx, drro->drr_object);
@@ -1121,6 +1164,66 @@
 	return (0);
 }
 
+/*
+ * Apply a DRR_SPILL record: read drr_length bytes of payload from the
+ * stream and copy them into the spill block of drr_object, growing the
+ * spill dbuf first if it is smaller than the payload.
+ *
+ * Returns EINVAL if drr_length is outside the range SPA_MINBLOCKSIZE to
+ * SPA_MAXBLOCKSIZE or dmu_object_info() fails for the object, ra->err if
+ * the payload cannot be read, and otherwise any error from holding the
+ * spill dbuf or assigning the tx.
+ */
+static int
+restore_spill(struct restorearg *ra, objset_t *os, struct drr_spill *drrs)
+{
+	dmu_tx_t *tx;
+	void *data;
+	dmu_buf_t *db, *db_spill;
+	int err;
+
+	if (drrs->drr_length < SPA_MINBLOCKSIZE ||
+	    drrs->drr_length > SPA_MAXBLOCKSIZE)
+		return (EINVAL);
+
+	data = restore_read(ra, drrs->drr_length);
+	if (data == NULL)
+		return (ra->err);
+
+	if (dmu_object_info(os, drrs->drr_object, NULL) != 0)
+		return (EINVAL);
+
+	VERIFY(0 == dmu_bonus_hold(os, drrs->drr_object, FTAG, &db));
+	if ((err = dmu_spill_hold_by_bonus(db, FTAG, &db_spill)) != 0) {
+		dmu_buf_rele(db, FTAG);
+		return (err);
+	}
+
+	tx = dmu_tx_create(os);
+
+	dmu_tx_hold_spill(tx, db->db_object);
+
+	err = dmu_tx_assign(tx, TXG_WAIT);
+	if (err) {
+		dmu_buf_rele(db, FTAG);
+		dmu_buf_rele(db_spill, FTAG);
+		dmu_tx_abort(tx);
+		return (err);
+	}
+	dmu_buf_will_dirty(db_spill, tx);
+
+	if (db_spill->db_size < drrs->drr_length)
+		VERIFY(0 == dbuf_spill_set_blksz(db_spill,
+		    drrs->drr_length, tx));
+	bcopy(data, db_spill->db_data, drrs->drr_length);
+
+	dmu_buf_rele(db, FTAG);
+	dmu_buf_rele(db_spill, FTAG);
+
+	dmu_tx_commit(tx);
+	return (0);
+}
+
 /* ARGSUSED */
 static int
 restore_free(struct restorearg *ra, objset_t *os,
@@ -1276,6 +1369,12 @@
 				ra.err = ECKSUM;
 			goto out;
 		}
+		case DRR_SPILL:
+		{
+			struct drr_spill drrs = drr->drr_u.drr_spill;
+			ra.err = restore_spill(&ra, os, &drrs);
+			break;
+		}
 		default:
 			ra.err = EINVAL;
 			goto out;

--- a/usr/src/uts/common/fs/zfs/dmu_traverse.c	Tue Mar 16 06:44:44 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/dmu_traverse.c	Tue Mar 16 09:43:38 2010 -0600
@@ -33,6 +33,8 @@
 #include <sys/spa.h>
 #include <sys/zio.h>
 #include <sys/dmu_impl.h>
+#include <sys/sa.h>
+#include <sys/sa_impl.h>
 #include <sys/callb.h>
 
 struct prefetch_data {
@@ -273,6 +275,17 @@
 				break;
 			lasterr = err;
 		}
+		if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) {
+			SET_BOOKMARK(&czb, objset,
+			    object, 0, DMU_SPILL_BLKID);
+			err = traverse_visitbp(td, dnp, buf,
+			    (blkptr_t *)&dnp->dn_spill, &czb);
+			if (err) {
+				if (!hard)
+					break;
+				lasterr = err;
+			}
+		}
 	}
 	return (err != 0 ? err : lasterr);
 }

--- a/usr/src/uts/common/fs/zfs/dmu_tx.c	Tue Mar 16 06:44:44 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/dmu_tx.c	Tue Mar 16 09:43:38 2010 -0600
@@ -33,7 +33,10 @@
 #include <sys/dsl_pool.h>
 #include <sys/zap_impl.h> /* for fzap_default_block_shift */
 #include <sys/spa.h>
+#include <sys/sa.h>
+#include <sys/sa_impl.h>
 #include <sys/zfs_context.h>
+#include <sys/varargs.h>
 
 typedef void (*dmu_tx_hold_func_t)(dmu_tx_t *tx, struct dnode *dn,
     uint64_t arg1, uint64_t arg2);
@@ -813,10 +816,11 @@
 					match_offset = TRUE;
 				/*
 				 * We will let this hold work for the bonus
-				 * buffer so that we don't need to hold it
-				 * when creating a new object.
+				 * or spill buffer so that we don't need to
+				 * hold it when creating a new object.
 				 */
-				if (blkid == DB_BONUS_BLKID)
+				if (blkid == DMU_BONUS_BLKID ||
+				    blkid == DMU_SPILL_BLKID)
 					match_offset = TRUE;
 				/*
 				 * They might have to increase nlevels,
@@ -837,8 +841,12 @@
 				    txh->txh_arg2 == DMU_OBJECT_END))
 					match_offset = TRUE;
 				break;
+			case THT_SPILL:
+				if (blkid == DMU_SPILL_BLKID)
+					match_offset = TRUE;
+				break;
 			case THT_BONUS:
-				if (blkid == DB_BONUS_BLKID)
+				if (blkid == DMU_BONUS_BLKID)
 					match_offset = TRUE;
 				break;
 			case THT_ZAP:
@@ -1204,3 +1212,154 @@
 		kmem_free(dcb, sizeof (dmu_tx_callback_t));
 	}
 }
+
+/*
+ * Hold the attribute names needed for attribute registration.
+ * This should be a very rare case.  If it does happen it would only
+ * happen on the first write to the file system.
+ *
+ * Each attribute in the table that is not yet registered gets a ZAP
+ * hold on its name, against the registration object when one exists
+ * and against DMU_NEW_OBJECT otherwise.
+ */
+static void
+dmu_tx_sa_registration_hold(sa_os_t *sa, dmu_tx_t *tx)
+{
+	int i;
+
+	if (!sa->sa_need_attr_registration)
+		return;
+
+	for (i = 0; i != sa->sa_num_attrs; i++) {
+		if (!sa->sa_attr_table[i].sa_registered) {
+			if (sa->sa_reg_attr_obj)
+				dmu_tx_hold_zap(tx, sa->sa_reg_attr_obj,
+				    B_TRUE, sa->sa_attr_table[i].sa_name);
+			else
+				dmu_tx_hold_zap(tx, DMU_NEW_OBJECT,
+				    B_TRUE, sa->sa_attr_table[i].sa_name);
+		}
+	}
+}
+
+/*
+ * Hold the spill block of 'object' for modification in 'tx'.
+ *
+ * SPA_MAXBLOCKSIZE is always charged, whatever the current size of the
+ * spill block.
+ */
+void
+dmu_tx_hold_spill(dmu_tx_t *tx, uint64_t object)
+{
+	dnode_t *dn;
+	dmu_tx_hold_t *txh;
+	blkptr_t *bp;
+
+	txh = dmu_tx_hold_object_impl(tx, tx->tx_objset, object,
+	    THT_SPILL, 0, 0);
+
+	/* Nothing to account for if no hold could be created. */
+	if (txh == NULL)
+		return;
+
+	dn = txh->txh_dnode;
+
+	if (dn == NULL)
+		return;
+
+	/* If blkptr doesn't exist then add space to towrite */
+	bp = &dn->dn_phys->dn_spill;
+	if (BP_IS_HOLE(bp)) {
+		txh->txh_space_towrite += SPA_MAXBLOCKSIZE;
+		txh->txh_space_tounref = 0;
+	} else {
+		if (dsl_dataset_block_freeable(dn->dn_objset->os_dsl_dataset,
+		    bp->blk_birth))
+			txh->txh_space_tooverwrite += SPA_MAXBLOCKSIZE;
+		else
+			txh->txh_space_towrite += SPA_MAXBLOCKSIZE;
+		if (bp->blk_birth)
+			txh->txh_space_tounref += SPA_MAXBLOCKSIZE;
+	}
+}
+
+/*
+ * Interface to hold a bunch of attributes.
+ * Used for creating new files.
+ * attrsize is the total size of all attributes
+ * to be added during object creation.
+ *
+ * For updating/adding a single attribute dmu_tx_hold_sa() should be used.
+ */
+void
+dmu_tx_hold_sa_create(dmu_tx_t *tx, int attrsize)
+{
+	sa_os_t *sa = tx->tx_objset->os_sa;
+
+	dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
+
+	if (sa->sa_master_obj == 0)
+		return;
+
+	if (sa->sa_layout_attr_obj)
+		dmu_tx_hold_zap(tx, sa->sa_layout_attr_obj, B_TRUE, NULL);
+	else {
+		dmu_tx_hold_zap(tx, sa->sa_master_obj, B_TRUE, SA_LAYOUTS);
+		dmu_tx_hold_zap(tx, sa->sa_master_obj, B_TRUE, SA_REGISTRY);
+		dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, B_TRUE, NULL);
+		dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, B_TRUE, NULL);
+	}
+
+	dmu_tx_sa_registration_hold(sa, tx);
+
+	/* Spill is only held when forced or attrsize exceeds the bonus. */
+	if (attrsize <= DN_MAX_BONUSLEN && !sa->sa_force_spill)
+		return;
+
+	(void) dmu_tx_hold_object_impl(tx, tx->tx_objset, DMU_NEW_OBJECT,
+	    THT_SPILL, 0, 0);
+}
+
+/*
+ * Hold SA attribute
+ *
+ * dmu_tx_hold_sa(dmu_tx_t *tx, sa_handle_t *hdl, boolean_t may_grow)
+ *
+ * When may_grow is set, the layout ZAP object (if there is one) and the
+ * spill block are held in addition to the bonus buffer.  The spill block
+ * is also held when spill is forced, when hdl->sa_spill is set, or when
+ * the dnode already has a spill block.
+ */
+void
+dmu_tx_hold_sa(dmu_tx_t *tx, sa_handle_t *hdl, boolean_t may_grow)
+{
+	uint64_t object;
+	sa_os_t *sa = tx->tx_objset->os_sa;
+
+	ASSERT(hdl != NULL);
+
+	object = sa_handle_object(hdl);
+
+	dmu_tx_hold_bonus(tx, object);
+
+	if (sa->sa_master_obj == 0)
+		return;
+
+	if (sa->sa_reg_attr_obj == 0 || sa->sa_layout_attr_obj == 0) {
+		dmu_tx_hold_zap(tx, sa->sa_master_obj, B_TRUE, SA_LAYOUTS);
+		dmu_tx_hold_zap(tx, sa->sa_master_obj, B_TRUE, SA_REGISTRY);
+		dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, B_TRUE, NULL);
+		dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, B_TRUE, NULL);
+	}
+
+	dmu_tx_sa_registration_hold(sa, tx);
+
+	if (may_grow && sa->sa_layout_attr_obj)
+		dmu_tx_hold_zap(tx, sa->sa_layout_attr_obj, B_TRUE, NULL);
+
+	if (sa->sa_force_spill || may_grow || hdl->sa_spill ||
+	    ((dmu_buf_impl_t *)hdl->sa_bonus)->db_dnode->dn_have_spill) {
+		ASSERT(tx->tx_txg == 0);
+		dmu_tx_hold_spill(tx, object);
+	}
+}

--- a/usr/src/uts/common/fs/zfs/dnode.c	Tue Mar 16 06:44:44 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/dnode.c	Tue Mar 16 09:43:38 2010 -0600
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -210,6 +210,11 @@
 		ASSERT3U(dnp->dn_bonustype, <, DMU_OT_NUMTYPES);
 		dmu_ot[dnp->dn_bonustype].ot_byteswap(dnp->dn_bonus + off, len);
 	}
+
+	/* Swap SPILL block if we have one */
+	if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR)
+		byteswap_uint64_array(&dnp->dn_spill, sizeof (blkptr_t));
+
 }
 
 void
@@ -258,6 +263,38 @@
 	rw_exit(&dn->dn_struct_rwlock);
 }
 
+/*
+ * Change the bonus type of 'dn' to 'newtype' in 'tx'.  The dnode is
+ * dirtied and the new type is recorded both in dn_bonustype and in the
+ * dn_next_bonustype[] slot for this txg.
+ */
+void
+dnode_setbonus_type(dnode_t *dn, dmu_object_type_t newtype, dmu_tx_t *tx)
+{
+	ASSERT3U(refcount_count(&dn->dn_holds), >=, 1);
+	dnode_setdirty(dn, tx);
+	rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
+	dn->dn_bonustype = newtype;
+	dn->dn_next_bonustype[tx->tx_txg & TXG_MASK] = dn->dn_bonustype;
+	rw_exit(&dn->dn_struct_rwlock);
+}
+
+/*
+ * Mark the spill block of 'dn' for removal in 'tx': the dnode is dirtied,
+ * DN_KILL_SPILLBLK is recorded in the dn_rm_spillblk[] slot for this txg
+ * and dn_have_spill is cleared.
+ */
+void
+dnode_rm_spill(dnode_t *dn, dmu_tx_t *tx)
+{
+	ASSERT3U(refcount_count(&dn->dn_holds), >=, 1);
+	dnode_setdirty(dn, tx);
+	rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
+	dn->dn_rm_spillblk[tx->tx_txg&TXG_MASK] = DN_KILL_SPILLBLK;
+	dn->dn_have_spill = B_FALSE;
+	rw_exit(&dn->dn_struct_rwlock);
+}
+
 static void
 dnode_setdblksz(dnode_t *dn, int size)
 {
@@ -294,6 +321,7 @@
 	dn->dn_bonustype = dnp->dn_bonustype;
 	dn->dn_bonuslen = dnp->dn_bonuslen;
 	dn->dn_maxblkid = dnp->dn_maxblkid;
+	dn->dn_have_spill = ((dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) != 0);
 
 	dmu_zfetch_init(&dn->dn_zfetch, dn);
 
@@ -321,7 +349,7 @@
 	}
 	ASSERT(NULL == list_head(&dn->dn_dbufs));
 #endif
-	ASSERT(dn->dn_oldphys == NULL);
+	ASSERT((dn->dn_id_flags & DN_ID_NEW_EXIST) == 0);
 
 	mutex_enter(&os->os_lock);
 	list_remove(&os->os_dnodes, dn);
@@ -368,6 +396,7 @@
 	ASSERT(ot != DMU_OT_NONE);
 	ASSERT3U(ot, <, DMU_OT_NUMTYPES);
 	ASSERT((bonustype == DMU_OT_NONE && bonuslen == 0) ||
+	    (bonustype == DMU_OT_SA && bonuslen == 0) ||
 	    (bonustype != DMU_OT_NONE && bonuslen != 0));
 	ASSERT3U(bonustype, <, DMU_OT_NUMTYPES);
 	ASSERT3U(bonuslen, <=, DN_MAX_BONUSLEN);
@@ -383,6 +412,8 @@
 		ASSERT3U(dn->dn_next_nlevels[i], ==, 0);
 		ASSERT3U(dn->dn_next_indblkshift[i], ==, 0);
 		ASSERT3U(dn->dn_next_bonuslen[i], ==, 0);
+		ASSERT3U(dn->dn_next_bonustype[i], ==, 0);
+		ASSERT3U(dn->dn_rm_spillblk[i], ==, 0);
 		ASSERT3U(dn->dn_next_blksz[i], ==, 0);
 		ASSERT(!list_link_active(&dn->dn_dirty_link[i]));
 		ASSERT3P(list_head(&dn->dn_dirty_records[i]), ==, NULL);
@@ -393,7 +424,11 @@
 	dnode_setdblksz(dn, blocksize);
 	dn->dn_indblkshift = ibs;
 	dn->dn_nlevels = 1;
-	dn->dn_nblkptr = 1 + ((DN_MAX_BONUSLEN - bonuslen) >> SPA_BLKPTRSHIFT);
+	if (bonustype == DMU_OT_SA) /* Maximize bonus space for SA */
+		dn->dn_nblkptr = 1;
+	else
+		dn->dn_nblkptr = 1 +
+		    ((DN_MAX_BONUSLEN - bonuslen) >> SPA_BLKPTRSHIFT);
 	dn->dn_bonustype = bonustype;
 	dn->dn_bonuslen = bonuslen;
 	dn->dn_checksum = ZIO_CHECKSUM_INHERIT;
@@ -407,10 +442,12 @@
 	}
 
 	dn->dn_allocated_txg = tx->tx_txg;
+	dn->dn_id_flags = 0;
 
 	dnode_setdirty(dn, tx);
 	dn->dn_next_indblkshift[tx->tx_txg & TXG_MASK] = ibs;
 	dn->dn_next_bonuslen[tx->tx_txg & TXG_MASK] = dn->dn_bonuslen;
+	dn->dn_next_bonustype[tx->tx_txg & TXG_MASK] = dn->dn_bonustype;
 	dn->dn_next_blksz[tx->tx_txg & TXG_MASK] = dn->dn_datablksz;
 }
 
@@ -446,8 +483,14 @@
 	if (dn->dn_bonuslen != bonuslen)
 		dn->dn_next_bonuslen[tx->tx_txg&TXG_MASK] = bonuslen;
 	nblkptr = 1 + ((DN_MAX_BONUSLEN - bonuslen) >> SPA_BLKPTRSHIFT);
+	if (dn->dn_bonustype != bonustype)
+		dn->dn_next_bonustype[tx->tx_txg&TXG_MASK] = bonustype;
 	if (dn->dn_nblkptr != nblkptr)
 		dn->dn_next_nblkptr[tx->tx_txg&TXG_MASK] = nblkptr;
+	if (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR) {
+		dn->dn_rm_spillblk[tx->tx_txg&TXG_MASK] = DN_KILL_SPILLBLK;
+		dn->dn_have_spill = B_FALSE;
+	}
 	rw_exit(&dn->dn_struct_rwlock);
 
 	/* change type */
@@ -627,11 +670,15 @@
 	if (dn->dn_free_txg ||
 	    ((flag & DNODE_MUST_BE_ALLOCATED) && type == DMU_OT_NONE) ||
 	    ((flag & DNODE_MUST_BE_FREE) &&
-	    (type != DMU_OT_NONE || dn->dn_oldphys))) {
+	    (type != DMU_OT_NONE || (dn->dn_id_flags & DN_ID_SYNC)))) {
 		mutex_exit(&dn->dn_mtx);
 		dbuf_rele(db, FTAG);
 		return (type == DMU_OT_NONE ? ENOENT : EEXIST);
 	}
+	if (flag & DNODE_MUST_BE_FREE) {
+		ASSERT(refcount_is_zero(&dn->dn_holds));
+		ASSERT(!(dn->dn_id_flags & DN_ID_SYNC));
+	}
 	mutex_exit(&dn->dn_mtx);
 
 	if (refcount_add(&dn->dn_holds, tag) == 1)
@@ -706,6 +753,11 @@
 	mutex_exit(&dn->dn_mtx);
 #endif
 
+	/*
+	 * Determine old uid/gid when necessary
+	 */
+	dmu_objset_userquota_get_ids(dn, B_TRUE);
+
 	mutex_enter(&os->os_lock);
 
 	/*
@@ -720,6 +772,7 @@
 	ASSERT(dn->dn_datablksz != 0);
 	ASSERT3U(dn->dn_next_bonuslen[txg&TXG_MASK], ==, 0);
 	ASSERT3U(dn->dn_next_blksz[txg&TXG_MASK], ==, 0);
+	ASSERT3U(dn->dn_next_bonustype[txg&TXG_MASK], ==, 0);
 
 	dprintf_ds(os->os_dsl_dataset, "obj=%llu txg=%llu\n",
 	    dn->dn_object, txg);
@@ -814,7 +867,8 @@
 	for (db = list_head(&dn->dn_dbufs); db; db = db_next) {
 		db_next = list_next(&dn->dn_dbufs, db);
 
-		if (db->db_blkid != 0 && db->db_blkid != DB_BONUS_BLKID) {
+		if (db->db_blkid != 0 && db->db_blkid != DMU_BONUS_BLKID &&
+		    db->db_blkid != DMU_SPILL_BLKID) {
 			mutex_exit(&dn->dn_dbufs_mtx);
 			goto fail;
 		}
@@ -858,7 +912,7 @@
 	int epbs, new_nlevels;
 	uint64_t sz;
 
-	ASSERT(blkid != DB_BONUS_BLKID);
+	ASSERT(blkid != DMU_BONUS_BLKID);
 
 	ASSERT(have_read ?
 	    RW_READ_HELD(&dn->dn_struct_rwlock) :
@@ -915,7 +969,8 @@
 		for (dr = list_head(list); dr; dr = dr_next) {
 			dr_next = list_next(&dn->dn_dirty_records[txgoff], dr);
 			if (dr->dr_dbuf->db_level != new_nlevels-1 &&
-			    dr->dr_dbuf->db_blkid != DB_BONUS_BLKID) {
+			    dr->dr_dbuf->db_blkid != DMU_BONUS_BLKID &&
+			    dr->dr_dbuf->db_blkid != DMU_SPILL_BLKID) {
 				ASSERT(dr->dr_dbuf->db_level == old_nlevels-1);
 				list_remove(&dn->dn_dirty_records[txgoff], dr);
 				list_insert_tail(&new->dt.di.dr_children, dr);
@@ -1170,6 +1225,24 @@
 	rw_exit(&dn->dn_struct_rwlock);
 }
 
+/*
+ * Return B_TRUE if any dn_rm_spillblk[] slot is DN_KILL_SPILLBLK, that is,
+ * a spill block removal has been recorded for some txg.
+ */
+static boolean_t
+dnode_spill_freed(dnode_t *dn)
+{
+	int i;
+
+	mutex_enter(&dn->dn_mtx);
+	for (i = 0; i < TXG_SIZE; i++) {
+		if (dn->dn_rm_spillblk[i] == DN_KILL_SPILLBLK)
+			break;
+	}
+	mutex_exit(&dn->dn_mtx);
+	return (i < TXG_SIZE);
+}
+
 /* return TRUE if this blkid was freed in a recent txg, or FALSE if it wasn't */
 uint64_t
 dnode_block_freed(dnode_t *dn, uint64_t blkid)
@@ -1178,7 +1247,7 @@
 	void *dp = spa_get_dsl(dn->dn_objset->os_spa);
 	int i;
 
-	if (blkid == DB_BONUS_BLKID)
+	if (blkid == DMU_BONUS_BLKID)
 		return (FALSE);
 
 	/*
@@ -1191,6 +1260,9 @@
 	if (dn->dn_free_txg)
 		return (TRUE);
 
+	if (blkid == DMU_SPILL_BLKID)
+		return (dnode_spill_freed(dn));
+
 	range_tofind.fr_blkid = blkid;
 	mutex_enter(&dn->dn_mtx);
 	for (i = 0; i < TXG_SIZE; i++) {

--- a/usr/src/uts/common/fs/zfs/dnode_sync.c	Tue Mar 16 06:44:44 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/dnode_sync.c	Tue Mar 16 09:43:38 2010 -0600
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -434,7 +434,7 @@
 		db->db_last_dirty = NULL;
 		db->db_dirtycnt -= 1;
 		if (db->db_level == 0) {
-			ASSERT(db->db_blkid == DB_BONUS_BLKID ||
+			ASSERT(db->db_blkid == DMU_BONUS_BLKID ||
 			    dr->dt.dl.dr_data == db->db_buf);
 			dbuf_unoverride(dr);
 		}
@@ -490,6 +490,7 @@
 	dn->dn_maxblkid = 0;
 	dn->dn_allocated_txg = 0;
 	dn->dn_free_txg = 0;
+	dn->dn_have_spill = B_FALSE;
 	mutex_exit(&dn->dn_mtx);
 
 	ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT);
@@ -512,6 +513,7 @@
 	int txgoff = tx->tx_txg & TXG_MASK;
 	list_t *list = &dn->dn_dirty_records[txgoff];
 	static const dnode_phys_t zerodn = { 0 };
+	boolean_t kill_spill = B_FALSE;
 
 	ASSERT(dmu_tx_is_syncing(tx));
 	ASSERT(dnp->dn_type != DMU_OT_NONE || dn->dn_allocated_txg);
@@ -523,10 +525,13 @@
 
 	if (dmu_objset_userused_enabled(dn->dn_objset) &&
 	    !DMU_OBJECT_IS_SPECIAL(dn->dn_object)) {
-		ASSERT(dn->dn_oldphys == NULL);
-		dn->dn_oldphys = zio_buf_alloc(sizeof (dnode_phys_t));
-		*dn->dn_oldphys = *dn->dn_phys; /* struct assignment */
+		mutex_enter(&dn->dn_mtx);
+		dn->dn_oldused = DN_USED_BYTES(dn->dn_phys);
+		dn->dn_oldflags = dn->dn_phys->dn_flags;
+		dn->dn_id_flags |= DN_ID_SYNC;
 		dn->dn_phys->dn_flags |= DNODE_FLAG_USERUSED_ACCOUNTED;
+		mutex_exit(&dn->dn_mtx);
+		dmu_objset_userquota_get_ids(dn, B_FALSE);
 	} else {
 		/* Once we account for it, we should always account for it. */
 		ASSERT(!(dn->dn_phys->dn_flags &
@@ -573,6 +578,24 @@
 		dn->dn_next_bonuslen[txgoff] = 0;
 	}
 
+	if (dn->dn_next_bonustype[txgoff]) {
+		ASSERT(dn->dn_next_bonustype[txgoff] < DMU_OT_NUMTYPES);
+		dnp->dn_bonustype = dn->dn_next_bonustype[txgoff];
+		dn->dn_next_bonustype[txgoff] = 0;
+	}
+
+	/*
+	 * We will either remove a spill block when a file is being removed
+	 * or we have been asked to remove it.
+	 */
+	if (dn->dn_rm_spillblk[txgoff] ||
+	    ((dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) &&
+	    dn->dn_free_txg > 0 && dn->dn_free_txg <= tx->tx_txg)) {
+		if ((dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR))
+			kill_spill = B_TRUE;
+		dn->dn_rm_spillblk[txgoff] = 0;
+	}
+
 	if (dn->dn_next_indblkshift[txgoff]) {
 		ASSERT(dnp->dn_nlevels == 1);
 		dnp->dn_indblkshift = dn->dn_next_indblkshift[txgoff];
@@ -589,6 +612,21 @@
 
 	mutex_exit(&dn->dn_mtx);
 
+	if (kill_spill) {
+		dmu_buf_impl_t *spilldb;
+		(void) free_blocks(dn, &dn->dn_phys->dn_spill, 1, tx);
+		mutex_enter(&dn->dn_mtx);
+		dnp->dn_flags &= ~DNODE_FLAG_SPILL_BLKPTR;
+		mutex_exit(&dn->dn_mtx);
+		rw_enter(&dn->dn_struct_rwlock, RW_READER);
+		spilldb = dbuf_find(dn, 0, DMU_SPILL_BLKID);
+		if (spilldb) {
+			spilldb->db_blkptr = NULL;
+			mutex_exit(&spilldb->db_mtx);
+		}
+		rw_exit(&dn->dn_struct_rwlock);
+	}
+
 	/* process all the "freed" ranges in the file */
 	while (rp = avl_last(&dn->dn_ranges[txgoff])) {
 		dnode_sync_free_range(dn, rp->fr_blkid, rp->fr_nblks, tx);

--- a/usr/src/uts/common/fs/zfs/dsl_pool.c	Tue Mar 16 06:44:44 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/dsl_pool.c	Tue Mar 16 09:43:38 2010 -0600
@@ -343,7 +343,7 @@
 
 	for (ds = list_head(&dp->dp_synced_datasets); ds;
 	    ds = list_next(&dp->dp_synced_datasets, ds))
-		dmu_objset_do_userquota_callbacks(ds->ds_objset, tx);
+		dmu_objset_do_userquota_updates(ds->ds_objset, tx);
 
 	/*
 	 * Sync the datasets again to push out the changes due to

--- a/usr/src/uts/common/fs/zfs/dsl_scrub.c	Tue Mar 16 06:44:44 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/dsl_scrub.c	Tue Mar 16 09:43:38 2010 -0600
@@ -42,6 +42,8 @@
 #include <sys/zil_impl.h>
 #include <sys/zio_checksum.h>
 #include <sys/ddt.h>
+#include <sys/sa.h>
+#include <sys/sa_impl.h>
 
 typedef int (scrub_cb_t)(dsl_pool_t *, const blkptr_t *, const zbookmark_t *);
 
@@ -612,6 +614,12 @@
 
 		SET_BOOKMARK(&czb, objset, object, dnp->dn_nlevels - 1, j);
 		scrub_visitbp(dp, dnp, buf, &dnp->dn_blkptr[j], &czb);
+
+		if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) {
+			zbookmark_t czb;
+			SET_BOOKMARK(&czb, objset, object, 0, DMU_SPILL_BLKID);
+			scrub_visitbp(dp, dnp, buf, &dnp->dn_spill, &czb);
+		}
 	}
 }

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/fs/zfs/sa.c	Tue Mar 16 09:43:38 2010 -0600
@@ -0,0 +1,1887 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/zfs_context.h>
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/sysmacros.h>
+#include <sys/dmu.h>
+#include <sys/dmu_impl.h>
+#include <sys/dmu_objset.h>
+#include <sys/dbuf.h>
+#include <sys/dnode.h>
+#include <sys/zap.h>
+#include <sys/sa.h>
+#include <sys/sunddi.h>
+#include <sys/sa_impl.h>
+#include <sys/dnode.h>
+#include <sys/errno.h>
+#include <sys/zfs_context.h>
+
+/*
+ * ZFS System attributes:
+ *
+ * A generic mechanism to allow for arbitrary attributes
+ * to be stored in a dnode.  The data will be stored in the bonus buffer of
+ * the dnode and if necessary a special "spill" block will be used to handle
+ * overflow situations.  The spill block will be sized to fit the data
+ * from 512 - 128K.  When a spill block is used the BP (blkptr_t) for the
+ * spill block is stored at the end of the current bonus buffer.  Any
+ * attributes that would be in the way of the blkptr_t will be relocated
+ * into the spill block.
+ *
+ * Attribute registration:
+ *
+ * Stored persistently on a per dataset basis
+ * a mapping between attribute "string" names and their actual attribute
+ * numeric values, length, and byteswap function.  The names are only used
+ * during registration.  All  attributes are known by their unique attribute
+ * id value.  If an attribute can have a variable size then the value
+ * 0 will be used to indicate this.
+ *
+ * Attribute Layout:
+ *
+ * Attribute layouts are a way to compactly store multiple attributes, but
+ * without taking the overhead associated with managing each attribute
+ * individually.  Since you will typically have the same set of attributes
+ * stored in the same order a single table will be used to represent that
+ * layout.  The ZPL for example will usually have only about 10 different
+ * layouts (regular files, device files, symlinks,
+ * regular files + scanstamp, files/dir with extended attributes, and then
+ * you have the possibility of all of those minus ACL, because it would
+ * be kicked out into the spill block)
+ *
+ * Layouts are simply an array of the attributes and their
+ * ordering i.e. [0, 1, 4, 5, 2]
+ *
+ * Each distinct layout is given a unique layout number and that is what's
+ * stored in the header at the beginning of the SA data buffer.
+ *
+ * A layout only covers a single dbuf (bonus or spill).  If a set of
+ * attributes is split up between the bonus buffer and a spill buffer then
+ * two different layouts will be used.  This allows us to byteswap the
+ * spill without looking at the bonus buffer and keeps the on disk format of
+ * the bonus and spill buffer the same.
+ *
+ * Adding a single attribute will cause the entire set of attributes to
+ * be rewritten and could result in a new layout number being constructed
+ * as part of the rewrite if no such layout exists for the new set of
+ * attributes.  The new attribute will be appended to the end of the already
+ * existing attributes.
+ *
+ * Both the attribute registration and attribute layout information are
+ * stored in normal ZAP attributes.  There should be a small number of
+ * known layouts and the set of attributes is assumed to typically be quite
+ * small.
+ *
+ * The registered attributes and layout "table" information is maintained
+ * in core and a special "sa_os_t" is attached to the objset_t.
+ *
+ * A special interface is provided to allow for quickly applying
+ * a large set of attributes at once.  sa_replace_all_by_template() is
+ * used to set an array of attributes.  This is used by the ZPL when
+ * creating a brand new file.  The template that is passed into the function
+ * specifies the attribute, size for variable length attributes, location of
+ * data and special "data locator" function if the data isn't in a contiguous
+ * location.
+ *
+ * Byteswap implications:
+ * Since the SA attributes are not entirely self describing we can't do
+ * the normal byteswap processing.  The special ZAP layout attribute and
+ * attribute registration attributes define the byteswap function and the
+ * size of the attributes, unless it is variable sized.
+ * The normal ZFS byteswapping infrastructure assumes you don't need
+ * to read any objects in order to do the necessary byteswapping.  Whereas
+ * SA attributes can only be properly byteswapped if the dataset is opened
+ * and the layout/attribute ZAP attributes are available.  Because of this
+ * the SA attributes will be byteswapped when they are first accessed by
+ * the SA code that will read the SA data.
+ */
+
+typedef void (sa_iterfunc_t)(void *hdr, void *addr, sa_attr_type_t,
+    uint16_t length, int length_idx, boolean_t, void *userp);
+
+static int sa_build_index(sa_handle_t *hdl, sa_buf_type_t buftype);
+static void sa_idx_tab_hold(objset_t *os, sa_idx_tab_t *idx_tab);
+static void *sa_find_idx_tab(objset_t *os, dmu_object_type_t bonustype,
+    void *data);
+static void sa_idx_tab_rele(objset_t *os, void *arg);
+static void sa_copy_data(sa_data_locator_t *func, void *start, void *target,
+    int buflen);
+static int sa_modify_attrs(sa_handle_t *hdl, sa_attr_type_t newattr,
+    sa_data_op_t action, sa_data_locator_t *locator, void *datastart,
+    uint16_t buflen, dmu_tx_t *tx);
+
+arc_byteswap_func_t *sa_bswap_table[] = {
+	byteswap_uint64_array,
+	byteswap_uint32_array,
+	byteswap_uint16_array,
+	byteswap_uint8_array,
+	zfs_acl_byteswap,
+};
+
+#define	SA_COPY_DATA(f, s, t, l) \
+	{ /* f == NULL: direct copy with 8/16-byte fast paths */ \
+		if ((f) == NULL) { \
+			if ((l) == 8) { \
+				*(uint64_t *)(t) = *(uint64_t *)(s); \
+			} else if ((l) == 16) { \
+				*(uint64_t *)(t) = *(uint64_t *)(s); \
+				*(uint64_t *)((uintptr_t)(t) + 8) = \
+				    *(uint64_t *)((uintptr_t)(s) + 8); \
+			} else { \
+				bcopy((s), (t), (l)); \
+			} \
+		} else \
+			sa_copy_data((f), (s), (t), (l)); \
+	}
+
+/*
+ * This table is fixed and cannot be changed.  Its purpose is to
+ * allow the SA code to work with both old/new ZPL file systems.
+ * It contains the list of legacy attributes.  These attributes aren't
+ * stored in the "attribute" registry zap objects, since older ZPL file systems
+ * won't have the registry.  Only objsets of type ZFS_TYPE_FILESYSTEM will
+ * use this static table.
+ */
+sa_attr_reg_t sa_legacy_attrs[] = {
+	{"ZPL_ATIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 0},
+	{"ZPL_MTIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 1},
+	{"ZPL_CTIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 2},
+	{"ZPL_CRTIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 3},
+	{"ZPL_GEN", sizeof (uint64_t), SA_UINT64_ARRAY, 4},
+	{"ZPL_MODE", sizeof (uint64_t), SA_UINT64_ARRAY, 5},
+	{"ZPL_SIZE", sizeof (uint64_t), SA_UINT64_ARRAY, 6},
+	{"ZPL_PARENT", sizeof (uint64_t), SA_UINT64_ARRAY, 7},
+	{"ZPL_LINKS", sizeof (uint64_t), SA_UINT64_ARRAY, 8},
+	{"ZPL_XATTR", sizeof (uint64_t), SA_UINT64_ARRAY, 9},
+	{"ZPL_RDEV", sizeof (uint64_t), SA_UINT64_ARRAY, 10},
+	{"ZPL_FLAGS", sizeof (uint64_t), SA_UINT64_ARRAY, 11},
+	{"ZPL_UID", sizeof (uint64_t), SA_UINT64_ARRAY, 12},
+	{"ZPL_GID", sizeof (uint64_t), SA_UINT64_ARRAY, 13},
+	{"ZPL_PAD", sizeof (uint64_t) * 4, SA_UINT64_ARRAY, 14},
+	{"ZPL_ZNODE_ACL", 88, SA_UINT8_ARRAY, 15},
+};
+
+/*
+ * ZPL legacy layout
+ * This is only used for objects of type DMU_OT_ZNODE
+ */
+sa_attr_type_t sa_legacy_zpl_layout[] = {
+    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+};
+
+/*
+ * Special dummy layout used for buffers with no attributes.
+ */
+
+sa_attr_type_t sa_dummy_zpl_layout[] = { 0 };
+
+static int sa_legacy_attr_count = 16;
+static kmem_cache_t *sa_cache = NULL;
+
+/*ARGSUSED*/
+static int
+sa_cache_constructor(void *buf, void *unused, int kmflag)
+{
+	sa_handle_t *hdl = buf;
+
+	hdl->sa_bonus_tab = NULL;
+	hdl->sa_spill_tab = NULL;
+	hdl->sa_os = NULL;
+	hdl->sa_userp = NULL;
+	hdl->sa_bonus = NULL;
+	hdl->sa_spill = NULL;
+	mutex_init(&hdl->sa_lock, NULL, MUTEX_DEFAULT, NULL);
+	return (0);
+}
+
+/*ARGSUSED*/
+static void
+sa_cache_destructor(void *buf, void *unused)
+{
+	sa_handle_t *hdl = buf;
+	mutex_destroy(&hdl->sa_lock);
+}
+
+void
+sa_cache_init(void)
+{
+	sa_cache = kmem_cache_create("sa_cache",
+	    sizeof (sa_handle_t), 0, sa_cache_constructor,
+	    sa_cache_destructor, NULL, NULL, NULL, 0);
+}
+
+void
+sa_cache_fini(void)
+{
+	if (sa_cache)
+		kmem_cache_destroy(sa_cache);
+}
+
+static int
+layout_num_compare(const void *arg1, const void *arg2)
+{
+	const sa_lot_t *node1 = arg1;
+	const sa_lot_t *node2 = arg2;
+
+	if (node1->lot_num > node2->lot_num)
+		return (1);
+	else if (node1->lot_num < node2->lot_num)
+		return (-1);
+	return (0);
+}
+
+static int
+layout_hash_compare(const void *arg1, const void *arg2)
+{
+	const sa_lot_t *node1 = arg1;
+	const sa_lot_t *node2 = arg2;
+
+	if (node1->lot_hash > node2->lot_hash)
+		return (1);
+	if (node1->lot_hash < node2->lot_hash)
+		return (-1);
+	if (node1->lot_instance > node2->lot_instance)
+		return (1);
+	if (node1->lot_instance < node2->lot_instance)
+		return (-1);
+	return (0);
+}
+
+boolean_t
+sa_layout_equal(sa_lot_t *tbf, sa_attr_type_t *attrs, int count)
+{
+	int i;
+
+	if (count != tbf->lot_attr_count)
+		return (1);
+
+	for (i = 0; i != count; i++) {
+		if (attrs[i] != tbf->lot_attrs[i])
+			return (1);
+	}
+	return (0);
+}
+
+#define	SA_ATTR_HASH(attr) (zfs_crc64_table[(-1ULL ^ attr) & 0xFF])
+
+static uint64_t
+sa_layout_info_hash(sa_attr_type_t *attrs, int attr_count)
+{
+	int i;
+	uint64_t crc = -1ULL;
+
+	for (i = 0; i != attr_count; i++)
+		crc ^= SA_ATTR_HASH(attrs[i]);
+
+	return (crc);
+}
+
+static boolean_t
+sa_has_blkptr(sa_handle_t *hdl)
+{
+	int rc;
+	if (hdl->sa_spill == NULL) {
+		if ((rc = dmu_spill_hold_existing(hdl->sa_bonus, NULL,
+		    &hdl->sa_spill)) == 0)
+			VERIFY(0 == sa_build_index(hdl, SA_SPILL));
+	} else {
+		rc = 0;
+	}
+
+	return (rc == 0 ? B_TRUE : B_FALSE);
+}
+
+/*
+ * Main attribute lookup/update function
+ * returns 0 for success or non zero for failures
+ *
+ * Operates on bulk array, first failure will abort further processing
+ */
+int
+sa_attr_op(sa_handle_t *hdl, sa_bulk_attr_t *bulk, int count,
+    sa_data_op_t data_op, dmu_tx_t *tx)
+{
+	sa_os_t *sa = hdl->sa_os->os_sa;
+	int i;
+	int error = 0;
+	sa_buf_type_t buftypes;
+
+	buftypes = 0;
+
+	ASSERT(count > 0);
+	for (i = 0; i != count; i++) {
+		ASSERT(bulk[i].sa_attr <= hdl->sa_os->os_sa->sa_num_attrs);
+
+		bulk[i].sa_addr = NULL;
+		/* First check the bonus buffer */
+
+		if (hdl->sa_bonus_tab && TOC_ATTR_PRESENT(
+		    hdl->sa_bonus_tab->sa_idx_tab[bulk[i].sa_attr])) {
+			SA_ATTR_INFO(sa, hdl->sa_bonus_tab,
+			    SA_GET_HDR(hdl, SA_BONUS),
+			    bulk[i].sa_attr, bulk[i], SA_BONUS, hdl);
+			if (tx && !(buftypes & SA_BONUS)) {
+				dmu_buf_will_dirty(hdl->sa_bonus, tx);
+				buftypes |= SA_BONUS;
+			}
+		}
+		if (bulk[i].sa_addr == NULL && sa_has_blkptr(hdl)) {
+			if (TOC_ATTR_PRESENT(
+			    hdl->sa_spill_tab->sa_idx_tab[bulk[i].sa_attr])) {
+				SA_ATTR_INFO(sa, hdl->sa_spill_tab,
+				    SA_GET_HDR(hdl, SA_SPILL),
+				    bulk[i].sa_attr, bulk[i], SA_SPILL, hdl);
+				if (tx && !(buftypes & SA_SPILL) &&
+				    bulk[i].sa_size == bulk[i].sa_length) {
+					dmu_buf_will_dirty(hdl->sa_spill, tx);
+					buftypes |= SA_SPILL;
+				}
+			}
+		}
+		switch (data_op) {
+		case SA_LOOKUP:
+			if (bulk[i].sa_addr == NULL)
+				return (ENOENT);
+			if (bulk[i].sa_data) {
+				SA_COPY_DATA(bulk[i].sa_data_func,
+				    bulk[i].sa_addr, bulk[i].sa_data,
+				    bulk[i].sa_size);
+			}
+			continue;
+
+		case SA_UPDATE:
+			/* existing rewrite of attr */
+			if (bulk[i].sa_addr &&
+			    bulk[i].sa_size == bulk[i].sa_length) {
+				SA_COPY_DATA(bulk[i].sa_data_func,
+				    bulk[i].sa_data, bulk[i].sa_addr,
+				    bulk[i].sa_length);
+				continue;
+			} else if (bulk[i].sa_addr) { /* attr size change */
+				error = sa_modify_attrs(hdl, bulk[i].sa_attr,
+				    SA_REPLACE, bulk[i].sa_data_func,
+				    bulk[i].sa_data, bulk[i].sa_length, tx);
+			} else { /* adding new attribute */
+				error = sa_modify_attrs(hdl, bulk[i].sa_attr,
+				    SA_ADD, bulk[i].sa_data_func,
+				    bulk[i].sa_data, bulk[i].sa_length, tx);
+			}
+			if (error)
+				return (error);
+			break;
+		}
+	}
+	return (error);
+}
+
+static sa_lot_t *
+sa_add_layout_entry(objset_t *os, sa_attr_type_t *attrs, int attr_count,
+    uint64_t lot_num, uint64_t hash, boolean_t zapadd, dmu_tx_t *tx)
+{
+	sa_os_t *sa = os->os_sa;
+	sa_lot_t *tb, *findtb;
+	int i;
+	avl_index_t loc;
+
+	ASSERT(MUTEX_HELD(&sa->sa_lock));
+	tb = kmem_zalloc(sizeof (sa_lot_t), KM_SLEEP);
+	tb->lot_attr_count = attr_count;
+	tb->lot_attrs = kmem_alloc(sizeof (sa_attr_type_t) * attr_count,
+	    KM_SLEEP);
+	bcopy(attrs, tb->lot_attrs, sizeof (sa_attr_type_t) * attr_count);
+	tb->lot_num = lot_num;
+	tb->lot_hash = hash;
+	tb->lot_instance = 0;
+
+	if (zapadd) {
+		char attr_name[8];
+
+		if (sa->sa_layout_attr_obj == 0) {
+			int error;
+			sa->sa_layout_attr_obj = zap_create(os,
+			    DMU_OT_SA_ATTR_LAYOUTS, DMU_OT_NONE, 0, tx);
+			error = zap_add(os, sa->sa_master_obj, SA_LAYOUTS, 8, 1,
+			    &sa->sa_layout_attr_obj, tx);
+			ASSERT3U(error, ==, 0);
+		}
+
+		(void) snprintf(attr_name, sizeof (attr_name),
+		    "%d", (int)lot_num);
+		VERIFY(0 == zap_update(os, os->os_sa->sa_layout_attr_obj,
+		    attr_name, 2, attr_count, attrs, tx));
+	}
+
+	list_create(&tb->lot_idx_tab, sizeof (sa_idx_tab_t),
+	    offsetof(sa_idx_tab_t, sa_next));
+
+	for (i = 0; i != attr_count; i++) {
+		if (sa->sa_attr_table[tb->lot_attrs[i]].sa_length == 0)
+			tb->lot_var_sizes++;
+	}
+
+	avl_add(&sa->sa_layout_num_tree, tb);
+
+	/* verify we don't have a hash collision */
+	if ((findtb = avl_find(&sa->sa_layout_hash_tree, tb, &loc)) != NULL) {
+		for (; findtb && findtb->lot_hash == hash;
+		    findtb = AVL_NEXT(&sa->sa_layout_hash_tree, findtb)) {
+			if (findtb->lot_instance != tb->lot_instance)
+				break;
+			tb->lot_instance++;
+		}
+	}
+	avl_add(&sa->sa_layout_hash_tree, tb);
+	return (tb);
+}
+
+static void
+sa_find_layout(objset_t *os, uint64_t hash, sa_attr_type_t *attrs,
+    int count, dmu_tx_t *tx, sa_lot_t **lot)
+{
+	sa_lot_t *tb, tbsearch;
+	avl_index_t loc;
+	sa_os_t *sa = os->os_sa;
+	boolean_t found = B_FALSE;
+
+	mutex_enter(&sa->sa_lock);
+	tbsearch.lot_hash = hash;
+	tbsearch.lot_instance = 0;
+	tb = avl_find(&sa->sa_layout_hash_tree, &tbsearch, &loc);
+	if (tb) {
+		for (; tb && tb->lot_hash == hash;
+		    tb = AVL_NEXT(&sa->sa_layout_hash_tree, tb)) {
+			if (sa_layout_equal(tb, attrs, count) == 0) {
+				found = B_TRUE;
+				break;
+			}
+		}
+	}
+	if (!found) {
+		tb = sa_add_layout_entry(os, attrs, count,
+		    avl_numnodes(&sa->sa_layout_num_tree), hash, B_TRUE, tx);
+	}
+	mutex_exit(&sa->sa_lock);
+	*lot = tb;
+}
+
+static int
+sa_resize_spill(sa_handle_t *hdl, uint32_t size, dmu_tx_t *tx)
+{
+	int error;
+	uint32_t blocksize;
+
+	if (size == 0) {
+		blocksize = SPA_MINBLOCKSIZE;
+	} else if (size > SPA_MAXBLOCKSIZE) {
+		ASSERT(0);
+		return (EFBIG);
+	} else {
+		blocksize = P2ROUNDUP_TYPED(size, SPA_MINBLOCKSIZE, uint32_t);
+	}
+
+	error = dbuf_spill_set_blksz(hdl->sa_spill, blocksize, tx);
+	ASSERT(error == 0);
+	return (error);
+}
+
+static void
+sa_copy_data(sa_data_locator_t *func, void *datastart, void *target, int buflen)
+{
+	if (func == NULL) {
+		bcopy(datastart, target, buflen);
+	} else {
+		boolean_t start;
+		int bytes;
+		void *dataptr;
+		void *saptr = target;
+		uint32_t length;
+
+		start = B_TRUE;
+		bytes = 0;
+		while (bytes < buflen) {
+			func(&dataptr, &length, buflen, start, datastart);
+			bcopy(dataptr, saptr, length);
+			saptr = (void *)((caddr_t)saptr + length);
+			bytes += length;
+			start = B_FALSE;
+		}
+	}
+}
+
+/*
+ * Determine several different sizes
+ * first the sa header size
+ * the number of bytes to be stored
+ * if spill would occur the index in the attribute array is returned
+ *
+ * the boolean will_spill will be set when spilling is necessary.  It
+ * is only set when the buftype is SA_BONUS
+ */
+static int
+sa_find_sizes(sa_os_t *sa, sa_bulk_attr_t *attr_desc, int attr_count,
+    dmu_buf_t *db, sa_buf_type_t buftype, int *index, int *total,
+    boolean_t *will_spill)
+{
+	int var_size = 0;
+	int i;
+	int full_space;
+	int hdrsize;
+	boolean_t done = B_FALSE;
+
+	if (buftype == SA_BONUS && sa->sa_force_spill) {
+		*total = 0;
+		*index = 0;
+		*will_spill = B_TRUE;
+		return (0);
+	}
+
+	*index = -1;
+	*total = 0;
+
+	if (buftype == SA_BONUS)
+		*will_spill = B_FALSE;
+
+	hdrsize = (SA_BONUSTYPE_FROM_DB(db) == DMU_OT_ZNODE) ? 0 :
+	    sizeof (sa_hdr_phys_t);
+
+	full_space = (buftype == SA_BONUS) ? DN_MAX_BONUSLEN : db->db_size;
+
+	for (i = 0; i != attr_count; i++) {
+		boolean_t is_var_sz;
+
+		*total += attr_desc[i].sa_length;
+		if (done)
+			goto next;
+
+		is_var_sz = (SA_REGISTERED_LEN(sa, attr_desc[i].sa_attr) == 0);
+		if (is_var_sz) {
+			var_size++;
+		}
+
+		if (is_var_sz && var_size > 1) {
+			if (P2ROUNDUP(hdrsize + sizeof (uint16_t), 8) +
+			    *total < full_space) {
+				hdrsize += sizeof (uint16_t);
+			} else {
+				done = B_TRUE;
+				*index = i;
+				if (buftype == SA_BONUS)
+					*will_spill = B_TRUE;
+				continue;
+			}
+		}
+
+		/*
+		 * find index of where spill *could* occur.
+		 * Then continue to count of remainder attribute
+		 * space.  The sum is used later for sizing bonus
+		 * and spill buffer.
+		 */
+		if (buftype == SA_BONUS && *index == -1 &&
+		    P2ROUNDUP(*total + hdrsize, 8) >
+		    (full_space - sizeof (blkptr_t))) {
+			*index = i;
+			done = B_TRUE;
+		}
+
+next:
+		if (P2ROUNDUP(*total + hdrsize, 8) > full_space &&
+		    buftype == SA_BONUS)
+			*will_spill = B_TRUE;
+	}
+
+	hdrsize = P2ROUNDUP(hdrsize, 8);
+	return (hdrsize);
+}
+
+#define	BUF_SPACE_NEEDED(total, header) (total + header)
+
+/*
+ * Find layout that corresponds to ordering of attributes
+ * If not found a new layout number is created and added to
+ * persistent layout tables.
+ */
+static int
+sa_build_layouts(sa_handle_t *hdl, sa_bulk_attr_t *attr_desc, int attr_count,
+    dmu_tx_t *tx)
+{
+	sa_os_t *sa = hdl->sa_os->os_sa;
+	uint64_t hash;
+	sa_buf_type_t buftype;
+	sa_hdr_phys_t *sahdr;
+	void *data_start;
+	int buf_space;
+	sa_attr_type_t *attrs, *attrs_start;
+	int i, lot_count;
+	int hdrsize, spillhdrsize;
+	int used;
+	dmu_object_type_t bonustype;
+	sa_lot_t *lot;
+	int len_idx;
+	int spill_used;
+	boolean_t spilling;
+
+	dmu_buf_will_dirty(hdl->sa_bonus, tx);
+	bonustype = SA_BONUSTYPE_FROM_DB(hdl->sa_bonus);
+
+	/* first determine bonus header size and sum of all attributes */
+	hdrsize = sa_find_sizes(sa, attr_desc, attr_count, hdl->sa_bonus,
+	    SA_BONUS, &i, &used, &spilling);
+
+	if (used > SPA_MAXBLOCKSIZE)
+		return (EFBIG);
+
+	VERIFY(0 == dmu_set_bonus(hdl->sa_bonus, spilling ?
+	    MIN(DN_MAX_BONUSLEN - sizeof (blkptr_t), used + hdrsize) :
+	    used + hdrsize, tx));
+
+	ASSERT((bonustype == DMU_OT_ZNODE && spilling == 0) ||
+	    bonustype == DMU_OT_SA);
+
+	/* setup and size spill buffer when needed */
+	if (spilling) {
+		boolean_t dummy;
+
+		if (hdl->sa_spill == NULL) {
+			int error;
+			error = dmu_spill_hold_by_bonus(hdl->sa_bonus, NULL,
+			    &hdl->sa_spill);
+			ASSERT3U(error, ==, 0);
+		}
+		dmu_buf_will_dirty(hdl->sa_spill, tx);
+
+		spillhdrsize = sa_find_sizes(sa, &attr_desc[i],
+		    attr_count - i, hdl->sa_spill, SA_SPILL, &i,
+		    &spill_used, &dummy);
+
+		if (spill_used > SPA_MAXBLOCKSIZE)
+			return (EFBIG);
+
+		buf_space = hdl->sa_spill->db_size - spillhdrsize;
+		if (BUF_SPACE_NEEDED(spill_used, spillhdrsize) >
+		    hdl->sa_spill->db_size)
+			VERIFY(0 == sa_resize_spill(hdl,
+			    BUF_SPACE_NEEDED(spill_used, spillhdrsize), tx));
+	}
+
+	/* setup starting pointers to lay down data */
+	data_start = (void *)((uintptr_t)hdl->sa_bonus->db_data + hdrsize);
+	sahdr = (sa_hdr_phys_t *)hdl->sa_bonus->db_data;
+	buftype = SA_BONUS;
+
+	if (spilling)
+		buf_space = (sa->sa_force_spill) ?
+		    0 : SA_BLKPTR_SPACE - hdrsize;
+	else
+		buf_space = hdl->sa_bonus->db_size - hdrsize;
+
+	attrs_start = attrs = kmem_alloc(sizeof (sa_attr_type_t) * attr_count,
+	    KM_SLEEP);
+	lot_count = 0;
+
+	for (i = 0, len_idx = 0, hash = -1ULL; i != attr_count; i++) {
+		uint16_t length;
+
+		attrs[i] = attr_desc[i].sa_attr;
+		length = SA_REGISTERED_LEN(sa, attrs[i]);
+		if (length == 0)
+			length = attr_desc[i].sa_length;
+
+		if (buf_space < length) {  /* switch to spill buffer */
+			ASSERT(bonustype != DMU_OT_ZNODE);
+			if (buftype == SA_BONUS && !sa->sa_force_spill) {
+				sa_find_layout(hdl->sa_os, hash, attrs_start,
+				    lot_count, tx, &lot);
+				SA_SET_HDR(sahdr, lot->lot_num, hdrsize);
+			}
+
+			buftype = SA_SPILL;
+			hash = -1ULL;
+			len_idx = 0;
+
+			sahdr = (sa_hdr_phys_t *)hdl->sa_spill->db_data;
+			sahdr->sa_magic = SA_MAGIC;
+			data_start = (void *)((uintptr_t)sahdr +
+			    spillhdrsize);
+			attrs_start = &attrs[i];
+			buf_space = hdl->sa_spill->db_size - spillhdrsize;
+			lot_count = 0;
+		}
+		hash ^= SA_ATTR_HASH(attrs[i]);
+		attr_desc[i].sa_addr = data_start;
+		attr_desc[i].sa_size = length;
+		SA_COPY_DATA(attr_desc[i].sa_data_func, attr_desc[i].sa_data,
+		    data_start, length);
+		if (sa->sa_attr_table[attrs[i]].sa_length == 0) {
+			sahdr->sa_lengths[len_idx++] = length;
+		}
+		data_start = (void *)P2ROUNDUP(((uintptr_t)data_start +
+		    length), 8);
+		buf_space -= P2ROUNDUP(length, 8);
+		lot_count++;
+	}
+
+	sa_find_layout(hdl->sa_os, hash, attrs_start, lot_count, tx, &lot);
+	if (bonustype == DMU_OT_SA) {
+		SA_SET_HDR(sahdr, lot->lot_num,
+		    buftype == SA_BONUS ? hdrsize : spillhdrsize);
+	}
+
+	kmem_free(attrs, sizeof (sa_attr_type_t) * attr_count);
+	if (hdl->sa_bonus_tab) {
+		sa_idx_tab_rele(hdl->sa_os, hdl->sa_bonus_tab);
+		hdl->sa_bonus_tab = NULL;
+	}
+	if (!sa->sa_force_spill)
+		VERIFY(0 == sa_build_index(hdl, SA_BONUS));
+	if (hdl->sa_spill) {
+		sa_idx_tab_rele(hdl->sa_os, hdl->sa_spill_tab);
+		if (!spilling) {
+			/*
+			 * remove spill block that is no longer needed.
+			 * set sa_spill_remove to prevent sa_attr_op
+			 * from trying to retrieve spill block before its
+			 * been removed.  The flag will be cleared if/when
+			 * the handle is destroyed recreated or
+			 * sa_build_layouts() needs to spill again.
+			 */
+			dmu_buf_rele(hdl->sa_spill, NULL);
+			hdl->sa_spill = NULL;
+			hdl->sa_spill_tab = NULL;
+			VERIFY(0 == dmu_rm_spill(hdl->sa_os,
+			    sa_handle_object(hdl), tx));
+		} else {
+			VERIFY(0 == sa_build_index(hdl, SA_SPILL));
+		}
+	}
+
+	return (0);
+}
+
+static void
+sa_attr_table_setup(objset_t *os, sa_attr_reg_t *reg_attrs, int count)
+{
+	sa_os_t *sa = os->os_sa;
+	uint64_t sa_attr_count = 0;
+	int error = 0;
+	uint64_t attr_value;
+	sa_attr_table_t *tb;
+	zap_cursor_t zc;
+	zap_attribute_t za;
+	int registered_count = 0;
+	int i;
+	dmu_objset_type_t ostype = dmu_objset_type(os);
+
+	sa->sa_user_table =
+	    kmem_zalloc(count * sizeof (sa_attr_type_t), KM_SLEEP);
+	sa->sa_user_table_sz = count * sizeof (sa_attr_type_t);
+
+	if (sa->sa_reg_attr_obj != 0)
+		VERIFY(zap_count(os, sa->sa_reg_attr_obj, &sa_attr_count) == 0);
+
+	if (ostype == DMU_OST_ZFS && sa_attr_count == 0)
+		sa_attr_count += sa_legacy_attr_count;
+
+	/* Allocate attribute numbers for attributes that aren't registered */
+	for (i = 0; i != count; i++) {
+		boolean_t found = B_FALSE;
+		int j;
+
+		if (ostype == DMU_OST_ZFS) {
+			for (j = 0; j != sa_legacy_attr_count; j++) {
+				if (strcmp(reg_attrs[i].sa_name,
+				    sa_legacy_attrs[j].sa_name) == 0) {
+					sa->sa_user_table[i] =
+					    sa_legacy_attrs[j].sa_attr;
+					found = B_TRUE;
+				}
+			}
+		}
+		if (found)
+			continue;
+
+		if (sa->sa_reg_attr_obj)
+			error = zap_lookup(os, sa->sa_reg_attr_obj,
+			    reg_attrs[i].sa_name, 8, 1, &attr_value);
+		else
+			error = ENOENT;
+		switch (error) {
+		default:
+		case ENOENT:
+			sa->sa_user_table[i] = (sa_attr_type_t)sa_attr_count;
+			sa_attr_count++;
+			break;
+		case 0:
+			sa->sa_user_table[i] = ATTR_NUM(attr_value);
+			break;
+		}
+	}
+
+	os->os_sa->sa_num_attrs = sa_attr_count;
+	tb = os->os_sa->sa_attr_table =
+	    kmem_zalloc(sizeof (sa_attr_table_t) * sa_attr_count, KM_SLEEP);
+
+	/*
+	 * Attribute table is constructed from requested attribute list,
+	 * previously foreign registered attributes, and also the legacy
+	 * ZPL set of attributes.
+	 */
+
+	if (sa->sa_reg_attr_obj) {
+		for (zap_cursor_init(&zc, os, sa->sa_reg_attr_obj);
+		    zap_cursor_retrieve(&zc, &za) == 0;
+		    zap_cursor_advance(&zc)) {
+			uint64_t value;
+			value  = za.za_first_integer;
+
+			registered_count++;
+			tb[ATTR_NUM(value)].sa_attr = ATTR_NUM(value);
+			tb[ATTR_NUM(value)].sa_length = ATTR_LENGTH(value);
+			tb[ATTR_NUM(value)].sa_byteswap = ATTR_BSWAP(value);
+			tb[ATTR_NUM(value)].sa_registered = B_TRUE;
+
+			if (tb[ATTR_NUM(value)].sa_name) {
+				continue;
+			}
+			tb[ATTR_NUM(value)].sa_name =
+			    kmem_zalloc(strlen(za.za_name) +1, KM_SLEEP);
+			(void) strlcpy(tb[ATTR_NUM(value)].sa_name, za.za_name,
+			    strlen(za.za_name) +1);
+		}
+		zap_cursor_fini(&zc);
+	}
+
+	if (ostype == DMU_OST_ZFS) {
+		for (i = 0; i != sa_legacy_attr_count; i++) {
+			if (tb[i].sa_name)
+				continue;
+			tb[i].sa_attr = sa_legacy_attrs[i].sa_attr;
+			tb[i].sa_length = sa_legacy_attrs[i].sa_length;
+			tb[i].sa_byteswap = sa_legacy_attrs[i].sa_byteswap;
+			tb[i].sa_registered = B_FALSE;
+			tb[i].sa_name =
+			    kmem_zalloc(strlen(sa_legacy_attrs[i].sa_name) +1,
+			    KM_SLEEP);
+			(void) strlcpy(tb[i].sa_name,
+			    sa_legacy_attrs[i].sa_name,
+			    strlen(sa_legacy_attrs[i].sa_name) + 1);
+		}
+	}
+
+	for (i = 0; i != count; i++) {
+		sa_attr_type_t attr_id;
+
+		attr_id = sa->sa_user_table[i];
+		if (tb[attr_id].sa_name)
+			continue;
+
+		tb[attr_id].sa_length = reg_attrs[i].sa_length;
+		tb[attr_id].sa_byteswap = reg_attrs[i].sa_byteswap;
+		tb[attr_id].sa_attr = attr_id;
+		tb[attr_id].sa_name =
+		    kmem_zalloc(strlen(reg_attrs[i].sa_name) + 1, KM_SLEEP);
+		(void) strlcpy(tb[attr_id].sa_name, reg_attrs[i].sa_name,
+		    strlen(reg_attrs[i].sa_name) + 1);
+	}
+
+	os->os_sa->sa_need_attr_registration =
+	    (sa_attr_count != registered_count);
+}
+
+/* Attach SA state to an objset; returns its user attr table, NULL on error */
+sa_attr_type_t *
+sa_setup(objset_t *os, uint64_t sa_obj, sa_attr_reg_t *reg_attrs, int count)
+{
+	zap_cursor_t zc;
+	zap_attribute_t za;
+	sa_os_t *sa;
+	dmu_objset_type_t ostype = dmu_objset_type(os);
+	sa_attr_type_t *tb;
+
+	mutex_enter(&os->os_lock);
+	if (os->os_sa) {
+		mutex_enter(&os->os_sa->sa_lock);
+		mutex_exit(&os->os_lock);
+		tb = os->os_sa->sa_user_table;
+		mutex_exit(&os->os_sa->sa_lock);
+		return (tb);
+	}
+
+	sa = kmem_zalloc(sizeof (sa_os_t), KM_SLEEP);
+	mutex_init(&sa->sa_lock, NULL, MUTEX_DEFAULT, NULL);
+	sa->sa_master_obj = sa_obj;
+	mutex_enter(&sa->sa_lock);
+	mutex_exit(&os->os_lock);
+
+	if (sa_obj) {
+		int error = zap_lookup(os, sa_obj, SA_LAYOUTS,
+		    8, 1, &sa->sa_layout_attr_obj);
+		if (error == 0 || error == ENOENT)
+			error = zap_lookup(os, sa_obj, SA_REGISTRY,
+			    8, 1, &sa->sa_reg_attr_obj);
+		if (error != 0 && error != ENOENT) {
+			/* sa is not yet visible via os->os_sa: undo it all */
+			mutex_exit(&sa->sa_lock);
+			mutex_destroy(&sa->sa_lock);
+			kmem_free(sa, sizeof (sa_os_t));
+			return (NULL);
+		}
+	}
+
+	avl_create(&sa->sa_layout_num_tree, layout_num_compare,
+	    sizeof (sa_lot_t), offsetof(sa_lot_t, lot_num_node));
+	avl_create(&sa->sa_layout_hash_tree, layout_hash_compare,
+	    sizeof (sa_lot_t), offsetof(sa_lot_t, lot_hash_node));
+	os->os_sa = sa;
+	sa_attr_table_setup(os, reg_attrs, count);
+
+	if (sa->sa_layout_attr_obj != 0) {
+		for (zap_cursor_init(&zc, os, sa->sa_layout_attr_obj);
+		    zap_cursor_retrieve(&zc, &za) == 0;
+		    zap_cursor_advance(&zc)) {
+			sa_attr_type_t *lot_attrs;
+			uint64_t lot_num;
+
+			lot_attrs = kmem_zalloc(sizeof (sa_attr_type_t) *
+			    za.za_num_integers, KM_SLEEP);
+
+			VERIFY(zap_lookup(os, sa->sa_layout_attr_obj,
+			    za.za_name, 2, za.za_num_integers, lot_attrs) == 0);
+			VERIFY(ddi_strtoull(za.za_name, NULL, 10,
+			    (unsigned long long *)&lot_num) == 0);
+
+			(void) sa_add_layout_entry(os, lot_attrs,
+			    za.za_num_integers, lot_num,
+			    sa_layout_info_hash(lot_attrs,
+			    za.za_num_integers), B_FALSE, NULL);
+			kmem_free(lot_attrs, sizeof (sa_attr_type_t) *
+			    za.za_num_integers);
+		}
+		zap_cursor_fini(&zc);
+	}
+
+	/* Add special layout number for old ZNODES */
+	if (ostype == DMU_OST_ZFS) {
+		(void) sa_add_layout_entry(os, sa_legacy_zpl_layout,
+		    sa_legacy_attr_count, 0,
+		    sa_layout_info_hash(sa_legacy_zpl_layout,
+		    sa_legacy_attr_count), B_FALSE, NULL);
+
+		(void) sa_add_layout_entry(os, sa_dummy_zpl_layout, 0, 1,
+		    0, B_FALSE, NULL);
+	}
+	mutex_exit(&sa->sa_lock);
+	return (os->os_sa->sa_user_table);
+}
+
+void
+sa_tear_down(objset_t *os)
+{
+	sa_os_t *sa = os->os_sa;
+	sa_lot_t *layout;
+	void *cookie;
+	int i;
+
+	kmem_free(sa->sa_user_table, sa->sa_user_table_sz);
+
+	/* Free up attr table */
+
+	for (i = 0; i != sa->sa_num_attrs; i++) {
+		if (sa->sa_attr_table[i].sa_name)
+			kmem_free(sa->sa_attr_table[i].sa_name,
+			    strlen(sa->sa_attr_table[i].sa_name) + 1);
+	}
+
+	kmem_free(sa->sa_attr_table,
+	    sizeof (sa_attr_table_t) * sa->sa_num_attrs);
+
+	cookie = NULL;
+	while (layout = avl_destroy_nodes(&sa->sa_layout_hash_tree, &cookie)) {
+		sa_idx_tab_t *tab;
+		while (tab = list_head(&layout->lot_idx_tab)) {
+			ASSERT(refcount_count(&tab->sa_refcount));
+			sa_idx_tab_rele(os, tab);
+		}
+	}
+
+	cookie = NULL;
+	while (layout = avl_destroy_nodes(&sa->sa_layout_num_tree, &cookie)) {
+		kmem_free(layout->lot_attrs,
+		    sizeof (sa_attr_type_t) * layout->lot_attr_count);
+		kmem_free(layout, sizeof (sa_lot_t));
+	}
+
+	avl_destroy(&sa->sa_layout_hash_tree);
+	avl_destroy(&sa->sa_layout_num_tree);
+
+	kmem_free(sa, sizeof (sa_os_t));
+	os->os_sa = NULL;
+}
+
+void
+sa_build_idx_tab(void *hdr, void *attr_addr, sa_attr_type_t attr,
+    uint16_t length, int length_idx, boolean_t var_length, void *userp)
+{
+	sa_idx_tab_t *idx_tab = userp;
+
+	if (var_length) {
+		ASSERT(idx_tab->sa_variable_lengths);
+		idx_tab->sa_variable_lengths[length_idx] = length;
+	}
+	TOC_ATTR_ENCODE(idx_tab->sa_idx_tab[attr], length_idx,
+	    (uint32_t)((uintptr_t)attr_addr - (uintptr_t)hdr));
+}
+
+static void
+sa_attr_iter(objset_t *os, sa_hdr_phys_t *hdr, dmu_object_type_t type,
+    sa_iterfunc_t func, sa_lot_t *tab, void *userp)
+{
+	void *data_start;
+	sa_lot_t *tb = tab;
+	sa_lot_t search;
+	avl_index_t loc;
+	sa_os_t *sa = os->os_sa;
+	int i;
+	uint16_t *length_start;
+	uint8_t length_idx = 0;
+
+	if (tab == NULL) {
+		search.lot_num = SA_LAYOUT_NUM(hdr, type);
+		tb = avl_find(&sa->sa_layout_num_tree, &search, &loc);
+		ASSERT(tb);
+	}
+
+	if (IS_SA_BONUSTYPE(type)) {
+		data_start = (void *)P2ROUNDUP(((uintptr_t)hdr +
+		    offsetof(sa_hdr_phys_t, sa_lengths) +
+		    (sizeof (uint16_t) * tb->lot_var_sizes)), 8);
+		length_start = hdr->sa_lengths;
+	} else {
+		data_start = hdr;
+	}
+
+	for (i = 0; i != tb->lot_attr_count; i++) {
+		int attr_length, reg_length;
+		uint8_t idx_len;
+
+		reg_length = sa->sa_attr_table[tb->lot_attrs[i]].sa_length;
+		if (reg_length) {
+			attr_length = reg_length;
+			idx_len = 0;
+		} else {
+			attr_length = length_start[length_idx];
+			idx_len = length_idx++;
+		}
+
+		func(hdr, data_start, tb->lot_attrs[i], attr_length,
+		    idx_len, reg_length == 0 ? B_TRUE : B_FALSE, userp);
+
+		data_start = (void *)P2ROUNDUP(((uintptr_t)data_start +
+		    attr_length), 8);
+	}
+}
+
+/*ARGSUSED*/
+void
+sa_byteswap_cb(void *hdr, void *attr_addr, sa_attr_type_t attr,
+    uint16_t length, int length_idx, boolean_t variable_length, void *userp)
+{
+	sa_handle_t *hdl = userp;
+	sa_os_t *sa = hdl->sa_os->os_sa;
+
+	sa_bswap_table[sa->sa_attr_table[attr].sa_byteswap](attr_addr, length);
+}
+
+void
+sa_byteswap(sa_handle_t *hdl, sa_buf_type_t buftype)
+{
+	sa_hdr_phys_t *sa_hdr_phys = SA_GET_HDR(hdl, buftype);
+	dmu_buf_impl_t *db;
+	sa_os_t *sa = hdl->sa_os->os_sa;
+	int num_lengths = 1;
+	int i;
+
+	ASSERT(MUTEX_HELD(&sa->sa_lock));
+	if (sa_hdr_phys->sa_magic == SA_MAGIC)
+		return;
+
+	db = SA_GET_DB(hdl, buftype);
+
+	if (buftype == SA_SPILL) {
+		arc_release(db->db_buf, NULL);
+		arc_buf_thaw(db->db_buf);
+	}
+
+	sa_hdr_phys->sa_magic = BSWAP_32(sa_hdr_phys->sa_magic);
+	sa_hdr_phys->sa_layout_info = BSWAP_16(sa_hdr_phys->sa_layout_info);
+
+	/*
+	 * Determine number of variable lengths in header
+	 * The standard 8 byte header has one for free and a
+	 * 16 byte header would have 4 + 1;
+	 */
+	if (SA_HDR_SIZE(sa_hdr_phys) > 8)
+		num_lengths += (SA_HDR_SIZE(sa_hdr_phys) - 8) >> 1;
+	for (i = 0; i != num_lengths; i++)
+		sa_hdr_phys->sa_lengths[i] =
+		    BSWAP_16(sa_hdr_phys->sa_lengths[i]);
+
+	sa_attr_iter(hdl->sa_os, sa_hdr_phys, DMU_OT_SA,
+	    sa_byteswap_cb, NULL, hdl);
+
+	if (buftype == SA_SPILL)
+		arc_buf_freeze(((dmu_buf_impl_t *)hdl->sa_spill)->db_buf);
+}
+
+static int
+sa_build_index(sa_handle_t *hdl, sa_buf_type_t buftype)
+{
+	sa_hdr_phys_t *sa_hdr_phys;
+	dmu_buf_impl_t *db = SA_GET_DB(hdl, buftype);
+	dmu_object_type_t bonustype = SA_BONUSTYPE_FROM_DB(db);
+	sa_os_t *sa = hdl->sa_os->os_sa;
+	sa_idx_tab_t *idx_tab;
+
+	sa_hdr_phys = SA_GET_HDR(hdl, buftype);
+
+	mutex_enter(&sa->sa_lock);
+
+	/* Do we need to byteswap? */
+
+	/* only check if not old znode */
+	if (IS_SA_BONUSTYPE(bonustype) && sa_hdr_phys->sa_magic != SA_MAGIC &&
+	    sa_hdr_phys->sa_magic != 0) {
+		VERIFY(BSWAP_32(sa_hdr_phys->sa_magic) == SA_MAGIC);
+		sa_byteswap(hdl, buftype);
+	}
+
+	idx_tab = sa_find_idx_tab(hdl->sa_os, bonustype, sa_hdr_phys);
+
+	if (buftype == SA_BONUS)
+		hdl->sa_bonus_tab = idx_tab;
+	else
+		hdl->sa_spill_tab = idx_tab;
+
+	mutex_exit(&sa->sa_lock);
+	return (0);
+}
+
+/*ARGSUSED*/
+void
+sa_evict(dmu_buf_t *db, void *sap)
+{
+	panic("evicting sa dbuf %p\n", (void *)db);
+}
+
+static void
+sa_idx_tab_rele(objset_t *os, void *arg)
+{
+	sa_os_t *sa = os->os_sa;
+	sa_idx_tab_t *idx_tab = arg;
+
+	if (idx_tab == NULL)
+		return;
+
+	mutex_enter(&sa->sa_lock);
+	if (refcount_remove(&idx_tab->sa_refcount, NULL) == 0) {
+		list_remove(&idx_tab->sa_layout->lot_idx_tab, idx_tab);
+		if (idx_tab->sa_variable_lengths)
+			kmem_free(idx_tab->sa_variable_lengths,
+			    sizeof (uint16_t) *
+			    idx_tab->sa_layout->lot_var_sizes);
+		refcount_destroy(&idx_tab->sa_refcount);
+		kmem_free(idx_tab->sa_idx_tab,
+		    sizeof (uint32_t) * sa->sa_num_attrs);
+		kmem_free(idx_tab, sizeof (sa_idx_tab_t));
+	}
+	mutex_exit(&sa->sa_lock);
+}
+
+static void
+sa_idx_tab_hold(objset_t *os, sa_idx_tab_t *idx_tab)
+{
+	sa_os_t *sa = os->os_sa;
+
+	ASSERT(MUTEX_HELD(&sa->sa_lock));
+	(void) refcount_add(&idx_tab->sa_refcount, NULL);
+}
+
+void
+sa_handle_destroy(sa_handle_t *hdl)
+{
+	mutex_enter(&hdl->sa_lock);
+	(void) dmu_buf_update_user((dmu_buf_t *)hdl->sa_bonus, hdl,
+	    NULL, NULL, NULL);
+
+	if (hdl->sa_bonus_tab) {
+		sa_idx_tab_rele(hdl->sa_os, hdl->sa_bonus_tab);
+		hdl->sa_bonus_tab = NULL;
+	}
+	if (hdl->sa_spill_tab) {
+		sa_idx_tab_rele(hdl->sa_os, hdl->sa_spill_tab);
+		hdl->sa_spill_tab = NULL;
+	}
+
+	dmu_buf_rele(hdl->sa_bonus, NULL);
+
+	if (hdl->sa_spill)
+		dmu_buf_rele((dmu_buf_t *)hdl->sa_spill, NULL);
+	mutex_exit(&hdl->sa_lock);
+
+	kmem_cache_free(sa_cache, hdl);
+}
+
+int
+sa_handle_get_from_db(objset_t *os, dmu_buf_t *db, void *userp,
+    sa_handle_type_t hdl_type, sa_handle_t **handlepp)
+{
+	int error = 0;
+	dmu_object_info_t doi;
+	sa_handle_t *handle;
+
+#ifdef ZFS_DEBUG
+	dmu_object_info_from_db(db, &doi);
+	ASSERT(doi.doi_bonus_type == DMU_OT_SA ||
+	    doi.doi_bonus_type == DMU_OT_ZNODE);
+#endif
+	/* find handle, if it exists */
+	/* if one doesn't exist then create a new one, and initialize it */
+
+	handle = (hdl_type == SA_HDL_SHARED) ? dmu_buf_get_user(db) : NULL;
+	if (handle == NULL) {
+		sa_handle_t *newhandle;
+		handle = kmem_cache_alloc(sa_cache, KM_SLEEP);
+		handle->sa_userp = userp;
+		handle->sa_bonus = db;
+		handle->sa_os = os;
+		handle->sa_spill = NULL;
+
+		error = sa_build_index(handle, SA_BONUS);
+		newhandle = (hdl_type == SA_HDL_SHARED) ?
+		    dmu_buf_set_user_ie(db, handle,
+		    NULL, sa_evict) : NULL;
+
+		if (newhandle != NULL) {
+			kmem_cache_free(sa_cache, handle);
+			handle = newhandle;
+		}
+	}
+	*handlepp = handle;
+
+	return (error);
+}
+
+int
+sa_handle_get(objset_t *objset, uint64_t objid, void *userp,
+    sa_handle_type_t hdl_type, sa_handle_t **handlepp)
+{
+	dmu_buf_t *db;
+	int error;
+
+	if (error = dmu_bonus_hold(objset, objid, NULL, &db))
+		return (error);
+
+	return (sa_handle_get_from_db(objset, db, userp, hdl_type,
+	    handlepp));
+}
+
+int
+sa_buf_hold(objset_t *objset, uint64_t obj_num, void *tag, dmu_buf_t **db)
+{
+	return (dmu_bonus_hold(objset, obj_num, tag, db));
+}
+
+void
+sa_buf_rele(dmu_buf_t *db, void *tag)
+{
+	dmu_buf_rele(db, tag);
+}
+
+int
+sa_lookup_impl(sa_handle_t *hdl, sa_bulk_attr_t *bulk, int count)
+{
+	ASSERT(hdl);
+	ASSERT(MUTEX_HELD(&hdl->sa_lock));
+	return (sa_attr_op(hdl, bulk, count, SA_LOOKUP, NULL));
+}
+
+int
+sa_lookup(sa_handle_t *hdl, sa_attr_type_t attr, void *buf, uint32_t buflen)
+{
+	int error;
+	sa_bulk_attr_t bulk;
+
+	bulk.sa_attr = attr;
+	bulk.sa_data = buf;
+	bulk.sa_length = buflen;
+	bulk.sa_data_func = NULL;
+
+	ASSERT(hdl);
+	mutex_enter(&hdl->sa_lock);
+	error = sa_lookup_impl(hdl, &bulk, 1);
+	mutex_exit(&hdl->sa_lock);
+	return (error);
+}
+
+#ifdef _KERNEL
+int
+sa_lookup_uio(sa_handle_t *hdl, sa_attr_type_t attr, uio_t *uio)
+{
+	int error;
+	sa_bulk_attr_t bulk;
+
+	bulk.sa_data = NULL;
+	bulk.sa_attr = attr;
+	bulk.sa_data_func = NULL;
+
+	ASSERT(hdl);
+
+	mutex_enter(&hdl->sa_lock);
+	if (sa_attr_op(hdl, &bulk, 1, SA_LOOKUP, NULL) == 0) {
+		error = uiomove((void *)bulk.sa_addr, MIN(bulk.sa_size,
+		    uio->uio_resid), UIO_READ, uio);
+	} else {
+		error = ENOENT;
+	}
+	mutex_exit(&hdl->sa_lock);
+	return (error);
+
+}
+#endif
+
+/*
+ * Find an already existing TOC from given os and data
+ * This is a special interface to be used by the ZPL for
+ * finding the uid/gid/gen attributes.
+ */
+void *
+sa_find_idx_tab(objset_t *os, dmu_object_type_t bonustype, void *data)
+{
+	sa_idx_tab_t *idx_tab;
+	sa_hdr_phys_t *hdr = (sa_hdr_phys_t *)data;
+	sa_os_t *sa = os->os_sa;
+	sa_lot_t *tb, search;
+	avl_index_t loc;
+
+	/*
+	 * Determine layout number.  If SA node and header == 0 then
+	 * force the index table to the dummy "1" empty layout.
+	 *
+	 * The layout number would only be zero for a newly created file
+	 * that has not added any attributes yet, or with crypto enabled which
+	 * doesn't write any attributes to the bonus buffer.
+	 */
+
+	search.lot_num = SA_LAYOUT_NUM(hdr, bonustype);
+
+	tb = avl_find(&sa->sa_layout_num_tree, &search, &loc);
+
+	/* Verify header size is consistent with layout information */
+	ASSERT(tb);
+	ASSERT(IS_SA_BONUSTYPE(bonustype) &&
+	    SA_HDR_SIZE_MATCH_LAYOUT(hdr, tb) || !IS_SA_BONUSTYPE(bonustype) ||
+	    (IS_SA_BONUSTYPE(bonustype) && hdr->sa_layout_info == 0));
+
+	/*
+	 * See if any of the already existing TOC entries can be reused?
+	 */
+
+	for (idx_tab = list_head(&tb->lot_idx_tab); idx_tab;
+	    idx_tab = list_next(&tb->lot_idx_tab, idx_tab)) {
+		boolean_t valid_idx = B_TRUE;
+		int i;
+
+		if (tb->lot_var_sizes != 0 &&
+		    idx_tab->sa_variable_lengths != NULL) {
+			for (i = 0; i != tb->lot_var_sizes; i++) {
+				if (hdr->sa_lengths[i] !=
+				    idx_tab->sa_variable_lengths[i]) {
+					valid_idx = B_FALSE;
+					break;
+				}
+			}
+		}
+		if (valid_idx) {
+			sa_idx_tab_hold(os, idx_tab);
+			return (idx_tab);
+		}
+	}
+
+	/* No such luck, create a new entry */
+	idx_tab = kmem_zalloc(sizeof (sa_idx_tab_t), KM_SLEEP);
+	idx_tab->sa_idx_tab =
+	    kmem_zalloc(sizeof (uint32_t) * sa->sa_num_attrs, KM_SLEEP);
+	idx_tab->sa_layout = tb;
+	refcount_create(&idx_tab->sa_refcount);
+	if (tb->lot_var_sizes)
+		idx_tab->sa_variable_lengths = kmem_alloc(sizeof (uint16_t) *
+		    tb->lot_var_sizes, KM_SLEEP);
+
+	sa_attr_iter(os, hdr, bonustype, sa_build_idx_tab,
+	    tb, idx_tab);
+	sa_idx_tab_hold(os, idx_tab);   /* one hold for consumer */
+	sa_idx_tab_hold(os, idx_tab);	/* one for layout */
+	list_insert_tail(&tb->lot_idx_tab, idx_tab);
+	return (idx_tab);
+}
+
+void
+sa_default_locator(void **dataptr, uint32_t *len, uint32_t total_len,
+    boolean_t start, void *userdata)
+{
+	ASSERT(start);
+
+	*dataptr = userdata;
+	*len = total_len;
+}
+
+static void
+sa_attr_register_sync(sa_handle_t *hdl, dmu_tx_t *tx)
+{
+	uint64_t attr_value = 0;
+	sa_os_t *sa = hdl->sa_os->os_sa;
+	sa_attr_table_t *tb = sa->sa_attr_table;
+	int i;
+
+	mutex_enter(&sa->sa_lock);
+
+	if (!sa->sa_need_attr_registration || sa->sa_master_obj == NULL) {
+		mutex_exit(&sa->sa_lock);
+		return;
+	}
+
+	if (sa->sa_reg_attr_obj == NULL) {
+		int error;
+		sa->sa_reg_attr_obj = zap_create(hdl->sa_os,
+		    DMU_OT_SA_ATTR_REGISTRATION, DMU_OT_NONE, 0, tx);
+		error = zap_add(hdl->sa_os, sa->sa_master_obj,
+		    SA_REGISTRY, 8, 1, &sa->sa_reg_attr_obj, tx);
+		ASSERT(error == 0);
+	}
+	for (i = 0; i != sa->sa_num_attrs; i++) {
+		if (sa->sa_attr_table[i].sa_registered)
+			continue;
+		ATTR_ENCODE(attr_value, tb[i].sa_attr, tb[i].sa_length,
+		    tb[i].sa_byteswap);
+		VERIFY(0 == zap_update(hdl->sa_os, sa->sa_reg_attr_obj,
+		    tb[i].sa_name, 8, 1, &attr_value, tx));
+		tb[i].sa_registered = B_TRUE;
+	}
+	sa->sa_need_attr_registration = B_FALSE;
+	mutex_exit(&sa->sa_lock);
+}
+
+/*
+ * Replace all attributes with attributes specified in template.
+ * If dnode had a spill buffer then those attributes will
+ * also be replaced, possibly with just an empty spill block
+ *
+ * This interface is intended to only be used for bulk adding of
+ * attributes for a new file.  It will also be used by the ZPL
+ * when converting an old formatted znode to native SA support.
+ */
+int
+sa_replace_all_by_template_locked(sa_handle_t *hdl, sa_bulk_attr_t *attr_desc,
+    int attr_count, dmu_tx_t *tx)
+{
+	sa_os_t *sa = hdl->sa_os->os_sa;
+
+	if (sa->sa_need_attr_registration)
+		sa_attr_register_sync(hdl, tx);
+	return (sa_build_layouts(hdl, attr_desc, attr_count, tx));
+}
+
+int
+sa_replace_all_by_template(sa_handle_t *hdl, sa_bulk_attr_t *attr_desc,
+    int attr_count, dmu_tx_t *tx)
+{
+	int error;
+
+	mutex_enter(&hdl->sa_lock);
+	error = sa_replace_all_by_template_locked(hdl, attr_desc,
+	    attr_count, tx);
+	mutex_exit(&hdl->sa_lock);
+	return (error);
+}
+
+/*
+ * add/remove/replace a single attribute and then rewrite the entire set
+ * of attributes.  The caller must hold hdl->sa_lock; the return value is
+ * whatever sa_build_layouts() reports for the rebuilt attribute set.
+ */
+static int
+sa_modify_attrs(sa_handle_t *hdl, sa_attr_type_t newattr,
+    sa_data_op_t action, sa_data_locator_t *locator, void *datastart,
+    uint16_t buflen, dmu_tx_t *tx)
+{
+	sa_os_t *sa = hdl->sa_os->os_sa;
+	sa_bulk_attr_t *attr_desc;
+	void *old_data[2];
+	int bonus_attr_count = 0;
+	int bonus_data_size, spill_data_size;
+	int spill_attr_count = 0;
+	int error;
+	uint16_t length;
+	int i, j, k, length_idx;
+	sa_hdr_phys_t *hdr;
+	sa_idx_tab_t *idx_tab;
+	int attr_count;
+	int count;
+
+	ASSERT(MUTEX_HELD(&hdl->sa_lock));
+
+	/* First make a copy of the old data */
+
+	if (((dmu_buf_impl_t *)hdl->sa_bonus)->db_dnode->dn_bonuslen) {
+		bonus_data_size = hdl->sa_bonus->db_size;
+		old_data[0] = kmem_alloc(bonus_data_size, KM_SLEEP);
+		bcopy(hdl->sa_bonus->db_data, old_data[0],
+		    hdl->sa_bonus->db_size);
+		bonus_attr_count = hdl->sa_bonus_tab->sa_layout->lot_attr_count;
+	} else {
+		old_data[0] = NULL;
+	}
+
+	/* Bring spill buffer online if it isn't currently */
+
+	if (sa_has_blkptr(hdl)) {
+		spill_data_size = hdl->sa_spill->db_size;
+		old_data[1] = kmem_alloc(spill_data_size, KM_SLEEP);
+		bcopy(hdl->sa_spill->db_data, old_data[1],
+		    hdl->sa_spill->db_size);
+		spill_attr_count =
+		    hdl->sa_spill_tab->sa_layout->lot_attr_count;
+	} else {
+		old_data[1] = NULL;
+	}
+
+	/* build descriptor of all attributes */
+
+	attr_count = bonus_attr_count + spill_attr_count;
+	if (action == SA_ADD)
+		attr_count++;
+	else if (action == SA_REMOVE)
+		attr_count--;
+
+	attr_desc = kmem_zalloc(sizeof (sa_bulk_attr_t) * attr_count, KM_SLEEP);
+
+	/*
+	 * loop through bonus and spill buffer if it exists, and
+	 * build up new attr_descriptor to reset the attributes
+	 */
+	k = j = 0;
+	count = bonus_attr_count;
+	hdr = SA_GET_HDR(hdl, SA_BONUS);
+	idx_tab = SA_IDX_TAB_GET(hdl, SA_BONUS);
+	for (; k != 2; k++) {
+		/* iterate over each attribute in layout */
+		for (i = 0, length_idx = 0; i != count; i++) {
+			sa_attr_type_t attr;
+
+			attr = idx_tab->sa_layout->lot_attrs[i];
+			/* every variable-length attr owns a sa_lengths slot */
+			length = SA_REGISTERED_LEN(sa, attr);
+			if (length == 0)
+				length = hdr->sa_lengths[length_idx++];
+
+			if (attr == newattr) {
+				/* removed attrs take no attr_desc slot */
+				if (action == SA_REMOVE)
+					continue;
+				ASSERT(SA_REGISTERED_LEN(sa, attr) == 0);
+				ASSERT(action == SA_REPLACE);
+				SA_ADD_BULK_ATTR(attr_desc, j, attr,
+				    locator, datastart, buflen);
+			} else {
+				SA_ADD_BULK_ATTR(attr_desc, j, attr,
+				    NULL, (void *)
+				    (TOC_OFF(idx_tab->sa_idx_tab[attr]) +
+				    (uintptr_t)old_data[k]), length);
+			}
+		}
+		if (k == 0 && hdl->sa_spill) {
+			hdr = SA_GET_HDR(hdl, SA_SPILL);
+			idx_tab = SA_IDX_TAB_GET(hdl, SA_SPILL);
+			count = spill_attr_count;
+		} else {
+			break;
+		}
+	}
+	if (action == SA_ADD) {
+		length = SA_REGISTERED_LEN(sa, newattr);
+		if (length == 0) {
+			length = buflen;
+		}
+		SA_ADD_BULK_ATTR(attr_desc, j, newattr, locator,
+		    datastart, buflen);
+	}
+
+	error = sa_build_layouts(hdl, attr_desc, attr_count, tx);
+
+	if (old_data[0])
+		kmem_free(old_data[0], bonus_data_size);
+	if (old_data[1])
+		kmem_free(old_data[1], spill_data_size);
+	kmem_free(attr_desc, sizeof (sa_bulk_attr_t) * attr_count);
+
+	return (error);
+}
+
+static int
+sa_bulk_update_impl(sa_handle_t *hdl, sa_bulk_attr_t *bulk, int count,
+    dmu_tx_t *tx)
+{
+	int error;
+	sa_os_t *sa = hdl->sa_os->os_sa;
+	dmu_object_type_t bonustype;
+
+	bonustype = SA_BONUSTYPE_FROM_DB(SA_GET_DB(hdl, SA_BONUS));
+
+	ASSERT(hdl);
+	ASSERT(MUTEX_HELD(&hdl->sa_lock));
+
+	/* sync out registration table if necessary */
+	if (sa->sa_need_attr_registration)
+		sa_attr_register_sync(hdl, tx);
+
+	error = sa_attr_op(hdl, bulk, count, SA_UPDATE, tx);
+	if (error == 0 && !IS_SA_BONUSTYPE(bonustype) && sa->sa_update_cb)
+		sa->sa_update_cb(hdl, tx);
+
+	return (error);
+}
+
+/*
+ * update or add new attribute
+ */
+int
+sa_update(sa_handle_t *hdl, sa_attr_type_t type,
+    void *buf, uint32_t buflen, dmu_tx_t *tx)
+{
+	int error;
+	sa_bulk_attr_t bulk;
+
+	bulk.sa_attr = type;
+	bulk.sa_data_func = NULL;
+	bulk.sa_length = buflen;
+	bulk.sa_data = buf;
+
+	mutex_enter(&hdl->sa_lock);
+	error = sa_bulk_update_impl(hdl, &bulk, 1, tx);
+	mutex_exit(&hdl->sa_lock);
+	return (error);
+}
+
+int
+sa_update_from_cb(sa_handle_t *hdl, sa_attr_type_t attr,
+    uint32_t buflen, sa_data_locator_t *locator, void *userdata, dmu_tx_t *tx)
+{
+	int error;
+	sa_bulk_attr_t bulk;
+
+	bulk.sa_attr = attr;
+	bulk.sa_data = userdata;
+	bulk.sa_data_func = locator;
+	bulk.sa_length = buflen;
+
+	mutex_enter(&hdl->sa_lock);
+	error = sa_bulk_update_impl(hdl, &bulk, 1, tx);
+	mutex_exit(&hdl->sa_lock);
+	return (error);
+}
+
+/*
+ * Return size of an attribute
+ */
+
+int
+sa_size(sa_handle_t *hdl, sa_attr_type_t attr, int *size)
+{
+	sa_bulk_attr_t bulk;
+
+	bulk.sa_data = NULL;
+	bulk.sa_attr = attr;
+	bulk.sa_data_func = NULL;
+
+	ASSERT(hdl);
+	mutex_enter(&hdl->sa_lock);
+	if (sa_attr_op(hdl, &bulk, 1, SA_LOOKUP, NULL)) {
+		mutex_exit(&hdl->sa_lock);
+		return (ENOENT);
+	}
+	*size = bulk.sa_size;
+
+	mutex_exit(&hdl->sa_lock);
+	return (0);
+}
+
+int
+sa_bulk_lookup_locked(sa_handle_t *hdl, sa_bulk_attr_t *attrs, int count)
+{
+	ASSERT(hdl);
+	ASSERT(MUTEX_HELD(&hdl->sa_lock));
+	return (sa_lookup_impl(hdl, attrs, count));
+}
+
+int
+sa_bulk_lookup(sa_handle_t *hdl, sa_bulk_attr_t *attrs, int count)
+{
+	int error;
+
+	ASSERT(hdl);
+	mutex_enter(&hdl->sa_lock);
+	error = sa_bulk_lookup_locked(hdl, attrs, count);
+	mutex_exit(&hdl->sa_lock);
+	return (error);
+}
+
+int
+sa_bulk_update(sa_handle_t *hdl, sa_bulk_attr_t *attrs, int count, dmu_tx_t *tx)
+{
+	int error;
+
+	ASSERT(hdl);
+	mutex_enter(&hdl->sa_lock);
+	error = sa_bulk_update_impl(hdl, attrs, count, tx);
+	mutex_exit(&hdl->sa_lock);
+	return (error);
+}
+
+int
+sa_remove(sa_handle_t *hdl, sa_attr_type_t attr, dmu_tx_t *tx)
+{
+	int error;
+
+	mutex_enter(&hdl->sa_lock);
+	error = sa_modify_attrs(hdl, attr, SA_REMOVE, NULL,
+	    NULL, 0, tx);
+	mutex_exit(&hdl->sa_lock);
+	return (error);
+}
+
+void
+sa_object_info(sa_handle_t *hdl, dmu_object_info_t *doi)
+{
+	dmu_object_info_from_db((dmu_buf_t *)hdl->sa_bonus, doi);
+}
+
+void
+sa_object_size(sa_handle_t *hdl, uint32_t *blksize, u_longlong_t *nblocks)
+{
+	dmu_object_size_from_db((dmu_buf_t *)hdl->sa_bonus,
+	    blksize, nblocks);
+}
+
+void
+sa_update_user(sa_handle_t *newhdl, sa_handle_t *oldhdl)
+{
+	(void) dmu_buf_update_user((dmu_buf_t *)newhdl->sa_bonus,
+	    oldhdl, newhdl, NULL, sa_evict);
+	oldhdl->sa_bonus = NULL;
+}
+
+void
+sa_set_userp(sa_handle_t *hdl, void *ptr)
+{
+	hdl->sa_userp = ptr;
+}
+
+dmu_buf_t *
+sa_get_db(sa_handle_t *hdl)
+{
+	return ((dmu_buf_t *)hdl->sa_bonus);
+}
+
+void *
+sa_get_userdata(sa_handle_t *hdl)
+{
+	return (hdl->sa_userp);
+}
+
+void
+sa_register_update_callback_locked(objset_t *os, sa_update_cb_t *func)
+{
+	ASSERT(MUTEX_HELD(&os->os_sa->sa_lock));
+	os->os_sa->sa_update_cb = func;
+}
+
+void
+sa_register_update_callback(objset_t *os, sa_update_cb_t *func)
+{
+
+	mutex_enter(&os->os_sa->sa_lock);
+	sa_register_update_callback_locked(os, func);
+	mutex_exit(&os->os_sa->sa_lock);
+}
+
+uint64_t
+sa_handle_object(sa_handle_t *hdl)
+{
+	return (hdl->sa_bonus->db_object);
+}
+
+boolean_t
+sa_enabled(objset_t *os)
+{
+	return (os->os_sa == NULL);
+}
+
+int
+sa_set_sa_object(objset_t *os, uint64_t sa_object)
+{
+	sa_os_t *sa = os->os_sa;
+
+	if (sa->sa_master_obj)
+		return (1);
+
+	sa->sa_master_obj = sa_object;
+
+	return (0);
+}
+
+int
+sa_hdrsize(void *arg)
+{
+	sa_hdr_phys_t *hdr = arg;
+
+	return (SA_HDR_SIZE(hdr));
+}
+
+void
+sa_handle_lock(sa_handle_t *hdl)
+{
+	ASSERT(hdl);
+	mutex_enter(&hdl->sa_lock);
+}
+
+void
+sa_handle_unlock(sa_handle_t *hdl)
+{
+	ASSERT(hdl);
+	mutex_exit(&hdl->sa_lock);
+}

--- a/usr/src/uts/common/fs/zfs/sys/dbuf.h	Tue Mar 16 06:44:44 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/dbuf.h	Tue Mar 16 09:43:38 2010 -0600
@@ -38,7 +38,6 @@
 extern "C" {
 #endif
 
-#define	DB_BONUS_BLKID (-1ULL)
 #define	IN_DMU_SYNC 2
 
 /*
@@ -242,6 +241,10 @@
 
 dmu_buf_impl_t *dbuf_create_tlib(struct dnode *dn, char *data);
 void dbuf_create_bonus(struct dnode *dn);
+int dbuf_spill_set_blksz(dmu_buf_t *db, uint64_t blksz, dmu_tx_t *tx);
+void dbuf_spill_hold(struct dnode *dn, dmu_buf_impl_t **dbp, void *tag);
+
+void dbuf_rm_spill(struct dnode *dn, dmu_tx_t *tx);
 
 dmu_buf_impl_t *dbuf_hold(struct dnode *dn, uint64_t blkid, void *tag);
 dmu_buf_impl_t *dbuf_hold_level(struct dnode *dn, int level, uint64_t blkid,

--- a/usr/src/uts/common/fs/zfs/sys/dmu.h	Tue Mar 16 06:44:44 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/dmu.h	Tue Mar 16 09:43:38 2010 -0600
@@ -63,6 +63,7 @@
 struct nvlist;
 struct arc_buf;
 struct zio_prop;
+struct sa_handle;
 
 typedef struct objset objset_t;
 typedef struct dmu_tx dmu_tx_t;
@@ -122,6 +123,10 @@
 	DMU_OT_USERREFS,		/* ZAP */
 	DMU_OT_DDT_ZAP,			/* ZAP */
 	DMU_OT_DDT_STATS,		/* ZAP */
+	DMU_OT_SA,			/* System attr */
+	DMU_OT_SA_MASTER_NODE,		/* ZAP */
+	DMU_OT_SA_ATTR_REGISTRATION,	/* ZAP */
+	DMU_OT_SA_ATTR_LAYOUTS,		/* ZAP */
 	DMU_OT_NUMTYPES
 } dmu_object_type_t;
 
@@ -159,6 +164,11 @@
 #define	DMU_DEADLIST_OBJECT	(-3ULL)
 
 /*
+ * artificial blkids for bonus buffer and spill blocks
+ */
+#define	DMU_BONUS_BLKID		(-1ULL)
+#define	DMU_SPILL_BLKID		(-2ULL)
+/*
  * Public routines to create, destroy, open, and close objsets.
  */
 int dmu_objset_hold(const char *name, void *tag, objset_t **osp);
@@ -314,6 +324,7 @@
  */
 #define	WP_NOFILL	0x1
 #define	WP_DMU_SYNC	0x2
+#define	WP_SPILL	0x4
 
 void dmu_write_policy(objset_t *os, struct dnode *dn, int level, int wp,
     struct zio_prop *zp);
@@ -330,6 +341,17 @@
 int dmu_bonus_hold(objset_t *os, uint64_t object, void *tag, dmu_buf_t **);
 int dmu_bonus_max(void);
 int dmu_set_bonus(dmu_buf_t *, int, dmu_tx_t *);
+int dmu_set_bonustype(dmu_buf_t *, dmu_object_type_t, dmu_tx_t *);
+int dmu_rm_spill(objset_t *, uint64_t, dmu_tx_t *);
+
+/*
+ * Special spill buffer support used by "SA" framework
+ */
+
+int dmu_spill_hold_by_bonus(dmu_buf_t *bonus, void *tag, dmu_buf_t **dbp);
+int dmu_spill_hold_by_dnode(struct dnode *dn, uint32_t flags,
+    void *tag, dmu_buf_t **dbp);
+int dmu_spill_hold_existing(dmu_buf_t *bonus, void *tag, dmu_buf_t **dbp);
 
 /*
  * Obtain the DMU buffer from the specified object which contains the
@@ -443,6 +465,9 @@
     uint64_t len);
 void dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, const char *name);
 void dmu_tx_hold_bonus(dmu_tx_t *tx, uint64_t object);
+void dmu_tx_hold_spill(dmu_tx_t *tx, uint64_t object);
+void dmu_tx_hold_sa(dmu_tx_t *tx, struct sa_handle *hdl, boolean_t may_grow);
+void dmu_tx_hold_sa_create(dmu_tx_t *tx, int total_size);
 void dmu_tx_abort(dmu_tx_t *tx);
 int dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how);
 void dmu_tx_wait(dmu_tx_t *tx);

--- a/usr/src/uts/common/fs/zfs/sys/dmu_objset.h	Tue Mar 16 06:44:44 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/dmu_objset.h	Tue Mar 16 09:43:38 2010 -0600
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -33,6 +33,7 @@
 #include <sys/dnode.h>
 #include <sys/zio.h>
 #include <sys/zil.h>
+#include <sys/sa.h>
 
 #ifdef	__cplusplus
 extern "C" {
@@ -99,6 +100,9 @@
 	/* stuff we store for the user */
 	kmutex_t os_user_ptr_lock;
 	void *os_user_ptr;
+
+	/* SA layout/attribute registration */
+	sa_os_t *os_sa;
 };
 
 #define	DMU_META_OBJSET		0
@@ -146,7 +150,8 @@
 int dmu_objset_open_impl(spa_t *spa, struct dsl_dataset *ds, blkptr_t *bp,
     objset_t **osp);
 void dmu_objset_evict(objset_t *os);
-void dmu_objset_do_userquota_callbacks(objset_t *os, dmu_tx_t *tx);
+void dmu_objset_do_userquota_updates(objset_t *os, dmu_tx_t *tx);
+void dmu_objset_userquota_get_ids(dnode_t *dn, boolean_t before);
 boolean_t dmu_objset_userused_enabled(objset_t *os);
 int dmu_objset_userspace_upgrade(objset_t *os);
 boolean_t dmu_objset_userspace_present(objset_t *os);

--- a/usr/src/uts/common/fs/zfs/sys/dmu_tx.h	Tue Mar 16 06:44:44 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/dmu_tx.h	Tue Mar 16 09:43:38 2010 -0600
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -77,6 +77,7 @@
 	THT_FREE,
 	THT_ZAP,
 	THT_SPACE,
+	THT_SPILL,
 	THT_NUMTYPES
 };

--- a/usr/src/uts/common/fs/zfs/sys/dnode.h	Tue Mar 16 06:44:44 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/dnode.h	Tue Mar 16 09:43:38 2010 -0600
@@ -63,6 +63,19 @@
 #define	DN_MAX_OFFSET_SHIFT	64	/* 2^64 bytes in a dnode */
 
 /*
+ * dnode id flags
+ *
+ * Note: a file will never ever have its
+ * ids moved from bonus->spill
+ * and only in a crypto environment would it be on spill
+ */
+#define	DN_ID_CHKED_BONUS	0x1
+#define	DN_ID_CHKED_SPILL	0x2
+#define	DN_ID_OLD_EXIST		0x4
+#define	DN_ID_NEW_EXIST		0x8
+#define	DN_ID_SYNC		0x10
+
+/*
  * Derived constants.
  */
 #define	DNODE_SIZE	(1 << DNODE_SHIFT)
@@ -70,6 +83,7 @@
 #define	DN_MAX_BONUSLEN	(DNODE_SIZE - DNODE_CORE_SIZE - (1 << SPA_BLKPTRSHIFT))
 #define	DN_MAX_OBJECT	(1ULL << DN_MAX_OBJECT_SHIFT)
 #define	DN_ZERO_BONUSLEN	(DN_MAX_BONUSLEN + 1)
+#define	DN_KILL_SPILLBLK (1)
 
 #define	DNODES_PER_BLOCK_SHIFT	(DNODE_BLOCK_SHIFT - DNODE_SHIFT)
 #define	DNODES_PER_BLOCK	(1ULL << DNODES_PER_BLOCK_SHIFT)
@@ -102,6 +116,9 @@
 #define	DNODE_FLAG_USED_BYTES		(1<<0)
 #define	DNODE_FLAG_USERUSED_ACCOUNTED	(1<<1)
 
+/* Does dnode have a SA spill blkptr in bonus? */
+#define	DNODE_FLAG_SPILL_BLKPTR	(1<<2)
+
 typedef struct dnode_phys {
 	uint8_t dn_type;		/* dmu_object_type_t */
 	uint8_t dn_indblkshift;		/* ln2(indirect block size) */
@@ -122,7 +139,8 @@
 	uint64_t dn_pad3[4];
 
 	blkptr_t dn_blkptr[1];
-	uint8_t dn_bonus[DN_MAX_BONUSLEN];
+	uint8_t dn_bonus[DN_MAX_BONUSLEN - sizeof (blkptr_t)];
+	blkptr_t dn_spill;
 } dnode_phys_t;
 
 typedef struct dnode {
@@ -162,6 +180,8 @@
 	uint8_t dn_next_nblkptr[TXG_SIZE];
 	uint8_t dn_next_nlevels[TXG_SIZE];
 	uint8_t dn_next_indblkshift[TXG_SIZE];
+	uint8_t dn_next_bonustype[TXG_SIZE];
+	uint8_t dn_rm_spillblk[TXG_SIZE];	/* for removing spill blk */
 	uint16_t dn_next_bonuslen[TXG_SIZE];
 	uint32_t dn_next_blksz[TXG_SIZE];	/* next block size in bytes */
 
@@ -186,12 +206,17 @@
 	kmutex_t dn_dbufs_mtx;
 	list_t dn_dbufs;		/* linked list of descendent dbuf_t's */
 	struct dmu_buf_impl *dn_bonus;	/* bonus buffer dbuf */
+	boolean_t dn_have_spill;	/* have spill or are spilling */
 
 	/* parent IO for current sync write */
 	zio_t *dn_zio;
 
 	/* used in syncing context */
-	dnode_phys_t *dn_oldphys;
+	uint64_t dn_oldused;	/* old phys used bytes */
+	uint64_t dn_oldflags;	/* old phys dn_flags */
+	uint64_t dn_olduid, dn_oldgid;
+	uint64_t dn_newuid, dn_newgid;
+	int dn_id_flags;
 
 	/* holds prefetch structure */
 	struct zfetch	dn_zfetch;
@@ -208,6 +233,9 @@
 void dnode_special_close(dnode_t *dn);
 
 void dnode_setbonuslen(dnode_t *dn, int newsize, dmu_tx_t *tx);
+void dnode_setbonus_type(dnode_t *dn, dmu_object_type_t, dmu_tx_t *tx);
+void dnode_rm_spill(dnode_t *dn, dmu_tx_t *tx);
+
 int dnode_hold(struct objset *dd, uint64_t object,
     void *ref, dnode_t **dnp);
 int dnode_hold_impl(struct objset *dd, uint64_t object, int flag,

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/fs/zfs/sys/sa.h	Tue Mar 16 09:43:38 2010 -0600
@@ -0,0 +1,171 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef	_SYS_SA_H
+#define	_SYS_SA_H
+
+#include <sys/dmu.h>
+
+/*
+ * Currently available byteswap functions.
+ * If at all possible new attributes should use
+ * one of the already defined byteswap functions.
+ * If a new byteswap function is added then the
+ * ZPL/Pool version will need to be bumped.
+ */
+
+typedef enum sa_bswap_type {
+	SA_UINT64_ARRAY,
+	SA_UINT32_ARRAY,
+	SA_UINT16_ARRAY,
+	SA_UINT8_ARRAY,
+	SA_ACL,
+} sa_bswap_type_t;
+
+typedef uint16_t	sa_attr_type_t;
+
+/*
+ * Attribute to register support for.
+ */
+typedef struct sa_attr_reg {
+	char 			*sa_name;	/* attribute name */
+	uint16_t 		sa_length;
+	sa_bswap_type_t		sa_byteswap;	/* bswap function enum */
+	sa_attr_type_t 		sa_attr; /* filled in during registration */
+} sa_attr_reg_t;
+
+
+typedef void (sa_data_locator_t)(void **, uint32_t *, uint32_t,
+    boolean_t, void *userptr);
+
+/*
+ * array of attributes to store.
+ *
+ * This array should be treated as opaque/private data.
+ * The SA_ADD_BULK_ATTR() macro should be used for manipulating
+ * the array.
+ *
+ * When sa_replace_all_by_template() is used the attributes
+ * will be stored in the order defined in the array, except that
+ * the attributes may be split between the bonus and the spill buffer
+ *
+ */
+typedef struct sa_bulk_attr {
+	void			*sa_data;
+	sa_data_locator_t	*sa_data_func;
+	uint16_t		sa_length;
+	sa_attr_type_t		sa_attr;
+	/* the following are private to the sa framework */
+	void 			*sa_addr;
+	uint16_t		sa_buftype;
+	uint16_t		sa_size;
+} sa_bulk_attr_t;
+
+
+/*
+ * special macro for adding entries for bulk attr support
+ * b - sa_bulk_attr_t array
+ * idx - integer index that will be incremented during each add
+ * attr - attribute to manipulate
+ * func - function for accessing data.
+ * data - pointer to data.
+ * len - length of data
+ */
+
+#define	SA_ADD_BULK_ATTR(b, idx, attr, func, data, len) \
+{ \
+	b[idx].sa_attr = attr;\
+	b[idx].sa_data_func = func; \
+	b[idx].sa_data = data; \
+	b[idx++].sa_length = len; \
+}
+
+typedef struct sa_os sa_os_t;
+
+typedef enum sa_handle_type {
+	SA_HDL_SHARED,
+	SA_HDL_PRIVATE
+} sa_handle_type_t;
+
+struct sa_handle;
+typedef void *sa_lookup_tab_t;
+typedef struct sa_handle sa_handle_t;
+
+typedef void (sa_update_cb_t)(sa_handle_t *, dmu_tx_t *tx);
+
+int sa_handle_get(objset_t *, uint64_t, void *userp,
+    sa_handle_type_t, sa_handle_t **);
+int sa_handle_get_from_db(objset_t *, dmu_buf_t *, void *userp,
+    sa_handle_type_t, sa_handle_t **);
+void sa_handle_destroy(sa_handle_t *);
+int sa_buf_hold(objset_t *, uint64_t, void *, dmu_buf_t **);
+void sa_buf_rele(dmu_buf_t *, void *);
+int sa_lookup(sa_handle_t *, sa_attr_type_t, void *buf, uint32_t buflen);
+int sa_update(sa_handle_t *, sa_attr_type_t, void *buf,
+    uint32_t buflen, dmu_tx_t *);
+int sa_remove(sa_handle_t *, sa_attr_type_t, dmu_tx_t *);
+int sa_bulk_lookup(sa_handle_t *, sa_bulk_attr_t *, int count);
+int sa_bulk_lookup_locked(sa_handle_t *, sa_bulk_attr_t *, int count);
+int sa_bulk_update(sa_handle_t *, sa_bulk_attr_t *, int count, dmu_tx_t *);
+int sa_size(sa_handle_t *, sa_attr_type_t, int *);
+int sa_update_from_cb(sa_handle_t *, sa_attr_type_t,
+    uint32_t buflen, sa_data_locator_t *, void *userdata, dmu_tx_t *);
+void sa_object_info(sa_handle_t *, dmu_object_info_t *);
+void sa_object_size(sa_handle_t *, uint32_t *, u_longlong_t *);
+void sa_update_user(sa_handle_t *, sa_handle_t *);
+void *sa_get_userdata(sa_handle_t *);
+void sa_set_userp(sa_handle_t *, void *);
+dmu_buf_t *sa_get_db(sa_handle_t *);
+uint64_t sa_handle_object(sa_handle_t *);
+boolean_t sa_attr_would_spill(sa_handle_t *, sa_attr_type_t, int size);
+void sa_register_update_callback(objset_t *, sa_update_cb_t *);
+sa_attr_type_t *sa_setup(objset_t *, uint64_t, sa_attr_reg_t *, int);
+void sa_tear_down(objset_t *);
+int sa_replace_all_by_template(sa_handle_t *, sa_bulk_attr_t *,
+    int, dmu_tx_t *);
+int sa_replace_all_by_template_locked(sa_handle_t *, sa_bulk_attr_t *,
+    int, dmu_tx_t *);
+boolean_t sa_enabled(objset_t *);
+void sa_cache_init(void);
+void sa_cache_fini(void);
+int sa_set_sa_object(objset_t *, uint64_t);
+int sa_hdrsize(void *);
+void sa_handle_lock(sa_handle_t *);
+void sa_handle_unlock(sa_handle_t *);
+
+#ifdef _KERNEL
+int sa_lookup_uio(sa_handle_t *, sa_attr_type_t, uio_t *);
+#endif
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _SYS_SA_H */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/fs/zfs/sys/sa_impl.h	Tue Mar 16 09:43:38 2010 -0600
@@ -0,0 +1,288 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef	_SYS_SA_IMPL_H
+#define	_SYS_SA_IMPL_H
+
+#include <sys/dmu.h>
+#include <sys/refcount.h>
+#include <sys/list.h>
+
+/*
+ * Array of known attributes and their
+ * various characteristics.
+ */
+typedef struct sa_attr_table {
+	sa_attr_type_t	sa_attr;
+	uint8_t sa_registered;
+	uint16_t sa_length;
+	sa_bswap_type_t sa_byteswap;
+	char *sa_name;
+} sa_attr_table_t;
+
+/*
+ * Zap attribute format for attribute registration
+ *
+ * 64      56      48      40      32      24      16      8       0
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ * |        unused         |      len      | bswap |   attr num    |
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ *
+ * Zap attribute format for layout information.
+ *
+ * layout information is stored as an array of attribute numbers
+ * The name of the attribute is the layout number (0, 1, 2, ...)
+ *
+ * 16       0
+ * +---- ---+
+ * | attr # |
+ * +--------+
+ * | attr # |
+ * +--- ----+
+ *  ......
+ *
+ */
+
+#define	ATTR_BSWAP(x)	BF32_GET(x, 16, 8)
+#define	ATTR_LENGTH(x)	BF32_GET(x, 24, 16)
+#define	ATTR_NUM(x)	BF32_GET(x, 0, 16)
+#define	ATTR_ENCODE(x, attr, length, bswap) \
+{ \
+	BF64_SET(x, 24, 16, length); \
+	BF64_SET(x, 16, 8, bswap); \
+	BF64_SET(x, 0, 16, attr); \
+}
+
+#define	TOC_OFF(x)		BF32_GET(x, 0, 23)
+#define	TOC_ATTR_PRESENT(x)	BF32_GET(x, 31, 1)
+#define	TOC_LEN_IDX(x)		BF32_GET(x, 24, 4)
+#define	TOC_ATTR_ENCODE(x, len_idx, offset) \
+{ \
+	BF32_SET(x, 31, 1, 1); \
+	BF32_SET(x, 24, 7, len_idx); \
+	BF32_SET(x, 0, 24, offset); \
+}
+
+#define	SA_LAYOUTS	"LAYOUTS"
+#define	SA_REGISTRY	"REGISTRY"
+
+/*
+ * Each unique layout will have their own table
+ * sa_lot (layout_table)
+ */
+typedef struct sa_lot {
+	avl_node_t lot_num_node;
+	avl_node_t lot_hash_node;
+	uint64_t lot_num;
+	uint64_t lot_hash;
+	sa_attr_type_t *lot_attrs;	/* array of attr #'s */
+	uint32_t lot_var_sizes;	/* how many aren't fixed size */
+	uint32_t lot_attr_count;	/* total attr count */
+	list_t 	lot_idx_tab;	/* should be only a couple of entries */
+	int	lot_instance;	/* used with lot_hash to identify entry */
+} sa_lot_t;
+
+/* index table of offsets */
+typedef struct sa_idx_tab {
+	list_node_t	sa_next;
+	sa_lot_t	*sa_layout;
+	uint16_t	*sa_variable_lengths;
+	refcount_t	sa_refcount;
+	uint32_t	*sa_idx_tab;	/* array of offsets */
+} sa_idx_tab_t;
+
+/*
+ * Since the offset/index information into the actual data
+ * will usually be identical we can share that information with
+ * all handles that have the exact same offsets.
+ *
+ * You would typically only have a large number of different tables of
+ * contents if you had several variable sized attributes.
+ *
+ * Two AVL trees are used to track the attribute layout numbers.
+ * one is keyed by number and will be consulted when a DMU_OT_SA
+ * object is first read.  The second tree is keyed by the hash signature
+ * of the attributes and will be consulted when an attribute is added
+ * to determine if we already have an instance of that layout.  Both
+ * of these trees are interconnected.  The only difference is that
+ * when an entry is found in the "hash" tree the list of attributes will
+ * need to be compared against the list of attributes you have in hand.
+ * The assumption is that typically attributes will just be updated and
+ * adding a completely new attribute is a very rare operation.
+ */
+struct sa_os {
+	kmutex_t 	sa_lock;
+	boolean_t	sa_need_attr_registration;
+	boolean_t	sa_force_spill;
+	uint64_t	sa_master_obj;
+	uint64_t	sa_reg_attr_obj;
+	uint64_t	sa_layout_attr_obj;
+	int		sa_num_attrs;
+	sa_attr_table_t *sa_attr_table;	 /* private attr table */
+	sa_update_cb_t	*sa_update_cb;
+	avl_tree_t	sa_layout_num_tree;  /* keyed by layout number */
+	avl_tree_t	sa_layout_hash_tree; /* keyed by layout hash value */
+	int		sa_user_table_sz;
+	sa_attr_type_t	*sa_user_table; /* user name->attr mapping table */
+};
+
+/*
+ * header for all bonus and spill buffers.
+ * The header has a fixed portion with a variable number
+ * of "lengths" depending on the number of variable sized
+ * attributes which are determined by the "layout number"
+ */
+
+#define	SA_MAGIC	0x2F505A  /* ZFS SA */
+typedef struct sa_hdr_phys {
+	uint32_t sa_magic;
+	uint16_t sa_layout_info;  /* Encoded with hdrsize and layout number */
+	uint16_t sa_lengths[1];	/* optional sizes for variable length attrs */
+	/* ... Data follows the lengths.  */
+} sa_hdr_phys_t;
+
+/*
+ * sa_hdr_phys -> sa_layout_info
+ *
+ * 16      10       0
+ * +--------+-------+
+ * | hdrsz  |layout |
+ * +--------+-------+
+ *
+ * Bits 0-9 are the layout number
+ * Bits 10-15 are the size of the header.
+ * The hdrsize is the number * 8
+ *
+ * For example.
+ * hdrsz of 1 ==> 8 byte header
+ *          2 ==> 16 byte header
+ *
+ */
+
+#define	SA_HDR_LAYOUT_NUM(hdr) BF32_GET(hdr->sa_layout_info, 0, 10)
+#define	SA_HDR_SIZE(hdr) BF32_GET_SB(hdr->sa_layout_info, 10, 6, 3, 0)
+#define	SA_HDR_LAYOUT_INFO_ENCODE(x, num, size) \
+{ \
+	BF32_SET_SB(x, 10, 6, 3, 0, size); \
+	BF32_SET(x, 0, 10, num); \
+}
+
+typedef enum sa_buf_type {
+	SA_BONUS = 1,
+	SA_SPILL = 2
+} sa_buf_type_t;
+
+typedef enum sa_data_op {
+	SA_LOOKUP,
+	SA_UPDATE,
+	SA_ADD,
+	SA_REPLACE,
+	SA_REMOVE
+} sa_data_op_t;
+
+/*
+ * Opaque handle used for most sa functions
+ *
+ * This needs to be kept as small as possible.
+ */
+
+struct sa_handle {
+	kmutex_t	sa_lock;
+	dmu_buf_t	*sa_bonus;
+	dmu_buf_t	*sa_spill;
+	objset_t	*sa_os;
+	void 		*sa_userp;
+	sa_idx_tab_t	*sa_bonus_tab;	 /* idx of bonus */
+	sa_idx_tab_t	*sa_spill_tab; /* only present if spill activated */
+};
+
+#define	SA_GET_DB(hdl, type)	\
+	(dmu_buf_impl_t *)((type == SA_BONUS) ? hdl->sa_bonus : hdl->sa_spill)
+
+#define	SA_GET_HDR(hdl, type) \
+	((sa_hdr_phys_t *)((dmu_buf_impl_t *)(SA_GET_DB(hdl, \
+	type))->db.db_data))
+
+#define	SA_IDX_TAB_GET(hdl, type) \
+	(type == SA_BONUS ? hdl->sa_bonus_tab : hdl->sa_spill_tab)
+
+#define	IS_SA_BONUSTYPE(a)	\
+	((a == DMU_OT_SA) ? B_TRUE : B_FALSE)
+
+#define	SA_BONUSTYPE_FROM_DB(db) \
+	(((dmu_buf_impl_t *)db)->db_dnode->dn_bonustype)
+
+#define	SA_BLKPTR_SPACE	(DN_MAX_BONUSLEN - sizeof (blkptr_t))
+
+#define	SA_LAYOUT_NUM(x, type) \
+	((!IS_SA_BONUSTYPE(type) ? 0 : (((IS_SA_BONUSTYPE(type)) && \
+	((SA_HDR_LAYOUT_NUM(x)) == 0)) ? 1 : SA_HDR_LAYOUT_NUM(x))))
+
+
+#define	SA_REGISTERED_LEN(sa, attr) sa->sa_attr_table[attr].sa_length
+
+#define	SA_ATTR_LEN(sa, idx, attr, hdr) ((SA_REGISTERED_LEN(sa, attr) == 0) ?\
+	hdr->sa_lengths[TOC_LEN_IDX(idx->sa_idx_tab[attr])] : \
+	SA_REGISTERED_LEN(sa, attr))
+
+#define	SA_SET_HDR(hdr, num, size) \
+	{ \
+		hdr->sa_magic = SA_MAGIC; \
+		SA_HDR_LAYOUT_INFO_ENCODE(hdr->sa_layout_info, num, size); \
+	}
+
+#define	SA_ATTR_INFO(sa, idx, hdr, attr, bulk, type, hdl) \
+	{ \
+		bulk.sa_size = SA_ATTR_LEN(sa, idx, attr, hdr); \
+		bulk.sa_buftype = type; \
+		bulk.sa_addr = \
+		    (void *)((uintptr_t)TOC_OFF(idx->sa_idx_tab[attr]) + \
+		    (uintptr_t)hdr); \
+}
+
+#define	SA_HDR_SIZE_MATCH_LAYOUT(hdr, tb) \
+	(SA_HDR_SIZE(hdr) == (sizeof (sa_hdr_phys_t) + \
+	(tb->lot_var_sizes > 1 ? P2ROUNDUP((tb->lot_var_sizes - 1) * \
+	sizeof (uint16_t), 8) : 0)))
+
+int sa_add_impl(sa_handle_t *, sa_attr_type_t,
+    uint32_t, sa_data_locator_t, void *, dmu_tx_t *);
+
+void sa_register_update_callback_locked(objset_t *, sa_update_cb_t *);
+int sa_size_locked(sa_handle_t *, sa_attr_type_t, int *);
+
+void sa_default_locator(void **, uint32_t *, uint32_t, boolean_t, void *);
+int sa_attr_size(sa_os_t *, sa_idx_tab_t *, sa_attr_type_t,
+    uint16_t *, sa_hdr_phys_t *);
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _SYS_SA_IMPL_H */

--- a/usr/src/uts/common/fs/zfs/sys/zfs_acl.h	Tue Mar 16 06:44:44 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/zfs_acl.h	Tue Mar 16 09:43:38 2010 -0600
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -33,6 +33,7 @@
 #include <sys/acl.h>
 #include <sys/dmu.h>
 #include <sys/zfs_fuid.h>
+#include <sys/sa.h>
 
 #ifdef	__cplusplus
 extern "C" {
@@ -106,12 +107,18 @@
 
 #define	ZFS_ACE_SPACE	(sizeof (zfs_oldace_t) * ACE_SLOT_CNT)
 
+/*
+ * Size of ACL count is always 2 bytes.
+ * Necessary for dealing with both V0 ACL and V1 ACL layout
+ */
+#define	ZFS_ACL_COUNT_SIZE	(sizeof (uint16_t))
+
 typedef struct zfs_acl_phys {
 	uint64_t	z_acl_extern_obj;	  /* ext acl pieces */
 	uint32_t	z_acl_size;		  /* Number of bytes in ACL */
 	uint16_t	z_acl_version;		  /* acl version */
 	uint16_t	z_acl_count;		  /* ace count */
-	uint8_t		z_ace_data[ZFS_ACE_SPACE]; /* space for embedded ACEs */
+	uint8_t	z_ace_data[ZFS_ACE_SPACE]; /* space for embedded ACEs */
 } zfs_acl_phys_t;
 
 typedef struct acl_ops {
@@ -146,21 +153,26 @@
 	void		*z_allocdata;	/* pointer to kmem allocated memory */
 	size_t		z_allocsize;	/* Size of blob in bytes */
 	size_t		z_size;		/* length of ACL data */
-	int		z_ace_count;	/* number of ACEs in this acl node */
+	uint64_t	z_ace_count;	/* number of ACEs in this acl node */
 	int		z_ace_idx;	/* ace iterator positioned on */
 } zfs_acl_node_t;
 
 typedef struct zfs_acl {
-	int		z_acl_count;	/* Number of ACEs */
+	uint64_t	z_acl_count;	/* Number of ACEs */
 	size_t		z_acl_bytes;	/* Number of bytes in ACL */
 	uint_t		z_version;	/* version of ACL */
 	void		*z_next_ace;	/* pointer to next ACE */
-	int		z_hints;	/* ACL hints (ZFS_INHERIT_ACE ...) */
+	uint64_t	z_hints;	/* ACL hints (ZFS_INHERIT_ACE ...) */
 	zfs_acl_node_t	*z_curr_node;	/* current node iterator is handling */
 	list_t		z_acl;		/* chunks of ACE data */
 	acl_ops_t	z_ops;		/* ACL operations */
 } zfs_acl_t;
 
+typedef struct acl_locator_cb {
+	zfs_acl_t *cb_aclp;
+	zfs_acl_node_t *cb_acl_node;
+} zfs_acl_locator_cb_t;
+
 #define	ACL_DATA_ALLOCED	0x1
 #define	ZFS_ACL_SIZE(aclcnt)	(sizeof (ace_t) * (aclcnt))
 
@@ -174,6 +186,10 @@
 	struct zfs_fuid_info 	*z_fuidp;	/* for tracking fuids for log */
 } zfs_acl_ids_t;
 
+#define	ZFS_EXTERNAL_ACL(zp) \
+	(zp->z_is_sa ? 0 : zfs_external_acl(zp))
+#define	ZNODE_ACL_VERSION(zp) \
+	(zp->z_is_sa ? ZFS_ACL_VERSION_FUID : zfs_znode_acl_version(zp))
 /*
  * Property values for acl_mode and acl_inherit.
  *
@@ -215,6 +231,14 @@
 int zfs_vsec_2_aclp(struct zfsvfs *, vtype_t, vsecattr_t *, cred_t *,
     struct zfs_fuid_info **, zfs_acl_t **);
 int zfs_aclset_common(struct znode *, zfs_acl_t *, cred_t *, dmu_tx_t *);
+uint64_t zfs_external_acl(struct znode *);
+int zfs_znode_acl_version(struct znode *);
+int zfs_acl_size(struct znode *, int *);
+zfs_acl_t *zfs_acl_alloc(int);
+zfs_acl_node_t *zfs_acl_node_alloc(size_t);
+void zfs_acl_xform(struct znode *, zfs_acl_t *, cred_t *);
+void zfs_acl_data_locator(void **, uint32_t *, uint32_t, boolean_t, void *);
+uint64_t zfs_mode_compute(uint64_t, zfs_acl_t *, uint64_t *);
 
 #endif

--- a/usr/src/uts/common/fs/zfs/sys/zfs_dir.h	Tue Mar 16 06:44:44 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/zfs_dir.h	Tue Mar 16 09:43:38 2010 -0600
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -57,7 +57,7 @@
 extern int zfs_dirlook(znode_t *, char *, vnode_t **, int, int *,
     pathname_t *);
 extern void zfs_mknode(znode_t *, vattr_t *, dmu_tx_t *, cred_t *,
-    uint_t, znode_t **, int, zfs_acl_ids_t *);
+    uint_t, znode_t **, zfs_acl_ids_t *);
 extern void zfs_rmnode(znode_t *);
 extern void zfs_dl_name_switch(zfs_dirlock_t *dl, char *new, char **old);
 extern boolean_t zfs_dirempty(znode_t *);

--- a/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h	Tue Mar 16 06:44:44 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h	Tue Mar 16 09:43:38 2010 -0600
@@ -71,12 +71,13 @@
 
 #define	DMU_BACKUP_FEATURE_DEDUP	(0x1)
 #define	DMU_BACKUP_FEATURE_DEDUPPROPS	(0x2)
+#define	DMU_BACKUP_FEATURE_SA_SPILL	(0x4)
 
 /*
  * Mask of all supported backup features
  */
 #define	DMU_BACKUP_FEATURE_MASK	(DMU_BACKUP_FEATURE_DEDUP | \
-		DMU_BACKUP_FEATURE_DEDUPPROPS)
+		DMU_BACKUP_FEATURE_DEDUPPROPS | DMU_BACKUP_FEATURE_SA_SPILL)
 
 /* Are all features in the given flag word currently supported? */
 #define	DMU_STREAM_SUPPORTED(x)	(!((x) & ~DMU_BACKUP_FEATURE_MASK))
@@ -118,7 +119,7 @@
 	enum {
 		DRR_BEGIN, DRR_OBJECT, DRR_FREEOBJECTS,
 		DRR_WRITE, DRR_FREE, DRR_END, DRR_WRITE_BYREF,
-		DRR_NUMTYPES
+		DRR_SPILL, DRR_NUMTYPES
 	} drr_type;
 	uint32_t drr_payloadlen;
 	union {
@@ -188,6 +189,13 @@
 			uint8_t drr_pad2[6];
 			ddt_key_t drr_key; /* deduplication key */
 		} drr_write_byref;
+		struct drr_spill {
+			uint64_t drr_object;
+			uint64_t drr_length;
+			uint64_t drr_toguid;
+			uint64_t drr_pad[4]; /* needed for crypto */
+			/* spill data follows */
+		} drr_spill;
 	} drr_u;
 } dmu_replay_record_t;

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/fs/zfs/sys/zfs_sa.h	Tue Mar 16 09:43:38 2010 -0600
@@ -0,0 +1,143 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef	_SYS_ZFS_SA_H
+#define	_SYS_ZFS_SA_H
+
+#ifdef _KERNEL
+#include <sys/types32.h>
+#include <sys/list.h>
+#include <sys/dmu.h>
+#include <sys/zfs_acl.h>
+#include <sys/zfs_znode.h>
+#include <sys/sa.h>
+#include <sys/zil.h>
+
+
+#endif
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+/*
+ * This is the list of known attributes
+ * to the ZPL.  The values of the actual
+ * attributes are not defined by the order
+ * of the enums.  It is controlled by the attribute
+ * registration mechanism.  Two different file systems
+ * could have different numeric values for the same
+ * attributes.  This list is only used for dereferencing
+ * into the table that will hold the actual numeric value.
+ */
+typedef enum zpl_attr {
+	ZPL_ATIME,
+	ZPL_MTIME,
+	ZPL_CTIME,
+	ZPL_CRTIME,
+	ZPL_GEN,
+	ZPL_MODE,
+	ZPL_SIZE,
+	ZPL_PARENT,
+	ZPL_LINKS,
+	ZPL_XATTR,
+	ZPL_RDEV,
+	ZPL_FLAGS,
+	ZPL_UID,
+	ZPL_GID,
+	ZPL_PAD,
+	ZPL_ZNODE_ACL,
+	ZPL_DACL_COUNT,
+	ZPL_SYMLINK,
+	ZPL_SCANSTAMP,
+	ZPL_DACL_ACES,
+	ZPL_END
+} zpl_attr_t;
+
+#define	ZFS_OLD_ZNODE_PHYS_SIZE	0x108
+#define	ZFS_SA_BASE_ATTR_SIZE	(ZFS_OLD_ZNODE_PHYS_SIZE - \
+    sizeof (zfs_acl_phys_t))
+
+#define	SA_MODE_OFFSET		0
+#define	SA_SIZE_OFFSET		8
+#define	SA_GEN_OFFSET		16
+#define	SA_UID_OFFSET		24
+#define	SA_GID_OFFSET		32
+#define	SA_PARENT_OFFSET	40
+
+extern sa_attr_reg_t zfs_attr_table[ZPL_END + 1];
+extern sa_attr_reg_t zfs_legacy_attr_table[ZPL_END + 1];
+
+/*
+ * This is a deprecated data structure that only exists for
+ * dealing with file systems created prior to ZPL version 5.
+ */
+typedef struct znode_phys {
+	uint64_t zp_atime[2];		/*  0 - last file access time */
+	uint64_t zp_mtime[2];		/* 16 - last file modification time */
+	uint64_t zp_ctime[2];		/* 32 - last file change time */
+	uint64_t zp_crtime[2];		/* 48 - creation time */
+	uint64_t zp_gen;		/* 64 - generation (txg of creation) */
+	uint64_t zp_mode;		/* 72 - file mode bits */
+	uint64_t zp_size;		/* 80 - size of file */
+	uint64_t zp_parent;		/* 88 - directory parent (`..') */
+	uint64_t zp_links;		/* 96 - number of links to file */
+	uint64_t zp_xattr;		/* 104 - DMU object for xattrs */
+	uint64_t zp_rdev;		/* 112 - dev_t for VBLK & VCHR files */
+	uint64_t zp_flags;		/* 120 - persistent flags */
+	uint64_t zp_uid;		/* 128 - file owner */
+	uint64_t zp_gid;		/* 136 - owning group */
+	uint64_t zp_zap;		/* 144 - extra attributes */
+	uint64_t zp_pad[3];		/* 152 - future */
+	zfs_acl_phys_t zp_acl;		/* 176 - 263 ACL */
+	/*
+	 * Data may pad out any remaining bytes in the znode buffer, eg:
+	 *
+	 * |<---------------------- dnode_phys (512) ------------------------>|
+	 * |<-- dnode (192) --->|<----------- "bonus" buffer (320) ---------->|
+	 *			|<---- znode (264) ---->|<---- data (56) ---->|
+	 *
+	 * At present, we use this space for the following:
+	 *  - symbolic links
+	 *  - 32-byte anti-virus scanstamp (regular files only)
+	 */
+} znode_phys_t;
+
+#ifdef _KERNEL
+int zfs_sa_readlink(struct znode *, uio_t *);
+void zfs_sa_symlink(struct znode *, char *link, int len, dmu_tx_t *);
+void zfs_sa_upgrade(struct sa_handle  *, dmu_tx_t *);
+void zfs_sa_get_scanstamp(struct znode *, xvattr_t *);
+void zfs_sa_set_scanstamp(struct znode *, xvattr_t *, dmu_tx_t *);
+void zfs_sa_uprade_pre(struct sa_handle *, void *, dmu_tx_t *);
+void zfs_sa_upgrade_post(struct sa_handle *, void *, dmu_tx_t *);
+void zfs_sa_upgrade_txholds(dmu_tx_t *, struct znode *);
+#endif
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _SYS_ZFS_SA_H */

--- a/usr/src/uts/common/fs/zfs/sys/zfs_vfsops.h	Tue Mar 16 06:44:44 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/zfs_vfsops.h	Tue Mar 16 09:43:38 2010 -0600
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -31,6 +31,7 @@
 #include <sys/list.h>
 #include <sys/vfs.h>
 #include <sys/zil.h>
+#include <sys/sa.h>
 #include <sys/rrwlock.h>
 #include <sys/zfs_ioctl.h>
 
@@ -39,6 +40,7 @@
 #endif
 
 typedef struct zfsvfs zfsvfs_t;
+struct znode;
 
 struct zfsvfs {
 	vfs_t		*z_vfs;		/* generic fs struct */
@@ -73,11 +75,13 @@
 	boolean_t	z_vscan;	/* virus scan on/off */
 	boolean_t	z_use_fuids;	/* version allows fuids */
 	boolean_t	z_replay;	/* set during ZIL replay */
+	boolean_t	z_use_sa;	/* version allows system attributes */
 	uint64_t	z_version;	/* ZPL version */
 	uint64_t	z_shares_dir;	/* hidden shares dir */
 	kmutex_t	z_lock;
 	uint64_t	z_userquota_obj;
 	uint64_t	z_groupquota_obj;
+	sa_attr_type_t	*z_attr_table;	/* SA attr mapping->id */
 #define	ZFS_OBJ_MTX_SZ	64
 	kmutex_t	z_hold_mtx[ZFS_OBJ_MTX_SZ];	/* znode hold locks */
 };
@@ -140,8 +144,10 @@
     uint64_t *cookiep, void *vbuf, uint64_t *bufsizep);
 extern int zfs_set_userquota(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
     const char *domain, uint64_t rid, uint64_t quota);
-extern boolean_t zfs_usergroup_overquota(zfsvfs_t *zfsvfs,
-    boolean_t isgroup, uint64_t fuid);
+extern boolean_t zfs_owner_overquota(zfsvfs_t *zfsvfs, struct znode *,
+    boolean_t isgroup);
+extern boolean_t zfs_fuid_overquota(zfsvfs_t *zfsvfs, boolean_t isgroup,
+    uint64_t fuid);
 extern int zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers);
 extern int zfsvfs_create(const char *name, zfsvfs_t **zfvp);
 extern void zfsvfs_free(zfsvfs_t *zfsvfs);

--- a/usr/src/uts/common/fs/zfs/sys/zfs_znode.h	Tue Mar 16 06:44:44 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/zfs_znode.h	Tue Mar 16 09:43:38 2010 -0600
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -32,8 +32,10 @@
 #include <sys/attr.h>
 #include <sys/list.h>
 #include <sys/dmu.h>
+#include <sys/sa.h>
 #include <sys/zfs_vfsops.h>
 #include <sys/rrwlock.h>
+#include <sys/zfs_sa.h>
 #endif
 #include <sys/zfs_acl.h>
 #include <sys/zil.h>
@@ -59,12 +61,14 @@
 #define	ZFS_AV_MODIFIED 	0x0000040000000000
 #define	ZFS_REPARSE		0x0000080000000000
 
-#define	ZFS_ATTR_SET(zp, attr, value)	\
+#define	ZFS_ATTR_SET(zp, attr, value, pflags, tx) \
 { \
 	if (value) \
-		zp->z_phys->zp_flags |= attr; \
+		pflags |= attr; \
 	else \
-		zp->z_phys->zp_flags &= ~attr; \
+		pflags &= ~attr; \
+	VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_FLAGS(zp->z_zfsvfs), \
+	    &pflags, sizeof (pflags), tx)); \
 }
 
 /*
@@ -80,6 +84,27 @@
 #define	ZFS_BONUS_SCANSTAMP	0x80		/* Scanstamp in bonus area */
 #define	ZFS_NO_EXECS_DENIED	0x100		/* exec was given to everyone */
 
+#define	SA_ZPL_ATIME(z)		z->z_attr_table[ZPL_ATIME]
+#define	SA_ZPL_MTIME(z)		z->z_attr_table[ZPL_MTIME]
+#define	SA_ZPL_CTIME(z)		z->z_attr_table[ZPL_CTIME]
+#define	SA_ZPL_CRTIME(z)	z->z_attr_table[ZPL_CRTIME]
+#define	SA_ZPL_GEN(z)		z->z_attr_table[ZPL_GEN]
+#define	SA_ZPL_DACL_ACES(z)	z->z_attr_table[ZPL_DACL_ACES]
+#define	SA_ZPL_XATTR(z)		z->z_attr_table[ZPL_XATTR]
+#define	SA_ZPL_SYMLINK(z)	z->z_attr_table[ZPL_SYMLINK]
+#define	SA_ZPL_RDEV(z)		z->z_attr_table[ZPL_RDEV]
+#define	SA_ZPL_SCANSTAMP(z)	z->z_attr_table[ZPL_SCANSTAMP]
+#define	SA_ZPL_UID(z)		z->z_attr_table[ZPL_UID]
+#define	SA_ZPL_GID(z)		z->z_attr_table[ZPL_GID]
+#define	SA_ZPL_PARENT(z)	z->z_attr_table[ZPL_PARENT]
+#define	SA_ZPL_LINKS(z)		z->z_attr_table[ZPL_LINKS]
+#define	SA_ZPL_MODE(z)		z->z_attr_table[ZPL_MODE]
+#define	SA_ZPL_DACL_COUNT(z)	z->z_attr_table[ZPL_DACL_COUNT]
+#define	SA_ZPL_FLAGS(z)		z->z_attr_table[ZPL_FLAGS]
+#define	SA_ZPL_SIZE(z)		z->z_attr_table[ZPL_SIZE]
+#define	SA_ZPL_ZNODE_ACL(z)	z->z_attr_table[ZPL_ZNODE_ACL]
+#define	SA_ZPL_PAD(z)		z->z_attr_table[ZPL_PAD]
+
 /*
  * Is ID ephemeral?
  */
@@ -88,8 +113,10 @@
 /*
  * Should we use FUIDs?
  */
-#define	USE_FUIDS(version, os)	(version >= ZPL_VERSION_FUID &&\
+#define	USE_FUIDS(version, os)	(version >= ZPL_VERSION_FUID && \
     spa_version(dmu_objset_spa(os)) >= SPA_VERSION_FUID)
+#define	USE_SA(version, os) (version >= ZPL_VERSION_SA && \
+    spa_version(dmu_objset_spa(os)) >= SPA_VERSION_SA)
 
 #define	MASTER_NODE_OBJ	1
 
@@ -104,6 +131,7 @@
 #define	ZPL_VERSION_STR		"VERSION"
 #define	ZFS_FUID_TABLES		"FUID"
 #define	ZFS_SHARES_DIR		"SHARES"
+#define	ZFS_SA_ATTRS		"SA_ATTRS"
 
 #define	ZFS_MAX_BLOCKSIZE	(SPA_MAXBLOCKSIZE)
 
@@ -132,42 +160,6 @@
 #define	ZFS_DIRENT_OBJ(de) BF64_GET(de, 0, 48)
 
 /*
- * This is the persistent portion of the znode.  It is stored
- * in the "bonus buffer" of the file.  Short symbolic links
- * are also stored in the bonus buffer.
- */
-typedef struct znode_phys {
-	uint64_t zp_atime[2];		/*  0 - last file access time */
-	uint64_t zp_mtime[2];		/* 16 - last file modification time */
-	uint64_t zp_ctime[2];		/* 32 - last file change time */
-	uint64_t zp_crtime[2];		/* 48 - creation time */
-	uint64_t zp_gen;		/* 64 - generation (txg of creation) */
-	uint64_t zp_mode;		/* 72 - file mode bits */
-	uint64_t zp_size;		/* 80 - size of file */
-	uint64_t zp_parent;		/* 88 - directory parent (`..') */
-	uint64_t zp_links;		/* 96 - number of links to file */
-	uint64_t zp_xattr;		/* 104 - DMU object for xattrs */
-	uint64_t zp_rdev;		/* 112 - dev_t for VBLK & VCHR files */
-	uint64_t zp_flags;		/* 120 - persistent flags */
-	uint64_t zp_uid;		/* 128 - file owner */
-	uint64_t zp_gid;		/* 136 - owning group */
-	uint64_t zp_zap;		/* 144 - extra attributes */
-	uint64_t zp_pad[3];		/* 152 - future */
-	zfs_acl_phys_t zp_acl;		/* 176 - 263 ACL */
-	/*
-	 * Data may pad out any remaining bytes in the znode buffer, eg:
-	 *
-	 * |<---------------------- dnode_phys (512) ------------------------>|
-	 * |<-- dnode (192) --->|<----------- "bonus" buffer (320) ---------->|
-	 *			|<---- znode (264) ---->|<---- data (56) ---->|
-	 *
-	 * At present, we use this space for the following:
-	 *  - symbolic links
-	 *  - 32-byte anti-virus scanstamp (regular files only)
-	 */
-} znode_phys_t;
-
-/*
  * Directory entry locks control access to directory entries.
  * They are used to protect creates, deletes, and renames.
  * Each directory znode has a mutex and a list of locked names.
@@ -200,16 +192,20 @@
 	uint_t		z_seq;		/* modification sequence number */
 	uint64_t	z_mapcnt;	/* number of pages mapped to file */
 	uint64_t	z_last_itx;	/* last ZIL itx on this znode */
-	uint64_t	z_gen;		/* generation (same as zp_gen) */
+	uint64_t	z_gen;		/* generation (cached) */
+	uint64_t	z_size;		/* file size (cached) */
+	uint64_t	z_atime[2];	/* atime (cached) */
+	uint64_t	z_links;	/* file links (cached) */
+	uint64_t	z_pflags;	/* pflags (cached) */
+	uid_t		z_uid;		/* uid mapped (cached) */
+	uid_t		z_gid;		/* gid mapped (cached) */
+	mode_t		z_mode;		/* mode (cached) */
 	uint32_t	z_sync_cnt;	/* synchronous open count */
 	kmutex_t	z_acl_lock;	/* acl data lock */
 	zfs_acl_t	*z_acl_cached;	/* cached acl */
 	list_node_t	z_link_node;	/* all znodes in fs link */
-	/*
-	 * These are dmu managed fields.
-	 */
-	znode_phys_t	*z_phys;	/* pointer to persistent znode */
-	dmu_buf_t	*z_dbuf;	/* buffer containing the z_phys */
+	sa_handle_t	*z_sa_hdl;	/* handle to sa data */
+	boolean_t	z_is_sa;	/* are we native sa? */
 } znode_t;
 
 
@@ -252,7 +248,7 @@
 #define	ZFS_EXIT(zfsvfs) rrw_exit(&(zfsvfs)->z_teardown_lock, FTAG)
 
 #define	ZFS_VERIFY_ZP(zp) \
-	if ((zp)->z_dbuf == NULL) { \
+	if ((zp)->z_sa_hdl == NULL) { \
 		ZFS_EXIT((zp)->z_zfsvfs); \
 		return (EIO); \
 	} \
@@ -294,14 +290,14 @@
 
 #define	ZFS_ACCESSTIME_STAMP(zfsvfs, zp) \
 	if ((zfsvfs)->z_atime && !((zfsvfs)->z_vfs->vfs_flag & VFS_RDONLY)) \
-		zfs_time_stamper(zp, ACCESSED, NULL)
+		zfs_tstamp_update_setup(zp, ACCESSED, NULL, NULL, B_FALSE)
 
 extern int	zfs_init_fs(zfsvfs_t *, znode_t **);
 extern void	zfs_set_dataprop(objset_t *);
 extern void	zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *,
     dmu_tx_t *tx);
-extern void	zfs_time_stamper(znode_t *, uint_t, dmu_tx_t *);
-extern void	zfs_time_stamper_locked(znode_t *, uint_t, dmu_tx_t *);
+extern void	zfs_tstamp_update_setup(znode_t *, uint_t, uint64_t [2],
+    uint64_t [2], boolean_t);
 extern void	zfs_grow_blocksize(znode_t *, uint64_t, dmu_tx_t *);
 extern int	zfs_freesp(znode_t *, uint64_t, uint64_t, int, boolean_t);
 extern void	zfs_znode_init(void);
@@ -340,7 +336,7 @@
     znode_t *zp, vattr_t *vap, uint_t mask_applied, zfs_fuid_info_t *fuidp);
 extern void zfs_log_acl(zilog_t *zilog, dmu_tx_t *tx, znode_t *zp,
     vsecattr_t *vsecp, zfs_fuid_info_t *fuidp);
-extern void zfs_xvattr_set(znode_t *zp, xvattr_t *xvap);
+extern void zfs_xvattr_set(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx);
 extern void zfs_upgrade(zfsvfs_t *zfsvfs, dmu_tx_t *tx);
 extern int zfs_create_share_dir(zfsvfs_t *zfsvfs, dmu_tx_t *tx);

--- a/usr/src/uts/common/fs/zfs/zfs_acl.c	Tue Mar 16 06:44:44 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/zfs_acl.c	Tue Mar 16 09:43:38 2010 -0600
@@ -50,6 +50,7 @@
 #include <sys/dmu.h>
 #include <sys/dnode.h>
 #include <sys/zap.h>
+#include <sys/sa.h>
 #include "fs/fs_subr.h"
 #include <acl/acl_common.h>
 
@@ -321,6 +322,82 @@
 	zfs_ace_fuid_data
 };
 
+/*
+ * The following three functions are provided for compatibility with
+ * older ZPL versions in order to determine if the file used to have
+ * an external ACL and what version of ACL previously existed on the
+ * file.  Would really be nice to not need this, sigh.
+ */
+
+uint64_t
+zfs_external_acl(znode_t *zp)
+{
+	zfs_acl_phys_t acl_phys;
+
+	if (zp->z_is_sa)
+		return (0);
+
+	VERIFY(0 == sa_lookup(zp->z_sa_hdl, SA_ZPL_ZNODE_ACL(zp->z_zfsvfs),
+	    &acl_phys, sizeof (acl_phys)));
+
+	return (acl_phys.z_acl_extern_obj);
+}
+
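+/*
+ * Determine size of ACL in bytes (*aclsize) and number of ACEs (*aclcount).
+ * For non-SA znodes the legacy zfs_acl_phys_t is also read into *aclphys.
+ * This is more complicated than it should be since we have to deal
+ * with old external ACLs.
+ */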
+static int
+zfs_acl_znode_info(znode_t *zp, int *aclsize, int *aclcount,
+    zfs_acl_phys_t *aclphys)
+{
+	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
+	uint64_t acl_count;
+	int size;
+	int error;
+
+	if (zp->z_is_sa) {
+		if ((error = sa_size(zp->z_sa_hdl, SA_ZPL_DACL_ACES(zfsvfs),
+		    &size)) != 0)
+			return (error);
+		*aclsize = size;
+		if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_DACL_COUNT(zfsvfs),
+		    &acl_count, sizeof (acl_count))) != 0)
+			return (error);
+		*aclcount = acl_count;
+	} else {
+		if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_ZNODE_ACL(zfsvfs),
+		    aclphys, sizeof (*aclphys))) != 0)
+			return (error);
+
+		if (aclphys->z_acl_version == ZFS_ACL_VERSION_INITIAL) {
+			*aclsize = ZFS_ACL_SIZE(aclphys->z_acl_size);
+			*aclcount = aclphys->z_acl_size;
+		} else {
+			*aclsize = aclphys->z_acl_size;
+			*aclcount = aclphys->z_acl_count;
+		}
+	}
+	return (0);
+}
+
+int
+zfs_znode_acl_version(znode_t *zp)
+{
+	zfs_acl_phys_t acl_phys;
+
+	if (zp->z_is_sa) {
+		return (ZFS_ACL_VERSION_FUID);
+	} else {
+		VERIFY(0 == sa_lookup(zp->z_sa_hdl,
+		    SA_ZPL_ZNODE_ACL(zp->z_zfsvfs),
+		    &acl_phys, sizeof (acl_phys)));
+		return (acl_phys.z_acl_version);
+	}
+}
+
 static int
 zfs_acl_version(int version)
 {
@@ -336,7 +413,7 @@
 	return (zfs_acl_version(zp->z_zfsvfs->z_version));
 }
 
-static zfs_acl_t *
+zfs_acl_t *
 zfs_acl_alloc(int vers)
 {
 	zfs_acl_t *aclp;
@@ -352,7 +429,7 @@
 	return (aclp);
 }
 
-static zfs_acl_node_t *
+zfs_acl_node_t *
 zfs_acl_node_alloc(size_t bytes)
 {
 	zfs_acl_node_t *aclnode;
@@ -463,6 +540,8 @@
 {
 	zfs_acl_node_t *aclnode;
 
+	ASSERT(aclp);
+
 	if (start == NULL) {
 		aclnode = list_head(&aclp->z_acl);
 		if (aclnode == NULL)
@@ -509,6 +588,7 @@
 		*who = aclp->z_ops.ace_who_get(acep);
 		aclp->z_next_ace = (caddr_t)aclp->z_next_ace + ace_size;
 		aclnode->z_ace_idx++;
+
 		return ((void *)acep);
 	}
 	return (NULL);
@@ -542,7 +622,7 @@
  */
 int
 zfs_copy_ace_2_fuid(zfsvfs_t *zfsvfs, vtype_t obj_type, zfs_acl_t *aclp,
-    void *datap, zfs_ace_t *z_acl, int aclcnt, size_t *size,
+    void *datap, zfs_ace_t *z_acl, uint64_t aclcnt, size_t *size,
     zfs_fuid_info_t **fuidp, cred_t *cr)
 {
 	int i;
@@ -773,8 +853,8 @@
  * Determine mode of file based on ACL.
  * Also, create FUIDs for any User/Group ACEs
  */
-static uint64_t
-zfs_mode_compute(znode_t *zp, zfs_acl_t *aclp)
+uint64_t
+zfs_mode_compute(uint64_t fmode, zfs_acl_t *aclp, uint64_t *pflags)
 {
 	int		entry_type;
 	mode_t		mode;
@@ -785,7 +865,7 @@
 	uint32_t	access_mask;
 	boolean_t	an_exec_denied = B_FALSE;
 
-	mode = (zp->z_phys->zp_mode & (S_IFMT | S_ISUID | S_ISGID | S_ISVTX));
+	mode = (fmode & (S_IFMT | S_ISUID | S_ISGID | S_ISVTX));
 
 	while (acep = zfs_acl_next_ace(aclp, acep, &who,
 	    &access_mask, &iflags, &type)) {
@@ -930,48 +1010,13 @@
 		an_exec_denied = B_TRUE;
 
 	if (an_exec_denied)
-		zp->z_phys->zp_flags &= ~ZFS_NO_EXECS_DENIED;
+		*pflags &= ~ZFS_NO_EXECS_DENIED;
 	else
-		zp->z_phys->zp_flags |= ZFS_NO_EXECS_DENIED;
+		*pflags |= ZFS_NO_EXECS_DENIED;
 
 	return (mode);
 }
 
-static zfs_acl_t *
-zfs_acl_node_read_internal(znode_t *zp, boolean_t will_modify)
-{
-	zfs_acl_t	*aclp;
-	zfs_acl_node_t	*aclnode;
-
-	aclp = zfs_acl_alloc(zp->z_phys->zp_acl.z_acl_version);
-
-	/*
-	 * Version 0 to 1 znode_acl_phys has the size/count fields swapped.
-	 * Version 0 didn't have a size field, only a count.
-	 */
-	if (zp->z_phys->zp_acl.z_acl_version == ZFS_ACL_VERSION_INITIAL) {
-		aclp->z_acl_count = zp->z_phys->zp_acl.z_acl_size;
-		aclp->z_acl_bytes = ZFS_ACL_SIZE(aclp->z_acl_count);
-	} else {
-		aclp->z_acl_count = zp->z_phys->zp_acl.z_acl_count;
-		aclp->z_acl_bytes = zp->z_phys->zp_acl.z_acl_size;
-	}
-
-	aclnode = zfs_acl_node_alloc(will_modify ? aclp->z_acl_bytes : 0);
-	aclnode->z_ace_count = aclp->z_acl_count;
-	if (will_modify) {
-		bcopy(zp->z_phys->zp_acl.z_ace_data, aclnode->z_acldata,
-		    aclp->z_acl_bytes);
-	} else {
-		aclnode->z_size = aclp->z_acl_bytes;
-		aclnode->z_acldata = &zp->z_phys->zp_acl.z_ace_data[0];
-	}
-
-	list_insert_head(&aclp->z_acl, aclnode);
-
-	return (aclp);
-}
-
 /*
  * Read an external acl object.  If the intent is to modify, always
  * create a new acl and leave any cached acl in place.
@@ -979,12 +1024,13 @@
 static int
 zfs_acl_node_read(znode_t *zp, zfs_acl_t **aclpp, boolean_t will_modify)
 {
-	uint64_t extacl = zp->z_phys->zp_acl.z_acl_extern_obj;
 	zfs_acl_t	*aclp;
-	size_t		aclsize;
-	size_t		acl_count;
+	int		aclsize;
+	int		acl_count;
 	zfs_acl_node_t	*aclnode;
-	int error;
+	zfs_acl_phys_t	znode_acl;
+	int		version;
+	int		error;
 
 	ASSERT(MUTEX_HELD(&zp->z_acl_lock));
 
@@ -993,48 +1039,69 @@
 		return (0);
 	}
 
-	if (zp->z_phys->zp_acl.z_acl_extern_obj == 0) {
-		*aclpp = zfs_acl_node_read_internal(zp, will_modify);
-		if (!will_modify)
-			zp->z_acl_cached = *aclpp;
-		return (0);
-	}
-
-	aclp = zfs_acl_alloc(zp->z_phys->zp_acl.z_acl_version);
-	if (zp->z_phys->zp_acl.z_acl_version == ZFS_ACL_VERSION_INITIAL) {
-		zfs_acl_phys_v0_t *zacl0 =
-		    (zfs_acl_phys_v0_t *)&zp->z_phys->zp_acl;
-
-		aclsize = ZFS_ACL_SIZE(zacl0->z_acl_count);
-		acl_count = zacl0->z_acl_count;
-	} else {
-		aclsize = zp->z_phys->zp_acl.z_acl_size;
-		acl_count = zp->z_phys->zp_acl.z_acl_count;
-		if (aclsize == 0)
-			aclsize = acl_count * sizeof (zfs_ace_t);
-	}
-	aclnode = zfs_acl_node_alloc(aclsize);
-	list_insert_head(&aclp->z_acl, aclnode);
-	error = dmu_read(zp->z_zfsvfs->z_os, extacl, 0,
-	    aclsize, aclnode->z_acldata, DMU_READ_PREFETCH);
-	aclnode->z_ace_count = acl_count;
+	version = ZNODE_ACL_VERSION(zp);
+
+	if ((error = zfs_acl_znode_info(zp, &aclsize,
+	    &acl_count, &znode_acl)) != 0)
+		return (error);
+
+	aclp = zfs_acl_alloc(version);
+
 	aclp->z_acl_count = acl_count;
 	aclp->z_acl_bytes = aclsize;
 
+	aclnode = zfs_acl_node_alloc(aclsize);
+	aclnode->z_ace_count = aclp->z_acl_count;
+	aclnode->z_size = aclsize;
+
+	if (!zp->z_is_sa) {
+		if (znode_acl.z_acl_extern_obj) {
+			error = dmu_read(zp->z_zfsvfs->z_os,
+			    znode_acl.z_acl_extern_obj, 0, aclnode->z_size,
+			    aclnode->z_acldata, DMU_READ_PREFETCH);
+		} else {
+			bcopy(znode_acl.z_ace_data, aclnode->z_acldata,
+			    aclnode->z_size);
+		}
+	} else {
+		error = sa_lookup(zp->z_sa_hdl, SA_ZPL_DACL_ACES(zp->z_zfsvfs),
+		    aclnode->z_acldata, aclnode->z_size);
+	}
+
 	if (error != 0) {
 		zfs_acl_free(aclp);
+		zfs_acl_node_free(aclnode);
 		/* convert checksum errors into IO errors */
 		if (error == ECKSUM)
 			error = EIO;
 		return (error);
 	}
 
+	list_insert_head(&aclp->z_acl, aclnode);
+
 	*aclpp = aclp;
 	if (!will_modify)
 		zp->z_acl_cached = aclp;
 	return (0);
 }
 
+/*ARGSUSED*/
+void
+zfs_acl_data_locator(void **dataptr, uint32_t *length, uint32_t buflen,
+    boolean_t start, void *userdata)
+{
+	zfs_acl_locator_cb_t *cb = (zfs_acl_locator_cb_t *)userdata;
+
+	if (start) {
+		cb->cb_acl_node = list_head(&cb->cb_aclp->z_acl);
+	} else {
+		cb->cb_acl_node = list_next(&cb->cb_aclp->z_acl,
+		    cb->cb_acl_node);
+	}
+	*dataptr = cb->cb_acl_node->z_acldata;
+	*length = cb->cb_acl_node->z_size;
+}
+
 /*
  * common code for setting ACLs.
  *
@@ -1045,28 +1112,33 @@
 int
 zfs_aclset_common(znode_t *zp, zfs_acl_t *aclp, cred_t *cr, dmu_tx_t *tx)
 {
-	int		error;
-	znode_phys_t	*zphys = zp->z_phys;
-	zfs_acl_phys_t	*zacl = &zphys->zp_acl;
-	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
-	uint64_t	aoid = zphys->zp_acl.z_acl_extern_obj;
-	uint64_t	off = 0;
-	dmu_object_type_t otype;
-	zfs_acl_node_t	*aclnode;
-
-	dmu_buf_will_dirty(zp->z_dbuf, tx);
+	int			error;
+	zfsvfs_t		*zfsvfs = zp->z_zfsvfs;
+	dmu_object_type_t	otype;
+	zfs_acl_locator_cb_t	locate = { 0 };
+	uint64_t		mode;
+	sa_bulk_attr_t		bulk[5];
+	uint64_t		ctime[2];
+	int			count = 0;
+
+	mode = zp->z_mode;
+	mode = zfs_mode_compute(mode, aclp, &zp->z_pflags);
+
+	zp->z_mode = mode;
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL,
+	    &mode, sizeof (mode));
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
+	    &zp->z_pflags, sizeof (zp->z_pflags));
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
+	    &ctime, sizeof (ctime));
 
 	if (zp->z_acl_cached) {
 		zfs_acl_free(zp->z_acl_cached);
 		zp->z_acl_cached = NULL;
 	}
 
-	zphys->zp_mode = zfs_mode_compute(zp, aclp);
-
 	/*
-	 * Decide which object type to use.  If we are forced to
-	 * use old ACL format then transform ACL into zfs_oldace_t
-	 * layout.
+	 * Upgrade needed?
 	 */
 	if (!zfsvfs->z_use_fuids) {
 		otype = DMU_OT_OLDACL;
@@ -1078,84 +1150,113 @@
 		otype = DMU_OT_ACL;
 	}
 
-	if (aclp->z_acl_bytes > ZFS_ACE_SPACE) {
-		/*
-		 * If ACL was previously external and we are now
-		 * converting to new ACL format then release old
-		 * ACL object and create a new one.
-		 */
-		if (aoid && aclp->z_version != zacl->z_acl_version) {
-			error = dmu_object_free(zfsvfs->z_os,
-			    zp->z_phys->zp_acl.z_acl_extern_obj, tx);
-			if (error)
-				return (error);
-			aoid = 0;
-		}
-		if (aoid == 0) {
-			aoid = dmu_object_alloc(zfsvfs->z_os,
-			    otype, aclp->z_acl_bytes,
-			    otype == DMU_OT_ACL ? DMU_OT_SYSACL : DMU_OT_NONE,
-			    otype == DMU_OT_ACL ? DN_MAX_BONUSLEN : 0, tx);
+	/*
+	 * Arrgh, we have to handle old on disk format
+	 * as well as newer (preferred) SA format.
+	 */
+
+	if (zp->z_is_sa) { /* the easy case, just update the ACL attribute */
+		locate.cb_aclp = aclp;
+		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_DACL_ACES(zfsvfs),
+		    zfs_acl_data_locator, &locate, aclp->z_acl_bytes);
+		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_DACL_COUNT(zfsvfs),
+		    NULL, &aclp->z_acl_count, sizeof (uint64_t));
+	} else { /* Painful legacy way */
+		zfs_acl_node_t *aclnode;
+		uint64_t off = 0;
+		zfs_acl_phys_t acl_phys;
+		uint64_t aoid;
+
+		if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_ZNODE_ACL(zfsvfs),
+		    &acl_phys, sizeof (acl_phys))) != 0)
+			return (error);
+
+		aoid = acl_phys.z_acl_extern_obj;
+
+		if (aclp->z_acl_bytes > ZFS_ACE_SPACE) {
+			/*
+			 * If ACL was previously external and we are now
+			 * converting to new ACL format then release old
+			 * ACL object and create a new one.
+			 */
+			if (aoid &&
+			    aclp->z_version != acl_phys.z_acl_version) {
+				error = dmu_object_free(zfsvfs->z_os, aoid, tx);
+				if (error)
+					return (error);
+				aoid = 0;
+			}
+			if (aoid == 0) {
+				aoid = dmu_object_alloc(zfsvfs->z_os,
+				    otype, aclp->z_acl_bytes,
+				    otype == DMU_OT_ACL ?
+				    DMU_OT_SYSACL : DMU_OT_NONE,
+				    otype == DMU_OT_ACL ?
+				    DN_MAX_BONUSLEN : 0, tx);
+			} else {
+				(void) dmu_object_set_blocksize(zfsvfs->z_os,
+				    aoid, aclp->z_acl_bytes, 0, tx);
+			}
+			acl_phys.z_acl_extern_obj = aoid;
+			for (aclnode = list_head(&aclp->z_acl); aclnode;
+			    aclnode = list_next(&aclp->z_acl, aclnode)) {
+				if (aclnode->z_ace_count == 0)
+					continue;
+				dmu_write(zfsvfs->z_os, aoid, off,
+				    aclnode->z_size, aclnode->z_acldata, tx);
+				off += aclnode->z_size;
+			}
 		} else {
-			(void) dmu_object_set_blocksize(zfsvfs->z_os, aoid,
-			    aclp->z_acl_bytes, 0, tx);
-		}
-		zphys->zp_acl.z_acl_extern_obj = aoid;
-		for (aclnode = list_head(&aclp->z_acl); aclnode;
-		    aclnode = list_next(&aclp->z_acl, aclnode)) {
-			if (aclnode->z_ace_count == 0)
-				continue;
-			dmu_write(zfsvfs->z_os, aoid, off,
-			    aclnode->z_size, aclnode->z_acldata, tx);
-			off += aclnode->z_size;
+			void *start = acl_phys.z_ace_data;
+			/*
+			 * Migrating back embedded?
+			 */
+			if (acl_phys.z_acl_extern_obj) {
+				error = dmu_object_free(zfsvfs->z_os,
+				    acl_phys.z_acl_extern_obj, tx);
+				if (error)
+					return (error);
+				acl_phys.z_acl_extern_obj = 0;
+			}
+
+			for (aclnode = list_head(&aclp->z_acl); aclnode;
+			    aclnode = list_next(&aclp->z_acl, aclnode)) {
+				if (aclnode->z_ace_count == 0)
+					continue;
+				bcopy(aclnode->z_acldata, start,
+				    aclnode->z_size);
+				start = (caddr_t)start + aclnode->z_size;
+			}
 		}
-	} else {
-		void *start = zacl->z_ace_data;
 		/*
-		 * Migrating back embedded?
+		 * If Old version then swap count/bytes to match old
+		 * layout of znode_acl_phys_t.
 		 */
-		if (zphys->zp_acl.z_acl_extern_obj) {
-			error = dmu_object_free(zfsvfs->z_os,
-			    zp->z_phys->zp_acl.z_acl_extern_obj, tx);
-			if (error)
-				return (error);
-			zphys->zp_acl.z_acl_extern_obj = 0;
+		if (aclp->z_version == ZFS_ACL_VERSION_INITIAL) {
+			acl_phys.z_acl_size = aclp->z_acl_count;
+			acl_phys.z_acl_count = aclp->z_acl_bytes;
+		} else {
+			acl_phys.z_acl_size = aclp->z_acl_bytes;
+			acl_phys.z_acl_count = aclp->z_acl_count;
 		}
-
-		for (aclnode = list_head(&aclp->z_acl); aclnode;
-		    aclnode = list_next(&aclp->z_acl, aclnode)) {
-			if (aclnode->z_ace_count == 0)
-				continue;
-			bcopy(aclnode->z_acldata, start, aclnode->z_size);
-			start = (caddr_t)start + aclnode->z_size;
-		}
+		acl_phys.z_acl_version = aclp->z_version;
+
+		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ZNODE_ACL(zfsvfs), NULL,
+		    &acl_phys, sizeof (acl_phys));
 	}
 
 	/*
-	 * If Old version then swap count/bytes to match old
-	 * layout of znode_acl_phys_t.
-	 */
-	if (aclp->z_version == ZFS_ACL_VERSION_INITIAL) {
-		zphys->zp_acl.z_acl_size = aclp->z_acl_count;
-		zphys->zp_acl.z_acl_count = aclp->z_acl_bytes;
-	} else {
-		zphys->zp_acl.z_acl_size = aclp->z_acl_bytes;
-		zphys->zp_acl.z_acl_count = aclp->z_acl_count;
-	}
-
-	zphys->zp_acl.z_acl_version = aclp->z_version;
-
-	/*
 	 * Replace ACL wide bits, but first clear them.
 	 */
-	zp->z_phys->zp_flags &= ~ZFS_ACL_WIDE_FLAGS;
-
-	zp->z_phys->zp_flags |= aclp->z_hints;
+	zp->z_pflags &= ~ZFS_ACL_WIDE_FLAGS;
+
+	zp->z_pflags |= aclp->z_hints;
 
 	if (ace_trivial_common(aclp, 0, zfs_ace_walk) == 0)
-		zp->z_phys->zp_flags |= ZFS_ACL_TRIVIAL;
-
-	return (0);
+		zp->z_pflags |= ZFS_ACL_TRIVIAL;
+
+	zfs_tstamp_update_setup(zp, STATE_CHANGED, NULL, ctime, B_TRUE);
+	return (sa_bulk_update(zp->z_sa_hdl, bulk, count, tx));
 }
 
 /*
@@ -1643,11 +1744,20 @@
 
 	mutex_enter(&zp->z_lock);
 	mutex_enter(&zp->z_acl_lock);
+
 	*aclp = NULL;
 	error = zfs_acl_node_read(zp, aclp, B_TRUE);
 	if (error == 0) {
-		(*aclp)->z_hints = zp->z_phys->zp_flags & V4_ACL_WIDE_FLAGS;
-		zfs_acl_chmod(zp->z_zfsvfs, zp->z_phys->zp_uid, mode, *aclp);
+		uint64_t owner = (uint64_t)zp->z_uid;
+
+		/* Ephemeral: use on-disk uid; errors fall out to unlock. */
+		if (IS_EPHEMERAL(zp->z_uid))
+			error = sa_lookup(zp->z_sa_hdl,
+			    SA_ZPL_UID(zp->z_zfsvfs), &owner, sizeof (owner));
+		if (error == 0) {
+			(*aclp)->z_hints = zp->z_pflags & V4_ACL_WIDE_FLAGS;
+			zfs_acl_chmod(zp->z_zfsvfs, owner, mode, *aclp);
+		}
 	}
 	mutex_exit(&zp->z_acl_lock);
 	mutex_exit(&zp->z_lock);
@@ -1716,7 +1826,7 @@
 	*need_chmod = B_TRUE;
 	pacep = NULL;
 	aclp = zfs_acl_alloc(paclp->z_version);
-	if (zfsvfs->z_acl_inherit == ZFS_ACL_DISCARD)
+	if (zfsvfs->z_acl_inherit == ZFS_ACL_DISCARD || vtype == VLNK)
 		return (aclp);
 	while (pacep = zfs_acl_next_ace(paclp, pacep, &who,
 	    &access_mask, &iflags, &type)) {
@@ -1837,6 +1947,8 @@
 	zfs_acl_t	*paclp;
 	gid_t		gid;
 	boolean_t	need_chmod = B_TRUE;
+	boolean_t	inherited = B_FALSE;
+	uint64_t	parentgid;
 
 	bzero(acl_ids, sizeof (zfs_acl_ids_t));
 	acl_ids->z_mode = MAKEIMODE(vap->va_type, vap->va_mode);
@@ -1845,7 +1957,6 @@
 		if ((error = zfs_vsec_2_aclp(zfsvfs, vap->va_type, vsecp, cr,
 		    &acl_ids->z_fuidp, &acl_ids->z_aclp)) != 0)
 			return (error);
-
 	/*
 	 * Determine uid and gid.
 	 */
@@ -1859,6 +1970,12 @@
 		    ZFS_GROUP, &acl_ids->z_fuidp);
 		gid = vap->va_gid;
 	} else {
+		if (IS_EPHEMERAL(dzp->z_gid))
+			VERIFY(0 == sa_lookup(dzp->z_sa_hdl, SA_ZPL_GID(zfsvfs),
+			    &parentgid, sizeof (parentgid)));
+		else
+			parentgid = (uint64_t)dzp->z_gid;
+
 		acl_ids->z_fuid = zfs_fuid_create_cred(zfsvfs, ZFS_OWNER,
 		    cr, &acl_ids->z_fuidp);
 		acl_ids->z_fgid = 0;
@@ -1867,17 +1984,17 @@
 			    (uint64_t)vap->va_gid,
 			    cr, ZFS_GROUP, &acl_ids->z_fuidp);
 			gid = vap->va_gid;
-			if (acl_ids->z_fgid != dzp->z_phys->zp_gid &&
+			if (acl_ids->z_fgid != parentgid &&
 			    !groupmember(vap->va_gid, cr) &&
 			    secpolicy_vnode_create_gid(cr) != 0)
 				acl_ids->z_fgid = 0;
 		}
 		if (acl_ids->z_fgid == 0) {
-			if (dzp->z_phys->zp_mode & S_ISGID) {
+			if (dzp->z_mode & S_ISGID) {
 				char		*domain;
 				uint32_t	rid;
 
-				acl_ids->z_fgid = dzp->z_phys->zp_gid;
+				acl_ids->z_fgid = parentgid;
 				gid = zfs_fuid_map_id(zfsvfs, acl_ids->z_fgid,
 				    cr, ZFS_GROUP);
 
@@ -1907,7 +2024,7 @@
 	 * file's new group, clear the file's set-GID bit.
 	 */
 
-	if (!(flag & IS_ROOT_NODE) && (dzp->z_phys->zp_mode & S_ISGID) &&
+	if (!(flag & IS_ROOT_NODE) && (dzp->z_mode & S_ISGID) &&
 	    (vap->va_type == VDIR)) {
 		acl_ids->z_mode |= S_ISGID;
 	} else {
@@ -1919,26 +2036,35 @@
 	if (acl_ids->z_aclp == NULL) {
 		mutex_enter(&dzp->z_lock);
 		if (!(flag & IS_ROOT_NODE) && (ZTOV(dzp)->v_type == VDIR &&
-		    (dzp->z_phys->zp_flags & ZFS_INHERIT_ACE)) &&
-		    !(dzp->z_phys->zp_flags & ZFS_XATTR)) {
+		    (dzp->z_pflags & ZFS_INHERIT_ACE)) &&
+		    !(dzp->z_pflags & ZFS_XATTR)) {
 			mutex_enter(&dzp->z_acl_lock);
 			VERIFY(0 == zfs_acl_node_read(dzp, &paclp, B_FALSE));
-			mutex_exit(&dzp->z_acl_lock);
 			acl_ids->z_aclp = zfs_acl_inherit(zfsvfs,
 			    vap->va_type, paclp, acl_ids->z_mode, &need_chmod);
+			mutex_exit(&dzp->z_acl_lock);
+			inherited = B_TRUE;
 		} else {
 			acl_ids->z_aclp =
 			    zfs_acl_alloc(zfs_acl_version_zp(dzp));
+			acl_ids->z_aclp->z_hints |= ZFS_ACL_TRIVIAL;
 		}
 		mutex_exit(&dzp->z_lock);
 		if (need_chmod) {
-			acl_ids->z_aclp->z_hints = (vap->va_type == VDIR) ?
+			acl_ids->z_aclp->z_hints |= (vap->va_type == VDIR) ?
 			    ZFS_ACL_AUTO_INHERIT : 0;
 			zfs_acl_chmod(zfsvfs, acl_ids->z_fuid,
 			    acl_ids->z_mode, acl_ids->z_aclp);
 		}
 	}
 
+	if (inherited || vsecp) {
+		acl_ids->z_mode = zfs_mode_compute(acl_ids->z_mode,
+		    acl_ids->z_aclp, &acl_ids->z_aclp->z_hints);
+		if (ace_trivial_common(acl_ids->z_aclp, 0, zfs_ace_walk) == 0)
+			acl_ids->z_aclp->z_hints |= ZFS_ACL_TRIVIAL;
+	}
+
 	return (0);
 }
 
@@ -1959,8 +2085,8 @@
 boolean_t
 zfs_acl_ids_overquota(zfsvfs_t *zfsvfs, zfs_acl_ids_t *acl_ids)
 {
-	return (zfs_usergroup_overquota(zfsvfs, B_FALSE, acl_ids->z_fuid) ||
-	    zfs_usergroup_overquota(zfsvfs, B_TRUE, acl_ids->z_fgid));
+	return (zfs_fuid_overquota(zfsvfs, B_FALSE, acl_ids->z_fuid) ||
+	    zfs_fuid_overquota(zfsvfs, B_TRUE, acl_ids->z_fgid));
 }
 
 /*
@@ -1978,12 +2104,12 @@
 	mask = vsecp->vsa_mask & (VSA_ACE | VSA_ACECNT |
 	    VSA_ACE_ACLFLAGS | VSA_ACE_ALLTYPES);
 
+	if (mask == 0)
+		return (ENOSYS);
+
 	if (error = zfs_zaccess(zp, ACE_READ_ACL, 0, skipaclchk, cr))
 		return (error);
 
-	if (mask == 0)
-		return (ENOSYS);
-
 	mutex_enter(&zp->z_acl_lock);
 
 	error = zfs_acl_node_read(zp, &aclp, B_FALSE);
@@ -1995,8 +2121,7 @@
 	/*
 	 * Scan ACL to determine number of ACEs
 	 */
-	if ((zp->z_phys->zp_flags & ZFS_ACL_OBJ_ACE) &&
-	    !(mask & VSA_ACE_ALLTYPES)) {
+	if ((zp->z_pflags & ZFS_ACL_OBJ_ACE) && !(mask & VSA_ACE_ALLTYPES)) {
 		void *zacep = NULL;
 		uint64_t who;
 		uint32_t access_mask;
@@ -2017,7 +2142,7 @@
 		}
 		vsecp->vsa_aclcnt = count;
 	} else
-		count = aclp->z_acl_count;
+		count = (int)aclp->z_acl_count;
 
 	if (mask & VSA_ACECNT) {
 		vsecp->vsa_aclcnt = count;
@@ -2051,11 +2176,11 @@
 	}
 	if (mask & VSA_ACE_ACLFLAGS) {
 		vsecp->vsa_aclflags = 0;
-		if (zp->z_phys->zp_flags & ZFS_ACL_DEFAULTED)
+		if (zp->z_pflags & ZFS_ACL_DEFAULTED)
 			vsecp->vsa_aclflags |= ACL_DEFAULTED;
-		if (zp->z_phys->zp_flags & ZFS_ACL_PROTECTED)
+		if (zp->z_pflags & ZFS_ACL_PROTECTED)
 			vsecp->vsa_aclflags |= ACL_PROTECTED;
-		if (zp->z_phys->zp_flags & ZFS_ACL_AUTO_INHERIT)
+		if (zp->z_pflags & ZFS_ACL_AUTO_INHERIT)
 			vsecp->vsa_aclflags |= ACL_AUTO_INHERIT;
 	}
 
@@ -2137,7 +2262,7 @@
 	if (mask == 0)
 		return (ENOSYS);
 
-	if (zp->z_phys->zp_flags & ZFS_IMMUTABLE)
+	if (zp->z_pflags & ZFS_IMMUTABLE)
 		return (EPERM);
 
 	if (error = zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr))
@@ -2153,37 +2278,40 @@
 	 * existing flags.
 	 */
 	if (!(vsecp->vsa_mask & VSA_ACE_ACLFLAGS)) {
-		aclp->z_hints |= (zp->z_phys->zp_flags & V4_ACL_WIDE_FLAGS);
+		aclp->z_hints |=
+		    (zp->z_pflags & V4_ACL_WIDE_FLAGS);
 	}
 top:
 	mutex_enter(&zp->z_lock);
 	mutex_enter(&zp->z_acl_lock);
 
 	tx = dmu_tx_create(zfsvfs->z_os);
-	dmu_tx_hold_bonus(tx, zp->z_id);
-
-	if (zp->z_phys->zp_acl.z_acl_extern_obj) {
-		/* Are we upgrading ACL? */
-		if (zfsvfs->z_version <= ZPL_VERSION_FUID &&
-		    zp->z_phys->zp_acl.z_acl_version ==
-		    ZFS_ACL_VERSION_INITIAL) {
-			dmu_tx_hold_free(tx,
-			    zp->z_phys->zp_acl.z_acl_extern_obj,
-			    0, DMU_OBJECT_END);
-			dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
-			    0, aclp->z_acl_bytes);
-		} else {
-			dmu_tx_hold_write(tx,
-			    zp->z_phys->zp_acl.z_acl_extern_obj,
-			    0, aclp->z_acl_bytes);
-		}
-	} else if (aclp->z_acl_bytes > ZFS_ACE_SPACE) {
-		dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, aclp->z_acl_bytes);
-	}
+
+	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
+
 	fuid_dirtied = zfsvfs->z_fuid_dirty;
 	if (fuid_dirtied)
 		zfs_fuid_txhold(zfsvfs, tx);
 
+	/*
+	 * If old version and ACL won't fit in bonus and we aren't
+	 * upgrading then take out necessary DMU holds
+	 */
+
+	if (ZFS_EXTERNAL_ACL(zp)) {
+		if (zfsvfs->z_version <= ZPL_VERSION_SA &&
+		    ZNODE_ACL_VERSION(zp) <= ZFS_ACL_VERSION_INITIAL) {
+			dmu_tx_hold_free(tx, ZFS_EXTERNAL_ACL(zp), 0,
+			    DMU_OBJECT_END);
+		} else {
+			dmu_tx_hold_write(tx, ZFS_EXTERNAL_ACL(zp),
+			    0, aclp->z_acl_bytes);
+		}
+	} else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) {
+		dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, aclp->z_acl_bytes);
+	}
+
+	zfs_sa_upgrade_txholds(tx, zp);
 	error = dmu_tx_assign(tx, TXG_NOWAIT);
 	if (error) {
 		mutex_exit(&zp->z_acl_lock);
@@ -2206,7 +2334,6 @@
 	if (fuid_dirtied)
 		zfs_fuid_sync(zfsvfs, tx);
 
-	zfs_time_stamper_locked(zp, STATE_CHANGED, tx);
 	zfs_log_acl(zilog, tx, zp, vsecp, fuidp);
 
 	if (fuidp)
@@ -2239,19 +2366,19 @@
 	 */
 	if ((v4_mode & WRITE_MASK_DATA) &&
 	    (((ZTOV(zp)->v_type != VDIR) &&
-	    (zp->z_phys->zp_flags & (ZFS_READONLY | ZFS_IMMUTABLE))) ||
+	    (zp->z_pflags & (ZFS_READONLY | ZFS_IMMUTABLE))) ||
 	    (ZTOV(zp)->v_type == VDIR &&
-	    (zp->z_phys->zp_flags & ZFS_IMMUTABLE)))) {
+	    (zp->z_pflags & ZFS_IMMUTABLE)))) {
 		return (EPERM);
 	}
 
 	if ((v4_mode & (ACE_DELETE | ACE_DELETE_CHILD)) &&
-	    (zp->z_phys->zp_flags & ZFS_NOUNLINK)) {
+	    (zp->z_pflags & ZFS_NOUNLINK)) {
 		return (EPERM);
 	}
 
 	if (((v4_mode & (ACE_READ_DATA|ACE_EXECUTE)) &&
-	    (zp->z_phys->zp_flags & ZFS_AV_QUARANTINED))) {
+	    (zp->z_pflags & ZFS_AV_QUARANTINED))) {
 		return (EACCES);
 	}
 
@@ -2298,10 +2425,7 @@
 	uint32_t	deny_mask = 0;
 	zfs_ace_hdr_t	*acep = NULL;
 	boolean_t	checkit;
-	uid_t		fowner;
-	uid_t		gowner;
-
-	zfs_fuid_map_ids(zp, cr, &fowner, &gowner);
+	uint64_t	gowner;
 
 	mutex_enter(&zp->z_acl_lock);
 
@@ -2311,6 +2435,12 @@
 		return (error);
 	}
 
+	ASSERT(zp->z_acl_cached);
+	if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_GID(zfsvfs),
+	    &gowner, sizeof (gowner))) != 0) {
+		mutex_exit(&zp->z_acl_lock);	/* don't leak the acl lock */
+		return (error);
+	}
 	while (acep = zfs_acl_next_ace(aclp, acep, &who, &access_mask,
 	    &iflags, &type)) {
 		uint32_t mask_matched;
@@ -2332,7 +2462,7 @@
 
 		switch (entry_type) {
 		case ACE_OWNER:
-			if (uid == fowner)
+			if (uid == zp->z_uid)
 				checkit = B_TRUE;
 			break;
 		case OWNING_GROUP:
@@ -2410,17 +2540,14 @@
 	uint32_t have = ACE_ALL_PERMS;
 
 	if (zfs_zaccess_aces_check(zp, &have, B_TRUE, cr) != 0) {
-		uid_t		owner;
-
-		owner = zfs_fuid_map_id(zp->z_zfsvfs,
-		    zp->z_phys->zp_uid, cr, ZFS_OWNER);
-
 		return (
-		    secpolicy_vnode_access(cr, ZTOV(zp), owner, VREAD) == 0 ||
-		    secpolicy_vnode_access(cr, ZTOV(zp), owner, VWRITE) == 0 ||
-		    secpolicy_vnode_access(cr, ZTOV(zp), owner, VEXEC) == 0 ||
-		    secpolicy_vnode_chown(cr, owner) == 0 ||
-		    secpolicy_vnode_setdac(cr, owner) == 0 ||
+		    secpolicy_vnode_access(cr, ZTOV(zp),
+		    zp->z_uid, VREAD) == 0 || secpolicy_vnode_access(cr,
+		    ZTOV(zp), zp->z_uid, VWRITE) == 0 ||
+		    secpolicy_vnode_access(cr, ZTOV(zp),
+		    zp->z_uid, VEXEC) == 0 ||
+		    secpolicy_vnode_chown(cr, zp->z_uid) == 0 ||
+		    secpolicy_vnode_setdac(cr, zp->z_uid) == 0 ||
 		    secpolicy_vnode_remove(cr) == 0);
 	}
 	return (B_TRUE);
@@ -2479,38 +2606,33 @@
 	boolean_t owner = B_FALSE;
 	boolean_t groupmbr = B_FALSE;
 	boolean_t is_attr;
-	uid_t fowner;
-	uid_t gowner;
 	uid_t uid = crgetuid(cr);
 	int error;
 
-	if (zdp->z_phys->zp_flags & ZFS_AV_QUARANTINED)
+	if (zdp->z_pflags & ZFS_AV_QUARANTINED)
 		return (EACCES);
 
-	is_attr = ((zdp->z_phys->zp_flags & ZFS_XATTR) &&
+	is_attr = ((zdp->z_pflags & ZFS_XATTR) &&
 	    (ZTOV(zdp)->v_type == VDIR));
 	if (is_attr)
 		goto slow;
 
+
 	mutex_enter(&zdp->z_acl_lock);
 
-	if (zdp->z_phys->zp_flags & ZFS_NO_EXECS_DENIED) {
+	if (zdp->z_pflags & ZFS_NO_EXECS_DENIED) {
 		mutex_exit(&zdp->z_acl_lock);
 		return (0);
 	}
 
-	if (FUID_INDEX(zdp->z_phys->zp_uid) != 0 ||
-	    FUID_INDEX(zdp->z_phys->zp_gid) != 0) {
+	if (IS_EPHEMERAL(zdp->z_uid) != 0 || IS_EPHEMERAL(zdp->z_gid) != 0) {
 		mutex_exit(&zdp->z_acl_lock);
 		goto slow;
 	}
 
-	fowner = (uid_t)zdp->z_phys->zp_uid;
-	gowner = (uid_t)zdp->z_phys->zp_gid;
-
-	if (uid == fowner) {
+	if (uid == zdp->z_uid) {
 		owner = B_TRUE;
-		if (zdp->z_phys->zp_mode & S_IXUSR) {
+		if (zdp->z_mode & S_IXUSR) {
 			mutex_exit(&zdp->z_acl_lock);
 			return (0);
 		} else {
@@ -2518,9 +2640,9 @@
 			goto slow;
 		}
 	}
-	if (groupmember(gowner, cr)) {
+	if (groupmember(zdp->z_gid, cr)) {
 		groupmbr = B_TRUE;
-		if (zdp->z_phys->zp_mode & S_IXGRP) {
+		if (zdp->z_mode & S_IXGRP) {
 			mutex_exit(&zdp->z_acl_lock);
 			return (0);
 		} else {
@@ -2529,7 +2651,7 @@
 		}
 	}
 	if (!owner && !groupmbr) {
-		if (zdp->z_phys->zp_mode & S_IXOTH) {
+		if (zdp->z_mode & S_IXOTH) {
 			mutex_exit(&zdp->z_acl_lock);
 			return (0);
 		}
@@ -2555,20 +2677,25 @@
 	uint32_t	working_mode;
 	int		error;
 	int		is_attr;
-	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
 	boolean_t 	check_privs;
 	znode_t		*xzp;
 	znode_t 	*check_zp = zp;
 
-	is_attr = ((zp->z_phys->zp_flags & ZFS_XATTR) &&
-	    (ZTOV(zp)->v_type == VDIR));
+	is_attr = ((zp->z_pflags & ZFS_XATTR) && (ZTOV(zp)->v_type == VDIR));
 
 	/*
 	 * If attribute then validate against base file
 	 */
 	if (is_attr) {
+		uint64_t	parent;
+
+		if ((error = sa_lookup(zp->z_sa_hdl,
+		    SA_ZPL_PARENT(zp->z_zfsvfs), &parent,
+		    sizeof (parent))) != 0)
+			return (error);
+
 		if ((error = zfs_zget(zp->z_zfsvfs,
-		    zp->z_phys->zp_parent, &xzp)) != 0)	{
+		    parent, &xzp)) != 0)	{
 			return (error);
 		}
 
@@ -2607,12 +2734,8 @@
 	}
 
 	if (error && check_privs) {
-		uid_t		owner;
 		mode_t		checkmode = 0;
 
-		owner = zfs_fuid_map_id(zfsvfs, check_zp->z_phys->zp_uid, cr,
-		    ZFS_OWNER);
-
 		/*
 		 * First check for implicit owner permission on
 		 * read_acl/read_attributes
@@ -2622,7 +2745,7 @@
 		ASSERT(working_mode != 0);
 
 		if ((working_mode & (ACE_READ_ACL|ACE_READ_ATTRIBUTES) &&
-		    owner == crgetuid(cr)))
+		    check_zp->z_uid == crgetuid(cr)))
 			working_mode &= ~(ACE_READ_ACL|ACE_READ_ATTRIBUTES);
 
 		if (working_mode & (ACE_READ_DATA|ACE_READ_NAMED_ATTRS|
@@ -2636,19 +2759,19 @@
 
 		if (checkmode)
 			error = secpolicy_vnode_access(cr, ZTOV(check_zp),
-			    owner, checkmode);
+			    check_zp->z_uid, checkmode);
 
 		if (error == 0 && (working_mode & ACE_WRITE_OWNER))
-			error = secpolicy_vnode_chown(cr, owner);
+			error = secpolicy_vnode_chown(cr, check_zp->z_uid);
 		if (error == 0 && (working_mode & ACE_WRITE_ACL))
-			error = secpolicy_vnode_setdac(cr, owner);
+			error = secpolicy_vnode_setdac(cr, check_zp->z_uid);
 
 		if (error == 0 && (working_mode &
 		    (ACE_DELETE|ACE_DELETE_CHILD)))
 			error = secpolicy_vnode_remove(cr);
 
 		if (error == 0 && (working_mode & ACE_SYNCHRONIZE)) {
-			error = secpolicy_vnode_chown(cr, owner);
+			error = secpolicy_vnode_chown(cr, check_zp->z_uid);
 		}
 		if (error == 0) {
 			/*
@@ -2693,12 +2816,9 @@
     mode_t missing_perms, cred_t *cr)
 {
 	int error;
-	uid_t downer;
-	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
-
-	downer = zfs_fuid_map_id(zfsvfs, dzp->z_phys->zp_uid, cr, ZFS_OWNER);
-
-	error = secpolicy_vnode_access(cr, ZTOV(dzp), downer, missing_perms);
+
+	error = secpolicy_vnode_access(cr, ZTOV(dzp),
+	    dzp->z_uid, missing_perms);
 
 	if (error == 0)
 		error = zfs_sticky_remove_access(dzp, zp, cr);
@@ -2765,7 +2885,7 @@
 	 * to determine what was found.
 	 */
 
-	if (zp->z_phys->zp_flags & (ZFS_IMMUTABLE | ZFS_NOUNLINK))
+	if (zp->z_pflags & (ZFS_IMMUTABLE | ZFS_NOUNLINK))
 		return (EPERM);
 
 	/*
@@ -2835,7 +2955,7 @@
 	int add_perm;
 	int error;
 
-	if (szp->z_phys->zp_flags & ZFS_AV_QUARANTINED)
+	if (szp->z_pflags & ZFS_AV_QUARANTINED)
 		return (EACCES);
 
 	add_perm = (ZTOV(szp)->v_type == VDIR) ?

--- a/usr/src/uts/common/fs/zfs/zfs_byteswap.c	Tue Mar 16 06:44:44 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/zfs_byteswap.c	Tue Mar 16 09:43:38 2010 -0600
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -27,6 +27,7 @@
 #include <sys/vfs.h>
 #include <sys/fs/zfs.h>
 #include <sys/zfs_znode.h>
+#include <sys/zfs_sa.h>
 #include <sys/zfs_acl.h>
 
 void

--- a/usr/src/uts/common/fs/zfs/zfs_ctldir.c	Tue Mar 16 06:44:44 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/zfs_ctldir.c	Tue Mar 16 09:43:38 2010 -0600
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -215,6 +215,7 @@
 {
 	vnode_t *vp, *rvp;
 	zfsctl_node_t *zcp;
+	uint64_t crtime[2];
 
 	ASSERT(zfsvfs->z_ctldir == NULL);
 
@@ -225,7 +226,9 @@
 	zcp->zc_id = ZFSCTL_INO_ROOT;
 
 	VERIFY(VFS_ROOT(zfsvfs->z_vfs, &rvp) == 0);
-	ZFS_TIME_DECODE(&zcp->zc_cmtime, VTOZ(rvp)->z_phys->zp_crtime);
+	VERIFY(0 == sa_lookup(VTOZ(rvp)->z_sa_hdl, SA_ZPL_CRTIME(zfsvfs),
+	    &crtime, sizeof (crtime)));
+	ZFS_TIME_DECODE(&zcp->zc_cmtime, crtime);
 	VN_RELE(rvp);
 
 	/*

--- a/usr/src/uts/common/fs/zfs/zfs_dir.c	Tue Mar 16 06:44:44 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/zfs_dir.c	Tue Mar 16 09:43:38 2010 -0600
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -52,6 +52,8 @@
 #include <sys/atomic.h>
 #include <sys/zfs_ctldir.h>
 #include <sys/zfs_fuid.h>
+#include <sys/sa.h>
+#include <sys/zfs_sa.h>
 #include <sys/dnlc.h>
 #include <sys/extdirent.h>
 
@@ -286,8 +288,10 @@
 	 * See if there's an object by this name; if so, put a hold on it.
 	 */
 	if (flag & ZXATTR) {
-		zoid = dzp->z_phys->zp_xattr;
-		error = (zoid == 0 ? ENOENT : 0);
+		error = sa_lookup(dzp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &zoid,
+		    sizeof (zoid));
+		if (error == 0)
+			error = (zoid == 0 ? ENOENT : 0);
 	} else {
 		if (update)
 			vp = dnlc_lookup(ZTOV(dzp), name);
@@ -379,25 +383,29 @@
 	zfs_dirlock_t *dl;
 	znode_t *zp;
 	int error = 0;
+	uint64_t parent;
 
 	if (name[0] == 0 || (name[0] == '.' && name[1] == 0)) {
 		*vpp = ZTOV(dzp);
 		VN_HOLD(*vpp);
 	} else if (name[0] == '.' && name[1] == '.' && name[2] == 0) {
 		zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
+
 		/*
 		 * If we are a snapshot mounted under .zfs, return
 		 * the vp for the snapshot directory.
 		 */
-		if (dzp->z_phys->zp_parent == dzp->z_id &&
-		    zfsvfs->z_parent != zfsvfs) {
+		if ((error = sa_lookup(dzp->z_sa_hdl,
+		    SA_ZPL_PARENT(zfsvfs), &parent, sizeof (parent))) != 0)
+			return (error);
+		if (parent == dzp->z_id && zfsvfs->z_parent != zfsvfs) {
 			error = zfsctl_root_lookup(zfsvfs->z_parent->z_ctldir,
 			    "snapshot", vpp, NULL, 0, NULL, kcred,
 			    NULL, NULL, NULL);
 			return (error);
 		}
 		rw_enter(&dzp->z_parent_lock, RW_READER);
-		error = zfs_zget(zfsvfs, dzp->z_phys->zp_parent, &zp);
+		error = zfs_zget(zfsvfs, parent, &zp);
 		if (error == 0)
 			*vpp = ZTOV(zp);
 		rw_exit(&dzp->z_parent_lock);
@@ -445,7 +453,7 @@
 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
 
 	ASSERT(zp->z_unlinked);
-	ASSERT3U(zp->z_phys->zp_links, ==, 0);
+	ASSERT(zp->z_links == 0);
 
 	VERIFY3U(0, ==,
 	    zap_add_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj, zp->z_id, tx));
@@ -540,10 +548,12 @@
 		    (ZTOV(xzp)->v_type == VLNK));
 
 		tx = dmu_tx_create(zfsvfs->z_os);
-		dmu_tx_hold_bonus(tx, dzp->z_id);
+		dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
 		dmu_tx_hold_zap(tx, dzp->z_id, FALSE, zap.za_name);
-		dmu_tx_hold_bonus(tx, xzp->z_id);
+		dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
 		dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
+		/* Is this really needed ? */
+		zfs_sa_upgrade_txholds(tx, xzp);
 		error = dmu_tx_assign(tx, TXG_WAIT);
 		if (error) {
 			dmu_tx_abort(tx);
@@ -576,15 +586,16 @@
 	znode_t		*xzp = NULL;
 	dmu_tx_t	*tx;
 	uint64_t	acl_obj;
+	uint64_t	xattr_obj;
 	int		error;
 
+	ASSERT(zp->z_links == 0);
 	ASSERT(ZTOV(zp)->v_count == 0);
-	ASSERT(zp->z_phys->zp_links == 0);
 
 	/*
 	 * If this is an attribute directory, purge its contents.
 	 */
-	if (ZTOV(zp)->v_type == VDIR && (zp->z_phys->zp_flags & ZFS_XATTR)) {
+	if (ZTOV(zp)->v_type == VDIR && (zp->z_pflags & ZFS_XATTR)) {
 		if (zfs_purgedir(zp) != 0) {
 			/*
 			 * Not enough space to delete some xattrs.
@@ -613,12 +624,14 @@
 	 * If the file has extended attributes, we're going to unlink
 	 * the xattr dir.
 	 */
-	if (zp->z_phys->zp_xattr) {
-		error = zfs_zget(zfsvfs, zp->z_phys->zp_xattr, &xzp);
+	error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
+	    &xattr_obj, sizeof (xattr_obj));
+	if (error == 0 && xattr_obj) {
+		error = zfs_zget(zfsvfs, xattr_obj, &xzp);
 		ASSERT(error == 0);
 	}
 
-	acl_obj = zp->z_phys->zp_acl.z_acl_extern_obj;
+	acl_obj = ZFS_EXTERNAL_ACL(zp);
 
 	/*
 	 * Set up the final transaction.
@@ -627,11 +640,13 @@
 	dmu_tx_hold_free(tx, zp->z_id, 0, DMU_OBJECT_END);
 	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
 	if (xzp) {
-		dmu_tx_hold_bonus(tx, xzp->z_id);
 		dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, TRUE, NULL);
+		dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
 	}
 	if (acl_obj)
 		dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END);
+
+	zfs_sa_upgrade_txholds(tx, zp);
 	error = dmu_tx_assign(tx, TXG_WAIT);
 	if (error) {
 		/*
@@ -646,10 +661,12 @@
 	}
 
 	if (xzp) {
-		dmu_buf_will_dirty(xzp->z_dbuf, tx);
+		ASSERT(error == 0);
 		mutex_enter(&xzp->z_lock);
 		xzp->z_unlinked = B_TRUE;	/* mark xzp for deletion */
-		xzp->z_phys->zp_links = 0;	/* no more links to it */
+		xzp->z_links = 0;	/* no more links to it */
+		VERIFY(0 == sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs),
+		    &xzp->z_links, sizeof (xzp->z_links), tx));
 		mutex_exit(&xzp->z_lock);
 		zfs_unlinked_add(xzp, tx);
 	}
@@ -667,11 +684,12 @@
 }
 
 static uint64_t
-zfs_dirent(znode_t *zp)
+zfs_dirent(znode_t *zp, uint64_t mode)
 {
 	uint64_t de = zp->z_id;
+
 	if (zp->z_zfsvfs->z_version >= ZPL_VERSION_DIRENT_TYPE)
-		de |= IFTODT((zp)->z_phys->zp_mode) << 60;
+		de |= IFTODT(mode) << 60;
 	return (de);
 }
 
@@ -682,12 +700,15 @@
 zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag)
 {
 	znode_t *dzp = dl->dl_dzp;
+	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
 	vnode_t *vp = ZTOV(zp);
 	uint64_t value;
 	int zp_is_dir = (vp->v_type == VDIR);
+	sa_bulk_attr_t bulk[5];
+	uint64_t mtime[2], ctime[2];
+	int count = 0;
 	int error;
 
-	dmu_buf_will_dirty(zp->z_dbuf, tx);
 	mutex_enter(&zp->z_lock);
 
 	if (!(flag & ZRENAMING)) {
@@ -696,22 +717,47 @@
 			mutex_exit(&zp->z_lock);
 			return (ENOENT);
 		}
-		zp->z_phys->zp_links++;
+		zp->z_links++;
+		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL,
+		    &zp->z_links, sizeof (zp->z_links));
+
 	}
-	zp->z_phys->zp_parent = dzp->z_id;	/* dzp is now zp's parent */
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL,
+	    &dzp->z_id, sizeof (dzp->z_id));
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
+	    &zp->z_pflags, sizeof (zp->z_pflags));
 
-	if (!(flag & ZNEW))
-		zfs_time_stamper_locked(zp, STATE_CHANGED, tx);
+	if (!(flag & ZNEW)) {
+		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
+		    ctime, sizeof (ctime));
+		zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime,
+		    ctime, B_TRUE);
+	}
+	error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
+	ASSERT(error == 0);
+
 	mutex_exit(&zp->z_lock);
 
-	dmu_buf_will_dirty(dzp->z_dbuf, tx);
 	mutex_enter(&dzp->z_lock);
-	dzp->z_phys->zp_size++;			/* one dirent added */
-	dzp->z_phys->zp_links += zp_is_dir;	/* ".." link from zp */
-	zfs_time_stamper_locked(dzp, CONTENT_MODIFIED, tx);
+	dzp->z_size++;
+	dzp->z_links += zp_is_dir;
+	count = 0;
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
+	    &dzp->z_size, sizeof (dzp->z_size));
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL,
+	    &dzp->z_links, sizeof (dzp->z_links));
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
+	    mtime, sizeof (mtime));
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
+	    ctime, sizeof (ctime));
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
+	    &dzp->z_pflags, sizeof (dzp->z_pflags));
+	zfs_tstamp_update_setup(dzp, CONTENT_MODIFIED, mtime, ctime, B_TRUE);
+	error = sa_bulk_update(dzp->z_sa_hdl, bulk, count, tx);
+	ASSERT(error == 0);
 	mutex_exit(&dzp->z_lock);
 
-	value = zfs_dirent(zp);
+	value = zfs_dirent(zp, zp->z_mode);
 	error = zap_add(zp->z_zfsvfs->z_os, dzp->z_id, dl->dl_name,
 	    8, 1, &value, tx);
 	ASSERT(error == 0);
@@ -733,16 +779,18 @@
 	boolean_t *unlinkedp)
 {
 	znode_t *dzp = dl->dl_dzp;
+	zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
 	vnode_t *vp = ZTOV(zp);
 	int zp_is_dir = (vp->v_type == VDIR);
 	boolean_t unlinked = B_FALSE;
+	sa_bulk_attr_t bulk[5];
+	uint64_t mtime[2], ctime[2];
+	int count = 0;
 	int error;
 
 	dnlc_remove(ZTOV(dzp), dl->dl_name);
 
 	if (!(flag & ZRENAMING)) {
-		dmu_buf_will_dirty(zp->z_dbuf, tx);
-
 		if (vn_vfswlock(vp))		/* prevent new mounts on zp */
 			return (EBUSY);
 
@@ -752,35 +800,58 @@
 		}
 
 		mutex_enter(&zp->z_lock);
-		if (zp_is_dir && !zfs_dirempty(zp)) {	/* dir not empty */
+
+		if (zp_is_dir && !zfs_dirempty(zp)) {
 			mutex_exit(&zp->z_lock);
 			vn_vfsunlock(vp);
 			return (EEXIST);
 		}
-		if (zp->z_phys->zp_links <= zp_is_dir) {
+
+		if (zp->z_links <= zp_is_dir) {
 			zfs_panic_recover("zfs: link count on %s is %u, "
 			    "should be at least %u",
 			    zp->z_vnode->v_path ? zp->z_vnode->v_path :
-			    "<unknown>", (int)zp->z_phys->zp_links,
+			    "<unknown>", (int)zp->z_links,
 			    zp_is_dir + 1);
-			zp->z_phys->zp_links = zp_is_dir + 1;
+			zp->z_links = zp_is_dir + 1;
 		}
-		if (--zp->z_phys->zp_links == zp_is_dir) {
+		if (--zp->z_links == zp_is_dir) {
 			zp->z_unlinked = B_TRUE;
-			zp->z_phys->zp_links = 0;
+			zp->z_links = 0;
 			unlinked = B_TRUE;
 		} else {
-			zfs_time_stamper_locked(zp, STATE_CHANGED, tx);
+			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs),
+			    NULL, &ctime, sizeof (ctime));
+			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs),
+			    NULL, &zp->z_pflags, sizeof (zp->z_pflags));
+			zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime,
+			    B_TRUE);
 		}
+		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs),
+		    NULL, &zp->z_links, sizeof (zp->z_links));
+		error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
+		count = 0;
+		ASSERT(error == 0);
 		mutex_exit(&zp->z_lock);
 		vn_vfsunlock(vp);
 	}
 
-	dmu_buf_will_dirty(dzp->z_dbuf, tx);
 	mutex_enter(&dzp->z_lock);
-	dzp->z_phys->zp_size--;			/* one dirent removed */
-	dzp->z_phys->zp_links -= zp_is_dir;	/* ".." link from zp */
-	zfs_time_stamper_locked(dzp, CONTENT_MODIFIED, tx);
+	dzp->z_size--;		/* one dirent removed */
+	dzp->z_links -= zp_is_dir;	/* ".." link from zp */
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs),
+	    NULL, &dzp->z_links, sizeof (dzp->z_links));
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs),
+	    NULL, &dzp->z_size, sizeof (dzp->z_size));
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs),
+	    NULL, ctime, sizeof (ctime));
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs),
+	    NULL, mtime, sizeof (mtime));
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs),
+	    NULL, &dzp->z_pflags, sizeof (dzp->z_pflags));
+	zfs_tstamp_update_setup(dzp, CONTENT_MODIFIED, mtime, ctime, B_TRUE);
+	error = sa_bulk_update(dzp->z_sa_hdl, bulk, count, tx);
+	ASSERT(error == 0);
 	mutex_exit(&dzp->z_lock);
 
 	if (zp->z_zfsvfs->z_norm) {
@@ -815,7 +886,7 @@
 boolean_t
 zfs_dirempty(znode_t *dzp)
 {
-	return (dzp->z_phys->zp_size == 2 && dzp->z_dirlocks == 0);
+	return (dzp->z_size == 2 && dzp->z_dirlocks == 0);
 }
 
 int
@@ -827,6 +898,7 @@
 	int error;
 	zfs_acl_ids_t acl_ids;
 	boolean_t fuid_dirtied;
+	uint64_t parent;
 
 	*xvpp = NULL;
 
@@ -842,7 +914,9 @@
 	}
 
 	tx = dmu_tx_create(zfsvfs->z_os);
-	dmu_tx_hold_bonus(tx, zp->z_id);
+	dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
+	    ZFS_SA_BASE_ATTR_SIZE);
+	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
 	dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
 	fuid_dirtied = zfsvfs->z_fuid_dirty;
 	if (fuid_dirtied)
@@ -855,14 +929,18 @@
 		dmu_tx_abort(tx);
 		return (error);
 	}
-	zfs_mknode(zp, vap, tx, cr, IS_XATTR, &xzp, 0, &acl_ids);
+	zfs_mknode(zp, vap, tx, cr, IS_XATTR, &xzp, &acl_ids);
 
 	if (fuid_dirtied)
 		zfs_fuid_sync(zfsvfs, tx);
 
-	ASSERT(xzp->z_phys->zp_parent == zp->z_id);
-	dmu_buf_will_dirty(zp->z_dbuf, tx);
-	zp->z_phys->zp_xattr = xzp->z_id;
+	if ((error = sa_lookup(xzp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
+	    &parent, sizeof (parent))) != 0)
+		return (0);
+
+	ASSERT(parent == zp->z_id);
+	VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &xzp->z_id,
+	    sizeof (xzp->z_id), tx));
 
 	(void) zfs_log_create(zfsvfs->z_log, tx, TX_MKXATTR, zp,
 	    xzp, "", NULL, acl_ids.z_fuidp, vap);
@@ -907,7 +985,6 @@
 		return (0);
 	}
 
-	ASSERT(zp->z_phys->zp_xattr == 0);
 
 	if (!(flags & CREATE_XATTR_DIR)) {
 		zfs_dirent_unlock(dl);
@@ -962,20 +1039,14 @@
 zfs_sticky_remove_access(znode_t *zdp, znode_t *zp, cred_t *cr)
 {
 	uid_t  		uid;
-	uid_t		downer;
-	uid_t		fowner;
-	zfsvfs_t	*zfsvfs = zdp->z_zfsvfs;
 
 	if (zdp->z_zfsvfs->z_replay)
 		return (0);
 
-	if ((zdp->z_phys->zp_mode & S_ISVTX) == 0)
+	if ((zdp->z_mode & S_ISVTX) == 0)
 		return (0);
 
-	downer = zfs_fuid_map_id(zfsvfs, zdp->z_phys->zp_uid, cr, ZFS_OWNER);
-	fowner = zfs_fuid_map_id(zfsvfs, zp->z_phys->zp_uid, cr, ZFS_OWNER);
-
-	if ((uid = crgetuid(cr)) == downer || uid == fowner ||
+	if ((uid = crgetuid(cr)) == zdp->z_uid || uid == zp->z_uid ||
 	    (ZTOV(zp)->v_type == VREG &&
 	    zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr) == 0))
 		return (0);

--- a/usr/src/uts/common/fs/zfs/zfs_fuid.c	Tue Mar 16 06:44:44 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/zfs_fuid.c	Tue Mar 16 09:43:38 2010 -0600
@@ -389,10 +389,26 @@
 void
 zfs_fuid_map_ids(znode_t *zp, cred_t *cr, uid_t *uidp, uid_t *gidp)
 {
-	*uidp = zfs_fuid_map_id(zp->z_zfsvfs, zp->z_phys->zp_uid,
-	    cr, ZFS_OWNER);
-	*gidp = zfs_fuid_map_id(zp->z_zfsvfs, zp->z_phys->zp_gid,
-	    cr, ZFS_GROUP);
+	uint64_t fuid, fgid;
+	sa_bulk_attr_t bulk[2];
+	int count = 0;
+
+	if (IS_EPHEMERAL(zp->z_uid) || IS_EPHEMERAL(zp->z_gid)) {
+		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zp->z_zfsvfs),
+		    NULL, &fuid, 8);
+		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zp->z_zfsvfs),
+		    NULL, &fgid, 8);
+		VERIFY(0 == sa_bulk_lookup(zp->z_sa_hdl, bulk, count));
+	}
+	if (IS_EPHEMERAL(zp->z_uid))
+		*uidp = zfs_fuid_map_id(zp->z_zfsvfs, zp->z_uid, cr, ZFS_OWNER);
+	else
+		*uidp = zp->z_uid;
+	if (IS_EPHEMERAL(zp->z_gid))
+		*gidp = zfs_fuid_map_id(zp->z_zfsvfs,
+		    zp->z_gid, cr, ZFS_GROUP);
+	else
+		*gidp = zp->z_gid;
 }
 
 uid_t

--- a/usr/src/uts/common/fs/zfs/zfs_ioctl.c	Tue Mar 16 06:44:44 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/zfs_ioctl.c	Tue Mar 16 09:43:38 2010 -0600
@@ -68,6 +68,7 @@
 #include "zfs_namecheck.h"
 #include "zfs_prop.h"
 #include "zfs_deleg.h"
+#include "zfs_comutil.h"
 
 extern struct modlfs zfs_modlfs;
 
@@ -1954,20 +1955,10 @@
 	case ZFS_PROP_VERSION:
 	{
 		zfsvfs_t *zfsvfs;
-		uint64_t maxzplver = ZPL_VERSION;
 
 		if ((err = zfsvfs_hold(dsname, FTAG, &zfsvfs)) != 0)
 			break;
 
-		if (zfs_earlier_version(dsname, SPA_VERSION_USERSPACE))
-			maxzplver = ZPL_VERSION_USERSPACE - 1;
-		if (zfs_earlier_version(dsname, SPA_VERSION_FUID))
-			maxzplver = ZPL_VERSION_FUID - 1;
-		if (intval > maxzplver) {
-			zfsvfs_rele(zfsvfs, FTAG);
-			return (ENOTSUP);
-		}
-
 		err = zfs_set_version(zfsvfs, intval);
 		zfsvfs_rele(zfsvfs, FTAG);
 
@@ -2558,8 +2549,8 @@
  */
 static int
 zfs_fill_zplprops_impl(objset_t *os, uint64_t zplver,
-    boolean_t fuids_ok, nvlist_t *createprops, nvlist_t *zplprops,
-    boolean_t *is_ci)
+    boolean_t fuids_ok, boolean_t sa_ok, nvlist_t *createprops,
+    nvlist_t *zplprops, boolean_t *is_ci)
 {
 	uint64_t sense = ZFS_PROP_UNDEFINED;
 	uint64_t norm = ZFS_PROP_UNDEFINED;
@@ -2595,6 +2586,7 @@
 	 */
 	if ((zplver < ZPL_VERSION_INITIAL || zplver > ZPL_VERSION) ||
 	    (zplver >= ZPL_VERSION_FUID && !fuids_ok) ||
+	    (zplver >= ZPL_VERSION_SA && !sa_ok) ||
 	    (zplver < ZPL_VERSION_NORMALIZATION &&
 	    (norm != ZFS_PROP_UNDEFINED || u8 != ZFS_PROP_UNDEFINED ||
 	    sense != ZFS_PROP_UNDEFINED)))
@@ -2636,11 +2628,13 @@
 zfs_fill_zplprops(const char *dataset, nvlist_t *createprops,
     nvlist_t *zplprops, boolean_t *is_ci)
 {
-	boolean_t fuids_ok = B_TRUE;
+	boolean_t fuids_ok, sa_ok;
 	uint64_t zplver = ZPL_VERSION;
 	objset_t *os = NULL;
 	char parentname[MAXNAMELEN];
 	char *cp;
+	spa_t *spa;
+	uint64_t spa_vers;
 	int error;
 
 	(void) strlcpy(parentname, dataset, sizeof (parentname));
@@ -2648,12 +2642,15 @@
 	ASSERT(cp != NULL);
 	cp[0] = '\0';
 
-	if (zfs_earlier_version(dataset, SPA_VERSION_USERSPACE))
-		zplver = ZPL_VERSION_USERSPACE - 1;
-	if (zfs_earlier_version(dataset, SPA_VERSION_FUID)) {
-		zplver = ZPL_VERSION_FUID - 1;
-		fuids_ok = B_FALSE;
-	}
+	if ((error = spa_open(dataset, &spa, FTAG)) != 0)
+		return (error);
+
+	spa_vers = spa_version(spa);
+	spa_close(spa, FTAG);
+
+	zplver = zfs_zpl_version_map(spa_vers);
+	fuids_ok = (zplver >= ZPL_VERSION_FUID);
+	sa_ok = (zplver >= ZPL_VERSION_SA);
 
 	/*
 	 * Open parent object set so we can inherit zplprop values.
@@ -2661,7 +2658,7 @@
 	if ((error = dmu_objset_hold(parentname, FTAG, &os)) != 0)
 		return (error);
 
-	error = zfs_fill_zplprops_impl(os, zplver, fuids_ok, createprops,
+	error = zfs_fill_zplprops_impl(os, zplver, fuids_ok, sa_ok, createprops,
 	    zplprops, is_ci);
 	dmu_objset_rele(os, FTAG);
 	return (error);
@@ -2671,17 +2668,17 @@
 zfs_fill_zplprops_root(uint64_t spa_vers, nvlist_t *createprops,
     nvlist_t *zplprops, boolean_t *is_ci)
 {
-	boolean_t fuids_ok = B_TRUE;
+	boolean_t fuids_ok;
+	boolean_t sa_ok;
 	uint64_t zplver = ZPL_VERSION;
 	int error;
 
-	if (spa_vers < SPA_VERSION_FUID) {
-		zplver = ZPL_VERSION_FUID - 1;
-		fuids_ok = B_FALSE;
-	}
-
-	error = zfs_fill_zplprops_impl(NULL, zplver, fuids_ok, createprops,
-	    zplprops, is_ci);
+	zplver = zfs_zpl_version_map(spa_vers);
+	fuids_ok = (zplver >= ZPL_VERSION_FUID);
+	sa_ok = (zplver >= ZPL_VERSION_SA);
+
+	error = zfs_fill_zplprops_impl(NULL, zplver, fuids_ok, sa_ok,
+	    createprops, zplprops, is_ci);
 	return (error);
 }

--- a/usr/src/uts/common/fs/zfs/zfs_log.c	Tue Mar 16 06:44:44 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/zfs_log.c	Tue Mar 16 09:43:38 2010 -0600
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -276,21 +276,25 @@
 	lr = (lr_create_t *)&itx->itx_lr;
 	lr->lr_doid = dzp->z_id;
 	lr->lr_foid = zp->z_id;
-	lr->lr_mode = zp->z_phys->zp_mode;
-	if (!IS_EPHEMERAL(zp->z_phys->zp_uid)) {
-		lr->lr_uid = (uint64_t)zp->z_phys->zp_uid;
+	lr->lr_mode = zp->z_mode;
+	if (!IS_EPHEMERAL(zp->z_uid)) {
+		lr->lr_uid = (uint64_t)zp->z_uid;
 	} else {
 		lr->lr_uid = fuidp->z_fuid_owner;
 	}
-	if (!IS_EPHEMERAL(zp->z_phys->zp_gid)) {
-		lr->lr_gid = (uint64_t)zp->z_phys->zp_gid;
+	if (!IS_EPHEMERAL(zp->z_gid)) {
+		lr->lr_gid = (uint64_t)zp->z_gid;
 	} else {
 		lr->lr_gid = fuidp->z_fuid_group;
 	}
-	lr->lr_gen = zp->z_phys->zp_gen;
-	lr->lr_crtime[0] = zp->z_phys->zp_crtime[0];
-	lr->lr_crtime[1] = zp->z_phys->zp_crtime[1];
-	lr->lr_rdev = zp->z_phys->zp_rdev;
+	(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zp->z_zfsvfs), &lr->lr_gen,
+	    sizeof (uint64_t));
+	(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_CRTIME(zp->z_zfsvfs),
+	    lr->lr_crtime, sizeof (uint64_t) * 2);
+
+	if (sa_lookup(zp->z_sa_hdl, SA_ZPL_RDEV(zp->z_zfsvfs), &lr->lr_rdev,
+	    sizeof (lr->lr_rdev)) != 0)
+		lr->lr_rdev = 0;
 
 	/*
 	 * Fill in xvattr info if any
@@ -404,12 +408,13 @@
 	lr = (lr_create_t *)&itx->itx_lr;
 	lr->lr_doid = dzp->z_id;
 	lr->lr_foid = zp->z_id;
-	lr->lr_mode = zp->z_phys->zp_mode;
-	lr->lr_uid = zp->z_phys->zp_uid;
-	lr->lr_gid = zp->z_phys->zp_gid;
-	lr->lr_gen = zp->z_phys->zp_gen;
-	lr->lr_crtime[0] = zp->z_phys->zp_crtime[0];
-	lr->lr_crtime[1] = zp->z_phys->zp_crtime[1];
+	lr->lr_uid = zp->z_uid;
+	lr->lr_gid = zp->z_gid;
+	lr->lr_mode = zp->z_mode;
+	(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zp->z_zfsvfs), &lr->lr_gen,
+	    sizeof (uint64_t));
+	(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_CRTIME(zp->z_zfsvfs),
+	    lr->lr_crtime, sizeof (uint64_t) * 2);
 	bcopy(name, (char *)(lr + 1), namesize);
 	bcopy(link, (char *)(lr + 1) + namesize, linksize);

--- a/usr/src/uts/common/fs/zfs/zfs_replay.c	Tue Mar 16 06:44:44 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/zfs_replay.c	Tue Mar 16 09:43:38 2010 -0600
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -645,7 +645,7 @@
 	length = lr->lr_length;
 	eod = offset + length;		/* end of data for this write */
 
-	orig_eof = zp->z_phys->zp_size;
+	orig_eof = zp->z_size;
 
 	/* If it's a dmu_sync() block, write the whole block */
 	if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) {
@@ -667,8 +667,8 @@
 	 * write needs to be there. So we write the whole block and
 	 * reduce the eof.
 	 */
-	if (orig_eof < zp->z_phys->zp_size) /* file length grew ? */
-		zp->z_phys->zp_size = eod;
+	if (orig_eof < zp->z_size) /* file length grew ? */
+		zp->z_size = eod;
 
 	VN_RELE(ZTOV(zp));
 
@@ -695,9 +695,9 @@
 		return (error);
 
 	end = lr->lr_offset + lr->lr_length;
-	if (end > zp->z_phys->zp_size) {
-		ASSERT3U(end - zp->z_phys->zp_size, <, zp->z_blksz);
-		zp->z_phys->zp_size = end;
+	if (end > zp->z_size) {
+		ASSERT3U(end - zp->z_size, <, zp->z_blksz);
+		zp->z_size = end;
 	}
 
 	VN_RELE(ZTOV(zp));

--- a/usr/src/uts/common/fs/zfs/zfs_rlock.c	Tue Mar 16 06:44:44 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/zfs_rlock.c	Tue Mar 16 09:43:38 2010 -0600
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -112,7 +112,7 @@
 		 * Range locking is also used by zvol and uses a
 		 * dummied up znode. However, for zvol, we don't need to
 		 * append or grow blocksize, and besides we don't have
-		 * a z_phys or z_zfsvfs - so skip that processing.
+		 * "sa" data or a z_zfsvfs - so skip that processing.
 		 *
 		 * Yes, this is ugly, and would be solved by not handling
 		 * grow or append in range lock code. If that was done then
@@ -125,14 +125,14 @@
 			 * This is done under z_range_lock to avoid races.
 			 */
 			if (new->r_type == RL_APPEND)
-				new->r_off = zp->z_phys->zp_size;
+				new->r_off = zp->z_size;
 
 			/*
 			 * If we need to grow the block size then grab the whole
 			 * file range. This is also done under z_range_lock to
 			 * avoid races.
 			 */
-			end_size = MAX(zp->z_phys->zp_size, new->r_off + len);
+			end_size = MAX(zp->z_size, new->r_off + len);
 			if (end_size > zp->z_blksz && (!ISP2(zp->z_blksz) ||
 			    zp->z_blksz < zp->z_zfsvfs->z_max_blksz)) {
 				new->r_off = 0;

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/fs/zfs/zfs_sa.c	Tue Mar 16 09:43:38 2010 -0600
@@ -0,0 +1,399 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/vnode.h>
+#include <sys/sa.h>
+#include <sys/zfs_acl.h>
+#include <sys/zfs_sa.h>
+
+/*
+ * ZPL attribute registration table.
+ *
+ * The order of the attributes doesn't matter: a unique value will be
+ * assigned for each attribute that is file system specific.
+ *
+ * This is just the set of ZPL attributes that this version of ZFS deals
+ * with natively.  The file system could have other attributes stored in
+ * files, but they will be ignored.  The SA framework will preserve them;
+ * this version of ZFS just won't change or delete them.
+ *
+ * The final entry has a NULL name.
+ */
+
+sa_attr_reg_t zfs_attr_table[ZPL_END+1] = {
+	{"ZPL_ATIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 0},
+	{"ZPL_MTIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 1},
+	{"ZPL_CTIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 2},
+	{"ZPL_CRTIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 3},
+	{"ZPL_GEN", sizeof (uint64_t), SA_UINT64_ARRAY, 4},
+	{"ZPL_MODE", sizeof (uint64_t), SA_UINT64_ARRAY, 5},
+	{"ZPL_SIZE", sizeof (uint64_t), SA_UINT64_ARRAY, 6},
+	{"ZPL_PARENT", sizeof (uint64_t), SA_UINT64_ARRAY, 7},
+	{"ZPL_LINKS", sizeof (uint64_t), SA_UINT64_ARRAY, 8},
+	{"ZPL_XATTR", sizeof (uint64_t), SA_UINT64_ARRAY, 9},
+	{"ZPL_RDEV", sizeof (uint64_t), SA_UINT64_ARRAY, 10},
+	{"ZPL_FLAGS", sizeof (uint64_t), SA_UINT64_ARRAY, 11},
+	{"ZPL_UID", sizeof (uint64_t), SA_UINT64_ARRAY, 12},
+	{"ZPL_GID", sizeof (uint64_t), SA_UINT64_ARRAY, 13},
+	{"ZPL_PAD", sizeof (uint64_t) * 4, SA_UINT64_ARRAY, 14},
+	{"ZPL_ZNODE_ACL", 88, SA_UINT8_ARRAY, 15},
+	{"ZPL_DACL_COUNT", sizeof (uint64_t), SA_UINT64_ARRAY, 0},
+	{"ZPL_SYMLINK", 0, SA_UINT8_ARRAY, 0},
+	{"ZPL_SCANSTAMP", 32, SA_UINT8_ARRAY, 0},
+	{"ZPL_DACL_ACES", 0, SA_ACL, 0},
+	{NULL, 0, 0, 0}
+};
+
+#ifdef _KERNEL
+
+/*
+ * Copy the target of the symlink zp out to uio.
+ *
+ * The target is read from the handle's bonus buffer at offset
+ * ZFS_OLD_ZNODE_PHYS_SIZE when z_size + ZFS_OLD_ZNODE_PHYS_SIZE is less
+ * than that buffer's db_size; otherwise it is read from the buffer held
+ * at offset 0 of the object.  At most MIN(z_size, uio_resid) bytes are
+ * moved.  Returns 0 or the error from dmu_buf_hold()/uiomove().
+ */
+int
+zfs_sa_readlink(znode_t *zp, uio_t *uio)
+{
+	dmu_buf_t *db = sa_get_db(zp->z_sa_hdl);
+	size_t bufsz;
+	int error;
+
+	bufsz = zp->z_size;
+	if (bufsz + ZFS_OLD_ZNODE_PHYS_SIZE < db->db_size) {
+		error = uiomove((caddr_t)db->db_data +
+		    ZFS_OLD_ZNODE_PHYS_SIZE,
+		    MIN((size_t)bufsz, uio->uio_resid), UIO_READ, uio);
+	} else {
+		dmu_buf_t *dbp;
+		if ((error = dmu_buf_hold(zp->z_zfsvfs->z_os, zp->z_id,
+		    0, FTAG, &dbp)) == 0) {
+			error = uiomove(dbp->db_data,
+			    MIN((size_t)bufsz, uio->uio_resid), UIO_READ, uio);
+			dmu_buf_rele(dbp, FTAG);
+		}
+	}
+	return (error);
+}
+
+/*
+ * Store the len-byte symlink target "link" for zp under transaction tx.
+ *
+ * If ZFS_OLD_ZNODE_PHYS_SIZE + len fits within dmu_bonus_max(), the bonus
+ * buffer is resized with dmu_set_bonus() and the target is copied into it
+ * at offset ZFS_OLD_ZNODE_PHYS_SIZE.  Otherwise zfs_grow_blocksize() is
+ * called and the target is copied into the buffer held at offset 0 of
+ * the object.  Failures of the DMU calls trip VERIFY().
+ */
+void
+zfs_sa_symlink(znode_t *zp, char *link, int len, dmu_tx_t *tx)
+{
+	dmu_buf_t *db = sa_get_db(zp->z_sa_hdl);
+
+	if (ZFS_OLD_ZNODE_PHYS_SIZE + len <= dmu_bonus_max()) {
+		VERIFY(dmu_set_bonus(db,
+		    len + ZFS_OLD_ZNODE_PHYS_SIZE, tx) == 0);
+		if (len) {
+			bcopy(link, (caddr_t)db->db_data +
+			    ZFS_OLD_ZNODE_PHYS_SIZE, len);
+		}
+	} else {
+		dmu_buf_t *dbp;
+
+		zfs_grow_blocksize(zp, len, tx);
+		VERIFY(0 == dmu_buf_hold(zp->z_zfsvfs->z_os,
+		    zp->z_id, 0, FTAG, &dbp));
+
+		dmu_buf_will_dirty(dbp, tx);
+
+		ASSERT3U(len, <=, dbp->db_size);
+		bcopy(link, dbp->db_data, len);
+		dmu_buf_rele(dbp, FTAG);
+	}
+}
+
+/*
+ * Return the scanstamp of zp in the optional attributes of xvap.
+ *
+ * When z_is_sa is set the stamp comes from the SA_ZPL_SCANSTAMP attribute,
+ * and nothing is returned if that lookup fails.  Otherwise nothing is
+ * returned unless ZFS_BONUS_SCANSTAMP is set in z_pflags; the stamp is then
+ * copied from the bonus buffer at offset ZFS_OLD_ZNODE_PHYS_SIZE, provided
+ * the bonus data is large enough to contain it.  XAT_AV_SCANSTAMP is marked
+ * as returned on every path that does not return early.
+ */
+void
+zfs_sa_get_scanstamp(znode_t *zp, xvattr_t *xvap)
+{
+	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
+	xoptattr_t *xoap;
+
+	VERIFY((xoap = xva_getxoptattr(xvap)) != NULL);
+	if (zp->z_is_sa) {
+		if (sa_lookup(zp->z_sa_hdl, SA_ZPL_SCANSTAMP(zfsvfs),
+		    &xoap->xoa_av_scanstamp,
+		    sizeof (xoap->xoa_av_scanstamp)) != 0)
+			return;
+	} else {
+		dmu_object_info_t doi;
+		dmu_buf_t *db = sa_get_db(zp->z_sa_hdl);
+		int len;
+
+		if (!(zp->z_pflags & ZFS_BONUS_SCANSTAMP))
+			return;
+
+		sa_object_info(zp->z_sa_hdl, &doi);
+		len = sizeof (xoap->xoa_av_scanstamp) +
+		    ZFS_OLD_ZNODE_PHYS_SIZE;
+
+		if (len <= doi.doi_bonus_size) {
+			(void) memcpy(xoap->xoa_av_scanstamp,
+			    (caddr_t)db->db_data + ZFS_OLD_ZNODE_PHYS_SIZE,
+			    sizeof (xoap->xoa_av_scanstamp));
+		}
+	}
+	XVA_SET_RTN(xvap, XAT_AV_SCANSTAMP);
+}
+
+/*
+ * Record the scanstamp carried in xvap on zp under transaction tx.
+ *
+ * When z_is_sa is set the stamp is written as the SA_ZPL_SCANSTAMP
+ * attribute.  Otherwise the bonus buffer is grown with dmu_set_bonus() if
+ * it is too small, the stamp is copied in at offset
+ * ZFS_OLD_ZNODE_PHYS_SIZE, and ZFS_BONUS_SCANSTAMP is set in z_pflags and
+ * written back through SA_ZPL_FLAGS.
+ */
+void
+zfs_sa_set_scanstamp(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx)
+{
+	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
+	xoptattr_t *xoap;
+
+	VERIFY((xoap = xva_getxoptattr(xvap)) != NULL);
+	if (zp->z_is_sa)
+		VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_SCANSTAMP(zfsvfs),
+		    &xoap->xoa_av_scanstamp,
+		    sizeof (xoap->xoa_av_scanstamp), tx));
+	else {
+		dmu_object_info_t doi;
+		dmu_buf_t *db = sa_get_db(zp->z_sa_hdl);
+		int len;
+
+		sa_object_info(zp->z_sa_hdl, &doi);
+		len = sizeof (xoap->xoa_av_scanstamp) +
+		    ZFS_OLD_ZNODE_PHYS_SIZE;
+		if (len > doi.doi_bonus_size)
+			VERIFY(dmu_set_bonus(db, len, tx) == 0);
+		(void) memcpy((caddr_t)db->db_data + ZFS_OLD_ZNODE_PHYS_SIZE,
+		    xoap->xoa_av_scanstamp, sizeof (xoap->xoa_av_scanstamp));
+
+		zp->z_pflags |= ZFS_BONUS_SCANSTAMP;
+		VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs),
+		    &zp->z_pflags, sizeof (uint64_t), tx));
+	}
+}
+
+/*
+ * Convert the znode behind hdl from the old znode layout to the SA
+ * layout (bonus type DMU_OT_SA) as part of tx.
+ *
+ * The attributes that are not cached in the znode are read with a bulk
+ * lookup, a complete replacement attribute set is assembled, and the
+ * object is rewritten with sa_replace_all_by_template_locked().  An
+ * external ACL object, if there was one, is freed afterwards.  The
+ * upgrade is silently skipped when the ACL is not cached, when the bulk
+ * lookup fails, or when a symlink's data buffer cannot be held.
+ *
+ * I'm not convinced we should do any of this upgrade, since the SA code
+ * can read both old/new znode formats with probably little to no
+ * performance difference.
+ *
+ * All new files will be created with the new format.
+ */
+
+void
+zfs_sa_upgrade(sa_handle_t *hdl, dmu_tx_t *tx)
+{
+	dmu_buf_t *db = sa_get_db(hdl);
+	znode_t *zp = sa_get_userdata(hdl);
+	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
+	sa_bulk_attr_t bulk[20];
+	int count = 0;
+	sa_bulk_attr_t sa_attrs[20] = { 0 };
+	zfs_acl_locator_cb_t locate = { 0 };
+	uint64_t uid, gid, mode, rdev, xattr, parent;
+	uint64_t crtime[2], mtime[2], ctime[2];
+	zfs_acl_phys_t znode_acl;
+	char *slink = NULL;
+	char scanstamp[AV_SCANSTAMP_SZ];
+
+	/*
+	 * No upgrade if ACL isn't cached
+	 * since we won't know which locks are held
+	 * and reading the ACL would require special "locked"
+	 * interfaces that would be messy
+	 */
+	if (zp->z_acl_cached == NULL)
+		return;
+
+	/* First do a bulk query of the attributes that aren't cached */
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CRTIME(zfsvfs), NULL, &crtime, 16);
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, &mode, 8);
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL, &parent, 8);
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_XATTR(zfsvfs), NULL, &xattr, 8);
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_RDEV(zfsvfs), NULL, &rdev, 8);
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, &uid, 8);
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL, &gid, 8);
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ZNODE_ACL(zfsvfs), NULL,
+	    &znode_acl, 88);
+
+	if (sa_bulk_lookup_locked(hdl, bulk, count) != 0)
+		return;
+
+	/*
+	 * While the order here doesn't matter it's best to try and organize
+	 * it in such a way to pick up an already existing layout number
+	 */
+	count = 0;
+	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_MODE(zfsvfs), NULL, &mode, 8);
+	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_SIZE(zfsvfs), NULL,
+	    &zp->z_size, 8);
+	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_GEN(zfsvfs),
+	    NULL, &zp->z_gen, 8);
+	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_UID(zfsvfs), NULL, &uid, 8);
+	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_GID(zfsvfs), NULL, &gid, 8);
+	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_PARENT(zfsvfs),
+	    NULL, &parent, 8);
+	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_FLAGS(zfsvfs), NULL,
+	    &zp->z_pflags, 8);
+	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_ATIME(zfsvfs), NULL,
+	    zp->z_atime, 16);
+	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_MTIME(zfsvfs), NULL,
+	    &mtime, 16);
+	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_CTIME(zfsvfs), NULL,
+	    &ctime, 16);
+	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_CRTIME(zfsvfs), NULL,
+	    &crtime, 16);
+	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_LINKS(zfsvfs), NULL,
+	    &zp->z_links, 8);
+	if (zp->z_vnode->v_type == VBLK || zp->z_vnode->v_type == VCHR)
+		SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_RDEV(zfsvfs), NULL,
+		    &rdev, 8);
+	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_DACL_COUNT(zfsvfs), NULL,
+	    &zp->z_acl_cached->z_acl_count, 8);
+
+	if (zp->z_acl_cached->z_version < ZFS_ACL_VERSION_FUID)
+		zfs_acl_xform(zp, zp->z_acl_cached, CRED());
+
+	locate.cb_aclp = zp->z_acl_cached;
+	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_DACL_ACES(zfsvfs),
+	    zfs_acl_data_locator, &locate, zp->z_acl_cached->z_acl_bytes);
+	if (xattr)
+		SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_XATTR(zfsvfs),
+		    NULL, &xattr, 8);
+
+	/*
+	 * is it a symlink ?
+	 *
+	 * this will probably never be exercised since we won't
+	 * have the cached ACL.
+	 */
+	if (ZTOV(zp)->v_type == VLNK) {
+		slink = kmem_zalloc(zp->z_size + 1, KM_SLEEP);
+		/* Same inline-vs-block test that zfs_sa_symlink() uses. */
+		if (zp->z_size + ZFS_OLD_ZNODE_PHYS_SIZE <=
+		    dmu_bonus_max()) {
+			bcopy((caddr_t)db->db_data + ZFS_OLD_ZNODE_PHYS_SIZE,
+			    slink, zp->z_size);
+		} else {
+			dmu_buf_t *dbp;
+			if (dmu_buf_hold(zfsvfs->z_os, zp->z_id, 0,
+			    FTAG, &dbp)) {
+				/* Don't leak the link buffer on failure. */
+				kmem_free(slink, zp->z_size + 1);
+				return;
+			}
+			bcopy(dbp->db_data, slink, zp->z_size);
+			dmu_buf_rele(dbp, FTAG);
+		}
+		SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_SYMLINK(zfsvfs),
+		    NULL, slink, zp->z_size);
+	}
+
+	/* if scanstamp then add scanstamp */
+
+	if (zp->z_pflags & ZFS_BONUS_SCANSTAMP) {
+		bcopy((caddr_t)db->db_data + ZFS_OLD_ZNODE_PHYS_SIZE,
+		    scanstamp, AV_SCANSTAMP_SZ);
+		SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_SCANSTAMP(zfsvfs),
+		    NULL, scanstamp, AV_SCANSTAMP_SZ);
+		zp->z_pflags &= ~ZFS_BONUS_SCANSTAMP;
+	}
+
+	VERIFY(dmu_set_bonustype(db, DMU_OT_SA, tx) == 0);
+	VERIFY(sa_replace_all_by_template_locked(hdl, sa_attrs,
+	    count, tx) == 0);
+	if (znode_acl.z_acl_extern_obj)
+		VERIFY(0 == dmu_object_free(zfsvfs->z_os,
+		    znode_acl.z_acl_extern_obj, tx));
+
+	zp->z_is_sa = B_TRUE;
+	if (slink)
+		kmem_free(slink, zp->z_size + 1);
+}
+
+/*
+ * Add to tx the holds needed to upgrade zp to the SA layout (compare
+ * zfs_sa_upgrade(), which rewrites the attributes and frees the external
+ * ACL object).
+ *
+ * Nothing is added unless the file system uses SA (z_use_sa) and zp has
+ * not been converted yet.
+ */
+void
+zfs_sa_upgrade_txholds(dmu_tx_t *tx, znode_t *zp)
+{
+	if (!zp->z_zfsvfs->z_use_sa || zp->z_is_sa)
+		return;
+
+	ASSERT(!zp->z_is_sa);
+
+	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
+
+	if (ZFS_EXTERNAL_ACL(zp)) {
+		dmu_tx_hold_free(tx, ZFS_EXTERNAL_ACL(zp), 0,
+		    DMU_OBJECT_END);
+	}
+}
+
+#endif

--- a/usr/src/uts/common/fs/zfs/zfs_vfsops.c	Tue Mar 16 06:44:44 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/zfs_vfsops.c	Tue Mar 16 09:43:38 2010 -0600
@@ -46,6 +46,7 @@
 #include <sys/dsl_deleg.h>
 #include <sys/spa.h>
 #include <sys/zap.h>
+#include <sys/sa.h>
 #include <sys/varargs.h>
 #include <sys/policy.h>
 #include <sys/atomic.h>
@@ -60,6 +61,8 @@
 #include <sys/dnlc.h>
 #include <sys/dmu_objset.h>
 #include <sys/spa_boot.h>
+#include <sys/sa.h>
+#include "zfs_comutil.h"
 
 int zfsfstype;
 vfsops_t *zfs_vfsops = NULL;
@@ -582,6 +585,7 @@
 
 	(void) snprintf(buf, sizeof (buf), "%llx", (longlong_t)fuid);
 	err = zap_lookup(os, obj, buf, 8, 1, &used);
+
 	ASSERT(err == 0 || err == ENOENT);
 	/* no underflow/overflow */
 	ASSERT(delta > 0 || used >= -delta);
@@ -592,20 +596,38 @@
 	else
 		err = zap_update(os, obj, buf, 8, 1, &used, tx);
 	ASSERT(err == 0);
+
 }
 
 static int
-zfs_space_delta_cb(dmu_object_type_t bonustype, void *bonus,
+zfs_space_delta_cb(dmu_object_type_t bonustype, void *data,
     uint64_t *userp, uint64_t *groupp)
 {
-	znode_phys_t *znp = bonus;
+	znode_phys_t *znp = data;
+	int error = 0;	/* stays 0 unless the SA branch cannot read the ids */
 
-	if (bonustype != DMU_OT_ZNODE)
+	if (bonustype != DMU_OT_ZNODE && bonustype != DMU_OT_SA)
 		return (ENOENT);
 
-	*userp = znp->zp_uid;
-	*groupp = znp->zp_gid;
-	return (0);
+	if (bonustype == DMU_OT_ZNODE) {	/* data read as znode_phys_t */
+		*userp = znp->zp_uid;
+		*groupp = znp->zp_gid;
+	} else {
+		int hdrsize;
+
+		ASSERT(bonustype == DMU_OT_SA);
+		hdrsize = sa_hdrsize(data);
+
+		if (hdrsize != 0) {	/* ids read at fixed offsets past header */
+			*userp = *((uint64_t *)((uintptr_t)data + hdrsize +
+			    SA_UID_OFFSET));
+			*groupp = *((uint64_t *)((uintptr_t)data + hdrsize +
+			    SA_GID_OFFSET));
+		} else {
+			error = ENOENT;	/* NOTE(review): meaning of 0 size - confirm */
+		}
+	}
+	return (error);
 }
 
 static void
@@ -792,7 +814,7 @@
 }
 
 boolean_t
-zfs_usergroup_overquota(zfsvfs_t *zfsvfs, boolean_t isgroup, uint64_t fuid)
+zfs_fuid_overquota(zfsvfs_t *zfsvfs, boolean_t isgroup, uint64_t fuid)
 {
 	char buf[32];
 	uint64_t used, quota, usedobj, quotaobj;
@@ -815,6 +837,32 @@
 	return (used >= quota);
 }
 
+/*
+ * Check the quota of zp's owning user (or group, if isgroup) by way of
+ * zfs_fuid_overquota().  B_FALSE when no quota object or z_replay is set.
+ */
+boolean_t
+zfs_owner_overquota(zfsvfs_t *zfsvfs, znode_t *zp, boolean_t isgroup)
+{
+	uid_t owner = isgroup ? zp->z_gid : zp->z_uid;
+	uint64_t obj, fuid;
+
+	obj = isgroup ? zfsvfs->z_groupquota_obj : zfsvfs->z_userquota_obj;
+	if (obj == 0 || zfsvfs->z_replay)
+		return (B_FALSE);
+
+	/* Ephemeral ids: fetch the stored uid/gid attribute; else use as-is. */
+	if (!IS_EPHEMERAL(owner)) {
+		fuid = (uint64_t)owner;
+	} else {
+		VERIFY(0 == sa_lookup(zp->z_sa_hdl,
+		    isgroup ? SA_ZPL_GID(zfsvfs) : SA_ZPL_UID(zfsvfs),
+		    &fuid, sizeof (fuid)));
+	}
+	return (zfs_fuid_overquota(zfsvfs, isgroup, fuid));
+}
+
+
 int
 zfsvfs_create(const char *osname, zfsvfs_t **zfvp)
 {
@@ -822,6 +870,7 @@
 	zfsvfs_t *zfsvfs;
 	uint64_t zval;
 	int i, error;
+	uint64_t sa_obj;
 
 	zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP);
 
@@ -879,6 +928,26 @@
 		zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER;
 
 	zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os);
+	zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os);
+
+	if (zfsvfs->z_use_sa) {
+		/* should either have both of these objects or none */
+		error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1,
+		    &sa_obj);
+		if (error)
+			return (error);
+	} else {
+		/*
+		 * Pre-SA file system versions should never touch
+		 * either the attribute registration or layout objects.
+		 */
+		sa_obj = 0;
+	}
+
+	zfsvfs->z_attr_table = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END);
+
+	if (zfsvfs->z_version >= ZPL_VERSION_SA)
+		sa_register_update_callback(os, zfs_sa_upgrade);
 
 	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1,
 	    &zfsvfs->z_root);
@@ -1051,6 +1120,7 @@
 		vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACCESS_FILTER);
 		vfs_set_feature(zfsvfs->z_vfs, VFSFT_REPARSE);
 	}
+	zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os);
 }
 
 static int
@@ -1732,7 +1802,7 @@
 	mutex_enter(&zfsvfs->z_znodes_lock);
 	for (zp = list_head(&zfsvfs->z_all_znodes); zp != NULL;
 	    zp = list_next(&zfsvfs->z_all_znodes, zp))
-		if (zp->z_dbuf) {
+		if (zp->z_sa_hdl) {
 			ASSERT(ZTOV(zp)->v_count > 0);
 			zfs_znode_dmu_fini(zp);
 		}
@@ -1927,7 +1997,9 @@
 		ZFS_EXIT(zfsvfs);
 		return (err);
 	}
-	zp_gen = zp->z_phys->zp_gen & gen_mask;
+	(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), &zp_gen,
+	    sizeof (uint64_t));
+	zp_gen = zp_gen & gen_mask;
 	if (zp_gen == 0)
 		zp_gen = 1;
 	if (zp->z_unlinked || zp_gen != fid_gen) {
@@ -1966,7 +2038,7 @@
 int
 zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname)
 {
-	int err;
+	int err, err2;
 
 	ASSERT(RRW_WRITE_HELD(&zfsvfs->z_teardown_lock));
 	ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock));
@@ -1977,6 +2049,17 @@
 		zfsvfs->z_os = NULL;
 	} else {
 		znode_t *zp;
+		uint64_t sa_obj = 0;
+
+		err2 = zap_lookup(zfsvfs->z_os, MASTER_NODE_OBJ,
+		    ZFS_SA_ATTRS, 8, 1, &sa_obj);
+
+		if ((err || err2) && zfsvfs->z_version >= ZPL_VERSION_SA)
+			goto bail;
+
+
+		zfsvfs->z_attr_table = sa_setup(zfsvfs->z_os, sa_obj,
+		    zfs_attr_table,  ZPL_END);
 
 		VERIFY(zfsvfs_setup(zfsvfs, B_FALSE) == 0);
 
@@ -1995,6 +2078,7 @@
 
 	}
 
+bail:
 	/* release the VOPs */
 	rw_exit(&zfsvfs->z_teardown_inactive_lock);
 	rrw_exit(&zfsvfs->z_teardown_lock, FTAG);
@@ -2111,13 +2195,23 @@
 	if (newvers < zfsvfs->z_version)
 		return (EINVAL);
 
+	if (zfs_spa_version_map(newvers) >
+	    spa_version(dmu_objset_spa(zfsvfs->z_os)))
+		return (ENOTSUP);
+
 	tx = dmu_tx_create(os);
 	dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_FALSE, ZPL_VERSION_STR);
+	if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) {
+		dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE,
+		    ZFS_SA_ATTRS);
+		dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
+	}
 	error = dmu_tx_assign(tx, TXG_WAIT);
 	if (error) {
 		dmu_tx_abort(tx);
 		return (error);
 	}
+
 	error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR,
 	    8, 1, &newvers, tx);
 
@@ -2126,6 +2220,22 @@
 		return (error);
 	}
 
+	if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) {
+		uint64_t sa_obj;
+
+		ASSERT3U(spa_version(dmu_objset_spa(zfsvfs->z_os)), >=,
+		    SPA_VERSION_SA);
+		sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE,
+		    DMU_OT_NONE, 0, tx);
+
+		error = zap_add(os, MASTER_NODE_OBJ,
+		    ZFS_SA_ATTRS, 8, 1, &sa_obj, tx);
+		ASSERT3U(error, ==, 0);
+
+		VERIFY(0 == sa_set_sa_object(os, sa_obj));
+		sa_register_update_callback(os, zfs_sa_upgrade);
+	}
+
 	spa_history_internal_log(LOG_DS_UPGRADE,
 	    dmu_objset_spa(os), tx, CRED(),
 	    "oldver=%llu newver=%llu dataset = %llu",

--- a/usr/src/uts/common/fs/zfs/zfs_vnops.c	Tue Mar 16 06:44:44 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/zfs_vnops.c	Tue Mar 16 09:43:38 2010 -0600
@@ -61,6 +61,7 @@
 #include <sys/txg.h>
 #include <sys/dbuf.h>
 #include <sys/zap.h>
+#include <sys/sa.h>
 #include <sys/dirent.h>
 #include <sys/policy.h>
 #include <sys/sunddi.h>
@@ -69,6 +70,7 @@
 #include "fs/fs_subr.h"
 #include <sys/zfs_ctldir.h>
 #include <sys/zfs_fuid.h>
+#include <sys/zfs_sa.h>
 #include <sys/dnlc.h>
 #include <sys/zfs_rlock.h>
 #include <sys/extdirent.h>
@@ -176,7 +178,7 @@
 	ZFS_ENTER(zfsvfs);
 	ZFS_VERIFY_ZP(zp);
 
-	if ((flag & FWRITE) && (zp->z_phys->zp_flags & ZFS_APPENDONLY) &&
+	if ((flag & FWRITE) && (zp->z_pflags & ZFS_APPENDONLY) &&
 	    ((flag & FAPPEND) == 0)) {
 		ZFS_EXIT(zfsvfs);
 		return (EPERM);
@@ -184,8 +186,7 @@
 
 	if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan &&
 	    ZTOV(zp)->v_type == VREG &&
-	    !(zp->z_phys->zp_flags & ZFS_AV_QUARANTINED) &&
-	    zp->z_phys->zp_size > 0) {
+	    !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) {
 		if (fs_vscan(*vpp, cr, 0) != 0) {
 			ZFS_EXIT(zfsvfs);
 			return (EACCES);
@@ -223,8 +224,7 @@
 
 	if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan &&
 	    ZTOV(zp)->v_type == VREG &&
-	    !(zp->z_phys->zp_flags & ZFS_AV_QUARANTINED) &&
-	    zp->z_phys->zp_size > 0)
+	    !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0)
 		VERIFY(fs_vscan(vp, cr, 1) == 0);
 
 	ZFS_EXIT(zfsvfs);
@@ -244,7 +244,7 @@
 	int error;
 	boolean_t hole;
 
-	file_sz = zp->z_phys->zp_size;
+	file_sz = zp->z_size;
 	if (noff >= file_sz)  {
 		return (ENXIO);
 	}
@@ -453,7 +453,7 @@
 	ZFS_VERIFY_ZP(zp);
 	os = zfsvfs->z_os;
 
-	if (zp->z_phys->zp_flags & ZFS_AV_QUARANTINED) {
+	if (zp->z_pflags & ZFS_AV_QUARANTINED) {
 		ZFS_EXIT(zfsvfs);
 		return (EACCES);
 	}
@@ -477,7 +477,7 @@
 	/*
 	 * Check for mandatory locks
 	 */
-	if (MANDMODE((mode_t)zp->z_phys->zp_mode)) {
+	if (MANDMODE(zp->z_mode)) {
 		if (error = chklock(vp, FREAD,
 		    uio->uio_loffset, uio->uio_resid, uio->uio_fmode, ct)) {
 			ZFS_EXIT(zfsvfs);
@@ -500,13 +500,13 @@
 	 * If we are reading past end-of-file we can skip
 	 * to the end; but we might still need to set atime.
 	 */
-	if (uio->uio_loffset >= zp->z_phys->zp_size) {
+	if (uio->uio_loffset >= zp->z_size) {
 		error = 0;
 		goto out;
 	}
 
-	ASSERT(uio->uio_loffset < zp->z_phys->zp_size);
-	n = MIN(uio->uio_resid, zp->z_phys->zp_size - uio->uio_loffset);
+	ASSERT(uio->uio_loffset < zp->z_size);
+	n = MIN(uio->uio_resid, zp->z_size - uio->uio_loffset);
 
 	if ((uio->uio_extflg == UIO_XUIO) &&
 	    (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) {
@@ -531,8 +531,8 @@
 			 */
 			while (--nblk >= 0) {
 				(void) dmu_xuio_add(xuio,
-				    dmu_request_arcbuf(zp->z_dbuf, blksz),
-				    0, blksz);
+				    dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl),
+				    blksz), 0, blksz);
 			}
 		}
 	}
@@ -580,6 +580,7 @@
  * Timestamps:
  *	vp - ctime|mtime updated if byte count > 0
  */
+
 /* ARGSUSED */
 static int
 zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
@@ -596,7 +597,6 @@
 	ssize_t		n, nbytes;
 	rl_t		*rl;
 	int		max_blksz = zfsvfs->z_max_blksz;
-	uint64_t	pflags;
 	int		error;
 	arc_buf_t	*abuf;
 	iovec_t		*aiov;
@@ -605,6 +605,9 @@
 	int		iovcnt = uio->uio_iovcnt;
 	iovec_t		*iovp = uio->uio_iov;
 	int		write_eof;
+	int		count = 0;
+	sa_bulk_attr_t	bulk[4];
+	uint64_t	mtime[2], ctime[2];
 
 	/*
 	 * Fasttrack empty write
@@ -619,13 +622,19 @@
 	ZFS_ENTER(zfsvfs);
 	ZFS_VERIFY_ZP(zp);
 
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
+	    &zp->z_size, 8);
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
+	    &zp->z_pflags, 8);
+
 	/*
 	 * If immutable or not appending then return EPERM
 	 */
-	pflags = zp->z_phys->zp_flags;
-	if ((pflags & (ZFS_IMMUTABLE | ZFS_READONLY)) ||
-	    ((pflags & ZFS_APPENDONLY) && !(ioflag & FAPPEND) &&
-	    (uio->uio_loffset < zp->z_phys->zp_size))) {
+	if ((zp->z_pflags & (ZFS_IMMUTABLE | ZFS_READONLY)) ||
+	    ((zp->z_pflags & ZFS_APPENDONLY) && !(ioflag & FAPPEND) &&
+	    (uio->uio_loffset < zp->z_size))) {
 		ZFS_EXIT(zfsvfs);
 		return (EPERM);
 	}
@@ -635,7 +644,7 @@
 	/*
 	 * Validate file offset
 	 */
-	woff = ioflag & FAPPEND ? zp->z_phys->zp_size : uio->uio_loffset;
+	woff = ioflag & FAPPEND ? zp->z_size : uio->uio_loffset;
 	if (woff < 0) {
 		ZFS_EXIT(zfsvfs);
 		return (EINVAL);
@@ -645,7 +654,7 @@
 	 * Check for mandatory locks before calling zfs_range_lock()
 	 * in order to prevent a deadlock with locks set via fcntl().
 	 */
-	if (MANDMODE((mode_t)zp->z_phys->zp_mode) &&
+	if (MANDMODE((mode_t)zp->z_mode) &&
 	    (error = chklock(vp, FWRITE, woff, n, uio->uio_fmode, ct)) != 0) {
 		ZFS_EXIT(zfsvfs);
 		return (error);
@@ -678,7 +687,7 @@
 			 * the file block size to increase.
 			 * Note that zp_size cannot change with this lock held.
 			 */
-			woff = zp->z_phys->zp_size;
+			woff = zp->z_size;
 		}
 		uio->uio_loffset = woff;
 	} else {
@@ -700,9 +709,9 @@
 		n = limit - woff;
 
 	/* Will this write extend the file length? */
-	write_eof = (woff + n > zp->z_phys->zp_size);
-
-	end_size = MAX(zp->z_phys->zp_size, woff + n);
+	write_eof = (woff + n > zp->z_size);
+
+	end_size = MAX(zp->z_size, woff + n);
 
 	/*
 	 * Write the file in reasonable size chunks.  Each chunk is written
@@ -713,10 +722,8 @@
 		abuf = NULL;
 		woff = uio->uio_loffset;
 again:
-		if (zfs_usergroup_overquota(zfsvfs,
-		    B_FALSE, zp->z_phys->zp_uid) ||
-		    zfs_usergroup_overquota(zfsvfs,
-		    B_TRUE, zp->z_phys->zp_gid)) {
+		if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) ||
+		    zfs_owner_overquota(zfsvfs, zp, B_TRUE)) {
 			if (abuf != NULL)
 				dmu_return_arcbuf(abuf);
 			error = EDQUOT;
@@ -735,7 +742,7 @@
 			    aiov->iov_len == arc_buf_size(abuf)));
 			i_iov++;
 		} else if (abuf == NULL && n >= max_blksz &&
-		    woff >= zp->z_phys->zp_size &&
+		    woff >= zp->z_size &&
 		    P2PHASE(woff, max_blksz) == 0 &&
 		    zp->z_blksz == max_blksz) {
 			/*
@@ -747,7 +754,8 @@
 			 */
 			size_t cbytes;
 
-			abuf = dmu_request_arcbuf(zp->z_dbuf, max_blksz);
+			abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl),
+			    max_blksz);
 			ASSERT(abuf != NULL);
 			ASSERT(arc_buf_size(abuf) == max_blksz);
 			if (error = uiocopy(abuf->b_data, max_blksz,
@@ -762,8 +770,9 @@
 		 * Start a transaction.
 		 */
 		tx = dmu_tx_create(zfsvfs->z_os);
-		dmu_tx_hold_bonus(tx, zp->z_id);
+		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
 		dmu_tx_hold_write(tx, zp->z_id, woff, MIN(n, max_blksz));
+		zfs_sa_upgrade_txholds(tx, zp);
 		error = dmu_tx_assign(tx, TXG_NOWAIT);
 		if (error) {
 			if (error == ERESTART) {
@@ -825,7 +834,8 @@
 				xuio_stat_wbuf_copied();
 			} else {
 				ASSERT(xuio || tx_bytes == max_blksz);
-				dmu_assign_arcbuf(zp->z_dbuf, woff, abuf, tx);
+				dmu_assign_arcbuf(sa_get_db(zp->z_sa_hdl),
+				    woff, abuf, tx);
 			}
 			ASSERT(tx_bytes <= uio->uio_resid);
 			uioskip(uio, tx_bytes);
@@ -840,6 +850,8 @@
 		 * partial progress, update the znode and ZIL accordingly.
 		 */
 		if (tx_bytes == 0) {
+			(void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs),
+			    (void *)&zp->z_size, sizeof (uint64_t), tx);
 			dmu_tx_commit(tx);
 			ASSERT(error != 0);
 			break;
@@ -853,33 +865,35 @@
 		 * been done, but that would still expose the ISUID/ISGID
 		 * to another app after the partial write is committed.
 		 *
-		 * Note: we don't call zfs_fuid_map_id() here because
-		 * user 0 is not an ephemeral uid.
 		 */
 		mutex_enter(&zp->z_acl_lock);
-		if ((zp->z_phys->zp_mode & (S_IXUSR | (S_IXUSR >> 3) |
+		if ((zp->z_mode & (S_IXUSR | (S_IXUSR >> 3) |
 		    (S_IXUSR >> 6))) != 0 &&
-		    (zp->z_phys->zp_mode & (S_ISUID | S_ISGID)) != 0 &&
+		    (zp->z_mode & (S_ISUID | S_ISGID)) != 0 &&
 		    secpolicy_vnode_setid_retain(cr,
-		    (zp->z_phys->zp_mode & S_ISUID) != 0 &&
-		    zp->z_phys->zp_uid == 0) != 0) {
-			zp->z_phys->zp_mode &= ~(S_ISUID | S_ISGID);
+		    (zp->z_mode & S_ISUID) != 0 && zp->z_uid == 0) != 0) {
+			uint64_t newmode;
+			zp->z_mode &= ~(S_ISUID | S_ISGID);
+			newmode = zp->z_mode;
+			(void) sa_update(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs),
+			    (void *)&newmode, sizeof (uint64_t), tx);
 		}
 		mutex_exit(&zp->z_acl_lock);
 
-		/*
-		 * Update time stamp.  NOTE: This marks the bonus buffer as
-		 * dirty, so we don't have to do it again for zp_size.
-		 */
-		zfs_time_stamper(zp, CONTENT_MODIFIED, tx);
+		zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime,
+		    B_TRUE);
 
 		/*
 		 * Update the file size (zp_size) if it has changed;
 		 * account for possible concurrent updates.
 		 */
-		while ((end_size = zp->z_phys->zp_size) < uio->uio_loffset)
-			(void) atomic_cas_64(&zp->z_phys->zp_size, end_size,
+		while ((end_size = zp->z_size) < uio->uio_loffset) {
+			(void) atomic_cas_64(&zp->z_size, end_size,
 			    uio->uio_loffset);
+			ASSERT(error == 0);
+		}
+		error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
+
 		zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag);
 		dmu_tx_commit(tx);
 
@@ -983,7 +997,7 @@
 	if (buf != NULL) { /* immediate write */
 		zgd->zgd_rl = zfs_range_lock(zp, offset, size, RL_READER);
 		/* test for truncation needs to be done while range locked */
-		if (offset >= zp->z_phys->zp_size) {
+		if (offset >= zp->z_size) {
 			error = ENOENT;
 		} else {
 			error = dmu_read(os, object, offset, size, buf,
@@ -1010,7 +1024,7 @@
 			zfs_range_unlock(zgd->zgd_rl);
 		}
 		/* test for truncation needs to be done while range locked */
-		if (lr->lr_offset >= zp->z_phys->zp_size)
+		if (lr->lr_offset >= zp->z_size)
 			error = ENOENT;
 #ifdef DEBUG
 		if (zil_fault_io) {
@@ -1132,7 +1146,7 @@
 
 		if (dvp->v_type != VDIR) {
 			return (ENOTDIR);
-		} else if (zdp->z_dbuf == NULL) {
+		} else if (zdp->z_sa_hdl == NULL) {
 			return (EIO);
 		}
 
@@ -1184,7 +1198,7 @@
 		 * We don't allow recursive attributes..
 		 * Maybe someday we will.
 		 */
-		if (zdp->z_phys->zp_flags & ZFS_XATTR) {
+		if (zdp->z_pflags & ZFS_XATTR) {
 			ZFS_EXIT(zfsvfs);
 			return (EINVAL);
 		}
@@ -1277,7 +1291,7 @@
 	ksid_t		*ksid;
 	uid_t		uid;
 	gid_t		gid = crgetgid(cr);
-	zfs_acl_ids_t	acl_ids;
+	zfs_acl_ids_t   acl_ids;
 	boolean_t	fuid_dirtied;
 
 	/*
@@ -1344,6 +1358,7 @@
 			return (error);
 		}
 	}
+
 	if (zp == NULL) {
 		uint64_t txtype;
 
@@ -1359,7 +1374,8 @@
 		 * We only support the creation of regular files in
 		 * extended attribute directories.
 		 */
-		if ((dzp->z_phys->zp_flags & ZFS_XATTR) &&
+
+		if ((dzp->z_pflags & ZFS_XATTR) &&
 		    (vap->va_type != VREG)) {
 			error = EINVAL;
 			goto out;
@@ -1375,15 +1391,19 @@
 		}
 
 		tx = dmu_tx_create(os);
-		dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
+
+		dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
+		    ZFS_SA_BASE_ATTR_SIZE);
+
 		fuid_dirtied = zfsvfs->z_fuid_dirty;
 		if (fuid_dirtied)
 			zfs_fuid_txhold(zfsvfs, tx);
-		dmu_tx_hold_bonus(tx, dzp->z_id);
 		dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
-		if (acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
+		dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
+		if (!zfsvfs->z_use_sa &&
+		    acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
 			dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
-			    0, SPA_MAXBLOCKSIZE);
+			    0, acl_ids.z_aclp->z_acl_bytes);
 		}
 		error = dmu_tx_assign(tx, TXG_NOWAIT);
 		if (error) {
@@ -1398,13 +1418,12 @@
 			ZFS_EXIT(zfsvfs);
 			return (error);
 		}
-		zfs_mknode(dzp, vap, tx, cr, 0, &zp, 0, &acl_ids);
+		zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
 
 		if (fuid_dirtied)
 			zfs_fuid_sync(zfsvfs, tx);
 
 		(void) zfs_link_create(dl, zp, tx, ZNEW);
-
 		txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap);
 		if (flag & FIGNORECASE)
 			txtype |= TX_CI;
@@ -1490,6 +1509,9 @@
  *	dvp - ctime|mtime
  *	 vp - ctime (if nlink > 0)
  */
+
+uint64_t null_xattr = 0;
+
 /*ARGSUSED*/
 static int
 zfs_remove(vnode_t *dvp, char *name, cred_t *cr, caller_context_t *ct,
@@ -1500,7 +1522,8 @@
 	vnode_t		*vp;
 	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
 	zilog_t		*zilog;
-	uint64_t	acl_obj, xattr_obj;
+	uint64_t	acl_obj, xattr_obj = 0;
+	uint64_t 	xattr_obj_unlinked = 0;
 	zfs_dirlock_t	*dl;
 	dmu_tx_t	*tx;
 	boolean_t	may_delete_now, delete_now = FALSE;
@@ -1566,24 +1589,29 @@
 	 */
 	tx = dmu_tx_create(zfsvfs->z_os);
 	dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
-	dmu_tx_hold_bonus(tx, zp->z_id);
+	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
+	zfs_sa_upgrade_txholds(tx, zp);
+	zfs_sa_upgrade_txholds(tx, dzp);
 	if (may_delete_now) {
 		toobig =
-		    zp->z_phys->zp_size > zp->z_blksz * DMU_MAX_DELETEBLKCNT;
+		    zp->z_size > zp->z_blksz * DMU_MAX_DELETEBLKCNT;
 		/* if the file is too big, only hold_free a token amount */
 		dmu_tx_hold_free(tx, zp->z_id, 0,
 		    (toobig ? DMU_MAX_ACCESS : DMU_OBJECT_END));
 	}
 
 	/* are there any extended attributes? */
-	if ((xattr_obj = zp->z_phys->zp_xattr) != 0) {
-		/* XXX - do we need this if we are deleting? */
-		dmu_tx_hold_bonus(tx, xattr_obj);
+	error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
+	    &xattr_obj, sizeof (xattr_obj));
+	if (xattr_obj) {
+		error = zfs_zget(zfsvfs, xattr_obj, &xzp);
+		ASSERT3U(error, ==, 0);
+		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
+		dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
 	}
 
 	/* are there any additional acls */
-	if ((acl_obj = zp->z_phys->zp_acl.z_acl_extern_obj) != 0 &&
-	    may_delete_now)
+	if ((acl_obj = ZFS_EXTERNAL_ACL(zp)) != 0 && may_delete_now)
 		dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END);
 
 	/* charge as an update -- would be nice not to charge at all */
@@ -1616,26 +1644,37 @@
 	}
 
 	if (unlinked) {
+
 		mutex_enter(&vp->v_lock);
+
+		(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
+		    &xattr_obj_unlinked, sizeof (xattr_obj_unlinked));
 		delete_now = may_delete_now && !toobig &&
 		    vp->v_count == 1 && !vn_has_cached_data(vp) &&
-		    zp->z_phys->zp_xattr == xattr_obj &&
-		    zp->z_phys->zp_acl.z_acl_extern_obj == acl_obj;
+		    xattr_obj == xattr_obj_unlinked && ZFS_EXTERNAL_ACL(zp) ==
+		    acl_obj;
 		mutex_exit(&vp->v_lock);
 	}
 
 	if (delete_now) {
-		if (zp->z_phys->zp_xattr) {
-			error = zfs_zget(zfsvfs, zp->z_phys->zp_xattr, &xzp);
-			ASSERT3U(error, ==, 0);
-			ASSERT3U(xzp->z_phys->zp_links, ==, 2);
-			dmu_buf_will_dirty(xzp->z_dbuf, tx);
+		if (xattr_obj_unlinked) {
+			ASSERT3U(xzp->z_links, ==, 2);
 			mutex_enter(&xzp->z_lock);
 			xzp->z_unlinked = 1;
-			xzp->z_phys->zp_links = 0;
+			xzp->z_links = 0;
+			error = sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs),
+			    &xzp->z_links, sizeof (xzp->z_links), tx);
+			ASSERT3U(error,  ==,  0);
 			mutex_exit(&xzp->z_lock);
 			zfs_unlinked_add(xzp, tx);
-			zp->z_phys->zp_xattr = 0; /* probably unnecessary */
+			if (zp->z_is_sa)
+				error = sa_remove(zp->z_sa_hdl,
+				    SA_ZPL_XATTR(zfsvfs), tx);
+			else
+				error = sa_update(zp->z_sa_hdl,
+				    SA_ZPL_XATTR(zfsvfs), &null_xattr,
+				    sizeof (uint64_t), tx);
+			ASSERT3U(error, ==, 0);
 		}
 		mutex_enter(&zp->z_lock);
 		mutex_enter(&vp->v_lock);
@@ -1707,7 +1746,7 @@
 	ksid_t		*ksid;
 	uid_t		uid;
 	gid_t		gid = crgetgid(cr);
-	zfs_acl_ids_t	acl_ids;
+	zfs_acl_ids_t   acl_ids;
 	boolean_t	fuid_dirtied;
 
 	ASSERT(vap->va_type == VDIR);
@@ -1731,7 +1770,7 @@
 	ZFS_VERIFY_ZP(dzp);
 	zilog = zfsvfs->z_log;
 
-	if (dzp->z_phys->zp_flags & ZFS_XATTR) {
+	if (dzp->z_pflags & ZFS_XATTR) {
 		ZFS_EXIT(zfsvfs);
 		return (EINVAL);
 	}
@@ -1791,9 +1830,14 @@
 	fuid_dirtied = zfsvfs->z_fuid_dirty;
 	if (fuid_dirtied)
 		zfs_fuid_txhold(zfsvfs, tx);
-	if (acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE)
-		dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
-		    0, SPA_MAXBLOCKSIZE);
+	if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
+		dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
+		    acl_ids.z_aclp->z_acl_bytes);
+	}
+
+	dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
+	    ZFS_SA_BASE_ATTR_SIZE);
+
 	error = dmu_tx_assign(tx, TXG_NOWAIT);
 	if (error) {
 		zfs_acl_ids_free(&acl_ids);
@@ -1811,10 +1855,11 @@
 	/*
 	 * Create new node.
 	 */
-	zfs_mknode(dzp, vap, tx, cr, 0, &zp, 0, &acl_ids);
+	zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
 
 	if (fuid_dirtied)
 		zfs_fuid_sync(zfsvfs, tx);
+
 	/*
 	 * Now put new name in parent dir.
 	 */
@@ -1829,6 +1874,7 @@
 	    acl_ids.z_fuidp, vap);
 
 	zfs_acl_ids_free(&acl_ids);
+
 	dmu_tx_commit(tx);
 
 	zfs_dirent_unlock(dl);
@@ -1920,8 +1966,10 @@
 
 	tx = dmu_tx_create(zfsvfs->z_os);
 	dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
-	dmu_tx_hold_bonus(tx, zp->z_id);
+	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
 	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
+	zfs_sa_upgrade_txholds(tx, zp);
+	zfs_sa_upgrade_txholds(tx, dzp);
 	error = dmu_tx_assign(tx, TXG_NOWAIT);
 	if (error) {
 		rw_exit(&zp->z_parent_lock);
@@ -2003,6 +2051,7 @@
 	zap_attribute_t	zap;
 	uint_t		bytes_wanted;
 	uint64_t	offset; /* must be unsigned; checks for < 1 */
+	uint64_t	parent;
 	int		local_eof;
 	int		outcount;
 	int		error;
@@ -2012,6 +2061,12 @@
 	ZFS_ENTER(zfsvfs);
 	ZFS_VERIFY_ZP(zp);
 
+	if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
+	    &parent, sizeof (parent))) != 0) {
+		ZFS_EXIT(zfsvfs);
+		return (error);
+	}
+
 	/*
 	 * If we are not given an eof variable,
 	 * use a local one.
@@ -2099,7 +2154,7 @@
 		} else if (offset == 1) {
 			(void) strcpy(zap.za_name, "..");
 			zap.za_normalization_conflict = 0;
-			objnum = zp->z_phys->zp_parent;
+			objnum = parent;
 		} else if (offset == 2 && zfs_show_ctldir(zp)) {
 			(void) strcpy(zap.za_name, ZFS_CTLDIR_NAME);
 			zap.za_normalization_conflict = 0;
@@ -2293,24 +2348,32 @@
 {
 	znode_t *zp = VTOZ(vp);
 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
-	znode_phys_t *pzp;
 	int	error = 0;
 	uint64_t links;
+	uint64_t mtime[2], ctime[2];
 	xvattr_t *xvap = (xvattr_t *)vap;	/* vap may be an xvattr_t * */
 	xoptattr_t *xoap = NULL;
 	boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
+	sa_bulk_attr_t bulk[2];
+	int count = 0;
 
 	ZFS_ENTER(zfsvfs);
 	ZFS_VERIFY_ZP(zp);
-	pzp = zp->z_phys;
+
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
+
+	if ((error = sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) != 0) {
+		ZFS_EXIT(zfsvfs);
+		return (error);
+	}
 
 	/*
 	 * If ACL is trivial don't bother looking for ACE_READ_ATTRIBUTES.
 	 * Also, if we are the owner don't bother, since owner should
 	 * always be allowed to read basic attributes of file.
 	 */
-	if (!(pzp->zp_flags & ZFS_ACL_TRIVIAL) &&
-	    (pzp->zp_uid != crgetuid(cr))) {
+	if (!(zp->z_pflags & ZFS_ACL_TRIVIAL) && (zp->z_uid != crgetuid(cr))) {
 		if (error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0,
 		    skipaclchk, cr)) {
 			ZFS_EXIT(zfsvfs);
@@ -2325,16 +2388,17 @@
 
 	mutex_enter(&zp->z_lock);
 	vap->va_type = vp->v_type;
-	vap->va_mode = pzp->zp_mode & MODEMASK;
-	zfs_fuid_map_ids(zp, cr, &vap->va_uid, &vap->va_gid);
+	vap->va_mode = zp->z_mode & MODEMASK;
+	vap->va_uid = zp->z_uid;
+	vap->va_gid = zp->z_gid;
 	vap->va_fsid = zp->z_zfsvfs->z_vfs->vfs_dev;
 	vap->va_nodeid = zp->z_id;
 	if ((vp->v_flag & VROOT) && zfs_show_ctldir(zp))
-		links = pzp->zp_links + 1;
+		links = zp->z_links + 1;
 	else
-		links = pzp->zp_links;
+		links = zp->z_links;
 	vap->va_nlink = MIN(links, UINT32_MAX);	/* nlink_t limit! */
-	vap->va_size = pzp->zp_size;
+	vap->va_size = zp->z_size;
 	vap->va_rdev = vp->v_rdev;
 	vap->va_seq = zp->z_seq;
 
@@ -2345,115 +2409,97 @@
 	if ((xoap = xva_getxoptattr(xvap)) != NULL && zfsvfs->z_use_fuids) {
 		if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) {
 			xoap->xoa_archive =
-			    ((pzp->zp_flags & ZFS_ARCHIVE) != 0);
+			    ((zp->z_pflags & ZFS_ARCHIVE) != 0);
 			XVA_SET_RTN(xvap, XAT_ARCHIVE);
 		}
 
 		if (XVA_ISSET_REQ(xvap, XAT_READONLY)) {
 			xoap->xoa_readonly =
-			    ((pzp->zp_flags & ZFS_READONLY) != 0);
+			    ((zp->z_pflags & ZFS_READONLY) != 0);
 			XVA_SET_RTN(xvap, XAT_READONLY);
 		}
 
 		if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) {
 			xoap->xoa_system =
-			    ((pzp->zp_flags & ZFS_SYSTEM) != 0);
+			    ((zp->z_pflags & ZFS_SYSTEM) != 0);
 			XVA_SET_RTN(xvap, XAT_SYSTEM);
 		}
 
 		if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) {
 			xoap->xoa_hidden =
-			    ((pzp->zp_flags & ZFS_HIDDEN) != 0);
+			    ((zp->z_pflags & ZFS_HIDDEN) != 0);
 			XVA_SET_RTN(xvap, XAT_HIDDEN);
 		}
 
 		if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
 			xoap->xoa_nounlink =
-			    ((pzp->zp_flags & ZFS_NOUNLINK) != 0);
+			    ((zp->z_pflags & ZFS_NOUNLINK) != 0);
 			XVA_SET_RTN(xvap, XAT_NOUNLINK);
 		}
 
 		if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
 			xoap->xoa_immutable =
-			    ((pzp->zp_flags & ZFS_IMMUTABLE) != 0);
+			    ((zp->z_pflags & ZFS_IMMUTABLE) != 0);
 			XVA_SET_RTN(xvap, XAT_IMMUTABLE);
 		}
 
 		if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
 			xoap->xoa_appendonly =
-			    ((pzp->zp_flags & ZFS_APPENDONLY) != 0);
+			    ((zp->z_pflags & ZFS_APPENDONLY) != 0);
 			XVA_SET_RTN(xvap, XAT_APPENDONLY);
 		}
 
 		if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
 			xoap->xoa_nodump =
-			    ((pzp->zp_flags & ZFS_NODUMP) != 0);
+			    ((zp->z_pflags & ZFS_NODUMP) != 0);
 			XVA_SET_RTN(xvap, XAT_NODUMP);
 		}
 
 		if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) {
 			xoap->xoa_opaque =
-			    ((pzp->zp_flags & ZFS_OPAQUE) != 0);
+			    ((zp->z_pflags & ZFS_OPAQUE) != 0);
 			XVA_SET_RTN(xvap, XAT_OPAQUE);
 		}
 
 		if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
 			xoap->xoa_av_quarantined =
-			    ((pzp->zp_flags & ZFS_AV_QUARANTINED) != 0);
+			    ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0);
 			XVA_SET_RTN(xvap, XAT_AV_QUARANTINED);
 		}
 
 		if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
 			xoap->xoa_av_modified =
-			    ((pzp->zp_flags & ZFS_AV_MODIFIED) != 0);
+			    ((zp->z_pflags & ZFS_AV_MODIFIED) != 0);
 			XVA_SET_RTN(xvap, XAT_AV_MODIFIED);
 		}
 
 		if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) &&
-		    vp->v_type == VREG &&
-		    (pzp->zp_flags & ZFS_BONUS_SCANSTAMP)) {
-			size_t len;
-			dmu_object_info_t doi;
-
-			/*
-			 * Only VREG files have anti-virus scanstamps, so we
-			 * won't conflict with symlinks in the bonus buffer.
-			 */
-			dmu_object_info_from_db(zp->z_dbuf, &doi);
-			len = sizeof (xoap->xoa_av_scanstamp) +
-			    sizeof (znode_phys_t);
-			if (len <= doi.doi_bonus_size) {
-				/*
-				 * pzp points to the start of the
-				 * znode_phys_t. pzp + 1 points to the
-				 * first byte after the znode_phys_t.
-				 */
-				(void) memcpy(xoap->xoa_av_scanstamp,
-				    pzp + 1,
-				    sizeof (xoap->xoa_av_scanstamp));
-				XVA_SET_RTN(xvap, XAT_AV_SCANSTAMP);
-			}
+		    vp->v_type == VREG) {
+			zfs_sa_get_scanstamp(zp, xvap);
 		}
 
 		if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) {
-			ZFS_TIME_DECODE(&xoap->xoa_createtime, pzp->zp_crtime);
+			uint64_t times[2];
+
+			(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_CRTIME(zfsvfs),
+			    times, sizeof (times));
+			ZFS_TIME_DECODE(&xoap->xoa_createtime, times);
 			XVA_SET_RTN(xvap, XAT_CREATETIME);
 		}
 
 		if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
-			xoap->xoa_reparse =
-			    ((pzp->zp_flags & ZFS_REPARSE) != 0);
+			xoap->xoa_reparse = ((zp->z_pflags & ZFS_REPARSE) != 0);
 			XVA_SET_RTN(xvap, XAT_REPARSE);
 		}
 	}
 
-	ZFS_TIME_DECODE(&vap->va_atime, pzp->zp_atime);
-	ZFS_TIME_DECODE(&vap->va_mtime, pzp->zp_mtime);
-	ZFS_TIME_DECODE(&vap->va_ctime, pzp->zp_ctime);
+	ZFS_TIME_DECODE(&vap->va_atime, zp->z_atime);
+	ZFS_TIME_DECODE(&vap->va_mtime, mtime);
+	ZFS_TIME_DECODE(&vap->va_ctime, ctime);
 
 	mutex_exit(&zp->z_lock);
 
-	dmu_object_size_from_db(zp->z_dbuf, &vap->va_blksize, &vap->va_nblocks);
+	sa_object_size(zp->z_sa_hdl, &vap->va_blksize, &vap->va_nblocks);
 
 	if (zp->z_blksz == 0) {
 		/*
@@ -2490,7 +2536,6 @@
 	caller_context_t *ct)
 {
 	znode_t		*zp = VTOZ(vp);
-	znode_phys_t	*pzp;
 	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
 	zilog_t		*zilog;
 	dmu_tx_t	*tx;
@@ -2501,15 +2546,19 @@
 	int		trim_mask = 0;
 	uint64_t	new_mode;
 	uint64_t	new_uid, new_gid;
+	uint64_t	xattr_obj = 0;
+	uint64_t	mtime[2], ctime[2];
 	znode_t		*attrzp;
 	int		need_policy = FALSE;
-	int		err;
+	int		err, err2;
 	zfs_fuid_info_t *fuidp = NULL;
 	xvattr_t *xvap = (xvattr_t *)vap;	/* vap may be an xvattr_t * */
 	xoptattr_t	*xoap;
 	zfs_acl_t	*aclp = NULL;
 	boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
-	boolean_t fuid_dirtied = B_FALSE;
+	boolean_t	fuid_dirtied = B_FALSE;
+	sa_bulk_attr_t	bulk[7], xattr_bulk[7];
+	int		count = 0, xattr_count = 0;
 
 	if (mask == 0)
 		return (0);
@@ -2520,7 +2569,6 @@
 	ZFS_ENTER(zfsvfs);
 	ZFS_VERIFY_ZP(zp);
 
-	pzp = zp->z_phys;
 	zilog = zfsvfs->z_log;
 
 	/*
@@ -2557,14 +2605,14 @@
 	/*
 	 * Immutable files can only alter immutable bit and atime
 	 */
-	if ((pzp->zp_flags & ZFS_IMMUTABLE) &&
+	if ((zp->z_pflags & ZFS_IMMUTABLE) &&
 	    ((mask & (AT_SIZE|AT_UID|AT_GID|AT_MTIME|AT_MODE)) ||
 	    ((mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) {
 		ZFS_EXIT(zfsvfs);
 		return (EPERM);
 	}
 
-	if ((mask & AT_SIZE) && (pzp->zp_flags & ZFS_READONLY)) {
+	if ((mask & AT_SIZE) && (zp->z_pflags & ZFS_READONLY)) {
 		ZFS_EXIT(zfsvfs);
 		return (EPERM);
 	}
@@ -2621,9 +2669,10 @@
 	    XVA_ISSET_REQ(xvap, XAT_READONLY) ||
 	    XVA_ISSET_REQ(xvap, XAT_ARCHIVE) ||
 	    XVA_ISSET_REQ(xvap, XAT_CREATETIME) ||
-	    XVA_ISSET_REQ(xvap, XAT_SYSTEM))))
+	    XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) {
 		need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0,
 		    skipaclchk, cr);
+	}
 
 	if (mask & (AT_UID|AT_GID)) {
 		int	idmask = (mask & (AT_UID|AT_GID));
@@ -2636,7 +2685,7 @@
 		 */
 
 		if (!(mask & AT_MODE))
-			vap->va_mode = pzp->zp_mode;
+			vap->va_mode = zp->z_mode;
 
 		/*
 		 * Take ownership or chgrp to group we are a member of
@@ -2674,8 +2723,9 @@
 	}
 
 	mutex_enter(&zp->z_lock);
-	oldva.va_mode = pzp->zp_mode;
-	zfs_fuid_map_ids(zp, cr, &oldva.va_uid, &oldva.va_gid);
+	oldva.va_mode = zp->z_mode;
+	oldva.va_uid = zp->z_uid;
+	oldva.va_gid = zp->z_gid;
 	if (mask & AT_XVATTR) {
 		/*
 		 * Update xvattr mask to include only those attributes
@@ -2686,7 +2736,7 @@
 		 */
 		if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
 			if (xoap->xoa_appendonly !=
-			    ((pzp->zp_flags & ZFS_APPENDONLY) != 0)) {
+			    ((zp->z_pflags & ZFS_APPENDONLY) != 0)) {
 				need_policy = TRUE;
 			} else {
 				XVA_CLR_REQ(xvap, XAT_APPENDONLY);
@@ -2696,7 +2746,7 @@
 
 		if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
 			if (xoap->xoa_nounlink !=
-			    ((pzp->zp_flags & ZFS_NOUNLINK) != 0)) {
+			    ((zp->z_pflags & ZFS_NOUNLINK) != 0)) {
 				need_policy = TRUE;
 			} else {
 				XVA_CLR_REQ(xvap, XAT_NOUNLINK);
@@ -2706,7 +2756,7 @@
 
 		if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
 			if (xoap->xoa_immutable !=
-			    ((pzp->zp_flags & ZFS_IMMUTABLE) != 0)) {
+			    ((zp->z_pflags & ZFS_IMMUTABLE) != 0)) {
 				need_policy = TRUE;
 			} else {
 				XVA_CLR_REQ(xvap, XAT_IMMUTABLE);
@@ -2716,7 +2766,7 @@
 
 		if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
 			if (xoap->xoa_nodump !=
-			    ((pzp->zp_flags & ZFS_NODUMP) != 0)) {
+			    ((zp->z_pflags & ZFS_NODUMP) != 0)) {
 				need_policy = TRUE;
 			} else {
 				XVA_CLR_REQ(xvap, XAT_NODUMP);
@@ -2726,7 +2776,7 @@
 
 		if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
 			if (xoap->xoa_av_modified !=
-			    ((pzp->zp_flags & ZFS_AV_MODIFIED) != 0)) {
+			    ((zp->z_pflags & ZFS_AV_MODIFIED) != 0)) {
 				need_policy = TRUE;
 			} else {
 				XVA_CLR_REQ(xvap, XAT_AV_MODIFIED);
@@ -2738,7 +2788,7 @@
 			if ((vp->v_type != VREG &&
 			    xoap->xoa_av_quarantined) ||
 			    xoap->xoa_av_quarantined !=
-			    ((pzp->zp_flags & ZFS_AV_QUARANTINED) != 0)) {
+			    ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0)) {
 				need_policy = TRUE;
 			} else {
 				XVA_CLR_REQ(xvap, XAT_AV_QUARANTINED);
@@ -2805,78 +2855,83 @@
 	 */
 	mask = vap->va_mask;
 
-	tx = dmu_tx_create(zfsvfs->z_os);
-	dmu_tx_hold_bonus(tx, zp->z_id);
-
-	if (mask & AT_MODE) {
-		uint64_t pmode = pzp->zp_mode;
-
-		new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT);
-
-		if (err = zfs_acl_chmod_setattr(zp, &aclp, new_mode))
-			goto out;
-		if (pzp->zp_acl.z_acl_extern_obj) {
-			/* Are we upgrading ACL from old V0 format to new V1 */
-			if (zfsvfs->z_version <= ZPL_VERSION_FUID &&
-			    pzp->zp_acl.z_acl_version ==
-			    ZFS_ACL_VERSION_INITIAL) {
-				dmu_tx_hold_free(tx,
-				    pzp->zp_acl.z_acl_extern_obj, 0,
-				    DMU_OBJECT_END);
-				dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
-				    0, aclp->z_acl_bytes);
-			} else {
-				dmu_tx_hold_write(tx,
-				    pzp->zp_acl.z_acl_extern_obj, 0,
-				    aclp->z_acl_bytes);
-			}
-		} else if (aclp->z_acl_bytes > ZFS_ACE_SPACE) {
-			dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
-			    0, aclp->z_acl_bytes);
-		}
-	}
-
-	if (mask & (AT_UID | AT_GID)) {
-		if (pzp->zp_xattr) {
-			err = zfs_zget(zp->z_zfsvfs, pzp->zp_xattr, &attrzp);
+	if ((mask & (AT_UID | AT_GID))) {
+		(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &xattr_obj,
+		    sizeof (xattr_obj));
+
+		if (xattr_obj) {
+			err = zfs_zget(zp->z_zfsvfs, xattr_obj, &attrzp);
 			if (err)
-				goto out;
-			dmu_tx_hold_bonus(tx, attrzp->z_id);
+				goto out2;
 		}
 		if (mask & AT_UID) {
 			new_uid = zfs_fuid_create(zfsvfs,
 			    (uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp);
-			if (new_uid != pzp->zp_uid &&
-			    zfs_usergroup_overquota(zfsvfs, B_FALSE, new_uid)) {
+			if (vap->va_uid != zp->z_uid &&
+			    zfs_fuid_overquota(zfsvfs, B_FALSE, new_uid)) {
 				err = EDQUOT;
-				goto out;
+				goto out2;
 			}
 		}
 
 		if (mask & AT_GID) {
 			new_gid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_gid,
 			    cr, ZFS_GROUP, &fuidp);
-			if (new_gid != pzp->zp_gid &&
-			    zfs_usergroup_overquota(zfsvfs, B_TRUE, new_gid)) {
+			if (new_gid != zp->z_gid &&
+			    zfs_fuid_overquota(zfsvfs, B_TRUE, new_gid)) {
 				err = EDQUOT;
-				goto out;
-			}
-		}
-		fuid_dirtied = zfsvfs->z_fuid_dirty;
-		if (fuid_dirtied) {
-			if (zfsvfs->z_fuid_obj == 0) {
-				dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
-				dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
-				    FUID_SIZE_ESTIMATE(zfsvfs));
-				dmu_tx_hold_zap(tx, MASTER_NODE_OBJ,
-				    FALSE, NULL);
-			} else {
-				dmu_tx_hold_bonus(tx, zfsvfs->z_fuid_obj);
-				dmu_tx_hold_write(tx, zfsvfs->z_fuid_obj, 0,
-				    FUID_SIZE_ESTIMATE(zfsvfs));
+				goto out2;
 			}
 		}
 	}
+	tx = dmu_tx_create(zfsvfs->z_os);
+
+	if (mask & AT_MODE) {
+		uint64_t pmode = zp->z_mode;
+		new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT);
+
+		if (err = zfs_acl_chmod_setattr(zp, &aclp, new_mode))
+			goto out;
+
+		if (!zp->z_is_sa && ZFS_EXTERNAL_ACL(zp)) {
+			/*
+			 * Are we upgrading ACL from old V0 format
+			 * to V1 format?
+			 */
+			if (zfsvfs->z_version <= ZPL_VERSION_FUID &&
+			    ZNODE_ACL_VERSION(zp) ==
+			    ZFS_ACL_VERSION_INITIAL) {
+				dmu_tx_hold_free(tx,
+				    ZFS_EXTERNAL_ACL(zp), 0,
+				    DMU_OBJECT_END);
+				dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
+				    0, aclp->z_acl_bytes);
+			} else {
+				dmu_tx_hold_write(tx, ZFS_EXTERNAL_ACL(zp), 0,
+				    aclp->z_acl_bytes);
+			}
+		} else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) {
+			dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
+			    0, aclp->z_acl_bytes);
+		}
+		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
+	} else {
+		if ((mask & AT_XVATTR) &&
+		    XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP))
+			dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
+		else
+			dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
+	}
+
+	if (attrzp) {
+		dmu_tx_hold_sa(tx, attrzp->z_sa_hdl, B_FALSE);
+	}
+
+	fuid_dirtied = zfsvfs->z_fuid_dirty;
+	if (fuid_dirtied)
+		zfs_fuid_txhold(zfsvfs, tx);
+
+	zfs_sa_upgrade_txholds(tx, zp);
 
 	err = dmu_tx_assign(tx, TXG_NOWAIT);
 	if (err) {
@@ -2885,8 +2940,7 @@
 		goto out;
 	}
 
-	dmu_buf_will_dirty(zp->z_dbuf, tx);
-
+	count = 0;
 	/*
 	 * Set each attribute requested.
 	 * We group settings according to the locks they need to acquire.
@@ -2897,9 +2951,38 @@
 
 	mutex_enter(&zp->z_lock);
 
+	if (attrzp)
+		mutex_enter(&attrzp->z_lock);
+
+	if (mask & AT_UID) {
+		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL,
+		    &new_uid, sizeof (new_uid));
+		zp->z_uid = zfs_fuid_map_id(zfsvfs, new_uid, cr, ZFS_OWNER);
+		if (attrzp) {
+			SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
+			    SA_ZPL_UID(zfsvfs), NULL, &new_uid,
+			    sizeof (new_uid));
+			attrzp->z_gid = zp->z_uid;
+		}
+	}
+
+	if (mask & AT_GID) {
+		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL,
+		    &new_gid, sizeof (new_gid));
+		zp->z_gid = zfs_fuid_map_id(zfsvfs, new_gid, cr, ZFS_GROUP);
+		if (attrzp) {
+			SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
+			    SA_ZPL_GID(zfsvfs), NULL, &new_gid,
+			    sizeof (new_gid));
+			attrzp->z_gid = zp->z_gid;
+		}
+	}
+
 	if (mask & AT_MODE) {
 		mutex_enter(&zp->z_acl_lock);
-		zp->z_phys->zp_mode = new_mode;
+		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL,
+		    &new_mode, sizeof (new_mode));
+		zp->z_mode = new_mode;
 		err = zfs_aclset_common(zp, aclp, cr, tx);
 		ASSERT3U(err, ==, 0);
 		zp->z_acl_cached = aclp;
@@ -2908,34 +2991,42 @@
 	}
 
 	if (attrzp)
-		mutex_enter(&attrzp->z_lock);
-
-	if (mask & AT_UID) {
-		pzp->zp_uid = new_uid;
-		if (attrzp)
-			attrzp->z_phys->zp_uid = new_uid;
+		mutex_exit(&attrzp->z_lock);
+
+	if (mask & AT_ATIME) {
+		ZFS_TIME_ENCODE(&vap->va_atime, zp->z_atime);
+		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL,
+		    &zp->z_atime, sizeof (zp->z_atime));
 	}
 
-	if (mask & AT_GID) {
-		pzp->zp_gid = new_gid;
-		if (attrzp)
-			attrzp->z_phys->zp_gid = new_gid;
+	if (mask & AT_MTIME) {
+		ZFS_TIME_ENCODE(&vap->va_mtime, mtime);
+		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
+		    mtime, sizeof (mtime));
 	}
 
-	if (attrzp)
-		mutex_exit(&attrzp->z_lock);
-
-	if (mask & AT_ATIME)
-		ZFS_TIME_ENCODE(&vap->va_atime, pzp->zp_atime);
-
-	if (mask & AT_MTIME)
-		ZFS_TIME_ENCODE(&vap->va_mtime, pzp->zp_mtime);
-
 	/* XXX - shouldn't this be done *before* the ATIME/MTIME checks? */
-	if (mask & AT_SIZE)
-		zfs_time_stamper_locked(zp, CONTENT_MODIFIED, tx);
-	else if (mask != 0)
-		zfs_time_stamper_locked(zp, STATE_CHANGED, tx);
+	if (mask & AT_SIZE && !(mask & AT_MTIME)) {
+		if (!(mask & AT_MTIME))
+			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs),
+			    NULL, mtime, sizeof (mtime));
+		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
+		    &ctime, sizeof (ctime));
+		zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime,
+		    B_TRUE);
+	} else if (mask != 0) {
+		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
+		    &ctime, sizeof (ctime));
+		zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime,
+		    B_TRUE);
+		if (attrzp) {
+			SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
+			    SA_ZPL_CTIME(zfsvfs), NULL,
+			    &ctime, sizeof (ctime));
+			zfs_tstamp_update_setup(attrzp, STATE_CHANGED,
+			    mtime, ctime, B_TRUE);
+		}
+	}
 	/*
 	 * Do this after setting timestamps to prevent timestamp
 	 * update from toggling bit
@@ -2967,20 +3058,12 @@
 			XVA_SET_REQ(xvap, XAT_AV_QUARANTINED);
 		}
 
-		if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) {
-			size_t len;
-			dmu_object_info_t doi;
-
+		if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP))
 			ASSERT(vp->v_type == VREG);
 
-			/* Grow the bonus buffer if necessary. */
-			dmu_object_info_from_db(zp->z_dbuf, &doi);
-			len = sizeof (xoap->xoa_av_scanstamp) +
-			    sizeof (znode_phys_t);
-			if (len > doi.doi_bonus_size)
-				VERIFY(dmu_set_bonus(zp->z_dbuf, len, tx) == 0);
-		}
-		zfs_xvattr_set(zp, xvap);
+		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
+		    &zp->z_pflags, sizeof (zp->z_pflags));
+		zfs_xvattr_set(zp, xvap, tx);
 	}
 
 	if (fuid_dirtied)
@@ -2992,9 +3075,14 @@
 	mutex_exit(&zp->z_lock);
 
 out:
+	if (err == 0 && attrzp) {
+		err2 = sa_bulk_update(attrzp->z_sa_hdl, xattr_bulk,
+		    xattr_count, tx);
+		ASSERT(err2 == 0);
+	}
+
 	if (attrzp)
 		VN_RELE(ZTOV(attrzp));
-
 	if (aclp)
 		zfs_acl_free(aclp);
 
@@ -3003,14 +3091,17 @@
 		fuidp = NULL;
 	}
 
-	if (err)
+	if (err) {
 		dmu_tx_abort(tx);
-	else
+		if (err == ERESTART)
+			goto top;
+	} else {
+		err2 = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
 		dmu_tx_commit(tx);
-
-	if (err == ERESTART)
-		goto top;
-
+	}
+
+
+out2:
 	ZFS_EXIT(zfsvfs);
 	return (err);
 }
@@ -3050,7 +3141,7 @@
 	zfs_zlock_t	*zl;
 	znode_t		*zp = tdzp;
 	uint64_t	rootid = zp->z_zfsvfs->z_root;
-	uint64_t	*oidp = &zp->z_id;
+	uint64_t	oidp = zp->z_id;
 	krwlock_t	*rwlp = &szp->z_parent_lock;
 	krw_t		rw = RW_WRITER;
 
@@ -3072,7 +3163,7 @@
 				zfs_rename_unlock(&zl);
 				*zlpp = NULL;
 				zp = tdzp;
-				oidp = &zp->z_id;
+				oidp = zp->z_id;
 				rwlp = &szp->z_parent_lock;
 				rw = RW_WRITER;
 				continue;
@@ -3090,19 +3181,20 @@
 		zl->zl_next = *zlpp;
 		*zlpp = zl;
 
-		if (*oidp == szp->z_id)		/* We're a descendant of szp */
+		if (oidp == szp->z_id)		/* We're a descendant of szp */
 			return (EINVAL);
 
-		if (*oidp == rootid)		/* We've hit the top */
+		if (oidp == rootid)		/* We've hit the top */
 			return (0);
 
 		if (rw == RW_READER) {		/* i.e. not the first pass */
-			int error = zfs_zget(zp->z_zfsvfs, *oidp, &zp);
+			int error = zfs_zget(zp->z_zfsvfs, oidp, &zp);
 			if (error)
 				return (error);
 			zl->zl_znode = zp;
 		}
-		oidp = &zp->z_phys->zp_parent;
+		(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zp->z_zfsvfs),
+		    &oidp, sizeof (oidp));
 		rwlp = &zp->z_parent_lock;
 		rw = RW_READER;
 
@@ -3182,8 +3274,7 @@
 	 * by renaming a linked file into/outof an attribute directory.
 	 * See the comment in zfs_link() for why this is considered bad.
 	 */
-	if ((tdzp->z_phys->zp_flags & ZFS_XATTR) !=
-	    (sdzp->z_phys->zp_flags & ZFS_XATTR)) {
+	if ((tdzp->z_pflags & ZFS_XATTR) != (sdzp->z_pflags & ZFS_XATTR)) {
 		ZFS_EXIT(zfsvfs);
 		return (EINVAL);
 	}
@@ -3363,14 +3454,20 @@
 	}
 
 	tx = dmu_tx_create(zfsvfs->z_os);
-	dmu_tx_hold_bonus(tx, szp->z_id);	/* nlink changes */
-	dmu_tx_hold_bonus(tx, sdzp->z_id);	/* nlink changes */
+	dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
+	dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE);
 	dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm);
 	dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm);
-	if (sdzp != tdzp)
-		dmu_tx_hold_bonus(tx, tdzp->z_id);	/* nlink changes */
-	if (tzp)
-		dmu_tx_hold_bonus(tx, tzp->z_id);	/* parent changes */
+	if (sdzp != tdzp) {
+		dmu_tx_hold_sa(tx, tdzp->z_sa_hdl, B_FALSE);
+		zfs_sa_upgrade_txholds(tx, tdzp);
+	}
+	if (tzp) {
+		dmu_tx_hold_sa(tx, tzp->z_sa_hdl, B_FALSE);
+		zfs_sa_upgrade_txholds(tx, tzp);
+	}
+
+	zfs_sa_upgrade_txholds(tx, szp);
 	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
 	error = dmu_tx_assign(tx, TXG_NOWAIT);
 	if (error) {
@@ -3401,10 +3498,14 @@
 	if (error == 0) {
 		error = zfs_link_create(tdl, szp, tx, ZRENAMING);
 		if (error == 0) {
-			szp->z_phys->zp_flags |= ZFS_AV_MODIFIED;
+			szp->z_pflags |= ZFS_AV_MODIFIED;
+
+			error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs),
+			    (void *)&szp->z_pflags, sizeof (uint64_t), tx);
+			ASSERT3U(error, ==, 0);
 
 			error = zfs_link_destroy(sdl, szp, tx, ZRENAMING, NULL);
-			ASSERT(error == 0);
+			ASSERT3U(error, ==, 0);
 
 			zfs_log_rename(zilog, tx,
 			    TX_RENAME | (flags & FIGNORECASE ? TX_CI : 0),
@@ -3462,11 +3563,12 @@
 	dmu_tx_t	*tx;
 	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
 	zilog_t		*zilog;
-	int		len = strlen(link);
+	uint64_t	len = strlen(link);
 	int		error;
 	int		zflg = ZNEW;
 	zfs_acl_ids_t	acl_ids;
 	boolean_t	fuid_dirtied;
+	uint64_t	txtype = TX_SYMLINK;
 
 	ASSERT(vap->va_type == VLNK);
 
@@ -3511,10 +3613,14 @@
 	tx = dmu_tx_create(zfsvfs->z_os);
 	fuid_dirtied = zfsvfs->z_fuid_dirty;
 	dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len));
-	dmu_tx_hold_bonus(tx, dzp->z_id);
 	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
-	if (acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE)
-		dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, SPA_MAXBLOCKSIZE);
+	dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
+	    ZFS_SA_BASE_ATTR_SIZE + len);
+	dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
+	if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
+		dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
+		    acl_ids.z_aclp->z_acl_bytes);
+	}
 	if (fuid_dirtied)
 		zfs_fuid_txhold(zfsvfs, tx);
 	error = dmu_tx_assign(tx, TXG_NOWAIT);
@@ -3531,50 +3637,33 @@
 		return (error);
 	}
 
-	dmu_buf_will_dirty(dzp->z_dbuf, tx);
-
 	/*
 	 * Create a new object for the symlink.
-	 * Put the link content into bonus buffer if it will fit;
-	 * otherwise, store it just like any other file data.
+	 * for version 4 ZPL datasets the symlink will be an SA attribute
 	 */
-	if (sizeof (znode_phys_t) + len <= dmu_bonus_max()) {
-		zfs_mknode(dzp, vap, tx, cr, 0, &zp, len, &acl_ids);
-		if (len != 0)
-			bcopy(link, zp->z_phys + 1, len);
-	} else {
-		dmu_buf_t *dbp;
-
-		zfs_mknode(dzp, vap, tx, cr, 0, &zp, 0, &acl_ids);
-
-		if (fuid_dirtied)
-			zfs_fuid_sync(zfsvfs, tx);
-		/*
-		 * Nothing can access the znode yet so no locking needed
-		 * for growing the znode's blocksize.
-		 */
-		zfs_grow_blocksize(zp, len, tx);
-
-		VERIFY(0 == dmu_buf_hold(zfsvfs->z_os,
-		    zp->z_id, 0, FTAG, &dbp));
-		dmu_buf_will_dirty(dbp, tx);
-
-		ASSERT3U(len, <=, dbp->db_size);
-		bcopy(link, dbp->db_data, len);
-		dmu_buf_rele(dbp, FTAG);
-	}
-	zp->z_phys->zp_size = len;
-
+
+	zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
+
+	if (fuid_dirtied)
+		zfs_fuid_sync(zfsvfs, tx);
+
+	if (zp->z_is_sa)
+		error = sa_update(zp->z_sa_hdl, SA_ZPL_SYMLINK(zfsvfs),
+		    link, len, tx);
+	else
+		zfs_sa_symlink(zp, link, len, tx);
+
+	zp->z_size = len;
+	(void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs),
+	    &zp->z_size, sizeof (zp->z_size), tx);
 	/*
 	 * Insert the new object into the directory.
 	 */
 	(void) zfs_link_create(dl, zp, tx, ZNEW);
-	if (error == 0) {
-		uint64_t txtype = TX_SYMLINK;
-		if (flags & FIGNORECASE)
-			txtype |= TX_CI;
-		zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link);
-	}
+
+	if (flags & FIGNORECASE)
+		txtype |= TX_CI;
+	zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link);
 
 	zfs_acl_ids_free(&acl_ids);
 
@@ -3611,29 +3700,19 @@
 {
 	znode_t		*zp = VTOZ(vp);
 	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
-	size_t		bufsz;
 	int		error;
 
 	ZFS_ENTER(zfsvfs);
 	ZFS_VERIFY_ZP(zp);
 
-	bufsz = (size_t)zp->z_phys->zp_size;
-	if (bufsz + sizeof (znode_phys_t) <= zp->z_dbuf->db_size) {
-		error = uiomove(zp->z_phys + 1,
-		    MIN((size_t)bufsz, uio->uio_resid), UIO_READ, uio);
-	} else {
-		dmu_buf_t *dbp;
-		error = dmu_buf_hold(zfsvfs->z_os, zp->z_id, 0, FTAG, &dbp);
-		if (error) {
-			ZFS_EXIT(zfsvfs);
-			return (error);
-		}
-		error = uiomove(dbp->db_data,
-		    MIN((size_t)bufsz, uio->uio_resid), UIO_READ, uio);
-		dmu_buf_rele(dbp, FTAG);
-	}
+	if (zp->z_is_sa)
+		error = sa_lookup_uio(zp->z_sa_hdl,
+		    SA_ZPL_SYMLINK(zfsvfs), uio);
+	else
+		error = zfs_sa_readlink(zp, uio);
 
 	ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
+
 	ZFS_EXIT(zfsvfs);
 	return (error);
 }
@@ -3668,7 +3747,6 @@
 	vnode_t		*realvp;
 	int		error;
 	int		zf = ZNEW;
-	uid_t		owner;
 
 	ASSERT(tdvp->v_type == VDIR);
 
@@ -3701,8 +3779,7 @@
 	 * into "normal" file space in order to circumvent restrictions
 	 * imposed in attribute space.
 	 */
-	if ((szp->z_phys->zp_flags & ZFS_XATTR) !=
-	    (dzp->z_phys->zp_flags & ZFS_XATTR)) {
+	if ((szp->z_pflags & ZFS_XATTR) != (dzp->z_pflags & ZFS_XATTR)) {
 		ZFS_EXIT(zfsvfs);
 		return (EINVAL);
 	}
@@ -3716,8 +3793,7 @@
 		return (EPERM);
 	}
 
-	owner = zfs_fuid_map_id(zfsvfs, szp->z_phys->zp_uid, cr, ZFS_OWNER);
-	if (owner != crgetuid(cr) &&
+	if (szp->z_uid != crgetuid(cr) &&
 	    secpolicy_basic_link(cr) != 0) {
 		ZFS_EXIT(zfsvfs);
 		return (EPERM);
@@ -3738,8 +3814,10 @@
 	}
 
 	tx = dmu_tx_create(zfsvfs->z_os);
-	dmu_tx_hold_bonus(tx, szp->z_id);
+	dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
 	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
+	zfs_sa_upgrade_txholds(tx, szp);
+	zfs_sa_upgrade_txholds(tx, dzp);
 	error = dmu_tx_assign(tx, TXG_NOWAIT);
 	if (error) {
 		zfs_dirent_unlock(dl);
@@ -3815,10 +3893,8 @@
 	dmu_tx_t	*tx;
 	u_offset_t	off, koff;
 	size_t		len, klen;
-	uint64_t	filesz;
 	int		err;
 
-	filesz = zp->z_phys->zp_size;
 	off = pp->p_offset;
 	len = PAGESIZE;
 	/*
@@ -3826,12 +3902,12 @@
 	 * multiple pages so that we write a full block (thus avoiding
 	 * a read-modify-write).
 	 */
-	if (off < filesz && zp->z_blksz > PAGESIZE) {
+	if (off < zp->z_size && zp->z_blksz > PAGESIZE) {
 		klen = P2ROUNDUP((ulong_t)zp->z_blksz, PAGESIZE);
 		koff = ISP2(klen) ? P2ALIGN(off, (u_offset_t)klen) : 0;
-		ASSERT(koff <= filesz);
-		if (koff + klen > filesz)
-			klen = P2ROUNDUP(filesz - koff, (uint64_t)PAGESIZE);
+		ASSERT(koff <= zp->z_size);
+		if (koff + klen > zp->z_size)
+			klen = P2ROUNDUP(zp->z_size - koff, (uint64_t)PAGESIZE);
 		pp = pvn_write_kluster(vp, pp, &off, &len, koff, klen, flags);
 	}
 	ASSERT3U(btop(len), ==, btopr(len));
@@ -3839,30 +3915,32 @@
 	/*
 	 * Can't push pages past end-of-file.
 	 */
-	if (off >= filesz) {
+	if (off >= zp->z_size) {
 		/* ignore all pages */
 		err = 0;
 		goto out;
-	} else if (off + len > filesz) {
-		int npages = btopr(filesz - off);
+	} else if (off + len > zp->z_size) {
+		int npages = btopr(zp->z_size - off);
 		page_t *trunc;
 
 		page_list_break(&pp, &trunc, npages);
 		/* ignore pages past end of file */
 		if (trunc)
 			pvn_write_done(trunc, flags);
-		len = filesz - off;
+		len = zp->z_size - off;
 	}
 
-	if (zfs_usergroup_overquota(zfsvfs, B_FALSE, zp->z_phys->zp_uid) ||
-	    zfs_usergroup_overquota(zfsvfs, B_TRUE, zp->z_phys->zp_gid)) {
+	if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) ||
+	    zfs_owner_overquota(zfsvfs, zp, B_TRUE)) {
 		err = EDQUOT;
 		goto out;
 	}
 top:
 	tx = dmu_tx_create(zfsvfs->z_os);
 	dmu_tx_hold_write(tx, zp->z_id, off, len);
-	dmu_tx_hold_bonus(tx, zp->z_id);
+
+	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
+	zfs_sa_upgrade_txholds(tx, zp);
 	err = dmu_tx_assign(tx, TXG_NOWAIT);
 	if (err != 0) {
 		if (err == ERESTART) {
@@ -3884,7 +3962,16 @@
 	}
 
 	if (err == 0) {
-		zfs_time_stamper(zp, CONTENT_MODIFIED, tx);
+		uint64_t mtime[2], ctime[2];
+		sa_bulk_attr_t bulk[2];
+		int count = 0;
+
+		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
+		    &mtime, 16);
+		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
+		    &ctime, 16);
+		zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime,
+		    B_TRUE);
 		zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off, len, 0);
 	}
 	dmu_tx_commit(tx);
@@ -3960,14 +4047,14 @@
 	}
 	rl = zfs_range_lock(zp, io_off, io_len, RL_WRITER);
 
-	if (off > zp->z_phys->zp_size) {
+	if (off > zp->z_size) {
 		/* past end of file */
 		zfs_range_unlock(rl);
 		ZFS_EXIT(zfsvfs);
 		return (0);
 	}
 
-	len = MIN(io_len, P2ROUNDUP(zp->z_phys->zp_size, PAGESIZE) - io_off);
+	len = MIN(io_len, P2ROUNDUP(zp->z_size, PAGESIZE) - io_off);
 
 	for (off = io_off; io_off < off + len; io_off += io_len) {
 		if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) {
@@ -4008,7 +4095,7 @@
 	int error;
 
 	rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER);
-	if (zp->z_dbuf == NULL) {
+	if (zp->z_sa_hdl == NULL) {
 		/*
 		 * The fs has been unmounted, or we did a
 		 * suspend/resume and this file no longer exists.
@@ -4041,13 +4128,15 @@
 	if (zp->z_atime_dirty && zp->z_unlinked == 0) {
 		dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os);
 
-		dmu_tx_hold_bonus(tx, zp->z_id);
+		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
+		zfs_sa_upgrade_txholds(tx, zp);
 		error = dmu_tx_assign(tx, TXG_WAIT);
 		if (error) {
 			dmu_tx_abort(tx);
 		} else {
-			dmu_buf_will_dirty(zp->z_dbuf, tx);
 			mutex_enter(&zp->z_lock);
+			(void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs),
+			    (void *)&zp->z_atime, sizeof (zp->z_atime), tx);
 			zp->z_atime_dirty = 0;
 			mutex_exit(&zp->z_lock);
 			dmu_tx_commit(tx);
@@ -4099,7 +4188,7 @@
 	 * return an error, but we don't worry about races between this
 	 * function and zfs_map().
 	 */
-	if (zp->z_mapcnt > 0 && MANDMODE((mode_t)zp->z_phys->zp_mode)) {
+	if (zp->z_mapcnt > 0 && MANDMODE(zp->z_mode)) {
 		ZFS_EXIT(zfsvfs);
 		return (EAGAIN);
 	}
@@ -4312,15 +4401,14 @@
 	ZFS_ENTER(zfsvfs);
 	ZFS_VERIFY_ZP(zp);
 
-	if ((prot & PROT_WRITE) &&
-	    (zp->z_phys->zp_flags & (ZFS_IMMUTABLE | ZFS_READONLY |
-	    ZFS_APPENDONLY))) {
+	if ((prot & PROT_WRITE) && (zp->z_pflags &
+	    (ZFS_IMMUTABLE | ZFS_READONLY | ZFS_APPENDONLY))) {
 		ZFS_EXIT(zfsvfs);
 		return (EPERM);
 	}
 
 	if ((prot & (PROT_READ | PROT_EXEC)) &&
-	    (zp->z_phys->zp_flags & ZFS_AV_QUARANTINED)) {
+	    (zp->z_pflags & ZFS_AV_QUARANTINED)) {
 		ZFS_EXIT(zfsvfs);
 		return (EACCES);
 	}
@@ -4343,7 +4431,7 @@
 	/*
 	 * If file is locked, disallow mapping.
 	 */
-	if (MANDMODE((mode_t)zp->z_phys->zp_mode) && vn_has_flocks(vp)) {
+	if (MANDMODE(zp->z_mode) && vn_has_flocks(vp)) {
 		ZFS_EXIT(zfsvfs);
 		return (EAGAIN);
 	}
@@ -4489,13 +4577,19 @@
 	znode_t		*zp = VTOZ(vp);
 	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
 	uint32_t	gen;
+	uint64_t	gen64;
 	uint64_t	object = zp->z_id;
 	zfid_short_t	*zfid;
-	int		size, i;
+	int		size, i, error;
 
 	ZFS_ENTER(zfsvfs);
 	ZFS_VERIFY_ZP(zp);
-	gen = (uint32_t)zp->z_gen;
+
+	if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs),
+	    &gen64, sizeof (uint64_t))) != 0)
+		return (error);
+
+	gen = (uint32_t)gen64;
 
 	size = (zfsvfs->z_parent != zfsvfs) ? LONG_FID_LEN : SHORT_FID_LEN;
 	if (fidp->fid_len < size) {
@@ -4713,21 +4807,24 @@
 		 */
 		if (preamble) {
 			/* data begins in the middle of the arc_buf */
-			abuf = dmu_request_arcbuf(zp->z_dbuf, blksz);
+			abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl),
+			    blksz);
 			ASSERT(abuf);
 			(void) dmu_xuio_add(xuio, abuf,
 			    blksz - preamble, preamble);
 		}
 
 		for (i = 0; i < fullblk; i++) {
-			abuf = dmu_request_arcbuf(zp->z_dbuf, blksz);
+			abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl),
+			    blksz);
 			ASSERT(abuf);
 			(void) dmu_xuio_add(xuio, abuf, 0, blksz);
 		}
 
 		if (postamble) {
 			/* data ends in the middle of the arc_buf */
-			abuf = dmu_request_arcbuf(zp->z_dbuf, blksz);
+			abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl),
+			    blksz);
 			ASSERT(abuf);
 			(void) dmu_xuio_add(xuio, abuf, 0, postamble);
 		}
@@ -4749,7 +4846,7 @@
 			return (EINVAL);
 		}
 
-		maxsize = zp->z_phys->zp_size - uio->uio_loffset;
+		maxsize = zp->z_size - uio->uio_loffset;
 		if (size > maxsize)
 			size = maxsize;

--- a/usr/src/uts/common/fs/zfs/zfs_znode.c	Tue Mar 16 06:44:44 2010 -0700
+++ b/usr/src/uts/common/fs/zfs/zfs_znode.c	Tue Mar 16 09:43:38 2010 -0600
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -52,6 +52,7 @@
 #include <sys/zfs_ioctl.h>
 #include <sys/zfs_rlock.h>
 #include <sys/zfs_fuid.h>
+#include <sys/dnode.h>
 #include <sys/fs/zfs.h>
 #include <sys/kidmap.h>
 #endif /* _KERNEL */
@@ -61,8 +62,11 @@
 #include <sys/stat.h>
 #include <sys/zap.h>
 #include <sys/zfs_znode.h>
+#include <sys/sa.h>
+#include <sys/zfs_sa.h>
 
 #include "zfs_prop.h"
+#include "zfs_comutil.h"
 
 /*
  * Define ZNODE_STATS to turn on statistic gathering. By default, it is only
@@ -131,7 +135,6 @@
 	avl_create(&zp->z_range_avl, zfs_range_compare,
 	    sizeof (rl_t), offsetof(rl_t, r_node));
 
-	zp->z_dbuf = NULL;
 	zp->z_dirlocks = NULL;
 	zp->z_acl_cached = NULL;
 	return (0);
@@ -154,7 +157,6 @@
 	avl_destroy(&zp->z_range_avl);
 	mutex_destroy(&zp->z_range_lock);
 
-	ASSERT(zp->z_dbuf == NULL);
 	ASSERT(zp->z_dirlocks == NULL);
 	ASSERT(zp->z_acl_cached == NULL);
 }
@@ -198,8 +200,15 @@
 	nzp->z_last_itx = ozp->z_last_itx;
 	nzp->z_gen = ozp->z_gen;
 	nzp->z_sync_cnt = ozp->z_sync_cnt;
-	nzp->z_phys = ozp->z_phys;
-	nzp->z_dbuf = ozp->z_dbuf;
+	nzp->z_is_sa = ozp->z_is_sa;
+	nzp->z_sa_hdl = ozp->z_sa_hdl;
+	bcopy(ozp->z_atime, nzp->z_atime, sizeof (uint64_t) * 2);
+	nzp->z_links = ozp->z_links;
+	nzp->z_size = ozp->z_size;
+	nzp->z_pflags = ozp->z_pflags;
+	nzp->z_uid = ozp->z_uid;
+	nzp->z_gid = ozp->z_gid;
+	nzp->z_mode = ozp->z_mode;
 
 	/*
 	 * Since this is just an idle znode and kmem is already dealing with
@@ -210,9 +219,7 @@
 		ozp->z_acl_cached = NULL;
 	}
 
-	/* Update back pointers. */
-	(void) dmu_buf_update_user(nzp->z_dbuf, ozp, nzp, &nzp->z_phys,
-	    znode_evict_error);
+	sa_set_userp(nzp->z_sa_hdl, nzp);
 
 	/*
 	 * Invalidate the original znode by clearing fields that provide a
@@ -220,7 +227,7 @@
 	 * ensure that zfs_znode_move() recognizes the znode as invalid in any
 	 * subsequent callback.
 	 */
-	ozp->z_dbuf = NULL;
+	ozp->z_sa_hdl = NULL;
 	POINTER_INVALIDATE(&ozp->z_zfsvfs);
 }
 
@@ -475,6 +482,7 @@
 	sharezp->z_unlinked = 0;
 	sharezp->z_atime_dirty = 0;
 	sharezp->z_zfsvfs = zfsvfs;
+	sharezp->z_is_sa = zfsvfs->z_use_sa;
 
 	vp = ZTOV(sharezp);
 	vn_reinit(vp);
@@ -482,8 +490,7 @@
 
 	VERIFY(0 == zfs_acl_ids_create(sharezp, IS_ROOT_NODE, &vattr,
 	    kcred, NULL, &acl_ids));
-	zfs_mknode(sharezp, &vattr, tx, kcred, IS_ROOT_NODE,
-	    &zp, 0, &acl_ids);
+	zfs_mknode(sharezp, &vattr, tx, kcred, IS_ROOT_NODE, &zp, &acl_ids);
 	ASSERT3P(zp, ==, sharezp);
 	ASSERT(!vn_in_dnlc(ZTOV(sharezp))); /* not valid to move */
 	POINTER_INVALIDATE(&sharezp->z_zfsvfs);
@@ -493,8 +500,7 @@
 
 	zfs_acl_ids_free(&acl_ids);
 	ZTOV(sharezp)->v_count = 0;
-	dmu_buf_rele(sharezp->z_dbuf, NULL);
-	sharezp->z_dbuf = NULL;
+	sa_handle_destroy(sharezp->z_sa_hdl);
 	kmem_cache_free(znode_cache, sharezp);
 
 	return (error);
@@ -558,26 +564,25 @@
 }
 
 static void
-zfs_znode_dmu_init(zfsvfs_t *zfsvfs, znode_t *zp, dmu_buf_t *db)
+zfs_znode_sa_init(zfsvfs_t *zfsvfs, znode_t *zp,
+    dmu_buf_t *db, dmu_object_type_t obj_type, sa_handle_t *sa_hdl)
 {
-	znode_t		*nzp;
-
 	ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs) || (zfsvfs == zp->z_zfsvfs));
 	ASSERT(MUTEX_HELD(ZFS_OBJ_MUTEX(zfsvfs, zp->z_id)));
 
 	mutex_enter(&zp->z_lock);
 
-	ASSERT(zp->z_dbuf == NULL);
+	ASSERT(zp->z_sa_hdl == NULL);
 	ASSERT(zp->z_acl_cached == NULL);
-	zp->z_dbuf = db;
-	nzp = dmu_buf_set_user_ie(db, zp, &zp->z_phys, znode_evict_error);
+	if (sa_hdl == NULL) {
+		VERIFY(0 == sa_handle_get_from_db(zfsvfs->z_os, db, zp,
+		    SA_HDL_SHARED, &zp->z_sa_hdl));
+	} else {
+		zp->z_sa_hdl = sa_hdl;
+		sa_set_userp(sa_hdl, zp);
+	}
 
-	/*
-	 * there should be no
-	 * concurrent zgets on this object.
-	 */
-	if (nzp != NULL)
-		panic("existing znode %p for dbuf %p", (void *)nzp, (void *)db);
+	zp->z_is_sa = (obj_type == DMU_OT_SA) ? B_TRUE : B_FALSE;
 
 	/*
 	 * Slap on VROOT if we are the root znode
@@ -592,14 +597,12 @@
 void
 zfs_znode_dmu_fini(znode_t *zp)
 {
-	dmu_buf_t *db = zp->z_dbuf;
 	ASSERT(MUTEX_HELD(ZFS_OBJ_MUTEX(zp->z_zfsvfs, zp->z_id)) ||
 	    zp->z_unlinked ||
 	    RW_WRITE_HELD(&zp->z_zfsvfs->z_teardown_inactive_lock));
-	ASSERT(zp->z_dbuf != NULL);
-	zp->z_dbuf = NULL;
-	VERIFY(zp == dmu_buf_update_user(db, zp, NULL, NULL, NULL));
-	dmu_buf_rele(db, NULL);
+
+	sa_handle_destroy(zp->z_sa_hdl);
+	zp->z_sa_hdl = NULL;
 }
 
 /*
@@ -610,22 +613,27 @@
  * return the znode
  */
 static znode_t *
-zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz)
+zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
+    dmu_object_type_t obj_type, sa_handle_t *hdl)
 {
 	znode_t	*zp;
 	vnode_t *vp;
+	uint64_t mode;
+	uint64_t parent;
+	uint64_t uid, gid;
+	sa_bulk_attr_t bulk[9];
+	int count = 0;
 
 	zp = kmem_cache_alloc(znode_cache, KM_SLEEP);
 
 	ASSERT(zp->z_dirlocks == NULL);
-	ASSERT(zp->z_dbuf == NULL);
 	ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs));
 
 	/*
 	 * Defer setting z_zfsvfs until the znode is ready to be a candidate for
 	 * the zfs_znode_move() callback.
 	 */
-	zp->z_phys = NULL;
+	zp->z_sa_hdl = NULL;
 	zp->z_unlinked = 0;
 	zp->z_atime_dirty = 0;
 	zp->z_mapcnt = 0;
@@ -638,16 +646,41 @@
 	vp = ZTOV(zp);
 	vn_reinit(vp);
 
-	zfs_znode_dmu_init(zfsvfs, zp, db);
+	zfs_znode_sa_init(zfsvfs, zp, db, obj_type, hdl);
 
-	zp->z_gen = zp->z_phys->zp_gen;
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, &mode, 8);
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zfsvfs), NULL, &zp->z_gen, 8);
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
+	    &zp->z_size, 8);
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL,
+	    &zp->z_links, 8);
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
+	    &zp->z_pflags, 8);
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL, &parent, 8);
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL,
+	    &zp->z_atime, 16);
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL,
+	    &uid, 8);
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL,
+	    &gid, 8);
 
+	if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count) != 0 || zp->z_gen == 0) {
+		if (hdl == NULL)
+			sa_handle_destroy(zp->z_sa_hdl);
+		kmem_cache_free(znode_cache, zp);
+		return (NULL);
+	}
+
+	zp->z_uid = zfs_fuid_map_id(zfsvfs, uid, CRED(), ZFS_OWNER);
+	zp->z_gid = zfs_fuid_map_id(zfsvfs, gid, CRED(), ZFS_GROUP);
+	zp->z_mode = mode;
 	vp->v_vfsp = zfsvfs->z_parent->z_vfs;
-	vp->v_type = IFTOVT((mode_t)zp->z_phys->zp_mode);
+
+	vp->v_type = IFTOVT((mode_t)mode);
 
 	switch (vp->v_type) {
 	case VDIR:
-		if (zp->z_phys->zp_flags & ZFS_XATTR) {
+		if (zp->z_pflags & ZFS_XATTR) {
 			vn_setops(vp, zfs_xdvnodeops);
 			vp->v_flag |= V_XATTRDIR;
 		} else {
@@ -657,7 +690,13 @@
 		break;
 	case VBLK:
 	case VCHR:
-		vp->v_rdev = zfs_cmpldev(zp->z_phys->zp_rdev);
+		{
+			uint64_t rdev;
+			VERIFY(sa_lookup(zp->z_sa_hdl, SA_ZPL_RDEV(zfsvfs),
+			    &rdev, sizeof (rdev)) == 0);
+
+			vp->v_rdev = zfs_cmpldev(rdev);
+		}
 		/*FALLTHROUGH*/
 	case VFIFO:
 	case VSOCK:
@@ -666,10 +705,12 @@
 		break;
 	case VREG:
 		vp->v_flag |= VMODSORT;
-		if (zp->z_phys->zp_parent == zfsvfs->z_shares_dir)
+		if (parent == zfsvfs->z_shares_dir) {
+			ASSERT(uid == 0 && gid == 0);
 			vn_setops(vp, zfs_sharevnodeops);
-		else
+		} else {
 			vn_setops(vp, zfs_fvnodeops);
+		}
 		break;
 	case VLNK:
 		vn_setops(vp, zfs_symvnodeops);
@@ -693,6 +734,9 @@
 	return (zp);
 }
 
+static uint64_t empty_xattr;
+static uint64_t pad[4];
+static zfs_acl_phys_t acl_phys;
 /*
  * Create a new DMU object to hold a zfs znode.
  *
@@ -712,14 +756,23 @@
  */
 void
 zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
-    uint_t flag, znode_t **zpp, int bonuslen, zfs_acl_ids_t *acl_ids)
+    uint_t flag, znode_t **zpp, zfs_acl_ids_t *acl_ids)
 {
+	uint64_t	crtime[2], atime[2], mtime[2], ctime[2];
+	uint64_t	mode, size, links, parent, pflags;
+	uint64_t 	dzp_pflags = 0;
+	uint64_t	rdev = 0;
+	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
 	dmu_buf_t	*db;
-	znode_phys_t	*pzp;
-	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
 	timestruc_t	now;
 	uint64_t	gen, obj;
 	int		err;
+	int		bonuslen;
+	sa_handle_t	*sa_hdl;
+	dmu_object_type_t obj_type;
+	sa_bulk_attr_t	sa_attrs[ZPL_END];
+	int		cnt = 0;
+	zfs_acl_locator_cb_t locate = { 0 };
 
 	ASSERT(vap && (vap->va_mask & (AT_TYPE|AT_MODE)) == (AT_TYPE|AT_MODE));
 
@@ -733,6 +786,10 @@
 		gen = dmu_tx_get_txg(tx);
 	}
 
+	obj_type = zfsvfs->z_use_sa ? DMU_OT_SA : DMU_OT_ZNODE;
+	bonuslen = (obj_type == DMU_OT_SA) ?
+	    DN_MAX_BONUSLEN : ZFS_OLD_ZNODE_PHYS_SIZE;
+
 	/*
 	 * Create a new DMU object.
 	 */
@@ -746,106 +803,211 @@
 		if (zfsvfs->z_replay) {
 			err = zap_create_claim_norm(zfsvfs->z_os, obj,
 			    zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS,
-			    DMU_OT_ZNODE, sizeof (znode_phys_t) + bonuslen, tx);
+			    obj_type, bonuslen, tx);
 			ASSERT3U(err, ==, 0);
 		} else {
 			obj = zap_create_norm(zfsvfs->z_os,
 			    zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS,
-			    DMU_OT_ZNODE, sizeof (znode_phys_t) + bonuslen, tx);
+			    obj_type, bonuslen, tx);
 		}
 	} else {
 		if (zfsvfs->z_replay) {
 			err = dmu_object_claim(zfsvfs->z_os, obj,
 			    DMU_OT_PLAIN_FILE_CONTENTS, 0,
-			    DMU_OT_ZNODE, sizeof (znode_phys_t) + bonuslen, tx);
+			    obj_type, bonuslen, tx);
 			ASSERT3U(err, ==, 0);
 		} else {
 			obj = dmu_object_alloc(zfsvfs->z_os,
 			    DMU_OT_PLAIN_FILE_CONTENTS, 0,
-			    DMU_OT_ZNODE, sizeof (znode_phys_t) + bonuslen, tx);
+			    obj_type, bonuslen, tx);
 		}
 	}
 
 	ZFS_OBJ_HOLD_ENTER(zfsvfs, obj);
-	VERIFY(0 == dmu_bonus_hold(zfsvfs->z_os, obj, NULL, &db));
-	dmu_buf_will_dirty(db, tx);
-
-	/*
-	 * Initialize the znode physical data to zero.
-	 */
-	ASSERT(db->db_size >= sizeof (znode_phys_t));
-	bzero(db->db_data, db->db_size);
-	pzp = db->db_data;
+	VERIFY(0 == sa_buf_hold(zfsvfs->z_os, obj, NULL, &db));
 
 	/*
 	 * If this is the root, fix up the half-initialized parent pointer
 	 * to reference the just-allocated physical data area.
 	 */
 	if (flag & IS_ROOT_NODE) {
-		dzp->z_dbuf = db;
-		dzp->z_phys = pzp;
 		dzp->z_id = obj;
+	} else {
+		dzp_pflags = dzp->z_pflags;
 	}
 
 	/*
 	 * If parent is an xattr, so am I.
 	 */
-	if (dzp->z_phys->zp_flags & ZFS_XATTR)
+	if (dzp_pflags & ZFS_XATTR) {
 		flag |= IS_XATTR;
-
-	if (vap->va_type == VBLK || vap->va_type == VCHR) {
-		pzp->zp_rdev = zfs_expldev(vap->va_rdev);
 	}
 
 	if (zfsvfs->z_use_fuids)
-		pzp->zp_flags = ZFS_ARCHIVE | ZFS_AV_MODIFIED;
+		pflags = ZFS_ARCHIVE | ZFS_AV_MODIFIED;
+	else
+		pflags = 0;
 
 	if (vap->va_type == VDIR) {
-		pzp->zp_size = 2;		/* contents ("." and "..") */
-		pzp->zp_links = (flag & (IS_ROOT_NODE | IS_XATTR)) ? 2 : 1;
+		size = 2;		/* contents ("." and "..") */
+		links = (flag & (IS_ROOT_NODE | IS_XATTR)) ? 2 : 1;
+	} else {
+		size = links = 0;
+	}
+
+	if (vap->va_type == VBLK || vap->va_type == VCHR) {
+		rdev = zfs_expldev(vap->va_rdev);
 	}
 
-	pzp->zp_parent = dzp->z_id;
+	parent = dzp->z_id;
+	mode = acl_ids->z_mode;
 	if (flag & IS_XATTR)
-		pzp->zp_flags |= ZFS_XATTR;
+		pflags |= ZFS_XATTR;
 
-	pzp->zp_gen = gen;
+	/*
+	 * No execs denied will be determined when zfs_mode_compute() is called.
+	 */
+	pflags |= acl_ids->z_aclp->z_hints &
+	    (ZFS_ACL_TRIVIAL|ZFS_INHERIT_ACE|ZFS_ACL_AUTO_INHERIT|
+	    ZFS_ACL_DEFAULTED|ZFS_ACL_PROTECTED);
 
-	ZFS_TIME_ENCODE(&now, pzp->zp_crtime);
-	ZFS_TIME_ENCODE(&now, pzp->zp_ctime);
+	ZFS_TIME_ENCODE(&now, crtime);
+	ZFS_TIME_ENCODE(&now, ctime);
 
 	if (vap->va_mask & AT_ATIME) {
-		ZFS_TIME_ENCODE(&vap->va_atime, pzp->zp_atime);
+		ZFS_TIME_ENCODE(&vap->va_atime, atime);
 	} else {
-		ZFS_TIME_ENCODE(&now, pzp->zp_atime);
+		ZFS_TIME_ENCODE(&now, atime);
 	}
 
 	if (vap->va_mask & AT_MTIME) {
-		ZFS_TIME_ENCODE(&vap->va_mtime, pzp->zp_mtime);
+		ZFS_TIME_ENCODE(&vap->va_mtime, mtime);
 	} else {
-		ZFS_TIME_ENCODE(&now, pzp->zp_mtime);
+		ZFS_TIME_ENCODE(&now, mtime);
 	}
-	pzp->zp_uid = acl_ids->z_fuid;
-	pzp->zp_gid = acl_ids->z_fgid;
-	pzp->zp_mode = acl_ids->z_mode;
+
+	/* Now add in all of the "SA" attributes */
+	VERIFY(0 == sa_handle_get_from_db(zfsvfs->z_os, db, NULL, SA_HDL_SHARED,
+	    &sa_hdl));
+
+	/*
+	 * Set up the array of attributes to be replaced/set on the new file.
+	 *
+	 * Order for DMU_OT_ZNODE is critical since it needs to be constructed
+	 * in the old znode_phys_t format.  Don't change this ordering.
+	 */
+
+	if (obj_type == DMU_OT_ZNODE) {
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zfsvfs),
+		    NULL, &atime, 16);
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zfsvfs),
+		    NULL, &mtime, 16);
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zfsvfs),
+		    NULL, &ctime, 16);
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zfsvfs),
+		    NULL, &crtime, 16);
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zfsvfs),
+		    NULL, &gen, 8);
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zfsvfs),
+		    NULL, &mode, 8);
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zfsvfs),
+		    NULL, &size, 8);
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zfsvfs),
+		    NULL, &parent, 8);
+	} else {
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zfsvfs),
+		    NULL, &mode, 8);
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zfsvfs),
+		    NULL, &size, 8);
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zfsvfs),
+		    NULL, &gen, 8);
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zfsvfs), NULL,
+		    &acl_ids->z_fuid, 8);
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zfsvfs), NULL,
+		    &acl_ids->z_fgid, 8);
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zfsvfs),
+		    NULL, &parent, 8);
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zfsvfs),
+		    NULL, &pflags, 8);
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zfsvfs),
+		    NULL, &atime, 16);
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zfsvfs),
+		    NULL, &mtime, 16);
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zfsvfs),
+		    NULL, &ctime, 16);
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zfsvfs),
+		    NULL, &crtime, 16);
+	}
+
+	SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_LINKS(zfsvfs), NULL, &links, 8);
+
+	if (obj_type == DMU_OT_ZNODE) {
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_XATTR(zfsvfs), NULL,
+		    &empty_xattr, 8);
+	}
+	if (obj_type == DMU_OT_ZNODE ||
+	    (vap->va_type == VBLK || vap->va_type == VCHR)) {
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_RDEV(zfsvfs),
+		    NULL, &rdev, 8);
+
+	}
+	if (obj_type == DMU_OT_ZNODE) {
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zfsvfs),
+		    NULL, &pflags, 8);
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zfsvfs), NULL,
+		    &acl_ids->z_fuid, 8);
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zfsvfs), NULL,
+		    &acl_ids->z_fgid, 8);
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PAD(zfsvfs), NULL, pad,
+		    sizeof (uint64_t) * 4);
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ZNODE_ACL(zfsvfs), NULL,
+		    &acl_phys, sizeof (zfs_acl_phys_t));
+	} else if (acl_ids->z_aclp->z_version >= ZFS_ACL_VERSION_FUID) {
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_COUNT(zfsvfs), NULL,
+		    &acl_ids->z_aclp->z_acl_count, 8);
+		locate.cb_aclp = acl_ids->z_aclp;
+		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_ACES(zfsvfs),
+		    zfs_acl_data_locator, &locate,
+		    acl_ids->z_aclp->z_acl_bytes);
+		mode = zfs_mode_compute(mode, acl_ids->z_aclp, &pflags);
+	}
+
+	VERIFY(sa_replace_all_by_template(sa_hdl, sa_attrs, cnt, tx) == 0);
+
 	if (!(flag & IS_ROOT_NODE)) {
-		*zpp = zfs_znode_alloc(zfsvfs, db, 0);
+		*zpp = zfs_znode_alloc(zfsvfs, db, 0, obj_type, sa_hdl);
+		ASSERT(*zpp != NULL);
 	} else {
 		/*
 		 * If we are creating the root node, the "parent" we
 		 * passed in is the znode for the root.
 		 */
 		*zpp = dzp;
+
+		(*zpp)->z_sa_hdl = sa_hdl;
 	}
-	VERIFY(0 == zfs_aclset_common(*zpp, acl_ids->z_aclp, cr, tx));
+
 	if (vap->va_mask & AT_XVATTR)
-		zfs_xvattr_set(*zpp, (xvattr_t *)vap);
+		zfs_xvattr_set(*zpp, (xvattr_t *)vap, tx);
+
+	(*zpp)->z_pflags = pflags;
+	(*zpp)->z_mode = mode;
 
+	if (obj_type == DMU_OT_ZNODE ||
+	    acl_ids->z_aclp->z_version < ZFS_ACL_VERSION_FUID) {
+		err = zfs_aclset_common(*zpp, acl_ids->z_aclp, cr, tx);
+		ASSERT3P(err, ==, 0);
+	}
 	ZFS_OBJ_HOLD_EXIT(zfsvfs, obj);
 }
 
+/*
+ * zfs_xvattr_set only updates the in-core attributes;
+ * it is assumed the caller will be doing an sa_bulk_update
+ * to push the changes out.
+ */
 void
-zfs_xvattr_set(znode_t *zp, xvattr_t *xvap)
+zfs_xvattr_set(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx)
 {
 	xoptattr_t *xoap;
 
@@ -853,62 +1015,74 @@
 	ASSERT(xoap);
 
 	if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) {
-		ZFS_TIME_ENCODE(&xoap->xoa_createtime, zp->z_phys->zp_crtime);
+		uint64_t times[2];
+		ZFS_TIME_ENCODE(&xoap->xoa_createtime, times);
+		(void) sa_update(zp->z_sa_hdl, SA_ZPL_CRTIME(zp->z_zfsvfs),
+		    &times, sizeof (times), tx);
 		XVA_SET_RTN(xvap, XAT_CREATETIME);
 	}
 	if (XVA_ISSET_REQ(xvap, XAT_READONLY)) {
-		ZFS_ATTR_SET(zp, ZFS_READONLY, xoap->xoa_readonly);
+		ZFS_ATTR_SET(zp, ZFS_READONLY, xoap->xoa_readonly,
+		    zp->z_pflags, tx);
 		XVA_SET_RTN(xvap, XAT_READONLY);
 	}
 	if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) {
-		ZFS_ATTR_SET(zp, ZFS_HIDDEN, xoap->xoa_hidden);
+		ZFS_ATTR_SET(zp, ZFS_HIDDEN, xoap->xoa_hidden,
+		    zp->z_pflags, tx);
 		XVA_SET_RTN(xvap, XAT_HIDDEN);
 	}
 	if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) {
-		ZFS_ATTR_SET(zp, ZFS_SYSTEM, xoap->xoa_system);
+		ZFS_ATTR_SET(zp, ZFS_SYSTEM, xoap->xoa_system,
+		    zp->z_pflags, tx);
 		XVA_SET_RTN(xvap, XAT_SYSTEM);
 	}
 	if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) {
-		ZFS_ATTR_SET(zp, ZFS_ARCHIVE, xoap->xoa_archive);
+		ZFS_ATTR_SET(zp, ZFS_ARCHIVE, xoap->xoa_archive,
+		    zp->z_pflags, tx);
 		XVA_SET_RTN(xvap, XAT_ARCHIVE);
 	}
 	if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
-		ZFS_ATTR_SET(zp, ZFS_IMMUTABLE, xoap->xoa_immutable);
+		ZFS_ATTR_SET(zp, ZFS_IMMUTABLE, xoap->xoa_immutable,
+		    zp->z_pflags, tx);
 		XVA_SET_RTN(xvap, XAT_IMMUTABLE);
 	}
 	if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
-		ZFS_ATTR_SET(zp, ZFS_NOUNLINK, xoap->xoa_nounlink);
+		ZFS_ATTR_SET(zp, ZFS_NOUNLINK, xoap->xoa_nounlink,
+		    zp->z_pflags, tx);
 		XVA_SET_RTN(xvap, XAT_NOUNLINK);
 	}
 	if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
-		ZFS_ATTR_SET(zp, ZFS_APPENDONLY, xoap->xoa_appendonly);
+		ZFS_ATTR_SET(zp, ZFS_APPENDONLY, xoap->xoa_appendonly,
+		    zp->z_pflags, tx);
 		XVA_SET_RTN(xvap, XAT_APPENDONLY);
 	}
 	if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
-		ZFS_ATTR_SET(zp, ZFS_NODUMP, xoap->xoa_nodump);
+		ZFS_ATTR_SET(zp, ZFS_NODUMP, xoap->xoa_nodump,
+		    zp->z_pflags, tx);
 		XVA_SET_RTN(xvap, XAT_NODUMP);
 	}
 	if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) {
-		ZFS_ATTR_SET(zp, ZFS_OPAQUE, xoap->xoa_opaque);
+		ZFS_ATTR_SET(zp, ZFS_OPAQUE, xoap->xoa_opaque,
+		    zp->z_pflags, tx);
 		XVA_SET_RTN(xvap, XAT_OPAQUE);
 	}
 	if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
 		ZFS_ATTR_SET(zp, ZFS_AV_QUARANTINED,
-		    xoap->xoa_av_quarantined);
+		    xoap->xoa_av_quarantined, zp->z_pflags, tx);
 		XVA_SET_RTN(xvap, XAT_AV_QUARANTINED);
 	}
 	if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
-		ZFS_ATTR_SET(zp, ZFS_AV_MODIFIED, xoap->xoa_av_modified);
+		ZFS_ATTR_SET(zp, ZFS_AV_MODIFIED, xoap->xoa_av_modified,
+		    zp->z_pflags, tx);
 		XVA_SET_RTN(xvap, XAT_AV_MODIFIED);
 	}
 	if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) {
-		(void) memcpy(zp->z_phys + 1, xoap->xoa_av_scanstamp,
-		    sizeof (xoap->xoa_av_scanstamp));
-		zp->z_phys->zp_flags |= ZFS_BONUS_SCANSTAMP;
+		zfs_sa_set_scanstamp(zp, xvap, tx);
 		XVA_SET_RTN(xvap, XAT_AV_SCANSTAMP);
 	}
 	if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
-		ZFS_ATTR_SET(zp, ZFS_REPARSE, xoap->xoa_reparse);
+		ZFS_ATTR_SET(zp, ZFS_REPARSE, xoap->xoa_reparse,
+		    zp->z_pflags, tx);
 		XVA_SET_RTN(xvap, XAT_REPARSE);
 	}
 }
@@ -920,35 +1094,42 @@
 	dmu_buf_t	*db;
 	znode_t		*zp;
 	int err;
+	sa_handle_t	*hdl;
 
 	*zpp = NULL;
 
 	ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num);
 
-	err = dmu_bonus_hold(zfsvfs->z_os, obj_num, NULL, &db);
+	err = sa_buf_hold(zfsvfs->z_os, obj_num, NULL, &db);
 	if (err) {
 		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
 		return (err);
 	}
 
 	dmu_object_info_from_db(db, &doi);
-	if (doi.doi_bonus_type != DMU_OT_ZNODE ||
-	    doi.doi_bonus_size < sizeof (znode_phys_t)) {
-		dmu_buf_rele(db, NULL);
+	if (doi.doi_bonus_type != DMU_OT_SA &&
+	    (doi.doi_bonus_type != DMU_OT_ZNODE ||
+	    (doi.doi_bonus_type == DMU_OT_ZNODE &&
+	    doi.doi_bonus_size < sizeof (znode_phys_t)))) {
+		sa_buf_rele(db, NULL);
 		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
 		return (EINVAL);
 	}
 
-	zp = dmu_buf_get_user(db);
-	if (zp != NULL) {
-		mutex_enter(&zp->z_lock);
+	hdl = dmu_buf_get_user(db);
+	if (hdl != NULL) {
+		zp  = sa_get_userdata(hdl);
+
 
 		/*
-		 * Since we do immediate eviction of the z_dbuf, we
-		 * should never find a dbuf with a znode that doesn't
-		 * know about the dbuf.
+		 * Since "SA" does immediate eviction we
+		 * should never find a sa handle that doesn't
+		 * know about the znode.
 		 */
-		ASSERT3P(zp->z_dbuf, ==, db);
+
+		ASSERT3P(zp, !=, NULL);
+
+		mutex_enter(&zp->z_lock);
 		ASSERT3U(zp->z_id, ==, obj_num);
 		if (zp->z_unlinked) {
 			err = ENOENT;
@@ -957,7 +1138,7 @@
 			*zpp = zp;
 			err = 0;
 		}
-		dmu_buf_rele(db, NULL);
+		sa_buf_rele(db, NULL);
 		mutex_exit(&zp->z_lock);
 		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
 		return (err);
@@ -969,18 +1150,17 @@
 	 *
 	 * There is a small window where zfs_vget() could
 	 * find this object while a file create is still in
-	 * progress.  Since a gen number can never be zero
-	 * we will check that to determine if its an allocated
-	 * file.
+	 * progress.  This is checked for in zfs_znode_alloc().
+	 *
+	 * If zfs_znode_alloc() fails it will drop the hold on the
+	 * bonus buffer.
 	 */
-
-	if (((znode_phys_t *)db->db_data)->zp_gen != 0) {
-		zp = zfs_znode_alloc(zfsvfs, db, doi.doi_data_block_size);
+	zp = zfs_znode_alloc(zfsvfs, db, doi.doi_data_block_size,
+	    doi.doi_bonus_type, NULL);
+	if (zp == NULL) {
+		err = ENOENT;
+	} else {
 		*zpp = zp;
-		err = 0;
-	} else {
-		dmu_buf_rele(db, NULL);
-		err = ENOENT;
 	}
 	ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
 	return (err);
@@ -993,39 +1173,76 @@
 	dmu_object_info_t doi;
 	dmu_buf_t *db;
 	uint64_t obj_num = zp->z_id;
+	uint64_t mode;
+	uint64_t uid, gid;
+	sa_bulk_attr_t bulk[8];
 	int err;
+	int count = 0;
+	uint64_t gen;
 
 	ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num);
 
-	err = dmu_bonus_hold(zfsvfs->z_os, obj_num, NULL, &db);
+	mutex_enter(&zp->z_acl_lock);
+	if (zp->z_acl_cached) {
+		zfs_acl_free(zp->z_acl_cached);
+		zp->z_acl_cached = NULL;
+	}
+
+	mutex_exit(&zp->z_acl_lock);
+	ASSERT(zp->z_sa_hdl == NULL);
+	err = sa_buf_hold(zfsvfs->z_os, obj_num, NULL, &db);
 	if (err) {
 		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
 		return (err);
 	}
 
 	dmu_object_info_from_db(db, &doi);
-	if (doi.doi_bonus_type != DMU_OT_ZNODE ||
-	    doi.doi_bonus_size < sizeof (znode_phys_t)) {
-		dmu_buf_rele(db, NULL);
+	if (doi.doi_bonus_type != DMU_OT_SA &&
+	    (doi.doi_bonus_type != DMU_OT_ZNODE ||
+	    (doi.doi_bonus_type == DMU_OT_ZNODE &&
+	    doi.doi_bonus_size < sizeof (znode_phys_t)))) {
+		sa_buf_rele(db, NULL);
 		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
 		return (EINVAL);
 	}
 
-	if (((znode_phys_t *)db->db_data)->zp_gen != zp->z_gen) {
-		dmu_buf_rele(db, NULL);
+	zfs_znode_sa_init(zfsvfs, zp, db, doi.doi_bonus_type, NULL);
+
+	/* reload cached values */
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zfsvfs), NULL,
+	    &gen, sizeof (gen));
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
+	    &zp->z_size, sizeof (zp->z_size));
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL,
+	    &zp->z_links, sizeof (zp->z_links));
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
+	    &zp->z_pflags, sizeof (zp->z_pflags));
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL,
+	    &zp->z_atime, sizeof (zp->z_atime));
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL,
+	    &uid, sizeof (uid));
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL,
+	    &gid, sizeof (gid));
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL,
+	    &mode, sizeof (mode));
+
+	zp->z_mode = mode;
+
+	if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) {
+		zfs_znode_dmu_fini(zp);
 		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
 		return (EIO);
 	}
 
-	mutex_enter(&zp->z_acl_lock);
-	if (zp->z_acl_cached) {
-		zfs_acl_free(zp->z_acl_cached);
-		zp->z_acl_cached = NULL;
+	if (gen != zp->z_gen) {
+		zfs_znode_dmu_fini(zp);
+		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
+		return (EIO);
 	}
-	mutex_exit(&zp->z_acl_lock);
 
-	zfs_znode_dmu_init(zfsvfs, zp, db);
-	zp->z_unlinked = (zp->z_phys->zp_links == 0);
+	zp->z_uid = zfs_fuid_map_id(zfsvfs, uid, CRED(), ZFS_OWNER);
+	zp->z_gid = zfs_fuid_map_id(zfsvfs, gid, CRED(), ZFS_GROUP);
+	zp->z_unlinked = (zp->z_links == 0);
 	zp->z_blksz = doi.doi_data_block_size;
 
 	ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
@@ -1039,7 +1256,7 @@
 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
 	objset_t *os = zfsvfs->z_os;
 	uint64_t obj = zp->z_id;
-	uint64_t acl_obj = zp->z_phys->zp_acl.z_acl_extern_obj;
+	uint64_t acl_obj = ZFS_EXTERNAL_ACL(zp);
 
 	ZFS_OBJ_HOLD_ENTER(zfsvfs, obj);
 	if (acl_obj)
@@ -1057,7 +1274,7 @@
 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
 	uint64_t z_id = zp->z_id;
 
-	ASSERT(zp->z_dbuf && zp->z_phys);
+	ASSERT(zp->z_sa_hdl);
 
 	/*
 	 * Don't allow a zfs_zget() while were trying to release this znode
@@ -1096,6 +1313,7 @@
 		zfs_rmnode(zp);
 		return;
 	}
+
 	mutex_exit(&zp->z_lock);
 	zfs_znode_dmu_fini(zp);
 	ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id);
@@ -1127,59 +1345,40 @@
 }
 
 void
-zfs_time_stamper_locked(znode_t *zp, uint_t flag, dmu_tx_t *tx)
+zfs_tstamp_update_setup(znode_t *zp, uint_t flag, uint64_t mtime[2],
+    uint64_t ctime[2], boolean_t have_tx)
 {
 	timestruc_t	now;
 
-	ASSERT(MUTEX_HELD(&zp->z_lock));
-
 	gethrestime(&now);
 
-	if (tx) {
-		dmu_buf_will_dirty(zp->z_dbuf, tx);
+	if (have_tx) {	/* will sa_bulk_update happen really soon? */
 		zp->z_atime_dirty = 0;
 		zp->z_seq++;
 	} else {
 		zp->z_atime_dirty = 1;
 	}
 
-	if (flag & AT_ATIME)
-		ZFS_TIME_ENCODE(&now, zp->z_phys->zp_atime);
+	if (flag & AT_ATIME) {
+		ZFS_TIME_ENCODE(&now, zp->z_atime);
+	}
 
 	if (flag & AT_MTIME) {
-		ZFS_TIME_ENCODE(&now, zp->z_phys->zp_mtime);
-		if (zp->z_zfsvfs->z_use_fuids)
-			zp->z_phys->zp_flags |= (ZFS_ARCHIVE | ZFS_AV_MODIFIED);
+		ZFS_TIME_ENCODE(&now, mtime);
+		if (zp->z_zfsvfs->z_use_fuids) {
+			zp->z_pflags |= (ZFS_ARCHIVE |
+			    ZFS_AV_MODIFIED);
+		}
 	}
 
 	if (flag & AT_CTIME) {
-		ZFS_TIME_ENCODE(&now, zp->z_phys->zp_ctime);
+		ZFS_TIME_ENCODE(&now, ctime);
 		if (zp->z_zfsvfs->z_use_fuids)
-			zp->z_phys->zp_flags |= ZFS_ARCHIVE;
+			zp->z_pflags |= ZFS_ARCHIVE;
 	}
 }
 
 /*
- * Update the requested znode timestamps with the current time.
- * If we are in a transaction, then go ahead and mark the znode
- * dirty in the transaction so the timestamps will go to disk.
- * Otherwise, we will get pushed next time the znode is updated
- * in a transaction, or when this znode eventually goes inactive.
- *
- * Why is this OK?
- *  1 - Only the ACCESS time is ever updated outside of a transaction.
- *  2 - Multiple consecutive updates will be collapsed into a single
- *	znode update by the transaction grouping semantics of the DMU.
- */
-void
-zfs_time_stamper(znode_t *zp, uint_t flag, dmu_tx_t *tx)
-{
-	mutex_enter(&zp->z_lock);
-	zfs_time_stamper_locked(zp, flag, tx);
-	mutex_exit(&zp->z_lock);
-}
-
-/*
  * Grow the block size for a file.
  *
  *	IN:	zp	- znode of file to free data in.
@@ -1201,17 +1400,18 @@
 	 * we will not grow.  If there is more than one block in a file,
 	 * the blocksize cannot change.
 	 */
-	if (zp->z_blksz && zp->z_phys->zp_size > zp->z_blksz)
+	if (zp->z_blksz && zp->z_size > zp->z_blksz)
 		return;
 
 	error = dmu_object_set_blocksize(zp->z_zfsvfs->z_os, zp->z_id,
 	    size, 0, tx);
+
 	if (error == ENOTSUP)
 		return;
 	ASSERT3U(error, ==, 0);
 
 	/* What blocksize did we actually get? */
-	dmu_object_size_from_db(zp->z_dbuf, &zp->z_blksz, &dummy);
+	dmu_object_size_from_db(sa_get_db(zp->z_sa_hdl), &zp->z_blksz, &dummy);
 }
 
 /*
@@ -1254,13 +1454,14 @@
 	/*
 	 * Nothing to do if file already at desired length.
 	 */
-	if (end <= zp->z_phys->zp_size) {
+	if (end <= zp->z_size) {
 		zfs_range_unlock(rl);
 		return (0);
 	}
 top:
 	tx = dmu_tx_create(zfsvfs->z_os);
-	dmu_tx_hold_bonus(tx, zp->z_id);
+	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
+	zfs_sa_upgrade_txholds(tx, zp);
 	if (end > zp->z_blksz &&
 	    (!ISP2(zp->z_blksz) || zp->z_blksz < zfsvfs->z_max_blksz)) {
 		/*
@@ -1288,12 +1489,14 @@
 		zfs_range_unlock(rl);
 		return (error);
 	}
-	dmu_buf_will_dirty(zp->z_dbuf, tx);
 
 	if (newblksz)
 		zfs_grow_blocksize(zp, newblksz, tx);
 
-	zp->z_phys->zp_size = end;
+	zp->z_size = end;
+
+	VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zp->z_zfsvfs),
+	    &zp->z_size, sizeof (zp->z_size), tx));
 
 	zfs_range_unlock(rl);
 
@@ -1327,13 +1530,13 @@
 	/*
 	 * Nothing to do if file already at desired length.
 	 */
-	if (off >= zp->z_phys->zp_size) {
+	if (off >= zp->z_size) {
 		zfs_range_unlock(rl);
 		return (0);
 	}
 
-	if (off + len > zp->z_phys->zp_size)
-		len = zp->z_phys->zp_size - off;
+	if (off + len > zp->z_size)
+		len = zp->z_size - off;
 
 	error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, off, len);
 
@@ -1368,7 +1571,7 @@
 	/*
 	 * Nothing to do if file already at desired length.
 	 */
-	if (end >= zp->z_phys->zp_size) {
+	if (end >= zp->z_size) {
 		zfs_range_unlock(rl);
 		return (0);
 	}
@@ -1380,7 +1583,8 @@
 	}
 top:
 	tx = dmu_tx_create(zfsvfs->z_os);
-	dmu_tx_hold_bonus(tx, zp->z_id);
+	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
+	zfs_sa_upgrade_txholds(tx, zp);
 	error = dmu_tx_assign(tx, TXG_NOWAIT);
 	if (error) {
 		if (error == ERESTART) {
@@ -1392,9 +1596,11 @@
 		zfs_range_unlock(rl);
 		return (error);
 	}
-	dmu_buf_will_dirty(zp->z_dbuf, tx);
+
+	zp->z_size = end;
 
-	zp->z_phys->zp_size = end;
+	VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zp->z_zfsvfs),
+	    &zp->z_size, sizeof (zp->z_size), tx));
 
 	dmu_tx_commit(tx);
 
@@ -1446,9 +1652,17 @@
 	dmu_tx_t *tx;
 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
 	zilog_t *zilog = zfsvfs->z_log;
+	uint64_t mode;
+	uint64_t mtime[2], ctime[2];
+	sa_bulk_attr_t bulk[2];
+	int count = 0;
 	int error;
 
-	if (off > zp->z_phys->zp_size) {
+	if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs), &mode,
+	    sizeof (mode))) != 0)
+		return (error);
+
+	if (off > zp->z_size) {
 		error =  zfs_extend(zp, off+len);
 		if (error == 0 && log)
 			goto log;
@@ -1459,8 +1673,9 @@
 	/*
 	 * Check for any locks in the region to be freed.
 	 */
-	if (MANDLOCK(vp, (mode_t)zp->z_phys->zp_mode)) {
-		uint64_t length = (len ? len : zp->z_phys->zp_size - off);
+
+	if (MANDLOCK(vp, (mode_t)mode)) {
+		uint64_t length = (len ? len : zp->z_size - off);
 		if (error = chklock(vp, FWRITE, off, length, flag, NULL))
 			return (error);
 	}
@@ -1469,14 +1684,15 @@
 		error = zfs_trunc(zp, off);
 	} else {
 		if ((error = zfs_free_range(zp, off, len)) == 0 &&
-		    off + len > zp->z_phys->zp_size)
+		    off + len > zp->z_size)
 			error = zfs_extend(zp, off+len);
 	}
 	if (error || !log)
 		return (error);
 log:
 	tx = dmu_tx_create(zfsvfs->z_os);
-	dmu_tx_hold_bonus(tx, zp->z_id);
+	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
+	zfs_sa_upgrade_txholds(tx, zp);
 	error = dmu_tx_assign(tx, TXG_NOWAIT);
 	if (error) {
 		if (error == ERESTART) {
@@ -1488,7 +1704,12 @@
 		return (error);
 	}
 
-	zfs_time_stamper(zp, CONTENT_MODIFIED, tx);
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, mtime, 16);
+	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, ctime, 16);
+	zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, B_TRUE);
+	error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
+	ASSERT(error == 0);
+
 	zfs_log_truncate(zilog, tx, TX_TRUNCATE, zp, off, len);
 
 	dmu_tx_commit(tx);
@@ -1499,7 +1720,7 @@
 zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx)
 {
 	zfsvfs_t	zfsvfs;
-	uint64_t	moid, obj, version;
+	uint64_t	moid, obj, sa_obj, version;
 	uint64_t	sense = ZFS_CASE_SENSITIVE;
 	uint64_t	norm = 0;
 	nvpair_t	*elem;
@@ -1526,12 +1747,7 @@
 	/*
 	 * Set starting attributes.
 	 */
-	if (spa_version(dmu_objset_spa(os)) >= SPA_VERSION_USERSPACE)
-		version = ZPL_VERSION;
-	else if (spa_version(dmu_objset_spa(os)) >= SPA_VERSION_FUID)
-		version = ZPL_VERSION_USERSPACE - 1;
-	else
-		version = ZPL_VERSION_FUID - 1;
+	version = zfs_zpl_version_map(spa_version(dmu_objset_spa(os)));
 	elem = NULL;
 	while ((elem = nvlist_next_nvpair(zplprops, elem)) != NULL) {
 		/* For the moment we expect all zpl props to be uint64_ts */
@@ -1557,6 +1773,18 @@
 	error = zap_update(os, moid, ZPL_VERSION_STR, 8, 1, &version, tx);
 
 	/*
+	 * Create zap object used for SA attribute registration
+	 */
+
+	if (version >= ZPL_VERSION_SA) {
+		sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE,
+		    DMU_OT_NONE, 0, tx);
+		error = zap_add(os, moid, ZFS_SA_ATTRS, 8, 1, &sa_obj, tx);
+		ASSERT(error == 0);
+	} else {
+		sa_obj = 0;
+	}
+	/*
 	 * Create a delete queue.
 	 */
 	obj = zap_create(os, DMU_OT_UNLINKED_SET, DMU_OT_NONE, 0, tx);
@@ -1577,6 +1805,7 @@
 	rootzp = kmem_cache_alloc(znode_cache, KM_SLEEP);
 	rootzp->z_unlinked = 0;
 	rootzp->z_atime_dirty = 0;
+	rootzp->z_is_sa = USE_SA(version, os);
 
 	vp = ZTOV(rootzp);
 	vn_reinit(vp);
@@ -1588,7 +1817,11 @@
 	zfsvfs.z_parent = &zfsvfs;
 	zfsvfs.z_version = version;
 	zfsvfs.z_use_fuids = USE_FUIDS(version, os);
+	zfsvfs.z_use_sa = USE_SA(version, os);
 	zfsvfs.z_norm = norm;
+
+	zfsvfs.z_attr_table = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END);
+
 	/*
 	 * Fold case on file systems that are always or sometimes case
 	 * insensitive.
@@ -1607,7 +1840,7 @@
 	rootzp->z_zfsvfs = &zfsvfs;
 	VERIFY(0 == zfs_acl_ids_create(rootzp, IS_ROOT_NODE, &vattr,
 	    cr, NULL, &acl_ids));
-	zfs_mknode(rootzp, &vattr, tx, cr, IS_ROOT_NODE, &zp, 0, &acl_ids);
+	zfs_mknode(rootzp, &vattr, tx, cr, IS_ROOT_NODE, &zp, &acl_ids);
 	ASSERT3P(zp, ==, rootzp);
 	ASSERT(!vn_in_dnlc(ZTOV(rootzp))); /* not valid to move */
 	error = zap_add(os, moid, ZFS_ROOT_OBJ, 8, 1, &rootzp->z_id, tx);
@@ -1616,8 +1849,7 @@
 	POINTER_INVALIDATE(&rootzp->z_zfsvfs);
 
 	ZTOV(rootzp)->v_count = 0;
-	dmu_buf_rele(rootzp->z_dbuf, NULL);
-	rootzp->z_dbuf = NULL;
+	sa_handle_destroy(rootzp->z_sa_hdl);
 	kmem_cache_free(znode_cache, rootzp);
 
 	/*
@@ -1633,33 +1865,59 @@
 }
 
 #endif /* _KERNEL */
+
 /*
  * Given an object number, return its parent object number and whether
  * or not the object is an extended attribute directory.
  */
 static int
-zfs_obj_to_pobj(objset_t *osp, uint64_t obj, uint64_t *pobjp, int *is_xattrdir)
+zfs_obj_to_pobj(objset_t *osp, uint64_t obj, uint64_t *pobjp, int *is_xattrdir,
+    sa_attr_type_t *sa_table)
 {
 	dmu_buf_t *db;
 	dmu_object_info_t doi;
-	znode_phys_t *zp;
 	int error;
+	uint64_t parent;
+	uint64_t pflags;
+	uint64_t mode;
+	sa_bulk_attr_t bulk[3];
+	sa_handle_t *hdl;
+	int count = 0;
 
-	if ((error = dmu_bonus_hold(osp, obj, FTAG, &db)) != 0)
+	if ((error = sa_buf_hold(osp, obj, FTAG, &db)) != 0)
 		return (error);
 
 	dmu_object_info_from_db(db, &doi);
-	if (doi.doi_bonus_type != DMU_OT_ZNODE ||
+	if ((doi.doi_bonus_type != DMU_OT_SA &&
+	    doi.doi_bonus_type != DMU_OT_ZNODE) ||
+	    doi.doi_bonus_type == DMU_OT_ZNODE &&
 	    doi.doi_bonus_size < sizeof (znode_phys_t)) {
-		dmu_buf_rele(db, FTAG);
+		sa_buf_rele(db, FTAG);
 		return (EINVAL);
 	}
 
-	zp = db->db_data;
-	*pobjp = zp->zp_parent;
-	*is_xattrdir = ((zp->zp_flags & ZFS_XATTR) != 0) &&
-	    S_ISDIR(zp->zp_mode);
-	dmu_buf_rele(db, FTAG);
+	if ((error = sa_handle_get(osp, obj, NULL, SA_HDL_PRIVATE,
+	    &hdl)) != 0) {
+		sa_buf_rele(db, FTAG);
+		return (error);
+	}
+
+	SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_PARENT],
+	    NULL, &parent, 8);
+	SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_FLAGS], NULL,
+	    &pflags, 8);
+	SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL,
+	    &mode, 8);
+
+	if ((error = sa_bulk_lookup(hdl, bulk, count)) != 0) {
+		sa_buf_rele(db, FTAG);
+		sa_handle_destroy(hdl);
+		return (error);
+	}
+	*pobjp = parent;
+	*is_xattrdir = ((pflags & ZFS_XATTR) != 0) && S_ISDIR(mode);
+	sa_handle_destroy(hdl);
+	sa_buf_rele(db, FTAG);
 
 	return (0);
 }
@@ -1668,10 +1926,19 @@
 zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len)
 {
 	char *path = buf + len - 1;
+	sa_attr_type_t *sa_table;
 	int error;
+	uint64_t sa_obj = 0;
 
 	*path = '\0';
 
+	error = zap_lookup(osp, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, &sa_obj);
+
+	if (error != 0 && error != ENOENT)
+		return (error);
+
+	sa_table = sa_setup(osp, sa_obj, zfs_attr_table, ZPL_END);
+
 	for (;;) {
 		uint64_t pobj;
 		char component[MAXNAMELEN + 2];
@@ -1679,7 +1946,7 @@
 		int is_xattrdir;
 
 		if ((error = zfs_obj_to_pobj(osp, obj, &pobj,
-		    &is_xattrdir)) != 0)
+		    &is_xattrdir, sa_table)) != 0)
 			break;
 
 		if (pobj == obj) {
@@ -1707,5 +1974,6 @@
 
 	if (error == 0)
 		(void) memmove(buf, path, buf + len - path);
+
 	return (error);
 }

--- a/usr/src/uts/common/sys/fs/zfs.h	Tue Mar 16 06:44:44 2010 -0700
+++ b/usr/src/uts/common/sys/fs/zfs.h	Tue Mar 16 09:43:38 2010 -0600
@@ -324,14 +324,15 @@
 #define	SPA_VERSION_21			21ULL
 #define	SPA_VERSION_22			22ULL
 #define	SPA_VERSION_23			23ULL
+#define	SPA_VERSION_24			24ULL
 /*
  * When bumping up SPA_VERSION, make sure GRUB ZFS understands the on-disk
  * format change. Go to usr/src/grub/grub-0.97/stage2/{zfs-include/, fsys_zfs*},
  * and do the appropriate changes.  Also bump the version number in
  * usr/src/grub/capability.
  */
-#define	SPA_VERSION			SPA_VERSION_23
-#define	SPA_VERSION_STRING		"23"
+#define	SPA_VERSION			SPA_VERSION_24
+#define	SPA_VERSION_STRING		"24"
 
 /*
  * Symbolic names for the changes that caused a SPA_VERSION switch.
@@ -376,6 +377,7 @@
 #define	SPA_VERSION_DEDUP		SPA_VERSION_21
 #define	SPA_VERSION_RECVD_PROPS		SPA_VERSION_22
 #define	SPA_VERSION_SLIM_ZIL		SPA_VERSION_23
+#define	SPA_VERSION_SA			SPA_VERSION_24
 
 /*
  * ZPL version - rev'd whenever an incompatible on-disk format change
@@ -389,8 +391,9 @@
 #define	ZPL_VERSION_2			2ULL
 #define	ZPL_VERSION_3			3ULL
 #define	ZPL_VERSION_4			4ULL
-#define	ZPL_VERSION			ZPL_VERSION_4
-#define	ZPL_VERSION_STRING		"4"
+#define	ZPL_VERSION_5			5ULL
+#define	ZPL_VERSION			ZPL_VERSION_5
+#define	ZPL_VERSION_STRING		"5"
 
 #define	ZPL_VERSION_INITIAL		ZPL_VERSION_1
 #define	ZPL_VERSION_DIRENT_TYPE		ZPL_VERSION_2
@@ -398,6 +401,7 @@
 #define	ZPL_VERSION_NORMALIZATION	ZPL_VERSION_3
 #define	ZPL_VERSION_SYSATTR		ZPL_VERSION_3
 #define	ZPL_VERSION_USERSPACE		ZPL_VERSION_4
+#define	ZPL_VERSION_SA			ZPL_VERSION_5
 
 /* Rewind request information */
 #define	ZPOOL_NO_REWIND		1  /* No policy - default behavior */

author	Mark Shellenbaum <Mark.Shellenbaum@Sun.COM>
date	Tue, 16 Mar 2010 09:43:38 -0600
changeset 11935	538c866aaac6
parent 11934	fdae577692c4
child 11936	54dc8a89ba0d

usr/src/cmd/mdb/common/modules/zfs/zfs.c		file \| annotate \| diff \| comparison \| revisions
usr/src/cmd/zdb/zdb.c		file \| annotate \| diff \| comparison \| revisions
usr/src/cmd/zfs/Makefile		file \| annotate \| diff \| comparison \| revisions
usr/src/cmd/zfs/zfs_main.c		file \| annotate \| diff \| comparison \| revisions
usr/src/cmd/zpool/zpool_main.c		file \| annotate \| diff \| comparison \| revisions
usr/src/cmd/zstreamdump/zstreamdump.c		file \| annotate \| diff \| comparison \| revisions
usr/src/common/zfs/zfs_comutil.c		file \| annotate \| diff \| comparison \| revisions
usr/src/common/zfs/zfs_comutil.h		file \| annotate \| diff \| comparison \| revisions
usr/src/common/zfs/zfs_prop.c		file \| annotate \| diff \| comparison \| revisions
usr/src/grub/capability		file \| annotate \| diff \| comparison \| revisions
usr/src/grub/grub-0.97/stage2/fsys_zfs.c		file \| annotate \| diff \| comparison \| revisions
usr/src/grub/grub-0.97/stage2/fsys_zfs.h		file \| annotate \| diff \| comparison \| revisions
usr/src/grub/grub-0.97/stage2/zfs-include/dmu.h		file \| annotate \| diff \| comparison \| revisions
usr/src/grub/grub-0.97/stage2/zfs-include/dnode.h		file \| annotate \| diff \| comparison \| revisions
usr/src/grub/grub-0.97/stage2/zfs-include/sa_impl.h		file \| annotate \| diff \| comparison \| revisions
usr/src/grub/grub-0.97/stage2/zfs-include/zfs.h		file \| annotate \| diff \| comparison \| revisions
usr/src/grub/grub-0.97/stage2/zfs-include/zfs_znode.h		file \| annotate \| diff \| comparison \| revisions
usr/src/lib/libzfs/common/libzfs_impl.h		file \| annotate \| diff \| comparison \| revisions
usr/src/lib/libzfs/common/libzfs_sendrecv.c		file \| annotate \| diff \| comparison \| revisions
usr/src/lib/libzfs/common/mapfile-vers		file \| annotate \| diff \| comparison \| revisions
usr/src/lib/libzpool/common/kernel.c		file \| annotate \| diff \| comparison \| revisions
usr/src/lib/libzpool/common/llib-lzpool		file \| annotate \| diff \| comparison \| revisions
usr/src/lib/libzpool/common/sys/zfs_context.h		file \| annotate \| diff \| comparison \| revisions
usr/src/psm/stand/bootblks/zfs/common/zfs.fth		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/Makefile.files		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/dbuf.c		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/dmu.c		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/dmu_objset.c		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/dmu_send.c		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/dmu_traverse.c		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/dmu_tx.c		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/dnode.c		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/dnode_sync.c		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/dsl_pool.c		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/dsl_scrub.c		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/sa.c		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/sys/dbuf.h		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/sys/dmu.h		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/sys/dmu_objset.h		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/sys/dmu_tx.h		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/sys/dnode.h		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/sys/sa.h		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/sys/sa_impl.h		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/sys/zfs_acl.h		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/sys/zfs_dir.h		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/sys/zfs_sa.h		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/sys/zfs_vfsops.h		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/sys/zfs_znode.h		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/zfs_acl.c		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/zfs_byteswap.c		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/zfs_ctldir.c		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/zfs_dir.c		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/zfs_fuid.c		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/zfs_ioctl.c		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/zfs_log.c		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/zfs_replay.c		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/zfs_rlock.c		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/zfs_sa.c		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/zfs_vfsops.c		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/zfs_vnops.c		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/fs/zfs/zfs_znode.c		file \| annotate \| diff \| comparison \| revisions
usr/src/uts/common/sys/fs/zfs.h		file \| annotate \| diff \| comparison \| revisions