PSARC/2009/204 ZFS user/group quotas & space accounting
author Matthew Ahrens <Matthew.Ahrens@Sun.COM>
date Sat, 18 Apr 2009 13:41:47 -0700
changeset 9396 f41cf682d0d3
parent 9395 2db090840cf7
child 9397 e667d620a75c
PSARC/2009/204 ZFS user/group quotas & space accounting
6501037 want user/group quotas on ZFS
6830813 zfs list -t all fails assertion
6827260 assertion failed in arc_read(): hdr == pbuf->b_hdr
6815592 panic: No such hold X on refcount Y from zfs_znode_move
6759986 zfs list shows temporary %clone when doing online zfs recv
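For orientation, the help text added to zfs_main.c below documents the new {user|group}{used|quota}@ properties and the userspace/groupspace subcommands. A hypothetical usage sketch follows; the dataset tank/home and the group "staff" are placeholders, and "matt" is the example user name taken from the help text:

	$ zfs set userquota@matt=10G tank/home      # limit matt to 10G on this file system
	$ zfs set groupquota@staff=100G tank/home   # limit the staff group to 100G
	$ zfs get userused@matt,userquota@matt tank/home
	$ zfs userspace tank/home                   # per-user space used and quotas
	$ zfs groupspace tank/home                  # per-group space used and quotas
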
usr/src/Makefile.master
usr/src/Targetdirs
usr/src/cmd/Makefile
usr/src/cmd/pyzfs/Makefile
usr/src/cmd/pyzfs/pyzfs.py
usr/src/cmd/truss/codes.c
usr/src/cmd/zdb/zdb.c
usr/src/cmd/zfs/Makefile
usr/src/cmd/zfs/zfs_iter.c
usr/src/cmd/zfs/zfs_main.c
usr/src/cmd/zpool/zpool_main.c
usr/src/common/zfs/zfs_deleg.c
usr/src/common/zfs/zfs_deleg.h
usr/src/common/zfs/zfs_namecheck.c
usr/src/common/zfs/zfs_namecheck.h
usr/src/common/zfs/zfs_prop.c
usr/src/common/zfs/zprop_common.c
usr/src/grub/capability
usr/src/grub/grub-0.97/stage2/zfs-include/dmu_objset.h
usr/src/grub/grub-0.97/stage2/zfs-include/zfs.h
usr/src/grub/grub-0.97/stage2/zfs-include/zfs_znode.h
usr/src/lib/Makefile
usr/src/lib/libzfs/Makefile.com
usr/src/lib/libzfs/common/libzfs.h
usr/src/lib/libzfs/common/libzfs_changelist.c
usr/src/lib/libzfs/common/libzfs_dataset.c
usr/src/lib/libzfs/common/libzfs_graph.c
usr/src/lib/libzfs/common/libzfs_sendrecv.c
usr/src/lib/libzfs/common/libzfs_util.c
usr/src/lib/libzfs/common/mapfile-vers
usr/src/lib/pyzfs/Makefile
usr/src/lib/pyzfs/Makefile.com
usr/src/lib/pyzfs/common/__init__.py
usr/src/lib/pyzfs/common/allow.py
usr/src/lib/pyzfs/common/dataset.py
usr/src/lib/pyzfs/common/groupspace.py
usr/src/lib/pyzfs/common/ioctl.c
usr/src/lib/pyzfs/common/mapfile-vers
usr/src/lib/pyzfs/common/unallow.py
usr/src/lib/pyzfs/common/userspace.py
usr/src/lib/pyzfs/common/util.py
usr/src/lib/pyzfs/i386/Makefile
usr/src/lib/pyzfs/sparc/Makefile
usr/src/pkgdefs/SUNWzfsu/prototype_com
usr/src/tools/scripts/check_rtime.pl
usr/src/uts/common/fs/zfs/arc.c
usr/src/uts/common/fs/zfs/dbuf.c
usr/src/uts/common/fs/zfs/dmu.c
usr/src/uts/common/fs/zfs/dmu_objset.c
usr/src/uts/common/fs/zfs/dmu_send.c
usr/src/uts/common/fs/zfs/dmu_traverse.c
usr/src/uts/common/fs/zfs/dmu_tx.c
usr/src/uts/common/fs/zfs/dnode.c
usr/src/uts/common/fs/zfs/dnode_sync.c
usr/src/uts/common/fs/zfs/dsl_dataset.c
usr/src/uts/common/fs/zfs/dsl_dir.c
usr/src/uts/common/fs/zfs/dsl_pool.c
usr/src/uts/common/fs/zfs/dsl_scrub.c
usr/src/uts/common/fs/zfs/spa_errlog.c
usr/src/uts/common/fs/zfs/sys/dmu.h
usr/src/uts/common/fs/zfs/sys/dmu_objset.h
usr/src/uts/common/fs/zfs/sys/dnode.h
usr/src/uts/common/fs/zfs/sys/dsl_deleg.h
usr/src/uts/common/fs/zfs/sys/dsl_dir.h
usr/src/uts/common/fs/zfs/sys/zfs_acl.h
usr/src/uts/common/fs/zfs/sys/zfs_fuid.h
usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h
usr/src/uts/common/fs/zfs/sys/zfs_vfsops.h
usr/src/uts/common/fs/zfs/sys/zfs_znode.h
usr/src/uts/common/fs/zfs/zfs_acl.c
usr/src/uts/common/fs/zfs/zfs_dir.c
usr/src/uts/common/fs/zfs/zfs_fuid.c
usr/src/uts/common/fs/zfs/zfs_ioctl.c
usr/src/uts/common/fs/zfs/zfs_vfsops.c
usr/src/uts/common/fs/zfs/zfs_vnops.c
usr/src/uts/common/fs/zfs/zfs_znode.c
usr/src/uts/common/fs/zfs/zil.c
usr/src/uts/common/fs/zfs/zio.c
usr/src/uts/common/sys/fs/zfs.h
--- a/usr/src/Makefile.master	Sat Apr 18 01:13:46 2009 -0700
+++ b/usr/src/Makefile.master	Sat Apr 18 13:41:47 2009 -0700
@@ -516,6 +516,9 @@
 COMPILE.cpp= $(CC) -E -C $(CFLAGS) $(CPPFLAGS)
 XGETTEXT= /usr/bin/xgettext
 XGETFLAGS= -c TRANSLATION_NOTE
+GNUXGETTEXT= /usr/gnu/bin/xgettext
+GNUXGETFLAGS= --add-comments=TRANSLATION_NOTE --keyword=_ \
+	--strict --no-location --omit-header
 BUILD.po= $(XGETTEXT) $(XGETFLAGS) -d $(<F) $<.i ;\
 	$(RM)	$@ ;\
 	$(SED) "/^domain/d" < $(<F).po > $@ ;\
@@ -1009,8 +1012,6 @@
 
 #
 # Python and Perl executable and message catalog build rules.
-# Note that Python i18n isn't supported by this rule set yet,
-# as it requires a special build tool (pygettext.py).
 #
 .SUFFIXES: .pl .pm .py .pyc
 
@@ -1027,6 +1028,9 @@
 	$(PYTHON) -mpy_compile $<
 	@[ $(<)c = $@ ] || $(MV) $(<)c $@
 
+.py.po:
+	$(GNUXGETTEXT) $(GNUXGETFLAGS) -d $(<F:%.py=%) $< ;
+
 .pl.po .pm.po:
 	$(XGETTEXT) $(XGETFLAGS) -d $(<F) $< ;
 	$(RM)	$@ ;
--- a/usr/src/Targetdirs	Sat Apr 18 01:13:46 2009 -0700
+++ b/usr/src/Targetdirs	Sat Apr 18 13:41:47 2009 -0700
@@ -297,6 +297,9 @@
 	/usr/lib/localedef/src \
 	/usr/lib/lwp \
 	/usr/lib/pool \
+	/usr/lib/python2.4 \
+	/usr/lib/python2.4/vendor-packages \
+	/usr/lib/python2.4/vendor-packages/zfs \
 	/usr/lib/rcap \
 	/usr/lib/rcap/$(MACH32) \
 	/usr/lib/saf \
--- a/usr/src/cmd/Makefile	Sat Apr 18 01:13:46 2009 -0700
+++ b/usr/src/cmd/Makefile	Sat Apr 18 13:41:47 2009 -0700
@@ -327,6 +327,7 @@
 	pwck		\
 	pwconv		\
 	pwd		\
+	pyzfs		\
 	raidctl		\
 	ramdiskadm	\
 	rcap		\
@@ -667,6 +668,7 @@
 	ptools		\
 	pwconv		\
 	pwd		\
+	pyzfs		\
 	raidctl		\
 	ramdiskadm	\
 	rcap		\
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/cmd/pyzfs/Makefile	Sat Apr 18 13:41:47 2009 -0700
@@ -0,0 +1,49 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+include ../Makefile.cmd
+
+ROOTCMDDIR=	$(ROOTLIB)/zfs
+
+PYSRCS=		pyzfs.py
+PYOBJS=		$(PYSRCS:%.py=%.pyc)
+PYFILES=	$(PYSRCS) $(PYOBJS)
+POFILE=		pyzfs.po
+
+ROOTLIBZFSFILES= $(PYFILES:%=$(ROOTLIB)/zfs/%)
+
+.KEEP_STATE:
+
+all: $(PYOBJS)
+
+install: all $(ROOTLIBZFSFILES)
+
+clean:
+	$(RM) $(PYOBJS)
+
+$(ROOTLIB)/zfs/%: %
+	$(INS.pyfile)
+
+include ../Makefile.targ
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/cmd/pyzfs/pyzfs.py	Sat Apr 18 13:41:47 2009 -0700
@@ -0,0 +1,79 @@
+#! /usr/bin/python2.4 -S
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+# Note, we want SIGINT (control-c) to exit the process quietly, to mimic
+# the standard behavior of C programs.  The best we can do with pure
+# Python is to run with -S (to disable "import site"), and start our
+# program with a "try" statement.  Hopefully nobody hits ^C before our
+# try statement is executed.
+
+try:
+	import site
+	import gettext
+	import zfs.util
+	import zfs.ioctl
+	import sys
+	import errno
+
+	"""This is the main script for doing zfs subcommands.  It doesn't know
+	what subcommands there are, it just looks for a module zfs.<subcommand>
+	that implements that subcommand."""
+
+	_ = gettext.translation("SUNW_OST_OSCMD", "/usr/lib/locale",
+	    fallback=True).gettext
+
+	if len(sys.argv) < 2:
+		sys.exit(_("missing subcommand argument"))
+
+	zfs.ioctl.set_cmdstr(" ".join(["zfs"] + sys.argv[1:]))
+
+	try:
+		# import zfs.<subcommand>
+		# subfunc =  zfs.<subcommand>.do_<subcommand>
+
+		subcmd = sys.argv[1]
+		__import__("zfs." + subcmd)
+		submod = getattr(zfs, subcmd)
+		subfunc = getattr(submod, "do_" + subcmd)
+	except (ImportError, AttributeError):
+		sys.exit(_("invalid subcommand"))
+
+	try:
+		subfunc()
+	except zfs.util.ZFSError, e:
+		print(e)
+		sys.exit(1)
+
+except IOError, e:
+	import errno
+	import sys
+
+	if e.errno == errno.EPIPE:
+		sys.exit(1)
+	raise
+except KeyboardInterrupt:
+	import sys
+
+	sys.exit(1)
--- a/usr/src/cmd/truss/codes.c	Sat Apr 18 01:13:46 2009 -0700
+++ b/usr/src/cmd/truss/codes.c	Sat Apr 18 13:41:47 2009 -0700
@@ -1083,6 +1083,12 @@
 		"zfs_cmd_t" },
 	{ (uint_t)ZFS_IOC_SMB_ACL,		"ZFS_IOC_SMB_ACL",
 		"zfs_cmd_t" },
+	{ (uint_t)ZFS_IOC_USERSPACE_ONE,	"ZFS_IOC_USERSPACE_ONE",
+		"zfs_cmd_t" },
+	{ (uint_t)ZFS_IOC_USERSPACE_MANY,	"ZFS_IOC_USERSPACE_MANY",
+		"zfs_cmd_t" },
+	{ (uint_t)ZFS_IOC_USERSPACE_UPGRADE,	"ZFS_IOC_USERSPACE_UPGRADE",
+		"zfs_cmd_t" },
 
 	/* kssl ioctls */
 	{ (uint_t)KSSL_ADD_ENTRY,		"KSSL_ADD_ENTRY",
--- a/usr/src/cmd/zdb/zdb.c	Sat Apr 18 01:13:46 2009 -0700
+++ b/usr/src/cmd/zdb/zdb.c	Sat Apr 18 13:41:47 2009 -0700
@@ -1026,6 +1026,8 @@
 	dump_packed_nvlist,	/* FUID nvlist size		*/
 	dump_zap,		/* DSL dataset next clones	*/
 	dump_zap,		/* DSL scrub queue		*/
+	dump_zap,		/* ZFS user/group used		*/
+	dump_zap,		/* ZFS user/group quota		*/
 };
 
 static void
@@ -1089,6 +1091,14 @@
 	}
 
 	if (verbosity >= 4) {
+		(void) printf("\tdnode flags: %s%s\n",
+		    (dn->dn_phys->dn_flags & DNODE_FLAG_USED_BYTES) ?
+		    "USED_BYTES " : "",
+		    (dn->dn_phys->dn_flags & DNODE_FLAG_USERUSED_ACCOUNTED) ?
+		    "USERUSED_ACCOUNTED " : "");
+		(void) printf("\tdnode maxblkid: %llu\n",
+		    (longlong_t)dn->dn_phys->dn_maxblkid);
+
 		object_viewer[doi.doi_bonus_type](os, object, bonus, bsize);
 		object_viewer[doi.doi_type](os, object, NULL, 0);
 		*print_header = 1;
@@ -1143,7 +1153,7 @@
 	uint64_t object, object_count;
 	uint64_t refdbytes, usedobjs, scratch;
 	char numbuf[8];
-	char blkbuf[BP_SPRINTF_LEN];
+	char blkbuf[BP_SPRINTF_LEN + 20];
 	char osname[MAXNAMELEN];
 	char *type = "UNKNOWN";
 	int verbosity = dump_opt['d'];
@@ -1169,8 +1179,8 @@
 	nicenum(refdbytes, numbuf);
 
 	if (verbosity >= 4) {
-		(void) strcpy(blkbuf, ", rootbp ");
-		sprintf_blkptr(blkbuf + strlen(blkbuf),
+		(void) sprintf(blkbuf + strlen(blkbuf), ", rootbp ");
+		(void) sprintf_blkptr(blkbuf + strlen(blkbuf),
 		    BP_SPRINTF_LEN - strlen(blkbuf), os->os->os_rootbp);
 	} else {
 		blkbuf[0] = '\0';
@@ -1205,7 +1215,12 @@
 	}
 
 	dump_object(os, 0, verbosity, &print_header);
-	object_count = 1;
+	object_count = 0;
+	if (os->os->os_userused_dnode &&
+	    os->os->os_userused_dnode->dn_type != 0) {
+		dump_object(os, DMU_USERUSED_OBJECT, verbosity, &print_header);
+		dump_object(os, DMU_GROUPUSED_OBJECT, verbosity, &print_header);
+	}
 
 	object = 0;
 	while ((error = dmu_object_next(os, &object, B_FALSE, 0)) == 0) {
--- a/usr/src/cmd/zfs/Makefile	Sat Apr 18 01:13:46 2009 -0700
+++ b/usr/src/cmd/zfs/Makefile	Sat Apr 18 13:41:47 2009 -0700
@@ -19,11 +19,9 @@
 # CDDL HEADER END
 #
 #
-# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 # Use is subject to license terms.
 #
-# ident	"%Z%%M%	%I%	%E% SMI"
-#
 
 PROG=		zfs
 OBJS=		zfs_main.o zfs_iter.o
@@ -38,7 +36,7 @@
 ROOTETCFSTYPE=  $(ROOTETC)/fs/$(FSTYPE)
 USRLIBFSTYPE=	$(ROOTLIB)/fs/$(FSTYPE)
 
-LDLIBS += -lzfs -luutil -lumem -lnvpair -lavl
+LDLIBS += -lzfs -luutil -lumem -lnvpair
 
 C99MODE=	-xc99=%all
 C99LMODE=	-Xc99=%all
--- a/usr/src/cmd/zfs/zfs_iter.c	Sat Apr 18 01:13:46 2009 -0700
+++ b/usr/src/cmd/zfs/zfs_iter.c	Sat Apr 18 13:41:47 2009 -0700
@@ -368,7 +368,10 @@
 	 * properties other than those listed in cb_proplist/sortcol are
 	 * accessed.
 	 *
-	 * If cb_proplist is NULL then we retain all the properties.
+	 * If cb_proplist is NULL then we retain all the properties.  We
+	 * always retain the zoned property, which some other properties
+	 * need (userquota & friends), and the createtxg property, which
+	 * we need to sort snapshots.
 	 */
 	if (cb.cb_proplist && *cb.cb_proplist) {
 		zprop_list_t *p = *cb.cb_proplist;
@@ -388,6 +391,9 @@
 			}
 			sortcol = sortcol->sc_next;
 		}
+
+		cb.cb_props_table[ZFS_PROP_ZONED] = B_TRUE;
+		cb.cb_props_table[ZFS_PROP_CREATETXG] = B_TRUE;
 	} else {
 		(void) memset(cb.cb_props_table, B_TRUE,
 		    sizeof (cb.cb_props_table));
--- a/usr/src/cmd/zfs/zfs_main.c	Sat Apr 18 01:13:46 2009 -0700
+++ b/usr/src/cmd/zfs/zfs_main.c	Sat Apr 18 13:41:47 2009 -0700
@@ -39,12 +39,14 @@
 #include <unistd.h>
 #include <fcntl.h>
 #include <zone.h>
+#include <grp.h>
+#include <pwd.h>
 #include <sys/mkdev.h>
 #include <sys/mntent.h>
 #include <sys/mnttab.h>
 #include <sys/mount.h>
 #include <sys/stat.h>
-#include <sys/avl.h>
+#include <sys/fs/zfs.h>
 
 #include <libzfs.h>
 #include <libuutil.h>
@@ -56,6 +58,7 @@
 
 static FILE *mnttab_file;
 static char history_str[HIS_MAX_RECORD_LEN];
+const char *pypath = "/usr/lib/zfs/pyzfs.py";
 
 static int zfs_do_clone(int argc, char **argv);
 static int zfs_do_create(int argc, char **argv);
@@ -75,8 +78,8 @@
 static int zfs_do_send(int argc, char **argv);
 static int zfs_do_receive(int argc, char **argv);
 static int zfs_do_promote(int argc, char **argv);
-static int zfs_do_allow(int argc, char **argv);
-static int zfs_do_unallow(int argc, char **argv);
+static int zfs_do_userspace(int argc, char **argv);
+static int zfs_do_python(int argc, char **argv);
 
 /*
  * Enable a reasonable set of defaults for libumem debugging on DEBUG builds.
@@ -116,7 +119,9 @@
 	HELP_UNMOUNT,
 	HELP_UNSHARE,
 	HELP_ALLOW,
-	HELP_UNALLOW
+	HELP_UNALLOW,
+	HELP_USERSPACE,
+	HELP_GROUPSPACE
 } zfs_help_t;
 
 typedef struct zfs_command {
@@ -150,6 +155,8 @@
 	{ "get", 	zfs_do_get,		HELP_GET		},
 	{ "inherit",	zfs_do_inherit,		HELP_INHERIT		},
 	{ "upgrade",	zfs_do_upgrade,		HELP_UPGRADE		},
+	{ "userspace",	zfs_do_userspace,	HELP_USERSPACE		},
+	{ "groupspace",	zfs_do_userspace,	HELP_GROUPSPACE		},
 	{ NULL },
 	{ "mount",	zfs_do_mount,		HELP_MOUNT		},
 	{ "unmount",	zfs_do_unmount,		HELP_UNMOUNT		},
@@ -159,9 +166,9 @@
 	{ "send",	zfs_do_send,		HELP_SEND		},
 	{ "receive",	zfs_do_receive,		HELP_RECEIVE		},
 	{ NULL },
-	{ "allow",	zfs_do_allow,		HELP_ALLOW		},
+	{ "allow",	zfs_do_python,		HELP_ALLOW		},
 	{ NULL },
-	{ "unallow",	zfs_do_unallow,		HELP_UNALLOW		},
+	{ "unallow",	zfs_do_python,		HELP_UNALLOW		},
 };
 
 #define	NCOMMAND	(sizeof (command_table) / sizeof (command_table[0]))
@@ -250,6 +257,14 @@
 		    "<filesystem|volume>\n"
 		    "\tunallow [-r] -s @setname [<perm|@setname>[,...]] "
 		    "<filesystem|volume>\n"));
+	case HELP_USERSPACE:
+		return (gettext("\tuserspace [-hniHp] [-o field[,...]] "
+		    "[-sS field] ... [-t type[,...]]\n"
+		    "\t    <filesystem|snapshot>\n"));
+	case HELP_GROUPSPACE:
+		return (gettext("\tgroupspace [-hniHpU] [-o field[,...]] "
+		    "[-sS field] ... [-t type[,...]]\n"
+		    "\t    <filesystem|snapshot>\n"));
 	}
 
 	abort();
@@ -311,7 +326,6 @@
 {
 	int i;
 	boolean_t show_properties = B_FALSE;
-	boolean_t show_permissions = B_FALSE;
 	FILE *fp = requested ? stdout : stderr;
 
 	if (current_command == NULL) {
@@ -342,13 +356,7 @@
 	    strcmp(current_command->name, "list") == 0))
 		show_properties = B_TRUE;
 
-	if (current_command != NULL &&
-	    (strcmp(current_command->name, "allow") == 0 ||
-	    strcmp(current_command->name, "unallow") == 0))
-		show_permissions = B_TRUE;
-
 	if (show_properties) {
-
 		(void) fprintf(fp,
 		    gettext("\nThe following properties are supported:\n"));
 
@@ -359,16 +367,26 @@
 		(void) zprop_iter(usage_prop_cb, fp, B_FALSE, B_TRUE,
 		    ZFS_TYPE_DATASET);
 
+		(void) fprintf(fp, "\t%-15s ", "userused@...");
+		(void) fprintf(fp, " NO       NO   <size>\n");
+		(void) fprintf(fp, "\t%-15s ", "groupused@...");
+		(void) fprintf(fp, " NO       NO   <size>\n");
+		(void) fprintf(fp, "\t%-15s ", "userquota@...");
+		(void) fprintf(fp, "YES       NO   <size> | none\n");
+		(void) fprintf(fp, "\t%-15s ", "groupquota@...");
+		(void) fprintf(fp, "YES       NO   <size> | none\n");
+
 		(void) fprintf(fp, gettext("\nSizes are specified in bytes "
 		    "with standard units such as K, M, G, etc.\n"));
 		(void) fprintf(fp, gettext("\nUser-defined properties can "
 		    "be specified by using a name containing a colon (:).\n"));
-
-	} else if (show_permissions) {
-		(void) fprintf(fp,
-		    gettext("\nThe following permissions are supported:\n"));
-
-		zfs_deleg_permissions();
+		(void) fprintf(fp, gettext("\nThe {user|group}{used|quota}@ "
+		    "properties must be appended with\n"
+		    "a user or group specifier of one of these forms:\n"
+		    "    POSIX name      (eg: \"matt\")\n"
+		    "    POSIX id        (eg: \"126829\")\n"
+		    "    SMB name@domain (eg: \"matt@sun\")\n"
+		    "    SMB SID         (eg: \"S-1-234-567-89\")\n"));
 	} else {
 		(void) fprintf(fp,
 		    gettext("\nFor the property list, run: %s\n"),
@@ -1084,6 +1102,17 @@
 			zprop_print_one_property(zfs_get_name(zhp), cbp,
 			    zfs_prop_to_name(pl->pl_prop),
 			    buf, sourcetype, source);
+		} else if (zfs_prop_userquota(pl->pl_user_prop)) {
+			sourcetype = ZPROP_SRC_LOCAL;
+
+			if (zfs_prop_get_userquota(zhp, pl->pl_user_prop,
+			    buf, sizeof (buf), cbp->cb_literal) != 0) {
+				sourcetype = ZPROP_SRC_NONE;
+				(void) strlcpy(buf, "-", sizeof (buf));
+			}
+
+			zprop_print_one_property(zfs_get_name(zhp), cbp,
+			    pl->pl_user_prop, buf, sourcetype, source);
 		} else {
 			if (nvlist_lookup_nvlist(userprop,
 			    pl->pl_user_prop, &propval) != 0) {
@@ -1460,21 +1489,30 @@
 {
 	upgrade_cbdata_t *cb = data;
 	int version = zfs_prop_get_int(zhp, ZFS_PROP_VERSION);
-
-	if (cb->cb_version >= ZPL_VERSION_FUID) {
-		int spa_version;
-
-		if (zfs_spa_version(zhp, &spa_version) < 0)
-			return (-1);
-
-		if (spa_version < SPA_VERSION_FUID) {
-			/* can't upgrade */
-			(void) printf(gettext("%s: can not be upgraded; "
-			    "the pool version needs to first be upgraded\nto "
-			    "version %d\n\n"),
-			    zfs_get_name(zhp), SPA_VERSION_FUID);
-			cb->cb_numfailed++;
-			return (0);
+	int i;
+	static struct { int zplver; int spaver; } table[] = {
+		{ZPL_VERSION_FUID, SPA_VERSION_FUID},
+		{ZPL_VERSION_USERSPACE, SPA_VERSION_USERSPACE},
+		{0, 0}
+	};
+
+
+	for (i = 0; table[i].zplver; i++) {
+		if (cb->cb_version >= table[i].zplver) {
+			int spa_version;
+
+			if (zfs_spa_version(zhp, &spa_version) < 0)
+				return (-1);
+
+			if (spa_version < table[i].spaver) {
+				/* can't upgrade */
+				(void) printf(gettext("%s: can not be "
+				    "upgraded; the pool version needs to first "
+				    "be upgraded\nto version %d\n\n"),
+				    zfs_get_name(zhp), table[i].spaver);
+				cb->cb_numfailed++;
+				return (0);
+			}
 		}
 	}
 
@@ -1575,6 +1613,8 @@
 		(void) printf(gettext(" 2   Enhanced directory entries\n"));
 		(void) printf(gettext(" 3   Case insensitive and File system "
 		    "unique identifer (FUID)\n"));
+		(void) printf(gettext(" 4   userquota, groupquota "
+		    "properties\n"));
 		(void) printf(gettext("\nFor more information on a particular "
 		    "version, including supported releases, see:\n\n"));
 		(void) printf("http://www.opensolaris.org/os/community/zfs/"
@@ -1623,6 +1663,81 @@
 }
 
 /*
+ * zfs userspace
+ */
+static void
+userspace_cb(void *arg, const char *domain, uid_t rid, uint64_t space)
+{
+	zfs_userquota_prop_t *typep = arg;
+	zfs_userquota_prop_t p = *typep;
+	char *name, *ug, *propname;
+	char namebuf[32];
+	char sizebuf[32];
+
+	if (domain == NULL || domain[0] == '\0') {
+		if (p == ZFS_PROP_GROUPUSED || p == ZFS_PROP_GROUPQUOTA) {
+			struct group *g = getgrgid(rid);
+			if (g)
+				name = g->gr_name;
+		} else {
+			struct passwd *p = getpwuid(rid);
+			if (p)
+				name = p->pw_name;
+		}
+	}
+
+	if (p == ZFS_PROP_GROUPUSED || p == ZFS_PROP_GROUPQUOTA)
+		ug = "group";
+	else
+		ug = "user";
+
+	if (p == ZFS_PROP_USERUSED || p == ZFS_PROP_GROUPUSED)
+		propname = "used";
+	else
+		propname = "quota";
+
+	if (!name) {
+		(void) snprintf(namebuf, sizeof (namebuf),
+		    "%llu", (longlong_t)rid);
+		name = namebuf;
+	}
+	zfs_nicenum(space, sizebuf, sizeof (sizebuf));
+
+	(void) printf("%s %s %s%c%s %s\n", propname, ug, domain,
+	    domain[0] ? '-' : ' ', name, sizebuf);
+}
+
+static int
+zfs_do_userspace(int argc, char **argv)
+{
+	zfs_handle_t *zhp;
+	zfs_userquota_prop_t p;
+	int error;
+
+	/*
+	 * Try the python version.  If the execv fails, we'll continue
+	 * and do a simplistic implementation.
+	 */
+	(void) execv(pypath, argv-1);
+
+	(void) printf("internal error: %s not found\n"
+	    "falling back on built-in implementation, "
+	    "some features will not work\n", pypath);
+
+	if ((zhp = zfs_open(g_zfs, argv[argc-1], ZFS_TYPE_DATASET)) == NULL)
+		return (1);
+
+	(void) printf("PROP TYPE NAME VALUE\n");
+
+	for (p = 0; p < ZFS_NUM_USERQUOTA_PROPS; p++) {
+		error = zfs_userspace(zhp, p, userspace_cb, &p);
+		if (error)
+			break;
+	}
+	return (error);
+}
+
+/*
  * list [-r][-d max] [-H] [-o property[,property]...] [-t type[,type]...]
  *      [-s property [-s property]...] [-S property [-S property]...]
  *      <dataset> ...
@@ -1711,7 +1826,6 @@
 			first = B_FALSE;
 		}
 
-		right_justify = B_FALSE;
 		if (pl->pl_prop != ZPROP_INVAL) {
 			if (zfs_prop_get(zhp, pl->pl_prop, property,
 			    sizeof (property), NULL, NULL, 0, B_FALSE) != 0)
@@ -1720,6 +1834,13 @@
 				propstr = property;
 
 			right_justify = zfs_prop_align_right(pl->pl_prop);
+		} else if (zfs_prop_userquota(pl->pl_user_prop)) {
+			if (zfs_prop_get_userquota(zhp, pl->pl_user_prop,
+			    property, sizeof (property), B_FALSE) != 0)
+				propstr = "-";
+			else
+				propstr = property;
+			right_justify = B_TRUE;
 		} else {
 			if (nvlist_lookup_nvlist(userprops,
 			    pl->pl_user_prop, &propval) != 0)
@@ -1727,6 +1848,7 @@
 			else
 				verify(nvlist_lookup_string(propval,
 				    ZPROP_VALUE, &propstr) == 0);
+			right_justify = B_FALSE;
 		}
 
 		width = pl->pl_width;
@@ -2525,390 +2647,6 @@
 	return (err != 0);
 }
 
-typedef struct allow_cb {
-	int  a_permcnt;
-	size_t a_treeoffset;
-} allow_cb_t;
-
-static void
-zfs_print_perms(avl_tree_t *tree)
-{
-	zfs_perm_node_t *permnode;
-
-	permnode = avl_first(tree);
-	while (permnode != NULL) {
-		(void) printf("%s", permnode->z_pname);
-		permnode = AVL_NEXT(tree, permnode);
-		if (permnode)
-			(void) printf(",");
-		else
-			(void) printf("\n");
-	}
-}
-
-/*
- * Iterate over user/groups/everyone/... and the call perm_iter
- * function to print actual permission when tree has >0 nodes.
- */
-static void
-zfs_iter_perms(avl_tree_t *tree, const char *banner, allow_cb_t *cb)
-{
-	zfs_allow_node_t *item;
-	avl_tree_t *ptree;
-
-	item = avl_first(tree);
-	while (item) {
-		ptree = (void *)((char *)item + cb->a_treeoffset);
-		if (avl_numnodes(ptree)) {
-			if (cb->a_permcnt++ == 0)
-				(void) printf("%s\n", banner);
-			(void) printf("\t%s", item->z_key);
-			/*
-			 * Avoid an extra space being printed
-			 * for "everyone" which is keyed with a null
-			 * string
-			 */
-			if (item->z_key[0] != '\0')
-				(void) printf(" ");
-			zfs_print_perms(ptree);
-		}
-		item = AVL_NEXT(tree, item);
-	}
-}
-
-#define	LINES "-------------------------------------------------------------\n"
-static int
-zfs_print_allows(char *ds)
-{
-	zfs_allow_t *curperms, *perms;
-	zfs_handle_t *zhp;
-	allow_cb_t allowcb = { 0 };
-	char banner[MAXPATHLEN];
-
-	if (ds[0] == '-')
-		usage(B_FALSE);
-
-	if (strrchr(ds, '@')) {
-		(void) fprintf(stderr, gettext("Snapshots don't have 'allow'"
-		    " permissions\n"));
-		return (1);
-	}
-	if ((zhp = zfs_open(g_zfs, ds, ZFS_TYPE_DATASET)) == NULL)
-		return (1);
-
-	if (zfs_perm_get(zhp, &perms)) {
-		(void) fprintf(stderr,
-		    gettext("Failed to retrieve 'allows' on %s\n"), ds);
-		zfs_close(zhp);
-		return (1);
-	}
-
-	zfs_close(zhp);
-
-	if (perms != NULL)
-		(void) printf("%s", LINES);
-	for (curperms = perms; curperms; curperms = curperms->z_next) {
-
-		(void) snprintf(banner, sizeof (banner),
-		    gettext("Permission sets on (%s)"), curperms->z_setpoint);
-		allowcb.a_treeoffset =
-		    offsetof(zfs_allow_node_t, z_localdescend);
-		allowcb.a_permcnt = 0;
-		zfs_iter_perms(&curperms->z_sets, banner, &allowcb);
-
-		(void) snprintf(banner, sizeof (banner),
-		    gettext("Create time permissions on (%s)"),
-		    curperms->z_setpoint);
-		allowcb.a_treeoffset =
-		    offsetof(zfs_allow_node_t, z_localdescend);
-		allowcb.a_permcnt = 0;
-		zfs_iter_perms(&curperms->z_crperms, banner, &allowcb);
-
-
-		(void) snprintf(banner, sizeof (banner),
-		    gettext("Local permissions on (%s)"), curperms->z_setpoint);
-		allowcb.a_treeoffset = offsetof(zfs_allow_node_t, z_local);
-		allowcb.a_permcnt = 0;
-		zfs_iter_perms(&curperms->z_user, banner, &allowcb);
-		zfs_iter_perms(&curperms->z_group, banner, &allowcb);
-		zfs_iter_perms(&curperms->z_everyone, banner, &allowcb);
-
-		(void) snprintf(banner, sizeof (banner),
-		    gettext("Descendent permissions on (%s)"),
-		    curperms->z_setpoint);
-		allowcb.a_treeoffset = offsetof(zfs_allow_node_t, z_descend);
-		allowcb.a_permcnt = 0;
-		zfs_iter_perms(&curperms->z_user, banner, &allowcb);
-		zfs_iter_perms(&curperms->z_group, banner, &allowcb);
-		zfs_iter_perms(&curperms->z_everyone, banner, &allowcb);
-
-		(void) snprintf(banner, sizeof (banner),
-		    gettext("Local+Descendent permissions on (%s)"),
-		    curperms->z_setpoint);
-		allowcb.a_treeoffset =
-		    offsetof(zfs_allow_node_t, z_localdescend);
-		allowcb.a_permcnt = 0;
-		zfs_iter_perms(&curperms->z_user, banner, &allowcb);
-		zfs_iter_perms(&curperms->z_group, banner, &allowcb);
-		zfs_iter_perms(&curperms->z_everyone, banner, &allowcb);
-
-		(void) printf("%s", LINES);
-	}
-	zfs_free_allows(perms);
-	return (0);
-}
-
-#define	ALLOWOPTIONS "ldcsu:g:e"
-#define	UNALLOWOPTIONS "ldcsu:g:er"
-
-/*
- * Validate options, and build necessary datastructure to display/remove/add
- * permissions.
- * Returns 0 - If permissions should be added/removed
- * Returns 1 - If permissions should be displayed.
- * Returns -1 - on failure
- */
-int
-parse_allow_args(int *argc, char **argv[], boolean_t unallow,
-    char **ds, int *recurse, nvlist_t **zperms)
-{
-	int c;
-	char *options = unallow ? UNALLOWOPTIONS : ALLOWOPTIONS;
-	zfs_deleg_inherit_t deleg_type = ZFS_DELEG_NONE;
-	zfs_deleg_who_type_t who_type = ZFS_DELEG_WHO_UNKNOWN;
-	char *who = NULL;
-	char *perms = NULL;
-	zfs_handle_t *zhp;
-
-	while ((c = getopt(*argc, *argv, options)) != -1) {
-		switch (c) {
-		case 'l':
-			if (who_type == ZFS_DELEG_CREATE ||
-			    who_type == ZFS_DELEG_NAMED_SET)
-				usage(B_FALSE);
-
-			deleg_type |= ZFS_DELEG_PERM_LOCAL;
-			break;
-		case 'd':
-			if (who_type == ZFS_DELEG_CREATE ||
-			    who_type == ZFS_DELEG_NAMED_SET)
-				usage(B_FALSE);
-
-			deleg_type |= ZFS_DELEG_PERM_DESCENDENT;
-			break;
-		case 'r':
-			*recurse = B_TRUE;
-			break;
-		case 'c':
-			if (who_type != ZFS_DELEG_WHO_UNKNOWN)
-				usage(B_FALSE);
-			if (deleg_type)
-				usage(B_FALSE);
-			who_type = ZFS_DELEG_CREATE;
-			break;
-		case 's':
-			if (who_type != ZFS_DELEG_WHO_UNKNOWN)
-				usage(B_FALSE);
-			if (deleg_type)
-				usage(B_FALSE);
-			who_type = ZFS_DELEG_NAMED_SET;
-			break;
-		case 'u':
-			if (who_type != ZFS_DELEG_WHO_UNKNOWN)
-				usage(B_FALSE);
-			who_type = ZFS_DELEG_USER;
-			who = optarg;
-			break;
-		case 'g':
-			if (who_type != ZFS_DELEG_WHO_UNKNOWN)
-				usage(B_FALSE);
-			who_type = ZFS_DELEG_GROUP;
-			who = optarg;
-			break;
-		case 'e':
-			if (who_type != ZFS_DELEG_WHO_UNKNOWN)
-				usage(B_FALSE);
-			who_type = ZFS_DELEG_EVERYONE;
-			break;
-		default:
-			usage(B_FALSE);
-			break;
-		}
-	}
-
-	if (deleg_type == 0)
-		deleg_type = ZFS_DELEG_PERM_LOCALDESCENDENT;
-
-	*argc -= optind;
-	*argv += optind;
-
-	if (unallow == B_FALSE && *argc == 1) {
-		/*
-		 * Only print permissions if no options were processed
-		 */
-		if (optind == 1)
-			return (1);
-		else
-			usage(B_FALSE);
-	}
-
-	/*
-	 * initialize variables for zfs_build_perms based on number
-	 * of arguments.
-	 * 3 arguments ==>	zfs [un]allow joe perm,perm,perm <dataset> or
-	 *			zfs [un]allow -s @set1 perm,perm <dataset>
-	 * 2 arguments ==>	zfs [un]allow -c perm,perm <dataset> or
-	 *			zfs [un]allow -u|-g <name> perm <dataset> or
-	 *			zfs [un]allow -e perm,perm <dataset>
-	 *			zfs unallow joe <dataset>
-	 *			zfs unallow -s @set1 <dataset>
-	 * 1 argument  ==>	zfs [un]allow -e <dataset> or
-	 *			zfs [un]allow -c <dataset>
-	 */
-
-	switch (*argc) {
-	case 3:
-		perms = (*argv)[1];
-		who = (*argv)[0];
-		*ds = (*argv)[2];
-
-		/*
-		 * advance argc/argv for do_allow cases.
-		 * for do_allow case make sure who have a know who type
-		 * and its not a permission set.
-		 */
-		if (unallow == B_TRUE) {
-			*argc -= 2;
-			*argv += 2;
-		} else if (who_type != ZFS_DELEG_WHO_UNKNOWN &&
-		    who_type != ZFS_DELEG_NAMED_SET)
-			usage(B_FALSE);
-		break;
-
-	case 2:
-		if (unallow == B_TRUE && (who_type == ZFS_DELEG_EVERYONE ||
-		    who_type == ZFS_DELEG_CREATE || who != NULL)) {
-			perms = (*argv)[0];
-			*ds = (*argv)[1];
-		} else {
-			if (unallow == B_FALSE &&
-			    (who_type == ZFS_DELEG_WHO_UNKNOWN ||
-			    who_type == ZFS_DELEG_NAMED_SET))
-				usage(B_FALSE);
-			else if (who_type == ZFS_DELEG_WHO_UNKNOWN ||
-			    who_type == ZFS_DELEG_NAMED_SET)
-				who = (*argv)[0];
-			else if (who_type != ZFS_DELEG_NAMED_SET)
-				perms = (*argv)[0];
-			*ds = (*argv)[1];
-		}
-		if (unallow == B_TRUE) {
-			(*argc)--;
-			(*argv)++;
-		}
-		break;
-
-	case 1:
-		if (unallow == B_FALSE)
-			usage(B_FALSE);
-		if (who == NULL && who_type != ZFS_DELEG_CREATE &&
-		    who_type != ZFS_DELEG_EVERYONE)
-			usage(B_FALSE);
-		*ds = (*argv)[0];
-		break;
-
-	default:
-		usage(B_FALSE);
-	}
-
-	if (strrchr(*ds, '@')) {
-		(void) fprintf(stderr,
-		    gettext("Can't set or remove 'allow' permissions "
-		    "on snapshots.\n"));
-			return (-1);
-	}
-
-	if ((zhp = zfs_open(g_zfs, *ds, ZFS_TYPE_DATASET)) == NULL)
-		return (-1);
-
-	if ((zfs_build_perms(zhp, who, perms,
-	    who_type, deleg_type, zperms)) != 0) {
-		zfs_close(zhp);
-		return (-1);
-	}
-	zfs_close(zhp);
-	return (0);
-}
-
-static int
-zfs_do_allow(int argc, char **argv)
-{
-	char *ds;
-	nvlist_t *zperms = NULL;
-	zfs_handle_t *zhp;
-	int unused;
-	int ret;
-
-	if ((ret = parse_allow_args(&argc, &argv, B_FALSE, &ds,
-	    &unused, &zperms)) == -1)
-		return (1);
-
-	if (ret == 1)
-		return (zfs_print_allows(argv[0]));
-
-	if ((zhp = zfs_open(g_zfs, ds, ZFS_TYPE_DATASET)) == NULL)
-		return (1);
-
-	if (zfs_perm_set(zhp, zperms)) {
-		zfs_close(zhp);
-		nvlist_free(zperms);
-		return (1);
-	}
-	nvlist_free(zperms);
-	zfs_close(zhp);
-
-	return (0);
-}
-
-static int
-unallow_callback(zfs_handle_t *zhp, void *data)
-{
-	nvlist_t *nvp = (nvlist_t *)data;
-	int error;
-
-	error = zfs_perm_remove(zhp, nvp);
-	if (error) {
-		(void) fprintf(stderr, gettext("Failed to remove permissions "
-		    "on %s\n"), zfs_get_name(zhp));
-	}
-	return (error);
-}
-
-static int
-zfs_do_unallow(int argc, char **argv)
-{
-	int recurse = B_FALSE;
-	char *ds;
-	int error;
-	nvlist_t *zperms = NULL;
-	int flags = 0;
-
-	if (parse_allow_args(&argc, &argv, B_TRUE,
-	    &ds, &recurse, &zperms) == -1)
-		return (1);
-
-	if (recurse)
-		flags |= ZFS_ITER_RECURSE;
-	error = zfs_for_each(argc, argv, flags,
-	    ZFS_TYPE_FILESYSTEM|ZFS_TYPE_VOLUME, NULL,
-	    NULL, 0, unallow_callback, (void *)zperms);
-
-	if (zperms)
-		nvlist_free(zperms);
-
-	return (error);
-}
-
 typedef struct get_all_cbdata {
 	zfs_handle_t	**cb_handles;
 	size_t		cb_alloc;
@@ -3974,6 +3712,15 @@
 	return (unshare_unmount(OP_SHARE, argc, argv));
 }
 
+/* ARGSUSED */
+static int
+zfs_do_python(int argc, char **argv)
+{
+	(void) execv(pypath, argv-1);
+	(void) printf("internal error: %s not found\n", pypath);
+	return (-1);
+}
+
 /*
  * Called when invoked as /etc/fs/zfs/mount.  Do the mount if the mountpoint is
  * 'legacy'.  Otherwise, complain that use should be using 'zfs mount'.
--- a/usr/src/cmd/zpool/zpool_main.c	Sat Apr 18 01:13:46 2009 -0700
+++ b/usr/src/cmd/zpool/zpool_main.c	Sat Apr 18 13:41:47 2009 -0700
@@ -376,12 +376,11 @@
 		}
 		normnm = zpool_prop_to_name(prop);
 	} else {
-		if ((fprop = zfs_name_to_prop(propname)) == ZPROP_INVAL) {
-			(void) fprintf(stderr, gettext("property '%s' is "
-			    "not a valid file system property\n"), propname);
-			return (2);
+		if ((fprop = zfs_name_to_prop(propname)) != ZPROP_INVAL) {
+			normnm = zfs_prop_to_name(fprop);
+		} else {
+			normnm = propname;
 		}
-		normnm = zfs_prop_to_name(fprop);
 	}
 
 	if (nvlist_lookup_string(proplist, normnm, &strval) == 0 &&
@@ -3527,8 +3526,8 @@
 		(void) printf(gettext(" 11  Improved scrub performance\n"));
 		(void) printf(gettext(" 12  Snapshot properties\n"));
 		(void) printf(gettext(" 13  snapused property\n"));
-		(void) printf(gettext(" 14  passthrough-x aclinherit "
-		    "support\n"));
+		(void) printf(gettext(" 14  passthrough-x aclinherit\n"));
+		(void) printf(gettext(" 15  user/group space accounting\n"));
 		(void) printf(gettext("For more information on a particular "
 		    "version, including supported releases, see:\n\n"));
 		(void) printf("http://www.opensolaris.org/os/community/zfs/"
--- a/usr/src/common/zfs/zfs_deleg.c	Sat Apr 18 01:13:46 2009 -0700
+++ b/usr/src/common/zfs/zfs_deleg.c	Sat Apr 18 13:41:47 2009 -0700
@@ -19,13 +19,10 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
-
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #if defined(_KERNEL)
 #include <sys/systm.h>
 #include <sys/sunddi.h>
@@ -66,6 +63,10 @@
 	{ZFS_DELEG_PERM_SHARE, ZFS_DELEG_NOTE_SHARE },
 	{ZFS_DELEG_PERM_SEND, ZFS_DELEG_NOTE_NONE },
 	{ZFS_DELEG_PERM_USERPROP, ZFS_DELEG_NOTE_USERPROP },
+	{ZFS_DELEG_PERM_USERQUOTA, ZFS_DELEG_NOTE_USERQUOTA },
+	{ZFS_DELEG_PERM_GROUPQUOTA, ZFS_DELEG_NOTE_GROUPQUOTA },
+	{ZFS_DELEG_PERM_USERUSED, ZFS_DELEG_NOTE_USERUSED },
+	{ZFS_DELEG_PERM_GROUPUSED, ZFS_DELEG_NOTE_GROUPUSED },
 	{NULL, ZFS_DELEG_NOTE_NONE }
 };
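The table above also registers userquota, groupquota, userused, and groupused as delegable permissions, so the new per-user properties can be managed by non-root users via zfs allow. A hedged sketch, assuming the permission strings match the property prefixes and using placeholder user/dataset names:

	$ zfs allow joe userquota,userused tank/home    # let joe manage user quotas and see per-user usage
	$ zfs unallow joe userquota,userused tank/home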
 
--- a/usr/src/common/zfs/zfs_deleg.h	Sat Apr 18 01:13:46 2009 -0700
+++ b/usr/src/common/zfs/zfs_deleg.h	Sat Apr 18 13:41:47 2009 -0700
@@ -19,15 +19,13 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
 #ifndef	_ZFS_DELEG_H
 #define	_ZFS_DELEG_H
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #include <sys/fs/zfs.h>
 
 #ifdef	__cplusplus
@@ -59,6 +57,10 @@
 	ZFS_DELEG_NOTE_USERPROP,
 	ZFS_DELEG_NOTE_MOUNT,
 	ZFS_DELEG_NOTE_SHARE,
+	ZFS_DELEG_NOTE_USERQUOTA,
+	ZFS_DELEG_NOTE_GROUPQUOTA,
+	ZFS_DELEG_NOTE_USERUSED,
+	ZFS_DELEG_NOTE_GROUPUSED,
 	ZFS_DELEG_NOTE_NONE
 } zfs_deleg_note_t;
 
--- a/usr/src/common/zfs/zfs_namecheck.c	Sat Apr 18 01:13:46 2009 -0700
+++ b/usr/src/common/zfs/zfs_namecheck.c	Sat Apr 18 13:41:47 2009 -0700
@@ -19,12 +19,10 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 /*
  * Common name validation routines for ZFS.  These routines are shared by the
  * userland code as well as the ioctl() layer to ensure that we don't
@@ -345,19 +343,3 @@
 
 	return (0);
 }
-
-/*
- * Check if the dataset name is private for internal usage.
- * '$' is reserved for internal dataset names. e.g. "$MOS"
- *
- * Return 1 if the given name is used internally.
- * Return 0 if it is not.
- */
-int
-dataset_name_hidden(const char *name)
-{
-	if (strchr(name, '$') != NULL)
-		return (1);
-
-	return (0);
-}
--- a/usr/src/common/zfs/zfs_namecheck.h	Sat Apr 18 01:13:46 2009 -0700
+++ b/usr/src/common/zfs/zfs_namecheck.h	Sat Apr 18 13:41:47 2009 -0700
@@ -19,15 +19,13 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
 #ifndef	_ZFS_NAMECHECK_H
 #define	_ZFS_NAMECHECK_H
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #ifdef	__cplusplus
 extern "C" {
 #endif
@@ -50,7 +48,6 @@
 int pool_namecheck(const char *, namecheck_err_t *, char *);
 int dataset_namecheck(const char *, namecheck_err_t *, char *);
 int mountpoint_namecheck(const char *, namecheck_err_t *);
-int dataset_name_hidden(const char *);
 int snapshot_namecheck(const char *, namecheck_err_t *, char *);
 int permset_namecheck(const char *, namecheck_err_t *, char *);
 
--- a/usr/src/common/zfs/zfs_prop.c	Sat Apr 18 01:13:46 2009 -0700
+++ b/usr/src/common/zfs/zfs_prop.c	Sat Apr 18 13:41:47 2009 -0700
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -43,6 +43,14 @@
 
 static zprop_desc_t zfs_prop_table[ZFS_NUM_PROPS];
 
+/* Note this is indexed by zfs_userquota_prop_t, keep the order the same */
+const char *zfs_userquota_prop_prefixes[] = {
+	"userused@",
+	"userquota@",
+	"groupused@",
+	"groupquota@"
+};
+
 zprop_desc_t *
 zfs_prop_get_table(void)
 {
@@ -133,6 +141,7 @@
 		{ "1",		1 },
 		{ "2",		2 },
 		{ "3",		3 },
+		{ "4",		4 },
 		{ "current",	ZPL_VERSION },
 		{ NULL }
 	};
@@ -218,7 +227,7 @@
 	/* default index properties */
 	register_index(ZFS_PROP_VERSION, "version", 0, PROP_DEFAULT,
 	    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT,
-	    "1 | 2 | 3 | current", "VERSION", version_table);
+	    "1 | 2 | 3 | 4 | current", "VERSION", version_table);
 	register_index(ZFS_PROP_CANMOUNT, "canmount", ZFS_CANMOUNT_ON,
 	    PROP_DEFAULT, ZFS_TYPE_FILESYSTEM, "on | off | noauto",
 	    "CANMOUNT", canmount_table);
@@ -307,6 +316,8 @@
 	    PROP_INHERIT, ZFS_TYPE_VOLUME, "ISCSIOPTIONS");
 	register_hidden(ZFS_PROP_GUID, "guid", PROP_TYPE_NUMBER, PROP_READONLY,
 	    ZFS_TYPE_DATASET, "GUID");
+	register_hidden(ZFS_PROP_USERACCOUNTING, "useraccounting",
+	    PROP_TYPE_NUMBER, PROP_READONLY, ZFS_TYPE_DATASET, NULL);
 
 	/* oddball properties */
 	register_impl(ZFS_PROP_CREATION, "creation", PROP_TYPE_NUMBER, 0, NULL,
@@ -330,7 +341,6 @@
 	return (zprop_name_to_prop(propname, ZFS_TYPE_DATASET));
 }
 
-
 /*
  * For user property names, we allow all lowercase alphanumeric characters, plus
  * a few useful punctuation characters.
@@ -368,6 +378,26 @@
 }
 
 /*
+ * Returns true if this is a valid userspace-type property (one with a '@').
+ * Note that after the @, any character is valid (eg, another @, for SID
+ * user@domain).
+ */
+boolean_t
+zfs_prop_userquota(const char *name)
+{
+	zfs_userquota_prop_t prop;
+
+	for (prop = 0; prop < ZFS_NUM_USERQUOTA_PROPS; prop++) {
+		if (strncmp(name, zfs_userquota_prop_prefixes[prop],
+		    strlen(zfs_userquota_prop_prefixes[prop])) == 0) {
+			return (B_TRUE);
+		}
+	}
+
+	return (B_FALSE);
+}
+
+/*
  * Tables of index types, plus functions to convert between the user view
  * (strings) and internal representation (uint64_t).
  */
--- a/usr/src/common/zfs/zprop_common.c	Sat Apr 18 01:13:46 2009 -0700
+++ b/usr/src/common/zfs/zprop_common.c	Sat Apr 18 13:41:47 2009 -0700
@@ -19,12 +19,10 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 /*
  * Common routines used by zfs and zpool property management.
  */
@@ -205,9 +203,6 @@
 #ifndef _KERNEL
 	const char *colname = prop_entry->pd_colname;
 	int c;
-
-	if (colname == NULL)
-		return (B_FALSE);
 #endif
 
 	if (len == strlen(propname) &&
@@ -215,7 +210,7 @@
 		return (B_TRUE);
 
 #ifndef _KERNEL
-	if (len != strlen(colname))
+	if (colname == NULL || len != strlen(colname))
 		return (B_FALSE);
 
 	for (c = 0; c < len; c++)
--- a/usr/src/grub/capability	Sat Apr 18 01:13:46 2009 -0700
+++ b/usr/src/grub/capability	Sat Apr 18 13:41:47 2009 -0700
@@ -18,7 +18,7 @@
 #
 # CDDL HEADER END
 #
-# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 # Use is subject to license terms.
 #
 #
@@ -40,7 +40,7 @@
 # This file and the associated version are Solaris specific and are
 # not a part of the open source distribution of GRUB.
 #
-VERSION=7
+VERSION=8
 dboot
 xVM
 zfs
--- a/usr/src/grub/grub-0.97/stage2/zfs-include/dmu_objset.h	Sat Apr 18 01:13:46 2009 -0700
+++ b/usr/src/grub/grub-0.97/stage2/zfs-include/dmu_objset.h	Sat Apr 18 13:41:47 2009 -0700
@@ -17,21 +17,22 @@
  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
 #ifndef	_SYS_DMU_OBJSET_H
 #define	_SYS_DMU_OBJSET_H
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 typedef struct objset_phys {
 	dnode_phys_t os_meta_dnode;
 	zil_header_t os_zil_header;
 	uint64_t os_type;
-	char os_pad[1024 - sizeof (dnode_phys_t) - sizeof (zil_header_t) -
-	    sizeof (uint64_t)];
+	uint64_t os_flags;
+	char os_pad[2048 - sizeof (dnode_phys_t)*3 -
+	    sizeof (zil_header_t) - sizeof (uint64_t)*2];
+	dnode_phys_t os_userused_dnode;
+	dnode_phys_t os_groupused_dnode;
 } objset_phys_t;
 
 #endif /* _SYS_DMU_OBJSET_H */
--- a/usr/src/grub/grub-0.97/stage2/zfs-include/zfs.h	Sat Apr 18 01:13:46 2009 -0700
+++ b/usr/src/grub/grub-0.97/stage2/zfs-include/zfs.h	Sat Apr 18 13:41:47 2009 -0700
@@ -17,7 +17,7 @@
  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -27,7 +27,7 @@
 /*
  * On-disk version number.
  */
-#define	SPA_VERSION			14ULL
+#define	SPA_VERSION			15ULL
 
 /*
  * The following are configuration names used in the nvlist describing a pool's
--- a/usr/src/grub/grub-0.97/stage2/zfs-include/zfs_znode.h	Sat Apr 18 01:13:46 2009 -0700
+++ b/usr/src/grub/grub-0.97/stage2/zfs-include/zfs_znode.h	Sat Apr 18 13:41:47 2009 -0700
@@ -17,20 +17,18 @@
  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
 #ifndef	_SYS_FS_ZFS_ZNODE_H
 #define	_SYS_FS_ZFS_ZNODE_H
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #define	MASTER_NODE_OBJ	1
 #define	ZFS_ROOT_OBJ		"ROOT"
 #define	ZPL_VERSION_STR		"VERSION"
 
-#define	ZPL_VERSION		3ULL
+#define	ZPL_VERSION		4ULL
 
 #define	ZFS_DIRENT_OBJ(de) BF64_GET(de, 0, 48)
 
--- a/usr/src/lib/Makefile	Sat Apr 18 01:13:46 2009 -0700
+++ b/usr/src/lib/Makefile	Sat Apr 18 13:41:47 2009 -0700
@@ -238,6 +238,7 @@
 	libzpool	\
 	libzfs		\
 	libzfs_jni	\
+	pyzfs		\
 	libmapid	\
 	brand		\
 	policykit	\
@@ -346,6 +347,7 @@
 	mms		\
 	mpss		\
 	pam_modules	\
+	pyzfs		\
 	rpcsec_gss
 $(CLOSED_BUILD)MSGSUBDIRS += \
 	$(CLOSED)/lib/smartcard
@@ -601,7 +603,8 @@
 pkcs11:		libcryptoutil
 print:		libldap5
 udapl/udapl_tavor:	udapl/libdat
-libzfs:		libdevinfo libdevid libgen libnvpair libuutil libiscsitgt
+libzfs:		libdevinfo libdevid libgen libnvpair libuutil libiscsitgt \
+		libavl libefi libidmap libsec
 libzfs_jni:	libdiskmgt libnvpair libzfs
 libzpool:	libavl libumem libnvpair
 libsec:		libavl libidmap
@@ -615,6 +618,7 @@
 scsi:		libnvpair
 mpapi:		libpthread libdevinfo libsysevent libnvpair
 libgrubmgmt:	libdevinfo libzfs libfstyp
+pyzfs:		libnvpair libsec libidmap libzfs
 
 #
 # The reason this rule checks for the existence of the
--- a/usr/src/lib/libzfs/Makefile.com	Sat Apr 18 01:13:46 2009 -0700
+++ b/usr/src/lib/libzfs/Makefile.com	Sat Apr 18 13:41:47 2009 -0700
@@ -19,11 +19,9 @@
 # CDDL HEADER END
 #
 #
-# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 # Use is subject to license terms.
 #
-# ident	"%Z%%M%	%I%	%E% SMI"
-#
 
 LIBRARY= libzfs.a
 VERS= .1
@@ -50,7 +48,8 @@
 
 C99MODE=	-xc99=%all
 C99LMODE=	-Xc99=%all
-LDLIBS +=	-lc -lm -ldevinfo -ldevid -lgen -lnvpair -luutil -lavl -lefi
+LDLIBS +=	-lc -lm -ldevinfo -ldevid -lgen -lnvpair -luutil -lavl -lefi \
+	-lidmap -lsec
 CPPFLAGS +=	$(INCS) -D_REENTRANT
 
 SRCS=	$(OBJS_COMMON:%.o=$(SRCDIR)/%.c)	\
--- a/usr/src/lib/libzfs/common/libzfs.h	Sat Apr 18 01:13:46 2009 -0700
+++ b/usr/src/lib/libzfs/common/libzfs.h	Sat Apr 18 13:41:47 2009 -0700
@@ -370,6 +370,8 @@
     zprop_source_t *, char *, size_t, boolean_t);
 extern int zfs_prop_get_numeric(zfs_handle_t *, zfs_prop_t, uint64_t *,
     zprop_source_t *, char *, size_t);
+extern int zfs_prop_get_userquota(zfs_handle_t *zhp, const char *propname,
+    char *propbuf, int proplen, boolean_t literal);
 extern uint64_t zfs_prop_get_int(zfs_handle_t *, zfs_prop_t);
 extern int zfs_prop_inherit(zfs_handle_t *, const char *);
 extern const char *zfs_prop_values(zfs_prop_t);
@@ -457,6 +459,12 @@
     boolean_t, boolean_t, boolean_t, boolean_t, int);
 extern int zfs_promote(zfs_handle_t *);
 
+typedef void (*zfs_userspace_cb_t)(void *arg, const char *domain,
+    uid_t rid, uint64_t space);
+
+extern int zfs_userspace(zfs_handle_t *zhp, zfs_userquota_prop_t type,
+    zfs_userspace_cb_t func, void *arg);
+
 typedef struct recvflags {
 	/* print informational messages (ie, -v was specified) */
 	int verbose : 1;
@@ -495,17 +503,6 @@
 extern int zfs_spa_version(zfs_handle_t *, int *);
 
 /*
- * dataset permission functions.
- */
-extern int zfs_perm_set(zfs_handle_t *, nvlist_t *);
-extern int zfs_perm_remove(zfs_handle_t *, nvlist_t *);
-extern int zfs_build_perms(zfs_handle_t *, char *, char *,
-    zfs_deleg_who_type_t, zfs_deleg_inherit_t, nvlist_t **nvlist_t);
-extern int zfs_perm_get(zfs_handle_t *, zfs_allow_t **);
-extern void zfs_free_allows(zfs_allow_t *);
-extern void zfs_deleg_permissions(void);
-
-/*
  * Mount support functions.
  */
 extern boolean_t is_mounted(libzfs_handle_t *, const char *special, char **);
--- a/usr/src/lib/libzfs/common/libzfs_changelist.c	Sat Apr 18 01:13:46 2009 -0700
+++ b/usr/src/lib/libzfs/common/libzfs_changelist.c	Sat Apr 18 13:41:47 2009 -0700
@@ -20,7 +20,7 @@
  */
 
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  *
  * Portions Copyright 2007 Ramprakash Jelari
@@ -621,8 +621,6 @@
 		clp->cl_prop = ZFS_PROP_MOUNTPOINT;
 	} else if (prop == ZFS_PROP_VOLSIZE) {
 		clp->cl_prop = ZFS_PROP_MOUNTPOINT;
-	} else if (prop == ZFS_PROP_VERSION) {
-		clp->cl_prop = ZFS_PROP_MOUNTPOINT;
 	} else {
 		clp->cl_prop = prop;
 	}
--- a/usr/src/lib/libzfs/common/libzfs_dataset.c	Sat Apr 18 01:13:46 2009 -0700
+++ b/usr/src/lib/libzfs/common/libzfs_dataset.c	Sat Apr 18 13:41:47 2009 -0700
@@ -45,6 +45,8 @@
 #include <grp.h>
 #include <stddef.h>
 #include <ucred.h>
+#include <idmap.h>
+#include <aclutils.h>
 
 #include <sys/spa.h>
 #include <sys/zap.h>
@@ -56,6 +58,8 @@
 #include "zfs_deleg.h"
 
 static int zvol_create_link_common(libzfs_handle_t *, const char *, int);
+static int userquota_propname_decode(const char *propname, boolean_t zoned,
+    zfs_userquota_prop_t *typep, char *domain, int domainlen, uint64_t *ridp);
 
 /*
  * Given a single type (not a mask of types), return the type in a human
@@ -121,8 +125,8 @@
 
 /*
  * Validate a ZFS path.  This is used even before trying to open the dataset, to
- * provide a more meaningful error message.  We place a more useful message in
- * 'buf' detailing exactly why the name was not valid.
+ * provide a more meaningful error message.  We call zfs_error_aux() to
+ * explain exactly why the name was not valid.
  */
 static int
 zfs_validate_name(libzfs_handle_t *hdl, const char *path, int type,
@@ -346,6 +350,10 @@
 		return (-1);
 	}
 
+	/*
+	 * XXX Why do we store the user props separately, in addition to
+	 * storing them in zfs_props?
+	 */
 	if ((userprops = process_user_props(zhp, allprops)) == NULL) {
 		nvlist_free(allprops);
 		return (-1);
@@ -772,23 +780,18 @@
 		return (NULL);
 	}
 
+	/*
+	 * Make sure this property is valid and applies to this type.
+	 */
+
 	elem = NULL;
 	while ((elem = nvlist_next_nvpair(nvl, elem)) != NULL) {
 		const char *propname = nvpair_name(elem);
 
-		/*
-		 * Make sure this property is valid and applies to this type.
-		 */
-		if ((prop = zfs_name_to_prop(propname)) == ZPROP_INVAL) {
-			if (!zfs_prop_user(propname)) {
-				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-				    "invalid property '%s'"), propname);
-				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
-				goto error;
-			}
-
+		prop = zfs_name_to_prop(propname);
+		if (prop == ZPROP_INVAL && zfs_prop_user(propname)) {
 			/*
-			 * If this is a user property, make sure it's a
+			 * This is a user property: make sure it's a
 			 * string, and that it's less than ZAP_MAXNAMELEN.
 			 */
 			if (nvpair_type(elem) != DATA_TYPE_STRING) {
@@ -814,6 +817,10 @@
 			continue;
 		}
 
+		/*
+		 * Currently, only user properties can be modified on
+		 * snapshots.
+		 */
 		if (type == ZFS_TYPE_SNAPSHOT) {
 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 			    "this property can not be modified for snapshots"));
@@ -821,6 +828,80 @@
 			goto error;
 		}
 
+		if (prop == ZPROP_INVAL && zfs_prop_userquota(propname)) {
+			zfs_userquota_prop_t uqtype;
+			char newpropname[128];
+			char domain[128];
+			uint64_t rid;
+			uint64_t valary[3];
+
+			if (userquota_propname_decode(propname, zoned,
+			    &uqtype, domain, sizeof (domain), &rid) != 0) {
+				zfs_error_aux(hdl,
+				    dgettext(TEXT_DOMAIN,
+				    "'%s' has an invalid user/group name"),
+				    propname);
+				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
+				goto error;
+			}
+
+			if (uqtype != ZFS_PROP_USERQUOTA &&
+			    uqtype != ZFS_PROP_GROUPQUOTA) {
+				zfs_error_aux(hdl,
+				    dgettext(TEXT_DOMAIN, "'%s' is readonly"),
+				    propname);
+				(void) zfs_error(hdl, EZFS_PROPREADONLY,
+				    errbuf);
+				goto error;
+			}
+
+			if (nvpair_type(elem) == DATA_TYPE_STRING) {
+				(void) nvpair_value_string(elem, &strval);
+				if (strcmp(strval, "none") == 0) {
+					intval = 0;
+				} else if (zfs_nicestrtonum(hdl,
+				    strval, &intval) != 0) {
+					(void) zfs_error(hdl,
+					    EZFS_BADPROP, errbuf);
+					goto error;
+				}
+			} else if (nvpair_type(elem) ==
+			    DATA_TYPE_UINT64) {
+				(void) nvpair_value_uint64(elem, &intval);
+				if (intval == 0) {
+					zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+					    "use 'none' to disable "
+					    "userquota/groupquota"));
+					goto error;
+				}
+			} else {
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "'%s' must be a number"), propname);
+				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
+				goto error;
+			}
+
+			(void) snprintf(newpropname, sizeof (newpropname),
+			    "%s%s", zfs_userquota_prop_prefixes[uqtype],
+			    domain);
+			valary[0] = uqtype;
+			valary[1] = rid;
+			valary[2] = intval;
+			if (nvlist_add_uint64_array(ret, newpropname,
+			    valary, 3) != 0) {
+				(void) no_memory(hdl);
+				goto error;
+			}
+			continue;
+		}
+
+		if (prop == ZPROP_INVAL) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "invalid property '%s'"), propname);
+			(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
+			goto error;
+		}
+
 		if (!zfs_prop_valid_for_type(prop, type)) {
 			zfs_error_aux(hdl,
 			    dgettext(TEXT_DOMAIN, "'%s' does not "
@@ -960,7 +1041,7 @@
 			} else if (getzoneid() != GLOBAL_ZONEID) {
 				/*
 				 * If zoned property is 'off', this must be in
-				 * a globle zone. If not, something is wrong.
+				 * a global zone. If not, something is wrong.
 				 */
 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 				    "'%s' cannot be set while dataset "
@@ -1144,808 +1225,6 @@
 	return (NULL);
 }
 
-static int
-zfs_get_perm_who(const char *who, zfs_deleg_who_type_t *who_type,
-    uint64_t *ret_who)
-{
-	struct passwd *pwd;
-	struct group *grp;
-	uid_t id;
-
-	if (*who_type == ZFS_DELEG_EVERYONE || *who_type == ZFS_DELEG_CREATE ||
-	    *who_type == ZFS_DELEG_NAMED_SET) {
-		*ret_who = -1;
-		return (0);
-	}
-	if (who == NULL && !(*who_type == ZFS_DELEG_EVERYONE))
-		return (EZFS_BADWHO);
-
-	if (*who_type == ZFS_DELEG_WHO_UNKNOWN &&
-	    strcmp(who, "everyone") == 0) {
-		*ret_who = -1;
-		*who_type = ZFS_DELEG_EVERYONE;
-		return (0);
-	}
-
-	pwd = getpwnam(who);
-	grp = getgrnam(who);
-
-	if ((*who_type == ZFS_DELEG_USER) && pwd) {
-		*ret_who = pwd->pw_uid;
-	} else if ((*who_type == ZFS_DELEG_GROUP) && grp) {
-		*ret_who = grp->gr_gid;
-	} else if (pwd) {
-		*ret_who = pwd->pw_uid;
-		*who_type = ZFS_DELEG_USER;
-	} else if (grp) {
-		*ret_who = grp->gr_gid;
-		*who_type = ZFS_DELEG_GROUP;
-	} else {
-		char *end;
-
-		id = strtol(who, &end, 10);
-		if (errno != 0 || *end != '\0') {
-			return (EZFS_BADWHO);
-		} else {
-			*ret_who = id;
-			if (*who_type == ZFS_DELEG_WHO_UNKNOWN)
-				*who_type = ZFS_DELEG_USER;
-		}
-	}
-
-	return (0);
-}
-
-static void
-zfs_perms_add_to_nvlist(nvlist_t *who_nvp, char *name, nvlist_t *perms_nvp)
-{
-	if (perms_nvp != NULL) {
-		verify(nvlist_add_nvlist(who_nvp,
-		    name, perms_nvp) == 0);
-	} else {
-		verify(nvlist_add_boolean(who_nvp, name) == 0);
-	}
-}
-
-static void
-helper(zfs_deleg_who_type_t who_type, uint64_t whoid, char *whostr,
-    zfs_deleg_inherit_t inherit, nvlist_t *who_nvp, nvlist_t *perms_nvp,
-    nvlist_t *sets_nvp)
-{
-	boolean_t do_perms, do_sets;
-	char name[ZFS_MAX_DELEG_NAME];
-
-	do_perms = (nvlist_next_nvpair(perms_nvp, NULL) != NULL);
-	do_sets = (nvlist_next_nvpair(sets_nvp, NULL) != NULL);
-
-	if (!do_perms && !do_sets)
-		do_perms = do_sets = B_TRUE;
-
-	if (do_perms) {
-		zfs_deleg_whokey(name, who_type, inherit,
-		    (who_type == ZFS_DELEG_NAMED_SET) ?
-		    whostr : (void *)&whoid);
-		zfs_perms_add_to_nvlist(who_nvp, name, perms_nvp);
-	}
-	if (do_sets) {
-		zfs_deleg_whokey(name, toupper(who_type), inherit,
-		    (who_type == ZFS_DELEG_NAMED_SET) ?
-		    whostr : (void *)&whoid);
-		zfs_perms_add_to_nvlist(who_nvp, name, sets_nvp);
-	}
-}
-
-static void
-zfs_perms_add_who_nvlist(nvlist_t *who_nvp, uint64_t whoid, void *whostr,
-    nvlist_t *perms_nvp, nvlist_t *sets_nvp,
-    zfs_deleg_who_type_t who_type, zfs_deleg_inherit_t inherit)
-{
-	if (who_type == ZFS_DELEG_NAMED_SET || who_type == ZFS_DELEG_CREATE) {
-		helper(who_type, whoid, whostr, 0,
-		    who_nvp, perms_nvp, sets_nvp);
-	} else {
-		if (inherit & ZFS_DELEG_PERM_LOCAL) {
-			helper(who_type, whoid, whostr, ZFS_DELEG_LOCAL,
-			    who_nvp, perms_nvp, sets_nvp);
-		}
-		if (inherit & ZFS_DELEG_PERM_DESCENDENT) {
-			helper(who_type, whoid, whostr, ZFS_DELEG_DESCENDENT,
-			    who_nvp, perms_nvp, sets_nvp);
-		}
-	}
-}
-
-/*
- * Construct nvlist to pass down to kernel for setting/removing permissions.
- *
- * The nvlist is constructed as a series of nvpairs with an optional embedded
- * nvlist of permissions to remove or set.  The topmost nvpairs are the actual
- * base attribute named stored in the dsl.
- * Arguments:
- *
- * whostr:   is a comma separated list of users, groups, or a single set name.
- *           whostr may be null for everyone or create perms.
- * who_type: is the type of entry in whostr.  Typically this will be
- *           ZFS_DELEG_WHO_UNKNOWN.
- * perms:    common separated list of permissions.  May be null if user
- *           is requested to remove permissions by who.
- * inherit:  Specifies the inheritance of the permissions.  Will be either
- *           ZFS_DELEG_PERM_LOCAL and/or  ZFS_DELEG_PERM_DESCENDENT.
- * nvp       The constructed nvlist to pass to zfs_perm_set().
- *           The output nvp will look something like this.
- *              ul$1234 -> {create ; destroy }
- *              Ul$1234 -> { @myset }
- *              s-$@myset - { snapshot; checksum; compression }
- */
-int
-zfs_build_perms(zfs_handle_t *zhp, char *whostr, char *perms,
-    zfs_deleg_who_type_t who_type, zfs_deleg_inherit_t inherit, nvlist_t **nvp)
-{
-	nvlist_t *who_nvp;
-	nvlist_t *perms_nvp = NULL;
-	nvlist_t *sets_nvp = NULL;
-	char errbuf[1024];
-	char *who_tok, *perm;
-	int error;
-
-	*nvp = NULL;
-
-	if (perms) {
-		if ((error = nvlist_alloc(&perms_nvp,
-		    NV_UNIQUE_NAME, 0)) != 0) {
-			return (1);
-		}
-		if ((error = nvlist_alloc(&sets_nvp,
-		    NV_UNIQUE_NAME, 0)) != 0) {
-			nvlist_free(perms_nvp);
-			return (1);
-		}
-	}
-
-	if ((error = nvlist_alloc(&who_nvp, NV_UNIQUE_NAME, 0)) != 0) {
-		if (perms_nvp)
-			nvlist_free(perms_nvp);
-		if (sets_nvp)
-			nvlist_free(sets_nvp);
-		return (1);
-	}
-
-	if (who_type == ZFS_DELEG_NAMED_SET) {
-		namecheck_err_t why;
-		char what;
-
-		if ((error = permset_namecheck(whostr, &why, &what)) != 0) {
-			nvlist_free(who_nvp);
-			if (perms_nvp)
-				nvlist_free(perms_nvp);
-			if (sets_nvp)
-				nvlist_free(sets_nvp);
-
-			switch (why) {
-			case NAME_ERR_NO_AT:
-				zfs_error_aux(zhp->zfs_hdl,
-				    dgettext(TEXT_DOMAIN,
-				    "set definition must begin with an '@' "
-				    "character"));
-			}
-			return (zfs_error(zhp->zfs_hdl,
-			    EZFS_BADPERMSET, whostr));
-		}
-	}
-
-	/*
-	 * Build up nvlist(s) of permissions.  Two nvlists are maintained.
-	 * The first nvlist perms_nvp will have normal permissions and the
- * other sets_nvp will have only permission set names in it.
-	 */
-	for (perm = strtok(perms, ","); perm; perm = strtok(NULL, ",")) {
-		const char *perm_canonical = zfs_deleg_canonicalize_perm(perm);
-
-		if (perm_canonical) {
-			verify(nvlist_add_boolean(perms_nvp,
-			    perm_canonical) == 0);
-		} else if (perm[0] == '@') {
-			verify(nvlist_add_boolean(sets_nvp, perm) == 0);
-		} else {
-			nvlist_free(who_nvp);
-			nvlist_free(perms_nvp);
-			nvlist_free(sets_nvp);
-			return (zfs_error(zhp->zfs_hdl, EZFS_BADPERM, perm));
-		}
-	}
-
-	if (whostr && who_type != ZFS_DELEG_CREATE) {
-		who_tok = strtok(whostr, ",");
-		if (who_tok == NULL) {
-			nvlist_free(who_nvp);
-			if (perms_nvp)
-				nvlist_free(perms_nvp);
-			if (sets_nvp)
-				nvlist_free(sets_nvp);
-			(void) snprintf(errbuf, sizeof (errbuf),
-			    dgettext(TEXT_DOMAIN, "Who string is NULL"),
-			    whostr);
-			return (zfs_error(zhp->zfs_hdl, EZFS_BADWHO, errbuf));
-		}
-	}
-
-	/*
-	 * Now create the nvlist(s)
-	 */
-	do {
-		uint64_t who_id;
-
-		error = zfs_get_perm_who(who_tok, &who_type,
-		    &who_id);
-		if (error) {
-			nvlist_free(who_nvp);
-			if (perms_nvp)
-				nvlist_free(perms_nvp);
-			if (sets_nvp)
-				nvlist_free(sets_nvp);
-			(void) snprintf(errbuf, sizeof (errbuf),
-			    dgettext(TEXT_DOMAIN,
-			    "Unable to determine uid/gid for "
-			    "%s "), who_tok);
-			return (zfs_error(zhp->zfs_hdl, EZFS_BADWHO, errbuf));
-		}
-
-		/*
-		 * add entries for both local and descendent when required
-		 */
-		zfs_perms_add_who_nvlist(who_nvp, who_id, who_tok,
-		    perms_nvp, sets_nvp, who_type, inherit);
-
-	} while (who_tok = strtok(NULL, ","));
-	*nvp = who_nvp;
-	return (0);
-}
-
-static int
-zfs_perm_set_common(zfs_handle_t *zhp, nvlist_t *nvp, boolean_t unset)
-{
-	zfs_cmd_t zc = { 0 };
-	int error;
-	char errbuf[1024];
-
-	(void) snprintf(errbuf, sizeof (errbuf),
-	    dgettext(TEXT_DOMAIN, "Cannot update 'allows' for '%s'"),
-	    zhp->zfs_name);
-
-	if (zcmd_write_src_nvlist(zhp->zfs_hdl, &zc, nvp))
-		return (-1);
-
-	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
-	zc.zc_perm_action = unset;
-
-	error = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SET_FSACL, &zc);
-	if (error && errno == ENOTSUP) {
-		(void) snprintf(errbuf, sizeof (errbuf),
-		    gettext("Pool must be upgraded to use 'allow/unallow'"));
-		zcmd_free_nvlists(&zc);
-		return (zfs_error(zhp->zfs_hdl, EZFS_BADVERSION, errbuf));
-	} else if (error) {
-		return (zfs_standard_error(zhp->zfs_hdl, errno, errbuf));
-	}
-	zcmd_free_nvlists(&zc);
-
-	return (error);
-}
-
-int
-zfs_perm_set(zfs_handle_t *zhp, nvlist_t *nvp)
-{
-	return (zfs_perm_set_common(zhp, nvp, B_FALSE));
-}
-
-int
-zfs_perm_remove(zfs_handle_t *zhp, nvlist_t *perms)
-{
-	return (zfs_perm_set_common(zhp, perms, B_TRUE));
-}
-
-static int
-perm_compare(const void *arg1, const void *arg2)
-{
-	const zfs_perm_node_t *node1 = arg1;
-	const zfs_perm_node_t *node2 = arg2;
-	int ret;
-
-	ret = strcmp(node1->z_pname, node2->z_pname);
-
-	if (ret > 0)
-		return (1);
-	if (ret < 0)
-		return (-1);
-	else
-		return (0);
-}
-
-static void
-zfs_destroy_perm_tree(avl_tree_t *tree)
-{
-	zfs_perm_node_t *permnode;
-	void *cookie = NULL;
-
-	while ((permnode = avl_destroy_nodes(tree,  &cookie)) != NULL)
-		free(permnode);
-	avl_destroy(tree);
-}
-
-static void
-zfs_destroy_tree(avl_tree_t *tree)
-{
-	zfs_allow_node_t *allownode;
-	void *cookie = NULL;
-
-	while ((allownode = avl_destroy_nodes(tree, &cookie)) != NULL) {
-		zfs_destroy_perm_tree(&allownode->z_localdescend);
-		zfs_destroy_perm_tree(&allownode->z_local);
-		zfs_destroy_perm_tree(&allownode->z_descend);
-		free(allownode);
-	}
-	avl_destroy(tree);
-}
-
-void
-zfs_free_allows(zfs_allow_t *allow)
-{
-	zfs_allow_t *allownext;
-	zfs_allow_t *freeallow;
-
-	allownext = allow;
-	while (allownext) {
-		zfs_destroy_tree(&allownext->z_sets);
-		zfs_destroy_tree(&allownext->z_crperms);
-		zfs_destroy_tree(&allownext->z_user);
-		zfs_destroy_tree(&allownext->z_group);
-		zfs_destroy_tree(&allownext->z_everyone);
-		freeallow = allownext;
-		allownext = allownext->z_next;
-		free(freeallow);
-	}
-}
-
-static zfs_allow_t *
-zfs_alloc_perm_tree(zfs_handle_t *zhp, zfs_allow_t *prev, char *setpoint)
-{
-	zfs_allow_t *ptree;
-
-	if ((ptree = zfs_alloc(zhp->zfs_hdl,
-	    sizeof (zfs_allow_t))) == NULL) {
-		return (NULL);
-	}
-
-	(void) strlcpy(ptree->z_setpoint, setpoint, sizeof (ptree->z_setpoint));
-	avl_create(&ptree->z_sets,
-	    perm_compare, sizeof (zfs_allow_node_t),
-	    offsetof(zfs_allow_node_t, z_node));
-	avl_create(&ptree->z_crperms,
-	    perm_compare, sizeof (zfs_allow_node_t),
-	    offsetof(zfs_allow_node_t, z_node));
-	avl_create(&ptree->z_user,
-	    perm_compare, sizeof (zfs_allow_node_t),
-	    offsetof(zfs_allow_node_t, z_node));
-	avl_create(&ptree->z_group,
-	    perm_compare, sizeof (zfs_allow_node_t),
-	    offsetof(zfs_allow_node_t, z_node));
-	avl_create(&ptree->z_everyone,
-	    perm_compare, sizeof (zfs_allow_node_t),
-	    offsetof(zfs_allow_node_t, z_node));
-
-	if (prev)
-		prev->z_next = ptree;
-	ptree->z_next = NULL;
-	return (ptree);
-}
-
-/*
- * Add permissions to the appropriate AVL permission tree.
- * The appropriate tree may not be the requested tree.
- * For example, if ld indicates a local permission, but the
- * same permission also exists as a descendent permission,
- * then the permission will be removed from the descendent
- * tree and added to the local+descendent tree.
- */
-static int
-zfs_coalesce_perm(zfs_handle_t *zhp, zfs_allow_node_t *allownode,
-    char *perm, char ld)
-{
-	zfs_perm_node_t pnode, *permnode, *permnode2;
-	zfs_perm_node_t *newnode;
-	avl_index_t where, where2;
-	avl_tree_t *tree, *altree;
-
-	(void) strlcpy(pnode.z_pname, perm, sizeof (pnode.z_pname));
-
-	if (ld == ZFS_DELEG_NA) {
-		tree =  &allownode->z_localdescend;
-		altree = &allownode->z_descend;
-	} else if (ld == ZFS_DELEG_LOCAL) {
-		tree = &allownode->z_local;
-		altree = &allownode->z_descend;
-	} else {
-		tree = &allownode->z_descend;
-		altree = &allownode->z_local;
-	}
-	permnode = avl_find(tree, &pnode, &where);
-	permnode2 = avl_find(altree, &pnode, &where2);
-
-	if (permnode2) {
-		avl_remove(altree, permnode2);
-		free(permnode2);
-		if (permnode == NULL) {
-			tree =  &allownode->z_localdescend;
-		}
-	}
-
-	/*
-	 * Now insert the new permission in either the requested location
-	 * (local/descendent) or into ld when the perm will exist in both.
-	 */
-	if (permnode == NULL) {
-		if ((newnode = zfs_alloc(zhp->zfs_hdl,
-		    sizeof (zfs_perm_node_t))) == NULL) {
-			return (-1);
-		}
-		*newnode = pnode;
-		avl_add(tree, newnode);
-	}
-	return (0);
-}
-
-/*
- * Uggh, this is going to be a bit complicated.
- * We have an nvlist coming out of the kernel that
- * will indicate where the permission is set and then
- * it will contain all of the various "who's", and what
- * their permissions are.  To further complicate this
- * we will then have to coalesce the local,descendent
- * and local+descendent permissions where appropriate.
- * The kernel only knows about a permission as being local
- * or descendent, but not both.
- *
- * In order to make this easier for zfs_main to deal with
- * a series of AVL trees will be used to maintain
- * all of this, primarily for sorting purposes as well
- * as the ability to quickly locate a specific entry.
- *
- * What we end up with are trees for sets, create perms,
- * users, groups and everyone.  Within each of those trees
- * we have subtrees for local, descendent and local+descendent
- * permissions.
- */
-int
-zfs_perm_get(zfs_handle_t *zhp, zfs_allow_t **zfs_perms)
-{
-	zfs_cmd_t zc = { 0 };
-	int error;
-	nvlist_t *nvlist;
-	nvlist_t *permnv, *sourcenv;
-	nvpair_t *who_pair, *source_pair;
-	nvpair_t *perm_pair;
-	char errbuf[1024];
-	zfs_allow_t *zallowp, *newallowp;
-	char  ld;
-	char *nvpname;
-	uid_t	uid;
-	gid_t	gid;
-	avl_tree_t *tree;
-	avl_index_t where;
-
-	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
-
-	if (zcmd_alloc_dst_nvlist(zhp->zfs_hdl, &zc, 0) != 0)
-		return (-1);
-
-	while (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_GET_FSACL, &zc) != 0) {
-		if (errno == ENOMEM) {
-			if (zcmd_expand_dst_nvlist(zhp->zfs_hdl, &zc) != 0) {
-				zcmd_free_nvlists(&zc);
-				return (-1);
-			}
-		} else if (errno == ENOTSUP) {
-			zcmd_free_nvlists(&zc);
-			(void) snprintf(errbuf, sizeof (errbuf),
-			    gettext("Pool must be upgraded to use 'allow'"));
-			return (zfs_error(zhp->zfs_hdl,
-			    EZFS_BADVERSION, errbuf));
-		} else {
-			zcmd_free_nvlists(&zc);
-			return (-1);
-		}
-	}
-
-	if (zcmd_read_dst_nvlist(zhp->zfs_hdl, &zc, &nvlist) != 0) {
-		zcmd_free_nvlists(&zc);
-		return (-1);
-	}
-
-	zcmd_free_nvlists(&zc);
-
-	source_pair = nvlist_next_nvpair(nvlist, NULL);
-
-	if (source_pair == NULL) {
-		*zfs_perms = NULL;
-		return (0);
-	}
-
-	*zfs_perms = zfs_alloc_perm_tree(zhp, NULL, nvpair_name(source_pair));
-	if (*zfs_perms == NULL) {
-		return (0);
-	}
-
-	zallowp = *zfs_perms;
-
-	for (;;) {
-		struct passwd *pwd;
-		struct group *grp;
-		zfs_allow_node_t *allownode;
-		zfs_allow_node_t  findallownode;
-		zfs_allow_node_t *newallownode;
-
-		(void) strlcpy(zallowp->z_setpoint,
-		    nvpair_name(source_pair),
-		    sizeof (zallowp->z_setpoint));
-
-		if ((error = nvpair_value_nvlist(source_pair, &sourcenv)) != 0)
-			goto abort;
-
-		/*
-		 * Make sure nvlist is composed correctly
-		 */
-		if (zfs_deleg_verify_nvlist(sourcenv)) {
-			goto abort;
-		}
-
-		who_pair = nvlist_next_nvpair(sourcenv, NULL);
-		if (who_pair == NULL) {
-			goto abort;
-		}
-
-		do {
-			error = nvpair_value_nvlist(who_pair, &permnv);
-			if (error) {
-				goto abort;
-			}
-
-			/*
-			 * First build up the key to use
-			 * for looking up in the various
-			 * who trees.
-			 */
-			ld = nvpair_name(who_pair)[1];
-			nvpname = nvpair_name(who_pair);
-			switch (nvpair_name(who_pair)[0]) {
-			case ZFS_DELEG_USER:
-			case ZFS_DELEG_USER_SETS:
-				tree = &zallowp->z_user;
-				uid = atol(&nvpname[3]);
-				pwd = getpwuid(uid);
-				(void) snprintf(findallownode.z_key,
-				    sizeof (findallownode.z_key), "user %s",
-				    (pwd) ? pwd->pw_name :
-				    &nvpair_name(who_pair)[3]);
-				break;
-			case ZFS_DELEG_GROUP:
-			case ZFS_DELEG_GROUP_SETS:
-				tree = &zallowp->z_group;
-				gid = atol(&nvpname[3]);
-				grp = getgrgid(gid);
-				(void) snprintf(findallownode.z_key,
-				    sizeof (findallownode.z_key), "group %s",
-				    (grp) ? grp->gr_name :
-				    &nvpair_name(who_pair)[3]);
-				break;
-			case ZFS_DELEG_CREATE:
-			case ZFS_DELEG_CREATE_SETS:
-				tree = &zallowp->z_crperms;
-				(void) strlcpy(findallownode.z_key, "",
-				    sizeof (findallownode.z_key));
-				break;
-			case ZFS_DELEG_EVERYONE:
-			case ZFS_DELEG_EVERYONE_SETS:
-				(void) snprintf(findallownode.z_key,
-				    sizeof (findallownode.z_key), "everyone");
-				tree = &zallowp->z_everyone;
-				break;
-			case ZFS_DELEG_NAMED_SET:
-			case ZFS_DELEG_NAMED_SET_SETS:
-				(void) snprintf(findallownode.z_key,
-				    sizeof (findallownode.z_key), "%s",
-				    &nvpair_name(who_pair)[3]);
-				tree = &zallowp->z_sets;
-				break;
-			}
-
-			/*
-			 * Place who in tree
-			 */
-			allownode = avl_find(tree, &findallownode, &where);
-			if (allownode == NULL) {
-				if ((newallownode = zfs_alloc(zhp->zfs_hdl,
-				    sizeof (zfs_allow_node_t))) == NULL) {
-					goto abort;
-				}
-				avl_create(&newallownode->z_localdescend,
-				    perm_compare,
-				    sizeof (zfs_perm_node_t),
-				    offsetof(zfs_perm_node_t, z_node));
-				avl_create(&newallownode->z_local,
-				    perm_compare,
-				    sizeof (zfs_perm_node_t),
-				    offsetof(zfs_perm_node_t, z_node));
-				avl_create(&newallownode->z_descend,
-				    perm_compare,
-				    sizeof (zfs_perm_node_t),
-				    offsetof(zfs_perm_node_t, z_node));
-				(void) strlcpy(newallownode->z_key,
-				    findallownode.z_key,
-				    sizeof (findallownode.z_key));
-				avl_insert(tree, newallownode, where);
-				allownode = newallownode;
-			}
-
-			/*
-			 * Now iterate over the permissions and
-			 * place them in the appropriate local,
-			 * descendent or local+descendent tree.
-			 *
-			 * The permissions are added to the tree
-			 * via zfs_coalesce_perm().
-			 */
-			perm_pair = nvlist_next_nvpair(permnv, NULL);
-			if (perm_pair == NULL)
-				goto abort;
-			do {
-				if (zfs_coalesce_perm(zhp, allownode,
-				    nvpair_name(perm_pair), ld) != 0)
-					goto abort;
-			} while (perm_pair = nvlist_next_nvpair(permnv,
-			    perm_pair));
-		} while (who_pair = nvlist_next_nvpair(sourcenv, who_pair));
-
-		source_pair = nvlist_next_nvpair(nvlist, source_pair);
-		if (source_pair == NULL)
-			break;
-
-		/*
-		 * allocate another node from the linked list of
-		 * zfs_allow_t structures
-		 */
-		newallowp = zfs_alloc_perm_tree(zhp, zallowp,
-		    nvpair_name(source_pair));
-		if (newallowp == NULL) {
-			goto abort;
-		}
-		zallowp = newallowp;
-	}
-	nvlist_free(nvlist);
-	return (0);
-abort:
-	zfs_free_allows(*zfs_perms);
-	nvlist_free(nvlist);
-	return (-1);
-}
-
-static char *
-zfs_deleg_perm_note(zfs_deleg_note_t note)
-{
-	/*
-	 * Don't put newlines on end of lines
-	 */
-	switch (note) {
-	case ZFS_DELEG_NOTE_CREATE:
-		return (dgettext(TEXT_DOMAIN,
-		    "Must also have the 'mount' ability"));
-	case ZFS_DELEG_NOTE_DESTROY:
-		return (dgettext(TEXT_DOMAIN,
-		    "Must also have the 'mount' ability"));
-	case ZFS_DELEG_NOTE_SNAPSHOT:
-		return (dgettext(TEXT_DOMAIN,
-		    "Must also have the 'mount' ability"));
-	case ZFS_DELEG_NOTE_ROLLBACK:
-		return (dgettext(TEXT_DOMAIN,
-		    "Must also have the 'mount' ability"));
-	case ZFS_DELEG_NOTE_CLONE:
-		return (dgettext(TEXT_DOMAIN, "Must also have the 'create' "
-		    "ability and 'mount'\n"
-		    "\t\t\t\tability in the origin file system"));
-	case ZFS_DELEG_NOTE_PROMOTE:
-		return (dgettext(TEXT_DOMAIN, "Must also have the 'mount'\n"
-		    "\t\t\t\tand 'promote' ability in the origin file system"));
-	case ZFS_DELEG_NOTE_RENAME:
-		return (dgettext(TEXT_DOMAIN, "Must also have the 'mount' "
-		    "and 'create' \n\t\t\t\tability in the new parent"));
-	case ZFS_DELEG_NOTE_RECEIVE:
-		return (dgettext(TEXT_DOMAIN, "Must also have the 'mount'"
-		    " and 'create' ability"));
-	case ZFS_DELEG_NOTE_USERPROP:
-		return (dgettext(TEXT_DOMAIN,
-		    "Allows changing any user property"));
-	case ZFS_DELEG_NOTE_ALLOW:
-		return (dgettext(TEXT_DOMAIN,
-		    "Must also have the permission that is being\n"
-		    "\t\t\t\tallowed"));
-	case ZFS_DELEG_NOTE_MOUNT:
-		return (dgettext(TEXT_DOMAIN,
-		    "Allows mount/umount of ZFS datasets"));
-	case ZFS_DELEG_NOTE_SHARE:
-		return (dgettext(TEXT_DOMAIN,
-		    "Allows sharing file systems over NFS or SMB\n"
-		    "\t\t\t\tprotocols"));
-	case ZFS_DELEG_NOTE_NONE:
-	default:
-		return (dgettext(TEXT_DOMAIN, ""));
-	}
-}
-
-typedef enum {
-	ZFS_DELEG_SUBCOMMAND,
-	ZFS_DELEG_PROP,
-	ZFS_DELEG_OTHER
-} zfs_deleg_perm_type_t;
-
-/*
- * is the permission a subcommand or other?
- */
-zfs_deleg_perm_type_t
-zfs_deleg_perm_type(const char *perm)
-{
-	if (strcmp(perm, "userprop") == 0)
-		return (ZFS_DELEG_OTHER);
-	else
-		return (ZFS_DELEG_SUBCOMMAND);
-}
-
-static char *
-zfs_deleg_perm_type_str(zfs_deleg_perm_type_t type)
-{
-	switch (type) {
-	case ZFS_DELEG_SUBCOMMAND:
-		return (dgettext(TEXT_DOMAIN, "subcommand"));
-	case ZFS_DELEG_PROP:
-		return (dgettext(TEXT_DOMAIN, "property"));
-	case ZFS_DELEG_OTHER:
-		return (dgettext(TEXT_DOMAIN, "other"));
-	}
-	return ("");
-}
-
-/*ARGSUSED*/
-static int
-zfs_deleg_prop_cb(int prop, void *cb)
-{
-	if (zfs_prop_delegatable(prop))
-		(void) fprintf(stderr, "%-15s %-15s\n", zfs_prop_to_name(prop),
-		    zfs_deleg_perm_type_str(ZFS_DELEG_PROP));
-
-	return (ZPROP_CONT);
-}
-
-void
-zfs_deleg_permissions(void)
-{
-	int i;
-
-	(void) fprintf(stderr, "\n%-15s %-15s\t%s\n\n", "NAME",
-	    "TYPE", "NOTES");
-
-	/*
-	 * First print out the subcommands
-	 */
-	for (i = 0; zfs_deleg_perm_tab[i].z_perm != NULL; i++) {
-		(void) fprintf(stderr, "%-15s %-15s\t%s\n",
-		    zfs_deleg_perm_tab[i].z_perm,
-		    zfs_deleg_perm_type_str(
-		    zfs_deleg_perm_type(zfs_deleg_perm_tab[i].z_perm)),
-		    zfs_deleg_perm_note(zfs_deleg_perm_tab[i].z_note));
-	}
-
-	(void) zprop_iter(zfs_deleg_prop_cb, NULL, B_FALSE, B_TRUE,
-	    ZFS_TYPE_DATASET|ZFS_TYPE_VOLUME);
-}
-
 /*
  * Given a property name and value, set the property for the given dataset.
  */
@@ -2422,7 +1701,7 @@
 		case PROP_TYPE_INDEX:
 			*val = getprop_uint64(zhp, prop, source);
 			/*
-			 * If we tried to use a defalut value for a
+			 * If we tried to use a default value for a
 			 * readonly property, it means that it was not
 			 * present; return an error.
 			 */
@@ -2716,7 +1995,7 @@
 {
 	char buf[64];
 
-	zfs_nicenum(val, buf, sizeof (buf));
+	(void) snprintf(buf, sizeof (buf), "%llu", (longlong_t)val);
 	return (zfs_prop_set(zhp, zfs_prop_to_name(prop), buf));
 }
 
@@ -2749,6 +2028,179 @@
 	return (0);
 }
 
+static int
+idmap_id_to_numeric_domain_rid(uid_t id, boolean_t isuser,
+    char **domainp, idmap_rid_t *ridp)
+{
+	idmap_handle_t *idmap_hdl = NULL;
+	idmap_get_handle_t *get_hdl = NULL;
+	idmap_stat status;
+	int err = EINVAL;
+
+	if (idmap_init(&idmap_hdl) != IDMAP_SUCCESS)
+		goto out;
+	if (idmap_get_create(idmap_hdl, &get_hdl) != IDMAP_SUCCESS)
+		goto out;
+
+	if (isuser) {
+		err = idmap_get_sidbyuid(get_hdl, id,
+		    IDMAP_REQ_FLG_USE_CACHE, domainp, ridp, &status);
+	} else {
+		err = idmap_get_sidbygid(get_hdl, id,
+		    IDMAP_REQ_FLG_USE_CACHE, domainp, ridp, &status);
+	}
+	if (err == IDMAP_SUCCESS &&
+	    idmap_get_mappings(get_hdl) == IDMAP_SUCCESS &&
+	    status == IDMAP_SUCCESS)
+		err = 0;
+	else
+		err = EINVAL;
+out:
+	if (get_hdl)
+		idmap_get_destroy(get_hdl);
+	if (idmap_hdl)
+		(void) idmap_fini(idmap_hdl);
+	return (err);
+}
+
+/*
+ * Convert the propname into the parameters needed by the kernel.
+ * Eg: userquota@ahrens -> ZFS_PROP_USERQUOTA, "", 126829
+ * Eg: userused@matt@domain -> ZFS_PROP_USERUSED, "S-1-123-456", 789
+ */
+static int
+userquota_propname_decode(const char *propname, boolean_t zoned,
+    zfs_userquota_prop_t *typep, char *domain, int domainlen, uint64_t *ridp)
+{
+	zfs_userquota_prop_t type;
+	char *cp, *end;
+	boolean_t isuser;
+
+	domain[0] = '\0';
+
+	/* Figure out the property type ({user|group}{quota|space}) */
+	for (type = 0; type < ZFS_NUM_USERQUOTA_PROPS; type++) {
+		if (strncmp(propname, zfs_userquota_prop_prefixes[type],
+		    strlen(zfs_userquota_prop_prefixes[type])) == 0)
+			break;
+	}
+	if (type == ZFS_NUM_USERQUOTA_PROPS)
+		return (EINVAL);
+	*typep = type;
+
+	isuser = (type == ZFS_PROP_USERQUOTA ||
+	    type == ZFS_PROP_USERUSED);
+
+	cp = strchr(propname, '@') + 1;
+
+	if (strchr(cp, '@')) {
+		/*
+		 * It's a SID name (eg "user@domain") that needs to be
+		 * turned into S-1-domainID-RID.  There should be a
+		 * better way to do this, but for now just translate it
+		 * to the (possibly ephemeral) uid and then back to the
+		 * SID.  This is like getsidname(noresolve=TRUE).
+		 */
+		uid_t id;
+		idmap_rid_t rid;
+		char *mapdomain;
+
+		if (zoned && getzoneid() == GLOBAL_ZONEID)
+			return (ENOENT);
+		if (sid_to_id(cp, isuser, &id) != 0)
+			return (ENOENT);
+		if (idmap_id_to_numeric_domain_rid(id, isuser,
+		    &mapdomain, &rid) != 0)
+			return (ENOENT);
+		(void) strlcpy(domain, mapdomain, domainlen);
+		*ridp = rid;
+	} else if (strncmp(cp, "S-1-", 4) == 0) {
+		/* It's a numeric SID (eg "S-1-234-567-89") */
+		(void) strcpy(domain, cp);
+		cp = strrchr(domain, '-');
+		*cp = '\0';
+		cp++;
+
+		errno = 0;
+		*ridp = strtoull(cp, &end, 10);
+		if (errno != 0 || *end != '\0')
+			return (EINVAL);
+	} else if (!isdigit(*cp)) {
+		/*
+		 * It's a user/group name (eg "user") that needs to be
+		 * turned into a uid/gid
+		 */
+		if (zoned && getzoneid() == GLOBAL_ZONEID)
+			return (ENOENT);
+		if (isuser) {
+			struct passwd *pw;
+			pw = getpwnam(cp);
+			if (pw == NULL)
+				return (ENOENT);
+			*ridp = pw->pw_uid;
+		} else {
+			struct group *gr;
+			gr = getgrnam(cp);
+			if (gr == NULL)
+				return (ENOENT);
+			*ridp = gr->gr_gid;
+		}
+	} else {
+		/* It's a user/group ID (eg "12345"). */
+		uid_t id = strtoul(cp, &end, 10);
+		idmap_rid_t rid;
+		char *mapdomain;
+
+		if (*end != '\0')
+			return (EINVAL);
+		if (id > MAXUID) {
+			/* It's an ephemeral ID. */
+			if (idmap_id_to_numeric_domain_rid(id, isuser,
+			    &mapdomain, &rid) != 0)
+				return (ENOENT);
+			(void) strcpy(domain, mapdomain);
+			*ridp = rid;
+		} else {
+			*ridp = id;
+		}
+	}
+
+	return (0);
+}
+
+int
+zfs_prop_get_userquota(zfs_handle_t *zhp, const char *propname,
+    char *propbuf, int proplen, boolean_t literal)
+{
+	int err;
+	zfs_cmd_t zc = { 0 };
+	zfs_userquota_prop_t type;
+
+	(void) strncpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
+
+	err = userquota_propname_decode(propname,
+	    zfs_prop_get_int(zhp, ZFS_PROP_ZONED),
+	    &type, zc.zc_value, sizeof (zc.zc_value), &zc.zc_guid);
+	zc.zc_objset_type = type;
+	if (err)
+		return (err);
+
+	err = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_USERSPACE_ONE, &zc);
+	if (err)
+		return (err);
+
+	if (literal) {
+		(void) snprintf(propbuf, proplen, "%llu",
+		    (u_longlong_t)zc.zc_cookie);
+	} else if (zc.zc_cookie == 0 &&
+	    (type == ZFS_PROP_USERQUOTA || type == ZFS_PROP_GROUPQUOTA)) {
+		(void) strlcpy(propbuf, "none", proplen);
+	} else {
+		zfs_nicenum(zc.zc_cookie, propbuf, proplen);
+	}
+	return (0);
+}
+
 /*
  * Returns the name of the given zfs handle.
  */
@@ -2826,12 +2278,6 @@
 	while ((ret = zfs_do_list_ioctl(zhp, ZFS_IOC_DATASET_LIST_NEXT,
 	    &zc)) == 0) {
 		/*
-		 * Ignore private dataset names.
-		 */
-		if (dataset_name_hidden(zc.zc_name))
-			continue;
-
-		/*
 		 * Silently ignore errors, as the only plausible explanation is
 		 * that the pool has since been removed.
 		 */
@@ -4488,7 +3934,12 @@
 		zfs_prop_t zfs_prop = zfs_name_to_prop(nvpair_name(curr));
 		nvpair_t *next = nvlist_next_nvpair(zhp->zfs_props, curr);
 
-		if (props[zfs_prop] == B_FALSE)
+		/*
+		 * We leave user:props in the nvlist, so there will be
+		 * some ZPROP_INVAL.  To be extra safe, don't prune
+		 * those.
+		 */
+		if (zfs_prop != ZPROP_INVAL && props[zfs_prop] == B_FALSE)
 			(void) nvlist_remove(zhp->zfs_props,
 			    nvpair_name(curr), nvpair_type(curr));
 		curr = next;
@@ -4576,3 +4027,36 @@
 	return (zfs_smb_acl_mgmt(hdl, dataset, path, ZFS_SMB_ACL_RENAME,
 	    oldname, newname));
 }
+
+int
+zfs_userspace(zfs_handle_t *zhp, zfs_userquota_prop_t type,
+    zfs_userspace_cb_t func, void *arg)
+{
+	zfs_cmd_t zc = { 0 };
+	int error;
+	zfs_useracct_t buf[100];
+
+	(void) strncpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
+
+	zc.zc_objset_type = type;
+	zc.zc_nvlist_dst = (uintptr_t)buf;
+
+	/* CONSTCOND */
+	while (1) {
+		zfs_useracct_t *zua = buf;
+
+		zc.zc_nvlist_dst_size = sizeof (buf);
+		error = ioctl(zhp->zfs_hdl->libzfs_fd,
+		    ZFS_IOC_USERSPACE_MANY, &zc);
+		if (error || zc.zc_nvlist_dst_size == 0)
+			break;
+
+		while (zc.zc_nvlist_dst_size > 0) {
+			func(arg, zua->zu_domain, zua->zu_rid, zua->zu_space);
+			zua++;
+			zc.zc_nvlist_dst_size -= sizeof (zfs_useracct_t);
+		}
+	}
+
+	return (error);
+}
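
For reference, here is a minimal Python sketch (illustrative only, not the
shipped libzfs code) of the property-name decoding that
userquota_propname_decode() above performs, limited to the plain-name and
numeric-id cases; the SID and ephemeral-id cases go through idmap and are
omitted.  The prefix tuple mirrors zfs_userquota_prop_prefixes; everything
else is made up for the example.

#! /usr/bin/python2.4
# Illustrative sketch only: decode the simple forms of a
# {user|group}{used|quota}@... property name.
import pwd
import grp

# mirrors zfs_userquota_prop_prefixes in the C code above
PREFIXES = ("userused@", "userquota@", "groupused@", "groupquota@")

def decode(propname):
	"""Return (prefix, rid) for a plain name or numeric id; SID names
	(user@domain) need idmap and are not handled here."""
	for prefix in PREFIXES:
		if propname.startswith(prefix):
			break
	else:
		raise ValueError("not a userquota property: " + propname)
	who = propname[len(prefix):]
	if who.isdigit():
		rid = int(who)			# plain numeric uid/gid
	elif prefix.startswith("user"):
		rid = pwd.getpwnam(who).pw_uid	# user name -> uid
	else:
		rid = grp.getgrnam(who).gr_gid	# group name -> gid
	return (prefix, rid)

print decode("userquota@root")			# ('userquota@', 0)
print decode("groupused@12345")			# ('groupused@', 12345)
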
--- a/usr/src/lib/libzfs/common/libzfs_graph.c	Sat Apr 18 01:13:46 2009 -0700
+++ b/usr/src/lib/libzfs/common/libzfs_graph.c	Sat Apr 18 13:41:47 2009 -0700
@@ -19,12 +19,10 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 /*
  * Iterate over all children of the current object.  This includes the normal
  * dataset hierarchy, but also arbitrary hierarchies due to clones.  We want to
@@ -399,13 +397,6 @@
 	for ((void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
 	    ioctl(hdl->libzfs_fd, ZFS_IOC_DATASET_LIST_NEXT, &zc) == 0;
 	    (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name))) {
-
-		/*
-		 * Ignore private dataset names.
-		 */
-		if (dataset_name_hidden(zc.zc_name))
-			continue;
-
 		/*
 		 * Get statistics for this dataset, to determine the type of the
 		 * dataset and clone statistics.  If this fails, the dataset has
--- a/usr/src/lib/libzfs/common/libzfs_sendrecv.c	Sat Apr 18 01:13:46 2009 -0700
+++ b/usr/src/lib/libzfs/common/libzfs_sendrecv.c	Sat Apr 18 13:41:47 2009 -0700
@@ -237,6 +237,8 @@
 		zfs_prop_t prop = zfs_name_to_prop(propname);
 		nvlist_t *propnv;
 
+		assert(zfs_prop_user(propname) || prop != ZPROP_INVAL);
+
 		if (!zfs_prop_user(propname) && zfs_prop_readonly(prop))
 			continue;
 
@@ -594,12 +596,18 @@
 			    zhp->zfs_name, sdd->fromsnap);
 			sdd->err = B_TRUE;
 		} else if (!sdd->seento) {
-			(void) fprintf(stderr,
-			    "WARNING: could not send %s@%s:\n"
-			    "incremental source (%s@%s) "
-			    "is not earlier than it\n",
-			    zhp->zfs_name, sdd->tosnap,
-			    zhp->zfs_name, sdd->fromsnap);
+			if (sdd->fromsnap) {
+				(void) fprintf(stderr,
+				    "WARNING: could not send %s@%s:\n"
+				    "incremental source (%s@%s) "
+				    "is not earlier than it\n",
+				    zhp->zfs_name, sdd->tosnap,
+				    zhp->zfs_name, sdd->fromsnap);
+			} else {
+				(void) fprintf(stderr, "WARNING: "
+				    "could not send %s@%s: does not exist\n",
+				    zhp->zfs_name, sdd->tosnap);
+			}
 			sdd->err = B_TRUE;
 		}
 	} else {
--- a/usr/src/lib/libzfs/common/libzfs_util.c	Sat Apr 18 01:13:46 2009 -0700
+++ b/usr/src/lib/libzfs/common/libzfs_util.c	Sat Apr 18 13:41:47 2009 -0700
@@ -1221,7 +1221,7 @@
 	 * dataset property,
 	 */
 	if (prop == ZPROP_INVAL && (type == ZFS_TYPE_POOL ||
-	    !zfs_prop_user(propname))) {
+	    (!zfs_prop_user(propname) && !zfs_prop_userquota(propname)))) {
 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 		    "invalid property '%s'"), propname);
 		return (zfs_error(hdl, EZFS_BADPROP,
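
The one-line change above lets userquota-style property names through the
same validation path that already accepts user properties.  A rough Python
sketch of the distinction being drawn (the names here are invented for
illustration; zfs_prop_user() and zfs_prop_userquota() are the real C
checks):

#! /usr/bin/python2.4
# Rough classification of a dataset property name, for illustration only.
USERQUOTA_PREFIXES = ("userused@", "userquota@", "groupused@", "groupquota@")

def prop_kind(name):
	for prefix in USERQUOTA_PREFIXES:
		if name.startswith(prefix):
			return "userquota"	# e.g. userquota@ahrens
	if ":" in name:
		return "user"			# user property, e.g. com.foo:note
	return "native"				# e.g. compression, quota

for n in ("userquota@ahrens", "com.foo:note", "compression"):
	print n, "->", prop_kind(n)
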
--- a/usr/src/lib/libzfs/common/mapfile-vers	Sat Apr 18 01:13:46 2009 -0700
+++ b/usr/src/lib/libzfs/common/mapfile-vers	Sat Apr 18 13:41:47 2009 -0700
@@ -47,18 +47,15 @@
 	libzfs_mnttab_cache;
 	libzfs_print_on_error;
 	zfs_allocatable_devs;
-	zfs_build_perms;
 	zfs_clone;
 	zfs_close;
 	zfs_create;
 	zfs_create_ancestors;
 	zfs_dataset_exists;
-	zfs_deleg_permissions;
 	zfs_deleg_share_nfs;
 	zfs_destroy;
 	zfs_destroy_snaps;
 	zfs_expand_proplist;
-	zfs_free_allows;
 	zfs_get_handle;
 	zfs_get_name;
 	zfs_get_pool_handle;
@@ -82,9 +79,6 @@
 	zfs_nicestrtonum;
 	zfs_open;
 	zfs_path_to_zhandle;
-	zfs_perm_get;
-	zfs_perm_remove;
-	zfs_perm_set;
 	zfs_promote;
 	zfs_prop_align_right;
 	zfs_prop_column_name;
@@ -93,14 +87,18 @@
 	zfs_prop_get;
 	zfs_prop_get_int;
 	zfs_prop_get_numeric;
+	zfs_prop_get_table;
+	zfs_prop_get_userquota;
 	zfs_prop_inherit;
 	zfs_prop_inheritable;
+	zfs_prop_init;
 	zfs_prop_is_string;
 	zfs_prop_readonly;
 	zfs_prop_set;
 	zfs_prop_string_to_index;
 	zfs_prop_to_name;
 	zfs_prop_user;
+	zfs_prop_userquota;
 	zfs_prop_valid_for_type;
 	zfs_prop_values;
 	zfs_prune_proplist;
@@ -131,6 +129,8 @@
 	zfs_unshareall_bypath;
 	zfs_unshareall_nfs;
 	zfs_unshareall_smb;
+	zfs_userspace;
+	zfs_userquota_prop_prefixes;
 	zpool_add;
 	zpool_clear;
 	zpool_close;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/lib/pyzfs/Makefile	Sat Apr 18 13:41:47 2009 -0700
@@ -0,0 +1,54 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+include	../Makefile.lib
+
+SUBDIRS=	$(MACH)
+XGETTEXT=	$(GNUXGETTEXT)
+XGETFLAGS=	$(GNUXGETFLAGS)
+
+all :=		TARGET= all
+install :=	TARGET= install
+clean :=	TARGET= clean
+clobber :=	TARGET= clobber
+lint :=		TARGET= lint
+
+MSGFILES =	`$(FIND) . -name '*.py' -o -name '*.c'`
+POFILE =	pyzfs.po
+
+.KEEP_STATE:
+
+all install clean clobber lint: $(SUBDIRS)
+
+$(POFILE):	pofile_MSGFILES
+
+_msg: $(MSGDOMAINPOFILE)
+
+$(SUBDIRS): FRC
+	@cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
+
+include ../../Makefile.msg.targ
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/lib/pyzfs/Makefile.com	Sat Apr 18 13:41:47 2009 -0700
@@ -0,0 +1,65 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+LIBRARY =	ioctl.a
+VERS =
+OBJECTS =	ioctl.o
+
+PYSRCS=		__init__.py util.py dataset.py \
+	allow.py unallow.py \
+	userspace.py groupspace.py
+
+
+include ../../Makefile.lib
+
+LIBLINKS = 
+SRCDIR =	../common
+ROOTLIBDIR=	$(ROOT)/usr/lib/python2.4/vendor-packages/zfs
+PYOBJS=		$(PYSRCS:%.py=$(SRCDIR)/%.pyc)
+PYFILES=	$(PYSRCS) $(PYSRCS:%.py=%.pyc)
+ROOTPYZFSFILES= $(PYFILES:%=$(ROOTLIBDIR)/%)
+
+C99MODE=        -xc99=%all
+C99LMODE=       -Xc99=%all
+
+LIBS =		$(DYNLIB)
+LDLIBS +=	-lc -lnvpair -lsec -lidmap -lpython2.4 -lzfs
+CFLAGS +=	$(CCVERBOSE)
+CPPFLAGS +=	-I/usr/include/python2.4
+CPPFLAGS +=	-I../../../uts/common/fs/zfs
+CPPFLAGS +=	-I../../../common/zfs
+
+.KEEP_STATE:
+
+all: $(PYOBJS) $(LIBS)
+
+install: all $(ROOTPYZFSFILES)
+
+$(ROOTLIBDIR)/%: %
+	$(INS.pyfile)
+
+lint: lintcheck
+
+include ../../Makefile.targ
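
Assuming the library builds and installs under the ROOTLIBDIR above
(/usr/lib/python2.4/vendor-packages/zfs), a quick import smoke test of the
resulting package might look like the following; importing zfs.dataset
(added below) calls zfs.ioctl.get_proptable() to build its property table,
so this also exercises the C extension:

#! /usr/bin/python2.4
# Smoke test for an installed pyzfs; assumes this changeset is installed.
import zfs.ioctl
import zfs.dataset

print len(zfs.dataset.proptable), "native properties known"
print sorted(zfs.dataset.proptable.keys())[:5]
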
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/lib/pyzfs/common/__init__.py	Sat Apr 18 13:41:47 2009 -0700
@@ -0,0 +1,28 @@
+#! /usr/bin/python2.4
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+"""
+package which provides an administrative interface to ZFS
+"""
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/lib/pyzfs/common/allow.py	Sat Apr 18 13:41:47 2009 -0700
@@ -0,0 +1,394 @@
+#! /usr/bin/python2.4
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+"""This module implements the "zfs allow" and "zfs unallow" subcommands.
+The only public interface is the zfs.allow.do_allow() function."""
+
+import zfs.util
+import zfs.dataset
+import optparse
+import sys
+import pwd
+import grp
+import errno
+
+_ = zfs.util._
+
+class FSPerms(object):
+	"""This class represents all the permissions that are set on a
+	particular filesystem (not including those inherited)."""
+
+	__slots__ = "create", "sets", "local", "descend", "ld"
+	__repr__ = zfs.util.default_repr
+
+	def __init__(self, raw):
+		"""Create a FSPerms based on the dict of raw permissions
+		from zfs.ioctl.get_fsacl()."""
+		# set of perms
+		self.create = set()
+
+		# below are { "Ntype name": set(perms) }
+		# where N is a number that we just use for sorting,
+		# type is "user", "group", "everyone", or "" (for sets)
+		# name is a user, group, or set name, or "" (for everyone)
+		self.sets = dict()
+		self.local = dict()
+		self.descend = dict()
+		self.ld = dict()
+
+		# see the comment in dsl_deleg.c for the definition of whokey
+		for whokey in raw.keys():
+			perms = raw[whokey].keys()
+			whotypechr = whokey[0].lower()
+			ws = whokey[3:]
+			if whotypechr == "c":
+				self.create.update(perms)
+			elif whotypechr == "s":
+				nwho = "1" + ws
+				self.sets.setdefault(nwho, set()).update(perms)
+			else:
+				if whotypechr == "u":
+					try:
+						name = pwd.getpwuid(int(ws)).pw_name
+					except KeyError:
+						name = ws
+					nwho = "1user " + name
+				elif whotypechr == "g":
+					try:
+						name = grp.getgrgid(int(ws)).gr_name
+					except KeyError:
+						name = ws
+					nwho = "2group " + name
+				elif whotypechr == "e":
+					nwho = "3everyone"
+				else:
+					raise ValueError(whotypechr)
+
+				if whokey[1] == "l":
+					d = self.local
+				elif whokey[1] == "d":
+					d = self.descend
+				else:
+					raise ValueError(whokey[1])
+
+				d.setdefault(nwho, set()).update(perms)
+
+		# Find perms that are in both local and descend, and
+		# move them to ld.
+		for nwho in self.local:
+			if nwho not in self.descend:
+				continue
+			# note: these are set operations
+			self.ld[nwho] = self.local[nwho] & self.descend[nwho]
+			self.local[nwho] -= self.ld[nwho]
+			self.descend[nwho] -= self.ld[nwho]
+
+	@staticmethod
+	def __ldstr(d, header):
+		s = ""
+		for (nwho, perms) in sorted(d.items()):
+			# local and descend may have entries where perms
+			# is an empty set, due to consolidating all
+			# permissions into ld
+			if perms:
+				s += "\t%s %s\n" % \
+				    (nwho[1:], ",".join(sorted(perms)))
+		if s:
+			s = header + s
+		return s
+
+	def __str__(self):
+		s = self.__ldstr(self.sets, _("Permission sets:\n"))
+
+		if self.create:
+			s += _("Create time permissions:\n")
+			s += "\t%s\n" % ",".join(sorted(self.create))
+
+		s += self.__ldstr(self.local, _("Local permissions:\n"))
+		s += self.__ldstr(self.descend, _("Descendent permissions:\n"))
+		s += self.__ldstr(self.ld, _("Local+Descendent permissions:\n"))
+		return s.rstrip()
+
+def args_to_perms(parser, options, who, perms):
+	"""Return a dict of raw perms {"whostr" -> {"perm" -> None}}
+	based on the command-line input."""
+
+	# perms is not set if we are doing a "zfs unallow <who> <fs>" to
+	# remove all of someone's permissions
+	if perms:
+		setperms = dict(((p, None) for p in perms if p[0] == "@"))
+		baseperms = dict(((canonicalized_perm(p), None)
+		    for p in perms if p[0] != "@"))
+	else:
+		setperms = None
+		baseperms = None
+
+	d = dict()
+	
+	def storeperm(typechr, inheritchr, arg):
+		assert typechr in "ugecs"
+		assert inheritchr in "ld-"
+
+		def mkwhokey(t):
+			return "%c%c$%s" % (t, inheritchr, arg)
+
+		if baseperms or not perms:
+			d[mkwhokey(typechr)] = baseperms
+		if setperms or not perms:
+			d[mkwhokey(typechr.upper())] = setperms
+
+	def decodeid(w, toidfunc, fmt):
+		try:
+			return int(w)
+		except ValueError:
+			try:
+				return toidfunc(w)[2]
+			except KeyError:
+				parser.error(fmt % w)
+
+	if options.set:
+		storeperm("s", "-", who)
+	elif options.create:
+		storeperm("c", "-", "")
+	else:
+		for w in who:
+			if options.user:
+				id = decodeid(w, pwd.getpwnam,
+				    _("invalid user %s"))
+				typechr = "u"
+			elif options.group:
+				id = decodeid(w, grp.getgrnam,
+				    _("invalid group %s"))
+				typechr = "g"
+			elif w == "everyone":
+				id = ""
+				typechr = "e"
+			else:
+				try:
+					id = pwd.getpwnam(w)[2]
+					typechr = "u"
+				except KeyError:
+					try:
+						id = grp.getgrnam(w)[2]
+						typechr = "g"
+					except KeyError:
+						parser.error(_("invalid user/group %s") % w)
+			if options.local:
+				storeperm(typechr, "l", id)
+			if options.descend:
+				storeperm(typechr, "d", id)
+	return d
+
+perms_subcmd = dict(
+    create=_("Must also have the 'mount' ability"),
+    destroy=_("Must also have the 'mount' ability"),
+    snapshot=_("Must also have the 'mount' ability"),
+    rollback=_("Must also have the 'mount' ability"),
+    clone=_("""Must also have the 'create' ability and 'mount'
+\t\t\t\tability in the origin file system"""),
+    promote=_("""Must also have the 'mount'
+\t\t\t\tand 'promote' ability in the origin file system"""),
+    rename=_("""Must also have the 'mount' and 'create'
+\t\t\t\tability in the new parent"""),
+    receive=_("Must also have the 'mount' and 'create' ability"),
+    allow=_("Must also have the permission that is being\n\t\t\t\tallowed"),
+    mount=_("Allows mount/umount of ZFS datasets"),
+    share=_("Allows sharing file systems over NFS or SMB\n\t\t\t\tprotocols"),
+    send="",
+)
+
+perms_other = dict(
+    userprop=_("Allows changing any user property"),
+    userquota=_("Allows accessing any userquota@... property"),
+    groupquota=_("Allows accessing any groupquota@... property"),
+    userused=_("Allows reading any userused@... property"),
+    groupused=_("Allows reading any groupused@... property"),
+)
+
+def hasset(ds, setname):
+	"""Return True if the given setname (string) is defined for this
+	ds (Dataset)."""
+	# It would be nice to cache the result of get_fsacl().
+	for raw in ds.get_fsacl().values():
+		for whokey in raw.keys():
+			if whokey[0].lower() == "s" and whokey[3:] == setname:
+				return True
+	return False
+
+def canonicalized_perm(permname):
+	"""Return the canonical name (string) for this permission (string).
+	Raises ZFSError if it is not a valid permission."""
+	if permname in perms_subcmd.keys() or permname in perms_other.keys():
+		return permname
+	try:
+		return zfs.dataset.getpropobj(permname).name
+	except KeyError:
+		raise zfs.util.ZFSError(errno.EINVAL, permname,
+		    _("invalid permission"))
+		
+def print_perms():
+	"""Print the set of supported permissions."""
+	print(_("\nThe following permissions are supported:\n"))
+	fmt = "%-16s %-14s\t%s"
+	print(fmt % (_("NAME"), _("TYPE"), _("NOTES")))
+
+	for (name, note) in sorted(perms_subcmd.iteritems()):
+		print(fmt % (name, _("subcommand"), note))
+
+	for (name, note) in sorted(perms_other.iteritems()):
+		print(fmt % (name, _("other"), note))
+
+	for (name, prop) in sorted(zfs.dataset.proptable.iteritems()):
+		if prop.visible and prop.delegatable():
+			print(fmt % (name, _("property"), ""))
+
+def do_allow():
+	"""Implementes the "zfs allow" and "zfs unallow" subcommands."""
+	un = (sys.argv[1] == "unallow")
+
+	def usage(msg=None):
+		parser.print_help()
+		print_perms()
+		if msg:
+			print
+			parser.exit("zfs: error: " + msg)
+		else:
+			parser.exit()
+
+	if un:
+		u = _("""unallow [-rldug] <"everyone"|user|group>[,...]
+	    [<perm|@setname>[,...]] <filesystem|volume>
+	unallow [-rld] -e [<perm|@setname>[,...]] <filesystem|volume>
+	unallow [-r] -c [<perm|@setname>[,...]] <filesystem|volume>
+	unallow [-r] -s @setname [<perm|@setname>[,...]] <filesystem|volume>""")
+		verb = _("remove")
+		sstr = _("undefine permission set")
+	else:
+		u = _("""allow <filesystem|volume>
+	allow [-ldug] <"everyone"|user|group>[,...] <perm|@setname>[,...]
+	    <filesystem|volume>
+	allow [-ld] -e <perm|@setname>[,...] <filesystem|volume>
+	allow -c <perm|@setname>[,...] <filesystem|volume>
+	allow -s @setname <perm|@setname>[,...] <filesystem|volume>""")
+		verb = _("set")
+		sstr = _("define permission set")
+
+	parser = optparse.OptionParser(usage=u, prog="zfs")
+
+	parser.add_option("-l", action="store_true", dest="local",
+	    help=_("%s permission locally") % verb)
+	parser.add_option("-d", action="store_true", dest="descend",
+	    help=_("%s permission for descendents") % verb)
+	parser.add_option("-u", action="store_true", dest="user",
+	    help=_("%s permission for user") % verb)
+	parser.add_option("-g", action="store_true", dest="group",
+	    help=_("%s permission for group") % verb)
+	parser.add_option("-e", action="store_true", dest="everyone",
+	    help=_("%s permission for everyone") % verb)
+	parser.add_option("-c", action="store_true", dest="create",
+	    help=_("%s create time permissions") % verb)
+	parser.add_option("-s", action="store_true", dest="set", help=sstr)
+	if un:
+		parser.add_option("-r", action="store_true", dest="recursive",
+		    help=_("remove permissions recursively"))
+
+	if len(sys.argv) == 3 and not un:
+		# just print the permissions on this fs
+
+		if sys.argv[2] == "-h":
+			# hack to make "zfs allow -h" work
+			usage()
+		ds = zfs.dataset.Dataset(sys.argv[2])
+
+		p = dict()
+		for (fs, raw) in ds.get_fsacl().items():
+			p[fs] = FSPerms(raw)
+
+		for fs in sorted(p.keys(), reverse=True):
+			s = _("---- Permissions on %s ") % fs
+			print(s + "-" * (70-len(s)))
+			print(p[fs])
+		return
+	
+
+	(options, args) = parser.parse_args(sys.argv[2:])
+
+	if sum((bool(options.everyone), bool(options.user),
+	    bool(options.group))) > 1:
+		parser.error(_("-u, -g, and -e are mutually exclusive"))
+
+	def mungeargs(expected_len):
+		if un and len(args) == expected_len-1:
+			return (None, args[expected_len-2])
+		elif len(args) == expected_len:
+			return (args[expected_len-2].split(","),
+			    args[expected_len-1])
+		else:
+			usage(_("wrong number of parameters"))
+
+	if options.set:
+		if options.local or options.descend or options.user or \
+		    options.group or options.everyone or options.create:
+			parser.error(_("invalid option combined with -s"))
+		if args[0][0] != "@":
+			parser.error(_("invalid set name: missing '@' prefix"))
+
+		(perms, fsname) = mungeargs(3)
+		who = args[0]
+	elif options.create:
+		if options.local or options.descend or options.user or \
+		    options.group or options.everyone or options.set:
+			parser.error(_("invalid option combined with -c"))
+
+		(perms, fsname) = mungeargs(2)
+		who = None
+	elif options.everyone:
+		if options.user or options.group or \
+		    options.create or options.set:
+			parser.error(_("invalid option combined with -e"))
+
+		(perms, fsname) = mungeargs(2)
+		who = ["everyone"]
+	else:
+		(perms, fsname) = mungeargs(3)
+		who = args[0].split(",")
+
+	if not options.local and not options.descend:
+		options.local = True
+		options.descend = True
+
+	d = args_to_perms(parser, options, who, perms)
+
+	ds = zfs.dataset.Dataset(fsname, snaps=False)
+
+	if not un and perms:
+		for p in perms:
+			if p[0] == "@" and not hasset(ds, p):
+				parser.error(_("set %s is not defined") % p)
+
+	ds.set_fsacl(un, d)
+	if un and options.recursive:
+		for child in ds.descendents():
+			child.set_fsacl(un, d)
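
To make the whokey encoding concrete: args_to_perms() above emits keys of
the form "<type char><inherit char>$<name|id|setname>" with {"perm": None}
values, and FSPerms decodes them the same way.  A tiny self-contained
sketch (the uid, set name and permission names are sample data, not taken
from a real pool):

#! /usr/bin/python2.4
# Decode a hand-built raw fsacl dict the way FSPerms does; sample data only.
raw = {
    "ul$101": {"mount": None, "snapshot": None},   # uid 101, local
    "ud$101": {"mount": None},                     # uid 101, descendent
    "c-$": {"create": None, "mount": None},        # create-time permissions
    "s-$@basic": {"destroy": None},                # permission set @basic
}

for whokey in sorted(raw):
	typechr = whokey[0].lower()	# u/g/e/c/s; upper case means perm sets
	inheritchr = whokey[1]		# 'l', 'd', or '-' (not applicable)
	who = whokey[3:]		# uid/gid/set name, or "" for create
	print typechr, inheritchr, repr(who), ",".join(sorted(raw[whokey]))
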
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/lib/pyzfs/common/dataset.py	Sat Apr 18 13:41:47 2009 -0700
@@ -0,0 +1,205 @@
+#! /usr/bin/python2.4
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+"""Implements the Dataset class, providing methods for manipulating ZFS
+datasets.  Also implements the Property class, which describes ZFS
+properties."""
+
+import zfs.ioctl
+import zfs.util
+import errno
+
+_ = zfs.util._
+
+class Property(object):
+	"""This class represents a ZFS property.  It contains
+	information about the property -- if it's readonly, a number vs
+	string vs index, etc.  Only native properties are represented by
+	this class -- not user properties (eg "user:prop") or userspace
+	properties (eg "userquota@joe")."""
+
+	__slots__ = "name", "number", "type", "default", "attr", "validtypes", \
+	    "values", "colname", "rightalign", "visible", "indextable"
+	__repr__ = zfs.util.default_repr
+
+	def __init__(self, t):
+		"""t is the tuple of information about this property
+		from zfs.ioctl.get_proptable, which should match the
+		members of zprop_desc_t (see zfs_prop.h)."""
+
+		self.name = t[0]
+		self.number = t[1]
+		self.type = t[2]
+		if self.type == "string":
+			self.default = t[3]
+		else:
+			self.default = t[4]
+		self.attr = t[5]
+		self.validtypes = t[6]
+		self.values = t[7]
+		self.colname = t[8]
+		self.rightalign = t[9]
+		self.visible = t[10]
+		self.indextable = t[11]
+
+	def delegatable(self):
+		"""Return True if this property can be delegated with
+		"zfs allow"."""
+		return self.attr != "readonly"
+
+proptable = dict()
+for name, t in zfs.ioctl.get_proptable().iteritems():
+	proptable[name] = Property(t)
+del name, t
+
+def getpropobj(name):
+	"""Return the Property object that is identified by the given
+	name string.  It can be the full name, or the column name."""
+	try:
+		return proptable[name]
+	except KeyError:
+		for p in proptable.itervalues():
+			if p.colname and p.colname.lower() == name:
+				return p
+		raise
+
+class Dataset(object):
+	"""Represents a ZFS dataset (filesystem, snapshot, zvol, clone, etc).
+
+	Generally, this class provides interfaces to the C functions in
+	zfs.ioctl which actually interface with the kernel to manipulate
+	datasets.
+	
+	Unless otherwise noted, any method can raise a ZFSError to
+	indicate failure."""
+
+	__slots__ = "name", "__props"
+	__repr__ = zfs.util.default_repr
+
+	def __init__(self, name, props=None,
+	    types=("filesystem", "volume"), snaps=True):
+		"""Open the named dataset, checking that it exists and
+		is of the specified type.
+		
+		name is the string name of this dataset.
+
+		props is the property settings dict from zfs.ioctl.next_dataset.
+
+		types is an iterable of strings specifying which types
+		of datasets are permitted.  Accepted strings are
+		"filesystem" and "volume".  Defaults to acceptying all
+		types.
+
+		snaps is a boolean specifying if snapshots are acceptable.
+
+		Raises a ZFSError if the dataset can't be accessed (eg
+		doesn't exist) or is not of the specified type.
+		"""
+
+		self.name = name
+
+		e = zfs.util.ZFSError(errno.EINVAL,
+		    _("cannot open %s") % name,
+		    _("operation not applicable to datasets of this type"))
+		if "@" in name and not snaps:
+			raise e
+		if not props:
+			props = zfs.ioctl.dataset_props(name)
+		self.__props = props
+		if "volume" not in types and self.getprop("type") == 3:
+			raise e
+		if "filesystem" not in types and self.getprop("type") == 2:
+			raise e
+
+	def getprop(self, propname):
+		"""Return the value of the given property for this dataset.
+
+		Currently only works for native properties (those with a
+		Property object.)
+		
+		Raises KeyError if propname does not specify a native property.
+		Does not raise ZFSError.
+		"""
+
+		p = getpropobj(propname)
+		try:
+			return self.__props[p.name]["value"]
+		except KeyError:
+			return p.default
+
+	def parent(self):
+		"""Return a Dataset representing the parent of this one."""
+		return Dataset(self.name[:self.name.rindex("/")])
+
+	def descendents(self):
+		"""A generator function which iterates over all
+		descendent Datasets (not including snapshots)."""
+
+		cookie = 0
+		while True:
+			# next_dataset raises StopIteration when done
+			(name, cookie, props) = \
+			    zfs.ioctl.next_dataset(self.name, False, cookie)
+			ds = Dataset(name, props)
+			yield ds
+			for child in ds.descendents():
+				yield child
+	
+	def userspace(self, prop):
+		"""A generator function which iterates over a
+		userspace-type property.
+
+		prop specifies which property ("userused@",
+		"userquota@", "groupused@", or "groupquota@").
+
+		Yields 3-tuples of domain (string), rid (int), and space (int).
+		"""
+
+		d = zfs.ioctl.userspace_many(self.name, prop)
+		for ((domain, rid), space) in d.iteritems():
+			yield (domain, rid, space)
+
+	def userspace_upgrade(self):
+		"""Initialize the accounting information for
+		userused@... and groupused@... properties."""
+		return zfs.ioctl.userspace_upgrade(self.name)
+	
+	def set_fsacl(self, un, d):
+		"""Add to the "zfs allow"-ed permissions on this Dataset.
+
+		un is True if the specified permissions should be removed.
+
+		d is a dict specifying which permissions to add/remove:
+		{ "whostr" -> None # remove all perms for this entity
+		  "whostr" -> { "perm" -> None} # add/remove these perms
+		} """
+		return zfs.ioctl.set_fsacl(self.name, un, d)
+
+	def get_fsacl(self):
+		"""Get the "zfs allow"-ed permissions on the Dataset.
+
+		Return a dict("whostr": { "perm" -> None })."""
+
+		return zfs.ioctl.get_fsacl(self.name)
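
A short usage sketch of the Dataset class above, along the lines of what
userspace.py and allow.py do with it; "tank/home" is a placeholder
filesystem name, and the script assumes this changeset's pyzfs is installed
and that it runs with enough privilege to read the accounting data:

#! /usr/bin/python2.4
# Walk per-user space accounting for a dataset and list its descendents.
# "tank/home" is a placeholder; substitute a real filesystem name.
import zfs.dataset

ds = zfs.dataset.Dataset("tank/home", snaps=False)

for (domain, rid, space) in ds.userspace("userused@"):
	print domain, rid, space

for child in ds.descendents():
	print "descendent:", child.name
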
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/lib/pyzfs/common/groupspace.py	Sat Apr 18 13:41:47 2009 -0700
@@ -0,0 +1,29 @@
+#! /usr/bin/python2.4
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+import zfs.userspace
+
+do_groupspace = zfs.userspace.do_userspace
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/lib/pyzfs/common/ioctl.c	Sat Apr 18 13:41:47 2009 -0700
@@ -0,0 +1,599 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <Python.h>
+#include <sys/zfs_ioctl.h>
+#include <sys/fs/zfs.h>
+#include <strings.h>
+#include <unistd.h>
+#include <libnvpair.h>
+#include <idmap.h>
+#include <zone.h>
+#include <libintl.h>
+#include <libzfs.h>
+#include "zfs_prop.h"
+
+static PyObject *ZFSError;
+static int zfsdevfd;
+
+#ifdef __lint
+#define	dgettext(x, y) y
+#endif
+
+#define	_(s) dgettext(TEXT_DOMAIN, s)
+
+extern int sid_to_id(char *sid, boolean_t user, uid_t *id);
+
+/*PRINTFLIKE1*/
+static void
+seterr(char *fmt, ...)
+{
+	char errstr[1024];
+	va_list v;
+
+	va_start(v, fmt);
+	(void) vsnprintf(errstr, sizeof (errstr), fmt, v);
+	va_end(v);
+
+	PyErr_SetObject(ZFSError, Py_BuildValue("is", errno, errstr));
+}
+
+static char cmdstr[HIS_MAX_RECORD_LEN];
+
+static int
+ioctl_with_cmdstr(int ioc, zfs_cmd_t *zc)
+{
+	int err;
+
+	if (cmdstr[0])
+		zc->zc_history = (uint64_t)(uintptr_t)cmdstr;
+	err = ioctl(zfsdevfd, ioc, zc);
+	cmdstr[0] = '\0';
+	return (err);
+}
+
+static PyObject *
+nvl2py(nvlist_t *nvl)
+{
+	PyObject *pyo;
+	nvpair_t *nvp;
+
+	pyo = PyDict_New();
+
+	for (nvp = nvlist_next_nvpair(nvl, NULL); nvp;
+	    nvp = nvlist_next_nvpair(nvl, nvp)) {
+		PyObject *pyval;
+		char *sval;
+		uint64_t ival;
+		boolean_t bval;
+		nvlist_t *nval;
+
+		switch (nvpair_type(nvp)) {
+		case DATA_TYPE_STRING:
+			(void) nvpair_value_string(nvp, &sval);
+			pyval = Py_BuildValue("s", sval);
+			break;
+
+		case DATA_TYPE_UINT64:
+			(void) nvpair_value_uint64(nvp, &ival);
+			pyval = Py_BuildValue("K", ival);
+			break;
+
+		case DATA_TYPE_NVLIST:
+			(void) nvpair_value_nvlist(nvp, &nval);
+			pyval = nvl2py(nval);
+			break;
+
+		case DATA_TYPE_BOOLEAN:
+			Py_INCREF(Py_None);
+			pyval = Py_None;
+			break;
+
+		case DATA_TYPE_BOOLEAN_VALUE:
+			(void) nvpair_value_boolean_value(nvp, &bval);
+			pyval = Py_BuildValue("i", bval);
+			break;
+
+		default:
+			PyErr_SetNone(PyExc_ValueError);
+			Py_DECREF(pyo);
+			return (NULL);
+		}
+
+		PyDict_SetItemString(pyo, nvpair_name(nvp), pyval);
+		Py_DECREF(pyval);
+	}
+
+	return (pyo);
+}
+
+static nvlist_t *
+dict2nvl(PyObject *d)
+{
+	nvlist_t *nvl;
+	int err;
+	PyObject *key, *value;
+	int pos = 0;
+
+	if (!PyDict_Check(d)) {
+		PyErr_SetObject(PyExc_ValueError, d);
+		return (NULL);
+	}
+
+	err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0);
+	assert(err == 0);
+
+	while (PyDict_Next(d, &pos, &key, &value)) {
+		char *keystr = PyString_AsString(key);
+		if (keystr == NULL) {
+			PyErr_SetObject(PyExc_KeyError, key);
+			nvlist_free(nvl);
+			return (NULL);
+		}
+
+		if (PyDict_Check(value)) {
+			nvlist_t *valnvl = dict2nvl(value);
+			err = nvlist_add_nvlist(nvl, keystr, valnvl);
+			nvlist_free(valnvl);
+		} else if (value == Py_None) {
+			err = nvlist_add_boolean(nvl, keystr);
+		} else if (PyString_Check(value)) {
+			char *valstr = PyString_AsString(value);
+			err = nvlist_add_string(nvl, keystr, valstr);
+		} else if (PyInt_Check(value)) {
+			uint64_t valint = PyInt_AsUnsignedLongLongMask(value);
+			err = nvlist_add_uint64(nvl, keystr, valint);
+		} else if (PyBool_Check(value)) {
+			boolean_t valbool = value == Py_True ? B_TRUE : B_FALSE;
+			err = nvlist_add_boolean_value(nvl, keystr, valbool);
+		} else {
+			PyErr_SetObject(PyExc_ValueError, value);
+			nvlist_free(nvl);
+			return (NULL);
+		}
+		assert(err == 0);
+	}
+
+	return (nvl);
+}
+
+static PyObject *
+fakepropval(uint64_t value)
+{
+	PyObject *d = PyDict_New();
+	PyDict_SetItemString(d, "value", Py_BuildValue("K", value));
+	return (d);
+}
+
+static void
+add_ds_props(zfs_cmd_t *zc, PyObject *nvl)
+{
+	dmu_objset_stats_t *s = &zc->zc_objset_stats;
+	PyDict_SetItemString(nvl, "numclones",
+	    fakepropval(s->dds_num_clones));
+	PyDict_SetItemString(nvl, "issnap",
+	    fakepropval(s->dds_is_snapshot));
+	PyDict_SetItemString(nvl, "inconsistent",
+	    fakepropval(s->dds_inconsistent));
+}
+
+/* On error, returns NULL but does not set python exception. */
+static PyObject *
+ioctl_with_dstnv(int ioc, zfs_cmd_t *zc)
+{
+	int nvsz = 2048;
+	void *nvbuf;
+	PyObject *pynv = NULL;
+
+again:
+	nvbuf = malloc(nvsz);
+	zc->zc_nvlist_dst_size = nvsz;
+	zc->zc_nvlist_dst = (uintptr_t)nvbuf;
+
+	if (ioctl(zfsdevfd, ioc, zc) == 0) {
+		nvlist_t *nvl;
+
+		errno = nvlist_unpack(nvbuf, zc->zc_nvlist_dst_size, &nvl, 0);
+		if (errno == 0) {
+			pynv = nvl2py(nvl);
+			nvlist_free(nvl);
+		}
+	} else if (errno == ENOMEM) {
+		free(nvbuf);
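+		/*
+		 * The kernel reported (in zc_nvlist_dst_size) how much
+		 * space it actually needs; retry with a bigger buffer.
+		 */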
+		nvsz = zc->zc_nvlist_dst_size;
+		goto again;
+	}
+	free(nvbuf);
+	return (pynv);
+}
+
+static PyObject *
+py_next_dataset(PyObject *self, PyObject *args)
+{
+	int ioc;
+	uint64_t cookie;
+	zfs_cmd_t zc = { 0 };
+	int snaps;
+	char *name;
+	PyObject *nvl;
+	PyObject *ret = NULL;
+
+	if (!PyArg_ParseTuple(args, "siK", &name, &snaps, &cookie))
+		return (NULL);
+
+	(void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
+	zc.zc_cookie = cookie;
+
+	if (snaps)
+		ioc = ZFS_IOC_SNAPSHOT_LIST_NEXT;
+	else
+		ioc = ZFS_IOC_DATASET_LIST_NEXT;
+
+	nvl = ioctl_with_dstnv(ioc, &zc);
+	if (nvl) {
+		add_ds_props(&zc, nvl);
+		ret = Py_BuildValue("sKO", zc.zc_name, zc.zc_cookie, nvl);
+		Py_DECREF(nvl);
+	} else if (errno == ESRCH) {
+		PyErr_SetNone(PyExc_StopIteration);
+	} else {
+		if (snaps)
+			seterr(_("cannot get snapshots of %s"), name);
+		else
+			seterr(_("cannot get child datasets of %s"), name);
+	}
+	return (ret);
+}
+
+static PyObject *
+py_dataset_props(PyObject *self, PyObject *args)
+{
+	zfs_cmd_t zc = { 0 };
+	char *name;
+	PyObject *nvl;
+
+	if (!PyArg_ParseTuple(args, "s", &name))
+		return (NULL);
+
+	(void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
+
+	nvl = ioctl_with_dstnv(ZFS_IOC_OBJSET_STATS, &zc);
+	if (nvl) {
+		add_ds_props(&zc, nvl);
+	} else {
+		seterr(_("cannot access dataset %s"), name);
+	}
+	return (nvl);
+}
+
+static PyObject *
+py_get_fsacl(PyObject *self, PyObject *args)
+{
+	zfs_cmd_t zc = { 0 };
+	char *name;
+	PyObject *nvl;
+
+	if (!PyArg_ParseTuple(args, "s", &name))
+		return (NULL);
+
+	(void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
+
+	nvl = ioctl_with_dstnv(ZFS_IOC_GET_FSACL, &zc);
+	if (nvl == NULL)
+		seterr(_("cannot get permissions on %s"), name);
+
+	return (nvl);
+}
+
+static PyObject *
+py_set_fsacl(PyObject *self, PyObject *args)
+{
+	int un;
+	size_t nvsz;
+	zfs_cmd_t zc = { 0 };
+	char *name, *nvbuf;
+	PyObject *dict;
+	nvlist_t *nvl;
+	int err;
+
+	if (!PyArg_ParseTuple(args, "siO!", &name, &un,
+	    &PyDict_Type, &dict))
+		return (NULL);
+
+	nvl = dict2nvl(dict);
+	if (nvl == NULL)
+		return (NULL);
+
+	err = nvlist_size(nvl, &nvsz, NV_ENCODE_NATIVE);
+	assert(err == 0);
+	nvbuf = malloc(nvsz);
+	err = nvlist_pack(nvl, &nvbuf, &nvsz, NV_ENCODE_NATIVE, 0);
+	assert(err == 0);
+
+	(void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
+	zc.zc_nvlist_src_size = nvsz;
+	zc.zc_nvlist_src = (uintptr_t)nvbuf;
+	zc.zc_perm_action = un;
+
+	err = ioctl_with_cmdstr(ZFS_IOC_SET_FSACL, &zc);
+	free(nvbuf);
+	if (err) {
+		seterr(_("cannot set permissions on %s"), name);
+		return (NULL);
+	}
+
+	Py_RETURN_NONE;
+}
+
+static PyObject *
+py_userspace_many(PyObject *self, PyObject *args)
+{
+	zfs_cmd_t zc = { 0 };
+	zfs_userquota_prop_t type;
+	char *name, *propname;
+	int bufsz = 1<<20;
+	void *buf;
+	PyObject *dict;
+	int error;
+
+	if (!PyArg_ParseTuple(args, "ss", &name, &propname))
+		return (NULL);
+
+	for (type = 0; type < ZFS_NUM_USERQUOTA_PROPS; type++)
+		if (strcmp(propname, zfs_userquota_prop_prefixes[type]) == 0)
+			break;
+	if (type == ZFS_NUM_USERQUOTA_PROPS) {
+		PyErr_SetString(PyExc_KeyError, propname);
+		return (NULL);
+	}
+
+	dict = PyDict_New();
+	buf = malloc(bufsz);
+
+	(void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
+	zc.zc_objset_type = type;
+	zc.zc_cookie = 0;
+
+	while (1) {
+		zfs_useracct_t *zua = buf;
+
+		zc.zc_nvlist_dst = (uintptr_t)buf;
+		zc.zc_nvlist_dst_size = bufsz;
+
+		error = ioctl(zfsdevfd, ZFS_IOC_USERSPACE_MANY, &zc);
+		if (error || zc.zc_nvlist_dst_size == 0)
+			break;
+
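+		/*
+		 * The buffer now holds an array of zfs_useracct_t entries;
+		 * zc_nvlist_dst_size came back as the number of bytes used.
+		 */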
+		while (zc.zc_nvlist_dst_size > 0) {
+			PyObject *pykey, *pyval;
+
+			pykey = Py_BuildValue("sI",
+			    zua->zu_domain, zua->zu_rid);
+			pyval = Py_BuildValue("K", zua->zu_space);
+			PyDict_SetItem(dict, pykey, pyval);
+			Py_DECREF(pykey);
+			Py_DECREF(pyval);
+
+			zua++;
+			zc.zc_nvlist_dst_size -= sizeof (zfs_useracct_t);
+		}
+	}
+
+	free(buf);
+
+	if (error != 0) {
+		Py_DECREF(dict);
+		seterr(_("cannot get %s property on %s"), propname, name);
+		return (NULL);
+	}
+
+	return (dict);
+}
+
+static PyObject *
+py_userspace_upgrade(PyObject *self, PyObject *args)
+{
+	zfs_cmd_t zc = { 0 };
+	char *name;
+	int error;
+
+	if (!PyArg_ParseTuple(args, "s", &name))
+		return (NULL);
+
+	(void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
+	error = ioctl(zfsdevfd, ZFS_IOC_USERSPACE_UPGRADE, &zc);
+
+	if (error != 0) {
+		seterr(_("cannot initialize user accounting information on %s"),
+		    name);
+		return (NULL);
+	}
+
+	Py_RETURN_NONE;
+}
+
+static PyObject *
+py_sid_to_id(PyObject *self, PyObject *args)
+{
+	char *sid;
+	int err, isuser;
+	uid_t id;
+
+	if (!PyArg_ParseTuple(args, "si", &sid, &isuser))
+		return (NULL);
+
+	err = sid_to_id(sid, isuser, &id);
+	if (err) {
+		PyErr_SetString(PyExc_KeyError, sid);
+		return (NULL);
+	}
+
+	return (Py_BuildValue("I", id));
+}
+
+/*
+ * Translate the sid string ("S-1-...") to the user@domain name, if
+ * possible.  There should be a better way to do this, but for now we
+ * just translate to the (possibly ephemeral) uid and then back again.
+ */
+static PyObject *
+py_sid_to_name(PyObject *self, PyObject *args)
+{
+	char *sid;
+	int err, isuser;
+	uid_t id;
+	char *name, *domain;
+	char buf[256];
+
+	if (!PyArg_ParseTuple(args, "si", &sid, &isuser))
+		return (NULL);
+
+	err = sid_to_id(sid, isuser, &id);
+	if (err) {
+		PyErr_SetString(PyExc_KeyError, sid);
+		return (NULL);
+	}
+
+	if (isuser) {
+		err = idmap_getwinnamebyuid(id,
+		    IDMAP_REQ_FLG_USE_CACHE, &name, &domain);
+	} else {
+		err = idmap_getwinnamebygid(id,
+		    IDMAP_REQ_FLG_USE_CACHE, &name, &domain);
+	}
+	if (err != IDMAP_SUCCESS) {
+		PyErr_SetString(PyExc_KeyError, sid);
+		return (NULL);
+	}
+	(void) snprintf(buf, sizeof (buf), "%s@%s", name, domain);
+	free(name);
+	free(domain);
+
+	return (Py_BuildValue("s", buf));
+}
+
+static PyObject *
+py_isglobalzone(PyObject *self, PyObject *args)
+{
+	return (Py_BuildValue("i", getzoneid() == GLOBAL_ZONEID));
+}
+
+static PyObject *
+py_set_cmdstr(PyObject *self, PyObject *args)
+{
+	char *str;
+
+	if (!PyArg_ParseTuple(args, "s", &str))
+		return (NULL);
+
+	(void) strlcpy(cmdstr, str, sizeof (cmdstr));
+
+	Py_RETURN_NONE;
+}
+
+static PyObject *
+py_get_proptable(PyObject *self, PyObject *args)
+{
+	zprop_desc_t *t = zfs_prop_get_table();
+	PyObject *d = PyDict_New();
+	zfs_prop_t i;
+
+	for (i = 0; i < ZFS_NUM_PROPS; i++) {
+		zprop_desc_t *p = &t[i];
+		PyObject *tuple;
+		static const char *typetable[] =
+		    {"number", "string", "index"};
+		static const char *attrtable[] =
+		    {"default", "readonly", "inherit", "onetime"};
+		PyObject *indextable;
+
+		if (p->pd_proptype == PROP_TYPE_INDEX) {
+			const zprop_index_t *it = p->pd_table;
+			indextable = PyDict_New();
+			int j;
+			for (j = 0; it[j].pi_name; j++) {
+				PyDict_SetItemString(indextable,
+				    it[j].pi_name,
+				    Py_BuildValue("K", it[j].pi_value));
+			}
+		} else {
+			Py_INCREF(Py_None);
+			indextable = Py_None;
+		}
+
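+		/*
+		 * Tuple layout: (name, number, type, string default,
+		 * numeric default, attribute, dataset types, values,
+		 * column name, right-align, visible, index table).
+		 */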
+		tuple = Py_BuildValue("sissKsissiiO",
+		    p->pd_name, p->pd_propnum, typetable[p->pd_proptype],
+		    p->pd_strdefault, p->pd_numdefault,
+		    attrtable[p->pd_attr], p->pd_types,
+		    p->pd_values, p->pd_colname,
+		    p->pd_rightalign, p->pd_visible, indextable);
+		PyDict_SetItemString(d, p->pd_name, tuple);
+		Py_DECREF(tuple);
+	}
+
+	return (d);
+}
+
+static PyMethodDef zfsmethods[] = {
+	{"next_dataset", py_next_dataset, METH_VARARGS,
+	    "Get next child dataset or snapshot."},
+	{"get_fsacl", py_get_fsacl, METH_VARARGS, "Get allowed permissions."},
+	{"set_fsacl", py_set_fsacl, METH_VARARGS, "Set allowed permissions."},
+	{"userspace_many", py_userspace_many, METH_VARARGS,
+	    "Get user space accounting."},
+	{"userspace_upgrade", py_userspace_upgrade, METH_VARARGS,
+	    "Upgrade fs to enable user space accounting."},
+	{"set_cmdstr", py_set_cmdstr, METH_VARARGS,
+	    "Set command string for history logging."},
+	{"dataset_props", py_dataset_props, METH_VARARGS,
+	    "Get dataset properties."},
+	{"get_proptable", py_get_proptable, METH_NOARGS,
+	    "Get property table."},
+	/* Below are not really zfs-specific: */
+	{"sid_to_id", py_sid_to_id, METH_VARARGS, "Map SID to UID/GID."},
+	{"sid_to_name", py_sid_to_name, METH_VARARGS,
+	    "Map SID to name@domain."},
+	{"isglobalzone", py_isglobalzone, METH_NOARGS,
+	    "Determine if this is the global zone."},
+	{NULL, NULL, 0, NULL}
+};
+
+void
+initioctl(void)
+{
+	PyObject *zfs_ioctl = Py_InitModule("zfs.ioctl", zfsmethods);
+	PyObject *zfs_util = PyImport_ImportModule("zfs.util");
+	PyObject *devfile;
+
+	if (zfs_util == NULL)
+		return;
+
+	ZFSError = PyObject_GetAttrString(zfs_util, "ZFSError");
+	devfile = PyObject_GetAttrString(zfs_util, "dev");
+	zfsdevfd = PyObject_AsFileDescriptor(devfile);
+
+	zfs_prop_init();
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/lib/pyzfs/common/mapfile-vers	Sat Apr 18 13:41:47 2009 -0700
@@ -0,0 +1,45 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# MAPFILE HEADER START
+#
+# WARNING:  STOP NOW.  DO NOT MODIFY THIS FILE.
+# Object versioning must comply with the rules detailed in
+#
+#	usr/src/lib/README.mapfiles
+#
+# You should not be making modifications here until you've read the most current
+# copy of that file. If you need help, contact a gatekeeper for guidance.
+#
+# MAPFILE HEADER END
+#
+
+SUNWprivate {
+    global:
+	initioctl;
+    local:
+	*;
+};
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/lib/pyzfs/common/unallow.py	Sat Apr 18 13:41:47 2009 -0700
@@ -0,0 +1,28 @@
+#! /usr/bin/python2.4
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+import zfs.allow
+
+do_unallow = zfs.allow.do_allow
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/lib/pyzfs/common/userspace.py	Sat Apr 18 13:41:47 2009 -0700
@@ -0,0 +1,277 @@
+#! /usr/bin/python2.4
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+"""This module implements the "zfs userspace" and "zfs groupspace" subcommands.
+The only public interface is the zfs.userspace.do_userspace() function."""
+
+import zfs.util
+import zfs.ioctl
+import zfs.dataset
+import optparse
+import sys
+import pwd
+import grp
+import errno
+
+_ = zfs.util._
+
+# map from property name prefix -> (field name, isgroup)
+props = {
+    "userused@": ("used", False),
+    "userquota@": ("quota", False),
+    "groupused@": ("used", True),
+    "groupquota@": ("quota", True),
+}
+
+def skiptype(options, prop):
+	"""Return True if this property (eg "userquota@") should be skipped."""
+	(field, isgroup) = props[prop]
+	if field not in options.fields:
+		return True
+	if isgroup and "posixgroup" not in options.types and \
+	    "smbgroup" not in options.types:
+		return True
+	if not isgroup and "posixuser" not in options.types and \
+	    "smbuser" not in options.types:
+		return True
+	return False
+
+def updatemax(d, k, v):
+	d[k] = max(d.get(k, None), v)
+
+def new_entry(options, isgroup, domain, rid):
+	"""Return a dict("field": value) for this domain (string) + rid (int)"""
+
+	if domain:
+		idstr = "%s-%u" % (domain, rid)
+	else:
+		idstr = "%u" % rid
+
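+	# Pick a display type and an id-to-name mapper, keyed on
+	# (isgroup, whether the id still carries an SMB domain).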
+	(typename, mapfunc) = {
+	    (1, 1): ("SMB Group",   lambda id: zfs.ioctl.sid_to_name(id, 0)),
+	    (1, 0): ("POSIX Group", lambda id: grp.getgrgid(int(id)).gr_name),
+	    (0, 1): ("SMB User",    lambda id: zfs.ioctl.sid_to_name(id, 1)),
+	    (0, 0): ("POSIX User",  lambda id: pwd.getpwuid(int(id)).pw_name)
+	}[isgroup, bool(domain)]
+
+	if typename.lower().replace(" ", "") not in options.types:
+		return None
+
+	v = dict()
+	v["type"] = typename
+
+	# python's getpwuid/getgrgid is confused by ephemeral uids
+	if not options.noname and rid < 1<<31:
+		try:
+			v["name"] = mapfunc(idstr)
+		except KeyError:
+			pass
+
+	if "name" not in v:
+		v["name"] = idstr
+		if not domain:
+			# it's just a number, so pad it with spaces so
+			# that it will sort numerically
+			v["name.sort"] = "%20d" % rid
+	# fill in default values
+	v["used"] = "0"
+	v["used.sort"] = 0
+	v["quota"] = "none"
+	v["quota.sort"] = 0
+	return v
+
+def process_one_raw(acct, maxfieldlen, options, prop, elem):
+	"""Update the acct and maxfieldlen dicts to incorporate the
+	information from this elem from Dataset.userspace(prop)."""
+
+	(domain, rid, value) = elem
+	(field, isgroup) = props[prop]
+
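+	# With -i (translate), fold an SMB identity into its POSIX id so
+	# that both forms accumulate under a single entry below.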
+	if options.translate and domain:
+		try:
+			rid = zfs.ioctl.sid_to_id("%s-%u" % (domain, rid),
+			    not isgroup)
+			domain = None
+		except KeyError:
+			pass
+	key = (isgroup, domain, rid)
+
+	try:
+		v = acct[key]
+	except KeyError:
+		v = new_entry(options, isgroup, domain, rid)
+		if not v:
+			return
+		acct[key] = v
+
+	# Add our value to an existing value, which may be present if
+	# options.translate is set.
+	value = v[field + ".sort"] = value + v[field + ".sort"]
+
+	if options.parsable:
+		v[field] = str(value)
+	else:
+		v[field] = zfs.util.nicenum(value)
+	for k in v.keys():
+		# some of the .sort fields are integers, so have no len()
+		if isinstance(v[k], str):
+			updatemax(maxfieldlen, k, len(v[k]))
+
+def do_userspace():
+	"""Implements the "zfs userspace" and "zfs groupspace" subcommands."""
+
+	def usage(msg=None):
+		parser.print_help()
+		if msg:
+			print
+			parser.exit("zfs: error: " + msg)
+		else:
+			parser.exit()
+
+	if sys.argv[1] == "userspace":
+		defaulttypes = "posixuser,smbuser"
+	else:
+		defaulttypes = "posixgroup,smbgroup"
+
+	fields = ("type", "name", "used", "quota")
+	ljustfields = ("type", "name")
+	types = ("all", "posixuser", "smbuser", "posixgroup", "smbgroup")
+
+	u = _("%s [-niHp] [-o field[,...]] [-sS field] ... \n") % sys.argv[1]
+	u += _("    [-t type[,...]] <filesystem|snapshot>")
+	parser = optparse.OptionParser(usage=u, prog="zfs")
+
+	parser.add_option("-n", action="store_true", dest="noname",
+	    help=_("Print numeric ID instead of user/group name"))
+	parser.add_option("-i", action="store_true", dest="translate",
+	    help=_("translate SID to posix (possibly ephemeral) ID"))
+	parser.add_option("-H", action="store_true", dest="noheaders",
+	    help=_("no headers, tab delimited output"))
+	parser.add_option("-p", action="store_true", dest="parsable",
+	    help=_("exact (parsable) numeric output"))
+	parser.add_option("-o", dest="fields", metavar="field[,...]",
+	    default="type,name,used,quota",
+	    help=_("print only these fields (eg type,name,used,quota)"))
+	parser.add_option("-s", dest="sortfields", metavar="field",
+	    type="choice", choices=fields, default=list(),
+	    action="callback", callback=zfs.util.append_with_opt,
+	    help=_("sort field"))
+	parser.add_option("-S", dest="sortfields", metavar="field",
+	    type="choice", choices=fields, #-s sets the default
+	    action="callback", callback=zfs.util.append_with_opt,
+	    help=_("reverse sort field"))
+	parser.add_option("-t", dest="types", metavar="type[,...]",
+	    default=defaulttypes,
+	    help=_("print only these types (eg posixuser,smbuser,posixgroup,smbgroup,all)"))
+
+	(options, args) = parser.parse_args(sys.argv[2:])
+	if len(args) != 1:
+		usage(_("wrong number of arguments"))
+	dsname = args[0]
+
+	options.fields = options.fields.split(",")
+	for f in options.fields:
+		if f not in fields:
+			usage(_("invalid field %s") % f)
+
+	options.types = options.types.split(",")
+	for t in options.types:
+		if t not in types:
+			usage(_("invalid type %s") % t)
+
+	if not options.sortfields:
+		options.sortfields = [("-s", "type"), ("-s", "name")]
+
+	if "all" in options.types:
+		options.types = types[1:]
+
+	ds = zfs.dataset.Dataset(dsname, types=("filesystem",))
+
+	if ds.getprop("zoned") and zfs.ioctl.isglobalzone():
+		options.noname = True
+
+	if not ds.getprop("useraccounting"):
+		print(_("Initializing accounting information on old filesystem, please wait..."))
+		ds.userspace_upgrade()
+
+	acct = dict()
+	maxfieldlen = dict()
+
+	# gather and process accounting information
+	for prop in props.keys():
+		if skiptype(options, prop):
+			continue
+		for elem in ds.userspace(prop):
+			process_one_raw(acct, maxfieldlen, options, prop, elem)
+
+	# print out headers
+	if not options.noheaders:
+		line = str()
+		for field in options.fields:
+			# make sure the field header will fit
+			updatemax(maxfieldlen, field, len(field))
+
+			if field in ljustfields:
+				fmt = "%-*s  "
+			else:
+				fmt = "%*s  "
+			line += fmt % (maxfieldlen[field], field.upper())
+		print(line)
+
+	# custom sorting func
+	def cmpkey(val):
+		l = list()
+		for (opt, field) in options.sortfields:
+			try:
+				n = val[field + ".sort"]
+			except KeyError:
+				n = val[field]
+			if opt == "-S":
+				# reverse sorting
+				try:
+					n = -n
+				except TypeError:
+					# it's a string; decompose it
+					# into an array of integers,
+					# each one the negative of that
+					# character
+					n = [-ord(c) for c in n]
+			l.append(n)
+		return l
+
+	# print out data lines
+	for val in sorted(acct.itervalues(), key=cmpkey):
+		line = str()
+		for field in options.fields:
+			if options.noheaders:
+				line += val[field]
+				line += "\t"
+			else:
+				if field in ljustfields:
+					fmt = "%-*s  "
+				else:
+					fmt = "%*s  "
+				line += fmt % (maxfieldlen[field], val[field])
+		print(line)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/lib/pyzfs/common/util.py	Sat Apr 18 13:41:47 2009 -0700
@@ -0,0 +1,138 @@
+#! /usr/bin/python2.4
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+"""This module provides utility functions for ZFS.
+zfs.util.dev -- a file object of /dev/zfs """
+
+import gettext
+import errno
+import os
+# Note: this module (zfs.util) should not import zfs.ioctl, because that
+# would introduce a circular dependency
+
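+# Error codes used below that Python 2.4's errno module does not define.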
+errno.ECANCELED = 47
+errno.ENOTSUP = 48
+
+dev = open("/dev/zfs", "w")
+
+_ = gettext.translation("SUNW_OST_OSLIB", "/usr/lib/locale",
+    fallback=True).gettext
+
+def default_repr(self):
+	"""A simple __repr__ function."""
+	if self.__slots__:
+		str = "<" + self.__class__.__name__
+		for v in self.__slots__:
+			str += " %s: %r" % (v, getattr(self, v))
+		return str + ">"
+	else:
+		return "<%s %s>" % \
+		    (self.__class__.__name__, repr(self.__dict__))
+
+class ZFSError(StandardError):
+	"""This exception class represents a potentially user-visible
+	ZFS error.  If uncaught, it will be printed and the process will
+	exit with exit code 1.
+	
+	errno -- the error number (eg, from ioctl(2))."""
+
+	__slots__ = "why", "task", "errno"
+	__repr__ = default_repr
+
+	def __init__(self, eno, task=None, why=None):
+		"""Create a ZFS exception.
+		eno -- the error number (errno)
+		task -- a string describing the task that failed
+		why -- a string describing why it failed (defaults to
+		    strerror(eno))"""
+
+		self.errno = eno
+		self.task = task
+		self.why = why
+
+	def __str__(self):
+		s = ""
+		if self.task:
+			s += self.task + ": "
+		if self.why:
+			s += self.why
+		else:
+			s += self.strerror
+		return s
+
+	__strs = {
+		errno.EPERM: _("permission denied"),
+		errno.ECANCELED:
+		    _("delegated administration is disabled on pool"),
+		errno.EINTR: _("signal received"),
+		errno.EIO: _("I/O error"),
+		errno.ENOENT: _("dataset does not exist"),
+		errno.ENOSPC: _("out of space"),
+		errno.EEXIST: _("dataset already exists"),
+		errno.EBUSY: _("dataset is busy"),
+		errno.EROFS:
+		    _("snapshot permissions cannot be modified"),
+		errno.ENAMETOOLONG: _("dataset name is too long"),
+		errno.ENOTSUP: _("unsupported version"),
+		errno.EAGAIN: _("pool I/O is currently suspended"),
+	}
+
+	__strs[errno.EACCES] = __strs[errno.EPERM]
+	__strs[errno.ENXIO] = __strs[errno.EIO]
+	__strs[errno.ENODEV] = __strs[errno.EIO]
+	__strs[errno.EDQUOT] = __strs[errno.ENOSPC]
+
+	@property
+	def strerror(self):
+		return ZFSError.__strs.get(self.errno, os.strerror(self.errno))
+
+def nicenum(num):
+	"""Return a nice string (eg "1.23M") for this integer."""
+	index = 0
+	n = num
+
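+	# Scale down by powers of 1024 to find the right unit suffix.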
+	while n >= 1024:
+		n /= 1024
+		index += 1
+
+	u = " KMGTPE"[index]
+	if index == 0:
+		return "%u" % n
+	elif n >= 100 or (num & ((1 << (10 * index)) - 1)) == 0:
+		# it's an exact multiple of its index, or it wouldn't
+		# fit as floating point, so print as an integer
+		return "%u%c" % (n, u)
+	else:
+		# due to rounding, it's tricky to tell what precision to
+		# use; try each precision and see which one fits
+		for i in (2, 1, 0):
+			s = "%.*f%c" % (i, float(num) / (1<<(10*index)), u)
+			if len(s) <= 5:
+				return s
+
+def append_with_opt(option, opt, value, parser):
+	"""A function for OptionParser which appends a tuple (opt, value)."""
+	getattr(parser.values, option.dest).append((opt, value))
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/lib/pyzfs/i386/Makefile	Sat Apr 18 13:41:47 2009 -0700
@@ -0,0 +1,27 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+include ../Makefile.com
+
+install: all $(ROOTLIBS)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/lib/pyzfs/sparc/Makefile	Sat Apr 18 13:41:47 2009 -0700
@@ -0,0 +1,27 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+include ../Makefile.com
+
+install: all $(ROOTLIBS)
--- a/usr/src/pkgdefs/SUNWzfsu/prototype_com	Sat Apr 18 01:13:46 2009 -0700
+++ b/usr/src/pkgdefs/SUNWzfsu/prototype_com	Sat Apr 18 13:41:47 2009 -0700
@@ -20,11 +20,9 @@
 #
 
 #
-# Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 # Use is subject to license terms.
 #
-# ident	"%Z%%M%	%I%	%E% SMI"
-#
 
 # packaging files
 i copyright
@@ -38,6 +36,8 @@
 d none usr/lib 755 root bin
 d none usr/lib/zfs 755 root bin
 f none usr/lib/zfs/availdevs 555 root bin
+f none usr/lib/zfs/pyzfs.py 555 root bin
+f none usr/lib/zfs/pyzfs.pyc 555 root bin
 d none usr/lib/devfsadm 755 root sys
 d none usr/lib/devfsadm/linkmod 755 root sys
 f none usr/lib/devfsadm/linkmod/SUNW_zfs_link.so 755 root sys
@@ -57,6 +57,24 @@
 d none usr/lib/mdb/kvm 755 root sys
 d none usr/lib/mdb/proc 755 root sys
 f none usr/lib/mdb/proc/libzpool.so 555 root sys
+d none usr/lib/python2.4 755 root bin
+d none usr/lib/python2.4/vendor-packages 755 root bin
+d none usr/lib/python2.4/vendor-packages/zfs 755 root bin
+f none usr/lib/python2.4/vendor-packages/zfs/__init__.py 644 root bin
+f none usr/lib/python2.4/vendor-packages/zfs/__init__.pyc 644 root bin
+f none usr/lib/python2.4/vendor-packages/zfs/allow.py 644 root bin
+f none usr/lib/python2.4/vendor-packages/zfs/allow.pyc 644 root bin
+f none usr/lib/python2.4/vendor-packages/zfs/dataset.py 644 root bin
+f none usr/lib/python2.4/vendor-packages/zfs/dataset.pyc 644 root bin
+f none usr/lib/python2.4/vendor-packages/zfs/groupspace.py 644 root bin
+f none usr/lib/python2.4/vendor-packages/zfs/groupspace.pyc 644 root bin
+f none usr/lib/python2.4/vendor-packages/zfs/ioctl.so 755 root bin
+f none usr/lib/python2.4/vendor-packages/zfs/unallow.py 644 root bin
+f none usr/lib/python2.4/vendor-packages/zfs/unallow.pyc 644 root bin
+f none usr/lib/python2.4/vendor-packages/zfs/userspace.py 644 root bin
+f none usr/lib/python2.4/vendor-packages/zfs/userspace.pyc 644 root bin
+f none usr/lib/python2.4/vendor-packages/zfs/util.py 644 root bin
+f none usr/lib/python2.4/vendor-packages/zfs/util.pyc 644 root bin
 d none usr/lib/sysevent 755 root bin
 d none usr/lib/sysevent/modules 755 root bin
 f none usr/lib/sysevent/modules/zfs_mod.so 755 root sys
--- a/usr/src/tools/scripts/check_rtime.pl	Sat Apr 18 01:13:46 2009 -0700
+++ b/usr/src/tools/scripts/check_rtime.pl	Sat Apr 18 13:41:47 2009 -0700
@@ -202,7 +202,8 @@
 	/lib.*\ of\ .*libssl3\.so |				# non-OSNET
 	/lib.*\ of\ .*libxml2\.so\.2 |				# non-OSNET
 	/lib.*\ of\ .*libxslt\.so\.1 |				# non-OSNET
-	/lib.*\ of\ .*libpq\.so\.4 				# non-OSNET
+	/lib.*\ of\ .*libpq\.so\.4 |				# non-OSNET
+	/lib.*\ of\ .*libpython2\.4\.so\.1\.0 			# non-OSNET
 }x;
 
 # Define any files that should only have unused (ldd -u) processing.
--- a/usr/src/uts/common/fs/zfs/arc.c	Sat Apr 18 01:13:46 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/arc.c	Sat Apr 18 13:41:47 2009 -0700
@@ -2504,7 +2504,6 @@
     uint32_t *arc_flags, const zbookmark_t *zb)
 {
 	int err;
-	arc_buf_hdr_t *hdr = pbuf->b_hdr;
 
 	ASSERT(!refcount_is_zero(&pbuf->b_hdr->b_refcnt));
 	ASSERT3U((char *)bp - (char *)pbuf->b_data, <, pbuf->b_hdr->b_size);
@@ -2512,9 +2511,8 @@
 
 	err = arc_read_nolock(pio, spa, bp, done, private, priority,
 	    zio_flags, arc_flags, zb);
-
-	ASSERT3P(hdr, ==, pbuf->b_hdr);
 	rw_exit(&pbuf->b_lock);
+
 	return (err);
 }
 
--- a/usr/src/uts/common/fs/zfs/dbuf.c	Sat Apr 18 01:13:46 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/dbuf.c	Sat Apr 18 13:41:47 2009 -0700
@@ -329,7 +329,7 @@
 		if (db->db_parent == dn->dn_dbuf) {
 			/* db is pointed to by the dnode */
 			/* ASSERT3U(db->db_blkid, <, dn->dn_nblkptr); */
-			if (db->db.db_object == DMU_META_DNODE_OBJECT)
+			if (DMU_OBJECT_IS_SPECIAL(db->db.db_object))
 				ASSERT(db->db_parent == NULL);
 			else
 				ASSERT(db->db_parent != NULL);
@@ -908,15 +908,11 @@
 	 * Shouldn't dirty a regular buffer in syncing context.  Private
 	 * objects may be dirtied in syncing context, but only if they
 	 * were already pre-dirtied in open context.
-	 * XXX We may want to prohibit dirtying in syncing context even
-	 * if they did pre-dirty.
 	 */
 	ASSERT(!dmu_tx_is_syncing(tx) ||
 	    BP_IS_HOLE(dn->dn_objset->os_rootbp) ||
-	    dn->dn_object == DMU_META_DNODE_OBJECT ||
-	    dn->dn_objset->os_dsl_dataset == NULL ||
-	    dsl_dir_is_private(dn->dn_objset->os_dsl_dataset->ds_dir));
-
+	    DMU_OBJECT_IS_SPECIAL(dn->dn_object) ||
+	    dn->dn_objset->os_dsl_dataset == NULL);
 	/*
 	 * We make this assert for private objects as well, but after we
 	 * check if we're already dirty.  They are allowed to re-dirty
@@ -975,7 +971,8 @@
 	/*
 	 * Only valid if not already dirty.
 	 */
-	ASSERT(dn->dn_dirtyctx == DN_UNDIRTIED || dn->dn_dirtyctx ==
+	ASSERT(dn->dn_object == 0 ||
+	    dn->dn_dirtyctx == DN_UNDIRTIED || dn->dn_dirtyctx ==
 	    (dmu_tx_is_syncing(tx) ? DN_DIRTY_SYNC : DN_DIRTY_OPEN));
 
 	ASSERT3U(dn->dn_nlevels, >, db->db_level);
@@ -987,15 +984,13 @@
 
 	/*
 	 * We should only be dirtying in syncing context if it's the
-	 * mos, a spa os, or we're initializing the os.  However, we are
-	 * allowed to dirty in syncing context provided we already
-	 * dirtied it in open context.  Hence we must make this
-	 * assertion only if we're not already dirty.
+	 * mos, a special object, or we're initializing the os.
+	 * However, we are allowed to dirty in syncing context provided
+	 * we already dirtied it in open context.  Hence we must make
+	 * this assertion only if we're not already dirty.
 	 */
-	ASSERT(!dmu_tx_is_syncing(tx) ||
-	    os->os_dsl_dataset == NULL ||
-	    !dsl_dir_is_private(os->os_dsl_dataset->ds_dir) ||
-	    !BP_IS_HOLE(os->os_rootbp));
+	ASSERT(!dmu_tx_is_syncing(tx) || DMU_OBJECT_IS_SPECIAL(dn->dn_object) ||
+	    os->os_dsl_dataset == NULL || BP_IS_HOLE(os->os_rootbp));
 	ASSERT(db->db.db_size != 0);
 
 	dprintf_dbuf(db, "size=%llx\n", (u_longlong_t)db->db.db_size);
--- a/usr/src/uts/common/fs/zfs/dmu.c	Sat Apr 18 01:13:46 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/dmu.c	Sat Apr 18 13:41:47 2009 -0700
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -85,6 +85,8 @@
 	{	byteswap_uint64_array,	TRUE,	"FUID table size"	},
 	{	zap_byteswap,		TRUE,	"DSL dataset next clones"},
 	{	zap_byteswap,		TRUE,	"scrub work queue"	},
+	{	zap_byteswap,		TRUE,	"ZFS user/group used"	},
+	{	zap_byteswap,		TRUE,	"ZFS user/group quota"	},
 };
 
 int
@@ -442,7 +444,8 @@
 	object_size = align == 1 ? dn->dn_datablksz :
 	    (dn->dn_maxblkid + 1) << dn->dn_datablkshift;
 
-	if (trunc || (end = offset + length) > object_size)
+	end = offset + length;
+	if (trunc || end > object_size)
 		end = object_size;
 	if (end <= offset)
 		return (0);
@@ -450,6 +453,7 @@
 
 	while (length) {
 		start = end;
+		/* assert(offset <= start) */
 		err = get_next_chunk(dn, &start, offset);
 		if (err)
 			return (err);
--- a/usr/src/uts/common/fs/zfs/dmu_objset.c	Sat Apr 18 01:13:46 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/dmu_objset.c	Sat Apr 18 13:41:47 2009 -0700
@@ -164,10 +164,15 @@
 {
 	objset_phys_t *osp = buf;
 
-	ASSERT(size == sizeof (objset_phys_t));
+	ASSERT(size == OBJSET_OLD_PHYS_SIZE || size == sizeof (objset_phys_t));
 	dnode_byteswap(&osp->os_meta_dnode);
 	byteswap_uint64_array(&osp->os_zil_header, sizeof (zil_header_t));
 	osp->os_type = BSWAP_64(osp->os_type);
+	osp->os_flags = BSWAP_64(osp->os_flags);
+	if (size == sizeof (objset_phys_t)) {
+		dnode_byteswap(&osp->os_userused_dnode);
+		dnode_byteswap(&osp->os_groupused_dnode);
+	}
 }
 
 int
@@ -210,12 +215,29 @@
 				err = EIO;
 			return (err);
 		}
+
+		/* Increase the blocksize if we are permitted. */
+		if (spa_version(spa) >= SPA_VERSION_USERSPACE &&
+		    arc_buf_size(osi->os_phys_buf) < sizeof (objset_phys_t)) {
+			arc_buf_t *buf = arc_buf_alloc(spa,
+			    sizeof (objset_phys_t), &osi->os_phys_buf,
+			    ARC_BUFC_METADATA);
+			bzero(buf->b_data, sizeof (objset_phys_t));
+			bcopy(osi->os_phys_buf->b_data, buf->b_data,
+			    arc_buf_size(osi->os_phys_buf));
+			arc_buf_remove_ref(osi->os_phys_buf, &osi->os_phys_buf);
+			osi->os_phys_buf = buf;
+		}
+
 		osi->os_phys = osi->os_phys_buf->b_data;
+		osi->os_flags = osi->os_phys->os_flags;
 	} else {
-		osi->os_phys_buf = arc_buf_alloc(spa, sizeof (objset_phys_t),
+		int size = spa_version(spa) >= SPA_VERSION_USERSPACE ?
+		    sizeof (objset_phys_t) : OBJSET_OLD_PHYS_SIZE;
+		osi->os_phys_buf = arc_buf_alloc(spa, size,
 		    &osi->os_phys_buf, ARC_BUFC_METADATA);
 		osi->os_phys = osi->os_phys_buf->b_data;
-		bzero(osi->os_phys, sizeof (objset_phys_t));
+		bzero(osi->os_phys, size);
 	}
 
 	/*
@@ -276,6 +298,12 @@
 
 	osi->os_meta_dnode = dnode_special_open(osi,
 	    &osi->os_phys->os_meta_dnode, DMU_META_DNODE_OBJECT);
+	if (arc_buf_size(osi->os_phys_buf) >= sizeof (objset_phys_t)) {
+		osi->os_userused_dnode = dnode_special_open(osi,
+		    &osi->os_phys->os_userused_dnode, DMU_USERUSED_OBJECT);
+		osi->os_groupused_dnode = dnode_special_open(osi,
+		    &osi->os_phys->os_groupused_dnode, DMU_GROUPUSED_OBJECT);
+	}
 
 	/*
 	 * We should be the only thread trying to do this because we
@@ -456,12 +484,14 @@
 	os.os = osi;
 	(void) dmu_objset_evict_dbufs(&os);
 
-	ASSERT3P(list_head(&osi->os_dnodes), ==, osi->os_meta_dnode);
-	ASSERT3P(list_tail(&osi->os_dnodes), ==, osi->os_meta_dnode);
-	ASSERT3P(list_head(&osi->os_meta_dnode->dn_dbufs), ==, NULL);
+	dnode_special_close(osi->os_meta_dnode);
+	if (osi->os_userused_dnode) {
+		dnode_special_close(osi->os_userused_dnode);
+		dnode_special_close(osi->os_groupused_dnode);
+	}
+	zil_free(osi->os_zil);
 
-	dnode_special_close(osi->os_meta_dnode);
-	zil_free(osi->os_zil);
+	ASSERT3P(list_head(&osi->os_dnodes), ==, NULL);
 
 	VERIFY(arc_buf_remove_ref(osi->os_phys_buf, &osi->os_phys_buf) == 1);
 	mutex_destroy(&osi->os_lock);
@@ -520,6 +550,10 @@
 	ASSERT(type != DMU_OST_ANY);
 	ASSERT(type < DMU_OST_NUMTYPES);
 	osi->os_phys->os_type = type;
+	if (dmu_objset_userused_enabled(osi)) {
+		osi->os_phys->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE;
+		osi->os_flags = osi->os_phys->os_flags;
+	}
 
 	dsl_dataset_dirty(ds, tx);
 
@@ -825,7 +859,7 @@
 }
 
 static void
-dmu_objset_sync_dnodes(list_t *list, dmu_tx_t *tx)
+dmu_objset_sync_dnodes(list_t *list, list_t *newlist, dmu_tx_t *tx)
 {
 	dnode_t *dn;
 
@@ -833,14 +867,20 @@
 		ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT);
 		ASSERT(dn->dn_dbuf->db_data_pending);
 		/*
-		 * Initialize dn_zio outside dnode_sync()
-		 * to accomodate meta-dnode
+		 * Initialize dn_zio outside dnode_sync() because the
+		 * meta-dnode needs to set it outside dnode_sync().
 		 */
 		dn->dn_zio = dn->dn_dbuf->db_data_pending->dr_zio;
 		ASSERT(dn->dn_zio);
 
 		ASSERT3U(dn->dn_nlevels, <=, DN_MAX_LEVELS);
 		list_remove(list, dn);
+
+		if (newlist) {
+			(void) dnode_add_ref(dn, newlist);
+			list_insert_tail(newlist, dn);
+		}
+
 		dnode_sync(dn, tx);
 	}
 }
@@ -859,9 +899,12 @@
 	ASSERT(BP_GET_LEVEL(bp) == 0);
 
 	/*
-	 * Update rootbp fill count.
+	 * Update rootbp fill count: it should be the number of objects
+	 * allocated in the object set (not counting the "special"
+	 * objects that are stored in the objset_phys_t -- the meta
+	 * dnode and user/group accounting objects).
 	 */
-	bp->blk_fill = 1;	/* count the meta-dnode */
+	bp->blk_fill = 0;
 	for (int i = 0; i < dnp->dn_nblkptr; i++)
 		bp->blk_fill += dnp->dn_blkptr[i].blk_fill;
 
@@ -884,6 +927,7 @@
 	writeprops_t wp = { 0 };
 	zio_t *zio;
 	list_t *list;
+	list_t *newlist = NULL;
 	dbuf_dirty_record_t *dr;
 
 	dprintf_ds(os->os_dsl_dataset, "txg=%llu\n", tx->tx_txg);
@@ -921,20 +965,41 @@
 	}
 
 	arc_release(os->os_phys_buf, &os->os_phys_buf);
+
 	zio = arc_write(pio, os->os_spa, &wp, DMU_OS_IS_L2CACHEABLE(os),
 	    tx->tx_txg, os->os_rootbp, os->os_phys_buf, ready, NULL, os,
 	    ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb);
 
 	/*
-	 * Sync meta-dnode - the parent IO for the sync is the root block
+	 * Sync special dnodes - the parent IO for the sync is the root block
 	 */
 	os->os_meta_dnode->dn_zio = zio;
 	dnode_sync(os->os_meta_dnode, tx);
 
+	os->os_phys->os_flags = os->os_flags;
+
+	if (os->os_userused_dnode &&
+	    os->os_userused_dnode->dn_type != DMU_OT_NONE) {
+		os->os_userused_dnode->dn_zio = zio;
+		dnode_sync(os->os_userused_dnode, tx);
+		os->os_groupused_dnode->dn_zio = zio;
+		dnode_sync(os->os_groupused_dnode, tx);
+	}
+
 	txgoff = tx->tx_txg & TXG_MASK;
 
-	dmu_objset_sync_dnodes(&os->os_free_dnodes[txgoff], tx);
-	dmu_objset_sync_dnodes(&os->os_dirty_dnodes[txgoff], tx);
+	if (dmu_objset_userused_enabled(os)) {
+		newlist = &os->os_synced_dnodes;
+		/*
+		 * We must create the list here because it uses the
+		 * dn_dirty_link[] of this txg.
+		 */
+		list_create(newlist, sizeof (dnode_t),
+		    offsetof(dnode_t, dn_dirty_link[txgoff]));
+	}
+
+	dmu_objset_sync_dnodes(&os->os_free_dnodes[txgoff], newlist, tx);
+	dmu_objset_sync_dnodes(&os->os_dirty_dnodes[txgoff], newlist, tx);
 
 	list = &os->os_meta_dnode->dn_dirty_records[txgoff];
 	while (dr = list_head(list)) {
@@ -951,6 +1016,145 @@
 	zio_nowait(zio);
 }
 
+static objset_used_cb_t *used_cbs[DMU_OST_NUMTYPES];
+
+void
+dmu_objset_register_type(dmu_objset_type_t ost, objset_used_cb_t *cb)
+{
+	used_cbs[ost] = cb;
+}
+
+boolean_t
+dmu_objset_userused_enabled(objset_impl_t *os)
+{
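+	/*
+	 * Accounting requires a pool version of at least
+	 * SPA_VERSION_USERSPACE, an objset type that registered a
+	 * callback, and the userused special dnode.
+	 */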
+	return (spa_version(os->os_spa) >= SPA_VERSION_USERSPACE &&
+	    used_cbs[os->os_phys->os_type] &&
+	    os->os_userused_dnode);
+}
+
+void
+dmu_objset_do_userquota_callbacks(objset_impl_t *os, dmu_tx_t *tx)
+{
+	dnode_t *dn;
+	list_t *list = &os->os_synced_dnodes;
+	static const char zerobuf[DN_MAX_BONUSLEN] = {0};
+
+	ASSERT(list_head(list) == NULL || dmu_objset_userused_enabled(os));
+
+	while (dn = list_head(list)) {
+		dmu_object_type_t bonustype;
+
+		ASSERT(!DMU_OBJECT_IS_SPECIAL(dn->dn_object));
+		ASSERT(dn->dn_oldphys);
+		ASSERT(dn->dn_phys->dn_type == DMU_OT_NONE ||
+		    dn->dn_phys->dn_flags &
+		    DNODE_FLAG_USERUSED_ACCOUNTED);
+
+		/* Allocate the user/groupused objects if necessary. */
+		if (os->os_userused_dnode->dn_type == DMU_OT_NONE) {
+			VERIFY(0 == zap_create_claim(&os->os,
+			    DMU_USERUSED_OBJECT,
+			    DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx));
+			VERIFY(0 == zap_create_claim(&os->os,
+			    DMU_GROUPUSED_OBJECT,
+			    DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx));
+		}
+
+		/*
+		 * If the object was not previously
+		 * accounted, pretend that it was free.
+		 */
+		if (!(dn->dn_oldphys->dn_flags &
+		    DNODE_FLAG_USERUSED_ACCOUNTED)) {
+			bzero(dn->dn_oldphys, sizeof (dnode_phys_t));
+		}
+
+		/*
+		 * If the object was freed, use the previous bonustype.
+		 */
+		bonustype = dn->dn_phys->dn_bonustype ?
+		    dn->dn_phys->dn_bonustype : dn->dn_oldphys->dn_bonustype;
+		ASSERT(dn->dn_phys->dn_type != 0 ||
+		    (bcmp(DN_BONUS(dn->dn_phys), zerobuf,
+		    DN_MAX_BONUSLEN) == 0 &&
+		    DN_USED_BYTES(dn->dn_phys) == 0));
+		ASSERT(dn->dn_oldphys->dn_type != 0 ||
+		    (bcmp(DN_BONUS(dn->dn_oldphys), zerobuf,
+		    DN_MAX_BONUSLEN) == 0 &&
+		    DN_USED_BYTES(dn->dn_oldphys) == 0));
+		used_cbs[os->os_phys->os_type](&os->os, bonustype,
+		    DN_BONUS(dn->dn_oldphys), DN_BONUS(dn->dn_phys),
+		    DN_USED_BYTES(dn->dn_oldphys),
+		    DN_USED_BYTES(dn->dn_phys), tx);
+
+		/*
+		 * The mutex is needed here for interlock with dnode_allocate.
+		 */
+		mutex_enter(&dn->dn_mtx);
+		zio_buf_free(dn->dn_oldphys, sizeof (dnode_phys_t));
+		dn->dn_oldphys = NULL;
+		mutex_exit(&dn->dn_mtx);
+
+		list_remove(list, dn);
+		dnode_rele(dn, list);
+	}
+}
+
+boolean_t
+dmu_objset_userspace_present(objset_t *os)
+{
+	return (os->os->os_phys->os_flags &
+	    OBJSET_FLAG_USERACCOUNTING_COMPLETE);
+}
+
+int
+dmu_objset_userspace_upgrade(objset_t *os)
+{
+	uint64_t obj;
+	int err = 0;
+
+	if (dmu_objset_userspace_present(os))
+		return (0);
+	if (!dmu_objset_userused_enabled(os->os))
+		return (ENOTSUP);
+	if (dmu_objset_is_snapshot(os))
+		return (EINVAL);
+
+	/*
+	 * We simply need to mark every object dirty, so that it will be
+	 * synced out and then accounted.  If this is called
+	 * concurrently, or if we already did some work before crashing,
+	 * that's fine, since we track each object's accounted state
+	 * independently.
+	 */
+
+	for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE, 0)) {
+		dmu_tx_t *tx = dmu_tx_create(os);
+		dmu_buf_t *db;
+		int objerr;
+
+		if (issig(JUSTLOOKING) && issig(FORREAL))
+			return (EINTR);
+
+		objerr = dmu_bonus_hold(os, obj, FTAG, &db);
+		if (objerr)
+			continue;
+		dmu_tx_hold_bonus(tx, obj);
+		objerr = dmu_tx_assign(tx, TXG_WAIT);
+		if (objerr) {
+			dmu_tx_abort(tx);
+			continue;
+		}
+		dmu_buf_will_dirty(db, tx);
+		dmu_buf_rele(db, FTAG);
+		dmu_tx_commit(tx);
+	}
+
+	os->os->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE;
+	txg_wait_synced(dmu_objset_pool(os), 0);
+	return (0);
+}
+
 void
 dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp,
     uint64_t *usedobjsp, uint64_t *availobjsp)
@@ -984,6 +1188,8 @@
 
 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_TYPE,
 	    os->os->os_phys->os_type);
+	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USERACCOUNTING,
+	    dmu_objset_userspace_present(os));
 }
 
 int
--- a/usr/src/uts/common/fs/zfs/dmu_send.c	Sat Apr 18 01:13:46 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/dmu_send.c	Sat Apr 18 13:41:47 2009 -0700
@@ -161,7 +161,9 @@
 	if (issig(JUSTLOOKING) && issig(FORREAL))
 		return (EINTR);
 
-	if (bp == NULL && zb->zb_object == 0) {
+	if (zb->zb_object != 0 && DMU_OBJECT_IS_SPECIAL(zb->zb_object)) {
+		return (0);
+	} else if (bp == NULL && zb->zb_object == 0) {
 		uint64_t span = BP_SPAN(dnp, zb->zb_level);
 		uint64_t dnobj = (zb->zb_blkid * span) >> DNODE_SHIFT;
 		err = dump_freeobjects(ba, dnobj, span >> DNODE_SHIFT);
--- a/usr/src/uts/common/fs/zfs/dmu_traverse.c	Sat Apr 18 01:13:46 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/dmu_traverse.c	Sat Apr 18 13:41:47 2009 -0700
@@ -64,6 +64,9 @@
 	void *td_arg;
 };
 
+static int traverse_dnode(struct traverse_data *td, const dnode_phys_t *dnp,
+    arc_buf_t *buf, uint64_t objset, uint64_t object);
+
 /* ARGSUSED */
 static void
 traverse_zil_block(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t claim_txg)
@@ -189,7 +192,7 @@
 		}
 	} else if (BP_GET_TYPE(bp) == DMU_OT_DNODE) {
 		uint32_t flags = ARC_WAIT;
-		int i, j;
+		int i;
 		int epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT;
 
 		err = arc_read(NULL, td->td_spa, bp, pbuf,
@@ -201,20 +204,15 @@
 		/* recursively visitbp() blocks below this */
 		dnp = buf->b_data;
 		for (i = 0; i < epb && err == 0; i++, dnp++) {
-			for (j = 0; j < dnp->dn_nblkptr; j++) {
-				SET_BOOKMARK(&czb, zb->zb_objset,
-				    zb->zb_blkid * epb + i,
-				    dnp->dn_nlevels - 1, j);
-				err = traverse_visitbp(td, dnp, buf,
-				    (blkptr_t *)&dnp->dn_blkptr[j], &czb);
-				if (err)
-					break;
-			}
+			err = traverse_dnode(td, dnp, buf, zb->zb_objset,
+			    zb->zb_blkid * epb + i);
+			if (err)
+				break;
 		}
 	} else if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) {
 		uint32_t flags = ARC_WAIT;
 		objset_phys_t *osp;
-		int j;
+		dnode_phys_t *dnp;
 
 		err = arc_read_nolock(NULL, td->td_spa, bp,
 		    arc_getbuf_func, &buf,
@@ -225,14 +223,17 @@
 		osp = buf->b_data;
 		traverse_zil(td, &osp->os_zil_header);
 
-		for (j = 0; j < osp->os_meta_dnode.dn_nblkptr; j++) {
-			SET_BOOKMARK(&czb, zb->zb_objset, 0,
-			    osp->os_meta_dnode.dn_nlevels - 1, j);
-			err = traverse_visitbp(td, &osp->os_meta_dnode, buf,
-			    (blkptr_t *)&osp->os_meta_dnode.dn_blkptr[j],
-			    &czb);
-			if (err)
-				break;
+		dnp = &osp->os_meta_dnode;
+		err = traverse_dnode(td, dnp, buf, zb->zb_objset, 0);
+		if (err == 0 && arc_buf_size(buf) >= sizeof (objset_phys_t)) {
+			dnp = &osp->os_userused_dnode;
+			err = traverse_dnode(td, dnp, buf, zb->zb_objset,
+			    DMU_USERUSED_OBJECT);
+		}
+		if (err == 0 && arc_buf_size(buf) >= sizeof (objset_phys_t)) {
+			dnp = &osp->os_groupused_dnode;
+			err = traverse_dnode(td, dnp, buf, zb->zb_objset,
+			    DMU_GROUPUSED_OBJECT);
 		}
 	}
 
@@ -245,6 +246,23 @@
 	return (err);
 }
 
+static int
+traverse_dnode(struct traverse_data *td, const dnode_phys_t *dnp,
+    arc_buf_t *buf, uint64_t objset, uint64_t object)
+{
+	int j, err = 0;
+	zbookmark_t czb;
+
+	for (j = 0; j < dnp->dn_nblkptr; j++) {
+		SET_BOOKMARK(&czb, objset, object, dnp->dn_nlevels - 1, j);
+		err = traverse_visitbp(td, dnp, buf,
+		    (blkptr_t *)&dnp->dn_blkptr[j], &czb);
+		if (err)
+			break;
+	}
+	return (err);
+}
+
 /* ARGSUSED */
 static int
 traverse_prefetcher(spa_t *spa, blkptr_t *bp, const zbookmark_t *zb,
--- a/usr/src/uts/common/fs/zfs/dmu_tx.c	Sat Apr 18 01:13:46 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/dmu_tx.c	Sat Apr 18 13:41:47 2009 -0700
@@ -628,7 +628,7 @@
 }
 
 void
-dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, char *name)
+dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, const char *name)
 {
 	dmu_tx_hold_t *txh;
 	dnode_t *dn;
--- a/usr/src/uts/common/fs/zfs/dnode.c	Sat Apr 18 01:13:46 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/dnode.c	Sat Apr 18 13:41:47 2009 -0700
@@ -156,7 +156,7 @@
 	}
 	if (dn->dn_phys->dn_type != DMU_OT_NONE)
 		ASSERT3U(dn->dn_phys->dn_nlevels, <=, dn->dn_nlevels);
-	ASSERT(dn->dn_object == DMU_META_DNODE_OBJECT || dn->dn_dbuf != NULL);
+	ASSERT(DMU_OBJECT_IS_SPECIAL(dn->dn_object) || dn->dn_dbuf != NULL);
 	if (dn->dn_dbuf != NULL) {
 		ASSERT3P(dn->dn_phys, ==,
 		    (dnode_phys_t *)dn->dn_dbuf->db.db_data +
@@ -321,6 +321,7 @@
 	}
 	ASSERT(NULL == list_head(&dn->dn_dbufs));
 #endif
+	ASSERT(dn->dn_oldphys == NULL);
 
 	mutex_enter(&os->os_lock);
 	list_remove(&os->os_dnodes, dn);
@@ -551,6 +552,22 @@
 	 */
 	ASSERT(spa_config_held(os->os_spa, SCL_ALL, RW_WRITER) == 0);
 
+	if (object == DMU_USERUSED_OBJECT || object == DMU_GROUPUSED_OBJECT) {
+		dn = (object == DMU_USERUSED_OBJECT) ?
+		    os->os_userused_dnode : os->os_groupused_dnode;
+		if (dn == NULL)
+			return (ENOENT);
+		type = dn->dn_type;
+		if ((flag & DNODE_MUST_BE_ALLOCATED) && type == DMU_OT_NONE)
+			return (ENOENT);
+		if ((flag & DNODE_MUST_BE_FREE) && type != DMU_OT_NONE)
+			return (EEXIST);
+		DNODE_VERIFY(dn);
+		(void) refcount_add(&dn->dn_holds, tag);
+		*dnp = dn;
+		return (0);
+	}
+
 	if (object == 0 || object >= DN_MAX_OBJECT)
 		return (EINVAL);
 
@@ -609,7 +626,8 @@
 	type = dn->dn_type;
 	if (dn->dn_free_txg ||
 	    ((flag & DNODE_MUST_BE_ALLOCATED) && type == DMU_OT_NONE) ||
-	    ((flag & DNODE_MUST_BE_FREE) && type != DMU_OT_NONE)) {
+	    ((flag & DNODE_MUST_BE_FREE) &&
+	    (type != DMU_OT_NONE || dn->dn_oldphys))) {
 		mutex_exit(&dn->dn_mtx);
 		dbuf_rele(db, FTAG);
 		return (type == DMU_OT_NONE ? ENOENT : EEXIST);
@@ -674,8 +692,10 @@
 	objset_impl_t *os = dn->dn_objset;
 	uint64_t txg = tx->tx_txg;
 
-	if (dn->dn_object == DMU_META_DNODE_OBJECT)
+	if (DMU_OBJECT_IS_SPECIAL(dn->dn_object)) {
+		dsl_dataset_dirty(os->os_dsl_dataset, tx);
 		return;
+	}
 
 	DNODE_VERIFY(dn);
 
@@ -1255,7 +1275,7 @@
 	dprintf("probing object %llu offset %llx level %d of %u\n",
 	    dn->dn_object, *offset, lvl, dn->dn_phys->dn_nlevels);
 
-	hole = flags & DNODE_FIND_HOLE;
+	hole = ((flags & DNODE_FIND_HOLE) != 0);
 	inc = (flags & DNODE_FIND_BACKWARDS) ? -1 : 1;
 	ASSERT(txg == 0 || !hole);
 
--- a/usr/src/uts/common/fs/zfs/dnode_sync.c	Sat Apr 18 01:13:46 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/dnode_sync.c	Sat Apr 18 13:41:47 2009 -0700
@@ -504,9 +504,6 @@
 
 /*
  * Write out the dnode's dirty buffers.
- *
- * NOTE: The dnode is kept in memory by being dirty.  Once the
- * dirty bit is cleared, it may be evicted.  Beware of this!
  */
 void
 dnode_sync(dnode_t *dn, dmu_tx_t *tx)
@@ -515,20 +512,33 @@
 	dnode_phys_t *dnp = dn->dn_phys;
 	int txgoff = tx->tx_txg & TXG_MASK;
 	list_t *list = &dn->dn_dirty_records[txgoff];
+	static const dnode_phys_t zerodn = { 0 };
 
 	ASSERT(dmu_tx_is_syncing(tx));
 	ASSERT(dnp->dn_type != DMU_OT_NONE || dn->dn_allocated_txg);
+	ASSERT(dnp->dn_type != DMU_OT_NONE ||
+	    bcmp(dnp, &zerodn, DNODE_SIZE) == 0);
 	DNODE_VERIFY(dn);
 
 	ASSERT(dn->dn_dbuf == NULL || arc_released(dn->dn_dbuf->db_buf));
 
+	if (dmu_objset_userused_enabled(dn->dn_objset) &&
+	    !DMU_OBJECT_IS_SPECIAL(dn->dn_object)) {
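+		/*
+		 * Save a copy of the pre-sync dnode so that the accounting
+		 * callback can compute the change in used space and bonus.
+		 */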
+		ASSERT(dn->dn_oldphys == NULL);
+		dn->dn_oldphys = zio_buf_alloc(sizeof (dnode_phys_t));
+		*dn->dn_oldphys = *dn->dn_phys; /* struct assignment */
+		dn->dn_phys->dn_flags |= DNODE_FLAG_USERUSED_ACCOUNTED;
+	} else {
+		/* Once we account for it, we should always account for it. */
+		ASSERT(!(dn->dn_phys->dn_flags &
+		    DNODE_FLAG_USERUSED_ACCOUNTED));
+	}
+
 	mutex_enter(&dn->dn_mtx);
 	if (dn->dn_allocated_txg == tx->tx_txg) {
 		/* The dnode is newly allocated or reallocated */
 		if (dnp->dn_type == DMU_OT_NONE) {
 			/* this is a first alloc, not a realloc */
-			/* XXX shouldn't the phys already be zeroed? */
-			bzero(dnp, DNODE_CORE_SIZE);
 			dnp->dn_nlevels = 1;
 			dnp->dn_nblkptr = dn->dn_nblkptr;
 		}
@@ -626,7 +636,7 @@
 
 	dbuf_sync_list(list, tx);
 
-	if (dn->dn_object != DMU_META_DNODE_OBJECT) {
+	if (!DMU_OBJECT_IS_SPECIAL(dn->dn_object)) {
 		ASSERT3P(list_head(list), ==, NULL);
 		dnode_rele(dn, (void *)(uintptr_t)tx->tx_txg);
 	}
--- a/usr/src/uts/common/fs/zfs/dsl_dataset.c	Sat Apr 18 01:13:46 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/dsl_dataset.c	Sat Apr 18 13:41:47 2009 -0700
@@ -975,6 +975,27 @@
 		(void) dmu_free_object(os, obj);
 	}
 
+	/*
+	 * We need to sync out all in-flight IO before we try to evict
+	 * (the dataset evict func is trying to clear the cached entries
+	 * for this dataset in the ARC).
+	 */
+	txg_wait_synced(dd->dd_pool, 0);
+
+	/*
+	 * If we managed to free all the objects in open
+	 * context, the user space accounting should be zero.
+	 */
+	if (ds->ds_phys->ds_bp.blk_fill == 0 &&
+	    dmu_objset_userused_enabled(os->os)) {
+		uint64_t count;
+
+		ASSERT(zap_count(os, DMU_USERUSED_OBJECT, &count) != 0 ||
+		    count == 0);
+		ASSERT(zap_count(os, DMU_GROUPUSED_OBJECT, &count) != 0 ||
+		    count == 0);
+	}
+
 	dmu_objset_close(os);
 	if (err != ESRCH)
 		goto out;
@@ -1059,7 +1080,6 @@
 	return (ds->ds_user_ptr);
 }
 
-
 blkptr_t *
 dsl_dataset_get_blkptr(dsl_dataset_t *ds)
 {
@@ -1488,7 +1508,7 @@
 		dmu_buf_will_dirty(ds_prev->ds_dbuf, tx);
 		if (after_branch_point &&
 		    ds_prev->ds_phys->ds_next_clones_obj != 0) {
-			VERIFY(0 == zap_remove_int(mos,
+			VERIFY3U(0, ==, zap_remove_int(mos,
 			    ds_prev->ds_phys->ds_next_clones_obj, obj, tx));
 			if (ds->ds_phys->ds_next_snap_obj != 0) {
 				VERIFY(0 == zap_add_int(mos,
--- a/usr/src/uts/common/fs/zfs/dsl_dir.c	Sat Apr 18 01:13:46 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/dsl_dir.c	Sat Apr 18 13:41:47 2009 -0700
@@ -227,19 +227,6 @@
 	return (result);
 }
 
-int
-dsl_dir_is_private(dsl_dir_t *dd)
-{
-	int rv = FALSE;
-
-	if (dd->dd_parent && dsl_dir_is_private(dd->dd_parent))
-		rv = TRUE;
-	if (dataset_name_hidden(dd->dd_myname))
-		rv = TRUE;
-	return (rv);
-}
-
-
 static int
 getcomponent(const char *path, char *component, const char **nextp)
 {
--- a/usr/src/uts/common/fs/zfs/dsl_pool.c	Sat Apr 18 01:13:46 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/dsl_pool.c	Sat Apr 18 13:41:47 2009 -0700
@@ -302,23 +302,51 @@
 
 	dp->dp_read_overhead = 0;
 	start = gethrtime();
+
 	zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
 	while (ds = txg_list_remove(&dp->dp_dirty_datasets, txg)) {
-		if (!list_link_active(&ds->ds_synced_link))
-			list_insert_tail(&dp->dp_synced_datasets, ds);
-		else
-			dmu_buf_rele(ds->ds_dbuf, ds);
+		/*
+		 * We must not sync any non-MOS datasets twice, because
+		 * we may have taken a snapshot of them.  However, we
+		 * may sync newly-created datasets on pass 2.
+		 */
+		ASSERT(!list_link_active(&ds->ds_synced_link));
+		list_insert_tail(&dp->dp_synced_datasets, ds);
 		dsl_dataset_sync(ds, zio, tx);
 	}
 	DTRACE_PROBE(pool_sync__1setup);
+	err = zio_wait(zio);
 
-	err = zio_wait(zio);
 	write_time = gethrtime() - start;
 	ASSERT(err == 0);
 	DTRACE_PROBE(pool_sync__2rootzio);
 
-	while (dstg = txg_list_remove(&dp->dp_sync_tasks, txg))
+	for (ds = list_head(&dp->dp_synced_datasets); ds;
+	    ds = list_next(&dp->dp_synced_datasets, ds))
+		dmu_objset_do_userquota_callbacks(ds->ds_user_ptr, tx);
+
+	/*
+	 * Sync the datasets again to push out the changes due to
+	 * userquota updates.  This must be done before we process the
+	 * sync tasks, because that could cause a snapshot of a dataset
+	 * whose ds_bp will be rewritten when we do this 2nd sync.
+	 */
+	zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
+	while (ds = txg_list_remove(&dp->dp_dirty_datasets, txg)) {
+		ASSERT(list_link_active(&ds->ds_synced_link));
+		dmu_buf_rele(ds->ds_dbuf, ds);
+		dsl_dataset_sync(ds, zio, tx);
+	}
+	err = zio_wait(zio);
+
+	while (dstg = txg_list_remove(&dp->dp_sync_tasks, txg)) {
+		/*
+		 * No more sync tasks should have been added while we
+		 * were syncing.
+		 */
+		ASSERT(spa_sync_pass(dp->dp_spa) == 1);
 		dsl_sync_task_group_sync(dstg, tx);
+	}
 	DTRACE_PROBE(pool_sync__3task);
 
 	start = gethrtime();
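
The dsl_pool.c change above turns dsl_pool_sync() into a two-pass dataset sync with the user/group accounting callbacks sandwiched in between, and defers the sync tasks until both passes are done.  A condensed sketch of the resulting per-txg order (paraphrased from the hunk above, not a drop-in replacement for it):

	/* pass 1: write out everything dirtied in open context */
	zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
	while (ds = txg_list_remove(&dp->dp_dirty_datasets, txg)) {
		list_insert_tail(&dp->dp_synced_datasets, ds);
		dsl_dataset_sync(ds, zio, tx);
	}
	err = zio_wait(zio);

	/* apply the per-objset user/group space deltas gathered in pass 1 */
	for (ds = list_head(&dp->dp_synced_datasets); ds;
	    ds = list_next(&dp->dp_synced_datasets, ds))
		dmu_objset_do_userquota_callbacks(ds->ds_user_ptr, tx);

	/* pass 2: write out the datasets the callbacks just re-dirtied */
	zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
	while (ds = txg_list_remove(&dp->dp_dirty_datasets, txg))
		dsl_dataset_sync(ds, zio, tx);
	err = zio_wait(zio);

	/* only now run sync tasks (snapshots etc.), so a snapshotted
	 * ds_bp already reflects the accounting updates */
	while (dstg = txg_list_remove(&dp->dp_sync_tasks, txg))
		dsl_sync_task_group_sync(dstg, tx);
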
--- a/usr/src/uts/common/fs/zfs/dsl_scrub.c	Sat Apr 18 01:13:46 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/dsl_scrub.c	Sat Apr 18 13:41:47 2009 -0700
@@ -45,6 +45,8 @@
 
 static scrub_cb_t dsl_pool_scrub_clean_cb;
 static dsl_syncfunc_t dsl_pool_scrub_cancel_sync;
+static void scrub_visitdnode(dsl_pool_t *dp, dnode_phys_t *dnp, arc_buf_t *buf,
+    uint64_t objset, uint64_t object);
 
 int zfs_scrub_min_time = 1; /* scrub for at least 1 sec each txg */
 int zfs_resilver_min_time = 3; /* resilver for at least 3 sec each txg */
@@ -483,7 +485,7 @@
 	} else if (BP_GET_TYPE(bp) == DMU_OT_DNODE) {
 		uint32_t flags = ARC_WAIT;
 		dnode_phys_t *child_dnp;
-		int i, j;
+		int i;
 		int epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT;
 
 		err = arc_read(NULL, dp->dp_spa, bp, pbuf,
@@ -498,20 +500,12 @@
 		child_dnp = buf->b_data;
 
 		for (i = 0; i < epb; i++, child_dnp++) {
-			for (j = 0; j < child_dnp->dn_nblkptr; j++) {
-				zbookmark_t czb;
-
-				SET_BOOKMARK(&czb, zb->zb_objset,
-				    zb->zb_blkid * epb + i,
-				    child_dnp->dn_nlevels - 1, j);
-				scrub_visitbp(dp, child_dnp, buf,
-				    &child_dnp->dn_blkptr[j], &czb);
-			}
+			scrub_visitdnode(dp, child_dnp, buf, zb->zb_objset,
+			    zb->zb_blkid * epb + i);
 		}
 	} else if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) {
 		uint32_t flags = ARC_WAIT;
 		objset_phys_t *osp;
-		int j;
 
 		err = arc_read_nolock(NULL, dp->dp_spa, bp,
 		    arc_getbuf_func, &buf,
@@ -527,13 +521,13 @@
 
 		traverse_zil(dp, &osp->os_zil_header);
 
-		for (j = 0; j < osp->os_meta_dnode.dn_nblkptr; j++) {
-			zbookmark_t czb;
-
-			SET_BOOKMARK(&czb, zb->zb_objset, 0,
-			    osp->os_meta_dnode.dn_nlevels - 1, j);
-			scrub_visitbp(dp, &osp->os_meta_dnode, buf,
-			    &osp->os_meta_dnode.dn_blkptr[j], &czb);
+		scrub_visitdnode(dp, &osp->os_meta_dnode,
+		    buf, zb->zb_objset, 0);
+		if (arc_buf_size(buf) >= sizeof (objset_phys_t)) {
+			scrub_visitdnode(dp, &osp->os_userused_dnode,
+			    buf, zb->zb_objset, 0);
+			scrub_visitdnode(dp, &osp->os_groupused_dnode,
+			    buf, zb->zb_objset, 0);
 		}
 	}
 
@@ -543,6 +537,21 @@
 }
 
 static void
+scrub_visitdnode(dsl_pool_t *dp, dnode_phys_t *dnp, arc_buf_t *buf,
+    uint64_t objset, uint64_t object)
+{
+	int j;
+
+	for (j = 0; j < dnp->dn_nblkptr; j++) {
+		zbookmark_t czb;
+
+		SET_BOOKMARK(&czb, objset, object, dnp->dn_nlevels - 1, j);
+		scrub_visitbp(dp, dnp, buf, &dnp->dn_blkptr[j], &czb);
+	}
+
+}
+
+static void
 scrub_visit_rootbp(dsl_pool_t *dp, dsl_dataset_t *ds, blkptr_t *bp)
 {
 	zbookmark_t zb;
--- a/usr/src/uts/common/fs/zfs/spa_errlog.c	Sat Apr 18 01:13:46 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/spa_errlog.c	Sat Apr 18 13:41:47 2009 -0700
@@ -19,12 +19,10 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 /*
  * Routines to manage the on-disk persistent error log.
  *
@@ -61,8 +59,8 @@
  * lowercase hexadecimal numbers that don't overflow.
  */
 #ifdef _KERNEL
-static uint64_t
-strtonum(char *str, char **nptr)
+uint64_t
+strtonum(const char *str, char **nptr)
 {
 	uint64_t val = 0;
 	char c;
@@ -82,7 +80,8 @@
 		str++;
 	}
 
-	*nptr = str;
+	if (nptr)
+		*nptr = (char *)str;
 
 	return (val);
 }
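
With strtonum() no longer static and its nptr argument now optional, other kernel code can reuse the errlog hex parser (the new fuidstr_to_sid() in zfs_vfsops.c below declares it extern for exactly that purpose).  A minimal usage sketch with a made-up input string:

	static void
	strtonum_example(void)
	{
		char *end;
		uint64_t objset, object;

		/* parses lowercase hex up to the first non-hex character */
		objset = strtonum("2d:1a8", &end);	/* 0x2d, *end == ':' */
		object = strtonum(end + 1, NULL);	/* nptr may now be NULL */
	}
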
--- a/usr/src/uts/common/fs/zfs/sys/dmu.h	Sat Apr 18 01:13:46 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/dmu.h	Sat Apr 18 13:41:47 2009 -0700
@@ -114,6 +114,8 @@
 	DMU_OT_FUID_SIZE,		/* FUID table size UINT64 */
 	DMU_OT_NEXT_CLONES,		/* ZAP */
 	DMU_OT_SCRUB_QUEUE,		/* ZAP */
+	DMU_OT_USERGROUP_USED,		/* ZAP */
+	DMU_OT_USERGROUP_QUOTA,		/* ZAP */
 	DMU_OT_NUMTYPES
 } dmu_object_type_t;
 
@@ -156,6 +158,9 @@
 #define	DMU_MAX_ACCESS (10<<20) /* 10MB */
 #define	DMU_MAX_DELETEBLKCNT (20480) /* ~5MB of indirect blocks */
 
+#define	DMU_USERUSED_OBJECT	(-1ULL)
+#define	DMU_GROUPUSED_OBJECT	(-2ULL)
+
 /*
  * Public routines to create, destroy, open, and close objsets.
  */
@@ -423,7 +428,7 @@
 void dmu_tx_hold_write(dmu_tx_t *tx, uint64_t object, uint64_t off, int len);
 void dmu_tx_hold_free(dmu_tx_t *tx, uint64_t object, uint64_t off,
     uint64_t len);
-void dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, char *name);
+void dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, const char *name);
 void dmu_tx_hold_bonus(dmu_tx_t *tx, uint64_t object);
 void dmu_tx_abort(dmu_tx_t *tx);
 int dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how);
@@ -563,6 +568,12 @@
     int maxlen, boolean_t *conflict);
 extern int dmu_dir_list_next(objset_t *os, int namelen, char *name,
     uint64_t *idp, uint64_t *offp);
+
+typedef void objset_used_cb_t(objset_t *os, dmu_object_type_t bonustype,
+    void *oldbonus, void *newbonus, uint64_t oldused, uint64_t newused,
+    dmu_tx_t *tx);
+extern void dmu_objset_register_type(dmu_objset_type_t ost,
+    objset_used_cb_t *cb);
 extern void dmu_objset_set_user(objset_t *os, void *user_ptr);
 extern void *dmu_objset_get_user(objset_t *os);
 
--- a/usr/src/uts/common/fs/zfs/sys/dmu_objset.h	Sat Apr 18 01:13:46 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/dmu_objset.h	Sat Apr 18 13:41:47 2009 -0700
@@ -42,12 +42,20 @@
 struct dmu_tx;
 struct objset_impl;
 
+#define	OBJSET_PHYS_SIZE 2048
+#define	OBJSET_OLD_PHYS_SIZE 1024
+
+#define	OBJSET_FLAG_USERACCOUNTING_COMPLETE	(1ULL<<0)
+
 typedef struct objset_phys {
 	dnode_phys_t os_meta_dnode;
 	zil_header_t os_zil_header;
 	uint64_t os_type;
-	char os_pad[1024 - sizeof (dnode_phys_t) - sizeof (zil_header_t) -
-	    sizeof (uint64_t)];
+	uint64_t os_flags;
+	char os_pad[OBJSET_PHYS_SIZE - sizeof (dnode_phys_t)*3 -
+	    sizeof (zil_header_t) - sizeof (uint64_t)*2];
+	dnode_phys_t os_userused_dnode;
+	dnode_phys_t os_groupused_dnode;
 } objset_phys_t;
 
 struct objset {
@@ -62,6 +70,8 @@
 	arc_buf_t *os_phys_buf;
 	objset_phys_t *os_phys;
 	dnode_t *os_meta_dnode;
+	dnode_t *os_userused_dnode;
+	dnode_t *os_groupused_dnode;
 	zilog_t *os_zil;
 	objset_t os;
 	uint8_t os_checksum;	/* can change, under dsl_dir's locks */
@@ -74,6 +84,8 @@
 	struct dmu_tx *os_synctx; /* XXX sketchy */
 	blkptr_t *os_rootbp;
 	zil_header_t os_zil_header;
+	list_t os_synced_dnodes;
+	uint64_t os_flags;
 
 	/* Protected by os_obj_lock */
 	kmutex_t os_obj_lock;
@@ -92,6 +104,7 @@
 } objset_impl_t;
 
 #define	DMU_META_DNODE_OBJECT	0
+#define	DMU_OBJECT_IS_SPECIAL(obj) ((int64_t)(obj) <= 0)
 
 #define	DMU_OS_IS_L2CACHEABLE(os)				\
 	((os)->os_secondary_cache == ZFS_CACHE_ALL ||		\
@@ -128,6 +141,10 @@
 int dmu_objset_open_impl(spa_t *spa, struct dsl_dataset *ds, blkptr_t *bp,
     objset_impl_t **osip);
 void dmu_objset_evict(struct dsl_dataset *ds, void *arg);
+void dmu_objset_do_userquota_callbacks(objset_impl_t *os, dmu_tx_t *tx);
+boolean_t dmu_objset_userused_enabled(objset_impl_t *os);
+int dmu_objset_userspace_upgrade(objset_t *os);
+boolean_t dmu_objset_userspace_present(objset_t *os);
 
 #ifdef	__cplusplus
 }
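
objset_phys_t doubles from 1 KB to 2 KB here: the original fields plus the new os_flags still fit in the first OBJSET_OLD_PHYS_SIZE bytes, and the two accounting dnodes occupy the second kilobyte.  A sketch of the layout invariant the os_pad arithmetic is meant to preserve (illustrative compile-time checks, assuming dnode_phys_t stays 512 bytes; CTASSERT is the usual OpenSolaris compile-time assertion):

	CTASSERT(sizeof (objset_phys_t) == OBJSET_PHYS_SIZE);
	/* the old 1K image is a strict prefix of the new layout */
	CTASSERT(offsetof(objset_phys_t, os_userused_dnode) ==
	    OBJSET_OLD_PHYS_SIZE);

This is also why the dsl_scrub.c hunk above only descends into os_userused_dnode and os_groupused_dnode when arc_buf_size(buf) >= sizeof (objset_phys_t): objset blocks written before this change are only OBJSET_OLD_PHYS_SIZE bytes and contain no such dnodes.
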
--- a/usr/src/uts/common/fs/zfs/sys/dnode.h	Sat Apr 18 01:13:46 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/dnode.h	Sat Apr 18 13:41:47 2009 -0700
@@ -98,7 +98,8 @@
 };
 
 /* Is dn_used in bytes?  if not, it's in multiples of SPA_MINBLOCKSIZE */
-#define	DNODE_FLAG_USED_BYTES	(1<<0)
+#define	DNODE_FLAG_USED_BYTES		(1<<0)
+#define	DNODE_FLAG_USERUSED_ACCOUNTED	(1<<1)
 
 typedef struct dnode_phys {
 	uint8_t dn_type;		/* dmu_object_type_t */
@@ -131,10 +132,7 @@
 	 */
 	krwlock_t dn_struct_rwlock;
 
-	/*
-	 * Our link on dataset's dd_dnodes list.
-	 * Protected by dd_accounting_mtx.
-	 */
+	/* Our link on dn_objset->os_dnodes list; protected by os_lock.  */
 	list_node_t dn_link;
 
 	/* immutable: */
@@ -191,6 +189,9 @@
 	/* parent IO for current sync write */
 	zio_t *dn_zio;
 
+	/* used in syncing context */
+	dnode_phys_t *dn_oldphys;
+
 	/* holds prefetch structure */
 	struct zfetch	dn_zfetch;
 } dnode_t;
--- a/usr/src/uts/common/fs/zfs/sys/dsl_deleg.h	Sat Apr 18 01:13:46 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/dsl_deleg.h	Sat Apr 18 13:41:47 2009 -0700
@@ -19,15 +19,13 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
 #ifndef	_SYS_DSL_DELEG_H
 #define	_SYS_DSL_DELEG_H
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #include <sys/dmu.h>
 #include <sys/dsl_pool.h>
 #include <sys/zfs_context.h>
@@ -51,6 +49,10 @@
 #define	ZFS_DELEG_PERM_ALLOW		"allow"
 #define	ZFS_DELEG_PERM_USERPROP		"userprop"
 #define	ZFS_DELEG_PERM_VSCAN		"vscan"
+#define	ZFS_DELEG_PERM_USERQUOTA	"userquota"
+#define	ZFS_DELEG_PERM_GROUPQUOTA	"groupquota"
+#define	ZFS_DELEG_PERM_USERUSED		"userused"
+#define	ZFS_DELEG_PERM_GROUPUSED	"groupused"
 
 /*
  * Note: the names of properties that are marked delegatable are also
--- a/usr/src/uts/common/fs/zfs/sys/dsl_dir.h	Sat Apr 18 01:13:46 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/dsl_dir.h	Sat Apr 18 13:41:47 2009 -0700
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -107,7 +107,6 @@
     const char *tail, void *tag, dsl_dir_t **);
 void dsl_dir_name(dsl_dir_t *dd, char *buf);
 int dsl_dir_namelen(dsl_dir_t *dd);
-int dsl_dir_is_private(dsl_dir_t *dd);
 uint64_t dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds,
     const char *name, dmu_tx_t *tx);
 dsl_checkfunc_t dsl_dir_destroy_check;
--- a/usr/src/uts/common/fs/zfs/sys/zfs_acl.h	Sat Apr 18 01:13:46 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/zfs_acl.h	Sat Apr 18 13:41:47 2009 -0700
@@ -166,7 +166,7 @@
 
 struct zfs_fuid_info;
 
-typedef struct zfs_acl_ids_t {
+typedef struct zfs_acl_ids {
 	uint64_t		z_fuid;		/* file owner fuid */
 	uint64_t		z_fgid;		/* file group owner fuid */
 	uint64_t		z_mode;		/* mode to set on create */
@@ -195,6 +195,7 @@
 int zfs_acl_ids_create(struct znode *, int, vattr_t *,
     cred_t *, vsecattr_t *, zfs_acl_ids_t *);
 void zfs_acl_ids_free(zfs_acl_ids_t *);
+boolean_t zfs_acl_ids_overquota(struct zfsvfs *, zfs_acl_ids_t *);
 int zfs_getacl(struct znode *, vsecattr_t *, boolean_t, cred_t *);
 int zfs_setacl(struct znode *, vsecattr_t *, boolean_t, cred_t *);
 void zfs_acl_rele(void *);
--- a/usr/src/uts/common/fs/zfs/sys/zfs_fuid.h	Sat Apr 18 01:13:46 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/zfs_fuid.h	Sat Apr 18 13:41:47 2009 -0700
@@ -49,11 +49,11 @@
  * Estimate space needed for one more fuid table entry.
  * for now assume its current size + 1K
  */
-#define	FUID_SIZE_ESTIMATE(z) (z->z_fuid_size + (SPA_MINBLOCKSIZE << 1))
+#define	FUID_SIZE_ESTIMATE(z) ((z)->z_fuid_size + (SPA_MINBLOCKSIZE << 1))
 
-#define	FUID_INDEX(x)	(x >> 32)
-#define	FUID_RID(x)	(x & 0xffffffff)
-#define	FUID_ENCODE(idx, rid) ((idx << 32) | rid)
+#define	FUID_INDEX(x)	((x) >> 32)
+#define	FUID_RID(x)	((x) & 0xffffffff)
+#define	FUID_ENCODE(idx, rid) (((uint64_t)(idx) << 32) | (rid))
 /*
  * FUIDs cause problems for the intent log
  * we need to replay the creation of the FUID,
@@ -111,6 +111,10 @@
 extern void zfs_fuid_info_free(zfs_fuid_info_t *);
 extern boolean_t zfs_groupmember(zfsvfs_t *, uint64_t, cred_t *);
 void zfs_fuid_sync(zfsvfs_t *, dmu_tx_t *);
+extern int zfs_fuid_find_by_domain(zfsvfs_t *, const char *domain,
+    char **retdomain, boolean_t addok);
+extern const char *zfs_fuid_find_by_idx(zfsvfs_t *zfsvfs, uint32_t idx);
+extern void zfs_fuid_txhold(zfsvfs_t *zfsvfs, dmu_tx_t *tx);
 #endif
 
 char *zfs_fuid_idx_domain(avl_tree_t *, uint32_t);
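
The FUID macros are reworked so their arguments are parenthesized and, in FUID_ENCODE, the index is widened to 64 bits before the shift; without the cast, a 32-bit idx would be shifted entirely out of the result.  A small round-trip sketch (function name is illustrative):

	static void
	fuid_roundtrip_example(void)
	{
		uint32_t idx = 3;			/* domain table index */
		uint32_t rid = 1000;			/* rid within that domain */
		uint64_t fuid = FUID_ENCODE(idx, rid);	/* relies on the cast */

		ASSERT3U(FUID_INDEX(fuid), ==, idx);	/* high 32 bits */
		ASSERT3U(FUID_RID(fuid), ==, rid);	/* low 32 bits */
	}
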
--- a/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h	Sat Apr 18 01:13:46 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h	Sat Apr 18 13:41:47 2009 -0700
@@ -166,6 +166,13 @@
 	zinject_record_t zc_inject_record;
 } zfs_cmd_t;
 
+typedef struct zfs_useracct {
+	char zu_domain[256];
+	uid_t zu_rid;
+	uint32_t zu_pad;
+	uint64_t zu_space;
+} zfs_useracct_t;
+
 #define	ZVOL_MAX_MINOR	(1 << 16)
 #define	ZFS_MIN_MINOR	(ZVOL_MAX_MINOR + 1)
 
--- a/usr/src/uts/common/fs/zfs/sys/zfs_vfsops.h	Sat Apr 18 01:13:46 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/zfs_vfsops.h	Sat Apr 18 13:41:47 2009 -0700
@@ -73,10 +73,12 @@
 	boolean_t	z_vscan;	/* virus scan on/off */
 	boolean_t	z_use_fuids;	/* version allows fuids */
 	boolean_t	z_replay;	/* set during ZIL replay */
-	kmutex_t	z_online_recv_lock; /* recv in prog grabs as WRITER */
+	kmutex_t	z_online_recv_lock; /* held while recv in progress */
 	uint64_t	z_version;	/* ZPL version */
 	uint64_t	z_shares_dir;	/* hidden shares dir */
 	kmutex_t	z_lock;
+	uint64_t	z_userquota_obj;
+	uint64_t	z_groupquota_obj;
 #define	ZFS_OBJ_MTX_SZ	64
 	kmutex_t	z_hold_mtx[ZFS_OBJ_MTX_SZ];	/* znode hold locks */
 };
@@ -133,6 +135,17 @@
 
 extern int zfs_suspend_fs(zfsvfs_t *zfsvfs, char *osname, int *mode);
 extern int zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname, int mode);
+extern int zfs_userspace_one(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
+    const char *domain, uint64_t rid, uint64_t *valuep);
+extern int zfs_userspace_many(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
+    uint64_t *cookiep, void *vbuf, uint64_t *bufsizep);
+extern int zfs_set_userquota(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
+    const char *domain, uint64_t rid, uint64_t quota);
+extern boolean_t zfs_usergroup_overquota(zfsvfs_t *zfsvfs,
+    boolean_t isgroup, uint64_t fuid);
+extern int zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers);
+extern int zfsvfs_create(const char *name, int mode, zfsvfs_t **zvp);
+extern void zfsvfs_free(zfsvfs_t *zfsvfs);
 
 #ifdef	__cplusplus
 }
--- a/usr/src/uts/common/fs/zfs/sys/zfs_znode.h	Sat Apr 18 01:13:46 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/zfs_znode.h	Sat Apr 18 13:41:47 2009 -0700
@@ -93,6 +93,8 @@
 
 /*
  * Special attributes for master node.
+ * "userquota@" and "groupquota@" are also valid (from
+ * zfs_userquota_prop_prefixes[]).
  */
 #define	ZFS_FSID		"FSID"
 #define	ZFS_UNLINKED_SET	"DELETE_QUEUE"
@@ -310,7 +312,6 @@
 extern int	zfs_sync(vfs_t *vfsp, short flag, cred_t *cr);
 extern dev_t	zfs_cmpldev(uint64_t);
 extern int	zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value);
-extern int	zfs_set_version(const char *name, uint64_t newvers);
 extern int	zfs_get_stats(objset_t *os, nvlist_t *nv);
 extern void	zfs_znode_dmu_fini(znode_t *);
 
@@ -337,7 +338,7 @@
     vsecattr_t *vsecp, zfs_fuid_info_t *fuidp);
 extern void zfs_xvattr_set(znode_t *zp, xvattr_t *xvap);
 extern void zfs_upgrade(zfsvfs_t *zfsvfs, dmu_tx_t *tx);
-int zfs_create_share_dir(zfsvfs_t *zfsvfs, dmu_tx_t *tx);
+extern int zfs_create_share_dir(zfsvfs_t *zfsvfs, dmu_tx_t *tx);
 
 extern caddr_t zfs_map_page(page_t *, enum seg_rw);
 extern void zfs_unmap_page(page_t *, caddr_t);
--- a/usr/src/uts/common/fs/zfs/zfs_acl.c	Sat Apr 18 01:13:46 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/zfs_acl.c	Sat Apr 18 13:41:47 2009 -0700
@@ -1900,6 +1900,12 @@
 	acl_ids->z_fuidp = NULL;
 }
 
+boolean_t
+zfs_acl_ids_overquota(zfsvfs_t *zfsvfs, zfs_acl_ids_t *acl_ids)
+{
+	return (zfs_usergroup_overquota(zfsvfs, B_FALSE, acl_ids->z_fuid) ||
+	    zfs_usergroup_overquota(zfsvfs, B_TRUE, acl_ids->z_fgid));
+}
 
 /*
  * Retrieve a file's ACL
@@ -2119,18 +2125,8 @@
 		dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, aclp->z_acl_bytes);
 	}
 	fuid_dirtied = zfsvfs->z_fuid_dirty;
-	if (fuid_dirtied) {
-		if (zfsvfs->z_fuid_obj == 0) {
-			dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
-			dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
-			    FUID_SIZE_ESTIMATE(zfsvfs));
-			dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, FALSE, NULL);
-		} else {
-			dmu_tx_hold_bonus(tx, zfsvfs->z_fuid_obj);
-			dmu_tx_hold_write(tx, zfsvfs->z_fuid_obj, 0,
-			    FUID_SIZE_ESTIMATE(zfsvfs));
-		}
-	}
+	if (fuid_dirtied)
+		zfs_fuid_txhold(zfsvfs, tx);
 
 	error = dmu_tx_assign(tx, TXG_NOWAIT);
 	if (error) {
--- a/usr/src/uts/common/fs/zfs/zfs_dir.c	Sat Apr 18 01:13:46 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/zfs_dir.c	Sat Apr 18 13:41:47 2009 -0700
@@ -816,23 +816,17 @@
 	if ((error = zfs_acl_ids_create(zp, IS_XATTR, vap, cr, NULL,
 	    &acl_ids)) != 0)
 		return (error);
+	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) {
+		zfs_acl_ids_free(&acl_ids);
+		return (EDQUOT);
+	}
 
 	tx = dmu_tx_create(zfsvfs->z_os);
 	dmu_tx_hold_bonus(tx, zp->z_id);
 	dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
 	fuid_dirtied = zfsvfs->z_fuid_dirty;
-	if (fuid_dirtied) {
-		if (zfsvfs->z_fuid_obj == 0) {
-			dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
-			dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
-			    FUID_SIZE_ESTIMATE(zfsvfs));
-			dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, FALSE, NULL);
-		} else {
-			dmu_tx_hold_bonus(tx, zfsvfs->z_fuid_obj);
-			dmu_tx_hold_write(tx, zfsvfs->z_fuid_obj, 0,
-			    FUID_SIZE_ESTIMATE(zfsvfs));
-		}
-	}
+	if (fuid_dirtied)
+		zfs_fuid_txhold(zfsvfs, tx);
 	error = dmu_tx_assign(tx, TXG_NOWAIT);
 	if (error) {
 		zfs_acl_ids_free(&acl_ids);
--- a/usr/src/uts/common/fs/zfs/zfs_fuid.c	Sat Apr 18 01:13:46 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/zfs_fuid.c	Sat Apr 18 13:41:47 2009 -0700
@@ -294,13 +294,14 @@
 /*
  * Query domain table for a given domain.
  *
- * If domain isn't found it is added to AVL trees and
- * the zfsvfs->z_fuid_dirty flag will be set to TRUE.
- * it will then be necessary for the caller or another
- * thread to detect the dirty table and sync out the changes.
+ * If domain isn't found and addok is set, it is added to AVL trees and
+ * the zfsvfs->z_fuid_dirty flag will be set to TRUE.  It will then be
+ * necessary for the caller or another thread to detect the dirty table
+ * and sync out the changes.
  */
-static int
-zfs_fuid_find_by_domain(zfsvfs_t *zfsvfs, const char *domain, char **retdomain)
+int
+zfs_fuid_find_by_domain(zfsvfs_t *zfsvfs, const char *domain,
+    char **retdomain, boolean_t addok)
 {
 	fuid_domain_t searchnode, *findnode;
 	avl_index_t loc;
@@ -312,14 +313,14 @@
 	 * for the user nobody.
 	 */
 	if (domain[0] == '\0') {
-		*retdomain = nulldomain;
+		if (retdomain)
+			*retdomain = nulldomain;
 		return (0);
 	}
 
 	searchnode.f_ksid = ksid_lookupdomain(domain);
-	if (retdomain) {
+	if (retdomain)
 		*retdomain = searchnode.f_ksid->kd_name;
-	}
 	if (!zfsvfs->z_fuid_loaded)
 		zfs_fuid_init(zfsvfs);
 
@@ -331,7 +332,7 @@
 		rw_exit(&zfsvfs->z_fuid_lock);
 		ksiddomain_rele(searchnode.f_ksid);
 		return (findnode->f_idx);
-	} else {
+	} else if (addok) {
 		fuid_domain_t *domnode;
 		uint64_t retidx;
 
@@ -351,6 +352,8 @@
 		zfsvfs->z_fuid_dirty = B_TRUE;
 		rw_exit(&zfsvfs->z_fuid_lock);
 		return (retidx);
+	} else {
+		return (-1);
 	}
 }
 
@@ -360,7 +363,7 @@
  * Returns a pointer from an avl node of the domain string.
  *
  */
-static char *
+const char *
 zfs_fuid_find_by_idx(zfsvfs_t *zfsvfs, uint32_t idx)
 {
 	char *domain;
@@ -397,7 +400,7 @@
     cred_t *cr, zfs_fuid_type_t type)
 {
 	uint32_t index = FUID_INDEX(fuid);
-	char *domain;
+	const char *domain;
 	uid_t id;
 
 	if (index == 0)
@@ -514,7 +517,7 @@
 	rid = ksid_getrid(ksid);
 	domain = ksid_getdomain(ksid);
 
-	idx = zfs_fuid_find_by_domain(zfsvfs, domain, &kdomain);
+	idx = zfs_fuid_find_by_domain(zfsvfs, domain, &kdomain, B_TRUE);
 
 	zfs_fuid_node_add(fuidp, kdomain, rid, idx, id, type);
 
@@ -605,7 +608,7 @@
 		}
 	}
 
-	idx = zfs_fuid_find_by_domain(zfsvfs, domain, &kdomain);
+	idx = zfs_fuid_find_by_domain(zfsvfs, domain, &kdomain, B_TRUE);
 
 	if (!zfsvfs->z_replay)
 		zfs_fuid_node_add(fuidpp, kdomain,
@@ -702,7 +705,7 @@
 					return (B_TRUE);
 				}
 			} else {
-				char *domain;
+				const char *domain;
 
 				domain = zfs_fuid_find_by_idx(zfsvfs, idx);
 				ASSERT(domain != NULL);
@@ -725,4 +728,19 @@
 	gid = zfs_fuid_map_id(zfsvfs, id, cr, ZFS_GROUP);
 	return (groupmember(gid, cr));
 }
+
+void
+zfs_fuid_txhold(zfsvfs_t *zfsvfs, dmu_tx_t *tx)
+{
+	if (zfsvfs->z_fuid_obj == 0) {
+		dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
+		dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
+		    FUID_SIZE_ESTIMATE(zfsvfs));
+		dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, FALSE, NULL);
+	} else {
+		dmu_tx_hold_bonus(tx, zfsvfs->z_fuid_obj);
+		dmu_tx_hold_write(tx, zfsvfs->z_fuid_obj, 0,
+		    FUID_SIZE_ESTIMATE(zfsvfs));
+	}
+}
 #endif
--- a/usr/src/uts/common/fs/zfs/zfs_ioctl.c	Sat Apr 18 01:13:46 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/zfs_ioctl.c	Sat Apr 18 13:41:47 2009 -0700
@@ -93,6 +93,15 @@
 	boolean_t		zvec_pool_check;
 } zfs_ioc_vec_t;
 
+/* This array is indexed by zfs_userquota_prop_t */
+static const char *userquota_perms[] = {
+	ZFS_DELEG_PERM_USERUSED,
+	ZFS_DELEG_PERM_USERQUOTA,
+	ZFS_DELEG_PERM_GROUPUSED,
+	ZFS_DELEG_PERM_GROUPQUOTA,
+};
+
+static int zfs_ioc_userspace_upgrade(zfs_cmd_t *zc);
 static void clear_props(char *dataset, nvlist_t *props, nvlist_t *newprops);
 static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *,
     boolean_t *);
@@ -703,6 +712,55 @@
 	}
 }
 
+static int
+zfs_secpolicy_userspace_one(zfs_cmd_t *zc, cred_t *cr)
+{
+	int err = zfs_secpolicy_read(zc, cr);
+	if (err)
+		return (err);
+
+	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
+		return (EINVAL);
+
+	if (zc->zc_value[0] == 0) {
+		/*
+		 * They are asking about a POSIX uid/gid.  If it is the
+		 * caller's own id, allow it.
+		 */
+		if (zc->zc_objset_type == ZFS_PROP_USERUSED ||
+		    zc->zc_objset_type == ZFS_PROP_USERQUOTA) {
+			if (zc->zc_guid == crgetuid(cr))
+				return (0);
+		} else {
+			if (groupmember(zc->zc_guid, cr))
+				return (0);
+		}
+	}
+
+	return (zfs_secpolicy_write_perms(zc->zc_name,
+	    userquota_perms[zc->zc_objset_type], cr));
+}
+
+static int
+zfs_secpolicy_userspace_many(zfs_cmd_t *zc, cred_t *cr)
+{
+	int err = zfs_secpolicy_read(zc, cr);
+	if (err)
+		return (err);
+
+	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
+		return (EINVAL);
+
+	return (zfs_secpolicy_write_perms(zc->zc_name,
+	    userquota_perms[zc->zc_objset_type], cr));
+}
+
+static int
+zfs_secpolicy_userspace_upgrade(zfs_cmd_t *zc, cred_t *cr)
+{
+	return (zfs_secpolicy_setprop(zc->zc_name, ZFS_PROP_VERSION, cr));
+}
+
 /*
  * Returns the nvlist as specified by the user in the zfs_cmd_t.
  */
@@ -762,6 +820,69 @@
 }
 
 static int
+getzfsvfs(const char *dsname, zfsvfs_t **zvp)
+{
+	objset_t *os;
+	int error;
+
+	error = dmu_objset_open(dsname, DMU_OST_ZFS,
+	    DS_MODE_USER | DS_MODE_READONLY, &os);
+	if (error)
+		return (error);
+
+	mutex_enter(&os->os->os_user_ptr_lock);
+	*zvp = dmu_objset_get_user(os);
+	if (*zvp) {
+		VFS_HOLD((*zvp)->z_vfs);
+	} else {
+		error = ESRCH;
+	}
+	mutex_exit(&os->os->os_user_ptr_lock);
+	dmu_objset_close(os);
+	return (error);
+}
+
+/*
+ * Find a zfsvfs_t for a mounted filesystem, or create our own, in which
+ * case its z_vfs will be NULL, and it will be opened as the owner.
+ */
+static int
+zfsvfs_hold(const char *name, boolean_t readonly, void *tag, zfsvfs_t **zvp)
+{
+	int error = 0;
+	int mode = DS_MODE_OWNER | (readonly ? DS_MODE_READONLY : 0);
+
+	if (getzfsvfs(name, zvp) != 0)
+		error = zfsvfs_create(name, mode, zvp);
+	if (error == 0) {
+		rrw_enter(&(*zvp)->z_teardown_lock, RW_READER, tag);
+		if ((*zvp)->z_unmounted) {
+			/*
+			 * XXX we could probably try again, since the unmounting
+			 * thread should be just about to disassociate the
+			 * objset from the zfsvfs.
+			 */
+			rrw_exit(&(*zvp)->z_teardown_lock, tag);
+			return (EBUSY);
+		}
+	}
+	return (error);
+}
+
+static void
+zfsvfs_rele(zfsvfs_t *zfsvfs, void *tag)
+{
+	rrw_exit(&zfsvfs->z_teardown_lock, tag);
+
+	if (zfsvfs->z_vfs) {
+		VFS_RELE(zfsvfs->z_vfs);
+	} else {
+		dmu_objset_close(zfsvfs->z_os);
+		zfsvfs_free(zfsvfs);
+	}
+}
+
+static int
 zfs_ioc_pool_create(zfs_cmd_t *zc)
 {
 	int error;
@@ -1313,6 +1434,23 @@
 	return (err);
 }
 
+static boolean_t
+dataset_name_hidden(const char *name)
+{
+	/*
+	 * Skip over datasets that are not visible in this zone,
+	 * internal datasets (which have a $ in their name), and
+	 * temporary datasets (which have a % in their name).
+	 */
+	if (strchr(name, '$') != NULL)
+		return (B_TRUE);
+	if (strchr(name, '%') != NULL)
+		return (B_TRUE);
+	if (!INGLOBALZONE(curproc) && !zone_dataset_visible(name, NULL))
+		return (B_TRUE);
+	return (B_FALSE);
+}
+
 /*
  * inputs:
  * zc_name		name of filesystem
@@ -1321,6 +1459,7 @@
  *
  * outputs:
  * zc_name		name of next filesystem
+ * zc_cookie		zap cursor
  * zc_objset_stats	stats
  * zc_nvlist_dst	property nvlist
  * zc_nvlist_dst_size	size of property nvlist
@@ -1362,15 +1501,10 @@
 		    NULL, &zc->zc_cookie);
 		if (error == ENOENT)
 			error = ESRCH;
-	} while (error == 0 && !INGLOBALZONE(curproc) &&
-	    !zone_dataset_visible(zc->zc_name, NULL));
+	} while (error == 0 && dataset_name_hidden(zc->zc_name));
 	dmu_objset_close(os);
 
-	/*
-	 * If it's a hidden dataset (ie. with a '$' in its name), don't
-	 * try to get stats for it.  Userland will skip over it.
-	 */
-	if (error == 0 && strchr(zc->zc_name, '$') == NULL)
+	if (error == 0)
 		error = zfs_ioc_objset_stats(zc); /* fill in the stats */
 
 	return (error);
@@ -1399,9 +1533,10 @@
 	if (error)
 		return (error == ENOENT ? ESRCH : error);
 
-	if (zc->zc_cookie == 0)
+	if (zc->zc_cookie == 0) {
 		(void) dmu_objset_find(zc->zc_name, dmu_objset_prefetch,
 		    NULL, DS_FIND_SNAPSHOTS);
+	}
 	/*
 	 * A dataset name of maximum length cannot have any snapshots,
 	 * so exit immediately.
@@ -1434,6 +1569,7 @@
 	uint64_t intval;
 	char *strval;
 	nvlist_t *genericnvl;
+	boolean_t issnap = (strchr(name, '@') != NULL);
 
 	/*
 	 * First validate permission to set all of the properties
@@ -1448,16 +1584,35 @@
 			 * If this is a user-defined property, it must be a
 			 * string, and there is no further validation to do.
 			 */
-			if (!zfs_prop_user(propname) ||
-			    nvpair_type(elem) != DATA_TYPE_STRING)
-				return (EINVAL);
-
-			if (error = zfs_secpolicy_write_perms(name,
-			    ZFS_DELEG_PERM_USERPROP, CRED()))
-				return (error);
-			continue;
+			if (zfs_prop_user(propname) &&
+			    nvpair_type(elem) == DATA_TYPE_STRING) {
+				if (error = zfs_secpolicy_write_perms(name,
+				    ZFS_DELEG_PERM_USERPROP, CRED()))
+					return (error);
+				continue;
+			}
+
+			if (!issnap && zfs_prop_userquota(propname) &&
+			    nvpair_type(elem) == DATA_TYPE_UINT64_ARRAY) {
+				const char *perm;
+				const char *up = zfs_userquota_prop_prefixes
+				    [ZFS_PROP_USERQUOTA];
+				if (strncmp(propname, up, strlen(up)) == 0)
+					perm = ZFS_DELEG_PERM_USERQUOTA;
+				else
+					perm = ZFS_DELEG_PERM_GROUPQUOTA;
+				if (error = zfs_secpolicy_write_perms(name,
+				    perm, CRED()))
+					return (error);
+				continue;
+			}
+
+			return (EINVAL);
 		}
 
+		if (issnap)
+			return (EINVAL);
+
 		if ((error = zfs_secpolicy_setprop(name, prop, CRED())) != 0)
 			return (error);
 
@@ -1493,8 +1648,7 @@
 			break;
 
 		case ZFS_PROP_COPIES:
-			if (zfs_earlier_version(name,
-			    SPA_VERSION_DITTO_BLOCKS))
+			if (zfs_earlier_version(name, SPA_VERSION_DITTO_BLOCKS))
 				return (ENOTSUP);
 			break;
 
@@ -1520,13 +1674,44 @@
 		zfs_prop_t prop = zfs_name_to_prop(propname);
 
 		if (prop == ZPROP_INVAL) {
-			VERIFY(nvpair_value_string(elem, &strval) == 0);
-			error = dsl_prop_set(name, propname, 1,
-			    strlen(strval) + 1, strval);
-			if (error == 0)
-				continue;
-			else
-				goto out;
+			if (zfs_prop_userquota(propname)) {
+				uint64_t *valary;
+				unsigned int vallen;
+				const char *domain;
+				zfs_userquota_prop_t type;
+				uint64_t rid;
+				uint64_t quota;
+				zfsvfs_t *zfsvfs;
+
+				VERIFY(nvpair_value_uint64_array(elem,
+				    &valary, &vallen) == 0);
+				VERIFY(vallen == 3);
+				type = valary[0];
+				rid = valary[1];
+				quota = valary[2];
+				domain = propname +
+				    strlen(zfs_userquota_prop_prefixes[type]);
+
+				error = zfsvfs_hold(name, B_FALSE, FTAG,
+				    &zfsvfs);
+				if (error == 0) {
+					error = zfs_set_userquota(zfsvfs,
+					    type, domain, rid, quota);
+					zfsvfs_rele(zfsvfs, FTAG);
+				}
+				if (error == 0)
+					continue;
+				else
+					goto out;
+			} else if (zfs_prop_user(propname)) {
+				VERIFY(nvpair_value_string(elem, &strval) == 0);
+				error = dsl_prop_set(name, propname, 1,
+				    strlen(strval) + 1, strval);
+				if (error == 0)
+					continue;
+				else
+					goto out;
+			}
 		}
 
 		switch (prop) {
@@ -1570,10 +1755,26 @@
 			break;
 
 		case ZFS_PROP_VERSION:
-			if ((error = nvpair_value_uint64(elem, &intval)) != 0 ||
-			    (error = zfs_set_version(name, intval)) != 0)
+		{
+			zfsvfs_t *zfsvfs;
+
+			if ((error = nvpair_value_uint64(elem, &intval)) != 0)
+				goto out;
+			if ((error = zfsvfs_hold(name, B_FALSE, FTAG,
+			    &zfsvfs)) != 0)
+				goto out;
+			error = zfs_set_version(zfsvfs, intval);
+			zfsvfs_rele(zfsvfs, FTAG);
+
+			if (error == 0 && intval >= ZPL_VERSION_USERSPACE) {
+				zfs_cmd_t zc = { 0 };
+				(void) strcpy(zc.zc_name, name);
+				(void) zfs_ioc_userspace_upgrade(&zc);
+			}
+			if (error)
 				goto out;
 			break;
+		}
 
 		default:
 			if (nvpair_type(elem) == DATA_TYPE_STRING) {
@@ -1980,11 +2181,10 @@
  * processing.
  */
 static int
-zfs_fill_zplprops_impl(objset_t *os, uint64_t default_zplver,
+zfs_fill_zplprops_impl(objset_t *os, uint64_t zplver,
     boolean_t fuids_ok, nvlist_t *createprops, nvlist_t *zplprops,
     boolean_t *is_ci)
 {
-	uint64_t zplver = default_zplver;
 	uint64_t sense = ZFS_PROP_UNDEFINED;
 	uint64_t norm = ZFS_PROP_UNDEFINED;
 	uint64_t u8 = ZFS_PROP_UNDEFINED;
@@ -2072,6 +2272,8 @@
 	ASSERT(cp != NULL);
 	cp[0] = '\0';
 
+	if (zfs_earlier_version(dataset, SPA_VERSION_USERSPACE))
+		zplver = ZPL_VERSION_USERSPACE - 1;
 	if (zfs_earlier_version(dataset, SPA_VERSION_FUID)) {
 		zplver = ZPL_VERSION_FUID - 1;
 		fuids_ok = B_FALSE;
@@ -2255,6 +2457,7 @@
  * zc_name	name of filesystem
  * zc_value	short name of snapshot
  * zc_cookie	recursive flag
+ * zc_nvlist_src[_size] property list
  *
  * outputs:	none
  */
@@ -2390,31 +2593,19 @@
 	if (error)
 		return (error);
 
-	if (dmu_objset_type(os) == DMU_OST_ZFS) {
-		mutex_enter(&os->os->os_user_ptr_lock);
-		zfsvfs = dmu_objset_get_user(os);
-		if (zfsvfs != NULL)
-			VFS_HOLD(zfsvfs->z_vfs);
-		mutex_exit(&os->os->os_user_ptr_lock);
-	}
-
-	if (zfsvfs != NULL) {
-		char *osname;
+	if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) {
 		int mode;
 
-		osname = kmem_alloc(MAXNAMELEN, KM_SLEEP);
-		error = zfs_suspend_fs(zfsvfs, osname, &mode);
+		error = zfs_suspend_fs(zfsvfs, NULL, &mode);
 		if (error == 0) {
 			int resume_err;
 
-			ASSERT(strcmp(osname, zc->zc_name) == 0);
 			error = dmu_objset_rollback(os);
-			resume_err = zfs_resume_fs(zfsvfs, osname, mode);
+			resume_err = zfs_resume_fs(zfsvfs, zc->zc_name, mode);
 			error = error ? error : resume_err;
 		} else {
 			dmu_objset_close(os);
 		}
-		kmem_free(osname, MAXNAMELEN);
 		VFS_RELE(zfsvfs->z_vfs);
 	} else {
 		error = dmu_objset_rollback(os);
@@ -2529,32 +2720,26 @@
 		return (EBADF);
 	}
 
-	if (dmu_objset_open(tofs, DMU_OST_ANY,
-	    DS_MODE_USER | DS_MODE_READONLY, &os) == 0) {
-		/*
-		 * Try to get the zfsvfs for the receiving objset.
-		 * There won't be one if we're operating on a zvol,
-		 * if the objset doesn't exist yet, or is not mounted.
-		 */
-		mutex_enter(&os->os->os_user_ptr_lock);
-		if (zfsvfs = dmu_objset_get_user(os)) {
-			if (!mutex_tryenter(&zfsvfs->z_online_recv_lock)) {
-				mutex_exit(&os->os->os_user_ptr_lock);
-				dmu_objset_close(os);
-				zfsvfs = NULL;
-				error = EBUSY;
-				goto out;
-			}
-			VFS_HOLD(zfsvfs->z_vfs);
+	if (getzfsvfs(tofs, &zfsvfs) == 0) {
+		if (!mutex_tryenter(&zfsvfs->z_online_recv_lock)) {
+			VFS_RELE(zfsvfs->z_vfs);
+			zfsvfs = NULL;
+			error = EBUSY;
+			goto out;
 		}
-		mutex_exit(&os->os->os_user_ptr_lock);
-
 		/*
 		 * If new properties are supplied, they are to completely
 		 * replace the existing ones, so stash away the existing ones.
 		 */
 		if (props)
-			(void) dsl_prop_get_all(os, &origprops, TRUE);
+			(void) dsl_prop_get_all(zfsvfs->z_os, &origprops, TRUE);
+	} else if (props && dmu_objset_open(tofs, DMU_OST_ANY,
+	    DS_MODE_USER | DS_MODE_READONLY, &os) == 0) {
+		/*
+		 * Get the props even if there was no zfsvfs (zvol or
+		 * unmounted zpl).
+		 */
+		(void) dsl_prop_get_all(os, &origprops, TRUE);
 
 		dmu_objset_close(os);
 	}
@@ -2827,6 +3012,120 @@
 }
 
 /*
+ * Retrieve a single {user|group}{used|quota}@... property.
+ *
+ * inputs:
+ * zc_name	name of filesystem
+ * zc_objset_type zfs_userquota_prop_t
+ * zc_value	domain name (e.g. "S-1-234-567-89")
+ * zc_guid	RID/UID/GID
+ *
+ * outputs:
+ * zc_cookie	property value
+ */
+static int
+zfs_ioc_userspace_one(zfs_cmd_t *zc)
+{
+	zfsvfs_t *zfsvfs;
+	int error;
+
+	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
+		return (EINVAL);
+
+	error = zfsvfs_hold(zc->zc_name, B_TRUE, FTAG, &zfsvfs);
+	if (error)
+		return (error);
+
+	error = zfs_userspace_one(zfsvfs,
+	    zc->zc_objset_type, zc->zc_value, zc->zc_guid, &zc->zc_cookie);
+	zfsvfs_rele(zfsvfs, FTAG);
+
+	return (error);
+}
+
+/*
+ * inputs:
+ * zc_name		name of filesystem
+ * zc_cookie		zap cursor
+ * zc_objset_type	zfs_userquota_prop_t
+ * zc_nvlist_dst[_size] buffer to fill (not really an nvlist)
+ *
+ * outputs:
+ * zc_nvlist_dst[_size]	data buffer (array of zfs_useracct_t)
+ * zc_cookie	zap cursor
+ */
+static int
+zfs_ioc_userspace_many(zfs_cmd_t *zc)
+{
+	zfsvfs_t *zfsvfs;
+	int error;
+
+	error = zfsvfs_hold(zc->zc_name, B_TRUE, FTAG, &zfsvfs);
+	if (error)
+		return (error);
+
+	int bufsize = zc->zc_nvlist_dst_size;
+	void *buf = kmem_alloc(bufsize, KM_SLEEP);
+
+	error = zfs_userspace_many(zfsvfs, zc->zc_objset_type, &zc->zc_cookie,
+	    buf, &zc->zc_nvlist_dst_size);
+
+	if (error == 0) {
+		error = xcopyout(buf,
+		    (void *)(uintptr_t)zc->zc_nvlist_dst,
+		    zc->zc_nvlist_dst_size);
+	}
+	kmem_free(buf, bufsize);
+	zfsvfs_rele(zfsvfs, FTAG);
+
+	return (error);
+}
+
+/*
+ * inputs:
+ * zc_name		name of filesystem
+ *
+ * outputs:
+ * none
+ */
+static int
+zfs_ioc_userspace_upgrade(zfs_cmd_t *zc)
+{
+	objset_t *os;
+	int error;
+	zfsvfs_t *zfsvfs;
+
+	if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) {
+		if (!dmu_objset_userused_enabled(zfsvfs->z_os->os)) {
+			/*
+			 * If userused is not enabled, it may be because the
+			 * objset needs to be closed & reopened (to grow the
+			 * objset_phys_t).  Suspending and resuming the fs does that.
+			 */
+			int mode;
+			error = zfs_suspend_fs(zfsvfs, NULL, &mode);
+			if (error == 0) {
+				error = zfs_resume_fs(zfsvfs,
+				    zc->zc_name, mode);
+			}
+		}
+		if (error == 0)
+			error = dmu_objset_userspace_upgrade(zfsvfs->z_os);
+		VFS_RELE(zfsvfs->z_vfs);
+	} else {
+		error = dmu_objset_open(zc->zc_name, DMU_OST_ANY,
+		    DS_MODE_USER, &os);
+		if (error)
+			return (error);
+
+		error = dmu_objset_userspace_upgrade(os);
+		dmu_objset_close(os);
+	}
+
+	return (error);
+}
+
+/*
  * We don't want to have a hard dependency
  * against some special symbols in sharefs
  * nfs, and smbsrv.  Determine them if needed when
@@ -3206,27 +3505,24 @@
 	{ zfs_ioc_inherit_prop, zfs_secpolicy_inherit, DATASET_NAME, B_TRUE,
 	    B_TRUE },
 	{ zfs_ioc_smb_acl, zfs_secpolicy_smb_acl, DATASET_NAME, B_FALSE,
-	    B_FALSE }
+	    B_FALSE },
+	{ zfs_ioc_userspace_one, zfs_secpolicy_userspace_one,
+	    DATASET_NAME, B_FALSE, B_FALSE },
+	{ zfs_ioc_userspace_many, zfs_secpolicy_userspace_many,
+	    DATASET_NAME, B_FALSE, B_FALSE },
+	{ zfs_ioc_userspace_upgrade, zfs_secpolicy_userspace_upgrade,
+	    DATASET_NAME, B_FALSE, B_TRUE },
 };
 
 int
 pool_status_check(const char *name, zfs_ioc_namecheck_t type)
 {
 	spa_t *spa;
-	char pool[ZFS_MAXNAMELEN];
 	int error;
 
 	ASSERT(type == POOL_NAME || type == DATASET_NAME);
 
-	(void) strlcpy(pool, name, ZFS_MAXNAMELEN);
-	if (type == DATASET_NAME) {
-		char *p;
-
-		if ((p = strpbrk(pool, "/@")) != NULL)
-			*p = '\0';
-	}
-
-	error = spa_open(pool, &spa, FTAG);
+	error = spa_open(name, &spa, FTAG);
 	if (error == 0) {
 		if (spa_suspended(spa))
 			error = EAGAIN;
@@ -3292,7 +3588,7 @@
 	rc = xcopyout(zc, (void *)arg, sizeof (zfs_cmd_t));
 	if (error == 0) {
 		error = rc;
-		if (zfs_ioc_vec[vec].zvec_his_log == B_TRUE)
+		if (zfs_ioc_vec[vec].zvec_his_log)
 			zfs_log_history(zc);
 	}
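
The two userspace ioctls follow a simple cursor protocol: zc_cookie carries a serialized ZAP cursor between calls, and zc_nvlist_dst_size is the buffer size on entry and the number of bytes filled on return, so a caller loops until nothing more comes back.  An illustrative userland loop; the ioctl request constant is not shown in this patch, so ZFS_IOC_USERSPACE_MANY below is an assumption, and the /dev/zfs open and headers are omitted:

	static int
	dump_userused(int zfs_fd, const char *fsname)
	{
		zfs_cmd_t zc = { 0 };
		char buf[64 * sizeof (zfs_useracct_t)];

		(void) strlcpy(zc.zc_name, fsname, sizeof (zc.zc_name));
		zc.zc_objset_type = ZFS_PROP_USERUSED;
		zc.zc_nvlist_dst = (uintptr_t)buf;

		for (;;) {
			zc.zc_nvlist_dst_size = sizeof (buf);
			if (ioctl(zfs_fd, ZFS_IOC_USERSPACE_MANY, &zc) != 0)
				return (errno);
			if (zc.zc_nvlist_dst_size == 0)
				break;	/* ZAP cursor exhausted */
			/* process zc.zc_nvlist_dst_size bytes of
			 * zfs_useracct_t records, e.g. with the
			 * print_useracct_buf() sketched earlier */
		}
		return (0);
	}
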
 
--- a/usr/src/uts/common/fs/zfs/zfs_vfsops.c	Sat Apr 18 01:13:46 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/zfs_vfsops.c	Sat Apr 18 13:41:47 2009 -0700
@@ -568,6 +568,393 @@
 
 }
 
+static void
+uidacct(objset_t *os, boolean_t isgroup, uint64_t fuid,
+    int64_t delta, dmu_tx_t *tx)
+{
+	uint64_t used = 0;
+	char buf[32];
+	int err;
+	uint64_t obj = isgroup ? DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT;
+
+	if (delta == 0)
+		return;
+
+	(void) snprintf(buf, sizeof (buf), "%llx", (longlong_t)fuid);
+	err = zap_lookup(os, obj, buf, 8, 1, &used);
+	ASSERT(err == 0 || err == ENOENT);
+	/* no underflow/overflow */
+	ASSERT(delta > 0 || used >= -delta);
+	ASSERT(delta < 0 || used + delta > used);
+	used += delta;
+	if (used == 0)
+		err = zap_remove(os, obj, buf, tx);
+	else
+		err = zap_update(os, obj, buf, 8, 1, &used, tx);
+	ASSERT(err == 0);
+}
+
+static void
+zfs_space_delta_cb(objset_t *os, dmu_object_type_t bonustype,
+    void *oldbonus, void *newbonus,
+    uint64_t oldused, uint64_t newused, dmu_tx_t *tx)
+{
+	znode_phys_t *oldznp = oldbonus;
+	znode_phys_t *newznp = newbonus;
+
+	if (bonustype != DMU_OT_ZNODE)
+		return;
+
+	/* We charge 512 for the dnode (if it's allocated). */
+	if (oldznp->zp_gen != 0)
+		oldused += DNODE_SIZE;
+	if (newznp->zp_gen != 0)
+		newused += DNODE_SIZE;
+
+	if (oldznp->zp_uid == newznp->zp_uid) {
+		uidacct(os, B_FALSE, oldznp->zp_uid, newused-oldused, tx);
+	} else {
+		uidacct(os, B_FALSE, oldznp->zp_uid, -oldused, tx);
+		uidacct(os, B_FALSE, newznp->zp_uid, newused, tx);
+	}
+
+	if (oldznp->zp_gid == newznp->zp_gid) {
+		uidacct(os, B_TRUE, oldznp->zp_gid, newused-oldused, tx);
+	} else {
+		uidacct(os, B_TRUE, oldznp->zp_gid, -oldused, tx);
+		uidacct(os, B_TRUE, newznp->zp_gid, newused, tx);
+	}
+}
+
+static void
+fuidstr_to_sid(zfsvfs_t *zfsvfs, const char *fuidstr,
+    char *domainbuf, int buflen, uid_t *ridp)
+{
+	extern uint64_t strtonum(const char *str, char **nptr);
+	uint64_t fuid;
+	const char *domain;
+
+	fuid = strtonum(fuidstr, NULL);
+
+	domain = zfs_fuid_find_by_idx(zfsvfs, FUID_INDEX(fuid));
+	if (domain)
+		(void) strlcpy(domainbuf, domain, buflen);
+	else
+		domainbuf[0] = '\0';
+	*ridp = FUID_RID(fuid);
+}
+
+static uint64_t
+zfs_userquota_prop_to_obj(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type)
+{
+	switch (type) {
+	case ZFS_PROP_USERUSED:
+		return (DMU_USERUSED_OBJECT);
+	case ZFS_PROP_GROUPUSED:
+		return (DMU_GROUPUSED_OBJECT);
+	case ZFS_PROP_USERQUOTA:
+		return (zfsvfs->z_userquota_obj);
+	case ZFS_PROP_GROUPQUOTA:
+		return (zfsvfs->z_groupquota_obj);
+	}
+	return (0);
+}
+
+int
+zfs_userspace_many(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
+    uint64_t *cookiep, void *vbuf, uint64_t *bufsizep)
+{
+	int error;
+	zap_cursor_t zc;
+	zap_attribute_t za;
+	zfs_useracct_t *buf = vbuf;
+	uint64_t obj;
+
+	if (!dmu_objset_userspace_present(zfsvfs->z_os))
+		return (ENOTSUP);
+
+	obj = zfs_userquota_prop_to_obj(zfsvfs, type);
+	if (obj == 0) {
+		*bufsizep = 0;
+		return (0);
+	}
+
+	for (zap_cursor_init_serialized(&zc, zfsvfs->z_os, obj, *cookiep);
+	    (error = zap_cursor_retrieve(&zc, &za)) == 0;
+	    zap_cursor_advance(&zc)) {
+		if ((uintptr_t)buf - (uintptr_t)vbuf + sizeof (zfs_useracct_t) >
+		    *bufsizep)
+			break;
+
+		fuidstr_to_sid(zfsvfs, za.za_name,
+		    buf->zu_domain, sizeof (buf->zu_domain), &buf->zu_rid);
+
+		buf->zu_space = za.za_first_integer;
+		buf++;
+	}
+	if (error == ENOENT)
+		error = 0;
+
+	ASSERT3U((uintptr_t)buf - (uintptr_t)vbuf, <=, *bufsizep);
+	*bufsizep = (uintptr_t)buf - (uintptr_t)vbuf;
+	*cookiep = zap_cursor_serialize(&zc);
+	zap_cursor_fini(&zc);
+	return (error);
+}
+
+/*
+ * buf must be big enough (e.g., 32 bytes)
+ */
+static int
+id_to_fuidstr(zfsvfs_t *zfsvfs, const char *domain, uid_t rid,
+    char *buf, boolean_t addok)
+{
+	uint64_t fuid;
+	int domainid = 0;
+
+	if (domain && domain[0]) {
+		domainid = zfs_fuid_find_by_domain(zfsvfs, domain, NULL, addok);
+		if (domainid == -1)
+			return (ENOENT);
+	}
+	fuid = FUID_ENCODE(domainid, rid);
+	(void) sprintf(buf, "%llx", (longlong_t)fuid);
+	return (0);
+}
+
+int
+zfs_userspace_one(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
+    const char *domain, uint64_t rid, uint64_t *valp)
+{
+	char buf[32];
+	int err;
+	uint64_t obj;
+
+	*valp = 0;
+
+	if (!dmu_objset_userspace_present(zfsvfs->z_os))
+		return (ENOTSUP);
+
+	obj = zfs_userquota_prop_to_obj(zfsvfs, type);
+	if (obj == 0)
+		return (0);
+
+	err = id_to_fuidstr(zfsvfs, domain, rid, buf, B_FALSE);
+	if (err)
+		return (err);
+
+	err = zap_lookup(zfsvfs->z_os, obj, buf, 8, 1, valp);
+	if (err == ENOENT)
+		err = 0;
+	return (err);
+}
+
+int
+zfs_set_userquota(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
+    const char *domain, uint64_t rid, uint64_t quota)
+{
+	char buf[32];
+	int err;
+	dmu_tx_t *tx;
+	uint64_t *objp;
+	boolean_t fuid_dirtied;
+
+	if (type != ZFS_PROP_USERQUOTA && type != ZFS_PROP_GROUPQUOTA)
+		return (EINVAL);
+
+	if (zfsvfs->z_version < ZPL_VERSION_USERSPACE)
+		return (ENOTSUP);
+
+	objp = (type == ZFS_PROP_USERQUOTA) ? &zfsvfs->z_userquota_obj :
+	    &zfsvfs->z_groupquota_obj;
+
+	err = id_to_fuidstr(zfsvfs, domain, rid, buf, B_TRUE);
+	if (err)
+		return (err);
+	fuid_dirtied = zfsvfs->z_fuid_dirty;
+
+	tx = dmu_tx_create(zfsvfs->z_os);
+	dmu_tx_hold_zap(tx, *objp ? *objp : DMU_NEW_OBJECT, B_TRUE, NULL);
+	if (*objp == 0) {
+		dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE,
+		    zfs_userquota_prop_prefixes[type]);
+	}
+	if (fuid_dirtied)
+		zfs_fuid_txhold(zfsvfs, tx);
+	err = dmu_tx_assign(tx, TXG_WAIT);
+	if (err) {
+		dmu_tx_abort(tx);
+		return (err);
+	}
+
+	mutex_enter(&zfsvfs->z_lock);
+	if (*objp == 0) {
+		*objp = zap_create(zfsvfs->z_os, DMU_OT_USERGROUP_QUOTA,
+		    DMU_OT_NONE, 0, tx);
+		VERIFY(0 == zap_add(zfsvfs->z_os, MASTER_NODE_OBJ,
+		    zfs_userquota_prop_prefixes[type], 8, 1, objp, tx));
+	}
+	mutex_exit(&zfsvfs->z_lock);
+
+	if (quota == 0) {
+		err = zap_remove(zfsvfs->z_os, *objp, buf, tx);
+		if (err == ENOENT)
+			err = 0;
+	} else {
+		err = zap_update(zfsvfs->z_os, *objp, buf, 8, 1, &quota, tx);
+	}
+	ASSERT(err == 0);
+	if (fuid_dirtied)
+		zfs_fuid_sync(zfsvfs, tx);
+	dmu_tx_commit(tx);
+	return (err);
+}
+
+boolean_t
+zfs_usergroup_overquota(zfsvfs_t *zfsvfs, boolean_t isgroup, uint64_t fuid)
+{
+	char buf[32];
+	uint64_t used, quota, usedobj, quotaobj;
+	int err;
+
+	usedobj = isgroup ? DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT;
+	quotaobj = isgroup ? zfsvfs->z_groupquota_obj : zfsvfs->z_userquota_obj;
+
+	if (quotaobj == 0 || zfsvfs->z_replay)
+		return (B_FALSE);
+
+	(void) sprintf(buf, "%llx", (longlong_t)fuid);
+	err = zap_lookup(zfsvfs->z_os, quotaobj, buf, 8, 1, &quota);
+	if (err != 0)
+		return (B_FALSE);
+
+	err = zap_lookup(zfsvfs->z_os, usedobj, buf, 8, 1, &used);
+	if (err != 0)
+		return (B_FALSE);
+	return (used >= quota);
+}
+
+int
+zfsvfs_create(const char *osname, int mode, zfsvfs_t **zvp)
+{
+	objset_t *os;
+	zfsvfs_t *zfsvfs;
+	uint64_t zval;
+	int i, error;
+
+	if (error = dsl_prop_get_integer(osname, "readonly", &zval, NULL))
+		return (error);
+	if (zval)
+		mode |= DS_MODE_READONLY;
+
+	error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &os);
+	if (error == EROFS) {
+		mode |= DS_MODE_READONLY;
+		error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &os);
+	}
+	if (error)
+		return (error);
+
+	/*
+	 * Initialize the zfs-specific filesystem structure.
+	 * Should probably make this a kmem cache, shuffle fields,
+	 * and just bzero up to z_hold_mtx[].
+	 */
+	zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP);
+	zfsvfs->z_vfs = NULL;
+	zfsvfs->z_parent = zfsvfs;
+	zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE;
+	zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE;
+	zfsvfs->z_os = os;
+
+	error = zfs_get_zplprop(os, ZFS_PROP_VERSION, &zfsvfs->z_version);
+	if (error) {
+		goto out;
+	} else if (zfsvfs->z_version > ZPL_VERSION) {
+		(void) printf("Mismatched versions:  File system "
+		    "is version %llu on-disk format, which is "
+		    "incompatible with this software version %lld!",
+		    (u_longlong_t)zfsvfs->z_version, ZPL_VERSION);
+		error = ENOTSUP;
+		goto out;
+	}
+
+	if ((error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &zval)) != 0)
+		goto out;
+	zfsvfs->z_norm = (int)zval;
+
+	if ((error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &zval)) != 0)
+		goto out;
+	zfsvfs->z_utf8 = (zval != 0);
+
+	if ((error = zfs_get_zplprop(os, ZFS_PROP_CASE, &zval)) != 0)
+		goto out;
+	zfsvfs->z_case = (uint_t)zval;
+
+	/*
+	 * Fold case on file systems that are always or sometimes case
+	 * insensitive.
+	 */
+	if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE ||
+	    zfsvfs->z_case == ZFS_CASE_MIXED)
+		zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER;
+
+	zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os);
+
+	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1,
+	    &zfsvfs->z_root);
+	if (error)
+		goto out;
+	ASSERT(zfsvfs->z_root != 0);
+
+	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1,
+	    &zfsvfs->z_unlinkedobj);
+	if (error)
+		goto out;
+
+	error = zap_lookup(os, MASTER_NODE_OBJ,
+	    zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA],
+	    8, 1, &zfsvfs->z_userquota_obj);
+	if (error && error != ENOENT)
+		goto out;
+
+	error = zap_lookup(os, MASTER_NODE_OBJ,
+	    zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA],
+	    8, 1, &zfsvfs->z_groupquota_obj);
+	if (error && error != ENOENT)
+		goto out;
+
+	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES, 8, 1,
+	    &zfsvfs->z_fuid_obj);
+	if (error && error != ENOENT)
+		goto out;
+
+	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SHARES_DIR, 8, 1,
+	    &zfsvfs->z_shares_dir);
+	if (error && error != ENOENT)
+		goto out;
+
+	mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
+	mutex_init(&zfsvfs->z_online_recv_lock, NULL, MUTEX_DEFAULT, NULL);
+	mutex_init(&zfsvfs->z_lock, NULL, MUTEX_DEFAULT, NULL);
+	list_create(&zfsvfs->z_all_znodes, sizeof (znode_t),
+	    offsetof(znode_t, z_link_node));
+	rrw_init(&zfsvfs->z_teardown_lock);
+	rw_init(&zfsvfs->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL);
+	rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL);
+	for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
+		mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL);
+
+	*zvp = zfsvfs;
+	return (0);
+
+out:
+	dmu_objset_close(os);
+	*zvp = NULL;
+	kmem_free(zfsvfs, sizeof (zfsvfs_t));
+	return (error);
+}
+
 static int
 zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting)
 {
@@ -646,9 +1033,13 @@
 	return (0);
 }
 
-static void
-zfs_freezfsvfs(zfsvfs_t *zfsvfs)
+void
+zfsvfs_free(zfsvfs_t *zfsvfs)
 {
+	int i;
+
+	zfs_fuid_destroy(zfsvfs);
+
 	mutex_destroy(&zfsvfs->z_znodes_lock);
 	mutex_destroy(&zfsvfs->z_online_recv_lock);
 	mutex_destroy(&zfsvfs->z_lock);
@@ -656,42 +1047,38 @@
 	rrw_destroy(&zfsvfs->z_teardown_lock);
 	rw_destroy(&zfsvfs->z_teardown_inactive_lock);
 	rw_destroy(&zfsvfs->z_fuid_lock);
+	for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
+		mutex_destroy(&zfsvfs->z_hold_mtx[i]);
 	kmem_free(zfsvfs, sizeof (zfsvfs_t));
 }
 
+static void
+zfs_set_fuid_feature(zfsvfs_t *zfsvfs)
+{
+	zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os);
+	if (zfsvfs->z_use_fuids && zfsvfs->z_vfs) {
+		vfs_set_feature(zfsvfs->z_vfs, VFSFT_XVATTR);
+		vfs_set_feature(zfsvfs->z_vfs, VFSFT_SYSATTR_VIEWS);
+		vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACEMASKONACCESS);
+		vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACLONCREATE);
+	}
+}
+
 static int
 zfs_domount(vfs_t *vfsp, char *osname)
 {
 	dev_t mount_dev;
-	uint64_t recordsize, readonly;
+	uint64_t recordsize, fsid_guid;
 	int error = 0;
-	int mode;
 	zfsvfs_t *zfsvfs;
-	znode_t *zp = NULL;
 
 	ASSERT(vfsp);
 	ASSERT(osname);
 
-	/*
-	 * Initialize the zfs-specific filesystem structure.
-	 * Should probably make this a kmem cache, shuffle fields,
-	 * and just bzero up to z_hold_mtx[].
-	 */
-	zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP);
+	error = zfsvfs_create(osname, DS_MODE_OWNER, &zfsvfs);
+	if (error)
+		return (error);
 	zfsvfs->z_vfs = vfsp;
-	zfsvfs->z_parent = zfsvfs;
-	zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE;
-	zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE;
-	zfsvfs->z_fuid_dirty = B_FALSE;
-
-	mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
-	mutex_init(&zfsvfs->z_online_recv_lock, NULL, MUTEX_DEFAULT, NULL);
-	mutex_init(&zfsvfs->z_lock, NULL, MUTEX_DEFAULT, NULL);
-	list_create(&zfsvfs->z_all_znodes, sizeof (znode_t),
-	    offsetof(znode_t, z_link_node));
-	rrw_init(&zfsvfs->z_teardown_lock);
-	rw_init(&zfsvfs->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL);
-	rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL);
 
 	/* Initialize the generic filesystem structure. */
 	vfsp->vfs_bcount = 0;
@@ -713,39 +1100,24 @@
 	vfsp->vfs_flag |= VFS_NOTRUNC;
 	vfsp->vfs_data = zfsvfs;
 
-	if (error = dsl_prop_get_integer(osname, "readonly", &readonly, NULL))
-		goto out;
-
-	mode = DS_MODE_OWNER;
-	if (readonly)
-		mode |= DS_MODE_READONLY;
-
-	error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os);
-	if (error == EROFS) {
-		mode = DS_MODE_OWNER | DS_MODE_READONLY;
-		error = dmu_objset_open(osname, DMU_OST_ZFS, mode,
-		    &zfsvfs->z_os);
-	}
-
-	if (error)
-		goto out;
-
-	if (error = zfs_init_fs(zfsvfs, &zp))
-		goto out;
-
-	/* The call to zfs_init_fs leaves the vnode held, release it here. */
-	VN_RELE(ZTOV(zp));
+	/*
+	 * The fsid is 64 bits, composed of an 8-bit fs type, which
+	 * separates our fsid from any other filesystem types, and a
+	 * 56-bit objset unique ID.  The objset unique ID is unique to
+	 * all objsets open on this system, provided by unique_create().
+	 * The 8-bit fs type must be put in the low bits of fsid[1]
+	 * because that's where other Solaris filesystems put it.
+	 */
+	fsid_guid = dmu_objset_fsid_guid(zfsvfs->z_os);
+	ASSERT((fsid_guid & ~((1ULL<<56)-1)) == 0);
+	vfsp->vfs_fsid.val[0] = fsid_guid;
+	vfsp->vfs_fsid.val[1] = ((fsid_guid>>32) << 8) |
+	    zfsfstype & 0xFF;
 
 	/*
 	 * Set features for file system.
 	 */
-	zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os);
-	if (zfsvfs->z_use_fuids) {
-		vfs_set_feature(vfsp, VFSFT_XVATTR);
-		vfs_set_feature(vfsp, VFSFT_SYSATTR_VIEWS);
-		vfs_set_feature(vfsp, VFSFT_ACEMASKONACCESS);
-		vfs_set_feature(vfsp, VFSFT_ACLONCREATE);
-	}
+	zfs_set_fuid_feature(zfsvfs);
 	if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) {
 		vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS);
 		vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE);
@@ -758,7 +1130,6 @@
 	if (dmu_objset_is_snapshot(zfsvfs->z_os)) {
 		uint64_t pval;
 
-		ASSERT(mode & DS_MODE_READONLY);
 		atime_changed_cb(zfsvfs, B_FALSE);
 		readonly_changed_cb(zfsvfs, B_TRUE);
 		if (error = dsl_prop_get_integer(osname, "xattr", &pval, NULL))
@@ -773,9 +1144,8 @@
 		zfsctl_create(zfsvfs);
 out:
 	if (error) {
-		if (zfsvfs->z_os)
-			dmu_objset_close(zfsvfs->z_os);
-		zfs_freezfsvfs(zfsvfs);
+		dmu_objset_close(zfsvfs->z_os);
+		zfsvfs_free(zfsvfs);
 	} else {
 		atomic_add_32(&zfs_active_fs_count, 1);
 	}
@@ -1452,15 +1822,16 @@
  * 'z_teardown_inactive_lock' write held.
  */
 int
-zfs_suspend_fs(zfsvfs_t *zfsvfs, char *name, int *mode)
+zfs_suspend_fs(zfsvfs_t *zfsvfs, char *name, int *modep)
 {
 	int error;
 
 	if ((error = zfsvfs_teardown(zfsvfs, B_FALSE)) != 0)
 		return (error);
 
-	*mode = zfsvfs->z_os->os_mode;
-	dmu_objset_name(zfsvfs->z_os, name);
+	*modep = zfsvfs->z_os->os_mode;
+	if (name)
+		dmu_objset_name(zfsvfs->z_os, name);
 	dmu_objset_close(zfsvfs->z_os);
 
 	return (0);
@@ -1519,12 +1890,6 @@
 zfs_freevfs(vfs_t *vfsp)
 {
 	zfsvfs_t *zfsvfs = vfsp->vfs_data;
-	int i;
-
-	for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
-		mutex_destroy(&zfsvfs->z_hold_mtx[i]);
-
-	zfs_fuid_destroy(zfsvfs);
 
 	/*
 	 * If this is a snapshot, we have an extra VFS_HOLD on our parent
@@ -1533,7 +1898,7 @@
 	if (zfsvfs->z_issnap)
 		VFS_RELE(zfsvfs->z_parent->z_vfs);
 
-	zfs_freezfsvfs(zfsvfs);
+	zfsvfs_free(zfsvfs);
 
 	atomic_add_32(&zfs_active_fs_count, -1);
 }
@@ -1592,6 +1957,8 @@
 	 * Initialize znode cache, vnode ops, etc...
 	 */
 	zfs_znode_init();
+
+	dmu_objset_register_type(DMU_OST_ZFS, zfs_space_delta_cb);
 }
 
 void
@@ -1608,54 +1975,46 @@
 }
 
 int
-zfs_set_version(const char *name, uint64_t newvers)
+zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers)
 {
 	int error;
-	objset_t *os;
+	objset_t *os = zfsvfs->z_os;
 	dmu_tx_t *tx;
-	uint64_t curvers;
-
-	/*
-	 * XXX for now, require that the filesystem be unmounted.  Would
-	 * be nice to find the zfsvfs_t and just update that if
-	 * possible.
-	 */
 
 	if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION)
 		return (EINVAL);
 
-	error = dmu_objset_open(name, DMU_OST_ZFS, DS_MODE_OWNER, &os);
-	if (error)
-		return (error);
-
-	error = zap_lookup(os, MASTER_NODE_OBJ, ZPL_VERSION_STR,
-	    8, 1, &curvers);
-	if (error)
-		goto out;
-	if (newvers < curvers) {
-		error = EINVAL;
-		goto out;
-	}
+	if (newvers < zfsvfs->z_version)
+		return (EINVAL);
 
 	tx = dmu_tx_create(os);
-	dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, 0, ZPL_VERSION_STR);
+	dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_FALSE, ZPL_VERSION_STR);
 	error = dmu_tx_assign(tx, TXG_WAIT);
 	if (error) {
 		dmu_tx_abort(tx);
-		goto out;
+		return (error);
 	}
-	error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR, 8, 1,
-	    &newvers, tx);
+	error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR,
+	    8, 1, &newvers, tx);
+
+	if (error) {
+		dmu_tx_commit(tx);
+		return (error);
+	}
 
 	spa_history_internal_log(LOG_DS_UPGRADE,
 	    dmu_objset_spa(os), tx, CRED(),
-	    "oldver=%llu newver=%llu dataset = %llu", curvers, newvers,
-	    dmu_objset_id(os));
+	    "oldver=%llu newver=%llu dataset = %llu",
+	    zfsvfs->z_version, newvers, dmu_objset_id(os));
+
 	dmu_tx_commit(tx);
 
-out:
-	dmu_objset_close(os);
-	return (error);
+	zfsvfs->z_version = newvers;
+
+	if (zfsvfs->z_version >= ZPL_VERSION_FUID)
+		zfs_set_fuid_feature(zfsvfs);
+
+	return (0);
 }
 
 /*
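
Throughout the new accounting code the per-user and per-group ZAPs are keyed by the FUID rendered as lowercase hex ("%llx" in uidacct(), id_to_fuidstr() and zfs_usergroup_overquota() above), so a plain POSIX id, which has FUID index 0, keys directly on its hex value.  A small worked sketch (helper name is illustrative):

	static void
	userused_key_example(zfsvfs_t *zfsvfs)
	{
		char buf[32];
		uint64_t used = 0;

		/* posix uid 1000: no SID domain, FUID index 0, key "3e8" */
		(void) snprintf(buf, sizeof (buf), "%llx",
		    (longlong_t)FUID_ENCODE(0, 1000));
		(void) zap_lookup(zfsvfs->z_os, DMU_USERUSED_OBJECT,
		    buf, 8, 1, &used);
		/* a quota for the same user lives under the same key in
		 * zfsvfs->z_userquota_obj; zfs_usergroup_overquota()
		 * returns used >= quota */
	}
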
--- a/usr/src/uts/common/fs/zfs/zfs_vnops.c	Sat Apr 18 01:13:46 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/zfs_vnops.c	Sat Apr 18 13:41:47 2009 -0700
@@ -667,6 +667,13 @@
 		/*
 		 * Start a transaction.
 		 */
+		if (zfs_usergroup_overquota(zfsvfs,
+		    B_FALSE, zp->z_phys->zp_uid) ||
+		    zfs_usergroup_overquota(zfsvfs,
+		    B_TRUE, zp->z_phys->zp_gid)) {
+			error = EDQUOT;
+			break;
+		}
 		woff = uio->uio_loffset;
 		tx = dmu_tx_create(zfsvfs->z_os);
 		dmu_tx_hold_bonus(tx, zp->z_id);
@@ -1183,23 +1190,16 @@
 		if ((error = zfs_acl_ids_create(dzp, 0, vap, cr, vsecp,
 		    &acl_ids)) != 0)
 			goto out;
+		if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) {
+			error = EDQUOT;
+			goto out;
+		}
 
 		tx = dmu_tx_create(os);
 		dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
 		fuid_dirtied = zfsvfs->z_fuid_dirty;
-		if (fuid_dirtied) {
-			if (zfsvfs->z_fuid_obj == 0) {
-				dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
-				dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
-				    FUID_SIZE_ESTIMATE(zfsvfs));
-				dmu_tx_hold_zap(tx, MASTER_NODE_OBJ,
-				    FALSE, NULL);
-			} else {
-				dmu_tx_hold_bonus(tx, zfsvfs->z_fuid_obj);
-				dmu_tx_hold_write(tx, zfsvfs->z_fuid_obj, 0,
-				    FUID_SIZE_ESTIMATE(zfsvfs));
-			}
-		}
+		if (fuid_dirtied)
+			zfs_fuid_txhold(zfsvfs, tx);
 		dmu_tx_hold_bonus(tx, dzp->z_id);
 		dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
 		if (acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
@@ -1608,6 +1608,11 @@
 		ZFS_EXIT(zfsvfs);
 		return (error);
 	}
+	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) {
+		zfs_dirent_unlock(dl);
+		ZFS_EXIT(zfsvfs);
+		return (EDQUOT);
+	}
 
 	/*
 	 * Add a new entry to the directory.
@@ -1616,18 +1621,8 @@
 	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname);
 	dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
 	fuid_dirtied = zfsvfs->z_fuid_dirty;
-	if (fuid_dirtied) {
-		if (zfsvfs->z_fuid_obj == 0) {
-			dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
-			dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
-			    FUID_SIZE_ESTIMATE(zfsvfs));
-			dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, FALSE, NULL);
-		} else {
-			dmu_tx_hold_bonus(tx, zfsvfs->z_fuid_obj);
-			dmu_tx_hold_write(tx, zfsvfs->z_fuid_obj, 0,
-			    FUID_SIZE_ESTIMATE(zfsvfs));
-		}
-	}
+	if (fuid_dirtied)
+		zfs_fuid_txhold(zfsvfs, tx);
 	if (acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE)
 		dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
 		    0, SPA_MAXBLOCKSIZE);
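
zfs_mkdir() gets the same pair of changes as zfs_create() above: an early EDQUOT bail-out via zfs_acl_ids_overquota() and the consolidated zfs_fuid_txhold(). Given the uid/gid checks added to zfs_write(), zfs_acl_ids_overquota() presumably just applies zfs_usergroup_overquota() to the owner and group the new object would receive, roughly (field names assumed from zfs_acl_ids_t):

	boolean_t
	zfs_acl_ids_overquota(zfsvfs_t *zfsvfs, zfs_acl_ids_t *acl_ids)
	{
		/* z_fuid/z_fgid: owner and group chosen for the new object */
		return (zfs_usergroup_overquota(zfsvfs, B_FALSE, acl_ids->z_fuid) ||
		    zfs_usergroup_overquota(zfsvfs, B_TRUE, acl_ids->z_fgid));
	}
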
@@ -2623,11 +2618,8 @@
 
 		new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT);
 
-		if (err = zfs_acl_chmod_setattr(zp, &aclp, new_mode)) {
-			dmu_tx_abort(tx);
-			ZFS_EXIT(zfsvfs);
-			return (err);
-		}
+		if (err = zfs_acl_chmod_setattr(zp, &aclp, new_mode))
+			goto out;
 		if (pzp->zp_acl.z_acl_extern_obj) {
 			/* Are we upgrading ACL from old V0 format to new V1 */
 			if (zfsvfs->z_version <= ZPL_VERSION_FUID &&
@@ -2652,22 +2644,28 @@
 	if (mask & (AT_UID | AT_GID)) {
 		if (pzp->zp_xattr) {
 			err = zfs_zget(zp->z_zfsvfs, pzp->zp_xattr, &attrzp);
-			if (err) {
-				dmu_tx_abort(tx);
-				ZFS_EXIT(zfsvfs);
-				if (aclp)
-					zfs_acl_free(aclp);
-				return (err);
-			}
+			if (err)
+				goto out;
 			dmu_tx_hold_bonus(tx, attrzp->z_id);
 		}
 		if (mask & AT_UID) {
 			new_uid = zfs_fuid_create(zfsvfs,
 			    (uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp);
+			if (new_uid != pzp->zp_uid &&
+			    zfs_usergroup_overquota(zfsvfs, B_FALSE, new_uid)) {
+				err = EDQUOT;
+				goto out;
+			}
 		}
+
 		if (mask & AT_GID) {
 			new_gid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_gid,
 			    cr, ZFS_GROUP, &fuidp);
+			if (new_gid != pzp->zp_gid &&
+			    zfs_usergroup_overquota(zfsvfs, B_TRUE, new_gid)) {
+				err = EDQUOT;
+				goto out;
+			}
 		}
 		fuid_dirtied = zfsvfs->z_fuid_dirty;
 		if (fuid_dirtied) {
@@ -2687,22 +2685,9 @@
 
 	err = dmu_tx_assign(tx, TXG_NOWAIT);
 	if (err) {
-		if (attrzp)
-			VN_RELE(ZTOV(attrzp));
-
-		if (aclp) {
-			zfs_acl_free(aclp);
-			aclp = NULL;
-		}
-
-		if (err == ERESTART) {
+		if (err == ERESTART)
 			dmu_tx_wait(tx);
-			dmu_tx_abort(tx);
-			goto top;
-		}
-		dmu_tx_abort(tx);
-		ZFS_EXIT(zfsvfs);
-		return (err);
+		goto out;
 	}
 
 	dmu_buf_will_dirty(zp->z_dbuf, tx);
@@ -2740,9 +2725,6 @@
 			attrzp->z_phys->zp_gid = new_gid;
 	}
 
-	if (aclp)
-		zfs_acl_free(aclp);
-
 	if (attrzp)
 		mutex_exit(&attrzp->z_lock);
 
@@ -2810,15 +2792,29 @@
 	if (mask != 0)
 		zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp);
 
-	if (fuidp)
-		zfs_fuid_info_free(fuidp);
 	mutex_exit(&zp->z_lock);
 
-	dmu_tx_commit(tx);
-
+out:
 	if (attrzp)
 		VN_RELE(ZTOV(attrzp));
 
+	if (aclp) {
+		zfs_acl_free(aclp);
+		aclp = NULL;
+	}
+
+	if (fuidp) {
+		zfs_fuid_info_free(fuidp);
+		fuidp = NULL;
+	}
+
+	if (err)
+		dmu_tx_abort(tx);
+	else
+		dmu_tx_commit(tx);
+
+	if (err == ERESTART)
+		goto top;
 
 	ZFS_EXIT(zfsvfs);
 	return (err);
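
The zfs_setattr() hunks above do two things: they add EDQUOT checks when a chown/chgrp would hand the object to a user or group that is already over quota (only when the id actually changes), and they collapse the duplicated error paths into a single out: label that releases attrzp, aclp and fuidp, aborts or commits the transaction, and retries from top: on ERESTART. The retry idiom it converges on is the standard TXG_NOWAIT pattern; an illustrative skeleton (the comments stand in for the real setattr work):

	static int
	setattr_retry_sketch(objset_t *os)
	{
		dmu_tx_t *tx;
		int err;
	top:
		tx = dmu_tx_create(os);
		/* ... dmu_tx_hold_*() for everything this tx will dirty ... */
		err = dmu_tx_assign(tx, TXG_NOWAIT);
		if (err != 0) {
			if (err == ERESTART)
				dmu_tx_wait(tx);	/* let the pool make progress */
			dmu_tx_abort(tx);
			if (err == ERESTART)
				goto top;		/* rebuild the tx and retry */
			return (err);
		}
		/* ... apply the changes under tx ... */
		dmu_tx_commit(tx);
		return (0);
	}
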
@@ -3286,6 +3282,12 @@
 	}
 
 	VERIFY(0 == zfs_acl_ids_create(dzp, 0, vap, cr, NULL, &acl_ids));
+	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) {
+		zfs_acl_ids_free(&acl_ids);
+		zfs_dirent_unlock(dl);
+		ZFS_EXIT(zfsvfs);
+		return (EDQUOT);
+	}
 	tx = dmu_tx_create(zfsvfs->z_os);
 	fuid_dirtied = zfsvfs->z_fuid_dirty;
 	dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len));
@@ -3293,18 +3295,8 @@
 	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
 	if (acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE)
 		dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, SPA_MAXBLOCKSIZE);
-	if (fuid_dirtied) {
-		if (zfsvfs->z_fuid_obj == 0) {
-			dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
-			dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
-			    FUID_SIZE_ESTIMATE(zfsvfs));
-			dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, FALSE, NULL);
-		} else {
-			dmu_tx_hold_bonus(tx, zfsvfs->z_fuid_obj);
-			dmu_tx_hold_write(tx, zfsvfs->z_fuid_obj, 0,
-			    FUID_SIZE_ESTIMATE(zfsvfs));
-		}
-	}
+	if (fuid_dirtied)
+		zfs_fuid_txhold(zfsvfs, tx);
 	error = dmu_tx_assign(tx, TXG_NOWAIT);
 	if (error) {
 		zfs_acl_ids_free(&acl_ids);
@@ -3357,7 +3349,6 @@
 	 * Insert the new object into the directory.
 	 */
 	(void) zfs_link_create(dl, zp, tx, ZNEW);
-out:
 	if (error == 0) {
 		uint64_t txtype = TX_SYMLINK;
 		if (flags & FIGNORECASE)
@@ -3642,6 +3633,12 @@
 			pvn_write_done(trunc, flags);
 		len = filesz - off;
 	}
+
+	if (zfs_usergroup_overquota(zfsvfs, B_FALSE, zp->z_phys->zp_uid) ||
+	    zfs_usergroup_overquota(zfsvfs, B_TRUE, zp->z_phys->zp_gid)) {
+		err = EDQUOT;
+		goto out;
+	}
 top:
 	tx = dmu_tx_create(zfsvfs->z_os);
 	dmu_tx_hold_write(tx, zp->z_id, off, len);
--- a/usr/src/uts/common/fs/zfs/zfs_znode.c	Sat Apr 18 01:13:46 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/zfs_znode.c	Sat Apr 18 13:41:47 2009 -0700
@@ -206,17 +206,6 @@
 	POINTER_INVALIDATE(&ozp->z_zfsvfs);
 }
 
-/*
- * Wrapper function for ZFS_ENTER that returns 0 if successful and otherwise
- * returns a non-zero error code.
- */
-static int
-zfs_enter(zfsvfs_t *zfsvfs)
-{
-	ZFS_ENTER(zfsvfs);
-	return (0);
-}
-
 /*ARGSUSED*/
 static kmem_cbrc_t
 zfs_znode_move(void *buf, void *newbuf, size_t size, void *arg)
@@ -241,8 +230,11 @@
 
 	/*
 	 * Ensure that the filesystem is not unmounted during the move.
+	 * This is the equivalent of ZFS_ENTER().
 	 */
-	if (zfs_enter(zfsvfs) != 0) {		/* ZFS_ENTER */
+	rrw_enter(&zfsvfs->z_teardown_lock, RW_READER, FTAG);
+	if (zfsvfs->z_unmounted) {
+		ZFS_EXIT(zfsvfs);
 		ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_unmounted);
 		return (KMEM_CBRC_DONT_KNOW);
 	}
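
The private zfs_enter() wrapper existed only because ZFS_ENTER() returns from the calling function on failure, which composes poorly with a callback that must report KMEM_CBRC_DONT_KNOW. The move callback now open-codes the same two steps. For reference, ZFS_ENTER() is roughly the following (paraphrased from zfs_znode.h):

	#define	ZFS_ENTER(zfsvfs) \
		{ \
			rrw_enter(&(zfsvfs)->z_teardown_lock, RW_READER, FTAG); \
			if ((zfsvfs)->z_unmounted) { \
				ZFS_EXIT(zfsvfs); \
				return (EIO); \
			} \
		}
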
@@ -472,107 +464,6 @@
 }
 
 /*
- * zfs_init_fs - Initialize the zfsvfs struct and the file system
- *	incore "master" object.  Verify version compatibility.
- */
-int
-zfs_init_fs(zfsvfs_t *zfsvfs, znode_t **zpp)
-{
-	extern int zfsfstype;
-
-	objset_t	*os = zfsvfs->z_os;
-	int		i, error;
-	uint64_t fsid_guid;
-	uint64_t zval;
-
-	*zpp = NULL;
-
-	error = zfs_get_zplprop(os, ZFS_PROP_VERSION, &zfsvfs->z_version);
-	if (error) {
-		return (error);
-	} else if (zfsvfs->z_version > ZPL_VERSION) {
-		(void) printf("Mismatched versions:  File system "
-		    "is version %llu on-disk format, which is "
-		    "incompatible with this software version %lld!",
-		    (u_longlong_t)zfsvfs->z_version, ZPL_VERSION);
-		return (ENOTSUP);
-	}
-
-	if ((error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &zval)) != 0)
-		return (error);
-	zfsvfs->z_norm = (int)zval;
-	if ((error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &zval)) != 0)
-		return (error);
-	zfsvfs->z_utf8 = (zval != 0);
-	if ((error = zfs_get_zplprop(os, ZFS_PROP_CASE, &zval)) != 0)
-		return (error);
-	zfsvfs->z_case = (uint_t)zval;
-	/*
-	 * Fold case on file systems that are always or sometimes case
-	 * insensitive.
-	 */
-	if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE ||
-	    zfsvfs->z_case == ZFS_CASE_MIXED)
-		zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER;
-
-	/*
-	 * The fsid is 64 bits, composed of an 8-bit fs type, which
-	 * separates our fsid from any other filesystem types, and a
-	 * 56-bit objset unique ID.  The objset unique ID is unique to
-	 * all objsets open on this system, provided by unique_create().
-	 * The 8-bit fs type must be put in the low bits of fsid[1]
-	 * because that's where other Solaris filesystems put it.
-	 */
-	fsid_guid = dmu_objset_fsid_guid(os);
-	ASSERT((fsid_guid & ~((1ULL<<56)-1)) == 0);
-	zfsvfs->z_vfs->vfs_fsid.val[0] = fsid_guid;
-	zfsvfs->z_vfs->vfs_fsid.val[1] = ((fsid_guid>>32) << 8) |
-	    zfsfstype & 0xFF;
-
-	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1,
-	    &zfsvfs->z_root);
-	if (error)
-		return (error);
-	ASSERT(zfsvfs->z_root != 0);
-
-	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1,
-	    &zfsvfs->z_unlinkedobj);
-	if (error)
-		return (error);
-
-	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES, 8, 1,
-	    &zfsvfs->z_fuid_obj);
-	if (error == ENOENT)
-		error = 0;
-
-	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SHARES_DIR, 8, 1,
-	    &zfsvfs->z_shares_dir);
-	if (error && error != ENOENT)
-		return (error);
-
-	/*
-	 * Initialize zget mutex's
-	 */
-	for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
-		mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL);
-
-	error = zfs_zget(zfsvfs, zfsvfs->z_root, zpp);
-	if (error) {
-		/*
-		 * On error, we destroy the mutexes here since it's not
-		 * possible for the caller to determine if the mutexes were
-		 * initialized properly.
-		 */
-		for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
-			mutex_destroy(&zfsvfs->z_hold_mtx[i]);
-		return (error);
-	}
-	ASSERT3U((*zpp)->z_id, ==, zfsvfs->z_root);
-
-	return (error);
-}
-
-/*
  * define a couple of values we need available
  * for both 64 and 32 bit environments.
  */
@@ -1542,7 +1433,7 @@
 zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx)
 {
 	zfsvfs_t	zfsvfs;
-	uint64_t	moid, doid, version;
+	uint64_t	moid, obj, version;
 	uint64_t	sense = ZFS_CASE_SENSITIVE;
 	uint64_t	norm = 0;
 	nvpair_t	*elem;
@@ -1568,12 +1459,12 @@
 	/*
 	 * Set starting attributes.
 	 */
-	if (spa_version(dmu_objset_spa(os)) >= SPA_VERSION_FUID)
+	if (spa_version(dmu_objset_spa(os)) >= SPA_VERSION_USERSPACE)
 		version = ZPL_VERSION;
+	else if (spa_version(dmu_objset_spa(os)) >= SPA_VERSION_FUID)
+		version = ZPL_VERSION_USERSPACE - 1;
 	else
 		version = ZPL_VERSION_FUID - 1;
-	error = zap_update(os, moid, ZPL_VERSION_STR,
-	    8, 1, &version, tx);
 	elem = NULL;
 	while ((elem = nvlist_next_nvpair(zplprops, elem)) != NULL) {
 		/* For the moment we expect all zpl props to be uint64_ts */
@@ -1584,9 +1475,8 @@
 		VERIFY(nvpair_value_uint64(elem, &val) == 0);
 		name = nvpair_name(elem);
 		if (strcmp(name, zfs_prop_to_name(ZFS_PROP_VERSION)) == 0) {
-			version = val;
-			error = zap_update(os, moid, ZPL_VERSION_STR,
-			    8, 1, &version, tx);
+			if (val < version)
+				version = val;
 		} else {
 			error = zap_update(os, moid, name, 8, 1, &val, tx);
 		}
@@ -1597,13 +1487,14 @@
 			sense = val;
 	}
 	ASSERT(version != 0);
+	error = zap_update(os, moid, ZPL_VERSION_STR, 8, 1, &version, tx);
 
 	/*
 	 * Create a delete queue.
 	 */
-	doid = zap_create(os, DMU_OT_UNLINKED_SET, DMU_OT_NONE, 0, tx);
+	obj = zap_create(os, DMU_OT_UNLINKED_SET, DMU_OT_NONE, 0, tx);
 
-	error = zap_add(os, moid, ZFS_UNLINKED_SET, 8, 1, &doid, tx);
+	error = zap_add(os, moid, ZFS_UNLINKED_SET, 8, 1, &obj, tx);
 	ASSERT(error == 0);
 
 	/*
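
In zfs_create_fs() the ZPL version is now chosen once, after all zplprops have been processed, and an explicit version=N request may only lower the default; ZPL_VERSION_STR is then written to the master node a single time. Restating the default selection as a standalone helper (illustrative; the real code is inline above, and the constants come from the zfs.h hunks below):

	static uint64_t
	default_zpl_version_sketch(spa_t *spa)
	{
		if (spa_version(spa) >= SPA_VERSION_USERSPACE)
			return (ZPL_VERSION);			/* ZPL 4 */
		else if (spa_version(spa) >= SPA_VERSION_FUID)
			return (ZPL_VERSION_USERSPACE - 1);	/* ZPL 3 */
		else
			return (ZPL_VERSION_FUID - 1);		/* ZPL 2 */
	}
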
--- a/usr/src/uts/common/fs/zfs/zil.c	Sat Apr 18 01:13:46 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/zil.c	Sat Apr 18 13:41:47 2009 -0700
@@ -1219,6 +1219,13 @@
 	spa_t *spa = zilog->zl_spa;
 	lwb_t *lwb;
 
+	/*
+	 * We don't zero out zl_destroy_txg, so make sure we don't try
+	 * to destroy it twice.
+	 */
+	if (spa_sync_pass(spa) != 1)
+		return;
+
 	mutex_enter(&zilog->zl_lock);
 
 	ASSERT(zilog->zl_stop_sync == 0);
@@ -1229,7 +1236,6 @@
 		blkptr_t blk = zh->zh_log;
 
 		ASSERT(list_head(&zilog->zl_lwb_list) == NULL);
-		ASSERT(spa_sync_pass(spa) == 1);
 
 		bzero(zh, sizeof (zil_header_t));
 		bzero(zilog->zl_replayed_seq, sizeof (zilog->zl_replayed_seq));
--- a/usr/src/uts/common/fs/zfs/zio.c	Sat Apr 18 01:13:46 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/zio.c	Sat Apr 18 13:41:47 2009 -0700
@@ -875,16 +875,10 @@
 		 * few passes, stop compressing to ensure convergence.
 		 */
 		pass = spa_sync_pass(zio->io_spa);
-		ASSERT(pass > 1);
 
 		if (pass > SYNC_PASS_DONT_COMPRESS)
 			compress = ZIO_COMPRESS_OFF;
 
-		/*
-		 * Only MOS (objset 0) data should need to be rewritten.
-		 */
-		ASSERT(zio->io_logical->io_bookmark.zb_objset == 0);
-
 		/* Make sure someone doesn't change their mind on overwrites */
 		ASSERT(MIN(zp->zp_ndvas + BP_IS_GANG(bp),
 		    spa_max_replication(zio->io_spa)) == BP_GET_NDVAS(bp));
--- a/usr/src/uts/common/sys/fs/zfs.h	Sat Apr 18 01:13:46 2009 -0700
+++ b/usr/src/uts/common/sys/fs/zfs.h	Sat Apr 18 13:41:47 2009 -0700
@@ -105,9 +105,20 @@
 	ZFS_PROP_USEDDS,
 	ZFS_PROP_USEDCHILD,
 	ZFS_PROP_USEDREFRESERV,
+	ZFS_PROP_USERACCOUNTING,	/* not exposed to the user */
 	ZFS_NUM_PROPS
 } zfs_prop_t;
 
+typedef enum {
+	ZFS_PROP_USERUSED,
+	ZFS_PROP_USERQUOTA,
+	ZFS_PROP_GROUPUSED,
+	ZFS_PROP_GROUPQUOTA,
+	ZFS_NUM_USERQUOTA_PROPS
+} zfs_userquota_prop_t;
+
+extern const char *zfs_userquota_prop_prefixes[ZFS_NUM_USERQUOTA_PROPS];
+
 /*
  * Pool properties are identified by these constants and must be added to the
  * end of this list to ensure that external consumers are not affected
@@ -169,6 +180,7 @@
 const char *zfs_prop_to_name(zfs_prop_t);
 zfs_prop_t zfs_name_to_prop(const char *);
 boolean_t zfs_prop_user(const char *);
+boolean_t zfs_prop_userquota(const char *name);
 int zfs_prop_index_to_string(zfs_prop_t, uint64_t, const char **);
 int zfs_prop_string_to_index(zfs_prop_t, const char *, uint64_t *);
 boolean_t zfs_prop_valid_for_type(int, zfs_type_t);
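
The zfs_userquota_prop_t values back the four pseudo-properties this case adds (user/group "used" and "quota"). They are addressed by name as "<prefix><id>", e.g. userquota@<user>, rather than by a fixed zfs_prop_t, which is why ZFS_PROP_USERACCOUNTING stays hidden and a separate name check, zfs_prop_userquota(), is exported. A plausible sketch of that check (the real one lives in the common zfs property code), assuming zfs_userquota_prop_prefixes[] holds the four "...@" prefixes:

	#include <string.h>
	#include <sys/fs/zfs.h>

	boolean_t
	zfs_prop_userquota(const char *name)
	{
		zfs_userquota_prop_t prop;

		for (prop = 0; prop < ZFS_NUM_USERQUOTA_PROPS; prop++) {
			if (strncmp(name, zfs_userquota_prop_prefixes[prop],
			    strlen(zfs_userquota_prop_prefixes[prop])) == 0)
				return (B_TRUE);
		}
		return (B_FALSE);
	}
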
@@ -260,13 +272,15 @@
 #define	SPA_VERSION_12			12ULL
 #define	SPA_VERSION_13			13ULL
 #define	SPA_VERSION_14			14ULL
+#define	SPA_VERSION_15			15ULL
 /*
  * When bumping up SPA_VERSION, make sure GRUB ZFS understands the on-disk
  * format change. Go to usr/src/grub/grub-0.95/stage2/{zfs-include/, fsys_zfs*},
- * and do the appropriate changes.
+ * and do the appropriate changes.  Also bump the version number in
+ * usr/src/grub/capability.
  */
-#define	SPA_VERSION			SPA_VERSION_14
-#define	SPA_VERSION_STRING		"14"
+#define	SPA_VERSION			SPA_VERSION_15
+#define	SPA_VERSION_STRING		"15"
 
 /*
  * Symbolic names for the changes that caused a SPA_VERSION switch.
@@ -302,6 +316,7 @@
 #define	SPA_VERSION_SNAP_PROPS		SPA_VERSION_12
 #define	SPA_VERSION_USED_BREAKDOWN	SPA_VERSION_13
 #define	SPA_VERSION_PASSTHROUGH_X	SPA_VERSION_14
+#define	SPA_VERSION_USERSPACE		SPA_VERSION_15
 
 /*
  * ZPL version - rev'd whenever an incompatible on-disk format change
@@ -314,14 +329,16 @@
 #define	ZPL_VERSION_1			1ULL
 #define	ZPL_VERSION_2			2ULL
 #define	ZPL_VERSION_3			3ULL
-#define	ZPL_VERSION			ZPL_VERSION_3
-#define	ZPL_VERSION_STRING		"3"
+#define	ZPL_VERSION_4			4ULL
+#define	ZPL_VERSION			ZPL_VERSION_4
+#define	ZPL_VERSION_STRING		"4"
 
 #define	ZPL_VERSION_INITIAL		ZPL_VERSION_1
 #define	ZPL_VERSION_DIRENT_TYPE		ZPL_VERSION_2
 #define	ZPL_VERSION_FUID		ZPL_VERSION_3
 #define	ZPL_VERSION_NORMALIZATION	ZPL_VERSION_3
 #define	ZPL_VERSION_SYSATTR		ZPL_VERSION_3
+#define	ZPL_VERSION_USERSPACE		ZPL_VERSION_4
 
 /*
  * The following are configuration names used in the nvlist describing a pool's
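
The pool and ZPL format bumps travel together: SPA_VERSION_15/SPA_VERSION_USERSPACE carries the on-disk space accounting, ZPL_VERSION_4/ZPL_VERSION_USERSPACE the per-file-system quota support, and the updated comment above reminds future bumps to touch GRUB's capability file as well. A rough gating sketch a consumer might use (illustrative only; only the names come from the hunks above):

	static boolean_t
	userquota_available_sketch(spa_t *spa, uint64_t zpl_version)
	{
		/* user/group quotas need pool version 15 and file system version 4 */
		return (spa_version(spa) >= SPA_VERSION_USERSPACE &&
		    zpl_version >= ZPL_VERSION_USERSPACE);
	}
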
@@ -571,7 +588,10 @@
 	ZFS_IOC_ISCSI_PERM_CHECK,
 	ZFS_IOC_SHARE,
 	ZFS_IOC_INHERIT_PROP,
-	ZFS_IOC_SMB_ACL
+	ZFS_IOC_SMB_ACL,
+	ZFS_IOC_USERSPACE_ONE,
+	ZFS_IOC_USERSPACE_MANY,
+	ZFS_IOC_USERSPACE_UPGRADE
 } zfs_ioc_t;
 
 /*
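
Three new ioctls round out the interface: ZFS_IOC_USERSPACE_ONE fetches a single {property, id} value, ZFS_IOC_USERSPACE_MANY iterates all ids for a property (presumably backing the new zfs userspace/groupspace subcommands), and ZFS_IOC_USERSPACE_UPGRADE asks the kernel to build the accounting for a file system created before this feature. A hypothetical user-level sketch of the upgrade call, assuming libzfs's private zfs_ioctl() wrapper and the usual zc_name convention (illustrative only; the real consumers live in libzfs and pyzfs):

	#include <string.h>
	#include <libzfs_impl.h>	/* private: zfs_cmd_t, zfs_ioctl() */

	static int
	userspace_upgrade_sketch(libzfs_handle_t *hdl, const char *fsname)
	{
		zfs_cmd_t zc = { 0 };

		(void) strlcpy(zc.zc_name, fsname, sizeof (zc.zc_name));
		return (zfs_ioctl(hdl, ZFS_IOC_USERSPACE_UPGRADE, &zc));
	}
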