3977 zones can commit suicide by zvol
author Jerry Jelinek <jerry.jelinek@joyent.com>
Mon, 11 Jun 2012 18:23:37 +0000
changeset 14222 c3f8a4690b1f
parent 14221 489a49e3fc33
child 14223 1652c59077c6
3977 zones can commit suicide by zvol Reviewed by: Gordon Ross <[email protected]> Reviewed by: Richard Lowe <[email protected]> Approved by: Garrett D'Amore <[email protected]>
usr/src/cmd/zoneadm/zfs.c
usr/src/uts/common/fs/dev/sdev_subr.c
usr/src/uts/common/fs/dev/sdev_zvolops.c
--- a/usr/src/cmd/zoneadm/zfs.c	Thu Nov 10 09:09:20 2011 +0000
+++ b/usr/src/cmd/zoneadm/zfs.c	Mon Jun 11 18:23:37 2012 +0000
@@ -22,6 +22,7 @@
 /*
  * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
  */
 
 /*
@@ -71,34 +72,6 @@
 } clone_data_t;
 
 /*
- * A ZFS file system iterator call-back function which is used to validate
- * datasets imported into the zone.
- */
-/* ARGSUSED */
-static int
-check_zvol(zfs_handle_t *zhp, void *unused)
-{
-	int ret;
-
-	if (zfs_get_type(zhp) == ZFS_TYPE_VOLUME) {
-		/*
-		 * TRANSLATION_NOTE
-		 * zfs and dataset are literals that should not be translated.
-		 */
-		(void) fprintf(stderr, gettext("cannot verify zfs dataset %s: "
-		    "volumes cannot be specified as a zone dataset resource\n"),
-		    zfs_get_name(zhp));
-		ret = -1;
-	} else {
-		ret = zfs_iter_children(zhp, check_zvol, NULL);
-	}
-
-	zfs_close(zhp);
-
-	return (ret);
-}
-
-/*
  * A ZFS file system iterator call-back function which returns the
  * zfs_handle_t for a ZFS file system on the specified mount point.
  */
@@ -1259,17 +1232,6 @@
 			continue;
 		}
 
-		if (zfs_get_type(zhp) == ZFS_TYPE_VOLUME) {
-			(void) fprintf(stderr, gettext("cannot verify zfs "
-			    "dataset %s: volumes cannot be specified as a "
-			    "zone dataset resource\n"),
-			    dstab.zone_dataset_name);
-			return_code = Z_ERR;
-		}
-
-		if (zfs_iter_children(zhp, check_zvol, NULL) != 0)
-			return_code = Z_ERR;
-
 		zfs_close(zhp);
 	}
 	(void) zonecfg_enddsent(handle);
--- a/usr/src/uts/common/fs/dev/sdev_subr.c	Thu Nov 10 09:09:20 2011 +0000
+++ b/usr/src/uts/common/fs/dev/sdev_subr.c	Mon Jun 11 18:23:37 2012 +0000
@@ -20,6 +20,7 @@
  */
 /*
  * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
  */
 
 /*
@@ -537,7 +538,7 @@
 	SDEV_DYNAMIC | SDEV_VTOR },
 
 	{ "zvol", devzvol_vnodeops_tbl, NULL, &devzvol_vnodeops,
-	devzvol_validate, SDEV_DYNAMIC | SDEV_VTOR | SDEV_SUBDIR },
+	devzvol_validate, SDEV_ZONED | SDEV_DYNAMIC | SDEV_VTOR | SDEV_SUBDIR },
 
 	{ "zcons", NULL, NULL, NULL, NULL, SDEV_NO_NCACHE },
 
--- a/usr/src/uts/common/fs/dev/sdev_zvolops.c	Thu Nov 10 09:09:20 2011 +0000
+++ b/usr/src/uts/common/fs/dev/sdev_zvolops.c	Mon Jun 11 18:23:37 2012 +0000
@@ -21,6 +21,7 @@
 /*
  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
+ * Copyright 2013 Joyent, Inc.  All rights reserved.
  */
 
 /* vnode ops for the /dev/zvol directory */
@@ -47,6 +48,7 @@
 static ldi_handle_t devzvol_lh;
 static kmutex_t devzvol_mtx;
 static boolean_t devzvol_isopen;
+static major_t devzvol_major;
 
 /*
  * we need to use ddi_mod* since fs/dev gets loaded early on in
@@ -61,12 +63,16 @@
 int
 sdev_zvol_create_minor(char *dsname)
 {
+	if (szcm == NULL)	/* function pointer unset: fail, don't call */
+		return (-1);
 	return ((*szcm)(dsname));
 }
 
 int
 sdev_zvol_name2minor(char *dsname, minor_t *minor)
 {
+	if (szn2m == NULL)	/* function pointer unset: fail, don't call */
+		return (-1);
 	return ((*szn2m)(dsname, minor));
 }
 
@@ -74,6 +80,7 @@
 devzvol_open_zfs()
 {
 	int rc;
+	dev_t dv;
 
 	devzvol_li = ldi_ident_from_anon();
 	if (ldi_open_by_name("/dev/zfs", FREAD | FWRITE, kcred,
@@ -94,6 +101,9 @@
 		cmn_err(CE_WARN, "couldn't resolve zvol_name2minor");
 		return (rc);
 	}
+	if (ldi_get_dev(devzvol_lh, &dv))
+		return (-1);
+	devzvol_major = getmajor(dv);
 	return (0);
 }
 
@@ -270,6 +280,8 @@
 	sdcmn_err13(("  v_type %d do_type %d",
 	    SDEVTOV(dv)->v_type, do_type));
 	if ((SDEVTOV(dv)->v_type == VLNK && do_type != DMU_OST_ZVOL) ||
+	    ((SDEVTOV(dv)->v_type == VBLK || SDEVTOV(dv)->v_type == VCHR) &&
+	    do_type != DMU_OST_ZVOL) ||
 	    (SDEVTOV(dv)->v_type == VDIR && do_type == DMU_OST_ZVOL)) {
 		kmem_free(dsname, strlen(dsname) + 1);
 		return (SDEV_VTOR_STALE);
@@ -486,6 +498,82 @@
 	rw_downgrade(&ddv->sdev_contents);
 }
 
+/*
+ * This function is used to create a dir or dev inside a zone's /dev when the
+ * zone has a zvol that is dynamically created within the zone (i.e. inside
+ * of a delegated dataset).  Since there is no /devices tree within a zone,
+ * we create the chr/blk devices directly inside the zone's /dev instead of
+ * making symlinks.  Returns 0 on success and ENOENT on any failure.
+ */
+static int
+devzvol_mk_ngz_node(struct sdev_node *parent, char *nm)
+{
+	struct vattr vattr;
+	timestruc_t now;
+	enum vtype expected_type = VDIR;
+	dmu_objset_type_t do_type;
+	struct sdev_node *dv = NULL;
+	int res;
+	char *dsname;
+
+	bzero(&vattr, sizeof (vattr));
+	gethrestime(&now);
+	vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID;
+	vattr.va_uid = SDEV_UID_DEFAULT;
+	vattr.va_gid = SDEV_GID_DEFAULT;
+	vattr.va_type = VNON;
+	vattr.va_atime = now;
+	vattr.va_mtime = now;
+	vattr.va_ctime = now;
+
+	if ((dsname = devzvol_make_dsname(parent->sdev_path, nm)) == NULL)
+		return (ENOENT);
+
+	if (devzvol_objset_check(dsname, &do_type) != 0) {
+		kmem_free(dsname, strlen(dsname) + 1);
+		return (ENOENT);
+	}
+	if (do_type == DMU_OST_ZVOL)
+		expected_type = VBLK;
+
+	if (expected_type == VDIR) {
+		vattr.va_type = VDIR;
+		vattr.va_mode = SDEV_DIRMODE_DEFAULT;
+	} else {
+		minor_t minor;
+		dev_t devnum;
+		int rc;
+
+		rc = sdev_zvol_create_minor(dsname);
+		if ((rc != 0 && rc != EEXIST && rc != EBUSY) ||
+		    sdev_zvol_name2minor(dsname, &minor)) {
+			kmem_free(dsname, strlen(dsname) + 1);
+			return (ENOENT);
+		}
+
+		devnum = makedevice(devzvol_major, minor);
+		vattr.va_rdev = devnum;
+
+		if (strstr(parent->sdev_path, "/rdsk/") != NULL)
+			vattr.va_type = VCHR;
+		else
+			vattr.va_type = VBLK;
+		vattr.va_mode = SDEV_DEVMODE_DEFAULT;
+	}
+	kmem_free(dsname, strlen(dsname) + 1);
+
+	rw_enter(&parent->sdev_contents, RW_WRITER);
+
+	res = sdev_mknode(parent, nm, &dv, &vattr,
+	    NULL, NULL, kcred, SDEV_READY);
+	rw_exit(&parent->sdev_contents);
+	if (res != 0)
+		return (ENOENT);
+
+	SDEV_RELE(dv);
+	return (0);
+}
+
 /*ARGSUSED*/
 static int
 devzvol_lookup(struct vnode *dvp, char *nm, struct vnode **vpp,
@@ -505,9 +593,52 @@
 		return (error);
 
 	rw_enter(&parent->sdev_contents, RW_READER);
-	if (!SDEV_IS_GLOBAL(parent)) {
+	if (SDEV_IS_GLOBAL(parent)) {
+		/*
+		 * During iter_datasets, don't create GZ dev when running in
+		 * NGZ.  We can't return ENOENT here since that could
+		 * incorrectly trigger the creation of the dev from the
+		 * recursive call through prof_filldir during iter_datasets.
+		 */
+		if (getzoneid() != GLOBAL_ZONEID) {
+			rw_exit(&parent->sdev_contents);
+			return (EPERM);
+		}
+	} else {
+		int res;
+
 		rw_exit(&parent->sdev_contents);
-		return (prof_lookup(dvp, nm, vpp, cred));
+
+		/*
+		 * If we're in the global zone and reach down into a non-global
+		 * zone's /dev/zvol then this action could trigger the creation
+		 * of all of the zvol devices for every zone into the non-global
+		 * zone's /dev tree. This could be a big security hole. To
+		 * prevent this, disallow the global zone from looking inside
+		 * a non-global zone's /dev/zvol. This behavior is similar to
+		 * delegated datasets, which cannot be used by the global zone.
+		 */
+		if (getzoneid() == GLOBAL_ZONEID)
+			return (EPERM);
+
+		res = prof_lookup(dvp, nm, vpp, cred);
+
+		/*
+		 * We won't find a zvol that was dynamically created inside
+		 * a NGZ, within a delegated dataset, in the zone's dev profile
+		 * but prof_lookup will also find it via sdev_cache_lookup.
+		 */
+		if (res == ENOENT) {
+			/*
+			 * We have to create the sdev node for the dynamically
+			 * created zvol.
+			 */
+			if (devzvol_mk_ngz_node(parent, nm) != 0)
+				return (ENOENT);
+			res = prof_lookup(dvp, nm, vpp, cred);
+		}
+
+		return (res);
 	}
 
 	dsname = devzvol_make_dsname(parent->sdev_path, nm);
@@ -613,8 +744,10 @@
 		} else if (rc == ENOENT) {
 			goto skip;
 		} else {
-			/* EBUSY == problem with zvols's dmu holds? */
-			ASSERT(0);
+			/*
+			 * EBUSY == problem with zvol's dmu holds?
+			 * EPERM when in a NGZ and traversing up and out.
+			 */
 			goto skip;
 		}
 		if (arg == ZFS_IOC_DATASET_LIST_NEXT &&