# HG changeset patch # User Jerry Jelinek # Date 1339439017 0 # Node ID c3f8a4690b1fea651b663d6217db7b3e4a2a9d86 # Parent 489a49e3fc33242d9ca943d2bfddb6d03f8ab21f 3977 zones can commit suicide by zvol Reviewed by: Gordon Ross Reviewed by: Richard Lowe Approved by: Garrett D'Amore diff -r 489a49e3fc33 -r c3f8a4690b1f usr/src/cmd/zoneadm/zfs.c --- a/usr/src/cmd/zoneadm/zfs.c Thu Nov 10 09:09:20 2011 +0000 +++ b/usr/src/cmd/zoneadm/zfs.c Mon Jun 11 18:23:37 2012 +0000 @@ -22,6 +22,7 @@ /* * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2012, Joyent, Inc. All rights reserved. */ /* @@ -71,34 +72,6 @@ } clone_data_t; /* - * A ZFS file system iterator call-back function which is used to validate - * datasets imported into the zone. - */ -/* ARGSUSED */ -static int -check_zvol(zfs_handle_t *zhp, void *unused) -{ - int ret; - - if (zfs_get_type(zhp) == ZFS_TYPE_VOLUME) { - /* - * TRANSLATION_NOTE - * zfs and dataset are literals that should not be translated. - */ - (void) fprintf(stderr, gettext("cannot verify zfs dataset %s: " - "volumes cannot be specified as a zone dataset resource\n"), - zfs_get_name(zhp)); - ret = -1; - } else { - ret = zfs_iter_children(zhp, check_zvol, NULL); - } - - zfs_close(zhp); - - return (ret); -} - -/* * A ZFS file system iterator call-back function which returns the * zfs_handle_t for a ZFS file system on the specified mount point. 
*/ @@ -1259,17 +1232,6 @@ continue; } - if (zfs_get_type(zhp) == ZFS_TYPE_VOLUME) { - (void) fprintf(stderr, gettext("cannot verify zfs " - "dataset %s: volumes cannot be specified as a " - "zone dataset resource\n"), - dstab.zone_dataset_name); - return_code = Z_ERR; - } - - if (zfs_iter_children(zhp, check_zvol, NULL) != 0) - return_code = Z_ERR; - zfs_close(zhp); } (void) zonecfg_enddsent(handle); diff -r 489a49e3fc33 -r c3f8a4690b1f usr/src/uts/common/fs/dev/sdev_subr.c --- a/usr/src/uts/common/fs/dev/sdev_subr.c Thu Nov 10 09:09:20 2011 +0000 +++ b/usr/src/uts/common/fs/dev/sdev_subr.c Mon Jun 11 18:23:37 2012 +0000 @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, Joyent, Inc. All rights reserved. */ /* @@ -537,7 +538,7 @@ SDEV_DYNAMIC | SDEV_VTOR }, { "zvol", devzvol_vnodeops_tbl, NULL, &devzvol_vnodeops, - devzvol_validate, SDEV_DYNAMIC | SDEV_VTOR | SDEV_SUBDIR }, + devzvol_validate, SDEV_ZONED | SDEV_DYNAMIC | SDEV_VTOR | SDEV_SUBDIR }, { "zcons", NULL, NULL, NULL, NULL, SDEV_NO_NCACHE }, diff -r 489a49e3fc33 -r c3f8a4690b1f usr/src/uts/common/fs/dev/sdev_zvolops.c --- a/usr/src/uts/common/fs/dev/sdev_zvolops.c Thu Nov 10 09:09:20 2011 +0000 +++ b/usr/src/uts/common/fs/dev/sdev_zvolops.c Mon Jun 11 18:23:37 2012 +0000 @@ -21,6 +21,7 @@ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2013 Joyent, Inc. All rights reserved. 
*/ /* vnode ops for the /dev/zvol directory */ @@ -47,6 +48,7 @@ static ldi_handle_t devzvol_lh; static kmutex_t devzvol_mtx; static boolean_t devzvol_isopen; +static major_t devzvol_major; /* * we need to use ddi_mod* since fs/dev gets loaded early on in @@ -61,12 +63,16 @@ int sdev_zvol_create_minor(char *dsname) { + if (szcm == NULL) + return (-1); return ((*szcm)(dsname)); } int sdev_zvol_name2minor(char *dsname, minor_t *minor) { + if (szn2m == NULL) + return (-1); return ((*szn2m)(dsname, minor)); } @@ -74,6 +80,7 @@ devzvol_open_zfs() { int rc; + dev_t dv; devzvol_li = ldi_ident_from_anon(); if (ldi_open_by_name("/dev/zfs", FREAD | FWRITE, kcred, @@ -94,6 +101,9 @@ cmn_err(CE_WARN, "couldn't resolve zvol_name2minor"); return (rc); } + if (ldi_get_dev(devzvol_lh, &dv)) + return (-1); + devzvol_major = getmajor(dv); return (0); } @@ -270,6 +280,8 @@ sdcmn_err13((" v_type %d do_type %d", SDEVTOV(dv)->v_type, do_type)); if ((SDEVTOV(dv)->v_type == VLNK && do_type != DMU_OST_ZVOL) || + ((SDEVTOV(dv)->v_type == VBLK || SDEVTOV(dv)->v_type == VCHR) && + do_type != DMU_OST_ZVOL) || (SDEVTOV(dv)->v_type == VDIR && do_type == DMU_OST_ZVOL)) { kmem_free(dsname, strlen(dsname) + 1); return (SDEV_VTOR_STALE); @@ -486,6 +498,82 @@ rw_downgrade(&ddv->sdev_contents); } +/* + * This function is used to create a dir or dev inside a zone's /dev when the + * zone has a zvol that is dynamically created within the zone (i.e. inside + * of a delegated dataset). Since there is no /devices tree within a zone, + * we create the chr/blk devices directly inside the zone's /dev instead of + * making symlinks.
+ */ +static int +devzvol_mk_ngz_node(struct sdev_node *parent, char *nm) +{ + struct vattr vattr; + timestruc_t now; + enum vtype expected_type = VDIR; + dmu_objset_type_t do_type; + struct sdev_node *dv = NULL; + int res; + char *dsname; + + bzero(&vattr, sizeof (vattr)); + gethrestime(&now); + vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID; + vattr.va_uid = SDEV_UID_DEFAULT; + vattr.va_gid = SDEV_GID_DEFAULT; + vattr.va_type = VNON; + vattr.va_atime = now; + vattr.va_mtime = now; + vattr.va_ctime = now; + + if ((dsname = devzvol_make_dsname(parent->sdev_path, nm)) == NULL) + return (ENOENT); + + if (devzvol_objset_check(dsname, &do_type) != 0) { + kmem_free(dsname, strlen(dsname) + 1); + return (ENOENT); + } + if (do_type == DMU_OST_ZVOL) + expected_type = VBLK; + + if (expected_type == VDIR) { + vattr.va_type = VDIR; + vattr.va_mode = SDEV_DIRMODE_DEFAULT; + } else { + minor_t minor; + dev_t devnum; + int rc; + + rc = sdev_zvol_create_minor(dsname); + if ((rc != 0 && rc != EEXIST && rc != EBUSY) || + sdev_zvol_name2minor(dsname, &minor)) { + kmem_free(dsname, strlen(dsname) + 1); + return (ENOENT); + } + + devnum = makedevice(devzvol_major, minor); + vattr.va_rdev = devnum; + + if (strstr(parent->sdev_path, "/rdsk/") != NULL) + vattr.va_type = VCHR; + else + vattr.va_type = VBLK; + vattr.va_mode = SDEV_DEVMODE_DEFAULT; + } + kmem_free(dsname, strlen(dsname) + 1); + + rw_enter(&parent->sdev_contents, RW_WRITER); + + res = sdev_mknode(parent, nm, &dv, &vattr, + NULL, NULL, kcred, SDEV_READY); + rw_exit(&parent->sdev_contents); + if (res != 0) + return (ENOENT); + + SDEV_RELE(dv); + return (0); +} + /*ARGSUSED*/ static int devzvol_lookup(struct vnode *dvp, char *nm, struct vnode **vpp, @@ -505,9 +593,52 @@ return (error); rw_enter(&parent->sdev_contents, RW_READER); - if (!SDEV_IS_GLOBAL(parent)) { + if (SDEV_IS_GLOBAL(parent)) { + /* + * During iter_datasets, don't create GZ dev when running in + * NGZ. 
We can't return ENOENT here since that could + * incorrectly trigger the creation of the dev from the + * recursive call through prof_filldir during iter_datasets. + */ + if (getzoneid() != GLOBAL_ZONEID) { + rw_exit(&parent->sdev_contents); + return (EPERM); + } + } else { + int res; + rw_exit(&parent->sdev_contents); - return (prof_lookup(dvp, nm, vpp, cred)); + + /* + * If we're in the global zone and reach down into a non-global + * zone's /dev/zvol then this action could trigger the creation + * of all of the zvol devices for every zone into the non-global + * zone's /dev tree. This could be a big security hole. To + * prevent this, disallow the global zone from looking inside + * a non-global zone's /dev/zvol. This behavior is similar to + * delegated datasets, which cannot be used by the global zone. + */ + if (getzoneid() == GLOBAL_ZONEID) + return (EPERM); + + res = prof_lookup(dvp, nm, vpp, cred); + + /* + * We won't find a zvol that was dynamically created inside + * a NGZ, within a delegated dataset, in the zone's dev profile + * but prof_lookup will also find it via sdev_cache_lookup. + */ + if (res == ENOENT) { + /* + * We have to create the sdev node for the dynamically + * created zvol. + */ + if (devzvol_mk_ngz_node(parent, nm) != 0) + return (ENOENT); + res = prof_lookup(dvp, nm, vpp, cred); + } + + return (res); } dsname = devzvol_make_dsname(parent->sdev_path, nm); @@ -613,8 +744,10 @@ } else if (rc == ENOENT) { goto skip; } else { - /* EBUSY == problem with zvols's dmu holds? */ - ASSERT(0); + /* + * EBUSY == problem with zvol's dmu holds? + * EPERM when in a NGZ and traversing up and out. + */ goto skip; } if (arg == ZFS_IOC_DATASET_LIST_NEXT &&