usr/src/uts/common/fs/dev/sdev_zvolops.c
changeset 14222 c3f8a4690b1f
parent 10763 f1a11aaa04fc
child 14223 1652c59077c6
equal deleted inserted replaced
14221:489a49e3fc33 14222:c3f8a4690b1f
    19  * CDDL HEADER END
    19  * CDDL HEADER END
    20  */
    20  */
    21 /*
    21 /*
    22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
    22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
    23  * Use is subject to license terms.
    23  * Use is subject to license terms.
       
    24  * Copyright 2013 Joyent, Inc.  All rights reserved.
    24  */
    25  */
    25 
    26 
    26 /* vnode ops for the /dev/zvol directory */
    27 /* vnode ops for the /dev/zvol directory */
    27 
    28 
    28 #include <sys/types.h>
    29 #include <sys/types.h>
    45 static size_t devzvol_zclist_size;
    46 static size_t devzvol_zclist_size;
    46 static ldi_ident_t devzvol_li;
    47 static ldi_ident_t devzvol_li;
    47 static ldi_handle_t devzvol_lh;
    48 static ldi_handle_t devzvol_lh;
    48 static kmutex_t devzvol_mtx;
    49 static kmutex_t devzvol_mtx;
    49 static boolean_t devzvol_isopen;
    50 static boolean_t devzvol_isopen;
       
    51 static major_t devzvol_major;
    50 
    52 
    51 /*
    53 /*
    52  * we need to use ddi_mod* since fs/dev gets loaded early on in
    54  * we need to use ddi_mod* since fs/dev gets loaded early on in
    53  * startup(), and linking fs/dev to fs/zfs would drag in a lot of
    55  * startup(), and linking fs/dev to fs/zfs would drag in a lot of
    54  * other stuff (like drv/random) before the rest of the system is
    56  * other stuff (like drv/random) before the rest of the system is
    59 int (*szn2m)(char *, minor_t *);
    61 int (*szn2m)(char *, minor_t *);
    60 
    62 
    61 int
    63 int
    62 sdev_zvol_create_minor(char *dsname)
    64 sdev_zvol_create_minor(char *dsname)
    63 {
    65 {
       
    66 	if (szcm == NULL)
       
    67 		return (-1);
    64 	return ((*szcm)(dsname));
    68 	return ((*szcm)(dsname));
    65 }
    69 }
    66 
    70 
    67 int
    71 int
    68 sdev_zvol_name2minor(char *dsname, minor_t *minor)
    72 sdev_zvol_name2minor(char *dsname, minor_t *minor)
    69 {
    73 {
       
    74 	if (szn2m == NULL)
       
    75 		return (-1);
    70 	return ((*szn2m)(dsname, minor));
    76 	return ((*szn2m)(dsname, minor));
    71 }
    77 }
    72 
    78 
    73 int
    79 int
    74 devzvol_open_zfs()
    80 devzvol_open_zfs()
    75 {
    81 {
    76 	int rc;
    82 	int rc;
       
    83 	dev_t dv;
    77 
    84 
    78 	devzvol_li = ldi_ident_from_anon();
    85 	devzvol_li = ldi_ident_from_anon();
    79 	if (ldi_open_by_name("/dev/zfs", FREAD | FWRITE, kcred,
    86 	if (ldi_open_by_name("/dev/zfs", FREAD | FWRITE, kcred,
    80 	    &devzvol_lh, devzvol_li))
    87 	    &devzvol_lh, devzvol_li))
    81 		return (-1);
    88 		return (-1);
    92 	if ((szn2m = (int(*)(char *, minor_t *))
    99 	if ((szn2m = (int(*)(char *, minor_t *))
    93 	    ddi_modsym(zfs_mod, "zvol_name2minor", &rc)) == NULL) {
   100 	    ddi_modsym(zfs_mod, "zvol_name2minor", &rc)) == NULL) {
    94 		cmn_err(CE_WARN, "couldn't resolve zvol_name2minor");
   101 		cmn_err(CE_WARN, "couldn't resolve zvol_name2minor");
    95 		return (rc);
   102 		return (rc);
    96 	}
   103 	}
       
   104 	if (ldi_get_dev(devzvol_lh, &dv))
       
   105 		return (-1);
       
   106 	devzvol_major = getmajor(dv);
    97 	return (0);
   107 	return (0);
    98 }
   108 }
    99 
   109 
   100 void
   110 void
   101 devzvol_close_zfs()
   111 devzvol_close_zfs()
   268 		return (SDEV_VTOR_INVALID);
   278 		return (SDEV_VTOR_INVALID);
   269 	}
   279 	}
   270 	sdcmn_err13(("  v_type %d do_type %d",
   280 	sdcmn_err13(("  v_type %d do_type %d",
   271 	    SDEVTOV(dv)->v_type, do_type));
   281 	    SDEVTOV(dv)->v_type, do_type));
   272 	if ((SDEVTOV(dv)->v_type == VLNK && do_type != DMU_OST_ZVOL) ||
   282 	if ((SDEVTOV(dv)->v_type == VLNK && do_type != DMU_OST_ZVOL) ||
       
   283 	    ((SDEVTOV(dv)->v_type == VBLK || SDEVTOV(dv)->v_type == VCHR) &&
       
   284 	    do_type != DMU_OST_ZVOL) ||
   273 	    (SDEVTOV(dv)->v_type == VDIR && do_type == DMU_OST_ZVOL)) {
   285 	    (SDEVTOV(dv)->v_type == VDIR && do_type == DMU_OST_ZVOL)) {
   274 		kmem_free(dsname, strlen(dsname) + 1);
   286 		kmem_free(dsname, strlen(dsname) + 1);
   275 		return (SDEV_VTOR_STALE);
   287 		return (SDEV_VTOR_STALE);
   276 	}
   288 	}
   277 	if (SDEVTOV(dv)->v_type == VLNK) {
   289 	if (SDEVTOV(dv)->v_type == VLNK) {
   484 			dv = SDEV_NEXT_ENTRY(ddv, dv);
   496 			dv = SDEV_NEXT_ENTRY(ddv, dv);
   485 	}
   497 	}
   486 	rw_downgrade(&ddv->sdev_contents);
   498 	rw_downgrade(&ddv->sdev_contents);
   487 }
   499 }
   488 
   500 
       
   501 /*
       
   502  * This function is used to create a dir or dev inside a zone's /dev when the
       
   503  * zone has a zvol that is dynamically created within the zone (i.e. inside
       
   504  * of a delegated dataset).  Since there is no /devices tree within a zone,
       
   505  * we create the chr/blk devices directly inside the zone's /dev instead of
       
   506  * making symlinks.
       
   507  */
       
   508 static int
       
   509 devzvol_mk_ngz_node(struct sdev_node *parent, char *nm)
       
   510 {
       
   511 	struct vattr vattr;
       
   512 	timestruc_t now;
       
   513 	enum vtype expected_type = VDIR;
       
   514 	dmu_objset_type_t do_type;
       
   515 	struct sdev_node *dv = NULL;
       
   516 	int res;
       
   517 	char *dsname;
       
   518 
       
   519 	bzero(&vattr, sizeof (vattr));
       
   520 	gethrestime(&now);
       
   521 	vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID;
       
   522 	vattr.va_uid = SDEV_UID_DEFAULT;
       
   523 	vattr.va_gid = SDEV_GID_DEFAULT;
       
   524 	vattr.va_type = VNON;
       
   525 	vattr.va_atime = now;
       
   526 	vattr.va_mtime = now;
       
   527 	vattr.va_ctime = now;
       
   528 
       
   529 	if ((dsname = devzvol_make_dsname(parent->sdev_path, nm)) == NULL)
       
   530 		return (ENOENT);
       
   531 
       
   532 	if (devzvol_objset_check(dsname, &do_type) != 0) {
       
   533 		kmem_free(dsname, strlen(dsname) + 1);
       
   534 		return (ENOENT);
       
   535 	}
       
   536 	if (do_type == DMU_OST_ZVOL)
       
   537 		expected_type = VBLK;
       
   538 
       
   539 	if (expected_type == VDIR) {
       
   540 		vattr.va_type = VDIR;
       
   541 		vattr.va_mode = SDEV_DIRMODE_DEFAULT;
       
   542 	} else {
       
   543 		minor_t minor;
       
   544 		dev_t devnum;
       
   545 		int rc;
       
   546 
       
   547 		rc = sdev_zvol_create_minor(dsname);
       
   548 		if ((rc != 0 && rc != EEXIST && rc != EBUSY) ||
       
   549 		    sdev_zvol_name2minor(dsname, &minor)) {
       
   550 			kmem_free(dsname, strlen(dsname) + 1);
       
   551 			return (ENOENT);
       
   552 		}
       
   553 
       
   554 		devnum = makedevice(devzvol_major, minor);
       
   555 		vattr.va_rdev = devnum;
       
   556 
       
   557 		if (strstr(parent->sdev_path, "/rdsk/") != NULL)
       
   558 			vattr.va_type = VCHR;
       
   559 		else
       
   560 			vattr.va_type = VBLK;
       
   561 		vattr.va_mode = SDEV_DEVMODE_DEFAULT;
       
   562 	}
       
   563 	kmem_free(dsname, strlen(dsname) + 1);
       
   564 
       
   565 	rw_enter(&parent->sdev_contents, RW_WRITER);
       
   566 
       
   567 	res = sdev_mknode(parent, nm, &dv, &vattr,
       
   568 	    NULL, NULL, kcred, SDEV_READY);
       
   569 	rw_exit(&parent->sdev_contents);
       
   570 	if (res != 0)
       
   571 		return (ENOENT);
       
   572 
       
   573 	SDEV_RELE(dv);
       
   574 	return (0);
       
   575 }
       
   576 
   489 /*ARGSUSED*/
   577 /*ARGSUSED*/
   490 static int
   578 static int
   491 devzvol_lookup(struct vnode *dvp, char *nm, struct vnode **vpp,
   579 devzvol_lookup(struct vnode *dvp, char *nm, struct vnode **vpp,
   492     struct pathname *pnp, int flags, struct vnode *rdir, struct cred *cred,
   580     struct pathname *pnp, int flags, struct vnode *rdir, struct cred *cred,
   493     caller_context_t *ct, int *direntflags, pathname_t *realpnp)
   581     caller_context_t *ct, int *direntflags, pathname_t *realpnp)
   503 	/* execute access is required to search the directory */
   591 	/* execute access is required to search the directory */
   504 	if ((error = VOP_ACCESS(dvp, VEXEC, 0, cred, ct)) != 0)
   592 	if ((error = VOP_ACCESS(dvp, VEXEC, 0, cred, ct)) != 0)
   505 		return (error);
   593 		return (error);
   506 
   594 
   507 	rw_enter(&parent->sdev_contents, RW_READER);
   595 	rw_enter(&parent->sdev_contents, RW_READER);
   508 	if (!SDEV_IS_GLOBAL(parent)) {
   596 	if (SDEV_IS_GLOBAL(parent)) {
       
   597 		/*
       
   598 		 * During iter_datasets, don't create GZ dev when running in
       
   599 		 * NGZ.  We can't return ENOENT here since that could
       
   600 		 * incorrectly trigger the creation of the dev from the
       
   601 		 * recursive call through prof_filldir during iter_datasets.
       
   602 		 */
       
   603 		if (getzoneid() != GLOBAL_ZONEID) {
       
   604 			rw_exit(&parent->sdev_contents);
       
   605 			return (EPERM);
       
   606 		}
       
   607 	} else {
       
   608 		int res;
       
   609 
   509 		rw_exit(&parent->sdev_contents);
   610 		rw_exit(&parent->sdev_contents);
   510 		return (prof_lookup(dvp, nm, vpp, cred));
   611 
       
   612 		/*
       
   613 		 * If we're in the global zone and reach down into a non-global
       
   614 		 * zone's /dev/zvol then this action could trigger the creation
       
   615 		 * of all of the zvol devices for every zone into the non-global
       
   616 		 * zone's /dev tree. This could be a big security hole. To
       
   617 		 * prevent this, disallow the global zone from looking inside
       
   618 		 * a non-global zone's /dev/zvol. This behavior is similar to
       
   619 		 * delegated datasets, which cannot be used by the global zone.
       
   620 		 */
       
   621 		if (getzoneid() == GLOBAL_ZONEID)
       
   622 			return (EPERM);
       
   623 
       
   624 		res = prof_lookup(dvp, nm, vpp, cred);
       
   625 
       
   626 		/*
       
   627 		 * We won't find a zvol that was dynamically created inside
       
   628 		 * a NGZ, within a delegated dataset, in the zone's dev profile
       
   629 		 * but prof_lookup will also find it via sdev_cache_lookup.
       
   630 		 */
       
   631 		if (res == ENOENT) {
       
   632 			/*
       
   633 			 * We have to create the sdev node for the dynamically
       
   634 			 * created zvol.
       
   635 			 */
       
   636 			if (devzvol_mk_ngz_node(parent, nm) != 0)
       
   637 				return (ENOENT);
       
   638 			res = prof_lookup(dvp, nm, vpp, cred);
       
   639 		}
       
   640 
       
   641 		return (res);
   511 	}
   642 	}
   512 
   643 
   513 	dsname = devzvol_make_dsname(parent->sdev_path, nm);
   644 	dsname = devzvol_make_dsname(parent->sdev_path, nm);
   514 	rw_exit(&parent->sdev_contents);
   645 	rw_exit(&parent->sdev_contents);
   515 	sdcmn_err13(("rvp dsname %s", dsname ? dsname : "(null)"));
   646 	sdcmn_err13(("rvp dsname %s", dsname ? dsname : "(null)"));
   611 		if (rc == 0) {
   742 		if (rc == 0) {
   612 			VN_RELE(vpp);
   743 			VN_RELE(vpp);
   613 		} else if (rc == ENOENT) {
   744 		} else if (rc == ENOENT) {
   614 			goto skip;
   745 			goto skip;
   615 		} else {
   746 		} else {
   616 			/* EBUSY == problem with zvols's dmu holds? */
   747 			/*
   617 			ASSERT(0);
   748 			 * EBUSY == problem with zvols's dmu holds?
       
   749 			 * EPERM when in a NGZ and traversing up and out.
       
   750 			 */
   618 			goto skip;
   751 			goto skip;
   619 		}
   752 		}
   620 		if (arg == ZFS_IOC_DATASET_LIST_NEXT &&
   753 		if (arg == ZFS_IOC_DATASET_LIST_NEXT &&
   621 		    zc->zc_objset_stats.dds_type != DMU_OST_ZFS)
   754 		    zc->zc_objset_stats.dds_type != DMU_OST_ZFS)
   622 			sdev_iter_snapshots(dvp, zc->zc_name);
   755 			sdev_iter_snapshots(dvp, zc->zc_name);