# HG changeset patch # User Tim Haley # Date 1246152120 21600 # Node ID b4907297e7408984c1567fb7e70156070bc915a4 # Parent 13d7f3eec672db45927a02e4f68ccf66857c0eaa 6775100 stat() performance on files on zfs should be improved 6827779 rrwlock is overly protective of its counters diff -r 13d7f3eec672 -r b4907297e740 usr/src/uts/common/fs/zfs/rrwlock.c --- a/usr/src/uts/common/fs/zfs/rrwlock.c Fri Jun 26 17:26:34 2009 -0700 +++ b/usr/src/uts/common/fs/zfs/rrwlock.c Sat Jun 27 19:22:00 2009 -0600 @@ -19,12 +19,10 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - #include #include @@ -118,7 +116,7 @@ rrw_node_t *prev = NULL; if (refcount_count(&rrl->rr_linked_rcount) == 0) - return (NULL); + return (B_FALSE); for (rn = tsd_get(rrw_tsd_key); rn != NULL; rn = rn->rn_next) { if (rn->rn_rrl == rrl) { @@ -159,6 +157,14 @@ rrw_enter_read(rrwlock_t *rrl, void *tag) { mutex_enter(&rrl->rr_lock); +#if !defined(DEBUG) && defined(_KERNEL) + if (!rrl->rr_writer && !rrl->rr_writer_wanted) { + rrl->rr_anon_rcount.rc_count++; + mutex_exit(&rrl->rr_lock); + return; + } + DTRACE_PROBE(zfs__rrwfastpath__rdmiss); +#endif ASSERT(rrl->rr_writer != curthread); ASSERT(refcount_count(&rrl->rr_anon_rcount) >= 0); @@ -208,19 +214,28 @@ rrw_exit(rrwlock_t *rrl, void *tag) { mutex_enter(&rrl->rr_lock); +#if !defined(DEBUG) && defined(_KERNEL) + if (!rrl->rr_writer && rrl->rr_linked_rcount.rc_count == 0) { + rrl->rr_anon_rcount.rc_count--; + if (rrl->rr_anon_rcount.rc_count == 0) + cv_broadcast(&rrl->rr_cv); + mutex_exit(&rrl->rr_lock); + return; + } + DTRACE_PROBE(zfs__rrwfastpath__exitmiss); +#endif ASSERT(!refcount_is_zero(&rrl->rr_anon_rcount) || !refcount_is_zero(&rrl->rr_linked_rcount) || rrl->rr_writer != NULL); if (rrl->rr_writer == NULL) { - if (rrn_find_and_remove(rrl)) { - if (refcount_remove(&rrl->rr_linked_rcount, tag) == 0) - cv_broadcast(&rrl->rr_cv); - - } else { - if (refcount_remove(&rrl->rr_anon_rcount, tag) == 0) - cv_broadcast(&rrl->rr_cv); - } + int64_t count; + if (rrn_find_and_remove(rrl)) + count = refcount_remove(&rrl->rr_linked_rcount, tag); + else + count = refcount_remove(&rrl->rr_anon_rcount, tag); + if (count == 0) + cv_broadcast(&rrl->rr_cv); } else { ASSERT(rrl->rr_writer == curthread); ASSERT(refcount_is_zero(&rrl->rr_anon_rcount) && diff -r 13d7f3eec672 -r b4907297e740 usr/src/uts/common/fs/zfs/sys/zfs_acl.h --- a/usr/src/uts/common/fs/zfs/sys/zfs_acl.h Fri Jun 26 17:26:34 2009 -0700 +++ b/usr/src/uts/common/fs/zfs/sys/zfs_acl.h Sat Jun 27 19:22:00 2009 -0600 @@ -203,6 +203,7 @@ void zfs_ace_byteswap(void *, size_t, boolean_t); extern boolean_t zfs_has_access(struct znode *zp, cred_t *cr); extern int zfs_zaccess(struct znode *, int, int, boolean_t, cred_t *); +int zfs_fastaccesschk_execute(struct znode *, cred_t *); extern int zfs_zaccess_rwx(struct znode *, mode_t, int, cred_t *); extern int zfs_zaccess_unix(struct znode *, mode_t, cred_t *); extern int zfs_acl_access(struct znode *, int, cred_t *); diff -r 13d7f3eec672 -r b4907297e740 usr/src/uts/common/fs/zfs/sys/zfs_znode.h --- a/usr/src/uts/common/fs/zfs/sys/zfs_znode.h Fri Jun 26 17:26:34 2009 -0700 +++ b/usr/src/uts/common/fs/zfs/sys/zfs_znode.h Sat Jun 27 19:22:00 2009 -0600 @@ -77,6 +77,7 @@ #define ZFS_ACL_DEFAULTED 0x20 /* ACL should be defaulted */ #define ZFS_ACL_AUTO_INHERIT 0x40 /* ACL should be inherited */ #define ZFS_BONUS_SCANSTAMP 0x80 /* Scanstamp in bonus area */ +#define ZFS_NO_EXECS_DENIED 0x100 /* exec was given to everyone */ /* * Is ID ephemeral? @@ -200,6 +201,7 @@ uint64_t z_gen; /* generation (same as zp_gen) */ uint32_t z_sync_cnt; /* synchronous open count */ kmutex_t z_acl_lock; /* acl data lock */ + zfs_acl_t *z_acl_cached; /* cached acl */ list_node_t z_link_node; /* all znodes in fs link */ /* * These are dmu managed fields. diff -r 13d7f3eec672 -r b4907297e740 usr/src/uts/common/fs/zfs/zfs_acl.c --- a/usr/src/uts/common/fs/zfs/zfs_acl.c Fri Jun 26 17:26:34 2009 -0700 +++ b/usr/src/uts/common/fs/zfs/zfs_acl.c Sat Jun 27 19:22:00 2009 -0600 @@ -781,6 +781,7 @@ uint64_t who; uint16_t iflags, type; uint32_t access_mask; + boolean_t an_exec_denied = B_FALSE; mode = (zp->z_phys->zp_mode & (S_IFMT | S_ISUID | S_ISGID | S_ISVTX)); @@ -905,8 +906,26 @@ } } } + } else { + /* + * Only care if this IDENTIFIER_GROUP or + * USER ACE denies execute access to someone, + * mode is not affected + */ + if ((access_mask & ACE_EXECUTE) && type == DENY) + an_exec_denied = B_TRUE; } } + + if (!an_exec_denied && !(seen & (S_IXUSR | S_IXGRP | S_IXOTH)) || + !(mode & (S_IXUSR | S_IXGRP | S_IXOTH))) + an_exec_denied = B_TRUE; + + if (an_exec_denied) + zp->z_phys->zp_flags &= ~ZFS_NO_EXECS_DENIED; + else + zp->z_phys->zp_flags |= ZFS_NO_EXECS_DENIED; + return (mode); } @@ -960,8 +979,14 @@ ASSERT(MUTEX_HELD(&zp->z_acl_lock)); + if (zp->z_acl_cached) { + *aclpp = zp->z_acl_cached; + return (0); + } + if (zp->z_phys->zp_acl.z_acl_extern_obj == 0) { *aclpp = zfs_acl_node_read_internal(zp, will_modify); + zp->z_acl_cached = *aclpp; return (0); } @@ -994,7 +1019,7 @@ return (error); } - *aclpp = aclp; + zp->z_acl_cached = *aclpp = aclp; return (0); } @@ -1019,6 +1044,11 @@ dmu_buf_will_dirty(zp->z_dbuf, tx); + if (zp->z_acl_cached != aclp && zp->z_acl_cached) { + zfs_acl_free(zp->z_acl_cached); + zp->z_acl_cached = NULL; + } + zphys->zp_mode = zfs_mode_compute(zp, aclp); /* @@ -1606,6 +1636,7 @@ if (error == 0) { (*aclp)->z_hints = zp->z_phys->zp_flags & V4_ACL_WIDE_FLAGS; zfs_acl_chmod(zp->z_zfsvfs, zp->z_phys->zp_uid, mode, *aclp); + zp->z_acl_cached = *aclp; } mutex_exit(&zp->z_acl_lock); mutex_exit(&zp->z_lock); @@ -1869,7 +1900,6 @@ mutex_exit(&dzp->z_acl_lock); acl_ids->z_aclp = zfs_acl_inherit(zfsvfs, vap->va_type, paclp, acl_ids->z_mode, &need_chmod); - zfs_acl_free(paclp); } else { acl_ids->z_aclp = zfs_acl_alloc(zfs_acl_version_zp(dzp)); @@ -1998,8 +2028,6 @@ mutex_exit(&zp->z_acl_lock); - zfs_acl_free(aclp); - return (0); } @@ -2095,11 +2123,6 @@ aclp->z_hints |= (zp->z_phys->zp_flags & V4_ACL_WIDE_FLAGS); } top: - if (error = zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr)) { - zfs_acl_free(aclp); - return (error); - } - mutex_enter(&zp->z_lock); mutex_enter(&zp->z_acl_lock); @@ -2154,7 +2177,7 @@ if (fuidp) zfs_fuid_info_free(fuidp); - zfs_acl_free(aclp); + zp->z_acl_cached = aclp; dmu_tx_commit(tx); done: mutex_exit(&zp->z_acl_lock); @@ -2301,7 +2324,6 @@ checkit = B_TRUE; break; } else { - zfs_acl_free(aclp); mutex_exit(&zp->z_acl_lock); return (EIO); } @@ -2334,7 +2356,6 @@ } mutex_exit(&zp->z_acl_lock); - zfs_acl_free(aclp); /* Put the found 'denies' back on the working mode */ if (deny_mask) { @@ -2420,6 +2441,72 @@ check_privs, B_FALSE, cr)); } +int +zfs_fastaccesschk_execute(znode_t *zdp, cred_t *cr) +{ + boolean_t owner = B_FALSE; + boolean_t groupmbr = B_FALSE; + boolean_t is_attr; + uid_t fowner; + uid_t gowner; + uid_t uid = crgetuid(cr); + int error; + + if (zdp->z_phys->zp_flags & ZFS_AV_QUARANTINED) + return (EACCES); + + is_attr = ((zdp->z_phys->zp_flags & ZFS_XATTR) && + (ZTOV(zdp)->v_type == VDIR)); + if (is_attr) + goto slow; + + mutex_enter(&zdp->z_acl_lock); + + if (zdp->z_phys->zp_flags & ZFS_NO_EXECS_DENIED) { + mutex_exit(&zdp->z_acl_lock); + return (0); + } + + if (FUID_INDEX(zdp->z_phys->zp_uid) != 0 || + FUID_INDEX(zdp->z_phys->zp_gid) != 0) { + mutex_exit(&zdp->z_acl_lock); + goto slow; + } + + fowner = (uid_t)zdp->z_phys->zp_uid; + gowner = (uid_t)zdp->z_phys->zp_gid; + + if (uid == fowner) { + owner = B_TRUE; + if (zdp->z_phys->zp_mode & S_IXUSR) { + mutex_exit(&zdp->z_acl_lock); + return (0); + } + } + if (groupmember(gowner, cr)) { + groupmbr = B_TRUE; + if (zdp->z_phys->zp_mode & S_IXGRP) { + mutex_exit(&zdp->z_acl_lock); + return (0); + } + } + if (!owner && !groupmbr) { + if (zdp->z_phys->zp_mode & S_IXOTH) { + mutex_exit(&zdp->z_acl_lock); + return (0); + } + } + + mutex_exit(&zdp->z_acl_lock); + +slow: + DTRACE_PROBE(zfs__fastpath__execute__access__miss); + ZFS_ENTER(zdp->z_zfsvfs); + error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr); + ZFS_EXIT(zdp->z_zfsvfs); + return (error); +} + /* * Determine whether Access should be granted/denied, invoking least * priv subsytem when a deny is determined. diff -r 13d7f3eec672 -r b4907297e740 usr/src/uts/common/fs/zfs/zfs_vnops.c --- a/usr/src/uts/common/fs/zfs/zfs_vnops.c Fri Jun 26 17:26:34 2009 -0700 +++ b/usr/src/uts/common/fs/zfs/zfs_vnops.c Sat Jun 27 19:22:00 2009 -0600 @@ -988,6 +988,27 @@ } /* + * If vnode is for a device return a specfs vnode instead. + */ +static int +specvp_check(vnode_t **vpp, cred_t *cr) +{ + int error = 0; + + if (IS_DEVVP(*vpp)) { + struct vnode *svp; + + svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr); + VN_RELE(*vpp); + if (svp == NULL) + error = ENOSYS; + *vpp = svp; + } + return (error); +} + + +/* * Lookup an entry in a directory, or an extended attribute directory. * If it exists, return a held vnode reference for it. * @@ -1017,7 +1038,46 @@ { znode_t *zdp = VTOZ(dvp); zfsvfs_t *zfsvfs = zdp->z_zfsvfs; - int error; + int error = 0; + + /* fast path */ + if (!(flags & (LOOKUP_XATTR | FIGNORECASE))) { + + if (dvp->v_type != VDIR) { + return (ENOTDIR); + } else if (zdp->z_dbuf == NULL) { + return (EIO); + } + + if (nm[0] == 0 || (nm[0] == '.' && nm[1] == '\0')) { + error = zfs_fastaccesschk_execute(zdp, cr); + if (!error) { + *vpp = dvp; + VN_HOLD(*vpp); + return (0); + } + return (error); + } else { + vnode_t *tvp = dnlc_lookup(dvp, nm); + + if (tvp) { + error = zfs_fastaccesschk_execute(zdp, cr); + if (error) { + VN_RELE(tvp); + return (error); + } + if (tvp == DNLC_NO_VNODE) { + VN_RELE(tvp); + return (ENOENT); + } else { + *vpp = tvp; + return (specvp_check(vpp, cr)); + } + } + } + } + + DTRACE_PROBE2(zfs__fastpath__lookup__miss, vnode_t *, dvp, char *, nm); ZFS_ENTER(zfsvfs); ZFS_VERIFY_ZP(zdp); @@ -1082,21 +1142,8 @@ } error = zfs_dirlook(zdp, nm, vpp, flags, direntflags, realpnp); - if (error == 0) { - /* - * Convert device special files - */ - if (IS_DEVVP(*vpp)) { - vnode_t *svp; - - svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr); - VN_RELE(*vpp); - if (svp == NULL) - error = ENOSYS; - else - *vpp = svp; - } - } + if (error == 0) + error = specvp_check(vpp, cr); ZFS_EXIT(zfsvfs); return (error); @@ -1332,19 +1379,7 @@ VN_RELE(ZTOV(zp)); } else { *vpp = ZTOV(zp); - /* - * If vnode is for a device return a specfs vnode instead. - */ - if (IS_DEVVP(*vpp)) { - struct vnode *svp; - - svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr); - VN_RELE(*vpp); - if (svp == NULL) { - error = ENOSYS; - } - *vpp = svp; - } + error = specvp_check(vpp, cr); } ZFS_EXIT(zfsvfs); @@ -2456,6 +2491,7 @@ top: attrzp = NULL; + /* Can this be moved to before the top label? */ if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { ZFS_EXIT(zfsvfs); return (EROFS); @@ -2856,11 +2892,6 @@ if (attrzp) VN_RELE(ZTOV(attrzp)); - if (aclp) { - zfs_acl_free(aclp); - aclp = NULL; - } - if (fuidp) { zfs_fuid_info_free(fuidp); fuidp = NULL; diff -r 13d7f3eec672 -r b4907297e740 usr/src/uts/common/fs/zfs/zfs_znode.c --- a/usr/src/uts/common/fs/zfs/zfs_znode.c Fri Jun 26 17:26:34 2009 -0700 +++ b/usr/src/uts/common/fs/zfs/zfs_znode.c Sat Jun 27 19:22:00 2009 -0600 @@ -133,6 +133,7 @@ zp->z_dbuf = NULL; zp->z_dirlocks = NULL; + zp->z_acl_cached = NULL; return (0); } @@ -1081,6 +1082,11 @@ list_remove(&zfsvfs->z_all_znodes, zp); mutex_exit(&zfsvfs->z_znodes_lock); + if (zp->z_acl_cached) { + zfs_acl_free(zp->z_acl_cached); + zp->z_acl_cached = NULL; + } + kmem_cache_free(znode_cache, zp); VFS_RELE(zfsvfs->z_vfs);