author | rg137905 |
Fri, 24 Jun 2005 19:50:32 -0700 | |
changeset 74 | 524df0e4e452 |
parent 73 | d6a2308c356e |
child 75 | 373b673bc08e |
--- a/usr/src/uts/common/Makefile.files Fri Jun 24 18:31:53 2005 -0700 +++ b/usr/src/uts/common/Makefile.files Fri Jun 24 19:50:32 2005 -0700 @@ -787,7 +787,7 @@ nfs_export.o nfs_log.o nfs_log_xdr.o \ nfs4_srv.o nfs4_state.o nfs4_srv_attr.o \ nfs4_srv_ns.o nfs4_db.o nfs4_srv_deleg.o \ - nfs4_deleg_ops.o nfs4_srv_readdir.o + nfs4_deleg_ops.o nfs4_srv_readdir.o nfs4_dispatch.o PCFS_OBJS += pc_alloc.o pc_dir.o pc_node.o pc_subr.o \ pc_vfsops.o pc_vnops.o
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/fs/nfs/nfs4_dispatch.c Fri Jun 24 19:50:32 2005 -0700
@@ -0,0 +1,562 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+
+#include <rpc/types.h>
+#include <rpc/auth.h>
+#include <rpc/auth_unix.h>
+#include <rpc/auth_des.h>
+#include <rpc/svc.h>
+#include <rpc/xdr.h>
+#include <nfs/nfs4.h>
+#include <nfs/nfs_dispatch.h>
+#include <nfs/nfs4_drc.h>
+
+/*
+ * This is the duplicate request cache for NFSv4
+ */
+rfs4_drc_t *nfs4_drc = NULL;
+
+/*
+ * How long the entry can remain in the cache
+ * once it has been sent to the client and not
+ * used in a reply (in seconds)
+ */
+unsigned nfs4_drc_lifetime = 1;
+
+/*
+ * The default size of the duplicate request cache
+ */
+uint32_t nfs4_drc_max = 8 * 1024;
+
+/*
+ * The number of buckets we'd like to hash the
+ * replies into.. do not change this on the fly.
+ */
+uint32_t nfs4_drc_hash = 541;
+
+/*
+ * Initialize a duplicate request cache.
+ *
+ *	drc_size	maximum number of entries the cache may allocate
+ *	drc_hash_size	number of hash buckets
+ *	ttl		seconds a replayable entry is kept before reuse
+ *
+ * Returns the newly allocated, empty cache.
+ */
+rfs4_drc_t *
+rfs4_init_drc(uint32_t drc_size, uint32_t drc_hash_size, unsigned ttl)
+{
+	rfs4_drc_t *drc;
+	uint32_t bki;
+
+	ASSERT(drc_size);
+	ASSERT(drc_hash_size);
+
+	drc = kmem_alloc(sizeof (rfs4_drc_t), KM_SLEEP);
+
+	drc->max_size = drc_size;
+	drc->in_use = 0;
+	drc->drc_ttl = ttl;
+
+	mutex_init(&drc->lock, NULL, MUTEX_DEFAULT, NULL);
+
+	drc->dr_hash = drc_hash_size;
+
+	drc->dr_buckets = kmem_alloc(sizeof (list_t)*drc_hash_size, KM_SLEEP);
+
+	for (bki = 0; bki < drc_hash_size; bki++) {
+		list_create(&drc->dr_buckets[bki], sizeof (rfs4_dupreq_t),
+		    offsetof(rfs4_dupreq_t, dr_bkt_next));
+	}
+
+	list_create(&(drc->dr_cache), sizeof (rfs4_dupreq_t),
+	    offsetof(rfs4_dupreq_t, dr_next));
+
+	return (drc);
+}
+
+/*
+ * Destroy a duplicate request cache: free every entry on the
+ * dr_cache list (and any results still cached for replay), then
+ * the bucket array and the cache structure itself.
+ */
+void
+rfs4_fini_drc(rfs4_drc_t *drc)
+{
+	rfs4_dupreq_t *drp, *drp_next;
+
+	ASSERT(drc);
+
+	/* iterate over the dr_cache and free the entries */
+	for (drp = list_head(&(drc->dr_cache)); drp != NULL; drp = drp_next) {
+
+		if (drp->dr_state == NFS4_DUP_REPLAY)
+			rfs4_compound_free(&(drp->dr_res));
+
+		if (drp->dr_addr.buf != NULL)
+			kmem_free(drp->dr_addr.buf, drp->dr_addr.maxlen);
+
+		drp_next = list_next(&(drc->dr_cache), drp);
+
+		kmem_free(drp, sizeof (rfs4_dupreq_t));
+	}
+
+	mutex_destroy(&drc->lock);
+	kmem_free(drc->dr_buckets,
+	    sizeof (list_t)*drc->dr_hash);
+	kmem_free(drc, sizeof (rfs4_drc_t));
+}
+
+/*
+ * rfs4_dr_chstate:
+ *
+ * Change the state of a rfs4_dupreq.  The caller must hold the
+ * cache lock.  If the entry is not in transition to the FREE
+ * state, update the time used and return.  If we are moving to
+ * the FREE state, unlink the entry from its hash bucket and from
+ * the dr_cache list and free the compound results held in dr_res.
+ * The entry is left unlinked; callers that want it recycled later
+ * must put it back on the tail of dr_cache themselves.
+ */
+void
+rfs4_dr_chstate(rfs4_dupreq_t *drp, int new_state)
+{
+	rfs4_drc_t *drc;
+
+	ASSERT(drp);
+	ASSERT(drp->drc);
+	ASSERT(drp->dr_bkt);
+	ASSERT(MUTEX_HELD(&drp->drc->lock));
+
+	drp->dr_state = new_state;
+
+	if (new_state != NFS4_DUP_FREE) {
+		gethrestime(&drp->dr_time_used);
+		return;
+	}
+
+	drc = drp->drc;
+
+	/*
+	 * Remove entry from the bucket and
+	 * dr_cache list, free compound results.
+	 */
+	list_remove(drp->dr_bkt, drp);
+	list_remove(&(drc->dr_cache), drp);
+	rfs4_compound_free(&(drp->dr_res));
+}
+
+/*
+ * rfs4_alloc_dr:
+ *
+ * Pick an entry off the tail -- Use if it is
+ * marked NFS4_DUP_FREE, or is an entry in the
+ * NFS4_DUP_REPLAY state that has timed-out...
+ * Otherwise malloc a new one if we have not reached
+ * our maximum cache limit.
+ *
+ * The list should be in time order, so no need
+ * to traverse backwards looking for a timed out
+ * entry, NFS4_DUP_FREE's are placed on the tail.
+ *
+ * Must be called with the cache lock held.  Returns an entry that
+ * is on neither list, or NULL when the cache is full.
+ */
+rfs4_dupreq_t *
+rfs4_alloc_dr(rfs4_drc_t *drc)
+{
+	rfs4_dupreq_t *drp_tail, *drp = NULL;
+
+	ASSERT(drc);
+	ASSERT(MUTEX_HELD(&drc->lock));
+
+	if ((drp_tail = list_tail(&drc->dr_cache)) != NULL) {
+
+		switch (drp_tail->dr_state) {
+
+		case NFS4_DUP_FREE:
+			list_remove(&(drc->dr_cache), drp_tail);
+			DTRACE_PROBE1(nfss__i__drc_freeclaim,
+			    rfs4_dupreq_t *, drp_tail);
+			return (drp_tail);
+			/* NOTREACHED */
+
+		case NFS4_DUP_REPLAY:
+			if (gethrestime_sec() >
+			    drp_tail->dr_time_used.tv_sec+drc->drc_ttl) {
+				/* this entry has timed out so grab it. */
+				rfs4_dr_chstate(drp_tail, NFS4_DUP_FREE);
+				DTRACE_PROBE1(nfss__i__drc_ttlclaim,
+				    rfs4_dupreq_t *, drp_tail);
+				return (drp_tail);
+			}
+			break;
+		}
+	}
+
+	/*
+	 * Didn't find something to recycle; have
+	 * we hit the cache limit?
+	 */
+	if (drc->in_use >= drc->max_size) {
+		DTRACE_PROBE1(nfss__i__drc_full,
+		    rfs4_drc_t *, drc);
+		return (NULL);
+	}
+
+
+	/* nope, so let's malloc a new one */
+	drp = kmem_zalloc(sizeof (rfs4_dupreq_t), KM_SLEEP);
+	drp->drc = drc;
+	drc->in_use++;
+	gethrestime(&drp->dr_time_created);
+	DTRACE_PROBE1(nfss__i__drc_new, rfs4_dupreq_t *, drp);
+
+	return (drp);
+}
+
+/*
+ * rfs4_find_dr:
+ *
+ * Search for an entry in the duplicate request cache by
+ * calculating the hash index based on the XID, and examining
+ * the entries in the hash bucket. If we find a match stamp the
+ * time_used and return. If the entry does not match it could be
+ * ready to be freed. Once we have searched the bucket and we
+ * have not exhausted the maximum limit for the cache we will
+ * allocate a new entry.
+ *
+ * Returns:
+ *	NFS4_DUP_REPLAY		*dup is a cached reply to be resent
+ *	NFS4_DUP_PENDING	a matching entry exists but is not replayable
+ *	NFS4_DUP_NEW		*dup is a fresh entry for this request
+ *	NFS4_DUP_ERROR		no entry could be set up for this request
+ */
+int
+rfs4_find_dr(struct svc_req *req, rfs4_drc_t *drc, rfs4_dupreq_t **dup)
+{
+
+	uint32_t	the_xid;
+	list_t		*dr_bkt;
+	rfs4_dupreq_t	*drp, *drp_next;
+	int		bktdex;
+
+	/*
+	 * Get the XID, calculate the bucket and search to
+	 * see if we need to replay from the cache.
+	 */
+	the_xid = req->rq_xprt->xp_xid;
+	bktdex = the_xid % drc->dr_hash;
+
+	dr_bkt = (list_t *)
+	    &(drc->dr_buckets[(the_xid % drc->dr_hash)]);
+
+	DTRACE_PROBE3(nfss__i__drc_bktdex,
+	    int, bktdex,
+	    uint32_t, the_xid,
+	    list_t *, dr_bkt);
+
+	*dup = NULL;
+
+	mutex_enter(&drc->lock);
+	/*
+	 * Search the bucket for a matching xid and address.
+	 */
+	for (drp = list_head(dr_bkt); drp != NULL; drp = drp_next) {
+		/*
+		 * Fetch the successor first: an expired entry is unlinked
+		 * from this bucket below, after which list_next() can no
+		 * longer be applied to it.
+		 */
+		drp_next = list_next(dr_bkt, drp);
+
+		if (drp->dr_xid == the_xid &&
+		    drp->dr_addr.len == req->rq_xprt->xp_rtaddr.len &&
+		    bcmp((caddr_t)drp->dr_addr.buf,
+		    (caddr_t)req->rq_xprt->xp_rtaddr.buf,
+		    drp->dr_addr.len) == 0) {
+
+			/*
+			 * Found a match so REPLAY the Reply
+			 */
+			if (drp->dr_state == NFS4_DUP_REPLAY) {
+				gethrestime(&drp->dr_time_used);
+				mutex_exit(&drc->lock);
+				*dup = drp;
+				DTRACE_PROBE1(nfss__i__drc_replay,
+				    rfs4_dupreq_t *, drp);
+				return (NFS4_DUP_REPLAY);
+			}
+
+			/*
+			 * This entry must be in transition, so return
+			 * the 'pending' status.
+			 */
+			mutex_exit(&drc->lock);
+			return (NFS4_DUP_PENDING);
+		}
+
+		/*
+		 * Not a match, but maybe this entry is ready
+		 * to be reused.
+		 */
+		if (drp->dr_state == NFS4_DUP_REPLAY &&
+		    (gethrestime_sec() >
+		    drp->dr_time_used.tv_sec+drc->drc_ttl)) {
+			rfs4_dr_chstate(drp, NFS4_DUP_FREE);
+			list_insert_tail(&(drp->drc->dr_cache), drp);
+		}
+	}
+
+	drp = rfs4_alloc_dr(drc);
+	mutex_exit(&drc->lock);
+
+	if (drp == NULL) {
+		return (NFS4_DUP_ERROR);
+	}
+
+	/*
+	 * Place at the head of the list, init the state
+	 * to NEW and clear the time used field.
+	 */
+
+	drp->dr_state = NFS4_DUP_NEW;
+	drp->dr_time_used.tv_sec = drp->dr_time_used.tv_nsec = 0;
+
+	/*
+	 * If needed, resize the address buffer
+	 */
+	if (drp->dr_addr.maxlen < req->rq_xprt->xp_rtaddr.len) {
+		if (drp->dr_addr.buf != NULL)
+			kmem_free(drp->dr_addr.buf, drp->dr_addr.maxlen);
+		drp->dr_addr.maxlen = req->rq_xprt->xp_rtaddr.len;
+		drp->dr_addr.buf = kmem_alloc(drp->dr_addr.maxlen, KM_NOSLEEP);
+		if (drp->dr_addr.buf == NULL) {
+			/*
+			 * If the malloc fails, mark the entry
+			 * as free and put on the tail.
+			 */
+			drp->dr_addr.maxlen = 0;
+			drp->dr_state = NFS4_DUP_FREE;
+			mutex_enter(&drc->lock);
+			list_insert_tail(&(drc->dr_cache), drp);
+			mutex_exit(&drc->lock);
+			return (NFS4_DUP_ERROR);
+		}
+	}
+
+
+	/*
+	 * Copy the address.
+	 */
+	drp->dr_addr.len = req->rq_xprt->xp_rtaddr.len;
+
+	bcopy((caddr_t)req->rq_xprt->xp_rtaddr.buf,
+	    (caddr_t)drp->dr_addr.buf,
+	    drp->dr_addr.len);
+
+	drp->dr_xid = the_xid;
+	drp->dr_bkt = dr_bkt;
+
+	/*
+	 * Insert at the head of the bucket and
+	 * the drc lists..
+	 */
+	mutex_enter(&drc->lock);
+	list_insert_head(&drc->dr_cache, drp);
+	list_insert_head(dr_bkt, drp);
+	mutex_exit(&drc->lock);
+
+	*dup = drp;
+
+	return (NFS4_DUP_NEW);
+}
+
+/*
+ *
+ * This function handles the duplicate request cache,
+ * NULL_PROC and COMPOUND procedure calls for NFSv4;
+ *
+ * Passed into this function are:-
+ *
+ *	disp	A pointer to our dispatch table entry
+ *	req	The request to process
+ *	xprt	The server transport handle
+ *	ap	A pointer to the arguments
+ *
+ *
+ * When appropriate this function is responsible for inserting
+ * the reply into the duplicate cache or replaying an existing
+ * cached reply.
+ *
+ * dr_stat	reflects the state of the duplicate request that
+ *		has been inserted into or retrieved from the cache
+ *
+ * drp		is the duplicate request entry
+ *
+ * Returns 0 if a reply was sent successfully, otherwise the
+ * number of errors to be charged to this request.
+ *
+ */
+int
+rfs4_dispatch(struct rpcdisp *disp, struct svc_req *req,
+    SVCXPRT *xprt, char *ap)
+{
+
+	COMPOUND4res	res_buf, *rbp;
+	COMPOUND4args	*cap;
+
+	cred_t	*cr = NULL;
+	int	error = 0;
+	int	dis_flags = 0;
+	int	dr_stat = NFS4_NOT_DUP;
+	rfs4_dupreq_t *drp = NULL;
+
+	ASSERT(disp);
+
+	/*
+	 * Short circuit the RPC_NULL proc.
+	 */
+	if (disp->dis_proc == rpc_null) {
+		if (!svc_sendreply(xprt, xdr_void, NULL)) {
+			return (1);
+		}
+		return (0);
+	}
+
+	/* Only NFSv4 Compounds from this point onward */
+
+	rbp = &res_buf;
+	cap = (COMPOUND4args *)ap;
+
+	/*
+	 * Figure out the disposition of the whole COMPOUND
+	 * and record its IDEMPOTENCY.
+	 */
+	rfs4_compound_flagproc(cap, &dis_flags);
+
+	/*
+	 * If NON-IDEMPOTENT then we need to figure out if this
+	 * request can be replied from the duplicate cache.
+	 *
+	 * If this is a new request then we need to insert the
+	 * reply into the duplicate cache.
+	 */
+	if (!(dis_flags & RPC_IDEMPOTENT)) {
+		/* look for a replay from the cache or allocate */
+		dr_stat = rfs4_find_dr(req, nfs4_drc, &drp);
+
+		switch (dr_stat) {
+
+		case NFS4_DUP_ERROR:
+			svcerr_systemerr(xprt);
+			return (1);
+			/* NOTREACHED */
+
+		case NFS4_DUP_PENDING:
+			/*
+			 * reply has previously been inserted into the
+			 * duplicate cache, however the reply has
+			 * not yet been sent via svc_sendreply()
+			 */
+			return (1);
+			/* NOTREACHED */
+
+		case NFS4_DUP_NEW:
+			curthread->t_flag |= T_DONTPEND;
+			/* NON-IDEMPOTENT proc call */
+			rfs4_compound(cap, rbp, NULL, req, cr);
+
+			curthread->t_flag &= ~T_DONTPEND;
+
+			/*
+			 * dr_res must hold this call's results before
+			 * rfs4_dr_chstate() is asked to free them below;
+			 * otherwise the results just built in res_buf
+			 * are never released and whatever a recycled
+			 * entry still has in dr_res is freed again.
+			 */
+			drp->dr_res = res_buf;
+
+			if (curthread->t_flag & T_WOULDBLOCK) {
+				curthread->t_flag &= ~T_WOULDBLOCK;
+				/*
+				 * mark this entry as FREE and plop
+				 * on the end of the cache list
+				 */
+				mutex_enter(&drp->drc->lock);
+				rfs4_dr_chstate(drp, NFS4_DUP_FREE);
+				list_insert_tail(&(drp->drc->dr_cache), drp);
+				mutex_exit(&drp->drc->lock);
+				return (1);
+			}
+			break;
+
+		case NFS4_DUP_REPLAY:
+			/* replay from the cache */
+			rbp = &(drp->dr_res);
+			break;
+		}
+	} else {
+		curthread->t_flag |= T_DONTPEND;
+		/* IDEMPOTENT proc call */
+		rfs4_compound(cap, rbp, NULL, req, cr);
+
+		curthread->t_flag &= ~T_DONTPEND;
+		if (curthread->t_flag & T_WOULDBLOCK) {
+			curthread->t_flag &= ~T_WOULDBLOCK;
+			return (1);
+		}
+	}
+
+	/*
+	 * Send out the replayed reply or the 'real' one.
+	 */
+	if (!svc_sendreply(xprt, xdr_COMPOUND4res, (char *)rbp)) {
+		DTRACE_PROBE2(nfss__e__dispatch_sendfail,
+		    struct svc_req *, xprt,
+		    char *, rbp);
+		error++;
+	}
+
+	/*
+	 * If this reply was just inserted into the duplicate cache
+	 * mark it as available for replay
+	 */
+	if (dr_stat == NFS4_DUP_NEW) {
+		mutex_enter(&drp->drc->lock);
+		rfs4_dr_chstate(drp, NFS4_DUP_REPLAY);
+		mutex_exit(&drp->drc->lock);
+	} else if (dr_stat == NFS4_NOT_DUP) {
+		rfs4_compound_free(rbp);
+	}
+
+	return (error);
+}
--- a/usr/src/uts/common/fs/nfs/nfs_server.c Fri Jun 24 18:31:53 2005 -0700 +++ b/usr/src/uts/common/fs/nfs/nfs_server.c Fri Jun 24 19:50:32 2005 -0700 @@ -81,6 +81,8 @@ #include <nfs/nfs_acl.h> #include <nfs/nfs_log.h> #include <nfs/lm.h> +#include <nfs/nfs_dispatch.h> +#include <nfs/nfs4_drc.h> #include <rpcsvc/nfsauth_prot.h> @@ -142,36 +144,6 @@ } /* - * RPC dispatch table - * Indexed by version, proc - */ - -struct rpcdisp { - void (*dis_proc)(); /* proc to call */ - xdrproc_t dis_xdrargs; /* xdr routine to get args */ - xdrproc_t dis_fastxdrargs; /* `fast' xdr routine to get args */ - int dis_argsz; /* sizeof args */ - xdrproc_t dis_xdrres; /* xdr routine to put results */ - xdrproc_t dis_fastxdrres; /* `fast' xdr routine to put results */ - int dis_ressz; /* size of results */ - void (*dis_resfree)(); /* frees space allocated by proc */ - int dis_flags; /* flags, see below */ - fhandle_t *(*dis_getfh)(); /* returns the fhandle for the req */ - void (*dis_flagproc)(); /* calculate dis_flags (nfsv4 only) */ -}; - -#define RPC_IDEMPOTENT 0x1 /* idempotent or not */ -/* - * Be very careful about which NFS procedures get the RPC_ALLOWANON bit. - * Right now, it this bit is on, we ignore the results of per NFS request - * access control. - */ -#define RPC_ALLOWANON 0x2 /* allow anonymous access */ -#define RPC_MAPRESP 0x4 /* use mapped response buffer */ -#define RPC_AVOIDWORK 0x8 /* do work avoidance for dups */ -#define RPC_PUBLICFH_OK 0x10 /* allow use of public filehandle */ - -/* * PUBLICFH_CHECK() checks if the dispatch routine supports * RPC_PUBLICFH_OK, if the filesystem is exported public, and if the * incoming request is using the public filehandle. 
The check duplicates @@ -187,17 +159,8 @@ (exi == exi_public && exportmatch(exi_root, \ &fh->fh_fsid, (fid_t *)&fh->fh_xlen)))) -struct rpc_disptable { - int dis_nprocs; - char **dis_procnames; - kstat_named_t **dis_proccntp; - struct rpcdisp *dis_table; -}; - static void nfs_srv_shutdown_all(int); static void rfs4_server_start(int); -static void rpc_null(caddr_t *, caddr_t *); -static void rfs_error(caddr_t *, caddr_t *); static void nullfree(void); static void rfs_dispatch(struct svc_req *, SVCXPRT *); static void acl_dispatch(struct svc_req *, SVCXPRT *); @@ -279,6 +242,8 @@ static kmutex_t nfs_server_upordown_lock; static kcondvar_t nfs_server_upordown_cv; +int rfs4_dispatch(struct rpcdisp *, struct svc_req *, SVCXPRT *, char *); + /* * RDMA wait variables. */ @@ -342,6 +307,7 @@ nfs_server_upordown = NFS_SERVER_STOPPING; mutex_exit(&nfs_server_upordown_lock); rfs4_state_fini(); + rfs4_fini_drc(nfs4_drc); mutex_enter(&nfs_server_upordown_lock); nfs_server_upordown = NFS_SERVER_STOPPED; cv_signal(&nfs_server_upordown_cv); @@ -547,6 +513,9 @@ } } else { rfs4_state_init(); + nfs4_drc = rfs4_init_drc(nfs4_drc_max, + nfs4_drc_hash, + nfs4_drc_lifetime); } /* @@ -615,7 +584,7 @@ } /* ARGSUSED */ -static void +void rpc_null(caddr_t *argp, caddr_t *resp) { } @@ -663,70 +632,70 @@ xdr_void, NULL_xdrproc_t, 0, xdr_void, NULL_xdrproc_t, 0, nullfree, RPC_IDEMPOTENT, - 0, 0}, + 0}, /* RFS_GETATTR = 1 */ {rfs_getattr, xdr_fhandle, xdr_fastfhandle, sizeof (fhandle_t), xdr_attrstat, xdr_fastattrstat, sizeof (struct nfsattrstat), nullfree, RPC_IDEMPOTENT|RPC_ALLOWANON|RPC_MAPRESP, - rfs_getattr_getfh, 0}, + rfs_getattr_getfh}, /* RFS_SETATTR = 2 */ {rfs_setattr, xdr_saargs, NULL_xdrproc_t, sizeof (struct nfssaargs), xdr_attrstat, xdr_fastattrstat, sizeof (struct nfsattrstat), nullfree, RPC_MAPRESP, - rfs_setattr_getfh, 0}, + rfs_setattr_getfh}, /* RFS_ROOT = 3 *** NO LONGER SUPPORTED *** */ {rfs_error, xdr_void, NULL_xdrproc_t, 0, xdr_void, NULL_xdrproc_t, 0, nullfree, 
RPC_IDEMPOTENT, - 0, 0}, + 0}, /* RFS_LOOKUP = 4 */ {rfs_lookup, xdr_diropargs, NULL_xdrproc_t, sizeof (struct nfsdiropargs), xdr_diropres, xdr_fastdiropres, sizeof (struct nfsdiropres), nullfree, RPC_IDEMPOTENT|RPC_MAPRESP|RPC_PUBLICFH_OK, - rfs_lookup_getfh, 0}, + rfs_lookup_getfh}, /* RFS_READLINK = 5 */ {rfs_readlink, xdr_fhandle, xdr_fastfhandle, sizeof (fhandle_t), xdr_rdlnres, NULL_xdrproc_t, sizeof (struct nfsrdlnres), rfs_rlfree, RPC_IDEMPOTENT, - rfs_readlink_getfh, 0}, + rfs_readlink_getfh}, /* RFS_READ = 6 */ {rfs_read, xdr_readargs, NULL_xdrproc_t, sizeof (struct nfsreadargs), xdr_rdresult, NULL_xdrproc_t, sizeof (struct nfsrdresult), rfs_rdfree, RPC_IDEMPOTENT, - rfs_read_getfh, 0}, + rfs_read_getfh}, /* RFS_WRITECACHE = 7 *** NO LONGER SUPPORTED *** */ {rfs_error, xdr_void, NULL_xdrproc_t, 0, xdr_void, NULL_xdrproc_t, 0, nullfree, RPC_IDEMPOTENT, - 0, 0}, + 0}, /* RFS_WRITE = 8 */ {rfs_write, xdr_writeargs, NULL_xdrproc_t, sizeof (struct nfswriteargs), xdr_attrstat, xdr_fastattrstat, sizeof (struct nfsattrstat), nullfree, RPC_MAPRESP, - rfs_write_getfh, 0}, + rfs_write_getfh}, /* RFS_CREATE = 9 */ {rfs_create, xdr_creatargs, NULL_xdrproc_t, sizeof (struct nfscreatargs), xdr_diropres, xdr_fastdiropres, sizeof (struct nfsdiropres), nullfree, RPC_MAPRESP, - rfs_create_getfh, 0}, + rfs_create_getfh}, /* RFS_REMOVE = 10 */ {rfs_remove, @@ -737,7 +706,7 @@ xdr_enum, NULL_xdrproc_t, sizeof (enum nfsstat), #endif nullfree, RPC_MAPRESP, - rfs_remove_getfh, 0}, + rfs_remove_getfh}, /* RFS_RENAME = 11 */ {rfs_rename, @@ -748,7 +717,7 @@ xdr_enum, NULL_xdrproc_t, sizeof (enum nfsstat), #endif nullfree, RPC_MAPRESP, - rfs_rename_getfh, 0}, + rfs_rename_getfh}, /* RFS_LINK = 12 */ {rfs_link, @@ -759,7 +728,7 @@ xdr_enum, NULL_xdrproc_t, sizeof (enum nfsstat), #endif nullfree, RPC_MAPRESP, - rfs_link_getfh, 0}, + rfs_link_getfh}, /* RFS_SYMLINK = 13 */ {rfs_symlink, @@ -770,14 +739,14 @@ xdr_enum, NULL_xdrproc_t, sizeof (enum nfsstat), #endif nullfree, RPC_MAPRESP, 
- rfs_symlink_getfh, 0}, + rfs_symlink_getfh}, /* RFS_MKDIR = 14 */ {rfs_mkdir, xdr_creatargs, NULL_xdrproc_t, sizeof (struct nfscreatargs), xdr_diropres, xdr_fastdiropres, sizeof (struct nfsdiropres), nullfree, RPC_MAPRESP, - rfs_mkdir_getfh, 0}, + rfs_mkdir_getfh}, /* RFS_RMDIR = 15 */ {rfs_rmdir, @@ -788,21 +757,21 @@ xdr_enum, NULL_xdrproc_t, sizeof (enum nfsstat), #endif nullfree, RPC_MAPRESP, - rfs_rmdir_getfh, 0}, + rfs_rmdir_getfh}, /* RFS_READDIR = 16 */ {rfs_readdir, xdr_rddirargs, NULL_xdrproc_t, sizeof (struct nfsrddirargs), xdr_putrddirres, NULL_xdrproc_t, sizeof (struct nfsrddirres), rfs_rddirfree, RPC_IDEMPOTENT, - rfs_readdir_getfh, 0}, + rfs_readdir_getfh}, /* RFS_STATFS = 17 */ {rfs_statfs, xdr_fhandle, xdr_fastfhandle, sizeof (fhandle_t), xdr_statfs, xdr_faststatfs, sizeof (struct nfsstatfs), nullfree, RPC_IDEMPOTENT|RPC_ALLOWANON|RPC_MAPRESP, - rfs_statfs_getfh, 0}, + rfs_statfs_getfh}, }; static char *rfscallnames_v3[] = { @@ -840,154 +809,154 @@ xdr_void, NULL_xdrproc_t, 0, xdr_void, NULL_xdrproc_t, 0, nullfree, RPC_IDEMPOTENT, - 0, 0}, + 0}, /* RFS3_GETATTR = 1 */ {rfs3_getattr, xdr_nfs_fh3, xdr_fastnfs_fh3, sizeof (GETATTR3args), xdr_GETATTR3res, NULL_xdrproc_t, sizeof (GETATTR3res), nullfree, (RPC_IDEMPOTENT | RPC_ALLOWANON), - rfs3_getattr_getfh, 0}, + rfs3_getattr_getfh}, /* RFS3_SETATTR = 2 */ {rfs3_setattr, xdr_SETATTR3args, NULL_xdrproc_t, sizeof (SETATTR3args), xdr_SETATTR3res, NULL_xdrproc_t, sizeof (SETATTR3res), nullfree, 0, - rfs3_setattr_getfh, 0}, + rfs3_setattr_getfh}, /* RFS3_LOOKUP = 3 */ {rfs3_lookup, xdr_diropargs3, NULL_xdrproc_t, sizeof (LOOKUP3args), xdr_LOOKUP3res, NULL_xdrproc_t, sizeof (LOOKUP3res), nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK), - rfs3_lookup_getfh, 0}, + rfs3_lookup_getfh}, /* RFS3_ACCESS = 4 */ {rfs3_access, xdr_ACCESS3args, NULL_xdrproc_t, sizeof (ACCESS3args), xdr_ACCESS3res, NULL_xdrproc_t, sizeof (ACCESS3res), nullfree, RPC_IDEMPOTENT, - rfs3_access_getfh, 0}, + rfs3_access_getfh}, /* 
RFS3_READLINK = 5 */ {rfs3_readlink, xdr_nfs_fh3, xdr_fastnfs_fh3, sizeof (READLINK3args), xdr_READLINK3res, NULL_xdrproc_t, sizeof (READLINK3res), rfs3_readlink_free, RPC_IDEMPOTENT, - rfs3_readlink_getfh, 0}, + rfs3_readlink_getfh}, /* RFS3_READ = 6 */ {rfs3_read, xdr_READ3args, NULL_xdrproc_t, sizeof (READ3args), xdr_READ3res, NULL_xdrproc_t, sizeof (READ3res), rfs3_read_free, RPC_IDEMPOTENT, - rfs3_read_getfh, 0}, + rfs3_read_getfh}, /* RFS3_WRITE = 7 */ {rfs3_write, xdr_WRITE3args, NULL_xdrproc_t, sizeof (WRITE3args), xdr_WRITE3res, NULL_xdrproc_t, sizeof (WRITE3res), nullfree, 0, - rfs3_write_getfh, 0}, + rfs3_write_getfh}, /* RFS3_CREATE = 8 */ {rfs3_create, xdr_CREATE3args, NULL_xdrproc_t, sizeof (CREATE3args), xdr_CREATE3res, NULL_xdrproc_t, sizeof (CREATE3res), nullfree, 0, - rfs3_create_getfh, 0}, + rfs3_create_getfh}, /* RFS3_MKDIR = 9 */ {rfs3_mkdir, xdr_MKDIR3args, NULL_xdrproc_t, sizeof (MKDIR3args), xdr_MKDIR3res, NULL_xdrproc_t, sizeof (MKDIR3res), nullfree, 0, - rfs3_mkdir_getfh, 0}, + rfs3_mkdir_getfh}, /* RFS3_SYMLINK = 10 */ {rfs3_symlink, xdr_SYMLINK3args, NULL_xdrproc_t, sizeof (SYMLINK3args), xdr_SYMLINK3res, NULL_xdrproc_t, sizeof (SYMLINK3res), nullfree, 0, - rfs3_symlink_getfh, 0}, + rfs3_symlink_getfh}, /* RFS3_MKNOD = 11 */ {rfs3_mknod, xdr_MKNOD3args, NULL_xdrproc_t, sizeof (MKNOD3args), xdr_MKNOD3res, NULL_xdrproc_t, sizeof (MKNOD3res), nullfree, 0, - rfs3_mknod_getfh, 0}, + rfs3_mknod_getfh}, /* RFS3_REMOVE = 12 */ {rfs3_remove, xdr_diropargs3, NULL_xdrproc_t, sizeof (REMOVE3args), xdr_REMOVE3res, NULL_xdrproc_t, sizeof (REMOVE3res), nullfree, 0, - rfs3_remove_getfh, 0}, + rfs3_remove_getfh}, /* RFS3_RMDIR = 13 */ {rfs3_rmdir, xdr_diropargs3, NULL_xdrproc_t, sizeof (RMDIR3args), xdr_RMDIR3res, NULL_xdrproc_t, sizeof (RMDIR3res), nullfree, 0, - rfs3_rmdir_getfh, 0}, + rfs3_rmdir_getfh}, /* RFS3_RENAME = 14 */ {rfs3_rename, xdr_RENAME3args, NULL_xdrproc_t, sizeof (RENAME3args), xdr_RENAME3res, NULL_xdrproc_t, sizeof (RENAME3res), 
nullfree, 0, - rfs3_rename_getfh, 0}, + rfs3_rename_getfh}, /* RFS3_LINK = 15 */ {rfs3_link, xdr_LINK3args, NULL_xdrproc_t, sizeof (LINK3args), xdr_LINK3res, NULL_xdrproc_t, sizeof (LINK3res), nullfree, 0, - rfs3_link_getfh, 0}, + rfs3_link_getfh}, /* RFS3_READDIR = 16 */ {rfs3_readdir, xdr_READDIR3args, NULL_xdrproc_t, sizeof (READDIR3args), xdr_READDIR3res, NULL_xdrproc_t, sizeof (READDIR3res), rfs3_readdir_free, RPC_IDEMPOTENT, - rfs3_readdir_getfh, 0}, + rfs3_readdir_getfh}, /* RFS3_READDIRPLUS = 17 */ {rfs3_readdirplus, xdr_READDIRPLUS3args, NULL_xdrproc_t, sizeof (READDIRPLUS3args), xdr_READDIRPLUS3res, NULL_xdrproc_t, sizeof (READDIRPLUS3res), rfs3_readdirplus_free, RPC_AVOIDWORK, - rfs3_readdirplus_getfh, 0}, + rfs3_readdirplus_getfh}, /* RFS3_FSSTAT = 18 */ {rfs3_fsstat, xdr_nfs_fh3, xdr_fastnfs_fh3, sizeof (FSSTAT3args), xdr_FSSTAT3res, NULL_xdrproc_t, sizeof (FSSTAT3res), nullfree, RPC_IDEMPOTENT, - rfs3_fsstat_getfh, 0}, + rfs3_fsstat_getfh}, /* RFS3_FSINFO = 19 */ {rfs3_fsinfo, xdr_nfs_fh3, xdr_fastnfs_fh3, sizeof (FSINFO3args), xdr_FSINFO3res, NULL_xdrproc_t, sizeof (FSINFO3res), nullfree, RPC_IDEMPOTENT|RPC_ALLOWANON, - rfs3_fsinfo_getfh, 0}, + rfs3_fsinfo_getfh}, /* RFS3_PATHCONF = 20 */ {rfs3_pathconf, xdr_nfs_fh3, xdr_fastnfs_fh3, sizeof (PATHCONF3args), xdr_PATHCONF3res, NULL_xdrproc_t, sizeof (PATHCONF3res), nullfree, RPC_IDEMPOTENT, - rfs3_pathconf_getfh, 0}, + rfs3_pathconf_getfh}, /* RFS3_COMMIT = 21 */ {rfs3_commit, xdr_COMMIT3args, NULL_xdrproc_t, sizeof (COMMIT3args), xdr_COMMIT3res, NULL_xdrproc_t, sizeof (COMMIT3res), nullfree, RPC_IDEMPOTENT, - rfs3_commit_getfh, 0}, + rfs3_commit_getfh}, }; static char *rfscallnames_v4[] = { @@ -1011,14 +980,13 @@ {rpc_null, xdr_void, NULL_xdrproc_t, 0, xdr_void, NULL_xdrproc_t, 0, - nullfree, RPC_IDEMPOTENT, 0, 0}, + nullfree, RPC_IDEMPOTENT, 0}, /* RFS4_compound = 1 */ {rfs4_compound, xdr_COMPOUND4args, NULL_xdrproc_t, sizeof (COMPOUND4args), xdr_COMPOUND4res, NULL_xdrproc_t, sizeof (COMPOUND4res), - 
rfs4_compound_free, 0 /* XXX? RPC_IDEMPOTENT */, - 0, rfs4_compound_flagproc}, + rfs4_compound_free, 0, 0}, }; union rfs_args { @@ -1424,6 +1392,7 @@ return (FALSE); } + static void common_dispatch(struct svc_req *req, SVCXPRT *xprt, rpcvers_t min_vers, rpcvers_t max_vers, char *pgmname, @@ -1461,9 +1430,6 @@ vers = req->rq_vers; if (vers < min_vers || vers > max_vers) { - TRACE_3(TR_FAC_NFS, TR_CMN_DISPATCH_START, - "common_dispatch_start:(%S) proc_num %d xid %x", - "bad version", (int)vers, 0); svcerr_progvers(req->rq_xprt, min_vers, max_vers); error++; cmn_err(CE_NOTE, "%s: bad version number %u", pgmname, vers); @@ -1473,16 +1439,12 @@ which = req->rq_proc; if (which < 0 || which >= disptable[(int)vers].dis_nprocs) { - TRACE_3(TR_FAC_NFS, TR_CMN_DISPATCH_START, - "common_dispatch_start:(%S) proc_num %d xid %x", - "bad proc", which, 0); svcerr_noproc(req->rq_xprt); error++; goto done; } (*(disptable[(int)vers].dis_proccntp))[which].value.ui64++; - DTRACE_PROBE2(nfs__dispatch, struct svc_req *, req, SVCXPRT *, xprt); disp = &disptable[(int)vers].dis_table[which]; @@ -1492,8 +1454,7 @@ */ args = (char *)&args_buf; - TRACE_0(TR_FAC_NFS, TR_SVC_GETARGS_START, - "svc_getargs_start:"); + #ifdef DEBUG if (rfs_no_fast_xdrargs || (auth_flavor == RPCSEC_GSS) || disp->dis_fastxdrargs == NULL_xdrproc_t || @@ -1505,8 +1466,6 @@ #endif bzero(args, disp->dis_argsz); if (!SVC_GETARGS(xprt, disp->dis_xdrargs, args)) { - TRACE_1(TR_FAC_NFS, TR_SVC_GETARGS_END, - "svc_getargs_end:(%S)", "bad"); svcerr_decode(xprt); error++; cmn_err(CE_NOTE, "%s: bad getargs for %u/%d", @@ -1514,22 +1473,20 @@ goto done; } } - TRACE_1(TR_FAC_NFS, TR_SVC_GETARGS_END, - "svc_getargs_end:(%S)", "good"); /* - * Calculate flags (only relevant for nfsv4 compounds) + * If Version 4 use that specific dispatch function. 
*/ - if (disp->dis_flagproc) - (*disp->dis_flagproc)(args, &dis_flags); - - else - dis_flags = disp->dis_flags; + if (req->rq_vers == 4) { + error += rfs4_dispatch(disp, req, xprt, args); + goto done; + } + + dis_flags = disp->dis_flags; /* * Find export information and check authentication, * setting the credential if everything is ok. - * *** NFSv4 Does not do this. */ if (disp->dis_getfh != NULL) { fhandle_t *fh; @@ -1580,11 +1537,8 @@ } #endif - TRACE_0(TR_FAC_NFS, TR_CHECKEXPORT_START, - "checkexport_start:"); exi = checkexport(&fh->fh_fsid, (fid_t *)&fh->fh_xlen); - TRACE_0(TR_FAC_NFS, TR_CHECKEXPORT_END, - "checkexport_end:"); + if (exi != NULL) { publicfh_ok = PUBLICFH_CHECK(disp, exi, fh); @@ -1641,12 +1595,9 @@ case DUP_NEW: case DUP_DROP: curthread->t_flag |= T_DONTPEND; - TRACE_4(TR_FAC_NFS, TR_CMN_PROC_START, - "cmn_proc_start:%p vers %d proc_num %d req %x", - disptable, vers, which, req); + (*disp->dis_proc)(args, res, exi, req, cr); - TRACE_0(TR_FAC_NFS, TR_CMN_PROC_END, - "cmn_proc_end:"); + curthread->t_flag &= ~T_DONTPEND; if (curthread->t_flag & T_WOULDBLOCK) { curthread->t_flag &= ~T_WOULDBLOCK; @@ -1674,12 +1625,9 @@ } else { curthread->t_flag |= T_DONTPEND; - TRACE_4(TR_FAC_NFS, TR_CMN_PROC_START, - "cmn_proc_start:%p vers %d proc_num %d req %x", - disptable, vers, which, req); + (*disp->dis_proc)(args, res, exi, req, cr); - TRACE_0(TR_FAC_NFS, TR_CMN_PROC_END, - "cmn_proc_end:"); + curthread->t_flag &= ~T_DONTPEND; if (curthread->t_flag & T_WOULDBLOCK) { curthread->t_flag &= ~T_WOULDBLOCK; @@ -1731,8 +1679,6 @@ /* * Serialize and send results struct */ - TRACE_0(TR_FAC_NFS, TR_SVC_SENDREPLY_START, - "svc_sendreply_start:"); #ifdef DEBUG if (rfs_no_fast_xdrres == 0 && res != (char *)&res_buf) { #else @@ -1748,8 +1694,6 @@ error++; } } - TRACE_0(TR_FAC_NFS, TR_SVC_SENDREPLY_END, - "svc_sendreply_end:"); /* * Log if needed @@ -1766,19 +1710,13 @@ * have non-idempotent procedures with functions. 
*/ if (disp->dis_resfree != nullfree && dupcached == FALSE) { - TRACE_0(TR_FAC_NFS, TR_SVC_FREERES_START, - "svc_freeres_start:"); (*disp->dis_resfree)(res); - TRACE_0(TR_FAC_NFS, TR_SVC_FREERES_END, - "svc_freeres_end:"); } done: /* * Free arguments struct */ - TRACE_0(TR_FAC_NFS, TR_SVC_FREEARGS_START, - "svc_freeargs_start:"); if (disp) { if (!SVC_FREEARGS(xprt, disp->dis_xdrargs, args)) { cmn_err(CE_NOTE, "%s: bad freeargs", pgmname); @@ -1791,20 +1729,12 @@ } } - TRACE_0(TR_FAC_NFS, TR_SVC_FREEARGS_END, - "svc_freeargs_end:"); - if (exi != NULL) exi_rele(exi); global_svstat_ptr[req->rq_vers][NFS_BADCALLS].value.ui64 += error; global_svstat_ptr[req->rq_vers][NFS_CALLS].value.ui64++; - - - TRACE_1(TR_FAC_NFS, TR_CMN_DISPATCH_END, - "common_dispatch_end:proc_num %d", - which); } static void
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/nfs/nfs4_drc.h Fri Jun 24 19:50:32 2005 -0700
@@ -0,0 +1,91 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _NFS4_DRC_H
+#define	_NFS4_DRC_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * NFSv4 Duplicate Request cache.
+ */
+typedef struct rfs4_drc {
+	kmutex_t	lock;		/* held while lists/in_use change */
+	uint32_t	dr_hash;	/* number of hash buckets */
+	uint32_t	max_size;	/* limit on entries allocated */
+	uint32_t	in_use;		/* entries allocated so far */
+	unsigned	drc_ttl;	/* secs before a REPLAY entry expires */
+	list_t		dr_cache;	/* all entries, linked via dr_next */
+	list_t		*dr_buckets;	/* array of dr_hash bucket lists */
+} rfs4_drc_t;
+
+/*
+ * NFSv4 Duplicate request cache entry.
+ */
+typedef struct rfs4_dupreq {
+	list_node_t	dr_bkt_next;	/* linkage on the hash bucket list */
+	list_node_t	dr_next;	/* linkage on drc->dr_cache */
+	list_t		*dr_bkt;	/* bucket this entry was hashed to */
+	rfs4_drc_t	*drc;		/* cache that owns this entry */
+	int		dr_state;	/* one of NFS4_DUP_* below */
+	timestruc_t	dr_time_created; /* when the entry was allocated */
+	timestruc_t	dr_time_used;	/* last non-FREE state change/replay */
+	uint32_t	dr_xid;		/* RPC transaction id of the request */
+	struct netbuf	dr_addr;	/* copy of the caller's xp_rtaddr */
+	COMPOUND4res	dr_res;		/* results kept for replay */
+} rfs4_dupreq_t;
+
+/*
+ * State of rfs4_dupreq.
+ */
+#define	NFS4_DUP_ERROR		(-1)	/* no entry could be set up */
+#define	NFS4_NOT_DUP		0	/* request did not use the cache */
+#define	NFS4_DUP_NEW		1	/* new entry, reply not yet cached */
+#define	NFS4_DUP_PENDING	2	/* match found, not yet replayable */
+#define	NFS4_DUP_FREE		3	/* entry may be recycled */
+
+#define	NFS4_DUP_REPLAY		4	/* reply cached and replayable */
+
+extern rfs4_drc_t *nfs4_drc;
+extern unsigned nfs4_drc_lifetime;
+extern uint32_t nfs4_drc_max;
+extern uint32_t nfs4_drc_hash;
+
+rfs4_drc_t *rfs4_init_drc(uint32_t, uint32_t, unsigned);
+void rfs4_fini_drc(rfs4_drc_t *);
+void rfs4_dr_chstate(rfs4_dupreq_t *, int);
+rfs4_dupreq_t *rfs4_alloc_dr(rfs4_drc_t *);
+int rfs4_find_dr(struct svc_req *, rfs4_drc_t *, rfs4_dupreq_t **);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _NFS4_DRC_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/nfs/nfs_dispatch.h Fri Jun 24 19:50:32 2005 -0700
@@ -0,0 +1,86 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
+ * All rights reserved.
+ * Use is subject to license terms.
+ */
+
+
+#ifndef _NFS_DISPATCH_H
+#define _NFS_DISPATCH_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * RPC dispatch table
+ * Indexed by version, proc
+ */
+
+struct rpcdisp {
+	void (*dis_proc)(); /* proc to call */
+	xdrproc_t dis_xdrargs; /* xdr routine to get args */
+	xdrproc_t dis_fastxdrargs; /* `fast' xdr routine to get args */
+	int dis_argsz; /* sizeof args */
+	xdrproc_t dis_xdrres; /* xdr routine to put results */
+	xdrproc_t dis_fastxdrres; /* `fast' xdr routine to put results */
+	int dis_ressz; /* size of results */
+	void (*dis_resfree)(); /* frees space allocated by proc */
+	int dis_flags; /* flags, see below */
+	fhandle_t *(*dis_getfh)(); /* returns the fhandle for the req */
+};
+
+#define RPC_IDEMPOTENT 0x1 /* idempotent or not */
+/*
+ * Be very careful about which NFS procedures get the RPC_ALLOWANON bit.
+ * Right now, if this bit is on, we ignore the results of per NFS request
+ * access control.
+ */
+#define RPC_ALLOWANON 0x2 /* allow anonymous access */
+#define RPC_MAPRESP 0x4 /* use mapped response buffer */
+#define RPC_AVOIDWORK 0x8 /* do work avoidance for dups */
+#define RPC_PUBLICFH_OK 0x10 /* allow use of public filehandle */
+
+struct rpc_disptable {
+	int dis_nprocs; /* number of procedures in dis_table */
+	char **dis_procnames; /* presumably per-procedure names; confirm */
+	kstat_named_t **dis_proccntp; /* per-procedure call counters */
+	struct rpcdisp *dis_table; /* dispatch entries, indexed by proc */
+};
+
+void rpc_null(caddr_t *, caddr_t *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _NFS_DISPATCH_H */