usr/src/uts/common/fs/nfs/nfs4_dispatch.c
changeset 74 524df0e4e452
       
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <rpc/types.h>
#include <rpc/auth.h>
#include <rpc/auth_unix.h>
#include <rpc/auth_des.h>
#include <rpc/svc.h>
#include <rpc/xdr.h>
#include <nfs/nfs4.h>
#include <nfs/nfs_dispatch.h>
#include <nfs/nfs4_drc.h>

/*
 * This is the duplicate request cache for NFSv4.
 */
rfs4_drc_t *nfs4_drc = NULL;

/*
 * How long (in seconds) an entry may remain in the cache
 * after its reply has been sent to the client, if it has not
 * been used again to answer a retransmission.
 */
unsigned nfs4_drc_lifetime = 1;

/*
 * The default size (in entries) of the duplicate request cache.
 */
uint32_t nfs4_drc_max = 8 * 1024;

/*
 * The number of buckets we'd like to hash the
 * replies into.  Do not change this on the fly.
 */
uint32_t nfs4_drc_hash = 541;
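
/*
 * These tunables are consumed by rfs4_init_drc() below; the NFSv4
 * server start-up path is expected to create the global cache with a
 * call along the lines of the following (the actual call site is
 * outside this file):
 *
 *	nfs4_drc = rfs4_init_drc(nfs4_drc_max, nfs4_drc_hash,
 *	    nfs4_drc_lifetime);
 */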
       
/*
 * Initialize a duplicate request cache.
 */
rfs4_drc_t *
rfs4_init_drc(uint32_t drc_size, uint32_t drc_hash_size, unsigned ttl)
{
	rfs4_drc_t *drc;
	uint32_t   bki;

	ASSERT(drc_size);
	ASSERT(drc_hash_size);

	drc = kmem_alloc(sizeof (rfs4_drc_t), KM_SLEEP);

	drc->max_size = drc_size;
	drc->in_use = 0;
	drc->drc_ttl = ttl;

	mutex_init(&drc->lock, NULL, MUTEX_DEFAULT, NULL);

	drc->dr_hash = drc_hash_size;

	drc->dr_buckets = kmem_alloc(sizeof (list_t)*drc_hash_size, KM_SLEEP);

	for (bki = 0; bki < drc_hash_size; bki++) {
		list_create(&drc->dr_buckets[bki], sizeof (rfs4_dupreq_t),
		    offsetof(rfs4_dupreq_t, dr_bkt_next));
	}

	list_create(&(drc->dr_cache), sizeof (rfs4_dupreq_t),
	    offsetof(rfs4_dupreq_t, dr_next));

	return (drc);
}
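
/*
 * Each rfs4_dupreq_t ends up linked onto two lists at once: the per-XID
 * hash bucket (via dr_bkt_next), used for lookups, and the global
 * dr_cache list (via dr_next), which is kept in rough time order so
 * that reclaimable entries gravitate toward the tail.
 */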
       
/*
 * Destroy a duplicate request cache.
 */
void
rfs4_fini_drc(rfs4_drc_t *drc)
{
	rfs4_dupreq_t *drp, *drp_next;

	ASSERT(drc);

	/* iterate over the dr_cache and free the entries */
	for (drp = list_head(&(drc->dr_cache)); drp != NULL; drp = drp_next) {

		if (drp->dr_state == NFS4_DUP_REPLAY)
			rfs4_compound_free(&(drp->dr_res));

		if (drp->dr_addr.buf != NULL)
			kmem_free(drp->dr_addr.buf, drp->dr_addr.maxlen);

		drp_next = list_next(&(drc->dr_cache), drp);

		kmem_free(drp, sizeof (rfs4_dupreq_t));
	}

	mutex_destroy(&drc->lock);
	kmem_free(drc->dr_buckets, sizeof (list_t)*drc->dr_hash);
	kmem_free(drc, sizeof (rfs4_drc_t));
}
       
/*
 * rfs4_dr_chstate:
 *
 * Change the state of a rfs4_dupreq. If it's not in transition
 * to the FREE state, update the time used and return. If we
 * are moving to the FREE state then we need to clean up the
 * compound results and take the entry off the bucket and
 * dr_cache lists.
 */
void
rfs4_dr_chstate(rfs4_dupreq_t *drp, int new_state)
{
	rfs4_drc_t *drc;

	ASSERT(drp);
	ASSERT(drp->drc);
	ASSERT(drp->dr_bkt);
	ASSERT(MUTEX_HELD(&drp->drc->lock));

	drp->dr_state = new_state;

	if (new_state != NFS4_DUP_FREE) {
		gethrestime(&drp->dr_time_used);
		return;
	}

	drc = drp->drc;

	/*
	 * Remove entry from the bucket and
	 * dr_cache list, free compound results.
	 */
	list_remove(drp->dr_bkt, drp);
	list_remove(&(drc->dr_cache), drp);
	rfs4_compound_free(&(drp->dr_res));
}
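
/*
 * A cache entry's dr_state follows a simple lifecycle: it starts out
 * (or is recycled) as NFS4_DUP_NEW while the compound is being
 * processed, moves to NFS4_DUP_REPLAY once the reply has been sent and
 * cached, and becomes NFS4_DUP_FREE when it times out or processing is
 * abandoned, after which it waits on the tail of dr_cache to be
 * reclaimed by rfs4_alloc_dr().
 */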
       
/*
 * rfs4_alloc_dr:
 *
 * Pick an entry off the tail of dr_cache -- use it if it is
 * marked NFS4_DUP_FREE, or if it is an NFS4_DUP_REPLAY entry
 * that has timed out.  Otherwise allocate a new entry, provided
 * we have not reached our maximum cache limit.
 *
 * The list should be in time order, so there is no need to
 * traverse backwards looking for a timed-out entry;
 * NFS4_DUP_FREE entries are placed on the tail.
 */
rfs4_dupreq_t *
rfs4_alloc_dr(rfs4_drc_t *drc)
{
	rfs4_dupreq_t *drp_tail, *drp = NULL;

	ASSERT(drc);
	ASSERT(MUTEX_HELD(&drc->lock));

	if ((drp_tail = list_tail(&drc->dr_cache)) != NULL) {

		switch (drp_tail->dr_state) {

		case NFS4_DUP_FREE:
			list_remove(&(drc->dr_cache), drp_tail);
			DTRACE_PROBE1(nfss__i__drc_freeclaim,
			    rfs4_dupreq_t *, drp_tail);
			return (drp_tail);
			/* NOTREACHED */

		case NFS4_DUP_REPLAY:
			if (gethrestime_sec() >
			    drp_tail->dr_time_used.tv_sec + drc->drc_ttl) {
				/* this entry has timed out so grab it. */
				rfs4_dr_chstate(drp_tail, NFS4_DUP_FREE);
				DTRACE_PROBE1(nfss__i__drc_ttlclaim,
				    rfs4_dupreq_t *, drp_tail);
				return (drp_tail);
			}
			break;
		}
	}

	/*
	 * Didn't find anything to recycle; have
	 * we hit the cache limit?
	 */
	if (drc->in_use >= drc->max_size) {
		DTRACE_PROBE1(nfss__i__drc_full,
		    rfs4_drc_t *, drc);
		return (NULL);
	}

	/* nope, so allocate a new one */
	drp = kmem_zalloc(sizeof (rfs4_dupreq_t), KM_SLEEP);
	drp->drc = drc;
	drc->in_use++;
	gethrestime(&drp->dr_time_created);
	DTRACE_PROBE1(nfss__i__drc_new, rfs4_dupreq_t *, drp);

	return (drp);
}
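
/*
 * Note that the entry returned by rfs4_alloc_dr() is not on any list:
 * a recycled entry has been unlinked from the dr_cache and bucket
 * lists, and a freshly allocated one was never inserted.  It is
 * rfs4_find_dr() that links the entry back in.
 */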
       
/*
 * rfs4_find_dr:
 *
 * Search for an entry in the duplicate request cache by
 * calculating the hash index based on the XID, and examining
 * the entries in the hash bucket. If we find a match, stamp the
 * time_used and return. An entry that does not match may itself
 * be ready to be freed (timed out). If we search the whole bucket
 * without a match and have not exhausted the maximum limit for
 * the cache, we allocate a new entry.
 */
       
int
rfs4_find_dr(struct svc_req *req, rfs4_drc_t *drc, rfs4_dupreq_t **dup)
{

	uint32_t	the_xid;
	list_t		*dr_bkt;
	rfs4_dupreq_t	*drp;
	int		bktdex;

	/*
	 * Get the XID, calculate the bucket and search to
	 * see if we need to replay from the cache.
	 */
	the_xid = req->rq_xprt->xp_xid;
	bktdex = the_xid % drc->dr_hash;

	dr_bkt = (list_t *)&(drc->dr_buckets[bktdex]);

	DTRACE_PROBE3(nfss__i__drc_bktdex,
	    int, bktdex,
	    uint32_t, the_xid,
	    list_t *, dr_bkt);

	*dup = NULL;

	mutex_enter(&drc->lock);
	/*
	 * Search the bucket for a matching xid and address.
	 */
	for (drp = list_head(dr_bkt); drp != NULL;
	    drp = list_next(dr_bkt, drp)) {

		if (drp->dr_xid == the_xid &&
		    drp->dr_addr.len == req->rq_xprt->xp_rtaddr.len &&
		    bcmp((caddr_t)drp->dr_addr.buf,
		    (caddr_t)req->rq_xprt->xp_rtaddr.buf,
		    drp->dr_addr.len) == 0) {

			/*
			 * Found a match so REPLAY the Reply
			 */
			if (drp->dr_state == NFS4_DUP_REPLAY) {
				gethrestime(&drp->dr_time_used);
				mutex_exit(&drc->lock);
				*dup = drp;
				DTRACE_PROBE1(nfss__i__drc_replay,
				    rfs4_dupreq_t *, drp);
				return (NFS4_DUP_REPLAY);
			}

			/*
			 * This entry must be in transition, so return
			 * the 'pending' status.
			 */
			mutex_exit(&drc->lock);
			return (NFS4_DUP_PENDING);
		}

		/*
		 * Not a match, but maybe this entry is ready
		 * to be reused.
		 */
		if (drp->dr_state == NFS4_DUP_REPLAY &&
		    (gethrestime_sec() >
		    drp->dr_time_used.tv_sec + drc->drc_ttl)) {
			rfs4_dr_chstate(drp, NFS4_DUP_FREE);
			list_insert_tail(&(drp->drc->dr_cache), drp);
		}
	}

	drp = rfs4_alloc_dr(drc);
	mutex_exit(&drc->lock);

	if (drp == NULL) {
		return (NFS4_DUP_ERROR);
	}

	/*
	 * Init the state to NEW and clear the time used field;
	 * the entry is linked onto the lists below.
	 */
	drp->dr_state = NFS4_DUP_NEW;
	drp->dr_time_used.tv_sec = drp->dr_time_used.tv_nsec = 0;

	/*
	 * If needed, resize the address buffer
	 */
	if (drp->dr_addr.maxlen < req->rq_xprt->xp_rtaddr.len) {
		if (drp->dr_addr.buf != NULL)
			kmem_free(drp->dr_addr.buf, drp->dr_addr.maxlen);
		drp->dr_addr.maxlen = req->rq_xprt->xp_rtaddr.len;
		drp->dr_addr.buf = kmem_alloc(drp->dr_addr.maxlen, KM_NOSLEEP);
		if (drp->dr_addr.buf == NULL) {
			/*
			 * If the allocation fails, mark the entry
			 * as free and put it on the tail.
			 */
			drp->dr_addr.maxlen = 0;
			drp->dr_state = NFS4_DUP_FREE;
			mutex_enter(&drc->lock);
			list_insert_tail(&(drc->dr_cache), drp);
			mutex_exit(&drc->lock);
			return (NFS4_DUP_ERROR);
		}
	}

	/*
	 * Copy the address.
	 */
	drp->dr_addr.len = req->rq_xprt->xp_rtaddr.len;

	bcopy((caddr_t)req->rq_xprt->xp_rtaddr.buf,
	    (caddr_t)drp->dr_addr.buf,
	    drp->dr_addr.len);

	drp->dr_xid = the_xid;
	drp->dr_bkt = dr_bkt;

	/*
	 * Insert at the head of the bucket and
	 * the drc lists.
	 */
	mutex_enter(&drc->lock);
	list_insert_head(&drc->dr_cache, drp);
	list_insert_head(dr_bkt, drp);
	mutex_exit(&drc->lock);

	*dup = drp;

	return (NFS4_DUP_NEW);
}
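
/*
 * rfs4_find_dr() therefore reports one of four results, which
 * rfs4_dispatch() acts on below: NFS4_DUP_NEW (a fresh entry was
 * cached and the compound must be processed), NFS4_DUP_REPLAY (a
 * cached reply exists and can simply be resent), NFS4_DUP_PENDING
 * (another instance of this request is still being processed, so this
 * duplicate goes unanswered) and NFS4_DUP_ERROR (no entry could be
 * obtained, e.g. the cache is full or an allocation failed).
 */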
       
/*
 * rfs4_dispatch:
 *
 * This function handles the duplicate request cache,
 * NULL_PROC and COMPOUND procedure calls for NFSv4.
 *
 * Passed into this function are:
 *
 * 	disp	A pointer to our dispatch table entry
 * 	req	The request to process
 * 	xprt	The server transport handle
 * 	ap	A pointer to the arguments
 *
 * When appropriate this function is responsible for inserting
 * the reply into the duplicate cache or replaying an existing
 * cached reply.
 *
 * dr_stat	reflects the state of the duplicate request that
 * 		has been inserted into or retrieved from the cache
 *
 * drp		is the duplicate request entry
 */
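
/*
 * A zero return indicates that a reply went out via svc_sendreply();
 * a non-zero return indicates that it did not, because the send
 * failed, the request is a duplicate that is still in progress,
 * resources were unavailable, or an RPC-level error reply was sent
 * instead.
 */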
       
int
rfs4_dispatch(struct rpcdisp *disp, struct svc_req *req,
    SVCXPRT *xprt, char *ap)
{

	COMPOUND4res res_buf, *rbp;
	COMPOUND4args *cap;

	cred_t	*cr = NULL;
	int	error = 0;
	int	dis_flags = 0;
	int	dr_stat = NFS4_NOT_DUP;
	rfs4_dupreq_t *drp = NULL;

	ASSERT(disp);

	/*
	 * Short circuit the RPC_NULL proc.
	 */
	if (disp->dis_proc == rpc_null) {
		if (!svc_sendreply(xprt, xdr_void, NULL)) {
			return (1);
		}
		return (0);
	}

	/* Only NFSv4 Compounds from this point onward */

	rbp = &res_buf;
	cap = (COMPOUND4args *)ap;

	/*
	 * Figure out the disposition of the whole COMPOUND
	 * and record its IDEMPOTENCY.
	 */
	rfs4_compound_flagproc(cap, &dis_flags);

	/*
	 * If NON-IDEMPOTENT then we need to figure out if this
	 * request can be replied from the duplicate cache.
	 *
	 * If this is a new request then we need to insert the
	 * reply into the duplicate cache.
	 */
	if (!(dis_flags & RPC_IDEMPOTENT)) {
		/* look for a replay from the cache or allocate */
		dr_stat = rfs4_find_dr(req, nfs4_drc, &drp);

		switch (dr_stat) {

		case NFS4_DUP_ERROR:
			svcerr_systemerr(xprt);
			return (1);
			/* NOTREACHED */

		case NFS4_DUP_PENDING:
			/*
			 * reply has previously been inserted into the
			 * duplicate cache; however, the reply has
			 * not yet been sent via svc_sendreply()
			 */
			return (1);
			/* NOTREACHED */

		case NFS4_DUP_NEW:
			curthread->t_flag |= T_DONTPEND;
			/* NON-IDEMPOTENT proc call */
			rfs4_compound(cap, rbp, NULL, req, cr);

			curthread->t_flag &= ~T_DONTPEND;
			if (curthread->t_flag & T_WOULDBLOCK) {
				curthread->t_flag &= ~T_WOULDBLOCK;
				/*
				 * mark this entry as FREE and plop it
				 * on the end of the cache list
				 */
				mutex_enter(&drp->drc->lock);
				rfs4_dr_chstate(drp, NFS4_DUP_FREE);
				list_insert_tail(&(drp->drc->dr_cache), drp);
				mutex_exit(&drp->drc->lock);
				return (1);
			}
			drp->dr_res = res_buf;
			break;

		case NFS4_DUP_REPLAY:
			/* replay from the cache */
			rbp = &(drp->dr_res);
			break;
		}
	} else {
		curthread->t_flag |= T_DONTPEND;
		/* IDEMPOTENT proc call */
		rfs4_compound(cap, rbp, NULL, req, cr);

		curthread->t_flag &= ~T_DONTPEND;
		if (curthread->t_flag & T_WOULDBLOCK) {
			curthread->t_flag &= ~T_WOULDBLOCK;
			return (1);
		}
	}

	/*
	 * Send out the replayed reply or the 'real' one.
	 */
	if (!svc_sendreply(xprt, xdr_COMPOUND4res, (char *)rbp)) {
		DTRACE_PROBE2(nfss__e__dispatch_sendfail,
		    SVCXPRT *, xprt,
		    COMPOUND4res *, rbp);
		error++;
	}

	/*
	 * If this reply was just inserted into the duplicate cache,
	 * mark it as available for replay.
	 */
	if (dr_stat == NFS4_DUP_NEW) {
		mutex_enter(&drp->drc->lock);
		rfs4_dr_chstate(drp, NFS4_DUP_REPLAY);
		mutex_exit(&drp->drc->lock);
	} else if (dr_stat == NFS4_NOT_DUP) {
		rfs4_compound_free(rbp);
	}

	return (error);
}