|
1 /* |
|
2 * CDDL HEADER START |
|
3 * |
|
4 * The contents of this file are subject to the terms of the |
|
5 * Common Development and Distribution License, Version 1.0 only |
|
6 * (the "License"). You may not use this file except in compliance |
|
7 * with the License. |
|
8 * |
|
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
|
10 * or http://www.opensolaris.org/os/licensing. |
|
11 * See the License for the specific language governing permissions |
|
12 * and limitations under the License. |
|
13 * |
|
14 * When distributing Covered Code, include this CDDL HEADER in each |
|
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
|
16 * If applicable, add the following below this CDDL HEADER, with the |
|
17 * fields enclosed by brackets "[]" replaced with your own identifying |
|
18 * information: Portions Copyright [yyyy] [name of copyright owner] |
|
19 * |
|
20 * CDDL HEADER END |
|
21 */ |
|
22 |
|
23 /* |
|
24 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. |
|
25 * Use is subject to license terms. |
|
26 */ |
|
27 |
|
28 #pragma ident "%Z%%M% %I% %E% SMI" |
|
29 |
|
30 |
|
31 #include <rpc/types.h> |
|
32 #include <rpc/auth.h> |
|
33 #include <rpc/auth_unix.h> |
|
34 #include <rpc/auth_des.h> |
|
35 #include <rpc/svc.h> |
|
36 #include <rpc/xdr.h> |
|
37 #include <nfs/nfs4.h> |
|
38 #include <nfs/nfs_dispatch.h> |
|
39 #include <nfs/nfs4_drc.h> |
|
40 |
|
41 /* |
|
42 * This is the duplicate request cache for NFSv4 |
|
43 */ |
|
44 rfs4_drc_t *nfs4_drc = NULL; |
|
45 |
|
46 /* |
|
47 * How long the entry can remain in the cache |
|
48 * once it has been sent to the client and not |
|
49 * used in a reply (in seconds) |
|
50 */ |
|
51 unsigned nfs4_drc_lifetime = 1; |
|
52 |
|
53 /* |
|
54 * The default size of the duplicate request cache |
|
55 */ |
|
56 uint32_t nfs4_drc_max = 8 * 1024; |
|
57 |
|
58 /* |
|
59 * The number of buckets we'd like to hash the |
|
60 * replies into.. do not change this on the fly. |
|
61 */ |
|
62 uint32_t nfs4_drc_hash = 541; |
|
63 |
|
64 /* |
|
65 * Initialize a duplicate request cache. |
|
66 */ |
|
67 rfs4_drc_t * |
|
68 rfs4_init_drc(uint32_t drc_size, uint32_t drc_hash_size, unsigned ttl) |
|
69 { |
|
70 rfs4_drc_t *drc; |
|
71 uint32_t bki; |
|
72 |
|
73 ASSERT(drc_size); |
|
74 ASSERT(drc_hash_size); |
|
75 |
|
76 drc = kmem_alloc(sizeof (rfs4_drc_t), KM_SLEEP); |
|
77 |
|
78 drc->max_size = drc_size; |
|
79 drc->in_use = 0; |
|
80 drc->drc_ttl = ttl; |
|
81 |
|
82 mutex_init(&drc->lock, NULL, MUTEX_DEFAULT, NULL); |
|
83 |
|
84 drc->dr_hash = drc_hash_size; |
|
85 |
|
86 drc->dr_buckets = kmem_alloc(sizeof (list_t)*drc_hash_size, KM_SLEEP); |
|
87 |
|
88 for (bki = 0; bki < drc_hash_size; bki++) { |
|
89 list_create(&drc->dr_buckets[bki], sizeof (rfs4_dupreq_t), |
|
90 offsetof(rfs4_dupreq_t, dr_bkt_next)); |
|
91 } |
|
92 |
|
93 list_create(&(drc->dr_cache), sizeof (rfs4_dupreq_t), |
|
94 offsetof(rfs4_dupreq_t, dr_next)); |
|
95 |
|
96 return (drc); |
|
97 } |
|
98 |
|
99 /* |
|
100 * Destroy a duplicate request cache. |
|
101 */ |
|
102 void |
|
103 rfs4_fini_drc(rfs4_drc_t *drc) |
|
104 { |
|
105 rfs4_dupreq_t *drp, *drp_next; |
|
106 |
|
107 ASSERT(drc); |
|
108 |
|
109 /* iterate over the dr_cache and free the enties */ |
|
110 for (drp = list_head(&(drc->dr_cache)); drp != NULL; drp = drp_next) { |
|
111 |
|
112 if (drp->dr_state == NFS4_DUP_REPLAY) |
|
113 rfs4_compound_free(&(drp->dr_res)); |
|
114 |
|
115 if (drp->dr_addr.buf != NULL) |
|
116 kmem_free(drp->dr_addr.buf, drp->dr_addr.maxlen); |
|
117 |
|
118 drp_next = list_next(&(drc->dr_cache), drp); |
|
119 |
|
120 kmem_free(drp, sizeof (rfs4_dupreq_t)); |
|
121 } |
|
122 |
|
123 mutex_destroy(&drc->lock); |
|
124 kmem_free(drc->dr_buckets, |
|
125 sizeof (list_t)*drc->dr_hash); |
|
126 kmem_free(drc, sizeof (rfs4_drc_t)); |
|
127 } |
|
128 |
|
129 /* |
|
130 * rfs4_dr_chstate: |
|
131 * |
|
132 * Change the state of a rfs4_dupreq. If it's not in transition |
|
133 * to the FREE state, update the time used and return. If we |
|
134 * are moving to the FREE state then we need to clean up the |
|
135 * compound results and move the entry to the end of the list. |
|
136 */ |
|
137 void |
|
138 rfs4_dr_chstate(rfs4_dupreq_t *drp, int new_state) |
|
139 { |
|
140 rfs4_drc_t *drc; |
|
141 |
|
142 ASSERT(drp); |
|
143 ASSERT(drp->drc); |
|
144 ASSERT(drp->dr_bkt); |
|
145 ASSERT(MUTEX_HELD(&drp->drc->lock)); |
|
146 |
|
147 drp->dr_state = new_state; |
|
148 |
|
149 if (new_state != NFS4_DUP_FREE) { |
|
150 gethrestime(&drp->dr_time_used); |
|
151 return; |
|
152 } |
|
153 |
|
154 drc = drp->drc; |
|
155 |
|
156 /* |
|
157 * Remove entry from the bucket and |
|
158 * dr_cache list, free compound results. |
|
159 */ |
|
160 list_remove(drp->dr_bkt, drp); |
|
161 list_remove(&(drc->dr_cache), drp); |
|
162 rfs4_compound_free(&(drp->dr_res)); |
|
163 } |
|
164 |
|
165 /* |
|
166 * rfs4_alloc_dr: |
|
167 * |
|
168 * Pick an entry off the tail -- Use if it is |
|
169 * marked NFS4_DUP_FREE, or is an entry in the |
|
170 * NFS4_DUP_REPLAY state that has timed-out... |
|
171 * Otherwise malloc a new one if we have not reached |
|
172 * our maximum cache limit. |
|
173 * |
|
174 * The list should be in time order, so no need |
|
175 * to traverse backwards looking for a timed out |
|
176 * entry, NFS4_DUP_FREE's are place on the tail. |
|
177 */ |
|
178 rfs4_dupreq_t * |
|
179 rfs4_alloc_dr(rfs4_drc_t *drc) |
|
180 { |
|
181 rfs4_dupreq_t *drp_tail, *drp = NULL; |
|
182 |
|
183 ASSERT(drc); |
|
184 ASSERT(MUTEX_HELD(&drc->lock)); |
|
185 |
|
186 if ((drp_tail = list_tail(&drc->dr_cache)) != NULL) { |
|
187 |
|
188 switch (drp_tail->dr_state) { |
|
189 |
|
190 case NFS4_DUP_FREE: |
|
191 list_remove(&(drc->dr_cache), drp_tail); |
|
192 DTRACE_PROBE1(nfss__i__drc_freeclaim, |
|
193 rfs4_dupreq_t *, drp_tail); |
|
194 return (drp_tail); |
|
195 /* NOTREACHED */ |
|
196 |
|
197 case NFS4_DUP_REPLAY: |
|
198 if (gethrestime_sec() > |
|
199 drp_tail->dr_time_used.tv_sec+drc->drc_ttl) { |
|
200 /* this entry has timedout so grab it. */ |
|
201 rfs4_dr_chstate(drp_tail, NFS4_DUP_FREE); |
|
202 DTRACE_PROBE1(nfss__i__drc_ttlclaim, |
|
203 rfs4_dupreq_t *, drp_tail); |
|
204 return (drp_tail); |
|
205 } |
|
206 break; |
|
207 } |
|
208 } |
|
209 |
|
210 /* |
|
211 * Didn't find something to recycle have |
|
212 * we hit the cache limit ? |
|
213 */ |
|
214 if (drc->in_use >= drc->max_size) { |
|
215 DTRACE_PROBE1(nfss__i__drc_full, |
|
216 rfs4_drc_t *, drc); |
|
217 return (NULL); |
|
218 } |
|
219 |
|
220 |
|
221 /* nope, so let's malloc a new one */ |
|
222 drp = kmem_zalloc(sizeof (rfs4_dupreq_t), KM_SLEEP); |
|
223 drp->drc = drc; |
|
224 drc->in_use++; |
|
225 gethrestime(&drp->dr_time_created); |
|
226 DTRACE_PROBE1(nfss__i__drc_new, rfs4_dupreq_t *, drp); |
|
227 |
|
228 return (drp); |
|
229 } |
|
230 |
|
231 /* |
|
232 * rfs4_find_dr: |
|
233 * |
|
234 * Search for an entry in the duplicate request cache by |
|
235 * calculating the hash index based on the XID, and examining |
|
236 * the entries in the hash bucket. If we find a match stamp the |
|
237 * time_used and return. If the entry does not match it could be |
|
238 * ready to be freed. Once we have searched the bucket and we |
|
239 * have not exhausted the maximum limit for the cache we will |
|
240 * allocate a new entry. |
|
241 */ |
|
242 int |
|
243 rfs4_find_dr(struct svc_req *req, rfs4_drc_t *drc, rfs4_dupreq_t **dup) |
|
244 { |
|
245 |
|
246 uint32_t the_xid; |
|
247 list_t *dr_bkt; |
|
248 rfs4_dupreq_t *drp; |
|
249 int bktdex; |
|
250 |
|
251 /* |
|
252 * Get the XID, calculate the bucket and search to |
|
253 * see if we need to replay from the cache. |
|
254 */ |
|
255 the_xid = req->rq_xprt->xp_xid; |
|
256 bktdex = the_xid % drc->dr_hash; |
|
257 |
|
258 dr_bkt = (list_t *) |
|
259 &(drc->dr_buckets[(the_xid % drc->dr_hash)]); |
|
260 |
|
261 DTRACE_PROBE3(nfss__i__drc_bktdex, |
|
262 int, bktdex, |
|
263 uint32_t, the_xid, |
|
264 list_t *, dr_bkt); |
|
265 |
|
266 *dup = NULL; |
|
267 |
|
268 mutex_enter(&drc->lock); |
|
269 /* |
|
270 * Search the bucket for a matching xid and address. |
|
271 */ |
|
272 for (drp = list_head(dr_bkt); drp != NULL; |
|
273 drp = list_next(dr_bkt, drp)) { |
|
274 |
|
275 if (drp->dr_xid == the_xid && |
|
276 drp->dr_addr.len == req->rq_xprt->xp_rtaddr.len && |
|
277 bcmp((caddr_t)drp->dr_addr.buf, |
|
278 (caddr_t)req->rq_xprt->xp_rtaddr.buf, |
|
279 drp->dr_addr.len) == 0) { |
|
280 |
|
281 /* |
|
282 * Found a match so REPLAY the Reply |
|
283 */ |
|
284 if (drp->dr_state == NFS4_DUP_REPLAY) { |
|
285 gethrestime(&drp->dr_time_used); |
|
286 mutex_exit(&drc->lock); |
|
287 *dup = drp; |
|
288 DTRACE_PROBE1(nfss__i__drc_replay, |
|
289 rfs4_dupreq_t *, drp); |
|
290 return (NFS4_DUP_REPLAY); |
|
291 } |
|
292 |
|
293 /* |
|
294 * This entry must be in transition, so return |
|
295 * the 'pending' status. |
|
296 */ |
|
297 mutex_exit(&drc->lock); |
|
298 return (NFS4_DUP_PENDING); |
|
299 } |
|
300 |
|
301 /* |
|
302 * Not a match, but maybe this entry is ready |
|
303 * to be reused. |
|
304 */ |
|
305 if (drp->dr_state == NFS4_DUP_REPLAY && |
|
306 (gethrestime_sec() > |
|
307 drp->dr_time_used.tv_sec+drc->drc_ttl)) { |
|
308 rfs4_dr_chstate(drp, NFS4_DUP_FREE); |
|
309 list_insert_tail(&(drp->drc->dr_cache), drp); |
|
310 } |
|
311 } |
|
312 |
|
313 drp = rfs4_alloc_dr(drc); |
|
314 mutex_exit(&drc->lock); |
|
315 |
|
316 if (drp == NULL) { |
|
317 return (NFS4_DUP_ERROR); |
|
318 } |
|
319 |
|
320 /* |
|
321 * Place at the head of the list, init the state |
|
322 * to NEW and clear the time used field. |
|
323 */ |
|
324 |
|
325 drp->dr_state = NFS4_DUP_NEW; |
|
326 drp->dr_time_used.tv_sec = drp->dr_time_used.tv_nsec = 0; |
|
327 |
|
328 /* |
|
329 * If needed, resize the address buffer |
|
330 */ |
|
331 if (drp->dr_addr.maxlen < req->rq_xprt->xp_rtaddr.len) { |
|
332 if (drp->dr_addr.buf != NULL) |
|
333 kmem_free(drp->dr_addr.buf, drp->dr_addr.maxlen); |
|
334 drp->dr_addr.maxlen = req->rq_xprt->xp_rtaddr.len; |
|
335 drp->dr_addr.buf = kmem_alloc(drp->dr_addr.maxlen, KM_NOSLEEP); |
|
336 if (drp->dr_addr.buf == NULL) { |
|
337 /* |
|
338 * If the malloc fails, mark the entry |
|
339 * as free and put on the tail. |
|
340 */ |
|
341 drp->dr_addr.maxlen = 0; |
|
342 drp->dr_state = NFS4_DUP_FREE; |
|
343 mutex_enter(&drc->lock); |
|
344 list_insert_tail(&(drc->dr_cache), drp); |
|
345 mutex_exit(&drc->lock); |
|
346 return (NFS4_DUP_ERROR); |
|
347 } |
|
348 } |
|
349 |
|
350 |
|
351 /* |
|
352 * Copy the address. |
|
353 */ |
|
354 drp->dr_addr.len = req->rq_xprt->xp_rtaddr.len; |
|
355 |
|
356 bcopy((caddr_t)req->rq_xprt->xp_rtaddr.buf, |
|
357 (caddr_t)drp->dr_addr.buf, |
|
358 drp->dr_addr.len); |
|
359 |
|
360 drp->dr_xid = the_xid; |
|
361 drp->dr_bkt = dr_bkt; |
|
362 |
|
363 /* |
|
364 * Insert at the head of the bucket and |
|
365 * the drc lists.. |
|
366 */ |
|
367 mutex_enter(&drc->lock); |
|
368 list_insert_head(&drc->dr_cache, drp); |
|
369 list_insert_head(dr_bkt, drp); |
|
370 mutex_exit(&drc->lock); |
|
371 |
|
372 *dup = drp; |
|
373 |
|
374 return (NFS4_DUP_NEW); |
|
375 } |
|
376 |
|
/*
 * rfs4_dispatch:
 *
 * This function handles the duplicate request cache,
 * NULL_PROC and COMPOUND procedure calls for NFSv4;
 *
 * Passed into this function are:-
 *
 *	disp	A pointer to our dispatch table entry
 *	req	The request to process
 *	xprt	The server transport handle
 *	ap	A pointer to the arguments
 *
 * When appropriate this function is responsible for inserting
 * the reply into the duplicate cache or replaying an existing
 * cached reply.
 *
 * dr_stat	reflects the state of the duplicate request that
 *		has been inserted into or retrieved from the cache
 *
 * drp		is the duplicate request entry
 *
 * Returns 0 on success, 1 when the reply could not (or must not)
 * be sent.
 */
int
rfs4_dispatch(struct rpcdisp *disp, struct svc_req *req,
    SVCXPRT *xprt, char *ap)
{

	COMPOUND4res res_buf, *rbp;
	COMPOUND4args *cap;

	cred_t *cr = NULL;
	int error = 0;
	int dis_flags = 0;
	int dr_stat = NFS4_NOT_DUP;
	rfs4_dupreq_t *drp = NULL;

	ASSERT(disp);

	/*
	 * Short circuit the RPC_NULL proc.
	 */
	if (disp->dis_proc == rpc_null) {
		if (!svc_sendreply(xprt, xdr_void, NULL)) {
			return (1);
		}
		return (0);
	}

	/* Only NFSv4 Compounds from this point onward */

	rbp = &res_buf;
	cap = (COMPOUND4args *)ap;

	/*
	 * Figure out the disposition of the whole COMPOUND
	 * and record it's IDEMPOTENTCY.
	 */
	rfs4_compound_flagproc(cap, &dis_flags);

	/*
	 * If NON-IDEMPOTENT then we need to figure out if this
	 * request can be replied from the duplicate cache.
	 *
	 * If this is a new request then we need to insert the
	 * reply into the duplicate cache.
	 */
	if (!(dis_flags & RPC_IDEMPOTENT)) {
		/* look for a replay from the cache or allocate */
		dr_stat = rfs4_find_dr(req, nfs4_drc, &drp);

		switch (dr_stat) {

		case NFS4_DUP_ERROR:
			/* cache full or out of memory: fail the call */
			svcerr_systemerr(xprt);
			return (1);
			/* NOTREACHED */

		case NFS4_DUP_PENDING:
			/*
			 * reply has previously been inserted into the
			 * duplicate cache, however the reply has
			 * not yet been sent via svc_sendreply()
			 */
			return (1);
			/* NOTREACHED */

		case NFS4_DUP_NEW:
			/*
			 * NOTE(review): T_DONTPEND/T_WOULDBLOCK appear to
			 * detect that the compound hit a resource shortage
			 * while running -- confirm semantics of these
			 * curthread flags before relying on this comment.
			 */
			curthread->t_flag |= T_DONTPEND;
			/* NON-IDEMPOTENT proc call */
			rfs4_compound(cap, rbp, NULL, req, cr);

			curthread->t_flag &= ~T_DONTPEND;
			if (curthread->t_flag & T_WOULDBLOCK) {
				curthread->t_flag &= ~T_WOULDBLOCK;
				/*
				 * mark this entry as FREE and plop
				 * on the end of the cache list
				 */
				mutex_enter(&drp->drc->lock);
				rfs4_dr_chstate(drp, NFS4_DUP_FREE);
				list_insert_tail(&(drp->drc->dr_cache), drp);
				mutex_exit(&drp->drc->lock);
				return (1);
			}
			/*
			 * Stash the results in the cache entry; it becomes
			 * replayable only after the send below succeeds
			 * (state flips to NFS4_DUP_REPLAY at the bottom).
			 */
			drp->dr_res = res_buf;
			break;

		case NFS4_DUP_REPLAY:
			/* replay from the cache */
			rbp = &(drp->dr_res);
			break;
		}
	} else {
		curthread->t_flag |= T_DONTPEND;
		/* IDEMPOTENT proc call */
		rfs4_compound(cap, rbp, NULL, req, cr);

		curthread->t_flag &= ~T_DONTPEND;
		if (curthread->t_flag & T_WOULDBLOCK) {
			curthread->t_flag &= ~T_WOULDBLOCK;
			return (1);
		}
	}

	/*
	 * Send out the replayed reply or the 'real' one.
	 */
	if (!svc_sendreply(xprt, xdr_COMPOUND4res, (char *)rbp)) {
		DTRACE_PROBE2(nfss__e__dispatch_sendfail,
		    struct svc_req *, xprt,
		    char *, rbp);
		error++;
	}

	/*
	 * If this reply was just inserted into the duplicate cache
	 * mark it as available for replay
	 */
	if (dr_stat == NFS4_DUP_NEW) {
		mutex_enter(&drp->drc->lock);
		rfs4_dr_chstate(drp, NFS4_DUP_REPLAY);
		mutex_exit(&drp->drc->lock);
	} else if (dr_stat == NFS4_NOT_DUP) {
		/* idempotent results are not cached; free them now */
		rfs4_compound_free(rbp);
	}

	return (error);
}