author | ek110237 |
Mon, 31 Oct 2005 22:53:57 -0800 | |
changeset 806 | 849fb015aa25 |
parent 76 | c6ba53ffbc0e |
child 1267 | 4e080122b679 |
permissions | -rw-r--r-- |
74 | 1 |
/* |
2 |
* CDDL HEADER START |
|
3 |
* |
|
4 |
* The contents of this file are subject to the terms of the |
|
5 |
* Common Development and Distribution License, Version 1.0 only |
|
6 |
* (the "License"). You may not use this file except in compliance |
|
7 |
* with the License. |
|
8 |
* |
|
9 |
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
|
10 |
* or http://www.opensolaris.org/os/licensing. |
|
11 |
* See the License for the specific language governing permissions |
|
12 |
* and limitations under the License. |
|
13 |
* |
|
14 |
* When distributing Covered Code, include this CDDL HEADER in each |
|
15 |
* file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
|
16 |
* If applicable, add the following below this CDDL HEADER, with the |
|
17 |
* fields enclosed by brackets "[]" replaced with your own identifying |
|
18 |
* information: Portions Copyright [yyyy] [name of copyright owner] |
|
19 |
* |
|
20 |
* CDDL HEADER END |
|
21 |
*/ |
|
22 |
||
23 |
/* |
|
24 |
* Copyright 2005 Sun Microsystems, Inc. All rights reserved. |
|
25 |
* Use is subject to license terms. |
|
26 |
*/ |
|
27 |
||
28 |
#pragma ident "%Z%%M% %I% %E% SMI" |
|
29 |
||
76
c6ba53ffbc0e
6257280 assertion triggered in nfs/rpc code (fix non-debug build)
rg137905
parents:
74
diff
changeset
|
30 |
#include <sys/systm.h> |
c6ba53ffbc0e
6257280 assertion triggered in nfs/rpc code (fix non-debug build)
rg137905
parents:
74
diff
changeset
|
31 |
#include <sys/sdt.h> |
74 | 32 |
#include <rpc/types.h> |
33 |
#include <rpc/auth.h> |
|
34 |
#include <rpc/auth_unix.h> |
|
35 |
#include <rpc/auth_des.h> |
|
36 |
#include <rpc/svc.h> |
|
37 |
#include <rpc/xdr.h> |
|
38 |
#include <nfs/nfs4.h> |
|
39 |
#include <nfs/nfs_dispatch.h> |
|
40 |
#include <nfs/nfs4_drc.h> |
|
41 |
||
/*
 * This is the duplicate request cache for NFSv4.
 * Allocated by the server init path and consulted by rfs4_dispatch().
 */
rfs4_drc_t *nfs4_drc = NULL;

/*
 * How long the entry can remain in the cache
 * once it has been sent to the client and not
 * used in a reply (in seconds).
 */
unsigned nfs4_drc_lifetime = 1;

/*
 * The default size (maximum number of entries) of the
 * duplicate request cache.
 */
uint32_t nfs4_drc_max = 8 * 1024;

/*
 * The number of buckets we'd like to hash the
 * replies into.. do not change this on the fly.
 * (rfs4_find_dr() indexes buckets with xid % dr_hash.)
 */
uint32_t nfs4_drc_hash = 541;
|
64 |
||
65 |
/* |
|
66 |
* Initialize a duplicate request cache. |
|
67 |
*/ |
|
68 |
rfs4_drc_t * |
|
69 |
rfs4_init_drc(uint32_t drc_size, uint32_t drc_hash_size, unsigned ttl) |
|
70 |
{ |
|
71 |
rfs4_drc_t *drc; |
|
72 |
uint32_t bki; |
|
73 |
||
74 |
ASSERT(drc_size); |
|
75 |
ASSERT(drc_hash_size); |
|
76 |
||
77 |
drc = kmem_alloc(sizeof (rfs4_drc_t), KM_SLEEP); |
|
78 |
||
79 |
drc->max_size = drc_size; |
|
80 |
drc->in_use = 0; |
|
81 |
drc->drc_ttl = ttl; |
|
82 |
||
83 |
mutex_init(&drc->lock, NULL, MUTEX_DEFAULT, NULL); |
|
84 |
||
85 |
drc->dr_hash = drc_hash_size; |
|
86 |
||
87 |
drc->dr_buckets = kmem_alloc(sizeof (list_t)*drc_hash_size, KM_SLEEP); |
|
88 |
||
89 |
for (bki = 0; bki < drc_hash_size; bki++) { |
|
90 |
list_create(&drc->dr_buckets[bki], sizeof (rfs4_dupreq_t), |
|
91 |
offsetof(rfs4_dupreq_t, dr_bkt_next)); |
|
92 |
} |
|
93 |
||
94 |
list_create(&(drc->dr_cache), sizeof (rfs4_dupreq_t), |
|
95 |
offsetof(rfs4_dupreq_t, dr_next)); |
|
96 |
||
97 |
return (drc); |
|
98 |
} |
|
99 |
||
100 |
/* |
|
101 |
* Destroy a duplicate request cache. |
|
102 |
*/ |
|
103 |
void |
|
104 |
rfs4_fini_drc(rfs4_drc_t *drc) |
|
105 |
{ |
|
106 |
rfs4_dupreq_t *drp, *drp_next; |
|
107 |
||
108 |
ASSERT(drc); |
|
109 |
||
110 |
/* iterate over the dr_cache and free the enties */ |
|
111 |
for (drp = list_head(&(drc->dr_cache)); drp != NULL; drp = drp_next) { |
|
112 |
||
113 |
if (drp->dr_state == NFS4_DUP_REPLAY) |
|
114 |
rfs4_compound_free(&(drp->dr_res)); |
|
115 |
||
116 |
if (drp->dr_addr.buf != NULL) |
|
117 |
kmem_free(drp->dr_addr.buf, drp->dr_addr.maxlen); |
|
118 |
||
119 |
drp_next = list_next(&(drc->dr_cache), drp); |
|
120 |
||
121 |
kmem_free(drp, sizeof (rfs4_dupreq_t)); |
|
122 |
} |
|
123 |
||
124 |
mutex_destroy(&drc->lock); |
|
125 |
kmem_free(drc->dr_buckets, |
|
126 |
sizeof (list_t)*drc->dr_hash); |
|
127 |
kmem_free(drc, sizeof (rfs4_drc_t)); |
|
128 |
} |
|
129 |
||
130 |
/* |
|
131 |
* rfs4_dr_chstate: |
|
132 |
* |
|
133 |
* Change the state of a rfs4_dupreq. If it's not in transition |
|
134 |
* to the FREE state, update the time used and return. If we |
|
135 |
* are moving to the FREE state then we need to clean up the |
|
136 |
* compound results and move the entry to the end of the list. |
|
137 |
*/ |
|
138 |
void |
|
139 |
rfs4_dr_chstate(rfs4_dupreq_t *drp, int new_state) |
|
140 |
{ |
|
141 |
rfs4_drc_t *drc; |
|
142 |
||
143 |
ASSERT(drp); |
|
144 |
ASSERT(drp->drc); |
|
145 |
ASSERT(drp->dr_bkt); |
|
146 |
ASSERT(MUTEX_HELD(&drp->drc->lock)); |
|
147 |
||
148 |
drp->dr_state = new_state; |
|
149 |
||
150 |
if (new_state != NFS4_DUP_FREE) { |
|
151 |
gethrestime(&drp->dr_time_used); |
|
152 |
return; |
|
153 |
} |
|
154 |
||
155 |
drc = drp->drc; |
|
156 |
||
157 |
/* |
|
158 |
* Remove entry from the bucket and |
|
159 |
* dr_cache list, free compound results. |
|
160 |
*/ |
|
161 |
list_remove(drp->dr_bkt, drp); |
|
162 |
list_remove(&(drc->dr_cache), drp); |
|
163 |
rfs4_compound_free(&(drp->dr_res)); |
|
164 |
} |
|
165 |
||
166 |
/* |
|
167 |
* rfs4_alloc_dr: |
|
168 |
* |
|
169 |
* Pick an entry off the tail -- Use if it is |
|
170 |
* marked NFS4_DUP_FREE, or is an entry in the |
|
171 |
* NFS4_DUP_REPLAY state that has timed-out... |
|
172 |
* Otherwise malloc a new one if we have not reached |
|
173 |
* our maximum cache limit. |
|
174 |
* |
|
175 |
* The list should be in time order, so no need |
|
176 |
* to traverse backwards looking for a timed out |
|
177 |
* entry, NFS4_DUP_FREE's are place on the tail. |
|
178 |
*/ |
|
179 |
rfs4_dupreq_t * |
|
180 |
rfs4_alloc_dr(rfs4_drc_t *drc) |
|
181 |
{ |
|
182 |
rfs4_dupreq_t *drp_tail, *drp = NULL; |
|
183 |
||
184 |
ASSERT(drc); |
|
185 |
ASSERT(MUTEX_HELD(&drc->lock)); |
|
186 |
||
187 |
if ((drp_tail = list_tail(&drc->dr_cache)) != NULL) { |
|
188 |
||
189 |
switch (drp_tail->dr_state) { |
|
190 |
||
191 |
case NFS4_DUP_FREE: |
|
192 |
list_remove(&(drc->dr_cache), drp_tail); |
|
193 |
DTRACE_PROBE1(nfss__i__drc_freeclaim, |
|
194 |
rfs4_dupreq_t *, drp_tail); |
|
195 |
return (drp_tail); |
|
196 |
/* NOTREACHED */ |
|
197 |
||
198 |
case NFS4_DUP_REPLAY: |
|
199 |
if (gethrestime_sec() > |
|
200 |
drp_tail->dr_time_used.tv_sec+drc->drc_ttl) { |
|
201 |
/* this entry has timedout so grab it. */ |
|
202 |
rfs4_dr_chstate(drp_tail, NFS4_DUP_FREE); |
|
203 |
DTRACE_PROBE1(nfss__i__drc_ttlclaim, |
|
204 |
rfs4_dupreq_t *, drp_tail); |
|
205 |
return (drp_tail); |
|
206 |
} |
|
207 |
break; |
|
208 |
} |
|
209 |
} |
|
210 |
||
211 |
/* |
|
212 |
* Didn't find something to recycle have |
|
213 |
* we hit the cache limit ? |
|
214 |
*/ |
|
215 |
if (drc->in_use >= drc->max_size) { |
|
216 |
DTRACE_PROBE1(nfss__i__drc_full, |
|
217 |
rfs4_drc_t *, drc); |
|
218 |
return (NULL); |
|
219 |
} |
|
220 |
||
221 |
||
222 |
/* nope, so let's malloc a new one */ |
|
223 |
drp = kmem_zalloc(sizeof (rfs4_dupreq_t), KM_SLEEP); |
|
224 |
drp->drc = drc; |
|
225 |
drc->in_use++; |
|
226 |
gethrestime(&drp->dr_time_created); |
|
227 |
DTRACE_PROBE1(nfss__i__drc_new, rfs4_dupreq_t *, drp); |
|
228 |
||
229 |
return (drp); |
|
230 |
} |
|
231 |
||
232 |
/* |
|
233 |
* rfs4_find_dr: |
|
234 |
* |
|
235 |
* Search for an entry in the duplicate request cache by |
|
236 |
* calculating the hash index based on the XID, and examining |
|
237 |
* the entries in the hash bucket. If we find a match stamp the |
|
238 |
* time_used and return. If the entry does not match it could be |
|
239 |
* ready to be freed. Once we have searched the bucket and we |
|
240 |
* have not exhausted the maximum limit for the cache we will |
|
241 |
* allocate a new entry. |
|
242 |
*/ |
|
243 |
int |
|
244 |
rfs4_find_dr(struct svc_req *req, rfs4_drc_t *drc, rfs4_dupreq_t **dup) |
|
245 |
{ |
|
246 |
||
247 |
uint32_t the_xid; |
|
248 |
list_t *dr_bkt; |
|
249 |
rfs4_dupreq_t *drp; |
|
250 |
int bktdex; |
|
251 |
||
252 |
/* |
|
253 |
* Get the XID, calculate the bucket and search to |
|
254 |
* see if we need to replay from the cache. |
|
255 |
*/ |
|
256 |
the_xid = req->rq_xprt->xp_xid; |
|
257 |
bktdex = the_xid % drc->dr_hash; |
|
258 |
||
259 |
dr_bkt = (list_t *) |
|
260 |
&(drc->dr_buckets[(the_xid % drc->dr_hash)]); |
|
261 |
||
262 |
DTRACE_PROBE3(nfss__i__drc_bktdex, |
|
263 |
int, bktdex, |
|
264 |
uint32_t, the_xid, |
|
265 |
list_t *, dr_bkt); |
|
266 |
||
267 |
*dup = NULL; |
|
268 |
||
269 |
mutex_enter(&drc->lock); |
|
270 |
/* |
|
271 |
* Search the bucket for a matching xid and address. |
|
272 |
*/ |
|
273 |
for (drp = list_head(dr_bkt); drp != NULL; |
|
274 |
drp = list_next(dr_bkt, drp)) { |
|
275 |
||
276 |
if (drp->dr_xid == the_xid && |
|
277 |
drp->dr_addr.len == req->rq_xprt->xp_rtaddr.len && |
|
278 |
bcmp((caddr_t)drp->dr_addr.buf, |
|
279 |
(caddr_t)req->rq_xprt->xp_rtaddr.buf, |
|
280 |
drp->dr_addr.len) == 0) { |
|
281 |
||
282 |
/* |
|
283 |
* Found a match so REPLAY the Reply |
|
284 |
*/ |
|
285 |
if (drp->dr_state == NFS4_DUP_REPLAY) { |
|
286 |
gethrestime(&drp->dr_time_used); |
|
287 |
mutex_exit(&drc->lock); |
|
288 |
*dup = drp; |
|
289 |
DTRACE_PROBE1(nfss__i__drc_replay, |
|
290 |
rfs4_dupreq_t *, drp); |
|
291 |
return (NFS4_DUP_REPLAY); |
|
292 |
} |
|
293 |
||
294 |
/* |
|
295 |
* This entry must be in transition, so return |
|
296 |
* the 'pending' status. |
|
297 |
*/ |
|
298 |
mutex_exit(&drc->lock); |
|
299 |
return (NFS4_DUP_PENDING); |
|
300 |
} |
|
301 |
||
302 |
/* |
|
303 |
* Not a match, but maybe this entry is ready |
|
304 |
* to be reused. |
|
305 |
*/ |
|
306 |
if (drp->dr_state == NFS4_DUP_REPLAY && |
|
307 |
(gethrestime_sec() > |
|
308 |
drp->dr_time_used.tv_sec+drc->drc_ttl)) { |
|
309 |
rfs4_dr_chstate(drp, NFS4_DUP_FREE); |
|
310 |
list_insert_tail(&(drp->drc->dr_cache), drp); |
|
311 |
} |
|
312 |
} |
|
313 |
||
314 |
drp = rfs4_alloc_dr(drc); |
|
315 |
mutex_exit(&drc->lock); |
|
316 |
||
317 |
if (drp == NULL) { |
|
318 |
return (NFS4_DUP_ERROR); |
|
319 |
} |
|
320 |
||
321 |
/* |
|
322 |
* Place at the head of the list, init the state |
|
323 |
* to NEW and clear the time used field. |
|
324 |
*/ |
|
325 |
||
326 |
drp->dr_state = NFS4_DUP_NEW; |
|
327 |
drp->dr_time_used.tv_sec = drp->dr_time_used.tv_nsec = 0; |
|
328 |
||
329 |
/* |
|
330 |
* If needed, resize the address buffer |
|
331 |
*/ |
|
332 |
if (drp->dr_addr.maxlen < req->rq_xprt->xp_rtaddr.len) { |
|
333 |
if (drp->dr_addr.buf != NULL) |
|
334 |
kmem_free(drp->dr_addr.buf, drp->dr_addr.maxlen); |
|
335 |
drp->dr_addr.maxlen = req->rq_xprt->xp_rtaddr.len; |
|
336 |
drp->dr_addr.buf = kmem_alloc(drp->dr_addr.maxlen, KM_NOSLEEP); |
|
337 |
if (drp->dr_addr.buf == NULL) { |
|
338 |
/* |
|
339 |
* If the malloc fails, mark the entry |
|
340 |
* as free and put on the tail. |
|
341 |
*/ |
|
342 |
drp->dr_addr.maxlen = 0; |
|
343 |
drp->dr_state = NFS4_DUP_FREE; |
|
344 |
mutex_enter(&drc->lock); |
|
345 |
list_insert_tail(&(drc->dr_cache), drp); |
|
346 |
mutex_exit(&drc->lock); |
|
347 |
return (NFS4_DUP_ERROR); |
|
348 |
} |
|
349 |
} |
|
350 |
||
351 |
||
352 |
/* |
|
353 |
* Copy the address. |
|
354 |
*/ |
|
355 |
drp->dr_addr.len = req->rq_xprt->xp_rtaddr.len; |
|
356 |
||
357 |
bcopy((caddr_t)req->rq_xprt->xp_rtaddr.buf, |
|
358 |
(caddr_t)drp->dr_addr.buf, |
|
359 |
drp->dr_addr.len); |
|
360 |
||
361 |
drp->dr_xid = the_xid; |
|
362 |
drp->dr_bkt = dr_bkt; |
|
363 |
||
364 |
/* |
|
365 |
* Insert at the head of the bucket and |
|
366 |
* the drc lists.. |
|
367 |
*/ |
|
368 |
mutex_enter(&drc->lock); |
|
369 |
list_insert_head(&drc->dr_cache, drp); |
|
370 |
list_insert_head(dr_bkt, drp); |
|
371 |
mutex_exit(&drc->lock); |
|
372 |
||
373 |
*dup = drp; |
|
374 |
||
375 |
return (NFS4_DUP_NEW); |
|
376 |
} |
|
377 |
||
/*
 *
 * This function handles the duplicate request cache,
 * NULL_PROC and COMPOUND procedure calls for NFSv4;
 *
 * Passed into this function are:-
 *
 *	disp	A pointer to our dispatch table entry
 *	req	The request to process
 *	xprt	The server transport handle
 *	ap	A pointer to the arguments
 *
 *
 * When appropriate this function is responsible for inserting
 * the reply into the duplicate cache or replaying an existing
 * cached reply.
 *
 * dr_stat	reflects the state of the duplicate request that
 *		has been inserted into or retrieved from the cache
 *
 * drp		is the duplicate request entry
 *
 * Returns 0 on success, non-zero when the reply could not be sent
 * (or the request must be dropped so the client retransmits).
 */
int
rfs4_dispatch(struct rpcdisp *disp, struct svc_req *req,
    SVCXPRT *xprt, char *ap)
{

	COMPOUND4res res_buf, *rbp;
	COMPOUND4args *cap;

	cred_t *cr = NULL;
	int error = 0;
	int dis_flags = 0;
	int dr_stat = NFS4_NOT_DUP;
	rfs4_dupreq_t *drp = NULL;

	ASSERT(disp);

	/*
	 * Short circuit the RPC_NULL proc.
	 */
	if (disp->dis_proc == rpc_null) {
		if (!svc_sendreply(xprt, xdr_void, NULL)) {
			return (1);
		}
		return (0);
	}

	/* Only NFSv4 Compounds from this point onward */

	rbp = &res_buf;
	cap = (COMPOUND4args *)ap;

	/*
	 * Figure out the disposition of the whole COMPOUND
	 * and record its idempotency.
	 */
	rfs4_compound_flagproc(cap, &dis_flags);

	/*
	 * If NON-IDEMPOTENT then we need to figure out if this
	 * request can be replied from the duplicate cache.
	 *
	 * If this is a new request then we need to insert the
	 * reply into the duplicate cache.
	 */
	if (!(dis_flags & RPC_IDEMPOTENT)) {
		/* look for a replay from the cache or allocate */
		dr_stat = rfs4_find_dr(req, nfs4_drc, &drp);

		switch (dr_stat) {

		case NFS4_DUP_ERROR:
			/* No cache slot available: reject the call. */
			svcerr_systemerr(xprt);
			return (1);
			/* NOTREACHED */

		case NFS4_DUP_PENDING:
			/*
			 * reply has previously been inserted into the
			 * duplicate cache, however the reply has
			 * not yet been sent via svc_sendreply()
			 */
			return (1);
			/* NOTREACHED */

		case NFS4_DUP_NEW:
			/*
			 * T_DONTPEND tells lower layers to fail rather
			 * than block; T_WOULDBLOCK reports that they
			 * would have blocked.
			 */
			curthread->t_flag |= T_DONTPEND;
			/* NON-IDEMPOTENT proc call */
			rfs4_compound(cap, rbp, NULL, req, cr);

			curthread->t_flag &= ~T_DONTPEND;
			if (curthread->t_flag & T_WOULDBLOCK) {
				curthread->t_flag &= ~T_WOULDBLOCK;
				/*
				 * mark this entry as FREE and plop
				 * on the end of the cache list
				 */
				mutex_enter(&drp->drc->lock);
				rfs4_dr_chstate(drp, NFS4_DUP_FREE);
				list_insert_tail(&(drp->drc->dr_cache), drp);
				mutex_exit(&drp->drc->lock);
				return (1);
			}
			/*
			 * Struct-copy the results into the cache entry;
			 * ownership of the compound results passes to
			 * the DRC (freed via rfs4_dr_chstate()).
			 */
			drp->dr_res = res_buf;
			break;

		case NFS4_DUP_REPLAY:
			/* replay from the cache */
			rbp = &(drp->dr_res);
			break;
		}
	} else {
		curthread->t_flag |= T_DONTPEND;
		/* IDEMPOTENT proc call */
		rfs4_compound(cap, rbp, NULL, req, cr);

		curthread->t_flag &= ~T_DONTPEND;
		if (curthread->t_flag & T_WOULDBLOCK) {
			curthread->t_flag &= ~T_WOULDBLOCK;
			/* Drop the request; the client will retransmit. */
			return (1);
		}
	}

	/*
	 * Send out the replayed reply or the 'real' one.
	 */
	if (!svc_sendreply(xprt, xdr_COMPOUND4res_srv, (char *)rbp)) {
		DTRACE_PROBE2(nfss__e__dispatch_sendfail,
		    struct svc_req *, xprt,
		    char *, rbp);
		error++;
	}

	/*
	 * If this reply was just inserted into the duplicate cache
	 * mark it as available for replay
	 */
	if (dr_stat == NFS4_DUP_NEW) {
		mutex_enter(&drp->drc->lock);
		rfs4_dr_chstate(drp, NFS4_DUP_REPLAY);
		mutex_exit(&drp->drc->lock);
	} else if (dr_stat == NFS4_NOT_DUP) {
		/* Results were never cached, so free them here. */
		rfs4_compound_free(rbp);
	}

	return (error);
}