0
|
1 |
/*
|
|
2 |
* CDDL HEADER START
|
|
3 |
*
|
|
4 |
* The contents of this file are subject to the terms of the
|
|
5 |
* Common Development and Distribution License, Version 1.0 only
|
|
6 |
* (the "License"). You may not use this file except in compliance
|
|
7 |
* with the License.
|
|
8 |
*
|
|
9 |
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
|
10 |
* or http://www.opensolaris.org/os/licensing.
|
|
11 |
* See the License for the specific language governing permissions
|
|
12 |
* and limitations under the License.
|
|
13 |
*
|
|
14 |
* When distributing Covered Code, include this CDDL HEADER in each
|
|
15 |
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
|
16 |
* If applicable, add the following below this CDDL HEADER, with the
|
|
17 |
* fields enclosed by brackets "[]" replaced with your own identifying
|
|
18 |
* information: Portions Copyright [yyyy] [name of copyright owner]
|
|
19 |
*
|
|
20 |
* CDDL HEADER END
|
|
21 |
*/
|
|
22 |
/*
|
|
23 |
* Copyright 2005 Sun Microsystems, Inc. All rights reserved.
|
|
24 |
* Use is subject to license terms.
|
|
25 |
*/
|
|
26 |
|
|
27 |
#pragma ident "%Z%%M% %I% %E% SMI"
|
|
28 |
|
|
29 |
#include <sys/cred.h>
|
|
30 |
#include <sys/kstat.h>
|
|
31 |
#include <sys/list.h>
|
|
32 |
#include <sys/systm.h>
|
|
33 |
#include <sys/vfs.h>
|
|
34 |
#include <sys/vnode.h>
|
|
35 |
#include <sys/cmn_err.h>
|
|
36 |
|
|
37 |
#include <nfs/nfs4_clnt.h>
|
|
38 |
#include <nfs/rnode4.h>
|
|
39 |
|
|
40 |
/*
|
|
41 |
* Recovery kstats
|
|
42 |
*/
|
|
43 |
typedef struct rkstat {
|
|
44 |
kstat_named_t badhandle;
|
|
45 |
kstat_named_t badowner;
|
|
46 |
kstat_named_t clientid;
|
|
47 |
kstat_named_t dead_file;
|
|
48 |
kstat_named_t delay;
|
|
49 |
kstat_named_t fail_relock;
|
|
50 |
kstat_named_t file_diff;
|
|
51 |
kstat_named_t no_grace;
|
|
52 |
kstat_named_t not_responding;
|
|
53 |
kstat_named_t opens_changed;
|
|
54 |
kstat_named_t siglost;
|
|
55 |
kstat_named_t unexp_action;
|
|
56 |
kstat_named_t unexp_errno;
|
|
57 |
kstat_named_t unexp_status;
|
|
58 |
kstat_named_t wrongsec;
|
|
59 |
kstat_named_t lost_state_bad_op;
|
|
60 |
} rkstat_t;
|
|
61 |
|
|
62 |
static rkstat_t rkstat_template = {
|
|
63 |
{ "badhandle", KSTAT_DATA_ULONG },
|
|
64 |
{ "badowner", KSTAT_DATA_ULONG },
|
|
65 |
{ "clientid", KSTAT_DATA_ULONG },
|
|
66 |
{ "dead_file", KSTAT_DATA_ULONG },
|
|
67 |
{ "delay", KSTAT_DATA_ULONG },
|
|
68 |
{ "fail_relock", KSTAT_DATA_ULONG },
|
|
69 |
{ "file_diff", KSTAT_DATA_ULONG },
|
|
70 |
{ "no_grace", KSTAT_DATA_ULONG },
|
|
71 |
{ "not_responding", KSTAT_DATA_ULONG },
|
|
72 |
{ "opens_changed", KSTAT_DATA_ULONG },
|
|
73 |
{ "siglost", KSTAT_DATA_ULONG },
|
|
74 |
{ "unexp_action", KSTAT_DATA_ULONG },
|
|
75 |
{ "unexp_errno", KSTAT_DATA_ULONG },
|
|
76 |
{ "unexp_status", KSTAT_DATA_ULONG },
|
|
77 |
{ "wrongsec", KSTAT_DATA_ULONG },
|
|
78 |
{ "bad_op", KSTAT_DATA_ULONG },
|
|
79 |
};
|
|
80 |
|
|
81 |
/* maximum number of messages allowed on the mi's mi_msg_list */
|
|
82 |
int nfs4_msg_max = NFS4_MSG_MAX;
|
|
83 |
#define DEFAULT_LEASE 180
|
|
84 |
|
|
85 |
static char *strdup(const char *);
|
|
86 |
|
|
87 |
/*
|
|
88 |
* Sets the appropiate fields of "ep", given "id" and various parameters.
|
|
89 |
* Assumes that ep's fields have been initialized to zero/null, except for
|
|
90 |
* re_type and mount point info, which are already set.
|
|
91 |
*/
|
|
92 |
static void
|
|
93 |
set_event(nfs4_event_type_t id, nfs4_revent_t *ep, mntinfo4_t *mi,
|
|
94 |
rnode4_t *rp1, rnode4_t *rp2, uint_t count, pid_t pid, nfsstat4 nfs4_error,
|
|
95 |
char *server1, char *why, nfs4_tag_type_t tag1, nfs4_tag_type_t tag2,
|
|
96 |
seqid4 seqid1, seqid4 seqid2)
|
|
97 |
{
|
|
98 |
int len;
|
|
99 |
|
|
100 |
switch (id) {
|
|
101 |
case RE_BAD_SEQID:
|
|
102 |
ep->re_mi = mi;
|
|
103 |
|
|
104 |
/* bad seqid'd file <path/component name> */
|
|
105 |
if (rp1 && rp1->r_svnode.sv_name)
|
|
106 |
ep->re_char1 = fn_path(rp1->r_svnode.sv_name);
|
|
107 |
else
|
|
108 |
ep->re_char1 = NULL;
|
|
109 |
ep->re_rp1 = rp1;
|
|
110 |
|
|
111 |
/* for LOCK/LOCKU */
|
|
112 |
ep->re_pid = pid;
|
|
113 |
|
|
114 |
ep->re_stat4 = nfs4_error;
|
|
115 |
ep->re_tag1 = tag1;
|
|
116 |
ep->re_tag2 = tag2;
|
|
117 |
ep->re_seqid1 = seqid1;
|
|
118 |
ep->re_seqid2 = seqid2;
|
|
119 |
break;
|
|
120 |
case RE_BADHANDLE:
|
|
121 |
ASSERT(rp1 != NULL);
|
|
122 |
|
|
123 |
/* dead file <path/component name> */
|
|
124 |
if (rp1->r_svnode.sv_name)
|
|
125 |
ep->re_char1 = fn_path(rp1->r_svnode.sv_name);
|
|
126 |
else
|
|
127 |
ep->re_char1 = NULL;
|
|
128 |
ep->re_rp1 = rp1;
|
|
129 |
break;
|
|
130 |
case RE_CLIENTID:
|
|
131 |
ep->re_mi = mi;
|
|
132 |
|
|
133 |
/* the error we failed with */
|
|
134 |
ep->re_uint = count;
|
|
135 |
ep->re_stat4 = nfs4_error;
|
|
136 |
break;
|
|
137 |
case RE_DEAD_FILE:
|
|
138 |
ASSERT(rp1 != NULL);
|
|
139 |
|
|
140 |
/* dead file <path/component name> */
|
|
141 |
if (rp1->r_svnode.sv_name)
|
|
142 |
ep->re_char1 = fn_path(rp1->r_svnode.sv_name);
|
|
143 |
else
|
|
144 |
ep->re_char1 = NULL;
|
|
145 |
ep->re_rp1 = rp1;
|
|
146 |
|
|
147 |
/* why the file got killed */
|
|
148 |
if (why) {
|
|
149 |
len = strlen(why);
|
|
150 |
ep->re_char2 = kmem_alloc(len + 1, KM_SLEEP);
|
|
151 |
bcopy(why, ep->re_char2, len);
|
|
152 |
ep->re_char2[len] = '\0';
|
|
153 |
} else
|
|
154 |
ep->re_char2 = NULL;
|
|
155 |
|
|
156 |
ep->re_stat4 = nfs4_error;
|
|
157 |
break;
|
|
158 |
case RE_END:
|
|
159 |
/* first rnode */
|
|
160 |
if (rp1 && rp1->r_svnode.sv_name)
|
|
161 |
ep->re_char1 = fn_path(rp1->r_svnode.sv_name);
|
|
162 |
else
|
|
163 |
ep->re_char1 = NULL;
|
|
164 |
ep->re_rp1 = rp1;
|
|
165 |
|
|
166 |
/* second rnode */
|
|
167 |
if (rp2 && rp2->r_svnode.sv_name)
|
|
168 |
ep->re_char2 = fn_path(rp2->r_svnode.sv_name);
|
|
169 |
else
|
|
170 |
ep->re_char2 = NULL;
|
|
171 |
ep->re_rp2 = rp2;
|
|
172 |
|
|
173 |
ep->re_mi = mi;
|
|
174 |
break;
|
|
175 |
case RE_FAIL_RELOCK:
|
|
176 |
ASSERT(rp1 != NULL);
|
|
177 |
|
|
178 |
/* error on fail relock */
|
|
179 |
ep->re_uint = count;
|
|
180 |
|
|
181 |
/* process that failed */
|
|
182 |
ep->re_pid = pid;
|
|
183 |
|
|
184 |
/* nfs4 error */
|
|
185 |
ep->re_stat4 = nfs4_error;
|
|
186 |
|
|
187 |
/* file <path/component name> */
|
|
188 |
if (rp1->r_svnode.sv_name)
|
|
189 |
ep->re_char1 = fn_path(rp1->r_svnode.sv_name);
|
|
190 |
else
|
|
191 |
ep->re_char1 = NULL;
|
|
192 |
ep->re_rp1 = rp1;
|
|
193 |
break;
|
|
194 |
case RE_FAIL_REMAP_LEN:
|
|
195 |
/* length of returned filehandle */
|
|
196 |
ep->re_uint = count;
|
|
197 |
break;
|
|
198 |
case RE_FAIL_REMAP_OP:
|
|
199 |
break;
|
|
200 |
case RE_FAILOVER:
|
|
201 |
/* server we're failing over to (if not picking original) */
|
|
202 |
if (server1 != NULL) {
|
|
203 |
len = strlen(server1);
|
|
204 |
ep->re_char1 = kmem_alloc(len + 1, KM_SLEEP);
|
|
205 |
bcopy(server1, ep->re_char1, len);
|
|
206 |
ep->re_char1[len] = '\0';
|
|
207 |
} else {
|
|
208 |
ep->re_char1 = NULL;
|
|
209 |
}
|
|
210 |
break;
|
|
211 |
case RE_FILE_DIFF:
|
|
212 |
ASSERT(rp1 != NULL);
|
|
213 |
|
|
214 |
/* dead file <path/component name> */
|
|
215 |
if (rp1->r_svnode.sv_name)
|
|
216 |
ep->re_char1 = fn_path(rp1->r_svnode.sv_name);
|
|
217 |
else
|
|
218 |
ep->re_char1 = NULL;
|
|
219 |
ep->re_rp1 = rp1;
|
|
220 |
break;
|
|
221 |
case RE_LOST_STATE:
|
|
222 |
ep->re_uint = count; /* op number */
|
|
223 |
if (rp1 && rp1->r_svnode.sv_name)
|
|
224 |
ep->re_char1 = fn_path(rp1->r_svnode.sv_name);
|
|
225 |
else
|
|
226 |
ep->re_char1 = NULL;
|
|
227 |
ep->re_rp1 = rp1;
|
|
228 |
if (rp2 && rp2->r_svnode.sv_name)
|
|
229 |
ep->re_char2 = fn_path(rp2->r_svnode.sv_name);
|
|
230 |
else
|
|
231 |
ep->re_char2 = NULL;
|
|
232 |
ep->re_rp2 = rp2;
|
|
233 |
break;
|
|
234 |
case RE_OPENS_CHANGED:
|
|
235 |
ep->re_mi = mi;
|
|
236 |
|
|
237 |
/* original number of open files */
|
|
238 |
ep->re_uint = count;
|
|
239 |
/* new number of open files */
|
|
240 |
ep->re_pid = pid;
|
|
241 |
break;
|
|
242 |
case RE_SIGLOST:
|
|
243 |
case RE_SIGLOST_NO_DUMP:
|
|
244 |
ASSERT(rp1 != NULL);
|
|
245 |
|
|
246 |
/* file <path/component name> */
|
|
247 |
if (rp1->r_svnode.sv_name)
|
|
248 |
ep->re_char1 = fn_path(rp1->r_svnode.sv_name);
|
|
249 |
else
|
|
250 |
ep->re_char1 = NULL;
|
|
251 |
ep->re_rp1 = rp1;
|
|
252 |
ep->re_pid = pid;
|
|
253 |
ep->re_uint = count;
|
|
254 |
ep->re_stat4 = nfs4_error;
|
|
255 |
break;
|
|
256 |
case RE_START:
|
|
257 |
/* file <path/component name> */
|
|
258 |
if (rp1 && rp1->r_svnode.sv_name)
|
|
259 |
ep->re_char1 = fn_path(rp1->r_svnode.sv_name);
|
|
260 |
else
|
|
261 |
ep->re_char1 = NULL;
|
|
262 |
ep->re_rp1 = rp1;
|
|
263 |
|
|
264 |
/* file <path/component name> */
|
|
265 |
if (rp2 && rp2->r_svnode.sv_name)
|
|
266 |
ep->re_char2 = fn_path(rp2->r_svnode.sv_name);
|
|
267 |
else
|
|
268 |
ep->re_char2 = NULL;
|
|
269 |
ep->re_rp2 = rp2;
|
|
270 |
|
|
271 |
ep->re_mi = mi;
|
|
272 |
ep->re_uint = count;
|
|
273 |
break;
|
|
274 |
case RE_UNEXPECTED_ACTION:
|
|
275 |
case RE_UNEXPECTED_ERRNO:
|
|
276 |
/* the error that is unexpected */
|
|
277 |
ep->re_uint = count;
|
|
278 |
break;
|
|
279 |
case RE_UNEXPECTED_STATUS:
|
|
280 |
/* nfsstat4 error */
|
|
281 |
ep->re_stat4 = nfs4_error;
|
|
282 |
break;
|
|
283 |
case RE_WRONGSEC:
|
|
284 |
/* the error we failed with */
|
|
285 |
ep->re_uint = count;
|
|
286 |
|
|
287 |
/* file <path/component name> */
|
|
288 |
if (rp1 && rp1->r_svnode.sv_name)
|
|
289 |
ep->re_char1 = fn_path(rp1->r_svnode.sv_name);
|
|
290 |
else
|
|
291 |
ep->re_char1 = NULL;
|
|
292 |
ep->re_rp1 = rp1;
|
|
293 |
|
|
294 |
/* file <path/component name> */
|
|
295 |
if (rp2 && rp2->r_svnode.sv_name)
|
|
296 |
ep->re_char2 = fn_path(rp2->r_svnode.sv_name);
|
|
297 |
else
|
|
298 |
ep->re_char2 = NULL;
|
|
299 |
ep->re_rp2 = rp2;
|
|
300 |
break;
|
|
301 |
case RE_LOST_STATE_BAD_OP:
|
|
302 |
ep->re_uint = count; /* the unexpected op */
|
|
303 |
ep->re_pid = pid;
|
|
304 |
ep->re_rp1 = rp1;
|
|
305 |
if (rp1 != NULL && rp1->r_svnode.sv_name != NULL)
|
|
306 |
ep->re_char1 = fn_path(rp1->r_svnode.sv_name);
|
|
307 |
ep->re_rp2 = rp2;
|
|
308 |
if (rp2 != NULL && rp2->r_svnode.sv_name != NULL)
|
|
309 |
ep->re_char2 = fn_path(rp2->r_svnode.sv_name);
|
|
310 |
break;
|
|
311 |
default:
|
|
312 |
break;
|
|
313 |
}
|
|
314 |
}
|
|
315 |
|
|
316 |
/*
|
|
317 |
* Sets the appropiate fields of the 'fact' for this 'id'.
|
|
318 |
*/
|
|
319 |
static void
|
|
320 |
set_fact(nfs4_fact_type_t id, nfs4_rfact_t *fp, nfsstat4 stat4,
|
|
321 |
nfs4_recov_t raction, nfs_opnum4 op, bool_t reboot, int error,
|
|
322 |
vnode_t *vp)
|
|
323 |
{
|
|
324 |
rnode4_t *rp1;
|
|
325 |
|
|
326 |
switch (id) {
|
|
327 |
case RF_BADOWNER:
|
|
328 |
fp->rf_op = op;
|
|
329 |
fp->rf_reboot = reboot;
|
|
330 |
fp->rf_stat4 = stat4;
|
|
331 |
break;
|
|
332 |
case RF_RENEW_EXPIRED:
|
|
333 |
break;
|
|
334 |
case RF_ERR:
|
|
335 |
fp->rf_op = op;
|
|
336 |
fp->rf_reboot = reboot;
|
|
337 |
fp->rf_stat4 = stat4;
|
|
338 |
fp->rf_action = raction;
|
|
339 |
fp->rf_error = error;
|
|
340 |
break;
|
|
341 |
case RF_SRV_OK:
|
|
342 |
break;
|
|
343 |
case RF_SRV_NOT_RESPOND:
|
|
344 |
break;
|
|
345 |
case RF_SRVS_OK:
|
|
346 |
break;
|
|
347 |
case RF_SRVS_NOT_RESPOND:
|
|
348 |
gethrestime(&fp->rf_time);
|
|
349 |
break;
|
|
350 |
case RF_DELMAP_CB_ERR:
|
|
351 |
fp->rf_op = op;
|
|
352 |
fp->rf_stat4 = stat4;
|
|
353 |
|
|
354 |
rp1 = VTOR4(vp);
|
|
355 |
fp->rf_rp1 = rp1;
|
|
356 |
if (rp1 && rp1->r_svnode.sv_name)
|
|
357 |
fp->rf_char1 = fn_path(rp1->r_svnode.sv_name);
|
|
358 |
else
|
|
359 |
fp->rf_char1 = NULL;
|
|
360 |
break;
|
|
361 |
default:
|
|
362 |
zcmn_err(getzoneid(), CE_NOTE, "illegal fact %d", id);
|
|
363 |
break;
|
|
364 |
}
|
|
365 |
}
|
|
366 |
|
|
367 |
/*
|
|
368 |
* Returns 1 if the event/fact is of a successful communication
|
|
369 |
* from the server; 0 otherwise.
|
|
370 |
*/
|
|
371 |
static int
|
|
372 |
successful_comm(nfs4_debug_msg_t *msgp)
|
|
373 |
{
|
|
374 |
if (msgp->msg_type == RM_EVENT) {
|
|
375 |
switch (msgp->rmsg_u.msg_event.re_type) {
|
|
376 |
case RE_BAD_SEQID:
|
|
377 |
case RE_BADHANDLE:
|
|
378 |
case RE_FAIL_REMAP_LEN:
|
|
379 |
case RE_FAIL_REMAP_OP:
|
|
380 |
case RE_FILE_DIFF:
|
|
381 |
case RE_START:
|
|
382 |
case RE_UNEXPECTED_ACTION:
|
|
383 |
case RE_UNEXPECTED_ERRNO:
|
|
384 |
case RE_UNEXPECTED_STATUS:
|
|
385 |
case RE_WRONGSEC:
|
|
386 |
return (1);
|
|
387 |
case RE_CLIENTID:
|
|
388 |
case RE_DEAD_FILE:
|
|
389 |
case RE_END:
|
|
390 |
case RE_FAIL_RELOCK:
|
|
391 |
case RE_FAILOVER:
|
|
392 |
case RE_LOST_STATE:
|
|
393 |
case RE_OPENS_CHANGED:
|
|
394 |
case RE_SIGLOST:
|
|
395 |
case RE_SIGLOST_NO_DUMP:
|
|
396 |
case RE_LOST_STATE_BAD_OP:
|
|
397 |
return (0);
|
|
398 |
default:
|
|
399 |
return (0);
|
|
400 |
}
|
|
401 |
} else {
|
|
402 |
switch (msgp->rmsg_u.msg_fact.rf_type) {
|
|
403 |
case RF_BADOWNER:
|
|
404 |
case RF_ERR:
|
|
405 |
case RF_RENEW_EXPIRED:
|
|
406 |
case RF_SRV_OK:
|
|
407 |
case RF_SRVS_OK:
|
|
408 |
case RF_DELMAP_CB_ERR:
|
|
409 |
return (1);
|
|
410 |
case RF_SRV_NOT_RESPOND:
|
|
411 |
case RF_SRVS_NOT_RESPOND:
|
|
412 |
return (0);
|
|
413 |
default:
|
|
414 |
return (0);
|
|
415 |
}
|
|
416 |
}
|
|
417 |
}
|
|
418 |
|
|
419 |
/*
|
|
420 |
* Iterate backwards through the mi's mi_msg_list to find the earliest
|
|
421 |
* message that we should find relevant facts to investigate.
|
|
422 |
*/
|
|
423 |
static nfs4_debug_msg_t *
|
|
424 |
find_beginning(nfs4_debug_msg_t *first_msg, mntinfo4_t *mi)
|
|
425 |
{
|
|
426 |
nfs4_debug_msg_t *oldest_msg, *cur_msg;
|
|
427 |
time_t lease;
|
|
428 |
|
|
429 |
ASSERT(mutex_owned(&mi->mi_msg_list_lock));
|
|
430 |
if (mi->mi_lease_period > 0)
|
|
431 |
lease = 2 * mi->mi_lease_period;
|
|
432 |
else
|
|
433 |
lease = DEFAULT_LEASE;
|
|
434 |
|
|
435 |
oldest_msg = first_msg;
|
|
436 |
cur_msg = list_prev(&mi->mi_msg_list, first_msg);
|
|
437 |
while (cur_msg &&
|
|
438 |
first_msg->msg_time.tv_sec - cur_msg->msg_time.tv_sec < lease) {
|
|
439 |
oldest_msg = cur_msg;
|
|
440 |
if ((cur_msg->msg_type == RM_FACT) &&
|
|
441 |
(cur_msg->rmsg_u.msg_fact.rf_type == RF_SRV_OK)) {
|
|
442 |
/* find where we lost contact with the server */
|
|
443 |
while (cur_msg) {
|
|
444 |
if ((cur_msg->msg_type == RM_FACT) &&
|
|
445 |
(cur_msg->rmsg_u.msg_fact.rf_type ==
|
|
446 |
RF_SRV_NOT_RESPOND))
|
|
447 |
break;
|
|
448 |
oldest_msg = cur_msg;
|
|
449 |
cur_msg = list_prev(&mi->mi_msg_list, cur_msg);
|
|
450 |
}
|
|
451 |
/*
|
|
452 |
* Find the first successful message before
|
|
453 |
* we lost contact with the server.
|
|
454 |
*/
|
|
455 |
if (cur_msg) {
|
|
456 |
cur_msg = list_prev(&mi->mi_msg_list, cur_msg);
|
|
457 |
while (cur_msg && !successful_comm(cur_msg)) {
|
|
458 |
oldest_msg = cur_msg;
|
|
459 |
cur_msg = list_prev(&mi->mi_msg_list,
|
|
460 |
cur_msg);
|
|
461 |
}
|
|
462 |
}
|
|
463 |
/*
|
|
464 |
* If we're not at the dummy head pointer,
|
|
465 |
* set the oldest and current message.
|
|
466 |
*/
|
|
467 |
if (cur_msg) {
|
|
468 |
first_msg = cur_msg;
|
|
469 |
oldest_msg = cur_msg;
|
|
470 |
cur_msg = list_prev(&mi->mi_msg_list, cur_msg);
|
|
471 |
}
|
|
472 |
} else
|
|
473 |
cur_msg = list_prev(&mi->mi_msg_list, cur_msg);
|
|
474 |
}
|
|
475 |
|
|
476 |
return (oldest_msg);
|
|
477 |
}
|
|
478 |
|
|
479 |
/*
|
|
480 |
* Returns 1 if facts have been found; 0 otherwise.
|
|
481 |
*/
|
|
482 |
static int
|
|
483 |
get_facts(nfs4_debug_msg_t *msgp, nfs4_rfact_t *ret_fp, char **mnt_pt,
|
|
484 |
mntinfo4_t *mi)
|
|
485 |
{
|
|
486 |
nfs4_debug_msg_t *cur_msg, *oldest_msg;
|
|
487 |
nfs4_rfact_t *cur_fp;
|
|
488 |
int found_a_fact = 0;
|
|
489 |
int len;
|
|
490 |
|
|
491 |
cur_msg = msgp;
|
|
492 |
|
|
493 |
/* find the oldest msg to search backwards to */
|
|
494 |
oldest_msg = find_beginning(cur_msg, mi);
|
|
495 |
ASSERT(oldest_msg != NULL);
|
|
496 |
|
|
497 |
/*
|
|
498 |
* Create a fact sheet by searching from our current message
|
|
499 |
* backwards to the 'oldest_msg', recording facts along the way
|
|
500 |
* until we found facts that have been inspected by another time.
|
|
501 |
*/
|
|
502 |
while (cur_msg && cur_msg != list_prev(&mi->mi_msg_list, oldest_msg)) {
|
|
503 |
if (cur_msg->msg_type != RM_FACT) {
|
|
504 |
cur_msg = list_prev(&mi->mi_msg_list, cur_msg);
|
|
505 |
continue;
|
|
506 |
}
|
|
507 |
|
|
508 |
cur_fp = &cur_msg->rmsg_u.msg_fact;
|
|
509 |
/*
|
|
510 |
* If this fact has already been looked at, then so
|
|
511 |
* have all preceding facts. Return Now.
|
|
512 |
*/
|
|
513 |
if (cur_fp->rf_status == RFS_INSPECT)
|
|
514 |
return (found_a_fact);
|
|
515 |
|
|
516 |
cur_fp->rf_status = RFS_INSPECT;
|
|
517 |
found_a_fact = 1;
|
|
518 |
switch (cur_fp->rf_type) {
|
|
519 |
case RF_BADOWNER:
|
|
520 |
break;
|
|
521 |
case RF_ERR:
|
|
522 |
/*
|
|
523 |
* Don't want to overwrite a fact that was
|
|
524 |
* previously found during our current search.
|
|
525 |
*/
|
|
526 |
if (!ret_fp->rf_reboot)
|
|
527 |
ret_fp->rf_reboot = cur_fp->rf_reboot;
|
|
528 |
if (!ret_fp->rf_stat4)
|
|
529 |
ret_fp->rf_stat4 = cur_fp->rf_stat4;
|
|
530 |
if (!ret_fp->rf_action)
|
|
531 |
ret_fp->rf_action = cur_fp->rf_action;
|
|
532 |
break;
|
|
533 |
case RF_RENEW_EXPIRED:
|
|
534 |
if (cur_msg->msg_mntpt && !(*mnt_pt)) {
|
|
535 |
len = strlen(cur_msg->msg_mntpt) + 1;
|
|
536 |
*mnt_pt = kmem_alloc(len, KM_SLEEP);
|
|
537 |
bcopy(cur_msg->msg_mntpt, *mnt_pt, len);
|
|
538 |
}
|
|
539 |
break;
|
|
540 |
case RF_SRV_OK:
|
|
541 |
break;
|
|
542 |
case RF_SRV_NOT_RESPOND:
|
|
543 |
/*
|
|
544 |
* Okay to overwrite this fact as
|
|
545 |
* we want the earliest time.
|
|
546 |
*/
|
|
547 |
ret_fp->rf_time = cur_fp->rf_time;
|
|
548 |
break;
|
|
549 |
case RF_SRVS_OK:
|
|
550 |
break;
|
|
551 |
case RF_SRVS_NOT_RESPOND:
|
|
552 |
break;
|
|
553 |
case RF_DELMAP_CB_ERR:
|
|
554 |
break;
|
|
555 |
default:
|
|
556 |
zcmn_err(getzoneid(), CE_NOTE,
|
|
557 |
"get facts: illegal fact %d", cur_fp->rf_type);
|
|
558 |
break;
|
|
559 |
}
|
|
560 |
cur_msg = list_prev(&mi->mi_msg_list, cur_msg);
|
|
561 |
}
|
|
562 |
|
|
563 |
return (found_a_fact);
|
|
564 |
}
|
|
565 |
|
|
566 |
/*
|
|
567 |
* Returns 1 if this fact is identical to the last fact recorded
|
|
568 |
* (only checks for a match within the last 2 lease periods).
|
|
569 |
*/
|
|
570 |
static int
|
|
571 |
facts_same(nfs4_debug_msg_t *cur_msg, nfs4_debug_msg_t *new_msg,
|
|
572 |
mntinfo4_t *mi)
|
|
573 |
{
|
|
574 |
nfs4_rfact_t *fp1, *fp2;
|
|
575 |
int lease, len;
|
|
576 |
|
|
577 |
ASSERT(mutex_owned(&mi->mi_msg_list_lock));
|
|
578 |
if (mi->mi_lease_period > 0)
|
|
579 |
lease = 2 * mi->mi_lease_period;
|
|
580 |
else
|
|
581 |
lease = DEFAULT_LEASE;
|
|
582 |
|
|
583 |
fp2 = &new_msg->rmsg_u.msg_fact;
|
|
584 |
|
|
585 |
while (cur_msg &&
|
|
586 |
new_msg->msg_time.tv_sec - cur_msg->msg_time.tv_sec < lease) {
|
|
587 |
if (cur_msg->msg_type != RM_FACT) {
|
|
588 |
cur_msg = list_prev(&mi->mi_msg_list, cur_msg);
|
|
589 |
continue;
|
|
590 |
}
|
|
591 |
fp1 = &cur_msg->rmsg_u.msg_fact;
|
|
592 |
if (fp1->rf_type != fp2->rf_type)
|
|
593 |
return (0);
|
|
594 |
|
|
595 |
/* now actually compare the facts */
|
|
596 |
if (fp1->rf_action != fp2->rf_action)
|
|
597 |
return (0);
|
|
598 |
if (fp1->rf_stat4 != fp2->rf_stat4)
|
|
599 |
return (0);
|
|
600 |
if (fp1->rf_reboot != fp2->rf_reboot)
|
|
601 |
return (0);
|
|
602 |
if (fp1->rf_op != fp2->rf_op)
|
|
603 |
return (0);
|
|
604 |
if (fp1->rf_time.tv_sec != fp2->rf_time.tv_sec)
|
|
605 |
return (0);
|
|
606 |
if (fp1->rf_error != fp2->rf_error)
|
|
607 |
return (0);
|
|
608 |
if (fp1->rf_rp1 != fp2->rf_rp1)
|
|
609 |
return (0);
|
|
610 |
if (cur_msg->msg_srv != NULL) {
|
|
611 |
if (new_msg->msg_srv == NULL)
|
|
612 |
return (0);
|
|
613 |
len = strlen(cur_msg->msg_srv);
|
|
614 |
if (strncmp(cur_msg->msg_srv, new_msg->msg_srv,
|
|
615 |
len) != 0)
|
|
616 |
return (0);
|
|
617 |
} else if (new_msg->msg_srv != NULL) {
|
|
618 |
return (0);
|
|
619 |
}
|
|
620 |
if (cur_msg->msg_mntpt != NULL) {
|
|
621 |
if (new_msg->msg_mntpt == NULL)
|
|
622 |
return (0);
|
|
623 |
len = strlen(cur_msg->msg_mntpt);
|
|
624 |
if (strncmp(cur_msg->msg_mntpt, new_msg->msg_mntpt,
|
|
625 |
len) != 0)
|
|
626 |
return (0);
|
|
627 |
} else if (new_msg->msg_mntpt != NULL) {
|
|
628 |
return (0);
|
|
629 |
}
|
|
630 |
if (fp1->rf_char1 != NULL) {
|
|
631 |
if (fp2->rf_char1 == NULL)
|
|
632 |
return (0);
|
|
633 |
len = strlen(fp1->rf_char1);
|
|
634 |
if (strncmp(fp1->rf_char1, fp2->rf_char1, len) != 0)
|
|
635 |
return (0);
|
|
636 |
} else if (fp2->rf_char1 != NULL) {
|
|
637 |
return (0);
|
|
638 |
}
|
|
639 |
return (1);
|
|
640 |
}
|
|
641 |
|
|
642 |
return (0);
|
|
643 |
}
|
|
644 |
|
|
645 |
/*
|
|
646 |
* Returns 1 if these two messages are identical; 0 otherwise.
|
|
647 |
*/
|
|
648 |
static int
|
|
649 |
events_same(nfs4_debug_msg_t *cur_msg, nfs4_debug_msg_t *new_msg,
|
|
650 |
mntinfo4_t *mi)
|
|
651 |
{
|
|
652 |
nfs4_revent_t *ep1, *ep2;
|
|
653 |
int len;
|
|
654 |
|
|
655 |
/* find the last event, bypassing all facts */
|
|
656 |
while (cur_msg && cur_msg->msg_type != RM_EVENT)
|
|
657 |
cur_msg = list_prev(&mi->mi_msg_list, cur_msg);
|
|
658 |
|
|
659 |
if (!cur_msg)
|
|
660 |
return (0);
|
|
661 |
|
|
662 |
if (cur_msg->msg_type != RM_EVENT)
|
|
663 |
return (0);
|
|
664 |
|
|
665 |
ep1 = &cur_msg->rmsg_u.msg_event;
|
|
666 |
ep2 = &new_msg->rmsg_u.msg_event;
|
|
667 |
if (ep1->re_type != ep2->re_type)
|
|
668 |
return (0);
|
|
669 |
|
|
670 |
/*
|
|
671 |
* Since we zalloc the buffer, then the two nfs4_debug_msg's
|
|
672 |
* must match up even if all the fields weren't filled in
|
|
673 |
* the first place.
|
|
674 |
*/
|
|
675 |
if (ep1->re_mi != ep2->re_mi)
|
|
676 |
return (0);
|
|
677 |
if (ep1->re_uint != ep2->re_uint)
|
|
678 |
return (0);
|
|
679 |
if (ep1->re_stat4 != ep2->re_stat4)
|
|
680 |
return (0);
|
|
681 |
if (ep1->re_pid != ep2->re_pid)
|
|
682 |
return (0);
|
|
683 |
if (ep1->re_rp1 != ep2->re_rp1)
|
|
684 |
return (0);
|
|
685 |
if (ep1->re_rp2 != ep2->re_rp2)
|
|
686 |
return (0);
|
|
687 |
if (ep1->re_tag1 != ep2->re_tag1)
|
|
688 |
return (0);
|
|
689 |
if (ep1->re_tag2 != ep2->re_tag2)
|
|
690 |
return (0);
|
|
691 |
if (ep1->re_seqid1 != ep2->re_seqid1)
|
|
692 |
return (0);
|
|
693 |
if (ep1->re_seqid2 != ep2->re_seqid2)
|
|
694 |
return (0);
|
|
695 |
|
|
696 |
if (cur_msg->msg_srv != NULL) {
|
|
697 |
if (new_msg->msg_srv == NULL)
|
|
698 |
return (0);
|
|
699 |
len = strlen(cur_msg->msg_srv);
|
|
700 |
if (strncmp(cur_msg->msg_srv, new_msg->msg_srv, len) != 0)
|
|
701 |
return (0);
|
|
702 |
} else if (new_msg->msg_srv != NULL) {
|
|
703 |
return (0);
|
|
704 |
}
|
|
705 |
|
|
706 |
if (ep1->re_char1 != NULL) {
|
|
707 |
if (ep2->re_char1 == NULL)
|
|
708 |
return (0);
|
|
709 |
len = strlen(ep1->re_char1);
|
|
710 |
if (strncmp(ep1->re_char1, ep2->re_char1, len) != 0)
|
|
711 |
return (0);
|
|
712 |
} else if (ep2->re_char1 != NULL) {
|
|
713 |
return (0);
|
|
714 |
}
|
|
715 |
|
|
716 |
if (ep1->re_char2 != NULL) {
|
|
717 |
if (ep2->re_char2 == NULL)
|
|
718 |
return (0);
|
|
719 |
len = strlen(ep1->re_char2);
|
|
720 |
if (strncmp(ep1->re_char2, ep2->re_char2, len) != 0)
|
|
721 |
return (0);
|
|
722 |
} else if (ep2->re_char2 != NULL) {
|
|
723 |
return (0);
|
|
724 |
}
|
|
725 |
|
|
726 |
if (cur_msg->msg_mntpt != NULL) {
|
|
727 |
if (new_msg->msg_mntpt == NULL)
|
|
728 |
return (0);
|
|
729 |
len = strlen(cur_msg->msg_mntpt);
|
|
730 |
if (strncmp(cur_msg->msg_mntpt, cur_msg->msg_mntpt, len) != 0)
|
|
731 |
return (0);
|
|
732 |
} else if (new_msg->msg_mntpt != NULL) {
|
|
733 |
return (0);
|
|
734 |
}
|
|
735 |
|
|
736 |
return (1);
|
|
737 |
}
|
|
738 |
|
|
739 |
/*
|
|
740 |
* Free up a recovery event.
|
|
741 |
*/
|
|
742 |
static void
|
|
743 |
free_event(nfs4_revent_t *ep)
|
|
744 |
{
|
|
745 |
int len;
|
|
746 |
|
|
747 |
if (ep->re_char1) {
|
|
748 |
len = strlen(ep->re_char1) + 1;
|
|
749 |
kmem_free(ep->re_char1, len);
|
|
750 |
}
|
|
751 |
if (ep->re_char2) {
|
|
752 |
len = strlen(ep->re_char2) + 1;
|
|
753 |
kmem_free(ep->re_char2, len);
|
|
754 |
}
|
|
755 |
}
|
|
756 |
|
|
757 |
/*
|
|
758 |
* Free up a recovery fact.
|
|
759 |
*/
|
|
760 |
static void
|
|
761 |
free_fact(nfs4_rfact_t *fp)
|
|
762 |
{
|
|
763 |
int len;
|
|
764 |
|
|
765 |
if (fp->rf_char1) {
|
|
766 |
len = strlen(fp->rf_char1) + 1;
|
|
767 |
kmem_free(fp->rf_char1, len);
|
|
768 |
}
|
|
769 |
}
|
|
770 |
|
|
771 |
/*
|
|
772 |
* Free up the message.
|
|
773 |
*/
|
|
774 |
void
|
|
775 |
nfs4_free_msg(nfs4_debug_msg_t *msg)
|
|
776 |
{
|
|
777 |
int len;
|
|
778 |
|
|
779 |
if (msg->msg_type == RM_EVENT)
|
|
780 |
free_event(&msg->rmsg_u.msg_event);
|
|
781 |
else
|
|
782 |
free_fact(&msg->rmsg_u.msg_fact);
|
|
783 |
|
|
784 |
if (msg->msg_srv) {
|
|
785 |
len = strlen(msg->msg_srv) + 1;
|
|
786 |
kmem_free(msg->msg_srv, len);
|
|
787 |
}
|
|
788 |
|
|
789 |
if (msg->msg_mntpt) {
|
|
790 |
len = strlen(msg->msg_mntpt) + 1;
|
|
791 |
kmem_free(msg->msg_mntpt, len);
|
|
792 |
}
|
|
793 |
|
|
794 |
/* free up the data structure itself */
|
|
795 |
kmem_free(msg, sizeof (*msg));
|
|
796 |
}
|
|
797 |
|
|
798 |
/*
|
|
799 |
* Prints out the interesting facts for recovery events:
|
|
800 |
* -DEAD_FILE
|
|
801 |
* -SIGLOST(_NO_DUMP)
|
|
802 |
*/
|
|
803 |
static void
|
|
804 |
print_facts(nfs4_debug_msg_t *msg, mntinfo4_t *mi)
|
|
805 |
{
|
|
806 |
nfs4_rfact_t *fp;
|
|
807 |
char *mount_pt;
|
|
808 |
int len;
|
|
809 |
|
|
810 |
if (msg->rmsg_u.msg_event.re_type != RE_DEAD_FILE &&
|
|
811 |
msg->rmsg_u.msg_event.re_type != RE_SIGLOST &&
|
|
812 |
msg->rmsg_u.msg_event.re_type != RE_SIGLOST_NO_DUMP)
|
|
813 |
return;
|
|
814 |
|
|
815 |
fp = kmem_zalloc(sizeof (*fp), KM_SLEEP);
|
|
816 |
mount_pt = NULL;
|
|
817 |
|
|
818 |
if (get_facts(msg, fp, &mount_pt, mi)) {
|
|
819 |
char time[256];
|
|
820 |
|
|
821 |
|
|
822 |
if (fp->rf_time.tv_sec)
|
|
823 |
(void) snprintf(time, 256, "%ld",
|
|
824 |
(gethrestime_sec() - fp->rf_time.tv_sec)/60);
|
|
825 |
zcmn_err(mi->mi_zone->zone_id, CE_NOTE,
|
|
826 |
"!NFS4 FACT SHEET: %s%s %s%s %s %s%s%s %s%s",
|
|
827 |
fp->rf_action ? "\n Action: " : "",
|
|
828 |
fp->rf_action ? nfs4_recov_action_to_str(fp->rf_action) :
|
|
829 |
"",
|
|
830 |
fp->rf_stat4 ? "\n NFS4 error: " : "",
|
|
831 |
fp->rf_stat4 ? nfs4_stat_to_str(fp->rf_stat4) : "",
|
|
832 |
fp->rf_reboot ? "\n Suspected server reboot. " : "",
|
|
833 |
fp->rf_time.tv_sec ? "\n Server was down for " : "",
|
|
834 |
fp->rf_time.tv_sec ? time : "",
|
|
835 |
fp->rf_time.tv_sec ? " minutes." : "",
|
|
836 |
mount_pt ? " \n Client's lease expired on mount " : "",
|
|
837 |
mount_pt ? mount_pt : "");
|
|
838 |
}
|
|
839 |
|
|
840 |
if (mount_pt) {
|
|
841 |
len = strlen(mount_pt) + 1;
|
|
842 |
kmem_free(mount_pt, len);
|
|
843 |
}
|
|
844 |
|
|
845 |
/* free the fact struct itself */
|
|
846 |
if (fp)
|
|
847 |
kmem_free(fp, sizeof (*fp));
|
|
848 |
}
|
|
849 |
|
|
850 |
/*
|
|
851 |
* Print an event message to /var/adm/messages
|
|
852 |
* The last argument to this fuction dictates the repeat status
|
|
853 |
* of the event. If set to 1, it means that we are dumping this
|
|
854 |
* event and it will _never_ be printed after this time. Else if
|
|
855 |
* set to 0 it will be printed again.
|
|
856 |
*/
|
|
857 |
static void
|
|
858 |
queue_print_event(nfs4_debug_msg_t *msg, mntinfo4_t *mi, int dump)
|
|
859 |
{
|
|
860 |
nfs4_revent_t *ep;
|
|
861 |
zoneid_t zoneid;
|
|
862 |
|
|
863 |
ep = &msg->rmsg_u.msg_event;
|
|
864 |
zoneid = mi->mi_zone->zone_id;
|
|
865 |
|
|
866 |
switch (ep->re_type) {
|
|
867 |
case RE_BAD_SEQID:
|
|
868 |
zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]"
|
|
869 |
"Operation %s for file %s (rnode_pt 0x%p), pid %d using "
|
|
870 |
"seqid %d got %s. Last good seqid was %d for "
|
|
871 |
"operation %s.",
|
|
872 |
msg->msg_srv, msg->msg_mntpt,
|
|
873 |
nfs4_ctags[ep->re_tag1].ct_str, ep->re_char1,
|
|
874 |
(void *)ep->re_rp1, ep->re_pid, ep->re_seqid1,
|
|
875 |
nfs4_stat_to_str(ep->re_stat4), ep->re_seqid2,
|
|
876 |
nfs4_ctags[ep->re_tag2].ct_str);
|
|
877 |
break;
|
|
878 |
case RE_BADHANDLE:
|
|
879 |
ASSERT(ep->re_rp1 != NULL);
|
|
880 |
if (ep->re_char1 != NULL) {
|
|
881 |
zcmn_err(zoneid, CE_NOTE,
|
|
882 |
"![NFS4][Server: %s][Mntpt: %s]"
|
|
883 |
"server %s said filehandle was "
|
|
884 |
"invalid for file: %s (rnode_pt 0x%p) on mount %s",
|
|
885 |
msg->msg_srv, msg->msg_mntpt, msg->msg_srv,
|
|
886 |
ep->re_char1, (void *)ep->re_rp1, msg->msg_mntpt);
|
|
887 |
} else {
|
|
888 |
zcmn_err(zoneid, CE_NOTE,
|
|
889 |
"![NFS4][Server: %s][Mntpt: %s]"
|
|
890 |
"server %s said filehandle was "
|
|
891 |
"invalid for file: (rnode_pt 0x%p) on mount %s"
|
|
892 |
" for fh:", msg->msg_srv, msg->msg_mntpt,
|
|
893 |
msg->msg_srv, (void *)ep->re_rp1, msg->msg_mntpt);
|
|
894 |
sfh4_printfhandle(ep->re_rp1->r_fh);
|
|
895 |
}
|
|
896 |
break;
|
|
897 |
case RE_CLIENTID:
|
|
898 |
zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]"
|
|
899 |
"Can't recover clientid on mount point %s "
|
|
900 |
"(mi 0x%p) due to error %d (%s), for server %s. Marking "
|
|
901 |
"file system as unusable.",
|
|
902 |
msg->msg_srv, msg->msg_mntpt, msg->msg_mntpt,
|
|
903 |
(void *)ep->re_mi, ep->re_uint,
|
|
904 |
nfs4_stat_to_str(ep->re_stat4),
|
|
905 |
msg->msg_srv);
|
|
906 |
break;
|
|
907 |
case RE_DEAD_FILE:
|
|
908 |
zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]"
|
|
909 |
"File %s (rnode_pt: %p) was closed due to NFS "
|
|
910 |
"recovery error on server %s(%s %s)", msg->msg_srv,
|
|
911 |
msg->msg_mntpt, ep->re_char1, (void *)ep->re_rp1,
|
|
912 |
msg->msg_srv, ep->re_char2 ? ep->re_char2 : "",
|
|
913 |
ep->re_stat4 ? nfs4_stat_to_str(ep->re_stat4) : "");
|
|
914 |
break;
|
|
915 |
case RE_END:
|
|
916 |
zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]"
|
|
917 |
"NFS Recovery done for mount %s (mi 0x%p) "
|
|
918 |
"on server %s, rnode_pt1 %s (0x%p), "
|
|
919 |
"rnode_pt2 %s (0x%p)", msg->msg_srv, msg->msg_mntpt,
|
|
920 |
msg->msg_mntpt, (void *)ep->re_mi, msg->msg_srv,
|
|
921 |
ep->re_char1, (void *)ep->re_rp1, ep->re_char2,
|
|
922 |
(void *)ep->re_rp2);
|
|
923 |
break;
|
|
924 |
case RE_FAIL_RELOCK:
|
|
925 |
zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]"
|
|
926 |
"Couldn't reclaim lock for pid %d for "
|
|
927 |
"file %s (rnode_pt 0x%p) on (server %s): error %d",
|
|
928 |
msg->msg_srv, msg->msg_mntpt, ep->re_pid, ep->re_char1,
|
|
929 |
(void *)ep->re_rp1, msg->msg_srv,
|
|
930 |
ep->re_uint ? ep->re_uint : ep->re_stat4);
|
|
931 |
break;
|
|
932 |
case RE_FAIL_REMAP_LEN:
|
|
933 |
zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]"
|
|
934 |
"remap_lookup: server %s returned bad "
|
|
935 |
"fhandle length (%d)", msg->msg_srv, msg->msg_mntpt,
|
|
936 |
msg->msg_srv, ep->re_uint);
|
|
937 |
break;
|
|
938 |
case RE_FAIL_REMAP_OP:
|
|
939 |
zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]"
|
|
940 |
"remap_lookup: didn't get expected OP_GETFH"
|
|
941 |
" for server %s", msg->msg_srv, msg->msg_mntpt,
|
|
942 |
msg->msg_srv);
|
|
943 |
break;
|
|
944 |
case RE_FAILOVER:
|
|
945 |
if (ep->re_char1)
|
|
946 |
zcmn_err(zoneid, CE_NOTE,
|
|
947 |
"![NFS4][Server: %s][Mntpt: %s]"
|
|
948 |
"failing over from %s to %s", msg->msg_srv,
|
|
949 |
msg->msg_mntpt, msg->msg_srv, ep->re_char1);
|
|
950 |
else
|
|
951 |
zcmn_err(zoneid, CE_NOTE,
|
|
952 |
"![NFS4][Server: %s][Mntpt: %s]"
|
|
953 |
"NFS4: failing over: selecting "
|
|
954 |
"original server %s", msg->msg_srv, msg->msg_mntpt,
|
|
955 |
msg->msg_srv);
|
|
956 |
break;
|
|
957 |
case RE_FILE_DIFF:
|
|
958 |
zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]"
|
|
959 |
"File %s (rnode_pt: %p) on server %s was closed "
|
|
960 |
"and failed attempted failover since its is different than "
|
|
961 |
"the original file", msg->msg_srv, msg->msg_mntpt,
|
|
962 |
ep->re_char1, (void *)ep->re_rp1, msg->msg_srv);
|
|
963 |
break;
|
|
964 |
case RE_LOST_STATE:
|
|
965 |
zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]"
|
|
966 |
"Lost %s request for fs %s, file %s (rnode_pt: 0x%p), "
|
|
967 |
"dir %s (0x%p) for server %s", msg->msg_srv, msg->msg_mntpt,
|
|
968 |
nfs4_op_to_str(ep->re_uint), msg->msg_mntpt,
|
|
969 |
ep->re_char1, (void *)ep->re_rp1, ep->re_char2,
|
|
970 |
(void *)ep->re_rp2, msg->msg_srv);
|
|
971 |
break;
|
|
972 |
case RE_OPENS_CHANGED:
|
|
973 |
zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]"
|
|
974 |
"The number of open files to reopen changed "
|
|
975 |
"for mount %s mi 0x%p (old %d, new %d) on server %s",
|
|
976 |
msg->msg_srv, msg->msg_mntpt, msg->msg_mntpt,
|
|
977 |
(void *)ep->re_mi, ep->re_uint, ep->re_pid, msg->msg_srv);
|
|
978 |
break;
|
|
979 |
case RE_SIGLOST:
|
|
980 |
case RE_SIGLOST_NO_DUMP:
|
|
981 |
if (ep->re_uint)
|
|
982 |
zcmn_err(zoneid, CE_NOTE,
|
|
983 |
"![NFS4][Server: %s][Mntpt: %s]"
|
|
984 |
"Process %d lost its locks on "
|
|
985 |
"file %s (rnode_pt: %p) due to NFS recovery error "
|
|
986 |
"(%d) on server %s.", msg->msg_srv, msg->msg_mntpt,
|
|
987 |
ep->re_pid, ep->re_char1, (void *)ep->re_rp1,
|
|
988 |
ep->re_uint, msg->msg_srv);
|
|
989 |
else
|
|
990 |
zcmn_err(zoneid, CE_NOTE,
|
|
991 |
"![NFS4][Server: %s][Mntpt: %s]"
|
|
992 |
"Process %d lost its locks on "
|
|
993 |
"file %s (rnode_pt: %p) due to NFS recovery error "
|
|
994 |
"(%s) on server %s.", msg->msg_srv, msg->msg_mntpt,
|
|
995 |
ep->re_pid, ep->re_char1, (void *)ep->re_rp1,
|
|
996 |
nfs4_stat_to_str(ep->re_stat4), msg->msg_srv);
|
|
997 |
break;
|
|
998 |
case RE_START:
|
|
999 |
zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]"
|
|
1000 |
"NFS Starting recovery for mount %s "
|
|
1001 |
"(mi 0x%p mi_recovflags [0x%x]) on server %s, "
|
|
1002 |
"rnode_pt1 %s (0x%p), rnode_pt2 %s (0x%p)", msg->msg_srv,
|
|
1003 |
msg->msg_mntpt, msg->msg_mntpt, (void *)ep->re_mi,
|
|
1004 |
ep->re_uint, msg->msg_srv, ep->re_char1, (void *)ep->re_rp1,
|
|
1005 |
ep->re_char2, (void *)ep->re_rp2);
|
|
1006 |
break;
|
|
1007 |
case RE_UNEXPECTED_ACTION:
|
|
1008 |
zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]"
|
|
1009 |
"NFS recovery: unexpected action (%s) on server %s",
|
|
1010 |
msg->msg_srv, msg->msg_mntpt,
|
|
1011 |
nfs4_recov_action_to_str(ep->re_uint), msg->msg_srv);
|
|
1012 |
break;
|
|
1013 |
case RE_UNEXPECTED_ERRNO:
|
|
1014 |
zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]"
|
|
1015 |
"NFS recovery: unexpected errno (%d) on server %s",
|
|
1016 |
msg->msg_srv, msg->msg_mntpt, ep->re_uint, msg->msg_srv);
|
|
1017 |
break;
|
|
1018 |
case RE_UNEXPECTED_STATUS:
|
|
1019 |
zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]"
|
|
1020 |
"NFS recovery: unexpected NFS status code (%s) "
|
|
1021 |
"on server %s", msg->msg_srv, msg->msg_mntpt,
|
|
1022 |
nfs4_stat_to_str(ep->re_stat4),
|
|
1023 |
msg->msg_srv);
|
|
1024 |
break;
|
|
1025 |
case RE_WRONGSEC:
|
|
1026 |
zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]"
|
|
1027 |
"NFS can't recover from NFS4ERR_WRONGSEC."
|
|
1028 |
" error %d for server %s: rnode_pt1 %s (0x%p)"
|
|
1029 |
" rnode_pt2 %s (0x%p)", msg->msg_srv, msg->msg_mntpt,
|
|
1030 |
ep->re_uint, msg->msg_srv, ep->re_char1, (void *)ep->re_rp1,
|
|
1031 |
ep->re_char2, (void *)ep->re_rp2);
|
|
1032 |
break;
|
|
1033 |
case RE_LOST_STATE_BAD_OP:
|
|
1034 |
zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]"
|
|
1035 |
"NFS lost state with unrecognized op (%d)."
|
|
1036 |
" fs %s, server %s, pid %d, file %s (rnode_pt: 0x%p), "
|
|
1037 |
"dir %s (0x%p)", msg->msg_srv, msg->msg_mntpt,
|
|
1038 |
ep->re_uint, msg->msg_mntpt, msg->msg_srv, ep->re_pid,
|
|
1039 |
ep->re_char1, (void *)ep->re_rp1, ep->re_char2,
|
|
1040 |
(void *)ep->re_rp2);
|
|
1041 |
break;
|
|
1042 |
default:
|
|
1043 |
zcmn_err(zoneid, CE_WARN,
|
|
1044 |
"!queue_print_event: illegal event %d", ep->re_type);
|
|
1045 |
break;
|
|
1046 |
}
|
|
1047 |
|
|
1048 |
print_facts(msg, mi);
|
|
1049 |
|
|
1050 |
/*
|
|
1051 |
* If set this event will not be printed again and is considered
|
|
1052 |
* dumped.
|
|
1053 |
*/
|
|
1054 |
if (dump)
|
|
1055 |
msg->msg_status = NFS4_MS_NO_DUMP;
|
|
1056 |
}
|
|
1057 |
|
|
1058 |
/*
|
|
1059 |
* Print a fact message to /var/adm/messages
|
|
1060 |
*/
|
|
1061 |
static void
|
|
1062 |
queue_print_fact(nfs4_debug_msg_t *msg, int dump)
|
|
1063 |
{
|
|
1064 |
nfs4_rfact_t *fp;
|
|
1065 |
zoneid_t zoneid;
|
|
1066 |
|
|
1067 |
fp = &msg->rmsg_u.msg_fact;
|
|
1068 |
zoneid = getzoneid();
|
|
1069 |
|
|
1070 |
switch (fp->rf_type) {
|
|
1071 |
case RF_BADOWNER:
|
|
1072 |
zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]"
|
|
1073 |
"NFSMAPID_DOMAIN does not match the server: %s domain\n"
|
|
1074 |
"Please check configuration", msg->msg_srv, msg->msg_mntpt,
|
|
1075 |
msg->msg_srv);
|
|
1076 |
break;
|
|
1077 |
case RF_ERR:
|
|
1078 |
if (fp->rf_error)
|
|
1079 |
zcmn_err(zoneid, CE_NOTE,
|
|
1080 |
"![NFS4][Server: %s][Mntpt: %s]NFS op %s got "
|
|
1081 |
"error %d causing recovery action %s.%s",
|
|
1082 |
msg->msg_srv, msg->msg_mntpt,
|
|
1083 |
nfs4_op_to_str(fp->rf_op), fp->rf_error,
|
|
1084 |
nfs4_recov_action_to_str(fp->rf_action),
|
|
1085 |
fp->rf_reboot ?
|
|
1086 |
" Client also suspects that the server rebooted,"
|
|
1087 |
" or experienced a network partition." : "");
|
|
1088 |
else
|
|
1089 |
zcmn_err(zoneid, CE_NOTE,
|
|
1090 |
"![NFS4][Server: %s][Mntpt: %s]NFS op %s got "
|
|
1091 |
"error %s causing recovery action %s.%s",
|
|
1092 |
msg->msg_srv, msg->msg_mntpt,
|
|
1093 |
nfs4_op_to_str(fp->rf_op),
|
|
1094 |
nfs4_stat_to_str(fp->rf_stat4),
|
|
1095 |
nfs4_recov_action_to_str(fp->rf_action),
|
|
1096 |
fp->rf_reboot ?
|
|
1097 |
" Client also suspects that the server rebooted,"
|
|
1098 |
" or experienced a network partition." : "");
|
|
1099 |
break;
|
|
1100 |
case RF_RENEW_EXPIRED:
|
|
1101 |
zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]"
|
|
1102 |
"NFS4 renew thread detected client's "
|
|
1103 |
"lease has expired. Current open files/locks/IO may fail",
|
|
1104 |
msg->msg_srv, msg->msg_mntpt);
|
|
1105 |
break;
|
|
1106 |
case RF_SRV_NOT_RESPOND:
|
|
1107 |
zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]"
|
|
1108 |
"NFS server %s not responding; still trying\n",
|
|
1109 |
msg->msg_srv, msg->msg_mntpt, msg->msg_srv);
|
|
1110 |
break;
|
|
1111 |
case RF_SRV_OK:
|
|
1112 |
zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]"
|
|
1113 |
"NFS server %s ok", msg->msg_srv, msg->msg_mntpt,
|
|
1114 |
msg->msg_srv);
|
|
1115 |
break;
|
|
1116 |
case RF_SRVS_NOT_RESPOND:
|
|
1117 |
zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]"
|
|
1118 |
"NFS servers %s not responding; still trying", msg->msg_srv,
|
|
1119 |
msg->msg_mntpt, msg->msg_srv);
|
|
1120 |
break;
|
|
1121 |
case RF_SRVS_OK:
|
|
1122 |
zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]"
|
|
1123 |
"NFS servers %s ok", msg->msg_srv, msg->msg_mntpt,
|
|
1124 |
msg->msg_srv);
|
|
1125 |
break;
|
|
1126 |
case RF_DELMAP_CB_ERR:
|
|
1127 |
zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]"
|
|
1128 |
"NFS op %s got error %s when executing delmap on file %s "
|
|
1129 |
"(rnode_pt 0x%p).",
|
|
1130 |
msg->msg_srv, msg->msg_mntpt, nfs4_op_to_str(fp->rf_op),
|
|
1131 |
nfs4_stat_to_str(fp->rf_stat4), fp->rf_char1,
|
|
1132 |
(void *)fp->rf_rp1);
|
|
1133 |
break;
|
|
1134 |
default:
|
|
1135 |
zcmn_err(zoneid, CE_WARN, "!queue_print_fact: illegal fact %d",
|
|
1136 |
fp->rf_type);
|
|
1137 |
}
|
|
1138 |
|
|
1139 |
/*
|
|
1140 |
* If set this fact will not be printed again and is considered
|
|
1141 |
* dumped.
|
|
1142 |
*/
|
|
1143 |
if (dump)
|
|
1144 |
msg->msg_status = NFS4_MS_NO_DUMP;
|
|
1145 |
}
|
|
1146 |
|
|
1147 |
/*
|
|
1148 |
* Returns 1 if the entire queue should be dumped, 0 otherwise.
|
|
1149 |
*/
|
|
1150 |
static int
|
|
1151 |
id_to_dump_queue(nfs4_event_type_t id)
|
|
1152 |
{
|
|
1153 |
switch (id) {
|
|
1154 |
case RE_DEAD_FILE:
|
|
1155 |
case RE_SIGLOST:
|
|
1156 |
case RE_WRONGSEC:
|
|
1157 |
case RE_CLIENTID:
|
|
1158 |
return (1);
|
|
1159 |
default:
|
|
1160 |
return (0);
|
|
1161 |
}
|
|
1162 |
}
|
|
1163 |
|
|
1164 |
/*
|
|
1165 |
* Returns 1 if the event (but not the entire queue) should be printed;
|
|
1166 |
* 0 otherwise.
|
|
1167 |
*/
|
|
1168 |
static int
|
|
1169 |
id_to_dump_solo_event(nfs4_event_type_t id)
|
|
1170 |
{
|
|
1171 |
switch (id) {
|
|
1172 |
case RE_BAD_SEQID:
|
|
1173 |
case RE_BADHANDLE:
|
|
1174 |
case RE_FAIL_REMAP_LEN:
|
|
1175 |
case RE_FAIL_REMAP_OP:
|
|
1176 |
case RE_FAILOVER:
|
|
1177 |
case RE_OPENS_CHANGED:
|
|
1178 |
case RE_SIGLOST_NO_DUMP:
|
|
1179 |
case RE_UNEXPECTED_ACTION:
|
|
1180 |
case RE_UNEXPECTED_ERRNO:
|
|
1181 |
case RE_UNEXPECTED_STATUS:
|
|
1182 |
case RE_LOST_STATE_BAD_OP:
|
|
1183 |
return (1);
|
|
1184 |
default:
|
|
1185 |
return (0);
|
|
1186 |
}
|
|
1187 |
}
|
|
1188 |
|
|
1189 |
/*
|
|
1190 |
* Returns 1 if the fact (but not the entire queue) should be printed;
|
|
1191 |
* 0 otherwise.
|
|
1192 |
*/
|
|
1193 |
static int
|
|
1194 |
id_to_dump_solo_fact(nfs4_fact_type_t id)
|
|
1195 |
{
|
|
1196 |
switch (id) {
|
|
1197 |
case RF_SRV_NOT_RESPOND:
|
|
1198 |
case RF_SRV_OK:
|
|
1199 |
case RF_SRVS_NOT_RESPOND:
|
|
1200 |
case RF_SRVS_OK:
|
|
1201 |
return (1);
|
|
1202 |
default:
|
|
1203 |
return (0);
|
|
1204 |
}
|
|
1205 |
}
|
|
1206 |
|
|
1207 |
/*
|
|
1208 |
* Update a kernel stat
|
|
1209 |
*/
|
|
1210 |
static void
|
|
1211 |
update_recov_kstats(nfs4_debug_msg_t *msg, mntinfo4_t *mi)
|
|
1212 |
{
|
|
1213 |
rkstat_t *rsp;
|
|
1214 |
|
|
1215 |
if (!mi->mi_recov_ksp)
|
|
1216 |
return;
|
|
1217 |
|
|
1218 |
rsp = (rkstat_t *)mi->mi_recov_ksp->ks_data;
|
|
1219 |
|
|
1220 |
if (msg->msg_type == RM_EVENT) {
|
|
1221 |
switch (msg->rmsg_u.msg_event.re_type) {
|
|
1222 |
case RE_BADHANDLE:
|
|
1223 |
rsp->badhandle.value.ul++;
|
|
1224 |
break;
|
|
1225 |
case RE_CLIENTID:
|
|
1226 |
rsp->clientid.value.ul++;
|
|
1227 |
break;
|
|
1228 |
case RE_DEAD_FILE:
|
|
1229 |
rsp->dead_file.value.ul++;
|
|
1230 |
break;
|
|
1231 |
case RE_FAIL_RELOCK:
|
|
1232 |
rsp->fail_relock.value.ul++;
|
|
1233 |
break;
|
|
1234 |
case RE_FILE_DIFF:
|
|
1235 |
rsp->file_diff.value.ul++;
|
|
1236 |
break;
|
|
1237 |
case RE_OPENS_CHANGED:
|
|
1238 |
rsp->opens_changed.value.ul++;
|
|
1239 |
break;
|
|
1240 |
case RE_SIGLOST:
|
|
1241 |
case RE_SIGLOST_NO_DUMP:
|
|
1242 |
rsp->siglost.value.ul++;
|
|
1243 |
break;
|
|
1244 |
case RE_UNEXPECTED_ACTION:
|
|
1245 |
rsp->unexp_action.value.ul++;
|
|
1246 |
break;
|
|
1247 |
case RE_UNEXPECTED_ERRNO:
|
|
1248 |
rsp->unexp_errno.value.ul++;
|
|
1249 |
break;
|
|
1250 |
case RE_UNEXPECTED_STATUS:
|
|
1251 |
rsp->unexp_status.value.ul++;
|
|
1252 |
break;
|
|
1253 |
case RE_WRONGSEC:
|
|
1254 |
rsp->wrongsec.value.ul++;
|
|
1255 |
break;
|
|
1256 |
case RE_LOST_STATE_BAD_OP:
|
|
1257 |
rsp->lost_state_bad_op.value.ul++;
|
|
1258 |
break;
|
|
1259 |
default:
|
|
1260 |
break;
|
|
1261 |
}
|
|
1262 |
} else if (msg->msg_type == RM_FACT) {
|
|
1263 |
switch (msg->rmsg_u.msg_fact.rf_type) {
|
|
1264 |
case RF_BADOWNER:
|
|
1265 |
rsp->badowner.value.ul++;
|
|
1266 |
break;
|
|
1267 |
case RF_SRV_NOT_RESPOND:
|
|
1268 |
rsp->not_responding.value.ul++;
|
|
1269 |
break;
|
|
1270 |
default:
|
|
1271 |
break;
|
|
1272 |
}
|
|
1273 |
}
|
|
1274 |
}
|
|
1275 |
|
|
1276 |
/*
|
|
1277 |
* Dump the mi's mi_msg_list of recovery messages.
|
|
1278 |
*/
|
|
1279 |
static void
|
|
1280 |
dump_queue(mntinfo4_t *mi, nfs4_debug_msg_t *msg)
|
|
1281 |
{
|
|
1282 |
nfs4_debug_msg_t *tmp_msg;
|
|
1283 |
|
|
1284 |
ASSERT(mutex_owned(&mi->mi_msg_list_lock));
|
|
1285 |
|
|
1286 |
/* update kstats */
|
|
1287 |
update_recov_kstats(msg, mi);
|
|
1288 |
|
|
1289 |
/*
|
|
1290 |
* If we aren't supposed to dump the queue then see if we
|
|
1291 |
* should just print this single message, then return.
|
|
1292 |
*/
|
|
1293 |
if (!id_to_dump_queue(msg->rmsg_u.msg_event.re_type)) {
|
|
1294 |
if (id_to_dump_solo_event(msg->rmsg_u.msg_event.re_type))
|
|
1295 |
queue_print_event(msg, mi, 0);
|
|
1296 |
return;
|
|
1297 |
}
|
|
1298 |
|
|
1299 |
/*
|
|
1300 |
* Write all events/facts in the queue that haven't been
|
|
1301 |
* previously written to disk.
|
|
1302 |
*/
|
|
1303 |
tmp_msg = list_head(&mi->mi_msg_list);
|
|
1304 |
while (tmp_msg) {
|
|
1305 |
if (tmp_msg->msg_status == NFS4_MS_DUMP) {
|
|
1306 |
if (tmp_msg->msg_type == RM_EVENT)
|
|
1307 |
queue_print_event(tmp_msg, mi, 1);
|
|
1308 |
else if (tmp_msg->msg_type == RM_FACT)
|
|
1309 |
queue_print_fact(tmp_msg, 1);
|
|
1310 |
}
|
|
1311 |
tmp_msg = list_next(&mi->mi_msg_list, tmp_msg);
|
|
1312 |
}
|
|
1313 |
}
|
|
1314 |
|
|
1315 |
/*
|
|
1316 |
* Places the event into mi's debug recovery message queue. Some of the
|
|
1317 |
* fields can be overloaded to be a generic value, depending on the event
|
|
1318 |
* type. These include "count", "why".
|
|
1319 |
*/
|
|
1320 |
void
|
|
1321 |
nfs4_queue_event(nfs4_event_type_t id, mntinfo4_t *mi, char *server1,
|
|
1322 |
uint_t count, vnode_t *vp1, vnode_t *vp2, nfsstat4 nfs4_error,
|
|
1323 |
char *why, pid_t pid, nfs4_tag_type_t tag1, nfs4_tag_type_t tag2,
|
|
1324 |
seqid4 seqid1, seqid4 seqid2)
|
|
1325 |
{
|
|
1326 |
nfs4_debug_msg_t *msg;
|
|
1327 |
nfs4_revent_t *ep;
|
|
1328 |
char *cur_srv;
|
|
1329 |
rnode4_t *rp1 = NULL, *rp2 = NULL;
|
|
1330 |
refstr_t *mntpt;
|
|
1331 |
|
|
1332 |
ASSERT(mi != NULL);
|
|
1333 |
if (vp1)
|
|
1334 |
rp1 = VTOR4(vp1);
|
|
1335 |
if (vp2)
|
|
1336 |
rp2 = VTOR4(vp2);
|
|
1337 |
|
|
1338 |
/*
|
|
1339 |
* Initialize the message with the relevent server/mount_pt/time
|
|
1340 |
* information. Also place the relevent event related info.
|
|
1341 |
*/
|
|
1342 |
msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
|
|
1343 |
msg->msg_type = RM_EVENT;
|
|
1344 |
msg->msg_status = NFS4_MS_DUMP;
|
|
1345 |
ep = &msg->rmsg_u.msg_event;
|
|
1346 |
ep->re_type = id;
|
|
1347 |
gethrestime(&msg->msg_time);
|
|
1348 |
|
|
1349 |
cur_srv = mi->mi_curr_serv->sv_hostname;
|
|
1350 |
msg->msg_srv = strdup(cur_srv);
|
|
1351 |
mntpt = vfs_getmntpoint(mi->mi_vfsp);
|
|
1352 |
msg->msg_mntpt = strdup(refstr_value(mntpt));
|
|
1353 |
refstr_rele(mntpt);
|
|
1354 |
|
|
1355 |
set_event(id, ep, mi, rp1, rp2, count, pid, nfs4_error, server1,
|
|
1356 |
why, tag1, tag2, seqid1, seqid2);
|
|
1357 |
|
|
1358 |
mutex_enter(&mi->mi_msg_list_lock);
|
|
1359 |
|
|
1360 |
/* if this event is the same as the last event, drop it */
|
|
1361 |
if (events_same(list_tail(&mi->mi_msg_list), msg, mi)) {
|
|
1362 |
mutex_exit(&mi->mi_msg_list_lock);
|
|
1363 |
nfs4_free_msg(msg);
|
|
1364 |
return;
|
|
1365 |
}
|
|
1366 |
|
|
1367 |
/* queue the message at the end of the list */
|
|
1368 |
list_insert_tail(&mi->mi_msg_list, msg);
|
|
1369 |
|
|
1370 |
dump_queue(mi, msg);
|
|
1371 |
|
|
1372 |
if (mi->mi_msg_count == nfs4_msg_max) {
|
|
1373 |
nfs4_debug_msg_t *rm_msg;
|
|
1374 |
|
|
1375 |
/* remove the queue'd message at the front of the list */
|
|
1376 |
rm_msg = list_head(&mi->mi_msg_list);
|
|
1377 |
list_remove(&mi->mi_msg_list, rm_msg);
|
|
1378 |
mutex_exit(&mi->mi_msg_list_lock);
|
|
1379 |
nfs4_free_msg(rm_msg);
|
|
1380 |
} else {
|
|
1381 |
mi->mi_msg_count++;
|
|
1382 |
mutex_exit(&mi->mi_msg_list_lock);
|
|
1383 |
}
|
|
1384 |
}
|
|
1385 |
|
|
1386 |
/*
|
|
1387 |
* Places the fact into mi's debug recovery messages queue.
|
|
1388 |
*/
|
|
1389 |
void
|
|
1390 |
nfs4_queue_fact(nfs4_fact_type_t fid, mntinfo4_t *mi, nfsstat4 stat4,
|
|
1391 |
nfs4_recov_t raction, nfs_opnum4 op, bool_t reboot, char *srvname,
|
|
1392 |
int error, vnode_t *vp)
|
|
1393 |
{
|
|
1394 |
nfs4_debug_msg_t *msg;
|
|
1395 |
nfs4_rfact_t *fp;
|
|
1396 |
char *cur_srv;
|
|
1397 |
refstr_t *mntpt;
|
|
1398 |
|
|
1399 |
/*
|
|
1400 |
* Initialize the message with the relevent server/mount_pt/time
|
|
1401 |
* information. Also place the relevent fact related info.
|
|
1402 |
*/
|
|
1403 |
msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
|
|
1404 |
msg->msg_type = RM_FACT;
|
|
1405 |
msg->msg_status = NFS4_MS_DUMP;
|
|
1406 |
gethrestime(&msg->msg_time);
|
|
1407 |
|
|
1408 |
if (srvname)
|
|
1409 |
cur_srv = srvname;
|
|
1410 |
else
|
|
1411 |
cur_srv = mi->mi_curr_serv->sv_hostname;
|
|
1412 |
|
|
1413 |
msg->msg_srv = strdup(cur_srv);
|
|
1414 |
mntpt = vfs_getmntpoint(mi->mi_vfsp);
|
|
1415 |
msg->msg_mntpt = strdup(refstr_value(mntpt));
|
|
1416 |
refstr_rele(mntpt);
|
|
1417 |
|
|
1418 |
fp = &msg->rmsg_u.msg_fact;
|
|
1419 |
fp->rf_type = fid;
|
|
1420 |
fp->rf_status = RFS_NO_INSPECT;
|
|
1421 |
set_fact(fid, fp, stat4, raction, op, reboot, error, vp);
|
|
1422 |
|
|
1423 |
update_recov_kstats(msg, mi);
|
|
1424 |
|
|
1425 |
mutex_enter(&mi->mi_msg_list_lock);
|
|
1426 |
|
|
1427 |
/* if this fact is the same as the last fact, drop it */
|
|
1428 |
if (facts_same(list_tail(&mi->mi_msg_list), msg, mi)) {
|
|
1429 |
mutex_exit(&mi->mi_msg_list_lock);
|
|
1430 |
nfs4_free_msg(msg);
|
|
1431 |
return;
|
|
1432 |
}
|
|
1433 |
|
|
1434 |
/* queue the message at the end of the list */
|
|
1435 |
list_insert_tail(&mi->mi_msg_list, msg);
|
|
1436 |
|
|
1437 |
if (id_to_dump_solo_fact(msg->rmsg_u.msg_fact.rf_type))
|
|
1438 |
queue_print_fact(msg, 0);
|
|
1439 |
|
|
1440 |
if (mi->mi_msg_count == nfs4_msg_max) {
|
|
1441 |
nfs4_debug_msg_t *rm_msg;
|
|
1442 |
|
|
1443 |
/* remove the queue'd message at the front of the list */
|
|
1444 |
rm_msg = list_head(&mi->mi_msg_list);
|
|
1445 |
list_remove(&mi->mi_msg_list, rm_msg);
|
|
1446 |
mutex_exit(&mi->mi_msg_list_lock);
|
|
1447 |
nfs4_free_msg(rm_msg);
|
|
1448 |
} else {
|
|
1449 |
mi->mi_msg_count++;
|
|
1450 |
mutex_exit(&mi->mi_msg_list_lock);
|
|
1451 |
}
|
|
1452 |
}
|
|
1453 |
|
|
1454 |
/*
|
|
1455 |
* Initialize the 'mi_recov_kstat' kstat.
|
|
1456 |
*/
|
|
1457 |
void
|
|
1458 |
nfs4_mnt_recov_kstat_init(vfs_t *vfsp)
|
|
1459 |
{
|
|
1460 |
mntinfo4_t *mi = VFTOMI4(vfsp);
|
|
1461 |
kstat_t *ksp;
|
|
1462 |
zoneid_t zoneid = mi->mi_zone->zone_id;
|
|
1463 |
|
|
1464 |
/*
|
|
1465 |
* Create the version specific kstats.
|
|
1466 |
*
|
|
1467 |
* PSARC 2001/697 Contract Private Interface
|
|
1468 |
* All nfs kstats are under SunMC contract
|
|
1469 |
* Please refer to the PSARC listed above and contact
|
|
1470 |
* SunMC before making any changes!
|
|
1471 |
*
|
|
1472 |
* Changes must be reviewed by Solaris File Sharing
|
|
1473 |
* Changes must be communicated to [email protected]
|
|
1474 |
*
|
|
1475 |
*/
|
|
1476 |
|
|
1477 |
if ((ksp = kstat_create_zone("nfs", getminor(vfsp->vfs_dev),
|
|
1478 |
"mi_recov_kstat", "misc", KSTAT_TYPE_NAMED,
|
|
1479 |
sizeof (rkstat_t) / sizeof (kstat_named_t),
|
|
1480 |
KSTAT_FLAG_WRITABLE, zoneid)) == NULL) {
|
|
1481 |
mi->mi_recov_ksp = NULL;
|
|
1482 |
zcmn_err(GLOBAL_ZONEID, CE_NOTE,
|
|
1483 |
"!mi_recov_kstat for mi %p failed\n",
|
|
1484 |
(void *)mi);
|
|
1485 |
return;
|
|
1486 |
}
|
|
1487 |
if (zoneid != GLOBAL_ZONEID)
|
|
1488 |
kstat_zone_add(ksp, GLOBAL_ZONEID);
|
|
1489 |
mi->mi_recov_ksp = ksp;
|
|
1490 |
bcopy(&rkstat_template, ksp->ks_data, sizeof (rkstat_t));
|
|
1491 |
kstat_install(ksp);
|
|
1492 |
}
|
|
1493 |
|
|
1494 |
/*
|
|
1495 |
* Increment the "delay" kstat.
|
|
1496 |
*/
|
|
1497 |
void
|
|
1498 |
nfs4_mi_kstat_inc_delay(mntinfo4_t *mi)
|
|
1499 |
{
|
|
1500 |
rkstat_t *rsp;
|
|
1501 |
|
|
1502 |
if (!mi->mi_recov_ksp)
|
|
1503 |
return;
|
|
1504 |
|
|
1505 |
rsp = (rkstat_t *)mi->mi_recov_ksp->ks_data;
|
|
1506 |
rsp->delay.value.ul++;
|
|
1507 |
}
|
|
1508 |
|
|
1509 |
/*
|
|
1510 |
* Increment the "no_grace" kstat.
|
|
1511 |
*/
|
|
1512 |
void
|
|
1513 |
nfs4_mi_kstat_inc_no_grace(mntinfo4_t *mi)
|
|
1514 |
{
|
|
1515 |
rkstat_t *rsp;
|
|
1516 |
|
|
1517 |
if (!mi->mi_recov_ksp)
|
|
1518 |
return;
|
|
1519 |
|
|
1520 |
rsp = (rkstat_t *)mi->mi_recov_ksp->ks_data;
|
|
1521 |
rsp->no_grace.value.ul++;
|
|
1522 |
}
|
|
1523 |
|
|
1524 |
/*
|
|
1525 |
* Allocate and copy a string. XXX There really ought to be a single
|
|
1526 |
* strdup() for the entire kernel.
|
|
1527 |
*/
|
|
1528 |
static char *
|
|
1529 |
strdup(const char *s)
|
|
1530 |
{
|
|
1531 |
size_t len;
|
|
1532 |
char *new;
|
|
1533 |
|
|
1534 |
len = strlen(s);
|
|
1535 |
new = kmem_alloc(len + 1, KM_SLEEP);
|
|
1536 |
bcopy(s, new, len);
|
|
1537 |
new[len] = '\0';
|
|
1538 |
|
|
1539 |
return (new);
|
|
1540 |
}
|