author | eschrock |
Fri, 26 Oct 2007 13:47:19 -0700 | |
changeset 5345 | 44060de1d838 |
parent 5238 | 37127716670e |
child 6073 | 47f6aa7a8077 |
permissions | -rw-r--r-- |
0 | 1 |
/* |
2 |
* CDDL HEADER START |
|
3 |
* |
|
4 |
* The contents of this file are subject to the terms of the |
|
1712 | 5 |
* Common Development and Distribution License (the "License"). |
6 |
* You may not use this file except in compliance with the License. |
|
0 | 7 |
* |
8 |
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
|
9 |
* or http://www.opensolaris.org/os/licensing. |
|
10 |
* See the License for the specific language governing permissions |
|
11 |
* and limitations under the License. |
|
12 |
* |
|
13 |
* When distributing Covered Code, include this CDDL HEADER in each |
|
14 |
* file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
|
15 |
* If applicable, add the following below this CDDL HEADER, with the |
|
16 |
* fields enclosed by brackets "[]" replaced with your own identifying |
|
17 |
* information: Portions Copyright [yyyy] [name of copyright owner] |
|
18 |
* |
|
19 |
* CDDL HEADER END |
|
20 |
*/ |
|
21 |
/* |
|
4244
77e7b46e3d5e
6243574 Services can be shown online with empty contracts
jeanm
parents:
3179
diff
changeset
|
22 |
* Copyright 2007 Sun Microsystems, Inc. All rights reserved. |
0 | 23 |
* Use is subject to license terms. |
24 |
*/ |
|
25 |
||
26 |
#pragma ident "%Z%%M% %I% %E% SMI" |
|
27 |
||
28 |
/* |
|
29 |
* method.c - method execution functions |
|
30 |
* |
|
31 |
* This file contains the routines needed to run a method: a fork(2)-exec(2) |
|
32 |
* invocation monitored using either the contract filesystem or waitpid(2). |
|
33 |
* (Plain fork1(2) support is provided in fork.c.) |
|
34 |
* |
|
35 |
* Contract Transfer |
|
36 |
* When we restart a service, we want to transfer any contracts that the old |
|
37 |
* service's contract inherited. This means that (a) we must not abandon the |
|
38 |
* old contract when the service dies and (b) we must write the id of the old |
|
39 |
* contract into the terms of the new contract. There should be limits to |
|
40 |
* (a), though, since we don't want to keep the contract around forever. To |
|
41 |
* this end we'll say that services in the offline state may have a contract |
|
42 |
* to be transferred and services in the disabled or maintenance states cannot. |
|
43 |
* This means that when a service transitions from online (or degraded) to |
|
44 |
* offline, the contract should be preserved, and when the service transitions |
|
45 |
* from offline to online (i.e., the start method), we'll transfer inherited |
|
46 |
* contracts. |
|
47 |
*/ |
|
48 |
||
49 |
#include <sys/contract/process.h> |
|
50 |
#include <sys/ctfs.h> |
|
51 |
#include <sys/stat.h> |
|
52 |
#include <sys/time.h> |
|
53 |
#include <sys/types.h> |
|
54 |
#include <sys/uio.h> |
|
55 |
#include <sys/wait.h> |
|
56 |
#include <alloca.h> |
|
57 |
#include <assert.h> |
|
58 |
#include <errno.h> |
|
59 |
#include <fcntl.h> |
|
60 |
#include <libcontract.h> |
|
61 |
#include <libcontract_priv.h> |
|
62 |
#include <libgen.h> |
|
63 |
#include <librestart.h> |
|
64 |
#include <libscf.h> |
|
65 |
#include <limits.h> |
|
66 |
#include <port.h> |
|
67 |
#include <sac.h> |
|
68 |
#include <signal.h> |
|
69 |
#include <stdlib.h> |
|
70 |
#include <string.h> |
|
71 |
#include <strings.h> |
|
72 |
#include <unistd.h> |
|
4244
77e7b46e3d5e
6243574 Services can be shown online with empty contracts
jeanm
parents:
3179
diff
changeset
|
73 |
#include <atomic.h> |
77e7b46e3d5e
6243574 Services can be shown online with empty contracts
jeanm
parents:
3179
diff
changeset
|
74 |
#include <poll.h> |
0 | 75 |
|
76 |
#include "startd.h" |
|
77 |
||
78 |
#define SBIN_SH "/sbin/sh" |
|
79 |
||
80 |
/*
 * Non-zero while some method_run() call is between forking a method and
 * finishing method_store_contract() for it, i.e. while a new contract may
 * exist that has not yet been entered into the svc.startd internal hash
 * table.  method_run() adjusts it with atomic_add_16().
 */
volatile uint16_t storing_contract = 0;
77e7b46e3d5e
6243574 Services can be shown online with empty contracts
jeanm
parents:
3179
diff
changeset
|
85 |
|
77e7b46e3d5e
6243574 Services can be shown online with empty contracts
jeanm
parents:
3179
diff
changeset
|
86 |
/* |
0 | 87 |
* Mapping from restart_on method-type to contract events. Must correspond to |
88 |
* enum method_restart_t. |
|
89 |
*/ |
|
90 |
static uint_t method_events[] = { |
|
91 |
/* METHOD_RESTART_ALL */ |
|
92 |
CT_PR_EV_HWERR | CT_PR_EV_SIGNAL | CT_PR_EV_CORE | CT_PR_EV_EMPTY, |
|
93 |
/* METHOD_RESTART_EXTERNAL_FAULT */ |
|
94 |
CT_PR_EV_HWERR | CT_PR_EV_SIGNAL, |
|
95 |
/* METHOD_RESTART_ANY_FAULT */ |
|
96 |
CT_PR_EV_HWERR | CT_PR_EV_SIGNAL | CT_PR_EV_CORE |
|
97 |
}; |
|
98 |
||
99 |
/* |
|
100 |
* method_record_start(restarter_inst_t *) |
|
101 |
* Record a service start for rate limiting. Place the current time |
|
102 |
* in the circular array of instance starts. |
|
103 |
*/ |
|
104 |
static void |
|
105 |
method_record_start(restarter_inst_t *inst) |
|
106 |
{ |
|
107 |
int index = inst->ri_start_index++ % RINST_START_TIMES; |
|
108 |
||
109 |
inst->ri_start_time[index] = gethrtime(); |
|
110 |
} |
|
111 |
||
112 |
/* |
|
113 |
* method_rate_critical(restarter_inst_t *) |
|
114 |
* Return true if the average start interval is less than the permitted |
|
115 |
* interval. Implicit success if insufficient measurements for an |
|
116 |
* average exist. |
|
117 |
*/ |
|
118 |
static int |
|
119 |
method_rate_critical(restarter_inst_t *inst) |
|
120 |
{ |
|
121 |
uint_t n = inst->ri_start_index; |
|
122 |
hrtime_t avg_ns = 0; |
|
123 |
||
124 |
if (inst->ri_start_index < RINST_START_TIMES) |
|
125 |
return (0); |
|
126 |
||
127 |
avg_ns = |
|
128 |
(inst->ri_start_time[(n - 1) % RINST_START_TIMES] - |
|
129 |
inst->ri_start_time[n % RINST_START_TIMES]) / |
|
130 |
(RINST_START_TIMES - 1); |
|
131 |
||
132 |
return (avg_ns < RINST_FAILURE_RATE_NS); |
|
133 |
} |
|
134 |
||
135 |
/* |
|
136 |
* int method_is_transient() |
|
137 |
* Determine if the method for the given instance is transient, |
|
138 |
* from a contract perspective. Return 1 if it is, and 0 if it isn't. |
|
139 |
*/ |
|
140 |
static int |
|
141 |
method_is_transient(restarter_inst_t *inst, int type) |
|
142 |
{ |
|
143 |
if (instance_is_transient_style(inst) || type != METHOD_START) |
|
144 |
return (1); |
|
145 |
else |
|
146 |
return (0); |
|
147 |
} |
|
148 |
||
149 |
/* |
|
150 |
* void method_store_contract() |
|
151 |
* Store the newly created contract id into local structures and |
|
152 |
* the repository. If the repository connection is broken it is rebound. |
|
153 |
*/ |
|
154 |
static void |
|
155 |
method_store_contract(restarter_inst_t *inst, int type, ctid_t *cid) |
|
156 |
{ |
|
157 |
int r; |
|
158 |
boolean_t primary; |
|
159 |
||
160 |
if (errno = contract_latest(cid)) |
|
161 |
uu_die("%s: Couldn't get new contract's id", inst->ri_i.i_fmri); |
|
162 |
||
163 |
primary = !method_is_transient(inst, type); |
|
164 |
||
165 |
if (!primary) { |
|
166 |
if (inst->ri_i.i_transient_ctid != 0) { |
|
167 |
log_framework(LOG_INFO, |
|
168 |
"%s: transient ctid expected to be 0 but " |
|
169 |
"was set to %ld\n", inst->ri_i.i_fmri, |
|
170 |
inst->ri_i.i_transient_ctid); |
|
171 |
} |
|
172 |
||
173 |
inst->ri_i.i_transient_ctid = *cid; |
|
174 |
} else { |
|
175 |
if (inst->ri_i.i_primary_ctid != 0) { |
|
176 |
/* |
|
177 |
* There was an old contract that we transferred. |
|
178 |
* Remove it. |
|
179 |
*/ |
|
180 |
method_remove_contract(inst, B_TRUE, B_FALSE); |
|
181 |
} |
|
182 |
||
183 |
if (inst->ri_i.i_primary_ctid != 0) { |
|
184 |
log_framework(LOG_INFO, |
|
185 |
"%s: primary ctid expected to be 0 but " |
|
186 |
"was set to %ld\n", inst->ri_i.i_fmri, |
|
187 |
inst->ri_i.i_primary_ctid); |
|
188 |
} |
|
189 |
||
190 |
inst->ri_i.i_primary_ctid = *cid; |
|
191 |
inst->ri_i.i_primary_ctid_stopped = 0; |
|
192 |
||
4244
77e7b46e3d5e
6243574 Services can be shown online with empty contracts
jeanm
parents:
3179
diff
changeset
|
193 |
log_framework(LOG_DEBUG, "Storing primary contract %ld for " |
77e7b46e3d5e
6243574 Services can be shown online with empty contracts
jeanm
parents:
3179
diff
changeset
|
194 |
"%s.\n", *cid, inst->ri_i.i_fmri); |
77e7b46e3d5e
6243574 Services can be shown online with empty contracts
jeanm
parents:
3179
diff
changeset
|
195 |
|
0 | 196 |
contract_hash_store(*cid, inst->ri_id); |
197 |
} |
|
198 |
||
199 |
again: |
|
200 |
if (inst->ri_mi_deleted) |
|
201 |
return; |
|
202 |
||
203 |
r = restarter_store_contract(inst->ri_m_inst, *cid, primary ? |
|
204 |
RESTARTER_CONTRACT_PRIMARY : RESTARTER_CONTRACT_TRANSIENT); |
|
205 |
switch (r) { |
|
206 |
case 0: |
|
207 |
break; |
|
208 |
||
209 |
case ECANCELED: |
|
210 |
inst->ri_mi_deleted = B_TRUE; |
|
211 |
break; |
|
212 |
||
213 |
case ECONNABORTED: |
|
214 |
libscf_handle_rebind(scf_instance_handle(inst->ri_m_inst)); |
|
215 |
/* FALLTHROUGH */ |
|
216 |
||
217 |
case EBADF: |
|
218 |
libscf_reget_instance(inst); |
|
219 |
goto again; |
|
220 |
||
221 |
case ENOMEM: |
|
222 |
case EPERM: |
|
223 |
case EACCES: |
|
224 |
case EROFS: |
|
225 |
uu_die("%s: Couldn't store contract id %ld", |
|
226 |
inst->ri_i.i_fmri, *cid); |
|
227 |
/* NOTREACHED */ |
|
228 |
||
229 |
case EINVAL: |
|
230 |
default: |
|
231 |
bad_error("restarter_store_contract", r); |
|
232 |
} |
|
233 |
} |
|
234 |
||
235 |
/* |
|
236 |
* void method_remove_contract() |
|
237 |
* Remove any non-permanent contracts from internal structures and |
|
238 |
* the repository, then abandon them. |
|
239 |
* Returns |
|
240 |
* 0 - success |
|
241 |
* ECANCELED - inst was deleted from the repository |
|
242 |
* |
|
243 |
* If the repository connection was broken, it is rebound. |
|
244 |
*/ |
|
245 |
void |
|
246 |
method_remove_contract(restarter_inst_t *inst, boolean_t primary, |
|
247 |
boolean_t abandon) |
|
248 |
{ |
|
249 |
ctid_t * const ctidp = primary ? &inst->ri_i.i_primary_ctid : |
|
250 |
&inst->ri_i.i_transient_ctid; |
|
251 |
||
252 |
int r; |
|
253 |
||
254 |
assert(*ctidp != 0); |
|
255 |
||
256 |
log_framework(LOG_DEBUG, "Removing %s contract %lu for %s.\n", |
|
257 |
primary ? "primary" : "transient", *ctidp, inst->ri_i.i_fmri); |
|
258 |
||
259 |
if (abandon) |
|
260 |
contract_abandon(*ctidp); |
|
261 |
||
262 |
again: |
|
263 |
if (inst->ri_mi_deleted) { |
|
264 |
r = ECANCELED; |
|
265 |
goto out; |
|
266 |
} |
|
267 |
||
268 |
r = restarter_remove_contract(inst->ri_m_inst, *ctidp, primary ? |
|
269 |
RESTARTER_CONTRACT_PRIMARY : RESTARTER_CONTRACT_TRANSIENT); |
|
270 |
switch (r) { |
|
271 |
case 0: |
|
272 |
break; |
|
273 |
||
274 |
case ECANCELED: |
|
275 |
inst->ri_mi_deleted = B_TRUE; |
|
276 |
break; |
|
277 |
||
278 |
case ECONNABORTED: |
|
279 |
libscf_handle_rebind(scf_instance_handle(inst->ri_m_inst)); |
|
280 |
/* FALLTHROUGH */ |
|
281 |
||
282 |
case EBADF: |
|
283 |
libscf_reget_instance(inst); |
|
284 |
goto again; |
|
285 |
||
286 |
case ENOMEM: |
|
287 |
case EPERM: |
|
288 |
case EACCES: |
|
289 |
case EROFS: |
|
290 |
log_error(LOG_INFO, "%s: Couldn't remove contract id %ld: " |
|
291 |
"%s.\n", inst->ri_i.i_fmri, *ctidp, strerror(r)); |
|
292 |
break; |
|
293 |
||
294 |
case EINVAL: |
|
295 |
default: |
|
296 |
bad_error("restarter_remove_contract", r); |
|
297 |
} |
|
298 |
||
299 |
out: |
|
300 |
if (primary) |
|
301 |
contract_hash_remove(*ctidp); |
|
302 |
||
303 |
*ctidp = 0; |
|
304 |
} |
|
305 |
||
306 |
/* |
|
307 |
* int method_ready_contract(restarter_inst_t *, int, method_restart_t, int) |
|
308 |
* |
|
309 |
* Activate a contract template for the type method of inst. type, |
|
310 |
* restart_on, and cte_mask dictate the critical events term of the contract. |
|
311 |
* Returns |
|
312 |
* 0 - success |
|
313 |
* ECANCELED - inst has been deleted from the repository |
|
314 |
*/ |
|
315 |
static int |
|
316 |
method_ready_contract(restarter_inst_t *inst, int type, |
|
317 |
method_restart_t restart_on, uint_t cte_mask) |
|
318 |
{ |
|
319 |
int tmpl, err, istrans, iswait, ret; |
|
320 |
uint_t cevents, fevents; |
|
321 |
||
322 |
/* |
|
323 |
* Correctly supporting wait-style services is tricky without |
|
324 |
* rearchitecting startd to cope with multiple event sources |
|
325 |
* simultaneously trying to stop an instance. Until a better |
|
326 |
* solution is implemented, we avoid this problem for |
|
327 |
* wait-style services by making contract events fatal and |
|
328 |
* letting the wait code alone handle stopping the service. |
|
329 |
*/ |
|
330 |
iswait = instance_is_wait_style(inst); |
|
331 |
istrans = method_is_transient(inst, type); |
|
332 |
||
333 |
tmpl = open64(CTFS_ROOT "/process/template", O_RDWR); |
|
334 |
if (tmpl == -1) |
|
335 |
uu_die("Could not create contract template"); |
|
336 |
||
337 |
/* |
|
338 |
* We assume non-login processes are unlikely to create |
|
339 |
* multiple process groups, and set CT_PR_PGRPONLY for all |
|
340 |
* wait-style services' contracts. |
|
341 |
*/ |
|
342 |
err = ct_pr_tmpl_set_param(tmpl, CT_PR_INHERIT | CT_PR_REGENT | |
|
343 |
(iswait ? CT_PR_PGRPONLY : 0)); |
|
344 |
assert(err == 0); |
|
345 |
||
346 |
if (istrans) { |
|
347 |
cevents = 0; |
|
348 |
fevents = 0; |
|
349 |
} else { |
|
350 |
assert(restart_on >= 0); |
|
351 |
assert(restart_on <= METHOD_RESTART_ANY_FAULT); |
|
352 |
cevents = method_events[restart_on] & ~cte_mask; |
|
353 |
fevents = iswait ? |
|
354 |
(method_events[restart_on] & ~cte_mask & CT_PR_ALLFATAL) : |
|
355 |
0; |
|
356 |
} |
|
357 |
||
358 |
err = ct_tmpl_set_critical(tmpl, cevents); |
|
359 |
assert(err == 0); |
|
360 |
||
361 |
err = ct_tmpl_set_informative(tmpl, 0); |
|
362 |
assert(err == 0); |
|
363 |
err = ct_pr_tmpl_set_fatal(tmpl, fevents); |
|
364 |
assert(err == 0); |
|
365 |
||
366 |
err = ct_tmpl_set_cookie(tmpl, istrans ? METHOD_OTHER_COOKIE : |
|
367 |
METHOD_START_COOKIE); |
|
368 |
assert(err == 0); |
|
369 |
||
370 |
if (type == METHOD_START && inst->ri_i.i_primary_ctid != 0) { |
|
371 |
ret = ct_pr_tmpl_set_transfer(tmpl, inst->ri_i.i_primary_ctid); |
|
372 |
switch (ret) { |
|
373 |
case 0: |
|
374 |
break; |
|
375 |
||
376 |
case ENOTEMPTY: |
|
377 |
/* No contracts for you! */ |
|
378 |
method_remove_contract(inst, B_TRUE, B_TRUE); |
|
379 |
if (inst->ri_mi_deleted) { |
|
380 |
ret = ECANCELED; |
|
381 |
goto out; |
|
382 |
} |
|
383 |
break; |
|
384 |
||
385 |
case EINVAL: |
|
386 |
case ESRCH: |
|
387 |
case EACCES: |
|
388 |
default: |
|
389 |
bad_error("ct_pr_tmpl_set_transfer", ret); |
|
390 |
} |
|
391 |
} |
|
392 |
||
393 |
err = ct_tmpl_activate(tmpl); |
|
394 |
assert(err == 0); |
|
395 |
||
396 |
ret = 0; |
|
397 |
||
398 |
out: |
|
399 |
err = close(tmpl); |
|
400 |
assert(err == 0); |
|
401 |
||
402 |
return (ret); |
|
403 |
} |
|
404 |
||
405 |
/*
 * Printable method names, indexed by the method type value handed to
 * exec_method() and method_run().
 */
static const char *method_names[] = {
	"start",
	"stop",
	"refresh"
};
|
406 |
||
407 |
static void |
|
408 |
exec_method(const restarter_inst_t *inst, int type, const char *method, |
|
409 |
struct method_context *mcp, uint8_t need_session) |
|
410 |
{ |
|
411 |
char *cmd; |
|
412 |
const char *errf; |
|
413 |
char **nenv; |
|
4816
8f99ff23cf47
6411807 exec_method drops errno when juggling errors
acruz
parents:
4432
diff
changeset
|
414 |
int rsmc_errno = 0; |
0 | 415 |
|
416 |
cmd = uu_msprintf("exec %s", method); |
|
417 |
||
418 |
if (inst->ri_utmpx_prefix[0] != '\0' && inst->ri_utmpx_prefix != NULL) |
|
419 |
(void) utmpx_mark_init(getpid(), inst->ri_utmpx_prefix); |
|
420 |
||
421 |
setlog(inst->ri_logstem); |
|
5238 | 422 |
log_instance(inst, B_FALSE, "Executing %s method (\"%s\").", |
0 | 423 |
method_names[type], method); |
424 |
||
425 |
if (need_session) |
|
426 |
(void) setpgrp(); |
|
427 |
||
428 |
/* Set credentials. */ |
|
4816
8f99ff23cf47
6411807 exec_method drops errno when juggling errors
acruz
parents:
4432
diff
changeset
|
429 |
rsmc_errno = restarter_set_method_context(mcp, &errf); |
8f99ff23cf47
6411807 exec_method drops errno when juggling errors
acruz
parents:
4432
diff
changeset
|
430 |
if (rsmc_errno != 0) { |
0 | 431 |
(void) fputs("svc.startd could not set context for method: ", |
432 |
stderr); |
|
433 |
||
4816
8f99ff23cf47
6411807 exec_method drops errno when juggling errors
acruz
parents:
4432
diff
changeset
|
434 |
if (rsmc_errno == -1) { |
0 | 435 |
if (strcmp(errf, "core_set_process_path") == 0) { |
436 |
(void) fputs("Could not set corefile path.\n", |
|
437 |
stderr); |
|
438 |
} else if (strcmp(errf, "setproject") == 0) { |
|
439 |
(void) fprintf(stderr, "%s: a resource control " |
|
440 |
"assignment failed\n", errf); |
|
441 |
} else if (strcmp(errf, "pool_set_binding") == 0) { |
|
442 |
(void) fprintf(stderr, "%s: a system error " |
|
443 |
"occurred\n", errf); |
|
444 |
} else { |
|
445 |
#ifndef NDEBUG |
|
446 |
uu_warn("%s:%d: Bad function name \"%s\" for " |
|
447 |
"error %d from " |
|
448 |
"restarter_set_method_context().\n", |
|
4816
8f99ff23cf47
6411807 exec_method drops errno when juggling errors
acruz
parents:
4432
diff
changeset
|
449 |
__FILE__, __LINE__, errf, rsmc_errno); |
0 | 450 |
#endif |
451 |
abort(); |
|
452 |
} |
|
453 |
||
454 |
exit(1); |
|
455 |
} |
|
456 |
||
457 |
if (errf != NULL && strcmp(errf, "pool_set_binding") == 0) { |
|
4816
8f99ff23cf47
6411807 exec_method drops errno when juggling errors
acruz
parents:
4432
diff
changeset
|
458 |
switch (rsmc_errno) { |
0 | 459 |
case ENOENT: |
460 |
(void) fprintf(stderr, "%s: the pool could not " |
|
461 |
"be found\n", errf); |
|
462 |
break; |
|
463 |
||
464 |
case EBADF: |
|
465 |
(void) fprintf(stderr, "%s: the configuration " |
|
466 |
"is invalid\n", errf); |
|
467 |
break; |
|
468 |
||
1712 | 469 |
case EINVAL: |
470 |
(void) fprintf(stderr, "%s: pool name \"%s\" " |
|
471 |
"is invalid\n", errf, mcp->resource_pool); |
|
472 |
break; |
|
473 |
||
0 | 474 |
default: |
475 |
#ifndef NDEBUG |
|
476 |
uu_warn("%s:%d: Bad error %d for function %s " |
|
477 |
"in restarter_set_method_context().\n", |
|
4816
8f99ff23cf47
6411807 exec_method drops errno when juggling errors
acruz
parents:
4432
diff
changeset
|
478 |
__FILE__, __LINE__, rsmc_errno, errf); |
0 | 479 |
#endif |
480 |
abort(); |
|
481 |
} |
|
482 |
||
483 |
exit(SMF_EXIT_ERR_CONFIG); |
|
484 |
} |
|
485 |
||
486 |
if (errf != NULL) { |
|
4816
8f99ff23cf47
6411807 exec_method drops errno when juggling errors
acruz
parents:
4432
diff
changeset
|
487 |
errno = rsmc_errno; |
0 | 488 |
perror(errf); |
489 |
||
4816
8f99ff23cf47
6411807 exec_method drops errno when juggling errors
acruz
parents:
4432
diff
changeset
|
490 |
switch (rsmc_errno) { |
0 | 491 |
case EINVAL: |
492 |
case EPERM: |
|
493 |
case ENOENT: |
|
494 |
case ENAMETOOLONG: |
|
495 |
case ERANGE: |
|
496 |
case ESRCH: |
|
497 |
exit(SMF_EXIT_ERR_CONFIG); |
|
498 |
/* NOTREACHED */ |
|
499 |
||
500 |
default: |
|
501 |
exit(1); |
|
502 |
} |
|
503 |
} |
|
504 |
||
4816
8f99ff23cf47
6411807 exec_method drops errno when juggling errors
acruz
parents:
4432
diff
changeset
|
505 |
switch (rsmc_errno) { |
0 | 506 |
case ENOMEM: |
507 |
(void) fputs("Out of memory.\n", stderr); |
|
508 |
exit(1); |
|
509 |
/* NOTREACHED */ |
|
510 |
||
511 |
case ENOENT: |
|
512 |
(void) fputs("Missing passwd entry for user.\n", |
|
513 |
stderr); |
|
514 |
exit(SMF_EXIT_ERR_CONFIG); |
|
515 |
/* NOTREACHED */ |
|
516 |
||
517 |
default: |
|
518 |
#ifndef NDEBUG |
|
519 |
uu_warn("%s:%d: Bad miscellaneous error %d from " |
|
520 |
"restarter_set_method_context().\n", __FILE__, |
|
4816
8f99ff23cf47
6411807 exec_method drops errno when juggling errors
acruz
parents:
4432
diff
changeset
|
521 |
__LINE__, rsmc_errno); |
0 | 522 |
#endif |
523 |
abort(); |
|
524 |
} |
|
525 |
} |
|
526 |
||
5040
ff6ebd8761a6
PSARC 2007/177 SMF read-protected property storage
wesolows
parents:
4816
diff
changeset
|
527 |
nenv = set_smf_env(mcp->env, mcp->env_sz, NULL, inst, |
ff6ebd8761a6
PSARC 2007/177 SMF read-protected property storage
wesolows
parents:
4816
diff
changeset
|
528 |
method_names[type]); |
0 | 529 |
|
530 |
log_preexec(); |
|
531 |
||
532 |
(void) execle(SBIN_SH, SBIN_SH, "-c", cmd, NULL, nenv); |
|
533 |
||
534 |
exit(10); |
|
535 |
} |
|
536 |
||
537 |
static void |
|
538 |
write_status(restarter_inst_t *inst, const char *mname, int stat) |
|
539 |
{ |
|
540 |
int r; |
|
541 |
||
542 |
again: |
|
543 |
if (inst->ri_mi_deleted) |
|
544 |
return; |
|
545 |
||
546 |
r = libscf_write_method_status(inst->ri_m_inst, mname, stat); |
|
547 |
switch (r) { |
|
548 |
case 0: |
|
549 |
break; |
|
550 |
||
551 |
case ECONNABORTED: |
|
552 |
libscf_reget_instance(inst); |
|
553 |
goto again; |
|
554 |
||
555 |
case ECANCELED: |
|
556 |
inst->ri_mi_deleted = 1; |
|
557 |
break; |
|
558 |
||
559 |
case EPERM: |
|
560 |
case EACCES: |
|
561 |
case EROFS: |
|
562 |
log_framework(LOG_INFO, "Could not write exit status " |
|
563 |
"for %s method of %s: %s.\n", mname, |
|
564 |
inst->ri_i.i_fmri, strerror(r)); |
|
565 |
break; |
|
566 |
||
567 |
case ENAMETOOLONG: |
|
568 |
default: |
|
569 |
bad_error("libscf_write_method_status", r); |
|
570 |
} |
|
571 |
} |
|
572 |
||
573 |
/* |
|
574 |
* int method_run() |
|
575 |
* Execute the type method of instp. If it requires a fork(), wait for it |
|
576 |
* to return and return its exit code in *exit_code. Otherwise set |
|
577 |
* *exit_code to 0 if the method succeeds & -1 if it fails. If the |
|
578 |
* repository connection is broken, it is rebound, but inst may not be |
|
579 |
* reset. |
|
580 |
* Returns |
|
581 |
* 0 - success |
|
582 |
* EINVAL - A correct method or method context couldn't be retrieved. |
|
583 |
* EIO - Contract kill failed. |
|
584 |
* EFAULT - Method couldn't be executed successfully. |
|
585 |
* ELOOP - Retry threshold exceeded. |
|
586 |
* ECANCELED - inst was deleted from the repository before method was run |
|
587 |
* ERANGE - Timeout retry threshold exceeded. |
|
588 |
* EAGAIN - Failed due to external cause, retry. |
|
589 |
*/ |
|
590 |
int |
|
591 |
method_run(restarter_inst_t **instp, int type, int *exit_code) |
|
592 |
{ |
|
593 |
char *method; |
|
594 |
int ret_status; |
|
595 |
pid_t pid; |
|
596 |
method_restart_t restart_on; |
|
597 |
uint_t cte_mask; |
|
598 |
uint8_t need_session; |
|
599 |
scf_handle_t *h; |
|
600 |
scf_snapshot_t *snap; |
|
601 |
const char *mname; |
|
602 |
const char *errstr; |
|
603 |
struct method_context *mcp; |
|
604 |
int result = 0, timeout_fired = 0; |
|
605 |
int sig, r; |
|
606 |
boolean_t transient; |
|
607 |
uint64_t timeout; |
|
608 |
uint8_t timeout_retry; |
|
609 |
ctid_t ctid; |
|
610 |
int ctfd = -1; |
|
611 |
restarter_inst_t *inst = *instp; |
|
612 |
int id = inst->ri_id; |
|
119
7bdce363820e
6202359 startd needs to retry methods on fork failure
sl108498
parents:
0
diff
changeset
|
613 |
int forkerr; |
0 | 614 |
|
615 |
assert(PTHREAD_MUTEX_HELD(&inst->ri_lock)); |
|
616 |
assert(instance_in_transition(inst)); |
|
617 |
||
618 |
if (inst->ri_mi_deleted) |
|
619 |
return (ECANCELED); |
|
620 |
||
621 |
*exit_code = 0; |
|
622 |
||
623 |
assert(0 <= type && type <= 2); |
|
624 |
mname = method_names[type]; |
|
625 |
||
626 |
if (type == METHOD_START) |
|
627 |
inst->ri_pre_online_hook(); |
|
628 |
||
629 |
h = scf_instance_handle(inst->ri_m_inst); |
|
630 |
||
631 |
snap = scf_snapshot_create(h); |
|
632 |
if (snap == NULL || |
|
633 |
scf_instance_get_snapshot(inst->ri_m_inst, "running", snap) != 0) { |
|
634 |
log_framework(LOG_DEBUG, |
|
635 |
"Could not get running snapshot for %s. " |
|
636 |
"Using editing version to run method %s.\n", |
|
637 |
inst->ri_i.i_fmri, mname); |
|
638 |
scf_snapshot_destroy(snap); |
|
639 |
snap = NULL; |
|
640 |
} |
|
641 |
||
642 |
/* |
|
643 |
* After this point, we may be logging to the instance log. |
|
644 |
* Make sure we've noted where that log is as a property of |
|
645 |
* the instance. |
|
646 |
*/ |
|
647 |
r = libscf_note_method_log(inst->ri_m_inst, st->st_log_prefix, |
|
648 |
inst->ri_logstem); |
|
649 |
if (r != 0) { |
|
650 |
log_framework(LOG_WARNING, |
|
651 |
"%s: couldn't note log location: %s\n", |
|
652 |
inst->ri_i.i_fmri, strerror(r)); |
|
653 |
} |
|
654 |
||
655 |
if ((method = libscf_get_method(h, type, inst, snap, &restart_on, |
|
656 |
&cte_mask, &need_session, &timeout, &timeout_retry)) == NULL) { |
|
657 |
if (errno == LIBSCF_PGROUP_ABSENT) { |
|
658 |
log_framework(LOG_DEBUG, |
|
659 |
"%s: instance has no method property group '%s'.\n", |
|
660 |
inst->ri_i.i_fmri, mname); |
|
661 |
if (type == METHOD_REFRESH) |
|
662 |
log_instance(inst, B_TRUE, "No '%s' method " |
|
663 |
"defined. Treating as :true.", mname); |
|
664 |
else |
|
665 |
log_instance(inst, B_TRUE, "Method property " |
|
666 |
"group '%s' is not present.", mname); |
|
667 |
scf_snapshot_destroy(snap); |
|
668 |
return (0); |
|
669 |
} else if (errno == LIBSCF_PROPERTY_ABSENT) { |
|
670 |
log_framework(LOG_DEBUG, |
|
671 |
"%s: instance has no '%s/exec' method property.\n", |
|
672 |
inst->ri_i.i_fmri, mname); |
|
673 |
log_instance(inst, B_TRUE, "Method property '%s/exec " |
|
674 |
"is not present.", mname); |
|
675 |
scf_snapshot_destroy(snap); |
|
676 |
return (0); |
|
677 |
} else { |
|
678 |
log_error(LOG_WARNING, |
|
679 |
"%s: instance libscf_get_method failed\n", |
|
680 |
inst->ri_i.i_fmri); |
|
681 |
scf_snapshot_destroy(snap); |
|
682 |
return (EINVAL); |
|
683 |
} |
|
684 |
} |
|
685 |
||
686 |
/* open service contract if stopping a non-transient service */ |
|
687 |
if (type == METHOD_STOP && (!instance_is_transient_style(inst))) { |
|
688 |
if (inst->ri_i.i_primary_ctid == 0) { |
|
689 |
/* service is not running, nothing to stop */ |
|
690 |
log_framework(LOG_DEBUG, "%s: instance has no primary " |
|
691 |
"contract, no service to stop.\n", |
|
692 |
inst->ri_i.i_fmri); |
|
693 |
scf_snapshot_destroy(snap); |
|
694 |
return (0); |
|
695 |
} |
|
696 |
if ((ctfd = contract_open(inst->ri_i.i_primary_ctid, "process", |
|
697 |
"events", O_RDONLY)) < 0) { |
|
698 |
result = EFAULT; |
|
699 |
log_instance(inst, B_TRUE, "Could not open service " |
|
5238 | 700 |
"contract %ld. Stop method not run.", |
0 | 701 |
inst->ri_i.i_primary_ctid); |
702 |
goto out; |
|
703 |
} |
|
704 |
} |
|
705 |
||
706 |
if (restarter_is_null_method(method)) { |
|
707 |
log_framework(LOG_DEBUG, "%s: null method succeeds\n", |
|
708 |
inst->ri_i.i_fmri); |
|
709 |
||
5238 | 710 |
log_instance(inst, B_TRUE, "Executing %s method (null).", |
711 |
mname); |
|
0 | 712 |
|
713 |
if (type == METHOD_START) |
|
714 |
write_status(inst, mname, 0); |
|
715 |
goto out; |
|
716 |
} |
|
717 |
||
718 |
sig = restarter_is_kill_method(method); |
|
719 |
if (sig >= 0) { |
|
720 |
||
721 |
if (inst->ri_i.i_primary_ctid == 0) { |
|
722 |
log_error(LOG_ERR, "%s: :kill with no contract\n", |
|
723 |
inst->ri_i.i_fmri); |
|
5238 | 724 |
log_instance(inst, B_TRUE, "Invalid use of \":kill\" " |
725 |
"as stop method for transient service."); |
|
0 | 726 |
result = EINVAL; |
727 |
goto out; |
|
728 |
} |
|
729 |
||
730 |
log_framework(LOG_DEBUG, |
|
731 |
"%s: :killing contract with signal %d\n", |
|
732 |
inst->ri_i.i_fmri, sig); |
|
733 |
||
5238 | 734 |
log_instance(inst, B_TRUE, "Executing %s method (:kill).", |
0 | 735 |
mname); |
736 |
||
737 |
if (contract_kill(inst->ri_i.i_primary_ctid, sig, |
|
738 |
inst->ri_i.i_fmri) != 0) { |
|
739 |
result = EIO; |
|
740 |
goto out; |
|
741 |
} else |
|
742 |
goto assured_kill; |
|
743 |
} |
|
744 |
||
745 |
log_framework(LOG_DEBUG, "%s: forking to run method %s\n", |
|
746 |
inst->ri_i.i_fmri, method); |
|
747 |
||
748 |
errstr = restarter_get_method_context(RESTARTER_METHOD_CONTEXT_VERSION, |
|
749 |
inst->ri_m_inst, snap, mname, method, &mcp); |
|
750 |
||
751 |
if (errstr != NULL) { |
|
752 |
log_error(LOG_WARNING, "%s: %s\n", inst->ri_i.i_fmri, errstr); |
|
753 |
result = EINVAL; |
|
754 |
goto out; |
|
755 |
} |
|
756 |
||
757 |
r = method_ready_contract(inst, type, restart_on, cte_mask); |
|
758 |
if (r != 0) { |
|
759 |
assert(r == ECANCELED); |
|
760 |
assert(inst->ri_mi_deleted); |
|
761 |
restarter_free_method_context(mcp); |
|
762 |
result = ECANCELED; |
|
763 |
goto out; |
|
764 |
} |
|
765 |
||
766 |
/* |
|
767 |
* Validate safety of method contexts, to save children work. |
|
768 |
*/ |
|
769 |
if (!restarter_rm_libs_loadable()) |
|
770 |
log_framework(LOG_DEBUG, "%s: method contexts limited " |
|
771 |
"to root-accessible libraries\n", inst->ri_i.i_fmri); |
|
772 |
||
773 |
/* |
|
774 |
* If the service is restarting too quickly, send it to |
|
775 |
* maintenance. |
|
776 |
*/ |
|
777 |
if (type == METHOD_START) { |
|
778 |
method_record_start(inst); |
|
779 |
if (method_rate_critical(inst)) { |
|
780 |
log_instance(inst, B_TRUE, "Restarting too quickly, " |
|
5238 | 781 |
"changing state to maintenance."); |
0 | 782 |
result = ELOOP; |
3179
80729b9ca1d6
5079387 _get_auth_policy() doesn't provide corresponding free function
jeanm
parents:
1712
diff
changeset
|
783 |
restarter_free_method_context(mcp); |
0 | 784 |
goto out; |
785 |
} |
|
786 |
} |
|
787 |
||
4244
77e7b46e3d5e
6243574 Services can be shown online with empty contracts
jeanm
parents:
3179
diff
changeset
|
788 |
atomic_add_16(&storing_contract, 1); |
119
7bdce363820e
6202359 startd needs to retry methods on fork failure
sl108498
parents:
0
diff
changeset
|
789 |
pid = startd_fork1(&forkerr); |
0 | 790 |
if (pid == 0) |
791 |
exec_method(inst, type, method, mcp, need_session); |
|
792 |
||
793 |
if (pid == -1) { |
|
4244
77e7b46e3d5e
6243574 Services can be shown online with empty contracts
jeanm
parents:
3179
diff
changeset
|
794 |
atomic_add_16(&storing_contract, -1); |
119
7bdce363820e
6202359 startd needs to retry methods on fork failure
sl108498
parents:
0
diff
changeset
|
795 |
if (forkerr == EAGAIN) |
7bdce363820e
6202359 startd needs to retry methods on fork failure
sl108498
parents:
0
diff
changeset
|
796 |
result = EAGAIN; |
7bdce363820e
6202359 startd needs to retry methods on fork failure
sl108498
parents:
0
diff
changeset
|
797 |
else |
7bdce363820e
6202359 startd needs to retry methods on fork failure
sl108498
parents:
0
diff
changeset
|
798 |
result = EFAULT; |
7bdce363820e
6202359 startd needs to retry methods on fork failure
sl108498
parents:
0
diff
changeset
|
799 |
|
0 | 800 |
log_error(LOG_WARNING, |
119
7bdce363820e
6202359 startd needs to retry methods on fork failure
sl108498
parents:
0
diff
changeset
|
801 |
"%s: Couldn't fork to execute method %s: %s\n", |
7bdce363820e
6202359 startd needs to retry methods on fork failure
sl108498
parents:
0
diff
changeset
|
802 |
inst->ri_i.i_fmri, method, strerror(forkerr)); |
7bdce363820e
6202359 startd needs to retry methods on fork failure
sl108498
parents:
0
diff
changeset
|
803 |
|
4244
77e7b46e3d5e
6243574 Services can be shown online with empty contracts
jeanm
parents:
3179
diff
changeset
|
804 |
restarter_free_method_context(mcp); |
0 | 805 |
goto out; |
806 |
} |
|
807 |
||
808 |
||
809 |
/* |
|
810 |
* Get the contract id, decide whether it is primary or transient, and |
|
811 |
* stash it in inst & the repository. |
|
812 |
*/ |
|
813 |
method_store_contract(inst, type, &ctid); |
|
4244
77e7b46e3d5e
6243574 Services can be shown online with empty contracts
jeanm
parents:
3179
diff
changeset
|
814 |
atomic_add_16(&storing_contract, -1); |
77e7b46e3d5e
6243574 Services can be shown online with empty contracts
jeanm
parents:
3179
diff
changeset
|
815 |
|
77e7b46e3d5e
6243574 Services can be shown online with empty contracts
jeanm
parents:
3179
diff
changeset
|
816 |
restarter_free_method_context(mcp); |
0 | 817 |
|
818 |
/* |
|
819 |
* Similarly for the start method PID. |
|
820 |
*/ |
|
821 |
if (type == METHOD_START && !inst->ri_mi_deleted) |
|
822 |
(void) libscf_write_start_pid(inst->ri_m_inst, pid); |
|
823 |
||
824 |
if (instance_is_wait_style(inst) && type == METHOD_START) { |
|
825 |
/* Wait style instances don't get timeouts on start methods. */ |
|
826 |
if (wait_register(pid, inst->ri_i.i_fmri, 1, 0)) { |
|
827 |
log_error(LOG_WARNING, |
|
828 |
"%s: couldn't register %ld for wait\n", |
|
829 |
inst->ri_i.i_fmri, pid); |
|
830 |
result = EFAULT; |
|
831 |
goto contract_out; |
|
832 |
} |
|
833 |
write_status(inst, mname, 0); |
|
834 |
||
835 |
} else { |
|
836 |
int r, err; |
|
837 |
time_t start_time; |
|
838 |
time_t end_time; |
|
839 |
||
840 |
/* |
|
841 |
* Because on upgrade/live-upgrade we may have no chance |
|
842 |
* to override faulty timeout values on the way to |
|
843 |
* manifest import, all services on the path to manifest |
|
844 |
* import are treated the same as INFINITE timeout services. |
|
845 |
*/ |
|
846 |
||
847 |
start_time = time(NULL); |
|
848 |
if (timeout != METHOD_TIMEOUT_INFINITE && !is_timeout_ovr(inst)) |
|
849 |
timeout_insert(inst, ctid, timeout); |
|
850 |
else |
|
851 |
timeout = METHOD_TIMEOUT_INFINITE; |
|
852 |
||
853 |
/* Unlock the instance while waiting for the method. */ |
|
854 |
MUTEX_UNLOCK(&inst->ri_lock); |
|
855 |
||
4816
8f99ff23cf47
6411807 exec_method drops errno when juggling errors
acruz
parents:
4432
diff
changeset
|
856 |
do { |
0 | 857 |
r = waitpid(pid, &ret_status, NULL); |
4816
8f99ff23cf47
6411807 exec_method drops errno when juggling errors
acruz
parents:
4432
diff
changeset
|
858 |
} while (r == -1 && errno == EINTR); |
0 | 859 |
if (r == -1) |
860 |
err = errno; |
|
861 |
||
862 |
/* Re-grab the lock. */ |
|
863 |
inst = inst_lookup_by_id(id); |
|
864 |
||
865 |
/* |
|
866 |
* inst can't be removed, as the removal thread waits |
|
867 |
* for completion of this one. |
|
868 |
*/ |
|
869 |
assert(inst != NULL); |
|
870 |
*instp = inst; |
|
871 |
||
872 |
if (inst->ri_timeout != NULL && inst->ri_timeout->te_fired) |
|
873 |
timeout_fired = 1; |
|
874 |
||
875 |
timeout_remove(inst, ctid); |
|
876 |
||
877 |
log_framework(LOG_DEBUG, |
|
878 |
"%s method for %s exited with status %d.\n", mname, |
|
879 |
inst->ri_i.i_fmri, WEXITSTATUS(ret_status)); |
|
880 |
||
881 |
if (r == -1) { |
|
882 |
log_error(LOG_WARNING, |
|
883 |
"Couldn't waitpid() for %s method of %s (%s).\n", |
|
884 |
mname, inst->ri_i.i_fmri, strerror(err)); |
|
885 |
result = EFAULT; |
|
886 |
goto contract_out; |
|
887 |
} |
|
888 |
||
889 |
if (type == METHOD_START) |
|
890 |
write_status(inst, mname, ret_status); |
|
891 |
||
892 |
/* return ERANGE if this service doesn't retry on timeout */ |
|
893 |
if (timeout_fired == 1 && timeout_retry == 0) { |
|
894 |
result = ERANGE; |
|
895 |
goto contract_out; |
|
896 |
} |
|
897 |
||
898 |
if (!WIFEXITED(ret_status)) { |
|
899 |
/* |
|
900 |
* If method didn't exit itself (it was killed by an |
|
901 |
* external entity, etc.), consider the entire |
|
902 |
* method_run as failed. |
|
903 |
*/ |
|
904 |
if (WIFSIGNALED(ret_status)) { |
|
905 |
char buf[SIG2STR_MAX]; |
|
906 |
(void) sig2str(WTERMSIG(ret_status), buf); |
|
907 |
||
908 |
log_error(LOG_WARNING, "%s: Method \"%s\" " |
|
909 |
"failed due to signal %s.\n", |
|
910 |
inst->ri_i.i_fmri, method, buf); |
|
911 |
log_instance(inst, B_TRUE, "Method \"%s\" " |
|
5238 | 912 |
"failed due to signal %s.", mname, buf); |
0 | 913 |
} else { |
914 |
log_error(LOG_WARNING, "%s: Method \"%s\" " |
|
915 |
"failed with exit status %d.\n", |
|
916 |
inst->ri_i.i_fmri, method, |
|
917 |
WEXITSTATUS(ret_status)); |
|
918 |
log_instance(inst, B_TRUE, "Method \"%s\" " |
|
5238 | 919 |
"failed with exit status %d.", mname, |
0 | 920 |
WEXITSTATUS(ret_status)); |
921 |
} |
|
922 |
result = EAGAIN; |
|
923 |
goto contract_out; |
|
924 |
} |
|
925 |
||
926 |
*exit_code = WEXITSTATUS(ret_status); |
|
927 |
if (*exit_code != 0) { |
|
928 |
log_error(LOG_WARNING, |
|
929 |
"%s: Method \"%s\" failed with exit status %d.\n", |
|
930 |
inst->ri_i.i_fmri, method, WEXITSTATUS(ret_status)); |
|
931 |
} |
|
932 |
||
933 |
log_instance(inst, B_TRUE, "Method \"%s\" exited with status " |
|
5238 | 934 |
"%d.", mname, *exit_code); |
0 | 935 |
|
936 |
if (*exit_code != 0) |
|
937 |
goto contract_out; |
|
938 |
||
939 |
end_time = time(NULL); |
|
940 |
||
941 |
/* Give service contract remaining seconds to empty */ |
|
942 |
if (timeout != METHOD_TIMEOUT_INFINITE) |
|
943 |
timeout -= (end_time - start_time); |
|
944 |
} |
|
945 |
||
946 |
assured_kill: |
|
947 |
/* |
|
948 |
* For stop methods, assure that the service contract has emptied |
|
949 |
* before returning. |
|
950 |
*/ |
|
951 |
if (type == METHOD_STOP && (!instance_is_transient_style(inst)) && |
|
952 |
!(contract_is_empty(inst->ri_i.i_primary_ctid))) { |
|
953 |
||
954 |
if (timeout != METHOD_TIMEOUT_INFINITE) |
|
955 |
timeout_insert(inst, inst->ri_i.i_primary_ctid, |
|
956 |
timeout); |
|
957 |
||
958 |
for (;;) { |
|
4244
77e7b46e3d5e
6243574 Services can be shown online with empty contracts
jeanm
parents:
3179
diff
changeset
|
959 |
(void) poll(NULL, 0, 100); |
77e7b46e3d5e
6243574 Services can be shown online with empty contracts
jeanm
parents:
3179
diff
changeset
|
960 |
if (contract_is_empty(inst->ri_i.i_primary_ctid)) |
0 | 961 |
break; |
962 |
} |
|
963 |
||
964 |
if (timeout != METHOD_TIMEOUT_INFINITE) |
|
965 |
if (inst->ri_timeout->te_fired) |
|
966 |
result = EFAULT; |
|
967 |
||
968 |
timeout_remove(inst, inst->ri_i.i_primary_ctid); |
|
969 |
} |
|
970 |
||
971 |
contract_out: |
|
972 |
/* Abandon contracts for transient methods & methods that fail. */ |
|
973 |
transient = method_is_transient(inst, type); |
|
974 |
if ((transient || *exit_code != 0 || result != 0) && |
|
975 |
(restarter_is_kill_method(method) < 0)) |
|
976 |
method_remove_contract(inst, !transient, B_TRUE); |
|
977 |
||
978 |
out: |
|
979 |
if (ctfd >= 0) |
|
980 |
(void) close(ctfd); |
|
981 |
scf_snapshot_destroy(snap); |
|
982 |
free(method); |
|
983 |
return (result); |
|
984 |
} |
|
985 |
||
986 |
/* |
|
987 |
* The method thread executes a service method to effect a state transition. |
|
988 |
* The next_state of info->sf_id should be non-_NONE on entrance, and it will |
|
989 |
* be _NONE on exit (state will either be what next_state was (on success), or |
|
990 |
* it will be _MAINT (on error)). |
|
991 |
* |
|
992 |
* There are six classes of methods to consider: start & other (stop, refresh) |
|
993 |
* for each of "normal" services, wait services, and transient services. For |
|
994 |
* each, the method must be fetched from the repository & executed. fork()ed |
|
995 |
* methods must be waited on, except for the start method of wait services |
|
996 |
* (which must be registered with the wait subsystem via wait_register()). If |
|
997 |
* the method succeeded (returned 0), then for start methods its contract |
|
998 |
* should be recorded as the primary contract for the service. For other |
|
999 |
* methods, it should be abandoned. If the method fails, then depending on |
|
1000 |
* the failure, either the method should be reexecuted or the service should |
|
1001 |
* be put into maintenance. Either way the contract should be abandoned. |
|
1002 |
*/ |
|
1003 |
void * |
|
1004 |
method_thread(void *arg) |
|
1005 |
{ |
|
1006 |
fork_info_t *info = arg; |
|
1007 |
restarter_inst_t *inst; |
|
1008 |
scf_handle_t *local_handle; |
|
1009 |
scf_instance_t *s_inst = NULL; |
|
1010 |
int r, exit_code; |
|
1011 |
boolean_t retryable; |
|
1012 |
const char *aux; |
|
1013 |
||
1014 |
assert(0 <= info->sf_method_type && info->sf_method_type <= 2); |
|
1015 |
||
1016 |
/* Get (and lock) the restarter_inst_t. */ |
|
1017 |
inst = inst_lookup_by_id(info->sf_id); |
|
1018 |
||
1019 |
assert(inst->ri_method_thread != 0); |
|
1020 |
assert(instance_in_transition(inst) == 1); |
|
1021 |
||
1022 |
/* |
|
1023 |
* We cannot leave this function with inst in transition, because |
|
1024 |
* protocol.c withholds messages for inst otherwise. |
|
1025 |
*/ |
|
1026 |
||
1027 |
log_framework(LOG_DEBUG, "method_thread() running %s method for %s.\n", |
|
1028 |
method_names[info->sf_method_type], inst->ri_i.i_fmri); |
|
1029 |
||
1030 |
local_handle = libscf_handle_create_bound_loop(); |
|
1031 |
||
1032 |
rebind_retry: |
|
1033 |
/* get scf_instance_t */ |
|
1034 |
switch (r = libscf_fmri_get_instance(local_handle, inst->ri_i.i_fmri, |
|
1035 |
&s_inst)) { |
|
1036 |
case 0: |
|
1037 |
break; |
|
1038 |
||
1039 |
case ECONNABORTED: |
|
1040 |
libscf_handle_rebind(local_handle); |
|
1041 |
goto rebind_retry; |
|
1042 |
||
1043 |
case ENOENT: |
|
1044 |
/* |
|
1045 |
* It's not there, but we need to call this so protocol.c |
|
1046 |
* doesn't think it's in transition anymore. |
|
1047 |
*/ |
|
1048 |
(void) restarter_instance_update_states(local_handle, inst, |
|
1049 |
inst->ri_i.i_state, RESTARTER_STATE_NONE, RERR_NONE, |
|
1050 |
NULL); |
|
1051 |
goto out; |
|
1052 |
||
1053 |
case EINVAL: |
|
1054 |
case ENOTSUP: |
|
1055 |
default: |
|
1056 |
bad_error("libscf_fmri_get_instance", r); |
|
1057 |
} |
|
1058 |
||
1059 |
inst->ri_m_inst = s_inst; |
|
1060 |
inst->ri_mi_deleted = B_FALSE; |
|
1061 |
||
1062 |
retry: |
|
1063 |
if (info->sf_method_type == METHOD_START) |
|
1064 |
log_transition(inst, START_REQUESTED); |
|
1065 |
||
1066 |
r = method_run(&inst, info->sf_method_type, &exit_code); |
|
1067 |
||
1068 |
if (r == 0 && exit_code == 0) { |
|
1069 |
/* Success! */ |
|
1070 |
assert(inst->ri_i.i_next_state != RESTARTER_STATE_NONE); |
|
1071 |
||
1072 |
/* |
|
1073 |
* When a stop method succeeds, remove the primary contract of |
|
1074 |
* the service, unless we're going to offline, in which case |
|
1075 |
* retain the contract so we can transfer inherited contracts to |
|
1076 |
* the replacement service. |
|
1077 |
*/ |
|
1078 |
||
1079 |
if (info->sf_method_type == METHOD_STOP && |
|
1080 |
inst->ri_i.i_primary_ctid != 0) { |
|
1081 |
if (inst->ri_i.i_next_state == RESTARTER_STATE_OFFLINE) |
|
1082 |
inst->ri_i.i_primary_ctid_stopped = 1; |
|
1083 |
else |
|
1084 |
method_remove_contract(inst, B_TRUE, B_TRUE); |
|
1085 |
} |
|
1086 |
/* |
|
1087 |
* We don't care whether the handle was rebound because this is |
|
1088 |
* the last thing we do with it. |
|
1089 |
*/ |
|
1090 |
(void) restarter_instance_update_states(local_handle, inst, |
|
1091 |
inst->ri_i.i_next_state, RESTARTER_STATE_NONE, |
|
1092 |
info->sf_event_type, NULL); |
|
1093 |
||
1094 |
(void) update_fault_count(inst, FAULT_COUNT_RESET); |
|
1095 |
||
1096 |
goto out; |
|
1097 |
} |
|
1098 |
||
1099 |
/* Failure. Retry or go to maintenance. */ |
|
1100 |
||
1101 |
if (r != 0 && r != EAGAIN) { |
|
1102 |
retryable = B_FALSE; |
|
1103 |
} else { |
|
1104 |
switch (exit_code) { |
|
1105 |
case SMF_EXIT_ERR_CONFIG: |
|
1106 |
case SMF_EXIT_ERR_NOSMF: |
|
1107 |
case SMF_EXIT_ERR_PERM: |
|
1108 |
case SMF_EXIT_ERR_FATAL: |
|
1109 |
retryable = B_FALSE; |
|
1110 |
break; |
|
1111 |
||
1112 |
default: |
|
1113 |
retryable = B_TRUE; |
|
1114 |
} |
|
1115 |
} |
|
1116 |
||
1117 |
if (retryable && update_fault_count(inst, FAULT_COUNT_INCR) != 1) |
|
1118 |
goto retry; |
|
1119 |
||
1120 |
/* maintenance */ |
|
1121 |
if (r == ELOOP) |
|
1122 |
log_transition(inst, START_FAILED_REPEATEDLY); |
|
1123 |
else if (r == ERANGE) |
|
1124 |
log_transition(inst, START_FAILED_TIMEOUT_FATAL); |
|
1125 |
else if (exit_code == SMF_EXIT_ERR_CONFIG) |
|
1126 |
log_transition(inst, START_FAILED_CONFIGURATION); |
|
1127 |
else if (exit_code == SMF_EXIT_ERR_FATAL) |
|
1128 |
log_transition(inst, START_FAILED_FATAL); |
|
1129 |
else |
|
1130 |
log_transition(inst, START_FAILED_OTHER); |
|
1131 |
||
1132 |
if (r == ELOOP) |
|
1133 |
aux = "restarting_too_quickly"; |
|
1134 |
else if (retryable) |
|
1135 |
aux = "fault_threshold_reached"; |
|
1136 |
else |
|
1137 |
aux = "method_failed"; |
|
1138 |
||
1139 |
(void) restarter_instance_update_states(local_handle, inst, |
|
1140 |
RESTARTER_STATE_MAINT, RESTARTER_STATE_NONE, RERR_FAULT, |
|
1141 |
(char *)aux); |
|
1142 |
||
1143 |
if (!method_is_transient(inst, info->sf_method_type) && |
|
1144 |
inst->ri_i.i_primary_ctid != 0) |
|
1145 |
method_remove_contract(inst, B_TRUE, B_TRUE); |
|
1146 |
||
1147 |
out: |
|
1148 |
inst->ri_method_thread = 0; |
|
1149 |
MUTEX_UNLOCK(&inst->ri_lock); |
|
1150 |
(void) pthread_cond_broadcast(&inst->ri_method_cv); |
|
1151 |
||
1152 |
scf_instance_destroy(s_inst); |
|
1153 |
scf_handle_destroy(local_handle); |
|
1154 |
startd_free(info, sizeof (fork_info_t)); |
|
1155 |
return (NULL); |
|
1156 |
} |