6311392 The Quick and the Dead: rapid-fire svcadm restart leads to maintenance
6219078 svc.startd's algorithm for detecting restart loops should be sensible
--- a/usr/src/cmd/cmd-inet/usr.lib/inetd/inetd.c Wed Jan 13 14:57:39 2010 -0800
+++ b/usr/src/cmd/cmd-inet/usr.lib/inetd/inetd.c Wed Jan 13 09:08:53 2010 -0800
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -1726,6 +1726,7 @@
case RESTARTER_EVENT_TYPE_REMOVE_INSTANCE:
remove_instance(instance);
goto done;
+ case RESTARTER_EVENT_TYPE_STOP_RESET:
case RESTARTER_EVENT_TYPE_STOP:
switch (instance->cur_istate) {
case IIS_OFFLINE_CONRATE:
--- a/usr/src/cmd/svc/startd/graph.c Wed Jan 13 14:57:39 2010 -0800
+++ b/usr/src/cmd/svc/startd/graph.c Wed Jan 13 09:08:53 2010 -0800
@@ -825,6 +825,7 @@
assert(v->gv_state != RESTARTER_STATE_DISABLED);
break;
+ case RESTARTER_EVENT_TYPE_STOP_RESET:
case RESTARTER_EVENT_TYPE_STOP:
log_framework(LOG_DEBUG, "Stopping %s.\n", v->gv_name);
assert(v->gv_state == RESTARTER_STATE_DEGRADED ||
@@ -1730,8 +1731,14 @@
switch (v->gv_type) {
case GVT_INST:
/* Restarter */
- if (err > RERR_NONE && inst_running(v))
- vertex_send_event(v, RESTARTER_EVENT_TYPE_STOP);
+ if (err > RERR_NONE && inst_running(v)) {
+ if (err == RERR_RESTART) {
+ vertex_send_event(v,
+ RESTARTER_EVENT_TYPE_STOP_RESET);
+ } else {
+ vertex_send_event(v, RESTARTER_EVENT_TYPE_STOP);
+ }
+ }
break;
case GVT_SVC:
@@ -1759,8 +1766,15 @@
e = uu_list_first(v->gv_dependents);
svc = e->ge_vertex;
- if (inst_running(svc))
- vertex_send_event(svc, RESTARTER_EVENT_TYPE_STOP);
+ if (inst_running(svc)) {
+ if (err == RERR_RESTART) {
+ vertex_send_event(svc,
+ RESTARTER_EVENT_TYPE_STOP_RESET);
+ } else {
+ vertex_send_event(svc,
+ RESTARTER_EVENT_TYPE_STOP);
+ }
+ }
break;
default:
@@ -1846,7 +1860,7 @@
(void) scf_handle_unbind(h);
scf_handle_destroy(h);
- vertex_send_event(v, RESTARTER_EVENT_TYPE_STOP);
+ vertex_send_event(v, RESTARTER_EVENT_TYPE_STOP_RESET);
}
/*
--- a/usr/src/cmd/svc/startd/restarter.c Wed Jan 13 14:57:39 2010 -0800
+++ b/usr/src/cmd/svc/startd/restarter.c Wed Jan 13 09:08:53 2010 -0800
@@ -140,6 +140,18 @@
static uu_list_pool_t *restarter_queue_pool;
+/*
+ * Clear an instance's restart history: rewind ri_start_index to 0 and
+ * zero all of ri_start_time.  Used when an administrative task comes
+ * along and makes the start times recorded so far ineffective.
+ */
+static void
+reset_start_times(restarter_inst_t *inst)
+{
+ inst->ri_start_index = 0;
+ bzero(inst->ri_start_time, sizeof (inst->ri_start_time));
+}
+
/*ARGSUSED*/
static int
restarter_instance_compare(const void *lc_arg, const void *rc_arg,
@@ -1643,8 +1655,12 @@
switch (event->riq_type) {
case RESTARTER_EVENT_TYPE_ENABLE:
case RESTARTER_EVENT_TYPE_DISABLE:
+ (void) enable_inst(h, inst, event->riq_type);
+ break;
+
case RESTARTER_EVENT_TYPE_ADMIN_DISABLE:
- (void) enable_inst(h, inst, event->riq_type);
+ if (enable_inst(h, inst, event->riq_type) == 0)
+ reset_start_times(inst);
break;
case RESTARTER_EVENT_TYPE_REMOVE_INSTANCE:
@@ -1652,6 +1668,9 @@
inst = NULL;
goto cont;
+ case RESTARTER_EVENT_TYPE_STOP_RESET:
+ reset_start_times(inst);
+ /* FALLTHROUGH */
case RESTARTER_EVENT_TYPE_STOP:
(void) stop_instance(h, inst, RSTOP_DEPENDENCY);
break;
@@ -1710,7 +1729,8 @@
* Stop the instance. If it can be restarted,
* the graph engine will send a new event.
*/
- (void) stop_instance(h, inst, RSTOP_RESTART);
+ if (stop_instance(h, inst, RSTOP_RESTART) == 0)
+ reset_start_times(inst);
}
break;
@@ -1980,6 +2000,9 @@
* process we're monitoring, then the
* wait_thread will stop the instance.
*/
+ if (type == CT_PR_EV_EMPTY)
+ reset_start_times(inst);
+
log_framework(LOG_DEBUG,
"%s: ignoring contract event on wait-style service\n",
fmri);
--- a/usr/src/cmd/svc/startd/startd.h Wed Jan 13 14:57:39 2010 -0800
+++ b/usr/src/cmd/svc/startd/startd.h Wed Jan 13 09:08:53 2010 -0800
@@ -394,8 +394,8 @@
#define RINST_RETAKE_MASK 0x0f000000
-#define RINST_START_TIMES 10 /* failures to consider */
-#define RINST_FAILURE_RATE_NS 1000000000LL /* 1 failure/second */
+#define RINST_START_TIMES 5 /* failures to consider */
+#define RINST_FAILURE_RATE_NS 600000000000LL /* 1 failure/10 minutes */
/* Number of events in the queue when we start dropping ADMIN events. */
#define RINST_QUEUE_THRESHOLD 100
@@ -653,6 +653,7 @@
int walk_dependency_pgs(scf_instance_t *, callback_t, void *);
int walk_property_astrings(scf_property_t *, callback_t, void *);
+void libscf_reset_start_times(restarter_inst_t *, int);
/* libscf.c - used by restarter.c/method.c/expand.c */
char *libscf_get_method(scf_handle_t *, int, restarter_inst_t *,
--- a/usr/src/lib/librestart/common/librestart.h Wed Jan 13 14:57:39 2010 -0800
+++ b/usr/src/lib/librestart/common/librestart.h Wed Jan 13 09:08:53 2010 -0800
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -124,6 +124,7 @@
#define RESTARTER_EVENT_TYPE_DEPENDENCY_CYCLE 13
#define RESTARTER_EVENT_TYPE_INVALID_DEPENDENCY 14
#define RESTARTER_EVENT_TYPE_ADMIN_DISABLE 15
+#define RESTARTER_EVENT_TYPE_STOP_RESET 16
#define RESTARTER_EVENT_ERROR -1