/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2013, Joyent, Inc. All rights reserved.
*/
/*
* restarter.c - service manipulation
*
* This component manages services whose restarter is svc.startd, the standard
* restarter. It translates restarter protocol events from the graph engine
* into actions on processes, as a delegated restarter would do.
*
* The master restarter manages a number of always-running threads:
* - restarter event thread: events from the graph engine
* - timeout thread: thread to fire queued timeouts
* - contract thread: thread to handle contract events
* - wait thread: thread to handle wait-based services
*
* The other threads are created as-needed:
* - per-instance method threads
* - per-instance event processing threads
*
* The interaction of all threads must result in the following conditions
* being satisfied (on a per-instance basis):
* - restarter events must be processed in order
* - method execution must be serialized
* - instance delete must be held until outstanding methods are complete
* - contract events shouldn't be processed while a method is running
* - timeouts should fire even when a method is running
*
* Service instances are represented by restarter_inst_t's and are kept in the
* instance_list list.
*
* Service States
* The current state of a service instance is kept in
* restarter_inst_t->ri_i.i_state. If transition to a new state could take
* some time, then before we effect the transition we set
* restarter_inst_t->ri_i.i_next_state to the target state, and afterwards we
* rotate i_next_state to i_state and set i_next_state to
* RESTARTER_STATE_NONE. So usually i_next_state is _NONE when ri_lock is not
* held. The exception is when we launch methods, which are done with
* a separate thread. To keep any other threads from grabbing ri_lock before
* method_thread() does, we set ri_method_thread to the thread id of the
* method thread, and when it is nonzero any thread with a different thread id
* waits on ri_method_cv.
*
* Method execution is serialized by blocking on ri_method_cv in
* inst_lookup_by_id() and waiting for a 0 value of ri_method_thread. This
* also prevents the instance structure from being deleted until all
* outstanding operations such as method_thread() have finished.
*
* Lock ordering:
*
* dgraph_lock [can be held when taking:]
* utmpx_lock
* dictionary->dict_lock
* st->st_load_lock
* wait_info_lock
* ru->restarter_update_lock
* restarter_queue->rpeq_lock
* instance_list.ril_lock
* inst->ri_lock
* st->st_configd_live_lock
*
* instance_list.ril_lock
* graph_queue->gpeq_lock
* gu->gu_lock
* st->st_configd_live_lock
* dictionary->dict_lock
* inst->ri_lock
* graph_queue->gpeq_lock
* gu->gu_lock
* tu->tu_lock
* tq->tq_lock
* inst->ri_queue_lock
* wait_info_lock
* bp->cb_lock
* utmpx_lock
*
* single_user_thread_lock
* wait_info_lock
* utmpx_lock
*
* gu_freeze_lock
*
* logbuf_mutex nests inside pretty much everything.
*/
#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <libcontract.h>
#include <libcontract_priv.h>
#include <libintl.h>
#include <librestart.h>
#include <librestart_priv.h>
#include <libuutil.h>
#include <limits.h>
#include <poll.h>
#include <port.h>
#include <pthread.h>
#include <stdarg.h>
#include <stdio.h>
#include <strings.h>
#include <unistd.h>
#include "startd.h"
#include "protocol.h"
/*
* Function used to reset the restart times for an instance, when
* an administrative task comes along and essentially makes the times
* in this array ineffective.
*/
static void
{
inst->ri_start_index = 0;
}
/*ARGSUSED*/
static int
void *private)
{
return (1);
return (-1);
return (0);
}
static restarter_inst_t *
{
int id;
if (id == -1)
return (NULL);
return (inst_lookup_by_id(id));
}
{
while (inst->ri_method_thread != 0 &&
}
}
return (inst);
}
static restarter_inst_t *
{
int id;
if (id == -1)
return (NULL);
return (inst);
}
const char *
{
switch (flags & RINST_STYLE_MASK) {
case RINST_CONTRACT: return ("contract");
case RINST_TRANSIENT: return ("transient");
case RINST_WAIT: return ("wait");
default:
#ifndef NDEBUG
#endif
abort();
/* NOTREACHED */
}
}
/*
* Fails with ECONNABORTED or ECANCELED.
*/
static int
{
int fd, r;
if (fd >= 0) {
assert(r == 0);
return (0);
}
switch (r) {
case 0:
case ECONNABORTED:
case ECANCELED:
*ctidp = 0;
return (r);
case ENOMEM:
uu_die("Out of memory\n");
/* NOTREACHED */
case EPERM:
uu_die("Insufficient privilege.\n");
/* NOTREACHED */
case EACCES:
uu_die("Repository backend access denied.\n");
/* NOTREACHED */
case EROFS:
return (0);
case EINVAL:
case EBADF:
default:
assert(0);
abort();
/* NOTREACHED */
}
}
/*
* int restarter_insert_inst(scf_handle_t *, char *)
* If the inst is already in the restarter list, return 0. If the inst
* is not in the restarter list, initialize a restarter_inst_t, initialize its
* states, insert it into the list, and return 0.
*
* Fails with
* ENOENT - name is not in the repository
*/
static int
{
int id, r;
char *c;
/*
* We don't use inst_lookup_by_name() here because we want the lookup
* & insert to be atomic.
*/
if (id != -1) {
&idx);
return (0);
}
}
/* Allocate an instance */
/*
* id shouldn't be -1 since we use the same dictionary as graph.c, but
* just in case.
*/
pg = safe_scf_pg_create(h);
NULL, SCF_DECODE_FMRI_EXACT) != 0) {
switch (scf_error()) {
goto rep_retry;
case SCF_ERROR_NOT_FOUND:
goto deleted;
}
scf_strerror(scf_error()));
}
/*
* If there's no running snapshot, then we execute using the editing
* snapshot. Pending snapshots will be taken later.
*/
0)) {
switch (scf_error()) {
case SCF_ERROR_NOT_SET:
break;
goto rep_retry;
default:
assert(0);
abort();
}
goto deleted;
}
for (c = logfilebuf; *c != '\0'; c++)
if (*c == '/')
*c = '-';
/*
* we're probably being restarted & don't want to mess up the states
* that are there.
*/
if (r != 0) {
switch (scf_error()) {
goto rep_retry;
case SCF_ERROR_NOT_SET:
goto deleted;
case SCF_ERROR_NOT_FOUND:
/*
* This shouldn't happen since the graph engine should
* have initialized the state to uninitialized/none if
* there was no restarter pg. In case somebody
* deleted it, though....
*/
break;
default:
assert(0);
abort();
}
} else {
if (r != 0) {
} else {
if (next_state != RESTARTER_STATE_NONE) {
/*
* Force next_state to _NONE since we
* don't look for method processes.
*/
} else {
/*
* The reason for transition will depend on
* state.
*/
if (st->st_initial == 0)
else if (state == RESTARTER_STATE_MAINT)
/*
* Inform the restarter of our state without
* changing the STIME in the repository.
*/
}
}
}
&inst->ri_utmpx_prefix)) {
case 0:
break;
case ECONNABORTED:
goto rep_retry;
case ECANCELED:
goto deleted;
case ENOENT:
/*
* This is odd, because the graph engine should have required
* the general property group. So we'll just use default
* flags in anticipation of the graph engine sending us
* REMOVE_INSTANCE when it finds out that the general property
* group has been deleted.
*/
break;
default:
assert(0);
abort();
}
/*
* Copy our names to smaller buffers to reduce our memory footprint.
*/
}
}
switch (r) {
case 0:
break;
case ECONNABORTED:
goto rep_retry;
case ECANCELED:
goto deleted;
case ECHILD:
case ENOENT:
break;
default:
assert(0);
abort();
}
&start_pid)) {
case 0:
break;
case ECONNABORTED:
goto rep_retry;
case ECANCELED:
goto deleted;
default:
assert(0);
abort();
}
case 0:
break;
case ECONNABORTED:
goto rep_retry;
case ECANCELED:
goto deleted;
default:
assert(0);
abort();
}
}
case 0:
break;
case ECONNABORTED:
goto rep_retry;
case ECANCELED:
goto deleted;
default:
assert(0);
abort();
}
}
/* No more failures we live through, so add it to the list. */
if (start_pid != -1 &&
int ret;
if (ret == -1) {
/*
* Implication: if we can't reregister the
* instance, we will start another one. Two
* instances may or may not result in a resource
* conflict.
*/
"%s: couldn't reregister %ld for wait\n",
} else if (ret == 1) {
/*
* Leading PID has exited.
*/
}
}
if (do_commit_states)
name);
return (0);
return (ENOENT);
}
static void
{
int id;
/*
* Must drop the instance lock so we can pick up the instance_list
* lock & remove the instance.
*/
return;
}
/*
* We can lock the instance without holding the instance_list lock
* since we removed the instance from the list.
*/
startd_free(e, sizeof (*e));
}
/*
* instance_is_wait_style()
*
* Returns 1 if the given instance is a "wait-style" service instance.
*/
int
{
}
/*
* instance_is_transient_style()
*
* Returns 1 if the given instance is a transient service instance.
*/
int
{
}
/*
* instance_in_transition()
* Returns 1 if instance is in transition, 0 if not
*/
int
{
return (0);
return (1);
}
/*
* returns 1 if instance is already started, 0 if not
*/
static int
{
int ret;
ret = 1;
else
ret = 0;
return (ret);
}
/*
* Returns
* 0 - success
* ECONNRESET - success, but h was rebound
*/
int
{
int e;
int prev_state_online;
int state_online;
switch (e) {
case 0:
break;
case ENOMEM:
++retry_count;
if (retry_count < ALLOC_RETRY) {
goto retry;
}
/* Like startd_alloc(). */
uu_die("Insufficient memory.\n");
/* NOTREACHED */
case ECONNABORTED:
goto retry;
case EPERM:
case EACCES:
case EROFS:
/* FALLTHROUGH */
case ENOENT:
break;
case EINVAL:
default:
bad_error("_restarter_commit_states", e);
}
(void *)states);
if (prev_state_online && !state_online)
else if (!prev_state_online && state_online)
return (rebound ? ECONNRESET : 0);
}
void
{
return;
}
static void
{
int r;
const char *fmri;
/*
* This is where we'd check inst->ri_method_thread and if it
* were nonzero we'd wait in anticipation of another thread
* executing a method for inst. Doing so with the instance_list
* locked, though, leads to deadlock. Since taking a snapshot
* during that window won't hurt anything, we'll just continue.
*/
}
switch (r = libscf_snapshots_poststart(h, fmri,
B_FALSE)) {
case 0:
case ENOENT:
break;
case ECONNABORTED:
break;
case EACCES:
default:
bad_error("libscf_snapshots_poststart", r);
}
}
}
}
/* ARGSUSED */
void *
{
scf_handle_t *h;
int r;
for (;;) {
r = libscf_create_self(h);
if (r == 0)
break;
assert(r == ECONNABORTED);
}
(void) scf_handle_unbind(h);
return (NULL);
}
/*
* int stop_instance()
*
* Stop the instance identified by the instance given as the second argument,
* for the cause stated.
*
* Returns
* 0 - success
* -1 - inst is in transition
*/
static int
{
const char *cp;
int err;
switch (cause) {
case RSTOP_EXIT:
re = RERR_RESTART;
cp = "all processes in service exited";
break;
case RSTOP_ERR_CFG:
re = RERR_FAULT;
cp = "service exited with a configuration error";
break;
case RSTOP_ERR_EXIT:
re = RERR_RESTART;
cp = "service exited with an error";
break;
case RSTOP_CORE:
re = RERR_FAULT;
cp = "process dumped core";
break;
case RSTOP_SIGNAL:
re = RERR_FAULT;
cp = "process received fatal signal from outside the service";
break;
case RSTOP_HWERR:
re = RERR_FAULT;
cp = "process killed due to uncorrectable hardware error";
break;
case RSTOP_DEPENDENCY:
re = RERR_RESTART;
cp = "dependency activity requires stop";
break;
case RSTOP_DISABLE:
re = RERR_RESTART;
cp = "service disabled";
break;
case RSTOP_RESTART:
re = RERR_RESTART;
cp = "service restarting";
break;
default:
#ifndef NDEBUG
#endif
abort();
}
/* Services in the disabled and maintenance state are ignored */
return (0);
}
/* Already stopped instances are left alone */
if (instance_started(inst) == 0) {
return (0);
}
if (instance_in_transition(inst)) {
/* requeue event by returning -1 */
"Restarter: Not stopping %s, in transition.\n",
return (-1);
}
if (instance_is_wait_style(inst) &&
(cause == RSTOP_EXIT ||
cause == RSTOP_ERR_CFG ||
cause == RSTOP_ERR_EXIT)) {
/*
* No need to stop instance, as child has exited; remove
* contract and move the instance to the offline state.
*/
reason)) {
case 0:
case ECONNRESET:
break;
default:
}
if (cause == RSTOP_ERR_EXIT) {
/*
* The RSTOP_ERR_EXIT cause is set via the
* wait_thread -> wait_remove code path when we have
* a "wait" style svc that exited with an error. If
* the svc is failing too quickly, we throttle it by
* sleeping for WT_SVC_ERR_THROTTLE seconds. Since we
* know we're running in the wait thread, it's ok to
* throttle it right here.
*/
if (method_rate_critical(inst)) {
"quickly, throttling.");
(void) sleep(WT_SVC_ERR_THROTTLE);
}
} else {
}
}
reason)) {
case 0:
case ECONNRESET:
break;
default:
}
if (cause != RSTOP_ERR_CFG)
return (0);
/*
* Stopping a wait service through means other than the pid
* exiting should keep wait_thread() from restarting the
* service, by removing it from the wait list.
* We cannot remove it right now otherwise the process will
* end up <defunct> so mark it to be ignored.
*/
}
/*
* There are some configuration errors which we cannot detect until we
* try to run the method. For example, see exec_method() where the
* restarter_set_method_context() call can return SMF_EXIT_ERR_CONFIG
* in several cases. If this happens for a "wait-style" svc,
* wait_remove() sets the cause as RSTOP_ERR_CFG so that we can detect
* the configuration error and go into maintenance, even though it is
* a "wait-style" svc.
*/
if (cause == RSTOP_ERR_CFG)
else
case 0:
case ECONNRESET:
break;
default:
}
return (0);
}
/*
* Returns
* ENOENT - fmri is not in instance_list
* 0 - success
* ECONNRESET - success, though handle was rebound
* -1 - instance is in transition
*/
int
{
int r;
return (ENOENT);
return (r);
}
static void
{
int r;
const char *cp;
"Ignoring maintenance off command because %s is not in the "
return;
}
switch (cause) {
case RUNMAINT_CLEAR:
cp = "clear requested";
break;
case RUNMAINT_DISABLE:
cp = "disable requested";
break;
default:
#ifndef NDEBUG
#endif
abort();
}
cp);
/*
* If we did ADMIN_MAINT_ON_IMMEDIATE, then there might still be
* a primary contract.
*/
return;
case 0:
break;
case ECONNABORTED:
goto rep_retry;
case ENOENT:
/* Must have been deleted. */
return;
case EINVAL:
case ENOTSUP:
default:
bad_error("libscf_handle_rebind", r);
}
switch (r) {
case 0:
break;
case ENOMEM:
++tries;
if (tries < ALLOC_RETRY) {
goto again;
}
uu_die("Insufficient memory.\n");
/* NOTREACHED */
case ECONNABORTED:
goto rep_retry;
case ECANCELED:
break;
case EPERM:
case EACCES:
case EROFS:
"Could not remove contract id %lu for %s (%s).\n", ctid,
break;
case EINVAL:
case EBADF:
default:
bad_error("restarter_remove_contract", r);
}
}
/*
* enable_inst()
* Set inst->ri_i.i_enabled. Expects 'e' to be _ENABLE, _DISABLE, or
* _ADMIN_DISABLE. If the event is _ENABLE and inst is uninitialized or
* disabled, move it to offline. If the event is _DISABLE or
* _ADMIN_DISABLE, make sure inst will move to disabled.
*
* Returns
* 0 - success
* ECONNRESET - h was rebound
*/
static int
{
int r;
e == RESTARTER_EVENT_TYPE_DISABLE ||
e == RESTARTER_EVENT_TYPE_ENABLE);
if (e == RESTARTER_EVENT_TYPE_ENABLE) {
if (state == RESTARTER_STATE_UNINIT ||
/*
* B_FALSE: Don't log an error if the log_instance()
* fails because it will fail on the miniroot before
* install-discovery runs.
*/
/*
* If we are coming from DISABLED, it was obviously an
* enable request. If we are coming from UNINIT, it may
* have been a service in MAINT that was cleared.
*/
else if (state == RESTARTER_STATE_DISABLED)
(void) restarter_instance_update_states(h, inst,
} else {
"Not changing state of %s for enable command.\n",
}
} else {
switch (state) {
case RESTARTER_STATE_ONLINE:
case RESTARTER_STATE_DEGRADED:
return (r == ECONNRESET ? 0 : r);
case RESTARTER_STATE_OFFLINE:
case RESTARTER_STATE_UNINIT:
}
/* B_FALSE: See log_instance(..., "Enabled."); above */
/*
* If we are coming from OFFLINE, it was obviously a
* disable request. But if we are coming from
* UNINIT, it may have been a disable request for a
* service in MAINT.
*/
(void) restarter_instance_update_states(h, inst,
return (0);
case RESTARTER_STATE_DISABLED:
break;
case RESTARTER_STATE_MAINT:
/*
* We only want to pull the instance out of maintenance
* if the disable is on administrative request. The
* graph engine sends _DISABLE events whenever a
* service isn't in the disabled state, and we don't
* want to pull the service out of maintenance if,
* for example, it is there due to a dependency cycle.
*/
if (e == RESTARTER_EVENT_TYPE_ADMIN_DISABLE)
break;
default:
#ifndef NDEBUG
#endif
abort();
}
}
return (0);
}
static void
{
/*
* We want to keep the original reason for restarts and clear actions
*/
switch (reason) {
new_reason = reason;
break;
default:
}
/* Services in the disabled and maintenance state are ignored */
return;
}
/* Already started instances are left alone */
"%s: start_instance -> is already started\n",
return;
}
}
static int
{
int ret = 0;
return (-1);
return (ret);
}
static boolean_t
{
return (-1);
return (ret);
}
static void
{
/* Services in the maintenance state are ignored */
"%s: maintain_instance -> is already in maintenance\n",
return;
}
/*
* If reason state is restarter_str_service_request and
* restarter_actions/auxiliary_fmri property is set with a valid fmri,
* copy the fmri to restarter/auxiliary_fmri so svcs -x can use.
*/
if (reason == restarter_str_service_request &&
if (restarter_inst_validate_ractions_aux_fmri(scf_inst) == 0) {
"restarter_inst_set_aux_fmri failed: ",
} else {
"restarter_inst_validate_ractions_aux_fmri "
"restarter_inst_reset_aux_fmri failed: ",
}
}
}
(void) restarter_instance_update_states(h, rip,
reason);
return;
}
}
static void
{
int r;
switch (r) {
case 0:
break;
case ECONNABORTED:
goto rep_retry;
case ENOENT:
/* Must have been deleted. */
return;
case EINVAL:
case ENOTSUP:
default:
bad_error("libscf_fmri_get_instance", r);
}
&rip->ri_utmpx_prefix);
switch (r) {
case 0:
break;
case ECONNABORTED:
goto rep_retry;
case ECANCELED:
case ENOENT:
/* Succeed in anticipation of REMOVE_INSTANCE. */
break;
default:
bad_error("libscf_get_startd_properties", r);
}
if (instance_started(rip)) {
/* Refresh does not change the state. */
(void) restarter_instance_update_states(h, rip,
}
}
"ENABLE", "DISABLE", "ADMIN_DEGRADED", "ADMIN_REFRESH",
"ADMIN_RESTART", "ADMIN_MAINT_OFF", "ADMIN_MAINT_ON",
"ADMIN_MAINT_ON_IMMEDIATE", "STOP", "START", "DEPENDENCY_CYCLE",
"INVALID_DEPENDENCY", "ADMIN_DISABLE", "STOP_RESET"
};
/*
* void *restarter_process_events()
*
* Called in a separate thread to process the events on an instance's
* queue. Empties the queue completely, and tries to keep the thread
* around for a little while after the queue is empty to save on
* startup costs.
*/
static void *
{
scf_handle_t *h;
/* grab the queue lock */
goto out;
/* drop the queue lock */
/*
* Grab the inst lock -- this waits until any outstanding
* method finishes running.
*/
/* Getting deleted in the middle isn't an error. */
goto cont;
}
/* process the event */
break;
break;
goto cont;
/* FALLTHROUGH */
break;
break;
maintain_instance(h, inst, 0,
break;
maintain_instance(h, inst, 0,
break;
if (event_from_tty(h, inst) == 0)
maintain_instance(h, inst, 0,
else
maintain_instance(h, inst, 0,
break;
if (event_from_tty(h, inst) == 0)
else
break;
break;
refresh_instance(h, inst);
break;
"%s command (for %s) unimplemented.\n",
break;
if (!instance_started(inst)) {
"Not restarting %s; not running.\n",
} else {
/*
* Stop the instance. If it can be restarted,
* the graph engine will send a new event.
*/
if (restart_dump(h, inst)) {
(void) contract_kill(
} else if (stop_instance(h, inst,
RSTOP_RESTART) == 0) {
}
}
break;
default:
#ifndef NDEBUG
uu_warn("%s:%d: Bad restarter event %d. "
#endif
abort();
}
cont:
/* grab the queue lock */
goto out;
/* delete the event */
}
/*
* Try to preserve the thread for a little while for future use.
*/
goto again;
rip->ri_queue_thread = 0;
out:
(void) scf_handle_unbind(h);
return (NULL);
}
static int
switch (t) {
return (1);
default:
return (0);
}
}
static void
{
int r;
assert(r == 0);
}
/*
* void *restarter_event_thread()
*
* Handle incoming graph events by placing them on a per-instance
* queue. We can't lock the main part of the instance structure, so
* just modify the separately locked event queue portion.
*/
/*ARGSUSED*/
static void *
{
scf_handle_t *h;
/*
* This is a new thread, and thus, gets its own handle
* to the repository.
*/
/*CONSTCOND*/
while (1) {
while (ru->restarter_update_wakeup == 0)
ru->restarter_update_wakeup = 0;
while ((e = restarter_event_dequeue()) != NULL) {
char *fmri;
/*
* ADD_INSTANCE is special: there's likely no
* instance structure yet, so we need to handle the
* addition synchronously.
*/
switch (e->rpe_type) {
if (restarter_insert_inst(h, e->rpe_inst) != 0)
"Could not add %s.\n", e->rpe_inst);
if (--st->st_load_instances == 0)
(void) pthread_cond_broadcast(
&st->st_load_cv);
goto nolookup;
}
/*
* Lookup the instance, locking only the event queue.
* Can't grab ri_lock here because it might be held
* by a long-running method.
*/
"Ignoring %s command for unknown service "
e->rpe_inst);
goto nolookup;
}
/* Keep ADMIN events from filling up the queue. */
if (is_admin_event(e->rpe_type) &&
"queue overflow. Dropping administrative "
"request.");
"queue overflow. Dropping administrative "
goto nolookup;
}
/* Now add the event to the instance queue. */
restarter_queue_event(rip, e);
if (rip->ri_queue_thread == 0) {
/*
* Start a thread if one isn't already
* running.
*/
restarter_process_events, (void *)fmri);
} else {
/*
* Signal the existing thread that there's
* a new event.
*/
(void) pthread_cond_broadcast(
&rip->ri_queue_cv);
}
}
}
/*
* Unreachable for now -- there's currently no graceful cleanup
* called on exit().
*/
(void) scf_handle_unbind(h);
return (NULL);
}
static restarter_inst_t *
{
int id;
if (id == -1)
return (NULL);
/*
* Since ri_lock isn't held by the contract id lookup, this
* instance may have been restarted and now be in a new
* contract, making the old contract no longer valid for this
* instance.
*/
}
}
return (inst);
}
/*
* void contract_action()
* Take action on contract events.
*/
static void
{
/*
* If startd has stopped this contract, there is no need to
* stop it again.
*/
return;
| CT_PR_EV_HWERR)) == 0) {
/*
* There shouldn't be other events, since that's not how we set
* the terms. Thus, just log an error and drive on.
*/
"%s: contract %ld received unexpected critical event "
return;
}
if (instance_is_wait_style(inst)) {
/*
* We ignore all events; if they impact the
* process we're monitoring, then the
* wait_thread will stop the instance.
*/
"%s: ignoring contract event on wait-style service\n",
fmri);
} else {
/*
* A CT_PR_EV_EMPTY event is an RSTOP_EXIT request.
*/
switch (type) {
case CT_PR_EV_EMPTY:
break;
case CT_PR_EV_CORE:
break;
case CT_PR_EV_SIGNAL:
break;
case CT_PR_EV_HWERR:
break;
}
}
}
/*
* void *restarter_contract_event_thread(void *)
* Listens to the process contract bundle for critical events, taking action
* on events from contracts we know we are responsible for.
*/
/*ARGSUSED*/
static void *
{
/*
* Await graph load completion. That is, stop here, until we've scanned
* the repository for contract - instance associations.
*/
/*
* This is a new thread, and thus, gets its own handle
* to the repository.
*/
uu_die("Unable to bind a new repository handle: %s\n",
scf_strerror(scf_error()));
if (fd == -1)
uu_die("process bundle open failed");
/*
* Make sure we get all events (including those generated by configd
* before this thread was started).
*/
for (;;) {
"Error reading next contract event: %s",
continue;
}
/* Fetch cookie. */
< 0) {
continue;
}
continue;
}
/*
* svc.configd(1M) restart handling performed by the
* fork_configd_thread. We don't acknowledge, as that thread
* will do so.
*/
if (cookie == CONFIGD_COOKIE) {
continue;
}
if (storing_contract != 0 &&
/*
* This can happen for two reasons:
* - method_run() has not yet stored the
* contract into the internal hash table.
* - we receive an EMPTY event for an abandoned
* contract.
* If there is any contract in the process of
* being stored into the hash table then re-read
* the event later.
*/
"Reset event %d for unknown "
/* don't go too fast */
(void) ct_event_reset(fd);
continue;
}
/*
* Do not call contract_to_inst() again if first
* call succeeded.
*/
/*
* This can happen if we receive an EMPTY
* event for an abandoned contract.
*/
"Received event %d for unknown contract id "
} else {
"Received event %d for contract id "
}
O_WRONLY);
if (efd != -1) {
}
}
/*NOTREACHED*/
return (NULL);
}
/*
* Timeout queue, processed by restarter_timeouts_event_thread().
*/
typedef struct timeout_update {
int tu_wakeup;
/*
 * Table of instance FMRIs consulted by the timeout-override lookup in this
 * file; a match there is logged with "Using infinite timeout."
 *
 * The lookup walks this table until it reaches a NULL entry, so the table
 * MUST end with a NULL sentinel.  Add new FMRIs above the sentinel.
 */
static const char *timeout_ovr_svcs[] = {
	"svc:/system/manifest-import:default",
	"svc:/system/rmtmpfiles:default",
	"svc:/system/filesystem/usr:default",
	"svc:/system/filesystem/minimal:default",
	"svc:/system/filesystem/local:default",
	NULL	/* sentinel: terminates the lookup loop */
};
int
{
int i;
for (i = 0; timeout_ovr_svcs[i] != NULL; ++i) {
"svc.startd. Using infinite timeout.");
return (1);
}
}
return (0);
}
/*ARGSUSED*/
static int
{
return (1);
return (-1);
return (0);
}
void
{
}
void
{
/*
* If we overflow LLONG_MAX, we're never timing out anyways, so
* just return.
*/
"treating as infinite.");
return;
}
/* hrtime is in nanoseconds. Convert timeout_sec. */
/* Insert the calculated timeout time onto the queue. */
}
void
{
return;
}
static int
{
timeout_entry_t *e;
int ret;
/*
* Walk through the (sorted) timeouts list. While the timeout
* at the head of the list is <= the current time, kill the
* method.
*/
"Method or service exit timed out. Killing contract %ld.",
e->te_ctid);
e->te_fired = 1;
}
ret = 0;
else
ret = -1;
return (ret);
}
/*
* void *restarter_timeouts_event_thread(void *)
* Responsible for monitoring the method timeouts. This thread must
* be started before any methods are called.
*/
/*ARGSUSED*/
static void *
{
/*
* Timeouts are entered on a priority queue, which is processed by
* this thread. As timeouts are specified in seconds, we'll do
* the necessary processing every second, as long as the queue
* is not empty.
*/
/*CONSTCOND*/
while (1) {
/*
* As long as the timeout list isn't empty, process it
* every second.
*/
if (timeout_now() == 0) {
(void) sleep(1);
continue;
}
/* The list is empty, wait until we have more timeouts. */
}
return (NULL);
}
void
{
}
void
{
"restarter_instance_queue", sizeof (restarter_instance_qentry_t),
"contract_list", sizeof (contract_entry_t),
}