restarter.c revision 92175b8ef69769482e7a4af5adcbc84d244b92f2
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2005 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
/*
* restarter.c - service manipulation
*
* This component manages services whose restarter is svc.startd, the standard
* restarter. It translates restarter protocol events from the graph engine
* into actions on processes, as a delegated restarter would do.
*
* The master restarter manages a number of always-running threads:
* - restarter event thread: events from the graph engine
* - timeout thread: thread to fire queued timeouts
* - contract thread: thread to handle contract events
* - wait thread: thread to handle wait-based services
*
* The other threads are created as-needed:
* - per-instance method threads
* - per-instance event processing threads
*
* The interaction of all threads must result in the following conditions
* being satisfied (on a per-instance basis):
* - restarter events must be processed in order
* - method execution must be serialized
* - instance delete must be held until outstanding methods are complete
* - contract events shouldn't be processed while a method is running
* - timeouts should fire even when a method is running
*
* Service instances are represented by restarter_inst_t's and are kept in the
* instance_list list.
*
* Service States
* The current state of a service instance is kept in
* restarter_inst_t->ri_i.i_state. If transition to a new state could take
* some time, then before we effect the transition we set
* restarter_inst_t->ri_i.i_next_state to the target state, and afterwards we
* rotate i_next_state to i_state and set i_next_state to
* RESTARTER_STATE_NONE. So usually i_next_state is _NONE when ri_lock is not
* held. The exception is when we launch methods, which are done with
* a separate thread. To keep any other threads from grabbing ri_lock before
* method_thread() does, we set ri_method_thread to the thread id of the
* method thread, and when it is nonzero any thread with a different thread id
* waits on ri_method_cv.
*
* Method execution is serialized by blocking on ri_method_cv in
* inst_lookup_by_id() and waiting for a 0 value of ri_method_thread. This
* also prevents the instance structure from being deleted until all
* outstanding operations such as method_thread() have finished.
*
* Lock ordering:
*
* dgraph_lock [can be held when taking:]
* utmpx_lock
* dictionary->dict_lock
* st->st_load_lock
* wait_info_lock
* ru->restarter_update_lock
* restarter_queue->rpeq_lock
* instance_list.ril_lock
* inst->ri_lock
* st->st_configd_live_lock
*
* instance_list.ril_lock
* graph_queue->gpeq_lock
* gu->gu_lock
* st->st_configd_live_lock
* dictionary->dict_lock
* inst->ri_lock
* graph_queue->gpeq_lock
* gu->gu_lock
* tu->tu_lock
* tq->tq_lock
* inst->ri_queue_lock
* wait_info_lock
* bp->cb_lock
* utmpx_lock
*
* single_user_thread_lock
* wait_info_lock
* utmpx_lock
*
* gu_freeze_lock
*
* logbuf_mutex nests inside pretty much everything.
*/
#include <sys/contract/process.h>
#include <sys/ctfs.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/uio.h>
#include <sys/wait.h>
#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <libcontract.h>
#include <libcontract_priv.h>
#include <libintl.h>
#include <librestart.h>
#include <librestart_priv.h>
#include <libuutil.h>
#include <limits.h>
#include <poll.h>
#include <port.h>
#include <pthread.h>
#include <stdarg.h>
#include <stdio.h>
#include <strings.h>
#include <unistd.h>
#include "startd.h"
#include "protocol.h"
/* Pool used to initialize the ri_link node of each restarter_inst_t. */
static uu_list_pool_t *restarter_instance_pool;
/*
 * The restarter's instance list (ril_instance_list); the lookups in this
 * file take ril_lock around every search, insert and remove.
 */
static restarter_instance_list_t instance_list;
/* Pool used to create each instance's ri_queue list. */
static uu_list_pool_t *restarter_queue_pool;
/*ARGSUSED*/
static int
restarter_instance_compare(const void *lc_arg, const void *rc_arg,
void *private)
{
int lc_id = ((const restarter_inst_t *)lc_arg)->ri_id;
int rc_id = *(int *)rc_arg;
if (lc_id > rc_id)
return (1);
if (lc_id < rc_id)
return (-1);
return (0);
}
/*
 * Look up an instance by FMRI.  Returns NULL when the name has no dictionary
 * id; otherwise defers to inst_lookup_by_id(), so a non-NULL result is
 * returned with ri_lock held.
 */
static restarter_inst_t *
inst_lookup_by_name(const char *name)
{
	int dict_id = dict_lookup_byname(name);

	return (dict_id == -1 ? NULL : inst_lookup_by_id(dict_id));
}
/*
 * Look up an instance by dictionary id.
 *
 * Returns NULL if the id is not in the instance list.  Otherwise returns the
 * instance with ri_lock held, after waiting on ri_method_cv until either no
 * method thread is recorded in ri_method_thread or the recorded thread is the
 * caller itself.  While blocked, the caller is counted in ri_method_waiters.
 */
restarter_inst_t *
inst_lookup_by_id(int id)
{
	restarter_inst_t *found;

	MUTEX_LOCK(&instance_list.ril_lock);
	found = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
	if (found == NULL) {
		MUTEX_UNLOCK(&instance_list.ril_lock);
		return (NULL);
	}

	/* Take the instance lock before letting go of the list lock. */
	MUTEX_LOCK(&found->ri_lock);
	MUTEX_UNLOCK(&instance_list.ril_lock);

	for (;;) {
		if (found->ri_method_thread == 0)
			break;
		if (pthread_equal(found->ri_method_thread, pthread_self()))
			break;

		found->ri_method_waiters++;
		(void) pthread_cond_wait(&found->ri_method_cv,
		    &found->ri_lock);
		assert(found->ri_method_waiters > 0);
		found->ri_method_waiters--;
	}

	return (found);
}
/*
 * Look up an instance by FMRI for queue manipulation.  Returns NULL if the
 * name has no dictionary id or is not in the instance list; otherwise returns
 * the instance with ri_queue_lock (not ri_lock) held.
 */
static restarter_inst_t *
inst_lookup_queue(const char *name)
{
	restarter_inst_t *found;
	int dict_id = dict_lookup_byname(name);

	if (dict_id == -1)
		return (NULL);

	MUTEX_LOCK(&instance_list.ril_lock);
	found = uu_list_find(instance_list.ril_instance_list, &dict_id, NULL,
	    NULL);
	if (found != NULL) {
		/* Acquire the queue lock while the list lock pins the entry. */
		MUTEX_LOCK(&found->ri_queue_lock);
	}
	MUTEX_UNLOCK(&instance_list.ril_lock);

	return (found);
}
/*
 * Map the style bits of an instance's flags (flags & RINST_STYLE_MASK) to a
 * printable name: "contract", "transient" or "wait".  Any other style value
 * aborts the process (after a warning in non-NDEBUG builds).
 */
const char *
service_style(int flags)
{
	int style = flags & RINST_STYLE_MASK;

	if (style == RINST_CONTRACT)
		return ("contract");
	if (style == RINST_TRANSIENT)
		return ("transient");
	if (style == RINST_WAIT)
		return ("wait");

#ifndef NDEBUG
	uu_warn("%s:%d: Bad flags 0x%x.\n", __FILE__, __LINE__, flags);
#endif
	abort();
	/* NOTREACHED */
}
/*
 * Verify that the primary (primary == B_TRUE) or transient contract recorded
 * for inst still exists by opening its "status" file.
 *
 * Returns 0 if the contract can be opened.  If it cannot, the contract id is
 * removed from the repository via restarter_remove_contract() and:
 *   0, ECONNABORTED, ECANCELED - the in-core contract id is zeroed and the
 *	removal result is returned
 *   EROFS - the failure is logged, the in-core id is left alone, and 0 is
 *	returned
 *
 * ENOMEM, EPERM and EACCES are fatal (uu_die()); anything else aborts.
 */
static int
check_contract(restarter_inst_t *inst, boolean_t primary,
    scf_instance_t *scf_inst)
{
	ctid_t *ctidp;
	int fd, r;

	if (primary)
		ctidp = &inst->ri_i.i_primary_ctid;
	else
		ctidp = &inst->ri_i.i_transient_ctid;

	assert(*ctidp >= 1);

	fd = contract_open(*ctidp, NULL, "status", O_RDONLY);
	if (fd >= 0) {
		/* The contract is still there; nothing to clean up. */
		r = close(fd);
		assert(r == 0);
		return (0);
	}

	r = restarter_remove_contract(scf_inst, *ctidp, primary ?
	    RESTARTER_CONTRACT_PRIMARY : RESTARTER_CONTRACT_TRANSIENT);

	switch (r) {
	case 0:
	case ECONNABORTED:
	case ECANCELED:
		*ctidp = 0;
		return (r);

	case ENOMEM:
		uu_die("Out of memory\n");
		/* NOTREACHED */

	case EPERM:
		uu_die("Insufficient privilege.\n");
		/* NOTREACHED */

	case EACCES:
		uu_die("Repository backend access denied.\n");
		/* NOTREACHED */

	case EROFS:
		log_error(LOG_INFO, "Could not remove unusable contract id %ld "
		    "for %s from repository.\n", *ctidp, inst->ri_i.i_fmri);
		return (0);

	case EINVAL:
	case EBADF:
	default:
		assert(0);
		abort();
		/* NOTREACHED */
	}
}
/*
 * Forward declaration: restarter_insert_inst() below calls stop_instance()
 * before its definition appears in this file.
 */
static int stop_instance(scf_handle_t *, restarter_inst_t *, stop_cause_t);
/*
 * int restarter_insert_inst(scf_handle_t *, const char *)
 *   If the instance named by the FMRI is already in the restarter list, do
 *   nothing and return 0.  Otherwise allocate a restarter_inst_t, load its
 *   states, flags, template names and contract/pid ids from the repository,
 *   insert it into the list, and return 0.
 *
 *   The instance_list lock is held across the lookup and the insert so that
 *   the two are atomic.  Whenever the repository connection breaks, the
 *   handle is rebound and all repository reads restart from rep_retry.
 *
 *   Returns
 *     0 - instance is (now) in the list
 *     ENOENT - name is not in the repository (or was deleted while loading)
 */
static int
restarter_insert_inst(scf_handle_t *h, const char *name)
{
	int id, r;
	restarter_inst_t *inst;
	uu_list_index_t idx;
	scf_service_t *scf_svc;
	scf_instance_t *scf_inst;
	scf_snapshot_t *snap = NULL;
	scf_propertygroup_t *pg;
	char *svc_name, *inst_name;
	char logfilebuf[PATH_MAX];
	char *c;
	boolean_t do_commit_states;
	restarter_instance_state_t state, next_state;
	protocol_states_t *ps;
	pid_t start_pid;

	MUTEX_LOCK(&instance_list.ril_lock);

	/*
	 * We don't use inst_lookup_by_name() here because we want the lookup
	 * & insert to be atomic.
	 */
	id = dict_lookup_byname(name);
	if (id != -1) {
		inst = uu_list_find(instance_list.ril_instance_list, &id, NULL,
		    &idx);
		if (inst != NULL) {
			MUTEX_UNLOCK(&instance_list.ril_lock);
			return (0);
		}
	}

	/* Allocate an instance */
	inst = startd_zalloc(sizeof (restarter_inst_t));
	inst->ri_utmpx_prefix = startd_alloc(max_scf_value_size);
	inst->ri_utmpx_prefix[0] = '\0';

	inst->ri_i.i_fmri = startd_alloc(strlen(name) + 1);
	(void) strcpy((char *)inst->ri_i.i_fmri, name);

	inst->ri_queue = startd_list_create(restarter_queue_pool, inst, 0);

	/*
	 * id shouldn't be -1 since we use the same dictionary as graph.c, but
	 * just in case.  In that case the search above was skipped and idx
	 * was never filled in, so compute the insertion point for the newly
	 * assigned id now; uu_list_insert() below needs a valid index.
	 */
	if (id == -1) {
		id = dict_insert(name);
		(void) uu_list_find(instance_list.ril_instance_list, &id, NULL,
		    &idx);
	}
	inst->ri_id = id;

	special_online_hooks_get(name, &inst->ri_pre_online_hook,
	    &inst->ri_post_online_hook, &inst->ri_post_offline_hook);

	scf_svc = safe_scf_service_create(h);
	scf_inst = safe_scf_instance_create(h);
	pg = safe_scf_pg_create(h);
	svc_name = startd_alloc(max_scf_name_size);
	inst_name = startd_alloc(max_scf_name_size);

rep_retry:
	/* Discard anything a previous, interrupted pass may have loaded. */
	if (snap != NULL)
		scf_snapshot_destroy(snap);
	if (inst->ri_logstem != NULL)
		startd_free(inst->ri_logstem, PATH_MAX);
	if (inst->ri_common_name != NULL)
		startd_free(inst->ri_common_name, max_scf_value_size);
	if (inst->ri_C_common_name != NULL)
		startd_free(inst->ri_C_common_name, max_scf_value_size);
	snap = NULL;
	inst->ri_logstem = NULL;
	inst->ri_common_name = NULL;
	inst->ri_C_common_name = NULL;

	if (scf_handle_decode_fmri(h, name, NULL, scf_svc, scf_inst, NULL,
	    NULL, SCF_DECODE_FMRI_EXACT) != 0) {
		switch (scf_error()) {
		case SCF_ERROR_CONNECTION_BROKEN:
			libscf_handle_rebind(h);
			goto rep_retry;

		case SCF_ERROR_NOT_FOUND:
			goto deleted;
		}

		uu_die("Can't decode FMRI %s: %s\n", name,
		    scf_strerror(scf_error()));
	}

	/*
	 * If there's no running snapshot, then we execute using the editing
	 * snapshot.  Pending snapshots will be taken later.
	 */
	snap = libscf_get_running_snapshot(scf_inst);

	if ((scf_service_get_name(scf_svc, svc_name, max_scf_name_size) < 0) ||
	    (scf_instance_get_name(scf_inst, inst_name, max_scf_name_size) <
	    0)) {
		switch (scf_error()) {
		case SCF_ERROR_NOT_SET:
			break;

		case SCF_ERROR_CONNECTION_BROKEN:
			libscf_handle_rebind(h);
			goto rep_retry;

		default:
			assert(0);
			abort();
		}

		goto deleted;
	}

	/* Build the log file stem "<svc>:<inst><LOG_SUFFIX>", '/' -> '-'. */
	(void) snprintf(logfilebuf, PATH_MAX, "%s:%s", svc_name, inst_name);
	for (c = logfilebuf; *c != '\0'; c++)
		if (*c == '/')
			*c = '-';

	inst->ri_logstem = startd_alloc(PATH_MAX);
	(void) snprintf(inst->ri_logstem, PATH_MAX, "%s%s", logfilebuf,
	    LOG_SUFFIX);

	/*
	 * If the restarter group is missing, use uninit/none.  Otherwise,
	 * we're probably being restarted & don't want to mess up the states
	 * that are there.
	 */
	state = RESTARTER_STATE_UNINIT;
	next_state = RESTARTER_STATE_NONE;

	r = scf_instance_get_pg(scf_inst, SCF_PG_RESTARTER, pg);
	if (r != 0) {
		switch (scf_error()) {
		case SCF_ERROR_CONNECTION_BROKEN:
			libscf_handle_rebind(h);
			goto rep_retry;

		case SCF_ERROR_NOT_SET:
			goto deleted;

		case SCF_ERROR_NOT_FOUND:
			/*
			 * This shouldn't happen since the graph engine should
			 * have initialized the state to uninitialized/none if
			 * there was no restarter pg.  In case somebody
			 * deleted it, though....
			 */
			do_commit_states = B_TRUE;
			break;

		default:
			assert(0);
			abort();
		}
	} else {
		r = libscf_read_states(pg, &state, &next_state);
		if (r != 0) {
			do_commit_states = B_TRUE;
		} else {
			if (next_state != RESTARTER_STATE_NONE) {
				/*
				 * Force next_state to _NONE since we
				 * don't look for method processes.
				 */
				next_state = RESTARTER_STATE_NONE;
				do_commit_states = B_TRUE;
			} else {
				/*
				 * Inform the restarter of our state without
				 * changing the STIME in the repository.
				 */
				ps = startd_alloc(sizeof (*ps));
				inst->ri_i.i_state = ps->ps_state = state;
				inst->ri_i.i_next_state = ps->ps_state_next =
				    next_state;
				/*
				 * startd_alloc() is not the zeroing allocator,
				 * so don't hand the graph engine an
				 * indeterminate error code.
				 */
				ps->ps_err = RERR_NONE;

				graph_protocol_send_event(inst->ri_i.i_fmri,
				    GRAPH_UPDATE_STATE_CHANGE, ps);

				do_commit_states = B_FALSE;
			}
		}
	}

	switch (libscf_get_startd_properties(scf_inst, snap, &inst->ri_flags,
	    &inst->ri_utmpx_prefix)) {
	case 0:
		break;

	case ECONNABORTED:
		libscf_handle_rebind(h);
		goto rep_retry;

	case ECANCELED:
		goto deleted;

	case ENOENT:
		/*
		 * This is odd, because the graph engine should have required
		 * the general property group.  So we'll just use default
		 * flags in anticipation of the graph engine sending us
		 * REMOVE_INSTANCE when it finds out that the general property
		 * group has been deleted.
		 */
		inst->ri_flags = RINST_CONTRACT;
		break;

	default:
		assert(0);
		abort();
	}

	switch (libscf_get_template_values(scf_inst, snap,
	    &inst->ri_common_name, &inst->ri_C_common_name)) {
	case 0:
		break;

	case ECONNABORTED:
		libscf_handle_rebind(h);
		goto rep_retry;

	case ECANCELED:
		goto deleted;

	case ECHILD:
	case ENOENT:
		break;

	default:
		assert(0);
		abort();
	}

	switch (libscf_read_method_ids(h, scf_inst, inst->ri_i.i_fmri,
	    &inst->ri_i.i_primary_ctid, &inst->ri_i.i_transient_ctid,
	    &start_pid)) {
	case 0:
		break;

	case ECONNABORTED:
		libscf_handle_rebind(h);
		goto rep_retry;

	case ECANCELED:
		goto deleted;

	default:
		assert(0);
		abort();
	}

	if (inst->ri_i.i_primary_ctid >= 1) {
		contract_hash_store(inst->ri_i.i_primary_ctid, inst->ri_id);

		switch (check_contract(inst, B_TRUE, scf_inst)) {
		case 0:
			break;

		case ECONNABORTED:
			libscf_handle_rebind(h);
			goto rep_retry;

		case ECANCELED:
			goto deleted;

		default:
			assert(0);
			abort();
		}
	}

	if (inst->ri_i.i_transient_ctid >= 1) {
		switch (check_contract(inst, B_FALSE, scf_inst)) {
		case 0:
			break;

		case ECONNABORTED:
			libscf_handle_rebind(h);
			goto rep_retry;

		case ECANCELED:
			goto deleted;

		default:
			assert(0);
			abort();
		}
	}

	/* No more failures we live through, so add it to the list. */
	(void) pthread_mutex_init(&inst->ri_lock, &mutex_attrs);
	(void) pthread_mutex_init(&inst->ri_queue_lock, &mutex_attrs);
	MUTEX_LOCK(&inst->ri_lock);
	MUTEX_LOCK(&inst->ri_queue_lock);

	(void) pthread_cond_init(&inst->ri_method_cv, NULL);

	uu_list_node_init(inst, &inst->ri_link, restarter_instance_pool);
	uu_list_insert(instance_list.ril_instance_list, inst, idx);
	MUTEX_UNLOCK(&instance_list.ril_lock);

	if (start_pid != -1 &&
	    (inst->ri_flags & RINST_STYLE_MASK) == RINST_WAIT) {
		int ret;

		ret = wait_register(start_pid, inst->ri_i.i_fmri, 0, 1);
		if (ret == -1) {
			/*
			 * Implication:  if we can't reregister the
			 * instance, we will start another one.  Two
			 * instances may or may not result in a resource
			 * conflict.
			 */
			log_error(LOG_WARNING,
			    "%s: couldn't reregister %ld for wait\n",
			    inst->ri_i.i_fmri, start_pid);
		} else if (ret == 1) {
			/*
			 * Leading PID has exited.
			 */
			(void) stop_instance(h, inst, RSTOP_EXIT);
		}
	}

	scf_pg_destroy(pg);

	if (do_commit_states)
		(void) restarter_instance_update_states(h, inst, state,
		    next_state, RERR_NONE, NULL);

	log_framework(LOG_DEBUG, "%s is a %s-style service\n", name,
	    service_style(inst->ri_flags));

	MUTEX_UNLOCK(&inst->ri_queue_lock);
	MUTEX_UNLOCK(&inst->ri_lock);

	startd_free(svc_name, max_scf_name_size);
	startd_free(inst_name, max_scf_name_size);
	scf_snapshot_destroy(snap);
	scf_instance_destroy(scf_inst);
	scf_service_destroy(scf_svc);

	log_framework(LOG_DEBUG, "%s: inserted instance into restarter list\n",
	    name);

	return (0);

deleted:
	/* The instance vanished from the repository; undo every allocation. */
	MUTEX_UNLOCK(&instance_list.ril_lock);
	startd_free(inst_name, max_scf_name_size);
	startd_free(svc_name, max_scf_name_size);
	if (snap != NULL)
		scf_snapshot_destroy(snap);
	scf_pg_destroy(pg);
	scf_instance_destroy(scf_inst);
	scf_service_destroy(scf_svc);
	startd_free((void *)inst->ri_i.i_fmri, strlen(inst->ri_i.i_fmri) + 1);
	uu_list_destroy(inst->ri_queue);
	if (inst->ri_logstem != NULL)
		startd_free(inst->ri_logstem, PATH_MAX);
	if (inst->ri_common_name != NULL)
		startd_free(inst->ri_common_name, max_scf_value_size);
	if (inst->ri_C_common_name != NULL)
		startd_free(inst->ri_C_common_name, max_scf_value_size);
	startd_free(inst->ri_utmpx_prefix, max_scf_value_size);
	startd_free(inst, sizeof (restarter_inst_t));
	return (ENOENT);
}
/*
 * Remove ri from the instance list and free it.
 *
 * Must be called with ri->ri_lock held.  The lock is dropped so that the
 * instance_list lock can be taken first; if the instance is no longer in the
 * list by then, the function returns without freeing anything (and without
 * ri_lock held).  Otherwise the instance is unlinked, its primary contract is
 * removed from the contract hash, and the function waits until no method
 * thread is recorded and no thread is blocked in inst_lookup_by_id() before
 * draining the event queue and releasing all memory owned by the instance.
 */
static void
restarter_delete_inst(restarter_inst_t *ri)
{
	int id;
	restarter_inst_t *rip;
	void *cookie = NULL;
	restarter_instance_qentry_t *e;

	assert(PTHREAD_MUTEX_HELD(&ri->ri_lock));

	/*
	 * Must drop the instance lock so we can pick up the instance_list
	 * lock & remove the instance.
	 */
	id = ri->ri_id;
	MUTEX_UNLOCK(&ri->ri_lock);

	MUTEX_LOCK(&instance_list.ril_lock);

	rip = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
	if (rip == NULL) {
		MUTEX_UNLOCK(&instance_list.ril_lock);
		return;
	}

	assert(ri == rip);

	uu_list_remove(instance_list.ril_instance_list, ri);

	log_framework(LOG_DEBUG, "%s: deleted instance from restarter list\n",
	    ri->ri_i.i_fmri);

	MUTEX_UNLOCK(&instance_list.ril_lock);

	/*
	 * We can lock the instance without holding the instance_list lock
	 * since we removed the instance from the list.
	 */
	MUTEX_LOCK(&ri->ri_lock);
	MUTEX_LOCK(&ri->ri_queue_lock);

	if (ri->ri_i.i_primary_ctid >= 1)
		contract_hash_remove(ri->ri_i.i_primary_ctid);

	/* Hold the delete until outstanding methods and lookups are done. */
	while (ri->ri_method_thread != 0 || ri->ri_method_waiters > 0)
		(void) pthread_cond_wait(&ri->ri_method_cv, &ri->ri_lock);

	while ((e = uu_list_teardown(ri->ri_queue, &cookie)) != NULL)
		startd_free(e, sizeof (*e));
	uu_list_destroy(ri->ri_queue);

	startd_free((void *)ri->ri_i.i_fmri, strlen(ri->ri_i.i_fmri) + 1);
	startd_free(ri->ri_logstem, PATH_MAX);
	/*
	 * The template names are optional; free them the same way
	 * restarter_insert_inst() does on its failure path so they are not
	 * leaked when an instance is deleted.
	 */
	if (ri->ri_common_name != NULL)
		startd_free(ri->ri_common_name, max_scf_value_size);
	if (ri->ri_C_common_name != NULL)
		startd_free(ri->ri_C_common_name, max_scf_value_size);
	startd_free(ri->ri_utmpx_prefix, max_scf_value_size);
	(void) pthread_mutex_destroy(&ri->ri_lock);
	(void) pthread_mutex_destroy(&ri->ri_queue_lock);
	startd_free(ri, sizeof (restarter_inst_t));
}
/*
 * instance_is_wait_style()
 *
 *   Returns 1 if the style bits of inst->ri_flags equal RINST_WAIT, 0
 *   otherwise.  The caller must hold inst->ri_lock.
 */
int
instance_is_wait_style(restarter_inst_t *inst)
{
	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));

	if ((inst->ri_flags & RINST_STYLE_MASK) != RINST_WAIT)
		return (0);

	return (1);
}
/*
 * instance_is_transient_style()
 *
 *   Returns 1 if the style bits of inst->ri_flags equal RINST_TRANSIENT, 0
 *   otherwise.  The caller must hold inst->ri_lock.
 */
int
instance_is_transient_style(restarter_inst_t *inst)
{
	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));

	if ((inst->ri_flags & RINST_STYLE_MASK) != RINST_TRANSIENT)
		return (0);

	return (1);
}
/*
 * instance_in_transition()
 *   Returns 1 if the instance has a pending next state (i_next_state is not
 *   RESTARTER_STATE_NONE), 0 if not.  The caller must hold inst->ri_lock.
 */
int
instance_in_transition(restarter_inst_t *inst)
{
	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));

	return (inst->ri_i.i_next_state != RESTARTER_STATE_NONE);
}
/*
 * Commit a state / next-state pair for ri to the repository and notify the
 * graph engine of the change.  The caller must hold ri->ri_lock.
 *
 * Out-of-memory failures are retried with an increasing delay up to
 * ALLOC_RETRY attempts before dying; a broken repository connection is
 * rebound and retried.  If the commit fails with EPERM, EACCES or EROFS the
 * failure is logged; in those cases and for ENOENT the in-core states are
 * updated directly.  When the new state is online, the instance's post-online
 * hook is invoked.
 *
 * Returns
 *   0 - success
 *   ECONNRESET - success, but h was rebound
 */
int
restarter_instance_update_states(scf_handle_t *h, restarter_inst_t *ri,
    restarter_instance_state_t new_state,
    restarter_instance_state_t new_state_next, restarter_error_t err, char *aux)
{
	protocol_states_t *states;
	int e;
	uint_t retry_count = 0, msecs = ALLOC_DELAY;
	boolean_t rebound = B_FALSE;

	assert(PTHREAD_MUTEX_HELD(&ri->ri_lock));

	/* Loop until the commit yields something other than a retryable error. */
	for (;;) {
		e = _restarter_commit_states(h, &ri->ri_i, new_state,
		    new_state_next, aux);

		if (e == ECONNABORTED) {
			libscf_handle_rebind(h);
			rebound = B_TRUE;
			continue;
		}

		if (e != ENOMEM)
			break;

		if (++retry_count >= ALLOC_RETRY) {
			/* Like startd_alloc(). */
			uu_die("Insufficient memory.\n");
			/* NOTREACHED */
		}

		(void) poll(NULL, 0, msecs);
		msecs *= ALLOC_DELAY_MULT;
	}

	switch (e) {
	case 0:
		break;

	case EPERM:
	case EACCES:
	case EROFS:
		log_error(LOG_NOTICE, "Could not commit state change for %s "
		    "to repository: %s.\n", ri->ri_i.i_fmri, strerror(e));
		/* FALLTHROUGH */

	case ENOENT:
		/* Keep the in-core view current even though the commit failed. */
		ri->ri_i.i_state = new_state;
		ri->ri_i.i_next_state = new_state_next;
		break;

	case EINVAL:
	default:
		bad_error("_restarter_commit_states", e);
	}

	states = startd_alloc(sizeof (protocol_states_t));
	states->ps_state = new_state;
	states->ps_state_next = new_state_next;
	states->ps_err = err;
	graph_protocol_send_event(ri->ri_i.i_fmri, GRAPH_UPDATE_STATE_CHANGE,
	    (void *)states);

	if (new_state == RESTARTER_STATE_ONLINE)
		ri->ri_post_online_hook();

	return (rebound ? ECONNRESET : 0);
}
/*
 * Set a retake-snapshot flag (RINST_RETAKE_RUNNING or RINST_RETAKE_START) on
 * the instance named by fmri.  Silently does nothing if the instance is not
 * in the restarter list.
 */
void
restarter_mark_pending_snapshot(const char *fmri, uint_t flag)
{
	restarter_inst_t *target;

	assert(flag == RINST_RETAKE_RUNNING || flag == RINST_RETAKE_START);

	target = inst_lookup_by_name(fmri);
	if (target != NULL) {
		/* inst_lookup_by_name() returned with ri_lock held. */
		target->ri_flags |= flag;
		MUTEX_UNLOCK(&target->ri_lock);
	}
}
/*
 * Walk the instance list and take any snapshots that were marked pending
 * with RINST_RETAKE_RUNNING or RINST_RETAKE_START.  A flag is cleared only
 * when its snapshot work succeeded (or, for the start snapshot, when the
 * instance no longer exists); otherwise it is left set.
 *
 * The instance_list lock is held for the entire walk, and each instance's
 * ri_lock is held while it is processed.
 */
static void
restarter_take_pending_snapshots(scf_handle_t *h)
{
	restarter_inst_t *inst;
	int r;

	MUTEX_LOCK(&instance_list.ril_lock);

	for (inst = uu_list_first(instance_list.ril_instance_list);
	    inst != NULL;
	    inst = uu_list_next(instance_list.ril_instance_list, inst)) {
		const char *fmri;
		scf_instance_t *sinst = NULL;

		MUTEX_LOCK(&inst->ri_lock);

		/*
		 * This is where we'd check inst->ri_method_thread and if it
		 * were nonzero we'd wait in anticipation of another thread
		 * executing a method for inst.  Doing so with the instance_list
		 * locked, though, leads to deadlock.  Since taking a snapshot
		 * during that window won't hurt anything, we'll just continue.
		 */

		fmri = inst->ri_i.i_fmri;

		if (inst->ri_flags & RINST_RETAKE_RUNNING) {
			/*
			 * Only proceed if the repository instance was
			 * actually obtained; otherwise sinst may still be
			 * NULL and must not be handed to the snapshot code.
			 * The flag stays set in that case.
			 */
			if (libscf_fmri_get_instance(h, fmri, &sinst) == 0 &&
			    sinst != NULL) {
				scf_snapshot_t *rsnap;

				rsnap = libscf_get_or_make_running_snapshot(
				    sinst, fmri, B_FALSE);

				scf_instance_destroy(sinst);

				if (rsnap != NULL)
					inst->ri_flags &= ~RINST_RETAKE_RUNNING;

				scf_snapshot_destroy(rsnap);
			} else {
				log_framework(LOG_DEBUG, "%s: could not get "
				    "instance to retake running snapshot\n",
				    fmri);
			}
		}

		if (inst->ri_flags & RINST_RETAKE_START) {
			switch (r = libscf_snapshots_poststart(h, fmri,
			    B_FALSE)) {
			case 0:
			case ENOENT:
				inst->ri_flags &= ~RINST_RETAKE_START;
				break;

			case ECONNABORTED:
				/* Leave the flag set; nothing more to do now. */
				break;

			case EACCES:
			default:
				bad_error("libscf_snapshots_poststart", r);
			}
		}

		MUTEX_UNLOCK(&inst->ri_lock);
	}

	MUTEX_UNLOCK(&instance_list.ril_lock);
}
/*
 * Thread entry point: bind a private repository handle, take all pending
 * snapshots, then unbind and destroy the handle.  The argument is unused and
 * the thread always returns NULL.
 */
/* ARGSUSED */
void *
restarter_post_fsminimal_thread(void *unused)
{
	scf_handle_t *handle = libscf_handle_create_bound_loop();
	int rc;

	/* Keep rebinding until libscf_create_self() succeeds. */
	while ((rc = libscf_create_self(handle)) != 0) {
		assert(rc == ECONNABORTED);
		libscf_handle_rebind(handle);
	}

	restarter_take_pending_snapshots(handle);

	(void) scf_handle_unbind(handle);
	scf_handle_destroy(handle);

	return (NULL);
}
/*
 * Returns 1 if the instance's current state is online or degraded (i.e. it
 * is already started), 0 if not.  The caller must hold inst->ri_lock.
 */
static int
instance_started(restarter_inst_t *inst)
{
	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));

	switch (inst->ri_i.i_state) {
	case RESTARTER_STATE_ONLINE:
	case RESTARTER_STATE_DEGRADED:
		return (1);

	default:
		return (0);
	}
}
/*
 * int stop_instance()
 *
 *   Stop the instance identified by the instance given as the second argument,
 *   for the cause stated.  The caller must hold inst->ri_lock and no method
 *   thread may be recorded for the instance.
 *
 *   Instances in maintenance or disabled, and instances that are not started,
 *   are left alone.  A wait-style instance whose process has already exited
 *   is moved straight to offline (removing its primary contract, if any);
 *   every other instance gets a stop method thread.
 *
 *   Returns
 *     0 - success
 *     -1 - inst is in transition
 */
static int
stop_instance(scf_handle_t *local_handle, restarter_inst_t *inst,
    stop_cause_t cause)
{
	fork_info_t *stop_info;
	const char *why;
	int rc;
	restarter_error_t rerr;
	restarter_instance_state_t target;

	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
	assert(inst->ri_method_thread == 0);

	/* Most causes are plain restarts; the fault cases override this. */
	rerr = RERR_RESTART;

	switch (cause) {
	case RSTOP_EXIT:
		why = "all processes in service exited";
		break;

	case RSTOP_CORE:
		rerr = RERR_FAULT;
		why = "process dumped core";
		break;

	case RSTOP_SIGNAL:
		rerr = RERR_FAULT;
		why = "process received fatal signal from outside the service";
		break;

	case RSTOP_HWERR:
		rerr = RERR_FAULT;
		why = "process killed due to uncorrectable hardware error";
		break;

	case RSTOP_DEPENDENCY:
		why = "dependency activity requires stop";
		break;

	case RSTOP_DISABLE:
		why = "service disabled";
		break;

	case RSTOP_RESTART:
		why = "service restarting";
		break;

	default:
#ifndef NDEBUG
		(void) fprintf(stderr, "Unknown cause %d at %s:%d.\n",
		    cause, __FILE__, __LINE__);
#endif
		abort();
	}

	/* Services in the disabled and maintenance state are ignored */
	if (inst->ri_i.i_state == RESTARTER_STATE_MAINT ||
	    inst->ri_i.i_state == RESTARTER_STATE_DISABLED) {
		log_framework(LOG_DEBUG,
		    "%s: stop_instance -> is maint/disabled\n",
		    inst->ri_i.i_fmri);
		return (0);
	}

	/* Already stopped instances are left alone */
	if (!instance_started(inst)) {
		log_framework(LOG_DEBUG, "Restarter: %s is already stopped.\n",
		    inst->ri_i.i_fmri);
		return (0);
	}

	if (instance_in_transition(inst)) {
		/* requeue event by returning -1 */
		log_framework(LOG_DEBUG,
		    "Restarter: Not stopping %s, in transition.\n",
		    inst->ri_i.i_fmri);
		return (-1);
	}

	log_instance(inst, B_TRUE, "Stopping because %s.", why);

	log_framework(rerr == RERR_FAULT ? LOG_INFO : LOG_DEBUG,
	    "%s: Instance stopping because %s.\n", inst->ri_i.i_fmri, why);

	if (instance_is_wait_style(inst) && cause == RSTOP_EXIT) {
		/*
		 * No need to stop instance, as child has exited; remove
		 * contract and move the instance to the offline state.
		 */
		rc = restarter_instance_update_states(local_handle, inst,
		    inst->ri_i.i_state, RESTARTER_STATE_OFFLINE, rerr, NULL);
		if (rc != 0 && rc != ECONNRESET)
			bad_error("restarter_instance_update_states", rc);

		(void) update_fault_count(inst, FAULT_COUNT_RESET);

		if (inst->ri_i.i_primary_ctid != 0) {
			inst->ri_m_inst =
			    safe_scf_instance_create(local_handle);
			inst->ri_mi_deleted = B_FALSE;

			libscf_reget_instance(inst);
			method_remove_contract(inst, B_TRUE, B_TRUE);

			scf_instance_destroy(inst->ri_m_inst);
			inst->ri_m_inst = NULL;
		}

		/* Complete the transition: next state becomes current. */
		rc = restarter_instance_update_states(local_handle, inst,
		    inst->ri_i.i_next_state, RESTARTER_STATE_NONE, rerr, NULL);
		if (rc != 0 && rc != ECONNRESET)
			bad_error("restarter_instance_update_states", rc);

		return (0);
	}

	/* An instance that is no longer enabled heads for disabled. */
	if (inst->ri_i.i_enabled)
		target = RESTARTER_STATE_OFFLINE;
	else
		target = RESTARTER_STATE_DISABLED;

	rc = restarter_instance_update_states(local_handle, inst,
	    inst->ri_i.i_state, target, RERR_NONE, NULL);
	if (rc != 0 && rc != ECONNRESET)
		bad_error("restarter_instance_update_states", rc);

	stop_info = startd_zalloc(sizeof (fork_info_t));

	stop_info->sf_id = inst->ri_id;
	stop_info->sf_method_type = METHOD_STOP;
	stop_info->sf_event_type = rerr;
	inst->ri_method_thread = startd_thread_create(method_thread, stop_info);

	return (0);
}
/*
 * Stop the instance named by fmri; flags is passed to stop_instance() as the
 * stop cause.
 *
 * Returns
 *   ENOENT - fmri is not in instance_list
 *   otherwise, whatever stop_instance() returns:
 *     0 - success
 *     -1 - instance is in transition
 */
int
stop_instance_fmri(scf_handle_t *h, const char *fmri, uint_t flags)
{
	int result;
	restarter_inst_t *target = inst_lookup_by_name(fmri);

	if (target == NULL)
		return (ENOENT);

	/* The lookup returned with ri_lock held, as stop_instance() needs. */
	result = stop_instance(h, target, flags);
	MUTEX_UNLOCK(&target->ri_lock);

	return (result);
}
/*
 * Take an instance out of the maintenance state for the given cause
 * (RUNMAINT_CLEAR or RUNMAINT_DISABLE), moving it to uninitialized/none.
 * The request is ignored (with a debug message) if the instance is not in
 * maintenance.  The caller must hold rip->ri_lock.
 *
 * If the instance still has a primary contract, the contract is abandoned,
 * the in-core id is cleared, and the id is removed from the repository.
 * Repository connection failures are rebound and retried; out-of-memory
 * failures are retried up to ALLOC_RETRY times before dying.
 */
static void
unmaintain_instance(scf_handle_t *h, restarter_inst_t *rip,
    unmaint_cause_t cause)
{
	ctid_t ctid;
	scf_instance_t *inst;
	int r;
	uint_t tries = 0, msecs = ALLOC_DELAY;
	const char *cp;

	assert(PTHREAD_MUTEX_HELD(&rip->ri_lock));

	if (rip->ri_i.i_state != RESTARTER_STATE_MAINT) {
		log_error(LOG_DEBUG, "Restarter: "
		    "Ignoring maintenance off command because %s is not in the "
		    "maintenance state.\n", rip->ri_i.i_fmri);
		return;
	}

	switch (cause) {
	case RUNMAINT_CLEAR:
		cp = "clear requested";
		break;
	case RUNMAINT_DISABLE:
		cp = "disable requested";
		break;
	default:
#ifndef NDEBUG
		(void) fprintf(stderr, "Uncaught case for %d at %s:%d.\n",
		    cause, __FILE__, __LINE__);
#endif
		abort();
	}

	log_instance(rip, B_TRUE, "Leaving maintenance because %s.",
	    cp);
	log_framework(LOG_DEBUG, "%s: Instance leaving maintenance because "
	    "%s.\n", rip->ri_i.i_fmri, cp);

	(void) restarter_instance_update_states(h, rip, RESTARTER_STATE_UNINIT,
	    RESTARTER_STATE_NONE, RERR_RESTART, NULL);

	/*
	 * If we did ADMIN_MAINT_ON_IMMEDIATE, then there might still be
	 * a primary contract.
	 */
	if (rip->ri_i.i_primary_ctid == 0)
		return;

	ctid = rip->ri_i.i_primary_ctid;
	contract_abandon(ctid);
	rip->ri_i.i_primary_ctid = 0;

rep_retry:
	switch (r = libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst)) {
	case 0:
		break;

	case ECONNABORTED:
		libscf_handle_rebind(h);
		goto rep_retry;

	case ENOENT:
		/* Must have been deleted. */
		return;

	case EINVAL:
	case ENOTSUP:
	default:
		/* Report the function that actually produced the error. */
		bad_error("libscf_fmri_get_instance", r);
	}

again:
	r = restarter_remove_contract(inst, ctid, RESTARTER_CONTRACT_PRIMARY);
	switch (r) {
	case 0:
		break;

	case ENOMEM:
		++tries;
		if (tries < ALLOC_RETRY) {
			(void) poll(NULL, 0, msecs);
			msecs *= ALLOC_DELAY_MULT;
			goto again;
		}

		uu_die("Insufficient memory.\n");
		/* NOTREACHED */

	case ECONNABORTED:
		/* The instance object is stale after a rebind; get a new one. */
		scf_instance_destroy(inst);
		libscf_handle_rebind(h);
		goto rep_retry;

	case ECANCELED:
		break;

	case EPERM:
	case EACCES:
	case EROFS:
		log_error(LOG_INFO,
		    "Could not remove contract id %lu for %s (%s).\n", ctid,
		    rip->ri_i.i_fmri, strerror(r));
		break;

	case EINVAL:
	case EBADF:
	default:
		bad_error("restarter_remove_contract", r);
	}

	scf_instance_destroy(inst);
}
/*
 * enable_inst()
 *   Set inst->ri_i.i_enabled.  Expects 'e' to be _ENABLE, _DISABLE, or
 *   _ADMIN_DISABLE.  If the event is _ENABLE and inst is uninitialized or
 *   disabled, move it to offline.  If the event is _DISABLE or
 *   _ADMIN_DISABLE, make sure inst will move to disabled.
 *
 *   The caller must hold inst->ri_lock and the instance must not be in
 *   transition.
 *
 *   Returns
 *     0 - success (an ECONNRESET from stop_instance() is reported as 0)
 *     otherwise, any other value returned by stop_instance() when an
 *       online or degraded instance is being disabled
 */
static int
enable_inst(scf_handle_t *h, restarter_inst_t *inst, restarter_event_type_t e)
{
	restarter_instance_state_t state;
	int r;

	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
	assert(e == RESTARTER_EVENT_TYPE_ADMIN_DISABLE ||
	    e == RESTARTER_EVENT_TYPE_DISABLE ||
	    e == RESTARTER_EVENT_TYPE_ENABLE);
	assert(instance_in_transition(inst) == 0);

	state = inst->ri_i.i_state;

	if (e == RESTARTER_EVENT_TYPE_ENABLE) {
		inst->ri_i.i_enabled = 1;

		if (state == RESTARTER_STATE_UNINIT ||
		    state == RESTARTER_STATE_DISABLED) {
			/*
			 * B_FALSE: Don't log an error if the log_instance()
			 * fails because it will fail on the miniroot before
			 * install-discovery runs.
			 */
			log_instance(inst, B_FALSE, "Enabled.");
			log_framework(LOG_DEBUG, "%s: Instance enabled.\n",
			    inst->ri_i.i_fmri);
			(void) restarter_instance_update_states(h, inst,
			    RESTARTER_STATE_OFFLINE, RESTARTER_STATE_NONE,
			    RERR_NONE, NULL);
		} else {
			log_framework(LOG_DEBUG, "Restarter: "
			    "Not changing state of %s for enable command.\n",
			    inst->ri_i.i_fmri);
		}
	} else {
		inst->ri_i.i_enabled = 0;

		switch (state) {
		case RESTARTER_STATE_ONLINE:
		case RESTARTER_STATE_DEGRADED:
			r = stop_instance(h, inst, RSTOP_DISABLE);
			return (r == ECONNRESET ? 0 : r);

		case RESTARTER_STATE_OFFLINE:
		case RESTARTER_STATE_UNINIT:
			if (inst->ri_i.i_primary_ctid != 0) {
				inst->ri_m_inst = safe_scf_instance_create(h);
				inst->ri_mi_deleted = B_FALSE;

				libscf_reget_instance(inst);
				method_remove_contract(inst, B_TRUE, B_TRUE);

				scf_instance_destroy(inst->ri_m_inst);
				/*
				 * Don't leave a dangling pointer behind;
				 * stop_instance() resets it the same way.
				 */
				inst->ri_m_inst = NULL;
			}
			/* B_FALSE: See log_instance(..., "Enabled."); above */
			log_instance(inst, B_FALSE, "Disabled.");
			log_framework(LOG_DEBUG, "%s: Instance disabled.\n",
			    inst->ri_i.i_fmri);
			(void) restarter_instance_update_states(h, inst,
			    RESTARTER_STATE_DISABLED, RESTARTER_STATE_NONE,
			    RERR_RESTART, NULL);
			return (0);

		case RESTARTER_STATE_DISABLED:
			break;

		case RESTARTER_STATE_MAINT:
			/*
			 * We only want to pull the instance out of maintenance
			 * if the disable is on administrative request.  The
			 * graph engine sends _DISABLE events whenever a
			 * service isn't in the disabled state, and we don't
			 * want to pull the service out of maintenance if,
			 * for example, it is there due to a dependency cycle.
			 */
			if (e == RESTARTER_EVENT_TYPE_ADMIN_DISABLE)
				unmaintain_instance(h, inst, RUNMAINT_DISABLE);
			break;

		default:
#ifndef NDEBUG
			(void) fprintf(stderr, "Restarter instance %s has "
			    "unknown state %d.\n", inst->ri_i.i_fmri, state);
#endif
			abort();
		}
	}

	return (0);
}
/*
 * Begin starting an instance.  The caller must hold inst->ri_lock; the
 * instance must not be in transition and must have no method thread.
 *
 * Nothing is done if the instance is in maintenance, disabled, not enabled,
 * or already started.  Otherwise the next state is set to online and a start
 * method thread is created and recorded in ri_method_thread.
 */
static void
start_instance(scf_handle_t *local_handle, restarter_inst_t *inst)
{
	fork_info_t *start_info;
	const char *fmri = inst->ri_i.i_fmri;

	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));
	assert(instance_in_transition(inst) == 0);
	assert(inst->ri_method_thread == 0);

	log_framework(LOG_DEBUG, "%s: trying to start instance\n", fmri);

	/* Services in the disabled and maintenance state are ignored */
	if (inst->ri_i.i_state == RESTARTER_STATE_MAINT ||
	    inst->ri_i.i_state == RESTARTER_STATE_DISABLED ||
	    inst->ri_i.i_enabled == 0) {
		log_framework(LOG_DEBUG,
		    "%s: start_instance -> is maint/disabled\n", fmri);
		return;
	}

	/* Already started instances are left alone */
	if (instance_started(inst) == 1) {
		log_framework(LOG_DEBUG,
		    "%s: start_instance -> is already started\n", fmri);
		return;
	}

	log_framework(LOG_DEBUG, "%s: starting instance.\n", fmri);

	(void) restarter_instance_update_states(local_handle, inst,
	    inst->ri_i.i_state, RESTARTER_STATE_ONLINE, RERR_NONE, NULL);

	/* Describe the start method for the method thread. */
	start_info = startd_zalloc(sizeof (fork_info_t));
	start_info->sf_id = inst->ri_id;
	start_info->sf_method_type = METHOD_START;
	start_info->sf_event_type = RERR_NONE;

	inst->ri_method_thread = startd_thread_create(method_thread,
	    start_info);
}
/*
 * Put an instance into the maintenance state; aux (must be non-NULL) is the
 * reason, which is logged and passed along with the state change.  The
 * caller must hold rip->ri_lock and no method thread may be recorded.
 *
 * If the instance is already in maintenance nothing changes.  If immediate
 * is nonzero or the instance is not started, any primary contract is removed
 * and the instance goes straight to maintenance.  Otherwise the next state is
 * set to maintenance and a stop method thread is created.
 */
static void
maintain_instance(scf_handle_t *h, restarter_inst_t *rip, int immediate,
    const char *aux)
{
	fork_info_t *info;

	assert(PTHREAD_MUTEX_HELD(&rip->ri_lock));
	assert(aux != NULL);
	assert(rip->ri_method_thread == 0);

	log_instance(rip, B_TRUE, "Stopping for maintenance due to %s.", aux);
	log_framework(LOG_DEBUG, "%s: stopping for maintenance due to %s.\n",
	    rip->ri_i.i_fmri, aux);

	/* Services in the maintenance state are ignored */
	if (rip->ri_i.i_state == RESTARTER_STATE_MAINT) {
		log_framework(LOG_DEBUG,
		    "%s: maintain_instance -> is already in maintenance\n",
		    rip->ri_i.i_fmri);
		return;
	}

	if (immediate || !instance_started(rip)) {
		if (rip->ri_i.i_primary_ctid != 0) {
			rip->ri_m_inst = safe_scf_instance_create(h);
			rip->ri_mi_deleted = B_FALSE;

			libscf_reget_instance(rip);
			method_remove_contract(rip, B_TRUE, B_TRUE);

			scf_instance_destroy(rip->ri_m_inst);
			/*
			 * Don't leave a dangling pointer behind;
			 * stop_instance() resets it the same way.
			 */
			rip->ri_m_inst = NULL;
		}

		(void) restarter_instance_update_states(h, rip,
		    RESTARTER_STATE_MAINT, RESTARTER_STATE_NONE, RERR_RESTART,
		    (char *)aux);
		return;
	}

	(void) restarter_instance_update_states(h, rip, rip->ri_i.i_state,
	    RESTARTER_STATE_MAINT, RERR_NONE, (char *)aux);

	info = startd_zalloc(sizeof (*info));
	info->sf_id = rip->ri_id;
	info->sf_method_type = METHOD_STOP;
	info->sf_event_type = RERR_RESTART;
	rip->ri_method_thread = startd_thread_create(method_thread, info);
}
/*
 * static void refresh_instance(scf_handle_t *, restarter_inst_t *)
 *   Reread the instance's startd properties (flags and utmpx prefix) from
 *   its running snapshot and, if the instance has been started, create a
 *   method thread to run METHOD_REFRESH.
 *
 *   A broken repository connection (ECONNABORTED) causes the handle to be
 *   rebound and the whole lookup to be retried.  If the instance can no
 *   longer be found, the function returns without doing anything.
 *
 *   The caller must hold rip->ri_lock.
 */
static void
refresh_instance(scf_handle_t *h, restarter_inst_t *rip)
{
	scf_instance_t *inst;
	scf_snapshot_t *snap;
	fork_info_t *refresh_req;
	int rc;

	assert(PTHREAD_MUTEX_HELD(&rip->ri_lock));

	log_instance(rip, B_TRUE, "Rereading configuration.");
	log_framework(LOG_DEBUG, "%s: rereading configuration.\n",
	    rip->ri_i.i_fmri);

	/* Each pass is one attempt; "continue" retries after a rebind. */
	for (;;) {
		rc = libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst);
		switch (rc) {
		case 0:
			break;

		case ECONNABORTED:
			libscf_handle_rebind(h);
			continue;

		case ENOENT:
			/* Must have been deleted. */
			return;

		case EINVAL:
		case ENOTSUP:
		default:
			bad_error("libscf_fmri_get_instance", rc);
		}

		snap = libscf_get_running_snapshot(inst);

		rc = libscf_get_startd_properties(inst, snap, &rip->ri_flags,
		    &rip->ri_utmpx_prefix);
		switch (rc) {
		case 0:
			log_framework(LOG_DEBUG, "%s is a %s-style service\n",
			    rip->ri_i.i_fmri, service_style(rip->ri_flags));
			break;

		case ECONNABORTED:
			/* Release this attempt's objects before retrying. */
			scf_instance_destroy(inst);
			scf_snapshot_destroy(snap);
			libscf_handle_rebind(h);
			continue;

		case ECANCELED:
		case ENOENT:
			/* Succeed in anticipation of REMOVE_INSTANCE. */
			break;

		default:
			bad_error("libscf_get_startd_properties", rc);
		}

		break;
	}

	if (instance_started(rip)) {
		/* Refresh does not change the state. */
		(void) restarter_instance_update_states(h, rip,
		    rip->ri_i.i_state, rip->ri_i.i_state, RERR_NONE, NULL);

		refresh_req = startd_zalloc(sizeof (*refresh_req));
		refresh_req->sf_id = rip->ri_id;
		refresh_req->sf_method_type = METHOD_REFRESH;
		refresh_req->sf_event_type = RERR_REFRESH;

		assert(rip->ri_method_thread == 0);
		rip->ri_method_thread =
		    startd_thread_create(method_thread, refresh_req);
	}

	scf_snapshot_destroy(snap);
	scf_instance_destroy(inst);
}
/*
 * Printable names for restarter events, used in log messages.  The table
 * is indexed by the numeric event type, so the order of the entries must
 * not be changed.
 */
const char *event_names[] = {
	"INVALID",
	"ADD_INSTANCE",
	"REMOVE_INSTANCE",
	"ENABLE",
	"DISABLE",
	"ADMIN_DEGRADED",
	"ADMIN_REFRESH",
	"ADMIN_RESTART",
	"ADMIN_MAINT_OFF",
	"ADMIN_MAINT_ON",
	"ADMIN_MAINT_ON_IMMEDIATE",
	"STOP",
	"START",
	"DEPENDENCY_CYCLE",
	"INVALID_DEPENDENCY",
	"ADMIN_DISABLE"
};
/*
* void *restarter_process_events()
*
* Called in a separate thread to process the events on an instance's
* queue. Empties the queue completely, and tries to keep the thread
* around for a little while after the queue is empty to save on
* startup costs.
*/
static void *
restarter_process_events(void *arg)
{
scf_handle_t *h;
restarter_instance_qentry_t *event;
restarter_inst_t *rip;
char *fmri = (char *)arg;
struct timespec to;
assert(fmri != NULL);
h = libscf_handle_create_bound_loop();
/* grab the queue lock */
rip = inst_lookup_queue(fmri);
if (rip == NULL)
goto out;
again:
while ((event = uu_list_first(rip->ri_queue)) != NULL) {
restarter_inst_t *inst;
/* drop the queue lock */
MUTEX_UNLOCK(&rip->ri_queue_lock);
/*
* Grab the inst lock -- this waits until any outstanding
* method finishes running.
*/
inst = inst_lookup_by_name(fmri);
if (inst == NULL) {
/* Getting deleted in the middle isn't an error. */
goto cont;
}
assert(instance_in_transition(inst) == 0);
/* process the event */
switch (event->riq_type) {
case RESTARTER_EVENT_TYPE_ENABLE:
case RESTARTER_EVENT_TYPE_DISABLE:
case RESTARTER_EVENT_TYPE_ADMIN_DISABLE:
(void) enable_inst(h, inst, event->riq_type);
break;
case RESTARTER_EVENT_TYPE_REMOVE_INSTANCE:
restarter_delete_inst(inst);
inst = NULL;
goto cont;
case RESTARTER_EVENT_TYPE_STOP:
(void) stop_instance(h, inst, RSTOP_DEPENDENCY);
break;
case RESTARTER_EVENT_TYPE_START:
start_instance(h, inst);
break;
case RESTARTER_EVENT_TYPE_DEPENDENCY_CYCLE:
maintain_instance(h, inst, 0, "dependency_cycle");
break;
case RESTARTER_EVENT_TYPE_INVALID_DEPENDENCY:
maintain_instance(h, inst, 0, "invalid_dependency");
break;
case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
maintain_instance(h, inst, 0, "administrative_request");
break;
case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON_IMMEDIATE:
maintain_instance(h, inst, 1, "administrative_request");
break;
case RESTARTER_EVENT_TYPE_ADMIN_MAINT_OFF:
unmaintain_instance(h, inst, RUNMAINT_CLEAR);
break;
case RESTARTER_EVENT_TYPE_ADMIN_REFRESH:
refresh_instance(h, inst);
break;
case RESTARTER_EVENT_TYPE_ADMIN_DEGRADED:
log_framework(LOG_WARNING, "Restarter: "
"%s command (for %s) unimplemented.\n",
event_names[event->riq_type], inst->ri_i.i_fmri);
break;
case RESTARTER_EVENT_TYPE_ADMIN_RESTART:
if (!instance_started(inst)) {
log_framework(LOG_DEBUG, "Restarter: "
"Not restarting %s; not running.\n",
inst->ri_i.i_fmri);
} else {
/*
* Stop the instance. If it can be restarted,
* the graph engine will send a new event.
*/
(void) stop_instance(h, inst, RSTOP_RESTART);
}
break;
case RESTARTER_EVENT_TYPE_ADD_INSTANCE:
default:
#ifndef NDEBUG
uu_warn("%s:%d: Bad restarter event %d. "
"Aborting.\n", __FILE__, __LINE__, event->riq_type);
#endif
abort();
}
assert(inst != NULL);
MUTEX_UNLOCK(&inst->ri_lock);
cont:
/* grab the queue lock */
rip = inst_lookup_queue(fmri);
if (rip == NULL)
goto out;
/* delete the event */
uu_list_remove(rip->ri_queue, event);
startd_free(event, sizeof (restarter_instance_qentry_t));
}
assert(rip != NULL);
/*
* Try to preserve the thread for a little while for future use.
*/
to.tv_sec = 3;
to.tv_nsec = 0;
(void) pthread_cond_reltimedwait_np(&rip->ri_queue_cv,
&rip->ri_queue_lock, &to);
if (uu_list_first(rip->ri_queue) != NULL)
goto again;
rip->ri_queue_thread = 0;
MUTEX_UNLOCK(&rip->ri_queue_lock);
out:
(void) scf_handle_unbind(h);
scf_handle_destroy(h);
free(fmri);
return (NULL);
}
/*
 * static int is_admin_event(restarter_event_type_t)
 *   Returns 1 if t is one of the administrative request events (maintenance
 *   on/off, refresh, degraded, restart), and 0 for any other event type.
 */
static int
is_admin_event(restarter_event_type_t t)
{
	return (t == RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON ||
	    t == RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON_IMMEDIATE ||
	    t == RESTARTER_EVENT_TYPE_ADMIN_MAINT_OFF ||
	    t == RESTARTER_EVENT_TYPE_ADMIN_REFRESH ||
	    t == RESTARTER_EVENT_TYPE_ADMIN_DEGRADED ||
	    t == RESTARTER_EVENT_TYPE_ADMIN_RESTART);
}
/*
 * static void restarter_queue_event(restarter_inst_t *,
 *     restarter_protocol_event_t *)
 *   Allocate a queue entry recording the type of protocol event e and add
 *   it to the instance's event queue.  The entry is inserted before the
 *   NULL position, i.e. at the tail of the list.
 *
 *   The caller must hold ri->ri_queue_lock and must not hold ri->ri_lock.
 */
static void
restarter_queue_event(restarter_inst_t *ri, restarter_protocol_event_t *e)
{
	restarter_instance_qentry_t *entry;
	int rc;

	assert(PTHREAD_MUTEX_HELD(&ri->ri_queue_lock));
	assert(!PTHREAD_MUTEX_HELD(&ri->ri_lock));

	entry = startd_zalloc(sizeof (*entry));
	entry->riq_type = e->rpe_type;

	uu_list_node_init(entry, &entry->riq_link, restarter_queue_pool);

	rc = uu_list_insert_before(ri->ri_queue, NULL, entry);
	assert(rc == 0);
}
/*
* void *restarter_event_thread()
*
* Handle incoming graph events by placing them on a per-instance
* queue. We can't lock the main part of the instance structure, so
* just modify the seprarately locked event queue portion.
*/
/*ARGSUSED*/
static void *
restarter_event_thread(void *unused)
{
scf_handle_t *h;
/*
* This is a new thread, and thus, gets its own handle
* to the repository.
*/
h = libscf_handle_create_bound_loop();
MUTEX_LOCK(&ru->restarter_update_lock);
/*CONSTCOND*/
while (1) {
restarter_protocol_event_t *e;
while (ru->restarter_update_wakeup == 0)
(void) pthread_cond_wait(&ru->restarter_update_cv,
&ru->restarter_update_lock);
ru->restarter_update_wakeup = 0;
while ((e = restarter_event_dequeue()) != NULL) {
restarter_inst_t *rip;
char *fmri;
MUTEX_UNLOCK(&ru->restarter_update_lock);
/*
* ADD_INSTANCE is special: there's likely no
* instance structure yet, so we need to handle the
* addition synchronously.
*/
switch (e->rpe_type) {
case RESTARTER_EVENT_TYPE_ADD_INSTANCE:
if (restarter_insert_inst(h, e->rpe_inst) != 0)
log_error(LOG_INFO, "Restarter: "
"Could not add %s.\n", e->rpe_inst);
MUTEX_LOCK(&st->st_load_lock);
if (--st->st_load_instances == 0)
(void) pthread_cond_broadcast(
&st->st_load_cv);
MUTEX_UNLOCK(&st->st_load_lock);
goto nolookup;
}
/*
* Lookup the instance, locking only the event queue.
* Can't grab ri_lock here because it might be held
* by a long-running method.
*/
rip = inst_lookup_queue(e->rpe_inst);
if (rip == NULL) {
log_error(LOG_INFO, "Restarter: "
"Ignoring %s command for unknown service "
"%s.\n", event_names[e->rpe_type],
e->rpe_inst);
goto nolookup;
}
/* Keep ADMIN events from filling up the queue. */
if (is_admin_event(e->rpe_type) &&
uu_list_numnodes(rip->ri_queue) >
RINST_QUEUE_THRESHOLD) {
MUTEX_UNLOCK(&rip->ri_queue_lock);
log_instance(rip, B_TRUE, "Instance event "
"queue overflow. Dropping administrative "
"request.");
log_framework(LOG_DEBUG, "%s: Instance event "
"queue overflow. Dropping administrative "
"request.\n", rip->ri_i.i_fmri);
goto nolookup;
}
/* Now add the event to the instance queue. */
restarter_queue_event(rip, e);
if (rip->ri_queue_thread == 0) {
/*
* Start a thread if one isn't already
* running.
*/
fmri = safe_strdup(e->rpe_inst);
rip->ri_queue_thread = startd_thread_create(
restarter_process_events, (void *)fmri);
} else {
/*
* Signal the existing thread that there's
* a new event.
*/
(void) pthread_cond_broadcast(
&rip->ri_queue_cv);
}
MUTEX_UNLOCK(&rip->ri_queue_lock);
nolookup:
restarter_event_release(e);
MUTEX_LOCK(&ru->restarter_update_lock);
}
}
/*
* Unreachable for now -- there's currently no graceful cleanup
* called on exit().
*/
(void) scf_handle_unbind(h);
scf_handle_destroy(h);
return (NULL);
}
/*
 * static restarter_inst_t *contract_to_inst(ctid_t)
 *   Find the instance whose primary contract is ctid.  On success the
 *   instance is returned with its ri_lock still held from the lookup; the
 *   caller must release it.  Returns NULL if no instance is associated
 *   with the contract, or if the instance has since moved on to a
 *   different primary contract.
 */
static restarter_inst_t *
contract_to_inst(ctid_t ctid)
{
	restarter_inst_t *inst;
	int id;

	id = lookup_inst_by_contract(ctid);
	if (id == -1)
		return (NULL);

	inst = inst_lookup_by_id(id);
	if (inst == NULL)
		return (NULL);

	if (inst->ri_i.i_primary_ctid == ctid)
		return (inst);

	/*
	 * Since ri_lock isn't held by the contract id lookup, this
	 * instance may have been restarted and now be in a new
	 * contract, making the old contract no longer valid for this
	 * instance.
	 */
	MUTEX_UNLOCK(&inst->ri_lock);
	return (NULL);
}
/*
 * void contract_action()
 *   React to a critical process contract event of the given type on
 *   contract id, which belongs to inst.
 *
 *   Nothing is done if startd has already stopped the instance's primary
 *   contract.  Events other than EMPTY, CORE, SIGNAL and HWERR are logged
 *   and otherwise ignored.  For wait-style services all events are ignored
 *   here; for other services the instance is stopped with the cause that
 *   corresponds to the event type.
 *
 *   The caller must hold inst->ri_lock.
 */
static void
contract_action(scf_handle_t *h, restarter_inst_t *inst, ctid_t id,
    uint32_t type)
{
	const char *fmri = inst->ri_i.i_fmri;
	const uint32_t handled = CT_PR_EV_EMPTY | CT_PR_EV_CORE |
	    CT_PR_EV_SIGNAL | CT_PR_EV_HWERR;

	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));

	/*
	 * If startd has stopped this contract, there is no need to
	 * stop it again.
	 */
	if (inst->ri_i.i_primary_ctid > 0 &&
	    inst->ri_i.i_primary_ctid_stopped)
		return;

	if ((type & handled) == 0) {
		/*
		 * There shouldn't be other events, since that's not how we
		 * set the terms.  Thus, just log an error and drive on.
		 */
		log_framework(LOG_NOTICE,
		    "%s: contract %ld received unexpected critical event "
		    "(%d)\n", fmri, id, type);
		return;
	}

	assert(instance_in_transition(inst) == 0);

	if (instance_is_wait_style(inst)) {
		/*
		 * We ignore all events; if they impact the
		 * process we're monitoring, then the
		 * wait_thread will stop the instance.
		 */
		log_framework(LOG_DEBUG,
		    "%s: ignoring contract event on wait-style service\n",
		    fmri);
		return;
	}

	/* A CT_PR_EV_EMPTY event is an RSTOP_EXIT request. */
	switch (type) {
	case CT_PR_EV_EMPTY:
		(void) stop_instance(h, inst, RSTOP_EXIT);
		break;

	case CT_PR_EV_CORE:
		(void) stop_instance(h, inst, RSTOP_CORE);
		break;

	case CT_PR_EV_SIGNAL:
		(void) stop_instance(h, inst, RSTOP_SIGNAL);
		break;

	case CT_PR_EV_HWERR:
		(void) stop_instance(h, inst, RSTOP_HWERR);
		break;
	}
}
/*
 * void *restarter_contracts_event_thread(void *)
 *   Listens to the process contract bundle for critical events, taking
 *   action on events from contracts we know we are responsible for.
 *
 *   The thread first waits for the graph load to complete, then reads
 *   critical events forever.  Events carrying the svc.configd cookie are
 *   left for fork_configd_thread to handle and acknowledge; all other
 *   events are acted upon (when the contract maps to an instance) and then
 *   acknowledged here.
 */
/*ARGSUSED*/
static void *
restarter_contracts_event_thread(void *unused)
{
	int fd, err;
	scf_handle_t *local_handle;

	/*
	 * Await graph load completion.  That is, stop here, until we've
	 * scanned the repository for contract - instance associations.
	 */
	MUTEX_LOCK(&st->st_load_lock);
	while (!st->st_load_complete || st->st_load_instances != 0)
		(void) pthread_cond_wait(&st->st_load_cv, &st->st_load_lock);
	MUTEX_UNLOCK(&st->st_load_lock);

	/*
	 * This is a new thread, and thus, gets its own handle
	 * to the repository.
	 */
	local_handle = libscf_handle_create_bound(SCF_VERSION);
	if (local_handle == NULL)
		uu_die("Unable to bind a new repository handle: %s\n",
		    scf_strerror(scf_error()));

	fd = open64(CTFS_ROOT "/process/pbundle", O_RDONLY);
	if (fd == -1)
		uu_die("process bundle open failed");

	/*
	 * Make sure we get all events (including those generated by configd
	 * before this thread was started).
	 */
	err = ct_event_reset(fd);
	assert(err == 0);

	for (;;) {
		int efd, sfd;
		ct_evthdl_t ev;
		uint32_t type;
		ctevid_t evid;
		ct_stathdl_t status;
		ctid_t ctid;
		restarter_inst_t *inst;
		uint64_t cookie;

		err = ct_event_read_critical(fd, &ev);
		if (err != 0) {
			log_error(LOG_WARNING,
			    "Error reading next contract event: %s",
			    strerror(err));
			continue;
		}

		evid = ct_event_get_evid(ev);
		ctid = ct_event_get_ctid(ev);
		type = ct_event_get_type(ev);

		/* Fetch cookie. */
		sfd = contract_open(ctid, "process", "status", O_RDONLY);
		if (sfd < 0) {
			ct_event_free(ev);
			continue;
		}

		err = ct_status_read(sfd, CTD_COMMON, &status);
		if (err != 0) {
			log_framework(LOG_WARNING, "Could not get status for "
			    "contract %ld: %s\n", ctid, strerror(err));

			startd_close(sfd);
			ct_event_free(ev);
			continue;
		}

		cookie = ct_status_get_cookie(status);

		ct_status_free(status);
		startd_close(sfd);

		/*
		 * svc.configd(1M) restart handling performed by the
		 * fork_configd_thread.  We don't acknowledge, as that thread
		 * will do so.
		 */
		if (cookie == CONFIGD_COOKIE) {
			ct_event_free(ev);
			continue;
		}

		/* On success the instance comes back with ri_lock held. */
		inst = contract_to_inst(ctid);
		if (inst != NULL) {
			log_framework(LOG_DEBUG,
			    "Received event %d for contract id "
			    "%ld (%s)\n", type, ctid,
			    inst->ri_i.i_fmri);

			contract_action(local_handle, inst, ctid, type);

			MUTEX_UNLOCK(&inst->ri_lock);
		} else {
			/*
			 * This can happen if we receive an EMPTY
			 * event for an abandoned contract.
			 */
			log_framework(LOG_DEBUG,
			    "Received event %d for unknown contract id "
			    "%ld\n", type, ctid);
		}

		/* Acknowledge the event whether or not we acted on it. */
		efd = contract_open(ct_event_get_ctid(ev), "process", "ctl",
		    O_WRONLY);
		if (efd != -1) {
			(void) ct_ctl_ack(efd, evid);
			startd_close(efd);
		}

		ct_event_free(ev);
	}

	/*NOTREACHED*/
	return (NULL);
}
/*
* Timeout queue, processed by restarter_timeouts_event_thread().
*/
timeout_queue_t *timeouts;
static uu_list_pool_t *timeout_pool;
typedef struct timeout_update {
pthread_mutex_t tu_lock;
pthread_cond_t tu_cv;
int tu_wakeup;
} timeout_update_t;
timeout_update_t *tu;
/*
 * Services for which is_timeout_ovr() reports a timeout override.  The
 * list is terminated by a NULL entry.  The table is never modified, so
 * both the strings and the pointers to them are const.
 */
static const char * const timeout_ovr_svcs[] = {
	"svc:/system/manifest-import:default",
	"svc:/network/initial:default",
	"svc:/network/service:default",
	"svc:/system/rmtmpfiles:default",
	"svc:/network/loopback:default",
	"svc:/network/physical:default",
	"svc:/system/device/local:default",
	"svc:/system/metainit:default",
	"svc:/system/filesystem/usr:default",
	"svc:/system/filesystem/minimal:default",
	"svc:/system/filesystem/local:default",
	NULL
};
/*
 * int is_timeout_ovr(restarter_inst_t *)
 *   Returns 1, after noting the override in the instance's log, if the
 *   instance's FMRI appears in timeout_ovr_svcs[]; returns 0 otherwise.
 */
int
is_timeout_ovr(restarter_inst_t *inst)
{
	const char * const *svc;

	/* Walk the NULL-terminated override table. */
	for (svc = timeout_ovr_svcs; *svc != NULL; svc++) {
		if (strcmp(inst->ri_i.i_fmri, *svc) != 0)
			continue;

		log_instance(inst, B_TRUE, "Timeout override by "
		    "svc.startd. Using infinite timeout");
		return (1);
	}

	return (0);
}
/*ARGSUSED*/
static int
timeout_compare(const void *lc_arg, const void *rc_arg, void *private)
{
hrtime_t t1 = ((const timeout_entry_t *)lc_arg)->te_timeout;
hrtime_t t2 = ((const timeout_entry_t *)rc_arg)->te_timeout;
if (t1 > t2)
return (1);
else if (t1 < t2)
return (-1);
return (0);
}
/*
 * void timeout_init()
 *   Set up the timeout machinery: the global timeout queue and its lock,
 *   the sorted list (ordered by timeout_compare()) that holds the entries,
 *   and the mutex/condition variable pair used to wake the timeouts
 *   thread.
 */
void
timeout_init()
{
	/* The queue itself, with the lock that guards its list. */
	timeouts = startd_zalloc(sizeof (*timeouts));
	(void) pthread_mutex_init(&timeouts->tq_lock, &mutex_attrs);

	timeout_pool = startd_list_pool_create("timeouts",
	    sizeof (timeout_entry_t), offsetof(timeout_entry_t, te_link),
	    timeout_compare, UU_LIST_POOL_DEBUG);
	assert(timeout_pool != NULL);

	timeouts->tq_list = startd_list_create(timeout_pool,
	    timeouts, UU_LIST_SORTED);
	assert(timeouts->tq_list != NULL);

	/* Wakeup state shared with the timeouts thread. */
	tu = startd_zalloc(sizeof (*tu));
	(void) pthread_cond_init(&tu->tu_cv, NULL);
	(void) pthread_mutex_init(&tu->tu_lock, &mutex_attrs);
}
/*
 * void timeout_insert(restarter_inst_t *, ctid_t, uint64_t)
 *   Queue a timeout for contract cid of the given instance, to expire
 *   timeout_sec seconds from now, and wake the timeouts thread.  The new
 *   entry is remembered in inst->ri_timeout.
 *
 *   A timeout so large that the expiry time could not be represented is
 *   treated as infinite: a note is logged and nothing is queued.
 *
 *   The caller must hold inst->ri_lock.
 */
void
timeout_insert(restarter_inst_t *inst, ctid_t cid, uint64_t timeout_sec)
{
	hrtime_t start, deadline;
	timeout_entry_t *te;
	uu_list_index_t where;

	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));

	start = gethrtime();

	/*
	 * If we overflow LLONG_MAX, we're never timing out anyways, so
	 * just return.
	 */
	if (timeout_sec >= (LLONG_MAX - start) / 1000000000LL) {
		log_instance(inst, B_TRUE, "timeout_seconds too large, "
		    "treating as infinite.");
		return;
	}

	/* hrtime is in nanoseconds.  Convert timeout_sec. */
	deadline = start + (timeout_sec * 1000000000LL);

	te = startd_alloc(sizeof (*te));
	te->te_timeout = deadline;
	te->te_ctid = cid;
	te->te_fmri = safe_strdup(inst->ri_i.i_fmri);
	te->te_logstem = safe_strdup(inst->ri_logstem);
	te->te_fired = 0;

	/* Find the entry's place in the sorted queue and insert it there. */
	MUTEX_LOCK(&timeouts->tq_lock);

	(void) uu_list_find(timeouts->tq_list, te, NULL, &where);
	uu_list_node_init(te, &te->te_link, timeout_pool);
	uu_list_insert(timeouts->tq_list, te, where);

	MUTEX_UNLOCK(&timeouts->tq_lock);

	assert(inst->ri_timeout == NULL);
	inst->ri_timeout = te;

	/* Let the timeouts thread know the queue is no longer empty. */
	MUTEX_LOCK(&tu->tu_lock);
	tu->tu_wakeup = 1;
	(void) pthread_cond_broadcast(&tu->tu_cv);
	MUTEX_UNLOCK(&tu->tu_lock);
}
/*
 * void timeout_remove(restarter_inst_t *, ctid_t)
 *   Cancel the instance's pending timeout, if it has one: take the entry
 *   off the timeout queue, free it along with its strings, and clear
 *   inst->ri_timeout.  The pending timeout must be for contract cid.
 *
 *   The caller must hold inst->ri_lock.
 */
void
timeout_remove(restarter_inst_t *inst, ctid_t cid)
{
	timeout_entry_t *te;

	assert(PTHREAD_MUTEX_HELD(&inst->ri_lock));

	te = inst->ri_timeout;
	if (te == NULL)
		return;

	assert(te->te_ctid == cid);

	MUTEX_LOCK(&timeouts->tq_lock);
	uu_list_remove(timeouts->tq_list, te);
	MUTEX_UNLOCK(&timeouts->tq_lock);

	free(te->te_fmri);
	free(te->te_logstem);
	startd_free(te, sizeof (timeout_entry_t));

	inst->ri_timeout = NULL;
}
/*
 * static int timeout_now()
 *   Fire every timeout which has expired: each such entry is logged,
 *   marked as fired, and has SIGKILL sent to its contract.  Fired entries
 *   are left on the queue.
 *
 *   Returns 0 if the queue still holds entries afterwards, or -1 if the
 *   queue is empty.
 */
static int
timeout_now()
{
	timeout_entry_t *e;
	hrtime_t now;
	int ret;

	now = gethrtime();

	MUTEX_LOCK(&timeouts->tq_lock);

	/*
	 * The list is sorted by expiry time, so only the leading run of
	 * entries with te_timeout <= now needs to be visited.
	 */
	e = uu_list_first(timeouts->tq_list);
	while (e != NULL && e->te_timeout <= now) {
		log_framework(LOG_WARNING, "%s: Method or service exit timed "
		    "out. Killing contract %ld.\n", e->te_fmri, e->te_ctid);
		log_instance_fmri(e->te_fmri, e->te_logstem, B_TRUE,
		    "Method or service exit timed out. Killing contract %ld",
		    e->te_ctid);

		e->te_fired = 1;
		(void) contract_kill(e->te_ctid, SIGKILL, e->te_fmri);

		e = uu_list_next(timeouts->tq_list, e);
	}

	ret = (uu_list_numnodes(timeouts->tq_list) > 0) ? 0 : -1;

	MUTEX_UNLOCK(&timeouts->tq_lock);

	return (ret);
}
/*
 * void *restarter_timeouts_event_thread(void *)
 *   Responsible for monitoring the method timeouts.  This thread must
 *   be started before any methods are called.
 *
 *   Timeouts are entered on a priority queue, which is processed by
 *   this thread.  As timeouts are specified in seconds, the queue is
 *   processed once a second for as long as it is not empty; when it is
 *   empty the thread sleeps until timeout_insert() signals a new entry.
 */
/*ARGSUSED*/
static void *
restarter_timeouts_event_thread(void *unused)
{
	for (;;) {
		/* Poll once a second while there are timeouts queued. */
		while (timeout_now() == 0)
			(void) sleep(1);

		/* The list is empty, wait until we have more timeouts. */
		MUTEX_LOCK(&tu->tu_lock);

		while (tu->tu_wakeup == 0)
			(void) pthread_cond_wait(&tu->tu_cv, &tu->tu_lock);

		tu->tu_wakeup = 0;
		MUTEX_UNLOCK(&tu->tu_lock);
	}

	/*NOTREACHED*/
	return (NULL);
}
/*
 * void restarter_start()
 *   Create the restarter's always-running threads: the timeout thread,
 *   the restarter event thread, the contract event thread, and the wait
 *   thread, in that order.
 */
void
restarter_start()
{
	void *(*const workers[])(void *) = {
		restarter_timeouts_event_thread,
		restarter_event_thread,
		restarter_contracts_event_thread,
		wait_thread
	};
	size_t i;

	for (i = 0; i < sizeof (workers) / sizeof (workers[0]); i++)
		(void) startd_thread_create(workers[i], NULL);
}
/*
 * void restarter_init()
 *   Initialize the restarter's global data structures: the list pool and
 *   sorted list of instances (with its lock), the list pools used for
 *   per-instance event queues and for contract lists, and the contract
 *   hash.
 */
void
restarter_init()
{
	/* Instances live on a sorted global list. */
	restarter_instance_pool = startd_list_pool_create(
	    "restarter_instances", sizeof (restarter_inst_t),
	    offsetof(restarter_inst_t, ri_link), restarter_instance_compare,
	    UU_LIST_POOL_DEBUG);

	(void) memset(&instance_list, 0, sizeof (instance_list));
	(void) pthread_mutex_init(&instance_list.ril_lock, &mutex_attrs);

	instance_list.ril_instance_list = startd_list_create(
	    restarter_instance_pool, &instance_list, UU_LIST_SORTED);

	/* Event queue entries and contract entries need no comparator. */
	restarter_queue_pool = startd_list_pool_create(
	    "restarter_instance_queue",
	    sizeof (restarter_instance_qentry_t),
	    offsetof(restarter_instance_qentry_t, riq_link), NULL,
	    UU_LIST_POOL_DEBUG);

	contract_list_pool = startd_list_pool_create(
	    "contract_list", sizeof (contract_entry_t),
	    offsetof(contract_entry_t, ce_link), NULL,
	    UU_LIST_POOL_DEBUG);

	contract_hash_init();

	log_framework(LOG_DEBUG, "Initialized restarter\n");
}