rcm_lock.c revision 7c478bd95313f5f23a4c958a745db2134aa03244
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*
* Copyright 2005 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include "rcm_impl.h"
#include "rcm_module.h"
/*
* Global locks
*/
/*
* Daemon state file
*/
static int state_fd;
#define RCM_STATE_FILE "/var/run/rcm_daemon_state"
/*
* Daemon timeout value
*/
/*
* Struct for a list of outstanding rcm requests
*/
/*
* One request slot; a req_list_t header is followed by an array of these.
*/
typedef struct {
int seq_num; /* sequence number of request */
int state; /* current state; presumably an RCM_STATE_* value -- confirm */
int type; /* resource(device) type */
} req_t;
/*
* Header placed at the start of a request list. The req_t slots are not
* a member of this struct; they follow it directly in memory.
*/
typedef struct {
/*
* NOTE(review): n_req is decremented whenever an entry is removed, so it
* presumably counts the slots currently in use -- confirm.
*/
int n_req;
int n_req_max; /* number of req_t's to follow */
int n_seq_max; /* last sequence number */
int idle_timeout; /* persist idle timeout value */
/* more req_t follows */
} req_list_t;
/*
* Request lists. dr_req_list is compared against MAP_FAILED in
* rcmd_lock_init(), i.e. it is the mapping of the daemon state file;
* it stays NULL-checked elsewhere because it may not be set up yet
* during daemon initialization.
*/
static req_list_t *dr_req_list;
static req_list_t *info_req_list;
/* Fixed message strings used when reporting lock state to callers */
static const char *locked_info = "DR operation in progress";
static const char *locked_err = "Resource is busy";
/* Forward declarations for routines defined later in this file */
static int rcmd_get_state();
static void add_to_polling_list(pid_t);
static void remove_from_polling_list(pid_t);
void start_polling_thread();
static void stop_polling_thread();
/*
* Initialize request lists required for locking
*/
void
rcmd_lock_init(void)
{
/*
* NOTE(review): several statements of this function are not visible
* here (unbalanced braces, dangling argument lists). The notes added
* below describe only what can be seen.
*/
int size;
/*
* Start info list with one slot, then grow on demand.
*/
/*
* Open daemon state file and map in contents
*/
if (state_fd == -1) {
}
}
/*
* A zero size is replaced by the size of a bare list header.
* NOTE(review): size is presumably the current length of the state
* file, and the message below belongs to a failed truncate -- confirm.
*/
if (size == 0) {
size = sizeof (req_list_t);
gettext("cannot truncate %s: %s\n"),
}
}
/*LINTED*/
MAP_SHARED, state_fd, 0);
if (dr_req_list == MAP_FAILED) {
}
/*
* Initial size is one entry
*/
/*
* When n_req_max is still zero the function returns here, before the
* recovery step below.
*/
if (dr_req_list->n_req_max == 0) {
return;
}
/*
* Recover the daemon state
*/
}
/*
* Get a unique sequence number--to be called with rcm_req_lock held.
*/
static int
{
int number;
if (dr_req_list == NULL)
return (0);
dr_req_list->n_seq_max++;
return (number);
}
/*
* Find entry in list with the same resource name and sequence number.
* If seq_num == -1, no seq_num matching is required.
*/
static req_t *
{
int i;
/*
* Look for entry with the same resource and seq_num.
* Also match RCM_FILESYS field in flag.
*/
/* stale entry */
continue;
/*
* We need to distinguish a file system root from the directory
* it is mounted on.
*
* Applications are not aware of any difference between the
* two, but the system keeps track of it internally by
* checking for mount points while traversing file path.
* In a similar spirit, RCM is keeping this difference as
* an implementation detail.
*/
/* different resource */
continue;
/* different base seqnum */
continue;
}
return (NULL);
}
/*
* Get the next empty req_t entry. If no entry exists, grow the list.
*/
static req_t *
{
int i;
/*
* If the list is full, grow the list and return the first
* entry in the new portion.
*/
int newsize;
n_req_max += N_REQ_CHUNK;
sizeof (req_t);
if (listp == &info_req_list) {
gettext("cannot truncate %s: %s\n"),
/*LINTED*/
MAP_FAILED) {
gettext("cannot mmap %s: %s\n"),
}
/* Initialize the new entries */
}
}
/*
* List contains empty slots, find it.
*/
for (i = 0; i < n_req_max; i++) {
break;
}
}
}
/*
* When one resource depends on multiple resources, it's possible that
* rcm_get_info can be called multiple times on the resource, resulting
* in duplicate information. By assigning a unique sequence number to
* each rcm_get_info operation, this duplication can be eliminated.
*
* Insert a dr entry in info_req_list
*/
int
{
int error = 0;
char *device;
(void) mutex_lock(&rcm_req_lock);
/*
* Look for entry with the same resource and seq_num.
* If it exists, we return an error so that such
* information is not gathered more than once.
*/
error = -1;
goto out;
}
/*
* Get empty entry and fill in seq_num and device.
*/
out:
(void) mutex_unlock(&rcm_req_lock);
return (error);
}
/*
* Remove all entries associated with seq_num from info_req_list
*/
void
info_req_remove(int seq_num)
{
int i;
/* Discard the low SEQ_NUM_SHIFT bits of seq_num before scanning */
seq_num >>= SEQ_NUM_SHIFT;
/* The whole scan of info_req_list runs with rcm_req_lock held */
(void) mutex_lock(&rcm_req_lock);
/* remove all entries with seq_num */
for (i = 0; i < info_req_list->n_req_max; i++) {
/*
* NOTE(review): the conditions guarding the two `continue'
* statements are not visible here; presumably they skip empty
* slots and slots with a different sequence number -- confirm.
*/
continue;
continue;
/* one fewer entry accounted for in the list header */
info_req_list->n_req--;
}
/*
* We don't shrink the info_req_list size for now.
*/
(void) mutex_unlock(&rcm_req_lock);
}
/*
* Checking lock conflicts. There is a conflict if:
* - attempt to DR a node when either its ancestor or descendant
* is in the process of DR
* - attempt to register for a node when its ancestor is locked for DR
*/
static int
{
int i, ret = RCM_SUCCESS;
if (info)
/*
* During daemon initialization, don't check locks
*/
if (dr_req_list == NULL)
return (ret);
for (i = 0; i < dr_req_list->n_req; i++) {
/*
* Skip empty entries
*/
continue;
/*
* Make sure that none of the ancestors of dr_dev is
* being operated upon.
*/
/*
* An exception to this is the filesystem.
* We should allow a filesystem rooted at a
* child directory to be unmounted.
*/
continue;
ret = RCM_CONFLICT;
break;
}
/*
* Check descendants only for DR request.
*
* Could have multiple descendants doing DR,
* we want to find them all.
*/
ret = RCM_CONFLICT;
/* don't break here, need to find all conflicts */
}
}
return (ret);
}
/*
* Check for lock conflicts for DR operation or client registration
*/
int
rcm_info_t **info)
{
int result;
char *device;
return (result);
}
/*
 * Report whether `state' is one of the in-progress (transitional)
 * resource states: offlining, suspending, resuming, onlining or
 * removing.
 *
 * Returns 1 when it is, meaning the caller should be asked to try
 * again, and 0 for every other state value.
 */
static int
transition_state(int state)
{
	return (state == RCM_STATE_OFFLINING ||
	    state == RCM_STATE_SUSPENDING ||
	    state == RCM_STATE_RESUMING ||
	    state == RCM_STATE_ONLINING ||
	    state == RCM_STATE_REMOVING);
}
/*
* Update a dr entry in dr_req_list
*/
/*ARGSUSED*/
static int
{
/*
* Find request entry. If not found, return RCM_FAILURE
*/
switch (state) {
case RCM_STATE_OFFLINING:
case RCM_STATE_SUSPENDING:
/* could be re-do operation, no error message */
break;
default:
"update non-existing resource %s\n", device);
}
return (RCM_FAILURE);
}
/*
* During initialization, update is unconditional (forced)
* in order to bring the daemon up in a sane state.
*/
if (rcmd_get_state() == RCMD_INIT)
goto update;
/*
* Don't allow update with mismatched initiator pid. This could happen
* as part of normal operation.
*/
gettext("mismatched dr initiator pid: %ld %ld\n"),
goto failure;
}
"dr_req_update_entry: state=%d, device=%s\n",
/*
* Check that the state transition is valid
*/
switch (state) {
case RCM_STATE_OFFLINING:
/*
* This is the case of re-offlining, which applies only
* if a previous attempt failed.
*/
gettext("%s: invalid offlining from state %d\n"),
goto failure;
}
break;
case RCM_STATE_SUSPENDING:
/*
* This is the case of re-suspending, which applies only
* if a previous attempt failed.
*/
gettext("%s: invalid suspending from state %d\n"),
goto failure;
}
break;
case RCM_STATE_RESUMING:
"%s: invalid resuming from state %d\n",
goto failure;
}
break;
case RCM_STATE_ONLINING:
gettext("%s: invalid onlining from state %d\n"),
goto failure;
}
break;
case RCM_STATE_REMOVING:
gettext("%s: invalid removing from state %d\n"),
goto failure;
}
break;
case RCM_STATE_SUSPEND_FAIL:
break;
case RCM_STATE_OFFLINE_FAIL:
break;
case RCM_STATE_SUSPEND:
break;
case RCM_STATE_OFFLINE:
break;
case RCM_STATE_ONLINE:
break;
default: /* shouldn't be here */
return (RCM_FAILURE);
}
/*
* update the state, interval, and sequence number; sync state file
*/
if (interval)
else
return (RCM_SUCCESS);
}
/*
* A request may be left in a transition state because the operator
* typed ctrl-C. In this case, the daemon thread continues to run
* and will eventually put the state in a non-transitional state.
*
* To be safe, we return EAGAIN to allow librcm to loop and retry.
* If we are called from a module, loop & retry could result in a
* deadlock. The caller will check for this case and turn EAGAIN
* into RCM_CONFLICT.
*/
return (EAGAIN);
}
return (RCM_CONFLICT);
}
/*
* Insert a dr entry in dr_req_list
*/
int
{
int error;
char *device;
return (EINVAL);
(void) mutex_lock(&rcm_req_lock);
/*
* In the re-offline/suspend case, attempt to update dr request.
*
* If this succeeds, return success;
* If this fails because of a conflict, return error;
* If this fails because no entry exists, add a new entry.
*/
info);
switch (error) {
case RCM_FAILURE:
/* proceed to add a new entry */
break;
case RCM_CONFLICT:
case RCM_SUCCESS:
case EAGAIN:
default:
goto out;
}
/*
* Check for lock conflicts
*/
if (error != RCM_SUCCESS) {
goto out;
}
/*
* Get empty request entry, fill in values and sync state file
*/
/* cache interval for failure recovery */
if (interval)
else
/*
* Add initiator pid to polling list
*/
out:
(void) mutex_unlock(&rcm_req_lock);
return (error);
}
/*
* Update a dr entry in dr_req_list
*/
/*ARGSUSED*/
int
rcm_info_t **info)
{
int error;
(void) mutex_lock(&rcm_req_lock);
info);
(void) mutex_unlock(&rcm_req_lock);
return (error);
}
/*
* This function scans the DR request list for the next, non-removed
* entry that is part of the specified sequence. The 'device' name
* of the entry is copied into the provided 'rsrc' buffer.
*
* The 'rsrc' buffer is required because the DR request list is only
* locked during the duration of this lookup. Giving a direct pointer
* to something in the list would be unsafe.
*/
int
{
int i;
int len;
int retval = RCM_FAILURE;
return (RCM_FAILURE);
}
(void) mutex_lock(&rcm_req_lock);
for (i = 0; i < dr_req_list->n_req_max; i++) {
/* Skip removed or non-matching entries */
continue;
}
/* Copy the next-matching 'device' name into 'rsrc' */
if (len < MAXPATHLEN) {
}
break;
}
(void) mutex_unlock(&rcm_req_lock);
return (retval);
}
/*
* Remove a dr entry in dr_req_list
*/
void
{
(void) mutex_lock(&rcm_req_lock);
/* find entry */
(void) mutex_unlock(&rcm_req_lock);
return;
}
dr_req_list->n_req--;
/*
* remove pid from polling list
*/
/*
* We don't shrink the dr_req_list size for now.
* Shouldn't cause big memory leaks.
*/
(void) mutex_unlock(&rcm_req_lock);
}
/*
* Return the list of ongoing dr operation requests
*/
{
int i;
char *rsrc;
int len;
(void) mutex_lock(&rcm_req_lock);
for (i = 0; i < dr_req_list->n_req_max; i++) {
continue;
continue;
} else {
}
gettext("failed (nvlist_alloc=%s).\n"),
}
gettext("failed (nvlist_add=%s).\n"),
}
gettext("failed (nvlist_add=%s).\n"),
}
gettext("failed (nvlist_add=%s).\n"),
}
gettext("failed (nvlist_add=%s).\n"),
}
(char *)locked_info)) {
gettext("failed (nvlist_add=%s).\n"),
}
}
(void) mutex_unlock(&rcm_req_lock);
return (result);
}
/*
* Eliminate entries whose dr initiator is no longer running
* and recover daemon state during daemon restart.
*
* This routine is called either during daemon initialization,
* after all modules have registered resources, or from the cleanup
* thread. In either case, it is the only thread running in the
* daemon.
*/
void
{
int i;
struct clean_list {
struct clean_list *next;
char *rsrcname;
int seq_num;
int state;
char *rsrcnames[2];
"clean_dr_list(): look for stale dr initiators\n");
/*
* Make a list of entries to recover. This is necessary because
* the recovery operation will modify dr_req_list.
*/
(void) mutex_lock(&rcm_req_lock);
for (i = 0; i < dr_req_list->n_req_max; i++) {
/* skip empty entries */
continue;
continue;
/* skip cascade operations */
continue;
/*
* In the cleanup case, ignore entries with initiators alive
*/
if ((rcmd_get_state() == RCMD_CLEANUP) &&
continue;
}
(void) mutex_unlock(&rcm_req_lock);
return;
/*
* If everything worked normally, we shouldn't be here.
* Since we are here, something went wrong, so say something.
*/
if (rcmd_get_state() == RCMD_INIT) {
"unexpectedly, recovering previous daemon state\n"));
} else {
"died, attempting automatic recovery\n"));
}
while (list) {
case RCM_STATE_OFFLINE_QUERY:
/* redo */
(void) process_resource_offline(rsrcnames,
} else {
/* undo */
(void) notify_resource_online(rsrcnames,
}
break;
case RCM_STATE_OFFLINE:
case RCM_STATE_OFFLINE_FAIL:
/* redo */
(void) process_resource_offline(rsrcnames,
} else {
/* undo */
(void) notify_resource_online(rsrcnames,
}
break;
case RCM_STATE_SUSPEND_QUERY:
/* redo */
(void) process_resource_suspend(rsrcnames,
} else {
/* undo */
(void) notify_resource_resume(rsrcnames,
}
break;
case RCM_STATE_SUSPEND:
case RCM_STATE_SUSPEND_FAIL:
/* redo */
(void) process_resource_suspend(rsrcnames,
NULL);
} else {
/* undo */
(void) notify_resource_resume(rsrcnames,
}
break;
case RCM_STATE_OFFLINING:
case RCM_STATE_ONLINING:
break;
case RCM_STATE_SUSPENDING:
case RCM_STATE_RESUMING:
break;
case RCM_STATE_REMOVING:
break;
default:
gettext("%s in unknown state %d\n"),
break;
}
}
}
/*
* Selected thread blocking based on event type
*/
/*
* Change barrier state:
* RCMD_INIT - daemon is initializing, only register allowed
* RCMD_NORMAL - normal daemon processing
* RCMD_CLEANUP - cleanup thread is waiting or running
*/
int
{
}
void
rcmd_set_state(int state)
{
/*
* The state transition is as follows:
* INIT --> NORMAL <---> CLEANUP
* The implementation favors the cleanup thread
*/
/*
* NOTE(review): as visible here, no branch of this switch performs any
* action; the statements that record the new state and do the wait
* described below are not shown -- confirm against the full source.
*/
switch (state) {
case RCMD_CLEANUP:
/*
* Wait for existing threads to exit
*/
break;
case RCMD_INIT:
case RCMD_NORMAL:
default:
break;
}
}
/*
* Increment daemon thread count
*/
int
rcmd_thr_incr(int cmd)
{
/*
* Value handed back to the caller: -1 when no sequence number is
* needed, otherwise whatever get_seq_number() returns.
*/
int seq_num;
/*
* Set wanted flag
*/
/*
* Wait till it is safe for daemon to perform the operation
*
* NOTE: if a module registers by passing a request to the
* client process, we may need to allow register
* to come through during daemon initialization.
*/
/*
* NOTE(review): the start of the condition below is not visible here;
* only its CMD_REGISTER / CMD_UNREGISTER terms can be seen.
*/
(cmd == CMD_REGISTER) ||
(cmd == CMD_UNREGISTER)) {
/*
* Event passthru and register ops don't need sequence number
*/
seq_num = -1;
} else {
/*
* Non register operation gets a sequence number
*/
seq_num = get_seq_number();
}
if ((cmd == CMD_OFFLINE) ||
(cmd == CMD_SUSPEND) ||
(cmd == CMD_GETINFO)) {
/*
* For these operations, need to ask modules to
* register any new resources that came online.
*
* NOTE(review): part of the following sentence appears to be
* missing from this copy of the comment.
*
* to register with rcm before using system resources.
* Certain registration ops may fail during sync, which
* indicates race conditions. This cannot be avoided
*/
rcmd_db_sync();
}
return (seq_num);
}
/*
* Decrement thread count
*/
void
{
/*
*/
}
/*
* Wakeup all waiting threads as a result of SIGHUP
*/
static int sighup_received = 0;
void
{
sighup_received = 1;
}
void
rcmd_start_timer(int timeout)
{
/*
* NOTE(review): several statements of this function are not visible
* here (empty if/else arms, dangling argument lists). The notes added
* below describe only what can be seen. The sign of timeout selects
* the behaviour: negative never shuts down, zero and positive are
* handled by the branches below.
*/
if (timeout == 0)
else
if (timeout > 0) {
}
for (;;) {
int idletime;
int is_active;
if (timeout > 0)
&abstime);
else
/*
* If sighup received, change timeout to 0 so the daemon is
* shut down at the first possible moment
*/
if (sighup_received)
timeout = 0;
/*
* If timeout is negative, never shutdown the daemon
*/
if (timeout < 0)
continue;
/*
* NOTE(review): the start of this expression is not visible; its
* last term is "DR requests are outstanding" (n_req != 0) and the
* result is presumably stored in is_active -- confirm. While the
* daemon is active the loop keeps waiting.
*/
(dr_req_list->n_req != 0));
if (is_active) {
continue;
}
/*
* If idletime is less than timeout, continue to wait
*/
continue;
}
break;
}
/* Loop left: finish the script subsystem and exit the daemon */
(void) script_main_fini();
rcmd_exit(0);
/*NOTREACHED*/
}
/*
* Code related to polling client pid's
* Not declared as static so that we can find this structure easily
* in the core file.
*/
struct {
int n_pids; /* presumably the number of pids currently polled -- confirm */
int n_max_pids; /* presumably the allocated capacity; 0 before first allocation -- confirm */
int signaled; /* NOTE(review): meaning not visible from here -- confirm */
int *refcnt; /* per-pid reference counts (see add/remove_from_polling_list) */
} polllist;
static int
{
int i;
return (i);
}
}
return (-1);
}
/*
* Resize buffer for new pids
*/
static int
{
const int n_chunk = 10;
int n_max;
return (index);
}
if (polllist.n_max_pids == 0) {
} else {
n_max * sizeof (int));
}
return (index);
}
/*
* rcm_req_lock must be held
*/
static void
{
char procfile[MAXPATHLEN];
return;
/*
* Need to stop the poll thread before manipulating the polllist
* since poll thread may possibly be using polllist.fds[] and
* polllist.n_pids. As an optimization, first check if the pid
* is already in the polllist. If it is, there is no need to
* stop the poll thread. Just increment the pid reference count
* and return;
*/
if (index != -1) {
return;
}
/*
* In an attempt to stop the poll thread we may have released
* and reacquired rcm_req_lock. So find the index again.
*/
if (index != -1) {
goto done;
}
/*
* Open a /proc file
*/
goto done;
}
/*
* add pid to polllist
*/
index = get_pid_index();
done:
}
/*
* rcm_req_lock must be held
*/
static void
{
int i, index;
return;
/*
* Need to stop the poll thread before manipulating the polllist
* since poll thread may possibly be using polllist.fds[] and
* polllist.n_pids. As an optimization, first check the pid
* reference count. If the pid reference count is greater than 1
* there is no need to stop the polling thread.
*/
if (index == -1) {
return;
}
/*
* decrement the pid refcnt
*/
return;
}
/*
* In an attempt to stop the poll thread we may have released
* and reacquired rcm_req_lock. So find the index again.
*/
if (index == -1) {
goto done;
}
goto done;
/*
* refcnt down to zero, delete pid from polling list
*/
sizeof (struct pollfd));
}
done:
}
void
{
}
void
{
(void) mutex_lock(&rcm_req_lock);
}
(void) mutex_unlock(&rcm_req_lock);
}
/*ARGSUSED*/
static void *
{
/*
* Unblock SIGUSR1 to allow polling thread to be killed
*/
(void) sigemptyset(&mask);
/*
* block SIGUSR1 to avoid being killed while holding a lock
*/
(void) sigemptyset(&mask);
need_cleanup = 1;
return (NULL);
}
/*
* rcm_req_lock must be held
*/
void
{
int err;
if (rcmd_get_state() != RCMD_NORMAL)
return;
return;
else
gettext("failed to create polling thread: %s\n"),
}
/*
* rcm_req_lock must be held
*/
static void
{
int err;
else
/*
* thr_kill shouldn't have failed since the
* poll thread id and the signal are valid.
* So log an error. Since when thr_kill
* fails no signal is sent (as per man page),
* the cond_wait below will wait until the
* poll thread exits by some other means.
* The poll thread, for example, exits on its
* own when any DR initiator process that it
* is currently polling exits.
*/
"fail to kill polling thread %d: %s\n"),
}
}
}