monitor.c revision 4a6a5421113ab662a665c62ed6a24b61a5a36950
/*
SSSD
Service monitor
Copyright (C) Simo Sorce 2008
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <time.h>
#include <string.h>
#ifdef HAVE_SYS_INOTIFY_H
#endif
#include <unistd.h>
#include <fcntl.h>
/* Needed for res_init() */
#include <resolv.h>
#include "popt.h"
#include "tevent.h"
#include "confdb/confdb_setup.h"
#include "collection.h"
#include "ini_config.h"
#include "sbus/sssd_dbus.h"
#include "monitor/monitor_interfaces.h"
/* ping time cannot be less than once every few seconds or the
* monitor will go crazy hammering children with messages */
#define MONITOR_DEF_PING_TIME 10
/* Special value to leave the Kerberos Replay Cache set to use
* the libkrb5 defaults
*/
#define KRB5_RCACHE_DIR_DISABLE "__LIBKRB5_DEFAULTS__"
int cmdline_debug_level;
struct svc_spy;
/* Per-service bookkeeping kept by the monitor for one supervised service.
 * NOTE(review): code elsewhere in this file reads svc->last_restart, which
 * is not among the fields visible here -- confirm the declaration is
 * complete. */
struct mt_svc {
/* sbus connection to the service; according to the comment in
 * svc_spy_destructor() this pointer is NULLed when the connection is freed */
struct sbus_connection *conn;
/* presumably set only for data-provider back ends -- TODO confirm */
char *provider;
/* presumably the command line used to launch the service -- TODO confirm */
char *command;
char *name;
/* presumably the name the service registers with -- TODO confirm */
char *identity;
/* ping interval; MONITOR_DEF_PING_TIME looks like the default -- confirm */
int ping_time;
/* set to true once the service has been marked as started */
bool svc_started;
/* presumably a restart counter -- TODO confirm against the restart logic */
int restarts;
/* reset to 0 when a ping is answered, incremented when a ping times out */
int failed_pongs;
/* NOTE(review): not referenced in the visible code -- confirm its use */
int debug_level;
/* timer for the periodic ping of this service */
struct tevent_timer *ping_ev;
};
/* One watched configuration file; entries are chained through next/prev
 * and hang off config_file_ctx.callbacks. */
struct config_file_callback {
/* watch descriptor; the inotify handler looks entries up by it and logs
 * "Unknown watch descriptor" when none matches */
int wd;
/* presumably counts attempts to restore a lost inotify watch -- confirm */
int retries;
/* file being watched; used in the "Could not stat file" messages --
 * presumably its path, confirm */
char *filename;
/* list links */
struct config_file_callback *next;
struct config_file_callback *prev;
};
/* State shared by the configuration-file change handlers. */
struct config_file_ctx {
/* presumably the timer that delays the queued update -- TODO confirm */
struct tevent_timer *timer;
/* true while an update is already queued (the handler returns early in
 * that case); cleared after the monitor has been told to signal children */
bool needs_update;
/* list of watched files */
struct config_file_callback *callbacks;
};
/* Top-level state of the monitor process.
 * NOTE(review): code elsewhere in this file reads ctx->service_ctx and
 * ctx->domain_ctx, which are not among the fields visible here -- confirm
 * the declaration is complete. */
struct mt_ctx {
/* tevent context the monitor's timers and signal handlers are attached to */
struct tevent_context *ev;
/* handle to the configuration database */
struct confdb_ctx *cdb;
/* presumably the list of configured domains -- TODO confirm */
struct sss_domain_info *domains;
/* presumably a NULL-terminated array of service names, the shape that
 * check_services() iterates over -- TODO confirm */
char **services;
/* connection for the monitor acting as D-BUS server */
struct sbus_connection *sbus_srv;
/* configuration-file watch state */
struct config_file_ctx *file_ctx;
/* inotify descriptor, kept here because monitoring it is global */
int inotify_fd;
/* presumably how long a new client may take to identify -- TODO confirm */
int service_id_timeout;
/* when false, the child-status handler skips its waitpid processing */
bool check_children;
/* set to true once startup of the (non-provider) services is triggered */
bool services_started;
/* presumably the netlink watcher behind network_status_change_cb() --
 * TODO confirm */
struct netlink_ctx *nlctx;
/* NOTE(review): not referenced in the visible code -- confirm its use */
const char *conf_path;
};
static int monitor_cleanup(void);
/* Callback invoked with an opaque cb_data pointer when the network status
 * changes (per its name). According to the log text and comment below it
 * tells providers, but not services, to reset their offline status.
 * NOTE(review): the body is truncated in this view -- confirm against the
 * full source. */
static void network_status_change_cb(void *cb_data)
{
"signaling providers to reset offline status\n"));
/* Don't signal services, only providers */
}
}
}
/* dbus_get_monitor_version
* Return the monitor version over D-BUS */
struct sbus_connection *conn)
{
if (!ret) {
return EIO;
}
/* send reply back */
return EOK;
}
/* Temporary per-connection context that exists while a newly connected
 * client has not yet identified itself; it is discarded once init
 * completes. */
struct mon_init_conn {
/* the connection awaiting identification */
struct sbus_connection *conn;
/* identification timeout; cancelled first thing when the client registers */
struct tevent_timer *timeout;
};
/* registers a new client.
* if operation is successful also sends back the Monitor version */
struct sbus_connection *conn)
{
struct mon_init_conn *mini;
void *data;
char *svc_name;
int ret;
if (!mini) {
DEBUG(0, ("Connection holds no valid init data\n"));
return EINVAL;
}
/* First thing, cancel the timeout */
if (!dbret) {
/* FIXME: should we just talloc_zfree(conn) ? */
goto done;
}
/* search this service in the list */
while (svc) {
if (ret == 0) {
break;
}
}
if (!svc) {
DEBUG(0, ("Unable to find peer [%s] in list of services,"
" killing connection!\n", svc_name));
/* FIXME: should we just talloc_zfree(conn) ? */
goto done;
}
/* Fill in svc structure with connection data */
if (ret) {
goto done;
}
/* reply that all is ok */
if (!dbret) {
return EIO;
}
/* send reply back */
done:
/* init complete, get rid of temp init context */
return EOK;
}
struct svc_spy {
};
/* Destructor callback for a service object (presumably registered through
 * talloc -- confirm). Every visible path returns 0.
 * NOTE(review): the body is truncated in this view. */
static int svc_destructor(void *mem)
{
if (!svc) {
/* unexpected: no service object, nothing to do */
return 0;
}
/* try to delist service */
}
/* svc is being freed, neutralize the spy */
}
return 0;
}
/* Destructor callback for the "spy" attached to a service connection
 * (presumably registered through talloc -- confirm). Every visible path
 * returns 0.
 * NOTE(review): the body is truncated in this view. */
static int svc_spy_destructor(void *mem)
{
if (!spy) {
/* unexpected: no spy object, nothing to do */
return 0;
}
/* svc->conn has been freed, NULL the pointer in svc */
return 0;
}
{
return EOK;
}
{
int ret;
int i;
svc->svc_started = true;
/* we need to attach a spy to the connection structure so that if some code
* frees it we can zero it out in the service structure. Otherwise we may
* try to access or even free, freed memory. */
if (ret) {
DEBUG(0, ("Failed to attch spy\n"));
goto done;
}
if (!ctx->services_started) {
/* check if all providers are up */
break;
}
}
if (iter) {
/* there are still unstarted providers */
goto done;
}
ctx->services_started = true;
/* then start all services */
}
}
done:
return ret;
}
struct tevent_timer *te,
{
int i;
if (!ctx->services_started) {
"forcing services startup!\n"));
ctx->services_started = true;
/* then start all services */
}
}
}
{
struct tevent_timer *to;
/* 5 seconds should be plenty */
if (!to) {
DEBUG(0,("Out of memory?!\n"));
return ENOMEM;
}
return EOK;
}
struct sbus_method monitor_methods[] = {
};
struct sbus_interface monitor_server_interface = {
};
/* monitor_dbus_init
* Set up the monitor service as a D-BUS Server */
{
char *monitor_address;
int ret;
return ret;
}
return ret;
}
{
int ret;
if (svc->last_restart != 0) {
/* it was long ago reset restart threshold */
}
}
/* restart the process */
return;
}
/* Shut down the current ping timer so it will restart
* cleanly in start_service()
*/
return;
}
return;
}
struct tevent_timer *te,
{
bool process_alive = true;
int ret;
switch (ret) {
case EOK:
/* all fine */
break;
case ECHILD:
process_alive = false;
break;
default:
/* TODO: should we tear down it ? */
break;
}
if (process_alive) {
switch (ret) {
case EOK:
/* all fine */
break;
case ENXIO:
break;
default:
/* TODO: should we tear it down ? */
break;
}
/* too long since we last heard of this process */
("Killing service [%s], not responding to pings!\n",
process_alive = false;
}
}
if (!process_alive) {
return;
}
/* all fine, set up the task checker again */
}
{
DEBUG(0, ("failed to add event, monitor offline for [%s]!\n",
/* FIXME: shutdown ? */
}
}
struct tevent_timer *te,
{
int status;
if (!ctx->check_children) {
goto done;
}
errno = 0;
if (pid == 0) {
goto done;
}
if (pid == -1) {
DEBUG(0, ("waitpid returned -1 (errno:%d[%s])\n",
goto done;
}
/* let's see if it is a known service, and try to restart it */
goto done;
}
}
}
done:
}
{
DEBUG(0, ("failed to add global checker event! PANIC TIME!\n"));
/* FIXME: is this right? Should we try to clean up first? */
exit(-1);
}
}
{
int ret;
DEBUG(0,("Sending signal to child (%s:%d) failed! "
"Ignore and pretend child is dead.\n",
}
return ret;
}
{
if (!reply) {
/* reply should never be null. This function shouldn't be called
* until reply is valid or timeout has occurred. If reply is NULL
* here, something is seriously wrong and we should bail out.
*/
DEBUG(0, ("A reply callback was called but no reply was received"
" and no timeout occurred\n"));
/* Destroy this connection */
return;
}
/* TODO: Handle cases where the call has timed out or returned
* with an error.
*/
}
const char *filename)
{
int ret;
if(ret != 0) {
return EIO;
}
/* Signal all services to reload their DNS configuration */
}
return EOK;
}
{
int ret;
/* The local provider requires no signaling */
return EOK;
}
/* Avoid a race condition where we are trying to
* order a service to reload that hasn't started
* yet.
*/
return EIO;
}
if (!msg) {
DEBUG(0,("Out of memory?!\n"));
return ENOMEM;
}
return ret;
}
{
}
{
}
{
}
{
}
{
while (dom) {
return EINVAL;
}
while (other) {
}
}
}
return EOK;
}
{
while (dom) {
count++;
}
if (count > 1) {
break;
}
}
if (count > 1) {
return EINVAL;
}
return EOK;
}
/* Walks the NULL-terminated services array. Returns one of its entries
 * (presumably the first name that is not a known service -- confirm) or
 * NULL when no entry is rejected.
 * NOTE(review): the body is truncated in this view. */
static char *check_services(char **services)
{
int i;
int ii;
/* Check if services we are about to start are in the list of known ones */
for (i = 0; services[i]; i++) {
break;
}
}
return services[i];
}
}
return NULL;
}
{
int ret;
int timeout_seconds;
10, &timeout_seconds);
return ret;
}
if(!ctx->service_ctx) {
return ENOMEM;
}
DEBUG(0, ("No services configured!\n"));
return EINVAL;
}
return EINVAL;
}
if(!ctx->domain_ctx) {
return ENOMEM;
}
DEBUG(0, ("No domains configured.\n"));
return ret;
}
DEBUG(0, ("More than one local domain configured.\n"));
return ret;
}
return ret;
}
return EOK;
}
{
int ret;
char *path;
if (!svc) {
return ENOMEM;
}
return ENOMEM;
}
return ENOMEM;
}
if (!path) {
return ENOMEM;
}
return ret;
}
);
return ENOMEM;
}
if (cmdline_debug_level != SSSDBG_UNRESOLVED) {
);
return ENOMEM;
}
}
);
return ENOMEM;
}
}
);
return ENOMEM;
}
}
if (debug_to_file) {
);
return ENOMEM;
}
}
}
return ret;
}
/* 'timeout = 0' should be translated to the default */
}
return EOK;
}
{
int ret;
return ret;
}
}
return ret;
}
{
int ret;
char *path;
if (!svc) {
return ENOMEM;
}
return ENOMEM;
}
return ENOMEM;
}
if (!path) {
return ENOMEM;
}
return ret;
}
return ret;
}
return ret;
}
/* 'timeout = 0' should be translated to the default */
}
/* if no provider is present do not run the domain */
return EIO;
}
/* if there are no custom commands, build a default one */
);
return ENOMEM;
}
if (cmdline_debug_level != SSSDBG_UNRESOLVED) {
);
return ENOMEM;
}
}
);
return ENOMEM;
}
}
);
return ENOMEM;
}
}
if (debug_to_file) {
);
return ENOMEM;
}
}
}
return EOK;
}
{
int ret;
DEBUG(0, ("Could not get provider configuration for [%s]\n",
name));
return ret;
}
/* The LOCAL provider requires no back-end currently
* We'll add it to the service list, but we don't need
* to poll it.
*/
svc->svc_started = true;
return ENOENT;
}
}
return ret;
}
struct tevent_signal *se,
int signum,
int count,
void *siginfo,
void *private_data)
{
/* Signal all services to rotate debug files */
}
}
/* Cleanup run at monitor shutdown; per the error message below it removes
 * the pidfile.
 * Returns EOK on success, ENOMEM (presumably on allocation failure --
 * confirm) or errno when the removal fails.
 * NOTE(review): the body is truncated in this view. */
static int monitor_cleanup(void)
{
char *file;
int ret;
return ENOMEM;
}
/* clear errno so the value reported below comes from the failing call */
errno = 0;
if (ret == -1) {
DEBUG(0, ("Error removing pidfile! (%d [%s])\n",
/* NOTE(review): errno is read after the DEBUG call; if logging can modify
 * errno the wrong code is returned -- confirm and consider saving it */
return errno;
}
return EOK;
}
struct tevent_signal *se,
int signum,
int count,
void *siginfo,
void *private_data)
{
int status;
int kret;
bool killed;
DEBUG(0, ("Monitor received %s: terminating children\n",
/* Kill all of our known children manually */
/* The local provider has no PID */
continue;
}
killed = false;
do {
errno = 0;
if (kret < 0) {
}
error = 0;
do {
errno = 0;
if (pid == -1) {
/* An error occurred while waiting */
DEBUG(0, ("[%d][%s] while waiting for [%s]\n",
/* Forcibly kill this child */
break;
}
} else if (pid != 0) {
error = 0;
} else if WIFSIGNALED(status) {
} else {
/* Forcibly kill this child */
}
killed = true;
}
if (!killed) {
/* Sleep 10ms and try again */
usleep(10000);
}
} while (!killed);
}
#if HAVE_GETPGRP
/* Kill any remaining children in our process group, just in case
* we have any leftover children we don't expect. For example, if
* a krb5_child or ldap_child is running at the same moment.
*/
error = 0;
do {
errno = 0;
if (pid == -1) {
}
}
#endif
exit(0);
}
struct tevent_signal *se,
int signum,
int count,
void *siginfo,
void *private_data)
{
/* Signal all providers to immediately go offline */
/* Don't signal services, only providers */
}
}
}
struct tevent_signal *se,
int signum,
int count,
void *siginfo,
void *private_data)
{
}
}
}
/* Reads the configuration file named by config_file into a collection.
 * Logs a parse error naming the file on failure and returns ret
 * (presumably the parser's status code -- confirm).
 * NOTE(review): the body is truncated in this view. */
int read_config_file(const char *config_file)
{
int ret;
/* Read the configuration into a collection */
DEBUG(0, ("Parse error reading configuration file [%s]\n",
config_file));
}
return ret;
}
/* Destructor callback for the monitor context (presumably registered
 * through talloc -- confirm). Returns 0.
 * NOTE(review): the body is truncated in this view. */
static int monitor_ctx_destructor(void *mem)
{
/* zero out references in svcs so that they don't try
* to access the monitor context on process shutdown */
}
return 0;
}
const char *config_file,
{
if(!ctx) {
return ENOMEM;
}
DEBUG(0,("Out of memory, aborting!\n"));
goto done;
}
DEBUG(0,("The confdb initialization failed\n"));
goto done;
}
/* Initialize the CDB from the configuration file */
/* First-time setup */
/* Purge any existing confdb in case an old
* misconfiguration gets in the way
*/
DEBUG(0,("The confdb initialization failed\n"));
goto done;
}
/* Load special entries */
DEBUG(0, ("Unable to load special entries into confdb\n"));
goto done;
}
DEBUG(0, ("Fatal error initializing confdb\n"));
goto done;
}
DEBUG(0, ("ConfDB initialization has failed [%s]\n",
goto done;
}
/* Validate the configuration in the database */
/* Read in the monitor's configuration */
goto done;
}
done:
}
return ret;
}
#ifdef HAVE_SYS_INOTIFY_H
struct tevent_timer *te,
{
struct config_file_ctx *file_ctx;
if (file_ctx->needs_update) {
/* Skip updating. It's already queued for update.
*/
return;
}
/* We will queue the file for update in one second.
* This way, if there is a script writing to the file
* repeatedly, we won't be attempting to update multiple
* times.
*/
if (!te) {
DEBUG(0, ("Unable to queue config file update! Exiting.\n"));
return;
}
}
/* Context used when trying to restore a lost inotify watch (presumably
 * handed to the retry timer -- confirm). */
struct rewatch_ctx {
/* the watched-file entry whose watch has to be re-established */
struct config_file_callback *cb;
/* shared configuration-file watch state */
struct config_file_ctx *file_ctx;
};
struct tevent_timer *te,
struct tevent_timer *te,
{
struct inotify_event *in_event;
char *buf;
char *name;
struct config_file_ctx *file_ctx;
struct config_file_callback *cb;
struct rewatch_ctx *rw_ctx;
event_size = sizeof(struct inotify_event);
if (!tmp_ctx) return;
if (!buf) {
goto done;
}
total_len = 0;
while (total_len < event_size) {
if (len == -1) {
DEBUG(0, ("Critical error reading inotify file descriptor.\n"));
goto done;
}
}
/* Read in the name, even though we don't use it,
* so that read ptr is in the right place
*/
if (!name) {
goto done;
}
total_len = 0;
if (len == -1) {
DEBUG(0, ("Critical error reading inotify file descriptor.\n"));
goto done;
}
}
}
break;
}
}
if (!cb) {
DEBUG(0, ("Unknown watch descriptor\n"));
goto done;
}
/* Some text editors will move a new file on top of the
* existing one instead of modifying it. In this case,
* the kernel will send us an IN_IGNORE signal.
* We will try to open a new watch descriptor on the
* new file.
*/
struct tevent_timer *tev;
if(!rw_ctx) {
DEBUG(0, ("Could not restore inotify watch. Quitting!\n"));
goto done;
}
DEBUG(0, ("Could not restore inotify watch. Quitting!\n"));
}
goto done;
}
/* Tell the monitor to signal the children */
file_ctx->needs_update = 0;
done:
}
const char *file,
struct tevent_timer *te,
{
int err;
struct config_file_callback *cb;
struct rewatch_ctx *rw_ctx;
struct config_file_ctx *file_ctx;
/* Retry six times at five-second intervals before giving up */
DEBUG(0, ("Could not restore inotify watch. Switching to polling!\n"));
/* A new callback was created in monitor_config_file_fallback()*/
return;
}
DEBUG(0, ("Could not restore inotify watch. Quitting!\n"));
}
return;
}
/* Tell the monitor to signal the children */
file_ctx->needs_update = 0;
}
#endif
struct tevent_timer *te,
{
struct config_file_ctx *file_ctx;
struct config_file_callback *cb;
if (ret < 0) {
DEBUG(0, ("Could not stat file [%s]. Error [%d:%s]\n",
/* TODO: If the config file is missing, should we shut down? */
return;
}
/* Parse the configuration file and signal the children */
/* Note: this will fire if the modification time changes into the past
* as well as the future.
*/
/* Tell the monitor to signal the children */
}
}
DEBUG(0, ("Error: Config file no longer monitored for changes!\n"));
}
}
{
#ifdef HAVE_SYS_INOTIFY_H
struct config_file_callback *cb;
/* Monitoring the file descriptor should be global */
/* Set up inotify to monitor the config file for changes */
DEBUG(0, ("Could not initialize inotify, error [%d:%s]\n",
return err;
}
if (fd_args < 0) {
/* Could not set nonblocking */
return EINVAL;
}
fd_args |= O_NONBLOCK;
if (ret < 0) {
/* Could not set nonblocking */
return EINVAL;
}
/* Add the inotify file descriptor to the TEvent context */
file_ctx);
if (!tfd) {
return EIO;
}
}
if(!cb) {
return EIO;
}
return ENOMEM;
}
DEBUG(0, ("Could not add inotify watch for file [%s]. Error [%d:%s]\n",
return err;
}
return EOK;
#else
return EINVAL;
#endif
}
const char *file,
{
bool use_inotify;
if (ret < 0) {
DEBUG(0, ("Could not stat file [%s]. Error [%d:%s]\n",
return err;
}
}
true, &use_inotify);
return ret;
}
if (use_inotify) {
use_inotify = false;
}
}
if (!use_inotify) {
/* Could not monitor file with inotify, fall back to polling */
}
return ret;
}
const char *file,
{
if (ret < 0) {
DEBUG(0, ("Could not stat file [%s]. Error [%d:%s]\n",
return err;
}
if (!cb) {
return ENOMEM;
}
return ENOMEM;
}
return EIO;
}
}
return EOK;
}
const char *config_file)
{
struct sysdb_ctx_list *db_list;
struct tevent_signal *tes;
struct sss_domain_info *dom;
char *rcachedir;
int num_providers;
int ret;
int error;
/* Set up the environment variable for the Kerberos Replay Cache */
&rcachedir);
return ret;
}
{
errno = 0;
if (ret < 0) {
DEBUG(1,
("Unable to set KRB5RCACHEDIR: %s."
"Will attempt to use libkrb5 defaults\n",
}
}
/* Set up an event handler for a SIGHUP */
monitor_hup, ctx);
return EIO;
}
/* Set up an event handler for a SIGINT */
BlockSignals(false, SIGINT);
monitor_quit, ctx);
return EIO;
}
/* Set up an event handler for a SIGTERM */
monitor_quit, ctx);
return EIO;
}
/* Handle SIGUSR1 (tell all providers to go offline) */
BlockSignals(false, SIGUSR1);
return EIO;
}
/* Handle SIGUSR2 (tell all providers to go reset offline) */
BlockSignals(false, SIGUSR2);
return EIO;
}
#if 0
/* Watch for changes to the confdb config file */
return ret;
}
#endif
/* Watch for changes to the DNS resolv.conf */
return ret;
}
/* Avoid a startup race condition between processes.
* We need to handle DB upgrades or DB creation only
* in one process before all others start.
*/
if (!tmp_ctx) {
return ENOMEM;
}
return ret;
}
/* Initialize D-BUS Server
* The monitor will act as a D-BUS server for all
* SSSD processes */
return ret;
}
return ret;
}
/* start providers */
num_providers = 0;
return ret;
}
}
}
if (num_providers > 0) {
/* now set the services startup timeout *
* (responders will be started automatically when all
* providers are up and running or when the timeout
* expires) */
return ret;
}
} else {
int i;
ctx->services_started = true;
/* No providers: start services immediately.
* Normally this means only LOCAL is configured */
}
}
/* now start checking for global events */
return EOK;
}
struct tevent_timer *te,
{
struct mon_init_conn *mini;
}
/*
* monitor_service_init
* Set up a timeout function and temporary connection structure.
* If the client does not identify before the timeout kicks in,
* the client is forcibly disconnected.
*/
{
struct mon_init_conn *mini;
if (!mini) {
DEBUG(0,("Out of memory?!\n"));
return ENOMEM;
}
/* 5 seconds should be plenty */
DEBUG(0,("Out of memory?!\n"));
return ENOMEM;
}
return EOK;
}
/* service_send_ping
* This function sends a D-Bus ping to a service.
* It returns EOK if all is fine or ENXIO if the connection is
* not available (either not yet set up or already torn down).
* Returns a generic error in other cases.
*/
{
int ret;
return ENXIO;
}
/*
* Set up identity request
* This should be a well-known path and method
* for all services
*/
if (!msg) {
DEBUG(0,("Out of memory?!\n"));
return ENOMEM;
}
return ret;
}
{
const char *dbus_error_name;
int type;
if (!reply) {
/* reply should never be null. This function shouldn't be called
* until reply is valid or timeout has occurred. If reply is NULL
* here, something is seriously wrong and we should bail out.
*/
DEBUG(0, ("A reply callback was called but no reply was received"
" and no timeout occurred\n"));
/* Destroy this connection */
goto done;
}
switch (type) {
/* ok peer replied,
* make sure we reset the failure counter in the service structure */
svc->failed_pongs = 0;
break;
case DBUS_MESSAGE_TYPE_ERROR:
if (!dbus_error_name) {
dbus_error_name = "<UNKNOWN>";
}
/* Increase failed pong count */
("A service PING timed out on [%s]. "
"Attempt [%d]\n",
svc->failed_pongs++;
break;
}
("A service PING returned an error [%s], closing connection.\n",
/* Falling through to default intentionally*/
default:
/*
* Timeout or other error occurred or something
* unexpected happened.
* It doesn't matter which, because either way we
* know that this connection isn't trustworthy.
* We'll destroy it now.
*/
}
done:
}
/* service_check_alive
* This function checks if the service child is still alive
*/
{
int status;
if (pid == 0) {
return EOK;
}
/* TODO: what do we do now ? */
return EINVAL;
}
/* TODO: check configuration to see if it was removed
* from the list of process to run */
}
return ECHILD;
}
struct tevent_timer *te,
{
struct tevent_timer *te;
/* at startup we need to start the data providers before the responders
* to avoid races where a service starts before sbus pipes are ready
* to accept connections. So if startup is true delay by 2 seconds any
* process that is not a data provider */
tv = tevent_timeval_current();
/* Add a timed event to start up the service.
* We have to do this in order to avoid a race
* condition where the service being started forks
* and attempts to connect to the SBUS before
* the monitor is serving it.
*/
return ENOMEM;
}
return EOK;
}
struct tevent_timer *te,
{
char **args;
return;
}
DEBUG(0, ("Could not fork child to start service [%s]. "
return;
}
/* Parent */
mt_svc->failed_pongs = 0;
return;
}
/* child */
/* If we are here, exec() has failed
* Print errno and abort quickly */
/* We have to call _exit() instead of exit() here
* because a bug in D-BUS will cause the server to
* close its socket at exit() */
_exit(1);
}
{
int opt;
int opt_daemon = 0;
int opt_interactive = 0;
char *opt_config_file = NULL;
char *config_file = NULL;
int flags = 0;
struct main_context *main_ctx;
int ret;
struct poptOption long_options[] = {
_("Become a daemon (default)"), NULL }, \
_("Run interactive (not a daemon)"), NULL}, \
_("Specify a non-default config file"), NULL}, \
};
/* Set debug level to invalid value so we can decide if -d 0 was used. */
switch(opt) {
default:
return 1;
}
}
/* If the level, timestamps or microseconds was passed at the command-line,
* we want to save it and pass it to the children later.
*/
if (opt_daemon && opt_interactive) {
return 1;
}
if (!opt_daemon && !opt_interactive) {
opt_daemon = 1;
}
if (uid != 0) {
return 8;
}
if (!tmp_ctx) {
return 7;
}
if (opt_config_file)
else
if(!config_file)
return 6;
/* we want a pid file check */
flags |= FLAGS_PID_FILE;
/* Open before server_setup() does to have logging
* during configuration checking */
if (debug_to_file) {
ret = open_debug_file();
if (ret) {
return 7;
}
}
/* Warn if nscd seems to be running */
"nscd socket was detected. Nscd caching capabilities "
"may conflict with SSSD for users and groups. It is "
"recommended not to run nscd in parallel with SSSD, unless "
"nscd is configured not to cache the passwd, group and "
"netgroup nsswitch maps.");
}
/* Parse config file, fail if cannot be done */
"Cannot read config file %s, please check if permissions "
"are 0600 and the file is owned by root.root", config_file);
} else {
}
return 4;
}
/* set up things like debug , signals, daemonization, etc... */
/* loop on main */
ret = monitor_cleanup();
return 0;
}