service-monitor.c revision 7b032348d7bbb93ff96188289d3dfc1899b9abb3
/* Copyright (c) 2005-2017 Dovecot authors, see the included COPYING file */
#include "common.h"
#include "array.h"
#include "ioloop.h"
#include "fd-close-on-exec.h"
#include "hash.h"
#include "str.h"
#include "safe-mkstemp.h"
#include "time-util.h"
#include "master-client.h"
#include "service.h"
#include "service-process.h"
#include "service-process-notify.h"
#include "service-anvil.h"
#include "service-log.h"
#include "service-monitor.h"
#include <unistd.h>
#include <syslog.h>
#include <signal.h>
#define SERVICE_DROP_WARN_INTERVAL_SECS 1
#define MAX_DIE_WAIT_MSECS 5000
#define SERVICE_MAX_EXIT_FAILURES_IN_SEC 10
#define SERVICE_PREFORK_MAX_AT_ONCE 10
const struct master_status *status);
{
struct master_status status;
/* we don't have any extra idling processes anymore. */
/* assume this process is busy */
process->available_count = 0;
} else {
}
}
}
const struct master_status *status)
{
process->total_count +=
process->idle_start = 0;
if (status->available_count != 0)
return;
/* process used up all of its clients */
service->process_avail--;
service->process_avail == 0 &&
/* we may need to start more */
}
const struct master_status *status)
{
if (process->available_count == 0) {
/* process can accept more clients again */
if (service->process_avail++ == 0)
}
/* we have more processes than we really need.
add a bit of randomness so that we don't send the
signal to all of them at once */
process);
}
}
}
static void
const struct master_status *status)
{
struct service_process *process;
/* we've probably wait()ed it away already. ignore */
return;
}
/* a) Process was closed and another process was created with
the same PID, but we're still receiving status updates from
the old process.
b) Some process is trying to corrupt our internal state by
trying to pretend to be someone else. We could use stronger
randomness here, but the worst they can do is DoS and there
are already more serious problems if someone is able to do
this. */
return;
}
/* first status notification */
}
return;
/* process started servicing some more clients */
} else {
/* process finished servicing some clients */
}
}
{
unsigned int i, count;
if (ret <= 0) {
if (ret == 0)
else
return;
return;
}
if ((ret % sizeof(struct master_status)) != 0) {
"(%d bytes)", (int)ret);
return;
}
for (i = 0; i < count; i++)
}
{
return;
}
{
struct service_listener *const *lp;
int fd;
/* drop all pending connections */
}
}
{
}
/* Deals with listener `l` when a limit has been reached and client
   connections have to be dropped. Going by the code and comments that are
   visible here, it:
   - logs a warning that names the limit that was hit ("process_limit" or
     "client_limit") and says that client connections are being dropped.
     The test on service->last_drop_warning presumably rate-limits this
     warning - TODO confirm, the rest of that condition is not visible here.
   - then takes one of three paths: notify the processes (process limit
     reached), stop for a while when !service->listen_pending, or accept
     and close the connection.
   NOTE(review): several statements of this function are missing from this
   view, so the conditions and calls on each path are only partly visible. */
static void service_drop_connections(struct service_listener *l)
{
const char *limit_name;
unsigned int limit;
int fd;
if (service->last_drop_warning +
limit_name = "process_limit";
limit = 1;
} else {
limit_name = "client_limit";
}
i_warning("service(%s): %s (%u) reached, "
"client connections are being dropped",
}
/* reached process limit, notify processes that they
need to start killing existing connections if they
reach connection limit */
} else if (!service->listen_pending) {
/* maybe this is a temporary peak, stop for a while and
see if it goes away */
} else {
/* this has been happening for a while now. just accept and
close the connection, so it's clear that this is happening
because of the limit, rather than because the service
processes aren't answering fast enough */
/* NOTE(review): this test excludes descriptor 0, which is a valid
   descriptor value. The assignment to fd is not visible here - confirm
   whether a ">= 0" comparison was intended. */
if (fd > 0)
}
}
/* Decides what to do about a connection on listener `l`. Per the comments
   below there are two outcomes: when the limits have been reached the
   function returns and new clients have to wait until more processes are
   available; otherwise a child process is created and it accept()s the
   connection.
   NOTE(review): the limit check and the calls that implement both outcomes
   are not visible in this view - confirm against the full file. */
static void service_accept(struct service_listener *l)
{
/* we've reached our limits, new clients will have to
wait until there are more processes available */
return;
}
/* create a child process and let it accept() this connection */
else
}
static bool
{
unsigned int i, count;
for (i = 0; i < count; i++) {
break;
}
}
if (i > 0) {
/* we created some processes, they'll do the listening now */
}
return i == count;
}
{
/* don't prefork more processes if other more important processes had
been forked while we were waiting for this timeout (= master seems
busy) */
return;
}
return;
}
}
{
return;
if (service->process_avail == 0) {
/* quickly start one process now */
return;
return;
}
/* ioloop handles timeouts before fds (= SIGCHLD callback),
so let the first timeout handler call simply update the fork
counter and the second one check if we're busy or not. */
}
}
{
struct service_listener *const *listeners;
struct service_listener *l = *listeners;
}
}
{
if (service->process_avail > 0 ||
return;
}
{
struct service_listener *const *listeners;
struct service_listener *l = *listeners;
}
}
{
return 0;
T_BEGIN {
const char *path;
if (fd == -1) {
path);
} else {
}
} T_END;
i_close_fd(&fd);
return ret;
}
{
if (services_log_init(service_list) < 0)
return;
}
if (service_login_create_notify_fd(service) < 0)
continue;
}
continue;
}
}
/* we haven't yet created status pipe */
continue;
}
}
}
}
}
/* start up a process for startup-services */
}
}
}
{
}
}
{
int i;
for (i = 0; i < 2; i++) {
"close(status fd) failed: %m");
}
}
}
"close(login notify fd) failed: %m");
}
}
}
{
struct service_listener *const *listeners;
struct service_listener *l = *listeners;
i_close_fd(&l->fd);
}
}
{
bool finished;
for (;;) {
if ((*servicep)->process_avail > 0)
}
if (finished ||
break;
usleep(100000);
}
}
{
}
}
return ret;
}
static bool
{
}
return ret;
}
{
/* we've notified all children that the master is dead.
now wait for the children to either die or to tell that
they're no longer listening for new connections. */
/* Even if the waiting stopped early because every process_avail==0,
there may still be processes that have the listener socket open
(just not actively listening on it). We need to make sure that
those sockets are closed before we exit, so that a restart won't
fail. Do this by sending SIGQUIT to all the child processes that
are left; lib-master handles it by immediately closing the
listener in the signal handler itself. */
/* SIGQUITs were sent. wait a little bit to make sure they're
also processed before quitting. */
usleep(100000);
}
}
{
if (wait)
}
static bool
{
bool throttle;
/* this service has seen no successful exits yet.
try to avoid failure storms by throttling the service if it
only keeps failing rapidly. this is no longer done after
one success to avoid intentional DoSing, in case attacker
finds a way to quickly crash his own session. */
service->exit_failures_in_sec = 0;
}
}
return throttle;
}
/* Processes child exits. From what is visible here:
   - a PID that waitpid() returned but that is not known is logged with
     i_error() and skipped
   - an exit status of 0 counts as success and resets the service's
     exit_failures_in_sec counter; a non-zero status takes the else branch,
     whose body is not visible here
   - the service list is kept referenced while a process is destroyed,
     because a reload may otherwise free it underneath us
   - afterwards the service is either throttled or, when there are no
     longer listening processes, set to listen for more; a log service
     with process_count == 0 is special-cased because it must always be
     running.
   NOTE(review): the loop header, the process lookup and most of the calls
   are missing from this view - confirm details against the full file. */
void services_monitor_reap_children(void)
{
struct service_process *process;
int status;
bool service_stopped, throttle;
i_error("waitpid() returned unknown PID %s",
continue;
}
if (status == 0) {
/* success - one success resets all failures */
service->exit_failures_in_sec = 0;
} else {
}
/* if we're reloading, we may get here with a service list
that's going to be destroyed after this process is
destroyed. keep the list referenced until we're done. */
if (throttle)
/* if there are no longer listening processes,
start listening for more */
/* throttling */
service->process_count == 0) {
/* log service must always be running */
} else {
}
}
}
}