service-monitor.c revision 9cd232cda7563ad81c01776e5ebc5ed2b3cef898
e59faf65ce864fe95dc00f5d52b8323cdbd0608aTimo Sirainen/* Copyright (c) 2005-2010 Dovecot authors, see the included COPYING file */
d8552f9f65e5ff64be5de9faf9a8171799a0bbecTimo Sirainen#define SERVICE_PROCESS_KILL_IDLE_MSECS (1000*60)
b2ed2b25c4c457ec1c99ebe5e9bd66a2e2f89cfdTimo Sirainen#define SERVICE_STARTUP_FAILURE_THROTTLE_SECS 60
cdc8485491045d82bb98405d4b995f277d12838eTimo Sirainenstatic void service_monitor_start_extra_avail(struct service *service);
d8552f9f65e5ff64be5de9faf9a8171799a0bbecTimo Sirainenstatic void service_process_kill_idle(struct service_process *process)
c0e5c6a86e1de5d4f5591d39b4aa921a23c807d7Timo Sirainen if (service->process_avail <= service->set->process_min_avail) {
c0e5c6a86e1de5d4f5591d39b4aa921a23c807d7Timo Sirainen /* we don't have any extra idling processes anymore. */
d8552f9f65e5ff64be5de9faf9a8171799a0bbecTimo Sirainen if (kill(process->pid, SIGINT) < 0 && errno != ESRCH) {
d8552f9f65e5ff64be5de9faf9a8171799a0bbecTimo Sirainen service_error(service, "kill(%s, SIGINT) failed: %m",
d8552f9f65e5ff64be5de9faf9a8171799a0bbecTimo Sirainenstatic void service_status_more(struct service_process *process,
d8552f9f65e5ff64be5de9faf9a8171799a0bbecTimo Sirainen process->available_count - status->available_count;
d8552f9f65e5ff64be5de9faf9a8171799a0bbecTimo Sirainen /* process used up all of its clients */
cf0ad1a0bddb0787f3d7b408a96d721a8b2a98a3Timo Sirainen service->process_count == service->process_limit)
d8552f9f65e5ff64be5de9faf9a8171799a0bbecTimo Sirainen /* we may need to start more */
d8552f9f65e5ff64be5de9faf9a8171799a0bbecTimo Sirainenstatic void service_status_less(struct service_process *process,
d8552f9f65e5ff64be5de9faf9a8171799a0bbecTimo Sirainen /* process can accept more clients again */
d8552f9f65e5ff64be5de9faf9a8171799a0bbecTimo Sirainen i_assert(service->process_avail <= service->process_count);
d8552f9f65e5ff64be5de9faf9a8171799a0bbecTimo Sirainen if (status->available_count == service->client_limit) {
d8552f9f65e5ff64be5de9faf9a8171799a0bbecTimo Sirainen if (service->process_avail > service->set->process_min_avail &&
d8552f9f65e5ff64be5de9faf9a8171799a0bbecTimo Sirainen /* we have more processes than we really need.
d8552f9f65e5ff64be5de9faf9a8171799a0bbecTimo Sirainen add a bit of randomness so that we don't send the
d8552f9f65e5ff64be5de9faf9a8171799a0bbecTimo Sirainen signal to all of them at once */
9cd232cda7563ad81c01776e5ebc5ed2b3cef898Timo Sirainenservice_status_input_one(struct service *service,
9cd232cda7563ad81c01776e5ebc5ed2b3cef898Timo Sirainen process = hash_table_lookup(service_pids, &status->pid);
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen /* we've probably wait()ed it away already. ignore */
9cd232cda7563ad81c01776e5ebc5ed2b3cef898Timo Sirainen if (process->uid != status->uid || process->service != service) {
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen /* a) Process was closed and another process was created with
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen the same PID, but we're still receiving status update from
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen the old process.
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen b) Some process is trying to corrupt our internal state by
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen trying to pretend to be someone else. We could use stronger
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen randomness here, but the worst they can do is DoS and there
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen are already more serious problems if someone is able to do
55bc6a7a0940ec48a68558ef70838991c5d301d2Timo Sirainen service_error(service, "Ignoring invalid update from child %s "
9cd232cda7563ad81c01776e5ebc5ed2b3cef898Timo Sirainen "(UID=%u)", dec2str(status->pid), status->uid);
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen /* first status notification */
9cd232cda7563ad81c01776e5ebc5ed2b3cef898Timo Sirainen if (process->available_count == status->available_count)
9cd232cda7563ad81c01776e5ebc5ed2b3cef898Timo Sirainen if (process->available_count > status->available_count) {
d8552f9f65e5ff64be5de9faf9a8171799a0bbecTimo Sirainen /* process started servicing some more clients */
d8552f9f65e5ff64be5de9faf9a8171799a0bbecTimo Sirainen /* process finished servicing some clients */
9cd232cda7563ad81c01776e5ebc5ed2b3cef898Timo Sirainen process->available_count = status->available_count;
9cd232cda7563ad81c01776e5ebc5ed2b3cef898Timo Sirainenstatic void service_status_input(struct service *service)
9cd232cda7563ad81c01776e5ebc5ed2b3cef898Timo Sirainen struct master_status status[1024/sizeof(struct master_status)];
9cd232cda7563ad81c01776e5ebc5ed2b3cef898Timo Sirainen unsigned int i, count;
9cd232cda7563ad81c01776e5ebc5ed2b3cef898Timo Sirainen ret = read(service->status_fd[0], &status, sizeof(status));
9cd232cda7563ad81c01776e5ebc5ed2b3cef898Timo Sirainen service_error(service, "read(status) failed: EOF");
9cd232cda7563ad81c01776e5ebc5ed2b3cef898Timo Sirainen service_error(service, "read(status) failed: %m");
9cd232cda7563ad81c01776e5ebc5ed2b3cef898Timo Sirainen if ((ret % sizeof(struct master_status)) != 0) {
9cd232cda7563ad81c01776e5ebc5ed2b3cef898Timo Sirainen service_error(service, "service sent partial status update "
9cd232cda7563ad81c01776e5ebc5ed2b3cef898Timo Sirainen for (i = 0; i < count; i++)
9cd232cda7563ad81c01776e5ebc5ed2b3cef898Timo Sirainen service_status_input_one(service, &status[i]);
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainenstatic void service_monitor_throttle(struct service *service)
55bc6a7a0940ec48a68558ef70838991c5d301d2Timo Sirainen service_error(service, "command startup failed, throttling");
b2ed2b25c4c457ec1c99ebe5e9bd66a2e2f89cfdTimo Sirainen service_throttle(service, SERVICE_STARTUP_FAILURE_THROTTLE_SECS);
6fdfa4d4cf14d1d7764d7faa8258f112e39c8dbeTimo Sirainenstatic void service_drop_connections(struct service *service)
6fdfa4d4cf14d1d7764d7faa8258f112e39c8dbeTimo Sirainen SERVICE_DROP_WARN_INTERVAL_SECS < ioloop_time) {
6fdfa4d4cf14d1d7764d7faa8258f112e39c8dbeTimo Sirainen i_warning("service(%s): process_limit reached, "
6fdfa4d4cf14d1d7764d7faa8258f112e39c8dbeTimo Sirainen "client connections are being dropped",
6fdfa4d4cf14d1d7764d7faa8258f112e39c8dbeTimo Sirainen /* reached process limit, notify processes that they
6fdfa4d4cf14d1d7764d7faa8258f112e39c8dbeTimo Sirainen need to start killing existing connections if they
6fdfa4d4cf14d1d7764d7faa8258f112e39c8dbeTimo Sirainen reach connection limit */
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainenstatic void service_accept(struct service *service)
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen if (service->process_count == service->process_limit) {
d8552f9f65e5ff64be5de9faf9a8171799a0bbecTimo Sirainen /* we've reached our limits, new clients will have to
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen wait until there are more processes available */
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen /* create a child process and let it accept() this connection */
cdc8485491045d82bb98405d4b995f277d12838eTimo Sirainenstatic void service_monitor_start_extra_avail(struct service *service)
cdc8485491045d82bb98405d4b995f277d12838eTimo Sirainen unsigned int i, count;
cdc8485491045d82bb98405d4b995f277d12838eTimo Sirainen if (service->process_avail >= service->set->process_min_avail)
cdc8485491045d82bb98405d4b995f277d12838eTimo Sirainen count = service->set->process_min_avail - service->process_avail;
cdc8485491045d82bb98405d4b995f277d12838eTimo Sirainen if (service->process_count + count > service->process_limit)
cdc8485491045d82bb98405d4b995f277d12838eTimo Sirainen count = service->process_limit - service->process_count;
cdc8485491045d82bb98405d4b995f277d12838eTimo Sirainen for (i = 0; i < count; i++) {
cf0ad1a0bddb0787f3d7b408a96d721a8b2a98a3Timo Sirainen if (service_process_create(service) == NULL) {
cdc8485491045d82bb98405d4b995f277d12838eTimo Sirainen /* we created some processes, they'll do the listening now */
b2ed2b25c4c457ec1c99ebe5e9bd66a2e2f89cfdTimo Sirainenvoid service_monitor_listen_start(struct service *service)
6fdfa4d4cf14d1d7764d7faa8258f112e39c8dbeTimo Sirainen (service->process_count == service->process_limit &&
7bd72e4deca3cbf757dd1ea298486d9f3bc24226Timo Sirainen array_foreach(&service->listeners, listeners) {
7bd72e4deca3cbf757dd1ea298486d9f3bc24226Timo Sirainen l->io = io_add(l->fd, IO_READ, service_accept, service);
b2ed2b25c4c457ec1c99ebe5e9bd66a2e2f89cfdTimo Sirainenvoid service_monitor_listen_stop(struct service *service)
7bd72e4deca3cbf757dd1ea298486d9f3bc24226Timo Sirainen array_foreach(&service->listeners, listeners) {
cf0ad1a0bddb0787f3d7b408a96d721a8b2a98a3Timo Sirainenstatic int service_login_create_notify_fd(struct service *service)
cf0ad1a0bddb0787f3d7b408a96d721a8b2a98a3Timo Sirainen fd = safe_mkstemp(prefix, 0600, (uid_t)-1, (gid_t)-1);
cf0ad1a0bddb0787f3d7b408a96d721a8b2a98a3Timo Sirainen service_error(service, "safe_mkstemp(%s) failed: %m",
cf0ad1a0bddb0787f3d7b408a96d721a8b2a98a3Timo Sirainen service_error(service, "unlink(%s) failed: %m", path);
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainenvoid services_monitor_start(struct service_list *service_list)
7bd72e4deca3cbf757dd1ea298486d9f3bc24226Timo Sirainen array_foreach(&service_list->services, services) {
7bd72e4deca3cbf757dd1ea298486d9f3bc24226Timo Sirainen if (service_login_create_notify_fd(service) < 0)
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen /* we haven't yet created status pipe */
7bd72e4deca3cbf757dd1ea298486d9f3bc24226Timo Sirainen net_set_nonblock(service->status_fd[0], TRUE);
7bd72e4deca3cbf757dd1ea298486d9f3bc24226Timo Sirainen fd_close_on_exec(service->status_fd[0], TRUE);
7bd72e4deca3cbf757dd1ea298486d9f3bc24226Timo Sirainen net_set_nonblock(service->status_fd[1], TRUE);
7bd72e4deca3cbf757dd1ea298486d9f3bc24226Timo Sirainen fd_close_on_exec(service->status_fd[1], TRUE);
cf0ad1a0bddb0787f3d7b408a96d721a8b2a98a3Timo Sirainen if (service_process_create(service_list->log) != NULL)
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen service_monitor_listen_stop(service_list->log);
b2ed2b25c4c457ec1c99ebe5e9bd66a2e2f89cfdTimo Sirainenvoid service_monitor_stop(struct service *service)
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen for (i = 0; i < 2; i++) {
55bc6a7a0940ec48a68558ef70838991c5d301d2Timo Sirainen "close(status fd) failed: %m");
cf0ad1a0bddb0787f3d7b408a96d721a8b2a98a3Timo Sirainen "close(login notify fd) failed: %m");
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainenvoid services_monitor_stop(struct service_list *service_list)
7bd72e4deca3cbf757dd1ea298486d9f3bc24226Timo Sirainen array_foreach(&service_list->services, services)
6c2ce1d5bf17b21e804a079eb0f973b7ab83e0d8Timo Sirainenstatic void service_process_failure(struct service_process *process, int status)
6c2ce1d5bf17b21e804a079eb0f973b7ab83e0d8Timo Sirainen service_process_log_status_error(process, status);
4f4943f6ef1bc45c23de73eebe83779712b3c8cbTimo Sirainen service_process_notify_add(service_anvil_global->kills, process);
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen while ((pid = waitpid(-1, &status, WNOHANG)) > 0) {
bad5fa318c6c1384ab83bd72d53ce06593274c18Timo Sirainen process = hash_table_lookup(service_pids, &pid);
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen /* success */