service-monitor.c revision d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes/* Copyright (c) 2005-2009 Dovecot authors, see the included COPYING file */
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes
#include "common.h"
#include "array.h"
#include "ioloop.h"
#include "fd-close-on-exec.h"
#include "hash.h"
#include "service.h"
#include "service-process.h"
#include "service-log.h"
#include "service-monitor.h"

#include <errno.h>
#include <unistd.h>
#include <sys/wait.h>
#include <syslog.h>
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes#define THROTTLE_TIMEOUT (1000*60)
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes
70953fb44a7140fe206c3a5f011e24209c8c5c6abnicholesstatic void service_monitor_stop(struct service *service);
70953fb44a7140fe206c3a5f011e24209c8c5c6abnicholesstatic void service_monitor_listen_start(struct service *service);
16b55a35cff91315d261d1baa776138af465c4e4fuankgstatic void service_monitor_listen_stop(struct service *service);
16b55a35cff91315d261d1baa776138af465c4e4fuankg
405f61494d3ed3ca9c054dacc05a53513e172145bnicholesstatic void service_status_input(struct service *service)
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes{
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes struct master_status status;
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes struct service_process *process;
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes ssize_t ret;
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes ret = read(service->status_fd[0], &status, sizeof(status));
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes switch (ret) {
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes case 0:
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes i_error("service(%s): read(status) failed: EOF", service->name);
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes service_monitor_stop(service);
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes return;
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes case -1:
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes i_error("service(%s): read(status) failed: %m", service->name);
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes service_monitor_stop(service);
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes return;
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes default:
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes i_error("service(%s): child %s sent partial status update "
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes "(%d bytes)", service->name,
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes dec2str(status.pid), (int)ret);
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes return;
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes case sizeof(status):
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes break;
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes }
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes process = hash_table_lookup(service->list->pids, &status.pid);
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes if (process == NULL) {
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes /* we've probably wait()ed it away already. ignore */
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes return;
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes }
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes if (process->uid != status.uid || process->service != service) {
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes /* a) Process was closed and another process was created with
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes the same PID, but we're still receiving status update from
ac7985784d08a3655291f24f711812b4d8b1cbcffuankg the old process.
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes b) Some process is trying to corrupt our internal state by
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes trying to pretend to be someone else. We could use stronger
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes randomness here, but the worst they can do is DoS and there
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes are already more serious problems if someone is able to do
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes this.. */
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes i_error("service(%s): Ignoring invalid update from child %s "
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes "(UID=%u)", service->name, dec2str(status.pid),
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes status.uid);
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes return;
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes }
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes if (process->to_status != NULL) {
ac7985784d08a3655291f24f711812b4d8b1cbcffuankg /* first status notification */
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes timeout_remove(&process->to_status);
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes }
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes if (process->available_count == status.available_count)
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes return;
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes if (process->available_count > status.available_count) {
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes /* process started servicing requests */
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes process->total_count +=
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes process->available_count - status.available_count;
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes if (status.available_count == 0) {
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes i_assert(service->process_avail > 0);
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes if (--service->process_avail == 0)
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes service_monitor_listen_start(service);
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes }
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes process->idle_start = 0;
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes } else {
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes /* process finished servicing requests */
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes if (process->available_count == 0) {
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes if (service->process_avail++ == 0)
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes service_monitor_listen_stop(service);
ac7985784d08a3655291f24f711812b4d8b1cbcffuankg i_assert(service->process_avail <=
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes service->process_count);
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes }
ac7985784d08a3655291f24f711812b4d8b1cbcffuankg if (status.available_count == service->set->client_limit)
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes process->idle_start = ioloop_time;
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes }
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes process->available_count = status.available_count;
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes}
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes
405f61494d3ed3ca9c054dacc05a53513e172145bnicholesstatic void service_throttle_timeout(struct service *service)
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes{
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes timeout_remove(&service->to_throttle);
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes service_monitor_listen_start(service);
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes}
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes
ac7985784d08a3655291f24f711812b4d8b1cbcffuankgstatic void service_monitor_throttle(struct service *service)
0662ed52e814f8f08ef0e09956413a792584eddffuankg{
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes if (service->to_throttle != NULL)
ac7985784d08a3655291f24f711812b4d8b1cbcffuankg return;
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes i_error("service(%s): command startup failed, throttling",
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes service->name);
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes service_monitor_listen_stop(service);
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes service->to_throttle = timeout_add(THROTTLE_TIMEOUT,
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes service_throttle_timeout, service);
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes}
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes
405f61494d3ed3ca9c054dacc05a53513e172145bnicholesstatic void service_accept(struct service *service)
0662ed52e814f8f08ef0e09956413a792584eddffuankg{
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes i_assert(service->process_avail == 0);
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes if (service->process_count == service->process_limit) {
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes /* we've reached our limits, new connections will have to
0662ed52e814f8f08ef0e09956413a792584eddffuankg wait until there are more processes available */
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes service->listen_pending = TRUE;
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes service_monitor_listen_stop(service);
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes return;
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes }
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes /* create a child process and let it accept() this connection */
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes if (service_process_create(service, NULL, -1, NULL, 0) == NULL)
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes service_monitor_throttle(service);
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes else
0662ed52e814f8f08ef0e09956413a792584eddffuankg service_monitor_listen_stop(service);
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes}
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes
ac7985784d08a3655291f24f711812b4d8b1cbcffuankgstatic void service_monitor_listen_start(struct service *service)
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes{
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes struct service_listener *const *listeners;
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes unsigned int i, count;
ac7985784d08a3655291f24f711812b4d8b1cbcffuankg
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes service->listen_pending = FALSE;
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes listeners = array_get(&service->listeners, &count);
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes for (i = 0; i < count; i++) {
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes if (listeners[i]->io == NULL && listeners[i]->fd != -1) {
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes listeners[i]->io = io_add(listeners[i]->fd, IO_READ,
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes service_accept, service);
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes }
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes }
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes}
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes
405f61494d3ed3ca9c054dacc05a53513e172145bnicholesstatic void service_monitor_listen_stop(struct service *service)
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes{
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes struct service_listener *const *listeners;
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes unsigned int i, count;
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes listeners = array_get(&service->listeners, &count);
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes for (i = 0; i < count; i++) {
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes struct service_listener *l = listeners[i];
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes if (l->io != NULL)
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes io_remove(&l->io);
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes }
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes}
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes
405f61494d3ed3ca9c054dacc05a53513e172145bnicholesvoid services_monitor_start(struct service_list *service_list)
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes{
ac7985784d08a3655291f24f711812b4d8b1cbcffuankg struct service *const *services;
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes unsigned int i, count;
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes services_log_init(service_list);
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes services = array_get(&service_list->services, &count);
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes for (i = 0; i < count; i++) {
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes if (services[i]->status_fd[0] == -1) {
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes /* we haven't yet created status pipe */
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes if (pipe(services[i]->status_fd) < 0) {
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes i_error("service(%s): pipe() failed: %m",
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes services[i]->name);
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes continue;
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes }
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes net_set_nonblock(services[i]->status_fd[0], TRUE);
ac7985784d08a3655291f24f711812b4d8b1cbcffuankg fd_close_on_exec(services[i]->status_fd[0], TRUE);
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes net_set_nonblock(services[i]->status_fd[1], TRUE);
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes fd_close_on_exec(services[i]->status_fd[1], TRUE);
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes services[i]->io_status =
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes io_add(services[i]->status_fd[0], IO_READ,
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes service_status_input, services[i]);
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes }
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes if (services[i]->status_fd[0] != -1)
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes service_monitor_listen_start(services[i]);
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes }
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes if (service_process_create(service_list->log, NULL, -1, NULL, 0) != NULL)
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes service_monitor_listen_stop(service_list->log);
8ffac2c334103c0336602aaede650cb578611151fuankg if (service_process_create(service_list->config, NULL, -1, NULL, 0) != NULL)
8ffac2c334103c0336602aaede650cb578611151fuankg service_monitor_listen_stop(service_list->config);
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes}
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes
ac7985784d08a3655291f24f711812b4d8b1cbcffuankgstatic void service_monitor_stop(struct service *service)
ac7985784d08a3655291f24f711812b4d8b1cbcffuankg{
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes int i;
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes if (service->io_status != NULL)
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes io_remove(&service->io_status);
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes
ac7985784d08a3655291f24f711812b4d8b1cbcffuankg if (service->status_fd[0] != -1) {
ac7985784d08a3655291f24f711812b4d8b1cbcffuankg for (i = 0; i < 2; i++) {
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes if (close(service->status_fd[i]) < 0) {
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes i_error("service(%s): close(%d) failed: %m",
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes service->name, i);
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes }
ac7985784d08a3655291f24f711812b4d8b1cbcffuankg service->status_fd[i] = -1;
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes }
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes }
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes service_monitor_listen_stop(service);
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes if (service->to_throttle != NULL)
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes timeout_remove(&service->to_throttle);
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes}
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes
405f61494d3ed3ca9c054dacc05a53513e172145bnicholesvoid services_monitor_stop(struct service_list *service_list)
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes{
ac7985784d08a3655291f24f711812b4d8b1cbcffuankg struct service *const *services;
0662ed52e814f8f08ef0e09956413a792584eddffuankg unsigned int i, count;
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes services = array_get(&service_list->services, &count);
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes for (i = 0; i < count; i++)
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes service_monitor_stop(services[i]);
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes services_log_deinit(service_list);
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes}
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes
405f61494d3ed3ca9c054dacc05a53513e172145bnicholesvoid services_monitor_reap_children(struct service_list *service_list)
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes{
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes struct service_process *process;
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes struct service *service;
0662ed52e814f8f08ef0e09956413a792584eddffuankg pid_t pid;
405f61494d3ed3ca9c054dacc05a53513e172145bnicholes int status;
ac7985784d08a3655291f24f711812b4d8b1cbcffuankg
while ((pid = waitpid(-1, &status, WNOHANG)) > 0) {
process = hash_table_lookup(service_list->pids, &pid);
if (process == NULL) {
i_error("waitpid() returned unknown PID %s",
dec2str(pid));
continue;
}
service = process->service;
if (status == 0) {
/* success */
if (service->listen_pending)
service_monitor_listen_start(service);
} else {
/* failure */
service_process_log_status_error(process, status);
if (process->total_count == 0)
service_monitor_throttle(service);
}
service_process_destroy(process);
if (service->process_avail == 0 && service->to_throttle == NULL)
service_monitor_listen_start(service);
}
}