service-monitor.c revision d8552f9f65e5ff64be5de9faf9a8171799a0bbec
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen/* Copyright (c) 2005-2009 Dovecot authors, see the included COPYING file */
d8552f9f65e5ff64be5de9faf9a8171799a0bbecTimo Sirainen#define SERVICE_PROCESS_KILL_IDLE_MSECS (1000*60)
b2ed2b25c4c457ec1c99ebe5e9bd66a2e2f89cfdTimo Sirainen#define SERVICE_STARTUP_FAILURE_THROTTLE_SECS 60
cdc8485491045d82bb98405d4b995f277d12838eTimo Sirainenstatic void service_monitor_start_extra_avail(struct service *service);
d8552f9f65e5ff64be5de9faf9a8171799a0bbecTimo Sirainenstatic void service_process_kill_idle(struct service_process *process)
d8552f9f65e5ff64be5de9faf9a8171799a0bbecTimo Sirainen if (service->process_avail <= service->set->process_min_avail) {
d8552f9f65e5ff64be5de9faf9a8171799a0bbecTimo Sirainen /* we don't have any extra idling processes */
d8552f9f65e5ff64be5de9faf9a8171799a0bbecTimo Sirainen if (kill(process->pid, SIGINT) < 0 && errno != ESRCH) {
d8552f9f65e5ff64be5de9faf9a8171799a0bbecTimo Sirainen service_error(service, "kill(%s, SIGINT) failed: %m",
d8552f9f65e5ff64be5de9faf9a8171799a0bbecTimo Sirainenstatic void service_status_more(struct service_process *process,
d8552f9f65e5ff64be5de9faf9a8171799a0bbecTimo Sirainen process->available_count - status->available_count;
d8552f9f65e5ff64be5de9faf9a8171799a0bbecTimo Sirainen /* process used up all of its clients */
d8552f9f65e5ff64be5de9faf9a8171799a0bbecTimo Sirainen /* we may need to start more */
d8552f9f65e5ff64be5de9faf9a8171799a0bbecTimo Sirainenstatic void service_status_less(struct service_process *process,
d8552f9f65e5ff64be5de9faf9a8171799a0bbecTimo Sirainen /* process can accept more clients again */
d8552f9f65e5ff64be5de9faf9a8171799a0bbecTimo Sirainen i_assert(service->process_avail <= service->process_count);
d8552f9f65e5ff64be5de9faf9a8171799a0bbecTimo Sirainen if (status->available_count == service->client_limit) {
d8552f9f65e5ff64be5de9faf9a8171799a0bbecTimo Sirainen if (service->process_avail > service->set->process_min_avail &&
d8552f9f65e5ff64be5de9faf9a8171799a0bbecTimo Sirainen /* we have more processes than we really need.
d8552f9f65e5ff64be5de9faf9a8171799a0bbecTimo Sirainen add a bit of randomness so that we don't send the
d8552f9f65e5ff64be5de9faf9a8171799a0bbecTimo Sirainen signal to all of them at once */
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainenstatic void service_status_input(struct service *service)
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen ret = read(service->status_fd[0], &status, sizeof(status));
55bc6a7a0940ec48a68558ef70838991c5d301d2Timo Sirainen service_error(service, "read(status) failed: EOF");
55bc6a7a0940ec48a68558ef70838991c5d301d2Timo Sirainen service_error(service, "read(status) failed: %m");
55bc6a7a0940ec48a68558ef70838991c5d301d2Timo Sirainen service_error(service, "child %s sent partial status update "
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen case sizeof(status):
bad5fa318c6c1384ab83bd72d53ce06593274c18Timo Sirainen process = hash_table_lookup(service_pids, &status.pid);
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen /* we've probably wait()ed it away already. ignore */
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen if (process->uid != status.uid || process->service != service) {
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen /* a) Process was closed and another process was created with
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen the same PID, but we're still receiving status update from
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen the old process.
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen b) Some process is trying to corrupt our internal state by
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen trying to pretend to be someone else. We could use stronger
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen randomness here, but the worst they can do is DoS and there
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen are already more serious problems if someone is able to do
55bc6a7a0940ec48a68558ef70838991c5d301d2Timo Sirainen service_error(service, "Ignoring invalid update from child %s "
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen /* first status notification */
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen if (process->available_count == status.available_count)
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen if (process->available_count > status.available_count) {
d8552f9f65e5ff64be5de9faf9a8171799a0bbecTimo Sirainen /* process started servicing some more clients */
d8552f9f65e5ff64be5de9faf9a8171799a0bbecTimo Sirainen /* process finished servicing some clients */
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen process->available_count = status.available_count;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainenstatic void service_monitor_throttle(struct service *service)
55bc6a7a0940ec48a68558ef70838991c5d301d2Timo Sirainen service_error(service, "command startup failed, throttling");
b2ed2b25c4c457ec1c99ebe5e9bd66a2e2f89cfdTimo Sirainen service_throttle(service, SERVICE_STARTUP_FAILURE_THROTTLE_SECS);
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainenstatic void service_accept(struct service *service)
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen if (service->process_count == service->process_limit) {
d8552f9f65e5ff64be5de9faf9a8171799a0bbecTimo Sirainen /* we've reached our limits, new clients will have to
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen wait until there are more processes available */
e20edc0dedcdfbcfa20e9bb4c3dc460f28bfd405Timo Sirainen i_warning("service(%s): process_limit reached, "
d8552f9f65e5ff64be5de9faf9a8171799a0bbecTimo Sirainen "client connections are being dropped",
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen /* create a child process and let it accept() this connection */
6c2ce1d5bf17b21e804a079eb0f973b7ab83e0d8Timo Sirainen if (service_process_create(service, NULL, NULL) == NULL)
cdc8485491045d82bb98405d4b995f277d12838eTimo Sirainenstatic void service_monitor_start_extra_avail(struct service *service)
cdc8485491045d82bb98405d4b995f277d12838eTimo Sirainen unsigned int i, count;
cdc8485491045d82bb98405d4b995f277d12838eTimo Sirainen if (service->process_avail >= service->set->process_min_avail)
cdc8485491045d82bb98405d4b995f277d12838eTimo Sirainen count = service->set->process_min_avail - service->process_avail;
cdc8485491045d82bb98405d4b995f277d12838eTimo Sirainen if (service->process_count + count > service->process_limit)
cdc8485491045d82bb98405d4b995f277d12838eTimo Sirainen count = service->process_limit - service->process_count;
cdc8485491045d82bb98405d4b995f277d12838eTimo Sirainen for (i = 0; i < count; i++) {
cdc8485491045d82bb98405d4b995f277d12838eTimo Sirainen if (service_process_create(service, NULL, NULL) == NULL) {
cdc8485491045d82bb98405d4b995f277d12838eTimo Sirainen /* we created some processes, they'll do the listening now */
b2ed2b25c4c457ec1c99ebe5e9bd66a2e2f89cfdTimo Sirainenvoid service_monitor_listen_start(struct service *service)
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen unsigned int i, count;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen listeners = array_get(&service->listeners, &count);
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen for (i = 0; i < count; i++) {
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen if (listeners[i]->io == NULL && listeners[i]->fd != -1) {
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen listeners[i]->io = io_add(listeners[i]->fd, IO_READ,
b2ed2b25c4c457ec1c99ebe5e9bd66a2e2f89cfdTimo Sirainenvoid service_monitor_listen_stop(struct service *service)
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen unsigned int i, count;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen listeners = array_get(&service->listeners, &count);
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen for (i = 0; i < count; i++) {
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainenvoid services_monitor_start(struct service_list *service_list)
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen unsigned int i, count;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen services = array_get(&service_list->services, &count);
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen for (i = 0; i < count; i++) {
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen /* we haven't yet created status pipe */
55bc6a7a0940ec48a68558ef70838991c5d301d2Timo Sirainen service_error(services[i], "pipe() failed: %m");
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen net_set_nonblock(services[i]->status_fd[0], TRUE);
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen fd_close_on_exec(services[i]->status_fd[0], TRUE);
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen net_set_nonblock(services[i]->status_fd[1], TRUE);
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen fd_close_on_exec(services[i]->status_fd[1], TRUE);
cdc8485491045d82bb98405d4b995f277d12838eTimo Sirainen service_monitor_start_extra_avail(services[i]);
6c2ce1d5bf17b21e804a079eb0f973b7ab83e0d8Timo Sirainen if (service_process_create(service_list->log, NULL, NULL) != NULL)
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen service_monitor_listen_stop(service_list->log);
6c2ce1d5bf17b21e804a079eb0f973b7ab83e0d8Timo Sirainen if (service_process_create(service_list->config, NULL, NULL) != NULL)
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen service_monitor_listen_stop(service_list->config);
b2ed2b25c4c457ec1c99ebe5e9bd66a2e2f89cfdTimo Sirainenvoid service_monitor_stop(struct service *service)
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen for (i = 0; i < 2; i++) {
55bc6a7a0940ec48a68558ef70838991c5d301d2Timo Sirainen "close(status fd) failed: %m");
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainenvoid services_monitor_stop(struct service_list *service_list)
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen unsigned int i, count;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen services = array_get(&service_list->services, &count);
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen for (i = 0; i < count; i++)
6c2ce1d5bf17b21e804a079eb0f973b7ab83e0d8Timo Sirainenstatic void service_process_failure(struct service_process *process, int status)
6c2ce1d5bf17b21e804a079eb0f973b7ab83e0d8Timo Sirainen service_process_log_status_error(process, status);
6c2ce1d5bf17b21e804a079eb0f973b7ab83e0d8Timo Sirainen service_process_notify_add(service->list->anvil_kills, process);
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen while ((pid = waitpid(-1, &status, WNOHANG)) > 0) {
bad5fa318c6c1384ab83bd72d53ce06593274c18Timo Sirainen process = hash_table_lookup(service_pids, &pid);
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen /* success */