/* ioloop-epoll.c revision f2767c736d72e6aa9a2aae5d0a9b89abae9e29e9 */
/* Copyright (c) 2004-2013 Dovecot authors, see the included COPYING file */

#include "lib.h"
#include "array.h"
#include "fd-close-on-exec.h"
#include "ioloop-private.h"
#include "ioloop-iolist.h"

#ifdef IOLOOP_EPOLL

#include <sys/epoll.h>
#include <unistd.h>

cf63dc8723b971cc80638fccbf494d961cbafc7fTimo Sirainenstruct ioloop_handler_context {
cf63dc8723b971cc80638fccbf494d961cbafc7fTimo Sirainen int epfd;
cf63dc8723b971cc80638fccbf494d961cbafc7fTimo Sirainen
cf63dc8723b971cc80638fccbf494d961cbafc7fTimo Sirainen unsigned int deleted_count;
a6ab8f00351265e35b79f3a22b1f5a4978ae5c35Timo Sirainen ARRAY(struct io_list *) fd_index;
a6ab8f00351265e35b79f3a22b1f5a4978ae5c35Timo Sirainen ARRAY(struct epoll_event) events;
23878bd03d1de531e3261a25598beec621351910Timo Sirainen};
a6ab8f00351265e35b79f3a22b1f5a4978ae5c35Timo Sirainen
a6ab8f00351265e35b79f3a22b1f5a4978ae5c35Timo Sirainenvoid io_loop_handler_init(struct ioloop *ioloop, unsigned int initial_fd_count)
a6ab8f00351265e35b79f3a22b1f5a4978ae5c35Timo Sirainen{
a6ab8f00351265e35b79f3a22b1f5a4978ae5c35Timo Sirainen struct ioloop_handler_context *ctx;
a6ab8f00351265e35b79f3a22b1f5a4978ae5c35Timo Sirainen
a6ab8f00351265e35b79f3a22b1f5a4978ae5c35Timo Sirainen ioloop->handler_context = ctx = i_new(struct ioloop_handler_context, 1);
a6ab8f00351265e35b79f3a22b1f5a4978ae5c35Timo Sirainen
a6ab8f00351265e35b79f3a22b1f5a4978ae5c35Timo Sirainen i_array_init(&ctx->events, initial_fd_count);
a6ab8f00351265e35b79f3a22b1f5a4978ae5c35Timo Sirainen i_array_init(&ctx->fd_index, initial_fd_count);
0f9a8663b0ff6fe30389d02284a2b002c40914ebTimo Sirainen
a6ab8f00351265e35b79f3a22b1f5a4978ae5c35Timo Sirainen ctx->epfd = epoll_create(initial_fd_count);
a9efdb661eb7a8a33aacfdcc3486dcc675a21543Timo Sirainen if (ctx->epfd < 0) {
a9efdb661eb7a8a33aacfdcc3486dcc675a21543Timo Sirainen if (errno != EMFILE)
fab850a6aee4aaef4f4795bd7946807a3ba45041Timo Sirainen i_fatal("epoll_create(): %m");
a6ab8f00351265e35b79f3a22b1f5a4978ae5c35Timo Sirainen else {
a6ab8f00351265e35b79f3a22b1f5a4978ae5c35Timo Sirainen i_fatal("epoll_create(): %m (you may need to increase "
a6ab8f00351265e35b79f3a22b1f5a4978ae5c35Timo Sirainen "/proc/sys/fs/epoll/max_user_instances)");
a6ab8f00351265e35b79f3a22b1f5a4978ae5c35Timo Sirainen }
94ba4820927b906b333e39445c1508a29387c3aaTimo Sirainen }
94ba4820927b906b333e39445c1508a29387c3aaTimo Sirainen fd_close_on_exec(ctx->epfd, TRUE);
97afa073e3e1e0301dc41173ec34beb08edcce50Timo Sirainen}
97afa073e3e1e0301dc41173ec34beb08edcce50Timo Sirainen
94ba4820927b906b333e39445c1508a29387c3aaTimo Sirainenvoid io_loop_handler_deinit(struct ioloop *ioloop)
cf63dc8723b971cc80638fccbf494d961cbafc7fTimo Sirainen{
23878bd03d1de531e3261a25598beec621351910Timo Sirainen struct ioloop_handler_context *ctx = ioloop->handler_context;
23878bd03d1de531e3261a25598beec621351910Timo Sirainen struct io_list **list;
23878bd03d1de531e3261a25598beec621351910Timo Sirainen unsigned int i, count;
23878bd03d1de531e3261a25598beec621351910Timo Sirainen
23878bd03d1de531e3261a25598beec621351910Timo Sirainen list = array_get_modifiable(&ctx->fd_index, &count);
23878bd03d1de531e3261a25598beec621351910Timo Sirainen for (i = 0; i < count; i++)
23878bd03d1de531e3261a25598beec621351910Timo Sirainen i_free(list[i]);
23878bd03d1de531e3261a25598beec621351910Timo Sirainen
23878bd03d1de531e3261a25598beec621351910Timo Sirainen if (close(ctx->epfd) < 0)
23878bd03d1de531e3261a25598beec621351910Timo Sirainen i_error("close(epoll) failed: %m");
23878bd03d1de531e3261a25598beec621351910Timo Sirainen array_free(&ioloop->handler_context->fd_index);
23878bd03d1de531e3261a25598beec621351910Timo Sirainen array_free(&ioloop->handler_context->events);
94ba4820927b906b333e39445c1508a29387c3aaTimo Sirainen i_free(ioloop->handler_context);
a6ab8f00351265e35b79f3a22b1f5a4978ae5c35Timo Sirainen}
/* Error and hang-up conditions; part of both interest masks below. */
#define IO_EPOLL_ERROR (EPOLLERR | EPOLLHUP)
/* epoll events requested for an I/O whose condition includes IO_READ. */
#define IO_EPOLL_INPUT (EPOLLIN | EPOLLPRI | IO_EPOLL_ERROR)
/* epoll events requested for an I/O whose condition includes IO_WRITE. */
#define IO_EPOLL_OUTPUT (EPOLLOUT | IO_EPOLL_ERROR)
static int epoll_event_mask(struct io_list *list)
{
int events = 0, i;
struct io_file *io;
for (i = 0; i < IOLOOP_IOLIST_IOS_PER_FD; i++) {
io = list->ios[i];
if (io == NULL)
continue;
if (io->io.condition & IO_READ)
events |= IO_EPOLL_INPUT;
if (io->io.condition & IO_WRITE)
events |= IO_EPOLL_OUTPUT;
if (io->io.condition & IO_ERROR)
events |= IO_EPOLL_ERROR;
}
return events;
}
/* Start watching `io` with epoll.

   The I/O is stored in the io_list of its file descriptor (the list is
   created on first use). The first I/O on a descriptor registers it with
   EPOLL_CTL_ADD; later ones only update the interest mask with
   EPOLL_CTL_MOD. An epoll_ctl() failure is reported via i_fatal() for
   EPERM on add, and via i_panic() otherwise. */
void io_loop_handle_add(struct io_file *io)
{
	struct ioloop_handler_context *ctx = io->io.ioloop->handler_context;
	struct io_list **slot;
	struct epoll_event ev;
	bool is_first;
	int ctl_op;

	slot = array_idx_modifiable(&ctx->fd_index, io->fd);
	if (*slot == NULL)
		*slot = i_new(struct io_list, 1);
	is_first = ioloop_iolist_add(*slot, io);

	memset(&ev, 0, sizeof(ev));
	ev.data.ptr = *slot;
	ev.events = epoll_event_mask(*slot);

	if (is_first)
		ctl_op = EPOLL_CTL_ADD;
	else
		ctl_op = EPOLL_CTL_MOD;

	if (epoll_ctl(ctx->epfd, ctl_op, io->fd, &ev) < 0) {
		if (errno == EPERM && ctl_op == EPOLL_CTL_ADD) {
			i_fatal("epoll_ctl(add, %d) failed: %m "
				"(fd doesn't support epoll%s)", io->fd,
				io->fd != STDIN_FILENO ? "" :
				" - instead of '<file', try 'cat file|'");
		}
		i_panic("epoll_ctl(%s, %d) failed: %m",
			ctl_op == EPOLL_CTL_ADD ? "add" : "mod", io->fd);
	}

	if (!is_first)
		return;

	/* A newly registered descriptor needs its own slot in the events
	   array so that epoll_wait() can report every descriptor at once.
	   Reuse a slot left behind by a removed descriptor if there is one. */
	if (ctx->deleted_count > 0)
		ctx->deleted_count--;
	else
		array_append_zero(&ctx->events);
}
/* Stop watching `io` and free it.

   The I/O is taken out of the io_list of its file descriptor. Unless
   `closed` is set, epoll is updated as well: EPOLL_CTL_DEL when this was
   the last I/O on the descriptor, EPOLL_CTL_MOD with the reduced interest
   mask otherwise. An epoll_ctl() failure with EBADF is reported via
   i_panic(); any other failure is only logged with i_error().

   NOTE(review): `closed` presumably means the caller has already closed
   the descriptor, which is why epoll_ctl() is skipped - confirm against
   callers. */
void io_loop_handle_remove(struct io_file *io, bool closed)
{
	struct ioloop_handler_context *ctx = io->io.ioloop->handler_context;
	struct io_list **list;
	struct epoll_event event;
	int op;
	bool last;

	list = array_idx_modifiable(&ctx->fd_index, io->fd);
	last = ioloop_iolist_del(*list, io);

	if (!closed) {
		memset(&event, 0, sizeof(event));
		event.data.ptr = *list;
		event.events = epoll_event_mask(*list);

		op = last ? EPOLL_CTL_DEL : EPOLL_CTL_MOD;
		if (epoll_ctl(ctx->epfd, op, io->fd, &event) < 0) {
			/* remember the failure reason before calling anything
			   else: the formatting call below is free to modify
			   errno, and the EBADF check must see epoll_ctl()'s
			   value */
			int saved_errno = errno;
			const char *errstr = t_strdup_printf(
				"epoll_ctl(%s, %d) failed: %m",
				op == EPOLL_CTL_DEL ? "del" : "mod", io->fd);

			if (saved_errno == EBADF)
				i_panic("%s", errstr);
			else
				i_error("%s", errstr);
		}
	}
	if (last) {
		/* since we're not freeing memory in any case, just increase
		   the deleted counter so the next handle_add() can decrease
		   it instead of appending to the events array */
		ctx->deleted_count++;
	}
	i_free(io);
}
/* Run one iteration of the I/O loop.

   Waits with epoll_wait() for I/O events, for at most the time left until
   the next timeout (or just sleeps when no descriptors are registered),
   then runs the timeout handlers and finally calls the callbacks of the
   I/Os whose descriptors reported events. A failing epoll_wait() is
   reported via i_fatal(), except for EINTR, which is treated as "no
   events". Nothing is dispatched if a timeout handler stopped the loop. */
void io_loop_handler_run(struct ioloop *ioloop)
{
	/* upper bound for a single sleep; keeps msecs*1000 within int range */
	enum { MAX_SLEEP_MSECS = 1000 * 1000 };
	struct ioloop_handler_context *ctx = ioloop->handler_context;
	struct epoll_event *events;
	const struct epoll_event *event;
	struct io_list *list;
	struct io_file *io;
	struct timeval tv;
	unsigned int events_count;
	int msecs, ret, i, j;
	bool call;

	/* get the time left for next timeout task */
	msecs = io_loop_get_wait_time(ioloop, &tv);

	events = array_get_modifiable(&ctx->events, &events_count);
	if (events_count > 0) {
		ret = epoll_wait(ctx->epfd, events, events_count, msecs);
		if (ret < 0 && errno != EINTR)
			i_fatal("epoll_wait(): %m");
	} else {
		/* no I/Os, but we should have some timeouts.
		   just wait for them. */
		i_assert(msecs >= 0);
		/* msecs*1000 would overflow int for waits above ~35 minutes.
		   Sleeping for less than requested is harmless: returning
		   before the timeout is due already happens when the wait is
		   interrupted, and the wait time is recomputed on every
		   call. */
		if (msecs > MAX_SLEEP_MSECS)
			msecs = MAX_SLEEP_MSECS;
		usleep(msecs*1000);
		ret = 0;
	}

	/* execute timeout handlers */
	io_loop_handle_timeouts(ioloop);

	if (!ioloop->running)
		return;

	/* ret is -1 after EINTR, so the loop below is skipped in that case */
	for (i = 0; i < ret; i++) {
		/* io_loop_handle_add() may cause events array reallocation,
		   so we have to use array_idx() */
		event = array_idx(&ctx->events, i);
		list = event->data.ptr;

		for (j = 0; j < IOLOOP_IOLIST_IOS_PER_FD; j++) {
			/* re-read the slot on every round: it is NULL when
			   no I/O is stored there */
			io = list->ios[j];
			if (io == NULL)
				continue;

			call = FALSE;
			/* hang-up or error wakes every I/O on the descriptor,
			   whatever condition it asked for */
			if ((event->events & (EPOLLHUP | EPOLLERR)) != 0)
				call = TRUE;
			else if ((io->io.condition & IO_READ) != 0)
				call = (event->events & EPOLLIN) != 0;
			else if ((io->io.condition & IO_WRITE) != 0)
				call = (event->events & EPOLLOUT) != 0;
			else if ((io->io.condition & IO_ERROR) != 0)
				call = (event->events & IO_EPOLL_ERROR) != 0;

			if (call)
				io_loop_call_io(&io->io);
		}
	}
}
#endif /* IOLOOP_EPOLL */