/* ioloop-epoll.c revision 9097014815990de521654305dec6a8d5d29407f2 */
/* Copyright (c) 2004-2014 Dovecot authors, see the included COPYING file */
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw#include "lib.h"
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw#include "array.h"
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw#include "fd-close-on-exec.h"
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw#include "ioloop-private.h"
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw#include "ioloop-iolist.h"
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw#ifdef IOLOOP_EPOLL
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw#include <sys/epoll.h>
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw#include <unistd.h>
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amwstruct ioloop_handler_context {
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw int epfd;
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw unsigned int deleted_count;
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw ARRAY(struct io_list *) fd_index;
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw ARRAY(struct epoll_event) events;
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw};
148c5f43199ca0b43fc8e3b643aab11cd66ea327Alan Wright
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amwvoid io_loop_handler_init(struct ioloop *ioloop, unsigned int initial_fd_count)
c586600796766c83eb9485c446886fd9ed2359a9Keyur Desai{
68b2bbf26c7040fea4281dcb58b81e7627e46f34Gordon Ross struct ioloop_handler_context *ctx;
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw ioloop->handler_context = ctx = i_new(struct ioloop_handler_context, 1);
bbf6f00c25b6a2bed23c35eac6d62998ecdb338cJordan Brown
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw i_array_init(&ctx->events, initial_fd_count);
3db3f65c6274eb042354801a308c8e9bc4994553amw i_array_init(&ctx->fd_index, initial_fd_count);
bbf6f00c25b6a2bed23c35eac6d62998ecdb338cJordan Brown
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw ctx->epfd = epoll_create(initial_fd_count);
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw if (ctx->epfd < 0) {
a90cf9f29973990687fa61de9f1f6ea22e924e40Gordon Ross if (errno != EMFILE)
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw i_fatal("epoll_create(): %m");
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw else {
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw i_fatal("epoll_create(): %m (you may need to increase "
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw "/proc/sys/fs/epoll/max_user_instances)");
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw }
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw }
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw fd_close_on_exec(ctx->epfd, TRUE);
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw}
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw
b1352070d318187b41b088da3533692976f3f225Alan Wrightvoid io_loop_handler_deinit(struct ioloop *ioloop)
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw{
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw struct ioloop_handler_context *ctx = ioloop->handler_context;
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw struct io_list **list;
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw unsigned int i, count;
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw list = array_get_modifiable(&ctx->fd_index, &count);
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw for (i = 0; i < count; i++)
9fb67ea305c66b6a297583b9b0db6796b0dfe497afshin salek ardakani - Sun Microsystems - Irvine United States i_free(list[i]);
9fb67ea305c66b6a297583b9b0db6796b0dfe497afshin salek ardakani - Sun Microsystems - Irvine United States
9fb67ea305c66b6a297583b9b0db6796b0dfe497afshin salek ardakani - Sun Microsystems - Irvine United States if (close(ctx->epfd) < 0)
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw i_error("close(epoll) failed: %m");
7b59d02d2a384be9a08087b14defadd214b3c1ddjb array_free(&ioloop->handler_context->fd_index);
faa1795a28a5c712eed6d0a3f84d98c368a316c6jb array_free(&ioloop->handler_context->events);
faa1795a28a5c712eed6d0a3f84d98c368a316c6jb i_free(ioloop->handler_context);
faa1795a28a5c712eed6d0a3f84d98c368a316c6jb}
faa1795a28a5c712eed6d0a3f84d98c368a316c6jb
/* epoll event masks used for the IO_ERROR, IO_READ and IO_WRITE conditions.
   The input and output masks always include the error events as well. */
#define IO_EPOLL_ERROR	(EPOLLERR | EPOLLHUP)
#define IO_EPOLL_INPUT	(EPOLLIN | EPOLLPRI | IO_EPOLL_ERROR)
#define IO_EPOLL_OUTPUT	(EPOLLOUT | IO_EPOLL_ERROR)
faa1795a28a5c712eed6d0a3f84d98c368a316c6jb
faa1795a28a5c712eed6d0a3f84d98c368a316c6jbstatic int epoll_event_mask(struct io_list *list)
faa1795a28a5c712eed6d0a3f84d98c368a316c6jb{
faa1795a28a5c712eed6d0a3f84d98c368a316c6jb int events = 0, i;
faa1795a28a5c712eed6d0a3f84d98c368a316c6jb struct io_file *io;
faa1795a28a5c712eed6d0a3f84d98c368a316c6jb
faa1795a28a5c712eed6d0a3f84d98c368a316c6jb for (i = 0; i < IOLOOP_IOLIST_IOS_PER_FD; i++) {
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw io = list->ios[i];
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw if (io == NULL)
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw continue;
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw if (io->io.condition & IO_READ)
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw events |= IO_EPOLL_INPUT;
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw if (io->io.condition & IO_WRITE)
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw events |= IO_EPOLL_OUTPUT;
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw if (io->io.condition & IO_ERROR)
a90cf9f29973990687fa61de9f1f6ea22e924e40Gordon Ross events |= IO_EPOLL_ERROR;
a90cf9f29973990687fa61de9f1f6ea22e924e40Gordon Ross }
a90cf9f29973990687fa61de9f1f6ea22e924e40Gordon Ross
a90cf9f29973990687fa61de9f1f6ea22e924e40Gordon Ross return events;
a90cf9f29973990687fa61de9f1f6ea22e924e40Gordon Ross}
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amwvoid io_loop_handle_add(struct io_file *io)
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw{
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw struct ioloop_handler_context *ctx = io->io.ioloop->handler_context;
7b59d02d2a384be9a08087b14defadd214b3c1ddjb struct io_list **list;
faa1795a28a5c712eed6d0a3f84d98c368a316c6jb struct epoll_event event;
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw int op;
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw bool first;
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw list = array_idx_modifiable(&ctx->fd_index, io->fd);
dc20a3024900c47dd2ee44b9707e6df38f7d62a5as if (*list == NULL)
faa1795a28a5c712eed6d0a3f84d98c368a316c6jb *list = i_new(struct io_list, 1);
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw first = ioloop_iolist_add(*list, io);
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw memset(&event, 0, sizeof(event));
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw event.data.ptr = *list;
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw event.events = epoll_event_mask(*list);
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw op = first ? EPOLL_CTL_ADD : EPOLL_CTL_MOD;
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw if (epoll_ctl(ctx->epfd, op, io->fd, &event) < 0) {
faa1795a28a5c712eed6d0a3f84d98c368a316c6jb if (errno == EPERM && op == EPOLL_CTL_ADD) {
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw i_fatal("epoll_ctl(add, %d) failed: %m "
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw "(fd doesn't support epoll%s)", io->fd,
9fb67ea305c66b6a297583b9b0db6796b0dfe497afshin salek ardakani - Sun Microsystems - Irvine United States io->fd != STDIN_FILENO ? "" :
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw " - instead of '<file', try 'cat file|'");
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw }
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw i_panic("epoll_ctl(%s, %d) failed: %m",
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw op == EPOLL_CTL_ADD ? "add" : "mod", io->fd);
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw }
7f3ef643e446c82e27a9386991b140b128baf22cGordon Ross
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw if (first) {
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw /* allow epoll_wait() to return the maximum number of events
dc20a3024900c47dd2ee44b9707e6df38f7d62a5as by keeping space allocated for each file descriptor */
faa1795a28a5c712eed6d0a3f84d98c368a316c6jb if (ctx->deleted_count > 0)
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw ctx->deleted_count--;
7f3ef643e446c82e27a9386991b140b128baf22cGordon Ross else
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw array_append_zero(&ctx->events);
dc20a3024900c47dd2ee44b9707e6df38f7d62a5as }
faa1795a28a5c712eed6d0a3f84d98c368a316c6jb}
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw
7f3ef643e446c82e27a9386991b140b128baf22cGordon Rossvoid io_loop_handle_remove(struct io_file *io, bool closed)
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw{
dc20a3024900c47dd2ee44b9707e6df38f7d62a5as struct ioloop_handler_context *ctx = io->io.ioloop->handler_context;
faa1795a28a5c712eed6d0a3f84d98c368a316c6jb struct io_list **list;
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw struct epoll_event event;
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw int op;
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw bool last;
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw list = array_idx_modifiable(&ctx->fd_index, io->fd);
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw last = ioloop_iolist_del(*list, io);
ccc71be50bb49efb4e31004c77fb3e065e9c0596Gordon Ross
faa1795a28a5c712eed6d0a3f84d98c368a316c6jb if (!closed) {
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw memset(&event, 0, sizeof(event));
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw event.data.ptr = *list;
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw event.events = epoll_event_mask(*list);
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw
7b59d02d2a384be9a08087b14defadd214b3c1ddjb op = last ? EPOLL_CTL_DEL : EPOLL_CTL_MOD;
faa1795a28a5c712eed6d0a3f84d98c368a316c6jb
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw if (epoll_ctl(ctx->epfd, op, io->fd, &event) < 0) {
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw const char *errstr = t_strdup_printf(
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw "epoll_ctl(%s, %d) failed: %m",
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw op == EPOLL_CTL_DEL ? "del" : "mod", io->fd);
dc20a3024900c47dd2ee44b9707e6df38f7d62a5as if (errno == EBADF)
faa1795a28a5c712eed6d0a3f84d98c368a316c6jb i_panic("%s", errstr);
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw else
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw i_error("%s", errstr);
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw }
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw }
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw if (last) {
7b59d02d2a384be9a08087b14defadd214b3c1ddjb /* since we're not freeing memory in any case, just increase
faa1795a28a5c712eed6d0a3f84d98c368a316c6jb deleted counter so next handle_add() can just decrease it
faa1795a28a5c712eed6d0a3f84d98c368a316c6jb insteading of appending to the events array */
faa1795a28a5c712eed6d0a3f84d98c368a316c6jb ctx->deleted_count++;
faa1795a28a5c712eed6d0a3f84d98c368a316c6jb }
faa1795a28a5c712eed6d0a3f84d98c368a316c6jb i_free(io);
faa1795a28a5c712eed6d0a3f84d98c368a316c6jb}
faa1795a28a5c712eed6d0a3f84d98c368a316c6jb
faa1795a28a5c712eed6d0a3f84d98c368a316c6jbvoid io_loop_handler_run_internal(struct ioloop *ioloop)
faa1795a28a5c712eed6d0a3f84d98c368a316c6jb{
faa1795a28a5c712eed6d0a3f84d98c368a316c6jb struct ioloop_handler_context *ctx = ioloop->handler_context;
faa1795a28a5c712eed6d0a3f84d98c368a316c6jb struct epoll_event *events;
faa1795a28a5c712eed6d0a3f84d98c368a316c6jb const struct epoll_event *event;
faa1795a28a5c712eed6d0a3f84d98c368a316c6jb struct io_list *list;
faa1795a28a5c712eed6d0a3f84d98c368a316c6jb struct io_file *io;
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw struct timeval tv;
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw unsigned int events_count;
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw int msecs, ret, i, j;
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw bool call;
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw /* get the time left for next timeout task */
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw msecs = io_loop_get_wait_time(ioloop, &tv);
dc20a3024900c47dd2ee44b9707e6df38f7d62a5as
faa1795a28a5c712eed6d0a3f84d98c368a316c6jb events = array_get_modifiable(&ctx->events, &events_count);
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw if (ioloop->io_files != NULL) {
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw ret = epoll_wait(ctx->epfd, events, events_count, msecs);
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw if (ret < 0 && errno != EINTR)
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw i_fatal("epoll_wait(): %m");
dc20a3024900c47dd2ee44b9707e6df38f7d62a5as } else {
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw /* no I/Os, but we should have some timeouts.
faa1795a28a5c712eed6d0a3f84d98c368a316c6jb just wait for them. */
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw i_assert(msecs >= 0);
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw usleep(msecs*1000);
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw ret = 0;
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw }
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw /* execute timeout handlers */
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw io_loop_handle_timeouts(ioloop);
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw if (!ioloop->running)
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw return;
7b59d02d2a384be9a08087b14defadd214b3c1ddjb
faa1795a28a5c712eed6d0a3f84d98c368a316c6jb for (i = 0; i < ret; i++) {
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw /* io_loop_handle_add() may cause events array reallocation,
da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0amw so we have use array_idx() */
7f3ef643e446c82e27a9386991b140b128baf22cGordon Ross event = array_idx(&ctx->events, i);
7f3ef643e446c82e27a9386991b140b128baf22cGordon Ross list = event->data.ptr;
7f3ef643e446c82e27a9386991b140b128baf22cGordon Ross
7f3ef643e446c82e27a9386991b140b128baf22cGordon Ross for (j = 0; j < IOLOOP_IOLIST_IOS_PER_FD; j++) {
7f3ef643e446c82e27a9386991b140b128baf22cGordon Ross io = list->ios[j];
7f3ef643e446c82e27a9386991b140b128baf22cGordon Ross if (io == NULL)
7f3ef643e446c82e27a9386991b140b128baf22cGordon Ross continue;
7f3ef643e446c82e27a9386991b140b128baf22cGordon Ross
7f3ef643e446c82e27a9386991b140b128baf22cGordon Ross call = FALSE;
7f3ef643e446c82e27a9386991b140b128baf22cGordon Ross if ((event->events & (EPOLLHUP | EPOLLERR)) != 0)
7f3ef643e446c82e27a9386991b140b128baf22cGordon Ross call = TRUE;
7f3ef643e446c82e27a9386991b140b128baf22cGordon Ross else if ((io->io.condition & IO_READ) != 0)
7f3ef643e446c82e27a9386991b140b128baf22cGordon Ross call = (event->events & EPOLLIN) != 0;
7f3ef643e446c82e27a9386991b140b128baf22cGordon Ross else if ((io->io.condition & IO_WRITE) != 0)
7f3ef643e446c82e27a9386991b140b128baf22cGordon Ross call = (event->events & EPOLLOUT) != 0;
7f3ef643e446c82e27a9386991b140b128baf22cGordon Ross else if ((io->io.condition & IO_ERROR) != 0)
7f3ef643e446c82e27a9386991b140b128baf22cGordon Ross call = (event->events & IO_EPOLL_ERROR) != 0;
7f3ef643e446c82e27a9386991b140b128baf22cGordon Ross
7f3ef643e446c82e27a9386991b140b128baf22cGordon Ross if (call)
7f3ef643e446c82e27a9386991b140b128baf22cGordon Ross io_loop_call_io(&io->io);
7f3ef643e446c82e27a9386991b140b128baf22cGordon Ross }
7f3ef643e446c82e27a9386991b140b128baf22cGordon Ross }
7f3ef643e446c82e27a9386991b140b128baf22cGordon Ross}
7f3ef643e446c82e27a9386991b140b128baf22cGordon Ross
7f3ef643e446c82e27a9386991b140b128baf22cGordon Ross#endif /* IOLOOP_EPOLL */
7f3ef643e446c82e27a9386991b140b128baf22cGordon Ross