director.c revision b0421c7397be2146988ee3afb5dcc491c01206cc
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch/* Copyright (c) 2010-2017 Dovecot authors, see the included COPYING file */
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch#define DIRECTOR_RECONNECT_TIMEOUT_MSECS (30*1000)
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch#define DIRECTOR_USER_MOVE_TIMEOUT_MSECS (30*1000)
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch#define DIRECTOR_QUICK_RECONNECT_TIMEOUT_MSECS 1000
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch#define DIRECTOR_DELAYED_DIR_REMOVE_MSECS (1000*30)
7384b4e78eaab44693c985192276e31322155e32Stephan Boschconst char *user_kill_state_names[USER_KILL_STATE_DELAY+1] = {
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch "notify-received",
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch "waiting-for-notify",
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch "waiting-for-everyone",
7384b4e78eaab44693c985192276e31322155e32Stephan Boschstatic struct log_throttle *user_move_throttle;
7384b4e78eaab44693c985192276e31322155e32Stephan Boschstatic struct log_throttle *user_kill_fail_throttle;
7384b4e78eaab44693c985192276e31322155e32Stephan Boschstatic const struct log_throttle_settings director_log_throttle_settings = {
7384b4e78eaab44693c985192276e31322155e32Stephan Boschdirector_user_kill_finish_delayed(struct director *dir, struct user *user,
7384b4e78eaab44693c985192276e31322155e32Stephan Boschstatic bool director_is_self_ip_set(struct director *dir)
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch if (net_ip_compare(&dir->self_ip, &net_ip4_any))
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch if (net_ip_compare(&dir->self_ip, &net_ip6_any))
7384b4e78eaab44693c985192276e31322155e32Stephan Boschstatic void director_find_self_ip(struct director *dir)
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch unsigned int i, count;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch for (i = 0; i < count; i++) {
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch i_fatal("director_servers doesn't list ourself");
65c0e43da8cfc730eeb4634f8aa384081bbfa4e7Timo Sirainen dir->self_host = director_host_lookup(dir, &dir->self_ip,
65c0e43da8cfc730eeb4634f8aa384081bbfa4e7Timo Sirainen i_fatal("director_servers doesn't list ourself (%s:%u)",
7384b4e78eaab44693c985192276e31322155e32Stephan Boschstatic unsigned int director_find_self_idx(struct director *dir)
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch unsigned int i, count;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch for (i = 0; i < count; i++) {
7384b4e78eaab44693c985192276e31322155e32Stephan Boschdirector_has_outgoing_connection(struct director *dir,
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch if (director_connection_get_host(*connp) == host &&
7384b4e78eaab44693c985192276e31322155e32Stephan Boschdirector_log_connect(struct director *dir, struct director_host *host,
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch str_printfa(str, ", last network failure %ds ago",
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch (int)(ioloop_time - host->last_network_failure));
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch str_printfa(str, ", last protocol failure %ds ago",
65c0e43da8cfc730eeb4634f8aa384081bbfa4e7Timo Sirainen (int)(ioloop_time - host->last_protocol_failure));
65c0e43da8cfc730eeb4634f8aa384081bbfa4e7Timo Sirainen net_ip2addr(&dir->self_ip), str_c(str), reason);
7384b4e78eaab44693c985192276e31322155e32Stephan Boschint director_connect_host(struct director *dir, struct director_host *host,
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch if (director_has_outgoing_connection(dir, host))
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch port = dir->test_port != 0 ? dir->test_port : host->port;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch fd = net_connect_ip(&host->ip, port, &dir->self_ip);
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch i_error("connect(%s) failed: %m", host->name);
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch /* Reset timestamp so that director_connect() won't skip this host
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch while we're still trying to connect to it */
65c0e43da8cfc730eeb4634f8aa384081bbfa4e7Timo Sirainen (void)director_connection_init_out(dir, fd, host);
7384b4e78eaab44693c985192276e31322155e32Stephan Boschdirector_get_preferred_right_host(struct director *dir)
65c0e43da8cfc730eeb4634f8aa384081bbfa4e7Timo Sirainen for (i = 0; i < count; i++) {
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch /* self, with some removed hosts */
7384b4e78eaab44693c985192276e31322155e32Stephan Boschstatic void director_quick_reconnect_retry(struct director *dir)
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch director_connect(dir, "Alone in director ring - trying to connect to others");
7384b4e78eaab44693c985192276e31322155e32Stephan Boschstatic bool director_wait_for_others(struct director *dir)
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch /* don't assume we're alone until we've attempted to connect
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch to others for a while */
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch ioloop_time - dir->ring_first_alone > DIRECTOR_RING_MIN_WAIT_SECS)
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch /* reset all failures and try again */
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch dir->to_reconnect = timeout_add(DIRECTOR_QUICK_RECONNECT_TIMEOUT_MSECS,
65c0e43da8cfc730eeb4634f8aa384081bbfa4e7Timo Sirainenvoid director_connect(struct director *dir, const char *reason)
65c0e43da8cfc730eeb4634f8aa384081bbfa4e7Timo Sirainen /* try to connect to first working server on our right side.
65c0e43da8cfc730eeb4634f8aa384081bbfa4e7Timo Sirainen the left side is supposed to connect to us. */
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch DIRECTOR_RECONNECT_RETRY_SECS > ioloop_time) {
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch /* connection failed recently, don't try retrying here */
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch DIRECTOR_PROTOCOL_FAILURE_RETRY_SECS > ioloop_time) {
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch /* the director recently sent invalid protocol data,
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch don't try retrying yet */
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch if (director_connect_host(dir, hosts[idx], reason) == 0) {
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch /* success */
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch if (count > 1 && director_wait_for_others(dir))
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch /* we're the only one */
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch i_warning("director: Couldn't connect to right side, "
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch "we must be the only director left");
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch /* since we couldn't connect to it,
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch it must have failed recently */
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch i_warning("director: Assuming %s is dead, disconnecting",
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch "This connection is dead?");
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch dir->ring_min_version = DIRECTOR_VERSION_MINOR;
7384b4e78eaab44693c985192276e31322155e32Stephan Boschvoid director_set_ring_handshaked(struct director *dir)
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch "continuing delayed requests");
7384b4e78eaab44693c985192276e31322155e32Stephan Boschstatic void director_reconnect_timeout(struct director *dir)
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch struct director_host *cur_host, *preferred_host =
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch /* all directors have been removed, try again later */
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch (void)director_connect_host(dir, preferred_host,
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch "Reconnect attempt to preferred director");
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch /* the connection hasn't finished sync yet.
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch keep this timeout for now. */
7384b4e78eaab44693c985192276e31322155e32Stephan Boschvoid director_set_ring_synced(struct director *dir)
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch i_assert((dir->left != NULL && dir->right != NULL) ||
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch i_warning("Ring is synced, continuing delayed requests "
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch "(syncing took %d secs, hosts_hash=%u)",
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch (int)(ioloop_time - dir->ring_last_sync_time),
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch if (host != director_get_preferred_right_host(dir)) {
d45ab3fff7c47f1719b9cd310228c0dac2bdd1b2Timo Sirainen /* try to reconnect to preferred host later */
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch director_connection_set_synced(dir->left, TRUE);
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch director_connection_set_synced(dir->right, TRUE);
7384b4e78eaab44693c985192276e31322155e32Stephan Boschvoid director_sync_send(struct director *dir, struct director_host *host,
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch unsigned int timestamp, unsigned int hosts_hash)
65c0e43da8cfc730eeb4634f8aa384081bbfa4e7Timo Sirainen dir->last_sync_sent_ring_change_counter = dir->ring_change_counter;
65c0e43da8cfc730eeb4634f8aa384081bbfa4e7Timo Sirainen director_connection_get_minor_version(dir->right) > 0) {
65c0e43da8cfc730eeb4634f8aa384081bbfa4e7Timo Sirainen /* only minor_version>0 supports extra parameters */
65c0e43da8cfc730eeb4634f8aa384081bbfa4e7Timo Sirainen str_printfa(str, "\t%u\t%u\t%u", minor_version,
65c0e43da8cfc730eeb4634f8aa384081bbfa4e7Timo Sirainen director_connection_send(dir->right, str_c(str));
65c0e43da8cfc730eeb4634f8aa384081bbfa4e7Timo Sirainen /* ping our connections in case either of them are hanging.
65c0e43da8cfc730eeb4634f8aa384081bbfa4e7Timo Sirainen if they are, we want to know it fast. */
50a6d26bd9041f44b4cad0c0357c0c604c132cc8Stephan Boschbool director_resend_sync(struct director *dir)
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch if (!dir->ring_synced && dir->left != NULL && dir->right != NULL) {
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch /* send a new SYNC in case the previous one got dropped */
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch dir->self_host->last_sync_timestamp = ioloop_time;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch director_sync_send(dir, dir->self_host, dir->sync_seq,
50a6d26bd9041f44b4cad0c0357c0c604c132cc8Stephan Boschstatic void director_sync_timeout(struct director *dir)
50a6d26bd9041f44b4cad0c0357c0c604c132cc8Stephan Bosch i_error("Ring SYNC seq=%u appears to have got lost, resending", dir->sync_seq);
50a6d26bd9041f44b4cad0c0357c0c604c132cc8Stephan Boschvoid director_set_ring_unsynced(struct director *dir)
65c0e43da8cfc730eeb4634f8aa384081bbfa4e7Timo Sirainen dir->to_sync = timeout_add(DIRECTOR_SYNC_TIMEOUT_MSECS,
65c0e43da8cfc730eeb4634f8aa384081bbfa4e7Timo Sirainenstatic void director_sync(struct director *dir)
65c0e43da8cfc730eeb4634f8aa384081bbfa4e7Timo Sirainen /* we're synced again when we receive this SYNC back */
65c0e43da8cfc730eeb4634f8aa384081bbfa4e7Timo Sirainen if (dir->right == NULL && dir->left == NULL) {
65c0e43da8cfc730eeb4634f8aa384081bbfa4e7Timo Sirainen /* we're alone. if we're already synced,
65c0e43da8cfc730eeb4634f8aa384081bbfa4e7Timo Sirainen don't become unsynced. */
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch dir_debug("Ring is desynced (seq=%u, no right connection)",
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch dir_debug("Ring is desynced (seq=%u, sending SYNC to %s)",
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch dir->sync_seq, dir->right == NULL ? "(nowhere)" :
50a6d26bd9041f44b4cad0c0357c0c604c132cc8Stephan Bosch /* send PINGs to our connections more rapidly until we've synced again.
65c0e43da8cfc730eeb4634f8aa384081bbfa4e7Timo Sirainen if the connection has actually died, we don't need to wait (and
65c0e43da8cfc730eeb4634f8aa384081bbfa4e7Timo Sirainen delay requests) for as long to detect it */
65c0e43da8cfc730eeb4634f8aa384081bbfa4e7Timo Sirainen director_connection_set_synced(dir->left, FALSE);
65c0e43da8cfc730eeb4634f8aa384081bbfa4e7Timo Sirainen director_connection_set_synced(dir->right, FALSE);
65c0e43da8cfc730eeb4634f8aa384081bbfa4e7Timo Sirainen director_sync_send(dir, dir->self_host, dir->sync_seq,
65c0e43da8cfc730eeb4634f8aa384081bbfa4e7Timo Sirainenvoid director_sync_freeze(struct director *dir)
7384b4e78eaab44693c985192276e31322155e32Stephan Boschvoid director_notify_ring_added(struct director_host *added_host,
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch const char *cmd;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch i_info("Adding director %s to ring (requested by %s)",
unsigned int i, count;
for (i = 0; i < count; ) {
unsigned int i, count;
const char *cmd;
for (i = 0; i < count; ) {
const char *cmd;
i_unreached();
if (result == 0) {
char *data;
data);
const char *error;
const char *const args[] = {
error);
bool skip_delay)
if (skip_delay) {
} else if (self ||
switch (state) {
case IPC_CLIENT_CMD_STATE_OK:
const char *cmd;
unsigned int username_hash,
const char *cmd;
unsigned int username_hash)
unsigned int username_hash)
case USER_KILL_STATE_KILLING:
case USER_KILL_STATE_NONE:
case USER_KILL_STATE_FLUSHING:
case USER_KILL_STATE_DELAY:
unsigned int username_hash)
unsigned int username_hash)
const char *cmd)
struct director *
return dir;
if (!director_debug)
T_BEGIN {
} T_END;
struct director_user_iter {
unsigned int tag_idx;
return iter;
return NULL;
return user;
unsigned int *hash_r)
const char *error;
&error))
return TRUE;
return FALSE;
void directors_init(void)
void directors_deinit(void)