director.c revision aad23ef1531748997776e50a5272a2fc8e022f04
2892N/A/* Copyright (c) 2010-2016 Dovecot authors, see the included COPYING file */ 5403N/A /* Reset timestamp so that director_connect() won't skip this host 5403N/A while we're still trying to connect to it */ 2892N/A /* self, with some removed hosts */ 2892N/A /* don't assume we're alone until we've attempted to connect 2892N/A /* reset all failures and try again */ 2892N/A /* try to connect to first working server on our right side. 2892N/A the left side is supposed to connect to us. */ 2892N/A /* connection failed recently, don't try retrying here */ 3998N/A /* the director recently sent invalid protocol data, 2892N/A "we must be the only director left");
2892N/A /* since we couldn't connect to it, 2892N/A it must have failed recently */ 4973N/A "This connection is dead?");
2892N/A "continuing delayed requests");
/* all directors have been removed, try again later */ /* the connection hasn't finished sync yet. keep this timeout for now. */ i_warning(
"Ring is synced, continuing delayed requests " "(syncing took %d secs, hosts_hash=%u)",
/* try to reconnect to preferred host later */ /* only minor_version>0 supports extra parameters */ /* ping our connections in case either of them are hanging. if they are, we want to know it fast. */ /* send a new SYNC in case the previous one got dropped */ i_error(
"Ring SYNC appears to have got lost, resending");
/* we're synced again when we receive this SYNC back */ /* we're alone. if we're already synced, don't become unsynced. */ dir_debug(
"Ring is desynced (seq=%u, sending SYNC to %s)",
/* send PINGs to our connections more rapidly until we've synced again. if the connection has actually died, we don't need to wait (and delay requests) for as long to detect it */ for (i = 0; i <
count; ) {
/* others will just disconnect us */ /* mark the host as removed and fully remove it later. this delay is needed, because the removal may trigger director reconnections, which may send the director back and we don't want to re-add it */ /* disconnect any connections to the host */ for (i = 0; i <
count; ) {
i_error(
"Ring has directors that don't support tags - removing host %s with tag '%s'",
i_error(
"Ring has directors that support mixed versions of tags - removing host %s with tag '%s'",
/* add any further version checks here - these directors ignore any extra unknown arguments */ /* update state in case this is the first mail host being added */ "down=%d last_updown_change=%ld (hosts_hash=%u)",
/* mark the host desynced until ring is synced again. except if we're alone in the ring that never happens. */ "HOST-REMOVE\t%s\t%u\t%u\t%s\n",
"HOST-FLUSH\t%s\t%u\t%u\t%s\n",
/* only two directors in this ring and we're forwarding USER-WEAK from one director back to itself via another so it sees we've received it. we can't use director_update_send() for this, because it doesn't send data back to the source. */ i_error(
"%s: Failed to flush user hash %u in host %s: %s",
i_error(
"%s: Failed to flush user hash %u in host %s: %s",
/* user was already freed - ignore */ dir_debug(
"User %u freed while flushing, result=%d",
/* ctx is freed later via user->kill_ctx */ dir_debug(
"Flushing user %u finished, result=%d",
/* Execute flush script, if set. Only the director that started the user moving will call the flush script. Having each director do it would be redundant since they're all supposed to be performing the same flush task to the same backend. */ const char *
const args[] = {
"FLUSH",
i_error(
"%s: Failed to flush user hash %u in host %s: %s",
/* wait for a while for the kills to finish in the backend server, so there are no longer any processes running for the user before we start letting new in connections to the new server. */ /* this is an asynchronous notification about user being killed. there are no guarantees about what might have happened to the user /* shouldn't get here. the command reply isn't finished yet. */ i_error(
"Failed to kill user %u connections: %s",
/* we can't really do anything but continue anyway */ /* user was already freed - ignore */ /* we were still waiting for the kill notification */ i_error(
"%u users' move timed out, their state may now be inconsistent",
i_error(
"Finishing user %u move timed out, " "its state may now be inconsistent (state=%s)",
/* User is being moved again before the previous move finished. We'll just continue wherever we left off dir_debug(
"User %u move restarted - previous kill_state=%s",
/* we didn't even know about the user before now. don't bother performing a local kick, since it wouldn't /* 1. move this user's host, and set its "killing" flag to delay all of its future connections until all directors have killed the connections and notified us about it. 2. tell the other directors about the move 3. once user kill callback is called, tell the other directors with USER-KILLED that we're done killing the user. 4. when some director gets a duplicate USER-KILLED, it's responsible for notifying all directors that user is completely 5. after receiving USER-KILLED-EVERYWHERE notification, new connections are again allowed for the user. /* user is already in this host */ "USER-MOVE\t%s\t%u\t%u\t%u\t%s\n",
"USER-KILLED-EVERYWHERE\t%s\t%u\t%u\t%u\n",
dir_debug(
"User %u kill_state=%s - ignoring USER-KILLED",
/* move restarted. state=none can also happen if USER-MOVE was sent while we were still moving. send back USER-KILLED-EVERYWHERE to avoid hangs. */ dir_debug(
"User %u no longer exists - ignoring USER-KILLED-EVERYWHERE",
dir_debug(
"User %u is no longer being killed - ignoring USER-KILLED-EVERYWHERE",
dir_debug(
"User %u kill_state=%s - ignoring USER-KILLED-EVERYWHERE",
/* we may get called to here from various places. use a timeout to make sure the state callback is called with a clean state. */ /* director_user_expire is very short. user expired before moving the user finished or timed out. */ /* kill_ctx is used as a callback parameter. only remove the timeout and finish the free later. */