director.c revision 7a380e77afc69a81725d410cd67082a37cf140d2
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen/* Copyright (c) 2010-2017 Dovecot authors, see the included COPYING file */
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen#define DIRECTOR_RECONNECT_TIMEOUT_MSECS (30*1000)
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen#define DIRECTOR_USER_MOVE_TIMEOUT_MSECS (30*1000)
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen#define DIRECTOR_QUICK_RECONNECT_TIMEOUT_MSECS 1000
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen#define DIRECTOR_DELAYED_DIR_REMOVE_MSECS (1000*30)
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainenconst char *user_kill_state_names[USER_KILL_STATE_DELAY+1] = {
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen "notify-received",
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen "waiting-for-notify",
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen "waiting-for-everyone",
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainenstatic struct log_throttle *user_move_throttle;
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainenstatic struct log_throttle *user_kill_fail_throttle;
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainenstatic const struct log_throttle_settings director_log_throttle_settings = {
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainendirector_user_kill_finish_delayed(struct director *dir, struct user *user,
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainenstatic bool director_is_self_ip_set(struct director *dir)
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen if (net_ip_compare(&dir->self_ip, &net_ip4_any))
cfa9359fbd6a967ccdcd553c5e483a093885ab6fTimo Sirainen if (net_ip_compare(&dir->self_ip, &net_ip6_any))
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainenstatic void director_find_self_ip(struct director *dir)
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen unsigned int i, count;
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen for (i = 0; i < count; i++) {
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen i_fatal("director_servers doesn't list ourself");
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen dir->self_host = director_host_lookup(dir, &dir->self_ip,
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen i_fatal("director_servers doesn't list ourself (%s:%u)",
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainenstatic unsigned int director_find_self_idx(struct director *dir)
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen unsigned int i, count;
b0be0bead3d6963149f7f2a9504b8ab5aced9af5Timo Sirainen for (i = 0; i < count; i++) {
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainendirector_has_outgoing_connection(struct director *dir,
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen if (director_connection_get_host(*connp) == host &&
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainenint director_connect_host(struct director *dir, struct director_host *host)
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen if (director_has_outgoing_connection(dir, host))
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen str_printfa(str, "Connecting to %s:%u (as %s",
cfa9359fbd6a967ccdcd553c5e483a093885ab6fTimo Sirainen str_printfa(str, ", last network failure %ds ago",
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen (int)(ioloop_time - host->last_network_failure));
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen str_printfa(str, ", last protocol failure %ds ago",
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen (int)(ioloop_time - host->last_protocol_failure));
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen port = dir->test_port != 0 ? dir->test_port : host->port;
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen fd = net_connect_ip(&host->ip, port, &dir->self_ip);
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen i_error("connect(%s) failed: %m", host->name);
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen /* Reset timestamp so that director_connect() won't skip this host
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen while we're still trying to connect to it */
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen (void)director_connection_init_out(dir, fd, host);
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainendirector_get_preferred_right_host(struct director *dir)
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen for (i = 0; i < count; i++) {
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen /* self, with some removed hosts */
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainenstatic bool director_wait_for_others(struct director *dir)
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen /* don't assume we're alone until we've attempted to connect
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen to others for a while */
cfa9359fbd6a967ccdcd553c5e483a093885ab6fTimo Sirainen ioloop_time - dir->ring_first_alone > DIRECTOR_RING_MIN_WAIT_SECS)
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen /* reset all failures and try again */
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen dir->to_reconnect = timeout_add(DIRECTOR_QUICK_RECONNECT_TIMEOUT_MSECS,
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen /* try to connect to first working server on our right side.
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen the left side is supposed to connect to us. */
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainen DIRECTOR_RECONNECT_RETRY_SECS > ioloop_time) {
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainen /* connection failed recently, don't try retrying here */
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen DIRECTOR_PROTOCOL_FAILURE_RETRY_SECS > ioloop_time) {
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainen /* the director recently sent invalid protocol data,
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainen don't try retrying yet */
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainen if (director_connect_host(dir, hosts[idx]) == 0) {
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen /* success */
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainen if (count > 1 && director_wait_for_others(dir))
2d49f150b4bce6f2f59a84e268e4777901c3e42cTimo Sirainen /* we're the only one */
2d49f150b4bce6f2f59a84e268e4777901c3e42cTimo Sirainen i_warning("director: Couldn't connect to right side, "
2d49f150b4bce6f2f59a84e268e4777901c3e42cTimo Sirainen "we must be the only director left");
2d49f150b4bce6f2f59a84e268e4777901c3e42cTimo Sirainen /* since we couldn't connect to it,
2d49f150b4bce6f2f59a84e268e4777901c3e42cTimo Sirainen it must have failed recently */
2d49f150b4bce6f2f59a84e268e4777901c3e42cTimo Sirainen i_warning("director: Assuming %s is dead, disconnecting",
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainen "This connection is dead?");
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen dir->ring_min_version = DIRECTOR_VERSION_MINOR;
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainenvoid director_set_ring_handshaked(struct director *dir)
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen "continuing delayed requests");
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainenstatic void director_reconnect_timeout(struct director *dir)
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen struct director_host *cur_host, *preferred_host =
1d3f7c1278168d5b1cbfa9a2cc9929a0909056b4Timo Sirainen /* all directors have been removed, try again later */
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen (void)director_connect_host(dir, preferred_host);
1d3f7c1278168d5b1cbfa9a2cc9929a0909056b4Timo Sirainen /* the connection hasn't finished sync yet.
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen keep this timeout for now. */
3697080532ccd9f51fac108be6079b616c7a2ddfTimo Sirainenvoid director_set_ring_synced(struct director *dir)
3697080532ccd9f51fac108be6079b616c7a2ddfTimo Sirainen i_assert((dir->left != NULL && dir->right != NULL) ||
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen i_warning("Ring is synced, continuing delayed requests "
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen "(syncing took %d secs, hosts_hash=%u)",
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen (int)(ioloop_time - dir->ring_last_sync_time),
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen if (host != director_get_preferred_right_host(dir)) {
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen /* try to reconnect to preferred host later */
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen director_connection_set_synced(dir->left, TRUE);
3697080532ccd9f51fac108be6079b616c7a2ddfTimo Sirainen director_connection_set_synced(dir->right, TRUE);
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainenvoid director_sync_send(struct director *dir, struct director_host *host,
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainen unsigned int timestamp, unsigned int hosts_hash)
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen dir->last_sync_sent_ring_change_counter = dir->ring_change_counter;
3697080532ccd9f51fac108be6079b616c7a2ddfTimo Sirainen director_connection_get_minor_version(dir->right) > 0) {
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen /* only minor_version>0 supports extra parameters */
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen str_printfa(str, "\t%u\t%u\t%u", minor_version,
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen director_connection_send(dir->right, str_c(str));
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen /* ping our connections in case either of them is hanging.
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen if they are, we want to know it fast. */
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainenbool director_resend_sync(struct director *dir)
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen if (!dir->ring_synced && dir->left != NULL && dir->right != NULL) {
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen /* send a new SYNC in case the previous one got dropped */
3697080532ccd9f51fac108be6079b616c7a2ddfTimo Sirainen dir->self_host->last_sync_timestamp = ioloop_time;
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen director_sync_send(dir, dir->self_host, dir->sync_seq,
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainenstatic void director_sync_timeout(struct director *dir)
3697080532ccd9f51fac108be6079b616c7a2ddfTimo Sirainen i_error("Ring SYNC appears to have got lost, resending");
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainenvoid director_set_ring_unsynced(struct director *dir)
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainen dir->to_sync = timeout_add(DIRECTOR_SYNC_TIMEOUT_MSECS,
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainenstatic void director_sync(struct director *dir)
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainen /* we're synced again when we receive this SYNC back */
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainen if (dir->right == NULL && dir->left == NULL) {
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainen /* we're alone. if we're already synced,
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainen don't become unsynced. */
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainen dir_debug("Ring is desynced (seq=%u, sending SYNC to %s)",
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainen dir->sync_seq, dir->right == NULL ? "(nowhere)" :
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen /* send PINGs to our connections more rapidly until we've synced again.
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen if the connection has actually died, we don't need to wait (and
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainen delay requests) for as long to detect it */
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen director_connection_set_synced(dir->left, FALSE);
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen director_connection_set_synced(dir->right, FALSE);
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen director_sync_send(dir, dir->self_host, dir->sync_seq,
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainenvoid director_sync_freeze(struct director *dir)
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainenvoid director_notify_ring_added(struct director_host *added_host,
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen const char *cmd;
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen net_ip2addr(&added_host->ip), added_host->port);
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen director_update_send(added_host->dir, src, cmd);
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainenstatic void director_delayed_dir_remove_timeout(struct director *dir)
3697080532ccd9f51fac108be6079b616c7a2ddfTimo Sirainen unsigned int i, count;
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen for (i = 0; i < count; ) {
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainenvoid director_ring_remove(struct director_host *removed_host,
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainen struct director_connection *const *conns, *conn;
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainen unsigned int i, count;
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen const char *cmd;
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen /* others will just disconnect us */
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen /* mark the host as removed and fully remove it later. this delay is
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen needed, because the removal may trigger director reconnections,
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen which may send the director back and we don't want to re-add it */
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen timeout_add(DIRECTOR_DELAYED_DIR_REMOVE_MSECS,
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen /* disconnect any connections to the host */
3697080532ccd9f51fac108be6079b616c7a2ddfTimo Sirainen for (i = 0; i < count; ) {
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen if (director_connection_get_host(conn) != removed_host)
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen director_connection_deinit(&conn, "Removing from ring");
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainen cmd = t_strdup_printf("DIRECTOR-REMOVE\t%s\t%u\n",
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainendirector_send_host(struct director *dir, struct director_host *src,
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainen const char *host_tag = mail_host_get_tag(host);
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen if (dir->ring_min_version >= DIRECTOR_VERSION_TAGS_V2) {
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen dir->ring_min_version < DIRECTOR_VERSION_TAGS_V2) {
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen if (dir->ring_min_version < DIRECTOR_VERSION_TAGS) {
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen i_error("Ring has directors that don't support tags - removing host %s with tag '%s'",
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen i_error("Ring has directors that support mixed versions of tags - removing host %s with tag '%s'",
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainen if (dir->ring_min_version >= DIRECTOR_VERSION_UPDOWN) {
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainen str_printfa(str, "\t%c%ld\t", host->down ? 'D' : 'U',
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainen /* add any further version checks here - these directors ignore
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainen any extra unknown arguments */
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainenvoid director_resend_hosts(struct director *dir)
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainen array_foreach(mail_hosts_get(dir->mail_hosts), hostp)
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainen director_send_host(dir, dir->self_host, NULL, *hostp);
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainenvoid director_update_host(struct director *dir, struct director_host *src,
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainen /* update state in case this is the first mail host being added */
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainen "down=%d last_updown_change=%ld (hosts_hash=%u)",
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainen net_ip2addr(&host->ip), host->vhost_count, host->down ? 1 : 0,
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainen /* mark the host desynced until ring is synced again. except if we're
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainen alone in the ring that never happens. */
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainenvoid director_remove_host(struct director *dir, struct director_host *src,
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainen struct user_directory *users = host->tag->users;
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainen director_update_send(dir, src, t_strdup_printf(
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen "HOST-REMOVE\t%s\t%u\t%u\t%s\n",
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainenvoid director_flush_host(struct director *dir, struct director_host *src,
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen struct user_directory *users = host->tag->users;
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainen director_update_send(dir, src, t_strdup_printf(
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainen "HOST-FLUSH\t%s\t%u\t%u\t%s\n",
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainen net_ip2addr(&orig_src->ip), orig_src->port, orig_src->last_seq,
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainenvoid director_update_user(struct director *dir, struct director_host *src,
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainen director_update_send(dir, src, t_strdup_printf("USER\t%u\t%s\n",
14c474d9f4591c397ed0b5206af6537c7b52c924Timo Sirainen user->username_hash, net_ip2addr(&user->host->ip)));
3697080532ccd9f51fac108be6079b616c7a2ddfTimo Sirainenvoid director_update_user_weak(struct director *dir, struct director_host *src,
14c474d9f4591c397ed0b5206af6537c7b52c924Timo Sirainen const char *cmd;
b0be0bead3d6963149f7f2a9504b8ab5aced9af5Timo Sirainen cmd = t_strdup_printf("USER-WEAK\t%s\t%u\t%u\t%u\t%s\n",
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainen net_ip2addr(&orig_src->ip), orig_src->port, orig_src->last_seq,
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainen user->username_hash, net_ip2addr(&user->host->ip));
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainen if (src != dir->self_host && dir->left != NULL && dir->right != NULL &&
8e57335924f5ff57cbd1929ec99764dc267c3312Timo Sirainen /* only two directors in this ring and we're forwarding
51795bfe9d05d92fe942cb451aec2b9d16d32a11Timo Sirainen USER-WEAK from one director back to itself via another
51795bfe9d05d92fe942cb451aec2b9d16d32a11Timo Sirainen so it sees we've received it. we can't use
8e57335924f5ff57cbd1929ec99764dc267c3312Timo Sirainen director_update_send() for this, because it doesn't send
8e57335924f5ff57cbd1929ec99764dc267c3312Timo Sirainen data back to the source. */
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainendirector_flush_user_continue(int result, struct director_kill_context *ctx)
5c7aa03f959b8b9cab3eba8a585a90f4b50a4cdfTimo Sirainen struct user *user = user_directory_lookup(ctx->tag->users,
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainen struct istream *is = iostream_temp_finish(&ctx->reply, (size_t)-1);
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen i_error("%s: Failed to flush user hash %u in host %s: %s",
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainen data == NULL ? "(no output to stdout)" : data);
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainen while((data = i_stream_read_next_line(is)) != NULL) {
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainen i_error("%s: Failed to flush user hash %u in host %s: %s",
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainen if (!DIRECTOR_KILL_CONTEXT_IS_VALID(user, ctx)) {
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainen /* user was already freed - ignore */
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainen dir_debug("User %u freed while flushing, result=%d",
3697080532ccd9f51fac108be6079b616c7a2ddfTimo Sirainen /* ctx is freed later via user->kill_ctx */
3697080532ccd9f51fac108be6079b616c7a2ddfTimo Sirainen dir_debug("Flushing user %u finished, result=%d",
3697080532ccd9f51fac108be6079b616c7a2ddfTimo Sirainen director_user_kill_finish_delayed(dir, user, result == 1);
3697080532ccd9f51fac108be6079b616c7a2ddfTimo Sirainendirector_flush_user(struct director *dir, struct user *user)
3697080532ccd9f51fac108be6079b616c7a2ddfTimo Sirainen struct director_kill_context *ctx = user->kill_ctx;
3697080532ccd9f51fac108be6079b616c7a2ddfTimo Sirainen /* Execute flush script, if set. Only the director that started the
3697080532ccd9f51fac108be6079b616c7a2ddfTimo Sirainen user moving will call the flush script. Having each director do it
3697080532ccd9f51fac108be6079b616c7a2ddfTimo Sirainen would be redundant since they're all supposed to be performing the
3697080532ccd9f51fac108be6079b616c7a2ddfTimo Sirainen same flush task to the same backend.
3697080532ccd9f51fac108be6079b616c7a2ddfTimo Sirainen Flushing is also not triggered if we're moving a user that we just
3697080532ccd9f51fac108be6079b616c7a2ddfTimo Sirainen created due to the user move. This means that the user doesn't have
3697080532ccd9f51fac108be6079b616c7a2ddfTimo Sirainen an old host, so we couldn't really even perform any flushing on the
3697080532ccd9f51fac108be6079b616c7a2ddfTimo Sirainen if (*dir->set->director_flush_socket == '\0' ||
3697080532ccd9f51fac108be6079b616c7a2ddfTimo Sirainen director_user_kill_finish_delayed(dir, user, FALSE);
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainen if (var_expand(s_sock, dir->set->director_flush_socket, tab, &error) <= 0) {
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainen i_error("Failed to expand director_flush_socket=%s: %s",
14c474d9f4591c397ed0b5206af6537c7b52c924Timo Sirainen director_user_kill_finish_delayed(dir, user, FALSE);
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainen ctx->socket_path = str_free_without_data(&s_sock);
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainen .dns_client_socket_path = DIRECTOR_DNS_SOCKET_PATH,
14c474d9f4591c397ed0b5206af6537c7b52c924Timo Sirainen const char *const args[] = {
3697080532ccd9f51fac108be6079b616c7a2ddfTimo Sirainen dir_debug("Flushing user %u via %s", user->username_hash,
bf91bed88d4e294b4577ba2a3b14d87cf35ae135Timo Sirainen if ((program_client_create(ctx->socket_path, args, &set, FALSE,
3697080532ccd9f51fac108be6079b616c7a2ddfTimo Sirainen i_error("%s: Failed to flush user hash %u in host %s: %s",
14c474d9f4591c397ed0b5206af6537c7b52c924Timo Sirainen o_stream_set_no_error_handling(ctx->reply, TRUE);
3697080532ccd9f51fac108be6079b616c7a2ddfTimo Sirainen program_client_set_output(ctx->pclient, ctx->reply);
3697080532ccd9f51fac108be6079b616c7a2ddfTimo Sirainen program_client_run_async(ctx->pclient, director_flush_user_continue, ctx);
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainenstatic void director_user_move_free(struct user *user)
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainen struct director_kill_context *kill_ctx = user->kill_ctx;
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainen dir_debug("User %u move finished at state=%s", user->username_hash,
5787e39e2be32f657b8c98fee8bac794aa852cf8Timo Sirainendirector_user_kill_finish_delayed_to(struct user *user)
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainen i_assert(user->kill_ctx->kill_state == USER_KILL_STATE_DELAY);
a28a6267f48971117dec958b160deefd14ebb7a6Timo Sirainendirector_user_kill_finish_delayed(struct director *dir, struct user *user,
a28a6267f48971117dec958b160deefd14ebb7a6Timo Sirainen user->kill_ctx->kill_state = USER_KILL_STATE_NONE;
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen user->kill_ctx->kill_state = USER_KILL_STATE_DELAY;
3697080532ccd9f51fac108be6079b616c7a2ddfTimo Sirainen /* wait for a while for the kills to finish in the backend server,
3697080532ccd9f51fac108be6079b616c7a2ddfTimo Sirainen so there are no longer any processes running for the user before we
3697080532ccd9f51fac108be6079b616c7a2ddfTimo Sirainen start letting new connections in to the new server. */
3697080532ccd9f51fac108be6079b616c7a2ddfTimo Sirainen timeout_add(dir->set->director_user_kick_delay * 1000,
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainendirector_finish_user_kill(struct director *dir, struct user *user, bool self)
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen struct director_kill_context *kill_ctx = user->kill_ctx;
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen i_assert(kill_ctx->kill_state != USER_KILL_STATE_FLUSHING);
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen i_assert(kill_ctx->kill_state != USER_KILL_STATE_DELAY);
a28a6267f48971117dec958b160deefd14ebb7a6Timo Sirainen /* we're alone */
a28a6267f48971117dec958b160deefd14ebb7a6Timo Sirainen } else if (self ||
a28a6267f48971117dec958b160deefd14ebb7a6Timo Sirainen kill_ctx->kill_state == USER_KILL_STATE_KILLING_NOTIFY_RECEIVED) {
a28a6267f48971117dec958b160deefd14ebb7a6Timo Sirainen director_connection_send(dir->right, t_strdup_printf(
a28a6267f48971117dec958b160deefd14ebb7a6Timo Sirainen kill_ctx->kill_state = USER_KILL_STATE_KILLED_WAITING_FOR_EVERYONE;
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen i_assert(kill_ctx->kill_state == USER_KILL_STATE_KILLING);
3697080532ccd9f51fac108be6079b616c7a2ddfTimo Sirainen kill_ctx->kill_state = USER_KILL_STATE_KILLED_WAITING_FOR_NOTIFY;
3697080532ccd9f51fac108be6079b616c7a2ddfTimo Sirainenstatic void director_user_kill_fail_throttled(unsigned int new_events_count,
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen i_error("Failed to kill %u users' connections", new_events_count);
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainenstatic void director_kill_user_callback(enum ipc_client_cmd_state state,
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen /* this is an asynchronous notification about user being killed.
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen there are no guarantees about what might have happened to the user
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainen in the meantime. */
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen /* shouldn't get here. the command reply isn't finished yet. */
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen if (log_throttle_accept(user_kill_fail_throttle)) {
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen i_error("Failed to kill user %u connections: %s",
a28a6267f48971117dec958b160deefd14ebb7a6Timo Sirainen /* we can't really do anything but continue anyway */
a28a6267f48971117dec958b160deefd14ebb7a6Timo Sirainen user = user_directory_lookup(ctx->tag->users, ctx->username_hash);
a28a6267f48971117dec958b160deefd14ebb7a6Timo Sirainen if (!DIRECTOR_KILL_CONTEXT_IS_VALID(user, ctx)) {
a28a6267f48971117dec958b160deefd14ebb7a6Timo Sirainen /* user was already freed - ignore */
a28a6267f48971117dec958b160deefd14ebb7a6Timo Sirainen i_assert(ctx->kill_state == USER_KILL_STATE_KILLING ||
a28a6267f48971117dec958b160deefd14ebb7a6Timo Sirainen ctx->kill_state == USER_KILL_STATE_KILLING_NOTIFY_RECEIVED);
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainen /* we were still waiting for the kill notification */
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainen director_finish_user_kill(ctx->dir, user, ctx->kill_is_self_initiated);
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainenstatic void director_user_move_throttled(unsigned int new_events_count,
14c474d9f4591c397ed0b5206af6537c7b52c924Timo Sirainen i_error("%u users' move timed out, their state may now be inconsistent",
3697080532ccd9f51fac108be6079b616c7a2ddfTimo Sirainenstatic void director_user_move_timeout(struct user *user)
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainen i_assert(user->kill_ctx->kill_state != USER_KILL_STATE_DELAY);
3697080532ccd9f51fac108be6079b616c7a2ddfTimo Sirainen if (log_throttle_accept(user_move_throttle)) {
3697080532ccd9f51fac108be6079b616c7a2ddfTimo Sirainen "its state may now be inconsistent (state=%s)",
3697080532ccd9f51fac108be6079b616c7a2ddfTimo Sirainen user_kill_state_names[user->kill_ctx->kill_state]);
3697080532ccd9f51fac108be6079b616c7a2ddfTimo Sirainen if (user->kill_ctx->kill_state == USER_KILL_STATE_FLUSHING) {
2a6af811ea3de3cf9e2f15e446674dd21b0705f3Timo Sirainen program_client_destroy(&user->kill_ctx->pclient);
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainendirector_kill_user(struct director *dir, struct director_host *src,
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen const char *cmd;
2a34e2be33f8a17d21384a5527ed9f75f4d270e0Timo Sirainen /* User is being moved again before the previous move
unsigned int username_hash,
const char *cmd;
unsigned int username_hash)
unsigned int username_hash)
case USER_KILL_STATE_KILLING:
case USER_KILL_STATE_NONE:
case USER_KILL_STATE_FLUSHING:
case USER_KILL_STATE_DELAY:
unsigned int username_hash)
unsigned int username_hash)
const char *cmd)
struct director *
return dir;
if (!director_debug)
T_BEGIN {
} T_END;
struct director_user_iter {
unsigned int tag_idx;
return iter;
return NULL;
return user;
unsigned int *hash_r)
const char *error;
&error))
return TRUE;
return FALSE;
void directors_init(void)
void directors_deinit(void)