director.c revision c3a2a487e23a282e59254b82deb9344ed0306bb2
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher/* Copyright (c) 2010-2016 Dovecot authors, see the included COPYING file */
a7797068c4deb6ce2bdbcda27c45ff1bbb4a8e78Jakub Hrozek#define DIRECTOR_RECONNECT_TIMEOUT_MSECS (30*1000)
a7797068c4deb6ce2bdbcda27c45ff1bbb4a8e78Jakub Hrozek#define DIRECTOR_USER_MOVE_TIMEOUT_MSECS (30*1000)
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher#define DIRECTOR_SYNC_TIMEOUT_MSECS (5*1000)
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher#define DIRECTOR_QUICK_RECONNECT_TIMEOUT_MSECS 1000
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher#define DIRECTOR_DELAYED_DIR_REMOVE_MSECS (1000*30)
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagherstatic struct log_throttle *user_move_throttle;
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagherstatic struct log_throttle *user_kill_fail_throttle;
65a9065538fd85e6ead925d344e6b421900eb8c2Jakub Hrozekstatic const struct log_throttle_settings director_log_throttle_settings = {
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagherstatic bool director_is_self_ip_set(struct director *dir)
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher if (net_ip_compare(&dir->self_ip, &net_ip4_any))
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher if (net_ip_compare(&dir->self_ip, &net_ip6_any))
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagherstatic void director_find_self_ip(struct director *dir)
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher unsigned int i, count;
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher hosts = array_get(&dir->dir_hosts, &count);
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher for (i = 0; i < count; i++) {
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher i_fatal("director_servers doesn't list ourself");
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallaghervoid director_find_self(struct director *dir)
a7797068c4deb6ce2bdbcda27c45ff1bbb4a8e78Jakub Hrozek dir->self_host = director_host_lookup(dir, &dir->self_ip,
65a9065538fd85e6ead925d344e6b421900eb8c2Jakub Hrozek i_fatal("director_servers doesn't list ourself (%s:%u)",
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagherstatic unsigned int director_find_self_idx(struct director *dir)
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher unsigned int i, count;
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher hosts = array_get(&dir->dir_hosts, &count);
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher for (i = 0; i < count; i++) {
ea929f1b022fc2cb77dec89b0e12accef983ec85Jakub Hrozekdirector_has_outgoing_connection(struct director *dir,
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher if (director_connection_get_host(*connp) == host &&
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagherint director_connect_host(struct director *dir, struct director_host *host)
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher if (director_has_outgoing_connection(dir, host))
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher str_printfa(str, "Connecting to %s:%u (as %s",
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher str_printfa(str, ", last network failure %ds ago",
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher (int)(ioloop_time - host->last_network_failure));
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher str_printfa(str, ", last protocol failure %ds ago",
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher (int)(ioloop_time - host->last_protocol_failure));
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher port = dir->test_port != 0 ? dir->test_port : host->port;
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher fd = net_connect_ip(&host->ip, port, &dir->self_ip);
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher i_error("connect(%s) failed: %m", host->name);
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher /* Reset timestamp so that director_connect() won't skip this host
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher while we're still trying to connect to it */
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher (void)director_connection_init_out(dir, fd, host);
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagherdirector_get_preferred_right_host(struct director *dir)
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher struct director_host *const *hosts, *host;
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher hosts = array_get(&dir->dir_hosts, &count);
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher for (i = 0; i < count; i++) {
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher /* self, with some removed hosts */
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagherstatic bool director_wait_for_others(struct director *dir)
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher /* don't assume we're alone until we've attempted to connect
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher to others for a while */
6463ed1dcdd45416468b3fa178bd856b5a9ed2c3Jakub Hrozek ioloop_time - dir->ring_first_alone > DIRECTOR_RING_MIN_WAIT_SECS)
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher /* reset all failures and try again */
6463ed1dcdd45416468b3fa178bd856b5a9ed2c3Jakub Hrozek dir->to_reconnect = timeout_add(DIRECTOR_QUICK_RECONNECT_TIMEOUT_MSECS,
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallaghervoid director_connect(struct director *dir)
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher /* try to connect to first working server on our right side.
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher the left side is supposed to connect to us. */
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher hosts = array_get(&dir->dir_hosts, &count);
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher unsigned int idx = (self_idx + i) % count;
6463ed1dcdd45416468b3fa178bd856b5a9ed2c3Jakub Hrozek /* connection failed recently, don't try retrying here */
dd3ba5c5b7d2a9d109963ae9e6c94fff34872221Stephen Gallagher DIRECTOR_PROTOCOL_FAILURE_RETRY_SECS > ioloop_time) {
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher /* the director recently sent invalid protocol data,
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher don't try retrying yet */
6463ed1dcdd45416468b3fa178bd856b5a9ed2c3Jakub Hrozek if (director_connect_host(dir, hosts[idx]) == 0) {
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher if (count > 1 && director_wait_for_others(dir))
6463ed1dcdd45416468b3fa178bd856b5a9ed2c3Jakub Hrozek /* we're the only one */
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher i_warning("director: Couldn't connect to right side, "
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher "we must be the only director left");
a7797068c4deb6ce2bdbcda27c45ff1bbb4a8e78Jakub Hrozek /* since we couldn't connect to it,
a23014d69b56cbdf48ad05229c334648b5309d8fJakub Hrozek it must have failed recently */
a23014d69b56cbdf48ad05229c334648b5309d8fJakub Hrozek i_warning("director: Assuming %s is dead, disconnecting",
a23014d69b56cbdf48ad05229c334648b5309d8fJakub Hrozek "This connection is dead?");
a7797068c4deb6ce2bdbcda27c45ff1bbb4a8e78Jakub Hrozek dir->ring_min_version = DIRECTOR_VERSION_MINOR;
a23014d69b56cbdf48ad05229c334648b5309d8fJakub Hrozekvoid director_set_ring_handshaked(struct director *dir)
a23014d69b56cbdf48ad05229c334648b5309d8fJakub Hrozek "continuing delayed requests");
5ee3fba0bd812242a1ffe189f5ddf2689e6e6811Jakub Hrozekstatic void director_reconnect_timeout(struct director *dir)
a23014d69b56cbdf48ad05229c334648b5309d8fJakub Hrozek struct director_host *cur_host, *preferred_host =
a23014d69b56cbdf48ad05229c334648b5309d8fJakub Hrozek /* all directors have been removed, try again later */
a23014d69b56cbdf48ad05229c334648b5309d8fJakub Hrozek (void)director_connect_host(dir, preferred_host);
a23014d69b56cbdf48ad05229c334648b5309d8fJakub Hrozek /* the connection hasn't finished sync yet.
a23014d69b56cbdf48ad05229c334648b5309d8fJakub Hrozek keep this timeout for now. */
5ee3fba0bd812242a1ffe189f5ddf2689e6e6811Jakub Hrozekvoid director_set_ring_synced(struct director *dir)
a23014d69b56cbdf48ad05229c334648b5309d8fJakub Hrozek i_assert((dir->left != NULL && dir->right != NULL) ||
a23014d69b56cbdf48ad05229c334648b5309d8fJakub Hrozek i_warning("Ring is synced, continuing delayed requests "
a23014d69b56cbdf48ad05229c334648b5309d8fJakub Hrozek "(syncing took %d secs, hosts_hash=%u)",
a23014d69b56cbdf48ad05229c334648b5309d8fJakub Hrozek if (host != director_get_preferred_right_host(dir)) {
a23014d69b56cbdf48ad05229c334648b5309d8fJakub Hrozek /* try to reconnect to preferred host later */
a23014d69b56cbdf48ad05229c334648b5309d8fJakub Hrozek director_connection_set_synced(dir->left, TRUE);
5ee3fba0bd812242a1ffe189f5ddf2689e6e6811Jakub Hrozek director_connection_set_synced(dir->right, TRUE);
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallaghervoid director_sync_send(struct director *dir, struct director_host *host,
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher unsigned int timestamp, unsigned int hosts_hash)
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher dir->last_sync_sent_ring_change_counter = dir->ring_change_counter;
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher director_connection_get_minor_version(dir->right) > 0) {
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher /* only minor_version>0 supports extra parameters */
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher str_printfa(str, "\t%u\t%u\t%u", minor_version,
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher director_connection_send(dir->right, str_c(str));
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher /* ping our connections in case either of them are hanging.
6463ed1dcdd45416468b3fa178bd856b5a9ed2c3Jakub Hrozek if they are, we want to know it fast. */
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagherbool director_resend_sync(struct director *dir)
a23014d69b56cbdf48ad05229c334648b5309d8fJakub Hrozek if (!dir->ring_synced && dir->left != NULL && dir->right != NULL) {
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher /* send a new SYNC in case the previous one got dropped */
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher dir->self_host->last_sync_timestamp = ioloop_time;
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher director_sync_send(dir, dir->self_host, dir->sync_seq,
a23014d69b56cbdf48ad05229c334648b5309d8fJakub Hrozekstatic void director_sync_timeout(struct director *dir)
a7797068c4deb6ce2bdbcda27c45ff1bbb4a8e78Jakub Hrozek i_error("Ring SYNC appears to have got lost, resending");
5ee3fba0bd812242a1ffe189f5ddf2689e6e6811Jakub Hrozekvoid director_set_ring_unsynced(struct director *dir)
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher dir->to_sync = timeout_add(DIRECTOR_SYNC_TIMEOUT_MSECS,
2ea6196484055397cc4bc011c5960f790431fa9dStephen Gallagher /* we're synced again when we receive this SYNC back */
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher if (dir->right == NULL && dir->left == NULL) {
a23014d69b56cbdf48ad05229c334648b5309d8fJakub Hrozek /* we're alone. if we're already synced,
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher don't become unsynced. */
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher (dir->left == NULL && dir->right == NULL));
b20208b80e99abb79c00d5ec526caa9465859c52Jakub Hrozek dir_debug("Ring is desynced (seq=%u, sending SYNC to %s)",
b20208b80e99abb79c00d5ec526caa9465859c52Jakub Hrozek dir->sync_seq, dir->right == NULL ? "(nowhere)" :
dd3ba5c5b7d2a9d109963ae9e6c94fff34872221Stephen Gallagher /* send PINGs to our connections more rapidly until we've synced again.
5ee3fba0bd812242a1ffe189f5ddf2689e6e6811Jakub Hrozek if the connection has actually died, we don't need to wait (and
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher delay requests) for as long to detect it */
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher director_connection_set_synced(dir->left, FALSE);
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher director_connection_set_synced(dir->right, FALSE);
a23014d69b56cbdf48ad05229c334648b5309d8fJakub Hrozek director_sync_send(dir, dir->self_host, dir->sync_seq,
0172959f117b545c8a6b1893f5f56818d82dd624Jakub Hrozekvoid director_notify_ring_added(struct director_host *added_host,
0172959f117b545c8a6b1893f5f56818d82dd624Jakub Hrozek const char *cmd;
b20208b80e99abb79c00d5ec526caa9465859c52Jakub Hrozek net_ip2addr(&added_host->ip), added_host->port);
0172959f117b545c8a6b1893f5f56818d82dd624Jakub Hrozek director_update_send(added_host->dir, src, cmd);
5ee3fba0bd812242a1ffe189f5ddf2689e6e6811Jakub Hrozekstatic void director_delayed_dir_remove_timeout(struct director *dir)
b20208b80e99abb79c00d5ec526caa9465859c52Jakub Hrozek unsigned int i, count;
b20208b80e99abb79c00d5ec526caa9465859c52Jakub Hrozek for (i = 0; i < count; ) {
b20208b80e99abb79c00d5ec526caa9465859c52Jakub Hrozekvoid director_ring_remove(struct director_host *removed_host,
b20208b80e99abb79c00d5ec526caa9465859c52Jakub Hrozek struct director_connection *const *conns, *conn;
0172959f117b545c8a6b1893f5f56818d82dd624Jakub Hrozek unsigned int i, count;
b20208b80e99abb79c00d5ec526caa9465859c52Jakub Hrozek const char *cmd;
0172959f117b545c8a6b1893f5f56818d82dd624Jakub Hrozek /* others will just disconnect us */
b20208b80e99abb79c00d5ec526caa9465859c52Jakub Hrozek /* mark the host as removed and fully remove it later. this delay is
b20208b80e99abb79c00d5ec526caa9465859c52Jakub Hrozek needed, because the removal may trigger director reconnections,
b20208b80e99abb79c00d5ec526caa9465859c52Jakub Hrozek which may send the director back and we don't want to re-add it */
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher director_delayed_dir_remove_timeout, dir);
dd3ba5c5b7d2a9d109963ae9e6c94fff34872221Stephen Gallagher /* disconnect any connections to the host */
dd3ba5c5b7d2a9d109963ae9e6c94fff34872221Stephen Gallagher conns = array_get(&dir->connections, &count);
dd3ba5c5b7d2a9d109963ae9e6c94fff34872221Stephen Gallagher for (i = 0; i < count; ) {
6463ed1dcdd45416468b3fa178bd856b5a9ed2c3Jakub Hrozek if (director_connection_get_host(conn) != removed_host)
a23014d69b56cbdf48ad05229c334648b5309d8fJakub Hrozek director_connection_deinit(&conn, "Removing from ring");
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher conns = array_get(&dir->connections, &count);
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher cmd = t_strdup_printf("DIRECTOR-REMOVE\t%s\t%u\n",
6463ed1dcdd45416468b3fa178bd856b5a9ed2c3Jakub Hrozekdirector_send_host(struct director *dir, struct director_host *src,
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher const char *host_tag = mail_host_get_tag(host);
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher net_ip2addr(&orig_src->ip), orig_src->port,
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher net_ip2addr(&host->ip), host->vhost_count);
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher if (dir->ring_min_version >= DIRECTOR_VERSION_TAGS_V2) {
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher dir->ring_min_version < DIRECTOR_VERSION_TAGS_V2) {
a23014d69b56cbdf48ad05229c334648b5309d8fJakub Hrozek if (dir->ring_min_version < DIRECTOR_VERSION_TAGS) {
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher i_error("Ring has directors that don't support tags - removing host %s with tag '%s'",
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher i_error("Ring has directors that support mixed versions of tags - removing host %s with tag '%s'",
a23014d69b56cbdf48ad05229c334648b5309d8fJakub Hrozek if (dir->ring_min_version >= DIRECTOR_VERSION_UPDOWN) {
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher str_printfa(str, "\t%c%ld\t", host->down ? 'D' : 'U',
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher /* add any further version checks here - these directors ignore
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher any extra unknown arguments */
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher str_append_tabescaped(str, host->hostname);
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher director_update_send(dir, src, str_c(str));
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallaghervoid director_resend_hosts(struct director *dir)
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher array_foreach(mail_hosts_get(dir->mail_hosts), hostp)
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher director_send_host(dir, dir->self_host, NULL, *hostp);
6463ed1dcdd45416468b3fa178bd856b5a9ed2c3Jakub Hrozekvoid director_update_host(struct director *dir, struct director_host *src,
a23014d69b56cbdf48ad05229c334648b5309d8fJakub Hrozek /* update state in case this is the first mail host being added */
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher dir_debug("Updating host %s vhost_count=%u "
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher "down=%d last_updown_change=%ld (hosts_hash=%u)",
6463ed1dcdd45416468b3fa178bd856b5a9ed2c3Jakub Hrozek net_ip2addr(&host->ip), host->vhost_count, host->down ? 1 : 0,
6463ed1dcdd45416468b3fa178bd856b5a9ed2c3Jakub Hrozek /* mark the host desynced until ring is synced again. except if we're
524ceecc11f3d458eb3c1cf1489c3ff6ccb22226Jakub Hrozek alone in the ring that never happens. */
e5c33e0bd03a2deb8e5011deeb3ae93f960910eeJakub Hrozekvoid director_remove_host(struct director *dir, struct director_host *src,
524ceecc11f3d458eb3c1cf1489c3ff6ccb22226Jakub Hrozek director_update_send(dir, src, t_strdup_printf(
524ceecc11f3d458eb3c1cf1489c3ff6ccb22226Jakub Hrozek "HOST-REMOVE\t%s\t%u\t%u\t%s\n",
524ceecc11f3d458eb3c1cf1489c3ff6ccb22226Jakub Hrozekvoid director_flush_host(struct director *dir, struct director_host *src,
5ee3fba0bd812242a1ffe189f5ddf2689e6e6811Jakub Hrozek director_update_send(dir, src, t_strdup_printf(
5ee3fba0bd812242a1ffe189f5ddf2689e6e6811Jakub Hrozek "HOST-FLUSH\t%s\t%u\t%u\t%s\n",
5ee3fba0bd812242a1ffe189f5ddf2689e6e6811Jakub Hrozek net_ip2addr(&orig_src->ip), orig_src->port, orig_src->last_seq,
5ee3fba0bd812242a1ffe189f5ddf2689e6e6811Jakub Hrozekvoid director_update_user(struct director *dir, struct director_host *src,
5ee3fba0bd812242a1ffe189f5ddf2689e6e6811Jakub Hrozek director_update_send(dir, src, t_strdup_printf("USER\t%u\t%s\n",
5ee3fba0bd812242a1ffe189f5ddf2689e6e6811Jakub Hrozek user->username_hash, net_ip2addr(&user->host->ip)));
5ee3fba0bd812242a1ffe189f5ddf2689e6e6811Jakub Hrozekvoid director_update_user_weak(struct director *dir, struct director_host *src,
5ee3fba0bd812242a1ffe189f5ddf2689e6e6811Jakub Hrozek const char *cmd;
a23014d69b56cbdf48ad05229c334648b5309d8fJakub Hrozek cmd = t_strdup_printf("USER-WEAK\t%s\t%u\t%u\t%u\t%s\n",
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher net_ip2addr(&orig_src->ip), orig_src->port, orig_src->last_seq,
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher user->username_hash, net_ip2addr(&user->host->ip));
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher if (src != dir->self_host && dir->left != NULL && dir->right != NULL &&
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher director_connection_get_host(dir->left) ==
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher director_connection_get_host(dir->right)) {
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher /* only two directors in this ring and we're forwarding
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher USER-WEAK from one director back to itself via another
6463ed1dcdd45416468b3fa178bd856b5a9ed2c3Jakub Hrozek so it sees we've received it. we can't use
6463ed1dcdd45416468b3fa178bd856b5a9ed2c3Jakub Hrozek director_update_send() for this, because it doesn't send
6463ed1dcdd45416468b3fa178bd856b5a9ed2c3Jakub Hrozek data back to the source. */
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher director_connection_send(dir->right, cmd);
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagherdirector_user_kill_finish_delayed_to(struct director_user_kill_finish_ctx *ctx)
6463ed1dcdd45416468b3fa178bd856b5a9ed2c3Jakub Hrozek i_assert(ctx->user->kill_state == USER_KILL_STATE_DELAY);
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher ctx->user->kill_state = USER_KILL_STATE_NONE;
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher ctx->dir->state_change_callback(ctx->dir);
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagherdirector_user_kill_finish_delayed(struct director *dir, struct user *user)
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher struct director_user_kill_finish_ctx *ctx;
5ee3fba0bd812242a1ffe189f5ddf2689e6e6811Jakub Hrozek ctx = i_new(struct director_user_kill_finish_ctx, 1);
2ea6196484055397cc4bc011c5960f790431fa9dStephen Gallagher /* wait for a while for the kills to finish in the backend server,
2ea6196484055397cc4bc011c5960f790431fa9dStephen Gallagher so there are no longer any processes running for the user before we
2ea6196484055397cc4bc011c5960f790431fa9dStephen Gallagher start letting new in connections to the new server. */
2ea6196484055397cc4bc011c5960f790431fa9dStephen Gallagher user->to_move = timeout_add(dir->set->director_user_kick_delay * 1000,
2ea6196484055397cc4bc011c5960f790431fa9dStephen Gallagher director_user_kill_finish_delayed_to, ctx);
2ea6196484055397cc4bc011c5960f790431fa9dStephen Gallagherdirector_finish_user_kill(struct director *dir, struct user *user, bool self)
2ea6196484055397cc4bc011c5960f790431fa9dStephen Gallagher i_assert(user->kill_state != USER_KILL_STATE_DELAY);
dd3ba5c5b7d2a9d109963ae9e6c94fff34872221Stephen Gallagher /* we're alone */
dd3ba5c5b7d2a9d109963ae9e6c94fff34872221Stephen Gallagher user->kill_state == USER_KILL_STATE_KILLING_NOTIFY_RECEIVED) {
5ee3fba0bd812242a1ffe189f5ddf2689e6e6811Jakub Hrozek director_connection_send(dir->right, t_strdup_printf(
dd3ba5c5b7d2a9d109963ae9e6c94fff34872221Stephen Gallagher "USER-KILLED\t%u\n", user->username_hash));
dd3ba5c5b7d2a9d109963ae9e6c94fff34872221Stephen Gallagher user->kill_state = USER_KILL_STATE_KILLED_WAITING_FOR_EVERYONE;
7797e361155f7ce937085fd98e360469d7baf1b6Jakub Hrozek i_assert(user->kill_state == USER_KILL_STATE_KILLING);
dd3ba5c5b7d2a9d109963ae9e6c94fff34872221Stephen Gallagher user->kill_state = USER_KILL_STATE_KILLED_WAITING_FOR_NOTIFY;
6463ed1dcdd45416468b3fa178bd856b5a9ed2c3Jakub Hrozekstatic void director_user_kill_fail_throttled(unsigned int new_events_count,
5ee3fba0bd812242a1ffe189f5ddf2689e6e6811Jakub Hrozek i_error("Failed to kill %u users' connections", new_events_count);
6463ed1dcdd45416468b3fa178bd856b5a9ed2c3Jakub Hrozekstatic void director_kill_user_callback(enum ipc_client_cmd_state state,
6463ed1dcdd45416468b3fa178bd856b5a9ed2c3Jakub Hrozek /* this is an asynchronous notification about user being killed.
6463ed1dcdd45416468b3fa178bd856b5a9ed2c3Jakub Hrozek there are no guarantees about what might have happened to the user
6463ed1dcdd45416468b3fa178bd856b5a9ed2c3Jakub Hrozek in the mean time. */
6463ed1dcdd45416468b3fa178bd856b5a9ed2c3Jakub Hrozek /* shouldn't get here. the command reply isn't finished yet. */
6463ed1dcdd45416468b3fa178bd856b5a9ed2c3Jakub Hrozek if (log_throttle_accept(user_kill_fail_throttle)) {
6463ed1dcdd45416468b3fa178bd856b5a9ed2c3Jakub Hrozek i_error("Failed to kill user %u connections: %s",
6463ed1dcdd45416468b3fa178bd856b5a9ed2c3Jakub Hrozek /* we can't really do anything but continue anyway */
5ee3fba0bd812242a1ffe189f5ddf2689e6e6811Jakub Hrozek user = user_directory_lookup(ctx->dir->users, ctx->username_hash);
a7797068c4deb6ce2bdbcda27c45ff1bbb4a8e78Jakub Hrozek /* user was already freed - ignore */
a23014d69b56cbdf48ad05229c334648b5309d8fJakub Hrozek } else if (user->kill_state == USER_KILL_STATE_KILLING ||
a23014d69b56cbdf48ad05229c334648b5309d8fJakub Hrozek user->kill_state == USER_KILL_STATE_KILLING_NOTIFY_RECEIVED) {
5ee3fba0bd812242a1ffe189f5ddf2689e6e6811Jakub Hrozek /* we were still waiting for the kill notification */
5ee3fba0bd812242a1ffe189f5ddf2689e6e6811Jakub Hrozek director_finish_user_kill(ctx->dir, user, ctx->self);
5ee3fba0bd812242a1ffe189f5ddf2689e6e6811Jakub Hrozek /* we don't currently want to kill the user */
5ee3fba0bd812242a1ffe189f5ddf2689e6e6811Jakub Hrozekstatic void director_user_move_throttled(unsigned int new_events_count,
5ee3fba0bd812242a1ffe189f5ddf2689e6e6811Jakub Hrozek i_error("%u users' move timed out, their state may now be inconsistent",
5ee3fba0bd812242a1ffe189f5ddf2689e6e6811Jakub Hrozekstatic void director_user_move_timeout(struct user *user)
5ee3fba0bd812242a1ffe189f5ddf2689e6e6811Jakub Hrozek i_assert(user->kill_state != USER_KILL_STATE_DELAY);
5ee3fba0bd812242a1ffe189f5ddf2689e6e6811Jakub Hrozek "its state may now be inconsistent", user->username_hash);
a23014d69b56cbdf48ad05229c334648b5309d8fJakub Hrozekvoid director_move_user(struct director *dir, struct director_host *src,
5ee3fba0bd812242a1ffe189f5ddf2689e6e6811Jakub Hrozek unsigned int username_hash, struct mail_host *host)
a23014d69b56cbdf48ad05229c334648b5309d8fJakub Hrozek const char *cmd;
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher /* 1. move this user's host, and set its "killing" flag to delay all of
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher its future connections until all directors have killed the
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher connections and notified us about it.
5ee3fba0bd812242a1ffe189f5ddf2689e6e6811Jakub Hrozek 2. tell the other directors about the move
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher 3. once user kill callback is called, tell the other directors
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher with USER-KILLED that we're done killing the user.
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher 4. when some director gets a duplicate USER-KILLED, it's
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher responsible for notifying all directors that user is completely
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher 5. after receiving USER-KILLED-EVERYWHERE notification,
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher new connections are again allowed for the user.
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher user = user_directory_lookup(dir->users, username_hash);
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher user = user_directory_add(dir->users, username_hash,
6463ed1dcdd45416468b3fa178bd856b5a9ed2c3Jakub Hrozek /* user is already in this host */
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher if (user->kill_state == USER_KILL_STATE_NONE) {
5ee3fba0bd812242a1ffe189f5ddf2689e6e6811Jakub Hrozek user->to_move = timeout_add(DIRECTOR_USER_MOVE_TIMEOUT_MSECS,
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher user->kill_state = USER_KILL_STATE_KILLING;
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher cmd = t_strdup_printf("proxy\t*\tKICK-DIRECTOR-HASH\t%u",
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher director_update_send(dir, src, t_strdup_printf(
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher "USER-MOVE\t%s\t%u\t%u\t%u\t%s\n",
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher net_ip2addr(&orig_src->ip), orig_src->port, orig_src->last_seq,
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher user->username_hash, net_ip2addr(&user->host->ip)));
6463ed1dcdd45416468b3fa178bd856b5a9ed2c3Jakub Hrozekdirector_kick_user_callback(enum ipc_client_cmd_state state ATTR_UNUSED,
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallaghervoid director_kick_user(struct director *dir, struct director_host *src,
5ee3fba0bd812242a1ffe189f5ddf2689e6e6811Jakub Hrozek struct director_host *orig_src, const char *username)
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher cmd = t_strdup_printf("proxy\t*\tKICK\t%s", username);
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher director_kick_user_callback, (void *)NULL);
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher cmd = t_strdup_printf("USER-KICK\t%s\t%u\t%u\t%s\n",
5ee3fba0bd812242a1ffe189f5ddf2689e6e6811Jakub Hrozek net_ip2addr(&orig_src->ip), orig_src->port, orig_src->last_seq,
2ea6196484055397cc4bc011c5960f790431fa9dStephen Gallagher director_update_send_version(dir, src, DIRECTOR_VERSION_USER_KICK, cmd);
5ee3fba0bd812242a1ffe189f5ddf2689e6e6811Jakub Hrozekvoid director_kick_user_hash(struct director *dir, struct director_host *src,
5ee3fba0bd812242a1ffe189f5ddf2689e6e6811Jakub Hrozek const char *cmd;
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher cmd = t_strdup_printf("proxy\t*\tKICK-DIRECTOR-HASH\t%u\t%s",
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher director_kick_user_callback, (void *)NULL);
6463ed1dcdd45416468b3fa178bd856b5a9ed2c3Jakub Hrozek cmd = t_strdup_printf("USER-KICK-HASH\t%s\t%u\t%u\t%u\t%s\n",
5ee3fba0bd812242a1ffe189f5ddf2689e6e6811Jakub Hrozek net_ip2addr(&orig_src->ip), orig_src->port, orig_src->last_seq,
2ea6196484055397cc4bc011c5960f790431fa9dStephen Gallagher director_update_send_version(dir, src, DIRECTOR_VERSION_USER_KICK, cmd);
5ee3fba0bd812242a1ffe189f5ddf2689e6e6811Jakub Hrozekvoid director_user_killed(struct director *dir, unsigned int username_hash)
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher user = user_directory_lookup(dir->users, username_hash);
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher user->kill_state = USER_KILL_STATE_KILLING_NOTIFY_RECEIVED;
6463ed1dcdd45416468b3fa178bd856b5a9ed2c3Jakub Hrozek case USER_KILL_STATE_KILLED_WAITING_FOR_NOTIFY:
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher case USER_KILL_STATE_KILLING_NOTIFY_RECEIVED:
5ee3fba0bd812242a1ffe189f5ddf2689e6e6811Jakub Hrozek case USER_KILL_STATE_KILLED_WAITING_FOR_EVERYONE:
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher director_user_killed_everywhere(dir, dir->self_host,
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallaghervoid director_user_killed_everywhere(struct director *dir,
6463ed1dcdd45416468b3fa178bd856b5a9ed2c3Jakub Hrozek user = user_directory_lookup(dir->users, username_hash);
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher user->kill_state != USER_KILL_STATE_KILLED_WAITING_FOR_EVERYONE)
2cb6f28b3a12bb714bf14494d31eb6b6fff64b8bJakub Hrozek director_update_send(dir, src, t_strdup_printf(
2cb6f28b3a12bb714bf14494d31eb6b6fff64b8bJakub Hrozek "USER-KILLED-EVERYWHERE\t%s\t%u\t%u\t%u\n",
6463ed1dcdd45416468b3fa178bd856b5a9ed2c3Jakub Hrozek net_ip2addr(&orig_src->ip), orig_src->port, orig_src->last_seq,
2cb6f28b3a12bb714bf14494d31eb6b6fff64b8bJakub Hrozekstatic void director_state_callback_timeout(struct director *dir)
2cb6f28b3a12bb714bf14494d31eb6b6fff64b8bJakub Hrozekvoid director_set_state_changed(struct director *dir)
2cb6f28b3a12bb714bf14494d31eb6b6fff64b8bJakub Hrozek /* we may get called to here from various places. use a timeout to
5ee3fba0bd812242a1ffe189f5ddf2689e6e6811Jakub Hrozek make sure the state callback is called with a clean state. */
a23014d69b56cbdf48ad05229c334648b5309d8fJakub Hrozek timeout_add(0, director_state_callback_timeout, dir);
6463ed1dcdd45416468b3fa178bd856b5a9ed2c3Jakub Hrozekvoid director_update_send(struct director *dir, struct director_host *src,
65a9065538fd85e6ead925d344e6b421900eb8c2Jakub Hrozek const char *cmd)
5ee3fba0bd812242a1ffe189f5ddf2689e6e6811Jakub Hrozek director_update_send_version(dir, src, 0, cmd);
65a9065538fd85e6ead925d344e6b421900eb8c2Jakub Hrozekvoid director_update_send_version(struct director *dir,
5ee3fba0bd812242a1ffe189f5ddf2689e6e6811Jakub Hrozek if (director_connection_get_host(*connp) != src &&
65a9065538fd85e6ead925d344e6b421900eb8c2Jakub Hrozek director_connection_get_minor_version(*connp) >= min_version)
a23014d69b56cbdf48ad05229c334648b5309d8fJakub Hrozekdirector_init(const struct director_settings *set,
a23014d69b56cbdf48ad05229c334648b5309d8fJakub Hrozek const struct ip_addr *listen_ip, in_port_t listen_port,
65a9065538fd85e6ead925d344e6b421900eb8c2Jakub Hrozek dir->users = user_directory_init(set->director_user_expire,
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher dir->mail_hosts = mail_hosts_init(set->director_consistent_hashing);
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher dir->ipc_proxy = ipc_client_init(DIRECTOR_IPC_PROXY_PATH);
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher dir->ring_min_version = DIRECTOR_VERSION_MINOR;
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallaghervoid director_deinit(struct director **_dir)
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher struct director_host *const *hostp, *host;
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher struct director_connection *conn, *const *connp;
2ea6196484055397cc4bc011c5960f790431fa9dStephen Gallagher while (array_count(&dir->connections) > 0) {
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher director_connection_deinit(&conn, "Shutting down");
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher mail_hosts_deinit(&dir->orig_config_hosts);
2ea6196484055397cc4bc011c5960f790431fa9dStephen Gallagher i_debug("%s", t_strdup_vprintf(fmt, args));
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher log_throttle_init(&director_log_throttle_settings,
52261fe16203dec6e6f69177c6d0a810b47d073fStephen Gallagher log_throttle_init(&director_log_throttle_settings,