maildir-sync.c revision b142deb9a831c89b1bb9129ada655f3e56b9d4cc
bcb4e51a409d94ae670de96afb8483a4f7855294Stephan Bosch/* Copyright (C) 2004 Timo Sirainen */
42826d96c8d0bba9eddc85b01bf70d7db571ae7fPhil Carmody Here's a description of how we handle Maildir synchronization and
42826d96c8d0bba9eddc85b01bf70d7db571ae7fPhil Carmody its problems:
42826d96c8d0bba9eddc85b01bf70d7db571ae7fPhil Carmody We want to be as efficient as we can. The most efficient way to
42826d96c8d0bba9eddc85b01bf70d7db571ae7fPhil Carmody check if changes have occurred is to stat() the new/ and cur/
629e96c5e2d4724b713ca7d62e59ed033107edcdPhil Carmody directories and uidlist file - if their mtimes haven't changed,
629e96c5e2d4724b713ca7d62e59ed033107edcdPhil Carmody there are no changes and we don't need to do anything.
87b4215acbf020aa5b8dea686b23fc664140cda0Stephan Bosch Problem 1: Multiple changes can happen within a single second -
629e96c5e2d4724b713ca7d62e59ed033107edcdPhil Carmody nothing guarantees that once we synced it, someone else didn't just
629e96c5e2d4724b713ca7d62e59ed033107edcdPhil Carmody then make a modification. Such modifications wouldn't get noticed
629e96c5e2d4724b713ca7d62e59ed033107edcdPhil Carmody until a new modification occurred later.
629e96c5e2d4724b713ca7d62e59ed033107edcdPhil Carmody Problem 2: Syncing cur/ directory is much more costly than syncing
629e96c5e2d4724b713ca7d62e59ed033107edcdPhil Carmody new/. Moving mails from new/ to cur/ will always change mtime of
87b4215acbf020aa5b8dea686b23fc664140cda0Stephan Bosch cur/ causing us to sync it as well.
87b4215acbf020aa5b8dea686b23fc664140cda0Stephan Bosch Problem 3: We may not be able to move mail from new/ to cur/
629e96c5e2d4724b713ca7d62e59ed033107edcdPhil Carmody because we're out of quota, or simply because we're accessing a
629e96c5e2d4724b713ca7d62e59ed033107edcdPhil Carmody read-only mailbox.
629e96c5e2d4724b713ca7d62e59ed033107edcdPhil Carmody MAILDIR_SYNC_SECS
629e96c5e2d4724b713ca7d62e59ed033107edcdPhil Carmody -----------------
191153d1a5b0eb0c129139570e3aa5212f28d2acJosef 'Jeff' Sipek Several checks below use MAILDIR_SYNC_SECS, which should be maximum
62461eb609e1d852e027cf4e07d30d51288678a2Aki Tuomi clock drift between all computers accessing the maildir (eg. via
629e96c5e2d4724b713ca7d62e59ed033107edcdPhil Carmody NFS), rounded up to next second. Our default is 1 second, since
629e96c5e2d4724b713ca7d62e59ed033107edcdPhil Carmody everyone should be using NTP.
629e96c5e2d4724b713ca7d62e59ed033107edcdPhil Carmody Note that setting it to 0 works only if there's only one computer
629e96c5e2d4724b713ca7d62e59ed033107edcdPhil Carmody accessing the maildir. It's practically impossible to make two
629e96c5e2d4724b713ca7d62e59ed033107edcdPhil Carmody clocks _exactly_ synchronized.
629e96c5e2d4724b713ca7d62e59ed033107edcdPhil Carmody It might be possible to only use file server's clock by looking at
629e96c5e2d4724b713ca7d62e59ed033107edcdPhil Carmody the atime field, but I don't know how well that would actually work.
629e96c5e2d4724b713ca7d62e59ed033107edcdPhil Carmody -------------
629e96c5e2d4724b713ca7d62e59ed033107edcdPhil Carmody We have dirty_cur_time variable which is set to cur/ directory's
629e96c5e2d4724b713ca7d62e59ed033107edcdPhil Carmody mtime when it's >= time() - MAILDIR_SYNC_SECS and we _think_ we have
629e96c5e2d4724b713ca7d62e59ed033107edcdPhil Carmody synchronized the directory.
629e96c5e2d4724b713ca7d62e59ed033107edcdPhil Carmody When dirty_cur_time is non-zero, we don't synchronize the cur/
629e96c5e2d4724b713ca7d62e59ed033107edcdPhil Carmody directory until
629e96c5e2d4724b713ca7d62e59ed033107edcdPhil Carmody a) cur/'s mtime changes
629e96c5e2d4724b713ca7d62e59ed033107edcdPhil Carmody b) opening a mail fails with ENOENT
629e96c5e2d4724b713ca7d62e59ed033107edcdPhil Carmody c) time() > dirty_cur_time + MAILDIR_SYNC_SECS
629e96c5e2d4724b713ca7d62e59ed033107edcdPhil Carmody This allows us to modify the maildir multiple times without having
629e96c5e2d4724b713ca7d62e59ed033107edcdPhil Carmody to sync it at every change. The sync will eventually be done to
629e96c5e2d4724b713ca7d62e59ed033107edcdPhil Carmody make sure we didn't miss any external changes.
629e96c5e2d4724b713ca7d62e59ed033107edcdPhil Carmody The dirty_cur_time is set when:
629e96c5e2d4724b713ca7d62e59ed033107edcdPhil Carmody - we change message flags
629e96c5e2d4724b713ca7d62e59ed033107edcdPhil Carmody - we expunge messages
629e96c5e2d4724b713ca7d62e59ed033107edcdPhil Carmody - we move mail from new/ to cur/
629e96c5e2d4724b713ca7d62e59ed033107edcdPhil Carmody - we sync cur/ directory and its mtime is >= time() - MAILDIR_SYNC_SECS
629e96c5e2d4724b713ca7d62e59ed033107edcdPhil Carmody It's unset when we do the final syncing, i.e. when mtime is
629e96c5e2d4724b713ca7d62e59ed033107edcdPhil Carmody older than time() - MAILDIR_SYNC_SECS.
629e96c5e2d4724b713ca7d62e59ed033107edcdPhil Carmody new directory
629e96c5e2d4724b713ca7d62e59ed033107edcdPhil Carmody -------------
629e96c5e2d4724b713ca7d62e59ed033107edcdPhil Carmody If new/'s mtime is >= time() - MAILDIR_SYNC_SECS, always synchronize
629e96c5e2d4724b713ca7d62e59ed033107edcdPhil Carmody it. dirty_cur_time-like feature might save us a few syncs, but
629e96c5e2d4724b713ca7d62e59ed033107edcdPhil Carmody that might break a client which saves a mail in one connection and
d6bbf85809664a810726b5c711c7213874d8df57Phil Carmody tries to fetch it in another one. new/ directory is almost always
629e96c5e2d4724b713ca7d62e59ed033107edcdPhil Carmody empty, so syncing it should be very fast anyway. Actually this can
d6bbf85809664a810726b5c711c7213874d8df57Phil Carmody still happen if we sync only new/ dir while another client is also
629e96c5e2d4724b713ca7d62e59ed033107edcdPhil Carmody moving mails from it to cur/ - it takes us a while to see them.
629e96c5e2d4724b713ca7d62e59ed033107edcdPhil Carmody That's pretty unlikely to happen however, and only way to fix it
629e96c5e2d4724b713ca7d62e59ed033107edcdPhil Carmody would be to always synchronize cur/ after new/.
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Bosch Normally we move all mails from new/ to cur/ whenever we sync it. If
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Bosch it's not possible for some reason, we mark the mail with "probably
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Bosch exists in new/ directory" flag.
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Bosch If rename() still fails because of ENOSPC or EDQUOT, we still save
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Bosch the flag changes in index with dirty-flag on. When moving the mail
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Bosch to cur/ directory, or when we notice it's already moved there, we
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Bosch apply the flag changes to the filename, rename it and remove the
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Bosch dirty flag. If there are dirty flags, this should be tried every time
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Bosch after expunge or when closing the mailbox.
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Bosch The uidlist file contains UID <-> filename mappings. It's updated only when
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Bosch new mail arrives, so it may contain filenames that have already been
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Bosch deleted. Updating is done by getting uidlist.lock file, writing the
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Bosch whole uidlist into it and rename()ing it over the old uidlist. This
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Bosch means there's no need to lock the file for reading.
191153d1a5b0eb0c129139570e3aa5212f28d2acJosef 'Jeff' Sipek Whenever uidlist is rewritten, its mtime must be larger than the old
62461eb609e1d852e027cf4e07d30d51288678a2Aki Tuomi one's. Use utime() before rename() if needed. Note that inode checking
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Bosch wouldn't have been sufficient as inode numbers can be reused.
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Bosch This file is usually read the first time you need to know filename for
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Bosch given UID. After that it's not re-read unless new mails come that we
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Bosch don't know about.
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Bosch broken clients
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Bosch --------------
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Bosch Originally the middle identifier in Maildir filename was specified
191153d1a5b0eb0c129139570e3aa5212f28d2acJosef 'Jeff' Sipek only as <process id>_<delivery counter>. That however created a
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Bosch problem with randomized PIDs which made it possible that the same
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Bosch PID was reused within one second.
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Bosch So if within one second a mail was delivered, MUA moved it to cur/
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Bosch and another mail was delivered by a new process using same PID as
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Bosch the first one, we likely ended up overwriting the first mail when
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Bosch the second mail was moved over it.
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Bosch Nowadays everyone should be giving a bit more specific identifier,
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Bosch for example include microseconds in it which Dovecot does.
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Bosch There's a simple way to prevent this from happening in some cases:
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Bosch Don't move the mail from new/ to cur/ if its mtime is >= time() -
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Bosch MAILDIR_SYNC_SECS. The second delivery's link() call then fails
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Bosch because the file is already in new/, and it will then use a
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Bosch different filename. There are a few problems with this however:
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Bosch - it requires extra stat() call which is unneeded extra I/O
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Bosch - another MUA might still move the mail to cur/
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Bosch - if first file's flags are modified by either Dovecot or another
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Bosch MUA, it's moved to cur/ (you _could_ just do the dirty-flagging
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Bosch but that'd be ugly)
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Bosch Because this is useful only for very few people and it requires
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Bosch extra I/O, I decided not to implement this. It should be however
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Bosch quite easy to do since we need to be able to deal with files in new/ anyway.
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Bosch It's also possible to never accidentally overwrite a mail by using
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Bosch link() + unlink() rather than rename(). This however isn't a very
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Bosch good idea as it introduces potential race conditions when multiple
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Bosch clients are accessing the mailbox:
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Bosch Trying to move the same mail from new/ to cur/ at the same time:
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Bosch a) Client 1 uses slightly different filename than client 2,
d6bbf85809664a810726b5c711c7213874d8df57Phil Carmody for example one sets read-flag on but the other doesn't.
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Bosch You have the same mail duplicated now.
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Bosch b) Client 3 sees the mail between Client 1's and 2's link() calls
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Bosch and changes its flag. You have the same mail duplicated now.
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Bosch And it gets worse when they're unlink()ing in cur/ directory:
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Bosch c) Client 1 changes mail's flag and client 2 changes it back
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Bosch between 1's link() and unlink(). The mail is now expunged.
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Bosch d) If you try to deal with the duplicates by unlink()ing another
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Bosch one of them, you might end up unlinking both of them.
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Bosch So, what should we do then if we notice a duplicate? First of all,
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Bosch it might not be a duplicate at all, readdir() might have just
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Bosch returned it twice because it was just renamed. What we should do is
87b4215acbf020aa5b8dea686b23fc664140cda0Stephan Bosch create a completely new base name for it and rename() it to that.
87b4215acbf020aa5b8dea686b23fc664140cda0Stephan Bosch If the call fails with ENOENT, it only means that it wasn't a
87b4215acbf020aa5b8dea686b23fc664140cda0Stephan Bosch duplicate after all.
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Bosch/* When rename()ing many files from new/ to cur/, it's possible that next
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Bosch readdir() skips some files. We of course don't wish to lose them, so we
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Bosch go and rescan the new/ directory again from beginning until no files are
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Bosch left. This value is just an optimization to avoid checking the directory
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Bosch twice unnecessarily. Usually only NFS is the problem case. 1 is the safest
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Bosch bet here, but I guess 5 will do just fine too. */
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Bosch struct maildir_uidlist_sync_ctx *uidlist_sync_ctx;
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Bosch struct maildir_index_sync_context *index_sync_ctx;
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Bosch struct maildir_keywords_sync_ctx *keywords_sync_ctx;
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Bosch array_t ARRAY_DEFINE(sync_recs, struct mail_index_sync_rec);
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Boschmaildir_sync_get_keywords_sync_ctx(struct maildir_index_sync_context *ctx)
4a272f5b8bacf2852c2e53f3aa8e899e0d5c604fStephan Boschint maildir_filename_get_flags(struct maildir_keywords_sync_ctx *ctx,
42826d96c8d0bba9eddc85b01bf70d7db571ae7fPhil Carmody const char *info;
42826d96c8d0bba9eddc85b01bf70d7db571ae7fPhil Carmody if (info == NULL || info[1] != '2' || info[2] != MAILDIR_FLAGS_SEP)
42826d96c8d0bba9eddc85b01bf70d7db571ae7fPhil Carmody for (info += 3; *info != '\0' && *info != MAILDIR_FLAGS_SEP; info++) {
42826d96c8d0bba9eddc85b01bf70d7db571ae7fPhil Carmody /* unknown keyword. */
42826d96c8d0bba9eddc85b01bf70d7db571ae7fPhil Carmody /* unknown flag - ignore */
42826d96c8d0bba9eddc85b01bf70d7db571ae7fPhil Carmodymaildir_filename_append_keywords(struct maildir_keywords_sync_ctx *ctx,
42826d96c8d0bba9eddc85b01bf70d7db571ae7fPhil Carmody const unsigned int *indexes;
42826d96c8d0bba9eddc85b01bf70d7db571ae7fPhil Carmody unsigned int i, count;
42826d96c8d0bba9eddc85b01bf70d7db571ae7fPhil Carmody for (i = 0; i < count; i++) {
42826d96c8d0bba9eddc85b01bf70d7db571ae7fPhil Carmody chr = maildir_keywords_idx_char(ctx, indexes[i]);
42826d96c8d0bba9eddc85b01bf70d7db571ae7fPhil Carmodyconst char *maildir_filename_set_flags(struct maildir_keywords_sync_ctx *ctx,
42826d96c8d0bba9eddc85b01bf70d7db571ae7fPhil Carmody /* remove the old :info from file name, and get the old flags */
42826d96c8d0bba9eddc85b01bf70d7db571ae7fPhil Carmody if (info != NULL && strrchr(fname, '/') > info)
42826d96c8d0bba9eddc85b01bf70d7db571ae7fPhil Carmody if (info[1] == '2' && info[2] == MAILDIR_FLAGS_SEP)
42826d96c8d0bba9eddc85b01bf70d7db571ae7fPhil Carmody /* insert the new flags between old flags. flags must be sorted by
42826d96c8d0bba9eddc85b01bf70d7db571ae7fPhil Carmody their ASCII code. unknown flags are kept. */
42826d96c8d0bba9eddc85b01bf70d7db571ae7fPhil Carmody /* skip all known flags */
42826d96c8d0bba9eddc85b01bf70d7db571ae7fPhil Carmody nextflag = *oldflags == '\0' || *oldflags == MAILDIR_FLAGS_SEP ?
42826d96c8d0bba9eddc85b01bf70d7db571ae7fPhil Carmody if ((flags_left & MAIL_DRAFT) && nextflag > 'D') {
42826d96c8d0bba9eddc85b01bf70d7db571ae7fPhil Carmody if ((flags_left & MAIL_FLAGGED) && nextflag > 'F') {
42826d96c8d0bba9eddc85b01bf70d7db571ae7fPhil Carmody if ((flags_left & MAIL_ANSWERED) && nextflag > 'R') {
42826d96c8d0bba9eddc85b01bf70d7db571ae7fPhil Carmody if ((flags_left & MAIL_SEEN) && nextflag > 'S') {
42826d96c8d0bba9eddc85b01bf70d7db571ae7fPhil Carmody if ((flags_left & MAIL_DELETED) && nextflag > 'T') {
42826d96c8d0bba9eddc85b01bf70d7db571ae7fPhil Carmody if (keywords != NULL && array_is_created(keywords) &&
d3a430481a1e072fb55fee8803c16d075bf7bd91Aki Tuomi if (*oldflags == '\0' || *oldflags == MAILDIR_FLAGS_SEP)
d3a430481a1e072fb55fee8803c16d075bf7bd91Aki Tuomi /* another flagset, we don't know about these, just keep them */
d3a430481a1e072fb55fee8803c16d075bf7bd91Aki Tuomistatic int maildir_expunge(struct maildir_mailbox *mbox, const char *path,
42826d96c8d0bba9eddc85b01bf70d7db571ae7fPhil Carmody mail_storage_set_critical(STORAGE(mbox->storage),
42826d96c8d0bba9eddc85b01bf70d7db571ae7fPhil Carmodystatic int maildir_sync_flags(struct maildir_mailbox *mbox, const char *path,
void *context)
unsigned int i, count;
fname++;
for (i = 0; i < count; i++) {
i_unreached();
unsigned int i, count;
for (i = 0; i < count; i++) {
i_unreached();
if (expunged) {
} else if (flag_changed) {
for (i = count; i > 0; i--) {
&count);
if (count == 0) {
int ret;
if (ret <= 0)
return ret;
} while (ret > 0);
return ret;
static struct maildir_sync_context *
return ctx;
int ret = 0;
t_push();
t_pop();
t_pop();
t_pop();
return ret;
const char *dir;
unsigned int moves = 0;
bool move_new;
t_push();
if (ret == 0) {
if (new_dir)
if (ret < 0)
flags = 0;
if (move_new) {
moves++;
moves++;
} else if (new_dir) {
if (ret <= 0) {
if (ret < 0)
t_pop();
cur_mtime : 0;
bool failed)
if (ret < 0)
else if (seq != 0) {
if (ret < 0)
if (ret < 0)
return ret;
bool partial)
const char *filename;
int ret = 0;
seq = 0;
unsigned int, MAILDIR_MAX_KEYWORDS);
unsigned int, MAILDIR_MAX_KEYWORDS);
seq++;
if ((uflags &
MAILDIR_UIDLIST_REC_FLAG_NONSYNCED) != 0) {
if ((uflags &
MAILDIR_UIDLIST_REC_FLAG_RACING) != 0) {
filename);
seq--;
flags);
goto __again;
if ((uflags &
MAILDIR_UIDLIST_REC_FLAG_NONSYNCED) != 0) {
seq--;
seq--;
flags);
if (!partial) {
if (uid_validity == 0) {
} else if (uid_validity == 0) {
bool sync_last_commit)
int ret;
if (sync_last_commit) {
} else if (!forced) {
if (ret <= 0) {
return ret;
if (ret < 0)
if (cur_changed) {
ret < 0) < 0)
if (ret < 0)
if (ret == 0)
int ret;
int ret;
struct mailbox_sync_context *
int ret = 0;
ioloop_time) {
if (ret == 0) {
int ret;
t_push();
t_pop();