maildir-sync.c revision bde6382cf65fba6165dc3603f5419e194d87f404
5f5870385cff47efd2f58e7892f251cf13761528Timo Sirainen/* Copyright (C) 2004 Timo Sirainen */
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen Here's a description of how we handle Maildir synchronization and
acba68a69cdd6f3f00faa18cccef356d95048e46Timo Sirainen its problems:
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen We want to be as efficient as we can. The most efficient way to
acba68a69cdd6f3f00faa18cccef356d95048e46Timo Sirainen check if changes have occurred is to stat() the new/ and cur/
acba68a69cdd6f3f00faa18cccef356d95048e46Timo Sirainen directories and uidlist file - if their mtimes haven't changed,
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen there are no changes and we don't need to do anything.
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen Problem 1: Multiple changes can happen within a single second -
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen nothing guarantees that once we synced it, someone else didn't just
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen then make a modification. Such modifications wouldn't get noticed
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen until a new modification occurred later.
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen Problem 2: Syncing cur/ directory is much more costly than syncing
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen new/. Moving mails from new/ to cur/ will always change mtime of
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen cur/ causing us to sync it as well.
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen Problem 3: We may not be able to move mail from new/ to cur/
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen because we're out of quota, or simply because we're accessing a
acba68a69cdd6f3f00faa18cccef356d95048e46Timo Sirainen read-only mailbox.
a10ed8c47534b4c6b6bf2711ccfe577e720a47b4Timo Sirainen MAILDIR_SYNC_SECS
a10ed8c47534b4c6b6bf2711ccfe577e720a47b4Timo Sirainen -----------------
2303ad68175883aaebd1f3b18e69593c2422c7bbTimo Sirainen Several checks below use MAILDIR_SYNC_SECS, which should be the maximum
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen clock drift between all computers accessing the maildir (eg. via
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen NFS), rounded up to next second. Our default is 1 second, since
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen everyone should be using NTP.
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen Note that setting it to 0 works only if there's only one computer
2303ad68175883aaebd1f3b18e69593c2422c7bbTimo Sirainen accessing the maildir. It's practically impossible to make two
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen clocks _exactly_ synchronized.
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen It might be possible to only use file server's clock by looking at
67d86acc16b837a01d0967b65fc9a81ccf54ef0bTimo Sirainen the atime field, but I don't know how well that would actually work.
2303ad68175883aaebd1f3b18e69593c2422c7bbTimo Sirainen cur directory
2303ad68175883aaebd1f3b18e69593c2422c7bbTimo Sirainen -------------
67d86acc16b837a01d0967b65fc9a81ccf54ef0bTimo Sirainen We have dirty_cur_time variable which is set to cur/ directory's
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen mtime when it's >= time() - MAILDIR_SYNC_SECS and we _think_ we have
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen synchronized the directory.
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen When dirty_cur_time is non-zero, we don't synchronize the cur/
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen directory until
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen a) cur/'s mtime changes
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen b) opening a mail fails with ENOENT
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen c) time() > dirty_cur_time + MAILDIR_SYNC_SECS
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen This allows us to modify the maildir multiple times without having
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen to sync it at every change. The sync will eventually be done to
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen make sure we didn't miss any external changes.
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen The dirty_cur_time is set when:
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen - we change message flags
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen - we expunge messages
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen - we move mail from new/ to cur/
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen - we sync cur/ directory and its mtime is >= time() - MAILDIR_SYNC_SECS
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen It's unset when we do the final syncing, ie. when mtime is
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen older than time() - MAILDIR_SYNC_SECS.
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen new directory
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen -------------
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen If new/'s mtime is >= time() - MAILDIR_SYNC_SECS, always synchronize
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen it. dirty_cur_time-like feature might save us a few syncs, but
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen that might break a client which saves a mail in one connection and
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen tries to fetch it in another one. new/ directory is almost always
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen empty, so syncing it should be very fast anyway. Actually this can
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen still happen if we sync only new/ dir while another client is also
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen moving mails from it to cur/ - it takes us a while to see them.
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen That's pretty unlikely to happen however, and the only way to fix it
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen would be to always synchronize cur/ after new/.
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen Normally we move all mails from new/ to cur/ whenever we sync it. If
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen it's not possible for some reason, we mark the mail with "probably
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen exists in new/ directory" flag.
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen If rename() still fails because of ENOSPC or EDQUOT, we still save
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen the flag changes in index with dirty-flag on. When moving the mail
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen to cur/ directory, or when we notice it's already moved there, we
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen apply the flag changes to the filename, rename it and remove the
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen dirty flag. If there are dirty flags, this should be tried every time
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen after expunge or when closing the mailbox.
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen The uidlist file contains UID <-> filename mappings. It's updated only when
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen new mail arrives, so it may contain filenames that have already been
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen deleted. Updating is done by getting uidlist.lock file, writing the
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen whole uidlist into it and rename()ing it over the old uidlist. This
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen means there's no need to lock the file for reading.
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen Whenever uidlist is rewritten, its mtime must be larger than the old
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen one's. Use utime() before rename() if needed. Note that inode checking
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen wouldn't have been sufficient as inode numbers can be reused.
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen This file is usually read the first time you need to know filename for
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen given UID. After that it's not re-read unless new mails come that we
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen don't know about.
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen broken clients
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen --------------
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen Originally the middle identifier in Maildir filename was specified
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen only as <process id>_<delivery counter>. That however created a
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen problem with randomized PIDs which made it possible that the same
67d86acc16b837a01d0967b65fc9a81ccf54ef0bTimo Sirainen PID was reused within one second.
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen So if within one second a mail was delivered, MUA moved it to cur/
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen and another mail was delivered by a new process using same PID as
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen the first one, we likely ended up overwriting the first mail when
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen the second mail was moved over it.
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen Nowadays everyone should be giving a bit more specific identifier,
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen for example include microseconds in it which Dovecot does.
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen There's a simple way to prevent this from happening in some cases:
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen Don't move the mail from new/ to cur/ if its mtime is >= time() -
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen MAILDIR_SYNC_SECS. The second delivery's link() call then fails
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen because the file is already in new/, and it will then use a
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen different filename. There are a few problems with this however:
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen - it requires extra stat() call which is unneeded extra I/O
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen - another MUA might still move the mail to cur/
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen - if first file's flags are modified by either Dovecot or another
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen MUA, it's moved to cur/ (you _could_ just do the dirty-flagging
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen but that'd be ugly)
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen Because this is useful only for very few people and it requires
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen extra I/O, I decided not to implement this. It should be however
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen quite easy to do since we need to be able to deal with files in new/ in any case.
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen It's also possible to never accidentally overwrite a mail by using
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen link() + unlink() rather than rename(). This however isn't a very
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen good idea as it introduces potential race conditions when multiple
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen clients are accessing the mailbox:
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen Trying to move the same mail from new/ to cur/ at the same time:
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen a) Client 1 uses slightly different filename than client 2,
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen for example one sets read-flag on but the other doesn't.
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen You have the same mail duplicated now.
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen b) Client 3 sees the mail between Client 1's and 2's link() calls
67d86acc16b837a01d0967b65fc9a81ccf54ef0bTimo Sirainen and changes its flag. You have the same mail duplicated now.
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen And it gets worse when they're unlink()ing in cur/ directory:
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen c) Client 1 changes mail's flag and client 2 changes it back
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen between 1's link() and unlink(). The mail is now expunged.
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen d) If you try to deal with the duplicates by unlink()ing another
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen one of them, you might end up unlinking both of them.
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen So, what should we do then if we notice a duplicate? First of all,
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen it might not be a duplicate at all, readdir() might have just
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen returned it twice because it was just renamed. What we should do is
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen create a completely new base name for it and rename() it to that.
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen If the call fails with ENOENT, it only means that it wasn't a
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen duplicate after all.
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen struct maildir_uidlist_sync_ctx *uidlist_sync_ctx;
0beef9bf818accfb629a92ef53ff0f6a15005941Timo Sirainenstatic int maildir_expunge(struct index_mailbox *ibox, const char *path,
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainenstatic int maildir_sync_flags(struct index_mailbox *ibox, const char *path,
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen struct maildir_index_sync_context *ctx = context;
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen (void)maildir_filename_get_flags(path, &flags, keywords);
deb06d37292d9112d74bdf80cfebb92ab5151679Timo Sirainen mail_index_sync_flags_apply(&ctx->sync_rec, &flags8, keywords);
deb06d37292d9112d74bdf80cfebb92ab5151679Timo Sirainen newpath = maildir_filename_set_flags(path, flags8, keywords);
deb06d37292d9112d74bdf80cfebb92ab5151679Timo Sirainen mail_index_update_flags(ctx->trans, ctx->seq, MODIFY_ADD,
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainenstatic int maildir_sync_record(struct index_mailbox *ibox,
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen struct mail_index_sync_rec *sync_rec = &ctx->sync_rec;
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen /* make it go through sequences to avoid looping through huge
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen holes in UID range */
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen if (mail_index_lookup_uid_range(view, sync_rec->uid1,
acba68a69cdd6f3f00faa18cccef356d95048e46Timo Sirainen if (mail_index_lookup_uid(view, seq, &uid) < 0)
acba68a69cdd6f3f00faa18cccef356d95048e46Timo Sirainen if (maildir_file_do(ibox, uid, maildir_expunge,
acba68a69cdd6f3f00faa18cccef356d95048e46Timo Sirainen if (mail_index_lookup_uid_range(view, sync_rec->uid1,
acba68a69cdd6f3f00faa18cccef356d95048e46Timo Sirainen for (ctx->seq = seq1; ctx->seq <= seq2; ctx->seq++) {
acba68a69cdd6f3f00faa18cccef356d95048e46Timo Sirainen if (mail_index_lookup_uid(view, ctx->seq, &uid) < 0)
acba68a69cdd6f3f00faa18cccef356d95048e46Timo Sirainen /* if this flag was dirty, drop it */
acba68a69cdd6f3f00faa18cccef356d95048e46Timo Sirainen if (mail_index_lookup(view, ctx->seq, &rec) < 0)
acba68a69cdd6f3f00faa18cccef356d95048e46Timo Sirainen if (rec->flags & MAIL_INDEX_MAIL_FLAG_DIRTY) {
acba68a69cdd6f3f00faa18cccef356d95048e46Timo Sirainenint maildir_sync_last_commit(struct index_mailbox *ibox)
bf333c7645b8ddb6eedd6834db2fd908888793e1Timo Sirainen ret = mail_index_sync_begin(ibox->index, &ctx.sync_ctx, &ctx.view,
if (ret == 0) {
return ret;
static struct maildir_sync_context *
return ctx;
const char *old_fname)
int ret = 0;
t_push();
t_pop();
return ret;
const char *dir;
if (ret == 0) {
if (new_dir)
if (ret < 0)
flags = 0;
if (move_new) {
} else if (new_dir) {
if (ret <= 0) {
if (ret < 0)
cur_mtime : 0;
const char *filename;
int ret;
seq = 0;
seq++;
if ((uflags &
MAILDIR_UIDLIST_REC_FLAG_RACING) != 0) {
seq--;
goto __again;
seq--;
INDEX_KEYWORDS_BYTE_COUNT) != 0) {
if (uid_validity == 0) {
} else if (uid_validity == 0) {
if (ret < 0)
else if (seq != 0) {
if (ret == 0) {
return ret;
if (cur_changed) {
return ret;
int ret;
return ret;
int ret;
return ret;
int ret;
if (ret < 0)