maildir-sync.c revision 9a099a65160987349f441c82ab0e38f32b747adb
/* Copyright (C) 2004 Timo Sirainen */
Here's a description of how we handle Maildir synchronization and
its problems:
We want to be as efficient as we can. The most efficient way to
check if changes have occurred is to stat() the new/ and cur/
directories and uidlist file - if their mtimes haven't changed,
there are no changes and we don't need to do anything.
Problem 1: Multiple changes can happen within a single second -
nothing guarantees that once we synced it, someone else didn't just
then make a modification. Such modifications wouldn't get noticed
until a new modification occurred later.
Problem 2: Syncing cur/ directory is much more costly than syncing
new/. Moving mails from new/ to cur/ will always change mtime of
cur/ causing us to sync it as well.
Problem 3: We may not be able to move mail from new/ to cur/
because we're out of quota, or simply because we're accessing a
read-only mailbox.
MAILDIR_SYNC_SECS
-----------------
Several checks below use MAILDIR_SYNC_SECS, which should be the maximum
clock drift between all computers accessing the maildir (eg. via
NFS), rounded up to the next second. Our default is 1 second, since
everyone should be using NTP.
Note that setting it to 0 works only if there's only one computer
accessing the maildir. It's practically impossible to make two
clocks _exactly_ synchronized.
It might be possible to only use the file server's clock by looking at
the atime field, but I don't know how well that would actually work.
cur directory
-------------
We have a dirty_cur_time variable which is set to cur/ directory's
mtime when it's >= time() - MAILDIR_SYNC_SECS and we _think_ we have
synchronized the directory.
When dirty_cur_time is non-zero, we don't synchronize the cur/
directory until
a) cur/'s mtime changes
b) opening a mail fails with ENOENT
c) time() > dirty_cur_time + MAILDIR_SYNC_SECS
This allows us to modify the maildir multiple times without having
to sync it at every change. The sync will eventually be done to
make sure we didn't miss any external changes.
The dirty_cur_time is set when:
- we change message flags
- we expunge messages
- we move mail from new/ to cur/
- we sync cur/ directory and its mtime is >= time() - MAILDIR_SYNC_SECS
It's unset when we do the final syncing, ie. when mtime is
older than time() - MAILDIR_SYNC_SECS.
new directory
-------------
If new/'s mtime is >= time() - MAILDIR_SYNC_SECS, always synchronize
it. A dirty_cur_time-like feature might save us a few syncs, but
that might break a client which saves a mail in one connection and
tries to fetch it in another one. new/ directory is almost always
empty, so syncing it should be very fast anyway. Actually this can
still happen if we sync only new/ dir while another client is also
moving mails from it to cur/ - it takes us a while to see them.
That's pretty unlikely to happen however, and the only way to fix it
would be to always synchronize cur/ after new/.
Normally we move all mails from new/ to cur/ whenever we sync it. If
it's not possible for some reason, we mark the mail with "probably
exists in new/ directory" flag.
If rename() still fails because of ENOSPC or EDQUOT, we still save
the flag changes in index with dirty-flag on. When moving the mail
to cur/ directory, or when we notice it's already moved there, we
apply the flag changes to the filename, rename it and remove the
dirty flag. If there are dirty flags, this should be tried every time
after expunge or when closing the mailbox.
uidlist
-------
The uidlist file contains UID <-> filename mappings. It's updated only when
new mail arrives, so it may contain filenames that have already been
deleted. Updating is done by getting uidlist.lock file, writing the
whole uidlist into it and rename()ing it over the old uidlist. This
means there's no need to lock the file for reading.
Whenever uidlist is rewritten, its mtime must be larger than the old
one's. Use utime() before rename() if needed. Note that inode checking
wouldn't have been sufficient as inode numbers can be reused.
This file is usually read the first time you need to know the filename
for a given UID. After that it's not re-read unless new mails come that we
don't know about.
broken clients
--------------
Originally the middle identifier in Maildir filename was specified
only as <process id>_<delivery counter>. That however created a
problem with randomized PIDs which made it possible that the same
PID was reused within one second.
So if within one second a mail was delivered, MUA moved it to cur/
and another mail was delivered by a new process using same PID as
the first one, we likely ended up overwriting the first mail when
the second mail was moved over it.
Nowadays everyone should be giving a bit more specific identifier,
for example by including microseconds in it, which Dovecot does.
There's a simple way to prevent this from happening in some cases:
Don't move the mail from new/ to cur/ if its mtime is >= time() -
MAILDIR_SYNC_SECS. The second delivery's link() call then fails
because the file is already in new/, and it will then use a
different filename. There are a few problems with this however:
- it requires an extra stat() call which is unneeded extra I/O
- another MUA might still move the mail to cur/
- if first file's flags are modified by either Dovecot or another
MUA, it's moved to cur/ (you _could_ just do the dirty-flagging
but that'd be ugly)
#include "lib.h"
#include "ioloop.h"
#include "array.h"
#include "buffer.h"
#include "hash.h"
#include "str.h"
#include "maildir-storage.h"
#include "maildir-uidlist.h"
#include "maildir-keywords.h"
#include <stdio.h>
#include <stddef.h>
#include <unistd.h>
#include <dirent.h>
struct maildir_sync_context {
bool partial;
struct maildir_index_sync_context {
int dirty_state;
struct maildir_keywords_sync_ctx *
const char *info;
*flags_r = 0;
switch (*info) {
int idx;
if (idx < 0) {
const unsigned int *indexes;
unsigned int i, count;
char chr;
for (i = 0; i < count; i++) {
int nextflag;
oldflags++;
oldflags++;
void *context)
const char *newpath;
unsigned int i, count;
for (i = 0; i < count; i++) {
i_unreached();
unsigned int i, count;
for (i = 0; i < count; i++) {
i_unreached();
if (expunged) {
} else if (flag_changed) {
for (i = count; i > 0; i--) {
&count);
if (count == 0) {
int ret;
if (ret <= 0)
return ret;
} while (ret > 0);
return ret;
static struct maildir_sync_context *
return ctx;
const char *old_fname)
int ret = 0;
t_push();
t_pop();
return ret;
const char *dir;
unsigned int moves = 0;
bool move_new;
t_push();
if (ret == 0) {
if (new_dir)
if (ret < 0)
flags = 0;
if (move_new) {
moves++;
moves++;
} else if (new_dir) {
if (ret <= 0) {
if (ret < 0)
t_pop();
cur_mtime : 0;
struct maildir_index_sync_context *
return NULL;
return sync_ctx;
bool partial)
const char *filename;
int ret = 0;
seq = 0;
unsigned int, MAILDIR_MAX_KEYWORDS);
unsigned int, MAILDIR_MAX_KEYWORDS);
seq++;
if ((uflags &
MAILDIR_UIDLIST_REC_FLAG_NONSYNCED) != 0) {
if ((uflags &
MAILDIR_UIDLIST_REC_FLAG_RACING) != 0) {
filename);
seq--;
flags);
goto __again;
if ((uflags &
MAILDIR_UIDLIST_REC_FLAG_NONSYNCED) != 0) {
seq--;
seq--;
flags);
if (!partial) {
if (uid_validity == 0) {
} else if (uid_validity == 0) {
if (ret < 0) {
else if (seq != 0) {
if (ret == 0) {
bool sync_last_commit)
int ret;
if (sync_last_commit) {
} else if (!forced) {
if (ret <= 0) {
return ret;
if (ret < 0)
if (cur_changed) {
if (ret < 0) {
if (ret == 0)
int ret;
int ret;
struct mailbox_sync_context *
int ret = 0;
ioloop_time) {
if (ret == 0) {
int ret;
t_push();
t_pop();