maildir-sync.c revision d3eff05aaa4c2bc0a7580ee87a54f6693f4a8241
c25356d5978632df6203437e1953bcb29e0c736fTimo Sirainen/* Copyright (C) 2004 Timo Sirainen */
cfdaa223525f87c9c980a25cc7bb6770a248d76aTimo Sirainen Here's a description of how we handle Maildir synchronization and
cfdaa223525f87c9c980a25cc7bb6770a248d76aTimo Sirainen its problems:
cfdaa223525f87c9c980a25cc7bb6770a248d76aTimo Sirainen We want to be as efficient as we can. The most efficient way to
965ed6ea3fc8f7637bd0d159d2fdb283a191ce34Timo Sirainen check if changes have occurred is to stat() the new/ and cur/
965ed6ea3fc8f7637bd0d159d2fdb283a191ce34Timo Sirainen directories and uidlist file - if their mtimes haven't changed,
3c9783956dea385b322cd7fa6bf8c98c17a907a0Timo Sirainen there are no changes and we don't need to do anything.
965ed6ea3fc8f7637bd0d159d2fdb283a191ce34Timo Sirainen Problem 1: Multiple changes can happen within a single second -
965ed6ea3fc8f7637bd0d159d2fdb283a191ce34Timo Sirainen nothing guarantees that once we synced it, someone else didn't just
965ed6ea3fc8f7637bd0d159d2fdb283a191ce34Timo Sirainen then make a modification. Such modifications wouldn't get noticed
965ed6ea3fc8f7637bd0d159d2fdb283a191ce34Timo Sirainen until a new modification occurred later.
965ed6ea3fc8f7637bd0d159d2fdb283a191ce34Timo Sirainen Problem 2: Syncing cur/ directory is much more costly than syncing
965ed6ea3fc8f7637bd0d159d2fdb283a191ce34Timo Sirainen new/. Moving mails from new/ to cur/ will always change mtime of
965ed6ea3fc8f7637bd0d159d2fdb283a191ce34Timo Sirainen cur/ causing us to sync it as well.
7d6389e4053c2dac1fb37180b5756b00785983dcTimo Sirainen Problem 3: We may not be able to move mail from new/ to cur/
7d6389e4053c2dac1fb37180b5756b00785983dcTimo Sirainen because we're out of quota, or simply because we're accessing a
6ef7e31619edfaa17ed044b45861d106a86191efTimo Sirainen read-only mailbox.
40992309053d51192ae1b36d1dd6c057f2d37257Timo Sirainen MAILDIR_SYNC_SECS
a399486f2d8d5bed51bc6344baba61a7f2b0dcdbTimo Sirainen -----------------
a399486f2d8d5bed51bc6344baba61a7f2b0dcdbTimo Sirainen Several checks below use MAILDIR_SYNC_SECS, which should be the maximum
a399486f2d8d5bed51bc6344baba61a7f2b0dcdbTimo Sirainen clock drift between all computers accessing the maildir (e.g. via
a399486f2d8d5bed51bc6344baba61a7f2b0dcdbTimo Sirainen NFS), rounded up to the next second. Our default is 1 second, since
965ed6ea3fc8f7637bd0d159d2fdb283a191ce34Timo Sirainen everyone should be using NTP.
965ed6ea3fc8f7637bd0d159d2fdb283a191ce34Timo Sirainen Note that setting it to 0 works only if there's only one computer
b567e0172c73dcf7642462e86962060358dd5f28Timo Sirainen accessing the maildir. It's practically impossible to make two
b567e0172c73dcf7642462e86962060358dd5f28Timo Sirainen clocks _exactly_ synchronized.
10c5fd417af4ee30b68c967f5e7d5a49f4f149b5Timo Sirainen It might be possible to only use file server's clock by looking at
10c5fd417af4ee30b68c967f5e7d5a49f4f149b5Timo Sirainen the atime field, but I don't know how well that would actually work.
965ed6ea3fc8f7637bd0d159d2fdb283a191ce34Timo Sirainen cur directory
473080c7c0d25ddfdf77e7dfa0ba8f73c6c669d5Timo Sirainen -------------
6ef7e31619edfaa17ed044b45861d106a86191efTimo Sirainen We have a dirty_cur_time variable which is set to the cur/ directory's
e82af44fe25ca9b88210f313548dc08538e4a677Timo Sirainen mtime when it's >= time() - MAILDIR_SYNC_SECS and we _think_ we have
e714eed72515794c46c6712a611e5ab924d903daTimo Sirainen synchronized the directory.
965ed6ea3fc8f7637bd0d159d2fdb283a191ce34Timo Sirainen When dirty_cur_time is non-zero, we don't synchronize the cur/
965ed6ea3fc8f7637bd0d159d2fdb283a191ce34Timo Sirainen directory until
c4457e497e01b57565d24da624968699b166e02aTimo Sirainen a) cur/'s mtime changes
c4457e497e01b57565d24da624968699b166e02aTimo Sirainen b) opening a mail fails with ENOENT
965ed6ea3fc8f7637bd0d159d2fdb283a191ce34Timo Sirainen c) time() > dirty_cur_time + MAILDIR_SYNC_SECS
965ed6ea3fc8f7637bd0d159d2fdb283a191ce34Timo Sirainen This allows us to modify the maildir multiple times without having
965ed6ea3fc8f7637bd0d159d2fdb283a191ce34Timo Sirainen to sync it at every change. The sync will eventually be done to
965ed6ea3fc8f7637bd0d159d2fdb283a191ce34Timo Sirainen make sure we didn't miss any external changes.
965ed6ea3fc8f7637bd0d159d2fdb283a191ce34Timo Sirainen The dirty_cur_time is set when:
965ed6ea3fc8f7637bd0d159d2fdb283a191ce34Timo Sirainen - we change message flags
e023e3c2677ab66d7a7445eae9caf3d739e199cbTimo Sirainen - we expunge messages
965ed6ea3fc8f7637bd0d159d2fdb283a191ce34Timo Sirainen - we move mail from new/ to cur/
e023e3c2677ab66d7a7445eae9caf3d739e199cbTimo Sirainen - we sync cur/ directory and its mtime is >= time() - MAILDIR_SYNC_SECS
0d7d27765267594a5870892268ab345148306d49Timo Sirainen It's unset when we do the final syncing, ie. when mtime is
0d7d27765267594a5870892268ab345148306d49Timo Sirainen older than time() - MAILDIR_SYNC_SECS.
965ed6ea3fc8f7637bd0d159d2fdb283a191ce34Timo Sirainen new directory
16133a719ce8b6a5b8cedd721340cc1607c43433Timo Sirainen -------------
16133a719ce8b6a5b8cedd721340cc1607c43433Timo Sirainen If new/'s mtime is >= time() - MAILDIR_SYNC_SECS, always synchronize
16133a719ce8b6a5b8cedd721340cc1607c43433Timo Sirainen it. A dirty_cur_time-like feature might save us a few syncs, but
965ed6ea3fc8f7637bd0d159d2fdb283a191ce34Timo Sirainen that might break a client which saves a mail in one connection and
965ed6ea3fc8f7637bd0d159d2fdb283a191ce34Timo Sirainen tries to fetch it in another one. new/ directory is almost always
965ed6ea3fc8f7637bd0d159d2fdb283a191ce34Timo Sirainen empty, so syncing it should be very fast anyway. Actually this can
e023e3c2677ab66d7a7445eae9caf3d739e199cbTimo Sirainen still happen if we sync only new/ dir while another client is also
e023e3c2677ab66d7a7445eae9caf3d739e199cbTimo Sirainen moving mails from it to cur/ - it takes us a while to see them.
965ed6ea3fc8f7637bd0d159d2fdb283a191ce34Timo Sirainen That's pretty unlikely to happen, however, and the only way to fix it
965ed6ea3fc8f7637bd0d159d2fdb283a191ce34Timo Sirainen would be to always synchronize cur/ after new/.
16133a719ce8b6a5b8cedd721340cc1607c43433Timo Sirainen Normally we move all mails from new/ to cur/ whenever we sync it. If
ed5e91e58dfc372c2135c55427bf6f25a7725042Timo Sirainen it's not possible for some reason, we mark the mail with "probably
ed5e91e58dfc372c2135c55427bf6f25a7725042Timo Sirainen exists in new/ directory" flag.
965ed6ea3fc8f7637bd0d159d2fdb283a191ce34Timo Sirainen If rename() still fails because of ENOSPC or EDQUOT, we still save
965ed6ea3fc8f7637bd0d159d2fdb283a191ce34Timo Sirainen the flag changes in index with dirty-flag on. When moving the mail
dd2d3ef41dc407afb8afc49e18ff53640e4b4e02Timo Sirainen to cur/ directory, or when we notice it's already moved there, we
a399486f2d8d5bed51bc6344baba61a7f2b0dcdbTimo Sirainen apply the flag changes to the filename, rename it and remove the
a399486f2d8d5bed51bc6344baba61a7f2b0dcdbTimo Sirainen dirty flag. If there are dirty flags, this should be tried every time
a399486f2d8d5bed51bc6344baba61a7f2b0dcdbTimo Sirainen after expunge or when closing the mailbox.
e023e3c2677ab66d7a7445eae9caf3d739e199cbTimo Sirainen The uidlist file contains UID <-> filename mappings. It's updated only when
25ee72451d16374ed27fdbf829f4ec756c778352Timo Sirainen new mail arrives, so it may contain filenames that have already been
25ee72451d16374ed27fdbf829f4ec756c778352Timo Sirainen deleted. Updating is done by getting uidlist.lock file, writing the
965ed6ea3fc8f7637bd0d159d2fdb283a191ce34Timo Sirainen whole uidlist into it and rename()ing it over the old uidlist. This
9f431ccfb6932746db56245c8a3d3415717ef545Timo Sirainen means there's no need to lock the file for reading.
16133a719ce8b6a5b8cedd721340cc1607c43433Timo Sirainen Whenever uidlist is rewritten, its mtime must be larger than the old
965ed6ea3fc8f7637bd0d159d2fdb283a191ce34Timo Sirainen one's. Use utime() before rename() if needed. Note that inode checking
965ed6ea3fc8f7637bd0d159d2fdb283a191ce34Timo Sirainen wouldn't have been sufficient as inode numbers can be reused.
965ed6ea3fc8f7637bd0d159d2fdb283a191ce34Timo Sirainen This file is usually read the first time you need to know filename for
16133a719ce8b6a5b8cedd721340cc1607c43433Timo Sirainen given UID. After that it's not re-read unless new mails come that we
08aea01ef9a9d20703e0fcf8618e6195c0037a44Timo Sirainen don't know about.
4261a8b43792dc4db4b39e6910319835b7450e84Timo Sirainen broken clients
4261a8b43792dc4db4b39e6910319835b7450e84Timo Sirainen --------------
3c9783956dea385b322cd7fa6bf8c98c17a907a0Timo Sirainen Originally the middle identifier in Maildir filename was specified
ebfcfd258acc89633c47d9c3b0b40a1a3f75cdcbTimo Sirainen only as <process id>_<delivery counter>. That however created a
d1f0acc7fc722e13e8296228703adfe8a884d59eTimo Sirainen problem with randomized PIDs which made it possible that the same
4261a8b43792dc4db4b39e6910319835b7450e84Timo Sirainen PID was reused within one second.
4261a8b43792dc4db4b39e6910319835b7450e84Timo Sirainen So if within one second a mail was delivered, MUA moved it to cur/
4261a8b43792dc4db4b39e6910319835b7450e84Timo Sirainen and another mail was delivered by a new process using same PID as
4261a8b43792dc4db4b39e6910319835b7450e84Timo Sirainen the first one, we likely ended up overwriting the first mail when
4261a8b43792dc4db4b39e6910319835b7450e84Timo Sirainen the second mail was moved over it.
4261a8b43792dc4db4b39e6910319835b7450e84Timo Sirainen Nowadays everyone should be using a more specific identifier,
4261a8b43792dc4db4b39e6910319835b7450e84Timo Sirainen for example by including microseconds in it, as Dovecot does.
965ed6ea3fc8f7637bd0d159d2fdb283a191ce34Timo Sirainen There's a simple way to prevent this from happening in some cases:
#include "lib.h"
#include "ioloop.h"
#include "array.h"
#include "buffer.h"
#include "hash.h"
#include "str.h"
#include "maildir-storage.h"
#include "maildir-uidlist.h"
#include "maildir-keywords.h"
#include "maildir-sync.h"
#include <stdio.h>
#include <stddef.h>
#include <unistd.h>
#include <dirent.h>
struct maildir_sync_context {
bool partial;
struct maildir_index_sync_context {
int dirty_state;
struct maildir_keywords_sync_ctx *
const char *info;
*flags_r = 0;
switch (*info) {
int idx;
if (idx < 0) {
const unsigned int *indexes;
unsigned int i, count;
char chr;
for (i = 0; i < count; i++) {
int nextflag;
oldflags++;
oldflags++;
unsigned int i, count;
fname++;
for (i = 0; i < count; i++) {
i_unreached();
unsigned int i, count;
for (i = 0; i < count; i++) {
i_unreached();
if (expunged) {
} else if (flag_changed) {
for (i = count; i > 0; i--) {
&count);
if (count == 0) {
int ret;
if (ret <= 0) {
if (ret < 0)
return ret;
} while (ret > 0);
if (ret < 0)
return ret;
static struct maildir_sync_context *
return ctx;
int ret = 0;
t_push();
t_pop();
t_pop();
t_pop();
return ret;
const char *dir;
t_push();
if (ret == 0) {
if (new_dir)
if (ret < 0)
flags = 0;
if (move_new) {
moves++;
moves++;
} else if (new_dir) {
count++;
(void)maildir_uidlist_lock_touch(
if (ret <= 0) {
if (ret < 0)
t_pop();
} else if (seq != 0) {
return ret;
bool partial)
const char *filename;
int ret = 0;
seq = 0;
seq++;
if ((uflags &
MAILDIR_UIDLIST_REC_FLAG_NONSYNCED) != 0) {
if ((uflags &
MAILDIR_UIDLIST_REC_FLAG_RACING) != 0) {
filename);
seq--;
flags);
goto __again;
if ((uflags &
MAILDIR_UIDLIST_REC_FLAG_NONSYNCED) != 0) {
seq--;
seq--;
flags);
if (!partial) {
if (uid_validity == 0) {
} else if (uid_validity == 0) {
bool sync_last_commit)
int ret;
if (sync_last_commit) {
} else if (!forced) {
if (ret <= 0) {
return ret;
if (ret < 0)
if (cur_changed) {
if (ret < 0)
if (ret == 0)
int ret;
int ret;
struct mailbox_sync_context *
int ret = 0;
ioloop_time) {
if (ret == 0) {
int ret;
t_push();
t_pop();