maildir-sync.c revision 9a107dedb8f35727c21b3d1d54475d33f6e2eb1f
02c335c23bf5fa225a467c19f2c063fb0dc7b8c3Timo Sirainen/* Copyright (c) 2004-2013 Dovecot authors, see the included COPYING file */
7f52e276c1bf13b4809344492023b90e46c3ac5dTimo Sirainen Here's a description of how we handle Maildir synchronization and
72a7c4f2ba93a723e23c941369a2985d75f240c9Stephan Bosch its problems:
e5a55bb6b867ee3ed95ac216996ff2e24bd596ccAki Tuomi We want to be as efficient as we can. The most efficient way to
72a7c4f2ba93a723e23c941369a2985d75f240c9Stephan Bosch check if changes have occurred is to stat() the new/ and cur/
72a7c4f2ba93a723e23c941369a2985d75f240c9Stephan Bosch directories and uidlist file - if their mtimes haven't changed,
72a7c4f2ba93a723e23c941369a2985d75f240c9Stephan Bosch there are no changes and we don't need to do anything.
72a7c4f2ba93a723e23c941369a2985d75f240c9Stephan Bosch Problem 1: Multiple changes can happen within a single second -
72a7c4f2ba93a723e23c941369a2985d75f240c9Stephan Bosch nothing guarantees that once we synced it, someone else didn't just
72a7c4f2ba93a723e23c941369a2985d75f240c9Stephan Bosch then make a modification. Such modifications wouldn't get noticed
72a7c4f2ba93a723e23c941369a2985d75f240c9Stephan Bosch until a new modification occurred later.
72a7c4f2ba93a723e23c941369a2985d75f240c9Stephan Bosch Problem 2: Syncing cur/ directory is much more costly than syncing
72a7c4f2ba93a723e23c941369a2985d75f240c9Stephan Bosch new/. Moving mails from new/ to cur/ will always change mtime of
72a7c4f2ba93a723e23c941369a2985d75f240c9Stephan Bosch cur/ causing us to sync it as well.
9b3565b09683b48f66de51aebb52786934d1c324Timo Sirainen Problem 3: We may not be able to move mail from new/ to cur/
72a7c4f2ba93a723e23c941369a2985d75f240c9Stephan Bosch because we're out of quota, or simply because we're accessing a
72a7c4f2ba93a723e23c941369a2985d75f240c9Stephan Bosch read-only mailbox.
72a7c4f2ba93a723e23c941369a2985d75f240c9Stephan Bosch MAILDIR_SYNC_SECS
9b3565b09683b48f66de51aebb52786934d1c324Timo Sirainen -----------------
9b3565b09683b48f66de51aebb52786934d1c324Timo Sirainen Several checks below use MAILDIR_SYNC_SECS, which should be the maximum
9b3565b09683b48f66de51aebb52786934d1c324Timo Sirainen clock drift between all computers accessing the maildir (eg. via
9b3565b09683b48f66de51aebb52786934d1c324Timo Sirainen NFS), rounded up to the next second. Our default is 1 second, since
9b3565b09683b48f66de51aebb52786934d1c324Timo Sirainen everyone should be using NTP.
9b3565b09683b48f66de51aebb52786934d1c324Timo Sirainen Note that setting it to 0 works only if there's only one computer
9b3565b09683b48f66de51aebb52786934d1c324Timo Sirainen accessing the maildir. It's practically impossible to make two
72a7c4f2ba93a723e23c941369a2985d75f240c9Stephan Bosch clocks _exactly_ synchronized.
9b3565b09683b48f66de51aebb52786934d1c324Timo Sirainen It might be possible to only use file server's clock by looking at
72a7c4f2ba93a723e23c941369a2985d75f240c9Stephan Bosch the atime field, but I don't know how well that would actually work.
9b3565b09683b48f66de51aebb52786934d1c324Timo Sirainen cur directory
9b3565b09683b48f66de51aebb52786934d1c324Timo Sirainen -------------
72a7c4f2ba93a723e23c941369a2985d75f240c9Stephan Bosch We have dirty_cur_time variable which is set to cur/ directory's
72a7c4f2ba93a723e23c941369a2985d75f240c9Stephan Bosch mtime when it's >= time() - MAILDIR_SYNC_SECS and we _think_ we have
72a7c4f2ba93a723e23c941369a2985d75f240c9Stephan Bosch synchronized the directory.
72a7c4f2ba93a723e23c941369a2985d75f240c9Stephan Bosch When dirty_cur_time is non-zero, we don't synchronize the cur/
9b3565b09683b48f66de51aebb52786934d1c324Timo Sirainen directory until
72a7c4f2ba93a723e23c941369a2985d75f240c9Stephan Bosch a) cur/'s mtime changes
72a7c4f2ba93a723e23c941369a2985d75f240c9Stephan Bosch b) opening a mail fails with ENOENT
72a7c4f2ba93a723e23c941369a2985d75f240c9Stephan Bosch c) time() > dirty_cur_time + MAILDIR_SYNC_SECS
7f52e276c1bf13b4809344492023b90e46c3ac5dTimo Sirainen This allows us to modify the maildir multiple times without having
7f52e276c1bf13b4809344492023b90e46c3ac5dTimo Sirainen to sync it at every change. The sync will eventually be done to
7f52e276c1bf13b4809344492023b90e46c3ac5dTimo Sirainen make sure we didn't miss any external changes.
7f52e276c1bf13b4809344492023b90e46c3ac5dTimo Sirainen The dirty_cur_time is set when:
7f52e276c1bf13b4809344492023b90e46c3ac5dTimo Sirainen - we change message flags
7f52e276c1bf13b4809344492023b90e46c3ac5dTimo Sirainen - we expunge messages
7f52e276c1bf13b4809344492023b90e46c3ac5dTimo Sirainen - we move mail from new/ to cur/
7f52e276c1bf13b4809344492023b90e46c3ac5dTimo Sirainen - we sync cur/ directory and its mtime is >= time() - MAILDIR_SYNC_SECS
7f52e276c1bf13b4809344492023b90e46c3ac5dTimo Sirainen It's unset when we do the final syncing, ie. when mtime is
7f52e276c1bf13b4809344492023b90e46c3ac5dTimo Sirainen older than time() - MAILDIR_SYNC_SECS.
7f52e276c1bf13b4809344492023b90e46c3ac5dTimo Sirainen new directory
7f52e276c1bf13b4809344492023b90e46c3ac5dTimo Sirainen -------------
7f52e276c1bf13b4809344492023b90e46c3ac5dTimo Sirainen If new/'s mtime is >= time() - MAILDIR_SYNC_SECS, always synchronize
7f52e276c1bf13b4809344492023b90e46c3ac5dTimo Sirainen it. dirty_cur_time-like feature might save us a few syncs, but
7f52e276c1bf13b4809344492023b90e46c3ac5dTimo Sirainen that might break a client which saves a mail in one connection and
7f52e276c1bf13b4809344492023b90e46c3ac5dTimo Sirainen tries to fetch it in another one. new/ directory is almost always
7f52e276c1bf13b4809344492023b90e46c3ac5dTimo Sirainen empty, so syncing it should be very fast anyway. Actually this can
7f52e276c1bf13b4809344492023b90e46c3ac5dTimo Sirainen still happen if we sync only new/ dir while another client is also
7f52e276c1bf13b4809344492023b90e46c3ac5dTimo Sirainen moving mails from it to cur/ - it takes us a while to see them.
7f52e276c1bf13b4809344492023b90e46c3ac5dTimo Sirainen That's pretty unlikely to happen however, and the only way to fix it
7f52e276c1bf13b4809344492023b90e46c3ac5dTimo Sirainen would be to always synchronize cur/ after new/.
7f52e276c1bf13b4809344492023b90e46c3ac5dTimo Sirainen Normally we move all mails from new/ to cur/ whenever we sync it. If
a071ae737f338f94d2e72c54930b51a1dc336815Timo Sirainen it's not possible for some reason, we mark the mail with "probably
7f52e276c1bf13b4809344492023b90e46c3ac5dTimo Sirainen exists in new/ directory" flag.
7f52e276c1bf13b4809344492023b90e46c3ac5dTimo Sirainen If rename() still fails because of ENOSPC or EDQUOT, we still save
7f52e276c1bf13b4809344492023b90e46c3ac5dTimo Sirainen the flag changes in index with dirty-flag on. When moving the mail
7f52e276c1bf13b4809344492023b90e46c3ac5dTimo Sirainen to cur/ directory, or when we notice it's already moved there, we
7f52e276c1bf13b4809344492023b90e46c3ac5dTimo Sirainen apply the flag changes to the filename, rename it and remove the
7f52e276c1bf13b4809344492023b90e46c3ac5dTimo Sirainen dirty flag. If there are dirty flags, this should be tried every time
7f52e276c1bf13b4809344492023b90e46c3ac5dTimo Sirainen after expunge or when closing the mailbox.
7f52e276c1bf13b4809344492023b90e46c3ac5dTimo Sirainen The uidlist file contains UID <-> filename mappings. It's updated only when
7f52e276c1bf13b4809344492023b90e46c3ac5dTimo Sirainen new mail arrives, so it may contain filenames that have already been
7f52e276c1bf13b4809344492023b90e46c3ac5dTimo Sirainen deleted. Updating is done by getting uidlist.lock file, writing the
7f52e276c1bf13b4809344492023b90e46c3ac5dTimo Sirainen whole uidlist into it and rename()ing it over the old uidlist. This
7f52e276c1bf13b4809344492023b90e46c3ac5dTimo Sirainen means there's no need to lock the file for reading.
e5a55bb6b867ee3ed95ac216996ff2e24bd596ccAki Tuomi Whenever uidlist is rewritten, its mtime must be larger than the old
e5a55bb6b867ee3ed95ac216996ff2e24bd596ccAki Tuomi one's. Use utime() before rename() if needed. Note that inode checking
e5a55bb6b867ee3ed95ac216996ff2e24bd596ccAki Tuomi wouldn't have been sufficient as inode numbers can be reused.
e5a55bb6b867ee3ed95ac216996ff2e24bd596ccAki Tuomi This file is usually read the first time you need to know filename for
e5a55bb6b867ee3ed95ac216996ff2e24bd596ccAki Tuomi given UID. After that it's not re-read unless new mails come that we
e5a55bb6b867ee3ed95ac216996ff2e24bd596ccAki Tuomi don't know about.
e5a55bb6b867ee3ed95ac216996ff2e24bd596ccAki Tuomi broken clients
e5a55bb6b867ee3ed95ac216996ff2e24bd596ccAki Tuomi --------------
e5a55bb6b867ee3ed95ac216996ff2e24bd596ccAki Tuomi Originally the middle identifier in Maildir filename was specified
e5a55bb6b867ee3ed95ac216996ff2e24bd596ccAki Tuomi only as <process id>_<delivery counter>. That however created a
e5a55bb6b867ee3ed95ac216996ff2e24bd596ccAki Tuomi problem with randomized PIDs which made it possible that the same
e5a55bb6b867ee3ed95ac216996ff2e24bd596ccAki Tuomi PID was reused within one second.
e5a55bb6b867ee3ed95ac216996ff2e24bd596ccAki Tuomi So if within one second a mail was delivered, MUA moved it to cur/
e5a55bb6b867ee3ed95ac216996ff2e24bd596ccAki Tuomi and another mail was delivered by a new process using same PID as
e5a55bb6b867ee3ed95ac216996ff2e24bd596ccAki Tuomi the first one, we likely ended up overwriting the first mail when
e5a55bb6b867ee3ed95ac216996ff2e24bd596ccAki Tuomi the second mail was moved over it.
e5a55bb6b867ee3ed95ac216996ff2e24bd596ccAki Tuomi Nowadays everyone should be using a more specific identifier,
72a7c4f2ba93a723e23c941369a2985d75f240c9Stephan Bosch for example by including microseconds in it, as Dovecot does.
72a7c4f2ba93a723e23c941369a2985d75f240c9Stephan Bosch There's a simple way to prevent this from happening in some cases:
7f52e276c1bf13b4809344492023b90e46c3ac5dTimo Sirainen Don't move the mail from new/ to cur/ if its mtime is >= time() -
e5a55bb6b867ee3ed95ac216996ff2e24bd596ccAki Tuomi MAILDIR_SYNC_SECS. The second delivery's link() call then fails
72a7c4f2ba93a723e23c941369a2985d75f240c9Stephan Bosch because the file is already in new/, and it will then use a different filename.
#include "lib.h"
#include "ioloop.h"
#include "array.h"
#include "buffer.h"
#include "hash.h"
#include "str.h"
#include "eacces-error.h"
#include "nfs-workarounds.h"
#include "maildir-storage.h"
#include "maildir-uidlist.h"
#include "maildir-filename.h"
#include "maildir-sync.h"
#include <stdio.h>
#include <stddef.h>
#include <stdlib.h>
#include <unistd.h>
#include <dirent.h>
enum maildir_scan_why {
struct maildir_sync_context {
/* we got here from maildir-save.c. it has no
static struct maildir_sync_context *
return ctx;
fname2);
const char *path;
#ifdef HAVE_DIRFD
if (new_dir) {
errno = 0;
flags = 0;
if (move_new) {
move_count++;
move_count++;
} else if (new_dir) {
if (ret <= 0) {
if (ret < 0)
T_BEGIN {
} T_END;
if (ret < 0)
#ifdef __APPLE__
if (errno != 0) {
if (dir_changed) {
const void *data;
if (data_size == 0) {
(undirty || \
*why_r = 0;
if (!*new_changed_r) {
if (!*cur_changed_r) {
if (check_new) {
if (*new_changed_r)
if (check_cur) {
if (*cur_changed_r)
if (!seen_changes) {
*why_r = 0;
} else if (*new_changed_r) {
const char *fname;
int ret;
if (forced) {
&why);
if (ret <= 0)
return ret;
if (!cur_changed) {
sync_flags = 0;
if (forced)
if (ret <= 0) {
if (ret == 0) {
if (forced) {
if (ret <= 0) {
unsigned int count = 0;
if (ret < 0)
if (cur_changed) {
if (ret < 0)
if (ret < 0)
if (ret == 0)
if (ret < 0)
if (ret == 0) {
*find_uid = 0;
*find_uid = 0;
const char **fname_r)
int ret;
if (ret != 0)
return ret;
return ret;
int ret;
T_BEGIN {
} T_END;
} T_END;
return ret;
bool lost_files;
int ret;
if (uid != 0) {
return ret;
bool delayed_expunges;
struct mailbox_sync_context *
int ret = 0;
if (lost_files) {
int ret;
T_BEGIN {
&why);
} T_END;