mbox-sync.c revision 82c70897a2d0e6144ecc56ca8e0eb9fff768f2c5
/* Copyright (c) 2004-2009 Dovecot authors, see the included COPYING file */
/*
Modifying mbox can be slow, so we try to do it all at once, minimizing the
required disk I/O. We may need to:
- Update message flags in Status, X-Status and X-Keywords headers
- Write missing X-UID and X-IMAPbase headers
- Write missing or broken Content-Length header if there's space
- Expunge specified messages
Here's how we do it:
- Start reading the mails from the beginning
- X-Keywords, X-UID and X-IMAPbase headers may contain padding at the end
of them; remember how much padding each message has and the offset to the
beginning of the padding
- If a header needs to be rewritten and there's enough space, do it
- If we didn't have enough space, remember how much was missing
- Continue reading and counting the padding in each message. If the available
padding is enough to rewrite all the previous messages needing it, do it
- When we encounter an expunged message, treat all of it as padding and
rewrite previous messages if needed (and there's enough space).
Afterwards keep moving messages backwards to fill the expunged space.
Moving is done by rewriting each message's headers, possibly adding a
missing Content-Length header and padding. Message bodies are moved
without modifications.
- If we encounter end of file, grow the file and rewrite the needed messages
- Rewriting is done by moving the message body forward, rewriting the
message's header and doing the same for the previous message, until all of
them are rewritten.
*/
#include "lib.h"
#include "ioloop.h"
#include "array.h"
#include "buffer.h"
#include "hostpid.h"
#include "istream.h"
#include "file-set-size.h"
#include "str.h"
#include "read-full.h"
#include "write-full.h"
#include "message-date.h"
#include "istream-raw-mbox.h"
#include "mbox-storage.h"
#include "index-sync-changes.h"
#include "mbox-from.h"
#include "mbox-file.h"
#include "mbox-lock.h"
#include "mbox-sync-private.h"
#include <stddef.h>
#include <stdlib.h>
#include <utime.h>
/* Human-readable notice explaining that the enclosing message is internal
   folder data rather than a real mail. Presumably used as the body of the
   pseudo message that carries the X-IMAP header - confirm against the
   pseudo message writer.
   The text below was taken exactly as c-client wrote it to my mailbox,
   so it's probably copyrighted by University of Washington. Keep the
   wording byte-for-byte as it is. */
#define PSEUDO_MESSAGE_BODY \
"This text is part of the internal format of your mail folder, and is not\n" \
"a real message. It is created automatically by the mail system software.\n" \
"If deleted, important folder data will be lost, and it will be re-created\n" \
"with the data reset to initial values.\n"
const char *fmt, ...)
{
if (sync_ctx->ext_modified) {
"mbox file %s was modified while we were syncing, "
}
}
{
"Unexpectedly lost From-line at offset %"PRIuUOFF_T
" from mbox file %s", from_offset,
return -1;
}
return 0;
}
{
/* Do this even if ext_modified is already set. Expunging code relies
on last_stat being updated. */
return;
}
}
{
if (dirty) {
/* just mark the stat as dirty. */
return;
}
}
static int
struct mbox_sync_mail_context *mail_ctx)
{
/* get EOF */
return 0;
/* need to add 'O' flag to Status-header */
}
}
return 1;
}
{
/* nothing for this or the future ones */
}
/* we can't expunge anything from read-only mboxes */
*sync_expunge_r = FALSE;
}
}
static bool
{
if (sync_ctx->index_reset) {
return TRUE;
}
break;
/* externally expunged message, remove from index */
}
/* this UID was already in index and it was expunged */
"mbox sync: Expunged message reappeared in mailbox %s "
"(UID %u < %u, seq=%u, idx_msgs=%u)",
/* new UID in the middle of the mailbox - shouldn't happen */
"mbox sync: UID inserted in the middle of mailbox %s "
} else {
}
return ret;
}
unsigned char hdr_md5_sum[],
const struct mail_index_record **rec_r)
{
const void *data;
if (sync_ctx->index_reset) {
return;
}
break;
/* externally expunged message, remove from index */
}
}
static void
struct mbox_sync_mail *mail,
bool nocheck)
{
const void *data;
if (!nocheck) {
/* see if from_offset needs updating */
return;
}
}
static void
{
struct mail_keywords *keywords;
}
static void
{
const void *ext_data;
}
}
const struct mail_index_record *rec)
{
/* default to undirtying the message. it gets added back if
/* replace flags */
if (idx_flags != mbox_flags) {
}
/* replace keywords */
&idx_keywords);
}
}
}
const struct mail_index_record *rec)
{
/* flags and keywords are dirty. replace the current
ones from the flags in index file. */
}
}
}
/* apply new changes */
if (flags != orig_flags ||
}
}
}
const struct mail_index_record *rec)
{
if (!sync_ctx->delay_writes) {
/* changes are written to the mbox file */
} else if (mail_ctx->need_rewrite) {
/* make sure this message gets written later */
}
/* new message */
}
} else {
(mbox_flags & MAIL_FLAGS_NONRECENT)) {
MAIL_INDEX_MAIL_FLAG_DIRTY) != 0) {
/* only dirty flag state changed */
bool dirty;
(enum mail_flags)MAIL_INDEX_MAIL_FLAG_DIRTY);
}
/* see if keywords changed */
/* see if we need to update md5 sum. */
}
/* Mail has "Status: O" header. No messages before this
can be recent. */
}
/* update from_offsets, but not if we're going to rewrite this message.
rewriting would just move it anyway. */
if (sync_ctx->need_space_seq == 0) {
}
}
{
const unsigned char *data;
for (;;) {
if (size >= from_line_size)
from_line_size -= size;
if (from_line_size == 0)
break;
if (i_stream_read(input) < 0)
return -1;
}
return 0;
}
{
unsigned char buf[10];
const char *str;
unsigned int i;
int ret;
/* first check that the 10 bytes are there and they're exactly as
expected. just an extra safety check to make sure we never write
to wrong location in the mbox file. */
if (ret < 0) {
return -1;
}
if (ret == 0) {
"X-IMAPbase uid-last unexpectedly points outside "
return -1;
}
break;
}
}
"X-IMAPbase uid-last unexpectedly lost in mbox file %s",
return -1;
}
/* and write it */
sync_ctx->base_uid_last_offset) < 0) {
return -1;
}
return 0;
}
static int
{
return -1;
}
return 0;
}
{
const struct mbox_sync_mail *mails;
unsigned int i, count;
for (i = 0; i < count; i++) {
continue;
}
}
{
}
/* expunging first message, fix space to contain next
message's \n header too since it will be removed. */
}
/* uid-last offset is invalid now */
sync_ctx->base_uid_last_offset = 0;
}
}
{
int ret;
/* move the header backwards to fill expunged space */
if (sync_ctx->dest_first_mail) {
/* we're moving this mail to beginning of file.
skip the initial \n (it's already counted in
expunged_space) */
}
/* read the From-line before rewriting overwrites it */
if (mbox_read_from_line(mail_ctx) < 0)
return -1;
if (ret < 0)
return -1;
if (ret > 0) {
/* rewrite successful, write From-line to
new location */
-move_diff);
if (mbox_write_from_line(mail_ctx) < 0)
return -1;
} else {
if (sync_ctx->dest_first_mail) {
/* didn't have enough space, move the offset
back so seeking into it doesn't fail */
}
}
} else if (mail_ctx->need_rewrite) {
if (sync_ctx->delay_writes) {
/* mark it dirty and do it later */
return 0;
}
return -1;
} else {
/* nothing to do */
return 0;
}
/* first mail with no space to write it */
sync_ctx->space_diff = 0;
if (sync_ctx->expunged_space > 0) {
/* create dummy message to describe the expunged data */
struct mbox_sync_mail mail;
sync_ctx->expunged_space = 0;
}
}
return 0;
}
static int
{
/* mail's keywords are allocated from a pool that's cleared
for each mail. we'll need to copy it to something more
permanent. */
}
if (sync_ctx->space_diff < 0) {
if (sync_ctx->expunged_space > 0) {
sync_ctx->expunged_space = 0;
}
return 0;
}
/* we have enough space now */
/* this message was expunged. fill more or less of the space.
space_diff now consists of a negative "bytes needed" sum,
plus the expunged space of this message. so it contains how
many bytes of _extra_ space we have. */
/* don't waste too much on padding */
} else {
sync_ctx->expunged_space = 0;
}
} else {
/* this message gave enough space from headers. rewriting stops
at the end of this message's headers. */
sync_ctx->expunged_space = 0;
move_diff = 0;
}
return -1;
/* mail_ctx may contain wrong data after rewrite, so make sure we
don't try to access it */
sync_ctx->need_space_seq = 0;
sync_ctx->space_diff = 0;
return 0;
}
static int
{
int ret;
bool deleted;
if (seq == 0) {
"Mailbox isn't a valid mbox file");
return -1;
}
seq++;
} else {
if (ret < 0) {
if (deleted) {
"Message was expunged unexpectedly "
}
return -1;
}
if (ret == 0) {
old_offset) < 0) {
"Error seeking back to original "
"offset %s in mbox file %s",
return -1;
}
return 0;
}
}
if (seq <= 1)
uid = 0;
else
/* set to -1, since it's always increased later */
/* this mbox has pseudo mail which contains the X-IMAP header */
}
return 1;
}
static int
{
int ret;
/* doesn't exist anymore, seek to end of file */
if (ret < 0) {
"i_stream_get_size()");
return -1;
}
size) < 0) {
"Error seeking to end of mbox file %s",
return -1;
}
return 1;
}
}
bool *skipped_mails)
{
int ret;
/* delete sync records up to next message. so if there's still
something left in array, it means the next message needs modifying */
return 1;
/* we'll need to rewrite Status: O headers */
return 1;
}
/* we'll need to rewrite Status: O headers */
}
if (uid != 0) {
/* we can skip forward to next record which needs updating. */
*skipped_mails = TRUE;
}
} else {
/* if there's no sync records left, we can stop. except if
this is a dirty sync, check if there are new messages. */
return 0;
*skipped_mails = TRUE;
} else {
ret = 1;
}
}
if (ret == 0) {
/* seek failed because the offset is dirty. just ignore and
continue from where we are now. */
ret = 1;
}
return ret;
}
{
if (sync_ctx->base_uid_validity != 0 &&
i_warning("UIDVALIDITY changed (%u -> %u) in mbox file %s",
return TRUE;
}
return FALSE;
}
struct mbox_sync_mail_context *mail_ctx,
bool partial)
{
const struct mail_index_record *rec;
int ret;
/* always start from first message so we can read X-IMAP or
X-IMAPbase header */
if (ret <= 0)
return ret;
if (sync_ctx->renumber_uids) {
/* expunge everything */
}
}
return 0;
}
}
/* UID ordering problems, resync everything to make
sure we get everything right */
return 0;
"UIDs broken with partial sync in mbox file %s",
return 0;
}
uids_broken = TRUE;
uid = 0;
if (uid != 0) {
ret = 0;
}
if (ret == 0) {
/* UID found but it's broken */
uid = 0;
} else if (uid == 0 &&
(sync_ctx->delay_writes ||
Also check for existing MD5 sums when we're actually
able to write X-UIDs. */
}
/* get all sync records related to this message. with pseudo
message just get the first sync record so we can jump to
it with partial seeking. */
&expunged);
/* if it was set, it was for the next message */
} else {
/* message wasn't found from index. we have to
read everything from now on, no skipping */
}
}
need new UIDs. */
}
/* oh no, we're out of UIDs. this shouldn't
happen normally, so just try to get it fixed
without crashing. */
"Out of UIDs, renumbering them in mbox "
return 0;
}
}
if (!expunged) {
} T_END;
if (mbox_sync_handle_header(mail_ctx) < 0)
return -1;
} else {
}
} T_END;
}
if (sync_ctx->need_space_seq != 0) {
if (mbox_sync_handle_missing_space(mail_ctx) < 0)
return -1;
return -1;
} else if (sync_ctx->expunged_space > 0) {
if (!expunged) {
/* move the body */
return -1;
return -1;
}
} else if (partial) {
&partial,
if (ret <= 0) {
if (ret < 0)
return -1;
break;
}
}
}
/* rest of the messages in index don't exist -> expunge them */
}
if (!skipped_mails)
/* once we get around to writing the changes, we'll need to do
a full sync to avoid the "UIDs broken in partial sync"
error */
}
return 1;
}
{
unsigned int uid_validity;
i_assert(uid_validity != 0);
"From: Mail System Internal Data <MAILER-DAEMON@%s>\n"
"Subject: DON'T DELETE THIS MESSAGE -- FOLDER INTERNAL DATA"
"\nMessage-ID: <%s@%s>\n"
"X-IMAP: %u %010u\n"
"Status: RO\n"
"\n"
"\n",
"pwrite_full()");
return -1;
}
/* out of disk space, truncate to empty */
}
return 0;
}
struct mbox_sync_mail_context *mail_ctx)
{
int ret;
return 0;
}
if (ret < 0) {
return -1;
}
if (ret == 0) {
/* Not a file - allow anyway */
return 0;
}
"file size unexpectedly shrank in mbox file %s "
return -1;
}
if (sync_ctx->need_space_seq != 0) {
sync_ctx->expunged_space = 0;
"file_set_size()");
"ftruncate()");
}
return -1;
}
return -1;
sync_ctx->need_space_seq = 0;
}
if (sync_ctx->expunged_space > 0) {
/* copy trailer, then truncate the file */
/* everything deleted, the trailer_size still contains
the \n trailer though */
trailer_size = 0;
}
trailer_size) < 0)
return -1;
offset + trailer_size) < 0) {
return -1;
}
if (offset == 0) {
if (mbox_write_pseudo(sync_ctx) < 0)
return -1;
}
sync_ctx->expunged_space = 0;
}
return 0;
}
static void
{
const void *data;
}
}
{
struct mail_index_view *view;
return -1;
}
if (sync_ctx->moved_offsets &&
/* We moved messages inside the mbox file without changing
the file's size. If mtime doesn't change, another process
not using the same index file as us can't know that the file
was changed. So make sure the mtime changes. This should
happen rarely enough that the sleeping doesn't become a
performance problem.
Note that to do this perfectly safely we should do this wait
whenever mails are moved or expunged, regardless of whether
the file's size changed. That however could become a
performance problem and the consequences of being wrong are
quite minimal (an extra logged error message). */
usleep(500000);
"utime()");
return -1;
}
"i_stream_stat()");
return -1;
}
}
}
/* only reason not to have UID validity at this point is if the file
is entirely empty. In that case just make up a new one if needed. */
if (sync_ctx->base_uid_validity == 0) {
}
}
}
/* other sessions have already marked more messages as
recent. */
}
/* mark recent messages */
}
}
return 0;
}
{
sync_ctx->base_uid_validity = 0;
sync_ctx->base_uid_last = 0;
sync_ctx->base_uid_last_offset = 0;
if (sync_ctx->index_reset) {
mail_index_reset(sync_ctx->t);
}
sync_ctx->prev_msg_uid = 0;
sync_ctx->need_space_seq = 0;
sync_ctx->expunged_space = 0;
sync_ctx->space_diff = 0;
}
enum mbox_sync_flags flags)
{
struct mbox_sync_mail_context mail_ctx;
unsigned int i;
return -1;
}
if ((flags & MBOX_SYNC_FORCE_SYNC) != 0) {
/* forcing a full sync. assume file has changed. */
/* file is fully synced */
else
} else if ((flags & MBOX_SYNC_UNDIRTY) != 0 ||
/* we want to do full syncing. always do this if
file size hasn't changed but timestamp has. it most
likely means that someone had modified some header
and we probably want to know about it */
} else {
/* see if we can delay syncing the whole file.
normally we only notice expunges and appends
in partial syncing. */
}
for (i = 0;;) {
break;
if (ret < 0)
return -1;
/* a) partial sync didn't work
b) we ran out of UIDs
c) syncing had errors */
if (sync_ctx->delay_writes &&
/* fixing a broken mbox state, be sure to write
the changes. */
}
if (++i == 3)
break;
}
return -1;
/* only syncs left should be just appends (and their updates)
which weren't synced yet for some reason (crash). we'll just
ignore them, as we've overwritten them above. */
sync_ctx->base_uid_last_offset != 0) {
/* Rewrite uid_last in X-IMAPbase header if we've seen it
(ie. the file isn't empty) */
}
if (mbox_sync_update_index_header(sync_ctx) < 0)
return -1;
return 0;
}
{
const struct mail_index_header *hdr;
const void *data;
return -1;
}
if (data_size == 0) {
/* doesn't exist. FIXME: backwards compatibility copying */
return 0;
}
if (mbox->mbox_broken_offsets)
return 0;
}
{
bool empty;
}
bool *empty_r)
{
/* read-only stream */
return 0;
}
return -1;
}
} else {
return 0;
}
return -1;
}
}
if (mbox_sync_header_refresh(mbox) < 0)
return -1;
/* fully synced */
return 0;
/* flushing dirtiness */
}
/* file changed */
return 1;
}
{
}
unsigned int *lock_id)
{
struct mail_index_sync_ctx *index_sync_ctx;
struct mail_index_view *sync_view;
struct mail_index_transaction *trans;
struct mbox_sync_context sync_ctx;
bool delay_writes;
((flags & MBOX_SYNC_REWRITE) == 0 &&
if ((flags & MBOX_SYNC_LOCK_READING) != 0) {
return -1;
}
if ((flags & MBOX_SYNC_HEADER) != 0 ||
(flags & MBOX_SYNC_FORCE_SYNC) != 0) {
if (mbox_sync_header_refresh(mbox) < 0)
return -1;
changed = 1;
} else {
return -1;
}
if ((flags & MBOX_SYNC_LOCK_READING) != 0) {
/* we just want to lock it for reading. if mbox hasn't been
modified don't do any syncing. */
if (!changed)
return 0;
/* have to sync to make sure offsets have stayed the same */
*lock_id = 0;
}
/* reopen input stream to make sure it has nothing buffered */
if (changed) {
/* we're most likely modifying the mbox while syncing, just
lock it for writing immediately. the mbox must be locked
before index syncing is started to avoid deadlocks, so we
don't have much choice either (well, easy ones anyway). */
return -1;
/* try as read-only */
return -1;
}
}
sync_flags = 0;
if ((flags & MBOX_SYNC_REWRITE) != 0)
if ((flags & MBOX_SYNC_LAST_COMMIT) != 0) {
} else {
}
if (ret <= 0) {
if (ret < 0)
return ret;
}
/* see if we need to drop recent flags */
changed = 1;
}
/* nothing to do */
/* index may need to do internal syncing though, so commit
instead of rollbacking. */
if (mail_index_sync_commit(&index_sync_ctx) < 0) {
return -1;
}
return 0;
}
/* make sure we've read the latest keywords in index */
if (!changed && delay_writes) {
/* if we have only flag changes, we don't need to open the
mbox file */
bool expunged;
if (uid == 0) {
goto nothing_to_do;
}
}
if (*lock_id == 0) {
/* ok, we have something to do but no locks. we'll have to
restart syncing to avoid deadlocking. */
changed = 1;
goto again;
}
if (mbox_file_open_stream(mbox) < 0) {
return -1;
}
if (ret < 0)
else if (mail_index_sync_commit(&index_sync_ctx) < 0) {
ret = -1;
} else {
}
/* try to set atime back to its original value */
else {
}
}
ret = -1;
}
}
return ret;
}
{
unsigned int lock_id = 0;
int ret;
if (lock_id != 0) {
if (ret < 0) {
/* syncing failed, don't leave it locked */
} else if ((flags & MBOX_SYNC_LOCK_READING) == 0) {
ret = -1;
/* drop to read lock */
unsigned int read_lock_id = 0;
ret = -1;
ret = -1;
}
}
return ret;
}
struct mailbox_sync_context *
{
enum mbox_sync_flags mbox_sync_flags = 0;
int ret = 0;
if ((flags & MAILBOX_SYNC_FLAG_FULL_READ) != 0 &&
if ((flags & MAILBOX_SYNC_FLAG_FULL_WRITE) != 0)
if ((flags & MAILBOX_SYNC_FLAG_FORCE_RESYNC) != 0) {
}
}
}