mail-transaction-log-append.c revision 8f430e257605b2fd2b54cdf4c2a20ef8b53cf0ba
/* Copyright (c) 2003-2010 Dovecot authors, see the included COPYING file */

#include "lib.h"
#include "array.h"
#include "write-full.h"
#include "mail-index-private.h"
#include "mail-transaction-log-private.h"

ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainenvoid mail_transaction_log_append_add(struct mail_transaction_log_append_ctx *ctx,
fe363b433b8038a69b55169da9dca27892ad7d18Timo Sirainen enum mail_transaction_type type,
34435ce4a468dc13b23db69dbdd09fe20be88816Timo Sirainen const void *data, size_t size)
34435ce4a468dc13b23db69dbdd09fe20be88816Timo Sirainen{
34435ce4a468dc13b23db69dbdd09fe20be88816Timo Sirainen struct mail_transaction_header hdr;
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen
fe363b433b8038a69b55169da9dca27892ad7d18Timo Sirainen i_assert((type & MAIL_TRANSACTION_TYPE_MASK) != 0);
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen i_assert((size % 4) == 0);
ea37a9aedfc3a6ff5f4ea10bc4eff4ca23f62a15Timo Sirainen
if (size == 0)
return;
memset(&hdr, 0, sizeof(hdr));
hdr.type = type;
if (type == MAIL_TRANSACTION_EXPUNGE ||
type == MAIL_TRANSACTION_EXPUNGE_GUID)
hdr.type |= MAIL_TRANSACTION_EXPUNGE_PROT;
if (ctx->external || type == MAIL_TRANSACTION_BOUNDARY)
hdr.type |= MAIL_TRANSACTION_EXTERNAL;
hdr.size = sizeof(hdr) + size;
hdr.size = mail_index_uint32_to_offset(hdr.size);
buffer_append(ctx->output, &hdr, sizeof(hdr));
buffer_append(ctx->output, data, size);
mail_transaction_update_modseq(&hdr, data, &ctx->new_highest_modseq);
}
/* Fallback used after a failed write: switch the index to in-memory mode and
   append the pending output to the head log's in-memory buffer.

   Returns 0 on success, -1 if mail_index_move_to_memory() fails. */
static int
log_buffer_move_to_memory(struct mail_transaction_log_append_ctx *ctx)
{
	struct mail_transaction_log_file *head = ctx->log->head;

	/* cut the file back to the last synced offset first, so that log
	   syncing doesn't break on the latest write. a failure here is
	   recorded but doesn't stop the fallback. */
	if (ftruncate(head->fd, head->sync_offset) < 0) {
		mail_index_file_set_syscall_error(ctx->log->index,
						  head->filepath,
						  "ftruncate()");
	}

	if (mail_index_move_to_memory(ctx->log->index) < 0)
		return -1;

	i_assert(MAIL_TRANSACTION_LOG_FILE_IN_MEMORY(head));
	i_assert(head->buffer_offset + head->buffer->used == head->sync_offset);

	/* the whole pending output goes to the end of the memory buffer */
	buffer_append_buf(head->buffer, ctx->output, 0, (size_t)-1);
	head->sync_offset = head->buffer_offset + head->buffer->used;
	return 0;
}
/* Write the records accumulated in ctx->output to the head log file.

   For an in-memory log the records are appended to the file's buffer. For an
   on-disk log they are first written with the first record's size field
   zeroed; that field is rewritten with its real value only after the whole
   write succeeded. When pwrite_full() or fdatasync() fails, the error is
   recorded and log_buffer_move_to_memory() takes over.

   Returns 0 on success (also when there is nothing to write), otherwise the
   result of log_buffer_move_to_memory(). */
static int log_buffer_write(struct mail_transaction_log_append_ctx *ctx)
{
	struct mail_transaction_log_file *head = ctx->log->head;
	struct mail_transaction_header *first_hdr;
	uint32_t real_size;

	if (ctx->output->used == 0)
		return 0;

	if (MAIL_TRANSACTION_LOG_FILE_IN_MEMORY(head)) {
		if (head->buffer == NULL) {
			head->buffer = buffer_create_dynamic(default_pool, 4096);
			head->buffer_offset = sizeof(head->hdr);
		}
		buffer_append_buf(head->buffer, ctx->output, 0, (size_t)-1);
		head->sync_offset = head->buffer_offset + head->buffer->used;
		return 0;
	}

	/* remember the first record's size and zero it for now; it's written
	   later once everything is on disk */
	first_hdr = buffer_get_space_unsafe(ctx->output, 0, sizeof(*first_hdr));
	real_size = first_hdr->size;
	i_assert(real_size != 0);
	first_hdr->size = 0;

	if (pwrite_full(head->fd, ctx->output->data, ctx->output->used,
			head->sync_offset) < 0) {
		/* write failed: restore the size in the output buffer and
		   fall back to in-memory indexes */
		first_hdr->size = real_size;
		mail_index_file_set_syscall_error(ctx->log->index,
						  head->filepath,
						  "pwrite_full()");
		return log_buffer_move_to_memory(ctx);
	}

	i_assert(!ctx->sync_includes_this ||
		 head->sync_offset + ctx->output->used ==
		 head->max_tail_offset);

	/* the whole transaction is written now. rewrite the first record's
	   size so that the transaction becomes visible */
	first_hdr->size = real_size;
	if (pwrite_full(head->fd, &real_size, sizeof(uint32_t),
			head->sync_offset +
			offsetof(struct mail_transaction_header, size)) < 0) {
		mail_index_file_set_syscall_error(ctx->log->index,
						  head->filepath,
						  "pwrite_full()");
		return log_buffer_move_to_memory(ctx);
	}

	/* FSYNC_MODE_ALWAYS forces a sync; otherwise sync only when it was
	   requested and the mode isn't FSYNC_MODE_NEVER */
	if (head->log->index->fsync_mode == FSYNC_MODE_ALWAYS ||
	    (head->log->index->fsync_mode != FSYNC_MODE_NEVER &&
	     ctx->want_fsync)) {
		if (fdatasync(head->fd) < 0) {
			mail_index_file_set_syscall_error(ctx->log->index,
							  head->filepath,
							  "fdatasync()");
			return log_buffer_move_to_memory(ctx);
		}
	}

	/* FIXME: when we're relying on O_APPEND and someone else wrote a
	   transaction, we'll need to wait for it to commit its transaction.
	   if it crashes before doing that, we'll need to overwrite it with
	   a dummy record */

	if (head->buffer != NULL && head->mmap_base == NULL) {
		/* the log is being read from a file. add the data we just
		   wrote to the read buffer so it isn't re-read. this also
		   matters for some NFS clients, which sometimes can't read()
		   data that the same process just wrote */
		i_assert(head->buffer_offset +
			 head->buffer->used == head->sync_offset);
		buffer_append(head->buffer, ctx->output->data,
			      ctx->output->used);
	}
	head->sync_offset += ctx->output->used;
	return 0;
}
/* Queue a MAIL_TRANSACTION_HEADER_UPDATE record that stores the head log's
   max_tail_offset into the index header's log_file_tail_offset field, unless
   the saved tail offset already equals it.

   When max_tail_offset equals sync_offset, max_tail_offset is first advanced
   past the pending output plus this update record, and
   ctx->sync_includes_this is set. */
static void
log_append_sync_offset_if_needed(struct mail_transaction_log_append_ctx *ctx)
{
	struct mail_transaction_log_file *head = ctx->log->head;
	struct mail_transaction_header_update *update;
	uint32_t tail_offset;
	buffer_t update_buf;
	unsigned char raw[sizeof(*update) + sizeof(tail_offset)];

	if (head->max_tail_offset == head->sync_offset) {
		if (ctx->output->used == 0 &&
		    head->saved_tail_offset == head->max_tail_offset) {
			/* nothing to write here after all (e.g. all unchanged
			   flag updates were dropped by export) */
			return;
		}
		/* FIXME: when we remove exclusive log locking, we
		   can't rely on this. then write non-changed offset + check
		   real offset + rewrite the new offset if other transactions
		   weren't written in the middle */
		head->max_tail_offset += ctx->output->used +
			sizeof(struct mail_transaction_header) +
			sizeof(*update) + sizeof(tail_offset);
		ctx->sync_includes_this = TRUE;
	}

	tail_offset = head->max_tail_offset;
	if (head->saved_tail_offset == tail_offset)
		return;
	i_assert(tail_offset > head->saved_tail_offset);

	/* build the record payload on the stack: the update descriptor
	   followed by the new offset value */
	buffer_create_data(&update_buf, raw, sizeof(raw));
	update = buffer_append_space_unsafe(&update_buf, sizeof(*update));
	update->offset = offsetof(struct mail_index_header,
				  log_file_tail_offset);
	update->size = sizeof(tail_offset);
	buffer_append(&update_buf, &tail_offset, sizeof(tail_offset));

	mail_transaction_log_append_add(ctx, MAIL_TRANSACTION_HEADER_UPDATE,
					update_buf.data, update_buf.used);
}
/* Write the pending records of ctx to the head log file.

   Trailing garbage beyond sync_offset is dropped first, the tail offset
   update is queued if ctx->append_sync_offset is set, and then the output is
   written. On success the file's sync_highest_modseq is set from
   ctx->new_highest_modseq.

   Returns 0 on success, -1 if log_buffer_write() fails. */
static int
mail_transaction_log_append_locked(struct mail_transaction_log_append_ctx *ctx)
{
	struct mail_transaction_log_file *head = ctx->log->head;

	if (head->sync_offset < head->last_size) {
		/* the transaction log ends with some garbage (eg. a previous
		   write failed). get rid of it so that the reader doesn't
		   break because of it. */
		buffer_set_used_size(head->buffer,
				     head->sync_offset - head->buffer_offset);
		/* a failed truncation is recorded, but appending continues */
		if (!MAIL_TRANSACTION_LOG_FILE_IN_MEMORY(head) &&
		    ftruncate(head->fd, head->sync_offset) < 0) {
			mail_index_file_set_syscall_error(ctx->log->index,
				head->filepath, "ftruncate()");
		}
	}

	if (ctx->append_sync_offset)
		log_append_sync_offset_if_needed(ctx);

	if (log_buffer_write(ctx) < 0)
		return -1;

	head->sync_highest_modseq = ctx->new_highest_modseq;
	return 0;
}
/* Start appending to the index's transaction log.

   Unless index->log_sync_locked is set, the log head is locked with
   mail_transaction_log_lock_head() first. A new append context with an empty
   output buffer is then allocated and returned in ctx_r.

   Returns 0 on success, -1 if locking fails (ctx_r is left untouched). */
int mail_transaction_log_append_begin(struct mail_index *index, bool external,
				      struct mail_transaction_log_append_ctx **ctx_r)
{
	struct mail_transaction_log_append_ctx *append_ctx;

	if (!index->log_sync_locked &&
	    mail_transaction_log_lock_head(index->log) < 0)
		return -1;

	append_ctx = i_new(struct mail_transaction_log_append_ctx, 1);
	append_ctx->log = index->log;
	append_ctx->output = buffer_create_dynamic(default_pool, 1024);
	append_ctx->external = external;

	*ctx_r = append_ctx;
	return 0;
}
/* Finish an append: write the pending records and destroy the context.

   *_ctx is always set to NULL. Unless index->log_sync_locked is set, the
   head log file is unlocked after the write attempt. The output buffer and
   the context are freed whether or not the write succeeded.

   Returns the result of mail_transaction_log_append_locked(). */
int mail_transaction_log_append_commit(struct mail_transaction_log_append_ctx **_ctx)
{
	struct mail_transaction_log_append_ctx *append_ctx = *_ctx;
	struct mail_index *index = append_ctx->log->index;
	int result;

	*_ctx = NULL;

	result = mail_transaction_log_append_locked(append_ctx);
	if (!index->log_sync_locked)
		mail_transaction_log_file_unlock(index->log->head);

	buffer_free(&append_ctx->output);
	i_free(append_ctx);
	return result;
}