mail-index-write.c revision d9f730d678378ec4e850f928a7849a3650ea8c7a
/* Copyright (c) 2003-2011 Dovecot authors, see the included COPYING file */
#include "lib.h"
#include "read-full.h"
#include "write-full.h"
#include "ostream.h"
#include "mail-index-private.h"
#include "mail-transaction-log-private.h"
#include <stdio.h>
/* if the index file was smaller than this many bytes when it was last read
   (last_read_stat.st_size), it's always recreated instead of being updated
   in place */
#define MAIL_INDEX_MIN_UPDATE_SIZE 1024
/* if we're updating >= count-n messages, recreate the index
   (this constant is the "n"; "count" is the map's records_count) */
#define MAIL_INDEX_MAX_OVERWRITE_NEG_SEQ_COUNT 10
/* Create (or replace) "<index->filepath>.backup" as a hard link to the
   current index file.

   The link is first created under a temporary "<...>.backup.tmp" name and
   then rename()d over the final backup name. If the temporary name already
   exists, it is unlinked and the link() is retried once.

   If the index has an open fd, it is fdatasync()ed first so that the backup
   doesn't refer to data that hasn't reached the disk yet.

   Returns 0 on success, and also when link() fails with ENOENT (there is no
   index file to back up). Returns -1 on any other failure, after recording
   the error with mail_index_set_error(). */
static int mail_index_create_backup(struct mail_index *index)
{
	const char *backup_path, *tmp_backup_path;
	int ret;

	if (index->fd != -1) {
		/* we very much want to avoid creating a backup file that
		   hasn't been written to disk yet */
		if (fdatasync(index->fd) < 0) {
			/* name the index file here: the backup paths haven't
			   been built yet, so they must not be used in this
			   message */
			mail_index_set_error(index, "fdatasync(%s) failed: %m",
					     index->filepath);
			return -1;
		}
	}

	backup_path = t_strconcat(index->filepath, ".backup", NULL);
	tmp_backup_path = t_strconcat(backup_path, ".tmp", NULL);

	ret = link(index->filepath, tmp_backup_path);
	if (ret < 0 && errno == EEXIST) {
		/* the temporary name is already taken: remove it and retry.
		   ENOENT from unlink() only means it vanished in between. */
		if (unlink(tmp_backup_path) < 0 && errno != ENOENT) {
			mail_index_set_error(index, "unlink(%s) failed: %m",
					     tmp_backup_path);
			return -1;
		}
		ret = link(index->filepath, tmp_backup_path);
	}
	if (ret < 0) {
		if (errno == ENOENT) {
			/* no dovecot.index file, ignore */
			return 0;
		}
		mail_index_set_error(index, "link(%s, %s) failed: %m",
				     index->filepath, tmp_backup_path);
		return -1;
	}

	/* move the finished link over the final backup name */
	if (rename(tmp_backup_path, backup_path) < 0) {
		mail_index_set_error(index, "rename(%s, %s) failed: %m",
				     tmp_backup_path, backup_path);
		return -1;
	}
	return 0;
}
/* Write the whole current map into a freshly created temporary file and
   rename() it over index->filepath.

   The file contents are, in order: the base header, the rest of the header
   area taken from map->hdr_base, and all records. Unless fsync_mode is
   FSYNC_MODE_NEVER, the temporary file is fdatasync()ed before the rename.
   When MAIL_INDEX_OPEN_FLAG_KEEP_BACKUPS is set, a backup of the existing
   index is attempted just before the rename.

   Must not be called for an in-memory index, and the map's indexid must
   match the index's (both are asserted).

   Returns 0 on success. Returns -1 on failure, in which case the error has
   been recorded and the temporary file is unlinked. */
static int mail_index_recreate(struct mail_index *index)
{
	struct mail_index_map *map = index->map;
	struct ostream *out;
	unsigned int hdr_size;
	const char *tmp_path;
	int tmp_fd, result = 0;

	i_assert(!MAIL_INDEX_IS_IN_MEMORY(index));
	i_assert(map->hdr.indexid == index->indexid);

	tmp_fd = mail_index_create_tmp_file(index, &tmp_path);
	if (tmp_fd == -1)
		return -1;

	out = o_stream_create_fd_file(tmp_fd, 0, FALSE);
	o_stream_cork(out);

	/* send each piece in turn, stopping at the first failure */
	hdr_size = I_MIN(map->hdr.base_header_size, sizeof(map->hdr));
	if (o_stream_send(out, &map->hdr, hdr_size) < 0)
		result = -1;
	else if (o_stream_send(out, CONST_PTR_OFFSET(map->hdr_base, hdr_size),
			       map->hdr.header_size - hdr_size) < 0)
		result = -1;
	else if (o_stream_send(out, map->rec_map->records,
			       map->rec_map->records_count *
			       map->hdr.record_size) < 0)
		result = -1;
	else if (o_stream_flush(out) < 0)
		result = -1;

	if (result < 0)
		mail_index_file_set_syscall_error(index, tmp_path, "write()");
	o_stream_destroy(&out);

	if (result == 0 && index->fsync_mode != FSYNC_MODE_NEVER &&
	    fdatasync(tmp_fd) < 0) {
		mail_index_file_set_syscall_error(index, tmp_path,
						  "fdatasync()");
		result = -1;
	}

	/* the fd is closed regardless of whether writing succeeded */
	if (close(tmp_fd) < 0) {
		mail_index_file_set_syscall_error(index, tmp_path, "close()");
		result = -1;
	}

	/* the backup's result is ignored: it doesn't affect whether the
	   new index gets installed */
	if ((index->flags & MAIL_INDEX_OPEN_FLAG_KEEP_BACKUPS) != 0)
		(void)mail_index_create_backup(index);

	if (result == 0 && rename(tmp_path, index->filepath) < 0) {
		mail_index_set_error(index, "rename(%s, %s) failed: %m",
				     tmp_path, index->filepath);
		result = -1;
	}

	/* on any failure, don't leave the temporary file behind */
	if (result < 0 && unlink(tmp_path) < 0) {
		mail_index_set_error(index, "unlink(%s) failed: %m",
				     tmp_path);
	}
	return result;
}
/* Overwrite the changed parts of the already open index file (index->fd)
   with the contents of the current map.

   Up to three regions are written with pwrite_full(): the extended headers
   (only if map->write_ext_header is set), the records from write_seq_first
   to write_seq_last (only if write_seq_first is non-zero), and finally the
   base header, which is always written.

   Does nothing and returns 0 for an in-memory index. Otherwise returns 0 on
   success and -1 as soon as one of the writes fails; no error is recorded
   here, that is left to the caller. */
static int mail_index_write_map_over(struct mail_index *index)
{
	struct mail_index_map *map = index->map;
	struct mail_index_record_map *rec_map = map->rec_map;
	unsigned int ext_start, base_len;

	if (MAIL_INDEX_IS_IN_MEMORY(index))
		return 0;

	/* extended headers: everything in the header area that follows the
	   base header */
	ext_start = map->hdr.base_header_size;
	if (map->write_ext_header &&
	    pwrite_full(index->fd,
			CONST_PTR_OFFSET(map->hdr_base, ext_start),
			map->hdr.header_size - ext_start,
			ext_start) < 0)
		return -1;

	/* changed records: sequences are 1-based, and in the file the
	   records follow the header area */
	if (rec_map->write_seq_first != 0) {
		size_t first_rec_offset =
			(rec_map->write_seq_first-1) * map->hdr.record_size;
		size_t write_size = map->hdr.record_size *
			(rec_map->write_seq_last -
			 rec_map->write_seq_first + 1);

		if (pwrite_full(index->fd,
				CONST_PTR_OFFSET(rec_map->records,
						 first_rec_offset),
				write_size,
				map->hdr.header_size + first_rec_offset) < 0)
			return -1;
	}

	/* Write base header last. If we happen to crash in above pwrites, it
	   doesn't matter because we haven't yet written log file offsets, so
	   all the changes will be re-applied and the header/data state will
	   stay valid.

	   The base header changes practically always, so
	   map->write_base_header might not be TRUE here in all situations.
	   It's used only to figure out if we want to write the map at all. */
	base_len = I_MIN(map->hdr.base_header_size, sizeof(map->hdr));
	return pwrite_full(index->fd, &map->hdr, base_len, 0) < 0 ? -1 : 0;
}
static bool mail_index_has_last_changed(struct mail_index *index)
{
struct mail_index_header hdr;
int ret;
if ((ret = pread_full(index->fd, &hdr, sizeof(hdr), 0)) <= 0) {
if (ret < 0 && errno != ESTALE)
mail_index_set_syscall_error(index, "pread_full()");
return TRUE;
}
return hdr.log_file_head_offset !=
index->last_read_log_file_head_offset ||
hdr.log_file_seq != index->last_read_log_file_seq;
}
/* Evaluates to non-zero if any part of the map is flagged as needing to be
   written: the base header, the extended headers, or a range of records
   (rec_map->write_seq_first is non-zero). The argument is expanded more
   than once, so it shouldn't have side effects. */
#define mail_index_map_has_changed(map) \
((map)->write_base_header || (map)->write_ext_header || \
(map)->rec_map->write_seq_first != 0)
/* Write the index's current map to the index file.

   The file is either updated in place (mail_index_write_map_over()) or
   rewritten completely (mail_index_recreate()); map->write_atomic selects
   the latter. If the write fails, the index is moved to memory and the
   function returns without touching the bookkeeping below.

   On success the last_read_log_file_* values are updated from the map's
   header and the map's write flags are cleared. If want_rotate is TRUE and
   the header points at the log's head file with tail offset == head offset,
   the transaction log is also rotated.

   The caller must hold the log sync lock (asserted). Nothing is done if the
   map has no pending changes or the index is read-only. */
void mail_index_write(struct mail_index *index, bool want_rotate)
{
struct mail_index_map *map = index->map;
const struct mail_index_header *hdr = &map->hdr;
struct stat st;
unsigned int lock_id;
int ret;
i_assert(index->log_sync_locked);
if (!mail_index_map_has_changed(map) || index->readonly)
return;
/* The checks below only ever set write_atomic; once set, the file is
   recreated instead of being updated in place. */
if (hdr->base_header_size < sizeof(*hdr)) {
/* header size grew. we can't update this file anymore. */
map->write_atomic = TRUE;
}
if (index->fd == -1 || index->last_read_log_file_seq == 0) {
/* index file doesn't exist, it's corrupted or we haven't
opened it for some reason */
map->write_atomic = TRUE;
}
if (index->last_read_stat.st_size < MAIL_INDEX_MIN_UPDATE_SIZE ||
(map->rec_map->write_seq_last - map->rec_map->write_seq_first + 1) +
MAIL_INDEX_MAX_OVERWRITE_NEG_SEQ_COUNT >=
map->rec_map->records_count) {
/* the file is so small that we don't even bother trying to
update it / changes are so large we might as well recreate */
map->write_atomic = TRUE;
}
if (!map->write_atomic) {
/* we can't update the file unless it's the same as it was
when we last read it. this is the first quick check before
locking. */
if (stat(index->filepath, &st) < 0) {
if (errno != ENOENT)
mail_index_set_syscall_error(index, "stat()");
map->write_atomic = TRUE;
} else if (st.st_ino != index->last_read_stat.st_ino ||
!CMP_ST_CTIME(&st, &index->last_read_stat))
map->write_atomic = TRUE;
}
if (!map->write_atomic) {
/* second check, done while holding the exclusive lock. if
   write_atomic is still unset after this block, the lock is held
   and lock_id is valid; it's released after the in-place write. */
if (mail_index_try_lock_exclusive(index, &lock_id) <= 0) {
/* locking failed, recreate */
map->write_atomic = TRUE;
} else if (mail_index_has_last_changed(index)) {
/* changed, we can't trust updating it anymore */
map->write_atomic = TRUE;
mail_index_unlock(index, &lock_id);
}
}
if (map->write_atomic) {
/* for an in-memory index there's no file to recreate, so nothing is
   written, but the bookkeeping below is still updated */
if (!MAIL_INDEX_IS_IN_MEMORY(index)) {
if (mail_index_recreate(index) < 0) {
mail_index_move_to_memory(index);
return;
}
}
} else {
/* update the existing file in place; fdatasync() is done here only
   with FSYNC_MODE_ALWAYS */
ret = mail_index_write_map_over(index);
if (ret < 0)
mail_index_set_syscall_error(index, "pwrite_full()");
else if (index->fsync_mode == FSYNC_MODE_ALWAYS) {
ret = fdatasync(index->fd);
if (ret < 0) {
mail_index_set_syscall_error(index,
"fdatasync()");
}
}
mail_index_unlock(index, &lock_id);
if (ret < 0) {
/* hopefully didn't break badly */
mail_index_move_to_memory(index);
return;
}
}
/* remember which log position the written header refers to, and clear
   the pending-write state */
index->last_read_log_file_seq = hdr->log_file_seq;
index->last_read_log_file_head_offset = hdr->log_file_head_offset;
index->last_read_log_file_tail_offset = hdr->log_file_tail_offset;
map->rec_map->write_seq_first = map->rec_map->write_seq_last = 0;
map->write_atomic = FALSE;
map->write_base_header = FALSE;
map->write_ext_header = FALSE;
/* rotate only if requested, the header refers to the log's head file,
   and its tail offset has caught up with its head offset; the rotation
   result is deliberately ignored */
if (want_rotate &&
hdr->log_file_seq == index->log->head->hdr.file_seq &&
hdr->log_file_tail_offset == hdr->log_file_head_offset)
(void)mail_transaction_log_rotate(index->log, FALSE);
}