mail-transaction-log-file.c revision b397665e90fa0fc7c6a9156fdd6cf28b571e8e39
89a126810703c666309310d0f3189e9834d70b5bTimo Sirainen/* Copyright (c) 2003-2009 Dovecot authors, see the included COPYING file */
847aeef259d42e2f14cf126699e28291e6e1fb53Timo Sirainen
847aeef259d42e2f14cf126699e28291e6e1fb53Timo Sirainen#include "lib.h"
847aeef259d42e2f14cf126699e28291e6e1fb53Timo Sirainen#include "ioloop.h"
847aeef259d42e2f14cf126699e28291e6e1fb53Timo Sirainen#include "buffer.h"
65988f5a8abed57e9894fec77105941e046d3490Timo Sirainen#include "file-dotlock.h"
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainen#include "nfs-workarounds.h"
847aeef259d42e2f14cf126699e28291e6e1fb53Timo Sirainen#include "read-full.h"
847aeef259d42e2f14cf126699e28291e6e1fb53Timo Sirainen#include "write-full.h"
847aeef259d42e2f14cf126699e28291e6e1fb53Timo Sirainen#include "mmap-util.h"
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen#include "mail-index-private.h"
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen#include "mail-index-modseq.h"
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen#include "mail-transaction-log-private.h"
847aeef259d42e2f14cf126699e28291e6e1fb53Timo Sirainen
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen#define LOG_PREFETCH 1024
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen#define MEMORY_LOG_NAME "(in-memory transaction log file)"
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainenvoid
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainenmail_transaction_log_file_set_corrupted(struct mail_transaction_log_file *file,
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen const char *fmt, ...)
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen{
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen va_list va;
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen file->corrupted = TRUE;
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen file->hdr.indexid = 0;
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen if (!MAIL_TRANSACTION_LOG_FILE_IN_MEMORY(file)) {
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen /* indexid=0 marks the log file as corrupted */
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen if (pwrite_full(file->fd, &file->hdr.indexid,
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen sizeof(file->hdr.indexid),
847aeef259d42e2f14cf126699e28291e6e1fb53Timo Sirainen offsetof(struct mail_transaction_log_header,
847aeef259d42e2f14cf126699e28291e6e1fb53Timo Sirainen indexid)) < 0) {
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen mail_index_file_set_syscall_error(file->log->index,
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen file->filepath, "pwrite()");
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen }
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen }
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen va_start(va, fmt);
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen T_BEGIN {
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen mail_index_set_error(file->log->index,
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen "Corrupted transaction log file %s seq %u: %s "
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen "(sync_offset=%"PRIuUOFF_T")",
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen file->filepath, file->hdr.file_seq,
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen t_strdup_vprintf(fmt, va), file->sync_offset);
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen } T_END;
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen va_end(va);
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen}
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainenstruct mail_transaction_log_file *
847aeef259d42e2f14cf126699e28291e6e1fb53Timo Sirainenmail_transaction_log_file_alloc(struct mail_transaction_log *log,
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen const char *path)
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen{
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen struct mail_transaction_log_file *file;
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen
847aeef259d42e2f14cf126699e28291e6e1fb53Timo Sirainen file = i_new(struct mail_transaction_log_file, 1);
847aeef259d42e2f14cf126699e28291e6e1fb53Timo Sirainen file->log = log;
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen file->filepath = i_strdup(path);
847aeef259d42e2f14cf126699e28291e6e1fb53Timo Sirainen file->fd = -1;
847aeef259d42e2f14cf126699e28291e6e1fb53Timo Sirainen return file;
847aeef259d42e2f14cf126699e28291e6e1fb53Timo Sirainen}
7c424aa51c956c628e3512055841aa2f9eef4833Timo Sirainen
f923659c0e5298263d80622c99f4dc4132b4675bTimo Sirainenvoid mail_transaction_log_file_free(struct mail_transaction_log_file **_file)
847aeef259d42e2f14cf126699e28291e6e1fb53Timo Sirainen{
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen struct mail_transaction_log_file *file = *_file;
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen struct mail_transaction_log_file **p;
2a3fc652e13a574ca14ff2405b5c29a59232db49Timo Sirainen int old_errno = errno;
847aeef259d42e2f14cf126699e28291e6e1fb53Timo Sirainen
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen *_file = NULL;
847aeef259d42e2f14cf126699e28291e6e1fb53Timo Sirainen
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen mail_transaction_log_file_unlock(file);
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen for (p = &file->log->files; *p != NULL; p = &(*p)->next) {
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen if (*p == file) {
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen *p = file->next;
847aeef259d42e2f14cf126699e28291e6e1fb53Timo Sirainen break;
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen }
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen }
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen if (file == file->log->head)
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen file->log->head = NULL;
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen if (file->buffer != NULL)
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen buffer_free(&file->buffer);
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen
847aeef259d42e2f14cf126699e28291e6e1fb53Timo Sirainen if (file->mmap_base != NULL) {
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen if (munmap(file->mmap_base, file->mmap_size) < 0) {
fab050cbfdf3da692441d2e2fb4b2a4c6ac9e0daTimo Sirainen mail_index_file_set_syscall_error(file->log->index,
fab050cbfdf3da692441d2e2fb4b2a4c6ac9e0daTimo Sirainen file->filepath,
847aeef259d42e2f14cf126699e28291e6e1fb53Timo Sirainen "munmap()");
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen }
847aeef259d42e2f14cf126699e28291e6e1fb53Timo Sirainen }
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen if (file->fd != -1) {
2a3fc652e13a574ca14ff2405b5c29a59232db49Timo Sirainen if (close(file->fd) < 0) {
2a3fc652e13a574ca14ff2405b5c29a59232db49Timo Sirainen mail_index_file_set_syscall_error(file->log->index,
2a3fc652e13a574ca14ff2405b5c29a59232db49Timo Sirainen file->filepath,
2a3fc652e13a574ca14ff2405b5c29a59232db49Timo Sirainen "close()");
2a3fc652e13a574ca14ff2405b5c29a59232db49Timo Sirainen }
2a3fc652e13a574ca14ff2405b5c29a59232db49Timo Sirainen }
2a3fc652e13a574ca14ff2405b5c29a59232db49Timo Sirainen
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen i_free(file->filepath);
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen i_free(file);
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen errno = old_errno;
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen}
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainenstatic void
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainenmail_transaction_log_file_skip_to_head(struct mail_transaction_log_file *file)
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen{
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen struct mail_transaction_log *log = file->log;
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen struct mail_index_map *map = log->index->map;
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen const struct mail_index_modseq_header *modseq_hdr;
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen uoff_t head_offset;
847aeef259d42e2f14cf126699e28291e6e1fb53Timo Sirainen
847aeef259d42e2f14cf126699e28291e6e1fb53Timo Sirainen if (map == NULL || file->hdr.file_seq != map->hdr.log_file_seq ||
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen map->hdr.log_file_head_offset == 0)
d65a556a5ec078cd7f1d0060adb16fc860d66b27Timo Sirainen return;
d65a556a5ec078cd7f1d0060adb16fc860d66b27Timo Sirainen
d65a556a5ec078cd7f1d0060adb16fc860d66b27Timo Sirainen /* we can get a valid log offset from index file. initialize
e4ded29bff0662a590c2439ef2df8cda8a7cdd9bTimo Sirainen sync_offset from it so we don't have to read the whole log
e4ded29bff0662a590c2439ef2df8cda8a7cdd9bTimo Sirainen file from beginning. */
847aeef259d42e2f14cf126699e28291e6e1fb53Timo Sirainen head_offset = map->hdr.log_file_head_offset;
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainen
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainen modseq_hdr = mail_index_map_get_modseq_header(map);
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainen if (head_offset < file->hdr.hdr_size) {
bc2d4f1c18222a3bd2a6b2b8b5f6abb560a865b3Timo Sirainen mail_index_set_error(log->index,
bc2d4f1c18222a3bd2a6b2b8b5f6abb560a865b3Timo Sirainen "%s: log_file_head_offset too small",
345253fb28498b2e0a60f4a2a8644c65feee7e75Timo Sirainen log->index->filepath);
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainen file->sync_offset = file->hdr.hdr_size;
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainen file->sync_highest_modseq = file->hdr.initial_modseq;
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainen } else if (modseq_hdr == NULL && file->hdr.initial_modseq == 0) {
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainen /* modseqs not used yet */
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainen file->sync_offset = head_offset;
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainen file->sync_highest_modseq = 0;
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainen } else if (modseq_hdr == NULL ||
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainen modseq_hdr->log_seq != file->hdr.file_seq) {
a4ee24a4d5eefa80bbefc5acba16587ae36c3b5bTimo Sirainen /* highest_modseq not synced, start from beginning */
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainen file->sync_offset = file->hdr.hdr_size;
bc2d4f1c18222a3bd2a6b2b8b5f6abb560a865b3Timo Sirainen file->sync_highest_modseq = file->hdr.initial_modseq;
a93de780c3b78cfaace287026e468f3c3e34683aTimo Sirainen } else if (modseq_hdr->log_offset > head_offset) {
bc2d4f1c18222a3bd2a6b2b8b5f6abb560a865b3Timo Sirainen mail_index_set_error(log->index,
bc2d4f1c18222a3bd2a6b2b8b5f6abb560a865b3Timo Sirainen "%s: modseq_hdr.log_offset too large",
bc2d4f1c18222a3bd2a6b2b8b5f6abb560a865b3Timo Sirainen log->index->filepath);
bc2d4f1c18222a3bd2a6b2b8b5f6abb560a865b3Timo Sirainen file->sync_offset = file->hdr.hdr_size;
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainen file->sync_highest_modseq = file->hdr.initial_modseq;
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainen } else {
345253fb28498b2e0a60f4a2a8644c65feee7e75Timo Sirainen /* start from where we last stopped tracking modseqs */
65988f5a8abed57e9894fec77105941e046d3490Timo Sirainen file->sync_offset = modseq_hdr->log_offset;
65988f5a8abed57e9894fec77105941e046d3490Timo Sirainen file->sync_highest_modseq = modseq_hdr->highest_modseq;
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainen }
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainen file->saved_tail_offset = log->index->map->hdr.log_file_tail_offset;
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainen file->saved_tail_sync_offset = file->saved_tail_offset;
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainen if (file->saved_tail_offset > file->max_tail_offset)
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainen file->max_tail_offset = file->saved_tail_offset;
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainen}
a4ee24a4d5eefa80bbefc5acba16587ae36c3b5bTimo Sirainen
a4ee24a4d5eefa80bbefc5acba16587ae36c3b5bTimo Sirainenstatic void
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainenmail_transaction_log_file_add_to_list(struct mail_transaction_log_file *file)
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainen{
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainen struct mail_transaction_log_file **p;
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainen
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainen file->sync_offset = file->hdr.hdr_size;
055f4599bba1874fa1148a8fa488517fa077619cTimo Sirainen file->sync_highest_modseq = file->hdr.initial_modseq;
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainen mail_transaction_log_file_skip_to_head(file);
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainen
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainen /* insert it to correct position */
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainen for (p = &file->log->files; *p != NULL; p = &(*p)->next) {
bc2d4f1c18222a3bd2a6b2b8b5f6abb560a865b3Timo Sirainen if ((*p)->hdr.file_seq > file->hdr.file_seq)
345253fb28498b2e0a60f4a2a8644c65feee7e75Timo Sirainen break;
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainen i_assert((*p)->hdr.file_seq < file->hdr.file_seq);
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainen }
a93de780c3b78cfaace287026e468f3c3e34683aTimo Sirainen
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainen file->next = *p;
bc2d4f1c18222a3bd2a6b2b8b5f6abb560a865b3Timo Sirainen *p = file;
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainen}
345253fb28498b2e0a60f4a2a8644c65feee7e75Timo Sirainen
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainenstatic int
a93de780c3b78cfaace287026e468f3c3e34683aTimo Sirainenmail_transaction_log_init_hdr(struct mail_transaction_log *log,
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainen struct mail_transaction_log_header *hdr)
{
struct mail_index *index = log->index;
memset(hdr, 0, sizeof(*hdr));
hdr->major_version = MAIL_TRANSACTION_LOG_MAJOR_VERSION;
hdr->minor_version = MAIL_TRANSACTION_LOG_MINOR_VERSION;
hdr->hdr_size = sizeof(struct mail_transaction_log_header);
hdr->indexid = log->index->indexid;
hdr->create_stamp = ioloop_time;
#ifndef WORDS_BIGENDIAN
hdr->compat_flags |= MAIL_INDEX_COMPAT_LITTLE_ENDIAN;
#endif
if (index->fd != -1) {
/* not creating index - make sure we have latest header */
if (!index->mapping) {
if (mail_index_map(index,
MAIL_INDEX_SYNC_HANDLER_HEAD) <= 0)
return -1;
} else {
/* if we got here from mapping, the .log file is
corrupted. use whatever values we got from index
file */
}
}
if (index->map != NULL) {
hdr->prev_file_seq = index->map->hdr.log_file_seq;
hdr->prev_file_offset = index->map->hdr.log_file_head_offset;
hdr->file_seq = index->map->hdr.log_file_seq + 1;
hdr->initial_modseq =
mail_index_map_modseq_get_highest(index->map);
} else {
hdr->file_seq = 1;
}
if (log->head != NULL) {
if (hdr->file_seq <= log->head->hdr.file_seq) {
/* make sure the sequence grows */
hdr->file_seq = log->head->hdr.file_seq+1;
}
if (hdr->initial_modseq < log->head->sync_highest_modseq) {
/* this should be always up-to-date */
hdr->initial_modseq = log->head->sync_highest_modseq;
}
}
return 0;
}
/* Create an in-memory transaction log file for `log` and add it to the
   log's file list.

   The header is initialized the same way as for an on-disk file and an
   empty dynamic buffer takes the place of file contents, with
   buffer_offset set just past the header.

   Returns the new file, or NULL if initializing the header failed. */
struct mail_transaction_log_file *
mail_transaction_log_file_alloc_in_memory(struct mail_transaction_log *log)
{
	struct mail_transaction_log_file *file;

	file = mail_transaction_log_file_alloc(log, MEMORY_LOG_NAME);
	if (mail_transaction_log_init_hdr(log, &file->hdr) < 0) {
		/* the file isn't linked anywhere yet. the allocator made its
		   own copy of the path, so release that too and not just
		   the struct. */
		i_free(file->filepath);
		i_free(file);
		return NULL;
	}

	file->buffer = buffer_create_dynamic(default_pool, 4096);
	file->buffer_offset = sizeof(file->hdr);

	mail_transaction_log_file_add_to_list(file);
	return file;
}
static int
mail_transaction_log_file_dotlock(struct mail_transaction_log_file *file)
{
int ret;
if (file->log->dotlock_count > 0)
ret = 1;
else {
ret = file_dotlock_create(&file->log->dotlock_settings,
file->filepath, 0,
&file->log->dotlock);
}
if (ret > 0) {
file->log->dotlock_count++;
file->locked = TRUE;
return 0;
}
if (ret < 0) {
mail_index_file_set_syscall_error(file->log->index,
file->filepath,
"file_dotlock_create()");
return -1;
}
mail_index_set_error(file->log->index,
"Timeout while waiting for "
"dotlock for transaction log file %s",
file->filepath);
file->log->index->index_lock_timeout = TRUE;
return -1;
}
static int
mail_transaction_log_file_undotlock(struct mail_transaction_log_file *file)
{
int ret;
if (--file->log->dotlock_count > 0)
return 0;
ret = file_dotlock_delete(&file->log->dotlock);
if (ret < 0) {
mail_index_file_set_syscall_error(file->log->index,
file->filepath, "file_dotlock_delete()");
return -1;
}
if (ret == 0) {
mail_index_set_error(file->log->index,
"Dotlock was lost for transaction log file %s",
file->filepath);
return -1;
}
return 0;
}
int mail_transaction_log_file_lock(struct mail_transaction_log_file *file)
{
int ret;
if (file->locked)
return 0;
if (MAIL_TRANSACTION_LOG_FILE_IN_MEMORY(file)) {
file->locked = TRUE;
return 0;
}
if (file->log->index->lock_method == FILE_LOCK_METHOD_DOTLOCK)
return mail_transaction_log_file_dotlock(file);
i_assert(file->file_lock == NULL);
ret = mail_index_lock_fd(file->log->index, file->filepath, file->fd,
F_WRLCK, MAIL_TRANSCATION_LOG_LOCK_TIMEOUT,
&file->file_lock);
if (ret > 0) {
file->locked = TRUE;
return 0;
}
if (ret < 0) {
mail_index_file_set_syscall_error(file->log->index,
file->filepath,
"mail_index_wait_lock_fd()");
return -1;
}
mail_index_set_error(file->log->index,
"Timeout while waiting for lock for transaction log file %s",
file->filepath);
file->log->index->index_lock_timeout = TRUE;
return -1;
}
/* Release the lock taken by mail_transaction_log_file_lock(). Does nothing
   if the file isn't locked. For in-memory files only the locked flag is
   cleared. A failure to release the dotlock is not reported to the
   caller. */
void mail_transaction_log_file_unlock(struct mail_transaction_log_file *file)
{
	if (!file->locked)
		return;
	file->locked = FALSE;

	if (MAIL_TRANSACTION_LOG_FILE_IN_MEMORY(file))
		return;

	if (file->log->index->lock_method == FILE_LOCK_METHOD_DOTLOCK)
		mail_transaction_log_file_undotlock(file);
	else
		file_unlock(&file->file_lock);
}
/* Read the log header from the start of file->fd into file->hdr, which is
   zeroed first.

   Returns the number of bytes read, which may be less than
   sizeof(file->hdr), or -1 if pread() failed. */
static ssize_t
mail_transaction_log_file_read_header(struct mail_transaction_log_file *file)
{
	ssize_t pos = 0;
	int ret;

	memset(&file->hdr, 0, sizeof(file->hdr));

	/* try to read the whole header, but it's not necessarily an error to
	   read less since the older versions of the log format could be
	   smaller. */
	for (;;) {
		ret = pread(file->fd, PTR_OFFSET(&file->hdr, pos),
			    sizeof(file->hdr) - pos, pos);
		if (ret < 0)
			return -1;
		if (ret == 0) {
			/* end of file */
			break;
		}
		pos += ret;
		if (pos >= (ssize_t)sizeof(file->hdr))
			break;
	}
	return pos;
}
/* Read the header of an on-disk log file into file->hdr and validate it.

   Returns 1 if the header was accepted, 0 if the file can't be used
   (already flagged corrupted, different major version, truncated or
   undersized header, compatibility flag mismatch, indexid of zero,
   indexid mismatch, or another file in the log's list already has the
   same file_seq), and -1 if reading the header failed. When ignore_estale
   is set, an ESTALE read failure isn't logged, but -1 is still returned.

   Side effects: may mark this file (or, for a duplicate sequence, the
   other file) as corrupted, and may adopt the log's indexid as the index's
   indexid while the index is being created. */
static int
mail_transaction_log_file_read_hdr(struct mail_transaction_log_file *file,
bool ignore_estale)
{
struct mail_transaction_log_file *f;
int ret;
i_assert(!MAIL_TRANSACTION_LOG_FILE_IN_MEMORY(file));
if (file->corrupted)
return 0;
/* ret is the number of header bytes that were actually read */
ret = mail_transaction_log_file_read_header(file);
if (ret < 0) {
if (errno != ESTALE || !ignore_estale) {
mail_index_file_set_syscall_error(file->log->index,
file->filepath,
"pread()");
}
return -1;
}
if (file->hdr.major_version != MAIL_TRANSACTION_LOG_MAJOR_VERSION) {
/* incompatible version - fix silently */
return 0;
}
if (ret < MAIL_TRANSACTION_LOG_HEADER_MIN_SIZE) {
mail_transaction_log_file_set_corrupted(file,
"unexpected end of file while reading header");
return 0;
}
if (file->hdr.minor_version >= 2 || file->hdr.major_version > 1) {
/* we have compatibility flags */
enum mail_index_header_compat_flags compat_flags = 0;
#ifndef WORDS_BIGENDIAN
compat_flags |= MAIL_INDEX_COMPAT_LITTLE_ENDIAN;
#endif
if (file->hdr.compat_flags != compat_flags) {
/* architecture change */
mail_index_set_error(file->log->index,
"Rebuilding index file %s: "
"CPU architecture changed",
file->log->index->filepath);
return 0;
}
}
if (file->hdr.hdr_size < MAIL_TRANSACTION_LOG_HEADER_MIN_SIZE) {
mail_transaction_log_file_set_corrupted(file,
"Header size too small");
return 0;
}
if (file->hdr.hdr_size < sizeof(file->hdr)) {
/* @UNSAFE: smaller than we expected - zero out the fields we
shouldn't have filled */
memset(PTR_OFFSET(&file->hdr, file->hdr.hdr_size), 0,
sizeof(file->hdr) - file->hdr.hdr_size);
}
if (file->hdr.indexid == 0) {
/* corrupted */
file->corrupted = TRUE;
mail_index_set_error(file->log->index,
"Transaction log file %s: marked corrupted",
file->filepath);
return 0;
}
if (file->hdr.indexid != file->log->index->indexid) {
if (file->log->index->indexid != 0 &&
!file->log->index->initial_create) {
/* index file was probably just rebuilt and we don't
know about it yet */
mail_transaction_log_file_set_corrupted(file,
"indexid changed %u -> %u",
file->log->index->indexid, file->hdr.indexid);
return 0;
}
/* creating index file. since transaction log is created
first, use the indexid in it to create the main index
to avoid races. */
file->log->index->indexid = file->hdr.indexid;
}
/* make sure we already don't have a file with the same sequence
opened. it shouldn't happen unless the old log file was
corrupted. */
for (f = file->log->files; f != NULL; f = f->next) {
if (f->hdr.file_seq == file->hdr.file_seq) {
/* mark the old file corrupted. we can't safely remove
it from the list however, so return failure. */
mail_transaction_log_file_set_corrupted(f,
"duplicate transaction log sequence (%u)",
f->hdr.file_seq);
return 0;
}
}
/* header accepted: modseq tracking starts from the header's value */
file->sync_highest_modseq = file->hdr.initial_modseq;
return 1;
}
/* fstat() the file's fd and remember its device, inode, mtime and size in
   the file object.

   Returns 0 on success, -1 on failure. The failure is logged unless it is
   ESTALE and ignore_estale is set. */
static int
mail_transaction_log_file_stat(struct mail_transaction_log_file *file,
			       bool ignore_estale)
{
	struct stat st;

	if (fstat(file->fd, &st) == 0) {
		file->st_dev = st.st_dev;
		file->st_ino = st.st_ino;
		file->last_mtime = st.st_mtime;
		file->last_size = st.st_size;
		return 0;
	}

	if (!(errno == ESTALE && ignore_estale)) {
		mail_index_file_set_syscall_error(file->log->index,
						  file->filepath, "fstat()");
	}
	return -1;
}
/* Returns TRUE if some file in the log's file list has the same inode and
   device as `file`. */
static bool
mail_transaction_log_file_is_dupe(struct mail_transaction_log_file *file)
{
	struct mail_transaction_log_file *other = file->log->files;

	while (other != NULL) {
		if (other->st_ino == file->st_ino &&
		    CMP_DEV_T(other->st_dev, file->st_dev))
			return TRUE;
		other = other->next;
	}
	return FALSE;
}
/* Second stage of log file creation. The caller has opened new_fd together
   with the creation dotlock passed in `dotlock`.

   Unless `reset` is set, the path is first checked for an existing file:
   - if it is still the file we know (same inode, device, mtime and size),
     a hard link to it is made at "<filepath>.2" before it gets replaced;
   - if it is some other file whose header validates, that file is adopted
     instead: the dotlock is deleted, the file is added to the log's list
     and 0 is returned without writing anything to new_fd.
   Otherwise a fresh header is written to new_fd and the dotlock file
   replaces the log file.

   Returns 0 on success with the file added to the log's file list, -1 on
   failure. */
static int
mail_transaction_log_file_create2(struct mail_transaction_log_file *file,
int new_fd, bool reset,
struct dotlock **dotlock)
{
struct mail_index *index = file->log->index;
struct stat st;
const char *path2;
int fd, ret;
bool rename_existing;
if (index->nfs_flush) {
/* although we check also mtime and file size below, it's done
only to fix broken log files. we don't bother flushing
attribute cache just for that. */
nfs_flush_file_handle_cache(file->filepath);
}
/* log creation is locked now - see if someone already created it.
note that if we're rotating, we need to keep the log locked until
the file has been rewritten. and because fcntl() locks are stupid,
if we go and open()+close() the file and we had it already opened,
its locks are lost. so we use stat() to check if the file has been
recreated, although it almost never is. */
if (reset)
rename_existing = FALSE;
else if (nfs_safe_stat(file->filepath, &st) < 0) {
if (errno != ENOENT) {
mail_index_file_set_syscall_error(index, file->filepath,
"stat()");
return -1;
}
rename_existing = FALSE;
} else if (st.st_ino == file->st_ino &&
CMP_DEV_T(st.st_dev, file->st_dev) &&
/* inode/dev checks are enough when we're rotating the file,
but not when we're replacing a broken log file */
st.st_mtime == file->last_mtime &&
(uoff_t)st.st_size == file->last_size) {
/* no-one else recreated the file */
rename_existing = TRUE;
} else {
/* recreated. use the file if its header is ok */
fd = nfs_safe_open(file->filepath, O_RDWR);
if (fd == -1) {
if (errno != ENOENT) {
mail_index_file_set_syscall_error(index,
file->filepath, "open()");
return -1;
}
} else {
/* the header and stat helpers operate on file->fd */
file->fd = fd;
if (mail_transaction_log_file_read_hdr(file,
FALSE) > 0 &&
mail_transaction_log_file_stat(file, FALSE) == 0) {
/* yes, it was ok */
(void)file_dotlock_delete(dotlock);
mail_transaction_log_file_add_to_list(file);
return 0;
}
/* not usable: detach and close it, then fall through to
writing a new file */
file->fd = -1;
if (close(fd) < 0) {
mail_index_file_set_syscall_error(index,
file->filepath, "close()");
}
}
rename_existing = FALSE;
}
if (mail_transaction_log_init_hdr(file->log, &file->hdr) < 0)
return -1;
if (reset) {
/* a reset log doesn't continue from any previous file */
file->hdr.prev_file_seq = 0;
file->hdr.prev_file_offset = 0;
file->hdr.initial_modseq = 0;
}
if (write_full(new_fd, &file->hdr, sizeof(file->hdr)) < 0) {
mail_index_file_set_syscall_error(index, file->filepath,
"write_full()");
return -1;
}
if (index->nfs_flush) {
/* the header isn't important, so don't bother calling
fdatasync() unless NFS is used */
if (fdatasync(new_fd) < 0) {
mail_index_file_set_syscall_error(index, file->filepath,
"fdatasync()");
return -1;
}
}
/* file->fd is set only temporarily here, so that the stat and lock
helpers below work on the new file */
file->fd = new_fd;
ret = mail_transaction_log_file_stat(file, FALSE);
if (file->log->head != NULL && file->log->head->locked) {
/* we'll need to preserve the lock */
if (mail_transaction_log_file_lock(file) < 0)
ret = -1;
}
/* if we return -1 the dotlock deletion code closes the fd */
file->fd = -1;
if (ret < 0)
return -1;
/* keep two log files */
if (rename_existing) {
/* rename() would be nice and easy way to do this, except then
there's a race condition between the rename and
file_dotlock_replace(). during that time the log file
doesn't exist, which could cause problems. */
path2 = t_strconcat(file->filepath, ".2", NULL);
if (unlink(path2) < 0 && errno != ENOENT) {
mail_index_set_error(index, "unlink(%s) failed: %m",
path2);
/* try to link() anyway */
}
if (nfs_safe_link(file->filepath, path2, FALSE) < 0 &&
errno != ENOENT && errno != EEXIST) {
mail_index_set_error(index, "link(%s, %s) failed: %m",
file->filepath, path2);
/* ignore the error. we don't care that much about the
second log file and we're going to overwrite this
first one. */
}
}
/* DONT_CLOSE_FD: new_fd is kept open and becomes file->fd below */
if (file_dotlock_replace(dotlock,
DOTLOCK_REPLACE_FLAG_DONT_CLOSE_FD) <= 0)
return -1;
/* success */
file->fd = new_fd;
mail_transaction_log_file_add_to_list(file);
return 0;
}
/* Create the transaction log file on disk (the index must not be
   in-memory).

   A dotlock file is opened for the path using the index's mode, and its
   group is changed when the index has a gid set. The rest of the work is
   done by mail_transaction_log_file_create2(). With `reset` set, any
   existing file is not examined.

   Returns 0 on success, -1 on failure. */
int mail_transaction_log_file_create(struct mail_transaction_log_file *file,
				     bool reset)
{
	struct mail_index *index = file->log->index;
	struct dotlock *lock;
	mode_t saved_umask;
	int new_fd;

	i_assert(!MAIL_INDEX_IS_IN_MEMORY(index));

	/* With dotlocking we might already have path.lock created, so this
	   filename has to be different. */
	saved_umask = umask(index->mode ^ 0666);
	new_fd = file_dotlock_open(&file->log->new_dotlock_settings,
				   file->filepath, 0, &lock);
	umask(saved_umask);

	if (new_fd == -1) {
		mail_index_file_set_syscall_error(index, file->filepath,
						  "file_dotlock_open()");
		return -1;
	}

	if (index->gid != (gid_t)-1) {
		if (fchown(new_fd, (uid_t)-1, index->gid) < 0) {
			mail_index_file_set_syscall_error(index,
							  file->filepath,
							  "fchown()");
			(void)file_dotlock_delete(&lock);
			return -1;
		}
	}

	/* either fd gets used or the dotlock gets deleted and returned fd
	   is for the existing file */
	if (mail_transaction_log_file_create2(file, new_fd, reset, &lock) >= 0)
		return 0;

	if (lock != NULL)
		(void)file_dotlock_delete(&lock);
	return -1;
}
/* Open an existing transaction log file and add it to the log's file list.

   The open is retried when fstat() or reading the header fails with
   ESTALE, up to MAIL_INDEX_ESTALE_RETRY_COUNT times. With check_existing
   set, a file whose inode and device match a file already in the list is
   rejected.

   Returns 1 if the file was opened and added to the list, 0 if the file
   doesn't exist, is a duplicate, or was found unusable (in which case it is
   also unlinked), and -1 on a syscall error. */
int mail_transaction_log_file_open(struct mail_transaction_log_file *file,
				   bool check_existing)
{
	unsigned int i;
	bool ignore_estale;
	int ret;

	for (i = 0;; i++) {
		file->fd = nfs_safe_open(file->filepath, O_RDWR);
		if (file->fd == -1) {
			if (errno == ENOENT)
				return 0;

			mail_index_file_set_syscall_error(file->log->index,
				file->filepath, "open()");
			return -1;
		}

		/* ESTALE is reported only once the retries are used up */
		ignore_estale = i < MAIL_INDEX_ESTALE_RETRY_COUNT;
		if (mail_transaction_log_file_stat(file, ignore_estale) < 0)
			ret = -1;
		else if (check_existing &&
			 mail_transaction_log_file_is_dupe(file))
			return 0;
		else {
			ret = mail_transaction_log_file_read_hdr(file,
								 ignore_estale);
		}
		if (ret > 0) {
			/* success */
			break;
		}

		if (ret == 0) {
			/* corrupted */
			if (unlink(file->filepath) < 0 && errno != ENOENT) {
				mail_index_set_error(file->log->index,
						     "unlink(%s) failed: %m",
						     file->filepath);
			}
			return 0;
		}
		if (errno != ESTALE ||
		    i == MAIL_INDEX_ESTALE_RETRY_COUNT) {
			/* syscall error */
			return -1;
		}

		/* ESTALE - try again. the next iteration overwrites file->fd,
		   so close the stale descriptor first or it is leaked. errno
		   has already been examined above, and a close() failure on
		   a stale handle isn't worth reporting. */
		(void)close(file->fd);
		file->fd = -1;
	}

	mail_transaction_log_file_add_to_list(file);
	return 1;
}
/* Examine a header-update record and, if it rewrites the index header's
   log_file_tail_offset field, track the new tail offset in `file`.

   data/size are the record contents (a mail_transaction_header_update
   followed by the new bytes).

   Returns 1 if the record covered the tail offset field (whether the value
   was stored or deliberately ignored), 0 if it didn't, and -1 if the record
   was invalid and the file was marked corrupted. */
static int
log_file_track_mailbox_sync_offset_hdr(struct mail_transaction_log_file *file,
				       const void *data, unsigned int size)
{
	const struct mail_transaction_header_update *u = data;
	const struct mail_index_header *ihdr;
	const unsigned int tail_pos =
		offsetof(struct mail_index_header, log_file_tail_offset);
	const unsigned int tail_size = sizeof(ihdr->log_file_tail_offset);
	uint32_t new_tail;

	i_assert(tail_size == sizeof(new_tail));

	if (size < sizeof(*u) || size < sizeof(*u) + u->size) {
		mail_transaction_log_file_set_corrupted(file,
			"header update extends beyond record size");
		return -1;
	}

	/* only updates that cover the whole tail offset field matter */
	if (u->offset > tail_pos ||
	    u->offset + u->size < tail_pos + tail_size)
		return 0;

	memcpy(&new_tail,
	       CONST_PTR_OFFSET(u + 1, tail_pos - u->offset),
	       sizeof(new_tail));

	if (new_tail >= file->saved_tail_offset) {
		file->saved_tail_offset = new_tail;
		if (file->max_tail_offset < new_tail)
			file->max_tail_offset = new_tail;
		return 1;
	}

	if (file->sync_offset < file->saved_tail_sync_offset) {
		/* saved_tail_offset was already set in header,
		   but we still had to resync the file to find
		   modseqs. ignore this record. */
		return 1;
	}

	mail_transaction_log_file_set_corrupted(file,
		"log_file_tail_offset update shrank it "
		"(%u vs %"PRIuUOFF_T", file_seq=%u)",
		new_tail, file->saved_tail_offset,
		file->hdr.file_seq);
	return -1;
}
/* Returns TRUE if the given transaction record counts as a modseq change.

   While cur_modseq is 0, only an extension introduction whose name equals
   MAIL_INDEX_MODSEQ_EXT_NAME returns TRUE. Otherwise appends, flag updates,
   keyword updates/resets and external expunges return TRUE. `data` is the
   record's contents and is only read for extension introductions. */
bool
mail_transaction_header_has_modseq(const struct mail_transaction_header *hdr,
				   const void *data,
				   uint64_t cur_modseq)
{
	const uint32_t type = hdr->type & MAIL_TRANSACTION_TYPE_MASK;

	if (cur_modseq == 0) {
		const struct mail_transaction_ext_intro *intro = data;
		const unsigned int modseq_ext_len =
			strlen(MAIL_INDEX_MODSEQ_EXT_NAME);

		if (type != MAIL_TRANSACTION_EXT_INTRO) {
			/* not tracking modseqs */
			return FALSE;
		}
		/* modseqs not tracked yet. see if this is a modseq
		   extension introduction. */
		if (intro->name_size == modseq_ext_len &&
		    memcmp(intro + 1, MAIL_INDEX_MODSEQ_EXT_NAME,
			   modseq_ext_len) == 0) {
			/* modseq tracking started */
			return TRUE;
		}
		/* some other extension: handled by the switch below */
	}

	switch (type) {
	case MAIL_TRANSACTION_EXPUNGE | MAIL_TRANSACTION_EXPUNGE_PROT:
		/* expunge requests are ignored, only external ones count */
		return (hdr->type & MAIL_TRANSACTION_EXTERNAL) != 0;
	case MAIL_TRANSACTION_APPEND:
	case MAIL_TRANSACTION_FLAG_UPDATE:
	case MAIL_TRANSACTION_KEYWORD_UPDATE:
	case MAIL_TRANSACTION_KEYWORD_RESET:
		/* these changes increase modseq */
		return TRUE;
	default:
		return FALSE;
	}
}
static struct modseq_cache *
modseq_cache_hit(struct mail_transaction_log_file *file, unsigned int idx)
{
struct modseq_cache cache;
if (idx > 0) {
/* @UNSAFE: move it to top */
cache = file->modseq_cache[idx];
memmove(file->modseq_cache + 1, file->modseq_cache,
sizeof(*file->modseq_cache) * idx);
file->modseq_cache[0] = cache;
}
return &file->modseq_cache[0];
}
/* Find the most useful modseq cache entry for `offset`.

   Returns the entry whose offset equals `offset` (after moving it to the
   front of the cache), or else the entry with the largest offset below
   `offset`, so that the caller has as few records as possible left to scan.
   Returns NULL if no entry is at or below `offset`.

   Entries with offset 0 are treated as unused and skipped. */
static struct modseq_cache *
modseq_cache_get_offset(struct mail_transaction_log_file *file, uoff_t offset)
{
	unsigned int i, best = -1U;

	for (i = 0; i < N_ELEMENTS(file->modseq_cache); i++) {
		if (offset < file->modseq_cache[i].offset)
			continue;

		if (file->modseq_cache[i].offset == 0) {
			/* unused slot. it must not discard a candidate that
			   was already found in an earlier slot. */
			continue;
		}

		if (offset == file->modseq_cache[i].offset) {
			/* exact cache hit */
			return modseq_cache_hit(file, i);
		}

		/* prefer the entry closest to the wanted offset */
		if (best == -1U ||
		    file->modseq_cache[i].offset >
		    file->modseq_cache[best].offset)
			best = i;
	}
	if (best == -1U)
		return NULL;
	return &file->modseq_cache[best];
}
/* Find the most useful modseq cache entry for `modseq`.

   Returns the entry whose highest_modseq equals `modseq` (after moving it
   to the front of the cache), or else the entry with the largest
   highest_modseq below `modseq`, so that the caller has as few records as
   possible left to scan. Returns NULL if no entry is at or below `modseq`.

   Entries with offset 0 are treated as unused and skipped. */
static struct modseq_cache *
modseq_cache_get_modseq(struct mail_transaction_log_file *file, uint64_t modseq)
{
	unsigned int i, best = -1U;

	for (i = 0; i < N_ELEMENTS(file->modseq_cache); i++) {
		if (modseq < file->modseq_cache[i].highest_modseq)
			continue;

		if (file->modseq_cache[i].offset == 0) {
			/* unused slot. it must not discard a candidate that
			   was already found in an earlier slot. */
			continue;
		}

		if (modseq == file->modseq_cache[i].highest_modseq) {
			/* exact cache hit */
			return modseq_cache_hit(file, i);
		}

		/* prefer the entry closest to the wanted modseq */
		if (best == -1U ||
		    file->modseq_cache[i].highest_modseq >
		    file->modseq_cache[best].highest_modseq)
			best = i;
	}
	if (best == -1U)
		return NULL;
	return &file->modseq_cache[best];
}
/* Fetch the transaction record that starts at *offset in the file's buffer.

   On success the record header is stored to *hdr_r, *offset is advanced
   past the record and 0 is returned. If the record's size field is smaller
   than a header or reaches past the used part of the buffer, the file is
   marked corrupted and -1 is returned (*offset and *hdr_r are untouched). */
static int
log_get_synced_record(struct mail_transaction_log_file *file, uoff_t *offset,
		      const struct mail_transaction_header **hdr_r)
{
	const uoff_t buf_pos = *offset - file->buffer_offset;
	const struct mail_transaction_header *rec;
	uint32_t rec_size;

	rec = CONST_PTR_OFFSET(file->buffer->data, buf_pos);
	rec_size = mail_index_offset_to_uint32(rec->size);

	/* this record was already synced earlier, so it's expected to pass
	   the sanity checks */
	if (rec_size >= sizeof(*rec) &&
	    buf_pos + rec_size <= file->buffer->used) {
		*hdr_r = rec;
		*offset += rec_size;
		return 0;
	}

	mail_transaction_log_file_set_corrupted(file,
		"Transaction log corrupted unexpectedly at "
		"%"PRIuUOFF_T": Invalid size %u (type=%x)",
		*offset, rec_size, rec->type);
	return -1;
}
/* Compute the highest modseq in effect at the given log offset and store it
   to *highest_modseq_r.

   offset must not be beyond file->sync_offset. The value is taken from
   sync_highest_modseq or the modseq cache when possible; otherwise the
   records between the nearest known position and offset are scanned and
   the result is added to the front of the cache.

   Returns 0 on success, -1 if the log couldn't be mapped or a record
   turned out to be invalid. */
int mail_transaction_log_file_get_highest_modseq_at(
		struct mail_transaction_log_file *file,
		uoff_t offset, uint64_t *highest_modseq_r)
{
	const struct mail_transaction_header *rec;
	struct modseq_cache *cached;
	uoff_t pos;
	uint64_t modseq;
	int ret;

	i_assert(offset <= file->sync_offset);

	if (offset == file->sync_offset) {
		/* asking for the latest synced state */
		*highest_modseq_r = file->sync_highest_modseq;
		return 0;
	}

	cached = modseq_cache_get_offset(file, offset);
	if (cached != NULL && cached->offset == offset) {
		/* exact cache hit */
		*highest_modseq_r = cached->highest_modseq;
		return 0;
	}
	if (cached != NULL) {
		/* the cache lets us skip over some records */
		pos = cached->offset;
		modseq = cached->highest_modseq;
	} else {
		/* nothing usable in cache - scan from beginning */
		pos = file->hdr.hdr_size;
		modseq = file->hdr.initial_modseq;
	}

	ret = mail_transaction_log_file_map(file, pos, offset);
	if (ret < 0)
		return -1;
	if (ret == 0) {
		mail_index_set_error(file->log->index,
			"%s: Transaction log corrupted, can't get modseq",
			file->filepath);
		return -1;
	}

	i_assert(pos >= file->buffer_offset);
	i_assert(pos + file->buffer->used >= offset);

	/* replay the records up to the wanted offset, counting the ones
	   that increase the modseq */
	while (pos < offset) {
		if (log_get_synced_record(file, &pos, &rec) < 0)
			return -1;
		if (mail_transaction_header_has_modseq(rec, rec + 1, modseq))
			modseq++;
	}

	/* @UNSAFE: push the older entries back by one (dropping the last)
	   and remember this result in the first slot */
	memmove(&file->modseq_cache[1], &file->modseq_cache[0],
		sizeof(file->modseq_cache[0]) *
		(N_ELEMENTS(file->modseq_cache) - 1));
	file->modseq_cache[0].offset = pos;
	file->modseq_cache[0].highest_modseq = modseq;

	*highest_modseq_r = modseq;
	return 0;
}
/* Find the log offset that follows the point where the file's modseq
   reached the given value and store it to *next_offset_r.

   - If modseq is at or beyond sync_highest_modseq, the result is
     sync_offset.
   - If the modseq cache has an exact match, its offset is returned.
   - If modseq equals the file's initial modseq and the cache has nothing
     for it, the result is the first record offset (hdr.hdr_size): no
     record in this file has to be skipped to get there.
   - Otherwise records are scanned until one raises the modseq to the
     wanted value; the result is the offset right after that record and it
     is added to the front of the cache.

   Returns 0 on success, -1 if the log couldn't be mapped, a record was
   invalid or the wanted modseq wasn't found. */
int mail_transaction_log_file_get_modseq_next_offset(
		struct mail_transaction_log_file *file,
		uint64_t modseq, uoff_t *next_offset_r)
{
	const struct mail_transaction_header *hdr;
	struct modseq_cache *cache;
	uoff_t cur_offset;
	uint64_t cur_modseq;
	int ret;

	if (modseq >= file->sync_highest_modseq) {
		*next_offset_r = file->sync_offset;
		return 0;
	}

	cache = modseq_cache_get_modseq(file, modseq);
	if (cache == NULL) {
		if (modseq == file->hdr.initial_modseq) {
			/* the scan below stops only after a record has
			   increased the modseq to the wanted value, which
			   can never happen for the value the file starts
			   with. everything in the file comes after it. */
			*next_offset_r = file->hdr.hdr_size;
			return 0;
		}
		/* nothing usable in cache - scan from beginning */
		cur_offset = file->hdr.hdr_size;
		cur_modseq = file->hdr.initial_modseq;
	} else if (cache->highest_modseq == modseq) {
		/* exact cache hit */
		*next_offset_r = cache->offset;
		return 0;
	} else {
		/* use cache to skip over some records */
		cur_offset = cache->offset;
		cur_modseq = cache->highest_modseq;
	}

	ret = mail_transaction_log_file_map(file, cur_offset,
					    file->sync_offset);
	if (ret <= 0) {
		if (ret < 0)
			return -1;
		mail_index_set_error(file->log->index,
			"%s: Transaction log corrupted, can't get modseq",
			file->filepath);
		return -1;
	}

	i_assert(cur_offset >= file->buffer_offset);
	while (cur_offset < file->sync_offset) {
		if (log_get_synced_record(file, &cur_offset, &hdr) < 0)
			return -1;
		if (mail_transaction_header_has_modseq(hdr, hdr + 1,
						       cur_modseq)) {
			if (++cur_modseq == modseq)
				break;
		}
	}
	if (modseq != cur_modseq) {
		/* if we got to sync_offset, cur_modseq should be
		   sync_highest_modseq */
		mail_index_set_error(file->log->index,
			"%s: Transaction log changed unexpectedly, "
			"can't get modseq", file->filepath);
		return -1;
	}

	/* @UNSAFE: cache the value */
	memmove(file->modseq_cache + 1, file->modseq_cache,
		sizeof(*file->modseq_cache) *
		(N_ELEMENTS(file->modseq_cache) - 1));
	file->modseq_cache[0].offset = cur_offset;
	file->modseq_cache[0].highest_modseq = cur_modseq;

	*next_offset_r = cur_offset;
	return 0;
}
/* Update the file's sync bookkeeping for one fully written transaction.

   hdr is the record header and trans_size the size of the whole record,
   header included. Increases sync_highest_modseq when the record counts as
   a modseq change. For external transactions it additionally hands header
   updates to log_file_track_mailbox_sync_offset_hdr() and, when
   max_tail_offset still equals sync_offset, moves max_tail_offset past
   this record.

   Returns 0 on success, -1 if the header update tracking failed. */
static int
log_file_track_sync(struct mail_transaction_log_file *file,
		    const struct mail_transaction_header *hdr,
		    unsigned int trans_size)
{
	const void *payload = hdr + 1;
	int ret;

	if (mail_transaction_header_has_modseq(hdr, payload,
					       file->sync_highest_modseq))
		file->sync_highest_modseq++;

	if ((hdr->type & MAIL_TRANSACTION_EXTERNAL) == 0) {
		/* the rest applies only to external transactions */
		return 0;
	}

	if ((hdr->type & MAIL_TRANSACTION_TYPE_MASK) ==
	    MAIL_TRANSACTION_HEADER_UPDATE) {
		/* see if this updates mailbox_sync_offset */
		ret = log_file_track_mailbox_sync_offset_hdr(file, payload,
				trans_size - sizeof(*hdr));
		if (ret < 0)
			return -1;
		if (ret > 0) {
			/* the helper reported a non-zero result, so the
			   tail offset below is left alone */
			return 0;
		}
	}

	if (file->max_tail_offset == file->sync_offset) {
		/* external transactions aren't synced to mailbox. we can
		   update mailbox sync offset to skip this transaction to
		   avoid re-reading it at the next sync. */
		file->max_tail_offset += trans_size;
	}
	return 0;
}
/* Advance file->sync_offset over every completely written transaction that
   is available in file->buffer, updating the sync bookkeeping for each.

   Returns:
    1 - buffer was processed as far as currently possible (this includes
        stopping at a record whose size field is still 0, i.e. unfinished)
    0 - the file is mmaped and unlocked and its size changed while it was
        being scanned; last_size has been updated to the new size
   -1 - a record was invalid, fstat() failed, unexpected data follows the
        last record, or the synced size disagrees with the next log file */
static int
mail_transaction_log_file_sync(struct mail_transaction_log_file *file)
{
	const struct mail_transaction_header *hdr;
	const void *data;
	struct stat st;
	size_t size, avail;
	uint32_t trans_size = 0;

	i_assert(file->sync_offset >= file->buffer_offset);

	data = buffer_get_data(file->buffer, &size);
	while (file->sync_offset - file->buffer_offset + sizeof(*hdr) <= size) {
		hdr = CONST_PTR_OFFSET(data, file->sync_offset -
				       file->buffer_offset);
		trans_size = mail_index_offset_to_uint32(hdr->size);
		if (trans_size == 0) {
			/* unfinished */
			return 1;
		}
		if (trans_size < sizeof(*hdr)) {
			mail_transaction_log_file_set_corrupted(file,
				"hdr.size too small (%u)", trans_size);
			return -1;
		}

		if (file->sync_offset - file->buffer_offset + trans_size > size)
			break;

		/* transaction has been fully written */
		if (log_file_track_sync(file, hdr, trans_size) < 0)
			return -1;

		file->sync_offset += trans_size;
		/* trans_size != 0 after the loop means that the last seen
		   record didn't fit into the buffer */
		trans_size = 0;
	}

	if (file->mmap_base != NULL && !file->locked) {
		/* Now that all the mmaped pages have page faulted, check if
		   the file had changed while doing that. Only after the last
		   page has faulted, the size returned by fstat() can be
		   trusted. Otherwise it might point to a page boundary while
		   the next page is still being written.

		   Without this check we might see partial transactions,
		   sometimes causing "Extension record updated without intro
		   prefix" errors. */
		if (fstat(file->fd, &st) < 0) {
			mail_index_file_set_syscall_error(file->log->index,
							  file->filepath,
							  "fstat()");
			return -1;
		}
		if ((uoff_t)st.st_size != file->last_size) {
			file->last_size = st.st_size;
			return 0;
		}
	}

	avail = file->sync_offset - file->buffer_offset;
	if (avail != size) {
		/* There's more data than we could sync at the moment. If the
		   last record's size wasn't valid, we can't know if it will
		   be updated unless we've locked the log. */
		if (trans_size != 0) {
			/* pread()s or the above fstat() check for mmaps should
			   have guaranteed that this doesn't happen */
			mail_transaction_log_file_set_corrupted(file,
				"hdr.size too large (%u)", trans_size);
			return -1;
		} else if (file->locked) {
			mail_transaction_log_file_set_corrupted(file,
				"Unexpected garbage at EOF");
			return -1;
		}
		/* The size field will be updated soon */
		mail_index_flush_read_cache(file->log->index, file->filepath,
					    file->fd, file->locked);
	}

	if (file->next != NULL &&
	    file->hdr.file_seq == file->next->hdr.prev_file_seq &&
	    file->next->hdr.prev_file_offset != file->sync_offset) {
		/* report the same value that was just compared: the next
		   file's header, which isn't necessarily the log's head */
		mail_transaction_log_file_set_corrupted(file,
			"Invalid transaction log size "
			"(%"PRIuUOFF_T" vs %u): %s", file->sync_offset,
			file->next->hdr.prev_file_offset, file->filepath);
		return -1;
	}

	return 1;
}
/* Extend the file's buffer backwards so that it begins at the given offset,
   which must be below the current buffer_offset. The missing bytes are
   read from the file with pread_full().

   Returns 1 on success. On failure the buffer is restored to its previous
   contents and the result is 0 if the read hit EOF (file is marked
   corrupted) or failed with ESTALE, -1 on any other read error. */
static int
mail_transaction_log_file_insert_read(struct mail_transaction_log_file *file,
				      uoff_t offset)
{
	const size_t gap = file->buffer_offset - offset;
	void *dest;
	ssize_t ret;

	/* move the existing contents forward to make room at the start */
	buffer_copy(file->buffer, gap, file->buffer, 0, (size_t)-1);
	dest = buffer_get_space_unsafe(file->buffer, 0, gap);

	ret = pread_full(file->fd, dest, gap, offset);
	if (ret > 0) {
		/* success */
		file->buffer_offset -= gap;
		return 1;
	}

	/* failure. undo the move so the buffer stays consistent */
	buffer_copy(file->buffer, 0, file->buffer, gap, (size_t)-1);
	buffer_set_used_size(file->buffer, file->buffer->used - gap);

	if (ret == 0) {
		mail_transaction_log_file_set_corrupted(file, "file shrank");
		return 0;
	}
	if (errno == ESTALE) {
		/* log file was deleted in NFS server, fail silently */
		return 0;
	}
	mail_index_file_set_syscall_error(file->log->index,
					  file->filepath, "pread()");
	return -1;
}
static int
mail_transaction_log_file_read_more(struct mail_transaction_log_file *file)
{
void *data;
size_t size;
uint32_t read_offset;
ssize_t ret;
read_offset = file->buffer_offset + buffer_get_used_size(file->buffer);
do {
data = buffer_append_space_unsafe(file->buffer, LOG_PREFETCH);
ret = pread(file->fd, data, LOG_PREFETCH, read_offset);
if (ret > 0)
read_offset += ret;
size = read_offset - file->buffer_offset;
buffer_set_used_size(file->buffer, size);
} while (ret > 0 || (ret < 0 && errno == EINTR));
file->last_size = read_offset;
if (ret < 0) {
if (errno == ESTALE) {
/* log file was deleted in NFS server, fail silently */
return 0;
}
mail_index_file_set_syscall_error(file->log->index,
file->filepath, "pread()");
return -1;
}
return 1;
}
static bool
mail_transaction_log_file_need_nfs_flush(struct mail_transaction_log_file *file)
{
const struct mail_index_header *hdr = &file->log->index->map->hdr;
uoff_t max_offset = file->last_size;
if (file->next != NULL &&
file->hdr.file_seq == file->next->hdr.prev_file_seq &&
file->next->hdr.prev_file_offset != max_offset) {
/* we already have a newer log file which says that we haven't
synced the entire file. */
return TRUE;
}
if (file->hdr.file_seq == hdr->log_file_seq &&
max_offset < hdr->log_file_head_offset)
return TRUE;
return FALSE;
}
/* Bring the file's buffer up to date using pread() instead of mmap. The
   file must not be mmaped.

   The buffer is created at start_offset if it doesn't exist, or extended
   backwards if it begins after start_offset. New data is then appended and
   synced, and the buffer is truncated to end at sync_offset. With
   nfs_flush=TRUE (and NFS flushing enabled in the index) the attribute
   cache is flushed before reading.

   Returns 1 on success, 0 or -1 as reported by the read helpers, and -1 if
   syncing the records failed. */
static int
mail_transaction_log_file_read(struct mail_transaction_log_file *file,
			       uoff_t start_offset, bool nfs_flush)
{
	int ret;

	i_assert(file->mmap_base == NULL);

	/* NFS: if file isn't locked, we're optimistic that we can read enough
	   data without flushing attribute cache. if after reading we notice
	   that we really should have read more, flush the cache and try again.
	   if file is locked, the attribute cache was already flushed when
	   refreshing the log. */
	if (file->log->index->nfs_flush && nfs_flush) {
		if (file->locked) {
			nfs_flush_attr_cache_fd_locked(file->filepath,
						       file->fd);
		} else {
			nfs_flush_attr_cache_unlocked(file->filepath);
		}
	}

	if (file->buffer == NULL) {
		/* nothing buffered yet, start from the wanted offset */
		file->buffer =
			buffer_create_dynamic(default_pool, LOG_PREFETCH);
		file->buffer_offset = start_offset;
	} else if (file->buffer_offset > start_offset) {
		/* we have to insert missing data to beginning of buffer */
		ret = mail_transaction_log_file_insert_read(file, start_offset);
		if (ret <= 0)
			return ret;
	}

	ret = mail_transaction_log_file_read_more(file);
	if (ret <= 0)
		return ret;

	if (file->log->index->nfs_flush && !nfs_flush &&
	    mail_transaction_log_file_need_nfs_flush(file)) {
		/* we didn't read enough data. flush and try again. */
		return mail_transaction_log_file_read(file, start_offset, TRUE);
	}

	ret = mail_transaction_log_file_sync(file);
	if (ret <= 0) {
		i_assert(ret != 0); /* happens only with mmap */
		return -1;
	}

	/* keep only the fully synced records in the buffer */
	i_assert(file->sync_offset >= file->buffer_offset);
	buffer_set_used_size(file->buffer,
			     file->sync_offset - file->buffer_offset);
	return 1;
}
/* Verify that the requested range lies within the synced part of the log.

   end_offset may be (uoff_t)-1, in which case only start_offset is checked.
   Returns 1 if the offsets are usable. Otherwise an index error naming the
   offending offset is set and 0 is returned. */
static int
log_file_map_check_offsets(struct mail_transaction_log_file *file,
			   uoff_t start_offset, uoff_t end_offset)
{
	if (start_offset > file->sync_offset) {
		/* broken start offset */
		mail_index_set_error(file->log->index,
			"%s: start_offset (%"PRIuUOFF_T") > "
			"current sync_offset (%"PRIuUOFF_T")",
			file->filepath, start_offset, file->sync_offset);
		return 0;
	}
	if (end_offset != (uoff_t)-1 && end_offset > file->sync_offset) {
		/* log the offset that actually failed the check */
		mail_index_set_error(file->log->index,
			"%s: end_offset (%"PRIuUOFF_T") > "
			"current sync_offset (%"PRIuUOFF_T")",
			file->filepath, end_offset, file->sync_offset);
		return 0;
	}
	return 1;
}
/* Map the first file->last_size bytes of the file read-only and make
   file->buffer a constant buffer over the mapping, starting at offset 0.
   Any existing buffer is freed first.

   Returns 0 on success, -1 if mmap() failed (mmap_base and mmap_size are
   then cleared). A failing madvise() is only logged. */
static int
mail_transaction_log_file_mmap(struct mail_transaction_log_file *file)
{
	void *base;

	if (file->buffer != NULL) {
		/* in case we just switched to mmaping */
		buffer_free(&file->buffer);
	}

	file->mmap_size = file->last_size;
	base = mmap(NULL, file->mmap_size, PROT_READ, MAP_SHARED,
		    file->fd, 0);
	if (base == MAP_FAILED) {
		file->mmap_base = NULL;
		file->mmap_size = 0;
		mail_index_file_set_syscall_error(file->log->index,
						  file->filepath, "mmap()");
		return -1;
	}
	file->mmap_base = base;

	/* the access hint is given only for mappings larger than a page */
	if (file->mmap_size > mmap_get_page_size() &&
	    madvise(file->mmap_base, file->mmap_size, MADV_SEQUENTIAL) < 0) {
		mail_index_file_set_syscall_error(file->log->index,
						  file->filepath, "madvise()");
	}

	file->buffer = buffer_create_const_data(default_pool,
						file->mmap_base,
						file->mmap_size);
	file->buffer_offset = 0;
	return 0;
}
/* Drop the file's memory mapping, if there is one, together with the
   buffer that refers to it. A failing munmap() is logged, but the mapping
   state is cleared regardless. Does nothing when the file isn't mmaped. */
static void
mail_transaction_log_file_munmap(struct mail_transaction_log_file *file)
{
	if (file->mmap_base != NULL) {
		if (munmap(file->mmap_base, file->mmap_size) < 0) {
			mail_index_file_set_syscall_error(file->log->index,
							  file->filepath,
							  "munmap()");
		}
		file->mmap_base = NULL;
		file->mmap_size = 0;
		buffer_free(&file->buffer);
	}
}
/* Make the log available through mmap() and sync it.

   If the existing buffer already covers the file up to its current size,
   it is just synced again. Otherwise the file is (re)mapped and synced,
   repeating for as long as the sync reports that the file size changed.
   When less than a page of data remains after start_offset, the data is
   read with mail_transaction_log_file_read() instead.

   Returns 1 on success, 0 if the file shrank or syncing failed, -1 if
   fstat() or mmap() failed; in the read fallback the result of
   mail_transaction_log_file_read() is returned as is. */
static int
mail_transaction_log_file_map_mmap(struct mail_transaction_log_file *file,
				   uoff_t start_offset)
{
	struct stat st;
	int ret;

	/* we are going to mmap() this file, but it's not necessarily
	   mmaped currently. */
	i_assert(file->buffer_offset == 0 || file->mmap_base == NULL);
	i_assert(file->mmap_size == 0 || file->mmap_base != NULL);

	if (fstat(file->fd, &st) < 0) {
		mail_index_file_set_syscall_error(file->log->index,
						  file->filepath, "fstat()");
		return -1;
	}
	file->last_size = st.st_size;

	if ((uoff_t)st.st_size < file->sync_offset) {
		mail_transaction_log_file_set_corrupted(file,
							"file size shrank");
		return 0;
	}

	if (file->buffer != NULL && file->buffer_offset <= start_offset &&
	    (uoff_t)st.st_size == file->buffer_offset + file->buffer->used) {
		/* we already have the whole file mapped */
		ret = mail_transaction_log_file_sync(file);
		if (ret < 0)
			return 0;
		if (ret > 0)
			return 1;
		/* size changed, re-mmap */
	}

	for (;;) {
		mail_transaction_log_file_munmap(file);

		if (file->last_size - start_offset < mmap_get_page_size()) {
			/* just reading the file is probably faster */
			return mail_transaction_log_file_read(file,
							      start_offset,
							      FALSE);
		}

		if (mail_transaction_log_file_mmap(file) < 0)
			return -1;

		ret = mail_transaction_log_file_sync(file);
		if (ret < 0)
			return 0;
		if (ret > 0)
			return 1;
		/* ret == 0: size changed while syncing, map it again */
	}
}
/* Make sure the log data between start_offset and end_offset is available
   in file->buffer, reading or mmaping more of the file when needed.

   end_offset may be (uoff_t)-1. When the log is locked and this file is
   the log's head, that is replaced with the current sync_offset.

   Returns 1 if the range is available, 0 if the file is marked corrupted
   or the offsets can't be satisfied, and otherwise whatever non-positive
   value the underlying read/mmap step returned. */
int mail_transaction_log_file_map(struct mail_transaction_log_file *file,
				  uoff_t start_offset, uoff_t end_offset)
{
	struct mail_index *index = file->log->index;
	int ret;

	if (file->hdr.indexid == 0) {
		/* corrupted */
		return 0;
	}

	i_assert(start_offset >= file->hdr.hdr_size);
	i_assert(start_offset <= end_offset);

	if (index->log_locked && file == file->log->head &&
	    end_offset == (uoff_t)-1) {
		/* we're not interested of going further than sync_offset */
		if (log_file_map_check_offsets(file, start_offset,
					       end_offset) == 0)
			return 0;
		i_assert(start_offset <= file->sync_offset);
		end_offset = file->sync_offset;
	}

	/* see if we already have it */
	if (file->buffer != NULL && file->buffer_offset <= start_offset &&
	    file->buffer_offset + buffer_get_used_size(file->buffer) >=
	    end_offset)
		return 1;

	if (MAIL_TRANSACTION_LOG_FILE_IN_MEMORY(file)) {
		if (start_offset < file->buffer_offset) {
			/* we had moved the log to memory but failed to read
			   the beginning of the log file */
			mail_index_set_error(index,
				"%s: Beginning of the log isn't available",
				file->filepath);
			return 0;
		}
		/* there's no file to read more from */
		return log_file_map_check_offsets(file, start_offset,
						  end_offset);
	}

	if (start_offset > file->sync_offset)
		mail_transaction_log_file_skip_to_head(file);
	if (start_offset > file->sync_offset) {
		/* although we could just skip over the unwanted data, we have
		   to sync everything so that modseqs are calculated
		   correctly */
		start_offset = file->sync_offset;
	}

	if (index->mmap_disable) {
		mail_transaction_log_file_munmap(file);
		ret = mail_transaction_log_file_read(file, start_offset, FALSE);
	} else {
		ret = mail_transaction_log_file_map_mmap(file, start_offset);
	}
	if (ret <= 0)
		return ret;

	return log_file_map_check_offsets(file, start_offset, end_offset);
}
/* Detach the log file from its file descriptor so that it exists only in
   memory. Does nothing if the file is already in memory.

   An mmaped file is copied into a newly allocated buffer and unmapped.
   Otherwise, if the buffer doesn't begin at offset 0, reading the file
   from the beginning is attempted; a failure there is deliberately ignored
   and the buffer is kept as it is. Finally the fd is closed and set to -1,
   and filepath is replaced with the index path followed by
   MAIL_TRANSACTION_LOG_SUFFIX. */
void mail_transaction_log_file_move_to_memory(struct mail_transaction_log_file
					      *file)
{
	buffer_t *buf;

	if (MAIL_TRANSACTION_LOG_FILE_IN_MEMORY(file))
		return;

	if (file->mmap_base != NULL) {
		/* just copy to memory */
		i_assert(file->buffer_offset == 0);

		buf = buffer_create_dynamic(default_pool, file->mmap_size);
		buffer_append(buf, file->mmap_base, file->mmap_size);
		buffer_free(&file->buffer);
		file->buffer = buf;

		/* and lose the mmap */
		if (munmap(file->mmap_base, file->mmap_size) < 0) {
			mail_index_file_set_syscall_error(file->log->index,
							  file->filepath,
							  "munmap()");
		}
		file->mmap_base = NULL;
		/* no mapping is left, so don't leave its size behind */
		file->mmap_size = 0;
	} else if (file->buffer_offset != 0) {
		/* we don't have the full log in the memory. read it. */
		(void)mail_transaction_log_file_read(file, 0, FALSE);
	}

	if (close(file->fd) < 0) {
		mail_index_file_set_syscall_error(file->log->index,
						  file->filepath, "close()");
	}
	file->fd = -1;

	i_free(file->filepath);
	file->filepath = i_strconcat(file->log->index->filepath,
				     MAIL_TRANSACTION_LOG_SUFFIX, NULL);
}