mail-transaction-log-file.c revision f153a2cec0319f549388d28f8cfd4d50229d1132
/* Copyright (c) 2003-2009 Dovecot authors, see the included COPYING file */
#include "lib.h"
#include "ioloop.h"
#include "buffer.h"
#include "file-dotlock.h"
#include "nfs-workarounds.h"
#include "read-full.h"
#include "write-full.h"
#include "mmap-util.h"
#include "mail-index-private.h"
#include "mail-index-modseq.h"
#include "mail-transaction-log-private.h"
#define LOG_PREFETCH 1024
#define MEMORY_LOG_NAME "(in-memory transaction log file)"
void
const char *fmt, ...)
{
if (!MAIL_TRANSACTION_LOG_FILE_IN_MEMORY(file)) {
/* indexid=0 marks the log file as corrupted */
offsetof(struct mail_transaction_log_header,
indexid)) < 0) {
}
}
T_BEGIN {
"Corrupted transaction log file %s seq %u: %s "
} T_END;
}
struct mail_transaction_log_file *
const char *path)
{
struct mail_transaction_log_file *file;
return file;
}
{
struct mail_transaction_log_file **p;
if (*p == file) {
break;
}
}
"munmap()");
}
}
"close()");
}
}
}
static void
{
const struct mail_index_modseq_header *modseq_hdr;
return;
/* we can get a valid log offset from index file. initialize
sync_offset from it so we don't have to read the whole log
file from beginning. */
"%s: log_file_head_offset too small",
/* modseqs not used yet */
file->sync_highest_modseq = 0;
} else if (modseq_hdr == NULL ||
/* highest_modseq not synced, start from beginning */
"%s: modseq_hdr.log_offset too large",
} else {
/* start from where we last stopped tracking modseqs */
}
}
}
static void
{
struct mail_transaction_log_file **p;
/* insert it to correct position */
break;
}
*p = file;
}
static int
struct mail_transaction_log_header *hdr)
{
#ifndef WORDS_BIGENDIAN
#endif
/* not creating index - make sure we have latest header */
if (mail_index_map(index,
MAIL_INDEX_SYNC_HANDLER_HEAD) <= 0)
return -1;
} else {
/* if we got here from mapping, the .log file is
corrupted. use whatever values we got from index
file */
}
}
} else {
}
/* make sure the sequence grows */
}
/* this should always be up-to-date */
}
}
return 0;
}
struct mail_transaction_log_file *
{
struct mail_transaction_log_file *file;
return NULL;
}
return file;
}
static int
{
int ret;
ret = 1;
else {
}
if (ret > 0) {
return 0;
}
if (ret < 0) {
"file_dotlock_create()");
return -1;
}
"Timeout while waiting for "
"dotlock for transaction log file %s",
return -1;
}
static int
{
int ret;
return 0;
if (ret < 0) {
return -1;
}
if (ret == 0) {
"Dotlock was lost for transaction log file %s",
return -1;
}
return 0;
}
{
int ret;
return 0;
return 0;
}
return mail_transaction_log_file_dotlock(file);
if (ret > 0) {
return 0;
}
if (ret < 0) {
"mail_index_wait_lock_fd()");
return -1;
}
"Timeout while waiting for lock for transaction log file %s",
return -1;
}
{
return;
return;
return;
}
}
static ssize_t
{
int ret;
/* try to read the whole header, but it's not necessarily an error to
read less since the older versions of the log format could be
smaller. */
pos = 0;
do {
if (ret > 0)
}
static int
bool ignore_estale)
{
struct mail_transaction_log_file *f;
int ret;
return 0;
if (ret < 0) {
"pread()");
}
return -1;
}
/* incompatible version - fix silently */
return 0;
}
if (ret < MAIL_TRANSACTION_LOG_HEADER_MIN_SIZE) {
"unexpected end of file while reading header");
return 0;
}
/* we have compatibility flags */
enum mail_index_header_compat_flags compat_flags = 0;
#ifndef WORDS_BIGENDIAN
#endif
/* architecture change */
"Rebuilding index file %s: "
"CPU architecture changed",
return 0;
}
}
"Header size too small");
return 0;
}
/* @UNSAFE: smaller than we expected - zero out the fields we
shouldn't have filled */
}
/* corrupted */
"Transaction log file %s: marked corrupted",
return 0;
}
/* index file was probably just rebuilt and we don't
know about it yet */
"indexid changed %u -> %u",
return 0;
}
/* creating index file. since transaction log is created
first, use the indexid in it to create the main index
to avoid races. */
}
/* make sure we don't already have a file with the same sequence
opened. it shouldn't happen unless the old log file was
corrupted. */
/* mark the old file corrupted. we can't safely remove
it from the list however, so return failure. */
"duplicate transaction log sequence (%u)",
return 0;
}
}
return 1;
}
static int
bool ignore_estale)
{
}
return -1;
}
return 0;
}
static bool
{
struct mail_transaction_log_file *tmp;
return TRUE;
}
return FALSE;
}
static int
{
const char *path2;
bool rename_existing;
/* although we check also mtime and file size below, it's done
only to fix broken log files. we don't bother flushing
attribute cache just for that. */
}
/* log creation is locked now - see if someone already created it.
note that if we're rotating, we need to keep the log locked until
the file has been rewritten. and because fcntl() locks are stupid,
if we go and open()+close() the file and we had it already opened,
its locks are lost. so we use stat() to check if the file has been
recreated, although it almost never is. */
if (reset)
"stat()");
return -1;
}
but not when we're replacing a broken log file */
/* no-one else recreated the file */
} else {
/* recreated. use the file if its header is ok */
if (fd == -1) {
return -1;
}
} else {
FALSE) > 0 &&
/* yes, it was ok */
(void)file_dotlock_delete(dotlock);
return 0;
}
}
}
}
return -1;
if (reset) {
}
"write_full()");
return -1;
}
/* the header isn't important, so don't bother calling
fdatasync() unless NFS is used */
"fdatasync()");
return -1;
}
}
/* we'll need to preserve the lock */
if (mail_transaction_log_file_lock(file) < 0)
ret = -1;
}
/* if we return -1 the dotlock deletion code closes the fd */
if (ret < 0)
return -1;
/* keep two log files */
if (rename_existing) {
/* rename() would be a nice and easy way to do this, except then
there's a race condition between the rename and
file_dotlock_replace(). during that time the log file
doesn't exist, which could cause problems. */
path2);
/* try to link() anyway */
}
/* ignore the error. we don't care that much about the
second log file and we're going to overwrite this
first one. */
}
}
return -1;
/* success */
return 0;
}
bool reset)
{
int fd;
/* With dotlocking we might already have path.lock created, so this
filename has to be different. */
if (fd == -1) {
"file_dotlock_open()");
return -1;
}
"fchown()");
(void)file_dotlock_delete(&dotlock);
return -1;
}
/* either fd gets used or the dotlock gets deleted and returned fd
is for the existing file */
(void)file_dotlock_delete(&dotlock);
return -1;
}
return 0;
}
bool check_existing)
{
unsigned int i;
bool ignore_estale;
int ret;
for (i = 0;; i++) {
return 0;
return -1;
}
ret = -1;
else if (check_existing &&
return 0;
else {
}
if (ret > 0) {
/* success */
break;
}
if (ret == 0) {
/* corrupted */
"unlink(%s) failed: %m",
}
return 0;
}
i == MAIL_INDEX_ESTALE_RETRY_COUNT) {
/* syscall error */
return -1;
}
/* ESTALE - try again */
}
return 1;
}
static int
{
const struct mail_transaction_header_update *u = data;
const struct mail_index_header *ihdr;
const unsigned int offset_pos =
"header update extends beyond record size");
return -1;
}
if (u->offset <= offset_pos &&
sizeof(tail_offset));
/* saved_tail_offset was already set in header,
but we still had to resync the file to find
modseqs. ignore this record. */
return 1;
}
"Transaction log file %s seq %u: "
"log_file_tail_offset update shrank it "
file->sync_offset);
} else {
return 1;
}
}
return 0;
}
bool
const void *data,
{
if (cur_modseq != 0) {
/* tracking modseqs */
/* modseqs not tracked yet. see if this is a modseq
extension introduction. */
const unsigned int modseq_ext_len =
modseq_ext_len) == 0) {
/* modseq tracking started */
return TRUE;
}
} else {
/* not tracking modseqs */
return FALSE;
}
/* ignore expunge requests */
break;
}
case MAIL_TRANSACTION_APPEND:
/* these changes increase modseq */
return TRUE;
}
return FALSE;
}
static struct modseq_cache *
{
struct modseq_cache cache;
if (idx > 0) {
/* @UNSAFE: move it to top */
}
return &file->modseq_cache[0];
}
static struct modseq_cache *
{
unsigned int i, best = -1U;
continue;
return NULL;
/* exact cache hit */
return modseq_cache_hit(file, i);
}
if (best == -1U ||
best = i;
}
if (best == -1U)
return NULL;
}
static struct modseq_cache *
{
unsigned int i, best = -1U;
continue;
return NULL;
/* exact cache hit */
return modseq_cache_hit(file, i);
}
if (best == -1U ||
best = i;
}
if (best == -1U)
return NULL;
}
static int
const struct mail_transaction_header **hdr_r)
{
const struct mail_transaction_header *hdr;
/* we've already synced this record at some point. it should
be valid. */
if (trans_size < sizeof(*hdr) ||
"Transaction log corrupted unexpectedly at "
return -1;
}
*offset += trans_size;
return 0;
}
struct mail_transaction_log_file *file,
{
const struct mail_transaction_header *hdr;
struct modseq_cache *cache;
int ret;
return 0;
}
/* nothing usable in cache - scan from beginning */
/* exact cache hit */
return 0;
} else {
/* use cache to skip over some records */
}
if (ret <= 0) {
if (ret < 0)
return -1;
"%s: Transaction log corrupted, can't get modseq",
return -1;
}
while (cur_offset < offset) {
return- 1;
cur_modseq++;
}
/* @UNSAFE: cache the value */
sizeof(*file->modseq_cache) *
return 0;
}
struct mail_transaction_log_file *file,
{
const struct mail_transaction_header *hdr;
struct modseq_cache *cache;
int ret;
return 0;
}
/* nothing usable in cache - scan from beginning */
/* exact cache hit */
return 0;
} else {
/* use cache to skip over some records */
}
file->sync_offset);
if (ret <= 0) {
if (ret < 0)
return -1;
"%s: Transaction log corrupted, can't get modseq",
return -1;
}
return -1;
cur_modseq)) {
if (++cur_modseq == modseq)
break;
}
}
if (modseq != cur_modseq) {
/* if we got to sync_offset, cur_modseq should be
sync_highest_modseq */
"%s: Transaction log changed unexpectedly, "
return -1;
}
/* @UNSAFE: cache the value */
sizeof(*file->modseq_cache) *
return 0;
}
static int
const struct mail_transaction_header *hdr,
unsigned int trans_size)
{
int ret;
return 0;
/* external transactions: */
/* see if this updates mailbox_sync_offset */
sizeof(*hdr));
if (ret != 0)
return ret < 0 ? -1 : 0;
}
/* external transactions aren't synced to mailbox. we can
update mailbox sync offset to skip this transaction to
avoid re-reading it at the next sync. */
}
return 0;
}
static int
{
const struct mail_transaction_header *hdr;
const void *data;
uint32_t trans_size = 0;
if (trans_size == 0) {
/* unfinished */
return 1;
}
if (trans_size < sizeof(*hdr)) {
"hdr.size too small (%u)", trans_size);
return -1;
}
break;
/* transaction has been fully written */
return -1;
trans_size = 0;
}
/* Now that all the mmaped pages have page faulted, check if
the file had changed while doing that. Only after the last
page has faulted, the size returned by fstat() can be
trusted. Otherwise it might point to a page boundary while
the next page is still being written.
Without this check we might see partial transactions,
sometimes causing "Extension record updated without intro
prefix" errors. */
"fstat()");
return -1;
}
return 0;
}
}
/* There's more data than we could sync at the moment. If the
last record's size wasn't valid, we can't know if it will
be updated unless we've locked the log. */
if (trans_size != 0) {
/* pread()s or the above fstat() check for mmaps should
have guaranteed that this doesn't happen */
"hdr.size too large (%u)", trans_size);
return -1;
"Unexpected garbage at EOF");
return -1;
}
/* The size field will be updated soon */
}
"Invalid transaction log size "
return -1;
}
return 1;
}
static int
{
void *data;
if (ret > 0) {
/* success */
return 1;
}
/* failure. don't leave ourselves in an inconsistent state */
if (ret == 0) {
return 0;
/* log file was deleted on the NFS server, fail silently */
return 0;
} else {
return -1;
}
}
static int
{
void *data;
do {
if (ret > 0)
read_offset += ret;
if (ret < 0) {
/* log file was deleted on the NFS server, fail silently */
return 0;
}
return -1;
}
return 1;
}
static bool
{
/* we already have a newer log file which says that we haven't
synced the entire file. */
return TRUE;
}
return TRUE;
return FALSE;
}
static int
{
int ret;
/* NFS: if file isn't locked, we're optimistic that we can read enough
data without flushing attribute cache. if after reading we notice
that we really should have read more, flush the cache and try again.
if file is locked, the attribute cache was already flushed when
refreshing the log. */
else {
}
}
/* we have to insert missing data to beginning of buffer */
if (ret <= 0)
return ret;
}
}
return ret;
/* we didn't read enough data. flush and try again. */
}
return -1;
}
return 1;
}
static int
{
/* broken start offset */
return 0;
}
return 0;
}
return 1;
}
static int
{
/* in case we just switched to mmaping */
}
return -1;
}
MADV_SEQUENTIAL) < 0) {
}
}
file->buffer_offset = 0;
return 0;
}
static void
{
return;
}
}
static int
{
int ret;
/* we are going to mmap() this file, but it's not necessarily
mmaped currently. */
return -1;
}
"file size shrank");
return 0;
}
/* we already have the whole file mapped */
return 0;
if (ret > 0)
return 1;
/* size changed, re-mmap */
}
do {
/* just reading the file is probably faster */
return mail_transaction_log_file_read(file,
FALSE);
}
if (mail_transaction_log_file_mmap(file) < 0)
return -1;
return 0;
} while (ret == 0);
return 1;
}
{
int ret;
/* corrupted */
return 0;
}
/* we're not interested in going further than sync_offset */
end_offset) == 0)
return 0;
}
/* see if we already have it */
return 1;
}
/* we had moved the log to memory but failed to read
the beginning of the log file */
"%s: Beginning of the log isn't available",
return 0;
}
}
/* although we could just skip over the unwanted data, we have
to sync everything so that modseqs are calculated
correctly */
}
if (!index->mmap_disable)
else {
}
}
*file)
{
return;
/* just copy to memory */
/* and lose the mmap */
"munmap()");
}
} else if (file->buffer_offset != 0) {
/* we don't have the full log in memory. read it. */
}
}
}