nfs-workarounds.c revision 023ed67920a771de3b34b242c13a422eec6a2e01
/* Copyright (c) 2006-2008 Dovecot authors, see the included COPYING file */

/*
   These tests were done with various Linux 2.6 kernels, FreeBSD 6.2 and
   Solaris 8 and 10.

   Attribute cache is usually flushed with chown()ing or fchown()ing the file.
   The safest way would be to use uid=-1 gid=-1, but this doesn't work with
   Linux (it does with FreeBSD 6.2 and Solaris). So we'll first get the
   file's owner and use it. As long as we're not root the file's owner can't
   change accidentally. It would be possible to also use chmod()/fchmod(), but
   that's riskier since it could actually cause an unwanted change.

   Write cache can be flushed with fdatasync(). It's all we need, but other
   tested alternatives are: fcntl locking (Linux 2.6, Solaris),
   fchown() (Solaris) and dup()+close() (Linux 2.6, Solaris).

   Read cache flushing is more problematic. There's no universal way to do it.
   The working methods are:

   Linux 2.6: fcntl(), O_DIRECT
   Solaris: fchown(), fcntl(), dup()+close()
   FreeBSD 6.2: fchown()

   fchown() can be easily used for Solaris and FreeBSD, but Linux requires
   playing with locks. O_DIRECT requires CONFIG_NFS_DIRECTIO to be enabled, so
   we can't always use it.
*/
f877ee033a0737100c2f661a7ca4c559ea2ddb8aTimo Sirainen
f877ee033a0737100c2f661a7ca4c559ea2ddb8aTimo Sirainen#include "lib.h"
f877ee033a0737100c2f661a7ca4c559ea2ddb8aTimo Sirainen#include "nfs-workarounds.h"
f877ee033a0737100c2f661a7ca4c559ea2ddb8aTimo Sirainen
f877ee033a0737100c2f661a7ca4c559ea2ddb8aTimo Sirainen#include <fcntl.h>
f877ee033a0737100c2f661a7ca4c559ea2ddb8aTimo Sirainen#include <unistd.h>
f877ee033a0737100c2f661a7ca4c559ea2ddb8aTimo Sirainen#include <sys/stat.h>
f877ee033a0737100c2f661a7ca4c559ea2ddb8aTimo Sirainen
/* READ_CACHE_FLUSH_FCNTL: when defined, the read cache is flushed by taking
   and releasing an fcntl() read lock (nfs_flush_fcntl()). Enabled on Linux
   and Solaris. */
#if defined (__linux__) || defined(__sun)
#  define READ_CACHE_FLUSH_FCNTL
#endif
/* ATTRCACHE_FLUSH_CHOWN_UID_1: when defined, the attribute cache is flushed
   with chown(path, -1, -1) directly, without first looking up the file's
   current owner. Enabled on FreeBSD and Solaris. */
#if defined(__FreeBSD__) || defined(__sun)
#  define ATTRCACHE_FLUSH_CHOWN_UID_1
#endif

/* Defined further down; needed earlier by the attribute cache flushers. */
static void nfs_flush_file_handle_cache_parent_dir(const char *path);
f877ee033a0737100c2f661a7ca4c559ea2ddb8aTimo Sirainen
f877ee033a0737100c2f661a7ca4c559ea2ddb8aTimo Sirainenstatic int
f877ee033a0737100c2f661a7ca4c559ea2ddb8aTimo Sirainennfs_safe_do(const char *path, int (*callback)(const char *path, void *context),
f877ee033a0737100c2f661a7ca4c559ea2ddb8aTimo Sirainen void *context)
f877ee033a0737100c2f661a7ca4c559ea2ddb8aTimo Sirainen{
f877ee033a0737100c2f661a7ca4c559ea2ddb8aTimo Sirainen unsigned int i;
f877ee033a0737100c2f661a7ca4c559ea2ddb8aTimo Sirainen int ret;
f877ee033a0737100c2f661a7ca4c559ea2ddb8aTimo Sirainen
f877ee033a0737100c2f661a7ca4c559ea2ddb8aTimo Sirainen for (i = 1;; i++) {
f877ee033a0737100c2f661a7ca4c559ea2ddb8aTimo Sirainen ret = callback(path, context);
f877ee033a0737100c2f661a7ca4c559ea2ddb8aTimo Sirainen if (ret == 0 || errno != ESTALE || i == NFS_ESTALE_RETRY_COUNT)
f877ee033a0737100c2f661a7ca4c559ea2ddb8aTimo Sirainen break;
f877ee033a0737100c2f661a7ca4c559ea2ddb8aTimo Sirainen
f877ee033a0737100c2f661a7ca4c559ea2ddb8aTimo Sirainen /* ESTALE: Some operating systems may fail with this if they
f877ee033a0737100c2f661a7ca4c559ea2ddb8aTimo Sirainen can't internally revalidate the NFS file handle. Flush the
f877ee033a0737100c2f661a7ca4c559ea2ddb8aTimo Sirainen file handle and try again */
0ddff4c8aefa66f7e19eddc6d61cd040fb803321Timo Sirainen nfs_flush_file_handle_cache(path);
906520ee2cece20c875835697db08cd5e29b919bTimo Sirainen }
3efdcb59492bd1e0602340a4204003a32b34654aTimo Sirainen return ret;
906520ee2cece20c875835697db08cd5e29b919bTimo Sirainen}
906520ee2cece20c875835697db08cd5e29b919bTimo Sirainen
/* Context handed to nfs_safe_open_callback(). */
struct nfs_safe_open_context {
	/* in: flags passed to open() */
	int flags;
	/* out: descriptor returned by open(), -1 if it failed */
	int fd;
};

/* Retry callback: open() path with the flags from the context and store the
   resulting descriptor back into it. Returns 0 on success and -1 on failure,
   in which case errno is the one set by open(). */
static int nfs_safe_open_callback(const char *path, void *context)
{
	struct nfs_safe_open_context *ctx = context;

	ctx->fd = open(path, ctx->flags);
	return ctx->fd == -1 ? -1 : 0;
}
906520ee2cece20c875835697db08cd5e29b919bTimo Sirainen
3efdcb59492bd1e0602340a4204003a32b34654aTimo Sirainenint nfs_safe_open(const char *path, int flags)
906520ee2cece20c875835697db08cd5e29b919bTimo Sirainen{
906520ee2cece20c875835697db08cd5e29b919bTimo Sirainen struct nfs_safe_open_context ctx;
906520ee2cece20c875835697db08cd5e29b919bTimo Sirainen
f877ee033a0737100c2f661a7ca4c559ea2ddb8aTimo Sirainen i_assert((flags & O_CREAT) == 0);
f877ee033a0737100c2f661a7ca4c559ea2ddb8aTimo Sirainen
f877ee033a0737100c2f661a7ca4c559ea2ddb8aTimo Sirainen ctx.flags = flags;
3efdcb59492bd1e0602340a4204003a32b34654aTimo Sirainen if (nfs_safe_do(path, nfs_safe_open_callback, &ctx) < 0)
f877ee033a0737100c2f661a7ca4c559ea2ddb8aTimo Sirainen return -1;
f877ee033a0737100c2f661a7ca4c559ea2ddb8aTimo Sirainen
3efdcb59492bd1e0602340a4204003a32b34654aTimo Sirainen return ctx.fd;
f877ee033a0737100c2f661a7ca4c559ea2ddb8aTimo Sirainen}
f877ee033a0737100c2f661a7ca4c559ea2ddb8aTimo Sirainen
/* Retry callback for nfs_safe_stat(): context is the caller's struct stat,
   which gets filled by stat(path). Returns stat()'s return value. */
static int nfs_safe_stat_callback(const char *path, void *context)
{
	struct stat *st_r = context;
	int ret = stat(path, st_r);

	return ret;
}
/* stat() path into buf, going through nfs_safe_do() so that ESTALE failures
   are retried. Returns whatever nfs_safe_do() returns. */
int nfs_safe_stat(const char *path, struct stat *buf)
{
	int ret;

	ret = nfs_safe_do(path, nfs_safe_stat_callback, buf);
	return ret;
}
/* Retry callback for nfs_safe_lstat(): context is the caller's struct stat,
   which gets filled by lstat(path). Returns lstat()'s return value. */
static int nfs_safe_lstat_callback(const char *path, void *context)
{
	struct stat *st_r = context;
	int ret = lstat(path, st_r);

	return ret;
}
/* lstat() path into buf, going through nfs_safe_do() so that ESTALE failures
   are retried. Returns whatever nfs_safe_do() returns. */
int nfs_safe_lstat(const char *path, struct stat *buf)
{
	int ret;

	ret = nfs_safe_do(path, nfs_safe_lstat_callback, buf);
	return ret;
}
/* link() oldpath to newpath and double-check ambiguous results by comparing
   oldpath's link count before and after.

   links1: when TRUE the caller asserts that oldpath currently has exactly one
   link, which saves the initial stat(). When FALSE the current link count is
   looked up first.

   Returns 0 if the link was created, -1 with errno set otherwise. In
   particular, if link() reports EEXIST (or, on FreeBSD, success) and the
   link count did not change, -1 is returned with errno = EEXIST. */
int nfs_safe_link(const char *oldpath, const char *newpath, bool links1)
{
	struct stat st;
	nlink_t links_before;

	if (links1)
		links_before = 1;
	else {
		if (stat(oldpath, &st) < 0)
			return -1;
		links_before = st.st_nlink;
	}

	if (link(oldpath, newpath) != 0) {
		/* only EEXIST is ambiguous, everything else is a real error */
		if (errno != EEXIST)
			return -1;
	} else {
#ifndef __FreeBSD__
		return 0;
#endif
		/* FreeBSD at least up to v6.2 converts EEXIST errors to
		   success. */
	}

	/* We don't know if it succeeded or failed. stat() to make sure. */
	if (stat(oldpath, &st) < 0)
		return -1;
	if (st.st_nlink != links_before)
		return 0;

	errno = EEXIST;
	return -1;
}
/* Flush the attribute cache of path by chown()ing it without actually
   changing anything: the gid is always -1, and the uid is either -1 (when
   ATTRCACHE_FLUSH_CHOWN_UID_1 is defined) or the file's current owner as
   reported by stat().

   Failures are handled in place; nothing is reported to the caller:
    - stat() ESTALE: nothing more to do, the failure itself flushed the cache
    - stat() ENOENT: the parent directory's file handle cache is flushed
    - chown() ESTALE, EPERM or ENOENT: treated as "cache was flushed"
    - anything else is logged with i_error() */
static void nfs_flush_chown_uid(const char *path)
{
	uid_t uid;

#ifdef ATTRCACHE_FLUSH_CHOWN_UID_1
	uid = (uid_t)-1;
#else
	struct stat st;

	if (stat(path, &st) == 0)
		uid = st.st_uid;
	else {
		if (errno == ESTALE) {
			/* ESTALE causes the OS to flush the attr cache */
			return;
		}
		if (likely(errno == ENOENT)) {
			nfs_flush_file_handle_cache_parent_dir(path);
			return;
		}
		i_error("nfs_flush_chown_uid: stat(%s) failed: %m", path);
		return;
	}
#endif
	if (chown(path, uid, (gid_t)-1) < 0) {
		if (errno == ESTALE || errno == EPERM || errno == ENOENT) {
			/* attr cache is flushed */
			return;
		}
		/* A separate ENOENT branch used to follow here, but it could
		   never run because ENOENT is already handled above. It was
		   removed; runtime behavior is unchanged. */
		i_error("nfs_flush_chown_uid: chown(%s) failed: %m", path);
	}
}
#ifdef __FreeBSD__
/* Flush the attribute cache of an open file by fchown()ing it without
   changing anything: the gid is always -1, and the uid is either -1 (when
   ATTRCACHE_FLUSH_CHOWN_UID_1 is defined) or the file's current owner as
   reported by fstat(). path is used only in error messages.

   Returns FALSE if fstat() or fchown() failed with ESTALE, TRUE otherwise
   (including after an unexpected error has been logged with i_error()). */
static bool nfs_flush_fchown_uid(const char *path, int fd)
{
	uid_t uid;

#ifdef ATTRCACHE_FLUSH_CHOWN_UID_1
	/* uid must be set in this configuration too, otherwise fchown()
	   below would be called with an uninitialized value */
	uid = (uid_t)-1;
#else
	struct stat st;

	if (fstat(fd, &st) < 0) {
		if (likely(errno == ESTALE))
			return FALSE;
		i_error("nfs_flush_attr_cache_fchown: fstat(%s) failed: %m",
			path);
		return TRUE;
	}
	uid = st.st_uid;
#endif
	if (fchown(fd, uid, (gid_t)-1) < 0) {
		if (errno == ESTALE)
			return FALSE;
		if (likely(errno == EACCES || errno == EPERM)) {
			/* attr cache is flushed */
			return TRUE;
		}
		i_error("nfs_flush_attr_cache_fd_locked: fchown(%s) failed: %m",
			path);
	}
	return TRUE;
}
#endif
#ifdef READ_CACHE_FLUSH_FCNTL
/* Flush the read cache of fd by taking a whole-file read lock with fcntl()
   and releasing it again. path is used only in the error message. */
static void nfs_flush_fcntl(const char *path, int fd)
{
	struct flock lock;
	int lock_ret;

	/* If the file was already locked, we'll just get the same lock
	   again. It should succeed just fine. If it was unlocked, we'll
	   have to get a lock and then unlock it. Linux 2.6 flushes read cache
	   only when read/write locking succeeded. */
	lock.l_whence = SEEK_SET;
	lock.l_start = 0;
	lock.l_len = 0;
	lock.l_type = F_RDLCK;

	/* SIGALRM is scheduled around the blocking lock request; presumably
	   so that it can't wait forever (depends on how the process handles
	   the signal). */
	alarm(60);
	lock_ret = fcntl(fd, F_SETLKW, &lock);
	alarm(0);

	if (unlikely(lock_ret < 0)) {
		i_error("nfs_flush_fcntl: fcntl(%s, F_RDLCK) failed: %m", path);
		return;
	}

	/* release again; a failure to unlock is deliberately ignored */
	lock.l_type = F_UNLCK;
	(void)fcntl(fd, F_SETLKW, &lock);
}
#endif
/* Flush the attribute cache of path when the file isn't locked, by opening
   it read-only and closing it right away. If the open fails with anything
   other than ESTALE, the parent directory's file handle cache is flushed
   instead. */
void nfs_flush_attr_cache_unlocked(const char *path)
{
	/* Try to flush the attribute cache the nice way first. */
	int fd = open(path, O_RDONLY);

	if (fd != -1) {
		(void)close(fd);
		return;
	}
	if (errno == ESTALE) {
		/* this already flushed the cache */
		return;
	}
	/* most likely ENOENT, which means a negative cache hit.
	   flush the file handles for its parent directory. */
	nfs_flush_file_handle_cache_parent_dir(path);
}
/* Flush the attribute cache of path; this variant always uses the chown()
   based flush. */
void nfs_flush_attr_cache_maybe_locked(const char *path)
{
	nfs_flush_chown_uid(path);
}
/* Flush the attribute cache of an fd that the caller has already locked.
   Only FreeBSD needs to do any work here (an fchown() based flush, whose
   result is returned); everywhere else this simply returns TRUE. */
bool nfs_flush_attr_cache_fd_locked(const char *path ATTR_UNUSED,
				    int fd ATTR_UNUSED)
{
#ifndef __FreeBSD__
	/* Linux and Solaris are fine. */
	return TRUE;
#else
	/* FreeBSD doesn't flush attribute cache with fcntl(), so we have
	   to do it ourself. */
	return nfs_flush_fchown_uid(path, fd);
#endif
}
/* Flush the file handle cache of the directory at path.

   Linux: the directory is chown()ed via nfs_flush_chown_uid().
   Elsewhere: rmdir() is attempted and is expected to fail. Should it
   unexpectedly succeed, the directory is recreated with mode 0700 and a
   warning is logged.

   Returns FALSE only when rmdir() failed with ENOENT, TRUE in every other
   case (on Linux always TRUE). */
static bool nfs_flush_file_handle_cache_dir(const char *path)
{
#ifdef __linux__
	/* chown()ing parent is the safest way to handle this */
	nfs_flush_chown_uid(path);
	return TRUE;
#else
	/* rmdir() is the only choice with FreeBSD and Solaris */
	if (unlikely(rmdir(path) == 0)) {
		if (mkdir(path, 0700) == 0) {
			i_warning("nfs_flush_file_handle_cache_dir: "
				  "rmdir(%s) unexpectedly "
				  "removed the dir. recreated.", path);
		} else {
			i_warning("nfs_flush_file_handle_cache_dir: "
				  "rmdir(%s) unexpectedly "
				  "removed the dir. mkdir() failed: %m", path);
		}
		return TRUE;
	}

	if (errno == ENOENT)
		return FALSE;

	if (errno == ESTALE || errno == ENOTDIR ||
	    errno == ENOTEMPTY || errno == EEXIST || errno == EACCES) {
		/* expected failures */
		return TRUE;
	}

	i_error("nfs_flush_file_handle_cache_dir: "
		"rmdir(%s) failed: %m", path);
	return TRUE;
#endif
}
/* Flush the file handle cache of the directory containing path: everything
   before the last '/' in path, or "." when path has no '/' at all. The
   result of the directory flush is ignored. */
static void nfs_flush_file_handle_cache_parent_dir(const char *path)
{
	const char *last_slash = strrchr(path, '/');

	if (last_slash == NULL) {
		nfs_flush_file_handle_cache_dir(".");
		return;
	}

	/* the directory name is a temporary string, so it is built inside
	   a T_FRAME() */
	T_FRAME(
		nfs_flush_file_handle_cache_dir(t_strdup_until(path,
							       last_slash));
	);
}
/* Flush the file handle cache for path. This is done by flushing the
   directory that contains it. */
void nfs_flush_file_handle_cache(const char *path)
{
	nfs_flush_file_handle_cache_parent_dir(path);
}
/* Flush the read cache of a file the caller has already locked. With
   READ_CACHE_FLUSH_FCNTL nothing needs to be done; otherwise the attribute
   cache of fd is flushed (its result is not checked). */
void nfs_flush_read_cache_locked(const char *path ATTR_UNUSED,
				 int fd ATTR_UNUSED)
{
#ifndef READ_CACHE_FLUSH_FCNTL
	/* we can only hope that underlying filesystem uses micro/nanosecond
	   resolution so that attribute cache flushing notices mtime changes */
	nfs_flush_attr_cache_fd_locked(path, fd);
#else
	/* already flushed when fcntl() was called */
#endif
}
/* Flush the read cache of a file that isn't locked by the caller. With
   READ_CACHE_FLUSH_FCNTL this takes and releases an fcntl() lock; otherwise
   it does the same as nfs_flush_read_cache_locked(). */
void nfs_flush_read_cache_unlocked(const char *path, int fd)
{
#ifndef READ_CACHE_FLUSH_FCNTL
	nfs_flush_read_cache_locked(path, fd);
#else
	nfs_flush_fcntl(path, fd);
#endif
}