ostream-file.c revision a94936bafd127680184da114c6a177b37ff656e5
/* Copyright (c) 2002-2003 Timo Sirainen */
/* @UNSAFE: whole file */
#include "lib.h"
#include "ioloop.h"
#include "write-full.h"
#include "network.h"
#include "sendfile-util.h"
#include "istream.h"
#include "istream-internal.h"
#include "ostream-internal.h"
#include <unistd.h>
#include <sys/stat.h>
#ifdef HAVE_SYS_UIO_H
# include <sys/uio.h>
#endif
/* try to keep the buffer size within 4k..128k. ReiserFS may actually return
128k as optimal size. */
#define DEFAULT_OPTIMAL_BLOCK_SIZE 4096
#define MAX_OPTIMAL_BLOCK_SIZE (128*1024)
#define IS_STREAM_EMPTY(fstream) \
((fstream)->head == (fstream)->tail && !(fstream)->full)
#define MAX_SSIZE_T(size) \
((size) < SSIZE_T_MAX ? (size_t)(size) : SSIZE_T_MAX)
struct file_ostream {
struct ostream_private ostream;
int fd;
struct io *io;
uoff_t buffer_offset;
uoff_t real_offset;
unsigned char *buffer; /* ring-buffer */
size_t buffer_size, max_buffer_size, optimal_block_size;
size_t head, tail; /* first unsent/unused byte */
unsigned int full:1; /* if head == tail, is buffer empty or full? */
unsigned int file:1;
unsigned int corked:1;
unsigned int flush_pending:1;
unsigned int socket_cork_set:1;
unsigned int no_socket_cork:1;
unsigned int no_sendfile:1;
unsigned int autoclose_fd:1;
};
static void stream_send_io(struct file_ostream *fstream);
static void stream_closed(struct file_ostream *fstream)
{
if (fstream->io != NULL)
io_remove(&fstream->io);
if (fstream->autoclose_fd && fstream->fd != -1) {
if (close(fstream->fd) < 0)
i_error("file_ostream.close() failed: %m");
}
fstream->fd = -1;
fstream->ostream.ostream.closed = TRUE;
}
static void o_stream_file_close(struct iostream_private *stream)
{
struct file_ostream *fstream = (struct file_ostream *)stream;
/* flush output before really closing it */
o_stream_flush(&fstream->ostream.ostream);
stream_closed(fstream);
}
static void o_stream_file_destroy(struct iostream_private *stream)
{
struct file_ostream *fstream = (struct file_ostream *)stream;
i_free(fstream->buffer);
}
static void
o_stream_file_set_max_buffer_size(struct iostream_private *stream,
size_t max_size)
{
struct file_ostream *fstream = (struct file_ostream *)stream;
fstream->max_buffer_size = max_size;
}
static void update_buffer(struct file_ostream *fstream, size_t size)
{
size_t used;
if (IS_STREAM_EMPTY(fstream) || size == 0)
return;
if (fstream->head < fstream->tail) {
/* ...HXXXT... */
used = fstream->tail - fstream->head;
i_assert(size <= used);
fstream->head += size;
} else {
/* XXXT...HXXX */
used = fstream->buffer_size - fstream->head;
if (size > used) {
size -= used;
i_assert(size <= fstream->tail);
fstream->head = size;
} else {
fstream->head += size;
}
fstream->full = FALSE;
}
if (fstream->head == fstream->tail)
fstream->head = fstream->tail = 0;
if (fstream->head == fstream->buffer_size)
fstream->head = 0;
}
static void o_stream_socket_cork(struct file_ostream *fstream)
{
if (fstream->corked && !fstream->socket_cork_set) {
if (!fstream->no_socket_cork) {
if (net_set_cork(fstream->fd, TRUE) < 0)
fstream->no_socket_cork = TRUE;
else
fstream->socket_cork_set = TRUE;
}
}
}
static int o_stream_lseek(struct file_ostream *fstream)
{
off_t ret;
if (fstream->real_offset == fstream->buffer_offset)
return 0;
ret = lseek(fstream->fd, (off_t)fstream->buffer_offset, SEEK_SET);
if (ret < 0) {
fstream->ostream.ostream.stream_errno = errno;
return -1;
}
if (ret != (off_t)fstream->buffer_offset) {
fstream->ostream.ostream.stream_errno = EINVAL;
return -1;
}
fstream->real_offset = fstream->buffer_offset;
return 0;
}
static ssize_t o_stream_writev(struct file_ostream *fstream,
const struct const_iovec *iov, int iov_size)
{
ssize_t ret;
size_t size, sent;
int i;
o_stream_socket_cork(fstream);
if (iov_size == 1) {
if (!fstream->file ||
fstream->real_offset == fstream->buffer_offset) {
ret = write(fstream->fd, iov->iov_base, iov->iov_len);
if (ret > 0)
fstream->real_offset += ret;
} else {
ret = pwrite(fstream->fd, iov->iov_base, iov->iov_len,
fstream->buffer_offset);
}
} else {
if (o_stream_lseek(fstream) < 0)
return -1;
sent = 0;
while (iov_size > IOV_MAX) {
size = 0;
for (i = 0; i < IOV_MAX; i++)
size += iov[i].iov_len;
ret = writev(fstream->fd, (const struct iovec *)iov,
IOV_MAX);
if (ret != (ssize_t)size)
break;
fstream->real_offset += ret;
fstream->buffer_offset += ret;
sent += ret;
iov += IOV_MAX;
iov_size -= IOV_MAX;
}
if (iov_size <= IOV_MAX) {
ret = writev(fstream->fd, (const struct iovec *)iov,
iov_size);
}
if (ret > 0) {
fstream->real_offset += ret;
ret += sent;
} else {
ret = sent;
}
}
if (ret < 0) {
if (errno == EAGAIN || errno == EINTR)
return 0;
fstream->ostream.ostream.stream_errno = errno;
stream_closed(fstream);
return -1;
}
fstream->buffer_offset += ret;
return ret;
}
/* returns how much of vector was used */
static int o_stream_fill_iovec(struct file_ostream *fstream,
struct const_iovec iov[2])
{
if (IS_STREAM_EMPTY(fstream))
return 0;
if (fstream->head < fstream->tail) {
iov[0].iov_base = fstream->buffer + fstream->head;
iov[0].iov_len = fstream->tail - fstream->head;
return 1;
} else {
iov[0].iov_base = fstream->buffer + fstream->head;
iov[0].iov_len = fstream->buffer_size - fstream->head;
if (fstream->tail == 0)
return 1;
else {
iov[1].iov_base = fstream->buffer;
iov[1].iov_len = fstream->tail;
return 2;
}
}
}
static int buffer_flush(struct file_ostream *fstream)
{
struct const_iovec iov[2];
int iov_len;
ssize_t ret;
iov_len = o_stream_fill_iovec(fstream, iov);
if (iov_len > 0) {
ret = o_stream_writev(fstream, iov, iov_len);
if (ret < 0)
return -1;
update_buffer(fstream, ret);
}
return IS_STREAM_EMPTY(fstream) ? 1 : 0;
}
static void o_stream_file_cork(struct ostream_private *stream, bool set)
{
struct file_ostream *fstream = (struct file_ostream *)stream;
int ret;
if (fstream->corked != set && !stream->ostream.closed) {
if (set && fstream->io != NULL)
io_remove(&fstream->io);
else if (!set) {
/* buffer flushing might close the stream */
ret = buffer_flush(fstream);
if (fstream->io == NULL &&
(ret == 0 || fstream->flush_pending) &&
!stream->ostream.closed) {
fstream->io = io_add(fstream->fd, IO_WRITE,
stream_send_io, fstream);
}
}
if (fstream->socket_cork_set) {
i_assert(!set);
if (net_set_cork(fstream->fd, FALSE) < 0)
fstream->no_socket_cork = TRUE;
fstream->socket_cork_set = FALSE;
}
fstream->corked = set;
}
}
static int o_stream_file_flush(struct ostream_private *stream)
{
struct file_ostream *fstream = (struct file_ostream *) stream;
return buffer_flush(fstream);
}
static void
o_stream_file_flush_pending(struct ostream_private *stream, bool set)
{
struct file_ostream *fstream = (struct file_ostream *) stream;
fstream->flush_pending = set;
if (set && !fstream->corked && fstream->io == NULL) {
fstream->io = io_add(fstream->fd, IO_WRITE,
stream_send_io, fstream);
}
}
static size_t get_unused_space(struct file_ostream *fstream)
{
if (fstream->head > fstream->tail) {
/* XXXT...HXXX */
return fstream->head - fstream->tail;
} else if (fstream->head < fstream->tail) {
/* ...HXXXT... */
return (fstream->buffer_size - fstream->tail) + fstream->head;
} else {
/* either fully unused or fully used */
return fstream->full ? 0 : fstream->buffer_size;
}
}
static size_t o_stream_file_get_used_size(struct ostream_private *stream)
{
struct file_ostream *fstream = (struct file_ostream *)stream;
return fstream->buffer_size - get_unused_space(fstream);
}
static int o_stream_file_seek(struct ostream_private *stream, uoff_t offset)
{
struct file_ostream *fstream = (struct file_ostream *)stream;
if (offset > OFF_T_MAX || !fstream->file) {
stream->ostream.stream_errno = EINVAL;
return -1;
}
if (buffer_flush(fstream) < 0)
return -1;
stream->ostream.stream_errno = 0;
stream->ostream.offset = offset;
fstream->buffer_offset = offset;
return 1;
}
static void o_stream_grow_buffer(struct file_ostream *fstream, size_t bytes)
{
size_t size, new_size, end_size;
size = nearest_power(fstream->buffer_size + bytes);
if (size > fstream->max_buffer_size) {
/* limit the size */
size = fstream->max_buffer_size;
} else if (fstream->corked) {
/* try to use optimal buffer size with corking */
new_size = I_MIN(fstream->optimal_block_size,
fstream->max_buffer_size);
if (new_size > size)
size = new_size;
}
if (size <= fstream->buffer_size)
return;
fstream->buffer = i_realloc(fstream->buffer,
fstream->buffer_size, size);
if (fstream->tail <= fstream->head && !IS_STREAM_EMPTY(fstream)) {
/* move head forward to end of buffer */
end_size = fstream->buffer_size - fstream->head;
memmove(fstream->buffer + size - end_size,
fstream->buffer + fstream->head, end_size);
fstream->head = size - end_size;
}
fstream->full = FALSE;
fstream->buffer_size = size;
}
static void stream_send_io(struct file_ostream *fstream)
{
struct ostream *ostream = &fstream->ostream.ostream;
int ret;
/* Set flush_pending = FALSE first before calling the flush callback,
and change it to TRUE only if callback returns 0. That way the
callback can call o_stream_set_flush_pending() again and we don't
forget it even if flush callback returns 1. */
fstream->flush_pending = FALSE;
o_stream_ref(ostream);
if (fstream->ostream.callback != NULL)
ret = fstream->ostream.callback(fstream->ostream.context);
else
ret = o_stream_file_flush(&fstream->ostream);
if (ret == 0)
fstream->flush_pending = TRUE;
if (!fstream->flush_pending && IS_STREAM_EMPTY(fstream)) {
if (fstream->io != NULL) {
/* all sent */
io_remove(&fstream->io);
}
} else if (!fstream->ostream.ostream.closed) {
/* Add the IO handler if it's not there already. Callback
might have just returned 0 without there being any data
to be sent. */
if (fstream->io == NULL) {
fstream->io = io_add(fstream->fd, IO_WRITE,
stream_send_io, fstream);
}
}
o_stream_unref(&ostream);
}
static size_t o_stream_add(struct file_ostream *fstream,
const void *data, size_t size)
{
size_t unused, sent;
int i;
unused = get_unused_space(fstream);
if (unused < size)
o_stream_grow_buffer(fstream, size-unused);
sent = 0;
for (i = 0; i < 2 && sent < size && !fstream->full; i++) {
unused = fstream->tail >= fstream->head ?
fstream->buffer_size - fstream->tail :
fstream->head - fstream->tail;
if (unused > size-sent)
unused = size-sent;
memcpy(fstream->buffer + fstream->tail,
CONST_PTR_OFFSET(data, sent), unused);
sent += unused;
fstream->tail += unused;
if (fstream->tail == fstream->buffer_size)
fstream->tail = 0;
if (fstream->head == fstream->tail)
fstream->full = TRUE;
}
if (sent != 0 && fstream->io == NULL &&
!fstream->corked && !fstream->file) {
fstream->io = io_add(fstream->fd, IO_WRITE, stream_send_io,
fstream);
}
return sent;
}
static ssize_t o_stream_file_sendv(struct ostream_private *stream,
const struct const_iovec *iov,
unsigned int iov_count)
{
struct file_ostream *fstream = (struct file_ostream *)stream;
size_t size, total_size, added, optimal_size;
unsigned int i;
ssize_t ret = 0;
stream->ostream.stream_errno = 0;
for (i = 0, size = 0; i < iov_count; i++)
size += iov[i].iov_len;
total_size = size;
if (size > get_unused_space(fstream) && !IS_STREAM_EMPTY(fstream)) {
if (o_stream_file_flush(stream) < 0)
return -1;
}
optimal_size = I_MIN(fstream->optimal_block_size,
fstream->max_buffer_size);
if (IS_STREAM_EMPTY(fstream) &&
(!fstream->corked || size >= optimal_size)) {
/* send immediately */
ret = o_stream_writev(fstream, iov, iov_count);
if (ret < 0)
return -1;
size = ret;
while (size > 0 && iov_count > 0 && size >= iov[0].iov_len) {
size -= iov[0].iov_len;
iov++;
iov_count--;
}
if (iov_count == 0)
i_assert(size == 0);
else {
added = o_stream_add(fstream,
CONST_PTR_OFFSET(iov[0].iov_base, size),
iov[0].iov_len - size);
ret += added;
if (added != iov[0].iov_len - size) {
/* buffer full */
stream->ostream.offset += ret;
return ret;
}
iov++;
iov_count--;
}
}
/* buffer it, at least partly */
for (i = 0; i < iov_count; i++) {
added = o_stream_add(fstream, iov[i].iov_base, iov[i].iov_len);
ret += added;
if (added != iov[i].iov_len)
break;
}
stream->ostream.offset += ret;
i_assert(ret <= (ssize_t)total_size);
return ret;
}
static off_t io_stream_sendfile(struct ostream_private *outstream,
struct istream *instream,
int in_fd, uoff_t in_size)
{
struct file_ostream *foutstream = (struct file_ostream *)outstream;
uoff_t start_offset;
uoff_t offset, send_size, v_offset;
ssize_t ret;
o_stream_socket_cork(foutstream);
/* flush out any data in buffer */
if ((ret = buffer_flush(foutstream)) <= 0)
return ret;
if (o_stream_lseek(foutstream) < 0)
return -1;
start_offset = v_offset = instream->v_offset;
do {
offset = instream->real_stream->abs_start_offset + v_offset;
send_size = in_size - v_offset;
ret = safe_sendfile(foutstream->fd, in_fd, &offset,
MAX_SSIZE_T(send_size));
if (ret <= 0) {
if (ret == 0 || errno == EINTR || errno == EAGAIN) {
ret = 0;
break;
}
outstream->ostream.stream_errno = errno;
if (errno != EINVAL) {
/* close only if error wasn't because
sendfile() isn't supported */
stream_closed(foutstream);
}
break;
}
v_offset += ret;
foutstream->real_offset += ret;
foutstream->buffer_offset += ret;
outstream->ostream.offset += ret;
} while ((uoff_t)ret != send_size);
i_stream_seek(instream, v_offset);
return ret < 0 ? -1 : (off_t)(instream->v_offset - start_offset);
}
static off_t io_stream_copy(struct ostream_private *outstream,
struct istream *instream, uoff_t in_size)
{
struct file_ostream *foutstream = (struct file_ostream *)outstream;
uoff_t start_offset;
struct const_iovec iov[3];
int iov_len;
const unsigned char *data;
size_t size, skip_size, block_size;
ssize_t ret;
int pos;
iov_len = o_stream_fill_iovec(foutstream, iov);
skip_size = 0;
for (pos = 0; pos < iov_len; pos++)
skip_size += iov[pos].iov_len;
start_offset = instream->v_offset;
in_size -= instream->v_offset;
while (in_size > 0) {
block_size = I_MIN(foutstream->optimal_block_size, in_size);
(void)i_stream_read_data(instream, &data, &size, block_size-1);
in_size -= size;
if (size == 0) {
/* all sent */
break;
}
pos = iov_len++;
iov[pos].iov_base = (void *) data;
iov[pos].iov_len = size;
ret = o_stream_writev(foutstream, iov, iov_len);
if (ret < 0)
return -1;
if (skip_size > 0) {
if ((size_t)ret < skip_size) {
update_buffer(foutstream, ret);
skip_size -= ret;
ret = 0;
} else {
update_buffer(foutstream, skip_size);
ret -= skip_size;
skip_size = 0;
}
}
outstream->ostream.offset += ret;
i_stream_skip(instream, ret);
if ((size_t)ret != iov[pos].iov_len)
break;
i_assert(skip_size == 0);
iov_len = 0;
}
return (off_t) (instream->v_offset - start_offset);
}
static off_t io_stream_copy_backwards(struct ostream_private *outstream,
struct istream *instream, uoff_t in_size)
{
struct file_ostream *foutstream = (struct file_ostream *)outstream;
uoff_t in_start_offset, in_offset, in_limit, out_offset;
const unsigned char *data;
size_t buffer_size, size, read_size;
ssize_t ret;
i_assert(IS_STREAM_EMPTY(foutstream));
/* figure out optimal buffer size */
buffer_size = instream->real_stream->buffer_size;
if (buffer_size == 0 || buffer_size > foutstream->buffer_size) {
if (foutstream->optimal_block_size > foutstream->buffer_size) {
o_stream_grow_buffer(foutstream,
foutstream->optimal_block_size -
foutstream->buffer_size);
}
buffer_size = foutstream->buffer_size;
}
in_start_offset = instream->v_offset;
in_offset = in_limit = in_size;
out_offset = outstream->ostream.offset + (in_offset - in_start_offset);
while (in_offset > in_start_offset) {
if (in_offset - in_start_offset <= buffer_size)
read_size = in_offset - in_start_offset;
else
read_size = buffer_size;
in_offset -= read_size;
out_offset -= read_size;
for (;;) {
i_assert(in_offset <= in_limit);
i_stream_seek(instream, in_offset);
read_size = in_limit - in_offset;
(void)i_stream_read_data(instream, &data, &size,
read_size-1);
if (size >= read_size) {
size = read_size;
if (instream->mmaped) {
/* we'll have to write it through
buffer or the file gets corrupted */
i_assert(size <=
foutstream->buffer_size);
memcpy(foutstream->buffer, data, size);
data = foutstream->buffer;
}
break;
}
/* buffer too large probably, try with smaller */
read_size -= size;
in_offset += read_size;
out_offset += read_size;
buffer_size -= read_size;
}
in_limit -= size;
ret = pwrite_full(foutstream->fd, data, size, out_offset);
if (ret < 0) {
/* error */
outstream->ostream.stream_errno = errno;
return -1;
}
}
return (off_t) (in_size - in_start_offset);
}
static off_t o_stream_file_send_istream(struct ostream_private *outstream,
struct istream *instream)
{
struct file_ostream *foutstream = (struct file_ostream *)outstream;
const struct stat *st;
uoff_t in_size;
off_t ret;
int in_fd, overlapping;
st = i_stream_stat(instream, TRUE);
if (st == NULL) {
outstream->ostream.stream_errno = instream->stream_errno;
return -1;
}
in_fd = i_stream_get_fd(instream);
in_size = st->st_size;
i_assert(instream->v_offset <= in_size);
outstream->ostream.stream_errno = 0;
if (in_fd != foutstream->fd)
overlapping = 0;
else {
/* copying data within same fd. we'll have to be careful with
seeks and overlapping writes. */
if (in_size == (uoff_t)-1) {
outstream->ostream.stream_errno = EINVAL;
return -1;
}
ret = (off_t)outstream->ostream.offset -
(off_t)(instream->real_stream->abs_start_offset +
instream->v_offset);
if (ret == 0) {
/* copying data over itself. we don't really
need to do that, just fake it. */
return in_size - instream->v_offset;
}
overlapping = ret < 0 ? -1 : 1;
}
if (!foutstream->no_sendfile && in_fd != -1 && overlapping <= 0) {
ret = io_stream_sendfile(outstream, instream, in_fd, in_size);
if (ret >= 0 || outstream->ostream.stream_errno != EINVAL)
return ret;
/* sendfile() not supported (with this fd), fallback to
regular sending. */
outstream->ostream.stream_errno = 0;
foutstream->no_sendfile = TRUE;
}
if (overlapping <= 0)
return io_stream_copy(outstream, instream, in_size);
else
return io_stream_copy_backwards(outstream, instream, in_size);
}
static struct file_ostream *
o_stream_create_fd_common(int fd, bool autoclose_fd)
{
struct file_ostream *fstream;
fstream = i_new(struct file_ostream, 1);
fstream->fd = fd;
fstream->autoclose_fd = autoclose_fd;
fstream->optimal_block_size = DEFAULT_OPTIMAL_BLOCK_SIZE;
fstream->ostream.iostream.close = o_stream_file_close;
fstream->ostream.iostream.destroy = o_stream_file_destroy;
fstream->ostream.iostream.set_max_buffer_size =
o_stream_file_set_max_buffer_size;
fstream->ostream.cork = o_stream_file_cork;
fstream->ostream.flush = o_stream_file_flush;
fstream->ostream.flush_pending = o_stream_file_flush_pending;
fstream->ostream.get_used_size = o_stream_file_get_used_size;
fstream->ostream.seek = o_stream_file_seek;
fstream->ostream.sendv = o_stream_file_sendv;
fstream->ostream.send_istream = o_stream_file_send_istream;
return fstream;
}
static void fstream_init_file(struct file_ostream *fstream)
{
struct stat st;
fstream->no_sendfile = TRUE;
if (fstat(fstream->fd, &st) < 0)
return;
if ((uoff_t)st.st_blksize > fstream->optimal_block_size) {
/* use the optimal block size, but with a reasonable limit */
fstream->optimal_block_size =
I_MIN(st.st_blksize, MAX_OPTIMAL_BLOCK_SIZE);
}
if (S_ISREG(st.st_mode)) {
fstream->no_socket_cork = TRUE;
fstream->file = TRUE;
}
}
struct ostream *
o_stream_create_fd(int fd, size_t max_buffer_size, bool autoclose_fd)
{
struct file_ostream *fstream;
struct ostream *ostream;
off_t offset;
fstream = o_stream_create_fd_common(fd, autoclose_fd);
fstream->max_buffer_size = max_buffer_size;
ostream = o_stream_create(&fstream->ostream);
offset = lseek(fd, 0, SEEK_CUR);
if (offset >= 0) {
ostream->offset = offset;
fstream->real_offset = offset;
fstream->buffer_offset = offset;
fstream_init_file(fstream);
} else {
if (net_getsockname(fd, NULL, NULL) < 0) {
fstream->no_sendfile = TRUE;
fstream->no_socket_cork = TRUE;
}
}
if (max_buffer_size == 0)
fstream->max_buffer_size = fstream->optimal_block_size;
return ostream;
}
struct ostream *
o_stream_create_fd_file(int fd, uoff_t offset, bool autoclose_fd)
{
struct file_ostream *fstream;
struct ostream *ostream;
if (offset == (uoff_t)-1)
offset = lseek(fd, 0, SEEK_CUR);
fstream = o_stream_create_fd_common(fd, autoclose_fd);
fstream_init_file(fstream);
fstream->max_buffer_size = fstream->optimal_block_size;
fstream->real_offset = offset;
fstream->buffer_offset = offset;
ostream = o_stream_create(&fstream->ostream);
ostream->offset = offset;
return ostream;
}