istream-raw-mbox.c revision 5bfe0d3dd969dfb7e778af7bfab095a9628bee5e
0N/A/* Copyright (c) 2003-2008 Dovecot authors, see the included COPYING file */
229N/A
0N/A#include "lib.h"
0N/A#include "buffer.h"
0N/A#include "istream-internal.h"
0N/A#include "istream-raw-mbox.h"
157N/A#include "mbox-from.h"
0N/A
157N/Astruct raw_mbox_istream {
0N/A struct istream_private istream;
0N/A
0N/A time_t received_time, next_received_time;
0N/A char *path, *sender, *next_sender;
0N/A
0N/A uoff_t from_offset, hdr_offset, body_offset, mail_size;
0N/A uoff_t input_peak_offset;
0N/A
0N/A unsigned int crlf_ending:1;
0N/A unsigned int corrupted:1;
0N/A unsigned int eof:1;
157N/A};
157N/A
157N/Astatic void i_stream_raw_mbox_destroy(struct iostream_private *stream)
0N/A{
0N/A struct raw_mbox_istream *rstream = (struct raw_mbox_istream *)stream;
0N/A
0N/A i_free(rstream->sender);
0N/A i_free(rstream->next_sender);
0N/A i_free(rstream->path);
0N/A
0N/A i_stream_seek(rstream->istream.parent,
0N/A rstream->istream.istream.v_offset);
0N/A i_stream_unref(&rstream->istream.parent);
0N/A}
0N/A
0N/Astatic void
0N/Ai_stream_raw_mbox_set_max_buffer_size(struct iostream_private *stream,
0N/A size_t max_size)
0N/A{
0N/A struct raw_mbox_istream *rstream = (struct raw_mbox_istream *)stream;
0N/A
0N/A rstream->istream.max_buffer_size = max_size;
0N/A i_stream_set_max_buffer_size(rstream->istream.parent, max_size);
0N/A}
0N/A
0N/Astatic int mbox_read_from_line(struct raw_mbox_istream *rstream)
0N/A{
229N/A const unsigned char *buf, *p;
229N/A char *sender;
229N/A time_t received_time;
229N/A size_t pos, line_pos;
0N/A int skip, tz;
0N/A
0N/A buf = i_stream_get_data(rstream->istream.parent, &pos);
0N/A i_assert(pos > 0);
0N/A
0N/A /* from_offset points to "\nFrom ", so unless we're at the beginning
0N/A of the file, skip the initial \n */
0N/A skip = rstream->from_offset != 0;
0N/A if (skip && *buf == '\r')
0N/A skip++;
0N/A
0N/A while ((p = memchr(buf+skip, '\n', pos-skip)) == NULL) {
0N/A if (i_stream_read(rstream->istream.parent) < 0) {
0N/A /* EOF shouldn't happen */
0N/A rstream->istream.istream.eof =
0N/A rstream->istream.parent->eof;
0N/A rstream->istream.istream.stream_errno =
0N/A rstream->istream.parent->stream_errno;
0N/A return -1;
0N/A }
0N/A buf = i_stream_get_data(rstream->istream.parent, &pos);
0N/A i_assert(pos > 0);
0N/A }
0N/A line_pos = (size_t)(p - buf);
0N/A
0N/A if (rstream->from_offset != 0) {
0N/A buf += skip;
0N/A pos -= skip;
0N/A }
0N/A
0N/A /* beginning of mbox */
0N/A if (memcmp(buf, "From ", 5) != 0 ||
0N/A mbox_from_parse(buf+5, pos-5, &received_time, &tz, &sender) < 0) {
0N/A /* broken From - should happen only at beginning of
0N/A file if this isn't a mbox.. */
0N/A rstream->istream.istream.stream_errno = EINVAL;
0N/A return -1;
0N/A }
0N/A
0N/A if (rstream->istream.istream.v_offset == rstream->from_offset) {
0N/A rstream->received_time = received_time;
0N/A i_free(rstream->sender);
0N/A rstream->sender = sender;
0N/A } else {
0N/A rstream->next_received_time = received_time;
0N/A i_free(rstream->next_sender);
0N/A rstream->next_sender = sender;
0N/A }
0N/A
0N/A /* we'll skip over From-line */
0N/A rstream->istream.istream.v_offset += line_pos+1;
0N/A i_stream_skip(rstream->istream.parent, line_pos+1);
0N/A rstream->hdr_offset = rstream->istream.istream.v_offset;
0N/A return 0;
0N/A}
0N/A
0N/Astatic void handle_end_of_mail(struct raw_mbox_istream *rstream, size_t pos)
0N/A{
0N/A rstream->mail_size = rstream->istream.istream.v_offset + pos -
0N/A rstream->hdr_offset;
0N/A
0N/A if (rstream->hdr_offset + rstream->mail_size < rstream->body_offset) {
0N/A /* a) Header didn't have ending \n
0N/A b) "headers\n\nFrom ..", the second \n belongs to next
0N/A message which we didn't know at the time yet.
0N/A
0N/A The +2 check is for CR+LF linefeeds */
0N/A uoff_t new_body_offset =
0N/A rstream->hdr_offset + rstream->mail_size;
0N/A i_assert(rstream->body_offset == (uoff_t)-1 ||
0N/A rstream->body_offset == new_body_offset + 1 ||
0N/A rstream->body_offset == new_body_offset + 2);
0N/A rstream->body_offset = new_body_offset;
0N/A }
0N/A}
0N/A
/* Read callback (installed by i_stream_create_raw_mbox()).

   Exposes the parent stream's buffered data up to the end of the current
   mbox message. If the current message's From-line hasn't been consumed yet
   (v_offset == from_offset), it is parsed and skipped first with
   mbox_read_from_line(). The data is then scanned for the end of headers
   (sets body_offset) and for a following "\nFrom " line accepted by
   mbox_from_parse(); when one is found, the stream is marked EOF, the next
   message's timestamp/sender are stored and handle_end_of_mail() records the
   message size. A possible partial "\nFrom " match (and a preceding CR) at
   the end of the buffer is held back from the caller.

   Returns the number of newly exposed bytes, -2 when no new bytes can be
   exposed because the parent's buffer is full, or -1 at end of message/file
   or when the mbox is found corrupted (rstream->corrupted is set). */
229N/Astatic ssize_t i_stream_raw_mbox_read(struct istream_private *stream)
229N/A{
229N/A static const char *mbox_from = "\nFrom ";
229N/A struct raw_mbox_istream *rstream = (struct raw_mbox_istream *)stream;
229N/A const unsigned char *buf;
229N/A const char *fromp;
0N/A char *sender;
0N/A time_t received_time;
0N/A size_t i, pos, new_pos, from_start_pos, from_after_pos;
0N/A ssize_t ret = 0;
0N/A int eoh_char, tz;
0N/A bool crlf_ending = FALSE;
0N/A
0N/A i_assert(stream->istream.v_offset >= rstream->from_offset);
0N/A
0N/A if (stream->istream.eof)
0N/A return -1;
0N/A
	/* position the parent stream at our current virtual offset */
0N/A i_stream_seek(stream->parent, stream->istream.v_offset);
0N/A
	/* drop the already skipped bytes from pos; the buffer pointer is
	   fetched again from the parent below */
0N/A stream->pos -= stream->skip;
0N/A stream->skip = 0;
0N/A stream->buffer = NULL;
0N/A
0N/A ret = 0;
0N/A do {
0N/A buf = i_stream_get_data(stream->parent, &pos);
0N/A if (pos > 1 && stream->istream.v_offset + pos >
0N/A rstream->input_peak_offset) {
0N/A /* fake our read count. needed because if in the end
0N/A we have only one character in buffer and we skip it
0N/A (as potential CR), we want to get back to this
0N/A i_stream_raw_mbox_read() to read more data. */
0N/A ret = pos;
0N/A break;
0N/A }
0N/A ret = i_stream_read(stream->parent);
0N/A } while (ret > 0);
0N/A stream->istream.stream_errno = stream->parent->stream_errno;
0N/A
0N/A if (ret < 0) {
0N/A if (ret == -2) {
			/* parent's buffer is full and we've already scanned
			   everything in it */
0N/A if (stream->istream.v_offset + pos ==
0N/A rstream->input_peak_offset) {
0N/A stream->buffer = buf;
0N/A return -2;
0N/A }
0N/A } else if (stream->istream.v_offset != 0 || pos == 0) {
0N/A /* we've read the whole file, final byte should be
0N/A the \n trailer */
0N/A if (pos > 0 && buf[pos-1] == '\n') {
0N/A pos--;
0N/A if (pos > 0 && buf[pos-1] == '\r') {
0N/A crlf_ending = TRUE;
0N/A pos--;
0N/A }
0N/A }
0N/A
229N/A i_assert(pos >= stream->pos);
0N/A ret = pos == stream->pos ? -1 :
0N/A (ssize_t)(pos - stream->pos);
0N/A
0N/A stream->buffer = buf;
0N/A stream->pos = pos;
229N/A
0N/A if (stream->istream.v_offset == rstream->from_offset) {
0N/A /* haven't seen From-line yet, so this mbox
0N/A stream is now at EOF */
0N/A rstream->eof = TRUE;
0N/A }
229N/A stream->istream.eof = TRUE;
0N/A rstream->crlf_ending = crlf_ending;
0N/A handle_end_of_mail(rstream, pos);
			/* ret < 0 means there were no new bytes; eof is set
			   now, so the recursive call returns -1 */
0N/A return ret < 0 ? i_stream_raw_mbox_read(stream) : ret;
229N/A }
0N/A }
0N/A
0N/A if (stream->istream.v_offset == rstream->from_offset) {
0N/A /* beginning of message, we haven't yet read our From-line */
0N/A if (pos == 2 && ret > 0) {
0N/A /* we're at the end of file with CR+LF linefeeds?
0N/A need more data to verify it. */
0N/A rstream->input_peak_offset =
0N/A stream->istream.v_offset + pos;
0N/A return i_stream_raw_mbox_read(stream);
0N/A }
0N/A if (mbox_read_from_line(rstream) < 0) {
0N/A stream->pos = 0;
0N/A rstream->eof = TRUE;
0N/A rstream->corrupted = TRUE;
0N/A return -1;
0N/A }
0N/A
0N/A /* got it. we don't want to return it however,
0N/A so start again from headers */
0N/A buf = i_stream_get_data(stream->parent, &pos);
0N/A if (pos == 0)
0N/A return i_stream_raw_mbox_read(stream);
0N/A }
0N/A
0N/A /* See if we have From-line here - note that it works right only
0N/A because all characters are different in mbox_from. */
0N/A fromp = mbox_from; from_start_pos = from_after_pos = (size_t)-1;
	/* look for the end of headers only while body_offset is unknown */
0N/A eoh_char = rstream->body_offset == (uoff_t)-1 ? '\n' : -1;
0N/A for (i = stream->pos; i < pos; i++) {
0N/A if (buf[i] == eoh_char &&
229N/A ((i > 0 && buf[i-1] == '\n') ||
229N/A (i > 1 && buf[i-1] == '\r' && buf[i-2] == '\n') ||
229N/A stream->istream.v_offset + i == rstream->hdr_offset)) {
229N/A rstream->body_offset = stream->istream.v_offset + i + 1;
229N/A eoh_char = -1;
229N/A }
229N/A if (buf[i] == *fromp) {
229N/A if (*++fromp == '\0') {
229N/A /* potential From-line, see if we have the
229N/A rest of the line buffered.
229N/A FIXME: if From-line is longer than input
0N/A buffer, we break. probably irrelevant.. */
0N/A i++;
0N/A if (rstream->hdr_offset + rstream->mail_size ==
stream->istream.v_offset + i - 6 ||
rstream->mail_size == (uoff_t)-1) {
from_after_pos = i;
from_start_pos = i - 6;
if (from_start_pos > 0 &&
buf[from_start_pos-1] == '\r') {
/* CR also belongs to it. */
crlf_ending = TRUE;
from_start_pos--;
} else {
crlf_ending = FALSE;
}
}
fromp = mbox_from;
} else if (from_start_pos != (size_t)-1) {
/* we have the whole From-line here now.
See if it's a valid one. */
if (mbox_from_parse(buf + from_after_pos,
pos - from_after_pos,
&received_time, &tz,
&sender) == 0) {
/* yep, we stop here. */
rstream->next_received_time =
received_time;
i_free(rstream->next_sender);
rstream->next_sender = sender;
stream->istream.eof = TRUE;
rstream->crlf_ending = crlf_ending;
handle_end_of_mail(rstream,
from_start_pos);
break;
}
from_start_pos = (size_t)-1;
}
} else {
fromp = mbox_from;
if (buf[i] == *fromp)
fromp++;
}
}
/* we want to go at least one byte further next time */
rstream->input_peak_offset = stream->istream.v_offset + i;
if (from_start_pos != (size_t)-1) {
/* we're waiting for the \n at the end of From-line */
new_pos = from_start_pos;
} else {
/* leave out the beginnings of potential From-line + CR */
new_pos = i - (fromp - mbox_from);
if (new_pos > 0)
new_pos--;
}
if (stream->istream.v_offset -
rstream->hdr_offset + new_pos > rstream->mail_size) {
/* istream_raw_mbox_set_next_offset() used invalid
cached next_offset? */
i_error("Next message unexpectedly lost from mbox file "
"%s at %"PRIuUOFF_T, rstream->path,
rstream->hdr_offset + rstream->mail_size);
rstream->eof = TRUE;
rstream->corrupted = TRUE;
rstream->istream.istream.stream_errno = EINVAL;
stream->pos = 0;
return -1;
}
stream->buffer = buf;
if (new_pos == stream->pos) {
/* nothing new to expose. retry if the message just ended or the
   parent gave us data, otherwise report the buffer as full */
if (stream->istream.eof || ret > 0)
return i_stream_raw_mbox_read(stream);
i_assert(new_pos > 0);
ret = -2;
} else {
i_assert(new_pos > stream->pos);
ret = new_pos - stream->pos;
stream->pos = new_pos;
}
return ret;
}
/* Seek callback: moves the virtual offset to v_offset and discards all
   buffered read state. The parent stream isn't touched here. */
static void i_stream_raw_mbox_seek(struct istream_private *stream,
				   uoff_t v_offset, bool mark ATTR_UNUSED)
{
	struct raw_mbox_istream *mbox = (struct raw_mbox_istream *)stream;

	/* forget whatever was buffered at the old position */
	stream->buffer = NULL;
	stream->pos = 0;
	stream->skip = 0;
	stream->istream.v_offset = v_offset;

	mbox->eof = FALSE;
	mbox->input_peak_offset = 0;
}
/* Sync callback: syncs the parent stream and resets our buffer positions. */
static void i_stream_raw_mbox_sync(struct istream_private *stream)
{
	struct raw_mbox_istream *mbox = (struct raw_mbox_istream *)stream;

	i_stream_sync(stream->parent);

	mbox->istream.pos = 0;
	mbox->istream.skip = 0;
}
/* Stat callback: returns a copy of the parent stream's stat data with
   st_size replaced by -1, or NULL if the parent's stat fails. */
static const struct stat *
i_stream_raw_mbox_stat(struct istream_private *stream, bool exact)
{
	const struct stat *parent_st = i_stream_stat(stream->parent, exact);

	if (parent_st != NULL) {
		stream->statbuf = *parent_st;
		stream->statbuf.st_size = -1;
		return &stream->statbuf;
	}
	return NULL;
}
struct istream *
i_stream_create_raw_mbox(struct istream *input, const char *path)
{
struct raw_mbox_istream *rstream;
i_assert(path != NULL);
i_assert(input->v_offset == 0);
rstream = i_new(struct raw_mbox_istream, 1);
rstream->path = i_strdup(path);
rstream->body_offset = (uoff_t)-1;
rstream->mail_size = (uoff_t)-1;
rstream->received_time = (time_t)-1;
rstream->next_received_time = (time_t)-1;
rstream->istream.iostream.destroy = i_stream_raw_mbox_destroy;
rstream->istream.iostream.set_max_buffer_size =
i_stream_raw_mbox_set_max_buffer_size;
rstream->istream.max_buffer_size = input->real_stream->max_buffer_size;
rstream->istream.read = i_stream_raw_mbox_read;
rstream->istream.seek = i_stream_raw_mbox_seek;
rstream->istream.sync = i_stream_raw_mbox_sync;
rstream->istream.stat = i_stream_raw_mbox_stat;
rstream->istream.istream.blocking = input->blocking;
rstream->istream.istream.seekable = input->seekable;
i_stream_ref(input);
return i_stream_create(&rstream->istream, input, -1);
}
/* Check whether the parent stream's current position is a valid end of
   message: either only the trailing linefeed of the file is left, or a
   linefeed followed by a From-line that mbox_from_parse() accepts.

   Returns 1 if so, 0 otherwise. When a From-line is accepted, its timestamp
   and sender are stored as the next message's values. Nothing is skipped
   from the parent stream. */
static int istream_raw_mbox_is_valid_from(struct raw_mbox_istream *rstream)
{
	const unsigned char *data;
	size_t size, prefix_len;
	time_t received_time;
	char *sender;
	int tz;

	/* minimal: "From x Thu Nov 29 22:33:52 2001" = 31 chars */
	(void)i_stream_read_data(rstream->istream.parent, &data, &size, 30);

	if ((size == 1 && data[0] == '\n') ||
	    (size == 2 && data[0] == '\r' && data[1] == '\n')) {
		/* EOF */
		return 1;
	}

	if (size > 31 && memcmp(data, "\nFrom ", 6) == 0)
		prefix_len = 6;
	else if (size > 32 && memcmp(data, "\r\nFrom ", 7) == 0)
		prefix_len = 7;
	else
		return 0;

	/* Make sure the whole From-line is buffered. Nothing is skipped from
	   the parent here, so every i_stream_read_data() call hands back the
	   buffer starting from the linefeed before "From ". Keep data/size
	   describing that whole buffer and apply prefix_len on each use;
	   otherwise the search would hit the leading linefeed and the parser
	   would be given the wrong start. Passing the full size as the
	   threshold asks for more data than is buffered now. */
	while (memchr(data + prefix_len, '\n', size - prefix_len) == NULL) {
		if (i_stream_read_data(rstream->istream.parent,
				       &data, &size, size) < 0)
			break;
	}

	if (mbox_from_parse(data + prefix_len, size - prefix_len,
			    &received_time, &tz, &sender) < 0)
		return 0;

	rstream->next_received_time = received_time;
	i_free(rstream->next_sender);
	rstream->next_sender = sender;
	return 1;
}
uoff_t istream_raw_mbox_get_start_offset(struct istream *stream)
{
struct raw_mbox_istream *rstream =
(struct raw_mbox_istream *)stream->real_stream;
return rstream->from_offset;
}
/* Return the offset where the current message's headers begin, reading the
   From-line first if that hasn't been done yet. Logs an error and returns
   (uoff_t)-1 if the stream is marked corrupted. */
uoff_t istream_raw_mbox_get_header_offset(struct istream *stream)
{
	struct raw_mbox_istream *mbox =
		(struct raw_mbox_istream *)stream->real_stream;

	/* hdr_offset stays equal to from_offset until the From-line has
	   been read */
	if (mbox->from_offset == mbox->hdr_offset)
		(void)i_stream_raw_mbox_read(&mbox->istream);

	if (!mbox->corrupted)
		return mbox->hdr_offset;

	i_error("Unexpectedly lost From-line from mbox file %s at "
		"%"PRIuUOFF_T, mbox->path, mbox->from_offset);
	return (uoff_t)-1;
}
/* Return the offset where the current message's body begins. If it isn't
   known yet, the stream is read from hdr_offset until the read callback has
   found it, and the original stream offset is restored afterwards. May
   return (uoff_t)-1 if the stream turned out to be corrupted. */
uoff_t istream_raw_mbox_get_body_offset(struct istream *stream)
{
	struct raw_mbox_istream *mbox =
		(struct raw_mbox_istream *)stream->real_stream;
	uoff_t saved_offset;
	size_t buffered;

	if (mbox->body_offset != (uoff_t)-1)
		return mbox->body_offset;

	saved_offset = stream->v_offset;
	i_stream_seek(stream, mbox->hdr_offset);

	for (;;) {
		if (mbox->body_offset != (uoff_t)-1)
			break;

		/* consume everything buffered so the read moves forward */
		i_stream_get_data(stream, &buffered);
		i_stream_skip(stream, buffered);

		if (i_stream_raw_mbox_read(&mbox->istream) >= 0)
			continue;

		if (!mbox->corrupted) {
			i_assert(mbox->body_offset != (uoff_t)-1);
		} else {
			i_error("Unexpectedly lost From-line from mbox file "
				"%s at %"PRIuUOFF_T, mbox->path,
				mbox->from_offset);
		}
		break;
	}

	i_stream_seek(stream, saved_offset);
	return mbox->body_offset;
}
/* Return the size of the current message's body. hdr_offset and body_offset
   must already be known.

   expected_body_size is a hint, or (uoff_t)-1 for none. With a hint, an
   already known size is returned if it's at least as large as the hint;
   otherwise the hint is accepted if a valid From-line (or the end of file)
   follows at body_offset + expected_body_size. If the hint doesn't help, the
   already known size is returned, or the body is read through to find it. */
uoff_t istream_raw_mbox_get_body_size(struct istream *stream,
				      uoff_t expected_body_size)
{
	struct raw_mbox_istream *mbox =
		(struct raw_mbox_istream *)stream->real_stream;
	const unsigned char *data;
	size_t size;
	uoff_t saved_offset, known_size;

	i_assert(mbox->hdr_offset != (uoff_t)-1);
	i_assert(mbox->body_offset != (uoff_t)-1);

	if (mbox->mail_size == (uoff_t)-1) {
		known_size = (uoff_t)-1;
	} else {
		known_size = mbox->mail_size -
			(mbox->body_offset - mbox->hdr_offset);
	}
	saved_offset = stream->v_offset;

	if (expected_body_size != (uoff_t)-1) {
		/* if we already have the existing body size, use it as long as
		   it's >= expected body_size. otherwise the previous parsing
		   may have stopped at a From_-line that belongs to the body. */
		if (known_size != (uoff_t)-1 &&
		    known_size >= expected_body_size)
			return known_size;

		i_stream_seek(mbox->istream.parent,
			      mbox->body_offset + expected_body_size);
		if (istream_raw_mbox_is_valid_from(mbox) > 0) {
			mbox->mail_size = expected_body_size +
				(mbox->body_offset - mbox->hdr_offset);
			i_stream_seek(stream, saved_offset);
			return expected_body_size;
		}
		/* invalid expected_body_size */
	}

	if (known_size != (uoff_t)-1)
		return known_size;

	/* have to read through the message body */
	for (;;) {
		if (i_stream_read_data(stream, &data, &size, 0) <= 0)
			break;
		i_stream_skip(stream, size);
	}
	i_stream_seek(stream, saved_offset);

	i_assert(mbox->mail_size != (uoff_t)-1);
	/* body_offset is read again here, since reading to the end of the
	   message may have adjusted it */
	return mbox->mail_size - (mbox->body_offset - mbox->hdr_offset);
}
/* Return the timestamp from the current message's From-line. If it isn't
   known yet, one read is attempted first; (time_t)-1 is returned if it's
   still unknown after that. */
time_t istream_raw_mbox_get_received_time(struct istream *stream)
{
	struct raw_mbox_istream *mbox =
		(struct raw_mbox_istream *)stream->real_stream;

	if (mbox->received_time != (time_t)-1)
		return mbox->received_time;

	(void)i_stream_raw_mbox_read(&mbox->istream);
	return mbox->received_time;
}
/* Return the sender from the current message's From-line. If it isn't known
   yet, one read is attempted first; "" is returned if it's still unknown. */
const char *istream_raw_mbox_get_sender(struct istream *stream)
{
	struct raw_mbox_istream *mbox =
		(struct raw_mbox_istream *)stream->real_stream;

	if (mbox->sender == NULL)
		(void)i_stream_raw_mbox_read(&mbox->istream);

	if (mbox->sender != NULL)
		return mbox->sender;
	return "";
}
bool istream_raw_mbox_has_crlf_ending(struct istream *stream)
{
struct raw_mbox_istream *rstream =
(struct raw_mbox_istream *)stream->real_stream;
return rstream->crlf_ending;
}
/* Move the stream to the beginning of the next message. The current body
   size is resolved with istream_raw_mbox_get_body_size() (expected_body_size
   is passed through as its hint); the "next message" From-line values become
   the current ones and both this stream and its parent are positioned at the
   new From-line. */
void istream_raw_mbox_next(struct istream *stream, uoff_t expected_body_size)
{
	struct raw_mbox_istream *mbox =
		(struct raw_mbox_istream *)stream->real_stream;
	uoff_t body_size, next_from_offset;

	body_size = istream_raw_mbox_get_body_size(stream, expected_body_size);
	next_from_offset = mbox->body_offset + body_size;

	/* promote the next message's From-line values */
	mbox->received_time = mbox->next_received_time;
	mbox->next_received_time = (time_t)-1;

	i_free(mbox->sender);
	mbox->sender = mbox->next_sender;
	mbox->next_sender = NULL;

	/* the new message's sizes and offsets aren't known yet */
	mbox->mail_size = (uoff_t)-1;
	mbox->body_offset = (uoff_t)-1;
	mbox->from_offset = next_from_offset;
	mbox->hdr_offset = next_from_offset;

	if (stream->v_offset != mbox->from_offset)
		i_stream_seek_mark(stream, mbox->from_offset);
	i_stream_seek_mark(mbox->istream.parent, mbox->from_offset);

	mbox->eof = FALSE;
	mbox->istream.istream.eof = FALSE;
}
/* Seek to the message whose From-line is at offset. The corrupted and EOF
   flags are cleared first. If offset is where the current message is known
   to end, this is the same as istream_raw_mbox_next(). If offset is the
   current message's From-line, the stream goes back to its headers keeping
   the cached message state. Otherwise all cached message state is dropped.
   Returns 0 on success, -1 if the stream was found corrupted. */
int istream_raw_mbox_seek(struct istream *stream, uoff_t offset)
{
	struct raw_mbox_istream *mbox =
		(struct raw_mbox_istream *)stream->real_stream;
	bool need_read;

	mbox->corrupted = FALSE;
	mbox->eof = FALSE;
	mbox->istream.istream.eof = FALSE;

	if (mbox->mail_size != (uoff_t)-1 &&
	    offset == mbox->hdr_offset + mbox->mail_size) {
		/* seeking to the message right after the current one */
		istream_raw_mbox_next(stream, (uoff_t)-1);
		return 0;
	}

	if (offset != mbox->from_offset) {
		/* some other message: forget everything we knew */
		mbox->body_offset = (uoff_t)-1;
		mbox->mail_size = (uoff_t)-1;
		mbox->received_time = (time_t)-1;
		mbox->next_received_time = (time_t)-1;

		i_free(mbox->sender);
		mbox->sender = NULL;
		i_free(mbox->next_sender);
		mbox->next_sender = NULL;

		mbox->from_offset = offset;
		mbox->hdr_offset = offset;
		need_read = TRUE;
	} else {
		/* back to beginning of current message */
		offset = mbox->hdr_offset;
		need_read = offset == 0;
	}

	i_stream_seek_mark(stream, offset);
	i_stream_seek_mark(mbox->istream.parent, offset);

	if (need_read)
		(void)i_stream_raw_mbox_read(&mbox->istream);
	return mbox->corrupted ? -1 : 0;
}
/* Tell the stream that the current message ends at offset: mail_size is set
   to the distance from hdr_offset to offset. hdr_offset must be known. */
void istream_raw_mbox_set_next_offset(struct istream *stream, uoff_t offset)
{
	struct raw_mbox_istream *mbox =
		(struct raw_mbox_istream *)stream->real_stream;
	uoff_t hdr_offset = mbox->hdr_offset;

	i_assert(hdr_offset != (uoff_t)-1);
	mbox->mail_size = offset - hdr_offset;
}
bool istream_raw_mbox_is_eof(struct istream *stream)
{
struct raw_mbox_istream *rstream =
(struct raw_mbox_istream *)stream->real_stream;
return rstream->eof;
}
bool istream_raw_mbox_is_corrupted(struct istream *stream)
{
struct raw_mbox_istream *rstream =
(struct raw_mbox_istream *)stream->real_stream;
return rstream->corrupted;
}