/* istream-raw-mbox.c revision 8ed8c821ba8aab0b4ed0375f87d48737ef0e0d8e */
/* Copyright (c) 2003-2008 Dovecot authors, see the included COPYING file */

#include "lib.h"
#include "buffer.h"
#include "istream-internal.h"
#include "istream-raw-mbox.h"
#include "mbox-from.h"

7384b4e78eaab44693c985192276e31322155e32Stephan Boschstruct raw_mbox_istream {
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch struct istream_private istream;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch time_t received_time, next_received_time;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch char *path, *sender, *next_sender;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch uoff_t from_offset, hdr_offset, body_offset, mail_size;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch uoff_t input_peak_offset;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch unsigned int locked:1;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch unsigned int seeked:1;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch unsigned int crlf_ending:1;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch unsigned int corrupted:1;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch unsigned int mail_size_forced:1;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch unsigned int eof:1;
b72c3363092b73cab1da2de4a9d75592e7d8fd6bTimo Sirainen unsigned int header_missing_eoh:1;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch};
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch
6a90041707f1290c8970a3bacb0f8f928aeaaba6Stephan Boschstatic void i_stream_raw_mbox_destroy(struct iostream_private *stream)
6a90041707f1290c8970a3bacb0f8f928aeaaba6Stephan Bosch{
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch struct raw_mbox_istream *rstream = (struct raw_mbox_istream *)stream;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch
feba5e502b2131c9a1c766b7ef9ff041dbf71d1dStephan Bosch i_free(rstream->sender);
feba5e502b2131c9a1c766b7ef9ff041dbf71d1dStephan Bosch i_free(rstream->next_sender);
feba5e502b2131c9a1c766b7ef9ff041dbf71d1dStephan Bosch i_free(rstream->path);
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch i_stream_seek(rstream->istream.parent,
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch rstream->istream.istream.v_offset);
feba5e502b2131c9a1c766b7ef9ff041dbf71d1dStephan Bosch i_stream_unref(&rstream->istream.parent);
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch}
1175415b88ff168e367c77df23901eada13225b9Stephan Bosch
7384b4e78eaab44693c985192276e31322155e32Stephan Boschstatic void
7384b4e78eaab44693c985192276e31322155e32Stephan Boschi_stream_raw_mbox_set_max_buffer_size(struct iostream_private *stream,
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch size_t max_size)
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch{
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch struct raw_mbox_istream *rstream = (struct raw_mbox_istream *)stream;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch rstream->istream.max_buffer_size = max_size;
b72c3363092b73cab1da2de4a9d75592e7d8fd6bTimo Sirainen i_stream_set_max_buffer_size(rstream->istream.parent, max_size);
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch}
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch
7384b4e78eaab44693c985192276e31322155e32Stephan Boschstatic int mbox_read_from_line(struct raw_mbox_istream *rstream)
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch{
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch const unsigned char *buf, *p;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch char *sender;
6dad0888fcec8372f230941c70d8940b8c203b32Stephan Bosch time_t received_time;
6a90041707f1290c8970a3bacb0f8f928aeaaba6Stephan Bosch size_t pos, line_pos;
6a90041707f1290c8970a3bacb0f8f928aeaaba6Stephan Bosch int skip, tz;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch buf = i_stream_get_data(rstream->istream.parent, &pos);
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch i_assert(pos > 0);
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch
b72c3363092b73cab1da2de4a9d75592e7d8fd6bTimo Sirainen /* from_offset points to "\nFrom ", so unless we're at the beginning
b72c3363092b73cab1da2de4a9d75592e7d8fd6bTimo Sirainen of the file, skip the initial \n */
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch skip = rstream->from_offset != 0;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch if (skip && *buf == '\r')
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch skip++;
1bc12a53ddc6696bb209fb79d7cc66262d2ea621Timo Sirainen
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch while ((p = memchr(buf+skip, '\n', pos-skip)) == NULL) {
1bc12a53ddc6696bb209fb79d7cc66262d2ea621Timo Sirainen if (i_stream_read(rstream->istream.parent) < 0) {
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch /* EOF shouldn't happen */
6a90041707f1290c8970a3bacb0f8f928aeaaba6Stephan Bosch rstream->istream.istream.eof =
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch rstream->istream.parent->eof;
b72c3363092b73cab1da2de4a9d75592e7d8fd6bTimo Sirainen rstream->istream.istream.stream_errno =
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch rstream->istream.parent->stream_errno;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch return -1;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch }
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch buf = i_stream_get_data(rstream->istream.parent, &pos);
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch i_assert(pos > 0);
b72c3363092b73cab1da2de4a9d75592e7d8fd6bTimo Sirainen }
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch line_pos = (size_t)(p - buf);
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch if (rstream->from_offset != 0) {
6a90041707f1290c8970a3bacb0f8f928aeaaba6Stephan Bosch buf += skip;
b72c3363092b73cab1da2de4a9d75592e7d8fd6bTimo Sirainen pos -= skip;
1bc12a53ddc6696bb209fb79d7cc66262d2ea621Timo Sirainen }
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch
b72c3363092b73cab1da2de4a9d75592e7d8fd6bTimo Sirainen /* beginning of mbox */
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch if (memcmp(buf, "From ", 5) != 0 ||
6a90041707f1290c8970a3bacb0f8f928aeaaba6Stephan Bosch mbox_from_parse(buf+5, pos-5, &received_time, &tz, &sender) < 0) {
6dad0888fcec8372f230941c70d8940b8c203b32Stephan Bosch /* broken From - should happen only at beginning of
b72c3363092b73cab1da2de4a9d75592e7d8fd6bTimo Sirainen file if this isn't a mbox.. */
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch rstream->istream.istream.stream_errno = EINVAL;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch return -1;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch }
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch if (rstream->istream.istream.v_offset == rstream->from_offset) {
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch rstream->received_time = received_time;
101e78ae4d5705f196fba22db3261db042bfb3c2Stephan Bosch i_free(rstream->sender);
101e78ae4d5705f196fba22db3261db042bfb3c2Stephan Bosch rstream->sender = sender;
101e78ae4d5705f196fba22db3261db042bfb3c2Stephan Bosch } else {
101e78ae4d5705f196fba22db3261db042bfb3c2Stephan Bosch rstream->next_received_time = received_time;
101e78ae4d5705f196fba22db3261db042bfb3c2Stephan Bosch i_free(rstream->next_sender);
101e78ae4d5705f196fba22db3261db042bfb3c2Stephan Bosch rstream->next_sender = sender;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch }
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch /* we'll skip over From-line */
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch rstream->istream.istream.v_offset += line_pos+1;
b72c3363092b73cab1da2de4a9d75592e7d8fd6bTimo Sirainen i_stream_skip(rstream->istream.parent, line_pos+1);
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch rstream->hdr_offset = rstream->istream.istream.v_offset;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch return 0;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch}
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch
7384b4e78eaab44693c985192276e31322155e32Stephan Boschstatic void handle_end_of_mail(struct raw_mbox_istream *rstream, size_t pos)
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch{
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch rstream->mail_size = rstream->istream.istream.v_offset + pos -
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch rstream->hdr_offset;
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch if (rstream->hdr_offset + rstream->mail_size < rstream->body_offset) {
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch uoff_t new_body_offset =
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch rstream->hdr_offset + rstream->mail_size;
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch if (rstream->body_offset != (uoff_t)-1) {
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch /* Header didn't have ending \n */
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch rstream->header_missing_eoh = TRUE;
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch } else {
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch /* "headers\n\nFrom ..", the second \n belongs to next
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch message which we didn't know at the time yet. */
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch }
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch /* The +2 check is for CR+LF linefeeds */
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch i_assert(rstream->body_offset == (uoff_t)-1 ||
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch rstream->body_offset == new_body_offset + 1 ||
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch rstream->body_offset == new_body_offset + 2);
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch rstream->body_offset = new_body_offset;
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch }
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch}
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Boschstatic ssize_t i_stream_raw_mbox_read(struct istream_private *stream)
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch{
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch static const char *mbox_from = "\nFrom ";
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch struct raw_mbox_istream *rstream = (struct raw_mbox_istream *)stream;
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch const unsigned char *buf;
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch const char *fromp;
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch char *sender;
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch time_t received_time;
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch size_t i, pos, new_pos, from_start_pos, from_after_pos;
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch ssize_t ret = 0;
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch int eoh_char, tz;
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch bool crlf_ending = FALSE;
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch i_assert(rstream->seeked);
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch i_assert(stream->istream.v_offset >= rstream->from_offset);
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch if (stream->istream.eof)
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch return -1;
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch if (rstream->corrupted) {
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch rstream->istream.istream.stream_errno = EINVAL;
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch return -1;
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch }
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch i_stream_seek(stream->parent, stream->istream.v_offset);
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch stream->pos -= stream->skip;
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch stream->skip = 0;
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch stream->buffer = NULL;
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch ret = 0;
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch do {
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch buf = i_stream_get_data(stream->parent, &pos);
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch if (pos > 1 && stream->istream.v_offset + pos >
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch rstream->input_peak_offset) {
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch /* fake our read count. needed because if in the end
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch we have only one character in buffer and we skip it
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch (as potential CR), we want to get back to this
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch i_stream_raw_mbox_read() to read more data. */
b72c3363092b73cab1da2de4a9d75592e7d8fd6bTimo Sirainen ret = pos;
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch break;
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch }
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch ret = i_stream_read(stream->parent);
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch } while (ret > 0);
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch stream->istream.stream_errno = stream->parent->stream_errno;
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch if (ret < 0) {
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch if (ret == -2) {
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch if (stream->istream.v_offset + pos ==
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch rstream->input_peak_offset) {
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch stream->buffer = buf;
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch return -2;
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch }
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch } else if (stream->istream.v_offset != 0 || pos == 0) {
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch /* we've read the whole file, final byte should be
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch the \n trailer */
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch if (pos > 0 && buf[pos-1] == '\n') {
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch pos--;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch if (pos > 0 && buf[pos-1] == '\r') {
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch crlf_ending = TRUE;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch pos--;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch }
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch }
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch i_assert(pos >= stream->pos);
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch ret = pos == stream->pos ? -1 :
b72c3363092b73cab1da2de4a9d75592e7d8fd6bTimo Sirainen (ssize_t)(pos - stream->pos);
b72c3363092b73cab1da2de4a9d75592e7d8fd6bTimo Sirainen
1bc12a53ddc6696bb209fb79d7cc66262d2ea621Timo Sirainen stream->buffer = buf;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch stream->pos = pos;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch
b72c3363092b73cab1da2de4a9d75592e7d8fd6bTimo Sirainen if (stream->istream.v_offset == rstream->from_offset) {
b72c3363092b73cab1da2de4a9d75592e7d8fd6bTimo Sirainen /* haven't seen From-line yet, so this mbox
b72c3363092b73cab1da2de4a9d75592e7d8fd6bTimo Sirainen stream is now at EOF */
b72c3363092b73cab1da2de4a9d75592e7d8fd6bTimo Sirainen rstream->eof = TRUE;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch }
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch stream->istream.eof = TRUE;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch rstream->crlf_ending = crlf_ending;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch handle_end_of_mail(rstream, pos);
b72c3363092b73cab1da2de4a9d75592e7d8fd6bTimo Sirainen return ret < 0 ? i_stream_raw_mbox_read(stream) : ret;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch }
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch }
b72c3363092b73cab1da2de4a9d75592e7d8fd6bTimo Sirainen
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch if (stream->istream.v_offset == rstream->from_offset) {
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch /* beginning of message, we haven't yet read our From-line */
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch if (pos == 2 && ret > 0) {
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch /* we're at the end of file with CR+LF linefeeds?
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch need more data to verify it. */
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch rstream->input_peak_offset =
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch stream->istream.v_offset + pos;
b72c3363092b73cab1da2de4a9d75592e7d8fd6bTimo Sirainen return i_stream_raw_mbox_read(stream);
b72c3363092b73cab1da2de4a9d75592e7d8fd6bTimo Sirainen }
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch if (mbox_read_from_line(rstream) < 0) {
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch stream->pos = 0;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch rstream->eof = TRUE;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch rstream->corrupted = TRUE;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch return -1;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch }
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch /* got it. we don't want to return it however,
a62fe4b300e2f591e939993aec4cac1e7ae30ad1Stephan Bosch so start again from headers */
a62fe4b300e2f591e939993aec4cac1e7ae30ad1Stephan Bosch buf = i_stream_get_data(stream->parent, &pos);
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch if (pos == 0)
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch return i_stream_raw_mbox_read(stream);
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch }
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch /* See if we have From-line here - note that it works right only
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch because all characters are different in mbox_from. */
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch fromp = mbox_from; from_start_pos = from_after_pos = (size_t)-1;
b72c3363092b73cab1da2de4a9d75592e7d8fd6bTimo Sirainen eoh_char = rstream->body_offset == (uoff_t)-1 ? '\n' : -1;
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch for (i = stream->pos; i < pos; i++) {
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch if (buf[i] == eoh_char &&
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch ((i > 0 && buf[i-1] == '\n') ||
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch (i > 1 && buf[i-1] == '\r' && buf[i-2] == '\n') ||
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch stream->istream.v_offset + i == rstream->hdr_offset)) {
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch rstream->body_offset = stream->istream.v_offset + i + 1;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch eoh_char = -1;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch }
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch if (buf[i] == *fromp) {
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch if (*++fromp == '\0') {
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch /* potential From-line, see if we have the
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch rest of the line buffered.
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch FIXME: if From-line is longer than input
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch buffer, we break. probably irrelevant.. */
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch i++;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch if (rstream->hdr_offset + rstream->mail_size ==
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch stream->istream.v_offset + i - 6 ||
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch rstream->mail_size == (uoff_t)-1) {
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch from_after_pos = i;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch from_start_pos = i - 6;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch if (from_start_pos > 0 &&
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch buf[from_start_pos-1] == '\r') {
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch /* CR also belongs to it. */
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch crlf_ending = TRUE;
6a90041707f1290c8970a3bacb0f8f928aeaaba6Stephan Bosch from_start_pos--;
b72c3363092b73cab1da2de4a9d75592e7d8fd6bTimo Sirainen } else {
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch crlf_ending = FALSE;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch }
6a90041707f1290c8970a3bacb0f8f928aeaaba6Stephan Bosch }
b72c3363092b73cab1da2de4a9d75592e7d8fd6bTimo Sirainen fromp = mbox_from;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch } else if (from_start_pos != (size_t)-1) {
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch /* we have the whole From-line here now.
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch See if it's a valid one. */
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch if (mbox_from_parse(buf + from_after_pos,
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch pos - from_after_pos,
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch &received_time, &tz,
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch &sender) == 0) {
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch /* yep, we stop here. */
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch rstream->next_received_time =
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch received_time;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch i_free(rstream->next_sender);
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch rstream->next_sender = sender;
6a90041707f1290c8970a3bacb0f8f928aeaaba6Stephan Bosch stream->istream.eof = TRUE;
6a90041707f1290c8970a3bacb0f8f928aeaaba6Stephan Bosch
a62fe4b300e2f591e939993aec4cac1e7ae30ad1Stephan Bosch rstream->crlf_ending = crlf_ending;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch handle_end_of_mail(rstream,
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch from_start_pos);
a62fe4b300e2f591e939993aec4cac1e7ae30ad1Stephan Bosch break;
a62fe4b300e2f591e939993aec4cac1e7ae30ad1Stephan Bosch }
a62fe4b300e2f591e939993aec4cac1e7ae30ad1Stephan Bosch from_start_pos = (size_t)-1;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch }
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch } else {
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch fromp = mbox_from;
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch if (buf[i] == *fromp)
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch fromp++;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch }
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch }
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch
b72c3363092b73cab1da2de4a9d75592e7d8fd6bTimo Sirainen /* we want to go at least one byte further next time */
6a90041707f1290c8970a3bacb0f8f928aeaaba6Stephan Bosch rstream->input_peak_offset = stream->istream.v_offset + i;
6a90041707f1290c8970a3bacb0f8f928aeaaba6Stephan Bosch
6a90041707f1290c8970a3bacb0f8f928aeaaba6Stephan Bosch if (from_start_pos != (size_t)-1) {
6a90041707f1290c8970a3bacb0f8f928aeaaba6Stephan Bosch /* we're waiting for the \n at the end of From-line */
6a90041707f1290c8970a3bacb0f8f928aeaaba6Stephan Bosch new_pos = from_start_pos;
6a90041707f1290c8970a3bacb0f8f928aeaaba6Stephan Bosch } else {
6a90041707f1290c8970a3bacb0f8f928aeaaba6Stephan Bosch /* leave out the beginnings of potential From-line + CR */
6a90041707f1290c8970a3bacb0f8f928aeaaba6Stephan Bosch new_pos = i - (fromp - mbox_from);
e8f1e510df3ab051a816715c2056f0d10aee929eStephan Bosch if (new_pos > 0)
e8f1e510df3ab051a816715c2056f0d10aee929eStephan Bosch new_pos--;
9e7bf91667639a2390207ab4d90bf88e2afcec2aStephan Bosch }
6a90041707f1290c8970a3bacb0f8f928aeaaba6Stephan Bosch
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch if (stream->istream.v_offset -
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch rstream->hdr_offset + new_pos > rstream->mail_size) {
/* istream_raw_mbox_set_next_offset() used invalid
cached next_offset? */
i_error("Next message unexpectedly lost from mbox file "
"%s at %"PRIuUOFF_T" (%s)", rstream->path,
rstream->hdr_offset + rstream->mail_size,
rstream->mail_size_forced ? "cached" : "noncached");
rstream->eof = TRUE;
rstream->corrupted = TRUE;
rstream->istream.istream.stream_errno = EINVAL;
stream->pos = 0;
return -1;
}
stream->buffer = buf;
if (new_pos == stream->pos) {
if (stream->istream.eof || ret > 0)
return i_stream_raw_mbox_read(stream);
i_assert(new_pos > 0);
ret = -2;
} else {
i_assert(new_pos > stream->pos);
ret = new_pos - stream->pos;
stream->pos = new_pos;
}
return ret;
}
/* istream seek callback: moves the virtual offset to v_offset and throws
   away everything that was buffered or scanned so far. The mark parameter
   is unused. */
static void i_stream_raw_mbox_seek(struct istream_private *stream,
				   uoff_t v_offset, bool mark ATTR_UNUSED)
{
	struct raw_mbox_istream *mbox_stream =
		(struct raw_mbox_istream *)stream;

	/* forget the scanning state first */
	mbox_stream->eof = FALSE;
	mbox_stream->input_peak_offset = 0;

	/* then reset the generic stream window */
	stream->buffer = NULL;
	stream->pos = 0;
	stream->skip = 0;
	stream->istream.v_offset = v_offset;
}
/* istream sync callback: syncs the parent stream and drops our view of its
   buffer. */
static void i_stream_raw_mbox_sync(struct istream_private *stream)
{
	struct raw_mbox_istream *mbox_stream =
		(struct raw_mbox_istream *)stream;

	i_stream_sync(stream->parent);

	mbox_stream->istream.pos = 0;
	mbox_stream->istream.skip = 0;
}
/* istream stat callback: returns a copy of the parent's stat with st_size
   forced to -1, or NULL if the parent's stat fails. */
static const struct stat *
i_stream_raw_mbox_stat(struct istream_private *stream, bool exact)
{
	const struct stat *parent_st = i_stream_stat(stream->parent, exact);

	if (parent_st != NULL) {
		stream->statbuf = *parent_st;
		stream->statbuf.st_size = -1;
		return &stream->statbuf;
	}
	return NULL;
}
struct istream *
i_stream_create_raw_mbox(struct istream *input, const char *path)
{
struct raw_mbox_istream *rstream;
i_assert(path != NULL);
i_assert(input->v_offset == 0);
rstream = i_new(struct raw_mbox_istream, 1);
rstream->path = i_strdup(path);
rstream->body_offset = (uoff_t)-1;
rstream->mail_size = (uoff_t)-1;
rstream->received_time = (time_t)-1;
rstream->next_received_time = (time_t)-1;
rstream->istream.iostream.destroy = i_stream_raw_mbox_destroy;
rstream->istream.iostream.set_max_buffer_size =
i_stream_raw_mbox_set_max_buffer_size;
rstream->istream.max_buffer_size = input->real_stream->max_buffer_size;
rstream->istream.read = i_stream_raw_mbox_read;
rstream->istream.seek = i_stream_raw_mbox_seek;
rstream->istream.sync = i_stream_raw_mbox_sync;
rstream->istream.stat = i_stream_raw_mbox_stat;
rstream->istream.istream.blocking = input->blocking;
rstream->istream.istream.seekable = input->seekable;
i_stream_ref(input);
return i_stream_create(&rstream->istream, input, -1);
}
/* Checks whether the parent stream's current position is a valid end of
   message: either the file's trailing linefeed, or a linefeed followed by
   a parseable From-line.

   Returns 1 if it is (for a From-line, its timestamp and sender are stored
   as next_received_time/next_sender), 0 if not. */
static int istream_raw_mbox_is_valid_from(struct raw_mbox_istream *rstream)
{
	const unsigned char *data;
	size_t size, prefix_len;
	time_t received_time;
	char *sender;
	int tz;

	/* minimal: "From x Thu Nov 29 22:33:52 2001" = 31 chars */
	(void)i_stream_read_data(rstream->istream.parent, &data, &size, 30);

	if ((size == 1 && data[0] == '\n') ||
	    (size == 2 && data[0] == '\r' && data[1] == '\n')) {
		/* EOF */
		return 1;
	}

	if (size > 31 && memcmp(data, "\nFrom ", 6) == 0)
		prefix_len = 6;
	else if (size > 32 && memcmp(data, "\r\nFrom ", 7) == 0)
		prefix_len = 7;
	else
		return 0;

	/* Make sure the whole From-line is buffered. i_stream_read_data()
	   hands back the buffer from its beginning again, so the separator
	   prefix is tracked separately instead of advancing data/size;
	   otherwise the separator's own \n would end this loop and be
	   passed to the parser. Passing the full size as the threshold
	   asks for at least one more byte on every round. */
	while (memchr(data + prefix_len, '\n', size - prefix_len) == NULL) {
		if (i_stream_read_data(rstream->istream.parent,
				       &data, &size, size) < 0)
			break;
	}

	if (mbox_from_parse(data + prefix_len, size - prefix_len,
			    &received_time, &tz, &sender) < 0)
		return 0;

	rstream->next_received_time = received_time;
	i_free(rstream->next_sender);
	rstream->next_sender = sender;
	return 1;
}
/* Returns the offset where the current message's From-line begins. The
   stream must have been seeked. */
uoff_t istream_raw_mbox_get_start_offset(struct istream *stream)
{
	struct raw_mbox_istream *mbox_stream;

	mbox_stream = (struct raw_mbox_istream *)stream->real_stream;
	i_assert(mbox_stream->seeked);

	return mbox_stream->from_offset;
}
/* Returns the offset of the first byte after the current message's
   From-line, reading the From-line first if that hasn't been done yet.
   Logs an error and returns (uoff_t)-1 if the stream is marked corrupted. */
uoff_t istream_raw_mbox_get_header_offset(struct istream *stream)
{
	struct raw_mbox_istream *mbox_stream;

	mbox_stream = (struct raw_mbox_istream *)stream->real_stream;
	i_assert(mbox_stream->seeked);

	/* hdr_offset still equals from_offset until the From-line is read */
	if (mbox_stream->hdr_offset == mbox_stream->from_offset)
		(void)i_stream_raw_mbox_read(&mbox_stream->istream);

	if (!mbox_stream->corrupted)
		return mbox_stream->hdr_offset;

	i_error("Unexpectedly lost From-line from mbox file %s at "
		"%"PRIuUOFF_T, mbox_stream->path, mbox_stream->from_offset);
	return (uoff_t)-1;
}
/* Returns the offset where the current message's body begins. If it isn't
   known yet, the headers are read through to find it and the stream is
   then seeked back to where it was. The result is still (uoff_t)-1 if the
   stream turned out to be corrupted. */
uoff_t istream_raw_mbox_get_body_offset(struct istream *stream)
{
	struct raw_mbox_istream *mbox_stream =
		(struct raw_mbox_istream *)stream->real_stream;
	uoff_t saved_offset;
	size_t avail;

	i_assert(mbox_stream->seeked);

	if (mbox_stream->body_offset != (uoff_t)-1)
		return mbox_stream->body_offset;

	saved_offset = stream->v_offset;
	i_stream_seek(stream, mbox_stream->hdr_offset);
	for (;;) {
		if (mbox_stream->body_offset != (uoff_t)-1)
			break;

		/* consume what's buffered so that read() goes forward */
		i_stream_get_data(stream, &avail);
		i_stream_skip(stream, avail);

		if (i_stream_raw_mbox_read(&mbox_stream->istream) >= 0)
			continue;

		if (!mbox_stream->corrupted) {
			i_assert(mbox_stream->body_offset != (uoff_t)-1);
		} else {
			i_error("Unexpectedly lost From-line from mbox file "
				"%s at %"PRIuUOFF_T, mbox_stream->path,
				mbox_stream->from_offset);
		}
		break;
	}

	i_stream_seek(stream, saved_offset);
	return mbox_stream->body_offset;
}
/* Returns the size of the current message's body.

   expected_body_size is a hint ((uoff_t)-1 = none). It is accepted when a
   valid From-line (or the end of file) is found right after a body of that
   size. Otherwise the already known size is returned, or the body is read
   through to find out its size. hdr_offset and body_offset must be known
   before calling this. */
uoff_t istream_raw_mbox_get_body_size(struct istream *stream,
				      uoff_t expected_body_size)
{
	struct raw_mbox_istream *mbox_stream =
		(struct raw_mbox_istream *)stream->real_stream;
	const unsigned char *chunk;
	size_t chunk_size;
	uoff_t saved_offset, known_size, candidate_end;

	i_assert(mbox_stream->seeked);
	i_assert(mbox_stream->hdr_offset != (uoff_t)-1);
	i_assert(mbox_stream->body_offset != (uoff_t)-1);

	if (mbox_stream->mail_size == (uoff_t)-1) {
		known_size = (uoff_t)-1;
	} else {
		known_size = mbox_stream->mail_size -
			(mbox_stream->body_offset - mbox_stream->hdr_offset);
	}
	saved_offset = stream->v_offset;

	if (expected_body_size != (uoff_t)-1) {
		/* if we already have the existing body size, use it as long as
		   it's >= expected body_size. otherwise the previous parsing
		   may have stopped at a From_-line that belongs to the body. */
		if (known_size != (uoff_t)-1 &&
		    known_size >= expected_body_size)
			return known_size;

		candidate_end = mbox_stream->body_offset + expected_body_size;
		/* If header_missing_eoh is set, the message body begins with
		   a From_-line and the body_offset is pointing to the line
		   *before* the first line of the body, i.e. the empty line
		   separating the headers from the body. If that is the case,
		   we'll have to skip over the empty line to get the correct
		   offset. */
		if (mbox_stream->header_missing_eoh) {
			i_assert(known_size == 0);
			if (mbox_stream->crlf_ending)
				candidate_end += 2;
			else
				candidate_end += 1;
		}

		i_stream_seek(mbox_stream->istream.parent, candidate_end);
		if (istream_raw_mbox_is_valid_from(mbox_stream) > 0) {
			mbox_stream->mail_size =
				candidate_end - mbox_stream->hdr_offset;
			i_stream_seek(stream, saved_offset);
			return expected_body_size;
		}
		/* invalid expected_body_size */
	}

	if (known_size != (uoff_t)-1)
		return known_size;

	/* have to read through the message body */
	for (;;) {
		if (i_stream_read_data(stream, &chunk, &chunk_size, 0) <= 0)
			break;
		i_stream_skip(stream, chunk_size);
	}
	i_stream_seek(stream, saved_offset);

	i_assert(mbox_stream->mail_size != (uoff_t)-1);
	return mbox_stream->mail_size -
		(mbox_stream->body_offset - mbox_stream->hdr_offset);
}
/* Returns the timestamp from the current message's From-line, trying one
   read first if it hasn't been parsed yet. */
time_t istream_raw_mbox_get_received_time(struct istream *stream)
{
	struct raw_mbox_istream *mbox_stream;
	bool unparsed;

	mbox_stream = (struct raw_mbox_istream *)stream->real_stream;
	i_assert(mbox_stream->seeked);

	unparsed = mbox_stream->received_time == (time_t)-1;
	if (unparsed)
		(void)i_stream_raw_mbox_read(&mbox_stream->istream);
	return mbox_stream->received_time;
}
/* Returns the cached sender of the current message. If no sender has been
   cached yet, one read is attempted first. Never returns NULL: an empty
   string is returned when the sender is still unknown. The stream must
   have been seeked. */
const char *istream_raw_mbox_get_sender(struct istream *stream)
{
	struct raw_mbox_istream *rstream =
		(struct raw_mbox_istream *)stream->real_stream;

	i_assert(rstream->seeked);

	if (rstream->sender == NULL) {
		/* not cached yet - a read is expected to fill it in */
		(void)i_stream_raw_mbox_read(&rstream->istream);
	}

	if (rstream->sender != NULL)
		return rstream->sender;
	return "";
}
bool istream_raw_mbox_has_crlf_ending(struct istream *stream)
{
struct raw_mbox_istream *rstream =
(struct raw_mbox_istream *)stream->real_stream;
i_assert(rstream->seeked);
return rstream->crlf_ending;
}
/* Moves the stream from the current message to the one following it.

   The current message's body size is resolved first (optionally helped by
   expected_body_size, see istream_raw_mbox_get_body_size()), which gives
   the offset where the next message begins. The cached "next" sender and
   received time then become the current ones, the remaining per-message
   state is reset and both this stream and its parent are positioned at the
   new offset. */
void istream_raw_mbox_next(struct istream *stream, uoff_t expected_body_size)
{
	struct raw_mbox_istream *rstream =
		(struct raw_mbox_istream *)stream->real_stream;
	uoff_t body_size, next_from_offset;

	/* this has to be done while the current message's cached state is
	   still intact */
	body_size = istream_raw_mbox_get_body_size(stream, expected_body_size);
	next_from_offset = rstream->body_offset + body_size;

	/* what was cached for the "next" message now describes the current
	   one */
	i_free(rstream->sender);
	rstream->sender = rstream->next_sender;
	rstream->next_sender = NULL;
	rstream->received_time = rstream->next_received_time;
	rstream->next_received_time = (time_t)-1;

	/* nothing is known yet about the new message's layout; its header
	   offset starts out the same as its From_ offset */
	rstream->mail_size = (uoff_t)-1;
	rstream->body_offset = (uoff_t)-1;
	rstream->header_missing_eoh = FALSE;
	rstream->from_offset = next_from_offset;
	rstream->hdr_offset = next_from_offset;

	/* this stream is only seeked when it isn't already in the right
	   place; the parent is always seeked */
	if (stream->v_offset != rstream->from_offset)
		i_stream_seek_mark(stream, rstream->from_offset);
	i_stream_seek_mark(rstream->istream.parent, rstream->from_offset);

	/* clear both the mbox-level and the generic EOF flag */
	rstream->eof = FALSE;
	rstream->istream.istream.eof = FALSE;
}
/* Positions the mbox stream at the given offset. The stream must be locked.

   Cached state is reused in two cases, both only when the seeked flag is
   still set from an earlier seek:
    - offset is exactly where the current message ends: the stream is
      advanced with istream_raw_mbox_next() and 0 is returned
    - offset equals the current from_offset: the stream goes back to the
      current message's hdr_offset

   In every other case all cached per-message state is dropped, offset
   becomes the new from_offset/hdr_offset and a read is done there.

   Returns 0 if ok, -1 if the corrupted flag got set. */
int istream_raw_mbox_seek(struct istream *stream, uoff_t offset)
{
	struct raw_mbox_istream *rstream =
		(struct raw_mbox_istream *)stream->real_stream;
	bool need_read;

	i_assert(rstream->locked);

	/* forget EOF/corruption conditions left over from earlier use */
	rstream->istream.istream.eof = FALSE;
	rstream->eof = FALSE;
	rstream->corrupted = FALSE;

	/* seeked == FALSE means the stream was unlocked in the middle, so
	   none of the cached state may be used */
	if (rstream->seeked && rstream->mail_size != (uoff_t)-1 &&
	    offset == rstream->hdr_offset + rstream->mail_size) {
		/* offset is right at the end of the current message */
		istream_raw_mbox_next(stream, (uoff_t)-1);
		return 0;
	}

	if (!rstream->seeked || offset != rstream->from_offset) {
		/* some other position: drop everything that was cached */
		i_free(rstream->sender);
		rstream->sender = NULL;
		i_free(rstream->next_sender);
		rstream->next_sender = NULL;
		rstream->received_time = (time_t)-1;
		rstream->next_received_time = (time_t)-1;

		rstream->from_offset = offset;
		rstream->hdr_offset = offset;
		rstream->body_offset = (uoff_t)-1;
		rstream->mail_size = (uoff_t)-1;
		rstream->header_missing_eoh = FALSE;

		need_read = TRUE;
	} else {
		/* back to the beginning of the current message. a new read
		   is triggered only if its headers start at offset 0 */
		offset = rstream->hdr_offset;
		need_read = (offset == 0);
	}

	rstream->seeked = TRUE;
	i_stream_seek_mark(stream, offset);
	i_stream_seek_mark(rstream->istream.parent, offset);

	if (need_read)
		(void)i_stream_raw_mbox_read(&rstream->istream);

	if (rstream->corrupted)
		return -1;
	return 0;
}
/* Overrides the cached mail size so that the current message ends at the
   given offset, and flags the size as forced. The current message's
   hdr_offset must be known. */
void istream_raw_mbox_set_next_offset(struct istream *stream, uoff_t offset)
{
	struct raw_mbox_istream *rstream =
		(struct raw_mbox_istream *)stream->real_stream;

	i_assert(rstream->hdr_offset != (uoff_t)-1);

	/* mail_size is counted from the header offset */
	rstream->mail_size = offset - rstream->hdr_offset;
	rstream->mail_size_forced = TRUE;
}
bool istream_raw_mbox_is_eof(struct istream *stream)
{
struct raw_mbox_istream *rstream =
(struct raw_mbox_istream *)stream->real_stream;
return rstream->eof;
}
bool istream_raw_mbox_is_corrupted(struct istream *stream)
{
struct raw_mbox_istream *rstream =
(struct raw_mbox_istream *)stream->real_stream;
return rstream->corrupted;
}
/* Marks the stream as locked. istream_raw_mbox_seek() asserts that this
   flag is set. */
void istream_raw_mbox_set_locked(struct istream *stream)
{
	((struct raw_mbox_istream *)stream->real_stream)->locked = TRUE;
}
/* Marks the stream as unlocked. The seeked flag is cleared as well, which
   makes the next istream_raw_mbox_seek() ignore any cached state. */
void istream_raw_mbox_set_unlocked(struct istream *stream)
{
	struct raw_mbox_istream *rstream =
		(struct raw_mbox_istream *)stream->real_stream;

	rstream->seeked = FALSE;
	rstream->locked = FALSE;
}