istream-raw-mbox.c revision c74ea62a27878910e3ca1614ca055d7e2b3b00d5
5f5870385cff47efd2f58e7892f251cf13761528Timo Sirainen/* Copyright (C) 2003 Timo Sirainen */
37847ec8eaec9ad55c9df10ae109efe7b37ac573Timo Sirainen uoff_t from_offset, hdr_offset, body_offset, mail_size;
5694eeb99b69dea8033ca77ad69743c6b4871370Timo Sirainenstatic void _close(struct _iostream *stream __attr_unused__)
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen struct raw_mbox_istream *rstream = (struct raw_mbox_istream *)stream;
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen i_stream_seek(rstream->input, rstream->istream.istream.v_offset);
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainenstatic void _set_max_buffer_size(struct _iostream *stream, size_t max_size)
8a0ad174adb1eb5108511b90e97f4e5f9089b0eeTimo Sirainen struct raw_mbox_istream *rstream = (struct raw_mbox_istream *)stream;
8a0ad174adb1eb5108511b90e97f4e5f9089b0eeTimo Sirainen i_stream_set_max_buffer_size(rstream->input, max_size);
8a0ad174adb1eb5108511b90e97f4e5f9089b0eeTimo Sirainenstatic int mbox_read_from_line(struct raw_mbox_istream *rstream)
8a0ad174adb1eb5108511b90e97f4e5f9089b0eeTimo Sirainen const unsigned char *buf, *p;
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen buf = i_stream_get_data(rstream->input, &pos);
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen /* from_offset points to "\nFrom ", so unless we're at the beginning
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen of the file, skip the initial \n */
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen while ((p = memchr(buf+skip, '\n', pos-skip)) == NULL) {
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen /* EOF - shouldn't happen */
8a0ad174adb1eb5108511b90e97f4e5f9089b0eeTimo Sirainen buf = i_stream_get_data(rstream->input, &pos);
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen /* beginning of mbox */
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen mbox_from_parse(buf+5, pos-5, &received_time, &sender) < 0) {
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen /* broken From - should happen only at beginning of
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen file if this isn't a mbox.. */
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen if (rstream->istream.istream.v_offset == rstream->from_offset) {
a24519c36d5f8fa22f58b2c693ba547e8d175a54Timo Sirainen /* we'll skip over From-line */
a24519c36d5f8fa22f58b2c693ba547e8d175a54Timo Sirainen rstream->istream.istream.v_offset += line_pos+1;
a24519c36d5f8fa22f58b2c693ba547e8d175a54Timo Sirainen rstream->hdr_offset = rstream->istream.istream.v_offset;
db8b0a3f74a20528d66a3c4be7df920e5c4554c2Timo Sirainenstatic void handle_end_of_mail(struct raw_mbox_istream *rstream, size_t pos)
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen rstream->mail_size = rstream->istream.istream.v_offset + pos -
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen if (rstream->hdr_offset + rstream->mail_size < rstream->body_offset) {
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen /* a) Header didn't have ending \n
db8b0a3f74a20528d66a3c4be7df920e5c4554c2Timo Sirainen b) "headers\n\nFrom ..", the second \n belongs to next
db8b0a3f74a20528d66a3c4be7df920e5c4554c2Timo Sirainen message which we didn't know at the time yet. */
2615df45a8027948a474abe5e817b34b0499c171Timo Sirainen i_assert(rstream->body_offset == (uoff_t)-1 ||
2615df45a8027948a474abe5e817b34b0499c171Timo Sirainen rstream->hdr_offset + rstream->mail_size + 1);
63e207529879438e9f4412d97cdc34bdc82a3702Timo Sirainen struct raw_mbox_istream *rstream = (struct raw_mbox_istream *)stream;
2649b237dd4690575e75a30b2bf3b39ebd37b835Timo Sirainen const unsigned char *buf;
2649b237dd4690575e75a30b2bf3b39ebd37b835Timo Sirainen size_t i, pos, new_pos, from_start_pos, from_after_pos;
2649b237dd4690575e75a30b2bf3b39ebd37b835Timo Sirainen i_assert(stream->istream.v_offset >= rstream->from_offset);
1701e3f91107051b1704721bf1dc1e32491faaf9Timo Sirainen i_stream_seek(rstream->input, stream->istream.v_offset);
3fe67ec75ccae1230bb9eb9f16affc48377f6441Timo Sirainen buf = i_stream_get_data(rstream->input, &pos);
2615df45a8027948a474abe5e817b34b0499c171Timo Sirainen if (pos > 1 && stream->istream.v_offset + pos >
2a6dcd984104fed84bed8795ccdfabb20e41ce52Timo Sirainen /* fake our read count. needed because if in the end
2a6dcd984104fed84bed8795ccdfabb20e41ce52Timo Sirainen we have only one character in buffer and we skip it
2a6dcd984104fed84bed8795ccdfabb20e41ce52Timo Sirainen (as potential CR), we want to get back to this
2a6dcd984104fed84bed8795ccdfabb20e41ce52Timo Sirainen _read() to read more data. */
2a6dcd984104fed84bed8795ccdfabb20e41ce52Timo Sirainen } while (ret > 0);
27586e4785d56aeb76e1fd96af8db799688dc64aTimo Sirainen } else if (stream->istream.v_offset != 0 || pos == 0) {
2615df45a8027948a474abe5e817b34b0499c171Timo Sirainen /* we've read the whole file, final byte should be
5214b67a7dabab87da74e04bb8b227f94b95bce4Timo Sirainen the \n trailer */
e5acc283bf030b0b5c79ca4e52d315c516a299faPascal Volk if (stream->istream.v_offset == rstream->from_offset) {
e5acc283bf030b0b5c79ca4e52d315c516a299faPascal Volk /* haven't seen From-line yet, so this mbox
e5acc283bf030b0b5c79ca4e52d315c516a299faPascal Volk stream is now at EOF */
db8b0a3f74a20528d66a3c4be7df920e5c4554c2Timo Sirainen if (stream->istream.v_offset == rstream->from_offset) {
2615df45a8027948a474abe5e817b34b0499c171Timo Sirainen /* beginning of message, we haven't yet read our From-line */
adb6413686e52e00dded4932babcc08ff041876bTimo Sirainen /* got it. we don't want to return it however,
adb6413686e52e00dded4932babcc08ff041876bTimo Sirainen so start again from headers */
adb6413686e52e00dded4932babcc08ff041876bTimo Sirainen buf = i_stream_get_data(rstream->input, &pos);
94b0ff77495c3ed14bdd4b5d7ae1eb37e8c9efb5Timo Sirainen /* See if we have From-line here - note that it works right only
94b0ff77495c3ed14bdd4b5d7ae1eb37e8c9efb5Timo Sirainen because all characters are different in mbox_from. */
94b0ff77495c3ed14bdd4b5d7ae1eb37e8c9efb5Timo Sirainen fromp = mbox_from; from_start_pos = from_after_pos = (size_t)-1;
94b0ff77495c3ed14bdd4b5d7ae1eb37e8c9efb5Timo Sirainen eoh_char = rstream->body_offset == (uoff_t)-1 ? '\n' : -1;
b365bd121cdc87f63e1dd47c5085a27091118e00Timo Sirainen (i > 1 && buf[i-1] == '\r' && buf[i-2] == '\n') ||
94b0ff77495c3ed14bdd4b5d7ae1eb37e8c9efb5Timo Sirainen stream->istream.v_offset + i == rstream->hdr_offset)) {
94b0ff77495c3ed14bdd4b5d7ae1eb37e8c9efb5Timo Sirainen rstream->body_offset = stream->istream.v_offset + i + 1;
02e61e13a8360a9d3ec92c5fa5ae60c0f0181b71Timo Sirainen /* potential From-line, see if we have the
adb6413686e52e00dded4932babcc08ff041876bTimo Sirainen rest of the line buffered.
adb6413686e52e00dded4932babcc08ff041876bTimo Sirainen FIXME: if From-line is longer than input
c1d19144dd7b1de6822df6ed1d10af0c9cb38840Timo Sirainen buffer, we break. probably irrelevant.. */
c1d19144dd7b1de6822df6ed1d10af0c9cb38840Timo Sirainen /* CR also belongs to it. */
c1d19144dd7b1de6822df6ed1d10af0c9cb38840Timo Sirainen /* we have the whole From-line here now.
c1d19144dd7b1de6822df6ed1d10af0c9cb38840Timo Sirainen See if it's a valid one. */
c1d19144dd7b1de6822df6ed1d10af0c9cb38840Timo Sirainen /* yep, we stop here. */
b365bd121cdc87f63e1dd47c5085a27091118e00Timo Sirainen /* we want to go at least one byte further next time */
94b0ff77495c3ed14bdd4b5d7ae1eb37e8c9efb5Timo Sirainen rstream->input_peak_offset = stream->istream.v_offset + i;
c58c12049c883b281c088d47a2a7278c21c390e1Timo Sirainen /* we're waiting for the \n at the end of From-line */
c58c12049c883b281c088d47a2a7278c21c390e1Timo Sirainen /* leave out the beginnings of potential From-line + CR */
adb6413686e52e00dded4932babcc08ff041876bTimo Sirainenstatic void _seek(struct _istream *stream, uoff_t v_offset,
b365bd121cdc87f63e1dd47c5085a27091118e00Timo Sirainen struct raw_mbox_istream *rstream = (struct raw_mbox_istream *)stream;
b365bd121cdc87f63e1dd47c5085a27091118e00Timo Sirainen struct raw_mbox_istream *rstream = (struct raw_mbox_istream *)stream;
94b0ff77495c3ed14bdd4b5d7ae1eb37e8c9efb5Timo Sirainenstatic const struct stat *_stat(struct _istream *stream, int exact)
adb6413686e52e00dded4932babcc08ff041876bTimo Sirainen struct raw_mbox_istream *rstream = (struct raw_mbox_istream *)stream;
adb6413686e52e00dded4932babcc08ff041876bTimo Sirainenstruct istream *i_stream_create_raw_mbox(pool_t pool, struct istream *input)
0b4e1043e596bfb36d999dacbf1d4d63ee96d75fTimo Sirainen rstream = p_new(pool, struct raw_mbox_istream, 1);
1433bf361ddb0bba8878c8ada5726d0284edad57Timo Sirainen rstream->istream.iostream.set_max_buffer_size = _set_max_buffer_size;
0b4e1043e596bfb36d999dacbf1d4d63ee96d75fTimo Sirainen return _i_stream_create(&rstream->istream, pool, -1,
94d8e51119003d2bc5a100c663f90141f297385dTimo Sirainenstatic int istream_raw_mbox_is_valid_from(struct raw_mbox_istream *rstream)
0b4e1043e596bfb36d999dacbf1d4d63ee96d75fTimo Sirainen const unsigned char *data;
0b4e1043e596bfb36d999dacbf1d4d63ee96d75fTimo Sirainen /* minimal: "From x Thu Nov 29 22:33:52 2001" = 31 chars */
0b4e1043e596bfb36d999dacbf1d4d63ee96d75fTimo Sirainen if (i_stream_read_data(rstream->input, &data, &size, 30) == -1)
0b4e1043e596bfb36d999dacbf1d4d63ee96d75fTimo Sirainen (size == 2 && data[0] == '\r' && data[1] == '\n')) {
dc5606fb66d30a659459446b6ca1a8b4f1146052Timo Sirainen if (size > 31 && memcmp(data, "\nFrom ", 6) == 0) {
dc5606fb66d30a659459446b6ca1a8b4f1146052Timo Sirainen } else if (size > 32 && memcmp(data, "\r\nFrom ", 7) == 0) {
dc5606fb66d30a659459446b6ca1a8b4f1146052Timo Sirainen if (i_stream_read_data(rstream->input, &data, &size, size) < 0)
dc5606fb66d30a659459446b6ca1a8b4f1146052Timo Sirainen if (mbox_from_parse(data, size, &received_time, &sender) < 0)
dc5606fb66d30a659459446b6ca1a8b4f1146052Timo Sirainenuoff_t istream_raw_mbox_get_start_offset(struct istream *stream)
dc5606fb66d30a659459446b6ca1a8b4f1146052Timo Sirainen (struct raw_mbox_istream *)stream->real_stream;
c0a87e5f3316a57e6f915882fa1951d0fbb74a61Timo Sirainenuoff_t istream_raw_mbox_get_header_offset(struct istream *stream)
c0a87e5f3316a57e6f915882fa1951d0fbb74a61Timo Sirainen (struct raw_mbox_istream *)stream->real_stream;
636f017be100bce67d66fd3ae1544a47681efd33Timo Sirainen if (rstream->hdr_offset == rstream->from_offset)
636f017be100bce67d66fd3ae1544a47681efd33Timo Sirainenuoff_t istream_raw_mbox_get_body_offset(struct istream *stream)
636f017be100bce67d66fd3ae1544a47681efd33Timo Sirainen (struct raw_mbox_istream *)stream->real_stream;
c0a87e5f3316a57e6f915882fa1951d0fbb74a61Timo Sirainenuoff_t istream_raw_mbox_get_body_size(struct istream *stream, uoff_t body_size)
c0a87e5f3316a57e6f915882fa1951d0fbb74a61Timo Sirainen (struct raw_mbox_istream *)stream->real_stream;
c0a87e5f3316a57e6f915882fa1951d0fbb74a61Timo Sirainen const unsigned char *data;
dc5606fb66d30a659459446b6ca1a8b4f1146052Timo Sirainen i_stream_seek(rstream->input, rstream->body_offset + body_size);
dc5606fb66d30a659459446b6ca1a8b4f1146052Timo Sirainen if (istream_raw_mbox_is_valid_from(rstream) > 0) {
8f70c97f7ab7b7e1683ed5cfcd96721a899c2520Timo Sirainen /* have to read through the message body */
8f70c97f7ab7b7e1683ed5cfcd96721a899c2520Timo Sirainen while (i_stream_read_data(stream, &data, &size, 0) > 0)
dc5606fb66d30a659459446b6ca1a8b4f1146052Timo Sirainentime_t istream_raw_mbox_get_received_time(struct istream *stream)
dc5606fb66d30a659459446b6ca1a8b4f1146052Timo Sirainen (struct raw_mbox_istream *)stream->real_stream;
dc5606fb66d30a659459446b6ca1a8b4f1146052Timo Sirainenconst char *istream_raw_mbox_get_sender(struct istream *stream)
dc5606fb66d30a659459446b6ca1a8b4f1146052Timo Sirainen (struct raw_mbox_istream *)stream->real_stream;
c0a87e5f3316a57e6f915882fa1951d0fbb74a61Timo Sirainen return rstream->sender == NULL ? "" : rstream->sender;
636f017be100bce67d66fd3ae1544a47681efd33Timo Sirainenvoid istream_raw_mbox_next(struct istream *stream, uoff_t body_size)
c0a87e5f3316a57e6f915882fa1951d0fbb74a61Timo Sirainen (struct raw_mbox_istream *)stream->real_stream;
c0a87e5f3316a57e6f915882fa1951d0fbb74a61Timo Sirainen body_size = istream_raw_mbox_get_body_size(stream, body_size);
9b00ecffbe74fd864d0d72e6112ec53b86f619baTimo Sirainen rstream->received_time = rstream->next_received_time;
c0a87e5f3316a57e6f915882fa1951d0fbb74a61Timo Sirainen rstream->from_offset = rstream->body_offset + body_size;
636f017be100bce67d66fd3ae1544a47681efd33Timo Sirainen i_stream_seek_mark(stream, rstream->from_offset);
636f017be100bce67d66fd3ae1544a47681efd33Timo Sirainen i_stream_seek_mark(rstream->input, rstream->from_offset);
c0a87e5f3316a57e6f915882fa1951d0fbb74a61Timo Sirainenint istream_raw_mbox_seek(struct istream *stream, uoff_t offset)
dc5606fb66d30a659459446b6ca1a8b4f1146052Timo Sirainen (struct raw_mbox_istream *)stream->real_stream;
dc5606fb66d30a659459446b6ca1a8b4f1146052Timo Sirainen rstream->hdr_offset + rstream->mail_size == offset) {
dc5606fb66d30a659459446b6ca1a8b4f1146052Timo Sirainen /* back to beginning of current message */
68a4946b12583b88fa802e52ebee45cd96056772Timo Sirainenint istream_raw_mbox_is_eof(struct istream *stream)