istream-header-filter.c revision 5695ec03a0cc4836896e46a01bb9336782aee326
0c17af9d3f9323136a94e66605776ed4462a172dTimo Sirainen/* Copyright (c) 2003-2017 Dovecot authors, see the included COPYING file */
0c17af9d3f9323136a94e66605776ed4462a172dTimo Sirainen const char **headers;
0c17af9d3f9323136a94e66605776ed4462a172dTimo Sirainenheader_filter_callback *null_header_filter_callback = NULL;
0c17af9d3f9323136a94e66605776ed4462a172dTimo Sirainenstatic ssize_t i_stream_header_filter_read(struct istream_private *stream);
0c17af9d3f9323136a94e66605776ed4462a172dTimo Sirainenstatic void i_stream_header_filter_destroy(struct iostream_private *stream)
0c17af9d3f9323136a94e66605776ed4462a172dTimo Sirainen message_parse_header_deinit(&mstream->hdr_ctx);
0c17af9d3f9323136a94e66605776ed4462a172dTimo Sirainen if (array_is_created(&mstream->match_change_lines))
0c17af9d3f9323136a94e66605776ed4462a172dTimo Sirainenread_mixed(struct header_filter_istream *mstream, size_t body_highwater_size)
0c17af9d3f9323136a94e66605776ed4462a172dTimo Sirainen const unsigned char *data;
0c17af9d3f9323136a94e66605776ed4462a172dTimo Sirainen data = i_stream_get_data(mstream->istream.parent, &pos);
0c17af9d3f9323136a94e66605776ed4462a172dTimo Sirainen mstream->istream.istream.eof = mstream->istream.parent->eof;
0c17af9d3f9323136a94e66605776ed4462a172dTimo Sirainen if (mstream->end_body_with_lf && data[pos-1] != '\n' &&
0c17af9d3f9323136a94e66605776ed4462a172dTimo Sirainen /* add missing trailing LF to body */
0c17af9d3f9323136a94e66605776ed4462a172dTimo Sirainen mstream->istream.buffer = mstream->hdr_buf->data;
0c17af9d3f9323136a94e66605776ed4462a172dTimo Sirainen mstream->istream.pos = mstream->hdr_buf->used;
0c17af9d3f9323136a94e66605776ed4462a172dTimo Sirainen data = i_stream_get_data(mstream->istream.parent, &pos);
0c17af9d3f9323136a94e66605776ed4462a172dTimo Sirainen buffer_append(mstream->hdr_buf, data + body_highwater_size,
0c17af9d3f9323136a94e66605776ed4462a172dTimo Sirainen mstream->istream.buffer = buffer_get_data(mstream->hdr_buf, &pos);
0c17af9d3f9323136a94e66605776ed4462a172dTimo Sirainen ret = (ssize_t)(pos - mstream->istream.pos - mstream->istream.skip);
0c17af9d3f9323136a94e66605776ed4462a172dTimo Sirainenstatic int cmp_uint(const unsigned int *i1, const unsigned int *i2)
0c17af9d3f9323136a94e66605776ed4462a172dTimo Sirainenstatic bool match_line_changed(struct header_filter_istream *mstream)
0c17af9d3f9323136a94e66605776ed4462a172dTimo Sirainen if (!array_is_created(&mstream->match_change_lines))
0c17af9d3f9323136a94e66605776ed4462a172dTimo Sirainen return array_bsearch(&mstream->match_change_lines, &mstream->cur_line,
0c17af9d3f9323136a94e66605776ed4462a172dTimo Sirainenstatic void add_eol(struct header_filter_istream *mstream, bool orig_crlf)
0c17af9d3f9323136a94e66605776ed4462a172dTimo Sirainen if (mstream->crlf || (orig_crlf && mstream->crlf_preserve))
0c17af9d3f9323136a94e66605776ed4462a172dTimo Sirainenstatic ssize_t hdr_stream_update_pos(struct header_filter_istream *mstream)
0c17af9d3f9323136a94e66605776ed4462a172dTimo Sirainen mstream->istream.buffer = buffer_get_data(mstream->hdr_buf, &pos);
0c17af9d3f9323136a94e66605776ed4462a172dTimo Sirainen ret = (ssize_t)(pos - mstream->istream.pos - mstream->istream.skip);
0c17af9d3f9323136a94e66605776ed4462a172dTimo Sirainenstatic ssize_t read_header(struct header_filter_istream *mstream)
0c17af9d3f9323136a94e66605776ed4462a172dTimo Sirainen message_parse_header_init(mstream->istream.parent,
0c17af9d3f9323136a94e66605776ed4462a172dTimo Sirainen /* remove skipped data from hdr_buf */
0c17af9d3f9323136a94e66605776ed4462a172dTimo Sirainen mstream->hdr_buf, mstream->istream.skip, (size_t)-1);
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainen mstream->istream.pos -= mstream->istream.skip;
16f816d3f3c32ae3351834253f52ddd0212bcbf3Timo Sirainen buffer_set_used_size(mstream->hdr_buf, mstream->istream.pos);
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainen highwater_offset = mstream->istream.istream.v_offset +
cce169a321c9c629e4f2db1a69dae3b75bbcb27aTimo Sirainen if (highwater_offset >= mstream->header_size.virtual_size) {
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainen /* we want to return mixed headers and body */
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainen size_t body_highwater_size = highwater_offset -
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainen return read_mixed(mstream, body_highwater_size);
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainen max_buffer_size = i_stream_get_max_buffer_size(&mstream->istream.istream);
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainen if (mstream->hdr_buf->used >= max_buffer_size)
b66a7b7ab0db2c9ad425912d3f21a36fcf76d876Timo Sirainen while ((hdr_ret = message_parse_header_next(mstream->hdr_ctx,
b5e6f6f27c1461f0f9f202615eeb738a645188c3Timo Sirainen if (mstream->header_parsed && !mstream->headers_edited) {
e1ca7af110ea6eeb6303bdd8f07c172b11dff2faTimo Sirainen /* Header line continued - use only the first line's
e1ca7af110ea6eeb6303bdd8f07c172b11dff2faTimo Sirainen matched-result. Otherwise multiline headers might
e1ca7af110ea6eeb6303bdd8f07c172b11dff2faTimo Sirainen end up being only partially picked, which wouldn't
e1ca7af110ea6eeb6303bdd8f07c172b11dff2faTimo Sirainen be very good. However, allow callbacks to modify
e1ca7af110ea6eeb6303bdd8f07c172b11dff2faTimo Sirainen the headers in any way they want. */
e1ca7af110ea6eeb6303bdd8f07c172b11dff2faTimo Sirainen /* no include/exclude headers - default matching */
cce169a321c9c629e4f2db1a69dae3b75bbcb27aTimo Sirainen matched = i_bsearch(hdr->name, mstream->headers,
cce169a321c9c629e4f2db1a69dae3b75bbcb27aTimo Sirainen /* nothing gets excluded */
cce169a321c9c629e4f2db1a69dae3b75bbcb27aTimo Sirainen } else if (!mstream->header_parsed || mstream->headers_edited) {
cce169a321c9c629e4f2db1a69dae3b75bbcb27aTimo Sirainen /* first time in this line or we have actually modified
cce169a321c9c629e4f2db1a69dae3b75bbcb27aTimo Sirainen the header so we always want to call the callbacks */
cce169a321c9c629e4f2db1a69dae3b75bbcb27aTimo Sirainen !hdr->continued && !mstream->headers_edited) {
cce169a321c9c629e4f2db1a69dae3b75bbcb27aTimo Sirainen if (!array_is_created(&mstream->match_change_lines))
cce169a321c9c629e4f2db1a69dae3b75bbcb27aTimo Sirainen i_array_init(&mstream->match_change_lines, 8);
cce169a321c9c629e4f2db1a69dae3b75bbcb27aTimo Sirainen /* second time in this line. was it excluded by the
cce169a321c9c629e4f2db1a69dae3b75bbcb27aTimo Sirainen callback the first time? */
cce169a321c9c629e4f2db1a69dae3b75bbcb27aTimo Sirainen if (mstream->skip_count >= mstream->hdr_buf->used) {
cce169a321c9c629e4f2db1a69dae3b75bbcb27aTimo Sirainen /* we need more */
cce169a321c9c629e4f2db1a69dae3b75bbcb27aTimo Sirainen mstream->skip_count -= mstream->hdr_buf->used;
cce169a321c9c629e4f2db1a69dae3b75bbcb27aTimo Sirainen if (mstream->hdr_buf->used >= max_buffer_size)
cce169a321c9c629e4f2db1a69dae3b75bbcb27aTimo Sirainen const unsigned char *data = mstream->hdr_buf->data;
e1ca7af110ea6eeb6303bdd8f07c172b11dff2faTimo Sirainen if (mstream->istream.parent->stream_errno != 0) {
cce169a321c9c629e4f2db1a69dae3b75bbcb27aTimo Sirainen if (!mstream->seen_eoh && mstream->add_missing_eoh) {
cce169a321c9c629e4f2db1a69dae3b75bbcb27aTimo Sirainen if (mstream->header_parsed && !mstream->headers_edited) {
cce169a321c9c629e4f2db1a69dae3b75bbcb27aTimo Sirainen /* don't copy eof here because we're only returning headers here.
cce169a321c9c629e4f2db1a69dae3b75bbcb27aTimo Sirainen the body will be returned in separate read() call. */
cce169a321c9c629e4f2db1a69dae3b75bbcb27aTimo Sirainen /* need more data to finish parsing headers. we may have some
cce169a321c9c629e4f2db1a69dae3b75bbcb27aTimo Sirainen data already available though. */
cce169a321c9c629e4f2db1a69dae3b75bbcb27aTimo Sirainen /* finished */
cce169a321c9c629e4f2db1a69dae3b75bbcb27aTimo Sirainen message_parse_header_deinit(&mstream->hdr_ctx);
cce169a321c9c629e4f2db1a69dae3b75bbcb27aTimo Sirainen if ((!mstream->header_parsed || mstream->headers_edited ||
cce169a321c9c629e4f2db1a69dae3b75bbcb27aTimo Sirainen /* check if the callback added more headers.
cce169a321c9c629e4f2db1a69dae3b75bbcb27aTimo Sirainen this is allowed only if EOH wasn't added yet. */
cce169a321c9c629e4f2db1a69dae3b75bbcb27aTimo Sirainen /* we're at the end of headers. */
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainen return i_stream_header_filter_read(&mstream->istream);
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainenhandle_end_body_with_lf(struct header_filter_istream *mstream, ssize_t ret)
8153fdec343e40e2a78f5c12353e89b994b28f74Timo Sirainen struct istream_private *stream = &mstream->istream;
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainen const unsigned char *data;
b5e6f6f27c1461f0f9f202615eeb738a645188c3Timo Sirainen data = i_stream_get_data(stream->parent, &size);
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainen last_offset = stream->parent->v_offset + size-1;
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainen else if (size > 0)
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainen if (ret == -1 && stream->parent->eof && !last_lf) {
b92813e2f96d4b28f989528ed5dd6115da7d9bdbTimo Sirainen /* missing LF, need to add it */
7fb7365a8fad104a17538a73c338ee3d3420e7b0Timo Sirainen mstream->last_lf_offset = last_lf ? last_offset : (uoff_t)-1;
7fb7365a8fad104a17538a73c338ee3d3420e7b0Timo Sirainenstatic ssize_t i_stream_header_filter_read(struct istream_private *stream)
0c17af9d3f9323136a94e66605776ed4462a172dTimo Sirainen stream->istream.v_offset < mstream->header_size.virtual_size) {
8153fdec343e40e2a78f5c12353e89b994b28f74Timo Sirainen v_offset = stream->parent_start_offset + stream->istream.v_offset -
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainen ret = i_stream_read_copy_from_parent(&stream->istream);
7fb7365a8fad104a17538a73c338ee3d3420e7b0Timo Siraineni_stream_header_filter_seek_to_header(struct header_filter_istream *mstream,
7fb7365a8fad104a17538a73c338ee3d3420e7b0Timo Sirainen mstream->istream.parent->real_stream->access_counter;
7fb7365a8fad104a17538a73c338ee3d3420e7b0Timo Sirainen message_parse_header_deinit(&mstream->hdr_ctx);
7fb7365a8fad104a17538a73c338ee3d3420e7b0Timo Sirainenstatic int skip_header(struct header_filter_istream *mstream)
7fb7365a8fad104a17538a73c338ee3d3420e7b0Timo Sirainen mstream->istream.parent->real_stream->access_counter) {
659fe5d24825b160cae512538088020d97a60239Timo Sirainen /* need to re-parse headers */
7fb7365a8fad104a17538a73c338ee3d3420e7b0Timo Sirainen i_stream_header_filter_seek_to_header(mstream, 0);
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainen i_stream_read(&mstream->istream.istream) != -1) {
7fb7365a8fad104a17538a73c338ee3d3420e7b0Timo Sirainen pos = i_stream_get_data_size(&mstream->istream.istream);
7fb7365a8fad104a17538a73c338ee3d3420e7b0Timo Sirainen i_stream_skip(&mstream->istream.istream, pos);
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainen return mstream->istream.istream.stream_errno != 0 ? -1 : 0;
7fb7365a8fad104a17538a73c338ee3d3420e7b0Timo Sirainenstream_reset_to(struct header_filter_istream *mstream, uoff_t v_offset)
7fb7365a8fad104a17538a73c338ee3d3420e7b0Timo Sirainen mstream->istream.skip = mstream->istream.pos = 0;
b35f7104715edee0cfac6d46ab0b342033867eb7Timo Sirainenstatic void i_stream_header_filter_seek(struct istream_private *stream,
7fb7365a8fad104a17538a73c338ee3d3420e7b0Timo Sirainen /* just reset the input buffer */
7fb7365a8fad104a17538a73c338ee3d3420e7b0Timo Sirainen /* if last_lf_added=TRUE, we're currently at EOF. So reset it only if
7fb7365a8fad104a17538a73c338ee3d3420e7b0Timo Sirainen we're seeking backwards, otherwise we would just add a duplicate */
7fb7365a8fad104a17538a73c338ee3d3420e7b0Timo Sirainen /* seeking to beginning of headers. */
7fb7365a8fad104a17538a73c338ee3d3420e7b0Timo Sirainen i_stream_header_filter_seek_to_header(mstream, 0);
7fb7365a8fad104a17538a73c338ee3d3420e7b0Timo Sirainen /* if we haven't parsed the whole header yet, we don't know if we
7fb7365a8fad104a17538a73c338ee3d3420e7b0Timo Sirainen want to seek inside header or body. so make sure we've parsed the
7fb7365a8fad104a17538a73c338ee3d3420e7b0Timo Sirainen if (v_offset < mstream->header_size.virtual_size) {
7fb7365a8fad104a17538a73c338ee3d3420e7b0Timo Sirainen /* seek into headers. we'll have to re-parse them, use
7fb7365a8fad104a17538a73c338ee3d3420e7b0Timo Sirainen skip_count to set the wanted position */
cce169a321c9c629e4f2db1a69dae3b75bbcb27aTimo Sirainen i_stream_header_filter_seek_to_header(mstream, v_offset);
cce169a321c9c629e4f2db1a69dae3b75bbcb27aTimo Sirainen v_offset += mstream->header_size.physical_size -
7fb7365a8fad104a17538a73c338ee3d3420e7b0Timo Siraineni_stream_header_filter_sync(struct istream_private *stream ATTR_UNUSED)
7fb7365a8fad104a17538a73c338ee3d3420e7b0Timo Sirainen i_panic("istream-header-filter sync() not implemented");
7fb7365a8fad104a17538a73c338ee3d3420e7b0Timo Siraineni_stream_header_filter_stat(struct istream_private *stream, bool exact)
7fb7365a8fad104a17538a73c338ee3d3420e7b0Timo Sirainen if (i_stream_stat(stream->parent, exact, &st) < 0) {
7fb7365a8fad104a17538a73c338ee3d3420e7b0Timo Sirainen stream->istream.stream_errno = stream->parent->stream_errno;
7fb7365a8fad104a17538a73c338ee3d3420e7b0Timo Sirainen /* fix the filtered header size */
7fb7365a8fad104a17538a73c338ee3d3420e7b0Timo Sirainen /* no body */
7fb7365a8fad104a17538a73c338ee3d3420e7b0Timo Sirainen stream->statbuf.st_size = mstream->header_size.physical_size;
7fb7365a8fad104a17538a73c338ee3d3420e7b0Timo Sirainen /* no last-LF */
7fb7365a8fad104a17538a73c338ee3d3420e7b0Timo Sirainen /* yes, we have added LF */
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainen stream->statbuf.st_size += mstream->crlf ? 2 : 1;
b5e6f6f27c1461f0f9f202615eeb738a645188c3Timo Sirainen } else if (mstream->last_lf_offset != (uoff_t)-1) {
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainen /* no, we didn't need to add LF */
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainen /* check if we need to add LF */
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainen i_stream_seek(stream->parent, st->st_size - 1);
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainen ssize_t ret = handle_end_body_with_lf(mstream, -1);
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Siraineni_stream_create_header_filter(struct istream *input,
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainen const char *const *headers,
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainen header_filter_callback *callback, void *context)
b5e6f6f27c1461f0f9f202615eeb738a645188c3Timo Sirainen unsigned int i, j;
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainen i_assert((flags & (HEADER_FILTER_INCLUDE|HEADER_FILTER_EXCLUDE)) != 0);
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainen mstream = i_new(struct header_filter_istream, 1);
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainen mstream->pool = pool_alloconly_create(MEMPOOL_GROWING
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainen mstream->istream.max_buffer_size = input->real_stream->max_buffer_size;
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainen mstream->headers = headers_count == 0 ? NULL :
6ef7e31619edfaa17ed044b45861d106a86191efTimo Sirainen p_new(mstream->pool, const char *, headers_count);
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainen for (i = j = 0; i < headers_count; i++) {
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainen strcasecmp(mstream->headers[j-1], headers[i]);
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainen /* drop duplicate */
6ef7e31619edfaa17ed044b45861d106a86191efTimo Sirainen mstream->headers[j++] = p_strdup(mstream->pool, headers[i]);
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainen mstream->hdr_buf = buffer_create_dynamic(mstream->pool, 1024);
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainen mstream->exclude = (flags & HEADER_FILTER_EXCLUDE) != 0;
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainen if ((flags & HEADER_FILTER_CRLF_PRESERVE) != 0)
d77c309fccbc6a7594f8cb08fb01009fa613c568Timo Sirainen mstream->hide_body = (flags & HEADER_FILTER_HIDE_BODY) != 0;
b5e6f6f27c1461f0f9f202615eeb738a645188c3Timo Sirainen mstream->add_missing_eoh = (flags & HEADER_FILTER_ADD_MISSING_EOH) != 0;
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainen (flags & HEADER_FILTER_END_BODY_WITH_LF) != 0;
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen mstream->istream.iostream.destroy = i_stream_header_filter_destroy;
8fcff4c5b52f24d9c681805fdf06b486f1d0fcbeTimo Sirainen mstream->istream.read = i_stream_header_filter_read;
d67f54632110cfb6aafe2d7cd1f99b031c0b208aTimo Sirainen mstream->istream.seek = i_stream_header_filter_seek;
d482b35af87f5fd872bad007da0475813a401a49Timo Sirainen mstream->istream.sync = i_stream_header_filter_sync;
d482b35af87f5fd872bad007da0475813a401a49Timo Sirainen mstream->istream.stat = i_stream_header_filter_stat;
d67f54632110cfb6aafe2d7cd1f99b031c0b208aTimo Sirainen mstream->istream.istream.blocking = input->blocking;
b5e6f6f27c1461f0f9f202615eeb738a645188c3Timo Sirainen mstream->istream.istream.seekable = input->seekable;
d67f54632110cfb6aafe2d7cd1f99b031c0b208aTimo Sirainen return i_stream_create(&mstream->istream, input, -1);