message-parser.c revision 1bc6f1c54b4d77830288b8cf19060bd8a6db7b27
5a580c3a38ced62d4bcc95b8ac7c4f2935b5d294Timo Sirainen/* Copyright (c) 2002-2016 Dovecot authors, see the included COPYING file */
0c17af9d3f9323136a94e66605776ed4462a172dTimo Sirainen/* RFC 2046 limits boundaries to at most 70 chars, plus the "--" prefix and the "--" suffix.
0c17af9d3f9323136a94e66605776ed4462a172dTimo Sirainen We allow a bit more than that just in case. */
0c17af9d3f9323136a94e66605776ed4462a172dTimo Sirainen struct message_header_parser_ctx *hdr_parser_ctx;
0c17af9d3f9323136a94e66605776ed4462a172dTimo Sirainen int (*parse_next_block)(struct message_parser_ctx *ctx,
0c17af9d3f9323136a94e66605776ed4462a172dTimo Sirainenmessage_part_header_callback_t *null_message_part_header_callback = NULL;
0c17af9d3f9323136a94e66605776ed4462a172dTimo Sirainenstatic int parse_next_header_init(struct message_parser_ctx *ctx,
0c17af9d3f9323136a94e66605776ed4462a172dTimo Sirainenstatic int parse_next_body_to_boundary(struct message_parser_ctx *ctx,
0c17af9d3f9323136a94e66605776ed4462a172dTimo Sirainenstatic int parse_next_body_to_eof(struct message_parser_ctx *ctx,
0c17af9d3f9323136a94e66605776ed4462a172dTimo Sirainenstatic int preparsed_parse_epilogue_init(struct message_parser_ctx *ctx,
0c17af9d3f9323136a94e66605776ed4462a172dTimo Sirainenstatic int preparsed_parse_next_header_init(struct message_parser_ctx *ctx,
0c17af9d3f9323136a94e66605776ed4462a172dTimo Sirainenboundary_find(struct message_boundary *boundaries,
0c17af9d3f9323136a94e66605776ed4462a172dTimo Sirainen /* As MIME spec says: search from latest one to oldest one so that we
0c17af9d3f9323136a94e66605776ed4462a172dTimo Sirainen don't break if the same boundary is used in nested parts. Also the
0c17af9d3f9323136a94e66605776ed4462a172dTimo Sirainen full message line doesn't have to match the boundary, only the
0c17af9d3f9323136a94e66605776ed4462a172dTimo Sirainen beginning. However, if there are multiple prefixes whose beginning
0c17af9d3f9323136a94e66605776ed4462a172dTimo Sirainen matches, use the longest matching one. */
0c17af9d3f9323136a94e66605776ed4462a172dTimo Sirainen memcmp(boundaries->boundary, data, boundaries->len) == 0 &&
0c17af9d3f9323136a94e66605776ed4462a172dTimo Sirainen (best == NULL || best->len < boundaries->len))
0c17af9d3f9323136a94e66605776ed4462a172dTimo Sirainenstatic void parse_body_add_block(struct message_parser_ctx *ctx,
0c17af9d3f9323136a94e66605776ed4462a172dTimo Sirainen unsigned int missing_cr_count = 0;
0c17af9d3f9323136a94e66605776ed4462a172dTimo Sirainen const unsigned char *cur, *next, *data = block->data;
0c17af9d3f9323136a94e66605776ed4462a172dTimo Sirainen /* check if we have NULs */
0c17af9d3f9323136a94e66605776ed4462a172dTimo Sirainen ctx->part->flags |= MESSAGE_PART_FLAG_HAS_NULS;
0c17af9d3f9323136a94e66605776ed4462a172dTimo Sirainen /* count number of lines and missing CRs */
0c17af9d3f9323136a94e66605776ed4462a172dTimo Sirainen while ((next = memchr(cur, '\n', block->size - (cur - data))) != NULL) {
0c17af9d3f9323136a94e66605776ed4462a172dTimo Sirainen ctx->part->body_size.physical_size += block->size;
0c17af9d3f9323136a94e66605776ed4462a172dTimo Sirainen ctx->part->body_size.virtual_size += block->size + missing_cr_count;
0c17af9d3f9323136a94e66605776ed4462a172dTimo Sirainenstatic int message_parser_read_more(struct message_parser_ctx *ctx,
0c17af9d3f9323136a94e66605776ed4462a172dTimo Sirainen ret = i_stream_read_bytes(ctx->input, &block_r->data,
0c17af9d3f9323136a94e66605776ed4462a172dTimo Sirainen /* EOF, but we still have some data.
0c17af9d3f9323136a94e66605776ed4462a172dTimo Sirainen return it. */
0c17af9d3f9323136a94e66605776ed4462a172dTimo Sirainen /* reset number of wanted characters if we actually got them */
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainenmessage_part_append(pool_t pool, struct message_part *parent)
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainen i_assert((parent->flags & (MESSAGE_PART_FLAG_MULTIPART |
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainen /* set child position */
b66a7b7ab0db2c9ad425912d3f21a36fcf76d876Timo Sirainenstatic void parse_next_body_multipart_init(struct message_parser_ctx *ctx)
cbf7138b49d32fce0645dc6523fbb42cc07cb2faTimo Sirainen b = p_new(ctx->parser_pool, struct message_boundary, 1);
ae9365d3de0cefae6f2a5d3e9ab79bc11c37b3d5Timo Sirainenstatic int parse_next_body_message_rfc822_init(struct message_parser_ctx *ctx,
ae9365d3de0cefae6f2a5d3e9ab79bc11c37b3d5Timo Sirainen ctx->part = message_part_append(ctx->part_pool, ctx->part);
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainenboundary_line_find(struct message_parser_ctx *ctx,
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainen const unsigned char *data, size_t size, bool full,
9a107dedb8f35727c21b3d1d54475d33f6e2eb1fTimo Sirainen /* not a boundary, just skip this line */
44dc970b18c4e2d06f34cb908924152156e4a45bTimo Sirainen /* need to find the end of line */
44dc970b18c4e2d06f34cb908924152156e4a45bTimo Sirainen if (memchr(data + 2, '\n', size - 2) == NULL &&
6bf1543bb7af03324c04e8f9ac8e430f395989aeTimo Sirainen /* no LF found */
44dc970b18c4e2d06f34cb908924152156e4a45bTimo Sirainen *boundary_r = boundary_find(ctx->boundaries, data, size);
44dc970b18c4e2d06f34cb908924152156e4a45bTimo Sirainen memcmp(data + (*boundary_r)->len, "--", 2) == 0;
44dc970b18c4e2d06f34cb908924152156e4a45bTimo Sirainenstatic int parse_next_mime_header_init(struct message_parser_ctx *ctx,
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainen ctx->part = message_part_append(ctx->part_pool, ctx->part);
e30b748edcef3cf3352478bf21fa8f785bdc773aTimo Sirainen ctx->part->flags |= MESSAGE_PART_FLAG_IS_MIME;
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainenstatic int parse_next_body_skip_boundary_line(struct message_parser_ctx *ctx,
ca98892a6b8a30ffc1fe26fcf02c7d59e3204e7eTimo Sirainen const unsigned char *ptr;
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainen if ((ret = message_parser_read_more(ctx, block_r, &full)) <= 0)
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainen ptr = memchr(block_r->data, '\n', block_r->size);
ea9fd7f876643e985946a2563140359064819b8eTimo Sirainen (ctx->flags & MESSAGE_PARSER_FLAG_INCLUDE_BOUNDARIES) != 0)
6925fd9cd70c30884406d50f1d85efb6561e776cTimo Sirainen /* found the LF */
6925fd9cd70c30884406d50f1d85efb6561e776cTimo Sirainen if (ctx->boundaries == NULL || ctx->boundaries->part != ctx->part) {
e0719fca14e337eee5a0d924bc4e9d53151a7188Timo Sirainen /* epilogue */
6925fd9cd70c30884406d50f1d85efb6561e776cTimo Sirainen ctx->parse_next_block = parse_next_body_to_boundary;
6925fd9cd70c30884406d50f1d85efb6561e776cTimo Sirainen ctx->parse_next_block = parse_next_body_to_eof;
6925fd9cd70c30884406d50f1d85efb6561e776cTimo Sirainen /* a new MIME part begins */
6925fd9cd70c30884406d50f1d85efb6561e776cTimo Sirainen ctx->parse_next_block = parse_next_mime_header_init;
4b43f50117630aa12b3cfd0cbd05ae22ba27fec1Timo Sirainen (ctx->flags & MESSAGE_PARSER_FLAG_INCLUDE_BOUNDARIES) != 0)
6925fd9cd70c30884406d50f1d85efb6561e776cTimo Sirainenstatic int parse_part_finish(struct message_parser_ctx *ctx,
992a9e2d6c6ee45d87089ac54267e0198a7802c3Timo Sirainen struct message_block *block_r, bool first_line)
992a9e2d6c6ee45d87089ac54267e0198a7802c3Timo Sirainen /* get back to parent MIME part, summing the child MIME part sizes
992a9e2d6c6ee45d87089ac54267e0198a7802c3Timo Sirainen into parent's body sizes */
992a9e2d6c6ee45d87089ac54267e0198a7802c3Timo Sirainen for (part = ctx->part; part != boundary->part; part = part->parent) {
992a9e2d6c6ee45d87089ac54267e0198a7802c3Timo Sirainen message_size_add(&part->parent->body_size, &part->body_size);
992a9e2d6c6ee45d87089ac54267e0198a7802c3Timo Sirainen message_size_add(&part->parent->body_size, &part->header_size);
992a9e2d6c6ee45d87089ac54267e0198a7802c3Timo Sirainen /* this boundary isn't needed anymore */
992a9e2d6c6ee45d87089ac54267e0198a7802c3Timo Sirainen /* forget about the boundaries we possibly skipped */
4b43f50117630aa12b3cfd0cbd05ae22ba27fec1Timo Sirainen /* the boundary itself should already be in buffer. add that. */
538303a216166f3526c0ae9658c9978275cfa100Timo Sirainen block_r->data = i_stream_get_data(ctx->input, &block_r->size);
e0719fca14e337eee5a0d924bc4e9d53151a7188Timo Sirainen /* [[\r]\n]--<boundary>[--] */
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainen line_size += 2 + boundary->len + (boundary->epilogue_found ? 2 : 0);
6925fd9cd70c30884406d50f1d85efb6561e776cTimo Sirainen i_assert(block_r->size >= ctx->skip + line_size);
c93cd163f9c1d4b0ca29f49cbfdbf474caeef5bfTimo Sirainen ctx->parse_next_block = parse_next_body_skip_boundary_line;
eddd9bf1a1369aea4a2715f6be1137da6d17d293Timo Sirainen if ((ctx->flags & MESSAGE_PARSER_FLAG_INCLUDE_BOUNDARIES) != 0)
b63284468d717737ecd63d78b6928c5d7f0d3634Timo Sirainenstatic int parse_next_body_to_boundary(struct message_parser_ctx *ctx,
b63284468d717737ecd63d78b6928c5d7f0d3634Timo Sirainen if ((ret = message_parser_read_more(ctx, block_r, &full)) <= 0)
b63284468d717737ecd63d78b6928c5d7f0d3634Timo Sirainen /* handle boundary in first line of message. alternatively
b63284468d717737ecd63d78b6928c5d7f0d3634Timo Sirainen it's an empty line. */
b63284468d717737ecd63d78b6928c5d7f0d3634Timo Sirainen return ret == 0 ? 0 :
b63284468d717737ecd63d78b6928c5d7f0d3634Timo Sirainen parse_part_finish(ctx, boundary, block_r, TRUE);
32ee977e189266744ef69ac4e832fd3111d6f949Timo Sirainen /* skip to beginning of the next line. the first line was
32ee977e189266744ef69ac4e832fd3111d6f949Timo Sirainen handled already. */
32ee977e189266744ef69ac4e832fd3111d6f949Timo Sirainen while ((next = memchr(cur, '\n', end - cur)) != NULL) {
1d2b188f0eedc3cab6e27ceac5425a037f38042eTimo Sirainen /* we can at least skip data until the first [CR]LF.
8e5fedd9ada47735be8ac0f8af2a66e8528bd776Timo Sirainen input buffer can't be full anymore. */
ae9365d3de0cefae6f2a5d3e9ab79bc11c37b3d5Timo Sirainen ret = boundary_line_find(ctx, cur, end - cur, full, &boundary);
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainen /* found / need more data */
43955c82f9f52c969c777b3da00bc170183dfdf2Timo Sirainen /* found / need more data */
8bb360f9e5de1c25e4f875205bb06e8bf15dae14Timo Sirainen } else if (boundary_start == 0) {
d3eff05aaa4c2bc0a7580ee87a54f6693f4a8241Timo Sirainen /* no linefeeds in this block. we can just skip it. */
32ee977e189266744ef69ac4e832fd3111d6f949Timo Sirainen if (block_r->data[block_r->size-1] == '\r' && !ctx->eof) {
32ee977e189266744ef69ac4e832fd3111d6f949Timo Sirainen /* this may be the beginning of the \r\n--boundary */
4dc8837ab37c1a606add1067e21ed868754db4e3Timo Sirainen /* the boundary wasn't found from this data block,
4dc8837ab37c1a606add1067e21ed868754db4e3Timo Sirainen we'll need more data. */
4dc8837ab37c1a606add1067e21ed868754db4e3Timo Sirainen ctx->want_count = (block_r->size - boundary_start) + 1;
8e5fedd9ada47735be8ac0f8af2a66e8528bd776Timo Sirainen /* a) we found the boundary
8e5fedd9ada47735be8ac0f8af2a66e8528bd776Timo Sirainen b) we need more data and haven't reached EOF yet
32ee977e189266744ef69ac4e832fd3111d6f949Timo Sirainen so leave CR+LF + last line to buffer */
d61a5e0e4ff58d1aa613f0d51161e5bb0f092514Timo Sirainen if ((ctx->part->flags & MESSAGE_PART_FLAG_MULTIPART) != 0 &&
d61a5e0e4ff58d1aa613f0d51161e5bb0f092514Timo Sirainen (ctx->flags & MESSAGE_PARSER_FLAG_INCLUDE_MULTIPART_BLOCKS) == 0)
d61a5e0e4ff58d1aa613f0d51161e5bb0f092514Timo Sirainen parse_part_finish(ctx, boundary, block_r, FALSE);
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainenstatic int parse_next_body_to_eof(struct message_parser_ctx *ctx,
43955c82f9f52c969c777b3da00bc170183dfdf2Timo Sirainen if ((ret = message_parser_read_more(ctx, block_r, &full)) <= 0)
d61a5e0e4ff58d1aa613f0d51161e5bb0f092514Timo Sirainen if ((ctx->part->flags & MESSAGE_PART_FLAG_MULTIPART) != 0 &&
b3bb775c6b735a7f6021dea799601fbfdb656e58Timo Sirainen (ctx->flags & MESSAGE_PARSER_FLAG_INCLUDE_MULTIPART_BLOCKS) == 0)
d22301419109ed4a38351715e6760011421dadecTimo Sirainenstatic void parse_content_type(struct message_parser_ctx *ctx,
41264e5dcef8335ab7ba422822b3ab518b7a327aTimo Sirainen const char *const *results;
b63284468d717737ecd63d78b6928c5d7f0d3634Timo Sirainen rfc822_parser_init(&parser, hdr->full_value, hdr->full_value_len, NULL);
b63284468d717737ecd63d78b6928c5d7f0d3634Timo Sirainen if (rfc822_parse_content_type(&parser, content_type) < 0)
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainen if (strcasecmp(str_c(content_type), "message/rfc822") == 0)
9a56220167d02bbcb66a81b7553f4eb4da939945Timo Sirainen ctx->part->flags |= MESSAGE_PART_FLAG_MESSAGE_RFC822;
9a56220167d02bbcb66a81b7553f4eb4da939945Timo Sirainen else if (strncasecmp(str_c(content_type), "text", 4) == 0 &&
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainen else if (strncasecmp(str_c(content_type), "multipart/", 10) == 0) {
8754bb7a1f24705ffa5434f9e10d57e0b3b88d6eTimo Sirainen ctx->part->flags |= MESSAGE_PART_FLAG_MULTIPART;
8754bb7a1f24705ffa5434f9e10d57e0b3b88d6eTimo Sirainen if (strcasecmp(str_c(content_type)+10, "digest") == 0)
641f0c0900ee6e7cf9667f4b40ed95cec7d0cdcaTimo Sirainen ctx->part->flags |= MESSAGE_PART_FLAG_MULTIPART_DIGEST;
b60baf6af900a610b2b2ddd24a46f8311acc3386Timo Sirainen if ((ctx->part->flags & MESSAGE_PART_FLAG_MULTIPART) == 0 ||
b60baf6af900a610b2b2ddd24a46f8311acc3386Timo Sirainen if (strcasecmp(results[0], "boundary") == 0) {
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainenstatic bool block_is_at_eoh(const struct message_block *block)
d3eff05aaa4c2bc0a7580ee87a54f6693f4a8241Timo Sirainen (MESSAGE_PART_FLAG_MESSAGE_RFC822 | MESSAGE_PART_FLAG_MULTIPART)
641f0c0900ee6e7cf9667f4b40ed95cec7d0cdcaTimo Sirainenstatic int parse_next_header(struct message_parser_ctx *ctx,
19e8adccba16ff419f5675b1575358c2956dce83Timo Sirainen if ((ret = message_parser_read_more(ctx, block_r, &full)) == 0)
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainen (part->flags & MESSAGE_PART_FLAG_IS_MIME) != 0) {
38df0cacce475112991e60d796f8f2105c616f01Timo Sirainen /* we are at the end of headers and we've determined that we're
38df0cacce475112991e60d796f8f2105c616f01Timo Sirainen going to start a multipart. add the boundary already here
38df0cacce475112991e60d796f8f2105c616f01Timo Sirainen at this point so we can reliably determine whether the
38df0cacce475112991e60d796f8f2105c616f01Timo Sirainen "\n--boundary" belongs to us or to a previous boundary.
38df0cacce475112991e60d796f8f2105c616f01Timo Sirainen this is a problem if the boundary prefixes are identical,
eae1d6e75713d3d658908ac39b719992e2f8a456Timo Sirainen because MIME requires only the prefix to match. */
d61a5e0e4ff58d1aa613f0d51161e5bb0f092514Timo Sirainen /* before parsing the header see if we can find a --boundary from here.
41264e5dcef8335ab7ba422822b3ab518b7a327aTimo Sirainen we're guaranteed to be at the beginning of the line here. */
41264e5dcef8335ab7ba422822b3ab518b7a327aTimo Sirainen /* our own body begins with our own --boundary.
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainen we don't want to handle that yet. */
43955c82f9f52c969c777b3da00bc170183dfdf2Timo Sirainen /* no boundary */
785d9cca224d33ca3938e9166784f6483e8a27d7Timo Sirainen ret = message_parse_header_next(ctx->hdr_parser_ctx, &hdr);
785d9cca224d33ca3938e9166784f6483e8a27d7Timo Sirainen if (ret == 0 || (ret < 0 && ctx->input->stream_errno != 0)) {
43955c82f9f52c969c777b3da00bc170183dfdf2Timo Sirainen ctx->want_count = i_stream_get_data_size(ctx->input) + 1;
785d9cca224d33ca3938e9166784f6483e8a27d7Timo Sirainen } else if (ret == 0) {
785d9cca224d33ca3938e9166784f6483e8a27d7Timo Sirainen /* need more data */
43955c82f9f52c969c777b3da00bc170183dfdf2Timo Sirainen /* boundary found. stop parsing headers here. The previous
785d9cca224d33ca3938e9166784f6483e8a27d7Timo Sirainen [CR]LF belongs to the MIME boundary though. */
785d9cca224d33ca3938e9166784f6483e8a27d7Timo Sirainen /* remove the newline size from the MIME header */
ae9365d3de0cefae6f2a5d3e9ab79bc11c37b3d5Timo Sirainen /* add the newline size to the parent's body */
44dc970b18c4e2d06f34cb908924152156e4a45bTimo Sirainen ctx->part->parent->body_size.virtual_size += 2;
5fb3bff645380804c9db2510940c41db6b8fdb01Timo Sirainen else if (strcasecmp(hdr->name, "Mime-Version") == 0) {
d22301419109ed4a38351715e6760011421dadecTimo Sirainen /* it's MIME. Content-* headers are valid */
d61a5e0e4ff58d1aa613f0d51161e5bb0f092514Timo Sirainen } else if (strcasecmp(hdr->name, "Content-Type") == 0) {
3a854fc26bcccb0398f0a9a6fa72db1a4ab8f0b8Timo Sirainen MESSAGE_PARSER_FLAG_MIME_VERSION_STRICT) == 0)
3a854fc26bcccb0398f0a9a6fa72db1a4ab8f0b8Timo Sirainen ctx->prev_hdr_newline_size = hdr->no_newline ? 0 :
d61a5e0e4ff58d1aa613f0d51161e5bb0f092514Timo Sirainen /* end of headers */
ae9365d3de0cefae6f2a5d3e9ab79bc11c37b3d5Timo Sirainen if ((part->flags & MESSAGE_PART_FLAG_IS_MIME) == 0) {
ae9365d3de0cefae6f2a5d3e9ab79bc11c37b3d5Timo Sirainen /* It's not MIME. Reset everything we found from
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainen Content-Type. */
d61a5e0e4ff58d1aa613f0d51161e5bb0f092514Timo Sirainen (part->flags & MESSAGE_PART_FLAG_IS_MIME) == 0) {
d61a5e0e4ff58d1aa613f0d51161e5bb0f092514Timo Sirainen /* when there's no content-type specified and we're below
d61a5e0e4ff58d1aa613f0d51161e5bb0f092514Timo Sirainen multipart/digest, assume the message/rfc822 content-type */
d61a5e0e4ff58d1aa613f0d51161e5bb0f092514Timo Sirainen part->flags |= MESSAGE_PART_FLAG_MESSAGE_RFC822;
35283613d4c04ce18836e9fc431582c87b3710a0Timo Sirainen /* otherwise we default to text/plain */
ae9365d3de0cefae6f2a5d3e9ab79bc11c37b3d5Timo Sirainen if (message_parse_header_has_nuls(ctx->hdr_parser_ctx))
d61a5e0e4ff58d1aa613f0d51161e5bb0f092514Timo Sirainen message_parse_header_deinit(&ctx->hdr_parser_ctx);
d61a5e0e4ff58d1aa613f0d51161e5bb0f092514Timo Sirainen i_assert((part->flags & MUTEX_FLAGS) != MUTEX_FLAGS);
fe813f74aaccb12f38e1bd9cd338c6a37fa646e5Timo Sirainen ctx->parse_next_block = parse_next_body_to_boundary;
fe813f74aaccb12f38e1bd9cd338c6a37fa646e5Timo Sirainen } else if ((part->flags & MESSAGE_PART_FLAG_MESSAGE_RFC822) != 0)
d61a5e0e4ff58d1aa613f0d51161e5bb0f092514Timo Sirainen ctx->parse_next_block = parse_next_body_message_rfc822_init;
60f9b96be55e63f0113e273dda8ba3b883c6f095Timo Sirainen ctx->parse_next_block = parse_next_body_to_boundary;
d61a5e0e4ff58d1aa613f0d51161e5bb0f092514Timo Sirainen ctx->parse_next_block = parse_next_body_to_eof;
ae9365d3de0cefae6f2a5d3e9ab79bc11c37b3d5Timo Sirainen /* return empty block as end of headers */
d61a5e0e4ff58d1aa613f0d51161e5bb0f092514Timo Sirainenstatic int parse_next_header_init(struct message_parser_ctx *ctx,
bde6382cf65fba6165dc3603f5419e194d87f404Timo Sirainen message_parse_header_init(ctx->input, &ctx->part->header_size,
d61a5e0e4ff58d1aa613f0d51161e5bb0f092514Timo Sirainenstatic int preparsed_parse_eof(struct message_parser_ctx *ctx ATTR_UNUSED,
ae9365d3de0cefae6f2a5d3e9ab79bc11c37b3d5Timo Sirainenstatic void preparsed_skip_to_next(struct message_parser_ctx *ctx)
ae9365d3de0cefae6f2a5d3e9ab79bc11c37b3d5Timo Sirainen ctx->parse_next_block = preparsed_parse_next_header_init;
d61a5e0e4ff58d1aa613f0d51161e5bb0f092514Timo Sirainen /* parse epilogue of multipart parent if requested */
60f9b96be55e63f0113e273dda8ba3b883c6f095Timo Sirainen (ctx->part->parent->flags & MESSAGE_PART_FLAG_MULTIPART) != 0 &&
d61a5e0e4ff58d1aa613f0d51161e5bb0f092514Timo Sirainen (ctx->flags & MESSAGE_PARSER_FLAG_INCLUDE_MULTIPART_BLOCKS) != 0) {
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainen /* check for presence of epilogue */
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainen uoff_t parent_end = ctx->part->parent->physical_pos +
7895c4845da515b0aa9bb156674a1fca40803f44Timo Sirainen ctx->part->parent->header_size.physical_size +
7895c4845da515b0aa9bb156674a1fca40803f44Timo Sirainen ctx->parse_next_block = preparsed_parse_epilogue_init;
7895c4845da515b0aa9bb156674a1fca40803f44Timo Sirainenstatic int preparsed_parse_body_finish(struct message_parser_ctx *ctx,
1d3f7c1278168d5b1cbfa9a2cc9929a0909056b4Timo Sirainenstatic int preparsed_parse_prologue_finish(struct message_parser_ctx *ctx,
0ed9ccd0047f75df54a49bc117ca301eb398e447Timo Sirainen ctx->parse_next_block = preparsed_parse_next_header_init;
0ed9ccd0047f75df54a49bc117ca301eb398e447Timo Sirainenstatic int preparsed_parse_body_more(struct message_parser_ctx *ctx,
7c449f545b10daa47027552f98d916a9805da662Timo Sirainen if ((ret = message_parser_read_more(ctx, block_r, &full)) <= 0)
35283613d4c04ce18836e9fc431582c87b3710a0Timo Sirainen if (ctx->input->v_offset + block_r->size >= end_offset) {
ced118ac5caf6fe83d34339c2c65c63b2aa768acTimo Sirainen block_r->size = end_offset - ctx->input->v_offset;
ae9365d3de0cefae6f2a5d3e9ab79bc11c37b3d5Timo Sirainen ctx->parse_next_block = preparsed_parse_body_finish;
ae9365d3de0cefae6f2a5d3e9ab79bc11c37b3d5Timo Sirainenstatic int preparsed_parse_prologue_more(struct message_parser_ctx *ctx,
8d889b6d842e96ecbe7b6493920bbb6df8e0ed30Timo Sirainen const unsigned char *cur;
ae9365d3de0cefae6f2a5d3e9ab79bc11c37b3d5Timo Sirainen end_offset = ctx->part->children->physical_pos;
ae9365d3de0cefae6f2a5d3e9ab79bc11c37b3d5Timo Sirainen if ((ret = message_parser_read_more(ctx, block_r, &full)) <= 0)
7d7f4648f72b8c70928e04514b0d93dad0ba6fd5Timo Sirainen if (ctx->input->v_offset + block_r->size >= end_offset) {
7d7f4648f72b8c70928e04514b0d93dad0ba6fd5Timo Sirainen /* we've got the full prologue: clip off the initial boundary */
7d7f4648f72b8c70928e04514b0d93dad0ba6fd5Timo Sirainen block_r->size = end_offset - ctx->input->v_offset;
c2cb5e469cd11759da22d82083d4fbb564d06dfaTimo Sirainen /* [\r]\n--boundary[\r]\n */
3a854fc26bcccb0398f0a9a6fa72db1a4ab8f0b8Timo Sirainen ctx->broken_reason = "Prologue boundary end not at expected position";
a10ed8c47534b4c6b6bf2711ccfe577e720a47b4Timo Sirainen /* find newline just before boundary */
f7ad1162969feff6b08f0e640a928db1783daae9Timo Sirainen if (cur[0] != '\n' || cur[1] != '-' || cur[2] != '-') {
ba3d9eeb0bec6ed8465d68fa2480ad085559b580Timo Sirainen ctx->broken_reason = "Prologue boundary beginning not at expected position";
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainen if (cur != block_r->data && cur[-1] == '\r') cur--;
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainen /* clip boundary */
ae9365d3de0cefae6f2a5d3e9ab79bc11c37b3d5Timo Sirainen ctx->parse_next_block = preparsed_parse_prologue_finish;
c2cb5e469cd11759da22d82083d4fbb564d06dfaTimo Sirainen /* retain enough data in the stream buffer to contain initial boundary */
641f0c0900ee6e7cf9667f4b40ed95cec7d0cdcaTimo Sirainen boundary_min_start = end_offset - BOUNDARY_END_MAX_LEN;
d4dcb9c30dba354cff7af6d303ecef7698194c55Timo Sirainen if (ctx->input->v_offset + block_r->size >= boundary_min_start) {
d4dcb9c30dba354cff7af6d303ecef7698194c55Timo Sirainen if (boundary_min_start <= ctx->input->v_offset)
d4dcb9c30dba354cff7af6d303ecef7698194c55Timo Sirainen block_r->size = boundary_min_start - ctx->input->v_offset;
d4dcb9c30dba354cff7af6d303ecef7698194c55Timo Sirainenstatic int preparsed_parse_epilogue_more(struct message_parser_ctx *ctx,
d4dcb9c30dba354cff7af6d303ecef7698194c55Timo Sirainen if ((ret = message_parser_read_more(ctx, block_r, &full)) <= 0)
d4dcb9c30dba354cff7af6d303ecef7698194c55Timo Sirainen if (ctx->input->v_offset + block_r->size >= end_offset) {
d4dcb9c30dba354cff7af6d303ecef7698194c55Timo Sirainen block_r->size = end_offset - ctx->input->v_offset;
d4dcb9c30dba354cff7af6d303ecef7698194c55Timo Sirainen ctx->parse_next_block = preparsed_parse_body_finish;
d4dcb9c30dba354cff7af6d303ecef7698194c55Timo Sirainenstatic int preparsed_parse_epilogue_boundary(struct message_parser_ctx *ctx,
65d6ca3fb5450b81df0190d9e9aa62c00fed5116Timo Sirainen ctx->broken_reason = "Epilogue position is wrong";
1582b4d531679849bba299c17b6ec9402b7df67dTimo Sirainen if ((ret = message_parser_read_more(ctx, block_r, &full)) <= 0)
f7ad1162969feff6b08f0e640a928db1783daae9Timo Sirainen /* [\r]\n--boundary--[\r]\n */
ba3d9eeb0bec6ed8465d68fa2480ad085559b580Timo Sirainen if (cur[0] != '\n' || cur[1] != '-' || data[2] != '-') {
ba3d9eeb0bec6ed8465d68fa2480ad085559b580Timo Sirainen ctx->broken_reason = "Epilogue boundary start not at expected position";
ba3d9eeb0bec6ed8465d68fa2480ad085559b580Timo Sirainen /* find the end of the line */
ba3d9eeb0bec6ed8465d68fa2480ad085559b580Timo Sirainen if ((cur = memchr(cur, '\n', size - (cur-data))) == NULL) {
ba3d9eeb0bec6ed8465d68fa2480ad085559b580Timo Sirainen if (end_offset < ctx->input->v_offset + size) {
ba3d9eeb0bec6ed8465d68fa2480ad085559b580Timo Sirainen ctx->broken_reason = "Epilogue boundary end not at expected position";
ba3d9eeb0bec6ed8465d68fa2480ad085559b580Timo Sirainen } else if (ctx->input->v_offset + size < end_offset &&
d22301419109ed4a38351715e6760011421dadecTimo Sirainen ctx->parse_next_block = preparsed_parse_epilogue_more;
a38ef15060e45e5060bb24c403a580b9a57a818cTimo Sirainenstatic int preparsed_parse_body_init(struct message_parser_ctx *ctx,
e05181d973025627ba08b631c12c07c3bbc99528Timo Sirainen /* header was actually larger than the cached size suggested */
e05181d973025627ba08b631c12c07c3bbc99528Timo Sirainen ctx->broken_reason = "Header larger than its cached size";
cce169a321c9c629e4f2db1a69dae3b75bbcb27aTimo Sirainen i_stream_skip(ctx->input, offset - ctx->input->v_offset);
cce169a321c9c629e4f2db1a69dae3b75bbcb27aTimo Sirainen /* multipart messages may begin with --boundary--, which makes them
b9b48aaaebf6f72dfab567cda073cde8a7b26598Timo Sirainen not have any children. */
38df0cacce475112991e60d796f8f2105c616f01Timo Sirainen if ((ctx->part->flags & MESSAGE_PART_FLAG_MULTIPART) == 0 ||
ae9365d3de0cefae6f2a5d3e9ab79bc11c37b3d5Timo Sirainen ctx->parse_next_block = preparsed_parse_body_more;
cce169a321c9c629e4f2db1a69dae3b75bbcb27aTimo Sirainen ctx->parse_next_block = preparsed_parse_prologue_more;
cce169a321c9c629e4f2db1a69dae3b75bbcb27aTimo Sirainenstatic int preparsed_parse_epilogue_init(struct message_parser_ctx *ctx,
7895c4845da515b0aa9bb156674a1fca40803f44Timo Sirainen /* last child was actually larger than the cached size suggested */
cce169a321c9c629e4f2db1a69dae3b75bbcb27aTimo Sirainen ctx->broken_reason = "Part larger than its cached size";
7abd00c65bbf53fda3f638101c4cd43647d1eb07Timo Sirainen i_stream_skip(ctx->input, offset - ctx->input->v_offset);
7abd00c65bbf53fda3f638101c4cd43647d1eb07Timo Sirainen ctx->parse_next_block = preparsed_parse_epilogue_boundary;
ba3d9eeb0bec6ed8465d68fa2480ad085559b580Timo Sirainenstatic int preparsed_parse_finish_header(struct message_parser_ctx *ctx,
e05181d973025627ba08b631c12c07c3bbc99528Timo Sirainen if ((ctx->part->flags & MESSAGE_PART_FLAG_MULTIPART) != 0 &&
ea9fd7f876643e985946a2563140359064819b8eTimo Sirainen (ctx->flags & MESSAGE_PARSER_FLAG_INCLUDE_MULTIPART_BLOCKS) != 0)
ea9fd7f876643e985946a2563140359064819b8eTimo Sirainen ctx->parse_next_block = preparsed_parse_body_init;
e05181d973025627ba08b631c12c07c3bbc99528Timo Sirainen ctx->parse_next_block = preparsed_parse_next_header_init;
65d6ca3fb5450b81df0190d9e9aa62c00fed5116Timo Sirainen } else if ((ctx->flags & MESSAGE_PARSER_FLAG_SKIP_BODY_BLOCK) == 0) {
4d938f46f4f956ecb802c30ca771922f5539a660Timo Sirainen ctx->parse_next_block = preparsed_parse_body_init;
f7ad1162969feff6b08f0e640a928db1783daae9Timo Sirainenstatic int preparsed_parse_next_header(struct message_parser_ctx *ctx,
f7ad1162969feff6b08f0e640a928db1783daae9Timo Sirainen ret = message_parse_header_next(ctx->hdr_parser_ctx, &hdr);
f7ad1162969feff6b08f0e640a928db1783daae9Timo Sirainen if (ret == 0 || (ret < 0 && ctx->input->stream_errno != 0)) {
f7ad1162969feff6b08f0e640a928db1783daae9Timo Sirainen ctx->want_count = i_stream_get_data_size(ctx->input) + 1;
6013fbad6638795a00e6c2a2dd2cdbee19612494Timo Sirainen message_parse_header_deinit(&ctx->hdr_parser_ctx);
6013fbad6638795a00e6c2a2dd2cdbee19612494Timo Sirainen ctx->parse_next_block = preparsed_parse_finish_header;
6013fbad6638795a00e6c2a2dd2cdbee19612494Timo Sirainen /* return empty block as end of headers */
bfa38f13d605bdd4c6d1f257c46a57bb28c0dd06Timo Sirainen if (ctx->input->v_offset != ctx->part->physical_pos +
bfa38f13d605bdd4c6d1f257c46a57bb28c0dd06Timo Sirainen ctx->broken_reason = "Cached header size mismatch";
bfa38f13d605bdd4c6d1f257c46a57bb28c0dd06Timo Sirainenstatic int preparsed_parse_next_header_init(struct message_parser_ctx *ctx,
6013fbad6638795a00e6c2a2dd2cdbee19612494Timo Sirainen i_assert(ctx->part->physical_pos >= ctx->input->v_offset);
6013fbad6638795a00e6c2a2dd2cdbee19612494Timo Sirainen i_stream_skip(ctx->input, ctx->part->physical_pos -
6013fbad6638795a00e6c2a2dd2cdbee19612494Timo Sirainen /* the header may become truncated by --boundaries. limit the header
6013fbad6638795a00e6c2a2dd2cdbee19612494Timo Sirainen stream's size to what it's supposed to be to avoid duplicating (and
9a107dedb8f35727c21b3d1d54475d33f6e2eb1fTimo Sirainen keeping in sync!) all the same complicated logic as in
9a107dedb8f35727c21b3d1d54475d33f6e2eb1fTimo Sirainen parse_next_header(). */
9a107dedb8f35727c21b3d1d54475d33f6e2eb1fTimo Sirainen hdr_input = i_stream_create_limit(ctx->input, ctx->part->header_size.physical_size);
9a107dedb8f35727c21b3d1d54475d33f6e2eb1fTimo Sirainen message_parse_header_init(hdr_input, NULL, ctx->hdr_flags);
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainen ctx->parse_next_block = preparsed_parse_next_header;
19e8adccba16ff419f5675b1575358c2956dce83Timo Sirainen return preparsed_parse_next_header(ctx, block_r);
9a107dedb8f35727c21b3d1d54475d33f6e2eb1fTimo Sirainen pool = pool_alloconly_create("Message Parser", 1024);
9a107dedb8f35727c21b3d1d54475d33f6e2eb1fTimo Sirainen ctx = p_new(pool, struct message_parser_ctx, 1);
9a107dedb8f35727c21b3d1d54475d33f6e2eb1fTimo Sirainenmessage_parser_init(pool_t part_pool, struct istream *input,
9a107dedb8f35727c21b3d1d54475d33f6e2eb1fTimo Sirainen ctx = message_parser_init_int(input, hdr_flags, flags);
c2cb5e469cd11759da22d82083d4fbb564d06dfaTimo Sirainen ctx->parts = ctx->part = p_new(part_pool, struct message_part, 1);
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainen ctx->parse_next_block = parse_next_header_init;
a8c1d873ebe624cf65893d79e1a509203116cb9aTimo Sirainenmessage_parser_init_from_parts(struct message_part *parts,
1f1ee8db68d9ae1604350801cd8dc33ebe29fe8aTimo Sirainen ctx = message_parser_init_int(input, hdr_flags, flags);
a8c1d873ebe624cf65893d79e1a509203116cb9aTimo Sirainen ctx->parse_next_block = preparsed_parse_next_header_init;
a8c1d873ebe624cf65893d79e1a509203116cb9aTimo Sirainenvoid message_parser_deinit(struct message_parser_ctx **_ctx,
a8c1d873ebe624cf65893d79e1a509203116cb9aTimo Sirainen if (message_parser_deinit_from_parts(_ctx, parts_r, &error) < 0)
a8c1d873ebe624cf65893d79e1a509203116cb9aTimo Sirainen i_panic("message_parser_deinit_from_parts: %s", error);
d756ebcfa96bd7cff02097c8f26df9df368b81b1Timo Sirainenint message_parser_deinit_from_parts(struct message_parser_ctx **_ctx,
b5e6f6f27c1461f0f9f202615eeb738a645188c3Timo Sirainen const char **error_r)
0cb2e8eb55e70f8ebe1e8349bdf49e4cbe5d8834Timo Sirainen int ret = ctx->broken_reason != NULL ? -1 : 0;
d22301419109ed4a38351715e6760011421dadecTimo Sirainen message_parse_header_deinit(&ctx->hdr_parser_ctx);
4d938f46f4f956ecb802c30ca771922f5539a660Timo Sirainenint message_parser_parse_next_block(struct message_parser_ctx *ctx,
785d9cca224d33ca3938e9166784f6483e8a27d7Timo Sirainen while ((ret = ctx->parse_next_block(ctx, block_r)) == 0) {
785d9cca224d33ca3938e9166784f6483e8a27d7Timo Sirainen ret = message_parser_read_more(ctx, block_r, &full);
eddd9bf1a1369aea4a2715f6be1137da6d17d293Timo Sirainen /* Successful EOF or unexpected failure */
eddd9bf1a1369aea4a2715f6be1137da6d17d293Timo Sirainen i_assert(ctx->input->eof || ctx->input->closed ||
7c449f545b10daa47027552f98d916a9805da662Timo Sirainen message_size_add(&ctx->part->parent->body_size,
ae9365d3de0cefae6f2a5d3e9ab79bc11c37b3d5Timo Sirainen message_size_add(&ctx->part->parent->body_size,
return ret;
void *context)
int ret;
if (ret < 0) {
void *context)
int ret;