message-parser.c revision 2db031260f1ff8b40dcf7fb39c8c10a90e9b8c5c
5f5870385cff47efd2f58e7892f251cf13761528Timo Sirainen/* Copyright (c) 2002-2007 Dovecot authors, see the included COPYING file */
02a54da28f376dd66d7939d8546a196a0045b486Timo Sirainen/* RFC 2046 limits boundaries to at most 70 chars, plus the "--" prefix and
02a54da28f376dd66d7939d8546a196a0045b486Timo Sirainen the "--" suffix. We'll allow a bit more just in case. */
02a54da28f376dd66d7939d8546a196a0045b486Timo Sirainen struct message_header_parser_ctx *hdr_parser_ctx;
02a54da28f376dd66d7939d8546a196a0045b486Timo Sirainen int (*parse_next_block)(struct message_parser_ctx *ctx,
02a54da28f376dd66d7939d8546a196a0045b486Timo Sirainenmessage_part_header_callback_t *null_message_part_header_callback = NULL;
15a07b47846c47a81d69a14d649564e222d6f742Timo Sirainenstatic int parse_next_header_init(struct message_parser_ctx *ctx,
c1faff067b29fb48426cb84260adba563e93189aTimo Sirainenstatic int parse_next_body_to_boundary(struct message_parser_ctx *ctx,
02a54da28f376dd66d7939d8546a196a0045b486Timo Sirainenstatic int parse_next_body_to_eof(struct message_parser_ctx *ctx,
02a54da28f376dd66d7939d8546a196a0045b486Timo Sirainenstatic int preparsed_parse_next_header_init(struct message_parser_ctx *ctx,
02a54da28f376dd66d7939d8546a196a0045b486Timo Sirainenboundary_find(struct message_boundary *boundaries,
0df9428baed48afaff90b4d4f03792d2fd756a43Timo Sirainen /* As MIME spec says: search from latest one to oldest one so that we
0df9428baed48afaff90b4d4f03792d2fd756a43Timo Sirainen don't break if the same boundary is used in nested parts. Also the
0df9428baed48afaff90b4d4f03792d2fd756a43Timo Sirainen full message line doesn't have to match the boundary, only the
0df9428baed48afaff90b4d4f03792d2fd756a43Timo Sirainen beginning. */
02a54da28f376dd66d7939d8546a196a0045b486Timo Sirainen memcmp(boundaries->boundary, data, boundaries->len) == 0)
eacce2276278ce6a8176a9a100807dba50bbfb36Timo Sirainenstatic void parse_body_add_block(struct message_parser_ctx *ctx,
02a54da28f376dd66d7939d8546a196a0045b486Timo Sirainen unsigned int missing_cr_count = 0;
02a54da28f376dd66d7939d8546a196a0045b486Timo Sirainen ctx->part->flags |= MESSAGE_PART_FLAG_HAS_NULS;
02a54da28f376dd66d7939d8546a196a0045b486Timo Sirainen ctx->part->body_size.physical_size += block->size;
02a54da28f376dd66d7939d8546a196a0045b486Timo Sirainen ctx->part->body_size.virtual_size += block->size + missing_cr_count;
02a54da28f376dd66d7939d8546a196a0045b486Timo Sirainenstatic int message_parser_read_more(struct message_parser_ctx *ctx,
02a54da28f376dd66d7939d8546a196a0045b486Timo Sirainen ret = i_stream_read_data(ctx->input, &block_r->data,
02a54da28f376dd66d7939d8546a196a0045b486Timo Sirainenmessage_part_append(pool_t pool, struct message_part *parent)
3a9eb305fd4aad5502cb7e64625874385ab5bc19Timo Sirainen /* set child position */
02a54da28f376dd66d7939d8546a196a0045b486Timo Sirainenstatic void parse_next_body_multipart_init(struct message_parser_ctx *ctx)
02a54da28f376dd66d7939d8546a196a0045b486Timo Sirainen b = p_new(ctx->parser_pool, struct message_boundary, 1);
02a54da28f376dd66d7939d8546a196a0045b486Timo Sirainenstatic void parse_next_body_message_rfc822_init(struct message_parser_ctx *ctx)
02a54da28f376dd66d7939d8546a196a0045b486Timo Sirainen ctx->part = message_part_append(ctx->part_pool, ctx->part);
4530cfa7456c10cd03fe9120c75f8bcb2f623ba4Timo Sirainenboundary_line_find(struct message_parser_ctx *ctx,
4530cfa7456c10cd03fe9120c75f8bcb2f623ba4Timo Sirainen const unsigned char *data, size_t size, bool full,
4530cfa7456c10cd03fe9120c75f8bcb2f623ba4Timo Sirainen /* not a boundary, just skip this line */
4530cfa7456c10cd03fe9120c75f8bcb2f623ba4Timo Sirainen /* need to find the end of line */
4530cfa7456c10cd03fe9120c75f8bcb2f623ba4Timo Sirainen /* no LF found */
02a54da28f376dd66d7939d8546a196a0045b486Timo Sirainen *boundary_r = boundary_find(ctx->boundaries, data, size);
4530cfa7456c10cd03fe9120c75f8bcb2f623ba4Timo Sirainen memcmp(data + (*boundary_r)->len, "--", 2) == 0;
0b2f7be9fadfd4026a9174e51170890cde3edf48Timo Sirainenstatic int parse_next_body_skip_boundary_line(struct message_parser_ctx *ctx,
4530cfa7456c10cd03fe9120c75f8bcb2f623ba4Timo Sirainen if ((ret = message_parser_read_more(ctx, block_r)) <= 0)
0b2f7be9fadfd4026a9174e51170890cde3edf48Timo Sirainen /* found the LF */
0b2f7be9fadfd4026a9174e51170890cde3edf48Timo Sirainen /* a new MIME part begins */
34015eb0b74735f2fac07c12697bde20a94735e6Timo Sirainen ctx->part = message_part_append(ctx->part_pool, ctx->part);
34015eb0b74735f2fac07c12697bde20a94735e6Timo Sirainen ctx->part->flags |= MESSAGE_PART_FLAG_IS_MIME;
34015eb0b74735f2fac07c12697bde20a94735e6Timo Sirainen ctx->parse_next_block = parse_next_header_init;
34015eb0b74735f2fac07c12697bde20a94735e6Timo Sirainenstatic int parse_part_finish(struct message_parser_ctx *ctx,
02a54da28f376dd66d7939d8546a196a0045b486Timo Sirainen struct message_block *block_r, bool first_line)
0b2f7be9fadfd4026a9174e51170890cde3edf48Timo Sirainen /* message ended unexpectedly */
0b2f7be9fadfd4026a9174e51170890cde3edf48Timo Sirainen /* get back to parent MIME part, summing the child MIME part sizes
0b2f7be9fadfd4026a9174e51170890cde3edf48Timo Sirainen into parent's body sizes */
0b2f7be9fadfd4026a9174e51170890cde3edf48Timo Sirainen for (part = ctx->part; part != boundary->part; part = part->parent) {
0b2f7be9fadfd4026a9174e51170890cde3edf48Timo Sirainen message_size_add(&part->parent->body_size, &part->body_size);
0b2f7be9fadfd4026a9174e51170890cde3edf48Timo Sirainen message_size_add(&part->parent->body_size, &part->header_size);
02a54da28f376dd66d7939d8546a196a0045b486Timo Sirainen /* this boundary isn't needed anymore */
0b2f7be9fadfd4026a9174e51170890cde3edf48Timo Sirainen ctx->parse_next_block = parse_next_body_to_boundary;
02a54da28f376dd66d7939d8546a196a0045b486Timo Sirainen ctx->parse_next_block = parse_next_body_to_eof;
02a54da28f376dd66d7939d8546a196a0045b486Timo Sirainen /* forget about the boundaries we possibly skipped */
02a54da28f376dd66d7939d8546a196a0045b486Timo Sirainen /* the boundary itself should already be in buffer. add that. */
02a54da28f376dd66d7939d8546a196a0045b486Timo Sirainen block_r->data = i_stream_get_data(ctx->input, &block_r->size);
02a54da28f376dd66d7939d8546a196a0045b486Timo Sirainen i_assert(block_r->size >= ctx->skip + 2 + boundary->len +
02a54da28f376dd66d7939d8546a196a0045b486Timo Sirainen /* [\n]--<boundary> */
02a54da28f376dd66d7939d8546a196a0045b486Timo Sirainen block_r->size = (first_line ? 0 : 1) + 2 + boundary->len;
2615df45a8027948a474abe5e817b34b0499c171Timo Sirainen ctx->parse_next_block = parse_next_body_skip_boundary_line;
3e0bae44b65f5c46989fcef3d1e07203f496327eTimo Sirainenstatic int parse_next_body_to_boundary(struct message_parser_ctx *ctx,
02a54da28f376dd66d7939d8546a196a0045b486Timo Sirainen const unsigned char *data;
02a54da28f376dd66d7939d8546a196a0045b486Timo Sirainen if ((ret = message_parser_read_more(ctx, block_r)) == 0 ||
02a54da28f376dd66d7939d8546a196a0045b486Timo Sirainen /* handle boundary in first line of message. alternatively
02a54da28f376dd66d7939d8546a196a0045b486Timo Sirainen it's an empty line. */
0df9428baed48afaff90b4d4f03792d2fd756a43Timo Sirainen return parse_part_finish(ctx, boundary, block_r, TRUE);
2615df45a8027948a474abe5e817b34b0499c171Timo Sirainen for (i = boundary_start = 0; i < block_r->size; i++) {
02a54da28f376dd66d7939d8546a196a0045b486Timo Sirainen /* skip to beginning of the next line. the first line was
02a54da28f376dd66d7939d8546a196a0045b486Timo Sirainen handled already. */
e1203014de25c8c3d3975a9f4b4a04616df4bba2Timo Sirainen /* we can skip the first lines. input buffer can't be
e1203014de25c8c3d3975a9f4b4a04616df4bba2Timo Sirainen full anymore. */
e1203014de25c8c3d3975a9f4b4a04616df4bba2Timo Sirainen /* no linefeeds in this block. we can just skip it. */
ec5fec7eab19e134a2607b7e224b3e14a1771ee0Timo Sirainen ret = boundary_line_find(ctx, block_r->data + next_line_idx,
02a54da28f376dd66d7939d8546a196a0045b486Timo Sirainen /* found / need more data */
02a54da28f376dd66d7939d8546a196a0045b486Timo Sirainen /* the boundary wasn't found from this data block,
02a54da28f376dd66d7939d8546a196a0045b486Timo Sirainen we'll need more data. */
02a54da28f376dd66d7939d8546a196a0045b486Timo Sirainen ctx->want_count = (block_r->size - boundary_start) + 1;
02a54da28f376dd66d7939d8546a196a0045b486Timo Sirainen /* leave CR+LF + last line to buffer */
02a54da28f376dd66d7939d8546a196a0045b486Timo Sirainen parse_part_finish(ctx, boundary, block_r, FALSE);
e10d8b1291090c26b9ef499637e6e632485ca5beTimo Sirainenstatic int parse_next_body_to_eof(struct message_parser_ctx *ctx,
02a54da28f376dd66d7939d8546a196a0045b486Timo Sirainen if ((ret = message_parser_read_more(ctx, block_r)) <= 0)
02a54da28f376dd66d7939d8546a196a0045b486Timo Sirainenstatic void parse_content_type(struct message_parser_ctx *ctx,
02a54da28f376dd66d7939d8546a196a0045b486Timo Sirainen rfc822_parser_init(&parser, hdr->full_value, hdr->full_value_len, NULL);
e5b723864630e40c9028808ef417dd3d6fbf495bTimo Sirainen if (rfc822_parse_content_type(&parser, content_type) < 0)
e5b723864630e40c9028808ef417dd3d6fbf495bTimo Sirainen if (strcasecmp(str_c(content_type), "message/rfc822") == 0)
e5b723864630e40c9028808ef417dd3d6fbf495bTimo Sirainen ctx->part->flags |= MESSAGE_PART_FLAG_MESSAGE_RFC822;
e5b723864630e40c9028808ef417dd3d6fbf495bTimo Sirainen else if (strncasecmp(str_c(content_type), "text", 4) == 0 &&
e5b723864630e40c9028808ef417dd3d6fbf495bTimo Sirainen else if (strncasecmp(str_c(content_type), "multipart/", 10) == 0) {
e5b723864630e40c9028808ef417dd3d6fbf495bTimo Sirainen ctx->part->flags |= MESSAGE_PART_FLAG_MULTIPART;
e5b723864630e40c9028808ef417dd3d6fbf495bTimo Sirainen if (strcasecmp(str_c(content_type)+10, "digest") == 0)
e5b723864630e40c9028808ef417dd3d6fbf495bTimo Sirainen ctx->part->flags |= MESSAGE_PART_FLAG_MULTIPART_DIGEST;
31a574fda352ef4f71dbff9c30e15e4744e132c0Timo Sirainen if ((ctx->part->flags & MESSAGE_PART_FLAG_MULTIPART) == 0 ||
02a54da28f376dd66d7939d8546a196a0045b486Timo Sirainen while (rfc822_parse_content_param(&parser, &key, &value) > 0) {
0df9428baed48afaff90b4d4f03792d2fd756a43Timo Sirainen ctx->last_boundary = p_strdup(ctx->parser_pool, value);
02a54da28f376dd66d7939d8546a196a0045b486Timo Sirainen (MESSAGE_PART_FLAG_MESSAGE_RFC822 | MESSAGE_PART_FLAG_MULTIPART)
0df9428baed48afaff90b4d4f03792d2fd756a43Timo Sirainenstatic int parse_next_header(struct message_parser_ctx *ctx,
0df9428baed48afaff90b4d4f03792d2fd756a43Timo Sirainen ret = message_parse_header_next(ctx->hdr_parser_ctx, &hdr);
6882df5fbca4a09cdaa95f54d70bb31b5920528cTimo Sirainen if (ret == 0 || (ret < 0 && ctx->input->stream_errno != 0)) {
02a54da28f376dd66d7939d8546a196a0045b486Timo Sirainen else if (strcasecmp(hdr->name, "Mime-Version") == 0) {
6882df5fbca4a09cdaa95f54d70bb31b5920528cTimo Sirainen /* it's MIME. Content-* headers are valid */
02a54da28f376dd66d7939d8546a196a0045b486Timo Sirainen } else if (strcasecmp(hdr->name, "Content-Type") == 0) {
02a54da28f376dd66d7939d8546a196a0045b486Timo Sirainen MESSAGE_PARSER_FLAG_MIME_VERSION_STRICT) == 0)
bd4de9c8152e6ea032c1cb1df8b79635ff5ddf9eTimo Sirainen /* end of headers */
bd4de9c8152e6ea032c1cb1df8b79635ff5ddf9eTimo Sirainen if ((part->flags & MESSAGE_PART_FLAG_MULTIPART) != 0 &&
4530cfa7456c10cd03fe9120c75f8bcb2f623ba4Timo Sirainen /* multipart type but no message boundary */
bd4de9c8152e6ea032c1cb1df8b79635ff5ddf9eTimo Sirainen if ((part->flags & MESSAGE_PART_FLAG_IS_MIME) == 0) {
bd4de9c8152e6ea032c1cb1df8b79635ff5ddf9eTimo Sirainen /* It's not MIME. Reset everything we found from
bd4de9c8152e6ea032c1cb1df8b79635ff5ddf9eTimo Sirainen Content-Type. */
bd4de9c8152e6ea032c1cb1df8b79635ff5ddf9eTimo Sirainen (part->flags & MESSAGE_PART_FLAG_IS_MIME) == 0) {
4530cfa7456c10cd03fe9120c75f8bcb2f623ba4Timo Sirainen /* when there's no content-type specified and we're below
4530cfa7456c10cd03fe9120c75f8bcb2f623ba4Timo Sirainen multipart/digest, assume the message/rfc822 content-type */
4530cfa7456c10cd03fe9120c75f8bcb2f623ba4Timo Sirainen part->flags |= MESSAGE_PART_FLAG_MESSAGE_RFC822;
4530cfa7456c10cd03fe9120c75f8bcb2f623ba4Timo Sirainen /* otherwise we default to text/plain */
bd4de9c8152e6ea032c1cb1df8b79635ff5ddf9eTimo Sirainen if (message_parse_header_has_nuls(ctx->hdr_parser_ctx))
bd4de9c8152e6ea032c1cb1df8b79635ff5ddf9eTimo Sirainen message_parse_header_deinit(&ctx->hdr_parser_ctx);
bd4de9c8152e6ea032c1cb1df8b79635ff5ddf9eTimo Sirainen i_assert((part->flags & MUTEX_FLAGS) != MUTEX_FLAGS);
bd4de9c8152e6ea032c1cb1df8b79635ff5ddf9eTimo Sirainen ctx->parse_next_block = parse_next_body_to_boundary;
bd4de9c8152e6ea032c1cb1df8b79635ff5ddf9eTimo Sirainen } else if (part->flags & MESSAGE_PART_FLAG_MESSAGE_RFC822) {
bd4de9c8152e6ea032c1cb1df8b79635ff5ddf9eTimo Sirainen ctx->parse_next_block = parse_next_header_init;
bd4de9c8152e6ea032c1cb1df8b79635ff5ddf9eTimo Sirainen ctx->parse_next_block = parse_next_body_to_boundary;
bd4de9c8152e6ea032c1cb1df8b79635ff5ddf9eTimo Sirainen ctx->parse_next_block = parse_next_body_to_eof;
bd4de9c8152e6ea032c1cb1df8b79635ff5ddf9eTimo Sirainen /* return empty block as end of headers */
4530cfa7456c10cd03fe9120c75f8bcb2f623ba4Timo Sirainenstatic int parse_next_header_init(struct message_parser_ctx *ctx,
04a7b696e5255aa956277a0f7cabee736c69ec96Timo Sirainen message_parse_header_init(ctx->input, &ctx->part->header_size,
02a54da28f376dd66d7939d8546a196a0045b486Timo Sirainenstatic int preparsed_parse_eof(struct message_parser_ctx *ctx ATTR_UNUSED,
02a54da28f376dd66d7939d8546a196a0045b486Timo Sirainenstatic void preparsed_skip_to_next(struct message_parser_ctx *ctx)
02a54da28f376dd66d7939d8546a196a0045b486Timo Sirainen ctx->parse_next_block = preparsed_parse_next_header_init;
0df9428baed48afaff90b4d4f03792d2fd756a43Timo Sirainenstatic int preparsed_parse_body_finish(struct message_parser_ctx *ctx,
c3d40f3092af25cad9e807a85eaad4d92aab107bTimo Sirainenstatic int preparsed_parse_body_more(struct message_parser_ctx *ctx,
bd4de9c8152e6ea032c1cb1df8b79635ff5ddf9eTimo Sirainen if (ctx->input->v_offset + block_r->size >= end_offset) {
bd4de9c8152e6ea032c1cb1df8b79635ff5ddf9eTimo Sirainen block_r->size = end_offset - ctx->input->v_offset;
4530cfa7456c10cd03fe9120c75f8bcb2f623ba4Timo Sirainen ctx->parse_next_block = preparsed_parse_body_finish;
3e0bae44b65f5c46989fcef3d1e07203f496327eTimo Sirainenstatic int preparsed_parse_body_init(struct message_parser_ctx *ctx,
bd4de9c8152e6ea032c1cb1df8b79635ff5ddf9eTimo Sirainen i_stream_skip(ctx->input, offset - ctx->input->v_offset);
4530cfa7456c10cd03fe9120c75f8bcb2f623ba4Timo Sirainen ctx->parse_next_block = preparsed_parse_body_more;
02a54da28f376dd66d7939d8546a196a0045b486Timo Sirainen return preparsed_parse_body_more(ctx, block_r);
02a54da28f376dd66d7939d8546a196a0045b486Timo Sirainenstatic int preparsed_parse_finish_header(struct message_parser_ctx *ctx,
02a54da28f376dd66d7939d8546a196a0045b486Timo Sirainen ctx->parse_next_block = preparsed_parse_next_header_init;
02a54da28f376dd66d7939d8546a196a0045b486Timo Sirainen } else if ((ctx->flags & MESSAGE_PARSER_FLAG_SKIP_BODY_BLOCK) == 0) {
02a54da28f376dd66d7939d8546a196a0045b486Timo Sirainen ctx->parse_next_block = preparsed_parse_body_init;
04a7b696e5255aa956277a0f7cabee736c69ec96Timo Sirainenstatic int preparsed_parse_next_header(struct message_parser_ctx *ctx,
02a54da28f376dd66d7939d8546a196a0045b486Timo Sirainen ret = message_parse_header_next(ctx->hdr_parser_ctx, &hdr);
02a54da28f376dd66d7939d8546a196a0045b486Timo Sirainen if (ret == 0 || (ret < 0 && ctx->input->stream_errno != 0)) {
02a54da28f376dd66d7939d8546a196a0045b486Timo Sirainen message_parse_header_deinit(&ctx->hdr_parser_ctx);
02a54da28f376dd66d7939d8546a196a0045b486Timo Sirainen ctx->parse_next_block = preparsed_parse_finish_header;
02a54da28f376dd66d7939d8546a196a0045b486Timo Sirainen /* return empty block as end of headers */
02a54da28f376dd66d7939d8546a196a0045b486Timo Sirainenstatic int preparsed_parse_next_header_init(struct message_parser_ctx *ctx,
cd2cd224d3216a243d55c71c298a5b7684de0ac4Timo Sirainen i_assert(ctx->part->physical_pos >= ctx->input->v_offset);
cd2cd224d3216a243d55c71c298a5b7684de0ac4Timo Sirainen i_stream_skip(ctx->input, ctx->part->physical_pos -
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen message_parse_header_init(ctx->input, NULL, ctx->hdr_flags);
cd2cd224d3216a243d55c71c298a5b7684de0ac4Timo Sirainen ctx->parse_next_block = preparsed_parse_next_header;
02a54da28f376dd66d7939d8546a196a0045b486Timo Sirainen return preparsed_parse_next_header(ctx, block_r);
02a54da28f376dd66d7939d8546a196a0045b486Timo Sirainenmessage_parser_init(pool_t part_pool, struct istream *input,
02a54da28f376dd66d7939d8546a196a0045b486Timo Sirainen pool = pool_alloconly_create("Message Parser", 1024);
02a54da28f376dd66d7939d8546a196a0045b486Timo Sirainen ctx = p_new(pool, struct message_parser_ctx, 1);
02a54da28f376dd66d7939d8546a196a0045b486Timo Sirainen ctx->parts = ctx->part = part_pool == NULL ? NULL :
b66d803de86bfb411165b3465b0d9ef64ecfe2a1Timo Sirainen ctx->parse_next_block = parse_next_header_init;
struct message_parser_ctx *
return ctx;
return parts;
int ret;
if (ret == 0) {
return ret;
void *context)
int ret;
if (ret < 0) {
void *context)
int ret;