message-parser.c revision f0e123a7b3b7315a3ea954f9ce1baaddf7c03fab
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen/* Copyright (C) 2002 Timo Sirainen */
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainenmessage_parse_part_header(struct message_parser_ctx *parser_ctx);
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainenmessage_parse_part_body(struct message_parser_ctx *parser_ctx);
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainenmessage_parse_body(struct message_parser_ctx *parser_ctx,
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen struct message_size *msg_size, int *has_nuls);
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainenmessage_skip_boundary(struct message_parser_ctx *parser_ctx,
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen struct message_size *boundary_size, int *has_nuls);
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainenstatic void message_size_add_part(struct message_size *dest,
78fa3c578c14ee8a612f86cf73b6181c7f16463fTimo Sirainen dest->lines += part->header_size.lines + part->body_size.lines;
78fa3c578c14ee8a612f86cf73b6181c7f16463fTimo Sirainenmessage_part_append(pool_t pool, struct message_part *parent)
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen /* set child position */
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainenstatic void parse_content_type(const unsigned char *value, size_t value_len,
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen struct message_parser_ctx *parser_ctx = context;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen const char *str;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen if (parser_ctx->last_content_type != NULL || value_len == 0)
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen p_strndup(parser_ctx->parser_pool, value, value_len);
78fa3c578c14ee8a612f86cf73b6181c7f16463fTimo Sirainen parser_ctx->part->flags |= MESSAGE_PART_FLAG_MESSAGE_RFC822;
7487ff578435377bbeefffdbfb78ca09ed1292dfTimo Sirainen parser_ctx->part->flags |= MESSAGE_PART_FLAG_TEXT;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen else if (strncasecmp(str, "multipart/", 10) == 0) {
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen parser_ctx->part->flags |= MESSAGE_PART_FLAG_MULTIPART;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainenparse_content_type_param(const unsigned char *name, size_t name_len,
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen struct message_parser_ctx *parser_ctx = context;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen if ((parser_ctx->part->flags & MESSAGE_PART_FLAG_MULTIPART) == 0 ||
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen name_len != 8 || memcasecmp(name, "boundary", 8) != 0)
6d2b3ce2c6ef62334985ece4f0ab8b154e0e9560Timo Sirainen p_strndup(parser_ctx->parser_pool, value, value_len);
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainenmessage_parse_multipart(struct message_parser_ctx *parser_ctx)
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen struct message_part *parent_part, *next_part, *part;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen /* multipart message. add new boundary */
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen b = p_new(parser_ctx->parser_pool, struct message_boundary, 1);
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen /* reset fields */
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen /* skip the data before the first boundary */
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen next_part = message_skip_boundary(parser_ctx, parser_ctx->boundaries,
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen parent_part->flags |= MESSAGE_PART_FLAG_HAS_NULS;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen /* now, parse the parts */
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen /* new child */
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen part = message_part_append(parser_ctx->part_pool, parent_part);
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen if ((parent_part->flags & MESSAGE_PART_FLAG_IS_MIME) != 0)
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen next_part = message_parse_part_body(parser_ctx);
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen if ((part->flags & MESSAGE_PART_FLAG_HAS_NULS) != 0) {
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen /* it also belongs to parent */
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen parent_part->flags |= MESSAGE_PART_FLAG_HAS_NULS;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen /* update our size */
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen message_size_add_part(&parent_part->body_size, part);
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen /* skip the boundary */
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen parent_part->flags |= MESSAGE_PART_FLAG_HAS_NULS;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen /* remove boundary */
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen (MESSAGE_PART_FLAG_MESSAGE_RFC822 | MESSAGE_PART_FLAG_MULTIPART)
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainenstatic void message_parse_part_header(struct message_parser_ctx *parser_ctx)
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen hdr_ctx = message_parse_header_init(parser_ctx->input,
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen while ((ret = message_parse_header_next(hdr_ctx, &hdr)) > 0) {
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen /* call the user-defined header parser */
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen parser_ctx->callback(part, hdr, parser_ctx->context);
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen if (!hdr->eoh && strcasecmp(hdr->name, "Mime-Version") == 0) {
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen /* it's MIME. Content-* headers are valid */
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen if (!hdr->eoh && strcasecmp(hdr->name, "Content-Type") == 0) {
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen /* we need to know the boundary */
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen if ((part->flags & MESSAGE_PART_FLAG_IS_MIME) == 0) {
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen /* It's not MIME. Reset everything we found from
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen Content-Type. */
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen parser_ctx->callback(part, NULL, parser_ctx->context);
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen i_assert((part->flags & MUTEX_FLAGS) != MUTEX_FLAGS);
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainenmessage_parse_part_body(struct message_parser_ctx *parser_ctx)
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen /* when there's no content-type specified and we're
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen below multipart/digest, then assume message/rfc822
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen content-type */
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen part->flags |= MESSAGE_PART_FLAG_MESSAGE_RFC822;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen /* otherwise we default to text/plain */
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen if (part->flags & MESSAGE_PART_FLAG_MESSAGE_RFC822) {
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen /* message/rfc822 part - the message body begins with
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen headers again, this works pretty much the same as
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen a single multipart/mixed item */
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen part = message_part_append(parser_ctx->part_pool, part);
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen next_part = message_parse_part_body(parser_ctx);
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen /* our body size is the size of header+body in message/rfc822 */
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen message_size_add_part(&part->parent->body_size, part);
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen /* normal message, read until the next boundary */
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen if ((part->flags & MESSAGE_PART_FLAG_HAS_NULS) != 0 &&
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen /* it also belongs to parent */
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen part->parent->flags |= MESSAGE_PART_FLAG_HAS_NULS;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainenstatic void message_skip_line(struct istream *input,
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen const unsigned char *msg;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen while (i_stream_read_data(input, &msg, &size, startpos) > 0) {
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen /* leave the last character, it may be \r */
6c2ce1d5bf17b21e804a079eb0f973b7ab83e0d8Timo Sirainenboundary_find(struct message_boundary *boundaries,
6c2ce1d5bf17b21e804a079eb0f973b7ab83e0d8Timo Sirainen memcmp(boundaries->boundary, msg, boundaries->len) == 0)
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen/* read until next boundary is found. if skip_over = FALSE, stop at the
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen [\r]\n before the boundary, otherwise leave it right after the known
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen boundary so the ending "--" can be checked. */
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen const unsigned char *msg;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen size_t i, size, startpos, line_start, missing_cr_count;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen while (i_stream_read_data(input, &msg, &size, startpos) > 0) {
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen i >= line_start+2 && msg[line_start] == '-' &&
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen /* possible boundary */
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen /* missing CR */
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen /* continued long line, continue skipping over it */
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen /* long partial line, see if it's a boundary.
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen RFC-2046 says that the boundaries must be
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen 70 chars without "--" or less. We allow
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen a bit larger.. */
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen /* nope, we can skip over the line, just
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen leave the last char since it may be \r */
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen /* leave the last line to buffer, it may be
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen if (i > 0) i--; /* leave the \r\n too */
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen if (i > 0) i--;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen line_start != (size_t)-1 && line_start+2 <= size &&
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen msg[line_start] == '-' && msg[line_start+1] == '-') {
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen /* possible boundary without line feed at end */
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen /* leave the pointer right after the boundary */
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen } else if (line_start > 0 && msg[line_start-1] == '\n') {
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen /* leave the \r\n before the boundary */
7487ff578435377bbeefffdbfb78ca09ed1292dfTimo Sirainen if (line_start > 0 && msg[line_start-1] == '\r')
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen msg_size->virtual_size += startpos + missing_cr_count;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen i_assert(msg_size->virtual_size >= msg_size->physical_size);
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainenmessage_parse_body(struct message_parser_ctx *parser_ctx,
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen message_get_body_size(parser_ctx->input, &body_size, has_nuls);
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen boundary = message_find_boundary(parser_ctx->input, boundaries,
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen return boundary == NULL ? NULL : boundary->part;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen/* skip data until next boundary is found. if it's end boundary,
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen skip the footer as well. */
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainenmessage_skip_boundary(struct message_parser_ctx *parser_ctx,
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen struct message_size *boundary_size, int *has_nuls)
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen const unsigned char *msg;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen boundary = message_find_boundary(parser_ctx->input, boundaries,
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen /* now, see if it's end boundary */
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen if (i_stream_read_data(parser_ctx->input, &msg, &size, 1) > 0)
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen end_boundary = msg[0] == '-' && msg[1] == '-';
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen /* skip the rest of the line */
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen message_skip_line(parser_ctx->input, boundary_size,
if (end_boundary) {
struct message_parser_ctx *
return ctx;
return parts;
void *context)
void *context)
void *context)
void *context)
int ret;
struct message_header_parser_ctx *
int skip_initial_lwsp)
return ctx;
const unsigned char *msg;
colon_pos = 0;
if (ret > 0) {
size = 0;
startpos = 0;
size = i;
colon_pos = i;
if (i < parse_size) {
size = i;
startpos = i;
if (size == 0) {
colon_pos--;
if (!last_no_newline)