message-parser.c revision f0e123a7b3b7315a3ea954f9ce1baaddf7c03fab
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen/* Copyright (C) 2002 Timo Sirainen */
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen#include "lib.h"
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen#include "buffer.h"
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen#include "istream.h"
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen#include "str.h"
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen#include "strescape.h"
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen#include "message-content-parser.h"
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen#include "message-parser.h"
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen#include "message-size.h"
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
/* one entry in the list of multipart boundaries that are currently open.
   new entries are pushed to the head of the list, so the innermost
   multipart comes first. */
struct message_boundary {
	/* next entry in the list (the previously pushed boundary) */
	struct message_boundary *next;

	/* the part that was current when this boundary was registered */
	struct message_part *part;
	/* boundary string, compared against the text following a leading
	   "--" on a line */
	const char *boundary;
	/* cached strlen(boundary) */
	size_t len;
};
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainenstruct message_parser_ctx {
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen pool_t parser_pool, part_pool;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen struct istream *input;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen struct message_part *parts, *part;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen char *last_boundary;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen char *last_content_type;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen struct message_boundary *boundaries;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen message_header_callback_t *callback;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen message_body_callback_t *body_callback;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen void *context;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen};
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainenstruct message_header_parser_ctx {
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen struct message_header_line line;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen struct istream *input;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen struct message_size *hdr_size;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen string_t *name;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen buffer_t *value_buf;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen size_t skip;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen int skip_initial_lwsp;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen int has_nuls;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen};
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
/* forward declarations. the header/body parsers and the multipart parser
   call each other recursively, so they have to be declared up front. */
static void message_parse_part_header(struct message_parser_ctx *parser_ctx);
static struct message_part *
message_parse_part_body(struct message_parser_ctx *parser_ctx);

/* body readers, defined later in this file */
static struct message_part *
message_parse_body(struct message_parser_ctx *parser_ctx,
		   struct message_boundary *boundaries,
		   struct message_size *msg_size, int *has_nuls);
static struct message_part *
message_skip_boundary(struct message_parser_ctx *parser_ctx,
		      struct message_boundary *boundaries,
		      struct message_size *boundary_size, int *has_nuls);
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainenstatic void message_size_add_part(struct message_size *dest,
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen struct message_part *part)
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen{
78fa3c578c14ee8a612f86cf73b6181c7f16463fTimo Sirainen dest->physical_size +=
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen part->header_size.physical_size +
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen part->body_size.physical_size;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen dest->virtual_size +=
78fa3c578c14ee8a612f86cf73b6181c7f16463fTimo Sirainen part->header_size.virtual_size +
78fa3c578c14ee8a612f86cf73b6181c7f16463fTimo Sirainen part->body_size.virtual_size;
78fa3c578c14ee8a612f86cf73b6181c7f16463fTimo Sirainen dest->lines += part->header_size.lines + part->body_size.lines;
78fa3c578c14ee8a612f86cf73b6181c7f16463fTimo Sirainen}
78fa3c578c14ee8a612f86cf73b6181c7f16463fTimo Sirainen
78fa3c578c14ee8a612f86cf73b6181c7f16463fTimo Sirainenstatic struct message_part *
78fa3c578c14ee8a612f86cf73b6181c7f16463fTimo Sirainenmessage_part_append(pool_t pool, struct message_part *parent)
78fa3c578c14ee8a612f86cf73b6181c7f16463fTimo Sirainen{
78fa3c578c14ee8a612f86cf73b6181c7f16463fTimo Sirainen struct message_part *part, **list;
78fa3c578c14ee8a612f86cf73b6181c7f16463fTimo Sirainen
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen part = p_new(pool, struct message_part, 1);
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen part->parent = parent;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen /* set child position */
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen part->physical_pos =
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen parent->physical_pos +
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen parent->body_size.physical_size +
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen parent->header_size.physical_size;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen list = &part->parent->children;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen while (*list != NULL)
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen list = &(*list)->next;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen *list = part;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen return part;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen}
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainenstatic void parse_content_type(const unsigned char *value, size_t value_len,
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen void *context)
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen{
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen struct message_parser_ctx *parser_ctx = context;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen const char *str;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen if (parser_ctx->last_content_type != NULL || value_len == 0)
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen return;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen str = parser_ctx->last_content_type =
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen p_strndup(parser_ctx->parser_pool, value, value_len);
78fa3c578c14ee8a612f86cf73b6181c7f16463fTimo Sirainen
78fa3c578c14ee8a612f86cf73b6181c7f16463fTimo Sirainen if (strcasecmp(str, "message/rfc822") == 0)
78fa3c578c14ee8a612f86cf73b6181c7f16463fTimo Sirainen parser_ctx->part->flags |= MESSAGE_PART_FLAG_MESSAGE_RFC822;
7487ff578435377bbeefffdbfb78ca09ed1292dfTimo Sirainen else if (strncasecmp(str, "text", 4) == 0 &&
7487ff578435377bbeefffdbfb78ca09ed1292dfTimo Sirainen (str[4] == '/' || str[4] == '\0'))
7487ff578435377bbeefffdbfb78ca09ed1292dfTimo Sirainen parser_ctx->part->flags |= MESSAGE_PART_FLAG_TEXT;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen else if (strncasecmp(str, "multipart/", 10) == 0) {
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen parser_ctx->part->flags |= MESSAGE_PART_FLAG_MULTIPART;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen if (strcasecmp(str+10, "digest") == 0) {
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen parser_ctx->part->flags |=
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen MESSAGE_PART_FLAG_MULTIPART_DIGEST;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen }
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen }
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen}
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainenstatic void
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainenparse_content_type_param(const unsigned char *name, size_t name_len,
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen const unsigned char *value, size_t value_len,
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen int value_quoted, void *context)
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen{
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen struct message_parser_ctx *parser_ctx = context;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen if ((parser_ctx->part->flags & MESSAGE_PART_FLAG_MULTIPART) == 0 ||
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen name_len != 8 || memcasecmp(name, "boundary", 8) != 0)
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen return;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
6d2b3ce2c6ef62334985ece4f0ab8b154e0e9560Timo Sirainen if (parser_ctx->last_boundary == NULL) {
6d2b3ce2c6ef62334985ece4f0ab8b154e0e9560Timo Sirainen parser_ctx->last_boundary =
6d2b3ce2c6ef62334985ece4f0ab8b154e0e9560Timo Sirainen p_strndup(parser_ctx->parser_pool, value, value_len);
6d2b3ce2c6ef62334985ece4f0ab8b154e0e9560Timo Sirainen if (value_quoted)
6d2b3ce2c6ef62334985ece4f0ab8b154e0e9560Timo Sirainen str_unescape(parser_ctx->last_boundary);
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen }
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen}
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainenstatic struct message_part *
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainenmessage_parse_multipart(struct message_parser_ctx *parser_ctx)
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen{
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen struct message_part *parent_part, *next_part, *part;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen struct message_boundary *b;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen int has_nuls;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen /* multipart message. add new boundary */
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen b = p_new(parser_ctx->parser_pool, struct message_boundary, 1);
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen b->part = parser_ctx->part;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen b->boundary = parser_ctx->last_boundary;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen b->len = strlen(b->boundary);
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen b->next = parser_ctx->boundaries;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen parser_ctx->boundaries = b;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen /* reset fields */
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen parser_ctx->last_boundary = NULL;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen parser_ctx->last_content_type = NULL;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen /* skip the data before the first boundary */
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen parent_part = parser_ctx->part;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen next_part = message_skip_boundary(parser_ctx, parser_ctx->boundaries,
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen &parent_part->body_size, &has_nuls);
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen if (has_nuls)
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen parent_part->flags |= MESSAGE_PART_FLAG_HAS_NULS;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen /* now, parse the parts */
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen while (next_part == parent_part) {
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen /* new child */
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen part = message_part_append(parser_ctx->part_pool, parent_part);
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen if ((parent_part->flags & MESSAGE_PART_FLAG_IS_MIME) != 0)
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen part->flags |= MESSAGE_PART_FLAG_IS_MIME;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen parser_ctx->part = part;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen message_parse_part_header(parser_ctx);
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen next_part = message_parse_part_body(parser_ctx);
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen if ((part->flags & MESSAGE_PART_FLAG_HAS_NULS) != 0) {
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen /* it also belongs to parent */
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen parent_part->flags |= MESSAGE_PART_FLAG_HAS_NULS;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen }
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen /* update our size */
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen message_size_add_part(&parent_part->body_size, part);
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen if (next_part != parent_part)
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen break;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen /* skip the boundary */
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen next_part = message_skip_boundary(parser_ctx,
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen parser_ctx->boundaries,
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen &parent_part->body_size,
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen &has_nuls);
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen if (has_nuls)
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen parent_part->flags |= MESSAGE_PART_FLAG_HAS_NULS;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen }
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen /* remove boundary */
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen i_assert(parser_ctx->boundaries == b);
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen parser_ctx->boundaries = b->next;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen return next_part;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen}
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen#define MUTEX_FLAGS \
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen (MESSAGE_PART_FLAG_MESSAGE_RFC822 | MESSAGE_PART_FLAG_MULTIPART)
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainenstatic void message_parse_part_header(struct message_parser_ctx *parser_ctx)
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen{
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen struct message_part *part = parser_ctx->part;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen struct message_header_parser_ctx *hdr_ctx;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen struct message_header_line *hdr;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen int ret;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen hdr_ctx = message_parse_header_init(parser_ctx->input,
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen &part->header_size, TRUE);
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen while ((ret = message_parse_header_next(hdr_ctx, &hdr)) > 0) {
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen /* call the user-defined header parser */
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen if (parser_ctx->callback != NULL)
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen parser_ctx->callback(part, hdr, parser_ctx->context);
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen if (!hdr->eoh && strcasecmp(hdr->name, "Mime-Version") == 0) {
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen /* it's MIME. Content-* headers are valid */
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen part->flags |= MESSAGE_PART_FLAG_IS_MIME;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen }
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen if (!hdr->eoh && strcasecmp(hdr->name, "Content-Type") == 0) {
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen if (hdr->continues) {
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen hdr->use_full_value = TRUE;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen continue;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen }
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen /* we need to know the boundary */
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen message_content_parse_header(hdr->full_value,
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen hdr->full_value_len,
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen parse_content_type,
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen parse_content_type_param,
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen parser_ctx);
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen }
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen }
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen i_assert(ret != 0);
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen if ((part->flags & MESSAGE_PART_FLAG_IS_MIME) == 0) {
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen /* It's not MIME. Reset everything we found from
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen Content-Type. */
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen part->flags = 0;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen parser_ctx->last_boundary = NULL;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen parser_ctx->last_content_type = NULL;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen }
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen if (parser_ctx->callback != NULL)
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen parser_ctx->callback(part, NULL, parser_ctx->context);
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen if (hdr_ctx->has_nuls)
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen part->flags |= MESSAGE_PART_FLAG_HAS_NULS;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen message_parse_header_deinit(hdr_ctx);
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen i_assert((part->flags & MUTEX_FLAGS) != MUTEX_FLAGS);
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen}
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainenstatic struct message_part *
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainenmessage_parse_part_body(struct message_parser_ctx *parser_ctx)
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen{
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen struct message_part *part = parser_ctx->part;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen struct message_part *next_part;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen int has_nuls;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen if (parser_ctx->last_boundary != NULL)
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen return message_parse_multipart(parser_ctx);
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen if (parser_ctx->last_content_type == NULL) {
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen if (part->parent != NULL &&
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen (part->parent->flags &
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen MESSAGE_PART_FLAG_MULTIPART_DIGEST)) {
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen /* when there's no content-type specified and we're
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen below multipart/digest, the assume message/rfc822
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen content-type */
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen part->flags |= MESSAGE_PART_FLAG_MESSAGE_RFC822;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen } else {
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen /* otherwise we default to text/plain */
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen part->flags |= MESSAGE_PART_FLAG_TEXT;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen }
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen }
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen parser_ctx->last_boundary = NULL;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen parser_ctx->last_content_type = NULL;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen if (part->flags & MESSAGE_PART_FLAG_MESSAGE_RFC822) {
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen /* message/rfc822 part - the message body begins with
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen headers again, this works pretty much the same as
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen a single multipart/mixed item */
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen part = message_part_append(parser_ctx->part_pool, part);
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen parser_ctx->part = part;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen message_parse_part_header(parser_ctx);
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen next_part = message_parse_part_body(parser_ctx);
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen parser_ctx->part = part->parent;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen /* our body size is the size of header+body in message/rfc822 */
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen message_size_add_part(&part->parent->body_size, part);
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen } else {
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen /* normal message, read until the next boundary */
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen next_part = message_parse_body(parser_ctx,
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen parser_ctx->boundaries,
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen &part->body_size, &has_nuls);
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen if (has_nuls)
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen part->flags |= MESSAGE_PART_FLAG_HAS_NULS;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen }
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen if ((part->flags & MESSAGE_PART_FLAG_HAS_NULS) != 0 &&
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen part->parent != NULL) {
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen /* it also belongs to parent */
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen part->parent->flags |= MESSAGE_PART_FLAG_HAS_NULS;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen }
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen return next_part;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen}
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainenstatic void message_skip_line(struct istream *input,
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen struct message_size *msg_size, int skip_lf,
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen int *has_nuls)
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen{
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen const unsigned char *msg;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen size_t i, size, startpos;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen startpos = 0;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen *has_nuls = FALSE;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen while (i_stream_read_data(input, &msg, &size, startpos) > 0) {
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen for (i = startpos; i < size; i++) {
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen if (msg[i] == '\0')
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen *has_nuls = TRUE;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen else if (msg[i] == '\n') {
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen if (!skip_lf) {
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen if (i > 0 && msg[i-1] == '\r')
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen i--;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen startpos = i;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen goto __break;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen }
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen if (msg_size != NULL) {
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen if (i == 0 || msg[i-1] != '\r')
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen msg_size->virtual_size++;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen msg_size->lines++;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen }
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen startpos = i+1;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen goto __break;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen }
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen }
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen /* leave the last character, it may be \r */
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen i_stream_skip(input, i - 1);
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen startpos = 1;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen if (msg_size != NULL) {
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen msg_size->physical_size += i - 1;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen msg_size->virtual_size += i - 1;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen }
6c2ce1d5bf17b21e804a079eb0f973b7ab83e0d8Timo Sirainen }
6c2ce1d5bf17b21e804a079eb0f973b7ab83e0d8Timo Sirainen__break:
6c2ce1d5bf17b21e804a079eb0f973b7ab83e0d8Timo Sirainen i_stream_skip(input, startpos);
6c2ce1d5bf17b21e804a079eb0f973b7ab83e0d8Timo Sirainen
6c2ce1d5bf17b21e804a079eb0f973b7ab83e0d8Timo Sirainen if (msg_size != NULL) {
6c2ce1d5bf17b21e804a079eb0f973b7ab83e0d8Timo Sirainen msg_size->physical_size += startpos;
6c2ce1d5bf17b21e804a079eb0f973b7ab83e0d8Timo Sirainen msg_size->virtual_size += startpos;
6c2ce1d5bf17b21e804a079eb0f973b7ab83e0d8Timo Sirainen }
6c2ce1d5bf17b21e804a079eb0f973b7ab83e0d8Timo Sirainen}
6c2ce1d5bf17b21e804a079eb0f973b7ab83e0d8Timo Sirainen
6c2ce1d5bf17b21e804a079eb0f973b7ab83e0d8Timo Sirainenstatic struct message_boundary *
6c2ce1d5bf17b21e804a079eb0f973b7ab83e0d8Timo Sirainenboundary_find(struct message_boundary *boundaries,
6c2ce1d5bf17b21e804a079eb0f973b7ab83e0d8Timo Sirainen const unsigned char *msg, size_t len)
6c2ce1d5bf17b21e804a079eb0f973b7ab83e0d8Timo Sirainen{
6c2ce1d5bf17b21e804a079eb0f973b7ab83e0d8Timo Sirainen while (boundaries != NULL) {
6c2ce1d5bf17b21e804a079eb0f973b7ab83e0d8Timo Sirainen if (boundaries->len <= len &&
6c2ce1d5bf17b21e804a079eb0f973b7ab83e0d8Timo Sirainen memcmp(boundaries->boundary, msg, boundaries->len) == 0)
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen return boundaries;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen boundaries = boundaries->next;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen }
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen return NULL;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen}
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen/* read until next boundary is found. if skip_over = FALSE, stop at the
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen [\r]\n before the boundary, otherwise leave it right after the known
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen boundary so the ending "--" can be checked. */
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainenstatic struct message_boundary *
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainenmessage_find_boundary(struct istream *input,
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen struct message_boundary *boundaries,
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen struct message_size *msg_size, int skip_over,
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen int *has_nuls)
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen{
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen struct message_boundary *boundary;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen const unsigned char *msg;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen size_t i, size, startpos, line_start, missing_cr_count;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen boundary = NULL;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen missing_cr_count = startpos = line_start = 0;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen *has_nuls = FALSE;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen while (i_stream_read_data(input, &msg, &size, startpos) > 0) {
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen for (i = startpos; i < size; i++) {
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen if (msg[i] != '\n') {
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen if (msg[i] == '\0')
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen *has_nuls = TRUE;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen continue;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen }
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen if (line_start != (size_t)-1 &&
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen i >= line_start+2 && msg[line_start] == '-' &&
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen msg[line_start+1] == '-') {
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen /* possible boundary */
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen boundary = boundary_find(boundaries,
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen msg + line_start + 2,
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen i - line_start - 2);
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen if (boundary != NULL)
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen break;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen }
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen if (i == 0 || msg[i-1] != '\r') {
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen /* missing CR */
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen missing_cr_count++;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen }
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen msg_size->lines++;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen line_start = i+1;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen }
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen if (boundary != NULL)
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen break;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen if (line_start == (size_t)-1) {
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen /* continued long line, continue skipping over it */
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen } else if (i - line_start > 128) {
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen /* long partial line, see if it's a boundary.
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen RFC-2046 says that the boundaries must be
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen 70 chars without "--" or less. We allow
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen a bit larger.. */
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen if (msg[line_start] == '-' &&
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen msg[line_start+1] == '-') {
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen boundary = boundary_find(boundaries,
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen msg + line_start + 2,
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen i - line_start - 2);
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen if (boundary != NULL)
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen break;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen }
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen /* nope, we can skip over the line, just
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen leave the last char since it may be \r */
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen i--;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen line_start = (size_t)-1;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen } else {
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen /* leave the last line to buffer, it may be
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen boundary */
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen i = line_start;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen if (i > 0) i--; /* leave the \r\n too */
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen if (i > 0) i--;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen line_start -= i;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen }
7487ff578435377bbeefffdbfb78ca09ed1292dfTimo Sirainen
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen i_stream_skip(input, i);
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen msg_size->physical_size += i;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen msg_size->virtual_size += i;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen startpos = size - i;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen }
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen if (boundary == NULL &&
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen line_start != (size_t)-1 && line_start+2 <= size &&
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen msg[line_start] == '-' && msg[line_start+1] == '-') {
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen /* possible boundary without line feed at end */
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen boundary = boundary_find(boundaries,
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen msg + line_start + 2,
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen size - line_start - 2);
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen }
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen if (boundary != NULL) {
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen i_assert(line_start != (size_t)-1);
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen if (skip_over) {
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen /* leave the pointer right after the boundary */
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen line_start += 2 + boundary->len;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen } else if (line_start > 0 && msg[line_start-1] == '\n') {
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen /* leave the \r\n before the boundary */
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen line_start--;
7487ff578435377bbeefffdbfb78ca09ed1292dfTimo Sirainen msg_size->lines--;
7487ff578435377bbeefffdbfb78ca09ed1292dfTimo Sirainen
7487ff578435377bbeefffdbfb78ca09ed1292dfTimo Sirainen if (line_start > 0 && msg[line_start-1] == '\r')
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen line_start--;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen else
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen missing_cr_count--;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen }
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen startpos = line_start;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen }
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen i_stream_skip(input, startpos);
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen msg_size->physical_size += startpos;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen msg_size->virtual_size += startpos + missing_cr_count;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen i_assert(msg_size->virtual_size >= msg_size->physical_size);
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen return boundary;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen}
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainenstatic struct message_part *
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainenmessage_parse_body(struct message_parser_ctx *parser_ctx,
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen struct message_boundary *boundaries,
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen struct message_size *msg_size, int *has_nuls)
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen{
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen struct message_boundary *boundary;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen struct message_size body_size;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen if (boundaries == NULL) {
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen message_get_body_size(parser_ctx->input, &body_size, has_nuls);
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen message_size_add(msg_size, &body_size);
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen boundary = NULL;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen } else {
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen boundary = message_find_boundary(parser_ctx->input, boundaries,
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen msg_size, FALSE, has_nuls);
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen }
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen return boundary == NULL ? NULL : boundary->part;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen}
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen/* skip data until next boundary is found. if it's end boundary,
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen skip the footer as well. */
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainenstatic struct message_part *
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainenmessage_skip_boundary(struct message_parser_ctx *parser_ctx,
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen struct message_boundary *boundaries,
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen struct message_size *boundary_size, int *has_nuls)
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen{
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen struct message_boundary *boundary;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen const unsigned char *msg;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen size_t size;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen int end_boundary;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen boundary = message_find_boundary(parser_ctx->input, boundaries,
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen boundary_size, TRUE, has_nuls);
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen if (boundary == NULL)
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen return NULL;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen /* now, see if it's end boundary */
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen end_boundary = FALSE;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen if (i_stream_read_data(parser_ctx->input, &msg, &size, 1) > 0)
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen end_boundary = msg[0] == '-' && msg[1] == '-';
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen /* skip the rest of the line */
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen message_skip_line(parser_ctx->input, boundary_size,
!end_boundary, has_nuls);
if (end_boundary) {
/* skip the footer */
return message_parse_body(parser_ctx, boundary->next,
boundary_size, has_nuls);
}
return boundary == NULL ? NULL : boundary->part;
}
/* Create a parser context reading from input. The context itself is
   allocated from a newly created "Message Parser" pool, while the root
   message part is allocated from the caller's part_pool. */
struct message_parser_ctx *
message_parser_init(pool_t part_pool, struct istream *input)
{
	pool_t parser_pool = pool_alloconly_create("Message Parser", 1024);
	struct message_parser_ctx *ctx =
		p_new(parser_pool, struct message_parser_ctx, 1);

	ctx->parser_pool = parser_pool;
	ctx->part_pool = part_pool;
	ctx->input = input;

	/* parsing begins from the root part */
	ctx->part = p_new(part_pool, struct message_part, 1);
	ctx->parts = ctx->part;
	return ctx;
}
/* Drop the parser's own pool reference and return the root of the parsed
   message parts. */
struct message_part *message_parser_deinit(struct message_parser_ctx *ctx)
{
	/* ctx lives in parser_pool, so grab the root before unreferencing */
	struct message_part *root = ctx->parts;

	pool_unref(ctx->parser_pool);
	return root;
}
/* Parse the header of the parser's current part using the given header
   callback and context, and copy the part's header size to *hdr_size. */
void message_parser_parse_header(struct message_parser_ctx *ctx,
				 struct message_size *hdr_size,
				 message_header_callback_t *callback,
				 void *context)
{
	ctx->context = context;
	ctx->callback = callback;

	message_parse_part_header(ctx);

	*hdr_size = ctx->part->header_size;
}
/* Parse the body of the parser's current part. The header and body
   callbacks and their shared context are stored into ctx before
   message_parse_part_body() is run. */
void message_parser_parse_body(struct message_parser_ctx *ctx,
			       message_header_callback_t *hdr_callback,
			       message_body_callback_t *body_callback,
			       void *context)
{
	ctx->context = context;
	ctx->body_callback = body_callback;
	ctx->callback = hdr_callback;

	message_parse_part_body(ctx);
}
/* Walk through part and its following siblings, seeking input to each
   part's physical position and parsing its header with callback. Children
   are handled recursively right after their parent's header. The input is
   only ever skipped forward. */
static void part_parse_headers(struct message_part *part, struct istream *input,
			       message_header_callback_t *callback,
			       void *context)
{
	for (; part != NULL; part = part->next) {
		/* the headers of every part get parsed,
		   including the multiparts' */
		i_assert(part->physical_pos >= input->v_offset);
		i_stream_skip(input, part->physical_pos - input->v_offset);

		message_parse_header(part, input, NULL, callback, context);

		if (part->children == NULL)
			continue;

		part_parse_headers(part->children, input, callback, context);
	}
}
/* Parse the headers of an already known message part tree: callback is
   called for the headers of part, its siblings and all their children. */
void message_parse_from_parts(struct message_part *part, struct istream *input,
			      message_header_callback_t *callback,
			      void *context)
{
	part_parse_headers(part, input,
			   callback, context);
}
/* Parse a whole header from input, calling callback once for each header
   line and finally once more with a NULL line. If hdr_size is non-NULL,
   the header parser fills it. */
void message_parse_header(struct message_part *part, struct istream *input,
			  struct message_size *hdr_size,
			  message_header_callback_t *callback, void *context)
{
	struct message_header_parser_ctx *hdr_ctx =
		message_parse_header_init(input, hdr_size, TRUE);
	struct message_header_line *hdr;
	int ret;

	for (;;) {
		ret = message_parse_header_next(hdr_ctx, &hdr);
		if (ret <= 0)
			break;
		callback(part, hdr, context);
	}
	/* "need more data" isn't handled here */
	i_assert(ret != 0);

	message_parse_header_deinit(hdr_ctx);

	/* the final call is done only after deinit has
	   skipped the last pending data */
	callback(part, NULL, context);
}
/* Allocate a header parser context reading from input. If hdr_size is
   non-NULL, it's zeroed here and updated as the header lines get parsed.
   skip_initial_lwsp is stored as-is for message_parse_header_next(). */
struct message_header_parser_ctx *
message_parse_header_init(struct istream *input, struct message_size *hdr_size,
			  int skip_initial_lwsp)
{
	struct message_header_parser_ctx *ctx;

	if (hdr_size != NULL)
		memset(hdr_size, 0, sizeof(*hdr_size));

	ctx = i_new(struct message_header_parser_ctx, 1);
	ctx->skip_initial_lwsp = skip_initial_lwsp;
	ctx->hdr_size = hdr_size;
	ctx->input = input;
	ctx->name = str_new(default_pool, 128);
	return ctx;
}
/* Skip the input data still pending from the last parsed line and free
   the header parser context with its buffers. */
void message_parse_header_deinit(struct message_header_parser_ctx *ctx)
{
	/* the last returned line hasn't been skipped yet */
	i_stream_skip(ctx->input, ctx->skip);

	/* value_buf exists only if a full value was ever collected */
	if (ctx->value_buf != NULL)
		buffer_free(ctx->value_buf);
	str_free(ctx->name);

	i_free(ctx);
}
/* Parse the next header line from ctx->input.

   Returns:
     1  - a line was parsed and *hdr_r points to it (ctx->line). The end of
          headers is returned as a line with eoh set.
     0  - the read returned 0 and the stream isn't at EOF; more data is
          needed.
     -1 - the end of headers was already returned earlier, or the read
          failed with no unparsed bytes left.
   *hdr_r is NULL unless 1 is returned.

   The data of the returned line isn't skipped from the input until the next
   call (or deinit), so the line's name/value/middle pointers refer to the
   data returned by i_stream_read_data() or to buffers owned by ctx.

   A line whose following byte is LWSP is marked with continues = TRUE; the
   next call then returns the continuation with continued = TRUE. If the
   caller set use_full_value on a line, the following continuation lines
   have their values collected into full_value. use_full_value is reset on
   every returned line. */
int message_parse_header_next(struct message_header_parser_ctx *ctx,
			      struct message_header_line **hdr_r)
{
	struct message_header_line *line = &ctx->line;
	const unsigned char *msg;
	size_t i, size, startpos, colon_pos, parse_size;
	int ret, last_no_newline;

	*hdr_r = NULL;
	if (line->eoh)
		return -1;

	/* skip the previously returned line now that the caller is
	   done with it */
	if (ctx->skip > 0) {
		i_stream_skip(ctx->input, ctx->skip);
		ctx->skip = 0;
	}

	/* colon_pos == UINT_MAX means that ':' hasn't been found yet */
	startpos = 0; colon_pos = UINT_MAX;

	last_no_newline = line->no_newline;
	line->no_newline = FALSE;

	if (line->continues) {
		if (line->use_full_value && !line->continued) {
			/* save the first line */
			if (ctx->value_buf != NULL)
				buffer_set_used_size(ctx->value_buf, 0);
			else {
				ctx->value_buf =
					buffer_create_dynamic(default_pool,
							      4096);
			}
			buffer_append(ctx->value_buf,
				      line->value, line->value_len);
		}

		line->continued = TRUE;
		line->continues = FALSE;
		/* continuation lines have no name, don't look for ':' */
		colon_pos = 0;
	} else {
		/* new header line */
		line->continued = FALSE;
		line->name_offset = ctx->input->v_offset;
	}

	for (;;) {
		ret = i_stream_read_data(ctx->input, &msg, &size, startpos+1);

		if (ret > 0) {
			/* we want to know one byte in advance to find out
			   if it's multiline header */
			parse_size = size-1;
		} else {
			parse_size = size;
		}

		if (ret <= 0 && startpos == size) {
			if (ret == -1) {
				/* error / EOF with no bytes */
				return -1;
			}

			if (ret == 0 && !ctx->input->eof) {
				/* stream is nonblocking - need more data */
				return 0;
			}

			if (msg[0] == '\n' ||
			    (msg[0] == '\r' && size > 1 && msg[1] == '\n')) {
				/* end of headers - this mostly happens just
				   with mbox where headers are read separately
				   from body */
				size = 0;
				if (ctx->hdr_size != NULL)
					ctx->hdr_size->lines++;
				if (msg[0] == '\r')
					ctx->skip = 2;
				else {
					ctx->skip = 1;
					if (ctx->hdr_size != NULL)
						ctx->hdr_size->virtual_size++;
				}
				break;
			}

			/* a) line is larger than input buffer
			   b) header ended unexpectedly */
			if (colon_pos == UINT_MAX && ret == -2 &&
			    !line->continued) {
				/* header name is huge. just skip it. */
				message_skip_line(ctx->input, ctx->hdr_size,
						  TRUE, &ctx->has_nuls);
				startpos = 0;
				continue;
			}

			if (ret == -2) {
				/* go back to last LWSP if found. */
				size_t min_pos =
					!line->continued ? colon_pos : 0;
				for (i = size-1; i > min_pos; i--) {
					if (IS_LWSP(msg[i])) {
						size = i;
						break;
					}
				}

				line->continues = TRUE;
			}
			line->no_newline = TRUE;
			ctx->skip = size;
			break;
		}

		/* find ':' */
		if (colon_pos == UINT_MAX) {
			for (i = startpos; i < parse_size; i++) {
				if (msg[i] <= ':') {
					if (msg[i] == ':') {
						colon_pos = i;
						line->full_value_offset =
							ctx->input->v_offset +
							i + 1;
						break;
					}
					if (msg[i] == '\n') {
						/* end of headers, or error */
						break;
					}

					if (msg[i] == '\0')
						ctx->has_nuls = TRUE;
				}
			}
		}

		/* find '\n' */
		for (i = startpos; i < parse_size; i++) {
			if (msg[i] <= '\n') {
				if (msg[i] == '\n')
					break;
				if (msg[i] == '\0')
					ctx->has_nuls = TRUE;
			}
		}

		if (i < parse_size) {
			/* got a line */
			line->continues = i+1 < size && IS_LWSP(msg[i+1]);

			if (ctx->hdr_size != NULL)
				ctx->hdr_size->lines++;
			if (i == 0 || msg[i-1] != '\r') {
				/* missing CR */
				if (ctx->hdr_size != NULL)
					ctx->hdr_size->virtual_size++;
				size = i;
			} else {
				size = i-1;
			}

			ctx->skip = i+1;
			break;
		}

		/* no line feed yet, continue from where we stopped */
		startpos = i;
	}

	if (size == 0) {
		/* end of headers */
		line->eoh = TRUE;
		line->name_len = line->value_len = line->full_value_len = 0;
		line->name = ""; line->value = line->full_value = NULL;
		line->middle = NULL; line->middle_len = 0;
	} else if (line->continued) {
		line->value = msg;
		line->value_len = size;
		line->middle = NULL;
		line->middle_len = 0;
	} else if (colon_pos == UINT_MAX) {
		/* missing ':', assume the whole line is name */
		line->value = NULL;
		line->value_len = 0;

		str_truncate(ctx->name, 0);
		str_append_n(ctx->name, msg, size);
		line->name = str_c(ctx->name);
		line->name_len = str_len(ctx->name);

		line->middle = NULL;
		line->middle_len = 0;
	} else {
		size_t pos;

		line->value = msg + colon_pos+1;
		line->value_len = size - colon_pos - 1;
		if (ctx->skip_initial_lwsp) {
			/* get value. skip all LWSP after ':'. Note that
			   RFC2822 doesn't say we should, but history behind
			   it..

			   Exception to this is if the value consists only of
			   LWSP, then skip only the one LWSP after ':'. */
			for (pos = 0; pos < line->value_len; pos++) {
				/* must check the byte at pos - checking only
				   value[0] would never skip more than one
				   LWSP */
				if (!IS_LWSP(line->value[pos]))
					break;
			}

			if (pos == line->value_len) {
				/* everything was LWSP */
				if (line->value_len > 0 &&
				    IS_LWSP(line->value[0]))
					pos = 1;
			}
		} else {
			pos = line->value_len > 0 &&
				IS_LWSP(line->value[0]) ? 1 : 0;
		}

		line->value += pos;
		line->value_len -= pos;
		line->full_value_offset += pos;

		/* get name, skip LWSP before ':' */
		while (colon_pos > 0 && IS_LWSP(msg[colon_pos-1]))
			colon_pos--;

		/* middle is everything between the name and the value */
		line->middle = msg + colon_pos;
		line->middle_len = (size_t)(line->value - line->middle);

		str_truncate(ctx->name, 0);
		str_append_n(ctx->name, msg, colon_pos);
		line->name = str_c(ctx->name);
		line->name_len = str_len(ctx->name);
	}

	if (!line->continued) {
		/* first header line, set full_value = value */
		line->full_value = line->value;
		line->full_value_len = line->value_len;
	} else if (line->use_full_value) {
		/* continue saving the full value */
		if (!last_no_newline)
			buffer_append_c(ctx->value_buf, '\n');
		buffer_append(ctx->value_buf, line->value, line->value_len);
		line->full_value = buffer_get_data(ctx->value_buf,
						   &line->full_value_len);
	} else {
		/* we didn't want full_value, and this is a continued line. */
		line->full_value = NULL;
		line->full_value_len = 0;
	}

	/* always reset it */
	line->use_full_value = FALSE;

	if (ctx->hdr_size != NULL) {
		ctx->hdr_size->physical_size += ctx->skip;
		ctx->hdr_size->virtual_size += ctx->skip;
	}

	*hdr_r = line;
	return 1;
}