message-parser.c revision e819dc8ca77dced4c9e866250675528381cf36f6
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz/* Copyright (C) 2002 Timo Sirainen */
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarzmessage_parse_part_header(struct message_parser_ctx *parser_ctx);
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarzmessage_parse_part_body(struct message_parser_ctx *parser_ctx);
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarzmessage_parse_body(struct message_parser_ctx *parser_ctx,
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz struct message_size *msg_size, int *has_nuls);
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarzmessage_skip_boundary(struct message_parser_ctx *parser_ctx,
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz struct message_size *boundary_size, int *has_nuls);
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarzstatic void message_size_add_part(struct message_size *dest,
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz dest->lines += part->header_size.lines + part->body_size.lines;
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarzmessage_part_append(pool_t pool, struct message_part *parent)
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz part = p_new(pool, struct message_part, 1);
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz /* set child position */
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarzstatic void parse_content_type(const unsigned char *value, size_t value_len,
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz struct message_parser_ctx *parser_ctx = context;
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz if (parser_ctx->last_content_type != NULL || value_len == 0)
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz p_strndup(parser_ctx->parser_pool, value, value_len);
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz if (strcasecmp(str, "message/rfc822") == 0)
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz parser_ctx->part->flags |= MESSAGE_PART_FLAG_MESSAGE_RFC822;
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz else if (strncasecmp(str, "text", 4) == 0 &&
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz parser_ctx->part->flags |= MESSAGE_PART_FLAG_TEXT;
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz else if (strncasecmp(str, "multipart/", 10) == 0) {
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz parser_ctx->part->flags |= MESSAGE_PART_FLAG_MULTIPART;
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarzparse_content_type_param(const unsigned char *name, size_t name_len,
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz const unsigned char *value, size_t value_len,
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz struct message_parser_ctx *parser_ctx = context;
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz if ((parser_ctx->part->flags & MESSAGE_PART_FLAG_MULTIPART) == 0 ||
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz name_len != 8 || memcasecmp(name, "boundary", 8) != 0)
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz p_strndup(parser_ctx->parser_pool, value, value_len);
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarzmessage_parse_multipart(struct message_parser_ctx *parser_ctx)
4ee5a85e75d520497bd43dbfcc6fc273f3e57ceaMichael Slusarz struct message_part *parent_part, *next_part, *part;
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz /* multipart message. add new boundary */
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz b = p_new(parser_ctx->parser_pool, struct message_boundary, 1);
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz /* reset fields */
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz /* skip the data before the first boundary */
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz next_part = message_skip_boundary(parser_ctx, parser_ctx->boundaries,
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz parent_part->flags |= MESSAGE_PART_FLAG_HAS_NULS;
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz /* now, parse the parts */
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz /* new child */
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz part = message_part_append(parser_ctx->part_pool, parent_part);
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz if ((parent_part->flags & MESSAGE_PART_FLAG_IS_MIME) != 0)
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz next_part = message_parse_part_body(parser_ctx);
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz if ((part->flags & MESSAGE_PART_FLAG_HAS_NULS) != 0) {
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz /* it also belongs to parent */
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz parent_part->flags |= MESSAGE_PART_FLAG_HAS_NULS;
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz /* update our size */
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz message_size_add_part(&parent_part->body_size, part);
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz /* skip the boundary */
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz next_part = message_skip_boundary(parser_ctx,
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz parent_part->flags |= MESSAGE_PART_FLAG_HAS_NULS;
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz /* remove boundary */
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz (MESSAGE_PART_FLAG_MESSAGE_RFC822 | MESSAGE_PART_FLAG_MULTIPART)
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarzstatic void message_parse_part_header(struct message_parser_ctx *parser_ctx)
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz struct message_part *part = parser_ctx->part;
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz struct message_header_parser_ctx *hdr_ctx;
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz hdr_ctx = message_parse_header_init(parser_ctx->input,
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz while ((ret = message_parse_header_next(hdr_ctx, &hdr)) > 0) {
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz /* call the user-defined header parser */
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz parser_ctx->callback(part, hdr, parser_ctx->context);
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz if (!hdr->eoh && strcasecmp(hdr->name, "Mime-Version") == 0) {
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz /* it's MIME. Content-* headers are valid */
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz if (!hdr->eoh && strcasecmp(hdr->name, "Content-Type") == 0) {
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz /* we need to know the boundary */
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz message_content_parse_header(hdr->full_value,
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz if ((part->flags & MESSAGE_PART_FLAG_IS_MIME) == 0) {
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz /* It's not MIME. Reset everything we found from
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz Content-Type. */
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz parser_ctx->callback(part, NULL, parser_ctx->context);
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz part->flags |= MESSAGE_PART_FLAG_HAS_NULS;
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz i_assert((part->flags & MUTEX_FLAGS) != MUTEX_FLAGS);
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarzmessage_parse_part_body(struct message_parser_ctx *parser_ctx)
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz struct message_part *part = parser_ctx->part;
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz return message_parse_multipart(parser_ctx);
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz if (parser_ctx->last_content_type == NULL) {
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz /* when there's no content-type specified and we're
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz below multipart/digest, then assume message/rfc822
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz content-type */
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz part->flags |= MESSAGE_PART_FLAG_MESSAGE_RFC822;
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz /* otherwise we default to text/plain */
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz if (part->flags & MESSAGE_PART_FLAG_MESSAGE_RFC822) {
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz /* message/rfc822 part - the message body begins with
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz headers again, this works pretty much the same as
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz a single multipart/mixed item */
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz part = message_part_append(parser_ctx->part_pool, part);
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz next_part = message_parse_part_body(parser_ctx);
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz /* our body size is the size of header+body in message/rfc822 */
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz message_size_add_part(&part->parent->body_size, part);
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz /* normal message, read until the next boundary */
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz next_part = message_parse_body(parser_ctx,
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz part->flags |= MESSAGE_PART_FLAG_HAS_NULS;
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz if ((part->flags & MESSAGE_PART_FLAG_HAS_NULS) != 0 &&
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz /* it also belongs to parent */
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz part->parent->flags |= MESSAGE_PART_FLAG_HAS_NULS;
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarzstatic void message_skip_line(struct istream *input,
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz struct message_size *msg_size, int skip_lf,
51ed197520dd9ea534fbc3bc1790ebe3cb5421e2Michael M Slusarz const unsigned char *msg;
if (!skip_lf) {
startpos = i;
goto __break;
goto __break;
static struct message_boundary *
return boundaries;
return NULL;
static struct message_boundary *
int *has_nuls)
const unsigned char *msg;
i = line_start;
line_start -= i;
if (skip_over) {
line_start--;
line_start--;
return boundary;
static struct message_part *
static struct message_part *
const unsigned char *msg;
int end_boundary;
return NULL;
if (end_boundary) {
struct message_parser_ctx *
return ctx;
return parts;
void *context)
void *context)
void *context)
void *context)
int ret;
struct message_header_parser_ctx *
int skip_initial_lwsp)
return ctx;
const unsigned char *msg;
int ret;
colon_pos = 0;
size = 0;
startpos = 0;
size = i;
colon_pos = i;
if (i < parse_size) {
size = i;
startpos = i;
if (size == 0) {
colon_pos--;