message-decoder.c revision 7e50cca6b1dab5a7e2a90a8949678c62f4a0958a
183bea41fa640dc8117f3eb45ff935cd81377a84Timo Sirainen/* Copyright (c) 2006-2011 Dovecot authors, see the included COPYING file */
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen#include "lib.h"
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen#include "buffer.h"
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen#include "base64.h"
687d1dee0e92229232aa8be416897b640df67d07Timo Sirainen#include "str.h"
65988f5a8abed57e9894fec77105941e046d3490Timo Sirainen#include "unichar.h"
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen#include "charset-utf8.h"
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen#include "quoted-printable.h"
687d1dee0e92229232aa8be416897b640df67d07Timo Sirainen#include "rfc822-parser.h"
dabb4cec4cf9bdb34013de682b08f1284cfb670fTimo Sirainen#include "rfc2231-parser.h"
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen#include "message-parser.h"
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen#include "message-header-decode.h"
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen#include "message-decoder.h"
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen
/* How the body of the current MIME part is transfer-encoded. The value is
   chosen by parse_content_transfer_encoding() and consumed by
   message_decode_body(). */
enum content_type {
	/* Content-Transfer-Encoding value wasn't recognized */
	CONTENT_TYPE_UNKNOWN = 0,
	/* "7bit", "8bit" or "binary": body is passed through as-is */
	CONTENT_TYPE_BINARY = 1,
	/* "quoted-printable" */
	CONTENT_TYPE_QP = 2,
	/* "base64" */
	CONTENT_TYPE_BASE64 = 3
};
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen
/* base64 takes max 4 bytes per character, q-p takes max 3.
   NOTE(review): nothing in the code visible here references this macro
   (undecoded input is kept in the dynamic encoding_buf) - confirm whether
   it is still needed. */
#define MAX_ENCODING_BUF_SIZE 3

/* Size of the buffer that carries a not yet translated, partial character
   from one body block to the next. UTF-8 takes max 5 bytes per character.
   Not sure about others, but 10 should be more than enough for everyone. */
#define MAX_TRANSLATION_BUF_SIZE 10
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainenstruct message_decoder_context {
5cbefc6537aefbf1491416c433de00fc3e649a13Timo Sirainen enum message_decoder_flags flags;
f335accff54f408a8bbb328f8098ad458f2ff58eTimo Sirainen struct message_part *prev_part;
f335accff54f408a8bbb328f8098ad458f2ff58eTimo Sirainen
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen struct message_header_line hdr;
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen buffer_t *buf, *buf2;
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen
2e03303e721a293d796c0287829396f5caea76eaTimo Sirainen char *charset_trans_charset;
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen struct charset_translation *charset_trans;
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen char translation_buf[MAX_TRANSLATION_BUF_SIZE];
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen unsigned int translation_size;
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen
db24d60c306c9d477392ff5b61cb3fc95fef3bb7Timo Sirainen buffer_t *encoding_buf;
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen char *content_charset;
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen enum content_type content_type;
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen unsigned int charset_utf8:1;
5cbefc6537aefbf1491416c433de00fc3e649a13Timo Sirainen unsigned int binary_input:1;
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen};
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen
/* Forward declaration: message_decoder_set_return_binary() calls this before
   its definition further down in the file. */
static void
message_decode_body_init_charset(struct message_decoder_context *ctx,
				 struct message_part *part);
7e50cca6b1dab5a7e2a90a8949678c62f4a0958aTimo Sirainen
5cbefc6537aefbf1491416c433de00fc3e649a13Timo Sirainenstruct message_decoder_context *
5cbefc6537aefbf1491416c433de00fc3e649a13Timo Sirainenmessage_decoder_init(enum message_decoder_flags flags)
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen{
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen struct message_decoder_context *ctx;
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen ctx = i_new(struct message_decoder_context, 1);
5cbefc6537aefbf1491416c433de00fc3e649a13Timo Sirainen ctx->flags = flags;
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen ctx->buf = buffer_create_dynamic(default_pool, 8192);
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen ctx->buf2 = buffer_create_dynamic(default_pool, 8192);
db24d60c306c9d477392ff5b61cb3fc95fef3bb7Timo Sirainen ctx->encoding_buf = buffer_create_dynamic(default_pool, 128);
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen return ctx;
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen}
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainenvoid message_decoder_deinit(struct message_decoder_context **_ctx)
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen{
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen struct message_decoder_context *ctx = *_ctx;
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen *_ctx = NULL;
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen
c69a255a68103a50fa3f04a527281a169075403fTimo Sirainen if (ctx->charset_trans != NULL)
c69a255a68103a50fa3f04a527281a169075403fTimo Sirainen charset_to_utf8_end(&ctx->charset_trans);
c69a255a68103a50fa3f04a527281a169075403fTimo Sirainen
db24d60c306c9d477392ff5b61cb3fc95fef3bb7Timo Sirainen buffer_free(&ctx->encoding_buf);
0f66f12eb4cdbf47670975044c88d8f388bf92dfTimo Sirainen buffer_free(&ctx->buf);
0f66f12eb4cdbf47670975044c88d8f388bf92dfTimo Sirainen buffer_free(&ctx->buf2);
2e03303e721a293d796c0287829396f5caea76eaTimo Sirainen i_free(ctx->charset_trans_charset);
c69a255a68103a50fa3f04a527281a169075403fTimo Sirainen i_free(ctx->content_charset);
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen i_free(ctx);
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen}
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen
7e50cca6b1dab5a7e2a90a8949678c62f4a0958aTimo Sirainenvoid message_decoder_set_return_binary(struct message_decoder_context *ctx,
7e50cca6b1dab5a7e2a90a8949678c62f4a0958aTimo Sirainen bool set)
7e50cca6b1dab5a7e2a90a8949678c62f4a0958aTimo Sirainen{
7e50cca6b1dab5a7e2a90a8949678c62f4a0958aTimo Sirainen if (set)
7e50cca6b1dab5a7e2a90a8949678c62f4a0958aTimo Sirainen ctx->flags |= MESSAGE_DECODER_FLAG_RETURN_BINARY;
7e50cca6b1dab5a7e2a90a8949678c62f4a0958aTimo Sirainen else
7e50cca6b1dab5a7e2a90a8949678c62f4a0958aTimo Sirainen ctx->flags &= ~MESSAGE_DECODER_FLAG_RETURN_BINARY;
7e50cca6b1dab5a7e2a90a8949678c62f4a0958aTimo Sirainen message_decode_body_init_charset(ctx, ctx->prev_part);
7e50cca6b1dab5a7e2a90a8949678c62f4a0958aTimo Sirainen}
7e50cca6b1dab5a7e2a90a8949678c62f4a0958aTimo Sirainen
687d1dee0e92229232aa8be416897b640df67d07Timo Sirainenstatic void
687d1dee0e92229232aa8be416897b640df67d07Timo Sirainenparse_content_transfer_encoding(struct message_decoder_context *ctx,
687d1dee0e92229232aa8be416897b640df67d07Timo Sirainen struct message_header_line *hdr)
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen{
687d1dee0e92229232aa8be416897b640df67d07Timo Sirainen struct rfc822_parser_context parser;
687d1dee0e92229232aa8be416897b640df67d07Timo Sirainen string_t *value;
687d1dee0e92229232aa8be416897b640df67d07Timo Sirainen
687d1dee0e92229232aa8be416897b640df67d07Timo Sirainen value = t_str_new(64);
687d1dee0e92229232aa8be416897b640df67d07Timo Sirainen rfc822_parser_init(&parser, hdr->full_value, hdr->full_value_len, NULL);
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen
687d1dee0e92229232aa8be416897b640df67d07Timo Sirainen (void)rfc822_skip_lwsp(&parser);
687d1dee0e92229232aa8be416897b640df67d07Timo Sirainen (void)rfc822_parse_mime_token(&parser, value);
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen
b38dd9a0b8eb6b599c346acbeea9dbe129bb086eTimo Sirainen ctx->content_type = CONTENT_TYPE_UNKNOWN;
687d1dee0e92229232aa8be416897b640df67d07Timo Sirainen switch (str_len(value)) {
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen case 4:
87b426af6a2365c6840b14281a98c23e903bf28eTimo Sirainen if (i_memcasecmp(str_data(value), "7bit", 4) == 0 ||
87b426af6a2365c6840b14281a98c23e903bf28eTimo Sirainen i_memcasecmp(str_data(value), "8bit", 4) == 0)
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen ctx->content_type = CONTENT_TYPE_BINARY;
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen break;
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen case 6:
87b426af6a2365c6840b14281a98c23e903bf28eTimo Sirainen if (i_memcasecmp(str_data(value), "base64", 6) == 0)
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen ctx->content_type = CONTENT_TYPE_BASE64;
87b426af6a2365c6840b14281a98c23e903bf28eTimo Sirainen else if (i_memcasecmp(str_data(value), "binary", 6) == 0)
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen ctx->content_type = CONTENT_TYPE_BINARY;
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen break;
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen case 16:
87b426af6a2365c6840b14281a98c23e903bf28eTimo Sirainen if (i_memcasecmp(str_data(value), "quoted-printable", 16) == 0)
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen ctx->content_type = CONTENT_TYPE_QP;
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen break;
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen }
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen}
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainenstatic void
687d1dee0e92229232aa8be416897b640df67d07Timo Sirainenparse_content_type(struct message_decoder_context *ctx,
687d1dee0e92229232aa8be416897b640df67d07Timo Sirainen struct message_header_line *hdr)
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen{
687d1dee0e92229232aa8be416897b640df67d07Timo Sirainen struct rfc822_parser_context parser;
dabb4cec4cf9bdb34013de682b08f1284cfb670fTimo Sirainen const char *const *results;
687d1dee0e92229232aa8be416897b640df67d07Timo Sirainen string_t *str;
687d1dee0e92229232aa8be416897b640df67d07Timo Sirainen
687d1dee0e92229232aa8be416897b640df67d07Timo Sirainen if (ctx->content_charset != NULL)
687d1dee0e92229232aa8be416897b640df67d07Timo Sirainen return;
687d1dee0e92229232aa8be416897b640df67d07Timo Sirainen
687d1dee0e92229232aa8be416897b640df67d07Timo Sirainen rfc822_parser_init(&parser, hdr->full_value, hdr->full_value_len, NULL);
687d1dee0e92229232aa8be416897b640df67d07Timo Sirainen (void)rfc822_skip_lwsp(&parser);
687d1dee0e92229232aa8be416897b640df67d07Timo Sirainen str = t_str_new(64);
eddd9bf1a1369aea4a2715f6be1137da6d17d293Timo Sirainen if (rfc822_parse_content_type(&parser, str) <= 0)
687d1dee0e92229232aa8be416897b640df67d07Timo Sirainen return;
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen
dabb4cec4cf9bdb34013de682b08f1284cfb670fTimo Sirainen (void)rfc2231_parse(&parser, &results);
dabb4cec4cf9bdb34013de682b08f1284cfb670fTimo Sirainen for (; *results != NULL; results += 2) {
dabb4cec4cf9bdb34013de682b08f1284cfb670fTimo Sirainen if (strcasecmp(results[0], "charset") == 0) {
dabb4cec4cf9bdb34013de682b08f1284cfb670fTimo Sirainen ctx->content_charset = i_strdup(results[1]);
dabb4cec4cf9bdb34013de682b08f1284cfb670fTimo Sirainen ctx->charset_utf8 = charset_is_utf8(results[1]);
687d1dee0e92229232aa8be416897b640df67d07Timo Sirainen break;
687d1dee0e92229232aa8be416897b640df67d07Timo Sirainen }
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen }
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen}
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainenstatic bool message_decode_header(struct message_decoder_context *ctx,
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen struct message_header_line *hdr,
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen struct message_block *output)
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen{
5cbefc6537aefbf1491416c433de00fc3e649a13Timo Sirainen bool dtcase = (ctx->flags & MESSAGE_DECODER_FLAG_DTCASE) != 0;
0ce8f754204c7eeb33805993807393f74faf2cd3Timo Sirainen size_t value_len;
0ce8f754204c7eeb33805993807393f74faf2cd3Timo Sirainen
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen if (hdr->continues) {
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen hdr->use_full_value = TRUE;
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen return FALSE;
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen }
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen
19e8adccba16ff419f5675b1575358c2956dce83Timo Sirainen T_BEGIN {
eddd9bf1a1369aea4a2715f6be1137da6d17d293Timo Sirainen if (hdr->name_len == 12 &&
eddd9bf1a1369aea4a2715f6be1137da6d17d293Timo Sirainen strcasecmp(hdr->name, "Content-Type") == 0)
eddd9bf1a1369aea4a2715f6be1137da6d17d293Timo Sirainen parse_content_type(ctx, hdr);
eddd9bf1a1369aea4a2715f6be1137da6d17d293Timo Sirainen if (hdr->name_len == 25 &&
eddd9bf1a1369aea4a2715f6be1137da6d17d293Timo Sirainen strcasecmp(hdr->name, "Content-Transfer-Encoding") == 0)
eddd9bf1a1369aea4a2715f6be1137da6d17d293Timo Sirainen parse_content_transfer_encoding(ctx, hdr);
19e8adccba16ff419f5675b1575358c2956dce83Timo Sirainen } T_END;
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen buffer_set_used_size(ctx->buf, 0);
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainen message_header_decode_utf8(hdr->full_value, hdr->full_value_len,
5cbefc6537aefbf1491416c433de00fc3e649a13Timo Sirainen ctx->buf, dtcase);
0ce8f754204c7eeb33805993807393f74faf2cd3Timo Sirainen value_len = ctx->buf->used;
0ce8f754204c7eeb33805993807393f74faf2cd3Timo Sirainen
5cbefc6537aefbf1491416c433de00fc3e649a13Timo Sirainen if (dtcase) {
65988f5a8abed57e9894fec77105941e046d3490Timo Sirainen (void)uni_utf8_to_decomposed_titlecase(hdr->name, hdr->name_len,
65988f5a8abed57e9894fec77105941e046d3490Timo Sirainen ctx->buf);
e1b83f64e62cc3e8967c75fcc3f9b5dbb243d3b3Timo Sirainen buffer_append_c(ctx->buf, '\0');
b516a7812b9acc04522869fead3aa6d2787dcdc6Timo Sirainen } else {
b516a7812b9acc04522869fead3aa6d2787dcdc6Timo Sirainen if (!uni_utf8_get_valid_data((const unsigned char *)hdr->name,
b516a7812b9acc04522869fead3aa6d2787dcdc6Timo Sirainen hdr->name_len, ctx->buf))
b516a7812b9acc04522869fead3aa6d2787dcdc6Timo Sirainen buffer_append_c(ctx->buf, '\0');
e1b83f64e62cc3e8967c75fcc3f9b5dbb243d3b3Timo Sirainen }
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen ctx->hdr = *hdr;
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen ctx->hdr.full_value = ctx->buf->data;
0ce8f754204c7eeb33805993807393f74faf2cd3Timo Sirainen ctx->hdr.full_value_len = value_len;
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen ctx->hdr.value_len = 0;
b516a7812b9acc04522869fead3aa6d2787dcdc6Timo Sirainen if (ctx->buf->used != value_len) {
e1b83f64e62cc3e8967c75fcc3f9b5dbb243d3b3Timo Sirainen ctx->hdr.name = CONST_PTR_OFFSET(ctx->buf->data,
e1b83f64e62cc3e8967c75fcc3f9b5dbb243d3b3Timo Sirainen ctx->hdr.full_value_len);
e1b83f64e62cc3e8967c75fcc3f9b5dbb243d3b3Timo Sirainen ctx->hdr.name_len = ctx->buf->used - 1 - value_len;
e1b83f64e62cc3e8967c75fcc3f9b5dbb243d3b3Timo Sirainen }
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen output->hdr = &ctx->hdr;
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen return TRUE;
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen}
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainenstatic void translation_buf_decode(struct message_decoder_context *ctx,
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen const unsigned char **data, size_t *size)
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen{
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen unsigned char trans_buf[MAX_TRANSLATION_BUF_SIZE+1];
62394a19cba1a8df01cad66eaa9331a70464441eTimo Sirainen unsigned int data_wanted, skip;
62394a19cba1a8df01cad66eaa9331a70464441eTimo Sirainen size_t trans_size;
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen
62394a19cba1a8df01cad66eaa9331a70464441eTimo Sirainen /* @UNSAFE: move the previously untranslated bytes to trans_buf
62394a19cba1a8df01cad66eaa9331a70464441eTimo Sirainen and see if we have now enough data to get the next character
62394a19cba1a8df01cad66eaa9331a70464441eTimo Sirainen translated */
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen memcpy(trans_buf, ctx->translation_buf, ctx->translation_size);
62394a19cba1a8df01cad66eaa9331a70464441eTimo Sirainen data_wanted = sizeof(trans_buf) - ctx->translation_size;
62394a19cba1a8df01cad66eaa9331a70464441eTimo Sirainen if (data_wanted > *size)
62394a19cba1a8df01cad66eaa9331a70464441eTimo Sirainen data_wanted = *size;
62394a19cba1a8df01cad66eaa9331a70464441eTimo Sirainen memcpy(trans_buf + ctx->translation_size, *data, data_wanted);
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen
62394a19cba1a8df01cad66eaa9331a70464441eTimo Sirainen trans_size = ctx->translation_size + data_wanted;
62394a19cba1a8df01cad66eaa9331a70464441eTimo Sirainen (void)charset_to_utf8(ctx->charset_trans, trans_buf,
62394a19cba1a8df01cad66eaa9331a70464441eTimo Sirainen &trans_size, ctx->buf2);
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen
62394a19cba1a8df01cad66eaa9331a70464441eTimo Sirainen i_assert(trans_size > ctx->translation_size);
62394a19cba1a8df01cad66eaa9331a70464441eTimo Sirainen skip = trans_size - ctx->translation_size;
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen i_assert(*size >= skip);
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen *data += skip;
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen *size -= skip;
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen ctx->translation_size = 0;
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen}
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen
5cbefc6537aefbf1491416c433de00fc3e649a13Timo Sirainenstatic void
5cbefc6537aefbf1491416c433de00fc3e649a13Timo Sirainenmessage_decode_body_init_charset(struct message_decoder_context *ctx,
5cbefc6537aefbf1491416c433de00fc3e649a13Timo Sirainen struct message_part *part)
2e03303e721a293d796c0287829396f5caea76eaTimo Sirainen{
2e03303e721a293d796c0287829396f5caea76eaTimo Sirainen enum charset_flags flags;
2e03303e721a293d796c0287829396f5caea76eaTimo Sirainen
87506860556bf42f656b13f4b14cf81b75261e95Timo Sirainen ctx->binary_input = ctx->content_charset == NULL &&
87506860556bf42f656b13f4b14cf81b75261e95Timo Sirainen (ctx->flags & MESSAGE_DECODER_FLAG_RETURN_BINARY) != 0 &&
87506860556bf42f656b13f4b14cf81b75261e95Timo Sirainen (part->flags & (MESSAGE_PART_FLAG_TEXT |
87506860556bf42f656b13f4b14cf81b75261e95Timo Sirainen MESSAGE_PART_FLAG_MESSAGE_RFC822)) == 0;
87506860556bf42f656b13f4b14cf81b75261e95Timo Sirainen
87506860556bf42f656b13f4b14cf81b75261e95Timo Sirainen if (ctx->charset_utf8 || ctx->binary_input)
2e03303e721a293d796c0287829396f5caea76eaTimo Sirainen return;
2e03303e721a293d796c0287829396f5caea76eaTimo Sirainen
87506860556bf42f656b13f4b14cf81b75261e95Timo Sirainen if (ctx->charset_trans != NULL && ctx->content_charset != NULL &&
2e03303e721a293d796c0287829396f5caea76eaTimo Sirainen strcasecmp(ctx->content_charset, ctx->charset_trans_charset) == 0) {
2e03303e721a293d796c0287829396f5caea76eaTimo Sirainen /* already have the correct translation selected */
2e03303e721a293d796c0287829396f5caea76eaTimo Sirainen return;
2e03303e721a293d796c0287829396f5caea76eaTimo Sirainen }
2e03303e721a293d796c0287829396f5caea76eaTimo Sirainen
2e03303e721a293d796c0287829396f5caea76eaTimo Sirainen if (ctx->charset_trans != NULL)
2e03303e721a293d796c0287829396f5caea76eaTimo Sirainen charset_to_utf8_end(&ctx->charset_trans);
5cbefc6537aefbf1491416c433de00fc3e649a13Timo Sirainen i_free_and_null(ctx->charset_trans_charset);
5cbefc6537aefbf1491416c433de00fc3e649a13Timo Sirainen
5cbefc6537aefbf1491416c433de00fc3e649a13Timo Sirainen flags = (ctx->flags & MESSAGE_DECODER_FLAG_DTCASE) != 0 ?
5cbefc6537aefbf1491416c433de00fc3e649a13Timo Sirainen CHARSET_FLAG_DECOMP_TITLECASE : 0;
2e03303e721a293d796c0287829396f5caea76eaTimo Sirainen ctx->charset_trans_charset = i_strdup(ctx->content_charset != NULL ?
2e03303e721a293d796c0287829396f5caea76eaTimo Sirainen ctx->content_charset : "UTF-8");
2e03303e721a293d796c0287829396f5caea76eaTimo Sirainen if (charset_to_utf8_begin(ctx->charset_trans_charset,
2e03303e721a293d796c0287829396f5caea76eaTimo Sirainen flags, &ctx->charset_trans) < 0)
2e03303e721a293d796c0287829396f5caea76eaTimo Sirainen ctx->charset_trans = NULL;
2e03303e721a293d796c0287829396f5caea76eaTimo Sirainen}
2e03303e721a293d796c0287829396f5caea76eaTimo Sirainen
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainenstatic bool message_decode_body(struct message_decoder_context *ctx,
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen struct message_block *input,
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen struct message_block *output)
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen{
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen const unsigned char *data = NULL;
e376e08040b5f21ff79a15ae728d2532a34207f6Timo Sirainen size_t pos = 0, size = 0;
e619ecbbc00cba9e6e1e8322caa59776507fac02Timo Sirainen int ret;
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen
db24d60c306c9d477392ff5b61cb3fc95fef3bb7Timo Sirainen if (ctx->encoding_buf->used != 0) {
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen /* @UNSAFE */
db24d60c306c9d477392ff5b61cb3fc95fef3bb7Timo Sirainen buffer_append(ctx->encoding_buf, input->data, input->size);
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen }
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen switch (ctx->content_type) {
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen case CONTENT_TYPE_UNKNOWN:
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen /* just skip this body */
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen return FALSE;
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen case CONTENT_TYPE_BINARY:
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen data = input->data;
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen size = pos = input->size;
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen break;
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen case CONTENT_TYPE_QP:
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen buffer_set_used_size(ctx->buf, 0);
db24d60c306c9d477392ff5b61cb3fc95fef3bb7Timo Sirainen if (ctx->encoding_buf->used != 0) {
db24d60c306c9d477392ff5b61cb3fc95fef3bb7Timo Sirainen quoted_printable_decode(ctx->encoding_buf->data,
db24d60c306c9d477392ff5b61cb3fc95fef3bb7Timo Sirainen ctx->encoding_buf->used,
db24d60c306c9d477392ff5b61cb3fc95fef3bb7Timo Sirainen &pos, ctx->buf);
db24d60c306c9d477392ff5b61cb3fc95fef3bb7Timo Sirainen } else {
db24d60c306c9d477392ff5b61cb3fc95fef3bb7Timo Sirainen quoted_printable_decode(input->data, input->size,
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen &pos, ctx->buf);
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen }
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen data = ctx->buf->data;
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen size = ctx->buf->used;
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen break;
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen case CONTENT_TYPE_BASE64:
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen buffer_set_used_size(ctx->buf, 0);
db24d60c306c9d477392ff5b61cb3fc95fef3bb7Timo Sirainen if (ctx->encoding_buf->used != 0) {
db24d60c306c9d477392ff5b61cb3fc95fef3bb7Timo Sirainen ret = base64_decode(ctx->encoding_buf->data,
db24d60c306c9d477392ff5b61cb3fc95fef3bb7Timo Sirainen ctx->encoding_buf->used,
db24d60c306c9d477392ff5b61cb3fc95fef3bb7Timo Sirainen &pos, ctx->buf);
db24d60c306c9d477392ff5b61cb3fc95fef3bb7Timo Sirainen } else {
db24d60c306c9d477392ff5b61cb3fc95fef3bb7Timo Sirainen ret = base64_decode(input->data, input->size,
db24d60c306c9d477392ff5b61cb3fc95fef3bb7Timo Sirainen &pos, ctx->buf);
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen }
e619ecbbc00cba9e6e1e8322caa59776507fac02Timo Sirainen if (ret < 0) {
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen /* corrupted base64 data, don't bother with
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen the rest of it */
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen return FALSE;
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen }
e619ecbbc00cba9e6e1e8322caa59776507fac02Timo Sirainen if (ret == 0) {
64510d2cc23a79d2142030bf5bade44baa490db3Timo Sirainen /* end of base64 input */
db24d60c306c9d477392ff5b61cb3fc95fef3bb7Timo Sirainen pos = input->size;
db24d60c306c9d477392ff5b61cb3fc95fef3bb7Timo Sirainen buffer_set_used_size(ctx->encoding_buf, 0);
64510d2cc23a79d2142030bf5bade44baa490db3Timo Sirainen }
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen data = ctx->buf->data;
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen size = ctx->buf->used;
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen break;
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen }
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen
db24d60c306c9d477392ff5b61cb3fc95fef3bb7Timo Sirainen if (ctx->encoding_buf->used != 0)
db24d60c306c9d477392ff5b61cb3fc95fef3bb7Timo Sirainen buffer_delete(ctx->encoding_buf, 0, pos);
db24d60c306c9d477392ff5b61cb3fc95fef3bb7Timo Sirainen else if (pos != input->size) {
db24d60c306c9d477392ff5b61cb3fc95fef3bb7Timo Sirainen buffer_append(ctx->encoding_buf,
db24d60c306c9d477392ff5b61cb3fc95fef3bb7Timo Sirainen input->data + pos, input->size - pos);
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen }
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen
5cbefc6537aefbf1491416c433de00fc3e649a13Timo Sirainen if (ctx->binary_input) {
5cbefc6537aefbf1491416c433de00fc3e649a13Timo Sirainen output->data = data;
5cbefc6537aefbf1491416c433de00fc3e649a13Timo Sirainen output->size = size;
5cbefc6537aefbf1491416c433de00fc3e649a13Timo Sirainen } else if (ctx->charset_utf8) {
7aa59f55d8a4e02c7039fbd22660c4055bfc8393Timo Sirainen buffer_set_used_size(ctx->buf2, 0);
5cbefc6537aefbf1491416c433de00fc3e649a13Timo Sirainen if ((ctx->flags & MESSAGE_DECODER_FLAG_DTCASE) != 0) {
65988f5a8abed57e9894fec77105941e046d3490Timo Sirainen (void)uni_utf8_to_decomposed_titlecase(data, size,
0ae99441ae9ef80f435f3eb85fad16e136036b0bTimo Sirainen ctx->buf2);
e1b83f64e62cc3e8967c75fcc3f9b5dbb243d3b3Timo Sirainen output->data = ctx->buf2->data;
e1b83f64e62cc3e8967c75fcc3f9b5dbb243d3b3Timo Sirainen output->size = ctx->buf2->used;
7aa59f55d8a4e02c7039fbd22660c4055bfc8393Timo Sirainen } else if (uni_utf8_get_valid_data(data, size, ctx->buf2)) {
7aa59f55d8a4e02c7039fbd22660c4055bfc8393Timo Sirainen output->data = data;
7aa59f55d8a4e02c7039fbd22660c4055bfc8393Timo Sirainen output->size = size;
e1b83f64e62cc3e8967c75fcc3f9b5dbb243d3b3Timo Sirainen } else {
7aa59f55d8a4e02c7039fbd22660c4055bfc8393Timo Sirainen output->data = ctx->buf2->data;
7aa59f55d8a4e02c7039fbd22660c4055bfc8393Timo Sirainen output->size = ctx->buf2->used;
e1b83f64e62cc3e8967c75fcc3f9b5dbb243d3b3Timo Sirainen }
919733fcead68b0e9617cfff86ae5c74d097c6cdTimo Sirainen } else if (ctx->charset_trans == NULL) {
1dba794aa92dc13e6afd7a50a8c33cb19d6aa235Timo Sirainen /* unknown charset */
7aa59f55d8a4e02c7039fbd22660c4055bfc8393Timo Sirainen buffer_set_used_size(ctx->buf2, 0);
7aa59f55d8a4e02c7039fbd22660c4055bfc8393Timo Sirainen if (uni_utf8_get_valid_data(data, size, ctx->buf2)) {
7aa59f55d8a4e02c7039fbd22660c4055bfc8393Timo Sirainen output->data = data;
7aa59f55d8a4e02c7039fbd22660c4055bfc8393Timo Sirainen output->size = size;
7aa59f55d8a4e02c7039fbd22660c4055bfc8393Timo Sirainen } else {
7aa59f55d8a4e02c7039fbd22660c4055bfc8393Timo Sirainen output->data = ctx->buf2->data;
7aa59f55d8a4e02c7039fbd22660c4055bfc8393Timo Sirainen output->size = ctx->buf2->used;
7aa59f55d8a4e02c7039fbd22660c4055bfc8393Timo Sirainen }
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen } else {
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen buffer_set_used_size(ctx->buf2, 0);
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen if (ctx->translation_size != 0)
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen translation_buf_decode(ctx, &data, &size);
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen pos = size;
055f4599bba1874fa1148a8fa488517fa077619cTimo Sirainen (void)charset_to_utf8(ctx->charset_trans,
055f4599bba1874fa1148a8fa488517fa077619cTimo Sirainen data, &pos, ctx->buf2);
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen if (pos != size) {
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen ctx->translation_size = size - pos;
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen i_assert(ctx->translation_size <=
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen sizeof(ctx->translation_buf));
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen memcpy(ctx->translation_buf, data + pos,
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen ctx->translation_size);
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen }
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen output->data = ctx->buf2->data;
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen output->size = ctx->buf2->used;
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen }
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen output->hdr = NULL;
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen return TRUE;
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen}
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainenbool message_decoder_decode_next_block(struct message_decoder_context *ctx,
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen struct message_block *input,
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen struct message_block *output)
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen{
f335accff54f408a8bbb328f8098ad458f2ff58eTimo Sirainen if (input->part != ctx->prev_part) {
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen /* MIME part changed. */
2e03303e721a293d796c0287829396f5caea76eaTimo Sirainen message_decoder_decode_reset(ctx);
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen }
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen output->part = input->part;
f335accff54f408a8bbb328f8098ad458f2ff58eTimo Sirainen ctx->prev_part = input->part;
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen if (input->hdr != NULL)
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen return message_decode_header(ctx, input->hdr, output);
2e03303e721a293d796c0287829396f5caea76eaTimo Sirainen else if (input->size != 0)
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen return message_decode_body(ctx, input, output);
2e03303e721a293d796c0287829396f5caea76eaTimo Sirainen else {
9ae664e94e6eeb5c1f900bb90642052633031832Timo Sirainen output->hdr = NULL;
9ae664e94e6eeb5c1f900bb90642052633031832Timo Sirainen output->size = 0;
5cbefc6537aefbf1491416c433de00fc3e649a13Timo Sirainen message_decode_body_init_charset(ctx, input->part);
2e03303e721a293d796c0287829396f5caea76eaTimo Sirainen return TRUE;
2e03303e721a293d796c0287829396f5caea76eaTimo Sirainen }
2e03303e721a293d796c0287829396f5caea76eaTimo Sirainen}
2e03303e721a293d796c0287829396f5caea76eaTimo Sirainen
2e03303e721a293d796c0287829396f5caea76eaTimo Sirainenvoid message_decoder_decode_reset(struct message_decoder_context *ctx)
2e03303e721a293d796c0287829396f5caea76eaTimo Sirainen{
2e03303e721a293d796c0287829396f5caea76eaTimo Sirainen i_free_and_null(ctx->content_charset);
2e03303e721a293d796c0287829396f5caea76eaTimo Sirainen ctx->content_type = CONTENT_TYPE_BINARY;
2e03303e721a293d796c0287829396f5caea76eaTimo Sirainen ctx->charset_utf8 = TRUE;
db24d60c306c9d477392ff5b61cb3fc95fef3bb7Timo Sirainen buffer_set_used_size(ctx->encoding_buf, 0);
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen}