message-decoder.c revision 511ba4416aafb9f9ba1a4193703b95a033267068
e59faf65ce864fe95dc00f5d52b8323cdbd0608aTimo Sirainen/* Copyright (c) 2006-2007 Dovecot authors, see the included COPYING file */
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen
08d6658a4e2ec8104cd1307f6baa75fdb07a24f8Mark Washenberger#include "lib.h"
b44650b0f48a4b5f0dc240ed836833a00b643b9fTimo Sirainen#include "buffer.h"
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen#include "base64.h"
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen#include "str.h"
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen#include "unichar.h"
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen#include "charset-utf8.h"
be5c76fabc7439fd33bc799bc3ab3f570799977bTimo Sirainen#include "quoted-printable.h"
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen#include "rfc822-parser.h"
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen#include "message-parser.h"
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen#include "message-header-decode.h"
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen#include "message-decoder.h"
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen
/* Transfer encoding of the current MIME part's body, as selected from its
   Content-Transfer-Encoding header. */
enum content_type {
	/* encoding not recognized; also the zero-initialized value */
	CONTENT_TYPE_UNKNOWN = 0,
	/* "7bit", "8bit" or "binary": body bytes are used without decoding */
	CONTENT_TYPE_BINARY,
	/* "quoted-printable" */
	CONTENT_TYPE_QP,
	/* "base64" */
	CONTENT_TYPE_BASE64
};
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen
/* Size of the buffer that carries not yet decodable transfer-encoded bytes
   from one body block to the next. A complete base64 unit is 4 bytes and a
   complete quoted-printable escape is 3 bytes, so an incomplete tail is at
   most 3 bytes long. */
#define MAX_ENCODING_BUF_SIZE 3

/* Size of the buffer that carries the bytes of an incomplete multibyte
   character from one body block to the next while translating the charset.
   10 bytes is assumed to be more than enough for any charset. */
#define MAX_TRANSLATION_BUF_SIZE 10
be5c76fabc7439fd33bc799bc3ab3f570799977bTimo Sirainen
be5c76fabc7439fd33bc799bc3ab3f570799977bTimo Sirainenstruct message_decoder_context {
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen struct message_part *prev_part;
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen
43d3ea2780b5f8557ede7b4c039e8f56cb8d357dTimo Sirainen struct message_header_line hdr;
43d3ea2780b5f8557ede7b4c039e8f56cb8d357dTimo Sirainen buffer_t *buf, *buf2;
43d3ea2780b5f8557ede7b4c039e8f56cb8d357dTimo Sirainen
43d3ea2780b5f8557ede7b4c039e8f56cb8d357dTimo Sirainen char *charset_trans_charset;
43d3ea2780b5f8557ede7b4c039e8f56cb8d357dTimo Sirainen struct charset_translation *charset_trans;
43d3ea2780b5f8557ede7b4c039e8f56cb8d357dTimo Sirainen char translation_buf[MAX_TRANSLATION_BUF_SIZE];
43d3ea2780b5f8557ede7b4c039e8f56cb8d357dTimo Sirainen unsigned int translation_size;
43d3ea2780b5f8557ede7b4c039e8f56cb8d357dTimo Sirainen
788a0754cfd38dcfec1902844b085e4e84cfe7e6Timo Sirainen char encoding_buf[MAX_ENCODING_BUF_SIZE];
788a0754cfd38dcfec1902844b085e4e84cfe7e6Timo Sirainen unsigned int encoding_size;
43d3ea2780b5f8557ede7b4c039e8f56cb8d357dTimo Sirainen
43d3ea2780b5f8557ede7b4c039e8f56cb8d357dTimo Sirainen char *content_charset;
43d3ea2780b5f8557ede7b4c039e8f56cb8d357dTimo Sirainen enum content_type content_type;
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen unsigned int dtcase:1;
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen unsigned int charset_utf8:1;
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen};
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainenstruct message_decoder_context *message_decoder_init(bool dtcase)
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen{
d5cebe7f98e63d4e2822863ef2faa4971e8b3a5dTimo Sirainen struct message_decoder_context *ctx;
d5cebe7f98e63d4e2822863ef2faa4971e8b3a5dTimo Sirainen
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen ctx = i_new(struct message_decoder_context, 1);
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen ctx->dtcase = dtcase;
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen ctx->buf = buffer_create_dynamic(default_pool, 8192);
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen ctx->buf2 = buffer_create_dynamic(default_pool, 8192);
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen return ctx;
31597236d79ac38a5cea7ab65a9d0a3df64ed201Timo Sirainen}
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainenvoid message_decoder_deinit(struct message_decoder_context **_ctx)
31597236d79ac38a5cea7ab65a9d0a3df64ed201Timo Sirainen{
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen struct message_decoder_context *ctx = *_ctx;
9ed2951bd0bb1878a27437d7c00611b2baadd614Timo Sirainen
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen *_ctx = NULL;
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen if (ctx->charset_trans != NULL)
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen charset_to_utf8_end(&ctx->charset_trans);
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen
31597236d79ac38a5cea7ab65a9d0a3df64ed201Timo Sirainen buffer_free(&ctx->buf);
31597236d79ac38a5cea7ab65a9d0a3df64ed201Timo Sirainen buffer_free(&ctx->buf2);
31597236d79ac38a5cea7ab65a9d0a3df64ed201Timo Sirainen i_free(ctx->charset_trans_charset);
31597236d79ac38a5cea7ab65a9d0a3df64ed201Timo Sirainen i_free(ctx->content_charset);
7569ab8537418b7fc369265f26595b0ef9e4cb35Timo Sirainen i_free(ctx);
7569ab8537418b7fc369265f26595b0ef9e4cb35Timo Sirainen}
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainenstatic void
9ed2951bd0bb1878a27437d7c00611b2baadd614Timo Sirainenparse_content_transfer_encoding(struct message_decoder_context *ctx,
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen struct message_header_line *hdr)
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen{
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen struct rfc822_parser_context parser;
798cfe56c9871262770384da1239162b3800cce1Timo Sirainen string_t *value;
798cfe56c9871262770384da1239162b3800cce1Timo Sirainen
798cfe56c9871262770384da1239162b3800cce1Timo Sirainen value = t_str_new(64);
798cfe56c9871262770384da1239162b3800cce1Timo Sirainen rfc822_parser_init(&parser, hdr->full_value, hdr->full_value_len, NULL);
798cfe56c9871262770384da1239162b3800cce1Timo Sirainen
798cfe56c9871262770384da1239162b3800cce1Timo Sirainen (void)rfc822_skip_lwsp(&parser);
798cfe56c9871262770384da1239162b3800cce1Timo Sirainen (void)rfc822_parse_mime_token(&parser, value);
798cfe56c9871262770384da1239162b3800cce1Timo Sirainen
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen switch (str_len(value)) {
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen case 4:
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen if (i_memcasecmp(str_data(value), "7bit", 4) == 0 ||
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen i_memcasecmp(str_data(value), "8bit", 4) == 0)
226259ee6fb9830dafc1a5ba1e95bf5a4345b406Timo Sirainen ctx->content_type = CONTENT_TYPE_BINARY;
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen break;
3313a51ef9b245248d672c20f930c52a577a42f7Timo Sirainen case 6:
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen if (i_memcasecmp(str_data(value), "base64", 6) == 0)
47bb4a7615c85f212f061499f04f121d6d625387Timo Sirainen ctx->content_type = CONTENT_TYPE_BASE64;
47bb4a7615c85f212f061499f04f121d6d625387Timo Sirainen else if (i_memcasecmp(str_data(value), "binary", 6) == 0)
47bb4a7615c85f212f061499f04f121d6d625387Timo Sirainen ctx->content_type = CONTENT_TYPE_BINARY;
226259ee6fb9830dafc1a5ba1e95bf5a4345b406Timo Sirainen break;
226259ee6fb9830dafc1a5ba1e95bf5a4345b406Timo Sirainen case 16:
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen if (i_memcasecmp(str_data(value), "quoted-printable", 16) == 0)
226259ee6fb9830dafc1a5ba1e95bf5a4345b406Timo Sirainen ctx->content_type = CONTENT_TYPE_QP;
226259ee6fb9830dafc1a5ba1e95bf5a4345b406Timo Sirainen break;
fc8d5f0ac909cca77840538e8beef98a8d40c21cTimo Sirainen }
226259ee6fb9830dafc1a5ba1e95bf5a4345b406Timo Sirainen}
226259ee6fb9830dafc1a5ba1e95bf5a4345b406Timo Sirainen
226259ee6fb9830dafc1a5ba1e95bf5a4345b406Timo Sirainenstatic void
226259ee6fb9830dafc1a5ba1e95bf5a4345b406Timo Sirainenparse_content_type(struct message_decoder_context *ctx,
226259ee6fb9830dafc1a5ba1e95bf5a4345b406Timo Sirainen struct message_header_line *hdr)
226259ee6fb9830dafc1a5ba1e95bf5a4345b406Timo Sirainen{
226259ee6fb9830dafc1a5ba1e95bf5a4345b406Timo Sirainen struct rfc822_parser_context parser;
226259ee6fb9830dafc1a5ba1e95bf5a4345b406Timo Sirainen const char *key, *value;
226259ee6fb9830dafc1a5ba1e95bf5a4345b406Timo Sirainen string_t *str;
fc8d5f0ac909cca77840538e8beef98a8d40c21cTimo Sirainen
226259ee6fb9830dafc1a5ba1e95bf5a4345b406Timo Sirainen if (ctx->content_charset != NULL)
fc8d5f0ac909cca77840538e8beef98a8d40c21cTimo Sirainen return;
226259ee6fb9830dafc1a5ba1e95bf5a4345b406Timo Sirainen
fc8d5f0ac909cca77840538e8beef98a8d40c21cTimo Sirainen rfc822_parser_init(&parser, hdr->full_value, hdr->full_value_len, NULL);
33dd58ab84a020c4f061d2f6031eb6d4c168df1bTimo Sirainen (void)rfc822_skip_lwsp(&parser);
33dd58ab84a020c4f061d2f6031eb6d4c168df1bTimo Sirainen str = t_str_new(64);
33dd58ab84a020c4f061d2f6031eb6d4c168df1bTimo Sirainen if (rfc822_parse_content_type(&parser, str) <= 0)
33dd58ab84a020c4f061d2f6031eb6d4c168df1bTimo Sirainen return;
33dd58ab84a020c4f061d2f6031eb6d4c168df1bTimo Sirainen
3313a51ef9b245248d672c20f930c52a577a42f7Timo Sirainen while (rfc822_parse_content_param(&parser, &key, &value) > 0) {
3313a51ef9b245248d672c20f930c52a577a42f7Timo Sirainen if (strcasecmp(key, "charset") == 0) {
3313a51ef9b245248d672c20f930c52a577a42f7Timo Sirainen ctx->content_charset = i_strdup(value);
3313a51ef9b245248d672c20f930c52a577a42f7Timo Sirainen ctx->charset_utf8 = charset_is_utf8(value);
3313a51ef9b245248d672c20f930c52a577a42f7Timo Sirainen break;
3313a51ef9b245248d672c20f930c52a577a42f7Timo Sirainen }
3313a51ef9b245248d672c20f930c52a577a42f7Timo Sirainen }
5d4855d7b4dcffb6975ed8e3c9c376dac74e5c8aTimo Sirainen}
226259ee6fb9830dafc1a5ba1e95bf5a4345b406Timo Sirainen
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainenstatic bool message_decode_header(struct message_decoder_context *ctx,
798cfe56c9871262770384da1239162b3800cce1Timo Sirainen struct message_header_line *hdr,
82f53ea81671bcc7b9bf24a34b04a4ba2752efd3Timo Sirainen struct message_block *output)
d5cebe7f98e63d4e2822863ef2faa4971e8b3a5dTimo Sirainen{
82f53ea81671bcc7b9bf24a34b04a4ba2752efd3Timo Sirainen size_t value_len;
d5cebe7f98e63d4e2822863ef2faa4971e8b3a5dTimo Sirainen
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen if (hdr->continues) {
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen hdr->use_full_value = TRUE;
ce6b6093957885a74fd6e85c18801dbb727d61ecTimo Sirainen return FALSE;
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen }
31597236d79ac38a5cea7ab65a9d0a3df64ed201Timo Sirainen
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen T_FRAME(
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen if (hdr->name_len == 12 &&
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen strcasecmp(hdr->name, "Content-Type") == 0)
adc409a7ac9689d3baf811712ad5a5432cab2d87Timo Sirainen parse_content_type(ctx, hdr);
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen if (hdr->name_len == 25 &&
8eb94c5190ba09bb6f6f068eec7bf96750f08d1dTimo Sirainen strcasecmp(hdr->name, "Content-Transfer-Encoding") == 0)
e9503210d3521a6833ed62dc332fc42ffb0e7a13Timo Sirainen parse_content_transfer_encoding(ctx, hdr);
31597236d79ac38a5cea7ab65a9d0a3df64ed201Timo Sirainen );
31597236d79ac38a5cea7ab65a9d0a3df64ed201Timo Sirainen
e9503210d3521a6833ed62dc332fc42ffb0e7a13Timo Sirainen buffer_set_used_size(ctx->buf, 0);
ce6b6093957885a74fd6e85c18801dbb727d61ecTimo Sirainen message_header_decode_utf8(hdr->full_value, hdr->full_value_len,
e9503210d3521a6833ed62dc332fc42ffb0e7a13Timo Sirainen ctx->buf, ctx->dtcase);
31597236d79ac38a5cea7ab65a9d0a3df64ed201Timo Sirainen value_len = ctx->buf->used;
31597236d79ac38a5cea7ab65a9d0a3df64ed201Timo Sirainen
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen if (ctx->dtcase) {
31597236d79ac38a5cea7ab65a9d0a3df64ed201Timo Sirainen (void)uni_utf8_to_decomposed_titlecase(hdr->name, hdr->name_len,
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen ctx->buf);
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen buffer_append_c(ctx->buf, '\0');
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen }
70c181da837ed85fc5b0426c010b65609bda5329Timo Sirainen
70c181da837ed85fc5b0426c010b65609bda5329Timo Sirainen ctx->hdr = *hdr;
d5cebe7f98e63d4e2822863ef2faa4971e8b3a5dTimo Sirainen ctx->hdr.full_value = ctx->buf->data;
ce6b6093957885a74fd6e85c18801dbb727d61ecTimo Sirainen ctx->hdr.full_value_len = value_len;
70c181da837ed85fc5b0426c010b65609bda5329Timo Sirainen ctx->hdr.value_len = 0;
70c181da837ed85fc5b0426c010b65609bda5329Timo Sirainen if (ctx->dtcase) {
adc409a7ac9689d3baf811712ad5a5432cab2d87Timo Sirainen ctx->hdr.name = CONST_PTR_OFFSET(ctx->buf->data,
3cf67672fdc87583cb23ce088c95bb5dee60e74dTimo Sirainen ctx->hdr.full_value_len);
adc409a7ac9689d3baf811712ad5a5432cab2d87Timo Sirainen ctx->hdr.name_len = ctx->buf->used - 1 - value_len;
adc409a7ac9689d3baf811712ad5a5432cab2d87Timo Sirainen }
adc409a7ac9689d3baf811712ad5a5432cab2d87Timo Sirainen
adc409a7ac9689d3baf811712ad5a5432cab2d87Timo Sirainen output->hdr = &ctx->hdr;
9ed2951bd0bb1878a27437d7c00611b2baadd614Timo Sirainen return TRUE;
3cf67672fdc87583cb23ce088c95bb5dee60e74dTimo Sirainen}
adc409a7ac9689d3baf811712ad5a5432cab2d87Timo Sirainen
adc409a7ac9689d3baf811712ad5a5432cab2d87Timo Sirainenstatic void translation_buf_decode(struct message_decoder_context *ctx,
adc409a7ac9689d3baf811712ad5a5432cab2d87Timo Sirainen const unsigned char **data, size_t *size)
8eb94c5190ba09bb6f6f068eec7bf96750f08d1dTimo Sirainen{
d5cebe7f98e63d4e2822863ef2faa4971e8b3a5dTimo Sirainen unsigned char trans_buf[MAX_TRANSLATION_BUF_SIZE+1];
ce6b6093957885a74fd6e85c18801dbb727d61ecTimo Sirainen unsigned int data_wanted, skip;
e9503210d3521a6833ed62dc332fc42ffb0e7a13Timo Sirainen size_t trans_size;
e9503210d3521a6833ed62dc332fc42ffb0e7a13Timo Sirainen
e9503210d3521a6833ed62dc332fc42ffb0e7a13Timo Sirainen /* @UNSAFE: move the previously untranslated bytes to trans_buf
adc409a7ac9689d3baf811712ad5a5432cab2d87Timo Sirainen and see if we have now enough data to get the next character
adc409a7ac9689d3baf811712ad5a5432cab2d87Timo Sirainen translated */
b321df9603081896b70ec44635af96d674a9839aTimo Sirainen memcpy(trans_buf, ctx->translation_buf, ctx->translation_size);
ce6b6093957885a74fd6e85c18801dbb727d61ecTimo Sirainen data_wanted = sizeof(trans_buf) - ctx->translation_size;
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen if (data_wanted > *size)
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen data_wanted = *size;
f968e62caa52a8924bd05ebf76ff515b5c18e17bTimo Sirainen memcpy(trans_buf + ctx->translation_size, *data, data_wanted);
b44650b0f48a4b5f0dc240ed836833a00b643b9fTimo Sirainen
b44650b0f48a4b5f0dc240ed836833a00b643b9fTimo Sirainen trans_size = ctx->translation_size + data_wanted;
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen (void)charset_to_utf8(ctx->charset_trans, trans_buf,
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen &trans_size, ctx->buf2);
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen
226259ee6fb9830dafc1a5ba1e95bf5a4345b406Timo Sirainen i_assert(trans_size > ctx->translation_size);
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen skip = trans_size - ctx->translation_size;
3313a51ef9b245248d672c20f930c52a577a42f7Timo Sirainen
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen i_assert(*size >= skip);
47bb4a7615c85f212f061499f04f121d6d625387Timo Sirainen *data += skip;
47bb4a7615c85f212f061499f04f121d6d625387Timo Sirainen *size -= skip;
47bb4a7615c85f212f061499f04f121d6d625387Timo Sirainen
226259ee6fb9830dafc1a5ba1e95bf5a4345b406Timo Sirainen ctx->translation_size = 0;
226259ee6fb9830dafc1a5ba1e95bf5a4345b406Timo Sirainen}
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen
226259ee6fb9830dafc1a5ba1e95bf5a4345b406Timo Sirainenstatic void message_decode_body_init_charset(struct message_decoder_context *ctx)
226259ee6fb9830dafc1a5ba1e95bf5a4345b406Timo Sirainen{
226259ee6fb9830dafc1a5ba1e95bf5a4345b406Timo Sirainen enum charset_flags flags;
226259ee6fb9830dafc1a5ba1e95bf5a4345b406Timo Sirainen
3313a51ef9b245248d672c20f930c52a577a42f7Timo Sirainen if (ctx->charset_utf8)
3313a51ef9b245248d672c20f930c52a577a42f7Timo Sirainen return;
3313a51ef9b245248d672c20f930c52a577a42f7Timo Sirainen
3313a51ef9b245248d672c20f930c52a577a42f7Timo Sirainen if (ctx->charset_trans != NULL &&
3313a51ef9b245248d672c20f930c52a577a42f7Timo Sirainen strcasecmp(ctx->content_charset, ctx->charset_trans_charset) == 0) {
3313a51ef9b245248d672c20f930c52a577a42f7Timo Sirainen /* already have the correct translation selected */
226259ee6fb9830dafc1a5ba1e95bf5a4345b406Timo Sirainen return;
226259ee6fb9830dafc1a5ba1e95bf5a4345b406Timo Sirainen }
226259ee6fb9830dafc1a5ba1e95bf5a4345b406Timo Sirainen
226259ee6fb9830dafc1a5ba1e95bf5a4345b406Timo Sirainen if (ctx->charset_trans != NULL)
226259ee6fb9830dafc1a5ba1e95bf5a4345b406Timo Sirainen charset_to_utf8_end(&ctx->charset_trans);
226259ee6fb9830dafc1a5ba1e95bf5a4345b406Timo Sirainen i_free(ctx->charset_trans_charset);
b44650b0f48a4b5f0dc240ed836833a00b643b9fTimo Sirainen
226259ee6fb9830dafc1a5ba1e95bf5a4345b406Timo Sirainen flags = ctx->dtcase ? CHARSET_FLAG_DECOMP_TITLECASE : 0;
226259ee6fb9830dafc1a5ba1e95bf5a4345b406Timo Sirainen ctx->charset_trans_charset = i_strdup(ctx->content_charset != NULL ?
137ea7ca34005345aa2304a940149b7f3774d727Timo Sirainen ctx->content_charset : "UTF-8");
226259ee6fb9830dafc1a5ba1e95bf5a4345b406Timo Sirainen if (charset_to_utf8_begin(ctx->charset_trans_charset,
137ea7ca34005345aa2304a940149b7f3774d727Timo Sirainen flags, &ctx->charset_trans) < 0)
226259ee6fb9830dafc1a5ba1e95bf5a4345b406Timo Sirainen ctx->charset_trans = NULL;
4376643cd2c7110e752c09f838f2c4eee6ed8ac6Timo Sirainen}
938835576b218f6bb9498d829cef9514f8609c6fTimo Sirainen
938835576b218f6bb9498d829cef9514f8609c6fTimo Sirainenstatic bool message_decode_body(struct message_decoder_context *ctx,
938835576b218f6bb9498d829cef9514f8609c6fTimo Sirainen struct message_block *input,
938835576b218f6bb9498d829cef9514f8609c6fTimo Sirainen struct message_block *output)
938835576b218f6bb9498d829cef9514f8609c6fTimo Sirainen{
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen unsigned char new_buf[MAX_ENCODING_BUF_SIZE+1];
226259ee6fb9830dafc1a5ba1e95bf5a4345b406Timo Sirainen const unsigned char *data = NULL;
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen size_t pos, size = 0, skip = 0;
798cfe56c9871262770384da1239162b3800cce1Timo Sirainen int ret;
82f53ea81671bcc7b9bf24a34b04a4ba2752efd3Timo Sirainen
d5cebe7f98e63d4e2822863ef2faa4971e8b3a5dTimo Sirainen if (ctx->encoding_size != 0) {
82f53ea81671bcc7b9bf24a34b04a4ba2752efd3Timo Sirainen /* @UNSAFE */
d5cebe7f98e63d4e2822863ef2faa4971e8b3a5dTimo Sirainen memcpy(new_buf, ctx->encoding_buf, ctx->encoding_size);
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen skip = sizeof(new_buf) - ctx->encoding_size;
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen if (skip > input->size)
ce6b6093957885a74fd6e85c18801dbb727d61ecTimo Sirainen skip = input->size;
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen memcpy(new_buf + ctx->encoding_size, input->data, skip);
31597236d79ac38a5cea7ab65a9d0a3df64ed201Timo Sirainen }
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen switch (ctx->content_type) {
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen case CONTENT_TYPE_UNKNOWN:
a8e132559a7ebe54c8269d79ce29fa3338c76199Timo Sirainen /* just skip this body */
e2a700d0628e395d64cbcef4b5b4510816bf51c4Timo Sirainen return FALSE;
e9503210d3521a6833ed62dc332fc42ffb0e7a13Timo Sirainen
31597236d79ac38a5cea7ab65a9d0a3df64ed201Timo Sirainen case CONTENT_TYPE_BINARY:
31597236d79ac38a5cea7ab65a9d0a3df64ed201Timo Sirainen data = input->data;
e9503210d3521a6833ed62dc332fc42ffb0e7a13Timo Sirainen size = pos = input->size;
ce6b6093957885a74fd6e85c18801dbb727d61ecTimo Sirainen break;
e9503210d3521a6833ed62dc332fc42ffb0e7a13Timo Sirainen case CONTENT_TYPE_QP:
31597236d79ac38a5cea7ab65a9d0a3df64ed201Timo Sirainen buffer_set_used_size(ctx->buf, 0);
31597236d79ac38a5cea7ab65a9d0a3df64ed201Timo Sirainen if (ctx->encoding_size != 0) {
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen quoted_printable_decode(new_buf,
31597236d79ac38a5cea7ab65a9d0a3df64ed201Timo Sirainen ctx->encoding_size + skip,
a8e132559a7ebe54c8269d79ce29fa3338c76199Timo Sirainen &pos, ctx->buf);
e9503210d3521a6833ed62dc332fc42ffb0e7a13Timo Sirainen i_assert(pos >= ctx->encoding_size);
70c181da837ed85fc5b0426c010b65609bda5329Timo Sirainen skip = pos - ctx->encoding_size;
70c181da837ed85fc5b0426c010b65609bda5329Timo Sirainen }
d5cebe7f98e63d4e2822863ef2faa4971e8b3a5dTimo Sirainen
ce6b6093957885a74fd6e85c18801dbb727d61ecTimo Sirainen quoted_printable_decode(input->data + skip, input->size - skip,
70c181da837ed85fc5b0426c010b65609bda5329Timo Sirainen &pos, ctx->buf);
70c181da837ed85fc5b0426c010b65609bda5329Timo Sirainen pos += skip;
3cf67672fdc87583cb23ce088c95bb5dee60e74dTimo Sirainen data = ctx->buf->data;
e9503210d3521a6833ed62dc332fc42ffb0e7a13Timo Sirainen size = ctx->buf->used;
e9503210d3521a6833ed62dc332fc42ffb0e7a13Timo Sirainen break;
e2a700d0628e395d64cbcef4b5b4510816bf51c4Timo Sirainen case CONTENT_TYPE_BASE64:
d5cebe7f98e63d4e2822863ef2faa4971e8b3a5dTimo Sirainen buffer_set_used_size(ctx->buf, 0);
ce6b6093957885a74fd6e85c18801dbb727d61ecTimo Sirainen if (ctx->encoding_size != 0) {
e9503210d3521a6833ed62dc332fc42ffb0e7a13Timo Sirainen if (base64_decode(new_buf, ctx->encoding_size + skip,
e9503210d3521a6833ed62dc332fc42ffb0e7a13Timo Sirainen &pos, ctx->buf) < 0) {
e9503210d3521a6833ed62dc332fc42ffb0e7a13Timo Sirainen /* corrupted base64 data, don't bother with
43358fffb1d9f3091fd94895e0ac4643c50e2388Timo Sirainen the rest of it */
43358fffb1d9f3091fd94895e0ac4643c50e2388Timo Sirainen return FALSE;
43358fffb1d9f3091fd94895e0ac4643c50e2388Timo Sirainen }
ce6b6093957885a74fd6e85c18801dbb727d61ecTimo Sirainen i_assert(pos >= ctx->encoding_size);
43358fffb1d9f3091fd94895e0ac4643c50e2388Timo Sirainen skip = pos - ctx->encoding_size;
43358fffb1d9f3091fd94895e0ac4643c50e2388Timo Sirainen }
430c0b0c370bebeeceba2e206be76bc134742f41Timo Sirainen ret = base64_decode(input->data + skip, input->size - skip,
25ee72451d16374ed27fdbf829f4ec756c778352Timo Sirainen &pos, ctx->buf);
a3dd97fb6d92a89c3de0597fed2d4b044c7aeb84Timo Sirainen if (ret < 0) {
ce6b6093957885a74fd6e85c18801dbb727d61ecTimo Sirainen /* corrupted base64 data, don't bother with
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen the rest of it */
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen return FALSE;
35136dd2baf8dc30e4e754294ed81ff48e8c1e64Timo Sirainen }
484e12acec34f16e5a8adc001e23ae48f1dda8c7Timo Sirainen if (ret == 0) {
35136dd2baf8dc30e4e754294ed81ff48e8c1e64Timo Sirainen /* end of base64 input */
35136dd2baf8dc30e4e754294ed81ff48e8c1e64Timo Sirainen pos = input->size - skip;
35136dd2baf8dc30e4e754294ed81ff48e8c1e64Timo Sirainen }
35136dd2baf8dc30e4e754294ed81ff48e8c1e64Timo Sirainen pos += skip;
35136dd2baf8dc30e4e754294ed81ff48e8c1e64Timo Sirainen data = ctx->buf->data;
35136dd2baf8dc30e4e754294ed81ff48e8c1e64Timo Sirainen size = ctx->buf->used;
484e12acec34f16e5a8adc001e23ae48f1dda8c7Timo Sirainen break;
798cfe56c9871262770384da1239162b3800cce1Timo Sirainen }
35136dd2baf8dc30e4e754294ed81ff48e8c1e64Timo Sirainen
35136dd2baf8dc30e4e754294ed81ff48e8c1e64Timo Sirainen if (pos != input->size) {
35136dd2baf8dc30e4e754294ed81ff48e8c1e64Timo Sirainen /* @UNSAFE */
35136dd2baf8dc30e4e754294ed81ff48e8c1e64Timo Sirainen i_assert(pos < input->size);
35136dd2baf8dc30e4e754294ed81ff48e8c1e64Timo Sirainen ctx->encoding_size = input->size - pos;
35136dd2baf8dc30e4e754294ed81ff48e8c1e64Timo Sirainen i_assert(ctx->encoding_size <= sizeof(ctx->encoding_buf));
ce6b6093957885a74fd6e85c18801dbb727d61ecTimo Sirainen memcpy(ctx->encoding_buf, input->data + pos,
35136dd2baf8dc30e4e754294ed81ff48e8c1e64Timo Sirainen ctx->encoding_size);
31597236d79ac38a5cea7ab65a9d0a3df64ed201Timo Sirainen } else {
35136dd2baf8dc30e4e754294ed81ff48e8c1e64Timo Sirainen ctx->encoding_size = 0;
35136dd2baf8dc30e4e754294ed81ff48e8c1e64Timo Sirainen }
35136dd2baf8dc30e4e754294ed81ff48e8c1e64Timo Sirainen
31597236d79ac38a5cea7ab65a9d0a3df64ed201Timo Sirainen if (ctx->charset_utf8) {
35136dd2baf8dc30e4e754294ed81ff48e8c1e64Timo Sirainen if (ctx->dtcase) {
31597236d79ac38a5cea7ab65a9d0a3df64ed201Timo Sirainen buffer_set_used_size(ctx->buf2, 0);
31597236d79ac38a5cea7ab65a9d0a3df64ed201Timo Sirainen (void)uni_utf8_to_decomposed_titlecase(data, size,
35136dd2baf8dc30e4e754294ed81ff48e8c1e64Timo Sirainen ctx->buf2);
ce6b6093957885a74fd6e85c18801dbb727d61ecTimo Sirainen output->data = ctx->buf2->data;
35136dd2baf8dc30e4e754294ed81ff48e8c1e64Timo Sirainen output->size = ctx->buf2->used;
31597236d79ac38a5cea7ab65a9d0a3df64ed201Timo Sirainen } else {
31597236d79ac38a5cea7ab65a9d0a3df64ed201Timo Sirainen output->data =
35136dd2baf8dc30e4e754294ed81ff48e8c1e64Timo Sirainen uni_utf8_get_valid_data(data, size, ctx->buf2,
31597236d79ac38a5cea7ab65a9d0a3df64ed201Timo Sirainen &output->size);
35136dd2baf8dc30e4e754294ed81ff48e8c1e64Timo Sirainen }
35136dd2baf8dc30e4e754294ed81ff48e8c1e64Timo Sirainen } else if (ctx->charset_trans == NULL) {
35136dd2baf8dc30e4e754294ed81ff48e8c1e64Timo Sirainen /* unknown charset */
ce6b6093957885a74fd6e85c18801dbb727d61ecTimo Sirainen output->data = uni_utf8_get_valid_data(data, size, ctx->buf2,
35136dd2baf8dc30e4e754294ed81ff48e8c1e64Timo Sirainen &output->size);
35136dd2baf8dc30e4e754294ed81ff48e8c1e64Timo Sirainen } else {
3cf67672fdc87583cb23ce088c95bb5dee60e74dTimo Sirainen buffer_set_used_size(ctx->buf2, 0);
35136dd2baf8dc30e4e754294ed81ff48e8c1e64Timo Sirainen if (ctx->translation_size != 0)
35136dd2baf8dc30e4e754294ed81ff48e8c1e64Timo Sirainen translation_buf_decode(ctx, &data, &size);
35136dd2baf8dc30e4e754294ed81ff48e8c1e64Timo Sirainen
35136dd2baf8dc30e4e754294ed81ff48e8c1e64Timo Sirainen pos = size;
ce6b6093957885a74fd6e85c18801dbb727d61ecTimo Sirainen (void)charset_to_utf8(ctx->charset_trans,
35136dd2baf8dc30e4e754294ed81ff48e8c1e64Timo Sirainen data, &pos, ctx->buf2);
35136dd2baf8dc30e4e754294ed81ff48e8c1e64Timo Sirainen if (pos != size) {
35136dd2baf8dc30e4e754294ed81ff48e8c1e64Timo Sirainen ctx->translation_size = size - pos;
35136dd2baf8dc30e4e754294ed81ff48e8c1e64Timo Sirainen i_assert(ctx->translation_size <=
31597236d79ac38a5cea7ab65a9d0a3df64ed201Timo Sirainen sizeof(ctx->translation_buf));
ce6b6093957885a74fd6e85c18801dbb727d61ecTimo Sirainen memcpy(ctx->translation_buf, data + pos,
35136dd2baf8dc30e4e754294ed81ff48e8c1e64Timo Sirainen ctx->translation_size);
35136dd2baf8dc30e4e754294ed81ff48e8c1e64Timo Sirainen }
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen output->data = ctx->buf2->data;
0f39a57760d93cddbce3ca43096d78e0fe2f42fdTimo Sirainen output->size = ctx->buf2->used;
137ea7ca34005345aa2304a940149b7f3774d727Timo Sirainen }
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen output->hdr = NULL;
6fabfb7bbfd88d0c1de66981e52850f26067623bTimo Sirainen return TRUE;
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen}
3313a51ef9b245248d672c20f930c52a577a42f7Timo Sirainen
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainenbool message_decoder_decode_next_block(struct message_decoder_context *ctx,
6fabfb7bbfd88d0c1de66981e52850f26067623bTimo Sirainen struct message_block *input,
6fabfb7bbfd88d0c1de66981e52850f26067623bTimo Sirainen struct message_block *output)
6fabfb7bbfd88d0c1de66981e52850f26067623bTimo Sirainen{
0f39a57760d93cddbce3ca43096d78e0fe2f42fdTimo Sirainen if (input->part != ctx->prev_part) {
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen /* MIME part changed. */
0f39a57760d93cddbce3ca43096d78e0fe2f42fdTimo Sirainen message_decoder_decode_reset(ctx);
0f39a57760d93cddbce3ca43096d78e0fe2f42fdTimo Sirainen }
0f39a57760d93cddbce3ca43096d78e0fe2f42fdTimo Sirainen
3313a51ef9b245248d672c20f930c52a577a42f7Timo Sirainen output->part = input->part;
3313a51ef9b245248d672c20f930c52a577a42f7Timo Sirainen ctx->prev_part = input->part;
3313a51ef9b245248d672c20f930c52a577a42f7Timo Sirainen
3313a51ef9b245248d672c20f930c52a577a42f7Timo Sirainen if (input->hdr != NULL)
3313a51ef9b245248d672c20f930c52a577a42f7Timo Sirainen return message_decode_header(ctx, input->hdr, output);
0f39a57760d93cddbce3ca43096d78e0fe2f42fdTimo Sirainen else if (input->size != 0)
0f39a57760d93cddbce3ca43096d78e0fe2f42fdTimo Sirainen return message_decode_body(ctx, input, output);
0f39a57760d93cddbce3ca43096d78e0fe2f42fdTimo Sirainen else {
0f39a57760d93cddbce3ca43096d78e0fe2f42fdTimo Sirainen output->hdr = NULL;
0f39a57760d93cddbce3ca43096d78e0fe2f42fdTimo Sirainen output->size = 0;
0f39a57760d93cddbce3ca43096d78e0fe2f42fdTimo Sirainen message_decode_body_init_charset(ctx);
137ea7ca34005345aa2304a940149b7f3774d727Timo Sirainen return TRUE;
0f39a57760d93cddbce3ca43096d78e0fe2f42fdTimo Sirainen }
0f39a57760d93cddbce3ca43096d78e0fe2f42fdTimo Sirainen}
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainenvoid message_decoder_decode_reset(struct message_decoder_context *ctx)
798cfe56c9871262770384da1239162b3800cce1Timo Sirainen{
82f53ea81671bcc7b9bf24a34b04a4ba2752efd3Timo Sirainen i_free_and_null(ctx->content_charset);
d5cebe7f98e63d4e2822863ef2faa4971e8b3a5dTimo Sirainen ctx->content_type = CONTENT_TYPE_BINARY;
82f53ea81671bcc7b9bf24a34b04a4ba2752efd3Timo Sirainen ctx->charset_utf8 = TRUE;
d5cebe7f98e63d4e2822863ef2faa4971e8b3a5dTimo Sirainen ctx->encoding_size = 0;
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen}
2e29e4797a48d78d669821722bdb54fd0a1d3b94Timo Sirainen