message-decoder.c revision 2c70dc3ca3f0e9e67b76065c4824aba6b2e4251c
/* Copyright (c) 2006-2015 Dovecot authors, see the included COPYING file */
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen#include "lib.h"
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen#include "buffer.h"
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen#include "base64.h"
96d19229e5f322411eb84446e5477d8170cfa5afTimo Sirainen#include "str.h"
d39a04db2f4d0599cb9b5f03a9aa10a3c234453cTimo Sirainen#include "unichar.h"
f7423cbbd9dea363a5df18ebb96da055a977ae79Timo Sirainen#include "charset-utf8.h"
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen#include "quoted-printable.h"
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen#include "rfc822-parser.h"
a3fe8c0c54d87822f4b4f8f0d10caac611861b2bTimo Sirainen#include "rfc2231-parser.h"
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen#include "message-parser.h"
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen#include "message-header-decode.h"
c99fe55d4535d839a6ad0735c4719e076a1adb2cTimo Sirainen#include "message-decoder.h"
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
/* base64 needs at most 4 bytes per character, quoted-printable at most 3. */
#define MAX_ENCODING_BUF_SIZE 3
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainenstruct message_decoder_context {
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen enum message_decoder_flags flags;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen normalizer_func_t *normalizer;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen struct message_part *prev_part;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen struct message_header_line hdr;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen buffer_t *buf, *buf2;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen char *charset_trans_charset;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen struct charset_translation *charset_trans;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen char translation_buf[CHARSET_MAX_PENDING_BUF_SIZE];
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen unsigned int translation_size;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen buffer_t *encoding_buf;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
cf0ad1a0bddb0787f3d7b408a96d721a8b2a98a3Timo Sirainen char *content_charset;
cf0ad1a0bddb0787f3d7b408a96d721a8b2a98a3Timo Sirainen enum message_cte message_cte;
cf0ad1a0bddb0787f3d7b408a96d721a8b2a98a3Timo Sirainen
cf0ad1a0bddb0787f3d7b408a96d721a8b2a98a3Timo Sirainen unsigned int binary_input:1;
cf0ad1a0bddb0787f3d7b408a96d721a8b2a98a3Timo Sirainen};
86791365b10f45982c88e70f2eb94fd6c3fea151Timo Sirainen
/* Defined further below; declared here because
   message_decoder_set_return_binary() already calls it. */
static void message_decode_body_init_charset(
	struct message_decoder_context *ctx, struct message_part *part);
4d4d6d4745682790c20d759ba93dbea46b812c5dTimo Sirainen
4d4d6d4745682790c20d759ba93dbea46b812c5dTimo Sirainenstruct message_decoder_context *
bd20ef9d5c639faf470912ab94e6e6627d3eaebaTimo Sirainenmessage_decoder_init(normalizer_func_t *normalizer,
cf0ad1a0bddb0787f3d7b408a96d721a8b2a98a3Timo Sirainen enum message_decoder_flags flags)
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen{
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen struct message_decoder_context *ctx;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
104318260228780a5c6b3181b3401e8e504e2776Timo Sirainen ctx = i_new(struct message_decoder_context, 1);
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen ctx->flags = flags;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen ctx->normalizer = normalizer;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen ctx->buf = buffer_create_dynamic(default_pool, 8192);
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen ctx->buf2 = buffer_create_dynamic(default_pool, 8192);
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen ctx->encoding_buf = buffer_create_dynamic(default_pool, 128);
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen return ctx;
b437874782ad048daa155e0ac863c2326c3f5e43Timo Sirainen}
b437874782ad048daa155e0ac863c2326c3f5e43Timo Sirainen
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainenvoid message_decoder_deinit(struct message_decoder_context **_ctx)
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen{
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen struct message_decoder_context *ctx = *_ctx;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen *_ctx = NULL;
f46363f428d8f2784146d36692b21936a48a7006Timo Sirainen
f46363f428d8f2784146d36692b21936a48a7006Timo Sirainen if (ctx->charset_trans != NULL)
f46363f428d8f2784146d36692b21936a48a7006Timo Sirainen charset_to_utf8_end(&ctx->charset_trans);
f46363f428d8f2784146d36692b21936a48a7006Timo Sirainen
b437874782ad048daa155e0ac863c2326c3f5e43Timo Sirainen buffer_free(&ctx->encoding_buf);
b437874782ad048daa155e0ac863c2326c3f5e43Timo Sirainen buffer_free(&ctx->buf);
b437874782ad048daa155e0ac863c2326c3f5e43Timo Sirainen buffer_free(&ctx->buf2);
b437874782ad048daa155e0ac863c2326c3f5e43Timo Sirainen i_free(ctx->charset_trans_charset);
b437874782ad048daa155e0ac863c2326c3f5e43Timo Sirainen i_free(ctx->content_charset);
ccf50662cc02b5e703039a4ff7f91a4470e25b71Timo Sirainen i_free(ctx);
ccf50662cc02b5e703039a4ff7f91a4470e25b71Timo Sirainen}
ccf50662cc02b5e703039a4ff7f91a4470e25b71Timo Sirainen
ccf50662cc02b5e703039a4ff7f91a4470e25b71Timo Sirainenvoid message_decoder_set_return_binary(struct message_decoder_context *ctx,
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen bool set)
b437874782ad048daa155e0ac863c2326c3f5e43Timo Sirainen{
71df09024cea5f2faa93da3bb9513ee96ba6bf22Timo Sirainen if (set)
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen ctx->flags |= MESSAGE_DECODER_FLAG_RETURN_BINARY;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen else
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen ctx->flags &= ~MESSAGE_DECODER_FLAG_RETURN_BINARY;
cf0ad1a0bddb0787f3d7b408a96d721a8b2a98a3Timo Sirainen message_decode_body_init_charset(ctx, ctx->prev_part);
cf0ad1a0bddb0787f3d7b408a96d721a8b2a98a3Timo Sirainen}
cf0ad1a0bddb0787f3d7b408a96d721a8b2a98a3Timo Sirainen
cf0ad1a0bddb0787f3d7b408a96d721a8b2a98a3Timo Sirainenenum message_cte message_decoder_parse_cte(struct message_header_line *hdr)
cf0ad1a0bddb0787f3d7b408a96d721a8b2a98a3Timo Sirainen{
cf0ad1a0bddb0787f3d7b408a96d721a8b2a98a3Timo Sirainen struct rfc822_parser_context parser;
cf0ad1a0bddb0787f3d7b408a96d721a8b2a98a3Timo Sirainen enum message_cte message_cte;
cf0ad1a0bddb0787f3d7b408a96d721a8b2a98a3Timo Sirainen string_t *value;
f158d9a303bb15a6848ca276c9391c7ca52e452bTimo Sirainen
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen value = t_str_new(64);
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen rfc822_parser_init(&parser, hdr->full_value, hdr->full_value_len, NULL);
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen rfc822_skip_lwsp(&parser);
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen (void)rfc822_parse_mime_token(&parser, value);
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
7fe37c2b0e4cd2a39896ab16e47eb418a59e3934Timo Sirainen message_cte = MESSAGE_CTE_UNKNOWN;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen switch (str_len(value)) {
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen case 4:
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen if (i_memcasecmp(str_data(value), "7bit", 4) == 0 ||
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen i_memcasecmp(str_data(value), "8bit", 4) == 0)
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen message_cte = MESSAGE_CTE_78BIT;
a3fe8c0c54d87822f4b4f8f0d10caac611861b2bTimo Sirainen break;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen case 6:
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen if (i_memcasecmp(str_data(value), "base64", 6) == 0)
c444eeaa2866152cf62652698aa11b125e8454bcTimo Sirainen message_cte = MESSAGE_CTE_BASE64;
c444eeaa2866152cf62652698aa11b125e8454bcTimo Sirainen else if (i_memcasecmp(str_data(value), "binary", 6) == 0)
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen message_cte = MESSAGE_CTE_BINARY;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen break;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen case 16:
78fa3c578c14ee8a612f86cf73b6181c7f16463fTimo Sirainen if (i_memcasecmp(str_data(value), "quoted-printable", 16) == 0)
0c22bef8f5b35c645de8affd8746307fc53bd222Timo Sirainen message_cte = MESSAGE_CTE_QP;
0c22bef8f5b35c645de8affd8746307fc53bd222Timo Sirainen break;
78fa3c578c14ee8a612f86cf73b6181c7f16463fTimo Sirainen }
78fa3c578c14ee8a612f86cf73b6181c7f16463fTimo Sirainen return message_cte;
c444eeaa2866152cf62652698aa11b125e8454bcTimo Sirainen}
c444eeaa2866152cf62652698aa11b125e8454bcTimo Sirainen
78fa3c578c14ee8a612f86cf73b6181c7f16463fTimo Sirainenstatic void
78fa3c578c14ee8a612f86cf73b6181c7f16463fTimo Sirainenparse_content_type(struct message_decoder_context *ctx,
78fa3c578c14ee8a612f86cf73b6181c7f16463fTimo Sirainen struct message_header_line *hdr)
8451c4b5afc1ff5366438b2766f75b592c33e1ecTimo Sirainen{
8451c4b5afc1ff5366438b2766f75b592c33e1ecTimo Sirainen struct rfc822_parser_context parser;
8451c4b5afc1ff5366438b2766f75b592c33e1ecTimo Sirainen const char *const *results;
8451c4b5afc1ff5366438b2766f75b592c33e1ecTimo Sirainen string_t *str;
8451c4b5afc1ff5366438b2766f75b592c33e1ecTimo Sirainen
8451c4b5afc1ff5366438b2766f75b592c33e1ecTimo Sirainen if (ctx->content_charset != NULL)
8451c4b5afc1ff5366438b2766f75b592c33e1ecTimo Sirainen return;
8451c4b5afc1ff5366438b2766f75b592c33e1ecTimo Sirainen
78fa3c578c14ee8a612f86cf73b6181c7f16463fTimo Sirainen rfc822_parser_init(&parser, hdr->full_value, hdr->full_value_len, NULL);
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen rfc822_skip_lwsp(&parser);
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen str = t_str_new(64);
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen if (rfc822_parse_content_type(&parser, str) <= 0)
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen return;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
ca4526e3b5fbf5ea3dd477a2098522a44c9ac52cTimo Sirainen rfc2231_parse(&parser, &results);
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen for (; *results != NULL; results += 2) {
40a8e6948d662339c0c5e2c7abfb84ae7c1803fdTimo Sirainen if (strcasecmp(results[0], "charset") == 0) {
40a8e6948d662339c0c5e2c7abfb84ae7c1803fdTimo Sirainen ctx->content_charset = i_strdup(results[1]);
40a8e6948d662339c0c5e2c7abfb84ae7c1803fdTimo Sirainen break;
40a8e6948d662339c0c5e2c7abfb84ae7c1803fdTimo Sirainen }
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen }
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen}
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
cd2ed64888b42b481cde6bb9548c8520516fa3e9Timo Sirainenstatic bool message_decode_header(struct message_decoder_context *ctx,
a3fe8c0c54d87822f4b4f8f0d10caac611861b2bTimo Sirainen struct message_header_line *hdr,
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen struct message_block *output)
a3fe8c0c54d87822f4b4f8f0d10caac611861b2bTimo Sirainen{
a3fe8c0c54d87822f4b4f8f0d10caac611861b2bTimo Sirainen size_t value_len;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
e6b4168ba670d9e51ea7877661def039ae6b53c3Timo Sirainen if (hdr->continues) {
a10ed8c47534b4c6b6bf2711ccfe577e720a47b4Timo Sirainen hdr->use_full_value = TRUE;
e6b4168ba670d9e51ea7877661def039ae6b53c3Timo Sirainen return FALSE;
e6b4168ba670d9e51ea7877661def039ae6b53c3Timo Sirainen }
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen T_BEGIN {
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen if (hdr->name_len == 12 &&
01230de017cd273de41143d88e9c18df1243ae8aTimo Sirainen strcasecmp(hdr->name, "Content-Type") == 0)
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen parse_content_type(ctx, hdr);
96d19229e5f322411eb84446e5477d8170cfa5afTimo Sirainen if (hdr->name_len == 25 &&
b8efab7ea8876c0a33a73ee0d08eddada31320f8Timo Sirainen strcasecmp(hdr->name, "Content-Transfer-Encoding") == 0)
96d19229e5f322411eb84446e5477d8170cfa5afTimo Sirainen ctx->message_cte = message_decoder_parse_cte(hdr);
b8efab7ea8876c0a33a73ee0d08eddada31320f8Timo Sirainen } T_END;
b8efab7ea8876c0a33a73ee0d08eddada31320f8Timo Sirainen
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen buffer_set_used_size(ctx->buf, 0);
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen message_header_decode_utf8(hdr->full_value, hdr->full_value_len,
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen ctx->buf, ctx->normalizer);
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen value_len = ctx->buf->used;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen if (ctx->normalizer != NULL) {
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen (void)ctx->normalizer(hdr->name, hdr->name_len, ctx->buf);
c444eeaa2866152cf62652698aa11b125e8454bcTimo Sirainen buffer_append_c(ctx->buf, '\0');
c444eeaa2866152cf62652698aa11b125e8454bcTimo Sirainen } else {
c444eeaa2866152cf62652698aa11b125e8454bcTimo Sirainen if (!uni_utf8_get_valid_data((const unsigned char *)hdr->name,
c444eeaa2866152cf62652698aa11b125e8454bcTimo Sirainen hdr->name_len, ctx->buf))
c444eeaa2866152cf62652698aa11b125e8454bcTimo Sirainen buffer_append_c(ctx->buf, '\0');
c444eeaa2866152cf62652698aa11b125e8454bcTimo Sirainen }
f7423cbbd9dea363a5df18ebb96da055a977ae79Timo Sirainen
f7423cbbd9dea363a5df18ebb96da055a977ae79Timo Sirainen ctx->hdr = *hdr;
f7423cbbd9dea363a5df18ebb96da055a977ae79Timo Sirainen ctx->hdr.full_value = ctx->buf->data;
f7423cbbd9dea363a5df18ebb96da055a977ae79Timo Sirainen ctx->hdr.full_value_len = value_len;
f7423cbbd9dea363a5df18ebb96da055a977ae79Timo Sirainen ctx->hdr.value_len = 0;
f7423cbbd9dea363a5df18ebb96da055a977ae79Timo Sirainen if (ctx->buf->used != value_len) {
f7423cbbd9dea363a5df18ebb96da055a977ae79Timo Sirainen ctx->hdr.name = CONST_PTR_OFFSET(ctx->buf->data,
32b78da5dfbbf6a06b3dbdc9278c60b55714f9bcTimo Sirainen ctx->hdr.full_value_len);
32b78da5dfbbf6a06b3dbdc9278c60b55714f9bcTimo Sirainen ctx->hdr.name_len = ctx->buf->used - 1 - value_len;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen }
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen output->hdr = &ctx->hdr;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen return TRUE;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen}
ca4526e3b5fbf5ea3dd477a2098522a44c9ac52cTimo Sirainen
ca4526e3b5fbf5ea3dd477a2098522a44c9ac52cTimo Sirainenstatic void translation_buf_decode(struct message_decoder_context *ctx,
ca4526e3b5fbf5ea3dd477a2098522a44c9ac52cTimo Sirainen const unsigned char **data, size_t *size)
ca4526e3b5fbf5ea3dd477a2098522a44c9ac52cTimo Sirainen{
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen unsigned char trans_buf[CHARSET_MAX_PENDING_BUF_SIZE+1];
c444eeaa2866152cf62652698aa11b125e8454bcTimo Sirainen unsigned int data_wanted, skip;
c444eeaa2866152cf62652698aa11b125e8454bcTimo Sirainen size_t trans_size, orig_size;
c444eeaa2866152cf62652698aa11b125e8454bcTimo Sirainen
c444eeaa2866152cf62652698aa11b125e8454bcTimo Sirainen /* @UNSAFE: move the previously untranslated bytes to trans_buf
c444eeaa2866152cf62652698aa11b125e8454bcTimo Sirainen and see if we have now enough data to get the next character
c444eeaa2866152cf62652698aa11b125e8454bcTimo Sirainen translated */
c444eeaa2866152cf62652698aa11b125e8454bcTimo Sirainen memcpy(trans_buf, ctx->translation_buf, ctx->translation_size);
c444eeaa2866152cf62652698aa11b125e8454bcTimo Sirainen data_wanted = sizeof(trans_buf) - ctx->translation_size;
c444eeaa2866152cf62652698aa11b125e8454bcTimo Sirainen if (data_wanted > *size)
c444eeaa2866152cf62652698aa11b125e8454bcTimo Sirainen data_wanted = *size;
c444eeaa2866152cf62652698aa11b125e8454bcTimo Sirainen memcpy(trans_buf + ctx->translation_size, *data, data_wanted);
c444eeaa2866152cf62652698aa11b125e8454bcTimo Sirainen
c444eeaa2866152cf62652698aa11b125e8454bcTimo Sirainen orig_size = trans_size = ctx->translation_size + data_wanted;
c444eeaa2866152cf62652698aa11b125e8454bcTimo Sirainen (void)charset_to_utf8(ctx->charset_trans, trans_buf,
c444eeaa2866152cf62652698aa11b125e8454bcTimo Sirainen &trans_size, ctx->buf2);
c444eeaa2866152cf62652698aa11b125e8454bcTimo Sirainen
c444eeaa2866152cf62652698aa11b125e8454bcTimo Sirainen if (trans_size <= ctx->translation_size) {
c444eeaa2866152cf62652698aa11b125e8454bcTimo Sirainen /* need more data to finish the translation. */
c444eeaa2866152cf62652698aa11b125e8454bcTimo Sirainen i_assert(orig_size < CHARSET_MAX_PENDING_BUF_SIZE);
c444eeaa2866152cf62652698aa11b125e8454bcTimo Sirainen memcpy(ctx->translation_buf, trans_buf, orig_size);
c444eeaa2866152cf62652698aa11b125e8454bcTimo Sirainen ctx->translation_size = orig_size;
c444eeaa2866152cf62652698aa11b125e8454bcTimo Sirainen *data += *size;
c444eeaa2866152cf62652698aa11b125e8454bcTimo Sirainen *size = 0;
c444eeaa2866152cf62652698aa11b125e8454bcTimo Sirainen return;
c444eeaa2866152cf62652698aa11b125e8454bcTimo Sirainen }
c444eeaa2866152cf62652698aa11b125e8454bcTimo Sirainen skip = trans_size - ctx->translation_size;
0161376aac025266d8654577c4b9ce371ffc87eaTimo Sirainen
0161376aac025266d8654577c4b9ce371ffc87eaTimo Sirainen i_assert(*size >= skip);
0161376aac025266d8654577c4b9ce371ffc87eaTimo Sirainen *data += skip;
0161376aac025266d8654577c4b9ce371ffc87eaTimo Sirainen *size -= skip;
0161376aac025266d8654577c4b9ce371ffc87eaTimo Sirainen
c444eeaa2866152cf62652698aa11b125e8454bcTimo Sirainen ctx->translation_size = 0;
c444eeaa2866152cf62652698aa11b125e8454bcTimo Sirainen}
c444eeaa2866152cf62652698aa11b125e8454bcTimo Sirainen
c444eeaa2866152cf62652698aa11b125e8454bcTimo Sirainenstatic void
c444eeaa2866152cf62652698aa11b125e8454bcTimo Sirainenmessage_decode_body_init_charset(struct message_decoder_context *ctx,
f158d9a303bb15a6848ca276c9391c7ca52e452bTimo Sirainen struct message_part *part)
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen{
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen ctx->binary_input = ctx->content_charset == NULL &&
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen (ctx->flags & MESSAGE_DECODER_FLAG_RETURN_BINARY) != 0 &&
578ef2538ccf42e2a48234c24a8b709397101d88Timo Sirainen (part->flags & (MESSAGE_PART_FLAG_TEXT |
578ef2538ccf42e2a48234c24a8b709397101d88Timo Sirainen MESSAGE_PART_FLAG_MESSAGE_RFC822)) == 0;
578ef2538ccf42e2a48234c24a8b709397101d88Timo Sirainen
578ef2538ccf42e2a48234c24a8b709397101d88Timo Sirainen if (ctx->binary_input)
578ef2538ccf42e2a48234c24a8b709397101d88Timo Sirainen return;
578ef2538ccf42e2a48234c24a8b709397101d88Timo Sirainen
578ef2538ccf42e2a48234c24a8b709397101d88Timo Sirainen if (ctx->charset_trans != NULL && ctx->content_charset != NULL &&
578ef2538ccf42e2a48234c24a8b709397101d88Timo Sirainen strcasecmp(ctx->content_charset, ctx->charset_trans_charset) == 0) {
578ef2538ccf42e2a48234c24a8b709397101d88Timo Sirainen /* already have the correct translation selected */
578ef2538ccf42e2a48234c24a8b709397101d88Timo Sirainen return;
578ef2538ccf42e2a48234c24a8b709397101d88Timo Sirainen }
578ef2538ccf42e2a48234c24a8b709397101d88Timo Sirainen
e0740628f6ca05f4bc79a9d8a90b650f4d38d4d0Timo Sirainen if (ctx->charset_trans != NULL)
e0740628f6ca05f4bc79a9d8a90b650f4d38d4d0Timo Sirainen charset_to_utf8_end(&ctx->charset_trans);
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen i_free_and_null(ctx->charset_trans_charset);
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen ctx->charset_trans_charset = i_strdup(ctx->content_charset != NULL ?
c6335901c67a4c9365319190a111a2168f3b06f5Timo Sirainen ctx->content_charset : "UTF-8");
c6335901c67a4c9365319190a111a2168f3b06f5Timo Sirainen if (charset_to_utf8_begin(ctx->charset_trans_charset, ctx->normalizer,
6d2b3ce2c6ef62334985ece4f0ab8b154e0e9560Timo Sirainen &ctx->charset_trans) < 0)
6d2b3ce2c6ef62334985ece4f0ab8b154e0e9560Timo Sirainen ctx->charset_trans = NULL;
6d2b3ce2c6ef62334985ece4f0ab8b154e0e9560Timo Sirainen}
6d2b3ce2c6ef62334985ece4f0ab8b154e0e9560Timo Sirainen
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainenstatic bool message_decode_body(struct message_decoder_context *ctx,
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen struct message_block *input,
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen struct message_block *output)
ca4526e3b5fbf5ea3dd477a2098522a44c9ac52cTimo Sirainen{
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen const unsigned char *data = NULL;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen size_t pos = 0, size = 0;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen int ret;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen if (ctx->encoding_buf->used != 0) {
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen /* @UNSAFE */
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen buffer_append(ctx->encoding_buf, input->data, input->size);
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen }
deb06d37292d9112d74bdf80cfebb92ab5151679Timo Sirainen
deb06d37292d9112d74bdf80cfebb92ab5151679Timo Sirainen switch (ctx->message_cte) {
deb06d37292d9112d74bdf80cfebb92ab5151679Timo Sirainen case MESSAGE_CTE_UNKNOWN:
deb06d37292d9112d74bdf80cfebb92ab5151679Timo Sirainen /* just skip this body */
deb06d37292d9112d74bdf80cfebb92ab5151679Timo Sirainen return FALSE;
deb06d37292d9112d74bdf80cfebb92ab5151679Timo Sirainen
deb06d37292d9112d74bdf80cfebb92ab5151679Timo Sirainen case MESSAGE_CTE_78BIT:
deb06d37292d9112d74bdf80cfebb92ab5151679Timo Sirainen case MESSAGE_CTE_BINARY:
deb06d37292d9112d74bdf80cfebb92ab5151679Timo Sirainen data = input->data;
deb06d37292d9112d74bdf80cfebb92ab5151679Timo Sirainen size = pos = input->size;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen break;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen case MESSAGE_CTE_QP:
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen buffer_set_used_size(ctx->buf, 0);
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen if (ctx->encoding_buf->used != 0) {
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen (void)quoted_printable_decode(ctx->encoding_buf->data,
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen ctx->encoding_buf->used,
ca4526e3b5fbf5ea3dd477a2098522a44c9ac52cTimo Sirainen &pos, ctx->buf);
deb06d37292d9112d74bdf80cfebb92ab5151679Timo Sirainen } else {
deb06d37292d9112d74bdf80cfebb92ab5151679Timo Sirainen (void)quoted_printable_decode(input->data, input->size,
deb06d37292d9112d74bdf80cfebb92ab5151679Timo Sirainen &pos, ctx->buf);
a10ed8c47534b4c6b6bf2711ccfe577e720a47b4Timo Sirainen }
a10ed8c47534b4c6b6bf2711ccfe577e720a47b4Timo Sirainen data = ctx->buf->data;
deb06d37292d9112d74bdf80cfebb92ab5151679Timo Sirainen size = ctx->buf->used;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen break;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen case MESSAGE_CTE_BASE64:
deb06d37292d9112d74bdf80cfebb92ab5151679Timo Sirainen buffer_set_used_size(ctx->buf, 0);
deb06d37292d9112d74bdf80cfebb92ab5151679Timo Sirainen if (ctx->encoding_buf->used != 0) {
deb06d37292d9112d74bdf80cfebb92ab5151679Timo Sirainen ret = base64_decode(ctx->encoding_buf->data,
deb06d37292d9112d74bdf80cfebb92ab5151679Timo Sirainen ctx->encoding_buf->used,
deb06d37292d9112d74bdf80cfebb92ab5151679Timo Sirainen &pos, ctx->buf);
deb06d37292d9112d74bdf80cfebb92ab5151679Timo Sirainen } else {
f5e1d3d6b34ec152aa1ff15c7bd3d3552e9227eaTimo Sirainen ret = base64_decode(input->data, input->size,
deb06d37292d9112d74bdf80cfebb92ab5151679Timo Sirainen &pos, ctx->buf);
deb06d37292d9112d74bdf80cfebb92ab5151679Timo Sirainen }
deb06d37292d9112d74bdf80cfebb92ab5151679Timo Sirainen if (ret < 0) {
deb06d37292d9112d74bdf80cfebb92ab5151679Timo Sirainen /* corrupted base64 data, don't bother with
deb06d37292d9112d74bdf80cfebb92ab5151679Timo Sirainen the rest of it */
deb06d37292d9112d74bdf80cfebb92ab5151679Timo Sirainen return FALSE;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen }
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen if (ret == 0) {
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen /* end of base64 input */
3f603ef00e35fca21605afa0ad8d76e94fee2b96Timo Sirainen pos = input->size;
3f603ef00e35fca21605afa0ad8d76e94fee2b96Timo Sirainen buffer_set_used_size(ctx->encoding_buf, 0);
3f603ef00e35fca21605afa0ad8d76e94fee2b96Timo Sirainen }
3f603ef00e35fca21605afa0ad8d76e94fee2b96Timo Sirainen data = ctx->buf->data;
3f603ef00e35fca21605afa0ad8d76e94fee2b96Timo Sirainen size = ctx->buf->used;
3f603ef00e35fca21605afa0ad8d76e94fee2b96Timo Sirainen break;
86791365b10f45982c88e70f2eb94fd6c3fea151Timo Sirainen }
86791365b10f45982c88e70f2eb94fd6c3fea151Timo Sirainen
86791365b10f45982c88e70f2eb94fd6c3fea151Timo Sirainen if (ctx->encoding_buf->used != 0)
86791365b10f45982c88e70f2eb94fd6c3fea151Timo Sirainen buffer_delete(ctx->encoding_buf, 0, pos);
86791365b10f45982c88e70f2eb94fd6c3fea151Timo Sirainen else if (pos != input->size) {
86791365b10f45982c88e70f2eb94fd6c3fea151Timo Sirainen buffer_append(ctx->encoding_buf,
ccf50662cc02b5e703039a4ff7f91a4470e25b71Timo Sirainen input->data + pos, input->size - pos);
ccf50662cc02b5e703039a4ff7f91a4470e25b71Timo Sirainen }
ccf50662cc02b5e703039a4ff7f91a4470e25b71Timo Sirainen
ccf50662cc02b5e703039a4ff7f91a4470e25b71Timo Sirainen if (ctx->binary_input) {
ccf50662cc02b5e703039a4ff7f91a4470e25b71Timo Sirainen output->data = data;
ccf50662cc02b5e703039a4ff7f91a4470e25b71Timo Sirainen output->size = size;
96d19229e5f322411eb84446e5477d8170cfa5afTimo Sirainen } else {
96d19229e5f322411eb84446e5477d8170cfa5afTimo Sirainen buffer_set_used_size(ctx->buf2, 0);
96d19229e5f322411eb84446e5477d8170cfa5afTimo Sirainen if (ctx->translation_size != 0)
96d19229e5f322411eb84446e5477d8170cfa5afTimo Sirainen translation_buf_decode(ctx, &data, &size);
2e533fb1283b5f06a4063b519e47f1861c910386Timo Sirainen
96d19229e5f322411eb84446e5477d8170cfa5afTimo Sirainen pos = size;
d9515a2eaa94c8287188c38fc28028727671e729Timo Sirainen (void)charset_to_utf8(ctx->charset_trans,
d9515a2eaa94c8287188c38fc28028727671e729Timo Sirainen data, &pos, ctx->buf2);
d9515a2eaa94c8287188c38fc28028727671e729Timo Sirainen if (pos != size) {
d9515a2eaa94c8287188c38fc28028727671e729Timo Sirainen ctx->translation_size = size - pos;
2e533fb1283b5f06a4063b519e47f1861c910386Timo Sirainen i_assert(ctx->translation_size <=
96d19229e5f322411eb84446e5477d8170cfa5afTimo Sirainen sizeof(ctx->translation_buf));
96d19229e5f322411eb84446e5477d8170cfa5afTimo Sirainen memcpy(ctx->translation_buf, data + pos,
96d19229e5f322411eb84446e5477d8170cfa5afTimo Sirainen ctx->translation_size);
96d19229e5f322411eb84446e5477d8170cfa5afTimo Sirainen }
96d19229e5f322411eb84446e5477d8170cfa5afTimo Sirainen output->data = ctx->buf2->data;
96d19229e5f322411eb84446e5477d8170cfa5afTimo Sirainen output->size = ctx->buf2->used;
96d19229e5f322411eb84446e5477d8170cfa5afTimo Sirainen }
96d19229e5f322411eb84446e5477d8170cfa5afTimo Sirainen
96d19229e5f322411eb84446e5477d8170cfa5afTimo Sirainen output->hdr = NULL;
96d19229e5f322411eb84446e5477d8170cfa5afTimo Sirainen return TRUE;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen}
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainenbool message_decoder_decode_next_block(struct message_decoder_context *ctx,
96d19229e5f322411eb84446e5477d8170cfa5afTimo Sirainen struct message_block *input,
96d19229e5f322411eb84446e5477d8170cfa5afTimo Sirainen struct message_block *output)
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen{
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen if (input->part != ctx->prev_part) {
96d19229e5f322411eb84446e5477d8170cfa5afTimo Sirainen /* MIME part changed. */
b8efab7ea8876c0a33a73ee0d08eddada31320f8Timo Sirainen message_decoder_decode_reset(ctx);
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen }
96d19229e5f322411eb84446e5477d8170cfa5afTimo Sirainen
96d19229e5f322411eb84446e5477d8170cfa5afTimo Sirainen output->part = input->part;
96d19229e5f322411eb84446e5477d8170cfa5afTimo Sirainen ctx->prev_part = input->part;
96d19229e5f322411eb84446e5477d8170cfa5afTimo Sirainen
96d19229e5f322411eb84446e5477d8170cfa5afTimo Sirainen if (input->hdr != NULL)
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen return message_decode_header(ctx, input->hdr, output);
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen else if (input->size != 0)
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen return message_decode_body(ctx, input, output);
d39a04db2f4d0599cb9b5f03a9aa10a3c234453cTimo Sirainen else {
d39a04db2f4d0599cb9b5f03a9aa10a3c234453cTimo Sirainen output->hdr = NULL;
d39a04db2f4d0599cb9b5f03a9aa10a3c234453cTimo Sirainen output->size = 0;
d39a04db2f4d0599cb9b5f03a9aa10a3c234453cTimo Sirainen message_decode_body_init_charset(ctx, input->part);
d39a04db2f4d0599cb9b5f03a9aa10a3c234453cTimo Sirainen return TRUE;
17fe695b985e9d6e9dc39c05b24e6b3c3b7e1ba1Timo Sirainen }
047c00cd3f7f403672f81569413669238df8c15aTimo Sirainen}
17fe695b985e9d6e9dc39c05b24e6b3c3b7e1ba1Timo Sirainen
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainenvoid message_decoder_decode_reset(struct message_decoder_context *ctx)
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen{
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen i_free_and_null(ctx->content_charset);
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen ctx->message_cte = MESSAGE_CTE_78BIT;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen buffer_set_used_size(ctx->encoding_buf, 0);
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen}
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen