message-decoder.c revision 7e50cca6b1dab5a7e2a90a8949678c62f4a0958a
183bea41fa640dc8117f3eb45ff935cd81377a84Timo Sirainen/* Copyright (c) 2006-2011 Dovecot authors, see the included COPYING file */
64510d2cc23a79d2142030bf5bade44baa490db3Timo Sirainen/* base64 takes max 4 bytes per character, q-p takes max 3. */
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen/* UTF-8 takes max 5 bytes per character. Not sure about others, but I'd think
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen 10 is more than enough for everyone.. */
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen char translation_buf[MAX_TRANSLATION_BUF_SIZE];
7e50cca6b1dab5a7e2a90a8949678c62f4a0958aTimo Sirainenmessage_decode_body_init_charset(struct message_decoder_context *ctx,
5cbefc6537aefbf1491416c433de00fc3e649a13Timo Sirainenmessage_decoder_init(enum message_decoder_flags flags)
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen ctx = i_new(struct message_decoder_context, 1);
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen ctx->buf = buffer_create_dynamic(default_pool, 8192);
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen ctx->buf2 = buffer_create_dynamic(default_pool, 8192);
db24d60c306c9d477392ff5b61cb3fc95fef3bb7Timo Sirainen ctx->encoding_buf = buffer_create_dynamic(default_pool, 128);
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainenvoid message_decoder_deinit(struct message_decoder_context **_ctx)
7e50cca6b1dab5a7e2a90a8949678c62f4a0958aTimo Sirainenvoid message_decoder_set_return_binary(struct message_decoder_context *ctx,
7e50cca6b1dab5a7e2a90a8949678c62f4a0958aTimo Sirainen ctx->flags |= MESSAGE_DECODER_FLAG_RETURN_BINARY;
7e50cca6b1dab5a7e2a90a8949678c62f4a0958aTimo Sirainen ctx->flags &= ~MESSAGE_DECODER_FLAG_RETURN_BINARY;
7e50cca6b1dab5a7e2a90a8949678c62f4a0958aTimo Sirainen message_decode_body_init_charset(ctx, ctx->prev_part);
687d1dee0e92229232aa8be416897b640df67d07Timo Sirainenparse_content_transfer_encoding(struct message_decoder_context *ctx,
687d1dee0e92229232aa8be416897b640df67d07Timo Sirainen rfc822_parser_init(&parser, hdr->full_value, hdr->full_value_len, NULL);
687d1dee0e92229232aa8be416897b640df67d07Timo Sirainen (void)rfc822_parse_mime_token(&parser, value);
87b426af6a2365c6840b14281a98c23e903bf28eTimo Sirainen if (i_memcasecmp(str_data(value), "7bit", 4) == 0 ||
87b426af6a2365c6840b14281a98c23e903bf28eTimo Sirainen i_memcasecmp(str_data(value), "8bit", 4) == 0)
87b426af6a2365c6840b14281a98c23e903bf28eTimo Sirainen if (i_memcasecmp(str_data(value), "base64", 6) == 0)
87b426af6a2365c6840b14281a98c23e903bf28eTimo Sirainen else if (i_memcasecmp(str_data(value), "binary", 6) == 0)
87b426af6a2365c6840b14281a98c23e903bf28eTimo Sirainen if (i_memcasecmp(str_data(value), "quoted-printable", 16) == 0)
687d1dee0e92229232aa8be416897b640df67d07Timo Sirainenparse_content_type(struct message_decoder_context *ctx,
dabb4cec4cf9bdb34013de682b08f1284cfb670fTimo Sirainen const char *const *results;
687d1dee0e92229232aa8be416897b640df67d07Timo Sirainen rfc822_parser_init(&parser, hdr->full_value, hdr->full_value_len, NULL);
eddd9bf1a1369aea4a2715f6be1137da6d17d293Timo Sirainen if (rfc822_parse_content_type(&parser, str) <= 0)
dabb4cec4cf9bdb34013de682b08f1284cfb670fTimo Sirainen ctx->charset_utf8 = charset_is_utf8(results[1]);
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainenstatic bool message_decode_header(struct message_decoder_context *ctx,
5cbefc6537aefbf1491416c433de00fc3e649a13Timo Sirainen bool dtcase = (ctx->flags & MESSAGE_DECODER_FLAG_DTCASE) != 0;
eddd9bf1a1369aea4a2715f6be1137da6d17d293Timo Sirainen strcasecmp(hdr->name, "Content-Transfer-Encoding") == 0)
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainen message_header_decode_utf8(hdr->full_value, hdr->full_value_len,
65988f5a8abed57e9894fec77105941e046d3490Timo Sirainen (void)uni_utf8_to_decomposed_titlecase(hdr->name, hdr->name_len,
b516a7812b9acc04522869fead3aa6d2787dcdc6Timo Sirainen if (!uni_utf8_get_valid_data((const unsigned char *)hdr->name,
e1b83f64e62cc3e8967c75fcc3f9b5dbb243d3b3Timo Sirainen ctx->hdr.name = CONST_PTR_OFFSET(ctx->buf->data,
e1b83f64e62cc3e8967c75fcc3f9b5dbb243d3b3Timo Sirainen ctx->hdr.name_len = ctx->buf->used - 1 - value_len;
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainenstatic void translation_buf_decode(struct message_decoder_context *ctx,
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen unsigned char trans_buf[MAX_TRANSLATION_BUF_SIZE+1];
62394a19cba1a8df01cad66eaa9331a70464441eTimo Sirainen /* @UNSAFE: move the previously untranslated bytes to trans_buf
62394a19cba1a8df01cad66eaa9331a70464441eTimo Sirainen and see if we have now enough data to get the next character
62394a19cba1a8df01cad66eaa9331a70464441eTimo Sirainen translated */
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen memcpy(trans_buf, ctx->translation_buf, ctx->translation_size);
62394a19cba1a8df01cad66eaa9331a70464441eTimo Sirainen data_wanted = sizeof(trans_buf) - ctx->translation_size;
62394a19cba1a8df01cad66eaa9331a70464441eTimo Sirainen memcpy(trans_buf + ctx->translation_size, *data, data_wanted);
62394a19cba1a8df01cad66eaa9331a70464441eTimo Sirainen trans_size = ctx->translation_size + data_wanted;
62394a19cba1a8df01cad66eaa9331a70464441eTimo Sirainen (void)charset_to_utf8(ctx->charset_trans, trans_buf,
5cbefc6537aefbf1491416c433de00fc3e649a13Timo Sirainenmessage_decode_body_init_charset(struct message_decoder_context *ctx,
87506860556bf42f656b13f4b14cf81b75261e95Timo Sirainen ctx->binary_input = ctx->content_charset == NULL &&
87506860556bf42f656b13f4b14cf81b75261e95Timo Sirainen (ctx->flags & MESSAGE_DECODER_FLAG_RETURN_BINARY) != 0 &&
87506860556bf42f656b13f4b14cf81b75261e95Timo Sirainen if (ctx->charset_trans != NULL && ctx->content_charset != NULL &&
2e03303e721a293d796c0287829396f5caea76eaTimo Sirainen strcasecmp(ctx->content_charset, ctx->charset_trans_charset) == 0) {
2e03303e721a293d796c0287829396f5caea76eaTimo Sirainen /* already have the correct translation selected */
5cbefc6537aefbf1491416c433de00fc3e649a13Timo Sirainen flags = (ctx->flags & MESSAGE_DECODER_FLAG_DTCASE) != 0 ?
2e03303e721a293d796c0287829396f5caea76eaTimo Sirainen ctx->charset_trans_charset = i_strdup(ctx->content_charset != NULL ?
2e03303e721a293d796c0287829396f5caea76eaTimo Sirainen if (charset_to_utf8_begin(ctx->charset_trans_charset,
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainenstatic bool message_decode_body(struct message_decoder_context *ctx,
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen /* @UNSAFE */
db24d60c306c9d477392ff5b61cb3fc95fef3bb7Timo Sirainen buffer_append(ctx->encoding_buf, input->data, input->size);
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen /* just skip this body */
db24d60c306c9d477392ff5b61cb3fc95fef3bb7Timo Sirainen quoted_printable_decode(ctx->encoding_buf->data,
db24d60c306c9d477392ff5b61cb3fc95fef3bb7Timo Sirainen quoted_printable_decode(input->data, input->size,
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen /* corrupted base64 data, don't bother with
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen the rest of it */
64510d2cc23a79d2142030bf5bade44baa490db3Timo Sirainen /* end of base64 input */
5cbefc6537aefbf1491416c433de00fc3e649a13Timo Sirainen if ((ctx->flags & MESSAGE_DECODER_FLAG_DTCASE) != 0) {
65988f5a8abed57e9894fec77105941e046d3490Timo Sirainen (void)uni_utf8_to_decomposed_titlecase(data, size,
7aa59f55d8a4e02c7039fbd22660c4055bfc8393Timo Sirainen } else if (uni_utf8_get_valid_data(data, size, ctx->buf2)) {
1dba794aa92dc13e6afd7a50a8c33cb19d6aa235Timo Sirainen /* unknown charset */
7aa59f55d8a4e02c7039fbd22660c4055bfc8393Timo Sirainen if (uni_utf8_get_valid_data(data, size, ctx->buf2)) {
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainenbool message_decoder_decode_next_block(struct message_decoder_context *ctx,
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen /* MIME part changed. */
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen return message_decode_header(ctx, input->hdr, output);
050975ee630c761ab237fce7b8f84fe189bb02d2Timo Sirainen return message_decode_body(ctx, input, output);
5cbefc6537aefbf1491416c433de00fc3e649a13Timo Sirainen message_decode_body_init_charset(ctx, input->part);