bcb4e51a409d94ae670de96afb8483a4f7855294Stephan Bosch/* Copyright (c) 2002-2018 Dovecot authors, see the included COPYING file */
847aeef259d42e2f14cf126699e28291e6e1fb53Timo Sirainen
847aeef259d42e2f14cf126699e28291e6e1fb53Timo Sirainen#include "lib.h"
847aeef259d42e2f14cf126699e28291e6e1fb53Timo Sirainen#include "base64.h"
847aeef259d42e2f14cf126699e28291e6e1fb53Timo Sirainen#include "buffer.h"
65988f5a8abed57e9894fec77105941e046d3490Timo Sirainen#include "unichar.h"
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainen#include "charset-utf8.h"
847aeef259d42e2f14cf126699e28291e6e1fb53Timo Sirainen#include "quoted-printable.h"
847aeef259d42e2f14cf126699e28291e6e1fb53Timo Sirainen#include "message-header-decode.h"
847aeef259d42e2f14cf126699e28291e6e1fb53Timo Sirainen
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainenstatic size_t
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainenmessage_header_decode_encoded(const unsigned char *data, size_t size,
2ac5f36aa7c2e7a07ba8815d43a6d7483f62e74cTimo Sirainen buffer_t *decodebuf, size_t *charsetlen_r)
847aeef259d42e2f14cf126699e28291e6e1fb53Timo Sirainen{
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen#define QCOUNT 3
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen unsigned int num = 0;
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen size_t i, start_pos[QCOUNT];
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen /* data should contain "charset?encoding?text?=" */
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen for (i = 0; i < size; i++) {
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen if (data[i] == '?') {
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen start_pos[num++] = i;
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen if (num == QCOUNT)
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen break;
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen }
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen }
79454ba23ef6baf56997cd3cc23123eb69ae4f4cTimo Sirainen if (i+1 >= size || data[i+1] != '=') {
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen /* invalid block */
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen return 0;
847aeef259d42e2f14cf126699e28291e6e1fb53Timo Sirainen }
847aeef259d42e2f14cf126699e28291e6e1fb53Timo Sirainen
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen buffer_append(decodebuf, data, start_pos[0]);
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen buffer_append_c(decodebuf, '\0');
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen *charsetlen_r = decodebuf->used;
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen switch (data[start_pos[0]+1]) {
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen case 'q':
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen case 'Q':
e4f1a5fdad77884e1de516521504c15dc936fa9dTimo Sirainen if (quoted_printable_q_decode(data + start_pos[1] + 1,
e4f1a5fdad77884e1de516521504c15dc936fa9dTimo Sirainen start_pos[2] - start_pos[1] - 1,
e4f1a5fdad77884e1de516521504c15dc936fa9dTimo Sirainen decodebuf) < 0) {
e4f1a5fdad77884e1de516521504c15dc936fa9dTimo Sirainen /* we skipped over some invalid data */
e4f1a5fdad77884e1de516521504c15dc936fa9dTimo Sirainen }
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen break;
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen case 'b':
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen case 'B':
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen if (base64_decode(data + start_pos[1] + 1,
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen start_pos[2] - start_pos[1] - 1,
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen NULL, decodebuf) < 0) {
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen /* contains invalid data. show what we got so far. */
847aeef259d42e2f14cf126699e28291e6e1fb53Timo Sirainen }
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen break;
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen default:
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen /* unknown encoding */
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen return 0;
847aeef259d42e2f14cf126699e28291e6e1fb53Timo Sirainen }
847aeef259d42e2f14cf126699e28291e6e1fb53Timo Sirainen
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen return start_pos[2] + 2;
847aeef259d42e2f14cf126699e28291e6e1fb53Timo Sirainen}
847aeef259d42e2f14cf126699e28291e6e1fb53Timo Sirainen
/* Returns true when every one of the size bytes in data is a space, tab,
   CR or LF. An empty range (size == 0) also counts as whitespace-only. */
static bool is_only_lwsp(const unsigned char *data, size_t size)
{
	size_t i = 0;

	/* advance past linear whitespace; stop at the first other byte */
	while (i < size) {
		unsigned char c = data[i];

		if (c != ' ' && c != '\t' && c != '\r' && c != '\n')
			break;
		i++;
	}
	/* reaching the end means nothing else was found */
	return i == size;
}
b014857be9961acf2d37ef7b76d941b20cc8c2d1Timo Sirainen
847aeef259d42e2f14cf126699e28291e6e1fb53Timo Sirainenvoid message_header_decode(const unsigned char *data, size_t size,
7c424aa51c956c628e3512055841aa2f9eef4833Timo Sirainen message_header_decode_callback_t *callback,
f923659c0e5298263d80622c99f4dc4132b4675bTimo Sirainen void *context)
847aeef259d42e2f14cf126699e28291e6e1fb53Timo Sirainen{
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen buffer_t *decodebuf = NULL;
2ac5f36aa7c2e7a07ba8815d43a6d7483f62e74cTimo Sirainen size_t charsetlen = 0;
2a3fc652e13a574ca14ff2405b5c29a59232db49Timo Sirainen size_t pos, start_pos, ret;
847aeef259d42e2f14cf126699e28291e6e1fb53Timo Sirainen
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen /* =?charset?Q|B?text?= */
8bb360f9e5de1c25e4f875205bb06e8bf15dae14Timo Sirainen start_pos = 0;
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen for (pos = 0; pos + 1 < size; ) {
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen if (data[pos] != '=' || data[pos+1] != '?') {
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen pos++;
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen continue;
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen }
847aeef259d42e2f14cf126699e28291e6e1fb53Timo Sirainen
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen /* encoded string beginning */
b014857be9961acf2d37ef7b76d941b20cc8c2d1Timo Sirainen if (pos != start_pos &&
b014857be9961acf2d37ef7b76d941b20cc8c2d1Timo Sirainen !is_only_lwsp(data+start_pos, pos-start_pos)) {
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen /* send the unencoded data so far */
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen if (!callback(data + start_pos, pos - start_pos,
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen NULL, context)) {
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen start_pos = size;
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen break;
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen }
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen }
847aeef259d42e2f14cf126699e28291e6e1fb53Timo Sirainen
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen if (decodebuf == NULL) {
fab050cbfdf3da692441d2e2fb4b2a4c6ac9e0daTimo Sirainen decodebuf = buffer_create_dynamic(default_pool,
fab050cbfdf3da692441d2e2fb4b2a4c6ac9e0daTimo Sirainen size - pos);
847aeef259d42e2f14cf126699e28291e6e1fb53Timo Sirainen } else {
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen buffer_set_used_size(decodebuf, 0);
847aeef259d42e2f14cf126699e28291e6e1fb53Timo Sirainen }
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen pos += 2;
2a3fc652e13a574ca14ff2405b5c29a59232db49Timo Sirainen ret = message_header_decode_encoded(data + pos, size - pos,
2a3fc652e13a574ca14ff2405b5c29a59232db49Timo Sirainen decodebuf, &charsetlen);
2a3fc652e13a574ca14ff2405b5c29a59232db49Timo Sirainen if (ret == 0) {
2a3fc652e13a574ca14ff2405b5c29a59232db49Timo Sirainen start_pos = pos-2;
2a3fc652e13a574ca14ff2405b5c29a59232db49Timo Sirainen continue;
2a3fc652e13a574ca14ff2405b5c29a59232db49Timo Sirainen }
2a3fc652e13a574ca14ff2405b5c29a59232db49Timo Sirainen pos += ret;
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen if (decodebuf->used > charsetlen) {
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen /* decodebuf contains <charset> NUL <text> */
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen if (!callback(CONST_PTR_OFFSET(decodebuf->data,
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen charsetlen),
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen decodebuf->used - charsetlen,
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen decodebuf->data, context)) {
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen start_pos = size;
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen break;
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen }
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen }
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen start_pos = pos;
847aeef259d42e2f14cf126699e28291e6e1fb53Timo Sirainen }
847aeef259d42e2f14cf126699e28291e6e1fb53Timo Sirainen
812ac1e2570c600a086c09b24d250224a822a97dTimo Sirainen if (size != start_pos) {
79454ba23ef6baf56997cd3cc23123eb69ae4f4cTimo Sirainen i_assert(size > start_pos);
d65a556a5ec078cd7f1d0060adb16fc860d66b27Timo Sirainen (void)callback(data + start_pos, size - start_pos,
d65a556a5ec078cd7f1d0060adb16fc860d66b27Timo Sirainen NULL, context);
d65a556a5ec078cd7f1d0060adb16fc860d66b27Timo Sirainen }
6307d76096764e66bddc63d4a3e5a1aa19cc528fJosef 'Jeff' Sipek buffer_free(&decodebuf);
847aeef259d42e2f14cf126699e28291e6e1fb53Timo Sirainen}
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainen
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainenstruct decode_utf8_context {
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainen buffer_t *dest;
d9076f5939edf5d20a261494b1a861dcbb0d32e2Timo Sirainen normalizer_func_t *normalizer;
0dffa25d211be541ee3c953b23566a1a990789dfTimo Sirainen bool changed:1;
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainen};
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainen
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainenstatic bool
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainendecode_utf8_callback(const unsigned char *data, size_t size,
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainen const char *charset, void *context)
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainen{
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainen struct decode_utf8_context *ctx = context;
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainen struct charset_translation *t;
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainen
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainen if (charset == NULL || charset_is_utf8(charset)) {
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainen /* ASCII / UTF-8 */
d9076f5939edf5d20a261494b1a861dcbb0d32e2Timo Sirainen if (ctx->normalizer != NULL) {
d9076f5939edf5d20a261494b1a861dcbb0d32e2Timo Sirainen (void)ctx->normalizer(data, size, ctx->dest);
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainen } else {
b516a7812b9acc04522869fead3aa6d2787dcdc6Timo Sirainen if (uni_utf8_get_valid_data(data, size, ctx->dest))
b516a7812b9acc04522869fead3aa6d2787dcdc6Timo Sirainen buffer_append(ctx->dest, data, size);
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainen }
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainen return TRUE;
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainen }
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainen
d9076f5939edf5d20a261494b1a861dcbb0d32e2Timo Sirainen if (charset_to_utf8_begin(charset, ctx->normalizer, &t) < 0) {
af6c7862e6160ffaecec458f4cec43b94272ad57Timo Sirainen /* data probably still contains some valid ASCII characters.
af6c7862e6160ffaecec458f4cec43b94272ad57Timo Sirainen append them. */
af6c7862e6160ffaecec458f4cec43b94272ad57Timo Sirainen if (uni_utf8_get_valid_data(data, size, ctx->dest))
af6c7862e6160ffaecec458f4cec43b94272ad57Timo Sirainen buffer_append(ctx->dest, data, size);
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainen return TRUE;
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainen }
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainen
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainen /* ignore any errors */
055f4599bba1874fa1148a8fa488517fa077619cTimo Sirainen (void)charset_to_utf8(t, data, &size, ctx->dest);
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainen charset_to_utf8_end(&t);
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainen return TRUE;
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainen}
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainen
82d3a1d1594ed93d04d7bf999027b3e5104de6e4Timo Sirainenvoid message_header_decode_utf8(const unsigned char *data, size_t size,
d9076f5939edf5d20a261494b1a861dcbb0d32e2Timo Sirainen buffer_t *dest, normalizer_func_t *normalizer)
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainen{
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainen struct decode_utf8_context ctx;
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainen
efe78d3ba24fc866af1c79b9223dc0809ba26cadStephan Bosch i_zero(&ctx);
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainen ctx.dest = dest;
d9076f5939edf5d20a261494b1a861dcbb0d32e2Timo Sirainen ctx.normalizer = normalizer;
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainen message_header_decode(data, size, decode_utf8_callback, &ctx);
72388282bf6718c39af34cfcf51438910f9d62daTimo Sirainen}