/* Copyright (c) 2002-2018 Dovecot authors, see the included COPYING file */
#include "lib.h"
#include "base64.h"
#include "buffer.h"
#include "unichar.h"
#include "charset-utf8.h"
#include "quoted-printable.h"
#include "message-header-decode.h"
/* Decode the inside of one RFC 2047 style encoded-word.

   data/size is expected to begin right after the leading "=?", i.e. it
   should look like "charset?encoding?text?=" (possibly followed by more
   input).

   On success decodebuf has had "<charset>" NUL "<decoded text>" appended,
   *charsetlen_r is the decodebuf offset where the decoded text begins, and
   the return value is the number of input bytes used, including the
   closing "?=".

   Returns 0 if the three '?' separators followed by '=' can't be found, or
   if the encoding isn't Q/q/B/b. Only the first byte after the first '?'
   is examined when selecting the encoding. In the unknown-encoding case
   decodebuf and *charsetlen_r have already been written to. */
static size_t
message_header_decode_encoded(const unsigned char *data, size_t size,
			      buffer_t *decodebuf, size_t *charsetlen_r)
{
#define QCOUNT 3
	size_t qpos[QCOUNT];
	unsigned int found = 0;
	size_t idx = 0;
	const unsigned char *payload;
	size_t payload_len;

	/* remember where each of the three '?' separators is; idx is left
	   pointing at the last one, or at size if there weren't enough */
	while (idx < size) {
		if (data[idx] == '?') {
			qpos[found++] = idx;
			if (found == QCOUNT)
				break;
		}
		idx++;
	}

	/* the third '?' must be directly followed by '=' */
	if (idx + 1 >= size || data[idx + 1] != '=')
		return 0;

	/* charset name goes first, NUL-terminated */
	buffer_append(decodebuf, data, qpos[0]);
	buffer_append_c(decodebuf, '\0');
	*charsetlen_r = decodebuf->used;

	/* encoded text lives between the 2nd and 3rd '?' */
	payload = data + qpos[1] + 1;
	payload_len = qpos[2] - qpos[1] - 1;

	switch (data[qpos[0] + 1]) {
	case 'Q':
	case 'q':
		if (quoted_printable_q_decode(payload, payload_len,
					      decodebuf) < 0) {
			/* decoder reported an error; keep whatever it
			   produced */
		}
		break;
	case 'B':
	case 'b':
		if (base64_decode(payload, payload_len,
				  NULL, decodebuf) < 0) {
			/* decoder reported an error; keep whatever it
			   produced */
		}
		break;
	default:
		/* encoding we don't know how to handle */
		return 0;
	}

	/* everything up to and including "?=" was consumed */
	return qpos[2] + 2;
}
/* Returns TRUE if every one of the size bytes at data is a space, tab, CR
   or LF. An empty range yields TRUE. */
static bool is_only_lwsp(const unsigned char *data, size_t size)
{
	const unsigned char *p = data;
	size_t left = size;

	while (left > 0) {
		switch (*p) {
		case ' ':
		case '\t':
		case '\r':
		case '\n':
			break;
		default:
			/* found something that isn't whitespace */
			return FALSE;
		}
		p++;
		left--;
	}
	return TRUE;
}
/* Walk through a header value and hand it to callback piece by piece.

   Plain (unencoded) runs are passed as-is with charset=NULL. Each
   "=?charset?Q|B?text?=" block that decodes successfully is passed as the
   decoded bytes together with its NUL-terminated charset name; a block
   that decodes to nothing produces no callback call. A block that fails to
   decode is left in place and ends up being delivered as plain text.

   A plain run that consists only of space/tab/CR/LF and is directly
   followed by "=?" is not passed to the callback.

   As soon as callback returns FALSE no further calls are made. */
void message_header_decode(const unsigned char *data, size_t size,
			   message_header_decode_callback_t *callback,
			   void *context)
{
	buffer_t *decodebuf = NULL;
	size_t charsetlen = 0;
	size_t pos = 0, start_pos = 0;
	size_t word_pos, plain_len, ret;

	/* =?charset?Q|B?text?= */
	while (pos + 1 < size) {
		if (!(data[pos] == '=' && data[pos+1] == '?')) {
			/* still inside plain text */
			pos++;
			continue;
		}
		/* possible encoded-word begins here */
		word_pos = pos;

		/* flush the plain text collected since start_pos, unless
		   it is nothing but whitespace */
		plain_len = word_pos - start_pos;
		if (plain_len != 0 &&
		    !is_only_lwsp(data + start_pos, plain_len) &&
		    !callback(data + start_pos, plain_len, NULL, context)) {
			/* caller asked us to stop; skip the final flush too */
			start_pos = size;
			break;
		}

		/* one scratch buffer is reused for every encoded-word */
		if (decodebuf != NULL) {
			buffer_set_used_size(decodebuf, 0);
		} else {
			decodebuf = buffer_create_dynamic(default_pool,
							  size - word_pos);
		}

		/* step over "=?" and try to decode what follows */
		pos = word_pos + 2;
		ret = message_header_decode_encoded(data + pos, size - pos,
						    decodebuf, &charsetlen);
		if (ret == 0) {
			/* not decodable: the "=?" becomes the start of the
			   next plain run and scanning resumes after it */
			start_pos = word_pos;
			continue;
		}
		pos += ret;

		/* decodebuf contains <charset> NUL <text>; only call back
		   when there is some text after the charset */
		if (decodebuf->used > charsetlen &&
		    !callback(CONST_PTR_OFFSET(decodebuf->data, charsetlen),
			      decodebuf->used - charsetlen,
			      decodebuf->data, context)) {
			start_pos = size;
			break;
		}

		start_pos = pos;
	}

	/* whatever plain text is left after the last encoded-word */
	if (start_pos != size) {
		i_assert(size > start_pos);
		(void)callback(data + start_pos, size - start_pos,
			       NULL, context);
	}
	buffer_free(&decodebuf);
}
/* State passed to decode_utf8_callback() via its context pointer. */
struct decode_utf8_context {
/* buffer the callback appends its output to */
buffer_t *dest;
/* optional: when non-NULL it is called directly for ASCII/UTF-8 input and
   handed to charset_to_utf8_begin() for other charsets */
normalizer_func_t *normalizer;
/* NOTE(review): not read or written by any code visible in this part of
   the file - confirm whether it is still needed */
bool changed:1;
};
/* message_header_decode() callback that appends one piece of a header to
   ctx->dest. context must point to a struct decode_utf8_context.

   charset is NULL for unencoded input. Input that is unencoded or declared
   as UTF-8 goes through ctx->normalizer when one is set. Input in another
   charset is converted with charset_to_utf8(), ignoring conversion errors.
   In every remaining case (no normalizer, or charset_to_utf8_begin()
   failed) the input is filtered with uni_utf8_get_valid_data().

   Always returns TRUE, so decoding is never aborted from here. */
static bool
decode_utf8_callback(const unsigned char *data, size_t size,
		     const char *charset, void *context)
{
	struct decode_utf8_context *ctx = context;
	struct charset_translation *trans;
	bool is_plain;

	/* ASCII / UTF-8 input needs no charset conversion */
	is_plain = charset == NULL || charset_is_utf8(charset);

	if (is_plain && ctx->normalizer != NULL) {
		(void)ctx->normalizer(data, size, ctx->dest);
		return TRUE;
	}

	if (!is_plain &&
	    charset_to_utf8_begin(charset, ctx->normalizer, &trans) >= 0) {
		/* ignore any errors */
		(void)charset_to_utf8(trans, data, &size, ctx->dest);
		charset_to_utf8_end(&trans);
		return TRUE;
	}

	/* Either plain input without a normalizer, or a charset we couldn't
	   set up a translation for (the data probably still contains some
	   valid ASCII characters). When uni_utf8_get_valid_data() returns
	   TRUE the input is appended unchanged; otherwise dest is left
	   with whatever that call itself put there (presumably the valid
	   parts - confirm against unichar.h). */
	if (uni_utf8_get_valid_data(data, size, ctx->dest))
		buffer_append(ctx->dest, data, size);
	return TRUE;
}
/* Decode the header value in data/size with message_header_decode() and
   append the result to dest through decode_utf8_callback(). normalizer may
   be NULL; it is forwarded to the callback unchanged. */
void message_header_decode_utf8(const unsigned char *data, size_t size,
				buffer_t *dest, normalizer_func_t *normalizer)
{
	/* members not named here start out zeroed */
	struct decode_utf8_context ctx = {
		.dest = dest,
		.normalizer = normalizer,
	};

	message_header_decode(data, size, decode_utf8_callback, &ctx);
}