message-header-decode.c revision af6c7862e6160ffaecec458f4cec43b94272ad57
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen/* Copyright (c) 2002-2007 Dovecot authors, see the included COPYING file */
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen#include "lib.h"
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen#include "base64.h"
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen#include "buffer.h"
d39a04db2f4d0599cb9b5f03a9aa10a3c234453cTimo Sirainen#include "unichar.h"
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen#include "charset-utf8.h"
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen#include "quoted-printable.h"
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen#include "message-header-decode.h"
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
c99fe55d4535d839a6ad0735c4719e076a1adb2cTimo Sirainenstatic size_t
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainenmessage_header_decode_encoded(const unsigned char *data, size_t size,
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen buffer_t *decodebuf, unsigned int *charsetlen_r)
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen{
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen#define QCOUNT 3
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen unsigned int num = 0;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen size_t i, start_pos[QCOUNT];
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen /* data should contain "charset?encoding?text?=" */
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen for (i = 0; i < size; i++) {
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen if (data[i] == '?') {
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen start_pos[num++] = i;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen if (num == QCOUNT)
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen break;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen }
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen }
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen if (i == size || data[i+1] != '=') {
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen /* invalid block */
4d4d6d4745682790c20d759ba93dbea46b812c5dTimo Sirainen return 0;
4d4d6d4745682790c20d759ba93dbea46b812c5dTimo Sirainen }
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen buffer_append(decodebuf, data, start_pos[0]);
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen buffer_append_c(decodebuf, '\0');
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen *charsetlen_r = decodebuf->used;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen switch (data[start_pos[0]+1]) {
17fe695b985e9d6e9dc39c05b24e6b3c3b7e1ba1Timo Sirainen case 'q':
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen case 'Q':
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen quoted_printable_decode(data + start_pos[1] + 1,
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen start_pos[2] - start_pos[1] - 1,
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen NULL, decodebuf);
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen break;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen case 'b':
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen case 'B':
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen if (base64_decode(data + start_pos[1] + 1,
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen start_pos[2] - start_pos[1] - 1,
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen NULL, decodebuf) < 0) {
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen /* contains invalid data. show what we got so far. */
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen }
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen break;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen default:
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen /* unknown encoding */
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen return 0;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen }
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen return start_pos[2] + 2;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen}
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainenvoid message_header_decode(const unsigned char *data, size_t size,
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen message_header_decode_callback_t *callback,
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen void *context)
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen{
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen buffer_t *decodebuf = NULL;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen unsigned int charsetlen = 0;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen size_t pos, start_pos, ret;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen /* =?charset?Q|B?text?= */
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen start_pos = pos = 0;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen for (pos = 0; pos + 1 < size; ) {
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen if (data[pos] != '=' || data[pos+1] != '?') {
78fa3c578c14ee8a612f86cf73b6181c7f16463fTimo Sirainen pos++;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen continue;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen }
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
78fa3c578c14ee8a612f86cf73b6181c7f16463fTimo Sirainen /* encoded string beginning */
0c22bef8f5b35c645de8affd8746307fc53bd222Timo Sirainen if (pos != start_pos) {
0c22bef8f5b35c645de8affd8746307fc53bd222Timo Sirainen /* send the unencoded data so far */
78fa3c578c14ee8a612f86cf73b6181c7f16463fTimo Sirainen if (!callback(data + start_pos, pos - start_pos,
78fa3c578c14ee8a612f86cf73b6181c7f16463fTimo Sirainen NULL, context)) {
78fa3c578c14ee8a612f86cf73b6181c7f16463fTimo Sirainen start_pos = size;
78fa3c578c14ee8a612f86cf73b6181c7f16463fTimo Sirainen break;
78fa3c578c14ee8a612f86cf73b6181c7f16463fTimo Sirainen }
78fa3c578c14ee8a612f86cf73b6181c7f16463fTimo Sirainen }
78fa3c578c14ee8a612f86cf73b6181c7f16463fTimo Sirainen
78fa3c578c14ee8a612f86cf73b6181c7f16463fTimo Sirainen if (decodebuf == NULL) {
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen decodebuf = buffer_create_dynamic(default_pool,
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen size - pos);
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen } else {
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen buffer_set_used_size(decodebuf, 0);
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen }
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen pos += 2;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen ret = message_header_decode_encoded(data + pos, size - pos,
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen decodebuf, &charsetlen);
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen if (ret == 0) {
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen start_pos = pos-2;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen continue;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen }
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen pos += ret;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen if (decodebuf->used > charsetlen) {
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen /* decodebuf contains <charset> NUL <text> */
01230de017cd273de41143d88e9c18df1243ae8aTimo Sirainen if (!callback(CONST_PTR_OFFSET(decodebuf->data,
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen charsetlen),
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen decodebuf->used - charsetlen,
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen decodebuf->data, context)) {
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen start_pos = size;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen break;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen }
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen }
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen start_pos = pos;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen }
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen
78fa3c578c14ee8a612f86cf73b6181c7f16463fTimo Sirainen if (size != start_pos) {
78fa3c578c14ee8a612f86cf73b6181c7f16463fTimo Sirainen (void)callback(data + start_pos, size - start_pos,
78fa3c578c14ee8a612f86cf73b6181c7f16463fTimo Sirainen NULL, context);
7487ff578435377bbeefffdbfb78ca09ed1292dfTimo Sirainen }
7487ff578435377bbeefffdbfb78ca09ed1292dfTimo Sirainen if (decodebuf != NULL)
7487ff578435377bbeefffdbfb78ca09ed1292dfTimo Sirainen buffer_free(&decodebuf);
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen}
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainenstruct decode_utf8_context {
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen buffer_t *dest;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen unsigned int changed:1;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen unsigned int called:1;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen unsigned int dtcase:1;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen};
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainenstatic bool
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainendecode_utf8_callback(const unsigned char *data, size_t size,
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen const char *charset, void *context)
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen{
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen struct decode_utf8_context *ctx = context;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen struct charset_translation *t;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen enum charset_flags flags;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen /* one call with charset=NULL means nothing changed */
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen if (!ctx->called && charset == NULL)
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen ctx->called = TRUE;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen else
c6335901c67a4c9365319190a111a2168f3b06f5Timo Sirainen ctx->changed = TRUE;
c6335901c67a4c9365319190a111a2168f3b06f5Timo Sirainen
6d2b3ce2c6ef62334985ece4f0ab8b154e0e9560Timo Sirainen if (charset == NULL || charset_is_utf8(charset)) {
6d2b3ce2c6ef62334985ece4f0ab8b154e0e9560Timo Sirainen /* ASCII / UTF-8 */
6d2b3ce2c6ef62334985ece4f0ab8b154e0e9560Timo Sirainen if (ctx->dtcase) {
6d2b3ce2c6ef62334985ece4f0ab8b154e0e9560Timo Sirainen (void)uni_utf8_to_decomposed_titlecase(data, size,
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen ctx->dest);
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen } else {
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen buffer_append(ctx->dest, data, size);
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen }
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen return TRUE;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen }
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen flags = ctx->dtcase ? CHARSET_FLAG_DECOMP_TITLECASE : 0;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen if (charset_to_utf8_begin(charset, flags, &t) < 0) {
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen /* data probably still contains some valid ASCII characters.
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen append them. */
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen if (uni_utf8_get_valid_data(data, size, ctx->dest))
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen buffer_append(ctx->dest, data, size);
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen return TRUE;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen }
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen /* ignore any errors */
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen (void)charset_to_utf8(t, data, &size, ctx->dest);
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen charset_to_utf8_end(&t);
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen return TRUE;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen}
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainenbool message_header_decode_utf8(const unsigned char *data, size_t size,
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen buffer_t *dest, bool dtcase)
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen{
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen struct decode_utf8_context ctx;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen size_t used = dest->used;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen memset(&ctx, 0, sizeof(ctx));
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen ctx.dest = dest;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen ctx.dtcase = dtcase;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen message_header_decode(data, size, decode_utf8_callback, &ctx);
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen return ctx.changed || (dest->used - used != size);
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen}
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen