message-header-decode.c revision b516a7812b9acc04522869fead3aa6d2787dcdc6
76b43e4417bab52e913da39b5f5bc2a130d3f149Timo Sirainen/* Copyright (c) 2002-2011 Dovecot authors, see the included COPYING file */
df02611c44e9432e7961223bf9bfa3fb233b1789Timo Sirainen
df02611c44e9432e7961223bf9bfa3fb233b1789Timo Sirainen#include "lib.h"
df02611c44e9432e7961223bf9bfa3fb233b1789Timo Sirainen#include "base64.h"
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen#include "buffer.h"
df02611c44e9432e7961223bf9bfa3fb233b1789Timo Sirainen#include "unichar.h"
3320f4770d1f6c2cdd10f3c4ca5a324beb335339Timo Sirainen#include "charset-utf8.h"
df02611c44e9432e7961223bf9bfa3fb233b1789Timo Sirainen#include "quoted-printable.h"
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainen#include "message-header-decode.h"
df02611c44e9432e7961223bf9bfa3fb233b1789Timo Sirainen
df02611c44e9432e7961223bf9bfa3fb233b1789Timo Sirainenstatic size_t
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainenmessage_header_decode_encoded(const unsigned char *data, size_t size,
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen buffer_t *decodebuf, unsigned int *charsetlen_r)
df02611c44e9432e7961223bf9bfa3fb233b1789Timo Sirainen{
39afc7584d935b2dc7332c21966a7b20da03f1ecTimo Sirainen#define QCOUNT 3
df02611c44e9432e7961223bf9bfa3fb233b1789Timo Sirainen unsigned int num = 0;
df02611c44e9432e7961223bf9bfa3fb233b1789Timo Sirainen size_t i, start_pos[QCOUNT];
8e371a3ce32bd64288786855b8ce0cb63f19f7d1Timo Sirainen
8e371a3ce32bd64288786855b8ce0cb63f19f7d1Timo Sirainen /* data should contain "charset?encoding?text?=" */
8e371a3ce32bd64288786855b8ce0cb63f19f7d1Timo Sirainen for (i = 0; i < size; i++) {
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainen if (data[i] == '?') {
8e371a3ce32bd64288786855b8ce0cb63f19f7d1Timo Sirainen start_pos[num++] = i;
df02611c44e9432e7961223bf9bfa3fb233b1789Timo Sirainen if (num == QCOUNT)
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen break;
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen }
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen }
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen if (i == size || data[i+1] != '=') {
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen /* invalid block */
3320f4770d1f6c2cdd10f3c4ca5a324beb335339Timo Sirainen return 0;
39afc7584d935b2dc7332c21966a7b20da03f1ecTimo Sirainen }
39afc7584d935b2dc7332c21966a7b20da03f1ecTimo Sirainen
39afc7584d935b2dc7332c21966a7b20da03f1ecTimo Sirainen buffer_append(decodebuf, data, start_pos[0]);
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen buffer_append_c(decodebuf, '\0');
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen *charsetlen_r = decodebuf->used;
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen switch (data[start_pos[0]+1]) {
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen case 'q':
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen case 'Q':
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen quoted_printable_q_decode(data + start_pos[1] + 1,
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen start_pos[2] - start_pos[1] - 1,
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen decodebuf);
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen break;
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen case 'b':
82b990b0bb2a1dad5c2634a508a5ad87715db402Timo Sirainen case 'B':
3320f4770d1f6c2cdd10f3c4ca5a324beb335339Timo Sirainen if (base64_decode(data + start_pos[1] + 1,
1f6c210c30992e95b806d2f517e2b3625ed941c5Timo Sirainen start_pos[2] - start_pos[1] - 1,
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen NULL, decodebuf) < 0) {
3320f4770d1f6c2cdd10f3c4ca5a324beb335339Timo Sirainen /* contains invalid data. show what we got so far. */
3320f4770d1f6c2cdd10f3c4ca5a324beb335339Timo Sirainen }
3320f4770d1f6c2cdd10f3c4ca5a324beb335339Timo Sirainen break;
3320f4770d1f6c2cdd10f3c4ca5a324beb335339Timo Sirainen default:
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen /* unknown encoding */
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen return 0;
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen }
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen return start_pos[2] + 2;
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen}
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen
/* Returns TRUE if every one of the first size bytes of data is a space, tab,
   CR or LF. An empty range (size == 0) also yields TRUE.

   size is a size_t so that the size_t lengths callers compute are not
   truncated on platforms where unsigned int is narrower. */
static bool is_only_lwsp(const unsigned char *data, size_t size)
{
	/* consume leading whitespace; anything left over is non-LWSP */
	while (size > 0 &&
	       (*data == ' ' || *data == '\t' ||
		*data == '\r' || *data == '\n')) {
		data++;
		size--;
	}
	return size == 0;
}
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen
3320f4770d1f6c2cdd10f3c4ca5a324beb335339Timo Sirainenvoid message_header_decode(const unsigned char *data, size_t size,
3320f4770d1f6c2cdd10f3c4ca5a324beb335339Timo Sirainen message_header_decode_callback_t *callback,
1f6c210c30992e95b806d2f517e2b3625ed941c5Timo Sirainen void *context)
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen{
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen buffer_t *decodebuf = NULL;
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen unsigned int charsetlen = 0;
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen size_t pos, start_pos, ret;
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen /* =?charset?Q|B?text?= */
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen start_pos = 0;
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen for (pos = 0; pos + 1 < size; ) {
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen if (data[pos] != '=' || data[pos+1] != '?') {
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen pos++;
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen continue;
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen }
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen /* encoded string beginning */
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen if (pos != start_pos &&
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen !is_only_lwsp(data+start_pos, pos-start_pos)) {
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen /* send the unencoded data so far */
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen if (!callback(data + start_pos, pos - start_pos,
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen NULL, context)) {
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen start_pos = size;
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen break;
1f6c210c30992e95b806d2f517e2b3625ed941c5Timo Sirainen }
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen }
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen if (decodebuf == NULL) {
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen decodebuf = buffer_create_dynamic(default_pool,
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen size - pos);
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen } else {
f6c1297c26b355c4aec2a08978f51ec3efecb351Timo Sirainen buffer_set_used_size(decodebuf, 0);
f6c1297c26b355c4aec2a08978f51ec3efecb351Timo Sirainen }
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen pos += 2;
df02611c44e9432e7961223bf9bfa3fb233b1789Timo Sirainen ret = message_header_decode_encoded(data + pos, size - pos,
df02611c44e9432e7961223bf9bfa3fb233b1789Timo Sirainen decodebuf, &charsetlen);
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainen if (ret == 0) {
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainen start_pos = pos-2;
df02611c44e9432e7961223bf9bfa3fb233b1789Timo Sirainen continue;
8e371a3ce32bd64288786855b8ce0cb63f19f7d1Timo Sirainen }
8e371a3ce32bd64288786855b8ce0cb63f19f7d1Timo Sirainen pos += ret;
8e371a3ce32bd64288786855b8ce0cb63f19f7d1Timo Sirainen
8e371a3ce32bd64288786855b8ce0cb63f19f7d1Timo Sirainen if (decodebuf->used > charsetlen) {
8e371a3ce32bd64288786855b8ce0cb63f19f7d1Timo Sirainen /* decodebuf contains <charset> NUL <text> */
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainen if (!callback(CONST_PTR_OFFSET(decodebuf->data,
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen charsetlen),
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen decodebuf->used - charsetlen,
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen decodebuf->data, context)) {
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen start_pos = size;
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen break;
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen }
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen }
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen start_pos = pos;
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen }
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen if (size != start_pos) {
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen (void)callback(data + start_pos, size - start_pos,
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen NULL, context);
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen }
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen if (decodebuf != NULL)
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen buffer_free(&decodebuf);
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen}
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainenstruct decode_utf8_context {
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen buffer_t *dest;
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen unsigned int changed:1;
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen unsigned int called:1;
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen unsigned int dtcase:1;
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen};
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainenstatic bool
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainendecode_utf8_callback(const unsigned char *data, size_t size,
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen const char *charset, void *context)
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen{
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen struct decode_utf8_context *ctx = context;
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen struct charset_translation *t;
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen enum charset_flags flags;
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen /* one call with charset=NULL means nothing changed */
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen if (!ctx->called && charset == NULL)
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen ctx->called = TRUE;
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen else
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen ctx->changed = TRUE;
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen if (charset == NULL || charset_is_utf8(charset)) {
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen /* ASCII / UTF-8 */
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen if (ctx->dtcase) {
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen (void)uni_utf8_to_decomposed_titlecase(data, size,
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen ctx->dest);
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen } else {
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen if (uni_utf8_get_valid_data(data, size, ctx->dest))
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen buffer_append(ctx->dest, data, size);
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen }
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen return TRUE;
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen }
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen flags = ctx->dtcase ? CHARSET_FLAG_DECOMP_TITLECASE : 0;
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen if (charset_to_utf8_begin(charset, flags, &t) < 0) {
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen /* data probably still contains some valid ASCII characters.
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen append them. */
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen if (uni_utf8_get_valid_data(data, size, ctx->dest))
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen buffer_append(ctx->dest, data, size);
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen return TRUE;
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen }
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen /* ignore any errors */
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen (void)charset_to_utf8(t, data, &size, ctx->dest);
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen charset_to_utf8_end(&t);
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen return TRUE;
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen}
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainenbool message_header_decode_utf8(const unsigned char *data, size_t size,
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen buffer_t *dest, bool dtcase)
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen{
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen struct decode_utf8_context ctx;
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen size_t used = dest->used;
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen memset(&ctx, 0, sizeof(ctx));
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen ctx.dest = dest;
83bb013a99f0936995f9c7a1077822662d8fefdbTimo Sirainen ctx.dtcase = dtcase;
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen message_header_decode(data, size, decode_utf8_callback, &ctx);
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen return ctx.changed || (dest->used - used != size);
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen}
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen