message-header-decode.c revision 72388282bf6718c39af34cfcf51438910f9d62da
2454dfa32c93c20a8522c6ed42fe057baaac9f9aStephan Bosch/* Copyright (C) 2002-2007 Timo Sirainen */
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen#include "lib.h"
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen#include "base64.h"
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen#include "buffer.h"
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen#include "charset-utf8.h"
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen#include "quoted-printable.h"
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen#include "message-header-decode.h"
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainenstatic size_t
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainenmessage_header_decode_encoded(const unsigned char *data, size_t size,
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen buffer_t *decodebuf, unsigned int *charsetlen_r)
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen{
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen#define QCOUNT 3
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen unsigned int num = 0;
0db42260be85e797aa9909a29b20296996f52e75Timo Sirainen size_t i, start_pos[QCOUNT];
0db42260be85e797aa9909a29b20296996f52e75Timo Sirainen
0db42260be85e797aa9909a29b20296996f52e75Timo Sirainen /* data should contain "charset?encoding?text?=" */
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen for (i = 0; i < size; i++) {
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen if (data[i] == '?') {
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen start_pos[num++] = i;
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen if (num == QCOUNT)
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen break;
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen }
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen }
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen if (i == size || data[i+1] != '=') {
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen /* invalid block */
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen return 0;
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen }
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen buffer_append(decodebuf, data, start_pos[0]);
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen buffer_append_c(decodebuf, '\0');
46e917c9fa05cbe7bddf805d3a9838b61e3960e1Timo Sirainen *charsetlen_r = decodebuf->used;
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen switch (data[start_pos[0]+1]) {
95d62f8d6d281cc488dc4f488d4388701e559012Josef 'Jeff' Sipek case 'q':
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen case 'Q':
46e917c9fa05cbe7bddf805d3a9838b61e3960e1Timo Sirainen quoted_printable_decode(data + start_pos[1] + 1,
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen start_pos[2] - start_pos[1] - 1,
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen NULL, decodebuf);
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen break;
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen case 'b':
b49aa341d28c0eec1229e30baa2f89d5bae52ff8Phil Carmody case 'B':
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen if (base64_decode(data + start_pos[1] + 1,
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen start_pos[2] - start_pos[1] - 1,
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen NULL, decodebuf) < 0) {
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen /* contains invalid data. show what we got so far. */
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen }
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen break;
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen default:
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen /* unknown encoding */
16cb5d65265dd0b216542803fd80c4b999ae118eTimo Sirainen return 0;
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen }
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen return start_pos[2] + 2;
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen}
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen
efe78d3ba24fc866af1c79b9223dc0809ba26cadStephan Boschvoid message_header_decode(const unsigned char *data, size_t size,
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen message_header_decode_callback_t *callback,
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen void *context)
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen{
0ca3b9cb0f2a322a25ce7f229dc3d3a0b46be17bTimo Sirainen buffer_t *decodebuf = NULL;
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen unsigned int charsetlen = 0;
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen size_t pos, start_pos;
7a88e726e7300fb0273cb4e55b43c27fbd90bdbdTimo Sirainen
16cb5d65265dd0b216542803fd80c4b999ae118eTimo Sirainen /* =?charset?Q|B?text?= */
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen t_push();
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen start_pos = pos = 0;
16cb5d65265dd0b216542803fd80c4b999ae118eTimo Sirainen for (pos = 0; pos + 1 < size; ) {
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen if (data[pos] != '=' || data[pos+1] != '?') {
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen pos++;
16cb5d65265dd0b216542803fd80c4b999ae118eTimo Sirainen continue;
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen }
16cb5d65265dd0b216542803fd80c4b999ae118eTimo Sirainen
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen /* encoded string beginning */
16cb5d65265dd0b216542803fd80c4b999ae118eTimo Sirainen if (pos != start_pos) {
56d1345c43bbd28c36b7faa85e4163bd9e874290Timo Sirainen /* send the unencoded data so far */
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen if (!callback(data + start_pos, pos - start_pos,
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen NULL, context)) {
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen start_pos = size;
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen break;
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen }
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen }
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen
16cb5d65265dd0b216542803fd80c4b999ae118eTimo Sirainen if (decodebuf == NULL) {
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen decodebuf =
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen buffer_create_dynamic(pool_datastack_create(),
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen size - pos);
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen } else {
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen buffer_set_used_size(decodebuf, 0);
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen }
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen pos += 2;
56aa97d74071f3a2987140c2ff1cfd5a59cb35aaTimo Sirainen pos += message_header_decode_encoded(data + pos, size - pos,
56aa97d74071f3a2987140c2ff1cfd5a59cb35aaTimo Sirainen decodebuf, &charsetlen);
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen if (decodebuf->used > charsetlen) {
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen /* decodebuf contains <charset> NUL <text> */
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen if (!callback(CONST_PTR_OFFSET(decodebuf->data,
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen charsetlen),
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen decodebuf->used - charsetlen,
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen decodebuf->data, context)) {
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen start_pos = size;
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen break;
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen }
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen }
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen start_pos = pos;
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen }
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen if (size != start_pos) {
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen (void)callback(data + start_pos, size - start_pos,
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen NULL, context);
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen }
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen t_pop();
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen}
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainenstruct decode_utf8_context {
95d62f8d6d281cc488dc4f488d4388701e559012Josef 'Jeff' Sipek buffer_t *dest;
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen bool ucase;
8c909e451d14075c05d90382cf8eebc4e354f569Timo Sirainen};
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainenstatic bool
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainendecode_utf8_callback(const unsigned char *data, size_t size,
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen const char *charset, void *context)
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen{
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen struct decode_utf8_context *ctx = context;
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen struct charset_translation *t;
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen bool unknown_charset;
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen if (charset == NULL || charset_is_utf8(charset)) {
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen /* ASCII / UTF-8 */
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen if (ctx->ucase) {
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen charset_utf8_ucase_write(ctx->dest, ctx->dest->used,
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen data, size);
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen } else {
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen buffer_append(ctx->dest, data, size);
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen }
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen return TRUE;
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen }
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen t = charset_to_utf8_begin(charset, ctx->ucase, &unknown_charset);
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen if (unknown_charset) {
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen /* let's just ignore this part */
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen return TRUE;
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen }
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen /* ignore any errors */
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen (void)charset_to_utf8_full(t, data, &size, ctx->dest);
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen charset_to_utf8_end(&t);
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen return TRUE;
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen}
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainenvoid message_header_decode_utf8(const unsigned char *data, size_t size,
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen buffer_t *dest, bool ucase)
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen{
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen struct decode_utf8_context ctx;
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen ctx.dest = dest;
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen ctx.ucase = ucase;
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen message_header_decode(data, size, decode_utf8_callback, &ctx);
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen}
01435c38e7d671d5a892c4b802cfb204881cd454Timo Sirainen