message-header-decode.c revision 345253fb28498b2e0a60f4a2a8644c65feee7e75
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen/* Copyright (C) 2002-2007 Timo Sirainen */
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen#include "lib.h"
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen#include "base64.h"
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen#include "buffer.h"
e074ffeaee1ce283bd42f167c6810e3d013f8218Timo Sirainen#include "unichar.h"
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen#include "charset-utf8.h"
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen#include "quoted-printable.h"
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen#include "message-header-decode.h"
08f24237ccc177f5b3a09b24d8a725fa47e1ee32Timo Sirainen
08f24237ccc177f5b3a09b24d8a725fa47e1ee32Timo Sirainenstatic size_t
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainenmessage_header_decode_encoded(const unsigned char *data, size_t size,
35565557e05721a761132cec2ba1d93acacb6c14Timo Sirainen buffer_t *decodebuf, unsigned int *charsetlen_r)
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen{
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen#define QCOUNT 3
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen unsigned int num = 0;
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen size_t i, start_pos[QCOUNT];
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen /* data should contain "charset?encoding?text?=" */
797de45dcf6e24642ab347d5033beb92034b779dTimo Sirainen for (i = 0; i < size; i++) {
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen if (data[i] == '?') {
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen start_pos[num++] = i;
35565557e05721a761132cec2ba1d93acacb6c14Timo Sirainen if (num == QCOUNT)
dba5754de32284b3149ddd5c9bb1701b05707752Timo Sirainen break;
dba5754de32284b3149ddd5c9bb1701b05707752Timo Sirainen }
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen }
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen if (i == size || data[i+1] != '=') {
dac0b2e5e0f38c6d95ef1a842d891480db580236Timo Sirainen /* invalid block */
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen return 0;
797de45dcf6e24642ab347d5033beb92034b779dTimo Sirainen }
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen
dcc76bb1e1bb287e3e71e6a39a7ca207fab0eaa8Timo Sirainen buffer_append(decodebuf, data, start_pos[0]);
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen buffer_append_c(decodebuf, '\0');
dac0b2e5e0f38c6d95ef1a842d891480db580236Timo Sirainen *charsetlen_r = decodebuf->used;
dac0b2e5e0f38c6d95ef1a842d891480db580236Timo Sirainen
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen switch (data[start_pos[0]+1]) {
dac0b2e5e0f38c6d95ef1a842d891480db580236Timo Sirainen case 'q':
717a444a466280a84a468220f647fdcb9f3b546fTimo Sirainen case 'Q':
717a444a466280a84a468220f647fdcb9f3b546fTimo Sirainen quoted_printable_decode(data + start_pos[1] + 1,
717a444a466280a84a468220f647fdcb9f3b546fTimo Sirainen start_pos[2] - start_pos[1] - 1,
717a444a466280a84a468220f647fdcb9f3b546fTimo Sirainen NULL, decodebuf);
717a444a466280a84a468220f647fdcb9f3b546fTimo Sirainen break;
717a444a466280a84a468220f647fdcb9f3b546fTimo Sirainen case 'b':
717a444a466280a84a468220f647fdcb9f3b546fTimo Sirainen case 'B':
717a444a466280a84a468220f647fdcb9f3b546fTimo Sirainen if (base64_decode(data + start_pos[1] + 1,
08f24237ccc177f5b3a09b24d8a725fa47e1ee32Timo Sirainen start_pos[2] - start_pos[1] - 1,
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen NULL, decodebuf) < 0) {
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen /* contains invalid data. show what we got so far. */
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen }
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen break;
797de45dcf6e24642ab347d5033beb92034b779dTimo Sirainen default:
720692523ece4a549f7c589508d5693ee310f6b3Timo Sirainen /* unknown encoding */
720692523ece4a549f7c589508d5693ee310f6b3Timo Sirainen return 0;
720692523ece4a549f7c589508d5693ee310f6b3Timo Sirainen }
4b8459c6c24b79d4ed5974ab6e3289a3f2b701c0Timo Sirainen
720692523ece4a549f7c589508d5693ee310f6b3Timo Sirainen return start_pos[2] + 2;
dcc76bb1e1bb287e3e71e6a39a7ca207fab0eaa8Timo Sirainen}
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen
08f24237ccc177f5b3a09b24d8a725fa47e1ee32Timo Sirainenvoid message_header_decode(const unsigned char *data, size_t size,
08f24237ccc177f5b3a09b24d8a725fa47e1ee32Timo Sirainen message_header_decode_callback_t *callback,
08f24237ccc177f5b3a09b24d8a725fa47e1ee32Timo Sirainen void *context)
35565557e05721a761132cec2ba1d93acacb6c14Timo Sirainen{
dcc76bb1e1bb287e3e71e6a39a7ca207fab0eaa8Timo Sirainen buffer_t *decodebuf = NULL;
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen unsigned int charsetlen = 0;
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen size_t pos, start_pos;
8d59f06c9422fa49b538e23ffb06eddb23c6add2Timo Sirainen
8d59f06c9422fa49b538e23ffb06eddb23c6add2Timo Sirainen /* =?charset?Q|B?text?= */
8d59f06c9422fa49b538e23ffb06eddb23c6add2Timo Sirainen t_push();
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen start_pos = pos = 0;
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen for (pos = 0; pos + 1 < size; ) {
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen if (data[pos] != '=' || data[pos+1] != '?') {
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen pos++;
dcc76bb1e1bb287e3e71e6a39a7ca207fab0eaa8Timo Sirainen continue;
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen }
35565557e05721a761132cec2ba1d93acacb6c14Timo Sirainen
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen /* encoded string beginning */
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen if (pos != start_pos) {
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen /* send the unencoded data so far */
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen if (!callback(data + start_pos, pos - start_pos,
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen NULL, context)) {
dcc76bb1e1bb287e3e71e6a39a7ca207fab0eaa8Timo Sirainen start_pos = size;
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen break;
dcc76bb1e1bb287e3e71e6a39a7ca207fab0eaa8Timo Sirainen }
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen }
dcc76bb1e1bb287e3e71e6a39a7ca207fab0eaa8Timo Sirainen
8d59f06c9422fa49b538e23ffb06eddb23c6add2Timo Sirainen if (decodebuf == NULL) {
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen decodebuf =
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen buffer_create_dynamic(pool_datastack_create(),
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen size - pos);
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen } else {
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen buffer_set_used_size(decodebuf, 0);
193f5296d2a6b847970c222d8a261b89aae46331Timo Sirainen }
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen
193f5296d2a6b847970c222d8a261b89aae46331Timo Sirainen pos += 2;
08f24237ccc177f5b3a09b24d8a725fa47e1ee32Timo Sirainen pos += message_header_decode_encoded(data + pos, size - pos,
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen decodebuf, &charsetlen);
35565557e05721a761132cec2ba1d93acacb6c14Timo Sirainen
6ef7e31619edfaa17ed044b45861d106a86191efTimo Sirainen if (decodebuf->used > charsetlen) {
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen /* decodebuf contains <charset> NUL <text> */
35565557e05721a761132cec2ba1d93acacb6c14Timo Sirainen if (!callback(CONST_PTR_OFFSET(decodebuf->data,
08f24237ccc177f5b3a09b24d8a725fa47e1ee32Timo Sirainen charsetlen),
dcc76bb1e1bb287e3e71e6a39a7ca207fab0eaa8Timo Sirainen decodebuf->used - charsetlen,
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen decodebuf->data, context)) {
35565557e05721a761132cec2ba1d93acacb6c14Timo Sirainen start_pos = size;
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen break;
965e13eea8dc7f1da3769ab0c4667e36d0f33192Timo Sirainen }
08f24237ccc177f5b3a09b24d8a725fa47e1ee32Timo Sirainen }
08f24237ccc177f5b3a09b24d8a725fa47e1ee32Timo Sirainen
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen start_pos = pos;
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen }
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen
35565557e05721a761132cec2ba1d93acacb6c14Timo Sirainen if (size != start_pos) {
35565557e05721a761132cec2ba1d93acacb6c14Timo Sirainen (void)callback(data + start_pos, size - start_pos,
35565557e05721a761132cec2ba1d93acacb6c14Timo Sirainen NULL, context);
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen }
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen t_pop();
35565557e05721a761132cec2ba1d93acacb6c14Timo Sirainen}
35565557e05721a761132cec2ba1d93acacb6c14Timo Sirainen
35565557e05721a761132cec2ba1d93acacb6c14Timo Sirainenstruct decode_utf8_context {
35565557e05721a761132cec2ba1d93acacb6c14Timo Sirainen buffer_t *dest;
35565557e05721a761132cec2ba1d93acacb6c14Timo Sirainen unsigned int changed:1;
35565557e05721a761132cec2ba1d93acacb6c14Timo Sirainen unsigned int called:1;
35565557e05721a761132cec2ba1d93acacb6c14Timo Sirainen unsigned int dtcase:1;
35565557e05721a761132cec2ba1d93acacb6c14Timo Sirainen};
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainenstatic bool
193f5296d2a6b847970c222d8a261b89aae46331Timo Sirainendecode_utf8_callback(const unsigned char *data, size_t size,
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen const char *charset, void *context)
193f5296d2a6b847970c222d8a261b89aae46331Timo Sirainen{
08f24237ccc177f5b3a09b24d8a725fa47e1ee32Timo Sirainen struct decode_utf8_context *ctx = context;
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen struct charset_translation *t;
35565557e05721a761132cec2ba1d93acacb6c14Timo Sirainen
6ef7e31619edfaa17ed044b45861d106a86191efTimo Sirainen /* one call with charset=NULL means nothing changed */
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen if (!ctx->called && charset == NULL)
8d80659e504ffb34bb0c6a633184fece35751b18Timo Sirainen ctx->called = TRUE;
35565557e05721a761132cec2ba1d93acacb6c14Timo Sirainen else
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen ctx->changed = TRUE;
745f2c7424d88e368eff0a3a7650b352a9d1f0ddTimo Sirainen
745f2c7424d88e368eff0a3a7650b352a9d1f0ddTimo Sirainen if (charset == NULL || charset_is_utf8(charset)) {
745f2c7424d88e368eff0a3a7650b352a9d1f0ddTimo Sirainen /* ASCII / UTF-8 */
745f2c7424d88e368eff0a3a7650b352a9d1f0ddTimo Sirainen if (ctx->dtcase) {
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen (void)uni_utf8_to_decomposed_titlecase(data, size,
745f2c7424d88e368eff0a3a7650b352a9d1f0ddTimo Sirainen ctx->dest);
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen } else {
745f2c7424d88e368eff0a3a7650b352a9d1f0ddTimo Sirainen buffer_append(ctx->dest, data, size);
745f2c7424d88e368eff0a3a7650b352a9d1f0ddTimo Sirainen }
745f2c7424d88e368eff0a3a7650b352a9d1f0ddTimo Sirainen return TRUE;
745f2c7424d88e368eff0a3a7650b352a9d1f0ddTimo Sirainen }
f05b9dd37f830576ca7d32ec7071bf87906df3d2Timo Sirainen
35565557e05721a761132cec2ba1d93acacb6c14Timo Sirainen if (charset_to_utf8_begin(charset, ctx->dtcase, &t) < 0) {
35565557e05721a761132cec2ba1d93acacb6c14Timo Sirainen /* let's just ignore this part */
35565557e05721a761132cec2ba1d93acacb6c14Timo Sirainen return TRUE;
35565557e05721a761132cec2ba1d93acacb6c14Timo Sirainen }
35565557e05721a761132cec2ba1d93acacb6c14Timo Sirainen
35565557e05721a761132cec2ba1d93acacb6c14Timo Sirainen /* ignore any errors */
dcc76bb1e1bb287e3e71e6a39a7ca207fab0eaa8Timo Sirainen (void)charset_to_utf8(t, data, &size, ctx->dest);
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen charset_to_utf8_end(&t);
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen return TRUE;
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen}
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen
797de45dcf6e24642ab347d5033beb92034b779dTimo Sirainenbool message_header_decode_utf8(const unsigned char *data, size_t size,
dba5754de32284b3149ddd5c9bb1701b05707752Timo Sirainen buffer_t *dest, bool dtcase)
dba5754de32284b3149ddd5c9bb1701b05707752Timo Sirainen{
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen struct decode_utf8_context ctx;
dcc76bb1e1bb287e3e71e6a39a7ca207fab0eaa8Timo Sirainen size_t used = dest->used;
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen
35565557e05721a761132cec2ba1d93acacb6c14Timo Sirainen memset(&ctx, 0, sizeof(ctx));
dba5754de32284b3149ddd5c9bb1701b05707752Timo Sirainen ctx.dest = dest;
6ef7e31619edfaa17ed044b45861d106a86191efTimo Sirainen ctx.dtcase = dtcase;
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen message_header_decode(data, size, decode_utf8_callback, &ctx);
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen return ctx.changed || (dest->used - used != size);
6795f542ed816a3c977085d4f74df1d62a37b690Timo Sirainen}
c014f12e8268bf37ca2997e632ad7c22b8d04a84Timo Sirainen