message-header-hash.c revision b696d1d6df36a78441175535f5049eb29d5beb93
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen/* Copyright (c) 2013-2017 Dovecot authors, see the included COPYING file */
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen#include "lib.h"
2615df45a8027948a474abe5e817b34b0499c171Timo Sirainen#include "hash-method.h"
b780aa272b742a43579cdb523cc79cc8d4521306Timo Sirainen#include "message-header-hash.h"
2615df45a8027948a474abe5e817b34b0499c171Timo Sirainen
2fb9ae42f9e36388ec6db24188b9108434043fd0Timo Sirainenvoid message_header_hash_more(struct message_header_hash_context *ctx,
2fb9ae42f9e36388ec6db24188b9108434043fd0Timo Sirainen const struct hash_method *method, void *context,
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen unsigned int version,
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen const unsigned char *data, size_t size)
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen{
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen size_t i, start;
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen
b9f564d00b7a115f465ffd6840341c7b8f9bfc8aTimo Sirainen i_assert(version >= 1 && version <= MESSAGE_HEADER_HASH_MAX_VERSION);
b9f564d00b7a115f465ffd6840341c7b8f9bfc8aTimo Sirainen
b9f564d00b7a115f465ffd6840341c7b8f9bfc8aTimo Sirainen if (version == 1) {
b9f564d00b7a115f465ffd6840341c7b8f9bfc8aTimo Sirainen method->loop(context, data, size);
62041dfb7d6ac6e9c633a557075999cdfcff7bd5Timo Sirainen return;
e15b305e90c9834734ccf35ed78f0ad29d570ee9Timo Sirainen }
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen /* - Dovecot IMAP replaces NULs with 0x80 character.
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen - Dovecot POP3 with outlook-no-nuls workaround replaces NULs
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen with 0x80 character.
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen - Zimbra replaces 8bit chars with '?' in header fetches,
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen but not body fetches.
036626b19f14bef582f96e556913ae91b1d67881Timo Sirainen - Yahoo replaces 8bit chars with '?' in partial header
68b5e132f1f8bb2528482310daffcc06c2f019d3Timo Sirainen fetches, but not POP3 TOP. UTF-8 character sequence writes only a
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen single '?'
b780aa272b742a43579cdb523cc79cc8d4521306Timo Sirainen
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen So we'll just replace all control and 8bit chars with '?' and
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen remove any repeated '?', which hopefully will satisfy everybody.
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen Also:
dc53fff3700362b544dcba166ff81420db227a60Timo Sirainen - Zimbra removes trailing spaces and tabs from IMAP BODY[HEADER],
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen but not IMAP BODY[] or POP3 TOP. Just strip away all spaces with
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen version 3 and tabs also with version 4.
dca6d617a23e3f93af3b8df59acb46478179fe55Timo Sirainen */
0bd259973f98837cf0e41fdee3e2a578e51ad09eTimo Sirainen for (i = start = 0; i < size; i++) {
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen bool cur_is_questionmark = FALSE;
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen
2615df45a8027948a474abe5e817b34b0499c171Timo Sirainen switch (data[i]) {
2615df45a8027948a474abe5e817b34b0499c171Timo Sirainen case ' ':
b780aa272b742a43579cdb523cc79cc8d4521306Timo Sirainen if (version >= 3) {
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen /* strip away spaces */
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen method->loop(context, data + start, i-start);
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen start = i+1;
f330867f937b7a8505807a02edd21524600822eeTimo Sirainen }
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen break;
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen case '\t':
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen if (version >= 4) {
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen /* strip away tabs */
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen method->loop(context, data + start, i-start);
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen start = i+1;
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen }
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen break;
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen case '\n':
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen break;
de83c1e095ba081ef80f630bcd933e5e28aff94bTimo Sirainen default:
57b819d3aa978b88ef478e4f2397c32db231ebb7Timo Sirainen if (data[i] < 0x20 || data[i] >= 0x7f || data[i] == '?') {
b215a8a123623782554a83f3025ef4e771bd8f01Timo Sirainen /* remove repeated '?' */
4ee00532a265bdfb38539d811fcd12d51210ac35Timo Sirainen if (start < i || !ctx->prev_was_questionmark) {
b215a8a123623782554a83f3025ef4e771bd8f01Timo Sirainen method->loop(context, data + start, i-start);
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen method->loop(context, "?", 1);
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen }
eb1572d7c44ebc7b0b039d085c3dbab2ef7043ddTimo Sirainen start = i+1;
eb1572d7c44ebc7b0b039d085c3dbab2ef7043ddTimo Sirainen cur_is_questionmark = TRUE;
eb1572d7c44ebc7b0b039d085c3dbab2ef7043ddTimo Sirainen }
eb1572d7c44ebc7b0b039d085c3dbab2ef7043ddTimo Sirainen break;
eb1572d7c44ebc7b0b039d085c3dbab2ef7043ddTimo Sirainen }
eb1572d7c44ebc7b0b039d085c3dbab2ef7043ddTimo Sirainen ctx->prev_was_questionmark = cur_is_questionmark;
eb1572d7c44ebc7b0b039d085c3dbab2ef7043ddTimo Sirainen }
eb1572d7c44ebc7b0b039d085c3dbab2ef7043ddTimo Sirainen method->loop(context, data + start, i-start);
ed354926406e28254b581f821bb052f38d9c14e8Timo Sirainen}
eb1572d7c44ebc7b0b039d085c3dbab2ef7043ddTimo Sirainen