bcb4e51a409d94ae670de96afb8483a4f7855294Stephan Bosch/* Copyright (c) 2015-2018 Dovecot authors, see the included COPYING file */
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen#include "lib.h"
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen#include "str.h"
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen#include "test-common.h"
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen#include "charset-utf8.h"
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen
/* Checks that charset_is_utf8() returns true for each of the mixed-case
   ASCII and UTF-8 charset names listed below. */
static void test_charset_is_utf8(void)
{
	static const char *const utf8_names[] = {
		"AScII",
		"us-AScII",
		"uTF8",
		"uTF-8"
	};
	unsigned int idx;

	test_begin("charset_is_utf8");
	for (idx = 0; idx < N_ELEMENTS(utf8_names); idx++)
		test_assert_idx(charset_is_utf8(utf8_names[idx]), idx);
	test_end();
}
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainenstatic void test_charset_utf8_common(const char *input_charset)
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen{
b7324e421e2132cbbf753e6fdbe675bbaecdf929Timo Sirainen static const struct {
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen const char *input;
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen const char *output;
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen enum charset_result result;
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen } tests[] = {
f3779c0540f0dc8700fd2aef922d69dcc6b0194eTimo Sirainen { "p\xC3\xA4\xC3", "p\xC3\xA4", CHARSET_RET_INCOMPLETE_INPUT },
f3779c0540f0dc8700fd2aef922d69dcc6b0194eTimo Sirainen { "p\xC3\xA4\xC3""a", "p\xC3\xA4"UNICODE_REPLACEMENT_CHAR_UTF8"a", CHARSET_RET_INVALID_INPUT }
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen };
f9291653e3d42d630b9212ceb9290c974e51597aTimo Sirainen string_t *src, *str = t_str_new(256);
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen enum charset_result result;
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen unsigned int i;
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen for (i = 0; i < N_ELEMENTS(tests); i++) {
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen str_truncate(str, 0);
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen test_assert_idx(charset_to_utf8_str(input_charset, NULL,
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen tests[i].input, str, &result) == 0, i);
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen test_assert_idx(strcmp(tests[i].output, str_c(str)) == 0, i);
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen test_assert_idx(result == tests[i].result, i);
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen }
f9291653e3d42d630b9212ceb9290c974e51597aTimo Sirainen /* check that E2BIG handling works. We assume that iconv() is called
f9291653e3d42d630b9212ceb9290c974e51597aTimo Sirainen with 8192 byte buffer (tmpbuf[8192]) */
f9291653e3d42d630b9212ceb9290c974e51597aTimo Sirainen src = str_new(default_pool, 16384);
f9291653e3d42d630b9212ceb9290c974e51597aTimo Sirainen for (i = 0; i < 8190; i++)
f9291653e3d42d630b9212ceb9290c974e51597aTimo Sirainen str_append_c(src, 'a' + i % ('z'-'a'+1));
f9291653e3d42d630b9212ceb9290c974e51597aTimo Sirainen for (i = 0; i < 256; i++) {
f9291653e3d42d630b9212ceb9290c974e51597aTimo Sirainen str_truncate(str, 0);
f9291653e3d42d630b9212ceb9290c974e51597aTimo Sirainen str_append_c(src, 'A' + i % ('Z'-'A'+1));
f9291653e3d42d630b9212ceb9290c974e51597aTimo Sirainen test_assert_idx(charset_to_utf8_str(input_charset, NULL,
f9291653e3d42d630b9212ceb9290c974e51597aTimo Sirainen str_c(src), str, &result) == 0, i);
f9291653e3d42d630b9212ceb9290c974e51597aTimo Sirainen }
f9291653e3d42d630b9212ceb9290c974e51597aTimo Sirainen str_free(&src);
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen}
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen
/* Runs the shared conversion checks with the plain "UTF-8" charset name. */
static void test_charset_utf8(void)
{
	static const char charset_name[] = "UTF-8";

	test_begin("charset utf8");
	test_charset_utf8_common(charset_name);
	test_end();
}
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen#ifdef HAVE_ICONV
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainenstatic void test_charset_iconv(void)
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen{
b7324e421e2132cbbf753e6fdbe675bbaecdf929Timo Sirainen static const struct {
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen const char *charset;
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen const char *input;
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen const char *output;
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen enum charset_result result;
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen } tests[] = {
19ed8f08b23d6ed204e6b27e5d1c0c6fe6bb11ddPhil Carmody { "ISO-8859-1", "p\xE4\xE4", "p\xC3\xA4\xC3\xA4", CHARSET_RET_OK },
5f7fcc523deee2259146846d5fc9fa61f0299d85Timo Sirainen { "UTF-7", "+AOQA5AD2AOQA9gDkAPYA5AD2AOQA9gDkAPYA5AD2AOQA9gDkAPYA5AD2AOQA9gDkAPYA5AD2AOQA9gDkAPYA5AD2AOQA9gDk",
19ed8f08b23d6ed204e6b27e5d1c0c6fe6bb11ddPhil Carmody "\xC3\xA4\xC3\xA4\xC3\xB6\xC3\xA4\xC3\xB6\xC3\xA4\xC3\xB6\xC3\xA4"
19ed8f08b23d6ed204e6b27e5d1c0c6fe6bb11ddPhil Carmody "\xC3\xB6\xC3\xA4\xC3\xB6\xC3\xA4\xC3\xB6\xC3\xA4\xC3\xB6\xC3\xA4"
19ed8f08b23d6ed204e6b27e5d1c0c6fe6bb11ddPhil Carmody "\xC3\xB6\xC3\xA4\xC3\xB6\xC3\xA4\xC3\xB6\xC3\xA4\xC3\xB6\xC3\xA4"
19ed8f08b23d6ed204e6b27e5d1c0c6fe6bb11ddPhil Carmody "\xC3\xB6\xC3\xA4\xC3\xB6\xC3\xA4\xC3\xB6\xC3\xA4\xC3\xB6\xC3\xA4"
19ed8f08b23d6ed204e6b27e5d1c0c6fe6bb11ddPhil Carmody "\xC3\xB6\xC3\xA4\xC3\xB6\xC3\xA4", CHARSET_RET_OK }
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen };
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen string_t *str = t_str_new(128);
e9257b0d30aa68dc968b6347d9f3f5ac4c8b5c00Timo Sirainen struct charset_translation *trans;
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen enum charset_result result;
e9257b0d30aa68dc968b6347d9f3f5ac4c8b5c00Timo Sirainen size_t pos, left, limit, len;
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen unsigned int i;
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen test_begin("charset iconv");
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen for (i = 0; i < N_ELEMENTS(tests); i++) {
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen str_truncate(str, 0);
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen test_assert_idx(charset_to_utf8_str(tests[i].charset, NULL,
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen tests[i].input, str, &result) == 0, i);
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen test_assert_idx(strcmp(tests[i].output, str_c(str)) == 0, i);
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen test_assert_idx(result == tests[i].result, i);
e9257b0d30aa68dc968b6347d9f3f5ac4c8b5c00Timo Sirainen
e9257b0d30aa68dc968b6347d9f3f5ac4c8b5c00Timo Sirainen str_truncate(str, 0);
e9257b0d30aa68dc968b6347d9f3f5ac4c8b5c00Timo Sirainen test_assert_idx(charset_to_utf8_begin(tests[i].charset, NULL, &trans) == 0, i);
e9257b0d30aa68dc968b6347d9f3f5ac4c8b5c00Timo Sirainen len = strlen(tests[i].input);
e9257b0d30aa68dc968b6347d9f3f5ac4c8b5c00Timo Sirainen for (pos = 0, limit = 1; limit <= len; pos += left, limit++) {
e9257b0d30aa68dc968b6347d9f3f5ac4c8b5c00Timo Sirainen left = limit - pos;
e9257b0d30aa68dc968b6347d9f3f5ac4c8b5c00Timo Sirainen result = charset_to_utf8(trans, (const void *)(tests[i].input + pos),
e9257b0d30aa68dc968b6347d9f3f5ac4c8b5c00Timo Sirainen &left, str);
e9257b0d30aa68dc968b6347d9f3f5ac4c8b5c00Timo Sirainen if (result != CHARSET_RET_INCOMPLETE_INPUT &&
e9257b0d30aa68dc968b6347d9f3f5ac4c8b5c00Timo Sirainen result != CHARSET_RET_OK)
e9257b0d30aa68dc968b6347d9f3f5ac4c8b5c00Timo Sirainen break;
e9257b0d30aa68dc968b6347d9f3f5ac4c8b5c00Timo Sirainen }
e9257b0d30aa68dc968b6347d9f3f5ac4c8b5c00Timo Sirainen test_assert_idx(strcmp(tests[i].output, str_c(str)) == 0, i);
e9257b0d30aa68dc968b6347d9f3f5ac4c8b5c00Timo Sirainen test_assert_idx(result == tests[i].result, i);
e9257b0d30aa68dc968b6347d9f3f5ac4c8b5c00Timo Sirainen charset_to_utf8_end(&trans);
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen }
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen /* Use //IGNORE just to force handling to be done by iconv
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen instead of our own UTF-8 routines. */
576495f5f4f04a08d80483ab997fdee9923c4698Aki Tuomi test_charset_utf8_common("UTF-8//TEST");
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen test_end();
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen}
64d895bcca177ee840268180ca7a4e3841295613Timo Sirainenstatic void test_charset_iconv_crashes(void)
64d895bcca177ee840268180ca7a4e3841295613Timo Sirainen{
b7324e421e2132cbbf753e6fdbe675bbaecdf929Timo Sirainen static const struct {
64d895bcca177ee840268180ca7a4e3841295613Timo Sirainen const char *charset;
64d895bcca177ee840268180ca7a4e3841295613Timo Sirainen const char *input;
64d895bcca177ee840268180ca7a4e3841295613Timo Sirainen } tests[] = {
64d895bcca177ee840268180ca7a4e3841295613Timo Sirainen { "CP932", "\203\334" }
64d895bcca177ee840268180ca7a4e3841295613Timo Sirainen };
64d895bcca177ee840268180ca7a4e3841295613Timo Sirainen string_t *str = t_str_new(128);
64d895bcca177ee840268180ca7a4e3841295613Timo Sirainen enum charset_result result;
64d895bcca177ee840268180ca7a4e3841295613Timo Sirainen unsigned int i;
64d895bcca177ee840268180ca7a4e3841295613Timo Sirainen
64d895bcca177ee840268180ca7a4e3841295613Timo Sirainen test_begin("charset iconv crashes");
64d895bcca177ee840268180ca7a4e3841295613Timo Sirainen for (i = 0; i < N_ELEMENTS(tests); i++) {
64d895bcca177ee840268180ca7a4e3841295613Timo Sirainen str_truncate(str, 0);
64d895bcca177ee840268180ca7a4e3841295613Timo Sirainen /* we don't care about checking the result. we only want to
64d895bcca177ee840268180ca7a4e3841295613Timo Sirainen verify that there's no crash. */
64d895bcca177ee840268180ca7a4e3841295613Timo Sirainen (void)charset_to_utf8_str(tests[i].charset, NULL,
64d895bcca177ee840268180ca7a4e3841295613Timo Sirainen tests[i].input, str, &result);
64d895bcca177ee840268180ca7a4e3841295613Timo Sirainen }
64d895bcca177ee840268180ca7a4e3841295613Timo Sirainen test_end();
64d895bcca177ee840268180ca7a4e3841295613Timo Sirainen}
8a0ef83121a9ae375448efb2e1f08e136bf04e22Timo Sirainen
8a0ef83121a9ae375448efb2e1f08e136bf04e22Timo Sirainenstatic void test_charset_iconv_utf7_state(void)
8a0ef83121a9ae375448efb2e1f08e136bf04e22Timo Sirainen{
8a0ef83121a9ae375448efb2e1f08e136bf04e22Timo Sirainen struct charset_translation *trans;
8a0ef83121a9ae375448efb2e1f08e136bf04e22Timo Sirainen string_t *str = t_str_new(32);
8a0ef83121a9ae375448efb2e1f08e136bf04e22Timo Sirainen unsigned char nextbuf[5+CHARSET_MAX_PENDING_BUF_SIZE+1];
8a0ef83121a9ae375448efb2e1f08e136bf04e22Timo Sirainen size_t size;
8a0ef83121a9ae375448efb2e1f08e136bf04e22Timo Sirainen
8a0ef83121a9ae375448efb2e1f08e136bf04e22Timo Sirainen test_begin("charset iconv utf7 state");
8a0ef83121a9ae375448efb2e1f08e136bf04e22Timo Sirainen test_assert(charset_to_utf8_begin("UTF-7", NULL, &trans) == 0);
8a0ef83121a9ae375448efb2e1f08e136bf04e22Timo Sirainen size = 2;
8a0ef83121a9ae375448efb2e1f08e136bf04e22Timo Sirainen test_assert(charset_to_utf8(trans, (const void *)"a+", &size, str) == CHARSET_RET_INCOMPLETE_INPUT);
8a0ef83121a9ae375448efb2e1f08e136bf04e22Timo Sirainen test_assert(strcmp(str_c(str), "a") == 0);
8a0ef83121a9ae375448efb2e1f08e136bf04e22Timo Sirainen test_assert(size == 1);
8a0ef83121a9ae375448efb2e1f08e136bf04e22Timo Sirainen memset(nextbuf, '?', sizeof(nextbuf));
8a0ef83121a9ae375448efb2e1f08e136bf04e22Timo Sirainen memcpy(nextbuf, "+AOQ-", 5);
8a0ef83121a9ae375448efb2e1f08e136bf04e22Timo Sirainen size = sizeof(nextbuf);
8a0ef83121a9ae375448efb2e1f08e136bf04e22Timo Sirainen test_assert(charset_to_utf8(trans, nextbuf, &size, str) == CHARSET_RET_OK);
8a0ef83121a9ae375448efb2e1f08e136bf04e22Timo Sirainen test_assert(strcmp(str_c(str), "a\xC3\xA4???????????") == 0);
8a0ef83121a9ae375448efb2e1f08e136bf04e22Timo Sirainen charset_to_utf8_end(&trans);
8a0ef83121a9ae375448efb2e1f08e136bf04e22Timo Sirainen test_end();
8a0ef83121a9ae375448efb2e1f08e136bf04e22Timo Sirainen}
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen#endif
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainenint main(void)
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen{
baf3e87e186453fda13bd21f7cbcb2efc8492e8bTimo Sirainen static void (*const test_functions[])(void) = {
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen test_charset_is_utf8,
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen test_charset_utf8,
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen#ifdef HAVE_ICONV
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen test_charset_iconv,
64d895bcca177ee840268180ca7a4e3841295613Timo Sirainen test_charset_iconv_crashes,
8a0ef83121a9ae375448efb2e1f08e136bf04e22Timo Sirainen test_charset_iconv_utf7_state,
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen#endif
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen NULL
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen };
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen return test_run(test_functions);
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen}