test-charset.c revision f9291653e3d42d630b9212ceb9290c974e51597a
/* Copyright (c) 2015 Dovecot authors, see the included COPYING file */
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen#include "lib.h"
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen#include "str.h"
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen#include "test-common.h"
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen#include "charset-utf8.h"
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen
/* Verify that charset_is_utf8() returns true for the mixed-case charset
   names "AScII", "us-AScII", "uTF8" and "uTF-8". */
static void test_charset_is_utf8(void)
{
	test_begin("charset_is_utf8");

	/* ASCII spellings */
	test_assert(charset_is_utf8("AScII"));
	test_assert(charset_is_utf8("us-AScII"));

	/* UTF-8 spellings, with and without the dash */
	test_assert(charset_is_utf8("uTF8"));
	test_assert(charset_is_utf8("uTF-8"));

	test_end();
}
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainenstatic void test_charset_utf8_common(const char *input_charset)
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen{
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen struct {
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen const char *input;
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen const char *output;
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen enum charset_result result;
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen } tests[] = {
f3779c0540f0dc8700fd2aef922d69dcc6b0194eTimo Sirainen { "p\xC3\xA4\xC3", "p\xC3\xA4", CHARSET_RET_INCOMPLETE_INPUT },
f3779c0540f0dc8700fd2aef922d69dcc6b0194eTimo Sirainen { "p\xC3\xA4\xC3""a", "p\xC3\xA4"UNICODE_REPLACEMENT_CHAR_UTF8"a", CHARSET_RET_INVALID_INPUT }
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen };
f9291653e3d42d630b9212ceb9290c974e51597aTimo Sirainen string_t *src, *str = t_str_new(256);
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen enum charset_result result;
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen unsigned int i;
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen for (i = 0; i < N_ELEMENTS(tests); i++) {
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen str_truncate(str, 0);
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen test_assert_idx(charset_to_utf8_str(input_charset, NULL,
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen tests[i].input, str, &result) == 0, i);
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen test_assert_idx(strcmp(tests[i].output, str_c(str)) == 0, i);
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen test_assert_idx(result == tests[i].result, i);
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen }
f9291653e3d42d630b9212ceb9290c974e51597aTimo Sirainen /* check that E2BIG handling works. We assume that iconv() is called
f9291653e3d42d630b9212ceb9290c974e51597aTimo Sirainen with 8192 byte buffer (tmpbuf[8192]) */
f9291653e3d42d630b9212ceb9290c974e51597aTimo Sirainen src = str_new(default_pool, 16384);
f9291653e3d42d630b9212ceb9290c974e51597aTimo Sirainen for (i = 0; i < 8190; i++)
f9291653e3d42d630b9212ceb9290c974e51597aTimo Sirainen str_append_c(src, 'a' + i % ('z'-'a'+1));
f9291653e3d42d630b9212ceb9290c974e51597aTimo Sirainen for (i = 0; i < 256; i++) {
f9291653e3d42d630b9212ceb9290c974e51597aTimo Sirainen str_truncate(str, 0);
f9291653e3d42d630b9212ceb9290c974e51597aTimo Sirainen str_append_c(src, 'A' + i % ('Z'-'A'+1));
f9291653e3d42d630b9212ceb9290c974e51597aTimo Sirainen test_assert_idx(charset_to_utf8_str(input_charset, NULL,
f9291653e3d42d630b9212ceb9290c974e51597aTimo Sirainen str_c(src), str, &result) == 0, i);
f9291653e3d42d630b9212ceb9290c974e51597aTimo Sirainen }
f9291653e3d42d630b9212ceb9290c974e51597aTimo Sirainen str_free(&src);
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen}
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen
/* Run the shared UTF-8 conversion checks with the charset name "UTF-8",
   reported under the test name "charset utf8". */
static void test_charset_utf8(void)
{
	test_begin("charset utf8");
	/* all assertions live in the shared helper */
	test_charset_utf8_common("UTF-8");
	test_end();
}
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen
#ifdef HAVE_ICONV
/* Conversion checks that are only built when HAVE_ICONV is defined.

   First converts each table entry from its own charset and compares the
   output and the reported result with the expected values, then repeats the
   shared UTF-8 checks under the charset name "UTF-8//IGNORE". */
static void test_charset_iconv(void)
{
	struct {
		const char *charset;
		const char *input;
		const char *output;
		enum charset_result result;
	} tests[] = {
		/* 0xE4 in the ISO-8859-1 input is expected to come out as "ä" */
		{ "ISO-8859-1", "p\xE4\xE4", "pää", CHARSET_RET_OK }
	};
	string_t *str;
	enum charset_result result;
	unsigned int i;

	/* allocate the output buffer before the test is opened, as before */
	str = t_str_new(128);

	test_begin("charset iconv");
	for (i = 0; i < N_ELEMENTS(tests); i++) {
		/* start every case from an empty output buffer */
		str_truncate(str, 0);
		test_assert_idx(charset_to_utf8_str(tests[i].charset, NULL,
						    tests[i].input, str, &result) == 0, i);
		test_assert_idx(strcmp(tests[i].output, str_c(str)) == 0, i);
		test_assert_idx(result == tests[i].result, i);
	}

	/* Use //IGNORE just to force handling to be done by iconv
	   instead of our own UTF-8 routines. */
	test_charset_utf8_common("UTF-8//IGNORE");
	test_end();
}
#endif
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainenint main(void)
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen{
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen static void (*test_functions[])(void) = {
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen test_charset_is_utf8,
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen test_charset_utf8,
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen#ifdef HAVE_ICONV
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen test_charset_iconv,
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen#endif
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen NULL
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen };
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen return test_run(test_functions);
9366765479f32a4df248d015c595d0f46cbf83b7Timo Sirainen}