test-unichar.c revision 814bf67459ad405a157af0b8940602024d7fadfe
/* Copyright (c) 2007-2015 Dovecot authors, see the included COPYING file */
00e7c3010f7da4a49881a7feb05e413af353af0aTimo Sirainen
00e7c3010f7da4a49881a7feb05e413af353af0aTimo Sirainen#include "test-lib.h"
00e7c3010f7da4a49881a7feb05e413af353af0aTimo Sirainen#include "str.h"
00e7c3010f7da4a49881a7feb05e413af353af0aTimo Sirainen#include "buffer.h"
00e7c3010f7da4a49881a7feb05e413af353af0aTimo Sirainen#include "unichar.h"
00e7c3010f7da4a49881a7feb05e413af353af0aTimo Sirainen
/* Checks uni_utf8_strlen() on a NUL-terminated string: only the two
   2-byte characters in front of the first NUL are expected to be counted. */
static void unichar_check_utf8_strlen(const char *input)
{
	test_begin("uni_utf8_strlen()");
	test_assert(uni_utf8_strlen(input) == 2);
	test_end();
}

/* Checks uni_utf8_strlen_n() with byte limits that end both in the middle
   of a 2-byte character and exactly on a character boundary. */
static void unichar_check_utf8_strlen_n(const char *input)
{
	test_begin("uni_utf8_strlen_n()");
	/* limit cuts the first character in half: nothing is counted */
	test_assert(uni_utf8_strlen_n(input, 1) == 0);
	/* exactly one complete character */
	test_assert(uni_utf8_strlen_n(input, 2) == 1);
	/* one complete character plus half of the second one */
	test_assert(uni_utf8_strlen_n(input, 3) == 1);
	/* both characters complete */
	test_assert(uni_utf8_strlen_n(input, 4) == 2);
	test_end();
}

/* Runs the uni_utf8_strlen() and uni_utf8_strlen_n() tests as two
   separately reported test cases sharing the same input. */
static void test_unichar_uni_utf8_strlen(void)
{
	/* U+00E4 twice (2 bytes each), then an embedded NUL and an 'a' */
	static const char input[] = "\xC3\xA4\xC3\xA4\0a";

	unichar_check_utf8_strlen(input);
	unichar_check_utf8_strlen_n(input);
}
00e7c3010f7da4a49881a7feb05e413af353af0aTimo Sirainen
/* Tests uni_utf8_partial_strlen_n(): for every byte limit from 1 to 6 it
   checks both the returned character count and the position stored in pos.
   Per the expectations below, pos only advances past complete characters,
   and the embedded NUL and the trailing 'a' count as one character each. */
static void test_unichar_uni_utf8_partial_strlen_n(void)
{
	/* U+00E4 twice (2 bytes each), then an embedded NUL and an 'a' */
	static const char input[] = "\xC3\xA4\xC3\xA4\0a";
	size_t pos;

	test_begin("uni_utf8_partial_strlen_n()");

	/* limit ends inside the first character */
	test_assert(uni_utf8_partial_strlen_n(input, 1, &pos) == 0 &&
		    pos == 0);
	/* first character complete */
	test_assert(uni_utf8_partial_strlen_n(input, 2, &pos) == 1 &&
		    pos == 2);
	/* limit ends inside the second character: pos stays at 2 */
	test_assert(uni_utf8_partial_strlen_n(input, 3, &pos) == 1 &&
		    pos == 2);
	/* both 2-byte characters complete */
	test_assert(uni_utf8_partial_strlen_n(input, 4, &pos) == 2 &&
		    pos == 4);
	/* the embedded NUL is included in the count */
	test_assert(uni_utf8_partial_strlen_n(input, 5, &pos) == 3 &&
		    pos == 5);
	/* ... and so is the 'a' that follows it */
	test_assert(uni_utf8_partial_strlen_n(input, 6, &pos) == 4 &&
		    pos == 6);

	test_end();
}
9dcb7a41eaaf832f641b7743060b5cf5ed7c80b3Timo Sirainen
0a53eb0283d7ec28c6105f61e118b96fce8ecb95Timo Sirainenvoid test_unichar(void)
7ca63fa4166f89fee900b7c14d87d53fbac47242Timo Sirainen{
7ca63fa4166f89fee900b7c14d87d53fbac47242Timo Sirainen static const char overlong_utf8[] = "\xf8\x80\x95\x81\xa1";
7ca63fa4166f89fee900b7c14d87d53fbac47242Timo Sirainen static const char collate_in[] = "\xc3\xbc \xc2\xb3";
7ca63fa4166f89fee900b7c14d87d53fbac47242Timo Sirainen static const char collate_exp[] = "U\xcc\x88 3";
7ca63fa4166f89fee900b7c14d87d53fbac47242Timo Sirainen buffer_t *collate_out;
7ca63fa4166f89fee900b7c14d87d53fbac47242Timo Sirainen unichar_t chr, chr2;
7ca63fa4166f89fee900b7c14d87d53fbac47242Timo Sirainen string_t *str = t_str_new(16);
00e7c3010f7da4a49881a7feb05e413af353af0aTimo Sirainen
00e7c3010f7da4a49881a7feb05e413af353af0aTimo Sirainen test_begin("unichars encode/decode");
00e7c3010f7da4a49881a7feb05e413af353af0aTimo Sirainen for (chr = 0; chr <= 0x10ffff; chr++) {
7ca63fa4166f89fee900b7c14d87d53fbac47242Timo Sirainen /* The bottom 6 bits should be irrelevant to code coverage,
00e7c3010f7da4a49881a7feb05e413af353af0aTimo Sirainen only test 000000, 111111, and something in between. */
00e7c3010f7da4a49881a7feb05e413af353af0aTimo Sirainen if ((chr & 63) == 1)
00e7c3010f7da4a49881a7feb05e413af353af0aTimo Sirainen chr += rand() % 62; /* After 0, somewhere between 1 and 62 */
00e7c3010f7da4a49881a7feb05e413af353af0aTimo Sirainen else if ((chr & 63) > 0 && (chr & 63) < 63)
00e7c3010f7da4a49881a7feb05e413af353af0aTimo Sirainen chr |= 63; /* After random, straight to 63 */
00e7c3010f7da4a49881a7feb05e413af353af0aTimo Sirainen
00e7c3010f7da4a49881a7feb05e413af353af0aTimo Sirainen str_truncate(str, 0);
00e7c3010f7da4a49881a7feb05e413af353af0aTimo Sirainen uni_ucs4_to_utf8_c(chr, str);
00e7c3010f7da4a49881a7feb05e413af353af0aTimo Sirainen test_assert(uni_utf8_str_is_valid(str_c(str)));
00e7c3010f7da4a49881a7feb05e413af353af0aTimo Sirainen test_assert(uni_utf8_get_char(str_c(str), &chr2) == (int)uni_utf8_char_bytes(*str_data(str)));
00e7c3010f7da4a49881a7feb05e413af353af0aTimo Sirainen test_assert(chr2 == chr);
00e7c3010f7da4a49881a7feb05e413af353af0aTimo Sirainen
00e7c3010f7da4a49881a7feb05e413af353af0aTimo Sirainen if ((chr & 0x63) == 0) {
00e7c3010f7da4a49881a7feb05e413af353af0aTimo Sirainen unsigned int utf8len = uni_utf8_char_bytes(*str_c(str));
00e7c3010f7da4a49881a7feb05e413af353af0aTimo Sirainen
00e7c3010f7da4a49881a7feb05e413af353af0aTimo Sirainen /* virtually truncate the byte string */
00e7c3010f7da4a49881a7feb05e413af353af0aTimo Sirainen while (--utf8len > 0)
00e7c3010f7da4a49881a7feb05e413af353af0aTimo Sirainen test_assert(uni_utf8_get_char_n(str_c(str), utf8len, &chr2) == 0);
00e7c3010f7da4a49881a7feb05e413af353af0aTimo Sirainen
00e7c3010f7da4a49881a7feb05e413af353af0aTimo Sirainen utf8len = uni_utf8_char_bytes(*str_c(str));
00e7c3010f7da4a49881a7feb05e413af353af0aTimo Sirainen
00e7c3010f7da4a49881a7feb05e413af353af0aTimo Sirainen /* actually truncate the byte stream */
00e7c3010f7da4a49881a7feb05e413af353af0aTimo Sirainen while (--utf8len > 0) {
00e7c3010f7da4a49881a7feb05e413af353af0aTimo Sirainen str_truncate(str, utf8len);
00e7c3010f7da4a49881a7feb05e413af353af0aTimo Sirainen test_assert(!uni_utf8_str_is_valid(str_c(str)));
00e7c3010f7da4a49881a7feb05e413af353af0aTimo Sirainen test_assert(uni_utf8_get_char(str_c(str), &chr2) == 0);
00e7c3010f7da4a49881a7feb05e413af353af0aTimo Sirainen }
7ca63fa4166f89fee900b7c14d87d53fbac47242Timo Sirainen }
00e7c3010f7da4a49881a7feb05e413af353af0aTimo Sirainen }
00e7c3010f7da4a49881a7feb05e413af353af0aTimo Sirainen test_end();
00e7c3010f7da4a49881a7feb05e413af353af0aTimo Sirainen
00e7c3010f7da4a49881a7feb05e413af353af0aTimo Sirainen test_begin("unichar collation");
00e7c3010f7da4a49881a7feb05e413af353af0aTimo Sirainen collate_out = buffer_create_dynamic(default_pool, 32);
7ca63fa4166f89fee900b7c14d87d53fbac47242Timo Sirainen uni_utf8_to_decomposed_titlecase(collate_in, sizeof(collate_in),
00e7c3010f7da4a49881a7feb05e413af353af0aTimo Sirainen collate_out);
7ca63fa4166f89fee900b7c14d87d53fbac47242Timo Sirainen test_assert(!strcmp(collate_out->data, collate_exp));
0a53eb0283d7ec28c6105f61e118b96fce8ecb95Timo Sirainen buffer_free(&collate_out);
00e7c3010f7da4a49881a7feb05e413af353af0aTimo Sirainen
00e7c3010f7da4a49881a7feb05e413af353af0aTimo Sirainen test_assert(!uni_utf8_str_is_valid(overlong_utf8));
00e7c3010f7da4a49881a7feb05e413af353af0aTimo Sirainen test_assert(uni_utf8_get_char(overlong_utf8, &chr2) < 0);
00e7c3010f7da4a49881a7feb05e413af353af0aTimo Sirainen test_end();
678d0463849ba777106eb7875f27db07a5d8e3dfTimo Sirainen
678d0463849ba777106eb7875f27db07a5d8e3dfTimo Sirainen test_unichar_uni_utf8_strlen();
4ee00532a265bdfb38539d811fcd12d51210ac35Timo Sirainen test_unichar_uni_utf8_partial_strlen_n();
00e7c3010f7da4a49881a7feb05e413af353af0aTimo Sirainen}
0a53eb0283d7ec28c6105f61e118b96fce8ecb95Timo Sirainen