test-fts-icu.c revision 5ae97456417a137a85c236a3db32f51fb592e474
/* Copyright (c) 2015 Dovecot authors, see the included COPYING file */
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch#include "lib.h"
d08e49550aa890e71a5f10b7de43347ec44473acTimo Sirainen#include "buffer.h"
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch#include "str.h"
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch#include "unichar.h"
7af70f7646264a2f52b361f9ca78f08681acc4e2Stephan Bosch#include "test-common.h"
b72c3363092b73cab1da2de4a9d75592e7d8fd6bTimo Sirainen#include "fts-icu.h"
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch
7384b4e78eaab44693c985192276e31322155e32Stephan Boschstatic void test_fts_icu_utf8_to_utf16_ascii_resize(void)
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch{
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch buffer_t *dest = buffer_create_dynamic(pool_datastack_create(), 5);
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch test_begin("fts_icu_utf8_to_utf16 ascii resize");
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch /* dynamic buffers reserve +1 for str_c()'s NUL, so 5 -> 4 */
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch test_assert(buffer_get_size(dest) == 5);
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch fts_icu_utf8_to_utf16(dest, "12");
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch test_assert(dest->used == 4);
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch test_assert(buffer_get_size(dest) == 5);
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch fts_icu_utf8_to_utf16(dest, "123");
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch test_assert(dest->used == 6);
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch test_assert(buffer_get_size(dest) == 8);
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch fts_icu_utf8_to_utf16(dest, "12345");
b72c3363092b73cab1da2de4a9d75592e7d8fd6bTimo Sirainen test_assert(dest->used == 10);
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch test_end();
6a90041707f1290c8970a3bacb0f8f928aeaaba6Stephan Bosch}
6a90041707f1290c8970a3bacb0f8f928aeaaba6Stephan Bosch
7384b4e78eaab44693c985192276e31322155e32Stephan Boschstatic void test_fts_icu_utf8_to_utf16_32bit_resize(void)
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch{
feba5e502b2131c9a1c766b7ef9ff041dbf71d1dStephan Bosch buffer_t *dest;
feba5e502b2131c9a1c766b7ef9ff041dbf71d1dStephan Bosch unsigned int i;
7ebcb054e0d3cc4be54038cbf763ec4189d9725bStephan Bosch
7ebcb054e0d3cc4be54038cbf763ec4189d9725bStephan Bosch test_begin("fts_icu_utf8_to_utf16 32bit resize");
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch for (i = 2; i <= 5; i++) {
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch dest = buffer_create_dynamic(pool_datastack_create(), i);
7ebcb054e0d3cc4be54038cbf763ec4189d9725bStephan Bosch test_assert(buffer_get_size(dest) == i);
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch fts_icu_utf8_to_utf16(dest, "\xF0\x90\x90\x80"); /* 0x10400 */
feba5e502b2131c9a1c766b7ef9ff041dbf71d1dStephan Bosch test_assert(dest->used == 4);
7ebcb054e0d3cc4be54038cbf763ec4189d9725bStephan Bosch }
7ebcb054e0d3cc4be54038cbf763ec4189d9725bStephan Bosch
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch test_end();
7ebcb054e0d3cc4be54038cbf763ec4189d9725bStephan Bosch}
7ebcb054e0d3cc4be54038cbf763ec4189d9725bStephan Bosch
7384b4e78eaab44693c985192276e31322155e32Stephan Boschstatic void test_fts_icu_utf16_to_utf8(void)
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch{
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch string_t *dest = t_str_new(64);
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch const UChar src[] = { 0xbd, 'b', 'c' };
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch unsigned int i;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch test_begin("fts_icu_utf16_to_utf8");
da300472555d9afdb0bcb767456f731cf5c2f6aaStephan Bosch for (i = N_ELEMENTS(src); i > 0; i--) {
f9d2a1f21ad65262bc630f0834d7eead06a1bac3Timo Sirainen fts_icu_utf16_to_utf8(dest, src, i);
f9d2a1f21ad65262bc630f0834d7eead06a1bac3Timo Sirainen test_assert(dest->used == i+1);
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch }
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch test_end();
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch}
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch
7384b4e78eaab44693c985192276e31322155e32Stephan Boschstatic void test_fts_icu_utf16_to_utf8_resize(void)
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch{
6dad0888fcec8372f230941c70d8940b8c203b32Stephan Bosch string_t *dest;
6a90041707f1290c8970a3bacb0f8f928aeaaba6Stephan Bosch const UChar src = UNICODE_REPLACEMENT_CHAR;
6a90041707f1290c8970a3bacb0f8f928aeaaba6Stephan Bosch unsigned int i;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch test_begin("fts_icu_utf16_to_utf8 resize");
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch for (i = 2; i <= 6; i++) {
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch dest = t_str_new(i);
b72c3363092b73cab1da2de4a9d75592e7d8fd6bTimo Sirainen test_assert(buffer_get_size(dest) == i);
b72c3363092b73cab1da2de4a9d75592e7d8fd6bTimo Sirainen fts_icu_utf16_to_utf8(dest, &src, 1);
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch test_assert(dest->used == 3);
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch test_assert(strcmp(str_c(dest), UNICODE_REPLACEMENT_CHAR_UTF8) == 0);
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch }
1bc12a53ddc6696bb209fb79d7cc66262d2ea621Timo Sirainen
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch test_end();
1bc12a53ddc6696bb209fb79d7cc66262d2ea621Timo Sirainen}
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch
6a90041707f1290c8970a3bacb0f8f928aeaaba6Stephan Boschstatic UTransliterator *get_translit(const char *id)
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch{
1c2f122ae93d3316f6746f255f6659b510527cc8Stephan Bosch UTransliterator *translit;
1c2f122ae93d3316f6746f255f6659b510527cc8Stephan Bosch buffer_t *id_utf16;
1c2f122ae93d3316f6746f255f6659b510527cc8Stephan Bosch UErrorCode err = U_ZERO_ERROR;
b72c3363092b73cab1da2de4a9d75592e7d8fd6bTimo Sirainen UParseError perr;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch id_utf16 = buffer_create_dynamic(pool_datastack_create(), 16);
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch fts_icu_utf8_to_utf16(id_utf16, id);
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch translit = utrans_openU(id_utf16->data, id_utf16->used/sizeof(UChar),
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch UTRANS_FORWARD, NULL, 0, &perr, &err);
b72c3363092b73cab1da2de4a9d75592e7d8fd6bTimo Sirainen test_assert(!U_FAILURE(err));
1ec26e0b70ac7f8a4e3dfbc59aa77f572651d5aeStephan Bosch return translit;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch}
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch
7384b4e78eaab44693c985192276e31322155e32Stephan Boschstatic void test_fts_icu_translate(void)
6a90041707f1290c8970a3bacb0f8f928aeaaba6Stephan Bosch{
b72c3363092b73cab1da2de4a9d75592e7d8fd6bTimo Sirainen const char *translit_id = "Any-Lower";
1bc12a53ddc6696bb209fb79d7cc66262d2ea621Timo Sirainen UTransliterator *translit;
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch buffer_t *dest = buffer_create_dynamic(pool_datastack_create(), 64);
b72c3363092b73cab1da2de4a9d75592e7d8fd6bTimo Sirainen const UChar src[] = { 0xbd, 'B', 'C' };
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch const char *error;
1ec26e0b70ac7f8a4e3dfbc59aa77f572651d5aeStephan Bosch unsigned int i;
6a90041707f1290c8970a3bacb0f8f928aeaaba6Stephan Bosch
1ec26e0b70ac7f8a4e3dfbc59aa77f572651d5aeStephan Bosch test_begin("fts_icu_translate");
b72c3363092b73cab1da2de4a9d75592e7d8fd6bTimo Sirainen translit = get_translit(translit_id);
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch for (i = N_ELEMENTS(src); i > 0; i--) {
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch buffer_set_used_size(dest, 0);
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch test_assert(fts_icu_translate(dest, src, i,
d08e49550aa890e71a5f10b7de43347ec44473acTimo Sirainen translit, &error) == 0);
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch test_assert(dest->used == i * sizeof(UChar));
d08e49550aa890e71a5f10b7de43347ec44473acTimo Sirainen }
d08e49550aa890e71a5f10b7de43347ec44473acTimo Sirainen utrans_close(translit);
d08e49550aa890e71a5f10b7de43347ec44473acTimo Sirainen test_end();
d08e49550aa890e71a5f10b7de43347ec44473acTimo Sirainen}
d08e49550aa890e71a5f10b7de43347ec44473acTimo Sirainen
d08e49550aa890e71a5f10b7de43347ec44473acTimo Sirainenstatic void test_fts_icu_translate_resize(void)
d08e49550aa890e71a5f10b7de43347ec44473acTimo Sirainen{
d08e49550aa890e71a5f10b7de43347ec44473acTimo Sirainen const char *translit_id = "Any-Hex";
d08e49550aa890e71a5f10b7de43347ec44473acTimo Sirainen const char *src_utf8 = "FOO";
d08e49550aa890e71a5f10b7de43347ec44473acTimo Sirainen buffer_t *dest, *src_utf16;
d08e49550aa890e71a5f10b7de43347ec44473acTimo Sirainen UTransliterator *translit;
d08e49550aa890e71a5f10b7de43347ec44473acTimo Sirainen const char *error;
d08e49550aa890e71a5f10b7de43347ec44473acTimo Sirainen unsigned int i;
d08e49550aa890e71a5f10b7de43347ec44473acTimo Sirainen
d08e49550aa890e71a5f10b7de43347ec44473acTimo Sirainen test_begin("fts_icu_translate_resize resize");
d08e49550aa890e71a5f10b7de43347ec44473acTimo Sirainen
d08e49550aa890e71a5f10b7de43347ec44473acTimo Sirainen src_utf16 = buffer_create_dynamic(pool_datastack_create(), 16);
d08e49550aa890e71a5f10b7de43347ec44473acTimo Sirainen translit = get_translit(translit_id);
d08e49550aa890e71a5f10b7de43347ec44473acTimo Sirainen for (i = 2; i <= 20; i++) {
d08e49550aa890e71a5f10b7de43347ec44473acTimo Sirainen buffer_set_used_size(src_utf16, 0);
d08e49550aa890e71a5f10b7de43347ec44473acTimo Sirainen fts_icu_utf8_to_utf16(src_utf16, src_utf8);
d08e49550aa890e71a5f10b7de43347ec44473acTimo Sirainen dest = buffer_create_dynamic(pool_datastack_create(), i);
d08e49550aa890e71a5f10b7de43347ec44473acTimo Sirainen test_assert(buffer_get_size(dest) == i);
d08e49550aa890e71a5f10b7de43347ec44473acTimo Sirainen test_assert(fts_icu_translate(dest, src_utf16->data,
d08e49550aa890e71a5f10b7de43347ec44473acTimo Sirainen src_utf16->used/sizeof(UChar),
d08e49550aa890e71a5f10b7de43347ec44473acTimo Sirainen translit, &error) == 0);
d08e49550aa890e71a5f10b7de43347ec44473acTimo Sirainen }
d08e49550aa890e71a5f10b7de43347ec44473acTimo Sirainen
d08e49550aa890e71a5f10b7de43347ec44473acTimo Sirainen utrans_close(translit);
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch test_end();
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch}
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch
7384b4e78eaab44693c985192276e31322155e32Stephan Boschint main(void)
b72c3363092b73cab1da2de4a9d75592e7d8fd6bTimo Sirainen{
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch static void (*test_functions[])(void) = {
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch test_fts_icu_utf8_to_utf16_ascii_resize,
6ee9ce5ed955a1283dc22ad28980bf9cc23d4c4eStephan Bosch test_fts_icu_utf8_to_utf16_32bit_resize,
6ee9ce5ed955a1283dc22ad28980bf9cc23d4c4eStephan Bosch test_fts_icu_utf16_to_utf8,
6ee9ce5ed955a1283dc22ad28980bf9cc23d4c4eStephan Bosch test_fts_icu_utf16_to_utf8_resize,
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch test_fts_icu_translate,
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch test_fts_icu_translate_resize,
7384b4e78eaab44693c985192276e31322155e32Stephan Bosch NULL
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch };
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch return test_run(test_functions);
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch}
208dcaf62332b80b220c8c66e776f7cc0c39253bStephan Bosch