bcb4e51a409d94ae670de96afb8483a4f7855294Stephan Bosch/* Copyright (c) 2014-2018 Dovecot authors, see the included COPYING file */
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen#include "lib.h"
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen#include "sha2.h"
bf698b98d3a3a1eced66cc682c449f23bf2b67d0Timo Sirainen#include "str.h"
bf698b98d3a3a1eced66cc682c449f23bf2b67d0Timo Sirainen#include "unichar.h"
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen#include "test-common.h"
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen#include "fts-language.h"
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen#include "fts-filter.h"
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen#include <stdio.h>
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
5916f19b49cae37c888109f6fdff3224f81d33aeTimo Sirainenstatic const char *const stopword_settings[] = {"stopwords_dir", TEST_STOPWORDS_DIR, NULL};
5916f19b49cae37c888109f6fdff3224f81d33aeTimo Sirainenstatic struct fts_language english_language = { .name = "en" };
440b625484f3cc9d3ec0a7ba36fe3583aa90172dTeemu Huovilastatic struct fts_language french_language = { .name = "fr" };
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovilastatic struct fts_language norwegian_language = { .name = "no" };
e467b295e44fb659ec28e9b6b3f05e71335b85e3Teemu Huovila#if defined(HAVE_LIBICU) && defined(HAVE_FTS_STEMMER)
c5effa0f13da8f45991c89a9d8c9d2109db66039Teemu Huovilastatic struct fts_language swedish_language = { .name = "sv" };
d965f1c171b714f1460d87bac597ec3e015fae83Teemu Huovila#endif
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
a7d8afaadae968db20eb979052111d76a3086cd7Timo Sirainenstatic void test_fts_filter_find(void)
a7d8afaadae968db20eb979052111d76a3086cd7Timo Sirainen{
a7d8afaadae968db20eb979052111d76a3086cd7Timo Sirainen test_begin("fts filter find");
a7d8afaadae968db20eb979052111d76a3086cd7Timo Sirainen test_assert(fts_filter_find("stopwords") == fts_filter_stopwords);
a7d8afaadae968db20eb979052111d76a3086cd7Timo Sirainen test_assert(fts_filter_find("snowball") == fts_filter_stemmer_snowball);
a7d8afaadae968db20eb979052111d76a3086cd7Timo Sirainen test_assert(fts_filter_find("normalizer-icu") == fts_filter_normalizer_icu);
a7d8afaadae968db20eb979052111d76a3086cd7Timo Sirainen test_assert(fts_filter_find("lowercase") == fts_filter_lowercase);
440b625484f3cc9d3ec0a7ba36fe3583aa90172dTeemu Huovila test_assert(fts_filter_find("contractions") == fts_filter_contractions);
440b625484f3cc9d3ec0a7ba36fe3583aa90172dTeemu Huovila test_end();
440b625484f3cc9d3ec0a7ba36fe3583aa90172dTeemu Huovila}
440b625484f3cc9d3ec0a7ba36fe3583aa90172dTeemu Huovila
440b625484f3cc9d3ec0a7ba36fe3583aa90172dTeemu Huovila
440b625484f3cc9d3ec0a7ba36fe3583aa90172dTeemu Huovilastatic void test_fts_filter_contractions_fail(void)
440b625484f3cc9d3ec0a7ba36fe3583aa90172dTeemu Huovila{
440b625484f3cc9d3ec0a7ba36fe3583aa90172dTeemu Huovila
440b625484f3cc9d3ec0a7ba36fe3583aa90172dTeemu Huovila struct fts_filter *filter;
440b625484f3cc9d3ec0a7ba36fe3583aa90172dTeemu Huovila const char *error;
440b625484f3cc9d3ec0a7ba36fe3583aa90172dTeemu Huovila
440b625484f3cc9d3ec0a7ba36fe3583aa90172dTeemu Huovila test_begin("fts filter contractions, unsupported language");
440b625484f3cc9d3ec0a7ba36fe3583aa90172dTeemu Huovila test_assert(fts_filter_create(fts_filter_contractions, NULL, &english_language, NULL, &filter, &error) != 0);
440b625484f3cc9d3ec0a7ba36fe3583aa90172dTeemu Huovila test_assert(error != NULL);
440b625484f3cc9d3ec0a7ba36fe3583aa90172dTeemu Huovila test_end();
440b625484f3cc9d3ec0a7ba36fe3583aa90172dTeemu Huovila}
440b625484f3cc9d3ec0a7ba36fe3583aa90172dTeemu Huovila
440b625484f3cc9d3ec0a7ba36fe3583aa90172dTeemu Huovilastatic void test_fts_filter_contractions_fr(void)
440b625484f3cc9d3ec0a7ba36fe3583aa90172dTeemu Huovila{
b7324e421e2132cbbf753e6fdbe675bbaecdf929Timo Sirainen static const struct {
440b625484f3cc9d3ec0a7ba36fe3583aa90172dTeemu Huovila const char *input;
440b625484f3cc9d3ec0a7ba36fe3583aa90172dTeemu Huovila const char *output;
440b625484f3cc9d3ec0a7ba36fe3583aa90172dTeemu Huovila } tests[] = {
440b625484f3cc9d3ec0a7ba36fe3583aa90172dTeemu Huovila { "foo", "foo" },
440b625484f3cc9d3ec0a7ba36fe3583aa90172dTeemu Huovila { "you're", "you're" },
440b625484f3cc9d3ec0a7ba36fe3583aa90172dTeemu Huovila { "l'homme", "homme" },
440b625484f3cc9d3ec0a7ba36fe3583aa90172dTeemu Huovila { "l\xE2\x80\x99homme", "homme" },
440b625484f3cc9d3ec0a7ba36fe3583aa90172dTeemu Huovila { "aujourd'hui", "aujourd'hui" },
440b625484f3cc9d3ec0a7ba36fe3583aa90172dTeemu Huovila { "qu\xE2\x80\x99il", "il" },
440b625484f3cc9d3ec0a7ba36fe3583aa90172dTeemu Huovila { "qu'il", "il" },
440b625484f3cc9d3ec0a7ba36fe3583aa90172dTeemu Huovila { "du'il", "du'il" },
440b625484f3cc9d3ec0a7ba36fe3583aa90172dTeemu Huovila { "que", "que" },
440b625484f3cc9d3ec0a7ba36fe3583aa90172dTeemu Huovila { "'foobar'", "'foobar'" },
440b625484f3cc9d3ec0a7ba36fe3583aa90172dTeemu Huovila { "foo'bar", "foo'bar" },
440b625484f3cc9d3ec0a7ba36fe3583aa90172dTeemu Huovila { "a'foo", "a'foo" },
440b625484f3cc9d3ec0a7ba36fe3583aa90172dTeemu Huovila { "cu'", "cu'" },
440b625484f3cc9d3ec0a7ba36fe3583aa90172dTeemu Huovila { "qu", "qu" },
440b625484f3cc9d3ec0a7ba36fe3583aa90172dTeemu Huovila { "d", "d" },
87af299c8a8bbca798a479896f361163c9539437Teemu Huovila { "qu'", NULL },
87af299c8a8bbca798a479896f361163c9539437Teemu Huovila { "j'adore", "adore" },
fd15d61d05bf628fdd3359a0fec47ffae3609cabTeemu Huovila { "quelqu'un", "quelqu'un" },
fd15d61d05bf628fdd3359a0fec47ffae3609cabTeemu Huovila { "l'esprit", "esprit" }
440b625484f3cc9d3ec0a7ba36fe3583aa90172dTeemu Huovila };
440b625484f3cc9d3ec0a7ba36fe3583aa90172dTeemu Huovila struct fts_filter *filter;
440b625484f3cc9d3ec0a7ba36fe3583aa90172dTeemu Huovila const char *error;
440b625484f3cc9d3ec0a7ba36fe3583aa90172dTeemu Huovila const char *token;
440b625484f3cc9d3ec0a7ba36fe3583aa90172dTeemu Huovila unsigned int i;
440b625484f3cc9d3ec0a7ba36fe3583aa90172dTeemu Huovila int ret;
440b625484f3cc9d3ec0a7ba36fe3583aa90172dTeemu Huovila
440b625484f3cc9d3ec0a7ba36fe3583aa90172dTeemu Huovila test_begin("fts filter contractions, French");
440b625484f3cc9d3ec0a7ba36fe3583aa90172dTeemu Huovila test_assert(fts_filter_create(fts_filter_contractions, NULL, &french_language, NULL, &filter, &error) == 0);
440b625484f3cc9d3ec0a7ba36fe3583aa90172dTeemu Huovila
440b625484f3cc9d3ec0a7ba36fe3583aa90172dTeemu Huovila for (i = 0; i < N_ELEMENTS(tests); i++) {
440b625484f3cc9d3ec0a7ba36fe3583aa90172dTeemu Huovila token = tests[i].input;
440b625484f3cc9d3ec0a7ba36fe3583aa90172dTeemu Huovila ret = fts_filter_filter(filter, &token, &error);
440b625484f3cc9d3ec0a7ba36fe3583aa90172dTeemu Huovila test_assert(ret >= 0);
440b625484f3cc9d3ec0a7ba36fe3583aa90172dTeemu Huovila if (ret > 0)
440b625484f3cc9d3ec0a7ba36fe3583aa90172dTeemu Huovila test_assert_idx(strcmp(token, tests[i].output) == 0, i);
440b625484f3cc9d3ec0a7ba36fe3583aa90172dTeemu Huovila else if (ret == 0)
440b625484f3cc9d3ec0a7ba36fe3583aa90172dTeemu Huovila test_assert_idx(token == NULL && tests[i].output == NULL, i);
440b625484f3cc9d3ec0a7ba36fe3583aa90172dTeemu Huovila }
440b625484f3cc9d3ec0a7ba36fe3583aa90172dTeemu Huovila fts_filter_unref(&filter);
a7d8afaadae968db20eb979052111d76a3086cd7Timo Sirainen test_end();
a7d8afaadae968db20eb979052111d76a3086cd7Timo Sirainen}
a7d8afaadae968db20eb979052111d76a3086cd7Timo Sirainen
acfcf88e4dd529e4b2409f43bc9713cbc0169347Timo Sirainenstatic void test_fts_filter_lowercase(void)
acfcf88e4dd529e4b2409f43bc9713cbc0169347Timo Sirainen{
b7324e421e2132cbbf753e6fdbe675bbaecdf929Timo Sirainen static const struct {
acfcf88e4dd529e4b2409f43bc9713cbc0169347Timo Sirainen const char *input;
acfcf88e4dd529e4b2409f43bc9713cbc0169347Timo Sirainen const char *output;
acfcf88e4dd529e4b2409f43bc9713cbc0169347Timo Sirainen } tests[] = {
acfcf88e4dd529e4b2409f43bc9713cbc0169347Timo Sirainen { "foo", "foo" },
acfcf88e4dd529e4b2409f43bc9713cbc0169347Timo Sirainen { "FOO", "foo" },
acfcf88e4dd529e4b2409f43bc9713cbc0169347Timo Sirainen { "fOo", "foo" }
acfcf88e4dd529e4b2409f43bc9713cbc0169347Timo Sirainen };
acfcf88e4dd529e4b2409f43bc9713cbc0169347Timo Sirainen struct fts_filter *filter;
acfcf88e4dd529e4b2409f43bc9713cbc0169347Timo Sirainen const char *error;
acfcf88e4dd529e4b2409f43bc9713cbc0169347Timo Sirainen const char *token;
acfcf88e4dd529e4b2409f43bc9713cbc0169347Timo Sirainen unsigned int i;
acfcf88e4dd529e4b2409f43bc9713cbc0169347Timo Sirainen
acfcf88e4dd529e4b2409f43bc9713cbc0169347Timo Sirainen test_begin("fts filter lowercase");
a7d8afaadae968db20eb979052111d76a3086cd7Timo Sirainen test_assert(fts_filter_create(fts_filter_lowercase, NULL, &english_language, NULL, &filter, &error) == 0);
acfcf88e4dd529e4b2409f43bc9713cbc0169347Timo Sirainen
acfcf88e4dd529e4b2409f43bc9713cbc0169347Timo Sirainen for (i = 0; i < N_ELEMENTS(tests); i++) {
acfcf88e4dd529e4b2409f43bc9713cbc0169347Timo Sirainen token = tests[i].input;
acfcf88e4dd529e4b2409f43bc9713cbc0169347Timo Sirainen test_assert_idx(fts_filter_filter(filter, &token, &error) > 0 &&
acfcf88e4dd529e4b2409f43bc9713cbc0169347Timo Sirainen strcmp(token, tests[i].output) == 0, 0);
acfcf88e4dd529e4b2409f43bc9713cbc0169347Timo Sirainen }
acfcf88e4dd529e4b2409f43bc9713cbc0169347Timo Sirainen fts_filter_unref(&filter);
acfcf88e4dd529e4b2409f43bc9713cbc0169347Timo Sirainen test_end();
acfcf88e4dd529e4b2409f43bc9713cbc0169347Timo Sirainen}
acfcf88e4dd529e4b2409f43bc9713cbc0169347Timo Sirainen
c8eaee2ad6cc96e2ef42657f89d4404e674680b7Teemu Huovila#ifdef HAVE_LIBICU
c8eaee2ad6cc96e2ef42657f89d4404e674680b7Teemu Huovilastatic void test_fts_filter_lowercase_utf8(void)
c8eaee2ad6cc96e2ef42657f89d4404e674680b7Teemu Huovila{
b7324e421e2132cbbf753e6fdbe675bbaecdf929Timo Sirainen static const struct {
c8eaee2ad6cc96e2ef42657f89d4404e674680b7Teemu Huovila const char *input;
c8eaee2ad6cc96e2ef42657f89d4404e674680b7Teemu Huovila const char *output;
c8eaee2ad6cc96e2ef42657f89d4404e674680b7Teemu Huovila } tests[] = {
c8eaee2ad6cc96e2ef42657f89d4404e674680b7Teemu Huovila { "f\xC3\x85\xC3\x85", "f\xC3\xA5\xC3\xA5" },
c8eaee2ad6cc96e2ef42657f89d4404e674680b7Teemu Huovila { "F\xC3\x85\xC3\x85", "f\xC3\xA5\xC3\xA5" },
c8eaee2ad6cc96e2ef42657f89d4404e674680b7Teemu Huovila { "F\xC3\x85\xC3\xA5", "f\xC3\xA5\xC3\xA5" }
c8eaee2ad6cc96e2ef42657f89d4404e674680b7Teemu Huovila };
c8eaee2ad6cc96e2ef42657f89d4404e674680b7Teemu Huovila struct fts_filter *filter;
c8eaee2ad6cc96e2ef42657f89d4404e674680b7Teemu Huovila const char *error;
c8eaee2ad6cc96e2ef42657f89d4404e674680b7Teemu Huovila const char *token;
c8eaee2ad6cc96e2ef42657f89d4404e674680b7Teemu Huovila unsigned int i;
c8eaee2ad6cc96e2ef42657f89d4404e674680b7Teemu Huovila
c8eaee2ad6cc96e2ef42657f89d4404e674680b7Teemu Huovila test_begin("fts filter lowercase, UTF8");
c8eaee2ad6cc96e2ef42657f89d4404e674680b7Teemu Huovila test_assert(fts_filter_create(fts_filter_lowercase, NULL, &english_language, NULL, &filter, &error) == 0);
c8eaee2ad6cc96e2ef42657f89d4404e674680b7Teemu Huovila
c8eaee2ad6cc96e2ef42657f89d4404e674680b7Teemu Huovila for (i = 0; i < N_ELEMENTS(tests); i++) {
c8eaee2ad6cc96e2ef42657f89d4404e674680b7Teemu Huovila token = tests[i].input;
c8eaee2ad6cc96e2ef42657f89d4404e674680b7Teemu Huovila test_assert_idx(fts_filter_filter(filter, &token, &error) > 0 &&
5fcd30add8dcf4d883978cce3e39f3a89184f1e5Teemu Huovila strcmp(token, tests[i].output) == 0, 0);
5fcd30add8dcf4d883978cce3e39f3a89184f1e5Teemu Huovila }
5fcd30add8dcf4d883978cce3e39f3a89184f1e5Teemu Huovila fts_filter_unref(&filter);
5fcd30add8dcf4d883978cce3e39f3a89184f1e5Teemu Huovila test_end();
5fcd30add8dcf4d883978cce3e39f3a89184f1e5Teemu Huovila}
5fcd30add8dcf4d883978cce3e39f3a89184f1e5Teemu Huovila
5fcd30add8dcf4d883978cce3e39f3a89184f1e5Teemu Huovilastatic void test_fts_filter_lowercase_too_long_utf8(void)
5fcd30add8dcf4d883978cce3e39f3a89184f1e5Teemu Huovila{
b7324e421e2132cbbf753e6fdbe675bbaecdf929Timo Sirainen static const struct {
5fcd30add8dcf4d883978cce3e39f3a89184f1e5Teemu Huovila const char *input;
5fcd30add8dcf4d883978cce3e39f3a89184f1e5Teemu Huovila const char *output;
5fcd30add8dcf4d883978cce3e39f3a89184f1e5Teemu Huovila } tests[] = {
5fcd30add8dcf4d883978cce3e39f3a89184f1e5Teemu Huovila { "f\xC3\x85\xC3\x85", "f\xC3\xA5\xC3\xA5" },
5fcd30add8dcf4d883978cce3e39f3a89184f1e5Teemu Huovila { "abcdefghijklmnopqrstuvwxyz", "abcdefghijklmnopqrstuvwxy" },
5fcd30add8dcf4d883978cce3e39f3a89184f1e5Teemu Huovila { "abc\xC3\x85""defghijklmnopqrstuvwxyz", "abc\xC3\xA5""defghijklmnopqrstuvw" },
5fcd30add8dcf4d883978cce3e39f3a89184f1e5Teemu Huovila { "abcdefghijklmnopqrstuvwx\xC3\x85", "abcdefghijklmnopqrstuvwx" }
5fcd30add8dcf4d883978cce3e39f3a89184f1e5Teemu Huovila };
5fcd30add8dcf4d883978cce3e39f3a89184f1e5Teemu Huovila struct fts_filter *filter;
5fcd30add8dcf4d883978cce3e39f3a89184f1e5Teemu Huovila const char *error;
5fcd30add8dcf4d883978cce3e39f3a89184f1e5Teemu Huovila const char *token;
5fcd30add8dcf4d883978cce3e39f3a89184f1e5Teemu Huovila const char * const settings[] = {"maxlen", "25", NULL};
5fcd30add8dcf4d883978cce3e39f3a89184f1e5Teemu Huovila unsigned int i;
5fcd30add8dcf4d883978cce3e39f3a89184f1e5Teemu Huovila
5fcd30add8dcf4d883978cce3e39f3a89184f1e5Teemu Huovila test_begin("fts filter lowercase, too long UTF8");
5fcd30add8dcf4d883978cce3e39f3a89184f1e5Teemu Huovila test_assert(fts_filter_create(fts_filter_lowercase, NULL, &english_language, settings, &filter, &error) == 0);
5fcd30add8dcf4d883978cce3e39f3a89184f1e5Teemu Huovila
5fcd30add8dcf4d883978cce3e39f3a89184f1e5Teemu Huovila for (i = 0; i < N_ELEMENTS(tests); i++) {
5fcd30add8dcf4d883978cce3e39f3a89184f1e5Teemu Huovila token = tests[i].input;
5fcd30add8dcf4d883978cce3e39f3a89184f1e5Teemu Huovila test_assert_idx(fts_filter_filter(filter, &token, &error) > 0 &&
5fcd30add8dcf4d883978cce3e39f3a89184f1e5Teemu Huovila strcmp(token, tests[i].output) == 0, 0);
c8eaee2ad6cc96e2ef42657f89d4404e674680b7Teemu Huovila }
c8eaee2ad6cc96e2ef42657f89d4404e674680b7Teemu Huovila fts_filter_unref(&filter);
c8eaee2ad6cc96e2ef42657f89d4404e674680b7Teemu Huovila test_end();
c8eaee2ad6cc96e2ef42657f89d4404e674680b7Teemu Huovila}
c8eaee2ad6cc96e2ef42657f89d4404e674680b7Teemu Huovila#endif
c8eaee2ad6cc96e2ef42657f89d4404e674680b7Teemu Huovila
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainenstatic void test_fts_filter_stopwords_eng(void)
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen{
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen struct fts_filter *filter;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const char *error;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen int ret;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const char *input[] = {"an", "elephant", "and", "a", "bear",
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "drive", "by", "for", "no", "reason",
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "they", "will", "not", "sing", NULL};
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const char *output[] = {NULL, "elephant", NULL, NULL, "bear",
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "drive", NULL, NULL, NULL, "reason",
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen NULL, NULL, NULL, "sing"};
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const char **ip, **op;
db090e2a48bcc5ce162af0c36eef04407421379dTeemu Huovila const char *token;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_begin("fts filter stopwords, English");
a7d8afaadae968db20eb979052111d76a3086cd7Timo Sirainen test_assert(fts_filter_create(fts_filter_stopwords, NULL, &english_language, stopword_settings, &filter, &error) == 0);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen ip = input;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen op = output;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen while (*ip != NULL) {
db090e2a48bcc5ce162af0c36eef04407421379dTeemu Huovila token = *ip;
1d0f568e26ce5cbf18cd7bb335c6eea20a7e3770Teemu Huovila ret = fts_filter_filter(filter, &token, &error);
b58ad1c4a4ef561b0081d6daa47c2dd7b223aeacTimo Sirainen if (ret <= 0) {
b58ad1c4a4ef561b0081d6daa47c2dd7b223aeacTimo Sirainen test_assert(ret == 0);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_assert(*op == NULL);
b58ad1c4a4ef561b0081d6daa47c2dd7b223aeacTimo Sirainen } else {
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_assert(*op != NULL);
db090e2a48bcc5ce162af0c36eef04407421379dTeemu Huovila test_assert(strcmp(*ip, token) == 0);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen }
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen op++;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen ip++;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen }
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen fts_filter_unref(&filter);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_assert(filter == NULL);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_end();
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen}
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainenstatic void test_fts_filter_stopwords_fin(void)
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen{
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const struct fts_language finnish = { .name = "fi" };
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen struct fts_filter *filter;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const char *error;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen int ret;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const char *input[] = {"olla", "vaiko", "eik\xC3\xB6", "olla",
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "kenest\xC3\xA4", "ja", "joista", "jonka",
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "testi", NULL};
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const char *output[] = {NULL, "vaiko", "eik\xC3\xB6", NULL, NULL,
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen NULL, NULL, NULL, "testi"};
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const char *input2[] =
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen {"kuka", "kenet", "keneen", "testi", "eiv\xC3\xA4t", NULL};
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const char *output2[] = {NULL, NULL, NULL, "testi", NULL};
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const char **ip, **op;
db090e2a48bcc5ce162af0c36eef04407421379dTeemu Huovila const char *token;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_begin("fts filter stopwords, Finnish");
a7d8afaadae968db20eb979052111d76a3086cd7Timo Sirainen test_assert(fts_filter_create(fts_filter_stopwords, NULL, &finnish, stopword_settings, &filter, &error) == 0);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen ip = input;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen op = output;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen while (*ip != NULL) {
db090e2a48bcc5ce162af0c36eef04407421379dTeemu Huovila token = *ip;
1d0f568e26ce5cbf18cd7bb335c6eea20a7e3770Teemu Huovila ret = fts_filter_filter(filter, &token, &error);
b58ad1c4a4ef561b0081d6daa47c2dd7b223aeacTimo Sirainen if (ret <= 0) {
b58ad1c4a4ef561b0081d6daa47c2dd7b223aeacTimo Sirainen test_assert(ret == 0);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_assert(*op == NULL);
b58ad1c4a4ef561b0081d6daa47c2dd7b223aeacTimo Sirainen } else {
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_assert(*op != NULL);
db090e2a48bcc5ce162af0c36eef04407421379dTeemu Huovila test_assert(strcmp(*ip, token) == 0);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen }
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen op++;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen ip++;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen }
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen fts_filter_unref(&filter);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_assert(filter == NULL);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
a7d8afaadae968db20eb979052111d76a3086cd7Timo Sirainen test_assert(fts_filter_create(fts_filter_stopwords, NULL, &finnish, stopword_settings, &filter, &error) == 0);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen ip = input2;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen op = output2;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen while (*ip != NULL) {
db090e2a48bcc5ce162af0c36eef04407421379dTeemu Huovila token = *ip;
1d0f568e26ce5cbf18cd7bb335c6eea20a7e3770Teemu Huovila ret = fts_filter_filter(filter, &token, &error);
b58ad1c4a4ef561b0081d6daa47c2dd7b223aeacTimo Sirainen if (ret <= 0) {
b58ad1c4a4ef561b0081d6daa47c2dd7b223aeacTimo Sirainen test_assert(ret == 0);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_assert(*op == NULL);
b58ad1c4a4ef561b0081d6daa47c2dd7b223aeacTimo Sirainen } else {
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_assert(*op != NULL);
db090e2a48bcc5ce162af0c36eef04407421379dTeemu Huovila test_assert(strcmp(*ip, token) == 0);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen }
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen op++;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen ip++;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen }
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen fts_filter_unref(&filter);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_assert(filter == NULL);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_end();
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen}
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainenstatic void test_fts_filter_stopwords_fra(void)
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen{
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen struct fts_filter *filter;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const char *error;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen int ret;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const char *input[] = {"e\xC3\xBBt", "soyez", "soi", "peut", "que",
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "quelconque", "\xC3\xA9t\xC3\xA9",
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "l\xE2\x80\x99""av\xC3\xA8nement",
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen NULL};
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const char *output[] = {NULL, NULL, NULL, "peut", NULL,
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "quelconque", NULL,
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "l\xE2\x80\x99""av\xC3\xA8nement",};
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const char **ip, **op;
db090e2a48bcc5ce162af0c36eef04407421379dTeemu Huovila const char *token;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_begin("fts filter stopwords, French");
440b625484f3cc9d3ec0a7ba36fe3583aa90172dTeemu Huovila test_assert(fts_filter_create(fts_filter_stopwords, NULL, &french_language, stopword_settings, &filter, &error) == 0);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen ip = input;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen op = output;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen while (*ip != NULL) {
db090e2a48bcc5ce162af0c36eef04407421379dTeemu Huovila token = *ip;
1d0f568e26ce5cbf18cd7bb335c6eea20a7e3770Teemu Huovila ret = fts_filter_filter(filter, &token, &error);
b58ad1c4a4ef561b0081d6daa47c2dd7b223aeacTimo Sirainen if (ret <= 0) {
b58ad1c4a4ef561b0081d6daa47c2dd7b223aeacTimo Sirainen test_assert(ret == 0);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_assert(*op == NULL);
b58ad1c4a4ef561b0081d6daa47c2dd7b223aeacTimo Sirainen } else {
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_assert(*op != NULL);
db090e2a48bcc5ce162af0c36eef04407421379dTeemu Huovila test_assert(strcmp(*ip, token) == 0);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen }
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen op++;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen ip++;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen }
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen fts_filter_unref(&filter);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_assert(filter == NULL);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_end();
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen}
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovilastatic void test_fts_filter_stopwords_no(void)
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila{
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila struct fts_filter *filter;
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila const char *error;
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila int ret;
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila const char *input[] = {"og", "d\xC3\xA5", "medlemsstatane", "har",
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila "bunde", "seg", "til", "\xC3\xA5", "fremje",
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila "allmenn", "v\xC3\xB8rdnad", "for", "pakta",
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila "og", "halde", "seg", "etter", "menneskerettane",
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila "og", "den", "grunnleggjande", "fridomen", "i",
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila "samarbeid", "med", "Dei", "Sameinte",
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila "Nasjonane", NULL};
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila const char *output[] = {NULL, NULL, "medlemsstatane", NULL,
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila "bunde", NULL, NULL, NULL, "fremje",
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila "allmenn", "v\xC3\xB8rdnad", NULL, "pakta",
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila NULL, "halde", NULL, NULL, "menneskerettane",
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila NULL, NULL, "grunnleggjande", "fridomen", NULL,
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila "samarbeid", NULL, "Dei", "Sameinte",
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila "Nasjonane"};
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila const char **ip, **op;
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila const char *token;
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila test_begin("fts filter stopwords, Norwegian");
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila test_assert(fts_filter_create(fts_filter_stopwords, NULL, &norwegian_language, stopword_settings, &filter, &error) == 0);
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila ip = input;
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila op = output;
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila while (*ip != NULL) {
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila token = *ip;
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila ret = fts_filter_filter(filter, &token, &error);
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila if (ret <= 0) {
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila test_assert(ret == 0);
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila test_assert(*op == NULL);
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila } else {
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila test_assert(*op != NULL);
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila test_assert(strcmp(*ip, token) == 0);
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila }
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila op++;
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila ip++;
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila }
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila fts_filter_unref(&filter);
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila test_assert(filter == NULL);
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila test_end();
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila}
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila
dfb9243af1c95de27c7b3a783629ad901c085927Teemu Huovilastatic void test_fts_filter_stopwords_fail_lazy_init(void)
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen{
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const struct fts_language unknown = { .name = "bebobidoop" };
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen struct fts_filter *filter = NULL;
dfb9243af1c95de27c7b3a783629ad901c085927Teemu Huovila const char *error = NULL, *token = "foobar";
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
dfb9243af1c95de27c7b3a783629ad901c085927Teemu Huovila test_begin("fts filter stopwords, fail filter() (lazy init)");
a7d8afaadae968db20eb979052111d76a3086cd7Timo Sirainen test_assert(fts_filter_create(fts_filter_stopwords, NULL, &unknown, stopword_settings, &filter, &error) == 0);
5916f19b49cae37c888109f6fdff3224f81d33aeTimo Sirainen test_assert(filter != NULL && error == NULL);
5916f19b49cae37c888109f6fdff3224f81d33aeTimo Sirainen test_assert(fts_filter_filter(filter, &token, &error) < 0 && error != NULL);
b4598553665bac4e0b89ed36a16f1ee1648ee172Timo Sirainen fts_filter_unref(&filter);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_end();
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen}
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
3f3c1b629196bc8491f146705b6f8ddadfcde1c8Teemu Huovilastatic void test_fts_filter_stopwords_malformed(void)
3f3c1b629196bc8491f146705b6f8ddadfcde1c8Teemu Huovila{
3f3c1b629196bc8491f146705b6f8ddadfcde1c8Teemu Huovila const struct fts_language malformed = { .name = "malformed" };
3f3c1b629196bc8491f146705b6f8ddadfcde1c8Teemu Huovila struct fts_filter *filter = NULL;
3f3c1b629196bc8491f146705b6f8ddadfcde1c8Teemu Huovila const char *error = NULL, *token = "foobar";
3f3c1b629196bc8491f146705b6f8ddadfcde1c8Teemu Huovila
3f3c1b629196bc8491f146705b6f8ddadfcde1c8Teemu Huovila test_begin("fts filter stopwords, malformed list");
3f3c1b629196bc8491f146705b6f8ddadfcde1c8Teemu Huovila test_assert(fts_filter_create(fts_filter_stopwords, NULL, &malformed, stopword_settings, &filter, &error) == 0);
3f3c1b629196bc8491f146705b6f8ddadfcde1c8Teemu Huovila test_expect_error_string("seems empty. Is the file correctly formatted?");
3f3c1b629196bc8491f146705b6f8ddadfcde1c8Teemu Huovila test_assert(fts_filter_filter(filter, &token, &error) > 0);
3f3c1b629196bc8491f146705b6f8ddadfcde1c8Teemu Huovila test_expect_no_more_errors();
3f3c1b629196bc8491f146705b6f8ddadfcde1c8Teemu Huovila fts_filter_unref(&filter);
3f3c1b629196bc8491f146705b6f8ddadfcde1c8Teemu Huovila test_end();
3f3c1b629196bc8491f146705b6f8ddadfcde1c8Teemu Huovila
3f3c1b629196bc8491f146705b6f8ddadfcde1c8Teemu Huovila}
3f3c1b629196bc8491f146705b6f8ddadfcde1c8Teemu Huovila
632d2181914a1ef7752495b9740df11dcc2112f5Timo Sirainen#ifdef HAVE_FTS_STEMMER
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainenstatic void test_fts_filter_stemmer_snowball_stem_english(void)
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen{
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen struct fts_filter *stemmer;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const char *error;
db090e2a48bcc5ce162af0c36eef04407421379dTeemu Huovila const char *token = NULL;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const char * const tokens[] = {
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "dries" ,"friendlies", "All", "human", "beings", "are",
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "born", "free", "and", "equal", "in", "dignity", "and",
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "rights", "They", "are", "endowed", "with", "reason", "and",
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "conscience", "and", "should", "act", "towards", "one",
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "another", "in", "a", "spirit", "of", "brotherhood", NULL};
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const char * const bases[] = {
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "dri" ,"friend", "All", "human", "be", "are", "born", "free",
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "and", "equal", "in", "digniti", "and", "right", "They", "are",
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "endow", "with", "reason", "and", "conscienc", "and", "should",
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "act", "toward", "one", "anoth", "in", "a", "spirit", "of",
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "brotherhood", NULL};
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const char * const *tpp;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const char * const *bpp;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_begin("fts filter stem English");
a7d8afaadae968db20eb979052111d76a3086cd7Timo Sirainen test_assert(fts_filter_create(fts_filter_stemmer_snowball, NULL, &english_language, NULL, &stemmer, &error) == 0);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen bpp = bases;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen for (tpp=tokens; *tpp != NULL; tpp++) {
db090e2a48bcc5ce162af0c36eef04407421379dTeemu Huovila token = *tpp;
5916f19b49cae37c888109f6fdff3224f81d33aeTimo Sirainen test_assert(fts_filter_filter(stemmer, &token, &error) > 0);
db090e2a48bcc5ce162af0c36eef04407421379dTeemu Huovila test_assert(token != NULL);
db090e2a48bcc5ce162af0c36eef04407421379dTeemu Huovila test_assert(null_strcmp(token, *bpp) == 0);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen bpp++;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen }
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen fts_filter_unref(&stemmer);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_assert(stemmer == NULL);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_end();
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen}
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainenstatic void test_fts_filter_stemmer_snowball_stem_french(void)
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen{
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen struct fts_filter *stemmer;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const char *error;
db090e2a48bcc5ce162af0c36eef04407421379dTeemu Huovila const char *token = NULL;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const char * const tokens[] = {
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "Tous", "les", "\xC3\xAAtres", "humains", "naissent",
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "libres", "et", "\xC3\xA9gaux", "en", "dignit\xC3\xA9",
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "et", "en", "droits", NULL};
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const char * const bases[] = {
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "Tous" ,"le", "\xC3\xAAtre", "humain", "naissent", "libr", "et",
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "\xC3\xA9gal", "en", "dignit", "et", "en", "droit", NULL};
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const char * const *tpp;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const char * const *bpp;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_begin("fts filter stem French");
440b625484f3cc9d3ec0a7ba36fe3583aa90172dTeemu Huovila test_assert(fts_filter_create(fts_filter_stemmer_snowball, NULL, &french_language, NULL, &stemmer, &error) == 0);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen bpp = bases;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen for (tpp=tokens; *tpp != NULL; tpp++) {
db090e2a48bcc5ce162af0c36eef04407421379dTeemu Huovila token = *tpp;
5916f19b49cae37c888109f6fdff3224f81d33aeTimo Sirainen test_assert(fts_filter_filter(stemmer, &token, &error) > 0);
db090e2a48bcc5ce162af0c36eef04407421379dTeemu Huovila test_assert(token != NULL);
db090e2a48bcc5ce162af0c36eef04407421379dTeemu Huovila test_assert(null_strcmp(token, *bpp) == 0);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen bpp++;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen }
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen fts_filter_unref(&stemmer);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_assert(stemmer == NULL);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_end();
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen}
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainenstatic void test_fts_filter_stopwords_stemmer_eng(void)
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen{
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen int ret;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen struct fts_filter *stemmer;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen struct fts_filter *filter;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const char *error;
db090e2a48bcc5ce162af0c36eef04407421379dTeemu Huovila const char *token = NULL;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const char * const tokens[] = {
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "dries" ,"friendlies", "All", "human", "beings", "are",
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "born", "free", "and", "equal", "in", "dignity", "and",
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "rights", "They", "are", "endowed", "with", "reason", "and",
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "conscience", "and", "should", "act", "towards", "one",
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "another", "in", "a", "spirit", "of", "brotherhood", NULL};
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const char * const bases[] = {
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "dri" ,"friend", "All", "human", "be", NULL, "born", "free",
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen NULL, "equal", NULL, "digniti", NULL, "right", "They", NULL,
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "endow", NULL, "reason", NULL, "conscienc", NULL, "should",
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "act", "toward", "one", "anoth", NULL, NULL, "spirit", NULL,
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "brotherhood", NULL};
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const char * const *tpp;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const char * const *bpp;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_begin("fts filters stopwords and stemming chained, English");
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
a7d8afaadae968db20eb979052111d76a3086cd7Timo Sirainen test_assert(fts_filter_create(fts_filter_stopwords, NULL, &english_language, stopword_settings, &filter, &error) == 0);
a7d8afaadae968db20eb979052111d76a3086cd7Timo Sirainen test_assert(fts_filter_create(fts_filter_stemmer_snowball, filter, &english_language, NULL, &stemmer, &error) == 0);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen bpp = bases;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen for (tpp=tokens; *tpp != NULL; tpp++) {
db090e2a48bcc5ce162af0c36eef04407421379dTeemu Huovila token = *tpp;
1d0f568e26ce5cbf18cd7bb335c6eea20a7e3770Teemu Huovila ret = fts_filter_filter(stemmer, &token, &error);
b58ad1c4a4ef561b0081d6daa47c2dd7b223aeacTimo Sirainen test_assert(ret >= 0);
db090e2a48bcc5ce162af0c36eef04407421379dTeemu Huovila if (ret == 0)
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_assert(*bpp == NULL);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen else {
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_assert(*bpp != NULL);
db090e2a48bcc5ce162af0c36eef04407421379dTeemu Huovila test_assert(null_strcmp(*bpp, token) == 0);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen }
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen bpp++;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen }
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen fts_filter_unref(&stemmer);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen fts_filter_unref(&filter);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_assert(stemmer == NULL);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_assert(filter == NULL);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_end();
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen}
632d2181914a1ef7752495b9740df11dcc2112f5Timo Sirainen#endif
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
2f2faa96aaf6989fae9acab1523f8be372060a02Timo Sirainen#ifdef HAVE_LIBICU
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainenstatic void test_fts_filter_normalizer_swedish_short(void)
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen{
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen struct fts_filter *norm = NULL;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const char *input[] = {
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "Vem",
19ed8f08b23d6ed204e6b27e5d1c0c6fe6bb11ddPhil Carmody "\xC3\x85",
19ed8f08b23d6ed204e6b27e5d1c0c6fe6bb11ddPhil Carmody "\xC3\x85\xC3\x84\xC3\x96",
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "Vem kan segla f\xC3\xB6rutan vind?\n"
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "\xC3\x85\xC3\x84\xC3\x96\xC3\xB6\xC3\xA4\xC3\xA5"
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen };
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const char *expected_output[] = {
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "vem",
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "a",
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "aao",
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "vem kan segla forutan vind?\naaooaa"
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen };
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const char * const settings[] =
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen {"id", "Any-Lower; NFKD; [: Nonspacing Mark :] Remove; NFC", NULL};
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const char *error = NULL;
db090e2a48bcc5ce162af0c36eef04407421379dTeemu Huovila const char *token = NULL;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen unsigned int i;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_begin("fts filter normalizer Swedish short text");
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
98add73ebcec199c04a9b243190f244c216c30e9Timo Sirainen test_assert(fts_filter_create(fts_filter_normalizer_icu, NULL, NULL, settings, &norm, &error) == 0);
98add73ebcec199c04a9b243190f244c216c30e9Timo Sirainen for (i = 0; i < N_ELEMENTS(input); i++) {
98add73ebcec199c04a9b243190f244c216c30e9Timo Sirainen token = input[i];
98add73ebcec199c04a9b243190f244c216c30e9Timo Sirainen test_assert_idx(fts_filter_filter(norm, &token, &error) == 1, i);
98add73ebcec199c04a9b243190f244c216c30e9Timo Sirainen test_assert_idx(null_strcmp(token, expected_output[i]) == 0, i);
98add73ebcec199c04a9b243190f244c216c30e9Timo Sirainen }
98add73ebcec199c04a9b243190f244c216c30e9Timo Sirainen fts_filter_unref(&norm);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_assert(norm == NULL);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_end();
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen}
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainenstatic void test_fts_filter_normalizer_swedish_short_default_id(void)
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen{
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen struct fts_filter *norm = NULL;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const char *input[] = {
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "Vem",
19ed8f08b23d6ed204e6b27e5d1c0c6fe6bb11ddPhil Carmody "\xC3\x85",
19ed8f08b23d6ed204e6b27e5d1c0c6fe6bb11ddPhil Carmody "\xC3\x85\xC3\x84\xC3\x96",
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "Vem kan segla f\xC3\xB6rutan vind?\n"
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "\xC3\x85\xC3\x84\xC3\x96\xC3\xB6\xC3\xA4\xC3\xA5"
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen };
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const char *expected_output[] = {
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "vem",
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "a",
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "aao",
3e786e2a411dc973a2359bc213fcf827e6c314d2Timo Sirainen "vemkanseglaforutanvind?\naaooaa"
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen };
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const char *error = NULL;
db090e2a48bcc5ce162af0c36eef04407421379dTeemu Huovila const char *token = NULL;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen unsigned int i;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_begin("fts filter normalizer Swedish short text using default ID");
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
98add73ebcec199c04a9b243190f244c216c30e9Timo Sirainen test_assert(fts_filter_create(fts_filter_normalizer_icu, NULL, NULL, NULL, &norm, &error) == 0);
98add73ebcec199c04a9b243190f244c216c30e9Timo Sirainen for (i = 0; i < N_ELEMENTS(input); i++) {
98add73ebcec199c04a9b243190f244c216c30e9Timo Sirainen token = input[i];
98add73ebcec199c04a9b243190f244c216c30e9Timo Sirainen test_assert_idx(fts_filter_filter(norm, &token, &error) == 1, i);
98add73ebcec199c04a9b243190f244c216c30e9Timo Sirainen test_assert_idx(null_strcmp(token, expected_output[i]) == 0, i);
98add73ebcec199c04a9b243190f244c216c30e9Timo Sirainen }
98add73ebcec199c04a9b243190f244c216c30e9Timo Sirainen fts_filter_unref(&norm);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_assert(norm == NULL);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_end();
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen}
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen/* UDHRDIR comes from Automake AM_CPPFLAGS */
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen#define UDHR_FRA_NAME "/udhr_fra.txt"
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainenstatic void test_fts_filter_normalizer_french(void)
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen{
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen struct fts_filter *norm = NULL;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen FILE *input;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const char * const settings[] =
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen {"id", "Any-Lower; NFKD; [: Nonspacing Mark :] Remove", NULL};
ea591a86852fb868a7cb9b6e2c80e3446071df8fTimo Sirainen char buf[250] = {0};
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const char *error = NULL;
db090e2a48bcc5ce162af0c36eef04407421379dTeemu Huovila const char *tokens;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen unsigned char sha512_digest[SHA512_RESULTLEN];
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen struct sha512_ctx ctx;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const unsigned char correct_digest[] = {
ea591a86852fb868a7cb9b6e2c80e3446071df8fTimo Sirainen 0x06, 0x80, 0xf1, 0x81, 0xf2, 0xed, 0xfb, 0x6d,
ea591a86852fb868a7cb9b6e2c80e3446071df8fTimo Sirainen 0xcd, 0x7d, 0xcb, 0xbd, 0xc4, 0x87, 0xc3, 0xf6,
ea591a86852fb868a7cb9b6e2c80e3446071df8fTimo Sirainen 0xb8, 0x6a, 0x01, 0x82, 0xdf, 0x0a, 0xb5, 0x92,
ea591a86852fb868a7cb9b6e2c80e3446071df8fTimo Sirainen 0x6b, 0x9b, 0x7b, 0x21, 0x5e, 0x62, 0x40, 0xbd,
ea591a86852fb868a7cb9b6e2c80e3446071df8fTimo Sirainen 0xbf, 0x15, 0xb9, 0x7b, 0x75, 0x9c, 0x4e, 0xc9,
ea591a86852fb868a7cb9b6e2c80e3446071df8fTimo Sirainen 0xe8, 0x48, 0xaa, 0x08, 0x63, 0xf2, 0xa0, 0x6c,
ea591a86852fb868a7cb9b6e2c80e3446071df8fTimo Sirainen 0x20, 0x4c, 0x01, 0xe3, 0xb3, 0x4f, 0x15, 0xc6,
ea591a86852fb868a7cb9b6e2c80e3446071df8fTimo Sirainen 0x8c, 0xd6, 0x7a, 0xb7, 0xc5, 0xc6, 0x85, 0x00};
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const char *udhr_path;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_begin("fts filter normalizer French UDHR");
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
98add73ebcec199c04a9b243190f244c216c30e9Timo Sirainen udhr_path = t_strconcat(UDHRDIR, UDHR_FRA_NAME, NULL);
98add73ebcec199c04a9b243190f244c216c30e9Timo Sirainen test_assert(fts_filter_create(fts_filter_normalizer_icu, NULL, NULL, settings, &norm, &error) == 0);
98add73ebcec199c04a9b243190f244c216c30e9Timo Sirainen input = fopen(udhr_path, "r");
98add73ebcec199c04a9b243190f244c216c30e9Timo Sirainen test_assert(input != NULL);
98add73ebcec199c04a9b243190f244c216c30e9Timo Sirainen sha512_init(&ctx);
98add73ebcec199c04a9b243190f244c216c30e9Timo Sirainen while (NULL != fgets(buf, sizeof(buf), input)) {
98add73ebcec199c04a9b243190f244c216c30e9Timo Sirainen tokens = buf;
98add73ebcec199c04a9b243190f244c216c30e9Timo Sirainen if (fts_filter_filter(norm, &tokens, &error) != 1){
98add73ebcec199c04a9b243190f244c216c30e9Timo Sirainen break;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen }
98add73ebcec199c04a9b243190f244c216c30e9Timo Sirainen sha512_loop(&ctx, tokens, strlen(tokens));
98add73ebcec199c04a9b243190f244c216c30e9Timo Sirainen }
98add73ebcec199c04a9b243190f244c216c30e9Timo Sirainen fclose(input);
98add73ebcec199c04a9b243190f244c216c30e9Timo Sirainen sha512_result(&ctx, sha512_digest);
98add73ebcec199c04a9b243190f244c216c30e9Timo Sirainen test_assert(memcmp(sha512_digest, correct_digest,
98add73ebcec199c04a9b243190f244c216c30e9Timo Sirainen sizeof(sha512_digest)) == 0);
98add73ebcec199c04a9b243190f244c216c30e9Timo Sirainen fts_filter_unref(&norm);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_assert(norm == NULL);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_end();
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen}
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
f5b6f113bfbdf57b3335118e9b0387a9fb760bd2Timo Sirainenstatic void test_fts_filter_normalizer_empty(void)
f5b6f113bfbdf57b3335118e9b0387a9fb760bd2Timo Sirainen{
f5b6f113bfbdf57b3335118e9b0387a9fb760bd2Timo Sirainen /* test just a couple of these */
f5b6f113bfbdf57b3335118e9b0387a9fb760bd2Timo Sirainen static const char *empty_tokens[] = {
3e786e2a411dc973a2359bc213fcf827e6c314d2Timo Sirainen "\xC2\xAF", /* U+00AF */
f5b6f113bfbdf57b3335118e9b0387a9fb760bd2Timo Sirainen "\xCC\x80", /* U+0300 */
f5b6f113bfbdf57b3335118e9b0387a9fb760bd2Timo Sirainen "\xF3\xA0\x87\xAF", /* U+E01EF */
f5b6f113bfbdf57b3335118e9b0387a9fb760bd2Timo Sirainen "\xCC\x80\xF3\xA0\x87\xAF" /* U+0300 U+E01EF */
f5b6f113bfbdf57b3335118e9b0387a9fb760bd2Timo Sirainen };
f5b6f113bfbdf57b3335118e9b0387a9fb760bd2Timo Sirainen const char * const settings[] =
3e786e2a411dc973a2359bc213fcf827e6c314d2Timo Sirainen {"id", "Any-Lower; NFKD; [: Nonspacing Mark :] Remove; [\\x20] Remove", NULL};
f5b6f113bfbdf57b3335118e9b0387a9fb760bd2Timo Sirainen struct fts_filter *norm;
f5b6f113bfbdf57b3335118e9b0387a9fb760bd2Timo Sirainen const char *error;
f5b6f113bfbdf57b3335118e9b0387a9fb760bd2Timo Sirainen unsigned int i;
f5b6f113bfbdf57b3335118e9b0387a9fb760bd2Timo Sirainen
f5b6f113bfbdf57b3335118e9b0387a9fb760bd2Timo Sirainen test_begin("fts filter normalizer empty tokens");
f5b6f113bfbdf57b3335118e9b0387a9fb760bd2Timo Sirainen test_assert(fts_filter_create(fts_filter_normalizer_icu, NULL, NULL, settings, &norm, &error) == 0);
f5b6f113bfbdf57b3335118e9b0387a9fb760bd2Timo Sirainen for (i = 0; i < N_ELEMENTS(empty_tokens); i++) {
f5b6f113bfbdf57b3335118e9b0387a9fb760bd2Timo Sirainen const char *token = empty_tokens[i];
f5b6f113bfbdf57b3335118e9b0387a9fb760bd2Timo Sirainen test_assert_idx(fts_filter_filter(norm, &token, &error) == 0, i);
f5b6f113bfbdf57b3335118e9b0387a9fb760bd2Timo Sirainen }
f5b6f113bfbdf57b3335118e9b0387a9fb760bd2Timo Sirainen fts_filter_unref(&norm);
f5b6f113bfbdf57b3335118e9b0387a9fb760bd2Timo Sirainen test_end();
f5b6f113bfbdf57b3335118e9b0387a9fb760bd2Timo Sirainen}
f5b6f113bfbdf57b3335118e9b0387a9fb760bd2Timo Sirainen
bf698b98d3a3a1eced66cc682c449f23bf2b67d0Timo Sirainenstatic void test_fts_filter_normalizer_baddata(void)
bf698b98d3a3a1eced66cc682c449f23bf2b67d0Timo Sirainen{
bf698b98d3a3a1eced66cc682c449f23bf2b67d0Timo Sirainen const char * const settings[] =
bf698b98d3a3a1eced66cc682c449f23bf2b67d0Timo Sirainen {"id", "Any-Lower; NFKD; [: Nonspacing Mark :] Remove", NULL};
bf698b98d3a3a1eced66cc682c449f23bf2b67d0Timo Sirainen struct fts_filter *norm;
bf698b98d3a3a1eced66cc682c449f23bf2b67d0Timo Sirainen const char *token, *error;
bf698b98d3a3a1eced66cc682c449f23bf2b67d0Timo Sirainen string_t *str;
0190b33c05bd72f1049255d03a7b5217ff1bbcedAki Tuomi unichar_t i;
bf698b98d3a3a1eced66cc682c449f23bf2b67d0Timo Sirainen
bf698b98d3a3a1eced66cc682c449f23bf2b67d0Timo Sirainen test_begin("fts filter normalizer bad data");
bf698b98d3a3a1eced66cc682c449f23bf2b67d0Timo Sirainen
bf698b98d3a3a1eced66cc682c449f23bf2b67d0Timo Sirainen test_assert(fts_filter_create(fts_filter_normalizer_icu, NULL, NULL, settings, &norm, &error) == 0);
bf698b98d3a3a1eced66cc682c449f23bf2b67d0Timo Sirainen str = t_str_new(128);
bf698b98d3a3a1eced66cc682c449f23bf2b67d0Timo Sirainen for (i = 1; i < 0x1ffff; i++) {
0190b33c05bd72f1049255d03a7b5217ff1bbcedAki Tuomi if (!uni_is_valid_ucs4(i)) continue;
bf698b98d3a3a1eced66cc682c449f23bf2b67d0Timo Sirainen str_truncate(str, 0);
bf698b98d3a3a1eced66cc682c449f23bf2b67d0Timo Sirainen uni_ucs4_to_utf8_c(i, str);
bf698b98d3a3a1eced66cc682c449f23bf2b67d0Timo Sirainen token = str_c(str);
bf698b98d3a3a1eced66cc682c449f23bf2b67d0Timo Sirainen T_BEGIN {
bf698b98d3a3a1eced66cc682c449f23bf2b67d0Timo Sirainen test_assert_idx(fts_filter_filter(norm, &token, &error) >= 0, i);
bf698b98d3a3a1eced66cc682c449f23bf2b67d0Timo Sirainen } T_END;
bf698b98d3a3a1eced66cc682c449f23bf2b67d0Timo Sirainen }
bf698b98d3a3a1eced66cc682c449f23bf2b67d0Timo Sirainen
bf698b98d3a3a1eced66cc682c449f23bf2b67d0Timo Sirainen str_truncate(str, 0);
0190b33c05bd72f1049255d03a7b5217ff1bbcedAki Tuomi uni_ucs4_to_utf8_c(UNICHAR_T_MAX, str);
bf698b98d3a3a1eced66cc682c449f23bf2b67d0Timo Sirainen token = str_c(str);
bf698b98d3a3a1eced66cc682c449f23bf2b67d0Timo Sirainen test_assert(fts_filter_filter(norm, &token, &error) >= 0);
bf698b98d3a3a1eced66cc682c449f23bf2b67d0Timo Sirainen
bf698b98d3a3a1eced66cc682c449f23bf2b67d0Timo Sirainen fts_filter_unref(&norm);
bf698b98d3a3a1eced66cc682c449f23bf2b67d0Timo Sirainen test_end();
bf698b98d3a3a1eced66cc682c449f23bf2b67d0Timo Sirainen}
bf698b98d3a3a1eced66cc682c449f23bf2b67d0Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainenstatic void test_fts_filter_normalizer_invalid_id(void)
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen{
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen struct fts_filter *norm = NULL;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const char *settings[] =
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen {"id", "Any-One-Out-There; DKFN; [: Nonspacing Mark :] Remove",
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen NULL};
f65a6ee214adc07e473d443ccd49a5bb99ed60a0Teemu Huovila const char *error = NULL, *token = "foo";
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_begin("fts filter normalizer invalid id");
a7d8afaadae968db20eb979052111d76a3086cd7Timo Sirainen test_assert(fts_filter_create(fts_filter_normalizer_icu, NULL, NULL, settings, &norm, &error) == 0);
5916f19b49cae37c888109f6fdff3224f81d33aeTimo Sirainen test_assert(error == NULL);
5916f19b49cae37c888109f6fdff3224f81d33aeTimo Sirainen test_assert(fts_filter_filter(norm, &token, &error) < 0 && error != NULL);
b4598553665bac4e0b89ed36a16f1ee1648ee172Timo Sirainen fts_filter_unref(&norm);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_end();
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen}
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
ea591a86852fb868a7cb9b6e2c80e3446071df8fTimo Sirainenstatic void test_fts_filter_normalizer_oversized(void)
ea591a86852fb868a7cb9b6e2c80e3446071df8fTimo Sirainen{
ea591a86852fb868a7cb9b6e2c80e3446071df8fTimo Sirainen struct fts_filter *norm = NULL;
ea591a86852fb868a7cb9b6e2c80e3446071df8fTimo Sirainen const char *settings[] =
ea591a86852fb868a7cb9b6e2c80e3446071df8fTimo Sirainen {"id", "Any-Lower; NFKD; [: Nonspacing Mark :] Remove", "maxlen", "250",
ea591a86852fb868a7cb9b6e2c80e3446071df8fTimo Sirainen NULL};
ea591a86852fb868a7cb9b6e2c80e3446071df8fTimo Sirainen const char *error = NULL;
ea591a86852fb868a7cb9b6e2c80e3446071df8fTimo Sirainen const char *token = "\xe4\x95\x91\x25\xe2\x94\xad\xe1\x90\xad\xee\x94\x81\xe2\x8e\x9e"
ea591a86852fb868a7cb9b6e2c80e3446071df8fTimo Sirainen "\xe7\x9a\xb7\xea\xbf\x97\xe3\xb2\x8f\xe4\x9c\xbe\xee\xb4\x98\xe1"
ea591a86852fb868a7cb9b6e2c80e3446071df8fTimo Sirainen "\x8d\x99\xe2\x91\x83\xe3\xb1\xb8\xef\xbf\xbd\xe8\xbb\x9c\xef\xbf"
ea591a86852fb868a7cb9b6e2c80e3446071df8fTimo Sirainen "\xbd\xea\xbb\x98\xea\xb5\xac\xe4\x87\xae\xe4\x88\x93\xe9\x86\x8f"
ea591a86852fb868a7cb9b6e2c80e3446071df8fTimo Sirainen "\xe9\x86\x83\xe6\x8f\x8d\xe7\xa3\x9d\xed\x89\x96\xe2\x89\x85\xe6"
ea591a86852fb868a7cb9b6e2c80e3446071df8fTimo Sirainen "\x8c\x82\xec\x80\x98\xee\x91\x96\xe7\xa8\x8a\xec\xbc\x85\xeb\x9c"
ea591a86852fb868a7cb9b6e2c80e3446071df8fTimo Sirainen "\xbd\xeb\x97\x95\xe3\xa4\x9d\xd7\xb1\xea\xa7\x94\xe0\xbb\xac\xee"
ea591a86852fb868a7cb9b6e2c80e3446071df8fTimo Sirainen "\x95\x87\xd5\x9d\xe8\xba\x87\xee\x8b\xae\xe5\xb8\x80\xe9\x8d\x82"
ea591a86852fb868a7cb9b6e2c80e3446071df8fTimo Sirainen "\xe7\xb6\x8c\xe7\x9b\xa0\xef\x82\x9f\xed\x96\xa4\xe3\x8d\xbc\xe1"
ea591a86852fb868a7cb9b6e2c80e3446071df8fTimo Sirainen "\x81\xbd\xe9\x81\xb2\xea\xac\xac\xec\x9b\x98\xe7\x84\xb2\xee\xaf"
ea591a86852fb868a7cb9b6e2c80e3446071df8fTimo Sirainen "\xbc\xeb\xa2\x9d\xe9\x86\xb3\xe0\xb0\x89\xeb\x80\xb6\xe3\x8c\x9d"
ea591a86852fb868a7cb9b6e2c80e3446071df8fTimo Sirainen "\xe9\x8f\x9e\xe2\xae\x8a\xee\x9e\x9a\xef\xbf\xbd\xe7\xa3\x9b\xe4"
ea591a86852fb868a7cb9b6e2c80e3446071df8fTimo Sirainen "\xa3\x8b\xe4\x82\xb9\xeb\x8e\x93\xec\xb5\x82\xe5\xa7\x81\xe2\x8c"
ea591a86852fb868a7cb9b6e2c80e3446071df8fTimo Sirainen "\x97\xea\xbb\xb4\xe5\x85\xb7\xeb\x96\xbe\xe7\x97\x91\xea\xbb\x98"
ea591a86852fb868a7cb9b6e2c80e3446071df8fTimo Sirainen "\xe6\xae\xb4\xe9\x8a\x85\xc4\xb9\xe4\x90\xb2\xe9\x96\xad\xef\x90"
ea591a86852fb868a7cb9b6e2c80e3446071df8fTimo Sirainen "\x9c\xe5\xa6\xae\xe9\x93\x91\xe8\x87\xa1";
ea591a86852fb868a7cb9b6e2c80e3446071df8fTimo Sirainen
ea591a86852fb868a7cb9b6e2c80e3446071df8fTimo Sirainen test_begin("fts filter normalizer over-sized token");
ea591a86852fb868a7cb9b6e2c80e3446071df8fTimo Sirainen test_assert(fts_filter_create(fts_filter_normalizer_icu, NULL, NULL, settings, &norm, &error) == 0);
ea591a86852fb868a7cb9b6e2c80e3446071df8fTimo Sirainen test_assert(error == NULL);
ea591a86852fb868a7cb9b6e2c80e3446071df8fTimo Sirainen test_assert(fts_filter_filter(norm, &token, &error) >= 0);
ea591a86852fb868a7cb9b6e2c80e3446071df8fTimo Sirainen test_assert(strlen(token) <= 250);
ea591a86852fb868a7cb9b6e2c80e3446071df8fTimo Sirainen fts_filter_unref(&norm);
ea591a86852fb868a7cb9b6e2c80e3446071df8fTimo Sirainen test_end();
ea591a86852fb868a7cb9b6e2c80e3446071df8fTimo Sirainen}
ea591a86852fb868a7cb9b6e2c80e3446071df8fTimo Sirainen
35eb3a2394bb064cc6b4a67436e8860994e4636aTeemu Huovilastatic void test_fts_filter_normalizer_truncation(void)
35eb3a2394bb064cc6b4a67436e8860994e4636aTeemu Huovila{
35eb3a2394bb064cc6b4a67436e8860994e4636aTeemu Huovila struct fts_filter *norm = NULL;
35eb3a2394bb064cc6b4a67436e8860994e4636aTeemu Huovila const char *settings[] =
35eb3a2394bb064cc6b4a67436e8860994e4636aTeemu Huovila {"id", "Any-Lower;", "maxlen", "10",
35eb3a2394bb064cc6b4a67436e8860994e4636aTeemu Huovila NULL};
35eb3a2394bb064cc6b4a67436e8860994e4636aTeemu Huovila const char *error = NULL;
35eb3a2394bb064cc6b4a67436e8860994e4636aTeemu Huovila const char *token = "abcdefghi\xC3\x85";
35eb3a2394bb064cc6b4a67436e8860994e4636aTeemu Huovila
35eb3a2394bb064cc6b4a67436e8860994e4636aTeemu Huovila test_begin("fts filter normalizer token truncated mid letter");
35eb3a2394bb064cc6b4a67436e8860994e4636aTeemu Huovila test_assert(fts_filter_create(fts_filter_normalizer_icu, NULL, NULL,
35eb3a2394bb064cc6b4a67436e8860994e4636aTeemu Huovila settings, &norm, &error) == 0);
35eb3a2394bb064cc6b4a67436e8860994e4636aTeemu Huovila test_assert(error == NULL);
35eb3a2394bb064cc6b4a67436e8860994e4636aTeemu Huovila test_assert(fts_filter_filter(norm, &token, &error) >= 0);
35eb3a2394bb064cc6b4a67436e8860994e4636aTeemu Huovila test_assert(strcmp(token, "abcdefghi") == 0);
35eb3a2394bb064cc6b4a67436e8860994e4636aTeemu Huovila fts_filter_unref(&norm);
35eb3a2394bb064cc6b4a67436e8860994e4636aTeemu Huovila test_end();
35eb3a2394bb064cc6b4a67436e8860994e4636aTeemu Huovila}
35eb3a2394bb064cc6b4a67436e8860994e4636aTeemu Huovila
e0d0e7f377f263d721c6385b2453f80da1e01bf5Timo Sirainen#ifdef HAVE_FTS_STEMMER
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainenstatic void test_fts_filter_normalizer_stopwords_stemmer_eng(void)
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen{
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen int ret;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen struct fts_filter *normalizer;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen struct fts_filter *stemmer;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen struct fts_filter *filter;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const char *error;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const char * const id_settings[] =
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen //{"id", "Any-Lower; NFKD; [: Nonspacing Mark :] Remove; NFC", NULL};
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen {"id", "Lower", NULL};
db090e2a48bcc5ce162af0c36eef04407421379dTeemu Huovila const char *token = NULL;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const char * const tokens[] = {
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "dries" ,"friendlies", "All", "human", "beings", "are",
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "born", "free", "and", "equal", "in", "dignity", "and",
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "rights", "They", "are", "endowed", "with", "reason", "and",
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "conscience", "and", "should", "act", "towards", "one",
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "another", "in", "a", "spirit", "of", "brotherhood", "ABCFoo",
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen NULL};
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const char * const bases[] = {
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "dri" ,"friend", "all", "human", "be", NULL, "born", "free",
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen NULL, "equal", NULL, "digniti", NULL, "right", NULL, NULL,
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "endow", NULL, "reason", NULL, "conscienc", NULL, "should",
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "act", "toward", "one", "anoth", NULL, NULL, "spirit", NULL,
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "brotherhood", "abcfoo", NULL};
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const char * const *tpp;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const char * const *bpp;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_begin("fts filters normalizer, stopwords and stemming chained, English");
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
a7d8afaadae968db20eb979052111d76a3086cd7Timo Sirainen test_assert(fts_filter_create(fts_filter_normalizer_icu, NULL, NULL, id_settings, &normalizer, &error) == 0);
a7d8afaadae968db20eb979052111d76a3086cd7Timo Sirainen test_assert(fts_filter_create(fts_filter_stopwords, normalizer, &english_language, stopword_settings, &filter, &error) == 0);
a7d8afaadae968db20eb979052111d76a3086cd7Timo Sirainen test_assert(fts_filter_create(fts_filter_stemmer_snowball, filter, &english_language, NULL, &stemmer, &error) == 0);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen bpp = bases;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen for (tpp = tokens; *tpp != NULL; tpp++) {
db090e2a48bcc5ce162af0c36eef04407421379dTeemu Huovila token = *tpp;
1d0f568e26ce5cbf18cd7bb335c6eea20a7e3770Teemu Huovila ret = fts_filter_filter(stemmer, &token, &error);
b58ad1c4a4ef561b0081d6daa47c2dd7b223aeacTimo Sirainen if (ret <= 0) {
b58ad1c4a4ef561b0081d6daa47c2dd7b223aeacTimo Sirainen test_assert(ret == 0);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_assert(*bpp == NULL);
b58ad1c4a4ef561b0081d6daa47c2dd7b223aeacTimo Sirainen } else {
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_assert(*bpp != NULL);
d1356a777d42a820218c05fc2c16095ed3e07dbdTimo Sirainen test_assert(strcmp(*bpp, token) == 0);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen }
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen bpp++;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen }
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen fts_filter_unref(&stemmer);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen fts_filter_unref(&filter);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen fts_filter_unref(&normalizer);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_assert(stemmer == NULL);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_assert(filter == NULL);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_assert(normalizer == NULL);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_end();
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen}
c5effa0f13da8f45991c89a9d8c9d2109db66039Teemu Huovila
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovilastatic void test_fts_filter_stopwords_normalizer_stemmer_no(void)
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila{
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila int ret;
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila struct fts_filter *normalizer;
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila struct fts_filter *stemmer;
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila struct fts_filter *filter;
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila const char *error;
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila const char *token = NULL;
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila const char * const tokens[] = {
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila /* Nynorsk*/
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila "Alle", "har", "plikter", "andsynes", "samfunnet", "d\xC3\xA5",
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila "personlegdomen", "til", "den", "einskilde", "einast", "der",
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila "kan", "f\xC3\xA5", "frie", "og", "fullgode",
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila "voksterk\xC3\xA5r",
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila /* Bokmal */
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila "Alle", "mennesker", "er", "f\xC3\xB8""dt", "frie", "og", "med",
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila "samme", "menneskeverd", "og", "menneskerettigheter", "De",
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila "er", "utstyrt", "med", "fornuft", "og", "samvittighet",
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila "og", "b\xC3\xB8r", "handle", "mot", "hverandre", "i",
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila "brorskapets", "\xC3\xA5nd", NULL};
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila const char * const bases[] = {
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila /* Nynorsk*/
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila "all", NULL, "plikt", "andsyn", "samfunn", NULL,
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila "personlegdom", NULL, NULL, "einskild", "ein", NULL, NULL,
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila "fa", "frie", NULL, "fullgod", "voksterk",
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila /* Bokmal */
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila "all", "mennesk", NULL, "f\xC3\xB8""dt", "frie", NULL, NULL,
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila NULL, "menneskeverd", NULL, "menneskerett", "de", NULL,
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila "utstyrt", NULL, "fornuft", NULL, "samvitt", NULL, "b\xC3\xB8r",
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila "handl", NULL, "hverandr", NULL, "brorskap", "and", NULL};
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila const char * const *tpp;
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila const char * const *bpp;
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila test_begin("fts filters with stopwords, default normalizer and stemming chained, Norwegian");
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila test_assert(fts_filter_create(fts_filter_stopwords, NULL, &norwegian_language, stopword_settings, &filter, &error) == 0);
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila test_assert(fts_filter_create(fts_filter_normalizer_icu, filter, NULL, NULL, &normalizer, &error) == 0);
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila test_assert(fts_filter_create(fts_filter_stemmer_snowball, normalizer, &norwegian_language, NULL, &stemmer, &error) == 0);
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila bpp = bases;
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila for (tpp = tokens; *tpp != NULL; tpp++) {
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila token = *tpp;
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila ret = fts_filter_filter(stemmer, &token, &error);
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila if (ret <= 0) {
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila test_assert(ret == 0);
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila test_assert(*bpp == NULL);
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila } else {
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila test_assert(*bpp != NULL);
83bd5fa0e765cbca38c98a9d397f1f6517a188aeTeemu Huovila test_assert(null_strcmp(*bpp, token) == 0);
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila }
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila bpp++;
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila }
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila fts_filter_unref(&stemmer);
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila fts_filter_unref(&normalizer);
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila fts_filter_unref(&filter);
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila test_assert(stemmer == NULL);
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila test_assert(filter == NULL);
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila test_assert(normalizer == NULL);
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila test_end();
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila}
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila
c5effa0f13da8f45991c89a9d8c9d2109db66039Teemu Huovilastatic void test_fts_filter_stopwords_normalizer_stemmer_sv(void)
c5effa0f13da8f45991c89a9d8c9d2109db66039Teemu Huovila{
c5effa0f13da8f45991c89a9d8c9d2109db66039Teemu Huovila int ret;
c5effa0f13da8f45991c89a9d8c9d2109db66039Teemu Huovila struct fts_filter *normalizer;
c5effa0f13da8f45991c89a9d8c9d2109db66039Teemu Huovila struct fts_filter *stemmer;
c5effa0f13da8f45991c89a9d8c9d2109db66039Teemu Huovila struct fts_filter *filter;
c5effa0f13da8f45991c89a9d8c9d2109db66039Teemu Huovila const char *error;
c5effa0f13da8f45991c89a9d8c9d2109db66039Teemu Huovila const char *token = NULL;
c5effa0f13da8f45991c89a9d8c9d2109db66039Teemu Huovila const char * const tokens[] = {
c5effa0f13da8f45991c89a9d8c9d2109db66039Teemu Huovila "Enär", "erkännandet", "av", "det", "inneboende", "värdet",
c5effa0f13da8f45991c89a9d8c9d2109db66039Teemu Huovila "hos", "alla", "medlemmar", "av", "människosläktet", "och",
c5effa0f13da8f45991c89a9d8c9d2109db66039Teemu Huovila "av", "deras", "lika", "och", "oförytterliga", "rättigheter",
c5effa0f13da8f45991c89a9d8c9d2109db66039Teemu Huovila "är", "grundvalen", "för", "frihet", "rättvisa", "och", "fred",
c5effa0f13da8f45991c89a9d8c9d2109db66039Teemu Huovila "i", "världen", NULL};
c5effa0f13da8f45991c89a9d8c9d2109db66039Teemu Huovila const char * const bases[] = {
c5effa0f13da8f45991c89a9d8c9d2109db66039Teemu Huovila "enar", "erkan", NULL, NULL, "inneboend", "vardet", "hos", NULL,
c5effa0f13da8f45991c89a9d8c9d2109db66039Teemu Huovila "medlemm", NULL, "manniskoslaktet", NULL, NULL, NULL, "lik",
c5effa0f13da8f45991c89a9d8c9d2109db66039Teemu Huovila NULL, "oforytter", "ratt", NULL, "grundval", NULL, "frihet",
c5effa0f13da8f45991c89a9d8c9d2109db66039Teemu Huovila "rattvis", NULL, "fred", NULL, "varld", NULL};
c5effa0f13da8f45991c89a9d8c9d2109db66039Teemu Huovila const char * const *tpp;
c5effa0f13da8f45991c89a9d8c9d2109db66039Teemu Huovila const char * const *bpp;
c5effa0f13da8f45991c89a9d8c9d2109db66039Teemu Huovila
c5effa0f13da8f45991c89a9d8c9d2109db66039Teemu Huovila test_begin("fts filters with stopwords, default normalizer and stemming chained, Swedish");
c5effa0f13da8f45991c89a9d8c9d2109db66039Teemu Huovila
c5effa0f13da8f45991c89a9d8c9d2109db66039Teemu Huovila
c5effa0f13da8f45991c89a9d8c9d2109db66039Teemu Huovila test_assert(fts_filter_create(fts_filter_stopwords, NULL, &swedish_language, stopword_settings, &filter, &error) == 0);
c5effa0f13da8f45991c89a9d8c9d2109db66039Teemu Huovila test_assert(fts_filter_create(fts_filter_normalizer_icu, filter, NULL, NULL, &normalizer, &error) == 0);
c5effa0f13da8f45991c89a9d8c9d2109db66039Teemu Huovila test_assert(fts_filter_create(fts_filter_stemmer_snowball, normalizer, &swedish_language, NULL, &stemmer, &error) == 0);
c5effa0f13da8f45991c89a9d8c9d2109db66039Teemu Huovila
c5effa0f13da8f45991c89a9d8c9d2109db66039Teemu Huovila bpp = bases;
c5effa0f13da8f45991c89a9d8c9d2109db66039Teemu Huovila for (tpp = tokens; *tpp != NULL; tpp++) {
c5effa0f13da8f45991c89a9d8c9d2109db66039Teemu Huovila token = *tpp;
c5effa0f13da8f45991c89a9d8c9d2109db66039Teemu Huovila ret = fts_filter_filter(stemmer, &token, &error);
c5effa0f13da8f45991c89a9d8c9d2109db66039Teemu Huovila if (ret <= 0) {
c5effa0f13da8f45991c89a9d8c9d2109db66039Teemu Huovila test_assert(ret == 0);
c5effa0f13da8f45991c89a9d8c9d2109db66039Teemu Huovila test_assert(*bpp == NULL);
c5effa0f13da8f45991c89a9d8c9d2109db66039Teemu Huovila } else {
c5effa0f13da8f45991c89a9d8c9d2109db66039Teemu Huovila test_assert(*bpp != NULL);
83bd5fa0e765cbca38c98a9d397f1f6517a188aeTeemu Huovila test_assert(null_strcmp(*bpp, token) == 0);
c5effa0f13da8f45991c89a9d8c9d2109db66039Teemu Huovila }
c5effa0f13da8f45991c89a9d8c9d2109db66039Teemu Huovila bpp++;
c5effa0f13da8f45991c89a9d8c9d2109db66039Teemu Huovila }
c5effa0f13da8f45991c89a9d8c9d2109db66039Teemu Huovila fts_filter_unref(&stemmer);
c5effa0f13da8f45991c89a9d8c9d2109db66039Teemu Huovila fts_filter_unref(&normalizer);
c5effa0f13da8f45991c89a9d8c9d2109db66039Teemu Huovila fts_filter_unref(&filter);
c5effa0f13da8f45991c89a9d8c9d2109db66039Teemu Huovila test_assert(stemmer == NULL);
c5effa0f13da8f45991c89a9d8c9d2109db66039Teemu Huovila test_assert(filter == NULL);
c5effa0f13da8f45991c89a9d8c9d2109db66039Teemu Huovila test_assert(normalizer == NULL);
c5effa0f13da8f45991c89a9d8c9d2109db66039Teemu Huovila test_end();
c5effa0f13da8f45991c89a9d8c9d2109db66039Teemu Huovila}
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen#endif
e0d0e7f377f263d721c6385b2453f80da1e01bf5Timo Sirainen#endif
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
471167b9701fcc99b66f7a8bcae07bc4ac0dbbd4Timo Sirainenstatic void test_fts_filter_english_possessive(void)
471167b9701fcc99b66f7a8bcae07bc4ac0dbbd4Timo Sirainen{
471167b9701fcc99b66f7a8bcae07bc4ac0dbbd4Timo Sirainen struct fts_filter *norm = NULL;
471167b9701fcc99b66f7a8bcae07bc4ac0dbbd4Timo Sirainen const char *input[] = {
471167b9701fcc99b66f7a8bcae07bc4ac0dbbd4Timo Sirainen "foo'",
471167b9701fcc99b66f7a8bcae07bc4ac0dbbd4Timo Sirainen
471167b9701fcc99b66f7a8bcae07bc4ac0dbbd4Timo Sirainen "foo's",
19ed8f08b23d6ed204e6b27e5d1c0c6fe6bb11ddPhil Carmody "foo\xC3\xA4's",
471167b9701fcc99b66f7a8bcae07bc4ac0dbbd4Timo Sirainen "foo'S",
471167b9701fcc99b66f7a8bcae07bc4ac0dbbd4Timo Sirainen "foos'S",
471167b9701fcc99b66f7a8bcae07bc4ac0dbbd4Timo Sirainen "foo's's",
471167b9701fcc99b66f7a8bcae07bc4ac0dbbd4Timo Sirainen "foo'ss",
471167b9701fcc99b66f7a8bcae07bc4ac0dbbd4Timo Sirainen
471167b9701fcc99b66f7a8bcae07bc4ac0dbbd4Timo Sirainen "foo\xE2\x80\x99s",
19ed8f08b23d6ed204e6b27e5d1c0c6fe6bb11ddPhil Carmody "foo\xC3\xA4\xE2\x80\x99s",
471167b9701fcc99b66f7a8bcae07bc4ac0dbbd4Timo Sirainen "foo\xE2\x80\x99S",
471167b9701fcc99b66f7a8bcae07bc4ac0dbbd4Timo Sirainen "foos\xE2\x80\x99S",
471167b9701fcc99b66f7a8bcae07bc4ac0dbbd4Timo Sirainen "foo\xE2\x80\x99s\xE2\x80\x99s",
471167b9701fcc99b66f7a8bcae07bc4ac0dbbd4Timo Sirainen "foo\xE2\x80\x99ss"
471167b9701fcc99b66f7a8bcae07bc4ac0dbbd4Timo Sirainen };
471167b9701fcc99b66f7a8bcae07bc4ac0dbbd4Timo Sirainen const char *expected_output[] = {
471167b9701fcc99b66f7a8bcae07bc4ac0dbbd4Timo Sirainen "foo'",
471167b9701fcc99b66f7a8bcae07bc4ac0dbbd4Timo Sirainen
471167b9701fcc99b66f7a8bcae07bc4ac0dbbd4Timo Sirainen "foo",
19ed8f08b23d6ed204e6b27e5d1c0c6fe6bb11ddPhil Carmody "foo\xC3\xA4",
471167b9701fcc99b66f7a8bcae07bc4ac0dbbd4Timo Sirainen "foo",
471167b9701fcc99b66f7a8bcae07bc4ac0dbbd4Timo Sirainen "foos",
471167b9701fcc99b66f7a8bcae07bc4ac0dbbd4Timo Sirainen "foo's",
471167b9701fcc99b66f7a8bcae07bc4ac0dbbd4Timo Sirainen "foo'ss",
471167b9701fcc99b66f7a8bcae07bc4ac0dbbd4Timo Sirainen
471167b9701fcc99b66f7a8bcae07bc4ac0dbbd4Timo Sirainen "foo",
19ed8f08b23d6ed204e6b27e5d1c0c6fe6bb11ddPhil Carmody "foo\xC3\xA4",
471167b9701fcc99b66f7a8bcae07bc4ac0dbbd4Timo Sirainen "foo",
471167b9701fcc99b66f7a8bcae07bc4ac0dbbd4Timo Sirainen "foos",
471167b9701fcc99b66f7a8bcae07bc4ac0dbbd4Timo Sirainen "foo\xE2\x80\x99s",
471167b9701fcc99b66f7a8bcae07bc4ac0dbbd4Timo Sirainen "foo\xE2\x80\x99ss"
471167b9701fcc99b66f7a8bcae07bc4ac0dbbd4Timo Sirainen };
471167b9701fcc99b66f7a8bcae07bc4ac0dbbd4Timo Sirainen const char *error = NULL;
471167b9701fcc99b66f7a8bcae07bc4ac0dbbd4Timo Sirainen const char *token = NULL;
471167b9701fcc99b66f7a8bcae07bc4ac0dbbd4Timo Sirainen unsigned int i;
471167b9701fcc99b66f7a8bcae07bc4ac0dbbd4Timo Sirainen
471167b9701fcc99b66f7a8bcae07bc4ac0dbbd4Timo Sirainen test_begin("fts filter english possessive");
471167b9701fcc99b66f7a8bcae07bc4ac0dbbd4Timo Sirainen
98add73ebcec199c04a9b243190f244c216c30e9Timo Sirainen test_assert(fts_filter_create(fts_filter_english_possessive, NULL, NULL, NULL, &norm, &error) == 0);
98add73ebcec199c04a9b243190f244c216c30e9Timo Sirainen for (i = 0; i < N_ELEMENTS(input); i++) {
98add73ebcec199c04a9b243190f244c216c30e9Timo Sirainen token = input[i];
98add73ebcec199c04a9b243190f244c216c30e9Timo Sirainen test_assert_idx(fts_filter_filter(norm, &token, &error) == 1, i);
98add73ebcec199c04a9b243190f244c216c30e9Timo Sirainen test_assert_idx(null_strcmp(token, expected_output[i]) == 0, i);
98add73ebcec199c04a9b243190f244c216c30e9Timo Sirainen }
98add73ebcec199c04a9b243190f244c216c30e9Timo Sirainen fts_filter_unref(&norm);
471167b9701fcc99b66f7a8bcae07bc4ac0dbbd4Timo Sirainen test_assert(norm == NULL);
471167b9701fcc99b66f7a8bcae07bc4ac0dbbd4Timo Sirainen test_end();
471167b9701fcc99b66f7a8bcae07bc4ac0dbbd4Timo Sirainen}
471167b9701fcc99b66f7a8bcae07bc4ac0dbbd4Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen/* TODO: Functions to test 1. ref-unref pairs 2. multiple registers +
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen an unregister + find */
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainenint main(void)
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen{
baf3e87e186453fda13bd21f7cbcb2efc8492e8bTimo Sirainen static void (*const test_functions[])(void) = {
a7d8afaadae968db20eb979052111d76a3086cd7Timo Sirainen test_fts_filter_find,
440b625484f3cc9d3ec0a7ba36fe3583aa90172dTeemu Huovila test_fts_filter_contractions_fail,
440b625484f3cc9d3ec0a7ba36fe3583aa90172dTeemu Huovila test_fts_filter_contractions_fr,
acfcf88e4dd529e4b2409f43bc9713cbc0169347Timo Sirainen test_fts_filter_lowercase,
c8eaee2ad6cc96e2ef42657f89d4404e674680b7Teemu Huovila#ifdef HAVE_LIBICU
c8eaee2ad6cc96e2ef42657f89d4404e674680b7Teemu Huovila test_fts_filter_lowercase_utf8,
5fcd30add8dcf4d883978cce3e39f3a89184f1e5Teemu Huovila test_fts_filter_lowercase_too_long_utf8,
c8eaee2ad6cc96e2ef42657f89d4404e674680b7Teemu Huovila#endif
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_fts_filter_stopwords_eng,
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_fts_filter_stopwords_fin,
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_fts_filter_stopwords_fra,
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila test_fts_filter_stopwords_no,
dfb9243af1c95de27c7b3a783629ad901c085927Teemu Huovila test_fts_filter_stopwords_fail_lazy_init,
3f3c1b629196bc8491f146705b6f8ddadfcde1c8Teemu Huovila test_fts_filter_stopwords_malformed,
632d2181914a1ef7752495b9740df11dcc2112f5Timo Sirainen#ifdef HAVE_FTS_STEMMER
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_fts_filter_stemmer_snowball_stem_english,
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_fts_filter_stemmer_snowball_stem_french,
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_fts_filter_stopwords_stemmer_eng,
632d2181914a1ef7752495b9740df11dcc2112f5Timo Sirainen#endif
2f2faa96aaf6989fae9acab1523f8be372060a02Timo Sirainen#ifdef HAVE_LIBICU
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_fts_filter_normalizer_swedish_short,
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_fts_filter_normalizer_swedish_short_default_id,
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_fts_filter_normalizer_french,
f5b6f113bfbdf57b3335118e9b0387a9fb760bd2Timo Sirainen test_fts_filter_normalizer_empty,
bf698b98d3a3a1eced66cc682c449f23bf2b67d0Timo Sirainen test_fts_filter_normalizer_baddata,
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_fts_filter_normalizer_invalid_id,
ea591a86852fb868a7cb9b6e2c80e3446071df8fTimo Sirainen test_fts_filter_normalizer_oversized,
35eb3a2394bb064cc6b4a67436e8860994e4636aTeemu Huovila test_fts_filter_normalizer_truncation,
e0d0e7f377f263d721c6385b2453f80da1e01bf5Timo Sirainen#ifdef HAVE_FTS_STEMMER
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen test_fts_filter_normalizer_stopwords_stemmer_eng,
3ec8b0d282d46d1f698b1f2aa27922cb8f26cb97Teemu Huovila test_fts_filter_stopwords_normalizer_stemmer_no,
c5effa0f13da8f45991c89a9d8c9d2109db66039Teemu Huovila test_fts_filter_stopwords_normalizer_stemmer_sv,
e0d0e7f377f263d721c6385b2453f80da1e01bf5Timo Sirainen#endif
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen#endif
471167b9701fcc99b66f7a8bcae07bc4ac0dbbd4Timo Sirainen test_fts_filter_english_possessive,
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen NULL
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen };
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen int ret;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen fts_filters_init();
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen ret = test_run(test_functions);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen fts_filters_deinit();
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen return ret;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen}