test-fts-filter.c revision d965f1c171b714f1460d87bac597ec3e015fae83
5a580c3a38ced62d4bcc95b8ac7c4f2935b5d294Timo Sirainen/* Copyright (c) 2014-2015 Dovecot authors, see the included COPYING file */
cf636afb3826f0d8e15c248aa1fc04ce72820e08Timo Sirainenstatic const char *const stopword_settings[] = {"stopwords_dir", TEST_STOPWORDS_DIR, NULL};
cf636afb3826f0d8e15c248aa1fc04ce72820e08Timo Sirainenstatic struct fts_language english_language = { .name = "en" };
cf636afb3826f0d8e15c248aa1fc04ce72820e08Timo Sirainenstatic struct fts_language french_language = { .name = "fr" };
cf636afb3826f0d8e15c248aa1fc04ce72820e08Timo Sirainenstatic struct fts_language norwegian_language = { .name = "no" };
baf346e71ebd7b44fcba4b48f4d39845453b778bTimo Sirainenstatic struct fts_language swedish_language = { .name = "sv" };
baf346e71ebd7b44fcba4b48f4d39845453b778bTimo Sirainen test_assert(fts_filter_find("stopwords") == fts_filter_stopwords);
cf636afb3826f0d8e15c248aa1fc04ce72820e08Timo Sirainen test_assert(fts_filter_find("snowball") == fts_filter_stemmer_snowball);
cf636afb3826f0d8e15c248aa1fc04ce72820e08Timo Sirainen test_assert(fts_filter_find("normalizer-icu") == fts_filter_normalizer_icu);
cf636afb3826f0d8e15c248aa1fc04ce72820e08Timo Sirainen test_assert(fts_filter_find("lowercase") == fts_filter_lowercase);
cf636afb3826f0d8e15c248aa1fc04ce72820e08Timo Sirainen test_assert(fts_filter_find("contractions") == fts_filter_contractions);
cf636afb3826f0d8e15c248aa1fc04ce72820e08Timo Sirainenstatic void test_fts_filter_contractions_fail(void)
cf636afb3826f0d8e15c248aa1fc04ce72820e08Timo Sirainen test_begin("fts filter contractions, unsupported language");
cf636afb3826f0d8e15c248aa1fc04ce72820e08Timo Sirainen test_assert(fts_filter_create(fts_filter_contractions, NULL, &english_language, NULL, &filter, &error) != 0);
cf636afb3826f0d8e15c248aa1fc04ce72820e08Timo Sirainenstatic void test_fts_filter_contractions_fr(void)
cf636afb3826f0d8e15c248aa1fc04ce72820e08Timo Sirainen unsigned int i;
cf636afb3826f0d8e15c248aa1fc04ce72820e08Timo Sirainen test_begin("fts filter contractions, French");
cf636afb3826f0d8e15c248aa1fc04ce72820e08Timo Sirainen test_assert(fts_filter_create(fts_filter_contractions, NULL, &french_language, NULL, &filter, &error) == 0);
cf636afb3826f0d8e15c248aa1fc04ce72820e08Timo Sirainen ret = fts_filter_filter(filter, &token, &error);
cf636afb3826f0d8e15c248aa1fc04ce72820e08Timo Sirainen test_assert_idx(strcmp(token, tests[i].output) == 0, i);
cf636afb3826f0d8e15c248aa1fc04ce72820e08Timo Sirainen else if (ret == 0)
cf636afb3826f0d8e15c248aa1fc04ce72820e08Timo Sirainen test_assert_idx(token == NULL && tests[i].output == NULL, i);
cf636afb3826f0d8e15c248aa1fc04ce72820e08Timo Sirainen unsigned int i;
cf636afb3826f0d8e15c248aa1fc04ce72820e08Timo Sirainen test_assert(fts_filter_create(fts_filter_lowercase, NULL, &english_language, NULL, &filter, &error) == 0);
cf636afb3826f0d8e15c248aa1fc04ce72820e08Timo Sirainen test_assert_idx(fts_filter_filter(filter, &token, &error) > 0 &&
baf346e71ebd7b44fcba4b48f4d39845453b778bTimo Sirainenstatic void test_fts_filter_lowercase_utf8(void)
baf346e71ebd7b44fcba4b48f4d39845453b778bTimo Sirainen unsigned int i;
baf346e71ebd7b44fcba4b48f4d39845453b778bTimo Sirainen test_assert(fts_filter_create(fts_filter_lowercase, NULL, &english_language, NULL, &filter, &error) == 0);
baf346e71ebd7b44fcba4b48f4d39845453b778bTimo Sirainen test_assert_idx(fts_filter_filter(filter, &token, &error) > 0 &&
baf346e71ebd7b44fcba4b48f4d39845453b778bTimo Sirainenstatic void test_fts_filter_stopwords_eng(void)
baf346e71ebd7b44fcba4b48f4d39845453b778bTimo Sirainen const char *input[] = {"an", "elephant", "and", "a", "bear",
baf346e71ebd7b44fcba4b48f4d39845453b778bTimo Sirainen const char *output[] = {NULL, "elephant", NULL, NULL, "bear",
baf346e71ebd7b44fcba4b48f4d39845453b778bTimo Sirainen test_assert(fts_filter_create(fts_filter_stopwords, NULL, &english_language, stopword_settings, &filter, &error) == 0);
baf346e71ebd7b44fcba4b48f4d39845453b778bTimo Sirainen ret = fts_filter_filter(filter, &token, &error);
baf346e71ebd7b44fcba4b48f4d39845453b778bTimo Sirainenstatic void test_fts_filter_stopwords_fin(void)
baf346e71ebd7b44fcba4b48f4d39845453b778bTimo Sirainen const struct fts_language finnish = { .name = "fi" };
cf636afb3826f0d8e15c248aa1fc04ce72820e08Timo Sirainen const char *input[] = {"olla", "vaiko", "eik\xC3\xB6", "olla",
cf636afb3826f0d8e15c248aa1fc04ce72820e08Timo Sirainen const char *output[] = {NULL, "vaiko", "eik\xC3\xB6", NULL, NULL,
cf636afb3826f0d8e15c248aa1fc04ce72820e08Timo Sirainen const char *input2[] =
cf636afb3826f0d8e15c248aa1fc04ce72820e08Timo Sirainen {"kuka", "kenet", "keneen", "testi", "eiv\xC3\xA4t", NULL};
baf346e71ebd7b44fcba4b48f4d39845453b778bTimo Sirainen const char *output2[] = {NULL, NULL, NULL, "testi", NULL};
cf636afb3826f0d8e15c248aa1fc04ce72820e08Timo Sirainen test_assert(fts_filter_create(fts_filter_stopwords, NULL, &finnish, stopword_settings, &filter, &error) == 0);
cf636afb3826f0d8e15c248aa1fc04ce72820e08Timo Sirainen ret = fts_filter_filter(filter, &token, &error);
baf346e71ebd7b44fcba4b48f4d39845453b778bTimo Sirainen test_assert(fts_filter_create(fts_filter_stopwords, NULL, &finnish, stopword_settings, &filter, &error) == 0);
baf346e71ebd7b44fcba4b48f4d39845453b778bTimo Sirainen ret = fts_filter_filter(filter, &token, &error);
cf636afb3826f0d8e15c248aa1fc04ce72820e08Timo Sirainenstatic void test_fts_filter_stopwords_fra(void)
const char *error;
int ret;
NULL};
const char *token;
test_assert(fts_filter_create(fts_filter_stopwords, NULL, &french_language, stopword_settings, &filter, &error) == 0);
if (ret <= 0) {
op++;
ip++;
test_end();
static void test_fts_filter_stopwords_no(void)
const char *error;
int ret;
const char *token;
test_assert(fts_filter_create(fts_filter_stopwords, NULL, &norwegian_language, stopword_settings, &filter, &error) == 0);
if (ret <= 0) {
op++;
ip++;
test_end();
static void test_fts_filter_stopwords_fail_lazy_init(void)
test_assert(fts_filter_create(fts_filter_stopwords, NULL, &unknown, stopword_settings, &filter, &error) == 0);
test_end();
#ifdef HAVE_FTS_STEMMER
static void test_fts_filter_stemmer_snowball_stem_english(void)
const char *error;
const char * const tokens[] = {
const char * const bases[] = {
const char * const *tpp;
const char * const *bpp;
test_assert(fts_filter_create(fts_filter_stemmer_snowball, NULL, &english_language, NULL, &stemmer, &error) == 0);
bpp++;
test_end();
static void test_fts_filter_stemmer_snowball_stem_french(void)
const char *error;
const char * const tokens[] = {
const char * const bases[] = {
const char * const *tpp;
const char * const *bpp;
test_assert(fts_filter_create(fts_filter_stemmer_snowball, NULL, &french_language, NULL, &stemmer, &error) == 0);
bpp++;
test_end();
static void test_fts_filter_stopwords_stemmer_eng(void)
int ret;
const char *error;
const char * const tokens[] = {
const char * const bases[] = {
const char * const *tpp;
const char * const *bpp;
test_assert(fts_filter_create(fts_filter_stopwords, NULL, &english_language, stopword_settings, &filter, &error) == 0);
test_assert(fts_filter_create(fts_filter_stemmer_snowball, filter, &english_language, NULL, &stemmer, &error) == 0);
if (ret == 0)
bpp++;
test_end();
#ifdef HAVE_LIBICU
static void test_fts_filter_normalizer_swedish_short(void)
const char *input[] = {
const char *expected_output[] = {
const char * const settings[] =
test_assert(fts_filter_create(fts_filter_normalizer_icu, NULL, NULL, settings, &norm, &error) == 0);
test_end();
static void test_fts_filter_normalizer_swedish_short_default_id(void)
const char *input[] = {
const char *expected_output[] = {
test_end();
static void test_fts_filter_normalizer_french(void)
const char * const settings[] =
const char *tokens;
const unsigned char correct_digest[] = {
const char *udhr_path;
test_assert(fts_filter_create(fts_filter_normalizer_icu, NULL, NULL, settings, &norm, &error) == 0);
sizeof(sha512_digest)) == 0);
test_end();
static void test_fts_filter_normalizer_empty(void)
static const char *empty_tokens[] = {
const char * const settings[] =
const char *error;
test_assert(fts_filter_create(fts_filter_normalizer_icu, NULL, NULL, settings, &norm, &error) == 0);
test_end();
static void test_fts_filter_normalizer_baddata(void)
const char * const settings[] =
test_assert(fts_filter_create(fts_filter_normalizer_icu, NULL, NULL, settings, &norm, &error) == 0);
T_BEGIN {
} T_END;
test_end();
static void test_fts_filter_normalizer_invalid_id(void)
const char *settings[] =
NULL};
test_assert(fts_filter_create(fts_filter_normalizer_icu, NULL, NULL, settings, &norm, &error) == 0);
test_end();
#ifdef HAVE_FTS_STEMMER
static void test_fts_filter_normalizer_stopwords_stemmer_eng(void)
int ret;
const char *error;
const char * const id_settings[] =
const char * const tokens[] = {
NULL};
const char * const bases[] = {
const char * const *tpp;
const char * const *bpp;
test_assert(fts_filter_create(fts_filter_normalizer_icu, NULL, NULL, id_settings, &normalizer, &error) == 0);
test_assert(fts_filter_create(fts_filter_stopwords, normalizer, &english_language, stopword_settings, &filter, &error) == 0);
test_assert(fts_filter_create(fts_filter_stemmer_snowball, filter, &english_language, NULL, &stemmer, &error) == 0);
if (ret <= 0) {
bpp++;
test_end();
static void test_fts_filter_stopwords_normalizer_stemmer_no(void)
int ret;
const char *error;
const char * const tokens[] = {
const char * const bases[] = {
const char * const *tpp;
const char * const *bpp;
test_assert(fts_filter_create(fts_filter_stopwords, NULL, &norwegian_language, stopword_settings, &filter, &error) == 0);
test_assert(fts_filter_create(fts_filter_normalizer_icu, filter, NULL, NULL, &normalizer, &error) == 0);
test_assert(fts_filter_create(fts_filter_stemmer_snowball, normalizer, &norwegian_language, NULL, &stemmer, &error) == 0);
if (ret <= 0) {
bpp++;
test_end();
static void test_fts_filter_stopwords_normalizer_stemmer_sv(void)
int ret;
const char *error;
const char * const tokens[] = {
const char * const bases[] = {
const char * const *tpp;
const char * const *bpp;
test_assert(fts_filter_create(fts_filter_stopwords, NULL, &swedish_language, stopword_settings, &filter, &error) == 0);
test_assert(fts_filter_create(fts_filter_normalizer_icu, filter, NULL, NULL, &normalizer, &error) == 0);
test_assert(fts_filter_create(fts_filter_stemmer_snowball, normalizer, &swedish_language, NULL, &stemmer, &error) == 0);
if (ret <= 0) {
bpp++;
test_end();
static void test_fts_filter_english_possessive(void)
const char *input[] = {
const char *expected_output[] = {
test_assert(fts_filter_create(fts_filter_english_possessive, NULL, NULL, NULL, &norm, &error) == 0);
test_end();
int main(void)
static void (*test_functions[])(void) = {
#ifdef HAVE_LIBICU
#ifdef HAVE_FTS_STEMMER
#ifdef HAVE_LIBICU
#ifdef HAVE_FTS_STEMMER
int ret;
return ret;