fts-user.c revision 0ebeb1831a56e020b0958ed1ced50e86ee9347ec
a8c5a86d183db25a57bf193c06b41e092ec2e151Timo Sirainen/* Copyright (c) 2015 Dovecot authors, see the included COPYING file */
34830cefe1757de0ffca67acdc529d5bc8b06b66Timo Sirainen#define FTS_DEFAULT_TOKENIZERS "generic email-address"
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen#define FTS_DEFAULT_FILTERS "normalizer-icu snowball"
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen struct fts_tokenizer *index_tokenizer, *search_tokenizer;
fcca16701767c6b92227a9ee125de69d257882f6Timo Sirainenstatic MODULE_CONTEXT_DEFINE_INIT(fts_user_module,
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainenstatic const char *const *str_keyvalues_to_array(const char *str)
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen unsigned int i;
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainenfts_user_init_languages(struct mail_user *user, struct fts_user *fuser,
5702c81e2d788449c3bc207eb9c19e539458ad9eTimo Sirainen const char **error_r)
d5cebe7f98e63d4e2822863ef2faa4971e8b3a5dTimo Sirainen const char *lang_config[3] = {NULL, NULL, NULL};
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen languages = mail_user_plugin_getenv(user, "fts_languages");
6967fa47dde9f2726bd86019a50627dacf2d7509Timo Sirainen *error_r = "fts_languages setting is missing";
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen lang_config[1] = mail_user_plugin_getenv(user, "fts_language_config");
252db51b6c0a605163326b3ea5d09e9936ca3b29Timo Sirainen fuser->lang_list = fts_language_list_init(lang_config);
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen if (!fts_language_list_add_names(fuser->lang_list, languages, &unknown)) {
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen "fts_languages: Unknown language '%s'", unknown);
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen if (array_count(fts_language_list_get_all(fuser->lang_list)) == 0) {
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainenfts_user_create_filters(struct mail_user *user, const struct fts_language *lang,
7bcb308d0e13dfa48b483b0addccd889a77bb598Timo Sirainen struct fts_filter **filter_r, const char **error_r)
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen struct fts_filter *filter = NULL, *parent = NULL;
fcca16701767c6b92227a9ee125de69d257882f6Timo Sirainen const char *filters_key, *const *filters, *filter_set_name;
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen unsigned int i;
5702c81e2d788449c3bc207eb9c19e539458ad9eTimo Sirainen /* try to get the language-specific filters first */
34830cefe1757de0ffca67acdc529d5bc8b06b66Timo Sirainen filters_key = t_strconcat("fts_filters_", lang->name, NULL);
9e406b04bb5bed7d73aeed375c40c6a3fea1a2cbTimo Sirainen str = mail_user_plugin_getenv(user, filters_key);
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen /* fallback to global filters */
34830cefe1757de0ffca67acdc529d5bc8b06b66Timo Sirainen str = mail_user_plugin_getenv(user, filters_key);
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen filters_key = "fts_filters(built-in default)";
7bcb308d0e13dfa48b483b0addccd889a77bb598Timo Sirainen *error_r = t_strdup_printf("%s: Unknown filter '%s'",
8cb72c59d5ea4e9e5f638d7ec840bb853f5a188eTimo Sirainen /* try the language-specific setting first */
7bcb308d0e13dfa48b483b0addccd889a77bb598Timo Sirainen filter_set_name = t_str_replace(filters[i], '-', '_');
7bcb308d0e13dfa48b483b0addccd889a77bb598Timo Sirainen set_key = t_strdup_printf("fts_filters_%s_%s",
7bcb308d0e13dfa48b483b0addccd889a77bb598Timo Sirainen set_key = t_strdup_printf("fts_filters_%s", filter_set_name);
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen if (fts_filter_create(filter_class, parent, lang,
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen "Filter '%s' init via settings '%s' failed: %s",
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainenfts_user_create_tokenizer(struct mail_user *user,
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen struct fts_tokenizer **tokenizer_r, bool search,
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen const char **error_r)
81b1d14891415fef0c2f37ef1ef3680cdcc600f1Timo Sirainen struct fts_tokenizer *tokenizer = NULL, *parent = NULL;
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen const char *tokenizers_key, *const *tokenizers, *tokenizer_set_name;
5069adb2f5b3609fff9a0a705c6edeae56e0030aTimo Sirainen unsigned int i;
5069adb2f5b3609fff9a0a705c6edeae56e0030aTimo Sirainen str = mail_user_plugin_getenv(user, tokenizers_key);
252db51b6c0a605163326b3ea5d09e9936ca3b29Timo Sirainen tokenizer_class = fts_tokenizer_find(tokenizers[i]);
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen *error_r = t_strdup_printf("%s: Unknown tokenizer '%s'",
5702c81e2d788449c3bc207eb9c19e539458ad9eTimo Sirainen tokenizer_set_name = t_str_replace(tokenizers[i], '-', '_');
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen set_key = t_strdup_printf("fts_tokenizers_%s", tokenizer_set_name);
3785910c303507db5f629684e6dde2cc7f83668eTimo Sirainen /* tell the tokenizers that we're tokenizing a search string
34830cefe1757de0ffca67acdc529d5bc8b06b66Timo Sirainen (instead of tokenizing indexed data) */
34830cefe1757de0ffca67acdc529d5bc8b06b66Timo Sirainen if (fts_tokenizer_create(tokenizer_class, parent,
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen "Tokenizer '%s' init via settings '%s' failed: %s",
34830cefe1757de0ffca67acdc529d5bc8b06b66Timo Sirainenstatic int fts_user_init_tokenizers(struct mail_user *user,
34830cefe1757de0ffca67acdc529d5bc8b06b66Timo Sirainen const char **error_r)
34830cefe1757de0ffca67acdc529d5bc8b06b66Timo Sirainen if (fts_user_create_tokenizer(user, &fuser->index_tokenizer, FALSE,
34830cefe1757de0ffca67acdc529d5bc8b06b66Timo Sirainen if (fts_user_create_tokenizer(user, &fuser->search_tokenizer, TRUE,
34830cefe1757de0ffca67acdc529d5bc8b06b66Timo Sirainen struct fts_user *fuser = FTS_USER_CONTEXT(user);
a94936bafd127680184da114c6a177b37ff656e5Timo Sirainen array_foreach(&fuser->languages, user_langp) {
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen if (strcmp((*user_langp)->lang->name, lang->name) == 0)
f2b95f63ebdf77dba4dac938cf8c65c839f1067dTimo Sirainenstruct fts_tokenizer *fts_user_get_index_tokenizer(struct mail_user *user)
f2b95f63ebdf77dba4dac938cf8c65c839f1067dTimo Sirainen struct fts_user *fuser = FTS_USER_CONTEXT(user);
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainenstruct fts_tokenizer *fts_user_get_search_tokenizer(struct mail_user *user)
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen struct fts_user *fuser = FTS_USER_CONTEXT(user);
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainenstatic int fts_user_language_create(struct mail_user *user,
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen const char **error_r)
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen if (fts_user_create_filters(user, lang, &filter, error_r) < 0)
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen user_lang = p_new(user->pool, struct fts_user_language, 1);
34830cefe1757de0ffca67acdc529d5bc8b06b66Timo Sirainen array_append(&fuser->languages, &user_lang, 1);
34830cefe1757de0ffca67acdc529d5bc8b06b66Timo Sirainenstatic int fts_user_languages_fill_all(struct mail_user *user,
34830cefe1757de0ffca67acdc529d5bc8b06b66Timo Sirainen const char **error_r)
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen array_foreach(fts_language_list_get_all(fuser->lang_list), langp) {
7efee0bb408b0d5253e41997857bdda57855cdc7Timo Sirainen if (fts_user_language_create(user, fuser, *langp, error_r) < 0)
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainenstruct fts_language_list *fts_user_get_language_list(struct mail_user *user)
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen struct fts_user *fuser = FTS_USER_CONTEXT(user);
7efee0bb408b0d5253e41997857bdda57855cdc7Timo Sirainenfts_user_get_all_languages(struct mail_user *user)
7efee0bb408b0d5253e41997857bdda57855cdc7Timo Sirainen struct fts_user *fuser = FTS_USER_CONTEXT(user);
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainenstruct fts_user_language *fts_user_get_data_lang(struct mail_user *user)
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen struct fts_user *fuser = FTS_USER_CONTEXT(user);
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen lang = p_new(user->pool, struct fts_user_language, 1);
d10cb4d7a80571af21f776c65604442bf09b1765Timo Sirainen if (fts_filter_create(fts_filter_lowercase, NULL, lang->lang, NULL,
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainenstatic void fts_user_free(struct fts_user *fuser)
7efee0bb408b0d5253e41997857bdda57855cdc7Timo Sirainen array_foreach(&fuser->languages, user_langp) {
7efee0bb408b0d5253e41997857bdda57855cdc7Timo Sirainen if (fuser->data_lang != NULL && fuser->data_lang->filter != NULL)
2a8b891366a3fc69524c2bb07f68d42c16223a56Timo Sirainen fts_tokenizer_unref(&fuser->search_tokenizer);
8255a22cccf3b0ccf38206c594941820ac1c9e00Timo Sirainenint fts_mail_user_init(struct mail_user *user, const char **error_r)
8255a22cccf3b0ccf38206c594941820ac1c9e00Timo Sirainen fuser = p_new(user->pool, struct fts_user, 1);
8255a22cccf3b0ccf38206c594941820ac1c9e00Timo Sirainen p_array_init(&fuser->languages, user->pool, 4);
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen if (fts_user_init_languages(user, fuser, error_r) < 0) {
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen if (fts_user_languages_fill_all(user, fuser, error_r) < 0 ||
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen fts_user_init_tokenizers(user, fuser, error_r) < 0) {
be51dfea768ad502e08ebd02917138f7a0f8f625Timo Sirainen MODULE_CONTEXT_SET(user, fts_user_module, fuser);
be51dfea768ad502e08ebd02917138f7a0f8f625Timo Sirainenvoid fts_mail_user_deinit(struct mail_user *user)