fts-user.c revision 0ebeb1831a56e020b0958ed1ced50e86ee9347ec
a8c5a86d183db25a57bf193c06b41e092ec2e151Timo Sirainen/* Copyright (c) 2015 Dovecot authors, see the included COPYING file */
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen#include "lib.h"
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen#include "module-context.h"
6967fa47dde9f2726bd86019a50627dacf2d7509Timo Sirainen#include "mail-user.h"
34830cefe1757de0ffca67acdc529d5bc8b06b66Timo Sirainen#include "fts-language.h"
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen#include "fts-filter.h"
6967fa47dde9f2726bd86019a50627dacf2d7509Timo Sirainen#include "fts-tokenizer.h"
0536ccb51d41e3078c3a9fa33e509fb4b2420f95Timo Sirainen#include "fts-user.h"
4499995f7029bafd85094694b6a14752ea34c9b3Timo Sirainen
/* Look up the fts_user_module context stored on obj (a struct mail_user
   in every use within this file). */
#define FTS_USER_CONTEXT(obj) MODULE_CONTEXT(obj, fts_user_module)
34830cefe1757de0ffca67acdc529d5bc8b06b66Timo Sirainen
/* Built-in fallbacks, used when neither the language-specific nor the global
   fts_filters setting (respectively the fts_tokenizers setting) is set. */
#define FTS_DEFAULT_FILTERS "normalizer-icu snowball"
#define FTS_DEFAULT_TOKENIZERS "generic email-address"
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainenstruct fts_user {
252db51b6c0a605163326b3ea5d09e9936ca3b29Timo Sirainen union mail_user_module_context module_ctx;
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen
fcca16701767c6b92227a9ee125de69d257882f6Timo Sirainen struct fts_language_list *lang_list;
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen struct fts_tokenizer *index_tokenizer, *search_tokenizer;
d10cb4d7a80571af21f776c65604442bf09b1765Timo Sirainen struct fts_user_language *data_lang;
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen ARRAY_TYPE(fts_user_language) languages;
fcca16701767c6b92227a9ee125de69d257882f6Timo Sirainen};
fcca16701767c6b92227a9ee125de69d257882f6Timo Sirainen
fcca16701767c6b92227a9ee125de69d257882f6Timo Sirainenstatic MODULE_CONTEXT_DEFINE_INIT(fts_user_module,
5702c81e2d788449c3bc207eb9c19e539458ad9eTimo Sirainen &mail_user_module_register);
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainenstatic const char *const *str_keyvalues_to_array(const char *str)
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen{
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen const char *key, *value, *const *keyvalues;
6967fa47dde9f2726bd86019a50627dacf2d7509Timo Sirainen ARRAY_TYPE(const_string) arr;
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen unsigned int i;
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen
e2ce8d4a6ac5d82a906178148453e7613fab9ba0Timo Sirainen if (str == NULL)
e2ce8d4a6ac5d82a906178148453e7613fab9ba0Timo Sirainen return NULL;
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen t_array_init(&arr, 8);
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen keyvalues = t_strsplit_spaces(str, " ");
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen for (i = 0; keyvalues[i] != NULL; i++) {
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen value = strchr(keyvalues[i], '=');
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen if (value != NULL)
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen key = t_strdup_until(keyvalues[i], value++);
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen else {
a6f281d078ed03d555802c1a8e15fefce80132dcTimo Sirainen key = keyvalues[i];
a6f281d078ed03d555802c1a8e15fefce80132dcTimo Sirainen value = "";
a6f281d078ed03d555802c1a8e15fefce80132dcTimo Sirainen }
a6f281d078ed03d555802c1a8e15fefce80132dcTimo Sirainen array_append(&arr, &key, 1);
a6f281d078ed03d555802c1a8e15fefce80132dcTimo Sirainen array_append(&arr, &value, 1);
a6f281d078ed03d555802c1a8e15fefce80132dcTimo Sirainen }
a6f281d078ed03d555802c1a8e15fefce80132dcTimo Sirainen array_append_zero(&arr);
a6f281d078ed03d555802c1a8e15fefce80132dcTimo Sirainen return array_idx(&arr, 0);
a94936bafd127680184da114c6a177b37ff656e5Timo Sirainen}
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainenstatic int
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainenfts_user_init_languages(struct mail_user *user, struct fts_user *fuser,
5702c81e2d788449c3bc207eb9c19e539458ad9eTimo Sirainen const char **error_r)
5702c81e2d788449c3bc207eb9c19e539458ad9eTimo Sirainen{
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen const char *languages, *unknown;
d5cebe7f98e63d4e2822863ef2faa4971e8b3a5dTimo Sirainen const char *lang_config[3] = {NULL, NULL, NULL};
a6f281d078ed03d555802c1a8e15fefce80132dcTimo Sirainen
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen languages = mail_user_plugin_getenv(user, "fts_languages");
6967fa47dde9f2726bd86019a50627dacf2d7509Timo Sirainen if (languages == NULL) {
6967fa47dde9f2726bd86019a50627dacf2d7509Timo Sirainen *error_r = "fts_languages setting is missing";
fcca16701767c6b92227a9ee125de69d257882f6Timo Sirainen return -1;
c1d4780bc0c9017e8e5d366b81e4fad31174c0adTimo Sirainen }
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen lang_config[1] = mail_user_plugin_getenv(user, "fts_language_config");
252db51b6c0a605163326b3ea5d09e9936ca3b29Timo Sirainen fuser->lang_list = fts_language_list_init(lang_config);
a94936bafd127680184da114c6a177b37ff656e5Timo Sirainen if (lang_config[1] != NULL)
a94936bafd127680184da114c6a177b37ff656e5Timo Sirainen lang_config[0] = "fts_language_config";
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen if (!fts_language_list_add_names(fuser->lang_list, languages, &unknown)) {
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen *error_r = t_strdup_printf(
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen "fts_languages: Unknown language '%s'", unknown);
597dba3488c648ffb375ee4a552bd52ac4346979Timo Sirainen return -1;
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen }
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen if (array_count(fts_language_list_get_all(fuser->lang_list)) == 0) {
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen *error_r = "fts_languages setting is empty";
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen return -1;
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen }
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen return 0;
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen}
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen
7bcb308d0e13dfa48b483b0addccd889a77bb598Timo Sirainenstatic int
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainenfts_user_create_filters(struct mail_user *user, const struct fts_language *lang,
7bcb308d0e13dfa48b483b0addccd889a77bb598Timo Sirainen struct fts_filter **filter_r, const char **error_r)
7bcb308d0e13dfa48b483b0addccd889a77bb598Timo Sirainen{
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen const struct fts_filter *filter_class;
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen struct fts_filter *filter = NULL, *parent = NULL;
fcca16701767c6b92227a9ee125de69d257882f6Timo Sirainen const char *filters_key, *const *filters, *filter_set_name;
fcca16701767c6b92227a9ee125de69d257882f6Timo Sirainen const char *str, *error, *set_key;
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen unsigned int i;
fcca16701767c6b92227a9ee125de69d257882f6Timo Sirainen int ret = 0;
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen
5702c81e2d788449c3bc207eb9c19e539458ad9eTimo Sirainen /* try to get the language-specific filters first */
34830cefe1757de0ffca67acdc529d5bc8b06b66Timo Sirainen filters_key = t_strconcat("fts_filters_", lang->name, NULL);
9e406b04bb5bed7d73aeed375c40c6a3fea1a2cbTimo Sirainen str = mail_user_plugin_getenv(user, filters_key);
4307c886579381dbb1897ea1388ae6978c96f560Timo Sirainen if (str == NULL) {
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen /* fallback to global filters */
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen filters_key = "fts_filters";
34830cefe1757de0ffca67acdc529d5bc8b06b66Timo Sirainen str = mail_user_plugin_getenv(user, filters_key);
5702c81e2d788449c3bc207eb9c19e539458ad9eTimo Sirainen if (str == NULL) {
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen str = FTS_DEFAULT_FILTERS;
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen filters_key = "fts_filters(built-in default)";
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen }
bace943c67e6cd14ce6c994f533d82a3caad5bf1Timo Sirainen }
7bcb308d0e13dfa48b483b0addccd889a77bb598Timo Sirainen
7bcb308d0e13dfa48b483b0addccd889a77bb598Timo Sirainen filters = t_strsplit_spaces(str, " ");
7bcb308d0e13dfa48b483b0addccd889a77bb598Timo Sirainen for (i = 0; filters[i] != NULL; i++) {
7bcb308d0e13dfa48b483b0addccd889a77bb598Timo Sirainen filter_class = fts_filter_find(filters[i]);
7bcb308d0e13dfa48b483b0addccd889a77bb598Timo Sirainen if (filter_class == NULL) {
7bcb308d0e13dfa48b483b0addccd889a77bb598Timo Sirainen *error_r = t_strdup_printf("%s: Unknown filter '%s'",
7bcb308d0e13dfa48b483b0addccd889a77bb598Timo Sirainen filters_key, filters[i]);
7bcb308d0e13dfa48b483b0addccd889a77bb598Timo Sirainen ret = -1;
7bcb308d0e13dfa48b483b0addccd889a77bb598Timo Sirainen break;
7bcb308d0e13dfa48b483b0addccd889a77bb598Timo Sirainen }
8cb72c59d5ea4e9e5f638d7ec840bb853f5a188eTimo Sirainen
8cb72c59d5ea4e9e5f638d7ec840bb853f5a188eTimo Sirainen /* try the language-specific setting first */
7bcb308d0e13dfa48b483b0addccd889a77bb598Timo Sirainen filter_set_name = t_str_replace(filters[i], '-', '_');
7bcb308d0e13dfa48b483b0addccd889a77bb598Timo Sirainen set_key = t_strdup_printf("fts_filters_%s_%s",
7bcb308d0e13dfa48b483b0addccd889a77bb598Timo Sirainen lang->name, filter_set_name);
7bcb308d0e13dfa48b483b0addccd889a77bb598Timo Sirainen str = mail_user_plugin_getenv(user, set_key);
7bcb308d0e13dfa48b483b0addccd889a77bb598Timo Sirainen if (str == NULL) {
7bcb308d0e13dfa48b483b0addccd889a77bb598Timo Sirainen set_key = t_strdup_printf("fts_filters_%s", filter_set_name);
5702c81e2d788449c3bc207eb9c19e539458ad9eTimo Sirainen str = mail_user_plugin_getenv(user, set_key);
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen }
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen if (fts_filter_create(filter_class, parent, lang,
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen str_keyvalues_to_array(str),
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen &filter, &error) < 0) {
5069adb2f5b3609fff9a0a705c6edeae56e0030aTimo Sirainen *error_r = t_strdup_printf(
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen "Filter '%s' init via settings '%s' failed: %s",
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen filters[i], set_key, error);
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen ret = -1;
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen break;
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen }
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen if (parent != NULL)
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen fts_filter_unref(&parent);
8d56f3334e22619abf56833d290bb1f49ac6722cTimo Sirainen parent = filter;
8d56f3334e22619abf56833d290bb1f49ac6722cTimo Sirainen }
2c42748505ef4aed83ff59b34e50ed5606900c86Timo Sirainen if (ret < 0) {
2c42748505ef4aed83ff59b34e50ed5606900c86Timo Sirainen if (parent != NULL)
2c42748505ef4aed83ff59b34e50ed5606900c86Timo Sirainen fts_filter_unref(&parent);
2c42748505ef4aed83ff59b34e50ed5606900c86Timo Sirainen return -1;
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen }
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen *filter_r = filter;
156736910057b280cb9999d4c6c7221c4c80f5c2Timo Sirainen return 0;
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen}
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainenstatic int
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainenfts_user_create_tokenizer(struct mail_user *user,
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen struct fts_tokenizer **tokenizer_r, bool search,
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen const char **error_r)
d10cb4d7a80571af21f776c65604442bf09b1765Timo Sirainen{
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen const struct fts_tokenizer *tokenizer_class;
81b1d14891415fef0c2f37ef1ef3680cdcc600f1Timo Sirainen struct fts_tokenizer *tokenizer = NULL, *parent = NULL;
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen const char *tokenizers_key, *const *tokenizers, *tokenizer_set_name;
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen const char *str, *error, *set_key;
5069adb2f5b3609fff9a0a705c6edeae56e0030aTimo Sirainen unsigned int i;
5069adb2f5b3609fff9a0a705c6edeae56e0030aTimo Sirainen int ret = 0;
3785910c303507db5f629684e6dde2cc7f83668eTimo Sirainen
5069adb2f5b3609fff9a0a705c6edeae56e0030aTimo Sirainen tokenizers_key = "fts_tokenizers";
5069adb2f5b3609fff9a0a705c6edeae56e0030aTimo Sirainen str = mail_user_plugin_getenv(user, tokenizers_key);
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen if (str == NULL)
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen str = FTS_DEFAULT_TOKENIZERS;
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen tokenizers = t_strsplit_spaces(str, " ");
c09f9f95db314e7482c95e502e1c56ed6c555797Timo Sirainen
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen for (i = 0; tokenizers[i] != NULL; i++) {
252db51b6c0a605163326b3ea5d09e9936ca3b29Timo Sirainen tokenizer_class = fts_tokenizer_find(tokenizers[i]);
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen if (tokenizer_class == NULL) {
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen *error_r = t_strdup_printf("%s: Unknown tokenizer '%s'",
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen tokenizers_key, tokenizers[i]);
f2b95f63ebdf77dba4dac938cf8c65c839f1067dTimo Sirainen ret = -1;
f2b95f63ebdf77dba4dac938cf8c65c839f1067dTimo Sirainen break;
5702c81e2d788449c3bc207eb9c19e539458ad9eTimo Sirainen }
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen
5702c81e2d788449c3bc207eb9c19e539458ad9eTimo Sirainen tokenizer_set_name = t_str_replace(tokenizers[i], '-', '_');
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen set_key = t_strdup_printf("fts_tokenizers_%s", tokenizer_set_name);
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen str = mail_user_plugin_getenv(user, set_key);
3785910c303507db5f629684e6dde2cc7f83668eTimo Sirainen
3785910c303507db5f629684e6dde2cc7f83668eTimo Sirainen /* tell the tokenizers that we're tokenizing a search string
34830cefe1757de0ffca67acdc529d5bc8b06b66Timo Sirainen (instead of tokenizing indexed data) */
34830cefe1757de0ffca67acdc529d5bc8b06b66Timo Sirainen if (search)
34830cefe1757de0ffca67acdc529d5bc8b06b66Timo Sirainen str = t_strconcat("search=yes ", str, NULL);
8d56f3334e22619abf56833d290bb1f49ac6722cTimo Sirainen
34830cefe1757de0ffca67acdc529d5bc8b06b66Timo Sirainen if (fts_tokenizer_create(tokenizer_class, parent,
34830cefe1757de0ffca67acdc529d5bc8b06b66Timo Sirainen str_keyvalues_to_array(str),
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen &tokenizer, &error) < 0) {
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen *error_r = t_strdup_printf(
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen "Tokenizer '%s' init via settings '%s' failed: %s",
5297aa3ceddf3a4ecc09f49c832bc424eff8f906Timo Sirainen tokenizers[i], set_key, error);
5702c81e2d788449c3bc207eb9c19e539458ad9eTimo Sirainen ret = -1;
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen break;
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen }
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen if (parent != NULL)
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen fts_tokenizer_unref(&parent);
5702c81e2d788449c3bc207eb9c19e539458ad9eTimo Sirainen parent = tokenizer;
5702c81e2d788449c3bc207eb9c19e539458ad9eTimo Sirainen }
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen if (ret < 0) {
c09f9f95db314e7482c95e502e1c56ed6c555797Timo Sirainen if (parent != NULL)
c09f9f95db314e7482c95e502e1c56ed6c555797Timo Sirainen fts_tokenizer_unref(&parent);
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen return -1;
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen }
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen *tokenizer_r = tokenizer;
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen return 0;
34830cefe1757de0ffca67acdc529d5bc8b06b66Timo Sirainen}
34830cefe1757de0ffca67acdc529d5bc8b06b66Timo Sirainen
34830cefe1757de0ffca67acdc529d5bc8b06b66Timo Sirainenstatic int fts_user_init_tokenizers(struct mail_user *user,
34830cefe1757de0ffca67acdc529d5bc8b06b66Timo Sirainen struct fts_user *fuser,
34830cefe1757de0ffca67acdc529d5bc8b06b66Timo Sirainen const char **error_r)
5702c81e2d788449c3bc207eb9c19e539458ad9eTimo Sirainen{
34830cefe1757de0ffca67acdc529d5bc8b06b66Timo Sirainen if (fts_user_create_tokenizer(user, &fuser->index_tokenizer, FALSE,
5702c81e2d788449c3bc207eb9c19e539458ad9eTimo Sirainen error_r) < 0)
34830cefe1757de0ffca67acdc529d5bc8b06b66Timo Sirainen return -1;
5702c81e2d788449c3bc207eb9c19e539458ad9eTimo Sirainen
34830cefe1757de0ffca67acdc529d5bc8b06b66Timo Sirainen if (fts_user_create_tokenizer(user, &fuser->search_tokenizer, TRUE,
34830cefe1757de0ffca67acdc529d5bc8b06b66Timo Sirainen error_r) < 0)
9e406b04bb5bed7d73aeed375c40c6a3fea1a2cbTimo Sirainen return -1;
5702c81e2d788449c3bc207eb9c19e539458ad9eTimo Sirainen
34830cefe1757de0ffca67acdc529d5bc8b06b66Timo Sirainen return 0;
34830cefe1757de0ffca67acdc529d5bc8b06b66Timo Sirainen}
34830cefe1757de0ffca67acdc529d5bc8b06b66Timo Sirainen
4307c886579381dbb1897ea1388ae6978c96f560Timo Sirainenstruct fts_user_language *
34830cefe1757de0ffca67acdc529d5bc8b06b66Timo Sirainenfts_user_language_find(struct mail_user *user,
34830cefe1757de0ffca67acdc529d5bc8b06b66Timo Sirainen const struct fts_language *lang)
34830cefe1757de0ffca67acdc529d5bc8b06b66Timo Sirainen{
34830cefe1757de0ffca67acdc529d5bc8b06b66Timo Sirainen struct fts_user_language *const *user_langp;
34830cefe1757de0ffca67acdc529d5bc8b06b66Timo Sirainen struct fts_user *fuser = FTS_USER_CONTEXT(user);
34830cefe1757de0ffca67acdc529d5bc8b06b66Timo Sirainen
a94936bafd127680184da114c6a177b37ff656e5Timo Sirainen array_foreach(&fuser->languages, user_langp) {
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen if (strcmp((*user_langp)->lang->name, lang->name) == 0)
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen return *user_langp;
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen }
7efee0bb408b0d5253e41997857bdda57855cdc7Timo Sirainen return NULL;
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen}
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen
f2b95f63ebdf77dba4dac938cf8c65c839f1067dTimo Sirainenstruct fts_tokenizer *fts_user_get_index_tokenizer(struct mail_user *user)
f2b95f63ebdf77dba4dac938cf8c65c839f1067dTimo Sirainen{
f2b95f63ebdf77dba4dac938cf8c65c839f1067dTimo Sirainen struct fts_user *fuser = FTS_USER_CONTEXT(user);
f2b95f63ebdf77dba4dac938cf8c65c839f1067dTimo Sirainen
5702c81e2d788449c3bc207eb9c19e539458ad9eTimo Sirainen return fuser->index_tokenizer;
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen}
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainenstruct fts_tokenizer *fts_user_get_search_tokenizer(struct mail_user *user)
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen{
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen struct fts_user *fuser = FTS_USER_CONTEXT(user);
34830cefe1757de0ffca67acdc529d5bc8b06b66Timo Sirainen
34830cefe1757de0ffca67acdc529d5bc8b06b66Timo Sirainen return fuser->search_tokenizer;
34830cefe1757de0ffca67acdc529d5bc8b06b66Timo Sirainen}
34830cefe1757de0ffca67acdc529d5bc8b06b66Timo Sirainen
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainenstatic int fts_user_language_create(struct mail_user *user,
5702c81e2d788449c3bc207eb9c19e539458ad9eTimo Sirainen struct fts_user *fuser,
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen const struct fts_language *lang,
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen const char **error_r)
34830cefe1757de0ffca67acdc529d5bc8b06b66Timo Sirainen{
34830cefe1757de0ffca67acdc529d5bc8b06b66Timo Sirainen struct fts_filter *filter;
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen struct fts_user_language *user_lang;
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen if (fts_user_create_filters(user, lang, &filter, error_r) < 0)
8451cf67733f6633510f6619301474be349c5035Timo Sirainen return -1;
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen user_lang = p_new(user->pool, struct fts_user_language, 1);
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen user_lang->lang = lang;
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen user_lang->filter = filter;
34830cefe1757de0ffca67acdc529d5bc8b06b66Timo Sirainen array_append(&fuser->languages, &user_lang, 1);
34830cefe1757de0ffca67acdc529d5bc8b06b66Timo Sirainen
34830cefe1757de0ffca67acdc529d5bc8b06b66Timo Sirainen return 0;
9e406b04bb5bed7d73aeed375c40c6a3fea1a2cbTimo Sirainen}
34830cefe1757de0ffca67acdc529d5bc8b06b66Timo Sirainen
34830cefe1757de0ffca67acdc529d5bc8b06b66Timo Sirainenstatic int fts_user_languages_fill_all(struct mail_user *user,
34830cefe1757de0ffca67acdc529d5bc8b06b66Timo Sirainen struct fts_user *fuser,
34830cefe1757de0ffca67acdc529d5bc8b06b66Timo Sirainen const char **error_r)
34830cefe1757de0ffca67acdc529d5bc8b06b66Timo Sirainen{
34830cefe1757de0ffca67acdc529d5bc8b06b66Timo Sirainen const struct fts_language *const *langp;
34830cefe1757de0ffca67acdc529d5bc8b06b66Timo Sirainen
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen array_foreach(fts_language_list_get_all(fuser->lang_list), langp) {
7efee0bb408b0d5253e41997857bdda57855cdc7Timo Sirainen if (fts_user_language_create(user, fuser, *langp, error_r) < 0)
34830cefe1757de0ffca67acdc529d5bc8b06b66Timo Sirainen return -1;
34830cefe1757de0ffca67acdc529d5bc8b06b66Timo Sirainen }
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen return 0;
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen}
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainenstruct fts_language_list *fts_user_get_language_list(struct mail_user *user)
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen{
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen struct fts_user *fuser = FTS_USER_CONTEXT(user);
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen return fuser->lang_list;
1b9aae1cb53708b5a3d861b4db6ba96ac2eb35b4Timo Sirainen}
1b9aae1cb53708b5a3d861b4db6ba96ac2eb35b4Timo Sirainen
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainenconst ARRAY_TYPE(fts_user_language) *
7efee0bb408b0d5253e41997857bdda57855cdc7Timo Sirainenfts_user_get_all_languages(struct mail_user *user)
7efee0bb408b0d5253e41997857bdda57855cdc7Timo Sirainen{
7efee0bb408b0d5253e41997857bdda57855cdc7Timo Sirainen struct fts_user *fuser = FTS_USER_CONTEXT(user);
7efee0bb408b0d5253e41997857bdda57855cdc7Timo Sirainen
7efee0bb408b0d5253e41997857bdda57855cdc7Timo Sirainen return &fuser->languages;
4da70fe8c9cb6e57b36103d78ab1e9c8654f76d9Timo Sirainen}
7efee0bb408b0d5253e41997857bdda57855cdc7Timo Sirainen
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainenstruct fts_user_language *fts_user_get_data_lang(struct mail_user *user)
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen{
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen struct fts_user *fuser = FTS_USER_CONTEXT(user);
01f4ee4a0243f3fe9af763e1a540cd5cff0d63f5Timo Sirainen struct fts_user_language *lang;
a94936bafd127680184da114c6a177b37ff656e5Timo Sirainen const char *error;
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen if (fuser->data_lang != NULL)
01f4ee4a0243f3fe9af763e1a540cd5cff0d63f5Timo Sirainen return fuser->data_lang;
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen lang = p_new(user->pool, struct fts_user_language, 1);
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen lang->lang = &fts_language_data;
d10cb4d7a80571af21f776c65604442bf09b1765Timo Sirainen
d10cb4d7a80571af21f776c65604442bf09b1765Timo Sirainen if (fts_filter_create(fts_filter_lowercase, NULL, lang->lang, NULL,
d10cb4d7a80571af21f776c65604442bf09b1765Timo Sirainen &lang->filter, &error) < 0)
01f4ee4a0243f3fe9af763e1a540cd5cff0d63f5Timo Sirainen i_unreached();
d10cb4d7a80571af21f776c65604442bf09b1765Timo Sirainen i_assert(lang->filter != NULL);
d10cb4d7a80571af21f776c65604442bf09b1765Timo Sirainen fuser->data_lang = lang;
5702c81e2d788449c3bc207eb9c19e539458ad9eTimo Sirainen return fuser->data_lang;
8255a22cccf3b0ccf38206c594941820ac1c9e00Timo Sirainen}
8255a22cccf3b0ccf38206c594941820ac1c9e00Timo Sirainen
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainenstatic void fts_user_free(struct fts_user *fuser)
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen{
1fb8ce8b21d0616796ced699b1573b5dd0b61793Timo Sirainen struct fts_user_language *const *user_langp;
1fb8ce8b21d0616796ced699b1573b5dd0b61793Timo Sirainen
a94936bafd127680184da114c6a177b37ff656e5Timo Sirainen if (fuser->lang_list != NULL)
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen fts_language_list_deinit(&fuser->lang_list);
7efee0bb408b0d5253e41997857bdda57855cdc7Timo Sirainen
7efee0bb408b0d5253e41997857bdda57855cdc7Timo Sirainen array_foreach(&fuser->languages, user_langp) {
9ec30d84a736a2d0726b600213dcf630ff28bdebTimo Sirainen if ((*user_langp)->filter != NULL)
9ec30d84a736a2d0726b600213dcf630ff28bdebTimo Sirainen fts_filter_unref(&(*user_langp)->filter);
9ec30d84a736a2d0726b600213dcf630ff28bdebTimo Sirainen }
7efee0bb408b0d5253e41997857bdda57855cdc7Timo Sirainen if (fuser->data_lang != NULL && fuser->data_lang->filter != NULL)
1fb8ce8b21d0616796ced699b1573b5dd0b61793Timo Sirainen fts_filter_unref(&fuser->data_lang->filter);
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen
a6f281d078ed03d555802c1a8e15fefce80132dcTimo Sirainen if (fuser->index_tokenizer != NULL)
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen fts_tokenizer_unref(&fuser->index_tokenizer);
2a8b891366a3fc69524c2bb07f68d42c16223a56Timo Sirainen if (fuser->search_tokenizer != NULL)
2a8b891366a3fc69524c2bb07f68d42c16223a56Timo Sirainen fts_tokenizer_unref(&fuser->search_tokenizer);
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen}
8255a22cccf3b0ccf38206c594941820ac1c9e00Timo Sirainen
8255a22cccf3b0ccf38206c594941820ac1c9e00Timo Sirainenint fts_mail_user_init(struct mail_user *user, const char **error_r)
01f4ee4a0243f3fe9af763e1a540cd5cff0d63f5Timo Sirainen{
01f4ee4a0243f3fe9af763e1a540cd5cff0d63f5Timo Sirainen struct fts_user *fuser;
01f4ee4a0243f3fe9af763e1a540cd5cff0d63f5Timo Sirainen
8255a22cccf3b0ccf38206c594941820ac1c9e00Timo Sirainen fuser = p_new(user->pool, struct fts_user, 1);
8255a22cccf3b0ccf38206c594941820ac1c9e00Timo Sirainen p_array_init(&fuser->languages, user->pool, 4);
5702c81e2d788449c3bc207eb9c19e539458ad9eTimo Sirainen
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen if (fts_user_init_languages(user, fuser, error_r) < 0) {
5702c81e2d788449c3bc207eb9c19e539458ad9eTimo Sirainen fts_user_free(fuser);
8255a22cccf3b0ccf38206c594941820ac1c9e00Timo Sirainen return -1;
01f4ee4a0243f3fe9af763e1a540cd5cff0d63f5Timo Sirainen }
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen if (fts_user_languages_fill_all(user, fuser, error_r) < 0 ||
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen fts_user_init_tokenizers(user, fuser, error_r) < 0) {
be51dfea768ad502e08ebd02917138f7a0f8f625Timo Sirainen fts_user_free(fuser);
be51dfea768ad502e08ebd02917138f7a0f8f625Timo Sirainen return -1;
be51dfea768ad502e08ebd02917138f7a0f8f625Timo Sirainen }
be51dfea768ad502e08ebd02917138f7a0f8f625Timo Sirainen
be51dfea768ad502e08ebd02917138f7a0f8f625Timo Sirainen MODULE_CONTEXT_SET(user, fts_user_module, fuser);
be51dfea768ad502e08ebd02917138f7a0f8f625Timo Sirainen return 0;
be51dfea768ad502e08ebd02917138f7a0f8f625Timo Sirainen}
be51dfea768ad502e08ebd02917138f7a0f8f625Timo Sirainen
be51dfea768ad502e08ebd02917138f7a0f8f625Timo Sirainenvoid fts_mail_user_deinit(struct mail_user *user)
be51dfea768ad502e08ebd02917138f7a0f8f625Timo Sirainen{
be51dfea768ad502e08ebd02917138f7a0f8f625Timo Sirainen struct fts_user *fuser = FTS_USER_CONTEXT(user);
be51dfea768ad502e08ebd02917138f7a0f8f625Timo Sirainen
be51dfea768ad502e08ebd02917138f7a0f8f625Timo Sirainen if (fuser != NULL)
be51dfea768ad502e08ebd02917138f7a0f8f625Timo Sirainen fts_user_free(fuser);
2201e2cc1b3f744dac61c2bf8095bcb6b5719540Timo Sirainen}
4d33a3133e8484ebd00f677f457cda82f1365b84Timo Sirainen