bcb4e51a409d94ae670de96afb8483a4f7855294Stephan Bosch/* Copyright (c) 2014-2018 Dovecot authors, see the included COPYING file */
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen#include "lib.h"
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen#include "array.h"
bc180e646844ad8ad8d0228016389434433fd405Timo Sirainen#include "str.h"
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen#include "fts-language.h"
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen#include "fts-filter-private.h"
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
7a0d7177394b790cba1ff64417df42cb83bfad12Timo Sirainen#ifdef HAVE_LIBICU
7a0d7177394b790cba1ff64417df42cb83bfad12Timo Sirainen# include "fts-icu.h"
7a0d7177394b790cba1ff64417df42cb83bfad12Timo Sirainen#endif
7a0d7177394b790cba1ff64417df42cb83bfad12Timo Sirainen
/* Registry of known filter classes: fts_filter_register() appends to it and
   fts_filter_find() searches it by class name. Set up by fts_filters_init()
   and freed by fts_filters_deinit(). */
1ef271d30011acf79e8e2a42b7b9baae8a2264d3Timo Sirainenstatic ARRAY(const struct fts_filter *) fts_filter_classes;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainenvoid fts_filters_init(void)
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen{
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen i_array_init(&fts_filter_classes, FTS_FILTER_CLASSES_NR);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen fts_filter_register(fts_filter_stopwords);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen fts_filter_register(fts_filter_stemmer_snowball);
63713f16bad8b55e74c479adb6b47965b519c29bTimo Sirainen fts_filter_register(fts_filter_normalizer_icu);
acfcf88e4dd529e4b2409f43bc9713cbc0169347Timo Sirainen fts_filter_register(fts_filter_lowercase);
471167b9701fcc99b66f7a8bcae07bc4ac0dbbd4Timo Sirainen fts_filter_register(fts_filter_english_possessive);
440b625484f3cc9d3ec0a7ba36fe3583aa90172dTeemu Huovila fts_filter_register(fts_filter_contractions);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen}
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainenvoid fts_filters_deinit(void)
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen{
093e68f3c0669886b0a351d0fda5f72d86e7f89eTeemu Huovila#ifdef HAVE_LIBICU
d3acad538059ba27f269d390516be7e9fb44294fTimo Sirainen fts_icu_deinit();
093e68f3c0669886b0a351d0fda5f72d86e7f89eTeemu Huovila#endif
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen array_free(&fts_filter_classes);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen}
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainenvoid fts_filter_register(const struct fts_filter *filter_class)
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen{
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen i_assert(fts_filter_find(filter_class->class_name) == NULL);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
1ef271d30011acf79e8e2a42b7b9baae8a2264d3Timo Sirainen array_append(&fts_filter_classes, &filter_class, 1);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen}
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainenconst struct fts_filter *fts_filter_find(const char *name)
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen{
1ef271d30011acf79e8e2a42b7b9baae8a2264d3Timo Sirainen const struct fts_filter *const *fp = NULL;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen array_foreach(&fts_filter_classes, fp) {
1ef271d30011acf79e8e2a42b7b9baae8a2264d3Timo Sirainen if (strcmp((*fp)->class_name, name) == 0)
1ef271d30011acf79e8e2a42b7b9baae8a2264d3Timo Sirainen return *fp;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen }
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen return NULL;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen}
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainenint fts_filter_create(const struct fts_filter *filter_class,
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen struct fts_filter *parent,
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const struct fts_language *lang,
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const char *const *settings,
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen struct fts_filter **filter_r,
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const char **error_r)
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen{
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen struct fts_filter *fp;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const char *empty_settings = NULL;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen i_assert(settings == NULL || str_array_length(settings) % 2 == 0);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen if (settings == NULL)
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen settings = &empty_settings;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
be7085921228e1c1333dbf7aa2daca41115c3f23Timo Sirainen if (filter_class->v.create != NULL) {
be7085921228e1c1333dbf7aa2daca41115c3f23Timo Sirainen if (filter_class->v.create(lang, settings, &fp, error_r) < 0) {
bc180e646844ad8ad8d0228016389434433fd405Timo Sirainen *filter_r = NULL;
bc180e646844ad8ad8d0228016389434433fd405Timo Sirainen return -1;
bc180e646844ad8ad8d0228016389434433fd405Timo Sirainen }
bc180e646844ad8ad8d0228016389434433fd405Timo Sirainen } else {
bc180e646844ad8ad8d0228016389434433fd405Timo Sirainen /* default implementation */
bc180e646844ad8ad8d0228016389434433fd405Timo Sirainen if (settings[0] != NULL) {
bc180e646844ad8ad8d0228016389434433fd405Timo Sirainen *error_r = t_strdup_printf("Unknown setting: %s", settings[0]);
bc180e646844ad8ad8d0228016389434433fd405Timo Sirainen return -1;
bc180e646844ad8ad8d0228016389434433fd405Timo Sirainen }
bc180e646844ad8ad8d0228016389434433fd405Timo Sirainen fp = i_new(struct fts_filter, 1);
bc180e646844ad8ad8d0228016389434433fd405Timo Sirainen *fp = *filter_class;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen }
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen fp->refcount = 1;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen fp->parent = parent;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen if (parent != NULL) {
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen fts_filter_ref(parent);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen }
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen *filter_r = fp;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen return 0;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen}
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainenvoid fts_filter_ref(struct fts_filter *fp)
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen{
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen i_assert(fp->refcount > 0);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen fp->refcount++;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen}
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainenvoid fts_filter_unref(struct fts_filter **_fpp)
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen{
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen struct fts_filter *fp = *_fpp;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen i_assert(fp->refcount > 0);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen *_fpp = NULL;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen if (--fp->refcount > 0)
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen return;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen if (fp->parent != NULL)
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen fts_filter_unref(&fp->parent);
be7085921228e1c1333dbf7aa2daca41115c3f23Timo Sirainen if (fp->v.destroy != NULL)
be7085921228e1c1333dbf7aa2daca41115c3f23Timo Sirainen fp->v.destroy(fp);
bc180e646844ad8ad8d0228016389434433fd405Timo Sirainen else {
bc180e646844ad8ad8d0228016389434433fd405Timo Sirainen /* default destroy implementation */
e9fbe5e18b798728041b7e2ffc6c4fa964fc35a3Josef 'Jeff' Sipek str_free(&fp->token);
bc180e646844ad8ad8d0228016389434433fd405Timo Sirainen i_free(fp);
bc180e646844ad8ad8d0228016389434433fd405Timo Sirainen }
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen}
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
2bb1ef0b669901fb91ff961e7fb074439ef769abTimo Sirainenint fts_filter_filter(struct fts_filter *filter, const char **token,
2bb1ef0b669901fb91ff961e7fb074439ef769abTimo Sirainen const char **error_r)
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen{
1d0f568e26ce5cbf18cd7bb335c6eea20a7e3770Teemu Huovila int ret = 0;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
4d44a03eb1b6f60fc9320b6ed47b3a9593a75f75Timo Sirainen i_assert((*token)[0] != '\0');
4d44a03eb1b6f60fc9320b6ed47b3a9593a75f75Timo Sirainen
1d0f568e26ce5cbf18cd7bb335c6eea20a7e3770Teemu Huovila /* Recurse to parent. */
1d0f568e26ce5cbf18cd7bb335c6eea20a7e3770Teemu Huovila if (filter->parent != NULL)
1d0f568e26ce5cbf18cd7bb335c6eea20a7e3770Teemu Huovila ret = fts_filter_filter(filter->parent, token, error_r);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
1d0f568e26ce5cbf18cd7bb335c6eea20a7e3770Teemu Huovila /* Parent returned token or no parent. */
6111370718708a3a966c60a1941ea303a4e0c759Timo Sirainen if (ret > 0 || filter->parent == NULL)
be7085921228e1c1333dbf7aa2daca41115c3f23Timo Sirainen ret = filter->v.filter(filter, token, error_r);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
6111370718708a3a966c60a1941ea303a4e0c759Timo Sirainen if (ret <= 0)
6111370718708a3a966c60a1941ea303a4e0c759Timo Sirainen *token = NULL;
9bbc62421e4000b04563eb5f93272fc51b893fb2Timo Sirainen else {
b58ad1c4a4ef561b0081d6daa47c2dd7b223aeacTimo Sirainen i_assert(*token != NULL);
9bbc62421e4000b04563eb5f93272fc51b893fb2Timo Sirainen i_assert((*token)[0] != '\0');
9bbc62421e4000b04563eb5f93272fc51b893fb2Timo Sirainen }
db090e2a48bcc5ce162af0c36eef04407421379dTeemu Huovila return ret;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen}