fts-search-args.c revision f784d5bb8edbec88829524135cfa100129f5384d
f26c954658dfd7461f290f0b5d924951a6db219aJan Zeleny/* Copyright (c) 2015-2017 Dovecot authors, see the included COPYING file */
f26c954658dfd7461f290f0b5d924951a6db219aJan Zelenystatic void strings_deduplicate(ARRAY_TYPE(const_string) *arr)
f26c954658dfd7461f290f0b5d924951a6db219aJan Zeleny const char *const *strings;
f26c954658dfd7461f290f0b5d924951a6db219aJan Zeleny unsigned int i, count;
fae99bfe4bfc8b4a12e9c2a0ad01b3684c22f934Simo Sorcefts_search_arg_create_or(const struct mail_search_arg *orig_arg, pool_t pool,
d81d8d3dc151ebc95cd0e3f3b14c1cdaa48980f1Sumit Bose const char *const *tokenp;
c03214d427ea43e7bf8255ccc79faa905c89f7f6Jakub Hrozek /* create the OR arg first as the parent */
f26c954658dfd7461f290f0b5d924951a6db219aJan Zeleny /* now create all the child args for the OR */
108db0e3b9e06e530364ef8228634f5e3f6bd3b5Jakub Hrozek arg->match_not = FALSE; /* we copied this to the root OR */
f26c954658dfd7461f290f0b5d924951a6db219aJan Zelenyfts_backend_dovecot_expand_tokens(struct fts_filter *filter,
f26c954658dfd7461f290f0b5d924951a6db219aJan Zeleny const char **error_r)
f26c954658dfd7461f290f0b5d924951a6db219aJan Zeleny /* first add the word exactly as it is, without any tokenization */
c03214d427ea43e7bf8255ccc79faa905c89f7f6Jakub Hrozek /* then add it tokenized, but without filtering */
f26c954658dfd7461f290f0b5d924951a6db219aJan Zeleny /* add the word filtered */
83bf46f4066e3d5e838a32357c201de9bd6ecdfdNikolai Kondrashov ret = fts_filter_filter(filter, &token2, &error);
f26c954658dfd7461f290f0b5d924951a6db219aJan Zeleny } else if (ret < 0) {
f26c954658dfd7461f290f0b5d924951a6db219aJan Zeleny *error_r = t_strdup_printf("Couldn't filter search token: %s", error);
21d485184df986e1a123f70c689517386e51a5ceMichal Zidek /* The filter dropped the token, which means it was
f26c954658dfd7461f290f0b5d924951a6db219aJan Zeleny never even indexed. Ignore this word entirely in the
f26c954658dfd7461f290f0b5d924951a6db219aJan Zeleny search query. */
f26c954658dfd7461f290f0b5d924951a6db219aJan Zeleny arg = fts_search_arg_create_or(orig_arg, pool, &tokens);
21d485184df986e1a123f70c689517386e51a5ceMichal Zidekfts_backend_dovecot_tokenize_lang(struct fts_user_language *user_lang,
a3c8390d19593b1e5277d95bfb4ab206d4785150Nikolai Kondrashov const char *orig_token, const char **error_r)
f26c954658dfd7461f290f0b5d924951a6db219aJan Zeleny /* we want all the tokens found from the string to be found, so create
f26c954658dfd7461f290f0b5d924951a6db219aJan Zeleny a parent AND and place all the filtered token alternatives under
bfd59d1a2d0d45125e5164ef12c425690d519f61Jakub Hrozek and_arg = p_new(pool, struct mail_search_arg, 1);
21d485184df986e1a123f70c689517386e51a5ceMichal Zidek /* reset tokenizer between search args in case there's any state left
f26c954658dfd7461f290f0b5d924951a6db219aJan Zeleny from some previous failure */
f26c954658dfd7461f290f0b5d924951a6db219aJan Zeleny fts_tokenizer_reset(user_lang->search_tokenizer);
bfd59d1a2d0d45125e5164ef12c425690d519f61Jakub Hrozek while ((ret = fts_tokenizer_next(user_lang->search_tokenizer,
f26c954658dfd7461f290f0b5d924951a6db219aJan Zeleny (const void *)orig_token,
75dd4b05e1dacc76dc9d5f16be31978f84a71dc5Sumit Bose if (fts_backend_dovecot_expand_tokens(user_lang->filter, pool,
75dd4b05e1dacc76dc9d5f16be31978f84a71dc5Sumit Bose while (ret >= 0 &&
58d02e0d3d6d48c97fccdb2ad7212e065671ad6dStephen Gallagher (ret = fts_tokenizer_final(user_lang->search_tokenizer, &token, &error)) > 0) {
75dd4b05e1dacc76dc9d5f16be31978f84a71dc5Sumit Bose if (fts_backend_dovecot_expand_tokens(user_lang->filter, pool,
75dd4b05e1dacc76dc9d5f16be31978f84a71dc5Sumit Bose *error_r = t_strdup_printf("Couldn't tokenize search args: %s", error);
58d02e0d3d6d48c97fccdb2ad7212e065671ad6dStephen Gallagher /* nothing was actually expanded, remove the empty and_arg */
8be5e4497e5008f7807178acdfcbf97365ec4e73Stephen Gallagherstatic int fts_search_arg_expand(struct fts_backend *backend, pool_t pool,
c20a339d54b39120b4051f690ca759e6d079f177Stephen Gallagher const ARRAY_TYPE(fts_user_language) *languages;
a3c8390d19593b1e5277d95bfb4ab206d4785150Nikolai Kondrashov struct mail_search_arg *or_arg, *orig_arg = *argp;
c20a339d54b39120b4051f690ca759e6d079f177Stephen Gallagher const char *error, *orig_token = orig_arg->value.str;
c20a339d54b39120b4051f690ca759e6d079f177Stephen Gallagher !fts_header_has_language((*argp)->hdr_field_name)) {
c20a339d54b39120b4051f690ca759e6d079f177Stephen Gallagher /* use only the data-language */
a3c8390d19593b1e5277d95bfb4ab206d4785150Nikolai Kondrashov languages = fts_user_get_data_languages(backend->ns->user);
a3c8390d19593b1e5277d95bfb4ab206d4785150Nikolai Kondrashov languages = fts_user_get_all_languages(backend->ns->user);
58d02e0d3d6d48c97fccdb2ad7212e065671ad6dStephen Gallagher /* OR together all the different expansions for different languages.
58d02e0d3d6d48c97fccdb2ad7212e065671ad6dStephen Gallagher it's enough for one of them to match. */
58d02e0d3d6d48c97fccdb2ad7212e065671ad6dStephen Gallagher or_arg = p_new(pool, struct mail_search_arg, 1);
4a9c1047354dbe5a4ed41e5951ae623e3772e113René Genz if (fts_backend_dovecot_tokenize_lang(*langp, pool, or_arg,
58d02e0d3d6d48c97fccdb2ad7212e065671ad6dStephen Gallagher /* we couldn't parse any tokens from the input */
f26c954658dfd7461f290f0b5d924951a6db219aJan Zelenyfts_search_args_expand_tree(struct fts_backend *backend, pool_t pool,
56e88cd5f3501566778b138e4934ee8e7f3fa674Lukas Slebodnik if (fts_search_args_expand_tree(backend, pool,
108db0e3b9e06e530364ef8228634f5e3f6bd3b5Jakub Hrozek /* we're testing for the existence of
108db0e3b9e06e530364ef8228634f5e3f6bd3b5Jakub Hrozek the header */
108db0e3b9e06e530364ef8228634f5e3f6bd3b5Jakub Hrozek /* fall through */
b9fbeb75e7a4f50f98d979a70a710f9221892483Lukas Slebodnik ret = fts_search_arg_expand(backend, pool, argp);
108db0e3b9e06e530364ef8228634f5e3f6bd3b5Jakub Hrozekint fts_search_args_expand(struct fts_backend *backend,
f26c954658dfd7461f290f0b5d924951a6db219aJan Zeleny struct mail_search_arg *args_dup, *orig_args = args->args;
f26c954658dfd7461f290f0b5d924951a6db219aJan Zeleny /* don't keep re-expanding every time the search args are used.
f26c954658dfd7461f290f0b5d924951a6db219aJan Zeleny this is especially important to avoid an assert-crash in
f26c954658dfd7461f290f0b5d924951a6db219aJan Zeleny index_search_result_update_flags(). */
21d485184df986e1a123f70c689517386e51a5ceMichal Zidek /* duplicate the args, so if expansion fails we haven't changed
f26c954658dfd7461f290f0b5d924951a6db219aJan Zeleny args_dup = mail_search_arg_dup(args->pool, args->args);
f26c954658dfd7461f290f0b5d924951a6db219aJan Zeleny if (fts_search_args_expand_tree(backend, args->pool, &args_dup) < 0)
21d485184df986e1a123f70c689517386e51a5ceMichal Zidek /* we'll need to re-simplify the args if we changed anything */
21d485184df986e1a123f70c689517386e51a5ceMichal Zidek /* duplicated args aren't initialized */