fts-search.c revision c8b5a21a139992e66b4ad02adb69eaf929b3d024
76b43e4417bab52e913da39b5f5bc2a130d3f149Timo Sirainen/* Copyright (c) 2006-2008 Dovecot authors, see the included COPYING file */
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen#include "lib.h"
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen#include "array.h"
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen#include "str.h"
cbc61fcb33b370d049c16a3c44568b4deb4e2b33Timo Sirainen#include "seq-range-array.h"
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen#include "charset-utf8.h"
00d58fcfe8191d6ce7efa801d289a5c0fe88d1aeTimo Sirainen#include "mail-search.h"
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen#include "mail-storage-private.h"
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen#include "fts-api-private.h"
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen#include "fts-storage.h"
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainenstatic void
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainenuid_range_to_seqs(struct mailbox *box, const ARRAY_TYPE(seq_range) *uid_range,
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen ARRAY_TYPE(seq_range) *seq_range)
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen{
8e371a3ce32bd64288786855b8ce0cb63f19f7d1Timo Sirainen const struct seq_range *range;
8e371a3ce32bd64288786855b8ce0cb63f19f7d1Timo Sirainen unsigned int i, count;
8e371a3ce32bd64288786855b8ce0cb63f19f7d1Timo Sirainen uint32_t seq1, seq2;
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen range = array_get(uid_range, &count);
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen i_array_init(seq_range, count);
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen for (i = 0; i < count; i++) {
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen mailbox_get_uids(box, range[i].seq1, range[i].seq2,
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen &seq1, &seq2);
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen if (seq1 != 0)
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen seq_range_array_add_range(seq_range, seq1, seq2);
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen }
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen}
8e371a3ce32bd64288786855b8ce0cb63f19f7d1Timo Sirainen
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainenstatic void fts_uid_results_to_seq(struct fts_search_context *fctx)
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen{
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen ARRAY_TYPE(seq_range) uid_range;
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen
8e371a3ce32bd64288786855b8ce0cb63f19f7d1Timo Sirainen uid_range = fctx->definite_seqs;
8e371a3ce32bd64288786855b8ce0cb63f19f7d1Timo Sirainen uid_range_to_seqs(fctx->t->box, &uid_range, &fctx->definite_seqs);
cbc61fcb33b370d049c16a3c44568b4deb4e2b33Timo Sirainen array_free(&uid_range);
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen uid_range = fctx->maybe_seqs;
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen uid_range_to_seqs(fctx->t->box, &uid_range, &fctx->maybe_seqs);
f6c1297c26b355c4aec2a08978f51ec3efecb351Timo Sirainen array_free(&uid_range);
f6c1297c26b355c4aec2a08978f51ec3efecb351Timo Sirainen}
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen
8e371a3ce32bd64288786855b8ce0cb63f19f7d1Timo Sirainenstatic int fts_search_lookup_arg(struct fts_search_context *fctx,
8e371a3ce32bd64288786855b8ce0cb63f19f7d1Timo Sirainen struct mail_search_arg *arg, bool filter)
8e371a3ce32bd64288786855b8ce0cb63f19f7d1Timo Sirainen{
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen struct fts_backend *backend;
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen enum fts_lookup_flags flags = 0;
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen const char *key;
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen string_t *key_utf8;
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen enum charset_result result;
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen int ret;
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen switch (arg->type) {
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen case SEARCH_HEADER:
8e371a3ce32bd64288786855b8ce0cb63f19f7d1Timo Sirainen /* we can filter out messages that don't have the header,
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen but we can't trust definite results list. */
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen flags = FTS_LOOKUP_FLAG_HEADER;
8e371a3ce32bd64288786855b8ce0cb63f19f7d1Timo Sirainen backend = fctx->fbox->backend_substr;
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen key = arg->value.str;
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen if (*key == '\0') {
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen /* we're only checking the existence
73b50eecfc31750a312e2f940023f522eb07178cTimo Sirainen of the header. */
73b50eecfc31750a312e2f940023f522eb07178cTimo Sirainen key = arg->hdr_field_name;
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen }
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen break;
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen case SEARCH_TEXT:
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen case SEARCH_TEXT_FAST:
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen flags = FTS_LOOKUP_FLAG_HEADER;
cbc61fcb33b370d049c16a3c44568b4deb4e2b33Timo Sirainen case SEARCH_BODY:
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen case SEARCH_BODY_FAST:
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen flags |= FTS_LOOKUP_FLAG_BODY;
cbc61fcb33b370d049c16a3c44568b4deb4e2b33Timo Sirainen key = arg->value.str;
cbc61fcb33b370d049c16a3c44568b4deb4e2b33Timo Sirainen backend = fctx->fbox->backend_fast != NULL &&
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen (arg->type == SEARCH_TEXT_FAST ||
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen arg->type == SEARCH_BODY_FAST) ?
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen fctx->fbox->backend_fast : fctx->fbox->backend_substr;
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen break;
73b50eecfc31750a312e2f940023f522eb07178cTimo Sirainen default:
cbc61fcb33b370d049c16a3c44568b4deb4e2b33Timo Sirainen /* can't filter this */
cbc61fcb33b370d049c16a3c44568b4deb4e2b33Timo Sirainen i_assert(filter);
cbc61fcb33b370d049c16a3c44568b4deb4e2b33Timo Sirainen return 0;
73b50eecfc31750a312e2f940023f522eb07178cTimo Sirainen }
cbc61fcb33b370d049c16a3c44568b4deb4e2b33Timo Sirainen if (arg->not)
73b50eecfc31750a312e2f940023f522eb07178cTimo Sirainen flags |= FTS_LOOKUP_FLAG_INVERT;
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen
cbc61fcb33b370d049c16a3c44568b4deb4e2b33Timo Sirainen /* convert key to titlecase */
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen key_utf8 = t_str_new(128);
73b50eecfc31750a312e2f940023f522eb07178cTimo Sirainen if (charset_to_utf8_str(fctx->charset, CHARSET_FLAG_DECOMP_TITLECASE,
73b50eecfc31750a312e2f940023f522eb07178cTimo Sirainen key, key_utf8, &result) < 0) {
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen /* unknown charset, can't handle this */
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen ret = 0;
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen } else if (result != CHARSET_RET_OK) {
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen /* let the core code handle this error */
b04e691711fd026fc82ba3e0b411420e7da4ec7eTimo Sirainen ret = 0;
4d2211dac61c615c5bdfd501ea54d46c89d41b0fTimo Sirainen } else if (!backend->locked && fts_backend_lock(backend) <= 0)
4d2211dac61c615c5bdfd501ea54d46c89d41b0fTimo Sirainen ret = -1;
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen else if (!filter) {
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen ret = fts_backend_lookup(backend, str_c(key_utf8), flags,
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen &fctx->definite_seqs,
eddd9bf1a1369aea4a2715f6be1137da6d17d293Timo Sirainen &fctx->maybe_seqs);
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen } else {
eddd9bf1a1369aea4a2715f6be1137da6d17d293Timo Sirainen ret = fts_backend_filter(backend, str_c(key_utf8), flags,
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen &fctx->definite_seqs,
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen &fctx->maybe_seqs);
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen }
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen return ret;
73b50eecfc31750a312e2f940023f522eb07178cTimo Sirainen}
73b50eecfc31750a312e2f940023f522eb07178cTimo Sirainen
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainenvoid fts_search_lookup(struct fts_search_context *fctx)
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen{
4d2211dac61c615c5bdfd501ea54d46c89d41b0fTimo Sirainen struct mail_search_arg *arg;
63f36c2b47217fc2dc4ed49cfc1907311d5ed366Timo Sirainen int ret;
b04e691711fd026fc82ba3e0b411420e7da4ec7eTimo Sirainen
eddd9bf1a1369aea4a2715f6be1137da6d17d293Timo Sirainen if (fctx->best_arg == NULL)
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen return;
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen i_array_init(&fctx->definite_seqs, 64);
cbc61fcb33b370d049c16a3c44568b4deb4e2b33Timo Sirainen i_array_init(&fctx->maybe_seqs, 64);
eddd9bf1a1369aea4a2715f6be1137da6d17d293Timo Sirainen
eddd9bf1a1369aea4a2715f6be1137da6d17d293Timo Sirainen /* start filtering with the best arg */
eddd9bf1a1369aea4a2715f6be1137da6d17d293Timo Sirainen T_BEGIN {
eddd9bf1a1369aea4a2715f6be1137da6d17d293Timo Sirainen ret = fts_search_lookup_arg(fctx, fctx->best_arg, FALSE);
cbc61fcb33b370d049c16a3c44568b4deb4e2b33Timo Sirainen } T_END;
eddd9bf1a1369aea4a2715f6be1137da6d17d293Timo Sirainen /* filter the rest */
eddd9bf1a1369aea4a2715f6be1137da6d17d293Timo Sirainen for (arg = fctx->args; arg != NULL && ret == 0; arg = arg->next) {
eddd9bf1a1369aea4a2715f6be1137da6d17d293Timo Sirainen if (arg != fctx->best_arg) {
eddd9bf1a1369aea4a2715f6be1137da6d17d293Timo Sirainen T_BEGIN {
eddd9bf1a1369aea4a2715f6be1137da6d17d293Timo Sirainen ret = fts_search_lookup_arg(fctx, arg, TRUE);
cbc61fcb33b370d049c16a3c44568b4deb4e2b33Timo Sirainen } T_END;
cbc61fcb33b370d049c16a3c44568b4deb4e2b33Timo Sirainen }
cbc61fcb33b370d049c16a3c44568b4deb4e2b33Timo Sirainen }
eddd9bf1a1369aea4a2715f6be1137da6d17d293Timo Sirainen
eddd9bf1a1369aea4a2715f6be1137da6d17d293Timo Sirainen if (fctx->fbox->backend_fast != NULL &&
eddd9bf1a1369aea4a2715f6be1137da6d17d293Timo Sirainen fctx->fbox->backend_fast->locked)
eddd9bf1a1369aea4a2715f6be1137da6d17d293Timo Sirainen fts_backend_unlock(fctx->fbox->backend_fast);
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen if (fctx->fbox->backend_substr != NULL &&
eddd9bf1a1369aea4a2715f6be1137da6d17d293Timo Sirainen fctx->fbox->backend_substr->locked)
cbc61fcb33b370d049c16a3c44568b4deb4e2b33Timo Sirainen fts_backend_unlock(fctx->fbox->backend_substr);
cbc61fcb33b370d049c16a3c44568b4deb4e2b33Timo Sirainen
cbc61fcb33b370d049c16a3c44568b4deb4e2b33Timo Sirainen if (ret == 0) {
cbc61fcb33b370d049c16a3c44568b4deb4e2b33Timo Sirainen fctx->seqs_set = TRUE;
cbc61fcb33b370d049c16a3c44568b4deb4e2b33Timo Sirainen fts_uid_results_to_seq(fctx);
cbc61fcb33b370d049c16a3c44568b4deb4e2b33Timo Sirainen }
cbc61fcb33b370d049c16a3c44568b4deb4e2b33Timo Sirainen}
eddd9bf1a1369aea4a2715f6be1137da6d17d293Timo Sirainen
eddd9bf1a1369aea4a2715f6be1137da6d17d293Timo Sirainenstatic bool arg_is_better(const struct mail_search_arg *new_arg,
19e8adccba16ff419f5675b1575358c2956dce83Timo Sirainen const struct mail_search_arg *old_arg)
eddd9bf1a1369aea4a2715f6be1137da6d17d293Timo Sirainen{
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen if (old_arg == NULL)
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen return TRUE;
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen if (new_arg == NULL)
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen return FALSE;
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen /* avoid NOTs */
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen if (old_arg->not && !new_arg->not)
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen return TRUE;
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen if (!old_arg->not && new_arg->not)
8e371a3ce32bd64288786855b8ce0cb63f19f7d1Timo Sirainen return FALSE;
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen /* prefer not to use headers. they have a larger possibility of
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen having lots of identical strings */
73b50eecfc31750a312e2f940023f522eb07178cTimo Sirainen if (old_arg->type == SEARCH_HEADER)
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen return TRUE;
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen else if (new_arg->type == SEARCH_HEADER)
8e371a3ce32bd64288786855b8ce0cb63f19f7d1Timo Sirainen return FALSE;
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen return strlen(new_arg->value.str) > strlen(old_arg->value.str);
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen}
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainenstatic void
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainenfts_search_args_find_best(struct mail_search_arg *args,
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen struct mail_search_arg **best_fast_arg,
73b50eecfc31750a312e2f940023f522eb07178cTimo Sirainen struct mail_search_arg **best_substr_arg)
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen{
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen for (; args != NULL; args = args->next) {
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen switch (args->type) {
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen case SEARCH_BODY_FAST:
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen case SEARCH_TEXT_FAST:
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen if (arg_is_better(args, *best_fast_arg))
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen *best_fast_arg = args;
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen break;
8e371a3ce32bd64288786855b8ce0cb63f19f7d1Timo Sirainen case SEARCH_BODY:
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen case SEARCH_TEXT:
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen case SEARCH_HEADER:
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen if (arg_is_better(args, *best_substr_arg))
8e371a3ce32bd64288786855b8ce0cb63f19f7d1Timo Sirainen *best_substr_arg = args;
8e371a3ce32bd64288786855b8ce0cb63f19f7d1Timo Sirainen break;
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen default:
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen break;
8e371a3ce32bd64288786855b8ce0cb63f19f7d1Timo Sirainen }
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen }
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen}
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen
8e371a3ce32bd64288786855b8ce0cb63f19f7d1Timo Sirainenvoid fts_search_analyze(struct fts_search_context *fctx)
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen{
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen struct mail_search_arg *best_fast_arg = NULL, *best_substr_arg = NULL;
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen
cbc61fcb33b370d049c16a3c44568b4deb4e2b33Timo Sirainen fts_search_args_find_best(fctx->args, &best_fast_arg, &best_substr_arg);
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen
cbc61fcb33b370d049c16a3c44568b4deb4e2b33Timo Sirainen if (best_fast_arg != NULL && fctx->fbox->backend_fast != NULL) {
cbc61fcb33b370d049c16a3c44568b4deb4e2b33Timo Sirainen /* use fast backend whenever possible */
cbc61fcb33b370d049c16a3c44568b4deb4e2b33Timo Sirainen fctx->best_arg = best_fast_arg;
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen fctx->build_backend = fctx->fbox->backend_fast;
cbc61fcb33b370d049c16a3c44568b4deb4e2b33Timo Sirainen } else if (best_fast_arg != NULL || best_substr_arg != NULL) {
cbc61fcb33b370d049c16a3c44568b4deb4e2b33Timo Sirainen fctx->build_backend = fctx->fbox->backend_substr;
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen fctx->best_arg = arg_is_better(best_substr_arg, best_fast_arg) ?
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen best_substr_arg : best_fast_arg;
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen }
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen}
cf7164ece50797a67fc4bfb5889022ac93a36a8aTimo Sirainen