/* fts-backend-squat.c revision 5601c23c0d59376dfda22c7eb807c9e1a0870426 */
/* Copyright (c) 2006-2011 Dovecot authors, see the included COPYING file */

#include "lib.h"
#include "array.h"
#include "str.h"
#include "unichar.h"
#include "mail-user.h"
#include "mail-namespace.h"
#include "mail-storage-private.h"
#include "mail-search-build.h"
#include "squat-trie.h"
#include "fts-squat-plugin.h"

#include <stdlib.h>

/* Base file name of the squat index. fts_backend_squat_set_box() appends it
   (after a '/') to the mailbox's index directory path to get the path that
   is given to squat_trie_init(). */
#define SQUAT_FILE_PREFIX "dovecot.index.search"

e82af44fe25ca9b88210f313548dc08538e4a677Timo Sirainenstruct squat_fts_backend {
965ed6ea3fc8f7637bd0d159d2fdb283a191ce34Timo Sirainen struct fts_backend backend;
10c5fd417af4ee30b68c967f5e7d5a49f4f149b5Timo Sirainen
10c5fd417af4ee30b68c967f5e7d5a49f4f149b5Timo Sirainen struct mailbox *box;
10c5fd417af4ee30b68c967f5e7d5a49f4f149b5Timo Sirainen struct squat_trie *trie;
10c5fd417af4ee30b68c967f5e7d5a49f4f149b5Timo Sirainen
965ed6ea3fc8f7637bd0d159d2fdb283a191ce34Timo Sirainen unsigned int partial_len, full_len;
1f18053d463f0294387b5e4dd11f9010bda9a24eTimo Sirainen bool refresh;
1f18053d463f0294387b5e4dd11f9010bda9a24eTimo Sirainen};
1f18053d463f0294387b5e4dd11f9010bda9a24eTimo Sirainen
e82af44fe25ca9b88210f313548dc08538e4a677Timo Sirainenstruct squat_fts_backend_update_context {
965ed6ea3fc8f7637bd0d159d2fdb283a191ce34Timo Sirainen struct fts_backend_update_context ctx;
965ed6ea3fc8f7637bd0d159d2fdb283a191ce34Timo Sirainen struct squat_trie_build_context *build_ctx;
965ed6ea3fc8f7637bd0d159d2fdb283a191ce34Timo Sirainen
c4457e497e01b57565d24da624968699b166e02aTimo Sirainen enum squat_index_type squat_type;
c4457e497e01b57565d24da624968699b166e02aTimo Sirainen uint32_t uid;
965ed6ea3fc8f7637bd0d159d2fdb283a191ce34Timo Sirainen string_t *hdr;
965ed6ea3fc8f7637bd0d159d2fdb283a191ce34Timo Sirainen
965ed6ea3fc8f7637bd0d159d2fdb283a191ce34Timo Sirainen bool failed;
965ed6ea3fc8f7637bd0d159d2fdb283a191ce34Timo Sirainen};
965ed6ea3fc8f7637bd0d159d2fdb283a191ce34Timo Sirainen
965ed6ea3fc8f7637bd0d159d2fdb283a191ce34Timo Sirainenstatic struct fts_backend *fts_backend_squat_alloc(void)
965ed6ea3fc8f7637bd0d159d2fdb283a191ce34Timo Sirainen{
965ed6ea3fc8f7637bd0d159d2fdb283a191ce34Timo Sirainen struct squat_fts_backend *backend;
965ed6ea3fc8f7637bd0d159d2fdb283a191ce34Timo Sirainen
965ed6ea3fc8f7637bd0d159d2fdb283a191ce34Timo Sirainen backend = i_new(struct squat_fts_backend, 1);
965ed6ea3fc8f7637bd0d159d2fdb283a191ce34Timo Sirainen backend->backend = fts_backend_squat;
965ed6ea3fc8f7637bd0d159d2fdb283a191ce34Timo Sirainen return &backend->backend;
965ed6ea3fc8f7637bd0d159d2fdb283a191ce34Timo Sirainen}
965ed6ea3fc8f7637bd0d159d2fdb283a191ce34Timo Sirainen
965ed6ea3fc8f7637bd0d159d2fdb283a191ce34Timo Sirainenstatic int
965ed6ea3fc8f7637bd0d159d2fdb283a191ce34Timo Sirainenfts_backend_squat_init(struct fts_backend *_backend, const char **error_r)
965ed6ea3fc8f7637bd0d159d2fdb283a191ce34Timo Sirainen{
965ed6ea3fc8f7637bd0d159d2fdb283a191ce34Timo Sirainen struct squat_fts_backend *backend =
965ed6ea3fc8f7637bd0d159d2fdb283a191ce34Timo Sirainen (struct squat_fts_backend *)_backend;
965ed6ea3fc8f7637bd0d159d2fdb283a191ce34Timo Sirainen const char *const *tmp, *env;
965ed6ea3fc8f7637bd0d159d2fdb283a191ce34Timo Sirainen unsigned int len;
965ed6ea3fc8f7637bd0d159d2fdb283a191ce34Timo Sirainen
965ed6ea3fc8f7637bd0d159d2fdb283a191ce34Timo Sirainen env = mail_user_plugin_getenv(_backend->ns->user, "fts_squat");
965ed6ea3fc8f7637bd0d159d2fdb283a191ce34Timo Sirainen if (env == NULL)
965ed6ea3fc8f7637bd0d159d2fdb283a191ce34Timo Sirainen return 0;
965ed6ea3fc8f7637bd0d159d2fdb283a191ce34Timo Sirainen
965ed6ea3fc8f7637bd0d159d2fdb283a191ce34Timo Sirainen for (tmp = t_strsplit_spaces(env, " "); *tmp != NULL; tmp++) {
965ed6ea3fc8f7637bd0d159d2fdb283a191ce34Timo Sirainen if (strncmp(*tmp, "partial=", 8) == 0) {
d1f0acc7fc722e13e8296228703adfe8a884d59eTimo Sirainen if (str_to_uint(*tmp + 8, &len) < 0 || len == 0) {
d1f0acc7fc722e13e8296228703adfe8a884d59eTimo Sirainen *error_r = t_strdup_printf(
965ed6ea3fc8f7637bd0d159d2fdb283a191ce34Timo Sirainen "Invalid partial length: %s", *tmp + 8);
return -1;
}
backend->partial_len = len;
} else if (strncmp(*tmp, "full=", 5) == 0) {
if (str_to_uint(*tmp + 5, &len) < 0 || len == 0) {
*error_r = t_strdup_printf(
"Invalid full length: %s", *tmp + 5);
return -1;
}
backend->full_len = len;
} else {
*error_r = t_strdup_printf("Invalid setting: %s", *tmp);
return -1;
}
}
return 0;
}
/* Detaches the backend from its current mailbox: forgets the mailbox pointer
   and deinitializes the trie if one is open. */
static void
fts_backend_squat_unset_box(struct squat_fts_backend *backend)
{
	backend->box = NULL;

	if (backend->trie == NULL)
		return;
	squat_trie_deinit(&backend->trie);
}
/* Releases the backend: drops the current mailbox and its trie, then frees
   the memory allocated by fts_backend_squat_alloc(). */
static void fts_backend_squat_deinit(struct fts_backend *_backend)
{
	struct squat_fts_backend *squat_backend =
		(struct squat_fts_backend *)_backend;

	fts_backend_squat_unset_box(squat_backend);
	i_free(squat_backend);
}
static void
fts_backend_squat_set_box(struct squat_fts_backend *backend,
struct mailbox *box)
{
const struct mailbox_permissions *perm = mailbox_get_permissions(box);
struct mail_storage *storage;
struct mailbox_status status;
const char *path;
enum squat_index_flags flags = 0;
if (backend->box == box)
return;
fts_backend_squat_unset_box(backend);
storage = mailbox_get_storage(box);
path = mailbox_list_get_path(box->list, box->name,
MAILBOX_LIST_PATH_TYPE_INDEX);
i_assert(*path != '\0'); /* fts already checked this */
mailbox_get_open_status(box, STATUS_UIDVALIDITY, &status);
if (storage->set->mmap_disable)
flags |= SQUAT_INDEX_FLAG_MMAP_DISABLE;
if (storage->set->mail_nfs_index)
flags |= SQUAT_INDEX_FLAG_NFS_FLUSH;
if (storage->set->dotlock_use_excl)
flags |= SQUAT_INDEX_FLAG_DOTLOCK_USE_EXCL;
backend->trie =
squat_trie_init(t_strconcat(path, "/"SQUAT_FILE_PREFIX, NULL),
status.uidvalidity,
storage->set->parsed_lock_method,
flags, perm->file_create_mode,
perm->file_create_gid);
if (backend->partial_len != 0)
squat_trie_set_partial_len(backend->trie, backend->partial_len);
if (backend->full_len != 0)
squat_trie_set_full_len(backend->trie, backend->full_len);
backend->box = box;
}
/* Selects box as the current mailbox and asks its trie for the last UID,
   which is stored in *last_uid_r. The return value is the one given by
   squat_trie_get_last_uid(). */
static int
fts_backend_squat_get_last_uid(struct fts_backend *_backend,
			       struct mailbox *box, uint32_t *last_uid_r)
{
	struct squat_fts_backend *squat_backend =
		(struct squat_fts_backend *)_backend;

	fts_backend_squat_set_box(squat_backend, box);
	return squat_trie_get_last_uid(squat_backend->trie, last_uid_r);
}
/* Allocates an update context for _backend together with its header buffer.
   No trie build is started here; that happens when a mailbox is set.
   Both allocations are freed by fts_backend_squat_update_deinit(). */
static struct fts_backend_update_context *
fts_backend_squat_update_init(struct fts_backend *_backend)
{
	struct squat_fts_backend_update_context *update_ctx =
		i_new(struct squat_fts_backend_update_context, 1);

	update_ctx->ctx.backend = _backend;
	/* initial size of the header buffer */
	update_ctx->hdr = str_new(default_pool, 1024*32);
	return &update_ctx->ctx;
}
/* Searches for all messages in box and adds the ids uid*2 and uid*2+1 of
   each of them to uids. Returns the result of mailbox_search_deinit(); the
   result of committing the transaction is ignored. */
static int get_all_msg_uids(struct mailbox *box, ARRAY_TYPE(seq_range) *uids)
{
	struct mailbox_transaction_context *trans;
	struct mail_search_context *search;
	struct mail_search_args *all_args;
	struct mail *mail;
	uint32_t first_id;
	int search_ret;

	trans = mailbox_transaction_begin(box, 0);

	all_args = mail_search_build_init();
	mail_search_build_add_all(all_args);
	search = mailbox_search_init(trans, all_args, NULL, 0, NULL);
	mail_search_args_unref(&all_args);

	while (mailbox_search_next(search, &mail)) {
		/* *2 because even/odd is for body/header */
		first_id = mail->uid * 2;
		seq_range_array_add_range(uids, first_id, first_id + 1);
	}
	search_ret = mailbox_search_deinit(&search);
	(void)mailbox_transaction_commit(&trans);
	return search_ret;
}
/* Gives the header text buffered for the current UID to the trie build as
   one SQUAT_INDEX_TYPE_HEADER block and empties the buffer. Does nothing
   while no UID has been set yet (uid == 0). Returns 0 on success, -1 if
   squat_trie_build_more() failed; the buffer is emptied in both cases. */
static int
fts_backend_squat_update_uid_changed(struct squat_fts_backend_update_context *ctx)
{
	int ret;

	if (ctx->uid == 0)
		return 0;

	ret = squat_trie_build_more(ctx->build_ctx, ctx->uid,
				    SQUAT_INDEX_TYPE_HEADER,
				    str_data(ctx->hdr),
				    str_len(ctx->hdr)) < 0 ? -1 : 0;
	str_truncate(ctx->hdr, 0);
	return ret;
}
static int
fts_backend_squat_build_deinit(struct squat_fts_backend_update_context *ctx)
{
struct squat_fts_backend *backend =
(struct squat_fts_backend *)ctx->ctx.backend;
ARRAY_TYPE(seq_range) uids;
int ret = 0;
if (ctx->build_ctx == NULL)
return 0;
if (fts_backend_squat_update_uid_changed(ctx) < 0)
ret = -1;
i_array_init(&uids, 1024);
if (get_all_msg_uids(backend->box, &uids) < 0) {
(void)squat_trie_build_deinit(&ctx->build_ctx, NULL);
ret = -1;
} else {
seq_range_array_invert(&uids, 2, (uint32_t)-2);
if (squat_trie_build_deinit(&ctx->build_ctx, &uids) < 0)
ret = -1;
}
array_free(&uids);
return ret;
}
/* Finishes the update and frees the context. Returns -1 if an error was
   recorded earlier in the context or if finishing the trie build fails,
   otherwise 0. */
static int
fts_backend_squat_update_deinit(struct fts_backend_update_context *_ctx)
{
	struct squat_fts_backend_update_context *update_ctx =
		(struct squat_fts_backend_update_context *)_ctx;
	int ret = 0;

	if (update_ctx->failed)
		ret = -1;
	if (fts_backend_squat_build_deinit(update_ctx) < 0)
		ret = -1;

	str_free(&update_ctx->hdr);
	i_free(update_ctx);
	return ret;
}
/* Switches the update to another mailbox: finishes the trie build that may
   be in progress, selects box in the backend and starts a new build on its
   trie. The callback can't return an error, so failures are recorded in the
   context's failed flag. */
static void
fts_backend_squat_update_set_mailbox(struct fts_backend_update_context *_ctx,
				     struct mailbox *box)
{
	struct squat_fts_backend_update_context *update_ctx =
		(struct squat_fts_backend_update_context *)_ctx;
	struct squat_fts_backend *squat_backend =
		(struct squat_fts_backend *)update_ctx->ctx.backend;

	if (fts_backend_squat_build_deinit(update_ctx) < 0)
		update_ctx->failed = TRUE;

	fts_backend_squat_set_box(squat_backend, box);
	if (squat_trie_build_init(squat_backend->trie,
				  &update_ctx->build_ctx) < 0)
		update_ctx->failed = TRUE;
}
/* Expunge callback of the update context. Not implemented: both parameters
   are unused and nothing is done. */
static void
fts_backend_squat_update_expunge(struct fts_backend_update_context *_ctx ATTR_UNUSED,
uint32_t last_uid ATTR_UNUSED)
{
/* FIXME: expunges aren't handled here yet */
}
/* Starts indexing a new key (a header or a body part) of message key->uid.

   When the UID differs from the previous key's UID, the headers buffered
   for the previous message are flushed to the trie first. For header keys
   "<hdr_name>: " is appended to the header buffer, so that the data given
   next follows it.

   Returns FALSE if the update had already failed before this call,
   otherwise TRUE. Binary body parts must not be given to this backend. */
static bool
fts_backend_squat_update_set_build_key(struct fts_backend_update_context *_ctx,
				       const struct fts_backend_build_key *key)
{
	struct squat_fts_backend_update_context *update_ctx =
		(struct squat_fts_backend_update_context *)_ctx;

	if (update_ctx->failed)
		return FALSE;

	/* a flush failure is only recorded; this key is still processed */
	if (key->uid != update_ctx->uid &&
	    fts_backend_squat_update_uid_changed(update_ctx) < 0)
		update_ctx->failed = TRUE;

	switch (key->type) {
	case FTS_BACKEND_BUILD_KEY_HDR:
	case FTS_BACKEND_BUILD_KEY_MIME_HDR:
		str_printfa(update_ctx->hdr, "%s: ", key->hdr_name);
		update_ctx->squat_type = SQUAT_INDEX_TYPE_HEADER;
		break;
	case FTS_BACKEND_BUILD_KEY_BODY_PART:
		update_ctx->squat_type = SQUAT_INDEX_TYPE_BODY;
		break;
	case FTS_BACKEND_BUILD_KEY_BODY_PART_BINARY:
		i_unreached();
	}
	update_ctx->uid = key->uid;
	return TRUE;
}
/* Ends the current key. A header key is terminated with a newline in the
   header buffer; nothing is done for other key types. */
static void
fts_backend_squat_update_unset_build_key(struct fts_backend_update_context *_ctx)
{
	struct squat_fts_backend_update_context *update_ctx =
		(struct squat_fts_backend_update_context *)_ctx;

	if (update_ctx->squat_type != SQUAT_INDEX_TYPE_HEADER)
		return;
	str_append_c(update_ctx->hdr, '\n');
}
/* Adds size bytes of data to the current key. Header data is only appended
   to the header buffer (returning 0) and reaches the trie later in one
   piece. Other data goes to the trie build immediately, and the result of
   squat_trie_build_more() is returned. */
static int
fts_backend_squat_update_build_more(struct fts_backend_update_context *_ctx,
				    const unsigned char *data, size_t size)
{
	struct squat_fts_backend_update_context *update_ctx =
		(struct squat_fts_backend_update_context *)_ctx;

	if (update_ctx->squat_type != SQUAT_INDEX_TYPE_HEADER) {
		return squat_trie_build_more(update_ctx->build_ctx,
					     update_ctx->uid,
					     update_ctx->squat_type,
					     data, size);
	}

	str_append_n(update_ctx->hdr, data, size);
	return 0;
}
/* Requests a refresh of the trie. Only a flag is set here; the refresh
   itself is done by the next fts_backend_squat_lookup(). Always returns 0. */
static int fts_backend_squat_refresh(struct fts_backend *_backend)
{
	struct squat_fts_backend *squat_backend =
		(struct squat_fts_backend *)_backend;

	squat_backend->refresh = TRUE;
	return 0;
}
/* Optimize callback. Not implemented: the backend parameter is unused and
   0 is always returned. */
static int fts_backend_squat_optimize(struct fts_backend *_backend ATTR_UNUSED)
{
/* FIXME: drop expunged messages */
return 0;
}
/* AND-combines the results of one search argument into the accumulated
   results:
     definite && definite -> definite
     definite && maybe -> maybe
     maybe && maybe -> maybe
   arg_maybe_uids is modified in the process. */
static void
squat_lookup_merge_and(ARRAY_TYPE(seq_range) *definite_uids,
		       ARRAY_TYPE(seq_range) *maybe_uids,
		       ARRAY_TYPE(seq_range) *arg_definite_uids,
		       ARRAY_TYPE(seq_range) *arg_maybe_uids)
{
	/* put definites among maybies, so they can be intersected */
	seq_range_array_merge(maybe_uids, definite_uids);
	seq_range_array_merge(arg_maybe_uids, arg_definite_uids);

	seq_range_array_intersect(maybe_uids, arg_maybe_uids);
	seq_range_array_intersect(definite_uids, arg_definite_uids);
	/* remove duplicate maybies that are also definites */
	seq_range_array_remove_seq_range(maybe_uids, definite_uids);
}

/* OR-combines the results of one search argument into the accumulated
   results:
     definite || definite -> definite
     definite || maybe -> definite
     maybe || maybe -> maybe
   arg_maybe_uids is modified in the process. */
static void
squat_lookup_merge_or(ARRAY_TYPE(seq_range) *definite_uids,
		      ARRAY_TYPE(seq_range) *maybe_uids,
		      ARRAY_TYPE(seq_range) *arg_definite_uids,
		      ARRAY_TYPE(seq_range) *arg_maybe_uids)
{
	/* remove maybies that are now definites */
	seq_range_array_remove_seq_range(arg_maybe_uids, definite_uids);
	seq_range_array_remove_seq_range(maybe_uids, arg_definite_uids);

	seq_range_array_merge(definite_uids, arg_definite_uids);
	seq_range_array_merge(maybe_uids, arg_maybe_uids);
}

/* Looks up one search argument from the trie and combines the result into
   definite_uids and maybe_uids, with AND if and_args is set and with OR
   otherwise.

   Only TEXT, BODY and the HEADER* argument types are handled. The search
   key is converted to decomposed titlecase before the lookup; a key that
   can't be converted causes a panic.

   Returns 0 if the argument type isn't handled (the result arrays are left
   untouched), 1 if the lookup succeeded and -1 if the trie lookup failed.
   The results are combined even when the lookup failed. */
static int squat_lookup_arg(struct squat_fts_backend *backend,
			    const struct mail_search_arg *arg, bool and_args,
			    ARRAY_TYPE(seq_range) *definite_uids,
			    ARRAY_TYPE(seq_range) *maybe_uids)
{
	ARRAY_TYPE(seq_range) arg_definite_uids, arg_maybe_uids;
	enum squat_index_type squat_type;
	string_t *key;
	uint32_t last_uid;
	int lookup_ret;

	switch (arg->type) {
	case SEARCH_BODY:
		squat_type = SQUAT_INDEX_TYPE_BODY;
		break;
	case SEARCH_HEADER:
	case SEARCH_HEADER_ADDRESS:
	case SEARCH_HEADER_COMPRESS_LWSP:
		squat_type = SQUAT_INDEX_TYPE_HEADER;
		break;
	case SEARCH_TEXT:
		squat_type = SQUAT_INDEX_TYPE_HEADER |
			SQUAT_INDEX_TYPE_BODY;
		break;
	default:
		return 0;
	}

	i_array_init(&arg_definite_uids, 128);
	i_array_init(&arg_maybe_uids, 128);

	key = t_str_new(128);
	if (uni_utf8_to_decomposed_titlecase(arg->value.str,
					     strlen(arg->value.str), key) < 0)
		i_panic("squat: search key not utf8");

	lookup_ret = squat_trie_lookup(backend->trie, str_c(key), squat_type,
				       &arg_definite_uids, &arg_maybe_uids);

	if (arg->match_not) {
		/* definite -> non-match
		   maybe -> maybe
		   non-match -> maybe

		   so everything from 1 to the last UID becomes a maybe,
		   except for the definite matches */
		array_clear(&arg_maybe_uids);

		if (squat_trie_get_last_uid(backend->trie, &last_uid) < 0)
			i_unreached();
		seq_range_array_add_range(&arg_maybe_uids, 1, last_uid);
		seq_range_array_remove_seq_range(&arg_maybe_uids,
						 &arg_definite_uids);
		array_clear(&arg_definite_uids);
	}

	if (and_args) {
		squat_lookup_merge_and(definite_uids, maybe_uids,
				       &arg_definite_uids, &arg_maybe_uids);
	} else {
		squat_lookup_merge_or(definite_uids, maybe_uids,
				      &arg_definite_uids, &arg_maybe_uids);
	}

	array_free(&arg_definite_uids);
	array_free(&arg_maybe_uids);
	return lookup_ret < 0 ? -1 : 1;
}
/* Looks up every argument in the args list from box's trie and combines the
   results (AND or OR according to and_args) into result->definite_uids and
   result->maybe_uids. A refresh requested earlier is done first.

   Arguments that the trie lookup handled get match_always set. Returns 0 on
   success, -1 if the refresh or any lookup fails. */
static int
fts_backend_squat_lookup(struct fts_backend *_backend, struct mailbox *box,
			 struct mail_search_arg *args, bool and_args,
			 struct fts_result *result)
{
	struct squat_fts_backend *squat_backend =
		(struct squat_fts_backend *)_backend;
	struct mail_search_arg *arg;
	int ret;

	fts_backend_squat_set_box(squat_backend, box);

	if (squat_backend->refresh) {
		if (squat_trie_refresh(squat_backend->trie) < 0)
			return -1;
		squat_backend->refresh = FALSE;
	}

	for (arg = args; arg != NULL; arg = arg->next) {
		ret = squat_lookup_arg(squat_backend, arg, and_args,
				       &result->definite_uids,
				       &result->maybe_uids);
		if (ret < 0)
			return -1;
		if (ret > 0)
			arg->match_always = TRUE;
	}
	return 0;
}
/* Definition of the "squat" FTS backend. fts_backend_squat_alloc() copies
   this struct into every backend instance it allocates.

   The callbacks are listed positionally, so their order has to match the
   member order of the callback struct inside struct fts_backend, which is
   declared outside this file. */
struct fts_backend fts_backend_squat = {
.name = "squat",
/* NOTE(review): presumably asks fts to give the indexed data in decomposed
   titlecase, matching the conversion squat_lookup_arg() does for search
   keys - confirm against the fts API */
.flags = FTS_BACKEND_FLAG_BUILD_DTCASE,
{
/* backend lifecycle */
fts_backend_squat_alloc,
fts_backend_squat_init,
fts_backend_squat_deinit,
fts_backend_squat_get_last_uid,
/* index updating */
fts_backend_squat_update_init,
fts_backend_squat_update_deinit,
fts_backend_squat_update_set_mailbox,
fts_backend_squat_update_expunge,
fts_backend_squat_update_set_build_key,
fts_backend_squat_update_unset_build_key,
fts_backend_squat_update_build_more,
/* maintenance and lookups */
fts_backend_squat_refresh,
fts_backend_squat_optimize,
fts_backend_default_can_lookup,
fts_backend_squat_lookup,
/* the last callback slot is left unset */
NULL
}
};