fts-storage.c revision bd1b2615928a1e8be190cb0405754f0aec8cac2f
76b43e4417bab52e913da39b5f5bc2a130d3f149Timo Sirainen/* Copyright (c) 2006-2008 Dovecot authors, see the included COPYING file */
0fd246126fece57712566c725d6353f255f5fcfaTimo Sirainen struct timeval search_start_time, last_notify;
8e371a3ce32bd64288786855b8ce0cb63f19f7d1Timo Sirainen union mailbox_transaction_module_context module_ctx;
8e371a3ce32bd64288786855b8ce0cb63f19f7d1Timo Sirainenstatic MODULE_CONTEXT_DEFINE_INIT(fts_storage_module,
8e371a3ce32bd64288786855b8ce0cb63f19f7d1Timo Sirainenstatic MODULE_CONTEXT_DEFINE_INIT(fts_mail_module, &mail_module_register);
2793e3bd31d212d6506686aa70773e13d9d98195Timo Sirainenstatic int fts_mailbox_close(struct mailbox *box)
c37e5edd83ff696d396131f7147ef971cf678911Timo Sirainenstatic int fts_build_mail_flush_headers(struct fts_storage_build_context *ctx)
0f55802e8fdd95ae4290da6da077819209b71f70Timo Sirainen if (fts_backend_build_more(ctx->build, ctx->uid, str_data(ctx->headers),
38a4c09de37bc2ebdc38427a2b958c46dfdcffb1Timo Sirainenstatic bool fts_build_want_index_part(const struct message_block *block)
0f55802e8fdd95ae4290da6da077819209b71f70Timo Sirainen /* we'll index only text/xxx and message/rfc822 parts for now */
38a4c09de37bc2ebdc38427a2b958c46dfdcffb1Timo Sirainenstatic void fts_build_mail_header(struct fts_storage_build_context *ctx,
2793e3bd31d212d6506686aa70773e13d9d98195Timo Sirainen const struct message_header_line *hdr = block->hdr;
2793e3bd31d212d6506686aa70773e13d9d98195Timo Sirainen /* hdr->full_value is always set because we get the block from
2793e3bd31d212d6506686aa70773e13d9d98195Timo Sirainen message_decoder */
2793e3bd31d212d6506686aa70773e13d9d98195Timo Sirainen str_append_n(ctx->headers, hdr->middle, hdr->middle_len);
2793e3bd31d212d6506686aa70773e13d9d98195Timo Sirainen str_append_n(ctx->headers, hdr->full_value, hdr->full_value_len);
1b0cfbf3cc77a670b92fff5c30f7b1eb17a63ab1Timo Sirainenstatic int fts_build_mail(struct fts_storage_build_context *ctx, uint32_t uid)
83bb013a99f0936995f9c7a1077822662d8fefdbTimo Sirainen if (mail_get_stream(ctx->mail, NULL, NULL, &input) < 0)
043c8a96a035379bcba04f487d58457beefdfcaaTimo Sirainen parser = message_parser_init(pool_datastack_create(), input,
2793e3bd31d212d6506686aa70773e13d9d98195Timo Sirainen ret = message_parser_parse_next_block(parser, &raw_block);
38a4c09de37bc2ebdc38427a2b958c46dfdcffb1Timo Sirainen if (raw_block.hdr == NULL && raw_block.size != 0 &&
38a4c09de37bc2ebdc38427a2b958c46dfdcffb1Timo Sirainen /* skipping this body */
2793e3bd31d212d6506686aa70773e13d9d98195Timo Sirainen if (!message_decoder_decode_next_block(decoder, &raw_block,
38a4c09de37bc2ebdc38427a2b958c46dfdcffb1Timo Sirainen /* end of headers */
1b0cfbf3cc77a670b92fff5c30f7b1eb17a63ab1Timo Sirainen if (fts_backend_build_more(ctx->build, ctx->uid,
992118a50af940482b6cf884a89be56d7015580aTimo Sirainen if (message_parser_deinit(&parser, &parts) < 0)
992118a50af940482b6cf884a89be56d7015580aTimo Sirainen mail_set_cache_corrupted(ctx->mail, MAIL_FETCH_MESSAGE_PARTS);
c37e5edd83ff696d396131f7147ef971cf678911Timo Sirainen /* Index all headers at the end. This is required for Squat,
c37e5edd83ff696d396131f7147ef971cf678911Timo Sirainen because it can handle only incremental UIDs. */
bc93929cdd9000ca560a5f42a27f50ab307f1efbTimo Sirainenstatic int fts_build_init(struct fts_search_context *fctx)
bc93929cdd9000ca560a5f42a27f50ab307f1efbTimo Sirainen struct mailbox_transaction_context *t = fctx->t;
1b0cfbf3cc77a670b92fff5c30f7b1eb17a63ab1Timo Sirainen struct fts_backend *backend = fctx->build_backend;
1176124297af5c56e932c0863c6637ff21d8a0efTimo Sirainen uint32_t last_uid, last_uid_locked, seq1, seq2;
eb0ede66120bb63c0212bad69e67efca1eb47324Timo Sirainen if (fts_backend_get_last_uid(backend, &last_uid) < 0)
5afa8e2edf4f313cd56e5909f92f39c3b5b7b4d3Timo Sirainen mailbox_get_seq_range(t->box, last_uid+1, (uint32_t)-1, &seq1, &seq2);
2793e3bd31d212d6506686aa70773e13d9d98195Timo Sirainen /* no new messages */
e22b857e838fe118de3f78513aad6a3c6f4306b3Timo Sirainen fctx->best_arg->type == SEARCH_HEADER_COMPRESS_LWSP) {
1b0cfbf3cc77a670b92fff5c30f7b1eb17a63ab1Timo Sirainen /* we're not updating the index just for header lookups */
1b0cfbf3cc77a670b92fff5c30f7b1eb17a63ab1Timo Sirainen if (fts_backend_build_init(backend, &last_uid_locked, &build) < 0)
48566ca412a7cf3b42512fd0ec112744778e5da0Timo Sirainen if (last_uid != last_uid_locked && last_uid_locked != (uint32_t)-1) {
c06cd6539a3dbd68eb546464076187be6bc4290fTimo Sirainen /* changed, need to get again the sequences */
5afa8e2edf4f313cd56e5909f92f39c3b5b7b4d3Timo Sirainen mailbox_get_seq_range(t->box, last_uid+1, (uint32_t)-1,
c06cd6539a3dbd68eb546464076187be6bc4290fTimo Sirainen /* no new messages */
c4b376dd6e0c423006d7ac83a39253bcaf8e7c47Timo Sirainen mail_search_build_add_seqset(search_args, seq1, seq2);
88dc563319efecd6e68bad16b0d92672da05584aTimo Sirainen ctx = i_new(struct fts_storage_build_context, 1);
c4b376dd6e0c423006d7ac83a39253bcaf8e7c47Timo Sirainen ctx->search_ctx = mailbox_search_init(t, search_args, NULL);
1b0cfbf3cc77a670b92fff5c30f7b1eb17a63ab1Timo Sirainenstatic int fts_build_deinit(struct fts_storage_build_context **_ctx)
1b0cfbf3cc77a670b92fff5c30f7b1eb17a63ab1Timo Sirainen struct fts_storage_build_context *ctx = *_ctx;
0fd246126fece57712566c725d6353f255f5fcfaTimo Sirainen struct mailbox *box = ctx->mail->transaction->box;
88dc563319efecd6e68bad16b0d92672da05584aTimo Sirainen if (mailbox_search_deinit(&ctx->search_ctx) < 0)
1b0cfbf3cc77a670b92fff5c30f7b1eb17a63ab1Timo Sirainen if (fts_backend_build_deinit(&ctx->build) < 0)
0fd246126fece57712566c725d6353f255f5fcfaTimo Sirainen if (ioloop_time - ctx->search_start_time.tv_sec >=
0fd246126fece57712566c725d6353f255f5fcfaTimo Sirainen /* we notified at least once */
0fd246126fece57712566c725d6353f255f5fcfaTimo Sirainenstatic void fts_build_notify(struct fts_storage_build_context *ctx)
0fd246126fece57712566c725d6353f255f5fcfaTimo Sirainen struct mailbox *box = ctx->mail->transaction->box;
0fd246126fece57712566c725d6353f255f5fcfaTimo Sirainen /* set the search time in here, in case a plugin
0fd246126fece57712566c725d6353f255f5fcfaTimo Sirainen already spent some time indexing the mailbox */
0fd246126fece57712566c725d6353f255f5fcfaTimo Sirainen } else if (box->storage->callbacks->notify_ok != NULL) {
c4b376dd6e0c423006d7ac83a39253bcaf8e7c47Timo Sirainen range = array_idx(&ctx->search_args->args->value.seqset, 0);
1176124297af5c56e932c0863c6637ff21d8a0efTimo Sirainen percentage = (ctx->mail->seq - range->seq1) * 100.0 /
0fd246126fece57712566c725d6353f255f5fcfaTimo Sirainen secs = (msecs / (percentage / 100.0) - msecs) / 1000;
eddd9bf1a1369aea4a2715f6be1137da6d17d293Timo Sirainen text = t_strdup_printf("Indexed %d%% of the mailbox, "
88dc563319efecd6e68bad16b0d92672da05584aTimo Sirainenstatic int fts_build_more(struct fts_storage_build_context *ctx)
88dc563319efecd6e68bad16b0d92672da05584aTimo Sirainen unsigned int count = 0;
88dc563319efecd6e68bad16b0d92672da05584aTimo Sirainen while (mailbox_search_next(ctx->search_ctx, ctx->mail) > 0) {
bc93929cdd9000ca560a5f42a27f50ab307f1efbTimo Sirainenstatic bool fts_try_build_init(struct fts_search_context *fctx)
1b0cfbf3cc77a670b92fff5c30f7b1eb17a63ab1Timo Sirainen if (fts_backend_is_building(fctx->build_backend)) {
1b0cfbf3cc77a670b92fff5c30f7b1eb17a63ab1Timo Sirainen /* this process is already building the indexes */
bc93929cdd9000ca560a5f42a27f50ab307f1efbTimo Sirainen /* the index was up to date */
88dc563319efecd6e68bad16b0d92672da05584aTimo Sirainenfts_mailbox_search_init(struct mailbox_transaction_context *t,
a0c453a8edaec90fb0d945c874de0b1845bc7d7eTimo Sirainen struct fts_transaction_context *ft = FTS_CONTEXT(t);
88dc563319efecd6e68bad16b0d92672da05584aTimo Sirainen struct fts_mailbox *fbox = FTS_CONTEXT(t->box);
c4b376dd6e0c423006d7ac83a39253bcaf8e7c47Timo Sirainen ctx = fbox->module_ctx.super.search_init(t, args, sort_program);
8e371a3ce32bd64288786855b8ce0cb63f19f7d1Timo Sirainen MODULE_CONTEXT_SET(ctx, fts_storage_module, fctx);
1b0cfbf3cc77a670b92fff5c30f7b1eb17a63ab1Timo Sirainen if (fbox->backend_substr == NULL && fbox->backend_fast == NULL)
88dc563319efecd6e68bad16b0d92672da05584aTimo Sirainenstatic int fts_mailbox_search_next_nonblock(struct mail_search_context *ctx,
88dc563319efecd6e68bad16b0d92672da05584aTimo Sirainen struct fts_mailbox *fbox = FTS_CONTEXT(ctx->transaction->box);
88dc563319efecd6e68bad16b0d92672da05584aTimo Sirainen struct fts_search_context *fctx = FTS_CONTEXT(ctx);
1b0cfbf3cc77a670b92fff5c30f7b1eb17a63ab1Timo Sirainen /* we're still waiting for this process (but another command)
1b0cfbf3cc77a670b92fff5c30f7b1eb17a63ab1Timo Sirainen to finish building the indexes */
1b0cfbf3cc77a670b92fff5c30f7b1eb17a63ab1Timo Sirainen /* this command is still building the indexes */
88dc563319efecd6e68bad16b0d92672da05584aTimo Sirainen /* finished / error */
1b0cfbf3cc77a670b92fff5c30f7b1eb17a63ab1Timo Sirainen /* if we're here, the indexes are either built or they're not used */
1b0cfbf3cc77a670b92fff5c30f7b1eb17a63ab1Timo Sirainenfts_mailbox_search_args_definite_set(struct fts_search_context *fctx)
c4b376dd6e0c423006d7ac83a39253bcaf8e7c47Timo Sirainen for (arg = fctx->args->args; arg != NULL; arg = arg->next) {
bd1b2615928a1e8be190cb0405754f0aec8cac2fTimo Sirainenstatic bool fts_mailbox_search_next_update_seq(struct mail_search_context *ctx)
2793e3bd31d212d6506686aa70773e13d9d98195Timo Sirainen struct fts_mailbox *fbox = FTS_CONTEXT(ctx->transaction->box);
2793e3bd31d212d6506686aa70773e13d9d98195Timo Sirainen struct fts_search_context *fctx = FTS_CONTEXT(ctx);
1b0cfbf3cc77a670b92fff5c30f7b1eb17a63ab1Timo Sirainen struct seq_range *def_range, *maybe_range, *range;
8e371a3ce32bd64288786855b8ce0cb63f19f7d1Timo Sirainen return fbox->module_ctx.super.search_next_update_seq(ctx);
1b0cfbf3cc77a670b92fff5c30f7b1eb17a63ab1Timo Sirainen /* fts_search_lookup() was called successfully */
1b0cfbf3cc77a670b92fff5c30f7b1eb17a63ab1Timo Sirainen def_range = array_get_modifiable(&fctx->definite_seqs,
1b0cfbf3cc77a670b92fff5c30f7b1eb17a63ab1Timo Sirainen maybe_range = array_get_modifiable(&fctx->maybe_seqs,
1b0cfbf3cc77a670b92fff5c30f7b1eb17a63ab1Timo Sirainen /* if we're ahead of current positions, skip them */
c63c3c4d548416914b8c6734fe18dd69bb900775Timo Sirainen wanted_seq > def_range[fctx->definite_idx].seq2)
c63c3c4d548416914b8c6734fe18dd69bb900775Timo Sirainen wanted_seq > maybe_range[fctx->maybe_idx].seq2)
1b0cfbf3cc77a670b92fff5c30f7b1eb17a63ab1Timo Sirainen /* use whichever is lower of definite/maybe */
f210ec6b25f80d06419921e9231465bb114ee971Timo Sirainen /* look for the non-indexed mails */
1b0cfbf3cc77a670b92fff5c30f7b1eb17a63ab1Timo Sirainen use_maybe = maybe_range[fctx->maybe_idx].seq1 <
1b0cfbf3cc77a670b92fff5c30f7b1eb17a63ab1Timo Sirainen /* current sequence is already larger than where
1b0cfbf3cc77a670b92fff5c30f7b1eb17a63ab1Timo Sirainen range begins, so use the current sequence. */
1b0cfbf3cc77a670b92fff5c30f7b1eb17a63ab1Timo Sirainen /* ctx->seq points to previous sequence we want */
8e371a3ce32bd64288786855b8ce0cb63f19f7d1Timo Sirainen ret = fbox->module_ctx.super.search_next_update_seq(ctx);
c4b376dd6e0c423006d7ac83a39253bcaf8e7c47Timo Sirainen mail_search_args_reset(ctx->args->args, FALSE);
1b0cfbf3cc77a670b92fff5c30f7b1eb17a63ab1Timo Sirainen /* we have definite results, update args */
2793e3bd31d212d6506686aa70773e13d9d98195Timo Sirainenstatic int fts_mailbox_search_deinit(struct mail_search_context *ctx)
a0c453a8edaec90fb0d945c874de0b1845bc7d7eTimo Sirainen struct fts_transaction_context *ft = FTS_CONTEXT(ctx->transaction);
2793e3bd31d212d6506686aa70773e13d9d98195Timo Sirainen struct fts_mailbox *fbox = FTS_CONTEXT(ctx->transaction->box);
2793e3bd31d212d6506686aa70773e13d9d98195Timo Sirainen struct fts_search_context *fctx = FTS_CONTEXT(ctx);
88dc563319efecd6e68bad16b0d92672da05584aTimo Sirainen /* the search was cancelled */
8e371a3ce32bd64288786855b8ce0cb63f19f7d1Timo Sirainen return fbox->module_ctx.super.search_deinit(ctx);
73b50eecfc31750a312e2f940023f522eb07178cTimo Sirainenstatic void fts_mail_expunge(struct mail *_mail)
fda168427e1950518acd6d600f1a10a29a5baef0Timo Sirainen struct mail_private *mail = (struct mail_private *)_mail;
a0c453a8edaec90fb0d945c874de0b1845bc7d7eTimo Sirainen struct fts_mail *fmail = FTS_MAIL_CONTEXT(mail);
fda168427e1950518acd6d600f1a10a29a5baef0Timo Sirainen struct fts_mailbox *fbox = FTS_CONTEXT(_mail->box);
a1761856683b4bf745eb4e32cefabeb851efb301Timo Sirainen struct fts_transaction_context *ft = FTS_CONTEXT(_mail->transaction);
1b0cfbf3cc77a670b92fff5c30f7b1eb17a63ab1Timo Sirainen fts_backend_expunge(fbox->backend_substr, _mail);
eb0ede66120bb63c0212bad69e67efca1eb47324Timo Sirainen fts_backend_expunge(fbox->backend_fast, _mail);
a0c453a8edaec90fb0d945c874de0b1845bc7d7eTimo Sirainenstatic int fts_score_cmp(const void *key, const void *data)
a0c453a8edaec90fb0d945c874de0b1845bc7d7eTimo Sirainenstatic int fts_mail_get_special(struct mail *_mail, enum mail_fetch_field field,
a0c453a8edaec90fb0d945c874de0b1845bc7d7eTimo Sirainen const char **value_r)
a0c453a8edaec90fb0d945c874de0b1845bc7d7eTimo Sirainen struct mail_private *mail = (struct mail_private *)_mail;
a0c453a8edaec90fb0d945c874de0b1845bc7d7eTimo Sirainen struct fts_mail *fmail = FTS_MAIL_CONTEXT(mail);
a0c453a8edaec90fb0d945c874de0b1845bc7d7eTimo Sirainen struct fts_transaction_context *ft = FTS_CONTEXT(_mail->transaction);
a0c453a8edaec90fb0d945c874de0b1845bc7d7eTimo Sirainen unsigned int count;
a0c453a8edaec90fb0d945c874de0b1845bc7d7eTimo Sirainen if (field != MAIL_FETCH_SEARCH_SCORE || ft->score_map == NULL ||
a0c453a8edaec90fb0d945c874de0b1845bc7d7eTimo Sirainen scores = bsearch(&_mail->uid, scores, count, sizeof(*scores),
a0c453a8edaec90fb0d945c874de0b1845bc7d7eTimo Sirainen i_snprintf(fmail->score, sizeof(fmail->score),
a0c453a8edaec90fb0d945c874de0b1845bc7d7eTimo Sirainen return fmail->module_ctx.super.get_special(_mail, field, value_r);
fda168427e1950518acd6d600f1a10a29a5baef0Timo Sirainenstatic struct mail *
fda168427e1950518acd6d600f1a10a29a5baef0Timo Sirainenfts_mail_alloc(struct mailbox_transaction_context *t,
fda168427e1950518acd6d600f1a10a29a5baef0Timo Sirainen struct mailbox_header_lookup_ctx *wanted_headers)
fda168427e1950518acd6d600f1a10a29a5baef0Timo Sirainen struct fts_mailbox *fbox = FTS_CONTEXT(t->box);
1b0cfbf3cc77a670b92fff5c30f7b1eb17a63ab1Timo Sirainen if (fbox->backend_substr != NULL || fbox->backend_fast != NULL) {
a0c453a8edaec90fb0d945c874de0b1845bc7d7eTimo Sirainen fmail = p_new(mail->pool, struct fts_mail, 1);
a0c453a8edaec90fb0d945c874de0b1845bc7d7eTimo Sirainen MODULE_CONTEXT_SET(mail, fts_mail_module, fmail);
eb0ede66120bb63c0212bad69e67efca1eb47324Timo Sirainenstatic void fts_box_backends_init(struct mailbox *box)
eb0ede66120bb63c0212bad69e67efca1eb47324Timo Sirainen const char *const *tmp;
eb0ede66120bb63c0212bad69e67efca1eb47324Timo Sirainen for (tmp = t_strsplit(fbox->env, ", "); *tmp != NULL; tmp++) {
1b0cfbf3cc77a670b92fff5c30f7b1eb17a63ab1Timo Sirainen i_fatal("fts: duplicate substring backend: %s",
14bb36cbb67b42e32105c3d843a8c974dc7ed436Timo Sirainen if ((box->storage->flags & MAIL_STORAGE_FLAG_DEBUG) != 0 &&
14bb36cbb67b42e32105c3d843a8c974dc7ed436Timo Sirainen fbox->backend_substr == NULL && fbox->backend_fast == NULL)
14bb36cbb67b42e32105c3d843a8c974dc7ed436Timo Sirainen i_info("fts: No backends enabled by the fts setting");
85a4ae7e8df7ea45a7665828e5edf48a5fc85730Timo Sirainen ft = i_new(struct fts_transaction_context, 1);
a1761856683b4bf745eb4e32cefabeb851efb301Timo Sirainen /* the backend creation is delayed until the first transaction is
a1761856683b4bf745eb4e32cefabeb851efb301Timo Sirainen started. at that point the mailbox has been synced at least once. */
8e371a3ce32bd64288786855b8ce0cb63f19f7d1Timo Sirainen t = fbox->module_ctx.super.transaction_begin(box, flags);
8e371a3ce32bd64288786855b8ce0cb63f19f7d1Timo Sirainen MODULE_CONTEXT_SET(t, fts_storage_module, ft);
1b0cfbf3cc77a670b92fff5c30f7b1eb17a63ab1Timo Sirainenfts_storage_build_context_deinit(struct fts_storage_build_context *build_ctx)
1b0cfbf3cc77a670b92fff5c30f7b1eb17a63ab1Timo Sirainen (void)fts_backend_build_deinit(&build_ctx->build);
eb0ede66120bb63c0212bad69e67efca1eb47324Timo Sirainenfts_transaction_finish(struct mailbox *box, struct fts_transaction_context *ft,
eb0ede66120bb63c0212bad69e67efca1eb47324Timo Sirainen fts_backend_expunge_finish(fbox->backend_fast,
85a4ae7e8df7ea45a7665828e5edf48a5fc85730Timo Sirainenstatic void fts_transaction_rollback(struct mailbox_transaction_context *t)
85a4ae7e8df7ea45a7665828e5edf48a5fc85730Timo Sirainen struct fts_transaction_context *ft = FTS_CONTEXT(t);
1b0cfbf3cc77a670b92fff5c30f7b1eb17a63ab1Timo Sirainen fts_storage_build_context_deinit(ft->build_ctx);
8e371a3ce32bd64288786855b8ce0cb63f19f7d1Timo Sirainen fbox->module_ctx.super.transaction_rollback(t);
85a4ae7e8df7ea45a7665828e5edf48a5fc85730Timo Sirainenstatic int fts_transaction_commit(struct mailbox_transaction_context *t,
85a4ae7e8df7ea45a7665828e5edf48a5fc85730Timo Sirainen struct fts_transaction_context *ft = FTS_CONTEXT(t);
1b0cfbf3cc77a670b92fff5c30f7b1eb17a63ab1Timo Sirainen fts_storage_build_context_deinit(ft->build_ctx);
63f36c2b47217fc2dc4ed49cfc1907311d5ed366Timo Sirainen ret = fbox->module_ctx.super.transaction_commit(t,
e726bf74fcc8d24f4c9d0d83217b3db4314d9d1fTimo Sirainenstatic void fts_mailbox_init(struct mailbox *box, const char *env)
88dc563319efecd6e68bad16b0d92672da05584aTimo Sirainen box->v.search_next_nonblock = fts_mailbox_search_next_nonblock;
2793e3bd31d212d6506686aa70773e13d9d98195Timo Sirainen box->v.search_next_update_seq = fts_mailbox_search_next_update_seq;
2793e3bd31d212d6506686aa70773e13d9d98195Timo Sirainen box->v.search_deinit = fts_mailbox_search_deinit;
85a4ae7e8df7ea45a7665828e5edf48a5fc85730Timo Sirainen box->v.transaction_begin = fts_transaction_begin;
85a4ae7e8df7ea45a7665828e5edf48a5fc85730Timo Sirainen box->v.transaction_rollback = fts_transaction_rollback;
85a4ae7e8df7ea45a7665828e5edf48a5fc85730Timo Sirainen box->v.transaction_commit = fts_transaction_commit;
8e371a3ce32bd64288786855b8ce0cb63f19f7d1Timo Sirainen MODULE_CONTEXT_SET(box, fts_storage_module, fbox);
e726bf74fcc8d24f4c9d0d83217b3db4314d9d1fTimo Sirainen const char *env;