fts-backend-solr.c revision 0fbe824cee2ee2f8fa0f27f4c0d4561a53b29fa8
/* Copyright (c) 2006-2008 Dovecot authors, see the included COPYING file */
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen#include "lib.h"
9f0f2de10e4ea0c99052bf4b2bef8179f2536228Timo Sirainen#include "array.h"
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen#include "str.h"
9f0f2de10e4ea0c99052bf4b2bef8179f2536228Timo Sirainen#include "mail-storage-private.h"
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen#include "solr-connection.h"
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen#include "fts-solr-plugin.h"
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen#include <stdlib.h>
b215a8a123623782554a83f3025ef4e771bd8f01Timo Sirainen#include <curl/curl.h>
b215a8a123623782554a83f3025ef4e771bd8f01Timo Sirainen
a8284e999d091cd29210fa75ecdc8076376a7345Timo Sirainenstruct solr_fts_backend_build_context {
a8284e999d091cd29210fa75ecdc8076376a7345Timo Sirainen struct fts_backend_build_context ctx;
a8284e999d091cd29210fa75ecdc8076376a7345Timo Sirainen
9f0f2de10e4ea0c99052bf4b2bef8179f2536228Timo Sirainen struct solr_connection_post *post;
9f0f2de10e4ea0c99052bf4b2bef8179f2536228Timo Sirainen uint32_t prev_uid, uid_validity;
5ef28f68edef46f69961b19b7c1dcd8ec5a955e8Timo Sirainen string_t *cmd;
5ef28f68edef46f69961b19b7c1dcd8ec5a955e8Timo Sirainen bool headers;
9f0f2de10e4ea0c99052bf4b2bef8179f2536228Timo Sirainen};
9f0f2de10e4ea0c99052bf4b2bef8179f2536228Timo Sirainen
/* Solr connection shared by every backend instance in this file. It is
   created by the first fts_backend_solr_init() call; static storage
   starts out as NULL. */
static struct solr_connection *solr_conn;
5ef28f68edef46f69961b19b7c1dcd8ec5a955e8Timo Sirainen
9f0f2de10e4ea0c99052bf4b2bef8179f2536228Timo Sirainenstatic void solr_quote_str(string_t *dest, const char *str)
9f0f2de10e4ea0c99052bf4b2bef8179f2536228Timo Sirainen{
da7f1a07f583df8905684a7b78469960afd7c78dPhil Carmody solr_connection_quote_str(solr_conn, dest, str);
9f0f2de10e4ea0c99052bf4b2bef8179f2536228Timo Sirainen}
9f0f2de10e4ea0c99052bf4b2bef8179f2536228Timo Sirainen
9f0f2de10e4ea0c99052bf4b2bef8179f2536228Timo Sirainenstatic void
9f0f2de10e4ea0c99052bf4b2bef8179f2536228Timo Sirainenxml_encode_data(string_t *dest, const unsigned char *data, unsigned int len)
7bafda1813454621e03615e83d55bccfa7cc56bdTimo Sirainen{
7bafda1813454621e03615e83d55bccfa7cc56bdTimo Sirainen unsigned int i;
7bafda1813454621e03615e83d55bccfa7cc56bdTimo Sirainen
7bafda1813454621e03615e83d55bccfa7cc56bdTimo Sirainen for (i = 0; i < len; i++) {
7bafda1813454621e03615e83d55bccfa7cc56bdTimo Sirainen switch (data[i]) {
7bafda1813454621e03615e83d55bccfa7cc56bdTimo Sirainen case '&':
7bafda1813454621e03615e83d55bccfa7cc56bdTimo Sirainen str_append(dest, "&amp;");
cab0827de053c8b58c6528eb430c089576a49ca9Timo Sirainen break;
7bafda1813454621e03615e83d55bccfa7cc56bdTimo Sirainen case '<':
7bafda1813454621e03615e83d55bccfa7cc56bdTimo Sirainen str_append(dest, "&lt;");
7bafda1813454621e03615e83d55bccfa7cc56bdTimo Sirainen break;
7bafda1813454621e03615e83d55bccfa7cc56bdTimo Sirainen case '>':
7bafda1813454621e03615e83d55bccfa7cc56bdTimo Sirainen str_append(dest, "&gt;");
d09e4ee15faa7b6f1804b78f15d8778030401b4cTimo Sirainen break;
98348e3f27a2f59c2f02cd67974004b4cd595c8cTimo Sirainen default:
7bafda1813454621e03615e83d55bccfa7cc56bdTimo Sirainen str_append_c(dest, data[i]);
93a7d1ee4b518b5c85f9721dc6539e4dab6aae00Timo Sirainen break;
f7f25f9e1a38678d0e97d2e609beac16285fac6bTimo Sirainen }
7bafda1813454621e03615e83d55bccfa7cc56bdTimo Sirainen }
7bafda1813454621e03615e83d55bccfa7cc56bdTimo Sirainen}
7bafda1813454621e03615e83d55bccfa7cc56bdTimo Sirainen
7bafda1813454621e03615e83d55bccfa7cc56bdTimo Sirainenstatic void xml_encode(string_t *dest, const char *str)
7bafda1813454621e03615e83d55bccfa7cc56bdTimo Sirainen{
9f0f2de10e4ea0c99052bf4b2bef8179f2536228Timo Sirainen xml_encode_data(dest, (const unsigned char *)str, strlen(str));
9f0f2de10e4ea0c99052bf4b2bef8179f2536228Timo Sirainen}
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainenstatic struct fts_backend *
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainenfts_backend_solr_init(struct mailbox *box ATTR_UNUSED)
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen{
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen struct fts_backend *backend;
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen if (solr_conn == NULL)
7744586e3e0fd60158abfbb03a233d3bd8d6c48bTimo Sirainen solr_conn = solr_connection_init(getenv("FTS_SOLR"));
4f7987384f306ea93b0258623a4cdd69601f2d0eTimo Sirainen
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen backend = i_new(struct fts_backend, 1);
61618d4c58080570f689614fec204ae14e90cef2Timo Sirainen *backend = fts_backend_solr;
50e20db49f29917fe9adcf1b56b11badf28bd0e4Timo Sirainen return backend;
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen}
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen
/* Free a backend instance allocated by fts_backend_solr_init(). Only the
   backend struct itself is released; the shared Solr connection is left
   untouched. */
static void fts_backend_solr_deinit(struct fts_backend *backend)
{
	i_free(backend);
}
5ef28f68edef46f69961b19b7c1dcd8ec5a955e8Timo Sirainen
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainenstatic int fts_backend_solr_get_last_uid(struct fts_backend *backend,
f9511e684858bf5f6ac77ab12254b85b737beae8Stephan Bosch uint32_t *last_uid_r)
009217abb57a24a4076092e8e4e165545747839eStephan Bosch{
f9511e684858bf5f6ac77ab12254b85b737beae8Stephan Bosch struct mailbox_status status;
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen ARRAY_TYPE(seq_range) uids;
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen const struct seq_range *uidvals;
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen unsigned int count;
7744586e3e0fd60158abfbb03a233d3bd8d6c48bTimo Sirainen string_t *str;
7bafda1813454621e03615e83d55bccfa7cc56bdTimo Sirainen
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen str = t_str_new(256);
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen str_append(str, "fl=uid&rows=1&sort=uid%20desc&q=");
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen mailbox_get_status(backend->box, STATUS_UIDVALIDITY, &status);
7bafda1813454621e03615e83d55bccfa7cc56bdTimo Sirainen str_printfa(str, "uidv:%u%%20box:", status.uidvalidity);
7bafda1813454621e03615e83d55bccfa7cc56bdTimo Sirainen solr_quote_str(str, backend->box->name);
7bafda1813454621e03615e83d55bccfa7cc56bdTimo Sirainen str_append(str, "%20user:");
01c7ba8366ceafdbc0752d93079bbe785bebc9e1Timo Sirainen solr_quote_str(str, backend->box->storage->user);
556d69b89bf0afd1ba53acce4775097e494b6b8bMartti Rannanjärvi
556d69b89bf0afd1ba53acce4775097e494b6b8bMartti Rannanjärvi t_array_init(&uids, 1);
556d69b89bf0afd1ba53acce4775097e494b6b8bMartti Rannanjärvi if (solr_connection_select(solr_conn, str_c(str), &uids) < 0)
556d69b89bf0afd1ba53acce4775097e494b6b8bMartti Rannanjärvi return -1;
13eb655174f3857b38f4e3ac8658c82184715fd4Timo Sirainen
f9511e684858bf5f6ac77ab12254b85b737beae8Stephan Bosch uidvals = array_get(&uids, &count);
02c75e04c6ff80726bb59e3ea34a7995ad1f6f7cTimo Sirainen if (count == 0) {
5ef28f68edef46f69961b19b7c1dcd8ec5a955e8Timo Sirainen /* nothing indexed yet for this mailbox */
f9511e684858bf5f6ac77ab12254b85b737beae8Stephan Bosch *last_uid_r = 0;
f9511e684858bf5f6ac77ab12254b85b737beae8Stephan Bosch } else if (count == 1 && uidvals[0].seq1 == uidvals[0].seq2) {
f9511e684858bf5f6ac77ab12254b85b737beae8Stephan Bosch *last_uid_r = uidvals[0].seq1;
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen } else {
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen i_error("fts_solr: Last UID lookup returned multiple rows");
7744586e3e0fd60158abfbb03a233d3bd8d6c48bTimo Sirainen return -1;
e9e5e84ffb2ce2e606a24ce6d930580367562ff0Timo Sirainen }
e9e5e84ffb2ce2e606a24ce6d930580367562ff0Timo Sirainen return 0;
e9e5e84ffb2ce2e606a24ce6d930580367562ff0Timo Sirainen}
e9e5e84ffb2ce2e606a24ce6d930580367562ff0Timo Sirainen
7744586e3e0fd60158abfbb03a233d3bd8d6c48bTimo Sirainenstatic int
7bafda1813454621e03615e83d55bccfa7cc56bdTimo Sirainenfts_backend_solr_build_init(struct fts_backend *backend, uint32_t *last_uid_r,
7bafda1813454621e03615e83d55bccfa7cc56bdTimo Sirainen struct fts_backend_build_context **ctx_r)
7bafda1813454621e03615e83d55bccfa7cc56bdTimo Sirainen{
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen struct solr_fts_backend_build_context *ctx;
7bafda1813454621e03615e83d55bccfa7cc56bdTimo Sirainen struct mailbox_status status;
7bafda1813454621e03615e83d55bccfa7cc56bdTimo Sirainen
383d0e8c24451468d6bea17e4b55d74de744abe6Timo Sirainen *last_uid_r = (uint32_t)-1;
7bafda1813454621e03615e83d55bccfa7cc56bdTimo Sirainen
383d0e8c24451468d6bea17e4b55d74de744abe6Timo Sirainen ctx = i_new(struct solr_fts_backend_build_context, 1);
7bafda1813454621e03615e83d55bccfa7cc56bdTimo Sirainen ctx->ctx.backend = backend;
7bafda1813454621e03615e83d55bccfa7cc56bdTimo Sirainen ctx->post = solr_connection_post_begin(solr_conn);
a64adf62fa33f2463a86f990217b0c9078531a40Timo Sirainen ctx->cmd = str_new(default_pool, 256);
a8284e999d091cd29210fa75ecdc8076376a7345Timo Sirainen
a8284e999d091cd29210fa75ecdc8076376a7345Timo Sirainen mailbox_get_status(backend->box, STATUS_UIDVALIDITY, &status);
a8284e999d091cd29210fa75ecdc8076376a7345Timo Sirainen ctx->uid_validity = status.uidvalidity;
a8284e999d091cd29210fa75ecdc8076376a7345Timo Sirainen
a8284e999d091cd29210fa75ecdc8076376a7345Timo Sirainen *ctx_r = &ctx->ctx;
a8284e999d091cd29210fa75ecdc8076376a7345Timo Sirainen return 0;
a8284e999d091cd29210fa75ecdc8076376a7345Timo Sirainen}
7744586e3e0fd60158abfbb03a233d3bd8d6c48bTimo Sirainen
a8284e999d091cd29210fa75ecdc8076376a7345Timo Sirainenstatic int
a8284e999d091cd29210fa75ecdc8076376a7345Timo Sirainenfts_backend_solr_build_more(struct fts_backend_build_context *_ctx,
1c3e6a4a7557f23f02abacbef9847dd4346f8553Timo Sirainen uint32_t uid, const unsigned char *data,
a8284e999d091cd29210fa75ecdc8076376a7345Timo Sirainen size_t size, bool headers)
a8284e999d091cd29210fa75ecdc8076376a7345Timo Sirainen{
a8284e999d091cd29210fa75ecdc8076376a7345Timo Sirainen struct solr_fts_backend_build_context *ctx =
a8284e999d091cd29210fa75ecdc8076376a7345Timo Sirainen (struct solr_fts_backend_build_context *)_ctx;
a8284e999d091cd29210fa75ecdc8076376a7345Timo Sirainen struct mailbox *box = _ctx->backend->box;
a8284e999d091cd29210fa75ecdc8076376a7345Timo Sirainen string_t *cmd = ctx->cmd;
a8284e999d091cd29210fa75ecdc8076376a7345Timo Sirainen
a8284e999d091cd29210fa75ecdc8076376a7345Timo Sirainen /* body comes first, then headers */
7744586e3e0fd60158abfbb03a233d3bd8d6c48bTimo Sirainen if (ctx->prev_uid != uid) {
a8284e999d091cd29210fa75ecdc8076376a7345Timo Sirainen /* uid changed */
a8284e999d091cd29210fa75ecdc8076376a7345Timo Sirainen str_truncate(cmd, 0);
a8284e999d091cd29210fa75ecdc8076376a7345Timo Sirainen if (ctx->prev_uid == 0)
a8284e999d091cd29210fa75ecdc8076376a7345Timo Sirainen str_append(cmd, "<add>");
a8284e999d091cd29210fa75ecdc8076376a7345Timo Sirainen else
a8284e999d091cd29210fa75ecdc8076376a7345Timo Sirainen str_append(cmd, "</field></doc>");
a8284e999d091cd29210fa75ecdc8076376a7345Timo Sirainen ctx->prev_uid = uid;
a8284e999d091cd29210fa75ecdc8076376a7345Timo Sirainen
a8284e999d091cd29210fa75ecdc8076376a7345Timo Sirainen str_printfa(cmd, "<doc>"
a8284e999d091cd29210fa75ecdc8076376a7345Timo Sirainen "<field name=\"uid\">%u</field>"
a8284e999d091cd29210fa75ecdc8076376a7345Timo Sirainen "<field name=\"uidv\">%u</field>",
a8284e999d091cd29210fa75ecdc8076376a7345Timo Sirainen uid, ctx->uid_validity);
a8284e999d091cd29210fa75ecdc8076376a7345Timo Sirainen
a8284e999d091cd29210fa75ecdc8076376a7345Timo Sirainen str_append(cmd, "<field name=\"box\">");
a8284e999d091cd29210fa75ecdc8076376a7345Timo Sirainen xml_encode(cmd, box->name);
a8284e999d091cd29210fa75ecdc8076376a7345Timo Sirainen str_append(cmd, "</field><field name=\"user\">");
a8284e999d091cd29210fa75ecdc8076376a7345Timo Sirainen xml_encode(cmd, box->storage->user);
a8284e999d091cd29210fa75ecdc8076376a7345Timo Sirainen
a8284e999d091cd29210fa75ecdc8076376a7345Timo Sirainen str_printfa(cmd, "</field><field name=\"id\">%u/%u/",
a8284e999d091cd29210fa75ecdc8076376a7345Timo Sirainen uid, ctx->uid_validity);
a8284e999d091cd29210fa75ecdc8076376a7345Timo Sirainen xml_encode(cmd, box->storage->user);
a8284e999d091cd29210fa75ecdc8076376a7345Timo Sirainen str_append_c(cmd, '/');
a8284e999d091cd29210fa75ecdc8076376a7345Timo Sirainen xml_encode(cmd, box->name);
a8284e999d091cd29210fa75ecdc8076376a7345Timo Sirainen str_append(cmd, "</field>");
a8284e999d091cd29210fa75ecdc8076376a7345Timo Sirainen
a8284e999d091cd29210fa75ecdc8076376a7345Timo Sirainen ctx->headers = headers;
a8284e999d091cd29210fa75ecdc8076376a7345Timo Sirainen if (headers) {
a8284e999d091cd29210fa75ecdc8076376a7345Timo Sirainen str_append(cmd, "<field name=\"hdr\">");
a8284e999d091cd29210fa75ecdc8076376a7345Timo Sirainen } else {
a8284e999d091cd29210fa75ecdc8076376a7345Timo Sirainen str_append(cmd, "<field name=\"body\">");
a8284e999d091cd29210fa75ecdc8076376a7345Timo Sirainen }
a8284e999d091cd29210fa75ecdc8076376a7345Timo Sirainen solr_connection_post_more(ctx->post, str_data(cmd),
str_len(cmd));
} else if (headers && !ctx->headers) {
str_truncate(cmd, 0);
str_append(cmd, "</field><field name=\"hdr\">");
solr_connection_post_more(ctx->post, str_data(cmd),
str_len(cmd));
} else {
i_assert(!(!headers && ctx->headers));
}
str_truncate(cmd, 0);
xml_encode_data(cmd, data, size);
solr_connection_post_more(ctx->post, str_data(cmd), str_len(cmd));
return 0;
}
static int
fts_backend_solr_build_deinit(struct fts_backend_build_context *_ctx)
{
struct solr_fts_backend_build_context *ctx =
(struct solr_fts_backend_build_context *)_ctx;
int ret = 0;
if (ctx->prev_uid != 0) {
str_truncate(ctx->cmd, 0);
str_append(ctx->cmd, "</field></doc></add>");
solr_connection_post_more(ctx->post, str_data(ctx->cmd),
str_len(ctx->cmd));
ret = solr_connection_post_end(ctx->post);
/* commit and wait until the documents we just indexed are
visible to the following search */
if (solr_connection_post(solr_conn,
"<commit waitFlush=\"false\" "
"waitSearcher=\"true\"/>") < 0)
ret = -1;
}
str_free(&ctx->cmd);
i_free(ctx);
return ret;
}
/* Remove an expunged message from the Solr index.

   Posts a <delete> for the document id uid/uidvalidity/user/mailbox, the
   same id layout fts_backend_solr_build_more() writes. The result of the
   POST is deliberately ignored. The backend parameter is not used. */
static void
fts_backend_solr_expunge(struct fts_backend *backend ATTR_UNUSED,
			 struct mail *mail)
{
	struct mailbox_status box_status;

	mailbox_get_status(mail->box, STATUS_UIDVALIDITY, &box_status);

	T_BEGIN {
		string_t *request = t_str_new(256);

		str_append(request, "<delete><id>");
		str_printfa(request, "%u/%u/",
			    mail->uid, box_status.uidvalidity);
		xml_encode(request, mail->box->storage->user);
		str_append_c(request, '/');
		xml_encode(request, mail->box->name);
		str_append(request, "</id></delete>");

		(void)solr_connection_post(solr_conn, str_c(request));
	} T_END;
}
/* Expunge-finish callback. Intentionally does nothing: none of the
   parameters are used, and fts_backend_solr_expunge() already posts each
   delete on its own. */
static void
fts_backend_solr_expunge_finish(struct fts_backend *backend ATTR_UNUSED,
struct mailbox *box ATTR_UNUSED,
bool committed ATTR_UNUSED)
{
}
/* Lock callback. No locking is performed; the function always returns 1.
   NOTE(review): presumably 1 means "lock acquired" in the fts backend
   API - confirm against the caller. */
static int fts_backend_solr_lock(struct fts_backend *backend ATTR_UNUSED)
{
return 1;
}
/* Unlock callback. Intentionally empty, since fts_backend_solr_lock()
   does not take any lock. */
static void fts_backend_solr_unlock(struct fts_backend *backend ATTR_UNUSED)
{
}
/* Search the Solr index for key within the backend's mailbox.

   flags selects the Solr field to search: "body" when only
   FTS_LOOKUP_FLAG_BODY is set, "hdr" when only FTS_LOOKUP_FLAG_HEADER is
   set, and "any" when both are. FTS_LOOKUP_FLAG_INVERT is not supported
   and is asserted to be absent. The query is restricted to the mailbox's
   current UIDVALIDITY, mailbox name and user.

   maybe_uids is always cleared; the matches are stored in definite_uids
   by solr_connection_select(), whose return value is returned as-is. */
static int
fts_backend_solr_lookup(struct fts_backend *backend, const char *key,
			enum fts_lookup_flags flags,
			ARRAY_TYPE(seq_range) *definite_uids,
			ARRAY_TYPE(seq_range) *maybe_uids)
{
	struct mailbox_status box_status;
	const char *field_prefix;
	string_t *query;

	i_assert((flags & FTS_LOOKUP_FLAG_INVERT) == 0);

	query = t_str_new(256);
	str_append(query, "fl=uid&q=");

	if ((flags & FTS_LOOKUP_FLAG_HEADER) == 0) {
		/* body only */
		i_assert((flags & FTS_LOOKUP_FLAG_BODY) != 0);
		field_prefix = "body:";
	} else if ((flags & FTS_LOOKUP_FLAG_BODY) == 0) {
		/* header only */
		field_prefix = "hdr:";
	} else {
		/* both */
		field_prefix = "any:";
	}
	str_append(query, field_prefix);
	solr_quote_str(query, key);

	/* limit the search to this mailbox of this user */
	mailbox_get_status(backend->box, STATUS_UIDVALIDITY, &box_status);
	str_printfa(query, "%%20uidv:%u%%20box:", box_status.uidvalidity);
	solr_quote_str(query, backend->box->name);
	str_append(query, "%20user:");
	solr_quote_str(query, backend->box->storage->user);

	array_clear(maybe_uids);
	return solr_connection_select(solr_conn, str_c(query), definite_uids);
}
/* Definition of the "solr" FTS backend: its name, its flags and the table
   of callbacks implemented in this file. fts_backend_solr_init() copies
   this struct into every backend instance it allocates. */
struct fts_backend fts_backend_solr = {
MEMBER(name) "solr",
MEMBER(flags) 0,
{
/* callbacks are given positionally, so their order must match the
   callback table declared in struct fts_backend */
fts_backend_solr_init,
fts_backend_solr_deinit,
fts_backend_solr_get_last_uid,
fts_backend_solr_build_init,
fts_backend_solr_build_more,
fts_backend_solr_build_deinit,
fts_backend_solr_expunge,
fts_backend_solr_expunge_finish,
fts_backend_solr_lock,
fts_backend_solr_unlock,
fts_backend_solr_lookup,
/* NOTE(review): last slot is left NULL; presumably an optional callback
   this backend does not implement - confirm against struct fts_backend */
NULL
}
};