fts-backend-solr.c revision 2d7df7973f80011033e8e9fa676d3ff4c14468d8
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose/* Copyright (c) 2006-2013 Dovecot authors, see the included COPYING file */
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose#include "lib.h"
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose#include "array.h"
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose#include "str.h"
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose#include "hash.h"
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose#include "strescape.h"
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose#include "unichar.h"
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose#include "http-url.h"
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose#include "mail-storage-private.h"
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose#include "mailbox-list-private.h"
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose#include "mail-search.h"
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose#include "fts-api.h"
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose#include "solr-connection.h"
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose#include "fts-solr-plugin.h"
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose#include <ctype.h>
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose
/* Initial capacity of the buffer in which the <add> command is built. */
#define SOLR_CMDBUF_SIZE (64 * 1024)
/* Once a command buffer has grown to this length it gets flushed to Solr.
   Kept a little below SOLR_CMDBUF_SIZE. */
#define SOLR_CMDBUF_FLUSH_SIZE (SOLR_CMDBUF_SIZE - 128)
/* NOTE(review): presumably the buffer size above which a warning is logged;
   its users are outside this part of the file - confirm. */
#define SOLR_BUFFER_WARN_SIZE (1024 * 1024)
/* NOTE(review): presumably the row limit for multi-mailbox lookups; its
   users are outside this part of the file - confirm. */
#define SOLR_MAX_MULTI_ROWS 100000
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose
1df6751f81f7d9c225463f76b9789b0cc7a0de8bSumit Bosestruct solr_fts_backend {
cf89f552f06b95bd69d8c61aaa55a330a5d9f6e6Sumit Bose struct fts_backend backend;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose};
c371993cce13edb9185a5f0db76fbee03f0edc04Sumit Bose
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bosestruct solr_fts_field {
c371993cce13edb9185a5f0db76fbee03f0edc04Sumit Bose char *key;
5cd4414fce1e0eb4133dfc6fc828bf25c8a959f9Lukas Slebodnik string_t *value;
2962b3d1e072ff2ebbe343095812dad697d6bf1dSumit Bose};
c371993cce13edb9185a5f0db76fbee03f0edc04Sumit Bose
c371993cce13edb9185a5f0db76fbee03f0edc04Sumit Bosestruct solr_fts_backend_update_context {
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose struct fts_backend_update_context ctx;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose struct mailbox *cur_box;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose char box_guid[MAILBOX_GUID_HEX_LENGTH+1];
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose
21513e51a4a2eb08f245333bf8f223713a3d7cb3Sumit Bose struct solr_connection_post *post;
21513e51a4a2eb08f245333bf8f223713a3d7cb3Sumit Bose uint32_t prev_uid;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose string_t *cmd, *cur_value, *cur_value2;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose string_t *cmd_expunge;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose ARRAY(struct solr_fts_field) fields;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose uint32_t last_indexed_uid;
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose uint32_t size_warned_uid;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose unsigned int last_indexed_uid_set:1;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose unsigned int body_open:1;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose unsigned int documents_added:1;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose unsigned int expunges:1;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose};
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bosestatic bool is_valid_xml_char(unichar_t chr)
2cf7becc05996eb6d8a3352d3d7b97c75652e590Sumit Bose{
2cf7becc05996eb6d8a3352d3d7b97c75652e590Sumit Bose /* Valid characters in XML:
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] |
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose [#x10000-#x10FFFF]
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose This function gets called only for #x80 and higher */
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose if (chr > 0xd7ff && chr < 0xe000)
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose return FALSE;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose if (chr > 0xfffd && chr < 0x10000)
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose return FALSE;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose return chr < 0x10ffff;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose}
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bosestatic unsigned int
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bosexml_encode_data_max(string_t *dest, const unsigned char *data, unsigned int len,
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose unsigned int max_len)
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose{
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose unichar_t chr;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose unsigned int i;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose i_assert(max_len > 0 || len == 0);
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose if (max_len > len)
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose max_len = len;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose for (i = 0; i < max_len; i++) {
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose switch (data[i]) {
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose case '&':
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose str_append(dest, "&amp;");
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose break;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose case '<':
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose str_append(dest, "&lt;");
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose break;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose case '>':
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose str_append(dest, "&gt;");
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose break;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose case '\t':
a3c8390d19593b1e5277d95bfb4ab206d4785150Nikolai Kondrashov case '\n':
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose case '\r':
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose /* exceptions to the following control char check */
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose str_append_c(dest, data[i]);
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose break;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose default:
a3c8390d19593b1e5277d95bfb4ab206d4785150Nikolai Kondrashov if (data[i] < 32) {
233a3c6c48972b177e60d6ef4cecfacd3cf31659Simo Sorce /* SOLR doesn't like control characters.
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose replace them with spaces. */
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose str_append_c(dest, ' ');
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose } else if (data[i] >= 0x80) {
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose /* make sure the character is valid for XML
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose so we don't get XML parser errors */
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose unsigned int char_len =
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose uni_utf8_char_bytes(data[i]);
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose if (i + char_len <= len &&
a3c8390d19593b1e5277d95bfb4ab206d4785150Nikolai Kondrashov uni_utf8_get_char_n(data + i, char_len, &chr) == 1 &&
233a3c6c48972b177e60d6ef4cecfacd3cf31659Simo Sorce is_valid_xml_char(chr))
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose str_append_n(dest, data + i, char_len);
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose else {
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose str_append_n(dest, utf8_replacement_char,
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose UTF8_REPLACEMENT_CHAR_LEN);
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose }
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose i += char_len - 1;
233a3c6c48972b177e60d6ef4cecfacd3cf31659Simo Sorce } else {
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose str_append_c(dest, data[i]);
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose }
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose break;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose }
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose }
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose return i;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose}
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bosestatic void
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bosexml_encode_data(string_t *dest, const unsigned char *data, unsigned int len)
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose{
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose (void)xml_encode_data_max(dest, data, len, len);
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose}
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bosestatic void xml_encode(string_t *dest, const char *str)
233a3c6c48972b177e60d6ef4cecfacd3cf31659Simo Sorce{
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose xml_encode_data(dest, (const unsigned char *)str, strlen(str));
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose}
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bosestatic void solr_quote_http(string_t *dest, const char *str)
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose{
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose str_append(dest, "%22");
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose http_url_escape_param(dest, str);
bc85b7e2b7b1569e64d8832c52cab9ad165e6fc1Sumit Bose str_append(dest, "%22");
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose}
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bosestatic struct fts_backend *fts_backend_solr_alloc(void)
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose{
233a3c6c48972b177e60d6ef4cecfacd3cf31659Simo Sorce struct solr_fts_backend *backend;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose backend = i_new(struct solr_fts_backend, 1);
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose backend->backend = fts_backend_solr;
bc85b7e2b7b1569e64d8832c52cab9ad165e6fc1Sumit Bose return &backend->backend;
bc85b7e2b7b1569e64d8832c52cab9ad165e6fc1Sumit Bose}
bc85b7e2b7b1569e64d8832c52cab9ad165e6fc1Sumit Bose
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bosestatic int
233a3c6c48972b177e60d6ef4cecfacd3cf31659Simo Sorcefts_backend_solr_init(struct fts_backend *_backend, const char **error_r)
3e9712c2fdbba8f9cd25886943331e76e0b2ceddSumit Bose{
3e9712c2fdbba8f9cd25886943331e76e0b2ceddSumit Bose struct fts_solr_user *fuser = FTS_SOLR_USER_CONTEXT(_backend->ns->user);
3e9712c2fdbba8f9cd25886943331e76e0b2ceddSumit Bose const struct fts_solr_settings *set = &fuser->set;
3e9712c2fdbba8f9cd25886943331e76e0b2ceddSumit Bose
3e9712c2fdbba8f9cd25886943331e76e0b2ceddSumit Bose if (solr_conn == NULL) {
3e9712c2fdbba8f9cd25886943331e76e0b2ceddSumit Bose if (solr_connection_init(set->url, set->debug,
3e9712c2fdbba8f9cd25886943331e76e0b2ceddSumit Bose &solr_conn, error_r) < 0)
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose return -1;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose }
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose return 0;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose}
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose
/* Free the backend. The shared solr_conn isn't touched here. */
static void fts_backend_solr_deinit(struct fts_backend *_backend)
{
	struct solr_fts_backend *solr_backend =
		(struct solr_fts_backend *)_backend;

	i_free(solr_backend);
}
7ee9ac32485483beece872d6fcb3096fa77a004bSumit Bose
7ee9ac32485483beece872d6fcb3096fa77a004bSumit Bosestatic int
7ee9ac32485483beece872d6fcb3096fa77a004bSumit Boseget_last_uid_fallback(struct fts_backend *_backend, struct mailbox *box,
7ee9ac32485483beece872d6fcb3096fa77a004bSumit Bose uint32_t *last_uid_r)
7ee9ac32485483beece872d6fcb3096fa77a004bSumit Bose{
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose const struct seq_range *uidvals;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose const char *box_guid;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose unsigned int count;
a3c8390d19593b1e5277d95bfb4ab206d4785150Nikolai Kondrashov struct solr_result **results;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose string_t *str;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose pool_t pool;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose int ret = 0;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose str = t_str_new(256);
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose str_append(str, "fl=uid&rows=1&sort=uid+desc&q=");
a3c8390d19593b1e5277d95bfb4ab206d4785150Nikolai Kondrashov
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose if (fts_mailbox_get_guid(box, &box_guid) < 0)
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose return -1;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose str_printfa(str, "box:%s+user:", box_guid);
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose if (_backend->ns->owner != NULL)
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose solr_quote_http(str, _backend->ns->owner->username);
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose else
a3c8390d19593b1e5277d95bfb4ab206d4785150Nikolai Kondrashov str_append(str, "%22%22");
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose pool = pool_alloconly_create("solr last uid lookup", 1024);
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose if (solr_connection_select(solr_conn, str_c(str),
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose pool, &results) < 0)
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose ret = -1;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose else if (results[0] == NULL) {
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose /* no UIDs */
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose *last_uid_r = 0;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose } else {
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose uidvals = array_get(&results[0]->uids, &count);
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose i_assert(count > 0);
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose if (count == 1 && uidvals[0].seq1 == uidvals[0].seq2) {
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose *last_uid_r = uidvals[0].seq1;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose } else {
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose i_error("fts_solr: Last UID lookup returned multiple rows");
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose ret = -1;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose }
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose }
233a3c6c48972b177e60d6ef4cecfacd3cf31659Simo Sorce pool_unref(&pool);
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose return ret;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose}
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bosestatic int
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bosefts_backend_solr_get_last_uid(struct fts_backend *_backend,
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose struct mailbox *box, uint32_t *last_uid_r)
233a3c6c48972b177e60d6ef4cecfacd3cf31659Simo Sorce{
233a3c6c48972b177e60d6ef4cecfacd3cf31659Simo Sorce struct fts_index_header hdr;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose if (fts_index_get_header(box, &hdr)) {
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose *last_uid_r = hdr.last_indexed_uid;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose return 0;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose }
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose /* either nothing has been indexed, or the index was corrupted.
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose do it the slow way. */
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose if (get_last_uid_fallback(_backend, box, last_uid_r) < 0)
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose return -1;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose fts_index_set_last_uid(box, *last_uid_r);
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose return 0;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose}
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bosestatic struct fts_backend_update_context *
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bosefts_backend_solr_update_init(struct fts_backend *_backend)
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose{
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose struct solr_fts_backend_update_context *ctx;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose ctx = i_new(struct solr_fts_backend_update_context, 1);
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose ctx->ctx.backend = _backend;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose i_array_init(&ctx->fields, 16);
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose return &ctx->ctx;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose}
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bosestatic void xml_encode_id(struct solr_fts_backend_update_context *ctx,
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose string_t *str, uint32_t uid)
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose{
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose str_printfa(str, "%u/%s", uid, ctx->box_guid);
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose if (ctx->ctx.backend->ns->owner != NULL) {
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose str_append_c(str, '/');
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose xml_encode(str, ctx->ctx.backend->ns->owner->username);
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose }
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose}
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bosestatic void
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bosefts_backend_solr_doc_open(struct solr_fts_backend_update_context *ctx,
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose uint32_t uid)
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose{
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose ctx->documents_added = TRUE;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose str_printfa(ctx->cmd, "<doc>"
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose "<field name=\"uid\">%u</field>"
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose "<field name=\"box\">%s</field>",
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose uid, ctx->box_guid);
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose str_append(ctx->cmd, "<field name=\"user\">");
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose if (ctx->ctx.backend->ns->owner != NULL)
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose xml_encode(ctx->cmd, ctx->ctx.backend->ns->owner->username);
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose str_append(ctx->cmd, "</field>");
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose str_printfa(ctx->cmd, "<field name=\"id\">");
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose xml_encode_id(ctx, ctx->cmd, uid);
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose str_append(ctx->cmd, "</field>");
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose}
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bosestatic string_t *
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bosefts_solr_field_get(struct solr_fts_backend_update_context *ctx, const char *key)
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose{
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose const struct solr_fts_field *field;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose struct solr_fts_field new_field;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose /* there are only a few fields. this lookup is fast enough. */
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose array_foreach(&ctx->fields, field) {
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose if (strcasecmp(field->key, key) == 0)
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose return field->value;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose }
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose memset(&new_field, 0, sizeof(new_field));
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose new_field.key = str_lcase(i_strdup(key));
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose new_field.value = str_new(default_pool, 128);
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose array_append(&ctx->fields, &new_field, 1);
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose return new_field.value;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose}
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bosestatic void
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bosefts_backend_solr_doc_close(struct solr_fts_backend_update_context *ctx)
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose{
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose struct solr_fts_field *field;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose if (ctx->body_open) {
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose ctx->body_open = FALSE;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose str_append(ctx->cmd, "</field>");
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose }
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose array_foreach_modifiable(&ctx->fields, field) {
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose str_printfa(ctx->cmd, "<field name=\"%s\">", field->key);
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose xml_encode_data(ctx->cmd, str_data(field->value), str_len(field->value));
e2f6326ea56217afab7623c542a237ee84eb74daSumit Bose str_append(ctx->cmd, "</field>");
2962b3d1e072ff2ebbe343095812dad697d6bf1dSumit Bose str_truncate(field->value, 0);
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose }
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose str_append(ctx->cmd, "</doc>");
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose}
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bosestatic int
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bosefts_backed_solr_build_commit(struct solr_fts_backend_update_context *ctx)
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose{
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose if (ctx->post == NULL)
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose return 0;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose fts_backend_solr_doc_close(ctx);
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose str_append(ctx->cmd, "</add>");
e2f6326ea56217afab7623c542a237ee84eb74daSumit Bose
e2f6326ea56217afab7623c542a237ee84eb74daSumit Bose solr_connection_post_more(ctx->post, str_data(ctx->cmd),
2962b3d1e072ff2ebbe343095812dad697d6bf1dSumit Bose str_len(ctx->cmd));
e2f6326ea56217afab7623c542a237ee84eb74daSumit Bose return solr_connection_post_end(ctx->post);
2962b3d1e072ff2ebbe343095812dad697d6bf1dSumit Bose}
2962b3d1e072ff2ebbe343095812dad697d6bf1dSumit Bose
e2f6326ea56217afab7623c542a237ee84eb74daSumit Bosestatic void
e2f6326ea56217afab7623c542a237ee84eb74daSumit Bosefts_backend_solr_expunge_flush(struct solr_fts_backend_update_context *ctx)
2962b3d1e072ff2ebbe343095812dad697d6bf1dSumit Bose{
2962b3d1e072ff2ebbe343095812dad697d6bf1dSumit Bose str_append(ctx->cmd_expunge, "</delete>");
e2f6326ea56217afab7623c542a237ee84eb74daSumit Bose (void)solr_connection_post(solr_conn, str_c(ctx->cmd_expunge));
a3c8390d19593b1e5277d95bfb4ab206d4785150Nikolai Kondrashov str_truncate(ctx->cmd_expunge, 0);
a3c8390d19593b1e5277d95bfb4ab206d4785150Nikolai Kondrashov str_append(ctx->cmd_expunge, "<delete>");
e2f6326ea56217afab7623c542a237ee84eb74daSumit Bose}
e2f6326ea56217afab7623c542a237ee84eb74daSumit Bose
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bosestatic int
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bosefts_backend_solr_update_deinit(struct fts_backend_update_context *_ctx)
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose{
2962b3d1e072ff2ebbe343095812dad697d6bf1dSumit Bose struct solr_fts_backend_update_context *ctx =
e2f6326ea56217afab7623c542a237ee84eb74daSumit Bose (struct solr_fts_backend_update_context *)_ctx;
2962b3d1e072ff2ebbe343095812dad697d6bf1dSumit Bose struct solr_fts_field *field;
2962b3d1e072ff2ebbe343095812dad697d6bf1dSumit Bose const char *str;
e2f6326ea56217afab7623c542a237ee84eb74daSumit Bose int ret = _ctx->failed ? -1 : 0;
e2f6326ea56217afab7623c542a237ee84eb74daSumit Bose
2962b3d1e072ff2ebbe343095812dad697d6bf1dSumit Bose if (fts_backed_solr_build_commit(ctx) < 0)
2962b3d1e072ff2ebbe343095812dad697d6bf1dSumit Bose ret = -1;
e2f6326ea56217afab7623c542a237ee84eb74daSumit Bose
a3c8390d19593b1e5277d95bfb4ab206d4785150Nikolai Kondrashov if (ctx->documents_added || ctx->expunges) {
a3c8390d19593b1e5277d95bfb4ab206d4785150Nikolai Kondrashov /* commit and wait until the documents we just indexed are
e2f6326ea56217afab7623c542a237ee84eb74daSumit Bose visible to the following search */
e2f6326ea56217afab7623c542a237ee84eb74daSumit Bose if (ctx->expunges)
e2f6326ea56217afab7623c542a237ee84eb74daSumit Bose fts_backend_solr_expunge_flush(ctx);
e2f6326ea56217afab7623c542a237ee84eb74daSumit Bose str = t_strdup_printf("<commit waitSearcher=\"%s\"/>",
e2f6326ea56217afab7623c542a237ee84eb74daSumit Bose ctx->documents_added ? "true" : "false");
e2f6326ea56217afab7623c542a237ee84eb74daSumit Bose if (solr_connection_post(solr_conn, str) < 0)
21513e51a4a2eb08f245333bf8f223713a3d7cb3Sumit Bose ret = -1;
21513e51a4a2eb08f245333bf8f223713a3d7cb3Sumit Bose }
e2f6326ea56217afab7623c542a237ee84eb74daSumit Bose
a3c8390d19593b1e5277d95bfb4ab206d4785150Nikolai Kondrashov if (ctx->cmd != NULL)
21513e51a4a2eb08f245333bf8f223713a3d7cb3Sumit Bose str_free(&ctx->cmd);
21513e51a4a2eb08f245333bf8f223713a3d7cb3Sumit Bose if (ctx->cmd_expunge != NULL)
21513e51a4a2eb08f245333bf8f223713a3d7cb3Sumit Bose str_free(&ctx->cmd_expunge);
21513e51a4a2eb08f245333bf8f223713a3d7cb3Sumit Bose array_foreach_modifiable(&ctx->fields, field) {
21513e51a4a2eb08f245333bf8f223713a3d7cb3Sumit Bose str_free(&field->value);
21513e51a4a2eb08f245333bf8f223713a3d7cb3Sumit Bose i_free(field->key);
21513e51a4a2eb08f245333bf8f223713a3d7cb3Sumit Bose }
2cf7becc05996eb6d8a3352d3d7b97c75652e590Sumit Bose array_free(&ctx->fields);
2cf7becc05996eb6d8a3352d3d7b97c75652e590Sumit Bose i_free(ctx);
21513e51a4a2eb08f245333bf8f223713a3d7cb3Sumit Bose return ret;
21513e51a4a2eb08f245333bf8f223713a3d7cb3Sumit Bose}
21513e51a4a2eb08f245333bf8f223713a3d7cb3Sumit Bose
e2f6326ea56217afab7623c542a237ee84eb74daSumit Bosestatic void
e2f6326ea56217afab7623c542a237ee84eb74daSumit Bosefts_backend_solr_update_set_mailbox(struct fts_backend_update_context *_ctx,
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose struct mailbox *box)
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose{
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose struct solr_fts_backend_update_context *ctx =
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose (struct solr_fts_backend_update_context *)_ctx;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose const char *box_guid;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose if (ctx->prev_uid != 0) {
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose fts_index_set_last_uid(ctx->cur_box, ctx->prev_uid);
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose ctx->prev_uid = 0;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose }
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose if (box != NULL) {
210e57203a1502f78a16b05010d52c9121b644e3Lukas Slebodnik if (fts_mailbox_get_guid(box, &box_guid) < 0)
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose _ctx->failed = TRUE;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose i_assert(strlen(box_guid) == sizeof(ctx->box_guid)-1);
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose memcpy(ctx->box_guid, box_guid, sizeof(ctx->box_guid)-1);
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose } else {
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose memset(ctx->box_guid, 0, sizeof(ctx->box_guid));
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose }
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose ctx->cur_box = box;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose}
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bosestatic void
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bosefts_backend_solr_update_expunge(struct fts_backend_update_context *_ctx,
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose uint32_t uid)
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose{
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose struct solr_fts_backend_update_context *ctx =
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose (struct solr_fts_backend_update_context *)_ctx;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose struct fts_index_header hdr;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose if (!ctx->last_indexed_uid_set) {
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose if (!fts_index_get_header(ctx->cur_box, &hdr))
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose ctx->last_indexed_uid = 0;
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose else
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose ctx->last_indexed_uid = hdr.last_indexed_uid;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose ctx->last_indexed_uid_set = TRUE;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose }
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose if (ctx->last_indexed_uid == 0 ||
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose uid > ctx->last_indexed_uid + 100) {
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose /* don't waste time asking Solr to expunge a message that is
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose highly unlikely to be indexed at this time. */
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose return;
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose }
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose if (!ctx->expunges) {
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose ctx->expunges = TRUE;
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose ctx->cmd_expunge = str_new(default_pool, 1024);
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose str_append(ctx->cmd_expunge, "<delete>");
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose }
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose if (str_len(ctx->cmd_expunge) >= SOLR_CMDBUF_FLUSH_SIZE)
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose fts_backend_solr_expunge_flush(ctx);
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose str_append(ctx->cmd_expunge, "<id>");
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose xml_encode_id(ctx, ctx->cmd_expunge, uid);
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose str_append(ctx->cmd_expunge, "</id>");
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose}
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bosestatic void
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bosefts_backend_solr_uid_changed(struct solr_fts_backend_update_context *ctx,
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose uint32_t uid)
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose{
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose if (ctx->post == NULL) {
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose i_assert(ctx->prev_uid == 0);
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose ctx->cmd = str_new(default_pool, SOLR_CMDBUF_SIZE);
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose ctx->post = solr_connection_post_begin(solr_conn);
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose str_append(ctx->cmd, "<add>");
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose } else {
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose fts_backend_solr_doc_close(ctx);
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose }
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose ctx->prev_uid = uid;
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose fts_backend_solr_doc_open(ctx, uid);
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose}
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bosestatic bool
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bosefts_backend_solr_update_set_build_key(struct fts_backend_update_context *_ctx,
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose const struct fts_backend_build_key *key)
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose{
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose struct solr_fts_backend_update_context *ctx =
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose (struct solr_fts_backend_update_context *)_ctx;
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose if (key->uid != ctx->prev_uid)
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose fts_backend_solr_uid_changed(ctx, key->uid);
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose switch (key->type) {
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose case FTS_BACKEND_BUILD_KEY_HDR:
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose if (fts_header_want_indexed(key->hdr_name)) {
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose ctx->cur_value2 =
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose fts_solr_field_get(ctx, key->hdr_name);
2cf7becc05996eb6d8a3352d3d7b97c75652e590Sumit Bose }
2cf7becc05996eb6d8a3352d3d7b97c75652e590Sumit Bose /* fall through */
2cf7becc05996eb6d8a3352d3d7b97c75652e590Sumit Bose case FTS_BACKEND_BUILD_KEY_MIME_HDR:
2cf7becc05996eb6d8a3352d3d7b97c75652e590Sumit Bose ctx->cur_value = fts_solr_field_get(ctx, "hdr");
2cf7becc05996eb6d8a3352d3d7b97c75652e590Sumit Bose xml_encode(ctx->cur_value, key->hdr_name);
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose str_append(ctx->cur_value, ": ");
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose break;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose case FTS_BACKEND_BUILD_KEY_BODY_PART:
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose if (!ctx->body_open) {
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose ctx->body_open = TRUE;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose str_append(ctx->cmd, "<field name=\"body\">");
e2f6326ea56217afab7623c542a237ee84eb74daSumit Bose }
e2f6326ea56217afab7623c542a237ee84eb74daSumit Bose ctx->cur_value = ctx->cmd;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose break;
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose case FTS_BACKEND_BUILD_KEY_BODY_PART_BINARY:
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose i_unreached();
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose }
2cf7becc05996eb6d8a3352d3d7b97c75652e590Sumit Bose return TRUE;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose}
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bosestatic void
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bosefts_backend_solr_update_unset_build_key(struct fts_backend_update_context *_ctx)
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose{
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose struct solr_fts_backend_update_context *ctx =
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose (struct solr_fts_backend_update_context *)_ctx;
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose /* There can be multiple duplicate keys (duplicate header lines,
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose multiple MIME body parts). Make sure they are separated by
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose whitespace. */
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose str_append_c(ctx->cur_value, '\n');
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose ctx->cur_value = NULL;
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose if (ctx->cur_value2 != NULL) {
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose str_append_c(ctx->cur_value2, '\n');
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose ctx->cur_value2 = NULL;
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose }
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose}
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bosestatic int
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bosefts_backend_solr_update_build_more(struct fts_backend_update_context *_ctx,
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose const unsigned char *data, size_t size)
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose{
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose struct solr_fts_backend_update_context *ctx =
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose (struct solr_fts_backend_update_context *)_ctx;
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose unsigned int len;
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose if (_ctx->failed)
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose return -1;
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose if (ctx->cur_value2 == NULL && ctx->cur_value == ctx->cmd) {
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose /* we're writing to message body. if size is huge,
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose flush it once in a while */
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose while (size >= SOLR_CMDBUF_FLUSH_SIZE) {
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose if (str_len(ctx->cmd) >= SOLR_CMDBUF_FLUSH_SIZE) {
cf89f552f06b95bd69d8c61aaa55a330a5d9f6e6Sumit Bose solr_connection_post_more(ctx->post,
cf89f552f06b95bd69d8c61aaa55a330a5d9f6e6Sumit Bose str_data(ctx->cmd),
cf89f552f06b95bd69d8c61aaa55a330a5d9f6e6Sumit Bose str_len(ctx->cmd));
cf89f552f06b95bd69d8c61aaa55a330a5d9f6e6Sumit Bose str_truncate(ctx->cmd, 0);
cf89f552f06b95bd69d8c61aaa55a330a5d9f6e6Sumit Bose }
cf89f552f06b95bd69d8c61aaa55a330a5d9f6e6Sumit Bose len = xml_encode_data_max(ctx->cmd, data, size,
cf89f552f06b95bd69d8c61aaa55a330a5d9f6e6Sumit Bose SOLR_CMDBUF_FLUSH_SIZE -
cf89f552f06b95bd69d8c61aaa55a330a5d9f6e6Sumit Bose str_len(ctx->cmd));
cf89f552f06b95bd69d8c61aaa55a330a5d9f6e6Sumit Bose i_assert(len > 0);
cf89f552f06b95bd69d8c61aaa55a330a5d9f6e6Sumit Bose i_assert(len <= size);
cf89f552f06b95bd69d8c61aaa55a330a5d9f6e6Sumit Bose data += len;
cf89f552f06b95bd69d8c61aaa55a330a5d9f6e6Sumit Bose size -= len;
cf89f552f06b95bd69d8c61aaa55a330a5d9f6e6Sumit Bose }
cf89f552f06b95bd69d8c61aaa55a330a5d9f6e6Sumit Bose xml_encode_data(ctx->cmd, data, size);
cf89f552f06b95bd69d8c61aaa55a330a5d9f6e6Sumit Bose } else {
cf89f552f06b95bd69d8c61aaa55a330a5d9f6e6Sumit Bose xml_encode_data(ctx->cur_value, data, size);
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose if (ctx->cur_value2 != NULL)
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose xml_encode_data(ctx->cur_value2, data, size);
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose }
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose if (str_len(ctx->cmd) >= SOLR_CMDBUF_FLUSH_SIZE) {
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose solr_connection_post_more(ctx->post, str_data(ctx->cmd),
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose str_len(ctx->cmd));
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose str_truncate(ctx->cmd, 0);
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose }
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose if (str_len(ctx->cur_value) >= SOLR_BUFFER_WARN_SIZE &&
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose ctx->size_warned_uid != ctx->prev_uid) {
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose /* a large header */
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose i_assert(ctx->cur_value != ctx->cmd);
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose ctx->size_warned_uid = ctx->prev_uid;
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose i_warning("fts-solr(%s): Mailbox %s UID=%u header size is huge",
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose ctx->cur_box->storage->user->username,
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek mailbox_get_vname(ctx->cur_box), ctx->prev_uid);
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek }
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek return 0;
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose}
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bosestatic int fts_backend_solr_refresh(struct fts_backend *backend ATTR_UNUSED)
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose{
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose return 0;
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose}
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozekstatic int fts_backend_solr_rescan(struct fts_backend *backend)
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose{
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose struct mailbox_list_iterate_context *iter;
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek const struct mailbox_info *info;
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek struct mailbox *box;
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek int ret = 0;
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose /* FIXME: proper rescan needed. for now we'll just reset the
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose last-uids */
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose iter = mailbox_list_iter_init(backend->ns->list, "*",
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose MAILBOX_LIST_ITER_SKIP_ALIASES |
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose MAILBOX_LIST_ITER_NO_AUTO_BOXES);
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose while ((info = mailbox_list_iter_next(iter)) != NULL) {
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose if ((info->flags &
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose (MAILBOX_NONEXISTENT | MAILBOX_NOSELECT)) != 0)
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose continue;
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose box = mailbox_alloc(info->ns->list, info->vname, 0);
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose if (mailbox_open(box) == 0) {
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose if (fts_index_set_last_uid(box, 0) < 0)
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose ret = -1;
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose }
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose mailbox_free(&box);
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose }
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose if (mailbox_list_iter_deinit(&iter) < 0)
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose ret = -1;
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose return ret;
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose}
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bosestatic int fts_backend_solr_optimize(struct fts_backend *backend ATTR_UNUSED)
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose{
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose return 0;
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose}
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose
/* Returns TRUE if str contains at least one of the characters listed in
   solr_escape_chars (Solr query operators, quote, backslash and space),
   FALSE otherwise. An empty string never needs escaping. */
static bool solr_need_escaping(const char *str)
{
	static const char solr_escape_chars[] = "+-&|!(){}[]^\"~*?:\\ ";

	/* strpbrk() finds the first byte of str that is in the set, which
	   is exactly the per-character strchr() scan done by hand before */
	return strpbrk(str, solr_escape_chars) != NULL;
}
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bosestatic void solr_add_str_arg(string_t *str, struct mail_search_arg *arg)
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose{
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose /* currently we'll just disable fuzzy searching if there are any
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose parameters that need escaping. solr doesn't seem to give good
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose fuzzy results even if we did escape them.. */
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose if (!arg->fuzzy || solr_need_escaping(arg->value.str))
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose solr_quote_http(str, arg->value.str);
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose else {
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose str_append(str, arg->value.str);
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose str_append_c(str, '~');
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose }
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose}
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose
ad463501d3bdea4c24c17d792efc1c3e65c08c19Sumit Bosestatic bool
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bosesolr_add_definite_query(string_t *str, struct mail_search_arg *arg)
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose{
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose switch (arg->type) {
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose case SEARCH_TEXT: {
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose if (arg->match_not)
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose str_append_c(str, '-');
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek str_append(str, "(hdr:");
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek solr_add_str_arg(str, arg);
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek str_append(str, "+OR+body:");
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek solr_add_str_arg(str, arg);
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek str_append(str, ")");
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek break;
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek }
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek case SEARCH_BODY:
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek if (arg->match_not)
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek str_append_c(str, '-');
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek str_append(str, "body:");
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek solr_add_str_arg(str, arg);
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek break;
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek case SEARCH_HEADER:
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek case SEARCH_HEADER_ADDRESS:
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek case SEARCH_HEADER_COMPRESS_LWSP:
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek if (!fts_header_want_indexed(arg->hdr_field_name))
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek return FALSE;
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek if (arg->match_not)
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek str_append_c(str, '-');
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek str_append(str, t_str_lcase(arg->hdr_field_name));
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek str_append_c(str, ':');
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek solr_add_str_arg(str, arg);
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose break;
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose default:
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose return FALSE;
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose }
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek return TRUE;
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose}
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bosestatic bool
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bosesolr_add_definite_query_args(string_t *str, struct mail_search_arg *arg,
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose bool and_args)
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose{
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose unsigned int last_len;
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose last_len = str_len(str);
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose for (; arg != NULL; arg = arg->next) {
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose if (solr_add_definite_query(str, arg)) {
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose arg->match_always = TRUE;
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose last_len = str_len(str);
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose if (and_args)
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose str_append(str, "+AND+");
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose else
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose str_append(str, "+OR+");
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose }
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose }
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose if (str_len(str) == last_len)
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose return FALSE;
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose str_truncate(str, last_len);
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose return TRUE;
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose}
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozekstatic bool
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozeksolr_add_maybe_query(string_t *str, struct mail_search_arg *arg)
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose{
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose switch (arg->type) {
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose case SEARCH_HEADER:
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose case SEARCH_HEADER_ADDRESS:
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose case SEARCH_HEADER_COMPRESS_LWSP:
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek if (fts_header_want_indexed(arg->hdr_field_name))
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek return FALSE;
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek if (arg->match_not) {
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose /* all matches would be definite, but all non-matches
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose would be maybies. too much trouble to optimize. */
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose return FALSE;
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose }
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose /* we can check if the search key exists in some header and
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose filter out the messages that have no chance of matching */
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose str_append(str, "hdr:");
abee3216261e3378430e472f0c992470b33976f0Sumit Bose if (*arg->value.str != '\0')
abee3216261e3378430e472f0c992470b33976f0Sumit Bose solr_quote_http(str, arg->value.str);
abee3216261e3378430e472f0c992470b33976f0Sumit Bose else {
abee3216261e3378430e472f0c992470b33976f0Sumit Bose /* checking potential existence of the header name */
abee3216261e3378430e472f0c992470b33976f0Sumit Bose solr_quote_http(str, t_str_lcase(arg->hdr_field_name));
abee3216261e3378430e472f0c992470b33976f0Sumit Bose }
abee3216261e3378430e472f0c992470b33976f0Sumit Bose break;
abee3216261e3378430e472f0c992470b33976f0Sumit Bose default:
abee3216261e3378430e472f0c992470b33976f0Sumit Bose return FALSE;
abee3216261e3378430e472f0c992470b33976f0Sumit Bose }
abee3216261e3378430e472f0c992470b33976f0Sumit Bose return TRUE;
abee3216261e3378430e472f0c992470b33976f0Sumit Bose}
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozekstatic bool
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozeksolr_add_maybe_query_args(string_t *str, struct mail_search_arg *arg,
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek bool and_args)
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek{
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek unsigned int last_len;
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek last_len = str_len(str);
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek for (; arg != NULL; arg = arg->next) {
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek if (solr_add_maybe_query(str, arg)) {
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek arg->match_always = TRUE;
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek last_len = str_len(str);
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek if (and_args)
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek str_append(str, "+AND+");
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek else
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek str_append(str, "+OR+");
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek }
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek }
abee3216261e3378430e472f0c992470b33976f0Sumit Bose if (str_len(str) == last_len)
abee3216261e3378430e472f0c992470b33976f0Sumit Bose return FALSE;
abee3216261e3378430e472f0c992470b33976f0Sumit Bose
abee3216261e3378430e472f0c992470b33976f0Sumit Bose str_truncate(str, last_len);
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek return TRUE;
abee3216261e3378430e472f0c992470b33976f0Sumit Bose}
abee3216261e3378430e472f0c992470b33976f0Sumit Bose
/* Complete the query in str with a filter query restricting it to
   box_guid and the namespace owner's username (an empty quoted string
   when the namespace has no owner), run it, and append the UIDs and
   scores of the first result to uids_r and scores_r.

   Returns the return value of solr_connection_select(). */
static int solr_search(struct fts_backend *_backend, string_t *str,
		       const char *box_guid, ARRAY_TYPE(seq_range) *uids_r,
		       ARRAY_TYPE(fts_score_map) *scores_r)
{
	struct solr_result **results;
	pool_t pool;
	int ret;

	/* use a separate filter query for selecting the mailbox. it shouldn't
	   affect the score and there could be some caching benefits too. */
	str_printfa(str, "&fq=%%2Bbox:%s+%%2Buser:", box_guid);
	if (_backend->ns->owner == NULL)
		str_append(str, "%22%22");
	else
		solr_quote_http(str, _backend->ns->owner->username);

	/* the results live in this pool only until they have been copied
	   into the caller's arrays */
	pool = pool_alloconly_create("fts solr search", 1024);
	ret = solr_connection_select(solr_conn, str_c(str), pool, &results);
	if (ret == 0 && results[0] != NULL) {
		array_append_array(uids_r, &results[0]->uids);
		array_append_array(scores_r, &results[0]->scores);
	}
	pool_unref(&pool);
	return ret;
}
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bosestatic int
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bosefts_backend_solr_lookup(struct fts_backend *_backend, struct mailbox *box,
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose struct mail_search_arg *args, bool and_args,
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose struct fts_result *result)
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose{
9c8db0a17a66c58c36966b17d004142a4aaace8dSumit Bose struct mailbox_status status;
9c8db0a17a66c58c36966b17d004142a4aaace8dSumit Bose string_t *str;
cffe3135f29c737f2598f3c1384bfba1694fb843Sumit Bose const char *box_guid;
415d93196533a6fcd90889c67396ef5af5bf791aSumit Bose unsigned int prefix_len;
cffe3135f29c737f2598f3c1384bfba1694fb843Sumit Bose
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose if (fts_mailbox_get_guid(box, &box_guid) < 0)
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose return -1;
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek mailbox_get_open_status(box, STATUS_UIDNEXT, &status);
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose str = t_str_new(256);
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose str_printfa(str, "fl=uid,score&rows=%u&sort=uid+asc&q=",
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose status.uidnext);
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose prefix_len = str_len(str);
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose if (solr_add_definite_query_args(str, args, and_args)) {
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose if (solr_search(_backend, str, box_guid,
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose &result->definite_uids, &result->scores) < 0)
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose return -1;
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek }
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose str_truncate(str, prefix_len);
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose if (solr_add_maybe_query_args(str, args, and_args)) {
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose if (solr_search(_backend, str, box_guid,
e2f6326ea56217afab7623c542a237ee84eb74daSumit Bose &result->maybe_uids, &result->scores) < 0)
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose return -1;
2cf7becc05996eb6d8a3352d3d7b97c75652e590Sumit Bose }
2cf7becc05996eb6d8a3352d3d7b97c75652e590Sumit Bose result->scores_sorted = TRUE;
2cf7becc05996eb6d8a3352d3d7b97c75652e590Sumit Bose return 0;
eb7095099b2dd0afb1d028dbc15d8c5a897d90f8Sumit Bose}
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bosestatic int
a3c8390d19593b1e5277d95bfb4ab206d4785150Nikolai Kondrashovsolr_search_multi(struct fts_backend *_backend, string_t *str,
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose struct mailbox *const boxes[],
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose struct fts_multi_result *result)
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose{
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose struct solr_result **solr_results;
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose struct fts_result *fts_result;
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose ARRAY(struct fts_result) fts_results;
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose HASH_TABLE(char *, struct mailbox *) mailboxes;
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose struct mailbox *box;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose const char *box_guid;
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose unsigned int i, len;
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose /* use a separate filter query for selecting the mailbox. it shouldn't
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose affect the score and there could be some caching benefits too. */
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose str_append(str, "&fq=%2Buser:");
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose if (_backend->ns->owner != NULL)
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose solr_quote_http(str, _backend->ns->owner->username);
a3c8390d19593b1e5277d95bfb4ab206d4785150Nikolai Kondrashov else
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose str_append(str, "%22%22");
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose hash_table_create(&mailboxes, default_pool, 0, str_hash, strcmp);
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose str_append(str, "%2B(");
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose len = str_len(str);
a3c8390d19593b1e5277d95bfb4ab206d4785150Nikolai Kondrashov for (i = 0; boxes[i] != NULL; i++) {
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose if (fts_mailbox_get_guid(boxes[i], &box_guid) < 0)
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose continue;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose if (str_len(str) != len)
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose str_append(str, "+OR+");
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose str_printfa(str, "box:%s", box_guid);
a3c8390d19593b1e5277d95bfb4ab206d4785150Nikolai Kondrashov hash_table_insert(mailboxes, t_strdup_noconst(box_guid),
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose boxes[i]);
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose }
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose str_append_c(str, ')');
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose if (solr_connection_select(solr_conn, str_c(str),
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose result->pool, &solr_results) < 0) {
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose hash_table_destroy(&mailboxes);
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose return -1;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose }
a3c8390d19593b1e5277d95bfb4ab206d4785150Nikolai Kondrashov
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose p_array_init(&fts_results, result->pool, 32);
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose for (i = 0; solr_results[i] != NULL; i++) {
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose box = hash_table_lookup(mailboxes, solr_results[i]->box_id);
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose if (box == NULL) {
ac7a7ee3d1e138818a1ed78758f7dd3c3306a56bSumit Bose i_warning("fts_solr: Lookup returned unexpected mailbox "
ac7a7ee3d1e138818a1ed78758f7dd3c3306a56bSumit Bose "with guid=%s", solr_results[i]->box_id);
ac7a7ee3d1e138818a1ed78758f7dd3c3306a56bSumit Bose continue;
ac7a7ee3d1e138818a1ed78758f7dd3c3306a56bSumit Bose }
ac7a7ee3d1e138818a1ed78758f7dd3c3306a56bSumit Bose fts_result = array_append_space(&fts_results);
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek fts_result->box = box;
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek fts_result->definite_uids = solr_results[i]->uids;
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek fts_result->scores = solr_results[i]->scores;
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek fts_result->scores_sorted = TRUE;
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek }
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek array_append_zero(&fts_results);
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek result->box_results = array_idx_modifiable(&fts_results, 0);
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek hash_table_destroy(&mailboxes);
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek return 0;
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek}
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose
a3c8390d19593b1e5277d95bfb4ab206d4785150Nikolai Kondrashovstatic int
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bosefts_backend_solr_lookup_multi(struct fts_backend *backend,
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose struct mailbox *const boxes[],
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose struct mail_search_arg *args, bool and_args,
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose struct fts_multi_result *result)
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose{
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose string_t *str;
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose
2fc12875f7d51248799016c19c1298b85e06a286Sumit Bose str = t_str_new(256);
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek str_printfa(str, "fl=box,uid,score&rows=%u&sort=box+asc,uid+asc&q=",
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose SOLR_MAX_MULTI_ROWS);
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose if (solr_add_definite_query_args(str, args, and_args)) {
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose if (solr_search_multi(backend, str, boxes, result) < 0)
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose return -1;
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose }
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose /* FIXME: maybe_uids could be handled also with some more work.. */
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose return 0;
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose}
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bosestruct fts_backend fts_backend_solr = {
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose .name = "solr",
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose .flags = FTS_BACKEND_FLAG_FUZZY_SEARCH,
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose {
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose fts_backend_solr_alloc,
28c70f003c7b330ab1d998a4eff1248d272a6ba9Sumit Bose fts_backend_solr_init,
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose fts_backend_solr_deinit,
a3c8390d19593b1e5277d95bfb4ab206d4785150Nikolai Kondrashov fts_backend_solr_get_last_uid,
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose fts_backend_solr_update_init,
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose fts_backend_solr_update_deinit,
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose fts_backend_solr_update_set_mailbox,
36a12aea020a935ffa40505fa02860c3d921ad0cSumit Bose fts_backend_solr_update_expunge,
e2f6326ea56217afab7623c542a237ee84eb74daSumit Bose fts_backend_solr_update_set_build_key,
e2f6326ea56217afab7623c542a237ee84eb74daSumit Bose fts_backend_solr_update_unset_build_key,
e2f6326ea56217afab7623c542a237ee84eb74daSumit Bose fts_backend_solr_update_build_more,
e2f6326ea56217afab7623c542a237ee84eb74daSumit Bose fts_backend_solr_refresh,
e2f6326ea56217afab7623c542a237ee84eb74daSumit Bose fts_backend_solr_rescan,
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek fts_backend_solr_optimize,
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek fts_backend_default_can_lookup,
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek fts_backend_solr_lookup,
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek fts_backend_solr_lookup_multi,
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek NULL
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek }
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek};
c125e741d3111e2f9b56866ba00835ca05c6f349Jakub Hrozek