fts-backend-solr.c revision 4128fe33fde3cf20665650bb2b11a6450c09a816
2454dfa32c93c20a8522c6ed42fe057baaac9f9aStephan Bosch/* Copyright (c) 2006-2012 Dovecot authors, see the included COPYING file */
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen#include "lib.h"
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen#include "array.h"
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen#include "str.h"
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen#include "hash.h"
c115c742f730e312d6b6ab5064595cd0d8b4e26eTimo Sirainen#include "strescape.h"
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen#include "unichar.h"
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen#include "mail-storage-private.h"
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen#include "mailbox-list-private.h"
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen#include "mail-search.h"
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen#include "fts-api.h"
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen#include "solr-connection.h"
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen#include "fts-solr-plugin.h"
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen#include <ctype.h>
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen
/* Initial allocation size of the per-update XML command buffer (64 kB). */
#define SOLR_CMDBUF_SIZE (64*1024)
/* 128 bytes below SOLR_CMDBUF_SIZE. Presumably the fill level at which the
   command buffer gets sent to Solr - its users are outside this part of the
   file, TODO confirm. */
#define SOLR_CMDBUF_FLUSH_SIZE (SOLR_CMDBUF_SIZE-128)
/* 1 MB. Presumably the buffered size above which a warning is logged -
   TODO confirm against its user. */
#define SOLR_BUFFER_WARN_SIZE (1024*1024)
/* Presumably the row limit for lookups spanning multiple mailboxes -
   TODO confirm against its user. */
#define SOLR_MAX_MULTI_ROWS 100000
a34bd633ab201f6a5ad1c00174fb8b0359031d00Timo Sirainen
0dffa25d211be541ee3c953b23566a1a990789dfTimo Sirainenstruct solr_fts_backend {
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen struct fts_backend backend;
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen};
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainenstruct solr_fts_field {
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen char *key;
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen string_t *value;
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen};
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainenstruct solr_fts_backend_update_context {
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen struct fts_backend_update_context ctx;
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen struct mailbox *cur_box;
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen char box_guid[MAILBOX_GUID_HEX_LENGTH+1];
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen
5f44975ec6c5755dd74bcd4c47a123a7242ecab3Timo Sirainen struct solr_connection_post *post;
5f44975ec6c5755dd74bcd4c47a123a7242ecab3Timo Sirainen uint32_t prev_uid;
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen string_t *cmd, *cur_value, *cur_value2;
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen ARRAY_DEFINE(fields, struct solr_fts_field);
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen uint32_t last_indexed_uid;
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen uint32_t size_warned_uid;
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen
d22301419109ed4a38351715e6760011421dadecTimo Sirainen unsigned int last_indexed_uid_set:1;
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen unsigned int body_open:1;
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen unsigned int documents_added:1;
4542c94adb8910e0174c784754e737cec16af59cTimo Sirainen unsigned int expunges:1;
4542c94adb8910e0174c784754e737cec16af59cTimo Sirainen};
4542c94adb8910e0174c784754e737cec16af59cTimo Sirainen
4542c94adb8910e0174c784754e737cec16af59cTimo Sirainenstatic struct solr_connection *solr_conn = NULL;
4542c94adb8910e0174c784754e737cec16af59cTimo Sirainen
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainenstatic bool is_valid_xml_char(unichar_t chr)
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen{
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen /* Valid characters in XML:
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen
e3367d7b54864d2e4b1931903e3f660ae64fbe3aTimo Sirainen #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] |
e3367d7b54864d2e4b1931903e3f660ae64fbe3aTimo Sirainen [#x10000-#x10FFFF]
e3367d7b54864d2e4b1931903e3f660ae64fbe3aTimo Sirainen
e3367d7b54864d2e4b1931903e3f660ae64fbe3aTimo Sirainen This function gets called only for #x80 and higher */
e3367d7b54864d2e4b1931903e3f660ae64fbe3aTimo Sirainen if (chr > 0xd7ff && chr < 0xe000)
e3367d7b54864d2e4b1931903e3f660ae64fbe3aTimo Sirainen return FALSE;
e3367d7b54864d2e4b1931903e3f660ae64fbe3aTimo Sirainen if (chr > 0xfffd && chr < 0x10000)
e3367d7b54864d2e4b1931903e3f660ae64fbe3aTimo Sirainen return FALSE;
e3367d7b54864d2e4b1931903e3f660ae64fbe3aTimo Sirainen return chr < 0x10ffff;
e3367d7b54864d2e4b1931903e3f660ae64fbe3aTimo Sirainen}
e3367d7b54864d2e4b1931903e3f660ae64fbe3aTimo Sirainen
e3367d7b54864d2e4b1931903e3f660ae64fbe3aTimo Sirainenstatic unsigned int
e3367d7b54864d2e4b1931903e3f660ae64fbe3aTimo Sirainenxml_encode_data_max(string_t *dest, const unsigned char *data, unsigned int len,
e3367d7b54864d2e4b1931903e3f660ae64fbe3aTimo Sirainen unsigned int max_len)
e3367d7b54864d2e4b1931903e3f660ae64fbe3aTimo Sirainen{
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen unichar_t chr;
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen unsigned int i;
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen i_assert(max_len > 0 || len == 0);
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen if (max_len > len)
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen max_len = len;
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen for (i = 0; i < max_len; i++) {
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen switch (data[i]) {
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen case '&':
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen str_append(dest, "&amp;");
4542c94adb8910e0174c784754e737cec16af59cTimo Sirainen break;
4542c94adb8910e0174c784754e737cec16af59cTimo Sirainen case '<':
4542c94adb8910e0174c784754e737cec16af59cTimo Sirainen str_append(dest, "&lt;");
5f44975ec6c5755dd74bcd4c47a123a7242ecab3Timo Sirainen break;
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen case '>':
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen str_append(dest, "&gt;");
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen break;
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen case '\t':
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen case '\n':
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen case '\r':
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen /* exceptions to the following control char check */
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen str_append_c(dest, data[i]);
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen break;
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen default:
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen if (data[i] < 32) {
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen /* SOLR doesn't like control characters.
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen replace them with spaces. */
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen str_append_c(dest, ' ');
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen } else if (data[i] >= 0x80) {
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen /* make sure the character is valid for XML
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen so we don't get XML parser errors */
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen unsigned int char_len =
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen uni_utf8_char_bytes(data[i]);
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen if (i + char_len <= len &&
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen uni_utf8_get_char_n(data + i, char_len, &chr) == 1 &&
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen is_valid_xml_char(chr))
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen str_append_n(dest, data + i, char_len);
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen else {
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen str_append_n(dest, utf8_replacement_char,
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen UTF8_REPLACEMENT_CHAR_LEN);
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen }
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen i += char_len - 1;
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen } else {
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen str_append_c(dest, data[i]);
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen }
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen break;
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen }
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen }
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen return i;
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen}
d85a1a9d9af4a36ded4d30cb277905c807de2ec5Timo Sirainen
d85a1a9d9af4a36ded4d30cb277905c807de2ec5Timo Sirainenstatic void
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainenxml_encode_data(string_t *dest, const unsigned char *data, unsigned int len)
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen{
d979c1179d55ad86e40f869e48ef3e4db9c817b5Timo Sirainen (void)xml_encode_data_max(dest, data, len, len);
d979c1179d55ad86e40f869e48ef3e4db9c817b5Timo Sirainen}
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainenstatic void xml_encode(string_t *dest, const char *str)
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen{
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen xml_encode_data(dest, (const unsigned char *)str, strlen(str));
abb404575a238f27ea03b6049880f30f1656ddc4Timo Sirainen}
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen
/* Append str to dest as a quoted phrase for use in a query URL: the
   HTTP-escaped text wrapped in %22 (percent-encoded double quotes). */
static void solr_quote_http(string_t *dest, const char *str)
{
	static const char url_quote[] = "%22";

	str_append(dest, url_quote);
	solr_connection_http_escape(solr_conn, dest, str);
	str_append(dest, url_quote);
}
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainenstatic struct fts_backend *fts_backend_solr_alloc(void)
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen{
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen struct solr_fts_backend *backend;
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen backend = i_new(struct solr_fts_backend, 1);
c115c742f730e312d6b6ab5064595cd0d8b4e26eTimo Sirainen backend->backend = fts_backend_solr;
c115c742f730e312d6b6ab5064595cd0d8b4e26eTimo Sirainen return &backend->backend;
c115c742f730e312d6b6ab5064595cd0d8b4e26eTimo Sirainen}
c115c742f730e312d6b6ab5064595cd0d8b4e26eTimo Sirainen
c115c742f730e312d6b6ab5064595cd0d8b4e26eTimo Sirainenstatic int
13e130c3af3032982de6b1d13c6dcddda9164848Timo Sirainenfts_backend_solr_init(struct fts_backend *_backend,
c115c742f730e312d6b6ab5064595cd0d8b4e26eTimo Sirainen const char **error_r ATTR_UNUSED)
c115c742f730e312d6b6ab5064595cd0d8b4e26eTimo Sirainen{
c115c742f730e312d6b6ab5064595cd0d8b4e26eTimo Sirainen struct fts_solr_user *fuser = FTS_SOLR_USER_CONTEXT(_backend->ns->user);
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen const struct fts_solr_settings *set = &fuser->set;
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen
c115c742f730e312d6b6ab5064595cd0d8b4e26eTimo Sirainen if (solr_conn == NULL)
c115c742f730e312d6b6ab5064595cd0d8b4e26eTimo Sirainen solr_conn = solr_connection_init(set->url, set->debug);
c115c742f730e312d6b6ab5064595cd0d8b4e26eTimo Sirainen return 0;
c115c742f730e312d6b6ab5064595cd0d8b4e26eTimo Sirainen}
a4f09749814b93e8ad3ec8a0dc18885b874d6f8cTimo Sirainen
/* Free the backend allocated by fts_backend_solr_alloc(). The shared Solr
   connection is left untouched. */
static void fts_backend_solr_deinit(struct fts_backend *_backend)
{
	struct solr_fts_backend *solr_backend =
		(struct solr_fts_backend *)_backend;

	i_free(solr_backend);
}
13e130c3af3032982de6b1d13c6dcddda9164848Timo Sirainen
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainenstatic int
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainenget_last_uid_fallback(struct fts_backend *_backend, struct mailbox *box,
c115c742f730e312d6b6ab5064595cd0d8b4e26eTimo Sirainen uint32_t *last_uid_r)
c115c742f730e312d6b6ab5064595cd0d8b4e26eTimo Sirainen{
8eba883232f80178b60fa416f73292bf5f990fecTimo Sirainen const struct seq_range *uidvals;
8eba883232f80178b60fa416f73292bf5f990fecTimo Sirainen const char *box_guid;
8eba883232f80178b60fa416f73292bf5f990fecTimo Sirainen unsigned int count;
8eba883232f80178b60fa416f73292bf5f990fecTimo Sirainen struct solr_result **results;
8eba883232f80178b60fa416f73292bf5f990fecTimo Sirainen string_t *str;
8eba883232f80178b60fa416f73292bf5f990fecTimo Sirainen pool_t pool;
8eba883232f80178b60fa416f73292bf5f990fecTimo Sirainen int ret = 0;
8eba883232f80178b60fa416f73292bf5f990fecTimo Sirainen
8eba883232f80178b60fa416f73292bf5f990fecTimo Sirainen str = t_str_new(256);
8eba883232f80178b60fa416f73292bf5f990fecTimo Sirainen str_append(str, "fl=uid&rows=1&sort=uid+desc&q=");
8eba883232f80178b60fa416f73292bf5f990fecTimo Sirainen
8eba883232f80178b60fa416f73292bf5f990fecTimo Sirainen if (fts_mailbox_get_guid(box, &box_guid) < 0)
7631f16156aca373004953fe6b01a7f343fb47e0Timo Sirainen return -1;
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen str_printfa(str, "box:%s+user:", box_guid);
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen if (_backend->ns->owner != NULL)
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen solr_quote_http(str, _backend->ns->owner->username);
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen else
7631f16156aca373004953fe6b01a7f343fb47e0Timo Sirainen str_append(str, "%22%22");
7631f16156aca373004953fe6b01a7f343fb47e0Timo Sirainen
a249dd267f05d349f1b4aa27b40a56083c8ba392Timo Sirainen pool = pool_alloconly_create("solr last uid lookup", 1024);
03010dbaa74ec70f062994dfe3cd39bedc99a28bTimo Sirainen if (solr_connection_select(solr_conn, str_c(str),
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen pool, &results) < 0)
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen ret = -1;
efe78d3ba24fc866af1c79b9223dc0809ba26cadStephan Bosch else if (results[0] == NULL) {
5f44975ec6c5755dd74bcd4c47a123a7242ecab3Timo Sirainen /* no UIDs */
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen *last_uid_r = 0;
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen } else {
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen uidvals = array_get(&results[0]->uids, &count);
a249dd267f05d349f1b4aa27b40a56083c8ba392Timo Sirainen i_assert(count > 0);
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen if (count == 1 && uidvals[0].seq1 == uidvals[0].seq2) {
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen *last_uid_r = uidvals[0].seq1;
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen } else {
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen i_error("fts_solr: Last UID lookup returned multiple rows");
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen ret = -1;
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen }
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen }
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen pool_unref(&pool);
a249dd267f05d349f1b4aa27b40a56083c8ba392Timo Sirainen return ret;
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen}
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen
7631f16156aca373004953fe6b01a7f343fb47e0Timo Sirainenstatic int
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainenfts_backend_solr_get_last_uid(struct fts_backend *_backend,
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen struct mailbox *box, uint32_t *last_uid_r)
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen{
fe5cca45e94608d1c471990216941bf893bc8adaTimo Sirainen struct fts_index_header hdr;
fe5cca45e94608d1c471990216941bf893bc8adaTimo Sirainen
fe5cca45e94608d1c471990216941bf893bc8adaTimo Sirainen if (fts_index_get_header(box, &hdr)) {
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen *last_uid_r = hdr.last_indexed_uid;
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen return 0;
fe5cca45e94608d1c471990216941bf893bc8adaTimo Sirainen }
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen /* either nothing has been indexed, or the index was corrupted.
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen do it the slow way. */
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen if (get_last_uid_fallback(_backend, box, last_uid_r) < 0)
fe5cca45e94608d1c471990216941bf893bc8adaTimo Sirainen return -1;
fe5cca45e94608d1c471990216941bf893bc8adaTimo Sirainen
e15b305e90c9834734ccf35ed78f0ad29d570ee9Timo Sirainen (void)fts_index_set_last_uid(box, *last_uid_r);
e15b305e90c9834734ccf35ed78f0ad29d570ee9Timo Sirainen return 0;
e15b305e90c9834734ccf35ed78f0ad29d570ee9Timo Sirainen}
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainenstatic struct fts_backend_update_context *
e15b305e90c9834734ccf35ed78f0ad29d570ee9Timo Sirainenfts_backend_solr_update_init(struct fts_backend *_backend)
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen{
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen struct solr_fts_backend_update_context *ctx;
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen ctx = i_new(struct solr_fts_backend_update_context, 1);
e15b305e90c9834734ccf35ed78f0ad29d570ee9Timo Sirainen ctx->ctx.backend = _backend;
e15b305e90c9834734ccf35ed78f0ad29d570ee9Timo Sirainen ctx->cmd = str_new(default_pool, SOLR_CMDBUF_SIZE);
421d30619384e72a27e2a5d13ff6525aff4d17feTimo Sirainen i_array_init(&ctx->fields, 16);
421d30619384e72a27e2a5d13ff6525aff4d17feTimo Sirainen return &ctx->ctx;
421d30619384e72a27e2a5d13ff6525aff4d17feTimo Sirainen}
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainenstatic void xml_encode_id(struct solr_fts_backend_update_context *ctx,
421d30619384e72a27e2a5d13ff6525aff4d17feTimo Sirainen string_t *str, uint32_t uid)
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen{
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen str_printfa(str, "%u/%s", uid, ctx->box_guid);
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen if (ctx->ctx.backend->ns->owner != NULL) {
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen str_append_c(str, '/');
421d30619384e72a27e2a5d13ff6525aff4d17feTimo Sirainen xml_encode(str, ctx->ctx.backend->ns->owner->username);
421d30619384e72a27e2a5d13ff6525aff4d17feTimo Sirainen }
ecd69c4e8371853667e01b0c16d436ef7f7393e2Timo Sirainen}
ecd69c4e8371853667e01b0c16d436ef7f7393e2Timo Sirainen
ecd69c4e8371853667e01b0c16d436ef7f7393e2Timo Sirainenstatic void
ecd69c4e8371853667e01b0c16d436ef7f7393e2Timo Sirainenfts_backend_solr_doc_open(struct solr_fts_backend_update_context *ctx,
ecd69c4e8371853667e01b0c16d436ef7f7393e2Timo Sirainen uint32_t uid)
ecd69c4e8371853667e01b0c16d436ef7f7393e2Timo Sirainen{
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen ctx->documents_added = TRUE;
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen
a34bd633ab201f6a5ad1c00174fb8b0359031d00Timo Sirainen str_printfa(ctx->cmd, "<doc>"
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen "<field name=\"uid\">%u</field>"
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen "<field name=\"box\">%s</field>",
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen uid, ctx->box_guid);
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen str_append(ctx->cmd, "<field name=\"user\">");
a34bd633ab201f6a5ad1c00174fb8b0359031d00Timo Sirainen if (ctx->ctx.backend->ns->owner != NULL)
a34bd633ab201f6a5ad1c00174fb8b0359031d00Timo Sirainen xml_encode(ctx->cmd, ctx->ctx.backend->ns->owner->username);
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen str_append(ctx->cmd, "</field>");
c0225f7f6b43d34dc58c17d3304f0fd60ab89894Timo Sirainen
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen str_printfa(ctx->cmd, "<field name=\"id\">");
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen xml_encode_id(ctx, ctx->cmd, uid);
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen str_append(ctx->cmd, "</field>");
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen}
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen
a34bd633ab201f6a5ad1c00174fb8b0359031d00Timo Sirainenstatic string_t *
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainenfts_solr_field_get(struct solr_fts_backend_update_context *ctx, const char *key)
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen{
942302b0247403645394d848b3c620ead262a2a5Timo Sirainen const struct solr_fts_field *field;
942302b0247403645394d848b3c620ead262a2a5Timo Sirainen struct solr_fts_field new_field;
942302b0247403645394d848b3c620ead262a2a5Timo Sirainen
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen /* there are only a few fields. this lookup is fast enough. */
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen array_foreach(&ctx->fields, field) {
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen if (strcasecmp(field->key, key) == 0)
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen return field->value;
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen }
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen memset(&new_field, 0, sizeof(new_field));
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen new_field.key = str_lcase(i_strdup(key));
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen new_field.value = str_new(default_pool, 128);
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen array_append(&ctx->fields, &new_field, 1);
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen return new_field.value;
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen}
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen
a34bd633ab201f6a5ad1c00174fb8b0359031d00Timo Sirainenstatic void
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainenfts_backend_solr_doc_close(struct solr_fts_backend_update_context *ctx)
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen{
942302b0247403645394d848b3c620ead262a2a5Timo Sirainen struct solr_fts_field *field;
942302b0247403645394d848b3c620ead262a2a5Timo Sirainen
942302b0247403645394d848b3c620ead262a2a5Timo Sirainen if (ctx->body_open) {
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen ctx->body_open = FALSE;
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen str_append(ctx->cmd, "</field>");
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen }
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen array_foreach_modifiable(&ctx->fields, field) {
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen str_printfa(ctx->cmd, "<field name=\"%s\">", field->key);
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen str_append_str(ctx->cmd, field->value);
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen str_append(ctx->cmd, "</field>");
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen str_truncate(field->value, 0);
a34bd633ab201f6a5ad1c00174fb8b0359031d00Timo Sirainen }
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen str_append(ctx->cmd, "</doc>");
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen}
942302b0247403645394d848b3c620ead262a2a5Timo Sirainen
942302b0247403645394d848b3c620ead262a2a5Timo Sirainenstatic int
942302b0247403645394d848b3c620ead262a2a5Timo Sirainenfts_backed_solr_build_commit(struct solr_fts_backend_update_context *ctx)
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen{
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen if (ctx->post == NULL)
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen return 0;
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen fts_backend_solr_doc_close(ctx);
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen str_append(ctx->cmd, "</add>");
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen solr_connection_post_more(ctx->post, str_data(ctx->cmd),
a34bd633ab201f6a5ad1c00174fb8b0359031d00Timo Sirainen str_len(ctx->cmd));
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen return solr_connection_post_end(ctx->post);
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen}
942302b0247403645394d848b3c620ead262a2a5Timo Sirainen
942302b0247403645394d848b3c620ead262a2a5Timo Sirainenstatic int
942302b0247403645394d848b3c620ead262a2a5Timo Sirainenfts_backend_solr_update_deinit(struct fts_backend_update_context *_ctx)
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen{
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen struct solr_fts_backend_update_context *ctx =
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen (struct solr_fts_backend_update_context *)_ctx;
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen struct solr_fts_field *field;
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen const char *str;
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen int ret = _ctx->failed ? -1 : 0;
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen if (fts_backed_solr_build_commit(ctx) < 0)
a34bd633ab201f6a5ad1c00174fb8b0359031d00Timo Sirainen ret = -1;
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen if (ctx->documents_added || ctx->expunges) {
942302b0247403645394d848b3c620ead262a2a5Timo Sirainen /* commit and wait until the documents we just indexed are
942302b0247403645394d848b3c620ead262a2a5Timo Sirainen visible to the following search */
942302b0247403645394d848b3c620ead262a2a5Timo Sirainen str = t_strdup_printf("<commit waitFlush=\"false\" "
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen "waitSearcher=\"%s\"/>",
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen ctx->documents_added ? "true" : "false");
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen if (solr_connection_post(solr_conn, str) < 0)
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen ret = -1;
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen }
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen str_free(&ctx->cmd);
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen array_foreach_modifiable(&ctx->fields, field) {
a34bd633ab201f6a5ad1c00174fb8b0359031d00Timo Sirainen str_free(&field->value);
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen i_free(field->key);
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen }
942302b0247403645394d848b3c620ead262a2a5Timo Sirainen array_free(&ctx->fields);
942302b0247403645394d848b3c620ead262a2a5Timo Sirainen i_free(ctx);
942302b0247403645394d848b3c620ead262a2a5Timo Sirainen return ret;
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen}
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainenstatic void
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainenfts_backend_solr_update_set_mailbox(struct fts_backend_update_context *_ctx,
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen struct mailbox *box)
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen{
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen struct solr_fts_backend_update_context *ctx =
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen (struct solr_fts_backend_update_context *)_ctx;
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen const char *box_guid;
45d47bc4660fe4bbb07817c9580deef9cca63646Timo Sirainen
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen if (ctx->prev_uid != 0) {
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen (void)fts_index_set_last_uid(ctx->cur_box, ctx->prev_uid);
a34bd633ab201f6a5ad1c00174fb8b0359031d00Timo Sirainen ctx->prev_uid = 0;
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen }
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen
45d47bc4660fe4bbb07817c9580deef9cca63646Timo Sirainen if (box != NULL) {
45d47bc4660fe4bbb07817c9580deef9cca63646Timo Sirainen if (fts_mailbox_get_guid(box, &box_guid) < 0)
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen _ctx->failed = TRUE;
942302b0247403645394d848b3c620ead262a2a5Timo Sirainen
942302b0247403645394d848b3c620ead262a2a5Timo Sirainen i_assert(strlen(box_guid) == sizeof(ctx->box_guid)-1);
45d47bc4660fe4bbb07817c9580deef9cca63646Timo Sirainen memcpy(ctx->box_guid, box_guid, sizeof(ctx->box_guid)-1);
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen } else {
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen memset(ctx->box_guid, 0, sizeof(ctx->box_guid));
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen }
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen ctx->cur_box = box;
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen}
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainenstatic void
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainenfts_backend_solr_update_expunge(struct fts_backend_update_context *_ctx,
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen uint32_t uid)
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen{
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen struct solr_fts_backend_update_context *ctx =
a34bd633ab201f6a5ad1c00174fb8b0359031d00Timo Sirainen (struct solr_fts_backend_update_context *)_ctx;
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen struct fts_index_header hdr;
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen if (!ctx->last_indexed_uid_set) {
942302b0247403645394d848b3c620ead262a2a5Timo Sirainen if (!fts_index_get_header(ctx->cur_box, &hdr))
942302b0247403645394d848b3c620ead262a2a5Timo Sirainen ctx->last_indexed_uid = 0;
942302b0247403645394d848b3c620ead262a2a5Timo Sirainen else
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen ctx->last_indexed_uid = hdr.last_indexed_uid;
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen ctx->last_indexed_uid_set = TRUE;
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen }
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen if (ctx->last_indexed_uid == 0 ||
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen uid > ctx->last_indexed_uid + 100) {
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen /* don't waste time asking Solr to expunge a message that is
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen highly unlikely to be indexed at this time. */
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen return;
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen }
c8d1ccf9dec39e9e145d55b472eee43d95764189Timo Sirainen ctx->expunges = TRUE;
c8d1ccf9dec39e9e145d55b472eee43d95764189Timo Sirainen
c8d1ccf9dec39e9e145d55b472eee43d95764189Timo Sirainen T_BEGIN {
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen string_t *cmd;
a34bd633ab201f6a5ad1c00174fb8b0359031d00Timo Sirainen
a34bd633ab201f6a5ad1c00174fb8b0359031d00Timo Sirainen cmd = t_str_new(256);
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen str_append(cmd, "<delete><id>");
a4f09749814b93e8ad3ec8a0dc18885b874d6f8cTimo Sirainen xml_encode_id(ctx, cmd, uid);
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen str_append(cmd, "</id></delete>");
c8d1ccf9dec39e9e145d55b472eee43d95764189Timo Sirainen
942302b0247403645394d848b3c620ead262a2a5Timo Sirainen (void)solr_connection_post(solr_conn, str_c(cmd));
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen } T_END;
942302b0247403645394d848b3c620ead262a2a5Timo Sirainen}
942302b0247403645394d848b3c620ead262a2a5Timo Sirainen
942302b0247403645394d848b3c620ead262a2a5Timo Sirainenstatic void
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainenfts_backend_solr_uid_changed(struct solr_fts_backend_update_context *ctx,
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen uint32_t uid)
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen{
6df0ab0c1ab91f06b6418cb30eff44405a1b8f02Timo Sirainen if (ctx->post == NULL) {
6df0ab0c1ab91f06b6418cb30eff44405a1b8f02Timo Sirainen i_assert(ctx->prev_uid == 0);
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen ctx->post = solr_connection_post_begin(solr_conn);
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen str_append(ctx->cmd, "<add>");
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen } else {
a08e96bb7821294656492a57482d838571d10c00Aki Tuomi fts_backend_solr_doc_close(ctx);
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen }
a08e96bb7821294656492a57482d838571d10c00Aki Tuomi ctx->prev_uid = uid;
a08e96bb7821294656492a57482d838571d10c00Aki Tuomi fts_backend_solr_doc_open(ctx, uid);
6df0ab0c1ab91f06b6418cb30eff44405a1b8f02Timo Sirainen}
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainenstatic bool
a34bd633ab201f6a5ad1c00174fb8b0359031d00Timo Sirainenfts_backend_solr_update_set_build_key(struct fts_backend_update_context *_ctx,
a08e96bb7821294656492a57482d838571d10c00Aki Tuomi const struct fts_backend_build_key *key)
6df0ab0c1ab91f06b6418cb30eff44405a1b8f02Timo Sirainen{
a08e96bb7821294656492a57482d838571d10c00Aki Tuomi struct solr_fts_backend_update_context *ctx =
a08e96bb7821294656492a57482d838571d10c00Aki Tuomi (struct solr_fts_backend_update_context *)_ctx;
6df0ab0c1ab91f06b6418cb30eff44405a1b8f02Timo Sirainen
a08e96bb7821294656492a57482d838571d10c00Aki Tuomi if (key->uid != ctx->prev_uid)
a08e96bb7821294656492a57482d838571d10c00Aki Tuomi fts_backend_solr_uid_changed(ctx, key->uid);
6df0ab0c1ab91f06b6418cb30eff44405a1b8f02Timo Sirainen
a08e96bb7821294656492a57482d838571d10c00Aki Tuomi switch (key->type) {
6df0ab0c1ab91f06b6418cb30eff44405a1b8f02Timo Sirainen case FTS_BACKEND_BUILD_KEY_HDR:
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen if (fts_header_want_indexed(key->hdr_name)) {
942302b0247403645394d848b3c620ead262a2a5Timo Sirainen ctx->cur_value2 =
942302b0247403645394d848b3c620ead262a2a5Timo Sirainen fts_solr_field_get(ctx, key->hdr_name);
942302b0247403645394d848b3c620ead262a2a5Timo Sirainen }
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen /* fall through */
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen case FTS_BACKEND_BUILD_KEY_MIME_HDR:
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen ctx->cur_value = fts_solr_field_get(ctx, "hdr");
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen xml_encode(ctx->cur_value, key->hdr_name);
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen str_append(ctx->cur_value, ": ");
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen break;
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen case FTS_BACKEND_BUILD_KEY_BODY_PART:
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen if (!ctx->body_open) {
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen ctx->body_open = TRUE;
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen str_append(ctx->cmd, "<field name=\"body\">");
a34bd633ab201f6a5ad1c00174fb8b0359031d00Timo Sirainen }
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen ctx->cur_value = ctx->cmd;
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen break;
942302b0247403645394d848b3c620ead262a2a5Timo Sirainen case FTS_BACKEND_BUILD_KEY_BODY_PART_BINARY:
942302b0247403645394d848b3c620ead262a2a5Timo Sirainen i_unreached();
942302b0247403645394d848b3c620ead262a2a5Timo Sirainen }
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen return TRUE;
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen}
21aaa6affb9f134112b75b5db737309fc35ef1cfMartti Rannanjärvi
21aaa6affb9f134112b75b5db737309fc35ef1cfMartti Rannanjärvistatic void
9af6cc9ebc9986c1275ebdfa29c39e152af1557eTimo Sirainenfts_backend_solr_update_unset_build_key(struct fts_backend_update_context *_ctx)
9af6cc9ebc9986c1275ebdfa29c39e152af1557eTimo Sirainen{
21aaa6affb9f134112b75b5db737309fc35ef1cfMartti Rannanjärvi struct solr_fts_backend_update_context *ctx =
9af6cc9ebc9986c1275ebdfa29c39e152af1557eTimo Sirainen (struct solr_fts_backend_update_context *)_ctx;
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen
21aaa6affb9f134112b75b5db737309fc35ef1cfMartti Rannanjärvi /* There can be multiple duplicate keys (duplicate header lines,
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen multiple MIME body parts). Make sure they are separated by
21aaa6affb9f134112b75b5db737309fc35ef1cfMartti Rannanjärvi whitespace. */
21aaa6affb9f134112b75b5db737309fc35ef1cfMartti Rannanjärvi str_append_c(ctx->cur_value, '\n');
21aaa6affb9f134112b75b5db737309fc35ef1cfMartti Rannanjärvi ctx->cur_value = NULL;
9af6cc9ebc9986c1275ebdfa29c39e152af1557eTimo Sirainen if (ctx->cur_value2 != NULL) {
9af6cc9ebc9986c1275ebdfa29c39e152af1557eTimo Sirainen str_append_c(ctx->cur_value2, '\n');
225e82df5dd1e765f4e52b80c954558f00e5a7dfTimo Sirainen ctx->cur_value2 = NULL;
225e82df5dd1e765f4e52b80c954558f00e5a7dfTimo Sirainen }
225e82df5dd1e765f4e52b80c954558f00e5a7dfTimo Sirainen}
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen
225e82df5dd1e765f4e52b80c954558f00e5a7dfTimo Sirainenstatic int
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainenfts_backend_solr_update_build_more(struct fts_backend_update_context *_ctx,
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen const unsigned char *data, size_t size)
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen{
225e82df5dd1e765f4e52b80c954558f00e5a7dfTimo Sirainen struct solr_fts_backend_update_context *ctx =
225e82df5dd1e765f4e52b80c954558f00e5a7dfTimo Sirainen (struct solr_fts_backend_update_context *)_ctx;
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen unsigned int len;
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen if (_ctx->failed)
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen return -1;
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen if (ctx->cur_value2 == NULL && ctx->cur_value == ctx->cmd) {
a34bd633ab201f6a5ad1c00174fb8b0359031d00Timo Sirainen /* we're writing to message body. if size is huge,
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen flush it once in a while */
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen while (size >= SOLR_CMDBUF_FLUSH_SIZE) {
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen if (str_len(ctx->cmd) >= SOLR_CMDBUF_FLUSH_SIZE) {
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen solr_connection_post_more(ctx->post,
0206dc57f2c04da69599dea5816235cfeb2b897aMartti Rannanjärvi str_data(ctx->cmd),
0206dc57f2c04da69599dea5816235cfeb2b897aMartti Rannanjärvi str_len(ctx->cmd));
0206dc57f2c04da69599dea5816235cfeb2b897aMartti Rannanjärvi str_truncate(ctx->cmd, 0);
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen }
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen len = xml_encode_data_max(ctx->cmd, data, size,
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen SOLR_CMDBUF_FLUSH_SIZE -
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen str_len(ctx->cmd));
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen i_assert(len > 0);
a34bd633ab201f6a5ad1c00174fb8b0359031d00Timo Sirainen i_assert(len <= size);
0206dc57f2c04da69599dea5816235cfeb2b897aMartti Rannanjärvi data += len;
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen size -= len;
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen }
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen xml_encode_data(ctx->cmd, data, size);
e3367d7b54864d2e4b1931903e3f660ae64fbe3aTimo Sirainen } else {
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen xml_encode_data(ctx->cur_value, data, size);
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen if (ctx->cur_value2 != NULL)
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen xml_encode_data(ctx->cur_value2, data, size);
fe5cca45e94608d1c471990216941bf893bc8adaTimo Sirainen }
e15b305e90c9834734ccf35ed78f0ad29d570ee9Timo Sirainen
421d30619384e72a27e2a5d13ff6525aff4d17feTimo Sirainen if (str_len(ctx->cmd) >= SOLR_CMDBUF_FLUSH_SIZE) {
ecd69c4e8371853667e01b0c16d436ef7f7393e2Timo Sirainen solr_connection_post_more(ctx->post, str_data(ctx->cmd),
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen str_len(ctx->cmd));
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen str_truncate(ctx->cmd, 0);
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen }
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen if (str_len(ctx->cur_value) >= SOLR_BUFFER_WARN_SIZE &&
db0735f9b388c5bcfb781b1b25015e898d63d953Timo Sirainen ctx->size_warned_uid != ctx->prev_uid) {
f46885a5b78b15a8d2419f6e5d13b643bd85e41fTimo Sirainen /* a large header */
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen i_assert(ctx->cur_value != ctx->cmd);
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen ctx->size_warned_uid = ctx->prev_uid;
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen i_warning("fts-solr(%s): Mailbox %s UID=%u header size is huge",
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen ctx->cur_box->storage->user->username,
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen mailbox_get_vname(ctx->cur_box), ctx->prev_uid);
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen }
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen return 0;
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen}
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen
306b3f41b05da642d87e7ca7a1496efce9f5902fTimo Sirainenstatic int fts_backend_solr_refresh(struct fts_backend *backend ATTR_UNUSED)
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen{
21aaa6affb9f134112b75b5db737309fc35ef1cfMartti Rannanjärvi return 0;
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen}
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen
ad48319996942463675b53877092ab7e13a7a75aTimo Sirainenstatic int fts_backend_solr_rescan(struct fts_backend *backend)
f46885a5b78b15a8d2419f6e5d13b643bd85e41fTimo Sirainen{
225e82df5dd1e765f4e52b80c954558f00e5a7dfTimo Sirainen struct mailbox_list_iterate_context *iter;
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen const struct mailbox_info *info;
6564208826b0f46a00f010d1b5711d85944c3c88Timo Sirainen struct mailbox *box;
6de6ec228a41275ddda972d4a554699ea75cd06dTimo Sirainen int ret = 0;
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen
/* FIXME: proper rescan needed. for now we'll just reset the
last-uids */
iter = mailbox_list_iter_init(backend->ns->list, "*",
MAILBOX_LIST_ITER_NO_AUTO_BOXES);
while ((info = mailbox_list_iter_next(iter)) != NULL) {
if ((info->flags &
(MAILBOX_NONEXISTENT | MAILBOX_NOSELECT)) != 0)
continue;
box = mailbox_alloc(info->ns->list, info->name, 0);
if (mailbox_open(box) == 0) {
if (fts_index_set_last_uid(box, 0) < 0)
ret = -1;
}
mailbox_free(&box);
}
if (mailbox_list_iter_deinit(&iter) < 0)
ret = -1;
return ret;
}
/* Optimize callback: nothing is done by this backend; always returns 0. */
static int fts_backend_solr_optimize(struct fts_backend *backend ATTR_UNUSED)
{
	return 0;
}
/* Returns true if str contains at least one character that has a special
   meaning in a Solr query (or a space), i.e. the string cannot be used as
   a bare search term. An empty string never needs escaping. */
static bool solr_need_escaping(const char *str)
{
	static const char solr_escape_chars[] = "+-&|!(){}[]^\"~*?:\\ ";

	/* strpbrk() finds the first byte of str that is in the set */
	return strpbrk(str, solr_escape_chars) != NULL;
}
/* Append the search argument's string value to the URL query in str.

   Non-fuzzy values, and fuzzy values containing Solr-special characters,
   are added as a quoted phrase via solr_quote_http(). Otherwise the value
   is added as a bare term followed by '~' to request a fuzzy match. */
static void solr_add_str_arg(string_t *str, struct mail_search_arg *arg)
{
	const unsigned char *p;

	/* currently we'll just disable fuzzy searching if there are any
	   parameters that need escaping. solr doesn't seem to give good
	   fuzzy results even if we did escape them.. */
	if (!arg->fuzzy || solr_need_escaping(arg->value.str)) {
		solr_quote_http(str, arg->value.str);
		return;
	}

	/* The term ends up inside a URL. solr_need_escaping() only screens
	   Solr query syntax, so bytes such as '#', '%', '=' or non-ASCII
	   would otherwise be copied verbatim and corrupt the request.
	   Percent-encode everything except ASCII letters, digits, '.' and
	   '_'; the decoded term is unchanged. */
	for (p = (const unsigned char *)arg->value.str; *p != '\0'; p++) {
		if ((*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z') ||
		    (*p >= '0' && *p <= '9') || *p == '.' || *p == '_')
			str_append_c(str, *p);
		else
			str_printfa(str, "%%%02X", (unsigned int)*p);
	}
	str_append_c(str, '~');
}
/* Try to express one search argument as a Solr query whose hits are
   definite matches, appending it to str.

   Handles TEXT (hdr OR body), BODY, and header searches for headers
   accepted by fts_header_want_indexed(). A negated argument is prefixed
   with '-'. Returns FALSE, with str untouched, for anything else. */
static bool
solr_add_definite_query(string_t *str, struct mail_search_arg *arg)
{
	/* first decide whether the argument is supported at all, so that
	   nothing is appended for unsupported ones */
	switch (arg->type) {
	case SEARCH_TEXT:
	case SEARCH_BODY:
		break;
	case SEARCH_HEADER:
	case SEARCH_HEADER_ADDRESS:
	case SEARCH_HEADER_COMPRESS_LWSP:
		if (!fts_header_want_indexed(arg->hdr_field_name))
			return FALSE;
		break;
	default:
		return FALSE;
	}

	if (arg->match_not)
		str_append_c(str, '-');

	if (arg->type == SEARCH_TEXT) {
		/* text search looks into both headers and body */
		str_append(str, "(hdr:");
		solr_add_str_arg(str, arg);
		str_append(str, "+OR+body:");
		solr_add_str_arg(str, arg);
		str_append_c(str, ')');
		return TRUE;
	}

	if (arg->type == SEARCH_BODY) {
		str_append(str, "body:");
	} else {
		/* the field is named after the lowercased header */
		str_append(str, t_str_lcase(arg->hdr_field_name));
		str_append_c(str, ':');
	}
	solr_add_str_arg(str, arg);
	return TRUE;
}
/* Append a definite query for every argument in the list that
   solr_add_definite_query() can handle, joined with "+AND+" or "+OR+"
   depending on and_args. Each handled argument gets match_always set.

   Returns TRUE if at least one argument was added, FALSE if str was left
   unchanged. */
static bool
solr_add_definite_query_args(string_t *str, struct mail_search_arg *arg,
			     bool and_args)
{
	const char *joiner = and_args ? "+AND+" : "+OR+";
	unsigned int keep_len = str_len(str);

	while (arg != NULL) {
		if (solr_add_definite_query(str, arg)) {
			arg->match_always = TRUE;
			/* remember where this query ends, so the joiner
			   appended after the last one can be dropped */
			keep_len = str_len(str);
			str_append(str, joiner);
		}
		arg = arg->next;
	}

	if (str_len(str) == keep_len) {
		/* no joiner was written, so nothing was added */
		return FALSE;
	}
	str_truncate(str, keep_len);
	return TRUE;
}
/* Try to express one search argument as a Solr query whose hits are only
   potential matches, appending it to str.

   Only non-negated header searches for headers that are NOT accepted by
   fts_header_want_indexed() qualify: the search key (or, when the key is
   empty, the lowercased header name) is looked up in the combined "hdr"
   field. Returns FALSE, with str untouched, for anything else. */
static bool
solr_add_maybe_query(string_t *str, struct mail_search_arg *arg)
{
	const char *needle;

	switch (arg->type) {
	case SEARCH_HEADER:
	case SEARCH_HEADER_ADDRESS:
	case SEARCH_HEADER_COMPRESS_LWSP:
		break;
	default:
		return FALSE;
	}

	/* match_not: all matches would be definite, but all non-matches
	   would be maybies. too much trouble to optimize. */
	if (fts_header_want_indexed(arg->hdr_field_name) || arg->match_not)
		return FALSE;

	/* we can check if the search key exists in some header and
	   filter out the messages that have no chance of matching */
	str_append(str, "hdr:");
	if (*arg->value.str != '\0') {
		needle = arg->value.str;
	} else {
		/* checking potential existence of the header name */
		needle = t_str_lcase(arg->hdr_field_name);
	}
	solr_quote_http(str, needle);
	return TRUE;
}
/* Append a "maybe" query for every argument in the list that
   solr_add_maybe_query() can handle, joined with "+AND+" or "+OR+"
   depending on and_args. Each handled argument gets match_always set.

   Returns TRUE if at least one argument was added, FALSE if str was left
   unchanged. */
static bool
solr_add_maybe_query_args(string_t *str, struct mail_search_arg *arg,
			  bool and_args)
{
	unsigned int end_of_last = str_len(str);
	struct mail_search_arg *cur;

	for (cur = arg; cur != NULL; cur = cur->next) {
		if (!solr_add_maybe_query(str, cur))
			continue;

		cur->match_always = TRUE;
		end_of_last = str_len(str);
		str_append(str, and_args ? "+AND+" : "+OR+");
	}

	if (str_len(str) != end_of_last) {
		/* drop the joiner that follows the last added query */
		str_truncate(str, end_of_last);
		return TRUE;
	}
	return FALSE;
}
/* Run a single-mailbox Solr select. str holds the query built so far; a
   filter query restricting the search to box_guid and to the namespace
   owner (or an empty user when there is no owner) is appended to it.

   On success the UIDs and scores of the first returned result, if any,
   are appended to uids_r and scores_r. Returns the value of
   solr_connection_select(). */
static int solr_search(struct fts_backend *_backend, string_t *str,
		       const char *box_guid, ARRAY_TYPE(seq_range) *uids_r,
		       ARRAY_TYPE(fts_score_map) *scores_r)
{
	struct solr_result **results;
	struct solr_result *first;
	pool_t pool;
	int ret;

	/* use a separate filter query for selecting the mailbox. it shouldn't
	   affect the score and there could be some caching benefits too. */
	str_printfa(str, "&fq=%%2Bbox:%s+%%2Buser:", box_guid);
	if (_backend->ns->owner == NULL)
		str_append(str, "%22%22");
	else
		solr_quote_http(str, _backend->ns->owner->username);

	/* the results are allocated from a pool that lives only for the
	   duration of this call */
	pool = pool_alloconly_create("fts solr search", 1024);
	ret = solr_connection_select(solr_conn, str_c(str), pool, &results);
	if (ret == 0) {
		first = results[0];
		if (first != NULL) {
			array_append_array(uids_r, &first->uids);
			array_append_array(scores_r, &first->scores);
		}
	}
	pool_unref(&pool);
	return ret;
}
/* Lookup callback for a single mailbox. Builds up to two Solr queries from
   the search arguments: one for definite matches (stored in
   result->definite_uids) and one for potential matches (stored in
   result->maybe_uids). Scores of both are appended to result->scores.

   Returns 0 on success, -1 if the mailbox GUID could not be obtained or a
   Solr query failed. */
static int
fts_backend_solr_lookup(struct fts_backend *_backend, struct mailbox *box,
			struct mail_search_arg *args, bool and_args,
			struct fts_result *result)
{
	struct mailbox_status status;
	const char *box_guid;
	string_t *query;
	unsigned int base_len;

	if (fts_mailbox_get_guid(box, &box_guid) < 0)
		return -1;
	mailbox_get_open_status(box, STATUS_UIDNEXT, &status);

	/* common query prefix; uidnext is used as the row limit */
	query = t_str_new(256);
	str_printfa(query, "fl=uid,score&rows=%u&sort=uid+asc&q=",
		    status.uidnext);
	base_len = str_len(query);

	if (solr_add_definite_query_args(query, args, and_args) &&
	    solr_search(_backend, query, box_guid,
			&result->definite_uids, &result->scores) < 0)
		return -1;

	/* start over from the common prefix for the second query */
	str_truncate(query, base_len);
	if (solr_add_maybe_query_args(query, args, and_args) &&
	    solr_search(_backend, query, box_guid,
			&result->maybe_uids, &result->scores) < 0)
		return -1;

	result->scores_sorted = TRUE;
	return 0;
}
/* Run one Solr select covering several mailboxes. A filter query for the
   namespace owner and for the GUIDs of all given mailboxes is appended to
   str. Mailboxes whose GUID cannot be obtained are left out.

   The returned per-mailbox results are mapped back to their mailbox by
   GUID and stored, followed by an empty terminating entry, in
   result->box_results (allocated from result->pool). Results for an
   unknown GUID are logged and skipped.

   Returns 0 on success, -1 if the Solr select failed. */
static int
solr_search_multi(struct fts_backend *_backend, string_t *str,
		  struct mailbox *const boxes[],
		  struct fts_multi_result *result)
{
	struct solr_result **solr_results;
	struct solr_result *sres;
	struct fts_result *fts_result;
	ARRAY_DEFINE(fts_results, struct fts_result);
	struct hash_table *mailboxes;
	struct mailbox *box;
	const char *box_guid;
	unsigned int i, boxes_start;
	int ret = -1;

	/* use a separate filter query for selecting the mailbox. it shouldn't
	   affect the score and there could be some caching benefits too. */
	str_append(str, "&fq=%2Buser:");
	if (_backend->ns->owner == NULL)
		str_append(str, "%22%22");
	else
		solr_quote_http(str, _backend->ns->owner->username);

	/* GUID -> mailbox map for matching the results afterwards */
	mailboxes = hash_table_create(default_pool, default_pool, 0,
				      str_hash, (hash_cmp_callback_t *)strcmp);
	str_append(str, "%2B(");
	boxes_start = str_len(str);
	for (i = 0; boxes[i] != NULL; i++) {
		if (fts_mailbox_get_guid(boxes[i], &box_guid) < 0)
			continue;

		/* every box after the first added one needs a separator */
		if (str_len(str) != boxes_start)
			str_append(str, "+OR+");
		str_printfa(str, "box:%s", box_guid);
		hash_table_insert(mailboxes, t_strdup_noconst(box_guid),
				  boxes[i]);
	}
	str_append_c(str, ')');

	if (solr_connection_select(solr_conn, str_c(str),
				   result->pool, &solr_results) >= 0) {
		p_array_init(&fts_results, result->pool, 32);
		for (i = 0; solr_results[i] != NULL; i++) {
			sres = solr_results[i];
			box = hash_table_lookup(mailboxes, sres->box_id);
			if (box == NULL) {
				i_warning("fts_solr: Lookup returned unexpected mailbox "
					  "with guid=%s", sres->box_id);
				continue;
			}
			fts_result = array_append_space(&fts_results);
			fts_result->box = box;
			fts_result->definite_uids = sres->uids;
			fts_result->scores = sres->scores;
			fts_result->scores_sorted = TRUE;
		}
		/* empty entry terminates the list */
		(void)array_append_space(&fts_results);
		result->box_results = array_idx_modifiable(&fts_results, 0);
		ret = 0;
	}

	hash_table_destroy(&mailboxes);
	return ret;
}
/* Lookup callback for several mailboxes at once. Only definite queries are
   supported: if the search arguments yield one, it is run against all the
   given mailboxes with at most SOLR_MAX_MULTI_ROWS rows.

   Returns 0 on success (also when there was nothing to query), -1 if the
   Solr search failed. */
static int
fts_backend_solr_lookup_multi(struct fts_backend *backend,
			      struct mailbox *const boxes[],
			      struct mail_search_arg *args, bool and_args,
			      struct fts_multi_result *result)
{
	string_t *query = t_str_new(256);

	str_printfa(query, "fl=box,uid,score&rows=%u&sort=box+asc,uid+asc&q=",
		    SOLR_MAX_MULTI_ROWS);

	if (!solr_add_definite_query_args(query, args, and_args)) {
		/* FIXME: maybe_uids could be handled also with some more
		   work.. */
		return 0;
	}
	return solr_search_multi(backend, query, boxes, result) < 0 ? -1 : 0;
}
/* Backend descriptor for the "solr" FTS backend. The name and flags are
   set with designated initializers. The nested brace block fills the
   backend's callback table positionally, so the entries below must stay
   in the same order as the members of that table's declaration (which is
   outside this part of the file). */
struct fts_backend fts_backend_solr = {
.name = "solr",
.flags = FTS_BACKEND_FLAG_FUZZY_SEARCH,
{
fts_backend_solr_alloc,
fts_backend_solr_init,
fts_backend_solr_deinit,
fts_backend_solr_get_last_uid,
fts_backend_solr_update_init,
fts_backend_solr_update_deinit,
fts_backend_solr_update_set_mailbox,
fts_backend_solr_update_expunge,
fts_backend_solr_update_set_build_key,
fts_backend_solr_update_unset_build_key,
fts_backend_solr_update_build_more,
fts_backend_solr_refresh,
fts_backend_solr_rescan,
fts_backend_solr_optimize,
/* NOTE(review): not a solr-specific function; presumably the generic
   implementation provided by the FTS API - confirm. */
fts_backend_default_can_lookup,
fts_backend_solr_lookup,
fts_backend_solr_lookup_multi,
/* the last slot is left unset */
NULL
}
};