fts-backend-solr.c revision 1e60d516e91238b41c951009729f5703eca49211
45312f52ff3a3d4c137447be4c7556500c2f8bf2Timo Sirainen/* Copyright (c) 2006-2015 Dovecot authors, see the included COPYING file */
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen#include "lib.h"
1ac19c5c2b66a12f5598792aad15114ee3eb62e2Timo Sirainen#include "array.h"
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen#include "str.h"
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen#include "hash.h"
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen#include "strescape.h"
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen#include "unichar.h"
0371406d952fe51367c7be91703e5634b7d9d225Timo Sirainen#include "http-url.h"
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen#include "mail-storage-private.h"
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen#include "mailbox-list-private.h"
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen#include "mail-search.h"
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen#include "fts-api.h"
61dca057fe86fd5ae57f5106f8f049b7287d78cdTimo Sirainen#include "solr-connection.h"
61dca057fe86fd5ae57f5106f8f049b7287d78cdTimo Sirainen#include "fts-solr-plugin.h"
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen#include <ctype.h>
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen#define SOLR_CMDBUF_SIZE (1024*64)
22627da0fb77c1d0d9a8e8bc485ef5540b6f2e69Timo Sirainen#define SOLR_CMDBUF_FLUSH_SIZE (SOLR_CMDBUF_SIZE-128)
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen#define SOLR_MAX_MULTI_ROWS 100000
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen/* If header is larger than this, truncate it. */
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen#define SOLR_HEADER_MAX_SIZE (1024*1024)
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen/* If SOLR_HEADER_MAX_SIZE was already reached, write still to individual
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen header fields as long as they're smaller than this */
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen#define SOLR_HEADER_LINE_MAX_TRUNC_SIZE 1024
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainenstruct solr_fts_backend {
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen struct fts_backend backend;
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen struct solr_connection *solr_conn;
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen};
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainenstruct solr_fts_field {
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen char *key;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen string_t *value;
1285518f4f8905f22f5812d022a9f75b51752ed4Timo Sirainen};
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainenstruct solr_fts_backend_update_context {
1ac19c5c2b66a12f5598792aad15114ee3eb62e2Timo Sirainen struct fts_backend_update_context ctx;
1ac19c5c2b66a12f5598792aad15114ee3eb62e2Timo Sirainen
1ac19c5c2b66a12f5598792aad15114ee3eb62e2Timo Sirainen struct mailbox *cur_box;
1ac19c5c2b66a12f5598792aad15114ee3eb62e2Timo Sirainen char box_guid[MAILBOX_GUID_HEX_LENGTH+1];
1ac19c5c2b66a12f5598792aad15114ee3eb62e2Timo Sirainen
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen struct solr_connection_post *post;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen uint32_t prev_uid;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen string_t *cmd, *cur_value, *cur_value2;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen string_t *cmd_expunge;
68f0dfb4b2815ecbc1bd8d8a68adcfd577ec55aeTimo Sirainen ARRAY(struct solr_fts_field) fields;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen uint32_t last_indexed_uid;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen unsigned int tokenized_input:1;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen unsigned int last_indexed_uid_set:1;
1ac19c5c2b66a12f5598792aad15114ee3eb62e2Timo Sirainen unsigned int body_open:1;
1ac19c5c2b66a12f5598792aad15114ee3eb62e2Timo Sirainen unsigned int documents_added:1;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen unsigned int expunges:1;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen unsigned int truncate_header:1;
68f0dfb4b2815ecbc1bd8d8a68adcfd577ec55aeTimo Sirainen};
68f0dfb4b2815ecbc1bd8d8a68adcfd577ec55aeTimo Sirainen
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainenstatic bool is_valid_xml_char(unichar_t chr)
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen{
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen /* Valid characters in XML:
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] |
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen [#x10000-#x10FFFF]
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen This function gets called only for #x80 and higher */
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen if (chr > 0xd7ff && chr < 0xe000)
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen return FALSE;
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen if (chr > 0xfffd && chr < 0x10000)
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen return FALSE;
9c2b0eb659540b9db8dd3a8a6a2515921fbe8eebTimo Sirainen return chr < 0x10ffff;
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen}
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainenstatic unsigned int
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainenxml_encode_data_max(string_t *dest, const unsigned char *data, unsigned int len,
892b3cbf0eba9ba455448adcf71864a409345c6dTimo Sirainen unsigned int max_len)
892b3cbf0eba9ba455448adcf71864a409345c6dTimo Sirainen{
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen unichar_t chr;
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen unsigned int i;
f4c0b1874b0533bcf2df1d28d584ff02cfdae3faTimo Sirainen
f4c0b1874b0533bcf2df1d28d584ff02cfdae3faTimo Sirainen i_assert(max_len > 0 || len == 0);
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen if (max_len > len)
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen max_len = len;
892b3cbf0eba9ba455448adcf71864a409345c6dTimo Sirainen for (i = 0; i < max_len; i++) {
892b3cbf0eba9ba455448adcf71864a409345c6dTimo Sirainen switch (data[i]) {
892b3cbf0eba9ba455448adcf71864a409345c6dTimo Sirainen case '&':
892b3cbf0eba9ba455448adcf71864a409345c6dTimo Sirainen str_append(dest, "&amp;");
0371406d952fe51367c7be91703e5634b7d9d225Timo Sirainen break;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen case '<':
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen str_append(dest, "&lt;");
336805469db97f2d67c6a8d8fdd91fb48ec6e2b3Timo Sirainen break;
336805469db97f2d67c6a8d8fdd91fb48ec6e2b3Timo Sirainen case '>':
68f0dfb4b2815ecbc1bd8d8a68adcfd577ec55aeTimo Sirainen str_append(dest, "&gt;");
68f0dfb4b2815ecbc1bd8d8a68adcfd577ec55aeTimo Sirainen break;
68f0dfb4b2815ecbc1bd8d8a68adcfd577ec55aeTimo Sirainen case '\t':
68f0dfb4b2815ecbc1bd8d8a68adcfd577ec55aeTimo Sirainen case '\n':
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen case '\r':
d16b506f5540e3407d256bda35624b38a5ecf88fTimo Sirainen /* exceptions to the following control char check */
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen str_append_c(dest, data[i]);
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen break;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen default:
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen if (data[i] < 32) {
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen /* SOLR doesn't like control characters.
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen replace them with spaces. */
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen str_append_c(dest, ' ');
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen } else if (data[i] >= 0x80) {
a54fa00087ba926a3d966a8449d8d7579e89911cTimo Sirainen /* make sure the character is valid for XML
a54fa00087ba926a3d966a8449d8d7579e89911cTimo Sirainen so we don't get XML parser errors */
a54fa00087ba926a3d966a8449d8d7579e89911cTimo Sirainen unsigned int char_len =
a54fa00087ba926a3d966a8449d8d7579e89911cTimo Sirainen uni_utf8_char_bytes(data[i]);
a54fa00087ba926a3d966a8449d8d7579e89911cTimo Sirainen if (i + char_len <= len &&
a54fa00087ba926a3d966a8449d8d7579e89911cTimo Sirainen uni_utf8_get_char_n(data + i, char_len, &chr) == 1 &&
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen is_valid_xml_char(chr))
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen str_append_n(dest, data + i, char_len);
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen else {
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen str_append_n(dest, utf8_replacement_char,
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen UTF8_REPLACEMENT_CHAR_LEN);
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen }
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen i += char_len - 1;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen } else {
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen str_append_c(dest, data[i]);
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen }
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen break;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen }
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen }
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen return i;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen}
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainenstatic void
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainenxml_encode_data(string_t *dest, const unsigned char *data, unsigned int len)
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen{
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen (void)xml_encode_data_max(dest, data, len, len);
2cfe9983ce7a6280636ee12beccc2e865111967bTimo Sirainen}
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainenstatic void xml_encode(string_t *dest, const char *str)
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen{
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen xml_encode_data(dest, (const unsigned char *)str, strlen(str));
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen}
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainenstatic void solr_quote_http(string_t *dest, const char *str)
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen{
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen str_append(dest, "%22");
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen http_url_escape_param(dest, str);
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen str_append(dest, "%22");
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen}
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainenstatic struct fts_backend *fts_backend_solr_alloc(void)
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen{
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen struct solr_fts_backend *backend;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen backend = i_new(struct solr_fts_backend, 1);
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen backend->backend = fts_backend_solr;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen return &backend->backend;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen}
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainenstatic int
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainenfts_backend_solr_init(struct fts_backend *_backend, const char **error_r)
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen{
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen struct solr_fts_backend *backend = (struct solr_fts_backend *)_backend;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen struct fts_solr_user *fuser = FTS_SOLR_USER_CONTEXT(_backend->ns->user);
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen
d5cebe7f98e63d4e2822863ef2faa4971e8b3a5dTimo Sirainen if (fuser == NULL) {
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen *error_r = "Invalid fts_solr setting";
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen return -1;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen }
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen if (fuser->set.use_libfts) {
d16b506f5540e3407d256bda35624b38a5ecf88fTimo Sirainen /* change our flags so we get proper input */
d16b506f5540e3407d256bda35624b38a5ecf88fTimo Sirainen _backend->flags &= ~FTS_BACKEND_FLAG_FUZZY_SEARCH;
d16b506f5540e3407d256bda35624b38a5ecf88fTimo Sirainen _backend->flags |= FTS_BACKEND_FLAG_TOKENIZED_INPUT;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen }
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen return solr_connection_init(fuser->set.url, fuser->set.debug,
a0b89f3b1df99b3a32f44623f13ad1893118825bTimo Sirainen &backend->solr_conn, error_r);
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen}
a0b89f3b1df99b3a32f44623f13ad1893118825bTimo Sirainen
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainenstatic void fts_backend_solr_deinit(struct fts_backend *_backend)
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen{
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen struct solr_fts_backend *backend = (struct solr_fts_backend *)_backend;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen i_free(backend);
a0b89f3b1df99b3a32f44623f13ad1893118825bTimo Sirainen}
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainenstatic int
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainenget_last_uid_fallback(struct fts_backend *_backend, struct mailbox *box,
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen uint32_t *last_uid_r)
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen{
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen struct solr_fts_backend *backend = (struct solr_fts_backend *)_backend;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen const struct seq_range *uidvals;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen const char *box_guid;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen unsigned int count;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen struct solr_result **results;
a0b89f3b1df99b3a32f44623f13ad1893118825bTimo Sirainen string_t *str;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen pool_t pool;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen int ret = 0;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen str = t_str_new(256);
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen str_append(str, "fl=uid&rows=1&sort=uid+desc&q=");
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen
a0b89f3b1df99b3a32f44623f13ad1893118825bTimo Sirainen if (fts_mailbox_get_guid(box, &box_guid) < 0)
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen return -1;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen str_printfa(str, "box:%s+user:", box_guid);
a94936bafd127680184da114c6a177b37ff656e5Timo Sirainen if (_backend->ns->owner != NULL)
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen solr_quote_http(str, _backend->ns->owner->username);
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen else
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen str_append(str, "%22%22");
944a12ae4f453cc3f8a25f1e9047a5094fdfe828Timo Sirainen
944a12ae4f453cc3f8a25f1e9047a5094fdfe828Timo Sirainen pool = pool_alloconly_create("solr last uid lookup", 1024);
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen if (solr_connection_select(backend->solr_conn, str_c(str),
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen pool, &results) < 0)
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen ret = -1;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen else if (results[0] == NULL) {
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen /* no UIDs */
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen *last_uid_r = 0;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen } else {
a94936bafd127680184da114c6a177b37ff656e5Timo Sirainen uidvals = array_get(&results[0]->uids, &count);
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen i_assert(count > 0);
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen if (count == 1 && uidvals[0].seq1 == uidvals[0].seq2) {
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen *last_uid_r = uidvals[0].seq1;
d16b506f5540e3407d256bda35624b38a5ecf88fTimo Sirainen } else {
d16b506f5540e3407d256bda35624b38a5ecf88fTimo Sirainen i_error("fts_solr: Last UID lookup returned multiple rows");
d16b506f5540e3407d256bda35624b38a5ecf88fTimo Sirainen ret = -1;
d16b506f5540e3407d256bda35624b38a5ecf88fTimo Sirainen }
d16b506f5540e3407d256bda35624b38a5ecf88fTimo Sirainen }
d16b506f5540e3407d256bda35624b38a5ecf88fTimo Sirainen pool_unref(&pool);
d16b506f5540e3407d256bda35624b38a5ecf88fTimo Sirainen return ret;
d16b506f5540e3407d256bda35624b38a5ecf88fTimo Sirainen}
d16b506f5540e3407d256bda35624b38a5ecf88fTimo Sirainen
d16b506f5540e3407d256bda35624b38a5ecf88fTimo Sirainenstatic int
5fe06fea9fee0f5e4e9cb49f6866877223f78b85Timo Sirainenfts_backend_solr_get_last_uid(struct fts_backend *_backend,
d16b506f5540e3407d256bda35624b38a5ecf88fTimo Sirainen struct mailbox *box, uint32_t *last_uid_r)
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen{
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen struct fts_index_header hdr;
22627da0fb77c1d0d9a8e8bc485ef5540b6f2e69Timo Sirainen
3e28b527dd6048a40684afd29cff0ee008fc0014Timo Sirainen if (fts_index_get_header(box, &hdr)) {
3e28b527dd6048a40684afd29cff0ee008fc0014Timo Sirainen *last_uid_r = hdr.last_indexed_uid;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen return 0;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen }
ccffb125d94adff0ad776de5a96e22f864d6fb0aTimo Sirainen
43d32cbe60fdaef2699d99f1ca259053e9350411Timo Sirainen /* either nothing has been indexed, or the index was corrupted.
ccffb125d94adff0ad776de5a96e22f864d6fb0aTimo Sirainen do it the slow way. */
ccffb125d94adff0ad776de5a96e22f864d6fb0aTimo Sirainen if (get_last_uid_fallback(_backend, box, last_uid_r) < 0)
ccffb125d94adff0ad776de5a96e22f864d6fb0aTimo Sirainen return -1;
ccffb125d94adff0ad776de5a96e22f864d6fb0aTimo Sirainen
2cfe9983ce7a6280636ee12beccc2e865111967bTimo Sirainen fts_index_set_last_uid(box, *last_uid_r);
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen return 0;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen}
b221779c191d1fb5fa7eb03907e62d39d1edeb08Timo Sirainen
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainenstatic struct fts_backend_update_context *
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainenfts_backend_solr_update_init(struct fts_backend *_backend)
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen{
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen struct solr_fts_backend_update_context *ctx;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen ctx = i_new(struct solr_fts_backend_update_context, 1);
e958a3c4573058f17999f0083a34080ca35e34d8Timo Sirainen ctx->ctx.backend = _backend;
e958a3c4573058f17999f0083a34080ca35e34d8Timo Sirainen ctx->tokenized_input =
e958a3c4573058f17999f0083a34080ca35e34d8Timo Sirainen (_backend->flags & FTS_BACKEND_FLAG_TOKENIZED_INPUT) != 0;
e958a3c4573058f17999f0083a34080ca35e34d8Timo Sirainen i_array_init(&ctx->fields, 16);
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen return &ctx->ctx;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen}
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainenstatic void xml_encode_id(struct solr_fts_backend_update_context *ctx,
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen string_t *str, uint32_t uid)
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen{
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen str_printfa(str, "%u/%s", uid, ctx->box_guid);
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen if (ctx->ctx.backend->ns->owner != NULL) {
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen str_append_c(str, '/');
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen xml_encode(str, ctx->ctx.backend->ns->owner->username);
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen }
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen}
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainenstatic void
a1aaf11831cab8346d6d0dc702e37b3f1d95eb43Timo Sirainenfts_backend_solr_doc_open(struct solr_fts_backend_update_context *ctx,
a1aaf11831cab8346d6d0dc702e37b3f1d95eb43Timo Sirainen uint32_t uid)
a1aaf11831cab8346d6d0dc702e37b3f1d95eb43Timo Sirainen{
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen ctx->documents_added = TRUE;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen str_printfa(ctx->cmd, "<doc>"
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen "<field name=\"uid\">%u</field>"
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen "<field name=\"box\">%s</field>",
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen uid, ctx->box_guid);
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen str_append(ctx->cmd, "<field name=\"user\">");
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen if (ctx->ctx.backend->ns->owner != NULL)
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen xml_encode(ctx->cmd, ctx->ctx.backend->ns->owner->username);
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen str_append(ctx->cmd, "</field>");
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen str_printfa(ctx->cmd, "<field name=\"id\">");
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen xml_encode_id(ctx, ctx->cmd, uid);
1ac19c5c2b66a12f5598792aad15114ee3eb62e2Timo Sirainen str_append(ctx->cmd, "</field>");
1ac19c5c2b66a12f5598792aad15114ee3eb62e2Timo Sirainen}
1ac19c5c2b66a12f5598792aad15114ee3eb62e2Timo Sirainen
1ac19c5c2b66a12f5598792aad15114ee3eb62e2Timo Sirainenstatic string_t *
1ac19c5c2b66a12f5598792aad15114ee3eb62e2Timo Sirainenfts_solr_field_get(struct solr_fts_backend_update_context *ctx, const char *key)
1ac19c5c2b66a12f5598792aad15114ee3eb62e2Timo Sirainen{
1ac19c5c2b66a12f5598792aad15114ee3eb62e2Timo Sirainen const struct solr_fts_field *field;
1ac19c5c2b66a12f5598792aad15114ee3eb62e2Timo Sirainen struct solr_fts_field new_field;
1ac19c5c2b66a12f5598792aad15114ee3eb62e2Timo Sirainen
1ac19c5c2b66a12f5598792aad15114ee3eb62e2Timo Sirainen /* there are only a few fields. this lookup is fast enough. */
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen array_foreach(&ctx->fields, field) {
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen if (strcasecmp(field->key, key) == 0)
68f0dfb4b2815ecbc1bd8d8a68adcfd577ec55aeTimo Sirainen return field->value;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen }
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen memset(&new_field, 0, sizeof(new_field));
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen new_field.key = str_lcase(i_strdup(key));
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen new_field.value = str_new(default_pool, 128);
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen array_append(&ctx->fields, &new_field, 1);
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen return new_field.value;
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen}
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen
68f0dfb4b2815ecbc1bd8d8a68adcfd577ec55aeTimo Sirainenstatic void
68f0dfb4b2815ecbc1bd8d8a68adcfd577ec55aeTimo Sirainenfts_backend_solr_doc_close(struct solr_fts_backend_update_context *ctx)
68f0dfb4b2815ecbc1bd8d8a68adcfd577ec55aeTimo Sirainen{
e958a3c4573058f17999f0083a34080ca35e34d8Timo Sirainen struct solr_fts_field *field;
e958a3c4573058f17999f0083a34080ca35e34d8Timo Sirainen
e958a3c4573058f17999f0083a34080ca35e34d8Timo Sirainen if (ctx->body_open) {
e958a3c4573058f17999f0083a34080ca35e34d8Timo Sirainen ctx->body_open = FALSE;
e958a3c4573058f17999f0083a34080ca35e34d8Timo Sirainen str_append(ctx->cmd, "</field>");
e958a3c4573058f17999f0083a34080ca35e34d8Timo Sirainen }
68f0dfb4b2815ecbc1bd8d8a68adcfd577ec55aeTimo Sirainen array_foreach_modifiable(&ctx->fields, field) {
68f0dfb4b2815ecbc1bd8d8a68adcfd577ec55aeTimo Sirainen str_printfa(ctx->cmd, "<field name=\"%s\">", field->key);
68f0dfb4b2815ecbc1bd8d8a68adcfd577ec55aeTimo Sirainen xml_encode_data(ctx->cmd, str_data(field->value), str_len(field->value));
68f0dfb4b2815ecbc1bd8d8a68adcfd577ec55aeTimo Sirainen str_append(ctx->cmd, "</field>");
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen str_truncate(field->value, 0);
19e8adccba16ff419f5675b1575358c2956dce83Timo Sirainen }
eddd9bf1a1369aea4a2715f6be1137da6d17d293Timo Sirainen str_append(ctx->cmd, "</doc>");
19e8adccba16ff419f5675b1575358c2956dce83Timo Sirainen}
46744f1c9837f189e1c1b13e4d83231b3c9dfff6Timo Sirainen
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainenstatic int
892b3cbf0eba9ba455448adcf71864a409345c6dTimo Sirainenfts_backed_solr_build_commit(struct solr_fts_backend_update_context *ctx)
892b3cbf0eba9ba455448adcf71864a409345c6dTimo Sirainen{
892b3cbf0eba9ba455448adcf71864a409345c6dTimo Sirainen if (ctx->post == NULL)
b221779c191d1fb5fa7eb03907e62d39d1edeb08Timo Sirainen return 0;
68f0dfb4b2815ecbc1bd8d8a68adcfd577ec55aeTimo Sirainen
b221779c191d1fb5fa7eb03907e62d39d1edeb08Timo Sirainen fts_backend_solr_doc_close(ctx);
e958a3c4573058f17999f0083a34080ca35e34d8Timo Sirainen str_append(ctx->cmd, "</add>");
e958a3c4573058f17999f0083a34080ca35e34d8Timo Sirainen
e958a3c4573058f17999f0083a34080ca35e34d8Timo Sirainen solr_connection_post_more(ctx->post, str_data(ctx->cmd),
e958a3c4573058f17999f0083a34080ca35e34d8Timo Sirainen str_len(ctx->cmd));
b221779c191d1fb5fa7eb03907e62d39d1edeb08Timo Sirainen return solr_connection_post_end(ctx->post);
68f0dfb4b2815ecbc1bd8d8a68adcfd577ec55aeTimo Sirainen}
68f0dfb4b2815ecbc1bd8d8a68adcfd577ec55aeTimo Sirainen
68f0dfb4b2815ecbc1bd8d8a68adcfd577ec55aeTimo Sirainenstatic void
68f0dfb4b2815ecbc1bd8d8a68adcfd577ec55aeTimo Sirainenfts_backend_solr_expunge_flush(struct solr_fts_backend_update_context *ctx)
68f0dfb4b2815ecbc1bd8d8a68adcfd577ec55aeTimo Sirainen{
68f0dfb4b2815ecbc1bd8d8a68adcfd577ec55aeTimo Sirainen struct solr_fts_backend *backend =
68f0dfb4b2815ecbc1bd8d8a68adcfd577ec55aeTimo Sirainen (struct solr_fts_backend *)ctx->ctx.backend;
b221779c191d1fb5fa7eb03907e62d39d1edeb08Timo Sirainen
68f0dfb4b2815ecbc1bd8d8a68adcfd577ec55aeTimo Sirainen str_append(ctx->cmd_expunge, "</delete>");
68f0dfb4b2815ecbc1bd8d8a68adcfd577ec55aeTimo Sirainen (void)solr_connection_post(backend->solr_conn, str_c(ctx->cmd_expunge));
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen str_truncate(ctx->cmd_expunge, 0);
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen str_append(ctx->cmd_expunge, "<delete>");
2cfe9983ce7a6280636ee12beccc2e865111967bTimo Sirainen}
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainenstatic int
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainenfts_backend_solr_update_deinit(struct fts_backend_update_context *_ctx)
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen{
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen struct solr_fts_backend_update_context *ctx =
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen (struct solr_fts_backend_update_context *)_ctx;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen struct solr_fts_backend *backend =
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen (struct solr_fts_backend *)_ctx->backend;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen struct solr_fts_field *field;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen const char *str;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen int ret = _ctx->failed ? -1 : 0;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen if (fts_backed_solr_build_commit(ctx) < 0)
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen ret = -1;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen if (ctx->documents_added || ctx->expunges) {
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen /* commit and wait until the documents we just indexed are
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen visible to the following search */
d5cebe7f98e63d4e2822863ef2faa4971e8b3a5dTimo Sirainen if (ctx->expunges)
d5cebe7f98e63d4e2822863ef2faa4971e8b3a5dTimo Sirainen fts_backend_solr_expunge_flush(ctx);
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen str = t_strdup_printf("<commit softCommit=\"true\" waitSearcher=\"%s\"/>",
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen ctx->documents_added ? "true" : "false");
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen if (solr_connection_post(backend->solr_conn, str) < 0)
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen ret = -1;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen }
2cfe9983ce7a6280636ee12beccc2e865111967bTimo Sirainen
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen if (ctx->cmd != NULL)
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen str_free(&ctx->cmd);
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen if (ctx->cmd_expunge != NULL)
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen str_free(&ctx->cmd_expunge);
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen array_foreach_modifiable(&ctx->fields, field) {
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen str_free(&field->value);
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen i_free(field->key);
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen }
d5cebe7f98e63d4e2822863ef2faa4971e8b3a5dTimo Sirainen array_free(&ctx->fields);
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen i_free(ctx);
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen return ret;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen}
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainenstatic void
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainenfts_backend_solr_update_set_mailbox(struct fts_backend_update_context *_ctx,
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen struct mailbox *box)
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen{
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen struct solr_fts_backend_update_context *ctx =
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen (struct solr_fts_backend_update_context *)_ctx;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen const char *box_guid;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen if (ctx->prev_uid != 0) {
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen fts_index_set_last_uid(ctx->cur_box, ctx->prev_uid);
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen ctx->prev_uid = 0;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen }
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen if (box != NULL) {
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen if (fts_mailbox_get_guid(box, &box_guid) < 0)
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen _ctx->failed = TRUE;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen i_assert(strlen(box_guid) == sizeof(ctx->box_guid)-1);
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen memcpy(ctx->box_guid, box_guid, sizeof(ctx->box_guid)-1);
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen } else {
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen memset(ctx->box_guid, 0, sizeof(ctx->box_guid));
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen }
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen ctx->cur_box = box;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen}
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainenstatic void
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainenfts_backend_solr_update_expunge(struct fts_backend_update_context *_ctx,
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen uint32_t uid)
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen{
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen struct solr_fts_backend_update_context *ctx =
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen (struct solr_fts_backend_update_context *)_ctx;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen struct fts_index_header hdr;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen if (!ctx->last_indexed_uid_set) {
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen if (!fts_index_get_header(ctx->cur_box, &hdr))
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen ctx->last_indexed_uid = 0;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen else
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen ctx->last_indexed_uid = hdr.last_indexed_uid;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen ctx->last_indexed_uid_set = TRUE;
61dca057fe86fd5ae57f5106f8f049b7287d78cdTimo Sirainen }
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen if (ctx->last_indexed_uid == 0 ||
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen uid > ctx->last_indexed_uid + 100) {
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen /* don't waste time asking Solr to expunge a message that is
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen highly unlikely to be indexed at this time. */
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen return;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen }
dd171dccbe98fc63ca737e6e4e8edbeb601e5cbdTimo Sirainen if (!ctx->expunges) {
dd171dccbe98fc63ca737e6e4e8edbeb601e5cbdTimo Sirainen ctx->expunges = TRUE;
61dca057fe86fd5ae57f5106f8f049b7287d78cdTimo Sirainen ctx->cmd_expunge = str_new(default_pool, 1024);
61dca057fe86fd5ae57f5106f8f049b7287d78cdTimo Sirainen str_append(ctx->cmd_expunge, "<delete>");
61dca057fe86fd5ae57f5106f8f049b7287d78cdTimo Sirainen }
61dca057fe86fd5ae57f5106f8f049b7287d78cdTimo Sirainen
61dca057fe86fd5ae57f5106f8f049b7287d78cdTimo Sirainen if (str_len(ctx->cmd_expunge) >= SOLR_CMDBUF_FLUSH_SIZE)
61dca057fe86fd5ae57f5106f8f049b7287d78cdTimo Sirainen fts_backend_solr_expunge_flush(ctx);
dd171dccbe98fc63ca737e6e4e8edbeb601e5cbdTimo Sirainen
61dca057fe86fd5ae57f5106f8f049b7287d78cdTimo Sirainen str_append(ctx->cmd_expunge, "<id>");
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen xml_encode_id(ctx, ctx->cmd_expunge, uid);
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen str_append(ctx->cmd_expunge, "</id>");
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen}
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainenstatic void
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainenfts_backend_solr_uid_changed(struct solr_fts_backend_update_context *ctx,
1412a091183dc0e5d6ea4f403a5cd4f4cd5c7301Timo Sirainen uint32_t uid)
61dca057fe86fd5ae57f5106f8f049b7287d78cdTimo Sirainen{
61dca057fe86fd5ae57f5106f8f049b7287d78cdTimo Sirainen struct solr_fts_backend *backend =
61dca057fe86fd5ae57f5106f8f049b7287d78cdTimo Sirainen (struct solr_fts_backend *)ctx->ctx.backend;
1412a091183dc0e5d6ea4f403a5cd4f4cd5c7301Timo Sirainen
1412a091183dc0e5d6ea4f403a5cd4f4cd5c7301Timo Sirainen if (ctx->post == NULL) {
1412a091183dc0e5d6ea4f403a5cd4f4cd5c7301Timo Sirainen i_assert(ctx->prev_uid == 0);
1412a091183dc0e5d6ea4f403a5cd4f4cd5c7301Timo Sirainen
1412a091183dc0e5d6ea4f403a5cd4f4cd5c7301Timo Sirainen ctx->cmd = str_new(default_pool, SOLR_CMDBUF_SIZE);
1412a091183dc0e5d6ea4f403a5cd4f4cd5c7301Timo Sirainen ctx->post = solr_connection_post_begin(backend->solr_conn);
1412a091183dc0e5d6ea4f403a5cd4f4cd5c7301Timo Sirainen str_append(ctx->cmd, "<add>");
892b3cbf0eba9ba455448adcf71864a409345c6dTimo Sirainen } else {
892b3cbf0eba9ba455448adcf71864a409345c6dTimo Sirainen fts_backend_solr_doc_close(ctx);
892b3cbf0eba9ba455448adcf71864a409345c6dTimo Sirainen }
1412a091183dc0e5d6ea4f403a5cd4f4cd5c7301Timo Sirainen ctx->prev_uid = uid;
1412a091183dc0e5d6ea4f403a5cd4f4cd5c7301Timo Sirainen ctx->truncate_header = FALSE;
1412a091183dc0e5d6ea4f403a5cd4f4cd5c7301Timo Sirainen fts_backend_solr_doc_open(ctx, uid);
1412a091183dc0e5d6ea4f403a5cd4f4cd5c7301Timo Sirainen}
61dca057fe86fd5ae57f5106f8f049b7287d78cdTimo Sirainen
1412a091183dc0e5d6ea4f403a5cd4f4cd5c7301Timo Sirainenstatic bool
1412a091183dc0e5d6ea4f403a5cd4f4cd5c7301Timo Sirainenfts_backend_solr_update_set_build_key(struct fts_backend_update_context *_ctx,
1412a091183dc0e5d6ea4f403a5cd4f4cd5c7301Timo Sirainen const struct fts_backend_build_key *key)
61dca057fe86fd5ae57f5106f8f049b7287d78cdTimo Sirainen{
61dca057fe86fd5ae57f5106f8f049b7287d78cdTimo Sirainen struct solr_fts_backend_update_context *ctx =
2cfe9983ce7a6280636ee12beccc2e865111967bTimo Sirainen (struct solr_fts_backend_update_context *)_ctx;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen if (key->uid != ctx->prev_uid)
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen fts_backend_solr_uid_changed(ctx, key->uid);
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen switch (key->type) {
61dca057fe86fd5ae57f5106f8f049b7287d78cdTimo Sirainen case FTS_BACKEND_BUILD_KEY_HDR:
a0b89f3b1df99b3a32f44623f13ad1893118825bTimo Sirainen if (fts_header_want_indexed(key->hdr_name)) {
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen ctx->cur_value2 =
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen fts_solr_field_get(ctx, key->hdr_name);
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen }
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen /* fall through */
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen case FTS_BACKEND_BUILD_KEY_MIME_HDR:
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen ctx->cur_value = fts_solr_field_get(ctx, "hdr");
d5cebe7f98e63d4e2822863ef2faa4971e8b3a5dTimo Sirainen xml_encode(ctx->cur_value, key->hdr_name);
d5cebe7f98e63d4e2822863ef2faa4971e8b3a5dTimo Sirainen str_append(ctx->cur_value, ": ");
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen break;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen case FTS_BACKEND_BUILD_KEY_BODY_PART:
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen if (!ctx->body_open) {
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen ctx->body_open = TRUE;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen str_append(ctx->cmd, "<field name=\"body\">");
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen }
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen ctx->cur_value = ctx->cmd;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen break;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen case FTS_BACKEND_BUILD_KEY_BODY_PART_BINARY:
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen i_unreached();
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen }
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen return TRUE;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen}
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen
dd171dccbe98fc63ca737e6e4e8edbeb601e5cbdTimo Sirainenstatic void
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainenfts_backend_solr_update_unset_build_key(struct fts_backend_update_context *_ctx)
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen{
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen struct solr_fts_backend_update_context *ctx =
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen (struct solr_fts_backend_update_context *)_ctx;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen /* There can be multiple duplicate keys (duplicate header lines,
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen multiple MIME body parts). Make sure they are separated by
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen whitespace. */
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen str_append_c(ctx->cur_value, '\n');
a1aaf11831cab8346d6d0dc702e37b3f1d95eb43Timo Sirainen ctx->cur_value = NULL;
a1aaf11831cab8346d6d0dc702e37b3f1d95eb43Timo Sirainen if (ctx->cur_value2 != NULL) {
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen str_append_c(ctx->cur_value2, '\n');
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen ctx->cur_value2 = NULL;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen }
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen}
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainenstatic int
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainenfts_backend_solr_update_build_more(struct fts_backend_update_context *_ctx,
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen const unsigned char *data, size_t size)
a0b89f3b1df99b3a32f44623f13ad1893118825bTimo Sirainen{
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen struct solr_fts_backend_update_context *ctx =
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen (struct solr_fts_backend_update_context *)_ctx;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen unsigned int len;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen if (_ctx->failed)
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen return -1;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen if (ctx->cur_value2 == NULL && ctx->cur_value == ctx->cmd) {
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen /* we're writing to message body. if size is huge,
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen flush it once in a while */
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen while (size >= SOLR_CMDBUF_FLUSH_SIZE) {
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen if (str_len(ctx->cmd) >= SOLR_CMDBUF_FLUSH_SIZE) {
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen solr_connection_post_more(ctx->post,
68f0dfb4b2815ecbc1bd8d8a68adcfd577ec55aeTimo Sirainen str_data(ctx->cmd),
43d32cbe60fdaef2699d99f1ca259053e9350411Timo Sirainen str_len(ctx->cmd));
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen str_truncate(ctx->cmd, 0);
68f0dfb4b2815ecbc1bd8d8a68adcfd577ec55aeTimo Sirainen }
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen len = xml_encode_data_max(ctx->cmd, data, size,
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen SOLR_CMDBUF_FLUSH_SIZE -
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen str_len(ctx->cmd));
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen i_assert(len > 0);
13a8c553f293349248b161ff851743498916e26eTimo Sirainen i_assert(len <= size);
13a8c553f293349248b161ff851743498916e26eTimo Sirainen data += len;
24ce0c343cefe54af841871fa39dbc3464028b06Timo Sirainen size -= len;
24ce0c343cefe54af841871fa39dbc3464028b06Timo Sirainen }
24ce0c343cefe54af841871fa39dbc3464028b06Timo Sirainen xml_encode_data(ctx->cmd, data, size);
24ce0c343cefe54af841871fa39dbc3464028b06Timo Sirainen if (ctx->tokenized_input)
24ce0c343cefe54af841871fa39dbc3464028b06Timo Sirainen str_append_c(ctx->cmd, ' ');
24ce0c343cefe54af841871fa39dbc3464028b06Timo Sirainen } else {
9fcf7b79236b0045f7709718f7b65ada516565e7Timo Sirainen if (!ctx->truncate_header) {
9fcf7b79236b0045f7709718f7b65ada516565e7Timo Sirainen xml_encode_data(ctx->cur_value, data, size);
9fcf7b79236b0045f7709718f7b65ada516565e7Timo Sirainen if (ctx->tokenized_input)
9fcf7b79236b0045f7709718f7b65ada516565e7Timo Sirainen str_append_c(ctx->cur_value, ' ');
c9343c25215e98880db8f9e9c5f120f6311bc06dTimo Sirainen }
24ce0c343cefe54af841871fa39dbc3464028b06Timo Sirainen if (ctx->cur_value2 != NULL &&
24ce0c343cefe54af841871fa39dbc3464028b06Timo Sirainen (!ctx->truncate_header ||
c9343c25215e98880db8f9e9c5f120f6311bc06dTimo Sirainen str_len(ctx->cur_value2) < SOLR_HEADER_LINE_MAX_TRUNC_SIZE)) {
24ce0c343cefe54af841871fa39dbc3464028b06Timo Sirainen xml_encode_data(ctx->cur_value2, data, size);
24ce0c343cefe54af841871fa39dbc3464028b06Timo Sirainen if (ctx->tokenized_input)
24ce0c343cefe54af841871fa39dbc3464028b06Timo Sirainen str_append_c(ctx->cur_value2, ' ');
24ce0c343cefe54af841871fa39dbc3464028b06Timo Sirainen }
24ce0c343cefe54af841871fa39dbc3464028b06Timo Sirainen }
24ce0c343cefe54af841871fa39dbc3464028b06Timo Sirainen
68f0dfb4b2815ecbc1bd8d8a68adcfd577ec55aeTimo Sirainen if (str_len(ctx->cmd) >= SOLR_CMDBUF_FLUSH_SIZE) {
68f0dfb4b2815ecbc1bd8d8a68adcfd577ec55aeTimo Sirainen solr_connection_post_more(ctx->post, str_data(ctx->cmd),
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen str_len(ctx->cmd));
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen str_truncate(ctx->cmd, 0);
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen }
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen if (!ctx->truncate_header &&
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen str_len(ctx->cur_value) >= SOLR_HEADER_MAX_SIZE) {
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen /* a large header */
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen i_assert(ctx->cur_value != ctx->cmd);
68f0dfb4b2815ecbc1bd8d8a68adcfd577ec55aeTimo Sirainen
68f0dfb4b2815ecbc1bd8d8a68adcfd577ec55aeTimo Sirainen i_warning("fts-solr(%s): Mailbox %s UID=%u header size is huge, truncating",
68f0dfb4b2815ecbc1bd8d8a68adcfd577ec55aeTimo Sirainen ctx->cur_box->storage->user->username,
68f0dfb4b2815ecbc1bd8d8a68adcfd577ec55aeTimo Sirainen mailbox_get_vname(ctx->cur_box), ctx->prev_uid);
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen ctx->truncate_header = TRUE;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen }
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen return 0;
68f0dfb4b2815ecbc1bd8d8a68adcfd577ec55aeTimo Sirainen}
68f0dfb4b2815ecbc1bd8d8a68adcfd577ec55aeTimo Sirainen
68f0dfb4b2815ecbc1bd8d8a68adcfd577ec55aeTimo Sirainenstatic int fts_backend_solr_refresh(struct fts_backend *backend ATTR_UNUSED)
68f0dfb4b2815ecbc1bd8d8a68adcfd577ec55aeTimo Sirainen{
68f0dfb4b2815ecbc1bd8d8a68adcfd577ec55aeTimo Sirainen return 0;
68f0dfb4b2815ecbc1bd8d8a68adcfd577ec55aeTimo Sirainen}
68f0dfb4b2815ecbc1bd8d8a68adcfd577ec55aeTimo Sirainen
/* Rescan callback of the backend. Returns the result of
   fts_backend_reset_last_uids(). */
static int
fts_backend_solr_rescan(struct fts_backend *backend)
{
	int ret;

	/* FIXME: a proper rescan is still missing. for now only the
	   last-uids are reset. */
	ret = fts_backend_reset_last_uids(backend);
	return ret;
}
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainenstatic int fts_backend_solr_optimize(struct fts_backend *backend ATTR_UNUSED)
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen{
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen return 0;
68f0dfb4b2815ecbc1bd8d8a68adcfd577ec55aeTimo Sirainen}
68f0dfb4b2815ecbc1bd8d8a68adcfd577ec55aeTimo Sirainen
/* Returns TRUE if str contains a space or any character that has a special
   meaning in the Solr (Lucene) query syntax, FALSE otherwise (including for
   an empty string). */
static bool solr_need_escaping(const char *str)
{
	/* '/' is included because it delimits regular expressions in the
	   Lucene 4.0+ query syntax. */
	static const char solr_escape_chars[] = "+-&|!(){}[]^\"~*?:\\/ ";

	return strpbrk(str, solr_escape_chars) != NULL;
}
68f0dfb4b2815ecbc1bd8d8a68adcfd577ec55aeTimo Sirainen
/* Append the search value of arg to the query string being built in str.
   The value is added as a quoted phrase, unless fuzzy matching was requested
   and the value has no characters that need escaping, in which case it is
   added as a fuzzy term ("value~"). */
static void solr_add_str_arg(string_t *str, struct mail_search_arg *arg)
{
	/* currently we'll just disable fuzzy searching if there are any
	   parameters that need escaping. solr doesn't seem to give good
	   fuzzy results even if we did escape them.. */
	if (!arg->fuzzy || solr_need_escaping(arg->value.str)) {
		solr_quote_http(str, arg->value.str);
		return;
	}

	/* The value has no Solr special characters, but it still ends up in
	   a URL query string. Characters such as '%', '#' or non-ASCII
	   bytes must be percent-encoded or the request becomes invalid. */
	http_url_escape_param(str, arg->value.str);
	str_append_c(str, '~');
}
06f537a8e0b399222cc2a7755015ef3963525fd2Timo Sirainen
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainenstatic bool
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainensolr_add_definite_query(string_t *str, struct mail_search_arg *arg)
5fe06fea9fee0f5e4e9cb49f6866877223f78b85Timo Sirainen{
5fe06fea9fee0f5e4e9cb49f6866877223f78b85Timo Sirainen switch (arg->type) {
5fe06fea9fee0f5e4e9cb49f6866877223f78b85Timo Sirainen case SEARCH_TEXT: {
5fe06fea9fee0f5e4e9cb49f6866877223f78b85Timo Sirainen if (arg->match_not)
5fe06fea9fee0f5e4e9cb49f6866877223f78b85Timo Sirainen str_append_c(str, '-');
5fe06fea9fee0f5e4e9cb49f6866877223f78b85Timo Sirainen str_append(str, "(hdr:");
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen solr_add_str_arg(str, arg);
510a871e2187891d538bf2ebb3cfd2056003af88Timo Sirainen str_append(str, "+OR+body:");
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen solr_add_str_arg(str, arg);
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen str_append(str, ")");
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen break;
d5cebe7f98e63d4e2822863ef2faa4971e8b3a5dTimo Sirainen }
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen case SEARCH_BODY:
510a871e2187891d538bf2ebb3cfd2056003af88Timo Sirainen if (arg->match_not)
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen str_append_c(str, '-');
510a871e2187891d538bf2ebb3cfd2056003af88Timo Sirainen str_append(str, "body:");
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen solr_add_str_arg(str, arg);
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen break;
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen case SEARCH_HEADER:
1285518f4f8905f22f5812d022a9f75b51752ed4Timo Sirainen case SEARCH_HEADER_ADDRESS:
892b3cbf0eba9ba455448adcf71864a409345c6dTimo Sirainen case SEARCH_HEADER_COMPRESS_LWSP:
892b3cbf0eba9ba455448adcf71864a409345c6dTimo Sirainen if (!fts_header_want_indexed(arg->hdr_field_name))
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen return FALSE;
1412a091183dc0e5d6ea4f403a5cd4f4cd5c7301Timo Sirainen
06f537a8e0b399222cc2a7755015ef3963525fd2Timo Sirainen if (arg->match_not)
1412a091183dc0e5d6ea4f403a5cd4f4cd5c7301Timo Sirainen str_append_c(str, '-');
840a3701b7a0f7fadd17738998c33790a8dfad2dTimo Sirainen str_append(str, t_str_lcase(arg->hdr_field_name));
1412a091183dc0e5d6ea4f403a5cd4f4cd5c7301Timo Sirainen str_append_c(str, ':');
840a3701b7a0f7fadd17738998c33790a8dfad2dTimo Sirainen solr_add_str_arg(str, arg);
840a3701b7a0f7fadd17738998c33790a8dfad2dTimo Sirainen break;
840a3701b7a0f7fadd17738998c33790a8dfad2dTimo Sirainen default:
840a3701b7a0f7fadd17738998c33790a8dfad2dTimo Sirainen return FALSE;
1412a091183dc0e5d6ea4f403a5cd4f4cd5c7301Timo Sirainen }
840a3701b7a0f7fadd17738998c33790a8dfad2dTimo Sirainen return TRUE;
840a3701b7a0f7fadd17738998c33790a8dfad2dTimo Sirainen}
5fe06fea9fee0f5e4e9cb49f6866877223f78b85Timo Sirainen
5fe06fea9fee0f5e4e9cb49f6866877223f78b85Timo Sirainenstatic bool
5fe06fea9fee0f5e4e9cb49f6866877223f78b85Timo Sirainensolr_add_definite_query_args(string_t *str, struct mail_search_arg *arg,
1412a091183dc0e5d6ea4f403a5cd4f4cd5c7301Timo Sirainen bool and_args)
d5cebe7f98e63d4e2822863ef2faa4971e8b3a5dTimo Sirainen{
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen unsigned int last_len;
06f537a8e0b399222cc2a7755015ef3963525fd2Timo Sirainen
06f537a8e0b399222cc2a7755015ef3963525fd2Timo Sirainen last_len = str_len(str);
06f537a8e0b399222cc2a7755015ef3963525fd2Timo Sirainen for (; arg != NULL; arg = arg->next) {
06f537a8e0b399222cc2a7755015ef3963525fd2Timo Sirainen if (solr_add_definite_query(str, arg)) {
06f537a8e0b399222cc2a7755015ef3963525fd2Timo Sirainen arg->match_always = TRUE;
06f537a8e0b399222cc2a7755015ef3963525fd2Timo Sirainen last_len = str_len(str);
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen if (and_args)
06f537a8e0b399222cc2a7755015ef3963525fd2Timo Sirainen str_append(str, "+AND+");
06f537a8e0b399222cc2a7755015ef3963525fd2Timo Sirainen else
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen str_append(str, "+OR+");
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen }
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen }
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen if (str_len(str) == last_len)
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen return FALSE;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen str_truncate(str, last_len);
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen return TRUE;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen}
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainenstatic bool
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainensolr_add_maybe_query(string_t *str, struct mail_search_arg *arg)
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen{
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen switch (arg->type) {
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen case SEARCH_HEADER:
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen case SEARCH_HEADER_ADDRESS:
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen case SEARCH_HEADER_COMPRESS_LWSP:
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen if (fts_header_want_indexed(arg->hdr_field_name))
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen return FALSE;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen if (arg->match_not) {
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen /* all matches would be definite, but all non-matches
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen would be maybies. too much trouble to optimize. */
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen return FALSE;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen }
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen /* we can check if the search key exists in some header and
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen filter out the messages that have no chance of matching */
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen str_append(str, "hdr:");
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen if (*arg->value.str != '\0')
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen solr_quote_http(str, arg->value.str);
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen else {
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen /* checking potential existence of the header name */
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen solr_quote_http(str, t_str_lcase(arg->hdr_field_name));
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen }
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen break;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen default:
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen return FALSE;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen }
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen return TRUE;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen}
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainenstatic bool
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainensolr_add_maybe_query_args(string_t *str, struct mail_search_arg *arg,
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen bool and_args)
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen{
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen unsigned int last_len;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen last_len = str_len(str);
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen for (; arg != NULL; arg = arg->next) {
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen if (solr_add_maybe_query(str, arg)) {
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen arg->match_always = TRUE;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen last_len = str_len(str);
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen if (and_args)
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen str_append(str, "+AND+");
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen else
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen str_append(str, "+OR+");
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen }
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen }
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen if (str_len(str) == last_len)
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen return FALSE;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen str_truncate(str, last_len);
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen return TRUE;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen}
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainenstatic int solr_search(struct fts_backend *_backend, string_t *str,
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen const char *box_guid, ARRAY_TYPE(seq_range) *uids_r,
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen ARRAY_TYPE(fts_score_map) *scores_r)
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen{
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen struct solr_fts_backend *backend = (struct solr_fts_backend *)_backend;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen pool_t pool = pool_alloconly_create("fts solr search", 1024);
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen struct solr_result **results;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen int ret;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen /* use a separate filter query for selecting the mailbox. it shouldn't
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen affect the score and there could be some caching benefits too. */
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen str_printfa(str, "&fq=%%2Bbox:%s+%%2Buser:", box_guid);
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen if (_backend->ns->owner != NULL)
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen solr_quote_http(str, _backend->ns->owner->username);
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen else
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen str_append(str, "%22%22");
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen ret = solr_connection_select(backend->solr_conn, str_c(str),
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen pool, &results);
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen if (ret == 0 && results[0] != NULL) {
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen array_append_array(uids_r, &results[0]->uids);
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen array_append_array(scores_r, &results[0]->scores);
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen }
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen pool_unref(&pool);
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen return ret;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen}
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen
1ac19c5c2b66a12f5598792aad15114ee3eb62e2Timo Sirainenstatic int
1ac19c5c2b66a12f5598792aad15114ee3eb62e2Timo Sirainenfts_backend_solr_lookup(struct fts_backend *_backend, struct mailbox *box,
1ac19c5c2b66a12f5598792aad15114ee3eb62e2Timo Sirainen struct mail_search_arg *args,
1ac19c5c2b66a12f5598792aad15114ee3eb62e2Timo Sirainen enum fts_lookup_flags flags,
1ac19c5c2b66a12f5598792aad15114ee3eb62e2Timo Sirainen struct fts_result *result)
1ac19c5c2b66a12f5598792aad15114ee3eb62e2Timo Sirainen{
1ac19c5c2b66a12f5598792aad15114ee3eb62e2Timo Sirainen bool and_args = (flags & FTS_LOOKUP_FLAG_AND_ARGS) != 0;
1ac19c5c2b66a12f5598792aad15114ee3eb62e2Timo Sirainen struct mailbox_status status;
1ac19c5c2b66a12f5598792aad15114ee3eb62e2Timo Sirainen string_t *str;
1ac19c5c2b66a12f5598792aad15114ee3eb62e2Timo Sirainen const char *box_guid;
1ac19c5c2b66a12f5598792aad15114ee3eb62e2Timo Sirainen unsigned int prefix_len;
1ac19c5c2b66a12f5598792aad15114ee3eb62e2Timo Sirainen
1ac19c5c2b66a12f5598792aad15114ee3eb62e2Timo Sirainen if (fts_mailbox_get_guid(box, &box_guid) < 0)
1ac19c5c2b66a12f5598792aad15114ee3eb62e2Timo Sirainen return -1;
1ac19c5c2b66a12f5598792aad15114ee3eb62e2Timo Sirainen mailbox_get_open_status(box, STATUS_UIDNEXT, &status);
1ac19c5c2b66a12f5598792aad15114ee3eb62e2Timo Sirainen
1ac19c5c2b66a12f5598792aad15114ee3eb62e2Timo Sirainen str = t_str_new(256);
1ac19c5c2b66a12f5598792aad15114ee3eb62e2Timo Sirainen str_printfa(str, "fl=uid,score&rows=%u&sort=uid+asc&q=",
1ac19c5c2b66a12f5598792aad15114ee3eb62e2Timo Sirainen status.uidnext);
1ac19c5c2b66a12f5598792aad15114ee3eb62e2Timo Sirainen prefix_len = str_len(str);
1ac19c5c2b66a12f5598792aad15114ee3eb62e2Timo Sirainen
1ac19c5c2b66a12f5598792aad15114ee3eb62e2Timo Sirainen if (solr_add_definite_query_args(str, args, and_args)) {
1ac19c5c2b66a12f5598792aad15114ee3eb62e2Timo Sirainen ARRAY_TYPE(seq_range) *uids_arr =
1ac19c5c2b66a12f5598792aad15114ee3eb62e2Timo Sirainen (flags & FTS_LOOKUP_FLAG_NO_AUTO_FUZZY) == 0 ?
1ac19c5c2b66a12f5598792aad15114ee3eb62e2Timo Sirainen &result->definite_uids : &result->maybe_uids;
1ac19c5c2b66a12f5598792aad15114ee3eb62e2Timo Sirainen if (solr_search(_backend, str, box_guid,
1ac19c5c2b66a12f5598792aad15114ee3eb62e2Timo Sirainen uids_arr, &result->scores) < 0)
1ac19c5c2b66a12f5598792aad15114ee3eb62e2Timo Sirainen return -1;
1ac19c5c2b66a12f5598792aad15114ee3eb62e2Timo Sirainen }
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen str_truncate(str, prefix_len);
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen if (solr_add_maybe_query_args(str, args, and_args)) {
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen if (solr_search(_backend, str, box_guid,
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen &result->maybe_uids, &result->scores) < 0)
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen return -1;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen }
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen result->scores_sorted = TRUE;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen return 0;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen}
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainenstatic int
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainensolr_search_multi(struct fts_backend *_backend, string_t *str,
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen struct mailbox *const boxes[], enum fts_lookup_flags flags,
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen struct fts_multi_result *result)
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen{
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen struct solr_fts_backend *backend = (struct solr_fts_backend *)_backend;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen struct solr_result **solr_results;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen struct fts_result *fts_result;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen ARRAY(struct fts_result) fts_results;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen HASH_TABLE(char *, struct mailbox *) mailboxes;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen struct mailbox *box;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen const char *box_guid;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen unsigned int i, len;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen /* use a separate filter query for selecting the mailbox. it shouldn't
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen affect the score and there could be some caching benefits too. */
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen str_append(str, "&fq=%2Buser:");
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen if (_backend->ns->owner != NULL)
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen solr_quote_http(str, _backend->ns->owner->username);
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen else
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen str_append(str, "%22%22");
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen hash_table_create(&mailboxes, default_pool, 0, str_hash, strcmp);
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen str_append(str, "%2B(");
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen len = str_len(str);
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen for (i = 0; boxes[i] != NULL; i++) {
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen if (fts_mailbox_get_guid(boxes[i], &box_guid) < 0)
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen continue;
6c2c5f20760b06bfb4a40b0ee2ef5ab016bc41f0Timo Sirainen
9672bb2a11c37c275d695451accd824da5c9e485Timo Sirainen if (str_len(str) != len)
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen str_append(str, "+OR+");
9672bb2a11c37c275d695451accd824da5c9e485Timo Sirainen str_printfa(str, "box:%s", box_guid);
9672bb2a11c37c275d695451accd824da5c9e485Timo Sirainen hash_table_insert(mailboxes, t_strdup_noconst(box_guid),
9672bb2a11c37c275d695451accd824da5c9e485Timo Sirainen boxes[i]);
9672bb2a11c37c275d695451accd824da5c9e485Timo Sirainen }
9672bb2a11c37c275d695451accd824da5c9e485Timo Sirainen str_append_c(str, ')');
9672bb2a11c37c275d695451accd824da5c9e485Timo Sirainen
9672bb2a11c37c275d695451accd824da5c9e485Timo Sirainen if (solr_connection_select(backend->solr_conn, str_c(str),
9672bb2a11c37c275d695451accd824da5c9e485Timo Sirainen result->pool, &solr_results) < 0) {
9672bb2a11c37c275d695451accd824da5c9e485Timo Sirainen hash_table_destroy(&mailboxes);
9672bb2a11c37c275d695451accd824da5c9e485Timo Sirainen return -1;
9672bb2a11c37c275d695451accd824da5c9e485Timo Sirainen }
9672bb2a11c37c275d695451accd824da5c9e485Timo Sirainen
9672bb2a11c37c275d695451accd824da5c9e485Timo Sirainen p_array_init(&fts_results, result->pool, 32);
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen for (i = 0; solr_results[i] != NULL; i++) {
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen box = hash_table_lookup(mailboxes, solr_results[i]->box_id);
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen if (box == NULL) {
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen i_warning("fts_solr: Lookup returned unexpected mailbox "
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen "with guid=%s", solr_results[i]->box_id);
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen continue;
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen }
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen fts_result = array_append_space(&fts_results);
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen fts_result->box = box;
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen if ((flags & FTS_LOOKUP_FLAG_NO_AUTO_FUZZY) == 0)
9c2b0eb659540b9db8dd3a8a6a2515921fbe8eebTimo Sirainen fts_result->definite_uids = solr_results[i]->uids;
892b3cbf0eba9ba455448adcf71864a409345c6dTimo Sirainen else
892b3cbf0eba9ba455448adcf71864a409345c6dTimo Sirainen fts_result->maybe_uids = solr_results[i]->uids;
892b3cbf0eba9ba455448adcf71864a409345c6dTimo Sirainen fts_result->scores = solr_results[i]->scores;
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen fts_result->scores_sorted = TRUE;
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen }
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen array_append_zero(&fts_results);
9c2b0eb659540b9db8dd3a8a6a2515921fbe8eebTimo Sirainen result->box_results = array_idx_modifiable(&fts_results, 0);
9c2b0eb659540b9db8dd3a8a6a2515921fbe8eebTimo Sirainen hash_table_destroy(&mailboxes);
9c2b0eb659540b9db8dd3a8a6a2515921fbe8eebTimo Sirainen return 0;
9c2b0eb659540b9db8dd3a8a6a2515921fbe8eebTimo Sirainen}
9c2b0eb659540b9db8dd3a8a6a2515921fbe8eebTimo Sirainen
9c2b0eb659540b9db8dd3a8a6a2515921fbe8eebTimo Sirainenstatic int
9c2b0eb659540b9db8dd3a8a6a2515921fbe8eebTimo Sirainenfts_backend_solr_lookup_multi(struct fts_backend *backend,
892b3cbf0eba9ba455448adcf71864a409345c6dTimo Sirainen struct mailbox *const boxes[],
9c2b0eb659540b9db8dd3a8a6a2515921fbe8eebTimo Sirainen struct mail_search_arg *args,
9c2b0eb659540b9db8dd3a8a6a2515921fbe8eebTimo Sirainen enum fts_lookup_flags flags,
9c2b0eb659540b9db8dd3a8a6a2515921fbe8eebTimo Sirainen struct fts_multi_result *result)
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen{
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen bool and_args = (flags & FTS_LOOKUP_FLAG_AND_ARGS) != 0;
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen string_t *str;
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen str = t_str_new(256);
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen str_printfa(str, "fl=box,uid,score&rows=%u&sort=box+asc,uid+asc&q=",
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen SOLR_MAX_MULTI_ROWS);
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen if (solr_add_definite_query_args(str, args, and_args)) {
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen if (solr_search_multi(backend, str, boxes, flags, result) < 0)
892b3cbf0eba9ba455448adcf71864a409345c6dTimo Sirainen return -1;
892b3cbf0eba9ba455448adcf71864a409345c6dTimo Sirainen }
892b3cbf0eba9ba455448adcf71864a409345c6dTimo Sirainen /* FIXME: maybe_uids could be handled also with some more work.. */
892b3cbf0eba9ba455448adcf71864a409345c6dTimo Sirainen return 0;
892b3cbf0eba9ba455448adcf71864a409345c6dTimo Sirainen}
892b3cbf0eba9ba455448adcf71864a409345c6dTimo Sirainen
892b3cbf0eba9ba455448adcf71864a409345c6dTimo Sirainenstruct fts_backend fts_backend_solr = {
892b3cbf0eba9ba455448adcf71864a409345c6dTimo Sirainen .name = "solr",
892b3cbf0eba9ba455448adcf71864a409345c6dTimo Sirainen .flags = FTS_BACKEND_FLAG_FUZZY_SEARCH,
892b3cbf0eba9ba455448adcf71864a409345c6dTimo Sirainen
892b3cbf0eba9ba455448adcf71864a409345c6dTimo Sirainen {
892b3cbf0eba9ba455448adcf71864a409345c6dTimo Sirainen fts_backend_solr_alloc,
892b3cbf0eba9ba455448adcf71864a409345c6dTimo Sirainen fts_backend_solr_init,
892b3cbf0eba9ba455448adcf71864a409345c6dTimo Sirainen fts_backend_solr_deinit,
892b3cbf0eba9ba455448adcf71864a409345c6dTimo Sirainen fts_backend_solr_get_last_uid,
892b3cbf0eba9ba455448adcf71864a409345c6dTimo Sirainen fts_backend_solr_update_init,
892b3cbf0eba9ba455448adcf71864a409345c6dTimo Sirainen fts_backend_solr_update_deinit,
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen fts_backend_solr_update_set_mailbox,
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen fts_backend_solr_update_expunge,
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen fts_backend_solr_update_set_build_key,
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen fts_backend_solr_update_unset_build_key,
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen fts_backend_solr_update_build_more,
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen fts_backend_solr_refresh,
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen fts_backend_solr_rescan,
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen fts_backend_solr_optimize,
dc9de21d4375faeedbe5b7e941502ac578650da9Timo Sirainen fts_backend_default_can_lookup,
f4c0b1874b0533bcf2df1d28d584ff02cfdae3faTimo Sirainen fts_backend_solr_lookup,
f4c0b1874b0533bcf2df1d28d584ff02cfdae3faTimo Sirainen fts_backend_solr_lookup_multi,
f4c0b1874b0533bcf2df1d28d584ff02cfdae3faTimo Sirainen NULL
892b3cbf0eba9ba455448adcf71864a409345c6dTimo Sirainen }
892b3cbf0eba9ba455448adcf71864a409345c6dTimo Sirainen};
892b3cbf0eba9ba455448adcf71864a409345c6dTimo Sirainen