fts-backend-solr-old.c revision 6aadd1c52e6b291d47b47b4f4063e9bc8ccf0784
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainen/* Copyright (c) 2006-2015 Dovecot authors, see the included COPYING file */
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen#include "lib.h"
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen#include "array.h"
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen#include "str.h"
7e459d4ba3add84f31a72992efc33bd53f2c655dTimo Sirainen#include "hash.h"
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainen#include "strescape.h"
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen#include "unichar.h"
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen#include "http-url.h"
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen#include "imap-utf7.h"
5694eeb99b69dea8033ca77ad69743c6b4871370Timo Sirainen#include "mail-storage-private.h"
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen#include "mailbox-list-private.h"
5694eeb99b69dea8033ca77ad69743c6b4871370Timo Sirainen#include "mail-search.h"
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen#include "fts-api.h"
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen#include "solr-connection.h"
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen#include "fts-solr-plugin.h"
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen#include <ctype.h>
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen#define SOLR_CMDBUF_SIZE (1024*64)
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen#define SOLR_MAX_MULTI_ROWS 100000
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainenstruct solr_fts_backend {
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen struct fts_backend backend;
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen struct solr_connection *solr_conn;
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen char *id_username, *id_namespace;
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen struct mail_namespace *default_ns;
8e371a3ce32bd64288786855b8ce0cb63f19f7d1Timo Sirainen};
220e21750948941dc6e33b8f11b552fa21d7f81eTimo Sirainen
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainenstruct solr_fts_backend_update_context {
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen struct fts_backend_update_context ctx;
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen struct mailbox *cur_box;
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen char *id_box_name;
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen struct solr_connection_post *post;
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen uint32_t prev_uid, uid_validity;
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen string_t *cmd, *hdr;
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen bool headers_open;
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen bool body_open;
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen bool documents_added;
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen};
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen
/* Returns TRUE if chr is allowed by the XML 1.0 "Char" production:

     #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] |
     [#x10000-#x10FFFF]

   This function gets called only for #x80 and higher, so the low ranges
   aren't checked here. */
static bool is_valid_xml_char(unichar_t chr)
{
	/* UTF-16 surrogate range */
	if (chr >= 0xd800 && chr <= 0xdfff)
		return FALSE;
	/* U+FFFE and U+FFFF */
	if (chr == 0xfffe || chr == 0xffff)
		return FALSE;
	/* the upper bound is inclusive: U+10FFFF itself is a valid Char */
	return chr <= 0x10ffff;
}
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainenstatic void
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainenxml_encode_data(string_t *dest, const unsigned char *data, unsigned int len)
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen{
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen unichar_t chr;
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen unsigned int i;
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen for (i = 0; i < len; i++) {
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen switch (data[i]) {
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen case '&':
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen str_append(dest, "&amp;");
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen break;
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen case '<':
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen str_append(dest, "&lt;");
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen break;
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen case '>':
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen str_append(dest, "&gt;");
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen break;
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen case '\t':
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen case '\n':
1701e3f91107051b1704721bf1dc1e32491faaf9Timo Sirainen case '\r':
1701e3f91107051b1704721bf1dc1e32491faaf9Timo Sirainen /* exceptions to the following control char check */
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen str_append_c(dest, data[i]);
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen break;
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen default:
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen if (data[i] < 32) {
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen /* SOLR doesn't like control characters.
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen replace them with spaces. */
2dd39e478269d6fb0bb26d12b394aa30ee965e38Timo Sirainen str_append_c(dest, ' ');
2dd39e478269d6fb0bb26d12b394aa30ee965e38Timo Sirainen } else if (data[i] >= 0x80) {
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen /* make sure the character is valid for XML
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen so we don't get XML parser errors */
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen unsigned int char_len =
1701e3f91107051b1704721bf1dc1e32491faaf9Timo Sirainen uni_utf8_get_char_n(data + i, len - i, &chr);
1701e3f91107051b1704721bf1dc1e32491faaf9Timo Sirainen if (char_len > 0 && is_valid_xml_char(chr))
1701e3f91107051b1704721bf1dc1e32491faaf9Timo Sirainen str_append_n(dest, data + i, char_len);
1701e3f91107051b1704721bf1dc1e32491faaf9Timo Sirainen else {
1701e3f91107051b1704721bf1dc1e32491faaf9Timo Sirainen str_append_n(dest, utf8_replacement_char,
1701e3f91107051b1704721bf1dc1e32491faaf9Timo Sirainen UTF8_REPLACEMENT_CHAR_LEN);
1701e3f91107051b1704721bf1dc1e32491faaf9Timo Sirainen }
1701e3f91107051b1704721bf1dc1e32491faaf9Timo Sirainen i += char_len - 1;
1701e3f91107051b1704721bf1dc1e32491faaf9Timo Sirainen } else {
1701e3f91107051b1704721bf1dc1e32491faaf9Timo Sirainen str_append_c(dest, data[i]);
740dfc753f1f087f0c41082e4c551053129e875dTimo Sirainen }
740dfc753f1f087f0c41082e4c551053129e875dTimo Sirainen break;
740dfc753f1f087f0c41082e4c551053129e875dTimo Sirainen }
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen }
c58906589cafc32df4c04ffbef933baadd3f2276Timo Sirainen}
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen
5694eeb99b69dea8033ca77ad69743c6b4871370Timo Sirainenstatic void xml_encode(string_t *dest, const char *str)
5694eeb99b69dea8033ca77ad69743c6b4871370Timo Sirainen{
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen xml_encode_data(dest, (const unsigned char *)str, strlen(str));
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen}
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainenstatic const char *solr_escape_id_str(const char *str)
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen{
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen string_t *tmp;
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen const char *p;
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen for (p = str; *p != '\0'; p++) {
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen if (*p == '/' || *p == '!')
8fbb781d749be44e70434f4f70eff09956d58163Timo Sirainen break;
8fbb781d749be44e70434f4f70eff09956d58163Timo Sirainen }
7e459d4ba3add84f31a72992efc33bd53f2c655dTimo Sirainen if (*p == '\0')
7e459d4ba3add84f31a72992efc33bd53f2c655dTimo Sirainen return str;
7e459d4ba3add84f31a72992efc33bd53f2c655dTimo Sirainen
7e459d4ba3add84f31a72992efc33bd53f2c655dTimo Sirainen tmp = t_str_new(64);
7e459d4ba3add84f31a72992efc33bd53f2c655dTimo Sirainen for (p = str; *p != '\0'; p++) {
7e459d4ba3add84f31a72992efc33bd53f2c655dTimo Sirainen switch (*p) {
7e459d4ba3add84f31a72992efc33bd53f2c655dTimo Sirainen case '/':
7e459d4ba3add84f31a72992efc33bd53f2c655dTimo Sirainen str_append(tmp, "!\\");
7e459d4ba3add84f31a72992efc33bd53f2c655dTimo Sirainen break;
7e459d4ba3add84f31a72992efc33bd53f2c655dTimo Sirainen case '!':
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen str_append(tmp, "!!");
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen break;
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen default:
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen str_append_c(tmp, *p);
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen break;
5fb3bff645380804c9db2510940c41db6b8fdb01Timo Sirainen }
d9fdacd5fb3e07997e5c389739d2054f0c8441d8Timo Sirainen }
5fb3bff645380804c9db2510940c41db6b8fdb01Timo Sirainen return str_c(tmp);
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen}
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen
1701e3f91107051b1704721bf1dc1e32491faaf9Timo Sirainenstatic void solr_quote(string_t *dest, const char *str)
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen{
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen str_append_c(dest, '"');
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen str_append(dest, str_escape(str));
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen str_append_c(dest, '"');
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen}
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen
/* Like solr_quote(), but for text going into a URL: the surrounding quotes
   are written as %22 and str goes through http_url_escape_param(). */
static void solr_quote_http(string_t *dest, const char *str)
{
	static const char url_quote[] = "%22";

	str_append(dest, url_quote);
	http_url_escape_param(dest, str);
	str_append(dest, url_quote);
}
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainenstatic void fts_solr_set_default_ns(struct solr_fts_backend *backend)
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen{
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen struct mail_namespace *ns = backend->backend.ns;
ff7056842f14fd3b30a2d327dfab165b9d15dd30Timo Sirainen struct fts_solr_user *fuser = FTS_SOLR_USER_CONTEXT(ns->user);
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen const struct fts_solr_settings *set = &fuser->set;
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen const char *str;
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen if (backend->default_ns != NULL)
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen return;
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen if (set->default_ns_prefix != NULL) {
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen backend->default_ns =
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen mail_namespace_find_prefix(ns->user->namespaces,
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen set->default_ns_prefix);
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen if (backend->default_ns == NULL) {
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen i_error("fts_solr: default_ns setting points to "
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen "nonexistent namespace");
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen }
c58906589cafc32df4c04ffbef933baadd3f2276Timo Sirainen }
c58906589cafc32df4c04ffbef933baadd3f2276Timo Sirainen if (backend->default_ns == NULL) {
c58906589cafc32df4c04ffbef933baadd3f2276Timo Sirainen backend->default_ns =
c58906589cafc32df4c04ffbef933baadd3f2276Timo Sirainen mail_namespace_find_inbox(ns->user->namespaces);
c58906589cafc32df4c04ffbef933baadd3f2276Timo Sirainen }
c58906589cafc32df4c04ffbef933baadd3f2276Timo Sirainen while (backend->default_ns->alias_for != NULL)
c58906589cafc32df4c04ffbef933baadd3f2276Timo Sirainen backend->default_ns = backend->default_ns->alias_for;
c58906589cafc32df4c04ffbef933baadd3f2276Timo Sirainen
c58906589cafc32df4c04ffbef933baadd3f2276Timo Sirainen if (ns != backend->default_ns) {
c58906589cafc32df4c04ffbef933baadd3f2276Timo Sirainen str = solr_escape_id_str(ns->prefix);
533bfba437e4120aa29dd45bca2aa87e30ee28a2Timo Sirainen backend->id_namespace = i_strdup(str);
533bfba437e4120aa29dd45bca2aa87e30ee28a2Timo Sirainen }
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen}
533bfba437e4120aa29dd45bca2aa87e30ee28a2Timo Sirainen
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainenstatic void fts_box_name_get_root(struct mail_namespace **ns, const char **name)
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen{
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen struct mail_namespace *orig_ns = *ns;
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen while ((*ns)->alias_for != NULL)
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen *ns = (*ns)->alias_for;
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen if (**name == '\0' && *ns != orig_ns &&
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen ((*ns)->flags & NAMESPACE_FLAG_INBOX_USER) != 0) {
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen /* ugly workaround to allow selecting INBOX from a Maildir/
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen when it's not in the inbox=yes namespace. */
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen *name = "INBOX";
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen }
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen}
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainenstatic const char *
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainenfts_box_get_root(struct mailbox *box, struct mail_namespace **ns_r)
5160580b0ec3f3288a320987abdf12a990f09df5Timo Sirainen{
5160580b0ec3f3288a320987abdf12a990f09df5Timo Sirainen struct mail_namespace *ns = mailbox_get_namespace(box);
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen const char *name;
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen if (t_imap_utf8_to_utf7(box->name, &name) < 0)
5694eeb99b69dea8033ca77ad69743c6b4871370Timo Sirainen i_unreached();
5694eeb99b69dea8033ca77ad69743c6b4871370Timo Sirainen
5694eeb99b69dea8033ca77ad69743c6b4871370Timo Sirainen fts_box_name_get_root(&ns, &name);
5694eeb99b69dea8033ca77ad69743c6b4871370Timo Sirainen *ns_r = ns;
5694eeb99b69dea8033ca77ad69743c6b4871370Timo Sirainen return name;
5694eeb99b69dea8033ca77ad69743c6b4871370Timo Sirainen}
5694eeb99b69dea8033ca77ad69743c6b4871370Timo Sirainen
5694eeb99b69dea8033ca77ad69743c6b4871370Timo Sirainenstatic struct fts_backend *fts_backend_solr_alloc(void)
5694eeb99b69dea8033ca77ad69743c6b4871370Timo Sirainen{
5694eeb99b69dea8033ca77ad69743c6b4871370Timo Sirainen struct solr_fts_backend *backend;
5694eeb99b69dea8033ca77ad69743c6b4871370Timo Sirainen
5694eeb99b69dea8033ca77ad69743c6b4871370Timo Sirainen backend = i_new(struct solr_fts_backend, 1);
5694eeb99b69dea8033ca77ad69743c6b4871370Timo Sirainen backend->backend = fts_backend_solr_old;
5694eeb99b69dea8033ca77ad69743c6b4871370Timo Sirainen return &backend->backend;
5694eeb99b69dea8033ca77ad69743c6b4871370Timo Sirainen}
5694eeb99b69dea8033ca77ad69743c6b4871370Timo Sirainen
5694eeb99b69dea8033ca77ad69743c6b4871370Timo Sirainenstatic int
5694eeb99b69dea8033ca77ad69743c6b4871370Timo Sirainenfts_backend_solr_init(struct fts_backend *_backend, const char **error_r)
5694eeb99b69dea8033ca77ad69743c6b4871370Timo Sirainen{
5694eeb99b69dea8033ca77ad69743c6b4871370Timo Sirainen struct solr_fts_backend *backend = (struct solr_fts_backend *)_backend;
5694eeb99b69dea8033ca77ad69743c6b4871370Timo Sirainen struct fts_solr_user *fuser = FTS_SOLR_USER_CONTEXT(_backend->ns->user);
5694eeb99b69dea8033ca77ad69743c6b4871370Timo Sirainen const char *str;
5694eeb99b69dea8033ca77ad69743c6b4871370Timo Sirainen
5694eeb99b69dea8033ca77ad69743c6b4871370Timo Sirainen if (fuser == NULL) {
5694eeb99b69dea8033ca77ad69743c6b4871370Timo Sirainen *error_r = "Invalid fts_solr setting";
5694eeb99b69dea8033ca77ad69743c6b4871370Timo Sirainen return -1;
5694eeb99b69dea8033ca77ad69743c6b4871370Timo Sirainen }
5694eeb99b69dea8033ca77ad69743c6b4871370Timo Sirainen if (solr_connection_init(fuser->set.url, fuser->set.debug,
5694eeb99b69dea8033ca77ad69743c6b4871370Timo Sirainen &backend->solr_conn, error_r) < 0)
5694eeb99b69dea8033ca77ad69743c6b4871370Timo Sirainen return -1;
5694eeb99b69dea8033ca77ad69743c6b4871370Timo Sirainen
5694eeb99b69dea8033ca77ad69743c6b4871370Timo Sirainen str = solr_escape_id_str(_backend->ns->user->username);
5694eeb99b69dea8033ca77ad69743c6b4871370Timo Sirainen backend->id_username = i_strdup(str);
5694eeb99b69dea8033ca77ad69743c6b4871370Timo Sirainen return 0;
5694eeb99b69dea8033ca77ad69743c6b4871370Timo Sirainen}
5694eeb99b69dea8033ca77ad69743c6b4871370Timo Sirainen
5694eeb99b69dea8033ca77ad69743c6b4871370Timo Sirainenstatic void fts_backend_solr_deinit(struct fts_backend *_backend)
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen{
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen struct solr_fts_backend *backend = (struct solr_fts_backend *)_backend;
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen solr_connection_deinit(&backend->solr_conn);
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen i_free(backend->id_namespace);
533bfba437e4120aa29dd45bca2aa87e30ee28a2Timo Sirainen i_free(backend->id_username);
533bfba437e4120aa29dd45bca2aa87e30ee28a2Timo Sirainen i_free(backend);
5fb3bff645380804c9db2510940c41db6b8fdb01Timo Sirainen}
533bfba437e4120aa29dd45bca2aa87e30ee28a2Timo Sirainen
533bfba437e4120aa29dd45bca2aa87e30ee28a2Timo Sirainenstatic void
5160580b0ec3f3288a320987abdf12a990f09df5Timo Sirainensolr_add_ns_query(string_t *str, struct solr_fts_backend *backend,
5160580b0ec3f3288a320987abdf12a990f09df5Timo Sirainen struct mail_namespace *ns, bool neg)
5160580b0ec3f3288a320987abdf12a990f09df5Timo Sirainen{
533bfba437e4120aa29dd45bca2aa87e30ee28a2Timo Sirainen while (ns->alias_for != NULL)
533bfba437e4120aa29dd45bca2aa87e30ee28a2Timo Sirainen ns = ns->alias_for;
5160580b0ec3f3288a320987abdf12a990f09df5Timo Sirainen
533bfba437e4120aa29dd45bca2aa87e30ee28a2Timo Sirainen if (ns == backend->default_ns || *ns->prefix == '\0') {
5fb3bff645380804c9db2510940c41db6b8fdb01Timo Sirainen if (!neg)
5fb3bff645380804c9db2510940c41db6b8fdb01Timo Sirainen str_append(str, " -ns:[* TO *]");
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen else
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen str_append(str, " +ns:[* TO *]");
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen } else {
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen if (!neg)
5160580b0ec3f3288a320987abdf12a990f09df5Timo Sirainen str_append(str, " +ns:");
5160580b0ec3f3288a320987abdf12a990f09df5Timo Sirainen else
5160580b0ec3f3288a320987abdf12a990f09df5Timo Sirainen str_append(str, " -ns:");
5160580b0ec3f3288a320987abdf12a990f09df5Timo Sirainen solr_quote(str, ns->prefix);
5160580b0ec3f3288a320987abdf12a990f09df5Timo Sirainen }
5160580b0ec3f3288a320987abdf12a990f09df5Timo Sirainen}
5160580b0ec3f3288a320987abdf12a990f09df5Timo Sirainen
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainenstatic void
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainensolr_add_ns_query_http(string_t *str, struct solr_fts_backend *backend,
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen struct mail_namespace *ns)
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen{
533bfba437e4120aa29dd45bca2aa87e30ee28a2Timo Sirainen string_t *tmp;
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen tmp = t_str_new(64);
d92f33f13830ba23d814342bf3ea8db721a15bb1Timo Sirainen solr_add_ns_query(tmp, backend, ns, FALSE);
d92f33f13830ba23d814342bf3ea8db721a15bb1Timo Sirainen http_url_escape_param(str, str_c(tmp));
d92f33f13830ba23d814342bf3ea8db721a15bb1Timo Sirainen}
d92f33f13830ba23d814342bf3ea8db721a15bb1Timo Sirainen
d92f33f13830ba23d814342bf3ea8db721a15bb1Timo Sirainenstatic int
d92f33f13830ba23d814342bf3ea8db721a15bb1Timo Sirainenfts_backend_solr_get_last_uid_fallback(struct solr_fts_backend *backend,
f97cacf16251b42f530c6a28686cc8c9aa7df3a2Timo Sirainen struct mailbox *box,
d92f33f13830ba23d814342bf3ea8db721a15bb1Timo Sirainen uint32_t *last_uid_r)
d92f33f13830ba23d814342bf3ea8db721a15bb1Timo Sirainen{
d92f33f13830ba23d814342bf3ea8db721a15bb1Timo Sirainen struct mail_namespace *ns;
d92f33f13830ba23d814342bf3ea8db721a15bb1Timo Sirainen struct mailbox_status status;
d92f33f13830ba23d814342bf3ea8db721a15bb1Timo Sirainen struct solr_result **results;
d92f33f13830ba23d814342bf3ea8db721a15bb1Timo Sirainen const struct seq_range *uidvals;
d92f33f13830ba23d814342bf3ea8db721a15bb1Timo Sirainen const char *box_name;
d92f33f13830ba23d814342bf3ea8db721a15bb1Timo Sirainen unsigned int count;
d92f33f13830ba23d814342bf3ea8db721a15bb1Timo Sirainen string_t *str;
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen pool_t pool;
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen int ret = 0;
61e84692827b6a64912343f515c984853021483aTimo Sirainen
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen str = t_str_new(256);
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen str_append(str, "fl=uid&rows=1&sort=uid+desc&q=");
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen box_name = fts_box_get_root(box, &ns);
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen mailbox_get_open_status(box, STATUS_UIDVALIDITY, &status);
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen str_printfa(str, "uidv:%u+box:", status.uidvalidity);
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen solr_quote_http(str, box_name);
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen solr_add_ns_query_http(str, backend, ns);
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen str_append(str, "+user:");
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen solr_quote_http(str, ns->user->username);
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen pool = pool_alloconly_create("solr last uid lookup", 1024);
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen if (solr_connection_select(backend->solr_conn, str_c(str),
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen pool, &results) < 0)
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen ret = -1;
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen else if (results[0] == NULL) {
5160580b0ec3f3288a320987abdf12a990f09df5Timo Sirainen /* no UIDs */
5160580b0ec3f3288a320987abdf12a990f09df5Timo Sirainen *last_uid_r = 0;
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen } else {
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen uidvals = array_get(&results[0]->uids, &count);
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen i_assert(count > 0);
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainen if (count == 1 && uidvals[0].seq1 == uidvals[0].seq2) {
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainen *last_uid_r = uidvals[0].seq1;
25d9db0c00e8041165540e0829d7eab7548cbaa5Timo Sirainen } else {
ff7056842f14fd3b30a2d327dfab165b9d15dd30Timo Sirainen i_error("fts_solr: Last UID lookup returned multiple rows");
ff7056842f14fd3b30a2d327dfab165b9d15dd30Timo Sirainen ret = -1;
25d9db0c00e8041165540e0829d7eab7548cbaa5Timo Sirainen }
25d9db0c00e8041165540e0829d7eab7548cbaa5Timo Sirainen }
7d7917f3b54749f667d8c98e881a965ae84f033eTimo Sirainen pool_unref(&pool);
ff7056842f14fd3b30a2d327dfab165b9d15dd30Timo Sirainen return ret;
ff7056842f14fd3b30a2d327dfab165b9d15dd30Timo Sirainen}
7d7917f3b54749f667d8c98e881a965ae84f033eTimo Sirainen
7d7917f3b54749f667d8c98e881a965ae84f033eTimo Sirainenstatic int
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainenfts_backend_solr_get_last_uid(struct fts_backend *_backend,
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainen struct mailbox *box, uint32_t *last_uid_r)
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainen{
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainen struct solr_fts_backend *backend =
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainen (struct solr_fts_backend *)_backend;
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainen struct fts_index_header hdr;
5160580b0ec3f3288a320987abdf12a990f09df5Timo Sirainen
5160580b0ec3f3288a320987abdf12a990f09df5Timo Sirainen if (fts_index_get_header(box, &hdr)) {
ff7056842f14fd3b30a2d327dfab165b9d15dd30Timo Sirainen *last_uid_r = hdr.last_indexed_uid;
ff7056842f14fd3b30a2d327dfab165b9d15dd30Timo Sirainen return 0;
5160580b0ec3f3288a320987abdf12a990f09df5Timo Sirainen }
5160580b0ec3f3288a320987abdf12a990f09df5Timo Sirainen
5160580b0ec3f3288a320987abdf12a990f09df5Timo Sirainen /* either nothing has been indexed, or the index was corrupted.
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainen do it the slow way. */
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainen if (fts_backend_solr_get_last_uid_fallback(backend, box, last_uid_r) < 0)
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainen return -1;
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainen
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainen fts_index_set_last_uid(box, *last_uid_r);
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainen return 0;
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainen}
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainen
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainenstatic struct fts_backend_update_context *
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainenfts_backend_solr_update_init(struct fts_backend *_backend)
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainen{
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainen struct solr_fts_backend *backend =
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainen (struct solr_fts_backend *)_backend;
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainen struct solr_fts_backend_update_context *ctx;
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainen
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainen ctx = i_new(struct solr_fts_backend_update_context, 1);
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainen ctx->ctx.backend = _backend;
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainen ctx->cmd = str_new(default_pool, SOLR_CMDBUF_SIZE);
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainen ctx->hdr = str_new(default_pool, 4096);
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainen fts_solr_set_default_ns(backend);
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainen return &ctx->ctx;
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainen}
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainen
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainenstatic void xml_encode_id(struct solr_fts_backend_update_context *ctx,
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainen string_t *str, uint32_t uid)
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainen{
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainen struct solr_fts_backend *backend =
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainen (struct solr_fts_backend *)ctx->ctx.backend;
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainen
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainen if (uid != 0)
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainen str_printfa(str, "%u/", uid);
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainen else
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainen str_append(str, "L/");
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainen
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainen if (backend->id_namespace != NULL) {
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainen xml_encode(str, backend->id_namespace);
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen str_append_c(str, '/');
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen }
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen str_printfa(str, "%u/", ctx->uid_validity);
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen xml_encode(str, backend->id_username);
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen str_append_c(str, '/');
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen xml_encode(str, ctx->id_box_name);
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen}
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainenstatic void
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainenfts_backend_solr_add_doc_prefix(struct solr_fts_backend_update_context *ctx,
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen uint32_t uid)
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen{
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen struct solr_fts_backend *backend =
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen (struct solr_fts_backend *)ctx->ctx.backend;
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen struct mailbox *box = ctx->cur_box;
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen struct mail_namespace *ns;
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen const char *box_name;
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen ctx->documents_added = TRUE;
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen str_printfa(ctx->cmd, "<doc>"
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen "<field name=\"uid\">%u</field>"
ff4bb2dfb5714eeb0408d3bb862de1646351d097Timo Sirainen "<field name=\"uidv\">%u</field>",
43d32cbe60fdaef2699d99f1ca259053e9350411Timo Sirainen uid, ctx->uid_validity);
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen box_name = fts_box_get_root(box, &ns);
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen if (ns != backend->default_ns) {
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen str_append(ctx->cmd, "<field name=\"ns\">");
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen xml_encode(ctx->cmd, ns->prefix);
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen str_append(ctx->cmd, "</field>");
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen }
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen str_append(ctx->cmd, "<field name=\"box\">");
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen xml_encode(ctx->cmd, box_name);
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen str_append(ctx->cmd, "</field><field name=\"user\">");
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen xml_encode(ctx->cmd, ns->user->username);
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen str_append(ctx->cmd, "</field>");
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen}
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainenstatic int
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainenfts_backed_solr_build_commit(struct solr_fts_backend_update_context *ctx)
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen{
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen if (ctx->post == NULL)
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen return 0;
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen str_append(ctx->cmd, "</doc></add>");
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen solr_connection_post_more(ctx->post, str_data(ctx->cmd),
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen str_len(ctx->cmd));
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen return solr_connection_post_end(&ctx->post);
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen}
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen
ff7056842f14fd3b30a2d327dfab165b9d15dd30Timo Sirainenstatic int
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainenfts_backend_solr_update_deinit(struct fts_backend_update_context *_ctx)
ff7056842f14fd3b30a2d327dfab165b9d15dd30Timo Sirainen{
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen struct solr_fts_backend_update_context *ctx =
ff7056842f14fd3b30a2d327dfab165b9d15dd30Timo Sirainen (struct solr_fts_backend_update_context *)_ctx;
ff7056842f14fd3b30a2d327dfab165b9d15dd30Timo Sirainen struct solr_fts_backend *backend =
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen (struct solr_fts_backend *)_ctx->backend;
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen const char *str;
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen int ret;
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen
ff7056842f14fd3b30a2d327dfab165b9d15dd30Timo Sirainen ret = fts_backed_solr_build_commit(ctx);
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen
ff7056842f14fd3b30a2d327dfab165b9d15dd30Timo Sirainen /* commit and wait until the documents we just indexed are
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen visible to the following search */
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen str = t_strdup_printf("<commit waitFlush=\"false\" "
ff7056842f14fd3b30a2d327dfab165b9d15dd30Timo Sirainen "waitSearcher=\"%s\"/>",
ff7056842f14fd3b30a2d327dfab165b9d15dd30Timo Sirainen ctx->documents_added ? "true" : "false");
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen if (solr_connection_post(backend->solr_conn, str) < 0)
ff7056842f14fd3b30a2d327dfab165b9d15dd30Timo Sirainen ret = -1;
ff7056842f14fd3b30a2d327dfab165b9d15dd30Timo Sirainen
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen str_free(&ctx->cmd);
ff7056842f14fd3b30a2d327dfab165b9d15dd30Timo Sirainen str_free(&ctx->hdr);
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen i_free(ctx->id_box_name);
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen i_free(ctx);
5fb3bff645380804c9db2510940c41db6b8fdb01Timo Sirainen return ret;
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen}
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainenstatic void
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainenfts_backend_solr_update_set_mailbox(struct fts_backend_update_context *_ctx,
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen struct mailbox *box)
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen{
ff7056842f14fd3b30a2d327dfab165b9d15dd30Timo Sirainen struct solr_fts_backend_update_context *ctx =
ff7056842f14fd3b30a2d327dfab165b9d15dd30Timo Sirainen (struct solr_fts_backend_update_context *)_ctx;
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen struct mailbox_status status;
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen struct mail_namespace *ns;
ff7056842f14fd3b30a2d327dfab165b9d15dd30Timo Sirainen
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen if (ctx->prev_uid != 0) {
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen fts_index_set_last_uid(ctx->cur_box, ctx->prev_uid);
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen ctx->prev_uid = 0;
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen }
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen ctx->cur_box = box;
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen ctx->uid_validity = 0;
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen i_free_and_null(ctx->id_box_name);
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen if (box != NULL) {
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen ctx->id_box_name = i_strdup(fts_box_get_root(box, &ns));
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen mailbox_get_open_status(box, STATUS_UIDVALIDITY, &status);
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen ctx->uid_validity = status.uidvalidity;
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen }
ff7056842f14fd3b30a2d327dfab165b9d15dd30Timo Sirainen}
ff7056842f14fd3b30a2d327dfab165b9d15dd30Timo Sirainen
ff7056842f14fd3b30a2d327dfab165b9d15dd30Timo Sirainenstatic void
ff7056842f14fd3b30a2d327dfab165b9d15dd30Timo Sirainenfts_backend_solr_update_expunge(struct fts_backend_update_context *_ctx,
ff7056842f14fd3b30a2d327dfab165b9d15dd30Timo Sirainen uint32_t uid)
ff7056842f14fd3b30a2d327dfab165b9d15dd30Timo Sirainen{
ff7056842f14fd3b30a2d327dfab165b9d15dd30Timo Sirainen struct solr_fts_backend_update_context *ctx =
ff7056842f14fd3b30a2d327dfab165b9d15dd30Timo Sirainen (struct solr_fts_backend_update_context *)_ctx;
ff7056842f14fd3b30a2d327dfab165b9d15dd30Timo Sirainen struct solr_fts_backend *backend =
ff7056842f14fd3b30a2d327dfab165b9d15dd30Timo Sirainen (struct solr_fts_backend *)_ctx->backend;
ff7056842f14fd3b30a2d327dfab165b9d15dd30Timo Sirainen
ff7056842f14fd3b30a2d327dfab165b9d15dd30Timo Sirainen T_BEGIN {
string_t *cmd;
cmd = t_str_new(256);
str_append(cmd, "<delete><id>");
xml_encode_id(ctx, cmd, uid);
str_append(cmd, "</id></delete>");
(void)solr_connection_post(backend->solr_conn, str_c(cmd));
} T_END;
}
/* Called when the message being indexed changes to `uid`.

   For the first message a POST is started and <add> is opened. Otherwise
   the previous message's document is finished: an open "body" field is
   closed, the collected headers are written as the "hdr" field and </doc>
   is appended. A new document for `uid` is then started with its prefix
   fields and its "id" field. */
static void
fts_backend_solr_uid_changed(struct solr_fts_backend_update_context *ctx,
			     uint32_t uid)
{
	struct solr_fts_backend *solr_backend =
		(struct solr_fts_backend *)ctx->ctx.backend;

	if (ctx->post != NULL) {
		/* finish the previous message's document */
		ctx->headers_open = FALSE;
		if (ctx->body_open) {
			ctx->body_open = FALSE;
			str_append(ctx->cmd, "</field>");
		}
		str_append(ctx->cmd, "<field name=\"hdr\">");
		str_append_str(ctx->cmd, ctx->hdr);
		str_append(ctx->cmd, "</field>");
		str_truncate(ctx->hdr, 0);
		str_append(ctx->cmd, "</doc>");
	} else {
		/* first message of this update: open the request */
		i_assert(ctx->prev_uid == 0);
		ctx->post = solr_connection_post_begin(solr_backend->solr_conn);
		str_append(ctx->cmd, "<add>");
	}

	ctx->prev_uid = uid;
	fts_backend_solr_add_doc_prefix(ctx, uid);

	str_append(ctx->cmd, "<field name=\"id\">");
	xml_encode_id(ctx, ctx->cmd, uid);
	str_append(ctx->cmd, "</field>");
}
/* Prepare the context for receiving data for `key`.

   Starts a new document when the key's UID differs from the previous one.
   For header keys the XML-encoded header name followed by ": " is appended
   to ctx->hdr and headers are marked open. For body parts the "body" field
   is opened in ctx->cmd unless it is already open. Binary body parts are
   not expected here. Always returns TRUE. */
static bool
fts_backend_solr_update_set_build_key(struct fts_backend_update_context *_ctx,
				      const struct fts_backend_build_key *key)
{
	struct solr_fts_backend_update_context *update_ctx =
		(struct solr_fts_backend_update_context *)_ctx;

	if (update_ctx->prev_uid != key->uid)
		fts_backend_solr_uid_changed(update_ctx, key->uid);

	switch (key->type) {
	case FTS_BACKEND_BUILD_KEY_BODY_PART:
		update_ctx->headers_open = FALSE;
		if (!update_ctx->body_open) {
			/* all body parts share a single "body" field */
			update_ctx->body_open = TRUE;
			str_append(update_ctx->cmd, "<field name=\"body\">");
		}
		break;
	case FTS_BACKEND_BUILD_KEY_HDR:
	case FTS_BACKEND_BUILD_KEY_MIME_HDR:
		xml_encode(update_ctx->hdr, key->hdr_name);
		str_append(update_ctx->hdr, ": ");
		update_ctx->headers_open = TRUE;
		break;
	case FTS_BACKEND_BUILD_KEY_BODY_PART_BINARY:
		i_unreached();
	}
	return TRUE;
}
/* Terminate the data of the current build key with a newline: in ctx->hdr
   while headers are open, otherwise in ctx->cmd (the body must be open
   then). */
static void
fts_backend_solr_update_unset_build_key(struct fts_backend_update_context *_ctx)
{
	struct solr_fts_backend_update_context *update_ctx =
		(struct solr_fts_backend_update_context *)_ctx;

	if (update_ctx->headers_open) {
		str_append_c(update_ctx->hdr, '\n');
		return;
	}

	i_assert(update_ctx->body_open);
	str_append_c(update_ctx->cmd, '\n');
}
/* Append `size` bytes of XML-encoded `data` for the current build key.

   While a header key is open the data goes to ctx->hdr, which is where
   fts_backend_solr_update_set_build_key() wrote the "Name: " prefix and
   where fts_backend_solr_update_unset_build_key() adds the terminating
   newline. Otherwise the data is body text and goes to ctx->cmd; once
   ctx->cmd grows close to SOLR_CMDBUF_SIZE it is posted to Solr and
   emptied. Always returns 0. */
static int
fts_backend_solr_update_build_more(struct fts_backend_update_context *_ctx,
				   const unsigned char *data, size_t size)
{
	struct solr_fts_backend_update_context *ctx =
		(struct solr_fts_backend_update_context *)_ctx;

	if (ctx->headers_open) {
		/* Header values must not be written to ctx->cmd: there they
		   would end up as stray text between the document's fields
		   and the "hdr" field would contain only header names. */
		xml_encode_data(ctx->hdr, data, size);
		return 0;
	}

	xml_encode_data(ctx->cmd, data, size);
	if (str_len(ctx->cmd) > SOLR_CMDBUF_SIZE-128) {
		/* flush the buffered part of the request */
		solr_connection_post_more(ctx->post, str_data(ctx->cmd),
					  str_len(ctx->cmd));
		str_truncate(ctx->cmd, 0);
	}
	return 0;
}
/* Refresh hook: this backend does nothing here and reports success. */
static int
fts_backend_solr_refresh(struct fts_backend *backend ATTR_UNUSED)
{
	return 0;
}
/* Optimize hook: this backend does nothing here and reports success. */
static int
fts_backend_solr_optimize(struct fts_backend *backend ATTR_UNUSED)
{
	return 0;
}
/* Append the Solr query term for a single search argument to `str`.

   Only SEARCH_TEXT (matched against hdr OR body) and SEARCH_BODY (matched
   against body) are supported; a negated argument gets a '-' prefix.
   Returns TRUE if a term was appended, FALSE (leaving `str` untouched) for
   any other argument type. */
static bool
solr_add_definite_query(string_t *str, struct mail_search_arg *arg)
{
	switch (arg->type) {
	case SEARCH_TEXT:
	case SEARCH_BODY:
		break;
	default:
		return FALSE;
	}

	if (arg->match_not)
		str_append_c(str, '-');

	if (arg->type == SEARCH_BODY) {
		str_append(str, "body:");
		solr_quote_http(str, arg->value.str);
	} else {
		str_append(str, "(hdr:");
		solr_quote_http(str, arg->value.str);
		str_append(str, "+OR+body:");
		solr_quote_http(str, arg->value.str);
		str_append(str, ")");
	}
	return TRUE;
}
/* Append query terms for every supported argument in the list starting at
   `arg`, joined with "+AND+" when `and_args` is set and "+OR+" otherwise.
   Each argument that produced a term is flagged match_always.

   Returns FALSE if no argument produced a term (`str` is then unchanged),
   TRUE otherwise. */
static bool
solr_add_definite_query_args(string_t *str, struct mail_search_arg *arg,
			     bool and_args)
{
	const char *joiner = and_args ? "+AND+" : "+OR+";
	unsigned int term_end;

	/* length of str right after the most recently added term */
	term_end = str_len(str);
	for (; arg != NULL; arg = arg->next) {
		if (!solr_add_definite_query(str, arg))
			continue;

		arg->match_always = TRUE;
		term_end = str_len(str);
		str_append(str, joiner);
	}

	if (str_len(str) == term_end) {
		/* nothing was appended at all */
		return FALSE;
	}

	/* drop the joiner that follows the last term */
	str_truncate(str, term_end);
	return TRUE;
}
static int
fts_backend_solr_lookup(struct fts_backend *_backend, struct mailbox *box,
struct mail_search_arg *args,
enum fts_lookup_flags flags,
struct fts_result *result)
{
struct solr_fts_backend *backend =
(struct solr_fts_backend *)_backend;
bool and_args = (flags & FTS_LOOKUP_FLAG_AND_ARGS) != 0;
struct mail_namespace *ns;
struct mailbox_status status;
string_t *str;
const char *box_name;
pool_t pool;
struct solr_result **results;
int ret;
fts_solr_set_default_ns(backend);
mailbox_get_open_status(box, STATUS_UIDVALIDITY | STATUS_UIDNEXT,
&status);
str = t_str_new(256);
str_printfa(str, "fl=uid,score&rows=%u&sort=uid+asc&q=",
status.uidnext);
if (!solr_add_definite_query_args(str, args, and_args)) {
/* can't search this query */
return 0;
}
/* use a separate filter query for selecting the mailbox. it shouldn't
affect the score and there could be some caching benefits too. */
str_append(str, "&fq=%2Buser:");
solr_quote_http(str, box->storage->user->username);
box_name = fts_box_get_root(box, &ns);
str_printfa(str, "+%%2Buidv:%u+%%2Bbox:", status.uidvalidity);
solr_quote_http(str, box_name);
solr_add_ns_query_http(str, backend, ns);
pool = pool_alloconly_create("fts solr search", 1024);
ret = solr_connection_select(backend->solr_conn, str_c(str),
pool, &results);
if (ret == 0 && results[0] != NULL) {
if ((flags & FTS_LOOKUP_FLAG_NO_AUTO_FUZZY) == 0)
array_append_array(&result->definite_uids, &results[0]->uids);
else
array_append_array(&result->maybe_uids, &results[0]->uids);
array_append_array(&result->scores, &results[0]->scores);
}
result->scores_sorted = TRUE;
pool_unref(&pool);
return ret;
}
/* Build the lookup key for a mailbox:
   "<uidvalidity>\001<mailbox>", followed by "\001<namespace prefix>" when
   `ns` is not the backend's default namespace. The string is built with
   t_str_new() and returned as a modifiable C string. */
static char *
mailbox_get_id(struct solr_fts_backend *backend, struct mail_namespace *ns,
	       const char *mailbox, uint32_t uidvalidity)
{
	string_t *id;

	id = t_str_new(64);
	str_printfa(id, "%u\001", uidvalidity);
	str_append(id, mailbox);
	if (backend->default_ns != ns)
		str_printfa(id, "\001%s", ns->prefix);
	return str_c_modifiable(id);
}
/* Run a search over several mailboxes with one Solr query.

   `str` already contains the query terms. This appends a filter query that
   selects the namespace owner's user and any of the given mailboxes (by
   UIDVALIDITY, root box name and namespace), runs the select and converts
   each returned per-mailbox result into an fts_result. Results are
   allocated from result->pool and stored in result->box_results, which
   ends with a zero-filled entry. Results whose box id does not match any of
   the requested mailboxes are logged and skipped.

   Returns 0 on success, -1 if the select failed. */
static int
solr_search_multi(struct solr_fts_backend *backend, string_t *str,
		  struct mailbox *const boxes[],
		  enum fts_lookup_flags flags,
		  struct fts_multi_result *result)
{
	bool want_definite = (flags & FTS_LOOKUP_FLAG_NO_AUTO_FUZZY) == 0;
	struct solr_result **solr_results;
	struct fts_result *box_result;
	ARRAY(struct fts_result) box_results;
	struct mail_namespace *box_ns;
	struct mailbox_status box_status;
	HASH_TABLE(char *, struct mailbox *) box_by_id;
	struct mailbox *matched_box;
	const char *root_name;
	char *box_id;
	unsigned int i;

	/* use a separate filter query for selecting the mailbox. it shouldn't
	   affect the score and there could be some caching benefits too. */
	str_append(str, "&fq=%2Buser:");
	if (backend->backend.ns->owner == NULL)
		str_append(str, "%22%22");
	else
		solr_quote_http(str, backend->backend.ns->owner->username);

	hash_table_create(&box_by_id, default_pool, 0, str_hash, strcmp);
	str_append(str, "%2B(");

	for (i = 0; boxes[i] != NULL; i++) {
		/* every iteration appends a clause, so a separator is needed
		   before all but the first one */
		if (i > 0)
			str_append(str, "+OR+");

		root_name = fts_box_get_root(boxes[i], &box_ns);
		mailbox_get_open_status(boxes[i], STATUS_UIDVALIDITY,
					&box_status);

		str_printfa(str, "%%2B(%%2Buidv:%u+%%2Bbox:",
			    box_status.uidvalidity);
		solr_quote_http(str, root_name);
		solr_add_ns_query_http(str, backend, box_ns);
		str_append_c(str, ')');

		/* remember which mailbox this id belongs to */
		box_id = mailbox_get_id(backend, box_ns, root_name,
					box_status.uidvalidity);
		hash_table_insert(box_by_id, box_id, boxes[i]);
	}
	str_append_c(str, ')');

	if (solr_connection_select(backend->solr_conn, str_c(str),
				   result->pool, &solr_results) < 0) {
		hash_table_destroy(&box_by_id);
		return -1;
	}

	p_array_init(&box_results, result->pool, 32);
	for (i = 0; solr_results[i] != NULL; i++) {
		matched_box = hash_table_lookup(box_by_id,
						solr_results[i]->box_id);
		if (matched_box == NULL) {
			i_warning("fts_solr: Lookup returned unexpected mailbox "
				  "with id=%s", solr_results[i]->box_id);
			continue;
		}

		box_result = array_append_space(&box_results);
		box_result->box = matched_box;
		if (want_definite)
			box_result->definite_uids = solr_results[i]->uids;
		else
			box_result->maybe_uids = solr_results[i]->uids;
		box_result->scores = solr_results[i]->scores;
		box_result->scores_sorted = TRUE;
	}
	array_append_zero(&box_results);
	result->box_results = array_idx_modifiable(&box_results, 0);
	hash_table_destroy(&box_by_id);
	return 0;
}
/* Search several mailboxes at once.

   Builds the query prefix (returned fields, a row limit of
   SOLR_MAX_MULTI_ROWS, sorting by box and UID) and the query terms, then
   hands over to solr_search_multi(). Returns 0 without searching when none
   of the arguments can be searched, -1 if the search failed and 0 on
   success. */
static int
fts_backend_solr_lookup_multi(struct fts_backend *_backend,
			      struct mailbox *const boxes[],
			      struct mail_search_arg *args,
			      enum fts_lookup_flags flags,
			      struct fts_multi_result *result)
{
	struct solr_fts_backend *solr_backend =
		(struct solr_fts_backend *)_backend;
	bool and_args = (flags & FTS_LOOKUP_FLAG_AND_ARGS) != 0;
	string_t *query;

	fts_solr_set_default_ns(solr_backend);

	query = t_str_new(256);
	str_printfa(query, "fl=ns,box,uidv,uid,score&rows=%u&sort=box+asc,uid+asc&q=",
		    SOLR_MAX_MULTI_ROWS);

	/* FIXME: maybe_uids could be handled also with some more work.. */
	if (!solr_add_definite_query_args(query, args, and_args))
		return 0;

	if (solr_search_multi(solr_backend, query, boxes, flags, result) < 0)
		return -1;
	return 0;
}
/* Backend definition for the "solr_old" FTS backend.

   The nested initializer fills the backend's function table positionally,
   so the entries must stay in the same order as the members of that table.
   The table is declared outside this file.
   NOTE(review): confirm the member order against the declaration of
   struct fts_backend before adding or moving entries. */
struct fts_backend fts_backend_solr_old = {
.name = "solr_old",
.flags = 0,
{
fts_backend_solr_alloc,
fts_backend_solr_init,
fts_backend_solr_deinit,
fts_backend_solr_get_last_uid,
fts_backend_solr_update_init,
fts_backend_solr_update_deinit,
fts_backend_solr_update_set_mailbox,
fts_backend_solr_update_expunge,
fts_backend_solr_update_set_build_key,
fts_backend_solr_update_unset_build_key,
fts_backend_solr_update_build_more,
fts_backend_solr_refresh,
/* NOTE(review): slot left NULL; presumably an optional hook this backend
   does not implement. Confirm which member this is. */
NULL,
fts_backend_solr_optimize,
fts_backend_default_can_lookup,
fts_backend_solr_lookup,
fts_backend_solr_lookup_multi,
/* NOTE(review): slot left NULL; presumably an optional hook this backend
   does not implement. Confirm which member this is. */
NULL
}
};