fts-backend-solr-old.c revision bae41e95e936777e71ea5c74eed03e6630dd8e67
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainen/* Copyright (c) 2006-2018 Dovecot authors, see the included COPYING file */
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainenstatic const char *solr_escape_chars = "+-&|!(){}[]^\"~*?:\\/ ";
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen /* Valid characters in XML:
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] |
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen [#x10000-#x10FFFF]
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen This function gets called only for #x80 and higher */
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainenxml_encode_data(string_t *dest, const unsigned char *data, size_t len)
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen for (i = 0; i < len; i++) {
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen switch (data[i]) {
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen /* exceptions to the following control char check */
2dd39e478269d6fb0bb26d12b394aa30ee965e38Timo Sirainen /* SOLR doesn't like control characters.
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen replace them with spaces. */
1701e3f91107051b1704721bf1dc1e32491faaf9Timo Sirainen /* make sure the character is valid for XML
1701e3f91107051b1704721bf1dc1e32491faaf9Timo Sirainen so we don't get XML parser errors */
1701e3f91107051b1704721bf1dc1e32491faaf9Timo Sirainen unsigned int char_len =
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainenstatic void xml_encode(string_t *dest, const char *str)
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen xml_encode_data(dest, (const unsigned char *)str, strlen(str));
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainenstatic const char *solr_escape_id_str(const char *str)
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen const char *p;
7e459d4ba3add84f31a72992efc33bd53f2c655dTimo Sirainen if (*p == '\0')
7e459d4ba3add84f31a72992efc33bd53f2c655dTimo Sirainen switch (*p) {
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainenstatic const char *solr_escape(const char *str)
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen unsigned int i;
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen return "\"\"";
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen if (strchr(solr_escape_chars, str[i]) != NULL)
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainenstatic void solr_quote(string_t *dest, const char *str)
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainenstatic void solr_quote_http(string_t *dest, const char *str)
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen http_url_escape_param(dest, solr_escape(str));
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainenstatic void fts_solr_set_default_ns(struct solr_fts_backend *backend)
c58906589cafc32df4c04ffbef933baadd3f2276Timo Sirainen struct mail_namespace *ns = backend->backend.ns;
c58906589cafc32df4c04ffbef933baadd3f2276Timo Sirainen struct fts_solr_user *fuser = FTS_SOLR_USER_CONTEXT_REQUIRE(ns->user);
c58906589cafc32df4c04ffbef933baadd3f2276Timo Sirainen const struct fts_solr_settings *set = &fuser->set;
c58906589cafc32df4c04ffbef933baadd3f2276Timo Sirainen const char *str;
533bfba437e4120aa29dd45bca2aa87e30ee28a2Timo Sirainen mail_namespace_find_prefix(ns->user->namespaces,
533bfba437e4120aa29dd45bca2aa87e30ee28a2Timo Sirainen i_error("fts_solr: default_ns setting points to "
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen "nonexistent namespace");
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen mail_namespace_find_inbox(ns->user->namespaces);
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen while (backend->default_ns->alias_for != NULL)
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen backend->default_ns = backend->default_ns->alias_for;
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainenstatic void fts_box_name_get_root(struct mail_namespace **ns, const char **name)
5694eeb99b69dea8033ca77ad69743c6b4871370Timo Sirainen ((*ns)->flags & NAMESPACE_FLAG_INBOX_USER) != 0) {
5694eeb99b69dea8033ca77ad69743c6b4871370Timo Sirainen /* ugly workaround to allow selecting INBOX from a Maildir/
5694eeb99b69dea8033ca77ad69743c6b4871370Timo Sirainen when it's not in the inbox=yes namespace. */
5694eeb99b69dea8033ca77ad69743c6b4871370Timo Sirainenstatic const char *
5694eeb99b69dea8033ca77ad69743c6b4871370Timo Sirainenfts_box_get_root(struct mailbox *box, struct mail_namespace **ns_r)
5694eeb99b69dea8033ca77ad69743c6b4871370Timo Sirainen struct mail_namespace *ns = mailbox_get_namespace(box);
5694eeb99b69dea8033ca77ad69743c6b4871370Timo Sirainen if (t_imap_utf8_to_utf7(box->name, &name) < 0)
5694eeb99b69dea8033ca77ad69743c6b4871370Timo Sirainenstatic struct fts_backend *fts_backend_solr_alloc(void)
5694eeb99b69dea8033ca77ad69743c6b4871370Timo Sirainenfts_backend_solr_init(struct fts_backend *_backend, const char **error_r)
5694eeb99b69dea8033ca77ad69743c6b4871370Timo Sirainen struct solr_fts_backend *backend = (struct solr_fts_backend *)_backend;
5694eeb99b69dea8033ca77ad69743c6b4871370Timo Sirainen struct fts_solr_user *fuser = FTS_SOLR_USER_CONTEXT(_backend->ns->user);
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen const char *str;
533bfba437e4120aa29dd45bca2aa87e30ee28a2Timo Sirainen mail_user_init_ssl_client_settings(_backend->ns->user, &ssl_set);
5160580b0ec3f3288a320987abdf12a990f09df5Timo Sirainen if (solr_connection_init(fuser->set.url, &ssl_set,
533bfba437e4120aa29dd45bca2aa87e30ee28a2Timo Sirainen str = solr_escape_id_str(_backend->ns->user->username);
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainenstatic void fts_backend_solr_deinit(struct fts_backend *_backend)
5160580b0ec3f3288a320987abdf12a990f09df5Timo Sirainen struct solr_fts_backend *backend = (struct solr_fts_backend *)_backend;
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainensolr_add_ns_query(string_t *str, struct solr_fts_backend *backend,
d92f33f13830ba23d814342bf3ea8db721a15bb1Timo Sirainen if (ns == backend->default_ns || *ns->prefix == '\0') {
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainensolr_add_ns_query_http(string_t *str, struct solr_fts_backend *backend,
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainenfts_backend_solr_get_last_uid_fallback(struct solr_fts_backend *backend,
5160580b0ec3f3288a320987abdf12a990f09df5Timo Sirainen unsigned int count;
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainen str_append(str, "fl=uid&rows=1&sort=uid+desc&q=");
25d9db0c00e8041165540e0829d7eab7548cbaa5Timo Sirainen mailbox_get_open_status(box, STATUS_UIDVALIDITY, &status);
25d9db0c00e8041165540e0829d7eab7548cbaa5Timo Sirainen str_printfa(str, "uidv:%u+AND+box:", status.uidvalidity);
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainen pool = pool_alloconly_create("solr last uid lookup", 1024);
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainen if (solr_connection_select(backend->solr_conn, str_c(str),
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainen /* no UIDs */
ff7056842f14fd3b30a2d327dfab165b9d15dd30Timo Sirainen uidvals = array_get(&results[0]->uids, &count);
5160580b0ec3f3288a320987abdf12a990f09df5Timo Sirainen if (count == 1 && uidvals[0].seq1 == uidvals[0].seq2) {
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainen i_error("fts_solr: Last UID lookup returned multiple rows");
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainenfts_backend_solr_get_last_uid(struct fts_backend *_backend,
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainen /* either nothing has been indexed, or the index was corrupted.
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainen do it the slow way. */
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainen if (fts_backend_solr_get_last_uid_fallback(backend, box, last_uid_r) < 0)
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainenfts_backend_solr_update_init(struct fts_backend *_backend)
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen ctx = i_new(struct solr_fts_backend_update_context, 1);
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen ctx->cmd = str_new(default_pool, SOLR_CMDBUF_SIZE);
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainenstatic void xml_encode_id(struct solr_fts_backend_update_context *ctx,
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainenfts_backend_solr_add_doc_prefix(struct solr_fts_backend_update_context *ctx,
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen "<field name=\"uid\">%u</field>"
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen "<field name=\"uidv\">%u</field>",
ff7056842f14fd3b30a2d327dfab165b9d15dd30Timo Sirainen str_append(ctx->cmd, "</field><field name=\"user\">");
ff7056842f14fd3b30a2d327dfab165b9d15dd30Timo Sirainenfts_backed_solr_build_commit(struct solr_fts_backend_update_context *ctx)
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen solr_connection_post_more(ctx->post, str_data(ctx->cmd),
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainenfts_backend_solr_update_deinit(struct fts_backend_update_context *_ctx)
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen (struct solr_fts_backend_update_context *)_ctx;
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen const char *str;
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen /* commit and wait until the documents we just indexed are
ff7056842f14fd3b30a2d327dfab165b9d15dd30Timo Sirainen visible to the following search */
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen str = t_strdup_printf("<commit waitFlush=\"false\" "
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen "waitSearcher=\"%s\"/>",
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen if (solr_connection_post(backend->solr_conn, str) < 0)
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainenfts_backend_solr_update_set_mailbox(struct fts_backend_update_context *_ctx,
ff7056842f14fd3b30a2d327dfab165b9d15dd30Timo Sirainen (struct solr_fts_backend_update_context *)_ctx;
ff7056842f14fd3b30a2d327dfab165b9d15dd30Timo Sirainen fts_index_set_last_uid(ctx->cur_box, ctx->prev_uid);
T_BEGIN {
} T_END;
i_unreached();
return TRUE;
return FALSE;
case SEARCH_TEXT: {
case SEARCH_BODY:
return FALSE;
return TRUE;
bool and_args)
if (and_args)
return FALSE;
return TRUE;
const char *box_name;
int ret;
&status);
return ret;
const char *box_name;
char *box_id;
str_printfa(str, "fl=ns,box,uidv,uid,score&rows=%u&sort=box+asc,uid+asc&q=%%7b!lucene+q.op%%3dAND%%7d",
.flags = 0,
NULL,