fts-backend-solr-old.c revision 6aadd1c52e6b291d47b47b4f4063e9bc8ccf0784
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainen/* Copyright (c) 2006-2015 Dovecot authors, see the included COPYING file */
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen /* Valid characters in XML:
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] |
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen [#x10000-#x10FFFF]
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen This function gets called only for #x80 and higher */
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainenxml_encode_data(string_t *dest, const unsigned char *data, unsigned int len)
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen unsigned int i;
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen for (i = 0; i < len; i++) {
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen switch (data[i]) {
1701e3f91107051b1704721bf1dc1e32491faaf9Timo Sirainen /* exceptions to the following control char check */
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen /* SOLR doesn't like control characters, so
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen replace them with spaces. */
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen /* make sure the character is valid for XML
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen so we don't get XML parser errors */
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen unsigned int char_len =
5694eeb99b69dea8033ca77ad69743c6b4871370Timo Sirainenstatic void xml_encode(string_t *dest, const char *str)
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen xml_encode_data(dest, (const unsigned char *)str, strlen(str));
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainenstatic const char *solr_escape_id_str(const char *str)
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen const char *p;
7e459d4ba3add84f31a72992efc33bd53f2c655dTimo Sirainen if (*p == '\0')
7e459d4ba3add84f31a72992efc33bd53f2c655dTimo Sirainen switch (*p) {
1701e3f91107051b1704721bf1dc1e32491faaf9Timo Sirainenstatic void solr_quote(string_t *dest, const char *str)
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainenstatic void solr_quote_http(string_t *dest, const char *str)
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainenstatic void fts_solr_set_default_ns(struct solr_fts_backend *backend)
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen struct mail_namespace *ns = backend->backend.ns;
ff7056842f14fd3b30a2d327dfab165b9d15dd30Timo Sirainen struct fts_solr_user *fuser = FTS_SOLR_USER_CONTEXT(ns->user);
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen const struct fts_solr_settings *set = &fuser->set;
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen const char *str;
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen mail_namespace_find_prefix(ns->user->namespaces,
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen i_error("fts_solr: default_ns setting points to "
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen "nonexistent namespace");
c58906589cafc32df4c04ffbef933baadd3f2276Timo Sirainen mail_namespace_find_inbox(ns->user->namespaces);
c58906589cafc32df4c04ffbef933baadd3f2276Timo Sirainen while (backend->default_ns->alias_for != NULL)
c58906589cafc32df4c04ffbef933baadd3f2276Timo Sirainen backend->default_ns = backend->default_ns->alias_for;
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainenstatic void fts_box_name_get_root(struct mail_namespace **ns, const char **name)
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen ((*ns)->flags & NAMESPACE_FLAG_INBOX_USER) != 0) {
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen /* ugly workaround to allow selecting INBOX from a Maildir/
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen when it's not in the inbox=yes namespace. */
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainenstatic const char *
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainenfts_box_get_root(struct mailbox *box, struct mail_namespace **ns_r)
5160580b0ec3f3288a320987abdf12a990f09df5Timo Sirainen struct mail_namespace *ns = mailbox_get_namespace(box);
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen if (t_imap_utf8_to_utf7(box->name, &name) < 0)
5694eeb99b69dea8033ca77ad69743c6b4871370Timo Sirainenstatic struct fts_backend *fts_backend_solr_alloc(void)
5694eeb99b69dea8033ca77ad69743c6b4871370Timo Sirainenfts_backend_solr_init(struct fts_backend *_backend, const char **error_r)
5694eeb99b69dea8033ca77ad69743c6b4871370Timo Sirainen struct solr_fts_backend *backend = (struct solr_fts_backend *)_backend;
5694eeb99b69dea8033ca77ad69743c6b4871370Timo Sirainen struct fts_solr_user *fuser = FTS_SOLR_USER_CONTEXT(_backend->ns->user);
5694eeb99b69dea8033ca77ad69743c6b4871370Timo Sirainen const char *str;
5694eeb99b69dea8033ca77ad69743c6b4871370Timo Sirainen if (solr_connection_init(fuser->set.url, fuser->set.debug,
5694eeb99b69dea8033ca77ad69743c6b4871370Timo Sirainen str = solr_escape_id_str(_backend->ns->user->username);
5694eeb99b69dea8033ca77ad69743c6b4871370Timo Sirainenstatic void fts_backend_solr_deinit(struct fts_backend *_backend)
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen struct solr_fts_backend *backend = (struct solr_fts_backend *)_backend;
5160580b0ec3f3288a320987abdf12a990f09df5Timo Sirainensolr_add_ns_query(string_t *str, struct solr_fts_backend *backend,
533bfba437e4120aa29dd45bca2aa87e30ee28a2Timo Sirainen if (ns == backend->default_ns || *ns->prefix == '\0') {
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainensolr_add_ns_query_http(string_t *str, struct solr_fts_backend *backend,
d92f33f13830ba23d814342bf3ea8db721a15bb1Timo Sirainenfts_backend_solr_get_last_uid_fallback(struct solr_fts_backend *backend,
d92f33f13830ba23d814342bf3ea8db721a15bb1Timo Sirainen unsigned int count;
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen str_append(str, "fl=uid&rows=1&sort=uid+desc&q=");
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen mailbox_get_open_status(box, STATUS_UIDVALIDITY, &status);
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen str_printfa(str, "uidv:%u+box:", status.uidvalidity);
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen pool = pool_alloconly_create("solr last uid lookup", 1024);
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen if (solr_connection_select(backend->solr_conn, str_c(str),
5160580b0ec3f3288a320987abdf12a990f09df5Timo Sirainen /* no UIDs */
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen uidvals = array_get(&results[0]->uids, &count);
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainen if (count == 1 && uidvals[0].seq1 == uidvals[0].seq2) {
ff7056842f14fd3b30a2d327dfab165b9d15dd30Timo Sirainen i_error("fts_solr: Last UID lookup returned multiple rows");
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainenfts_backend_solr_get_last_uid(struct fts_backend *_backend,
5160580b0ec3f3288a320987abdf12a990f09df5Timo Sirainen /* Either nothing has been indexed, or the index was corrupted.
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainen Do it the slow way. */
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainen if (fts_backend_solr_get_last_uid_fallback(backend, box, last_uid_r) < 0)
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainenfts_backend_solr_update_init(struct fts_backend *_backend)
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainen ctx = i_new(struct solr_fts_backend_update_context, 1);
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainen ctx->cmd = str_new(default_pool, SOLR_CMDBUF_SIZE);
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainenstatic void xml_encode_id(struct solr_fts_backend_update_context *ctx,
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainenfts_backend_solr_add_doc_prefix(struct solr_fts_backend_update_context *ctx,
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen "<field name=\"uid\">%u</field>"
ff4bb2dfb5714eeb0408d3bb862de1646351d097Timo Sirainen "<field name=\"uidv\">%u</field>",
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen str_append(ctx->cmd, "</field><field name=\"user\">");
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainenfts_backed_solr_build_commit(struct solr_fts_backend_update_context *ctx)
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen solr_connection_post_more(ctx->post, str_data(ctx->cmd),
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainenfts_backend_solr_update_deinit(struct fts_backend_update_context *_ctx)
ff7056842f14fd3b30a2d327dfab165b9d15dd30Timo Sirainen (struct solr_fts_backend_update_context *)_ctx;
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen const char *str;
ff7056842f14fd3b30a2d327dfab165b9d15dd30Timo Sirainen /* commit and wait until the documents we just indexed are
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen visible to the following search */
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen str = t_strdup_printf("<commit waitFlush=\"false\" "
ff7056842f14fd3b30a2d327dfab165b9d15dd30Timo Sirainen "waitSearcher=\"%s\"/>",
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen if (solr_connection_post(backend->solr_conn, str) < 0)
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainenfts_backend_solr_update_set_mailbox(struct fts_backend_update_context *_ctx,
ff7056842f14fd3b30a2d327dfab165b9d15dd30Timo Sirainen (struct solr_fts_backend_update_context *)_ctx;
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen fts_index_set_last_uid(ctx->cur_box, ctx->prev_uid);
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen ctx->id_box_name = i_strdup(fts_box_get_root(box, &ns));
a27e065f1a1f91c7fbdf7c2ea1c387441af0cbb3Timo Sirainen mailbox_get_open_status(box, STATUS_UIDVALIDITY, &status);
ff7056842f14fd3b30a2d327dfab165b9d15dd30Timo Sirainenfts_backend_solr_update_expunge(struct fts_backend_update_context *_ctx,
ff7056842f14fd3b30a2d327dfab165b9d15dd30Timo Sirainen (struct solr_fts_backend_update_context *)_ctx;
} T_END;
i_unreached();
return TRUE;
case SEARCH_TEXT: {
case SEARCH_BODY:
return FALSE;
return TRUE;
bool and_args)
unsigned int last_len;
if (and_args)
return FALSE;
return TRUE;
const char *box_name;
int ret;
&status);
return ret;
const char *box_name;
char *box_id;
unsigned int i, len;
.flags = 0,
NULL,