fts-backend-solr-old.c revision adf8264ab1135c413bcede6af2e4248fd26a1ef9
2454dfa32c93c20a8522c6ed42fe057baaac9f9aStephan Bosch/* Copyright (c) 2006-2011 Dovecot authors, see the included COPYING file */
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainenstatic struct solr_connection *solr_conn = NULL;
d22301419109ed4a38351715e6760011421dadecTimo Sirainen /* Valid characters in XML:
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] |
4542c94adb8910e0174c784754e737cec16af59cTimo Sirainen [#x10000-#x10FFFF]
4542c94adb8910e0174c784754e737cec16af59cTimo Sirainen This function gets called only for #x80 and higher */
e3367d7b54864d2e4b1931903e3f660ae64fbe3aTimo Sirainenxml_encode_data(string_t *dest, const unsigned char *data, unsigned int len)
e3367d7b54864d2e4b1931903e3f660ae64fbe3aTimo Sirainen unsigned int i;
e3367d7b54864d2e4b1931903e3f660ae64fbe3aTimo Sirainen for (i = 0; i < len; i++) {
e3367d7b54864d2e4b1931903e3f660ae64fbe3aTimo Sirainen switch (data[i]) {
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen /* exceptions to the following control char check */
4542c94adb8910e0174c784754e737cec16af59cTimo Sirainen /* SOLR doesn't like control characters.
4542c94adb8910e0174c784754e737cec16af59cTimo Sirainen replace them with spaces. */
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen /* make sure the character is valid for XML
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen so we don't get XML parser errors */
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen unsigned int char_len =
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen uni_utf8_get_char_n(data + i, char_len, &chr) == 1 &&
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainenstatic void xml_encode(string_t *dest, const char *str)
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen xml_encode_data(dest, (const unsigned char *)str, strlen(str));
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainenstatic const char *solr_escape_id_str(const char *str)
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen const char *p;
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen if (*p == '\0')
d979c1179d55ad86e40f869e48ef3e4db9c817b5Timo Sirainen switch (*p) {
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainenstatic void solr_quote(string_t *dest, const char *str)
c115c742f730e312d6b6ab5064595cd0d8b4e26eTimo Sirainenstatic void solr_quote_http(string_t *dest, const char *str)
13e130c3af3032982de6b1d13c6dcddda9164848Timo Sirainen solr_connection_http_escape(solr_conn, dest, str);
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainenstatic void fts_solr_set_default_ns(struct solr_fts_backend *backend)
c115c742f730e312d6b6ab5064595cd0d8b4e26eTimo Sirainen struct mail_namespace *ns = backend->backend.ns;
c115c742f730e312d6b6ab5064595cd0d8b4e26eTimo Sirainen struct fts_solr_user *fuser = FTS_SOLR_USER_CONTEXT(ns->user);
c115c742f730e312d6b6ab5064595cd0d8b4e26eTimo Sirainen const struct fts_solr_settings *set = &fuser->set;
c115c742f730e312d6b6ab5064595cd0d8b4e26eTimo Sirainen const char *str;
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen mail_namespace_find_prefix(ns->user->namespaces,
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen i_error("fts_solr: default_ns setting points to "
c115c742f730e312d6b6ab5064595cd0d8b4e26eTimo Sirainen "nonexistent namespace");
8eba883232f80178b60fa416f73292bf5f990fecTimo Sirainen mail_namespace_find_inbox(ns->user->namespaces);
8eba883232f80178b60fa416f73292bf5f990fecTimo Sirainen while (backend->default_ns->alias_for != NULL)
8eba883232f80178b60fa416f73292bf5f990fecTimo Sirainen backend->default_ns = backend->default_ns->alias_for;
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainenstatic void fts_box_name_get_root(struct mail_namespace **ns, const char **name)
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen ((*ns)->flags & NAMESPACE_FLAG_INBOX_USER) != 0) {
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen /* ugly workaround to allow selecting INBOX from a Maildir/
efe78d3ba24fc866af1c79b9223dc0809ba26cadStephan Bosch when it's not in the inbox=yes namespace. */
a249dd267f05d349f1b4aa27b40a56083c8ba392Timo Sirainenstatic const char *
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainenfts_box_get_root(struct mailbox *box, struct mail_namespace **ns_r)
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen struct mail_namespace *ns = mailbox_get_namespace(box);
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen if (t_imap_utf8_to_utf7(box->name, &name) < 0)
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainenstatic struct fts_backend *fts_backend_solr_alloc(void)
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainenfts_backend_solr_init(struct fts_backend *_backend,
fe5cca45e94608d1c471990216941bf893bc8adaTimo Sirainen struct solr_fts_backend *backend = (struct solr_fts_backend *)_backend;
e15b305e90c9834734ccf35ed78f0ad29d570ee9Timo Sirainen struct fts_solr_user *fuser = FTS_SOLR_USER_CONTEXT(_backend->ns->user);
e15b305e90c9834734ccf35ed78f0ad29d570ee9Timo Sirainen const struct fts_solr_settings *set = &fuser->set;
e15b305e90c9834734ccf35ed78f0ad29d570ee9Timo Sirainen const char *str;
e15b305e90c9834734ccf35ed78f0ad29d570ee9Timo Sirainen solr_conn = solr_connection_init(set->url, set->debug);
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen str = solr_escape_id_str(_backend->ns->user->username);
421d30619384e72a27e2a5d13ff6525aff4d17feTimo Sirainenstatic void fts_backend_solr_deinit(struct fts_backend *_backend)
421d30619384e72a27e2a5d13ff6525aff4d17feTimo Sirainen struct solr_fts_backend *backend = (struct solr_fts_backend *)_backend;
421d30619384e72a27e2a5d13ff6525aff4d17feTimo Sirainensolr_add_ns_query(string_t *str, struct solr_fts_backend *backend,
ecd69c4e8371853667e01b0c16d436ef7f7393e2Timo Sirainen if (ns == backend->default_ns || *ns->prefix == '\0') {
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainensolr_add_ns_query_http(string_t *str, struct solr_fts_backend *backend,
942302b0247403645394d848b3c620ead262a2a5Timo Sirainen solr_connection_http_escape(solr_conn, str, str_c(tmp));
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainenfts_backend_solr_get_last_uid_fallback(struct solr_fts_backend *backend,
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen unsigned int count;
942302b0247403645394d848b3c620ead262a2a5Timo Sirainen str_append(str, "fl=uid&rows=1&sort=uid+desc&q=");
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen mailbox_get_open_status(box, STATUS_UIDVALIDITY, &status);
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen str_printfa(str, "uidv:%u+box:", status.uidvalidity);
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen pool = pool_alloconly_create("solr last uid lookup", 1024);
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen if (solr_connection_select(solr_conn, str_c(str),
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen /* no UIDs */
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen uidvals = array_get(&results[0]->uids, &count);
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen if (count == 1 && uidvals[0].seq1 == uidvals[0].seq2) {
a34bd633ab201f6a5ad1c00174fb8b0359031d00Timo Sirainen i_error("fts_solr: Last UID lookup returned multiple rows");
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainenfts_backend_solr_get_last_uid(struct fts_backend *_backend,
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen /* either nothing has been indexed, or the index was corrupted.
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen do it the slow way. */
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen if (fts_backend_solr_get_last_uid_fallback(backend, box, last_uid_r) < 0)
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen (void)fts_index_set_last_uid(box, *last_uid_r);
942302b0247403645394d848b3c620ead262a2a5Timo Sirainenfts_backend_solr_update_init(struct fts_backend *_backend)
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen ctx = i_new(struct solr_fts_backend_update_context, 1);
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen ctx->cmd = str_new(default_pool, SOLR_CMDBUF_SIZE);
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainenstatic void xml_encode_id(struct solr_fts_backend_update_context *ctx,
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainenfts_backend_solr_add_doc_prefix(struct solr_fts_backend_update_context *ctx,
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen "<field name=\"uid\">%u</field>"
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen "<field name=\"uidv\">%u</field>",
942302b0247403645394d848b3c620ead262a2a5Timo Sirainen str_append(ctx->cmd, "</field><field name=\"user\">");
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainenfts_backed_solr_build_commit(struct solr_fts_backend_update_context *ctx)
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen solr_connection_post_more(ctx->post, str_data(ctx->cmd),
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainenfts_backend_solr_update_deinit(struct fts_backend_update_context *_ctx)
6df0ab0c1ab91f06b6418cb30eff44405a1b8f02Timo Sirainen const char *str;
a08e96bb7821294656492a57482d838571d10c00Aki Tuomi /* commit and wait until the documents we just indexed are
6df0ab0c1ab91f06b6418cb30eff44405a1b8f02Timo Sirainen visible to the following search */
a08e96bb7821294656492a57482d838571d10c00Aki Tuomi str = t_strdup_printf("<commit waitFlush=\"false\" "
6df0ab0c1ab91f06b6418cb30eff44405a1b8f02Timo Sirainen "waitSearcher=\"%s\"/>",
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainenfts_backend_solr_update_set_mailbox(struct fts_backend_update_context *_ctx,
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen (struct solr_fts_backend_update_context *)_ctx;
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen (void)fts_index_set_last_uid(ctx->cur_box, ctx->prev_uid);
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen ctx->id_box_name = i_strdup(fts_box_get_root(box, &ns));
21aaa6affb9f134112b75b5db737309fc35ef1cfMartti Rannanjärvi mailbox_get_open_status(box, STATUS_UIDVALIDITY, &status);
225e82df5dd1e765f4e52b80c954558f00e5a7dfTimo Sirainenfts_backend_solr_update_expunge(struct fts_backend_update_context *_ctx,
18a41cbd38f83429b790414c1159c097af4a59b8Timo Sirainen (struct solr_fts_backend_update_context *)_ctx;
a34bd633ab201f6a5ad1c00174fb8b0359031d00Timo Sirainen (void)solr_connection_post(solr_conn, str_c(cmd));
0206dc57f2c04da69599dea5816235cfeb2b897aMartti Rannanjärvifts_backend_solr_uid_changed(struct solr_fts_backend_update_context *ctx,
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen ctx->post = solr_connection_post_begin(solr_conn);
f46885a5b78b15a8d2419f6e5d13b643bd85e41fTimo Sirainenfts_backend_solr_update_set_build_key(struct fts_backend_update_context *_ctx,
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen (struct solr_fts_backend_update_context *)_ctx;
225e82df5dd1e765f4e52b80c954558f00e5a7dfTimo Sirainen str_append(ctx->cmd, "<field name=\"body\">");
return TRUE;
case SEARCH_TEXT: {
case SEARCH_BODY:
return FALSE;
return TRUE;
bool and_args)
unsigned int last_len;
if (and_args)
return FALSE;
return TRUE;
const char *box_name;
int ret;
&status);
return ret;
const char *box_name;
char *box_id;
unsigned int i, len;
.flags = 0,
NULL,