fts-backend-solr.c revision d4fbb3317303ce8e2b2371ec186762a3d5ed5a41
5f5870385cff47efd2f58e7892f251cf13761528Timo Sirainen/* Copyright (c) 2006-2015 Dovecot authors, see the included COPYING file */
6c00502d4ece417ead501db8f0ee3e8287ba4459Timo Sirainen#define SOLR_CMDBUF_FLUSH_SIZE (SOLR_CMDBUF_SIZE-128)
6c00502d4ece417ead501db8f0ee3e8287ba4459Timo Sirainen/* If header is larger than this, truncate it. */
6c00502d4ece417ead501db8f0ee3e8287ba4459Timo Sirainen/* If SOLR_HEADER_MAX_SIZE was already reached, write still to individual
6c00502d4ece417ead501db8f0ee3e8287ba4459Timo Sirainen header fields as long as they're smaller than this */
421d30619384e72a27e2a5d13ff6525aff4d17feTimo Sirainen /* Valid characters in XML:
421d30619384e72a27e2a5d13ff6525aff4d17feTimo Sirainen #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] |
421d30619384e72a27e2a5d13ff6525aff4d17feTimo Sirainen [#x10000-#x10FFFF]
421d30619384e72a27e2a5d13ff6525aff4d17feTimo Sirainen This function gets called only for #x80 and higher */
421d30619384e72a27e2a5d13ff6525aff4d17feTimo Sirainenstatic unsigned int
421d30619384e72a27e2a5d13ff6525aff4d17feTimo Sirainenxml_encode_data_max(string_t *dest, const unsigned char *data, unsigned int len,
421d30619384e72a27e2a5d13ff6525aff4d17feTimo Sirainen unsigned int max_len)
421d30619384e72a27e2a5d13ff6525aff4d17feTimo Sirainen unsigned int i;
643a81fff9003cba13deb49a565a3c8171da524dTimo Sirainen for (i = 0; i < max_len; i++) {
643a81fff9003cba13deb49a565a3c8171da524dTimo Sirainen switch (data[i]) {
5fbccc935e3f7b916aa7c6e302a212821072e83aTimo Sirainen /* exceptions to the following control char check */
5fbccc935e3f7b916aa7c6e302a212821072e83aTimo Sirainen /* SOLR doesn't like control characters.
fcaf124d4a727424a338cccfd4274c2393818cd3Timo Sirainen replace them with spaces. */
421d30619384e72a27e2a5d13ff6525aff4d17feTimo Sirainen /* make sure the character is valid for XML
402e999a878e0cc41a0afb830fea0a93afc75f0dTimo Sirainen so we don't get XML parser errors */
643a81fff9003cba13deb49a565a3c8171da524dTimo Sirainen unsigned int char_len =
0950aed81d1e9618264e6aa4d214d89e005ec8d6Timo Sirainenxml_encode_data(string_t *dest, const unsigned char *data, unsigned int len)
0950aed81d1e9618264e6aa4d214d89e005ec8d6Timo Sirainen (void)xml_encode_data_max(dest, data, len, len);
0950aed81d1e9618264e6aa4d214d89e005ec8d6Timo Sirainenstatic void xml_encode(string_t *dest, const char *str)
0950aed81d1e9618264e6aa4d214d89e005ec8d6Timo Sirainen xml_encode_data(dest, (const unsigned char *)str, strlen(str));
6c00502d4ece417ead501db8f0ee3e8287ba4459Timo Sirainenstatic void solr_quote_http(string_t *dest, const char *str)
6c00502d4ece417ead501db8f0ee3e8287ba4459Timo Sirainenstatic struct fts_backend *fts_backend_solr_alloc(void)
5fbccc935e3f7b916aa7c6e302a212821072e83aTimo Sirainenfts_backend_solr_init(struct fts_backend *_backend, const char **error_r)
643a81fff9003cba13deb49a565a3c8171da524dTimo Sirainen struct solr_fts_backend *backend = (struct solr_fts_backend *)_backend;
6c00502d4ece417ead501db8f0ee3e8287ba4459Timo Sirainen struct fts_solr_user *fuser = FTS_SOLR_USER_CONTEXT(_backend->ns->user);
643a81fff9003cba13deb49a565a3c8171da524dTimo Sirainen /* change our flags so we get proper input */
643a81fff9003cba13deb49a565a3c8171da524dTimo Sirainen _backend->flags &= ~FTS_BACKEND_FLAG_FUZZY_SEARCH;
6c00502d4ece417ead501db8f0ee3e8287ba4459Timo Sirainen _backend->flags |= FTS_BACKEND_FLAG_TOKENIZED_INPUT;
6c00502d4ece417ead501db8f0ee3e8287ba4459Timo Sirainen return solr_connection_init(fuser->set.url, fuser->set.debug,
6c00502d4ece417ead501db8f0ee3e8287ba4459Timo Sirainenstatic void fts_backend_solr_deinit(struct fts_backend *_backend)
6c00502d4ece417ead501db8f0ee3e8287ba4459Timo Sirainen struct solr_fts_backend *backend = (struct solr_fts_backend *)_backend;
6c00502d4ece417ead501db8f0ee3e8287ba4459Timo Sirainenget_last_uid_fallback(struct fts_backend *_backend, struct mailbox *box,
5fbccc935e3f7b916aa7c6e302a212821072e83aTimo Sirainen struct solr_fts_backend *backend = (struct solr_fts_backend *)_backend;
643a81fff9003cba13deb49a565a3c8171da524dTimo Sirainen unsigned int count;
5fbccc935e3f7b916aa7c6e302a212821072e83aTimo Sirainen str_append(str, "fl=uid&rows=1&sort=uid+desc&q=");
6c00502d4ece417ead501db8f0ee3e8287ba4459Timo Sirainen solr_quote_http(str, _backend->ns->owner->username);
6c00502d4ece417ead501db8f0ee3e8287ba4459Timo Sirainen pool = pool_alloconly_create("solr last uid lookup", 1024);
6c00502d4ece417ead501db8f0ee3e8287ba4459Timo Sirainen if (solr_connection_select(backend->solr_conn, str_c(str),
6c00502d4ece417ead501db8f0ee3e8287ba4459Timo Sirainen /* no UIDs */
6c00502d4ece417ead501db8f0ee3e8287ba4459Timo Sirainen uidvals = array_get(&results[0]->uids, &count);
6c00502d4ece417ead501db8f0ee3e8287ba4459Timo Sirainen if (count == 1 && uidvals[0].seq1 == uidvals[0].seq2) {
6c00502d4ece417ead501db8f0ee3e8287ba4459Timo Sirainen i_error("fts_solr: Last UID lookup returned multiple rows");
6c00502d4ece417ead501db8f0ee3e8287ba4459Timo Sirainenfts_backend_solr_get_last_uid(struct fts_backend *_backend,
5fbccc935e3f7b916aa7c6e302a212821072e83aTimo Sirainen /* either nothing has been indexed, or the index was corrupted.
fcaf124d4a727424a338cccfd4274c2393818cd3Timo Sirainen do it the slow way. */
6c00502d4ece417ead501db8f0ee3e8287ba4459Timo Sirainen if (get_last_uid_fallback(_backend, box, last_uid_r) < 0)
6c00502d4ece417ead501db8f0ee3e8287ba4459Timo Sirainenfts_backend_solr_update_init(struct fts_backend *_backend)
fcaf124d4a727424a338cccfd4274c2393818cd3Timo Sirainen ctx = i_new(struct solr_fts_backend_update_context, 1);
6c00502d4ece417ead501db8f0ee3e8287ba4459Timo Sirainen (_backend->flags & FTS_BACKEND_FLAG_TOKENIZED_INPUT) != 0;
643a81fff9003cba13deb49a565a3c8171da524dTimo Sirainenstatic void xml_encode_id(struct solr_fts_backend_update_context *ctx,
static string_t *
const char *str;
return ret;
const char *box_guid;
i_unreached();
return TRUE;
unsigned int len;
return TRUE;
return FALSE;
case SEARCH_TEXT: {
case SEARCH_BODY:
case SEARCH_HEADER:
case SEARCH_HEADER_ADDRESS:
return FALSE;
return FALSE;
return TRUE;
bool and_args)
unsigned int last_len;
if (and_args)
return FALSE;
return TRUE;
case SEARCH_HEADER:
case SEARCH_HEADER_ADDRESS:
return FALSE;
return FALSE;
return FALSE;
return TRUE;
bool and_args)
unsigned int last_len;
if (and_args)
return FALSE;
return TRUE;
int ret;
return ret;
const char *box_guid;
unsigned int prefix_len;
const char *box_guid;
unsigned int i, len;
bool search_all_mailboxes;
if (!search_all_mailboxes)
if (!search_all_mailboxes) {
boxes[i]);
if (!search_all_mailboxes)
if (!search_all_mailboxes) {