fts-backend-solr.c revision 56a5ecd2107f13b1493b3195547147f1c56c0a77
/* Copyright (c) 2006-2011 Dovecot authors, see the included COPYING file */
#include "lib.h"
#include "array.h"
#include "str.h"
#include "hash.h"
#include "strescape.h"
#include "unichar.h"
#include "mail-storage-private.h"
#include "mailbox-list-private.h"
#include "mail-search.h"
#include "fts-api.h"
#include "solr-connection.h"
#include "fts-solr-plugin.h"
#include <ctype.h>
#include <string.h>
#define SOLR_MAX_MULTI_ROWS 100000
/* Solr FTS backend instance. The generic struct fts_backend is embedded as
   the first (and only) member.
   NOTE(review): presumably it is first so a struct fts_backend pointer can
   be converted to and from this type -- confirm against the callers. */
struct solr_fts_backend {
struct fts_backend backend;
};
/* Per-update state for the Solr backend. Functions in this file receive a
   struct fts_backend_update_context pointer and cast it to this type, which
   relies on `ctx` being the first member.
   NOTE(review): code in this file also reads and writes
   ctx->last_indexed_uid, but no such member is declared here -- confirm
   the member list is complete. */
struct solr_fts_backend_update_context {
/* generic update context; must stay first because of the pointer casts */
struct fts_backend_update_context ctx;
/* presumably the Solr POST request currently being built -- TODO confirm */
struct solr_connection_post *post;
/* tested before last_indexed_uid is assigned; presumably marks that value
   as already looked up -- confirm */
unsigned int last_indexed_uid_set:1;
/* tested when header data is processed; the header handling code notes
   that headers are finished only when the key changes to body */
unsigned int headers_open:1;
/* tested only alongside header handling; presumably selects whether the
   current header is indexed separately -- confirm */
unsigned int cur_header_index:1;
/* not referenced in the visible code; presumably set once any document
   has been sent -- confirm */
unsigned int documents_added:1;
/* not referenced in the visible code; presumably set once any expunge
   has been sent -- confirm */
unsigned int expunges:1;
};
{
/* Valid characters in XML:
#x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] |
[#x10000-#x10FFFF]
This function gets called only for #x80 and higher */
/* NOTE(review): the function header and the conditions that should guard
the two early returns below are not present here, so as written the first
`return FALSE` always executes and the later returns are unreachable.
Presumably the guards reject the gaps between the ranges listed above
(#xD800-#xDFFF and #xFFFE-#xFFFF) -- confirm. */
return FALSE;
return FALSE;
/* NOTE(review): `<` rejects #x10FFFF itself, which the range list above
includes as valid; `<=` looks like the intended comparison -- confirm. */
return chr < 0x10ffff;
}
static void
{
unsigned int i;
for (i = 0; i < len; i++) {
switch (data[i]) {
case '&':
break;
case '<':
break;
case '>':
break;
case '\t':
case '\n':
case '\r':
/* exceptions to the following control char check */
break;
default:
if (data[i] < 32) {
/* SOLR doesn't like control characters.
replace them with spaces. */
} else if (data[i] >= 0x80) {
/* make sure the character is valid for XML
so we don't get XML parser errors */
unsigned int char_len =
uni_utf8_char_bytes(data[i]);
else {
}
i += char_len - 1;
} else {
}
break;
}
}
}
{
}
{
}
/* Intended, judging by its name and return type, to create a Solr backend
   and hand it back as a generic struct fts_backend pointer.
   NOTE(review): the body only declares a local pointer. Nothing is
   allocated and this non-void function has no return statement, so a
   caller that uses the result gets an indeterminate value. The allocation
   and `return` statements appear to be missing -- confirm and restore. */
static struct fts_backend *fts_backend_solr_alloc(void)
{
struct solr_fts_backend *backend;
}
static int
const char **error_r ATTR_UNUSED)
{
return 0;
}
{
}
static int
{
const char *box_guid;
unsigned int count;
struct solr_result **results;
int ret = 0;
return -1;
else
ret = -1;
/* no UIDs */
*last_uid_r = 0;
} else {
} else {
i_error("fts_solr: Last UID lookup returned multiple rows");
ret = -1;
}
}
pool_unref(&pool);
return ret;
}
static int
{
struct fts_index_header hdr;
return 0;
}
/* either nothing has been indexed, or the index was corrupted.
do it the slow way. */
return -1;
return 0;
}
static struct fts_backend_update_context *
{
struct solr_fts_backend_update_context *ctx;
}
{
}
}
static void
{
"<field name=\"uid\">%u</field>"
"<field name=\"box\">%s</field>",
}
static void
{
}
}
}
static int
{
return 0;
}
static int
{
struct solr_fts_backend_update_context *ctx =
(struct solr_fts_backend_update_context *)_ctx;
const char *str;
if (fts_backed_solr_build_commit(ctx) < 0)
ret = -1;
/* commit and wait until the documents we just indexed are
visible to the following search */
"waitSearcher=\"%s\"/>",
ret = -1;
}
return ret;
}
static void
{
struct solr_fts_backend_update_context *ctx =
(struct solr_fts_backend_update_context *)_ctx;
const char *box_guid;
}
} else {
}
}
static void
{
struct solr_fts_backend_update_context *ctx =
(struct solr_fts_backend_update_context *)_ctx;
struct fts_index_header hdr;
if (!ctx->last_indexed_uid_set) {
ctx->last_indexed_uid = 0;
else
}
if (ctx->last_indexed_uid == 0 ||
/* don't waste time asking Solr to expunge a message that is
highly unlikely to be indexed at this time. */
return;
}
T_BEGIN {
} T_END;
}
static void
{
} else {
}
}
static bool
const struct fts_backend_build_key *key)
{
struct solr_fts_backend_update_context *ctx =
(struct solr_fts_backend_update_context *)_ctx;
}
/* fall through */
break;
break;
i_unreached();
}
return TRUE;
}
static void
{
struct solr_fts_backend_update_context *ctx =
(struct solr_fts_backend_update_context *)_ctx;
if (!ctx->headers_open)
else {
/* this is called individually for each header line.
headers are finished only when key changes to body */
}
if (ctx->cur_header_index) {
}
}
static int
{
struct solr_fts_backend_update_context *ctx =
(struct solr_fts_backend_update_context *)_ctx;
return -1;
if (ctx->headers_open) {
if (ctx->cur_header_index)
} else {
}
}
return 0;
}
{
return 0;
}
{
return 0;
}
/* Tells whether str contains at least one character from the Solr escape
   set below (query operators, quoting characters, backslash and space),
   i.e. whether it would have to be escaped before being put into a query.

   str: NUL-terminated string to scan; must not be NULL.
   Returns non-zero (true) if any such character is present, and zero
   (false) otherwise, including for the empty string. */
static bool solr_need_escaping(const char *str)
{
	static const char solr_escape_chars[] = "+-&|!(){}[]^\"~*?:\\ ";

	/* strcspn() yields the length of the leading run that is free of
	   escape characters; if that run ends before the terminating NUL,
	   the string contains one of them. */
	return str[strcspn(str, solr_escape_chars)] != '\0';
}
{
/* currently we'll just disable fuzzy searching if there are any
parameters that need escaping. solr doesn't seem to give good
fuzzy results even if we did escape them.. */
else {
}
}
static bool
{
case SEARCH_TEXT: {
break;
}
case SEARCH_BODY:
break;
case SEARCH_HEADER:
case SEARCH_HEADER_ADDRESS:
return FALSE;
break;
default:
return FALSE;
}
return TRUE;
}
static bool
bool and_args)
{
unsigned int last_len;
if (and_args)
else
}
}
return FALSE;
return TRUE;
}
static bool
{
case SEARCH_HEADER:
case SEARCH_HEADER_ADDRESS:
return FALSE;
/* all matches would be definite, but all non-matches
would be maybes. too much trouble to optimize. */
return FALSE;
}
/* we can check if the search key exists in some header and
filter out the messages that have no chance of matching */
else {
/* checking potential existence of the header name */
}
break;
default:
return FALSE;
}
return TRUE;
}
static bool
bool and_args)
{
unsigned int last_len;
if (and_args)
else
}
}
return FALSE;
return TRUE;
}
{
struct solr_result **results;
int ret;
/* use a separate filter query for selecting the mailbox. it shouldn't
affect the score and there could be some caching benefits too. */
else
}
pool_unref(&pool);
return ret;
}
static int
struct fts_result *result)
{
struct mailbox_status status;
const char *box_guid;
unsigned int prefix_len;
return -1;
return -1;
}
return -1;
}
return 0;
}
static int
struct fts_multi_result *result)
{
struct solr_result **solr_results;
struct fts_result *fts_result;
struct hash_table *mailboxes;
const char *box_guid;
unsigned int i, len;
/* use a separate filter query for selecting the mailbox. it shouldn't
affect the score and there could be some caching benefits too. */
else
continue;
boxes[i]);
}
return -1;
}
for (i = 0; solr_results[i] != NULL; i++) {
i_warning("fts_solr: Lookup returned unexpected mailbox "
continue;
}
}
(void)array_append_space(&fts_results);
return 0;
}
static int
struct fts_multi_result *result)
{
return -1;
}
/* FIXME: maybe_uids could be handled also with some more work.. */
return 0;
}
/* Backend descriptor for the Solr FTS backend: registered under the name
   "solr" with no flags set.
   NOTE(review): the nested initializer contains only a single NULL;
   presumably the backend's function table entries belong here -- confirm
   that the list is complete. */
struct fts_backend fts_backend_solr = {
.name = "solr",
.flags = 0,
{
NULL,
}
};