fts-backend-solr.c revision a7af27522004d1d520072c80883aa2a5c440ea6a
/* Copyright (c) 2006-2010 Dovecot authors, see the included COPYING file */
25757faf029c369a8318349dafe952e2358df1d8Timo Sirainen
08d6658a4e2ec8104cd1307f6baa75fdb07a24f8Mark Washenberger#include "lib.h"
43a66a0b16299bd4f7615acd85e98bd3832c54d5Timo Sirainen#include "array.h"
ff487c974815bdaa2d05a3b834f4c2c841f4cc34Timo Sirainen#include "str.h"
66d2db642fe24d555d113ba463e446b038d476efTimo Sirainen#include "strescape.h"
25757faf029c369a8318349dafe952e2358df1d8Timo Sirainen#include "mail-storage-private.h"
25757faf029c369a8318349dafe952e2358df1d8Timo Sirainen#include "mail-namespace.h"
4ee00532a265bdfb38539d811fcd12d51210ac35Timo Sirainen#include "solr-connection.h"
4ee00532a265bdfb38539d811fcd12d51210ac35Timo Sirainen#include "fts-solr-plugin.h"
b321df9603081896b70ec44635af96d674a9839aTimo Sirainen
9d75363d3fbabc2fbc2d80f06672e3ed8965804aTimo Sirainen#include <ctype.h>
9d75363d3fbabc2fbc2d80f06672e3ed8965804aTimo Sirainen
/* Initial size of the indexing command buffer. While message data is being
   added, the buffer is posted and emptied once it grows past this size
   minus 128 bytes. */
#define SOLR_CMDBUF_SIZE (64 * 1024)
/* rows= value used when the last UIDs of all mailboxes are requested */
#define SOLR_MAX_ROWS 100000
/* An include filter is generated only when there are at most this many
   include patterns */
#define FTS_SOLR_MAX_BOX_INC_PATTERNS 5
/* At most this many exclude patterns are added to the filter query */
#define FTS_SOLR_MAX_BOX_EXC_PATTERNS 5
43a66a0b16299bd4f7615acd85e98bd3832c54d5Timo Sirainen
43a66a0b16299bd4f7615acd85e98bd3832c54d5Timo Sirainenstruct solr_fts_backend {
7bd72e4deca3cbf757dd1ea298486d9f3bc24226Timo Sirainen struct fts_backend backend;
7bd72e4deca3cbf757dd1ea298486d9f3bc24226Timo Sirainen char *id_username, *id_namespace, *id_box_name;
7bd72e4deca3cbf757dd1ea298486d9f3bc24226Timo Sirainen struct mail_namespace *default_ns;
7bd72e4deca3cbf757dd1ea298486d9f3bc24226Timo Sirainen};
7bd72e4deca3cbf757dd1ea298486d9f3bc24226Timo Sirainen
43a66a0b16299bd4f7615acd85e98bd3832c54d5Timo Sirainenstruct solr_fts_backend_build_context {
43a66a0b16299bd4f7615acd85e98bd3832c54d5Timo Sirainen struct fts_backend_build_context ctx;
43a66a0b16299bd4f7615acd85e98bd3832c54d5Timo Sirainen
43a66a0b16299bd4f7615acd85e98bd3832c54d5Timo Sirainen struct solr_connection_post *post;
43a66a0b16299bd4f7615acd85e98bd3832c54d5Timo Sirainen uint32_t prev_uid, uid_validity;
43a66a0b16299bd4f7615acd85e98bd3832c54d5Timo Sirainen string_t *cmd;
5c99eaa4e3e07ee065580d163240b4ce95b66befTimo Sirainen bool headers;
5c99eaa4e3e07ee065580d163240b4ce95b66befTimo Sirainen bool field_open;
5c99eaa4e3e07ee065580d163240b4ce95b66befTimo Sirainen};
5c99eaa4e3e07ee065580d163240b4ce95b66befTimo Sirainen
5c99eaa4e3e07ee065580d163240b4ce95b66befTimo Sirainenstruct solr_virtual_uid_map_context {
5c99eaa4e3e07ee065580d163240b4ce95b66befTimo Sirainen struct fts_backend *backend;
5c99eaa4e3e07ee065580d163240b4ce95b66befTimo Sirainen struct mailbox *box;
43a66a0b16299bd4f7615acd85e98bd3832c54d5Timo Sirainen string_t *vname;
43a66a0b16299bd4f7615acd85e98bd3832c54d5Timo Sirainen};
43a66a0b16299bd4f7615acd85e98bd3832c54d5Timo Sirainen
43a66a0b16299bd4f7615acd85e98bd3832c54d5Timo Sirainenstruct fts_backend_solr_get_last_uids_context {
43a66a0b16299bd4f7615acd85e98bd3832c54d5Timo Sirainen struct fts_backend *backend;
43a66a0b16299bd4f7615acd85e98bd3832c54d5Timo Sirainen pool_t pool;
43a66a0b16299bd4f7615acd85e98bd3832c54d5Timo Sirainen ARRAY_TYPE(fts_backend_uid_map) *last_uids;
43a66a0b16299bd4f7615acd85e98bd3832c54d5Timo Sirainen
43a66a0b16299bd4f7615acd85e98bd3832c54d5Timo Sirainen struct mailbox *box;
7bd72e4deca3cbf757dd1ea298486d9f3bc24226Timo Sirainen string_t *vname;
43a66a0b16299bd4f7615acd85e98bd3832c54d5Timo Sirainen};
7bd72e4deca3cbf757dd1ea298486d9f3bc24226Timo Sirainen
/* Connection shared by all backends in this file. It starts out as NULL and
   is created when the first backend is initialized. */
static struct solr_connection *solr_conn;
7bd72e4deca3cbf757dd1ea298486d9f3bc24226Timo Sirainen
7bd72e4deca3cbf757dd1ea298486d9f3bc24226Timo Sirainenstatic void fts_box_name_get_root(struct mail_namespace **ns, const char **name)
43a66a0b16299bd4f7615acd85e98bd3832c54d5Timo Sirainen{
43a66a0b16299bd4f7615acd85e98bd3832c54d5Timo Sirainen struct mail_namespace *orig_ns = *ns;
43a66a0b16299bd4f7615acd85e98bd3832c54d5Timo Sirainen
43a66a0b16299bd4f7615acd85e98bd3832c54d5Timo Sirainen while ((*ns)->alias_for != NULL)
43a66a0b16299bd4f7615acd85e98bd3832c54d5Timo Sirainen *ns = (*ns)->alias_for;
1f1e81aab38d833d1c9cdc244c91fd762e0080d4Timo Sirainen
b44650b0f48a4b5f0dc240ed836833a00b643b9fTimo Sirainen if (**name == '\0' && *ns != orig_ns &&
b44650b0f48a4b5f0dc240ed836833a00b643b9fTimo Sirainen ((*ns)->flags & NAMESPACE_FLAG_INBOX) != 0) {
b44650b0f48a4b5f0dc240ed836833a00b643b9fTimo Sirainen /* ugly workaround to allow selecting INBOX from a Maildir/
473080c7c0d25ddfdf77e7dfa0ba8f73c6c669d5Timo Sirainen when it's not in the inbox=yes namespace. */
a8e132559a7ebe54c8269d79ce29fa3338c76199Timo Sirainen *name = "INBOX";
ce6c2809b8a1673372a683716566d973efd2f6eeTimo Sirainen }
b44650b0f48a4b5f0dc240ed836833a00b643b9fTimo Sirainen}
473080c7c0d25ddfdf77e7dfa0ba8f73c6c669d5Timo Sirainen
430c0b0c370bebeeceba2e206be76bc134742f41Timo Sirainenstatic const char *
430c0b0c370bebeeceba2e206be76bc134742f41Timo Sirainenfts_box_get_root(struct mailbox *box, struct mail_namespace **ns_r)
430c0b0c370bebeeceba2e206be76bc134742f41Timo Sirainen{
430c0b0c370bebeeceba2e206be76bc134742f41Timo Sirainen struct mail_namespace *ns = mailbox_get_namespace(box);
430c0b0c370bebeeceba2e206be76bc134742f41Timo Sirainen const char *name = box->name;
430c0b0c370bebeeceba2e206be76bc134742f41Timo Sirainen
430c0b0c370bebeeceba2e206be76bc134742f41Timo Sirainen fts_box_name_get_root(&ns, &name);
ce6c2809b8a1673372a683716566d973efd2f6eeTimo Sirainen *ns_r = ns;
ce6c2809b8a1673372a683716566d973efd2f6eeTimo Sirainen return name;
b44650b0f48a4b5f0dc240ed836833a00b643b9fTimo Sirainen}
b44650b0f48a4b5f0dc240ed836833a00b643b9fTimo Sirainen
6135260095e1704ed6edff9d00bdfc043c11429cTimo Sirainenstatic void
ce6c2809b8a1673372a683716566d973efd2f6eeTimo Sirainenxml_encode_data(string_t *dest, const unsigned char *data, unsigned int len)
ce6c2809b8a1673372a683716566d973efd2f6eeTimo Sirainen{
b44650b0f48a4b5f0dc240ed836833a00b643b9fTimo Sirainen unsigned int i;
6135260095e1704ed6edff9d00bdfc043c11429cTimo Sirainen
b44650b0f48a4b5f0dc240ed836833a00b643b9fTimo Sirainen for (i = 0; i < len; i++) {
b44650b0f48a4b5f0dc240ed836833a00b643b9fTimo Sirainen switch (data[i]) {
b44650b0f48a4b5f0dc240ed836833a00b643b9fTimo Sirainen case '&':
b44650b0f48a4b5f0dc240ed836833a00b643b9fTimo Sirainen str_append(dest, "&amp;");
b44650b0f48a4b5f0dc240ed836833a00b643b9fTimo Sirainen break;
b4f2560c29dacd066ba89e782d95ceed7ac473a3Timo Sirainen case '<':
b4f2560c29dacd066ba89e782d95ceed7ac473a3Timo Sirainen str_append(dest, "&lt;");
b4f2560c29dacd066ba89e782d95ceed7ac473a3Timo Sirainen break;
b4f2560c29dacd066ba89e782d95ceed7ac473a3Timo Sirainen case '>':
1479a685cdb1641783ac02ba135450929f5c2658Timo Sirainen str_append(dest, "&gt;");
b4f2560c29dacd066ba89e782d95ceed7ac473a3Timo Sirainen break;
b4f2560c29dacd066ba89e782d95ceed7ac473a3Timo Sirainen case '\t':
b4f2560c29dacd066ba89e782d95ceed7ac473a3Timo Sirainen case '\n':
b44650b0f48a4b5f0dc240ed836833a00b643b9fTimo Sirainen case '\r':
b44650b0f48a4b5f0dc240ed836833a00b643b9fTimo Sirainen /* exceptions to the following control char check */
ac713658d206e8d001fef7c0e36945793f2eb942Timo Sirainen str_append_c(dest, data[i]);
49e513d090753ccbf95560b2f3a21f081a5b6c51Timo Sirainen break;
b44650b0f48a4b5f0dc240ed836833a00b643b9fTimo Sirainen default:
9ed2951bd0bb1878a27437d7c00611b2baadd614Timo Sirainen if (data[i] < 32) {
ac713658d206e8d001fef7c0e36945793f2eb942Timo Sirainen /* SOLR doesn't like control characters.
ac713658d206e8d001fef7c0e36945793f2eb942Timo Sirainen replace them with spaces. */
ac713658d206e8d001fef7c0e36945793f2eb942Timo Sirainen str_append_c(dest, ' ');
6135260095e1704ed6edff9d00bdfc043c11429cTimo Sirainen } else {
ac713658d206e8d001fef7c0e36945793f2eb942Timo Sirainen str_append_c(dest, data[i]);
b44650b0f48a4b5f0dc240ed836833a00b643b9fTimo Sirainen }
473080c7c0d25ddfdf77e7dfa0ba8f73c6c669d5Timo Sirainen break;
517d1e7142d57299c733b30423e35e7e1f8d01d6Timo Sirainen }
517d1e7142d57299c733b30423e35e7e1f8d01d6Timo Sirainen }
b44650b0f48a4b5f0dc240ed836833a00b643b9fTimo Sirainen}
446e518e4fe86ff40e33543445f4e99edf840a21Timo Sirainen
446e518e4fe86ff40e33543445f4e99edf840a21Timo Sirainenstatic void xml_encode(string_t *dest, const char *str)
446e518e4fe86ff40e33543445f4e99edf840a21Timo Sirainen{
446e518e4fe86ff40e33543445f4e99edf840a21Timo Sirainen xml_encode_data(dest, (const unsigned char *)str, strlen(str));
446e518e4fe86ff40e33543445f4e99edf840a21Timo Sirainen}
446e518e4fe86ff40e33543445f4e99edf840a21Timo Sirainen
446e518e4fe86ff40e33543445f4e99edf840a21Timo Sirainenstatic const char *solr_escape_id_str(const char *str)
9ed2951bd0bb1878a27437d7c00611b2baadd614Timo Sirainen{
6135260095e1704ed6edff9d00bdfc043c11429cTimo Sirainen string_t *tmp;
446e518e4fe86ff40e33543445f4e99edf840a21Timo Sirainen const char *p;
446e518e4fe86ff40e33543445f4e99edf840a21Timo Sirainen
d368bfd671ae6d04a69eb7f418521d49b8bbf77aTimo Sirainen for (p = str; *p != '\0'; p++) {
446e518e4fe86ff40e33543445f4e99edf840a21Timo Sirainen if (*p == '/' || *p == '!')
b44650b0f48a4b5f0dc240ed836833a00b643b9fTimo Sirainen break;
6135260095e1704ed6edff9d00bdfc043c11429cTimo Sirainen }
b44650b0f48a4b5f0dc240ed836833a00b643b9fTimo Sirainen if (*p == '\0')
b44650b0f48a4b5f0dc240ed836833a00b643b9fTimo Sirainen return str;
b44650b0f48a4b5f0dc240ed836833a00b643b9fTimo Sirainen
473080c7c0d25ddfdf77e7dfa0ba8f73c6c669d5Timo Sirainen tmp = t_str_new(64);
473080c7c0d25ddfdf77e7dfa0ba8f73c6c669d5Timo Sirainen for (p = str; *p != '\0'; p++) {
b44650b0f48a4b5f0dc240ed836833a00b643b9fTimo Sirainen switch (*p) {
a3dd97fb6d92a89c3de0597fed2d4b044c7aeb84Timo Sirainen case '/':
a3dd97fb6d92a89c3de0597fed2d4b044c7aeb84Timo Sirainen str_append(tmp, "!\\");
f968e62caa52a8924bd05ebf76ff515b5c18e17bTimo Sirainen break;
a3dd97fb6d92a89c3de0597fed2d4b044c7aeb84Timo Sirainen case '!':
a3dd97fb6d92a89c3de0597fed2d4b044c7aeb84Timo Sirainen str_append(tmp, "!!");
a3dd97fb6d92a89c3de0597fed2d4b044c7aeb84Timo Sirainen break;
a3dd97fb6d92a89c3de0597fed2d4b044c7aeb84Timo Sirainen default:
5367840b91df098e016f382960c391691c8d33ffTimo Sirainen str_append_c(tmp, *p);
b44650b0f48a4b5f0dc240ed836833a00b643b9fTimo Sirainen break;
b44650b0f48a4b5f0dc240ed836833a00b643b9fTimo Sirainen }
f968e62caa52a8924bd05ebf76ff515b5c18e17bTimo Sirainen }
b44650b0f48a4b5f0dc240ed836833a00b643b9fTimo Sirainen return str_c(tmp);
f968e62caa52a8924bd05ebf76ff515b5c18e17bTimo Sirainen}
f968e62caa52a8924bd05ebf76ff515b5c18e17bTimo Sirainen
/* Append str to dest inside double quotes, after passing it through
   str_escape(). */
static void solr_quote(string_t *dest, const char *str)
{
	const char *escaped;

	str_append(dest, "\"");
	escaped = str_escape(str);
	str_append(dest, escaped);
	str_append(dest, "\"");
}
b24ffea8baa472d9b542e54ed3f9939eefd020adTimo Sirainen
/* Append str to dest for use inside a URL: the value is escaped with
   solr_connection_http_escape() and enclosed in %22, the percent-encoded
   double quote. */
static void solr_quote_http(string_t *dest, const char *str)
{
	static const char url_quote[] = "%22";

	str_append(dest, url_quote);
	solr_connection_http_escape(solr_conn, dest, str);
	str_append(dest, url_quote);
}
6135260095e1704ed6edff9d00bdfc043c11429cTimo Sirainen
4ed1b49d815ec41a5e4b6a23d23e94b958da1923Timo Sirainenstatic struct fts_backend *
4ed1b49d815ec41a5e4b6a23d23e94b958da1923Timo Sirainenfts_backend_solr_init(struct mailbox *box)
a3dd97fb6d92a89c3de0597fed2d4b044c7aeb84Timo Sirainen{
4ed1b49d815ec41a5e4b6a23d23e94b958da1923Timo Sirainen struct fts_solr_user *fuser =
b44650b0f48a4b5f0dc240ed836833a00b643b9fTimo Sirainen FTS_SOLR_USER_CONTEXT(box->storage->user);
b44650b0f48a4b5f0dc240ed836833a00b643b9fTimo Sirainen const struct fts_solr_settings *set = &fuser->set;
473080c7c0d25ddfdf77e7dfa0ba8f73c6c669d5Timo Sirainen struct solr_fts_backend *backend;
473080c7c0d25ddfdf77e7dfa0ba8f73c6c669d5Timo Sirainen struct mail_namespace *ns;
9ed2951bd0bb1878a27437d7c00611b2baadd614Timo Sirainen const char *str, *box_name;
9ed2951bd0bb1878a27437d7c00611b2baadd614Timo Sirainen
9ed2951bd0bb1878a27437d7c00611b2baadd614Timo Sirainen
9ed2951bd0bb1878a27437d7c00611b2baadd614Timo Sirainen box_name = fts_box_get_root(box, &ns);
9ed2951bd0bb1878a27437d7c00611b2baadd614Timo Sirainen i_assert(*box_name != '\0');
9ed2951bd0bb1878a27437d7c00611b2baadd614Timo Sirainen
9ed2951bd0bb1878a27437d7c00611b2baadd614Timo Sirainen if (solr_conn == NULL)
9ed2951bd0bb1878a27437d7c00611b2baadd614Timo Sirainen solr_conn = solr_connection_init(set->url, set->debug);
9ed2951bd0bb1878a27437d7c00611b2baadd614Timo Sirainen
9ed2951bd0bb1878a27437d7c00611b2baadd614Timo Sirainen backend = i_new(struct solr_fts_backend, 1);
9ed2951bd0bb1878a27437d7c00611b2baadd614Timo Sirainen if (set->default_ns_prefix != NULL) {
9ed2951bd0bb1878a27437d7c00611b2baadd614Timo Sirainen backend->default_ns =
9ed2951bd0bb1878a27437d7c00611b2baadd614Timo Sirainen mail_namespace_find_prefix(ns->user->namespaces,
9ed2951bd0bb1878a27437d7c00611b2baadd614Timo Sirainen set->default_ns_prefix);
9ed2951bd0bb1878a27437d7c00611b2baadd614Timo Sirainen if (backend->default_ns == NULL) {
9ed2951bd0bb1878a27437d7c00611b2baadd614Timo Sirainen i_fatal("fts_solr: default_ns setting points to "
9ed2951bd0bb1878a27437d7c00611b2baadd614Timo Sirainen "nonexisting namespace");
e48d89622047bd8bbd0475b881ca9377d592f535Timo Sirainen }
04052d7cacaa866a3f00afb4e104fa46c04c1dd7Timo Sirainen } else {
25757faf029c369a8318349dafe952e2358df1d8Timo Sirainen backend->default_ns =
3cf67672fdc87583cb23ce088c95bb5dee60e74dTimo Sirainen mail_namespace_find_inbox(ns->user->namespaces);
43a66a0b16299bd4f7615acd85e98bd3832c54d5Timo Sirainen }
e48d89622047bd8bbd0475b881ca9377d592f535Timo Sirainen while (backend->default_ns->alias_for != NULL)
9ed2951bd0bb1878a27437d7c00611b2baadd614Timo Sirainen backend->default_ns = backend->default_ns->alias_for;
0727e38ac12efb8963a339daf56255e2be1f29fcTimo Sirainen
04052d7cacaa866a3f00afb4e104fa46c04c1dd7Timo Sirainen str = solr_escape_id_str(ns->user->username);
8ff9812659728d4166df8e003a1dd3524ae8514eTimo Sirainen backend->id_username = i_strdup(str);
966cb0c1aa58578339cea6f79b4a423a851ab074Timo Sirainen if (ns != backend->default_ns) {
966cb0c1aa58578339cea6f79b4a423a851ab074Timo Sirainen str = solr_escape_id_str(ns->prefix);
966cb0c1aa58578339cea6f79b4a423a851ab074Timo Sirainen backend->id_namespace = i_strdup(str);
966cb0c1aa58578339cea6f79b4a423a851ab074Timo Sirainen }
d5abbb932a0a598f002da39a8b3326643b1b5efcTimo Sirainen backend->id_box_name = i_strdup(box_name);
04052d7cacaa866a3f00afb4e104fa46c04c1dd7Timo Sirainen backend->backend = fts_backend_solr;
d5abbb932a0a598f002da39a8b3326643b1b5efcTimo Sirainen
d5abbb932a0a598f002da39a8b3326643b1b5efcTimo Sirainen if (set->substring_search)
04052d7cacaa866a3f00afb4e104fa46c04c1dd7Timo Sirainen backend->backend.flags |= FTS_BACKEND_FLAG_SUBSTRING_LOOKUPS;
04052d7cacaa866a3f00afb4e104fa46c04c1dd7Timo Sirainen return &backend->backend;
04052d7cacaa866a3f00afb4e104fa46c04c1dd7Timo Sirainen}
04052d7cacaa866a3f00afb4e104fa46c04c1dd7Timo Sirainen
04052d7cacaa866a3f00afb4e104fa46c04c1dd7Timo Sirainenstatic void fts_backend_solr_deinit(struct fts_backend *_backend)
04052d7cacaa866a3f00afb4e104fa46c04c1dd7Timo Sirainen{
747e77e3ab073a8e9e69c7a3e71b4593c5655d03Timo Sirainen struct solr_fts_backend *backend = (struct solr_fts_backend *)_backend;
dd93aba1901a457346990f49c54a738947dc7128Timo Sirainen
04052d7cacaa866a3f00afb4e104fa46c04c1dd7Timo Sirainen i_free(backend->id_box_name);
9ed2951bd0bb1878a27437d7c00611b2baadd614Timo Sirainen i_free(backend->id_namespace);
9ed2951bd0bb1878a27437d7c00611b2baadd614Timo Sirainen i_free(backend->id_username);
9ed2951bd0bb1878a27437d7c00611b2baadd614Timo Sirainen i_free(backend);
e48d89622047bd8bbd0475b881ca9377d592f535Timo Sirainen}
e48d89622047bd8bbd0475b881ca9377d592f535Timo Sirainen
e48d89622047bd8bbd0475b881ca9377d592f535Timo Sirainenstatic void
04052d7cacaa866a3f00afb4e104fa46c04c1dd7Timo Sirainensolr_add_ns_query(string_t *str, struct fts_backend *_backend,
e48d89622047bd8bbd0475b881ca9377d592f535Timo Sirainen struct mail_namespace *ns, bool neg)
e48d89622047bd8bbd0475b881ca9377d592f535Timo Sirainen{
04052d7cacaa866a3f00afb4e104fa46c04c1dd7Timo Sirainen struct solr_fts_backend *backend = (struct solr_fts_backend *)_backend;
04052d7cacaa866a3f00afb4e104fa46c04c1dd7Timo Sirainen
9ed2951bd0bb1878a27437d7c00611b2baadd614Timo Sirainen while (ns->alias_for != NULL)
e48d89622047bd8bbd0475b881ca9377d592f535Timo Sirainen ns = ns->alias_for;
25757faf029c369a8318349dafe952e2358df1d8Timo Sirainen
25757faf029c369a8318349dafe952e2358df1d8Timo Sirainen if (ns == backend->default_ns || *ns->prefix == '\0') {
f3d506e525a720f214020ca0f989a1966b30edaeTimo Sirainen if (!neg)
08aea01ef9a9d20703e0fcf8618e6195c0037a44Timo Sirainen str_append(str, " -ns:[* TO *]");
9ed2951bd0bb1878a27437d7c00611b2baadd614Timo Sirainen else
f3d506e525a720f214020ca0f989a1966b30edaeTimo Sirainen str_append(str, " +ns:[* TO *]");
9ed2951bd0bb1878a27437d7c00611b2baadd614Timo Sirainen } else {
08aea01ef9a9d20703e0fcf8618e6195c0037a44Timo Sirainen if (!neg)
08aea01ef9a9d20703e0fcf8618e6195c0037a44Timo Sirainen str_append(str, " +ns:");
849969f639a00eab26791db3cb1b66430420c0cdTimo Sirainen else
25757faf029c369a8318349dafe952e2358df1d8Timo Sirainen str_append(str, " -ns:");
9ed2951bd0bb1878a27437d7c00611b2baadd614Timo Sirainen solr_quote(str, ns->prefix);
9ed2951bd0bb1878a27437d7c00611b2baadd614Timo Sirainen }
9ed2951bd0bb1878a27437d7c00611b2baadd614Timo Sirainen}
9ed2951bd0bb1878a27437d7c00611b2baadd614Timo Sirainen
9ed2951bd0bb1878a27437d7c00611b2baadd614Timo Sirainenstatic void
9ed2951bd0bb1878a27437d7c00611b2baadd614Timo Sirainensolr_add_ns_query_http(string_t *str, struct fts_backend *backend,
9ed2951bd0bb1878a27437d7c00611b2baadd614Timo Sirainen struct mail_namespace *ns)
9ed2951bd0bb1878a27437d7c00611b2baadd614Timo Sirainen{
9ed2951bd0bb1878a27437d7c00611b2baadd614Timo Sirainen string_t *tmp;
9ed2951bd0bb1878a27437d7c00611b2baadd614Timo Sirainen
e48d89622047bd8bbd0475b881ca9377d592f535Timo Sirainen tmp = t_str_new(64);
849969f639a00eab26791db3cb1b66430420c0cdTimo Sirainen solr_add_ns_query(tmp, backend, ns, FALSE);
849969f639a00eab26791db3cb1b66430420c0cdTimo Sirainen solr_connection_http_escape(solr_conn, str, str_c(tmp));
9d75363d3fbabc2fbc2d80f06672e3ed8965804aTimo Sirainen}
9d75363d3fbabc2fbc2d80f06672e3ed8965804aTimo Sirainen
9d75363d3fbabc2fbc2d80f06672e3ed8965804aTimo Sirainenstatic int fts_backend_solr_get_last_uid_fallback(struct fts_backend *backend,
25757faf029c369a8318349dafe952e2358df1d8Timo Sirainen uint32_t *last_uid_r)
43a66a0b16299bd4f7615acd85e98bd3832c54d5Timo Sirainen{
9625595c47c665f5aee57ebfcb1fcbe9ad1bf3a0Martti Rannanjärvi struct mailbox *box = backend->box;
be5c76fabc7439fd33bc799bc3ab3f570799977bTimo Sirainen struct mail_namespace *ns;
be5c76fabc7439fd33bc799bc3ab3f570799977bTimo Sirainen struct mailbox_status status;
be5c76fabc7439fd33bc799bc3ab3f570799977bTimo Sirainen ARRAY_TYPE(seq_range) uids;
be5c76fabc7439fd33bc799bc3ab3f570799977bTimo Sirainen const struct seq_range *uidvals;
be5c76fabc7439fd33bc799bc3ab3f570799977bTimo Sirainen const char *box_name;
be5c76fabc7439fd33bc799bc3ab3f570799977bTimo Sirainen unsigned int count;
be5c76fabc7439fd33bc799bc3ab3f570799977bTimo Sirainen string_t *str;
be5c76fabc7439fd33bc799bc3ab3f570799977bTimo Sirainen
be5c76fabc7439fd33bc799bc3ab3f570799977bTimo Sirainen str = t_str_new(256);
be5c76fabc7439fd33bc799bc3ab3f570799977bTimo Sirainen str_append(str, "fl=uid&rows=1&sort=uid+desc&q=");
be5c76fabc7439fd33bc799bc3ab3f570799977bTimo Sirainen
be5c76fabc7439fd33bc799bc3ab3f570799977bTimo Sirainen box_name = fts_box_get_root(box, &ns);
be5c76fabc7439fd33bc799bc3ab3f570799977bTimo Sirainen
be5c76fabc7439fd33bc799bc3ab3f570799977bTimo Sirainen mailbox_get_status(box, STATUS_UIDVALIDITY, &status);
be5c76fabc7439fd33bc799bc3ab3f570799977bTimo Sirainen str_printfa(str, "uidv:%u+box:", status.uidvalidity);
be5c76fabc7439fd33bc799bc3ab3f570799977bTimo Sirainen solr_quote_http(str, box_name);
43a66a0b16299bd4f7615acd85e98bd3832c54d5Timo Sirainen solr_add_ns_query_http(str, backend, ns);
43a66a0b16299bd4f7615acd85e98bd3832c54d5Timo Sirainen str_append(str, "+user:");
2028d80c2704bbf62b29b2c624b0ee3c3a03c462Timo Sirainen solr_quote_http(str, ns->user->username);
43a66a0b16299bd4f7615acd85e98bd3832c54d5Timo Sirainen
43a66a0b16299bd4f7615acd85e98bd3832c54d5Timo Sirainen t_array_init(&uids, 1);
43a66a0b16299bd4f7615acd85e98bd3832c54d5Timo Sirainen if (solr_connection_select(solr_conn, str_c(str),
43a66a0b16299bd4f7615acd85e98bd3832c54d5Timo Sirainen NULL, NULL, &uids, NULL) < 0)
43a66a0b16299bd4f7615acd85e98bd3832c54d5Timo Sirainen return -1;
43a66a0b16299bd4f7615acd85e98bd3832c54d5Timo Sirainen
43a66a0b16299bd4f7615acd85e98bd3832c54d5Timo Sirainen uidvals = array_get(&uids, &count);
43a66a0b16299bd4f7615acd85e98bd3832c54d5Timo Sirainen if (count == 0) {
1a669829132a4b68aaba32400e28bb2a4e19bcaaTimo Sirainen /* nothing indexed yet for this mailbox */
43a66a0b16299bd4f7615acd85e98bd3832c54d5Timo Sirainen *last_uid_r = 0;
43a66a0b16299bd4f7615acd85e98bd3832c54d5Timo Sirainen } else if (count == 1 && uidvals[0].seq1 == uidvals[0].seq2) {
43a66a0b16299bd4f7615acd85e98bd3832c54d5Timo Sirainen *last_uid_r = uidvals[0].seq1;
43a66a0b16299bd4f7615acd85e98bd3832c54d5Timo Sirainen } else {
9ed2951bd0bb1878a27437d7c00611b2baadd614Timo Sirainen i_error("fts_solr: Last UID lookup returned multiple rows");
43a66a0b16299bd4f7615acd85e98bd3832c54d5Timo Sirainen return -1;
43a66a0b16299bd4f7615acd85e98bd3832c54d5Timo Sirainen }
2028d80c2704bbf62b29b2c624b0ee3c3a03c462Timo Sirainen return 0;
43a66a0b16299bd4f7615acd85e98bd3832c54d5Timo Sirainen}
43a66a0b16299bd4f7615acd85e98bd3832c54d5Timo Sirainen
43a66a0b16299bd4f7615acd85e98bd3832c54d5Timo Sirainenstatic int fts_backend_solr_get_last_uid(struct fts_backend *backend,
43a66a0b16299bd4f7615acd85e98bd3832c54d5Timo Sirainen uint32_t *last_uid_r)
43a66a0b16299bd4f7615acd85e98bd3832c54d5Timo Sirainen{
43a66a0b16299bd4f7615acd85e98bd3832c54d5Timo Sirainen struct mailbox *box = backend->box;
43a66a0b16299bd4f7615acd85e98bd3832c54d5Timo Sirainen struct mail_namespace *ns;
43a66a0b16299bd4f7615acd85e98bd3832c54d5Timo Sirainen struct mailbox_status status;
1a669829132a4b68aaba32400e28bb2a4e19bcaaTimo Sirainen ARRAY_TYPE(seq_range) uids;
43a66a0b16299bd4f7615acd85e98bd3832c54d5Timo Sirainen const struct seq_range *uidvals;
43a66a0b16299bd4f7615acd85e98bd3832c54d5Timo Sirainen const char *box_name;
43a66a0b16299bd4f7615acd85e98bd3832c54d5Timo Sirainen unsigned int count;
43a66a0b16299bd4f7615acd85e98bd3832c54d5Timo Sirainen string_t *str;
9ed2951bd0bb1878a27437d7c00611b2baadd614Timo Sirainen
43a66a0b16299bd4f7615acd85e98bd3832c54d5Timo Sirainen str = t_str_new(256);
43a66a0b16299bd4f7615acd85e98bd3832c54d5Timo Sirainen str_append(str, "fl=uid&rows=1&q=last_uid:TRUE+");
box_name = fts_box_get_root(box, &ns);
mailbox_get_status(box, STATUS_UIDVALIDITY, &status);
str_printfa(str, "uidv:%u+box:", status.uidvalidity);
solr_quote_http(str, box_name);
solr_add_ns_query_http(str, backend, ns);
str_append(str, "+user:");
solr_quote_http(str, ns->user->username);
t_array_init(&uids, 1);
if (solr_connection_select(solr_conn, str_c(str),
NULL, NULL, &uids, NULL) < 0)
return -1;
uidvals = array_get(&uids, &count);
if (count == 0) {
/* either nothing is indexed or we're converting from an
older database format without the last_uid fields */
return fts_backend_solr_get_last_uid_fallback(backend,
last_uid_r);
} else if (count == 1 && uidvals[0].seq1 == uidvals[0].seq2) {
*last_uid_r = uidvals[0].seq1;
} else {
i_error("fts_solr: Last UID lookup returned multiple rows");
return -1;
}
return 0;
}
/* Return the namespace that a "ns" value read from Solr refers to: the
   backend's default namespace when ns_prefix is NULL, otherwise the result
   of looking the prefix up among the namespaces of the box's user. */
static struct mail_namespace *
solr_get_namespaces(struct fts_backend *_backend,
		    struct mailbox *box, const char *ns_prefix)
{
	struct solr_fts_backend *backend = (struct solr_fts_backend *)_backend;
	struct mail_namespace *user_namespaces =
		box->storage->user->namespaces;

	return ns_prefix != NULL ?
		mail_namespace_find_prefix(user_namespaces, ns_prefix) :
		backend->default_ns;
}
/* Callback given to solr_connection_select() when the last UIDs of all
   mailboxes are collected. For the matching namespace and every namespace
   after it in the alias chain, one entry (vname, uidvalidity, uid) is
   appended to the context's last_uids array.

   Always returns FALSE.
   NOTE(review): what the return value means to the caller isn't visible in
   this file - confirm against solr_connection_select(). */
static bool
solr_virtual_get_last_uids(const char *ns_prefix, const char *mailbox,
			   uint32_t uidvalidity, uint32_t *uid, void *context)
{
	struct fts_backend_solr_get_last_uids_context *ctx = context;
	struct fts_backend_uid_map *entry;
	struct mail_namespace *cur_ns;
	const char *vname;

	cur_ns = solr_get_namespaces(ctx->backend, ctx->box, ns_prefix);
	while (cur_ns != NULL) {
		vname = mail_namespace_get_vname(cur_ns, ctx->vname, mailbox);

		entry = array_append_space(ctx->last_uids);
		entry->mailbox = p_strdup(ctx->pool, vname);
		entry->uidvalidity = uidvalidity;
		entry->uid = *uid;

		cur_ns = cur_ns->alias_chain_next;
	}
	return FALSE;
}
/* Append the Solr form of a virtual mailbox pattern to str.

   The pattern name is first adjusted for its namespace and the namespace
   aliases are resolved. Then:
    - the name "*" is written as "[* TO *]"
    - a name without '%' or '*' is written as a quoted string
    - otherwise each run of '%' and '*' characters is written as a single
      '*', and every other character that isn't alphanumeric is prefixed
      with a backslash. */
static void
solr_add_pattern(string_t *str, const struct mailbox_virtual_pattern *pattern)
{
	struct mail_namespace *ns = pattern->ns;
	const char *name = pattern->pattern;
	const char *cur;
	bool prev_wildcard = FALSE;

	if (!mail_namespace_update_name(pattern->ns, &name))
		name = mail_namespace_fix_sep(pattern->ns, name);
	fts_box_name_get_root(&ns, &name);

	if (strcmp(name, "*") == 0) {
		str_append(str, "[* TO *]");
		return;
	}
	if (strpbrk(name, "%*") == NULL) {
		/* no wildcards, so this is a full mailbox name */
		solr_quote(str, name);
		return;
	}

	/* there are at least some wildcards. */
	for (cur = name; *cur != '\0'; cur++) {
		if (*cur == '%' || *cur == '*') {
			/* consecutive wildcards produce only one '*' */
			if (!prev_wildcard)
				str_append_c(str, '*');
			prev_wildcard = TRUE;
			continue;
		}
		prev_wildcard = FALSE;
		if (!i_isalnum(*cur))
			str_append_c(str, '\\');
		str_append_c(str, *cur);
	}
}
/* Append a "&fq=" filter query to str that limits the results to the
   mailboxes matched by the virtual mailbox patterns of box.

   The include patterns are used only if none of them begins with a wildcard
   and there are at most FTS_SOLR_MAX_BOX_INC_PATTERNS of them. Of the
   exclude patterns at most FTS_SOLR_MAX_BOX_EXC_PATTERNS are added, each as
   a "NOT (...)" clause. If no filter text was generated, str isn't
   changed. */
static void
fts_backend_solr_filter_mailboxes(struct fts_backend *_backend,
				  string_t *str, struct mailbox *box)
{
	ARRAY_TYPE(mailbox_virtual_patterns) includes_arr, excludes_arr;
	const struct mailbox_virtual_pattern *includes, *excludes;
	struct mail_namespace *exc_ns;
	unsigned int idx, inc_count, exc_count;
	bool filter_includes;
	string_t *fq;

	t_array_init(&includes_arr, 16);
	t_array_init(&excludes_arr, 16);
	mailbox_get_virtual_box_patterns(box, &includes_arr, &excludes_arr);
	includes = array_get(&includes_arr, &inc_count);
	excludes = array_get(&excludes_arr, &exc_count);
	i_assert(inc_count > 0);

	/* First see if there are any patterns that begin with a wildcard.
	   Solr doesn't allow them, so in that case we'll need to return
	   all mailboxes. */
	filter_includes = inc_count <= FTS_SOLR_MAX_BOX_INC_PATTERNS;
	for (idx = 0; filter_includes && idx < inc_count; idx++) {
		switch (*includes[idx].pattern) {
		case '*':
		case '%':
			filter_includes = FALSE;
			break;
		default:
			break;
		}
	}

	fq = t_str_new(128);
	if (filter_includes) {
		/* we can filter what mailboxes we want returned */
		str_append_c(fq, '(');
		for (idx = 0; idx < inc_count; idx++) {
			str_append(fq, idx == 0 ? "(+box:" : " OR +(+box:");
			solr_add_pattern(fq, &includes[idx]);
			solr_add_ns_query(fq, _backend, includes[idx].ns,
					  FALSE);
			str_append_c(fq, ')');
		}
		str_append_c(fq, ')');
	}

	if (exc_count > FTS_SOLR_MAX_BOX_EXC_PATTERNS)
		exc_count = FTS_SOLR_MAX_BOX_EXC_PATTERNS;
	for (idx = 0; idx < exc_count; idx++) {
		if (str_len(fq) > 0)
			str_append_c(fq, ' ');
		str_append(fq, "NOT (box:");
		solr_add_pattern(fq, &excludes[idx]);

		exc_ns = excludes[idx].ns;
		while (exc_ns->alias_for != NULL)
			exc_ns = exc_ns->alias_for;
		solr_add_ns_query(fq, _backend, exc_ns, FALSE);
		str_append_c(fq, ')');
	}

	if (str_len(fq) == 0)
		return;
	str_append(str, "&fq=");
	solr_connection_http_escape(solr_conn, str, str_c(fq));
}
/* Look up the last_uid documents of the user's mailboxes, filtered by the
   virtual mailbox patterns of the backend's mailbox. The results are added
   to last_uids by solr_virtual_get_last_uids(), with mailbox names
   allocated from pool. Returns whatever solr_connection_select() returns. */
static int
fts_backend_solr_get_all_last_uids(struct fts_backend *backend, pool_t pool,
				   ARRAY_TYPE(fts_backend_uid_map) *last_uids)
{
	struct fts_backend_solr_get_last_uids_context ctx;
	struct mailbox *box = backend->box;
	string_t *query;

	memset(&ctx, 0, sizeof(ctx));
	ctx.backend = backend;
	ctx.box = box;
	ctx.pool = pool;
	ctx.last_uids = last_uids;
	ctx.vname = t_str_new(256);

	query = t_str_new(256);
	str_printfa(query, "fl=uid,box,uidv,ns&rows=%u&q=last_uid:TRUE+user:",
		    SOLR_MAX_ROWS);
	solr_quote_http(query, box->storage->user->username);
	fts_backend_solr_filter_mailboxes(backend, query, box);

	return solr_connection_select(solr_conn, str_c(query),
				      solr_virtual_get_last_uids, &ctx,
				      NULL, NULL);
}
/* Start an indexing run: allocate the build context together with its
   command buffer and remember the mailbox's UIDVALIDITY. *last_uid_r is set
   to (uint32_t)-1 and the context is returned in *ctx_r. Always returns 0.
   NOTE(review): how the caller interprets the (uint32_t)-1 value isn't
   visible in this file - confirm. */
static int
fts_backend_solr_build_init(struct fts_backend *backend, uint32_t *last_uid_r,
			    struct fts_backend_build_context **ctx_r)
{
	struct solr_fts_backend_build_context *build_ctx;
	struct mailbox_status status;

	*last_uid_r = (uint32_t)-1;

	build_ctx = i_new(struct solr_fts_backend_build_context, 1);
	build_ctx->ctx.backend = backend;
	build_ctx->cmd = str_new(default_pool, SOLR_CMDBUF_SIZE);

	mailbox_get_status(backend->box, STATUS_UIDVALIDITY, &status);
	build_ctx->uid_validity = status.uidvalidity;

	*ctx_r = &build_ctx->ctx;
	return 0;
}
static void
fts_backend_solr_add_doc_prefix(struct solr_fts_backend_build_context *ctx,
uint32_t uid)
{
struct solr_fts_backend *backend =
(struct solr_fts_backend *)ctx->ctx.backend;
struct mailbox *box = ctx->ctx.backend->box;
struct mail_namespace *ns;
const char *box_name;
str_printfa(ctx->cmd, "<doc>"
"<field name=\"uid\">%u</field>"
"<field name=\"uidv\">%u</field>",
uid, ctx->uid_validity);
box_name = fts_box_get_root(box, &ns);
if (ns != backend->default_ns) {
str_append(ctx->cmd, "<field name=\"ns\">");
xml_encode(ctx->cmd, ns->prefix);
str_append(ctx->cmd, "</field>");
}
str_append(ctx->cmd, "<field name=\"box\">");
xml_encode(ctx->cmd, box_name);
str_append(ctx->cmd, "</field><field name=\"user\">");
xml_encode(ctx->cmd, ns->user->username);
str_append(ctx->cmd, "</field>");
}
/* Append the XML-encoded document ID to str. The format is
   "<uid>/[<namespace>/]<uidvalidity>/<username>/<mailbox>". If uid is 0,
   "L" is written in its place; that is the ID used for the mailbox's
   last_uid document. The namespace is included only if the backend has
   one set. */
static void xml_encode_id(string_t *str, struct fts_backend *_backend,
			  uint32_t uid, uint32_t uid_validity)
{
	struct solr_fts_backend *backend = (struct solr_fts_backend *)_backend;

	if (uid == 0)
		str_append(str, "L/");
	else
		str_printfa(str, "%u/", uid);

	if (backend->id_namespace != NULL) {
		xml_encode(str, backend->id_namespace);
		str_append_c(str, '/');
	}

	str_printfa(str, "%u/", uid_validity);
	xml_encode(str, backend->id_username);
	str_append_c(str, '/');
	xml_encode(str, backend->id_box_name);
}
static void
fts_backend_solr_uid_changed(struct solr_fts_backend_build_context *ctx,
uint32_t uid)
{
if (ctx->post == NULL) {
ctx->post = solr_connection_post_begin(solr_conn);
str_append(ctx->cmd, "<add>");
} else {
if (ctx->field_open) {
str_append(ctx->cmd, "</field>");
ctx->field_open = FALSE;
}
str_append(ctx->cmd, "</doc>");
}
ctx->prev_uid = uid;
ctx->headers = FALSE;
fts_backend_solr_add_doc_prefix(ctx, uid);
str_printfa(ctx->cmd, "<field name=\"id\">");
xml_encode_id(ctx->cmd, ctx->ctx.backend, uid, ctx->uid_validity);
str_append(ctx->cmd, "</field>");
}
/* Start the "hdr" field of message uid. A new document is started if the
   UID differs from the previous one; otherwise the field that is still open
   is closed first. The hdr field must not have been started yet for the
   same message (asserted). */
static void
fts_backend_solr_build_hdr(struct fts_backend_build_context *_ctx,
			   uint32_t uid)
{
	struct solr_fts_backend_build_context *ctx =
		(struct solr_fts_backend_build_context *)_ctx;

	if (uid == ctx->prev_uid) {
		i_assert(!ctx->headers);
		if (ctx->field_open) {
			str_append(ctx->cmd, "</field>");
			ctx->field_open = FALSE;
		}
	} else {
		fts_backend_solr_uid_changed(ctx, uid);
	}

	i_assert(!ctx->field_open);
	str_append(ctx->cmd, "<field name=\"hdr\">");
	ctx->headers = TRUE;
	ctx->field_open = TRUE;
}
/* Start indexing a body part of message uid. Returns FALSE without doing
   anything if fts_backend_default_can_index() rejects the content type.
   Otherwise a new document is started if the UID changed, the "body" field
   is opened unless some field is already open, and TRUE is returned. */
static bool
fts_backend_solr_build_body_begin(struct fts_backend_build_context *_ctx,
				  uint32_t uid, const char *content_type,
				  const char *content_disposition ATTR_UNUSED)
{
	struct solr_fts_backend_build_context *ctx =
		(struct solr_fts_backend_build_context *)_ctx;

	if (!fts_backend_default_can_index(content_type))
		return FALSE;

	if (uid == ctx->prev_uid) {
		/* body comes first, then headers */
		i_assert(!ctx->headers);
	} else {
		fts_backend_solr_uid_changed(ctx, uid);
	}

	if (ctx->field_open)
		return TRUE;

	str_append(ctx->cmd, "<field name=\"body\">");
	ctx->headers = FALSE;
	ctx->field_open = TRUE;
	return TRUE;
}
/* Build callback: appends `size` bytes of `data` to the command buffer
   through xml_encode_data(). Once the buffer grows past
   SOLR_CMDBUF_SIZE-128 bytes, its contents are handed to the POST request
   and the buffer is emptied. Always returns 0. */
static int
fts_backend_solr_build_more(struct fts_backend_build_context *_ctx,
			    const unsigned char *data, size_t size)
{
	struct solr_fts_backend_build_context *ctx =
		(struct solr_fts_backend_build_context *)_ctx;

	xml_encode_data(ctx->cmd, data, size);

	if (str_len(ctx->cmd) <= SOLR_CMDBUF_SIZE-128) {
		/* still room left, keep buffering */
		return 0;
	}

	solr_connection_post_more(ctx->post, str_data(ctx->cmd),
				  str_len(ctx->cmd));
	str_truncate(ctx->cmd, 0);
	return 0;
}
/* Finishes the <add> request built so far and asks Solr to commit it.

   Returns 0 without doing anything if no POST request was ever started.
   Otherwise returns the result of solr_connection_post_end(), or -1 if
   posting the commit command failed. */
static int
fts_backed_solr_build_commit(struct solr_fts_backend_build_context *ctx)
{
	int post_ret, commit_ret;

	if (ctx->post == NULL)
		return 0;

	/* terminate the document of the last message */
	if (ctx->field_open) {
		ctx->field_open = FALSE;
		str_append(ctx->cmd, "</field>");
	}
	str_append(ctx->cmd, "</doc>");

	/* Update the mailbox's last_uid field, replacing the existing
	   document. Note that since there is no locking, it's possible that
	   if another session is indexing at the same time, the last_uid value
	   may shrink. This doesn't really matter, we'll simply do more work
	   in future by reindexing some messages. */
	fts_backend_solr_add_doc_prefix(ctx, ctx->prev_uid);
	str_append(ctx->cmd, "<field name=\"last_uid\">TRUE</field>"
		   "<field name=\"id\">");
	xml_encode_id(ctx->cmd, ctx->ctx.backend, 0, ctx->uid_validity);
	str_append(ctx->cmd, "</field></doc></add>");

	solr_connection_post_more(ctx->post, str_data(ctx->cmd),
				  str_len(ctx->cmd));
	post_ret = solr_connection_post_end(ctx->post);

	/* commit and wait until the documents we just indexed are
	   visible to the following search */
	commit_ret = solr_connection_post(solr_conn,
					  "<commit waitFlush=\"false\" "
					  "waitSearcher=\"true\"/>");
	return commit_ret < 0 ? -1 : post_ret;
}
/* Build callback: commits whatever has been built, then frees the command
   buffer and the build context itself. Returns the result of the commit. */
static int
fts_backend_solr_build_deinit(struct fts_backend_build_context *_ctx)
{
	struct solr_fts_backend_build_context *ctx =
		(struct solr_fts_backend_build_context *)_ctx;
	int commit_ret = fts_backed_solr_build_commit(ctx);

	str_free(&ctx->cmd);
	i_free(ctx);
	return commit_ret;
}
/* Expunge callback: posts a <delete> command for the Solr document whose
   id is built from the mail's UID and its mailbox's UIDVALIDITY. The
   result of the post is ignored. */
static void
fts_backend_solr_expunge(struct fts_backend *backend, struct mail *mail)
{
	struct mailbox_status status;

	mailbox_get_status(mail->box, STATUS_UIDVALIDITY, &status);

	T_BEGIN {
		string_t *delete_cmd = t_str_new(256);

		str_append(delete_cmd, "<delete><id>");
		xml_encode_id(delete_cmd, backend, mail->uid,
			      status.uidvalidity);
		str_append(delete_cmd, "</id></delete>");

		(void)solr_connection_post(solr_conn, str_c(delete_cmd));
	} T_END;
}
/* Expunge-finish callback: posts a commit command with both waitFlush and
   waitSearcher set to "false". None of the parameters are used and the
   result of the post is ignored. */
static void
fts_backend_solr_expunge_finish(struct fts_backend *backend ATTR_UNUSED,
				struct mailbox *box ATTR_UNUSED,
				bool committed ATTR_UNUSED)
{
	const char *commit_cmd =
		"<commit waitFlush=\"false\" waitSearcher=\"false\"/>";

	(void)solr_connection_post(solr_conn, commit_cmd);
}
/* Lock callback: performs no locking at all and always returns 1. */
static int
fts_backend_solr_lock(struct fts_backend *backend ATTR_UNUSED)
{
	return 1;
}
/* Unlock callback: intentionally empty, since the lock callback does not
   take any lock either. */
static void
fts_backend_solr_unlock(struct fts_backend *backend ATTR_UNUSED)
{
	/* nothing to release */
}
/* UID mapping callback handed to solr_connection_select() for lookups in
   virtual mailboxes.

   ns_prefix, mailbox and uidvalidity identify the mailbox a search hit
   belongs to; *uid is the hit's UID in that mailbox. The namespace returned
   by solr_get_namespaces() and every namespace on its alias chain is tried
   in turn with mailbox_get_virtual_uid(), which receives `uid` as its output
   argument as well.

   Returns TRUE as soon as one of those calls succeeds, FALSE if none does
   or if no namespace was found. */
static bool solr_virtual_uid_map(const char *ns_prefix, const char *mailbox,
				 uint32_t uidvalidity, uint32_t *uid,
				 void *context)
{
	struct solr_virtual_uid_map_context *ctx = context;
	struct mail_namespace *ns;
	const char *vname;
	bool convert_inbox;

	ns = solr_get_namespaces(ctx->backend, ctx->box, ns_prefix);
	if (ns == NULL) {
		/* No namespace to map into. The loop below would not run
		   either, but ns->flags must not be read through a NULL
		   pointer.
		   NOTE(review): solr_get_namespaces() is defined outside
		   this part of the file; presumably it returns NULL when the
		   prefix matches no namespace - confirm. */
		return FALSE;
	}

	/* INBOX in the namespace flagged with NAMESPACE_FLAG_INBOX is looked
	   up by the namespace prefix instead of a generated virtual name */
	convert_inbox = (ns->flags & NAMESPACE_FLAG_INBOX) != 0 &&
		strcmp(mailbox, "INBOX") == 0;
	for (; ns != NULL; ns = ns->alias_chain_next) {
		vname = convert_inbox ? ns->prefix :
			mail_namespace_get_vname(ns, ctx->vname, mailbox);
		if (mailbox_get_virtual_uid(ctx->box, vname, uidvalidity,
					    *uid, uid))
			return TRUE;
	}
	return FALSE;
}
/* Lookup callback: builds a Solr select query from ctx->fields and runs it
   with solr_connection_select().

   The query has two parts:
    - q=  : one term per lookup field, searching "body:", "hdr:" or both,
            prefixed with '-' when FTS_LOOKUP_FLAG_INVERT is set
    - fq= : filter restricting the results to the user and to the
            mailbox (or, for virtual mailboxes, to the mailboxes added by
            fts_backend_solr_filter_mailboxes())

   For virtual mailboxes the result rows also carry box/uidv/ns and
   solr_virtual_uid_map() is passed as the UID mapping callback.

   maybe_uids is always cleared; definite_uids and scores are passed on to
   solr_connection_select(), whose return value is returned as is. */
static int fts_backend_solr_lookup(struct fts_backend_lookup_context *ctx,
				   ARRAY_TYPE(seq_range) *definite_uids,
				   ARRAY_TYPE(seq_range) *maybe_uids,
				   ARRAY_TYPE(fts_score_map) *scores)
{
	struct mailbox *box = ctx->backend->box;
	struct mail_namespace *ns;
	struct solr_virtual_uid_map_context uid_map_ctx;
	const struct fts_backend_lookup_field *fields;
	const char *box_name;
	unsigned int i, count;
	struct mailbox_status status;
	string_t *str;
	bool virtual;

	virtual = strcmp(box->storage->name, "virtual") == 0;
	/* uidnext is used below as the rows= limit for non-virtual
	   mailboxes, so it has to be requested together with uidvalidity */
	mailbox_get_status(box, STATUS_UIDVALIDITY | STATUS_UIDNEXT, &status);

	str = t_str_new(256);
	if (!virtual) {
		str_printfa(str, "fl=uid,score&rows=%u&sort=uid+asc&q=",
			    status.uidnext);
	} else {
		str_printfa(str, "fl=uid,score,box,uidv,ns&rows=%u"
			    "&sort=box+asc,uid+asc&q=",
			    SOLR_MAX_ROWS);
	}

	/* build a lucene search query from the fields */
	fields = array_get(&ctx->fields, &count);
	for (i = 0; i < count; i++) {
		if (i > 0)
			str_append_c(str, '+');

		if ((fields[i].flags & FTS_LOOKUP_FLAG_INVERT) != 0)
			str_append_c(str, '-');

		if ((fields[i].flags & FTS_LOOKUP_FLAG_HEADER) == 0) {
			/* body only */
			i_assert((fields[i].flags & FTS_LOOKUP_FLAG_BODY) != 0);
			str_append(str, "body:");
			solr_quote_http(str, fields[i].key);
		} else if ((fields[i].flags & FTS_LOOKUP_FLAG_BODY) == 0) {
			/* header only */
			str_append(str, "hdr:");
			solr_quote_http(str, fields[i].key);
		} else {
			/* both */
			str_append(str, "(body:");
			solr_quote_http(str, fields[i].key);
			str_append(str, "+OR+hdr:");
			solr_quote_http(str, fields[i].key);
			str_append_c(str, ')');
		}
	}

	/* use a separate filter query for selecting the mailbox. it shouldn't
	   affect the score and there could be some caching benefits too. */
	str_append(str, "&fq=%2Buser:");
	solr_quote_http(str, box->storage->user->username);
	if (virtual)
		fts_backend_solr_filter_mailboxes(ctx->backend, str, box);
	else {
		box_name = fts_box_get_root(box, &ns);
		str_printfa(str, "+%%2Buidv:%u+%%2Bbox:", status.uidvalidity);
		solr_quote_http(str, box_name);
		solr_add_ns_query_http(str, ctx->backend, ns);
	}

	array_clear(maybe_uids);
	if (!virtual) {
		/* UIDs in the result can be used directly */
		return solr_connection_select(solr_conn, str_c(str), NULL, NULL,
					      definite_uids, scores);
	} else {
		/* each result row is passed through solr_virtual_uid_map()
		   together with this context */
		memset(&uid_map_ctx, 0, sizeof(uid_map_ctx));
		uid_map_ctx.backend = ctx->backend;
		uid_map_ctx.box = box;
		uid_map_ctx.vname = t_str_new(256);
		return solr_connection_select(solr_conn, str_c(str),
					      solr_virtual_uid_map,
					      &uid_map_ctx,
					      definite_uids, scores);
	}
}
/* Definition of the "solr" FTS backend: its name, its flags and its table
   of callbacks. The callbacks are given positionally, so their order has to
   match the declaration of the callback table inside struct fts_backend
   (declared outside this file section). */
struct fts_backend fts_backend_solr = {
.name = "solr",
.flags = FTS_BACKEND_FLAG_VIRTUAL_LOOKUPS,
{
fts_backend_solr_init,
fts_backend_solr_deinit,
fts_backend_solr_get_last_uid,
fts_backend_solr_get_all_last_uids,
fts_backend_solr_build_init,
fts_backend_solr_build_hdr,
fts_backend_solr_build_body_begin,
/* slot left unset by this backend */
NULL,
fts_backend_solr_build_more,
fts_backend_solr_build_deinit,
fts_backend_solr_expunge,
fts_backend_solr_expunge_finish,
fts_backend_solr_lock,
fts_backend_solr_unlock,
/* two more slots left unset by this backend */
NULL,
NULL,
fts_backend_solr_lookup
}
};