bcb4e51a409d94ae670de96afb8483a4f7855294Stephan Bosch/* Copyright (c) 2006-2018 Dovecot authors, see the included COPYING file */
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen /* box_id -> solr_result */
678d0463849ba777106eb7875f27db07a5d8e3dfTimo Sirainen HASH_TABLE(char *, struct solr_result *) mailboxes;
48566ca412a7cf3b42512fd0ec112744778e5da0Timo Sirainenstatic int solr_xml_parse(struct solr_connection *conn,
f0339f522dc9c8e2e8a29ef9a3f937c431c6bd1bTimo Sirainen if (XML_Parse(conn->xml_parser, data, size, done ? 1 : 0) != 0)
48566ca412a7cf3b42512fd0ec112744778e5da0Timo Sirainen line = XML_GetCurrentLineNumber(conn->xml_parser);
df1713bd29d29a3e3f3ebfdf05f929525825a7d3Timo Sirainen col = XML_GetCurrentColumnNumber(conn->xml_parser);
df1713bd29d29a3e3f3ebfdf05f929525825a7d3Timo Sirainen i_error("fts_solr: Invalid XML input at %d:%d: %s "
df1713bd29d29a3e3f3ebfdf05f929525825a7d3Timo Sirainen "(near: %.*s)", line, col, XML_ErrorString(err),
526631052ca3175357302af8fa7dcbf763b40c53Stephan Bosch const struct ssl_iostream_settings *ssl_client_set,
526631052ca3175357302af8fa7dcbf763b40c53Stephan Bosch const char **error_r)
f817e98f79893a17b09214081f51834c3d733919J. Nick Koston if (http_url_parse(url, NULL, HTTP_URL_ALLOW_USERINFO_PART, pool_datastack_create(),
2d7df7973f80011033e8e9fa676d3ff4c14468d8Stephan Bosch "fts_solr: Failed to parse HTTP url: %s", error);
f74dbd3ff682fea040f60383e001620d1f1b09d3Stephan Bosch conn->http_host = i_strdup(http_url->host.name);
2d7df7973f80011033e8e9fa676d3ff4c14468d8Stephan Bosch conn->http_base_url = i_strconcat(http_url->path, http_url->enc_query, NULL);
f817e98f79893a17b09214081f51834c3d733919J. Nick Koston /* allow empty password */
f817e98f79893a17b09214081f51834c3d733919J. Nick Koston conn->http_password = i_strdup(http_url->password != NULL ? http_url->password : "");
c215ca02d468b0e542523df1ed18e5f2d7e63968Timo Sirainen solr_http_client = http_client_init(&http_set);
48566ca412a7cf3b42512fd0ec112744778e5da0Timo Sirainen "fts_solr: Failed to allocate XML parser");
f7fa93fb42a5b9d4a7e2f0367f03f920ef7ec443Timo Sirainenvoid solr_connection_deinit(struct solr_connection **_conn)
48566ca412a7cf3b42512fd0ec112744778e5da0Timo Sirainenstatic const char *attrs_get_name(const char **attrs)
48566ca412a7cf3b42512fd0ec112744778e5da0Timo Sirainensolr_lookup_xml_start(void *context, const char *name, const char **attrs)
48566ca412a7cf3b42512fd0ec112744778e5da0Timo Sirainen struct solr_lookup_xml_context *ctx = context;
48566ca412a7cf3b42512fd0ec112744778e5da0Timo Sirainen /* skipping over unwanted elements */
48566ca412a7cf3b42512fd0ec112744778e5da0Timo Sirainen /* response -> result -> doc */
48566ca412a7cf3b42512fd0ec112744778e5da0Timo Sirainen ctx->content_state = SOLR_XML_CONTENT_STATE_UID;
48566ca412a7cf3b42512fd0ec112744778e5da0Timo Sirainen ctx->content_state = SOLR_XML_CONTENT_STATE_SCORE;
ecdce39e5ef4b62eefa9f5818f17d153fd5d710aTimo Sirainen ctx->content_state = SOLR_XML_CONTENT_STATE_MAILBOX;
b8835b8a21c617ceb82ddc5a176243faf36aa8f7Timo Sirainen ctx->content_state = SOLR_XML_CONTENT_STATE_NAMESPACE;
ecdce39e5ef4b62eefa9f5818f17d153fd5d710aTimo Sirainen ctx->content_state = SOLR_XML_CONTENT_STATE_UIDVALIDITY;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainenstatic struct solr_result *
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainensolr_result_get(struct solr_lookup_xml_context *ctx, const char *box_id)
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen result = hash_table_lookup(ctx->mailboxes, box_id);
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen box_id_dup = p_strdup(ctx->result_pool, box_id);
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen result = p_new(ctx->result_pool, struct solr_result, 1);
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen p_array_init(&result->uids, ctx->result_pool, 32);
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen p_array_init(&result->scores, ctx->result_pool, 32);
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen hash_table_insert(ctx->mailboxes, box_id_dup, result);
34a41fd572d2516549b3a3b340c36730f284612aTimo Sirainenstatic int solr_lookup_add_doc(struct solr_lookup_xml_context *ctx)
34a41fd572d2516549b3a3b340c36730f284612aTimo Sirainen i_error("fts_solr: uid missing from inside doc");
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen /* looking up from a single mailbox only */
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen /* old style lookup */
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen /* new style lookup */
91c58af8e992d028eb325707904debb58ae80438Timo Sirainen if (seq_range_array_add(&result->uids, ctx->uid)) {
91c58af8e992d028eb325707904debb58ae80438Timo Sirainen /* duplicate result */
48566ca412a7cf3b42512fd0ec112744778e5da0Timo Sirainenstatic void solr_lookup_xml_end(void *context, const char *name ATTR_UNUSED)
48566ca412a7cf3b42512fd0ec112744778e5da0Timo Sirainen struct solr_lookup_xml_context *ctx = context;
34a41fd572d2516549b3a3b340c36730f284612aTimo Sirainen if (ctx->content_state == SOLR_XML_CONTENT_STATE_ERROR)
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen if (ctx->state == SOLR_XML_RESPONSE_STATE_CONTENT &&
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen ctx->content_state == SOLR_XML_CONTENT_STATE_MAILBOX &&
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen /* mailbox is namespace prefix */
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen if (ctx->state == SOLR_XML_RESPONSE_STATE_DOC) {
34a41fd572d2516549b3a3b340c36730f284612aTimo Sirainen ctx->content_state = SOLR_XML_CONTENT_STATE_ERROR;
34a41fd572d2516549b3a3b340c36730f284612aTimo Sirainen ctx->content_state = SOLR_XML_CONTENT_STATE_NONE;
ecdce39e5ef4b62eefa9f5818f17d153fd5d710aTimo Sirainenstatic int uint32_parse(const char *str, int len, uint32_t *value_r)
ecdce39e5ef4b62eefa9f5818f17d153fd5d710aTimo Sirainen for (i = 0; i < len; i++) {
48566ca412a7cf3b42512fd0ec112744778e5da0Timo Sirainenstatic void solr_lookup_xml_data(void *context, const char *str, int len)
48566ca412a7cf3b42512fd0ec112744778e5da0Timo Sirainen struct solr_lookup_xml_context *ctx = context;
34a41fd572d2516549b3a3b340c36730f284612aTimo Sirainen if (uint32_parse(str, len, &ctx->uid) < 0 || ctx->uid == 0) {
34a41fd572d2516549b3a3b340c36730f284612aTimo Sirainen i_error("fts_solr: received invalid uid '%s'",
34a41fd572d2516549b3a3b340c36730f284612aTimo Sirainen ctx->content_state = SOLR_XML_CONTENT_STATE_ERROR;
a0c453a8edaec90fb0d945c874de0b1845bc7d7eTimo Sirainen ctx->score = strtod(t_strndup(str, len), NULL);
acc4e0a41f1c8ef0559a19c280afc1b97b9e0818Timo Sirainen /* this may be called multiple times, for example if input
acc4e0a41f1c8ef0559a19c280afc1b97b9e0818Timo Sirainen contains '&' characters */
acc4e0a41f1c8ef0559a19c280afc1b97b9e0818Timo Sirainen new_name = ctx->mailbox == NULL ? i_strndup(str, len) :
acc4e0a41f1c8ef0559a19c280afc1b97b9e0818Timo Sirainen i_strconcat(ctx->mailbox, t_strndup(str, len), NULL);
b8835b8a21c617ceb82ddc5a176243faf36aa8f7Timo Sirainen new_name = ctx->ns == NULL ? i_strndup(str, len) :
b8835b8a21c617ceb82ddc5a176243faf36aa8f7Timo Sirainen i_strconcat(ctx->ns, t_strndup(str, len), NULL);
ecdce39e5ef4b62eefa9f5818f17d153fd5d710aTimo Sirainen if (uint32_parse(str, len, &ctx->uidvalidity) < 0)
ecdce39e5ef4b62eefa9f5818f17d153fd5d710aTimo Sirainen i_error("fts_solr: received invalid uidvalidity");
2d7df7973f80011033e8e9fa676d3ff4c14468d8Stephan Boschstatic void solr_connection_payload_input(struct solr_connection *conn)
2d7df7973f80011033e8e9fa676d3ff4c14468d8Stephan Bosch /* read payload */
3858a7a5da361c35f1e6e50c8e3214dc0cf379d6Phil Carmody while ((ret = i_stream_read_more(conn->payload, &data, &size)) > 0) {
2d7df7973f80011033e8e9fa676d3ff4c14468d8Stephan Bosch (void)solr_xml_parse(conn, data, size, FALSE);
2d7df7973f80011033e8e9fa676d3ff4c14468d8Stephan Bosch /* we will be called again for more data */
2d7df7973f80011033e8e9fa676d3ff4c14468d8Stephan Bosch i_error("fts_solr: failed to read payload from HTTP server: %m");
2d7df7973f80011033e8e9fa676d3ff4c14468d8Stephan Boschsolr_connection_select_response(const struct http_response *response,
2d7df7973f80011033e8e9fa676d3ff4c14468d8Stephan Bosch i_error("fts_solr: Lookup failed: Empty response payload");
48566ca412a7cf3b42512fd0ec112744778e5da0Timo Sirainenint solr_connection_select(struct solr_connection *conn, const char *query,
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen pool_t pool, struct solr_result ***box_results_r)
48566ca412a7cf3b42512fd0ec112744778e5da0Timo Sirainen struct solr_lookup_xml_context solr_lookup_context;
678d0463849ba777106eb7875f27db07a5d8e3dfTimo Sirainen hash_table_create(&solr_lookup_context.mailboxes, default_pool, 0,
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen p_array_init(&solr_lookup_context.results, pool, 32);
48566ca412a7cf3b42512fd0ec112744778e5da0Timo Sirainen XML_SetCharacterDataHandler(conn->xml_parser, solr_lookup_xml_data);
48566ca412a7cf3b42512fd0ec112744778e5da0Timo Sirainen XML_SetUserData(conn->xml_parser, &solr_lookup_context);
2d7df7973f80011033e8e9fa676d3ff4c14468d8Stephan Bosch url = t_strconcat(conn->http_base_url, "select?", query, NULL);
c215ca02d468b0e542523df1ed18e5f2d7e63968Timo Sirainen http_req = http_client_request(solr_http_client, "GET",
f817e98f79893a17b09214081f51834c3d733919J. Nick Koston http_client_request_set_auth_simple(http_req, conn->http_user, conn->http_password);
2d7df7973f80011033e8e9fa676d3ff4c14468d8Stephan Bosch http_client_request_set_port(http_req, conn->http_port);
2d7df7973f80011033e8e9fa676d3ff4c14468d8Stephan Bosch http_client_request_set_ssl(http_req, conn->http_ssl);
34a41fd572d2516549b3a3b340c36730f284612aTimo Sirainen solr_lookup_context.content_state == SOLR_XML_CONTENT_STATE_ERROR)
a10ed8c47534b4c6b6bf2711ccfe577e720a47b4Timo Sirainen parse_ret = solr_xml_parse(conn, "", 0, TRUE);
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen hash_table_destroy(&solr_lookup_context.mailboxes);
31a574fda352ef4f71dbff9c30e15e4744e132c0Timo Sirainen array_append_zero(&solr_lookup_context.results);
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen *box_results_r = array_idx_modifiable(&solr_lookup_context.results, 0);
2d7df7973f80011033e8e9fa676d3ff4c14468d8Stephan Boschsolr_connection_update_response(const struct http_response *response,
2d7df7973f80011033e8e9fa676d3ff4c14468d8Stephan Boschsolr_connection_post_request(struct solr_connection *conn)
2d7df7973f80011033e8e9fa676d3ff4c14468d8Stephan Bosch url = t_strconcat(conn->http_base_url, "update", NULL);
c215ca02d468b0e542523df1ed18e5f2d7e63968Timo Sirainen http_req = http_client_request(solr_http_client, "POST",
f817e98f79893a17b09214081f51834c3d733919J. Nick Koston http_client_request_set_auth_simple(http_req, conn->http_user, conn->http_password);
2d7df7973f80011033e8e9fa676d3ff4c14468d8Stephan Bosch http_client_request_set_port(http_req, conn->http_port);
2d7df7973f80011033e8e9fa676d3ff4c14468d8Stephan Bosch http_client_request_set_ssl(http_req, conn->http_ssl);
2d7df7973f80011033e8e9fa676d3ff4c14468d8Stephan Bosch http_client_request_add_header(http_req, "Content-Type", "text/xml");
48566ca412a7cf3b42512fd0ec112744778e5da0Timo Sirainensolr_connection_post_begin(struct solr_connection *conn)
2d7df7973f80011033e8e9fa676d3ff4c14468d8Stephan Bosch post->http_req = solr_connection_post_request(conn);
48566ca412a7cf3b42512fd0ec112744778e5da0Timo Sirainenvoid solr_connection_post_more(struct solr_connection_post *post,
c9445d17bfb97440cf47c5363993518a943f7010Timo Sirainen (void)http_client_request_send_payload(&post->http_req, data, size);
c9445d17bfb97440cf47c5363993518a943f7010Timo Sirainenint solr_connection_post_end(struct solr_connection_post **_post)
c3a4c931e95737a52e1cebeeb109a2e1cc4d47d6Timo Sirainen if (http_client_request_finish_payload(&post->http_req) < 0 ||
48566ca412a7cf3b42512fd0ec112744778e5da0Timo Sirainenint solr_connection_post(struct solr_connection *conn, const char *cmd)
2d7df7973f80011033e8e9fa676d3ff4c14468d8Stephan Bosch http_req = solr_connection_post_request(conn);
2d7df7973f80011033e8e9fa676d3ff4c14468d8Stephan Bosch post_payload = i_stream_create_from_data(cmd, strlen(cmd));