solr-connection.c revision f817e98f79893a17b09214081f51834c3d733919
2454dfa32c93c20a8522c6ed42fe057baaac9f9aStephan Bosch/* Copyright (c) 2006-2017 Dovecot authors, see the included COPYING file */
b6b06530d654f0436bfbaefc1e988d53fff0cbeeTimo Sirainen /* box_id -> solr_result */
b6b06530d654f0436bfbaefc1e988d53fff0cbeeTimo Sirainen HASH_TABLE(char *, struct solr_result *) mailboxes;
62fc0b4f07eb6f18a3bff4b1fccb636e6fae3cf4Timo Sirainenstatic int solr_xml_parse(struct solr_connection *conn,
8b1a9a4d63b0abccdf7cb1acb8359d5396dd657bTimo Sirainen if (XML_Parse(conn->xml_parser, data, size, done ? 1 : 0) != 0)
507ea0bc5b25efb4c96033a19dec66689a50ebd0Baofeng Wang line = XML_GetCurrentLineNumber(conn->xml_parser);
568fec5b1e629f25d288b48007485b9aa4a018b1Timo Sirainen col = XML_GetCurrentColumnNumber(conn->xml_parser);
568fec5b1e629f25d288b48007485b9aa4a018b1Timo Sirainen i_error("fts_solr: Invalid XML input at %d:%d: %s "
62fc0b4f07eb6f18a3bff4b1fccb636e6fae3cf4Timo Sirainen "(near: %.*s)", line, col, XML_ErrorString(err),
568fec5b1e629f25d288b48007485b9aa4a018b1Timo Sirainenint solr_connection_init(const char *url, bool debug,
568fec5b1e629f25d288b48007485b9aa4a018b1Timo Sirainen struct solr_connection **conn_r, const char **error_r)
568fec5b1e629f25d288b48007485b9aa4a018b1Timo Sirainen if (http_url_parse(url, NULL, HTTP_URL_ALLOW_USERINFO_PART, pool_datastack_create(),
62fc0b4f07eb6f18a3bff4b1fccb636e6fae3cf4Timo Sirainen "fts_solr: Failed to parse HTTP url: %s", error);
507ea0bc5b25efb4c96033a19dec66689a50ebd0Baofeng Wang conn->http_host = i_strdup(http_url->host.name);
568fec5b1e629f25d288b48007485b9aa4a018b1Timo Sirainen conn->http_base_url = i_strconcat(http_url->path, http_url->enc_query, NULL);
568fec5b1e629f25d288b48007485b9aa4a018b1Timo Sirainen /* allow empty password */
568fec5b1e629f25d288b48007485b9aa4a018b1Timo Sirainen conn->http_password = i_strdup(http_url->password != NULL ? http_url->password : "");
62fc0b4f07eb6f18a3bff4b1fccb636e6fae3cf4Timo Sirainen solr_http_client = http_client_init(&http_set);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen "fts_solr: Failed to allocate XML parser");
568fec5b1e629f25d288b48007485b9aa4a018b1Timo Sirainenvoid solr_connection_deinit(struct solr_connection **_conn)
b6b06530d654f0436bfbaefc1e988d53fff0cbeeTimo Sirainenstatic const char *attrs_get_name(const char **attrs)
83172e28d4ac684dfed83f7c9db933493d7c5922Timo Sirainensolr_lookup_xml_start(void *context, const char *name, const char **attrs)
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen struct solr_lookup_xml_context *ctx = context;
3a54211bd6c4dc3f8687c16020770551cf83a548Teemu Huovila /* skipping over unwanted elements */
568fec5b1e629f25d288b48007485b9aa4a018b1Timo Sirainen /* response -> result -> doc */
568fec5b1e629f25d288b48007485b9aa4a018b1Timo Sirainen ctx->content_state = SOLR_XML_CONTENT_STATE_UID;
568fec5b1e629f25d288b48007485b9aa4a018b1Timo Sirainen ctx->content_state = SOLR_XML_CONTENT_STATE_SCORE;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen ctx->content_state = SOLR_XML_CONTENT_STATE_MAILBOX;
b04e76cbc807707d299055be79500f8ff131da43Timo Sirainen ctx->content_state = SOLR_XML_CONTENT_STATE_NAMESPACE;
0c5854b6891c59c1c3f443569bc823d7db571582Teemu Huovila ctx->content_state = SOLR_XML_CONTENT_STATE_UIDVALIDITY;
b6b06530d654f0436bfbaefc1e988d53fff0cbeeTimo Sirainenstatic struct solr_result *
202468f94e6c6c8b5d3d98ee74e01bb0d0bb04aaTimo Sirainensolr_result_get(struct solr_lookup_xml_context *ctx, const char *box_id)
83172e28d4ac684dfed83f7c9db933493d7c5922Timo Sirainen result = hash_table_lookup(ctx->mailboxes, box_id);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen box_id_dup = p_strdup(ctx->result_pool, box_id);
62fc0b4f07eb6f18a3bff4b1fccb636e6fae3cf4Timo Sirainen result = p_new(ctx->result_pool, struct solr_result, 1);
3a54211bd6c4dc3f8687c16020770551cf83a548Teemu Huovila p_array_init(&result->uids, ctx->result_pool, 32);
62fc0b4f07eb6f18a3bff4b1fccb636e6fae3cf4Timo Sirainen p_array_init(&result->scores, ctx->result_pool, 32);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen hash_table_insert(ctx->mailboxes, box_id_dup, result);
f5c0d5cada4da23a167c38426d0c481a3e1d5583Timo Sirainenstatic int solr_lookup_add_doc(struct solr_lookup_xml_context *ctx)
3a54211bd6c4dc3f8687c16020770551cf83a548Teemu Huovila i_error("fts_solr: uid missing from inside doc");
3a54211bd6c4dc3f8687c16020770551cf83a548Teemu Huovila /* looking up from a single mailbox only */
3a54211bd6c4dc3f8687c16020770551cf83a548Teemu Huovila /* old style lookup */
3a54211bd6c4dc3f8687c16020770551cf83a548Teemu Huovila /* new style lookup */
19ed8f08b23d6ed204e6b27e5d1c0c6fe6bb11ddPhil Carmody if (seq_range_array_add(&result->uids, ctx->uid)) {
3a54211bd6c4dc3f8687c16020770551cf83a548Teemu Huovila /* duplicate result */
3a54211bd6c4dc3f8687c16020770551cf83a548Teemu Huovilastatic void solr_lookup_xml_end(void *context, const char *name ATTR_UNUSED)
3a54211bd6c4dc3f8687c16020770551cf83a548Teemu Huovila struct solr_lookup_xml_context *ctx = context;
3a54211bd6c4dc3f8687c16020770551cf83a548Teemu Huovila if (ctx->content_state == SOLR_XML_CONTENT_STATE_ERROR)
3a54211bd6c4dc3f8687c16020770551cf83a548Teemu Huovila if (ctx->state == SOLR_XML_RESPONSE_STATE_CONTENT &&
3a54211bd6c4dc3f8687c16020770551cf83a548Teemu Huovila ctx->content_state == SOLR_XML_CONTENT_STATE_MAILBOX &&
3a54211bd6c4dc3f8687c16020770551cf83a548Teemu Huovila /* mailbox is namespace prefix */
3a54211bd6c4dc3f8687c16020770551cf83a548Teemu Huovila if (ctx->state == SOLR_XML_RESPONSE_STATE_DOC) {
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen ctx->content_state = SOLR_XML_CONTENT_STATE_ERROR;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen ctx->content_state = SOLR_XML_CONTENT_STATE_NONE;
507ea0bc5b25efb4c96033a19dec66689a50ebd0Baofeng Wangstatic int uint32_parse(const char *str, int len, uint32_t *value_r)
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen for (i = 0; i < len; i++) {
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainenstatic void solr_lookup_xml_data(void *context, const char *str, int len)
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen struct solr_lookup_xml_context *ctx = context;
507ea0bc5b25efb4c96033a19dec66689a50ebd0Baofeng Wang if (uint32_parse(str, len, &ctx->uid) < 0 || ctx->uid == 0) {
507ea0bc5b25efb4c96033a19dec66689a50ebd0Baofeng Wang ctx->content_state = SOLR_XML_CONTENT_STATE_ERROR;
fdf70410de49eadfbb77997bb60ebba19aee4752Teemu Huovila ctx->score = strtod(t_strndup(str, len), NULL);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen /* this may be called multiple times, for example if input
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen contains '&' characters */
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen new_name = ctx->mailbox == NULL ? i_strndup(str, len) :
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen i_strconcat(ctx->mailbox, t_strndup(str, len), NULL);
fdf70410de49eadfbb77997bb60ebba19aee4752Teemu Huovila new_name = ctx->ns == NULL ? i_strndup(str, len) :
fdf70410de49eadfbb77997bb60ebba19aee4752Teemu Huovila i_strconcat(ctx->ns, t_strndup(str, len), NULL);
fdf70410de49eadfbb77997bb60ebba19aee4752Teemu Huovila if (uint32_parse(str, len, &ctx->uidvalidity) < 0)
fdf70410de49eadfbb77997bb60ebba19aee4752Teemu Huovila i_error("fts_solr: received invalid uidvalidity");
568fec5b1e629f25d288b48007485b9aa4a018b1Timo Sirainenstatic void solr_connection_payload_input(struct solr_connection *conn)
507ea0bc5b25efb4c96033a19dec66689a50ebd0Baofeng Wang const unsigned char *data;
eb568e46e82bc814ca3384236a483691a12f9c54Baofeng Wang /* read payload */
507ea0bc5b25efb4c96033a19dec66689a50ebd0Baofeng Wang while ((ret = i_stream_read_more(conn->payload, &data, &size)) > 0) {
e4bf76afb82ea28ec9d06823fa7deed5f8277183Timo Sirainen (void)solr_xml_parse(conn, data, size, FALSE);
568fec5b1e629f25d288b48007485b9aa4a018b1Timo Sirainen /* we will be called again for more data */
62fc0b4f07eb6f18a3bff4b1fccb636e6fae3cf4Timo Sirainen i_error("fts_solr: failed to read payload from HTTP server: %m");
8b1a9a4d63b0abccdf7cb1acb8359d5396dd657bTimo Sirainensolr_connection_select_response(const struct http_response *response,
2730605833442b5ddcb261f90b8375fc98201e35Timo Sirainen i_error("fts_solr: Lookup failed: Empty response payload");
0d6f8e7e231ac3fc8647d8fc3072d7d1e477a7cfBaofeng Wangint solr_connection_select(struct solr_connection *conn, const char *query,
b7324e421e2132cbbf753e6fdbe675bbaecdf929Timo Sirainen pool_t pool, struct solr_result ***box_results_r)
edc654a35c3368dfb529ba784aee41dff6f45149Timo Sirainen struct solr_lookup_xml_context solr_lookup_context;
edc654a35c3368dfb529ba784aee41dff6f45149Timo Sirainen const char *url;
edc654a35c3368dfb529ba784aee41dff6f45149Timo Sirainen hash_table_create(&solr_lookup_context.mailboxes, default_pool, 0,
edc654a35c3368dfb529ba784aee41dff6f45149Timo Sirainen p_array_init(&solr_lookup_context.results, pool, 32);
edc654a35c3368dfb529ba784aee41dff6f45149Timo Sirainen XML_SetCharacterDataHandler(conn->xml_parser, solr_lookup_xml_data);
edc654a35c3368dfb529ba784aee41dff6f45149Timo Sirainen XML_SetUserData(conn->xml_parser, &solr_lookup_context);
edc654a35c3368dfb529ba784aee41dff6f45149Timo Sirainen url = t_strconcat(conn->http_base_url, "select?", query, NULL);
edc654a35c3368dfb529ba784aee41dff6f45149Timo Sirainen http_req = http_client_request(solr_http_client, "GET",
b1b0b2b543dc1a10015272fc970ad7534f84e0c5Timo Sirainen http_client_request_set_auth_simple(http_req, conn->http_user, conn->http_password);
b1b0b2b543dc1a10015272fc970ad7534f84e0c5Timo Sirainen http_client_request_set_port(http_req, conn->http_port);
b1b0b2b543dc1a10015272fc970ad7534f84e0c5Timo Sirainen http_client_request_set_ssl(http_req, conn->http_ssl);
b1b0b2b543dc1a10015272fc970ad7534f84e0c5Timo Sirainen solr_lookup_context.content_state == SOLR_XML_CONTENT_STATE_ERROR)
b1b0b2b543dc1a10015272fc970ad7534f84e0c5Timo Sirainen parse_ret = solr_xml_parse(conn, "", 0, TRUE);
b1b0b2b543dc1a10015272fc970ad7534f84e0c5Timo Sirainen hash_table_destroy(&solr_lookup_context.mailboxes);
9d92ea347e1c098fa33ea517514dfdc0bb8995e2Timo Sirainen array_append_zero(&solr_lookup_context.results);
9d92ea347e1c098fa33ea517514dfdc0bb8995e2Timo Sirainen *box_results_r = array_idx_modifiable(&solr_lookup_context.results, 0);
9d92ea347e1c098fa33ea517514dfdc0bb8995e2Timo Sirainensolr_connection_update_response(const struct http_response *response,
62461eb609e1d852e027cf4e07d30d51288678a2Aki Tuomisolr_connection_post_request(struct solr_connection *conn)
9d92ea347e1c098fa33ea517514dfdc0bb8995e2Timo Sirainen const char *url;
9d92ea347e1c098fa33ea517514dfdc0bb8995e2Timo Sirainen url = t_strconcat(conn->http_base_url, "update", NULL);
9d92ea347e1c098fa33ea517514dfdc0bb8995e2Timo Sirainen http_req = http_client_request(solr_http_client, "POST",
9d92ea347e1c098fa33ea517514dfdc0bb8995e2Timo Sirainen http_client_request_set_auth_simple(http_req, conn->http_user, conn->http_password);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen http_client_request_set_port(http_req, conn->http_port);
baf3e87e186453fda13bd21f7cbcb2efc8492e8bTimo Sirainen http_client_request_set_ssl(http_req, conn->http_ssl);
f5c0d5cada4da23a167c38426d0c481a3e1d5583Timo Sirainen http_client_request_add_header(http_req, "Content-Type", "text/xml");
fdf70410de49eadfbb77997bb60ebba19aee4752Teemu Huovilasolr_connection_post_begin(struct solr_connection *conn)
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen post->http_req = solr_connection_post_request(conn);
b1965419f329eb7cf78ee39e7c5942462eabb256Timo Sirainenvoid solr_connection_post_more(struct solr_connection_post *post,
return ret;