bcb4e51a409d94ae670de96afb8483a4f7855294Stephan Bosch/* Copyright (c) 2006-2018 Dovecot authors, see the included COPYING file */
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainenextern "C" {
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen#include "lib.h"
5314b66fca6b2cf6b749d296a30a5eb039e0e9baTimo Sirainen#include "array.h"
5bda841ca9e2ddd90702dc7e2a15326e4068066eTimo Sirainen#include "unichar.h"
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen#include "hash.h"
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen#include "hex-binary.h"
4d72a99412a0577b026b64afc27975f28a58d071Timo Sirainen#include "ioloop.h"
03f4c5f3502801f5b318f464cc75313a88558805Timo Sirainen#include "unlink-directory.h"
8d14b5fc9c1ea1fad788315fc98fea89796a56d0Timo Sirainen#include "ioloop.h"
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen#include "mail-index.h"
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen#include "mail-search.h"
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen#include "mail-namespace.h"
88b9f9eb91da632d3e941fe4276f8ace03205b25Timo Sirainen#include "mailbox-list-private.h"
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen#include "mail-storage.h"
027c729b3107441f54a2602ccf2c67c6206998d5Timo Sirainen#include "fts-expunge-log.h"
c37098f8ce6d512ba41f09564d04ed25720f0a77Timo Sirainen#include "fts-lucene-plugin.h"
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen#include "lucene-wrapper.h"
031d075daf75b74b286711c1b6f64c3ae70e541bTimo Sirainen
031d075daf75b74b286711c1b6f64c3ae70e541bTimo Sirainen#include <sys/stat.h>
5f9231534fd15b9aed2676a3d6cd07158f8e2a39Timo Sirainen#ifdef HAVE_LIBEXTTEXTCAT_TEXTCAT_H
f90c6ff1ae3d5675abfc6ae05574924fda8dca9eTimo Sirainen# include <libexttextcat/textcat.h>
5f9231534fd15b9aed2676a3d6cd07158f8e2a39Timo Sirainen#elif defined (HAVE_LIBTEXTCAT_TEXTCAT_H)
5f9231534fd15b9aed2676a3d6cd07158f8e2a39Timo Sirainen# include <libtextcat/textcat.h>
5f9231534fd15b9aed2676a3d6cd07158f8e2a39Timo Sirainen#elif defined (HAVE_FTS_TEXTCAT)
5f9231534fd15b9aed2676a3d6cd07158f8e2a39Timo Sirainen# include <textcat.h>
2ce5b1d19cba4654239116a34e3ad9d5b6af8551Timo Sirainen#endif
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen};
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen#include <CLucene.h>
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen#include <CLucene/util/CLStreams.h>
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen#include <CLucene/search/MultiPhraseQuery.h>
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen#include "SnowballAnalyzer.h"
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen
/* Lucene's default is 10000; use the same limit here. */
33502e55a9bf4cafcd184ca9b114c126e420f856Timo Sirainen#define MAX_TERMS_PER_DOCUMENT 10000
4ca9910d9db31a68d9d501150045cfd4bb1e2ac7Timo Sirainen#define FTS_LUCENE_MAX_SEARCH_TERMS 1000
33502e55a9bf4cafcd184ca9b114c126e420f856Timo Sirainen
f94e1eb89b911e7bef709a25580590c3fff594acTimo Sirainen#define LUCENE_LOCK_OVERRIDE_SECS 60
4d72a99412a0577b026b64afc27975f28a58d071Timo Sirainen#define LUCENE_INDEX_CLOSE_TIMEOUT_MSECS (120*1000)
f94e1eb89b911e7bef709a25580590c3fff594acTimo Sirainen
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainenusing namespace lucene::document;
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainenusing namespace lucene::index;
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainenusing namespace lucene::search;
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainenusing namespace lucene::queryParser;
6380f2bc729a03b328793e8ad6ba7587620fa184Timo Sirainenusing namespace lucene::analysis;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainenusing namespace lucene::analysis;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainenusing namespace lucene::util;
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen
bd49b2e35dfa08753e89bef12a694978599d0fc0Timo Sirainenstruct lucene_query {
bd49b2e35dfa08753e89bef12a694978599d0fc0Timo Sirainen Query *query;
bd49b2e35dfa08753e89bef12a694978599d0fc0Timo Sirainen BooleanClause::Occur occur;
bd49b2e35dfa08753e89bef12a694978599d0fc0Timo Sirainen};
bd49b2e35dfa08753e89bef12a694978599d0fc0Timo SirainenARRAY_DEFINE_TYPE(lucene_query, struct lucene_query);
bd49b2e35dfa08753e89bef12a694978599d0fc0Timo Sirainen
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainenstruct lucene_analyzer {
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen char *lang;
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen Analyzer *analyzer;
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen};
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainenstruct lucene_index {
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen char *path;
03f4c5f3502801f5b318f464cc75313a88558805Timo Sirainen struct mailbox_list *list;
c37098f8ce6d512ba41f09564d04ed25720f0a77Timo Sirainen struct fts_lucene_settings set;
88b9f9eb91da632d3e941fe4276f8ace03205b25Timo Sirainen normalizer_func_t *normalizer;
03f4c5f3502801f5b318f464cc75313a88558805Timo Sirainen
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen wchar_t mailbox_guid[MAILBOX_GUID_HEX_LENGTH + 1];
031d075daf75b74b286711c1b6f64c3ae70e541bTimo Sirainen
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen IndexReader *reader;
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen IndexWriter *writer;
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen IndexSearcher *searcher;
4d72a99412a0577b026b64afc27975f28a58d071Timo Sirainen struct timeout *to_close;
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen
f26ef7a3a562dc42a1e9a4dde546bd30df3241e8Timo Sirainen buffer_t *normalizer_buf;
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen Analyzer *default_analyzer, *cur_analyzer;
4ee00532a265bdfb38539d811fcd12d51210ac35Timo Sirainen ARRAY(struct lucene_analyzer) analyzers;
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen Document *doc;
57b523eeb99ed5d7f5002907a409cdef54353ce5Timo Sirainen uint32_t prev_uid, prev_part_idx;
4f4daf7df84f450c7342de569bf25195e93d6bc7Timo Sirainen bool no_analyzer;
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen};
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainenstruct rescan_context {
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen struct lucene_index *index;
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen struct mailbox *box;
de62ce819d59a529530da4b57be1b8d6dad13d6bTimo Sirainen guid_128_t box_guid;
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen int box_ret;
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen
3b55c4f0e2ab51e2607556dde8d36360296b29e4Timo Sirainen pool_t pool;
678d0463849ba777106eb7875f27db07a5d8e3dfTimo Sirainen HASH_TABLE(uint8_t *, uint8_t *) seen_mailbox_guids;
3b55c4f0e2ab51e2607556dde8d36360296b29e4Timo Sirainen
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen ARRAY_TYPE(seq_range) uids;
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen struct seq_range_iter uids_iter;
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen unsigned int uids_iter_n;
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen uint32_t last_existing_uid;
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen bool warned;
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen};
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainenstatic void *textcat = NULL;
f87938eab9249ad84681f4fa747aab7b9a719670Timo Sirainen#ifdef HAVE_FTS_TEXTCAT
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainenstatic bool textcat_broken = FALSE;
f87938eab9249ad84681f4fa747aab7b9a719670Timo Sirainen#endif
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainenstatic int textcat_refcount = 0;
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen
8d14b5fc9c1ea1fad788315fc98fea89796a56d0Timo Sirainenstatic void lucene_handle_error(struct lucene_index *index, CLuceneError &err,
8d14b5fc9c1ea1fad788315fc98fea89796a56d0Timo Sirainen const char *msg);
5b6470e0e2ef4012430cdeca7d9b89c1278a0ed4Timo Sirainenstatic void rescan_clear_unseen_mailboxes(struct lucene_index *index,
678d0463849ba777106eb7875f27db07a5d8e3dfTimo Sirainen struct rescan_context *rescan_ctx);
03f4c5f3502801f5b318f464cc75313a88558805Timo Sirainen
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainenstruct lucene_index *lucene_index_init(const char *path,
03f4c5f3502801f5b318f464cc75313a88558805Timo Sirainen struct mailbox_list *list,
c37098f8ce6d512ba41f09564d04ed25720f0a77Timo Sirainen const struct fts_lucene_settings *set)
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen{
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen struct lucene_index *index;
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen index = i_new(struct lucene_index, 1);
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen index->path = i_strdup(path);
03f4c5f3502801f5b318f464cc75313a88558805Timo Sirainen index->list = list;
de3466de0dcc4b0da5a1205591cb1fb99eb1392fTimo Sirainen if (set != NULL) {
c37098f8ce6d512ba41f09564d04ed25720f0a77Timo Sirainen index->set = *set;
de3466de0dcc4b0da5a1205591cb1fb99eb1392fTimo Sirainen index->normalizer = !set->normalize ? NULL :
de3466de0dcc4b0da5a1205591cb1fb99eb1392fTimo Sirainen mailbox_list_get_namespace(list)->user->default_normalizer;
de3466de0dcc4b0da5a1205591cb1fb99eb1392fTimo Sirainen } else {
5b6470e0e2ef4012430cdeca7d9b89c1278a0ed4Timo Sirainen /* this is valid only for doveadm dump, so it doesn't matter */
5b6470e0e2ef4012430cdeca7d9b89c1278a0ed4Timo Sirainen index->set.default_language = "";
5b6470e0e2ef4012430cdeca7d9b89c1278a0ed4Timo Sirainen }
4f4daf7df84f450c7342de569bf25195e93d6bc7Timo Sirainen if (index->set.use_libfts) {
4f4daf7df84f450c7342de569bf25195e93d6bc7Timo Sirainen index->default_analyzer = _CLNEW KeywordAnalyzer();
4f4daf7df84f450c7342de569bf25195e93d6bc7Timo Sirainen } else
db9ad8c821c01a18a520c2a07b2d6dc501b4017aTimo Sirainen#ifdef HAVE_FTS_STEMMER
ad3951538244a30d06b1640204e053d814d22a4eTimo Sirainen if (set == NULL || !set->no_snowball) {
f26ef7a3a562dc42a1e9a4dde546bd30df3241e8Timo Sirainen index->default_analyzer =
f26ef7a3a562dc42a1e9a4dde546bd30df3241e8Timo Sirainen _CLNEW snowball::SnowballAnalyzer(index->normalizer,
f26ef7a3a562dc42a1e9a4dde546bd30df3241e8Timo Sirainen index->set.default_language);
917f556f14686178586b4b8c8184c5b52db987f1Timo Sirainen } else
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen#endif
917f556f14686178586b4b8c8184c5b52db987f1Timo Sirainen {
f26ef7a3a562dc42a1e9a4dde546bd30df3241e8Timo Sirainen index->default_analyzer = _CLNEW standard::StandardAnalyzer();
f26ef7a3a562dc42a1e9a4dde546bd30df3241e8Timo Sirainen if (index->normalizer != NULL) {
f26ef7a3a562dc42a1e9a4dde546bd30df3241e8Timo Sirainen index->normalizer_buf =
f26ef7a3a562dc42a1e9a4dde546bd30df3241e8Timo Sirainen buffer_create_dynamic(default_pool, 1024);
f26ef7a3a562dc42a1e9a4dde546bd30df3241e8Timo Sirainen }
f26ef7a3a562dc42a1e9a4dde546bd30df3241e8Timo Sirainen }
f26ef7a3a562dc42a1e9a4dde546bd30df3241e8Timo Sirainen
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen i_array_init(&index->analyzers, 32);
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen textcat_refcount++;
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen return index;
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen}
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainenvoid lucene_index_close(struct lucene_index *index)
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen{
d90a924480a061683786e459a2e1c1d0b6e4f1e4Josef 'Jeff' Sipek timeout_remove(&index->to_close);
4d72a99412a0577b026b64afc27975f28a58d071Timo Sirainen
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen _CLDELETE(index->searcher);
e8d5561143360da75d5ccb4991c2d1ffb437be1dTimo Sirainen if (index->writer != NULL) {
e8d5561143360da75d5ccb4991c2d1ffb437be1dTimo Sirainen try {
e8d5561143360da75d5ccb4991c2d1ffb437be1dTimo Sirainen index->writer->close();
e8d5561143360da75d5ccb4991c2d1ffb437be1dTimo Sirainen } catch (CLuceneError &err) {
e8d5561143360da75d5ccb4991c2d1ffb437be1dTimo Sirainen lucene_handle_error(index, err, "IndexWriter::close");
e8d5561143360da75d5ccb4991c2d1ffb437be1dTimo Sirainen }
e8d5561143360da75d5ccb4991c2d1ffb437be1dTimo Sirainen _CLDELETE(index->writer);
e8d5561143360da75d5ccb4991c2d1ffb437be1dTimo Sirainen }
e8d5561143360da75d5ccb4991c2d1ffb437be1dTimo Sirainen if (index->reader != NULL) {
e8d5561143360da75d5ccb4991c2d1ffb437be1dTimo Sirainen try {
e8d5561143360da75d5ccb4991c2d1ffb437be1dTimo Sirainen index->reader->close();
e8d5561143360da75d5ccb4991c2d1ffb437be1dTimo Sirainen } catch (CLuceneError &err) {
e8d5561143360da75d5ccb4991c2d1ffb437be1dTimo Sirainen lucene_handle_error(index, err, "IndexReader::close");
e8d5561143360da75d5ccb4991c2d1ffb437be1dTimo Sirainen }
e8d5561143360da75d5ccb4991c2d1ffb437be1dTimo Sirainen _CLDELETE(index->reader);
e8d5561143360da75d5ccb4991c2d1ffb437be1dTimo Sirainen }
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen}
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainenvoid lucene_index_deinit(struct lucene_index *index)
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen{
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen struct lucene_analyzer *a;
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen lucene_index_close(index);
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen array_foreach_modifiable(&index->analyzers, a) {
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen i_free(a->lang);
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen _CLDELETE(a->analyzer);
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen }
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen array_free(&index->analyzers);
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen if (--textcat_refcount == 0 && textcat != NULL) {
db9ad8c821c01a18a520c2a07b2d6dc501b4017aTimo Sirainen#ifdef HAVE_FTS_TEXTCAT
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen textcat_Done(textcat);
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen#endif
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen textcat = NULL;
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen }
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen _CLDELETE(index->default_analyzer);
f26ef7a3a562dc42a1e9a4dde546bd30df3241e8Timo Sirainen if (index->normalizer_buf != NULL)
f26ef7a3a562dc42a1e9a4dde546bd30df3241e8Timo Sirainen buffer_free(&index->normalizer_buf);
031d075daf75b74b286711c1b6f64c3ae70e541bTimo Sirainen i_free(index->path);
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen i_free(index);
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen}
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen
2e5d624013c30633e8ded148d338ce46c321a995Timo Sirainenstatic void lucene_data_translate(struct lucene_index *index,
2e5d624013c30633e8ded148d338ce46c321a995Timo Sirainen wchar_t *data, unsigned int len)
2e5d624013c30633e8ded148d338ce46c321a995Timo Sirainen{
2e5d624013c30633e8ded148d338ce46c321a995Timo Sirainen const char *whitespace_chars = index->set.whitespace_chars;
2e5d624013c30633e8ded148d338ce46c321a995Timo Sirainen unsigned int i;
2e5d624013c30633e8ded148d338ce46c321a995Timo Sirainen
4f4daf7df84f450c7342de569bf25195e93d6bc7Timo Sirainen if (*whitespace_chars == '\0' || index->set.use_libfts)
2e5d624013c30633e8ded148d338ce46c321a995Timo Sirainen return;
2e5d624013c30633e8ded148d338ce46c321a995Timo Sirainen
2e5d624013c30633e8ded148d338ce46c321a995Timo Sirainen for (i = 0; i < len; i++) {
2e5d624013c30633e8ded148d338ce46c321a995Timo Sirainen if (strchr(whitespace_chars, data[i]) != NULL)
2e5d624013c30633e8ded148d338ce46c321a995Timo Sirainen data[i] = ' ';
2e5d624013c30633e8ded148d338ce46c321a995Timo Sirainen }
2e5d624013c30633e8ded148d338ce46c321a995Timo Sirainen}
2e5d624013c30633e8ded148d338ce46c321a995Timo Sirainen
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainenvoid lucene_utf8_n_to_tchar(const unsigned char *src, size_t srcsize,
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen wchar_t *dest, size_t destsize)
9b6eb5a72ffe61579e24c6ae4c6ce3d4e9104b95Timo Sirainen{
9b6eb5a72ffe61579e24c6ae4c6ce3d4e9104b95Timo Sirainen ARRAY_TYPE(unichars) dest_arr;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen buffer_t buf = { 0, 0, { 0, 0, 0, 0, 0 } };
9b6eb5a72ffe61579e24c6ae4c6ce3d4e9104b95Timo Sirainen
9b6eb5a72ffe61579e24c6ae4c6ce3d4e9104b95Timo Sirainen i_assert(sizeof(wchar_t) == sizeof(unichar_t));
9b6eb5a72ffe61579e24c6ae4c6ce3d4e9104b95Timo Sirainen
3281669db44d09a087a203201248abbc81b3cc1aTimo Sirainen buffer_create_from_data(&buf, dest, sizeof(wchar_t) * destsize);
9b6eb5a72ffe61579e24c6ae4c6ce3d4e9104b95Timo Sirainen array_create_from_buffer(&dest_arr, &buf, sizeof(wchar_t));
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen if (uni_utf8_to_ucs4_n(src, srcsize, &dest_arr) < 0)
9b6eb5a72ffe61579e24c6ae4c6ce3d4e9104b95Timo Sirainen i_unreached();
9b6eb5a72ffe61579e24c6ae4c6ce3d4e9104b95Timo Sirainen i_assert(array_count(&dest_arr)+1 == destsize);
9b6eb5a72ffe61579e24c6ae4c6ce3d4e9104b95Timo Sirainen dest[destsize-1] = 0;
9b6eb5a72ffe61579e24c6ae4c6ce3d4e9104b95Timo Sirainen}
9b6eb5a72ffe61579e24c6ae4c6ce3d4e9104b95Timo Sirainen
2e5d624013c30633e8ded148d338ce46c321a995Timo Sirainenstatic const wchar_t *
4f4daf7df84f450c7342de569bf25195e93d6bc7Timo Sirainent_lucene_utf8_to_tchar(struct lucene_index *index, const char *str)
9b6eb5a72ffe61579e24c6ae4c6ce3d4e9104b95Timo Sirainen{
9b6eb5a72ffe61579e24c6ae4c6ce3d4e9104b95Timo Sirainen ARRAY_TYPE(unichars) dest_arr;
2e5d624013c30633e8ded148d338ce46c321a995Timo Sirainen const unichar_t *chars;
2e5d624013c30633e8ded148d338ce46c321a995Timo Sirainen wchar_t *ret;
2e5d624013c30633e8ded148d338ce46c321a995Timo Sirainen unsigned int len;
9b6eb5a72ffe61579e24c6ae4c6ce3d4e9104b95Timo Sirainen
9b6eb5a72ffe61579e24c6ae4c6ce3d4e9104b95Timo Sirainen i_assert(sizeof(wchar_t) == sizeof(unichar_t));
9b6eb5a72ffe61579e24c6ae4c6ce3d4e9104b95Timo Sirainen
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen t_array_init(&dest_arr, strlen(str) + 1);
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen if (uni_utf8_to_ucs4(str, &dest_arr) < 0)
9b6eb5a72ffe61579e24c6ae4c6ce3d4e9104b95Timo Sirainen i_unreached();
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen (void)array_append_space(&dest_arr);
2e5d624013c30633e8ded148d338ce46c321a995Timo Sirainen
2e5d624013c30633e8ded148d338ce46c321a995Timo Sirainen chars = array_get_modifiable(&dest_arr, &len);
2e5d624013c30633e8ded148d338ce46c321a995Timo Sirainen ret = (wchar_t *)chars;
2e5d624013c30633e8ded148d338ce46c321a995Timo Sirainen lucene_data_translate(index, ret, len - 1);
2e5d624013c30633e8ded148d338ce46c321a995Timo Sirainen return ret;
9b6eb5a72ffe61579e24c6ae4c6ce3d4e9104b95Timo Sirainen}
9b6eb5a72ffe61579e24c6ae4c6ce3d4e9104b95Timo Sirainen
62bf16bd8bb79e308e64110ae8d0b2a55a4c1490Timo Sirainenvoid lucene_index_select_mailbox(struct lucene_index *index,
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen const wchar_t guid[MAILBOX_GUID_HEX_LENGTH])
6380f2bc729a03b328793e8ad6ba7587620fa184Timo Sirainen{
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen memcpy(index->mailbox_guid, guid,
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen MAILBOX_GUID_HEX_LENGTH * sizeof(wchar_t));
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen index->mailbox_guid[MAILBOX_GUID_HEX_LENGTH] = '\0';
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen}
6380f2bc729a03b328793e8ad6ba7587620fa184Timo Sirainen
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainenvoid lucene_index_unselect_mailbox(struct lucene_index *index)
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen{
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen memset(index->mailbox_guid, 0, sizeof(index->mailbox_guid));
6380f2bc729a03b328793e8ad6ba7587620fa184Timo Sirainen}
6380f2bc729a03b328793e8ad6ba7587620fa184Timo Sirainen
031d075daf75b74b286711c1b6f64c3ae70e541bTimo Sirainenstatic void lucene_handle_error(struct lucene_index *index, CLuceneError &err,
031d075daf75b74b286711c1b6f64c3ae70e541bTimo Sirainen const char *msg)
031d075daf75b74b286711c1b6f64c3ae70e541bTimo Sirainen{
97e511960951550338d69cac98fb5f3ca2badb09Timo Sirainen const char *error, *what = err.what();
031d075daf75b74b286711c1b6f64c3ae70e541bTimo Sirainen
03f4c5f3502801f5b318f464cc75313a88558805Timo Sirainen i_error("lucene index %s: %s failed (#%d): %s",
03f4c5f3502801f5b318f464cc75313a88558805Timo Sirainen index->path, msg, err.number(), what);
03f4c5f3502801f5b318f464cc75313a88558805Timo Sirainen
03f4c5f3502801f5b318f464cc75313a88558805Timo Sirainen if (index->list != NULL &&
03f4c5f3502801f5b318f464cc75313a88558805Timo Sirainen (err.number() == CL_ERR_CorruptIndex ||
03f4c5f3502801f5b318f464cc75313a88558805Timo Sirainen err.number() == CL_ERR_IO)) {
03f4c5f3502801f5b318f464cc75313a88558805Timo Sirainen /* delete corrupted index. most IO errors are also about
03f4c5f3502801f5b318f464cc75313a88558805Timo Sirainen missing files and other such corruption.. */
1d9053f57383a2382c70f76b0790a7bf192aa891Sergey Kitov if (unlink_directory(index->path, (enum unlink_directory_flags)0, &error) < 0)
97e511960951550338d69cac98fb5f3ca2badb09Timo Sirainen i_error("unlink_directory(%s) failed: %s", index->path, error);
5b6470e0e2ef4012430cdeca7d9b89c1278a0ed4Timo Sirainen rescan_clear_unseen_mailboxes(index, NULL);
03f4c5f3502801f5b318f464cc75313a88558805Timo Sirainen }
031d075daf75b74b286711c1b6f64c3ae70e541bTimo Sirainen}
031d075daf75b74b286711c1b6f64c3ae70e541bTimo Sirainen
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainenstatic int lucene_index_open(struct lucene_index *index)
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen{
4d72a99412a0577b026b64afc27975f28a58d071Timo Sirainen if (index->reader != NULL) {
4d72a99412a0577b026b64afc27975f28a58d071Timo Sirainen i_assert(index->to_close != NULL);
4d72a99412a0577b026b64afc27975f28a58d071Timo Sirainen timeout_reset(index->to_close);
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen return 1;
4d72a99412a0577b026b64afc27975f28a58d071Timo Sirainen }
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen if (!IndexReader::indexExists(index->path))
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen return 0;
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen try {
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen index->reader = IndexReader::open(index->path);
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen } catch (CLuceneError &err) {
031d075daf75b74b286711c1b6f64c3ae70e541bTimo Sirainen lucene_handle_error(index, err, "IndexReader::open()");
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen return -1;
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen }
4d72a99412a0577b026b64afc27975f28a58d071Timo Sirainen i_assert(index->to_close == NULL);
4d72a99412a0577b026b64afc27975f28a58d071Timo Sirainen index->to_close = timeout_add(LUCENE_INDEX_CLOSE_TIMEOUT_MSECS,
4d72a99412a0577b026b64afc27975f28a58d071Timo Sirainen lucene_index_close, index);
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen return 1;
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen}
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainenstatic int lucene_index_open_search(struct lucene_index *index)
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen{
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen int ret;
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen if (index->searcher != NULL)
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen return 1;
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen if ((ret = lucene_index_open(index)) <= 0)
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen return ret;
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen index->searcher = _CLNEW IndexSearcher(index->reader);
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen return 1;
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen}
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen
37f96554a5734557cd454691d163e602d36384b4Timo Sirainenstatic int
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainenlucene_doc_get_uid(struct lucene_index *index, Document *doc, uint32_t *uid_r)
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen{
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen Field *field = doc->getField(_T("uid"));
9b6eb5a72ffe61579e24c6ae4c6ce3d4e9104b95Timo Sirainen const TCHAR *uid = field == NULL ? NULL : field->stringValue();
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen if (uid == NULL) {
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen i_error("lucene: Corrupted FTS index %s: No UID for document",
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen index->path);
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen return -1;
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen }
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen uint32_t num = 0;
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen while (*uid != 0) {
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen num = num*10 + (*uid - '0');
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen uid++;
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen }
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen *uid_r = num;
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen return 0;
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen}
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen
57b523eeb99ed5d7f5002907a409cdef54353ce5Timo Sirainenstatic uint32_t
57b523eeb99ed5d7f5002907a409cdef54353ce5Timo Sirainenlucene_doc_get_part(struct lucene_index *index, Document *doc)
57b523eeb99ed5d7f5002907a409cdef54353ce5Timo Sirainen{
57b523eeb99ed5d7f5002907a409cdef54353ce5Timo Sirainen Field *field = doc->getField(_T("part"));
57b523eeb99ed5d7f5002907a409cdef54353ce5Timo Sirainen const TCHAR *part = field == NULL ? NULL : field->stringValue();
57b523eeb99ed5d7f5002907a409cdef54353ce5Timo Sirainen if (part == NULL)
57b523eeb99ed5d7f5002907a409cdef54353ce5Timo Sirainen return 0;
57b523eeb99ed5d7f5002907a409cdef54353ce5Timo Sirainen
57b523eeb99ed5d7f5002907a409cdef54353ce5Timo Sirainen uint32_t num = 0;
57b523eeb99ed5d7f5002907a409cdef54353ce5Timo Sirainen while (*part != 0) {
57b523eeb99ed5d7f5002907a409cdef54353ce5Timo Sirainen num = num*10 + (*part - '0');
57b523eeb99ed5d7f5002907a409cdef54353ce5Timo Sirainen part++;
57b523eeb99ed5d7f5002907a409cdef54353ce5Timo Sirainen }
57b523eeb99ed5d7f5002907a409cdef54353ce5Timo Sirainen return num;
57b523eeb99ed5d7f5002907a409cdef54353ce5Timo Sirainen}
57b523eeb99ed5d7f5002907a409cdef54353ce5Timo Sirainen
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainenint lucene_index_get_last_uid(struct lucene_index *index, uint32_t *last_uid_r)
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen{
6380f2bc729a03b328793e8ad6ba7587620fa184Timo Sirainen int ret = 0;
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen *last_uid_r = 0;
dce232dfbb2244555299dffb3618a4724748d260Timo Sirainen
37f96554a5734557cd454691d163e602d36384b4Timo Sirainen if ((ret = lucene_index_open_search(index)) <= 0)
37f96554a5734557cd454691d163e602d36384b4Timo Sirainen return ret;
6380f2bc729a03b328793e8ad6ba7587620fa184Timo Sirainen
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen Term mailbox_term(_T("box"), index->mailbox_guid);
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen TermQuery query(&mailbox_term);
6380f2bc729a03b328793e8ad6ba7587620fa184Timo Sirainen
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen uint32_t last_uid = 0;
6380f2bc729a03b328793e8ad6ba7587620fa184Timo Sirainen try {
6380f2bc729a03b328793e8ad6ba7587620fa184Timo Sirainen Hits *hits = index->searcher->search(&query);
6380f2bc729a03b328793e8ad6ba7587620fa184Timo Sirainen
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen for (size_t i = 0; i < hits->length(); i++) {
6380f2bc729a03b328793e8ad6ba7587620fa184Timo Sirainen uint32_t uid;
6380f2bc729a03b328793e8ad6ba7587620fa184Timo Sirainen
6380f2bc729a03b328793e8ad6ba7587620fa184Timo Sirainen if (lucene_doc_get_uid(index, &hits->doc(i),
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen &uid) < 0) {
6380f2bc729a03b328793e8ad6ba7587620fa184Timo Sirainen ret = -1;
6380f2bc729a03b328793e8ad6ba7587620fa184Timo Sirainen break;
6380f2bc729a03b328793e8ad6ba7587620fa184Timo Sirainen }
6380f2bc729a03b328793e8ad6ba7587620fa184Timo Sirainen
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen if (uid > last_uid)
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen last_uid = uid;
5314b66fca6b2cf6b749d296a30a5eb039e0e9baTimo Sirainen }
6380f2bc729a03b328793e8ad6ba7587620fa184Timo Sirainen _CLDELETE(hits);
6380f2bc729a03b328793e8ad6ba7587620fa184Timo Sirainen } catch (CLuceneError &err) {
031d075daf75b74b286711c1b6f64c3ae70e541bTimo Sirainen lucene_handle_error(index, err, "last_uid search");
6380f2bc729a03b328793e8ad6ba7587620fa184Timo Sirainen ret = -1;
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen }
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen *last_uid_r = last_uid;
6380f2bc729a03b328793e8ad6ba7587620fa184Timo Sirainen return ret;
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen}
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainenint lucene_index_get_doc_count(struct lucene_index *index, uint32_t *count_r)
37f96554a5734557cd454691d163e602d36384b4Timo Sirainen{
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen int ret;
37f96554a5734557cd454691d163e602d36384b4Timo Sirainen
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen if (index->reader == NULL) {
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen lucene_index_close(index);
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen if ((ret = lucene_index_open(index)) < 0)
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen return -1;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen if (ret == 0) {
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen *count_r = 0;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen return 0;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen }
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen }
a6de00f8d3f65335149f6fe828fca9da6328d42aTimo Sirainen *count_r = index->reader->numDocs();
a6de00f8d3f65335149f6fe828fca9da6328d42aTimo Sirainen return 0;
37f96554a5734557cd454691d163e602d36384b4Timo Sirainen}
37f96554a5734557cd454691d163e602d36384b4Timo Sirainen
5b6470e0e2ef4012430cdeca7d9b89c1278a0ed4Timo Sirainenstatic int lucene_settings_check(struct lucene_index *index)
5b6470e0e2ef4012430cdeca7d9b89c1278a0ed4Timo Sirainen{
5b6470e0e2ef4012430cdeca7d9b89c1278a0ed4Timo Sirainen uint32_t set_checksum;
97e511960951550338d69cac98fb5f3ca2badb09Timo Sirainen const char *error;
5b6470e0e2ef4012430cdeca7d9b89c1278a0ed4Timo Sirainen int ret = 0;
5b6470e0e2ef4012430cdeca7d9b89c1278a0ed4Timo Sirainen
5b6470e0e2ef4012430cdeca7d9b89c1278a0ed4Timo Sirainen set_checksum = fts_lucene_settings_checksum(&index->set);
5b6470e0e2ef4012430cdeca7d9b89c1278a0ed4Timo Sirainen ret = fts_index_have_compatible_settings(index->list, set_checksum);
5b6470e0e2ef4012430cdeca7d9b89c1278a0ed4Timo Sirainen if (ret != 0)
5b6470e0e2ef4012430cdeca7d9b89c1278a0ed4Timo Sirainen return ret;
5b6470e0e2ef4012430cdeca7d9b89c1278a0ed4Timo Sirainen
264629908d96285d355aac0acf9b60b9b4be7fefTimo Sirainen i_warning("fts-lucene: Settings have changed, rebuilding index for mailbox");
264629908d96285d355aac0acf9b60b9b4be7fefTimo Sirainen
5b6470e0e2ef4012430cdeca7d9b89c1278a0ed4Timo Sirainen /* settings changed, rebuild index */
97e511960951550338d69cac98fb5f3ca2badb09Timo Sirainen if (unlink_directory(index->path, (enum unlink_directory_flags)0, &error) < 0) {
97e511960951550338d69cac98fb5f3ca2badb09Timo Sirainen i_error("unlink_directory(%s) failed: %s", index->path, error);
5b6470e0e2ef4012430cdeca7d9b89c1278a0ed4Timo Sirainen ret = -1;
5b6470e0e2ef4012430cdeca7d9b89c1278a0ed4Timo Sirainen } else {
5b6470e0e2ef4012430cdeca7d9b89c1278a0ed4Timo Sirainen rescan_clear_unseen_mailboxes(index, NULL);
5b6470e0e2ef4012430cdeca7d9b89c1278a0ed4Timo Sirainen }
5b6470e0e2ef4012430cdeca7d9b89c1278a0ed4Timo Sirainen return ret;
5b6470e0e2ef4012430cdeca7d9b89c1278a0ed4Timo Sirainen}
5b6470e0e2ef4012430cdeca7d9b89c1278a0ed4Timo Sirainen
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainenint lucene_index_build_init(struct lucene_index *index)
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen{
f94e1eb89b911e7bef709a25580590c3fff594acTimo Sirainen const char *lock_path;
f94e1eb89b911e7bef709a25580590c3fff594acTimo Sirainen struct stat st;
f94e1eb89b911e7bef709a25580590c3fff594acTimo Sirainen
dce232dfbb2244555299dffb3618a4724748d260Timo Sirainen lucene_index_close(index);
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen
f94e1eb89b911e7bef709a25580590c3fff594acTimo Sirainen lock_path = t_strdup_printf("%s/write.lock", index->path);
f94e1eb89b911e7bef709a25580590c3fff594acTimo Sirainen if (stat(lock_path, &st) == 0 &&
f94e1eb89b911e7bef709a25580590c3fff594acTimo Sirainen st.st_mtime < time(NULL) - LUCENE_LOCK_OVERRIDE_SECS) {
f94e1eb89b911e7bef709a25580590c3fff594acTimo Sirainen if (unlink(lock_path) < 0)
6140ef2418350dce8fba4706b63fe547549ed3e6Timo Sirainen i_error("unlink(%s) failed: %m", lock_path);
f94e1eb89b911e7bef709a25580590c3fff594acTimo Sirainen }
f94e1eb89b911e7bef709a25580590c3fff594acTimo Sirainen
5b6470e0e2ef4012430cdeca7d9b89c1278a0ed4Timo Sirainen if (lucene_settings_check(index) < 0)
5b6470e0e2ef4012430cdeca7d9b89c1278a0ed4Timo Sirainen return -1;
5b6470e0e2ef4012430cdeca7d9b89c1278a0ed4Timo Sirainen
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen bool exists = IndexReader::indexExists(index->path);
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen try {
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen index->writer = _CLNEW IndexWriter(index->path,
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen index->default_analyzer,
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen !exists);
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen } catch (CLuceneError &err) {
031d075daf75b74b286711c1b6f64c3ae70e541bTimo Sirainen lucene_handle_error(index, err, "IndexWriter()");
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen return -1;
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen }
33502e55a9bf4cafcd184ca9b114c126e420f856Timo Sirainen index->writer->setMaxFieldLength(MAX_TERMS_PER_DOCUMENT);
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen return 0;
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen}
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen
db9ad8c821c01a18a520c2a07b2d6dc501b4017aTimo Sirainen#ifdef HAVE_FTS_TEXTCAT
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainenstatic Analyzer *get_analyzer(struct lucene_index *index, const char *lang)
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen{
88b9f9eb91da632d3e941fe4276f8ace03205b25Timo Sirainen normalizer_func_t *normalizer = index->normalizer;
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen const struct lucene_analyzer *a;
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen struct lucene_analyzer new_analyzer;
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen Analyzer *analyzer;
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen array_foreach(&index->analyzers, a) {
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen if (strcmp(a->lang, lang) == 0)
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen return a->analyzer;
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen }
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen memset(&new_analyzer, 0, sizeof(new_analyzer));
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen new_analyzer.lang = i_strdup(lang);
88b9f9eb91da632d3e941fe4276f8ace03205b25Timo Sirainen new_analyzer.analyzer =
88b9f9eb91da632d3e941fe4276f8ace03205b25Timo Sirainen _CLNEW snowball::SnowballAnalyzer(normalizer, lang);
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen array_append_i(&index->analyzers.arr, &new_analyzer, 1);
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen return new_analyzer.analyzer;
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen}
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen
c37098f8ce6d512ba41f09564d04ed25720f0a77Timo Sirainenstatic void *textcat_init(struct lucene_index *index)
c37098f8ce6d512ba41f09564d04ed25720f0a77Timo Sirainen{
c37098f8ce6d512ba41f09564d04ed25720f0a77Timo Sirainen const char *textcat_dir = index->set.textcat_dir;
c37098f8ce6d512ba41f09564d04ed25720f0a77Timo Sirainen unsigned int len;
c37098f8ce6d512ba41f09564d04ed25720f0a77Timo Sirainen
c37098f8ce6d512ba41f09564d04ed25720f0a77Timo Sirainen if (textcat_dir == NULL)
c37098f8ce6d512ba41f09564d04ed25720f0a77Timo Sirainen return NULL;
c37098f8ce6d512ba41f09564d04ed25720f0a77Timo Sirainen
c37098f8ce6d512ba41f09564d04ed25720f0a77Timo Sirainen /* textcat really wants the '/' suffix */
c37098f8ce6d512ba41f09564d04ed25720f0a77Timo Sirainen len = strlen(textcat_dir);
c37098f8ce6d512ba41f09564d04ed25720f0a77Timo Sirainen if (len > 0 && textcat_dir[len-1] != '/')
c37098f8ce6d512ba41f09564d04ed25720f0a77Timo Sirainen textcat_dir = t_strconcat(textcat_dir, "/", NULL);
c37098f8ce6d512ba41f09564d04ed25720f0a77Timo Sirainen
c37098f8ce6d512ba41f09564d04ed25720f0a77Timo Sirainen return special_textcat_Init(index->set.textcat_conf, textcat_dir);
c37098f8ce6d512ba41f09564d04ed25720f0a77Timo Sirainen}
c37098f8ce6d512ba41f09564d04ed25720f0a77Timo Sirainen
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainenstatic Analyzer *
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainenguess_analyzer(struct lucene_index *index, const void *data, size_t size)
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen{
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen const char *lang;
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen if (textcat_broken)
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen return NULL;
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen if (textcat == NULL) {
c37098f8ce6d512ba41f09564d04ed25720f0a77Timo Sirainen textcat = textcat_init(index);
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen if (textcat == NULL) {
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen textcat_broken = TRUE;
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen return NULL;
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen }
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen }
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen /* try to guess the language */
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen lang = textcat_Classify(textcat, (const char *)data,
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen I_MIN(size, 500));
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen const char *p = strchr(lang, ']');
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen if (lang[0] != '[' || p == NULL)
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen return NULL;
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen lang = t_strdup_until(lang+1, p);
c37098f8ce6d512ba41f09564d04ed25720f0a77Timo Sirainen if (strcmp(lang, index->set.default_language) == 0)
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen return index->default_analyzer;
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen return get_analyzer(index, lang);
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen}
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen#else
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainenstatic Analyzer *
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainenguess_analyzer(struct lucene_index *index ATTR_UNUSED,
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen const void *data ATTR_UNUSED, size_t size ATTR_UNUSED)
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen{
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen return NULL;
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen}
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen#endif
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainenstatic int lucene_index_build_flush(struct lucene_index *index)
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen{
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen int ret = 0;
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen if (index->doc == NULL)
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen return 0;
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen try {
4f4daf7df84f450c7342de569bf25195e93d6bc7Timo Sirainen CL_NS(analysis)::Analyzer *analyzer = NULL;
4f4daf7df84f450c7342de569bf25195e93d6bc7Timo Sirainen
4f4daf7df84f450c7342de569bf25195e93d6bc7Timo Sirainen if (!index->set.use_libfts) {
4f4daf7df84f450c7342de569bf25195e93d6bc7Timo Sirainen analyzer = index->cur_analyzer != NULL ?
4f4daf7df84f450c7342de569bf25195e93d6bc7Timo Sirainen index->cur_analyzer : index->default_analyzer;
4f4daf7df84f450c7342de569bf25195e93d6bc7Timo Sirainen }
4f4daf7df84f450c7342de569bf25195e93d6bc7Timo Sirainen index->writer->addDocument(index->doc, analyzer);
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen } catch (CLuceneError &err) {
031d075daf75b74b286711c1b6f64c3ae70e541bTimo Sirainen lucene_handle_error(index, err, "IndexWriter::addDocument()");
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen ret = -1;
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen }
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen _CLDELETE(index->doc);
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen index->doc = NULL;
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen index->cur_analyzer = NULL;
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen return ret;
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen}
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainenint lucene_index_build_more(struct lucene_index *index, uint32_t uid,
57b523eeb99ed5d7f5002907a409cdef54353ce5Timo Sirainen uint32_t part_idx, const unsigned char *data,
57b523eeb99ed5d7f5002907a409cdef54353ce5Timo Sirainen size_t size, const char *hdr_name)
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen{
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen wchar_t id[MAX_INT_STRLEN];
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen size_t namesize, datasize;
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen
57b523eeb99ed5d7f5002907a409cdef54353ce5Timo Sirainen if (uid != index->prev_uid || part_idx != index->prev_part_idx) {
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen if (lucene_index_build_flush(index) < 0)
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen return -1;
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen index->prev_uid = uid;
57b523eeb99ed5d7f5002907a409cdef54353ce5Timo Sirainen index->prev_part_idx = part_idx;
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen index->doc = _CLNEW Document();
9b6eb5a72ffe61579e24c6ae4c6ce3d4e9104b95Timo Sirainen swprintf(id, N_ELEMENTS(id), L"%u", uid);
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen index->doc->add(*_CLNEW Field(_T("uid"), id, Field::STORE_YES | Field::INDEX_UNTOKENIZED));
57b523eeb99ed5d7f5002907a409cdef54353ce5Timo Sirainen if (part_idx != 0) {
57b523eeb99ed5d7f5002907a409cdef54353ce5Timo Sirainen swprintf(id, N_ELEMENTS(id), L"%u", part_idx);
57b523eeb99ed5d7f5002907a409cdef54353ce5Timo Sirainen index->doc->add(*_CLNEW Field(_T("part"), id, Field::STORE_YES | Field::INDEX_UNTOKENIZED));
57b523eeb99ed5d7f5002907a409cdef54353ce5Timo Sirainen }
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen index->doc->add(*_CLNEW Field(_T("box"), index->mailbox_guid, Field::STORE_YES | Field::INDEX_UNTOKENIZED));
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen }
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen
4f4daf7df84f450c7342de569bf25195e93d6bc7Timo Sirainen if (index->normalizer_buf != NULL && !index->set.use_libfts) {
f26ef7a3a562dc42a1e9a4dde546bd30df3241e8Timo Sirainen buffer_set_used_size(index->normalizer_buf, 0);
f26ef7a3a562dc42a1e9a4dde546bd30df3241e8Timo Sirainen index->normalizer(data, size, index->normalizer_buf);
f26ef7a3a562dc42a1e9a4dde546bd30df3241e8Timo Sirainen data = (const unsigned char *)index->normalizer_buf->data;
f26ef7a3a562dc42a1e9a4dde546bd30df3241e8Timo Sirainen size = index->normalizer_buf->used;
f26ef7a3a562dc42a1e9a4dde546bd30df3241e8Timo Sirainen }
f26ef7a3a562dc42a1e9a4dde546bd30df3241e8Timo Sirainen
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen datasize = uni_utf8_strlen_n(data, size) + 1;
37e5638ddf924fd4a07623095dbc028c564f7192Timo Sirainen wchar_t *dest, *dest_free = NULL;
37e5638ddf924fd4a07623095dbc028c564f7192Timo Sirainen if (datasize < 4096)
d48fcf2c089269a967fb9efdacc428ace89fc91cTimo Sirainen dest = t_new(wchar_t, datasize);
37e5638ddf924fd4a07623095dbc028c564f7192Timo Sirainen else
d48fcf2c089269a967fb9efdacc428ace89fc91cTimo Sirainen dest = dest_free = i_new(wchar_t, datasize);
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen lucene_utf8_n_to_tchar(data, size, dest, datasize);
aadd92e0901d82d0a47aee76e7b6c9825523313bTimo Sirainen lucene_data_translate(index, dest, datasize-1);
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen
4f4daf7df84f450c7342de569bf25195e93d6bc7Timo Sirainen int token_flag = index->set.use_libfts ?
4f4daf7df84f450c7342de569bf25195e93d6bc7Timo Sirainen Field::INDEX_UNTOKENIZED : Field::INDEX_TOKENIZED;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen if (hdr_name != NULL) {
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen /* hdr_name should be ASCII, but don't break in case it isn't */
1e1549f93601770d1bddf702baf55c593e3d97e9Timo Sirainen hdr_name = t_str_lcase(hdr_name);
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen namesize = uni_utf8_strlen(hdr_name) + 1;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen wchar_t wname[namesize];
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen lucene_utf8_n_to_tchar((const unsigned char *)hdr_name,
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen strlen(hdr_name), wname, namesize);
36716baeee63350c23809909199a8d533d559610Timo Sirainen if (!index->set.use_libfts)
4f4daf7df84f450c7342de569bf25195e93d6bc7Timo Sirainen index->doc->add(*_CLNEW Field(_T("hdr"), wname, Field::STORE_NO | token_flag));
4f4daf7df84f450c7342de569bf25195e93d6bc7Timo Sirainen index->doc->add(*_CLNEW Field(_T("hdr"), dest, Field::STORE_NO | token_flag));
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen if (fts_header_want_indexed(hdr_name))
4f4daf7df84f450c7342de569bf25195e93d6bc7Timo Sirainen index->doc->add(*_CLNEW Field(wname, dest, Field::STORE_NO | token_flag));
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen } else if (size > 0) {
4f4daf7df84f450c7342de569bf25195e93d6bc7Timo Sirainen if (index->cur_analyzer == NULL && !index->set.use_libfts)
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen index->cur_analyzer = guess_analyzer(index, data, size);
4f4daf7df84f450c7342de569bf25195e93d6bc7Timo Sirainen index->doc->add(*_CLNEW Field(_T("body"), dest, Field::STORE_NO | token_flag));
6380f2bc729a03b328793e8ad6ba7587620fa184Timo Sirainen }
37e5638ddf924fd4a07623095dbc028c564f7192Timo Sirainen i_free(dest_free);
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen return 0;
6380f2bc729a03b328793e8ad6ba7587620fa184Timo Sirainen}
6380f2bc729a03b328793e8ad6ba7587620fa184Timo Sirainen
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainenint lucene_index_build_deinit(struct lucene_index *index)
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen{
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen int ret = 0;
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen
dce232dfbb2244555299dffb3618a4724748d260Timo Sirainen if (index->prev_uid == 0) {
dce232dfbb2244555299dffb3618a4724748d260Timo Sirainen /* no changes. */
dce232dfbb2244555299dffb3618a4724748d260Timo Sirainen return 0;
dce232dfbb2244555299dffb3618a4724748d260Timo Sirainen }
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen index->prev_uid = 0;
57b523eeb99ed5d7f5002907a409cdef54353ce5Timo Sirainen index->prev_part_idx = 0;
6380f2bc729a03b328793e8ad6ba7587620fa184Timo Sirainen
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen if (index->writer == NULL) {
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen lucene_index_close(index);
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen return -1;
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen }
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen if (lucene_index_build_flush(index) < 0)
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen ret = -1;
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen try {
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen index->writer->close();
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen } catch (CLuceneError &err) {
031d075daf75b74b286711c1b6f64c3ae70e541bTimo Sirainen lucene_handle_error(index, err, "IndexWriter::close()");
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen ret = -1;
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen }
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen lucene_index_close(index);
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen return ret;
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen}
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainenstatic int
de62ce819d59a529530da4b57be1b8d6dad13d6bTimo Sirainenwcharguid_to_guid(guid_128_t dest, const wchar_t *src)
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen{
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen buffer_t buf = { 0, 0, { 0, 0, 0, 0, 0 } };
de62ce819d59a529530da4b57be1b8d6dad13d6bTimo Sirainen char src_chars[GUID_128_SIZE*2 + 1];
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen unsigned int i;
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen for (i = 0; i < sizeof(src_chars)-1; i++) {
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen if ((src[i] >= '0' && src[i] <= '9') ||
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen (src[i] >= 'a' && src[i] <= 'f'))
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen src_chars[i] = src[i];
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen else
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen return -1;
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen }
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen if (src[i] != '\0')
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen return -1;
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen src_chars[i] = '\0';
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen
3281669db44d09a087a203201248abbc81b3cc1aTimo Sirainen buffer_create_from_data(&buf, dest, GUID_128_SIZE);
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen return hex_to_binary(src_chars, &buf);
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen}
be044d4f3d08652d7332cdec5aaf8391474908bbTimo Sirainen
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainenstatic int
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainenrescan_get_uids(struct mailbox *box, ARRAY_TYPE(seq_range) *uids)
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen{
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen struct mailbox_status status;
be044d4f3d08652d7332cdec5aaf8391474908bbTimo Sirainen
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen if (mailbox_get_status(box, STATUS_MESSAGES, &status) < 0)
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen return -1;
be044d4f3d08652d7332cdec5aaf8391474908bbTimo Sirainen
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen if (status.messages > 0) T_BEGIN {
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen ARRAY_TYPE(seq_range) seqs;
be044d4f3d08652d7332cdec5aaf8391474908bbTimo Sirainen
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen t_array_init(&seqs, 2);
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen seq_range_array_add_range(&seqs, 1, status.messages);
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen mailbox_get_uid_range(box, &seqs, uids);
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen } T_END;
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen return 0;
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen}
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainenstatic int rescan_finish(struct rescan_context *ctx)
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen{
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen int ret;
031d075daf75b74b286711c1b6f64c3ae70e541bTimo Sirainen
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen ret = fts_index_set_last_uid(ctx->box, ctx->last_existing_uid);
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen mailbox_free(&ctx->box);
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen return ret;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen}
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainenstatic int
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainenfts_lucene_get_mailbox_guid(struct lucene_index *index, Document *doc,
de62ce819d59a529530da4b57be1b8d6dad13d6bTimo Sirainen guid_128_t guid_r)
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen{
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen Field *field = doc->getField(_T("box"));
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen const TCHAR *box_guid = field == NULL ? NULL : field->stringValue();
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen if (box_guid == NULL) {
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen i_error("lucene: Corrupted FTS index %s: No mailbox for document",
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen index->path);
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen return -1;
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen }
30003461d9c5b090384b252260916cf50954d943Timo Sirainen
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen if (wcharguid_to_guid(guid_r, box_guid) < 0) {
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen i_error("lucene: Corrupted FTS index %s: "
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen "box field not in expected format", index->path);
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen return -1;
30003461d9c5b090384b252260916cf50954d943Timo Sirainen }
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen return 0;
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen}
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainenstatic int
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainenrescan_open_mailbox(struct rescan_context *ctx, Document *doc)
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen{
de62ce819d59a529530da4b57be1b8d6dad13d6bTimo Sirainen guid_128_t guid, *guidp;
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen int ret;
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen
de62ce819d59a529530da4b57be1b8d6dad13d6bTimo Sirainen if (fts_lucene_get_mailbox_guid(ctx->index, doc, guid) < 0)
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen return 0;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen if (memcmp(guid, ctx->box_guid, sizeof(guid)) == 0) {
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen /* same as last one */
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen return ctx->box_ret;
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen }
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen memcpy(ctx->box_guid, guid, sizeof(ctx->box_guid));
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen
de62ce819d59a529530da4b57be1b8d6dad13d6bTimo Sirainen guidp = p_new(ctx->pool, guid_128_t, 1);
3b55c4f0e2ab51e2607556dde8d36360296b29e4Timo Sirainen memcpy(guidp, guid, sizeof(*guidp));
678d0463849ba777106eb7875f27db07a5d8e3dfTimo Sirainen hash_table_insert(ctx->seen_mailbox_guids, guidp, guidp);
3b55c4f0e2ab51e2607556dde8d36360296b29e4Timo Sirainen
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen if (ctx->box != NULL)
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen rescan_finish(ctx);
4145cbac82bfc0c8bfeceeca0ef841700117930cTimo Sirainen ctx->box = mailbox_alloc_guid(ctx->index->list, guid,
4145cbac82bfc0c8bfeceeca0ef841700117930cTimo Sirainen (enum mailbox_flags)0);
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen if (mailbox_open(ctx->box) < 0) {
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen enum mail_error error;
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen const char *errstr;
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen
bf7dc750b95039981c0e9d728f313d50cf38a156Martti Rannanjärvi errstr = mailbox_get_last_internal_error(ctx->box, &error);
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen if (error == MAIL_ERROR_NOTFOUND)
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen ret = 0;
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen else {
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen i_error("lucene: Couldn't open mailbox %s: %s",
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen mailbox_get_vname(ctx->box), errstr);
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen ret = -1;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen }
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen mailbox_free(&ctx->box);
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen ctx->box_ret = ret;
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen return ret;
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen }
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen if (mailbox_sync(ctx->box, (enum mailbox_sync_flags)0) < 0) {
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen i_error("lucene: Failed to sync mailbox %s: %s",
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen mailbox_get_vname(ctx->box),
bf7dc750b95039981c0e9d728f313d50cf38a156Martti Rannanjärvi mailbox_get_last_internal_error(ctx->box, NULL));
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen mailbox_free(&ctx->box);
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen ctx->box_ret = -1;
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen return -1;
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen }
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen array_clear(&ctx->uids);
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen rescan_get_uids(ctx->box, &ctx->uids);
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen ctx->warned = FALSE;
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen ctx->last_existing_uid = 0;
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen ctx->uids_iter_n = 0;
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen seq_range_array_iter_init(&ctx->uids_iter, &ctx->uids);
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen ctx->box_ret = 1;
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen return 1;
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen}
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainenstatic int
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainenrescan_next(struct rescan_context *ctx, Document *doc)
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen{
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen uint32_t lucene_uid, idx_uid;
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen if (lucene_doc_get_uid(ctx->index, doc, &lucene_uid) < 0)
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen return 0;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen if (seq_range_array_iter_nth(&ctx->uids_iter, ctx->uids_iter_n,
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen &idx_uid)) {
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen if (idx_uid == lucene_uid) {
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen ctx->uids_iter_n++;
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen ctx->last_existing_uid = idx_uid;
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen return 1;
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen }
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen if (idx_uid < lucene_uid) {
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen /* lucene is missing an UID from the middle. delete
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen the rest of the messages from this mailbox and
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen reindex. */
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen if (!ctx->warned) {
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen i_warning("lucene: Mailbox %s "
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen "missing UIDs in the middle",
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen mailbox_get_vname(ctx->box));
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen ctx->warned = TRUE;
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen }
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen } else {
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen /* UID has been expunged from index. delete from
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen lucene as well. */
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen }
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen return 0;
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen } else {
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen /* the rest of the messages have been expunged from index */
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen return 0;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen }
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen}
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen
af2564c7f9e05ad245a032efdfbc5abbb9b70f1eTimo Sirainenstatic void
0f801c1bd3d684c219d7f3b1e75f8b85f66f7951Timo Sirainenrescan_clear_unseen_mailbox(struct lucene_index *index,
0f801c1bd3d684c219d7f3b1e75f8b85f66f7951Timo Sirainen struct rescan_context *rescan_ctx,
af2564c7f9e05ad245a032efdfbc5abbb9b70f1eTimo Sirainen const char *vname,
af2564c7f9e05ad245a032efdfbc5abbb9b70f1eTimo Sirainen const struct fts_index_header *hdr)
af2564c7f9e05ad245a032efdfbc5abbb9b70f1eTimo Sirainen{
af2564c7f9e05ad245a032efdfbc5abbb9b70f1eTimo Sirainen struct mailbox *box;
af2564c7f9e05ad245a032efdfbc5abbb9b70f1eTimo Sirainen struct mailbox_metadata metadata;
af2564c7f9e05ad245a032efdfbc5abbb9b70f1eTimo Sirainen
0f801c1bd3d684c219d7f3b1e75f8b85f66f7951Timo Sirainen box = mailbox_alloc(index->list, vname,
af2564c7f9e05ad245a032efdfbc5abbb9b70f1eTimo Sirainen (enum mailbox_flags)0);
af2564c7f9e05ad245a032efdfbc5abbb9b70f1eTimo Sirainen if (mailbox_open(box) == 0 &&
af2564c7f9e05ad245a032efdfbc5abbb9b70f1eTimo Sirainen mailbox_get_metadata(box, MAILBOX_METADATA_GUID,
af2564c7f9e05ad245a032efdfbc5abbb9b70f1eTimo Sirainen &metadata) == 0 &&
af2564c7f9e05ad245a032efdfbc5abbb9b70f1eTimo Sirainen (rescan_ctx == NULL ||
af2564c7f9e05ad245a032efdfbc5abbb9b70f1eTimo Sirainen hash_table_lookup(rescan_ctx->seen_mailbox_guids,
af2564c7f9e05ad245a032efdfbc5abbb9b70f1eTimo Sirainen metadata.guid) == NULL)) {
af2564c7f9e05ad245a032efdfbc5abbb9b70f1eTimo Sirainen /* this mailbox had no records in lucene index.
af2564c7f9e05ad245a032efdfbc5abbb9b70f1eTimo Sirainen make sure its last indexed uid is 0 */
af2564c7f9e05ad245a032efdfbc5abbb9b70f1eTimo Sirainen (void)fts_index_set_header(box, hdr);
af2564c7f9e05ad245a032efdfbc5abbb9b70f1eTimo Sirainen }
af2564c7f9e05ad245a032efdfbc5abbb9b70f1eTimo Sirainen mailbox_free(&box);
af2564c7f9e05ad245a032efdfbc5abbb9b70f1eTimo Sirainen}
af2564c7f9e05ad245a032efdfbc5abbb9b70f1eTimo Sirainen
5b6470e0e2ef4012430cdeca7d9b89c1278a0ed4Timo Sirainenstatic void rescan_clear_unseen_mailboxes(struct lucene_index *index,
678d0463849ba777106eb7875f27db07a5d8e3dfTimo Sirainen struct rescan_context *rescan_ctx)
3b55c4f0e2ab51e2607556dde8d36360296b29e4Timo Sirainen{
3b55c4f0e2ab51e2607556dde8d36360296b29e4Timo Sirainen const enum mailbox_list_iter_flags iter_flags =
3b55c4f0e2ab51e2607556dde8d36360296b29e4Timo Sirainen (enum mailbox_list_iter_flags)
3b55c4f0e2ab51e2607556dde8d36360296b29e4Timo Sirainen (MAILBOX_LIST_ITER_NO_AUTO_BOXES |
3b55c4f0e2ab51e2607556dde8d36360296b29e4Timo Sirainen MAILBOX_LIST_ITER_RETURN_NO_FLAGS);
3b55c4f0e2ab51e2607556dde8d36360296b29e4Timo Sirainen struct mailbox_list_iterate_context *iter;
3b55c4f0e2ab51e2607556dde8d36360296b29e4Timo Sirainen const struct mailbox_info *info;
5b6470e0e2ef4012430cdeca7d9b89c1278a0ed4Timo Sirainen struct fts_index_header hdr;
9a85ec311d3216dd6dbad401543330c4356d651dTimo Sirainen struct mail_namespace *ns = index->list->ns;
9a85ec311d3216dd6dbad401543330c4356d651dTimo Sirainen const char *vname;
5b6470e0e2ef4012430cdeca7d9b89c1278a0ed4Timo Sirainen
5b6470e0e2ef4012430cdeca7d9b89c1278a0ed4Timo Sirainen memset(&hdr, 0, sizeof(hdr));
5b6470e0e2ef4012430cdeca7d9b89c1278a0ed4Timo Sirainen hdr.settings_checksum = fts_lucene_settings_checksum(&index->set);
3b55c4f0e2ab51e2607556dde8d36360296b29e4Timo Sirainen
5b6470e0e2ef4012430cdeca7d9b89c1278a0ed4Timo Sirainen iter = mailbox_list_iter_init(index->list, "*", iter_flags);
af2564c7f9e05ad245a032efdfbc5abbb9b70f1eTimo Sirainen while ((info = mailbox_list_iter_next(iter)) != NULL)
0f801c1bd3d684c219d7f3b1e75f8b85f66f7951Timo Sirainen rescan_clear_unseen_mailbox(index, rescan_ctx, info->vname, &hdr);
3b55c4f0e2ab51e2607556dde8d36360296b29e4Timo Sirainen (void)mailbox_list_iter_deinit(&iter);
9a85ec311d3216dd6dbad401543330c4356d651dTimo Sirainen
9a85ec311d3216dd6dbad401543330c4356d651dTimo Sirainen if (ns->prefix_len > 0 &&
9a85ec311d3216dd6dbad401543330c4356d651dTimo Sirainen ns->prefix[ns->prefix_len-1] == mail_namespace_get_sep(ns)) {
9a85ec311d3216dd6dbad401543330c4356d651dTimo Sirainen /* namespace prefix itself isn't returned by the listing */
9a85ec311d3216dd6dbad401543330c4356d651dTimo Sirainen vname = t_strndup(index->list->ns->prefix,
9a85ec311d3216dd6dbad401543330c4356d651dTimo Sirainen index->list->ns->prefix_len-1);
0f801c1bd3d684c219d7f3b1e75f8b85f66f7951Timo Sirainen rescan_clear_unseen_mailbox(index, rescan_ctx, vname, &hdr);
9a85ec311d3216dd6dbad401543330c4356d651dTimo Sirainen }
3b55c4f0e2ab51e2607556dde8d36360296b29e4Timo Sirainen}
3b55c4f0e2ab51e2607556dde8d36360296b29e4Timo Sirainen
03f4c5f3502801f5b318f464cc75313a88558805Timo Sirainenint lucene_index_rescan(struct lucene_index *index)
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen{
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen static const TCHAR *sort_fields[] = { _T("box"), _T("uid"), NULL };
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen struct rescan_context ctx;
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen bool failed = false;
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen int ret;
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen
03f4c5f3502801f5b318f464cc75313a88558805Timo Sirainen i_assert(index->list != NULL);
03f4c5f3502801f5b318f464cc75313a88558805Timo Sirainen
3b55c4f0e2ab51e2607556dde8d36360296b29e4Timo Sirainen if ((ret = lucene_index_open_search(index)) < 0)
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen return ret;
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen Term term(_T("box"), _T("*"));
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen WildcardQuery query(&term);
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen Sort sort(sort_fields);
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen memset(&ctx, 0, sizeof(ctx));
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen ctx.index = index;
3b55c4f0e2ab51e2607556dde8d36360296b29e4Timo Sirainen ctx.pool = pool_alloconly_create("guids", 1024);
678d0463849ba777106eb7875f27db07a5d8e3dfTimo Sirainen hash_table_create(&ctx.seen_mailbox_guids, ctx.pool, 0,
678d0463849ba777106eb7875f27db07a5d8e3dfTimo Sirainen guid_128_hash, guid_128_cmp);
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen i_array_init(&ctx.uids, 128);
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen
3b55c4f0e2ab51e2607556dde8d36360296b29e4Timo Sirainen if (ret > 0) try {
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen Hits *hits = index->searcher->search(&query, &sort);
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen for (size_t i = 0; i < hits->length(); i++) {
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen ret = rescan_open_mailbox(&ctx, &hits->doc(i));
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen if (ret > 0)
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen ret = rescan_next(&ctx, &hits->doc(i));
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen if (ret < 0)
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen failed = true;
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen else if (ret == 0)
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen index->reader->deleteDocument(hits->id(i));
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen }
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen _CLDELETE(hits);
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen } catch (CLuceneError &err) {
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen lucene_handle_error(index, err, "rescan search");
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen failed = true;
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen }
e8d5561143360da75d5ccb4991c2d1ffb437be1dTimo Sirainen lucene_index_close(index);
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen if (ctx.box != NULL)
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen rescan_finish(&ctx);
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen array_free(&ctx.uids);
3b55c4f0e2ab51e2607556dde8d36360296b29e4Timo Sirainen
678d0463849ba777106eb7875f27db07a5d8e3dfTimo Sirainen rescan_clear_unseen_mailboxes(index, &ctx);
678d0463849ba777106eb7875f27db07a5d8e3dfTimo Sirainen hash_table_destroy(&ctx.seen_mailbox_guids);
3b55c4f0e2ab51e2607556dde8d36360296b29e4Timo Sirainen pool_unref(&ctx.pool);
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainen return failed ? -1 : 0;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen}
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen
de62ce819d59a529530da4b57be1b8d6dad13d6bTimo Sirainenstatic void guid128_to_wguid(const guid_128_t guid,
027c729b3107441f54a2602ccf2c67c6206998d5Timo Sirainen wchar_t wguid_hex[MAILBOX_GUID_HEX_LENGTH + 1])
027c729b3107441f54a2602ccf2c67c6206998d5Timo Sirainen{
027c729b3107441f54a2602ccf2c67c6206998d5Timo Sirainen buffer_t buf = { 0, 0, { 0, 0, 0, 0, 0 } };
027c729b3107441f54a2602ccf2c67c6206998d5Timo Sirainen unsigned char guid_hex[MAILBOX_GUID_HEX_LENGTH];
027c729b3107441f54a2602ccf2c67c6206998d5Timo Sirainen unsigned int i;
027c729b3107441f54a2602ccf2c67c6206998d5Timo Sirainen
3281669db44d09a087a203201248abbc81b3cc1aTimo Sirainen buffer_create_from_data(&buf, guid_hex, MAILBOX_GUID_HEX_LENGTH);
de62ce819d59a529530da4b57be1b8d6dad13d6bTimo Sirainen binary_to_hex_append(&buf, guid, GUID_128_SIZE);
027c729b3107441f54a2602ccf2c67c6206998d5Timo Sirainen for (i = 0; i < MAILBOX_GUID_HEX_LENGTH; i++)
027c729b3107441f54a2602ccf2c67c6206998d5Timo Sirainen wguid_hex[i] = guid_hex[i];
027c729b3107441f54a2602ccf2c67c6206998d5Timo Sirainen wguid_hex[i] = '\0';
027c729b3107441f54a2602ccf2c67c6206998d5Timo Sirainen}
027c729b3107441f54a2602ccf2c67c6206998d5Timo Sirainen
1e0842a68211be1c619f082c64d259e9d5cc63aeTimo Sirainenstatic bool
4ca9910d9db31a68d9d501150045cfd4bb1e2ac7Timo Sirainenlucene_index_add_uid_filter(BooleanQuery *query,
4ca9910d9db31a68d9d501150045cfd4bb1e2ac7Timo Sirainen const struct fts_expunge_log_read_record *rec)
1e0842a68211be1c619f082c64d259e9d5cc63aeTimo Sirainen{
4ca9910d9db31a68d9d501150045cfd4bb1e2ac7Timo Sirainen struct seq_range_iter iter;
4ca9910d9db31a68d9d501150045cfd4bb1e2ac7Timo Sirainen wchar_t wuid[MAX_INT_STRLEN];
4ca9910d9db31a68d9d501150045cfd4bb1e2ac7Timo Sirainen unsigned int n;
4ca9910d9db31a68d9d501150045cfd4bb1e2ac7Timo Sirainen uint32_t uid;
1e0842a68211be1c619f082c64d259e9d5cc63aeTimo Sirainen
4ca9910d9db31a68d9d501150045cfd4bb1e2ac7Timo Sirainen /* RangeQuery and WildcardQuery work by enumerating through all terms
4ca9910d9db31a68d9d501150045cfd4bb1e2ac7Timo Sirainen that match them, and then adding TermQueries for them. So we can
4ca9910d9db31a68d9d501150045cfd4bb1e2ac7Timo Sirainen simply do the same directly, and if it looks like there are too
4ca9910d9db31a68d9d501150045cfd4bb1e2ac7Timo Sirainen many terms just go through everything. */
1e0842a68211be1c619f082c64d259e9d5cc63aeTimo Sirainen
4ca9910d9db31a68d9d501150045cfd4bb1e2ac7Timo Sirainen if (seq_range_count(&rec->uids) > FTS_LUCENE_MAX_SEARCH_TERMS)
4ca9910d9db31a68d9d501150045cfd4bb1e2ac7Timo Sirainen return false;
1e0842a68211be1c619f082c64d259e9d5cc63aeTimo Sirainen
4ca9910d9db31a68d9d501150045cfd4bb1e2ac7Timo Sirainen seq_range_array_iter_init(&iter, &rec->uids); n = 0;
4ca9910d9db31a68d9d501150045cfd4bb1e2ac7Timo Sirainen while (seq_range_array_iter_nth(&iter, n++, &uid)) {
4ca9910d9db31a68d9d501150045cfd4bb1e2ac7Timo Sirainen swprintf(wuid, N_ELEMENTS(wuid), L"%u", uid);
1e0842a68211be1c619f082c64d259e9d5cc63aeTimo Sirainen
4ca9910d9db31a68d9d501150045cfd4bb1e2ac7Timo Sirainen Term *term = _CLNEW Term(_T("uid"), wuid);
1e0842a68211be1c619f082c64d259e9d5cc63aeTimo Sirainen query->add(_CLNEW TermQuery(term), true, BooleanClause::SHOULD);
1e0842a68211be1c619f082c64d259e9d5cc63aeTimo Sirainen _CLDECDELETE(term);
1e0842a68211be1c619f082c64d259e9d5cc63aeTimo Sirainen }
1e0842a68211be1c619f082c64d259e9d5cc63aeTimo Sirainen return true;
bfd9dc27b76ac169bbaa262217532cbfa4509f97Timo Sirainen}
bfd9dc27b76ac169bbaa262217532cbfa4509f97Timo Sirainen
027c729b3107441f54a2602ccf2c67c6206998d5Timo Sirainenstatic int
027c729b3107441f54a2602ccf2c67c6206998d5Timo Sirainenlucene_index_expunge_record(struct lucene_index *index,
027c729b3107441f54a2602ccf2c67c6206998d5Timo Sirainen const struct fts_expunge_log_read_record *rec)
027c729b3107441f54a2602ccf2c67c6206998d5Timo Sirainen{
027c729b3107441f54a2602ccf2c67c6206998d5Timo Sirainen int ret;
027c729b3107441f54a2602ccf2c67c6206998d5Timo Sirainen
027c729b3107441f54a2602ccf2c67c6206998d5Timo Sirainen if ((ret = lucene_index_open_search(index)) <= 0)
027c729b3107441f54a2602ccf2c67c6206998d5Timo Sirainen return ret;
027c729b3107441f54a2602ccf2c67c6206998d5Timo Sirainen
027c729b3107441f54a2602ccf2c67c6206998d5Timo Sirainen BooleanQuery query;
bfd9dc27b76ac169bbaa262217532cbfa4509f97Timo Sirainen BooleanQuery uids_query;
bfd9dc27b76ac169bbaa262217532cbfa4509f97Timo Sirainen
4ca9910d9db31a68d9d501150045cfd4bb1e2ac7Timo Sirainen if (lucene_index_add_uid_filter(&uids_query, rec))
bfd9dc27b76ac169bbaa262217532cbfa4509f97Timo Sirainen query.add(&uids_query, BooleanClause::MUST);
027c729b3107441f54a2602ccf2c67c6206998d5Timo Sirainen
027c729b3107441f54a2602ccf2c67c6206998d5Timo Sirainen wchar_t wguid[MAILBOX_GUID_HEX_LENGTH + 1];
027c729b3107441f54a2602ccf2c67c6206998d5Timo Sirainen guid128_to_wguid(rec->mailbox_guid, wguid);
027c729b3107441f54a2602ccf2c67c6206998d5Timo Sirainen Term term(_T("box"), wguid);
027c729b3107441f54a2602ccf2c67c6206998d5Timo Sirainen TermQuery mailbox_query(&term);
027c729b3107441f54a2602ccf2c67c6206998d5Timo Sirainen query.add(&mailbox_query, BooleanClause::MUST);
027c729b3107441f54a2602ccf2c67c6206998d5Timo Sirainen
027c729b3107441f54a2602ccf2c67c6206998d5Timo Sirainen try {
027c729b3107441f54a2602ccf2c67c6206998d5Timo Sirainen Hits *hits = index->searcher->search(&query);
027c729b3107441f54a2602ccf2c67c6206998d5Timo Sirainen
027c729b3107441f54a2602ccf2c67c6206998d5Timo Sirainen for (size_t i = 0; i < hits->length(); i++) {
027c729b3107441f54a2602ccf2c67c6206998d5Timo Sirainen uint32_t uid;
027c729b3107441f54a2602ccf2c67c6206998d5Timo Sirainen
027c729b3107441f54a2602ccf2c67c6206998d5Timo Sirainen if (lucene_doc_get_uid(index, &hits->doc(i),
027c729b3107441f54a2602ccf2c67c6206998d5Timo Sirainen &uid) < 0 ||
027c729b3107441f54a2602ccf2c67c6206998d5Timo Sirainen seq_range_exists(&rec->uids, uid))
027c729b3107441f54a2602ccf2c67c6206998d5Timo Sirainen index->reader->deleteDocument(hits->id(i));
027c729b3107441f54a2602ccf2c67c6206998d5Timo Sirainen }
027c729b3107441f54a2602ccf2c67c6206998d5Timo Sirainen _CLDELETE(hits);
027c729b3107441f54a2602ccf2c67c6206998d5Timo Sirainen } catch (CLuceneError &err) {
027c729b3107441f54a2602ccf2c67c6206998d5Timo Sirainen lucene_handle_error(index, err, "expunge search");
027c729b3107441f54a2602ccf2c67c6206998d5Timo Sirainen ret = -1;
027c729b3107441f54a2602ccf2c67c6206998d5Timo Sirainen }
027c729b3107441f54a2602ccf2c67c6206998d5Timo Sirainen return ret < 0 ? -1 : 0;
027c729b3107441f54a2602ccf2c67c6206998d5Timo Sirainen}
027c729b3107441f54a2602ccf2c67c6206998d5Timo Sirainen
027c729b3107441f54a2602ccf2c67c6206998d5Timo Sirainenint lucene_index_expunge_from_log(struct lucene_index *index,
027c729b3107441f54a2602ccf2c67c6206998d5Timo Sirainen struct fts_expunge_log *log)
027c729b3107441f54a2602ccf2c67c6206998d5Timo Sirainen{
027c729b3107441f54a2602ccf2c67c6206998d5Timo Sirainen struct fts_expunge_log_read_ctx *ctx;
027c729b3107441f54a2602ccf2c67c6206998d5Timo Sirainen const struct fts_expunge_log_read_record *rec;
027c729b3107441f54a2602ccf2c67c6206998d5Timo Sirainen int ret = 0, ret2;
027c729b3107441f54a2602ccf2c67c6206998d5Timo Sirainen
027c729b3107441f54a2602ccf2c67c6206998d5Timo Sirainen ctx = fts_expunge_log_read_begin(log);
027c729b3107441f54a2602ccf2c67c6206998d5Timo Sirainen while ((rec = fts_expunge_log_read_next(ctx)) != NULL) {
027c729b3107441f54a2602ccf2c67c6206998d5Timo Sirainen if (lucene_index_expunge_record(index, rec) < 0) {
027c729b3107441f54a2602ccf2c67c6206998d5Timo Sirainen ret = -1;
027c729b3107441f54a2602ccf2c67c6206998d5Timo Sirainen break;
027c729b3107441f54a2602ccf2c67c6206998d5Timo Sirainen }
027c729b3107441f54a2602ccf2c67c6206998d5Timo Sirainen }
6990472dfc095728637b1edf697bb30901f6f78dTimo Sirainen
e8d5561143360da75d5ccb4991c2d1ffb437be1dTimo Sirainen lucene_index_close(index);
6990472dfc095728637b1edf697bb30901f6f78dTimo Sirainen
027c729b3107441f54a2602ccf2c67c6206998d5Timo Sirainen ret2 = fts_expunge_log_read_end(&ctx);
027c729b3107441f54a2602ccf2c67c6206998d5Timo Sirainen if (ret < 0 || ret2 < 0)
027c729b3107441f54a2602ccf2c67c6206998d5Timo Sirainen return -1;
027c729b3107441f54a2602ccf2c67c6206998d5Timo Sirainen return ret2;
027c729b3107441f54a2602ccf2c67c6206998d5Timo Sirainen}
027c729b3107441f54a2602ccf2c67c6206998d5Timo Sirainen
009d6d90b33bc7f64fa8251ac392cc87a835b833Timo Sirainenint lucene_index_optimize(struct lucene_index *index)
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen{
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen int ret = 0;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen
a92dab68f02b3c6a18859bf9d59457d45711a4c5Timo Sirainen if (!IndexReader::indexExists(index->path))
a92dab68f02b3c6a18859bf9d59457d45711a4c5Timo Sirainen return 0;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen if (IndexReader::isLocked(index->path))
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen IndexReader::unlock(index->path);
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen
8a7c3622d534dfa929d659264dcaa67c00f459b5Timo Sirainen IndexWriter *writer = NULL;
031d075daf75b74b286711c1b6f64c3ae70e541bTimo Sirainen try {
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen writer = _CLNEW IndexWriter(index->path, index->default_analyzer, false);
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen writer->optimize();
be044d4f3d08652d7332cdec5aaf8391474908bbTimo Sirainen } catch (CLuceneError &err) {
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen lucene_handle_error(index, err, "IndexWriter::optimize()");
6a866d5d5533cb744c78bc2f1ca47beaee690d2fTimo Sirainen ret = -1;
be044d4f3d08652d7332cdec5aaf8391474908bbTimo Sirainen }
e8d5561143360da75d5ccb4991c2d1ffb437be1dTimo Sirainen try {
e8d5561143360da75d5ccb4991c2d1ffb437be1dTimo Sirainen writer->close();
e8d5561143360da75d5ccb4991c2d1ffb437be1dTimo Sirainen } catch (CLuceneError &err) {
e8d5561143360da75d5ccb4991c2d1ffb437be1dTimo Sirainen lucene_handle_error(index, err, "IndexWriter::close()");
e8d5561143360da75d5ccb4991c2d1ffb437be1dTimo Sirainen ret = -1;
e8d5561143360da75d5ccb4991c2d1ffb437be1dTimo Sirainen }
8a7c3622d534dfa929d659264dcaa67c00f459b5Timo Sirainen if (writer != NULL)
8a7c3622d534dfa929d659264dcaa67c00f459b5Timo Sirainen _CLDELETE(writer);
6a866d5d5533cb744c78bc2f1ca47beaee690d2fTimo Sirainen return ret;
be044d4f3d08652d7332cdec5aaf8391474908bbTimo Sirainen}
be044d4f3d08652d7332cdec5aaf8391474908bbTimo Sirainen
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen// Mostly copy&pasted from CLucene's QueryParser
0ae79eb8ff677a3ee757556b90073072d8972d5dTimo Sirainenstatic Query* getFieldQuery(Analyzer *analyzer, const TCHAR* _field, const TCHAR* queryText, bool fuzzy) {
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen // Use the analyzer to get all the tokens, and then build a TermQuery,
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen // PhraseQuery, or nothing based on the term count
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen StringReader reader(queryText);
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen TokenStream* source = analyzer->tokenStream(_field, &reader);
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen CLVector<CL_NS(analysis)::Token*, Deletor::Object<CL_NS(analysis)::Token> > v;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen CL_NS(analysis)::Token* t = NULL;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen int32_t positionCount = 0;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen bool severalTokensAtSamePosition = false;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen while (true) {
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen t = _CLNEW Token();
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen try {
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen Token* _t = source->next(t);
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen if (_t == NULL) _CLDELETE(t);
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen }_CLCATCH_ERR(CL_ERR_IO, _CLLDELETE(source);_CLLDELETE(t);,{
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen t = NULL;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen });
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen if (t == NULL)
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen break;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen v.push_back(t);
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen if (t->getPositionIncrement() != 0)
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen positionCount += t->getPositionIncrement();
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen else
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen severalTokensAtSamePosition = true;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen }
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen try {
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen source->close();
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen }
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen _CLCATCH_ERR_CLEANUP(CL_ERR_IO, {_CLLDELETE(source);_CLLDELETE(t);} ); /* cleanup */
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen _CLLDELETE(source);
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen if (v.size() == 0)
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen return NULL;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen else if (v.size() == 1) {
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen Term* tm = _CLNEW Term(_field, v.at(0)->termBuffer());
0ae79eb8ff677a3ee757556b90073072d8972d5dTimo Sirainen Query* ret;
0ae79eb8ff677a3ee757556b90073072d8972d5dTimo Sirainen if (fuzzy)
0ae79eb8ff677a3ee757556b90073072d8972d5dTimo Sirainen ret = _CLNEW FuzzyQuery( tm );
0ae79eb8ff677a3ee757556b90073072d8972d5dTimo Sirainen else
d29ec22aa388c84852ddb1d9a7179ef3bb117847Timo Sirainen ret = _CLNEW TermQuery( tm );
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen _CLDECDELETE(tm);
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen return ret;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen } else {
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen if (severalTokensAtSamePosition) {
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen if (positionCount == 1) {
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen // no phrase query:
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen BooleanQuery* q = _CLNEW BooleanQuery(true);
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen for(size_t i=0; i<v.size(); i++ ){
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen Term* tm = _CLNEW Term(_field, v.at(i)->termBuffer());
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen q->add(_CLNEW TermQuery(tm), true, BooleanClause::SHOULD);
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen _CLDECDELETE(tm);
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen }
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen return q;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen }else {
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen MultiPhraseQuery* mpq = _CLNEW MultiPhraseQuery();
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen CLArrayList<Term*> multiTerms;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen int32_t position = -1;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen for (size_t i = 0; i < v.size(); i++) {
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen t = v.at(i);
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen if (t->getPositionIncrement() > 0 && multiTerms.size() > 0) {
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen ValueArray<Term*> termsArray(multiTerms.size());
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen multiTerms.toArray(termsArray.values);
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen mpq->add(&termsArray,position);
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen multiTerms.clear();
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen }
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen position += t->getPositionIncrement();
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen multiTerms.push_back(_CLNEW Term(_field, t->termBuffer()));
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen }
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen ValueArray<Term*> termsArray(multiTerms.size());
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen multiTerms.toArray(termsArray.values);
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen mpq->add(&termsArray,position);
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen return mpq;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen }
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen }else {
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen PhraseQuery* pq = _CLNEW PhraseQuery();
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen int32_t position = -1;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen for (size_t i = 0; i < v.size(); i++) {
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen t = v.at(i);
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen Term* tm = _CLNEW Term(_field, t->termBuffer());
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen position += t->getPositionIncrement();
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen pq->add(tm,position);
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen _CLDECDELETE(tm);
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen }
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen return pq;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen }
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen }
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen}
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainenstatic Query *
06ed0c58ea392df22ccf4868aac494831ea756e1Timo Sirainenlucene_get_query_str(struct lucene_index *index,
06ed0c58ea392df22ccf4868aac494831ea756e1Timo Sirainen const TCHAR *key, const char *str, bool fuzzy)
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen{
f26ef7a3a562dc42a1e9a4dde546bd30df3241e8Timo Sirainen const TCHAR *wvalue;
f26ef7a3a562dc42a1e9a4dde546bd30df3241e8Timo Sirainen Analyzer *analyzer;
f26ef7a3a562dc42a1e9a4dde546bd30df3241e8Timo Sirainen
4f4daf7df84f450c7342de569bf25195e93d6bc7Timo Sirainen if (index->set.use_libfts) {
4f4daf7df84f450c7342de569bf25195e93d6bc7Timo Sirainen const wchar_t *wstr = t_lucene_utf8_to_tchar(index, str);
4f4daf7df84f450c7342de569bf25195e93d6bc7Timo Sirainen Term* tm = _CLNEW Term(key, wstr);
4f4daf7df84f450c7342de569bf25195e93d6bc7Timo Sirainen Query* ret;
4f4daf7df84f450c7342de569bf25195e93d6bc7Timo Sirainen if (fuzzy)
4f4daf7df84f450c7342de569bf25195e93d6bc7Timo Sirainen ret = _CLNEW FuzzyQuery( tm );
4f4daf7df84f450c7342de569bf25195e93d6bc7Timo Sirainen else
4f4daf7df84f450c7342de569bf25195e93d6bc7Timo Sirainen ret = _CLNEW TermQuery( tm );
4f4daf7df84f450c7342de569bf25195e93d6bc7Timo Sirainen _CLDECDELETE(tm);
4f4daf7df84f450c7342de569bf25195e93d6bc7Timo Sirainen return ret;
4f4daf7df84f450c7342de569bf25195e93d6bc7Timo Sirainen }
4f4daf7df84f450c7342de569bf25195e93d6bc7Timo Sirainen
f26ef7a3a562dc42a1e9a4dde546bd30df3241e8Timo Sirainen if (index->normalizer_buf != NULL) {
f26ef7a3a562dc42a1e9a4dde546bd30df3241e8Timo Sirainen buffer_set_used_size(index->normalizer_buf, 0);
f26ef7a3a562dc42a1e9a4dde546bd30df3241e8Timo Sirainen index->normalizer(str, strlen(str), index->normalizer_buf);
f26ef7a3a562dc42a1e9a4dde546bd30df3241e8Timo Sirainen buffer_append_c(index->normalizer_buf, '\0');
f26ef7a3a562dc42a1e9a4dde546bd30df3241e8Timo Sirainen str = (const char *)index->normalizer_buf->data;
f26ef7a3a562dc42a1e9a4dde546bd30df3241e8Timo Sirainen }
f26ef7a3a562dc42a1e9a4dde546bd30df3241e8Timo Sirainen
4f4daf7df84f450c7342de569bf25195e93d6bc7Timo Sirainen wvalue = t_lucene_utf8_to_tchar(index, str);
f26ef7a3a562dc42a1e9a4dde546bd30df3241e8Timo Sirainen analyzer = guess_analyzer(index, str, strlen(str));
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen if (analyzer == NULL)
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen analyzer = index->default_analyzer;
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen
06ed0c58ea392df22ccf4868aac494831ea756e1Timo Sirainen return getFieldQuery(analyzer, key, wvalue, fuzzy);
06ed0c58ea392df22ccf4868aac494831ea756e1Timo Sirainen}
06ed0c58ea392df22ccf4868aac494831ea756e1Timo Sirainen
06ed0c58ea392df22ccf4868aac494831ea756e1Timo Sirainenstatic Query *
06ed0c58ea392df22ccf4868aac494831ea756e1Timo Sirainenlucene_get_query(struct lucene_index *index,
06ed0c58ea392df22ccf4868aac494831ea756e1Timo Sirainen const TCHAR *key, const struct mail_search_arg *arg)
06ed0c58ea392df22ccf4868aac494831ea756e1Timo Sirainen{
06ed0c58ea392df22ccf4868aac494831ea756e1Timo Sirainen return lucene_get_query_str(index, key, arg->value.str, arg->fuzzy);
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen}
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainenstatic bool
bd49b2e35dfa08753e89bef12a694978599d0fc0Timo Sirainenlucene_add_definite_query(struct lucene_index *index,
bd49b2e35dfa08753e89bef12a694978599d0fc0Timo Sirainen ARRAY_TYPE(lucene_query) &queries,
117fb8c00336dc54bab9cfa547249df7a4970611Timo Sirainen struct mail_search_arg *arg,
117fb8c00336dc54bab9cfa547249df7a4970611Timo Sirainen enum fts_lookup_flags flags)
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen{
117fb8c00336dc54bab9cfa547249df7a4970611Timo Sirainen bool and_args = (flags & FTS_LOOKUP_FLAG_AND_ARGS) != 0;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen Query *q;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen
cbc8f9d71483a2cf71610f7e7e1f2dc9884bd556Baofeng Wang if (arg->no_fts)
cbc8f9d71483a2cf71610f7e7e1f2dc9884bd556Baofeng Wang return false;
cbc8f9d71483a2cf71610f7e7e1f2dc9884bd556Baofeng Wang
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen if (arg->match_not && !and_args) {
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen /* FIXME: we could handle this by doing multiple queries.. */
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen return false;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen }
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen switch (arg->type) {
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen case SEARCH_TEXT: {
0ae79eb8ff677a3ee757556b90073072d8972d5dTimo Sirainen Query *q1 = lucene_get_query(index, _T("hdr"), arg);
0ae79eb8ff677a3ee757556b90073072d8972d5dTimo Sirainen Query *q2 = lucene_get_query(index, _T("body"), arg);
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen
61af1856d2a92cd2c66615a0fbc9ef371a8da8fcTimo Sirainen if (q1 == NULL && q2 == NULL)
61af1856d2a92cd2c66615a0fbc9ef371a8da8fcTimo Sirainen q = NULL;
61af1856d2a92cd2c66615a0fbc9ef371a8da8fcTimo Sirainen else {
1f8e934c62a8bd8e9a912e16b59c5e30993bd173Timo Sirainen BooleanQuery *bq = _CLNEW BooleanQuery();
61af1856d2a92cd2c66615a0fbc9ef371a8da8fcTimo Sirainen if (q1 != NULL)
61af1856d2a92cd2c66615a0fbc9ef371a8da8fcTimo Sirainen bq->add(q1, true, BooleanClause::SHOULD);
61af1856d2a92cd2c66615a0fbc9ef371a8da8fcTimo Sirainen if (q2 != NULL)
61af1856d2a92cd2c66615a0fbc9ef371a8da8fcTimo Sirainen bq->add(q2, true, BooleanClause::SHOULD);
61af1856d2a92cd2c66615a0fbc9ef371a8da8fcTimo Sirainen q = bq;
61af1856d2a92cd2c66615a0fbc9ef371a8da8fcTimo Sirainen }
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen break;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen }
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen case SEARCH_BODY:
0ae79eb8ff677a3ee757556b90073072d8972d5dTimo Sirainen q = lucene_get_query(index, _T("body"), arg);
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen break;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen case SEARCH_HEADER:
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen case SEARCH_HEADER_ADDRESS:
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen case SEARCH_HEADER_COMPRESS_LWSP:
65a67a3c17679d4bd800067ca6273c17e0ae4c62Timo Sirainen if (!fts_header_want_indexed(arg->hdr_field_name) ||
65a67a3c17679d4bd800067ca6273c17e0ae4c62Timo Sirainen *arg->value.str == '\0')
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen return false;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen q = lucene_get_query(index,
4f4daf7df84f450c7342de569bf25195e93d6bc7Timo Sirainen t_lucene_utf8_to_tchar(index, t_str_lcase(arg->hdr_field_name)),
0ae79eb8ff677a3ee757556b90073072d8972d5dTimo Sirainen arg);
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen break;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen default:
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen return false;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen }
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen
61af1856d2a92cd2c66615a0fbc9ef371a8da8fcTimo Sirainen if (q == NULL) {
61af1856d2a92cd2c66615a0fbc9ef371a8da8fcTimo Sirainen /* couldn't handle this search after all (e.g. trying to search
61af1856d2a92cd2c66615a0fbc9ef371a8da8fcTimo Sirainen a stop word) */
61af1856d2a92cd2c66615a0fbc9ef371a8da8fcTimo Sirainen return false;
61af1856d2a92cd2c66615a0fbc9ef371a8da8fcTimo Sirainen }
bd49b2e35dfa08753e89bef12a694978599d0fc0Timo Sirainen
bd49b2e35dfa08753e89bef12a694978599d0fc0Timo Sirainen struct lucene_query *lq = array_append_space(&queries);
bd49b2e35dfa08753e89bef12a694978599d0fc0Timo Sirainen lq->query = q;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen if (!and_args)
bd49b2e35dfa08753e89bef12a694978599d0fc0Timo Sirainen lq->occur = BooleanClause::SHOULD;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen else if (!arg->match_not)
bd49b2e35dfa08753e89bef12a694978599d0fc0Timo Sirainen lq->occur = BooleanClause::MUST;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen else
bd49b2e35dfa08753e89bef12a694978599d0fc0Timo Sirainen lq->occur = BooleanClause::MUST_NOT;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen return true;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen}
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainenstatic bool
bd49b2e35dfa08753e89bef12a694978599d0fc0Timo Sirainenlucene_add_maybe_query(struct lucene_index *index,
bd49b2e35dfa08753e89bef12a694978599d0fc0Timo Sirainen ARRAY_TYPE(lucene_query) &queries,
117fb8c00336dc54bab9cfa547249df7a4970611Timo Sirainen struct mail_search_arg *arg,
117fb8c00336dc54bab9cfa547249df7a4970611Timo Sirainen enum fts_lookup_flags flags)
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen{
117fb8c00336dc54bab9cfa547249df7a4970611Timo Sirainen bool and_args = (flags & FTS_LOOKUP_FLAG_AND_ARGS) != 0;
65a67a3c17679d4bd800067ca6273c17e0ae4c62Timo Sirainen Query *q = NULL;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen
cbc8f9d71483a2cf71610f7e7e1f2dc9884bd556Baofeng Wang if (arg->no_fts)
cbc8f9d71483a2cf71610f7e7e1f2dc9884bd556Baofeng Wang return false;
cbc8f9d71483a2cf71610f7e7e1f2dc9884bd556Baofeng Wang
bd49b2e35dfa08753e89bef12a694978599d0fc0Timo Sirainen if (arg->match_not) {
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen /* FIXME: we could handle this by doing multiple queries.. */
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen return false;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen }
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen switch (arg->type) {
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen case SEARCH_HEADER:
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen case SEARCH_HEADER_ADDRESS:
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen case SEARCH_HEADER_COMPRESS_LWSP:
4f4daf7df84f450c7342de569bf25195e93d6bc7Timo Sirainen if (*arg->value.str == '\0' && !index->set.use_libfts) {
65a67a3c17679d4bd800067ca6273c17e0ae4c62Timo Sirainen /* checking potential existence of the header name */
65a67a3c17679d4bd800067ca6273c17e0ae4c62Timo Sirainen q = lucene_get_query_str(index, _T("hdr"),
3f8926ac7b10849d0c2f84c513efe025c7ae1ab2Timo Sirainen t_str_lcase(arg->hdr_field_name), FALSE);
65a67a3c17679d4bd800067ca6273c17e0ae4c62Timo Sirainen break;
65a67a3c17679d4bd800067ca6273c17e0ae4c62Timo Sirainen }
65a67a3c17679d4bd800067ca6273c17e0ae4c62Timo Sirainen
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen if (fts_header_want_indexed(arg->hdr_field_name))
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen return false;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen /* we can check if the search key exists in some header and
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen filter out the messages that have no chance of matching */
65a67a3c17679d4bd800067ca6273c17e0ae4c62Timo Sirainen q = lucene_get_query(index, _T("hdr"), arg);
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen break;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen default:
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen return false;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen }
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen
dc03b7bb2cc2b78bf66856bdfedfb1cae774c43bTimo Sirainen if (q == NULL) {
dc03b7bb2cc2b78bf66856bdfedfb1cae774c43bTimo Sirainen /* couldn't handle this search after all (e.g. trying to search
dc03b7bb2cc2b78bf66856bdfedfb1cae774c43bTimo Sirainen a stop word) */
dc03b7bb2cc2b78bf66856bdfedfb1cae774c43bTimo Sirainen return false;
dc03b7bb2cc2b78bf66856bdfedfb1cae774c43bTimo Sirainen }
bd49b2e35dfa08753e89bef12a694978599d0fc0Timo Sirainen struct lucene_query *lq = array_append_space(&queries);
bd49b2e35dfa08753e89bef12a694978599d0fc0Timo Sirainen lq->query = q;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen if (!and_args)
bd49b2e35dfa08753e89bef12a694978599d0fc0Timo Sirainen lq->occur = BooleanClause::SHOULD;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen else if (!arg->match_not)
bd49b2e35dfa08753e89bef12a694978599d0fc0Timo Sirainen lq->occur = BooleanClause::MUST;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen else
bd49b2e35dfa08753e89bef12a694978599d0fc0Timo Sirainen lq->occur = BooleanClause::MUST_NOT;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen return true;
bd49b2e35dfa08753e89bef12a694978599d0fc0Timo Sirainen}
bd49b2e35dfa08753e89bef12a694978599d0fc0Timo Sirainen
bd49b2e35dfa08753e89bef12a694978599d0fc0Timo Sirainenstatic bool queries_have_non_must_nots(ARRAY_TYPE(lucene_query) &queries)
bd49b2e35dfa08753e89bef12a694978599d0fc0Timo Sirainen{
bd49b2e35dfa08753e89bef12a694978599d0fc0Timo Sirainen const struct lucene_query *lq;
bd49b2e35dfa08753e89bef12a694978599d0fc0Timo Sirainen
bd49b2e35dfa08753e89bef12a694978599d0fc0Timo Sirainen array_foreach(&queries, lq) {
bd49b2e35dfa08753e89bef12a694978599d0fc0Timo Sirainen if (lq->occur != BooleanClause::MUST_NOT)
bd49b2e35dfa08753e89bef12a694978599d0fc0Timo Sirainen return TRUE;
bd49b2e35dfa08753e89bef12a694978599d0fc0Timo Sirainen }
bd49b2e35dfa08753e89bef12a694978599d0fc0Timo Sirainen return FALSE;
bd49b2e35dfa08753e89bef12a694978599d0fc0Timo Sirainen}
bd49b2e35dfa08753e89bef12a694978599d0fc0Timo Sirainen
bd49b2e35dfa08753e89bef12a694978599d0fc0Timo Sirainenstatic void search_query_add(BooleanQuery &query,
bd49b2e35dfa08753e89bef12a694978599d0fc0Timo Sirainen ARRAY_TYPE(lucene_query) &queries)
bd49b2e35dfa08753e89bef12a694978599d0fc0Timo Sirainen{
bd49b2e35dfa08753e89bef12a694978599d0fc0Timo Sirainen BooleanQuery *search_query = _CLNEW BooleanQuery();
bd49b2e35dfa08753e89bef12a694978599d0fc0Timo Sirainen const struct lucene_query *lq;
bd49b2e35dfa08753e89bef12a694978599d0fc0Timo Sirainen
bd49b2e35dfa08753e89bef12a694978599d0fc0Timo Sirainen if (queries_have_non_must_nots(queries)) {
bd49b2e35dfa08753e89bef12a694978599d0fc0Timo Sirainen array_foreach(&queries, lq)
bd49b2e35dfa08753e89bef12a694978599d0fc0Timo Sirainen search_query->add(lq->query, true, lq->occur);
bd49b2e35dfa08753e89bef12a694978599d0fc0Timo Sirainen query.add(search_query, true, BooleanClause::MUST);
bd49b2e35dfa08753e89bef12a694978599d0fc0Timo Sirainen } else {
bd49b2e35dfa08753e89bef12a694978599d0fc0Timo Sirainen array_foreach(&queries, lq)
bd49b2e35dfa08753e89bef12a694978599d0fc0Timo Sirainen search_query->add(lq->query, true, BooleanClause::SHOULD);
bd49b2e35dfa08753e89bef12a694978599d0fc0Timo Sirainen query.add(search_query, true, BooleanClause::MUST_NOT);
bd49b2e35dfa08753e89bef12a694978599d0fc0Timo Sirainen }
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen}
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainenstatic int
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainenlucene_index_search(struct lucene_index *index,
bd49b2e35dfa08753e89bef12a694978599d0fc0Timo Sirainen ARRAY_TYPE(lucene_query) &queries,
bd49b2e35dfa08753e89bef12a694978599d0fc0Timo Sirainen struct fts_result *result, ARRAY_TYPE(seq_range) *uids_r)
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen{
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen struct fts_score_map *score;
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen int ret = 0;
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen BooleanQuery query;
bd49b2e35dfa08753e89bef12a694978599d0fc0Timo Sirainen search_query_add(query, queries);
20b136f04257b0ba338e49f31a999c0d4b243647Timo Sirainen
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen Term mailbox_term(_T("box"), index->mailbox_guid);
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen TermQuery mailbox_query(&mailbox_term);
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen query.add(&mailbox_query, BooleanClause::MUST);
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen try {
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen Hits *hits = index->searcher->search(&query);
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen uint32_t last_uid = 0;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen if (result != NULL)
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen result->scores_sorted = true;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen for (size_t i = 0; i < hits->length(); i++) {
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen uint32_t uid;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen if (lucene_doc_get_uid(index, &hits->doc(i),
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen &uid) < 0) {
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen ret = -1;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen break;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen }
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen
49c848ccaab090b06add472122a1a7ebfaaf6044Timo Sirainen if (seq_range_array_add(uids_r, uid)) {
49c848ccaab090b06add472122a1a7ebfaaf6044Timo Sirainen /* duplicate result */
49c848ccaab090b06add472122a1a7ebfaaf6044Timo Sirainen } else if (result != NULL) {
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen if (uid < last_uid)
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen result->scores_sorted = false;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen last_uid = uid;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen score = array_append_space(&result->scores);
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen score->uid = uid;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen score->score = hits->score(i);
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen }
20b136f04257b0ba338e49f31a999c0d4b243647Timo Sirainen }
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen _CLDELETE(hits);
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen return ret;
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen } catch (CLuceneError &err) {
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen lucene_handle_error(index, err, "search");
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen return -1;
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen }
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen}
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainenint lucene_index_lookup(struct lucene_index *index,
117fb8c00336dc54bab9cfa547249df7a4970611Timo Sirainen struct mail_search_arg *args,
117fb8c00336dc54bab9cfa547249df7a4970611Timo Sirainen enum fts_lookup_flags flags,
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen struct fts_result *result)
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen{
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen struct mail_search_arg *arg;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen if (lucene_index_open_search(index) <= 0)
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen return -1;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen
bd49b2e35dfa08753e89bef12a694978599d0fc0Timo Sirainen ARRAY_TYPE(lucene_query) def_queries;
bd49b2e35dfa08753e89bef12a694978599d0fc0Timo Sirainen t_array_init(&def_queries, 16);
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen bool have_definites = false;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen for (arg = args; arg != NULL; arg = arg->next) {
117fb8c00336dc54bab9cfa547249df7a4970611Timo Sirainen if (lucene_add_definite_query(index, def_queries, arg, flags)) {
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen arg->match_always = true;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen have_definites = true;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen }
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen }
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen if (have_definites) {
117fb8c00336dc54bab9cfa547249df7a4970611Timo Sirainen ARRAY_TYPE(seq_range) *uids_arr =
117fb8c00336dc54bab9cfa547249df7a4970611Timo Sirainen (flags & FTS_LOOKUP_FLAG_NO_AUTO_FUZZY) == 0 ?
117fb8c00336dc54bab9cfa547249df7a4970611Timo Sirainen &result->definite_uids : &result->maybe_uids;
bd49b2e35dfa08753e89bef12a694978599d0fc0Timo Sirainen if (lucene_index_search(index, def_queries, result,
117fb8c00336dc54bab9cfa547249df7a4970611Timo Sirainen uids_arr) < 0)
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen return -1;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen }
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen
acc72c40c5bfe818013e0ae9c9e73eb90ae8fbb1Timo Sirainen if (have_definites) {
acc72c40c5bfe818013e0ae9c9e73eb90ae8fbb1Timo Sirainen /* FIXME: mixing up definite + maybe queries is broken. if the
acc72c40c5bfe818013e0ae9c9e73eb90ae8fbb1Timo Sirainen definite query matched, it'll just assume that the maybe
acc72c40c5bfe818013e0ae9c9e73eb90ae8fbb1Timo Sirainen queries matched as well */
acc72c40c5bfe818013e0ae9c9e73eb90ae8fbb1Timo Sirainen return 0;
acc72c40c5bfe818013e0ae9c9e73eb90ae8fbb1Timo Sirainen }
acc72c40c5bfe818013e0ae9c9e73eb90ae8fbb1Timo Sirainen
bd49b2e35dfa08753e89bef12a694978599d0fc0Timo Sirainen ARRAY_TYPE(lucene_query) maybe_queries;
bd49b2e35dfa08753e89bef12a694978599d0fc0Timo Sirainen t_array_init(&maybe_queries, 16);
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen bool have_maybies = false;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen for (arg = args; arg != NULL; arg = arg->next) {
117fb8c00336dc54bab9cfa547249df7a4970611Timo Sirainen if (lucene_add_maybe_query(index, maybe_queries, arg, flags)) {
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen arg->match_always = true;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen have_maybies = true;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen }
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen }
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen if (have_maybies) {
bd49b2e35dfa08753e89bef12a694978599d0fc0Timo Sirainen if (lucene_index_search(index, maybe_queries, NULL,
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen &result->maybe_uids) < 0)
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen return -1;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen }
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen return 0;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen}
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainenstatic int
678d0463849ba777106eb7875f27db07a5d8e3dfTimo Sirainenlucene_index_search_multi(struct lucene_index *index,
678d0463849ba777106eb7875f27db07a5d8e3dfTimo Sirainen HASH_TABLE_TYPE(wguid_result) guids,
bd49b2e35dfa08753e89bef12a694978599d0fc0Timo Sirainen ARRAY_TYPE(lucene_query) &queries,
117fb8c00336dc54bab9cfa547249df7a4970611Timo Sirainen enum fts_lookup_flags flags,
bd49b2e35dfa08753e89bef12a694978599d0fc0Timo Sirainen struct fts_multi_result *result)
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen{
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen struct fts_score_map *score;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen int ret = 0;
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen
6380f2bc729a03b328793e8ad6ba7587620fa184Timo Sirainen BooleanQuery query;
bd49b2e35dfa08753e89bef12a694978599d0fc0Timo Sirainen search_query_add(query, queries);
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen BooleanQuery mailbox_query;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen struct hash_iterate_context *iter;
a7b0916217f8ebb1da55e049f054e047f81bb911Timo Sirainen void *key, *value;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen iter = hash_table_iterate_init(guids);
a75d470c9223a75801418fcdda258885c36317e0Timo Sirainen while (hash_table_iterate(iter, guids, &key, &value)) {
a7b0916217f8ebb1da55e049f054e047f81bb911Timo Sirainen Term *term = _CLNEW Term(_T("box"), (wchar_t *)key);
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen TermQuery *q = _CLNEW TermQuery(term);
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen mailbox_query.add(q, true, BooleanClause::SHOULD);
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen }
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen hash_table_iterate_deinit(&iter);
6380f2bc729a03b328793e8ad6ba7587620fa184Timo Sirainen
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen query.add(&mailbox_query, BooleanClause::MUST);
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen try {
6380f2bc729a03b328793e8ad6ba7587620fa184Timo Sirainen Hits *hits = index->searcher->search(&query);
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen for (size_t i = 0; i < hits->length(); i++) {
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen uint32_t uid;
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen Field *field = hits->doc(i).getField(_T("box"));
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen const TCHAR *box_guid = field == NULL ? NULL : field->stringValue();
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen if (box_guid == NULL) {
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen i_error("lucene: Corrupted FTS index %s: No mailbox for document",
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen index->path);
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen ret = -1;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen break;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen }
678d0463849ba777106eb7875f27db07a5d8e3dfTimo Sirainen struct fts_result *br =
678d0463849ba777106eb7875f27db07a5d8e3dfTimo Sirainen hash_table_lookup(guids, box_guid);
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen if (br == NULL) {
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen i_warning("lucene: Returned unexpected mailbox with GUID %ls", box_guid);
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen continue;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen }
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen if (lucene_doc_get_uid(index, &hits->doc(i),
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen &uid) < 0) {
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen ret = -1;
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen break;
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen }
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen
117fb8c00336dc54bab9cfa547249df7a4970611Timo Sirainen ARRAY_TYPE(seq_range) *uids_arr =
117fb8c00336dc54bab9cfa547249df7a4970611Timo Sirainen (flags & FTS_LOOKUP_FLAG_NO_AUTO_FUZZY) == 0 ?
117fb8c00336dc54bab9cfa547249df7a4970611Timo Sirainen &br->maybe_uids : &br->definite_uids;
117fb8c00336dc54bab9cfa547249df7a4970611Timo Sirainen if (!array_is_created(uids_arr)) {
117fb8c00336dc54bab9cfa547249df7a4970611Timo Sirainen p_array_init(uids_arr, result->pool, 32);
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen p_array_init(&br->scores, result->pool, 32);
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen }
117fb8c00336dc54bab9cfa547249df7a4970611Timo Sirainen if (seq_range_array_add(uids_arr, uid)) {
49c848ccaab090b06add472122a1a7ebfaaf6044Timo Sirainen /* duplicate result */
49c848ccaab090b06add472122a1a7ebfaaf6044Timo Sirainen } else {
49c848ccaab090b06add472122a1a7ebfaaf6044Timo Sirainen score = array_append_space(&br->scores);
49c848ccaab090b06add472122a1a7ebfaaf6044Timo Sirainen score->uid = uid;
49c848ccaab090b06add472122a1a7ebfaaf6044Timo Sirainen score->score = hits->score(i);
49c848ccaab090b06add472122a1a7ebfaaf6044Timo Sirainen }
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen }
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen _CLDELETE(hits);
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen return ret;
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen } catch (CLuceneError &err) {
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen lucene_handle_error(index, err, "multi search");
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen return -1;
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen }
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen}
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainenint lucene_index_lookup_multi(struct lucene_index *index,
678d0463849ba777106eb7875f27db07a5d8e3dfTimo Sirainen HASH_TABLE_TYPE(wguid_result) guids,
117fb8c00336dc54bab9cfa547249df7a4970611Timo Sirainen struct mail_search_arg *args,
117fb8c00336dc54bab9cfa547249df7a4970611Timo Sirainen enum fts_lookup_flags flags,
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen struct fts_multi_result *result)
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen{
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen struct mail_search_arg *arg;
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen if (lucene_index_open_search(index) <= 0)
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen return -1;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen
bd49b2e35dfa08753e89bef12a694978599d0fc0Timo Sirainen ARRAY_TYPE(lucene_query) def_queries;
bd49b2e35dfa08753e89bef12a694978599d0fc0Timo Sirainen t_array_init(&def_queries, 16);
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen bool have_definites = false;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen for (arg = args; arg != NULL; arg = arg->next) {
117fb8c00336dc54bab9cfa547249df7a4970611Timo Sirainen if (lucene_add_definite_query(index, def_queries, arg, flags)) {
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen arg->match_always = true;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen have_definites = true;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen }
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen }
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen if (have_definites) {
117fb8c00336dc54bab9cfa547249df7a4970611Timo Sirainen if (lucene_index_search_multi(index, guids, def_queries, flags,
117fb8c00336dc54bab9cfa547249df7a4970611Timo Sirainen result) < 0)
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen return -1;
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen }
8d587838c414c48a331f0b54cd7ffd97e5024abdTimo Sirainen return 0;
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen}
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainenstruct lucene_index_iter {
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen struct lucene_index *index;
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen struct lucene_index_record rec;
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen
def291c1ccc82f439541ea7b49652a1466a999fcTimo Sirainen Term *term;
def291c1ccc82f439541ea7b49652a1466a999fcTimo Sirainen WildcardQuery *query;
def291c1ccc82f439541ea7b49652a1466a999fcTimo Sirainen Sort *sort;
def291c1ccc82f439541ea7b49652a1466a999fcTimo Sirainen
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen Hits *hits;
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen size_t i;
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen bool failed;
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen};
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainenstruct lucene_index_iter *
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainenlucene_index_iter_init(struct lucene_index *index)
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen{
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen static const TCHAR *sort_fields[] = { _T("box"), _T("uid"), NULL };
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen struct lucene_index_iter *iter;
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen int ret;
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen iter = i_new(struct lucene_index_iter, 1);
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen iter->index = index;
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen if ((ret = lucene_index_open_search(index)) <= 0) {
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen if (ret < 0)
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen iter->failed = true;
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen return iter;
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen }
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen
def291c1ccc82f439541ea7b49652a1466a999fcTimo Sirainen iter->term = _CLNEW Term(_T("box"), _T("*"));
def291c1ccc82f439541ea7b49652a1466a999fcTimo Sirainen iter->query = _CLNEW WildcardQuery(iter->term);
def291c1ccc82f439541ea7b49652a1466a999fcTimo Sirainen iter->sort = _CLNEW Sort(sort_fields);
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen try {
def291c1ccc82f439541ea7b49652a1466a999fcTimo Sirainen iter->hits = index->searcher->search(iter->query, iter->sort);
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen } catch (CLuceneError &err) {
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen lucene_handle_error(index, err, "rescan search");
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen iter->failed = true;
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen }
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen return iter;
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen}
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainenconst struct lucene_index_record *
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainenlucene_index_iter_next(struct lucene_index_iter *iter)
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen{
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen if (iter->hits == NULL)
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen return NULL;
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen if (iter->i == iter->hits->length())
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen return NULL;
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen Document *doc = &iter->hits->doc(iter->i);
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen iter->i++;
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen memset(&iter->rec, 0, sizeof(iter->rec));
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen (void)fts_lucene_get_mailbox_guid(iter->index, doc,
de62ce819d59a529530da4b57be1b8d6dad13d6bTimo Sirainen iter->rec.mailbox_guid);
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen (void)lucene_doc_get_uid(iter->index, doc, &iter->rec.uid);
57b523eeb99ed5d7f5002907a409cdef54353ce5Timo Sirainen iter->rec.part_num = lucene_doc_get_part(iter->index, doc);
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen return &iter->rec;
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen}
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainenint lucene_index_iter_deinit(struct lucene_index_iter **_iter)
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen{
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen struct lucene_index_iter *iter = *_iter;
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen int ret = iter->failed ? -1 : 0;
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen *_iter = NULL;
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen if (iter->hits != NULL)
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen _CLDELETE(iter->hits);
def291c1ccc82f439541ea7b49652a1466a999fcTimo Sirainen if (iter->query != NULL) {
def291c1ccc82f439541ea7b49652a1466a999fcTimo Sirainen _CLDELETE(iter->query);
def291c1ccc82f439541ea7b49652a1466a999fcTimo Sirainen _CLDELETE(iter->sort);
def291c1ccc82f439541ea7b49652a1466a999fcTimo Sirainen _CLDELETE(iter->term);
def291c1ccc82f439541ea7b49652a1466a999fcTimo Sirainen }
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen i_free(iter);
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen return ret;
39ed514f9d401b3cb589595c6a2f532050254d77Timo Sirainen}
64f30df0bee5218c9a69915e796d9d1376cfbf29Timo Sirainen
64f30df0bee5218c9a69915e796d9d1376cfbf29Timo Sirainenvoid lucene_shutdown(void)
64f30df0bee5218c9a69915e796d9d1376cfbf29Timo Sirainen{
64f30df0bee5218c9a69915e796d9d1376cfbf29Timo Sirainen _lucene_shutdown();
64f30df0bee5218c9a69915e796d9d1376cfbf29Timo Sirainen}