lucene-wrapper.cc revision 30003461d9c5b090384b252260916cf50954d943
02c335c23bf5fa225a467c19f2c063fb0dc7b8c3Timo Sirainen/* Copyright (c) 2006-2010 Dovecot authors, see the included COPYING file */
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen
5254d77805cd35b9356d072ba325c356c43b0d51Timo Sirainenextern "C" {
5254d77805cd35b9356d072ba325c356c43b0d51Timo Sirainen#include "lib.h"
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen#include "array.h"
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen#include "env-util.h"
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen#include "unichar.h"
b561170c84d19ef1dee3d528939d77fd38047b3fTimo Sirainen#include "hash.h"
b561170c84d19ef1dee3d528939d77fd38047b3fTimo Sirainen#include "str.h"
bad5981f287ff1e4094428e27178062548215a93Timo Sirainen#include "strescape.h"
b561170c84d19ef1dee3d528939d77fd38047b3fTimo Sirainen#include "mail-search.h"
9edba36ef9b2679b0585c345074b1f1d482bfd20Timo Sirainen#include "lucene-wrapper.h"
b561170c84d19ef1dee3d528939d77fd38047b3fTimo Sirainen
b561170c84d19ef1dee3d528939d77fd38047b3fTimo Sirainen#include <dirent.h>
4b058f90f9e8a2c6b2eed275de4eb8cc5195a71dTimo Sirainen#include <sys/stat.h>
9edba36ef9b2679b0585c345074b1f1d482bfd20Timo Sirainen};
b561170c84d19ef1dee3d528939d77fd38047b3fTimo Sirainen#include <CLucene.h>
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen#include <CLucene/util/CLStreams.h>
0dffa25d211be541ee3c953b23566a1a990789dfTimo Sirainen#include <CLucene/search/MultiPhraseQuery.h>
0dffa25d211be541ee3c953b23566a1a990789dfTimo Sirainen
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen/* Lucene's default is 10000. Use it here also.. */
0248b1c21bed383128c0d20ff11325a2f59d0410Phil Carmody#define MAX_TERMS_PER_DOCUMENT 10000
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen
b561170c84d19ef1dee3d528939d77fd38047b3fTimo Sirainen#define LUCENE_LOCK_OVERRIDE_SECS 60
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen
9edba36ef9b2679b0585c345074b1f1d482bfd20Timo Sirainenusing namespace lucene::document;
9edba36ef9b2679b0585c345074b1f1d482bfd20Timo Sirainenusing namespace lucene::index;
9edba36ef9b2679b0585c345074b1f1d482bfd20Timo Sirainenusing namespace lucene::search;
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainenusing namespace lucene::queryParser;
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainenusing namespace lucene::analysis;
9edba36ef9b2679b0585c345074b1f1d482bfd20Timo Sirainenusing namespace lucene::analysis;
fc7b32b6a2a65d604c8070b9b1a204f25c90b391Timo Sirainenusing namespace lucene::util;
50ae8852cb28b11b9589a4ed5f2b54b10b1ab591Timo Sirainen
50ae8852cb28b11b9589a4ed5f2b54b10b1ab591Timo Sirainenstruct lucene_index {
50ae8852cb28b11b9589a4ed5f2b54b10b1ab591Timo Sirainen char *path;
50ae8852cb28b11b9589a4ed5f2b54b10b1ab591Timo Sirainen wchar_t mailbox_guid[MAILBOX_GUID_HEX_LENGTH + 1];
9edba36ef9b2679b0585c345074b1f1d482bfd20Timo Sirainen
01af88dfbb7a022ddb3ab9fb4159f2a4a204ead3Timo Sirainen IndexReader *reader;
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen IndexWriter *writer;
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen IndexSearcher *searcher;
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen Analyzer *analyzer;
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen
4b058f90f9e8a2c6b2eed275de4eb8cc5195a71dTimo Sirainen Document *doc;
4b058f90f9e8a2c6b2eed275de4eb8cc5195a71dTimo Sirainen uint32_t prev_uid;
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen};
8e4a702a1f96f118976da6eb9ece344df625eabbTimo Sirainen
4b058f90f9e8a2c6b2eed275de4eb8cc5195a71dTimo Sirainenstruct lucene_index *lucene_index_init(const char *path)
9edba36ef9b2679b0585c345074b1f1d482bfd20Timo Sirainen{
c9bf63e9094761767a63ac6b189bcf60bcffdc44Timo Sirainen struct lucene_index *index;
4b058f90f9e8a2c6b2eed275de4eb8cc5195a71dTimo Sirainen
4b058f90f9e8a2c6b2eed275de4eb8cc5195a71dTimo Sirainen index = i_new(struct lucene_index, 1);
9edba36ef9b2679b0585c345074b1f1d482bfd20Timo Sirainen index->path = i_strdup(path);
4b058f90f9e8a2c6b2eed275de4eb8cc5195a71dTimo Sirainen index->analyzer = _CLNEW standard::StandardAnalyzer();
9edba36ef9b2679b0585c345074b1f1d482bfd20Timo Sirainen return index;
f874c9c2a43220d600b90456696246bf77981cd1Timo Sirainen}
f874c9c2a43220d600b90456696246bf77981cd1Timo Sirainen
f874c9c2a43220d600b90456696246bf77981cd1Timo Sirainenstatic void lucene_index_close(struct lucene_index *index)
f874c9c2a43220d600b90456696246bf77981cd1Timo Sirainen{
f874c9c2a43220d600b90456696246bf77981cd1Timo Sirainen _CLDELETE(index->reader);
f874c9c2a43220d600b90456696246bf77981cd1Timo Sirainen _CLDELETE(index->writer);
8e4a702a1f96f118976da6eb9ece344df625eabbTimo Sirainen _CLDELETE(index->searcher);
8e4a702a1f96f118976da6eb9ece344df625eabbTimo Sirainen}
8e4a702a1f96f118976da6eb9ece344df625eabbTimo Sirainen
8e4a702a1f96f118976da6eb9ece344df625eabbTimo Sirainenvoid lucene_index_deinit(struct lucene_index *index)
8e4a702a1f96f118976da6eb9ece344df625eabbTimo Sirainen{
8e4a702a1f96f118976da6eb9ece344df625eabbTimo Sirainen lucene_index_close(index);
8e4a702a1f96f118976da6eb9ece344df625eabbTimo Sirainen _CLDELETE(index->analyzer);
c9bf63e9094761767a63ac6b189bcf60bcffdc44Timo Sirainen i_free(index->path);
b3286cc6a70ab6b4c53301aa075b16898b80c880Timo Sirainen i_free(index);
b3286cc6a70ab6b4c53301aa075b16898b80c880Timo Sirainen}
96bd662dd0b11b4be42ebfa762f7ca328f37074aTimo Sirainen
4940e43005d04208fe957c8e5a359f9a53a23d1fTimo Sirainenstatic void
4b058f90f9e8a2c6b2eed275de4eb8cc5195a71dTimo Sirainenlucene_utf8_n_to_tchar(const unsigned char *src, size_t srcsize,
156a7c9057782ea8d805c4223082e1dd6041ef21Timo Sirainen wchar_t *dest, size_t destsize)
3bbe99d30871f49610aac0417ee5951d1e740b98Timo Sirainen{
8e4a702a1f96f118976da6eb9ece344df625eabbTimo Sirainen ARRAY_TYPE(unichars) dest_arr;
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen buffer_t buf = { 0, 0, { 0, 0, 0, 0, 0 } };
952f5f308ab79cd46ccd79b09db3215b4237d7d4Timo Sirainen
05a0f878264b9853d07f229ffff1bc21355157beTimo Sirainen i_assert(sizeof(wchar_t) == sizeof(unichar_t));
05a0f878264b9853d07f229ffff1bc21355157beTimo Sirainen
05a0f878264b9853d07f229ffff1bc21355157beTimo Sirainen buffer_create_data(&buf, dest, sizeof(wchar_t) * destsize);
05a0f878264b9853d07f229ffff1bc21355157beTimo Sirainen array_create_from_buffer(&dest_arr, &buf, sizeof(wchar_t));
05a0f878264b9853d07f229ffff1bc21355157beTimo Sirainen if (uni_utf8_to_ucs4_n(src, srcsize, &dest_arr) < 0)
05a0f878264b9853d07f229ffff1bc21355157beTimo Sirainen i_unreached();
05a0f878264b9853d07f229ffff1bc21355157beTimo Sirainen i_assert(array_count(&dest_arr)+1 == destsize);
05a0f878264b9853d07f229ffff1bc21355157beTimo Sirainen dest[destsize-1] = 0;
05a0f878264b9853d07f229ffff1bc21355157beTimo Sirainen}
05a0f878264b9853d07f229ffff1bc21355157beTimo Sirainen
05a0f878264b9853d07f229ffff1bc21355157beTimo Sirainenstatic const wchar_t *t_lucene_utf8_to_tchar(const char *str)
05a0f878264b9853d07f229ffff1bc21355157beTimo Sirainen{
05a0f878264b9853d07f229ffff1bc21355157beTimo Sirainen ARRAY_TYPE(unichars) dest_arr;
05a0f878264b9853d07f229ffff1bc21355157beTimo Sirainen const unichar_t *ret;
05a0f878264b9853d07f229ffff1bc21355157beTimo Sirainen
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen i_assert(sizeof(wchar_t) == sizeof(unichar_t));
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen t_array_init(&dest_arr, strlen(str) + 1);
b72b92d12bde9ca5f325a232babd74c6babef42fTimo Sirainen if (uni_utf8_to_ucs4(str, &dest_arr) < 0)
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen i_unreached();
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen (void)array_append_space(&dest_arr);
0a568c1e8a8066ce9d6467d891a9717bd2a24b26Phil Carmody ret = array_idx(&dest_arr, 0);
3281669db44d09a087a203201248abbc81b3cc1aTimo Sirainen return (const wchar_t *)ret;
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen}
b561170c84d19ef1dee3d528939d77fd38047b3fTimo Sirainen
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainenvoid lucene_index_select_mailbox(struct lucene_index *index,
8e361d2906b0e44f7175a20981f8d2280645b58bTimo Sirainen const wchar_t guid[MAILBOX_GUID_HEX_LENGTH])
8e361d2906b0e44f7175a20981f8d2280645b58bTimo Sirainen{
8e361d2906b0e44f7175a20981f8d2280645b58bTimo Sirainen memcpy(index->mailbox_guid, guid,
8e361d2906b0e44f7175a20981f8d2280645b58bTimo Sirainen MAILBOX_GUID_HEX_LENGTH * sizeof(wchar_t));
4b058f90f9e8a2c6b2eed275de4eb8cc5195a71dTimo Sirainen index->mailbox_guid[MAILBOX_GUID_HEX_LENGTH] = '\0';
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen}
49d9165e32509d6fd8fe57f65a52d41343558e9aTimo Sirainen
49d9165e32509d6fd8fe57f65a52d41343558e9aTimo Sirainenvoid lucene_index_unselect_mailbox(struct lucene_index *index)
49d9165e32509d6fd8fe57f65a52d41343558e9aTimo Sirainen{
49d9165e32509d6fd8fe57f65a52d41343558e9aTimo Sirainen memset(index->mailbox_guid, 0, sizeof(index->mailbox_guid));
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen}
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen
0a568c1e8a8066ce9d6467d891a9717bd2a24b26Phil Carmodystatic void lucene_handle_error(struct lucene_index *index, CLuceneError &err,
3281669db44d09a087a203201248abbc81b3cc1aTimo Sirainen const char *msg)
3281669db44d09a087a203201248abbc81b3cc1aTimo Sirainen{
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen const char *what = err.what();
b561170c84d19ef1dee3d528939d77fd38047b3fTimo Sirainen
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen i_error("lucene index %s: %s failed: %s", index->path, msg, what);
8e361d2906b0e44f7175a20981f8d2280645b58bTimo Sirainen}
bdf7a0f43f555483fe6ef47fcaab4f196f3b67b4Timo Sirainen
8e361d2906b0e44f7175a20981f8d2280645b58bTimo Sirainenstatic int lucene_index_open(struct lucene_index *index)
8e361d2906b0e44f7175a20981f8d2280645b58bTimo Sirainen{
bdf7a0f43f555483fe6ef47fcaab4f196f3b67b4Timo Sirainen if (index->reader != NULL)
4b058f90f9e8a2c6b2eed275de4eb8cc5195a71dTimo Sirainen return 1;
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen
bdf7a0f43f555483fe6ef47fcaab4f196f3b67b4Timo Sirainen if (!IndexReader::indexExists(index->path))
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen return 0;
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen
4b058f90f9e8a2c6b2eed275de4eb8cc5195a71dTimo Sirainen try {
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen index->reader = IndexReader::open(index->path);
b561170c84d19ef1dee3d528939d77fd38047b3fTimo Sirainen } catch (CLuceneError &err) {
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen lucene_handle_error(index, err, "IndexReader::open()");
b561170c84d19ef1dee3d528939d77fd38047b3fTimo Sirainen return -1;
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen }
4b058f90f9e8a2c6b2eed275de4eb8cc5195a71dTimo Sirainen return 1;
09539f3db2f1b3e24f40844d8456b06d318d0fe7Timo Sirainen}
09539f3db2f1b3e24f40844d8456b06d318d0fe7Timo Sirainen
09539f3db2f1b3e24f40844d8456b06d318d0fe7Timo Sirainenstatic int lucene_index_open_search(struct lucene_index *index)
09539f3db2f1b3e24f40844d8456b06d318d0fe7Timo Sirainen{
b561170c84d19ef1dee3d528939d77fd38047b3fTimo Sirainen int ret;
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen if (index->searcher != NULL)
0f66f12eb4cdbf47670975044c88d8f388bf92dfTimo Sirainen return 1;
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen
d5cebe7f98e63d4e2822863ef2faa4971e8b3a5dTimo Sirainen if ((ret = lucene_index_open(index)) <= 0)
b561170c84d19ef1dee3d528939d77fd38047b3fTimo Sirainen return ret;
d5cebe7f98e63d4e2822863ef2faa4971e8b3a5dTimo Sirainen
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen index->searcher = _CLNEW IndexSearcher(index->reader);
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen return 1;
8e361d2906b0e44f7175a20981f8d2280645b58bTimo Sirainen}
8e361d2906b0e44f7175a20981f8d2280645b58bTimo Sirainen
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainenstatic int
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainenlucene_doc_get_uid(struct lucene_index *index, Document *doc,
0f66f12eb4cdbf47670975044c88d8f388bf92dfTimo Sirainen const TCHAR *field_name, uint32_t *uid_r)
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen{
d5cebe7f98e63d4e2822863ef2faa4971e8b3a5dTimo Sirainen Field *field = doc->getField(field_name);
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen const TCHAR *uid = field == NULL ? NULL : field->stringValue();
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen if (uid == NULL) {
d5cebe7f98e63d4e2822863ef2faa4971e8b3a5dTimo Sirainen i_error("lucene: Corrupted FTS index %s: No UID for document",
d5cebe7f98e63d4e2822863ef2faa4971e8b3a5dTimo Sirainen index->path);
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen return -1;
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen }
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen uint32_t num = 0;
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen while (*uid != 0) {
68a4946b12583b88fa802e52ebee45cd96056772Timo Sirainen num = num*10 + (*uid - '0');
1345157bfafcc329b237e55a35db8c2ad368a42aTimo Sirainen uid++;
68a4946b12583b88fa802e52ebee45cd96056772Timo Sirainen }
1345157bfafcc329b237e55a35db8c2ad368a42aTimo Sirainen *uid_r = num;
1345157bfafcc329b237e55a35db8c2ad368a42aTimo Sirainen return 0;
1345157bfafcc329b237e55a35db8c2ad368a42aTimo Sirainen}
1345157bfafcc329b237e55a35db8c2ad368a42aTimo Sirainen
4b058f90f9e8a2c6b2eed275de4eb8cc5195a71dTimo Sirainenint lucene_index_get_last_uid(struct lucene_index *index, uint32_t *last_uid_r)
4b058f90f9e8a2c6b2eed275de4eb8cc5195a71dTimo Sirainen{
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen int ret = 0;
b561170c84d19ef1dee3d528939d77fd38047b3fTimo Sirainen
b561170c84d19ef1dee3d528939d77fd38047b3fTimo Sirainen *last_uid_r = 0;
4b058f90f9e8a2c6b2eed275de4eb8cc5195a71dTimo Sirainen
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen if ((ret = lucene_index_open_search(index)) <= 0)
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen return ret;
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen
4b058f90f9e8a2c6b2eed275de4eb8cc5195a71dTimo Sirainen Term mailbox_term(_T("box"), index->mailbox_guid);
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen TermQuery query(&mailbox_term);
4b058f90f9e8a2c6b2eed275de4eb8cc5195a71dTimo Sirainen
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen uint32_t last_uid = 0;
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen try {
4b058f90f9e8a2c6b2eed275de4eb8cc5195a71dTimo Sirainen Hits *hits = index->searcher->search(&query);
2ca64e9cb20ca517a5a0d4ba3b73dccb2a0dd973Timo Sirainen
4b058f90f9e8a2c6b2eed275de4eb8cc5195a71dTimo Sirainen for (size_t i = 0; i < hits->length(); i++) {
2ca64e9cb20ca517a5a0d4ba3b73dccb2a0dd973Timo Sirainen uint32_t uid;
2ca64e9cb20ca517a5a0d4ba3b73dccb2a0dd973Timo Sirainen
4b058f90f9e8a2c6b2eed275de4eb8cc5195a71dTimo Sirainen if (lucene_doc_get_uid(index, &hits->doc(i),
4b058f90f9e8a2c6b2eed275de4eb8cc5195a71dTimo Sirainen _T("uid"), &uid) < 0) {
5254d77805cd35b9356d072ba325c356c43b0d51Timo Sirainen ret = -1;
b561170c84d19ef1dee3d528939d77fd38047b3fTimo Sirainen break;
5254d77805cd35b9356d072ba325c356c43b0d51Timo Sirainen }
b561170c84d19ef1dee3d528939d77fd38047b3fTimo Sirainen
06eee07a834e38c2a95603016f9405eb7ac5857dTimo Sirainen if (uid > last_uid)
e9219523bf8c03a1230a3a2710a2a291dc3a82a8Timo Sirainen last_uid = uid;
e9219523bf8c03a1230a3a2710a2a291dc3a82a8Timo Sirainen }
e9219523bf8c03a1230a3a2710a2a291dc3a82a8Timo Sirainen _CLDELETE(hits);
e9219523bf8c03a1230a3a2710a2a291dc3a82a8Timo Sirainen } catch (CLuceneError &err) {
5254d77805cd35b9356d072ba325c356c43b0d51Timo Sirainen lucene_handle_error(index, err, "last_uid search");
5254d77805cd35b9356d072ba325c356c43b0d51Timo Sirainen ret = -1;
4b058f90f9e8a2c6b2eed275de4eb8cc5195a71dTimo Sirainen }
5254d77805cd35b9356d072ba325c356c43b0d51Timo Sirainen *last_uid_r = last_uid;
b561170c84d19ef1dee3d528939d77fd38047b3fTimo Sirainen return ret;
5254d77805cd35b9356d072ba325c356c43b0d51Timo Sirainen}
5254d77805cd35b9356d072ba325c356c43b0d51Timo Sirainen
b561170c84d19ef1dee3d528939d77fd38047b3fTimo Sirainenint lucene_index_get_doc_count(struct lucene_index *index, uint32_t *count_r)
4b058f90f9e8a2c6b2eed275de4eb8cc5195a71dTimo Sirainen{
b561170c84d19ef1dee3d528939d77fd38047b3fTimo Sirainen int ret;
5254d77805cd35b9356d072ba325c356c43b0d51Timo Sirainen
5254d77805cd35b9356d072ba325c356c43b0d51Timo Sirainen if (index->reader == NULL) {
5254d77805cd35b9356d072ba325c356c43b0d51Timo Sirainen lucene_index_close(index);
fc7b32b6a2a65d604c8070b9b1a204f25c90b391Timo Sirainen if ((ret = lucene_index_open(index)) < 0)
b561170c84d19ef1dee3d528939d77fd38047b3fTimo Sirainen return -1;
b561170c84d19ef1dee3d528939d77fd38047b3fTimo Sirainen if (ret == 0) {
5254d77805cd35b9356d072ba325c356c43b0d51Timo Sirainen *count_r = 0;
5254d77805cd35b9356d072ba325c356c43b0d51Timo Sirainen return 0;
5254d77805cd35b9356d072ba325c356c43b0d51Timo Sirainen }
5254d77805cd35b9356d072ba325c356c43b0d51Timo Sirainen }
5254d77805cd35b9356d072ba325c356c43b0d51Timo Sirainen return index->reader->numDocs();
b561170c84d19ef1dee3d528939d77fd38047b3fTimo Sirainen}
5254d77805cd35b9356d072ba325c356c43b0d51Timo Sirainen
5254d77805cd35b9356d072ba325c356c43b0d51Timo Sirainenint lucene_index_build_init(struct lucene_index *index)
4b058f90f9e8a2c6b2eed275de4eb8cc5195a71dTimo Sirainen{
0138d3060877805f0de0bf631642de100ff96b79Timo Sirainen const char *lock_path;
0138d3060877805f0de0bf631642de100ff96b79Timo Sirainen struct stat st;
0138d3060877805f0de0bf631642de100ff96b79Timo Sirainen
4b058f90f9e8a2c6b2eed275de4eb8cc5195a71dTimo Sirainen lucene_index_close(index);
0138d3060877805f0de0bf631642de100ff96b79Timo Sirainen
0138d3060877805f0de0bf631642de100ff96b79Timo Sirainen lock_path = t_strdup_printf("%s/write.lock", index->path);
0138d3060877805f0de0bf631642de100ff96b79Timo Sirainen if (stat(lock_path, &st) == 0 &&
4b058f90f9e8a2c6b2eed275de4eb8cc5195a71dTimo Sirainen st.st_mtime < time(NULL) - LUCENE_LOCK_OVERRIDE_SECS) {
0138d3060877805f0de0bf631642de100ff96b79Timo Sirainen if (unlink(lock_path) < 0)
4b058f90f9e8a2c6b2eed275de4eb8cc5195a71dTimo Sirainen i_error("unlink(%s) failed: %m");
0138d3060877805f0de0bf631642de100ff96b79Timo Sirainen }
0138d3060877805f0de0bf631642de100ff96b79Timo Sirainen
4b058f90f9e8a2c6b2eed275de4eb8cc5195a71dTimo Sirainen bool exists = IndexReader::indexExists(index->path);
a05b31e6bb304142baf496e80072aa524e2dae3eTimo Sirainen try {
a05b31e6bb304142baf496e80072aa524e2dae3eTimo Sirainen index->writer = _CLNEW IndexWriter(index->path,
a05b31e6bb304142baf496e80072aa524e2dae3eTimo Sirainen index->analyzer, !exists);
a05b31e6bb304142baf496e80072aa524e2dae3eTimo Sirainen } catch (CLuceneError &err) {
06eee07a834e38c2a95603016f9405eb7ac5857dTimo Sirainen lucene_handle_error(index, err, "IndexWriter()");
e9219523bf8c03a1230a3a2710a2a291dc3a82a8Timo Sirainen return -1;
e9219523bf8c03a1230a3a2710a2a291dc3a82a8Timo Sirainen }
e9219523bf8c03a1230a3a2710a2a291dc3a82a8Timo Sirainen index->writer->setMaxFieldLength(MAX_TERMS_PER_DOCUMENT);
e9219523bf8c03a1230a3a2710a2a291dc3a82a8Timo Sirainen return 0;
a05b31e6bb304142baf496e80072aa524e2dae3eTimo Sirainen}
a05b31e6bb304142baf496e80072aa524e2dae3eTimo Sirainen
4b058f90f9e8a2c6b2eed275de4eb8cc5195a71dTimo Sirainenstatic int lucene_index_build_flush(struct lucene_index *index)
4b058f90f9e8a2c6b2eed275de4eb8cc5195a71dTimo Sirainen{
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen int ret = 0;
b561170c84d19ef1dee3d528939d77fd38047b3fTimo Sirainen
8d6cb44a0161d88743756733f83c4fb278485987Timo Sirainen if (index->doc == NULL)
4b058f90f9e8a2c6b2eed275de4eb8cc5195a71dTimo Sirainen return 0;
b561170c84d19ef1dee3d528939d77fd38047b3fTimo Sirainen
4b058f90f9e8a2c6b2eed275de4eb8cc5195a71dTimo Sirainen try {
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen index->writer->addDocument(index->doc);
4b058f90f9e8a2c6b2eed275de4eb8cc5195a71dTimo Sirainen } catch (CLuceneError &err) {
4b058f90f9e8a2c6b2eed275de4eb8cc5195a71dTimo Sirainen lucene_handle_error(index, err, "IndexWriter::addDocument()");
4b058f90f9e8a2c6b2eed275de4eb8cc5195a71dTimo Sirainen ret = -1;
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen }
4b058f90f9e8a2c6b2eed275de4eb8cc5195a71dTimo Sirainen
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen _CLDELETE(index->doc);
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen index->doc = NULL;
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen return ret;
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen}
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainenint lucene_index_build_more(struct lucene_index *index, uint32_t uid,
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen const unsigned char *data, size_t size,
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen const char *hdr_name)
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen{
4b058f90f9e8a2c6b2eed275de4eb8cc5195a71dTimo Sirainen wchar_t id[MAX_INT_STRLEN];
4b058f90f9e8a2c6b2eed275de4eb8cc5195a71dTimo Sirainen size_t namesize, datasize;
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen
4b058f90f9e8a2c6b2eed275de4eb8cc5195a71dTimo Sirainen if (uid != index->prev_uid) {
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen if (lucene_index_build_flush(index) < 0)
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen return -1;
b561170c84d19ef1dee3d528939d77fd38047b3fTimo Sirainen index->prev_uid = uid;
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen
b561170c84d19ef1dee3d528939d77fd38047b3fTimo Sirainen index->doc = _CLNEW Document();
b561170c84d19ef1dee3d528939d77fd38047b3fTimo Sirainen swprintf(id, N_ELEMENTS(id), L"%u", uid);
4b058f90f9e8a2c6b2eed275de4eb8cc5195a71dTimo Sirainen index->doc->add(*_CLNEW Field(_T("uid"), id, Field::STORE_YES | Field::INDEX_UNTOKENIZED));
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen index->doc->add(*_CLNEW Field(_T("box"), index->mailbox_guid, Field::STORE_YES | Field::INDEX_UNTOKENIZED));
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen }
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen
38499bb33c74acc6d725204e893cfc02a5890ec7Timo Sirainen datasize = uni_utf8_strlen_n(data, size) + 1;
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen wchar_t dest[datasize];
b561170c84d19ef1dee3d528939d77fd38047b3fTimo Sirainen lucene_utf8_n_to_tchar(data, size, dest, datasize);
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen if (hdr_name != NULL) {
8d80659e504ffb34bb0c6a633184fece35751b18Timo Sirainen /* hdr_name should be ASCII, but don't break in case it isn't */
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen namesize = uni_utf8_strlen(hdr_name) + 1;
8d6cb44a0161d88743756733f83c4fb278485987Timo Sirainen wchar_t wname[namesize];
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen lucene_utf8_n_to_tchar((const unsigned char *)hdr_name,
bbe0ee356dc610a8d054b336534d8f33c49a36b7Timo Sirainen strlen(hdr_name), wname, namesize);
bbe0ee356dc610a8d054b336534d8f33c49a36b7Timo Sirainen index->doc->add(*_CLNEW Field(_T("hdr"), wname, Field::STORE_NO | Field::INDEX_UNTOKENIZED));
b561170c84d19ef1dee3d528939d77fd38047b3fTimo Sirainen index->doc->add(*_CLNEW Field(_T("hdr"), dest, Field::STORE_NO | Field::INDEX_TOKENIZED));
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen if (fts_header_want_indexed(hdr_name))
b561170c84d19ef1dee3d528939d77fd38047b3fTimo Sirainen index->doc->add(*_CLNEW Field(wname, dest, Field::STORE_NO | Field::INDEX_TOKENIZED));
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen } else if (size > 0) {
b561170c84d19ef1dee3d528939d77fd38047b3fTimo Sirainen index->doc->add(*_CLNEW Field(_T("body"), dest, Field::STORE_NO | Field::INDEX_TOKENIZED));
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen }
b561170c84d19ef1dee3d528939d77fd38047b3fTimo Sirainen return 0;
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen}
f874c9c2a43220d600b90456696246bf77981cd1Timo Sirainen
40f114f2e12605483b21e7cf048b7c9bd9568148Timo Sirainenint lucene_index_build_deinit(struct lucene_index *index)
40f114f2e12605483b21e7cf048b7c9bd9568148Timo Sirainen{
b561170c84d19ef1dee3d528939d77fd38047b3fTimo Sirainen int ret = 0;
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen if (index->prev_uid == 0) {
b561170c84d19ef1dee3d528939d77fd38047b3fTimo Sirainen /* no changes. */
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen return 0;
8d6cb44a0161d88743756733f83c4fb278485987Timo Sirainen }
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen index->prev_uid = 0;
b561170c84d19ef1dee3d528939d77fd38047b3fTimo Sirainen
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen if (index->writer == NULL) {
02af13e6a41b9c3e6ad1e0b692b5b9741142c221Timo Sirainen lucene_index_close(index);
e915ba86f157549b7d127f92312bc487b249df7eTimo Sirainen return -1;
e915ba86f157549b7d127f92312bc487b249df7eTimo Sirainen }
e915ba86f157549b7d127f92312bc487b249df7eTimo Sirainen
e915ba86f157549b7d127f92312bc487b249df7eTimo Sirainen if (lucene_index_build_flush(index) < 0)
e915ba86f157549b7d127f92312bc487b249df7eTimo Sirainen ret = -1;
e915ba86f157549b7d127f92312bc487b249df7eTimo Sirainen
e915ba86f157549b7d127f92312bc487b249df7eTimo Sirainen try {
e915ba86f157549b7d127f92312bc487b249df7eTimo Sirainen index->writer->optimize();
e915ba86f157549b7d127f92312bc487b249df7eTimo Sirainen } catch (CLuceneError &err) {
e915ba86f157549b7d127f92312bc487b249df7eTimo Sirainen lucene_handle_error(index, err, "IndexWriter::optimize()");
e915ba86f157549b7d127f92312bc487b249df7eTimo Sirainen ret = -1;
e915ba86f157549b7d127f92312bc487b249df7eTimo Sirainen }
e915ba86f157549b7d127f92312bc487b249df7eTimo Sirainen try {
e915ba86f157549b7d127f92312bc487b249df7eTimo Sirainen index->writer->close();
6ef7e31619edfaa17ed044b45861d106a86191efTimo Sirainen } catch (CLuceneError &err) {
02af13e6a41b9c3e6ad1e0b692b5b9741142c221Timo Sirainen lucene_handle_error(index, err, "IndexWriter::close()");
02af13e6a41b9c3e6ad1e0b692b5b9741142c221Timo Sirainen ret = -1;
02af13e6a41b9c3e6ad1e0b692b5b9741142c221Timo Sirainen }
02af13e6a41b9c3e6ad1e0b692b5b9741142c221Timo Sirainen
02af13e6a41b9c3e6ad1e0b692b5b9741142c221Timo Sirainen lucene_index_close(index);
02af13e6a41b9c3e6ad1e0b692b5b9741142c221Timo Sirainen return ret;
a94f166ade968e8127b3eeda729417db9c6ad52fTimo Sirainen}
a94f166ade968e8127b3eeda729417db9c6ad52fTimo Sirainen
a94f166ade968e8127b3eeda729417db9c6ad52fTimo Sirainenstruct uid_id_map {
a94f166ade968e8127b3eeda729417db9c6ad52fTimo Sirainen uint32_t imap_uid;
a94f166ade968e8127b3eeda729417db9c6ad52fTimo Sirainen int32_t lucene_id;
a94f166ade968e8127b3eeda729417db9c6ad52fTimo Sirainen};
bfdef55e14ade87589bd10a6a1dafce53427ecf4Timo SirainenARRAY_DEFINE_TYPE(uid_id_map, struct uid_id_map);
a94f166ade968e8127b3eeda729417db9c6ad52fTimo Sirainen
a94f166ade968e8127b3eeda729417db9c6ad52fTimo Sirainenstatic int uid_id_map_cmp(const struct uid_id_map *u1,
a94f166ade968e8127b3eeda729417db9c6ad52fTimo Sirainen const struct uid_id_map *u2)
a94f166ade968e8127b3eeda729417db9c6ad52fTimo Sirainen{
a94f166ade968e8127b3eeda729417db9c6ad52fTimo Sirainen if (u1->imap_uid < u2->imap_uid)
a94f166ade968e8127b3eeda729417db9c6ad52fTimo Sirainen return -1;
a94f166ade968e8127b3eeda729417db9c6ad52fTimo Sirainen if (u1->imap_uid > u2->imap_uid)
return 1;
return 0;
}
/* Append one (IMAP UID, Lucene doc ID) entry to uid_id_map for every
   document whose "box" field matches the selected mailbox, then sort the
   array by UID. The searcher must already be open.
   Returns 0 on success, -1 on error; the array is sorted in both cases and
   may hold a partial result on error. */
static int get_mailbox_uid_id_map(struct lucene_index *index,
				  ARRAY_TYPE(uid_id_map) *uid_id_map)
{
	int ret = 0;

	/* get a sorted map of imap uid -> lucene id */
	Term mailbox_term(_T("box"), index->mailbox_guid);
	TermQuery query(&mailbox_term);

	try {
		Hits *hits = index->searcher->search(&query);

		for (size_t pos = 0; pos < hits->length(); pos++) {
			uint32_t uid;

			if (lucene_doc_get_uid(index, &hits->doc(pos),
					       _T("uid"), &uid) < 0) {
				ret = -1;
				break;
			}

			struct uid_id_map *entry =
				array_append_space(uid_id_map);
			entry->imap_uid = uid;
			entry->lucene_id = hits->id(pos);
		}
		_CLDELETE(hits);
	} catch (CLuceneError &err) {
		lucene_handle_error(index, err, "expunge search");
		ret = -1;
	}

	array_sort(uid_id_map, uid_id_map_cmp);
	return ret;
}
int lucene_index_optimize_scan(struct lucene_index *index,
const ARRAY_TYPE(seq_range) *existing_uids,
ARRAY_TYPE(seq_range) *missing_uids_r)
{
ARRAY_TYPE(uid_id_map) uid_id_map_arr;
const struct uid_id_map *uid_id_map;
struct seq_range_iter iter;
unsigned int n, i, count;
uint32_t uid;
int ret;
if ((ret = lucene_index_open_search(index)) <= 0) {
if (ret < 0)
return -1;
/* index has been deleted, everything is missing */
seq_range_array_merge(missing_uids_r, existing_uids);
return 0;
}
i_array_init(&uid_id_map_arr, 128);
if (get_mailbox_uid_id_map(index, &uid_id_map_arr) < 0)
return -1;
uid_id_map = array_get(&uid_id_map_arr, &count);
seq_range_array_iter_init(&iter, existing_uids); n = i = 0;
while (seq_range_array_iter_nth(&iter, n++, &uid)) {
while (i < count && uid_id_map[i].imap_uid < uid) {
/* expunged message */
index->reader->deleteDocument(uid_id_map[i].lucene_id);
i++;
}
if (i == count || uid_id_map[i].imap_uid > uid) {
/* uid is missing from index */
seq_range_array_add(missing_uids_r, 0, uid);
} else {
i++;
}
}
for (; i < count; i++)
index->reader->deleteDocument(uid_id_map[i].lucene_id);
array_free(&uid_id_map_arr);
return ret;
}
/* Optimize the index with a temporary IndexWriter. If the index directory
   is currently locked, the lock is removed first.
   Returns 0 on success, -1 if the writer couldn't be created or the
   optimization failed (the error is logged). */
int lucene_index_optimize_finish(struct lucene_index *index)
{
	IndexWriter *writer;
	int ret = 0;

	if (IndexReader::isLocked(index->path))
		IndexReader::unlock(index->path);

	/* creating the writer is guarded the same way as in
	   lucene_index_build_init() */
	try {
		writer = _CLNEW IndexWriter(index->path, index->analyzer, false);
	} catch (CLuceneError &err) {
		lucene_handle_error(index, err, "IndexWriter()");
		return -1;
	}

	try {
		writer->optimize();
	} catch (CLuceneError &err) {
		lucene_handle_error(index, err, "IndexWriter::optimize()");
		ret = -1;
	}
	_CLDELETE(writer);
	return ret;
}
// Mostly copy&pasted from CLucene's QueryParser
//
// Runs queryText through the analyzer for field _field and builds a query
// from the resulting tokens:
//  - no tokens: returns NULL
//  - one token: FuzzyQuery if fuzzy is set, otherwise PrefixQuery
//  - several tokens, all at one position: BooleanQuery of SHOULD TermQuery
//    clauses
//  - several tokens, some sharing a position: MultiPhraseQuery
//  - several tokens, each advancing the position: PhraseQuery
// The fuzzy flag only affects the single-token case. The returned query is
// allocated with _CLNEW; presumably the caller takes ownership.
static Query* getFieldQuery(Analyzer *analyzer, const TCHAR* _field, const TCHAR* queryText, bool fuzzy) {
// Use the analyzer to get all the tokens, and then build a TermQuery,
// PhraseQuery, or nothing based on the term count
StringReader reader(queryText);
TokenStream* source = analyzer->tokenStream(_field, &reader);
// NOTE(review): the Deletor::Object parameter suggests v frees its tokens
// on destruction -- confirm against CLucene's CLVector
CLVector<CL_NS(analysis)::Token*, Deletor::Object<CL_NS(analysis)::Token> > v;
CL_NS(analysis)::Token* t = NULL;
int32_t positionCount = 0;
bool severalTokensAtSamePosition = false;
// Collect tokens until the stream returns NULL. A fresh Token is allocated
// for every call and deleted again if the stream had nothing to put in it.
while (true) {
t = _CLNEW Token();
try {
Token* _t = source->next(t);
if (_t == NULL) _CLDELETE(t);
}_CLCATCH_ERR(CL_ERR_IO, _CLLDELETE(source);_CLLDELETE(t);,{
t = NULL;
});
if (t == NULL)
break;
v.push_back(t);
// a zero position increment means this token shares the previous token's
// position
if (t->getPositionIncrement() != 0)
positionCount += t->getPositionIncrement();
else
severalTokensAtSamePosition = true;
}
try {
source->close();
}
_CLCATCH_ERR_CLEANUP(CL_ERR_IO, {_CLLDELETE(source);_CLLDELETE(t);} ); /* cleanup */
_CLLDELETE(source);
if (v.size() == 0)
return NULL;
else if (v.size() == 1) {
// single token: fuzzy or prefix match on that one term
Term* tm = _CLNEW Term(_field, v.at(0)->termBuffer());
Query* ret;
if (fuzzy)
ret = _CLNEW FuzzyQuery( tm );
else
ret = _CLNEW PrefixQuery( tm );
// presumably drops this function's reference to the term
_CLDECDELETE(tm);
return ret;
} else {
if (severalTokensAtSamePosition) {
if (positionCount == 1) {
// no phrase query:
// every token sits at the same position, so match any of them
BooleanQuery* q = _CLNEW BooleanQuery(true);
for(size_t i=0; i<v.size(); i++ ){
Term* tm = _CLNEW Term(_field, v.at(i)->termBuffer());
q->add(_CLNEW TermQuery(tm), true, BooleanClause::SHOULD);
_CLDECDELETE(tm);
}
return q;
}else {
// group the terms that share a position and add each group to the
// phrase at that position
MultiPhraseQuery* mpq = _CLNEW MultiPhraseQuery();
CLArrayList<Term*> multiTerms;
int32_t position = -1;
for (size_t i = 0; i < v.size(); i++) {
t = v.at(i);
// a positive increment starts a new position: emit the group collected
// so far
if (t->getPositionIncrement() > 0 && multiTerms.size() > 0) {
ValueArray<Term*> termsArray(multiTerms.size());
multiTerms.toArray(termsArray.values);
mpq->add(&termsArray,position);
multiTerms.clear();
}
position += t->getPositionIncrement();
multiTerms.push_back(_CLNEW Term(_field, t->termBuffer()));
}
// emit the final group
ValueArray<Term*> termsArray(multiTerms.size());
multiTerms.toArray(termsArray.values);
mpq->add(&termsArray,position);
return mpq;
}
}else {
// plain phrase: one term per position
PhraseQuery* pq = _CLNEW PhraseQuery();
int32_t position = -1;
for (size_t i = 0; i < v.size(); i++) {
t = v.at(i);
Term* tm = _CLNEW Term(_field, t->termBuffer());
position += t->getPositionIncrement();
pq->add(tm,position);
_CLDECDELETE(tm);
}
return pq;
}
}
}
/* Build a Lucene query that looks for the search argument's string value
   in the field named by key. The UTF-8 value is converted to TCHARs and
   handed to getFieldQuery() together with the index's analyzer and the
   argument's fuzzy flag. The result is whatever getFieldQuery() returns,
   which can be NULL when it produced no tokens. */
static Query *
lucene_get_query(struct lucene_index *index,
		 const TCHAR *key, const struct mail_search_arg *arg)
{
	return getFieldQuery(index->analyzer, key,
			     t_lucene_utf8_to_tchar(arg->value.str),
			     arg->fuzzy);
}
static bool
lucene_add_definite_query(struct lucene_index *index, BooleanQuery &query,
struct mail_search_arg *arg, bool and_args)
{
Query *q;
if (arg->match_not && !and_args) {
/* FIXME: we could handle this by doing multiple queries.. */
return false;
}
switch (arg->type) {
case SEARCH_TEXT: {
BooleanQuery *bq = _CLNEW BooleanQuery();
Query *q1 = lucene_get_query(index, _T("hdr"), arg);
Query *q2 = lucene_get_query(index, _T("body"), arg);
bq->add(q1, true, BooleanClause::SHOULD);
bq->add(q2, true, BooleanClause::SHOULD);
q = bq;
break;
}
case SEARCH_BODY:
q = lucene_get_query(index, _T("body"), arg);
break;
case SEARCH_HEADER:
case SEARCH_HEADER_ADDRESS:
case SEARCH_HEADER_COMPRESS_LWSP:
if (!fts_header_want_indexed(arg->hdr_field_name))
return false;
q = lucene_get_query(index,
t_lucene_utf8_to_tchar(arg->hdr_field_name),
arg);
break;
default:
return false;
}
if (!and_args)
query.add(q, true, BooleanClause::SHOULD);
else if (!arg->match_not)
query.add(q, true, BooleanClause::MUST);
else
query.add(q, true, BooleanClause::MUST_NOT);
return true;
}
static bool
lucene_add_maybe_query(struct lucene_index *index, BooleanQuery &query,
struct mail_search_arg *arg, bool and_args)
{
Query *q;
if (arg->match_not && !and_args) {
/* FIXME: we could handle this by doing multiple queries.. */
return false;
}
switch (arg->type) {
case SEARCH_HEADER:
case SEARCH_HEADER_ADDRESS:
case SEARCH_HEADER_COMPRESS_LWSP:
if (fts_header_want_indexed(arg->hdr_field_name))
return false;
/* we can check if the search key exists in some header and
filter out the messages that have no chance of matching */
q = lucene_get_query(index, _T("hdr"), arg);
break;
default:
return false;
}
if (!and_args)
query.add(q, true, BooleanClause::SHOULD);
else if (!arg->match_not)
query.add(q, true, BooleanClause::MUST);
else
query.add(q, true, BooleanClause::MUST_NOT);
return true;
}
/* Run search_query restricted to documents whose "box" field equals
   index->mailbox_guid.

   The UID of every hit is added to uids_r. When result is non-NULL, each
   hit's UID and score are also appended to result->scores, and
   result->scores_sorted is left TRUE only if the UIDs were never seen in
   decreasing order.

   Returns 0 on success, -1 if a document's UID couldn't be read or if
   CLucene threw an error (which is reported via lucene_handle_error()). */
static int
lucene_index_search(struct lucene_index *index,
		    Query &search_query, struct fts_result *result,
		    ARRAY_TYPE(seq_range) *uids_r)
{
	struct fts_score_map *score;
	int ret = 0;

	BooleanQuery query;
	query.add(&search_query, BooleanClause::MUST);

	Term mailbox_term(_T("box"), index->mailbox_guid);
	TermQuery mailbox_query(&mailbox_term);
	query.add(&mailbox_query, BooleanClause::MUST);

	/* declared outside the try block so that the catch handler can
	   release it if an error is thrown while iterating the hits */
	Hits *hits = NULL;
	try {
		hits = index->searcher->search(&query);

		uint32_t last_uid = 0;
		if (result != NULL)
			result->scores_sorted = true;

		for (size_t i = 0; i < hits->length(); i++) {
			uint32_t uid;

			if (lucene_doc_get_uid(index, &hits->doc(i),
					       _T("uid"), &uid) < 0) {
				ret = -1;
				break;
			}

			/* the UID list is wanted even when the caller isn't
			   interested in scores (result == NULL) */
			seq_range_array_add(uids_r, 0, uid);

			if (result != NULL) {
				if (uid < last_uid)
					result->scores_sorted = false;
				last_uid = uid;

				score = array_append_space(&result->scores);
				score->uid = uid;
				score->score = hits->score(i);
			}
		}
		_CLDELETE(hits);
		return ret;
	} catch (CLuceneError &err) {
		_CLDELETE(hits);
		lucene_handle_error(index, err, "search");
		return -1;
	}
}
/* Look up the given search arguments from the index of the current mailbox.

   Two passes are made over args. The first collects the arguments that
   lucene_add_definite_query() accepts and stores the matching UIDs (with
   scores) in result->definite_uids. The second collects the arguments that
   lucene_add_maybe_query() accepts and searches them into
   result->maybe_uids. Every argument accepted by either pass gets its
   match_always flag set.

   Returns 0 on success, -1 if the index couldn't be opened for searching
   or if a search failed. */
int lucene_index_lookup(struct lucene_index *index,
			struct mail_search_arg *args, bool and_args,
			struct fts_result *result)
{
	if (lucene_index_open_search(index) <= 0)
		return -1;

	/* pass 1: arguments the index can answer by itself */
	BooleanQuery def_query;
	bool definites_added = false;
	for (struct mail_search_arg *cur = args; cur != NULL; cur = cur->next) {
		if (!lucene_add_definite_query(index, def_query, cur, and_args))
			continue;
		cur->match_always = true;
		definites_added = true;
	}
	if (definites_added &&
	    lucene_index_search(index, def_query, result,
				&result->definite_uids) < 0)
		return -1;

	/* pass 2: arguments the index can only narrow down */
	BooleanQuery maybe_query;
	bool maybies_added = false;
	for (struct mail_search_arg *cur = args; cur != NULL; cur = cur->next) {
		if (!lucene_add_maybe_query(index, maybe_query, cur, and_args))
			continue;
		cur->match_always = true;
		maybies_added = true;
	}
	if (maybies_added &&
	    lucene_index_search(index, maybe_query, NULL,
				&result->maybe_uids) < 0)
		return -1;

	return 0;
}
static int
lucene_index_search_multi(struct lucene_index *index, struct hash_table *guids,
Query &search_query, struct fts_multi_result *result)
{
struct fts_score_map *score;
int ret = 0;
BooleanQuery query;
query.add(&search_query, BooleanClause::MUST);
BooleanQuery mailbox_query;
struct hash_iterate_context *iter;
void *key, *value;
iter = hash_table_iterate_init(guids);
while (hash_table_iterate(iter, &key, &value)) {
Term *term = _CLNEW Term(_T("box"), (wchar_t *)key);
TermQuery *q = _CLNEW TermQuery(term);
mailbox_query.add(q, true, BooleanClause::SHOULD);
}
hash_table_iterate_deinit(&iter);
query.add(&mailbox_query, BooleanClause::MUST);
try {
Hits *hits = index->searcher->search(&query);
for (size_t i = 0; i < hits->length(); i++) {
uint32_t uid;
Field *field = hits->doc(i).getField(_T("box"));
const TCHAR *box_guid = field == NULL ? NULL : field->stringValue();
if (box_guid == NULL) {
i_error("lucene: Corrupted FTS index %s: No mailbox for document",
index->path);
ret = -1;
break;
}
struct fts_result *br = (struct fts_result *)
hash_table_lookup(guids, (const void *)box_guid);
if (br == NULL) {
i_warning("lucene: Returned unexpected mailbox with GUID %ls", box_guid);
continue;
}
if (lucene_doc_get_uid(index, &hits->doc(i),
_T("uid"), &uid) < 0) {
ret = -1;
break;
}
if (!array_is_created(&br->definite_uids)) {
p_array_init(&br->definite_uids, result->pool, 32);
p_array_init(&br->scores, result->pool, 32);
}
seq_range_array_add(&br->definite_uids, 0, uid);
score = array_append_space(&br->scores);
score->uid = uid;
score->score = hits->score(i);
}
_CLDELETE(hits);
return ret;
} catch (CLuceneError &err) {
lucene_handle_error(index, err, "multi search");
return -1;
}
}
/* Look up the given search arguments from several mailboxes at once.

   Only the arguments accepted by lucene_add_definite_query() are searched;
   each of them gets its match_always flag set. The combined query is run
   by lucene_index_search_multi() against the mailboxes in guids. If no
   argument was accepted, no search is done.

   Returns 0 on success, -1 if the index couldn't be opened for searching
   or if the search failed. */
int lucene_index_lookup_multi(struct lucene_index *index,
			      struct hash_table *guids,
			      struct mail_search_arg *args, bool and_args,
			      struct fts_multi_result *result)
{
	if (lucene_index_open_search(index) <= 0)
		return -1;

	BooleanQuery def_query;
	bool definites_added = false;
	for (struct mail_search_arg *cur = args; cur != NULL; cur = cur->next) {
		if (!lucene_add_definite_query(index, def_query, cur, and_args))
			continue;
		cur->match_always = true;
		definites_added = true;
	}

	if (!definites_added)
		return 0;

	if (lucene_index_search_multi(index, guids, def_query, result) < 0)
		return -1;
	return 0;
}