lucene-wrapper.cc revision 30003461d9c5b090384b252260916cf50954d943
02c335c23bf5fa225a467c19f2c063fb0dc7b8c3Timo Sirainen/* Copyright (c) 2006-2010 Dovecot authors, see the included COPYING file */
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen/* Lucene's default is 10000. Use it here also.. */
50ae8852cb28b11b9589a4ed5f2b54b10b1ab591Timo Sirainen wchar_t mailbox_guid[MAILBOX_GUID_HEX_LENGTH + 1];
4b058f90f9e8a2c6b2eed275de4eb8cc5195a71dTimo Sirainenstruct lucene_index *lucene_index_init(const char *path)
4b058f90f9e8a2c6b2eed275de4eb8cc5195a71dTimo Sirainen index->analyzer = _CLNEW standard::StandardAnalyzer();
f874c9c2a43220d600b90456696246bf77981cd1Timo Sirainenstatic void lucene_index_close(struct lucene_index *index)
8e4a702a1f96f118976da6eb9ece344df625eabbTimo Sirainenvoid lucene_index_deinit(struct lucene_index *index)
4b058f90f9e8a2c6b2eed275de4eb8cc5195a71dTimo Sirainenlucene_utf8_n_to_tchar(const unsigned char *src, size_t srcsize,
05a0f878264b9853d07f229ffff1bc21355157beTimo Sirainen i_assert(sizeof(wchar_t) == sizeof(unichar_t));
05a0f878264b9853d07f229ffff1bc21355157beTimo Sirainen buffer_create_data(&buf, dest, sizeof(wchar_t) * destsize);
05a0f878264b9853d07f229ffff1bc21355157beTimo Sirainen array_create_from_buffer(&dest_arr, &buf, sizeof(wchar_t));
05a0f878264b9853d07f229ffff1bc21355157beTimo Sirainen if (uni_utf8_to_ucs4_n(src, srcsize, &dest_arr) < 0)
05a0f878264b9853d07f229ffff1bc21355157beTimo Sirainen i_assert(array_count(&dest_arr)+1 == destsize);
05a0f878264b9853d07f229ffff1bc21355157beTimo Sirainenstatic const wchar_t *t_lucene_utf8_to_tchar(const char *str)
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen i_assert(sizeof(wchar_t) == sizeof(unichar_t));
3281669db44d09a087a203201248abbc81b3cc1aTimo Sirainen return (const wchar_t *)ret;
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainenvoid lucene_index_select_mailbox(struct lucene_index *index,
8e361d2906b0e44f7175a20981f8d2280645b58bTimo Sirainen MAILBOX_GUID_HEX_LENGTH * sizeof(wchar_t));
4b058f90f9e8a2c6b2eed275de4eb8cc5195a71dTimo Sirainen index->mailbox_guid[MAILBOX_GUID_HEX_LENGTH] = '\0';
49d9165e32509d6fd8fe57f65a52d41343558e9aTimo Sirainenvoid lucene_index_unselect_mailbox(struct lucene_index *index)
49d9165e32509d6fd8fe57f65a52d41343558e9aTimo Sirainen memset(index->mailbox_guid, 0, sizeof(index->mailbox_guid));
0a568c1e8a8066ce9d6467d891a9717bd2a24b26Phil Carmodystatic void lucene_handle_error(struct lucene_index *index, CLuceneError &err,
3281669db44d09a087a203201248abbc81b3cc1aTimo Sirainen const char *msg)
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen i_error("lucene index %s: %s failed: %s", index->path, msg, what);
8e361d2906b0e44f7175a20981f8d2280645b58bTimo Sirainenstatic int lucene_index_open(struct lucene_index *index)
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen index->reader = IndexReader::open(index->path);
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen lucene_handle_error(index, err, "IndexReader::open()");
09539f3db2f1b3e24f40844d8456b06d318d0fe7Timo Sirainenstatic int lucene_index_open_search(struct lucene_index *index)
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen index->searcher = _CLNEW IndexSearcher(index->reader);
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainenlucene_doc_get_uid(struct lucene_index *index, Document *doc,
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen const TCHAR *uid = field == NULL ? NULL : field->stringValue();
d5cebe7f98e63d4e2822863ef2faa4971e8b3a5dTimo Sirainen i_error("lucene: Corrupted FTS index %s: No UID for document",
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen while (*uid != 0) {
4b058f90f9e8a2c6b2eed275de4eb8cc5195a71dTimo Sirainenint lucene_index_get_last_uid(struct lucene_index *index, uint32_t *last_uid_r)
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen if ((ret = lucene_index_open_search(index)) <= 0)
4b058f90f9e8a2c6b2eed275de4eb8cc5195a71dTimo Sirainen Term mailbox_term(_T("box"), index->mailbox_guid);
5254d77805cd35b9356d072ba325c356c43b0d51Timo Sirainen lucene_handle_error(index, err, "last_uid search");
b561170c84d19ef1dee3d528939d77fd38047b3fTimo Sirainenint lucene_index_get_doc_count(struct lucene_index *index, uint32_t *count_r)
5254d77805cd35b9356d072ba325c356c43b0d51Timo Sirainenint lucene_index_build_init(struct lucene_index *index)
0138d3060877805f0de0bf631642de100ff96b79Timo Sirainen lock_path = t_strdup_printf("%s/write.lock", index->path);
4b058f90f9e8a2c6b2eed275de4eb8cc5195a71dTimo Sirainen st.st_mtime < time(NULL) - LUCENE_LOCK_OVERRIDE_SECS) {
4b058f90f9e8a2c6b2eed275de4eb8cc5195a71dTimo Sirainen bool exists = IndexReader::indexExists(index->path);
a05b31e6bb304142baf496e80072aa524e2dae3eTimo Sirainen index->writer = _CLNEW IndexWriter(index->path,
06eee07a834e38c2a95603016f9405eb7ac5857dTimo Sirainen lucene_handle_error(index, err, "IndexWriter()");
e9219523bf8c03a1230a3a2710a2a291dc3a82a8Timo Sirainen index->writer->setMaxFieldLength(MAX_TERMS_PER_DOCUMENT);
4b058f90f9e8a2c6b2eed275de4eb8cc5195a71dTimo Sirainenstatic int lucene_index_build_flush(struct lucene_index *index)
4b058f90f9e8a2c6b2eed275de4eb8cc5195a71dTimo Sirainen lucene_handle_error(index, err, "IndexWriter::addDocument()");
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainenint lucene_index_build_more(struct lucene_index *index, uint32_t uid,
4b058f90f9e8a2c6b2eed275de4eb8cc5195a71dTimo Sirainen index->doc->add(*_CLNEW Field(_T("uid"), id, Field::STORE_YES | Field::INDEX_UNTOKENIZED));
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen index->doc->add(*_CLNEW Field(_T("box"), index->mailbox_guid, Field::STORE_YES | Field::INDEX_UNTOKENIZED));
b561170c84d19ef1dee3d528939d77fd38047b3fTimo Sirainen lucene_utf8_n_to_tchar(data, size, dest, datasize);
8d80659e504ffb34bb0c6a633184fece35751b18Timo Sirainen /* hdr_name should be ASCII, but don't break in case it isn't */
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen lucene_utf8_n_to_tchar((const unsigned char *)hdr_name,
bbe0ee356dc610a8d054b336534d8f33c49a36b7Timo Sirainen index->doc->add(*_CLNEW Field(_T("hdr"), wname, Field::STORE_NO | Field::INDEX_UNTOKENIZED));
b561170c84d19ef1dee3d528939d77fd38047b3fTimo Sirainen index->doc->add(*_CLNEW Field(_T("hdr"), dest, Field::STORE_NO | Field::INDEX_TOKENIZED));
b561170c84d19ef1dee3d528939d77fd38047b3fTimo Sirainen index->doc->add(*_CLNEW Field(wname, dest, Field::STORE_NO | Field::INDEX_TOKENIZED));
def516ea503a60f20d510c14d5070b7ff5bbddf4Timo Sirainen } else if (size > 0) {
b561170c84d19ef1dee3d528939d77fd38047b3fTimo Sirainen index->doc->add(*_CLNEW Field(_T("body"), dest, Field::STORE_NO | Field::INDEX_TOKENIZED));
40f114f2e12605483b21e7cf048b7c9bd9568148Timo Sirainenint lucene_index_build_deinit(struct lucene_index *index)
b561170c84d19ef1dee3d528939d77fd38047b3fTimo Sirainen /* no changes. */
e915ba86f157549b7d127f92312bc487b249df7eTimo Sirainen lucene_handle_error(index, err, "IndexWriter::optimize()");
02af13e6a41b9c3e6ad1e0b692b5b9741142c221Timo Sirainen lucene_handle_error(index, err, "IndexWriter::close()");
bfdef55e14ade87589bd10a6a1dafce53427ecf4Timo SirainenARRAY_DEFINE_TYPE(uid_id_map, struct uid_id_map);
a94f166ade968e8127b3eeda729417db9c6ad52fTimo Sirainenstatic int uid_id_map_cmp(const struct uid_id_map *u1,
int ret = 0;
return ret;
unsigned int n, i, count;
int ret;
if (ret < 0)
for (; i < count; i++)
return ret;
int ret = 0;
return ret;
static Query* getFieldQuery(Analyzer *analyzer, const TCHAR* _field, const TCHAR* queryText, bool fuzzy) {
bool severalTokensAtSamePosition = false;
t = NULL;
if (t == NULL)
v.push_back(t);
if (t->getPositionIncrement() != 0)
severalTokensAtSamePosition = true;
if (v.size() == 0)
return NULL;
if (fuzzy)
return ret;
if (severalTokensAtSamePosition) {
t = v.at(i);
return mpq;
t = v.at(i);
return pq;
static Query *
Query *q;
case SEARCH_TEXT: {
q = bq;
case SEARCH_BODY:
case SEARCH_HEADER:
case SEARCH_HEADER_ADDRESS:
arg);
if (!and_args)
Query *q;
case SEARCH_HEADER:
case SEARCH_HEADER_ADDRESS:
if (!and_args)
int ret = 0;
return ret;
bool have_definites = false;
have_definites = true;
if (have_definites) {
bool have_maybies = false;
have_maybies = true;
if (have_maybies) {
int ret = 0;
return ret;
bool have_definites = false;
have_definites = true;
if (have_definites) {