lucene-wrapper.cc revision 6a866d5d5533cb744c78bc2f1ca47beaee690d2f
e8058322725ba050014777ee2484f7e833ab1e3aLukas Slebodnik/* Copyright (C) 2006 Timo Sirainen */
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter/* Lucene's default is 10000. Use it here also.. */
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter/* If all the files in the lucene index directory are older than this many
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter seconds, assume we can delete stale locks */
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter/* When index is determined to be stale, delete all locks older than this */
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter/* Minimum interval between staleness checks */
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter return false;
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter return true;
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walterclass DovecotAnalyzer : public standard::StandardAnalyzer {
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter TokenStream *tokenStream(const TCHAR *fieldName,
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter /* Everything except body/headers should go as-is without any
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter modifications. Isn't there any easier way to do this than
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter to implement a whole new RawTokenStream?.. */
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walterstatic bool lucene_dir_scan(const char *dir, const char *skip_path,
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter unsigned int dir_len;
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter if (dp->d_name[1] == '.' && dp->d_name[2] == '\0')
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walterstatic void lucene_delete_stale_locks(struct lucene_index *index)
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter if (index->last_stale_check + STALENESS_CHECK_INTERVAL > now)
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter if (lucene_dir_scan(index->path, index->lock_path,
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter /* the index is probably being updated */
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walterstruct lucene_index *lucene_index_init(const char *path, const char *lock_path)
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter env_put(t_strconcat(LUCENE_LOCK_DIR_ENV_1"=", lock_path, NULL));
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walterstatic void lucene_index_close(struct lucene_index *index)
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Waltervoid lucene_index_deinit(struct lucene_index *index)
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walterint lucene_index_select_mailbox(struct lucene_index *index,
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter STRCPY_AtoT(index->tmailbox_name, mailbox_name, len);
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walterstatic void lucene_handle_error(struct lucene_index *index, CLuceneError &err,
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter const char *msg)
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter if (err.number() == CL_ERR_IO && strncasecmp(what, "Lock", 4) == 0) {
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter /* "Lock obtain timed out". delete any stale locks. */
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter /* we've already complained about this */
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter i_error("lucene index %s: %s failed: %s", index->path, msg, what);
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walterstatic int lucene_index_open(struct lucene_index *index)
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter lucene_handle_error(index, err, "IndexReader::open()");
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walterstatic int lucene_index_open_search(struct lucene_index *index)
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter index->searcher = _CLNEW IndexSearcher(index->reader);
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walterlucene_doc_get_uid(struct lucene_index *index, Document *doc,
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter TCHAR *uid = field == NULL ? NULL : field->stringValue();
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter i_error("lucene: Corrupted FTS index %s: No UID for document",
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter while (*uid != 0) {
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walterlucene_index_get_last_uid_int(struct lucene_index *index, bool delete_old)
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter bool deleted = false;
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter if ((ret = lucene_index_open_search(index)) <= 0)
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek /* find all the existing last_uids for selected mailbox.
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek if there are more than one, delete the smaller ones. this is normal
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek behavior because we can't update/delete documents in writer, so
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek we'll do it only in here.. */
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek Term mailbox_term(_T("box"), index->tmailbox_name);
58229439447d5617913a5a2e173b78105c694842Pavel Březina lucene_handle_error(index, err, "last_uid search");
1319e71fd1680ca4864afe0b1aca2b8c8e4a1ee4Stef Walter /* the index was modified. we'll need to release the locks
1319e71fd1680ca4864afe0b1aca2b8c8e4a1ee4Stef Walter before opening a writer */
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozekint lucene_index_get_last_uid(struct lucene_index *index, uint32_t *last_uid_r)
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek /* delete the old last_uids in here, since we've not write-locked
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek the index yet */
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek if (lucene_index_get_last_uid_int(index, true) < 0)
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozekint lucene_index_build_init(struct lucene_index *index, uint32_t *last_uid_r)
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek /* set this even if we fail so fts-storage won't crash */
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek bool exists = IndexReader::indexExists(index->path);
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek index->writer = _CLNEW IndexWriter(index->path,
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek lucene_handle_error(index, err, "IndexWriter()");
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek index->writer->setMaxFieldLength(MAX_TERMS_PER_DOCUMENT);
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek if (lucene_index_get_last_uid_int(index, false) < 0)
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozekstatic int lucene_index_build_flush(struct lucene_index *index)
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek lucene_handle_error(index, err, "IndexWriter::addDocument()");
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozekint lucene_index_build_more(struct lucene_index *index, uint32_t uid,
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter unsigned int len;
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter lucene_utf8towcs(dest, (const char *)data, len);
90e04eae7e54ec892a6f239783df94dab5d1ed9aJakub Hrozek index->doc->add(*Field::Text(_T("box"), index->tmailbox_name));
90e04eae7e54ec892a6f239783df94dab5d1ed9aJakub Hrozek index->doc->add(*Field::Text(_T("headers"), dest));
90e04eae7e54ec892a6f239783df94dab5d1ed9aJakub Hrozek index->doc->add(*Field::Text(_T("body"), dest));
90e04eae7e54ec892a6f239783df94dab5d1ed9aJakub Hrozekstatic int lucene_index_update_last_uid(struct lucene_index *index)
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter i_snprintf(id, sizeof(id), "%u", index->last_uid);
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter doc.add(*Field::Text(_T("box"), index->tmailbox_name));
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter lucene_handle_error(index, err, "IndexWriter::addDocument()");
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walterint lucene_index_build_deinit(struct lucene_index *index)
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter /* no changes. */
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter lucene_handle_error(index, err, "IndexWriter::optimize()");
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter lucene_handle_error(index, err, "IndexWriter::close()");
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walterint lucene_index_expunge(struct lucene_index *index, uint32_t uid)
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter if ((ret = lucene_index_open_search(index)) <= 0)
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter Term mailbox_term(_T("box"), index->tmailbox_name);
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter lucene_handle_error(index, err, "expunge search");
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter lucene_handle_error(index, err, "IndexReader::close()");
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walterint lucene_index_lookup(struct lucene_index *index, enum fts_lookup_flags flags,
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter i_assert((flags & (FTS_LOOKUP_FLAG_HEADERS|FTS_LOOKUP_FLAG_BODY)) != 0);
90e04eae7e54ec892a6f239783df94dab5d1ed9aJakub Hrozek unsigned int len = uni_utf8_strlen_n(quoted_key, (size_t)-1);
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter Query *content_query1 = NULL, *content_query2 = NULL;
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter content_query1 = QueryParser::parse(tkey, _T("headers"),
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter content_query2 = QueryParser::parse(tkey, _T("body"),
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter i_info("lucene: QueryParser::parse(%s) failed: %s",