lucene-wrapper.cc revision 6a866d5d5533cb744c78bc2f1ca47beaee690d2f
/* Copyright (C) 2006 Timo Sirainen */
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walterextern "C" {
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter#include "lib.h"
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter#include "env-util.h"
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter#include "unichar.h"
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter#include "str.h"
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter#include "str-sanitize.h"
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter#include "lucene-wrapper.h"
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter#include <dirent.h>
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter#include <sys/stat.h>
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter};
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter#include <CLucene.h>
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter/* Lucene's default is 10000. Use it here also.. */
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter#define MAX_TERMS_PER_DOCUMENT 10000
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter/* If all the files in the lucene index directory are older than this many
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter seconds, assume we can delete stale locks */
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter#define STALE_INDEX_SECS 60
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter/* When index is determined to be stale, delete all locks older than this */
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter#define STALE_LOCK_SECS 60
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter/* Minimum interval between staleness checks */
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter#define STALENESS_CHECK_INTERVAL 10
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walterusing namespace lucene::document;
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walterusing namespace lucene::index;
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walterusing namespace lucene::search;
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walterusing namespace lucene::queryParser;
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walterusing namespace lucene::analysis;
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walterstruct lucene_index {
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter char *path, *lock_path;
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter char *mailbox_name;
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter TCHAR *tmailbox_name;
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter time_t last_stale_check;
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter bool lock_error;
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter IndexReader *reader;
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter IndexWriter *writer;
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter IndexSearcher *searcher;
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter Analyzer *analyzer;
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter Document *doc;
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter uint32_t prev_uid, last_uid;
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter};
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walterclass RawTokenStream : public TokenStream {
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter CL_NS(util)::Reader *reader;
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walterpublic:
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter RawTokenStream(CL_NS(util)::Reader *reader) {
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter this->reader = reader;
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter };
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter bool next(Token *token) {
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter const TCHAR *data;
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter int32_t len = this->reader->read(data);
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter if (len <= 0)
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter return false;
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter token->set(data, 0, len);
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter return true;
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter }
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter void close() { }
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter};
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walterclass DovecotAnalyzer : public standard::StandardAnalyzer {
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walterpublic:
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter TokenStream *tokenStream(const TCHAR *fieldName,
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter CL_NS(util)::Reader *reader) {
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter /* Everything except body/headers should go as-is without any
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter modifications. Isn't there any easier way to do this than
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter to implement a whole new RawTokenStream?.. */
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter if (fieldName != 0 &&
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter wcscmp(fieldName, L"headers") != 0 &&
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter wcscmp(fieldName, L"body") != 0)
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter return _CLNEW RawTokenStream(reader);
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter return standard::StandardAnalyzer::
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter tokenStream(fieldName, reader);
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter }
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter};
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walterstatic bool lucene_dir_scan(const char *dir, const char *skip_path,
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter time_t stale_stamp, bool unlink_staled)
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter{
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter DIR *d;
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter struct dirent *dp;
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter struct stat st;
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter string_t *path;
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter unsigned int dir_len;
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter bool found_nonstale = FALSE;
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter d = opendir(dir);
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter if (d == NULL) {
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter i_error("opendir(%s) failed: %m", dir);
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter return TRUE;
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter }
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter t_push();
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter path = t_str_new(256);
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter str_append(path, dir);
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter str_append_c(path, '/');
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter dir_len = str_len(path);
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter while ((dp = readdir(d)) != NULL) {
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter if (*dp->d_name == '.') {
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter if (dp->d_name[1] == '\0')
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter continue;
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter if (dp->d_name[1] == '.' && dp->d_name[2] == '\0')
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter continue;
1319e71fd1680ca4864afe0b1aca2b8c8e4a1ee4Stef Walter }
1319e71fd1680ca4864afe0b1aca2b8c8e4a1ee4Stef Walter
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter str_truncate(path, dir_len);
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter str_append(path, dp->d_name);
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter if (skip_path != NULL &&
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter strcmp(str_c(path), skip_path) == 0)
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter continue;
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter if (stat(str_c(path), &st) < 0) {
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter if (errno != ENOENT)
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter i_error("stat(%s) failed: %m", str_c(path));
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter found_nonstale = TRUE;
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter } else if (st.st_ctime <= stale_stamp &&
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter st.st_mtime <= stale_stamp) {
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter if (unlink_staled) {
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter if (unlink(str_c(path)) < 0 &&
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter errno != ENOENT) {
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter i_error("unlink(%s) failed: %m",
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter str_c(path));
1319e71fd1680ca4864afe0b1aca2b8c8e4a1ee4Stef Walter }
1319e71fd1680ca4864afe0b1aca2b8c8e4a1ee4Stef Walter }
1319e71fd1680ca4864afe0b1aca2b8c8e4a1ee4Stef Walter } else {
1319e71fd1680ca4864afe0b1aca2b8c8e4a1ee4Stef Walter found_nonstale = TRUE;
1319e71fd1680ca4864afe0b1aca2b8c8e4a1ee4Stef Walter }
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter }
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter if (closedir(d) < 0)
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter i_error("closedir(%s) failed: %m", dir);
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter t_pop();
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter return found_nonstale;
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter}
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walterstatic void lucene_delete_stale_locks(struct lucene_index *index)
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter{
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter time_t now;
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter now = time(NULL);
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter if (index->last_stale_check + STALENESS_CHECK_INTERVAL > now)
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter return;
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter index->last_stale_check = now;
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter if (lucene_dir_scan(index->path, index->lock_path,
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter now - STALE_INDEX_SECS, FALSE)) {
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter /* the index is probably being updated */
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter return;
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter }
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter (void)lucene_dir_scan(index->lock_path, NULL,
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter now - STALE_LOCK_SECS, TRUE);
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter}
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walterstruct lucene_index *lucene_index_init(const char *path, const char *lock_path)
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter{
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter struct lucene_index *index;
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter env_put(t_strconcat(LUCENE_LOCK_DIR_ENV_1"=", lock_path, NULL));
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter
1319e71fd1680ca4864afe0b1aca2b8c8e4a1ee4Stef Walter index = i_new(struct lucene_index, 1);
1319e71fd1680ca4864afe0b1aca2b8c8e4a1ee4Stef Walter index->path = i_strdup(path);
1319e71fd1680ca4864afe0b1aca2b8c8e4a1ee4Stef Walter index->lock_path = i_strdup(lock_path);
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter index->analyzer = _CLNEW DovecotAnalyzer();
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter lucene_delete_stale_locks(index);
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter return index;
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter}
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walterstatic void lucene_index_close(struct lucene_index *index)
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter{
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter _CLDELETE(index->reader);
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter _CLDELETE(index->writer);
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter _CLDELETE(index->searcher);
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter}
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Waltervoid lucene_index_deinit(struct lucene_index *index)
90e04eae7e54ec892a6f239783df94dab5d1ed9aJakub Hrozek{
90e04eae7e54ec892a6f239783df94dab5d1ed9aJakub Hrozek lucene_index_close(index);
90e04eae7e54ec892a6f239783df94dab5d1ed9aJakub Hrozek _CLDELETE(index->analyzer);
90e04eae7e54ec892a6f239783df94dab5d1ed9aJakub Hrozek i_free(index->mailbox_name);
90e04eae7e54ec892a6f239783df94dab5d1ed9aJakub Hrozek i_free(index->tmailbox_name);
90e04eae7e54ec892a6f239783df94dab5d1ed9aJakub Hrozek i_free(index->path);
90e04eae7e54ec892a6f239783df94dab5d1ed9aJakub Hrozek i_free(index->lock_path);
90e04eae7e54ec892a6f239783df94dab5d1ed9aJakub Hrozek i_free(index);
90e04eae7e54ec892a6f239783df94dab5d1ed9aJakub Hrozek}
90e04eae7e54ec892a6f239783df94dab5d1ed9aJakub Hrozek
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walterint lucene_index_select_mailbox(struct lucene_index *index,
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter const char *mailbox_name)
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter{
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter size_t len;
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter i_free(index->mailbox_name);
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter i_free(index->tmailbox_name);
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter len = strlen(mailbox_name);
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter index->mailbox_name = i_strdup(mailbox_name);
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter index->tmailbox_name = i_new(TCHAR, len + 1);
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter STRCPY_AtoT(index->tmailbox_name, mailbox_name, len);
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter}
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walterstatic void lucene_handle_error(struct lucene_index *index, CLuceneError &err,
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter const char *msg)
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter{
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter const char *what = err.what();
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter if (err.number() == CL_ERR_IO && strncasecmp(what, "Lock", 4) == 0) {
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter /* "Lock obtain timed out". delete any stale locks. */
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter lucene_delete_stale_locks(index);
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter if (index->lock_error) {
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter /* we've already complained about this */
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter return;
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter }
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter index->lock_error = TRUE;
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter }
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter i_error("lucene index %s: %s failed: %s", index->path, msg, what);
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter}
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walterstatic int lucene_index_open(struct lucene_index *index)
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter{
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter if (index->reader != NULL)
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter return 1;
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter if (!IndexReader::indexExists(index->path))
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter return 0;
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter try {
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter index->reader = IndexReader::open(index->path);
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter } catch (CLuceneError &err) {
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter lucene_handle_error(index, err, "IndexReader::open()");
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter return -1;
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter }
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter return 1;
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter}
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walterstatic int lucene_index_open_search(struct lucene_index *index)
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter{
90e04eae7e54ec892a6f239783df94dab5d1ed9aJakub Hrozek int ret;
90e04eae7e54ec892a6f239783df94dab5d1ed9aJakub Hrozek
90e04eae7e54ec892a6f239783df94dab5d1ed9aJakub Hrozek if (index->searcher != NULL)
1319e71fd1680ca4864afe0b1aca2b8c8e4a1ee4Stef Walter return 1;
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter if ((ret = lucene_index_open(index)) <= 0)
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter return ret;
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter index->searcher = _CLNEW IndexSearcher(index->reader);
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter return 1;
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter}
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walterstatic int
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walterlucene_doc_get_uid(struct lucene_index *index, Document *doc,
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter const TCHAR *field_name, uint32_t *uid_r)
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter{
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter Field *field = doc->getField(field_name);
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter TCHAR *uid = field == NULL ? NULL : field->stringValue();
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter if (uid == NULL) {
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter i_error("lucene: Corrupted FTS index %s: No UID for document",
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter index->path);
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter return -1;
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter }
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter uint32_t num = 0;
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter while (*uid != 0) {
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter num = num*10 + (*uid - '0');
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter uid++;
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter }
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter *uid_r = num;
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter return 0;
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter}
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walterstatic int
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walterlucene_index_get_last_uid_int(struct lucene_index *index, bool delete_old)
66277b21179d95f6e96abed01a20ccbccf27ce99Pavel Březina{
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter int ret = 0;
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter bool deleted = false;
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter index->last_uid = 0;
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter if ((ret = lucene_index_open_search(index)) <= 0)
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter return ret;
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek /* find all the existing last_uids for selected mailbox.
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek if there are more than one, delete the smaller ones. this is normal
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek behavior because we can't update/delete documents in writer, so
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek we'll do it only in here.. */
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek Term mailbox_term(_T("box"), index->tmailbox_name);
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek Term last_uid_term(_T("last_uid"), _T("*"));
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek TermQuery mailbox_query(&mailbox_term);
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek WildcardQuery last_uid_query(&last_uid_term);
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek BooleanQuery query;
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek query.add(&mailbox_query, true, false);
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek query.add(&last_uid_query, true, false);
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek int32_t last_doc_id = -1;
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek try {
1319e71fd1680ca4864afe0b1aca2b8c8e4a1ee4Stef Walter Hits *hits = index->searcher->search(&query);
1319e71fd1680ca4864afe0b1aca2b8c8e4a1ee4Stef Walter
90e04eae7e54ec892a6f239783df94dab5d1ed9aJakub Hrozek for (int32_t i = 0; i < hits->length(); i++) {
90e04eae7e54ec892a6f239783df94dab5d1ed9aJakub Hrozek uint32_t uid;
90e04eae7e54ec892a6f239783df94dab5d1ed9aJakub Hrozek
90e04eae7e54ec892a6f239783df94dab5d1ed9aJakub Hrozek if (lucene_doc_get_uid(index, &hits->doc(i),
90e04eae7e54ec892a6f239783df94dab5d1ed9aJakub Hrozek _T("last_uid"), &uid) < 0) {
90e04eae7e54ec892a6f239783df94dab5d1ed9aJakub Hrozek ret = -1;
1319e71fd1680ca4864afe0b1aca2b8c8e4a1ee4Stef Walter break;
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek }
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek int32_t del_id = -1;
1319e71fd1680ca4864afe0b1aca2b8c8e4a1ee4Stef Walter if (uid > index->last_uid) {
90e04eae7e54ec892a6f239783df94dab5d1ed9aJakub Hrozek if (last_doc_id >= 0)
90e04eae7e54ec892a6f239783df94dab5d1ed9aJakub Hrozek del_id = last_doc_id;
1319e71fd1680ca4864afe0b1aca2b8c8e4a1ee4Stef Walter index->last_uid = uid;
1319e71fd1680ca4864afe0b1aca2b8c8e4a1ee4Stef Walter last_doc_id = hits->id(i);
66277b21179d95f6e96abed01a20ccbccf27ce99Pavel Březina } else {
90e04eae7e54ec892a6f239783df94dab5d1ed9aJakub Hrozek del_id = hits->id(i);
90e04eae7e54ec892a6f239783df94dab5d1ed9aJakub Hrozek }
1319e71fd1680ca4864afe0b1aca2b8c8e4a1ee4Stef Walter if (del_id >= 0 && delete_old) {
1319e71fd1680ca4864afe0b1aca2b8c8e4a1ee4Stef Walter index->reader->deleteDocument(del_id);
90e04eae7e54ec892a6f239783df94dab5d1ed9aJakub Hrozek deleted = true;
58229439447d5617913a5a2e173b78105c694842Pavel Březina }
58229439447d5617913a5a2e173b78105c694842Pavel Březina }
5de968e80ade1c02d1907834dcff95e9fc9ad10aJakub Hrozek index->lock_error = FALSE;
5de968e80ade1c02d1907834dcff95e9fc9ad10aJakub Hrozek _CLDELETE(hits);
58229439447d5617913a5a2e173b78105c694842Pavel Březina } catch (CLuceneError &err) {
58229439447d5617913a5a2e173b78105c694842Pavel Březina lucene_handle_error(index, err, "last_uid search");
90e04eae7e54ec892a6f239783df94dab5d1ed9aJakub Hrozek ret = -1;
90e04eae7e54ec892a6f239783df94dab5d1ed9aJakub Hrozek }
90e04eae7e54ec892a6f239783df94dab5d1ed9aJakub Hrozek
90e04eae7e54ec892a6f239783df94dab5d1ed9aJakub Hrozek if (deleted) {
1319e71fd1680ca4864afe0b1aca2b8c8e4a1ee4Stef Walter /* the index was modified. we'll need to release the locks
1319e71fd1680ca4864afe0b1aca2b8c8e4a1ee4Stef Walter before opening a writer */
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek lucene_index_close(index);
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek }
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek return ret;
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek}
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozekint lucene_index_get_last_uid(struct lucene_index *index, uint32_t *last_uid_r)
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek{
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek /* delete the old last_uids in here, since we've not write-locked
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek the index yet */
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek if (lucene_index_get_last_uid_int(index, true) < 0)
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek return -1;
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek *last_uid_r = index->last_uid;
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek return 0;
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek}
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozekint lucene_index_build_init(struct lucene_index *index, uint32_t *last_uid_r)
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek{
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek uint32_t last_uid = 0;
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek i_assert(index->mailbox_name != NULL);
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek /* set this even if we fail so fts-storage won't crash */
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek *last_uid_r = index->last_uid;
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek lucene_index_close(index);
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek bool exists = IndexReader::indexExists(index->path);
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek try {
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek index->writer = _CLNEW IndexWriter(index->path,
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek index->analyzer, !exists);
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek index->lock_error = FALSE;
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek } catch (CLuceneError &err) {
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek lucene_handle_error(index, err, "IndexWriter()");
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek return -1;
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek }
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek index->writer->setMaxFieldLength(MAX_TERMS_PER_DOCUMENT);
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek if (lucene_index_get_last_uid_int(index, false) < 0)
66277b21179d95f6e96abed01a20ccbccf27ce99Pavel Březina return -1;
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek *last_uid_r = index->last_uid;
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek return 0;
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek}
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozekstatic int lucene_index_build_flush(struct lucene_index *index)
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek{
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek int ret = 0;
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek if (index->doc == NULL)
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek return 0;
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek try {
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek index->writer->addDocument(index->doc);
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek } catch (CLuceneError &err) {
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek lucene_handle_error(index, err, "IndexWriter::addDocument()");
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek ret = -1;
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek }
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek _CLDELETE(index->doc);
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek index->doc = NULL;
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek return ret;
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek}
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozekint lucene_index_build_more(struct lucene_index *index, uint32_t uid,
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek const unsigned char *data, size_t size,
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek bool headers)
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek{
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter unsigned int len;
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter char id[MAX_INT_STRLEN];
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter i_assert(uid > index->last_uid);
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter i_assert(size > 0);
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter len = uni_utf8_strlen_n(data, size);
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter wchar_t dest[len+1];
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter lucene_utf8towcs(dest, (const char *)data, len);
1319e71fd1680ca4864afe0b1aca2b8c8e4a1ee4Stef Walter dest[len] = 0;
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter if (uid != index->prev_uid) {
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter char id[MAX_INT_STRLEN];
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter TCHAR tid[MAX_INT_STRLEN];
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter if (lucene_index_build_flush(index) < 0)
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter return -1;
90e04eae7e54ec892a6f239783df94dab5d1ed9aJakub Hrozek index->prev_uid = uid;
90e04eae7e54ec892a6f239783df94dab5d1ed9aJakub Hrozek
90e04eae7e54ec892a6f239783df94dab5d1ed9aJakub Hrozek index->doc = _CLNEW Document();
90e04eae7e54ec892a6f239783df94dab5d1ed9aJakub Hrozek i_snprintf(id, sizeof(id), "%u", uid);
90e04eae7e54ec892a6f239783df94dab5d1ed9aJakub Hrozek STRCPY_AtoT(tid, id, MAX_INT_STRLEN);
90e04eae7e54ec892a6f239783df94dab5d1ed9aJakub Hrozek index->doc->add(*Field::Text(_T("uid"), tid));
90e04eae7e54ec892a6f239783df94dab5d1ed9aJakub Hrozek index->doc->add(*Field::Text(_T("box"), index->tmailbox_name));
90e04eae7e54ec892a6f239783df94dab5d1ed9aJakub Hrozek }
90e04eae7e54ec892a6f239783df94dab5d1ed9aJakub Hrozek
90e04eae7e54ec892a6f239783df94dab5d1ed9aJakub Hrozek if (headers)
90e04eae7e54ec892a6f239783df94dab5d1ed9aJakub Hrozek index->doc->add(*Field::Text(_T("headers"), dest));
90e04eae7e54ec892a6f239783df94dab5d1ed9aJakub Hrozek else
90e04eae7e54ec892a6f239783df94dab5d1ed9aJakub Hrozek index->doc->add(*Field::Text(_T("body"), dest));
90e04eae7e54ec892a6f239783df94dab5d1ed9aJakub Hrozek return 0;
90e04eae7e54ec892a6f239783df94dab5d1ed9aJakub Hrozek}
90e04eae7e54ec892a6f239783df94dab5d1ed9aJakub Hrozek
90e04eae7e54ec892a6f239783df94dab5d1ed9aJakub Hrozekstatic int lucene_index_update_last_uid(struct lucene_index *index)
90e04eae7e54ec892a6f239783df94dab5d1ed9aJakub Hrozek{
90e04eae7e54ec892a6f239783df94dab5d1ed9aJakub Hrozek Document doc;
90e04eae7e54ec892a6f239783df94dab5d1ed9aJakub Hrozek char id[MAX_INT_STRLEN];
90e04eae7e54ec892a6f239783df94dab5d1ed9aJakub Hrozek TCHAR tid[MAX_INT_STRLEN];
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter i_snprintf(id, sizeof(id), "%u", index->last_uid);
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter STRCPY_AtoT(tid, id, MAX_INT_STRLEN);
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter doc.add(*Field::Text(_T("last_uid"), tid));
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter doc.add(*Field::Text(_T("box"), index->tmailbox_name));
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter try {
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter index->writer->addDocument(&doc);
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter return 0;
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter } catch (CLuceneError &err) {
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter lucene_handle_error(index, err, "IndexWriter::addDocument()");
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter return -1;
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter }
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter}
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walterint lucene_index_build_deinit(struct lucene_index *index)
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter{
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter int ret = 0;
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter if (index->prev_uid == 0) {
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter /* no changes. */
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter return 0;
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter }
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter if (index->prev_uid > index->last_uid)
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter index->last_uid = index->prev_uid;
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter index->prev_uid = 0;
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter if (index->writer == NULL) {
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter lucene_index_close(index);
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter return -1;
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter }
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter if (lucene_index_build_flush(index) < 0)
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter ret = -1;
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter if (lucene_index_update_last_uid(index) < 0)
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter ret = -1;
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter try {
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter index->writer->optimize();
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter } catch (CLuceneError &err) {
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter lucene_handle_error(index, err, "IndexWriter::optimize()");
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter ret = -1;
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter }
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter try {
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter index->writer->close();
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter } catch (CLuceneError &err) {
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter lucene_handle_error(index, err, "IndexWriter::close()");
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter ret = -1;
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter }
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter lucene_index_close(index);
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter return ret;
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter}
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walterint lucene_index_expunge(struct lucene_index *index, uint32_t uid)
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter{
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter char id[MAX_INT_STRLEN];
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter TCHAR tid[MAX_INT_STRLEN];
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter int ret;
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter if ((ret = lucene_index_open_search(index)) <= 0)
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter return ret;
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter
fcd8093c58638dc7c4f9cddfc97f273b94ce2eadStef Walter i_snprintf(id, sizeof(id), "%u", uid);
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter STRCPY_AtoT(tid, id, MAX_INT_STRLEN);
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter Term mailbox_term(_T("box"), index->tmailbox_name);
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter Term uid_term(_T("uid"), tid);
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter TermQuery mailbox_query(&mailbox_term);
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter TermQuery uid_query(&uid_term);
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter BooleanQuery query;
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter query.add(&mailbox_query, true, false);
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter query.add(&uid_query, true, false);
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter try {
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter Hits *hits = index->searcher->search(&query);
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter for (int32_t i = 0; i < hits->length(); i++)
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter index->reader->deleteDocument(hits->id(i));
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter index->lock_error = FALSE;
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter _CLDELETE(hits);
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter } catch (CLuceneError &err) {
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter lucene_handle_error(index, err, "expunge search");
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter ret = -1;
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter }
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter try {
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter index->reader->close();
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter } catch (CLuceneError &err) {
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter lucene_handle_error(index, err, "IndexReader::close()");
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter ret = -1;
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter }
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter
90e04eae7e54ec892a6f239783df94dab5d1ed9aJakub Hrozek lucene_index_close(index);
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter return ret;
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter}
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walterint lucene_index_lookup(struct lucene_index *index, enum fts_lookup_flags flags,
1319e71fd1680ca4864afe0b1aca2b8c8e4a1ee4Stef Walter const char *key, ARRAY_TYPE(seq_range) *result)
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter{
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter const char *quoted_key;
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter int ret = 0;
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter i_assert((flags & (FTS_LOOKUP_FLAG_HEADERS|FTS_LOOKUP_FLAG_BODY)) != 0);
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter if (lucene_index_open_search(index) <= 0)
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter return -1;
90e04eae7e54ec892a6f239783df94dab5d1ed9aJakub Hrozek
90e04eae7e54ec892a6f239783df94dab5d1ed9aJakub Hrozek t_push();
90e04eae7e54ec892a6f239783df94dab5d1ed9aJakub Hrozek quoted_key = strchr(key, ' ') == NULL ?
90e04eae7e54ec892a6f239783df94dab5d1ed9aJakub Hrozek t_strdup_printf("%s*", key) :
90e04eae7e54ec892a6f239783df94dab5d1ed9aJakub Hrozek t_strdup_printf("\"%s\"", key);
90e04eae7e54ec892a6f239783df94dab5d1ed9aJakub Hrozek unsigned int len = uni_utf8_strlen_n(quoted_key, (size_t)-1);
90e04eae7e54ec892a6f239783df94dab5d1ed9aJakub Hrozek wchar_t tkey[len + 1];
90e04eae7e54ec892a6f239783df94dab5d1ed9aJakub Hrozek lucene_utf8towcs(tkey, quoted_key, len);
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter tkey[len] = 0;
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter t_pop();
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter BooleanQuery lookup_query;
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter Query *content_query1 = NULL, *content_query2 = NULL;
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter try {
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter if ((flags & FTS_LOOKUP_FLAG_HEADERS) != 0) {
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter content_query1 = QueryParser::parse(tkey, _T("headers"),
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter index->analyzer);
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter lookup_query.add(content_query1, false, false);
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter }
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter if ((flags & FTS_LOOKUP_FLAG_BODY) != 0) {
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter content_query2 = QueryParser::parse(tkey, _T("body"),
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter index->analyzer);
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter lookup_query.add(content_query2, false, false);
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter }
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter } catch (CLuceneError &err) {
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter if (getenv("DEBUG") != NULL) {
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter i_info("lucene: QueryParser::parse(%s) failed: %s",
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter str_sanitize(key, 40), err.what());
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter }
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter if (content_query1 != NULL)
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter _CLDELETE(content_query1);
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek lucene_index_close(index);
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter return -1;
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek }
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter BooleanQuery query;
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter Term mailbox_term(_T("box"), index->tmailbox_name);
fcd8093c58638dc7c4f9cddfc97f273b94ce2eadStef Walter TermQuery mailbox_query(&mailbox_term);
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter query.add(&lookup_query, true, false);
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter query.add(&mailbox_query, true, false);
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter try {
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter Hits *hits = index->searcher->search(&query);
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter for (int32_t i = 0; i < hits->length(); i++) {
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter uint32_t uid;
fcd8093c58638dc7c4f9cddfc97f273b94ce2eadStef Walter
fcd8093c58638dc7c4f9cddfc97f273b94ce2eadStef Walter if (lucene_doc_get_uid(index, &hits->doc(i),
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter _T("uid"), &uid) < 0) {
90e04eae7e54ec892a6f239783df94dab5d1ed9aJakub Hrozek ret = -1;
90e04eae7e54ec892a6f239783df94dab5d1ed9aJakub Hrozek break;
dff909d473f43a6bd0f0286fa2d279c0ebe945c6Stef Walter }
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter seq_range_array_add(result, 0, uid);
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter }
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter index->lock_error = FALSE;
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter _CLDELETE(hits);
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter } catch (CLuceneError &err) {
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter lucene_handle_error(index, err, "search");
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter ret = -1;
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter }
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter if (content_query1 != NULL)
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter _CLDELETE(content_query1);
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter if (content_query2 != NULL)
b699c4d7f85a5404be1d1ee9450331aea869b886Stef Walter _CLDELETE(content_query2);
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek return ret;
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek}
4f7f714e118e95896fac5239c7a8b529c39a4758Jakub Hrozek