lucene-wrapper.cc revision 6990472dfc095728637b1edf697bb30901f6f78d
/* Copyright (c) 2006-2010 Dovecot authors, see the included COPYING file */
extern "C" {
#include "lib.h"
#include "array.h"
#include "unichar.h"
#include "hash.h"
#include "hex-binary.h"
#include "mail-index.h"
#include "mail-search.h"
#include "mail-namespace.h"
#include "mail-storage.h"
#include "fts-expunge-log.h"
#include "lucene-wrapper.h"
#ifdef HAVE_LUCENE_TEXTCAT
# include <libtextcat/textcat.h>
#endif
};
#include <CLucene.h>
#include "SnowballAnalyzer.h"
/* Lucene's default is 10000. Use the same limit here. */
#define MAX_TERMS_PER_DOCUMENT 10000
#define LUCENE_LOCK_OVERRIDE_SECS 60
#define DEFAULT_LANGUAGE "english"
using namespace lucene::queryParser;
/* Per-language analyzer entry.
   NOTE(review): only `lang` is declared in this view, yet code further down
   reads an `analyzer` member through this type (a->analyzer,
   new_analyzer.analyzer) - confirm the full member list. */
struct lucene_analyzer {
/* language name for this entry; presumably the key used when looking up an
   already created analyzer - TODO confirm */
char *lang;
};
/* State for one Lucene index.
   NOTE(review): code further down also dereferences index->doc,
   index->mailbox_guid and index->default_analyzer, none of which are
   declared in this view - confirm the full member list. */
struct lucene_index {
/* presumably the filesystem location of the index - TODO confirm */
char *path;
/* textcat directory and configuration file; the visible reader of
   textcat_dir sits inside the HAVE_LUCENE_TEXTCAT code path */
char *textcat_dir, *textcat_conf;
};
/* Working state for one rescan pass. An instance is created on the stack by
   the rescan entry point and passed to rescan_finish() when the pass ends.
   NOTE(review): ctx->last_existing_uid is assigned further down but is not
   declared in this view. */
struct rescan_context {
/* index being rescanned */
struct lucene_index *index;
/* mailbox list supplied by the caller of the rescan */
struct mailbox_list *list;
/* presumably the result of the most recent mailbox open/sync attempt -
   TODO confirm */
int box_ret;
/* iterator over a UID sequence range, plus its position counter:
   uids_iter_n is reset to 0 when a mailbox is set up and incremented each
   time the index UID equals the Lucene UID */
struct seq_range_iter uids_iter;
unsigned int uids_iter_n;
/* presumably set once a warning has been logged so it is not repeated -
   TODO confirm */
bool warned;
};
/* When set, the textcat-based analyzer lookup returns NULL right away
   instead of trying to guess a language. */
static bool textcat_broken = FALSE;
/* presumably counts the users of shared textcat state - TODO confirm; no
   reader or writer is visible in this view */
static int textcat_refcount = 0;
const char *textcat_dir,
const char *textcat_conf)
{
struct lucene_index *index;
#ifdef HAVE_LUCENE_TEXTCAT
#else
#endif
return index;
}
{
}
{
struct lucene_analyzer *a;
}
#ifdef HAVE_LUCENE_TEXTCAT
#endif
}
}
{
i_unreached();
}
/* Judging by its name and signature, returns a wide-character version of the
   UTF-8 string `str`.
   NOTE(review): the body shown here is incomplete - it begins with
   i_unreached() and uses `dest_arr` and `ret` without visible declarations -
   so the conversion itself cannot be described from this view. */
static const wchar_t *t_lucene_utf8_to_tchar(const char *str)
{
i_unreached();
/* appends one more element to dest_arr; presumably the terminating NUL -
   TODO confirm */
(void)array_append_space(&dest_arr);
return (const wchar_t *)ret;
}
const wchar_t guid[MAILBOX_GUID_HEX_LENGTH])
{
MAILBOX_GUID_HEX_LENGTH * sizeof(wchar_t));
}
{
}
const char *msg)
{
}
{
return 1;
return 0;
try {
} catch (CLuceneError &err) {
return -1;
}
return 1;
}
{
int ret;
return 1;
return ret;
return 1;
}
static int
{
i_error("lucene: Corrupted FTS index %s: No UID for document",
return -1;
}
while (*uid != 0) {
uid++;
}
return 0;
}
{
int ret = 0;
*last_uid_r = 0;
return ret;
try {
&uid) < 0) {
ret = -1;
break;
}
}
} catch (CLuceneError &err) {
ret = -1;
}
*last_uid_r = last_uid;
return ret;
}
{
int ret;
return -1;
if (ret == 0) {
*count_r = 0;
return 0;
}
}
return 0;
}
/* NOTE(review): the signature of this function is not part of this view.
   Visible contract: returns 0 on success and -1 when CLucene throws
   CLuceneError. */
{
const char *lock_path;
/* The format string consumes one "%s" argument, so the path must be passed
   explicitly; calling a printf-style function with a missing argument is
   undefined behavior. lock_path is assumed to hold the path that was given
   to unlink() - confirm against the surrounding lines. */
i_error("unlink(%s) failed: %m", lock_path);
}
try {
!exists);
} catch (CLuceneError &err) {
return -1;
}
return 0;
}
{
const struct lucene_analyzer *a;
struct lucene_analyzer new_analyzer;
return a->analyzer;
}
return new_analyzer.analyzer;
}
#ifdef HAVE_LUCENE_TEXTCAT
static Analyzer *
{
const char *lang;
if (textcat_broken)
return NULL;
index->textcat_dir);
return NULL;
}
}
/* try to guess the language */
return NULL;
return index->default_analyzer;
}
#else
static Analyzer *
{
return NULL;
}
#endif
{
int ret = 0;
return 0;
try {
} catch (CLuceneError &err) {
ret = -1;
}
return ret;
}
const char *hdr_name)
{
wchar_t id[MAX_INT_STRLEN];
if (lucene_index_build_flush(index) < 0)
return -1;
index->doc->add(*_CLNEW Field(_T("box"), index->mailbox_guid, Field::STORE_YES | Field::INDEX_UNTOKENIZED));
}
/* hdr_name should be ASCII, but don't break in case it isn't */
lucene_utf8_n_to_tchar((const unsigned char *)hdr_name,
} else if (size > 0) {
}
return 0;
}
{
int ret = 0;
/* no changes. */
return 0;
}
return -1;
}
if (lucene_index_build_flush(index) < 0)
ret = -1;
try {
} catch (CLuceneError &err) {
ret = -1;
}
return ret;
}
static int
{
unsigned int i;
for (i = 0; i < sizeof(src_chars)-1; i++) {
else
return -1;
}
if (src[i] != '\0')
return -1;
src_chars[i] = '\0';
}
static int
{
struct mailbox_status status;
return -1;
} T_END;
return 0;
}
{
int ret;
return ret;
}
static int
{
i_error("lucene: Corrupted FTS index %s: No mailbox for document",
return -1;
}
i_error("lucene: Corrupted FTS index %s: "
return -1;
}
return 0;
}
static int
{
int ret;
return 0;
/* same as last one */
}
enum mail_error error;
const char *errstr;
if (error == MAIL_ERROR_NOTFOUND)
ret = 0;
else {
i_error("lucene: Couldn't open mailbox %s: %s",
ret = -1;
}
return ret;
}
i_error("lucene: Failed to sync mailbox %s: %s",
return -1;
}
ctx->last_existing_uid = 0;
ctx->uids_iter_n = 0;
return 1;
}
static int
{
return 0;
&idx_uid)) {
if (idx_uid == lucene_uid) {
ctx->uids_iter_n++;
return 1;
}
if (idx_uid < lucene_uid) {
/* Lucene is missing a UID from the middle. Delete
the rest of the messages from this mailbox and
reindex. */
i_warning("lucene: Mailbox %s "
"missing UIDs in the middle",
}
} else {
/* UID has been expunged from index. delete from
lucene as well. */
}
return 0;
} else {
/* the rest of the messages have been expunged from index */
return 0;
}
}
struct mailbox_list *list)
{
struct rescan_context ctx;
bool failed = false;
int ret;
return ret;
try {
if (ret > 0)
if (ret < 0)
failed = true;
else if (ret == 0)
}
} catch (CLuceneError &err) {
failed = true;
}
rescan_finish(&ctx);
return failed ? -1 : 0;
}
{
unsigned char guid_hex[MAILBOX_GUID_HEX_LENGTH];
unsigned int i;
for (i = 0; i < MAILBOX_GUID_HEX_LENGTH; i++)
wguid_hex[i] = '\0';
}
static int
const struct fts_expunge_log_read_record *rec)
{
unsigned int count;
int ret;
return ret;
/* search for UIDs between lowest and highest expunged UID */
try {
&uid) < 0 ||
}
} catch (CLuceneError &err) {
ret = -1;
}
return ret < 0 ? -1 : 0;
}
struct fts_expunge_log *log)
{
struct fts_expunge_log_read_ctx *ctx;
const struct fts_expunge_log_read_record *rec;
ret = -1;
break;
}
}
try {
} catch (CLuceneError &err) {
ret = -1;
}
return -1;
return ret2;
}
{
int ret = 0;
try {
} catch (CLuceneError &err) {
ret = -1;
}
return ret;
}
// Mostly copy&pasted from CLucene's QueryParser
//
// Builds a query for `queryText` against field `_field`, using `analyzer`
// to split the text into tokens.
// Returns NULL when the analyzer produces no tokens; otherwise returns one
// of several query objects depending on how many tokens there are and
// whether any of them share a position. `fuzzy` only affects the
// single-token case.
// NOTE(review): several declarations and statements of this function are
// not part of this view (e.g. `t`, `v`, `ret`, `q`, `mpq`, `pq`), so the
// exact query types cannot be confirmed from here.
static Query* getFieldQuery(Analyzer *analyzer, const TCHAR* _field, const TCHAR* queryText, bool fuzzy) {
// Use the analyzer to get all the tokens, and then build a TermQuery,
// PhraseQuery, or nothing based on the term count
int32_t positionCount = 0;
bool severalTokensAtSamePosition = false;
// Collect tokens into v until no more are available.
while (true) {
try {
t = NULL;
});
if (t == NULL)
break;
v.push_back(t);
// A non-zero increment advances the position count; a zero increment
// means this token sits at the same position as the previous one.
if (t->getPositionIncrement() != 0)
positionCount += t->getPositionIncrement();
else
severalTokensAtSamePosition = true;
}
try {
}
// No tokens at all: nothing to search for.
if (v.size() == 0)
return NULL;
else if (v.size() == 1) {
// Exactly one token: the query form depends on `fuzzy`.
if (fuzzy)
else
return ret;
} else {
// Several tokens: the shape depends on whether any share a position.
if (severalTokensAtSamePosition) {
if (positionCount == 1) {
// no phrase query:
}
return q;
}else {
t = v.at(i);
multiTerms.clear();
}
position += t->getPositionIncrement();
}
return mpq;
}
}else {
t = v.at(i);
position += t->getPositionIncrement();
}
return pq;
}
}
}
static Query *
{
}
static bool
{
Query *q;
/* FIXME: we could handle this by doing multiple queries.. */
return false;
}
case SEARCH_TEXT: {
q = NULL;
else {
q = bq;
}
break;
}
case SEARCH_BODY:
break;
case SEARCH_HEADER:
case SEARCH_HEADER_ADDRESS:
return false;
/* FIXME: handle existence of a search key */
return false;
}
q = lucene_get_query(index,
arg);
break;
default:
return false;
}
if (q == NULL) {
/* couldn't handle this search after all (e.g. trying to search
a stop word) */
return false;
}
if (!and_args)
else
return true;
}
static bool
{
Query *q;
/* FIXME: we could handle this by doing multiple queries.. */
return false;
}
case SEARCH_HEADER:
case SEARCH_HEADER_ADDRESS:
return false;
/* we can check if the search key exists in some header and
filter out the messages that have no chance of matching */
break;
default:
return false;
}
if (q == NULL) {
/* couldn't handle this search after all (e.g. trying to search
a stop word) */
return false;
}
if (!and_args)
else
return true;
}
static int
{
struct fts_score_map *score;
int ret = 0;
try {
result->scores_sorted = true;
&uid) < 0) {
ret = -1;
break;
}
result->scores_sorted = false;
}
}
return ret;
} catch (CLuceneError &err) {
return -1;
}
}
struct fts_result *result)
{
struct mail_search_arg *arg;
if (lucene_index_open_search(index) <= 0)
return -1;
bool have_definites = false;
arg->match_always = true;
have_definites = true;
}
}
if (have_definites) {
&result->definite_uids) < 0)
return -1;
}
bool have_maybies = false;
arg->match_always = true;
have_maybies = true;
}
}
if (have_maybies) {
&result->maybe_uids) < 0)
return -1;
}
return 0;
}
static int
{
struct fts_score_map *score;
int ret = 0;
struct hash_iterate_context *iter;
}
try {
i_error("lucene: Corrupted FTS index %s: No mailbox for document",
ret = -1;
break;
}
continue;
}
&uid) < 0) {
ret = -1;
break;
}
}
}
return ret;
} catch (CLuceneError &err) {
return -1;
}
}
struct hash_table *guids,
struct fts_multi_result *result)
{
struct mail_search_arg *arg;
if (lucene_index_open_search(index) <= 0)
return -1;
bool have_definites = false;
arg->match_always = true;
have_definites = true;
}
}
if (have_definites) {
return -1;
}
return 0;
}
/* Iteration state returned by the iterator-init function below and consumed
   by the function that returns const struct lucene_index_record *. */
struct lucene_index_iter {
/* index being iterated */
struct lucene_index *index;
/* presumably the storage for the record handed back to the caller -
   TODO confirm */
struct lucene_index_record rec;
/* current position; incremented by the record-returning function */
size_t i;
/* presumably set when setup or iteration failed - TODO confirm */
bool failed;
};
struct lucene_index_iter *
{
struct lucene_index_iter *iter;
int ret;
if (ret < 0)
return iter;
}
try {
} catch (CLuceneError &err) {
}
return iter;
}
const struct lucene_index_record *
{
return NULL;
return NULL;
iter->i++;
}
{
}
return ret;
}