/* Copyright (c) 2006-2018 Dovecot authors, see the included COPYING file */
extern "C" {
#include "lib.h"
#include "array.h"
#include "unichar.h"
#include "hash.h"
#include "hex-binary.h"
#include "ioloop.h"
#include "unlink-directory.h"
#include "ioloop.h"
#include "mail-index.h"
#include "mail-search.h"
#include "mail-namespace.h"
#include "mailbox-list-private.h"
#include "mail-storage.h"
#include "fts-expunge-log.h"
#include "fts-lucene-plugin.h"
#include "lucene-wrapper.h"
#ifdef HAVE_LIBEXTTEXTCAT_TEXTCAT_H
# include <libexttextcat/textcat.h>
#elif defined (HAVE_LIBTEXTCAT_TEXTCAT_H)
# include <libtextcat/textcat.h>
#elif defined (HAVE_FTS_TEXTCAT)
# include <textcat.h>
#endif
};
#include <CLucene.h>
#include "SnowballAnalyzer.h"
/* Lucene's default is 10000. Use it here also.. */
using namespace lucene::queryParser;
/* Per-query state for the Lucene wrapper.
   NOTE(review): no members are declared in this definition - confirm that
   is intentional before relying on sizeof() or adding fields. */
struct lucene_query {
};
/* One analyzer entry kept by the wrapper, identified by a language string. */
struct lucene_analyzer {
/* presumably the language name/code this analyzer was created for -
   TODO confirm format and ownership of the string */
char *lang;
};
/* State of one Lucene index as seen by this wrapper. */
struct lucene_index {
/* presumably the location of the index on disk - TODO confirm who
   allocates and frees it */
char *path;
/* NOTE(review): looks like a switch for running without a text analyzer;
   confirm where it is set and which code paths honor it */
bool no_analyzer;
};
/* Bookkeeping carried through an index rescan. */
struct rescan_context {
/* presumably the result of the most recent mailbox open/sync step -
   TODO confirm the value convention (<0, 0, >0) */
int box_ret;
/* position counter used while walking UIDs; the rescan code resets it to 0
   and increments it as UIDs are matched */
unsigned int uids_iter_n;
/* NOTE(review): appears to guard against logging the same warning more
   than once per rescan - confirm */
bool warned;
};
#ifdef HAVE_FTS_TEXTCAT
#endif
static int textcat_refcount = 0;
const char *msg);
struct rescan_context *rescan_ctx);
struct mailbox_list *list,
const struct fts_lucene_settings *set)
{
} else {
/* this is valid only for doveadm dump, so it doesn't matter */
}
} else
#ifdef HAVE_FTS_STEMMER
} else
#endif
{
}
}
return index;
}
{
try {
} catch (CLuceneError &err) {
}
}
try {
} catch (CLuceneError &err) {
}
}
}
{
struct lucene_analyzer *a;
}
#ifdef HAVE_FTS_TEXTCAT
#endif
}
}
{
unsigned int i;
return;
for (i = 0; i < len; i++) {
data[i] = ' ';
}
}
{
i_unreached();
}
static const wchar_t *
{
wchar_t *ret;
unsigned int len;
i_unreached();
(void)array_append_space(&dest_arr);
return ret;
}
const wchar_t guid[MAILBOX_GUID_HEX_LENGTH])
{
MAILBOX_GUID_HEX_LENGTH * sizeof(wchar_t));
}
{
}
const char *msg)
{
i_error("lucene index %s: %s failed (#%d): %s",
/* delete corrupted index. most IO errors are also about
missing files and other such corruption.. */
}
}
{
return 1;
}
return 0;
try {
} catch (CLuceneError &err) {
return -1;
}
return 1;
}
{
int ret;
return 1;
return ret;
return 1;
}
static int
{
i_error("lucene: Corrupted FTS index %s: No UID for document",
return -1;
}
while (*uid != 0) {
uid++;
}
return 0;
}
static uint32_t
{
return 0;
while (*part != 0) {
part++;
}
return num;
}
{
int ret = 0;
*last_uid_r = 0;
return ret;
try {
&uid) < 0) {
ret = -1;
break;
}
}
} catch (CLuceneError &err) {
ret = -1;
}
*last_uid_r = last_uid;
return ret;
}
{
int ret;
return -1;
if (ret == 0) {
*count_r = 0;
return 0;
}
}
return 0;
}
{
const char *error;
int ret = 0;
if (ret != 0)
return ret;
i_warning("fts-lucene: Settings have changed, rebuilding index for mailbox");
/* settings changed, rebuild index */
ret = -1;
} else {
}
return ret;
}
{
const char *lock_path;
}
if (lucene_settings_check(index) < 0)
return -1;
try {
!exists);
} catch (CLuceneError &err) {
return -1;
}
return 0;
}
#ifdef HAVE_FTS_TEXTCAT
{
const struct lucene_analyzer *a;
return a->analyzer;
}
return new_analyzer.analyzer;
}
{
unsigned int len;
if (textcat_dir == NULL)
return NULL;
/* textcat really wants the '/' suffix */
}
static Analyzer *
{
const char *lang;
if (textcat_broken)
return NULL;
return NULL;
}
}
/* try to guess the language */
return NULL;
return index->default_analyzer;
}
#else
static Analyzer *
{
return NULL;
}
#endif
{
int ret = 0;
return 0;
try {
}
} catch (CLuceneError &err) {
ret = -1;
}
return ret;
}
{
if (lucene_index_build_flush(index) < 0)
return -1;
if (part_idx != 0) {
}
index->doc->add(*_CLNEW Field(_T("box"), index->mailbox_guid, Field::STORE_YES | Field::INDEX_UNTOKENIZED));
}
}
if (datasize < 4096)
else
/* hdr_name should be ASCII, but don't break in case it isn't */
lucene_utf8_n_to_tchar((const unsigned char *)hdr_name,
} else if (size > 0) {
}
return 0;
}
{
int ret = 0;
/* no changes. */
return 0;
}
index->prev_part_idx = 0;
return -1;
}
if (lucene_index_build_flush(index) < 0)
ret = -1;
try {
} catch (CLuceneError &err) {
ret = -1;
}
return ret;
}
static int
{
unsigned int i;
for (i = 0; i < sizeof(src_chars)-1; i++) {
else
return -1;
}
if (src[i] != '\0')
return -1;
src_chars[i] = '\0';
}
static int
{
return -1;
} T_END;
return 0;
}
{
int ret;
return ret;
}
static int
{
i_error("lucene: Corrupted FTS index %s: No mailbox for document",
return -1;
}
i_error("lucene: Corrupted FTS index %s: "
return -1;
}
return 0;
}
static int
{
int ret;
return 0;
/* same as last one */
}
(enum mailbox_flags)0);
const char *errstr;
if (error == MAIL_ERROR_NOTFOUND)
ret = 0;
else {
i_error("lucene: Couldn't open mailbox %s: %s",
ret = -1;
}
return ret;
}
i_error("lucene: Failed to sync mailbox %s: %s",
return -1;
}
ctx->last_existing_uid = 0;
ctx->uids_iter_n = 0;
return 1;
}
static int
{
return 0;
&idx_uid)) {
if (idx_uid == lucene_uid) {
ctx->uids_iter_n++;
return 1;
}
if (idx_uid < lucene_uid) {
/* Lucene is missing a UID from the middle. Delete
the rest of the messages from this mailbox and
reindex. */
i_warning("lucene: Mailbox %s "
"missing UIDs in the middle",
}
} else {
/* UID has been expunged from index. delete from
lucene as well. */
}
return 0;
} else {
/* the rest of the messages have been expunged from index */
return 0;
}
}
static void
struct rescan_context *rescan_ctx,
const char *vname,
const struct fts_index_header *hdr)
{
(enum mailbox_flags)0);
if (mailbox_open(box) == 0 &&
&metadata) == 0 &&
(rescan_ctx == NULL ||
/* this mailbox had no records in lucene index.
make sure its last indexed uid is 0 */
}
mailbox_free(&box);
}
struct rescan_context *rescan_ctx)
{
(enum mailbox_list_iter_flags)
const char *vname;
(void)mailbox_list_iter_deinit(&iter);
if (ns->prefix_len > 0 &&
/* namespace prefix itself isn't returned by the listing */
}
}
{
bool failed = false;
int ret;
return ret;
if (ret > 0) try {
if (ret > 0)
if (ret < 0)
failed = true;
else if (ret == 0)
}
} catch (CLuceneError &err) {
failed = true;
}
rescan_finish(&ctx);
return failed ? -1 : 0;
}
{
unsigned int i;
for (i = 0; i < MAILBOX_GUID_HEX_LENGTH; i++)
wguid_hex[i] = '\0';
}
static bool
const struct fts_expunge_log_read_record *rec)
{
unsigned int n;
/* RangeQuery and WildcardQuery work by enumerating through all terms
that match them, and then adding TermQueries for them. So we can
simply do the same directly, and if it looks like there are too
many terms just go through everything. */
return false;
}
return true;
}
static int
const struct fts_expunge_log_read_record *rec)
{
int ret;
return ret;
try {
&uid) < 0 ||
}
} catch (CLuceneError &err) {
ret = -1;
}
return ret < 0 ? -1 : 0;
}
struct fts_expunge_log *log)
{
ret = -1;
break;
}
}
return -1;
return ret2;
}
{
int ret = 0;
return 0;
try {
} catch (CLuceneError &err) {
ret = -1;
}
try {
} catch (CLuceneError &err) {
ret = -1;
}
return ret;
}
// Mostly copy&pasted from CLucene's QueryParser
// Builds a Query from the tokens collected for queryText.
// Returns NULL when no tokens were collected. With exactly one token the
// result depends on `fuzzy`; with several tokens the result depends on
// whether any tokens share a position (see the branches below).
// NOTE(review): ownership of the returned Query and of the collected tokens
// is not documented here - confirm against the callers.
static Query* getFieldQuery(Analyzer *analyzer, const TCHAR* _field, const TCHAR* queryText, bool fuzzy) {
// Use the analyzer to get all the tokens, and then build a TermQuery,
// PhraseQuery, or nothing based on the term count
// Set once any token reports a position increment of 0.
bool severalTokensAtSamePosition = false;
// Token collection loop: runs until no further token is available.
while (true) {
try {
t = NULL;
});
// A NULL token ends the collection loop.
if (t == NULL)
break;
// Keep every token; v.size() later selects the kind of query to build.
v.push_back(t);
// Sum the non-zero position increments; a zero increment means this token
// occupies the same position as the previous one.
if (t->getPositionIncrement() != 0)
positionCount += t->getPositionIncrement();
else
severalTokensAtSamePosition = true;
}
try {
}
// Nothing was tokenized: there is no query to build.
if (v.size() == 0)
return NULL;
// Exactly one token: `fuzzy` selects which query form is returned.
else if (v.size() == 1) {
if (fuzzy)
else
return ret;
} else {
// Several tokens, at least two of them at the same position.
if (severalTokensAtSamePosition) {
// All tokens sit at a single position.
if (positionCount == 1) {
// no phrase query:
}
return q;
}else {
// Tokens span more than one position while some positions hold several
// tokens; `position` tracks the running position while walking v.
t = v.at(i);
multiTerms.clear();
}
position += t->getPositionIncrement();
}
return mpq;
}
}else {
// Several tokens, each at its own position; `position` advances by each
// token's increment.
t = v.at(i);
position += t->getPositionIncrement();
}
return pq;
}
}
}
static Query *
{
if (fuzzy)
else
return ret;
}
}
}
static Query *
{
}
static bool
struct mail_search_arg *arg,
enum fts_lookup_flags flags)
{
Query *q;
return false;
/* FIXME: we could handle this by doing multiple queries.. */
return false;
}
case SEARCH_TEXT: {
q = NULL;
else {
q = bq;
}
break;
}
case SEARCH_BODY:
break;
case SEARCH_HEADER:
case SEARCH_HEADER_ADDRESS:
return false;
q = lucene_get_query(index,
arg);
break;
default:
return false;
}
if (q == NULL) {
/* couldn't handle this search after all (e.g. trying to search
a stop word) */
return false;
}
if (!and_args)
else
return true;
}
static bool
struct mail_search_arg *arg,
enum fts_lookup_flags flags)
{
return false;
/* FIXME: we could handle this by doing multiple queries.. */
return false;
}
case SEARCH_HEADER:
case SEARCH_HEADER_ADDRESS:
/* checking potential existence of the header name */
break;
}
return false;
/* we can check if the search key exists in some header and
filter out the messages that have no chance of matching */
break;
default:
return false;
}
if (q == NULL) {
/* couldn't handle this search after all (e.g. trying to search
a stop word) */
return false;
}
if (!and_args)
else
return true;
}
{
return TRUE;
}
return FALSE;
}
{
if (queries_have_non_must_nots(queries)) {
} else {
}
}
static int
{
int ret = 0;
try {
result->scores_sorted = true;
&uid) < 0) {
ret = -1;
break;
}
/* duplicate result */
result->scores_sorted = false;
}
}
return ret;
} catch (CLuceneError &err) {
return -1;
}
}
struct mail_search_arg *args,
enum fts_lookup_flags flags,
struct fts_result *result)
{
if (lucene_index_open_search(index) <= 0)
return -1;
bool have_definites = false;
arg->match_always = true;
have_definites = true;
}
}
if (have_definites) {
(flags & FTS_LOOKUP_FLAG_NO_AUTO_FUZZY) == 0 ?
uids_arr) < 0)
return -1;
}
if (have_definites) {
/* FIXME: mixing up definite + maybe queries is broken. if the
definite query matched, it'll just assume that the maybe
queries matched as well */
return 0;
}
bool have_maybies = false;
arg->match_always = true;
have_maybies = true;
}
}
if (have_maybies) {
&result->maybe_uids) < 0)
return -1;
}
return 0;
}
static int
enum fts_lookup_flags flags,
struct fts_multi_result *result)
{
int ret = 0;
}
try {
i_error("lucene: Corrupted FTS index %s: No mailbox for document",
ret = -1;
break;
}
continue;
}
&uid) < 0) {
ret = -1;
break;
}
(flags & FTS_LOOKUP_FLAG_NO_AUTO_FUZZY) == 0 ?
if (!array_is_created(uids_arr)) {
}
/* duplicate result */
} else {
}
}
return ret;
} catch (CLuceneError &err) {
return -1;
}
}
struct mail_search_arg *args,
enum fts_lookup_flags flags,
struct fts_multi_result *result)
{
if (lucene_index_open_search(index) <= 0)
return -1;
bool have_definites = false;
arg->match_always = true;
have_definites = true;
}
}
if (have_definites) {
result) < 0)
return -1;
}
return 0;
}
/* Cursor for iterating over records of a Lucene index. */
struct lucene_index_iter {
/* current position; incremented each time the iterator advances */
size_t i;
/* NOTE(review): presumably set when setting up or advancing the iterator
   hit an error - confirm where it is written and who checks it */
bool failed;
};
struct lucene_index_iter *
{
int ret;
if (ret < 0)
return iter;
}
try {
} catch (CLuceneError &err) {
}
return iter;
}
const struct lucene_index_record *
{
return NULL;
return NULL;
iter->i++;
}
{
}
return ret;
}
/* Shutdown entry point of the Lucene wrapper. Takes no arguments and
   returns nothing.
   NOTE(review): the body performs no work as written - confirm whether
   CLucene's global cleanup is expected to be invoked from here. */
void lucene_shutdown(void)
{
}