lucene-wrapper.cc revision dce232dfbb2244555299dffb3618a4724748d260
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen/* Copyright (C) 2006 Timo Sirainen */
33502e55a9bf4cafcd184ca9b114c126e420f856Timo Sirainen/* Lucene's default is 10000. Use it here also.. */
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1
6380f2bc729a03b328793e8ad6ba7587620fa184Timo Sirainen return false;
6380f2bc729a03b328793e8ad6ba7587620fa184Timo Sirainenclass DovecotAnalyzer : public standard::StandardAnalyzer {
6380f2bc729a03b328793e8ad6ba7587620fa184Timo Sirainen TokenStream *tokenStream(const TCHAR *fieldName,
6380f2bc729a03b328793e8ad6ba7587620fa184Timo Sirainen if (fieldName != 0 && wcscmp(fieldName, L"contents") != 0)
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainenstruct lucene_index *lucene_index_init(const char *path)
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainenstatic void lucene_index_close(struct lucene_index *index)
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainenvoid lucene_index_deinit(struct lucene_index *index)
6380f2bc729a03b328793e8ad6ba7587620fa184Timo Sirainenint lucene_index_select_mailbox(struct lucene_index *index,
6380f2bc729a03b328793e8ad6ba7587620fa184Timo Sirainen STRCPY_AtoT(index->tmailbox_name, mailbox_name, len);
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainenstatic int lucene_index_open(struct lucene_index *index)
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen index->reader = IndexReader::open(index->path);
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen i_error("lucene: IndexReader::open(%s): %s", index->path, err.what());
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainenstatic int lucene_index_open_search(struct lucene_index *index)
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen index->searcher = _CLNEW IndexSearcher(index->reader);
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainenstatic int lucene_doc_get_uid(struct lucene_index *index,
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen TCHAR *uid = field == NULL ? NULL : field->stringValue();
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen i_error("lucene: Corrupted FTS index %s: No UID for document",
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen while (*uid != 0) {
6380f2bc729a03b328793e8ad6ba7587620fa184Timo Sirainenlucene_index_get_last_uid(struct lucene_index *index)
6380f2bc729a03b328793e8ad6ba7587620fa184Timo Sirainen Term mailbox_term(_T("box"), index->tmailbox_name);
6380f2bc729a03b328793e8ad6ba7587620fa184Timo Sirainen i_error("lucene: last_uid search for mailbox %s "
6380f2bc729a03b328793e8ad6ba7587620fa184Timo Sirainen "returned multiple hits", index->mailbox_name);
6380f2bc729a03b328793e8ad6ba7587620fa184Timo Sirainen for (int32_t i = 0; i < hits->length(); i++) {
dce232dfbb2244555299dffb3618a4724748d260Timo Sirainen /*if (del_id >= 0)
dce232dfbb2244555299dffb3618a4724748d260Timo Sirainen index->reader->deleteDocument(del_id);*/
6380f2bc729a03b328793e8ad6ba7587620fa184Timo Sirainen i_error("lucene: last_uid search failed: %s", err.what());
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainenint lucene_index_build_init(struct lucene_index *index, uint32_t *last_uid_r)
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen bool exists = IndexReader::indexExists(index->path);
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen index->writer = _CLNEW IndexWriter(index->path,
33502e55a9bf4cafcd184ca9b114c126e420f856Timo Sirainen index->writer->setMaxFieldLength(MAX_TERMS_PER_DOCUMENT);
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainenstatic unsigned int utf8_strlen_n(const void *datap, size_t size)
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen const unsigned char *data = (const unsigned char *)datap;
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainenstatic int lucene_index_build_flush(struct lucene_index *index)
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen i_error("lucene: IndexWriter::addDocument(%s) failed: %s",
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainenint lucene_index_build_more(struct lucene_index *index, uint32_t uid,
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen unsigned int len;
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen lucene_utf8towcs(dest, (const char *)data, len + 1);
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen index->doc->add(*Field::Text(_T("uid"), tid));
6380f2bc729a03b328793e8ad6ba7587620fa184Timo Sirainen index->doc->add(*Field::Text(_T("box"), index->tmailbox_name));
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen index->doc->add(*Field::Text(_T("contents"), dest));
6380f2bc729a03b328793e8ad6ba7587620fa184Timo Sirainenstatic int lucene_index_update_last_uid(struct lucene_index *index)
6380f2bc729a03b328793e8ad6ba7587620fa184Timo Sirainen i_snprintf(id, sizeof(id), "%u", index->last_uid);
6380f2bc729a03b328793e8ad6ba7587620fa184Timo Sirainen doc.add(*Field::Text(_T("last_uid"), _T("1")));
6380f2bc729a03b328793e8ad6ba7587620fa184Timo Sirainen doc.add(*Field::Text(_T("box"), index->tmailbox_name));
dce232dfbb2244555299dffb3618a4724748d260Timo Sirainen //index->reader->deleteDocument(index->last_uid_doc_id);
dce232dfbb2244555299dffb3618a4724748d260Timo Sirainen i_error("lucene: IndexWriter::deleteDocument(%s) failed: %s",
6380f2bc729a03b328793e8ad6ba7587620fa184Timo Sirainen i_error("lucene: IndexWriter::addDocument(%s) failed: %s",
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainenint lucene_index_build_deinit(struct lucene_index *index)
dce232dfbb2244555299dffb3618a4724748d260Timo Sirainen /* no changes. */
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen i_error("lucene: IndexWriter::optimize(%s) failed: %s",
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen i_error("lucene: IndexWriter::close(%s) failed: %s",
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainenint lucene_index_lookup(struct lucene_index *index, const char *key,
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen unsigned int len = utf8_strlen_n(quoted_key, (size_t)-1);
6380f2bc729a03b328793e8ad6ba7587620fa184Timo Sirainen content_query = QueryParser::parse(tkey, _T("contents"),
33502e55a9bf4cafcd184ca9b114c126e420f856Timo Sirainen i_info("lucene: QueryParser::parse(%s) failed: %s",
6380f2bc729a03b328793e8ad6ba7587620fa184Timo Sirainen Term mailbox_term(_T("box"), index->tmailbox_name);
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen for (int32_t i = 0; i < hits->length(); i++) {
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainenint lucene_index_filter(struct lucene_index *index, const char *key,
57f4445a46726a17bfe78b0964dd301a6ccb40ecTimo Sirainen /* FIXME: implement */