index-search.c revision c8750d1836c317670bf00b9c47eaa4df909c77c8
/* Copyright (c) 2002-2012 Dovecot authors, see the included COPYING file */
#include "lib.h"
#include "ioloop.h"
#include "array.h"
#include "istream.h"
#include "utc-offset.h"
#include "str.h"
#include "time-util.h"
#include "unichar.h"
#include "imap-match.h"
#include "message-address.h"
#include "message-date.h"
#include "message-search.h"
#include "message-parser.h"
#include "mail-index-modseq.h"
#include "index-storage.h"
#include "index-mail.h"
#include "index-sort.h"
#include "mail-search.h"
#include "mailbox-search-result-private.h"
#include "index-search-private.h"
#include <stdlib.h>
#include <ctype.h>
/* Interval, in seconds, for search progress notifications.
   NOTE(review): meaning taken from the name; the code that consumes this
   value is not visible in this excerpt. */
#define SEARCH_NOTIFY_INTERVAL_SECS 10
/* Relative cost weights (unsigned long long constants).
   NOTE(review): presumably multiplied with per-search I/O counters to
   produce a cost estimate; the function doing that is not visible here,
   so confirm what each weight is applied to. */
#define SEARCH_COST_DENTRY 3ULL
#define SEARCH_COST_ATTR 1ULL
#define SEARCH_COST_FILES_READ 25ULL
#define SEARCH_COST_KBYTE 15ULL
#define SEARCH_COST_CACHE 1ULL
/* Time bounds in microseconds used by the "would this search block" check:
   an elapsed time below MIN is treated as "not finished yet", and MAX is
   the numerator of the MAX/elapsed ratio computed afterwards
   (NOTE(review): presumably used to rescale the cost guess - confirm). */
#define SEARCH_MIN_NONBLOCK_USECS 200000
#define SEARCH_MAX_NONBLOCK_USECS 250000
/* NOTE(review): presumably the starting value of the cost limit before any
   timing feedback is available - its use is not visible here, confirm. */
#define SEARCH_INITIAL_MAX_COST 30000
/* If less than this many microseconds were spent, the estimate is
   considered unreliable and changes to the guess are limited. */
#define SEARCH_RECALC_MIN_USECS 50000
/* State handed to the header-search callbacks (passed as &hdr_ctx to
   search_header). */
struct search_header_context {
/* mail whose headers are being searched; differs from the search's own
   imail for mails in virtual mailboxes */
struct index_mail *imail;
/* NOTE(review): presumably the search argument list being evaluated
   against the headers - confirm against the initializing code */
struct mail_search_arg *args;
/* NOTE(review): presumably the header line currently being examined -
   confirm */
struct message_header_line *hdr;
/* when set, extra header-parsing work is done while headers are read
   (checked both before and during the header walk) */
unsigned int parse_headers:1;
/* NOTE(review): setters/readers of the next two flags are not visible in
   this excerpt; meaning to be confirmed */
unsigned int custom_header:1;
unsigned int threading:1;
};
/* State handed to the body-search callback. */
struct search_body_context {
/* the index search this body search belongs to */
struct index_search_context *index_ctx;
/* NOTE(review): presumably the parsed MIME part tree of the message being
   searched - the assignment is not visible here, confirm */
struct message_part *part;
};
static void search_parse_msgset_args(unsigned int messages_count,
struct mail_search_arg *args,
static void ATTR_NULL(2)
{
}
struct index_search_context *ctx)
{
struct mailbox_metadata metadata;
bool match;
case SEARCH_SEQSET:
break;
case SEARCH_UIDSET:
case SEARCH_INTHREAD:
case SEARCH_FLAGS:
case SEARCH_KEYWORDS:
case SEARCH_MODSEQ:
break;
case SEARCH_MAILBOX_GUID:
&metadata) < 0) {
/* result will be unknown */
break;
}
else
break;
case SEARCH_MAILBOX:
case SEARCH_MAILBOX_GLOB:
break;
case SEARCH_ALL:
else
break;
default:
break;
}
}
struct index_search_context *ctx)
{
else
ARG_SET_RESULT(arg, 0);
}
}
struct mail_search_arg *arg)
{
const unsigned int *keyword_indexes;
unsigned int i, j, count;
/* there probably aren't many keywords, so O(n*m) for now */
for (i = 0; i < search_kws->count; i++) {
for (j = 0; j < count; j++) {
break;
}
if (j == count)
return 0;
}
return 1;
}
static bool
{
/* no private view (set by view syncing) -> no private flags */
return FALSE;
}
}
/* Returns >0 = matched, 0 = not matched, -1 = unknown */
struct mail_search_arg *arg,
const struct mail_index_record *rec)
{
int ret;
case SEARCH_UIDSET:
case SEARCH_INTHREAD:
case SEARCH_FLAGS:
/* recent flag shouldn't be set, but indexes from v1.0.x
may contain it. */
flags |= MAIL_RECENT;
flags &= ~pvt_flags_mask;
}
case SEARCH_KEYWORDS:
T_BEGIN {
} T_END;
return ret;
case SEARCH_MODSEQ: {
} else {
}
}
default:
return -1;
}
}
struct index_search_context *ctx)
{
const struct mail_index_record *rec;
case -1:
/* unknown */
break;
case 0:
ARG_SET_RESULT(arg, 0);
break;
default:
break;
}
}
/* Returns >0 = matched, 0 = not matched, -1 = unknown */
struct mail_search_arg *arg)
{
const char *str;
case SEARCH_MAILBOX:
&str) < 0)
return -1;
case SEARCH_MAILBOX_GLOB:
&str) < 0)
return -1;
default:
return -1;
}
}
struct index_search_context *ctx)
{
case -1:
/* unknown */
break;
case 0:
ARG_SET_RESULT(arg, 0);
break;
default:
break;
}
}
/* Returns >0 = matched, 0 = not matched, -1 = unknown */
struct mail_search_arg *arg)
{
const char *str;
int tz_offset;
bool have_tz_offset;
/* internal dates */
case SEARCH_BEFORE:
case SEARCH_ON:
case SEARCH_SINCE:
return -1;
break;
return -1;
break;
return -1;
break;
}
MAIL_SEARCH_ARG_FLAG_USE_TZ) == 0) {
if (!have_tz_offset) {
}
}
case SEARCH_BEFORE:
case SEARCH_ON:
case SEARCH_SINCE:
default:
/* unreachable */
break;
}
/* sizes */
case SEARCH_SMALLER:
case SEARCH_LARGER:
return -1;
else
case SEARCH_GUID:
return -1;
default:
return -1;
}
}
struct index_search_context *ctx)
{
case -1:
/* unknown */
break;
case 0:
ARG_SET_RESULT(arg, 0);
break;
default:
break;
}
}
{
int timezone_offset;
if (sent_value == NULL)
return 0;
/* NOTE: RFC-3501 specifies that timezone is ignored
in searches. sent_time is returned as UTC, so change it. */
&sent_time, &timezone_offset))
return 0;
switch (type) {
case SEARCH_BEFORE:
return sent_time < search_time;
case SEARCH_ON:
return sent_time >= search_time &&
case SEARCH_SINCE:
return sent_time >= search_time;
default:
i_unreached();
}
}
static struct message_search_context *
{
dtc) < 0)
/* we don't get here if arg is "", but dtc can be "" if it
only contains characters that we need to ignore. handle
those searches by returning them as non-matched. */
} T_END;
}
unsigned int src_len)
{
unsigned int i;
for (i = 0; i < src_len; i++) {
if (!prev_lwsp) {
}
} else {
}
}
}
struct search_header_context *ctx)
{
struct message_search_context *msg_search_ctx;
struct message_block block;
struct message_header_line hdr;
int ret;
/* first check that the field name matches the argument. */
case SEARCH_BEFORE:
case SEARCH_ON:
case SEARCH_SINCE:
return;
/* date is handled differently than others */
return;
}
}
return;
case SEARCH_HEADER:
case SEARCH_HEADER_ADDRESS:
return;
break;
default:
return;
}
/* we're just testing existence of the field. always matches. */
return;
}
return;
}
/* We're searching only for values, so drop header name and middle
parts. We use header searching so that MIME words will be decoded. */
hdr.middle_len = 0;
if (msg_search_ctx == NULL)
return;
T_BEGIN {
struct message_address *addr;
case SEARCH_HEADER:
/* simple match */
break;
case SEARCH_HEADER_ADDRESS:
/* we have to match against normalized address */
(unsigned int)-1, TRUE);
break;
/* convert LWSP to single spaces */
break;
default:
i_unreached();
}
} T_END;
/* there may be multiple headers. don't mark this failed yet. */
if (ret > 0)
}
void *context ATTR_UNUSED)
{
case SEARCH_BEFORE:
case SEARCH_ON:
case SEARCH_SINCE:
break;
/* date header not found, so we match only for
NOT searches */
ARG_SET_RESULT(arg, 0);
}
break;
case SEARCH_HEADER:
case SEARCH_HEADER_ADDRESS:
ARG_SET_RESULT(arg, 0);
break;
default:
break;
}
}
struct search_header_context *ctx)
{
/* end of headers, mark all unknown SEARCH_HEADERs unmatched */
ctx);
return;
}
return;
if (ctx->parse_headers)
}
}
struct search_body_context *ctx)
{
struct message_search_context *msg_search_ctx;
int ret;
case SEARCH_BODY:
case SEARCH_TEXT:
break;
default:
return;
}
if (msg_search_ctx == NULL) {
ARG_SET_RESULT(arg, 0);
return;
}
/* try again without cached parts */
}
}
struct index_search_context *ctx)
{
const enum message_header_parser_flags hdr_parser_flags =
struct mailbox_header_lookup_ctx *headers_ctx;
struct search_header_context hdr_ctx;
struct search_body_context body_ctx;
const char *const *headers;
int ret;
/* first check what we need to use */
if (!have_headers && !have_body)
return -1;
/* hdr_ctx.imail is different from imail for mails in
virtual mailboxes */
(!have_body ||
/* try to look up the specified headers from cache */
&input) < 0)
else {
search_header, &hdr_ctx);
}
} else if (have_headers) {
/* we need to read the entire header */
else {
if (hdr_ctx.parse_headers) {
}
search_header, &hdr_ctx);
}
}
if (headers_ctx != NULL)
if (failed) {
/* opening mail failed. maybe because of lookup_abort.
update access_parts for prefetching */
if (have_body)
else
return -1;
}
if (have_headers) {
/* see if the header search succeeded in finishing the search */
return ret;
}
return -1;
}
/* we didn't search headers. */
struct message_size hdr_size;
return -1;
}
}
/* Restricts a sequence set to messages that exist in the mailbox.
   messages_count must be non-zero (asserted). When a "*" was used the last
   message is kept in the range, then nonexistent messages are removed.
   For a non-negated match the return value tells whether any sequences are
   left in the set.
   NOTE(review): the rest of the parameter list and several statements are
   not visible in this excerpt; the negated (match_not) return value in
   particular must be confirmed against the full source. */
static bool
search_msgset_fix_limits(unsigned int messages_count,
{
unsigned int count;
i_assert(messages_count > 0);
if (count > 0) {
/* "*" used, make sure the last message is in the range
(e.g. with count+1:* we still want to include it) */
}
/* remove all nonexistent messages */
(uint32_t)-1);
}
if (!match_not)
return array_count(seqset) > 0;
else {
/* if all messages are in the range, it can't match */
}
}
/* Works out the sequence range that a (possibly negated) sequence set can
   match. On the visible early-return paths *seq2_r is set to 0; a negated
   set with no entries (count == 0) matches all messages, so the minimum
   sequence becomes 1.
   NOTE(review): the remaining parameters and the statements that fill in
   the normal result are not visible in this excerpt - confirm how the
   first output sequence is reported and what the 0 in *seq2_r means to
   callers. */
static void
search_msgset_fix(unsigned int messages_count,
{
unsigned int count;
*seq2_r = 0;
return;
}
if (!match_not) {
} else if (count == 0) {
/* matches all messages */
min_seq = 1;
} else {
*seq2_r = 0;
return;
}
}
}
/* Sequence-range handling for a list of search arguments; distinguishes
   SEARCH_SUB, SEARCH_OR and SEARCH_SEQSET and ignores every other type.
   NOTE(review): presumably the OR counterpart of
   search_parse_msgset_args(), combining the children's ranges - the
   per-case statements and the remaining parameters are not visible in this
   excerpt, so confirm before relying on this description. */
static void search_or_parse_msgset_args(unsigned int messages_count,
struct mail_search_arg *args,
{
case SEARCH_SUB:
break;
case SEARCH_OR:
break;
case SEARCH_SEQSET:
break;
default:
break;
}
if (min_seq1 == 0) {
} else {
}
}
}
/* Walks the search arguments and derives sequence-range limits from them.
   SEARCH_SUB, SEARCH_OR and SEARCH_SEQSET are handled; for SEARCH_OR the
   widest range among the children is used. All other argument types are
   ignored.
   NOTE(review): the remaining parameters and the per-case statements are
   not visible in this excerpt - confirm how the resulting range is
   returned to the caller. */
static void search_parse_msgset_args(unsigned int messages_count,
struct mail_search_arg *args,
{
case SEARCH_SUB:
break;
case SEARCH_OR:
/* go through our children and use the widest seqset
range */
break;
case SEARCH_SEQSET:
break;
default:
break;
}
}
}
{
if (uid_lowwater == 0)
return;
}
struct mail_search_arg *args,
{
enum mail_flags pvt_flags_mask;
if ((pvt_flags_mask & MAIL_SEEN) != 0)
if ((pvt_flags_mask & MAIL_DELETED) != 0)
}
return FALSE;
}
continue;
}
/* SEEN with 0 seen? */
return FALSE;
if (hdr_seen->seen_messages_count ==
/* UNSEEN with all seen? */
return FALSE;
/* SEEN with all seen */
/* UNSEEN with lowwater limiting */
}
}
/* DELETED with 0 deleted? */
hdr_del->deleted_messages_count == 0)
return FALSE;
if (hdr_del->deleted_messages_count ==
/* UNDELETED with all deleted? */
return FALSE;
/* DELETED with all deleted */
/* DELETED with lowwater limiting */
}
}
}
}
unsigned int messages_count,
struct mail_search_arg *args)
{
if (messages_count == 0) {
/* no messages, don't check sequence ranges. although we could then
give an error message for FETCH, we shouldn't do it for
UID FETCH. */
return;
}
}
/* no matches */
return;
}
/* UNSEEN and DELETED in root search level may limit the range */
/* no matches */
}
}
{
struct mail_thread_iterate_context *child_iter;
const struct mail_thread_child_node *node;
int ret = 0;
if (child_iter != NULL) {
ret = -1;
}
}
if (mail_thread_iterate_deinit(&iter) < 0)
ret = -1;
return ret;
}
struct mail_search_arg *arg)
{
const struct mail_thread_child_node *node;
int ret = 0;
/* mail_search_args_init() must have been called by now */
return -1;
/* failed earlier */
return -1;
}
if (array_count(search_uids) == 0) {
/* search found nothing - no threads can match */
return 0;
}
if (child_iter != NULL) {
&thread_uids) < 0)
ret = -1;
}
/* yes, we want this thread */
}
}
if (mail_thread_iterate_deinit(&iter) < 0)
ret = -1;
return ret;
}
struct mail_search_arg *arg)
{
int ret = 0;
case SEARCH_OR:
case SEARCH_SUB:
ret = -1;
break;
case SEARCH_INTHREAD:
ret = -1;
break;
default:
break;
}
}
return ret;
}
static void
const enum mail_sort_type *sort_program,
struct mailbox_header_lookup_ctx *wanted_headers,
enum mail_fetch_field *wanted_fields_r,
struct mailbox_header_lookup_ctx **headers_ctx_r)
{
const char *header;
unsigned int i;
*wanted_fields_r = 0;
*headers_ctx_r = NULL;
for (i = 0; sort_program[i] != MAIL_SORT_END; i++) {
switch (sort_program[i] & MAIL_SORT_MASK) {
case MAIL_SORT_ARRIVAL:
break;
case MAIL_SORT_CC:
header = "Cc";
break;
case MAIL_SORT_DATE:
break;
case MAIL_SORT_FROM:
header = "From";
break;
case MAIL_SORT_SIZE:
break;
case MAIL_SORT_SUBJECT:
header = "Subject";
break;
case MAIL_SORT_TO:
header = "To";
break;
}
}
if (wanted_headers != NULL) {
}
if (array_count(&headers) > 0) {
}
}
struct mail_search_context *
struct mail_search_args *args,
const enum mail_sort_type *sort_program,
struct mailbox_header_lookup_ctx *wanted_headers)
{
struct index_search_context *ctx;
struct mailbox_status status;
i_fatal("gettimeofday() failed: %m");
sizeof(void *), 5);
if (args->have_inthreads) {
}
if (sort_program != NULL) {
} else if (wanted_headers != NULL) {
}
/* Need to reset results for match_always cases */
}
static void ATTR_NULL(2)
void *context ATTR_UNUSED)
{
if (search_ctx != NULL) {
}
}
{
int ret;
}
return ret;
}
static unsigned long long
{
}
{
int ret;
if (ret < 0)
return ret;
}
{
struct mail_search_arg *subarg;
case SEARCH_OR:
case SEARCH_SUB:
/* they're static only if all subargs are static */
if (!search_arg_is_static(subarg))
return FALSE;
}
return TRUE;
case SEARCH_SEQSET:
/* changes between syncs, but we can't really handle this
currently. seqsets should be converted to uidsets first. */
case SEARCH_FLAGS:
case SEARCH_KEYWORDS:
case SEARCH_MODSEQ:
case SEARCH_INTHREAD:
break;
case SEARCH_ALL:
case SEARCH_UIDSET:
case SEARCH_BEFORE:
case SEARCH_ON:
case SEARCH_SINCE:
case SEARCH_SMALLER:
case SEARCH_LARGER:
case SEARCH_HEADER:
case SEARCH_HEADER_ADDRESS:
case SEARCH_BODY:
case SEARCH_TEXT:
case SEARCH_GUID:
case SEARCH_MAILBOX:
case SEARCH_MAILBOX_GUID:
case SEARCH_MAILBOX_GLOB:
return TRUE;
}
return FALSE;
}
{
if (search_arg_is_static(arg))
}
}
{
return TRUE;
}
return FALSE;
}
{
if (match == 0 &&
/* if there are saved search results remember
that this message never matches */
}
}
{
static enum mail_lookup_abort cache_lookups[] = {
};
unsigned int i, n = N_ELEMENTS(cache_lookups);
int ret = -1;
if (ctx->have_mailbox_args) {
/* check that the mailbox name matches.
this makes sense only with virtual mailboxes. */
}
/* avoid doing extra work for as long as possible */
/* we're doing prefetching. if we have to read the mail,
do a prefetch first and the final search later */
n--;
}
for (i = 0; i < n && ret < 0; i++) {
}
return ret;
}
struct index_search_context *ctx)
{
float percentage;
/* set the search time in here, in case a plugin
already spent some time indexing the mailbox */
&ctx->search_start_time);
T_BEGIN {
const char *text;
"ETA %d:%02d", (int)percentage,
} T_END;
}
}
{
unsigned long long guess_cost;
long long usecs;
bool ret;
return FALSE;
i_fatal("gettimeofday() failed: %m");
if (usecs < 0) {
/* clock moved backwards. */
return TRUE;
} else if (usecs < SEARCH_MIN_NONBLOCK_USECS) {
/* not finished yet. estimate the next time lookup */
} else {
/* done, or close enough anyway */
}
(SEARCH_MAX_NONBLOCK_USECS / (double)usecs);
if (usecs < SEARCH_RECALC_MIN_USECS) {
/* the estimate may not be very good since we spent
so little time doing this search. don't allow huge changes
to the guess, but still allow changes large enough that we
can move in the right direction. */
}
if (ret)
return ret;
}
{
if (search_would_block(ctx)) {
/* this lookup is useful when a large number of
messages match */
return 0;
}
ret = -1;
T_BEGIN {
} T_END;
if (match < 0) {
/* result isn't known yet, do a prefetch and
finish later */
}
if (match != 0) {
ret = 1;
break;
}
if (search_would_block(ctx)) {
ret = 0;
break;
}
}
return ret;
}
{
struct index_mail *imail;
unsigned int count;
return NULL;
return mail;
}
{
unsigned int count;
int ret = 0;
if (ret <= 0)
break;
/* don't prefetch when using a sort program,
since the mails' access order will change */
return 1;
}
/* no prefetching done, return it immediately */
return 1;
}
ctx->unused_mail_idx++;
}
if (ret == 0) {
/* wait */
return 0;
}
if (ctx->unused_mail_idx == 0) {
/* finished */
return -1;
}
} else {
/* prefetch buffer is full. */
}
/* return the next message */
if (--ctx->unused_mail_idx > 0) {
}
return 1;
}
struct index_mail *imail)
{
int ret;
return ret > 0;
}
{
struct index_mail *imail;
int ret;
break;
/* searching wasn't finished yet */
break;
/* search finished as non-match */
}
return ret;
}
{
int ret;
*tryagain_r = FALSE;
if (ret == 0) {
*tryagain_r = TRUE;
return FALSE;
}
if (ret < 0)
return FALSE;
return TRUE;
}
if (ret == 0) {
*tryagain_r = TRUE;
return FALSE;
}
/* finished searching the messages. now sort them and start
returning the messages. */
return FALSE;
}
/* everything searched at this point already. just returning
matches from sort list */
return FALSE;
return TRUE;
}
{
int ret;
/* first time */
} else {
}
}
ret = 0;
/* check if the sequence matches */
/* ... is done. mail_set_seq() can be a bit slow. */
}
/* see if this message never matches */
uid))
ret = 0;
}
if (ret != 0)
break;
/* doesn't, try next one */
}
/* we already know that the static data
matches. mark it as such. */
}
}
return ret != 0;
}