index-sort.c revision bfa67d2c560c3df1d30444b179364dcaaf4ff4a7
/* Copyright (C) 2006 Timo Sirainen */
/* The idea in here is that for each used primary sort condition there's
a 32bit integer in the index file which specifies the sort order. So when
sorting we simply look up the sort IDs and sort the messages by them.
Sort IDs are allocated in two ways:
1) Time and size fields can be directly used as sort IDs, so we simply
use them directly as the missing sort IDs.
2) Strings can't be used as sort IDs directly. The way they're currently
handled is that the whole 32bit integer space is used for them and whenever
adding a string, the available space is halved and the new ID is added in
the middle. For example if we add one mail the first time, it gets ID
2^31. If we then add two mails which are sorted before the first one, they
get IDs 2^31/3 and 2^31/3*2. Once we run out of the available space between
IDs, a large amount of the IDs are renumbered.
We try to avoid looking at mails' contents as much as possible. For case 1)
IDs it's simple because we need to get only the new mails' sort fields once
and use them as sort IDs. For case 2) we'll have to start looking at the
strings from older mails as well. To minimize this, we first sort the
existing sort IDs. After that we start inserting the new mails into the
sorted array by looking the position using binary search. This minimizes
the number of lookups we have to do for the old mails. Usually only a few
mails have been added, so this should be faster than other sort methods.
*/
#include "lib.h"
#include "array.h"
#include "bsearch-insert-pos.h"
#include "str.h"
#include "unichar.h"
#include "message-address.h"
#include "imap-base-subject.h"
#include "index-storage.h"
#include "index-sort.h"
#include <stdlib.h>
#define RENUMBER_SPACE 100
struct mail_sort_node {
};
struct mail_search_sort_program {
struct mailbox_transaction_context *t;
const char *primary_sort_header;
const struct mail_sort_node *nodes_ptr;
unsigned int nodes_count, iter_idx;
unsigned int reverse:1;
unsigned int skipped_mails:1;
unsigned int sort_ids_added:1;
};
struct sort_cmp_context {
struct mail_search_sort_program *program;
enum mail_sort_type cache_type;
const char *cache_str;
};
static struct sort_cmp_context static_node_cmp_context;
struct mail_search_sort_program *
const enum mail_sort_type *sort_program)
{
struct mail_search_sort_program *program;
const char *name;
unsigned int i;
return NULL;
/* we support internal sorting by the primary condition */
program->t = t;
for (i = 0; i < MAX_SORT_PROGRAM_SIZE; i++) {
if (sort_program[i] == MAIL_SORT_END)
break;
}
if (i == MAX_SORT_PROGRAM_SIZE)
i_panic("index_sort_program_init(): Invalid sort program");
case MAIL_SORT_ARRIVAL:
name = "rdate";
break;
case MAIL_SORT_CC:
name = "sort-c";
break;
case MAIL_SORT_DATE:
name = "date";
break;
case MAIL_SORT_FROM:
name = "sort-f";
break;
case MAIL_SORT_SIZE:
name = "size";
break;
case MAIL_SORT_SUBJECT:
name = "sort-s";
break;
case MAIL_SORT_TO:
name = "sort-t";
break;
default:
i_unreached();
}
return program;
}
{
}
{
struct message_address *addr;
const char *str;
return "";
(const unsigned char *)str,
}
static const char *
{
const char *str;
switch (sort_type & MAIL_SORT_MASK) {
case MAIL_SORT_SUBJECT:
case MAIL_SORT_CC:
break;
case MAIL_SORT_FROM:
break;
case MAIL_SORT_TO:
break;
default:
i_unreached();
}
}
const enum mail_sort_type *sort_program,
const struct mail_sort_node *n1,
const struct mail_sort_node *n2)
{
enum mail_sort_type sort_type;
int ret = 0;
switch (sort_type) {
case MAIL_SORT_CC:
case MAIL_SORT_FROM:
case MAIL_SORT_TO:
case MAIL_SORT_SUBJECT:
t_push();
t_pop();
break;
case MAIL_SORT_ARRIVAL:
else {
}
break;
case MAIL_SORT_DATE:
else {
}
break;
case MAIL_SORT_SIZE:
else {
}
break;
case MAIL_SORT_END:
case MAIL_SORT_MASK:
case MAIL_SORT_FLAG_REVERSE:
i_unreached();
}
if (ret == 0)
if ((*sort_program & MAIL_SORT_FLAG_REVERSE) != 0)
return ret;
}
{
return -1;
return 1;
}
{
}
static void
{
struct index_transaction_context *t =
(struct index_transaction_context *)program->t;
const struct mail_sort_node *nodes;
unsigned int i, count;
for (i = 0; i < count; i++) {
continue;
}
}
static int
unsigned int idx2)
{
struct mail_sort_node *nodes;
unsigned int i, count;
const char *last_str = "";
const char *str;
unsigned int skip;
int ret = 1;
t_push();
idx2--;
}
idx1++;
}
else {
/* divide the available space so that each message gets
an equal sized share. leave the same sized space
also between the first and the last messages */
/* we ran out of ID space. have to renumber
the IDs. */
ret = 0;
break;
}
str_truncate(prev_str, 0);
}
}
t_pop();
return ret;
}
static void
unsigned int idx)
{
struct index_transaction_context *t =
(struct index_transaction_context *)program->t;
struct mail_sort_node *nodes;
unsigned int i, count;
/* space is running out, lets just renumber everything */
sort_id = 0;
for (i = 0; i < idx; i++) {
if (sort_id != prev_sort_id)
}
} else {
}
if (sort_id != prev_sort_id) {
}
}
}
}
static void
{
const struct mail_sort_node *nodes;
unsigned int i, j, count;
for (i = 0; i < count; i++) {
for (j = i + 1; j < count; j++) {
break;
}
i == 0 ? 0 : i-1,
}
}
}
{
}
{
}
{
return 0;
/* FIXME: elsewhere we support 64bit message sizes, but here
we support only 32bit sizes.. It's a bit too much trouble
to support 64bit here currently, so until such messages
actually start showing up somewhere, 32bit is enough */
}
{
struct mail_sort_node node;
case MAIL_SORT_ARRIVAL:
break;
case MAIL_SORT_DATE:
break;
case MAIL_SORT_SIZE:
break;
default:
i_unreached();
}
/* add the missing nodes with their sort_ids */
}
/* @UNSAFE: and sort them */
sizeof(struct mail_sort_node), sort_node_cmp);
}
{
switch (ctx->cache_type) {
case MAIL_SORT_ARRIVAL:
break;
case MAIL_SORT_DATE:
break;
case MAIL_SORT_SIZE:
break;
default:
break;
}
}
{
const struct mail_sort_node *cnodes;
/* we wish to avoid reading the actual headers as much as possible.
first sort the nodes which already have sort_ids, then start
inserting the new nodes by finding their insertion position with
binary search */
/* @UNSAFE */
}
&idx);
}
}
{
struct mail_sort_node node;
const void *data;
unsigned int i, first_missing_sort_id_seq;
if (i == 0) {
/* we're building the array from scratch. add here only the
messages that have sort_ids set. */
program->last_sorted_seq = 0;
for (; i < last_seq; i++) {
return -1;
/* the rest don't have sort_ids either */
break;
}
}
}
first_missing_sort_id_seq = i + 1;
case MAIL_SORT_ARRIVAL:
case MAIL_SORT_DATE:
case MAIL_SORT_SIZE:
break;
default:
break;
}
return 0;
}
const struct mail_sort_node *node)
{
const struct mail_sort_node *nodes;
sizeof(*node), sort_node_cmp,
&idx);
}
{
struct index_transaction_context *t =
(struct index_transaction_context *)program->t;
const struct mail_index_header *hdr;
const void *data;
struct mail_sort_node node;
int ret;
/* we're still on the fast path using sort_ids from the
index file */
return -1;
return 0;
}
} else {
}
/* sort_ids are missing, have to generate them */
if (!program->skipped_mails) {
/* as long as we think we're returning all the mails sorted,
which is the common case, we want to avoid duplicating the
node array. so here we just keep counting the sequences
until either we skip a sequence or we reach list_finish() */
return 0;
}
/* we're not returning all the mails. have to create a temporary array
for all the nodes so we can set all the missing sort_ids. */
if (ret < 0)
return -1;
/* add the nodes in the middle */
return -1;
}
/* and add this last node */
}
{
/* nodes array contains a contiguous range of sequences from
the beginning, with the last ones missing sort_id. we can
just sort the array directly without copying it. */
return -1;
}
return 0;
}
{
const struct mail_sort_node *node;
return 0;
} else {
return 0;
}
return 1;
}