squat-trie.c revision 45114a7f9600d04c294f8f1051c0289c092febce
/* Copyright (c) 2007-2008 Dovecot authors, see the included COPYING file */
#include "lib.h"
#include "array.h"
#include "str.h"
#include "read-full.h"
#include "istream.h"
#include "ostream.h"
#include "unichar.h"
#include "nfs-workarounds.h"
#include "file-cache.h"
#include "seq-range-array.h"
#include "squat-uidlist.h"
#include "squat-trie-private.h"
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#define DEFAULT_NORMALIZE_MAP_CHARS \
"EOTIRSACDNLMVUGPHBFWYXKJQZ0123456789@.-+#$%_&"
#define DEFAULT_PARTIAL_LEN 4
#define DEFAULT_FULL_LEN 4
#define MAX_FAST_LEVEL 3
#define SEQUENTIAL_COUNT 46
#define TRIE_BYTES_LEFT(n) \
((n) * SQUAT_PACK_MAX_SIZE)
#define TRIE_READAHEAD_SIZE \
struct squat_trie_build_context {
struct squat_trie *trie;
unsigned int compress_nodes:1;
};
struct squat_trie_iterate_node {
struct squat_node *node;
unsigned int idx;
};
struct squat_trie_iterate_context {
struct squat_trie *trie;
struct squat_trie_iterate_node cur;
bool failed;
};
{
}
{
}
{
static unsigned char valid_chars[] =
unsigned int i, j;
sizeof(trie->default_normalize_map));
#if 1
unsigned char chr = valid_chars[i];
}
i_assert(j <= SEQUENTIAL_COUNT);
for (i = 128; i < 256; i++)
trie->default_normalize_map[i] = j++;
#else
for (i = 0; i < sizeof(valid_chars)-1; i++) {
unsigned char chr = valid_chars[i];
}
for (i = 128; i < 256; i++)
#endif
}
{
struct squat_node *children;
unsigned int i;
if (node->leaf_string_length > 0) {
if (NODE_IS_DYNAMIC_LEAF(node))
} else if (!node->children_not_mapped) {
trie->node_alloc_size -=
for (i = 0; i < node->child_count; i++)
}
}
struct squat_trie *
{
struct squat_trie *trie;
(flags & SQUAT_INDEX_FLAG_DOTLOCK_USE_EXCL) != 0;
return trie;
}
{
}
}
}
{
trie->locked_file_size = 0;
}
{
}
{
}
{
}
{
sizeof(trie->default_normalize_map));
}
{
return 0;
}
return -1;
}
return 0;
}
{
if (squat_trie_open_fd(trie) < 0)
return -1;
}
{
return 1;
return -1;
}
return 1;
return -1;
}
return 0;
}
return 1;
}
{
if (squat_trie_is_file_stale(trie) > 0)
(void)squat_trie_open(trie);
}
struct file_lock **file_lock_r,
{
int ret;
*file_lock_r = NULL;
for (;;) {
} else {
}
if (ret == 0) {
return 0;
}
if (ret < 0)
return -1;
/* if the trie has been compressed, we need to reopen the
file and try to lock again */
if (ret == 0)
break;
if (*file_lock_r != NULL)
else
if (ret < 0)
return -1;
if (squat_trie_open_fd(trie) < 0)
return -1;
return 0;
}
return 1;
}
static void
{
const unsigned int alloc_size =
struct squat_node *children;
unsigned char *chars;
unsigned int i;
for (i = 0; i < SEQUENTIAL_COUNT; i++)
chars[i] = i;
if (level < MAX_FAST_LEVEL) {
for (i = 0; i < SEQUENTIAL_COUNT; i++)
}
}
static unsigned int
{
unsigned char *chars;
if (node->want_sequential) {
if (chr < SEQUENTIAL_COUNT)
return chr;
}
node->child_count++;
if (old_child_count == 0) {
/* first child */
} else {
}
if (children != old_children) {
old_child_count * sizeof(struct squat_node));
}
}
}
static int
{
return 0;
return -1;
}
return 0;
}
static int
{
const unsigned char *child_chars;
unsigned int i, child_idx, child_count;
return -1;
return -1;
}
child_count = *data++;
return -1;
}
if (child_count == 0)
return 0;
child_chars = data;
data += child_count;
/* get child offsets */
for (i = 0; i < child_count; i++) {
/* we always start with !have_sequential, so at i=0 this
check always goes to add the first child */
child_idx = child_chars[i];
else {
level);
}
/* 1) child offset */
if (num == 0) {
/* no children */
} else {
if ((num & 1) != 0) {
} else {
}
return -1;
}
}
/* 2) uidlist */
if (child->uid_list_idx == 0) {
/* we don't write nodes with empty uidlists */
return -1;
}
/* 3) next uid */
} else {
}
/* 4) unused uids + leaf string flag */
if ((num & 1) != 0) {
/* leaf string */
unsigned int len;
unsigned char *dest;
/* 5) leaf string length */
if (!NODE_IS_DYNAMIC_LEAF(child))
else {
}
/* the string may be long -
recalculate the end pos */
size) < 0)
return -1;
node_offset + 1);
}
return -1;
}
}
}
/* we should never get this far */
return -1;
}
return 0;
}
static void
{
struct squat_node *children;
const unsigned char *chars;
unsigned int i;
for (i = 0; i < child_count; i++) {
/* 1) child offset */
if (node_offsets[i] == 0)
*bufp++ = 0;
else if (node_offsets[i] >= base_offset) {
base_offset = node_offsets[i];
} else {
base_offset = node_offsets[i];
}
/* 2) uidlist */
/* 3) next uid */
}
if (children[i].leaf_string_length == 0) {
/* 4a) unused uids */
} else {
i_assert(node_offsets[i] == 0);
/* 4b) unused uids + flag */
/* 5) leaf string length */
NODE_LEAF_STRING(&children[i]),
}
}
}
static inline void
struct squat_node *node)
{
/* duplicate */
return;
}
node->uid_list_idx =
}
static void
{
struct squat_node *child;
unsigned char *str;
i_assert(leafstr_len > 0);
/* make a copy of the leaf string and convert to normal node by
removing it. */
if (!NODE_IS_DYNAMIC_LEAF(node))
else {
}
node->leaf_string_length = 0;
/* create a new child node for the rest of the string */
/* update uidlist to contain all of parent's UIDs */
}
if (leafstr_len > 1) {
/* make the child a leaf string */
leafstr_len--;
if (!NODE_IS_DYNAMIC_LEAF(child)) {
} else {
}
}
}
static bool
struct squat_node *node,
{
unsigned int i;
if (data_len != leafstr_len) {
/* different lengths, can't match */
T_BEGIN {
} T_END;
return FALSE;
}
for (i = 0; i < data_len; i++) {
/* non-match */
T_BEGIN {
} T_END;
return FALSE;
}
}
return TRUE;
}
{
unsigned char *chars;
unsigned int idx;
int level = 0;
for (;;) {
if (node->children_not_mapped) {
return -1;
}
if (node->leaf_string_length != 0) {
/* the whole string must match or we need to split
the node */
return 0;
}
}
return -1;
}
/* child node's UIDs are relative to ours. so for example if
we're adding UID 4 and this node now has [2,4] UIDs,
unused_uids=3 and so the child node will be adding
UID 4-3 = 1. */
return 0;
level++;
if (node->have_sequential) {
if (*data < SEQUENTIAL_COUNT) {
goto found;
}
} else {
idx = 0;
}
goto found;
}
break;
data++;
}
/* create new children */
for (;;) {
uid = 0;
break;
if (!node->have_sequential) {
/* convert the node into a leaf string */
if (!NODE_IS_DYNAMIC_LEAF(node)) {
} else {
}
break;
}
}
return 0;
}
static int
{
unsigned int i;
i = 0;
else {
/* the first word is longer than others */
return -1;
i = 1;
}
for (; i < size; i++) {
return -1;
}
return 0;
}
static int
unsigned int size)
{
unsigned int i, j, bytelen;
if (char_lengths == NULL) {
/* optimization path: all characters are bytes */
}
i = 0;
else {
/* the first word is longer than others */
bytelen = 0;
return -1;
i = char_lengths[0];
}
for (; i < size; i += char_lengths[i]) {
bytelen = 0;
return -1;
}
return 0;
}
static unsigned char *
unsigned int size)
{
unsigned char *dest;
unsigned int i;
for (i = 0; i < size; i++) {
/* Don't index replacement character */
dest[i++] = 0;
dest[i++] = 0;
dest[i] = 0;
} else {
}
}
return dest;
}
static int
{
const unsigned char *data;
unsigned int i, start = 0;
bool multibyte_chars = FALSE;
int ret = 0;
for (i = 0; i < size; i++) {
if (char_lengths[i] != 1)
if (data[i] != '\0')
continue;
start++;
if (i != start) {
!multibyte_chars ? NULL :
i - start) < 0) {
ret = -1;
start = i;
break;
}
}
start = i + 1;
}
start++;
if (i != start) {
!multibyte_chars ? NULL :
ret = -1;
}
return ret;
}
{
int ret;
T_BEGIN {
} T_END;
return ret;
}
static void
{
unsigned char *chars;
/* move chars */
for (i = j = 0; i < orig_child_count; i++) {
if (children_src[i].next_uid != 0)
}
node->child_count = j;
/* move children. note that children_dest may point to different
location than children_src, although they both point to the
same node. */
for (i = j = 0; i < orig_child_count; i++) {
if (children_src[i].next_uid != 0)
children_dest[j++] = children_src[i];
else
}
}
static int
{
struct squat_node *children;
unsigned int i;
int ret;
return -1;
}
if (child_count == 0) {
node->leaf_string_length == 0);
return 0;
}
for (i = 0; i < child_count; i++) {
T_BEGIN {
} T_END;
if (ret < 0)
return -1;
}
return 0;
}
{
int ret;
return 0;
T_BEGIN {
} T_END;
if (ret < 0)
return -1;
return 0;
}
static struct squat_trie_iterate_context *
{
struct squat_trie_iterate_context *ctx;
return ctx;
}
static int
{
struct squat_trie_iterate_node *nodes;
unsigned int i, count;
for (i = 0; i < count; i++)
}
return ret;
}
static struct squat_node *
{
return NULL;
}
}
}
static struct squat_node *
{
struct squat_trie_iterate_node *iter_nodes;
struct squat_node *children;
unsigned int count, shift_count = 0;
{
if (count == 0)
return NULL;
}
/* no more non-empty children in this node */
}
}
if (shift_count != 0)
else
return squat_trie_iterate_first(ctx);
}
static void
struct squat_trie *trie,
{
if (!array_is_created(shifts_arr)) {
return;
}
/* we'll recalculate this */
node->unused_uids = 0;
(i == shift_count ||
{
if (uid_idx > 0 &&
/* we can merge this and the previous range */
&uid_count);
} else {
if (uid_idx == 0)
else {
node->unused_uids +=
}
uid_idx++;
}
}
break;
/* shift is entirely before UID range */
i++;
} else {
/* handle shifts before UID range */
}
/* update child shifts */
child_shift_count - 1);
/* if the shifts continue after the UID range,
treat it in the next loop iteration */
i++;
else
/* update UIDs - no matter where within the UID range
the shifts happened, the result is the same:
shift number of UIDs are removed, and the rest
are decreased by shift_sum.
123 uids child_shifts
a) s -> 12 1
b) s -> 12 2
c) s -> 12 3
*/
/* removed completely */
&uid_count);
} else if (do_shifts) {
/* the next loop iteration fixes the UIDs */
} else {
&uid_count);
}
}
if (!do_shifts) {
/* root node - UIDs are only removed, not shifted */
shift_sum = 0;
}
}
if (uid_count == 0) {
/* no UIDs left, delete the node's children and mark it
unused */
if (!NODE_IS_DYNAMIC_LEAF(node))
node->child_count = 0;
} else {
if (do_shifts)
else {
}
}
}
static int
struct squat_uidlist_rebuild_context *rebuild_ctx,
struct squat_trie_iterate_context *iter,
{
struct squat_node *node;
int ret = 0;
if (node->uid_list_idx == 0)
return 0;
if (array_count(expunged_uids) > 0)
do {
&uid_range) < 0) {
ret = -1;
break;
}
shift);
node->uid_list_idx =
return ret;
}
static int
struct squat_uidlist_rebuild_context *rebuild_ctx,
struct squat_trie_iterate_context *iter)
{
struct squat_node *node;
int ret = 0;
if (node->uid_list_idx == 0)
return 0;
/* rebuild the uidlist */
array_clear(&uids);
ret = -1;
break;
}
node->uid_list_idx =
}
}
array_free(&uids);
return ret;
}
static int
bool compress)
{
struct squat_trie_iterate_context *iter;
struct squat_uidlist_rebuild_context *rebuild_ctx;
int ret = 0;
compress, &rebuild_ctx)) <= 0)
return ret;
if (expunged_uids != NULL) {
} else {
}
/* lock the trie before we rename uidlist */
ret = -1;
}
{
return FALSE;
return FALSE;
}
return FALSE;
}
return TRUE;
}
{
int ret;
if (trie->locked_file_size == 0) {
/* newly created file */
return 1;
}
if (ret <= 0) {
if (ret < 0) {
return -1;
}
return 0;
}
} else {
return 0;
}
}
return -1;
}
}
}
{
bool changed;
int ret;
return -1;
}
if (ret == 0) {
else
}
trie->unmapped_child_count = 0;
} else {
}
}
/* do this while we're still locked */
}
if (ret < 0)
return -1;
}
{
int fd;
if (fd == -1) {
return -1;
}
i_error("fchown(%s, -1, %ld) failed: %m",
return -1;
}
}
return fd;
}
struct squat_trie_build_context **ctx_r)
{
struct squat_trie_build_context *ctx;
return -1;
}
/* uidlist locks building */
return -1;
return -1;
}
return 0;
}
{
return 0;
return -1;
return 0;
}
{
const char *path;
/* we might as well recreate the file */
if (fd == -1)
return -1;
&file_lock);
if (ret <= 0) {
if (ret == 0) {
i_error("file_wait_lock(%s) failed: %m",
path);
}
return -1;
}
}
} else {
/* we need to lock only while header is being written */
else {
if (squat_trie_write_lock(ctx) < 0) {
return -1;
}
}
}
/* write 1 byte guard at the end of file, so that we can verify broken
squat_unpack_num() input by checking if data==end */
ret = -1;
if (ret == 0)
if (ret == 0) {
o_stream_seek(output, 0);
}
if (output->last_failed_errno != 0) {
ret = -1;
}
if (fd == -1) {
/* appended to the existing file */
return ret;
}
/* recreating the trie file */
if (ret < 0) {
fd = -1;
ret = -1;
}
if (ret < 0) {
} else {
}
return ret;
}
{
int ret;
/* keep trie locked while header is being written and when files are
being renamed, so that while trie is read locked, uidlist can't
change under. */
if (ret == 0) {
if (ret < 0)
}
if (ret == 0)
if (unlock)
else
}
return ret;
}
{
if (squat_trie_open(trie) < 0)
return -1;
}
return 0;
}
static int
{
unsigned char *chars;
unsigned int idx;
int level = 0;
for (;;) {
if (node->children_not_mapped) {
return -1;
}
if (node->leaf_string_length != 0) {
const unsigned char *str;
else
return 0;
/* match */
break;
}
if (size == 0)
break;
level++;
if (node->have_sequential) {
if (*data < SEQUENTIAL_COUNT) {
goto found;
}
} else {
idx = 0;
}
goto found;
}
return 0;
/* follow to children */
if (level == 1) {
/* root level, add all UIDs */
uids) < 0)
return -1;
} else {
return -1;
}
data++;
size--;
}
return -1;
return 1;
}
static void
{
if (count == 0)
return;
if ((type & SQUAT_INDEX_TYPE_HEADER) != 0 &&
(type & SQUAT_INDEX_TYPE_BODY) != 0) {
/* everything is fine, just fix the UIDs */
for (i = 1; i < count; i++) {
/* we can continue the previous range */
} else {
}
}
return;
}
/* we'll have to drop either header or body UIDs */
for (i = 0; i < count; i++) {
}
}
}
struct squat_trie_lookup_context {
struct squat_trie *trie;
enum squat_index_type type;
bool first;
};
static int
unsigned int size)
{
int ret;
i = 0; char_idx = 0;
do {
bytelen = 0;
if (ret <= 0) {
return ret;
}
ctx->maybe_uids);
} else {
}
i += char_lengths[i];
char_idx++;
return 1;
}
{
unsigned int count;
/* increase the range */
} else {
}
}
static int
enum squat_index_type type,
{
struct squat_trie_lookup_context ctx;
unsigned char *data;
int ret = 0;
char_lengths[i] = bytes;
i += bytes;
}
if (data[i] != '\0')
continue;
/* string has nonindexed characters.
search it in parts. */
if (i != start) {
i - start);
}
start = i + char_lengths[i];
}
if (start == 0) {
if (ret > 0) {
}
} else {
}
/* we have the result */
} else {
i - start);
}
} else if (str_bytelen > 0) {
/* string has nonindexed characters. finish the search. */
i - start);
} else if (!searched) {
/* string has only nonindexed chars,
list all root UIDs as maybes */
}
} else {
/* zero string length - list all root UIDs as definite
answers */
}
return ret < 0 ? -1 : 0;
}
enum squat_index_type type,
{
int ret;
T_BEGIN {
} T_END;
return ret;
}
{
}
{
return trie->node_alloc_size;
}