squat-trie.c revision 78b806afb68cf1fed9dda6b45cbf7f8f2e08260d
45312f52ff3a3d4c137447be4c7556500c2f8bf2Timo Sirainen/* Copyright (c) 2007-2016 Dovecot authors, see the included COPYING file */
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen "EOTIRSACDNLMVUGPHBFWYXKJQZ0123456789@.-+#$%_&"
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen struct squat_uidlist_build_context *uidlist_build_ctx;
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen unsigned int idx;
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen ARRAY(struct squat_trie_iterate_node) parents;
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainenstatic int squat_trie_map(struct squat_trie *trie, bool building);
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainenvoid squat_trie_delete(struct squat_trie *trie)
3b49aee9ced3b0370a3be396aca53acd5f21418cTimo Sirainenstatic void squat_trie_set_corrupted(struct squat_trie *trie)
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainenstatic void squat_trie_normalize_map_build(struct squat_trie *trie)
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen static unsigned char valid_chars[] =
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen unsigned int i, j;
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen for (i = 0, j = 1; i < sizeof(valid_chars)-1; i++) {
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen trie->default_normalize_map[chr-'A'+'a'] = chr;
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen trie->default_normalize_map[i] = i_toupper(i);
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainenstatic void node_free(struct squat_trie *trie, struct squat_node *node)
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen unsigned int i;
7a24bdc1a5e2d5368c2569b4852192f2bdb5a31fTimo Sirainensquat_trie_init(const char *path, uint32_t uidvalidity,
4ece61edd7c266a4b8f3b290a7f0a3cb3d13ca0fTimo Sirainen enum file_lock_method lock_method, enum squat_index_flags flags,
4ece61edd7c266a4b8f3b290a7f0a3cb3d13ca0fTimo Sirainen (flags & SQUAT_INDEX_FLAG_DOTLOCK_USE_EXCL) != 0;
4ece61edd7c266a4b8f3b290a7f0a3cb3d13ca0fTimo Sirainen trie->dotlock_set.nfs_flush = (flags & SQUAT_INDEX_FLAG_NFS_FLUSH) != 0;
4ece61edd7c266a4b8f3b290a7f0a3cb3d13ca0fTimo Sirainen trie->dotlock_set.timeout = SQUAT_TRIE_LOCK_TIMEOUT;
4ece61edd7c266a4b8f3b290a7f0a3cb3d13ca0fTimo Sirainen trie->dotlock_set.stale_timeout = SQUAT_TRIE_DOTLOCK_STALE_TIMEOUT;
4ece61edd7c266a4b8f3b290a7f0a3cb3d13ca0fTimo Sirainen trie->default_partial_len = DEFAULT_PARTIAL_LEN;
4ece61edd7c266a4b8f3b290a7f0a3cb3d13ca0fTimo Sirainenstatic void squat_trie_close_fd(struct squat_trie *trie)
4ece61edd7c266a4b8f3b290a7f0a3cb3d13ca0fTimo Sirainen if (munmap(trie->mmap_base, trie->mmap_size) < 0)
939a0d82523538b2de38a02bc9f790a67b7ebf47Timo Sirainenstatic void squat_trie_close(struct squat_trie *trie)
939a0d82523538b2de38a02bc9f790a67b7ebf47Timo Sirainenvoid squat_trie_deinit(struct squat_trie **_trie)
939a0d82523538b2de38a02bc9f790a67b7ebf47Timo Sirainenvoid squat_trie_set_partial_len(struct squat_trie *trie, unsigned int len)
4ece61edd7c266a4b8f3b290a7f0a3cb3d13ca0fTimo Sirainenvoid squat_trie_set_full_len(struct squat_trie *trie, unsigned int len)
4ece61edd7c266a4b8f3b290a7f0a3cb3d13ca0fTimo Sirainenstatic void squat_trie_header_init(struct squat_trie *trie)
4ece61edd7c266a4b8f3b290a7f0a3cb3d13ca0fTimo Sirainen trie->hdr.partial_len = trie->default_partial_len;
4ece61edd7c266a4b8f3b290a7f0a3cb3d13ca0fTimo Sirainen memcpy(trie->hdr.normalize_map, trie->default_normalize_map,
939a0d82523538b2de38a02bc9f790a67b7ebf47Timo Sirainenstatic int squat_trie_open_fd(struct squat_trie *trie)
4ece61edd7c266a4b8f3b290a7f0a3cb3d13ca0fTimo Sirainen file_cache_set_fd(trie->file_cache, trie->fd);
4ece61edd7c266a4b8f3b290a7f0a3cb3d13ca0fTimo Sirainenstatic int squat_trie_is_file_stale(struct squat_trie *trie)
4ece61edd7c266a4b8f3b290a7f0a3cb3d13ca0fTimo Sirainen if ((trie->flags & SQUAT_INDEX_FLAG_NFS_FLUSH) != 0)
4ece61edd7c266a4b8f3b290a7f0a3cb3d13ca0fTimo Sirainen if (st.st_ino == st2.st_ino && CMP_DEV_T(st.st_dev, st2.st_dev)) {
4ece61edd7c266a4b8f3b290a7f0a3cb3d13ca0fTimo Sirainen i_assert(trie->locked_file_size >= trie->data_size);
4ece61edd7c266a4b8f3b290a7f0a3cb3d13ca0fTimo Sirainenint squat_trie_refresh(struct squat_trie *trie)
4ece61edd7c266a4b8f3b290a7f0a3cb3d13ca0fTimo Sirainenstatic int squat_trie_lock(struct squat_trie *trie, int lock_type,
4ece61edd7c266a4b8f3b290a7f0a3cb3d13ca0fTimo Sirainen if (trie->lock_method != FILE_LOCK_METHOD_DOTLOCK) {
4ece61edd7c266a4b8f3b290a7f0a3cb3d13ca0fTimo Sirainen ret = file_wait_lock(trie->fd, trie->path, lock_type,
4ece61edd7c266a4b8f3b290a7f0a3cb3d13ca0fTimo Sirainen i_error("squat trie %s: Locking timed out", trie->path);
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen /* if the trie has been compressed, we need to reopen the
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen file and try to lock again */
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen if ((trie->flags & SQUAT_INDEX_FLAG_NFS_FLUSH) != 0)
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen nfs_flush_read_cache_locked(trie->path, trie->fd);
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainennode_make_squential(struct squat_trie *trie, struct squat_node *node, int level)
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen const unsigned int alloc_size =
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen unsigned char *chars;
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen unsigned int i;
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen for (i = 0; i < SEQUENTIAL_COUNT; i++)
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen for (i = 0; i < SEQUENTIAL_COUNT; i++)
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainenstatic unsigned int
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainennode_add_child(struct squat_trie *trie, struct squat_node *node,
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen unsigned int old_child_count = node->child_count;
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen unsigned char *chars;
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen new_size = NODE_CHILDREN_ALLOC_SIZE(node->child_count);
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen /* first child */
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen old_size = NODE_CHILDREN_ALLOC_SIZE(old_child_count);
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen node->children.data = i_realloc(node->children.data,
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen old_children = (void *)(NODE_CHILDREN_CHARS(node) +
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainentrie_file_cache_read(struct squat_trie *trie, size_t offset, size_t size)
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen if (file_cache_read(trie->file_cache, offset, size) < 0) {
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen trie->data = file_cache_get_map(trie->file_cache, &trie->data_size);
4ece61edd7c266a4b8f3b290a7f0a3cb3d13ca0fTimo Sirainennode_read_children(struct squat_trie *trie, struct squat_node *node, int level)
4ece61edd7c266a4b8f3b290a7f0a3cb3d13ca0fTimo Sirainen const unsigned char *child_chars;
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen i_assert(trie->data_size <= trie->locked_file_size);
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen if (trie_file_cache_read(trie, node_offset, TRIE_READAHEAD_SIZE) < 0)
db0735f9b388c5bcfb781b1b25015e898d63d953Timo Sirainen if (unlikely(node_offset >= trie->data_size)) {
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen data = CONST_PTR_OFFSET(trie->data, node_offset);
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen end = CONST_PTR_OFFSET(trie->data, trie->data_size);
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen if (unlikely(node_offset + child_count >= trie->data_size)) {
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen /* get child offsets */
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen for (i = 0; i < child_count; i++) {
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen /* we always start with !have_sequential, so at i=0 this
4ece61edd7c266a4b8f3b290a7f0a3cb3d13ca0fTimo Sirainen check always goes to add the first child */
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen if (node->have_sequential && child_chars[i] < SEQUENTIAL_COUNT)
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen child_idx = node_add_child(trie, node, child_chars[i],
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen /* 1) child offset */
e3736b5d480878031c386ac55d201fcf08e68766Timo Sirainen /* no children */
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen /* 2) uidlist */
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen child->uid_list_idx = squat_unpack_num(&data, end);
e3736b5d480878031c386ac55d201fcf08e68766Timo Sirainen /* we don't write nodes with empty uidlists */
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen if (!UIDLIST_IS_SINGLETON(child->uid_list_idx)) {
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen /* 3) next uid */
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen child->next_uid = squat_unpack_num(&data, end) + 1;
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen /* 4) unused uids + leaf string flag */
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen /* leaf string */
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen unsigned int len;
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen unsigned char *dest;
a21823d90cee6a18aeab0378637472c7e3fbbab2Timo Sirainen /* 5) leaf string length */
a21823d90cee6a18aeab0378637472c7e3fbbab2Timo Sirainen /* the string may be long -
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen recalculate the end pos */
a21823d90cee6a18aeab0378637472c7e3fbbab2Timo Sirainen size = len + TRIE_BYTES_LEFT(child_count - i);
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen /* we should never get this far */
e3736b5d480878031c386ac55d201fcf08e68766Timo Sirainennode_write_children(struct squat_trie_build_context *ctx,
a21823d90cee6a18aeab0378637472c7e3fbbab2Timo Sirainen struct squat_node *node, const uoff_t *node_offsets)
d516e6848ecfbc7381abe9414fd8011fdf9d8c95Timo Sirainen const unsigned char *chars;
a21823d90cee6a18aeab0378637472c7e3fbbab2Timo Sirainen uint8_t child_count, buf[SQUAT_PACK_MAX_SIZE * 5], *bufp;
d516e6848ecfbc7381abe9414fd8011fdf9d8c95Timo Sirainen unsigned int i;
3b49aee9ced3b0370a3be396aca53acd5f21418cTimo Sirainen o_stream_nsend(ctx->output, chars, child_count);
3b49aee9ced3b0370a3be396aca53acd5f21418cTimo Sirainen for (i = 0; i < child_count; i++) {
3b49aee9ced3b0370a3be396aca53acd5f21418cTimo Sirainen /* 1) child offset */
905457e0982fc15930d90e174f271dc69f9afcf9Timo Sirainen /* 2) uidlist */
905457e0982fc15930d90e174f271dc69f9afcf9Timo Sirainen squat_pack_num(&bufp, children[i].uid_list_idx);
905457e0982fc15930d90e174f271dc69f9afcf9Timo Sirainen if (!UIDLIST_IS_SINGLETON(children[i].uid_list_idx)) {
905457e0982fc15930d90e174f271dc69f9afcf9Timo Sirainen /* 3) next uid */
905457e0982fc15930d90e174f271dc69f9afcf9Timo Sirainen squat_pack_num(&bufp, children[i].next_uid - 1);
905457e0982fc15930d90e174f271dc69f9afcf9Timo Sirainen /* 4a) unused uids */
905457e0982fc15930d90e174f271dc69f9afcf9Timo Sirainen squat_pack_num(&bufp, children[i].unused_uids << 1);
905457e0982fc15930d90e174f271dc69f9afcf9Timo Sirainen /* 4b) unused uids + flag */
3b49aee9ced3b0370a3be396aca53acd5f21418cTimo Sirainen squat_pack_num(&bufp, (children[i].unused_uids << 1) | 1);
3b49aee9ced3b0370a3be396aca53acd5f21418cTimo Sirainen /* 5) leaf string length */
3b49aee9ced3b0370a3be396aca53acd5f21418cTimo Sirainen squat_pack_num(&bufp, children[i].leaf_string_length - 1);
3b49aee9ced3b0370a3be396aca53acd5f21418cTimo Sirainenstatic inline void
3b49aee9ced3b0370a3be396aca53acd5f21418cTimo Sirainennode_add_uid(struct squat_trie_build_context *ctx, uint32_t uid,
3b49aee9ced3b0370a3be396aca53acd5f21418cTimo Sirainen /* duplicate */
3b49aee9ced3b0370a3be396aca53acd5f21418cTimo Sirainen squat_uidlist_build_add_uid(ctx->uidlist_build_ctx,
3b49aee9ced3b0370a3be396aca53acd5f21418cTimo Sirainennode_split_string(struct squat_trie_build_context *ctx, struct squat_node *node)
3b49aee9ced3b0370a3be396aca53acd5f21418cTimo Sirainen unsigned char *str;
3b49aee9ced3b0370a3be396aca53acd5f21418cTimo Sirainen unsigned int uid, idx, leafstr_len = node->leaf_string_length;
3b49aee9ced3b0370a3be396aca53acd5f21418cTimo Sirainen /* make a copy of the leaf string and convert to normal node by
3b49aee9ced3b0370a3be396aca53acd5f21418cTimo Sirainen removing it. */
905457e0982fc15930d90e174f271dc69f9afcf9Timo Sirainen memcpy(str, node->children.static_leaf_string, leafstr_len);
3b49aee9ced3b0370a3be396aca53acd5f21418cTimo Sirainen memcpy(str, node->children.leaf_string, leafstr_len);
3b49aee9ced3b0370a3be396aca53acd5f21418cTimo Sirainen /* create a new child node for the rest of the string */
3b49aee9ced3b0370a3be396aca53acd5f21418cTimo Sirainen idx = node_add_child(ctx->trie, node, str[0], MAX_FAST_LEVEL);
3b49aee9ced3b0370a3be396aca53acd5f21418cTimo Sirainen /* update uidlist to contain all of parent's UIDs */
3b49aee9ced3b0370a3be396aca53acd5f21418cTimo Sirainen child->next_uid = node->next_uid - node->unused_uids;
3b49aee9ced3b0370a3be396aca53acd5f21418cTimo Sirainen squat_uidlist_build_add_uid(ctx->uidlist_build_ctx,
3b49aee9ced3b0370a3be396aca53acd5f21418cTimo Sirainen i_assert(!child->have_sequential && child->children.data == NULL);
3b49aee9ced3b0370a3be396aca53acd5f21418cTimo Sirainen /* make the child a leaf string */
3b49aee9ced3b0370a3be396aca53acd5f21418cTimo Sirainen child->children.leaf_string = i_malloc(leafstr_len);
3b49aee9ced3b0370a3be396aca53acd5f21418cTimo Sirainennode_leaf_string_add_or_split(struct squat_trie_build_context *ctx,
3b49aee9ced3b0370a3be396aca53acd5f21418cTimo Sirainen const unsigned char *data, unsigned int data_len)
3b49aee9ced3b0370a3be396aca53acd5f21418cTimo Sirainen const unsigned char *str = NODE_LEAF_STRING(node);
3b49aee9ced3b0370a3be396aca53acd5f21418cTimo Sirainen const unsigned int leafstr_len = node->leaf_string_length;
3b49aee9ced3b0370a3be396aca53acd5f21418cTimo Sirainen unsigned int i;
3b49aee9ced3b0370a3be396aca53acd5f21418cTimo Sirainen /* different lengths, can't match */
3b49aee9ced3b0370a3be396aca53acd5f21418cTimo Sirainen for (i = 0; i < data_len; i++) {
905457e0982fc15930d90e174f271dc69f9afcf9Timo Sirainen /* non-match */
905457e0982fc15930d90e174f271dc69f9afcf9Timo Sirainenstatic int squat_build_add(struct squat_trie_build_context *ctx, uint32_t uid,
905457e0982fc15930d90e174f271dc69f9afcf9Timo Sirainen unsigned char *chars;
905457e0982fc15930d90e174f271dc69f9afcf9Timo Sirainen unsigned int idx;
905457e0982fc15930d90e174f271dc69f9afcf9Timo Sirainen if (unlikely(node_read_children(trie, node, level) < 0))
905457e0982fc15930d90e174f271dc69f9afcf9Timo Sirainen /* the whole string must match or we need to split
905457e0982fc15930d90e174f271dc69f9afcf9Timo Sirainen if (node_leaf_string_add_or_split(ctx, node, data,
905457e0982fc15930d90e174f271dc69f9afcf9Timo Sirainen /* child node's UIDs are relative to ours. so for example if
905457e0982fc15930d90e174f271dc69f9afcf9Timo Sirainen we're adding UID 4 and this node now has [2,4] UIDs,
905457e0982fc15930d90e174f271dc69f9afcf9Timo Sirainen unused_uids=3 and so the child node will be adding
905457e0982fc15930d90e174f271dc69f9afcf9Timo Sirainen UID 4-3 = 1. */
905457e0982fc15930d90e174f271dc69f9afcf9Timo Sirainen i_assert(node->child_count >= SEQUENTIAL_COUNT);
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen /* create new children */
905457e0982fc15930d90e174f271dc69f9afcf9Timo Sirainen /* convert the node into a leaf string */
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen memcpy(node->children.leaf_string, data, len);
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainensquat_build_word_bytes(struct squat_trie_build_context *ctx, uint32_t uid,
41955c400476941fa274f18b106a5922866fd780Timo Sirainen unsigned int i;
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen if (trie->hdr.full_len <= trie->hdr.partial_len)
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen /* the first word is longer than others */
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen for (; i < size; i++) {
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainensquat_build_word(struct squat_trie_build_context *ctx, uint32_t uid,
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen const unsigned char *data, const uint8_t *char_lengths,
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen unsigned int size)
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen unsigned int i, j, bytelen;
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen /* optimization path: all characters are bytes */
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen return squat_build_word_bytes(ctx, uid, data, size);
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen if (trie->hdr.full_len <= trie->hdr.partial_len)
4ece61edd7c266a4b8f3b290a7f0a3cb3d13ca0fTimo Sirainen /* the first word is longer than others */
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen for (j = 0; j < trie->hdr.full_len && bytelen < size; j++)
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen if (squat_build_add(ctx, uid, data, bytelen) < 0)
41955c400476941fa274f18b106a5922866fd780Timo Sirainen for (j = 0; j < trie->hdr.partial_len && i+bytelen < size; j++)
41955c400476941fa274f18b106a5922866fd780Timo Sirainen if (squat_build_add(ctx, uid, data + i, bytelen) < 0)
41955c400476941fa274f18b106a5922866fd780Timo Sirainenstatic unsigned char *
41955c400476941fa274f18b106a5922866fd780Timo Sirainensquat_data_normalize(struct squat_trie *trie, const unsigned char *data,
41955c400476941fa274f18b106a5922866fd780Timo Sirainen unsigned int size)
41955c400476941fa274f18b106a5922866fd780Timo Sirainen static const unsigned char replacement_utf8[] = { 0xef, 0xbf, 0xbd };
41955c400476941fa274f18b106a5922866fd780Timo Sirainen unsigned char *dest;
41955c400476941fa274f18b106a5922866fd780Timo Sirainen unsigned int i;
41955c400476941fa274f18b106a5922866fd780Timo Sirainen for (i = 0; i < size; i++) {
41955c400476941fa274f18b106a5922866fd780Timo Sirainen if (data[i] == replacement_utf8[0] && i + 2 < size &&
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen /* Don't index replacement character */
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainensquat_trie_build_more_real(struct squat_trie_build_context *ctx,
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen const unsigned char *input, unsigned int size)
41955c400476941fa274f18b106a5922866fd780Timo Sirainen const unsigned char *data;
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen unsigned int i, start = 0;
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen uid = uid * 2 + (type == SQUAT_INDEX_TYPE_HEADER ? 1 : 0);
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen data = squat_data_normalize(trie, input, size);
905457e0982fc15930d90e174f271dc69f9afcf9Timo Sirainen for (i = 0; i < size; i++) {
905457e0982fc15930d90e174f271dc69f9afcf9Timo Sirainen char_lengths[i] = uni_utf8_char_bytes(input[i]);
4ece61edd7c266a4b8f3b290a7f0a3cb3d13ca0fTimo Sirainenint squat_trie_build_more(struct squat_trie_build_context *ctx,
4ece61edd7c266a4b8f3b290a7f0a3cb3d13ca0fTimo Sirainen const unsigned char *input, unsigned int size)
5b486fdbf2077a994337dc8bd4477ec51d5daf4eTimo Sirainen ret = squat_trie_build_more_real(ctx, uid, type, input, size);
4ece61edd7c266a4b8f3b290a7f0a3cb3d13ca0fTimo Sirainennode_drop_unused_children(struct squat_trie *trie, struct squat_node *node)
4ece61edd7c266a4b8f3b290a7f0a3cb3d13ca0fTimo Sirainen unsigned char *chars;
4ece61edd7c266a4b8f3b290a7f0a3cb3d13ca0fTimo Sirainen struct squat_node *children_src, *children_dest;
4ece61edd7c266a4b8f3b290a7f0a3cb3d13ca0fTimo Sirainen unsigned int i, j, orig_child_count = node->child_count;
4ece61edd7c266a4b8f3b290a7f0a3cb3d13ca0fTimo Sirainen /* move chars */
4ece61edd7c266a4b8f3b290a7f0a3cb3d13ca0fTimo Sirainen for (i = j = 0; i < orig_child_count; i++) {
4ece61edd7c266a4b8f3b290a7f0a3cb3d13ca0fTimo Sirainen /* move children. note that children_dest may point to different
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen location than children_src, although they both point to the
4ece61edd7c266a4b8f3b290a7f0a3cb3d13ca0fTimo Sirainen same node. */
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen for (i = j = 0; i < orig_child_count; i++) {
4ece61edd7c266a4b8f3b290a7f0a3cb3d13ca0fTimo Sirainensquat_write_node(struct squat_trie_build_context *ctx, struct squat_node *node,
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen unsigned int i;
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen if (node->children_not_mapped && ctx->compress_nodes) {
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen if (node_read_children(trie, node, MAX_FAST_LEVEL) < 0)
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen *node_offset_r = !node->children_not_mapped ? 0 :
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen for (i = 0; i < child_count; i++) {
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainenstatic int squat_write_nodes(struct squat_trie_build_context *ctx)
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen ret = squat_write_node(ctx, &ctx->trie->root, &node_offset, 0);
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen trie->hdr.root_unused_uids = trie->root.unused_uids;
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen trie->hdr.root_next_uid = trie->root.next_uid;
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen trie->hdr.root_uidlist_idx = trie->root.uid_list_idx;
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainensquat_trie_iterate_init(struct squat_trie *trie)
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen ctx = i_new(struct squat_trie_iterate_context, 1);
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen i_array_init(&ctx->parents, trie->hdr.partial_len*2);
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainensquat_trie_iterate_deinit(struct squat_trie_iterate_context *ctx)
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainenstatic struct squat_node *
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainensquat_trie_iterate_first(struct squat_trie_iterate_context *ctx)
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen if (node_read_children(ctx->trie, ctx->cur.node, 1) < 0) {
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainenstatic struct squat_node *
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainensquat_trie_iterate_next(struct squat_trie_iterate_context *ctx,
2b9fd042e701cfe7d79c4294a5ab401d6ec9ce18Timo Sirainen while (ctx->cur.idx == ctx->cur.node->child_count ||
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen iter_nodes = array_get_modifiable(&ctx->parents, &count);
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen children = NODE_CHILDREN_NODES(ctx->cur.node);
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen while (children[ctx->cur.idx++].uid_list_idx == 0) {
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen if (ctx->cur.idx == ctx->cur.node->child_count) {
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen /* no more non-empty children in this node */
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen return squat_trie_iterate_next(ctx, shifts_r);
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen memset(&ctx->cur.shifts, 0, sizeof(ctx->cur.shifts));
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainensquat_uidlist_update_expunged_uids(const ARRAY_TYPE(seq_range) *shifts_arr,
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen unsigned int i, uid_idx, uid_count, shift_count;
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen uint32_t child_shift_seq1, child_shift_count, seq_high;
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen i_assert(node->uid_list_idx != 0 || node->child_count == 0);
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen /* we'll recalculate this */
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen uids = array_get_modifiable(uids_arr, &uid_count);
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen /* skip UID ranges until we skip/overlap shifts */
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen I_MAX(shifts[i].seq1, seq_high) > uids[uid_idx].seq2))
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen uids[uid_idx-1].seq2 >= uids[uid_idx].seq1 - 1) {
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen /* we can merge this and the previous range */
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen /* shift is entirely before UID range */
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen /* handle shifts before UID range */
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen /* update child shifts */
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen /* if the shifts continue after the UID range,
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen treat it in the next loop iteration */
4ece61edd7c266a4b8f3b290a7f0a3cb3d13ca0fTimo Sirainen /* update UIDs - no matter where within the UID range
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen the shifts happened, the result is the same:
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen shift number of UIDs are removed, and the rest
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen are decreased by shift_sum.
905457e0982fc15930d90e174f271dc69f9afcf9Timo Sirainen 123 uids child_shifts
905457e0982fc15930d90e174f271dc69f9afcf9Timo Sirainen /* removed completely */
905457e0982fc15930d90e174f271dc69f9afcf9Timo Sirainen /* the next loop iteration fixes the UIDs */
905457e0982fc15930d90e174f271dc69f9afcf9Timo Sirainen /* root node - UIDs are only removed, not shifted */
905457e0982fc15930d90e174f271dc69f9afcf9Timo Sirainen /* no UIDs left, delete the node's children and mark it
905457e0982fc15930d90e174f271dc69f9afcf9Timo Sirainensquat_trie_expunge_uidlists(struct squat_trie_build_context *ctx,
905457e0982fc15930d90e174f271dc69f9afcf9Timo Sirainen struct squat_uidlist_rebuild_context *rebuild_ctx,
905457e0982fc15930d90e174f271dc69f9afcf9Timo Sirainen ARRAY_TYPE(seq_range) uid_range, root_shifts, shifts;
905457e0982fc15930d90e174f271dc69f9afcf9Timo Sirainen i_array_init(&root_shifts, array_count(expunged_uids));
905457e0982fc15930d90e174f271dc69f9afcf9Timo Sirainen array_append_array(&root_shifts, expunged_uids);
905457e0982fc15930d90e174f271dc69f9afcf9Timo Sirainen i_array_init(&iter->cur.shifts, array_count(expunged_uids));
905457e0982fc15930d90e174f271dc69f9afcf9Timo Sirainen if (squat_uidlist_get_seqrange(ctx->trie->uidlist,
dee43975a70bcdb9dc83d34d6a2b177d37bb7194Timo Sirainen squat_uidlist_update_expunged_uids(&shifts, &iter->cur.shifts,
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen squat_uidlist_rebuild_nextu(rebuild_ctx, &uid_range);
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen i_assert(node->uid_list_idx != 0 || node->next_uid == 0);
905457e0982fc15930d90e174f271dc69f9afcf9Timo Sirainen node = squat_trie_iterate_next(iter, &shifts);
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainensquat_trie_renumber_uidlists2(struct squat_trie_build_context *ctx,
942302b0247403645394d848b3c620ead262a2a5Timo Sirainen struct squat_uidlist_rebuild_context *rebuild_ctx,
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen if (!UIDLIST_IS_SINGLETON(node->uid_list_idx)) {
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen /* rebuild the uidlist */
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen squat_uidlist_rebuild_next(rebuild_ctx, &uids);
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen node = squat_trie_iterate_next(iter, &shifts);
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainensquat_trie_renumber_uidlists(struct squat_trie_build_context *ctx,
939a0d82523538b2de38a02bc9f790a67b7ebf47Timo Sirainen struct squat_uidlist_rebuild_context *rebuild_ctx;
939a0d82523538b2de38a02bc9f790a67b7ebf47Timo Sirainen if ((ret = squat_uidlist_rebuild_init(ctx->uidlist_build_ctx,
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen I_MAX((unsigned int)now, ctx->trie->hdr.indexid + 1);
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen ret = squat_trie_expunge_uidlists(ctx, rebuild_ctx, iter,
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen ret = squat_trie_renumber_uidlists2(ctx, rebuild_ctx, iter);
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen /* lock the trie before we rename uidlist */
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen i_assert(ctx->file_lock == NULL && ctx->dotlock == NULL);
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen return squat_uidlist_rebuild_finish(rebuild_ctx, ret < 0);
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainenstatic bool squat_trie_check_header(struct squat_trie *trie)
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen if (trie->hdr.version != SQUAT_TRIE_VERSION ||
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen if (trie->hdr.partial_len > trie->hdr.full_len) {
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen i_error("Corrupted %s: partial len > full len", trie->path);
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen i_error("Corrupted %s: full len=0", trie->path);
4ece61edd7c266a4b8f3b290a7f0a3cb3d13ca0fTimo Sirainenstatic int squat_trie_map_header(struct squat_trie *trie)
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen /* newly created file */
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen if ((trie->flags & SQUAT_INDEX_FLAG_MMAP_DISABLE) != 0) {
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen ret = pread_full(trie->fd, &trie->hdr, sizeof(trie->hdr), 0);
24e5e4526d8f5cbc056ab97fd0d154d0936d7a5eTimo Sirainen i_error("Corrupted %s: File too small", trie->path);
905457e0982fc15930d90e174f271dc69f9afcf9Timo Sirainen if (trie->locked_file_size < sizeof(trie->hdr)) {
905457e0982fc15930d90e174f271dc69f9afcf9Timo Sirainen i_error("Corrupted %s: File too small", trie->path);
bool changed;
int ret;
if (ret == 0) {
if (ret < 0)
int fd;
return fd;
const char *path;
&file_lock);
if (ret <= 0) {
if (ret == 0) {
path);
if (ret == 0)
if (ret == 0) {
return ret;
if (ret < 0) {
if (ret < 0) {
return ret;
int ret;
if (ret == 0) {
if (ret < 0)
if (ret == 0)
if (unlock)
return ret;
unsigned char *chars;
unsigned int idx;
int level = 0;
const unsigned char *str;
if (size == 0)
level++;
goto found;
idx = 0;
goto found;
uids) < 0)
data++;
size--;
if (count == 0)
for (i = 0; i < count; i++) {
struct squat_trie_lookup_context {
bool first;
unsigned int size)
int ret;
i += char_lengths[i];
i = 0; char_idx = 0;
bytelen = 0;
if (ret <= 0) {
return ret;
i += char_lengths[i];
char_idx++;
unsigned int count;
unsigned char *data;
int ret = 0;
i += bytes;
if (i != start) {
i - start);
if (start == 0) {
if (ret > 0) {
i - start);
} else if (str_bytelen > 0) {
i - start);
} else if (!searched) {
i_unreached();
int ret;
T_BEGIN {
} T_END;
return ret;