squat-trie.c revision 446f37765473d419b534f7934850e77e842d3a2a
/* Copyright (C) 2006 Timo Sirainen */
#include "lib.h"
#include "array.h"
#include "bsearch-insert-pos.h"
#include "file-cache.h"
#include "file-lock.h"
#include "istream.h"
#include "ostream.h"
#include "read-full.h"
#include "write-full.h"
#include "mmap-util.h"
#include "unichar.h"
#include "squat-uidlist.h"
#include "squat-trie.h"
#include "squat-trie-private.h"
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <ctype.h>
/* 8bit character counter holds only 255, so we can't use 256. */
#define MAX_8BIT_CHAR_COUNT 255
#define FAST_8BIT_LEVEL 2
#define TRIE_COMPRESS_PERCENTAGE 30
#define SQUAT_TRIE_VERSION 1
#define SQUAT_TRIE_LOCK_TIMEOUT 60
/* for non-x86 use memcpy() when accessing unaligned int* addresses */
#if defined(__i386__) || defined(__x86_64__)
# define ALLOW_UNALIGNED_ACCESS
#endif
#define BLOCK_SIZE 4
struct squat_trie {
char *filepath;
int fd;
enum file_lock_method lock_method;
int lock_count;
int lock_type; /* F_RDLCK / F_WRLCK */
struct file_cache *file_cache;
void *mmap_base; /* NULL with mmap_disable=yes */
const uint8_t *const_mmap_base;
const struct squat_trie_header *hdr;
char *uidlist_filepath;
struct squat_uidlist *uidlist;
unsigned int corrupted:1;
unsigned int mmap_disable:1;
};
struct squat_trie_build_context {
struct squat_trie *trie;
unsigned int prev_added_size;
unsigned int node_count;
unsigned int deleted_space;
unsigned int modified:1;
unsigned int failed:1;
unsigned int locked:1;
};
struct squat_trie_compress_context {
struct squat_trie *trie;
const char *tmp_path;
int fd;
struct squat_uidlist_compress_ctx *uidlist_ctx;
unsigned int node_count;
};
struct trie_node {
/* new characters have been added to this node */
/* idx pointers have been updated */
/* the node pointers are valid as long as their lowest bit is 0,
otherwise they're offsets to the trie file (>> 1).
in leaf nodes the children pointers are uint32_t uid_list_idx[]; */
/* uint8_t 8bit_chars[chars_8bit_count]; */
/* struct trie_node *children[chars_8bit_count]; */
/* uint16_t 16bit_chars[chars_16bit_count]; */
/* struct trie_node *children[chars_16bit_count]; */
};
#define NODE_CHARS8(node) \
#define NODE_CHILDREN8(node) \
(struct trie_node **) \
((char *)((node) + 1) + \
((node)->chars_8bit_count) * \
((level) == BLOCK_SIZE ? \
(struct trie_node **) \
static int
static int
{
}
{
}
{
uint8_t c;
/* number continues as long as the highest bit is set */
while (num >= 0x80) {
num >>= 7;
}
c = num;
}
{
const uint8_t *c = *p;
unsigned int bits = 0;
while (c != end && *c >= 0x80) {
bits += 7;
c++;
}
if (c == end) {
/* last number shouldn't end with high bit */
return 0;
}
/* we have only 32bit numbers */
return 0;
}
*p = c + 1;
return value;
}
static const uint16_t *
{
size_t i;
buffer_set_used_size(dest, 0);
for (i = 0; i < size; i++) {
if (src[i] <= 32)
chr = 0;
else if (src[i] <= 'z')
else if (src[i] < 128)
else {
/* UTF-8 input */
/* FIXME: can we do anything better than just
truncate with >16bit values? */
chr = 0;
else {
}
}
}
}
static void
{
i_error("%s failed with index search file %s: %m",
}
{
}
static void
{
unsigned int i;
#ifndef ALLOW_UNALIGNED_ACCESS
#endif
for (i = 0; i < count; i++)
#ifndef ALLOW_UNALIGNED_ACCESS
} else {
/* unaligned access */
for (i = 0; i < count; i++) {
sizeof(children[i]));
}
}
#endif
}
static void
{
unsigned int i;
if (level == BLOCK_SIZE) {
return;
}
#ifndef ALLOW_UNALIGNED_ACCESS
#endif
for (i = 0; i < count; i++) {
}
#ifndef ALLOW_UNALIGNED_ACCESS
} else {
/* unaligned access */
for (i = 0; i < count; i++) {
sizeof(idx));
}
}
#endif
}
{
return 0;
if (ret < 0) {
return -1;
}
return 0;
}
static void
{
int i, j;
j = chars8_count - 1;
if (j >= 0 && i == chars[j])
else
chars[i] = i;
}
}
static int
{
unsigned int idx_size, alloced_chars8_count;
return -1;
return -1;
}
/* get 8bit char count and check that it's valid */
return -1;
if (chars8_count > MAX_8BIT_CHAR_COUNT ||
return -1;
}
if ((num & 1) == 0) {
/* no 16bit chars */
chars16_count = 0;
chars16_memsize = 0;
chars16_offset = 0;
} else {
/* get the 16bit char count */
if (chars16_count > 65536) {
return -1;
}
/* map the required area size and make sure it exists */
return -1;
return -1;
}
}
{
const void *end_offset;
if (alloced_chars8_count != chars8_count)
if (chars16_count == 0)
else {
sizeof(uint16_t));
}
}
return 0;
}
unsigned int count)
{
unsigned int i;
for (i = 0; i < count; i++) {
}
}
{
if (level < BLOCK_SIZE) {
}
}
{
}
}
}
{
}
}
static int
{
return -1;
return -1;
}
if (hdr->root_offset != 0 &&
return -1;
}
return -1;
}
return 0;
}
{
struct squat_trie_header hdr;
int ret;
sizeof(hdr.modify_counter),
if (ret < 0) {
return -1;
}
if (ret == 0) {
/* broken file, treat as modified */
return 1;
}
}
{
const struct squat_trie_header *hdr;
if (!trie->mmap_disable) {
/* everything is already mapped */
return 1;
}
} else {
if (ret <= 0)
}
}
return -1;
}
if (!trie->mmap_disable) {
return -1;
}
} else {
if (ret < 0) {
return -1;
}
return -1;
}
}
return -1;
return 0;
}
return 1;
}
{
/* don't bother adding complexity by trying to handle this
error here. we'll break later anyway in easier error
handling paths. */
} else {
}
if (trie->mmap_disable)
}
{
int fd;
if (fd == -1) {
return 0;
return -1;
}
return 1;
}
{
struct squat_trie_header hdr;
return -1;
}
return -1;
}
}
return 0;
}
struct squat_trie *
{
struct squat_trie *trie;
return trie;
}
{
}
{
int ret;
return ret;
if (ret == 0) {
*uid_r = 0;
return 0;
}
}
return -1;
return ret;
}
{
return 1;
return -1;
}
}
static int
{
int ret;
if (ret == 0)
return ret;
}
{
int ret;
if (trie->lock_count > 0) {
/* read lock -> write lock would deadlock */
trie->lock_count++;
return 1;
}
return -1;
if (ret == 0) {
return -1;
}
} else {
return -1;
}
}
for (;;) {
if (ret <= 0)
return ret;
/* if the trie has been compressed, we need to reopen the
file and try to lock again */
if (ret == 0)
break;
if (ret < 0)
return -1;
return -1;
}
if (created) {
/* we possibly created this file. now that we've locked the
file, we can safely check if someone else already wrote the
header or if we should do it now */
if (trie_file_create_finish(trie) < 0) {
return -1;
}
}
if (squat_trie_map(trie) <= 0) {
return -1;
}
return -1;
}
trie->lock_count++;
return 1;
}
{
if (--trie->lock_count > 0)
return;
}
static struct trie_node *
{
if (level <= FAST_8BIT_LEVEL) {
idx_size);
for (i = 0; i < MAX_8BIT_CHAR_COUNT; i++)
chars[i] = i;
if (chars16_count > 0) {
}
} else if (chr < MAX_8BIT_CHAR_COUNT) {
} else {
}
return node;
}
static struct trie_node *
unsigned int level)
{
if (chr < MAX_8BIT_CHAR_COUNT) {
new_idx_offset = sizeof(*node) +
} else {
}
if (chr < MAX_8BIT_CHAR_COUNT) {
} else {
}
/* 0..character position */
if (chr < MAX_8BIT_CHAR_COUNT) {
/* rest of the characters */
} else {
/* rest of the characters */
}
/* indexes from 0 to character position */
/* zero the inserted character index */
/* rest of the indexes */
return node;
}
static int
{
int ret;
if (*data < MAX_8BIT_CHAR_COUNT) {
unsigned int count;
ctx->node_count++;
} else if (level <= FAST_8BIT_LEVEL) {
} else {
sizeof(chars[0]),
}
}
} else {
unsigned int count;
ctx->node_count++;
char_idx = 0;
} else {
unsigned int idx_size;
sizeof(chars[0]),
}
}
}
if (level < BLOCK_SIZE) {
if ((child_idx & 1) != 0) {
return -1;
}
if (ret < 0)
return -1;
if (ret > 0)
} else {
uid) < 0)
return -1;
}
return modified ? 1 : 0;
}
static uint32_t
{
return 0;
if (*data < MAX_8BIT_CHAR_COUNT) {
if (level <= FAST_8BIT_LEVEL)
else {
sizeof(chars[0]), chr_8bit_cmp);
return 0;
}
} else {
sizeof(chars[0]), chr_16bit_cmp);
return 0;
}
if (level < BLOCK_SIZE) {
if ((child_idx & 1) != 0) {
/* not mapped to memory yet. do it. */
return -1;
}
} else {
}
}
{
unsigned int i;
/* skip all blocks that contain spaces or control characters.
no-one searches them anyway */
for (i = 0; i < BLOCK_SIZE; i++) {
if (data[i] == 0)
return FALSE;
}
return TRUE;
}
struct squat_trie_build_context *
{
struct squat_trie_build_context *ctx;
else {
}
*last_uid_r = 0;
return ctx;
}
{
if (ret == 0)
return ret;
}
{
unsigned int i, tmp_size;
return -1;
t_push();
/* @UNSAFE: continue from last block */
for (i = 0; i + BLOCK_SIZE <= tmp_size; i++) {
if (block_want_add(buf+i)) {
if (trie_insert_node(ctx,
t_pop();
return -1;
}
}
}
if (size < BLOCK_SIZE) {
t_pop();
return 0;
}
t_pop();
return -1;
}
}
for (i = 0; i + BLOCK_SIZE <= size; i++) {
if (block_want_add(str+i)) {
t_pop();
return -1;
}
}
}
t_pop();
return 0;
}
unsigned int count)
{
unsigned int i;
for (i = 0; i < count; i++) {
continue;
if ((child_idx & 1) != 0)
else
}
}
{
buffer_set_used_size(buf, 0);
if (node->chars_16bit_count > 0) {
}
}
{
unsigned int i;
for (i = 0; i < node->chars_8bit_count; i++) {
return -1;
}
for (i = 0; i < node->chars_16bit_count; i++) {
return -1;
}
return 0;
}
{
buffer_set_used_size(buf, 0);
if (node->chars_16bit_count > 0) {
}
}
static int
unsigned int count)
{
unsigned int i;
for (i = 0; i < count; i++) {
return -1;
}
}
return 0;
}
{
if (level < BLOCK_SIZE) {
node->chars_8bit_count) < 0)
return -1;
node->chars_16bit_count) < 0)
return -1;
}
return 0;
if (level < BLOCK_SIZE) {
if (level <= FAST_8BIT_LEVEL)
} else {
return -1;
}
if ((offset & 1) != 0) {
offset++;
}
/* append to end of file. the parent node is written later. */
} else {
/* overwrite node's contents */
/* FIXME: write only the indexes if !node->resized */
}
return 0;
}
static int
{
struct squat_trie_header hdr;
return -1;
}
if (hdr.used_file_size == 0) {
}
ctx->deleted_space = 0;
return -1;
/* update the header */
}
return 0;
}
unsigned int current_message_count)
{
/* see if we've reached the max. deleted space in file */
return TRUE;
}
}
static int
{
/* nothing changed */
return 0;
}
return -1;
return -1;
return -1;
if (squat_trie_map(trie) <= 0)
return -1;
}
return -1;
}
return 0;
}
{
struct trie_node **child_dest;
unsigned int i, j, old_count;
for (i = j = 0; i < old_count; i++) {
}
node->chars_8bit_count = j;
for (i = j = 0; i < old_count; i++) {
child_dest[j++] = child_src[i];
}
if (node->chars_16bit_count > 0) {
}
}
{
struct trie_node **child_dest;
unsigned int i, j, old_count;
for (i = j = 0; i < old_count; i++) {
}
node->chars_16bit_count = j;
for (i = j = 0; i < old_count; i++) {
child_dest[j++] = child_src[i];
}
}
{
unsigned int i, j, old_count;
for (i = j = 0; i < old_count; i++) {
if (child_src[i] != 0)
}
node->chars_8bit_count = j;
for (i = j = 0; i < old_count; i++) {
if (child_src[i] != 0)
child_dest[j++] = child_src[i];
}
}
{
unsigned int i, j, old_count;
for (i = j = 0; i < old_count; i++) {
if (child_src[i] != 0)
}
node->chars_16bit_count = j;
for (i = j = 0; i < old_count; i++) {
if (child_src[i] != 0)
child_dest[j++] = child_src[i];
}
}
static int
unsigned int level)
{
struct trie_node *child_node;
unsigned int i;
int ret = 0;
bool need_char_compress = FALSE;
for (i = 0; i < count; i++) {
continue;
}
child_idx &= ~1;
return -1;
if (child_node->file_offset != 0)
else {
}
if (ret < 0)
return -1;
}
return need_char_compress ? 0 : 1;
}
static int
{
unsigned int i;
int ret;
bool compress_chars = FALSE;
for (i = 0; i < node->chars_8bit_count; i++) {
if (ret < 0)
return -1;
if (ret == 0) {
idx8[i] = 0;
}
}
if (compress_chars) {
}
for (i = 0; i < node->chars_16bit_count; i++) {
if (ret < 0)
return -1;
if (ret == 0) {
idx16[i] = 0;
}
}
if (compress_chars) {
node->chars_16bit_count = i;
}
return 0;
}
static int
{
int ret;
if (level == BLOCK_SIZE) {
return -1;
if (node->chars_8bit_count == 0 &&
node->chars_16bit_count == 0) {
/* everything expunged */
ctx->node_count--;
node->file_offset = 0;
return 0;
}
} else {
level + 1)) < 0)
return -1;
if (ret == 0)
level + 1)) < 0)
return -1;
if (ret == 0)
if (node->chars_8bit_count == 0 &&
node->chars_16bit_count == 0) {
/* everything expunged */
ctx->node_count--;
node->file_offset = 0;
return 0;
}
}
return 0;
}
struct squat_trie *trie)
{
struct squat_trie_header hdr;
return -1;
}
/* write a dummy header first */
return 0;
}
static void
{
struct squat_trie_header hdr;
}
{
struct squat_trie_compress_context ctx;
unsigned int orig_lock_count;
int ret;
return -1;
return -1;
}
if (ret == 0) {
/* do the compression */
else {
}
}
if (ret == 0 && orig_lock_count > 0) {
/* lock the file before renaming so we can keep it locked. */
&file_lock) <= 0)
ret = -1;
}
if (ret == 0) {
i_error("rename(%s, %s) failed: %m",
ret = -1;
}
}
if (ret < 0) {
} else {
if (squat_trie_map(trie) <= 0)
return -1;
}
return ret;
}
unsigned int current_message_count)
{
bool compress;
int ret;
return ret;
if (compress)
return ret;
}
{
}
{
if (len < BLOCK_SIZE)
return -1;
/* skip the blocks that can't exist */
if (--len < BLOCK_SIZE)
return -1;
}
return -1;
return 0;
}
static int
{
if (list == 0)
return 0;
return -1;
}
while (len > BLOCK_SIZE) {
len--;
continue;
if (list == 0) {
return 0;
}
return -1;
}
}
}
const char *str)
{
unsigned int len;
int ret;
return -1;
return ret;
}
static int
{
continue;
if (list == 0) {
return 0;
}
return -1;
}
}
}
const char *str)
{
unsigned int len;
int ret;
return -1;
return ret;
}
{
}