index-thread-links.c revision afb00cdb7ceb2ebe754d9c6b229c663a6de3b736
40N/A/* Copyright (c) 2002-2008 Dovecot authors, see the included COPYING file */
40N/A
40N/A#include "lib.h"
40N/A#include "array.h"
371N/A#include "message-id.h"
40N/A#include "mail-storage.h"
40N/A#include "index-thread-private.h"
40N/A
40N/Astatic struct mail_thread_node *
40N/Athread_msgid_get(struct mail_thread_update_context *ctx, uint32_t ref_uid,
40N/A const char *msgid, uint32_t *idx_r)
40N/A{
40N/A struct mail_thread_node *node, new_node;
40N/A struct msgid_search_key key;
40N/A const char **msgidp;
40N/A uint32_t idx;
40N/A
40N/A key.msgid = msgid;
40N/A key.msgid_crc32 = crc32_str_nonzero(msgid);
40N/A
40N/A node = mail_hash_lookup(ctx->hash_trans, &key, &idx);
40N/A if (node == NULL) {
40N/A /* not found, create */
40N/A memset(&new_node, 0, sizeof(new_node));
40N/A new_node.msgid_crc32 = key.msgid_crc32;
40N/A new_node.uid = ref_uid;
40N/A
40N/A mail_hash_insert(ctx->hash_trans, &key, &new_node, &idx);
40N/A node = mail_hash_lookup_idx(ctx->hash_trans, idx);
40N/A } else if (node->uid == 0 && ref_uid != 0) {
40N/A /* make non-existing node uniquely identifiable */
40N/A if (node->exists) {
493N/A mail_hash_transaction_set_corrupted(ctx->hash_trans,
40N/A "uid=0 found");
40N/A ctx->failed = TRUE;
70N/A } else {
493N/A node->uid = ref_uid;
40N/A mail_hash_update(ctx->hash_trans, idx);
40N/A }
493N/A }
40N/A
40N/A /* keep message-ids cached */
40N/A msgidp = array_idx_modifiable(&ctx->msgid_cache, idx);
40N/A if (*msgidp == NULL)
40N/A *msgidp = p_strdup(ctx->msgid_pool, msgid);
40N/A
40N/A *idx_r = idx;
40N/A return node;
40N/A}
70N/A
493N/Astatic void thread_msg_add(struct mail_thread_update_context *ctx, uint32_t uid,
70N/A const char *msgid, uint32_t *idx_r)
493N/A{
40N/A struct mail_thread_node *node;
40N/A struct mail_thread_node unode;
493N/A
40N/A if (msgid != NULL) {
493N/A node = thread_msgid_get(ctx, 0, msgid, idx_r);
40N/A if (!node->exists) {
70N/A /* add UID to node */
493N/A node->uid = uid;
493N/A node->exists = TRUE;
40N/A } else {
70N/A /* duplicate, keep the original. if the original ever
70N/A gets expunged, rebuild. */
40N/A node->expunge_rebuilds = TRUE;
70N/A msgid = NULL;
}
mail_hash_update(ctx->hash_trans, *idx_r);
}
if (msgid == NULL) {
/* no valid message-id */
memset(&unode, 0, sizeof(unode));
unode.uid = uid;
unode.exists = TRUE;
mail_hash_insert(ctx->hash_trans, NULL, &unode, idx_r);
}
}
static bool thread_node_has_ancestor(struct mail_thread_update_context *ctx,
const struct mail_thread_node *node,
const struct mail_thread_node *ancestor)
{
while (node != ancestor) {
if (node->parent_idx == 0)
return FALSE;
node = mail_hash_lookup_idx(ctx->hash_trans, node->parent_idx);
}
return TRUE;
}
static void thread_link_reference(struct mail_thread_update_context *ctx,
uint32_t parent_idx, uint32_t child_idx)
{
struct mail_thread_node *node, *parent, *child;
uint32_t idx;
parent = mail_hash_lookup_idx(ctx->hash_trans, parent_idx);
child = mail_hash_lookup_idx(ctx->hash_trans, child_idx);
child->link_refcount++;
mail_hash_update(ctx->hash_trans, parent_idx);
if (thread_node_has_ancestor(ctx, parent, child)) {
if (parent == child) {
/* loops to itself - ignore */
return;
}
/* child is an ancestor of parent. Adding child -> parent_node
would introduce a loop. If any messages referencing the path
between parent_node's parent and child_node get expunged, we
have to rebuild the tree because the loop might break.
For example:
#1: a -> b (a.ref=1, b.ref=1)
#2: b -> a (a.ref=2, b.ref=2)
#3: c -> a -> b (a.ref=3, b.ref=3, c.ref=1)
Expunging #3 wouldn't break the loop, but expunging #1
would. */
node = parent;
do {
idx = node->parent_idx;
if (idx == 0) {
/* earlier lookup_idx() failed */
ctx->failed = TRUE;
break;
}
node = mail_hash_lookup_idx(ctx->hash_trans, idx);
node->parent_unref_rebuilds = TRUE;
mail_hash_update(ctx->hash_trans, idx);
} while (node != child);
return;
} else if (child->parent_idx == parent_idx) {
/* The same link already exists */
return;
}
/* Set parent_node as child_node's parent */
if (child->parent_idx == 0)
child->parent_idx = parent_idx;
else {
/* Conflicting parent already exists, keep the original */
if (child->exists) {
/* If this message gets expunged,
the parent is changed. */
child->expunge_rebuilds = TRUE;
} else {
/* Message doesn't exist, so it was one of the node's
children that created the original reference. If
that reference gets dropped, the parent is changed.
We could catch this in one of several ways:
a) Link to parent node gets unreferenced
b) Link to this node gets unreferenced
c) Any of the child nodes gets expunged
b) is probably the least likely to happen,
so use it */
child->parent_unref_rebuilds = TRUE;
}
}
mail_hash_update(ctx->hash_trans, child_idx);
}
static void
thread_link_references(struct mail_thread_update_context *ctx, uint32_t ref_uid,
const char **references)
{
const char *msgid, *last_msgid;
uint32_t parent_idx, child_idx;
last_msgid = message_id_get_next(references);
if (last_msgid == NULL)
return;
(void)thread_msgid_get(ctx, ref_uid, last_msgid, &parent_idx);
while ((msgid = message_id_get_next(references)) != NULL) {
(void)thread_msgid_get(ctx, ref_uid, msgid, &child_idx);
thread_link_reference(ctx, parent_idx, child_idx);
parent_idx = child_idx;
last_msgid = msgid;
}
/* link the last ID to us */
*references = last_msgid;
}
static int thread_get_mail_header(struct mail *mail, const char *name,
const char **value_r)
{
if (mail_get_first_header(mail, name, value_r) < 0) {
if (!mail->expunged)
return -1;
/* Message is expunged. Instead of failing the entire THREAD
command, just treat the header as non-existing. */
*value_r = NULL;
}
return 0;
}
static bool references_are_crc32_unique(const char *references)
{
const char *msgid;
uint32_t msgid_crc32;
ARRAY_TYPE(uint32_t) crc_arr;
const uint32_t *crc;
unsigned int i, count;
t_array_init(&crc_arr, 32);
while ((msgid = message_id_get_next(&references)) != NULL) {
msgid_crc32 = crc32_str_nonzero(msgid);
crc = array_get(&crc_arr, &count);
for (i = 0; i < count; i++) {
if (crc[i] == msgid_crc32)
return FALSE;
}
array_append(&crc_arr, &msgid_crc32, 1);
}
return TRUE;
}
int mail_thread_add(struct mail_thread_update_context *ctx, struct mail *mail)
{
const char *message_id, *in_reply_to, *references, *parent_msgid;
const struct mail_thread_node *parent, *old_parent;
struct mail_hash_header *hdr;
struct mail_thread_node *node;
uint32_t idx, parent_idx, ref_uid;
hdr = mail_hash_get_header(ctx->hash_trans);
i_assert(mail->uid > hdr->last_uid);
hdr->last_uid = mail->uid;
hdr->message_count++;
if (thread_get_mail_header(mail, HDR_MESSAGE_ID, &message_id) < 0 ||
thread_get_mail_header(mail, HDR_REFERENCES, &references) < 0)
return -1;
ref_uid = references_are_crc32_unique(references) ? mail->uid : 0;
thread_msg_add(ctx, mail->uid, message_id_get_next(&message_id), &idx);
thread_link_references(ctx, ref_uid, &references);
if (references != NULL)
parent_msgid = references;
else {
/* no valid IDs in References:, use In-Reply-To: instead */
if (thread_get_mail_header(mail, HDR_IN_REPLY_TO,
&in_reply_to) < 0)
return -1;
parent_msgid = message_id_get_next(&in_reply_to);
}
parent = parent_msgid == NULL ? NULL :
thread_msgid_get(ctx, ref_uid, parent_msgid, &parent_idx);
node = mail_hash_lookup_idx(ctx->hash_trans, idx);
old_parent = node->parent_idx == 0 ? NULL :
mail_hash_lookup_idx(ctx->hash_trans, node->parent_idx);
if (old_parent != NULL &&
(parent == NULL || old_parent->parent_idx != parent_idx)) {
/* conflicting parent, remove it. */
node->parent_idx = 0;
/* If this message gets expunged, we have to revert back to
the original parent. */
node->expunge_rebuilds = TRUE;
mail_hash_update(ctx->hash_trans, idx);
}
if (parent != NULL)
thread_link_reference(ctx, parent_idx, idx);
return 0;
}
static bool
mail_thread_node_lookup(struct mail_thread_update_context *ctx, uint32_t uid,
uint32_t *idx_r, struct mail_thread_node **node_r)
{
struct msgid_search_key key;
const char *msgids;
int ret;
if (!mail_set_uid(ctx->tmp_mail, uid))
return FALSE;
ret = mail_get_first_header(ctx->tmp_mail, HDR_MESSAGE_ID, &msgids);
if (ret <= 0)
return FALSE;
key.msgid = message_id_get_next(&msgids);
if (key.msgid == NULL)
return FALSE;
key.msgid_crc32 = crc32_str_nonzero(key.msgid);
*node_r = mail_hash_lookup(ctx->hash_trans, &key, idx_r);
if (*node_r == NULL)
return FALSE;
if ((*node_r)->uid != ctx->tmp_mail->uid) {
/* duplicate Message-ID probably */
return FALSE;
}
return TRUE;
}
static bool
thread_msgid_lookup(struct mail_thread_update_context *ctx, const char *msgid,
uint32_t *idx_r)
{
struct msgid_search_key key;
key.msgid = msgid;
key.msgid_crc32 = crc32_str_nonzero(msgid);
ctx->cmp_match_count = 0;
ctx->cmp_last_idx = 0;
if (mail_hash_lookup(ctx->hash_trans, &key, idx_r) == NULL) {
if (ctx->cmp_match_count != 1 || ctx->failed) {
/* couldn't find the message-id */
return FALSE;
}
/* there's only one key with this crc32 value, so it
must be what we're looking for */
*idx_r = ctx->cmp_last_idx;
}
return TRUE;
}
static bool
thread_unref_msgid(struct mail_thread_update_context *ctx,
uint32_t parent_idx, uint32_t child_idx)
{
struct mail_thread_node *parent, *child;
parent = mail_hash_lookup_idx(ctx->hash_trans, parent_idx);
if (parent->parent_unref_rebuilds)
return FALSE;
child = mail_hash_lookup_idx(ctx->hash_trans, child_idx);
if (child->link_refcount == 0) {
mail_hash_transaction_set_corrupted(ctx->hash_trans,
"unexpected refcount=0");
return FALSE;
}
child->link_refcount--;
if (child->link_refcount == 0) {
/* we don't have a root anymore */
child->parent_idx = 0;
}
mail_hash_update(ctx->hash_trans, child_idx);
return TRUE;
}
static bool
thread_unref_links(struct mail_thread_update_context *ctx,
uint32_t last_child_idx, const char *references,
bool *valid_r)
{
const char *msgid;
uint32_t parent_idx, child_idx;
/* tmp_mail may be changed below, so we have to duplicate the
references string */
references = t_strdup(references);
*valid_r = FALSE;
msgid = message_id_get_next(&references);
if (msgid == NULL)
return TRUE;
if (!thread_msgid_lookup(ctx, msgid, &parent_idx))
return FALSE;
*valid_r = TRUE;
while ((msgid = message_id_get_next(&references)) != NULL) {
if (!thread_msgid_lookup(ctx, msgid, &child_idx) ||
!thread_unref_msgid(ctx, parent_idx, child_idx))
return FALSE;
parent_idx = child_idx;
}
return thread_unref_msgid(ctx, parent_idx, last_child_idx);
}
int mail_thread_remove(struct mail_thread_update_context *ctx, uint32_t uid)
{
struct mail_hash_header *hdr;
struct mail_thread_node *node;
const char *references, *in_reply_to;
uint32_t idx, parent_idx;
bool have_refs;
if (!mail_thread_node_lookup(ctx, uid, &idx, &node))
return 0;
if (node->expunge_rebuilds)
return 0;
if (mail_get_first_header(ctx->tmp_mail, HDR_REFERENCES,
&references) < 0)
return -1;
if (!thread_unref_links(ctx, idx, references, &have_refs))
return 0;
if (!have_refs) {
/* no valid IDs in References:, use In-Reply-To: instead */
if (mail_get_first_header(ctx->tmp_mail, HDR_IN_REPLY_TO,
&in_reply_to) < 0)
return -1;
in_reply_to = message_id_get_next(&in_reply_to);
if (in_reply_to != NULL &&
(!thread_msgid_lookup(ctx, in_reply_to, &parent_idx) ||
!thread_unref_msgid(ctx, parent_idx, idx)))
return 0;
}
/* get the node again, the pointer may have changed */
node = mail_hash_lookup_idx(ctx->hash_trans, idx);
node->uid = 0;
node->exists = FALSE;
mail_hash_update(ctx->hash_trans, idx);
hdr = mail_hash_get_header(ctx->hash_trans);
hdr->message_count--;
return 1;
}