imap-thread.c revision 7c424aa51c956c628e3512055841aa2f9eef4833
/* Copyright (C) 2002 Timo Sirainen */
/*
* Merge sort code in sort_nodes() is copyright 2001 Simon Tatham.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL SIMON TATHAM BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
* CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
/* Implementation of draft-ietf-imapext-thread-12 threading algorithm */
#include "common.h"
#include "hash.h"
#include "ostream.h"
#include "str.h"
#include "message-tokenize.h"
#include "imap-base-subject.h"
#include "imap-thread.h"
#include <stdlib.h>
/* how much memory to allocate initially. these are very rough
approximations. */
#define APPROX_MSG_COUNT 128
#define APPROX_MSGID_SIZE 45
/* Try to buffer this much data before sending it to output stream. */
#define OUTPUT_BUF_SIZE 2048
struct root_info {
char *base_subject;
unsigned int reply:1;
unsigned int sorted:1;
};
struct node {
unsigned int id;
union {
char *msgid;
} u;
};
struct thread_context {
struct mail_search_context *search_ctx;
struct hash_table *msgid_hash;
struct hash_table *subject_hash;
int id_is_uid;
};
{
}
{
static const char *wanted_headers[] = {
"message-id", "in-reply-to", "references",
};
struct thread_context *ctx;
int ret;
if (type != MAIL_THREAD_REFERENCES)
i_fatal("Only REFERENCES threading supported");
/* initialize searching */
return FALSE;
sizeof(struct node) *
return ret;
}
{
ctx->root_count++;
}
{
return node;
}
{
return node;
}
unsigned int id)
{
/* first time we see this message */
return node;
}
/* seen before in references */
} else {
/* duplicate */
}
return node;
}
{
struct message_tokenizer *tok;
int valid_end;
if (valid_end) {
/* <xx@xx> - valid message ID found */
return TRUE;
}
}
return FALSE;
}
static void strip_lwsp(char *str)
{
/* @UNSAFE */
char *dest;
/* find the first lwsp */
if (*str == '\0')
return;
str++;
}
}
*dest = '\0';
}
{
const char *p;
int found_at;
return NULL;
for (;;) {
/* skip until '<' */
while (*msgid != '<') {
if (*msgid == '\0') {
return NULL;
}
msgid++;
}
msgid++;
/* check it through quickly to see if it's already normalized */
for (;; p++) {
if ((unsigned char)*p >= 'A') /* matches most */
continue;
if (*p == '@')
if (*p == '>' || *p == '"' || *p == '(')
break;
if (*p == '\0') {
*msgid_p = p;
return NULL;
}
}
if (*p == '>') {
*msgid_p = p+1;
if (found_at) {
char *s;
strip_lwsp(s);
return s;
}
} else {
/* ok, do it the slow way */
/* allocate only once, so we don't leak
with multiple invalid message IDs */
}
}
/* invalid message id, see if there's another valid one */
}
}
{
break;
}
}
if (!add_to_root)
else
}
{
do {
return TRUE;
return TRUE;
}
return FALSE;
}
{
/* already got a parent, don't want to replace it */
return;
}
/* already have this parent, ignore */
return;
}
/* this would create a loop, not allowed */
return;
}
/* link them */
}
const char *parent_msgid, const char *child_msgid,
int replace)
{
}
{
return FALSE;
}
/* link the last message to us */
return TRUE;
}
{
t_push();
sent_date = 0;
/* link references */
else {
/* no references, make sure it's not linked */
}
}
t_pop();
}
{
return node;
}
{
old_parent = *parent;
for (;;) {
break;
}
}
{
struct node **a;
a = node_p;
if (NODE_IS_DUMMY(*node_p)) {
/* no children -> delete */
continue;
/* promote children to our level,
deleting the dummy node */
continue;
}
}
}
}
{
}
{
return list; /* just one node */
insize = 1;
for (;;) {
p = list;
nmerges = 0; /* count number of merges we do in this pass */
while (p != 0) {
nmerges++; /* there exists a merge to be done */
/* step `insize' places along from p */
q = p;
psize = 0;
for (i = 0; i < insize; i++) {
psize++;
q = q->next;
if (q == NULL) break;
}
/* if q hasn't fallen off end, we have two lists to
merge */
/* now we have two lists; merge them */
/* decide whether next element of merge comes
from p or q */
if (psize == 0) {
/* p is empty; e must come from q. */
} else if (qsize == 0 || !q) {
/* q is empty; e must come from p. */
} else if (node_cmp(p, q) <= 0) {
/* First element of p is lower
(or same); e must come from p. */
} else {
/* First element of q is lower;
e must come from q. */
}
/* add the next element to the merged list */
if (tail)
else
list = e;
tail = e;
}
/* now p has stepped `insize' places along,
and q has too */
p = q;
}
/* If we have done only one merge, we're finished. */
if (nmerges <= 1) {
/* allow for nmerges == 0, the empty list case */
return list;
}
/* Otherwise repeat, merging lists twice the size */
insize *= 2;
}
}
{
char *hash_subject;
int is_reply_or_forward;
return;
if (*subject == '\0')
return;
} else {
hash_subject = key;
if (!NODE_IS_DUMMY(hash_node) &&
(NODE_IS_DUMMY(node) ||
}
}
{
unsigned int id;
ctx->subject_hash =
if (!NODE_IS_DUMMY(node))
else {
/* sort children, use the first one's id */
}
else
t_push();
node);
t_pop();
}
}
}
{
}
{
char *base_subject;
/* deleted node */
continue;
}
/* (ii) If the thread subject is empty, skip this message. */
if (base_subject == NULL) {
continue;
}
/* (iii) Lookup the message associated with this thread
subject in the subject table. */
/* (iv) If the message in the subject table is the current
message, skip this message. */
continue;
}
/* Otherwise, merge the current message with the one in the
subject table using the following rules: */
if (NODE_IS_DUMMY(node) &&
/* If both messages are dummies, append the current
message's children to the children of the message in
the subject table (the children of both messages
become siblings), and then delete the current
message. */
} else if (NODE_IS_DUMMY(hash_node) ||
/* If the message in the subject table is a dummy
and the current message is not, make the current
message a child of the message in the subject table
(a sibling of its children).
If the current message is a reply or forward and
the message in the subject table is not, make the
current message a child of the message in the
subject table (a sibling of its children). */
} else {
/* Otherwise, create a new dummy message and make both
the current message and the message in the subject
table children of the dummy. Then replace the
message in the subject table with the dummy
message. */
/* create new nodes for the children - reusing
existing ones have problems since the other one
might have been handled already and we'd introduce
loops..
current node will be destroyed, hash_node will be
the dummy so we don't need to update hash */
}
}
}
{
/* sort the children first, they're needed to sort dummy root nodes */
continue;
}
}
{
/* no siblings - special case to avoid extra paranthesis */
else {
}
return TRUE;
}
/* string getting full, flush it */
return FALSE;
str_truncate(str, 0);
}
else {
}
}
return TRUE;
}
{
/* sort root nodes again, they have been modified since the last time */
continue;
/* string getting full, flush it */
return;
str_truncate(str, 0);
}
if (!NODE_IS_DUMMY(node))
if (!NODE_IS_DUMMY(node))
node->first_child =
}
return;
}
}
}
{
}
{
/* (2) save root nodes and drop the msgids */
/* drop the memory allocated for message-IDs and msgid_hash,
reuse their memory for base subjects */
/* no messages */
return;
}
/* (3) */
/* initialize the node->u.info for all root nodes */
/* (4) */
/* (5) Gather together messages under the root that have the same
base subject text. */
/* (5.C) Merge threads with the same thread subject. */
/* (6) Sort and send replies */
t_push();
t_pop();
}