mbox-sync-parse.c revision d371507847d62ba311b4bcc23d18f45c3d0f1a38
/* Copyright (C) 2004 Timo Sirainen */

/* MD5 header summing logic was pretty much copy&pasted from popa3d by
   Solar Designer */
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen#include "lib.h"
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen#include "buffer.h"
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen#include "istream.h"
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen#include "str.h"
6d2b3ce2c6ef62334985ece4f0ab8b154e0e9560Timo Sirainen#include "write-full.h"
c6335901c67a4c9365319190a111a2168f3b06f5Timo Sirainen#include "message-parser.h"
c6335901c67a4c9365319190a111a2168f3b06f5Timo Sirainen#include "mail-index.h"
c6335901c67a4c9365319190a111a2168f3b06f5Timo Sirainen#include "mbox-sync-private.h"
01230de017cd273de41143d88e9c18df1243ae8aTimo Sirainen
01230de017cd273de41143d88e9c18df1243ae8aTimo Sirainen#include <stdlib.h>
047c00cd3f7f403672f81569413669238df8c15aTimo Sirainen
/* Non-zero when c is whitespace according to IS_LWSP(), or a line feed. */
#define IS_LWSP_LF(c) \
	(IS_LWSP(c) || (c) == '\n')
047c00cd3f7f403672f81569413669238df8c15aTimo Sirainen
047c00cd3f7f403672f81569413669238df8c15aTimo Sirainenstruct mbox_flag_type mbox_status_flags[] = {
cf0ad1a0bddb0787f3d7b408a96d721a8b2a98a3Timo Sirainen { 'R', MAIL_SEEN },
cf0ad1a0bddb0787f3d7b408a96d721a8b2a98a3Timo Sirainen { 'O', MBOX_NONRECENT },
cf0ad1a0bddb0787f3d7b408a96d721a8b2a98a3Timo Sirainen { 0, 0 }
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen};
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainenstruct mbox_flag_type mbox_xstatus_flags[] = {
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen { 'A', MAIL_ANSWERED },
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen { 'F', MAIL_FLAGGED },
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen { 'T', MAIL_DRAFT },
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen { 'D', MAIL_DELETED },
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen { 0, 0 }
7487ff578435377bbeefffdbfb78ca09ed1292dfTimo Sirainen};
89237470342ea6d4bbdf4cff9764037cfb3f6f45Timo Sirainen
/* Ties a header name to the function that handles that header. */
struct header_func {
	/* header name; header_func_find() compares it case-insensitively */
	const char *header;

	/* handler for the header line. when it returns FALSE,
	   mbox_sync_parse_next_mail() drops the line from the header copy
	   and flags the message as needing a rewrite. */
	int (*func)(struct mbox_sync_mail_context *ctx,
		    struct message_header_line *hdr);
};
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen
4d4d6d4745682790c20d759ba93dbea46b812c5dTimo Sirainenstatic void parse_trailing_whitespace(struct mbox_sync_mail_context *ctx,
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen struct message_header_line *hdr)
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen{
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen size_t i, space = 0;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen /* the value may contain newlines. we can't count whitespace before
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen and after it as a single contiguous whitespace block, as that may
a3fe8c0c54d87822f4b4f8f0d10caac611861b2bTimo Sirainen get us into situation where removing whitespace goes eg.
578ef2538ccf42e2a48234c24a8b709397101d88Timo Sirainen " \n \n" -> " \n\n" which would then be treated as end of headers.
578ef2538ccf42e2a48234c24a8b709397101d88Timo Sirainen
578ef2538ccf42e2a48234c24a8b709397101d88Timo Sirainen that could probably be avoided by being careful, but as newlines
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen should never be there (we don't generate them), it's not worth the
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen trouble. */
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen for (i = hdr->full_value_len; i > 0; i--) {
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen if (!IS_LWSP(hdr->full_value[i-1]))
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen break;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen space++;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen }
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen if (space > ctx->mail.space) {
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen ctx->mail.offset = hdr->full_value_offset + i;
e0740628f6ca05f4bc79a9d8a90b650f4d38d4d0Timo Sirainen ctx->mail.space = space;
e0740628f6ca05f4bc79a9d8a90b650f4d38d4d0Timo Sirainen }
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen}
3f603ef00e35fca21605afa0ad8d76e94fee2b96Timo Sirainen
3f603ef00e35fca21605afa0ad8d76e94fee2b96Timo Sirainenstatic enum mail_flags mbox_flag_find(struct mbox_flag_type *flags, char chr)
3f603ef00e35fca21605afa0ad8d76e94fee2b96Timo Sirainen{
3f603ef00e35fca21605afa0ad8d76e94fee2b96Timo Sirainen int i;
6fdfa4d4cf14d1d7764d7faa8258f112e39c8dbeTimo Sirainen
6fdfa4d4cf14d1d7764d7faa8258f112e39c8dbeTimo Sirainen for (i = 0; flags[i].chr != 0; i++) {
6fdfa4d4cf14d1d7764d7faa8258f112e39c8dbeTimo Sirainen if (flags[i].chr == chr)
6fdfa4d4cf14d1d7764d7faa8258f112e39c8dbeTimo Sirainen return flags[i].flag;
6fdfa4d4cf14d1d7764d7faa8258f112e39c8dbeTimo Sirainen }
3f603ef00e35fca21605afa0ad8d76e94fee2b96Timo Sirainen
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen return 0;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen}
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainenstatic void parse_status_flags(struct mbox_sync_mail_context *ctx,
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen struct message_header_line *hdr,
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen struct mbox_flag_type *flags_list)
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen{
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen size_t i;
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen for (i = 0; i < hdr->full_value_len; i++) {
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen ctx->mail.flags |=
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen mbox_flag_find(flags_list, hdr->full_value[i]);
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen }
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen}
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainenstatic int parse_status(struct mbox_sync_mail_context *ctx,
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen struct message_header_line *hdr)
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen{
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen parse_status_flags(ctx, hdr, mbox_status_flags);
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen ctx->hdr_pos[MBOX_HDR_STATUS] = str_len(ctx->header);
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen return TRUE;
b2d562f9c7fd13f9a16e9b3bcee904630b80b1feTimo Sirainen}
b2d562f9c7fd13f9a16e9b3bcee904630b80b1feTimo Sirainen
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainenstatic int parse_x_status(struct mbox_sync_mail_context *ctx,
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen struct message_header_line *hdr)
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen{
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen parse_status_flags(ctx, hdr, mbox_xstatus_flags);
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen ctx->hdr_pos[MBOX_HDR_X_STATUS] = str_len(ctx->header);
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen return TRUE;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen}
6c2ce1d5bf17b21e804a079eb0f973b7ab83e0d8Timo Sirainen
6c2ce1d5bf17b21e804a079eb0f973b7ab83e0d8Timo Sirainenstatic int parse_x_imap_base(struct mbox_sync_mail_context *ctx,
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen struct message_header_line *hdr)
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen{
d176f84ce5ca2073f4dfbafb457b9c74f6bf0d76Timo Sirainen const char *str;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen char *end;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen size_t pos;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen uint32_t uid_validity, uid_last;
8d3278a82b964217d95c340ec6f82037cdc59d19Timo Sirainen
if (ctx->seq != 1 || ctx->seen_imapbase) {
/* Valid only in first message */
return FALSE;
}
/* <uid validity> <last uid> */
t_push();
str = t_strndup(hdr->full_value, hdr->full_value_len);
uid_validity = strtoul(str, &end, 10);
uid_last = strtoul(end, &end, 10);
pos = end - str;
t_pop();
while (pos < hdr->full_value_len && IS_LWSP_LF(hdr->full_value[pos]))
pos++;
if (uid_validity == 0) {
/* broken */
return FALSE;
}
if (ctx->sync_ctx->base_uid_validity == 0) {
ctx->sync_ctx->base_uid_validity = uid_validity;
ctx->sync_ctx->base_uid_last = uid_last;
if (ctx->sync_ctx->next_uid-1 <= uid_last)
ctx->sync_ctx->next_uid = uid_last+1;
else {
ctx->sync_ctx->update_base_uid_last =
ctx->sync_ctx->next_uid - 1;
ctx->need_rewrite = TRUE;
}
}
if (ctx->sync_ctx->next_uid <= ctx->sync_ctx->prev_msg_uid) {
/* broken, update */
ctx->sync_ctx->next_uid = ctx->sync_ctx->prev_msg_uid+1;
}
ctx->hdr_pos[MBOX_HDR_X_IMAPBASE] = str_len(ctx->header);
ctx->seen_imapbase = TRUE;
if (pos == hdr->full_value_len)
return TRUE;
// FIXME: save keywords
parse_trailing_whitespace(ctx, hdr);
return TRUE;
}
/* X-IMAP: handler. Parses the value the same way as X-IMAPbase and, when
   that succeeds, marks the message as a pseudo message. Returns FALSE when
   parse_x_imap_base() rejects the header. */
static int parse_x_imap(struct mbox_sync_mail_context *ctx,
			struct message_header_line *hdr)
{
	if (parse_x_imap_base(ctx, hdr)) {
		/* this is the c-client style "FOLDER INTERNAL DATA"
		   message. skip it. */
		ctx->pseudo = TRUE;
		return TRUE;
	}

	return FALSE;
}
/* X-Keywords: handler. The keywords themselves aren't parsed yet; only the
   trailing whitespace and the header's position are recorded.
   Always returns TRUE. */
static int parse_x_keywords(struct mbox_sync_mail_context *ctx,
			    struct message_header_line *hdr)
{
	/* FIXME: parse them */

	parse_trailing_whitespace(ctx, hdr);
	ctx->hdr_pos[MBOX_HDR_X_KEYWORDS] = str_len(ctx->header);
	return TRUE;
}
/* X-UID: handler. Parses the message's UID from the header value.

   Returns FALSE (which makes the caller drop the header and rewrite the
   message) when:
    - a UID has already been set for this message (duplicate header)
    - the number doesn't fit into 32 bits
    - anything other than whitespace/newline follows the digits
    - the UID isn't larger than the previous message's UID
    - the header comes before X-IMAPbase in the first destination mail;
      in this case the UID itself is still kept in ctx->mail.uid

   On success records the header's position, the parsed UID and the
   trailing whitespace, and returns TRUE. */
static int parse_x_uid(struct mbox_sync_mail_context *ctx,
		       struct message_header_line *hdr)
{
	const uint32_t uid_max = (uint32_t)-1;
	uint32_t value = 0, digit;
	size_t i;

	if (ctx->mail.uid != 0) {
		/* duplicate */
		return FALSE;
	}

	for (i = 0; i < hdr->full_value_len; i++) {
		if (hdr->full_value[i] < '0' || hdr->full_value[i] > '9')
			break;

		digit = hdr->full_value[i] - '0';
		if (value > (uid_max - digit) / 10) {
			/* broken - the number would silently wrap around
			   and be taken for some other, smaller UID */
			return FALSE;
		}
		value = value*10 + digit;
	}

	for (; i < hdr->full_value_len; i++) {
		if (!IS_LWSP_LF(hdr->full_value[i])) {
			/* broken value - garbage after the number. this is
			   the same check Content-Length parsing does. */
			return FALSE;
		}
	}

	if (value >= ctx->sync_ctx->next_uid) {
		/* next_uid broken - fix it */
		ctx->sync_ctx->next_uid = value+1;
	}

	if (value <= ctx->sync_ctx->prev_msg_uid) {
		/* broken - UIDs must be growing. an empty value parses as 0
		   and is rejected here as well. */
		return FALSE;
	}

	ctx->mail.uid = value;
	ctx->sync_ctx->prev_msg_uid = value;

	if (ctx->sync_ctx->dest_first_mail && !ctx->seen_imapbase) {
		/* everything was good, except we can't have X-UID before
		   X-IMAPbase header (to keep c-client compatibility). keep
		   the UID, but when we're rewriting this makes sure the
		   X-UID is appended after X-IMAPbase. */
		return FALSE;
	}

	ctx->hdr_pos[MBOX_HDR_X_UID] = str_len(ctx->header);
	ctx->parsed_uid = value;
	parse_trailing_whitespace(ctx, hdr);
	return TRUE;
}
/* X-UIDL: handler. Replaces the contents of ctx->uidl with the header value
   up to (not including) its first whitespace or newline character.
   Always returns TRUE. */
static int parse_x_uidl(struct mbox_sync_mail_context *ctx,
			struct message_header_line *hdr)
{
	size_t len = 0;

	while (len < hdr->full_value_len &&
	       !IS_LWSP_LF(hdr->full_value[len]))
		len++;

	str_truncate(ctx->uidl, 0);
	str_append_n(ctx->uidl, hdr->full_value, len);
	return TRUE;
}
/* Content-Length: handler. Stores the leading decimal number of the value
   in ctx->content_length.

   Returns FALSE when a Content-Length has already been stored for this
   message, or when the digits are followed by anything other than
   whitespace/newline. */
static int parse_content_length(struct mbox_sync_mail_context *ctx,
				struct message_header_line *hdr)
{
	uoff_t length = 0;
	size_t pos = 0;

	if (ctx->content_length != (uoff_t)-1) {
		/* duplicate */
		return FALSE;
	}

	/* leading digits */
	while (pos < hdr->full_value_len &&
	       hdr->full_value[pos] >= '0' && hdr->full_value[pos] <= '9') {
		length = length*10 + (hdr->full_value[pos] - '0');
		pos++;
	}

	/* the rest must be nothing but whitespace */
	while (pos < hdr->full_value_len) {
		if (!IS_LWSP_LF(hdr->full_value[pos])) {
			/* broken value */
			return FALSE;
		}
		pos++;
	}

	ctx->content_length = length;
	return TRUE;
}
/* Date: handler. Adds the value to the header MD5 sum unless a Received
   header has already been fully seen. Always returns TRUE. */
static int parse_date(struct mbox_sync_mail_context *ctx,
		      struct message_header_line *hdr)
{
	if (ctx->seen_received_hdr) {
		/* Received-header contains date too, and more trusted one */
		return TRUE;
	}

	md5_update(&ctx->hdr_md5_ctx, hdr->value, hdr->value_len);
	return TRUE;
}
/* Delivered-To: handler. The value is always added to the header MD5 sum.
   Always returns TRUE. */
static int parse_delivered_to(struct mbox_sync_mail_context *ctx,
			      struct message_header_line *hdr)
{
	md5_update(&ctx->hdr_md5_ctx, hdr->value, hdr->value_len);

	return TRUE;
}
/* Message-ID: handler. Adds the value to the header MD5 sum unless a
   Received header has already been fully seen. Always returns TRUE. */
static int parse_message_id(struct mbox_sync_mail_context *ctx,
			    struct message_header_line *hdr)
{
	if (ctx->seen_received_hdr) {
		/* Received-header contains unique ID too,
		   and more trusted one */
		return TRUE;
	}

	md5_update(&ctx->hdr_md5_ctx, hdr->value, hdr->value_len);
	return TRUE;
}
/* Received: handler. Only the first Received header contributes to the
   header MD5 sum; once it has been handled (and the line doesn't continue),
   ctx->seen_received_hdr is set and later ones are ignored.
   Always returns TRUE. */
static int parse_received(struct mbox_sync_mail_context *ctx,
			  struct message_header_line *hdr)
{
	if (ctx->seen_received_hdr) {
		/* get only the first received-header */
		return TRUE;
	}

	md5_update(&ctx->hdr_md5_ctx, hdr->value, hdr->value_len);
	if (!hdr->continues)
		ctx->seen_received_hdr = TRUE;
	return TRUE;
}
/* X-Delivery-ID: handler. The value is always added to the header MD5 sum.
   Always returns TRUE. */
static int parse_x_delivery_id(struct mbox_sync_mail_context *ctx,
			       struct message_header_line *hdr)
{
	/* The local delivery agent may help in generating unique IDs, but
	   this header isn't trusted blindly on its own since it could just
	   as easily have come from the remote side. */
	md5_update(&ctx->hdr_md5_ctx, hdr->value, hdr->value_len);

	return TRUE;
}
/* Headers that get special handling while parsing, and their handlers.
   header_func_find() scans this list from the top; the entry with a NULL
   header name terminates it. */
static struct header_func header_funcs[] = {
	{ .header = "Content-Length", .func = parse_content_length },
	{ .header = "Date", .func = parse_date },
	{ .header = "Delivered-To", .func = parse_delivered_to },
	{ .header = "Message-ID", .func = parse_message_id },
	{ .header = "Received", .func = parse_received },
	{ .header = "Status", .func = parse_status },
	{ .header = "X-Delivery-ID", .func = parse_x_delivery_id },
	{ .header = "X-IMAP", .func = parse_x_imap },
	{ .header = "X-IMAPbase", .func = parse_x_imap_base },
	{ .header = "X-Keywords", .func = parse_x_keywords },
	{ .header = "X-Status", .func = parse_x_status },
	{ .header = "X-UID", .func = parse_x_uid },
	{ .header = "X-UIDL", .func = parse_x_uidl },
	{ .header = NULL, .func = NULL }
};
static struct header_func *header_func_find(const char *header)
{
int i;
for (i = 0; header_funcs[i].header != NULL; i++) {
if (strcasecmp(header_funcs[i].header, header) == 0)
return &header_funcs[i];
}
return NULL;
}
/* Parse the headers of the next message from input.

   The per-message parsing state in ctx is reset first. Each header line is
   then copied into ctx->header as "<name>: <value>", with the headers listed
   in header_funcs going through their handlers. A header whose handler
   returns FALSE is left out of the copy and the message is marked as
   needing a rewrite. After the headers the MD5 sum is finalized into
   ctx->hdr_md5_sum, the X-IMAPbase related rewrite checks are done and
   ctx->body_offset is set to the input stream's current offset. */
void mbox_sync_parse_next_mail(struct istream *input,
			       struct mbox_sync_mail_context *ctx)
{
	struct mbox_sync_context *sync_ctx = ctx->sync_ctx;
	struct message_header_parser_ctx *parser;
	struct message_header_line *hdr;
	struct header_func *handler;
	size_t line_start = 0;
	int idx;

	/* reset the per-message state */
	ctx->hdr_offset = ctx->mail.offset;
	ctx->header_first_change = (size_t)-1;
	ctx->header_last_change = 0;
	ctx->content_length = (uoff_t)-1;

	for (idx = 0; idx < MBOX_HDR_COUNT; idx++)
		ctx->hdr_pos[idx] = (size_t)-1;

	str_truncate(ctx->header, 0);
	md5_init(&ctx->hdr_md5_ctx);

	parser = message_parse_header_init(input, NULL, FALSE);
	for (;;) {
		hdr = message_parse_header_next(parser);
		if (hdr == NULL)
			break;

		if (hdr->eoh) {
			ctx->have_eoh = TRUE;
			break;
		}

		if (!hdr->continued) {
			/* a new header begins here. remember where, so the
			   whole header can be dropped if it's broken. */
			line_start = str_len(ctx->header);
			str_append(ctx->header, hdr->name);
			str_append(ctx->header, ": ");
		}

		if (ctx->header_first_change == (size_t)-1 &&
		    hdr->full_value_offset != str_len(ctx->header)) {
			/* whitespaces around ':' are non-standard. either
			   there's whitespace before ':' or none after.
			   if we're going to rewrite this message, we can't
			   do it partially from here after as offsets won't
			   match. this shouldn't happen pretty much ever, so
			   don't try to optimize this - just rewrite the whole
			   thing. */
			ctx->no_partial_rewrite = TRUE;
		}

		handler = header_func_find(hdr->name);
		if (handler == NULL) {
			/* not a header we care about, copy it as is */
			buffer_append(ctx->header, hdr->value,
				      hdr->value_len);
		} else if (hdr->continues) {
			/* the handler is called only once the full value
			   is available */
			hdr->use_full_value = TRUE;
			continue;
		} else if (!handler->func(ctx, hdr)) {
			/* this header is broken, remove it */
			ctx->need_rewrite = TRUE;
			str_truncate(ctx->header, line_start);
			if (ctx->header_first_change == (size_t)-1)
				ctx->header_first_change = line_start;
			continue;
		} else {
			buffer_append(ctx->header, hdr->full_value,
				      hdr->full_value_len);
		}

		if (!hdr->no_newline)
			str_append_c(ctx->header, '\n');
	}
	message_parse_header_deinit(parser);

	md5_final(&ctx->hdr_md5_ctx, ctx->hdr_md5_sum);

	if (ctx->seq == 1) {
		if (sync_ctx->base_uid_validity == 0) {
			/* missing X-IMAPbase */
			ctx->need_rewrite = TRUE;
		}
		if (sync_ctx->update_base_uid_last != 0 &&
		    sync_ctx->update_base_uid_last > sync_ctx->base_uid_last) {
			/* update uid-last field in X-IMAPbase */
			ctx->need_rewrite = TRUE;
		}
	} else if (ctx->seq > 1 && sync_ctx->dest_first_mail) {
		/* missing X-IMAPbase */
		ctx->need_rewrite = TRUE;
	}

	ctx->body_offset = input->v_offset;
}