mdbox-storage-rebuild.c revision 47ede56f4e6eebfe631a1f0febf74d7adcdbcd00
5a580c3a38ced62d4bcc95b8ac7c4f2935b5d294Timo Sirainen/* Copyright (c) 2009-2013 Dovecot authors, see the included COPYING file */
503a863a317acba125a4e46435694e35fad769e4Timo Sirainen struct mdbox_map_mail_index_header orig_map_hdr;
503a863a317acba125a4e46435694e35fad769e4Timo Sirainen HASH_TABLE(uint8_t *, struct mdbox_rebuild_msg *) guid_hash;
4082d5b171d1c3a00ba705093d62b8afc9cf17aeTimo Sirainenmdbox_storage_rebuild_init(struct mdbox_storage *storage,
c991d8c2c0d5d6c025e24fc00cb06dd61c42456dTimo Sirainen ctx = i_new(struct mdbox_storage_rebuild_context, 1);
503a863a317acba125a4e46435694e35fad769e4Timo Sirainen ctx->pool = pool_alloconly_create("dbox map rebuild", 1024*256);
503a863a317acba125a4e46435694e35fad769e4Timo Sirainen hash_table_create(&ctx->guid_hash, ctx->pool, 0,
d06d6667bac64aabe1efb216af56ca45108d63b0Timo Sirainenmdbox_storage_rebuild_deinit(struct mdbox_storage_rebuild_context *ctx)
c991d8c2c0d5d6c025e24fc00cb06dd61c42456dTimo Sirainenmdbox_rebuild_msg_offset_cmp(struct mdbox_rebuild_msg *const *m1,
355fe8b5d02904df39e793f66da5432d86649d4aTimo Sirainenstatic int mdbox_rebuild_msg_uid_cmp(struct mdbox_rebuild_msg *const *m1,
355fe8b5d02904df39e793f66da5432d86649d4aTimo Sirainenstatic void rebuild_scan_metadata(struct mdbox_storage_rebuild_context *ctx,
355fe8b5d02904df39e793f66da5432d86649d4aTimo Sirainen if (dbox_file_metadata_get(file, DBOX_METADATA_POP3_UIDL) != NULL)
355fe8b5d02904df39e793f66da5432d86649d4aTimo Sirainen if (dbox_file_metadata_get(file, DBOX_METADATA_POP3_ORDER) != NULL)
355fe8b5d02904df39e793f66da5432d86649d4aTimo Sirainenstatic int rebuild_file_mails(struct mdbox_storage_rebuild_context *ctx,
6646bd844c85d5b27451199d8868b6d2357cd293Timo Sirainen while ((ret = dbox_file_seek_next(file, &offset, &last)) >= 0) {
6646bd844c85d5b27451199d8868b6d2357cd293Timo Sirainen if ((ret = dbox_file_metadata_read(file)) < 0)
6646bd844c85d5b27451199d8868b6d2357cd293Timo Sirainen /* file is corrupted. fix it and retry. */
6646bd844c85d5b27451199d8868b6d2357cd293Timo Sirainen /* use existing file header if it was ok */
503a863a317acba125a4e46435694e35fad769e4Timo Sirainen if ((ret = dbox_file_fix(file, prev_offset)) < 0)
503a863a317acba125a4e46435694e35fad769e4Timo Sirainen /* file was deleted */
1d3f7c1278168d5b1cbfa9a2cc9929a0909056b4Timo Sirainen /* seek to the offset where we last left off */
e262f3aa3429dbc74f668bc8bd501cf08b955778Timo Sirainen guid = dbox_file_metadata_get(file, DBOX_METADATA_GUID);
e262f3aa3429dbc74f668bc8bd501cf08b955778Timo Sirainen "Message is missing GUID");
e262f3aa3429dbc74f668bc8bd501cf08b955778Timo Sirainen rec = p_new(ctx->pool, struct mdbox_rebuild_msg, 1);
1d3f7c1278168d5b1cbfa9a2cc9929a0909056b4Timo Sirainen rec->rec_size = file->input->v_offset - offset;
503a863a317acba125a4e46435694e35fad769e4Timo Sirainen rec->mail_size = dbox_file_get_plaintext_size(file);
503a863a317acba125a4e46435694e35fad769e4Timo Sirainen mail_generate_guid_128_hash(guid, rec->guid_128);
503a863a317acba125a4e46435694e35fad769e4Timo Sirainen old_rec = hash_table_lookup(ctx->guid_hash, guid_p);
503a863a317acba125a4e46435694e35fad769e4Timo Sirainen hash_table_insert(ctx->guid_hash, guid_p, rec);
503a863a317acba125a4e46435694e35fad769e4Timo Sirainen else if (rec->mail_size == old_rec->mail_size) {
503a863a317acba125a4e46435694e35fad769e4Timo Sirainen /* two mails' GUID and size are the same, which quite
503a863a317acba125a4e46435694e35fad769e4Timo Sirainen likely means that their contents are the same as
503a863a317acba125a4e46435694e35fad769e4Timo Sirainen well. we'll compare the mail sizes instead of the
503a863a317acba125a4e46435694e35fad769e4Timo Sirainen record sizes, because the records' metadata may
503a863a317acba125a4e46435694e35fad769e4Timo Sirainen save this duplicate mail with refcount=0 to the map,
503a863a317acba125a4e46435694e35fad769e4Timo Sirainen so it will eventually be purged. */
503a863a317acba125a4e46435694e35fad769e4Timo Sirainen /* duplicate GUID, but not a duplicate message. */
503a863a317acba125a4e46435694e35fad769e4Timo Sirainen old_rec->file_id, old_rec->offset, old_rec->mail_size,
503a863a317acba125a4e46435694e35fad769e4Timo Sirainen rec->guid_hash_next = old_rec->guid_hash_next;
503a863a317acba125a4e46435694e35fad769e4Timo Sirainenrebuild_rename_file(struct mdbox_storage_rebuild_context *ctx,
461ffead9720d1e516b959d5e41f049c73d38c7cTimo Sirainen const char *dir, const char **fname_p, uint32_t *file_id_r)
ba14267101444b8f144091cefd437e1ea44d3e32Timo Sirainen const char *old_path, *new_path, *fname = *fname_p;
ef174bf5299348e8c0662d235341869f319cfe54Timo Sirainen old_path = t_strconcat(dir, "/", fname, NULL);
ba14267101444b8f144091cefd437e1ea44d3e32Timo Sirainen new_path = t_strdup_printf("%s/"MDBOX_MAIL_FILE_FORMAT,
ba14267101444b8f144091cefd437e1ea44d3e32Timo Sirainen /* use link()+unlink() instead of rename() to make sure we
ba14267101444b8f144091cefd437e1ea44d3e32Timo Sirainen don't overwrite any files. */
5e85a6a1349177c613dea55aabb20d857b8240a5Timo Sirainen i_error("link(%s, %s) failed: %m", old_path, new_path);
ba14267101444b8f144091cefd437e1ea44d3e32Timo Sirainenstatic int rebuild_add_file(struct mdbox_storage_rebuild_context *ctx,
ba14267101444b8f144091cefd437e1ea44d3e32Timo Sirainen id_str = fname + strlen(MDBOX_MAIL_FILE_PREFIX);
ef174bf5299348e8c0662d235341869f319cfe54Timo Sirainen if (str_to_uint32(id_str, &file_id) < 0 || file_id == 0) {
ba14267101444b8f144091cefd437e1ea44d3e32Timo Sirainen /* m.*.broken files are created by file fixing
ba14267101444b8f144091cefd437e1ea44d3e32Timo Sirainen m.*.lock files are created if flock() isn't available */
ba14267101444b8f144091cefd437e1ea44d3e32Timo Sirainen if (ext == NULL || (strcmp(ext, ".broken") != 0 &&
461ffead9720d1e516b959d5e41f049c73d38c7cTimo Sirainen "Skipping file with missing ID: %s/%s",
e20e638805c4bd54e039891a3e92760b1dfa189aTimo Sirainen if (!seq_range_exists(&ctx->seen_file_ids, file_id)) {
c47e837a127c533e67debafde8ccf9691041be16Timo Sirainen /* duplicate file. either readdir() returned it twice
c47e837a127c533e67debafde8ccf9691041be16Timo Sirainen (unlikely) or it exists in both alt and primary storage.
c47e837a127c533e67debafde8ccf9691041be16Timo Sirainen to make sure we don't lose any mails from either of the
461ffead9720d1e516b959d5e41f049c73d38c7cTimo Sirainen files, give this file a new ID and rename it. */
461ffead9720d1e516b959d5e41f049c73d38c7cTimo Sirainen if (rebuild_rename_file(ctx, dir, &fname, &file_id) < 0)
c47e837a127c533e67debafde8ccf9691041be16Timo Sirainen seq_range_array_add(&ctx->seen_file_ids, file_id);
c47e837a127c533e67debafde8ccf9691041be16Timo Sirainen file = mdbox_file_init(ctx->storage, file_id);
c47e837a127c533e67debafde8ccf9691041be16Timo Sirainen if ((ret = dbox_file_open(file, &deleted)) > 0 && !deleted)
461ffead9720d1e516b959d5e41f049c73d38c7cTimo Sirainen i_error("mdbox rebuild: Failed to fix file %s/%s", dir, fname);
461ffead9720d1e516b959d5e41f049c73d38c7cTimo Sirainenrebuild_add_missing_map_uids(struct mdbox_storage_rebuild_context *ctx,
c47e837a127c533e67debafde8ccf9691041be16Timo Sirainen unsigned int i, count;
503a863a317acba125a4e46435694e35fad769e4Timo Sirainen msgs = array_get_modifiable(&ctx->msgs, &count);
503a863a317acba125a4e46435694e35fad769e4Timo Sirainen for (i = 0; i < count; i++) {
e4cebacdec9c9e5b685dde5f7cbf7a5cf7e1d248Timo Sirainen mail_index_update_ext(ctx->atomic->sync_trans, seq,
e4cebacdec9c9e5b685dde5f7cbf7a5cf7e1d248Timo Sirainenstatic int rebuild_apply_map(struct mdbox_storage_rebuild_context *ctx)
e4cebacdec9c9e5b685dde5f7cbf7a5cf7e1d248Timo Sirainen struct mdbox_rebuild_msg search_msg, *search_msgp = &search_msg;
9905ec03fb2011419caeac4cd5a1b6c28ab50a73Timo Sirainen array_sort(&ctx->msgs, mdbox_rebuild_msg_offset_cmp);
e20e638805c4bd54e039891a3e92760b1dfa189aTimo Sirainen /* msgs now contains a list of all messages that exists in m.* files,
9905ec03fb2011419caeac4cd5a1b6c28ab50a73Timo Sirainen sorted by file_id,offset */
e20e638805c4bd54e039891a3e92760b1dfa189aTimo Sirainen hdr = mail_index_get_header(ctx->atomic->sync_view);
9905ec03fb2011419caeac4cd5a1b6c28ab50a73Timo Sirainen for (seq = 1; seq <= hdr->messages_count; seq++) {
9905ec03fb2011419caeac4cd5a1b6c28ab50a73Timo Sirainen if (mdbox_map_view_lookup_rec(map, ctx->atomic->sync_view,
90b50df264b57e0f63cd8cc6aea1ce3bb7cf5f64Timo Sirainen /* look up the rebuild msg record for this message based on
90b50df264b57e0f63cd8cc6aea1ce3bb7cf5f64Timo Sirainen the (file_id, offset, size) triplet */
90b50df264b57e0f63cd8cc6aea1ce3bb7cf5f64Timo Sirainen /* map record points to nonexistent or
90b50df264b57e0f63cd8cc6aea1ce3bb7cf5f64Timo Sirainen a duplicate message. */
90b50df264b57e0f63cd8cc6aea1ce3bb7cf5f64Timo Sirainen mail_index_expunge(ctx->atomic->sync_trans, seq);
90b50df264b57e0f63cd8cc6aea1ce3bb7cf5f64Timo Sirainen /* remember this message's map_uid */
90b50df264b57e0f63cd8cc6aea1ce3bb7cf5f64Timo Sirainen rebuild_add_missing_map_uids(ctx, hdr->next_uid);
90b50df264b57e0f63cd8cc6aea1ce3bb7cf5f64Timo Sirainen /* afterwards we're interested in looking up map_uids.
90b50df264b57e0f63cd8cc6aea1ce3bb7cf5f64Timo Sirainen re-sort the messages to make it easier. */
90b50df264b57e0f63cd8cc6aea1ce3bb7cf5f64Timo Sirainen array_sort(&ctx->msgs, mdbox_rebuild_msg_uid_cmp);
90b50df264b57e0f63cd8cc6aea1ce3bb7cf5f64Timo Sirainenrebuild_lookup_map_uid(struct mdbox_storage_rebuild_context *ctx,
90b50df264b57e0f63cd8cc6aea1ce3bb7cf5f64Timo Sirainen struct mdbox_rebuild_msg search_msg, *search_msgp = &search_msg;
90b50df264b57e0f63cd8cc6aea1ce3bb7cf5f64Timo Sirainenguid_hash_have_map_uid(struct mdbox_rebuild_msg **recp, uint32_t map_uid)
90b50df264b57e0f63cd8cc6aea1ce3bb7cf5f64Timo Sirainen for (rec = *recp; rec != NULL; rec = rec->guid_hash_next) {
5fdeff082e329e4a85bb7e74aaec2c35e2288557Timo Sirainenrebuild_mailbox_multi(struct mdbox_storage_rebuild_context *ctx,
5fdeff082e329e4a85bb7e74aaec2c35e2288557Timo Sirainen /* Rebuild the mailbox's index. Note that index is reset at this point,
5fdeff082e329e4a85bb7e74aaec2c35e2288557Timo Sirainen so although we can still access the old messages, we'll need to
5fdeff082e329e4a85bb7e74aaec2c35e2288557Timo Sirainen append anything we want to keep as new messages. */
5fdeff082e329e4a85bb7e74aaec2c35e2288557Timo Sirainen for (old_seq = 1; old_seq <= hdr->messages_count; old_seq++) {
5fdeff082e329e4a85bb7e74aaec2c35e2288557Timo Sirainen mail_index_lookup_ext(view, old_seq, mbox->ext_id,
5fdeff082e329e4a85bb7e74aaec2c35e2288557Timo Sirainen memset(&new_dbox_rec, 0, sizeof(new_dbox_rec));
5fdeff082e329e4a85bb7e74aaec2c35e2288557Timo Sirainen memcpy(&new_dbox_rec, data, sizeof(new_dbox_rec));
} T_END;
const void *data;
int ret;
if (ret <= 0) {
int ret = 0;
MAILBOX_NOSELECT)) == 0) {
T_BEGIN {
} T_END;
if (ret < 0) {
return ret;
int ret;
if (ret < 0)
if (ret <= 0) {
unsigned int i, count;
for (i = 0; i < count; i++) {
const void *data;
unsigned int i, count;
struct dirent *d;
int ret = 0;
} T_END;
return ret;
const void *data;
FALSE) < 0)
int ret;
if (ret == 0) {
return ret;
int ret;
return ret;