journal-file.c revision 0240c603691e006165d8687d6a2c70859755b11f
139b011ab81ccea1d51f09e0261a1c390115c6ffPatrik Flykt/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
139b011ab81ccea1d51f09e0261a1c390115c6ffPatrik Flykt This file is part of systemd.
7bd8e95d44977833d0de3fc4e893eb3bc84351d6Patrik Flykt Copyright 2011 Lennart Poettering
139b011ab81ccea1d51f09e0261a1c390115c6ffPatrik Flykt systemd is free software; you can redistribute it and/or modify it
139b011ab81ccea1d51f09e0261a1c390115c6ffPatrik Flykt under the terms of the GNU Lesser General Public License as published by
139b011ab81ccea1d51f09e0261a1c390115c6ffPatrik Flykt the Free Software Foundation; either version 2.1 of the License, or
139b011ab81ccea1d51f09e0261a1c390115c6ffPatrik Flykt (at your option) any later version.
139b011ab81ccea1d51f09e0261a1c390115c6ffPatrik Flykt systemd is distributed in the hope that it will be useful, but
139b011ab81ccea1d51f09e0261a1c390115c6ffPatrik Flykt WITHOUT ANY WARRANTY; without even the implied warranty of
139b011ab81ccea1d51f09e0261a1c390115c6ffPatrik Flykt MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
139b011ab81ccea1d51f09e0261a1c390115c6ffPatrik Flykt Lesser General Public License for more details.
139b011ab81ccea1d51f09e0261a1c390115c6ffPatrik Flykt You should have received a copy of the GNU Lesser General Public License
139b011ab81ccea1d51f09e0261a1c390115c6ffPatrik Flykt along with systemd; If not, see <http://www.gnu.org/licenses/>.
139b011ab81ccea1d51f09e0261a1c390115c6ffPatrik Flykt#define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL*sizeof(HashItem))
76253e73f9c9c24fec755e485516f3b55d0707b4Dan Williams#define DEFAULT_FIELD_HASH_TABLE_SIZE (333ULL*sizeof(HashItem))
139b011ab81ccea1d51f09e0261a1c390115c6ffPatrik Flykt/* This is the minimum journal file size */
139b011ab81ccea1d51f09e0261a1c390115c6ffPatrik Flykt#define JOURNAL_FILE_SIZE_MIN (4ULL*1024ULL*1024ULL) /* 4 MiB */
139b011ab81ccea1d51f09e0261a1c390115c6ffPatrik Flykt/* These are the lower and upper bounds if we deduce the max_use value
139b011ab81ccea1d51f09e0261a1c390115c6ffPatrik Flykt * from the file system size */
76253e73f9c9c24fec755e485516f3b55d0707b4Dan Williams#define DEFAULT_MAX_USE_LOWER (1ULL*1024ULL*1024ULL) /* 1 MiB */
76253e73f9c9c24fec755e485516f3b55d0707b4Dan Williams#define DEFAULT_MAX_USE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */
f12abb48fc510b8b349c05e35ba048134debaf25Patrik Flykt/* This is the default minimal use limit, how much we'll use even if keep_free suggests otherwise. */
a9aff3615b430f86bd0a824214d95f634efaf894Patrik Flykt#define DEFAULT_MIN_USE (1ULL*1024ULL*1024ULL) /* 1 MiB */
631bbe71298ec892f77f44f94feb612646fe6853Patrik Flykt/* This is the upper bound if we deduce max_size from max_use */
a9aff3615b430f86bd0a824214d95f634efaf894Patrik Flykt#define DEFAULT_MAX_SIZE_UPPER (128ULL*1024ULL*1024ULL) /* 128 MiB */
da6fe470e17fa02f3adedc779585caf8669252bdPatrik Flykt/* This is the upper bound if we deduce the keep_free value from the
da6fe470e17fa02f3adedc779585caf8669252bdPatrik Flykt * file system size */
da6fe470e17fa02f3adedc779585caf8669252bdPatrik Flykt#define DEFAULT_KEEP_FREE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */
d1b0afe3653b4316a6361d204169620726d468a0Patrik Flykt/* This is the keep_free value when we can't determine the system size */
d1b0afe3653b4316a6361d204169620726d468a0Patrik Flykt#define DEFAULT_KEEP_FREE (1024ULL*1024ULL) /* 1 MB */
139b011ab81ccea1d51f09e0261a1c390115c6ffPatrik Flykt/* This is the default maximum number of journal files to keep around. */
66eac1201a9c1596f5901f8dbbf24bda7e350878Dan Williams/* n_data was the first entry we added after the initial file format design */
139b011ab81ccea1d51f09e0261a1c390115c6ffPatrik Flykt#define HEADER_SIZE_MIN ALIGN64(offsetof(Header, n_data))
da6fe470e17fa02f3adedc779585caf8669252bdPatrik Flykt/* How many entries to keep in the entry array chain cache at max */
da6fe470e17fa02f3adedc779585caf8669252bdPatrik Flykt/* How much to increase the journal file size at once each time we allocate something new. */
da6fe470e17fa02f3adedc779585caf8669252bdPatrik Flykt#define FILE_SIZE_INCREASE (8ULL*1024ULL*1024ULL) /* 8MB */
a9aff3615b430f86bd0a824214d95f634efaf894Patrik Flykt/* Reread fstat() of the file for detecting deletions at least this often */
a9aff3615b430f86bd0a824214d95f634efaf894Patrik Flykt/* The mmap context to use for the header we pick as one above the last defined type */
a9aff3615b430f86bd0a824214d95f634efaf894Patrik Flyktstatic int journal_file_set_online(JournalFile *f) {
76253e73f9c9c24fec755e485516f3b55d0707b4Dan WilliamsJournalFile* journal_file_close(JournalFile *f) {
76253e73f9c9c24fec755e485516f3b55d0707b4Dan Williams /* Write the final tag */
76253e73f9c9c24fec755e485516f3b55d0707b4Dan Williams /* Be friendly to btrfs: turn COW back on again now,
76253e73f9c9c24fec755e485516f3b55d0707b4Dan Williams * and defragment the file. We won't write to the file
76253e73f9c9c24fec755e485516f3b55d0707b4Dan Williams * ever again, hence remove all fragmentation, and
76253e73f9c9c24fec755e485516f3b55d0707b4Dan Williams * reenable all the good bits COW usually provides
76253e73f9c9c24fec755e485516f3b55d0707b4Dan Williams * (such as data checksumming). */
fe4b2156256c5bdf52341576571ce9f095d9f085Tom Gundersen munmap(f->fss_file, PAGE_ALIGN(f->fss_file_size));
fe4b2156256c5bdf52341576571ce9f095d9f085Tom Gundersenstatic int journal_file_init_header(JournalFile *f, JournalFile *template) {
66eac1201a9c1596f5901f8dbbf24bda7e350878Dan Williams f->compress_xz * HEADER_INCOMPATIBLE_COMPRESSED_XZ |
66eac1201a9c1596f5901f8dbbf24bda7e350878Dan Williams f->compress_lz4 * HEADER_INCOMPATIBLE_COMPRESSED_LZ4);
bbfa43ca37df0718287c25a8e39ee7477ebf33f6Patrik Flykt h.tail_entry_seqnum = template->header->tail_entry_seqnum;
bbfa43ca37df0718287c25a8e39ee7477ebf33f6Patrik Flykt if (k != sizeof(h))
da6fe470e17fa02f3adedc779585caf8669252bdPatrik Flyktstatic int journal_file_refresh_header(JournalFile *f) {
da6fe470e17fa02f3adedc779585caf8669252bdPatrik Flykt r = sd_id128_get_machine(&f->header->machine_id);
da6fe470e17fa02f3adedc779585caf8669252bdPatrik Flykt if (sd_id128_equal(boot_id, f->header->boot_id))
da6fe470e17fa02f3adedc779585caf8669252bdPatrik Flykt /* Sync the online state to disk */
ea3b3a75abb3f8b853f7da454b9b8e258a120eeaPatrik Flyktstatic int journal_file_verify_header(JournalFile *f) {
ea3b3a75abb3f8b853f7da454b9b8e258a120eeaPatrik Flykt if (memcmp(f->header->signature, HEADER_SIGNATURE, 8))
ea3b3a75abb3f8b853f7da454b9b8e258a120eeaPatrik Flykt /* In both read and write mode we refuse to open files with
ea3b3a75abb3f8b853f7da454b9b8e258a120eeaPatrik Flykt * incompatible flags we don't know */
ea3b3a75abb3f8b853f7da454b9b8e258a120eeaPatrik Flykt flags = le32toh(f->header->incompatible_flags);
3f0c075f8ef3344da5a6bda524540201f9204e61Patrik Flykt log_debug("Journal file %s has unknown incompatible flags %"PRIx32,
139b011ab81ccea1d51f09e0261a1c390115c6ffPatrik Flykt flags = (flags & HEADER_INCOMPATIBLE_ANY) & ~HEADER_INCOMPATIBLE_SUPPORTED;
c806ffb9592fa9a2b13a1f9f9be4c77cd5b211aaZbigniew Jędrzejewski-Szmek log_debug("Journal file %s uses incompatible flags %"PRIx32
139b011ab81ccea1d51f09e0261a1c390115c6ffPatrik Flykt " disabled at compilation time.", f->path, flags);
4e3e6679e8f73b83d38e4b20d8b025e12991d1cbPatrik Flykt /* When open for writing we refuse to open files with
4e3e6679e8f73b83d38e4b20d8b025e12991d1cbPatrik Flykt * compatible flags, too */
a9aff3615b430f86bd0a824214d95f634efaf894Patrik Flykt if (f->writable && (flags & ~HEADER_COMPATIBLE_SUPPORTED)) {
a9aff3615b430f86bd0a824214d95f634efaf894Patrik Flykt log_debug("Journal file %s has unknown compatible flags %"PRIx32,
c806ffb9592fa9a2b13a1f9f9be4c77cd5b211aaZbigniew Jędrzejewski-Szmek f->path, flags & ~HEADER_COMPATIBLE_ANY);
a9aff3615b430f86bd0a824214d95f634efaf894Patrik Flykt flags = (flags & HEADER_COMPATIBLE_ANY) & ~HEADER_COMPATIBLE_SUPPORTED;
346e13a25dc6f76d3bc9d8decd40dc4782b02d2aPatrik Flykt log_debug("Journal file %s uses compatible flags %"PRIx32
a9aff3615b430f86bd0a824214d95f634efaf894Patrik Flykt " disabled at compilation time.", f->path, flags);
d1b0afe3653b4316a6361d204169620726d468a0Patrik Flykt /* The first addition was n_data, so check that we are at least this large */
d1b0afe3653b4316a6361d204169620726d468a0Patrik Flykt if (le64toh(f->header->header_size) < HEADER_SIZE_MIN)
d1b0afe3653b4316a6361d204169620726d468a0Patrik Flykt if (JOURNAL_HEADER_SEALED(f->header) && !JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
139b011ab81ccea1d51f09e0261a1c390115c6ffPatrik Flykt if ((le64toh(f->header->header_size) + le64toh(f->header->arena_size)) > (uint64_t) f->last_stat.st_size)
3f0c075f8ef3344da5a6bda524540201f9204e61Patrik Flykt if (le64toh(f->header->tail_object_offset) > (le64toh(f->header->header_size) + le64toh(f->header->arena_size)))
3f0c075f8ef3344da5a6bda524540201f9204e61Patrik Flykt if (!VALID64(le64toh(f->header->data_hash_table_offset)) ||
139b011ab81ccea1d51f09e0261a1c390115c6ffPatrik Flykt !VALID64(le64toh(f->header->field_hash_table_offset)) ||
3f0c075f8ef3344da5a6bda524540201f9204e61Patrik Flykt !VALID64(le64toh(f->header->tail_object_offset)) ||
3f0c075f8ef3344da5a6bda524540201f9204e61Patrik Flykt !VALID64(le64toh(f->header->entry_array_offset)))
346e13a25dc6f76d3bc9d8decd40dc4782b02d2aPatrik Flykt if (!sd_id128_equal(machine_id, f->header->machine_id))
a9aff3615b430f86bd0a824214d95f634efaf894Patrik Flykt log_debug("Journal file %s is already online. Assuming unclean closing.", f->path);
a9aff3615b430f86bd0a824214d95f634efaf894Patrik Flykt log_debug("Journal file %s has unknown state %i.", f->path, state);
bbfa43ca37df0718287c25a8e39ee7477ebf33f6Patrik Flykt f->compress_xz = JOURNAL_HEADER_COMPRESSED_XZ(f->header);
bbfa43ca37df0718287c25a8e39ee7477ebf33f6Patrik Flykt f->compress_lz4 = JOURNAL_HEADER_COMPRESSED_LZ4(f->header);
926695f1b5f9395eeb416cc2f478a9cf75fdbeb4Thomas Hindoe Paaboel Andersenstatic int journal_file_fstat(JournalFile *f) {
3dc34fcc97b41f8b7b019027225b121dfbb9871dPatrik Flykt /* Refuse appending to files that are already deleted */
7246333cb803b03440d3bd0bdaa233564d09b5aePatrik Flyktstatic int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
7246333cb803b03440d3bd0bdaa233564d09b5aePatrik Flykt /* We assume that this file is not sparse, and we know that
a9aff3615b430f86bd0a824214d95f634efaf894Patrik Flykt * for sure, since we always call posix_fallocate()
a9aff3615b430f86bd0a824214d95f634efaf894Patrik Flykt * ourselves */
3dc34fcc97b41f8b7b019027225b121dfbb9871dPatrik Flykt if (new_size < le64toh(f->header->header_size))
a9aff3615b430f86bd0a824214d95f634efaf894Patrik Flykt /* We already pre-allocated enough space, but before
a9aff3615b430f86bd0a824214d95f634efaf894Patrik Flykt * we write to it, let's check with fstat() if the
a9aff3615b430f86bd0a824214d95f634efaf894Patrik Flykt * file got deleted, in order to make sure we don't throw
da6fe470e17fa02f3adedc779585caf8669252bdPatrik Flykt * away the data immediately. Don't check fstat() for
da6fe470e17fa02f3adedc779585caf8669252bdPatrik Flykt * all writes though, but only once every 10s. */
da6fe470e17fa02f3adedc779585caf8669252bdPatrik Flykt if (f->last_stat_usec + LAST_STAT_REFRESH_USEC > now(CLOCK_MONOTONIC))
7246333cb803b03440d3bd0bdaa233564d09b5aePatrik Flykt /* Allocate more space. */
7246333cb803b03440d3bd0bdaa233564d09b5aePatrik Flykt if (f->metrics.max_size > 0 && new_size > f->metrics.max_size)
346e13a25dc6f76d3bc9d8decd40dc4782b02d2aPatrik Flykt if (new_size > f->metrics.min_size && f->metrics.keep_free > 0) {
346e13a25dc6f76d3bc9d8decd40dc4782b02d2aPatrik Flykt available = LESS_BY((uint64_t) svfs.f_bfree * (uint64_t) svfs.f_bsize, f->metrics.keep_free);
a9aff3615b430f86bd0a824214d95f634efaf894Patrik Flykt /* Increase by larger blocks at once */
a9aff3615b430f86bd0a824214d95f634efaf894Patrik Flykt new_size = ((new_size+FILE_SIZE_INCREASE-1) / FILE_SIZE_INCREASE) * FILE_SIZE_INCREASE;
a9aff3615b430f86bd0a824214d95f634efaf894Patrik Flykt if (f->metrics.max_size > 0 && new_size > f->metrics.max_size)
a9aff3615b430f86bd0a824214d95f634efaf894Patrik Flykt /* Note that the glibc fallocate() fallback is very
a9aff3615b430f86bd0a824214d95f634efaf894Patrik Flykt inefficient, hence we try to minimize the allocation area
a34b57c0d43b8bf819ccd4f62c314b41b625454dPatrik Flykt as we can. */
a34b57c0d43b8bf819ccd4f62c314b41b625454dPatrik Flykt r = posix_fallocate(f->fd, old_size, new_size - old_size);
a34b57c0d43b8bf819ccd4f62c314b41b625454dPatrik Flykt f->header->arena_size = htole64(new_size - le64toh(f->header->header_size));
a34b57c0d43b8bf819ccd4f62c314b41b625454dPatrik Flyktstatic unsigned type_to_context(ObjectType type) {
a34b57c0d43b8bf819ccd4f62c314b41b625454dPatrik Flykt /* One context for each type, plus one catch-all for the rest */
a34b57c0d43b8bf819ccd4f62c314b41b625454dPatrik Flykt assert_cc(_OBJECT_TYPE_MAX <= MMAP_CACHE_MAX_CONTEXTS);
3dc34fcc97b41f8b7b019027225b121dfbb9871dPatrik Flykt assert_cc(CONTEXT_HEADER < MMAP_CACHE_MAX_CONTEXTS);
3dc34fcc97b41f8b7b019027225b121dfbb9871dPatrik Flykt return type > OBJECT_UNUSED && type < _OBJECT_TYPE_MAX ? type : 0;
a34b57c0d43b8bf819ccd4f62c314b41b625454dPatrik Flyktstatic int journal_file_move_to(JournalFile *f, ObjectType type, bool keep_always, uint64_t offset, uint64_t size, void **ret) {
a34b57c0d43b8bf819ccd4f62c314b41b625454dPatrik Flykt /* Avoid SIGBUS on invalid accesses */
a34b57c0d43b8bf819ccd4f62c314b41b625454dPatrik Flykt if (offset + size > (uint64_t) f->last_stat.st_size) {
a34b57c0d43b8bf819ccd4f62c314b41b625454dPatrik Flykt /* Hmm, out of range? Let's refresh the fstat() data
a34b57c0d43b8bf819ccd4f62c314b41b625454dPatrik Flykt * first, before we trust that check. */
a34b57c0d43b8bf819ccd4f62c314b41b625454dPatrik Flykt if (offset + size > (uint64_t) f->last_stat.st_size)
3dc34fcc97b41f8b7b019027225b121dfbb9871dPatrik Flykt return mmap_cache_get(f->mmap, f->fd, f->prot, type_to_context(type), keep_always, offset, size, &f->last_stat, ret);
d1b0afe3653b4316a6361d204169620726d468a0Patrik Flyktstatic uint64_t minimum_header_size(Object *o) {
d1b0afe3653b4316a6361d204169620726d468a0Patrik Flykt [OBJECT_DATA_HASH_TABLE] = sizeof(HashTableObject),
d1b0afe3653b4316a6361d204169620726d468a0Patrik Flykt [OBJECT_FIELD_HASH_TABLE] = sizeof(HashTableObject),
3dc34fcc97b41f8b7b019027225b121dfbb9871dPatrik Flykt [OBJECT_ENTRY_ARRAY] = sizeof(EntryArrayObject),
3dc34fcc97b41f8b7b019027225b121dfbb9871dPatrik Flykt if (o->object.type >= ELEMENTSOF(table) || table[o->object.type] <= 0)
d1b0afe3653b4316a6361d204169620726d468a0Patrik Flykt return sizeof(ObjectHeader);
d1b0afe3653b4316a6361d204169620726d468a0Patrik Flyktint journal_file_move_to_object(JournalFile *f, ObjectType type, uint64_t offset, Object **ret) {
513a6fa8679510ea1b55967bdb482dd5f8a39f21Ronny Chevalier /* Objects may only be located at multiples of 64 bits */
d1b0afe3653b4316a6361d204169620726d468a0Patrik Flykt r = journal_file_move_to(f, type, false, offset, sizeof(ObjectHeader), &t);
bbfa43ca37df0718287c25a8e39ee7477ebf33f6Patrik Flykt if (s < sizeof(ObjectHeader))
7246333cb803b03440d3bd0bdaa233564d09b5aePatrik Flykt if (type > OBJECT_UNUSED && o->object.type != type)
7246333cb803b03440d3bd0bdaa233564d09b5aePatrik Flykt if (s > sizeof(ObjectHeader)) {
d1b0afe3653b4316a6361d204169620726d468a0Patrik Flykt r = journal_file_move_to(f, type, false, offset, s, &t);
7246333cb803b03440d3bd0bdaa233564d09b5aePatrik Flyktstatic uint64_t journal_file_entry_seqnum(JournalFile *f, uint64_t *seqnum) {
3dc34fcc97b41f8b7b019027225b121dfbb9871dPatrik Flykt /* If an external seqnum counter was passed, we update
3dc34fcc97b41f8b7b019027225b121dfbb9871dPatrik Flykt * both the local and the external one, and set it to
3dc34fcc97b41f8b7b019027225b121dfbb9871dPatrik Flykt * the maximum of both */
3dc34fcc97b41f8b7b019027225b121dfbb9871dPatrik Flyktint journal_file_append_object(JournalFile *f, ObjectType type, uint64_t size, Object **ret, uint64_t *offset) {
d1b0afe3653b4316a6361d204169620726d468a0Patrik Flykt assert(type > OBJECT_UNUSED && type < _OBJECT_TYPE_MAX);
d1b0afe3653b4316a6361d204169620726d468a0Patrik Flykt r = journal_file_move_to_object(f, OBJECT_UNUSED, p, &tail);
d1b0afe3653b4316a6361d204169620726d468a0Patrik Flykt r = journal_file_move_to(f, type, false, p, size, &t);
d1b0afe3653b4316a6361d204169620726d468a0Patrik Flykt f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);
d1b0afe3653b4316a6361d204169620726d468a0Patrik Flyktstatic int journal_file_setup_data_hash_table(JournalFile *f) {
d1b0afe3653b4316a6361d204169620726d468a0Patrik Flykt /* We estimate that we need 1 hash table entry per 768 bytes
d1b0afe3653b4316a6361d204169620726d468a0Patrik Flykt of journal file and we want to make sure we never get
d1b0afe3653b4316a6361d204169620726d468a0Patrik Flykt beyond 75% fill level. Calculate the hash table size for
d1b0afe3653b4316a6361d204169620726d468a0Patrik Flykt the maximum file size based on these metrics. */
d1b0afe3653b4316a6361d204169620726d468a0Patrik Flykt s = (f->metrics.max_size * 4 / 768 / 3) * sizeof(HashItem);
d1b0afe3653b4316a6361d204169620726d468a0Patrik Flykt log_debug("Reserving %"PRIu64" entries in hash table.", s / sizeof(HashItem));
9021bb9f935c93b516b10c88db2a212a9e3a8140Tom Gundersen f->header->data_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
d1b0afe3653b4316a6361d204169620726d468a0Patrik Flyktstatic int journal_file_setup_field_hash_table(JournalFile *f) {
f12abb48fc510b8b349c05e35ba048134debaf25Patrik Flykt /* We use a fixed size hash table for the fields as this
f12abb48fc510b8b349c05e35ba048134debaf25Patrik Flykt * number should grow very slowly only */
631bbe71298ec892f77f44f94feb612646fe6853Patrik Flykt f->header->field_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
631bbe71298ec892f77f44f94feb612646fe6853Patrik Flyktint journal_file_map_data_hash_table(JournalFile *f) {
631bbe71298ec892f77f44f94feb612646fe6853Patrik Flykt p = le64toh(f->header->data_hash_table_offset);
631bbe71298ec892f77f44f94feb612646fe6853Patrik Flyktint journal_file_map_field_hash_table(JournalFile *f) {
631bbe71298ec892f77f44f94feb612646fe6853Patrik Flykt p = le64toh(f->header->field_hash_table_offset);
bbfa43ca37df0718287c25a8e39ee7477ebf33f6Patrik Flykt m = le64toh(f->header->field_hash_table_size) / sizeof(HashItem);
bbfa43ca37df0718287c25a8e39ee7477ebf33f6Patrik Flykt /* This might alter the window we are looking at */
bbfa43ca37df0718287c25a8e39ee7477ebf33f6Patrik Flykt o->field.next_hash_offset = o->field.head_data_offset = 0;
631bbe71298ec892f77f44f94feb612646fe6853Patrik Flykt p = le64toh(f->field_hash_table[h].tail_hash_offset);
631bbe71298ec892f77f44f94feb612646fe6853Patrik Flykt f->field_hash_table[h].head_hash_offset = htole64(offset);
631bbe71298ec892f77f44f94feb612646fe6853Patrik Flykt r = journal_file_move_to_object(f, OBJECT_FIELD, p, &o);
631bbe71298ec892f77f44f94feb612646fe6853Patrik Flykt f->field_hash_table[h].tail_hash_offset = htole64(offset);
631bbe71298ec892f77f44f94feb612646fe6853Patrik Flykt if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
ed6ee21953dac9c78383da00bc4514ece6b75ab5Patrik Flykt f->header->n_fields = htole64(le64toh(f->header->n_fields) + 1);
5da1b97f3c3d15521f2dcfbc18eccd6580122ebcPatrik Flykt m = le64toh(f->header->data_hash_table_size) / sizeof(HashItem);
6599680e2d33597f0f11a99e1c3c957b42418568Patrik Flykt /* This might alter the window we are looking at */
6599680e2d33597f0f11a99e1c3c957b42418568Patrik Flykt o->data.next_hash_offset = o->data.next_field_offset = 0;
6599680e2d33597f0f11a99e1c3c957b42418568Patrik Flykt o->data.entry_offset = o->data.entry_array_offset = 0;
631bbe71298ec892f77f44f94feb612646fe6853Patrik Flykt p = le64toh(f->data_hash_table[h].tail_hash_offset);
c47e8936a43ce546e8a74fa569e9fbfae6c64be7Patrik Flykt /* Only entry in the hash table is easy */
c47e8936a43ce546e8a74fa569e9fbfae6c64be7Patrik Flykt f->data_hash_table[h].head_hash_offset = htole64(offset);
631bbe71298ec892f77f44f94feb612646fe6853Patrik Flykt /* Move back to the previous data object, to patch in
631bbe71298ec892f77f44f94feb612646fe6853Patrik Flykt * pointer */
631bbe71298ec892f77f44f94feb612646fe6853Patrik Flykt r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
bbfa43ca37df0718287c25a8e39ee7477ebf33f6Patrik Flykt f->data_hash_table[h].tail_hash_offset = htole64(offset);
631bbe71298ec892f77f44f94feb612646fe6853Patrik Flykt if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
631bbe71298ec892f77f44f94feb612646fe6853Patrik Flykt f->header->n_data = htole64(le64toh(f->header->n_data) + 1);
ed6ee21953dac9c78383da00bc4514ece6b75ab5Patrik Flykt const void *field, uint64_t size, uint64_t hash,
a34b57c0d43b8bf819ccd4f62c314b41b625454dPatrik Flykt /* If the field hash table is empty, we can't find anything */
a34b57c0d43b8bf819ccd4f62c314b41b625454dPatrik Flykt if (le64toh(f->header->field_hash_table_size) <= 0)
ed6ee21953dac9c78383da00bc4514ece6b75ab5Patrik Flykt /* Map the field hash table, if it isn't mapped yet. */
ed6ee21953dac9c78383da00bc4514ece6b75ab5Patrik Flykt osize = offsetof(Object, field.payload) + size;
ed6ee21953dac9c78383da00bc4514ece6b75ab5Patrik Flykt m = le64toh(f->header->field_hash_table_size) / sizeof(HashItem);
bbfa43ca37df0718287c25a8e39ee7477ebf33f6Patrik Flykt p = le64toh(f->field_hash_table[h].head_hash_offset);
9d89d1ae71cb298218e35a69d6b70e2c94de5271Patrik Flykt while (p > 0) {
a34b57c0d43b8bf819ccd4f62c314b41b625454dPatrik Flykt r = journal_file_move_to_object(f, OBJECT_FIELD, p, &o);
7246333cb803b03440d3bd0bdaa233564d09b5aePatrik Flykt return journal_file_find_field_object_with_hash(f,
631bbe71298ec892f77f44f94feb612646fe6853Patrik Flykt const void *data, uint64_t size, uint64_t hash,
631bbe71298ec892f77f44f94feb612646fe6853Patrik Flykt /* If there's no data hash table, then there's no entry. */
631bbe71298ec892f77f44f94feb612646fe6853Patrik Flykt if (le64toh(f->header->data_hash_table_size) <= 0)
631bbe71298ec892f77f44f94feb612646fe6853Patrik Flykt /* Map the data hash table, if it isn't mapped yet. */
631bbe71298ec892f77f44f94feb612646fe6853Patrik Flykt m = le64toh(f->header->data_hash_table_size) / sizeof(HashItem);
631bbe71298ec892f77f44f94feb612646fe6853Patrik Flykt p = le64toh(f->data_hash_table[h].head_hash_offset);
631bbe71298ec892f77f44f94feb612646fe6853Patrik Flykt while (p > 0) {
631bbe71298ec892f77f44f94feb612646fe6853Patrik Flykt r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
631bbe71298ec892f77f44f94feb612646fe6853Patrik Flykt if (o->object.flags & OBJECT_COMPRESSION_MASK) {
631bbe71298ec892f77f44f94feb612646fe6853Patrik Flykt r = decompress_blob(o->object.flags & OBJECT_COMPRESSION_MASK,
631bbe71298ec892f77f44f94feb612646fe6853Patrik Flykt o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize, 0);
631bbe71298ec892f77f44f94feb612646fe6853Patrik Flykt return journal_file_find_data_object_with_hash(f,
f12abb48fc510b8b349c05e35ba048134debaf25Patrik Flykt r = journal_file_find_field_object_with_hash(f, field, size, hash, &o, &p);
c3e2adeaba8e043caed0ef139eeaea016bd152d0Patrik Flykt else if (r > 0) {
c3e2adeaba8e043caed0ef139eeaea016bd152d0Patrik Flykt osize = offsetof(Object, field.payload) + size;
bbfa43ca37df0718287c25a8e39ee7477ebf33f6Patrik Flykt r = journal_file_append_object(f, OBJECT_FIELD, osize, &o, &p);
7246333cb803b03440d3bd0bdaa233564d09b5aePatrik Flykt /* The linking might have altered the window, so let's
bbfa43ca37df0718287c25a8e39ee7477ebf33f6Patrik Flykt * refresh our pointer */
7246333cb803b03440d3bd0bdaa233564d09b5aePatrik Flykt r = journal_file_move_to_object(f, OBJECT_FIELD, p, &o);
7246333cb803b03440d3bd0bdaa233564d09b5aePatrik Flykt r = journal_file_hmac_put_object(f, OBJECT_FIELD, o, p);
a34b57c0d43b8bf819ccd4f62c314b41b625454dPatrik Flykt const void *eq;
a34b57c0d43b8bf819ccd4f62c314b41b625454dPatrik Flykt r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);
a34b57c0d43b8bf819ccd4f62c314b41b625454dPatrik Flykt if (r > 0) {
a34b57c0d43b8bf819ccd4f62c314b41b625454dPatrik Flykt r = journal_file_append_object(f, OBJECT_DATA, osize, &o, &p);
a34b57c0d43b8bf819ccd4f62c314b41b625454dPatrik Flykt if (JOURNAL_FILE_COMPRESS(f) && size >= COMPRESSION_SIZE_THRESHOLD) {
a34b57c0d43b8bf819ccd4f62c314b41b625454dPatrik Flykt compression = compress_blob(data, size, o->data.payload, &rsize);
a34b57c0d43b8bf819ccd4f62c314b41b625454dPatrik Flykt o->object.size = htole64(offsetof(Object, data.payload) + rsize);
9021bb9f935c93b516b10c88db2a212a9e3a8140Tom Gundersen log_debug("Compressed data object %"PRIu64" -> %zu using %s",
9021bb9f935c93b516b10c88db2a212a9e3a8140Tom Gundersen size, rsize, object_compressed_to_string(compression));
3dc34fcc97b41f8b7b019027225b121dfbb9871dPatrik Flykt /* Compression didn't work, we don't really care why, let's continue without compression */
d1b0afe3653b4316a6361d204169620726d468a0Patrik Flykt /* The linking might have altered the window, so let's
d1b0afe3653b4316a6361d204169620726d468a0Patrik Flykt * refresh our pointer */
d1b0afe3653b4316a6361d204169620726d468a0Patrik Flykt r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
139b011ab81ccea1d51f09e0261a1c390115c6ffPatrik Flykt /* Create field object ... */
139b011ab81ccea1d51f09e0261a1c390115c6ffPatrik Flykt r = journal_file_append_field(f, data, (uint8_t*) eq - (uint8_t*) data, &fo, &fp);
139b011ab81ccea1d51f09e0261a1c390115c6ffPatrik Flykt /* ... and link it in. */
139b011ab81ccea1d51f09e0261a1c390115c6ffPatrik Flykt o->data.next_field_offset = fo->field.head_data_offset;
139b011ab81ccea1d51f09e0261a1c390115c6ffPatrik Flykt r = journal_file_hmac_put_object(f, OBJECT_DATA, o, p);
cc22955cfefb4bd6e7a135f1ec95fb5a07ba9ce3Thomas Halleruint64_t journal_file_entry_n_items(Object *o) {
bbfa43ca37df0718287c25a8e39ee7477ebf33f6Patrik Flykt return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem);
bbfa43ca37df0718287c25a8e39ee7477ebf33f6Patrik Flyktuint64_t journal_file_entry_array_n_items(Object *o) {
bbfa43ca37df0718287c25a8e39ee7477ebf33f6Patrik Flykt return (le64toh(o->object.size) - offsetof(Object, entry_array.items)) / sizeof(uint64_t);
bbfa43ca37df0718287c25a8e39ee7477ebf33f6Patrik Flyktuint64_t journal_file_hash_table_n_items(Object *o) {
bbfa43ca37df0718287c25a8e39ee7477ebf33f6Patrik Flykt if (o->object.type != OBJECT_DATA_HASH_TABLE &&
bbfa43ca37df0718287c25a8e39ee7477ebf33f6Patrik Flykt return (le64toh(o->object.size) - offsetof(Object, hash_table.items)) / sizeof(HashItem);
bbfa43ca37df0718287c25a8e39ee7477ebf33f6Patrik Flyktstatic int link_entry_into_array(JournalFile *f,
139b011ab81ccea1d51f09e0261a1c390115c6ffPatrik Flykt while (a > 0) {
139b011ab81ccea1d51f09e0261a1c390115c6ffPatrik Flykt r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
139b011ab81ccea1d51f09e0261a1c390115c6ffPatrik Flykt if (i < n) {
139b011ab81ccea1d51f09e0261a1c390115c6ffPatrik Flykt a = le64toh(o->entry_array.next_entry_array_offset);
139b011ab81ccea1d51f09e0261a1c390115c6ffPatrik Flykt r = journal_file_append_object(f, OBJECT_ENTRY_ARRAY,
139b011ab81ccea1d51f09e0261a1c390115c6ffPatrik Flykt offsetof(Object, entry_array.items) + n * sizeof(uint64_t),
139b011ab81ccea1d51f09e0261a1c390115c6ffPatrik Flykt r = journal_file_hmac_put_object(f, OBJECT_ENTRY_ARRAY, o, q);
62e3d1aed512d68cab1fc9b509e813a1fa2b3790Lennart Poettering r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, ap, &o);
139b011ab81ccea1d51f09e0261a1c390115c6ffPatrik Flykt o->entry_array.next_entry_array_offset = htole64(q);
139b011ab81ccea1d51f09e0261a1c390115c6ffPatrik Flykt if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
139b011ab81ccea1d51f09e0261a1c390115c6ffPatrik Flykt f->header->n_entry_arrays = htole64(le64toh(f->header->n_entry_arrays) + 1);
139b011ab81ccea1d51f09e0261a1c390115c6ffPatrik Flyktstatic int link_entry_into_array_plus_one(JournalFile *f,
uint64_t p;
assert(f);
assert(o);
return -EINVAL;
return link_entry_into_array_plus_one(f,
offset);
uint64_t n, i;
assert(f);
assert(o);
return -EINVAL;
r = link_entry_into_array(f,
offset);
/* log_debug("=> %s seqnr=%"PRIu64" n_entries=%"PRIu64, f->path, o->entry.seqnum, f->header->n_entries); */
f->tail_entry_monotonic_valid = true;
n = journal_file_entry_n_items(o);
static int journal_file_append_entry_internal(
JournalFile *f,
Object *o;
assert(f);
#ifdef HAVE_GCRYPT
if (ret)
*ret = o;
if (offset)
assert(f);
int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqnum, Object **ret, uint64_t *offset) {
assert(f);
if (!ts) {
if (f->tail_entry_monotonic_valid &&
return -EINVAL;
#ifdef HAVE_GCRYPT
for (i = 0; i < n_iovec; i++) {
uint64_t p;
Object *o;
r = -EIO;
typedef struct ChainCacheItem {
static void chain_cache_put(
OrderedHashmap *h,
if (!ci) {
if (!ci)
static int generic_array_get(
JournalFile *f,
uint64_t i,
Object *o;
uint64_t p = 0, a, t = 0;
assert(f);
a = first;
uint64_t k;
k = journal_file_entry_array_n_items(o);
goto found;
if (ret)
*ret = o;
if (offset)
*offset = p;
static int generic_array_get_plus_one(
JournalFile *f,
uint64_t i,
Object *o;
assert(f);
if (ret)
*ret = o;
if (offset)
static int generic_array_bisect(
JournalFile *f,
uint64_t n,
bool subtract_one = false;
assert(f);
a = first;
if (r == TEST_LEFT) {
if (right <= 0)
return -EBADMSG;
if (r == TEST_FOUND)
if (r == TEST_RIGHT) {
left = 0;
if (last_index > 0) {
return -EBADMSG;
if (r == TEST_FOUND)
if (r == TEST_RIGHT)
right = x;
return -EBADMSG;
if (r == TEST_FOUND)
if (r == TEST_RIGHT)
right = y;
subtract_one = true;
i = left;
goto found;
return -EBADMSG;
if (r == TEST_FOUND)
if (r == TEST_RIGHT)
right = i;
subtract_one = true;
goto found;
if (subtract_one && t == 0 && i == 0)
chain_cache_put(f->chain_cache, ci, first, a, le64toh(array->entry_array.items[0]), t, subtract_one ? (i > 0 ? i-1 : (uint64_t) -1) : i);
if (subtract_one && i == 0)
p = last_p;
else if (subtract_one)
if (ret)
*ret = o;
if (offset)
*offset = p;
if (idx)
static int generic_array_bisect_plus_one(
JournalFile *f,
uint64_t n,
bool step_back = false;
Object *o;
assert(f);
if (r == TEST_FOUND)
if (r == TEST_LEFT)
if (r == TEST_RIGHT) {
goto found;
if (r == 0 && step_back)
goto found;
if (r > 0 && idx)
(*idx) ++;
if (ret)
*ret = o;
if (offset)
if (idx)
*idx = 0;
assert(f);
assert(p > 0);
if (p == needle)
return TEST_FOUND;
else if (p < needle)
return TEST_LEFT;
return TEST_RIGHT;
Object *o;
assert(f);
assert(p > 0);
return TEST_FOUND;
return TEST_LEFT;
return TEST_RIGHT;
JournalFile *f,
return generic_array_bisect(f,
Object *o;
assert(f);
assert(p > 0);
return TEST_FOUND;
return TEST_LEFT;
return TEST_RIGHT;
JournalFile *f,
return generic_array_bisect(f,
Object *o;
assert(f);
assert(p > 0);
return TEST_FOUND;
return TEST_LEFT;
return TEST_RIGHT;
static int find_data_object_by_boot_id(
JournalFile *f,
Object **o,
uint64_t *b) {
JournalFile *f,
Object *o;
assert(f);
return -ENOENT;
return generic_array_bisect_plus_one(f,
f->current_offset = 0;
f->current_seqnum = 0;
f->current_realtime = 0;
f->current_monotonic = 0;
f->current_xor_hash = 0;
JournalFile *f,
uint64_t p,
assert(f);
r = generic_array_bisect(f,
r = generic_array_get(f,
f->path, i);
return -EBADMSG;
if (offset)
JournalFile *f,
uint64_t n, i;
Object *d;
assert(f);
assert(p > 0 || !o);
return -EINVAL;
return generic_array_get_plus_one(f,
JournalFile *f,
uint64_t p,
Object *d;
assert(f);
return generic_array_bisect_plus_one(f,
JournalFile *f,
Object *o, *d;
uint64_t b, z;
assert(f);
return -ENOENT;
uint64_t p, q;
if (ret)
if (offset)
*offset = q;
JournalFile *f,
Object *d;
assert(f);
return generic_array_bisect_plus_one(f,
JournalFile *f,
Object *d;
assert(f);
return generic_array_bisect_plus_one(f,
Object *o;
uint64_t p;
assert(f);
goto fail;
case OBJECT_UNUSED:
case OBJECT_DATA:
case OBJECT_FIELD:
case OBJECT_ENTRY:
case OBJECT_FIELD_HASH_TABLE:
case OBJECT_DATA_HASH_TABLE:
case OBJECT_ENTRY_ARRAY:
case OBJECT_TAG:
fail:
assert(f);
f->path,
100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))));
100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))));
unsigned attrs;
assert(f);
int journal_file_open(
const char *fname,
int flags,
bool compress,
bool seal,
bool newly_created = false;
JournalFile *f;
return -EINVAL;
return -EINVAL;
return -ENOMEM;
#if defined(HAVE_LZ4)
#ifdef HAVE_GCRYPT
if (mmap_cache)
if (!f->mmap) {
r = -ENOMEM;
goto fail;
if (!f->path) {
r = -ENOMEM;
goto fail;
if (!f->chain_cache) {
r = -ENOMEM;
goto fail;
if (f->fd < 0) {
r = -errno;
goto fail;
r = journal_file_fstat(f);
goto fail;
(void) journal_file_warn_btrfs(f);
#ifdef HAVE_GCRYPT
if (f->seal) {
r = journal_file_fss_load(f);
f->seal = false;
goto fail;
r = journal_file_fstat(f);
goto fail;
newly_created = true;
r = -EIO;
goto fail;
r = mmap_cache_get(f->mmap, f->fd, f->prot, CONTEXT_HEADER, true, 0, PAGE_ALIGN(sizeof(Header)), &f->last_stat, &h);
goto fail;
f->header = h;
if (!newly_created) {
r = journal_file_verify_header(f);
goto fail;
#ifdef HAVE_GCRYPT
r = journal_file_fss_load(f);
goto fail;
if (f->writable) {
if (metrics) {
} else if (template)
r = journal_file_refresh_header(f);
goto fail;
#ifdef HAVE_GCRYPT
r = journal_file_hmac_setup(f);
goto fail;
if (newly_created) {
goto fail;
goto fail;
#ifdef HAVE_GCRYPT
r = journal_file_append_first_tag(f);
goto fail;
r = -EIO;
goto fail;
*ret = f;
fail:
r = -EIO;
size_t l;
assert(f);
assert(*f);
old_file = *f;
return -EINVAL;
return -EINVAL;
return -ENOMEM;
return -errno;
r = journal_file_open(old_file->path, old_file->flags, old_file->mode, compress, seal, NULL, old_file->mmap, old_file, &new_file);
*f = new_file;
const char *fname,
int flags,
bool compress,
bool seal,
size_t l;
if (!IN_SET(r,
random_u64()) < 0)
return -ENOMEM;
return -errno;
(void) btrfs_defrag(p);
int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) {
uint64_t i, n;
assert(o);
assert(p);
return -EPERM;
n = journal_file_entry_n_items(o);
uint64_t l, h;
size_t t;
void *data;
Object *u;
return -EBADMSG;
t = (size_t) l;
if ((uint64_t) t != l)
return -E2BIG;
l = rsize;
return -EPROTONOSUPPORT;
return -EIO;
assert(m);
*m = (JournalMetrics) {
char a[FORMAT_BYTES_MAX], b[FORMAT_BYTES_MAX], c[FORMAT_BYTES_MAX], d[FORMAT_BYTES_MAX], e[FORMAT_BYTES_MAX];
assert(m);
fs_size = 0;
if (fs_size > 0) {
if (m->max_size != 0) {
if (fs_size > 0) {
m->n_max_files);
assert(f);
if (from) {
return -ENOENT;
if (to) {
return -ENOENT;
int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot_id, usec_t *from, usec_t *to) {
Object *o;
uint64_t p;
assert(f);
if (from) {
if (to) {
r = generic_array_get_plus_one(f,
&o, NULL);
assert(f);
if (le64toh(f->header->n_data) * 4ULL > (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)) * 3ULL) {
log_debug("Data hash table of %s has a fill level at %.1f (%"PRIu64" of %"PRIu64" items, %llu file size, %"PRIu64" bytes per hash table item), suggesting rotation.",
f->path,
100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))),
if (le64toh(f->header->n_fields) * 4ULL > (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)) * 3ULL) {
log_debug("Field hash table of %s has a fill level at %.1f (%"PRIu64" of %"PRIu64" items), suggesting rotation.",
f->path,
100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))),
if (max_file_usec > 0) {
usec_t t, h;
if (h > 0 && t > h + max_file_usec)