journal-file.c revision b9a1617d75c16a48cccf4ff135013dca9af94e7d
139b011ab81ccea1d51f09e0261a1c390115c6ffPatrik Flykt/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
139b011ab81ccea1d51f09e0261a1c390115c6ffPatrik Flykt This file is part of systemd.
7bd8e95d44977833d0de3fc4e893eb3bc84351d6Patrik Flykt Copyright 2011 Lennart Poettering
139b011ab81ccea1d51f09e0261a1c390115c6ffPatrik Flykt systemd is free software; you can redistribute it and/or modify it
139b011ab81ccea1d51f09e0261a1c390115c6ffPatrik Flykt under the terms of the GNU Lesser General Public License as published by
139b011ab81ccea1d51f09e0261a1c390115c6ffPatrik Flykt the Free Software Foundation; either version 2.1 of the License, or
139b011ab81ccea1d51f09e0261a1c390115c6ffPatrik Flykt (at your option) any later version.
139b011ab81ccea1d51f09e0261a1c390115c6ffPatrik Flykt systemd is distributed in the hope that it will be useful, but
139b011ab81ccea1d51f09e0261a1c390115c6ffPatrik Flykt WITHOUT ANY WARRANTY; without even the implied warranty of
139b011ab81ccea1d51f09e0261a1c390115c6ffPatrik Flykt MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
139b011ab81ccea1d51f09e0261a1c390115c6ffPatrik Flykt Lesser General Public License for more details.
139b011ab81ccea1d51f09e0261a1c390115c6ffPatrik Flykt You should have received a copy of the GNU Lesser General Public License
139b011ab81ccea1d51f09e0261a1c390115c6ffPatrik Flykt along with systemd; If not, see <http://www.gnu.org/licenses/>.
76253e73f9c9c24fec755e485516f3b55d0707b4Dan Williams#define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL*sizeof(HashItem))
76253e73f9c9c24fec755e485516f3b55d0707b4Dan Williams#define DEFAULT_FIELD_HASH_TABLE_SIZE (333ULL*sizeof(HashItem))
3733eec3e292e4ddb4cba5eb8d3bd8cbee7102d8Lennart Poettering#define COMPRESSION_SIZE_THRESHOLD (512ULL)
139b011ab81ccea1d51f09e0261a1c390115c6ffPatrik Flykt/* This is the minimum journal file size */
139b011ab81ccea1d51f09e0261a1c390115c6ffPatrik Flykt#define JOURNAL_FILE_SIZE_MIN (4ULL*1024ULL*1024ULL) /* 4 MiB */
139b011ab81ccea1d51f09e0261a1c390115c6ffPatrik Flykt/* These are the lower and upper bounds if we deduce the max_use value
76253e73f9c9c24fec755e485516f3b55d0707b4Dan Williams * from the file system size */
76253e73f9c9c24fec755e485516f3b55d0707b4Dan Williams#define DEFAULT_MAX_USE_LOWER (1ULL*1024ULL*1024ULL) /* 1 MiB */
76253e73f9c9c24fec755e485516f3b55d0707b4Dan Williams#define DEFAULT_MAX_USE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */
a9aff3615b430f86bd0a824214d95f634efaf894Patrik Flykt/* This is the upper bound if we deduce max_size from max_use */
346e13a25dc6f76d3bc9d8decd40dc4782b02d2aPatrik Flykt#define DEFAULT_MAX_SIZE_UPPER (128ULL*1024ULL*1024ULL) /* 128 MiB */
a9aff3615b430f86bd0a824214d95f634efaf894Patrik Flykt/* This is the upper bound if we deduce the keep_free value from the
bbfa43ca37df0718287c25a8e39ee7477ebf33f6Patrik Flykt * file system size */
da6fe470e17fa02f3adedc779585caf8669252bdPatrik Flykt#define DEFAULT_KEEP_FREE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */
da6fe470e17fa02f3adedc779585caf8669252bdPatrik Flykt/* This is the keep_free value when we can't determine the file system size */
d1b0afe3653b4316a6361d204169620726d468a0Patrik Flykt#define DEFAULT_KEEP_FREE (1024ULL*1024ULL) /* 1 MB */
d1b0afe3653b4316a6361d204169620726d468a0Patrik Flykt/* n_data was the first entry we added after the initial file format design */
d1b0afe3653b4316a6361d204169620726d468a0Patrik Flykt#define HEADER_SIZE_MIN ALIGN64(offsetof(Header, n_data))
139b011ab81ccea1d51f09e0261a1c390115c6ffPatrik Flykt/* How many entries to keep in the entry array chain cache at max */
139b011ab81ccea1d51f09e0261a1c390115c6ffPatrik Flykt/* How much to increase the journal file size at once each time we allocate something new. */
139b011ab81ccea1d51f09e0261a1c390115c6ffPatrik Flykt#define FILE_SIZE_INCREASE (8ULL*1024ULL*1024ULL) /* 8MB */
da6fe470e17fa02f3adedc779585caf8669252bdPatrik Flykt/* Reread fstat() of the file for detecting deletions at least this often */
41e4615d4f4f5c61afa84ba857f23c0ac496687bPatrik Flykt/* The mmap context to use for the header we pick as one above the last defined type */
a9aff3615b430f86bd0a824214d95f634efaf894Patrik Flyktstatic int journal_file_set_online(JournalFile *f) {
0ae0e5cd96813bacad43a39920a043d8d20a67dbLennart Poettering /* Write the final tag */
139b011ab81ccea1d51f09e0261a1c390115c6ffPatrik Flykt munmap(f->fss_file, PAGE_ALIGN(f->fss_file_size));
0ae0e5cd96813bacad43a39920a043d8d20a67dbLennart Poetteringstatic int journal_file_init_header(JournalFile *f, JournalFile *template) {
fe4b2156256c5bdf52341576571ce9f095d9f085Tom Gundersen f->compress_xz * HEADER_INCOMPATIBLE_COMPRESSED_XZ |
fe4b2156256c5bdf52341576571ce9f095d9f085Tom Gundersen f->compress_lz4 * HEADER_INCOMPATIBLE_COMPRESSED_LZ4);
fe4b2156256c5bdf52341576571ce9f095d9f085Tom Gundersen h.tail_entry_seqnum = template->header->tail_entry_seqnum;
764aad6258eec3bd4ae62ea341ea507bd69ce628Tom Gundersen if (k != sizeof(h))
bbfa43ca37df0718287c25a8e39ee7477ebf33f6Patrik Flyktstatic int journal_file_refresh_header(JournalFile *f) {
bbfa43ca37df0718287c25a8e39ee7477ebf33f6Patrik Flykt r = sd_id128_get_machine(&f->header->machine_id);
bbfa43ca37df0718287c25a8e39ee7477ebf33f6Patrik Flykt if (sd_id128_equal(boot_id, f->header->boot_id))
da6fe470e17fa02f3adedc779585caf8669252bdPatrik Flykt /* Sync the online state to disk */
da6fe470e17fa02f3adedc779585caf8669252bdPatrik Flyktstatic int journal_file_verify_header(JournalFile *f) {
da6fe470e17fa02f3adedc779585caf8669252bdPatrik Flykt if (memcmp(f->header->signature, HEADER_SIGNATURE, 8))
da6fe470e17fa02f3adedc779585caf8669252bdPatrik Flykt /* In both read and write mode we refuse to open files with
da6fe470e17fa02f3adedc779585caf8669252bdPatrik Flykt * incompatible flags we don't know */
da6fe470e17fa02f3adedc779585caf8669252bdPatrik Flykt flags = le32toh(f->header->incompatible_flags);
da6fe470e17fa02f3adedc779585caf8669252bdPatrik Flykt log_debug("Journal file %s has unknown incompatible flags %"PRIx32,
da6fe470e17fa02f3adedc779585caf8669252bdPatrik Flykt flags = (flags & HEADER_INCOMPATIBLE_ANY) & ~HEADER_INCOMPATIBLE_SUPPORTED;
da6fe470e17fa02f3adedc779585caf8669252bdPatrik Flykt log_debug("Journal file %s uses incompatible flags %"PRIx32
da6fe470e17fa02f3adedc779585caf8669252bdPatrik Flykt " disabled at compilation time.", f->path, flags);
ea3b3a75abb3f8b853f7da454b9b8e258a120eeaPatrik Flykt /* When open for writing we refuse to open files with
ea3b3a75abb3f8b853f7da454b9b8e258a120eeaPatrik Flykt * unknown compatible flags, too */
ea3b3a75abb3f8b853f7da454b9b8e258a120eeaPatrik Flykt if (f->writable && (flags & ~HEADER_COMPATIBLE_SUPPORTED)) {
e6b18ffaea7d557eec3028a37c043da67a78550cDavid Herrmann log_debug("Journal file %s has unknown compatible flags %"PRIx32,
ea3b3a75abb3f8b853f7da454b9b8e258a120eeaPatrik Flykt flags = (flags & HEADER_COMPATIBLE_ANY) & ~HEADER_COMPATIBLE_SUPPORTED;
ea3b3a75abb3f8b853f7da454b9b8e258a120eeaPatrik Flykt log_debug("Journal file %s uses compatible flags %"PRIx32
3f0c075f8ef3344da5a6bda524540201f9204e61Patrik Flykt " disabled at compilation time.", f->path, flags);
4e3e6679e8f73b83d38e4b20d8b025e12991d1cbPatrik Flykt /* The first addition was n_data, so check that we are at least this large */
f89087272b5561c9a3fc9d6a4e2a09f75f688fa7Thomas Haller if (le64toh(f->header->header_size) < HEADER_SIZE_MIN)
f89087272b5561c9a3fc9d6a4e2a09f75f688fa7Thomas Haller if (JOURNAL_HEADER_SEALED(f->header) && !JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
f89087272b5561c9a3fc9d6a4e2a09f75f688fa7Thomas Haller if ((le64toh(f->header->header_size) + le64toh(f->header->arena_size)) > (uint64_t) f->last_stat.st_size)
4e3e6679e8f73b83d38e4b20d8b025e12991d1cbPatrik Flykt if (le64toh(f->header->tail_object_offset) > (le64toh(f->header->header_size) + le64toh(f->header->arena_size)))
a9aff3615b430f86bd0a824214d95f634efaf894Patrik Flykt if (!VALID64(le64toh(f->header->data_hash_table_offset)) ||
c806ffb9592fa9a2b13a1f9f9be4c77cd5b211aaZbigniew Jędrzejewski-Szmek !VALID64(le64toh(f->header->field_hash_table_offset)) ||
a9aff3615b430f86bd0a824214d95f634efaf894Patrik Flykt !VALID64(le64toh(f->header->tail_object_offset)) ||
c3e2adeaba8e043caed0ef139eeaea016bd152d0Patrik Flykt !VALID64(le64toh(f->header->entry_array_offset)))
d1b0afe3653b4316a6361d204169620726d468a0Patrik Flykt if (!sd_id128_equal(machine_id, f->header->machine_id))
139b011ab81ccea1d51f09e0261a1c390115c6ffPatrik Flykt log_debug("Journal file %s is already online. Assuming unclean closing.", f->path);
139b011ab81ccea1d51f09e0261a1c390115c6ffPatrik Flykt log_debug("Journal file %s has unknown state %u.", f->path, state);
139b011ab81ccea1d51f09e0261a1c390115c6ffPatrik Flykt f->compress_xz = JOURNAL_HEADER_COMPRESSED_XZ(f->header);
346e13a25dc6f76d3bc9d8decd40dc4782b02d2aPatrik Flykt f->compress_lz4 = JOURNAL_HEADER_COMPRESSED_LZ4(f->header);
a9aff3615b430f86bd0a824214d95f634efaf894Patrik Flykt /* Refuse appending to files that are already deleted */
bbfa43ca37df0718287c25a8e39ee7477ebf33f6Patrik Flyktstatic int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
ed6ee21953dac9c78383da00bc4514ece6b75ab5Patrik Flykt /* We assume that this file is not sparse, and we know that
ed6ee21953dac9c78383da00bc4514ece6b75ab5Patrik Flykt * for sure, since we always call posix_fallocate()
3dc34fcc97b41f8b7b019027225b121dfbb9871dPatrik Flykt if (new_size < le64toh(f->header->header_size))
3dc34fcc97b41f8b7b019027225b121dfbb9871dPatrik Flykt /* We already pre-allocated enough space, but before
7246333cb803b03440d3bd0bdaa233564d09b5aePatrik Flykt * we write to it, let's check with fstat() if the
7246333cb803b03440d3bd0bdaa233564d09b5aePatrik Flykt * file got deleted, in order to make sure we don't throw
7246333cb803b03440d3bd0bdaa233564d09b5aePatrik Flykt * away the data immediately. Don't check fstat() for
7246333cb803b03440d3bd0bdaa233564d09b5aePatrik Flykt * all writes though, but only once every 10s. */
7246333cb803b03440d3bd0bdaa233564d09b5aePatrik Flykt if (f->last_stat_usec + LAST_STAT_REFRESH_USEC > now(CLOCK_MONOTONIC))
a9aff3615b430f86bd0a824214d95f634efaf894Patrik Flykt /* Allocate more space. */
3dc34fcc97b41f8b7b019027225b121dfbb9871dPatrik Flykt if (f->metrics.max_size > 0 && new_size > f->metrics.max_size)
3dc34fcc97b41f8b7b019027225b121dfbb9871dPatrik Flykt if (new_size > f->metrics.min_size && f->metrics.keep_free > 0) {
7246333cb803b03440d3bd0bdaa233564d09b5aePatrik Flykt /* Increase by larger blocks at once */
66eac1201a9c1596f5901f8dbbf24bda7e350878Dan Williams new_size = ((new_size+FILE_SIZE_INCREASE-1) / FILE_SIZE_INCREASE) * FILE_SIZE_INCREASE;
7246333cb803b03440d3bd0bdaa233564d09b5aePatrik Flykt if (f->metrics.max_size > 0 && new_size > f->metrics.max_size)
346e13a25dc6f76d3bc9d8decd40dc4782b02d2aPatrik Flykt /* Note that the glibc fallocate() fallback is very
346e13a25dc6f76d3bc9d8decd40dc4782b02d2aPatrik Flykt inefficient, hence we try to minimize the allocation area
346e13a25dc6f76d3bc9d8decd40dc4782b02d2aPatrik Flykt as much as we can. */
346e13a25dc6f76d3bc9d8decd40dc4782b02d2aPatrik Flykt r = posix_fallocate(f->fd, old_size, new_size - old_size);
346e13a25dc6f76d3bc9d8decd40dc4782b02d2aPatrik Flykt f->header->arena_size = htole64(new_size - le64toh(f->header->header_size));
a9aff3615b430f86bd0a824214d95f634efaf894Patrik Flyktstatic unsigned type_to_context(ObjectType type) {
a9aff3615b430f86bd0a824214d95f634efaf894Patrik Flykt /* One context for each type, plus one catch-all for the rest */
a9aff3615b430f86bd0a824214d95f634efaf894Patrik Flykt assert_cc(_OBJECT_TYPE_MAX <= MMAP_CACHE_MAX_CONTEXTS);
a9aff3615b430f86bd0a824214d95f634efaf894Patrik Flykt assert_cc(CONTEXT_HEADER < MMAP_CACHE_MAX_CONTEXTS);
a9aff3615b430f86bd0a824214d95f634efaf894Patrik Flykt return type > OBJECT_UNUSED && type < _OBJECT_TYPE_MAX ? type : 0;
a9aff3615b430f86bd0a824214d95f634efaf894Patrik Flyktstatic int journal_file_move_to(JournalFile *f, ObjectType type, bool keep_always, uint64_t offset, uint64_t size, void **ret) {
a34b57c0d43b8bf819ccd4f62c314b41b625454dPatrik Flykt /* Avoid SIGBUS on invalid accesses */
a34b57c0d43b8bf819ccd4f62c314b41b625454dPatrik Flykt if (offset + size > (uint64_t) f->last_stat.st_size) {
a34b57c0d43b8bf819ccd4f62c314b41b625454dPatrik Flykt /* Hmm, out of range? Let's refresh the fstat() data
a34b57c0d43b8bf819ccd4f62c314b41b625454dPatrik Flykt * first, before we trust that check. */
a34b57c0d43b8bf819ccd4f62c314b41b625454dPatrik Flykt if (offset + size > (uint64_t) f->last_stat.st_size)
a34b57c0d43b8bf819ccd4f62c314b41b625454dPatrik Flykt return mmap_cache_get(f->mmap, f->fd, f->prot, type_to_context(type), keep_always, offset, size, &f->last_stat, ret);
a34b57c0d43b8bf819ccd4f62c314b41b625454dPatrik Flyktstatic uint64_t minimum_header_size(Object *o) {
a34b57c0d43b8bf819ccd4f62c314b41b625454dPatrik Flykt [OBJECT_DATA_HASH_TABLE] = sizeof(HashTableObject),
a34b57c0d43b8bf819ccd4f62c314b41b625454dPatrik Flykt [OBJECT_FIELD_HASH_TABLE] = sizeof(HashTableObject),
a34b57c0d43b8bf819ccd4f62c314b41b625454dPatrik Flykt [OBJECT_ENTRY_ARRAY] = sizeof(EntryArrayObject),
a34b57c0d43b8bf819ccd4f62c314b41b625454dPatrik Flykt if (o->object.type >= ELEMENTSOF(table) || table[o->object.type] <= 0)
a34b57c0d43b8bf819ccd4f62c314b41b625454dPatrik Flykt return sizeof(ObjectHeader);
3dc34fcc97b41f8b7b019027225b121dfbb9871dPatrik Flyktint journal_file_move_to_object(JournalFile *f, ObjectType type, uint64_t offset, Object **ret) {
d1b0afe3653b4316a6361d204169620726d468a0Patrik Flykt /* Objects may only be located at multiples of 64 bits */
3dc34fcc97b41f8b7b019027225b121dfbb9871dPatrik Flykt r = journal_file_move_to(f, type, false, offset, sizeof(ObjectHeader), &t);
d1b0afe3653b4316a6361d204169620726d468a0Patrik Flykt if (s < sizeof(ObjectHeader))
513a6fa8679510ea1b55967bdb482dd5f8a39f21Ronny Chevalier if (type > OBJECT_UNUSED && o->object.type != type)
d1b0afe3653b4316a6361d204169620726d468a0Patrik Flykt if (s > sizeof(ObjectHeader)) {
d1b0afe3653b4316a6361d204169620726d468a0Patrik Flykt r = journal_file_move_to(f, type, false, offset, s, &t);
bbfa43ca37df0718287c25a8e39ee7477ebf33f6Patrik Flyktstatic uint64_t journal_file_entry_seqnum(JournalFile *f, uint64_t *seqnum) {
7246333cb803b03440d3bd0bdaa233564d09b5aePatrik Flykt /* If an external seqnum counter was passed, we update
d1b0afe3653b4316a6361d204169620726d468a0Patrik Flykt * both the local and the external one, and set it to
d1b0afe3653b4316a6361d204169620726d468a0Patrik Flykt * the maximum of both */
3dc34fcc97b41f8b7b019027225b121dfbb9871dPatrik Flyktint journal_file_append_object(JournalFile *f, ObjectType type, uint64_t size, Object **ret, uint64_t *offset) {
3dc34fcc97b41f8b7b019027225b121dfbb9871dPatrik Flykt assert(type > OBJECT_UNUSED && type < _OBJECT_TYPE_MAX);
3dc34fcc97b41f8b7b019027225b121dfbb9871dPatrik Flykt r = journal_file_move_to_object(f, OBJECT_UNUSED, p, &tail);
d1b0afe3653b4316a6361d204169620726d468a0Patrik Flykt r = journal_file_move_to(f, type, false, p, size, &t);
d1b0afe3653b4316a6361d204169620726d468a0Patrik Flykt f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);
d1b0afe3653b4316a6361d204169620726d468a0Patrik Flyktstatic int journal_file_setup_data_hash_table(JournalFile *f) {
d1b0afe3653b4316a6361d204169620726d468a0Patrik Flykt /* We estimate that we need 1 hash table entry per 768 bytes of
d1b0afe3653b4316a6361d204169620726d468a0Patrik Flykt journal file and we want to make sure we never get beyond
fa94c34b083b5b4019975624453e53d0cbad2a5dTom Gundersen 75% fill level. Calculate the hash table size for the
d1b0afe3653b4316a6361d204169620726d468a0Patrik Flykt maximum file size based on these metrics. */
d1b0afe3653b4316a6361d204169620726d468a0Patrik Flykt s = (f->metrics.max_size * 4 / 768 / 3) * sizeof(HashItem);
d1b0afe3653b4316a6361d204169620726d468a0Patrik Flykt log_debug("Reserving %"PRIu64" entries in hash table.", s / sizeof(HashItem));
d1b0afe3653b4316a6361d204169620726d468a0Patrik Flykt f->header->data_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
d1b0afe3653b4316a6361d204169620726d468a0Patrik Flyktstatic int journal_file_setup_field_hash_table(JournalFile *f) {
d1b0afe3653b4316a6361d204169620726d468a0Patrik Flykt /* We use a fixed size hash table for the fields as this
d1b0afe3653b4316a6361d204169620726d468a0Patrik Flykt * number should grow very slowly only */
d1b0afe3653b4316a6361d204169620726d468a0Patrik Flykt f->header->field_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
f12abb48fc510b8b349c05e35ba048134debaf25Patrik Flyktstatic int journal_file_map_data_hash_table(JournalFile *f) {
cfb5b3805759e63dc5e0cae6e92e1df885b5c5b6Tom Gundersen p = le64toh(f->header->data_hash_table_offset);
44481a8b537839cd9ffead4d261491641f5b5260Zbigniew Jędrzejewski-Szmekstatic int journal_file_map_field_hash_table(JournalFile *f) {
631bbe71298ec892f77f44f94feb612646fe6853Patrik Flykt p = le64toh(f->header->field_hash_table_offset);
631bbe71298ec892f77f44f94feb612646fe6853Patrik Flykt m = le64toh(f->header->field_hash_table_size) / sizeof(HashItem);
631bbe71298ec892f77f44f94feb612646fe6853Patrik Flykt /* This might alter the window we are looking at */
631bbe71298ec892f77f44f94feb612646fe6853Patrik Flykt o->field.next_hash_offset = o->field.head_data_offset = 0;
631bbe71298ec892f77f44f94feb612646fe6853Patrik Flykt p = le64toh(f->field_hash_table[h].tail_hash_offset);
631bbe71298ec892f77f44f94feb612646fe6853Patrik Flykt f->field_hash_table[h].head_hash_offset = htole64(offset);
631bbe71298ec892f77f44f94feb612646fe6853Patrik Flykt r = journal_file_move_to_object(f, OBJECT_FIELD, p, &o);
631bbe71298ec892f77f44f94feb612646fe6853Patrik Flykt f->field_hash_table[h].tail_hash_offset = htole64(offset);
631bbe71298ec892f77f44f94feb612646fe6853Patrik Flykt if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
631bbe71298ec892f77f44f94feb612646fe6853Patrik Flykt f->header->n_fields = htole64(le64toh(f->header->n_fields) + 1);
631bbe71298ec892f77f44f94feb612646fe6853Patrik Flykt m = le64toh(f->header->data_hash_table_size) / sizeof(HashItem);
ed6ee21953dac9c78383da00bc4514ece6b75ab5Patrik Flykt /* This might alter the window we are looking at */
ed6ee21953dac9c78383da00bc4514ece6b75ab5Patrik Flykt o->data.next_hash_offset = o->data.next_field_offset = 0;
ed6ee21953dac9c78383da00bc4514ece6b75ab5Patrik Flykt o->data.entry_offset = o->data.entry_array_offset = 0;
ed6ee21953dac9c78383da00bc4514ece6b75ab5Patrik Flykt p = le64toh(f->data_hash_table[h].tail_hash_offset);
7bd8e95d44977833d0de3fc4e893eb3bc84351d6Patrik Flykt /* Only entry in the hash table is easy */
7bd8e95d44977833d0de3fc4e893eb3bc84351d6Patrik Flykt f->data_hash_table[h].head_hash_offset = htole64(offset);
7bd8e95d44977833d0de3fc4e893eb3bc84351d6Patrik Flykt /* Move back to the previous data object, to patch in
7bd8e95d44977833d0de3fc4e893eb3bc84351d6Patrik Flykt * pointer */
5da1b97f3c3d15521f2dcfbc18eccd6580122ebcPatrik Flykt r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
5da1b97f3c3d15521f2dcfbc18eccd6580122ebcPatrik Flykt f->data_hash_table[h].tail_hash_offset = htole64(offset);
6599680e2d33597f0f11a99e1c3c957b42418568Patrik Flykt if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
6599680e2d33597f0f11a99e1c3c957b42418568Patrik Flykt f->header->n_data = htole64(le64toh(f->header->n_data) + 1);
41e4615d4f4f5c61afa84ba857f23c0ac496687bPatrik Flykt const void *field, uint64_t size, uint64_t hash,
c47e8936a43ce546e8a74fa569e9fbfae6c64be7Patrik Flykt osize = offsetof(Object, field.payload) + size;
c47e8936a43ce546e8a74fa569e9fbfae6c64be7Patrik Flykt m = le64toh(f->header->field_hash_table_size) / sizeof(HashItem);
bbfa43ca37df0718287c25a8e39ee7477ebf33f6Patrik Flykt p = le64toh(f->field_hash_table[h].head_hash_offset);
bbfa43ca37df0718287c25a8e39ee7477ebf33f6Patrik Flykt while (p > 0) {
bbfa43ca37df0718287c25a8e39ee7477ebf33f6Patrik Flykt r = journal_file_move_to_object(f, OBJECT_FIELD, p, &o);
a34b57c0d43b8bf819ccd4f62c314b41b625454dPatrik Flykt return journal_file_find_field_object_with_hash(f,
631bbe71298ec892f77f44f94feb612646fe6853Patrik Flykt const void *data, uint64_t size, uint64_t hash,
631bbe71298ec892f77f44f94feb612646fe6853Patrik Flykt m = le64toh(f->header->data_hash_table_size) / sizeof(HashItem);
bbfa43ca37df0718287c25a8e39ee7477ebf33f6Patrik Flykt p = le64toh(f->data_hash_table[h].head_hash_offset);
f89087272b5561c9a3fc9d6a4e2a09f75f688fa7Thomas Haller while (p > 0) {
631bbe71298ec892f77f44f94feb612646fe6853Patrik Flykt r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
631bbe71298ec892f77f44f94feb612646fe6853Patrik Flykt if (o->object.flags & OBJECT_COMPRESSION_MASK) {
631bbe71298ec892f77f44f94feb612646fe6853Patrik Flykt r = decompress_blob(o->object.flags & OBJECT_COMPRESSION_MASK,
631bbe71298ec892f77f44f94feb612646fe6853Patrik Flykt o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize, 0);
bbfa43ca37df0718287c25a8e39ee7477ebf33f6Patrik Flykt return journal_file_find_data_object_with_hash(f,
a34b57c0d43b8bf819ccd4f62c314b41b625454dPatrik Flykt r = journal_file_find_field_object_with_hash(f, field, size, hash, &o, &p);
a34b57c0d43b8bf819ccd4f62c314b41b625454dPatrik Flykt else if (r > 0) {
a34b57c0d43b8bf819ccd4f62c314b41b625454dPatrik Flykt osize = offsetof(Object, field.payload) + size;
a34b57c0d43b8bf819ccd4f62c314b41b625454dPatrik Flykt r = journal_file_append_object(f, OBJECT_FIELD, osize, &o, &p);
631bbe71298ec892f77f44f94feb612646fe6853Patrik Flykt /* The linking might have altered the window, so let's
631bbe71298ec892f77f44f94feb612646fe6853Patrik Flykt * refresh our pointer */
a9aff3615b430f86bd0a824214d95f634efaf894Patrik Flykt r = journal_file_move_to_object(f, OBJECT_FIELD, p, &o);
a34b57c0d43b8bf819ccd4f62c314b41b625454dPatrik Flykt r = journal_file_hmac_put_object(f, OBJECT_FIELD, o, p);
bbfa43ca37df0718287c25a8e39ee7477ebf33f6Patrik Flykt const void *eq;
7246333cb803b03440d3bd0bdaa233564d09b5aePatrik Flykt r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);
7246333cb803b03440d3bd0bdaa233564d09b5aePatrik Flykt else if (r > 0) {
a34b57c0d43b8bf819ccd4f62c314b41b625454dPatrik Flykt r = journal_file_append_object(f, OBJECT_DATA, osize, &o, &p);
ed19c567e5fcdcec1a2b6dbac63787e001ad5d55Tom Gundersen compression = compress_blob(data, size, o->data.payload, &rsize);
a34b57c0d43b8bf819ccd4f62c314b41b625454dPatrik Flykt o->object.size = htole64(offsetof(Object, data.payload) + rsize);
a34b57c0d43b8bf819ccd4f62c314b41b625454dPatrik Flykt log_debug("Compressed data object %"PRIu64" -> %zu using %s",
a34b57c0d43b8bf819ccd4f62c314b41b625454dPatrik Flykt size, rsize, object_compressed_to_string(compression));
356779df90a2ecab5da2cb310ad0f8ebc9ca9f46Lennart Poettering r = journal_file_link_data(f, o, p, hash);
a34b57c0d43b8bf819ccd4f62c314b41b625454dPatrik Flykt /* The linking might have altered the window, so let's
a34b57c0d43b8bf819ccd4f62c314b41b625454dPatrik Flykt * refresh our pointer */
a34b57c0d43b8bf819ccd4f62c314b41b625454dPatrik Flykt r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
a34b57c0d43b8bf819ccd4f62c314b41b625454dPatrik Flykt /* Create field object ... */
a34b57c0d43b8bf819ccd4f62c314b41b625454dPatrik Flykt r = journal_file_append_field(f, data, (uint8_t*) eq - (uint8_t*) data, &fo, &fp);
9021bb9f935c93b516b10c88db2a212a9e3a8140Tom Gundersen /* ... and link it in. */
9021bb9f935c93b516b10c88db2a212a9e3a8140Tom Gundersen o->data.next_field_offset = fo->field.head_data_offset;
c3e2adeaba8e043caed0ef139eeaea016bd152d0Patrik Flykt r = journal_file_hmac_put_object(f, OBJECT_DATA, o, p);
d1b0afe3653b4316a6361d204169620726d468a0Patrik Flyktuint64_t journal_file_entry_n_items(Object *o) {
f12abb48fc510b8b349c05e35ba048134debaf25Patrik Flykt return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem);
0ae0e5cd96813bacad43a39920a043d8d20a67dbLennart Poetteringuint64_t journal_file_entry_array_n_items(Object *o) {
0ae0e5cd96813bacad43a39920a043d8d20a67dbLennart Poettering return (le64toh(o->object.size) - offsetof(Object, entry_array.items)) / sizeof(uint64_t);
139b011ab81ccea1d51f09e0261a1c390115c6ffPatrik Flyktuint64_t journal_file_hash_table_n_items(Object *o) {
139b011ab81ccea1d51f09e0261a1c390115c6ffPatrik Flykt if (o->object.type != OBJECT_DATA_HASH_TABLE &&
d7c9c21f18704580f66a1ce73fb6b506fdf40732Patrik Flykt return (le64toh(o->object.size) - offsetof(Object, hash_table.items)) / sizeof(HashItem);
f12abb48fc510b8b349c05e35ba048134debaf25Patrik Flyktstatic int link_entry_into_array(JournalFile *f,
bbfa43ca37df0718287c25a8e39ee7477ebf33f6Patrik Flykt while (a > 0) {
bbfa43ca37df0718287c25a8e39ee7477ebf33f6Patrik Flykt r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
bbfa43ca37df0718287c25a8e39ee7477ebf33f6Patrik Flykt if (i < n) {
bbfa43ca37df0718287c25a8e39ee7477ebf33f6Patrik Flykt a = le64toh(o->entry_array.next_entry_array_offset);
bbfa43ca37df0718287c25a8e39ee7477ebf33f6Patrik Flykt r = journal_file_append_object(f, OBJECT_ENTRY_ARRAY,
bbfa43ca37df0718287c25a8e39ee7477ebf33f6Patrik Flykt offsetof(Object, entry_array.items) + n * sizeof(uint64_t),
139b011ab81ccea1d51f09e0261a1c390115c6ffPatrik Flykt r = journal_file_hmac_put_object(f, OBJECT_ENTRY_ARRAY, o, q);
139b011ab81ccea1d51f09e0261a1c390115c6ffPatrik Flykt r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, ap, &o);
139b011ab81ccea1d51f09e0261a1c390115c6ffPatrik Flykt o->entry_array.next_entry_array_offset = htole64(q);
139b011ab81ccea1d51f09e0261a1c390115c6ffPatrik Flykt if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
139b011ab81ccea1d51f09e0261a1c390115c6ffPatrik Flykt f->header->n_entry_arrays = htole64(le64toh(f->header->n_entry_arrays) + 1);
139b011ab81ccea1d51f09e0261a1c390115c6ffPatrik Flyktstatic int link_entry_into_array_plus_one(JournalFile *f,
3733eec3e292e4ddb4cba5eb8d3bd8cbee7102d8Lennart Poettering r = link_entry_into_array(f, first, &i, p);
3733eec3e292e4ddb4cba5eb8d3bd8cbee7102d8Lennart Poetteringstatic int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) {
3733eec3e292e4ddb4cba5eb8d3bd8cbee7102d8Lennart Poettering p = le64toh(o->entry.items[i].object_offset);
3f0c075f8ef3344da5a6bda524540201f9204e61Patrik Flykt r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
f12abb48fc510b8b349c05e35ba048134debaf25Patrik Flyktstatic int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
da6fe470e17fa02f3adedc779585caf8669252bdPatrik Flykt /* Link up the entry itself */
/* log_debug("=> %s seqnr=%"PRIu64" n_entries=%"PRIu64, f->path, o->entry.seqnum, f->header->n_entries); */
f->tail_entry_monotonic_valid = true;
n = journal_file_entry_n_items(o);
static int journal_file_append_entry_internal(
JournalFile *f,
Object *o;
assert(f);
#ifdef HAVE_GCRYPT
if (ret)
*ret = o;
if (offset)
assert(f);
int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqnum, Object **ret, uint64_t *offset) {
assert(f);
if (!ts) {
if (f->tail_entry_monotonic_valid &&
return -EINVAL;
#ifdef HAVE_GCRYPT
for (i = 0; i < n_iovec; i++) {
uint64_t p;
Object *o;
r = -EIO;
typedef struct ChainCacheItem {
static void chain_cache_put(
OrderedHashmap *h,
if (!ci) {
if (!ci)
static int generic_array_get(
JournalFile *f,
uint64_t i,
Object *o;
uint64_t p = 0, a, t = 0;
assert(f);
a = first;
uint64_t k;
k = journal_file_entry_array_n_items(o);
goto found;
if (ret)
*ret = o;
if (offset)
*offset = p;
static int generic_array_get_plus_one(
JournalFile *f,
uint64_t i,
Object *o;
assert(f);
if (ret)
*ret = o;
if (offset)
static int generic_array_bisect(
JournalFile *f,
uint64_t n,
bool subtract_one = false;
assert(f);
a = first;
if (r == TEST_LEFT) {
if (right <= 0)
return -EBADMSG;
if (r == TEST_FOUND)
if (r == TEST_RIGHT) {
left = 0;
if (last_index > 0) {
return -EBADMSG;
if (r == TEST_FOUND)
if (r == TEST_RIGHT)
right = x;
return -EBADMSG;
if (r == TEST_FOUND)
if (r == TEST_RIGHT)
right = y;
subtract_one = true;
i = left;
goto found;
return -EBADMSG;
if (r == TEST_FOUND)
if (r == TEST_RIGHT)
right = i;
subtract_one = true;
goto found;
if (subtract_one && t == 0 && i == 0)
chain_cache_put(f->chain_cache, ci, first, a, le64toh(array->entry_array.items[0]), t, subtract_one ? (i > 0 ? i-1 : (uint64_t) -1) : i);
if (subtract_one && i == 0)
p = last_p;
else if (subtract_one)
if (ret)
*ret = o;
if (offset)
*offset = p;
if (idx)
static int generic_array_bisect_plus_one(
JournalFile *f,
uint64_t n,
bool step_back = false;
Object *o;
assert(f);
if (r == TEST_FOUND)
if (r == TEST_LEFT)
if (r == TEST_RIGHT) {
goto found;
if (r == 0 && step_back)
goto found;
if (r > 0 && idx)
(*idx) ++;
if (ret)
*ret = o;
if (offset)
if (idx)
*idx = 0;
assert(f);
assert(p > 0);
if (p == needle)
return TEST_FOUND;
else if (p < needle)
return TEST_LEFT;
return TEST_RIGHT;
Object *o;
assert(f);
assert(p > 0);
return TEST_FOUND;
return TEST_LEFT;
return TEST_RIGHT;
JournalFile *f,
return generic_array_bisect(f,
Object *o;
assert(f);
assert(p > 0);
return TEST_FOUND;
return TEST_LEFT;
return TEST_RIGHT;
JournalFile *f,
return generic_array_bisect(f,
Object *o;
assert(f);
assert(p > 0);
return TEST_FOUND;
return TEST_LEFT;
return TEST_RIGHT;
static inline int find_data_object_by_boot_id(
JournalFile *f,
Object **o,
uint64_t *b) {
JournalFile *f,
Object *o;
assert(f);
return -ENOENT;
return generic_array_bisect_plus_one(f,
f->current_offset = 0;
f->current_seqnum = 0;
f->current_realtime = 0;
f->current_monotonic = 0;
f->current_xor_hash = 0;
void journal_file_save_location(JournalFile *f, direction_t direction, Object *o, uint64_t offset) {
JournalFile *f,
uint64_t p,
assert(f);
r = generic_array_bisect(f,
r = generic_array_get(f,
f->path, i);
return -EBADMSG;
if (offset)
JournalFile *f,
uint64_t n, i;
Object *d;
assert(f);
assert(p > 0 || !o);
return -EINVAL;
return generic_array_get_plus_one(f,
JournalFile *f,
uint64_t p,
Object *d;
assert(f);
return generic_array_bisect_plus_one(f,
JournalFile *f,
Object *o, *d;
uint64_t b, z;
assert(f);
return -ENOENT;
uint64_t p, q;
if (ret)
if (offset)
*offset = q;
JournalFile *f,
Object *d;
assert(f);
return generic_array_bisect_plus_one(f,
JournalFile *f,
Object *d;
assert(f);
return generic_array_bisect_plus_one(f,
Object *o;
uint64_t p;
assert(f);
goto fail;
case OBJECT_UNUSED:
case OBJECT_DATA:
case OBJECT_FIELD:
case OBJECT_ENTRY:
case OBJECT_FIELD_HASH_TABLE:
case OBJECT_DATA_HASH_TABLE:
case OBJECT_ENTRY_ARRAY:
case OBJECT_TAG:
fail:
assert(f);
f->path,
100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))));
100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))));
int journal_file_open(
const char *fname,
int flags,
bool compress,
bool seal,
bool newly_created = false;
JournalFile *f;
return -EINVAL;
return -EINVAL;
return -ENOMEM;
#if defined(HAVE_LZ4)
#ifdef HAVE_GCRYPT
if (mmap_cache)
if (!f->mmap) {
r = -ENOMEM;
goto fail;
if (!f->path) {
r = -ENOMEM;
goto fail;
if (!f->chain_cache) {
r = -ENOMEM;
goto fail;
if (f->fd < 0) {
r = -errno;
goto fail;
r = journal_file_fstat(f);
goto fail;
#ifdef HAVE_GCRYPT
if (f->seal) {
r = journal_file_fss_load(f);
f->seal = false;
goto fail;
r = journal_file_fstat(f);
goto fail;
newly_created = true;
r = -EIO;
goto fail;
r = mmap_cache_get(f->mmap, f->fd, f->prot, CONTEXT_HEADER, true, 0, PAGE_ALIGN(sizeof(Header)), &f->last_stat, &h);
r = -errno;
goto fail;
f->header = h;
if (!newly_created) {
r = journal_file_verify_header(f);
goto fail;
#ifdef HAVE_GCRYPT
r = journal_file_fss_load(f);
goto fail;
if (f->writable) {
if (metrics) {
} else if (template)
r = journal_file_refresh_header(f);
goto fail;
#ifdef HAVE_GCRYPT
r = journal_file_hmac_setup(f);
goto fail;
if (newly_created) {
goto fail;
goto fail;
#ifdef HAVE_GCRYPT
r = journal_file_append_first_tag(f);
goto fail;
goto fail;
r = journal_file_map_data_hash_table(f);
goto fail;
r = -EIO;
goto fail;
*ret = f;
fail:
r = -EIO;
size_t l;
assert(f);
assert(*f);
old_file = *f;
return -EINVAL;
return -EINVAL;
return -ENOMEM;
return -errno;
r = journal_file_open(old_file->path, old_file->flags, old_file->mode, compress, seal, NULL, old_file->mmap, old_file, &new_file);
*f = new_file;
const char *fname,
int flags,
bool compress,
bool seal,
size_t l;
random_u64()) < 0)
return -ENOMEM;
return -errno;
(void) btrfs_defrag(p);
int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) {
uint64_t i, n;
assert(o);
assert(p);
return -EPERM;
n = journal_file_entry_n_items(o);
uint64_t l, h;
size_t t;
void *data;
Object *u;
return -EBADMSG;
t = (size_t) l;
if ((uint64_t) t != l)
return -E2BIG;
l = rsize;
return -EPROTONOSUPPORT;
return -EIO;
assert(m);
if (fs_size > 0) {
if (fs_size > 0) {
assert(f);
if (from) {
return -ENOENT;
if (to) {
return -ENOENT;
int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot_id, usec_t *from, usec_t *to) {
Object *o;
uint64_t p;
assert(f);
if (from) {
if (to) {
r = generic_array_get_plus_one(f,
&o, NULL);
assert(f);
if (le64toh(f->header->n_data) * 4ULL > (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)) * 3ULL) {
log_debug("Data hash table of %s has a fill level at %.1f (%"PRIu64" of %"PRIu64" items, %llu file size, %"PRIu64" bytes per hash table item), suggesting rotation.",
f->path,
100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))),
if (le64toh(f->header->n_fields) * 4ULL > (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)) * 3ULL) {
log_debug("Field hash table of %s has a fill level at %.1f (%"PRIu64" of %"PRIu64" items), suggesting rotation.",
f->path,
100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))),
if (max_file_usec > 0) {
usec_t t, h;
if (h > 0 && t > h + max_file_usec)